summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c21
1 files changed, 21 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index cfa4a7dc4d7..d078b00c98e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1096,6 +1096,8 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
char key[512] = {0,};
glusterd_brickinfo_t *brickinfo = NULL;
int32_t i = 1;
+ char uuid_str[50] = {0,};
+ char *volume_id_str = NULL;
GF_ASSERT (dict);
GF_ASSERT (volinfo);
@@ -1141,6 +1143,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ uuid_unparse (volinfo->volume_id, uuid_str);
+ volume_id_str = gf_strdup (uuid_str);
+ if (!volume_id_str)
+ goto out;
+
+ memset (&key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d.volume_id", count);
+ ret = dict_set_dynstr (dict, key, volume_id_str);
+ if (ret)
+ goto out;
+
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
memset (&key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.brick%d.hostname",
@@ -1289,6 +1302,7 @@ glusterd_import_friend_volume (dict_t *vols, int count)
glusterd_brickinfo_t *tmp = NULL;
int new_volinfo = 0;
int i = 1;
+ char *volume_id_str = NULL;
GF_ASSERT (vols);
@@ -1346,6 +1360,13 @@ glusterd_import_friend_volume (dict_t *vols, int count)
if (ret)
goto out;
+ memset (&key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.volume_id", count);
+ ret = dict_get_str (vols, key, &volume_id_str);
+ if (ret)
+ goto out;
+ uuid_parse (volume_id_str, volinfo->volume_id);
+
list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
brick_list) {
ret = glusterd_brickinfo_delete (brickinfo);
xlators/cluster/afr/src/Makefile.am32
-rw-r--r--xlators/cluster/afr/src/afr-common.c5252
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c823
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h36
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2635
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h46
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c2142
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h42
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c3023
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h76
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1764
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h40
-rw-r--r--xlators/cluster/afr/src/afr-messages.h373
-rw-r--r--xlators/cluster/afr/src/afr-open.c566
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c268
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c1074
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h60
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2817
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h70
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1675
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3158
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c1071
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c719
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h297
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1222
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h80
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c2898
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h61
-rw-r--r--xlators/cluster/afr/src/afr.c3721
-rw-r--r--xlators/cluster/afr/src/afr.h1502
-rw-r--r--xlators/cluster/afr/src/pump.c2470
-rw-r--r--xlators/cluster/afr/src/pump.h81
-rw-r--r--xlators/cluster/dht/src/Makefile.am49
-rw-r--r--xlators/cluster/dht/src/dht-common.c10053
-rw-r--r--xlators/cluster/dht/src/dht-common.h1295
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c547
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c140
-rw-r--r--xlators/cluster/dht/src/dht-helper.c2492
-rw-r--r--xlators/cluster/dht/src/dht-helper.h19
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1397
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1208
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1052
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c464
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h37
-rw-r--r--xlators/cluster/dht/src/dht-messages.h1075
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c4201
-rw-r--r--xlators/cluster/dht/src/dht-rename.c1819
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c2779
-rw-r--r--xlators/cluster/dht/src/dht-shared.c1087
-rw-r--r--xlators/cluster/dht/src/dht.c484
-rw-r--r--xlators/cluster/dht/src/dht.sym8
-rw-r--r--xlators/cluster/dht/src/nufa.c949
-rw-r--r--xlators/cluster/dht/src/nufa.sym8
-rw-r--r--xlators/cluster/dht/src/switch.c1057
-rw-r--r--xlators/cluster/dht/src/switch.sym8
-rw-r--r--xlators/cluster/dht/src/tier-common.c1084
-rw-r--r--xlators/cluster/dht/src/tier-common.h62
-rw-r--r--xlators/cluster/dht/src/tier.c2518
-rw-r--r--xlators/cluster/dht/src/tier.h105
-rw-r--r--xlators/cluster/dht/src/tier.sym9
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c72
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c125
-rw-r--r--xlators/cluster/ec/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/cluster/ec/src/Makefile.am53
-rw-r--r--xlators/cluster/ec/src/ec-combine.c916
-rw-r--r--xlators/cluster/ec/src/ec-combine.h38
-rw-r--r--xlators/cluster/ec/src/ec-common.c2264
-rw-r--r--xlators/cluster/ec/src/ec-common.h120
-rw-r--r--xlators/cluster/ec/src/ec-data.c317
-rw-r--r--xlators/cluster/ec/src/ec-data.h335
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c625
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1498
-rw-r--r--xlators/cluster/ec/src/ec-fops.h202
-rw-r--r--xlators/cluster/ec/src/ec-generic.c1448
-rw-r--r--xlators/cluster/ec/src/ec-gf.c11635
-rw-r--r--xlators/cluster/ec/src/ec-gf.h23
-rw-r--r--xlators/cluster/ec/src/ec-heal.c2616
-rw-r--r--xlators/cluster/ec/src/ec-heald.c607
-rw-r--r--xlators/cluster/ec/src/ec-heald.h47
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c848
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h72
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c2046
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c1678
-rw-r--r--xlators/cluster/ec/src/ec-locks.c1169
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h27
-rw-r--r--xlators/cluster/ec/src/ec-messages.h526
-rw-r--r--xlators/cluster/ec/src/ec-method.c159
-rw-r--r--xlators/cluster/ec/src/ec-method.h32
-rw-r--r--xlators/cluster/ec/src/ec.c1378
-rw-r--r--xlators/cluster/ec/src/ec.h74
-rw-r--r--xlators/cluster/ha/src/Makefile.am8
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c55
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am8
-rw-r--r--xlators/cluster/map/src/map-helper.c29
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c42
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am16
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c677
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h29
-rw-r--r--xlators/cluster/stripe/src/stripe.c3922
-rw-r--r--xlators/cluster/stripe/src/stripe.h176
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am9
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c1382
-rw-r--r--xlators/debug/error-gen/src/error-gen.h84
-rw-r--r--xlators/debug/io-stats/src/Makefile.am10
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h27
-rw-r--r--xlators/debug/io-stats/src/io-stats.c3306
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c3334
-rw-r--r--xlators/debug/trace/src/trace.h56
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c957
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h45
-rw-r--r--xlators/encryption/crypt/src/crypt.c4525
-rw-r--r--xlators/encryption/crypt/src/crypt.h900
-rw-r--r--xlators/encryption/crypt/src/data.c764
-rw-r--r--xlators/encryption/crypt/src/keys.c297
-rw-r--r--xlators/encryption/crypt/src/metadata.c614
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c95
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h25
-rw-r--r--xlators/experimental/Makefile.am3
-rw-r--r--xlators/experimental/README.md107
-rw-r--r--xlators/experimental/dht2/Makefile.am3
-rw-r--r--xlators/experimental/dht2/README.md47
-rw-r--r--xlators/experimental/dht2/TODO.md3
-rw-r--r--xlators/experimental/dht2/dht2-client/Makefile.am (renamed from xlators/features/access-control/Makefile.am)0
-rw-r--r--xlators/experimental/dht2/dht2-client/src/Makefile.am19
-rw-r--r--xlators/experimental/dht2/dht2-client/src/dht2-client-main.c59
-rw-r--r--xlators/experimental/dht2/dht2-common/src/dht2-common-map.c19
-rw-r--r--xlators/experimental/dht2/dht2-server/Makefile.am (renamed from xlators/nfs/lib/Makefile.am)0
-rw-r--r--xlators/experimental/dht2/dht2-server/src/Makefile.am19
-rw-r--r--xlators/experimental/dht2/dht2-server/src/dht2-server-main.c59
-rw-r--r--xlators/experimental/fdl/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/experimental/fdl/src/Makefile.am43
-rw-r--r--xlators/experimental/fdl/src/dump-tmpl.c156
-rw-r--r--xlators/experimental/fdl/src/fdl-tmpl.c506
-rwxr-xr-xxlators/experimental/fdl/src/gen_dumper.py116
-rwxr-xr-xxlators/experimental/fdl/src/gen_fdl.py328
-rwxr-xr-xxlators/experimental/fdl/src/gen_recon.py213
-rw-r--r--xlators/experimental/fdl/src/jnl-types.h14
-rw-r--r--xlators/experimental/fdl/src/logdump.c50
-rw-r--r--xlators/experimental/fdl/src/recon-tmpl.c305
-rw-r--r--xlators/experimental/fdl/src/recon.c89
-rw-r--r--xlators/experimental/jbr-client/Makefile.am3
-rw-r--r--xlators/experimental/jbr-client/src/Makefile.am32
-rw-r--r--xlators/experimental/jbr-client/src/fop-template.c113
-rwxr-xr-xxlators/experimental/jbr-client/src/gen-fops.py57
-rw-r--r--xlators/experimental/jbr-client/src/jbr-messages.h113
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.c320
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.h27
-rw-r--r--xlators/experimental/jbr-server/Makefile.am3
-rw-r--r--xlators/experimental/jbr-server/src/Makefile.am35
-rw-r--r--xlators/experimental/jbr-server/src/all-templates.c431
-rwxr-xr-xxlators/experimental/jbr-server/src/gen-fops.py178
-rw-r--r--xlators/experimental/jbr-server/src/jbr-internal.h116
-rw-r--r--xlators/experimental/jbr-server/src/jbr.c1675
-rw-r--r--xlators/experimental/posix2/Makefile.am3
-rw-r--r--xlators/experimental/posix2/README.md7
-rw-r--r--xlators/experimental/posix2/TODO.md3
-rw-r--r--xlators/experimental/posix2/common/Makefile.am3
-rw-r--r--xlators/experimental/posix2/common/src/Makefile.am13
-rw-r--r--xlators/experimental/posix2/common/src/posix2-common.c18
-rw-r--r--xlators/experimental/posix2/ds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/ds/src/Makefile.am18
-rw-r--r--xlators/experimental/posix2/ds/src/posix2-ds-main.c59
-rw-r--r--xlators/experimental/posix2/mds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/mds/src/Makefile.am18
-rw-r--r--xlators/experimental/posix2/mds/src/posix2-mds-main.c59
-rw-r--r--xlators/features/Makefile.am7
-rw-r--r--xlators/features/access-control/src/Makefile.am13
-rw-r--r--xlators/features/access-control/src/access-control.c1841
-rw-r--r--xlators/features/access-control/src/access-control.h55
-rw-r--r--xlators/features/arbiter/Makefile.am3
-rw-r--r--xlators/features/arbiter/src/Makefile.am15
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h19
-rw-r--r--xlators/features/arbiter/src/arbiter.c360
-rw-r--r--xlators/features/arbiter/src/arbiter.h21
-rw-r--r--xlators/features/barrier/Makefile.am3
-rw-r--r--xlators/features/barrier/src/Makefile.am16
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h20
-rw-r--r--xlators/features/barrier/src/barrier.c799
-rw-r--r--xlators/features/barrier/src/barrier.h82
-rw-r--r--xlators/features/bit-rot/Makefile.am (renamed from xlators/protocol/legacy/transport/ib-verbs/Makefile.am)0
-rw-r--r--xlators/features/bit-rot/src/Makefile.am1
-rw-r--r--xlators/features/bit-rot/src/bitd/Makefile.am22
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h448
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c73
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h48
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c1984
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h36
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.c114
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h36
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c2148
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h307
-rw-r--r--xlators/features/bit-rot/src/stub/Makefile.am17
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h179
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-object-version.h30
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c633
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h34
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h271
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c3245
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h463
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes-multi.c88
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c93
-rw-r--r--xlators/features/changelog/lib/examples/c/get-history.c116
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py33
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py70
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am31
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h287
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c224
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c219
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h259
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c1065
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal.h116
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c425
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.c99
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h26
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c623
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c991
-rw-r--r--xlators/features/changelog/src/Makefile.am28
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c134
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c236
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h52
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c398
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h140
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c1979
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h680
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h34
-rw-r--r--xlators/features/changelog/src/changelog-messages.h450
-rw-r--r--xlators/features/changelog/src/changelog-misc.h131
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c349
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h84
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c305
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h29
-rw-r--r--xlators/features/changelog/src/changelog-rt.c67
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c2988
-rw-r--r--xlators/features/changetimerecorder/Makefile.am3
-rw-r--r--xlators/features/changetimerecorder/src/Makefile.am23
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.c2308
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.h21
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.c308
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.h923
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.c409
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.h90
-rw-r--r--xlators/features/changetimerecorder/src/ctr_mem_types.h24
-rw-r--r--xlators/features/compress/Makefile.am3
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c543
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h23
-rw-r--r--xlators/features/compress/src/cdc.c356
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c29
-rw-r--r--xlators/features/ganesha/Makefile.am3
-rw-r--r--xlators/features/ganesha/src/Makefile.am18
-rw-r--r--xlators/features/ganesha/src/ganesha-mem-types.h21
-rw-r--r--xlators/features/ganesha/src/ganesha.c90
-rw-r--r--xlators/features/ganesha/src/ganesha.h18
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/bindings/python/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1428
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h107
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py775
-rw-r--r--xlators/features/glupy/examples/helloworld.py19
-rw-r--r--xlators/features/glupy/examples/negative.py91
-rw-r--r--xlators/features/glupy/src/Makefile.am29
-rw-r--r--xlators/features/glupy/src/__init__.py.in2
-rw-r--r--xlators/features/glupy/src/glupy.c2496
-rw-r--r--xlators/features/glupy/src/glupy.h56
-rw-r--r--xlators/features/glupy/src/glupy.sym101
-rw-r--r--xlators/features/glupy/src/glupy/Makefile.am5
-rw-r--r--xlators/features/glupy/src/glupy/__init__.py852
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h23
-rw-r--r--xlators/features/index/src/index-messages.h121
-rw-r--r--xlators/features/index/src/index.c2558
-rw-r--r--xlators/features/index/src/index.h86
-rw-r--r--xlators/features/leases/Makefile.am3
-rw-r--r--xlators/features/leases/src/Makefile.am17
-rw-r--r--xlators/features/leases/src/leases-internal.c1351
-rw-r--r--xlators/features/leases/src/leases-mem-types.h28
-rw-r--r--xlators/features/leases/src/leases-messages.h129
-rw-r--r--xlators/features/leases/src/leases.c1168
-rw-r--r--xlators/features/leases/src/leases.h252
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c422
-rw-r--r--xlators/features/locks/src/clear.h71
-rw-r--r--xlators/features/locks/src/common.c673
-rw-r--r--xlators/features/locks/src/common.h106
-rw-r--r--xlators/features/locks/src/entrylk.c1017
-rw-r--r--xlators/features/locks/src/inodelk.c1038
-rw-r--r--xlators/features/locks/src/locks-mem-types.h23
-rw-r--r--xlators/features/locks/src/locks.h165
-rw-r--r--xlators/features/locks/src/pl-messages.h64
-rw-r--r--xlators/features/locks/src/posix.c3658
-rw-r--r--xlators/features/locks/src/reservelk.c438
-rw-r--r--xlators/features/locks/tests/unit-test.c27
-rw-r--r--xlators/features/mac-compat/src/Makefile.am10
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c291
-rw-r--r--xlators/features/mac-compat/src/mac-compat.h41
-rw-r--r--xlators/features/marker/Makefile.am3
-rw-r--r--xlators/features/marker/src/Makefile.am17
-rw-r--r--xlators/features/marker/src/marker-common.c65
-rw-r--r--xlators/features/marker/src/marker-common.h22
-rw-r--r--xlators/features/marker/src/marker-mem-types.h27
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c481
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h81
-rw-r--r--xlators/features/marker/src/marker-quota.c2189
-rw-r--r--xlators/features/marker/src/marker-quota.h156
-rw-r--r--xlators/features/marker/src/marker.c3520
-rw-r--r--xlators/features/marker/src/marker.h149
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c36
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c213
-rw-r--r--xlators/features/protect/src/prot_dht.c163
-rw-r--r--xlators/features/protect/src/prot_server.c46
-rw-r--r--xlators/features/quiesce/Makefile.am3
-rw-r--r--xlators/features/quiesce/src/Makefile.am15
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c2605
-rw-r--r--xlators/features/quiesce/src/quiesce.h51
-rw-r--r--xlators/features/quota/src/Makefile.am24
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c491
-rw-r--r--xlators/features/quota/src/quota-mem-types.h33
-rw-r--r--xlators/features/quota/src/quota-messages.h247
-rw-r--r--xlators/features/quota/src/quota.c5801
-rw-r--r--xlators/features/quota/src/quota.h282
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c456
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h37
-rw-r--r--xlators/features/quota/src/quotad-helpers.c107
-rw-r--r--xlators/features/quota/src/quotad-helpers.h24
-rw-r--r--xlators/features/quota/src/quotad.c242
-rw-r--r--xlators/features/quota/src/quotad.sym7
-rw-r--r--xlators/features/read-only/src/Makefile.am19
-rw-r--r--xlators/features/read-only/src/read-only-common.c417
-rw-r--r--xlators/features/read-only/src/read-only-common.h113
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h20
-rw-r--r--xlators/features/read-only/src/read-only.c301
-rw-r--r--xlators/features/read-only/src/read-only.h37
-rw-r--r--xlators/features/read-only/src/worm-helper.c413
-rw-r--r--xlators/features/read-only/src/worm-helper.h37
-rw-r--r--xlators/features/read-only/src/worm.c606
-rw-r--r--xlators/features/shard/Makefile.am3
-rw-r--r--xlators/features/shard/src/Makefile.am16
-rw-r--r--xlators/features/shard/src/shard-mem-types.h23
-rw-r--r--xlators/features/shard/src/shard-messages.h184
-rw-r--r--xlators/features/shard/src/shard.c4925
-rw-r--r--xlators/features/shard/src/shard.h272
-rw-r--r--xlators/features/snapview-client/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/snapview-client/src/Makefile.am15
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h24
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c2454
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h97
-rw-r--r--xlators/features/snapview-server/Makefile.am (renamed from xlators/protocol/lib/Makefile.am)0
-rw-r--r--xlators/features/snapview-server/src/Makefile.am22
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c598
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h26
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c476
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c2350
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h240
-rw-r--r--xlators/features/trash/src/Makefile.am9
-rw-r--r--xlators/features/trash/src/trash-mem-types.h26
-rw-r--r--xlators/features/trash/src/trash.c2811
-rw-r--r--xlators/features/trash/src/trash.h89
-rw-r--r--xlators/features/upcall/Makefile.am3
-rw-r--r--xlators/features/upcall/src/Makefile.am21
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h22
-rw-r--r--xlators/features/upcall/src/upcall-internal.c662
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h24
-rw-r--r--xlators/features/upcall/src/upcall-messages.h59
-rw-r--r--xlators/features/upcall/src/upcall.c2315
-rw-r--r--xlators/features/upcall/src/upcall.h135
-rw-r--r--xlators/lib/src/libxlator.c512
-rw-r--r--xlators/lib/src/libxlator.h149
-rw-r--r--xlators/meta/src/Makefile.am47
-rw-r--r--xlators/meta/src/active-link.c39
-rw-r--r--xlators/meta/src/cmdline-file.c43
-rw-r--r--xlators/meta/src/frames-file.c117
-rw-r--r--xlators/meta/src/graph-dir.c101
-rw-r--r--xlators/meta/src/graphs-dir.c74
-rw-r--r--xlators/meta/src/history-file.c47
-rw-r--r--xlators/meta/src/logfile-link.c39
-rw-r--r--xlators/meta/src/logging-dir.c46
-rw-r--r--xlators/meta/src/loglevel-file.c54
-rw-r--r--xlators/meta/src/mallinfo-file.c39
-rw-r--r--xlators/meta/src/measure-file.c52
-rw-r--r--xlators/meta/src/meminfo-file.c47
-rw-r--r--xlators/meta/src/meta-defaults.c636
-rw-r--r--xlators/meta/src/meta-helpers.c350
-rw-r--r--xlators/meta/src/meta-hooks.h46
-rw-r--r--xlators/meta/src/meta-mem-types.h31
-rw-r--r--xlators/meta/src/meta.c1336
-rw-r--r--xlators/meta/src/meta.h143
-rw-r--r--xlators/meta/src/misc.c67
-rw-r--r--xlators/meta/src/misc.h31
-rw-r--r--xlators/meta/src/name-file.c48
-rw-r--r--xlators/meta/src/option-file.c51
-rw-r--r--xlators/meta/src/options-dir.c71
-rw-r--r--xlators/meta/src/private-file.c47
-rw-r--r--xlators/meta/src/process_uuid-file.c41
-rw-r--r--xlators/meta/src/profile-file.c47
-rw-r--r--xlators/meta/src/root-dir.c74
-rw-r--r--xlators/meta/src/subvolume-link.c61
-rw-r--r--xlators/meta/src/subvolumes-dir.c67
-rw-r--r--xlators/meta/src/top-link.c45
-rw-r--r--xlators/meta/src/tree.c179
-rw-r--r--xlators/meta/src/tree.h35
-rw-r--r--xlators/meta/src/type-file.c48
-rw-r--r--xlators/meta/src/version-file.c42
-rw-r--r--xlators/meta/src/view-dir.c40
-rw-r--r--xlators/meta/src/view.c258
-rw-r--r--xlators/meta/src/view.h32
-rw-r--r--xlators/meta/src/volfile-file.c86
-rw-r--r--xlators/meta/src/xlator-dir.c95
-rw-r--r--xlators/mgmt/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)0
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am67
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.h40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c709
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c3025
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.h21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c136
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c882
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c6521
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h49
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c5405
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c2289
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c598
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h84
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c714
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c285
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h77
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h4679
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c1015
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c2411
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h77
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c698
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c201
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h25
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c7877
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h302
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c1058
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h93
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c467
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c135
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h44
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c2107
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c222
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.h31
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1134
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c905
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c2452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c421
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c250
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h30
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c1500
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h222
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c63
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c439
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c4093
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h166
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c9990
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c247
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h18
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c4657
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h195
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c251
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c338
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h74
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1978
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h83
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c11510
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h718
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c6587
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h295
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c3200
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c3026
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c2019
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1193
-rw-r--r--xlators/mount/fuse/src/Makefile.am36
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c4927
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h428
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c679
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h24
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c721
-rw-r--r--xlators/mount/fuse/utils/Makefile.am9
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in769
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in620
-rw-r--r--xlators/nfs/Makefile.am2
-rw-r--r--xlators/nfs/lib/src/Makefile.am11
-rw-r--r--xlators/nfs/lib/src/auth-null.c71
-rw-r--r--xlators/nfs/lib/src/auth-unix.c91
-rw-r--r--xlators/nfs/lib/src/msg-nfs3.c536
-rw-r--r--xlators/nfs/lib/src/msg-nfs3.h186
-rw-r--r--xlators/nfs/lib/src/rpc-socket.c358
-rw-r--r--xlators/nfs/lib/src/rpc-socket.h65
-rw-r--r--xlators/nfs/lib/src/rpcsvc-auth.c391
-rw-r--r--xlators/nfs/lib/src/rpcsvc.c2770
-rw-r--r--xlators/nfs/lib/src/rpcsvc.h721
-rw-r--r--xlators/nfs/lib/src/xdr-common.h48
-rw-r--r--xlators/nfs/lib/src/xdr-nfs3.c1898
-rw-r--r--xlators/nfs/lib/src/xdr-nfs3.h1205
-rw-r--r--xlators/nfs/lib/src/xdr-rpc.c229
-rw-r--r--xlators/nfs/lib/src/xdr-rpc.h82
-rw-r--r--xlators/nfs/server/src/Makefile.am31
-rw-r--r--xlators/nfs/server/src/acl3.c954
-rw-r--r--xlators/nfs/server/src/acl3.h42
-rw-r--r--xlators/nfs/server/src/auth-cache.c489
-rw-r--r--xlators/nfs/server/src/auth-cache.h54
-rw-r--r--xlators/nfs/server/src/exports.c1472
-rw-r--r--xlators/nfs/server/src/exports.h92
-rw-r--r--xlators/nfs/server/src/mount3-auth.c644
-rw-r--r--xlators/nfs/server/src/mount3-auth.h59
-rw-r--r--xlators/nfs/server/src/mount3.c3703
-rw-r--r--xlators/nfs/server/src/mount3.h147
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c234
-rw-r--r--xlators/nfs/server/src/netgroups.c1160
-rw-r--r--xlators/nfs/server/src/netgroups.h54
-rw-r--r--xlators/nfs/server/src/nfs-common.c316
-rw-r--r--xlators/nfs/server/src/nfs-common.h53
-rw-r--r--xlators/nfs/server/src/nfs-fops.c959
-rw-r--r--xlators/nfs/server/src/nfs-fops.h61
-rw-r--r--xlators/nfs/server/src/nfs-generics.c69
-rw-r--r--xlators/nfs/server/src/nfs-generics.h45
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c161
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h28
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h38
-rw-r--r--xlators/nfs/server/src/nfs-messages.h1669
-rw-r--r--xlators/nfs/server/src/nfs.c1710
-rw-r--r--xlators/nfs/server/src/nfs.h96
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c235
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h97
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c3379
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h122
-rw-r--r--xlators/nfs/server/src/nfs3.c2702
-rw-r--r--xlators/nfs/server/src/nfs3.h158
-rw-r--r--xlators/nfs/server/src/nfsserver.sym20
-rw-r--r--xlators/nfs/server/src/nlm4.c2621
-rw-r--r--xlators/nfs/server/src/nlm4.h111
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c126
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/decompounder/Makefile.am1
-rw-r--r--xlators/performance/decompounder/src/Makefile.am16
-rw-r--r--xlators/performance/decompounder/src/decompounder-mem-types.h20
-rw-r--r--xlators/performance/decompounder/src/decompounder-messages.h27
-rw-r--r--xlators/performance/decompounder/src/decompounder.c952
-rw-r--r--xlators/performance/decompounder/src/decompounder.h74
-rw-r--r--xlators/performance/io-cache/src/Makefile.am10
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h137
-rw-r--r--xlators/performance/io-cache/src/io-cache.c2261
-rw-r--r--xlators/performance/io-cache/src/io-cache.h313
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c304
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h23
-rw-r--r--xlators/performance/io-cache/src/page.c1303
-rw-r--r--xlators/performance/io-threads/src/Makefile.am9
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h103
-rw-r--r--xlators/performance/io-threads/src/io-threads.c2359
-rw-r--r--xlators/performance/io-threads/src/io-threads.h55
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h22
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am28
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h74
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2665
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h85
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1026
-rw-r--r--xlators/performance/quick-read/src/Makefile.am9
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h26
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h128
-rw-r--r--xlators/performance/quick-read/src/quick-read.c2919
-rw-r--r--xlators/performance/quick-read/src/quick-read.h68
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am9
-rw-r--r--xlators/performance/read-ahead/src/page.c699
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h23
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h111
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1594
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h152
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h105
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c682
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h48
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c3779
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h105
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am9
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache-messages.h93
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c135
-rw-r--r--xlators/performance/write-behind/src/Makefile.am9
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h23
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h121
-rw-r--r--xlators/performance/write-behind/src/write-behind.c4122
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c44
-rw-r--r--xlators/playground/template/src/template.h19
-rw-r--r--xlators/protocol/Makefile.am2
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am14
-rw-r--r--xlators/protocol/auth/addr/src/addr.c410
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/login/src/login.c250
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c195
-rw-r--r--xlators/protocol/client/src/client-common.c2162
-rw-r--r--xlators/protocol/client/src/client-common.h403
-rw-r--r--xlators/protocol/client/src/client-handshake.c1731
-rw-r--r--xlators/protocol/client/src/client-helpers.c1739
-rw-r--r--xlators/protocol/client/src/client-lk.c576
-rw-r--r--xlators/protocol/client/src/client-mem-types.h27
-rw-r--r--xlators/protocol/client/src/client-messages.h651
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6473
-rw-r--r--xlators/protocol/client/src/client.c2031
-rw-r--r--xlators/protocol/client/src/client.h328
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c4826
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6739
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c194
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1119
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am24
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c626
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6629
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h190
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c660
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2617
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1622
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/lib/src/authenticate.c249
-rw-r--r--xlators/protocol/lib/src/authenticate.h60
-rw-r--r--xlators/protocol/lib/src/glusterfs-xdr.c1798
-rw-r--r--xlators/protocol/lib/src/glusterfs-xdr.h1336
-rw-r--r--xlators/protocol/lib/src/glusterfs3.x779
-rw-r--r--xlators/protocol/lib/src/msg-xdr.c1264
-rw-r--r--xlators/protocol/lib/src/msg-xdr.h538
-rw-r--r--xlators/protocol/lib/src/protocol-common.h95
-rw-r--r--xlators/protocol/server/src/Makefile.am27
-rw-r--r--xlators/protocol/server/src/authenticate.c237
-rw-r--r--xlators/protocol/server/src/authenticate.h46
-rw-r--r--xlators/protocol/server/src/server-common.c472
-rw-r--r--xlators/protocol/server/src/server-common.h132
-rw-r--r--xlators/protocol/server/src/server-handshake.c682
-rw-r--r--xlators/protocol/server/src/server-helpers.c4008
-rw-r--r--xlators/protocol/server/src/server-helpers.h83
-rw-r--r--xlators/protocol/server/src/server-mem-types.h25
-rw-r--r--xlators/protocol/server/src/server-messages.h855
-rw-r--r--xlators/protocol/server/src/server-resolve.c551
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6835
-rw-r--r--xlators/protocol/server/src/server.c1517
-rw-r--r--xlators/protocol/server/src/server.h214
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c4875
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c523
-rw-r--r--xlators/storage/bd/src/bd-aio.h36
-rw-r--r--xlators/storage/bd/src/bd-helper.c1020
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h27
-rw-r--r--xlators/storage/bd/src/bd.c2448
-rw-r--r--xlators/storage/bd/src/bd.h168
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am19
-rw-r--r--xlators/storage/posix/src/posix-aio.c568
-rw-r--r--xlators/storage/posix/src/posix-aio.h34
-rw-r--r--xlators/storage/posix/src/posix-handle.c997
-rw-r--r--xlators/storage/posix/src/posix-handle.h288
-rw-r--r--xlators/storage/posix/src/posix-helpers.c2224
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h22
-rw-r--r--xlators/storage/posix/src/posix-messages.h951
-rw-r--r--xlators/storage/posix/src/posix.c7424
-rw-r--r--xlators/storage/posix/src/posix.h219
-rw-r--r--xlators/system/Makefile.am1
-rw-r--r--xlators/system/posix-acl/Makefile.am1
-rw-r--r--xlators/system/posix-acl/src/Makefile.am26
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h24
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c180
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h26
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c2406
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h30
-rw-r--r--xlators/xlator.sym9
765 files changed, 385252 insertions, 117016 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 8ca471f9445..ea1be844ef4 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,10 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs
+if ENABLE_EXPERIMENTAL
+ EXPERIMENTAL = experimental
+endif
-CLEANFILES =
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground meta $(EXPERIMENTAL)
+
+EXTRA_DIST = xlator.sym
+
+CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
deleted file mode 100644
index f7766580257..00000000000
--- a/xlators/bindings/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
deleted file mode 100644
index c0b9141c667..00000000000
--- a/xlators/bindings/python/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_PROGRAMS = python.so
-
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
-
-python_PYTHON = gluster.py glustertypes.py glusterstack.py
-
-pythondir = $(xlatordir)/python
-
-python_so_SOURCES = python.c
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- $(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
-
-AM_LDFLAGS = $(PYTHON_LDFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
deleted file mode 100644
index ee0eb131011..00000000000
--- a/xlators/bindings/python/src/gluster.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-from glusterstack import *
-import sys
-import inspect
-
-libglusterfs = CDLL("libglusterfs.so")
-_gf_log = libglusterfs._gf_log
-_gf_log.restype = c_int32
-_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
-
-gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
-
-GF_LOG_NONE = 0
-GF_LOG_CRITICAL = 1
-GF_LOG_ERROR = 2
-GF_LOG_WARNING = 3
-GF_LOG_DEBUG = 4
-
-def gf_log(module, level, fmt, *params):
- if level <= gf_log_loglevel:
- frame = sys._getframe(1)
- _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
- frame.f_lineno, level, fmt, *params)
-
-class ComplexTranslator(object):
- def __init__(self, xlator):
- self.xlator = xlator_t.from_address(xlator)
-
- def __getattr__(self, item):
- return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
deleted file mode 100644
index ba24c81652e..00000000000
--- a/xlators/bindings/python/src/glusterstack.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-
-libc = CDLL("libc.so.6")
-calloc = libc.calloc
-calloc.argtypes = [c_int, c_int]
-calloc.restype = c_void_p
-
-# TODO: Can these be done in C somehow?
-def stack_wind(frame, rfn, obj, fn, *params):
- """Frame is a frame object"""
- _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
- _new[0].root = frame.root
- _new[0].next = frame.root[0].frames.next
- _new[0].prev = pointer(frame.root[0].frames)
- if frame.root[0].frames.next:
- frame.root[0].frames.next[0].prev = _new
- frame.root[0].frames.next = _new
- _new[0].this = obj
- # TODO: Type checking like tmp_cbk?
- _new[0].ret = rfn
- _new[0].parent = pointer(frame)
- _new[0].cookie = cast(_new, c_void_p)
- # TODO: Initialize lock
- #_new.lock.init()
- frame.ref_count += 1
- fn(_new, obj, *params)
-
-def stack_unwind(frame, *params):
- """Frame is a frame object"""
- fn = frame[0].ret
- parent = frame[0].parent[0]
- parent.ref_count -= 1
-
- op_ret = params[0]
- op_err = params[1]
- params = params[2:]
- fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
- op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
deleted file mode 100644
index e9069d07c72..00000000000
--- a/xlators/bindings/python/src/glustertypes.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-import collections
-
-#
-# Forward declaration of some gluster types
-#
-class call_frame_t(Structure):
- pass
-
-class call_ctx_t(Structure):
- pass
-
-class call_pool_t(Structure):
- pass
-
-class xlator_t(Structure):
- def _getFirstChild(self):
- return self.children[0].xlator
- firstChild = property(_getFirstChild)
-
-class xlator_list_t(Structure):
- pass
-
-class xlator_fops(Structure):
- pass
-
-class xlator_mops(Structure):
- pass
-
-class glusterfs_ctx_t(Structure):
- pass
-
-class list_head(Structure):
- pass
-
-class dict_t(Structure):
- pass
-
-class inode_table_t(Structure):
- pass
-
-class fd_t(Structure):
- pass
-
-class iovec(Structure):
- _fields_ = [
- ("iov_base", c_void_p),
- ("iov_len", c_size_t),
- ]
-
- def __init__(self, s):
- self.iov_base = cast(c_char_p(s), c_void_p)
- self.iov_len = len(s)
-
- def getBytes(self):
- return string_at(self.iov_base, self.iov_len)
-
-# This is a pthread_spinlock_t
-# TODO: what happens to volatile-ness?
-gf_lock_t = c_int
-
-uid_t = c_uint32
-gid_t = c_uint32
-pid_t = c_int32
-
-off_t = c_int64
-
-#
-# Function pointer types
-#
-ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
- POINTER(xlator_t), c_int32, c_int32)
-
-fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
-init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
-event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head)),
- ]
-
-call_frame_t._fields_ = [
- ("root", POINTER(call_ctx_t)),
- ("parent", POINTER(call_frame_t)),
- ("next", POINTER(call_frame_t)),
- ("prev", POINTER(call_frame_t)),
- ("local", c_void_p),
- ("this", POINTER(xlator_t)),
- ("ret", ret_fn_t),
- ("ref_count", c_int32),
- ("lock", gf_lock_t),
- ("cookie", c_void_p),
- ("op", c_int32),
- ("type", c_int8),
- ]
-
-call_ctx_t._fields_ = [
- ("all_frames", list_head),
- ("trans", c_void_p),
- ("pool", call_pool_t),
- ("unique", c_uint64),
- ("state", c_void_p),
- ("uid", uid_t),
- ("gid", gid_t),
- ("pid", pid_t),
- ("frames", call_frame_t),
- ("req_refs", POINTER(dict_t)),
- ("rsp_refs", POINTER(dict_t)),
- ]
-
-xlator_t._fields_ = [
- ("name", c_char_p),
- ("type", c_char_p),
- ("next", POINTER(xlator_t)),
- ("prev", POINTER(xlator_t)),
- ("parent", POINTER(xlator_t)),
- ("children", POINTER(xlator_list_t)),
- ("fops", POINTER(xlator_fops)),
- ("mops", POINTER(xlator_mops)),
- ("fini", fini_fn_t),
- ("init", init_fn_t),
- ("notify", event_notify_fn_t),
- ("options", POINTER(dict_t)),
- ("ctx", POINTER(glusterfs_ctx_t)),
- ("itable", POINTER(inode_table_t)),
- ("ready", c_char),
- ("private", c_void_p),
- ]
-
-xlator_list_t._fields_ = [
- ("xlator", POINTER(xlator_t)),
- ("next", POINTER(xlator_list_t)),
- ]
-
-fop_functions = collections.defaultdict(lambda: c_void_p)
-fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
- 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
- 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
- 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
- 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
- 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
- # TODO: Call backs?
- ]
-
-fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec), c_int32,
- off_t)
-
-fop_functions['writev'] = fop_writev_t
-xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
deleted file mode 100644
index c11323cda4f..00000000000
--- a/xlators/bindings/python/src/python.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2009 Chris AtLee <chris@atlee.ca>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-typedef struct
-{
- char *scriptname;
- PyObject *pXlator;
- PyObject *pScriptModule;
- PyObject *pGlusterModule;
- PyThreadState *pInterp;
-
- PyObject *pFrameType, *pVectorType, *pFdType;
-} python_private_t;
-
-int32_t
-python_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- python_private_t *priv = (python_private_t *)this->private;
- gf_log("python", GF_LOG_DEBUG, "In writev");
- if (PyObject_HasAttrString(priv->pXlator, "writev"))
- {
-
- PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev",
- "O O O i l",
- PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame),
- PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd),
- PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector),
- count,
- offset);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- }
- Py_XDECREF(retval);
- }
- else
- {
- return default_writev(frame, this, fd, vector, count, offset);
- }
- return 0;
-}
-
-struct xlator_fops fops = {
- .writev = python_writev
-};
-
-static PyObject *
-AnonModule_FromFile (const char* fname)
-{
- // Get the builtins
- PyThreadState* pThread = PyThreadState_Get();
- PyObject *pBuiltins = pThread->interp->builtins;
-
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return NULL;
- }
-
- // Create a new dictionary for running code in
- PyObject *pModuleDict = PyDict_New();
- PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins);
- Py_INCREF(pBuiltins);
-
- // Run the file in the new context
- FILE* fp = fopen(fname, "r");
- PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
- fclose(fp);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
-
- // Create an object to hold the new context
- PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
- PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_XDECREF(pModule);
- return NULL;
- }
-
- // Set the new context's dictionary to the one we used to run the code
- // inside
- PyObject_SetAttrString(pModule, "__dict__", pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_DECREF(pModule);
- return NULL;
- }
-
- return pModule;
-}
-
-int32_t
-init (xlator_t *this)
-{
- // This is ok to call more than once per process
- Py_InitializeEx(0);
-
- if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
- "FATAL: python should have exactly one child");
- return -1;
- }
-
- python_private_t *priv = CALLOC (sizeof (python_private_t), 1);
- ERR_ABORT (priv);
-
- data_t *scriptname = dict_get (this->options, "scriptname");
- if (scriptname) {
- priv->scriptname = data_to_str(scriptname);
- } else {
- gf_log("python", GF_LOG_ERROR,
- "FATAL: python requires the scriptname parameter");
- return -1;
- }
-
- priv->pInterp = Py_NewInterpreter();
-
- // Adjust python's path
- PyObject *syspath = PySys_GetObject("path");
- PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- gf_log("python", GF_LOG_DEBUG,
- "Loading gluster module");
-
- priv->pGlusterModule = PyImport_ImportModule("gluster");
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return -1;
- }
-
- priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
- priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
- priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
-
- gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
- priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
- if (!priv->pScriptModule || PyErr_Occurred())
- {
- gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
- PyErr_Print();
- return -1;
- }
-
- if (!PyObject_HasAttrString(priv->pScriptModule, "xlator"))
- {
- gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
- return -1;
- }
- gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
- priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
- PyLong_FromVoidPtr, this);
- if (PyErr_Occurred() || !priv->pXlator)
- {
- PyErr_Print();
- return -1;
- }
-
- this->private = priv;
-
- gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- python_private_t *priv = (python_private_t*)(this->private);
- Py_DECREF(priv->pXlator);
- Py_DECREF(priv->pScriptModule);
- Py_DECREF(priv->pGlusterModule);
- Py_DECREF(priv->pFrameType);
- Py_DECREF(priv->pFdType);
- Py_DECREF(priv->pVectorType);
- Py_EndInterpreter(priv->pInterp);
- return;
-}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
deleted file mode 100644
index 507455c856a..00000000000
--- a/xlators/bindings/python/src/testxlator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""
-This is a test translator written in python.
-
-Important things to note:
- This file must be import-able from glusterfsd. This probably means
- setting PYTHONPATH to where this file is located.
-
- This file must have a top-level xlator class object that will be
- used to instantiate individual translators.
-"""
-from gluster import *
-
-class MyXlator(ComplexTranslator):
- name = "MyXlator"
- def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
- stack_unwind(frame, op_ret, op_errno, buf)
- return 0
-
- def writev(self, frame, fd, vector, count, offset):
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
- # TODO: Use cookie to pass this to writev_cbk
- old_count = vector.iov_len
-
- data = vector.getBytes().encode("zlib")
-
- vector = iovec(data)
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
-
- @ret_fn_t
- def rfn(frame, prev, this, op_ret, op_errno, *params):
- if len(params) == 0:
- params = [0]
- return self.writev_cbk(frame, prev, old_count, op_errno, *params)
-
- stack_wind(frame, rfn, self.firstChild,
- self.firstChild[0].fops[0].writev, fd, vector, count, offset)
- return 0
-
-xlator = MyXlator
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index 0990822a7d3..903fbb39f12 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = stripe afr dht
+SUBDIRS = stripe afr dht ec
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index ece459ca772..5612733d3ed 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,15 +1,33 @@
-xlator_LTLIBRARIES = afr.la
+xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_la_LDFLAGS = -module -avoidversion
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
+
+afr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h afr-mem-types.h
+pump_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+pump_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) pump.c
+pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h pump.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h afr-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
@@ -17,4 +35,4 @@ uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
install-data-hook:
- ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so \ No newline at end of file
+ ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
new file mode 100644
index 00000000000..e59f160db0c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -0,0 +1,5252 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+#include "inode.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heald.h"
+#include "afr-messages.h"
+
+/*
+ * Copy @base and run AFR_FRAME_INIT on the copy.
+ *
+ * Returns the new frame, or NULL if either copy_frame() or AFR_FRAME_INIT
+ * fails; in the latter case the copied frame is destroyed before returning.
+ */
+call_frame_t *
+afr_copy_frame (call_frame_t *base)
+{
+        call_frame_t *frame      = NULL;
+        afr_local_t  *init_local = NULL;
+        int           op_errno   = 0;
+
+        frame = copy_frame (base);
+        if (frame != NULL) {
+                init_local = AFR_FRAME_INIT (frame, op_errno);
+                if (init_local == NULL) {
+                        /* Initialisation failed: drop the copy. */
+                        AFR_STACK_DESTROY (frame);
+                        frame = NULL;
+                }
+        }
+
+        return frame;
+}
+
+/*
+ * Check if an entry or inode could be undergoing a transaction.
+ *
+ * Looks in the xdata of every reply held in @local for the lock-count key
+ * matching @type and returns _gf_true as soon as one reply carries a
+ * non-zero value for it.  Only AFR_ENTRY_TRANSACTION and
+ * AFR_DATA_TRANSACTION map to a key; for any other type there is nothing
+ * to look up, so _gf_false is returned without consulting the replies.
+ */
+gf_boolean_t
+afr_is_possibly_under_txn (afr_transaction_type type, afr_local_t *local,
+                           xlator_t *this)
+{
+        int             i    = 0;
+        int             tmp  = 0;
+        afr_private_t  *priv = NULL;
+        GF_UNUSED char *key  = NULL;
+
+        priv = this->private;
+
+        if (type == AFR_ENTRY_TRANSACTION)
+                key = GLUSTERFS_PARENT_ENTRYLK;
+        else if (type == AFR_DATA_TRANSACTION)
+                /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
+                 * pl_inodelk_xattr_fill supports separate keys for different
+                 * domains.*/
+                key = GLUSTERFS_INODELK_COUNT;
+
+        /* No key for this transaction type: never pass NULL to the dict
+         * lookup below. */
+        if (!key)
+                return _gf_false;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->replies[i].xdata)
+                        continue;
+                if (dict_get_int32 (local->replies[i].xdata, key, &tmp) == 0)
+                        if (tmp)
+                                return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Fetch the AFR context stored on @inode for @this, creating and
+ * registering a new one (spb_choice = -1, read_subvol = 0) when none is
+ * present yet.
+ *
+ * On success *ctx points at the context and 0 is returned.  On failure the
+ * non-zero status of the failing step is returned and *ctx is untouched.
+ * The locked wrappers in this file call this with inode->lock held.
+ */
+int
+__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
+{
+        uint64_t         raw    = 0;
+        afr_inode_ctx_t *fresh  = NULL;
+        int              status = -1;
+
+        status = __inode_ctx_get (inode, this, &raw);
+        if (status == 0) {
+                /* Already present: hand back the stored pointer. */
+                *ctx = (afr_inode_ctx_t *) raw;
+                return 0;
+        }
+
+        fresh = GF_CALLOC (1, sizeof (afr_inode_ctx_t),
+                           gf_afr_mt_inode_ctx_t);
+        if (!fresh)
+                return status;
+
+        raw = (long) fresh;
+        status = __inode_ctx_set (inode, this, &raw);
+        if (status) {
+                GF_FREE (fresh);
+                return status;
+        }
+
+        fresh->spb_choice = -1;
+        fresh->read_subvol = 0;
+
+        *ctx = fresh;
+        return 0;
+}
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<---------- 64bit ------------>|
+ * 63 32 31 16 15 0
+ * | EVENT_GEN | DATA | METADATA |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e. priv->event_generation)
+ * when DATA and METADATA were last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
+ */
+
+/*
+ * Unpack the 64-bit read_subvol word of the inode ctx (layout for at most
+ * 16 children) into per-child flags.
+ *
+ * @data / @metadata: optional output arrays of priv->child_count bytes;
+ *                    entry i becomes 1 when the matching bit is set.
+ * @event_p:          optional output for the stored event generation.
+ *
+ * Returns the status of __afr_inode_ctx_get (negative on failure).
+ */
+int
+__afr_inode_read_subvol_get_small (inode_t *inode, xlator_t *this,
+                                   unsigned char *data, unsigned char *metadata,
+                                   int *event_p)
+{
+        afr_private_t   *priv      = this->private;
+        afr_inode_ctx_t *ctx       = NULL;
+        uint64_t         packed    = 0;
+        uint16_t         meta_bits = 0;
+        uint16_t         data_bits = 0;
+        uint32_t         gen       = 0;
+        int              child     = 0;
+        int              status    = -1;
+
+        status = __afr_inode_ctx_get (this, inode, &ctx);
+        if (status < 0)
+                return status;
+
+        packed = ctx->read_subvol;
+
+        /* bits 0-15: metadata map, 16-31: data map, 32-63: event gen */
+        meta_bits = (uint16_t) (packed & 0xffff);
+        data_bits = (uint16_t) ((packed >> 16) & 0xffff);
+        gen = (uint32_t) (packed >> 32);
+
+        if (metadata) {
+                for (child = 0; child < priv->child_count; child++)
+                        metadata[child] = (meta_bits >> child) & 1;
+        }
+
+        if (data) {
+                for (child = 0; child < priv->child_count; child++)
+                        data[child] = (data_bits >> child) & 1;
+        }
+
+        if (event_p)
+                *event_p = gen;
+
+        return status;
+}
+
+
+/*
+ * Pack per-child @data / @metadata flags and @event into the 64-bit
+ * read_subvol word of the inode ctx (layout for at most 16 children).
+ *
+ * Both arrays are read for priv->child_count entries and must be non-NULL.
+ * Returns 0 on success, or the non-zero status of __afr_inode_ctx_get.
+ */
+int
+__afr_inode_read_subvol_set_small (inode_t *inode, xlator_t *this,
+                                   unsigned char *data, unsigned char *metadata,
+                                   int event)
+{
+        afr_private_t   *priv      = this->private;
+        afr_inode_ctx_t *ctx       = NULL;
+        uint16_t         data_bits = 0;
+        uint16_t         meta_bits = 0;
+        uint64_t         packed    = 0;
+        int              child     = 0;
+        int              status    = -1;
+
+        status = __afr_inode_ctx_get (this, inode, &ctx);
+        if (status)
+                return status;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (metadata[child])
+                        meta_bits |= (1 << child);
+                if (data[child])
+                        data_bits |= (1 << child);
+        }
+
+        /* event gen in the top half, data map above the metadata map */
+        packed = (((uint64_t) event) << 32) |
+                 (((uint64_t) data_bits) << 16) |
+                 ((uint64_t) meta_bits);
+
+        ctx->read_subvol = packed;
+
+        return 0;
+}
+
+/*
+ * Zero the event-generation half (bits 32-63) of the inode ctx's
+ * read_subvol word while keeping the data and metadata bitmaps.
+ *
+ * Returns the status of __afr_inode_ctx_get (0 on success).
+ */
+int
+__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
+{
+        afr_inode_ctx_t *ctx    = NULL;
+        uint64_t         packed = 0;
+        int              status = -1;
+
+        status = __afr_inode_ctx_get (this, inode, &ctx);
+        if (status)
+                return status;
+
+        /* Keep the low 32 bits (both bitmaps); drop the event generation. */
+        packed = ctx->read_subvol;
+        packed &= 0x00000000ffffffffULL;
+        ctx->read_subvol = packed;
+
+        return status;
+}
+
+
+/*
+ * Read the cached readable-subvolume maps and event generation for @inode.
+ * Only the "small" (<= 16 children) representation is implemented; larger
+ * child counts yield -1.
+ */
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+                             unsigned char *data, unsigned char *metadata,
+                             int *event_p)
+{
+        afr_private_t *priv = this->private;
+
+        if (priv->child_count > 16) {
+                /* TBD: allocate structure with array and read from it */
+                return -1;
+        }
+
+        return __afr_inode_read_subvol_get_small (inode, this, data,
+                                                  metadata, event_p);
+}
+
+/*
+ * Copy the split-brain choice stored in @inode's AFR ctx into *spb_choice.
+ * Returns 0 on success, or the negative status of __afr_inode_ctx_get.
+ */
+int
+__afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
+                                    int *spb_choice)
+{
+        afr_inode_ctx_t *ctx    = NULL;
+        int              status = -1;
+
+        status = __afr_inode_ctx_get (this, inode, &ctx);
+        if (status >= 0) {
+                *spb_choice = ctx->spb_choice;
+                status = 0;
+        }
+
+        return status;
+}
+
+/*
+ * Store the readable-subvolume maps and @event generation for @inode.
+ * Only the "small" (<= 16 children) representation is implemented; larger
+ * child counts yield -1.
+ */
+int
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+                             unsigned char *metadata, int event)
+{
+        afr_private_t *priv = this->private;
+
+        if (priv->child_count > 16)
+                return -1;
+
+        return __afr_inode_read_subvol_set_small (inode, this, data,
+                                                  metadata, event);
+}
+
+/*
+ * Record @spb_choice in @inode's AFR ctx.
+ * Returns 0 on success, or the non-zero status of __afr_inode_ctx_get.
+ */
+int
+__afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this,
+                                    int spb_choice)
+{
+        afr_inode_ctx_t *ctx    = NULL;
+        int              status = -1;
+
+        status = __afr_inode_ctx_get (this, inode, &ctx);
+        if (status)
+                return status;
+
+        ctx->spb_choice = spb_choice;
+
+        return 0;
+}
+
+/*
+ * Reset the cached event generation for @inode.  Only the "small"
+ * (<= 16 children) representation is implemented; larger child counts
+ * yield -1.
+ */
+int
+__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+{
+        afr_private_t *priv = this->private;
+
+        if (priv->child_count > 16)
+                return -1;
+
+        return __afr_inode_read_subvol_reset_small (inode, this);
+}
+
+
+/*
+ * Locked wrapper around __afr_inode_read_subvol_get(): takes inode->lock,
+ * reads the cached data/metadata readable maps and event generation, and
+ * returns the helper's status.  Returns -1 without locking when @inode
+ * fails the GF_VALIDATE_OR_GOTO check.
+ */
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get (inode, this, data,
+ metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
+/*
+ * Decide whether @inode may be read for a transaction of kind @type and,
+ * if so, report which children are readable.
+ *
+ * @readable: optional output array (priv->child_count bytes).  Filled from
+ *            the metadata map for AFR_METADATA_TRANSACTION; for
+ *            AFR_DATA_TRANSACTION from the data map, or from
+ *            local->child_up when the data map is empty.
+ * @event_p:  optional output for the cached event generation.
+ *
+ * Returns 0 on success and -EIO when the cached state cannot be read or
+ * the required maps are empty.
+ */
+int
+afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
+                        unsigned char *readable, int *event_p, int type)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *local      = frame->local;
+        unsigned char *data_map   = alloca0 (priv->child_count);
+        unsigned char *meta_map   = alloca0 (priv->child_count);
+        int            n_data     = 0;
+        int            n_meta     = 0;
+        int            generation = 0;
+
+        if (afr_inode_read_subvol_get (inode, this, data_map, meta_map,
+                                       &generation) == -1)
+                return -EIO;
+
+        n_data = AFR_COUNT (data_map, priv->child_count);
+        n_meta = AFR_COUNT (meta_map, priv->child_count);
+
+        if (inode->ia_type == IA_IFDIR) {
+                /* For directories, allow even if it is in data split-brain. */
+                if ((type == AFR_METADATA_TRANSACTION ||
+                     local->op == GF_FOP_STAT ||
+                     local->op == GF_FOP_FSTAT) && !n_meta)
+                        return -EIO;
+        } else if (!n_data || !n_meta) {
+                /* For files, abort in case of data/metadata split-brain. */
+                return -EIO;
+        }
+
+        if (readable) {
+                if (type == AFR_METADATA_TRANSACTION)
+                        memcpy (readable, meta_map,
+                                priv->child_count * sizeof *meta_map);
+
+                if (type == AFR_DATA_TRANSACTION)
+                        memcpy (readable,
+                                n_data ? data_map : local->child_up,
+                                priv->child_count * sizeof *readable);
+        }
+
+        if (event_p)
+                *event_p = generation;
+
+        return 0;
+}
+
+/*
+ * Locked wrapper around __afr_inode_split_brain_choice_get(): reads the
+ * split-brain choice of @inode into *spb_choice under inode->lock.
+ * Returns -1 without locking when @inode fails the GF_VALIDATE_OR_GOTO
+ * check, otherwise the helper's status.
+ */
+int
+afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_get (inode, this,
+ spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
+
+/*
+ * Locked wrapper around __afr_inode_read_subvol_set(): stores the
+ * data/metadata readable maps and @event generation for @inode under
+ * inode->lock.  Returns -1 without locking when @inode fails the
+ * GF_VALIDATE_OR_GOTO check, otherwise the helper's status.
+ */
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set (inode, this, data, metadata,
+ event);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
+
+/*
+ * Locked wrapper around __afr_inode_split_brain_choice_set(): records
+ * @spb_choice in @inode's AFR ctx under inode->lock.  Returns -1 without
+ * locking when @inode fails the GF_VALIDATE_OR_GOTO check, otherwise the
+ * helper's status.
+ */
+int
+afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this,
+ int spb_choice)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_set (inode, this,
+ spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
+
+/*
+ * Locked wrapper around __afr_inode_read_subvol_reset(): resets the cached
+ * event generation of @inode under inode->lock.  Returns -1 without
+ * locking when @inode fails the GF_VALIDATE_OR_GOTO check, otherwise the
+ * helper's status.
+ */
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_reset (inode, this);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
+/*
+ * Clear the split-brain choice of @inode (set it to -1) and cancel its
+ * pending expiry timer, if any, all under inode->lock.
+ *
+ * Returns 0 on success, -1 when @inode is NULL or its AFR ctx cannot be
+ * obtained (a warning is logged in the latter case).
+ */
+int
+afr_spb_choice_timeout_cancel (xlator_t *this, inode_t *inode)
+{
+        afr_inode_ctx_t *ctx    = NULL;
+        int              status = -1;
+
+        if (!inode)
+                return status;
+
+        LOCK(&inode->lock);
+
+        __afr_inode_ctx_get (this, inode, &ctx);
+        if (ctx) {
+                ctx->spb_choice = -1;
+                if (ctx->timer) {
+                        gf_timer_call_cancel (this->ctx, ctx->timer);
+                        ctx->timer = NULL;
+                }
+                status = 0;
+        } else {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+                        "Failed to cancel split-brain choice timer.");
+        }
+
+        UNLOCK(&inode->lock);
+
+        return status;
+}
+
+/*
+ * Timer callback for an expired split-brain choice: @data is the inode
+ * whose choice is cleared; the reference passed along with it is dropped
+ * afterwards.
+ */
+void
+afr_set_split_brain_choice_cbk (void *data)
+{
+        inode_t  *expired = data;
+        xlator_t *this    = THIS;
+
+        afr_spb_choice_timeout_cancel (this, expired);
+        inode_unref (expired);
+}
+
+
+/*
+ * Apply the split-brain choice described by @opaque (afr_spbc_timeout_t)
+ * to loc->inode, arm/re-arm/cancel the expiry timer accordingly, then
+ * unwind a setxattr with (ret, op_errno) and free @opaque.
+ *
+ * @ret:    status handed in by the caller; non-zero skips straight to the
+ *          unwind using the @frame argument.
+ * @frame:  replaced by data->frame once @ret is known to be zero.
+ *
+ * Always returns 0; the outcome is reported through the unwind.
+ */
+int
+afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque)
+{
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ inode_t *inode = NULL;
+ loc_t *loc = NULL;
+ xlator_t *this = NULL;
+ afr_spbc_timeout_t *data = opaque;
+ struct timespec delta = {0, };
+
+ if (ret)
+ goto out;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+ priv = this->private;
+
+ delta.tv_sec = priv->spb_choice_timeout;
+ delta.tv_nsec = 0;
+
+ inode = loc->inode;
+ /* NOTE(review): ret is still 0 here, so a missing inode unwinds as
+ * success with op_errno ENOMEM -- confirm this is intended. */
+ if (!inode)
+ goto out;
+
+ if (!(data->d_spb || data->m_spb)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, "Cannot set "
+ "replica.split-brain-choice on %s. File is"
+ " not in data/metadata split-brain.",
+ uuid_utoa (loc->gfid));
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, inode, &ctx);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to get inode_ctx for %s", loc->name);
+ goto unlock;
+ }
+
+ ctx->spb_choice = data->spb_child_index;
+
+ /* Possible changes in spb-choice :
+ * -1 to valid : ref and inject timer
+ *
+ * valid to valid : cancel timer and inject new one
+ *
+ * valid to -1 : cancel timer and unref
+ *
+ * -1 to -1 : do not do anything
+ */
+
+ /* ctx->timer is NULL iff previous value of
+ * ctx->spb_choice is -1
+ */
+ if (ctx->timer) {
+ if (ctx->spb_choice == -1) {
+ gf_timer_call_cancel (this->ctx, ctx->timer);
+ ctx->timer = NULL;
+ inode_unref (inode);
+ goto unlock;
+ }
+ goto reset_timer;
+ } else {
+ if (ctx->spb_choice == -1)
+ goto unlock;
+ }
+
+ /* -1 to valid: take the reference that the timer callback will
+ * drop. */
+ inode = inode_ref (loc->inode);
+ goto set_timer;
+
+reset_timer:
+ gf_timer_call_cancel (this->ctx, ctx->timer);
+ ctx->timer = NULL;
+
+set_timer:
+ /* NOTE(review): the result of gf_timer_call_after is not checked;
+ * if it can return NULL the inode reference held for the timer
+ * would never be released by the callback -- confirm. */
+ ctx->timer = gf_timer_call_after (this->ctx, delta,
+ afr_set_split_brain_choice_cbk,
+ inode);
+ }
+unlock:
+ UNLOCK(&inode->lock);
+ inode_invalidate (inode);
+out:
+ if (data)
+ GF_FREE (data);
+ AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
+ return 0;
+}
+
+/*
+ * Mark in @accused every child for which @xdata carries a non-zero pending
+ * counter of the kind selected by @type.
+ *
+ * For each child, the value stored under priv->pending_key[child] is
+ * copied into a three-int array and the slot chosen by
+ * afr_index_for_transaction_type(@type) is tested after ntoh32().
+ * Children without the key are left untouched.  Always returns 0.
+ */
+int
+afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused,
+                  afr_transaction_type type)
+{
+        afr_private_t *priv    = this->private;
+        int            slot    = afr_index_for_transaction_type (type);
+        void          *raw     = NULL;
+        int            counts[3];
+        int            child   = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (dict_get_ptr (xdata, priv->pending_key[child],
+                                  &raw) != 0)
+                        continue; /* no pending flags */
+
+                memcpy (counts, raw, sizeof(counts));
+
+                if (ntoh32 (counts[slot]))
+                        accused[child] = 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Accuse every not-yet-accused, non-arbiter child whose reported file size
+ * is smaller than the largest size seen among the candidates.
+ *
+ * Pass 1 finds the largest poststat.ia_size, ignoring children already in
+ * @data_accused and valid replies flagged with GLUSTERFS_BAD_INODE.
+ * Pass 2 sets data_accused[i] for the smaller ones.  Always returns 0.
+ */
+int
+afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
+                       unsigned char *data_accused)
+{
+        afr_private_t    *priv    = this->private;
+        struct afr_reply *reply   = NULL;
+        uint64_t          largest = 0;
+        int               child   = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                reply = &replies[child];
+
+                if (reply->valid && reply->xdata &&
+                    dict_get (reply->xdata, GLUSTERFS_BAD_INODE))
+                        continue;
+
+                if (!data_accused[child] &&
+                    reply->poststat.ia_size > largest)
+                        largest = reply->poststat.ia_size;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (data_accused[child] ||
+                    AFR_IS_ARBITER_BRICK(priv, child))
+                        continue;
+
+                if (replies[child].poststat.ia_size < largest)
+                        data_accused[child] = 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Turn the replies collected in frame->local into per-child "readable"
+ * maps for data and metadata, store them (with local->event_generation)
+ * on @inode, and report whether any child was accused.
+ *
+ * @start_heal: optional output; set to _gf_true when some child that
+ *              priv->child_up marks as up is not readable for data or
+ *              metadata.
+ *
+ * Returns 1 if at least one child was accused for data or metadata,
+ * 0 otherwise.
+ */
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_accused = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ /* Start optimistic: every child readable, except the arbiter. */
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+ if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ /* Children with no reply, a failed reply, or a bad-inode marker are
+ * unreadable and contribute no accusations. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ if (replies[i].op_ret == -1) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ if (replies[i].xdata &&
+ dict_get (replies[i].xdata, GLUSTERFS_BAD_INODE)) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ /* Directories use the entry changelog slot, everything else the
+ * data slot. */
+ afr_accused_fill (this, replies[i].xdata, data_accused,
+ (replies[i].poststat.ia_type == IA_IFDIR) ?
+ AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill (this, replies[i].xdata,
+ metadata_accused, AFR_METADATA_TRANSACTION);
+
+ }
+
+ if ((inode->ia_type != IA_IFDIR) &&
+ /* We want to accuse small files only when we know for sure that
+ * there is no IO happening. Otherwise, the ia_sizes obtained in
+ * post-refresh replies may mismatch due to a race between inode-
+ * refresh and ongoing writes, causing spurious heal launches*/
+ !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this))
+ afr_accuse_smallfiles (this, replies, data_accused);
+
+ /* An accused child is not readable for that kind. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (start_heal && priv->child_up[i] &&
+ (!data_readable[i] || !metadata_readable[i])) {
+ *start_heal = _gf_true;
+ break;
+ }
+ }
+ /* The result of storing the maps is not checked. */
+ afr_inode_read_subvol_set (inode, this, data_readable,
+ metadata_readable, event_generation);
+ return ret;
+}
+
+
+
+/*
+ * Completion hook for a self-heal launched from inode refresh: destroys
+ * the heal frame's call stack when a frame is supplied.  @ret and @opaque
+ * are unused.  Always returns 0.
+ */
+int
+afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
+{
+        if (heal != NULL)
+                STACK_DESTROY (heal->root);
+
+        return 0;
+}
+
+/*
+ * Summarise the refresh replies as an error code: 0 when at least one
+ * child returned a valid reply with op_ret == 0, otherwise the negated
+ * value of afr_final_errno().
+ */
+int
+afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local = frame->local;
+        afr_private_t    *priv  = this->private;
+        struct afr_reply *reply = NULL;
+        int               child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                reply = &local->replies[child];
+                if (reply->valid && reply->op_ret == 0)
+                        return 0;
+        }
+
+        return -afr_final_errno (local, priv);
+}
+
+/*
+ * Report whether any of data, metadata or entry self-heal is switched on.
+ * The data setting is kept as a string (priv->data_self_heal) and parsed
+ * with gf_string2boolean(); a parse failure trips GF_ASSERT.
+ */
+gf_boolean_t
+afr_selfheal_enabled (xlator_t *this)
+{
+        afr_private_t *priv    = this->private;
+        gf_boolean_t   data_on = _gf_false;
+        int            ret     = 0;
+
+        ret = gf_string2boolean (priv->data_self_heal, &data_on);
+        GF_ASSERT (!ret);
+
+        return (data_on ||
+                priv->metadata_self_heal ||
+                priv->entry_self_heal);
+}
+
+/*
+ * Final step of an inode refresh: interpret the replies, work out the
+ * error to report, wipe the replies, optionally launch a throttled
+ * self-heal on a copied frame, and finally invoke local->refreshfn with
+ * the error.
+ *
+ * A heal is launched only when afr_replies_interpret() returned non-zero,
+ * self-heal is enabled and start_heal was set.  Failure to set up the
+ * heal frame is not reported; the refresh callback still runs.
+ * Always returns 0.
+ */
+int
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *heal_frame = NULL;
+        afr_local_t  *heal_local = NULL;
+        gf_boolean_t  start_heal = _gf_false;
+        int           op_errno   = ENOMEM;
+        int           accused    = 0;
+        int           err        = 0;
+
+        accused = afr_replies_interpret (frame, this, local->refreshinode,
+                                         &start_heal);
+
+        err = afr_inode_refresh_err (frame, this);
+
+        afr_local_replies_wipe (local, this->private);
+
+        if (accused && afr_selfheal_enabled (this) && start_heal) {
+                heal_frame = copy_frame (frame);
+                if (heal_frame) {
+                        heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+                        heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
+                        if (heal_local) {
+                                heal_local->refreshinode =
+                                        inode_ref (local->refreshinode);
+                                heal_local->heal_frame = heal_frame;
+                                afr_throttled_selfheal (heal_frame, this);
+                        } else {
+                                AFR_STACK_DESTROY (heal_frame);
+                        }
+                }
+        }
+
+        local->refreshfn (frame, this, err);
+
+        return 0;
+}
+
+/*
+ * Common reply handler for the per-child lookup/fstat issued by inode
+ * refresh.  @cookie carries the child index.
+ *
+ * Records op_ret/op_errno in the child's reply slot; on success also
+ * stores @buf, @par (if given) and a new reference to @xdata (if given).
+ * need_heal defaults to 1 and is overwritten by the "link-count" value in
+ * @xdata when that lookup succeeds.  When the last outstanding reply
+ * arrives, afr_set_need_heal() and afr_inode_refresh_done() are run.
+ */
+void
+afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int op_ret, int op_errno, struct iatt *buf,
+                              dict_t *xdata, struct iatt *par)
+{
+        afr_local_t      *local     = frame->local;
+        int               child     = (long) cookie;
+        struct afr_reply *reply     = &local->replies[child];
+        int8_t            need_heal = 1;
+        GF_UNUSED int     ret       = 0;
+
+        reply->valid = 1;
+        reply->op_ret = op_ret;
+        reply->op_errno = op_errno;
+
+        if (op_ret != -1) {
+                reply->poststat = *buf;
+                if (par)
+                        reply->postparent = *par;
+                if (xdata)
+                        reply->xdata = dict_ref (xdata);
+        }
+
+        /* need_heal keeps its default of 1 if the key is absent. */
+        if (xdata)
+                ret = dict_get_int8 (xdata, "link-count", &need_heal);
+        reply->need_heal = need_heal;
+
+        if (afr_frame_return (frame) == 0) {
+                afr_set_need_heal (this, local);
+                afr_inode_refresh_done (frame, this);
+        }
+}
+
+/*
+ * Lookup-reply adapter for inode refresh: forwards everything except
+ * @inode to afr_inode_refresh_subvol_cbk().  Always returns 0.
+ */
+int
+afr_inode_refresh_subvol_with_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *par)
+{
+ afr_inode_refresh_subvol_cbk (frame, cookie, this, op_ret, op_errno,
+ buf, xdata, par);
+ return 0;
+}
+
+
+/*
+ * Wind a lookup to child @i as part of an inode refresh, with @i passed
+ * back as the cookie.
+ *
+ * The loc is built from @inode; its gfid is @gfid when the inode has no
+ * gfid yet and @gfid is supplied, otherwise inode->gfid.
+ * Always returns 0.
+ */
+int
+afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this,
+                                      int i, inode_t *inode, uuid_t gfid,
+                                      dict_t *xdata)
+{
+        afr_private_t *priv = this->private;
+        loc_t          loc  = {0, };
+
+        loc.inode = inode;
+
+        /* To handle setattr/setxattr on yet to be linked inode from
+         * dht, fall back to the caller-supplied gfid. */
+        gf_uuid_copy (loc.gfid,
+                      (gf_uuid_is_null (inode->gfid) && gfid) ?
+                      gfid : inode->gfid);
+
+        STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_lookup_cbk,
+                           (void *) (long) i, priv->children[i],
+                           priv->children[i]->fops->lookup, &loc, xdata);
+        return 0;
+}
+
+/*
+ * Fstat-reply adapter for inode refresh: forwards to
+ * afr_inode_refresh_subvol_cbk() with no parent iatt.  Always returns 0.
+ */
+int
+afr_inode_refresh_subvol_with_fstat_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ afr_inode_refresh_subvol_cbk (frame, cookie, this, op_ret, op_errno,
+ buf, xdata, NULL);
+ return 0;
+}
+
+/*
+ * Wind an fstat on local->fd to child @i as part of an inode refresh,
+ * with @i passed back as the cookie.  Always returns 0.
+ */
+int
+afr_inode_refresh_subvol_with_fstat (call_frame_t *frame, xlator_t *this, int i,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_fstat_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fstat, local->fd, xdata);
+ return 0;
+}
+
+/*
+ * Issue the refresh requests for local->refreshinode: an fstat per child
+ * when local->fd is set, otherwise a lookup per child.
+ *
+ * Any setup failure (no fd ctx, dict allocation, xattr request
+ * preparation) or an empty set of target children short-circuits to
+ * afr_inode_refresh_done().  Always returns 0.
+ */
+int
+afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *wind_subvols = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ wind_subvols = alloca0 (priv->child_count);
+
+ afr_local_replies_wipe (local, priv);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
+ }
+
+ xdata = dict_new ();
+ if (!xdata) {
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
+
+ if (afr_xattr_req_prepare (this, xdata) != 0) {
+ dict_unref (xdata);
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
+
+ /* Failures to add the two optional keys below are only logged. */
+ ret = dict_set_str (xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug (this->name, -ret,
+ "Unable to set link-count in dict ");
+ }
+
+ ret = dict_set_str (xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ if (ret) {
+ gf_msg_debug (this->name, -ret,
+ "Unable to set inodelk-dom-count in dict ");
+
+ }
+
+ /* With an fd, target only children that are up and have the fd
+ * opened; without one, target every child marked up. */
+ if (local->fd) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] &&
+ fd_ctx->opened_on[i] == AFR_FD_OPENED)
+ wind_subvols[i] = 1;
+ }
+ } else {
+ memcpy (wind_subvols, local->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ }
+
+ local->call_count = AFR_COUNT (wind_subvols, priv->child_count);
+
+ call_count = local->call_count;
+ if (!call_count) {
+ dict_unref (xdata);
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_subvols[i])
+ continue;
+
+ if (local->fd)
+ afr_inode_refresh_subvol_with_fstat (frame, this, i,
+ xdata);
+ else
+ afr_inode_refresh_subvol_with_lookup (frame, this, i,
+ local->refreshinode,
+ local->refreshgfid, xdata);
+
+ /* Stop once the last expected request has been issued. */
+ if (!--call_count)
+ break;
+ }
+
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+/*
+ * Start an inode refresh for @inode on @frame.
+ *
+ * Remembers @refreshfn as the completion callback, replaces any
+ * previously held local->refreshinode with a new reference to @inode,
+ * records @gfid (or clears the stored gfid when none is given) and kicks
+ * off afr_inode_refresh_do().  Always returns 0.
+ */
+int
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+                   uuid_t gfid, afr_inode_refresh_cbk_t refreshfn)
+{
+        afr_local_t *local = frame->local;
+
+        local->refreshfn = refreshfn;
+
+        /* Drop the inode of an earlier refresh on this frame, if any. */
+        if (local->refreshinode) {
+                inode_unref (local->refreshinode);
+                local->refreshinode = NULL;
+        }
+        local->refreshinode = inode_ref (inode);
+
+        if (!gfid)
+                gf_uuid_clear (local->refreshgfid);
+        else
+                gf_uuid_copy (local->refreshgfid, gfid);
+
+        afr_inode_refresh_do (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Add to @xattr_req the keys AFR needs back from a child: one entry per
+ * pending key, AFR_DIRTY, and "list-xattr".
+ *
+ * The pending and dirty entries carry the value
+ * AFR_NUM_CHANGE_LOGS * sizeof(int) (3 = data+metadata+entry).
+ * Individual failures are only logged; the return value is that of the
+ * final "list-xattr" dict_set_int32().
+ */
+int
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req)
+{
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+        int            ret   = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                ret = dict_set_uint64 (xattr_req, priv->pending_key[child],
+                                       AFR_NUM_CHANGE_LOGS * sizeof(int));
+                if (ret >= 0)
+                        continue;
+
+                gf_msg (this->name, GF_LOG_WARNING,
+                        -ret, AFR_MSG_DICT_SET_FAILED,
+                        "Unable to set dict value for %s",
+                        priv->pending_key[child]);
+        }
+
+        ret = dict_set_uint64 (xattr_req, AFR_DIRTY,
+                               AFR_NUM_CHANGE_LOGS * sizeof(int));
+        if (ret) {
+                gf_msg_debug (this->name, -ret, "failed to set dirty "
+                              "query flag");
+        }
+
+        ret = dict_set_int32 (xattr_req, "list-xattr", 1);
+        if (ret) {
+                gf_msg_debug (this->name, -ret,
+                              "Unable to set list-xattr in dict ");
+        }
+
+        return ret;
+}
+
+/*
+ * Build local->xattr_req for a lookup.
+ *
+ * Creates local->xattr_req if needed, copies the caller's @xattr_req into
+ * it (when given and distinct), then adds the AFR changelog keys, the
+ * inodelk/entrylk/parent-entrylk count keys and "link-count".
+ *
+ * Every key is added to local->xattr_req.  "link-count" in particular
+ * must not be set on @xattr_req: that dict may be NULL, and by this point
+ * it has already been copied, so a key added there would not reach
+ * local->xattr_req.
+ *
+ * Failures to add individual keys are only logged.  Returns 0, or -ENOMEM
+ * when local->xattr_req cannot be allocated.
+ */
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+                              dict_t *xattr_req, loc_t *loc)
+{
+        int ret = -ENOMEM;
+
+        if (!local->xattr_req)
+                local->xattr_req = dict_new ();
+
+        if (!local->xattr_req)
+                goto out;
+
+        if (xattr_req && (xattr_req != local->xattr_req))
+                dict_copy (xattr_req, local->xattr_req);
+
+        /* Best effort: the helper logs its own failures. */
+        ret = afr_xattr_req_prepare (this, local->xattr_req);
+
+        ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        -ret, AFR_MSG_DICT_SET_FAILED,
+                        "%s: Unable to set dict value for %s",
+                        loc->path, GLUSTERFS_INODELK_COUNT);
+        }
+        ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        -ret, AFR_MSG_DICT_SET_FAILED,
+                        "%s: Unable to set dict value for %s",
+                        loc->path, GLUSTERFS_ENTRYLK_COUNT);
+        }
+
+        ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        -ret, AFR_MSG_DICT_SET_FAILED,
+                        "%s: Unable to set dict value for %s",
+                        loc->path, GLUSTERFS_PARENT_ENTRYLK);
+        }
+
+        ret = dict_set_str (local->xattr_req, "link-count",
+                            GF_XATTROP_INDEX_COUNT);
+        if (ret) {
+                gf_msg_debug (this->name, -ret,
+                              "Unable to set link-count in dict ");
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Pick a child index by hashing.
+ *
+ * @hashmode 0 disables hashing (returns -1).  Otherwise a copy of
+ * args->gfid is hashed with SuperFastHash and reduced modulo
+ * @child_count.  For @hashmode > 1 and non-directories, the leading bytes
+ * of that copy are first overwritten with the process id, so the input
+ * differs between processes.
+ */
+int
+afr_hash_child (afr_read_subvol_args_t *args, int32_t child_count, int hashmode)
+{
+        uuid_t hash_input = {0,};
+        pid_t  pid;
+
+        if (hashmode == 0)
+                return -1;
+
+        gf_uuid_copy (hash_input, args->gfid);
+
+        if ((hashmode > 1) && (args->ia_type != IA_IFDIR)) {
+                /*
+                 * Why getpid?  Because it's one of the cheapest calls
+                 * available - faster than gethostname etc. - and returns a
+                 * constant-length value that's sure to be shorter than a
+                 * UUID.  It's still very unlikely to be the same across
+                 * clients, so it still provides good mixing.  We're not
+                 * trying for perfection here.  All we need is a low
+                 * probability that multiple clients converge on the same
+                 * subvolume.
+                 */
+                pid = getpid();
+                memcpy (hash_input, &pid, sizeof(pid));
+        }
+
+        return SuperFastHash((char *)hash_input,
+                             sizeof(hash_input)) % child_count;
+}
+
+
+/*
+ * Choose one readable child for @inode.
+ *
+ * Order of preference:
+ *   1. priv->read_child, if set (>= 0) and readable;
+ *   2. the child picked by afr_hash_child(), if readable;
+ *   3. the lowest-indexed readable child.
+ *
+ * The hash input comes from the inode when it is linked, otherwise from
+ * @args when supplied (else it stays zeroed).  Returns the child index, or
+ * -1 when nothing is readable.
+ */
+int
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+                                  unsigned char *readable,
+                                  afr_read_subvol_args_t *args)
+{
+        afr_private_t          *priv      = this->private;
+        afr_read_subvol_args_t  hash_args = {0,};
+        int                     pick      = -1;
+        int                     child     = 0;
+
+        /* first preference - explicitly specified or local subvolume */
+        if (priv->read_child >= 0 && readable[priv->read_child])
+                return priv->read_child;
+
+        if (inode_is_linked (inode)) {
+                gf_uuid_copy (hash_args.gfid, inode->gfid);
+                hash_args.ia_type = inode->ia_type;
+        } else if (args) {
+                hash_args = *args;
+        }
+
+        /* second preference - use hashed mode */
+        pick = afr_hash_child (&hash_args, priv->child_count,
+                               priv->hash_mode);
+        if (pick >= 0 && readable[pick])
+                return pick;
+
+        /* last resort - first readable child */
+        for (child = 0; child < priv->child_count; child++) {
+                if (readable[child])
+                        return child;
+        }
+
+        /* no readable subvolumes, either split brain or all subvols down */
+        return -1;
+}
+
+
+/*
+ * Fetch a single readable map for @inode into @readable: the metadata map
+ * for AFR_METADATA_TRANSACTION, the data map for every other @type.
+ * Returns the status of afr_inode_read_subvol_get().
+ */
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+                                unsigned char *readable, int *event_p,
+                                int type)
+{
+        unsigned char *want_data     = NULL;
+        unsigned char *want_metadata = NULL;
+
+        if (type == AFR_METADATA_TRANSACTION)
+                want_metadata = readable;
+        else
+                want_data = readable;
+
+        return afr_inode_read_subvol_get (inode, this, want_data,
+                                          want_metadata, event_p);
+}
+
+
+/*
+ * Select the child to read @inode from for a transaction of kind @type.
+ *
+ * Children readable for both data and metadata are preferred; if there
+ * are none, the selection falls back to the map for @type alone.
+ *
+ * @subvol_p:  optional output for the chosen child (-1 if none).
+ * @readables: optional output; receives the per-@type readable map.
+ * @event_p:   optional output for the cached event generation.
+ *
+ * Returns the chosen child index, or -1.  The status of the cache reads
+ * is not checked; the maps stay zeroed if they fail.
+ */
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+                     unsigned char *readables,
+                     int *event_p, afr_transaction_type type,
+                     afr_read_subvol_args_t *args)
+{
+        afr_private_t *priv       = this->private;
+        unsigned char *by_type    = alloca0 (priv->child_count);
+        unsigned char *data_ok    = alloca0 (priv->child_count);
+        unsigned char *meta_ok    = alloca0 (priv->child_count);
+        unsigned char *both_ok    = alloca0 (priv->child_count);
+        unsigned char *candidates = NULL;
+        int            subvol     = -1;
+        int            event      = 0;
+
+        afr_inode_read_subvol_type_get (inode, this, by_type, &event, type);
+
+        afr_inode_read_subvol_get (inode, this, data_ok, meta_ok,
+                                   &event);
+
+        AFR_INTERSECT (both_ok, data_ok, meta_ok,
+                       priv->child_count);
+
+        candidates = (AFR_COUNT (both_ok, priv->child_count) > 0) ?
+                     both_ok : by_type;
+
+        subvol = afr_read_subvol_select_by_policy (inode, this,
+                                                   candidates, args);
+
+        if (subvol_p)
+                *subvol_p = subvol;
+        if (event_p)
+                *event_p = event;
+        if (readables)
+                memcpy (readables, by_type,
+                        sizeof (*readables) * priv->child_count);
+
+        return subvol;
+}
+
+
+/*
+ * Release everything the transaction and internal-lock parts of @local
+ * own: the pending matrix, lock bookkeeping arrays, pre-op state
+ * (including one dict reference per child), eager-lock and
+ * failed-subvolume arrays, basenames and the two parent locs.
+ */
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ afr_matrix_cleanup (local->pending, priv->child_count);
+
+ GF_FREE (local->internal_lock.locked_nodes);
+
+ /* The inodelk array is walked until the first entry whose domain is
+ * NULL. NOTE(review): this relies on such an entry always existing
+ * within the array -- confirm against the array's declared size. */
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
+
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+
+ afr_entry_lockee_cleanup (&local->internal_lock);
+
+ GF_FREE (local->transaction.pre_op);
+
+ GF_FREE (local->transaction.pre_op_sources);
+ if (local->transaction.pre_op_xdata) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op_xdata[i])
+ continue;
+ dict_unref (local->transaction.pre_op_xdata[i]);
+ }
+ GF_FREE (local->transaction.pre_op_xdata);
+ }
+
+ GF_FREE (local->transaction.eager_lock);
+ GF_FREE (local->transaction.failed_subvols);
+
+ GF_FREE (local->transaction.basename);
+ GF_FREE (local->transaction.new_basename);
+
+ loc_wipe (&local->transaction.parent_loc);
+ loc_wipe (&local->transaction.new_parent_loc);
+
+}
+
+
+/*
+ * Drop the xdata and xattr dict references held by the first @count
+ * entries of @replies and NULL the pointers.  Other reply fields are left
+ * untouched.
+ */
+void
+afr_replies_wipe (struct afr_reply *replies, int count)
+{
+        struct afr_reply *reply = NULL;
+        int               idx   = 0;
+
+        for (idx = 0; idx < count; idx++) {
+                reply = &replies[idx];
+
+                if (reply->xdata) {
+                        dict_unref (reply->xdata);
+                        reply->xdata = NULL;
+                }
+
+                if (reply->xattr) {
+                        dict_unref (reply->xattr);
+                        reply->xattr = NULL;
+                }
+        }
+}
+
+/*
+ * Reset local->replies for reuse: release the dict references held by
+ * each of the priv->child_count replies, then zero the whole array.
+ * Does nothing when local->replies is NULL.
+ */
+void
+afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv)
+{
+        if (local->replies) {
+                afr_replies_wipe (local->replies, priv->child_count);
+
+                memset (local->replies, 0,
+                        sizeof(*local->replies) * priv->child_count);
+        }
+}
+
+/*
+ * Unlink @local's transaction from the eager_locked list it is on, under
+ * local->fd->lock.  local->fd is dereferenced unconditionally.
+ */
+void
+afr_remove_eager_lock_stub (afr_local_t *local)
+{
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+}
+
+/*
+ * Release every reference and allocation owned by @local.  A NULL
+ * @local is accepted and ignored.  The structure itself is not freed
+ * here.
+ */
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this)
+{
+        afr_private_t *priv = NULL;
+
+        if (!local)
+                return;
+
+        syncbarrier_destroy (&local->barrier);
+
+        if (local->transaction.eager_lock_on &&
+            !list_empty (&local->transaction.eager_locked)) {
+                afr_remove_eager_lock_stub (local);
+        }
+
+        afr_local_transaction_cleanup (local, this);
+
+        priv = this->private;
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->newloc);
+
+        if (local->fd) {
+                fd_unref (local->fd);
+        }
+
+        if (local->xattr_req) {
+                dict_unref (local->xattr_req);
+        }
+
+        if (local->xattr_rsp) {
+                dict_unref (local->xattr_rsp);
+        }
+
+        if (local->dict) {
+                dict_unref (local->dict);
+        }
+
+        /* replies: drop the dict refs first, then the array itself */
+        afr_local_replies_wipe (local, priv);
+        GF_FREE (local->replies);
+
+        GF_FREE (local->child_up);
+        GF_FREE (local->read_attempted);
+        GF_FREE (local->readable);
+        GF_FREE (local->readable2);
+
+        if (local->inode) {
+                inode_unref (local->inode);
+        }
+
+        if (local->parent) {
+                inode_unref (local->parent);
+        }
+
+        if (local->parent2) {
+                inode_unref (local->parent2);
+        }
+
+        if (local->refreshinode) {
+                inode_unref (local->refreshinode);
+        }
+
+        /* ---- per-fop state kept in local->cont ---- */
+
+        /* getxattr */
+        GF_FREE (local->cont.getxattr.name);
+
+        /* lk */
+        GF_FREE (local->cont.lk.locked_nodes);
+
+        /* create */
+        if (local->cont.create.fd) {
+                fd_unref (local->cont.create.fd);
+        }
+        if (local->cont.create.params) {
+                dict_unref (local->cont.create.params);
+        }
+
+        /* mknod */
+        if (local->cont.mknod.params) {
+                dict_unref (local->cont.mknod.params);
+        }
+
+        /* mkdir */
+        if (local->cont.mkdir.params) {
+                dict_unref (local->cont.mkdir.params);
+        }
+
+        /* symlink (params) */
+        if (local->cont.symlink.params) {
+                dict_unref (local->cont.symlink.params);
+        }
+
+        /* writev */
+        GF_FREE (local->cont.writev.vector);
+        if (local->cont.writev.iobref) {
+                iobref_unref (local->cont.writev.iobref);
+        }
+
+        /* setxattr */
+        if (local->cont.setxattr.dict) {
+                dict_unref (local->cont.setxattr.dict);
+        }
+
+        /* fsetxattr */
+        if (local->cont.fsetxattr.dict) {
+                dict_unref (local->cont.fsetxattr.dict);
+        }
+
+        /* removexattr */
+        GF_FREE (local->cont.removexattr.name);
+
+        /* xattrop */
+        if (local->cont.xattrop.xattr) {
+                dict_unref (local->cont.xattrop.xattr);
+        }
+
+        /* symlink (linkpath) */
+        GF_FREE (local->cont.symlink.linkpath);
+
+        /* opendir */
+        GF_FREE (local->cont.opendir.checksum);
+
+        /* readdirp */
+        if (local->cont.readdir.dict) {
+                dict_unref (local->cont.readdir.dict);
+        }
+
+        /* inodelk */
+        GF_FREE (local->cont.inodelk.volume);
+
+        if (local->xdata_req) {
+                dict_unref (local->xdata_req);
+        }
+
+        if (local->xdata_rsp) {
+                dict_unref (local->xdata_rsp);
+        }
+}
+
+
+/*
+ * Decrement the frame's outstanding call counter under frame->lock and
+ * return the value it has after the decrement.
+ */
+int
+afr_frame_return (call_frame_t *frame)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = 0;
+
+        LOCK (&frame->lock);
+        remaining = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        return remaining;
+}
+
+static char *afr_ignore_xattrs[] = { /* keys that afr_is_xattr_ignorable()
+                                      * matches by exact string compare; the
+                                      * trailing NULL terminates the scan */
+ GLUSTERFS_OPEN_FD_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GF_SELINUX_XATTR_KEY,
+ QUOTA_SIZE_KEY,
+ NULL
+};
+
+/*
+ * Return _gf_true when @key starts with AFR_XATTR_PREFIX or is exactly
+ * one of the entries of afr_ignore_xattrs[]; _gf_false otherwise.
+ */
+gf_boolean_t
+afr_is_xattr_ignorable (char *key)
+{
+        char **entry = NULL;
+
+        if (strncmp (key, AFR_XATTR_PREFIX, strlen (AFR_XATTR_PREFIX)) == 0)
+                return _gf_true;
+
+        for (entry = afr_ignore_xattrs; *entry != NULL; entry++) {
+                if (strcmp (key, *entry) == 0)
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Match callback handed to are_dicts_equal(): a key matches when it is
+ * NOT one of the ignorable xattrs.  Only @key1 is inspected.
+ */
+static gf_boolean_t
+afr_xattr_match (dict_t *this, char *key1, data_t *value1, void *data)
+{
+        return afr_is_xattr_ignorable (key1) ? _gf_false : _gf_true;
+}
+
+/*
+ * Compare two xattr dicts through are_dicts_equal(), using
+ * afr_xattr_match as the key filter.
+ */
+gf_boolean_t
+afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2)
+{
+        gf_boolean_t equal = _gf_false;
+
+        equal = are_dicts_equal (dict1, dict2, afr_xattr_match, NULL);
+
+        return equal;
+}
+
+/*
+ * Pick the index of the reply whose postparent should be returned.
+ *
+ * Only valid replies with op_ret >= 0 are considered.  The first such
+ * reply is the initial choice; a later one replaces it when it is the
+ * parent's data read subvolume, or when it is readable[] and the current
+ * choice is not that read subvolume.  Returns 0 when no reply qualified.
+ */
+static int
+afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
+                            struct afr_reply *replies, unsigned char *readable)
+{
+        afr_private_t *priv      = this->private;
+        int            preferred = -1;
+        int            chosen    = -1;
+        int            idx       = 0;
+
+        if (parent)
+                preferred = afr_data_subvol_get (parent, this, NULL, NULL,
+                                                 NULL, NULL);
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (!replies[idx].valid || replies[idx].op_ret < 0)
+                        continue;
+
+                if (chosen == -1) {
+                        /* first usable reply */
+                        chosen = idx;
+                } else {
+                        if ((chosen != preferred) && readable[idx])
+                                chosen = idx;
+
+                        if (idx == preferred)
+                                chosen = idx;
+                }
+        }
+
+        /* The only way to get here with -1 is that the LOOKUP failed on
+         * all sub-volumes, so an arbitrary subvolume (0) is acceptable.
+         */
+        return (chosen == -1) ? 0 : chosen;
+}
+
+/*
+ * Return the data read subvolume of @inode, or -1 when either the data
+ * or the metadata read subvolume cannot be determined.
+ */
+int
+afr_read_subvol_decide (inode_t *inode, xlator_t *this,
+                        afr_read_subvol_args_t *args)
+{
+        int data  = -1;
+        int mdata = -1;
+
+        data  = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args);
+        mdata = afr_metadata_subvol_get (inode, this, NULL, NULL, NULL, args);
+
+        return (data != -1 && mdata != -1) ? data : -1;
+}
+
+/*
+ * Index of the first child whose reply is valid with op_ret == 0.
+ * Falls back to 0 when no child qualifies.
+ */
+static inline int
+afr_first_up_child (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        while (child < priv->child_count) {
+                if (local->replies[child].valid &&
+                    local->replies[child].op_ret == 0) {
+                        return child;
+                }
+                child++;
+        }
+
+        return 0;
+}
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
+{ /* Final step of a named lookup: from the replies stored in
+   * local->replies, choose the reply to answer from (read_subvol) and
+   * the reply whose postparent is returned (par_read_subvol), settle
+   * local->op_ret / local->op_errno and unwind the lookup fop.
+   */
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ int par_read_subvol = 0;
+ unsigned char *readable = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {0, };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
+ int spb_choice = -1;
+ ia_type_t ia_type = IA_INVAL;
+ afr_read_subvol_args_t args = {0,};
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+ parent = local->loc.parent;
+
+ locked_entry = afr_is_possibly_under_txn (AFR_ENTRY_TRANSACTION, local,
+ this);
+
+ readable = alloca0 (priv->child_count);
+
+ afr_inode_read_subvol_get (parent, this, readable, NULL, &event); /* NOTE(review): presumably fills readable[] for the parent inode - confirm */
+
+ afr_inode_split_brain_choice_get (local->inode, this,
+ &spb_choice);
+ /* First, check if we have a gfid-change from somewhere,
+ If so, propagate that so that a fresh lookup can be
+ issued
+ */
+ if (local->cont.lookup.needs_fresh_lookup) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ goto unwind;
+ }
+
+ op_errno = afr_final_errno (frame->local, this->private);
+ local->op_errno = op_errno;
+
+ read_subvol = -1; /* no candidate reply picked yet */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (locked_entry && replies[i].op_ret == -1 &&
+ replies[i].op_errno == ENOENT) {
+ /* Second, check entry is still
+ "underway" in creation */
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (read_subvol == -1 || !readable[read_subvol]) { /* replace the candidate while it is unset or not readable[] */
+ read_subvol = i;
+ gf_uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
+ ia_type = replies[i].poststat.ia_type;
+ local->op_ret = 0;
+ }
+ }
+
+ if (read_subvol == -1) /* no successful reply at all */
+ goto unwind;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ if (priv->child_up[i]) /* an up child gave no successful reply */
+ can_interpret = _gf_false;
+ continue;
+ }
+
+ if (!gf_uuid_compare (replies[i].poststat.ia_gfid, read_gfid))
+ continue;
+
+ can_interpret = _gf_false; /* this reply's GFID differs from read_gfid */
+
+ if (locked_entry)
+ continue;
+
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
+
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto unwind;
+ }
+
+ /* Fourth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ if (can_interpret) {
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ gf_uuid_copy (args.gfid, read_gfid);
+ args.ia_type = ia_type;
+ if (afr_replies_interpret (frame, this, local->inode, NULL)) {
+ read_subvol = afr_read_subvol_decide (local->inode,
+ this, &args);
+ afr_inode_read_subvol_reset (local->inode, this);
+ goto cant_interpret; /* jumps into the else-branch below */
+ } else {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL, NULL, &args);
+ }
+ } else {
+cant_interpret: /* also entered by the goto above, with read_subvol possibly -1 */
+ if (read_subvol == -1) {
+ if (spb_choice >= 0)
+ read_subvol = spb_choice;
+ else
+ read_subvol = afr_first_up_child (frame, this);
+ }
+ dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
+
+ afr_handle_quota_size (frame, this);
+
+unwind: /* reached on the normal path and from every early exit above */
+ afr_set_need_heal (this, local);
+ if (read_subvol == -1) {
+ if (spb_choice >= 0)
+ read_subvol = spb_choice;
+ else
+ read_subvol = afr_first_up_child (frame, this);
+
+ }
+ par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
+ readable);
+ if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) { /* a success that would be served from the arbiter brick is turned into ENOTCONN */
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
+
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[par_read_subvol].postparent);
+}
+
+/*
+ * During a lookup some errors are more "important" than others and
+ * must win when several children failed differently.
+ *
+ * Precedence: ENODATA > ENOENT > ESTALE > others.
+ *
+ * Returns the highest-ranked of the two errnos; when neither is one of
+ * the ranked values, @new_errno is returned.
+ */
+
+int
+afr_higher_errno (int32_t old_errno, int32_t new_errno)
+{
+        const int32_t ranked[] = { ENODATA, ENOENT, ESTALE };
+        const int     nranked  = (int) (sizeof (ranked) / sizeof (ranked[0]));
+        int           k        = 0;
+
+        for (k = 0; k < nranked; k++) {
+                if (old_errno == ranked[k] || new_errno == ranked[k])
+                        return ranked[k];
+        }
+
+        return new_errno;
+}
+
+
+/*
+ * Fold the op_errno of every valid, failed (op_ret < 0) reply through
+ * afr_higher_errno() and return the result; 0 when no reply failed.
+ */
+int
+afr_final_errno (afr_local_t *local, afr_private_t *priv)
+{
+        int child  = 0;
+        int result = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (local->replies[child].valid &&
+                    local->replies[child].op_ret < 0) {
+                        result = afr_higher_errno (result,
+                                                   local->replies[child].op_errno);
+                }
+        }
+
+        return result;
+}
+
+/*
+ * getxattr callback of the local-discovery probe.  When the pathinfo
+ * xattr says the child is local, mark it in priv->local[] and, unless it
+ * is the arbiter brick, make it the read child.  The probe frame is
+ * always destroyed here.
+ */
+static int32_t
+afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *dict,
+                         dict_t *xdata)
+{
+        afr_private_t *priv     = NULL;
+        char          *pathinfo = NULL;
+        gf_boolean_t   is_local = _gf_false;
+        int32_t        child    = -1;
+
+        if (op_ret != 0)
+                goto out;
+
+        priv  = this->private;
+        child = (int32_t)(long)cookie;
+
+        if (dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo) != 0)
+                goto out;
+
+        if (glusterfs_is_local_pathinfo (pathinfo, &is_local))
+                goto out;
+
+        if (!is_local)
+                goto out;
+
+        /*
+         * Note that one local subvolume will override another here. The only
+         * way to avoid that would be to retain extra information about whether
+         * the previous read_child is local, and it's just not worth it. Even
+         * the slowest local subvolume is far preferable to a remote one.
+         */
+        priv->local[child] = 1;
+
+        /* Don't set arbiter as read child. */
+        if (AFR_IS_ARBITER_BRICK(priv, child))
+                goto out;
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                AFR_MSG_LOCAL_CHILD, "selecting local read_child %s",
+                priv->children[child]->name);
+
+        priv->read_child = child;
+out:
+        STACK_DESTROY(frame->root);
+        return 0;
+}
+
+/*
+ * Send a getxattr for GF_XATTR_PATHINFO_KEY to child @child_index on a
+ * freshly created frame; the answer is handled by
+ * afr_local_discovery_cbk().  Silently gives up if no frame can be
+ * created.
+ */
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+        afr_private_t *priv      = this->private;
+        loc_t          probe_loc = {0,};
+        call_frame_t  *probe     = NULL;
+
+        probe = create_frame(this,this->ctx->pool);
+        if (probe == NULL)
+                return;
+
+        /* gfid with only its last byte set to 1
+         * (presumably the root gfid -- confirm) */
+        probe_loc.gfid[sizeof(probe_loc.gfid)-1] = 1;
+
+        STACK_WIND_COOKIE (probe, afr_local_discovery_cbk,
+                           (void *)(long)child_index,
+                           priv->children[child_index],
+                           priv->children[child_index]->fops->getxattr,
+                           &probe_loc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
+/*
+ * Synctask body: run metadata self-heal based on the first good reply's
+ * stat, then redo the lookup on all up children and finish the lookup.
+ * @opaque is the lookup frame.  afr_lookup_done() is called on every
+ * path; always returns 0.
+ */
+int
+afr_lookup_sh_metadata_wrap (void *opaque)
+{
+        call_frame_t     *frame   = opaque;
+        xlator_t         *this    = frame->this;
+        afr_local_t      *local   = frame->local;
+        afr_private_t    *priv    = this->private;
+        struct afr_reply *replies = local->replies;
+        inode_t          *inode   = NULL;
+        dict_t           *dict    = NULL;
+        int               first   = -1;
+        int               idx     = 0;
+        int               ret     = -1;
+
+        /* locate the first valid reply that did not fail */
+        while (idx < priv->child_count) {
+                if (replies[idx].valid && replies[idx].op_ret != -1) {
+                        first = idx;
+                        break;
+                }
+                idx++;
+        }
+        if (first == -1)
+                goto out;
+
+        if (afr_selfheal_metadata_by_stbuf (this, &replies[first].poststat))
+                goto out;
+
+        afr_local_replies_wipe (local, this->private);
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_set_str (dict, "link-count", GF_XATTROP_INDEX_COUNT);
+        if (ret) {
+                /* not fatal: the lookup below is still issued */
+                gf_msg_debug (this->name, -ret,
+                              "Unable to set link-count in dict ");
+        }
+
+        inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
+                                                 local->loc.name,
+                                                 local->replies,
+                                                 local->child_up, dict);
+        if (inode)
+                inode_unref (inode);
+out:
+        afr_lookup_done (frame, this);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * Decide whether a metadata self-heal should be launched from lookup.
+ *
+ * Returns _gf_false when metadata self-heal is disabled, or as soon as a
+ * good reply disagrees with the first good reply on GFID or file type.
+ * Otherwise returns _gf_true if any good reply differs from the first in
+ * uid, gid, prot or (non-ignorable) xattrs.
+ */
+static gf_boolean_t
+afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local     = frame->local;
+        afr_private_t    *priv      = this->private;
+        struct afr_reply *replies   = local->replies;
+        struct iatt       stbuf     = {0, };
+        gf_boolean_t      need_heal = _gf_false;
+        int               first     = -1;
+        int               idx       = 0;
+
+        if (!priv->metadata_self_heal)
+                return _gf_false;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (!replies[idx].valid || replies[idx].op_ret == -1)
+                        continue;
+
+                if (first == -1) {
+                        /* reference reply the others are compared with */
+                        first = idx;
+                        stbuf = replies[idx].poststat;
+                        continue;
+                }
+
+                /* GFID or type mismatch: not a metadata heal case */
+                if (gf_uuid_compare (stbuf.ia_gfid,
+                                     replies[idx].poststat.ia_gfid) ||
+                    !IA_EQUAL (stbuf, replies[idx].poststat, type)) {
+                        need_heal = _gf_false;
+                        break;
+                }
+
+                /*Check if iattrs need heal*/
+                if ((!IA_EQUAL (stbuf, replies[idx].poststat, uid)) ||
+                    (!IA_EQUAL (stbuf, replies[idx].poststat, gid)) ||
+                    (!IA_EQUAL (stbuf, replies[idx].poststat, prot))) {
+                        need_heal = _gf_true;
+                        continue;
+                }
+
+                /*Check if xattrs need heal*/
+                if (!afr_xattrs_are_equal (replies[first].xdata,
+                                           replies[idx].xdata)) {
+                        need_heal = _gf_true;
+                }
+        }
+
+        return need_heal;
+}
+
+/*
+ * Launch a metadata self-heal synctask for this lookup when
+ * afr_can_start_metadata_self_heal() says one is needed; otherwise (or
+ * if the task cannot be started) finish the lookup right away through
+ * afr_lookup_done().
+ *
+ * Returns the synctask_new() result, or 0 when no heal was attempted.
+ */
+int
+afr_lookup_metadata_heal_check (call_frame_t *frame, xlator_t *this)
+
+{
+        call_frame_t *heal = NULL;
+        int           ret  = 0;
+
+        if (!afr_can_start_metadata_self_heal (frame, this))
+                goto out;
+
+        heal = copy_frame (frame);
+        if (heal)
+                heal->root->pid = GF_CLIENT_PID_SELF_HEALD;
+        ret = synctask_new (this->ctx->env, afr_lookup_sh_metadata_wrap,
+                            afr_refresh_selfheal_done, heal, frame);
+        if (ret)
+                goto out;
+        return ret;
+out:
+        /* The task was never started, so its completion callback will not
+         * run to release the copied frame: destroy it here instead of
+         * leaking it.
+         */
+        if (heal)
+                STACK_DESTROY (heal->root);
+        afr_lookup_done (frame, this);
+        return ret;
+}
+
+/*
+ * Synctask body: run name self-heal for the looked-up entry, redo the
+ * lookup on the up children and continue with the metadata heal check.
+ * @opaque is the lookup frame.  A name heal result of -EIO unwinds the
+ * lookup with EIO.  Always returns 0.
+ */
+int
+afr_lookup_selfheal_wrap (void *opaque)
+{
+        call_frame_t *frame = opaque;
+        afr_local_t  *local = frame->local;
+        xlator_t     *this  = frame->this;
+        inode_t      *inode = NULL;
+        int           ret   = 0;
+
+        ret = afr_selfheal_name (frame->this, local->loc.pargfid,
+                                 local->loc.name,
+                                 &local->cont.lookup.gfid_req);
+        if (ret == -EIO) {
+                AFR_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        afr_local_replies_wipe (local, this->private);
+
+        inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
+                                                 local->loc.name,
+                                                 local->replies,
+                                                 local->child_up, NULL);
+        if (inode)
+                inode_unref (inode);
+
+        afr_lookup_metadata_heal_check(frame, this);
+        return 0;
+}
+
+
+/*
+ * Decide whether the looked-up name needs an entry (name) self-heal and,
+ * if so, start it as a synctask.  A heal is wanted when any valid reply
+ * failed with ENODATA, or when valid replies disagree on op_ret or GFID.
+ *
+ * When no heal is needed, or the task cannot be started, processing
+ * continues with afr_lookup_metadata_heal_check().
+ *
+ * Returns 0 when the heal task was started, otherwise the result of
+ * afr_lookup_metadata_heal_check().
+ */
+int
+afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local     = NULL;
+        afr_private_t    *priv      = NULL;
+        call_frame_t     *heal      = NULL;
+        int               i         = 0, first = -1;
+        gf_boolean_t      need_heal = _gf_false;
+        struct afr_reply *replies   = NULL;
+        int               ret       = 0;
+
+        local = frame->local;
+        replies = local->replies;
+        priv = this->private;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!replies[i].valid)
+                        continue;
+
+                if ((replies[i].op_ret == -1) &&
+                    (replies[i].op_errno == ENODATA))
+                        need_heal = _gf_true;
+
+                if (first == -1) {
+                        first = i;
+                        continue;
+                }
+
+                if (replies[i].op_ret != replies[first].op_ret) {
+                        need_heal = _gf_true;
+                        break;
+                }
+
+                if (gf_uuid_compare (replies[i].poststat.ia_gfid,
+                                     replies[first].poststat.ia_gfid)) {
+                        need_heal = _gf_true;
+                        break;
+                }
+        }
+
+        if (need_heal) {
+                heal = copy_frame (frame);
+                if (heal)
+                        heal->root->pid = GF_CLIENT_PID_SELF_HEALD;
+                ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
+                                    afr_refresh_selfheal_done, heal, frame);
+                if (!ret)
+                        return ret;
+
+                /* The task was never started, so its completion callback
+                 * will not run to release the copied frame: destroy it
+                 * here instead of leaking it.
+                 */
+                if (heal)
+                        STACK_DESTROY (heal->root);
+        }
+
+        ret = afr_lookup_metadata_heal_check (frame, this);
+
+        return ret;
+}
+
+
+/*
+ * Per-child lookup callback.  Stores the child's result in
+ * local->replies[child] and, once the last child has answered, moves on
+ * to the entry-heal check.  @cookie carries the child index.
+ */
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+                dict_t *xdata, struct iatt *postparent)
+{
+        afr_local_t      *local     = frame->local;
+        int               child     = (long) cookie;
+        struct afr_reply *reply     = &local->replies[child];
+        GF_UNUSED int     ret       = 0;
+        int8_t            need_heal = 1;
+
+        reply->valid = 1;
+        reply->op_ret = op_ret;
+        reply->op_errno = op_errno;
+
+        /*
+         * On revalidate lookup if the gfid-changed, afr should unwind the fop
+         * with ESTALE so that a fresh lookup will be sent by the top xlator.
+         * So remember it.
+         */
+        if (xdata && dict_get (xdata, "gfid-changed"))
+                local->cont.lookup.needs_fresh_lookup = _gf_true;
+
+        /* need_heal keeps its default of 1 when xdata is absent */
+        if (xdata)
+                ret = dict_get_int8 (xdata, "link-count", &need_heal);
+        reply->need_heal = need_heal;
+
+        if (op_ret != -1) {
+                reply->poststat = *buf;
+                reply->postparent = *postparent;
+                if (xdata)
+                        reply->xdata = dict_ref (xdata);
+        }
+
+        if (afr_frame_return (frame) == 0) {
+                afr_set_need_heal (this, local);
+                afr_lookup_entry_heal (frame, this);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * Final step of a nameless lookup (discover): settle op_ret/op_errno from
+ * the collected replies, pick the reply to answer from and unwind.
+ */
+static void
+afr_discover_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv        = this->private;
+        afr_local_t   *local       = frame->local;
+        int            child       = 0;
+        int            final_errno = 0;
+        int            spb_choice  = -1;
+        int            read_subvol = -1;
+
+        afr_inode_split_brain_choice_get (local->inode, this,
+                                          &spb_choice);
+
+        /* a single successful reply makes the whole fop a success */
+        for (child = 0; child < priv->child_count; child++) {
+                if (local->replies[child].valid &&
+                    local->replies[child].op_ret == 0)
+                        local->op_ret = 0;
+        }
+
+        final_errno = afr_final_errno (frame->local, this->private);
+
+        if (local->op_ret < 0) {
+                local->op_errno = final_errno;
+                local->op_ret = -1;
+                goto unwind;
+        }
+
+        afr_replies_interpret (frame, this, local->inode, NULL);
+
+        read_subvol = afr_read_subvol_decide (local->inode, this, NULL);
+        if (read_subvol == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        AFR_MSG_READ_SUBVOL_ERROR, "no read subvols for %s",
+                        local->loc.path);
+
+                read_subvol = (spb_choice >= 0)
+                              ? spb_choice
+                              : afr_first_up_child (frame, this);
+        }
+
+unwind:
+        /* still needed for the early failure exit above */
+        if (read_subvol == -1) {
+                read_subvol = (spb_choice >= 0)
+                              ? spb_choice
+                              : afr_first_up_child (frame, this);
+        }
+
+        /* a success that would be served from the arbiter is refused */
+        if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
+                local->op_ret = -1;
+                local->op_errno = ENOTCONN;
+        }
+
+        AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+                          local->inode, &local->replies[read_subvol].poststat,
+                          local->replies[read_subvol].xdata,
+                          &local->replies[read_subvol].postparent);
+}
+
+
+/*
+ * Per-child callback of a nameless lookup.  Records the child's reply,
+ * optionally kicks off local-subvolume discovery for it, and calls
+ * afr_discover_done() once every child has answered.  @cookie carries
+ * the child index.
+ */
+int
+afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+                  dict_t *xdata, struct iatt *postparent)
+{
+        afr_local_t      *local     = frame->local;
+        int               child     = (long) cookie;
+        struct afr_reply *reply     = &local->replies[child];
+        GF_UNUSED int     ret       = 0;
+        int8_t            need_heal = 1;
+
+        reply->valid = 1;
+        reply->op_ret = op_ret;
+        reply->op_errno = op_errno;
+
+        if (op_ret != -1) {
+                reply->poststat = *buf;
+                reply->postparent = *postparent;
+                if (xdata)
+                        reply->xdata = dict_ref (xdata);
+        }
+
+        if (local->do_discovery && (op_ret == 0))
+                afr_attempt_local_discovery (this, child);
+
+        /* need_heal keeps its default of 1 when xdata is absent */
+        if (xdata)
+                ret = dict_get_int8 (xdata, "link-count", &need_heal);
+        reply->need_heal = need_heal;
+
+        if (afr_frame_return (frame) == 0) {
+                afr_set_need_heal (this, local);
+                afr_discover_done (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Wind the nameless lookup to every up child.  A non-zero @err (a
+ * negative errno) or a failure to prepare the request xattrs unwinds the
+ * lookup with the corresponding error.  Always returns 0.
+ */
+int
+afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+        int            ret     = 0;
+
+        if (err) {
+                local->op_errno = -err;
+                ret = -1;
+                goto out;
+        }
+
+        pending = local->call_count = AFR_COUNT (local->child_up,
+                                                 priv->child_count);
+
+        ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+                                            &local->loc);
+        if (ret) {
+                local->op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_discover_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->lookup,
+                                   &local->loc, local->xattr_req);
+                /* stop right after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+out:
+        AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+        return 0;
+}
+
+
+/*
+ * Entry point for a nameless lookup.  Sets up the frame local, records
+ * root-inode bookkeeping when the inode is the root, and then either
+ * winds the lookup directly or refreshes the inode first when its event
+ * generation is out of date.  Always returns 0; errors are reported by
+ * unwinding the lookup.
+ */
+int
+afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *local      = NULL;
+        int            op_errno   = ENOMEM;
+        int            generation = 0;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
+
+        if (!local->call_count) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        if (__is_root_gfid (loc->inode->gfid)) {
+                if (!this->itable)
+                        this->itable = loc->inode->table;
+                if (!priv->root_inode)
+                        priv->root_inode = inode_ref (loc->inode);
+
+                if (priv->choose_local && !priv->did_discovery) {
+                        /* Logic to detect which subvolumes of AFR are
+                           local, in order to prefer them for reads
+                        */
+                        local->do_discovery = _gf_true;
+                        priv->did_discovery = _gf_true;
+                }
+        }
+
+        local->op = GF_FOP_LOOKUP;
+
+        loc_copy (&local->loc, loc);
+
+        local->inode = inode_ref (loc->inode);
+
+        /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+           allocate one for us */
+        if (xattr_req)
+                local->xattr_req = dict_ref (xattr_req);
+
+        if (gf_uuid_is_null (loc->inode->gfid)) {
+                afr_discover_do (frame, this, 0);
+                return 0;
+        }
+
+        afr_read_subvol_get (loc->inode, this, NULL, NULL, &generation,
+                             AFR_DATA_TRANSACTION, NULL);
+
+        if (generation == local->event_generation)
+                afr_discover_do (frame, this, 0);
+        else
+                afr_inode_refresh (frame, this, loc->inode, NULL,
+                                   afr_discover_do);
+
+        return 0;
+out:
+        AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Wind the named lookup to every up child.  A negative @err or a failure
+ * to prepare the request xattrs unwinds the lookup with the
+ * corresponding error.  Always returns 0.
+ */
+int
+afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+        int            ret     = 0;
+
+        if (err < 0) {
+                local->op_errno = -err;
+                ret = -1;
+                goto out;
+        }
+
+        pending = local->call_count = AFR_COUNT (local->child_up,
+                                                 priv->child_count);
+
+        ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+                                            &local->loc);
+        if (ret) {
+                local->op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->lookup,
+                                   &local->loc, local->xattr_req);
+                /* stop right after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+        return 0;
+out:
+        AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+        return 0;
+}
+
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element being looked up is,
+ * i.e. what its GFID, its inode type and a conservative estimate of its
+ * inode attributes are.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or be in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief goal of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply the same inode type and GFID, the overall call
+ * MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
+
+/*
+ * Lookup fop entry point.  Nameless lookups (no parent and a null
+ * pargfid) are handed to afr_discover().  A lookup of
+ * GF_REPLICATE_TRASH_DIR directly under the root is refused with EPERM.
+ * Otherwise the frame local is prepared and the lookup is wound, after
+ * refreshing the parent inode when its event generation is stale.
+ * Always returns 0; errors are reported by unwinding the lookup.
+ */
+int
+afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+        afr_local_t *local      = NULL;
+        int32_t      op_errno   = 0;
+        int          generation = 0;
+        void        *gfid_req   = NULL;
+
+        if (!loc->parent && gf_uuid_is_null (loc->pargfid)) {
+                if (xattr_req)
+                        dict_del (xattr_req, "gfid-req");
+                afr_discover (frame, this, loc, xattr_req);
+                return 0;
+        }
+
+        if (__is_root_gfid (loc->parent->gfid) &&
+            !strcmp (loc->name, GF_REPLICATE_TRASH_DIR)) {
+                op_errno = EPERM;
+                goto out;
+        }
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
+
+        if (!local->call_count) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        local->op = GF_FOP_LOOKUP;
+
+        loc_copy (&local->loc, loc);
+
+        local->inode = inode_ref (loc->inode);
+
+        if (xattr_req) {
+                /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+                   allocate one for us */
+                local->xattr_req = dict_copy_with_ref (xattr_req, NULL);
+                if (!local->xattr_req) {
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+
+                /* remember the requested gfid, then strip it from the
+                 * copy of the request */
+                if (dict_get_ptr (local->xattr_req, "gfid-req",
+                                  &gfid_req) == 0) {
+                        gf_uuid_copy (local->cont.lookup.gfid_req, gfid_req);
+                        dict_del (local->xattr_req, "gfid-req");
+                }
+        }
+
+        afr_read_subvol_get (loc->parent, this, NULL, NULL, &generation,
+                             AFR_DATA_TRANSACTION, NULL);
+
+        if (generation == local->event_generation)
+                afr_lookup_do (frame, this, 0);
+        else
+                afr_inode_refresh (frame, this, loc->parent, NULL,
+                                   afr_lookup_do);
+
+        return 0;
+out:
+        AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/* {{{ open */
+
+/*
+ * Return the AFR context stored on @fd, creating it first when none is
+ * set yet.  Returns NULL if it can neither be found nor created.  Uses
+ * the double-underscore fd ctx helpers (presumably the caller holds
+ * fd->lock, as afr_fd_ctx_get() does).
+ */
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+        uint64_t value = 0;
+
+        if (__fd_ctx_get (fd, this, &value) < 0) {
+                /* nothing stored yet: create it, then read it back */
+                if (__afr_fd_ctx_set (this, fd) < 0)
+                        return NULL;
+
+                if (__fd_ctx_get (fd, this, &value) < 0)
+                        return NULL;
+        }
+
+        return (afr_fd_ctx_t *)(long) value;
+}
+
+
+/*
+ * Locked wrapper: fetch (creating if necessary) the AFR context of @fd
+ * while holding fd->lock.
+ */
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+        afr_fd_ctx_t *result = NULL;
+
+        LOCK(&fd->lock);
+        result = __afr_fd_ctx_get (fd, this);
+        UNLOCK(&fd->lock);
+
+        return result;
+}
+
+
+/*
+ * Allocate an AFR fd context and store it on @fd, unless one is already
+ * present (in which case nothing is done and 0 is returned).
+ *
+ * Returns 0 on success, -1 when validation fails, -ENOMEM on allocation
+ * failure, or the non-zero result of __fd_ctx_set().  On any failure the
+ * partially built context is released, so nothing is leaked.
+ */
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+        afr_private_t *priv        = NULL;
+        int            ret         = -1;
+        uint64_t       ctx         = 0;
+        afr_fd_ctx_t  *fd_ctx      = NULL;
+        int            i           = 0;
+        gf_boolean_t   lock_inited = _gf_false;
+
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+
+        ret = __fd_ctx_get (fd, this, &ctx);
+
+        /* a context is already set: nothing to do */
+        if (ret == 0)
+                goto out;
+
+        fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+                            gf_afr_mt_afr_fd_ctx_t);
+        if (!fd_ctx) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+                fd_ctx->pre_op_done[i] = GF_CALLOC (sizeof (*fd_ctx->pre_op_done[i]),
+                                                    priv->child_count,
+                                                    gf_afr_mt_int32_t);
+                if (!fd_ctx->pre_op_done[i]) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+        }
+
+        fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+                                       priv->child_count,
+                                       gf_afr_mt_int32_t);
+        if (!fd_ctx->opened_on) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        /* anonymous fds count as opened on every child from the start */
+        for (i = 0; i < priv->child_count; i++) {
+                if (fd_is_anonymous (fd))
+                        fd_ctx->opened_on[i] = AFR_FD_OPENED;
+                else
+                        fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+        }
+
+        fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+                                            priv->child_count,
+                                            gf_afr_mt_char);
+        if (!fd_ctx->lock_piggyback) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+                                           priv->child_count,
+                                           gf_afr_mt_char);
+        if (!fd_ctx->lock_acquired) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        fd_ctx->readdir_subvol = -1;
+
+        pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+        lock_inited = _gf_true;
+
+        INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+        ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+        if (ret)
+                gf_msg_debug (this->name, 0,
+                              "failed to set fd ctx (%p)", fd);
+out:
+        /* fd_ctx is non-NULL only if it was allocated here; with a
+         * non-zero ret it was never stored on the fd, so tear it down the
+         * same way afr_cleanup_fd_ctx() does.  Members that were not
+         * reached are still NULL from GF_CALLOC, and GF_FREE is already
+         * called on possibly-unset pointers elsewhere in this file.
+         */
+        if (ret && fd_ctx) {
+                for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++)
+                        GF_FREE (fd_ctx->pre_op_done[i]);
+
+                GF_FREE (fd_ctx->opened_on);
+                GF_FREE (fd_ctx->lock_piggyback);
+                GF_FREE (fd_ctx->lock_acquired);
+
+                if (lock_inited)
+                        pthread_mutex_destroy (&fd_ctx->delay_lock);
+
+                GF_FREE (fd_ctx);
+        }
+        return ret;
+}
+
+
+/*
+ * Locked wrapper: create and attach the AFR context of @fd while holding
+ * fd->lock.  Returns whatever __afr_fd_ctx_set() returns.
+ */
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+        int status = -1;
+
+        LOCK (&fd->lock);
+        status = __afr_fd_ctx_set (this, fd);
+        UNLOCK (&fd->lock);
+
+        return status;
+}
+
+/* {{{ flush */
+
+/*
+ * Per-child flush callback.  A successful child sets local->op_ret and
+ * supplies xdata_rsp if none is held yet; a failing child only records
+ * its errno.  The flush is unwound once the last child has answered.
+ */
+int
+afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t *local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                } else {
+                        local->op_ret = op_ret;
+                        if (xdata && !local->xdata_rsp)
+                                local->xdata_rsp = dict_ref (xdata);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (afr_frame_return (frame) == 0) {
+                AFR_STACK_UNWIND (flush, frame, local->op_ret,
+                                  local->op_errno, local->xdata_rsp);
+        }
+
+        return 0;
+}
+
+/*
+ * Stub body for flush: wind the flush to every up child using
+ * local->fd, stopping after local->call_count winds.
+ */
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        afr_private_t *priv    = this->private;
+        afr_local_t   *local   = frame->local;
+        int            pending = local->call_count;
+        int            child   = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_flush_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->flush,
+                                   local->fd, xdata);
+                /* stop right after the last expected wind */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+/*
+ * Flush fop entry point.  Builds a stub around afr_flush_wrapper() and
+ * hands it to afr_delayed_changelog_wake_resume().  Fails with ENOTCONN
+ * when the local has a zero call_count and with ENOMEM when the local or
+ * the stub cannot be created.  Always returns 0.
+ */
+int
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        afr_local_t *local    = NULL;
+        call_stub_t *resume   = NULL;
+        int          op_errno = ENOMEM;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
+
+        if (!local->call_count) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        local->fd = fd_ref(fd);
+
+        resume = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+        if (resume == NULL)
+                goto out;
+
+        afr_delayed_changelog_wake_resume (this, fd, resume);
+
+        return 0;
+out:
+        AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/* }}} */
+
+
+/*
+ * Free the AFR context attached to @fd, if there is one: the per-child
+ * arrays, the delay mutex and the context itself.  A warning is logged
+ * when eager-lock stubs are still queued on it.  Always returns 0.
+ */
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+{
+        afr_fd_ctx_t *fd_ctx = NULL;
+        uint64_t      value  = 0;
+        int           log    = 0;
+
+        if (fd_ctx_get (fd, this, &value) < 0)
+                return 0;
+
+        fd_ctx = (afr_fd_ctx_t *)(long) value;
+        if (!fd_ctx)
+                return 0;
+
+        /* no need to take any locks */
+        if (!list_empty (&fd_ctx->eager_locked))
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        AFR_MSG_INVALID_DATA, "%s: Stale "
+                        "Eager-lock stubs found",
+                        uuid_utoa (fd->inode->gfid));
+
+        for (log = 0; log < AFR_NUM_CHANGE_LOGS; log++)
+                GF_FREE (fd_ctx->pre_op_done[log]);
+
+        GF_FREE (fd_ctx->opened_on);
+        GF_FREE (fd_ctx->lock_piggyback);
+        GF_FREE (fd_ctx->lock_acquired);
+
+        pthread_mutex_destroy (&fd_ctx->delay_lock);
+
+        GF_FREE (fd_ctx);
+
+        return 0;
+}
+
+
+/*
+ * release callback: drops the AFR context attached to fd via
+ * afr_cleanup_fd_ctx().  Always returns 0.
+ */
+int
+afr_release (xlator_t *this, fd_t *fd)
+{
+        (void) afr_cleanup_fd_ctx (this, fd);
+        return 0;
+}
+
+
+/* {{{ fsync */
+
+/*
+ * Resume function of the stub built in afr_fsync_cbk(): unwinds the
+ * fsync fop with exactly the values it was given.  Always returns 0.
+ */
+int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * Per-child fsync reply handler.
+ *
+ * A failed reply only records op_errno.  The first successful reply
+ * flips local->op_ret to 0 and stores its prebuf/postbuf/xdata; a
+ * successful reply from the child returned by afr_data_subvol_get()
+ * overrides whatever was stored.  When the last reply has arrived the
+ * aggregated result is packed into a call stub and handed to
+ * afr_delayed_changelog_wake_resume() instead of being unwound directly.
+ * Always returns 0.
+ */
+int
+afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t *local       = frame->local;
+        call_stub_t *stub        = NULL;
+        int          child_index = (long) cookie;
+        int          read_subvol = 0;
+        int          remaining   = -1;
+
+        read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL,
+                                           NULL, NULL);
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != 0) {
+                        local->op_errno = op_errno;
+                } else {
+                        if (local->op_ret == -1) {
+                                /* First success seen on this frame. */
+                                local->op_ret = 0;
+
+                                local->cont.inode_wfop.prebuf  = *prebuf;
+                                local->cont.inode_wfop.postbuf = *postbuf;
+
+                                if (xdata)
+                                        local->xdata_rsp = dict_ref (xdata);
+                        }
+
+                        if (child_index == read_subvol) {
+                                /* The read subvolume's answer wins. */
+                                local->cont.inode_wfop.prebuf  = *prebuf;
+                                local->cont.inode_wfop.postbuf = *postbuf;
+
+                                if (xdata) {
+                                        if (local->xdata_rsp)
+                                                dict_unref (local->xdata_rsp);
+                                        local->xdata_rsp = dict_ref (xdata);
+                                }
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        /* Make a stub out of the frame, and register it with the waking
+           up post-op.  When the call-stub resumes, we are guaranteed
+           that there was no post-op pending (i.e. changelogs were unset
+           in the server).  This is an essential "guarantee": fsync()
+           returns only after completely finishing EVERYTHING, including
+           the delayed post-op.  FUSE graph switching, for example,
+           expects this guarantee.
+        */
+        stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+                                   local->op_ret, local->op_errno,
+                                   &local->cont.inode_wfop.prebuf,
+                                   &local->cont.inode_wfop.postbuf,
+                                   local->xdata_rsp);
+        if (stub == NULL) {
+                AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+                return 0;
+        }
+
+        /* If no new unstable writes happened between the time the
+           unstable write witness flag was cleared in afr_fsync and now,
+           calling afr_delayed_changelog_wake_up() should wake up, skip
+           over the fsync phase and go straight to
+           afr_changelog_post_op_now()
+        */
+        afr_delayed_changelog_wake_resume (this, local->fd, stub);
+
+        return 0;
+}
+
+
+/*
+ * fsync fop entry point.  Winds fsync to each child marked up in the
+ * frame's local (at most local->call_count winds); replies are
+ * collected by afr_fsync_cbk().  Unwinds with -1/ENOTCONN when
+ * local->call_count is 0 and with -1/op_errno when frame
+ * initialisation fails.  Always returns 0.
+ */
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+           dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = NULL;
+        int32_t        pending  = 0;
+        int32_t        op_errno = ENOMEM;
+        int            child    = 0;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        pending = local->call_count;
+        if (pending == 0) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        local->fd = fd_ref (fd);
+
+        /* The result is deliberately ignored: per the original note the
+           call is made only to CLEAR the unstable-write witness bit. */
+        (void) afr_fd_has_witnessed_unstable_write (this, fd);
+
+        local->inode = inode_ref (fd->inode);
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_fsync_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->fsync,
+                                   fd, datasync, xdata);
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ fsyncdir */
+
+/*
+ * Per-child fsyncdir reply handler.  A successful reply sets
+ * local->op_ret to 0 and keeps a ref on the first non-NULL xdata seen;
+ * a failed reply records op_errno.  The fop is unwound with the
+ * aggregated values once the last reply is in.  Always returns 0.
+ */
+int
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != 0) {
+                        local->op_errno = op_errno;
+                } else {
+                        local->op_ret = 0;
+                        if (xdata && !local->xdata_rsp)
+                                local->xdata_rsp = dict_ref (xdata);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining == 0) {
+                AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+                                  local->op_errno, local->xdata_rsp);
+        }
+
+        return 0;
+}
+
+
+/*
+ * fsyncdir fop entry point.  Winds fsyncdir to each child marked up in
+ * the frame's local (at most local->call_count winds); replies are
+ * aggregated by afr_fsyncdir_cbk().  Unwinds with -1 and
+ * ENOTCONN/op_errno when nothing can be wound.  Always returns 0.
+ */
+int
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+              dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = NULL;
+        int32_t        pending  = 0;
+        int32_t        op_errno = ENOMEM;
+        int            child    = 0;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        pending = local->call_count;
+        if (pending == 0) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND (frame, afr_fsyncdir_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->fsyncdir,
+                            fd, datasync, xdata);
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+/* }}} */
+
+/*
+ * Reply handler for the unlock requests sent by
+ * afr_unlock_inodelks_and_unwind().  An unlock failure other than
+ * ENOTCONN is logged; once the last reply is in, the inodelk fop is
+ * unwound with the op_ret/op_errno/xdata_rsp already stored in local.
+ * Always returns 0.
+ */
+int32_t
+afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie,
+                                xlator_t *this, int32_t op_ret,
+                                int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            remaining   = -1;
+        uuid_t         gfid        = {0};
+
+        if (op_ret < 0 && op_errno != ENOTCONN) {
+                loc_gfid (&local->loc, gfid);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        AFR_MSG_INODE_UNLOCK_FAIL,
+                        "%s: Failed to unlock %s "
+                        "with lk_owner: %s (%s)", uuid_utoa (gfid),
+                        priv->children[child_index]->name,
+                        lkowner_utoa (&frame->root->lk_owner),
+                        strerror (op_errno));
+        }
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+                          local->op_errno, local->xdata_rsp);
+
+        return 0;
+}
+
+/*
+ * Release the inodelks that were granted on some children.  Switches
+ * the stored flock to F_UNLCK and winds inodelk to every child whose
+ * recorded reply is valid and did not fail, up to call_count winds.
+ * The fop itself is unwound from afr_unlock_partial_inodelk_cbk().
+ * Always returns 0.
+ */
+int32_t
+afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
+                                int call_count)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        local->call_count = call_count;
+        local->cont.inodelk.flock.l_type = F_UNLCK;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->replies[child].valid ||
+                    local->replies[child].op_ret == -1)
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_unlock_partial_inodelk_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->inodelk,
+                                   local->cont.inodelk.volume,
+                                   &local->loc, local->cont.inodelk.cmd,
+                                   &local->cont.inodelk.flock, NULL);
+
+                if (--call_count == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+/*
+ * Fold the per-child inodelk replies into local->op_ret/op_errno and
+ * finish the fop.
+ *
+ * Replies are scanned in child order.  Each successful reply is
+ * counted.  The first EAGAIN failure sets the result to -1/EAGAIN and
+ * later replies no longer change it.  Until then a success sets op_ret
+ * to 0, and every considered reply overwrites op_errno.
+ *
+ * If the result is -1/EAGAIN for a non-unlock request and some children
+ * did grant the lock, those locks are released through
+ * afr_unlock_inodelks_and_unwind(); otherwise the fop is unwound
+ * directly.  Always returns 0.
+ */
+int32_t
+afr_inodelk_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            granted = 0;
+        int            child   = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->replies[child].valid)
+                        continue;
+
+                if (local->replies[child].op_ret == 0)
+                        granted++;
+
+                /* EAGAIN is sticky: keep counting, change nothing else. */
+                if (local->op_ret == -1 && local->op_errno == EAGAIN)
+                        continue;
+
+                if (local->replies[child].op_ret == -1 &&
+                    local->replies[child].op_errno == EAGAIN) {
+                        local->op_ret   = -1;
+                        local->op_errno = EAGAIN;
+                } else {
+                        if (local->replies[child].op_ret == 0)
+                                local->op_ret = 0;
+
+                        local->op_errno = local->replies[child].op_errno;
+                }
+        }
+
+        if (granted != 0 &&
+            local->cont.inodelk.flock.l_type != F_UNLCK &&
+            local->op_ret == -1 && local->op_errno == EAGAIN) {
+                afr_unlock_inodelks_and_unwind (frame, this, granted);
+                return 0;
+        }
+
+        AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+                          local->op_errno, local->xdata_rsp);
+
+        return 0;
+}
+
+/*
+ * Shared bookkeeping for inodelk replies: marks the reply slot of the
+ * child given by cookie as valid and stores op_ret/op_errno in it.  For
+ * a successful reply carrying xdata, a ref is stored in the slot and,
+ * if local->xdata_rsp is still unset, another ref is stored there under
+ * the frame lock.  Always returns 0.
+ */
+int
+afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t *local       = frame->local;
+        int          child_index = (long) cookie;
+
+        local->replies[child_index].valid    = 1;
+        local->replies[child_index].op_ret   = op_ret;
+        local->replies[child_index].op_errno = op_errno;
+
+        if (op_ret != 0 || !xdata)
+                return 0;
+
+        local->replies[child_index].xdata = dict_ref (xdata);
+
+        LOCK (&frame->lock);
+        {
+                if (!local->xdata_rsp)
+                        local->xdata_rsp = dict_ref (xdata);
+        }
+        UNLOCK (&frame->lock);
+
+        return 0;
+}
+
+/*
+ * Reply handler for inodelks wound in parallel: records the reply and,
+ * when it is the last outstanding one, finishes the fop through
+ * afr_inodelk_done().  Always returns 0.
+ */
+static int32_t
+afr_parallel_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        int remaining = 0;
+
+        afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        afr_inodelk_done (frame, this);
+
+        return 0;
+}
+
+/*
+ * Returns _gf_true when the reply is a failure (-1) with EAGAIN,
+ * _gf_false for any other op_ret/op_errno combination.
+ */
+static gf_boolean_t
+afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno)
+{
+        return (op_ret == -1 && op_errno == EAGAIN) ? _gf_true : _gf_false;
+}
+
+/*
+ * Reply handler for inodelks wound one child at a time.  Records the
+ * reply, then looks for the next child (after the replying one) that
+ * is marked up in local.  The chain ends, and afr_inodelk_done() is
+ * called, when the reply is -1/EAGAIN or no further up child exists;
+ * otherwise the same inodelk is wound to that next child.
+ * Always returns 0.
+ */
+static int32_t
+afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            next_child  = 0;
+
+        afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        next_child = child_index + 1;
+        while (next_child < priv->child_count &&
+               !local->child_up[next_child])
+                next_child++;
+
+        if (afr_is_conflicting_lock_present (op_ret, op_errno) ||
+            (next_child == priv->child_count)) {
+                afr_inodelk_done (frame, this);
+                return 0;
+        }
+
+        STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
+                           (void *) (long) next_child,
+                           priv->children[next_child],
+                           priv->children[next_child]->fops->inodelk,
+                           (const char *)local->cont.inodelk.volume,
+                           &local->loc, local->cont.inodelk.cmd,
+                           &local->cont.inodelk.flock,
+                           local->xdata_req);
+
+        return 0;
+}
+
+/*
+ * Wind the inodelk stored in local to every child marked up, all at
+ * once (at most local->call_count winds).  Replies arrive in
+ * afr_parallel_inodelk_cbk().  Always returns 0.
+ */
+static int
+afr_parallel_inodelk_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv    = this->private;
+        afr_local_t   *local   = frame->local;
+        int            pending = local->call_count;
+        int            child   = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (local->child_up[child]) {
+                        STACK_WIND_COOKIE (frame, afr_parallel_inodelk_cbk,
+                                           (void *) (long) child,
+                                           priv->children[child],
+                                           priv->children[child]->fops->inodelk,
+                                           (const char *)local->cont.inodelk.volume,
+                                           &local->loc,
+                                           local->cont.inodelk.cmd,
+                                           &local->cont.inodelk.flock,
+                                           local->xdata_req);
+                        if (--pending == 0)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Start a serialized inodelk: wind the request stored in local to the
+ * first child marked up only.  afr_serialized_inodelk_cbk() continues
+ * the chain with the following children.  Always returns 0.
+ */
+static int
+afr_serialized_inodelk_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+        int            child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->inodelk,
+                                   (const char *)local->cont.inodelk.volume,
+                                   &local->loc, local->cont.inodelk.cmd,
+                                   &local->cont.inodelk.flock,
+                                   local->xdata_req);
+                break;
+        }
+
+        return 0;
+}
+
+/*
+ * inodelk fop entry point.  Copies loc, volume, cmd, flock and (a ref
+ * on) xdata into the frame's local, then winds the request: unlocks
+ * (F_UNLCK) go to all up children in parallel, every other lock type
+ * is wound one child at a time.  Unwinds with -1/op_errno when frame
+ * initialisation or the volume-name copy fails.  Always returns 0.
+ */
+int32_t
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+             const char *volume, loc_t *loc, int32_t cmd,
+             struct gf_flock *flock, dict_t *xdata)
+{
+        afr_local_t *local    = NULL;
+        int32_t      op_errno = ENOMEM;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        loc_copy (&local->loc, loc);
+
+        local->cont.inodelk.volume = gf_strdup (volume);
+        if (local->cont.inodelk.volume == NULL) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        local->cont.inodelk.cmd   = cmd;
+        local->cont.inodelk.flock = *flock;
+        if (xdata)
+                local->xdata_req = dict_ref (xdata);
+
+        /* At least one child is up */
+        /*
+         * Non-blocking locks also need to be serialized.  Otherwise two
+         * mounts issuing the same non-blocking inodelk may both end up
+         * without the lock on any brick.
+         * Ex: Mount1 and Mount2 request a full length lock on file f1.
+         * Mount1's afr may get the lock on brick-1 but not on brick-2
+         * because Mount2 already holds it there, and vice versa.  Since
+         * both mounts only got partial locks, afr treats both as
+         * failures and unwinds with EAGAIN.
+         */
+        if (flock->l_type != F_UNLCK)
+                afr_serialized_inodelk_wind (frame, this);
+        else
+                afr_parallel_inodelk_wind (frame, this);
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Per-child finodelk reply handler.  Any successful reply sets
+ * local->op_ret to 0; every reply overwrites local->op_errno.  After
+ * the last reply the fop is unwound with the aggregated op_ret/op_errno
+ * and the xdata of that last reply.  Always returns 0.
+ */
+int32_t
+afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_ret   = (op_ret == 0) ? 0 : local->op_ret;
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining == 0) {
+                AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
+                                  local->op_errno, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * finodelk fop entry point.  Winds finodelk to each child marked up in
+ * the frame's local (at most local->call_count winds); replies are
+ * aggregated by afr_finodelk_cbk().  Unwinds with -1 and
+ * ENOTCONN/op_errno when nothing can be wound.  Always returns 0.
+ */
+int32_t
+afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+              int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = NULL;
+        int32_t        pending  = 0;
+        int32_t        op_errno = ENOMEM;
+        int            child    = 0;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        pending = local->call_count;
+        if (pending == 0) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND (frame, afr_finodelk_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->finodelk,
+                            volume, fd, cmd, flock, xdata);
+
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Per-child entrylk reply handler.  Any successful reply sets
+ * local->op_ret to 0; every reply overwrites local->op_errno.  After
+ * the last reply the fop is unwound with the aggregated op_ret/op_errno
+ * and the xdata of that last reply.  Always returns 0.
+ */
+int32_t
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_ret   = (op_ret == 0) ? 0 : local->op_ret;
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining == 0) {
+                AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
+                                  local->op_errno, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * entrylk fop entry point.  Winds entrylk to each child marked up in
+ * the frame's local (at most local->call_count winds); replies are
+ * aggregated by afr_entrylk_cbk().  Unwinds with -1 and
+ * ENOTCONN/op_errno when nothing can be wound.  Always returns 0.
+ */
+int
+afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+             loc_t *loc, const char *basename, entrylk_cmd cmd,
+             entrylk_type type, dict_t *xdata)
+{
+        afr_private_t *priv       = NULL;
+        afr_local_t   *local      = NULL;
+        int            i          = 0;
+        int32_t        call_count = 0;
+        /* Default to ENOMEM like the sibling fops, so a failed frame
+           initialisation never unwinds with -1 and an errno of 0. */
+        int32_t        op_errno   = ENOMEM;
+
+        priv = this->private;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
+
+        call_count = local->call_count;
+        if (!call_count) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_entrylk_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->entrylk,
+                                    volume, loc, basename, cmd, type, xdata);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        return 0;
+out:
+        AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+
+/*
+ * Per-child fentrylk reply handler.  Any successful reply sets
+ * local->op_ret to 0; every reply overwrites local->op_errno.  After
+ * the last reply the fop is unwound with the aggregated op_ret/op_errno
+ * and the xdata of that last reply.  Always returns 0.
+ */
+int
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_ret   = (op_ret == 0) ? 0 : local->op_ret;
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining == 0) {
+                AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
+                                  local->op_errno, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * fentrylk fop entry point.  Winds fentrylk to each child marked up in
+ * the frame's local (at most local->call_count winds); replies are
+ * aggregated by afr_fentrylk_cbk().  Unwinds with -1 and
+ * ENOTCONN/op_errno when nothing can be wound.  Always returns 0.
+ */
+int
+afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+              const char *basename, entrylk_cmd cmd, entrylk_type type,
+              dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = NULL;
+        int32_t        pending  = 0;
+        int32_t        op_errno = ENOMEM;
+        int            child    = 0;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        pending = local->call_count;
+        if (pending == 0) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND (frame, afr_fentrylk_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->fentrylk,
+                            volume, fd, basename, cmd, type, xdata);
+
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Per-child statfs reply handler.  A failed reply only records
+ * op_errno.  A successful reply stores op_ret and keeps the reply's
+ * statvfs (plus a ref on its xdata) when it is the first one stored or
+ * when its f_bavail is smaller than the stored one's, so the child
+ * with the least available space is what gets reported.  The fop is
+ * unwound after the last reply.  Always returns 0.
+ */
+int
+afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+        afr_local_t    *local     = NULL;
+        struct statvfs *kept      = NULL;
+        int             remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                if (op_ret != 0) {
+                        local->op_errno = op_errno;
+                } else {
+                        local->op_ret = op_ret;
+                        kept = &local->cont.statfs.buf;
+
+                        if (!local->cont.statfs.buf_set) {
+                                *kept = *statvfs;
+                                local->cont.statfs.buf_set = 1;
+                                if (xdata)
+                                        local->xdata_rsp = dict_ref (xdata);
+                        } else if (statvfs->f_bavail < kept->f_bavail) {
+                                *kept = *statvfs;
+                                if (xdata) {
+                                        if (local->xdata_rsp)
+                                                dict_unref (local->xdata_rsp);
+                                        local->xdata_rsp = dict_ref (xdata);
+                                }
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining == 0) {
+                AFR_STACK_UNWIND (statfs, frame, local->op_ret,
+                                  local->op_errno, &local->cont.statfs.buf,
+                                  local->xdata_rsp);
+        }
+
+        return 0;
+}
+
+
+/*
+ * statfs fop entry point.  Winds statfs to each child marked up in the
+ * frame's local, skipping the arbiter brick; when an arbiter is
+ * configured and up, local->call_count is reduced by one first so the
+ * reply count matches.  Replies are aggregated by afr_statfs_cbk().
+ * Unwinds with -1 and ENOTCONN/op_errno when nothing can be wound.
+ * Always returns 0.
+ */
+int
+afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = NULL;
+        int32_t        op_errno = ENOMEM;
+        int            pending  = 0;
+        int            child    = 0;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])
+                local->call_count--;
+
+        pending = local->call_count;
+        if (pending == 0) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child] ||
+                    AFR_IS_ARBITER_BRICK(priv, child))
+                        continue;
+
+                STACK_WIND (frame, afr_statfs_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->statfs,
+                            loc, xdata);
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the unlocks sent by afr_lk_unlock().  The reply's
+ * own op_ret/op_errno are ignored; once the last reply is in, the lk
+ * fop is unwound with the op_ret/op_errno stored in local and the
+ * lock/xdata of that last reply.  Always returns 0.
+ */
+int32_t
+afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+                   dict_t *xdata)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = afr_frame_return (frame);
+
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+                          lock, xdata);
+
+        return 0;
+}
+
+
+/*
+ * Undo the lk locks taken so far.  If no child is recorded in
+ * locked_nodes the fop is unwound immediately with the stored result.
+ * Otherwise the user flock is switched to F_UNLCK and an F_SETLK is
+ * wound to every locked child; afr_lk_unlock_cbk() unwinds after the
+ * last reply.  Always returns 0.
+ */
+int32_t
+afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+
+        pending = afr_locked_nodes_count (local->cont.lk.locked_nodes,
+                                          priv->child_count);
+        if (pending == 0) {
+                AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+                                  &local->cont.lk.ret_flock, NULL);
+                return 0;
+        }
+
+        local->call_count = pending;
+        local->cont.lk.user_flock.l_type = F_UNLCK;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->cont.lk.locked_nodes[child])
+                        continue;
+
+                STACK_WIND (frame, afr_lk_unlock_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->lk,
+                            local->fd, F_SETLK,
+                            &local->cont.lk.user_flock, NULL);
+
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for lk, which is wound to the children one after the
+ * other in index order.
+ *
+ * A failure that child_went_down() does not attribute to the child
+ * being down aborts the sequence: the error is stored and the locks
+ * taken so far are released via afr_lk_unlock().  A success marks the
+ * child in locked_nodes and remembers the returned flock.  The request
+ * is then wound to the next child index; after the last child the fop
+ * is unwound, with ENOTCONN if no child ever succeeded.
+ * Always returns 0.
+ */
+int32_t
+afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+            int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+
+        if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
+                local->op_ret   = -1;
+                local->op_errno = op_errno;
+
+                afr_lk_unlock (frame, this);
+                return 0;
+        }
+
+        if (op_ret == 0) {
+                local->op_ret   = 0;
+                local->op_errno = 0;
+                local->cont.lk.locked_nodes[child_index] = 1;
+                local->cont.lk.ret_flock = *lock;
+        }
+
+        child_index++;
+
+        if (child_index < priv->child_count) {
+                STACK_WIND_COOKIE (frame, afr_lk_cbk,
+                                   (void *) (long) child_index,
+                                   priv->children[child_index],
+                                   priv->children[child_index]->fops->lk,
+                                   local->fd, local->cont.lk.cmd,
+                                   &local->cont.lk.user_flock, xdata);
+                return 0;
+        }
+
+        if (local->op_ret == -1) {
+                /* all nodes have gone down */
+                AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
+                                  &local->cont.lk.ret_flock, NULL);
+        } else {
+                AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+                                  &local->cont.lk.ret_flock, NULL);
+        }
+
+        return 0;
+}
+
+
+/*
+ * lk fop entry point.  Allocates the locked_nodes array (one slot per
+ * child), stores fd/cmd/flock in the frame's local and winds lk to
+ * child 0; afr_lk_cbk() walks the remaining children.  Unwinds with
+ * -1/op_errno when frame initialisation or the allocation fails.
+ * Always returns 0.
+ */
+int
+afr_lk (call_frame_t *frame, xlator_t *this,
+        fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = NULL;
+        int32_t        op_errno = ENOMEM;
+
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (local == NULL)
+                goto out;
+
+        local->cont.lk.locked_nodes =
+                GF_CALLOC (priv->child_count,
+                           sizeof (*local->cont.lk.locked_nodes),
+                           gf_afr_mt_char);
+        if (local->cont.lk.locked_nodes == NULL) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        local->fd                 = fd_ref (fd);
+        local->cont.lk.cmd        = cmd;
+        local->cont.lk.user_flock = *flock;
+        local->cont.lk.ret_flock  = *flock;
+
+        /* The sequence always starts at the first child. */
+        STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
+                           priv->children[0],
+                           priv->children[0]->fops->lk,
+                           fd, cmd, flock, xdata);
+
+        return 0;
+
+out:
+        AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * forget callback: cancels the inode's split-brain choice timeout,
+ * removes AFR's context from the inode and frees it if one was set.
+ * Always returns 0.
+ */
+int
+afr_forget (xlator_t *this, inode_t *inode)
+{
+        afr_inode_ctx_t *ctx   = NULL;
+        uint64_t         value = 0;
+
+        afr_spb_choice_timeout_cancel (this, inode);
+        inode_ctx_del (inode, this, &value);
+
+        if (value != 0) {
+                ctx = (afr_inode_ctx_t *)value;
+                GF_FREE (ctx);
+        }
+
+        return 0;
+}
+
+/*
+ * Write AFR's private state to the process state dump: a section named
+ * "<type>.<name>", the child count, child_up[] and pending_key[] for
+ * each child, followed by the self-heal, changelog, quorum and heal
+ * queue settings and counters.  Always returns 0.
+ */
+int
+afr_priv_dump (xlator_t *this)
+{
+        afr_private_t *priv = NULL;
+        char           key_prefix[GF_DUMP_MAX_BUF_LEN];
+        char           key[GF_DUMP_MAX_BUF_LEN];
+        int            i = 0;
+
+
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+        snprintf (key_prefix, sizeof (key_prefix), "%s.%s",
+                  this->type, this->name);
+        gf_proc_dump_add_section(key_prefix);
+        gf_proc_dump_write("child_count", "%u", priv->child_count);
+        for (i = 0; i < priv->child_count; i++) {
+                /* Bounded formatting, matching key_prefix above, so the
+                   key can never overrun its buffer. */
+                snprintf (key, sizeof (key), "child_up[%d]", i);
+                gf_proc_dump_write(key, "%d", priv->child_up[i]);
+                snprintf (key, sizeof (key), "pending_key[%d]", i);
+                gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+        }
+        gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+        gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+        gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+        gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
+        gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
+        gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
+        gf_proc_dump_write("read_child", "%d", priv->read_child);
+        gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
+        gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+        gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
+        gf_proc_dump_write("heal-wait-queue-length", "%d",
+                           priv->heal_wait_qlen);
+        gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
+        gf_proc_dump_write("background-self-heal-count", "%d",
+                           priv->background_self_heal_count);
+        gf_proc_dump_write("healers", "%d", priv->healers);
+
+        return 0;
+}
+
+
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ *
+ * Returns the position of @child in priv->children.  When @child is not
+ * one of the subvolumes the return value is priv->child_count, i.e. one
+ * past the last valid index; it is never negative.
+ */
+
+static int
+find_child_index (xlator_t *this, xlator_t *child)
+{
+        afr_private_t *priv = this->private;
+        int            pos  = 0;
+
+        while (pos < priv->child_count) {
+                if ((xlator_t *) child == priv->children[pos])
+                        break;
+                pos++;
+        }
+
+        return pos;
+}
+
+/*
+ * Count the children whose priv->child_up[] entry is exactly 1.
+ * The leading underscores follow the file's naming for helpers called
+ * with priv->lock held; this function itself takes no lock.
+ */
+static int
+__afr_get_up_children_count (afr_private_t *priv)
+{
+        int count = 0;
+        int child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (priv->child_up[child] == 1)
+                        count++;
+        }
+
+        return count;
+}
+
+/*
+ * Decide which event to report upward when not every child has
+ * reported yet.
+ *
+ * Returns GF_EVENT_MAXVAL when all children already have a last_event
+ * (afr_notify() does the propagation in that case).  Otherwise every
+ * child without a last_event is marked down (child_up = 0) and given a
+ * synthetic last_event: GF_EVENT_CHILD_MODIFIED if at least one child
+ * is up, in which case GF_EVENT_CHILD_UP is returned, or
+ * GF_EVENT_SOME_CHILD_DOWN if none is up, in which case
+ * GF_EVENT_CHILD_DOWN is returned.
+ */
+glusterfs_event_t
+__afr_transform_event_from_state (afr_private_t *priv)
+{
+        glusterfs_event_t assumed = GF_EVENT_MAXVAL;
+        glusterfs_event_t verdict = GF_EVENT_MAXVAL;
+        int               child   = 0;
+
+        if (AFR_COUNT (priv->last_event, priv->child_count) ==
+            priv->child_count) {
+                /* have_heard_from_all. Let afr_notify() do the propagation. */
+                return GF_EVENT_MAXVAL;
+        }
+
+        if (__afr_get_up_children_count (priv)) {
+                /* At least one child is up; the silent ones are treated
+                 * as down and recorded as GF_EVENT_CHILD_MODIFIED. */
+                assumed = GF_EVENT_CHILD_MODIFIED;
+                verdict = GF_EVENT_CHILD_UP;
+        } else {
+                assumed = GF_EVENT_SOME_CHILD_DOWN;
+                verdict = GF_EVENT_CHILD_DOWN;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (priv->last_event[child])
+                        continue;
+
+                priv->last_event[child] = assumed;
+                priv->child_up[child]   = 0;
+        }
+
+        return verdict;
+}
+
+/*
+ * Timer callback armed by __afr_launch_notify_timer().  If the timer is
+ * still registered in priv, it is cleared and the event to report is
+ * derived from the current state under priv->lock; any event other
+ * than GF_EVENT_MAXVAL is then passed to default_notify() after the
+ * lock is dropped.
+ */
+static void
+afr_notify_cbk (void *data)
+{
+        xlator_t          *this    = data;
+        afr_private_t     *priv    = this->private;
+        glusterfs_event_t  event   = GF_EVENT_MAXVAL;
+        gf_boolean_t       forward = _gf_false;
+
+        LOCK (&priv->lock);
+        {
+                /*
+                 * A NULL timer means child_up/child_down was already
+                 * sent to the parent; this is a spurious wake up.
+                 */
+                if (priv->timer) {
+                        priv->timer = NULL;
+                        event = __afr_transform_event_from_state (priv);
+                        if (event != GF_EVENT_MAXVAL)
+                                forward = _gf_true;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (forward)
+                default_notify (this, event, NULL);
+}
+
+/*
+ * Arm a 10 second timer that fires afr_notify_cbk() for this xlator and
+ * store its handle in priv->timer.  A failure to create the timer is
+ * logged and leaves priv->timer NULL.
+ */
+static void
+__afr_launch_notify_timer (xlator_t *this, afr_private_t *priv)
+{
+        struct timespec delay = { .tv_sec = 10, .tv_nsec = 0 };
+
+        gf_msg_debug (this->name, 0, "Initiating child-down timer");
+
+        priv->timer = gf_timer_call_after (this->ctx, delay,
+                                           afr_notify_cbk, this);
+        if (priv->timer != NULL)
+                return;
+
+        gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL,
+                "Cannot create timer for delayed initialization");
+}
+
+/*
+ * Returns 1 when every child has a non-zero priv->last_event entry,
+ * 0 as soon as one child without an event is found.
+ */
+int
+__get_heard_from_all_status (xlator_t *this)
+{
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!priv->last_event[child])
+                        return 0;
+        }
+
+        return 1;
+}
+
+int32_t
+afr_notify (xlator_t *this, int32_t event,
+ void *data, void *data2)
+{ /*
+ * Handle an event delivered to this AFR instance.
+ *
+ * Returns 0 immediately when this->private is not set.
+ * GF_EVENT_TRANSLATOR_OP is answered through afr_xl_op(), or with -1
+ * while some child has not reported any event yet.  For all other
+ * events, per-child state (child_up[], last_event[], event_generation)
+ * is updated under priv->lock and the event, possibly rewritten, is
+ * handed to default_notify() when propagation was requested or once
+ * every child has reported.
+ *
+ * The return value is the afr_xl_op()/default_notify() result, 0 when
+ * nothing is propagated, or -1 (the initial value of ret) on the
+ * invalid-child path.
+ */
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t had_quorum = _gf_false;
+ gf_boolean_t has_quorum = _gf_false;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
+
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+
+ /* parent xlators don't need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ *
+ * NOTE(review): find_child_index(), as defined in this file, returns
+ * priv->child_count (never a negative value) when 'data' is not one
+ * of the children, so the 'idx < 0' test below does not look able to
+ * fire - confirm before relying on it.
+ */
+ idx = find_child_index (this, data);
+ if (idx < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
+ "Received child_up from invalid subvolume");
+ goto out;
+ }
+
+ had_quorum = priv->quorum_count && afr_has_quorum (priv->child_up,
+ this); /* quorum state before this event is applied */
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ LOCK (&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status (this);
+ }
+ UNLOCK (&priv->lock);
+
+ if (!had_heard_from_all) {
+ ret = -1;
+ } else {
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ }
+ goto out;
+ }
+
+ LOCK (&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status (this);
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ __afr_launch_notify_timer (this, priv);
+ propagate = 1;
+ break;
+ case GF_EVENT_CHILD_UP:
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 1;
+
+ call_psh = 1; /* a child-up may trigger self-heal below */
+ up_children = __afr_get_up_children_count (priv);
+ if (up_children == 1) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_SUBVOL_UP,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED; /* not the first child up */
+ }
+
+ priv->last_event[idx] = event;
+
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ if (priv->child_up[idx] == 1) {
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 0;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_ALL_SUBVOLS_DOWN,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+ } else {
+ event = GF_EVENT_SOME_CHILD_DOWN; /* others are not all down */
+ }
+
+ priv->last_event[idx] = event;
+
+ break;
+
+ case GF_EVENT_CHILD_CONNECTING:
+ priv->last_event[idx] = event;
+
+ break;
+
+ case GF_EVENT_SOME_CHILD_DOWN:
+ priv->last_event[idx] = event;
+ break;
+
+ default:
+ propagate = 1;
+ break;
+ }
+ have_heard_from_all = __get_heard_from_all_status (this);
+ if (!had_heard_from_all && have_heard_from_all) {
+ if (priv->timer) {
+ gf_timer_call_cancel (this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time.
+ Failing that, CHILD_CONNECTING is reported if some
+ subvol's last event was that, else CHILD_DOWN.
+ */
+ event = GF_EVENT_CHILD_DOWN;
+ up_children = __afr_get_up_children_count (priv); /* not read again in this function */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (priv->last_event[i] ==
+ GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ if (priv->quorum_count) {
+ has_quorum = afr_has_quorum (priv->child_up, this);
+ if (!had_quorum && has_quorum)
+ gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
+ "Client-quorum is met");
+ if (had_quorum && !has_quorum)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+
+ if ((!had_heard_from_all) || call_psh) {
+ /* Launch self-heal on all local subvolumes if:
+ * a) We have_heard_from_all for the first time
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
+ * Either way this only happens in a process where
+ * priv->shd.iamshd is set.
+ */
+ if (have_heard_from_all && priv->shd.iamshd) {
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i])
+ afr_selfheal_childup (this, i);
+ }
+ }
+out:
+ return ret;
+}
+
+
+/*
+ * Prepare 'local' for a new operation.
+ *
+ * Takes a private copy of priv->child_up, records how many children are up
+ * in local->call_count, snapshots priv->event_generation and allocates the
+ * per-child bookkeeping arrays (read_attempted, readable, readable2,
+ * replies).
+ *
+ * Returns 0 on success. On failure returns -1 and, when op_errno is not
+ * NULL, stores ENOMEM (allocation failure) or ENOTCONN (no child is up)
+ * through it. Arrays allocated before the failure are not released here.
+ */
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+        int failure = ENOMEM;   /* errno reported if we bail out */
+
+        local->op_ret = -1;
+        local->op_errno = EUCLEAN;
+
+        syncbarrier_init (&local->barrier);
+
+        local->child_up = GF_CALLOC (priv->child_count,
+                                     sizeof (*local->child_up),
+                                     gf_afr_mt_char);
+        if (local->child_up == NULL)
+                goto fail;
+
+        memcpy (local->child_up, priv->child_up,
+                sizeof (*local->child_up) * priv->child_count);
+
+        local->call_count = AFR_COUNT (local->child_up, priv->child_count);
+        if (local->call_count == 0) {
+                gf_msg (THIS->name, GF_LOG_INFO, 0,
+                        AFR_MSG_ALL_SUBVOLS_DOWN, "no subvolumes up");
+                failure = ENOTCONN;
+                goto fail;
+        }
+
+        local->event_generation = priv->event_generation;
+
+        local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char),
+                                           gf_afr_mt_char);
+        if (local->read_attempted == NULL)
+                goto fail;
+
+        local->readable = GF_CALLOC (priv->child_count, sizeof (char),
+                                     gf_afr_mt_char);
+        if (local->readable == NULL)
+                goto fail;
+
+        local->readable2 = GF_CALLOC (priv->child_count, sizeof (char),
+                                      gf_afr_mt_char);
+        if (local->readable2 == NULL)
+                goto fail;
+
+        local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+                                   gf_afr_mt_reply_t);
+        if (local->replies == NULL)
+                goto fail;
+
+        local->need_full_crawl = _gf_false;
+
+        INIT_LIST_HEAD (&local->healer);
+        return 0;
+
+fail:
+        if (op_errno != NULL)
+                *op_errno = failure;
+        return -1;
+}
+
+/*
+ * Allocate the per-child lock bookkeeping arrays of 'lk' and reset its
+ * result fields. Returns 0 on success, -ENOMEM if either array cannot be
+ * allocated (an array allocated before the failure is left in place).
+ */
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+                        transaction_lk_type_t lk_type)
+{
+        lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+                                      child_count, gf_afr_mt_char);
+        if (lk->locked_nodes == NULL)
+                return -ENOMEM;
+
+        lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
+                                            child_count, gf_afr_mt_char);
+        if (lk->lower_locked_nodes == NULL)
+                return -ENOMEM;
+
+        /* Pessimistic defaults until a lock attempt records a result. */
+        lk->lock_op_ret = -1;
+        lk->lock_op_errno = EUCLEAN;
+        lk->transaction_lk_type = lk_type;
+
+        return 0;
+}
+
+/*
+ * Free a matrix built by afr_matrix_create(): each of the 'm' rows, then
+ * the row-pointer array itself. A NULL 'matrix' is a no-op.
+ */
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+        /* Same type as 'm': a signed index would be converted on every
+         * comparison and would overflow for m > INT_MAX. */
+        unsigned int row = 0;
+
+        if (!matrix)
+                return;
+
+        for (row = 0; row < m; row++) {
+                GF_FREE (matrix[row]);
+        }
+
+        GF_FREE (matrix);
+}
+
+/*
+ * Allocate a zero-filled m x n matrix of int32_t as an array of 'm' row
+ * pointers, each row holding 'n' elements.
+ *
+ * Returns the matrix, or NULL if any allocation fails; rows allocated before
+ * the failure are released through afr_matrix_cleanup().
+ */
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+        int32_t **matrix = NULL;
+        /* Same type as 'm' so the loop bound is compared without a
+         * signed/unsigned conversion. */
+        unsigned int row = 0;
+
+        matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
+        if (!matrix)
+                goto out;
+
+        for (row = 0; row < m; row++) {
+                matrix[row] = GF_CALLOC (sizeof (*matrix[row]), n,
+                                         gf_afr_mt_int32_t);
+                if (!matrix[row])
+                        goto out;
+        }
+        return matrix;
+out:
+        /* Row pointers not yet allocated are still NULL from the zeroing
+         * allocation above, so cleaning up all 'm' rows is safe. */
+        afr_matrix_cleanup (matrix, m);
+        return NULL;
+}
+
+/*
+ * Bind 'lk' to lock domain 'dom' (the pointer is stored, not copied) and
+ * allocate its per-child locked_nodes array.
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+        lk->domain = dom;
+        lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+                                      child_count, gf_afr_mt_char);
+
+        return (lk->locked_nodes != NULL) ? 0 : -ENOMEM;
+}
+
+/*
+ * Set up the transaction-specific parts of 'local': the internal lock, the
+ * inodelk for data/metadata transactions, and the per-child transaction
+ * arrays plus the pending matrix.
+ *
+ * Returns 0 on success, the negative value reported by the lock helpers, or
+ * -ENOMEM when one of the allocations fails. Nothing allocated before a
+ * failure is released here.
+ */
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+{
+        afr_private_t *priv = this->private;
+        int up_count = 0;
+        int ret = 0;
+
+        ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
+                                      AFR_TRANSACTION_LK);
+        if (ret < 0)
+                return ret;
+
+        if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+            (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+                ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+                                        this->name, priv->child_count);
+                if (ret < 0)
+                        return ret;
+        }
+
+        /* Optimistic changelog is only enabled when every child is up. */
+        up_count = AFR_COUNT (local->child_up, priv->child_count);
+        if (priv->optimistic_change_log && up_count == priv->child_count)
+                local->optimistic_change_log = 1;
+
+        local->pre_op_compat = priv->pre_op_compat;
+
+        local->transaction.eager_lock =
+                GF_CALLOC (sizeof (*local->transaction.eager_lock),
+                           priv->child_count,
+                           gf_afr_mt_int32_t);
+        if (local->transaction.eager_lock == NULL)
+                return -ENOMEM;
+
+        local->transaction.pre_op =
+                GF_CALLOC (sizeof (*local->transaction.pre_op),
+                           priv->child_count,
+                           gf_afr_mt_char);
+        if (local->transaction.pre_op == NULL)
+                return -ENOMEM;
+
+        if (priv->arbiter_count == 1) {
+                local->transaction.pre_op_xdata =
+                        GF_CALLOC (sizeof (*local->transaction.pre_op_xdata),
+                                   priv->child_count, gf_afr_mt_dict_t);
+                if (local->transaction.pre_op_xdata == NULL)
+                        return -ENOMEM;
+
+                local->transaction.pre_op_sources =
+                        GF_CALLOC (sizeof (*local->transaction.pre_op_sources),
+                                   priv->child_count, gf_afr_mt_char);
+                if (local->transaction.pre_op_sources == NULL)
+                        return -ENOMEM;
+        }
+
+        local->transaction.failed_subvols =
+                GF_CALLOC (sizeof (*local->transaction.failed_subvols),
+                           priv->child_count,
+                           gf_afr_mt_char);
+        if (local->transaction.failed_subvols == NULL)
+                return -ENOMEM;
+
+        local->pending = afr_matrix_create (priv->child_count,
+                                            AFR_NUM_CHANGE_LOGS);
+        if (local->pending == NULL)
+                return -ENOMEM;
+
+        INIT_LIST_HEAD (&local->transaction.eager_locked);
+
+        return 0;
+}
+
+
+/* Tag the call stack that 'frame' belongs to with LOW_PRIO_PROC_PID by
+ * overwriting the pid stored in frame->root. */
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+
+/*
+ * Decide whether the children currently up satisfy quorum.
+ *
+ * With a fixed priv->quorum_count the up-count is compared against it
+ * directly. With AFR_QUORUM_AUTO a strict majority is required, except that
+ * for an even child count exactly half is accepted when the first child is
+ * among those up.
+ *
+ * Returns _gf_false when 'priv' fails validation.
+ */
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+        unsigned int needed = 0;
+        unsigned int alive = 0;
+
+        GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+        alive = __afr_get_up_children_count (priv);
+        needed = priv->quorum_count;
+        if (needed != AFR_QUORUM_AUTO)
+                return alive >= needed;
+
+        /* Auto mode: strict majority first. */
+        needed = priv->child_count / 2 + 1;
+        if (alive >= needed)
+                return _gf_true;
+
+        if ((priv->child_count % 2) != 0)
+                goto out;
+
+        /*
+         * Even number of children: exactly half counts as quorum only when
+         * the first ("senior-most") child is part of it, so at most one
+         * such half can ever claim quorum.
+         */
+        needed = priv->child_count / 2;
+        if (alive >= needed && priv->child_up[0])
+                return _gf_true;
+
+out:
+        return _gf_false;
+}
+
+/*
+ * Release an afr_private_t: the last_event array, every pending_key string
+ * and their container, the children and child_up arrays, the lock, and
+ * finally the structure itself. A NULL 'priv' is a no-op.
+ */
+void
+afr_priv_destroy (afr_private_t *priv)
+{
+        int child = 0;
+
+        if (priv == NULL)
+                return;
+
+        GF_FREE (priv->last_event);
+
+        if (priv->pending_key != NULL) {
+                while (child < priv->child_count) {
+                        GF_FREE (priv->pending_key[child]);
+                        child++;
+                }
+        }
+        GF_FREE (priv->pending_key);
+
+        GF_FREE (priv->children);
+        GF_FREE (priv->child_up);
+        LOCK_DESTROY (&priv->lock);
+
+        GF_FREE (priv);
+}
+
+/*
+ * Copy the open-fd count collected in the frame's local into the fd context
+ * of local->fd. Does nothing when the local has no fd or no fd context can
+ * be obtained.
+ */
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+        afr_fd_ctx_t *ctx = NULL;
+
+        if (local->fd == NULL)
+                return;
+
+        ctx = afr_fd_ctx_get (local->fd, this);
+        if (ctx != NULL)
+                ctx->open_fd_count = local->open_fd_count;
+}
+
+/*
+ * Build a changelog matrix that marks every child flagged in 'pending' and
+ * store it in 'xattr' through afr_set_pending_dict().
+ *
+ * For each flagged child the metadata column is set; the column chosen by
+ * afr_index_from_ia_type(iat) is set when one exists; and for directories
+ * with priv->esh_granular enabled the data column is set as well.
+ *
+ * Returns the matrix (caller owns it), or NULL if it could not be allocated
+ * or could not be stored in the dict.
+ */
+int**
+afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending,
+                            dict_t *xattr, ia_type_t iat)
+{
+        int **matrix = NULL;
+        int child = 0;
+        int meta_col = 0;
+        int data_col = 0;
+        int type_col = -1;
+        int ret = 0;
+
+        meta_col = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+        data_col = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+        type_col = afr_index_from_ia_type (iat);
+
+        matrix = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+        if (matrix == NULL)
+                return NULL;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (pending[child]) {
+                        matrix[child][meta_col] = hton32(1);
+                        if (type_col != -1)
+                                matrix[child][type_col] = hton32(1);
+                        /* A new-entry mark on a directory also carries the
+                         * full-heal indicator when granular entry heal is
+                         * enabled. */
+                        if ((IA_ISDIR (iat)) && (priv->esh_granular))
+                                matrix[child][data_col] = hton32(1);
+                }
+        }
+
+        ret = afr_set_pending_dict (priv, xattr, matrix);
+        if (ret < 0) {
+                afr_matrix_cleanup (matrix, priv->child_count);
+                matrix = NULL;
+        }
+
+        return matrix;
+}
+
+/*
+ * Turn the outcome of a heal "prepare" step into a needs-heal verdict.
+ * Returns _gf_false only when 'source' is non-negative and every child is
+ * marked in 'sources'; returns _gf_true otherwise.
+ */
+gf_boolean_t
+afr_decide_heal_info (afr_private_t *priv, unsigned char *sources, int source)
+{
+        int marked = 0;
+
+        if (source >= 0) {
+                marked = AFR_COUNT (sources, priv->child_count);
+                if (marked == priv->child_count)
+                        return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Re-check, while holding an inodelk, whether 'inode' needs a metadata heal.
+ *
+ * Takes an inodelk in the this->name domain (arguments LLONG_MAX - 1 and 0),
+ * runs __afr_selfheal_metadata_prepare() on the locked children and stores
+ * the verdict of afr_decide_heal_info() in *msh. 'pending' is passed through
+ * to the prepare step.
+ *
+ * Returns the result of __afr_selfheal_metadata_prepare(), or a negative
+ * errno (falling back to -ENOTCONN) when no lock could be taken; in that
+ * case *msh is left untouched.
+ */
+int
+afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, gf_boolean_t *msh,
+ gf_boolean_t *pending)
+{
+ int ret = -1;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+
+ afr_private_t *priv = this->private;
+
+ /* Per-child scratch arrays, one slot per child. */
+ locked_on = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ {
+ if (ret == 0) {
+ /* Not a single lock */
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;/* all invalid responses */
+ /* Nothing is locked, so the unlock below is skipped. */
+ goto out;
+ }
+ ret = __afr_selfheal_metadata_prepare (frame, this, inode,
+ locked_on, sources,
+ sinks, healed_sinks,
+ locked_replies,
+ pending);
+ /* The prepare result is passed as the 'source' argument. */
+ *msh = afr_decide_heal_info (priv, sources, ret);
+ }
+ afr_selfheal_uninodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+out:
+ if (locked_replies)
+ afr_replies_wipe (locked_replies, priv->child_count);
+ return ret;
+}
+
+/*
+ * Re-check, while holding locks, whether 'inode' needs a data heal.
+ *
+ * Opens the file, then (unless priv->locking_scheme is "granular") takes a
+ * non-blocking inodelk in the priv->sh_domain domain, followed by an inodelk
+ * in the this->name domain. With the locks held it runs
+ * __afr_selfheal_data_prepare() and stores the verdict of
+ * afr_decide_heal_info() in *dsh. 'pflag' is passed through to the prepare
+ * step.
+ *
+ * Returns the result of __afr_selfheal_data_prepare(), or a negative errno
+ * when the open fails or a lock step obtains no locks (-ENOTCONN as the
+ * fallback); *dsh is left untouched on those failure paths.
+ */
+int
+afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, gf_boolean_t *dsh,
+ gf_boolean_t *pflag)
+{
+ int ret = -1;
+ unsigned char *locked_on = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ struct afr_reply *locked_replies = NULL;
+ gf_boolean_t granular_locks = _gf_false;
+
+ priv = this->private;
+ if (strcmp ("granular", priv->locking_scheme) == 0)
+ granular_locks = _gf_true;
+ locked_on = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+
+ /* Heal-info does an open() on the file being examined so that the
+ * current eager-lock holding client, if present, at some point sees
+ * open-fd count being > 1 and releases the eager-lock so that heal-info
+ * doesn't remain blocked forever until IO completes.
+ */
+ ret = afr_selfheal_data_open (this, inode, &fd);
+ if (ret < 0) {
+ gf_msg_debug (this->name, -ret, "%s: Failed to open",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ /* The sh_domain lock is only used with the non-granular scheme. */
+ if (!granular_locks) {
+ ret = afr_selfheal_tryinodelk (frame, this, inode,
+ priv->sh_domain, 0, 0, locked_on);
+ }
+ {
+ if (!granular_locks && (ret == 0)) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;/* all invalid responses */
+ /* No sh_domain lock is held, so skip both unlock steps. */
+ goto out;
+ }
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ 0, 0, data_lock);
+ {
+ if (ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;
+ /* all invalid responses */
+ /* Only the sh_domain lock (if any) needs releasing. */
+ goto unlock;
+ }
+ ret = __afr_selfheal_data_prepare (frame, this, inode,
+ data_lock, sources,
+ sinks, healed_sinks,
+ locked_replies,
+ pflag);
+ /* The prepare result is passed as the 'source' argument. */
+ *dsh = afr_decide_heal_info (priv, sources, ret);
+ }
+ afr_selfheal_uninodelk (frame, this, inode, this->name, 0, 0,
+ data_lock);
+ }
+unlock:
+ if (!granular_locks)
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0,
+ 0, locked_on);
+out:
+ /* locked_replies is still NULL when the open above failed. */
+ if (locked_replies)
+ afr_replies_wipe (locked_replies, priv->child_count);
+ if (fd)
+ fd_unref (fd);
+ return ret;
+}
+
+/*
+ * Re-check, while holding entry locks, whether 'inode' needs an entry heal.
+ *
+ * Unless priv->locking_scheme is "granular", first takes a non-blocking
+ * entrylk in the priv->sh_domain domain; then takes an entrylk in the
+ * this->name domain, runs __afr_selfheal_entry_prepare() and stores the
+ * verdict of afr_decide_heal_info() in *esh. 'pflag' is passed through to
+ * the prepare step.
+ *
+ * Returns the prepare result (converted to -EIO when prepare succeeded but
+ * reported no source), or a negative errno when a lock step obtains no locks
+ * (-ENOTCONN as the fallback); *esh is left untouched on those lock
+ * failures.
+ */
+int
+afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ gf_boolean_t *esh, gf_boolean_t *pflag)
+{
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ gf_boolean_t granular_locks = _gf_false;
+
+ priv = this->private;
+ if (strcmp ("granular", priv->locking_scheme) == 0)
+ granular_locks = _gf_true;
+ locked_on = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ /* The sh_domain lock is only used with the non-granular scheme. */
+ if (!granular_locks) {
+ ret = afr_selfheal_tryentrylk (frame, this, inode,
+ priv->sh_domain, NULL, locked_on);
+ }
+ {
+ if (!granular_locks && ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;/* all invalid responses */
+ /* No sh_domain lock is held, so skip both unlock steps. */
+ goto out;
+ }
+
+ ret = afr_selfheal_entrylk (frame, this, inode, this->name,
+ NULL, data_lock);
+ {
+ if (ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;
+ /* all invalid responses */
+ /* Only the sh_domain lock (if any) needs releasing. */
+ goto unlock;
+ }
+ ret = __afr_selfheal_entry_prepare (frame, this, inode,
+ data_lock, sources,
+ sinks, healed_sinks,
+ locked_replies,
+ &source, pflag);
+ /* Prepare succeeded but named no source: report -EIO. */
+ if ((ret == 0) && source < 0)
+ ret = -EIO;
+ *esh = afr_decide_heal_info (priv, sources, ret);
+ }
+ afr_selfheal_unentrylk (frame, this, inode, this->name, NULL,
+ data_lock, NULL);
+ }
+unlock:
+ if (!granular_locks)
+ afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain,
+ NULL, locked_on, NULL);
+out:
+ if (locked_replies)
+ afr_replies_wipe (locked_replies, priv->child_count);
+ return ret;
+}
+
+/*
+ * Determine which kinds of heal the file identified by 'gfid' needs.
+ *
+ * An unlocked inspection produces candidate flags; each candidate is then
+ * re-checked under lock, in the order metadata, data, entry. The sequence
+ * stops early on -EIO from the metadata or data check and on -EAGAIN from
+ * the data check.
+ *
+ * The three output flags are always written, including on early exit.
+ * Returns the result of the last inspection step that ran.
+ */
+int
+afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid,
+                             inode_t **inode,
+                             gf_boolean_t *entry_selfheal,
+                             gf_boolean_t *data_selfheal,
+                             gf_boolean_t *metadata_selfheal,
+                             gf_boolean_t *pending)
+
+{
+        gf_boolean_t need_data = _gf_false;
+        gf_boolean_t need_meta = _gf_false;
+        gf_boolean_t need_entry = _gf_false;
+        int ret = -1;
+
+        ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode,
+                                             &need_data, &need_meta,
+                                             &need_entry);
+        if (ret != 0)
+                goto done;
+
+        /* Confirm each candidate heal type while holding its locks. */
+
+        if (need_meta) {
+                ret = afr_selfheal_locked_metadata_inspect (frame, this,
+                                                            *inode, &need_meta,
+                                                            pending);
+                if (ret == -EIO)
+                        goto done;
+        }
+
+        if (need_data) {
+                ret = afr_selfheal_locked_data_inspect (frame, this, *inode,
+                                                        &need_data, pending);
+                if (ret == -EIO || ret == -EAGAIN)
+                        goto done;
+        }
+
+        if (need_entry) {
+                ret = afr_selfheal_locked_entry_inspect (frame, this, *inode,
+                                                         &need_entry, pending);
+        }
+
+done:
+        *data_selfheal = need_data;
+        *entry_selfheal = need_entry;
+        *metadata_selfheal = need_meta;
+        return ret;
+}
+
+/*
+ * Create a new dict and set its "heal-info" key to 'status'.
+ *
+ * Returns the dict, or NULL if it could not be created. A failure to set
+ * the key is only logged; the dict is still returned.
+ */
+dict_t*
+afr_set_heal_info (char *status)
+{
+        dict_t *info = dict_new ();
+        int ret = 0;
+
+        if (info == NULL)
+                return NULL;
+
+        ret = dict_set_str (info, "heal-info", status);
+        if (ret != 0) {
+                gf_msg ("", GF_LOG_WARNING, -ret,
+                        AFR_MSG_DICT_SET_FAILED,
+                        "Failed to set heal-info key to "
+                        "%s", status);
+        }
+
+        return info;
+}
+
+/*
+ * Answer a heal-info query for loc->gfid by unwinding a getxattr reply.
+ *
+ * The file is inspected under locks and the outcome is mapped to a
+ * "heal-info" status string:
+ *   -EIO                       -> "split-brain"
+ *   -EAGAIN                    -> "possibly-healing"
+ *   any heal flag set          -> "heal"
+ *   ret >= 0 and no flag set   -> "no-heal"
+ *   other error, no flag set   -> no dict at all
+ * "-pending" is appended to the first three when the inspection reported
+ * pending changes.
+ *
+ * Allocation failures unwind with op_ret -1 and op_errno ENOMEM; every
+ * other path unwinds with op_ret 0. The value unwound is also returned.
+ *
+ * NOTE(review): 'status' is handed to afr_set_heal_info(), which stores it
+ * with dict_set_str(); nothing here frees it. Presumably the dict does not
+ * take ownership, which would make this a leak - confirm against the dict
+ * API before changing ownership.
+ */
+int
+afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        gf_boolean_t data_selfheal = _gf_false;
+        gf_boolean_t metadata_selfheal = _gf_false;
+        gf_boolean_t entry_selfheal = _gf_false;
+        gf_boolean_t pending = _gf_false;
+        gf_boolean_t needs_heal = _gf_false;
+        const char *state = NULL;
+        dict_t *dict = NULL;
+        int ret = -1;
+        int op_errno = 0;
+        inode_t *inode = NULL;
+        char *substr = NULL;
+        char *status = NULL;
+
+        ret = afr_selfheal_locked_inspect (frame, this, loc->gfid, &inode,
+                                           &entry_selfheal,
+                                           &data_selfheal, &metadata_selfheal,
+                                           &pending);
+
+        if (ret == -ENOMEM)
+                goto enomem;
+
+        if (pending) {
+                gf_asprintf (&substr, "-pending");
+                /* Previously this bailed out with the inspection result
+                 * still in 'ret' and op_errno 0, which could unwind as a
+                 * success without any dict. */
+                if (!substr)
+                        goto enomem;
+        }
+
+        needs_heal = (data_selfheal || entry_selfheal || metadata_selfheal);
+
+        if (ret == -EIO) {
+                state = "split-brain";
+        } else if (ret == -EAGAIN) {
+                state = "possibly-healing";
+        } else if (needs_heal) {
+                /* Either a source was identified (ret >= 0) or the
+                 * inspection failed in some other way (e.g. ENOTCONN) but
+                 * still flagged a heal type. */
+                state = "heal";
+        } else if (ret >= 0) {
+                dict = afr_set_heal_info ("no-heal");
+        }
+
+        if (state) {
+                ret = gf_asprintf (&status, "%s%s", state,
+                                   substr ? substr : "");
+                if (ret < 0)
+                        goto enomem;
+                dict = afr_set_heal_info (status);
+        }
+
+        ret = 0;
+        goto out;
+
+enomem:
+        op_errno = ENOMEM;
+        ret = -1;
+out:
+        AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
+        if (dict)
+                dict_unref (dict);
+        if (inode)
+                inode_unref (inode);
+        GF_FREE (substr);
+        return ret;
+}
+
+/*
+ * Check 'replies' for split-brain with respect to transaction 'type'.
+ *
+ * Runs afr_selfheal_find_direction() against priv->child_up; when it
+ * succeeds and marks no child as a source, *spb is set to _gf_true. *spb is
+ * never cleared here, so the caller must initialise it.
+ *
+ * Returns 0 on success or the non-zero result of the direction lookup.
+ */
+int
+_afr_is_split_brain (call_frame_t *frame, xlator_t *this,
+                     struct afr_reply *replies,
+                     afr_transaction_type type,
+                     gf_boolean_t *spb)
+{
+        afr_private_t *priv = this->private;
+        unsigned char *sources = NULL;
+        unsigned char *sinks = NULL;
+        uint64_t *witness = NULL;
+        int source_total = 0;
+        int ret = 0;
+
+        sources = alloca0 (priv->child_count);
+        sinks = alloca0 (priv->child_count);
+        witness = alloca0(priv->child_count * sizeof (*witness));
+
+        ret = afr_selfheal_find_direction (frame, this, replies,
+                                           type, priv->child_up, sources,
+                                           sinks, witness, NULL);
+        if (ret != 0)
+                return ret;
+
+        source_total = AFR_COUNT (sources, priv->child_count);
+        if (source_total == 0)
+                *spb = _gf_true;
+
+        return 0;
+}
+
+/*
+ * Report whether the file is in data and/or metadata split-brain.
+ *
+ * Runs an unlocked discover for 'gfid', then evaluates the replies once for
+ * data (*d_spb) and once for metadata (*m_spb), stopping at the first step
+ * that fails. The replies are wiped before returning.
+ *
+ * Returns 0 on success or the non-zero result of the failing step.
+ */
+int
+afr_is_split_brain (call_frame_t *frame, xlator_t *this, inode_t *inode,
+                    uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb)
+{
+        afr_private_t *priv = this->private;
+        struct afr_reply *replies = NULL;
+        int ret = -1;
+
+        replies = alloca0 (sizeof (*replies) * priv->child_count);
+
+        ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies);
+
+        if (ret == 0)
+                ret = _afr_is_split_brain (frame, this, replies,
+                                           AFR_DATA_TRANSACTION, d_spb);
+
+        if (ret == 0)
+                ret = _afr_is_split_brain (frame, this, replies,
+                                           AFR_METADATA_TRANSACTION, m_spb);
+
+        if (replies) {
+                afr_replies_wipe (replies, priv->child_count);
+                replies = NULL;
+        }
+        return ret;
+}
+
+/* Completion callback: frees 'opaque' and returns 0. 'ret' and 'frame' are
+ * not used. Presumably paired with afr_get_split_brain_status(), which does
+ * not free its opaque argument itself - confirm at the call site. */
+int
+afr_get_split_brain_status_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+ GF_FREE (opaque);
+ return 0;
+}
+
+/*
+ * Build the split-brain status reply for the file named in 'opaque' and
+ * unwind it as a getxattr reply.
+ *
+ * 'opaque' is an afr_spb_status_t carrying the frame and the loc. When the
+ * file is in data or metadata split-brain, the GF_AFR_SBRAIN_STATUS key is
+ * set to "data-split-brain:<yes|no> metadata-split-brain:<yes|no>" followed
+ * by " Choices:" and a comma-separated list of all child names; otherwise it
+ * is set to a fixed "not under ... split-brain" message.
+ *
+ * The getxattr unwind always happens, with (ret, op_errno, dict); the same
+ * ret is returned. 'opaque' is not freed here.
+ */
+int
+afr_get_split_brain_status (void *opaque)
+{
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ int ret = -1;
+ int op_errno = 0;
+ int i = 0;
+ char *choices = NULL;
+ char *status = NULL;
+ dict_t *dict = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ afr_spb_status_t *data = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ this = frame->this;
+ loc = data->loc;
+ priv = this->private;
+ children = priv->children;
+
+ inode = afr_inode_find (this, loc->gfid);
+ /* NOTE(review): this path unwinds with ret -1 but op_errno still 0. */
+ if (!inode)
+ goto out;
+
+ /* Calculation for string length :
+ * (child_count X length of child-name) + strlen (" Choices :")
+ * child-name consists of :
+ * a) 256 = max characters for volname according to GD_VOLUME_NAME_MAX
+ * b) strlen ("-client-00,") assuming 16 replicas
+ */
+ choices = alloca0 (priv->child_count * (256 + strlen ("-client-00,")) +
+ strlen (" Choices:"));
+
+ ret = afr_is_split_brain (frame, this, inode, loc->gfid, &d_spb,
+ &m_spb);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ if (d_spb || m_spb) {
+ /* NOTE(review): sprintf/strcat are unbounded; safety relies on the
+ * size estimate above holding for every child name. */
+ sprintf (choices, " Choices:");
+ for (i = 0; i < priv->child_count; i++) {
+ strcat (choices, children[i]->name);
+ strcat (choices, ",");
+ }
+ /* Drop the trailing comma. */
+ choices[strlen (choices) - 1] = '\0';
+
+ ret = gf_asprintf (&status, "data-split-brain:%s "
+ "metadata-split-brain:%s%s",
+ (d_spb) ? "yes" : "no",
+ (m_spb) ? "yes" : "no", choices);
+
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ /* 'status' is handed to the dict as a dynamic string; it is not
+ * freed in this function. */
+ ret = dict_set_dynstr (dict, GF_AFR_SBRAIN_STATUS, status);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS,
+ "The file is not under data or"
+ " metadata split-brain");
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref (dict);
+ if (inode)
+ inode_unref (inode);
+ return ret;
+}
+
+/*
+ * Run a self-heal on loc->gfid on behalf of a split-brain resolution
+ * request and unwind the originating fop (getxattr or setxattr, chosen by
+ * local->op).
+ *
+ * A heal result of 1 or 2 is reported as success with "sh-fail-msg" set to
+ * "File not in split-brain". Otherwise, if the heal left a response dict in
+ * local->xdata_rsp, it is copied into the reply and success is reported; if
+ * not, a negative heal result is turned into op_ret -1 with the matching
+ * op_errno.
+ */
+int32_t
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        afr_local_t *local = frame->local;
+        dict_t *reply = NULL;
+        int op_errno = 0;
+        int ret = 0;
+
+        reply = dict_new ();
+        if (reply == NULL) {
+                op_errno = ENOMEM;
+                ret = -1;
+                goto unwind;
+        }
+
+        ret = afr_selfheal_do (frame, this, loc->gfid);
+
+        if (ret == 1 || ret == 2) {
+                ret = dict_set_str (reply, "sh-fail-msg",
+                                    "File not in split-brain");
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING,
+                                -ret, AFR_MSG_DICT_SET_FAILED,
+                                "Failed to set sh-fail-msg in dict");
+                ret = 0;
+        } else if (local->xdata_rsp) {
+                /* 'sh-fail-msg' has been set in the dict during self-heal. */
+                dict_copy (local->xdata_rsp, reply);
+                ret = 0;
+        } else if (ret < 0) {
+                op_errno = -ret;
+                ret = -1;
+        }
+
+unwind:
+        if (local->op == GF_FOP_GETXATTR)
+                AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, reply, NULL);
+        else if (local->op == GF_FOP_SETXATTR)
+                AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
+
+        if (reply != NULL)
+                dict_unref(reply);
+        return ret;
+}
+
+/*
+ * Return the index of the child whose name equals 'name', or -1 when no
+ * child matches.
+ */
+int
+afr_get_child_index_from_name (xlator_t *this, char *name)
+{
+        afr_private_t *priv = this->private;
+        int child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (strcmp (priv->children[child]->name, name) == 0)
+                        return child;
+        }
+
+        return -1;
+}
+
+/* Store 'need_heal' in priv->need_heal while holding priv->lock. */
+void
+afr_priv_need_heal_set (afr_private_t *priv, gf_boolean_t need_heal)
+{
+ LOCK (&priv->lock);
+ {
+ priv->need_heal = need_heal;
+ }
+ UNLOCK (&priv->lock);
+}
+
+/*
+ * Recompute the need-heal flag from the replies held in 'local': the flag
+ * becomes true if any valid reply reports need_heal, false otherwise, and
+ * is then stored through afr_priv_need_heal_set().
+ */
+void
+afr_set_need_heal (xlator_t *this, afr_local_t *local)
+{
+        afr_private_t *priv = this->private;
+        gf_boolean_t found = _gf_false;
+        int child = 0;
+
+        /* Stop scanning at the first reply that asks for a heal. */
+        while (child < priv->child_count && !found) {
+                if (local->replies[child].valid &&
+                    local->replies[child].need_heal)
+                        found = _gf_true;
+                child++;
+        }
+
+        afr_priv_need_heal_set (priv, found);
+}
+
+/* Return the value of priv->need_heal, read while holding priv->lock. */
+gf_boolean_t
+afr_get_need_heal (xlator_t *this)
+{
+        afr_private_t *priv = this->private;
+        gf_boolean_t snapshot = _gf_true;
+
+        LOCK (&priv->lock);
+        snapshot = priv->need_heal;
+        UNLOCK (&priv->lock);
+
+        return snapshot;
+}
+
+/*
+ * Map an operation-type string to its status message id:
+ * GF_AFR_REPLACE_BRICK -> AFR_MSG_REPLACE_BRICK_STATUS,
+ * GF_AFR_ADD_BRICK -> AFR_MSG_ADD_BRICK_STATUS, anything else -> -1.
+ */
+int
+afr_get_msg_id (char *op_type)
+{
+        if (strcmp (op_type, GF_AFR_REPLACE_BRICK) == 0)
+                return AFR_MSG_REPLACE_BRICK_STATUS;
+
+        if (strcmp (op_type, GF_AFR_ADD_BRICK) == 0)
+                return AFR_MSG_ADD_BRICK_STATUS;
+
+        return -1;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index e23aa414fec..2260e5dac26 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,11 +17,6 @@
#include <signal.h>
#include <string.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "dict.h"
#include "xlator.h"
@@ -46,679 +32,313 @@
#include "checksum.h"
#include "afr.h"
-#include "afr-self-heal.h"
-
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count)
-{
- int ret = _gf_false;
- int i = 0;
-
- uint32_t cksum;
-
- cksum = checksum[0];
-
- while (i < child_count) {
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- i++;
- }
-
- return ret;
-}
+#include "afr-transaction.h"
int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_opendir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = 0;
-
- uint32_t entry_cksum;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- off_t last_offset = 0;
-
- priv = this->private;
local = frame->local;
- sh = &local->self_heal;
-
+ fd_ctx = local->fd_ctx;
child_index = (long) cookie;
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
- }
-
- if (op_ret == 0)
- goto out;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- }
-
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
-
-out:
- if ((op_ret == 0) || (op_ret == -1)) {
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count)) {
-
- sh->need_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_examine_dir_sh_unwind;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checksums of directory %s differ,"
- " triggering forced merge",
- local->loc.path);
-
- afr_self_heal (frame, this);
- } else {
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
}
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
return 0;
}
int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
+afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int call_count = -1;
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
- int i;
- int call_count = 0;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ loc_copy (&local->loc, loc);
- local->call_count = call_count;
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
if (!--call_count)
break;
}
}
- return 0;
-}
-
-
-int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
-{
- afr_local_t * local = NULL;
-
- int call_count = -1;
- int ret = 0;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->op_ret == 0) {
- ret = afr_fd_ctx_set (this, local->fd);
-
- if (!afr_is_opendir_done (this, local->fd->inode)) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anamolies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- } else {
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
-
return 0;
-}
-
-
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int child_count = 0;
- int i = 0;
-
- int ret = -1;
- int call_count = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
- local->fd = fd_ref (fd);
-
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
-
- return 0;
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
+ return 0;
}
-
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
-
-
-struct entry_name {
- char *name;
- struct list_head list;
-};
-
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
+static int
+afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
{
- struct entry_name *e;
- gf_boolean_t ret = _gf_false;
+ int gen = 0;
+ int entry_read_subvol = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ afr_inode_read_subvol_get (inode, this, data_readable,
+ metadata_readable, &gen);
+
+ if (gen != priv->event_generation ||
+ !data_readable[par_read_subvol] ||
+ !metadata_readable[par_read_subvol])
+ return -1;
+
+ /* Once the control reaches the following statement, it means that the
+ * parent's read subvol is perfectly readable. So calling
+ * either afr_data_subvol_get() or afr_metadata_subvol_get() would
+ * yield the same result. Hence, choosing afr_data_subvol_get() below.
+ */
+
+ if (!priv->consistent_metadata)
+ return 0;
+
+ /* For an inode fetched through readdirp which is yet to be linked,
+ * inode ctx would not be initialised (yet). So this function returns
+ * -1 above due to gen being 0, which is why it is OK to pass NULL for
+ * read_subvol_args here.
+ */
+ entry_read_subvol = afr_data_subvol_get (inode, this, NULL, NULL,
+ NULL, NULL);
+ if (entry_read_subvol != par_read_subvol)
+ return -1;
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
+ return 0;
-out:
- return ret;
}
-
static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
-{
- struct entry_name *n = NULL;
- gf_dirent_t * entry = NULL;
-
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
-
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
+ gf_dirent_t *entries, fd_t *fd)
{
- gf_dirent_t *entry, *tmp;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t validate_subvol = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ need_heal = afr_get_need_heal (this);
+ validate_subvol = need_heal | priv->consistent_metadata;
+
+ list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ continue;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ list_del_init (&entry->list);
+ list_add_tail (&entry->list, &entries->list);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
+ if (!validate_subvol)
+ continue;
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
+ if (entry->inode) {
+ ret = afr_validate_read_subvol (entry->inode, this,
+ subvol);
+ if (ret == -1) {
+ inode_unref (entry->inode);
+ entry->inode = NULL;
+ continue;
+ }
}
- }
-
- return offset;
-}
-
-
-static void
-afr_forget_entries (fd_t *fd)
-{
- struct entry_name *entry, *tmp;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ }
}
int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
+ INIT_LIST_HEAD (&entries.list);
- int child_index = -1;
+ local = frame->local;
- priv = this->private;
- children = priv->children;
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local = frame->local;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- child_index = (long) cookie;
+ if (op_ret >= 0)
+ afr_readdir_transform_entries (subvol_entries, (long) cookie,
+ &entries, local->fd);
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, xdata);
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
+ gf_dirent_free (&entries);
- return 0;
+ return 0;
}
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+int
+afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- ino_t inum = 0;
-
- int call_child = 0;
- int ret = 0;
-
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = -1;
-
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- off_t offset = 0;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (child_went_down (op_ret, op_errno)) {
- if (all_tried (child_index, priv->child_count)) {
- goto out;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- call_child = ++child_index;
-
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- inum = afr_itransform (entry->d_ino, priv->child_count,
- child_index);
- entry->d_ino = inum;
- inum = afr_itransform (entry->d_stat.ia_ino,
- priv->child_count, child_index);
- entry->d_stat.ia_ino = inum;
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that'll make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries from offset %"PRId64", child %s",
- offset, children[child_index]->name);
+ priv = this->private;
+ local = frame->local;
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) child_index,
- children[child_index],
- children[child_index]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno, 0, 0);
+ return 0;
}
-out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
+ fd_ctx->readdir_subvol = subvol;
- return 0;
+ if (local->op == GF_FOP_READDIR)
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
+ else
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
}
-int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+int
+afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
- }
-
- frame->local = local;
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EINVAL;
goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
+ }
- fd_ctx->last_tried = call_child;
+ local->op = whichop;
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict)? dict_ref (dict) : NULL;
+
+ subvol = fd_ctx->readdir_subvol;
+
+ if (offset == 0 || subvol == -1) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn (frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_readdir_wind (frame, this, subvol);
}
- if (whichop == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset);
- else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset);
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
- return 0;
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+
return 0;
}
@@ -726,8 +346,7 @@ afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int32_t
afr_releasedir (xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
afr_cleanup_fd_ctx (this, fd);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index abde2534de9..09456d15949 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,28 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
-
-int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 439e8d8c765..f3de5352d7e 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,11 +16,6 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "afr.h"
#include "dict.h"
@@ -43,565 +29,641 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "byte-order.h"
#include "afr.h"
#include "afr-transaction.h"
-
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
-{
- char *tmp = NULL;
-
- if (!child->parent) {
- loc_copy (parent, child);
- return;
- }
-
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
-
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
-
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
-}
-
-/* {{{ create */
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this);
int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
{
- call_frame_t *main_frame = NULL;
- afr_private_t * priv = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
+ int ret = -1;
+ char *child_path = NULL;
- priv = this->private;
- local = frame->local;
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
+ parent->path = gf_strdup (dirname (child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- unwind_buf->ia_ino = local->cont.create.ino;
- unwind_buf->ia_gen = local->cont.create.gen;
+ parent->inode = inode_ref (child->parent);
+ gf_uuid_copy (parent->gfid, child->pargfid);
- local->cont.create.preparent.ia_ino = local->cont.create.parent_ino;
- local->cont.create.postparent.ia_ino = local->cont.create.parent_ino;
+ ret = 0;
+out:
+ GF_FREE (child_path);
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
- }
-
- return 0;
+ return ret;
}
-int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+static void
+__afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = 0;
-
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+ afr_read_subvol_args_t args = {0,};
local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set ctx on fd=%p", fd);
+ priv = this->private;
- local->op_ret = -1;
- local->op_errno = -ret;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ gf_uuid_copy (args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ }
- ret = fd_ctx_get (fd, this, &ctx);
+ if (local->inode) {
+ afr_replies_interpret (frame, this, local->inode, NULL);
+ inode_read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL, NULL, &args);
+ }
+
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get (local->parent, this,
+ NULL, local->readable, NULL, NULL);
+
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
+ NULL, local->readable2, NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+ afr_pick_error_xdata (local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_read_subvol_reset (local->inode,
+ this);
+ if (local->parent)
+ afr_inode_read_subvol_reset (local->parent,
+ this);
+ if (local->parent2)
+ afr_inode_read_subvol_reset (local->parent2,
+ this);
+ continue;
+ }
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ if (local->xdata_rsp) {
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = NULL;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
- fd_ctx->flags = local->cont.create.flags;
-
- if (local->success_count == 0) {
- local->cont.create.buf = *buf;
-
- local->cont.create.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- local->cont.create.gen = buf->ia_gen;
+ if (local->replies[i].xdata)
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ continue;
+ }
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
}
-
- if (child_index == local->first_up_child) {
- local->cont.create.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- local->cont.create.gen = buf->ia_gen;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
+ }
- local->success_count++;
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
}
- local->op_errno = op_errno;
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ }
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ afr_txn_arbitrate_fop_cbk (frame, this);
+}
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
+static void
+__afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed (frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
-
- return 0;
+
+ return;
}
-int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+static int
+__afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ priv = this->private;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ LOCK (&frame->lock);
+ {
+ __afr_dir_write_fill (frame, this, child_index, op_ret,
+ op_errno, buf, preparent, postparent,
+ preparent2, postparent2, xdata);
+ }
+ UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (call_count == 0) {
+ __afr_dir_write_finalize (frame, this);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (afr_txn_nothing_failed (frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata &&
+ afr_needs_changelog_update (local))
+ afr_zero_fill_stat (local);
+ local->transaction.unwind (frame, this);
+ }
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_mark_entry_pending_changelog (frame, this);
+
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
}
int
-afr_create_done (call_frame_t *frame, xlator_t *this)
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
+ int call_count = 0;
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return (frame);
- AFR_STACK_DESTROY (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
+
+ new_local = AFR_FRAME_INIT (new_frame, op_errno);
+ if (!new_local)
+ goto out;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ xattr = dict_new ();
+ if (!xattr)
+ goto out;
- priv = this->private;
+ pending = alloca0 (priv->child_count);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count ++;
+ continue;
+ }
+ pending[i] = 1;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ changelog = afr_mark_pending_changelog (priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ if (!changelog)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ new_local->pending = changelog;
+ gf_uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->inode);
- transaction_frame->local = local;
+ new_local->call_count = call_count;
- loc_copy (&local->loc, loc);
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr, NULL);
+ if (!--call_count)
+ break;
}
- UNLOCK (&priv->read_child_lock);
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ if (xattr)
+ dict_unref (xattr);
+ return;
+}
+
- if (loc->parent)
- local->cont.create.parent_ino = loc->parent->ino;
+void
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
+
+ local = frame->local;
+ priv = this->private;
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
+ if (local->op_ret < 0)
+ return;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD &&
+ local->op != GF_FOP_MKDIR)
+ return;
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
+ pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT (local->transaction.failed_subvols,
+ priv->child_count);
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ afr_mark_new_entry_changelog (frame, this);
- return 0;
+ return;
}
-/* }}} */
-/* {{{ mknod */
+/* {{{ create */
int
-afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
+ local = frame->local;
- local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame (frame);
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
+ AFR_STACK_UNWIND (create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
- unwind_buf->ia_ino = local->cont.mknod.ino;
- unwind_buf->ia_gen = local->cont.mknod.gen;
- local->cont.mknod.preparent.ia_ino = local->cont.mknod.parent_ino;
- local->cont.mknod.postparent.ia_ino = local->cont.mknod.parent_ino;
+int
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
- }
- return 0;
/*
 * Transaction "wind" hook for create: sends the create to the single child
 * priv->children[subvol], using the loc, flags, mode, umask, fd and
 * xdata_req that afr_create stored in local. The child index is passed as
 * the cookie and comes back as the cookie argument of afr_create_wind_cbk.
 * Always returns 0.
 */
+int
+afr_create_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create,
+ &local->loc, local->cont.create.flags,
+ local->cont.create.mode, local->umask,
+ local->cont.create.fd, local->xdata_req);
+ return 0;
}
/*
 * create fop entry point. Runs the create as an AFR_ENTRY_TRANSACTION on a
 * copy of the caller's frame:
 *   - stores loc, flags, mode, umask and a ref on fd in the new local; the
 *     flags are also written into the context from afr_fd_ctx_get();
 *   - sets xdata_req to dict_copy_with_ref() of xdata when xdata is given,
 *     otherwise to a new dict;
 *   - installs afr_create_wind / afr_create_unwind as the wind and unwind
 *     hooks of the transaction;
 *   - initialises one entry lockee from the parent loc and loc's basename.
 * Any setup failure, or a negative return from afr_transaction(), lands on
 * "out": the transaction frame is destroyed and the caller is unwound with
 * -1 and op_errno. op_errno starts as ENOMEM; a negative afr_transaction()
 * result is negated into it. Always returns 0.
 */
int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- int call_count = -1;
- int child_index = -1;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- child_index = (long) cookie;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0){
- local->cont.mknod.buf = *buf;
- local->cont.mknod.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- local->cont.mknod.gen = buf->ia_gen;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ loc_copy (&local->loc, loc);
- if (child_index == local->first_up_child) {
- local->cont.mknod.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- local->cont.mknod.gen = buf->ia_gen;
- }
+ local->fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!local->fd_ctx)
+ goto out;
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- local->success_count++;
- }
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->fd_ctx->flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- call_count = afr_frame_return (frame);
+ if (!local->xdata_req)
+ goto out;
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ local->transaction.wind = afr_create_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- local->transaction.resume (frame, this);
- }
-
return 0;
-}
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+/* }}} */
- int call_count = -1;
- int i = 0;
+/* {{{ mknod */
- local = frame->local;
- priv = this->private;
/*
 * Transaction "unwind" hook for mknod (installed by afr_mknod).
 * Gets the frame to reply on from afr_transaction_detach_fop_frame() and
 * returns 0 without unwinding when that is NULL. Otherwise unwinds mknod
 * with local's op_ret/op_errno, local->inode, the buf/preparent/postparent
 * iatts in cont.dir_fop and xdata_rsp.
 */
+int
+afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
return 0;
- }
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ AFR_STACK_UNWIND (mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
/*
 * Callback for the mknod wound by afr_mknod_wind(). Passes the reply to
 * __afr_dir_write_cbk(); the inode argument is not forwarded and the two
 * arguments after postparent are NULL.
 */
int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
/*
 * Transaction "wind" hook for mknod: sends the mknod to the single child
 * priv->children[subvol] with the loc, mode, dev, umask and xdata_req
 * stored in local by afr_mknod. The child index is passed as the cookie.
 * Always returns 0.
 */
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev)
+afr_mknod_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ local = frame->local;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev, local->umask,
+ local->xdata_req);
+ return 0;
+}
- priv = this->private;
/*
 * mknod fop entry point. Runs the mknod as an AFR_ENTRY_TRANSACTION on a
 * copy of the caller's frame:
 *   - stores loc, mode, dev and umask in the new local and takes refs on
 *     loc->inode and loc->parent;
 *   - sets xdata_req to dict_copy_with_ref() of xdata when xdata is given,
 *     otherwise to a new dict;
 *   - installs afr_mknod_wind / afr_mknod_unwind as the wind and unwind
 *     hooks of the transaction;
 *   - initialises one entry lockee from the parent loc and loc's basename.
 * On any failure the transaction frame is destroyed and the caller is
 * unwound with -1 and op_errno (initially ENOMEM; a negative
 * afr_transaction() result is negated into it). Always returns 0.
 */
+int
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
-
- if (loc->parent)
- local->cont.mknod.parent_ino = loc->parent->ino;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
+ if (!local->xdata_req)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -612,43 +674,20 @@ out:
/*
 * Transaction "unwind" hook for mkdir (installed by afr_mkdir).
 * Gets the frame to reply on from afr_transaction_detach_fop_frame() and
 * returns 0 without unwinding when that is NULL. Otherwise unwinds mkdir
 * with local's op_ret/op_errno, local->inode, the buf/preparent/postparent
 * iatts in cont.dir_fop and xdata_rsp.
 */
int
afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- unwind_buf->ia_ino = local->cont.mkdir.ino;
- unwind_buf->ia_gen = local->cont.mkdir.gen;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local->cont.mkdir.preparent.ia_ino = local->cont.mkdir.parent_ino;
- local->cont.mkdir.postparent.ia_ino = local->cont.mkdir.parent_ino;
+ local = frame->local;
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
@@ -656,204 +695,111 @@ int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
/*
 * Callback for the mkdir wound by afr_mkdir_wind(). In the new code the
 * whole body is one call to __afr_dir_write_cbk(); the inode argument is
 * not forwarded and the two arguments after postparent are NULL.
 */
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.mkdir.buf = *buf;
-
- local->cont.mkdir.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- local->cont.mkdir.gen = buf->ia_gen;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.mkdir.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- local->cont.mkdir.gen = buf->ia_gen;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
/*
 * Transaction "wind" hook for mkdir: sends the mkdir to the single child
 * priv->children[subvol] with the loc, mode, umask and xdata_req stored in
 * local by afr_mkdir. The child index is passed as the cookie.
 * Always returns 0.
 */
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = frame->local;
+ priv = this->private;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask,
+ local->xdata_req);
+ return 0;
}
/*
 * mkdir fop entry point. Runs the mkdir as an AFR_ENTRY_TRANSACTION on a
 * copy of the caller's frame.
 * Unlike the sibling fops, xdata is mandatory here: a request with no
 * xdata, or with no "gfid-req" key in it, is logged as a warning and
 * failed with EPERM. Otherwise xdata_req is dict_copy_with_ref() of xdata
 * (ENOMEM if that fails).
 * The rest of the setup matches the other entry fops: loc/mode/umask are
 * stored in local, afr_mkdir_wind / afr_mkdir_unwind become the wind and
 * unwind hooks, and one entry lockee is initialised from the parent loc
 * and loc's basename.
 * On any failure the transaction frame is destroyed and the caller is
 * unwound with -1 and op_errno. Always returns 0.
 */
int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
+afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- priv = this->private;
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ if (!xdata || !dict_get (xdata, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_GFID_NULL, "mkdir: %s is received "
+ "without gfid-req %p", loc->path, xdata);
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
}
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mkdir.mode = mode;
-
- if (loc->parent)
- local->cont.mkdir.parent_ino = loc->parent->ino;
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -864,233 +810,125 @@ out:
/*
 * Transaction "unwind" hook for link (installed by afr_link).
 * Gets the frame to reply on from afr_transaction_detach_fop_frame() and
 * returns 0 without unwinding when that is NULL. Otherwise unwinds link
 * with local's op_ret/op_errno, local->inode, the buf/preparent/postparent
 * iatts in cont.dir_fop and xdata_rsp.
 */
int
afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- unwind_buf->ia_ino = local->cont.link.ino;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local->cont.link.preparent.ia_ino = local->cont.link.parent_ino;
- local->cont.link.postparent.ia_ino = local->cont.link.parent_ino;
+ local = frame->local;
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
/*
 * Callback for the link wound by afr_link_wind(). In the new code the whole
 * body is one call to __afr_dir_write_cbk(); the inode argument is not
 * forwarded and the two arguments after postparent are NULL.
 */
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
+afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
/*
 * Transaction "wind" hook for link: sends the link to the single child
 * priv->children[subvol] with the old loc (local->loc), the new loc
 * (local->newloc) and xdata_req stored in local by afr_link. The child
 * index is passed as the cookie. Always returns 0.
 */
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_link_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = frame->local;
+ priv = this->private;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link,
+ &local->loc, &local->newloc, local->xdata_req);
+ return 0;
}
/*
 * link fop entry point. Runs the link as an AFR_ENTRY_TRANSACTION on a
 * copy of the caller's frame:
 *   - copies oldloc into local->loc and newloc into local->newloc;
 *   - takes refs on oldloc->inode (as local->inode) and on newloc->parent
 *     (as local->parent);
 *   - sets xdata_req to dict_copy_with_ref() of xdata when xdata is given,
 *     otherwise to a new dict;
 *   - installs afr_link_wind / afr_link_unwind as the wind and unwind hooks;
 *   - initialises one entry lockee on the new name: the parent loc is built
 *     from newloc and the basename is taken from newloc->path.
 * On any failure the transaction frame is destroyed and the caller is
 * unwound with -1 and op_errno (initially ENOMEM; a negative
 * afr_transaction() result is negated into it). Always returns 0.
 */
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
+afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- return 0;
-}
+ priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
-int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- int ret = -1;
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->newloc, newloc);
- int op_ret = -1;
- int op_errno = 0;
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (newloc->parent);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (!local->xdata_req)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ local->op = GF_FOP_LINK;
+
+ local->transaction.wind = afr_link_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_link_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
}
- UNLOCK (&priv->read_child_lock);
-
- local->cont.link.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.link.parent_ino = newloc->parent->ino;
-
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
- local->transaction.unwind = afr_link_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1101,246 +939,126 @@ out:
/*
 * Transaction "unwind" hook for symlink (installed by afr_symlink).
 * Gets the frame to reply on from afr_transaction_detach_fop_frame() and
 * returns 0 without unwinding when that is NULL. Otherwise unwinds symlink
 * with local's op_ret/op_errno, local->inode, the buf/preparent/postparent
 * iatts in cont.dir_fop and xdata_rsp.
 */
int
afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
- unwind_buf->ia_ino = local->cont.symlink.ino;
- unwind_buf->ia_gen = local->cont.symlink.gen;
-
- local->cont.symlink.preparent.ia_ino = local->cont.symlink.parent_ino;
- local->cont.symlink.postparent.ia_ino = local->cont.symlink.parent_ino;
-
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
/*
 * Callback for the symlink wound by afr_symlink_wind(). In the new code the
 * whole body is one call to __afr_dir_write_cbk(); the inode argument is
 * not forwarded and the two arguments after postparent are NULL.
 */
int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
+afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.symlink.buf = *buf;
- local->cont.symlink.ino =
- afr_itransform (buf->ia_ino, priv->child_count,
- child_index);
- local->cont.symlink.gen = buf->ia_gen;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.symlink.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- local->cont.symlink.gen = buf->ia_gen;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
/*
 * Transaction "wind" hook for symlink: sends the symlink to the single
 * child priv->children[subvol] with the linkpath, loc, umask and xdata_req
 * stored in local by afr_symlink. The child index is passed as the cookie.
 * Always returns 0.
 */
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc);
-
- if (!--call_count)
- break;
-
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc,
+ local->umask, local->xdata_req);
+ return 0;
}
/*
 * symlink fop entry point. Runs the symlink as an AFR_ENTRY_TRANSACTION on
 * a copy of the caller's frame:
 *   - stores loc and umask in the new local, takes refs on loc->inode and
 *     loc->parent, and keeps a private gf_strdup() copy of linkpath;
 *   - sets xdata_req to dict_copy_with_ref() of xdata when xdata is given,
 *     otherwise to a new dict;
 *   - installs afr_symlink_wind / afr_symlink_unwind as the wind and unwind
 *     hooks of the transaction;
 *   - initialises one entry lockee from the parent loc and loc's basename.
 * On any failure the transaction frame is destroyed and the caller is
 * unwound with -1 and op_errno (initially ENOMEM; a negative
 * afr_transaction() result is negated into it). Always returns 0.
 *
 * Review fix: the gf_strdup() result is now checked. Without the check an
 * allocation failure left cont.symlink.linkpath NULL, and afr_symlink_wind
 * would pass that NULL to every child as the link target.
 */
int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
+afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ priv = this->private;
-int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- int ret = -1;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- int op_ret = -1;
- int op_errno = 0;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local->cont.symlink.linkpath = gf_strdup (linkpath);
+ if (!local->cont.symlink.linkpath) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ local->umask = umask;
- priv = this->private;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (!local->xdata_req)
goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
}
- UNLOCK (&priv->read_child_lock);
- local->cont.symlink.linkpath = gf_strdup (linkpath);
-
- if (loc->parent)
- local->cont.symlink.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
/* }}} */
@@ -1350,231 +1068,159 @@ out:
int
afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct iatt *unwind_buf = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- unwind_buf->ia_ino = local->cont.rename.ino;
-
- local->cont.rename.preoldparent.ia_ino = local->cont.rename.oldparent_ino;
- local->cont.rename.postoldparent.ia_ino = local->cont.rename.oldparent_ino;
- local->cont.rename.prenewparent.ia_ino = local->cont.rename.newparent_ino;
- local->cont.rename.postnewparent.ia_ino = local->cont.rename.newparent_ino;
+ local = frame->local;
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
+ return 0;
}
int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
+afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rename_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename,
+ &local->loc, &local->newloc, local->xdata_req);
+ return 0;
}
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ int nlockee = 0;
+
+ priv = this->private;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- int ret = -1;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- int op_ret = -1;
- int op_errno = 0;
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->newloc, newloc);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (oldloc->parent);
+ local->parent2 = inode_ref (newloc->parent);
- priv = this->private;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (!local->xdata_req)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->read_child_index = afr_read_child (this, oldloc->inode);
-
- local->cont.rename.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.rename.oldparent_ino = oldloc->parent->ino;
- if (newloc->parent)
- local->cont.rename.newparent_ino = newloc->parent->ino;
-
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
- local->transaction.unwind = afr_rename_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (rename, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
/* }}} */
@@ -1584,210 +1230,121 @@ out:
int
afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (main_frame) {
- local->cont.unlink.preparent.ia_ino = local->cont.unlink.parent_ino;
- local->cont.unlink.postparent.ia_ino = local->cont.unlink.parent_ino;
+ local = frame->local;
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_read_child (this, local->loc.inode);
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
-
- if ((local->success_count == priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = frame->local;
+ priv = this->private;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink,
+ &local->loc, local->xflag, local->xdata_req);
+ return 0;
}
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- int op_ret = -1;
- int op_errno = 0;
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- priv = this->private;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (!local->xdata_req)
goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- if (loc->parent)
- local->cont.unlink.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
- local->transaction.unwind = afr_unlink_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
@@ -1799,211 +1356,135 @@ out:
int
afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.rmdir.preparent.ia_ino = local->cont.rmdir.parent_ino;
- local->cont.rmdir.postparent.ia_ino = local->cont.rmdir.parent_ino;
-
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_read_child (this, local->loc.inode);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
-
- if ((local->success_count == priv->wait_count)
- && local->read_child_returned)
- need_unwind = 1;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc);
+ local = frame->local;
+ priv = this->private;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir,
+ &local->loc, local->cont.rmdir.flags, local->xdata_req);
+ return 0;
}
int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
+afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ int nlockee = 0;
+
+ priv = this->private;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
-int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- int op_ret = -1;
- int op_errno = 0;
+ local->cont.rmdir.flags = flags;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (!local->xdata_req)
goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- if (loc->parent)
- local->cont.rmdir.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
-
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 4fa618b6575..02f0a3682d9 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -22,38 +13,35 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd);
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
-int32_t
+int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index ef72fb19779..1690cb684dd 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,11 +16,6 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "afr.h"
#include "dict.h"
@@ -39,693 +25,1681 @@
#include "stack.h"
#include "list.h"
#include "call-stub.h"
+#include "byte-order.h"
#include "defaults.h"
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "quota-common-utils.h"
-#include "afr.h"
+#include "afr-transaction.h"
+#include "afr-messages.h"
+/*
+ * Quota size xattrs are not maintained by afr, so they can differ across
+ * replies even when the directory changelog xattrs suggest everything is
+ * fine. If at least one readable 'source' exists, take the maximum quota
+ * size among the sources only; otherwise take the maximum among all the
+ * available replies. This way, when there is a source alongside stale
+ * copies, the result always comes from the 'source'.
+ */
+
+int
+afr_handle_quota_size (call_frame_t *frame, xlator_t *this)
+{
+ unsigned char *readable = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0;
+ int ret = 0;
+ quota_meta_t size = {0, };
+ quota_meta_t max_size = {0, };
+ int readable_cnt = 0;
+ int read_subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+
+ readable = alloca0 (priv->child_count);
+
+ afr_inode_read_subvol_get (local->inode, this, readable, 0, 0);
+
+ readable_cnt = AFR_COUNT (readable, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ ret = quota_dict_get_meta (replies[i].xdata, QUOTA_SIZE_KEY,
+ &size);
+ if (ret == -1)
+ continue;
+ if (read_subvol == -1)
+ read_subvol = i;
+ if (size.size > max_size.size ||
+ (size.file_count + size.dir_count) >
+ (max_size.file_count + max_size.dir_count))
+ read_subvol = i;
+
+ if (size.size > max_size.size)
+ max_size.size = size.size;
+ if (size.file_count > max_size.file_count)
+ max_size.file_count = size.file_count;
+ if (size.dir_count > max_size.dir_count)
+ max_size.dir_count = size.dir_count;
+ }
+
+ if (max_size.size == 0 && max_size.file_count == 0 &&
+ max_size.dir_count == 0)
+ return read_subvol;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ quota_dict_set_meta (replies[i].xdata, QUOTA_SIZE_KEY,
+ &max_size, IA_IFDIR);
+ }
+
+ return read_subvol;
+}
-/**
- * Common algorithm for inode read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: access, stat, fstat, readlink, getxattr
- */
/* {{{ access */
-int32_t
-afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ local = frame->local;
- priv = this->private;
- children = priv->children;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local = frame->local;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- read_child = (long) cookie;
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.access.last_tried;
+ return 0;
+}
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.access.last_tried;
- if (this_try == read_child) {
- goto retry;
- }
+int
+afr_access_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- unwind = 0;
+ priv = this->private;
+ local = frame->local;
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->access,
- &local->loc, local->cont.access.mask);
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (access, frame, local->op_ret,
+ local->op_errno, 0);
+ return 0;
}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access,
+ &local->loc, local->cont.access.mask,
+ local->xdata_req);
+ return 0;
+}
+
+int
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int mask, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_ACCESS;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ afr_read_txn (frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
return 0;
+out:
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
+ return 0;
}
+/* }}} */
-int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+/* {{{ stat */
+
+int
+afr_stat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- int32_t read_child = -1;
+ local = frame->local;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ return 0;
+}
- children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+int
+afr_stat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- read_child = afr_read_child (this, loc->inode);
+ priv = this->private;
+ local = frame->local;
- if (read_child >= 0) {
- call_child = read_child;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
- local->cont.access.last_tried = -1;
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->stat,
+ &local->loc, local->xdata_req);
+ return 0;
+}
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+int
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local->cont.access.last_tried = call_child;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+ local->op = GF_FOP_STAT;
loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->access,
- loc, mask);
+ afr_read_txn (frame, this, loc->inode, afr_stat_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
return 0;
+out:
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
/* }}} */
-/* {{{ stat */
+/* {{{ fstat */
-int32_t
-afr_stat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+int
+afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ local = frame->local;
- priv = this->private;
- children = priv->children;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- read_child = (long) cookie;
-
- local = frame->local;
-
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.stat.last_tried;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.stat.last_tried;
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- if (this_try == read_child) {
- goto retry;
- }
+ return 0;
+}
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->stat,
- &local->loc);
- }
+int
+afr_fstat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
-out:
- if (unwind) {
- if (buf)
- buf->ia_ino = local->cont.stat.ino;
+ priv = this->private;
+ local = frame->local;
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
}
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fstat,
+ local->fd, local->xdata_req);
return 0;
}
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+afr_fstat (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- int32_t read_child = -1;
- int call_child = 0;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_fix_open (fd, this);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ afr_read_txn (frame, this, fd->inode, afr_fstat_wind,
+ AFR_DATA_TRANSACTION);
- children = priv->children;
+ return 0;
+out:
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ return 0;
+}
- frame->local = local;
+/* }}} */
- read_child = afr_read_child (this, loc->inode);
+/* {{{ readlink */
- if (read_child >= 0) {
- call_child = read_child;
+int
+afr_readlink_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- local->cont.stat.last_tried = -1;
+ local = frame->local;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- local->cont.stat.last_tried = call_child;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
}
- loc_copy (&local->loc, loc);
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno,
+ buf, sbuf, xdata);
+ return 0;
+}
- local->cont.stat.ino = loc->inode->ino;
+int
+afr_readlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc);
+ local = frame->local;
+ priv = this->private;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL);
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readlink, frame, local->op_ret,
+ local->op_errno, 0, 0, 0);
+ return 0;
}
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink,
+ &local->loc, local->cont.readlink.size,
+ local->xdata_req);
+ return 0;
+}
+
+
+int
+afr_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int32_t op_errno = 0;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_READLINK;
+ loc_copy (&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ afr_read_txn (frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+
return 0;
}
/* }}} */
-/* {{{ fstat */
+/* {{{ getxattr */
-int32_t
-afr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+struct _xattr_key {
+ char *key;
+ struct list_head list;
+};
+
+
+int
+__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
+ void *data)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ struct list_head * list = data;
+ struct _xattr_key * xkey = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ if (!strncmp (key, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
- priv = this->private;
- children = priv->children;
+ xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return -1;
- local = frame->local;
+ xkey->key = key;
+ INIT_LIST_HEAD (&xkey->list);
- read_child = (long) cookie;
+ list_add_tail (&xkey->list, list);
+ }
+ return 0;
+}
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.fstat.last_tried;
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.fstat.last_tried;
+void
+afr_filter_xattrs (dict_t *dict)
+{
+ struct list_head keys = {0,};
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
- if (this_try == read_child) {
- goto retry;
- }
+ INIT_LIST_HEAD (&keys);
- unwind = 0;
+ dict_foreach (dict, __gather_xattr_keys,
+ (void *) &keys);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->fstat,
- local->fd);
- }
+ list_for_each_entry_safe (key, tmp, &keys, list) {
+ dict_del (dict, key->key);
-out:
- if (unwind) {
- if (buf)
- buf->ia_ino = local->cont.fstat.ino;
+ list_del_init (&key->list);
+
+ GF_FREE (key);
+ }
+}
+
+static
+gf_boolean_t
+afr_getxattr_ignorable_errnos (int32_t op_errno)
+{
+ if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE ||
+ op_errno == ENAMETOOLONG)
+ return _gf_true;
+
+ return _gf_false;
+}
+int
+afr_getxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
+ local = frame->local;
+
+ if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
}
- return 0;
+ if (dict)
+ afr_filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
}
-int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+int
+afr_getxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int call_child = 0;
- int32_t read_child = -1;
+ local = frame->local;
+ priv = this->private;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, NULL, NULL);
+ return 0;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->getxattr,
+ &local->loc, local->cont.getxattr.name,
+ local->xdata_req);
+ return 0;
+}
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
- children = priv->children;
+int32_t
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata)
- ALLOC_OR_GOTO (local, afr_local_t, out);
+{
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
- frame->local = local;
+int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
+ }
- VALIDATE_OR_GOTO (fd->inode, out);
+ unwind:
+ // Updating child_errno with more recent 'events'
+ op_errno = afr_final_errno (local, priv);
- read_child = afr_read_child (this, fd->inode);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+ if (xattr)
+ dict_unref (xattr);
+ }
- if (read_child >= 0) {
- call_child = read_child;
+ return ret;
+}
- local->cont.fstat.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
+ }
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ unwind:
+ // Updating child_errno with more recent 'events'
+ op_errno = afr_final_errno (local, priv);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
- local->cont.fstat.last_tried = call_child;
+ if (xattr)
+ dict_unref (xattr);
}
- local->cont.fstat.ino = fd->inode->ino;
- local->fd = fd_ref (fd);
+ return ret;
+}
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd);
+/**
+ * node-uuid cbk: on failure (op_ret == -1), re-query the next child in turn
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If all children have been tried without success, give up.
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_msg_debug (this->name, op_errno,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- }
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
- return 0;
+ return 0;
}
-/* }}} */
+int32_t
+afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int idx = (long) cookie;
+ int call_count = 0;
+ afr_local_t *local = frame->local;
+ int read_subvol = -1;
+
+ local->replies[idx].valid = 1;
+ local->replies[idx].op_ret = op_ret;
+ local->replies[idx].op_errno = op_errno;
+ if (dict)
+ local->replies[idx].xdata = dict_ref (dict);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ local->inode = inode_ref (local->loc.inode);
+ read_subvol = afr_handle_quota_size (frame, this);
+ if (read_subvol != -1) {
+ op_ret = local->replies[read_subvol].op_ret;
+ op_errno = local->replies[read_subvol].op_errno;
+ dict = local->replies[read_subvol].xdata;
+ }
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ }
-/* {{{ readlink */
+ return 0;
+}
int32_t
-afr_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf)
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
- priv = this->private;
- children = priv->children;
+ call_cnt = --local->call_count;
- local = frame->local;
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
- read_child = (long) cookie;
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.readlink.last_tried;
+ if (!dict) {
+ goto unlock;
+ }
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.readlink.last_tried;
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- if (this_try == read_child) {
- goto retry;
+ if (!lockinfo_buf) {
+ goto unlock;
}
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readlink,
- &local->loc,
- local->cont.readlink.size);
- }
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
-out:
- if (unwind) {
- if (sbuf)
- sbuf->ia_ino = local->cont.readlink.ino;
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf);
- }
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- return 0;
-}
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
+ return 0;
+}
int32_t
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
- int32_t read_child = -1;
+ call_cnt = --local->call_count;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (!dict) {
+ goto unlock;
+ }
- children = priv->children;
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
- frame->local = local;
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
- read_child = afr_read_child (this, loc->inode);
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- if (read_child >= 0) {
- call_child = read_child;
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
- local->cont.readlink.last_tried = -1;
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- } else {
- call_child = afr_first_up_child (priv);
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- local->cont.readlink.last_tried = call_child;
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
}
- loc_copy (&local->loc, loc);
+ dict_unref (lockinfo);
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg ("", GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
+ if (!dict || (op_ret < 0))
+ goto unlock;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto unlock;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto unlock;
+ }
+
+ local->cont.getxattr.xattr_len
+ += strlen (xattr) + 1;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name)
+ + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz
+ + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- local->cont.readlink.size = size;
- local->cont.readlink.ino = loc->inode->ino;
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->readlink,
- loc, size);
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
- op_ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, NULL, NULL);
- }
- return 0;
+ return ret;
}
+int32_t
+afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg ("", GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
-/* }}} */
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
+ if (!dict || (op_ret < 0))
+ goto unlock;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto unlock;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret,
+ AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr "
+ "cookie key");
+ goto unlock;
+ }
+
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
-/* {{{ getxattr */
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
-struct _xattr_key {
- char *key;
- struct list_head list;
-};
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
-void
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+out:
+ return ret;
+}
+
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
+ int ret = 0;
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_max_stime (THIS, data, key, value);
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return;
+ return ret;
+}
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
- list_add_tail (&xkey->list, list);
+ if (!frame || !frame->local || !this) {
+ gf_msg ("", GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
}
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
+ }
+
+out:
+ return 0;
}
-void
-__filter_xattrs (dict_t *dict)
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
{
- struct list_head keys;
+ gf_boolean_t is_spl = _gf_true;
- struct _xattr_key *key;
- struct _xattr_key *tmp;
+ GF_ASSERT (cbk);
+ if (!cbk || !name) {
+ is_spl = _gf_false;
+ goto out;
+ }
- INIT_LIST_HEAD (&keys);
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY) ||
+ !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) {
+ *cbk = afr_getxattr_quota_size_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
+out:
+ return is_spl;
+}
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
+static void
+afr_getxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->getxattr,
+ loc, name, NULL);
+ if (!--call_count)
+ break;
+ }
+ }
+ return;
+}
- list_del_init (&key->list);
+int
+afr_marker_populate_args (call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
+{
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
- GF_FREE (key);
+ memcpy (subvols, priv->children, sizeof (*subvols) * priv->child_count);
+
+ if (type == MARKER_XTIME_TYPE) {
+ /*Don't error out on ENOENT/ENOTCONN */
+ gauge[MCNT_NOTFOUND] = 0;
+ gauge[MCNT_ENOTCONN] = 0;
}
+ return priv->child_count;
}
+static int
+afr_handle_heal_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *heal_op)
+{
+ int ret = -1;
+ afr_spb_status_t *data = NULL;
+
+ if (!strcmp (heal_op, GF_HEAL_INFO)) {
+ afr_get_heal_info (frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp (heal_op, GF_AFR_HEAL_SBRAIN)) {
+ afr_heal_splitbrain_file (frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp (heal_op, GF_AFR_SBRAIN_STATUS)) {
+ data = GF_CALLOC (1, sizeof (*data), gf_afr_mt_spb_status_t);
+ if (!data) {
+ ret = 1;
+ goto out;
+ }
+ data->frame = frame;
+ data->loc = loc;
+ ret = synctask_new (this->ctx->env,
+ afr_get_split_brain_status,
+ afr_get_split_brain_status_cbk,
+ NULL, data);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_STATUS,
+ "Failed to create"
+ " synctask. Unable to fetch split-brain status"
+ " for %s.", loc->name);
+ ret = 1;
+ goto out;
+ }
+ goto out;
+ }
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND (getxattr, frame, -1, ENOMEM, NULL, NULL);
+ if (data)
+ GF_FREE (data);
+ ret = 0;
+ }
+ return ret;
+}
int32_t
-afr_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
- priv = this->private;
- children = priv->children;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local = frame->local;
+ priv = this->private;
- read_child = (long) cookie;
+ children = priv->children;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.getxattr.last_tried;
+ loc_copy (&local->loc, loc);
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.getxattr.last_tried;
+ local->op = GF_FOP_GETXATTR;
- if (this_try == read_child) {
- goto retry;
- }
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name);
- }
+ if (!name)
+ goto no_name;
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ local->cont.getxattr.name = gf_strdup (name);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
}
- return 0;
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ if (cluster_handle_marker_getxattr (frame, loc, name, priv->vol_uuid,
+ afr_getxattr_unwind,
+ afr_marker_populate_args) == 0)
+ return 0;
+
+ ret = afr_handle_heal_xattrs (frame, this, &local->loc, name);
+ if (ret == 0)
+ return 0;
+
+ /*
+ * Special xattrs which need responses from all subvols
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_all_subvols (this, frame, name, loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+no_name:
+
+ afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+/* {{{ fgetxattr */
+
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- int read_child = -1;
+ if (dict)
+ afr_filter_xattrs (dict);
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ return 0;
+}
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+int
+afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- children = priv->children;
+ local = frame->local;
+ priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+ local->op_errno, NULL, NULL);
+ return 0;
+ }
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ local->xdata_req);
+ return 0;
+}
- op_errno = ENODATA;
- goto out;
+
+static void
+afr_fgetxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ NULL);
+ if (!--call_count)
+ break;
}
}
- read_child = afr_read_child (this, loc->inode);
+ return;
+}
- if (read_child >= 0) {
- call_child = read_child;
- local->cont.getxattr.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
+int
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ fop_fgetxattr_cbk_t cbk = NULL;
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.getxattr.last_tried = call_child;
+ local->op = GF_FOP_FGETXATTR;
+ local->fd = fd_ref (fd);
+ if (name) {
+ local->cont.getxattr.name = gf_strdup (name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_all_subvols (this, frame, cbk);
+ return 0;
}
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ afr_fix_open (fd, this);
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name);
+ afr_read_txn (frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
- }
return 0;
+out:
+ AFR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
@@ -733,154 +1707,162 @@ out:
/* {{{ readv */
-/**
- * read algorithm:
- *
- * if the user has specified a read subvolume, use it
- * otherwise -
- * use the inode number to hash it to one of the subvolumes, and
- * read from there (to balance read load)
- *
- * if any of the above read's fail, try the children in sequence
- * beginning at the beginning
- */
-
-int32_t
+int
afr_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *buf,
+ struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
+
+ AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
+ return 0;
+}
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
- children = priv->children;
+int
+afr_readv_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- read_child = (long) cookie;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readv, frame, local->op_ret, local->op_errno,
+ 0, 0, 0, 0, 0);
+ return 0;
+ }
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.readv.last_tried;
+ STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset, local->cont.readv.flags,
+ local->xdata_req);
+ return 0;
+}
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.readv.last_tried;
-
- if (this_try == read_child) {
- /*
- skip the read child since if we are here
- we must have already tried that child
- */
- goto retry;
- }
- unwind = 0;
+int
+afr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int32_t op_errno = 0;
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset);
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
-out:
- if (unwind) {
- if (buf && local)
- buf->ia_ino = local->cont.readv.ino;
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref (fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
- }
+ afr_fix_open (fd, this);
+
+ afr_read_txn (frame, this, fd->inode, afr_readv_wind,
+ AFR_DATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
return 0;
}
+/* }}} */
+
+/* {{{ seek */
-int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+int
+afr_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_local_t *local = NULL;
- int32_t read_child = -1;
- int call_child = 0;
+ local = frame->local;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- priv = this->private;
- children = priv->children;
+ AFR_STACK_UNWIND (seek, frame, op_ret, op_errno, offset, xdata);
+ return 0;
+}
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+int
+afr_seek_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- read_child = afr_read_child (this, fd->inode);
+ local = frame->local;
+ priv = this->private;
- if (read_child >= 0) {
- call_child = read_child;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (seek, frame, local->op_ret, local->op_errno,
+ 0, NULL);
+ return 0;
+ }
- /*
- if read fails from the read child, we try
- all children starting with the first one
- */
- local->cont.readv.last_tried = -1;
+ STACK_WIND_COOKIE (frame, afr_seek_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->seek,
+ local->fd, local->cont.seek.offset,
+ local->cont.seek.what, local->xdata_req);
+ return 0;
+}
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
- local->cont.readv.last_tried = call_child;
- }
+int
+afr_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
- local->fd = fd_ref (fd);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.readv.ino = fd->inode->ino;
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
+ local->op = GF_FOP_SEEK;
+ local->fd = fd_ref (fd);
+ local->cont.seek.offset = offset;
+ local->cont.seek.what = what;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset);
+ afr_fix_open (fd, this);
- op_ret = 0;
+ afr_read_txn (frame, this, fd->inode, afr_seek_wind,
+ AFR_DATA_TRANSACTION);
+
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
- }
- return 0;
-}
+ AFR_STACK_UNWIND (seek, frame, -1, op_errno, 0, NULL);
+ return 0;
+}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 2b22131db1a..d128134ef2a 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,33 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+int
+afr_handle_quota_size (call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 37909181e21..76526bcf177 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,11 +16,6 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "afr.h"
#include "dict.h"
@@ -43,287 +29,518 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
-
-#include "afr.h"
+#include "protocol-common.h"
+#include "byte-order.h"
#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
-
-/* {{{ writev */
-
-int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int read_subvol = 0;
+ int i = 0;
+ afr_read_subvol_args_t args = {0,};
+ struct iatt *stbuf = NULL;
+ int ret = 0;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
+ /*This code needs to stay till DHT sends fops on linked
+ * inodes*/
+ if (local->inode && !inode_is_linked (local->inode)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ if (!gf_uuid_is_null
+ (local->replies[i].poststat.ia_gfid)) {
+ gf_uuid_copy (args.gfid,
+ local->replies[i].poststat.ia_gfid);
+ args.ia_type =
+ local->replies[i].poststat.ia_type;
+ break;
+ } else {
+ ret = dict_get_bin (local->replies[i].xdata,
+ DHT_IATT_IN_XDATA_KEY,
+ (void **) &stbuf);
+ if (ret)
+ continue;
+ gf_uuid_copy (args.gfid, stbuf->ia_gfid);
+ args.ia_type = stbuf->ia_type;
+ break;
+ }
+ }
+ }
+
+ if (local->inode) {
+ if (local->transaction.type == AFR_METADATA_TRANSACTION)
+ read_subvol = afr_metadata_subvol_get (local->inode,
+ this, NULL, local->readable, NULL, &args);
+ else
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, local->readable, NULL, &args);
}
- UNLOCK (&frame->lock);
- if (main_frame) {
- local->cont.writev.prebuf.ia_ino = local->cont.writev.ino;
- local->cont.writev.postbuf.ia_ino = local->cont.writev.ino;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+ afr_pick_error_xdata (local, priv, local->inode, local->readable, NULL,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ afr_inode_read_subvol_reset (local->inode, this);
+ continue;
+ }
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf);
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precedence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf =
+ local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf =
+ local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ if (local->replies[i].xattr) {
+ if (local->xattr_rsp)
+ dict_unref (local->xattr_rsp);
+ local->xattr_rsp =
+ dict_ref (local->replies[i].xattr);
+ }
+ }
}
- return 0;
+
+ afr_txn_arbitrate_fop_cbk (frame, this);
}
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+static void
+__afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+
+ if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1)
+ op_ret = iov_length (local->cont.writev.vector,
+ local->cont.writev.count);
+
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+
+ if (op_ret >= 0) {
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xattr)
+ local->replies[child_index].xattr = dict_ref (xattr);
+ } else {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
- local = frame->local;
- priv = this->private;
+ return;
+}
- read_child = afr_read_child (this, local->fd->inode);
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+static int
+__afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ priv = this->private;
+ local = frame->local;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
+ LOCK (&frame->lock);
+ {
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xattr,
+ xdata);
+ }
+ UNLOCK (&frame->lock);
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
+ call_count = afr_frame_return (frame);
- local->success_count++;
+ if (call_count == 0) {
+ __afr_inode_write_finalize (frame, this);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
+ if (afr_txn_nothing_failed (frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata &&
+ afr_needs_changelog_update (local))
+ afr_zero_fill_stat (local);
+ local->transaction.unwind (frame, this);
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->transaction.resume (frame, this);
+ }
- call_count = afr_frame_return (frame);
+ return 0;
+}
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+/* {{{ writev */
- local->transaction.resume (frame, this);
- }
-
- return 0;
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref (src_local->xdata_rsp);
+}
+
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t *priv = this->private;
+
+ local = frame->local;
+
+ if (priv->consistent_metadata)
+ afr_zero_fill_stat (local);
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
}
int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
+ call_frame_t *fop_frame = NULL;
- local = frame->local;
- priv = this->private;
+ fop_frame = afr_transaction_detach_fop_frame (frame);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
+ }
+ return 0;
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.iobref);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed (frame, this, i);
+ }
}
-
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
- local = frame->local;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, NULL, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if (ret == -1)
+ goto unlock;
+ if ((open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ __afr_inode_write_finalize (frame, this);
+
+ afr_writev_handle_short_writes (frame, this);
+
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!afr_txn_nothing_failed (frame, this)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
+ }
+ return 0;
+}
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
+static int
+afr_arbiter_writev_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ static char byte = 0xFF;
+ static struct iovec vector = {&byte, 1};
+ int32_t count = 1;
+
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev,
+ local->fd, &vector, count, local->cont.writev.offset,
+ local->cont.writev.flags, local->cont.writev.iobref,
+ local->xdata_req);
- local->transaction.unwind (frame, this);
+ return 0;
+}
- AFR_STACK_DESTROY (frame);
+int
+afr_writev_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- return 0;
+ local = frame->local;
+ priv = this->private;
+
+ if (AFR_IS_ARBITER_BRICK(priv, subvol)) {
+ afr_arbiter_writev_wind (frame, this, subvol);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev,
+ local->fd, local->cont.writev.vector,
+ local->cont.writev.count, local->cont.writev.offset,
+ local->cont.writev.flags, local->cont.writev.iobref,
+ local->xdata_req);
+ return 0;
}
int
afr_do_writev (call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- int op_ret = -1;
- int op_errno = 0;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
local = frame->local;
+ transaction_frame->local = local;
+ frame->local = NULL;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
+ if (!AFR_FRAME_INIT (frame, op_errno))
goto out;
- }
-
- transaction_frame->local = local;
- frame->local = NULL;
-
- local->op = GF_FOP_WRITE;
-
- local->success_count = 0;
-
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
- local->transaction.main_frame = frame;
- if (local->fd->flags & O_APPEND) {
- local->transaction.start = 0;
- local->transaction.len = 0;
- } else {
- local->transaction.start = local->cont.writev.offset;
- local->transaction.len = iov_length (local->cont.writev.vector,
+ local->op = GF_FOP_WRITE;
+
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_transaction_writev_unwind;
+
+ local->transaction.main_frame = frame;
+
+ if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+ } else {
+ local->transaction.start = local->cont.writev.offset;
+ local->transaction.len = iov_length (local->cont.writev.vector,
local->cont.writev.count);
- }
+ }
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int ret = -1;
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
- int op_ret = -1;
- int op_errno = 0;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx = NULL;
+ local->cont.writev.vector = iov_dup (vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref (iobref);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- priv = this->private;
+ if (!local->xdata_req)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
goto out;
}
- frame->local = local;
-
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.ino = fd->inode->ino;
- local->cont.writev.iobref = iobref_ref (iobref);
-
- local->fd = fd_ref (fd);
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- goto out;
- }
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ /* Set append_write to be true speculatively. If on any
+ server it turns out not to be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
- if (fd_ctx->down_count < priv->down_count) {
- local->up_down_flush_cbk = afr_do_writev;
- afr_up_down_flush (frame, this, fd, AFR_CHILD_DOWN_FLUSH);
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- } else if (fd_ctx->up_count < priv->up_count) {
- local->up_down_flush_cbk = afr_do_writev;
- afr_up_down_flush (frame, this, fd, AFR_CHILD_UP_FLUSH);
+ afr_fix_open (fd, this);
- } else {
- afr_do_writev (frame, this);
- }
+ afr_do_writev (frame, this);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
@@ -334,1293 +551,1941 @@ out:
int
afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- local->cont.truncate.prebuf.ia_ino = local->cont.truncate.ino;
- local->cont.truncate.postbuf.ia_ino = local->cont.truncate.ino;
+ AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf);
- }
- return 0;
+int
+afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, NULL, xdata);
}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+afr_truncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate,
+ &local->loc, local->cont.truncate.offset,
+ local->xdata_req);
+ return 0;
+}
- read_child = afr_read_child (this, local->loc.inode);
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+int
+afr_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
+ local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->success_count++;
+ if (!local->xdata_req)
+ goto out;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_truncate_unwind;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
- call_count = afr_frame_return (frame);
+ local->op = GF_FOP_TRUNCATE;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
+
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+/* }}} */
- local = frame->local;
- priv = this->private;
+/* {{{ ftruncate */
- call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+int
+afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local->call_count = call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
local = frame->local;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, NULL, xdata);
}
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int ret = -1;
+ local = frame->local;
+ priv = this->private;
- int op_ret = -1;
- int op_errno = 0;
+ STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate,
+ local->fd, local->cont.ftruncate.offset,
+ local->xdata_req);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+int
+afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
goto out;
- }
- transaction_frame->local = local;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->op_ret = -1;
+ local->op = GF_FOP_FTRUNCATE;
- local->cont.truncate.offset = offset;
- local->cont.truncate.ino = loc->inode->ino;
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_ftruncate_unwind;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+ local->transaction.main_frame = frame;
- loc_copy (&local->loc, loc);
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = offset;
+ afr_fix_open (fd, this);
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
return 0;
-}
+out:
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
/* }}} */
-/* {{{ ftruncate */
-
+/* {{{ setattr */
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- local->cont.ftruncate.prebuf.ia_ino = local->cont.ftruncate.ino;
- local->cont.ftruncate.postbuf.ia_ino = local->cont.ftruncate.ino;
+ AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+ return 0;
+}
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf);
- }
- return 0;
+
+int
+afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, NULL, xdata);
}
int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+afr_setattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr,
+ &local->loc, &local->cont.setattr.in_buf,
+ local->cont.setattr.valid, local->xdata_req);
+ return 0;
+}
- read_child = afr_read_child (this, local->fd->inode);
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+/*
+ * setattr fop entry point. Copies the caller's frame into a transaction
+ * frame, initialises a local on it, stores the requested attributes, the
+ * valid mask and a copy of (or a fresh) xdata, and starts an
+ * AFR_METADATA_TRANSACTION. On failure the transaction frame, if one was
+ * created, is destroyed and the caller's frame is unwound with -1 and
+ * op_errno. Always returns 0.
+ */
+int
+afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->success_count++;
+ if (!local->xdata_req)
+ goto out;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_setattr_unwind;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
- call_count = afr_frame_return (frame);
+ local->op = GF_FOP_SETATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* {{{ fsetattr */
+/*
+ * Unwind step of the fsetattr transaction. Detaches the caller's frame
+ * from the transaction frame with afr_transaction_detach_fop_frame() and,
+ * when one is returned, unwinds fsetattr on it with the op_ret, op_errno,
+ * pre/post iatts (cont.inode_wfop) and xdata_rsp held in local.
+ * Returns 0 in all cases.
+ */
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
return 0;
- }
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
+/*
+ * Per-child reply callback for fsetattr. Forwards the reply unchanged to
+ * __afr_inode_write_cbk(), passing preop/postop and NULL for the argument
+ * that follows them.
+ */
int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, NULL, xdata);
+}
- local = frame->local;
- local->transaction.unwind (frame, this);
+/*
+ * Winds fsetattr to the child at index `subvol`, using that index as the
+ * cookie, with the fd, attribute buffer, valid mask and xdata_req stored
+ * in the frame's local. Returns 0.
+ */
+int
+afr_fsetattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr,
+ local->fd, &local->cont.fsetattr.in_buf,
+ local->cont.fsetattr.valid, local->xdata_req);
+ return 0;
}
+/*
+ * fsetattr fop entry point. Same flow as the loc-based setattr, but keyed
+ * on an fd: copies the caller's frame, initialises a local, stores the
+ * attributes, valid mask and xdata, takes references on fd and its inode,
+ * calls afr_fix_open() on the fd and starts an AFR_METADATA_TRANSACTION.
+ * On failure the transaction frame, if created, is destroyed and the
+ * caller's frame is unwound with -1 and op_errno. Always returns 0.
+ */
int
-afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- int op_ret = -1;
- int op_errno = 0;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- local = frame->local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
goto out;
- }
- transaction_frame->local = local;
- frame->local = NULL;
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsetattr_unwind;
- local->op = GF_FOP_FTRUNCATE;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
+ local->op = GF_FOP_FSETATTR;
- local->transaction.main_frame = frame;
+ afr_fix_open (fd, this);
- local->transaction.start = 0;
- local->transaction.len = local->cont.ftruncate.offset;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+/* {{{ setxattr */
+
+
+/*
+ * Unwind step of the setxattr transaction. Detaches the caller's frame
+ * from the transaction frame and, when one is returned, unwinds setxattr
+ * on it with the op_ret, op_errno and xdata_rsp held in local.
+ * Returns 0 in all cases.
+ */
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- int ret = -1;
+ local = frame->local;
- int op_ret = -1;
- int op_errno = 0;
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx = NULL;
+ AFR_STACK_UNWIND (setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+/*
+ * Per-child reply callback for setxattr. Forwards the reply to
+ * __afr_inode_write_cbk(); setxattr carries no iatts, so the three
+ * pointer arguments before xdata are passed as NULL.
+ */
+int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, NULL, xdata);
+}
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+/*
+ * Winds setxattr to the child at index `subvol`, using that index as the
+ * cookie, with the loc, xattr dict, flags and xdata_req stored in the
+ * frame's local. Returns 0.
+ */
+int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- frame->local = local;
+ local = frame->local;
+ priv = this->private;
- local->cont.ftruncate.offset = offset;
- local->cont.ftruncate.ino = fd->inode->ino;
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr,
+ &local->loc, local->cont.setxattr.dict,
+ local->cont.setxattr.flags, local->xdata_req);
+ return 0;
+}
- local->fd = fd_ref (fd);
+/*
+ * Reply callback for the xattrop issued by
+ * afr_emptyb_set_pending_changelog(). The cookie is the child index; the
+ * reply's op_ret/op_errno are recorded in local->replies[i]. If
+ * "replicate-brick-op" can be read from local->xdata_req, the outcome is
+ * logged (ERROR on failure, INFO on success). The barrier in local is
+ * always woken before returning 0.
+ */
+int
+afr_emptyb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i, ret = 0;
+ char *op_type = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ i = (long) cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+
+ ret = dict_get_str (local->xdata_req, "replicate-brick-op", &op_type);
+ if (ret)
goto out;
+
+ gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0,
+ afr_get_msg_id (op_type),
+ "Set of pending xattr %s on"
+ " %s.", op_ret ? "failed" : "succeeded",
+ priv->children[i]->name);
+
+out:
+ syncbarrier_wake (&local->barrier);
+ return 0;
+}
+
+/*
+ * Issues a GF_XATTROP_ADD_ARRAY xattrop with local->xattr_req on the
+ * children flagged in locked_nodes (via AFR_ONLIST) and evaluates the
+ * replies. Returns 0 as soon as one valid reply succeeded; otherwise
+ * returns the negated value accumulated through afr_higher_errno() over
+ * the failed replies.
+ * NOTE(review): if no reply is marked valid, ret stays 0 and the function
+ * returns 0 - confirm callers guarantee at least one locked child.
+ */
+int
+afr_emptyb_set_pending_changelog (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_nodes)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0, i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ AFR_ONLIST (locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,
+ xattrop, &local->loc, GF_XATTROP_ADD_ARRAY,
+ local->xattr_req, NULL);
+
+ /* It is sufficient if xattrop was successful on one child */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret == 0) {
+ ret = 0;
+ goto out;
+ } else {
+ ret = afr_higher_errno (ret,
+ local->replies[i].op_errno);
+ }
}
+out:
+ return -ret;
+}
+
+/*
+ * Marks the brick at `empty_index` as needing heal for one transaction
+ * type. Builds a pending matrix with a count of 1 for that brick (and,
+ * for entry transactions with granular entry self-heal enabled, also for
+ * the data index), converts it into local->xattr_req, takes the matching
+ * self-heal lock (entrylk for AFR_ENTRY_TRANSACTION, inodelk otherwise),
+ * applies the xattrop through afr_emptyb_set_pending_changelog() and
+ * releases the lock.
+ *
+ * `op_type` is stored in local->xdata_req under "replicate-brick-op".
+ *
+ * Returns 0 on success or a non-zero error value: -ENOMEM for allocation
+ * failures, -EAGAIN when no child could be locked, otherwise the value
+ * reported by the failing helper. local->pending, local->xdata_req and
+ * local->xattr_req are left for the caller to release.
+ */
+int
+_afr_handle_empty_brick_type (xlator_t *this, call_frame_t *frame,
+ loc_t *loc, int empty_index,
+ afr_transaction_type type,
+ char *op_type)
+{
+ int count = 0;
+ int ret = -ENOMEM;
+ int idx = -1;
+ int d_idx = -1;
+ unsigned char *locked_nodes = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ locked_nodes = alloca0 (priv->child_count);
+
+ idx = afr_index_for_transaction_type (type);
+ d_idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->pending[empty_index][idx] = hton32 (1);
+
+ if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION))
+ local->pending[empty_index][d_idx] = hton32 (1);
+
+ local->xdata_req = dict_new ();
+ if (!local->xdata_req)
+ goto out;
+
+ ret = dict_set_str (local->xdata_req, "replicate-brick-op", op_type);
+ if (ret)
+ goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req) {
+ /* ret is 0 here from the successful dict_set_str() above;
+ * without resetting it this failure would be reported as
+ * success. */
+ ret = -ENOMEM;
+ goto out;
+ }
- if (fd_ctx->down_count < priv->down_count) {
- local->up_down_flush_cbk = afr_do_ftruncate;
- afr_up_down_flush (frame, this, fd, AFR_CHILD_DOWN_FLUSH);
+ ret = afr_set_pending_dict (priv, local->xattr_req, local->pending);
+ if (ret < 0)
+ goto out;
+
+ if (AFR_ENTRY_TRANSACTION == type) {
+ count = afr_selfheal_entrylk (frame, this, loc->inode,
+ this->name, NULL, locked_nodes);
} else {
- afr_do_ftruncate (frame, this);
+ count = afr_selfheal_inodelk (frame, this, loc->inode,
+ this->name, LLONG_MAX - 1, 0,
+ locked_nodes);
}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (!count) {
+ gf_msg (this->name, GF_LOG_ERROR, EAGAIN,
+ AFR_MSG_REPLACE_BRICK_STATUS, "Couldn't acquire lock on"
+ " any child.");
+ ret = -EAGAIN;
+ goto unlock;
+ }
- return 0;
+ ret = afr_emptyb_set_pending_changelog (frame, this, locked_nodes);
+ if (ret)
+ goto unlock;
+ ret = 0;
+unlock:
+ if (AFR_ENTRY_TRANSACTION == type) {
+ afr_selfheal_unentrylk (frame, this, loc->inode, this->name,
+ NULL, locked_nodes, NULL);
+ } else {
+ afr_selfheal_uninodelk (frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+out:
+ return ret;
}
-/* }}} */
+/*
+ * Releases an afr_empty_brick_args_t passed as an opaque pointer: wipes
+ * the embedded loc and frees the structure itself.
+ */
+void
+afr_brick_args_cleanup (void *opaque)
+{
+ afr_empty_brick_args_t *data = NULL;
-/* {{{ setattr */
+ data = opaque;
+ loc_wipe (&data->loc);
+ GF_FREE (data);
+}
+/*
+ * Completion callback registered with synctask_new() for
+ * _afr_handle_empty_brick(); it only releases the task's argument block.
+ * `ret` and `frame` are unused. Returns 0.
+ */
int
-afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
+_afr_handle_empty_brick_cbk (int ret, call_frame_t *frame, void *opaque)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_brick_args_cleanup (opaque);
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+/*
+ * Synctask body for the replace-brick/add-brick setxattr. `opaque` is an
+ * afr_empty_brick_args_t carrying the frame, loc, index of the empty
+ * brick and the operation type. Initialises a local on the frame, then
+ * runs _afr_handle_empty_brick_type() first for AFR_METADATA_TRANSACTION
+ * and, after releasing the dicts and pending matrix left by that pass,
+ * for AFR_ENTRY_TRANSACTION. The frame is unwound as setxattr with 0 on
+ * success or -1 and the negated helper result as op_errno. Returns 0.
+ */
+int
+_afr_handle_empty_brick (void *opaque)
+{
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int empty_index = -1;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *op_type = NULL;
+ afr_empty_brick_args_t *data = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ empty_index = data->empty_index;
+ op_type = data->op_type;
+ this = frame->this;
+ priv = this->private;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy (&local->loc, &data->loc);
- if (main_frame) {
- local->cont.setattr.preop_buf.ia_ino = local->cont.setattr.ino;
- local->cont.setattr.postop_buf.ia_ino = local->cont.setattr.ino;
+ gf_msg_debug (this->name, 0, "New brick is : %s",
+ priv->children[empty_index]->name);
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf);
+ ret = _afr_handle_empty_brick_type (this, frame, &local->loc, empty_index,
+ AFR_METADATA_TRANSACTION, op_type);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
}
- return 0;
+ dict_unref (local->xdata_req);
+ dict_unref (local->xattr_req);
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ local->pending = NULL;
+ local->xattr_req = NULL;
+ local->xdata_req = NULL;
+
+ ret = _afr_handle_empty_brick_type (this, frame, &local->loc, empty_index,
+ AFR_ENTRY_TRANSACTION, op_type);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
+ return 0;
}
+/*
+ * Starts a split-brain heal of `loc` using the child named `data` as the
+ * source. Fills local->xdata_req with "heal-op" =
+ * GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK and "child-name" = data, resets the
+ * inode's split-brain choice to -1 (a failure there is only logged) and
+ * calls afr_heal_splitbrain_file(). If building xdata_req fails, the
+ * frame is unwound as setxattr with -1 and op_errno. Always returns 0.
+ */
int
-afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_split_brain_resolve_do (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *data)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
+ local->xdata_req = dict_new ();
- local = frame->local;
- priv = this->private;
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int32 (local->xdata_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (local->xdata_req, "child-name", data);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ /* set spb choice to -1 whether heal succeeds or not:
+ * If heal succeeds : spb-choice should be set to -1 as
+ * it is no longer valid; file is not
+ * in split-brain anymore.
+ * If heal doesn't succeed:
+ * spb-choice should be set to -1
+ * otherwise reads will be served
+ * from spb-choice which is misleading.
+ */
+ ret = afr_inode_split_brain_choice_set (loc->inode, this, -1);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, "Failed to set "
+ "split-brain choice to -1");
+ afr_heal_splitbrain_file (frame, this, loc);
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- read_child = afr_read_child (this, local->loc.inode);
+/*
+ * Translates a split-brain xattr value into a child index. `value` is
+ * `len` bytes that need not be NUL-terminated; it is copied into a
+ * zeroed stack buffer of len + 1 bytes first. Returns -2 when the value
+ * is the string "none"; otherwise returns the result of
+ * afr_get_child_index_from_name(), logging an error when that result is
+ * negative.
+ */
+int
+afr_get_split_brain_child_index (xlator_t *this, void *value, size_t len)
+{
+ int spb_child_index = -1;
+ char *spb_child_str = NULL;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ spb_child_str = alloca0 (len + 1);
+ memcpy (spb_child_str, value, len);
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ if (!strcmp (spb_child_str, "none"))
+ return -2;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ spb_child_index = afr_get_child_index_from_name (this,
+ spb_child_str);
+ if (spb_child_index < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL, "Invalid subvol: %s",
+ spb_child_str);
+ }
+ return spb_child_index;
+}
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
+/*
+ * Task function handed to synctask_new() by
+ * afr_handle_split_brain_commands(). `opaque` is an afr_spbc_timeout_t;
+ * the function calls afr_is_split_brain() for the loc's inode and gfid,
+ * storing the results in data->d_spb and data->m_spb. A non-zero result
+ * is logged as an error. Returns the afr_is_split_brain() result.
+ */
+int
+afr_can_set_split_brain_choice (void *opaque)
+{
+ afr_spbc_timeout_t *data = opaque;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ int ret = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+
+ ret = afr_is_split_brain (frame, this, loc->inode, loc->gfid,
+ &data->d_spb, &data->m_spb);
+
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to determine if %s"
+ " is in split-brain. "
+ "Aborting split-brain-choice set.",
+ uuid_utoa (loc->gfid));
+ return ret;
+}
+
+/*
+ * Handles the two split-brain virtual xattrs of a setxattr request.
+ *
+ * GF_AFR_SBRAIN_CHOICE: the value names a child or is "none" (mapped to
+ * index -1). A synctask is started that runs
+ * afr_can_set_split_brain_choice() and then afr_set_split_brain_choice()
+ * with an afr_spbc_timeout_t describing the request.
+ *
+ * GF_AFR_SBRAIN_RESOLVE: the value must name a child; the heal is started
+ * through afr_split_brain_resolve_do().
+ *
+ * Returns 0 when the request was consumed here, which includes the error
+ * cases: those unwind the frame as setxattr with -1/op_errno (internally
+ * flagged by ret == 1). When neither key carries a value, the return
+ * value of the last dict_get_ptr_and_len() is handed back so the caller
+ * can continue with other handling.
+ */
+int
+afr_handle_split_brain_commands (xlator_t *this, call_frame_t *frame,
+ loc_t *loc, dict_t *dict)
+{
+ void *value = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_spbc_timeout_t *data = NULL;
+ int len = 0;
+ int spb_child_index = -1;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ ret = 1;
+ goto out;
+ }
+
+ local->op = GF_FOP_SETXATTR;
+
+ ret = dict_get_ptr_and_len (dict, GF_AFR_SBRAIN_CHOICE, &value,
+ &len);
+ if (value) {
+ spb_child_index = afr_get_split_brain_child_index (this, value,
+ len);
+ if (spb_child_index < 0) {
+ /* Case where value was "none" */
+ if (spb_child_index == -2)
+ spb_child_index = -1;
+ else {
+ ret = 1;
+ op_errno = EINVAL;
+ goto out;
}
+ }
- local->success_count++;
+ data = GF_CALLOC (1, sizeof (*data), gf_afr_mt_spbc_timeout_t);
+ if (!data) {
+ /* Allocation failure: report ENOMEM rather than the
+ * EINVAL op_errno was initialised with. */
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ data->spb_child_index = spb_child_index;
+ data->frame = frame;
+ data->loc = loc;
+ ret = synctask_new (this->ctx->env,
+ afr_can_set_split_brain_choice,
+ afr_set_split_brain_choice, NULL, data);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to create"
+ " synctask. Aborting split-brain choice set"
+ " for %s", loc->name);
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ ret = dict_get_ptr_and_len (dict, GF_AFR_SBRAIN_RESOLVE, &value, &len);
+ if (value) {
+ spb_child_index = afr_get_split_brain_child_index (this, value,
+ len);
+ if (spb_child_index < 0) {
+ ret = 1;
+ goto out;
+ }
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ afr_split_brain_resolve_do (frame, this, loc,
+ priv->children[spb_child_index]->name);
+ ret = 0;
+ }
+out:
+ /* key was correct but value was invalid when ret == 1 */
+ if (ret == 1) {
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ if (data)
+ GF_FREE (data);
+ ret = 0;
+ }
+ return ret;
+}
- call_count = afr_frame_return (frame);
+/*
+ * Handles the GF_AFR_SPB_CHOICE_TIMEOUT virtual xattr. When the key is
+ * present as a uint64, stores the value multiplied by 60 in
+ * priv->spb_choice_timeout (presumably minutes converted to seconds -
+ * TODO confirm) and unwinds the frame as a successful setxattr. Returns
+ * the dict_get_uint64() result, i.e. 0 only when the request was
+ * consumed here.
+ */
+int
+afr_handle_spb_choice_timeout (xlator_t *this, call_frame_t *frame,
+ dict_t *dict)
+{
+ int ret = -1;
+ int op_errno = 0;
+ uint64_t timeout = 0;
+ afr_private_t *priv = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ priv = this->private;
- return 0;
+ ret = dict_get_uint64 (dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout);
+ if (!ret) {
+ priv->spb_choice_timeout = timeout * 60;
+ AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
+ }
+
+ return ret;
}
+/*
+ * Handles the GF_AFR_REPLACE_BRICK / GF_AFR_ADD_BRICK virtual xattrs.
+ * If neither key is present the (non-zero) dict_get_str() result is
+ * returned and the frame is left untouched. Otherwise the request is
+ * consumed and 0 is returned:
+ * - callers whose pid is not GF_CLIENT_PID_SELF_HEALD are rejected with
+ * EPERM;
+ * - a brick name that does not resolve to a child of this xlator is
+ * answered with success as a no-op;
+ * - otherwise a synctask running _afr_handle_empty_brick() is started
+ * with a heap-allocated argument block, which is released by
+ * _afr_handle_empty_brick_cbk() or, if the task cannot be created,
+ * right here before unwinding with ENOMEM.
+ */
+int
+afr_handle_empty_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ int ret = -1;
+ int ab_ret = -1;
+ int empty_index = -1;
+ int op_errno = EPERM;
+ char *empty_brick = NULL;
+ char *op_type = NULL;
+ afr_empty_brick_args_t *data = NULL;
+
+ ret = dict_get_str (dict, GF_AFR_REPLACE_BRICK, &empty_brick);
+ if (!ret)
+ op_type = GF_AFR_REPLACE_BRICK;
+
+ ab_ret = dict_get_str (dict, GF_AFR_ADD_BRICK, &empty_brick);
+ if (!ab_ret)
+ op_type = GF_AFR_ADD_BRICK;
+
+ if (ret && ab_ret)
+ goto out;
-int32_t
-afr_setattr_wind (call_frame_t *frame, xlator_t *this)
+ if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) {
+ gf_msg (this->name, GF_LOG_ERROR, EPERM,
+ afr_get_msg_id (op_type),
+ "'%s' is an internal extended attribute.",
+ op_type);
+ ret = 1;
+ goto out;
+ }
+ empty_index = afr_get_child_index_from_name (this, empty_brick);
+
+ if (empty_index < 0) {
+ /* Didn't belong to this replica pair
+ * Just do a no-op
+ */
+ AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL);
+ return 0;
+ } else {
+ data = GF_CALLOC (1, sizeof (*data),
+ gf_afr_mt_empty_brick_t);
+ if (!data) {
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ data->frame = frame;
+ loc_copy (&data->loc, loc);
+ data->empty_index = empty_index;
+ data->op_type = op_type;
+ ret = synctask_new (this->ctx->env,
+ _afr_handle_empty_brick,
+ _afr_handle_empty_brick_cbk,
+ NULL, data);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ afr_get_msg_id (op_type),
+ "Failed to create synctask.");
+ ret = 1;
+ op_errno = ENOMEM;
+ afr_brick_args_cleanup (data);
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ ret = 0;
+ }
+ return ret;
+}
+
+/*
+ * Tries the special (virtual) setxattr handlers in order: split-brain
+ * commands, split-brain choice timeout, then replace-brick/add-brick.
+ * Stops at the first handler that returns 0. Returns 0 when one of them
+ * consumed the request, otherwise the result of the last handler.
+ */
+static int
+afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ int ret = -1;
- int call_count = -1;
- int i = 0;
+ ret = afr_handle_split_brain_commands (this, frame, loc, dict);
+ if (ret == 0)
+ goto out;
- local = frame->local;
- priv = this->private;
+ ret = afr_handle_spb_choice_timeout (this, frame, dict);
+ if (ret == 0)
+ goto out;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ /* Applicable for replace-brick and add-brick commands */
+ ret = afr_handle_empty_brick (this, frame, loc, dict);
+out:
+ return ret;
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+/*
+ * setxattr fop entry point. Requests that touch "trusted.afr.*" or
+ * "trusted.glusterfs.afr.*" keys are diverted to `out` by
+ * GF_IF_INTERNAL_XATTR_GOTO. Special virtual xattrs are then offered to
+ * afr_handle_special_xattr(); when it returns 0 the request has been
+ * consumed there. Everything else becomes a regular
+ * AFR_METADATA_TRANSACTION on a copy of the caller's frame. On failure
+ * the transaction frame, if created, is destroyed and the caller's frame
+ * is unwound with -1 and op_errno. Always returns 0.
+ */
+int
+afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- local->call_count = call_count;
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc,
- &local->cont.setattr.in_buf,
- local->cont.setattr.valid);
-
- if (!--call_count)
- break;
- }
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
+
+ ret = afr_handle_special_xattr (this, frame, loc, dict);
+ if (ret == 0)
+ return 0;
+
+ /* copy_frame() and dict_new()/dict_copy_with_ref() below fail only
+ * on allocation failure; report ENOMEM for those, matching
+ * afr_fsetxattr(), instead of the initial EINVAL. */
+ op_errno = ENOMEM;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_setxattr_unwind;
+
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ local->op = GF_FOP_SETXATTR;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+ return 0;
}
+/* {{{ fsetxattr */
+
+/*
+ * Unwind step of the fsetxattr transaction. Detaches the caller's frame
+ * from the transaction frame and, when one is returned, unwinds
+ * fsetxattr on it with the op_ret, op_errno and xdata_rsp held in local.
+ * Returns 0 in all cases.
+ */
int
-afr_setattr_done (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_UNWIND (fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- return 0;
+
+/*
+ * Per-child reply callback for fsetxattr. Forwards the reply to
+ * __afr_inode_write_cbk() with NULL for the three pointer arguments that
+ * precede xdata.
+ */
+int
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, NULL, xdata);
}
+/*
+ * Winds fsetxattr to the child at index `subvol`, using that index as the
+ * cookie, with the fd, xattr dict, flags and xdata_req stored in the
+ * frame's local. Returns 0.
+ */
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int ret = -1;
+ local = frame->local;
+ priv = this->private;
- int op_ret = -1;
- int op_errno = 0;
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr,
+ local->fd, local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags, local->xdata_req);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+/*
+ * fsetxattr fop entry point. Requests that touch "trusted.afr.*" or
+ * "trusted.glusterfs.afr.*" keys are diverted to `out` by
+ * GF_IF_INTERNAL_XATTR_GOTO. Otherwise the caller's frame is copied, a
+ * local is initialised with references to the dict, fd and inode plus a
+ * copy of (or a fresh) xdata, and an AFR_METADATA_TRANSACTION is started.
+ * On failure the transaction frame, if created, is destroyed and the
+ * caller's frame is unwound with -1 and op_errno. Always returns 0.
+ */
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
goto out;
- }
- transaction_frame->local = local;
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
- local->op_ret = -1;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.setattr.ino = loc->inode->ino;
+ if (!local->xdata_req)
+ goto out;
- local->cont.setattr.in_buf = *buf;
- local->cont.setattr.valid = valid;
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
- local->transaction.fop = afr_setattr_wind;
- local->transaction.done = afr_setattr_done;
- local->transaction.unwind = afr_setattr_unwind;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_FSETXATTR;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ return 0;
}
-/* {{{ fsetattr */
+/* }}} */
+
+
+/* {{{ removexattr */
+
+/*
+ * Unwind step of the removexattr transaction. Detaches the caller's
+ * frame from the transaction frame and, when one is returned, unwinds
+ * removexattr on it with the op_ret, op_errno and xdata_rsp held in
+ * local. Returns 0 in all cases.
+ */
int
-afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.fsetattr.preop_buf.ia_ino =
- local->cont.fsetattr.ino;
- local->cont.fsetattr.postop_buf.ia_ino =
- local->cont.fsetattr.ino;
-
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
+/*
+ * Per-child reply callback for removexattr. Forwards the reply to
+ * __afr_inode_write_cbk() with NULL for the three pointer arguments that
+ * precede xdata.
+ */
int
-afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, NULL, xdata);
+}
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+
+/*
+ * Winds removexattr to the child at index `subvol`, using that index as
+ * the cookie, with the loc, xattr name and xdata_req stored in the
+ * frame's local. Returns 0.
+ */
+int
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr,
+ &local->loc, local->cont.removexattr.name,
+ local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+/*
+ * removexattr fop entry point. Names matching "trusted.afr.*" or
+ * "trusted.glusterfs.afr.*" are diverted to `out` by
+ * GF_IF_NATIVE_XATTR_GOTO. Otherwise the caller's frame is copied, a
+ * local is initialised with a private copy of the name, a copy of (or a
+ * fresh) xdata, the loc and an inode reference, and an
+ * AFR_METADATA_TRANSACTION is started. On failure the transaction frame,
+ * if created, is destroyed and the caller's frame is unwound with -1 and
+ * op_errno. Always returns 0.
+ */
+int
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- local->success_count++;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->cont.removexattr.name = gf_strdup (name);
+ /* Without this check a failed copy would wind a NULL name to the
+ * children; op_errno is still ENOMEM at this point. */
+ if (!local->cont.removexattr.name)
+ goto out;
- call_count = afr_frame_return (frame);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (!local->xdata_req)
+ goto out;
- return 0;
-}
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_removexattr_unwind;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
-int32_t
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ local->op = GF_FOP_REMOVEXATTR;
- int call_count = -1;
- int i = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local = frame->local;
- priv = this->private;
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- local->call_count = call_count;
+/* fremovexattr */
+int
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this) /* installed as transaction.unwind by afr_fremovexattr; unwinds the detached main frame */
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetattr,
- local->fd,
- &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid);
-
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
- return 0;
+ main_frame = afr_transaction_detach_fop_frame (frame); /* nothing is unwound when no main frame comes back */
+ if (!main_frame)
+ return 0;
+
+ AFR_STACK_UNWIND (fremovexattr, main_frame, local->op_ret, local->op_errno, /* result fields come from the transaction's local */
+ local->xdata_rsp);
+ return 0;
}
int
-afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* callback passed to STACK_WIND_COOKIE by afr_fremovexattr_wind */
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, /* delegates to the shared inode-write callback */
+ NULL, NULL, NULL, xdata); /* this fop supplies only xdata; the three preceding slots are passed as NULL */
+}
- local = frame->local;
- local->transaction.unwind (frame, this);
+int
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this, int subvol) /* installed as transaction.wind; sends fremovexattr to child number subvol */
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, (void *) (long) subvol, /* the child index travels as the cookie */
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr,
+ local->fd, local->cont.removexattr.name, /* fd and name were stored by afr_fremovexattr */
+ local->xdata_req);
+ return 0;
}
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid)
+afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, /* fremovexattr entry point: runs the removal as a metadata transaction */
+ const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM; /* default errno reported by the early goto-out paths below */
- int ret = -1;
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- int op_ret = -1;
- int op_errno = 0;
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ local->cont.removexattr.name = gf_strdup (name); /* private copy; checked together with xdata_req below */
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->cont.removexattr.name || !local->xdata_req) /* bail out if either allocation failed instead of winding a NULL name */
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
+
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FREMOVEXATTR;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
+ }
- transaction_frame->local = local;
+ return 0;
+out:
+ if (transaction_frame) /* failure path: drop the copied frame, then fail the caller's frame */
+ AFR_STACK_DESTROY (transaction_frame);
- local->op_ret = -1;
+ AFR_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
- local->cont.fsetattr.ino = fd->inode->ino;
+ return 0;
+}
- local->cont.fsetattr.in_buf = *buf;
- local->cont.fsetattr.valid = valid;
- local->transaction.fop = afr_fsetattr_wind;
- local->transaction.done = afr_fsetattr_done;
- local->transaction.unwind = afr_fsetattr_unwind;
+int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this) /* installed as transaction.unwind by afr_fallocate; unwinds the detached main frame */
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local->fd = fd_ref (fd);
+ local = frame->local;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ main_frame = afr_transaction_detach_fop_frame (frame); /* nothing is unwound when no main frame comes back */
+ if (!main_frame)
+ return 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf, /* pre/post iatt are read from cont.inode_wfop */
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL);
- }
- return 0;
+int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* callback passed to STACK_WIND_COOKIE by afr_fallocate_wind */
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, /* delegates to the shared inode-write callback */
+ prebuf, postbuf, NULL, xdata); /* the slot between postbuf and xdata is unused here */
}
-/* {{{ setxattr */
+int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this, int subvol) /* installed as transaction.wind; sends fallocate to child number subvol */
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, (void *) (long) subvol, /* the child index travels as the cookie */
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate,
+ local->fd, local->cont.fallocate.mode, /* arguments were stored by afr_fallocate */
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
+ return 0;
+}
int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, /* fallocate entry point: runs the request as a data transaction */
+ off_t offset, size_t len, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM; /* default errno reported by the early goto-out paths below */
- local = frame->local;
- priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
- return 0;
-}
+ local->cont.fallocate.mode = mode; /* saved for afr_fallocate_wind */
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
-int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new (); /* an xdata dict is always present for the wind */
- int call_count = -1;
- int need_unwind = 0;
+ if (!local->xdata_req)
+ goto out;
- local = frame->local;
- priv = this->private;
+ local->op = GF_FOP_FALLOCATE;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fallocate_unwind;
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ local->transaction.main_frame = frame;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0; /* NOTE(review): len is fixed at 0 rather than the caller's len; confirm this is intended */
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ afr_fix_open (fd, this);
- call_count = afr_frame_return (frame);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
return 0;
+out:
+ if (transaction_frame) /* failure path: drop the copied frame, then fail the caller's frame */
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+/* }}} */
- int call_count = -1;
- int i = 0;
+/* {{{ discard */
- local = frame->local;
- priv = this->private;
+int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this) /* installed as transaction.unwind by afr_discard; unwinds the detached main frame */
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame); /* nothing is unwound when no main frame comes back */
+ if (!main_frame)
return 0;
- }
- local->call_count = call_count;
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf, /* pre/post iatt are read from cont.inode_wfop */
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* callback passed to STACK_WIND_COOKIE by afr_discard_wind */
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, /* delegates to the shared inode-write callback */
+ prebuf, postbuf, NULL, xdata); /* the slot between postbuf and xdata is unused here */
}
int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+afr_discard_wind (call_frame_t *frame, xlator_t *this, int subvol) /* installed as transaction.wind; sends discard to child number subvol */
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, (void *) (long) subvol, /* the child index travels as the cookie */
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard,
+ local->fd, local->cont.discard.offset, /* arguments were stored by afr_discard */
+ local->cont.discard.len, local->xdata_req);
+ return 0;
}
int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, /* discard entry point: runs the request as a data transaction */
+ size_t len, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM; /* default errno reported by the early goto-out paths below */
- int ret = -1;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- int op_ret = -1;
- int op_errno = 0;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local->cont.discard.offset = offset; /* saved for afr_discard_wind */
+ local->cont.discard.len = len;
- priv = this->private;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new (); /* an xdata dict is always present for the wind */
+
+ if (!local->xdata_req)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->op = GF_FOP_DISCARD;
+
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_discard_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0; /* NOTE(review): len is fixed at 0 rather than the caller's len; confirm this is intended */
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
+ afr_fix_open (fd, this);
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
op_errno = -ret;
goto out;
- }
-
- transaction_frame->local = local;
+ }
- local->op_ret = -1;
+ return 0;
+out:
+ if (transaction_frame) /* failure path: drop the copied frame, then fail the caller's frame */
+ AFR_STACK_DESTROY (transaction_frame);
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
- loc_copy (&local->loc, loc);
+/* {{{ zerofill */
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this) /* installed as transaction.unwind by afr_zerofill; unwinds the detached main frame */
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ local = frame->local;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame); /* nothing is unwound when no main frame comes back */
+ if (!main_frame)
+ return 0;
- return 0;
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret, local->op_errno, /* unwind as zerofill, matching the error path in afr_zerofill */
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
-/* }}} */
-/* {{{ removexattr */
+int
+afr_zerofill_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* callback passed to STACK_WIND_COOKIE by afr_zerofill_wind */
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, /* delegates to the shared inode-write callback */
+ prebuf, postbuf, NULL, xdata); /* the slot between postbuf and xdata is unused here */
+}
int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this, int subvol) /* installed as transaction.wind; sends zerofill to child number subvol */
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
+ priv = this->private;
- if (main_frame) {
- AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno)
- }
- return 0;
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, (void *) (long) subvol, /* the child index travels as the cookie */
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill,
+ local->fd, local->cont.zerofill.offset, /* arguments were stored by afr_zerofill */
+ local->cont.zerofill.len, local->xdata_req);
+ return 0;
}
-
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, /* zerofill entry point: runs the request as a data transaction */
+ size_t len, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM; /* default errno reported by the early goto-out paths below */
- int call_count = -1;
- int need_unwind = 0;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- local = frame->local;
- priv = this->private;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ local->cont.zerofill.offset = offset; /* saved for afr_zerofill_wind */
+ local->cont.zerofill.len = len;
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new (); /* an xdata dict is always present for the wind */
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ if (!local->xdata_req)
+ goto out;
- call_count = afr_frame_return (frame);
+ local->op = GF_FOP_ZEROFILL;
+
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_zerofill_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.zerofill.offset; /* read back the member this function filled in, not cont.discard */
+ local->transaction.len = len;
+
+ afr_fix_open (fd, this);
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
return 0;
+out:
+ if (transaction_frame) /* failure path: drop the copied frame, then fail the caller's frame */
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* }}} */
int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+afr_xattrop_wind_cbk (call_frame_t *frame, void *cookie, /* callback passed to STACK_WIND_COOKIE by afr_xattrop_wind */
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, /* delegates to the shared inode-write callback */
+ NULL, NULL, xattr, xdata); /* the two slots before xattr are passed as NULL */
+}
- int call_count = -1;
- int i = 0;
+int
+afr_xattrop_wind (call_frame_t *frame, xlator_t *this, int subvol) /* installed as transaction.wind; sends xattrop to child number subvol */
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ STACK_WIND_COOKIE (frame, afr_xattrop_wind_cbk, (void *) (long) subvol, /* the child index travels as the cookie */
+ priv->children[subvol],
+ priv->children[subvol]->fops->xattrop,
+ &local->loc, local->cont.xattrop.optype, /* arguments were stored by afr_xattrop */
+ local->cont.xattrop.xattr, local->xdata_req);
+ return 0;
+}
+
+int
+afr_xattrop_unwind (call_frame_t *frame, xlator_t *this) /* installed as transaction.unwind by afr_xattrop; unwinds the detached main frame */
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame); /* nothing is unwound when no main frame comes back */
+ if (!main_frame)
return 0;
- }
- local->call_count = call_count;
+ AFR_STACK_UNWIND (xattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp); /* both response dicts are read from local */
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name);
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, /* xattrop entry point: runs the request as a metadata transaction */
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM; /* default errno reported by the early goto-out paths below */
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.xattrop.xattr = dict_ref (xattr); /* saved for afr_xattrop_wind */
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata); /* xdata_req is only assigned here when the caller passed xdata */
+
+ local->transaction.wind = afr_xattrop_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_xattrop_unwind;
+
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_XATTROP;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- if (!--call_count)
- break;
- }
- }
-
return 0;
+out:
+ if (transaction_frame) /* failure path: drop the copied frame, then fail the caller's frame */
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+int32_t
+afr_fxattrop_wind_cbk (call_frame_t *frame, void *cookie, /* callback passed to STACK_WIND_COOKIE by afr_fxattrop_wind */
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, /* delegates to the shared inode-write callback */
+ NULL, NULL, xattr, xdata); /* the two slots before xattr are passed as NULL */
+}
int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+afr_fxattrop_wind (call_frame_t *frame, xlator_t *this, int subvol) /* installed as transaction.wind; sends fxattrop to child number subvol */
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE (frame, afr_fxattrop_wind_cbk, (void *) (long) subvol, /* the child index travels as the cookie */
+ priv->children[subvol],
+ priv->children[subvol]->fops->fxattrop,
+ local->fd, local->cont.xattrop.optype, /* arguments were stored by afr_fxattrop (shared cont.xattrop member) */
+ local->cont.xattrop.xattr, local->xdata_req);
+ return 0;
}
-
int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_fxattrop_unwind (call_frame_t *frame, xlator_t *this) /* installed as transaction.unwind by afr_fxattrop; unwinds the detached main frame */
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ local = frame->local;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ main_frame = afr_transaction_detach_fop_frame (frame); /* nothing is unwound when no main frame comes back */
+ if (!main_frame)
+ return 0;
- priv = this->private;
+ AFR_STACK_UNWIND (fxattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp); /* both response dicts are read from local */
+ return 0;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, /* fxattrop entry point: runs the request as a metadata transaction */
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM; /* default errno reported by the early goto-out paths below */
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
goto out;
- }
- transaction_frame->local = local;
-
- local->op_ret = -1;
+ local->cont.xattrop.xattr = dict_ref (xattr); /* saved for afr_fxattrop_wind */
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata); /* xdata_req is only assigned here when the caller passed xdata */
- local->cont.removexattr.name = gf_strdup (name);
+ local->transaction.wind = afr_fxattrop_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fxattrop_unwind;
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_FXATTROP;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this,
+ AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, op_ret, op_errno);
- }
+ if (transaction_frame) /* failure path: drop the copied frame, then fail the caller's frame */
+ AFR_STACK_DESTROY (transaction_frame);
- return 0;
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index f0c2fbe0078..e174cc2d610 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,51 +13,78 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid);
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid);
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
new file mode 100644
index 00000000000..c2a5f526c08
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -0,0 +1,1764 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dict.h"
+#include "byte-order.h"
+#include "common-utils.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+#include "afr-messages.h"
+
+#include <signal.h>
+
+
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
+#define LOCKED_LOWER 0x2 /* for lower path */
+
+/* Trace an inodelk request through afr_trace_inodelk_in() when the
+ * xlator's inodelk_trace flag is set. Expands to one statement; the
+ * caller supplies the terminating semicolon. */
+#define AFR_TRACE_INODELK_IN(frame, this, params ...)           \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->inodelk_trace)                      \
+                        break;                                  \
+                afr_trace_inodelk_in (frame, this, params);     \
+        } while (0)
+
+/* Trace an inodelk reply through afr_trace_inodelk_out() when the
+ * xlator's inodelk_trace flag is set. Expands to one statement; the
+ * caller supplies the terminating semicolon. */
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...)          \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->inodelk_trace)                      \
+                        break;                                  \
+                afr_trace_inodelk_out (frame, this, params);    \
+        } while (0)
+
+/* Trace an entrylk request through afr_trace_entrylk_in() when the
+ * xlator's entrylk_trace flag is set. Expands to one statement; the
+ * caller supplies the terminating semicolon. */
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...)           \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->entrylk_trace)                      \
+                        break;                                  \
+                afr_trace_entrylk_in (frame, this, params);     \
+        } while (0)
+
+/* Trace an entrylk reply through afr_trace_entrylk_out() when the
+ * xlator's entrylk_trace flag is set. Expands to one statement; the
+ * caller supplies the terminating semicolon. */
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...)          \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->entrylk_trace)                      \
+                        break;                                  \
+                afr_trace_entrylk_out (frame, this, params);    \
+        } while (0)
+
+/* Comparator over two afr_entry_lockee_t objects passed as const void *.
+ * Orders by the gfid that loc_gfid() yields for each lockee's loc, then by
+ * basename, with a NULL basename sorting first. The result is only ever
+ * -1 or 1: lockees that compare equal are reported as -1. */
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+        const afr_entry_lockee_t *lhs      = l1;
+        const afr_entry_lockee_t *rhs      = l2;
+        uuid_t                    lhs_gfid = {0};
+        uuid_t                    rhs_gfid = {0};
+        int                       order    = 0;
+
+        loc_gfid ((loc_t*)&lhs->loc, lhs_gfid);
+        loc_gfid ((loc_t*)&rhs->loc, rhs_gfid);
+
+        order = gf_uuid_compare (lhs_gfid, rhs_gfid);
+        if (order == 0) {
+                /* Same gfid: entrylks with a NULL basename are the
+                 * 'smallest'. */
+                if (!lhs->basename)
+                        return -1;
+                if (!rhs->basename)
+                        return 1;
+                order = strcmp (lhs->basename, rhs->basename);
+        }
+
+        return (order > 0) ? 1 : -1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
+
+static uint64_t afr_lock_number = 1;
+
+/* Pre-increment the file-scope afr_lock_number counter and return the new
+ * value. The increment is a plain ++ with no locking around it. */
+static uint64_t
+get_afr_lock_number (void)
+{
+        return (++afr_lock_number);
+}
+
+/* Store the next value from get_afr_lock_number() in the internal lock of
+ * frame->local. Always returns 0; 'this' is not used. */
+int
+afr_set_lock_number (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->internal_lock.lock_number = get_afr_lock_number ();
+
+        return 0;
+}
+
+/* Trace the pointer value used as lk-owner, then install it on the
+ * frame's root via set_lk_owner_from_ptr(). */
+void
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
+{
+        unsigned long long owner_value = (unsigned long) lk_owner;
+
+        gf_msg_trace (this->name, 0, "Setting lk-owner=%llu", owner_value);
+
+        set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
+}
+
+/* Map the internal lock's selfheal_lk_type to 1 for data/metadata self
+ * heal locks, 0 for entry self heal locks, and -1 for anything else. */
+static int
+is_afr_lock_selfheal (afr_local_t *local)
+{
+        switch (local->internal_lock.selfheal_lk_type) {
+        case AFR_DATA_SELF_HEAL_LK:
+        case AFR_METADATA_SELF_HEAL_LK:
+                return 1;
+        case AFR_ENTRY_SELF_HEAL_LK:
+                return 0;
+        }
+
+        return -1;
+}
+
+/* Count how many of the priv->child_count children have a non-zero entry
+ * in local->child_up. */
+int32_t
+internal_lock_count (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local    = frame->local;
+        afr_private_t *priv     = this->private;
+        int32_t        up_count = 0;
+        int            child    = 0;
+
+        for (child = 0; child < priv->child_count; child++)
+                up_count += local->child_up[child] ? 1 : 0;
+
+        return up_count;
+}
+
+/* Format a one-line description of an inodelk into str (at most size
+ * bytes): command name, lock type, start, len, pid and lk-owner.
+ * Unrecognised commands print as "<null>", unrecognised types as
+ * "UNKNOWN". */
+static void
+afr_print_inodelk (char *str, int size, int cmd,
+                   struct gf_flock *flock, gf_lkowner_t *owner)
+{
+        const char *type_name = "UNKNOWN";
+        const char *cmd_name  = "<null>";
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                type_name = "READ";
+                break;
+        case F_WRLCK:
+                type_name = "WRITE";
+                break;
+        case F_UNLCK:
+                type_name = "UNLOCK";
+                break;
+        default:
+                break;
+        }
+
+        /* The *64 variants get their own case label only where they are
+         * distinct values from the plain commands. */
+        switch (cmd) {
+#if F_GETLK != F_GETLK64
+        case F_GETLK64:
+#endif
+        case F_GETLK:
+                cmd_name = "GETLK";
+                break;
+
+#if F_SETLK != F_SETLK64
+        case F_SETLK64:
+#endif
+        case F_SETLK:
+                cmd_name = "SETLK";
+                break;
+
+#if F_SETLKW != F_SETLKW64
+        case F_SETLKW64:
+#endif
+        case F_SETLKW:
+                cmd_name = "SETLKW";
+                break;
+
+        default:
+                break;
+        }
+
+        snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
+                  "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+                  cmd_name, type_name,
+                  (unsigned long long) flock->l_start,
+                  (unsigned long long) flock->l_len,
+                  (unsigned long long) flock->l_pid,
+                  lkowner_utoa (owner));
+}
+
+/* Format "path=..., fd=..., child=..." into str (at most size bytes).
+ * A NULL loc->path prints as "<nul>"; fd may be NULL. */
+static void
+afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
+                  int child_index)
+{
+        /* %p takes a void *, so convert explicitly; a NULL fd stays NULL. */
+        snprintf (str, size, "path=%s, fd=%p, child=%d",
+                  loc->path ? loc->path : "<nul>",
+                  (void *) fd,
+                  child_index);
+}
+
+/* Format "Basename=..., lk-owner=..." into str (at most size bytes).
+ * A NULL basename prints as "<nul>". */
+void
+afr_print_entrylk (char *str, int size, const char *basename,
+                   gf_lkowner_t *owner)
+{
+        const char *shown = basename;
+
+        if (!shown)
+                shown = "<nul>";
+
+        snprintf (str, size, "Basename=%s, lk-owner=%s",
+                  shown, lkowner_utoa (owner));
+}
+
+/* Copy a verdict word into str: "GRANTED" when op_ret is non-negative,
+ * "EAGAIN" when the failure errno is EAGAIN, "FAILED" otherwise.
+ * The copy is an unbounded strcpy; str must have room for the longest
+ * word plus its terminator. */
+static void
+afr_print_verdict (int op_ret, int op_errno, char *str)
+{
+        const char *verdict = "GRANTED";
+
+        if (op_ret < 0)
+                verdict = (op_errno == EAGAIN) ? "EAGAIN" : "FAILED";
+
+        strcpy (str, verdict);
+}
+
+/* Copy the printable name of lock_call_type into lock_call_type_str.
+ * Each known call type has a *_TRANSACTION name, used when the internal
+ * lock's transaction_lk_type is AFR_TRANSACTION_LK, and a *_SELFHEAL name
+ * used otherwise. Unknown call types yield "UNKNOWN". The copy is an
+ * unbounded strcpy. */
+static void
+afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
+                        char *lock_call_type_str,
+                        afr_internal_lock_t *int_lock)
+{
+        const char *label  = "UNKNOWN";
+        int         is_txn = 0;
+
+        is_txn = (int_lock->transaction_lk_type == AFR_TRANSACTION_LK);
+
+        switch (lock_call_type) {
+        case AFR_INODELK_TRANSACTION:
+                label = is_txn ? "AFR_INODELK_TRANSACTION"
+                               : "AFR_INODELK_SELFHEAL";
+                break;
+        case AFR_INODELK_NB_TRANSACTION:
+                label = is_txn ? "AFR_INODELK_NB_TRANSACTION"
+                               : "AFR_INODELK_NB_SELFHEAL";
+                break;
+        case AFR_ENTRYLK_TRANSACTION:
+                label = is_txn ? "AFR_ENTRYLK_TRANSACTION"
+                               : "AFR_ENTRYLK_SELFHEAL";
+                break;
+        case AFR_ENTRYLK_NB_TRANSACTION:
+                label = is_txn ? "AFR_ENTRYLK_NB_TRANSACTION"
+                               : "AFR_ENTRYLK_NB_SELFHEAL";
+                break;
+        default:
+                break;
+        }
+
+        strcpy (lock_call_type_str, label);
+}
+
+/* Log at INFO the reply to an inodelk lock/unlock request: call type,
+ * verdict, lk-owner, lockee description and lock number.
+ * The flock argument is accepted but not used. */
+static void
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+                       afr_lock_call_type_t lock_call_type,
+                       afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
+                       int op_ret, int op_errno, int32_t child_index)
+{
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        const char          *phase    = NULL;
+        char                 call_type[256];
+        char                 target[256];
+        char                 result[16];
+
+        afr_print_lockee (target, sizeof (target), &local->loc, local->fd,
+                          child_index);
+        afr_set_lock_call_type (lock_call_type, call_type, int_lock);
+        afr_print_verdict (op_ret, op_errno, result);
+
+        phase = (lk_op_type == AFR_LOCK_OP) ? "LOCK REPLY" : "UNLOCK REPLY";
+
+        gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_LOCK_INFO,
+                "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
+                call_type, phase, result,
+                lkowner_utoa (&frame->root->lk_owner), target,
+                (unsigned long long) int_lock->lock_number);
+}
+
+/* Log at INFO an outgoing inodelk lock/unlock request: call type, lock
+ * description, lockee description and lock number. */
+static void
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+                      afr_lock_call_type_t lock_call_type,
+                      afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
+                      int32_t cmd, int32_t child_index)
+{
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        const char          *phase    = NULL;
+        char                 call_type[256];
+        char                 lock_desc[256];
+        char                 target[256];
+
+        afr_print_inodelk (lock_desc, sizeof (lock_desc), cmd, flock,
+                           &frame->root->lk_owner);
+        afr_print_lockee (target, sizeof (target), &local->loc, local->fd,
+                          child_index);
+        afr_set_lock_call_type (lock_call_type, call_type, int_lock);
+
+        phase = (lk_op_type == AFR_LOCK_OP) ? "LOCK REQUEST"
+                                            : "UNLOCK REQUEST";
+
+        gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_LOCK_INFO,
+                "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+                call_type, phase, lock_desc, target,
+                (unsigned long long) int_lock->lock_number);
+}
+
+/* Log at INFO an outgoing entrylk lock/unlock request. Does nothing
+ * unless priv->entrylk_trace is set. The cookie encodes both indices:
+ * cookie / child_count selects the lockee, cookie % child_count the
+ * child. */
+static void
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+                      afr_lock_call_type_t lock_call_type,
+                      afr_lock_op_type_t lk_op_type, const char *basename,
+                      int32_t cookie)
+{
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        afr_private_t       *priv     = this->private;
+        const char          *phase    = NULL;
+        int                  lockee_no   = 0;
+        int                  child_index = 0;
+        char                 call_type[256];
+        char                 lock_desc[256];
+        char                 target[256];
+
+        if (!priv->entrylk_trace)
+                return;
+
+        lockee_no   = cookie / priv->child_count;
+        child_index = cookie % priv->child_count;
+
+        afr_print_entrylk (lock_desc, sizeof (lock_desc), basename,
+                           &frame->root->lk_owner);
+        afr_print_lockee (target, sizeof (target),
+                          &int_lock->lockee[lockee_no].loc, local->fd,
+                          child_index);
+        afr_set_lock_call_type (lock_call_type, call_type, int_lock);
+
+        phase = (lk_op_type == AFR_LOCK_OP) ? "LOCK REQUEST"
+                                            : "UNLOCK REQUEST";
+
+        gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_LOCK_INFO,
+                "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
+                call_type, phase, lock_desc, target,
+                (unsigned long long) int_lock->lock_number,
+                cookie);
+}
+
+/* Log at INFO the reply to an entrylk lock/unlock request, including the
+ * verdict. Does nothing unless priv->entrylk_trace is set. The cookie is
+ * decoded as lockee = cookie / child_count, child = cookie % child_count. */
+static void
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+                       afr_lock_call_type_t lock_call_type,
+                       afr_lock_op_type_t lk_op_type, const char *basename,
+                       int op_ret, int op_errno, int32_t cookie)
+{
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        afr_private_t       *priv     = this->private;
+        const char          *phase    = NULL;
+        int                  lockee_no   = 0;
+        int                  child_index = 0;
+        char                 call_type[256];
+        char                 lock_desc[256];
+        char                 target[256];
+        char                 result[16];
+
+        if (!priv->entrylk_trace)
+                return;
+
+        lockee_no   = cookie / priv->child_count;
+        child_index = cookie % priv->child_count;
+
+        afr_print_entrylk (lock_desc, sizeof (lock_desc), basename,
+                           &frame->root->lk_owner);
+        afr_print_lockee (target, sizeof (target),
+                          &int_lock->lockee[lockee_no].loc, local->fd,
+                          child_index);
+        afr_set_lock_call_type (lock_call_type, call_type, int_lock);
+        afr_print_verdict (op_ret, op_errno, result);
+
+        phase = (lk_op_type == AFR_LOCK_OP) ? "LOCK REPLY" : "UNLOCK REPLY";
+
+        gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_LOCK_INFO,
+                "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
+                call_type, phase, result, lock_desc, target,
+                (unsigned long long) int_lock->lock_number,
+                cookie);
+}
+
+/* Classify the internal lock's transaction_lk_type: 1 for a transaction
+ * lock, 0 for a self heal lock, -1 when it is neither. Each outcome is
+ * noted in the debug log. */
+static int
+transaction_lk_op (afr_local_t *local)
+{
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+
+        if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) {
+                gf_msg_debug (THIS->name, 0,
+                              "lk op is for a transaction");
+                return 1;
+        }
+
+        if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) {
+                gf_msg_debug (THIS->name, 0,
+                              "lk op is for a self heal");
+                return 0;
+        }
+
+        gf_msg_debug (THIS->name, 0,
+                      "lk op is not set");
+
+        return -1;
+}
+
+/* Return 1 when local->transaction.type is a data or metadata
+ * transaction, 0 for entry/rename transactions and any other value. */
+static int
+is_afr_lock_transaction (afr_local_t *local)
+{
+        switch (local->transaction.type) {
+        case AFR_DATA_TRANSACTION:
+        case AFR_METADATA_TRANSACTION:
+                return 1;
+
+        case AFR_ENTRY_RENAME_TRANSACTION:
+        case AFR_ENTRY_TRANSACTION:
+                return 0;
+        }
+
+        return 0;
+}
+
+/* Fill in an entry lockee: copy loc into lockee->loc, duplicate basename
+ * (NULL stays NULL), zero locked_count and allocate a zeroed locked_nodes
+ * array of child_count elements.
+ * Returns 0 on success, -1 if either allocation fails. Whatever was set
+ * up before the failure is left in place, not released here.
+ * The return value of loc_copy() is not examined; 'local' is unused. */
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+                       loc_t *loc, char *basename, int child_count)
+{
+        loc_copy (&lockee->loc, loc);
+
+        if (basename) {
+                lockee->basename = gf_strdup (basename);
+                if (!lockee->basename)
+                        return -1;
+        } else {
+                lockee->basename = NULL;
+        }
+
+        lockee->locked_count = 0;
+        lockee->locked_nodes = GF_CALLOC (child_count,
+                                          sizeof (*lockee->locked_nodes),
+                                          gf_afr_mt_afr_node_character);
+
+        return lockee->locked_nodes ? 0 : -1;
+}
+
+/* For each of the first lockee_count lockees, wipe its loc and free its
+ * basename and locked_nodes when they are set. The pointers are not
+ * reset afterwards. */
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+        int idx = 0;
+
+        while (idx < int_lock->lockee_count) {
+                loc_wipe (&int_lock->lockee[idx].loc);
+
+                if (int_lock->lockee[idx].basename)
+                        GF_FREE (int_lock->lockee[idx].basename);
+
+                if (int_lock->lockee[idx].locked_nodes)
+                        GF_FREE (int_lock->lockee[idx].locked_nodes);
+
+                idx++;
+        }
+}
+
+/* Reset entrylk bookkeeping before a locking attempt: zero
+ * entrylk_lock_count, set lock_op_ret to -1 and lock_op_errno to 0, then
+ * clear locked_count and the locked_nodes array of every lockee up to the
+ * first one whose locked_nodes is NULL. Always returns 0. */
+static int
+initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t       *priv     = this->private;
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        int                  slot     = 0;
+
+        int_lock->entrylk_lock_count = 0;
+        int_lock->lock_op_ret        = -1;
+        int_lock->lock_op_errno      = 0;
+
+        for (slot = 0; slot < AFR_LOCKEE_COUNT_MAX; slot++) {
+                /* A NULL locked_nodes marks the end of the lockees in
+                 * use. */
+                if (!int_lock->lockee[slot].locked_nodes)
+                        break;
+
+                int_lock->lockee[slot].locked_count = 0;
+                memset (int_lock->lockee[slot].locked_nodes, 0,
+                        sizeof (*int_lock->lockee[slot].locked_nodes) *
+                        priv->child_count);
+        }
+
+        return 0;
+}
+
+/* Reset inodelk bookkeeping for the current domain before a locking
+ * attempt: zero the domain's lock_count and lk_attempted_count, set
+ * lock_op_ret to -1 and lock_op_errno to 0, and clear both the domain's
+ * and the internal lock's locked_nodes arrays. Always returns 0. */
+static int
+initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t       *priv     = this->private;
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        afr_inodelk_t       *inodelk  = NULL;
+
+        inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+        inodelk->lock_count          = 0;
+        int_lock->lk_attempted_count = 0;
+        int_lock->lock_op_ret        = -1;
+        int_lock->lock_op_errno      = 0;
+
+        memset (inodelk->locked_nodes, 0,
+                sizeof (*inodelk->locked_nodes) * priv->child_count);
+        memset (int_lock->locked_nodes, 0,
+                sizeof (*int_lock->locked_nodes) * priv->child_count);
+
+        return 0;
+}
+
+/* Sum locked_count over the first lockee_count lockees. */
+int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+        int total = 0;
+        int idx   = 0;
+
+        while (idx < int_lock->lockee_count) {
+                total += int_lock->lockee[idx].locked_count;
+                idx++;
+        }
+
+        return total;
+}
+
+/* Count the entries among the first child_count elements of locked_nodes
+ * that have the LOCKED_YES bit set. */
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+{
+        int locked = 0;
+        int child  = 0;
+
+        for (child = 0; child < child_count; child++)
+                locked += (locked_nodes[child] & LOCKED_YES) ? 1 : 0;
+
+        return locked;
+}
+
+/* FIXME: What if UNLOCK fails */
+/* Shared tail of the unlock callbacks: decrement lk_call_count under
+ * frame->lock and, once it reaches zero, invoke int_lock->lock_cbk.
+ * op_ret/op_errno are not examined here. Always returns 0. */
+static int32_t
+afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t         *local     = frame->local;
+        afr_internal_lock_t *int_lock  = &local->internal_lock;
+        int                  remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                int_lock->lk_call_count--;
+                remaining = int_lock->lk_call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining != 0)
+                return 0;
+
+        gf_msg_trace (this->name, 0,
+                      "All internal locks unlocked");
+
+        int_lock->lock_cbk (frame, this);
+
+        return 0;
+}
+
+/* Callback for one child's inodelk unlock. The cookie is the child index.
+ * Failures other than ENOTCONN/EBADFD are logged; in every case the
+ * child's locked_nodes entry for the current domain is cleared, its
+ * eager_lock flag (when the array exists) is reset, and the reply is
+ * handed to afr_unlock_common_cbk(). Always returns 0. */
+static int32_t
+afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t         *local       = frame->local;
+        afr_internal_lock_t *int_lock    = &local->internal_lock;
+        afr_inodelk_t       *inodelk     = NULL;
+        afr_private_t       *priv        = NULL;
+        int32_t              child_index = (long)cookie;
+
+        AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
+                               AFR_UNLOCK_OP, NULL, op_ret,
+                               op_errno, child_index);
+
+        priv = this->private;
+
+        if (op_ret < 0 && !(op_errno == ENOTCONN || op_errno == EBADFD)) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        AFR_MSG_INODE_UNLOCK_FAIL,
+                        "path=%s gfid=%s: unlock failed on subvolume %s "
+                        "with lock owner %s", local->loc.path,
+                        loc_gfid_utoa (&(local->loc)),
+                        priv->children[child_index]->name,
+                        lkowner_utoa (&frame->root->lk_owner));
+        }
+
+        /* LOCKED_NO is 0, so the AND clears every bit for this child. */
+        inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+        inodelk->locked_nodes[child_index] &= LOCKED_NO;
+
+        if (local->transaction.eager_lock)
+                local->transaction.eager_lock[child_index] = 0;
+
+        afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        return 0;
+}
+/* Unlock the current domain's inodelk on each child marked LOCKED_YES; replies go to afr_unlock_inodelk_cbk. Always returns 0. */
+static int
+afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int call_count = 0;
+ int i = 0;
+ int piggyback = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ /* flock copies the range held in inodelk->flock; full_flock leaves l_start and l_len at 0. */
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
+
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
+ priv->child_count);
+ /* One callback is expected for every child counted as locked. */
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+ gf_msg_trace (this->name, 0,
+ "No internal locks unlocked");
+
+ int_lock->lock_cbk (frame, this);
+ goto out;
+ }
+ /* NOTE(review): fd_ctx is dereferenced below without a NULL check -- confirm afr_fd_ctx_get cannot fail here. */
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
+ continue;
+ /* NOTE(review): eager_lock is indexed below without the NULL check afr_unlock_inodelk_cbk makes -- confirm. */
+ if (local->fd) {
+ flock_use = &flock;
+ if (!local->transaction.eager_lock[i]) {
+ goto wind;
+ }
+ /* Eager-locked child: use up a pending piggyback if there is one, otherwise drop one acquired lock. */
+ piggyback = 0;
+
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_piggyback[i]) {
+ fd_ctx->lock_piggyback[i]--;
+ piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ /* A piggybacked lock is not wound; the callback is invoked directly with op_ret 1. */
+ if (piggyback) {
+ afr_unlock_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
+ if (!--call_count)
+ break;
+ continue;
+ }
+ /* No piggyback: the unlock is wound with full_flock instead of the stored range. */
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ if (!--call_count)
+ break;
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+out:
+ return 0;
+}
+
+/* Callback for one entrylk unlock. The cookie encodes
+ * lockee_no * child_count + child_index. Any failure is logged; the
+ * lockee's locked_nodes entry for that child is cleared either way and
+ * the reply is handed to afr_unlock_common_cbk() (with NULL xdata).
+ * Always returns 0. */
+static int32_t
+afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_private_t       *priv        = this->private;
+        afr_local_t         *local       = NULL;
+        afr_internal_lock_t *int_lock    = NULL;
+        int                  cky         = (int) ((long) cookie);
+        int                  lockee_no   = cky / priv->child_count;
+        int32_t              child_index = cky % priv->child_count;
+
+        local    = frame->local;
+        int_lock = &local->internal_lock;
+
+        AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+                               AFR_UNLOCK_OP,
+                               int_lock->lockee[lockee_no].basename, op_ret,
+                               op_errno, cky);
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        AFR_MSG_ENTRY_UNLOCK_FAIL,
+                        "%s: unlock failed on %s", local->loc.path,
+                        priv->children[child_index]->name);
+        }
+
+        /* LOCKED_NO is 0, so the AND clears every bit for this child. */
+        int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+
+        afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
+
+        return 0;
+}
+
+/* Wind an ENTRYLK_UNLOCK to every (lockee, child) pair whose locked_nodes
+ * entry has LOCKED_YES set; replies go to afr_unlock_entrylk_cbk. When
+ * nothing is locked, lock_cbk is called straight away.
+ * Always returns 0. */
+static int
+afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t         *local     = frame->local;
+        afr_internal_lock_t *int_lock  = &local->internal_lock;
+        afr_private_t       *priv      = this->private;
+        int                  copies    = priv->child_count;
+        int                  pending   = 0;
+        int                  lockee_no = 0;
+        int                  child     = 0;
+        int                  cky       = -1;
+
+        pending = afr_lockee_locked_nodes_count (int_lock);
+        int_lock->lk_call_count = pending;
+
+        if (!pending) {
+                gf_msg_trace (this->name, 0,
+                              "No internal locks unlocked");
+                int_lock->lock_cbk (frame, this);
+                return 0;
+        }
+
+        for (cky = 0; cky < int_lock->lockee_count * priv->child_count;
+             cky++) {
+                lockee_no = cky / copies;
+                child     = cky % copies;
+
+                if (!(int_lock->lockee[lockee_no].locked_nodes[child] &
+                      LOCKED_YES))
+                        continue;
+
+                AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+                                      AFR_UNLOCK_OP,
+                                      int_lock->lockee[lockee_no].basename,
+                                      cky);
+
+                STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
+                                   (void *) (long) cky,
+                                   priv->children[child],
+                                   priv->children[child]->fops->entrylk,
+                                   int_lock->domain,
+                                   &int_lock->lockee[lockee_no].loc,
+                                   int_lock->lockee[lockee_no].basename,
+                                   ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+
+                /* Stop as soon as the last expected unlock is wound. */
+                if (!--pending)
+                        break;
+        }
+
+        return 0;
+}
+
+/* Common reply handler for blocking lock winds. The cookie encodes
+ * lockee_no * child_count + child_index.
+ * Under frame->lock it records a failure's errno (and, for ENOSYS, the
+ * op_ret as well) and bumps lk_attempted_count. ENOSYS then aborts via
+ * afr_unlock(); otherwise a successful lock is marked in the entrylk or
+ * inodelk bookkeeping and locking continues with cookie + 1.
+ * Always returns 0. */
+static int32_t
+afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_private_t       *priv        = this->private;
+        afr_local_t         *local       = frame->local;
+        afr_internal_lock_t *int_lock    = &local->internal_lock;
+        int                  cky         = (long) cookie;
+        int                  child_index = ((int)cky) % priv->child_count;
+        int                  lockee_no   = ((int)cky) / priv->child_count;
+        int                  unsupported = 0;
+
+        unsupported = ((op_ret == -1) && (op_errno == ENOSYS));
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        if (unsupported) {
+                                /* return ENOTSUP */
+                                gf_msg (this->name, GF_LOG_ERROR, ENOSYS,
+                                        AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+                                        "subvolume does not support locking. "
+                                        "please load features/locks xlator on server");
+                                local->op_ret         = op_ret;
+                                int_lock->lock_op_ret = op_ret;
+                        }
+
+                        local->op_errno         = op_errno;
+                        int_lock->lock_op_errno = op_errno;
+                }
+
+                int_lock->lk_attempted_count++;
+        }
+        UNLOCK (&frame->lock);
+
+        if (unsupported) {
+                afr_unlock (frame, this);
+                return 0;
+        }
+
+        if (op_ret == 0) {
+                if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+                    local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+                        int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+                        int_lock->lockee[lockee_no].locked_count++;
+                        int_lock->entrylk_lock_count++;
+                } else {
+                        int_lock->locked_nodes[child_index] |= LOCKED_YES;
+                        int_lock->lock_count++;
+                }
+        }
+
+        afr_lock_blocking (frame, this, cky + 1);
+
+        return 0;
+}
+
+/* Reply handler for a blocking inodelk wind: trace the reply, then hand
+ * it to afr_lock_cbk(). Always returns 0. */
+static int32_t
+afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        long child = (long) cookie;
+
+        AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
+                               AFR_LOCK_OP, NULL, op_ret,
+                               op_errno, child);
+
+        afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/* Reply handler for a blocking entrylk wind: trace the reply (with a NULL
+ * basename), then hand it to afr_lock_cbk(). Always returns 0. */
+static int32_t
+afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        long cky = (long)cookie;
+
+        AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+                               AFR_LOCK_OP, NULL, op_ret,
+                               op_errno, cky);
+
+        afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/* For data and metadata transactions, copy the internal lock's
+ * locked_nodes array and lock_count into the current domain's inodelk.
+ * Entry and rename transactions need no copy. Always returns 0. */
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t       *priv     = this->private;
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        afr_inodelk_t       *inodelk  = NULL;
+
+        /* entrylk_count is being used in both non-blocking and blocking
+         * modes, so only the inodelk transaction types are handled. */
+        if (local->transaction.type == AFR_DATA_TRANSACTION ||
+            local->transaction.type == AFR_METADATA_TRANSACTION) {
+                inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+                memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+                        sizeof (*inodelk->locked_nodes) * priv->child_count);
+                inodelk->lock_count = int_lock->lock_count;
+        }
+
+        return 0;
+}
+
+/* Tell whether the lock being taken is an entrylk: either a self heal
+ * lock of the entry kind, or a transaction lock for an entry or rename
+ * transaction. */
+static gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+                afr_transaction_type trans_type)
+{
+        if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+            int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK)
+                return _gf_true;
+
+        if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+            (trans_type == AFR_ENTRY_TRANSACTION ||
+             trans_type == AFR_ENTRY_RENAME_TRANSACTION))
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* A lock is wound to a child only when local->child_up marks it up. */
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+        return local->child_up[child_index] ? _gf_true : _gf_false;
+}
+
+/* Log a warning that blocking entry locks could not be obtained, naming
+ * the fop and the parent gfid / name involved. For GF_FOP_LINK these come
+ * from local->newloc, for every other fop from local->loc.
+ * The int_lock argument is not used. */
+static void
+afr_log_entry_locks_failure(xlator_t *this, afr_local_t *local,
+                            afr_internal_lock_t *int_lock)
+{
+        const char *fop     = gf_fop_list[local->op];
+        const char *name    = NULL;
+        char       *pargfid = NULL;
+
+        if (local->op == GF_FOP_LINK) {
+                pargfid = uuid_utoa(local->newloc.pargfid);
+                name    = local->newloc.name;
+        } else {
+                pargfid = uuid_utoa(local->loc.pargfid);
+                name    = local->loc.name;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
+                "Unable to obtain sufficient blocking entry locks on at least "
+                "one child while attempting %s on {pgfid:%s, name:%s}.", fop,
+                pargfid, name);
+}
+
+/* Decide whether the blocking locks obtained so far are enough to go on.
+ * For inodelks a single locked child suffices. For entrylks at least one
+ * child must hold the lock of every lockee. A warning is logged whenever
+ * the answer is _gf_false. */
+static gf_boolean_t
+is_blocking_locks_count_sufficient (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t         *local        = frame->local;
+        afr_private_t       *priv         = this->private;
+        afr_internal_lock_t *int_lock     = &local->internal_lock;
+        int                  lockee_count = int_lock->lockee_count;
+        int                  child        = 0;
+        int                  nlockee      = 0;
+        gf_boolean_t         ret          = _gf_true;
+
+        if (!afr_is_entrylk (int_lock, local->transaction.type)) {
+                /* inodelk: success on at least one child is enough. */
+                if (int_lock->lock_count != 0)
+                        return _gf_true;
+
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        AFR_MSG_BLOCKING_LKS_FAILED, "Unable to obtain "
+                        "blocking inode lock on even one child for "
+                        "gfid:%s.", uuid_utoa (local->inode->gfid));
+                return _gf_false;
+        }
+
+        if (int_lock->entrylk_lock_count == 0) {
+                afr_log_entry_locks_failure (this, local, int_lock);
+                return _gf_false;
+        }
+
+        /* For FOPS that take multiple sets of locks (mkdir, rename),
+         * there must be at least one brick on which the locks from
+         * all lock sets were successful. */
+        for (child = 0; child < priv->child_count; child++) {
+                ret = _gf_true;
+                for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+                        if (!(int_lock->lockee[nlockee].locked_nodes[child] &
+                              LOCKED_YES)) {
+                                ret = _gf_false;
+                                break;
+                        }
+                }
+                if (ret)
+                        return ret;
+        }
+
+        if (!ret)
+                afr_log_entry_locks_failure (this, local, int_lock);
+
+        return ret;
+}
+
+/* Take blocking locks one at a time. The cookie encodes the next lock to
+ * attempt as lockee_no * child_count + child_index; each reply re-enters
+ * this function through afr_lock_cbk() with cookie + 1.
+ *
+ * Each call:
+ *  - aborts via afr_unlock() if an fd is in use but has no fd ctx;
+ *  - once lk_attempted_count reaches lk_expected_count, either aborts via
+ *    afr_unlock() (too few locks obtained) or reports success through
+ *    int_lock->lock_cbk;
+ *  - skips children that are not up;
+ *  - otherwise winds one F_SETLKW (f)inodelk or one ENTRYLK_LOCK
+ *    (f)entrylk to the selected child.
+ * Always returns 0. */
+int
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
+{
+        afr_internal_lock_t *int_lock    = NULL;
+        afr_inodelk_t       *inodelk     = NULL;
+        afr_local_t         *local       = NULL;
+        afr_private_t       *priv        = NULL;
+        struct gf_flock      flock       = {0,};
+        uint64_t             ctx         = 0;
+        int                  ret         = 0;
+        int                  child_index = 0;
+        int                  lockee_no   = 0;
+        gf_boolean_t         is_entrylk  = _gf_false;
+
+        local       = frame->local;
+        int_lock    = &local->internal_lock;
+        priv        = this->private;
+        child_index = cookie % priv->child_count;
+        lockee_no   = cookie / priv->child_count;
+        is_entrylk  = afr_is_entrylk (int_lock, local->transaction.type);
+
+
+        if (!is_entrylk) {
+                inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+                flock.l_start = inodelk->flock.l_start;
+                flock.l_len   = inodelk->flock.l_len;
+                flock.l_type  = inodelk->flock.l_type;
+        }
+
+        if (local->fd) {
+                ret = fd_ctx_get (local->fd, this, &ctx);
+
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                AFR_MSG_FD_CTX_GET_FAILED,
+                                "unable to get fd ctx for fd=%p",
+                                local->fd);
+
+                        local->op_ret           = -1;
+                        int_lock->lock_op_ret   = -1;
+
+                        afr_copy_locked_nodes (frame, this);
+
+                        afr_unlock (frame, this);
+
+                        return 0;
+                }
+        }
+
+        if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+                if (!is_blocking_locks_count_sufficient (frame, this)) {
+
+                        local->op_ret           = -1;
+                        int_lock->lock_op_ret   = -1;
+
+                        afr_copy_locked_nodes (frame, this);
+
+                        afr_unlock(frame, this);
+
+                        return 0;
+                }
+        }
+
+        if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+                /* we're done locking */
+
+                gf_msg_debug (this->name, 0,
+                              "we're done locking");
+
+                afr_copy_locked_nodes (frame, this);
+
+                int_lock->lock_op_ret = 0;
+                int_lock->lock_cbk (frame, this);
+                return 0;
+        }
+
+        if (!_is_lock_wind_needed (local, child_index)) {
+                afr_lock_blocking (frame, this, cookie + 1);
+                return 0;
+        }
+
+        switch (local->transaction.type) {
+        case AFR_DATA_TRANSACTION:
+        case AFR_METADATA_TRANSACTION:
+
+                if (local->fd) {
+                        AFR_TRACE_INODELK_IN (frame, this,
+                                              AFR_INODELK_TRANSACTION,
+                                              AFR_LOCK_OP, &flock, F_SETLKW,
+                                              child_index);
+
+                        STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
+                                           (void *) (long) child_index,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->finodelk,
+                                           int_lock->domain, local->fd,
+                                           F_SETLKW, &flock, NULL);
+
+                } else {
+                        AFR_TRACE_INODELK_IN (frame, this,
+                                              AFR_INODELK_TRANSACTION,
+                                              AFR_LOCK_OP, &flock, F_SETLKW,
+                                              child_index);
+
+                        STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
+                                           (void *) (long) child_index,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->inodelk,
+                                           int_lock->domain, &local->loc,
+                                           F_SETLKW, &flock, NULL);
+                }
+
+                break;
+
+        case AFR_ENTRY_RENAME_TRANSACTION:
+        case AFR_ENTRY_TRANSACTION:
+                /*Accounting for child_index increments on 'down'
+                 *and 'fd-less' children */
+
+                if (local->fd) {
+                        AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+                                              AFR_LOCK_OP,
+                                              int_lock->lockee[lockee_no].basename,
+                                              cookie);
+
+                        STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
+                                           (void *) (long) cookie,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->fentrylk,
+                                           int_lock->domain, local->fd,
+                                           int_lock->lockee[lockee_no].basename,
+                                           ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+                } else {
+                        /* The trace helper decodes its last argument as a
+                         * cookie (lockee and child), so pass the cookie and
+                         * the basename of the lockee actually being
+                         * locked, matching what is wound below. */
+                        AFR_TRACE_ENTRYLK_IN (frame, this,
+                                              AFR_ENTRYLK_TRANSACTION,
+                                              AFR_LOCK_OP,
+                                              int_lock->lockee[lockee_no].basename,
+                                              cookie);
+
+                        STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
+                                           (void *) (long) cookie,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->entrylk,
+                                           int_lock->domain,
+                                           &int_lock->lockee[lockee_no].loc,
+                                           int_lock->lockee[lockee_no].basename,
+                                           ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+                }
+
+                break;
+        }
+
+        return 0;
+}
+
+/* Entry point for the blocking lock sequence. Resets the bookkeeping for
+ * the transaction type (for entry/rename transactions it also sets
+ * lk_call_count and lk_expected_count to lockee_count times the number of
+ * up children), then starts afr_lock_blocking() at cookie 0.
+ * Always returns 0. */
+int32_t
+afr_blocking_lock (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t       *priv     = this->private;
+        afr_local_t         *local    = frame->local;
+        afr_internal_lock_t *int_lock = &local->internal_lock;
+        int                  up_count = 0;
+
+        if (local->transaction.type == AFR_DATA_TRANSACTION ||
+            local->transaction.type == AFR_METADATA_TRANSACTION) {
+                initialize_inodelk_variables (frame, this);
+        } else if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION ||
+                   local->transaction.type == AFR_ENTRY_TRANSACTION) {
+                up_count = AFR_COUNT (local->child_up, priv->child_count);
+                int_lock->lk_expected_count = (int_lock->lockee_count *
+                                               up_count);
+                int_lock->lk_call_count = int_lock->lk_expected_count;
+                initialize_entrylk_variables (frame, this);
+        }
+
+        afr_lock_blocking (frame, this, 0);
+
+        return 0;
+}
+
+/* Reply handler for one non-blocking entrylk wind. The cookie encodes
+ * lockee_no * child_count + child_index.
+ * Under frame->lock it records an ENOSYS failure, or marks the child as
+ * locked for that lockee on success, and decrements lk_call_count.
+ * When the last reply arrives: if every expected lock was obtained,
+ * lock_cbk is invoked with lock_op_ret 0; otherwise afr_unlock() is
+ * called so the caller can retry with blocking locks.
+ * Always returns 0. */
+static int32_t
+afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_internal_lock_t *int_lock    = NULL;
+        afr_local_t         *local       = NULL;
+        int                  call_count  = 0;
+        int                  child_index = (long) cookie;
+        int                  copies      = 0;
+        int                  index       = 0;
+        int                  lockee_no   = 0;
+        afr_private_t       *priv        = NULL;
+
+        priv = this->private;
+
+        copies    = priv->child_count;
+        index     = child_index % copies;
+        lockee_no = child_index / copies;
+
+        local    = frame->local;
+        int_lock = &local->internal_lock;
+
+        AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+                               AFR_LOCK_OP,
+                               int_lock->lockee[lockee_no].basename, op_ret,
+                               op_errno, (long) cookie);
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret < 0 ) {
+                        if (op_errno == ENOSYS) {
+                                /* return ENOTSUP */
+                                gf_msg (this->name, GF_LOG_ERROR,
+                                        ENOSYS, AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+                                        "subvolume does not support "
+                                        "locking. please load features/locks"
+                                        " xlator on server");
+                                local->op_ret         = op_ret;
+                                int_lock->lock_op_ret = op_ret;
+
+                                int_lock->lock_op_errno = op_errno;
+                                local->op_errno         = op_errno;
+                        }
+                } else if (op_ret == 0) {
+                        int_lock->lockee[lockee_no].locked_nodes[index] |= \
+                                LOCKED_YES;
+                        int_lock->lockee[lockee_no].locked_count++;
+                        int_lock->entrylk_lock_count++;
+                }
+
+                call_count = --int_lock->lk_call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (call_count == 0) {
+                gf_msg_trace (this->name, 0,
+                              "Last locking reply received");
+                /* all locks successful. Proceed to call FOP */
+                if (int_lock->entrylk_lock_count ==
+                    int_lock->lk_expected_count) {
+                        gf_msg_trace (this->name, 0,
+                                      "All servers locked. Calling the cbk");
+                        int_lock->lock_op_ret = 0;
+                        int_lock->lock_cbk (frame, this);
+                }
+                /* Not all locks were successful. Unlock and try locking
+                   again, this time with serially blocking locks */
+                else {
+                        /* Report the entrylk counter this callback
+                         * maintains, not the inodelk lock_count. */
+                        gf_msg_trace (this->name, 0,
+                                      "%d servers locked. Trying again "
+                                      "with blocking calls",
+                                      int_lock->entrylk_lock_count);
+
+                        afr_unlock(frame, this);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Wind a non-blocking entry write-lock request (ENTRYLK_LOCK_NB,
+ * ENTRYLK_WRLCK) for every lockee on every child flagged in local->child_up.
+ * fentrylk on local->fd is used when the transaction has an fd; otherwise
+ * entrylk on the lockee's loc is used.  Replies are handled by
+ * afr_nonblocking_entrylk_cbk().
+ *
+ * Returns -1 if the fd context cannot be obtained (EINVAL is recorded and
+ * afr_unlock() is called first); returns 0 on every other path, including
+ * the one where no lock request is expected at all.
+ */
+int
+afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ copies = priv->child_count;
+ initialize_entrylk_variables (frame, this);
+
+ if (local->fd) {
+ /* fd_ctx is only checked for existence; none of its fields are
+    read in this function */
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock (frame, this);
+ return -1;
+ }
+
+ /* one request per lockee per child counted by internal_lock_count() */
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_INFO_COMMON,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock (frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking entrylk calls only on up children
+ and where the fd has been opened */
+ /* NOTE(review): the loop below tests only local->child_up[];
+    no per-child "fd opened" state is consulted here */
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ /* i walks (lockee, child) pairs; it is also the cookie the
+    callback splits back into lockee_no and child index */
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
+ this->name, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ /* stop as soon as the expected number of requests is out;
+    presumably frame/local must not be touched after the
+    last wind -- confirm */
+ if (!--call_count)
+ break;
+ }
+ }
+ } else {
+ /* no fd: same accounting, but lock by loc with entrylk */
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+ }
+out:
+ return 0;
+}
+
+/*
+ * Reply handler for a single non-blocking inodelk request issued by
+ * afr_nonblocking_inodelk().  It is also called directly by that function
+ * with op_ret == 1 to signal a piggybacked lock.
+ *
+ * @cookie is the child index the request was sent to.
+ *
+ * On failure the child's eager_lock flag (if the array exists) is cleared;
+ * ENOSYS is additionally recorded in local and int_lock.  On success the
+ * child is flagged LOCKED_YES in the domain's inodelk and its lock_count is
+ * incremented; if eager locking is active for the child on an fd and the
+ * lock really came from the server (op_ret == 0), the fd context's
+ * lock_acquired counter for that child is incremented.
+ *
+ * When the last outstanding reply arrives, lock_cbk is invoked if every
+ * expected lock was obtained; otherwise afr_unlock() is called.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_internal_lock_t *int_lock    = NULL;
+        afr_inodelk_t       *inodelk     = NULL;
+        afr_local_t         *local       = NULL;
+        int                  call_count  = 0;
+        int                  child_index = (long) cookie;
+        afr_fd_ctx_t        *fd_ctx      = NULL;
+
+        local    = frame->local;
+        int_lock = &local->internal_lock;
+        inodelk  = afr_get_inodelk (int_lock, int_lock->domain);
+
+        AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
+                               AFR_LOCK_OP, NULL, op_ret,
+                               op_errno, (long) cookie);
+
+        /* may yield NULL (the sender checks for that too), so every use
+         * below must be guarded */
+        if (local->fd)
+                fd_ctx = afr_fd_ctx_get (local->fd, this);
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret < 0) {
+                        if (op_errno == ENOSYS) {
+                                /* the subvolume has no locking support:
+                                 * record the failure (op_ret/ENOSYS) */
+                                gf_msg (this->name, GF_LOG_ERROR, ENOSYS,
+                                        AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+                                        "subvolume does not support "
+                                        "locking. please load features/locks"
+                                        " xlator on server");
+                                local->op_ret           = op_ret;
+                                int_lock->lock_op_ret   = op_ret;
+                                int_lock->lock_op_errno = op_errno;
+                                local->op_errno         = op_errno;
+                        }
+                        if (local->transaction.eager_lock)
+                                local->transaction.eager_lock[child_index] = 0;
+                } else {
+                        inodelk->locked_nodes[child_index] |= LOCKED_YES;
+                        inodelk->lock_count++;
+
+                        /* op_ret == 1 means the lock was piggybacked on one
+                         * already held, so there is nothing to account;
+                         * op_ret == 0 means it was acquired from the
+                         * server.  fd_ctx is checked because the lookup
+                         * above can fail. */
+                        if (local->transaction.eager_lock &&
+                            local->transaction.eager_lock[child_index] &&
+                            local->fd && fd_ctx && op_ret == 0) {
+                                fd_ctx->lock_acquired[child_index]++;
+                        }
+                }
+
+                call_count = --int_lock->lk_call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (call_count == 0) {
+                gf_msg_trace (this->name, 0,
+                              "Last inode locking reply received");
+                /* all locks successful. Proceed to call FOP */
+                if (inodelk->lock_count == int_lock->lk_expected_count) {
+                        gf_msg_trace (this->name, 0,
+                                      "All servers locked. Calling the cbk");
+                        int_lock->lock_op_ret = 0;
+                        int_lock->lock_cbk (frame, this);
+                }
+                /* Not all locks were successful: release what was taken.
+                   NOTE(review): the retry with blocking locks mentioned in
+                   the log message is not performed here; presumably it is
+                   driven from the unlock completion path -- confirm. */
+                else {
+                        /* report the counter this path actually maintains */
+                        gf_msg_trace (this->name, 0,
+                                      "%d servers locked. "
+                                      "Trying again with blocking calls",
+                                      inodelk->lock_count);
+
+                        afr_unlock(frame, this);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Issue non-blocking inode lock requests (F_SETLK) in int_lock->domain on
+ * every child flagged in local->child_up.  finodelk on local->fd is used when
+ * the transaction has an fd; otherwise inodelk on local->loc is used.
+ * Replies are handled by afr_nonblocking_inodelk_cbk().
+ *
+ * With an fd and transaction.eager_lock_on set, a child whose fd context
+ * already records an acquired lock is not contacted: its piggyback counter
+ * is bumped and the callback is invoked directly with op_ret == 1.
+ *
+ * Returns -1 if the fd context cannot be obtained (EINVAL is recorded and
+ * afr_unlock() is called first); returns 0 on every other path, including
+ * the one where no lock request is expected at all.
+ */
+int
+afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ /* flock mirrors the range and type requested for this domain */
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+
+ /* full_flock keeps l_start and l_len at zero from the initialiser;
+    presumably this denotes a whole-file lock -- confirm */
+ full_flock.l_type = inodelk->flock.l_type;
+
+ initialize_inodelk_variables (frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock (frame, this);
+ ret = -1;
+ goto out;
+ }
+
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_ALL_SUBVOLS_DOWN,
+ "All bricks are down, aborting.");
+ /* ret is still 0 here, so this path reports success */
+ afr_unlock (frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking inodelk calls only on up children
+ and where the fd has been opened */
+ /* NOTE(review): the loop below tests only local->child_up[];
+    no per-child "fd opened" state is consulted here */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ /* without eager locking, lock just the requested range */
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
+ goto wind;
+ }
+
+ piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
+
+ afr_set_delayed_post_op (frame, this);
+
+ /* lock_acquired/lock_piggyback are examined and updated
+    under the fd's lock */
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_acquired[i]) {
+ fd_ctx->lock_piggyback[i]++;
+ piggyback = 1;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ if (piggyback) {
+ /* (op_ret == 1) => indicate piggybacked lock */
+ afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
+ /* a piggybacked child counts as one of the expected replies */
+ if (!--call_count)
+ break;
+ continue;
+ }
+ /* eager locking without an existing lock: use full_flock */
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, flock_use, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ /* stop as soon as the expected number of requests is out;
+    presumably frame/local must not be touched after the
+    last wind -- confirm */
+ if (!--call_count)
+ break;
+ }
+ } else {
+ /* no fd: lock the requested range by loc with inodelk */
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+out:
+ return ret;
+}
+
+/*
+ * Release the internal lock held for this frame.
+ *
+ * When transaction_lk_op() holds for the local, is_afr_lock_transaction()
+ * selects between the inodelk and entrylk unlock paths; otherwise
+ * is_afr_lock_selfheal() makes that selection.  A true result picks
+ * afr_unlock_inodelk(), a false one afr_unlock_entrylk().
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local       = frame->local;
+        int          use_inodelk = 0;
+
+        if (transaction_lk_op (local)) {
+                if (is_afr_lock_transaction (local))
+                        use_inodelk = 1;
+        } else if (is_afr_lock_selfheal (local)) {
+                use_inodelk = 1;
+        }
+
+        if (use_inodelk)
+                afr_unlock_inodelk (frame, this);
+        else
+                afr_unlock_entrylk (frame, this);
+
+        return 0;
+}
+
+/*
+ * Hand the inode-lock state for domain @dom over from frame @src to frame
+ * @dst.
+ *
+ * The first @child_count entries of the source's locked_nodes array are
+ * copied to the destination and then zeroed in the source (skipped when the
+ * source array is NULL).  The transaction/selfheal lock types are copied, and
+ * the lock count moves to the destination, leaving the source at 0.
+ *
+ * Returns 0 on success, -1 if either frame has no inodelk for @dom.
+ */
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+                          unsigned int child_count)
+{
+        afr_local_t         *src_local   = src->local;
+        afr_internal_lock_t *src_lock    = &src_local->internal_lock;
+        afr_inodelk_t       *src_inodelk = afr_get_inodelk (src_lock, dom);
+        afr_local_t         *dst_local   = dst->local;
+        afr_internal_lock_t *dst_lock    = &dst_local->internal_lock;
+        afr_inodelk_t       *dst_inodelk = afr_get_inodelk (dst_lock, dom);
+        size_t               nbytes      = 0;
+
+        if (!dst_inodelk || !src_inodelk)
+                return -1;
+
+        if (src_inodelk->locked_nodes) {
+                /* both arrays belong to afr_inodelk_t, so one element size
+                 * serves the copy and the wipe */
+                nbytes = sizeof (*dst_inodelk->locked_nodes) * child_count;
+                memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+                        nbytes);
+                memset (src_inodelk->locked_nodes, 0, nbytes);
+        }
+
+        dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+        dst_lock->selfheal_lk_type    = src_lock->selfheal_lk_type;
+
+        /* the source no longer owns any of the locks */
+        dst_inodelk->lock_count = src_inodelk->lock_count;
+        src_inodelk->lock_count = 0;
+
+        return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 27117c1848c..7f7962013d7 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,20 +17,35 @@
enum gf_afr_mem_types_ {
gf_afr_mt_iovec = gf_common_mt_end + 1,
gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_local_t,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
gf_afr_mt_xattr_key,
gf_afr_mt_dict_t,
gf_afr_mt_xlator_t,
- gf_afr_mt_stat,
+ gf_afr_mt_iatt,
gf_afr_mt_int,
gf_afr_mt_afr_node_character,
gf_afr_mt_sh_diff_loop_state,
gf_afr_mt_uint8_t,
gf_afr_mt_loc_t,
gf_afr_mt_entry_name,
+ gf_afr_mt_pump_priv,
+ gf_afr_mt_locked_fd,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_fd_paused_call_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
+ gf_afr_mt_spbc_timeout_t,
+ gf_afr_mt_spb_status_t,
+ gf_afr_mt_empty_brick_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
new file mode 100644
index 00000000000..c7af18d0f25
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -0,0 +1,373 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _AFR_MESSAGES_H_
+#define _AFR_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file afr-messages.h
+ * \brief AFR log-message IDs and their descriptions.
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
+#define GLFS_NUM_MESSAGES 42
+#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
+
+/*!
+ * @messageid 108001
+ * @diagnosis Client quorum is not met due to which file modification
+ * operations are disallowed.
+ * @recommendedaction Some brick processes are down/ not visible from the
+ * client. Ensure that the bricks are up/ network traffic is not blocked.
+ */
+#define AFR_MSG_QUORUM_FAIL (GLFS_COMP_BASE_AFR + 1)
+
+
+/*!
+ * @messageid 108002
+ * @diagnosis The bricks that were down are now up and quorum is restored.
+ * @recommendedaction Possibly check why the bricks went down to begin with.
+ */
+#define AFR_MSG_QUORUM_MET (GLFS_COMP_BASE_AFR + 2)
+
+
+/*!
+ * @messageid 108003
+ * @diagnosis Client quorum-type was set to auto due to which the quorum-count
+ * option is no longer valid.
+ * @recommendedaction None.
+ */
+#define AFR_MSG_QUORUM_OVERRIDE (GLFS_COMP_BASE_AFR + 3)
+
+
+/*!
+ * @messageid 108004
+ * @diagnosis Replication sub volume witnessed a connection notification
+ * from a brick which does not belong to its replica set.
+ * @recommendedaction None. This is a safety check in code.
+ */
+#define AFR_MSG_INVALID_CHILD_UP (GLFS_COMP_BASE_AFR + 4)
+
+
+/*!
+ * @messageid 108005
+ * @diagnosis A replica set that was inaccessible because all its bricks were
+ * down is now accessible because at least one of its bricks came back up.
+ * @recommendedaction Possibly check why all the bricks of that replica set
+ * went down to begin with.
+ */
+#define AFR_MSG_SUBVOL_UP (GLFS_COMP_BASE_AFR + 5)
+
+
+/*!
+ * @messageid 108006
+ * @diagnosis All bricks of a replica set are down. Data residing in that
+ * replica cannot be accessed until one of the bricks comes back up.
+ * @recommendedaction Ensure that the bricks are up.
+ */
+#define AFR_MSG_ALL_SUBVOLS_DOWN (GLFS_COMP_BASE_AFR + 6)
+
+
+/*!
+ * @messageid 108007
+ * @diagnosis Entry unlocks failed on a brick.
+ * @recommendedaction Error number in the log should give the reason why it
+ * failed. Also observe brick logs for more information.
+*/
+#define AFR_MSG_ENTRY_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 7)
+
+
+/*!
+ * @messageid 108008
+ * @diagnosis There is an inconsistency in the file's data/metadata/gfid
+ * amongst the bricks of a replica set.
+ * @recommendedaction Resolve the split brain by clearing the AFR changelog
+ * attributes from the appropriate brick and trigger self-heal.
+ */
+#define AFR_MSG_SPLIT_BRAIN (GLFS_COMP_BASE_AFR + 8)
+
+
+/*!
+ * @messageid 108009
+ * @diagnosis open/opendir failed on a brick.
+ * @recommendedaction Error number in the log should give the reason why it
+ * failed. Also observe brick logs for more information.
+ */
+#define AFR_MSG_OPEN_FAIL (GLFS_COMP_BASE_AFR + 9)
+
+
+/*!
+ * @messageid 108010
+ * @diagnosis Inode unlocks failed on a brick.
+ * @recommendedaction Error number in the log should give the reason why it
+ * failed. Also observe brick logs for more information.
+*/
+#define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10)
+
+/*!
+ * @messageid 108011
+ * @diagnosis Setting of pending xattrs succeeded/failed during replace-brick
+ * operation.
+ * @recommendedaction In case of failure, error number in the log should give
+ * the reason why it failed. Also observe brick logs for more information.
+*/
+#define AFR_MSG_REPLACE_BRICK_STATUS (GLFS_COMP_BASE_AFR + 11)
+
+/*!
+ * @messageid 108012
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_GFID_NULL (GLFS_COMP_BASE_AFR + 12)
+
+/*!
+ * @messageid 108013
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_FD_CREATE_FAILED (GLFS_COMP_BASE_AFR + 13)
+
+/*!
+ * @messageid 108014
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_DICT_SET_FAILED (GLFS_COMP_BASE_AFR + 14)
+
+/*!
+ * @messageid 108015
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR (GLFS_COMP_BASE_AFR + 15)
+
+/*!
+ * @messageid 108016
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_MIGRATION_IN_PROGRESS (GLFS_COMP_BASE_AFR + 16)
+
+/*!
+ * @messageid 108017
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_CHILD_MISCONFIGURED (GLFS_COMP_BASE_AFR + 17)
+
+/*!
+ * @messageid 108018
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_VOL_MISCONFIGURED (GLFS_COMP_BASE_AFR + 18)
+
+/*!
+ * @messageid 108019
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_BLOCKING_LKS_FAILED (GLFS_COMP_BASE_AFR + 19)
+
+/*!
+ * @messageid 108020
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_INVALID_FD (GLFS_COMP_BASE_AFR + 20)
+
+/*!
+ * @messageid 108021
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_LOCK_INFO (GLFS_COMP_BASE_AFR + 21)
+
+/*!
+ * @messageid 108022
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED (GLFS_COMP_BASE_AFR + 22)
+
+/*!
+ * @messageid 108023
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_FD_CTX_GET_FAILED (GLFS_COMP_BASE_AFR + 23)
+
+/*!
+ * @messageid 108024
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_INVALID_SUBVOL (GLFS_COMP_BASE_AFR + 24)
+
+/*!
+ * @messageid 108025
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_PUMP_XLATOR_ERROR (GLFS_COMP_BASE_AFR + 25)
+
+/*!
+ * @messageid 108026
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_SELF_HEAL_INFO (GLFS_COMP_BASE_AFR + 26)
+
+/*!
+ * @messageid 108027
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_READ_SUBVOL_ERROR (GLFS_COMP_BASE_AFR + 27)
+
+/*!
+ * @messageid 108028
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_DICT_GET_FAILED (GLFS_COMP_BASE_AFR + 28)
+
+
+/*!
+ * @messageid 108029
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_INFO_COMMON (GLFS_COMP_BASE_AFR + 29)
+
+/*!
+ * @messageid 108030
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR (GLFS_COMP_BASE_AFR + 30)
+
+/*!
+ * @messageid 108031
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_LOCAL_CHILD (GLFS_COMP_BASE_AFR + 31)
+
+/*!
+ * @messageid 108032
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_INVALID_DATA (GLFS_COMP_BASE_AFR + 32)
+
+/*!
+ * @messageid 108033
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_INVALID_ARG (GLFS_COMP_BASE_AFR + 33)
+
+/*!
+ * @messageid 108034
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_INDEX_DIR_GET_FAILED (GLFS_COMP_BASE_AFR + 34)
+
+/*!
+ * @messageid 108035
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_FSYNC_FAILED (GLFS_COMP_BASE_AFR + 35)
+
+/*!
+ * @messageid 108036
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_FAVORITE_CHILD (GLFS_COMP_BASE_AFR + 36)
+/*!
+ * @messageid 108037
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_SELF_HEAL_FAILED (GLFS_COMP_BASE_AFR + 37)
+
+/*!
+ * @messageid 108038
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_SPLIT_BRAIN_STATUS (GLFS_COMP_BASE_AFR + 38)
+
+/*!
+ * @messageid 108039
+ * @diagnosis Setting of pending xattrs succeeded/failed during add-brick
+ * operation.
+ * @recommendedaction In case of failure, error number in the log should give
+ * the reason why it failed. Also observe brick logs for more information.
+*/
+#define AFR_MSG_ADD_BRICK_STATUS (GLFS_COMP_BASE_AFR + 39)
+
+
+/*!
+ * @messageid 108040
+ * @diagnosis AFR was unable to be loaded because the pending-changelog xattrs
+ * were not found in the volfile.
+ * @recommendedaction Please ensure cluster op-version is at least 30707 and the
+ * volfiles are regenerated.
+*/
+#define AFR_MSG_NO_CHANGELOG (GLFS_COMP_BASE_AFR + 40)
+
+/*!
+ * @messageid 108041
+ * @diagnosis Unable to create timer thread for delayed initialization.
+ * @recommendedaction Possibly check process's log file for messages from
+ * timer infra.
+*/
+#define AFR_MSG_TIMER_CREATE_FAIL (GLFS_COMP_BASE_AFR + 41)
+
+/*!
+ * @messageid 108042
+ * @diagnosis Log messages relating to automated resolution of split-brain files
+ * based on favorite child policies.
+ * @recommendedaction
+*/
+#define AFR_MSG_SBRAIN_FAV_CHILD_POLICY (GLFS_COMP_BASE_AFR + 42)
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 1bfeb9cdfb8..059d3f9bd71 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -24,11 +15,6 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "afr.h"
#include "dict.h"
@@ -53,435 +39,291 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-
-int
-afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
{
- afr_local_t * local = frame->local;
-
- AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
- return 0;
-}
-
-
-int
-afr_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int child_index = (long) cookie;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = 0;
-
- int call_count = -1;
-
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->success_count++;
-
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set fd ctx for fd=%p",
- fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
- fd_ctx->flags = local->cont.open.flags;
- fd_ctx->wbflags = local->cont.open.wbflags;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
- STACK_WIND (frame, afr_open_ftruncate_cbk,
- this, this->fops->ftruncate,
- fd, 0);
- } else {
- AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
-
- return 0;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (gf_uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int i = 0;
- int ret = -1;
-
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
+ afr_local_t * local = frame->local;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
-
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- op_errno = EIO;
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- loc_copy (&local->loc, loc);
-
- local->cont.open.flags = flags;
- local->cont.open.wbflags = wbflags;
-
- local->fd = fd_ref (fd);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, wind_flags, fd, wbflags);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
-
- return 0;
+ AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, xdata);
+ return 0;
}
int
-afr_up_down_flush_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+afr_open_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
LOCK (&frame->lock);
{
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
}
-out:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local->transaction.post_post_op (frame, this);
+ if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
+ STACK_WIND (frame, afr_open_ftruncate_cbk,
+ this, this->fops->ftruncate,
+ fd, 0, NULL);
+ } else {
+ AFR_STACK_UNWIND (open, frame, local->op_ret,
+ local->op_errno, local->fd,
+ local->xdata_rsp);
+ }
}
return 0;
}
-
-static int
-__unopened_count (int child_count, unsigned char *opened_on, unsigned char *child_up)
-{
- int i;
- int count = 0;
-
- for (i = 0; i < child_count; i++) {
- if (!opened_on[i] && child_up[i])
- count++;
- }
-
- return count;
-}
-
-
+/*
+ * afr_open: open fop handler.
+ *
+ * Initialises the afr local for @frame, takes a reference on @fd, stores
+ * the caller's @flags in the fd context and in local->cont.open, and winds
+ * an open to every child marked up in local->child_up. Each wind uses
+ * afr_open_cbk as callback and the child index as cookie.
+ *
+ * O_TRUNC is masked out of the flags sent to the children.
+ *
+ * If the local or the fd context cannot be set up, the fop is unwound
+ * with -1 and op_errno. Always returns 0.
+ */
int
-afr_up_down_flush_sh_unwind (call_frame_t *frame, xlator_t *this)
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
+ /* Truncation must not happen outside a transaction, so O_TRUNC is
+ * masked out of the flags wound to the children below. */
- int abandon = 0;
- int ret = 0;
- int i;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
+ priv = this->private;
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
-
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- abandon = 1;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ /* a missing fd context is reported to the caller as ENOMEM */
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- call_count = __unopened_count (priv->child_count, fd_ctx->opened_on,
- local->child_up);
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
- if (call_count == 0) {
- abandon = 1;
- goto out;
- }
+ /* NOTE(review): local->call_count is presumably set by
+ * AFR_FRAME_INIT to the number of up children - confirm. */
+ call_count = local->call_count;
- local->call_count = call_count;
+ local->cont.open.flags = flags;
for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening fd for %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_up_down_flush_open_cbk,
- (void *)(long) i,
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, local->fd,
- fd_ctx->wbflags);
-
+ loc, (flags & ~O_TRUNC), fd, xdata);
if (!--call_count)
break;
}
}
+ return 0;
out:
- if (abandon)
- local->transaction.post_post_op (frame, this);
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, NULL);
return 0;
}
-
+/*
+ * afr_openfd_fix_open_cbk: callback for the open/opendir calls wound by
+ * afr_fix_open(). @cookie carries the index of the child that replied.
+ *
+ * Logs the outcome, records it in fd_ctx->opened_on[] under the fd lock
+ * (AFR_FD_OPENED when op_ret >= 0, AFR_FD_NOT_OPENED otherwise), and
+ * destroys the frame once afr_frame_return() reports that no replies are
+ * outstanding. Always returns 0.
+ */
int
-afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this)
+afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- inode_path (local->fd->inode, NULL, (char **)&local->loc.path);
- local->loc.name = strrchr (local->loc.path, '/');
- local->loc.inode = inode_ref (local->fd->inode);
- local->loc.parent = inode_parent (local->fd->inode, 0, NULL);
-
- /* forcibly trigger missing-entries self-heal */
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret >= 0) {
+ gf_msg_debug (this->name, 0, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_msg (this->name, fop_log_level (GF_FOP_OPEN, op_errno),
+ op_errno, AFR_MSG_OPEN_FAIL, "Failed to open %s on "
+ "subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ }
- local->success_count = 1;
- local->enoent_count = 1;
+ fd_ctx = local->fd_ctx;
- sh->data_lock_held = _gf_true;
- sh->need_data_self_heal = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_up_down_flush_sh_unwind;
+ /* a failed open puts the child back to AFR_FD_NOT_OPENED, the state
+ * afr_fd_ctx_need_open() looks for when picking children to open */
+ LOCK (&local->fd->lock);
+ {
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&local->fd->lock);
- afr_self_heal (frame, this);
+ /* the frame was created by afr_fix_open(); nobody is waiting for an
+ * unwind, so the last reply simply destroys it */
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
return 0;
}
-int
-afr_up_down_flush_wind (call_frame_t *frame, xlator_t *this)
+/*
+ * afr_fd_ctx_need_open: work out on which children @fd still has to be
+ * opened.
+ *
+ * Under the fd lock, every child that is up (priv->child_up) and whose
+ * opened_on[] state is AFR_FD_NOT_OPENED is switched to AFR_FD_OPENING
+ * and flagged with 1 in @need_open; every other entry of @need_open is
+ * set to 0. @need_open must have room for priv->child_count entries.
+ *
+ * Returns the number of children flagged, or 0 when @fd has no afr fd
+ * context (in which case @need_open is not written).
+ */
+static int
+afr_fd_ctx_need_open (fd_t *fd, xlator_t *this, unsigned char *need_open)
{
- afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- local->transaction.resume (frame, this);
- return 0;
-}
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return 0;
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ /* the state check and the switch to AFR_FD_OPENING both
+ * happen while fd->lock is held */
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
+ }
+ }
+ UNLOCK (&fd->lock);
-int
-afr_up_down_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
+ return count;
+}
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
- int _ret = -1;
- int i = 0;
+/*
+ * afr_fix_open: open @fd on the children that are up but on which it has
+ * not been opened yet.
+ *
+ * afr_fd_ctx_need_open() picks those children and marks them
+ * AFR_FD_OPENING. A fresh frame is then created and an opendir (for
+ * directories) or an open (otherwise, using the flags saved in the fd
+ * context with O_TRUNC masked out) is wound to each of them, with
+ * afr_openfd_fix_open_cbk as callback.
+ *
+ * Nothing is reported to the caller. If setup fails after children have
+ * been marked AFR_FD_OPENING, they are put back to AFR_FD_NOT_OPENED so
+ * that a later call can retry them.
+ */
+void
+afr_fix_open (fd_t *fd, xlator_t *this)
+{
+        afr_private_t *priv       = NULL;
+        int            i          = 0;
+        call_frame_t  *frame      = NULL;
+        afr_local_t   *local      = NULL;
+        int            ret        = -1;
+        int32_t        op_errno   = 0;
+        afr_fd_ctx_t  *fd_ctx     = NULL;
+        unsigned char *need_open  = NULL;
+        int            call_count = 0;
priv = this->private;
- local = frame->local;
-
- LOCK (&local->fd->lock);
- {
- _ret = __fd_ctx_get (local->fd, this, &ctx);
-
- if (_ret < 0) {
- goto out;
- }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->down_count = priv->down_count;
- fd_ctx->up_count = priv->up_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- fd_ctx->pre_op_done[i] = 0;
- }
- }
-out:
- UNLOCK (&local->fd->lock);
+        if (!afr_is_fd_fixable (fd))
+                goto out;
- afr_local_transaction_cleanup (local, this);
+        fd_ctx = afr_fd_ctx_get (fd, this);
+        if (!fd_ctx)
+                goto out;
- local->up_down_flush_cbk (frame, this);
-
- return 0;
-}
+        /* from here on call_count > 0 means "children are marked
+           AFR_FD_OPENING and nothing has been wound for them yet" */
+        need_open = alloca0 (priv->child_count);
+        call_count = afr_fd_ctx_need_open (fd, this, need_open);
+        if (!call_count)
+                goto out;
-int
-afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd,
- afr_flush_type type)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
- int op_ret = -1;
+        local = AFR_FRAME_INIT (frame, op_errno);
+        if (!local)
+                goto out;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+        local->loc.inode = inode_ref (fd->inode);
+        ret = loc_path (&local->loc, NULL);
+        if (ret < 0)
+                goto out;
- priv = this->private;
+        local->fd = fd_ref (fd);
+        local->fd_ctx = fd_ctx;
- local = frame->local;
+        local->call_count = call_count;
- local->op = GF_FOP_FLUSH;
+        gf_msg_debug (this->name, 0, "need open count: %d",
+                      call_count);
-// local->fd = fd_ref (local->fd);
+        for (i = 0; i < priv->child_count; i++) {
+                if (!need_open[i])
+                        continue;
- local->transaction.fop = afr_up_down_flush_wind;
- local->transaction.done = afr_up_down_flush_done;
+                if (IA_IFDIR == fd->inode->ia_type) {
+                        gf_msg_debug (this->name, 0,
+                                      "opening fd for dir %s on subvolume %s",
+                                      local->loc.path, priv->children[i]->name);
- switch (type) {
- case AFR_CHILD_UP_FLUSH:
- local->transaction.post_post_op = afr_up_down_flush_post_post_op;
- break;
+                        STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+                                           (void*) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->opendir,
+                                           &local->loc, local->fd,
+                                           NULL);
+                } else {
+                        gf_msg_debug (this->name, 0,
+                                      "opening fd for file %s on subvolume %s",
+                                      local->loc.path, priv->children[i]->name);
+
+                        STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+                                           (void *)(long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->open,
+                                           &local->loc,
+                                           fd_ctx->flags & (~O_TRUNC),
+                                           local->fd, NULL);
+                }
- case AFR_CHILD_DOWN_FLUSH:
- local->transaction.post_post_op = NULL;
- break;
+                if (!--call_count)
+                        break;
}
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- gf_log (this->name, GF_LOG_TRACE,
- "doing up/down flush on fd=%p",
- fd);
-
- afr_transaction (frame, this, AFR_FLUSH_TRANSACTION);
-
- op_ret = 0;
+        return;
out:
- return 0;
+        /* Setup failed before anything was wound: no callback will ever
+           reset the entries marked AFR_FD_OPENING above, and
+           afr_fd_ctx_need_open() only retries AFR_FD_NOT_OPENED ones, so
+           put them back here. */
+        if (call_count > 0) {
+                LOCK (&fd->lock);
+                {
+                        for (i = 0; i < priv->child_count; i++) {
+                                if (need_open[i] &&
+                                    fd_ctx->opened_on[i] == AFR_FD_OPENING)
+                                        fd_ctx->opened_on[i] =
+                                                AFR_FD_NOT_OPENED;
+                        }
+                }
+                UNLOCK (&fd->lock);
+        }
+
+        if (frame)
+                AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
new file mode 100644
index 00000000000..74749f029c8
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -0,0 +1,268 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "afr.h"
+#include "afr-transaction.h"
+#include "afr-messages.h"
+
+/*
+ * afr_read_txn_next_subvol: hand the read to the next candidate child.
+ *
+ * Scans the children in index order. Children that are not readable are
+ * marked as attempted so they are never considered again; the first
+ * readable child that has not been attempted yet is marked attempted and
+ * passed to local->readfn. When no such child exists, local->readfn is
+ * called with -1 to signal that the readable children are exhausted.
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            next  = -1;
+        int            idx   = 0;
+
+        while (idx < priv->child_count && next == -1) {
+                if (!local->readable[idx]) {
+                        /* not worth trying: flag it and keep scanning */
+                        local->read_attempted[idx] = 1;
+                } else if (!local->read_attempted[idx]) {
+                        /* first untried readable child: claim it */
+                        local->read_attempted[idx] = 1;
+                        next = idx;
+                }
+                idx++;
+        }
+
+        /* next stays -1 when every readable child was already tried */
+        local->readfn (frame, this, next);
+
+        return 0;
+}
+
+/*
+ * AFR_READ_TXN_SET_ERROR_AND_GOTO: record a read failure and jump.
+ *
+ * Stores @ret and @errnum in local->op_ret / local->op_errno, sets
+ * read_subvol to @index, logs a split-brain error for the fop and gfid,
+ * and jumps to @label.
+ *
+ * The expansion uses the names `local', `this', `inode' and `read_subvol'
+ * from the calling scope. The logged errno is always EIO, regardless of
+ * @errnum. Arguments are parenthesised so that expressions can be passed
+ * safely.
+ */
+#define AFR_READ_TXN_SET_ERROR_AND_GOTO(ret, errnum, index, label)        \
+        do {                                                              \
+                local->op_ret = (ret);                                    \
+                local->op_errno = (errnum);                               \
+                read_subvol = (index);                                    \
+                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,\
+                        "Failing %s on gfid %s: split-brain observed.",\
+                        gf_fop_list[local->op], uuid_utoa (inode->gfid));\
+                goto label;                                               \
+        } while (0)
+
+/*
+ * afr_read_txn_refresh_done: continuation run after an inode refresh
+ * started by the read transaction.
+ *
+ * @err non-zero: the refresh failed; op_ret/op_errno are set from it and
+ * no child is selected.
+ * Otherwise the readable children are recomputed and one is selected by
+ * policy. If none qualifies (-EIO, zero event generation, or no child
+ * selected) the read is failed with EIO via
+ * AFR_READ_TXN_SET_ERROR_AND_GOTO. If the selected child was already
+ * tried, the choice is delegated to afr_read_txn_next_subvol().
+ *
+ * Whenever no child was selected, a split-brain choice recorded on the
+ * inode (if any) is used instead before local->readfn is invoked.
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+        afr_local_t *local            = frame->local;
+        inode_t     *inode            = local->inode;
+        int          read_subvol      = -1;
+        int          event_generation = 0;
+        int          spb_choice       = -1;
+        int          ret              = -1;
+
+        if (err) {
+                /* read_subvol is already -1 */
+                local->op_ret = -1;
+                local->op_errno = -err;
+                goto readfn;
+        }
+
+        ret = afr_inode_get_readable (frame, inode, this, local->readable,
+                                      &event_generation,
+                                      local->transaction.type);
+
+        /* only ask the policy for a child when the refresh produced a
+           usable view; otherwise read_subvol stays -1 */
+        if (ret != -EIO && event_generation) {
+                read_subvol = afr_read_subvol_select_by_policy (inode, this,
+                                                        local->readable,
+                                                        NULL);
+        }
+
+        if (read_subvol == -1) {
+                /* Even after refresh, we don't have a good
+                   read subvolume. Time to bail */
+                AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
+        }
+
+        if (local->read_attempted[read_subvol]) {
+                afr_read_txn_next_subvol (frame, this);
+                return 0;
+        }
+
+        local->read_attempted[read_subvol] = 1;
+
+readfn:
+        if (read_subvol == -1 &&
+            afr_inode_split_brain_choice_get (inode, this,
+                                              &spb_choice) == 0 &&
+            spb_choice >= 0) {
+                read_subvol = spb_choice;
+        }
+
+        local->readfn (frame, this, read_subvol);
+
+        return 0;
+}
+
+
+/*
+ * afr_read_txn_continue: pick what to do after a read attempt.
+ *
+ * The first time it is called for a transaction it triggers a single
+ * inode refresh (continuing in afr_read_txn_refresh_done); on every later
+ * call it moves on to the next untried readable child. @subvol is not
+ * used. Always returns 0.
+ */
+int
+afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
+{
+        afr_local_t *local = frame->local;
+
+        if (local->refreshed) {
+                afr_read_txn_next_subvol (frame, this);
+                return 0;
+        }
+
+        /* refresh only once per transaction */
+        local->refreshed = _gf_true;
+        afr_inode_refresh (frame, this, local->inode, NULL,
+                           afr_read_txn_refresh_done);
+
+        return 0;
+}
+
+
+/* afr_read_txn_wipe:
+
+   Clean the read-transaction state kept in @local so that
+   afr_read_txn() can be called multiple times from the same frame:
+
+   - forget the read function,
+   - drop the reference held on local->inode and clear the pointer,
+   - reset read_attempted[] and readable[] for every child.
+*/
+
+void
+afr_read_txn_wipe (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+        int            i     = 0;
+
+        local = frame->local;
+        priv = this->private;
+
+        local->readfn = NULL;
+
+        if (local->inode) {
+                inode_unref (local->inode);
+                /* the reference is gone: clear the pointer so that a
+                   repeated wipe cannot unref the inode a second time */
+                local->inode = NULL;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                local->read_attempted[i] = 0;
+                local->readable[i] = 0;
+        }
+}
+
+
+/*
+   afr_read_txn:
+
+   This is the read transaction function. The way it works:
+
+   - Determine read-subvolume from inode ctx.
+
+   - If read-subvolume's generation was stale, refresh ctx once by
+     calling afr_inode_refresh()
+
+     Else make an attempt to read on read-subvolume.
+
+   - If attempted read on read-subvolume fails, refresh ctx once
+     by calling afr_inode_refresh()
+
+   - After ctx refresh, query read-subvolume freshly and attempt
+     read once.
+
+   - If read fails, try every other readable[] subvolume before
+     finally giving up. readable[] elements are set by afr_inode_refresh()
+     based on dirty and pending flags.
+
+   - If file is in split brain in the backend, generation will be
+     kept 0 by afr_inode_refresh() and readable[] will be set 0 for
+     all elements. Therefore reads always fail.
+
+   Parameters:
+     frame  - call frame whose local carries the transaction state
+     this   - the afr xlator
+     inode  - inode being read; a reference is taken in local->inode
+     readfn - called as readfn (frame, this, subvol) with the chosen
+              child index, or with -1 when quorum for reads is not met
+     type   - transaction type, stored in local->transaction.type
+
+   Always returns 0; the outcome is delivered through @readfn.
+*/
+
+int
+afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+              afr_read_txn_wind_t readfn, afr_transaction_type type)
+{
+        afr_local_t   *local            = NULL;
+        afr_private_t *priv             = NULL;
+        unsigned char *data             = NULL;
+        unsigned char *metadata         = NULL;
+        int            read_subvol      = -1;
+        int            event_generation = 0;
+        int            ret              = -1;
+
+        priv = this->private;
+        local = frame->local;
+        data = alloca0 (priv->child_count);
+        metadata = alloca0 (priv->child_count);
+
+        afr_read_txn_wipe (frame, this);
+
+        local->readfn = readfn;
+        local->inode = inode_ref (inode);
+
+        if (priv->quorum_reads &&
+            priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
+                local->op_ret = -1;
+                local->op_errno = ENOTCONN;
+                read_subvol = -1;
+                goto read;
+        }
+
+        local->transaction.type = type;
+        ret = afr_inode_read_subvol_get (inode, this, data, metadata,
+                                         &event_generation);
+        if (ret == -1)
+                /* very first transaction on this inode */
+                goto refresh;
+        AFR_INTERSECT (local->readable, data, metadata, priv->child_count);
+
+        gf_msg_debug (this->name, 0, "%s: generation now vs cached: %d, "
+                      "%d", uuid_utoa (inode->gfid), local->event_generation,
+                      event_generation);
+        if (local->event_generation != event_generation)
+                /* servers have disconnected / reconnected, and possibly
+                   rebooted, very likely changing the state of freshness
+                   of copies */
+                goto refresh;
+
+        read_subvol = afr_read_subvol_select_by_policy (inode, this,
+                                                        local->readable, NULL);
+
+        /* valid child indices are 0 .. child_count - 1; child_count itself
+           would index past the end of the per-child arrays used below */
+        if (read_subvol < 0 || read_subvol >= priv->child_count) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
+                        "Unreadable subvolume %d found with event generation "
+                        "%d for gfid %s. (Possible split-brain)",
+                        read_subvol, event_generation, uuid_utoa(inode->gfid));
+                goto refresh;
+        }
+
+        if (!local->child_up[read_subvol]) {
+                /* should never happen, just in case */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        AFR_MSG_READ_SUBVOL_ERROR, "subvolume %d is the "
+                        "read subvolume in this generation, but is not up",
+                        read_subvol);
+                goto refresh;
+        }
+
+        local->read_attempted[read_subvol] = 1;
+
+read:
+        local->readfn (frame, this, read_subvol);
+
+        return 0;
+
+refresh:
+        afr_inode_refresh (frame, this, inode, NULL, afr_read_txn_refresh_done);
+
+        return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
deleted file mode 100644
index ef9d4026e0d..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ /dev/null
@@ -1,1074 +0,0 @@
-/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "xlator.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "md5.h"
-
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-/*
- This file contains the various self-heal algorithms
-*/
-
-
-/*
- The "full" algorithm. Copies the entire file from
- source to sinks.
-*/
-
-
-static void
-sh_full_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- if (sh_priv)
- GF_FREE (sh_priv);
-}
-
-
-static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this);
-
-static int
-sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->loops_running--;
- }
- UNLOCK (&sh_priv->lock);
-
- gf_log (this->name, GF_LOG_TRACE,
- "loop for offset %"PRId64" returned", offset);
-
- AFR_STACK_DESTROY (rw_frame);
-
- sh_full_loop_driver (sh_frame, this);
-
- return 0;
-}
-
-
-static int
-sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- rw_sh->offset - op_ret);
-
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- }
- }
- UNLOCK (&sh_frame->lock);
-
- call_count = afr_frame_return (rw_frame);
-
- if (call_count == 0) {
- sh_full_loop_return (rw_frame, this, rw_sh->offset - op_ret);
- }
-
- return 0;
-}
-
-
-static int
-sh_full_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int i = 0;
- int call_count = 0;
-
- off_t offset = (long) cookie;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- call_count = sh->active_sinks;
-
- rw_local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, offset);
-
- if (op_ret <= 0) {
- sh->op_failed = 1;
-
- sh_full_loop_return (rw_frame, this, offset);
- return 0;
- }
-
- rw_sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
-
- sh_full_loop_return (rw_frame, this, offset);
- goto out;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
-
- STACK_WIND_COOKIE (rw_frame, sh_full_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- afr_self_heal_t *sh = NULL;
-
- call_frame_t *rw_frame = NULL;
-
- int32_t op_errno = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
-
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
-
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
-
- rw_sh->offset = sh->offset;
- rw_sh->sh_frame = frame;
-
- STACK_WIND_COOKIE (rw_frame, sh_full_read_cbk,
- (void *) (long) offset,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- offset);
- return 0;
-
-out:
- sh->op_failed = 1;
-
- sh_full_loop_driver (frame, this);
-
- return 0;
-}
-
-
-static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- int loop = 0;
- int recurse = 0;
-
- off_t offset = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- if (sh->op_failed) {
- if (sh_priv->loops_running == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "full self-heal aborting on %s",
- local->loc.path);
-
- sh_full_private_cleanup (frame, this);
- local->self_heal.algo_abort_cbk (frame, this);
- }
-
- goto out;
- }
-
- if (sh_priv->offset >= sh->file_size) {
- if (sh_priv->loops_running == 0) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "full self-heal completed on %s",
- local->loc.path);
-
- sh_full_private_cleanup (frame, this);
- local->self_heal.algo_completion_cbk (frame, this);
- }
-
- goto out;
- }
-
-spawn:
- loop = 0;
- recurse = 0;
-
- LOCK (&sh_priv->lock);
- {
- if ((sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- offset = sh_priv->offset;
- sh_priv->offset += sh->block_size;
-
- sh_priv->loops_running++;
-
- loop = 1;
-
- if (sh_priv->offset < sh->file_size)
- recurse = 1;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (loop) {
- sh_full_read_write (frame, this, offset);
- if (recurse)
- goto spawn;
- }
-
-out:
- return 0;
-}
-
-
-int
-afr_sh_algo_full (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
-
- local->call_count = 0;
-
- sh_full_loop_driver (frame, this);
- return 0;
-}
-
-
-/*
- * The "diff" algorithm. Copies only those blocks whose checksums
- * don't match with those of source.
- */
-
-
-static void
-sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- int i;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
-
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
- }
- }
-
- if (sh_priv) {
- if (sh_priv->loops)
- GF_FREE (sh_priv->loops);
-
- GF_FREE (sh_priv);
- }
-
-
-}
-
-
-static uint32_t
-__make_cookie (int loop_index, int child_index)
-{
- uint32_t ret = (loop_index << 16) | child_index;
- return ret;
-}
-
-
-static int
-__loop_index (uint32_t cookie)
-{
- return (cookie & 0xFFFF0000) >> 16;
-}
-
-
-static int
-__child_index (uint32_t cookie)
-{
- return (cookie & 0x0000FFFF);
-}
-
-
-static void
-sh_diff_loop_state_reset (struct sh_diff_loop_state *loop_state, int child_count)
-{
- loop_state->active = _gf_false;
-// loop_state->offset = 0;
-
- memset (loop_state->write_needed,
- 0, sizeof (*loop_state->write_needed) * child_count);
-
- memset (loop_state->checksum,
- 0, MD5_DIGEST_LEN * child_count);
-}
-
-
-static int
-sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this);
-
-
-static int
-sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this,
- struct sh_diff_loop_state *loop_state)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- gf_log (this->name, GF_LOG_TRACE,
- "loop for offset %"PRId64" returned", loop_state->offset);
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->loops_running--;
- sh_diff_loop_state_reset (loop_state, priv->child_count);
- }
- UNLOCK (&sh_priv->lock);
-
- AFR_STACK_DESTROY (rw_frame);
-
- sh_diff_loop_driver (sh_frame, this);
-
- return 0;
-}
-
-
-static int
-sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_algo_diff_private_t *sh_priv;
- struct sh_diff_loop_state *loop_state;
-
- int call_count = 0;
- int child_index = 0;
- int loop_index = 0;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- loop_state->offset);
-
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- }
- }
- UNLOCK (&sh_frame->lock);
-
- call_count = afr_frame_return (rw_frame);
-
- if (call_count == 0) {
- sh_diff_loop_return (rw_frame, this, loop_state);
- }
-
- return 0;
-}
-
-
-static int
-sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int loop_index;
- struct sh_diff_loop_state *loop_state;
-
- uint32_t wcookie;
-
- int i = 0;
- int call_count = 0;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
-
- call_count = sh_diff_number_of_writes_needed (loop_state->write_needed,
- priv->child_count);
-
- rw_local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, sh->offset);
-
- if ((op_ret <= 0) ||
- (call_count == 0)) {
- sh_diff_loop_return (rw_frame, this, loop_state);
-
- return 0;
- }
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
-
- sh_diff_loop_return (rw_frame, this, loop_state);
- goto out;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_state->write_needed[i]) {
- wcookie = __make_cookie (loop_index, i);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_write_cbk,
- (void *) (long) wcookie,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count,
- loop_state->offset, iobref);
-
- if (!--call_count)
- break;
- }
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_diff_read (call_frame_t *rw_frame, xlator_t *this,
- int loop_index)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- struct sh_diff_loop_state *loop_state;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- uint32_t cookie;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_state = sh_priv->loops[loop_index];
-
- cookie = __make_cookie (loop_index, sh->source);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_read_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh_priv->block_size,
- loop_state->offset);
-
- return 0;
-}
-
-
-static int
-sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
-
- int loop_index = 0;
- int child_index = 0;
- struct sh_diff_loop_state *loop_state;
-
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = sh->private;
-
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
-
- loop_state = sh_priv->loops[loop_index];
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "checksum on %s failed on subvolume %s (%s)",
- sh_local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- } else {
- memcpy (loop_state->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum,
- MD5_DIGEST_LEN);
- }
-
- call_count = afr_frame_return (rw_frame);
-
- if (call_count == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- if (memcmp (loop_state->checksum + (i * MD5_DIGEST_LEN),
- loop_state->checksum + (sh->source * MD5_DIGEST_LEN),
- MD5_DIGEST_LEN)) {
- /*
- Checksums differ, so this block
- must be written to this sink
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "checksum on subvolume %s at offset %"
- PRId64" differs from that on source",
- priv->children[i]->name, loop_state->offset);
-
- write_needed = loop_state->write_needed[i] = 1;
- }
- }
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->total_blocks++;
- if (write_needed)
- sh_priv->diff_blocks++;
- }
- UNLOCK (&sh_priv->lock);
-
- if (write_needed && !sh->op_failed) {
- sh_diff_read (rw_frame, this, loop_index);
- } else {
- sh->offset += sh_priv->block_size;
-
- sh_diff_loop_return (rw_frame, this, loop_state);
- }
- }
-
- return 0;
-}
-
-
-static int
-sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max)
-{
- int i;
-
- LOCK (&sh_priv->lock);
- {
- for (i = 0; i < max; i++) {
- if (sh_priv->loops[i]->active == _gf_false) {
- sh_priv->loops[i]->active = _gf_true;
- break;
- }
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (i == max) {
- gf_log ("[sh-diff]", GF_LOG_ERROR,
- "no free loops found! This shouldn't happen. Please"
- " report this to gluster-devel@nongnu.org");
- }
-
- return i;
-}
-
-
-static int
-sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
-
- call_frame_t *rw_frame = NULL;
-
- uint32_t cookie;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
-
- int32_t op_errno = 0;
-
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
-
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
-
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
-
- rw_sh->offset = sh->offset;
- rw_sh->sh_frame = frame;
-
- call_count = sh->active_sinks + 1; /* sinks and source */
-
- rw_local->call_count = call_count;
-
- loop_index = sh_diff_find_unused_loop (sh_priv, priv->data_self_heal_window_size);
-
- loop_state = sh_priv->loops[loop_index];
- loop_state->offset = offset;
-
- /* we need to send both the loop index and child index,
- so squeeze them both into a 32-bit number */
-
- cookie = __make_cookie (loop_index, sh->source);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- cookie = __make_cookie (loop_index, i);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
- priv->children[i],
- priv->children[i]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-
-out:
- sh->op_failed = 1;
-
- sh_diff_loop_driver (frame, this);
-
- return 0;
-}
-
-
-static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- int loop = 0;
- int recurse = 0;
-
- off_t offset = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- if (sh->op_failed) {
- if (sh_priv->loops_running == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "diff self-heal aborting on %s",
- local->loc.path);
-
- sh_diff_private_cleanup (frame, this);
- local->self_heal.algo_abort_cbk (frame, this);
- }
-
- goto out;
- }
-
- if (sh_priv->offset >= sh->file_size) {
- if (sh_priv->loops_running == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "diff self-heal completed on %s",
- local->loc.path);
-
-
- gf_log (this->name, GF_LOG_DEBUG,
- "diff self-heal on %s: %d blocks of %d were different (%.2f%%)",
- local->loc.path, sh_priv->diff_blocks,
- sh_priv->total_blocks,
- ((sh_priv->diff_blocks * 1.0)/sh_priv->total_blocks) * 100);
-
- sh_diff_private_cleanup (frame, this);
- local->self_heal.algo_completion_cbk (frame, this);
- }
-
- goto out;
- }
-
-spawn:
- loop = 0;
- recurse = 0;
-
- LOCK (&sh_priv->lock);
- {
- if ((sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- offset = sh_priv->offset;
- sh_priv->offset += sh_priv->block_size;
-
- sh_priv->loops_running++;
-
- loop = 1;
-
- if (sh_priv->offset < sh->file_size)
- recurse = 1;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (loop) {
- sh_diff_checksum (frame, this, offset);
- if (recurse)
- goto spawn;
- }
-
-out:
- return 0;
-}
-
-
-int
-afr_sh_algo_diff (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- int i;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
-
- sh_priv->block_size = this->ctx->page_size;
-
- sh->private = sh_priv;
-
- LOCK_INIT (&sh_priv->lock);
-
- local->call_count = 0;
-
- sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size,
- sizeof (*sh_priv->loops),
- gf_afr_mt_sh_diff_loop_state);
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]),
- gf_afr_mt_sh_diff_loop_state);
-
- sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count,
- MD5_DIGEST_LEN, gf_afr_mt_uint8_t);
- sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*sh_priv->loops[i]->write_needed),
- gf_afr_mt_char);
- }
-
- sh_diff_loop_driver (frame, this);
-
- return 0;
-}
-
-
-struct afr_sh_algorithm afr_self_heal_algorithms[] = {
- {.name = "full", .fn = afr_sh_algo_full},
- {.name = "diff", .fn = afr_sh_algo_diff},
- {0, 0},
-};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
deleted file mode 100644
index 0bdae3aa77f..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
-#define __AFR_SELF_HEAL_ALGORITHM_H__
-
-
-typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
- xlator_t *this);
-
-struct afr_sh_algorithm {
- const char *name;
- afr_sh_algo_fn fn;
-};
-
-extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-} afr_sh_algo_full_private_t;
-
-struct sh_diff_loop_state {
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- gf_boolean_t active;
-};
-
-typedef struct {
- size_t block_size;
-
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-
- int32_t total_blocks;
- int32_t diff_blocks;
-
- struct sh_diff_loop_state **loops;
-} afr_sh_algo_diff_private_t;
-
-#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 70b6cf85153..a4c0e89e434 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,1621 +1,2042 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "byte-order.h"
#include "afr.h"
-#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
+#include "byte-order.h"
+#include "protocol-common.h"
+#include "afr-messages.h"
-
-/**
- * select_source - select a source and return it
- */
+void
+afr_heal_synctask (xlator_t *this, afr_local_t *local);
int
-afr_sh_select_source (int sources[], int child_count)
+afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- int i;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ afr_local_t *local = NULL;
- return -1;
-}
+ local = frame->local;
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
+}
-/**
- * sink_count - return number of sinks in sources array
- */
int
-afr_sh_sink_count (int sources[], int child_count)
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata)
{
- int i;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ loc_t loc = {0, };
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+
+ STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+
+ syncbarrier_wait (&local->barrier, 1);
+
+ loc_wipe (&loc);
+
+ return 0;
}
int
-afr_sh_source_count (int sources[], int child_count)
+afr_check_stale_error (struct afr_reply *replies, afr_private_t *priv)
{
- int i;
- int nsource = 0;
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+ int stale_count = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
+ for (i = 0; i < priv->child_count; i++) {
+ tmp_errno = replies[i].op_errno;
+ if (tmp_errno == ENOENT || tmp_errno == ESTALE) {
+ op_errno = afr_higher_errno (op_errno, tmp_errno);
+ stale_count++;
+ }
+ }
+ if (stale_count != priv->child_count)
+ return -ENOTCONN;
+ else
+ return -op_errno;
}
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count)
+dict_t *
+afr_selfheal_output_xattr (xlator_t *this, gf_boolean_t is_full_crawl,
+ afr_transaction_type type, int *output_dirty,
+ int **output_matrix, int subvol,
+ int **full_heal_mtx_out)
{
- int i = 0;
+ int j = 0;
+ int idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+ int *raw = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
- for (i = 0; i < child_count; i++) {
- if (child_errno[i] && sources[i]) {
- sources[i] = 0;
- }
+ priv = this->private;
+ idx = afr_index_for_transaction_type (type);
+ d_idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+
+ xattr = dict_new ();
+ if (!xattr)
+ return NULL;
+
+ /* clear dirty */
+ raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32 (output_dirty[subvol]);
+ ret = dict_set_bin (xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE (raw);
+ goto err;
+ }
+
+ /* clear/set pending */
+ for (j = 0; j < priv->child_count; j++) {
+ raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS,
+ gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32 (output_matrix[subvol][j]);
+ if (is_full_crawl)
+ raw[d_idx] = hton32 (full_heal_mtx_out[subvol][j]);
+
+ ret = dict_set_bin (xattr, priv->pending_key[j],
+ raw, sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE (raw);
+ goto err;
+ }
}
- return 0;
+ return xattr;
+err:
+ if (xattr)
+ dict_unref (xattr);
+ return NULL;
}
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
+int
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on)
{
- afr_private_t * priv = this->private;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int **full_heal_mtx_in = NULL;
+ int **full_heal_mtx_out = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
- char *buf = NULL;
- char *ptr = NULL;
+ priv = this->private;
+ local = frame->local;
+
+ pending = alloca0 (priv->child_count);
- int i, j;
+ input_dirty = alloca0 (priv->child_count * sizeof (int));
+ input_matrix = ALLOC_MATRIX (priv->child_count, int);
+ full_heal_mtx_in = ALLOC_MATRIX (priv->child_count, int);
+ full_heal_mtx_out = ALLOC_MATRIX (priv->child_count, int);
+ output_dirty = alloca0 (priv->child_count * sizeof (int));
+ output_matrix = ALLOC_MATRIX (priv->child_count, int);
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
+ xdata = dict_new ();
+ if (!xdata)
+ return -1;
+
+ afr_selfheal_extract_xattr (this, replies, type, input_dirty,
+ input_matrix);
+
+ if (local->need_full_crawl)
+ afr_selfheal_extract_xattr (this, replies, AFR_DATA_TRANSACTION,
+ NULL, full_heal_mtx_in);
+
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ if (pending[j]) {
+ output_matrix[i][j] = 1;
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = 1;
+ } else {
+ output_matrix[i][j] = -input_matrix[i][j];
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
+ }
}
- ptr += sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_TRACE,
- "pending_matrix: %s", buf);
}
- GF_FREE (buf);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+
+ xattr = afr_selfheal_output_xattr (this, local->need_full_crawl,
+ type, output_dirty,
+ output_matrix, i,
+ full_heal_mtx_out);
+ if (!xattr) {
+ continue;
+ }
+
+ if ((type == AFR_ENTRY_TRANSACTION) && (priv->esh_granular)) {
+ if (xdata &&
+ dict_set_int8 (xdata, GF_XATTROP_PURGE_INDEX, 1))
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_DICT_SET_FAILED, "Failed to set"
+ " dict value for %s",
+ GF_XATTROP_PURGE_INDEX);
+ }
+
+ afr_selfheal_post_op (frame, this, inode, i, xattr, xdata);
+ dict_unref (xattr);
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
}
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count)
{
- int i, j, k;
+ int i = 0;
+ dict_t *xdata = NULL;
+
+ if (dst == src)
+ return;
+
+ for (i = 0; i < count; i++) {
+ dst[i].valid = src[i].valid;
+ dst[i].op_ret = src[i].op_ret;
+ dst[i].op_errno = src[i].op_errno;
+ dst[i].prestat = src[i].prestat;
+ dst[i].poststat = src[i].poststat;
+ dst[i].preparent = src[i].preparent;
+ dst[i].postparent = src[i].postparent;
+ dst[i].preparent2 = src[i].preparent2;
+ dst[i].postparent2 = src[i].postparent2;
+ if (src[i].xdata)
+ xdata = dict_ref (src[i].xdata);
+ else
+ xdata = NULL;
+ if (dst[i].xdata)
+ dict_unref (dst[i].xdata);
+ dst[i].xdata = xdata;
+ memcpy (dst[i].checksum, src[i].checksum,
+ MD5_DIGEST_LENGTH);
+ }
+}
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
- int ret = -1;
- unsigned char *ignorant_subvols = NULL;
+int
+afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol,
+ int idx, dict_t *xdata)
+{
+ void *pending_raw = NULL;
+ int pending[3] = {0, };
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
+ if (!dirty)
+ return 0;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw))
+ return -1;
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
-
- if (ret != 0) {
- /*
- * There is no xattr present. This means this
- * subvolume should be considered an 'ignorant'
- * subvolume.
- */
-
- ignorant_subvols[i] = 1;
- continue;
- }
+ if (!pending_raw)
+ return -1;
- memcpy (pending, pending_raw, sizeof(pending));
- k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
- }
+ memcpy (pending, pending_raw, sizeof(pending));
- /*
- * Make all non-ignorant subvols point towards the ignorant
- * subvolumes.
- */
+ dirty[subvol] = ntoh32 (pending[idx]);
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
-
- GF_FREE (ignorant_subvols);
+ return 0;
}
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
+int
+afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol,
+ int idx, dict_t *xdata)
+{
+ int i = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {0, };
+ afr_private_t *priv = NULL;
-typedef enum {
- AFR_NODE_INNOCENT,
- AFR_NODE_FOOL,
- AFR_NODE_WISE
-} afr_node_type;
+ priv = this->private;
-typedef struct {
- afr_node_type type;
- int wisdom;
-} afr_node_character;
+ if (!matrix)
+ return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw))
+ continue;
-static int
-afr_sh_is_innocent (int32_t *array, int child_count)
-{
- int i = 0;
- int ret = 1; /* innocent until proven guilty */
+ if (!pending_raw)
+ continue;
- for (i = 0; i < child_count; i++) {
- if (array[i]) {
- ret = 0;
- break;
- }
- }
+ memcpy (pending, pending_raw, sizeof(pending));
- return ret;
+ matrix[subvol][i] = ntoh32 (pending[idx]);
+ }
+
+ return 0;
}
-static int
-afr_sh_is_fool (int32_t *array, int i, int child_count)
+int
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix)
{
- return array[i]; /* fool if accuses itself */
-}
+ afr_private_t *priv = NULL;
+ int i = 0;
+ dict_t *xdata = NULL;
+ int idx = -1;
+ idx = afr_index_for_transaction_type (type);
-static int
-afr_sh_is_wise (int32_t *array, int i, int child_count)
-{
- return !array[i]; /* wise if does not accuse itself */
-}
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].xdata)
+ continue;
-static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
- int child_count)
-{
- int i = 0;
- int ret = 1;
+ xdata = replies[i].xdata;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_INNOCENT) {
- ret = 0;
- break;
- }
- }
+ afr_selfheal_fill_dirty (this, dirty, i, idx, xdata);
+ afr_selfheal_fill_matrix (this, matrix, i, idx, xdata);
+ }
- return ret;
+ return 0;
}
-
-static int
-afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
+/*
+ * If by chance there are multiple sources with differing sizes, select
+ * the largest file as the source.
+ *
+ * This can happen if data was directly modified in the backend or for snapshots
+ */
+void
+afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int i = 0;
- int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t size = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- ret = 1;
- break;
+ /* Find source with biggest file size */
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if (size <= replies[i].poststat.ia_size) {
+ size = replies[i].poststat.ia_size;
}
}
- return ret;
+ /* Mark sources with less size as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (size > replies[i].poststat.ia_size)
+ sources[i] = 0;
+ }
}
+void
+afr_mark_latest_mtime_file_as_source (xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
-/*
- * The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
- * Only wise nodes with wisdom 1 are sources.
- *
- * If no nodes with wisdom 1 exist, a split-brain has occured.
- */
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if ((mtime > replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec > replies[i].poststat.ia_mtime_nsec))) {
+ sources[i] = 0;
+ }
+ }
+}
-static void
-afr_sh_compute_wisdom (int32_t *pending_matrix[],
- afr_node_character characters[], int child_count)
+void
+afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks)
{
int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- characters[i].wisdom = 1;
+ priv = this->private;
- for (j = 0; j < child_count; j++) {
- if ((characters[j].type == AFR_NODE_WISE)
- && pending_matrix[j][i]) {
-
- characters[i].wisdom = 0;
- }
- }
- }
+ memset (sinks, 0, sizeof (*sinks) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] && locked_on[i])
+ sinks[i] = 1;
}
}
-
-static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
- int child_count)
+gf_boolean_t
+afr_dict_contains_heal_op (call_frame_t *frame)
{
- int i = 0;
- int ret = 1;
+ afr_local_t *local = NULL;
+ dict_t *xdata_req = NULL;
+ int ret = 0;
+ int heal_op = -1;
- for (i = 0; i < child_count; i++) {
- if ((characters[i].type == AFR_NODE_WISE)
- && characters[i].wisdom == 1) {
-
- /* There is atleast one bona-fide wise node */
- ret = 0;
- break;
- }
+ local = frame->local;
+ xdata_req = local->xdata_req;
+ ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
+ if (ret)
+ return _gf_false;
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp)
+ return _gf_true;
}
+ ret = dict_set_str (local->xdata_rsp, "sh-fail-msg",
+ "File not in split-brain");
- return ret;
+ return _gf_true;
}
-
-static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
- int child_count)
+int
+afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame,
+ xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type, int heal_op)
{
- int nsources = 0;
-
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int i = 0;
+ char *name = NULL;
+ int source = -1;
- for (i = 0; i < child_count; i++) {
- if (characters[i].wisdom == 1) {
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ if (sources[i] || !sinks[i] || !healed_sinks[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ ret = -1;
+ goto out;
+ }
+ }
+ xdata_rsp = local->xdata_rsp;
+
+ for (i = 0 ; i < priv->child_count; i++)
+ if (locked_on[i])
sources[i] = 1;
- nsources++;
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+ "Use source-brick option to"
+ " heal metadata split-brain");
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_largest_file_as_source (this, sources, replies);
+ if (AFR_COUNT (sources, priv->child_count) != 1) {
+ ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+ "No bigger file");
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+ "Use source-brick option to"
+ " heal metadata split-brain");
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_latest_mtime_file_as_source (this, sources, replies);
+ if (AFR_COUNT (sources, priv->child_count) != 1) {
+ ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+ "No difference in mtime");
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str (xdata_req, "child-name", &name);
+ if (ret)
+ goto out;
+ source = afr_get_child_index_from_name (this, name);
+ if (source < 0) {
+ ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+ "Invalid brick name");
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ if (locked_on[source] != 1) {
+ ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+ "Brick is not up");
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ sources[source] = 1;
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+ for (i = 0 ; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
}
}
+ sinks[source] = 0;
+ healed_sinks[source] = 0;
+ ret = source;
+out:
+ if (ret < 0)
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ return ret;
- return nsources;
}
-
-static int
-afr_sh_mark_if_size_differs (afr_self_heal_t *sh, int child_count)
+int
+afr_sh_fav_by_majority (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode)
{
- int32_t ** pending_matrix;
- int i, j;
-
- int size_differs = 0;
+ afr_private_t *priv;
+ int vote_count = -1;
+ int fav_child = -1;
+ int i = 0;
+ int k = 0;
- pending_matrix = sh->pending_matrix;
+ priv = this->private;
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- if (!sh->buf)
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "mtime_sec = %d, size = %lu for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_size,
+ uuid_utoa (inode->gfid));
+ vote_count = 0;
+ for (k = 0; k < priv->child_count; k++) {
+ if ((replies[k].poststat.ia_mtime ==
+ replies[i].poststat.ia_mtime) &&
+ (replies[k].poststat.ia_size ==
+ replies[i].poststat.ia_size)
+ ) {
+ vote_count++;
+ }
+ }
+ if (vote_count > priv->child_count/2) {
+ fav_child = i;
break;
+ }
+ }
+ }
+ return fav_child;
+}
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[j])
- && (pending_matrix[i][j] == 0)
- && (pending_matrix[j][i] == 0)) {
+/*
+ * afr_sh_fav_by_mtime: Choose favorite child by mtime.
+ */
+int
+afr_sh_fav_by_mtime (xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_mtime = 0;
+ uint32_t cmp_mtime_nsec = 0;
- pending_matrix[i][j] = 1;
- pending_matrix[j][i] = 1;
+ priv = this->private;
- size_differs = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "mtime = %d, mtime_nsec = %d for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_mtime_nsec,
+ uuid_utoa (inode->gfid));
+ if (replies[i].poststat.ia_mtime > cmp_mtime) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec =
+ replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_mtime == cmp_mtime)
+ && (replies[i].poststat.ia_mtime_nsec >
+ cmp_mtime_nsec)) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec =
+ replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
}
}
}
-
- return size_differs;
+ return fav_child;
}
-
-static int
-afr_sh_mark_biggest_fool_as_source (afr_self_heal_t *sh,
- afr_node_character *characters,
- int child_count)
+/*
+ * afr_sh_fav_by_ctime: Choose favorite child by ctime.
+ */
+int
+afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies, inode_t *inode)
{
+ afr_private_t *priv;
+ int fav_child = -1;
int i = 0;
- int biggest = 0;
+ uint32_t cmp_ctime = 0;
+ uint32_t cmp_ctime_nsec = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_FOOL) {
- biggest = i;
- break;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "ctime = %d, ctime_nsec = %d for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_ctime,
+ replies[i].poststat.ia_ctime_nsec,
+ uuid_utoa (inode->gfid));
+ if (replies[i].poststat.ia_ctime > cmp_ctime) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec =
+ replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_ctime == cmp_ctime)
+ && (replies[i].poststat.ia_ctime_nsec >
+ cmp_ctime_nsec)) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec =
+ replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ }
}
}
+ return fav_child;
+}
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
+/*
+ * afr_sh_fav_by_size: Choose favorite child by size.
+ */
+int
+afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint64_t cmp_sz = 0;
- if (!sh->buf)
- break;
+ priv = this->private;
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "file size = %lu for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_size,
+ uuid_utoa (inode->gfid));
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ }
}
}
-
- sh->sources[biggest] = 1;
-
- return 1;
+ return fav_child;
}
-static int
-afr_sh_mark_biggest_as_source (afr_self_heal_t *sh, int child_count)
+int
+afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
+ xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
{
- int biggest = 0;
- int i;
-
- for (i = 0; i < child_count; i++) {
- if (!sh->buf)
- break;
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+ char mtime_str[256];
+ char ctime_str[256];
+ char *policy_str = NULL;
+ struct tm *tm_ptr;
+ time_t time;
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
- }
+ priv = this->private;
+ if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) {
+ fav_child = afr_sh_fav_by_majority (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "MAJORITY";
+ } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) {
+ fav_child = afr_sh_fav_by_mtime (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "MTIME";
+ } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) {
+ fav_child = afr_sh_fav_by_ctime (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "CTIME";
+ } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) {
+ fav_child = afr_sh_fav_by_size (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "SIZE";
}
- sh->sources[biggest] = 1;
-
- return 1;
+ if (fav_child > priv->child_count - 1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) "
+ "selected by policy %s.", fav_child, policy_str);
+ } else if (fav_child >= 0) {
+ time = replies[fav_child].poststat.ia_mtime;
+ tm_ptr = localtime (&time);
+ strftime (mtime_str, sizeof (mtime_str), "%Y-%m-%d %H:%M:%S",
+ tm_ptr);
+ time = replies[fav_child].poststat.ia_ctime;
+ tm_ptr = localtime (&time);
+ strftime (ctime_str, sizeof (ctime_str), "%Y-%m-%d %H:%M:%S",
+ tm_ptr);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Source %s "
+ "selected as authentic to resolve conflicting "
+ "data in file (gfid:%s) by %s (%lu bytes @ %s mtime, "
+ "%s ctime).",
+ priv->children[fav_child]->name,
+ uuid_utoa (inode->gfid),
+ policy_str,
+ replies[fav_child].poststat.ia_size,
+ mtime_str,
+ ctime_str);
+
+ sources[fav_child] = 1;
+ sinks[fav_child] = 0;
+ healed_sinks[fav_child] = 0;
+ }
+ return fav_child;
}
-
-static int
-afr_sh_mark_loweia_uid_as_source (afr_self_heal_t *sh, int child_count)
+/* Return a source depending on the type of heal_op, and set sources[source],
+ * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
+ * only if the following condition is met:
+ * ∀i((i ∈ locked_on[] ∧ i=1)==>(sources[i]=0 ∧ sinks[i]=1 ∧ healed_sinks[i]=1))
+ * i.e. for each locked node, sources[node] is 0; healed_sinks[node] and
+ * sinks[node] are 1. This should be the case if the file is in split-brain.
+ */
+int
+afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
{
- uid_t smallest = 0;
- int i;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ int heal_op = -1;
+ int ret = -1;
- for (i = 0; i < child_count; i++) {
- if (!sh->buf)
- break;
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
- if (sh->buf[i].ia_uid < sh->buf[smallest].ia_uid) {
- smallest = i;
- }
- }
+ ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
+ if (ret)
+ goto autoheal;
- sh->sources[smallest] = 1;
+ ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,
+ sources, sinks,
+ healed_sinks,
+ locked_on, replies,
+ type, heal_op);
+ return ret;
- return 1;
-}
+autoheal:
+ /* Automatically heal if fav_child_policy is set. */
+ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
+ ret = afr_mark_split_brain_source_sinks_by_policy (frame, this,
+ inode,
+ sources,
+ sinks,
+ healed_sinks,
+ locked_on,
+ replies,
+ type);
+ }
+ return ret;
+}
-int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type)
+gf_boolean_t
+afr_does_witness_exist (xlator_t *this, uint64_t *witness)
{
- int i = 0;
-
- int32_t ** pending_matrix;
- int * sources;
+ int i = 0;
+ afr_private_t *priv = NULL;
- int size_differs = 0;
+ priv = this->private;
- pending_matrix = sh->pending_matrix;
- sources = sh->sources;
+ for (i = 0; i < priv->child_count; i++) {
+ if (witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
- int nsources = 0;
+/*
+ * This function determines if a self-heal is required for a given inode,
+ * and if needed, in what direction.
+ *
+ * locked_on[] is the array representing servers which have been locked and
+ * from which xattrs have been fetched for analysis.
+ *
+ * The output of the function is by filling the arrays sources[] and sinks[].
+ *
+ * sources[i] is set if i'th server is an eligible source for a selfheal.
+ *
+ * sinks[i] is set if i'th server needs to be healed.
+ *
+ * if sources[0..N] are all set, there is no need for a selfheal.
+ *
+ * if sinks[0..N] are all set, the inode is in split brain.
+ *
+ */
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count,
- gf_afr_mt_afr_node_character) ;
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, uint64_t *witness,
+ gf_boolean_t *pflag)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL; /* Denotes if dirty xattr is set */
+ int **matrix = NULL;/* Changelog matrix */
+ char *accused = NULL;/* Accused others without any self-accusal */
+ char *pending = NULL;/* Have pending operations on others */
+ char *self_accused = NULL; /* Accused itself */
- /* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
- for (i = 0; i < child_count; i++) {
- if (afr_sh_is_innocent (pending_matrix[i], child_count)) {
- characters[i].type = AFR_NODE_INNOCENT;
+ priv = this->private;
- } else if (afr_sh_is_fool (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_FOOL;
+ dirty = alloca0 (priv->child_count * sizeof (int));
+ accused = alloca0 (priv->child_count);
+ pending = alloca0 (priv->child_count);
+ self_accused = alloca0 (priv->child_count);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+ memset (witness, 0, sizeof (*witness) * priv->child_count);
- } else if (afr_sh_is_wise (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_WISE;
+ /* First construct the pending matrix for further analysis */
+ afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
- } else {
- gf_log ("[module:replicate]", GF_LOG_ERROR,
- "Could not determine the state of subvolume %d!"
- " (This message should never appear."
- " Please file a bug report to "
- "<gluster-devel@nongnu.org>.)", i);
+ if (pflag) {
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j])
+ *pflag = _gf_true;
+ if (*pflag)
+ break;
}
}
- if (type == AFR_SELF_HEAL_DATA) {
- size_differs = afr_sh_mark_if_size_differs (sh, child_count);
+ if (afr_success_count (replies,
+ priv->child_count) < AFR_SH_MIN_PARTICIPANTS) {
+ /* Treat this just like locks not being acquired */
+ return -ENOTCONN;
}
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
-
- nsources = afr_sh_mark_loweia_uid_as_source (sh, child_count);
- goto out;
+ /* short list all self-accused */
+ for (i = 0; i < priv->child_count; i++) {
+ if (matrix[i][i])
+ self_accused[i] = 1;
}
- if (afr_sh_all_nodes_innocent (characters, child_count)) {
- if (size_differs) {
- nsources = afr_sh_mark_biggest_as_source (sh,
- child_count);
+ /* Next short list all accused to exclude them from being sources */
+ /* Self-accused can't accuse others as they are FOOLs */
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j]) {
+ if (!self_accused[i])
+ accused[j] = 1;
+
+ if (i != j)
+ pending[i] = 1;
+ }
+ }
+ }
+
+ /* Short list all non-accused as sources */
+ memset (sources, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!accused[i] && locked_on[i])
+ sources[i] = 1;
+ }
+
+ /* Everyone accused by non-self-accused sources are sinks */
+ memset (sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
}
+ }
- } else if (afr_sh_wise_nodes_exist (characters, child_count)) {
- afr_sh_compute_wisdom (pending_matrix, characters, child_count);
+ /* For breaking ties provide with number of fops they witnessed */
- if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
+ /*
+ * count the pending fops witnessed from itself to others when it is
+ * self-accused
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (i == j)
+ continue;
+ witness[i] += matrix[i][j];
+ }
+ }
- nsources = -1;
- goto out;
+ /* If no sources, all locked nodes are sinks - split brain */
+ if (AFR_COUNT (sources, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ sinks[i] = 1;
+ }
+ }
- } else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
- characters,
- child_count);
+ /* One more class of witness similar to dirty in v2 is where no pending
+ * exists but we have self-accusing markers. This can happen in afr-v1
+ * if the brick crashes just after doing xattrop on self but
+ * before xattrop on the other xattrs on the brick in pre-op. */
+ if (AFR_COUNT (pending, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i])
+ witness[i] += matrix[i][i];
}
} else {
- nsources = afr_sh_mark_biggest_fool_as_source (sh, characters,
- child_count);
+ /* In afr-v1 if a file is self-accused and has pending
+ * operations on others then it is similar to 'dirty' in afr-v2.
+ * Consider such cases as witness.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i] && pending[i])
+ witness[i] += matrix[i][i];
+ }
}
-out:
- GF_FREE (characters);
- return nsources;
-}
+ /* count the number of dirty fops witnessed */
+ for (i = 0; i < priv->child_count; i++)
+ witness[i] += dirty[i];
+ return 0;
+}
void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
- int child_count, afr_transaction_type type)
+afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks)
{
- int i = 0;
- int j = 0;
- int k = 0;
+ char *status = NULL;
+ char *sinks_str = NULL;
+ char *p = NULL;
+ char *sources_str = NULL;
+ char *q = NULL;
+ afr_private_t *priv = NULL;
+ gf_loglevel_t loglevel = GF_LOG_NONE;
+ int i = 0;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void * pending_raw = NULL;
- int ret = 0;
+ priv = this->private;
+ sinks_str = alloca0 (priv->child_count * 8);
+ p = sinks_str;
+ sources_str = alloca0 (priv->child_count * 8);
+ q = sources_str;
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i])
+ p += sprintf (p, "%d ", i);
+ if (sources[i]) {
+ if (source == i) {
+ q += sprintf (q, "[%d] ", i);
+ } else {
+ q += sprintf (q, "%d ", i);
+ }
+ }
+ }
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
+ if (ret < 0) {
+ status = "Failed";
+ loglevel = GF_LOG_DEBUG;
+ } else {
+ status = "Completed";
+ loglevel = GF_LOG_INFO;
+ }
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
+ gf_msg (this->name, loglevel, 0,
+ AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. "
+ "sources=%s sinks=%s", status, type, uuid_utoa (gfid),
+ sources_str, sinks_str);
+}
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
+int
+afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
+{
+ afr_local_t *local = NULL;
+ int i = -1;
+ GF_UNUSED int ret = -1;
+ int8_t need_heal = 1;
- if (!success[j])
- continue;
+ local = frame->local;
+ i = (long) cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (buf)
+ local->replies[i].poststat = *buf;
+ if (parbuf)
+ local->replies[i].postparent = *parbuf;
+ if (xdata) {
+ local->replies[i].xdata = dict_ref (xdata);
+ ret = dict_get_int8 (xdata, "link-count", &need_heal);
+ local->replies[i].need_heal = need_heal;
+ } else {
+ local->replies[i].need_heal = need_heal;
+ }
- k = afr_index_for_transaction_type (type);
-
- if (pending_raw) {
- memcpy (pending, pending_raw, sizeof(pending));
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
- }
+ syncbarrier_wake (&local->barrier);
- }
- }
+ return 0;
}
-int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr)
{
- int i = 0;
- int j = 0;
- int k = 0;
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
- int ret = 0;
+ local = frame->local;
+ priv = frame->this->private;
- int32_t *pending = 0;
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ return NULL;
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
+ if (xattr)
+ dict_copy (xattr, xattr_req);
- for (j = 0; j < child_count; j++) {
- pending = GF_CALLOC (sizeof (int32_t), 3,
- gf_afr_mt_int32_t);
- /* 3 = data+metadata+entry */
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
+ dict_destroy (xattr_req);
+ return NULL;
+ }
- k = afr_index_for_transaction_type (type);
+ inode = inode_new (parent->table);
+ if (!inode) {
+ dict_destroy (xattr_req);
+ return NULL;
+ }
- pending[k] = hton32 (delta_matrix[i][j]);
+ loc.parent = inode_ref (parent);
+ gf_uuid_copy (loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- 3 * sizeof (int32_t));
- }
- }
+ AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- return 0;
-}
+ afr_replies_copy (replies, local->replies, priv->child_count);
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
+
+ return inode;
+}
int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on)
{
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ local = frame->local;
+ priv = frame->this->private;
- priv = this->private;
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ return -ENOMEM;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
+ dict_destroy (xattr_req);
+ return -ENOMEM;
+ }
- if (ret != 0)
- return 0;
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, gfid);
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- if (pending[j])
- return 1;
- }
+ afr_replies_copy (replies, local->replies, priv->child_count);
+
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
return 0;
}
-
int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies)
{
afr_private_t *priv = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ priv = frame->this->private;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies,
+ priv->child_up);
+}
- if (pending[j])
- return 1;
- }
+unsigned int
+afr_success_count (struct afr_reply *replies, unsigned int count)
+{
+ int i = 0;
+ unsigned int success = 0;
- return 0;
+ for (i = 0; i < count; i++)
+ if (replies[i].valid && replies[i].op_ret == 0)
+ success++;
+ return success;
}
-
int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
-
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
+ afr_local_t *local = NULL;
+ int i = 0;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ local = frame->local;
+ i = (long) cookie;
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- if (pending[j])
- return 1;
- }
+ syncbarrier_wake (&local->barrier);
return 0;
}
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
-
int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
+afr_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on)
{
- int i, j;
-
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
-}
-
-
-int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
local = frame->local;
- sh = &local->self_heal;
priv = this->private;
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
- }
-
for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_TRACE,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
+ }
}
- return 0;
+ return count;
}
int
-sh_missing_entries_unlck_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
- if (call_count == 0) {
- afr_sh_missing_entries_done (frame, this);
- }
+ loc_wipe (&loc);
- return 0;
+ return afr_locked_fill (frame, this, locked_on);
}
-
-static int
-sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_self_heal_t *sh = NULL;
+int
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
+{
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i])
- call_count++;
- }
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- if (call_count == 0) {
- afr_sh_missing_entries_done (frame, this);
- return 0;
- }
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- local->call_count = call_count;
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %"PRId64"/%s on subvolume %s",
- sh->parent_loc.inode->ino, local->loc.name,
- priv->children[i]->name);
-
- STACK_WIND (frame, sh_missing_entries_unlck_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- if (!--call_count)
- break;
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill (frame, this, locked_on);
+ afr_selfheal_uninodelk (frame, this, inode, dom, off,
+ size, locked_on);
+
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLKW, &flock, NULL);
+ break;
}
}
- return 0;
+
+ loc_wipe (&loc);
+
+ return afr_locked_fill (frame, this, locked_on);
}
+static void
+afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies,
+ int *lock_count, int *eagain_count)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == 0) {
+ (*lock_count)++;
+ } else if (replies[i].op_ret == -1 &&
+ replies[i].op_errno == EAGAIN) {
+ (*eagain_count)++;
+ }
+ }
+}
-static int
-sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int op_errno,
- struct iatt *preop, struct iatt *postop)
+/*Do blocking locks if number of locks acquired is majority and there were some
+ * EAGAINs. Useful for odd-way replication*/
+int
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
+
+ priv = this->private;
+ local = frame->local;
- loc_t *parent_loc = cookie;
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- int call_count = 0;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- local = frame->local;
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr on %s failed: %s",
- local->loc.path, strerror (op_errno));
- }
+ afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count,
+ &eagain_count);
- if (parent_loc) {
- loc_wipe (parent_loc);
- GF_FREE (parent_loc);
- }
+ if (lock_count > priv->child_count/2 && eagain_count) {
+ afr_locked_fill (frame, this, locked_on);
+ afr_selfheal_uninodelk (frame, this, inode, dom, off,
+ size, locked_on);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- STACK_DESTROY (frame->root);
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLKW, &flock, NULL);
}
-
- return 0;
-}
+ loc_wipe (&loc);
+
+ return afr_locked_fill (frame, this, locked_on);
+}
-static int
-sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
+int
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- call_frame_t *setattr_frame = NULL;
- int call_count = 0;
- int child_index = 0;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
- loc_t *parent_loc = NULL;
- struct iatt stbuf;
- int32_t valid;
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- stbuf.ia_atime = sh->buf[sh->source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[sh->source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[sh->source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[sh->source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[sh->source].ia_uid;
- stbuf.ia_gid = sh->buf[sh->source].ia_gid;
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- if (op_ret == 0) {
- setattr_frame = copy_frame (frame);
-
- setattr_frame->local = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
-
- ((afr_local_t *)setattr_frame->local)->call_count = 2;
-
- gf_log (this->name, GF_LOG_TRACE,
- "setattr (%s) on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk,
- (void *) (long) 0,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- afr_build_parent_loc (parent_loc, &local->loc);
-
- STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &sh->parentbuf, valid);
- }
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- call_count = afr_frame_return (frame);
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk,
+ dom, &loc, F_SETLK, &flock, NULL);
- if (call_count == 0) {
- sh_missing_entries_finish (frame, this);
- }
+ loc_wipe (&loc);
return 0;
}
-static int
-sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
- dev_t ia_gen = 0;
+ loc_t loc = {0,};
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
+ loc_wipe (&loc);
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot,
- sh->buf[sh->source].ia_type);
- ia_gen = sh->buf[sh->source].ia_gen;
-
- gf_log (this->name, GF_LOG_TRACE,
- "mknod %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, st_mode, ia_gen);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return afr_locked_fill (frame, this, locked_on);
}
-static int
-sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
-
+ loc_t loc = {0,};
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot,
- sh->buf[sh->source].ia_type);
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "mkdir %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- if (!strcmp (local->loc.path, "/")) {
- /* We shouldn't try to create "/" */
-
- sh_missing_entries_finish (frame, this);
-
- return 0;
- } else {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, st_mode);
- if (!--call_count)
- break;
- }
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill (frame, this, locked_on);
+ afr_selfheal_unentrylk (frame, this, inode, dom, name,
+ locked_on, NULL);
+
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
}
}
- return 0;
-}
+ loc_wipe (&loc);
+ return afr_locked_fill (frame, this, locked_on);
+}
-static int
-sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this,
- const char *link)
+int
+afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
-
+ loc_t loc = {0,};
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "symlink %s -> %s on %d subvolumes",
- local->loc.path, link, enoent_count);
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- link, &local->loc);
- if (!--call_count)
- break;
- }
- }
+ afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count,
+ &eagain_count);
- return 0;
-}
+ if (lock_count > priv->child_count/2 && eagain_count) {
+ afr_locked_fill (frame, this, locked_on);
+ afr_selfheal_unentrylk (frame, this, inode, dom, name,
+ locked_on, NULL);
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
-static int
-sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *link, struct iatt *sbuf)
-{
- if (op_ret > 0)
- sh_missing_entries_symlink (frame, this, link);
- else
- sh_missing_entries_finish (frame, this);
+ loc_wipe (&loc);
- return 0;
+ return afr_locked_fill (frame, this, locked_on);
}
-static int
-sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ loc_t loc = {0,};
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, entrylk,
+ dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
- STACK_WIND (frame, sh_missing_entries_readlink_cbk,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readlink,
- &local->loc, 4096);
+ loc_wipe (&loc);
return 0;
}
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- int i = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int govinda_gOvinda = 0;
-
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ idx = afr_index_for_transaction_type (type);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i]) {
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
- } else {
- if (type) {
- if (type != sh->buf[i].ia_type) {
- gf_log (this->name, GF_LOG_TRACE,
- "file %s is govinda!",
- local->loc.path);
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
- govinda_gOvinda = 1;
- }
- } else {
- sh->source = i;
- type = sh->buf[i].ia_type;
- }
+ if (ntoh32 (pending_int[idx]))
+ return _gf_true;
}
}
- if (govinda_gOvinda) {
- gf_log (this->name, GF_LOG_ERROR,
- "conflicing filetypes exist for path %s. returning.",
- local->loc.path);
-
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i],
+ &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
- if (!type) {
- gf_log (this->name, GF_LOG_ERROR,
- "no source found for %s. all nodes down?. returning.",
- local->loc.path);
- /* subvolumes down and/or file does not exist */
- sh_missing_entries_finish (frame, this);
- return 0;
+ if (ntoh32 (pending_int[idx]))
+ return _gf_true;
}
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ return _gf_false;
+}
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- sh_missing_entries_mknod (frame, this);
- break;
- case IA_IFLNK:
- sh_missing_entries_readlink (frame, this);
- break;
- case IA_IFDIR:
- sh_missing_entries_mkdir (frame, this);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown file type: 0%o", type);
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- }
- return 0;
+gf_boolean_t
+afr_is_data_set (xlator_t *this, dict_t *xdata)
+{ /* Data-transaction flavour of afr_is_pending_set(): forwards this/xdata with AFR_DATA_TRANSACTION and returns its result unchanged. */
+ return afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION);
}
+gf_boolean_t
+afr_is_metadata_set (xlator_t *this, dict_t *xdata)
+{ /* Metadata-transaction flavour of afr_is_pending_set(): forwards this/xdata with AFR_METADATA_TRANSACTION and returns its result unchanged. */
+ return afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION);
+}
-static int
-sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+gf_boolean_t
+afr_is_entry_set (xlator_t *this, dict_t *xdata)
{ /* Entry-transaction flavour of afr_is_pending_set(): forwards this/xdata with AFR_ENTRY_TRANSACTION and returns its result unchanged. */
- int child_index = 0;
- afr_local_t *local = NULL;
- int call_count = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ return afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION);
+}
+/*
+ * This function inspects the looked up replies (in an unlocked manner)
+ * and decides whether a locked verification and possible healing is
+ * required or not. It updates the three booleans for each type
+ * of healing. A flag is only ever set to TRUE here (never cleared), so the
+ * caller must initialise each flag to FALSE: a flag that is still FALSE on
+ * return means no healing of that type is required, a flag that is TRUE
+ * means we have to proceed with locked reinspection.
+ *
+ * Any of data_selfheal, metadata_selfheal and entry_selfheal may be NULL,
+ * in which case that type of heal is simply not reported.
+ *
+ * If link_inode is non-NULL and at least one child returned a usable reply,
+ * *link_inode receives the result of inode_link() on the inode looked up
+ * (or freshly created) for gfid.
+ *
+ * Returns 0 on success, -EIO when two children disagree on the file type,
+ * -EINVAL when inode_link() fails, the non-zero result of
+ * afr_selfheal_unlocked_discover() when discovery fails, the result of
+ * afr_check_stale_error() when fewer than two usable replies were seen and
+ * no inode was linked, and -1 when no inode could be obtained for gfid.
+ */
+
+int
+afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+ uuid_t gfid, inode_t **link_inode,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal)
+{
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int i = 0;
+ int valid_cnt = 0;
+ struct iatt first = {0, };
+ struct afr_reply *replies = NULL;
+ int ret = -1;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- child_index = (long) cookie;
+ inode = afr_inode_find (this, gfid);
+ if (!inode)
+ goto out;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->ia_type);
+ replies = alloca0 (sizeof (*replies) * priv->child_count);
- local->self_heal.buf[child_index] = *buf;
- local->self_heal.parentbuf = *postparent;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies);
+ if (ret)
+ goto out;
- local->self_heal.child_errno[child_index] = op_errno;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ /* Only children that answered successfully take part. */
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1)
+ continue;
- }
- UNLOCK (&frame->lock);
+ /* The data segment of the changelog can be non-zero to indicate
+ * the directory needs a full heal. So the check below ensures
+ * it's not a directory before setting the data_selfheal boolean.
+ */
+ if (data_selfheal && !IA_ISDIR (replies[i].poststat.ia_type) &&
+ afr_is_data_set (this, replies[i].xdata))
+ *data_selfheal = _gf_true;
- call_count = afr_frame_return (frame);
+ if (metadata_selfheal &&
+ afr_is_metadata_set (this, replies[i].xdata))
+ *metadata_selfheal = _gf_true;
- if (call_count == 0) {
- sh_missing_entries_create (frame, this);
- }
+ if (entry_selfheal && afr_is_entry_set (this, replies[i].xdata))
+ *entry_selfheal = _gf_true;
- return 0;
-}
+ /* The first usable reply becomes the reference that every
+ * later reply is compared against. */
+ valid_cnt++;
+ if (valid_cnt == 1) {
+ first = replies[i].poststat;
+ continue;
+ }
+ if (!IA_EQUAL (first, replies[i].poststat, type)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN,
+ "TYPE mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_type,
+ (int) replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+ ret = -EIO;
+ goto out;
+ }
-static int
-sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = -1;
+ if (!IA_EQUAL (first, replies[i].poststat, uid)) {
+ gf_msg_debug (this->name, 0,
+ "UID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int) first.ia_uid,
+ (int) replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
- local = frame->local;
- priv = this->private;
+ if (!IA_EQUAL (first, replies[i].poststat, gid)) {
+ /* Log the group ids that actually differ (this used to
+ * print ia_uid for both values). */
+ gf_msg_debug (this->name, 0,
+ "GID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int) first.ia_gid,
+ (int) replies[i].poststat.ia_gid,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ if (!IA_EQUAL (first, replies[i].poststat, prot)) {
+ gf_msg_debug (this->name, 0,
+ "MODE mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int) st_mode_from_ia (first.ia_prot, 0),
+ (int) st_mode_from_ia
+ (replies[i].poststat.ia_prot, 0),
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
+ /* Size is only compared for regular files. */
+ if (IA_ISREG(first.ia_type) &&
+ !IA_EQUAL (first, replies[i].poststat, size)) {
+ gf_msg_debug (this->name, 0,
+ "SIZE mismatch "
+ "%lld vs %lld on %s for gfid:%s",
+ (long long) first.ia_size,
+ (long long) replies[i].poststat.ia_size,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+
+ if (data_selfheal)
+ *data_selfheal = _gf_true;
+ }
+ }
+
+ if (valid_cnt > 0 && link_inode) {
+ *link_inode = inode_link (inode, NULL, NULL, &first);
+ if (!*link_inode) {
+ ret = -EINVAL;
+ goto out;
}
+ } else if (valid_cnt < 2) {
+ /* Nothing was linked and there were not enough replies to
+ * compare: let afr_check_stale_error() pick the result. */
+ ret = afr_check_stale_error (replies, priv);
+ goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
-
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+ ret = 0;
+out:
+ if (inode)
+ inode_unref (inode);
+ /* replies is still NULL when we bailed out before the alloca0(). */
+ if (replies)
+ afr_replies_wipe (replies, priv->child_count);
- return 0;
+ return ret;
}
-static int
-sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+inode_t *
+afr_inode_find (xlator_t *this, uuid_t gfid)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
+ inode_table_t *table = NULL;
+ inode_t *inode = NULL;
+ table = this->itable;
+ if (!table)
+ return NULL;
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- sh->locked_nodes[child_index] = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- sh->locked_nodes[child_index] = 1;
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ inode = inode_find (table, gfid);
+ if (inode)
+ return inode;
- call_count = afr_frame_return (frame);
+ inode = inode_new (table);
+ if (!inode)
+ return NULL;
- if (call_count == 0) {
- if (sh->op_failed == 1) {
- sh_missing_entries_finish (frame, this);
- return 0;
- }
-
- sh_missing_entries_lookup (frame, this);
- }
+ gf_uuid_copy (inode->gfid, gfid);
- return 0;
+ return inode;
}
-static int
-afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
+call_frame_t *
+afr_frame_create (xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ pid_t pid = GF_CLIENT_PID_SELF_HEALD;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
+ syncopctx_setfspid (&pid);
+
+ frame->root->pid = pid;
+
+ afr_set_lk_owner (frame, this, frame->root);
+
+ return frame;
+}
+
+/*
+ * Copy the gfid of replies[source] into inode, build the pending-changelog
+ * xattrs for the children flagged in newentry via
+ * afr_mark_pending_changelog(), and issue afr_selfheal_post_op() with those
+ * xattrs on every child i for which sources[i] is set.
+ *
+ * Returns 0 once the post-ops have been issued (their individual results are
+ * not inspected here), or -ENOMEM when the xattr dict or the changelog
+ * could not be set up.
+ */
+int
+afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry)
+{
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int **changelog = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ gf_uuid_copy (inode->gfid, replies[source].poststat.ia_gfid);
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ changelog = afr_mark_pending_changelog (priv, newentry, xattr,
+ replies[source].poststat.ia_type);
- local->call_count = call_count;
+ if (!changelog) {
+ /* Without a changelog nothing gets marked: report the
+ * failure instead of falling through with ret == 0. The
+ * same code as the dict_new() failure above is used. */
+ ret = -ENOMEM;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, sh_missing_entries_lk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ if (!sources[i])
+ continue;
+ afr_selfheal_post_op (frame, this, inode, i, xattr, NULL);
+ }
+out:
+ if (changelog)
+ afr_matrix_cleanup (changelog, priv->child_count);
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
}
-
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+int
+afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
-
+ int ret = -1;
+ int entry_ret = 1;
+ int metadata_ret = 1;
+ int data_ret = 1;
+ int or_ret = 0;
+ inode_t *inode = NULL;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ afr_private_t *priv = NULL;
+ gf_boolean_t dataheal_enabled = _gf_false;
priv = this->private;
+ gf_string2boolean (priv->data_self_heal, &dataheal_enabled);
- sh = &l->self_heal;
+ ret = afr_selfheal_unlocked_inspect (frame, this, gfid, &inode,
+ &data_selfheal,
+ &metadata_selfheal,
+ &entry_selfheal);
+ if (ret)
+ goto out;
- lc = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
+ if (!(data_selfheal || metadata_selfheal || entry_selfheal)) {
+ ret = 2;
+ goto out;
+ }
- shc = &lc->self_heal;
+ if (data_selfheal && dataheal_enabled)
+ data_ret = afr_selfheal_data (frame, this, inode);
- shc->unwind = sh->unwind;
- shc->need_data_self_heal = sh->need_data_self_heal;
- shc->need_metadata_self_heal = sh->need_metadata_self_heal;
- shc->need_entry_self_heal = sh->need_entry_self_heal;
- shc->forced_merge = sh->forced_merge;
- shc->healing_fd_opened = sh->healing_fd_opened;
- shc->data_lock_held = sh->data_lock_held;
- if (sh->healing_fd && !sh->healing_fd_opened)
- shc->healing_fd = fd_ref (sh->healing_fd);
- else
- shc->healing_fd = sh->healing_fd;
- shc->background = sh->background;
- shc->type = sh->type;
+ if (metadata_selfheal && priv->metadata_self_heal)
+ metadata_ret = afr_selfheal_metadata (frame, this, inode);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
+ if (entry_selfheal && priv->entry_self_heal)
+ entry_ret = afr_selfheal_entry (frame, this, inode);
- lc->child_up = memdup (l->child_up, priv->child_count);
- if (l->xattr_req)
- lc->xattr_req = dict_ref (l->xattr_req);
+ or_ret = (data_ret | metadata_ret | entry_ret);
- if (l->cont.lookup.inode)
- lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
- if (l->cont.lookup.xattr)
- lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
+ if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO)
+ ret = -EIO;
+ else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1)
+ ret = 1;
+ else if (or_ret < 0)
+ ret = or_ret;
+ else
+ ret = 0;
- return lc;
+out:
+ if (inode)
+ inode_unref (inode);
+ return ret;
}
-
+/*
+ * This is the entry point for healing a given GFID. The return values for this
+ * function are as follows:
+ * '0' if the self-heal is successful
+ * '1' if the afr-xattrs are non-zero (due to on-going IO) and no heal is needed
+ * '2' if the afr-xattrs are all-zero and no heal is needed
+ * a negative value if the heal on the gfid failed (a negated errno such as
+ * -EIO, or -1 when the heal could not even be started).
+ */
int
-afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
+afr_selfheal (xlator_t *this, uuid_t gfid)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- local = bgsh_frame->local;
- sh = &local->self_heal;
-
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
- } else {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_false);
- }
+ int ret = -1;
+ call_frame_t *frame = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "background self-heal completed");
+ frame = afr_frame_create (this);
+ if (!frame)
+ return ret;
- if (!sh->unwound) {
- sh->unwind (sh->orig_frame, this);
- }
+ ret = afr_selfheal_do (frame, this, gfid);
- if (sh->background) {
- LOCK (&priv->lock);
- {
- priv->background_self_heals_started--;
- }
- UNLOCK (&priv->lock);
- }
-
- AFR_STACK_DESTROY (bgsh_frame);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return ret;
}
-
-int
-afr_self_heal (call_frame_t *frame, xlator_t *this)
+afr_local_t*
+__afr_dequeue_heals (afr_private_t *priv)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ if (list_empty (&priv->heal_waiting))
+ goto none;
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->healers >= priv->background_self_heal_count))
+ goto none;
+
+ local = list_entry (priv->heal_waiting.next, afr_local_t, healer);
+ priv->heal_waiters--;
+ GF_ASSERT (priv->heal_waiters >= 0);
+ list_del_init(&local->healer);
+ list_add(&local->healer, &priv->healing);
+ priv->healers++;
+ return local;
+none:
+ gf_msg_debug (THIS->name, 0, "Nothing dequeued. "
+ "Num healers: %d, Num Waiters: %d",
+ priv->healers, priv->heal_waiters);
+ return NULL;
+}
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- > priv->background_self_heal_count) {
+int
+afr_refresh_selfheal_wrap (void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ afr_local_t *local = heal_frame->local;
+ int ret = 0;
- local->self_heal.background = _gf_false;
+ ret = afr_selfheal (heal_frame->this, local->refreshinode->gfid);
+ return ret;
+}
- } else {
- priv->background_self_heals_started++;
- }
- }
- UNLOCK (&priv->lock);
+int
+afr_refresh_heal_done (int ret, call_frame_t *frame, void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ xlator_t *this = heal_frame->this;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = heal_frame->local;
+
+ LOCK (&priv->lock);
+ {
+ list_del_init(&local->healer);
+ priv->healers--;
+ GF_ASSERT (priv->healers >= 0);
+ local = __afr_dequeue_heals (priv);
}
+ UNLOCK (&priv->lock);
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->self_heal.need_metadata_self_heal,
- local->self_heal.need_data_self_heal,
- local->self_heal.need_entry_self_heal);
-
- sh_frame = copy_frame (frame);
- sh_local = afr_local_copy (local, this);
- sh_frame->local = sh_local;
- sh = &sh_local->self_heal;
-
- sh->orig_frame = frame;
+ if (heal_frame)
+ AFR_STACK_DESTROY (heal_frame);
- sh->completion_cbk = afr_self_heal_completion_cbk;
+ if (local)
+ afr_heal_synctask (this, local);
+ return 0;
+}
- sh->buf = GF_CALLOC (priv->child_count, sizeof (struct stat),
- gf_afr_mt_stat);
- sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
- sh->success = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
- sh->xattr = GF_CALLOC (priv->child_count, sizeof (dict_t *),
- gf_afr_mt_dict_t);
- sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
- gf_afr_mt_int);
- sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
- priv->child_count,
- gf_afr_mt_int);
-
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
+void
+afr_heal_synctask (xlator_t *this, afr_local_t *local)
+{ /* Start the heal for local->heal_frame as a synctask on this->ctx->env.
+   * afr_refresh_selfheal_wrap is passed as the task function and
+   * afr_refresh_heal_done as its completion callback; heal_frame is handed
+   * to synctask_new() twice (presumably once as the frame and once as the
+   * opaque argument the callbacks receive -- confirm against synctask_new). */
+ int ret = 0;
+ call_frame_t *heal_frame = NULL;
+
+ heal_frame = local->heal_frame;
+ ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
+ afr_refresh_heal_done, heal_frame, heal_frame);
+ if (ret < 0) /* launch failed: invoke the completion handler directly with the error */
+ /* Heal not launched. Will be queued when the next inode
+ * refresh happens and shd hasn't healed it yet. */
+ afr_refresh_heal_done (ret, heal_frame, heal_frame);
+}
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- }
+void
+afr_throttled_selfheal (call_frame_t *frame, xlator_t *this)
+{ /* Admission control for background heals: under priv->lock, append
+   * frame->local to priv->heal_waiting if there is room, then ask
+   * __afr_dequeue_heals() for the next heal that may run and launch it
+   * through afr_heal_synctask(). Nothing is launched when the request is
+   * not admitted. */
+ gf_boolean_t can_heal = _gf_true;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ LOCK (&priv->lock);
+ {
+ if ((priv->background_self_heal_count > 0) && /* a count of 0 (or less) admits nothing */
+ (priv->heal_wait_qlen + priv->background_self_heal_count) >
+ (priv->heal_waiters + priv->healers)) { /* waiting + running heals still below the combined limit */
+ list_add_tail(&local->healer, &priv->heal_waiting);
+ priv->heal_waiters++;
+ local = __afr_dequeue_heals (priv); /* local now names the heal to start, or NULL if none may start yet */
+ } else {
+ can_heal = _gf_false;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ if (can_heal) {
+ if (local)
+ afr_heal_synctask (this, local);
+ else /* NOTE(review): this branch is reached after the request was added to heal_waiting but nothing was dequeued, i.e. it is queued rather than rejected; the refused case (can_heal == _gf_false) logs nothing -- confirm the message placement is intended */
+ gf_msg_debug (this->name, 0, "Max number of heals are "
+ "pending, background self-heal rejected.");
+ }
+}
- sh->delta_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- }
+int
+afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type)
+{
+ int source = -1;
+ int i = 0;
- if (local->success_count && local->enoent_count) {
- afr_self_heal_missing_entries (sh_frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
+ /* Give preference to local child to save on bandwidth */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->local[i] && sources[i]) {
+ if ((type == AFR_DATA_TRANSACTION) &&
+ AFR_IS_ARBITER_BRICK (priv, i))
+ continue;
- afr_sh_missing_entries_done (sh_frame, this);
- }
+ source = i;
+ goto out;
+ }
+ }
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ goto out;
+ }
+ }
+out:
+ return source;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
deleted file mode 100644
index 298ac022a9b..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __AFR_SELF_HEAL_COMMON_H__
-#define __AFR_SELF_HEAL_COMMON_H__
-
-#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
-
-typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
-} afr_self_heal_type;
-
-int
-afr_sh_select_source (int sources[], int child_count);
-
-int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
-afr_sh_source_count (int sources[], int child_count);
-
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count);
-
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
-
-void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
-
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
- int child_count, afr_transaction_type type);
-
-int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type);
-
-int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
-
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
-
-
-#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 366cac81721..2a33e53764c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,1212 +1,875 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include "afr-self-heal.h"
#include "byte-order.h"
+#include "protocol-common.h"
+#include "afr-messages.h"
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
+enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+};
-int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
+#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
+static int
+__checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, uint32_t weak, uint8_t *strong,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ afr_local_t *local = NULL;
+ struct afr_reply *replies = NULL;
+ int i = (long) cookie;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
- /* unref only if we created the fd ourselves */
-
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- for (i = 0; i < priv->child_count; i++)
- sh->locked_nodes[i] = 0;
-
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
-
+ replies = local->replies;
+
+ replies[i].valid = 1;
+ replies[i].op_ret = op_ret;
+ replies[i].op_errno = op_errno;
+ if (xdata)
+ replies[i].buf_has_zeroes = dict_get_str_boolean (xdata,
+ "buf-has-zeroes", _gf_false);
+ if (strong)
+ memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
+
+ syncbarrier_wake (&local->barrier);
return 0;
}
-int
-afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
-
- int child_index = (long) cookie;
+ int i = (long) cookie;
+ afr_local_t *local = NULL;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flush or setattr failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (pre)
+ local->replies[i].prestat = *pre;
+ if (post)
+ local->replies[i].poststat = *post;
+ if (xdata)
+ local->replies[i].xdata = dict_ref (xdata);
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- }
+ syncbarrier_wake (&local->barrier);
return 0;
}
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre, struct iatt *statpost)
-{
- afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
+static gf_boolean_t
+__afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct iatt *poststat)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- int i = 0;
- int call_count = 0;
- int source = 0;
- int active_sinks = 0;
- int32_t valid = 0;
-
- struct iatt stbuf = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- if (sh->healing_fd_opened) {
- /* not our job to close the fd */
-
- afr_sh_data_done (frame, this);
- return 0;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_subvols = NULL;
+ gf_boolean_t checksum_match = _gf_true;
+ dict_t *xdata = NULL;
+ int i = 0;
- if (!sh->healing_fd) {
- afr_sh_data_done (frame, this);
- return 0;
+ priv = this->private;
+ local = frame->local;
+ xdata = dict_new();
+ if (xdata)
+ i = dict_set_int32 (xdata, "check-zero-filled", 1);
+ wind_subvols = alloca0 (priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || healed_sinks[i])
+ wind_subvols[i] = 1;
}
- call_count = (sh->active_sinks + 1) * 2;
- local->call_count = call_count;
-
- /* closed source */
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[sh->source]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->flush,
- sh->healing_fd);
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- call_count--;
-
- if (call_count == 0)
- return 0;
+ AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd,
+ offset, size, xdata);
+ if (xdata)
+ dict_unref (xdata);
+
+ if (!local->replies[source].valid || local->replies[source].op_ret != 0)
+ return _gf_false;
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (i == source)
continue;
+ if (local->replies[i].valid) {
+ if (memcmp (local->replies[source].checksum,
+ local->replies[i].checksum,
+ MD5_DIGEST_LENGTH)) {
+ checksum_match = _gf_false;
+ break;
+ }
+ }
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd);
-
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ if (checksum_match) {
+ if (HAS_HOLES (poststat))
+ return _gf_true;
- if (!--call_count)
- break;
- }
+ /* For non-sparse files, we might be better off writing the
+ * zeroes to sinks to avoid mismatch of disk-usage in bricks. */
+ if (local->replies[source].buf_has_zeroes)
+ return _gf_false;
+ else
+ return _gf_true;
+ }
- return 0;
+ return _gf_false;
}
-int
-afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static gf_boolean_t
+__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int sink)
{
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_close (frame, this);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec *iovec = NULL;
+ int count = 0;
+ int ret = 0;
+ gf_boolean_t zero_filled = _gf_false;
+
+ priv = this->private;
+ ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = iov_0filled (iovec, count);
+ if (!ret)
+ zero_filled = _gf_true;
+out:
+ if (iovec)
+ GF_FREE (iovec);
+ if (iobref)
+ iobref_unref (iobref);
+ return zero_filled;
}
-
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct afr_reply *replies, int type)
{
- struct flock flock;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
-
+ struct iovec *iovec = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- if (sh->data_lock_held) {
- /* not our job to unlock, proceed to close */
-
- afr_sh_data_close (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i])
- call_count++;
- }
+ ret = syncop_readv (priv->children[source], fd, size, offset, 0,
+ &iovec, &count, &iobref, NULL, NULL);
+ if (ret <= 0)
+ return ret;
- if (call_count == 0) {
- afr_sh_data_close (frame, this);
- return 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
- local->call_count = call_count;
+ /*
+ * TODO: Use fiemap() and discard() to heal holes
+ * in the future.
+ *
+ * For now,
+ *
+ * - if the source had any holes at all,
+ * AND
+ * - if we are writing past the original file size
+ * of the sink
+ * AND
+ * - is NOT the last block of the source file. if
+ * the block contains EOF, it has to be written
+ * in order to set the file size even if the
+ * last block is 0-filled.
+ * AND
+ * - if the read buffer is filled with only 0's
+ *
+ * then, skip writing to this source. We don't depend
+ * on the write to happen to update the size as we
+ * have performed an ftruncate() upfront anyways.
+ */
+#define is_last_block(o,b,s) ((s >= o) && (s <= (o + b)))
+ if (HAS_HOLES ((&replies[source].poststat)) &&
+ offset >= replies[i].poststat.ia_size &&
+ !is_last_block (offset, size,
+ replies[source].poststat.ia_size) &&
+ (iov_0filled (iovec, count) == 0))
+ continue;
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_UNLCK;
+ /* Avoid filling up sparse regions of the sink with 0-filled
+ * writes.*/
+ if (type == AFR_SELFHEAL_DATA_FULL &&
+ HAS_HOLES ((&replies[source].poststat)) &&
+ ((offset + size) <= replies[i].poststat.ia_size) &&
+ (iov_0filled (iovec, count) == 0) &&
+ __afr_is_sink_zero_filled (this, fd, size, offset, i)) {
+ continue;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_unlck_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
- if (!--call_count)
- break;
+ ret = syncop_writev (priv->children[i], fd, iovec, count,
+ offset, iobref, 0, NULL, NULL);
+ if (ret != iov_length (iovec, count)) {
+ /* write() failed on this sink. unset the corresponding
+ member in sinks[] (which is healed_sinks[] in the
+ caller) so that this server does NOT get considered
+ as successfully healed.
+ */
+ healed_sinks[i] = 0;
}
}
+ if (iovec)
+ GF_FREE (iovec);
+ if (iobref)
+ iobref_unref (iobref);
- return 0;
+ return ret;
}
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "finishing data selfheal of %s", local->loc.path);
-
- afr_sh_data_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+static int
+afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks, off_t offset,
+ size_t size, int type, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ int ret = -1;
+ int sink_count = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *data_lock = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ sink_count = AFR_COUNT (healed_sinks, priv->child_count);
+ data_lock = alloca0 (priv->child_count);
- LOCK (&frame->lock);
+ ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name,
+ offset, size, data_lock);
{
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_data_finish (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
+ if (ret < sink_count) {
+ ret = -ENOTCONN;
+ goto unlock;
}
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
+ if (type == AFR_SELFHEAL_DATA_DIFF &&
+ __afr_can_skip_data_block_heal (frame, this, fd, source,
+ healed_sinks, offset, size,
+ &replies[source].poststat)) {
+ ret = 0;
+ goto unlock;
}
- }
- GF_FREE (erase_xattr);
- return 0;
+ ret = __afr_selfheal_data_read_write (frame, this, fd, source,
+ healed_sinks, offset, size,
+ replies, type);
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
+ offset, size, data_lock);
+ return ret;
}
-int
-afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+
+static int
+afr_selfheal_data_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- priv = this->private;
local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_TRACE,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
+ priv = this->private;
- call_count = afr_frame_return (frame);
+ if (!priv->ensure_durability)
+ return 0;
- if (call_count == 0) {
- afr_sh_data_erase_pending (frame, this);
- }
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, fsync, fd, 0, NULL);
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret != 0)
+ /* fsync() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
return 0;
}
-int
-afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_restore_time (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
+ loc_t loc = {0, };
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sources = sh->sources;
- call_count = sh->active_sinks;
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, setattr, &loc,
+ &replies[source].poststat,
+ (GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME), NULL);
- if (!--call_count)
- break;
- }
+ loc_wipe (&loc);
return 0;
}
-
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
+static int
+afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks,
+ int source, struct afr_reply *replies)
{
+ int type = AFR_SELFHEAL_DATA_FULL;
int i = 0;
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
+ if (priv->data_self_heal_algorithm == NULL) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] && i != source)
+ continue;
+ if (replies[i].poststat.ia_size) {
+ type = AFR_SELFHEAL_DATA_DIFF;
+ break;
+ }
}
-
- i++;
+ } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp (priv->data_self_heal_algorithm, "diff") == 0) {
+ type = AFR_SELFHEAL_DATA_DIFF;
}
-
- return NULL;
+ return type;
}
-
static int
-sh_zero_byte_files_exist (afr_self_heal_t *sh, int child_count)
+afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- int i;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ off_t off = 0;
+ size_t block = 128 * 1024;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int ret = -1;
+ call_frame_t *iter_frame = NULL;
+ unsigned char arbiter_sink_status = 0;
- for (i = 0; i < child_count; i++) {
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
- }
+ priv = this->private;
+ if (priv->arbiter_count) {
+ arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
+ healed_sinks[ARBITER_BRICK_INDEX] = 0;
}
- return ret;
-}
+ type = afr_data_self_heal_type_get (priv, healed_sinks, source,
+ replies);
-
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
-
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
-
- if ((local->enoent_count != 0)
- || sh_zero_byte_files_exist (sh, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size * this->ctx->page_size))) {
-
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
-
- algo = sh_algo_from_name (this, "full");
-
- } else {
- algo = sh_algo_from_name (this, "diff");
- }
+ iter_frame = afr_copy_frame (frame);
+ if (!iter_frame) {
+ ret = -ENOMEM;
+ goto out;
}
- return algo;
-}
-
-
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- struct afr_sh_algorithm *sh_algo = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ for (off = 0; off < replies[source].poststat.ia_size; off += block) {
+ if (AFR_COUNT (healed_sinks, priv->child_count) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
- source = sh->source;
+ ret = afr_selfheal_data_block (iter_frame, this, fd, source,
+ healed_sinks, off, block, type,
+ replies);
+ if (ret < 0)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return 0;
+ AFR_STACK_RESET (iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
+ goto out;
+ }
}
- sh->active_sinks = active_sinks;
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[source]->name, active_sinks);
+ ret = afr_selfheal_data_fsync (frame, this, fd, healed_sinks);
- sh->algo_completion_cbk = afr_sh_data_trim_sinks;
- sh->algo_abort_cbk = afr_sh_data_finish;
+out:
+ if (arbiter_sink_status)
+ healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
- sh_algo = afr_sh_data_pick_algo (frame, this);
-
- sh_algo->fn (frame, this);
-
- return 0;
+ if (iter_frame)
+ AFR_STACK_DESTROY (iter_frame);
+ return ret;
}
-int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, unsigned char *healed_sinks,
+ uint64_t size)
{
- afr_local_t *local = NULL;
- afr_local_t * orig_local = NULL;
-
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char arbiter_sink_status = 0;
+ int i = 0;
local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_DATA);
-
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
-
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible split-brain). "
- "Please delete the file from all but the preferred "
- "subvolume.", local->loc.path);
-
- local->govinda_gOvinda = 1;
-
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_data_finish (frame, this);
- return 0;
+ if (priv->arbiter_count) {
+ arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
+ healed_sinks[ARBITER_BRICK_INDEX] = 0;
}
- sh->source = source;
- sh->block_size = 65536;
- sh->file_size = sh->buf[source].ia_size;
-
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
-
- orig_local = sh->orig_frame->local;
- orig_local->cont.lookup.buf.ia_size = sh->buf[source].ia_size;
-
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- afr_set_read_child (this, local->loc.inode, sh->source);
-
- /*
- quick-read might have read the file, so send xattr from
- the source subvolume (http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=815)
- */
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, ftruncate, fd, size, NULL);
- dict_unref (orig_local->cont.lookup.xattr);
- if (orig_local->cont.lookup.xattrs)
- orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]);
-
- if (sh->background) {
- sh->unwind (sh->orig_frame, this);
- sh->unwound = _gf_true;
- }
-
- afr_sh_data_sync_prepare (frame, this);
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret == -1)
+ /* truncate() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ if (arbiter_sink_status)
+ healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
return 0;
}
-
-int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr)
+gf_boolean_t
+afr_has_source_witnesses (xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- int nsources = 0;
- int source = 0;
- int i = 0;
-
- sh = &local->self_heal;
- priv = this->private;
-
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- }
-
- sh->sources = GF_CALLOC (priv->child_count, sizeof (*sh->sources),
- gf_afr_mt_int32_t);
-
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_DATA);
+ int i = 0;
+ afr_private_t *priv = NULL;
- source = afr_sh_select_source (sh->sources, priv->child_count);
+ priv = this->private;
- return source;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
}
-
-int
-afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+static gf_boolean_t
+afr_does_size_mismatch (xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt *min = NULL;
+ struct iatt *max = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fstat of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->buf[child_index] = *buf;
- }
- }
- UNLOCK (&frame->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- call_count = afr_frame_return (frame);
+ if (replies[i].op_ret < 0)
+ continue;
- if (call_count == 0) {
- afr_sh_data_fix (frame, this);
- }
+ if (!sources[i])
+ continue;
- return 0;
-}
+ if (AFR_IS_ARBITER_BRICK (priv, i) &&
+ (replies[i].poststat.ia_size == 0))
+ continue;
+ if (!min)
+ min = &replies[i].poststat;
-int
-afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ if (!max)
+ max = &replies[i].poststat;
- int call_count = 0;
- int i = 0;
+ if (min->ia_size > replies[i].poststat.ia_size)
+ min = &replies[i].poststat;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- local->call_count = call_count;
+ if (max->ia_size < replies[i].poststat.ia_size)
+ max = &replies[i].poststat;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fstat,
- sh->healing_fd);
-
- if (!--call_count)
- break;
- }
- }
+ if (min && max) {
+ if (min->ia_size != max->ia_size)
+ return _gf_true;
+ }
- return 0;
+ return _gf_false;
}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+static void
+afr_mark_biggest_witness_as_source (xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fxattrop of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->xattr[child_index] = dict_ref (xattr);
- }
- }
- UNLOCK (&frame->lock);
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t biggest_witness = 0;
- call_count = afr_frame_return (frame);
+ priv = this->private;
+ /* Find source with biggest witness count */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (biggest_witness < witness[i])
+ biggest_witness = witness[i];
+ }
- if (call_count == 0) {
- afr_sh_data_fstat (frame, this);
- }
+ /* Mark files with less witness count as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (witness[i] < biggest_witness)
+ sources[i] = 0;
+ }
- return 0;
+ return;
}
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
+/* This is a tie breaker function. Only one source be assigned here */
+static void
+afr_mark_newest_file_as_source (xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
-
- int32_t zero_pending[3] = {0, 0, 0};
-
- int call_count = 0;
- int i = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ uint32_t max_ctime = 0;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ priv = this->private;
+ /* Find source with latest ctime */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin (xattr_req, priv->pending_key[i],
- zero_pending, 3 * sizeof(int32_t));
+ if (max_ctime <= replies[i].poststat.ia_ctime) {
+ source = i;
+ max_ctime = replies[i].poststat.ia_ctime;
}
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req);
-
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
-
-
-int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, int child_index);
-
-int
-afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->locked_nodes[child_index] = 0;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- sh->locked_nodes[child_index] = 1;
- sh->lock_count++;
-
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- afr_sh_data_lock_rec (frame, this, child_index + 1);
-
- return 0;
+ /* Only mark one of the files as source to break ties */
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ sources[source] = 1;
}
-
-int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, int child_index)
+static int
+__afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness)
{
- struct flock flock;
- int i = 0;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
priv = this->private;
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_WRLCK;
+ sources_count = AFR_COUNT (sources, priv->child_count);
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
-
- if ((child_index == priv->child_count) &&
- sh->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to lock on even one child");
-
- afr_sh_data_done (frame, this);
- return 0;
+ if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
+ || !sources_count) {
+ /* split brain */
+ source = afr_mark_split_brain_source_sinks (frame, this, inode,
+ sources, sinks,
+ healed_sinks,
+ locked_on, replies,
+ AFR_DATA_TRANSACTION);
+ if (source < 0)
+ return -EIO;
+ return source;
}
- if ((child_index == priv->child_count)
- || (sh->lock_count == afr_lock_server_count (priv, AFR_DATA_TRANSACTION))) {
- afr_sh_data_fxattrop (frame, this);
- return 0;
- }
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ /* If there are no witnesses/size-mismatches on sources we are done*/
+ if (!afr_does_size_mismatch (this, sources, replies) &&
+ !afr_has_source_witnesses (this, sources, witness))
+ goto out;
- STACK_WIND_COOKIE (frame, afr_sh_data_lock_cbk,
- (void *) (long) child_index,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLKW, &flock);
+ afr_mark_largest_file_as_source (this, sources, replies);
+ afr_mark_biggest_witness_as_source (this, sources, witness);
+ afr_mark_newest_file_as_source (this, sources, replies);
- return 0;
-}
+out:
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
+ source = afr_choose_source_by_policy (priv, sources,
+ AFR_DATA_TRANSACTION);
+ return source;
+}
+/*
+ * __afr_selfheal_data_prepare:
+ *
+ * This function inspects the on-disk xattrs and determines which subvols
+ * are sources and sinks.
+ *
+ * The return value is the index of the subvolume to be used as the source for
+ * self-healing, or an error code (-EIO if no source can be chosen, e.g. split brain).
+ */
int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
+__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, gf_boolean_t *pflag)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
-
- int i = 0;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->data_lock_held) {
- /* caller has held the lock already,
- so skip locking */
-
- afr_sh_data_fxattrop (frame, this);
- return 0;
- }
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- sh->locked_nodes[i] = 0;
+ ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
+ replies);
+
+ if (ret)
+ return ret;
+
+ witness = alloca0(priv->child_count * sizeof (*witness));
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_DATA_TRANSACTION,
+ locked_on, sources, sinks, witness,
+ pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_data_finalize_source (frame, this, inode,
+ sources, sinks,
+ healed_sinks,
+ locked_on, replies,
+ witness);
+ if (source < 0)
+ return -EIO;
- return afr_sh_data_lock_rec (frame, this, 0);
+ return source;
}
-int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+static int
+__afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+ gf_boolean_t did_sh = _gf_true;
+ gf_boolean_t is_arbiter_the_only_sink = _gf_false;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- child_index = (long) cookie;
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
- LOCK (&frame->lock);
+ ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
{
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ gf_msg_debug (this->name, 0, "%s: Skipping "
+ "self-heal as only %d number "
+ "of subvolumes "
+ "could be locked",
+ uuid_utoa (fd->inode->gfid),
+ ret);
+ ret = -ENOTCONN;
+ goto unlock;
}
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ ret = __afr_selfheal_data_prepare (frame, this, fd->inode,
+ data_lock, sources, sinks,
+ healed_sinks,
+ locked_replies,
+ NULL);
+ if (ret < 0)
+ goto unlock;
+
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ source = ret;
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
+ if (AFR_IS_ARBITER_BRICK(priv, source)) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
- afr_sh_data_lock (frame, this);
- }
+ if (priv->arbiter_count &&
+ AFR_COUNT (healed_sinks, priv->child_count) == 1 &&
+ healed_sinks[ARBITER_BRICK_INDEX]) {
+ is_arbiter_the_only_sink = _gf_true;
+ goto restore_time;
+ }
- return 0;
+ ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = 0;
+
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+restore_time:
+ afr_selfheal_data_restore_time (frame, this, fd->inode, source,
+ healed_sinks, locked_replies);
+
+ if (!is_arbiter_the_only_sink) {
+ ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name,
+ 0, 0, data_lock);
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ ret = -ENOTCONN;
+ did_sh = _gf_false;
+ goto skip_undo_pending;
+ }
+ }
+ ret = afr_selfheal_undo_pending (frame, this, fd->inode,
+ sources, sinks, healed_sinks,
+ AFR_DATA_TRANSACTION,
+ locked_replies, data_lock);
+skip_undo_pending:
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+out:
+
+ if (did_sh)
+ afr_log_selfheal (fd->inode->gfid, this, ret, "data", source,
+ sources, healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe (locked_replies, priv->child_count);
+
+ return ret;
}
int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)
{
- int i = 0;
- int call_count = 0;
-
- fd_t *fd = NULL;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->healing_fd_opened) {
- /* caller has opened the fd for us already, so skip open */
+ int ret = 0;
+ fd_t *fd_tmp = NULL;
+ loc_t loc = {0,};
- afr_sh_data_lock (frame, this);
- return 0;
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(!local->child_up[i])
- continue;
+ fd_tmp = fd_create (inode, 0);
+ if (!fd_tmp)
+ return -ENOMEM;
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_RDWR|O_LARGEFILE, fd, 0);
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- if (!--call_count)
- break;
+ ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL, NULL);
+ if (ret < 0) {
+ fd_unref (fd_tmp);
+ goto out;
+ } else {
+ fd_bind (fd_tmp);
}
- return 0;
+ *fd = fd_tmp;
+out:
+ loc_wipe (&loc);
+ return ret;
}
-
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+ fd_t *fd = NULL;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ ret = afr_selfheal_data_open (this, inode, &fd);
+ if (!fd) {
+ gf_msg_debug (this->name, -ret, "%s: Failed to open",
+ uuid_utoa (inode->gfid));
+ return -EIO;
+ }
- if (sh->need_data_self_heal && priv->data_self_heal) {
- afr_sh_data_open (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
+ locked_on = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode,
+ priv->sh_domain, 0, 0,
+ locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ gf_msg_debug (this->name, 0, "%s: Skipping "
+ "self-heal as only %d number of "
+ "subvolumes could be locked",
+ uuid_utoa (fd->inode->gfid),
+ ret);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data (frame, this, fd, locked_on);
}
+unlock:
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
- return 0;
-}
+ if (fd)
+ fd_unref (fd);
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index dcb8d0d71df..985cebe76b9 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,2555 +1,1099 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "inode.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include "afr-self-heal.h"
#include "byte-order.h"
-
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
+#include "afr-messages.h"
+#include "syncop-utils.h"
+
+/* The maximum file name length is 255, but this name is 256 characters long,
+ * so no real file can ever have it. An entry-lock taken on this name prevents
+ * self-heals from older versions while the granular entry self-heal is in
+ * progress on a newer version. */
+#define LONG_FILENAME "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-
-int
-afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {0, };
+ char g[64];
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
-
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocked inode of %s on child %d",
- local->loc.path, child_index);
+ subvol = priv->children[child];
+
+ loc.parent = inode_ref (dir);
+ gf_uuid_copy (loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
+
+ if (replies[child].valid && replies[child].op_ret == 0) {
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s",
+ uuid_utoa (dir->gfid), name,
+ uuid_utoa_r (replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir (subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s",
+ uuid_utoa (dir->gfid), name,
+ uuid_utoa_r (replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink (subvol, &loc, NULL, NULL);
+ break;
}
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ loc_wipe (&loc);
- if (call_count == 0) {
- afr_sh_entry_done (frame, this);
- }
-
- return 0;
+ return ret;
}
int
-afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
+afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *newentry)
{
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
+ int ret = 0;
+ loc_t loc = {0,};
+ loc_t srcloc = {0,};
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ struct iatt *iatt = NULL;
+ char *linkname = NULL;
+ mode_t mode = 0;
+ struct iatt newent = {0,};
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i])
- call_count++;
- }
-
- if (call_count == 0) {
- afr_sh_entry_done (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_unlck_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->loc, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ xdata = dict_new();
+ if (!xdata)
+ return -ENOMEM;
- local = frame->local;
+ loc.parent = inode_ref (dir);
+ gf_uuid_copy (loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
+ ret = afr_selfheal_entry_delete (this, dir, name, inode, dst, replies);
+ if (ret)
+ goto out;
- afr_sh_entry_unlock (frame, this);
+ ret = dict_set_static_bin (xdata, "gfid-req",
+ replies[source].poststat.ia_gfid, 16);
+ if (ret)
+ goto out;
- return 0;
-}
+ iatt = &replies[source].poststat;
+ srcloc.inode = inode_ref (inode);
+ gf_uuid_copy (srcloc.gfid, iatt->ia_gfid);
-int
-afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
+ switch (iatt->ia_type) {
+ case IA_IFDIR:
+ ret = syncop_mkdir (priv->children[dst], &loc, mode, 0,
+ xdata, NULL);
+ if (ret == 0)
+ newentry[dst] = 1;
+ break;
+ case IA_IFLNK:
+ ret = syncop_lookup (priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == 0) {
+ ret = syncop_link (priv->children[dst], &srcloc, &loc,
+ &newent, NULL, NULL);
+ } else {
+ ret = syncop_readlink (priv->children[source], &srcloc,
+ &linkname, 4096, NULL, NULL);
+ if (ret <= 0)
+ goto out;
+ ret = syncop_symlink (priv->children[dst], &loc,
+ linkname, NULL, xdata, NULL);
+ if (ret == 0)
+ newentry[dst] = 1;
+ }
+ break;
+ default:
+ ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto out;
+ ret = syncop_mknod (priv->children[dst], &loc, mode,
+ iatt->ia_rdev, &newent, xdata, NULL);
+ if (ret == 0 && newent.ia_nlink == 1) {
+ /* New entry created. Mark @dst pending on all sources */
+ newentry[dst] = 1;
+ }
+ break;
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_finish (frame, this);
-
- return 0;
+out:
+ if (xdata)
+ dict_unref (xdata);
+ GF_FREE (linkname);
+ loc_wipe (&loc);
+ loc_wipe (&srcloc);
+ return ret;
}
-int
-afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
-
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ unsigned char *newentry = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
+ newentry = alloca0 (priv->child_count);
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
+ if (!replies[source].valid)
+ return -EIO;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
+ /* Skip healing this entry if the last lookup on it failed for reasons
+ * other than ENOENT.
+ */
+ if ((replies[source].op_ret < 0) &&
+ (replies[source].op_errno != ENOENT))
+ return -replies[source].op_errno;
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
+ if (!healed_sinks[i])
continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
+ if (replies[source].op_ret == -1 &&
+ replies[source].op_errno == ENOENT) {
+ ret = afr_selfheal_entry_delete (this, fd->inode, name,
+ inode, i, replies);
+ } else {
+ if (!gf_uuid_compare (replies[i].poststat.ia_gfid,
+ replies[source].poststat.ia_gfid))
+ continue;
+
+ ret = afr_selfheal_recreate_entry (this, i, source,
+ fd->inode, name, inode,
+ replies, newentry);
+ }
+ if (ret < 0)
break;
}
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
-
- return 0;
+ if (AFR_COUNT (newentry, priv->child_count))
+ afr_selfheal_newentry_mark (frame, this, inode, source, replies,
+ sources, newentry);
+ return ret;
}
-
-
static int
-next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
+afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
+ struct afr_reply *replies,
+ uuid_t pargfid, char *bname,
+ int src_idx)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
+ int i = 0;
+ char g1[64] = {0,};
+ char g2[64] = {0,};
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
+ priv = this->private;
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == src_idx)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_compare (replies[src_idx].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN, "Gfid mismatch "
+ "detected for <%s/%s>, %s on %s and %s on %s. "
+ "Skipping conservative merge on the file.",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+ priv->children[i]->name,
+ uuid_utoa_r (replies[src_idx].poststat.ia_gfid,
+ g2), priv->children[src_idx]->name);
+ return -1;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
+ if ((replies[src_idx].poststat.ia_type) !=
+ (replies[i].poststat.ia_type)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN, "Type mismatch "
+ "detected for <%s/%s>, %d on %s and %d on %s. "
+ "Skipping conservative merge on the file.",
+ uuid_utoa (pargfid), bname,
+ replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ replies[src_idx].poststat.ia_type,
+ priv->children[src_idx]->name);
+ return -1;
+ }
+ }
- next_active_source = i;
- break;
- }
- }
-out:
- return next_active_source;
+ return 0;
}
-
-
static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
+__afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, unsigned char *sources,
+ unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies) /* Merge path for one directory entry: takes the first child with a valid, successful reply as the reference, checks the other replies for gfid/type mismatch, and recreates the entry on healed sinks whose reply carried ENOENT. Returns 0 if no usable reply exists, the mismatch helper's negative value, or the result of the last recreate call. */
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
+ int ret = 0;
+ int i = 0;
+ int source = -1; /* index of the first reply with valid set and op_ret == 0; stays -1 if there is none */
+ unsigned char *newentry = NULL; /* zeroed per-child array handed to afr_selfheal_recreate_entry() and counted before marking */
+ afr_private_t *priv = NULL;
priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
+ newentry = alloca0 (priv->child_count);
for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
+ if (replies[i].valid && replies[i].op_ret == 0) { /* first usable reply wins */
+ source = i;
break;
}
}
- return next_active_sink;
-}
-
-
-int
-build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
-{
- int ret = -1;
-
- if (!child) {
- goto out;
- }
-
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s",
- parent->path, name);
-
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
- }
-
- if (!child->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
-
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
-
- if (!child->inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
+ if (source == -1) {
+ /* entry got deleted in the mean time? */
+ return 0;
}
- ret = 0;
-out:
- if (ret == -1)
- loc_wipe (child);
-
- return ret;
-}
-
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
+ /* Set all the sources as 1, otherwise newentry_mark won't be set */
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ sources[i] = 1;
+ }
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ /* In case of a gfid or type mismatch on the entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies,
+ fd->inode->gfid,
+ name, source);
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
+ if (ret < 0)
+ return ret;
- return 0;
-}
-
-int
-afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
-
- int active_src = (long) cookie;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr on parent directory of %s on subvolume %s failed: %s",
- expunge_local->loc.path,
- priv->children[active_src]->name, strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_rename_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- call_frame_t *frame = NULL;
-
- int32_t valid = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
- active_src = (long) cookie;
+ if (replies[i].op_errno != ENOENT) /* only children whose reply carried ENOENT are candidates for recreation */
+ continue;
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
+ ret = afr_selfheal_recreate_entry (this, i, source, fd->inode, /* NOTE(review): ret is overwritten on every iteration, so only the last recreate result reaches the caller */
+ name, inode, replies,
+ newentry);
}
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- afr_build_parent_loc (&expunge_sh->parent_loc, &expunge_local->loc);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->setattr,
- &expunge_sh->parent_loc,
- &expunge_sh->parentbuf,
- valid);
-
- return 0;
-}
-
-
-static void
-init_trash_loc (loc_t *trash_loc, inode_table_t *table)
-{
- trash_loc->path = gf_strdup ("/" GF_REPLICATE_TRASH_DIR);
- trash_loc->name = GF_REPLICATE_TRASH_DIR;
- trash_loc->parent = table->root;
- trash_loc->inode = inode_new (table);
-}
-
-
-char *
-make_trash_path (const char *path)
-{
- char *c = NULL;
- char *tp = NULL;
-
- tp = GF_CALLOC (strlen ("/" GF_REPLICATE_TRASH_DIR) + strlen (path) + 1,
- sizeof (char), gf_afr_mt_char);
-
- strcpy (tp, GF_REPLICATE_TRASH_DIR);
- strcat (tp, path);
-
- c = strchr (tp, '/') + 1;
- while (*c++)
- if (*c == '/')
- *c = '-';
-
- return tp;
+ if (AFR_COUNT (newentry, priv->child_count)) /* mark only if AFR_COUNT() is non-zero (presumably: at least one newentry[] slot was set) */
+ afr_selfheal_newentry_mark (frame, this, inode, source, replies,
+ sources, newentry);
+ return ret;
}
-int
-afr_sh_entry_expunge_rename (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, inode_t *trash_inode)
+static int
+__afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies) /* Per-entry dispatcher: a negative source selects __afr_selfheal_merge_dirent(), any other value selects __afr_selfheal_heal_dirent(); the chosen helper's return value is passed through unchanged. */
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- loc_t rename_loc;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- rename_loc.inode = inode_ref (expunge_local->loc.inode);
- rename_loc.path = make_trash_path (expunge_local->loc.path);
- rename_loc.name = strrchr (rename_loc.path, '/') + 1;
- rename_loc.parent = trash_inode;
-
- gf_log (this->name, GF_LOG_TRACE,
- "moving file/directory %s on %s to %s",
- expunge_local->loc.path, priv->children[active_src]->name,
- rename_loc.path);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_rename_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rename,
- &expunge_local->loc, &rename_loc);
+ int ret = -1; /* always overwritten by one of the two calls below */
- loc_wipe (&rename_loc);
-
- return 0;
+ if (source < 0) /* no source index supplied: merge instead of healing from a source */
+ ret = __afr_selfheal_merge_dirent (frame, this, fd, name, inode,
+ sources, healed_sinks,
+ locked_on, replies);
+ else
+ ret = __afr_selfheal_heal_dirent (frame, this, fd, name, inode,
+ source, sources, healed_sinks,
+ locked_on, replies);
+ return ret;
}
-
-int
-afr_sh_entry_expunge_mkdir_cbk (call_frame_t *expunge_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+static gf_boolean_t
+is_full_heal_marker_present (xlator_t *this, dict_t *xdata, int idx) /* Returns _gf_true if, for any child's pending key found in xdata, the 32-bit value at position idx is non-zero after ntoh32(); returns _gf_false otherwise, including when xdata is NULL. */
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
+ int i = 0;
+ int pending[3] = {0,}; /* local copy of one pending value, three ints wide */
+ void *pending_raw = NULL;
+ afr_private_t *priv = NULL;
- int active_src = (long) cookie;
+ priv = this->private;
- inode_t *trash_inode = NULL;
+ if (!xdata)
+ return _gf_false;
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
+ /* Iterate over each of the priv->pending_key[] elements and then
+ * see if any of them have data segment non-zero. If they do, return
+ * true. Else return false.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw)) /* non-zero return (presumably: key not in xdata) means nothing to inspect for this child */
+ continue;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "mkdir of /" GF_REPLICATE_TRASH_DIR " failed on %s",
- priv->children[active_src]->name);
+ if (!pending_raw)
+ continue;
- goto out;
+ memcpy (pending, pending_raw, sizeof (pending)); /* assumes the stored value is at least sizeof (pending) bytes long - TODO confirm */
+ if (ntoh32 (pending[idx]))
+ return _gf_true;
}
- /* mkdir successful */
-
- trash_inode = inode_link (inode, expunge_local->loc.inode->table->root,
- GF_REPLICATE_TRASH_DIR, buf);
-
- afr_sh_entry_expunge_rename (expunge_frame, this, active_src,
- trash_inode);
- return 0;
-out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
- return 0;
+ return _gf_false;
}
-
-int
-afr_sh_entry_expunge_lookup_trash_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+static gf_boolean_t
+afr_need_full_heal (xlator_t *this, struct afr_reply *replies, int source,
+ unsigned char *healed_sinks, afr_transaction_type type) /* Decides whether a full heal is needed: always _gf_true when priv->esh_granular is off or type is not AFR_ENTRY_TRANSACTION; otherwise _gf_true only if a full-heal marker is found on the source or on any healed sink. */
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
-
- int active_src = (long) cookie;
-
- inode_t *trash_inode;
- loc_t trash_loc;
+ int i = 0;
+ int idx = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
+ priv = this->private;
- if ((op_ret != 0) && (op_errno == ENOENT)) {
- init_trash_loc (&trash_loc, expunge_local->loc.inode->table);
+ if (!priv->esh_granular)
+ return _gf_true;
- gf_log (this->name, GF_LOG_TRACE,
- "creating directory " GF_REPLICATE_TRASH_DIR " on subvolume %s",
- priv->children[active_src]->name);
+ if (type != AFR_ENTRY_TRANSACTION)
+ return _gf_true;
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_mkdir_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->mkdir,
- &trash_loc, 0777);
+ priv = this->private; /* NOTE(review): redundant, priv was already assigned above */
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); /* the marker is read at the data-transaction index even though type is an entry transaction here */
- loc_wipe (&trash_loc);
- return 0;
- }
+ /* If there is a clear source, check whether the full-heal-indicator
+ * is present in its xdata. Otherwise, we need to examine all the
+ * participating bricks and then figure if *even* one of them has a
+ * full-heal-indicator.
+ */
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of /" GF_REPLICATE_TRASH_DIR " failed on %s",
- priv->children[active_src]->name);
- goto out;
+ if (source != -1) { /* no early _gf_false here: if the source has no marker, the sinks below are still scanned */
+ if (is_full_heal_marker_present (this, replies[source].xdata,
+ idx))
+ return _gf_true;
}
- /* lookup successful */
-
- trash_inode = inode_link (inode, expunge_local->loc.inode->table->root,
- GF_REPLICATE_TRASH_DIR, buf);
-
- afr_sh_entry_expunge_rename (expunge_frame, this, active_src,
- trash_inode);
- return 0;
-out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_lookup_trash (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- inode_t *root = NULL;
- inode_t *trash = NULL;
- loc_t trash_loc;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- root = expunge_local->loc.inode->table->root;
+ /* else, or when the source carried no marker: scan every healed sink */
- trash = inode_grep (root->table, root, GF_REPLICATE_TRASH_DIR);
-
- if (trash) {
- /* inode is in cache, so no need to mkdir */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
- afr_sh_entry_expunge_rename (expunge_frame, this, active_src,
- trash);
- return 0;
+ if (is_full_heal_marker_present (this, replies[i].xdata, idx))
+ return _gf_true;
}
- /* Not in cache, so look it up */
-
- init_trash_loc (&trash_loc, expunge_local->loc.inode->table);
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up /" GF_REPLICATE_TRASH_DIR " on %s",
- priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_trash_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &trash_loc, NULL);
-
- loc_wipe (&trash_loc);
-
- return 0;
+ return _gf_false;
}
-
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf)
+static int
+__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness) /* Picks the source index for entry heal. Returns -1, after zeroing sources[] and calling afr_mark_active_sinks(), when AFR_CMP of locked_on and healed_sinks is 0, when sources[] is empty, or when a witness exists; otherwise returns afr_choose_source_by_policy()'s pick. replies is not read here. */
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int type = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- source = expunge_sh->source;
-
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- case IA_IFDIR:
- afr_sh_entry_expunge_lookup_trash (expunge_frame, this, active_src);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[source]->name, type);
- goto out;
- break;
- }
-
- return 0;
-out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
-}
+ sources_count = AFR_COUNT (sources, priv->child_count);
-int
-afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
+ if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) /* presumably 0 means the two arrays are identical, i.e. every locked child is a sink - confirm against the AFR_CMP definition */
+ || !sources_count || afr_does_witness_exist (this, witness)) {
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
+ memset (sources, 0, sizeof (*sources) * priv->child_count); /* discard every previously computed source */
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
+ return -1;
}
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
-
- return 0;
-out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
+ source = afr_choose_source_by_policy (priv, sources,
+ AFR_ENTRY_TRANSACTION);
+ return source;
}
-
int
-afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ gf_boolean_t *pflag) /* Runs discover on inode, computes sources[]/sinks[] for an entry transaction, seeds healed_sinks[] and stores the finalized source (which may be negative) in *source_p. Returns non-zero only when discover or find_direction fails; *source_p is left untouched in that case. */
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL; /* per-child array passed to find_direction and then to finalize_source */
priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
- if (op_ret == -1 && op_errno == ENOENT) {
+ ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
+ replies);
+ if (ret)
+ return ret;
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- expunge_local->loc.path,
- priv->children[source]->name);
+ witness = alloca0 (sizeof (*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_ENTRY_TRANSACTION,
+ locked_on, sources, sinks, witness,
+ pflag);
+ if (ret)
+ return ret;
- expunge_sh->parentbuf = *postparent;
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
- return 0;
- }
+ source = __afr_selfheal_entry_finalize_source (this, sources,
+ healed_sinks,
+ locked_on, replies,
+ witness);
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
+ if (source < 0) { /* intentionally empty: the negative source is simply handed to the caller through *source_p */
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
}
+ *source_p = source;
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
+ return ret; /* ret is 0 here, because both non-zero cases returned early above */
}
-
-int
-afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- char *name)
+static int
+afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, char *name, inode_t *parent_idx_inode,
+ xlator_t *subvol, gf_boolean_t full_crawl) /* Heals one name under fd->inode while holding an entry lock in the this->name domain: prepares the parent's heal state, looks the name up, dispatches to __afr_selfheal_entry_dirent() and, with granular heal enabled, purges the name index. NOTE(review): full_crawl is never read in this body. */
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
+ int ret = 0;
+ int source = -1;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ inode_t *inode = NULL; /* result of the name lookup; unref'd at unlock if set */
+ struct afr_reply *replies = NULL; /* replies for the name lookup */
+ struct afr_reply *par_replies = NULL; /* replies gathered for the parent directory by the prepare step */
+ afr_private_t *priv = NULL;
priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ locked_on = alloca0 (priv->child_count);
- ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);
- if (ret != 0) {
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
-
- ret = 0;
-out:
- if (ret == -1)
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ replies = alloca0 (priv->child_count * sizeof(*replies));
+ par_replies = alloca0 (priv->child_count * sizeof(*par_replies));
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
+ locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) { /* the lock call's return value is used as the number of sub-volumes locked, per the log message below */
+ gf_msg_debug (this->name, 0, "%s: Skipping "
+ "entry self-heal as only %d sub-volumes "
+ " could be locked in %s domain",
+ uuid_utoa (fd->inode->gfid),
+ ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
}
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
+ ret = __afr_selfheal_entry_prepare (frame, this, fd->inode,
+ locked_on,
+ sources, sinks,
+ healed_sinks, par_replies,
+ &source, NULL); /* NULL is passed for pflag */
+ if (ret < 0)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name,
+ replies, locked_on,
+ NULL);
+ if (!inode) {
+ ret = -ENOMEM; /* every NULL lookup result is reported as ENOMEM */
+ goto unlock;
+ }
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry->d_name);
+ ret = __afr_selfheal_entry_dirent (frame, this, fd, name, inode,
+ source, sources, healed_sinks,
+ locked_on, replies);
+
+ if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { /* purge only after a successful heal, with granular heal on and a parent index inode available */
+ ret = afr_shd_index_purge (subvol, parent_idx_inode,
+ name, inode->ia_type);
+ /* Why is ret force-set to 0? We do not care about
+ * index purge failing for full heal as it is quite
+ * possible during replace-brick that not all files
+ * and directories have their name indices present in
+ * entry-changes/.
+ */
+ ret = 0;
+ }
}
- return 0;
-}
+unlock:
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
+ locked_on, NULL);
+ if (inode) /* still NULL on the early-exit paths above */
+ inode_unref (inode);
+ if (replies)
+ afr_replies_wipe (replies, priv->child_count);
+ if (par_replies)
+ afr_replies_wipe (par_replies, priv->child_count);
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdir,
- sh->healing_fd, sh->block_size, sh->offset);
-
- return 0;
+ return ret;
}
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
+static inode_t *
+afr_shd_entry_changes_index_inode (xlator_t *this, xlator_t *subvol,
+ uuid_t pargfid) /* Fetches GF_XATTROP_ENTRY_CHANGES_GFID from the root of subvol, then looks up the entry named after pargfid beneath that gfid and links the resulting inode. Returns the linked inode, or NULL on failure; errno is set on the explicit error paths. */
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
+ int ret = -1;
+ void *index_gfid = NULL; /* points into xattr; only used while xattr is still referenced */
+ loc_t rootloc = {0,};
+ loc_t loc = {0,};
+ dict_t *xattr = NULL;
+ inode_t *inode = NULL; /* return value; stays NULL unless inode_link() succeeds */
+ struct iatt iatt = {0,};
+
+ rootloc.inode = inode_ref (this->itable->root);
+ gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr (subvol, &rootloc, &xattr,
+ GF_XATTROP_ENTRY_CHANGES_GFID, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret; /* NOTE(review): errno becomes 0 when ret == 0 but xattr is NULL */
+ goto out;
+ }
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
+ ret = dict_get_ptr (xattr, GF_XATTROP_ENTRY_CHANGES_GFID, &index_gfid);
+ if (ret) {
+ errno = EINVAL;
+ goto out;
+ }
- if (sh->op_failed) {
- goto out;
- }
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
+ gf_uuid_copy (loc.pargfid, index_gfid);
+ loc.name = gf_strdup (uuid_utoa (pargfid)); /* NOTE(review): the gf_strdup() result is not checked before the lookup */
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+ ret = syncop_lookup (subvol, &loc, &iatt, NULL, NULL, NULL);
+ if (ret < 0) {
+ errno = -ret;
+ goto out;
+ }
- afr_sh_entry_expunge_subvol (frame, this, active_src);
+ inode = inode_link (loc.inode, NULL, NULL, &iatt); /* linked with no parent and no name */
- return 0;
out:
- afr_sh_entry_erase_pending (frame, this);
- return 0;
+ if (xattr)
+ dict_unref (xattr);
+ loc_wipe (&rootloc);
+ GF_FREE ((char *)loc.name); /* name was allocated in this function, so it is freed explicitly before loc_wipe() */
+ loc_wipe (&loc);
+ return inode;
}
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+static int
+afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int child) /* Reads the directory behind fd from priv->children[child] in batches and runs afr_selfheal_entry_dirent() on every name except ".", ".." and the trash directory at the root. Returns -1 if any entry reported a gfid/type mismatch, -ENOMEM if the frame copy fails, otherwise the last ret value. */
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ int ret = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ off_t offset = 0; /* readdir resume position, advanced to each entry's d_off */
+ call_frame_t *iter_frame = NULL; /* private frame copy used for the per-entry heals */
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t mismatch = _gf_false; /* remembers that some entry returned -1 */
+ afr_local_t *iter_local = NULL; /* NOTE(review): never used in this body */
+ afr_local_t *local = NULL;
+ loc_t loc = {0,}; /* only loc.inode is used; it holds the parent's index inode */
priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ subvol = priv->children[child];
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
+ INIT_LIST_HEAD (&entries.list);
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "setattr done for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr (%s) on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ iter_frame = afr_copy_frame (frame);
+ if (!iter_frame)
+ return -ENOMEM;
+
+ loc.inode = afr_shd_entry_changes_index_inode (this, subvol,
+ fd->inode->gfid); /* may be NULL; it is passed on as parent_idx_inode without a check here */
+
+ while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries,
+ NULL, NULL))) { /* a zero return ends the loop; a negative return leaves through the "if (ret) break" at the bottom */
+ if (ret > 0)
+ ret = 0; /* positive return is normalised to 0 before the entries are processed */
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) /* the trash directory is skipped only at the root */
+ continue;
+
+ ret = afr_selfheal_entry_dirent (iter_frame, this, fd,
+ entry->d_name,
+ loc.inode, subvol,
+ local->need_full_crawl);
+ AFR_STACK_RESET (iter_frame);
+ if (iter_frame->local == NULL) { /* a NULL local after the reset is treated as a failure */
+ ret = -ENOTCONN;
+ break;
+ }
+
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0; /* keep iterating; the mismatch is reported once at the end */
+ }
+ if (ret)
+ break;
+ }
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
}
- return 0;
-}
-
-
+ loc_wipe (&loc);
-int
-afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int child_index = 0;
-
- struct iatt stbuf;
- int32_t valid = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.ia_uid,
- impunge_local->cont.lookup.buf.ia_gid);
-
- stbuf.ia_atime = impunge_local->cont.lookup.buf.ia_atime;
- stbuf.ia_atime_nsec = impunge_local->cont.lookup.buf.ia_atime_nsec;
- stbuf.ia_mtime = impunge_local->cont.lookup.buf.ia_mtime;
- stbuf.ia_mtime_nsec = impunge_local->cont.lookup.buf.ia_mtime_nsec;
-
- stbuf.ia_uid = impunge_local->cont.lookup.buf.ia_uid;
- stbuf.ia_gid = impunge_local->cont.lookup.buf.ia_gid;
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &impunge_local->loc,
- &stbuf, valid);
- return 0;
+ AFR_STACK_DESTROY (iter_frame);
+ if (mismatch == _gf_true) /* NOTE(review): this also replaces any other error code held in ret */
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
}
-
-int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+static int
+afr_selfheal_entry_granular_dirent (xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data)
{
- loc_t *parent_loc = cookie;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr on parent directory failed: %s",
- strerror (op_errno));
+ int ret = 0;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ afr_granular_esh_args_t *args = data;
+
+ /* Look up the actual inode associated with entry. If the lookup returns
+ * ESTALE or ENOENT, then it means we have a stale index. Remove it.
+ * This is analogous to the check in afr_shd_index_heal() except that
+ * here it is achieved through LOOKUP and in afr_shd_index_heal() through
+ * a GETXATTR.
+ */
+
+ loc.inode = inode_new (args->xl->itable);
+ loc.parent = inode_ref (args->heal_fd->inode);
+ gf_uuid_copy (loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
+
+ ret = syncop_lookup (args->xl, &loc, &iatt, NULL, NULL, NULL);
+ if ((ret == -ENOENT) || (ret == -ESTALE)) {
+ /* The name indices under the pgfid index dir are guaranteed
+ * to be regular files. Hence the hardcoding.
+ */
+ afr_shd_index_purge (subvol, parent->inode, entry->d_name,
+ IA_IFREG);
+ ret = 0;
+ goto out;
}
+ /* TBD: afr_shd_zero_xattrop? */
- loc_wipe (parent_loc);
-
- GF_FREE (parent_loc);
-
- AFR_STACK_DESTROY (setattr_frame);
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
- int pending_array[3] = {0, };
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- loc_t *parent_loc = NULL;
- struct iatt parentbuf;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- inode->ia_type = stbuf->ia_type;
-
- xattr = get_new_dict ();
- dict_ref (xattr);
-
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (IA_ISDIR (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
-
- ret = dict_set_static_bin (xattr, priv->pending_key[child_index],
- pending_array, sizeof (pending_array));
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parentbuf = impunge_sh->parentbuf;
- setattr_frame = copy_frame (impunge_frame);
-
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc), gf_afr_mt_loc_t);
- afr_build_parent_loc (parent_loc, &impunge_local->loc);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
-
- STACK_WIND_COOKIE (setattr_frame, afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &parentbuf, valid);
-
- dict_unref (xattr);
-
- return 0;
-
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- stbuf->ia_rdev);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ ret = afr_selfheal_entry_dirent (args->frame, args->xl, args->heal_fd,
+ entry->d_name, parent->inode, subvol,
+ _gf_false);
+ AFR_STACK_RESET (args->frame);
+ if (args->frame->local == NULL)
+ ret = -ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- call_frame_t *frame = NULL;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
+ if (ret == -1)
+ args->mismatch = _gf_true;
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- impunge_sh->linkname);
-
- return 0;
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking symlink %s with wrong target on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->unlink,
- &impunge_local->loc);
-
+ loc_wipe (&loc);
return 0;
}
-
-int
-afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+static int
+afr_selfheal_entry_granular (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int subvol_idx, gf_boolean_t is_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- call_frame_t *frame = NULL;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- /* symlink doesn't exist on the sink */
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- afr_sh_entry_impunge_symlink (impunge_frame, this,
- child_index, impunge_sh->linkname);
- return 0;
- }
-
-
- /* symlink exists on the sink, so check if targets match */
-
- if (strcmp (linkname, impunge_sh->linkname) == 0) {
- /* targets match, nothing to do */
-
- goto out;
- } else {
- /*
- * Hah! Sneaky wolf in sheep's clothing!
+ int ret = 0;
+ loc_t loc = {0,};
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ afr_granular_esh_args_t args = {0,};
+
+ priv = this->private;
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame (frame);
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
+ */
+ args.heal_fd = fd;
+
+ /* @subvol here represents the subvolume of AFR where
+ * indices/entry-changes/<pargfid> will be processed
+ */
+ loc.inode = afr_shd_entry_changes_index_inode (this, subvol,
+ fd->inode->gfid);
+ if (!loc.inode) {
+ /* If granular heal failed on the sink (as it might sometimes
+ * because it is the src that would mostly contain the granular
+ * changelogs and the sink's entry-changes would be empty),
+ * do not treat heal as failure.
*/
-
- afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
- child_index);
- return 0;
+ if (is_src)
+ return -errno;
+ else
+ return 0;
}
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ &args, afr_selfheal_entry_granular_dirent);
- gf_log (this->name, GF_LOG_DEBUG,
- "checking symlink target of %s on %s",
- impunge_local->loc.path, priv->children[child_index]->name);
+ loc_wipe (&loc);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096);
+ if (args.mismatch == _gf_true)
+ ret = -1;
- return 0;
+ return ret;
}
-
-int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- call_frame_t *frame = NULL;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_sh->linkname = gf_strdup (linkname);
-
- afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
-
- return 0;
-
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr,struct iatt *postparent)
+static int
+afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int type = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
-
- active_src = impunge_sh->active_source;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s (for %s) failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_sh->parentbuf = *postparent;
-
- impunge_local->cont.lookup.buf = *buf;
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
-
- return 0;
-
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_recreate (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
-
+ local = frame->local;
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_SELF_HEAL_INFO, "performing entry selfheal on %s",
+ uuid_utoa (fd->inode->gfid));
- active_src = impunge_sh->active_source;
+ for (i = 0; i < priv->child_count; i++) {
+ /* Expunge */
+ if (!healed_sinks[i])
+ continue;
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_recreate_lookup_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &impunge_local->loc, 0);
+ if (!local->need_full_crawl)
+ /* Why call afr_selfheal_entry_granular() on a "healed sink",
+ * given that it is the source that contains the granular
+ * indices?
+ * If the index for this directory is non-existent or empty on
+ * this subvol (=> clear sink), then it will return early
+ * without failure status.
+ * If the index is non-empty and it is yet a 'healed sink', then
+ * it is due to a split-brain in which case we anyway need to
+ * crawl the indices/entry-changes/pargfid directory.
+ */
+ ret = afr_selfheal_entry_granular (frame, this, fd, i,
+ _gf_false);
+ else
+ ret = afr_selfheal_entry_do_subvol (frame, this, fd, i);
+
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+ }
+ if (ret)
+ break;
+ }
+
+ if (!ret && source != -1) {
+ /* Impunge */
+ if (local->need_full_crawl)
+ ret = afr_selfheal_entry_do_subvol (frame, this, fd,
+ source);
+ else
+ ret = afr_selfheal_entry_granular (frame, this, fd,
+ source, _gf_true);
+ }
- return 0;
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
}
-
-int
-afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+static int
+__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int call_count = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
+ int ret = -1;
+ int source = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *postop_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t did_sh = _gf_true;
priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- child_index = (long) cookie;
- active_src = impunge_sh->active_source;
-
- if ((op_ret == -1 && op_errno == ENOENT)
- || (IA_ISLNK (impunge_sh->impunging_entry_mode))) {
-
- /*
- * A symlink's target might have changed, so
- * always go down the recreate path for them.
- */
-
- /* decrease call_count in recreate-callback */
-
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ local = frame->local;
- afr_sh_entry_impunge_recreate (impunge_frame, this,
- child_index);
- return 0;
- }
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+ postop_lock = alloca0 (priv->child_count);
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
- impunge_sh->parentbuf = *postparent;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
+ data_lock);
{
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int i = 0;
- int call_count = 0;
- int op_errno = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ gf_msg_debug (this->name, 0, "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa (fd->inode->gfid), ret,
+ this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
+ ret = __afr_selfheal_entry_prepare (frame, this, fd->inode,
+ data_lock, sources, sinks,
+ healed_sinks,
+ locked_replies, &source,
+ NULL);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
- goto out;
+ local->need_full_crawl = afr_need_full_heal (this,
+ locked_replies,
+ source,
+ healed_sinks,
+ AFR_ENTRY_TRANSACTION);
}
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- entry->d_name, local->loc.path);
-
- impunge_frame = copy_frame (frame);
- if (!impunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+unlock:
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
+ data_lock, NULL);
+ if (ret < 0)
goto out;
- }
-
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- impunge_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_src;
- impunge_sh->impunging_entry_mode =
- st_mode_from_ia (entry->d_stat.ia_prot, entry->d_stat.ia_type);
+ if (!did_sh)
+ goto out;
- ret = build_child_loc (this, &impunge_local->loc, &local->loc, entry->d_name);
- if (ret != 0) {
+ ret = afr_selfheal_entry_do (frame, this, fd, source, sources,
+ healed_sinks);
+ if (ret)
goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
- call_count++;
- }
-
- impunge_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", impunge_local->loc.path,
- priv->children[i]->name);
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_entry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &impunge_local->loc, 0);
-
- if (!--call_count)
- break;
- }
+ /* Take entrylks in xlator domain before doing post-op (undo-pending) in
+ * entry self-heal. This is to prevent a parallel name self-heal on
+ * an entry under @fd->inode from reading pending xattrs while it is
+ * being modified by SHD after entry sh below, given that
+ * name self-heal takes locks ONLY in xlator domain and is free to read
+ * pending changelog in the absence of the following locking.
+ */
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
+ postop_lock);
+ {
+ if (AFR_CMP (data_lock, postop_lock, priv->child_count) != 0) {
+ gf_msg_debug (this->name, 0, "%s: Skipping "
+ "post-op after entry self-heal as %d "
+ "sub-volumes, as opposed to %d, "
+ "could be locked in %s domain",
+ uuid_utoa (fd->inode->gfid),
+ ret, AFR_COUNT (data_lock,
+ priv->child_count), this->name);
+ ret = -ENOTCONN;
+ goto postop_unlock;
+ }
- ret = 0;
+ ret = afr_selfheal_undo_pending (frame, this, fd->inode,
+ sources, sinks, healed_sinks,
+ AFR_ENTRY_TRANSACTION,
+ locked_replies, postop_lock);
+ }
+postop_unlock:
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
+ postop_lock, NULL);
out:
- if (ret == -1)
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_impunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_impunge_all (frame, this);
- }
+ if (did_sh)
+ afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source,
+ sources, healed_sinks);
+ else
+ ret = 1;
- return 0;
+ if (locked_replies)
+ afr_replies_wipe (locked_replies, priv->child_count);
+ return ret;
}
-int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
+static fd_t *
+afr_selfheal_data_opendir (xlator_t *this, inode_t *inode)
{
- int i = 0;
- int call_count = 0;
-
- int source = -1;
- int *sources = NULL;
-
+ loc_t loc = {0,};
+ int ret = 0;
fd_t *fd = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = local->self_heal.source;
- sources = local->self_heal.sources;
-
- sh->block_size = 131072;
- sh->offset = 0;
-
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
-
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
-
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd);
- call_count--;
- }
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ fd = fd_create (inode, 0);
+ if (!fd)
+ return NULL;
- source = sh->source;
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- if (source != -1)
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
+ ret = syncop_opendir (this, &loc, fd, NULL, NULL);
+ if (ret) {
+ fd_unref (fd);
+ fd = NULL;
+ } else {
+ fd_bind (fd);
}
- sh->active_sinks = active_sinks;
-
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- active_sinks);
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
-
- afr_sh_entry_open (frame, this);
- return 0;
+ loc_wipe (&loc);
+ return fd;
}
int
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int nsources = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *long_name_locked = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ gf_boolean_t granular_locks = _gf_false;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ if (strcmp ("granular", priv->locking_scheme) == 0)
+ granular_locks = _gf_true;
- if (sh->forced_merge) {
- sh->source = -1;
- goto heal;
- }
-
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_ENTRY);
-
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- sh->source = source;
-
-heal:
- afr_sh_entry_sync_prepare (frame, this);
-
- return 0;
-}
-
-
+ fd = afr_selfheal_data_opendir (this, inode);
+ if (!fd)
+ return -EIO;
-int
-afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ locked_on = alloca0 (priv->child_count);
+ long_name_locked = alloca0 (priv->child_count);
- LOCK (&frame->lock);
+ ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode,
+ priv->sh_domain, NULL,
+ locked_on);
{
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ gf_msg_debug (this->name, 0, "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa (fd->inode->gfid), ret,
+ priv->sh_domain);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
}
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_entry_fix (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_t * sh = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
+ if (!granular_locks) {
+ ret = afr_selfheal_tryentrylk (frame, this, inode,
+ this->name, LONG_FILENAME,
+ long_name_locked);
}
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame,
- afr_sh_entry_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- sh->locked_nodes[child_index] = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- sh->locked_nodes[child_index] = 1;
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed == 1) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- afr_sh_entry_lookup (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_lock (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_lock_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->loc, NULL,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
+ {
+ if (!granular_locks && ret < 1) {
+ gf_msg_debug (this->name, 0, "%s: Skipping"
+ " entry self-heal as only %d "
+ "sub-volumes could be "
+ "locked in special-filename "
+ "domain",
+ uuid_utoa (fd->inode->gfid),
+ ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __afr_selfheal_entry (frame, this, fd, locked_on);
+ }
+ if (!granular_locks)
+ afr_selfheal_unentrylk (frame, this, inode, this->name,
+ LONG_FILENAME, long_name_locked,
+ NULL);
}
+unlock:
+ afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
+ locked_on, NULL);
- return 0;
-}
-
-
-int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entry_lock (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
+ if (fd)
+ fd_unref (fd);
- return 0;
+ return ret;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 4501595b7a4..130a3daa203 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,817 +1,476 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
+#include "byte-order.h"
+#include "protocol-common.h"
+#define AFR_HEAL_ATTR (GF_SET_ATTR_UID|GF_SET_ATTR_GID|GF_SET_ATTR_MODE)
-int
-afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
+static gf_boolean_t
+_afr_ignorable_key_match (dict_t *d, char *k, data_t *val, void *mdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
- memset (sh->success, 0, sizeof (int) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 1;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_DEBUG,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- if (IA_ISREG (sh->type)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- return 0;
- }
-
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "completed self heal of %s",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
- }
-
- return 0;
+ return afr_is_xattr_ignorable (k);
}
-
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+void
+afr_delete_ignorable_xattrs (dict_t *xattr)
{
- afr_local_t *local = NULL;
- int call_count = 0;
-
-
- local = frame->local;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
-
- return 0;
+ dict_foreach_match (xattr, _afr_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL);
}
-
int
-afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
+__afr_selfheal_metadata_do (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- struct flock flock = {0, };
-
+ int ret = -1;
+ loc_t loc = {0,};
+ dict_t *xattr = NULL;
+ dict_t *old_xattr = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->locked_nodes[i])
- call_count++;
- }
-
- if (call_count == 0) {
- afr_sh_metadata_done (frame, this);
- return 0;
- }
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
- local->call_count = call_count;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_SELF_HEAL_INFO, "performing metadata selfheal on %s",
+ uuid_utoa (inode->gfid));
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_UNLCK;
-
- if (sh->locked_nodes[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND (frame, afr_sh_metadata_unlck_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
-
- if (!--call_count)
- break;
- }
+ ret = syncop_getxattr (priv->children[source], &loc, &xattr, NULL,
+ NULL, NULL);
+ if (ret < 0) {
+ ret = -EIO;
+ goto out;
}
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_finish (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
+ afr_delete_ignorable_xattrs (xattr);
for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
+ if (old_xattr) {
+ dict_unref (old_xattr);
+ old_xattr = NULL;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
+ if (!healed_sinks[i])
continue;
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
+ ret = syncop_setattr (priv->children[i], &loc,
+ &locked_replies[source].poststat,
+ AFR_HEAL_ATTR, NULL, NULL, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+
+ ret = syncop_getxattr (priv->children[i], &loc, &old_xattr, 0,
+ NULL, NULL);
+ if (old_xattr) {
+ afr_delete_ignorable_xattrs (old_xattr);
+ ret = syncop_removexattr (priv->children[i], &loc, "",
+ old_xattr, NULL);
}
- }
- GF_FREE (erase_xattr);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->success[child_index] = 0;
- }
+ ret = syncop_setxattr (priv->children[i], &loc, xattr, 0, NULL,
+ NULL);
+ if (ret)
+ healed_sinks[i] = 0;
}
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ ret = 0;
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
+out:
+ loc_wipe (&loc);
+ if (xattr)
+ dict_unref (xattr);
+ if (old_xattr)
+ dict_unref (old_xattr);
- return 0;
+ return ret;
}
-
-int
-afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+static uint64_t
+mtime_ns(struct iatt *ia)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
+ uint64_t ret;
-int
-afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ ret = (((uint64_t)(ia->ia_mtime)) * 1000000000)
+ + (uint64_t)(ia->ia_mtime_nsec);
- return 0;
+ return ret;
}
-
-int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+/*
+ * When directory content is modified, [mc]time is updated. On
+ * Linux, the filesystem does it, while at least on NetBSD, the
+ * kernel file-system independent code does it. This means that
+ * when entries are added while bricks are down, the kernel sends
+ * a SETATTR [mc]time which will cause metadata split brain for
+ * the directory. In this case, clear the split brain by finding
+ * the source with the most recent modification date.
+ */
+static int
+afr_dirtime_splitbrain_source (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
-
- struct iatt stbuf;
- int32_t valid = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 2 calls per sink - setattr, setxattr
- */
- if (xattr)
- call_count = active_sinks * 2;
- else
- call_count = active_sinks;
-
- local->call_count = call_count;
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[source].ia_uid;
- stbuf.ia_gid = sh->buf[source].ia_gid;
-
- stbuf.ia_type = sh->buf[source].ia_type;
- stbuf.ia_prot = sh->buf[source].ia_prot;
-
- valid = GF_SET_ATTR_MODE |
- GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ struct iatt source_ia;
+ struct iatt child_ia;
+ uint64_t mtime = 0;
+ int i;
+ int ret = -1;
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ continue;
- call_count--;
+ if (!replies[i].valid)
+ continue;
- if (!xattr)
- continue;
+ if (replies[i].op_ret != 0)
+ continue;
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
- call_count--;
- }
+ if (mtime_ns(&replies[i].poststat) <= mtime)
+ continue;
- return 0;
-}
+ mtime = mtime_ns(&replies[i].poststat);
+ source = i;
+ }
+ if (source == -1)
+ goto out;
-int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
+ source_ia = replies[source].poststat;
+ if (source_ia.ia_type != IA_IFDIR)
+ goto out;
- int i;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (!replies[i].valid)
+ continue;
- source = sh->source;
+ if (replies[i].op_ret != 0)
+ continue;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
+ child_ia = replies[i].poststat;
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
- afr_sh_metadata_sync (frame, this, xattr);
- }
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid) ||
+ !afr_xattrs_are_equal (replies[source].xdata,
+ replies[i].xdata))
+ goto out;
+ }
- return 0;
+ /*
+ * Metadata split brain is just about [amc]time
+ * We return our source.
+ */
+ ret = source;
+out:
+ return ret;
}
-int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+/*
+ * Look for mismatching uid/gid or mode or user xattrs even if
+ * AFR xattrs don't say so, and pick one arbitrarily as winner. */
+
+static int
+__afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
- sh->active_sinks = active_sinks;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt srcstat = {0, };
+ int source = -1;
+ int sources_count = 0;
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name, active_sinks);
-
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_METADATA);
-
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
-
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+
+ if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
+ || !sources_count) {
+
+ source = afr_mark_split_brain_source_sinks (frame, this, inode,
+ sources, sinks,
+ healed_sinks,
+ locked_on, replies,
+ AFR_METADATA_TRANSACTION);
+ if (source >= 0)
+ return source;
+
+ /* If this is a directory mtime/ctime only split brain
+ use the most recent */
+ source = afr_dirtime_splitbrain_source (frame, this,
+ replies, locked_on);
+ if (source != -1) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_SPLIT_BRAIN, "clear time "
+ "split brain on %s",
+ uuid_utoa (replies[source].poststat.ia_gfid));
+ sources[source] = 1;
+ healed_sinks[source] = 0;
+ return source;
+ }
- local->govinda_gOvinda = 1;
+ if (!priv->metadata_splitbrain_forced_heal) {
+ return -EIO;
+ }
- afr_sh_metadata_finish (frame, this);
- return 0;
+ /* Metadata split brain, select one subvol
+ arbitrarily */
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i] && healed_sinks[i]) {
+ sources[i] = 1;
+ healed_sinks[i] = 0;
+ break;
+ }
+ }
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
- sh->source = source;
+ source = afr_choose_source_by_policy (priv, sources,
+ AFR_METADATA_TRANSACTION);
+ srcstat = replies[source].poststat;
- /* detect changes not visible through pending flags -- JIC */
for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
+ if (!sources[i] || i == source)
continue;
-
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
-
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
+ if (!IA_EQUAL (srcstat, replies[i].poststat, type) ||
+ !IA_EQUAL (srcstat, replies[i].poststat, uid) ||
+ !IA_EQUAL (srcstat, replies[i].poststat, gid) ||
+ !IA_EQUAL (srcstat, replies[i].poststat, prot)) {
+ gf_msg_debug (this->name, 0, "%s: iatt mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa
+ (replies[source].poststat.ia_gfid),
+ source, i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
}
- afr_sh_metadata_sync_prepare (frame, this);
+ for (i =0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!afr_xattrs_are_equal (replies[source].xdata,
+ replies[i].xdata)) {
+ gf_msg_debug (this->name, 0, "%s: xattr mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa
+ (replies[source].poststat.ia_gfid),
+ source, i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
- return 0;
+ return source;
}
int
-afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies, gf_boolean_t *pflag)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ uint64_t *witness = NULL;
- local = frame->local;
- sh = &local->self_heal;
priv = this->private;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->ia_type);
-
- sh->buf[child_index] = *buf;
- if (xattr)
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_fix (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
+ replies);
+ if (ret)
+ return ret;
+
+ witness = alloca0 (sizeof (*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_METADATA_TRANSACTION,
+ locked_on, sources, sinks, witness,
+ pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
+
+ /* If any source has witness, pick first
+ * witness source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i]) {
+ source = i;
+ break;
+ }
+ }
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
+ if (source != -1) {
for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
+ if (i != source && sources[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
}
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+ source = __afr_selfheal_metadata_finalize_source (frame, this, inode,
+ sources, sinks,
+ healed_sinks,
+ locked_on, replies);
- return 0;
-}
+ if (source < 0)
+ return -EIO;
+ return source;
+}
int
-afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ gf_boolean_t did_sh = _gf_true;
+ int source = -1;
+
priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- sh->locked_nodes[child_index] = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "locking of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- sh->locked_nodes[child_index] = 1;
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
- call_count = afr_frame_return (frame);
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_metadata_finish (frame, this);
- return 0;
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, data_lock);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ ret = -ENOTCONN;
+ goto unlock;
}
- afr_sh_metadata_lookup (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- struct flock flock = {0, };
+ ret = __afr_selfheal_metadata_prepare (frame, this, inode,
+ data_lock, sources,
+ sinks, healed_sinks,
+ locked_replies, NULL);
+ if (ret < 0)
+ goto unlock;
+ source = ret;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (AFR_COUNT (healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- local->call_count = call_count;
+ ret = __afr_selfheal_metadata_do (frame, this, inode, source,
+ healed_sinks, locked_replies);
+ if (ret)
+ goto unlock;
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_WRLCK;
-
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
-
- if (!--call_count)
- break;
- }
+ ret = afr_selfheal_undo_pending (frame, this, inode, sources,
+ sinks, healed_sinks,
+ AFR_METADATA_TRANSACTION,
+ locked_replies, data_lock);
}
-
- return 0;
+unlock:
+ afr_selfheal_uninodelk (frame, this, inode, this->name,
+ LLONG_MAX -1, 0, data_lock);
+
+ if (did_sh)
+ afr_log_selfheal (inode->gfid, this, ret, "metadata", source,
+ sources, healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe (locked_replies, priv->child_count);
+ return ret;
}
-
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
+afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
+ inode_t *inode = NULL;
+ inode_t *link_inode = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ if (gf_uuid_is_null (stbuf->ia_gfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ inode = inode_new (this->itable);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
- local = frame->local;
- sh = &local->self_heal;
+ link_inode = inode_link (inode, NULL, NULL, stbuf);
+ if (!link_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
- if (local->self_heal.need_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
- }
+ frame = afr_frame_create (this);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
- return 0;
+ ret = afr_selfheal_metadata (frame, this, link_inode);
+out:
+ if (inode)
+ inode_unref (inode);
+ if (link_inode)
+ inode_unref (link_inode);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
+ return ret;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
new file mode 100644
index 00000000000..3445ecccf9c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -0,0 +1,719 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
+
+/*
+ * Wind a lookup carrying "gfid-req" = @gfid for <@pargfid>/@bname on every
+ * subvolume flagged in @locked_on (using a private frame), then replace the
+ * contents of @replies with the replies gathered by that lookup.
+ *
+ * When @is_gfid_absent is true the lookup is attempted only if every child
+ * is both up and locked; otherwise -EIO is returned.
+ *
+ * Returns 0 on success, -ENOMEM when the frame or the dict cannot be set up,
+ * or -EIO as described above. @replies is only modified on the success path.
+ */
+int
+__afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies, void *gfid,
+ unsigned char *locked_on,
+ gf_boolean_t is_gfid_absent)
+{
+ int ret = 0;
+ int up_count = 0;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ loc_t loc = {0, };
+ call_frame_t *new_frame = NULL;
+ afr_local_t *new_local = NULL;
+
+ priv = this->private;
+
+ new_frame = afr_frame_create (this);
+ if (!new_frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ new_local = new_frame->local;
+
+ /* Stamp the parent inode with the gfid supplied by the caller. */
+ gf_uuid_copy (parent->gfid, pargfid);
+
+ xdata = dict_new ();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* NOTE(review): 16 is presumably the byte size of a gfid (uuid_t) - confirm. */
+ ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ loc.parent = inode_ref (parent);
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.pargfid, pargfid);
+ loc.name = bname;
+
+ if (is_gfid_absent) {
+ /* Ensure all children of AFR are up before performing gfid heal, to
+ * guard against the possibility of gfid split brain. */
+
+ up_count = AFR_COUNT (priv->child_up, priv->child_count);
+ if (up_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+
+ locked_count = AFR_COUNT (locked_on, priv->child_count);
+ if (locked_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ /* Clear out old replies here and wind lookup on all locked
+ * subvolumes to achieve two things:
+ * a. gfid heal on those subvolumes that do not have gfid associated
+ * with the inode, and
+ * b. refresh replies, which can be consumed by
+ * __afr_selfheal_name_impunge().
+ */
+
+ AFR_ONLIST (locked_on, new_frame, afr_selfheal_discover_cbk, lookup,
+ &loc, xdata);
+
+ /* The fresh replies live in new_local; move them into the caller's array. */
+ afr_replies_wipe (replies, priv->child_count);
+
+ afr_replies_copy (replies, new_local->replies, priv->child_count);
+
+out:
+ /* loc is zero-initialised, so wiping it is done on every exit path. */
+ loc_wipe (&loc);
+ if (xdata)
+ dict_unref (xdata);
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+
+ return ret;
+}
+
+/*
+ * Recreate the entry <@pargfid>/@bname on every subvolume whose valid reply
+ * carries a gfid different from the one on @gfid_idx. Subvolumes whose gfid
+ * already matches are recorded as sources. If at least one entry was flagged
+ * in the newentry array, afr_selfheal_newentry_mark() is invoked.
+ *
+ * Returns the bitwise OR of all afr_selfheal_recreate_entry() results.
+ */
+int
+__afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this,
+                             inode_t *parent, uuid_t pargfid,
+                             const char *bname, inode_t *inode,
+                             struct afr_reply *replies, int gfid_idx)
+{
+        afr_private_t *priv      = this->private;
+        unsigned char *recreated = NULL;
+        unsigned char *matching  = NULL;
+        int            status    = 0;
+        int            child     = 0;
+
+        recreated = alloca0 (priv->child_count);
+        matching = alloca0 (priv->child_count);
+
+        gf_uuid_copy (parent->gfid, pargfid);
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!replies[child].valid)
+                        continue;
+
+                if (gf_uuid_compare (replies[child].poststat.ia_gfid,
+                                     replies[gfid_idx].poststat.ia_gfid) != 0) {
+                        /* gfid differs from the chosen one: rebuild it. */
+                        status |= afr_selfheal_recreate_entry (this, child,
+                                                               gfid_idx, parent,
+                                                               bname, inode,
+                                                               replies,
+                                                               recreated);
+                } else {
+                        matching[child] = 1;
+                }
+        }
+
+        if (AFR_COUNT (recreated, priv->child_count))
+                afr_selfheal_newentry_mark (frame, this, inode, gfid_idx,
+                                            replies, matching, recreated);
+        return status;
+}
+
+
+/*
+ * Remove the entry <@pargfid>/@bname from every subvolume that returned a
+ * valid, successful reply: directories go through syncop_rmdir(), everything
+ * else through syncop_unlink(). A warning is logged per removal.
+ *
+ * Returns the bitwise OR of all syncop results.
+ */
+int
+__afr_selfheal_name_expunge (xlator_t *this, inode_t *parent, uuid_t pargfid,
+                             const char *bname, inode_t *inode,
+                             struct afr_reply *replies)
+{
+        afr_private_t *priv = this->private;
+        loc_t          victim = {0, };
+        char           gfid_str[64];
+        int            status = 0;
+        int            child = 0;
+
+        victim.name = bname;
+        victim.inode = inode_ref (inode);
+        victim.parent = inode_ref (parent);
+        gf_uuid_copy (victim.pargfid, pargfid);
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!replies[child].valid || replies[child].op_ret)
+                        continue;
+
+                if (replies[child].poststat.ia_type == IA_IFDIR) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                AFR_MSG_EXPUNGING_FILE_OR_DIR,
+                                "expunging dir %s/%s (%s) on %s",
+                                uuid_utoa (pargfid), bname,
+                                uuid_utoa_r (replies[child].poststat.ia_gfid,
+                                             gfid_str),
+                                priv->children[child]->name);
+
+                        status |= syncop_rmdir (priv->children[child], &victim,
+                                                1, NULL, NULL);
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                AFR_MSG_EXPUNGING_FILE_OR_DIR,
+                                "expunging file %s/%s (%s) on %s",
+                                uuid_utoa (pargfid), bname,
+                                uuid_utoa_r (replies[child].poststat.ia_gfid,
+                                             gfid_str),
+                                priv->children[child]->name);
+
+                        status |= syncop_unlink (priv->children[child], &victim,
+                                                 NULL, NULL);
+                }
+        }
+
+        loc_wipe (&victim);
+
+        return status;
+}
+
+/* Return the index of the first subvolume that has a valid reply, is marked
+ * in @sources and reports a non-null gfid, or -1 when there is none.
+ *
+ * Call this only after ensuring that there is no gfid mismatch for the inode
+ * across multiple sources.
+ */
+static int
+afr_selfheal_gfid_idx_get (xlator_t *this, struct afr_reply *replies,
+                           unsigned char *sources)
+{
+        afr_private_t *priv = this->private;
+        int            child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (replies[child].valid && sources[child] &&
+                    !gf_uuid_is_null (replies[child].poststat.ia_gfid))
+                        return child;
+        }
+
+        return -1;
+}
+
+/*
+ * Decide whether the name needs healing, judging by the valid @replies:
+ *   - any reply failed with ENODATA, or
+ *   - a reply's op_ret or gfid differs from the first valid reply, or
+ *   - a reply other than the first valid one succeeded with a null gfid.
+ */
+static gf_boolean_t
+afr_selfheal_name_need_heal_check (xlator_t *this, struct afr_reply *replies)
+{
+        afr_private_t    *priv  = this->private;
+        struct afr_reply *cur   = NULL;
+        struct afr_reply *base  = NULL;
+        gf_boolean_t      heal  = _gf_false;
+        int               child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                cur = &replies[child];
+                if (!cur->valid)
+                        continue;
+
+                if (cur->op_ret == -1 && cur->op_errno == ENODATA)
+                        heal = _gf_true;
+
+                if (base == NULL) {
+                        /* First valid reply is the comparison baseline. */
+                        base = cur;
+                        continue;
+                }
+
+                if (cur->op_ret != base->op_ret ||
+                    gf_uuid_compare (cur->poststat.ia_gfid,
+                                     base->poststat.ia_gfid) ||
+                    (cur->op_ret == 0 &&
+                     gf_uuid_is_null (cur->poststat.ia_gfid)))
+                        heal = _gf_true;
+        }
+
+        return heal;
+}
+
+/*
+ * Look for conflicting inode types among the valid @replies.
+ *
+ * Replies whose ia_type is IA_INVAL are ignored. The first remaining reply
+ * becomes the baseline. Each later reply that is a source (or any reply when
+ * @source is -1) is compared with the baseline, provided the baseline is
+ * itself a source (or @source is -1); a differing type is logged as
+ * split-brain and -EIO is returned. Otherwise that reply becomes the new
+ * baseline.
+ *
+ * Returns 0 when no mismatch was found.
+ */
+static int
+afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ uuid_t pargfid, const char *bname)
+{
+ int i = 0;
+ int type_idx = -1;
+ ia_type_t inode_type = IA_INVAL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ /* First reply with a usable type: remember it as the baseline. */
+ if (inode_type == IA_INVAL) {
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ continue;
+ }
+
+ if (sources[i] || source == -1) {
+ if ((sources[type_idx] || source == -1) &&
+ (inode_type != replies[i].poststat.ia_type)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch for <gfid:%s>/%s: "
+ "%d on %s and %d on %s",
+ uuid_utoa(pargfid), bname,
+ replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ replies[type_idx].poststat.ia_type,
+ priv->children[type_idx]->name);
+
+ return -EIO;
+ }
+ /* No conflict: this reply becomes the new baseline. */
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Look for conflicting gfids among the valid @replies.
+ *
+ * Replies with a null gfid are ignored. The first remaining reply becomes
+ * the baseline. Each later reply that is a source (or any reply when @source
+ * is -1) is compared with the baseline, provided the baseline is itself a
+ * source (or @source is -1); a differing gfid is logged as split-brain and
+ * -EIO is returned. Otherwise that reply becomes the new baseline.
+ *
+ * On success returns 0 and stores the index of the final baseline in
+ * *@gfid_idx (-1 when no reply carried a gfid). *@gfid_idx is not written
+ * when -EIO is returned.
+ */
+static int
+afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ int *gfid_idx, uuid_t pargfid,
+ const char *bname)
+{
+ int i = 0;
+ int gfid_idx_iter = -1;
+ void *gfid = NULL;
+ afr_private_t *priv = NULL;
+ char g1[64], g2[64];
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (gf_uuid_is_null (replies[i].poststat.ia_gfid))
+ continue;
+
+ /* First reply that carries a gfid: remember it as the baseline. */
+ if (!gfid) {
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ continue;
+ }
+
+ if (sources[i] || source == -1) {
+ if ((sources[gfid_idx_iter] || source == -1) &&
+ gf_uuid_compare (gfid, replies[i].poststat.ia_gfid)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN,
+ "GFID mismatch for <gfid:%s>/%s "
+ "%s on %s and %s on %s",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+ priv->children[i]->name,
+ uuid_utoa_r (replies[gfid_idx_iter].poststat.ia_gfid, g2),
+ priv->children[gfid_idx_iter]->name);
+
+ return -EIO;
+ }
+
+ /* No conflict: this reply becomes the new baseline. */
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ }
+ }
+
+ *gfid_idx = gfid_idx_iter;
+ return 0;
+}
+
+/*
+ * Report whether every subvolume marked in @sources answered the lookup with
+ * ENOENT. When @source is -1 (no definite source) the answer is always
+ * _gf_false.
+ */
+static gf_boolean_t
+afr_selfheal_name_source_empty_check (xlator_t *this, struct afr_reply *replies,
+                                      unsigned char *sources, int source)
+{
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        if (source == -1)
+                return _gf_false;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!sources[child])
+                        continue;
+
+                /* A source that did not fail with ENOENT has the name. */
+                if (replies[child].op_ret != -1 ||
+                    replies[child].op_errno != ENOENT)
+                        return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Core of the name heal for <@pargfid>/@bname, working on @replies.
+ *
+ * Steps, in order:
+ *  1. Return 0 if afr_selfheal_name_need_heal_check() says nothing is needed.
+ *  2. If the sources have no such name, expunge it and return.
+ *  3. Fail if the replies disagree on inode type or on gfid.
+ *  4. Pick the gfid to use: the one found in the replies, else @gfid_req
+ *     (returning -1 if that is NULL or a null gfid).
+ *  5. Assign the gfid via a fresh lookup, which also refreshes @replies.
+ *  6. Impunge the name onto the subvolumes that lack it.
+ *
+ * An -EIO result from expunge or impunge is converted to -1; other results
+ * are returned unchanged.
+ */
+int
+__afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies,
+ void *gfid_req)
+{
+ int gfid_idx = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t is_gfid_absent = _gf_false;
+
+ need_heal = afr_selfheal_name_need_heal_check (this, replies);
+ if (!need_heal)
+ return 0;
+
+ source_is_empty = afr_selfheal_name_source_empty_check (this, replies,
+ sources,
+ source);
+ if (source_is_empty) {
+ ret = __afr_selfheal_name_expunge (this, parent, pargfid,
+ bname, inode, replies);
+ if (ret == -EIO)
+ ret = -1;
+ return ret;
+ }
+
+ ret = afr_selfheal_name_type_mismatch_check (this, replies, source,
+ sources, pargfid, bname);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source,
+ sources, &gfid_idx,
+ pargfid, bname);
+ if (ret)
+ return ret;
+
+ /* No reply carried a gfid: fall back to the one requested by the caller. */
+ if (gfid_idx == -1) {
+ if (!gfid_req || gf_uuid_is_null (gfid_req))
+ return -1;
+ gfid = gfid_req;
+ } else {
+ gfid = &replies[gfid_idx].poststat.ia_gfid;
+ }
+
+ is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
+ ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode,
+ replies, gfid, locked_on,
+ is_gfid_absent);
+ if (ret)
+ return ret;
+
+ /* replies were refreshed above; look again for a source holding a gfid. */
+ if (gfid_idx == -1) {
+ gfid_idx = afr_selfheal_gfid_idx_get (this, replies, sources);
+ if (gfid_idx == -1)
+ return -1;
+ }
+
+ ret = __afr_selfheal_name_impunge (frame, this, parent, pargfid,
+ bname, inode,
+ replies, gfid_idx);
+ if (ret == -EIO)
+ ret = -1;
+
+ return ret;
+}
+
+
+/*
+ * Pick the source subvolume for a name heal.
+ *
+ * If @locked_on and @healed_sinks compare equal, if no source is marked, or
+ * if afr_does_witness_exist() reports a witness, @sources is cleared,
+ * afr_mark_active_sinks() is called and -1 is returned. Otherwise the index
+ * of the first marked source is returned.
+ */
+int
+__afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies,
+                                     uint64_t *witness)
+{
+        afr_private_t *priv       = this->private;
+        int            nr_sources = 0;
+        int            child      = 0;
+
+        nr_sources = AFR_COUNT (sources, priv->child_count);
+
+        if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
+            || nr_sources == 0 || afr_does_witness_exist (this, witness)) {
+                memset (sources, 0, sizeof (*sources) * priv->child_count);
+                afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
+                return -1;
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (sources[child])
+                        return child;
+        }
+
+        return -1;
+}
+
+/*
+ * Work out sources, sinks and healed_sinks for a name heal under @parent.
+ *
+ * Discovers the parent on all subvolumes, derives the heal direction for an
+ * entry transaction and stores the chosen source index in *@source_p.
+ * *@source_p is only written when both steps succeed.
+ *
+ * Returns 0 on success, otherwise the failing step's return value.
+ */
+int
+__afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent,
+                             uuid_t pargfid, unsigned char *locked_on,
+                             unsigned char *sources, unsigned char *sinks,
+                             unsigned char *healed_sinks, int *source_p)
+{
+        afr_private_t    *priv    = this->private;
+        struct afr_reply *replies = NULL;
+        uint64_t         *witness = NULL;
+        int               ret     = -1;
+
+        replies = alloca0 (priv->child_count * sizeof(*replies));
+
+        ret = afr_selfheal_unlocked_discover (frame, parent, pargfid, replies);
+
+        if (ret == 0) {
+                witness = alloca0 (sizeof (*witness) * priv->child_count);
+                ret = afr_selfheal_find_direction (frame, this, replies,
+                                                   AFR_ENTRY_TRANSACTION,
+                                                   locked_on, sources, sinks,
+                                                   witness, NULL);
+        }
+
+        if (ret == 0) {
+                /* Start healed_sinks[] optimistically as the intersection of
+                   the to-be-healed servers (sinks[]) and the servers which
+                   are up (locked_on[]). Servers are unmarked from
+                   healed_sinks[] as failures are met while healing.
+                */
+                AFR_INTERSECT (healed_sinks, sinks, locked_on,
+                               priv->child_count);
+
+                /* A negative source (typically split-brain) is not an error
+                   here: a conservative merge of entries is performed rather
+                   than erroring out. */
+                *source_p = __afr_selfheal_name_finalize_source (this, sources,
+                                                                 healed_sinks,
+                                                                 locked_on,
+                                                                 replies,
+                                                                 witness);
+        }
+
+        if (replies)
+                afr_replies_wipe (replies, priv->child_count);
+
+        return ret;
+}
+
+
+/*
+ * Heal the name <@pargfid>/@bname while holding an entry lock on @parent.
+ *
+ * The lookup issued under the lock carries GF_GFIDLESS_LOOKUP = 1 in its
+ * xattr request. @gfid_req is handed to __afr_selfheal_name_do(), which uses
+ * it when no reply carries a gfid.
+ *
+ * Returns:
+ *   -ENOMEM    the request dict could not be allocated, or the locked lookup
+ *              produced no inode;
+ *   -1         GF_GFIDLESS_LOOKUP could not be stored in the dict;
+ *   -ENOTCONN  afr_selfheal_entrylk() returned fewer than
+ *              AFR_SH_MIN_PARTICIPANTS;
+ *   otherwise  the result of __afr_selfheal_name_prepare() (if non-zero) or
+ *              of __afr_selfheal_name_do().
+ */
+int
+afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
+                      uuid_t pargfid, const char *bname, void *gfid_req)
+{
+        afr_private_t    *priv         = NULL;
+        unsigned char    *sources      = NULL;
+        unsigned char    *sinks        = NULL;
+        unsigned char    *healed_sinks = NULL;
+        unsigned char    *locked_on    = NULL;
+        int               source       = -1;
+        struct afr_reply *replies      = NULL;
+        int               ret          = -1;
+        inode_t          *inode        = NULL;
+        dict_t           *xattr        = NULL;
+
+        xattr = dict_new ();
+        if (!xattr)
+                return -ENOMEM;
+
+        ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1);
+        if (ret) {
+                /* Drop our reference the same way the normal exit path does
+                 * instead of bypassing the refcount with dict_destroy(). */
+                dict_unref (xattr);
+                return -1;
+        }
+
+        priv = this->private;
+
+        locked_on = alloca0 (priv->child_count);
+        sources = alloca0 (priv->child_count);
+        sinks = alloca0 (priv->child_count);
+        healed_sinks = alloca0 (priv->child_count);
+
+        replies = alloca0 (priv->child_count * sizeof(*replies));
+
+        ret = afr_selfheal_entrylk (frame, this, parent, this->name, bname,
+                                    locked_on);
+        {
+                if (ret < AFR_SH_MIN_PARTICIPANTS) {
+                        ret = -ENOTCONN;
+                        goto unlock;
+                }
+
+                ret = __afr_selfheal_name_prepare (frame, this, parent, pargfid,
+                                                   locked_on, sources, sinks,
+                                                   healed_sinks, &source);
+                if (ret)
+                        goto unlock;
+
+                inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
+                                                         replies, locked_on,
+                                                         xattr);
+                if (!inode) {
+                        ret = -ENOMEM;
+                        goto unlock;
+                }
+
+                ret = __afr_selfheal_name_do (frame, this, parent, pargfid,
+                                              bname, inode, sources, sinks,
+                                              healed_sinks, source, locked_on,
+                                              replies, gfid_req);
+        }
+unlock:
+        /* Releases whatever subset of locks is recorded in locked_on[]. */
+        afr_selfheal_unentrylk (frame, this, parent, this->name, bname,
+                                locked_on, NULL);
+        if (inode)
+                inode_unref (inode);
+
+        /* replies and xattr are always non-NULL once we get here. */
+        afr_replies_wipe (replies, priv->child_count);
+        dict_unref (xattr);
+
+        return ret;
+}
+
+
+/*
+ * Without taking locks, look up @bname under @parent on all children that
+ * are up and set *@need_heal to _gf_true when the replies suggest a heal:
+ * a reply failed with ENODATA, or a reply's op_ret or gfid differs from the
+ * first valid reply. *@need_heal is never reset to _gf_false here.
+ *
+ * Returns 0, or -ENOMEM when the lookup yields no inode.
+ */
+int
+afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+                                    inode_t *parent, uuid_t pargfid,
+                                    const char *bname, gf_boolean_t *need_heal)
+{
+        afr_private_t    *priv    = this->private;
+        struct afr_reply *replies = NULL;
+        struct afr_reply *cur     = NULL;
+        struct afr_reply *base    = NULL;
+        inode_t          *inode   = NULL;
+        int               child   = 0;
+
+        replies = alloca0 (sizeof (*replies) * priv->child_count);
+
+        inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
+                                                 replies, priv->child_up, NULL);
+        if (inode == NULL)
+                return -ENOMEM;
+
+        for (child = 0; child < priv->child_count; child++) {
+                cur = &replies[child];
+                if (!cur->valid)
+                        continue;
+
+                if (cur->op_ret == -1 && cur->op_errno == ENODATA)
+                        *need_heal = _gf_true;
+
+                if (base == NULL) {
+                        /* First valid reply is the comparison baseline. */
+                        base = cur;
+                        continue;
+                }
+
+                if (cur->op_ret != base->op_ret ||
+                    gf_uuid_compare (cur->poststat.ia_gfid,
+                                     base->poststat.ia_gfid))
+                        *need_heal = _gf_true;
+        }
+
+        inode_unref (inode);
+        if (replies)
+                afr_replies_wipe (replies, priv->child_count);
+        return 0;
+}
+
+/*
+ * Entry point for healing the name <@pargfid>/@bname.
+ *
+ * Finds the parent inode, creates a frame, inspects the name without locks
+ * and, only if that inspection asks for it, performs the locked heal.
+ *
+ * Returns 0 when no heal was needed or the heal succeeded, -1 when the
+ * parent inode or the frame is unavailable, otherwise the non-zero result of
+ * the inspection or of afr_selfheal_name_do().
+ */
+int
+afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname,
+                   void *gfid_req)
+{
+        gf_boolean_t  heal_wanted = _gf_false;
+        call_frame_t *frame       = NULL;
+        inode_t      *parent      = NULL;
+        int           ret         = -1;
+
+        parent = afr_inode_find (this, pargfid);
+        if (parent != NULL)
+                frame = afr_frame_create (this);
+
+        if (frame != NULL) {
+                ret = afr_selfheal_name_unlocked_inspect (frame, this, parent,
+                                                          pargfid, bname,
+                                                          &heal_wanted);
+                if (ret == 0 && heal_wanted)
+                        ret = afr_selfheal_name_do (frame, this, parent,
+                                                    pargfid, bname, gfid_req);
+        }
+
+        if (parent)
+                inode_unref (parent);
+        if (frame)
+                AFR_STACK_DESTROY (frame);
+
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index c43473a4332..ec5337e60b2 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,54 +1,285 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEAL_H__
-#define __AFR_SELF_HEAL_H__
-#include <sys/stat.h>
+#ifndef _AFR_SELFHEAL_H
+#define _AFR_SELFHEAL_H
+
+#define AFR_SH_MIN_PARTICIPANTS 2
+
+/* Perform fop on all UP subvolumes and wait for all callbacks to return.
+ *
+ * The replies stored in the frame's local are wiped first. One call is wound
+ * per child whose child_up[] flag is set, with the child index as cookie,
+ * and syncbarrier_wait() then waits for that many callbacks.
+ *
+ * 'frame' is expanded more than once, so pass an expression without side
+ * effects. */
+
+#define AFR_ONALL(frame, rfn, fop, args ...) do {                       \
+        afr_local_t   *__local = (frame)->local;                        \
+        afr_private_t *__priv = (frame)->this->private;                 \
+        int __i = 0, __count = 0;                                       \
+                                                                        \
+        afr_local_replies_wipe (__local, __priv);                       \
+                                                                        \
+        for (__i = 0; __i < __priv->child_count; __i++) {               \
+                if (!__priv->child_up[__i]) continue;                   \
+                STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i,      \
+                                   __priv->children[__i],               \
+                                   __priv->children[__i]->fops->fop, args); \
+                __count++;                                              \
+        }                                                               \
+        syncbarrier_wait (&__local->barrier, __count);                  \
+        } while (0)
+
+
+/* Perform fop on all subvolumes represented by list[] array and wait
+   for all callbacks to return.
+
+   The replies stored in the frame's local are wiped first. One call is wound
+   per child whose list[] slot is non-zero, with the child index as cookie,
+   and syncbarrier_wait() then waits for that many callbacks.
+
+   'list' and 'frame' are expanded more than once, so pass expressions
+   without side effects. */
+
+#define AFR_ONLIST(list, frame, rfn, fop, args ...) do {                \
+        afr_local_t   *__local = (frame)->local;                        \
+        afr_private_t *__priv = (frame)->this->private;                 \
+        int __i = 0, __count = 0;                                       \
+                                                                        \
+        afr_local_replies_wipe (__local, __priv);                       \
+                                                                        \
+        for (__i = 0; __i < __priv->child_count; __i++) {               \
+                if (!(list)[__i]) continue;                             \
+                STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i,      \
+                                   __priv->children[__i],               \
+                                   __priv->children[__i]->fops->fop, args); \
+                __count++;                                              \
+        }                                                               \
+        syncbarrier_wait (&__local->barrier, __count);                  \
+        } while (0)
+
+
+/* Perform fop on all UP subvolumes one at a time: after winding to a child,
+   wait on the barrier for one callback before moving on to the next child.
+   The replies stored in the frame's local are wiped first.
+
+   'frame' is expanded more than once, so pass an expression without side
+   effects. */
+#define AFR_SEQ(frame, rfn, fop, args ...) do {                         \
+        afr_local_t   *__local = (frame)->local;                        \
+        afr_private_t *__priv = (frame)->this->private;                 \
+        int __i = 0;                                                    \
+                                                                        \
+        afr_local_replies_wipe (__local, __priv);                       \
+                                                                        \
+        for (__i = 0; __i < __priv->child_count; __i++) {               \
+                if (!__priv->child_up[__i]) continue;                   \
+                STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i,      \
+                                   __priv->children[__i],               \
+                                   __priv->children[__i]->fops->fop, args); \
+                syncbarrier_wait (&__local->barrier, 1);                \
+        }                                                               \
+        } while (0)
+
+
+/* Allocate, with alloca0(), an n x n matrix of 'type': an array of n row
+   pointers, each pointing to a row of n elements. Evaluates to the
+   'type **' row-pointer array.
+
+   'n' is parenthesised so that expressions such as 'a + b' size the matrix
+   correctly; it is still expanded more than once, so it must have no side
+   effects. */
+#define ALLOC_MATRIX(n, type) ({type **__ptr = NULL;                    \
+        int __i;                                                        \
+        __ptr = alloca0 ((n) * sizeof(type *));                         \
+        for (__i = 0; __i < (n); __i++)                                 \
+                __ptr[__i] = alloca0 ((n) * sizeof(type));              \
+        __ptr;})
+
+
+/* True when member ia_<field> of f and of s hold the same bytes. f and s
+   are parenthesised so that arguments such as '*ptr' select the member of
+   the intended object. */
+#define IA_EQUAL(f,s,field) (memcmp (&((f).ia_##field), &((s).ia_##field), sizeof ((s).ia_##field)) == 0)
+
+
+int
+afr_selfheal (xlator_t *this, uuid_t gfid);
+
+void
+afr_throttled_selfheal (call_frame_t *frame, xlator_t *this);
+
+int
+afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name,
+ void *gfid_req);
+
+int
+afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+
+int
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
-#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
-#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
-#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
-#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
+int
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
-#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
+int
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on);
int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this);
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on);
+
int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this);
+afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this);
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
+afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on);
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this);
+afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata);
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies);
+
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr);
+
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, uint64_t *witness,
+ gf_boolean_t *flag);
+int
+afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata);
int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this);
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on);
-#endif /* __AFR_SELF_HEAL_H__ */
+int
+afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *newentry);
+
+int
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata);
+
+call_frame_t *
+afr_frame_create (xlator_t *this);
+
+inode_t *
+afr_inode_find (xlator_t *this, uuid_t gfid);
+
+int
+afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *parbuf);
+
+void
+afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count);
+
+int
+afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry);
+
+unsigned int
+afr_success_count (struct afr_reply *replies, unsigned int count);
+
+void
+afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks);
+
+void
+afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies);
+void
+afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks);
+
+gf_boolean_t
+afr_dict_contains_heal_op (call_frame_t *frame);
+
+int
+afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type);
+
+int
+afr_get_child_index_from_name (xlator_t *this, char *name);
+
+gf_boolean_t
+afr_does_witness_exist (xlator_t *this, uint64_t *witness);
+
+int
+__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies,
+ gf_boolean_t *flag);
+
+int
+__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies,
+ gf_boolean_t *flag);
+int
+__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ gf_boolean_t *flag);
+
+int
+afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+ uuid_t gfid, inode_t **link_inode,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal);
+
+int
+afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid);
+
+int
+afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+afr_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on);
+int
+afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type);
+
+int
+afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf);
+#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
new file mode 100644
index 00000000000..7ccac919769
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -0,0 +1,1222 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-self-heald.h"
+#include "protocol-common.h"
+#include "syncop-utils.h"
+#include "afr-messages.h"
+
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#define AFR_STATISTICS_HISTORY_SIZE 50
+
+
+/*
+ * Record in healer->local whether healer->subvol is local, as reported by
+ * afr_shd_is_subvol_local(). When the subvolume is not local the macro
+ * leaves the current loop iteration: `break` if safe_break() succeeds,
+ * `continue` otherwise.
+ *
+ * Because it expands to bare `break`/`continue` statements it must be used
+ * directly inside a loop body and cannot be wrapped in do { } while (0).
+ * Note that `healer` is evaluated more than once.
+ */
+#define ASSERT_LOCAL(this, healer) \
+ if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break (healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ }
+
+
+/*
+ * Address of the n-th index healer / full healer kept in the private data of
+ * xlator `this`. Arguments and the complete expansion are parenthesized so
+ * the macros stay correct when given non-trivial expressions or when the
+ * result is used inside a larger expression.
+ */
+#define NTH_INDEX_HEALER(this, n) \
+        (&(((afr_private_t *)(this)->private)->shd.index_healers[(n)]))
+#define NTH_FULL_HEALER(this, n) \
+        (&(((afr_private_t *)(this)->private)->shd.full_healers[(n)]))
+
+/*
+ * Return the name of the child xlator at index @subvol, or NULL when
+ * @subvol is not a valid index into priv->children.
+ *
+ * Valid indices are 0 .. child_count - 1. child_count itself is one past
+ * the last child and is therefore rejected too.
+ */
+char *
+afr_subvol_name (xlator_t *this, int subvol)
+{
+        afr_private_t *priv = NULL;
+
+        priv = this->private;
+        if (subvol < 0 || subvol >= priv->child_count)
+                return NULL;
+
+        return priv->children[subvol]->name;
+}
+
+
+/*
+ * Destroy callback passed to eh_new() for the per-child statistics
+ * histories. It performs no work on @data.
+ */
+void
+afr_destroy_crawl_event_data (void *data)
+{
+        (void) data;
+}
+
+
+/*
+ * Destroy callback passed to eh_new() for the split-brain history.
+ * Releases the path string held by the shd_event_t in @data; the event
+ * structure itself is not freed here. A NULL @data is ignored.
+ */
+void
+afr_destroy_shd_event_data (void *data)
+{
+        shd_event_t *event = data;
+
+        if (event != NULL)
+                GF_FREE (event->path);
+}
+
+
+/*
+ * Ask syncop_is_subvol_local() whether child number @subvol is local,
+ * using the root inode of this xlator's inode table as the probe location.
+ *
+ * Returns the flag filled in by that call; it stays _gf_false when the
+ * call does not set it. @subvol is used as an index without range checks.
+ */
+gf_boolean_t
+afr_shd_is_subvol_local (xlator_t *this, int subvol)
+{
+        afr_private_t *priv    = this->private;
+        inode_t       *root    = this->itable->root;
+        gf_boolean_t   local   = _gf_false;
+        loc_t          rootloc = {0, };
+
+        rootloc.inode = root;
+        gf_uuid_copy (rootloc.gfid, root->gfid);
+
+        syncop_is_subvol_local (priv->children[subvol], &rootloc, &local);
+
+        return local;
+}
+
+
+/*
+ * Block until a sweep is requested (healer->rerun is set) or
+ * priv->shd.timeout seconds have elapsed, then consume the request.
+ *
+ * The callers in this file hold healer->mutex around the call, which
+ * pthread_cond_timedwait() needs. While the self-heal daemon is disabled
+ * the wait is simply started over, so the function only returns once
+ * priv->shd.enabled is set.
+ *
+ * Returns the value healer->rerun had when the wait ended: non-zero if a
+ * sweep was requested, 0 if the wait timed out.
+ */
+int
+__afr_shd_healer_wait (struct subvol_healer *healer)
+{
+        afr_private_t   *priv     = healer->this->private;
+        struct timespec  deadline = {0, };
+        int              rc       = 0;
+
+        do {
+                deadline.tv_sec = time (NULL) + priv->shd.timeout;
+
+                while (!healer->rerun) {
+                        rc = pthread_cond_timedwait (&healer->cond,
+                                                     &healer->mutex,
+                                                     &deadline);
+                        if (rc == ETIMEDOUT)
+                                break;
+                }
+
+                /* Hand the pending request to the caller and clear it. */
+                rc = healer->rerun;
+                healer->rerun = 0;
+        } while (!priv->shd.enabled);
+
+        return rc;
+}
+
+
+/*
+ * Locked wrapper around __afr_shd_healer_wait(): takes healer->mutex for
+ * the duration of the wait and returns whatever the inner call returned.
+ */
+int
+afr_shd_healer_wait (struct subvol_healer *healer)
+{
+        int rerun = 0;
+
+        pthread_mutex_lock (&healer->mutex);
+        rerun = __afr_shd_healer_wait (healer);
+        pthread_mutex_unlock (&healer->mutex);
+
+        return rerun;
+}
+
+
+/*
+ * Decide, under healer->mutex, whether the healer may stop.
+ *
+ * If a rerun has been requested the healer must keep going and _gf_false
+ * is returned. Otherwise healer->running is cleared and _gf_true is
+ * returned.
+ */
+gf_boolean_t
+safe_break (struct subvol_healer *healer)
+{
+        gf_boolean_t stopped = _gf_false;
+
+        pthread_mutex_lock (&healer->mutex);
+        if (!healer->rerun) {
+                healer->running = _gf_false;
+                stopped = _gf_true;
+        }
+        pthread_mutex_unlock (&healer->mutex);
+
+        return stopped;
+}
+
+
+/*
+ * Return the inode for @gfid from this->itable, creating and linking it if
+ * it is not there yet.
+ *
+ * When the inode is not already in the table, a lookup is sent to @subvol
+ * with GF_INDEX_IA_TYPE_GET_REQ set in xdata. The value returned under
+ * GF_INDEX_IA_TYPE_GET_RSP (IA_INVAL if no response dict came back) is
+ * stored in the inode context keyed by @subvol.
+ *
+ * Returns the inode, or NULL if allocation, the lookup or reading the
+ * response key failed.
+ */
+inode_t *
+afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
+{
+ int ret = 0;
+ uint64_t val = IA_INVAL;
+ loc_t loc = {0, };
+ dict_t *xdata = NULL;
+ dict_t *rsp_dict = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {0, };
+
+ /* Fast path: the inode is already in the table. */
+ inode = inode_find (this->itable, gfid);
+ if (inode)
+ goto out;
+
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode)
+ goto out;
+ gf_uuid_copy (loc.gfid, gfid);
+
+ xdata = dict_new ();
+ if (!xdata)
+ goto out;
+
+ ret = dict_set_int8 (xdata, GF_INDEX_IA_TYPE_GET_REQ, 1);
+ if (ret)
+ goto out;
+
+ ret = syncop_lookup (subvol, &loc, &iatt, NULL, xdata, &rsp_dict);
+ if (ret < 0)
+ goto out;
+
+ /* A response dict without the type key is treated as a failure. */
+ if (rsp_dict) {
+ ret = dict_get_uint64 (rsp_dict, GF_INDEX_IA_TYPE_GET_RSP,
+ &val);
+ if (ret)
+ goto out;
+ }
+
+ inode = inode_link (loc.inode, NULL, NULL, &iatt);
+ /* The result of inode_ctx_set2() is stored in ret but never examined. */
+ ret = inode_ctx_set2 (inode, subvol, 0, &val);
+out:
+ if (xdata)
+ dict_unref (xdata);
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+ /* loc_wipe() is called on every path, including when loc was never filled. */
+ loc_wipe (&loc);
+ return inode;
+}
+
+/*
+ * Resolve the index directory that @subvol advertises under the virtual
+ * xattr @vgfid.
+ *
+ * The xattr is fetched on the root of this->itable; its value is used as
+ * the gfid handed to afr_shd_inode_find().
+ *
+ * Returns the inode from afr_shd_inode_find(), or NULL on failure. When
+ * the getxattr itself fails (or yields no dict) errno is set to -ret.
+ */
+inode_t*
+afr_shd_index_inode (xlator_t *this, xlator_t *subvol, char *vgfid)
+{
+        loc_t    root        = {0, };
+        dict_t  *rsp         = NULL;
+        void    *dir_gfid    = NULL;
+        inode_t *index_inode = NULL;
+        int      rc          = 0;
+
+        root.inode = inode_ref (this->itable->root);
+        gf_uuid_copy (root.gfid, root.inode->gfid);
+
+        rc = syncop_getxattr (subvol, &root, &rsp, vgfid, NULL, NULL);
+        if (rc || !rsp) {
+                errno = -rc;
+        } else if (dict_get_ptr (rsp, vgfid, &dir_gfid) == 0) {
+                gf_msg_debug (this->name, 0, "%s dir gfid for %s: %s",
+                              vgfid, subvol->name, uuid_utoa (dir_gfid));
+
+                index_inode = afr_shd_inode_find (this, subvol, dir_gfid);
+        }
+
+        loc_wipe (&root);
+
+        if (rsp)
+                dict_unref (rsp);
+
+        return index_inode;
+}
+
+/*
+ * Remove the entry @name from the directory @inode on @subvol.
+ *
+ * Directories (per @type) are removed with syncop_rmdir(), everything
+ * else with syncop_unlink(). Returns the result of that call.
+ */
+int
+afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name,
+                     ia_type_t type)
+{
+        loc_t entry = {0,};
+        int   rc    = 0;
+
+        entry.parent = inode_ref (inode);
+        entry.name = name;
+
+        rc = IA_ISDIR (type)
+                ? syncop_rmdir (subvol, &entry, 1, NULL, NULL)
+                : syncop_unlink (subvol, &entry, NULL, NULL);
+
+        loc_wipe (&entry);
+        return rc;
+}
+
+/*
+ * Send a post-op carrying all-zero changelog values for @gfid to every
+ * child.
+ *
+ * The xattr dict holds AFR_DIRTY plus one pending key per child, each
+ * pointing at the same zero-filled array. Any failure while building the
+ * request silently aborts the operation; nothing is reported to the caller.
+ */
+void
+afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid)
+{
+
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ int i = 0;
+ /* Zero changelog shared by every key; it lives on this stack frame. */
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+
+ priv = this->private;
+ frame = afr_frame_create (this);
+ if (!frame)
+ goto out;
+ inode = afr_inode_find (this, gfid);
+ if (!inode)
+ goto out;
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+ ret = dict_set_static_bin (xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ }
+
+ /* Send the xattrop to all bricks. Doing a lookup to see whether the
+ * bricks are up or have valid replies for this gfid seems a bit of an
+ * overkill. */
+ for (i = 0; i < priv->child_count; i++)
+ afr_selfheal_post_op (frame, this, inode, i, xattr, NULL);
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY (frame);
+ if (inode)
+ inode_unref (inode);
+ if (xattr)
+ dict_unref (xattr);
+ return;
+}
+
+/*
+ * Run a name self-heal for @bname under the directory @parent on the
+ * current xlator (THIS) and return its result.
+ *
+ * @healer and @child are accepted but not used by this function.
+ */
+int
+afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent,
+                       const char *bname)
+{
+        return afr_selfheal_name (THIS, parent, bname, NULL);
+}
+
+/*
+ * Heal @gfid and account for the outcome in the healer's crawl event.
+ *
+ * The gfid is first resolved to a path on child number @child; if that
+ * fails the error is returned without attempting a heal. Otherwise the
+ * result of afr_selfheal() is classified under priv->lock:
+ *   -EIO       -> split_brain_count, and the path is saved in the
+ *                 split-brain history
+ *   other < 0  -> heal_failed_count
+ *   0          -> healed_count
+ * Positive results are not counted.
+ *
+ * Returns the result of syncop_gfid_to_path() on resolution failure,
+ * otherwise the result of afr_selfheal().
+ */
+int
+afr_shd_selfheal (struct subvol_healer *healer, int child, uuid_t gfid)
+{
+ int ret = 0;
+ eh_t *eh = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ this = healer->this;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = &healer->crawl_event;
+
+ subvol = priv->children[child];
+
+ //If this fails with ENOENT/ESTALE index is stale
+ ret = syncop_gfid_to_path (this->itable, subvol, gfid, &path);
+ if (ret < 0)
+ return ret;
+
+ ret = afr_selfheal (this, gfid);
+
+ LOCK (&priv->lock);
+ {
+ if (ret == -EIO) {
+ eh = shd->split_brain;
+ crawl_event->split_brain_count++;
+ } else if (ret < 0) {
+ crawl_event->heal_failed_count++;
+ } else if (ret == 0) {
+ crawl_event->healed_count++;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ /* eh is only set for the split-brain case above. */
+ if (eh) {
+ shd_event = GF_CALLOC (1, sizeof(*shd_event),
+ gf_afr_mt_shd_event_t);
+ if (!shd_event)
+ goto out;
+
+ shd_event->child = child;
+ shd_event->path = path;
+
+ if (eh_save_history (eh, shd_event) < 0)
+ goto out;
+
+ /* The history now holds the event and its path; clear the locals so
+ * the cleanup below does not free them. */
+ shd_event = NULL;
+ path = NULL;
+ }
+out:
+ GF_FREE (shd_event);
+ GF_FREE (path);
+ return ret;
+}
+
+
+/*
+ * Reset the healer's crawl event at the start of a sweep: zero the three
+ * counters, stamp start_time with the current time and clear end_time.
+ */
+void
+afr_shd_sweep_prepare (struct subvol_healer *healer)
+{
+        crawl_event_t *ev = &healer->crawl_event;
+
+        ev->healed_count = 0;
+        ev->split_brain_count = 0;
+        ev->heal_failed_count = 0;
+
+        time (&ev->start_time);
+        ev->end_time = 0;
+}
+
+
+/*
+ * Close the healer's crawl event at the end of a sweep.
+ *
+ * end_time is stamped, a copy of the event is saved into the statistics
+ * history of the healer's subvolume, and start_time is cleared on the live
+ * event. If the copy cannot be made nothing is saved; if saving fails the
+ * copy is freed.
+ */
+void
+afr_shd_sweep_done (struct subvol_healer *healer)
+{
+        afr_private_t *priv     = healer->this->private;
+        crawl_event_t *ev       = &healer->crawl_event;
+        crawl_event_t *snapshot = NULL;
+
+        time (&ev->end_time);
+        snapshot = memdup (ev, sizeof (*ev));
+        ev->start_time = 0;
+
+        if (snapshot == NULL)
+                return;
+
+        if (eh_save_history (priv->shd.statistics[healer->subvol],
+                             snapshot) < 0)
+                GF_FREE (snapshot);
+}
+
+/*
+ * Per-entry callback for the index directory scan; @data is the
+ * struct subvol_healer driving the scan.
+ *
+ * Returns -EBUSY when the self-heal daemon is disabled. Entries whose
+ * name does not parse as a gfid are skipped. For the rest the gfid is
+ * healed, and afterwards:
+ *   - on -ENOENT / -ESTALE the index entry is purged;
+ *   - on a result of 2 a zero-changelog post-op is sent.
+ * In all of those cases 0 is returned.
+ */
+int
+afr_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                    void *data)
+{
+        struct subvol_healer *healer  = data;
+        afr_private_t        *priv    = healer->this->private;
+        uuid_t                gfid    = {0};
+        uint64_t              ia_type = IA_INVAL;
+        int                   rc      = 0;
+
+        if (!priv->shd.enabled)
+                return -EBUSY;
+
+        gf_msg_debug (healer->this->name, 0, "got entry: %s",
+                      entry->d_name);
+
+        if (gf_uuid_parse (entry->d_name, gfid) != 0)
+                return 0;
+
+        inode_ctx_get2 (parent->inode, subvol, NULL, &ia_type);
+
+        rc = afr_shd_selfheal (healer, healer->subvol, gfid);
+
+        if (rc == -ENOENT || rc == -ESTALE) {
+                afr_shd_index_purge (subvol, parent->inode, entry->d_name,
+                                     ia_type);
+        } else if (rc == 2) {
+                /* If bricks crashed in pre-op after creating the
+                 * indices/xattrop link but before setting the afr
+                 * changelogs, we end up with stale xattrop links but zero
+                 * changelogs. Remove such entries by sending a post-op
+                 * with zero changelogs.
+                 */
+                afr_shd_zero_xattrop (healer->this, gfid);
+        }
+
+        return 0;
+}
+
+/*
+ * Scan the index directory identified by @vgfid on the healer's
+ * subvolume, calling afr_shd_index_heal() for each entry.
+ *
+ * Returns a negative errno on failure. When the scan itself returns 0 the
+ * healer's current healed_count is returned instead; any other scan
+ * result is passed through unchanged.
+ */
+int
+afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid)
+{
+        afr_private_t *priv      = healer->this->private;
+        xlator_t      *brick     = priv->children[healer->subvol];
+        loc_t          index_loc = {0};
+        dict_t        *req       = NULL;
+        call_frame_t  *frame     = NULL;
+        int            rc        = 0;
+
+        frame = afr_frame_create (healer->this);
+        if (!frame) {
+                rc = -ENOMEM;
+                goto cleanup;
+        }
+
+        index_loc.inode = afr_shd_index_inode (healer->this, brick, vgfid);
+        if (!index_loc.inode) {
+                gf_msg (healer->this->name, GF_LOG_WARNING,
+                        0, AFR_MSG_INDEX_DIR_GET_FAILED,
+                        "unable to get index-dir on %s", brick->name);
+                rc = -errno;
+                goto cleanup;
+        }
+
+        req = dict_new ();
+        if (!req || dict_set_int32 (req, "get-gfid-type", 1)) {
+                rc = -ENOMEM;
+                goto cleanup;
+        }
+
+        rc = syncop_mt_dir_scan (frame, brick, &index_loc,
+                                 GF_CLIENT_PID_SELF_HEALD, healer,
+                                 afr_shd_index_heal, req,
+                                 priv->shd.max_threads,
+                                 priv->shd.wait_qlength);
+        if (rc == 0)
+                rc = healer->crawl_event.healed_count;
+
+cleanup:
+        loc_wipe (&index_loc);
+
+        if (req)
+                dict_unref (req);
+        if (frame)
+                AFR_STACK_DESTROY (frame);
+        return rc;
+}
+
+/*
+ * Sweep the three index directories in order: xattrop, dirty and
+ * entry-changes.
+ *
+ * Stops at the first sweep that fails and returns its negative result;
+ * otherwise returns the sum of the three sweep results.
+ */
+int
+afr_shd_index_sweep_all (struct subvol_healer *healer)
+{
+        char   *index_dirs[] = {
+                GF_XATTROP_INDEX_GFID,
+                GF_XATTROP_DIRTY_GFID,
+                GF_XATTROP_ENTRY_CHANGES_GFID,
+        };
+        size_t  idx   = 0;
+        int     total = 0;
+        int     rc    = 0;
+
+        for (idx = 0; idx < sizeof (index_dirs) / sizeof (index_dirs[0]);
+             idx++) {
+                rc = afr_shd_index_sweep (healer, index_dirs[idx]);
+                if (rc < 0)
+                        return rc;
+                total += rc;
+        }
+
+        return total;
+}
+
+/*
+ * Per-entry callback for the full crawl; @data is the struct subvol_healer
+ * driving the crawl.
+ *
+ * Returns -EBUSY when the self-heal daemon is disabled. Otherwise the
+ * entry's name is healed under its parent, then the entry's gfid is
+ * healed; the results of both heals are ignored and 0 is returned.
+ */
+int
+afr_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                   void *data)
+{
+        struct subvol_healer *healer = data;
+        afr_private_t        *priv   = healer->this->private;
+
+        if (!priv->shd.enabled)
+                return -EBUSY;
+
+        afr_shd_selfheal_name (healer, healer->subvol,
+                               parent->inode->gfid, entry->d_name);
+        afr_shd_selfheal (healer, healer->subvol, entry->d_stat.ia_gfid);
+
+        return 0;
+}
+
+/*
+ * Walk the tree rooted at @inode on the healer's subvolume with
+ * syncop_ftw(), calling afr_shd_full_heal() for each entry. Returns the
+ * result of the walk.
+ */
+int
+afr_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
+{
+        afr_private_t *priv  = healer->this->private;
+        loc_t          start = {0};
+
+        start.inode = inode;
+
+        return syncop_ftw (priv->children[healer->subvol], &start,
+                           GF_CLIENT_PID_SELF_HEALD, healer,
+                           afr_shd_full_heal);
+}
+
+
+/*
+ * Thread body of an index healer; @data is its struct subvol_healer.
+ *
+ * Each round waits for a sweep request or the timeout, checks that the
+ * subvolume is local, then runs index sweeps back to back for as long as a
+ * sweep reports a positive result, sleeping one second between sweeps.
+ *
+ * The outer loop is only left through the `break` inside ASSERT_LOCAL,
+ * i.e. when the subvolume is not local and safe_break() succeeds.
+ */
+void *
+afr_shd_index_healer (void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ healer = data;
+ THIS = this = healer->this;
+ priv = this->private;
+
+ for (;;) {
+ afr_shd_healer_wait (healer);
+
+ /* May `break` out of or `continue` this for loop. */
+ ASSERT_LOCAL(this, healer);
+ priv->local[healer->subvol] = healer->local;
+
+ do {
+ gf_msg_debug (this->name, 0,
+ "starting index sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+
+ afr_shd_sweep_prepare (healer);
+
+ ret = afr_shd_index_sweep_all (healer);
+
+ afr_shd_sweep_done (healer);
+ /*
+ As long as at least one gfid was
+ healed, keep retrying. We may have
+ just healed a directory and thereby
+ created entries for other gfids which
+ could not be healed thus far.
+ */
+
+ gf_msg_debug (this->name, 0,
+ "finished index sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+ /*
+ Give a pause before retrying to avoid a busy loop
+ in case the only entry in index is because of
+ an ongoing I/O.
+ */
+ sleep (1);
+ } while (ret > 0);
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Thread body of a full healer; @data is its struct subvol_healer.
+ *
+ * Each round waits under healer->mutex for a sweep request. If the wait
+ * ends without one, healer->running is cleared and the thread exits.
+ * Otherwise, provided the subvolume is local, one full sweep starting at
+ * the root of this->itable is performed.
+ */
+void *
+afr_shd_full_healer (void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ pthread_mutex_lock (&healer->mutex);
+ {
+ run = __afr_shd_healer_wait (healer);
+ /* Clear `running` under the same lock that observed "no request". */
+ if (!run)
+ healer->running = _gf_false;
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ if (!run)
+ break;
+
+ /* May `break` out of or `continue` this for loop. */
+ ASSERT_LOCAL(this, healer);
+
+ gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "starting full sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+
+ afr_shd_sweep_prepare (healer);
+
+ /* The result of the sweep is not examined. */
+ afr_shd_full_sweep (healer, this->itable->root);
+
+ afr_shd_sweep_done (healer);
+
+ gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "finished full sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Initialise the synchronisation objects and state flags of @healer and
+ * bind it to @this.
+ *
+ * Returns 0 on success or the error returned by pthread_mutex_init() /
+ * pthread_cond_init(). On failure no pthread object is left initialised:
+ * if the condition variable cannot be set up, the mutex created just
+ * before it is destroyed again.
+ */
+int
+afr_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
+{
+        int ret = 0;
+
+        ret = pthread_mutex_init (&healer->mutex, NULL);
+        if (ret)
+                goto out;
+
+        ret = pthread_cond_init (&healer->cond, NULL);
+        if (ret) {
+                /* Undo the mutex so a failed init leaves nothing behind. */
+                pthread_mutex_destroy (&healer->mutex);
+                goto out;
+        }
+
+        healer->this = this;
+        healer->running = _gf_false;
+        healer->rerun = _gf_false;
+        healer->local = _gf_false;
+out:
+        return ret;
+}
+
+
+/*
+ * Request a sweep from @healer, starting its thread if necessary.
+ *
+ * Under healer->mutex: if the healer is already running its condition
+ * variable is signalled, otherwise a thread running @threadfn is created
+ * and the healer is marked running. In both cases healer->rerun is then
+ * set.
+ *
+ * Returns 0 on success, or the error from gf_thread_create(); in the
+ * error case neither flag is modified.
+ */
+int
+afr_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
+                      void *(threadfn)(void *))
+{
+        int rc = 0;
+
+        pthread_mutex_lock (&healer->mutex);
+
+        if (healer->running) {
+                pthread_cond_signal (&healer->cond);
+        } else {
+                rc = gf_thread_create (&healer->thread, NULL,
+                                       threadfn, healer);
+                if (rc == 0)
+                        healer->running = 1;
+        }
+
+        if (rc == 0)
+                healer->rerun = 1;
+
+        pthread_mutex_unlock (&healer->mutex);
+
+        return rc;
+}
+
+
+/*
+ * Request a full sweep on child number @subvol, starting the full healer
+ * thread if it is not running. Returns the result of
+ * afr_shd_healer_spawn().
+ */
+int
+afr_shd_full_healer_spawn (xlator_t *this, int subvol)
+{
+        struct subvol_healer *healer = NTH_FULL_HEALER (this, subvol);
+
+        return afr_shd_healer_spawn (this, healer, afr_shd_full_healer);
+}
+
+
+/*
+ * Request an index sweep on child number @subvol, starting the index
+ * healer thread if it is not running. Returns the result of
+ * afr_shd_healer_spawn().
+ */
+int
+afr_shd_index_healer_spawn (xlator_t *this, int subvol)
+{
+        struct subvol_healer *healer = NTH_INDEX_HEALER (this, subvol);
+
+        return afr_shd_healer_spawn (this, healer, afr_shd_index_healer);
+}
+
+
+/*
+ * Append the statistics of one crawl to @output.
+ *
+ * Entries are keyed as "<field>-<xl_id>-<child>-<n>", where xl_id is read
+ * from @output under this->name and n is the current value of
+ * "statistics-<xl_id>-<child>-count"; that counter is incremented once all
+ * fields have been added. Events whose start_time is 0 are skipped.
+ *
+ * Returns 0 on success (or when the event was skipped), otherwise the
+ * error of the dict operation that failed.
+ */
+int
+afr_shd_dict_add_crawl_event (xlator_t *this, dict_t *output,
+ crawl_event_t *crawl_event)
+{
+ int ret = 0;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = 0;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+ int child = -1;
+
+ child = crawl_event->child;
+ healed_count = crawl_event->healed_count;
+ split_brain_count = crawl_event->split_brain_count;
+ heal_failed_count = crawl_event->heal_failed_count;
+ crawl_type = crawl_event->crawl_type;
+
+ /* No start time recorded: nothing to report for this event. */
+ if (!crawl_event->start_time)
+ goto out;
+
+ /* NOTE(review): the gf_strdup() results are not checked for NULL, and
+ * ctime() returns a pointer to static storage - confirm this is safe
+ * for the threads that reach this function. */
+ start_time_str = gf_strdup (ctime (&crawl_event->start_time));
+
+ if (crawl_event->end_time)
+ end_time_str = gf_strdup (ctime (&crawl_event->end_time));
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_GET_FAILED, "xl does not have id");
+ goto out;
+ }
+
+ /* The lookup result is overwritten below; if the counter is missing,
+ * count keeps its initial value of 0. */
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+
+ snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_count to output");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, split_brain_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_split_brain_count to output");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_str (output, key, crawl_type);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_type to output");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, heal_failed_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_failed_count to output");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, start_time_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_start_time to output");
+ goto out;
+ } else {
+ /* Stored in the dict: clear the local so `out` does not free it. */
+ start_time_str = NULL;
+ }
+
+ /* No end time yet means the crawl is still in progress. */
+ if (!end_time_str)
+ progress = 1;
+ else
+ progress = 0;
+
+ snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ if (!end_time_str)
+ end_time_str = gf_strdup ("Could not determine the end time");
+ ret = dict_set_dynstr (output, key, end_time_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_end_time to output");
+ goto out;
+ } else {
+ /* Stored in the dict: clear the local so `out` does not free it. */
+ end_time_str = NULL;
+ }
+
+ snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ ret = dict_set_int32 (output, key, progress);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_inprogress to output");
+ goto out;
+ }
+
+ /* All fields are in place: publish the new entry count. */
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment the counter.");
+ goto out;
+ }
+out:
+ /* Frees only the strings that were not stored in the dict. */
+ GF_FREE (start_time_str);
+ GF_FREE (end_time_str);
+ return ret;
+}
+
+
+/*
+ * Append @path (and, when @tv is given, its timestamp) to @output for
+ * child number @child.
+ *
+ * The path is stored under "<xl_id>-<child>-<n>" with dict_set_dynstr(),
+ * the seconds of @tv under "<xl_id>-<child>-<n>-time", and the counter
+ * "<xl_id>-<child>-count" is then set to n + 1. xl_id is read from
+ * @output under this->name.
+ *
+ * Returns 0 on success, otherwise the error of the failing dict call.
+ * NOTE(review): when the xl_id lookup fails @path has not been handed to
+ * the dict - confirm who is expected to free it in that case.
+ */
+int
+afr_shd_dict_add_path (xlator_t *this, dict_t *output, int child, char *path,
+                       struct timeval *tv)
+{
+        char     key[256] = {0};
+        uint64_t index    = 0;
+        int      xl_id    = 0;
+        int      rc       = -1;
+
+        rc = dict_get_int32 (output, this->name, &xl_id);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, -rc,
+                        AFR_MSG_DICT_GET_FAILED, "xl does not have id");
+                return rc;
+        }
+
+        /* A failed lookup leaves index at 0; the result is overwritten. */
+        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+        rc = dict_get_uint64 (output, key, &index);
+
+        snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, index);
+        rc = dict_set_dynstr (output, key, path);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, -rc,
+                        AFR_MSG_DICT_SET_FAILED, "%s: Could not add to output",
+                        path);
+                return rc;
+        }
+
+        if (tv) {
+                snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+                          child, index);
+                rc = dict_set_uint32 (output, key, tv->tv_sec);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR,
+                                -rc, AFR_MSG_DICT_SET_FAILED,
+                                "%s: Could not set time",
+                                path);
+                        return rc;
+                }
+        }
+
+        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+        rc = dict_set_uint64 (output, key, index + 1);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        -rc, AFR_MSG_DICT_SET_FAILED,
+                        "Could not increment count");
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * eh_dump() callback for the split-brain history; @data is the output
+ * dict.
+ *
+ * Events whose child's index healer is not marked local are skipped.
+ * Otherwise a copy of the event's path is added to the output together
+ * with the buffer's timestamp. The result of adding it is ignored.
+ *
+ * Returns 0, or -ENOMEM when the path cannot be duplicated.
+ */
+int
+afr_add_shd_event (circular_buffer_t *cb, void *data)
+{
+        xlator_t      *this      = THIS;
+        afr_private_t *priv      = this->private;
+        shd_event_t   *event     = cb->data;
+        dict_t        *output    = data;
+        char          *path_copy = NULL;
+
+        if (!priv->shd.index_healers[event->child].local)
+                return 0;
+
+        path_copy = gf_strdup (event->path);
+        if (!path_copy)
+                return -ENOMEM;
+
+        afr_shd_dict_add_path (this, output, event->child, path_copy,
+                               &cb->tv);
+        return 0;
+}
+
+/*
+ * eh_dump() callback for a statistics history; @data is the output dict.
+ *
+ * Events whose child's index healer is not marked local are skipped; the
+ * rest are added with afr_shd_dict_add_crawl_event(), whose result is
+ * ignored. Always returns 0.
+ */
+int
+afr_add_crawl_event (circular_buffer_t *cb, void *data)
+{
+        xlator_t      *this   = THIS;
+        afr_private_t *priv   = this->private;
+        crawl_event_t *event  = cb->data;
+        dict_t        *output = data;
+
+        if (priv->shd.index_healers[event->child].local)
+                afr_shd_dict_add_crawl_event (this, output, event);
+
+        return 0;
+}
+
+
+/*
+ * Set up the self-heal daemon state of @this: the inode table, one index
+ * healer and one full healer per child, the split-brain history and one
+ * statistics history per child.
+ *
+ * Returns 0 on success. On failure a non-zero value is returned: -1 for
+ * allocation failures, or the error from afr_shd_healer_init().
+ * NOTE(review): nothing allocated before the failing step is released
+ * here - confirm the caller cleans up.
+ */
+int
+afr_selfheal_daemon_init (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+
+ shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = afr_shd_healer_init (this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ /* ret is 0 from here on whenever child_count > 0, so the allocation
+ * failures below return 0 - NOTE(review): confirm this is intended. */
+ shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = afr_shd_healer_init (this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->split_brain)
+ goto out;
+
+ shd->statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!shd->statistics)
+ goto out;
+
+ for (i = 0; i < priv->child_count ; i++) {
+ shd->statistics[i] = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ afr_destroy_crawl_event_data);
+ if (!shd->statistics[i])
+ goto out;
+ /* Label each healer's live crawl event with its child and type. */
+ shd->full_healers[i].crawl_event.child = i;
+ shd->full_healers[i].crawl_event.crawl_type = "FULL";
+ shd->index_healers[i].crawl_event.child = i;
+ shd->index_healers[i].crawl_event.crawl_type = "INDEX";
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+/*
+ * Trigger an index sweep on child number @subvol. The result of spawning
+ * the healer is ignored; always returns 0.
+ */
+int
+afr_selfheal_childup (xlator_t *this, int subvol)
+{
+        (void) afr_shd_index_healer_spawn (this, subvol);
+
+        return 0;
+}
+
+
+/*
+ * Read the GF_XATTROP_INDEX_COUNT virtual xattr from child number @i (on
+ * the root of this->itable) and store its value in @count.
+ *
+ * Returns 0 on success, the negative result of the getxattr if that
+ * fails, or the error from reading the value out of the response dict.
+ */
+int
+afr_shd_get_index_count (xlator_t *this, int i, uint64_t *count)
+{
+        afr_private_t *priv  = this->private;
+        xlator_t      *brick = priv->children[i];
+        loc_t          root  = {0, };
+        dict_t        *rsp   = NULL;
+        int            rc    = -1;
+
+        root.inode = inode_ref (this->itable->root);
+        gf_uuid_copy (root.gfid, root.inode->gfid);
+
+        rc = syncop_getxattr (brick, &root, &rsp,
+                              GF_XATTROP_INDEX_COUNT, NULL, NULL);
+        if (rc >= 0)
+                rc = dict_get_uint64 (rsp, GF_XATTROP_INDEX_COUNT, count);
+
+        if (rsp)
+                dict_unref (rsp);
+        loc_wipe (&root);
+
+        return rc;
+}
+
+
+/*
+ * Execute the self-heal daemon operation requested in @input and write
+ * per-child results to @output.
+ *
+ * The operation is read from the "xl-op" key and the xlator id from the
+ * key this->name. The id is copied into @output while the operation runs
+ * (the helpers read it from there) and removed again before returning.
+ * Per-child status strings are stored under "<xl_id>-<child>-status".
+ *
+ * Returns op_ret, which only the individual operations set; results of
+ * the dict_set_* calls inside the switch are stored in ret but not
+ * propagated.
+ * NOTE(review): if reading "xl-op" or the xlator id fails, or the id
+ * cannot be stored in @output, control jumps to `out` with op_ret still
+ * 0 - confirm that reporting success there is intended. The same applies
+ * to the `default` case, which only logs.
+ */
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct subvol_healer *healer = NULL;
+ int i = 0;
+ char key[64];
+ int op_ret = 0;
+ uint64_t cnt = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_SHD_OP_HEAL_INDEX:
+ /* Succeeds unless some child is down or fewer than two are up. */
+ op_ret = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ op_ret = -1;
+ } else if (AFR_COUNT (priv->child_up,
+ priv->child_count) < 2) {
+ ret = dict_set_str (output, key,
+ "< 2 bricks in replica are up");
+ op_ret = -1;
+ } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
+ ret = dict_set_str (output, key,
+ "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key,
+ "Started self-heal");
+ afr_shd_index_healer_spawn (this, i);
+ }
+ }
+ break;
+ case GF_SHD_OP_HEAL_FULL:
+ /* Succeeds only if a full heal was started on at least one child. */
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->full_healers[i];
+ snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else if (AFR_COUNT (priv->child_up,
+ priv->child_count) < 2) {
+ ret = dict_set_str (output, key,
+ "< 2 bricks in replica are up");
+ } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
+ ret = dict_set_str (output, key,
+ "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key,
+ "Started self-heal");
+ afr_shd_full_healer_spawn (this, i);
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ /* this case has been handled in glfs-heal.c */
+ break;
+ case GF_SHD_OP_HEALED_FILES:
+ case GF_SHD_OP_HEAL_FAILED_FILES:
+ for (i = 0; i < priv->child_count; i++) {
+ snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
+ ret = dict_set_str (output, key, "Operation Not "
+ "Supported");
+ }
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ eh_dump (shd->split_brain, output, afr_add_shd_event);
+ break;
+ case GF_SHD_OP_STATISTICS:
+ /* Per child: the saved history first, then the live events of the
+ * index healer and the full healer. */
+ for (i = 0; i < priv->child_count; i++) {
+ eh_dump (shd->statistics[i], output,
+ afr_add_crawl_event);
+ afr_shd_dict_add_crawl_event (this, output,
+ &shd->index_healers[i].crawl_event);
+ afr_shd_dict_add_crawl_event (this, output,
+ &shd->full_healers[i].crawl_event);
+ }
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ /* Succeeds if at least one child is up, even when its count could
+ * not be fetched. */
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i]) {
+ snprintf (key, sizeof (key), "%d-%d-status",
+ xl_id, i);
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else {
+ snprintf (key, sizeof (key), "%d-%d-hardlinks",
+ xl_id, i);
+ ret = afr_shd_get_index_count (this, i, &cnt);
+ if (ret == 0) {
+ ret = dict_set_uint64 (output, key, cnt);
+ }
+ op_ret = 0;
+ }
+ }
+
+// ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+// STATISTICS_TO_BE_HEALED,
+// output);
+ break;
+
+ default:
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_ARG, "Unknown set op %d", op);
+ break;
+ }
+out:
+ /* Remove the xlator id that was copied into output above. */
+ dict_del (output, this->name);
+ return op_ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
new file mode 100644
index 00000000000..c6ac5ebfd1b
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -0,0 +1,80 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _AFR_SELF_HEALD_H
+#define _AFR_SELF_HEALD_H
+
+#include <pthread.h>
+
+
+/* One entry of the split-brain history. */
+typedef struct {
+ /* Index of the child the event was recorded for. */
+ int child;
+ /* Path of the affected file; freed by afr_destroy_shd_event_data(). */
+ char *path;
+} shd_event_t;
+
+/* Counters and timestamps describing one crawl (sweep) of a healer. */
+typedef struct {
+ /* Index of the child being crawled. */
+ int child;
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+
+ /* If start_time is 0, it means the crawler is not in progress
+ and the stats are not valid */
+ time_t start_time;
+ /* If start_time is NOT 0 and end_time is 0, it means the
+ crawler is in progress */
+ time_t end_time;
+ /* Crawl kind as a string; set to "FULL" or "INDEX" at init time. */
+ char *crawl_type;
+} crawl_event_t;
+
+/* State of one healer thread (index or full) bound to one child. */
+struct subvol_healer {
+ /* Xlator this healer belongs to. */
+ xlator_t *this;
+ /* Index of the child this healer works on. */
+ int subvol;
+ /* Result of the last locality check made by the healer thread. */
+ gf_boolean_t local;
+ /* Set when the thread is created, cleared when it decides to stop. */
+ gf_boolean_t running;
+ /* Pending sweep request; consumed by the healer's wait routine. */
+ gf_boolean_t rerun;
+ /* Statistics of the sweep currently (or last) run by this healer. */
+ crawl_event_t crawl_event;
+ /* Held while running/rerun are examined or changed. */
+ pthread_mutex_t mutex;
+ /* Signalled to wake a waiting healer when a sweep is requested. */
+ pthread_cond_t cond;
+ pthread_t thread;
+};
+
+/* Self-heal daemon state embedded in the afr private data. */
+typedef struct {
+ gf_boolean_t iamshd;
+ /* While false, healers keep waiting and heal callbacks return -EBUSY. */
+ gf_boolean_t enabled;
+ /* Seconds a healer waits for a sweep request before timing out. */
+ int timeout;
+ /* Arrays with one healer per child. */
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
+
+ /* History of shd_event_t entries for files found in split-brain. */
+ eh_t *split_brain;
+ /* One history of crawl_event_t entries per child. */
+ eh_t **statistics;
+ /* Passed to syncop_mt_dir_scan() for index sweeps. */
+ uint32_t max_threads;
+ uint32_t wait_qlength;
+} afr_self_heald_t;
+
+
+int
+afr_selfheal_childup (xlator_t *this, int subvol);
+
+int
+afr_selfheal_daemon_init (xlator_t *this);
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+int
+afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
+
+int
+afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type);
+#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 2d689e389bc..df4662258bb 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,1408 +1,2059 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
#include <signal.h>
+typedef enum {
+ AFR_TRANSACTION_PRE_OP,
+ AFR_TRANSACTION_POST_OP,
+} afr_xattrop_type_t;
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
- of RENAME */
-#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+gf_boolean_t
+afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this);
+gf_boolean_t
+afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
-static void
-afr_pid_save (call_frame_t *frame)
+int
+afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op);
+
+void
+afr_zero_fill_stat (afr_local_t *local)
{
- afr_local_t * local = NULL;
+ if (!local)
+ return;
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION) {
+ gf_zero_fill_stat (&local->cont.inode_wfop.prebuf);
+ gf_zero_fill_stat (&local->cont.inode_wfop.postbuf);
+ } else if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ gf_zero_fill_stat (&local->cont.dir_fop.buf);
+ gf_zero_fill_stat (&local->cont.dir_fop.preparent);
+ gf_zero_fill_stat (&local->cont.dir_fop.postparent);
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION)
+ return;
+ gf_zero_fill_stat (&local->cont.dir_fop.prenewparent);
+ gf_zero_fill_stat (&local->cont.dir_fop.postnewparent);
+ }
+}
- local = frame->local;
+/* In case of errors afr needs to choose which xdata from lower xlators it needs
+ * to unwind with. The way it is done is by checking if there are
+ * any good subvols which failed. Give preference to errnos other than
+ * ENOTCONN even if the child is source */
+void
+afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
+ inode_t *inode1, unsigned char *readable1,
+ inode_t *inode2, unsigned char *readable2)
+{
+ int s = -1;/*selection*/
+ int i = 0;
+ unsigned char *readable = NULL;
+
+ if (local->xdata_rsp) {
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ readable = alloca0 (priv->child_count * sizeof (*readable));
+ if (inode2 && readable2) {/*rename fop*/
+ AFR_INTERSECT (readable, readable1, readable2,
+ priv->child_count);
+ } else {
+ memcpy (readable, readable1,
+ sizeof (*readable) * priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (local->replies[i].op_errno == ENOTCONN)
+ continue;
+
+ /*Order is important in the following condition*/
+ if ((s < 0) || (!readable[s] && readable[i]))
+ s = i;
+ }
+
+ if (s != -1 && local->replies[s].xdata) {
+ local->xdata_rsp = dict_ref (local->replies[s].xdata);
+ } else if (s == -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
- local->saved_pid = frame->root->pid;
+ if (!local->replies[i].xdata)
+ continue;
+ local->xdata_rsp = dict_ref (local->replies[i].xdata);
+ break;
+ }
+ }
}
+gf_boolean_t
+afr_needs_changelog_update (afr_local_t *local)
+{
+ if (local->transaction.type == AFR_DATA_TRANSACTION)
+ return _gf_true;
+ if (!local->optimistic_change_log)
+ return _gf_true;
+ return _gf_false;
+}
-static void
-afr_pid_restore (call_frame_t *frame)
+static int32_t
+afr_quorum_errno (afr_private_t *priv)
{
- afr_local_t * local = NULL;
+ if (priv->quorum_reads)
+ return ENOTCONN;
+ return EROFS;
+}
+
+int
+__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ unsigned char *failed_subvols = NULL;
+ int i = 0;
local = frame->local;
+ priv = this->private;
- frame->root->pid = local->saved_pid;
-}
+ failed_subvols = local->transaction.failed_subvols;
+ call_count = priv->child_count - AFR_COUNT (failed_subvols,
+ priv->child_count);
-static void
-__mark_all_pending (int32_t *pending[], int child_count,
- afr_transaction_type type)
-{
- int i;
- int j;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] && !failed_subvols[i]) {
+ local->transaction.wind (frame, this, i);
+
+ if (!--call_count)
+ break;
+ }
}
+
+ return 0;
}
-static void
-__mark_child_dead (int32_t *pending[], int child_count, int child,
- afr_transaction_type type)
+int
+__afr_txn_write_done (call_frame_t *frame, xlator_t *this)
{
- int j;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t unwind = _gf_false;
+
+ priv = this->private;
+ local = frame->local;
- j = afr_index_for_transaction_type (type);
-
- pending[child][j] = 0;
+ if (priv->consistent_metadata) {
+ LOCK (&frame->lock);
+ {
+ unwind = (local->transaction.main_frame != NULL);
+ }
+ UNLOCK (&frame->lock);
+ if (unwind)/*It definitely did post-op*/
+ afr_zero_fill_stat (local);
+ }
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
}
-static void
-__mark_fop_failed_on_fd (fd_t *fd, xlator_t *this,
- int child_index)
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
- int ret = 0;
+ local = frame->local;
- ret = fd_ctx_get (fd, this, &ctx);
+ LOCK (&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- if (ret < 0)
- goto out;
+ return fop_frame;
+}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->child_failed[child_index] = 1;
-out:
- return;
+static void
+afr_save_lk_owner (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+
+ local = frame->local;
+
+ local->saved_lk_owner = frame->root->lk_owner;
}
static void
-__mark_failed_children (int32_t *pending[], int child_count,
- xlator_t *this, fd_t *fd, afr_transaction_type type)
+afr_restore_lk_owner (call_frame_t *frame)
{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ afr_local_t * local = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
+ local = frame->local;
- ret = fd_ctx_get (fd, this, &ctx);
+ frame->root->lk_owner = local->saved_lk_owner;
+}
- if (ret < 0)
- goto out;
+void
+__mark_all_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ local = frame->local;
+ priv = this->private;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ for (i = 0; i < priv->child_count; i++) {
+ local->transaction.failed_subvols[i] = 0;
+ }
+}
+
+void
+afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_transaction_type type = -1;
+ dict_t *xdata = NULL;
+ int **matrix = NULL;
+ int idx = -1;
+ int i = 0;
+ int j = 0;
- if (fd_ctx->child_failed[i])
- pending[i][j] = 0;
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
+ idx = afr_index_for_transaction_type (type);
+ matrix = ALLOC_MATRIX (priv->child_count, int);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op_xdata[i])
+ continue;
+ xdata = local->transaction.pre_op_xdata[i];
+ afr_selfheal_fill_matrix (this, matrix, i, idx, xdata);
}
-
-out:
- return;
-}
+ memset (local->transaction.pre_op_sources, 1, priv->child_count);
-static void
-__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+ /*If lock or pre-op failed on a brick, it is not a source. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->transaction.pre_op_sources[i] = 0;
+ }
+
+ /* If brick is blamed by others, it is not a source. */
+ for (i = 0; i < priv->child_count; i++)
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j] != 0)
+ local->transaction.pre_op_sources[j] = 0;
+
+ /*We don't need the xattrs any more. */
+ for (i = 0; i < priv->child_count; i++)
+ if (local->transaction.pre_op_xdata[i]) {
+ dict_unref (local->transaction.pre_op_xdata[i]);
+ local->transaction.pre_op_xdata[i] = NULL;
+ }
+}
+
+void
+afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ gf_boolean_t fop_failed = _gf_false;
+ unsigned char *pre_op_sources = NULL;
+ int i = 0;
local = frame->local;
+ priv = this->private;
+ pre_op_sources = local->transaction.pre_op_sources;
- ret = fd_ctx_get (local->fd, this, &ctx);
+ if (priv->arbiter_count != 1 || local->op_ret < 0)
+ return;
- if (ret < 0)
- goto out;
+ /* If the fop failed on the brick, it is not a source. */
+ for (i = 0; i < priv->child_count; i++)
+ if (local->transaction.failed_subvols[i])
+ pre_op_sources[i] = 0;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ switch (AFR_COUNT (pre_op_sources, priv->child_count)) {
+ case 1:
+ if (pre_op_sources[ARBITER_BRICK_INDEX])
+ fop_failed = _gf_true;
+ break;
+ case 0:
+ fop_failed = _gf_true;
+ break;
+ }
- if ((local->op == GF_FOP_WRITE)
- || (local->op == GF_FOP_FTRUNCATE)) {
- fd_ctx->pre_op_done[child_index] = 1;
+ if (fop_failed) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
}
-out:
return;
}
-
-static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+void
+afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this)
{
- int i;
- int j;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_sources_count = 0;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ priv = this->private;
+ local = frame->local;
- if (!child_up[i])
- pending[i][j] = 0;
+ afr_compute_pre_op_sources (frame, this);
+ pre_op_sources_count = AFR_COUNT (local->transaction.pre_op_sources,
+ priv->child_count);
+
+ /* If arbiter is the only source, do not proceed. */
+ if (pre_op_sources_count < 2 &&
+ local->transaction.pre_op_sources[ARBITER_BRICK_INDEX]) {
+ local->internal_lock.lock_cbk = local->transaction.done;
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ afr_restore_lk_owner (frame);
+ afr_unlock (frame, this);
+ } else {
+ local->transaction.fop (frame, this);
}
-}
+ return;
+}
-static void
-__mark_all_success (int32_t *pending[], int child_count,
- afr_transaction_type type)
+int
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
- int i;
- int j;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
+
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush can't clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner (frame);
+ frame->root->lk_owner =
+ local->transaction.main_frame->root->lk_owner;
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+ just now, before OP */
+ afr_changelog_pre_op_update (frame, this);
+
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ if (fd)
+ afr_delayed_changelog_wake_up (this, fd);
+ if (priv->arbiter_count == 1) {
+ afr_txn_arbitrate_fop (frame, this);
+ } else {
+ local->transaction.fop (frame, this);
}
-}
+ return 0;
+}
static int
-__is_first_write_on_fd (xlator_t *this, fd_t *fd)
+__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
{
- int op_ret = 0;
- int _ret = -1;
- int i = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ int ret = 0;
- afr_private_t *priv = NULL;
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ if (priv->data_change_log)
+ ret = 1;
- priv = this->private;
+ break;
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
-
- if (_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx on fd=%p",
- fd);
- goto out;
- }
+ case AFR_METADATA_TRANSACTION:
+ if (priv->metadata_change_log)
+ ret = 1;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ break;
- op_ret = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (fd_ctx->pre_op_done[i] == 0)
- continue;
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ if (priv->entry_change_log)
+ ret = 1;
- op_ret = 0;
- }
+ break;
}
-out:
- UNLOCK (&fd->lock);
- return op_ret;
+ return ret;
}
static int
-__if_fd_pre_op_done (xlator_t *this, fd_t *fd, int child_index)
+__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
int op_ret = 0;
- int _ret = -1;
+ afr_transaction_type type = -1;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
+ if (__changelog_enabled (priv, type)) {
+ switch (local->op) {
- if (_ret < 0) {
- goto out;
- }
+ case GF_FOP_WRITE:
+ case GF_FOP_FTRUNCATE:
+ op_ret = 1;
+ break;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ case GF_FOP_FLUSH:
+ op_ret = 0;
+ break;
- if (fd_ctx->pre_op_done[child_index]) {
+ default:
op_ret = 1;
}
- fd_ctx->pre_op_done[child_index] = 0;
}
-out:
- UNLOCK (&fd->lock);
return op_ret;
}
-static int
-afr_pre_op_done_count (xlator_t *this, fd_t *fd, unsigned char *child_up)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending)
{
int i = 0;
- int count = 0;
+ int ret = 0;
- int _ret = 0;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ for (i = 0; i < priv->child_count; i++) {
- afr_private_t *priv = NULL;
+ ret = dict_set_static_bin (xattr, priv->pending_key[i],
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int));
+ /* AFR_NUM_CHANGE_LOGS is 3: data+metadata+entry */
- priv = this->private;
+ if (ret)
+ break;
+ }
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
+ return ret;
+}
- if (_ret < 0) {
- goto out;
- }
+int
+afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
+{
+ int ret = 0;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ ret = priv->child_count;
+ break;
- for (i = 0; i < priv->child_count; i++) {
- if (fd_ctx->pre_op_done[i] && child_up[i]) {
- count++;
- }
- }
+ case AFR_METADATA_TRANSACTION:
+ ret = priv->child_count;
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = priv->child_count;
+ break;
}
-out:
- UNLOCK (&fd->lock);
- return count;
+ return ret;
+}
+
+/* {{{ pending */
+
+
+int
+afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
+
+ return 0;
}
-static int
-__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
{
- int ret = 0;
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
+
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
+ }
+ return NULL;
+}
- switch (type) {
- case AFR_DATA_TRANSACTION:
- if (priv->data_change_log)
- ret = 1;
-
- break;
+unsigned char*
+afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+ unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ locked_nodes = int_lock->lockee[0].locked_nodes;
+ break;
+ }
+ return locked_nodes;
+}
- case AFR_METADATA_TRANSACTION:
- if (priv->metadata_change_log)
- ret = 1;
- break;
+int
+afr_changelog_call_count (afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned int child_count)
+{
+ int call_count = 0;
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- if (priv->entry_change_log)
- ret = 1;
+ call_count = AFR_COUNT(pre_op_subvols, child_count);
- break;
-
- case AFR_FLUSH_TRANSACTION:
- ret = 1;
- }
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
- return ret;
+ return call_count;
}
-static int
-__changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- fd_t * fd = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- int op_ret = 0;
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ local->transaction.failed_subvols[i])
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+
+void
+afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
+
+ priv = this->private;
local = frame->local;
-
- if (__changelog_enabled (priv, local->transaction.type)) {
- switch (local->op) {
-
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- /*
- if it's a data transaction, we write the changelog
- only on the first write on an fd
- */
-
- fd = local->fd;
- if (!fd || __is_first_write_on_fd (this, fd))
- op_ret = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least one subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
break;
+ }
+ i_errno = local->replies[i].op_errno;
- case GF_FOP_FLUSH:
- /* only do post-op on flush() */
-
- op_ret = 0;
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
break;
+ }
- default:
- op_ret = 1;
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
}
}
- return op_ret;
+ if (matching_errors)
+ __mark_all_success (frame, this);
}
-
-static int
-__changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_has_quorum (unsigned char *subvols, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ unsigned int quorum_count = 0;
+ afr_private_t *priv = NULL;
+ unsigned int up_children_count = 0;
- int op_ret = 0;
- afr_transaction_type type = -1;
+ priv = this->private;
+ up_children_count = AFR_COUNT (subvols, priv->child_count);
- priv = this->private;
- local = frame->local;
- type = local->transaction.type;
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ /*
+ * Special case for even numbers of nodes in auto-quorum:
+ * if we have exactly half children up
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2 == 0) &&
+ (up_children_count == (priv->child_count/2)))
+ return subvols[0];
+ }
- if (__changelog_enabled (priv, type)) {
- switch (local->op) {
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ quorum_count = priv->child_count/2 + 1;
+ } else {
+ quorum_count = priv->quorum_count;
+ }
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- op_ret = 0;
- break;
+ if (up_children_count >= quorum_count)
+ return _gf_true;
- case GF_FOP_FLUSH:
- op_ret = 1;
- break;
+ return _gf_false;
+}
- default:
- op_ret = 1;
- }
- }
+static gf_boolean_t
+afr_has_fop_quorum (call_frame_t *frame)
+{
+ xlator_t *this = frame->this;
+ afr_local_t *local = frame->local;
+ unsigned char *locked_nodes = NULL;
- return op_ret;
+ locked_nodes = afr_locked_nodes_get (local->transaction.type,
+ &local->internal_lock);
+ return afr_has_quorum (locked_nodes, this);
}
-
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+static gf_boolean_t
+afr_has_fop_cbk_quorum (call_frame_t *frame)
{
- int i;
- int ret = 0;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ unsigned char *success = alloca0(priv->child_count);
+ int i = 0;
for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
- /* 3 = data+metadata+entry */
-
- if (ret < 0)
- goto out;
+ if (local->transaction.pre_op[i])
+ if (!local->transaction.failed_subvols[i])
+ success[i] = 1;
}
-out:
- return ret;
+ return afr_has_quorum (success, this);
}
-
-int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
+void
+afr_handle_quorum (call_frame_t *frame)
{
- int ret = 0;
-
- switch (type) {
- case AFR_FLUSH_TRANSACTION:
- case AFR_DATA_TRANSACTION:
- ret = priv->data_lock_server_count;
- break;
-
- case AFR_METADATA_TRANSACTION:
- ret = priv->metadata_lock_server_count;
- break;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ const char *file = NULL;
+ uuid_t gfid = {0};
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- ret = priv->entry_lock_server_count;
- break;
- }
+ local = frame->local;
+ priv = frame->this->private;
- return ret;
-}
+ if (priv->quorum_count == 0)
+ return;
+ /* If the fop already failed return right away to preserve errno */
+ if (local->op_ret == -1)
+ return;
-/* {{{ unlock */
+ /*
+ * Network split may happen just after the fops are unwound, so check
+ * if the fop succeeded in a way it still follows quorum. If it doesn't,
+ * mark the fop as failure, mark the changelogs so it reflects that
+ * failure.
+ *
+ * Scenario:
+ * There are 3 mounts on 3 machines (node1, node2, node3), all writing to
+ * a single file. A network split happens such that node1 can't see
+ * node2 and node3, and neither node2 nor node3 can see node1. At the
+ * time the write is sent all the bricks are up. Just after the write fop
+ * is wound on node1, the network split happens. Node1 thinks the write
+ * failed on node2 and node3, so it marks pending changelog for those 2
+ * extended attributes on node1. Node2 and node3 think the write failed
+ * on node1, so they mark pending changelog for node1. When the network
+ * is stable again the file is already in split-brain. These checks
+ * prevent marking pending changelog on other subvolumes if the fop
+ * doesn't succeed in a way that still follows quorum. So with this fix,
+ * node1 will have all pending changelog (FOOL) because the write
+ * succeeded only on node1 but failed on node2 and node3; instead of
+ * marking pending changelogs on node2 and node3 it just treats the fop
+ * as a failure and goes into DIRTY state, whereas node2 and node3
+ * say they are sources and have pending changelog to node1, so there is
+ * no split-brain with the fix. The problem is eliminated completely.
+ */
+
+ if (afr_has_fop_cbk_quorum (frame))
+ return;
-static int
-afr_transaction_locked_nodes_count (afr_local_t *local, int child_count)
-{
- int i;
- int call_count = 0;
+ if (local->fd) {
+ gf_uuid_copy (gfid, local->fd->inode->gfid);
+ file = uuid_utoa (gfid);
+ } else {
+ loc_path (&local->loc, local->loc.name);
+ file = local->loc.path;
+ }
- for (i = 0; i < child_count; i++) {
- if (local->transaction.locked_nodes[i] & LOCKED_YES)
- call_count++;
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "%s: Failing %s as quorum is not met",
+ file, gf_fop_list[local->op]);
- if ((local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
- && (local->transaction.locked_nodes[i] & LOCKED_LOWER)) {
- call_count++;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ afr_transaction_fop_failed (frame, frame->this, i);
}
- return call_count;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno (priv);
+ switch (local->transaction.type) {
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ afr_pick_error_xdata (local, priv, local->parent,
+ local->readable, local->parent2,
+ local->readable2);
+ break;
+ default:
+ afr_pick_error_xdata (local, priv, local->inode,
+ local->readable, NULL, NULL);
+ break;
+ }
}
-
-static loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+int
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) /*
+ * Run the post-op changelog phase for the transaction in frame->local.
+ *
+ * Completion is always reported through afr_changelog_post_op_done: it is
+ * called directly on the short-circuit and error paths below, and is
+ * handed to afr_changelog_do as the resume callback otherwise.
+ *
+ * Returns 0 on every path; failures are recorded in local->op_ret and
+ * local->op_errno.
+ */
{
+ afr_private_t * priv = this->private;
+ int i = 0;
int ret = 0;
+ int idx = 0;
+ afr_local_t * local = NULL;
+ dict_t *xattr = NULL;
+ int nothing_failed = 1;
+ gf_boolean_t need_undirty = _gf_false;
- ret = strcmp (l1->path, l2->path);
-
- if (ret == 0)
- ret = strcmp (b1, b2);
+ afr_handle_quorum (frame);
+ local = frame->local;
+ idx = afr_index_for_transaction_type (local->transaction.type);
+
+ nothing_failed = afr_txn_nothing_failed (frame, this);
- if (ret <= 0)
- return l1;
+ if (afr_changelog_pre_op_uninherit (frame, this)) /* true: this txn must not clear the dirty flag */
+ need_undirty = _gf_false;
else
- return l2;
-}
+ need_undirty = _gf_true;
+ if (local->op_ret < 0 && !nothing_failed) { /* fop failed and a failure was recorded: send no xattrop */
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
-int32_t
-afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local;
- int call_count = 0;
+ if (nothing_failed && !need_undirty) { /* presumably nothing to mark pending and no dirty flag to clear */
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
- local = frame->local;
+ xattr = dict_new ();
+ if (!xattr) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
+ for (i = 0; i < priv->child_count; i++) { /* set the pending slot of this txn type for each failed subvol */
+ if (local->transaction.failed_subvols[i])
+ local->pending[i][idx] = hton32(1);
}
- UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.done (frame, this);
+ ret = afr_set_pending_dict (priv, xattr, local->pending);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
}
-
- return 0;
+
+ if (need_undirty) /* afr_changelog_do sends this dict with GF_XATTROP_ADD_ARRAY, so -1 is presumably an additive decrement; confirm */
+ local->dirty[idx] = hton32(-1);
+ else
+ local->dirty[idx] = hton32(0);
+
+ ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+out:
+ if (xattr) /* release the reference taken by dict_new above */
+ dict_unref (xattr);
+
+ return 0;
}
-int
-afr_unlock (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this) /*
+ * Tell the post-op phase whether this transaction can skip clearing the
+ * dirty flag: afr_changelog_post_op_now sets need_undirty to _gf_false
+ * when this returns _gf_true.
+ *
+ * When the type check below does not select the data-transaction path, or
+ * when there is no fd, the answer is !local->transaction.dirtied.
+ * Otherwise the per-fd counters inherited[type] and on_disk[type] are
+ * consulted and decremented under fd->lock. The result is cached in
+ * local->transaction.uninherit_value so that repeated calls return the
+ * same value without decrementing again.
+ */
{
- struct flock flock;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int type = 0;
- int i = 0;
- int call_count = 0;
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
- afr_local_t *local = NULL;
- afr_private_t * priv = this->private;
+ type = afr_index_for_transaction_type (local->transaction.type);
+ if (type != AFR_DATA_TRANSACTION) /* NOTE(review): type is an index here but is compared with a transaction type constant; confirm both have the same value for data transactions */
+ return !local->transaction.dirtied;
- loc_t * lower = NULL;
- loc_t * higher = NULL;
+ if (!fd)
+ return !local->transaction.dirtied;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_false;
- local = frame->local;
+ if (local->transaction.no_uninherit) /* set by afr_changelog_pre_op_update on a pre_op_done mismatch */
+ return _gf_false;
- /*
- pid has been restored to saved_pid in the fop,
- so set it back to frame->root
- */
+ /* This function must be idempotent. So check if we
+ were called before and return the same answer again.
- frame->root->pid = (long) frame->root;
+ It is important to keep this function idempotent for
+ the call in afr_changelog_post_op_safe() to not have
+ side effects on the call from afr_changelog_post_op_now()
+ */
+ if (local->transaction.uninherit_done)
+ return local->transaction.uninherit_value;
- call_count = afr_transaction_locked_nodes_count (local,
- priv->child_count);
+ LOCK(&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] !=
+ fd_ctx->pre_op_done[type][i]) { /* pre-op set differs from the one recorded on the fd: leave the counters alone */
+ ret = !local->transaction.dirtied;
+ goto unlock;
+ }
+ }
- if (call_count == 0) {
- local->transaction.done (frame, this);
- return 0;
+ if (fd_ctx->inherited[type]) { /* give back one inherited reference */
+ ret = _gf_true;
+ fd_ctx->inherited[type]--;
+ } else if (fd_ctx->on_disk[type]) { /* give back one on_disk reference */
+ ret = _gf_false;
+ fd_ctx->on_disk[type]--;
+ } else {
+ /* ASSERT */
+ ret = _gf_false;
+ }
+
+ if (!fd_ctx->inherited[type] && !fd_ctx->on_disk[type]) { /* both counters are zero: forget the recorded pre-op set */
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->pre_op_done[type][i] = 0;
+ }
}
+unlock:
+ UNLOCK(&fd->lock);
- local->call_count = call_count;
+ local->transaction.uninherit_done = _gf_true;
+ local->transaction.uninherit_value = ret;
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = local->transaction.start;
- flock.l_len = local->transaction.len;
- flock.l_type = F_UNLCK;
+ return ret;
+}
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
-
- if (local->transaction.locked_nodes[i] & LOCKED_YES) {
- if (local->fd) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
- } else {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
- }
-
- call_count--;
- }
- break;
+gf_boolean_t
+afr_changelog_pre_op_inherit (call_frame_t *frame, xlator_t *this) /*
+ * Try to reuse a pre-op already recorded on the fd ctx instead of sending
+ * a new one.
+ *
+ * Only a data transaction with an fd and an afr fd ctx qualifies. Under
+ * fd->lock, fd_ctx->on_disk[type] must be non-zero and
+ * local->transaction.pre_op[] must equal fd_ctx->pre_op_done[type][] for
+ * every child.
+ *
+ * On success fd_ctx->inherited[type] is incremented,
+ * local->transaction.inherited is set and _gf_true is returned. In every
+ * other case _gf_false is returned and nothing is modified.
+ */
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int type = 0;
- case AFR_ENTRY_RENAME_TRANSACTION:
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (local->transaction.locked_nodes[i] & LOCKED_LOWER) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- lower, lower_name,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- call_count--;
- }
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
- if (call_count &&
- local->transaction.locked_nodes[i] & LOCKED_YES) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- higher, higher_name,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ if (local->transaction.type != AFR_DATA_TRANSACTION) /* inheritance is limited to data transactions */
+ return _gf_false;
- call_count--;
- }
+ type = afr_index_for_transaction_type (local->transaction.type);
- break;
+ if (!fd)
+ return _gf_false;
- case AFR_ENTRY_TRANSACTION:
- if (local->transaction.locked_nodes[i] & LOCKED_YES) {
- if (local->fd) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- } else {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- }
-
- call_count--;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_false;
- break;
- }
+ LOCK(&fd->lock);
+ {
+ if (!fd_ctx->on_disk[type]) {
+ /* nothing to inherit yet */
+ ret = _gf_false;
+ goto unlock;
+ }
- if (!call_count)
- break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] !=
+ fd_ctx->pre_op_done[type][i]) {
+ /* either inherit exactly, or don't */
+ ret = _gf_false;
+ goto unlock;
+ }
+ }
+
+ fd_ctx->inherited[type]++; /* given back by afr_changelog_pre_op_uninherit */
+
+ ret = _gf_true;
+
+ local->transaction.inherited = _gf_true;
}
+unlock:
+ UNLOCK(&fd->lock);
- return 0;
+ return ret;
}
-/* }}} */
+gf_boolean_t
+afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this) /*
+ * Account for the pre-op of this transaction in fd_ctx->on_disk[type].
+ *
+ * Returns _gf_false without touching the fd ctx when there is no fd or no
+ * fd ctx, when the pre-op was inherited, when this transaction did not
+ * set the dirty flag, or when afr_txn_nothing_failed reports a failure.
+ *
+ * Under fd->lock: if on_disk[type] is zero, the pre-op set of this
+ * transaction is recorded in pre_op_done[type][]. If on_disk[type] is
+ * non-zero and the recorded set differs, local->transaction.no_uninherit
+ * is set and _gf_false is returned. Otherwise on_disk[type] is
+ * incremented and _gf_true is returned.
+ */
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
- /* {{{ pending */
+ if (!fd)
+ return _gf_false;
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_false;
- int call_count = -1;
+ if (local->transaction.inherited)
+ /* was already inherited in afr_changelog_pre_op */
+ return _gf_false;
- int (*post_post_op) (call_frame_t *, xlator_t *);
+ if (!local->transaction.dirtied)
+ return _gf_false;
- priv = this->private;
- local = frame->local;
+ if (!afr_txn_nothing_failed (frame, this))
+ return _gf_false;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ type = afr_index_for_transaction_type (local->transaction.type);
- if (call_count == 0) {
- if (local->transaction.post_post_op) {
- post_post_op = local->transaction.post_post_op;
+ ret = _gf_false;
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.post_post_op = local->transaction.done;
- } else {
- local->transaction.post_post_op = afr_unlock;
- }
+ LOCK(&fd->lock);
+ {
+ if (!fd_ctx->on_disk[type]) { /* first on_disk entry: remember which children got the pre-op */
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->pre_op_done[type][i] =
+ local->transaction.pre_op[i];
+ } else {
+ for (i = 0; i < priv->child_count; i++)
+ if (fd_ctx->pre_op_done[type][i] !=
+ local->transaction.pre_op[i]) { /* mismatch: make afr_changelog_pre_op_uninherit return _gf_false */
+ local->transaction.no_uninherit = 1;
+ goto unlock;
+ }
+ }
+ fd_ctx->on_disk[type]++;
- post_post_op (frame, this);
- } else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- afr_unlock (frame, this);
- }
- }
+ ret = _gf_true;
}
+unlock:
+ UNLOCK(&fd->lock);
- return 0;
+ return ret;
}
-int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+int
+afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) /*
+ * Callback for the xattrop and fxattrop calls wound by afr_changelog_do.
+ *
+ * cookie carries the child index that afr_changelog_do passed to
+ * STACK_WIND_COOKIE. When op_ret is -1, op_errno is stored in
+ * local->op_errno and the child is marked failed through
+ * afr_transaction_fop_failed. When priv->arbiter_count is 1 and the call
+ * succeeded, a reference to the returned xattr dict is kept in
+ * local->transaction.pre_op_xdata[child_index].
+ *
+ * Once afr_frame_return reports zero outstanding calls,
+ * local->transaction.changelog_resume is invoked. Always returns 0.
+ */
{
- afr_private_t * priv = this->private;
-
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- dict_t **xattr = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = -1;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
+ child_index = (long) cookie;
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
-
- if (local->op == GF_FOP_FLUSH) {
- __mark_failed_children (local->pending, priv->child_count,
- this, local->fd,
- local->transaction.type);
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ afr_transaction_fop_failed (frame, this, child_index);
}
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
+ if (priv->arbiter_count == 1 && !op_ret) { /* keep the reply of this child for later use in arbiter setups */
+ if (xattr)
+ local->transaction.pre_op_xdata[child_index] =
+ dict_ref (xattr);
}
- if (local->op == GF_FOP_FLUSH) {
- call_count = afr_pre_op_done_count (this, local->fd, local->child_up);
- } else {
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_frame_return (frame);
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
+ if (call_count == 0) /* last outstanding reply: continue the transaction */
+ local->transaction.changelog_resume (frame, this);
+
+ return 0;
+}
+
+void
+afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op,
+ dict_t **xdata, dict_t **newloc_xdata) /*
+ * Build the optional xdata dicts that accompany a post-op xattrop.
+ *
+ * Nothing is produced for AFR_TRANSACTION_PRE_OP, for data or metadata
+ * transactions, or when priv->esh_granular is off. In those cases, and
+ * when a dict allocation fails, *xdata and *newloc_xdata are left
+ * untouched, so callers must initialise them (afr_changelog_do uses
+ * NULL).
+ *
+ * Otherwise *xdata receives a dict with GF_XATTROP_ENTRY_IN_KEY set to
+ * the entry name (local->newloc.name for GF_FOP_LINK, local->loc.name
+ * for other fops). For rename transactions *newloc_xdata receives a
+ * second dict keyed with local->newloc.name; for all other types it is
+ * set to NULL. A dict_set_str failure is only logged and the dict is
+ * still handed out.
+ *
+ * priv is read from THIS->private, not from an xlator argument.
+ */
+{
+ dict_t *xdata1 = NULL;
+ dict_t *xdata2 = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ const char *name = NULL;
+
+ local = frame->local;
+ priv = THIS->private;
+
+ /* Populate xdata for POST_OP only. */
+ if (op == AFR_TRANSACTION_PRE_OP)
+ goto out;
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ goto out;
+
+ if (!priv->esh_granular) /* presumably the granular entry self-heal option; confirm */
+ goto out;
+
+ xdata1 = dict_new();
+ if (!xdata1)
+ goto out;
+ name = local->loc.name;
+ if (local->op == GF_FOP_LINK)
+ name = local->newloc.name;
+ ret = dict_set_str (xdata1, GF_XATTROP_ENTRY_IN_KEY, (char *)name);
+ if (ret)
+ gf_msg (THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set xattrop-entry key during post-op",
+ uuid_utoa (local->loc.pargfid), local->loc.name);
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ xdata2 = dict_new();
+ if (!xdata2)
+ goto out;
+ ret = dict_set_str (xdata2, GF_XATTROP_ENTRY_IN_KEY,
+ (char *)local->newloc.name);
+ if (ret)
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set xattrop-entry key during"
+ " post-op", uuid_utoa (local->newloc.pargfid),
+ local->newloc.name);
}
- local->call_count = call_count;
+ *xdata = xdata1;
+ *newloc_xdata = xdata2;
+ xdata1 = xdata2 = NULL; /* ownership moved to the caller: skip the unrefs below */
+out:
+ if (xdata1)
+ dict_unref (xdata1);
+ if (xdata2)
+ dict_unref (xdata2);
+ return;
+}
+
+int
+afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op) /*
+ * Wind the changelog xattrop carrying xattr to every child marked in
+ * local->transaction.pre_op[], using GF_XATTROP_ADD_ARRAY, and arrange
+ * for changelog_resume to run once all replies are in (see
+ * afr_changelog_cbk).
+ *
+ * If afr_changelog_call_count reports zero calls, changelog_resume is
+ * invoked immediately. Data and metadata transactions use fxattrop on
+ * local->fd when an fd is present, else xattrop on local->loc. Entry
+ * transactions use fxattrop on local->fd, else xattrop on
+ * transaction.parent_loc. Rename transactions additionally wind an
+ * xattrop on transaction.new_parent_loc before falling through to the
+ * entry case.
+ *
+ * The xdata dicts built by afr_changelog_populate_xdata are unreferenced
+ * here after winding. Always returns 0.
+ */
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ dict_t *newloc_xdata = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_changelog_call_count (local->transaction.type,
+ local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
-
- afr_unlock (frame, this);
+ changelog_resume (frame, this); /* nothing to wind: continue right away */
return 0;
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- call_count--;
- }
- break;
+ afr_changelog_populate_xdata (frame, op, &xdata, &newloc_xdata);
+ local->call_count = call_count; /* number of replies afr_changelog_cbk waits for */
- case AFR_FLUSH_TRANSACTION:
- {
- if (__if_fd_pre_op_done (this, local->fd, i)) {
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- call_count--;
- }
- }
- break;
+ local->transaction.changelog_resume = changelog_resume;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i]) /* only children marked in pre_op[] get the xattrop */
+ continue;
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ if (!local->fd) {
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
-
- call_count--;
- }
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- call_count--;
- }
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ xdata);
+ } else {
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ xdata);
+ }
break;
- }
+ case AFR_ENTRY_RENAME_TRANSACTION:
- if (!call_count)
- break;
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ newloc_xdata);
+ call_count--; /* rename winds twice per child; presumably afr_changelog_call_count already counts both, confirm */
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ if (local->fd)
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ xdata);
+ else
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ xdata);
+ break;
}
- }
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ if (!--call_count) /* leave the loop as soon as the last expected wind was issued */
+ break;
}
+ if (xdata)
+ dict_unref (xdata);
+ if (newloc_xdata)
+ dict_unref (newloc_xdata);
return 0;
}
-int32_t
-afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+int
+afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) /*
+ * Pre-op phase of a transaction.
+ *
+ * Every child reported by afr_locked_nodes_get is marked in
+ * local->transaction.pre_op[]; every other child is marked in
+ * failed_subvols[]. Then one of three things happens:
+ *
+ * - err: no child is locked, quorum is not met, or an allocation failed.
+ *   op_ret and op_errno are set, transaction.done is installed as
+ *   lock_cbk and afr_unlock is called.
+ * - next: the pre-op is inherited from the fd, is a no-op, or
+ *   local->pre_op_compat is off (the request dict is then passed to
+ *   dict_copy together with local->xdata_req). The fop is started
+ *   directly via afr_transaction_perform_fop.
+ * - otherwise the xattrop is wound through afr_changelog_do, which resumes
+ *   with afr_transaction_perform_fop.
+ *
+ * Always returns 0.
+ */
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- loc_t * loc = NULL;
+ afr_private_t * priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ int op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
+ int idx = -1;
+ gf_boolean_t pre_nop = _gf_true;
+ dict_t *xdata_req = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ idx = afr_index_for_transaction_type (local->transaction.type);
- local = frame->local;
- loc = &local->loc;
+ locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- __mark_pre_op_done_on_fd (frame, this, child_index);
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_nodes[i]) {
+ local->transaction.pre_op[i] = 1;
+ call_count++; /* here call_count is the number of locked children */
+ } else {
+ local->transaction.failed_subvols[i] = 1;
}
+ }
- if (op_ret == -1) {
- local->child_up[child_index] = 0;
-
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
+ /* This condition should not be met with present code, as
+ * transaction.done will be called if locks are not acquired on even a
+ * single node.
+ */
+ if (call_count == 0) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
- call_count = --local->call_count;
+ /* Check if the fop can be performed on at least
+ * quorum number of nodes.
+ */
+ if (priv->quorum_count && !afr_has_fop_quorum (frame)) {
+ op_errno = afr_quorum_errno (priv);
+ goto err;
+ }
+
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ op_errno = ENOMEM;
+ goto err;
}
- UNLOCK (&frame->lock);
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ if (afr_changelog_pre_op_inherit (frame, this)) /* an existing pre-op on the fd covers this transaction */
+ goto next;
+
+ if (call_count < priv->child_count) /* not every child is locked: the pre-op must be sent */
+ pre_nop = _gf_false;
+
+ /* Set an all-zero pending changelog so that in the cbk, we can get the
+ * current on-disk values. In a replica 3 volume with arbiter enabled,
+ * these values are needed to arrive at a go/ no-go of the fop phase to
+ * avoid ending up in split-brain.*/
+
+ ret = afr_set_pending_dict (priv, xdata_req, local->pending);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (afr_needs_changelog_update (local)) {
- afr_pid_restore (frame);
+ local->dirty[idx] = hton32(1);
- local->transaction.fop (frame, this);
+ ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
}
+
+ pre_nop = _gf_false;
+ local->transaction.dirtied = 1; /* read by the uninherit and update helpers */
}
- return 0;
+ if (pre_nop)
+ goto next;
+
+ if (!local->pre_op_compat) {
+ dict_copy (xdata_req, local->xdata_req); /* presumably copies the pre-op keys into the fop request xdata; confirm dict_copy argument order */
+ goto next;
+ }
+
+ afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
+
+ return 0;
+next:
+ afr_transaction_perform_fop (frame, this);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
+
+ return 0;
+err:
+ local->internal_lock.lock_cbk = local->transaction.done; /* installed before afr_unlock; presumably run when unlocking completes, confirm */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_unlock (frame, this);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
+
+ return 0;
}
-int
-afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
+int
+afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) /*
+ * Lock callback installed by afr_post_nonblocking_inodelk_cbk before it
+ * calls afr_blocking_lock.
+ *
+ * If int_lock->lock_op_ret is negative the failure is logged and the
+ * transaction is finished through local->transaction.done. Otherwise
+ * the transaction proceeds via afr_internal_lock_finish.
+ * Always returns 0.
+ */
{
- afr_private_t * priv = this->private;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- int i = 0;
- int ret = 0;
- int call_count = 0;
- dict_t **xattr = NULL;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- afr_local_t *local = NULL;
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO,
+ 0, AFR_MSG_BLOCKING_LKS_FAILED,
+ "Blocking inodelks failed.");
+ local->transaction.done (frame, this);
+ } else {
- local = frame->local;
-
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
+ gf_msg_debug (this->name, 0,
+ "Blocking inodelks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
+ return 0;
+}
+
+
+int
+afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) /*
+ * Lock callback installed by afr_lock_rec for data and metadata
+ * transactions.
+ *
+ * If int_lock->lock_op_ret is negative, afr_post_blocking_inodelk_cbk is
+ * installed as the next lock callback and afr_blocking_lock is called.
+ * Otherwise the transaction proceeds via afr_internal_lock_finish.
+ * Always returns 0.
+ */
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg_debug (this->name, 0,
+ "Non blocking inodelks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ } else {
+
+ gf_msg_debug (this->name, 0,
+ "Non blocking inodelks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
}
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ return 0;
+}
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
- if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
-
- afr_unlock (frame, this);
- return 0;
- }
+int
+afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) /*
+ * Lock callback installed by afr_post_nonblocking_entrylk_cbk before it
+ * calls afr_blocking_lock.
+ *
+ * If int_lock->lock_op_ret is negative the failure is logged and the
+ * transaction is finished through local->transaction.done. Otherwise
+ * the transaction proceeds via afr_internal_lock_finish.
+ * Always returns 0.
+ */
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- local->call_count = call_count;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_BLOCKING_LKS_FAILED,
+ "Blocking entrylks failed.");
+ local->transaction.done (frame, this);
+ } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
+ gf_msg_debug (this->name, 0,
+ "Blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
- call_count--;
- }
+ return 0;
+}
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
+int
+afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) /*
+ * Lock callback installed by afr_lock_rec for entry and rename
+ * transactions.
+ *
+ * If int_lock->lock_op_ret is negative, afr_post_blocking_entrylk_cbk is
+ * installed as the next lock callback and afr_blocking_lock is called.
+ * Otherwise the transaction proceeds via afr_internal_lock_finish.
+ * Always returns 0.
+ */
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg_debug (this->name, 0,
+ "Non blocking entrylks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_post_blocking_entrylk_cbk;
+ afr_blocking_lock (frame, this);
+ } else {
- break;
- }
+ gf_msg_debug (this->name, 0,
+ "Non blocking entrylks done. Proceeding to FOP");
- if (!--call_count)
- break;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ afr_internal_lock_finish (frame, this);
}
-
- return 0;
+
+ return 0;
}
-/* }}} */
-/* {{{ lock */
+int
+afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this) /*
+ * Lock callback installed by afr_post_lower_unlock_cbk before it calls
+ * afr_blocking_lock.
+ *
+ * If int_lock->lock_op_ret is negative the failure is logged and the
+ * transaction is finished through local->transaction.done. Otherwise
+ * the transaction proceeds via afr_internal_lock_finish.
+ * Always returns 0.
+ */
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ AFR_MSG_BLOCKING_LKS_FAILED,
+ "Blocking entrylks failed.");
+
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_msg_debug (this->name, 0,
+ "Blocking entrylks done. Proceeding to FOP");
-static
-int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index);
+ afr_internal_lock_finish (frame, this);
+ }
+ return 0;
+}
-int32_t
-afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int
+afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this) /*
+ * Asserts that int_lock->higher_locked is not set, installs
+ * afr_post_blocking_rename_cbk as the lock callback and starts blocking
+ * lock acquisition with afr_blocking_lock. Always returns 0.
+ *
+ * NOTE(review): the name suggests this runs after the lower lock of a
+ * rename was released; where it is installed is not visible in this
+ * chunk.
+ */
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int done = 0;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
- local->op_ret = op_ret;
- done = 1;
- }
+ GF_ASSERT (!int_lock->higher_locked);
- local->child_up[child_index] = 0;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if ((op_ret == -1) &&
- (op_errno == ENOSYS)) {
- afr_unlock (frame, this);
+ int_lock->lock_cbk = afr_post_blocking_rename_cbk;
+ afr_blocking_lock (frame, this);
+
+ return 0;
+}
+
+
+int
+afr_set_transaction_flock (xlator_t *this, afr_local_t *local) /*
+ * Fill in the flock of the inodelk that afr_get_inodelk returns for
+ * int_lock->domain.
+ *
+ * With a non-zero priv->arbiter_count, l_start and l_len are both set to
+ * 0 (whole file, per the comment below). Otherwise the range is taken
+ * from local->transaction.start and local->transaction.len. The lock
+ * type is always F_WRLCK.
+ *
+ * Always returns 0. The result of afr_get_inodelk is not checked for
+ * NULL.
+ */
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_private_t *priv = NULL;
+
+ int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ priv = this->private;
+
+ if (priv->arbiter_count) {
+ /* Lock entire file to avoid network split brains. */
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_start = 0;
} else {
- if (op_ret == 0) {
- local->transaction.locked_nodes[child_index]
- |= LOCKED_YES;
- local->transaction.lock_count++;
- }
- afr_lock_rec (frame, this, child_index + 1);
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
}
+ inodelk->flock.l_type = F_WRLCK;
- return 0;
+ return 0;
}
-
-int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int
+afr_lock_rec (call_frame_t *frame, xlator_t *this) /*
+ * Prepare int_lock and start non-blocking lock acquisition for the
+ * transaction type.
+ *
+ * transaction_lk_type is set to AFR_TRANSACTION_LK and domain to
+ * this->name. Then, per type:
+ *
+ * - data and metadata: the flock is set up with afr_set_transaction_flock
+ *   and afr_nonblocking_inodelk is called, with
+ *   afr_post_nonblocking_inodelk_cbk as the lock callback.
+ * - rename: afr_nonblocking_entrylk is called, with
+ *   afr_post_nonblocking_entrylk_cbk as the lock callback.
+ * - entry: lk_basename is set, and lk_loc too when
+ *   transaction.parent_loc.path is set (otherwise local->fd is asserted).
+ *   Locking then proceeds as for rename.
+ *
+ * The switch has no default case. Always returns 0.
+ */
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- loc_t * lower = NULL;
- loc_t * higher = NULL;
+ int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ afr_set_transaction_flock (this, local);
- priv = this->private;
- local = frame->local;
+ int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
+ afr_nonblocking_inodelk (frame, this);
+ break;
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
+ case AFR_ENTRY_RENAME_TRANSACTION:
- local->op_ret = op_ret;
- }
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
+ break;
- local->child_up[child_index] = 0;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
+ case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_basename = local->transaction.basename;
+ if (local->transaction.parent_loc.path)
+ int_lock->lk_loc = &local->transaction.parent_loc;
+ else
+ GF_ASSERT (local->fd);
- if (op_ret != 0) {
- afr_unlock (frame, this);
- goto out;
- } else {
- local->transaction.locked_nodes[child_index] |= LOCKED_LOWER;
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
+ break;
}
- /* The lower path has been locked. Now lock the higher path */
+ return 0;
+}
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
+int
+afr_lock (call_frame_t *frame, xlator_t *this) /*
+ * Entry point for locking a transaction: calls afr_set_lock_number for
+ * the frame and then starts lock acquisition through afr_lock_rec, whose
+ * return value is passed back to the caller.
+ */
+{
+ afr_set_lock_number (frame, this);
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
+ return afr_lock_rec (frame, this);
+}
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+/* }}} */
+
+/*
+ * Continue a transaction once no (further) locking is required; for
+ * example afr_transaction_start() calls this directly when
+ * afr_lock_server_count() is 0.
+ *
+ * Runs the changelog pre-op when __fop_changelog_needed() says one is
+ * needed, otherwise performs the fop right away.  Always returns 0.
+ */
+int
+afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
+{
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_pre_op (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
-out:
 return 0;
 }
-static
-int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index)
+/*
+ * Flag the transaction in frame->local for a delayed changelog post-op.
+ *
+ * local->delayed_post_op is set to _gf_true only when all of these hold:
+ * this->private and frame->local are non-NULL, priv->post_op_delay_secs
+ * is non-zero, eager-lock is on for the transaction, the transaction has
+ * an fd, and the fop is GF_FOP_WRITE.  In every other case the function
+ * returns without changing anything.
+ */
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
 {
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
+ /* Call this function when any of the related optimizations
+ which benefit from delaying the post-op are enabled, namely:
- struct flock flock;
+ - changelog piggybacking
+ - eager locking
+ */
- int ret = 0;
+ priv = this->private;
+ if (!priv)
+ return;
- loc_t * lower = NULL;
- loc_t * higher = NULL;
+ if (!priv->post_op_delay_secs)
+ return;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
+ local = frame->local;
+ if (!local)
+ return;
- local = frame->local;
- priv = this->private;
+ if (!local->transaction.eager_lock_on)
+ return;
- flock.l_start = local->transaction.start;
- flock.l_len = local->transaction.len;
- flock.l_type = F_WRLCK;
+ if (!local->fd)
+ return;
- if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to get fd ctx for fd=%p",
- local->fd);
+/*
+ * Tell the caller whether eager-lock (and delayed post-op) should be
+ * avoided for @fd.
+ *
+ * Returns _gf_true when @fd is NULL (also logged as an error), when no
+ * AFR fd context can be obtained for it, or when the context's
+ * open_fd_count is greater than 1.  Returns _gf_false otherwise.
+ */
+gf_boolean_t
+afr_are_multiple_fds_opened (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ if (!fd) {
+ /* If false is returned, it may keep on taking eager-lock
+ * which may lead to starvation, so return true to avoid that.
+ */
+ gf_msg_callingfn (this->name, GF_LOG_ERROR, EBADF,
+ AFR_MSG_INVALID_ARG, "Invalid fd");
+ return _gf_true;
+ }
+ /* Let's say mount1 holds an eager-lock (full-lock) and, after the
+ * eager-lock is taken, mount2 opens the same file: mount2 won't be
+ * able to perform any data operations until mount1 releases the
+ * eager-lock.  To avoid such a scenario, do not enable eager-lock
+ * for this transaction if open-fd-count is > 1.
+ */
- local->op_ret = -1;
- local->op_errno = EINVAL;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ /* same conservative answer as for an invalid fd */
+ return _gf_true;
- afr_unlock (frame, this);
+ if (fd_ctx->open_fd_count > 1)
+ return _gf_true;
- return 0;
- }
+ return _gf_false;
+}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- /* skip over children that or down
- or don't have the fd open */
+/*
+ * Decide whether the changelog post-op of the transaction in @frame may
+ * be delayed.
+ *
+ * Returns _gf_true only when frame->local exists, its delayed_post_op
+ * flag is set, afr_txn_nothing_failed() holds, and the transaction's fd
+ * (if any) is not reported by afr_are_multiple_fds_opened().  Returns
+ * _gf_false in every other case.
+ */
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index]
- || !fd_ctx->opened_on[child_index]))
+ local = frame->local;
+ if (!local)
+ goto out;
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
+ if (!local->delayed_post_op)
+ goto out;
+
+ /* If anything failed, do not delay: mark the pending changelog ASAP */
+ if (!afr_txn_nothing_failed (frame, this))
+ goto out;
+
+ if (local->fd && afr_are_multiple_fds_opened (local->fd, this))
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+/*
+ * Callback registered with gf_timer_call_after() by
+ * afr_delayed_changelog_post_op(); @data is the fd that was passed at
+ * registration time.  Wakes up the delayed post-op pending on that fd.
+ */
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+        fd_t *target_fd = data;
+
+        afr_delayed_changelog_wake_up (THIS, target_fd);
+}
+
+
+/* SET operation
+ *
+ * Record in the AFR context of @fd that an unstable write was witnessed.
+ * The flag is written under fd->lock.
+ *
+ * Always returns 0.  If no fd context can be obtained nothing is
+ * recorded; the matching test-and-clear helper,
+ * afr_fd_has_witnessed_unstable_write(), answers _gf_true for a missing
+ * context, so the conservative behaviour is kept.
+ */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+
+ fdctx = afr_fd_ctx_get (fd, this);
+ if (!fdctx)
+ /* afr_fd_ctx_get() can fail; do not dereference NULL */
+ return 0;
+
+ LOCK(&fd->lock);
+ {
+ fdctx->witnessed_unstable_write = _gf_true;
 }
+ UNLOCK(&fd->lock);
- if ((child_index == priv->child_count) &&
- local->transaction.lock_count == 0) {
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to lock on even one child");
+/* TEST and CLEAR operation
+ *
+ * Returns _gf_true if the AFR context of @fd has its
+ * witnessed_unstable_write flag set, and clears the flag in the same
+ * critical section (under fd->lock).  Returns _gf_false if the flag was
+ * not set.  When no fd context can be obtained the answer is _gf_true.
+ */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t witness = _gf_false;
- local->op_ret = -1;
- local->op_errno = EAGAIN;
+ fdctx = afr_fd_ctx_get (fd, this);
+ if (!fdctx)
+ return _gf_true;
- afr_unlock (frame, this);
-
- return 0;
+ LOCK(&fd->lock);
+ {
+ if (fdctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ fdctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return witness;
+}
+
+
+/*
+ * Callback for the fsync() calls wound by afr_changelog_fsync().
+ *
+ * @cookie carries the index of the child that replied.  A non-zero
+ * @op_ret is logged and recorded through afr_transaction_fop_failed().
+ * When afr_frame_return() yields 0 (presumably: this was the last
+ * outstanding reply -- confirm against afr_frame_return()), the
+ * changelog post-op is started.  Always returns 0.
+ */
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ if (op_ret != 0) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_msg (this->name, GF_LOG_WARNING,
+ op_errno, AFR_MSG_FSYNC_FAILED,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
 }
- if ((child_index == priv->child_count)
- || (local->transaction.lock_count ==
- afr_lock_server_count (priv, local->transaction.type))) {
+ call_count = afr_frame_return (frame);
- /* we're done locking */
+ if (call_count == 0)
+ afr_changelog_post_op_now (frame, this);
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ return 0;
+}
- afr_pid_restore (frame);
- local->transaction.fop (frame, this);
- }
+/*
+ * Wind an fsync() on local->fd to every child whose entry in
+ * local->transaction.pre_op is set; replies are handled by
+ * afr_changelog_fsync_cbk().
+ *
+ * If AFR_COUNT() over pre_op yields 0 no fsync is sent and the post-op
+ * is started immediately.  Always returns 0.
+ */
+int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
- return 0;
- }
+ local = frame->local;
+ priv = this->private;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
-
- } else {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+ call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ /* Best effort: a failed dict_new() or dict_set_int32() is ignored
+ and the fsync is wound with whatever xdata we ended up with
+ (possibly NULL). */
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ /* stop scanning once the expected number of calls is wound */
+ if (!--call_count)
+ break;
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+/*
+ * Run the changelog post-op, first making the data durable with an
+ * fsync() when that is required.  Always returns 0.
+ */
+int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *txn  = frame->local;
+        afr_private_t *conf = this->private;
+
+        /* Only fd-based data transactions are candidates for the fsync. */
+        if (!txn->fd || txn->transaction.type != AFR_DATA_TRANSACTION)
+                goto post_op;
+
+        /* This post-op is about to be optimized away because a new
+           write() has already piggybacked on this frame's changelog. */
+        if (afr_changelog_pre_op_uninherit (frame, this) &&
+            afr_txn_nothing_failed (frame, this))
+                goto post_op;
+
+        /* Calling afr_changelog_post_op_now() from here on results in
+           issuing ->[f]xattrop().
+
+           Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+           responsible operation than it might appear on the surface.
+
+           The changelog of a file (in the xattr of the file on the
+           server) stores information (pending count) about the state of
+           the file on the OTHER server.  This changelog is blindly
+           trusted, and must therefore be updated in such a way that it
+           remains trustworthy.  This implies that decrementing the
+           pending count (essentially "clearing the dirty flag") must be
+           done STRICTLY after we are sure that the operation on the
+           other server has reached stable storage.
+
+           While the backend filesystem on that server will eventually
+           flush it to stable storage, we (being in userspace) have no
+           mechanism to get notified when the write became "stable".
+
+           This means we need to take matters into our own hands and
+           issue an fsync() EVEN IF THE APPLICATION WAS PERFORMING
+           UNSTABLE WRITES, and get an acknowledgement for it.  And we
+           need to wait for the fsync() acknowledgement before
+           initiating the hard POST-OP.
+
+           However, if the FD itself was opened with O_SYNC or O_DSYNC
+           then we are already guaranteed that the writes were made
+           stable as part of the FOP itself.  The same holds true for
+           NFS stable writes, which happen on an anonymous FD with the
+           O_DSYNC or O_SYNC flag set in the writev() @flags param.  For
+           all other write types, a flag is marked in the fdctx whenever
+           an unstable write is witnessed.
+         */
+        if (!afr_fd_has_witnessed_unstable_write (this, txn->fd))
+                goto post_op;
+
+        /* Honour the user's durability setting: fsync() first when it
+           is requested, otherwise go straight to the post-op. */
+        if (conf->ensure_durability) {
+                afr_changelog_fsync (frame, this);
+                return 0;
+        }
+
+post_op:
+        afr_changelog_post_op_now (frame, this);
+        return 0;
+}
+
+
+/*
+ * Replace the delayed post-op pending on @fd.
+ *
+ * Under fd_ctx->delay_lock the frame currently parked in
+ * fd_ctx->delay_frame (if any) is taken out and its timer cancelled.
+ * When @frame is non-NULL it is parked instead and a timer of
+ * priv->post_op_delay_secs seconds is armed that calls
+ * afr_delayed_changelog_wake_up_cbk() with @fd.
+ *
+ * After the lock is dropped: if a previous frame was taken out, @stub is
+ * stored as its transaction.resume_stub and its post-op is run now;
+ * otherwise a non-NULL @stub is resumed immediately.
+ *
+ * NOTE(review): when afr_fd_ctx_get() fails and @frame is non-NULL, the
+ * frame is neither parked nor post-op'ed here -- confirm callers cannot
+ * hit that combination.
+ */
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timespec delta = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
 {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ /* a NULL frame means "wake up only": nothing new to park */
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
 }
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
- } else {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
- }
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
- break;
+out:
+ if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
+ afr_changelog_post_op_now (prev_frame, this);
+ } else if (stub) {
+ call_resume (stub);
 }
-
- return 0;
 }
-int32_t afr_lock (call_frame_t *frame, xlator_t *this)
+/*
+ * Start the changelog post-op for the transaction in @frame: park it as
+ * a delayed post-op on local->fd when
+ * is_afr_delayed_changelog_post_op_needed() allows that, otherwise run
+ * it through afr_changelog_post_op_safe().
+ */
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
 {
- afr_pid_save (frame);
+ afr_local_t *local = NULL;
- frame->root->pid = (long) frame->root;
+ local = frame->local;
- return afr_lock_rec (frame, this, 0);
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+ else
+ afr_changelog_post_op_safe (frame, this);
 }
-/* }}} */
-int32_t
+/* Wake up the sleeping/delayed post-op on @fd, and also register
+ a stub to have it resumed after that transaction
+ completely finishes.
+
+ The @stub gets saved in @local and gets resumed in
+ afr_local_cleanup().
+
+ Implemented as afr_delayed_changelog_post_op() with a NULL frame,
+ so no new post-op is parked on @fd.
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
+}
+
+
+/* Wake up the delayed post-op pending on @fd, if any.  Calls
+ afr_delayed_changelog_post_op() with neither a new frame nor a stub. */
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+}
+
+
+/*
+ * Installed as local->transaction.resume by afr_transaction().
+ *
+ * Drops the eager-lock stub entry if eager-lock was on, restores the lk
+ * owner, runs afr_handle_symmetric_errors(), updates the pre-op state
+ * when the pre-op was not done in compat mode, and finally either starts
+ * the changelog post-op or goes directly to afr_changelog_post_op_done()
+ * depending on __fop_changelog_needed().  Always returns 0.
+ */
+int
 afr_transaction_resume (call_frame_t *frame, xlator_t *this)
 {
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- if (__changelog_needed_post_op (frame, this)) {
- afr_changelog_post_op (frame, this);
- } else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- afr_unlock (frame, this);
- }
- }
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ afr_remove_eager_lock_stub (local);
+ }
- return 0;
+ afr_restore_lk_owner (frame);
+
+ afr_handle_symmetric_errors (frame, this);
+
+ if (!local->pre_op_compat)
+ /* new mode, pre-op was done along
+ with OP */
+ afr_changelog_pre_op_update (frame, this);
+
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_post_op (frame, this);
+ } else {
+ afr_changelog_post_op_done (frame, this);
+ }
+
+ return 0;
 }
@@ -1411,54 +2062,195 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
 void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
 {
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- switch (local->op) {
- case GF_FOP_WRITE:
- __mark_fop_failed_on_fd (local->fd, this, child_index);
- break;
- default:
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
- break;
+ /* Record the failure for subvolume @child_index in the transaction's
+ failed_subvols array; @this is not used here. */
+ local->transaction.failed_subvols[child_index] = 1;
+}
+
+
+
+/*
+ * Report whether the byte ranges of two transactions intersect.
+ *
+ * Each range begins at transaction.start and covers transaction.len
+ * bytes; a len of 0 is treated as a range that extends up to ULLONG_MAX.
+ * Both ends are inclusive.
+ */
+static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+        uint64_t first_off   = local1->transaction.start;
+        uint64_t second_off  = local2->transaction.start;
+        uint64_t first_last  = ULLONG_MAX;
+        uint64_t second_last = ULLONG_MAX;
+
+        if (local1->transaction.len)
+                first_last = first_off + local1->transaction.len - 1;
+
+        if (local2->transaction.len)
+                second_last = second_off + local2->transaction.len - 1;
+
+        /* disjoint when one range ends before the other one begins */
+        if (first_last < second_off)
+                return _gf_false;
+        if (second_last < first_off)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * Decide whether the transaction in @local runs with eager-lock.
+ *
+ * Nothing is changed unless the transaction has an fd, is an
+ * AFR_DATA_TRANSACTION, priv->eager_lock is enabled, an fd context can
+ * be obtained and afr_are_multiple_fds_opened() is false for the fd.
+ *
+ * Otherwise, under fd->lock, local->transaction.eager_lock_on is set to
+ * _gf_false if the transaction overlaps any entry already on
+ * fdctx->eager_locked; if it overlaps none, it is set to _gf_true and
+ * @local is appended to that list.
+ */
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *each = NULL;
+
+ priv = this->private;
+
+ if (!local->fd)
+ return;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+
+ if (!priv->eager_lock)
+ return;
+
+ fdctx = afr_fd_ctx_get (local->fd, this);
+ if (!fdctx)
+ return;
+
+ if (afr_are_multiple_fds_opened (local->fd, this))
+ return;
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ LOCK (&local->fd->lock);
+ {
+ list_for_each_entry (each, &fdctx->eager_locked,
+ transaction.eager_locked) {
+ if (afr_locals_overlap (each, local)) {
+ local->transaction.eager_lock_on = _gf_false;
+ goto unlock;
+ }
+ }
+
+ local->transaction.eager_lock_on = _gf_true;
+ list_add_tail (&local->transaction.eager_locked,
+ &fdctx->eager_locked);
 }
+unlock:
+ UNLOCK (&local->fd->lock);
 }
+/*
+ * Begin a write transaction once its preconditions are settled.
+ *
+ * Steps, in order: decide on eager-lock; set the lk owner to the fd when
+ * eager-lock is on for an fd-based transaction, otherwise to
+ * frame->root; when eager-lock is off and an inode is known, wake up any
+ * delayed post-op pending on an fd of that inode; finally either skip
+ * locking (afr_lock_server_count() == 0) or take the locks.
+ */
+void
+afr_transaction_start (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ fd_t *fd = NULL;
+
+ afr_transaction_eager_lock_init (local, this);
+
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
+
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ /* look for an fd of this pid first, then an anonymous one */
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode,
+ GF_ANON_FD_FLAGS);
+
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ /* drop the fd once the wake-up is issued; presumably
+ this releases the ref taken by the lookup -- confirm */
+ fd_unref (fd);
+ }
+ }
-int32_t
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ afr_internal_lock_finish (frame, this);
+ } else {
+ afr_lock (frame, this);
+ }
+}
+
+/*
+ * Completion callback passed to afr_inode_refresh() by afr_transaction().
+ *
+ * On a non-zero @err, or when afr_inode_get_readable() reports an error
+ * afterwards (logged as split-brain), op_ret is set to -1, op_errno to
+ * the negated value, the transaction is unwound and the frame destroyed.
+ * Otherwise the transaction is started.  Always returns 0.
+ */
+int
+afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
 {
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = frame->local;
+ int ret = 0;
- local = frame->local;
- priv = this->private;
+ if (err) {
+ /* @err is negated here, so it is presumably a negative errno
+ -- confirm against afr_inode_refresh() */
+ local->op_errno = -err;
+ local->op_ret = -1;
+ goto fail;
+ }
+ ret = afr_inode_get_readable (frame, local->inode, this,
+ local->readable, NULL,
+ local->transaction.type);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN,
+ "Failing %s on gfid %s: split-brain observed.",
+ gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto fail;
+ }
+ afr_transaction_start (frame, this);
+ return 0;
+fail:
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
+ return 0;
+}
- afr_transaction_local_init (local, priv);
+/*
+ * Entry point of a write transaction of the given @type on @frame.
+ *
+ * Installs afr_transaction_resume() as the resume handler, records
+ * @type and initialises the transaction part of frame->local.  Entry and
+ * entry-rename transactions start immediately.  For the other types the
+ * inode's readable state is checked first: if that fails, or the event
+ * generation it reports differs from priv->event_generation, the inode
+ * is refreshed and the transaction continues from
+ * afr_write_txn_refresh_done(); otherwise it starts right away.
+ *
+ * Returns the negative result of afr_transaction_local_init() if that
+ * fails, 0 otherwise.
+ */
+int
+afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int event_generation = 0;
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
+ local = frame->local;
+ priv = this->private;
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ local->transaction.resume = afr_transaction_resume;
+ local->transaction.type = type;
- afr_pid_restore (frame);
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
- local->transaction.fop (frame, this);
- }
- } else {
- afr_lock (frame, this);
- }
+ if (type == AFR_ENTRY_TRANSACTION ||
+ type == AFR_ENTRY_RENAME_TRANSACTION) {
+ afr_transaction_start (frame, this);
+ ret = 0;
+ goto out;
+ }
- return 0;
+ ret = afr_inode_get_readable (frame, local->inode, this,
+ local->readable, &event_generation, type);
+ if (ret < 0 || event_generation != priv->event_generation) {
+ afr_inode_refresh (frame, this, local->inode, local->loc.gfid,
+ afr_write_txn_refresh_done);
+ } else {
+ afr_transaction_start (frame, this);
+ }
+ ret = 0;
+out:
+ return ret;
 }
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 0d3d4443e30..ca8fcfefa89 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,33 +1,62 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+#include "afr.h"
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
+void
+afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this);
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending);
+
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
+
+int afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type);
+
+int afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol);
+
+int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this);
+int __afr_txn_write_done (call_frame_t *frame, xlator_t *this);
+call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame);
+gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this);
+gf_boolean_t afr_needs_changelog_update (afr_local_t *local);
+void afr_zero_fill_stat (afr_local_t *local);
+
+void
+afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
+ inode_t *inode1, unsigned char *readable1,
+ inode_t *inode2, unsigned char *readable2);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 4ae128bbb55..da62564e93a 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -24,3069 +15,635 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
-
-#include "afr-self-heal.h"
-
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-
-
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
-
- VALIDATE_OR_GOTO (inode, out);
+#include "afr-common.c"
+#include "afr-messages.h"
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK;
- }
-unlock:
- UNLOCK (&inode->lock);
-
-out:
- return split_brain;
-}
+struct volume_options options[];
+static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = {
+ [AFR_FAV_CHILD_NONE] = "none",
+ [AFR_FAV_CHILD_BY_SIZE] = "size",
+ [AFR_FAV_CHILD_BY_CTIME] = "ctime",
+ [AFR_FAV_CHILD_BY_MTIME] = "mtime",
+ [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
+ [AFR_FAV_CHILD_POLICY_MAX] = NULL,
+};
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+int32_t
+notify (xlator_t *this, int32_t event,
+ void *data, ...)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- VALIDATE_OR_GOTO (inode, out);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
- }
-
- if (set) {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- } else {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx);
- }
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-out:
- return;
+ return ret;
}
-
-uint64_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+int32_t
+mem_acct_init (xlator_t *this)
{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t opendir_done = 0;
+ int ret = -1;
- VALIDATE_OR_GOTO (inode, out);
+ if (!this)
+ return ret;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
- if (ret < 0)
- goto unlock;
-
- opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
+ if (ret != 0) {
+ return ret;
}
-unlock:
- UNLOCK (&inode->lock);
-out:
- return opendir_done;
+ return ret;
}
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
+int
+xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
- VALIDATE_OR_GOTO (inode, out);
+ list = this->children;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp (subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
-
- ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
-
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-out:
- return;
-}
-
-
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- read_child = ctx & AFR_ICTX_READ_CHILD_MASK;
+ list = list->next;
+ i++;
}
-unlock:
- UNLOCK (&inode->lock);
-out:
- return read_child;
+ return index;
}
-
-void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
-{
- uint64_t ctx = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
- }
-
- ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx)
- | (AFR_ICTX_READ_CHILD_MASK & read_child);
-
- __inode_ctx_put (inode, this, ctx);
+static void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype,
+ dict_t *options)
+{
+ if (dict_get (options, "quorum-type") == NULL) {
+ /* If user doesn't configure anything enable auto-quorum if the
+ * replica has odd number of subvolumes */
+ if (priv->child_count % 2)
+ qtype = "auto";
}
- UNLOCK (&inode->lock);
-
-out:
- return;
-}
-
-
-/**
- * afr_local_cleanup - cleanup everything in frame->local
- */
-
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
-
- sh = &local->self_heal;
- priv = this->private;
- if (sh->buf)
- GF_FREE (sh->buf);
-
- if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
- GF_FREE (sh->xattr);
- }
-
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
-
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
-
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
-
- if (sh->sources)
- GF_FREE (sh->sources);
-
- if (sh->success)
- GF_FREE (sh->success);
-
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
-
- loc_wipe (&sh->parent_loc);
-}
-
-
-void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t * priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
+ if (priv->quorum_count && strcmp (qtype, "fixed")) {
+ gf_msg (this->name,GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
+ "quorum-type %s overriding quorum-count %u",
+ qtype, priv->quorum_count);
}
- GF_FREE (local->pending);
-
- GF_FREE (local->transaction.locked_nodes);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
-
- GF_FREE (local->transaction.basename);
- GF_FREE (local->transaction.new_basename);
-
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
-}
-
-
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
-{
- int i;
- afr_private_t * priv = NULL;
-
- if (!local)
- return;
-
- afr_local_sh_cleanup (local, this);
-
- afr_local_transaction_cleanup (local, this);
-
- priv = this->private;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
-
- GF_FREE (local->child_up);
-
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lookup.xattrs[i]) {
- dict_unref (local->cont.lookup.xattrs[i]);
- local->cont.lookup.xattrs[i] = NULL;
- }
- }
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
-
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
-
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
- }
-
- { /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
- }
-
- { /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
- }
-
- { /* checksum */
- if (local->cont.checksum.file_checksum)
- GF_FREE (local->cont.checksum.file_checksum);
- if (local->cont.checksum.dir_checksum)
- GF_FREE (local->cont.checksum.dir_checksum);
- }
-
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- }
-
- { /* writev */
- GF_FREE (local->cont.writev.vector);
- }
-
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
-
- { /* removexattr */
- GF_FREE (local->cont.removexattr.name);
- }
-
- { /* symlink */
- GF_FREE (local->cont.symlink.linkpath);
- }
-
- { /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ if (!strcmp (qtype, "none")) {
+ priv->quorum_count = 0;
+ } else if (!strcmp (qtype, "auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
}
}
-
+/*
+ * Select the favorite-child policy named by @policy.
+ *
+ * Looks @policy up in afr_favorite_child_policies and, when the resulting
+ * index is within [0, AFR_FAV_CHILD_POLICY_MAX), stores it in
+ * priv->fav_child_policy.
+ *
+ * Returns 0 on success, or -1 (leaving priv untouched) when the lookup does
+ * not produce an index in that range.
+ *
+ * NOTE(review): @policy is handed to gf_get_index_by_elem unchecked;
+ * presumably callers never pass NULL -- confirm.
+ */
int
-afr_frame_return (call_frame_t *frame)
+afr_set_favorite_child_policy (afr_private_t *priv, char *policy)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ int index = -1;
- local = frame->local;
+ index = gf_get_index_by_elem (afr_favorite_child_policies, policy);
+ if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
+ return -1;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- return call_count;
-}
-
-
-/**
- * up_children_count - return the number of children that are up
- */
-
-int
-afr_up_children_count (int child_count, unsigned char *child_up)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (child_up[i])
- ret++;
- return ret;
-}
-
-
-int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
-{
- int ret = 0;
- int i;
-
- for (i = 0; i < child_count; i++)
- if (locked_nodes[i])
- ret++;
-
- return ret;
-}
-
-
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index)
-{
- ino64_t scaled_ino = -1;
-
- if (ino == ((uint64_t) -1)) {
- scaled_ino = ((uint64_t) -1);
- goto out;
- }
-
- scaled_ino = (ino * child_count) + child_index;
-
-out:
- return scaled_ino;
-}
-
-
-int
-afr_deitransform_orig (ino64_t ino, int child_count)
-{
- int index = -1;
+ priv->fav_child_policy = index;
- index = ino % child_count;
-
- return index;
-}
-
-
-int
-afr_deitransform (ino64_t ino, int child_count)
-{
- return 0;
+ return 0;
}
-
-
+/*
+ * Re-read the replicate translator's tunables from @options into
+ * this->private.
+ *
+ * Every GF_OPTION_RECONF call is handed the `out` label (presumably jumped to
+ * when an option fails to parse -- confirm against the macro). Options that
+ * were applied before a failure stay applied.
+ *
+ * Returns 0 when every option was processed, -1 otherwise.
+ */
int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this)
+reconfigure (xlator_t *this, dict_t *options)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
- }
-
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
-
- return 0;
-}
-
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
-static void
-afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
-{
- uint32_t open_fd_count = 0;
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
+ priv = this->private;
- int ret = 0;
+ GF_OPTION_RECONF ("afr-dirty-xattr",
+ priv->afr_dirty, options, str,
+ out);
- if (afr_sh_has_metadata_pending (xattr, child_index, this))
- local->self_heal.need_metadata_self_heal = _gf_true;
+ GF_OPTION_RECONF ("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool,
+ out);
- if (afr_sh_has_entry_pending (xattr, child_index, this))
- local->self_heal.need_entry_self_heal = _gf_true;
+ GF_OPTION_RECONF ("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32,
+ out);
- if (afr_sh_has_data_pending (xattr, child_index, this))
- local->self_heal.need_data_self_heal = _gf_true;
+ GF_OPTION_RECONF ("heal-wait-queue-length",
+ priv->heal_wait_qlen, options, uint32, out);
- ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- if (ret == 0)
- local->open_fd_count += open_fd_count;
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
+ GF_OPTION_RECONF ("metadata-self-heal",
+ priv->metadata_self_heal, options, bool, out);
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
-}
+ GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
+ out);
+ GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
+ bool, out);
-static void
-afr_lookup_self_heal_check (afr_local_t *local, struct iatt *buf,
- struct iatt *lookup_buf)
-{
- if (FILETYPE_DIFFERS (buf, lookup_buf)) {
- /* mismatching filetypes with same name
- -- Govinda !! GOvinda !!!
- */
+ GF_OPTION_RECONF ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options,
+ uint32, out);
- gf_log ("afr", GF_LOG_TRACE,
- "file %s is govinda!", local->loc.path);
+ GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
+ bool, out);
- local->govinda_gOvinda = 1;
- }
+ GF_OPTION_RECONF ("metadata-change-log",
+ priv->metadata_change_log, options, bool, out);
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.need_metadata_self_heal = _gf_true;
- }
+ GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
+ bool, out);
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.need_metadata_self_heal = _gf_true;
- }
-
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- local->self_heal.need_data_self_heal = _gf_true;
- }
+ GF_OPTION_RECONF ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, options, str, out);
-}
+ GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf)
-{
- int unwind = 1;
- int source = -1;
-
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino;
-
- if (local->cont.lookup.ino) {
- local->cont.lookup.buf.ia_ino = local->cont.lookup.ino;
- local->cont.lookup.buf.ia_gen = local->cont.lookup.gen;
- }
-
- if (local->op_ret == 0) {
- /* KLUDGE: assuming DHT will not itransform in
- revalidate */
- if (local->cont.lookup.inode->ino) {
- local->cont.lookup.buf.ia_ino =
- local->cont.lookup.inode->ino;
- local->cont.lookup.buf.ia_gen =
- local->cont.lookup.inode->generation;
+ if (read_subvol) {
+ index = xlator_subvolume_index (this, read_subvol);
+ if (index == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL, "%s not a subvolume",
+ read_subvol->name);
+ goto out;
}
+ priv->read_child = index;
}
- if (local->success_count && local->enoent_count) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- local->self_heal.need_data_self_heal = _gf_true;
- local->self_heal.need_entry_self_heal = _gf_true;
- }
-
- if (local->success_count) {
- /* check for split-brain case in previous lookup */
- if (afr_is_split_brain (this,
- local->cont.lookup.inode))
- local->self_heal.need_data_self_heal = _gf_true;
- }
-
- if ((local->self_heal.need_metadata_self_heal
- || local->self_heal.need_data_self_heal
- || local->self_heal.need_entry_self_heal)
- && ((!local->cont.lookup.is_revalidate)
- || (local->op_ret != -1))) {
-
- if (local->open_fd_count
- || local->inodelk_count
- || local->entrylk_count) {
-
- /* Someone else is doing self-heal on this file.
- So just make a best effort to set the read-subvolume
- and return */
-
- if (IA_ISREG (local->cont.lookup.inode->ia_type)) {
- source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs);
-
- if (source >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- source);
- }
- }
- } else {
- if (!local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type =
- lookup_buf->ia_type;
- }
+ /* Processed after "read-subvolume", so a valid index here overrides it. */
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
- local->self_heal.background = _gf_true;
- local->self_heal.type = local->cont.lookup.buf.ia_type;
- local->self_heal.unwind = afr_self_heal_lookup_unwind;
-
- unwind = 0;
-
- afr_self_heal (frame, this);
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", index);
+ goto out;
}
+ priv->read_child = index;
}
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
-}
+ GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool,
+ out);
+ GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
+ out);
+ GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options,
+ bool, out);
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options (this, priv, qtype, options);
+ if (priv->quorum_count && !afr_has_quorum (priv->child_up, this))
+ gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
-
-static gf_boolean_t
-__error_more_important (int32_t old_errno, int32_t new_errno)
-{
- gf_boolean_t ret = _gf_true;
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
-
- return ret;
-}
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size_uint64, out);
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options,
+ bool, out);
-int
-afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
+ GF_OPTION_RECONF ("iam-self-heal-daemon", priv->shd.iamshd, options,
+ bool, out);
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
+ GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
+ int32, out);
- child_index = (long) cookie;
- priv = this->private;
+ GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options,
+ bool, out);
+ GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata,
+ options, bool, out);
- LOCK (&frame->lock);
- {
- local = frame->local;
+ GF_OPTION_RECONF ("shd-max-threads", priv->shd.max_threads,
+ options, uint32, out);
- lookup_buf = &local->cont.lookup.buf;
+ GF_OPTION_RECONF ("shd-wait-qlength", priv->shd.wait_qlength,
+ options, uint32, out);
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
-
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
-
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-
- goto unlock;
- }
-
- afr_lookup_collect_xattr (local, this, child_index, xattr);
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
- local->cont.lookup.gen = buf->ia_gen;
- }
-
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- afr_lookup_self_heal_check (local, buf, lookup_buf);
-
- if (child_index == local->read_child_index) {
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- local->read_child_index);
- }
- }
-
- }
-
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
+ GF_OPTION_RECONF ("favorite-child-policy", fav_child_policy, options,
+ str, out);
+ if (afr_set_favorite_child_policy (priv, fav_child_policy) == -1)
+ goto out;
- call_count = afr_frame_return (frame);
+ /* Reset this so we re-discover in case the topology changed. */
+ priv->did_discovery = _gf_false;
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
- }
+ ret = 0;
+out:
+ return ret;
- return 0;
}
-int
-afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
-
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
-
- child_index = (long) cookie;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- lookup_buf = &local->cont.lookup.buf;
-
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
-
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
-
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-
- goto unlock;
- }
-
- afr_lookup_collect_xattr (local, this, child_index, xattr);
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
- local->cont.lookup.gen = buf->ia_gen;
- }
-
- /* in case of revalidate, we need to send stat of the
- * child whose stat was sent during the first lookup.
- * (so that time stamp does not vary with revalidate.
- * in case it is down, stat of the fist success will
- * be replied */
-
- /* inode number should be preserved across revalidates */
-
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- afr_lookup_self_heal_check (local, buf, lookup_buf);
-
- if (child_index == local->read_child_index) {
-
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
-
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- local->read_child_index);
- }
- }
-
- }
-
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
- }
-
- return 0;
-}
+/*
+ * printf-style warning template logged when a "favorite-child" subvolume is
+ * configured. It carries three %s placeholders; init() supplies the favorite
+ * child's name for each of them.
+ */
+static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
+ "as the 'favorite child'. This means that if a discrepancy in the content "
+ "or attributes (ownership, permission, etc.) of a file is detected among "
+ "the subvolumes, the file on '%s' will be considered the definitive "
+ "version and its contents will OVERWRITE the contents of the file on other "
+ "subvolumes. All versions of the file except that on '%s' "
+ "WILL BE LOST.";
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+/*
+ * Populate priv->pending_key[] with one "<AFR_XATTR_PREFIX>.<name>" string
+ * per subvolume.
+ *
+ * The names come from the comma-separated "afr-pending-xattr" option when it
+ * is set. Otherwise a warning is logged and the names of the child xlators
+ * (this->children, in list order) are used instead.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, -EINVAL when the
+ * option lists more names than priv->child_count, and presumably -1 when
+ * GF_OPTION_INIT itself bails out to `out` -- confirm against the macro.
+ *
+ * NOTE(review): when the option lists fewer names than child_count, the
+ * remaining pending_key[] slots keep whatever GF_CALLOC left there
+ * (presumably NULL); verify that users of pending_key tolerate that.
+ */
+static int
+afr_pending_xattrs_init (afr_private_t *priv, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
-
- fop_lookup_cbk_t callback;
-
- int call_count = 0;
-
- uint64_t ctx;
+ int ret = -1;
+ int i = 0;
+ char *ptr = NULL;
+ char *ptr1 = NULL;
+ char *xattrs_list = NULL;
+ char *saveptr = NULL;
+ xlator_list_t *trav = NULL;
- int32_t op_errno = 0;
+ trav = this->children;
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- local->op_ret = -1;
-
- frame->local = local;
-
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
+ GF_OPTION_INIT ("afr-pending-xattr", xattrs_list, str, out);
+ priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+ priv->child_count, gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
goto out;
}
-
- loc_copy (&local->loc, loc);
-
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
-
- callback = afr_revalidate_lookup_cbk;
-
- local->cont.lookup.is_revalidate = _gf_true;
- local->read_child_index = afr_read_child (this,
- loc->inode);
- } else {
- callback = afr_fresh_lookup_cbk;
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
+ if (!xattrs_list) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG,
+ "Unable to fetch afr-pending-xattr option from volfile."
+ " Falling back to using client translator names. ");
+
+ while (i < priv->child_count) {
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
+ AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ trav = trav->next;
+ i++;
}
- UNLOCK (&priv->read_child_lock);
- }
-
- if (loc->parent)
- local->cont.lookup.parent_ino = loc->parent->ino;
-
- local->child_up = memdup (priv->child_up, priv->child_count);
-
- local->cont.lookup.xattrs = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
-
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- call_count = local->call_count;
-
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
+ ret = 0;
goto out;
}
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
-
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
-
- /* 3 = data+metadata+entry */
+ /* Tokenizing mutates its input, so work on a private copy; ptr1 keeps
+ * the start of that copy so it can be freed at `out`. */
+ ptr = ptr1 = gf_strdup (xattrs_list);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
}
-
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, callback, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- ret = 0;
-out:
- if (ret == -1)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
-
- return 0;
-}
-
-
-/* {{{ open */
-
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
-
- int op_ret = 0;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
-
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
-
- if (ret == 0)
- goto unlock;
-
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
-
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->opened_on) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->child_failed = GF_CALLOC (
- sizeof (*fd_ctx->child_failed),
- priv->child_count,
- gf_afr_mt_char);
-
- if (!fd_ctx->child_failed) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
-
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
-
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret < 0) {
- op_ret = ret;
+ /* strtok_r keeps its cursor in saveptr rather than in hidden static
+ * state, so this does not interfere with tokenizing done elsewhere. */
+ for (i = 0, ptr = strtok_r (ptr, ",", &saveptr); ptr;
+ ptr = strtok_r (NULL, ",", &saveptr)) {
+ /* pending_key[] was allocated with child_count slots; refuse to
+ * write past it when the option lists too many names. */
+ if (i >= priv->child_count) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL,
+ "afr-pending-xattr lists more names than "
+ "there are subvolumes");
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
+ AFR_XATTR_PREFIX, ptr);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
}
-
- INIT_LIST_HEAD (&fd_ctx->entries);
+ i++;
}
-unlock:
- UNLOCK (&fd->lock);
+ ret = 0;
+
out:
+ GF_FREE (ptr1);
return ret;
-}
-
-/* {{{ flush */
-
-int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
- }
-
- return 0;
}
-
-int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_plain_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-
+int32_t
+init (xlator_t *this)
{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+
+ if (!this->children) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_CHILD_MISCONFIGURED,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (!this->parents) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_VOL_MISCONFIGURED, "Volume is dangling.");
+ }
- call_count = afr_frame_return (frame);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
- if (call_count == 0)
- AFR_STACK_UNWIND (flush, frame, local->op_ret, local->op_errno);
+ priv = this->private;
+ LOCK_INIT (&priv->lock);
- return 0;
-}
+ child_count = xlator_subvolume_count (this);
+ priv->child_count = child_count;
-static int
-__no_pre_op_done (xlator_t *this, fd_t *fd)
-{
- int i = 0;
- int op_ret = 1;
+ priv->read_child = -1;
- int _ret = 0;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out);
+ INIT_LIST_HEAD (&priv->healing);
+ INIT_LIST_HEAD (&priv->heal_waiting);
- afr_private_t *priv = NULL;
+ priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
- priv = this->private;
+ GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out);
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
+ GF_OPTION_INIT ("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
- if (_ret < 0) {
+ GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index (this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL, "%s not a subvolume",
+ read_subvol->name);
goto out;
}
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fd_ctx->pre_op_done[i]) {
- op_ret = 0;
- break;
- }
- }
}
-out:
- UNLOCK (&fd->lock);
-
- return op_ret;
-}
-
-
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- int i = 0;
- int call_count = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (__no_pre_op_done (this, fd)) {
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_plain_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- fd);
- if (!--call_count)
- break;
- }
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", read_subvol_index);
+ goto out;
}
- } else {
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
+
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
+
+ priv->favorite_child = -1;
+ GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
+ if (fav_child) {
+ priv->favorite_child = xlator_subvolume_index (this, fav_child);
+ if (priv->favorite_child == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_INVALID_SUBVOL, "%s not a subvolume, "
+ "cannot set it as favorite child",
+ fav_child->name);
goto out;
}
-
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION);
+ gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_FAVORITE_CHILD,
+ favorite_child_warning_str, fav_child->name,
+ fav_child->name, fav_child->name);
}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- }
-
- return 0;
-}
-
-/* }}} */
-
-
-int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
-{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0)
+ GF_OPTION_INIT ("favorite-child-policy", fav_child_policy, str, out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ GF_OPTION_INIT ("shd-max-threads", priv->shd.max_threads,
+ uint32, out);
- if (fd_ctx) {
- if (fd_ctx->child_failed)
- GF_FREE (fd_ctx->child_failed);
+ GF_OPTION_INIT ("shd-wait-qlength", priv->shd.wait_qlength,
+ uint32, out);
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ GF_OPTION_INIT ("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ GF_OPTION_INIT ("heal-wait-queue-length",
+ priv->heal_wait_qlen, uint32, out);
- GF_FREE (fd_ctx);
- }
-
-out:
- return 0;
-}
+ GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
+ GF_OPTION_INIT ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, str, out);
-int
-afr_release (xlator_t *this, fd_t *fd)
-{
- afr_cleanup_fd_ctx (this, fd);
+ GF_OPTION_INIT ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
- return 0;
-}
+ GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
+ out);
+ GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
-/* {{{ fsync */
+ GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
+ GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
+ out);
- int child_index = (long) cookie;
- int read_child = 0;
+ GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
- local = frame->local;
+ GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
+ bool, out);
- read_child = afr_read_child (this, local->fd->inode);
+ GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
- if (op_ret == 0) {
- local->op_ret = 0;
+ GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
+ GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
+ out);
+ fix_quorum_options (this, priv, qtype, this->options);
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
- local->success_count++;
- }
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
- call_count = afr_frame_return (frame);
+ GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
+ GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool,
+ out);
- if (call_count == 0) {
- local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino;
- local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino;
+ priv->wait_count = 1;
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+ priv->local = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->local) {
+ ret = -ENOMEM;
+ goto out;
}
- return 0;
-}
-
-
-int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- local->fd = fd_ref (fd);
- local->cont.fsync.ino = fd->inode->ino;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fsync */
-
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsyncdir_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ xattrop */
-
-int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fxattrop */
-
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-/* }}} */
-
-
-int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_checksum_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- uint8_t *file_checksum, uint8_t *dir_checksum)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0 && (local->op_ret != 0)) {
- local->op_ret = 0;
-
- local->cont.checksum.file_checksum =
- GF_MALLOC (NAME_MAX, gf_afr_mt_char);
- memcpy (local->cont.checksum.file_checksum, file_checksum,
- NAME_MAX);
-
- local->cont.checksum.dir_checksum =
- GF_MALLOC (NAME_MAX, gf_afr_mt_char);
- memcpy (local->cont.checksum.dir_checksum, dir_checksum,
- NAME_MAX);
-
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (checksum, frame, local->op_ret, local->op_errno,
- local->cont.checksum.file_checksum,
- local->cont.checksum.dir_checksum);
-
- return 0;
-}
-
-
-int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_checksum_cbk,
- priv->children[i],
- priv->children[i]->fops->checksum,
- loc, flag);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (checksum, frame, op_ret, op_errno,
- NULL, NULL);
- }
- return 0;
-}
-
-
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
-{
- afr_local_t *local = NULL;
-
- int call_count = 0;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
-
- return 0;
-}
-
-
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
-
- int ret = -1;
- int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t * local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
-
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
-
- if (call_count == 0) {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- return 0;
- }
-
- local->call_count = call_count;
-
- local->cont.lk.flock.l_type = F_UNLCK;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- call_count = --local->call_count;
-
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- afr_lk_unlock (frame, this);
- return 0;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- local->cont.lk.flock = *lock;
- }
-
- child_index++;
-
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.flock);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
-
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN, &local->cont.lk.flock);
- } else {
- /* locking has succeeded on all nodes that are up */
-
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- }
-
- return 0;
-}
-
-
-int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int i = 0;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
-
- frame->local = local;
-
- local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes),
- gf_afr_mt_char);
-
- if (!local->cont.lk.locked_nodes) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.flock = *flock;
-
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-int
-afr_priv_dump (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
-
- assert(this);
- priv = this->private;
-
- assert(priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key, "%u", priv->child_count);
- gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
- gf_proc_dump_write(key, "%u", priv->read_child_rr);
- for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- gf_proc_dump_build_key(key, key_prefix,
- "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->child_up) {
+ ret = -ENOMEM;
+ goto out;
}
- gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
- gf_proc_dump_write(key, "%d", priv->data_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
- gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
- gf_proc_dump_write(key, "%d", priv->entry_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "data_change_log");
- gf_proc_dump_write(key, "%d", priv->data_change_log);
- gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
- gf_proc_dump_write(key, "%d", priv->metadata_change_log);
- gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
- gf_proc_dump_write(key, "%d", priv->entry_change_log);
- gf_proc_dump_build_key(key, key_prefix, "read_child");
- gf_proc_dump_write(key, "%d", priv->read_child);
- gf_proc_dump_build_key(key, key_prefix, "favorite_child");
- gf_proc_dump_write(key, "%u", priv->favorite_child);
- gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "wait_count");
- gf_proc_dump_write(key, "%u", priv->wait_count);
-
- return 0;
-}
-
-
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
-
-static int
-find_child_index (xlator_t *this, xlator_t *child)
-{
- afr_private_t *priv = NULL;
- int i = -1;
+ for (i = 0; i < child_count; i++)
+ priv->child_up[i] = -1; /* start with unknown state.
+ this initialization needed
+ for afr_notify() to work
+ reliably
+ */
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
- }
-
- return i;
-}
-
-
-int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
-{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
-
- int i = -1;
- int up_children = 0;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- child_up = priv->child_up;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
-
- child_up[i] = 1;
-
- LOCK (&priv->lock);
- {
- priv->up_count++;
- }
- UNLOCK (&priv->lock);
-
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
-
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
-
- default_notify (this, event, data);
- }
-
- break;
-
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
-
- child_up[i] = 0;
-
- LOCK (&priv->lock);
- {
- priv->down_count++;
- }
- UNLOCK (&priv->lock);
-
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
-
- if (up_children == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
-
- default_notify (this, event, data);
- }
-
- break;
-
- default:
- default_notify (this, event, data);
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
+ priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
+ goto out;
}
- return ret;
-}
-
-
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
-
-static const char *no_lock_servers_warning_str = "You have set lock-server-count = 0. "
- "This means correctness is NO LONGER GUARANTEED in all cases. If two or more "
- "applications write to the same region of a file, there is a possibility that "
- "its copies will be INCONSISTENT. Set it to a value greater than 0 unless you "
- "are ABSOLUTELY SURE of what you are doing and WILL NOT HOLD GlusterFS "
- "RESPONSIBLE for inconsistent data. If you are in doubt, set it to a value "
- "greater than 0.";
-
-int32_t
-init (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- int op_errno = 0;
-
- char * read_subvol = NULL;
- char * fav_child = NULL;
- char * self_heal = NULL;
- char * algo = NULL;
- char * change_log = NULL;
- char * strict_readdir = NULL;
-
- int32_t background_count = 0;
- int32_t lock_server_count = 1;
- int32_t window_size = 0;
-
- int fav_ret = -1;
- int read_ret = -1;
- int dict_ret = -1;
-
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
-
-
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
-
- priv = this->private;
-
- read_ret = dict_get_str (this->options, "read-subvolume", &read_subvol);
- priv->read_child = -1;
-
- fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
- priv->favorite_child = -1;
-
- priv->background_self_heal_count = 16;
-
- dict_ret = dict_get_int32 (this->options, "background-self-heal-count",
- &background_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting background self-heal count to %d",
- background_count);
-
- priv->background_self_heal_count = background_count;
- }
-
- /* Default values */
-
- priv->data_self_heal = 1;
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- dict_ret = dict_get_str (this->options, "data-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->data_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-self-heal %s'. "
- "Defaulting to data-self-heal as 'on'",
- self_heal);
- priv->data_self_heal = 1;
- }
- }
-
- priv->data_self_heal_algorithm = "";
+ ret = afr_pending_xattrs_init (priv, this);
+ if (ret)
+ goto out;
- dict_ret = dict_get_str (this->options, "data-self-heal-algorithm",
- &algo);
- if (dict_ret == 0) {
- priv->data_self_heal_algorithm = gf_strdup (algo);
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+ trav = trav->next;
+ i++;
}
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
- priv->data_self_heal_window_size = 16;
-
- dict_ret = dict_get_int32 (this->options, "data-self-heal-window-size",
- &window_size);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting data self-heal window size to %d",
- window_size);
-
- priv->data_self_heal_window_size = window_size;
- }
-
- dict_ret = dict_get_str (this->options, "metadata-self-heal",
- &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->metadata_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-self-heal %s'. "
- "Defaulting to metadata-self-heal as 'on'.",
- self_heal);
- priv->metadata_self_heal = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->entry_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-self-heal %s'. "
- "Defaulting to entry-self-heal as 'on'.",
- self_heal);
- priv->entry_self_heal = 1;
- }
- }
-
- /* Change log options */
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 0;
- priv->entry_change_log = 1;
-
- dict_ret = dict_get_str (this->options, "data-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->data_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-change-log %s'. "
- "Defaulting to data-change-log as 'on'.",
- change_log);
- priv->data_change_log = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "metadata-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log,
- &priv->metadata_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-change-log %s'. "
- "Defaulting to metadata-change-log as 'off'.",
- change_log);
- priv->metadata_change_log = 0;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->entry_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-change-log %s'. "
- "Defaulting to entry-change-log as 'on'.",
- change_log);
- priv->entry_change_log = 1;
- }
- }
-
- /* Locking options */
-
- priv->data_lock_server_count = 1;
- priv->metadata_lock_server_count = 0;
- priv->entry_lock_server_count = 1;
-
- dict_ret = dict_get_int32 (this->options, "data-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting data lock server count to %d.",
- lock_server_count);
-
- if (lock_server_count == 0)
- gf_log (this->name, GF_LOG_WARNING, "%s",
- no_lock_servers_warning_str);
-
- priv->data_lock_server_count = lock_server_count;
- }
-
-
- dict_ret = dict_get_int32 (this->options,
- "metadata-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting metadata lock server count to %d.",
- lock_server_count);
- priv->metadata_lock_server_count = lock_server_count;
- }
-
-
- dict_ret = dict_get_int32 (this->options, "entry-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting entry lock server count to %d.",
- lock_server_count);
-
- priv->entry_lock_server_count = lock_server_count;
- }
-
- priv->strict_readdir = _gf_false;
-
- dict_ret = dict_get_str (this->options, "strict-readdir",
- &strict_readdir);
- if (dict_ret == 0) {
- ret = gf_string2boolean (strict_readdir, &priv->strict_readdir);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option strict-readdir %s'. "
- "Defaulting to strict-readdir as 'off'.",
- strict_readdir);
- }
- }
-
- trav = this->children;
- while (trav) {
- if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Subvolume '%s' specified as read child.",
- trav->xlator->name);
-
- priv->read_child = child_count;
- }
-
- if (fav_ret == 0 && !strcmp (fav_child, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, trav->xlator->name,
- trav->xlator->name, trav->xlator->name);
- priv->favorite_child = child_count;
- }
-
- child_count++;
- trav = trav->next;
- }
-
- priv->wait_count = 1;
-
- priv->child_count = child_count;
-
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
-
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
+ priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
+ ret = afr_selfheal_daemon_init (this);
+ if (ret) {
+ ret = -ENOMEM;
goto out;
}
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
goto out;
}
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
+ priv->root_inode = NULL;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
fini (xlator_t *this)
{
- return 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ LOCK (&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ UNLOCK (&priv->lock);
+ this->private = NULL;
+ afr_priv_destroy (priv);
+ //if (this->itable);//I dont see any destroy func
+
+ return 0;
}
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
- .checksum = afr_checksum,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .readv = afr_readv,
-
- /* inode write */
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .setxattr = afr_setxattr,
+ .lookup = afr_lookup,
+ .open = afr_open,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsync = afr_fsync,
+ .fsyncdir = afr_fsyncdir,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
.setattr = afr_setattr,
- .fsetattr = afr_fsetattr,
- .removexattr = afr_removexattr,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .readdirp = afr_readdirp,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+
+ /* dir read */
+ .opendir = afr_opendir,
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
@@ -3096,62 +653,338 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
- .release = afr_release,
- .releasedir = afr_releasedir,
+ .release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
- },
+ { .key = {"read-subvolume" },
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+        /* "read-hash-mode": integer option, range 0..2, default "1".
+         * The description is built from adjacent string literals that the
+         * compiler concatenates, so each fragment must carry its own
+         * trailing separator. */
+        { .key  = {"read-hash-mode" },
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0,
+          .max = 2,
+          .default_value = "1",
+          .description = "inode-read fops happen only on one of the bricks in "
+                         "replicate. AFR will prefer the one computed using "
+                         "the method specified using this option. "
+                         "0 = first up server, "
+                         "1 = hash by GFID of file (all clients use "
+                         "same subvolume), "
+                         "2 = hash by GFID of file and client PID",
+        },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
+ },
+ { .key = {"favorite-child"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
+ },
{ .key = {"background-self-heal-count"},
.type = GF_OPTION_TYPE_INT,
- .min = 0
+ .min = 0,
+ .max = 256,
+ .default_value = "8",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of per client self-heal "
+ "jobs that can perform parallel heals in the "
+ "background."
+ },
+ { .key = {"heal-wait-queue-length"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
+ .default_value = "128",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of heals that can be queued"
+ " for the parallel background self heal jobs."
+ },
+ { .key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable",
+ "0", "off", "no", "false", "disable",
+ "open"},
+ .default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
},
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
{ .key = {"data-self-heal-algorithm"},
- .type = GF_OPTION_TYPE_STR
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
},
{ .key = {"data-self-heal-window-size"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
- .max = 1024
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."
},
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"data-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
+ { .key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
+ },
+ { .key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
+ },
+ { .key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
+ },
+ { .key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
+ },
+ { .key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"
},
- { .key = {"metadata-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and we revert to a blocking locks mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempts to acquire a full lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking locks as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
+ },
+ { .key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."
+ },
+ { .key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
+ },
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed"},
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+        /* "quorum-count": integer option with a lower bound of 1 and no
+         * string default; the description says it is honoured when
+         * quorum-type is "fixed". */
+        { .key = {"quorum-count"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 1,
+          .max = INT_MAX,
+          .default_value = 0,
+          .description = "If quorum-type is \"fixed\" only allow writes if "
+                         "this many bricks are present.  Other quorum types "
+                         "will OVERWRITE this value.",
+        },
+ { .key = {"quorum-reads"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If quorum-reads is \"true\" only allow reads if "
+ "quorum is met when quorum is enabled.",
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ { .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
},
- { .key = {"entry-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
+ { .key = {"afr-pending-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are used to "
+ "capture information on pending heals."
},
- { .key = {"strict-readdir"},
+ { .key = {"metadata-splitbrain-forced-heal"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
},
- { .key = {NULL} },
+ { .key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"
+ },
+ { .key = {"consistent-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, readdirp will force "
+ "lookups on those entries read whose read child is "
+ "not the same as that of the parent. This will "
+ "guarantee that all read operations on a file serve "
+ "attributes from the same subvol as long as it holds "
+ " a good copy of the file/dir.",
+ },
+ { .key = {"arbiter-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "subset of child_count. Has to be 0 or 1."
+ },
+ { .key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .description = "Maximum number of threads SHD can use per local "
+ "brick. This can substantially lower heal times, "
+ "but can also crush your bricks if you don't have "
+ "the storage hardware to support this."
+ },
+ { .key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 655536,
+ .default_value = "1024",
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume",
+ },
+ { .key = {"locking-scheme"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "full", "granular"},
+ .default_value = "full",
+ .description = "If this option is set to granular, self-heal will "
+ "stop being compatible with afr-v1, which helps afr "
+ "be more granular while self-healing",
+ },
+ { .key = {"granular-entry-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, self-heal will resort to "
+ "granular way of recording changelogs and doing entry "
+ "self-heal.",
+ },
+ { .key = {"favorite-child-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "size", "ctime", "mtime", "majority"},
+ .default_value = "none",
+ .description = "This option can be used to automatically resolve "
+ "split-brains using various policies without user "
+ "intervention. \"size\" picks the file with the "
+ "biggest size as the source. \"ctime\" and \"mtime\" "
+ "pick the file with the latest ctime and mtime "
+ "respectively as the source. \"majority\" picks a file"
+ " with identical mtime and size in more than half the "
+ "number of bricks in the replica.",
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 4580bcda278..31d761f638d 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,170 +1,194 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_H__
#define __AFR_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "call-stub.h"
#include "compat-errno.h"
#include "afr-mem-types.h"
-#define AFR_XATTR_PREFIX "trusted.afr"
-
-typedef struct _afr_private {
- gf_lock_t lock; /* to guard access to child_count, etc */
- unsigned int child_count; /* total number of children */
-
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
-
- xlator_t **children;
+#include "libxlator.h"
+#include "timer.h"
+#include "syncop.h"
- unsigned char *child_up;
+#include "afr-self-heald.h"
+#include "afr-messages.h"
- char **pending_key;
-
- gf_boolean_t data_self_heal; /* on/off */
- char * data_self_heal_algorithm; /* name of algorithm */
- unsigned int data_self_heal_window_size; /* max number of pipelined
- read/writes */
-
- unsigned int background_self_heal_count;
- unsigned int background_self_heals_started;
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal; /* on/off */
-
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
-
- int read_child; /* read-subvolume */
- unsigned int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
-
- unsigned int data_lock_server_count;
- unsigned int metadata_lock_server_count;
- unsigned int entry_lock_server_count;
-
- gf_boolean_t strict_readdir;
-
- unsigned int wait_count; /* # of servers to wait for success */
-
- uint64_t up_count; /* number of CHILD_UPs we have seen */
- uint64_t down_count; /* number of CHILD_DOWNs we have seen */
-} afr_private_t;
-
-typedef struct {
- /* External interface: These are variables (some optional) that
- are set by whoever has triggered self-heal */
-
- gf_boolean_t need_data_self_heal;
- gf_boolean_t need_metadata_self_heal;
- gf_boolean_t need_entry_self_heal;
-
- gf_boolean_t forced_merge; /* Is this a self-heal triggered to
- forcibly merge the directories? */
-
- gf_boolean_t healing_fd_opened; /* true if caller has already
- opened fd */
-
- gf_boolean_t data_lock_held; /* true if caller has already
- acquired 0-0 lock */
-
- fd_t *healing_fd; /* set if callers has opened fd */
-
- gf_boolean_t background; /* do self-heal in background
- if possible */
-
- ia_type_t type; /* st_mode of the entry we're doing
- self-heal on */
+#define AFR_XATTR_PREFIX "trusted.afr"
+#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
+#define AFR_DIRTY (((afr_private_t *) (THIS->private))->afr_dirty)
- /* Function to call to unwind. If self-heal is being done in the
- background, this function will be called as soon as possible. */
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
+#define AFR_DEFAULT_SPB_CHOICE_TIMEOUT 300 /*in seconds*/
- int (*unwind) (call_frame_t *frame, xlator_t *this);
+#define ARBITER_BRICK_INDEX 2
- /* End of external interface members */
+typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol);
- /* array of stat's, one for each child */
- struct iatt *buf;
- struct iatt parentbuf;
+typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int err);
- /* array of xattr's, one for each child */
- dict_t **xattr;
+typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
- /* array of errno's, one for each child */
- int *child_errno;
+/* GNU statement-expression helpers for flag/array bookkeeping.
+ * Every macro argument is parenthesised so that expression arguments
+ * (e.g. "a + b", "cond ? x : y") expand with the intended precedence.
+ * alloca0 evaluates its size argument exactly once, so a side-effecting
+ * argument is no longer applied twice (once for alloca, once for memset).
+ *   alloca0(size)              - stack-allocate size bytes and zero them
+ *   AFR_COUNT(array,max)       - number of non-zero entries in array[0..max)
+ *   AFR_INTERSECT(dst,s1,s2,n) - dst[i] = s1[i] && s2[i] for i in [0..n)
+ *   AFR_CMP(a1,a2,len)         - 1 if any of the first len entries differ, else 0
+ *   AFR_IS_ARBITER_BRICK       - true when arbiter_count is 1 and index is
+ *                                ARBITER_BRICK_INDEX
+ */
+#define alloca0(size) ({size_t __size = (size); void *__ptr = alloca(__size); memset(__ptr, 0, __size); __ptr;})
+#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < (max); __i++) if ((array)[__i]) __res++; __res;})
+#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < (max); __i++) (dst)[__i] = (src1)[__i] && (src2)[__i];})
+#define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < (len); __i++) if ((a1)[__i] != (a2)[__i]) { __cmp = 1; break;} __cmp;})
+#define AFR_IS_ARBITER_BRICK(priv, index) (((priv)->arbiter_count == 1) && ((index) == ARBITER_BRICK_INDEX))
- int32_t **pending_matrix;
- int32_t **delta_matrix;
+typedef enum {
+ AFR_FAV_CHILD_NONE,
+ AFR_FAV_CHILD_BY_SIZE,
+ AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_BY_MTIME,
+ AFR_FAV_CHILD_BY_MAJORITY,
+ AFR_FAV_CHILD_POLICY_MAX,
+} afr_favorite_child_policy;
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- int *success;
- int *locked_nodes;
- int lock_count;
+typedef struct _afr_private {
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
+ unsigned int arbiter_count; /*subset of child_count.
+ Has to be 0 or 1.*/
- mode_t impunging_entry_mode;
- const char *linkname;
+ xlator_t **children;
- int op_failed;
+ inode_t *root_inode;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
+ unsigned char *child_up;
+ unsigned char *local;
- loc_t parent_loc;
+ char **pending_key;
- call_frame_t *orig_frame;
- gf_boolean_t unwound;
+ char *data_self_heal; /* on/off/open */
+ char * data_self_heal_algorithm; /* name of algorithm */
+ unsigned int data_self_heal_window_size; /* max number of pipelined
+ read/writes */
- /* private data for the particular self-heal algorithm */
- void *private;
+ struct list_head heal_waiting; /*queue for files that need heal*/
+ uint32_t heal_wait_qlen; /*configurable queue length for heal_waiting*/
+ int32_t heal_waiters; /* No. of elements currently in wait queue.*/
+
+ struct list_head healing;/* queue for files that are undergoing
+ background heal*/
+ uint32_t background_self_heal_count;/*configurable queue length for
+ healing queue*/
+ int32_t healers;/* No. of elements currently undergoing background
+ heal*/
+
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+ gf_boolean_t data_change_log; /* on/off */
+ gf_boolean_t metadata_change_log; /* on/off */
+ gf_boolean_t entry_change_log; /* on/off */
+
+ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
+ int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+
+ afr_favorite_child_policy fav_child_policy;/*Policy to use for automatic
+ resolution of split-brains.*/
+
+ gf_boolean_t inodelk_trace;
+ gf_boolean_t entrylk_trace;
+
+ unsigned int wait_count; /* # of servers to wait for success */
+
+ gf_timer_t *timer; /* launched when parent up is received */
+
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ gf_boolean_t pre_op_compat; /* on/off */
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
+ gf_boolean_t quorum_reads;
+
+ char vol_uuid[UUID_SIZE + 1];
+ int32_t *last_event;
+
+ /* @event_generation: Keeps count of number of events received which can
+ potentially impact consistency decisions. The events are CHILD_UP
+ and CHILD_DOWN, when we have to recalculate the freshness/staleness
+ of copies to detect if changes had happened while the other server
+ was down. CHILD_DOWN and CHILD_UP can also be received on network
+ disconnect/reconnects and not necessarily server going down/up.
+ Recalculating freshness/staleness on network events is equally
+ important as we might have had a network split brain.
+ */
+ uint32_t event_generation;
+
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
+ char *afr_dirty;
+
+ afr_self_heald_t shd;
+
+ gf_boolean_t consistent_metadata;
+ uint64_t spb_choice_timeout;
+ gf_boolean_t need_heal;
+
+ /* pump dependencies */
+ void *pump_private;
+ gf_boolean_t use_afr_in_pump;
+ char *locking_scheme;
+ gf_boolean_t esh_granular;
+} afr_private_t;
- int (*flush_self_heal_cbk) (call_frame_t *frame, xlator_t *this);
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
+} afr_transaction_type;
- call_frame_t *sh_frame;
-} afr_self_heal_t;
+typedef enum {
+ AFR_TRANSACTION_LK,
+ AFR_SELFHEAL_LK,
+} transaction_lk_type_t;
+typedef enum {
+ AFR_LOCK_OP,
+ AFR_UNLOCK_OP,
+} afr_lock_op_type_t;
typedef enum {
- AFR_DATA_TRANSACTION, /* truncate, write, ... */
- AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
- AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
- AFR_ENTRY_RENAME_TRANSACTION, /* rename */
- AFR_FLUSH_TRANSACTION, /* flush */
-} afr_transaction_type;
+ AFR_DATA_SELF_HEAL_LK,
+ AFR_METADATA_SELF_HEAL_LK,
+ AFR_ENTRY_SELF_HEAL_LK,
+}selfheal_lk_type_t;
+typedef enum {
+ AFR_INODELK_TRANSACTION,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_ENTRYLK_TRANSACTION,
+ AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_INODELK_SELFHEAL,
+ AFR_INODELK_NB_SELFHEAL,
+ AFR_ENTRYLK_SELFHEAL,
+ AFR_ENTRYLK_NB_SELFHEAL,
+} afr_lock_call_type_t;
/*
xattr format: trusted.afr.volume = [x y z]
@@ -177,9 +201,8 @@ static inline int
afr_index_for_transaction_type (afr_transaction_type type)
{
switch (type) {
-
+
case AFR_DATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
return 0;
case AFR_METADATA_TRANSACTION:
@@ -193,441 +216,741 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+/* Translate an inode type into the index that
+ * afr_index_for_transaction_type() reports for the matching transaction
+ * kind: directories map to the entry transaction, regular files to the
+ * data transaction. Any other inode type yields -1. */
+static inline int
+afr_index_from_ia_type (ia_type_t type)
+{
+        if (type == IA_IFDIR)
+                return afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+
+        if (type == IA_IFREG)
+                return afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+
+        /* no changelog index is defined here for other inode types */
+        return -1;
+}
+
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_entry_lockee_t;
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
+
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
+
+typedef struct {
+ loc_t *lk_loc;
+
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
+ const char *lk_basename;
+ const char *lower_basename;
+ const char *higher_basename;
+ char lower_locked;
+ char higher_locked;
+
+ unsigned char *locked_nodes;
+ unsigned char *lower_locked_nodes;
+
+ selfheal_lk_type_t selfheal_lk_type;
+ transaction_lk_type_t transaction_lk_type;
+
+ int32_t lock_count;
+ int32_t entrylk_lock_count;
+
+ uint64_t lock_number;
+ int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
+
+ int32_t lock_op_ret;
+ int32_t lock_op_errno;
+ afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+} afr_internal_lock_t;
+
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+ dict_t *xattr;/*For xattrop*/
+ dict_t *xdata;
+ struct iatt poststat;
+ struct iatt postparent;
+ struct iatt prestat;
+ struct iatt preparent;
+ struct iatt preparent2;
+ struct iatt postparent2;
+ /* For rchecksum */
+ uint8_t checksum[MD5_DIGEST_LENGTH];
+ gf_boolean_t buf_has_zeroes;
+ /* For lookup */
+ int8_t need_heal;
+};
typedef enum {
- AFR_CHILD_UP_FLUSH,
- AFR_CHILD_DOWN_FLUSH,
-} afr_flush_type;
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
+
+typedef struct {
+ unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
+ int inherited[AFR_NUM_CHANGE_LOGS];
+ int on_disk[AFR_NUM_CHANGE_LOGS];
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+
+ unsigned int *lock_piggyback;
+ unsigned int *lock_acquired;
+
+ int flags;
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+
+ /* @open_fd_count:
+ Number of open FDs queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ uint32_t open_fd_count;
+
+
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
+
+ /* the subvolume on which the latest sequence of readdirs (starting
+ at offset 0) has begun. Till the next readdir request with 0 offset
+ arrives, we continue to read off this subvol.
+ */
+ int readdir_subvol;
+} afr_fd_ctx_t;
typedef struct _afr_local {
- unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
+ glusterfs_fop_t op;
+ unsigned int call_count;
+
+ /* @event_generation: copy of priv->event_generation taken at the
+ time of starting the transaction. The copy is made so that we
+ have a stable value through the various phases of the transaction.
+ */
+ unsigned int event_generation;
- unsigned int govinda_gOvinda;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- unsigned int read_child_index;
- unsigned char read_child_returned;
- unsigned int first_up_child;
+ gf_lkowner_t saved_lk_owner;
- pid_t saved_pid;
+ int32_t op_ret;
+ int32_t op_errno;
- int32_t op_ret;
- int32_t op_errno;
+ int32_t **pending;
- int32_t **pending;
+ int dirty[AFR_NUM_CHANGE_LOGS];
- loc_t loc;
- loc_t newloc;
+ loc_t loc;
+ loc_t newloc;
- fd_t *fd;
+ fd_t *fd;
+ afr_fd_ctx_t *fd_ctx;
- glusterfs_fop_t fop;
+ /* @child_up: copy of priv->child_up taken at the time of transaction
+ start. The copy is taken so that we have a stable child_up array
+ through the phases of the transaction as priv->child_up[i] can keep
+ changing through time.
+ */
+ unsigned char *child_up;
+
+ /* @read_attempted:
+ array of flags representing subvolumes where read operations of
+ the read transaction have already been attempted. The array is
+ first pre-filled with down subvolumes, and as reads are performed
+ on other subvolumes, those are set as well. This way if the read
+ operation fails we do not retry on that subvolume again.
+ */
+ unsigned char *read_attempted;
- unsigned char *child_up;
+ /* @readfn:
- int32_t *child_errno;
-
- dict_t *xattr_req;
- int open_fd_count;
+ pointer to function which will perform the read operation on a given
+ subvolume. Used in read transactions.
+ */
- int32_t inodelk_count;
- int32_t entrylk_count;
+ afr_read_txn_wind_t readfn;
- int (*up_down_flush_cbk) (call_frame_t *, xlator_t *);
+ /* @refreshed:
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
*/
+ gf_boolean_t refreshed;
- int op;
- struct {
- struct {
- unsigned char buf_set;
- struct statvfs buf;
- } statfs;
+ /* @inode:
- struct {
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt postparent;
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- dict_t *xattr;
- dict_t **xattrs;
- gf_boolean_t is_revalidate;
- } lookup;
+ the inode on which the read txn is performed. ref'ed and copied
+ from either fd->inode or loc.inode
+ */
- struct {
- int32_t flags;
- int32_t wbflags;
- } open;
+ inode_t *inode;
- struct {
- int32_t cmd;
- struct flock flock;
- unsigned char *locked_nodes;
- } lk;
+ /* @parent[2]:
- struct {
- uint8_t *file_checksum;
- uint8_t *dir_checksum;
- } checksum;
+ parent inode[s] on which directory transactions are performed.
+ */
- /* inode read */
+ inode_t *parent;
+ inode_t *parent2;
- struct {
- int32_t mask;
- int last_tried; /* index of the child we tried previously */
- } access;
+ /* @readable:
- struct {
- int last_tried;
- ino_t ino;
- } stat;
+ array of flags representing servers from which a read can be
+ performed. This is the output of afr_inode_refresh()
+ */
+ unsigned char *readable;
+ unsigned char *readable2; /*For rename transaction*/
- struct {
- int last_tried;
- ino_t ino;
- } fstat;
+ afr_inode_refresh_cbk_t refreshfn;
- struct {
- size_t size;
- int last_tried;
- ino_t ino;
- } readlink;
+ /* @refreshinode:
- struct {
- char *name;
- int last_tried;
- } getxattr;
+ Inode currently getting refreshed.
+ */
+ inode_t *refreshinode;
- struct {
- ino_t ino;
- size_t size;
- off_t offset;
- int last_tried;
- } readv;
+ /*To handle setattr/setxattr on yet to be linked inode from dht*/
+ uuid_t refreshgfid;
- /* dir read */
+ /*
+ @pre_op_compat:
- struct {
- int success_count;
- int32_t op_ret;
- int32_t op_errno;
+ compatibility mode of pre-op. send a separate pre-op and
+ op operations as part of transaction, rather than combining
+ */
- uint32_t *checksum;
- } opendir;
+ gf_boolean_t pre_op_compat;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- size_t size;
- off_t offset;
+ dict_t *xattr_req;
- gf_boolean_t failed;
- int last_tried;
- } readdir;
+ afr_internal_lock_t internal_lock;
- struct {
- int32_t op_ret;
- int32_t op_errno;
+ dict_t *dict;
- size_t size;
- off_t offset;
- int32_t flag;
+ int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
- int last_tried;
- } getdents;
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
- /* inode write */
+ /* This write appended to the file. Not necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
- struct {
- ino_t ino;
- struct iatt prebuf;
- struct iatt postbuf;
+ /*
+ This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
- int32_t op_ret;
+ struct {
+ struct {
+ gf_boolean_t needs_fresh_lookup;
+ uuid_t gfid_req;
+ } lookup;
- struct iovec *vector;
- struct iobref *iobref;
- int32_t count;
- off_t offset;
- } writev;
+ struct {
+ unsigned char buf_set;
+ struct statvfs buf;
+ } statfs;
+
+ struct {
+ int32_t flags;
+ } open;
+
+ struct {
+ int32_t cmd;
+ struct gf_flock user_flock;
+ struct gf_flock ret_flock;
+ unsigned char *locked_nodes;
+ } lk;
+
+ /* inode read */
+
+ struct {
+ int32_t mask;
+ int last_index; /* index of the child we tried previously */
+ } access;
+
+ struct {
+ int last_index;
+ } stat;
+
+ struct {
+ int last_index;
+ } fstat;
+
+ struct {
+ size_t size;
+ int last_index;
+ } readlink;
+
+ struct {
+ char *name;
+ int last_index;
+ long xattr_len;
+ } getxattr;
+
+ struct {
+ size_t size;
+ off_t offset;
+ int last_index;
+ uint32_t flags;
+ } readv;
+
+ /* dir read */
+
+ struct {
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+
+ uint32_t *checksum;
+ } opendir;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+ dict_t *dict;
+ gf_boolean_t failed;
+ int last_index;
+ } readdir;
+ /* inode write */
struct {
- ino_t ino;
struct iatt prebuf;
struct iatt postbuf;
- } fsync;
+ } inode_wfop; //common structure for all inode-write-fops
- struct {
- ino_t ino;
- off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
- } truncate;
+ struct {
+ int32_t op_ret;
- struct {
- ino_t ino;
- off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
- } ftruncate;
+ struct iovec *vector;
+ struct iobref *iobref;
+ int32_t count;
+ off_t offset;
+ uint32_t flags;
+ } writev;
- struct {
- ino_t ino;
- struct iatt in_buf;
+ struct {
+ off_t offset;
+ } truncate;
+
+ struct {
+ off_t offset;
+ } ftruncate;
+
+ struct {
+ struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
- } setattr;
+ } setattr;
- struct {
- ino_t ino;
- struct iatt in_buf;
+ struct {
+ struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
- } fsetattr;
+ } fsetattr;
- struct {
- dict_t *dict;
- int32_t flags;
- } setxattr;
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
- struct {
- char *name;
- } removexattr;
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
- /* dir write */
-
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- fd_t *fd;
- int32_t flags;
- mode_t mode;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
- } create;
+ struct {
+ char *name;
+ } removexattr;
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- dev_t dev;
- mode_t mode;
- inode_t *inode;
- struct iatt buf;
+ struct {
+ dict_t *xattr;
+ gf_xattrop_flags_t optype;
+ } xattrop;
+
+ /* dir write */
+
+ struct {
+ inode_t *inode;
+ struct iatt buf;
struct iatt preparent;
struct iatt postparent;
- struct iatt read_child_buf;
- } mknod;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
+ } create;
+
+ struct {
+ dev_t dev;
+ mode_t mode;
+ dict_t *params;
+ } mknod;
+
+ struct {
+ int32_t mode;
+ dict_t *params;
+ } mkdir;
+
+ struct {
+ int flags;
+ } rmdir;
+
+ struct {
+ dict_t *params;
+ char *linkpath;
+ } symlink;
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
int32_t mode;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } mkdir;
+ off_t offset;
+ size_t len;
+ } fallocate;
struct {
- ino_t parent_ino;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
+ off_t offset;
+ size_t len;
+ } discard;
- struct {
- ino_t parent_ino;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } rmdir;
+ struct {
+ off_t offset;
+ off_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
- struct {
- ino_t oldparent_ino;
- ino_t newparent_ino;
- ino_t ino;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
+ struct {
+ char *volume;
+ int32_t cmd;
+ struct gf_flock flock;
+ } inodelk;
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
+ struct {
+ off_t offset;
+ gf_seek_what_t what;
+ } seek;
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
- } symlink;
+ } cont;
- struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
- } cont;
-
- struct {
- off_t start, len;
+ struct {
+ off_t start, len;
+
+ gf_boolean_t eager_lock_on;
+ int *eager_lock;
+
+ char *basename;
+ char *new_basename;
+
+ loc_t parent_loc;
+ loc_t new_parent_loc;
+
+ afr_transaction_type type;
+
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
+
+ struct list_head eager_locked;
+
+ unsigned char *pre_op;
- unsigned char *locked_nodes;
- int lock_count;
+ /* For arbiter configuration only. */
+ dict_t **pre_op_xdata;
+ unsigned char *pre_op_sources;
- char *basename;
- char *new_basename;
+ /* @failed_subvols: subvolumes on which a pre-op or a
+ FOP failed. */
+ unsigned char *failed_subvols;
- loc_t parent_loc;
- loc_t new_parent_loc;
+ /* @dirtied: flag which indicates whether we set dirty flag
+ in the OP. Typically true when we are performing operation
+ on more than one subvol and optimistic changelog is disabled
- afr_transaction_type type;
+ A 'true' value set in @dirtied flag means an 'undirtying'
+ has to be done in POST-OP phase.
+ */
+ gf_boolean_t dirtied;
- int success_count;
- int erase_pending;
- int failure_count;
+ /* @inherited: flag which indicates that the dirty flags
+ of the previous transaction were inherited
+ */
+ gf_boolean_t inherited;
- int last_tried;
- int32_t *child_errno;
+ /*
+ @no_uninherit: flag which indicates that a pre_op_uninherit()
+ must _not_ be attempted (and returned as failure) always. This
+ flag is set when a hard pre-op is performed, but not accounted
+ for it in fd_ctx->on_disk[]. Such transactions are "isolated"
+ from the pre-op piggybacking entirely and therefore uninherit
+ must not be attempted.
+ */
+ gf_boolean_t no_uninherit;
- call_frame_t *main_frame;
+ /* @uninherit_done:
+ @uninherit_value:
- int (*fop) (call_frame_t *frame, xlator_t *this);
+ The above pair variables make pre_op_uninherit() idempotent.
+ Both are FALSE initially. The first call to pre_op_uninherit
+ sets @uninherit_done to TRUE and the return value to
+ @uninherit_value. Further calls will check for @uninherit_done
+ to be TRUE and if so will simply return @uninherit_value.
+ */
+ gf_boolean_t uninherit_done;
+ gf_boolean_t uninherit_value;
- int (*done) (call_frame_t *frame, xlator_t *this);
+ /* @changelog_resume: function to be called after changelogging
+ (either pre-op or post-op) is done
+ */
- int (*resume) (call_frame_t *frame, xlator_t *this);
+ afr_changelog_resume_t changelog_resume;
- int (*unwind) (call_frame_t *frame, xlator_t *this);
+ call_frame_t *main_frame;
+
+ int (*wind) (call_frame_t *frame, xlator_t *this, int subvol);
+
+ int (*fop) (call_frame_t *frame, xlator_t *this);
+
+ int (*done) (call_frame_t *frame, xlator_t *this);
+
+ int (*resume) (call_frame_t *frame, xlator_t *this);
+
+ int (*unwind) (call_frame_t *frame, xlator_t *this);
/* post-op hook */
- int (*post_post_op) (call_frame_t *frame, xlator_t *this);
- } transaction;
+ } transaction;
- afr_self_heal_t self_heal;
-} afr_local_t;
+ syncbarrier_t barrier;
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
-typedef struct {
- unsigned char *pre_op_done;
- unsigned char *opened_on; /* which subvolumes the fd is open on */
- unsigned char *child_failed;
- int flags;
- int32_t wbflags;
- uint64_t up_count; /* number of CHILD_UPs this fd has seen */
- uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
+ dict_t *xattr_rsp; /*for [f]xattrop*/
- int32_t last_tried;
- gf_boolean_t failed_over;
- struct list_head entries; /* needed for readdir failover */
-} afr_fd_ctx_t;
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
+ /* For client side background heals. */
+ struct list_head healer;
+ call_frame_t *heal_frame;
+
+ gf_boolean_t need_full_crawl;
+} afr_local_t;
-/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_afr_mt_##type); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+typedef struct _afr_inode_ctx {
+ uint64_t read_subvol;
+ int spb_choice;
+ gf_timer_t *timer;
+} afr_inode_ctx_t;
+
+typedef struct afr_spbc_timeout {
+ call_frame_t *frame;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
+ loc_t *loc;
+ int spb_child_index;
+} afr_spbc_timeout_t;
+
+typedef struct afr_spb_status {
+ call_frame_t *frame;
+ loc_t *loc;
+} afr_spb_status_t;
+
+typedef struct afr_empty_brick_args {
+ call_frame_t *frame;
+ loc_t loc;
+ int empty_index;
+ char *op_type;
+} afr_empty_brick_args_t;
+
+typedef struct afr_read_subvol_args {
+ ia_type_t ia_type;
+ uuid_t gfid;
+} afr_read_subvol_args_t;
+
+typedef struct afr_granular_esh_args {
+ fd_t *heal_fd;
+ xlator_t *xl;
+ call_frame_t *frame;
+ gf_boolean_t mismatch; /* flag to represent occurrence of type/gfid
+ mismatch */
+} afr_granular_esh_args_t;
/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
+ ((op_errno == ENOTCONN) || \
+ (op_errno == EBADFD)))
-#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
+int
+afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
+
+int
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvol,
+ int event_generation);
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int event_generation);
-/* have we tried all children? */
-#define all_tried(i, count) ((i) == (count) - 1)
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args);
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode);
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p,
+ int type);
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables,
+ int *event_p, afr_transaction_type type,
+ afr_read_subvol_args_t *args);
+
+#define afr_data_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a)
+
+#define afr_metadata_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)
+
+int
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t cbk);
+
+int32_t
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+
+int
+xattr_is_equal (dict_t *this, char *key1, data_t *value1, void *data);
+
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child);
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
+
+int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes);
void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
-afr_up_children_count (int child_count, unsigned char *child_up);
+afr_set_lock_number (call_frame_t *frame, xlator_t *this);
+
+int32_t
+afr_unlock (call_frame_t *frame, xlator_t *this);
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this);
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index);
+int
+afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this);
int
-afr_deitransform (ino64_t ino, int child_count);
+afr_blocking_lock (call_frame_t *frame, xlator_t *this);
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this);
+int
+afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
int
-afr_frame_return (call_frame_t *frame);
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count);
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode);
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
int
-afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, afr_flush_type type);
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal);
void
-afr_set_opendir_done (xlator_t *this, inode_t *inode);
+afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv);
-uint64_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode);
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this);
+
+int
+afr_frame_return (call_frame_t *frame);
+
+int
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
@@ -635,138 +958,195 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
int
afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
-#define AFR_STACK_UNWIND(fop, frame, params ...) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- if (frame) { \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
+#define AFR_STACK_UNWIND(fop, frame, params ...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
} \
- STACK_UNWIND_STRICT (fop, frame, params); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
} while (0);
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
- } while (0);
+#define AFR_FRAME_INIT(frame, op_errno) \
+ ({frame->local = mem_get0 (THIS->local_pool); \
+ if (afr_local_init (frame->local, THIS->private, &op_errno)) { \
+ afr_local_cleanup (frame->local, THIS); \
+ mem_put (frame->local); \
+ frame->local = NULL; }; \
+ frame->local;})
+
+#define AFR_STACK_RESET(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ int __opr; \
+ STACK_RESET (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ AFR_FRAME_INIT (frame, __opr); \
+ } while (0)
/* allocate and return a string that is the basename of argument */
-static inline char *
-AFR_BASENAME (const char *str)
+static inline char *
+AFR_BASENAME (const char *str)
{
- char *__tmp_str = NULL;
- char *__basename_str = NULL;
- __tmp_str = gf_strdup (str);
- __basename_str = gf_strdup (basename (__tmp_str));
- GF_FREE (__tmp_str);
- return __basename_str;
+ char *__tmp_str = NULL;
+ char *__basename_str = NULL;
+ __tmp_str = gf_strdup (str);
+ __basename_str = gf_strdup (basename (__tmp_str));
+ GF_FREE (__tmp_str);
+ return __basename_str;
}
-/* initialize local_t */
-static inline int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
-{
- local->child_up = GF_CALLOC (sizeof (*local->child_up),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->child_up) {
- return -ENOMEM;
- }
+call_frame_t *
+afr_copy_frame (call_frame_t *base);
+
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this);
- memcpy (local->child_up, priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
+int32_t
+afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+ transaction_lk_type_t lk_type);
- local->call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (local->call_count == 0)
- return -ENOTCONN;
+int
+afr_higher_errno (int32_t old_errno, int32_t new_errno);
- local->transaction.erase_pending = 1;
+int
+afr_final_errno (afr_local_t *local, afr_private_t *priv);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+int
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req);
- return 0;
-}
+void
+afr_fix_open (fd_t *fd, xlator_t *this);
+
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+
+void
+afr_set_low_priority (call_frame_t *frame);
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags);
+void
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
+int**
+afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat);
+
+void
+afr_filter_xattrs (dict_t *xattr);
-/**
- * first_up_child - return the index of the first child that is up
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
*/
+#define AFR_QUORUM_AUTO INT_MAX
-static inline int
-afr_first_up_child (afr_private_t *priv)
-{
- xlator_t ** children = NULL;
- int ret = -1;
- int i = 0;
-
- LOCK (&priv->lock);
- {
- children = priv->children;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i]) {
- ret = i;
- break;
- }
- }
- }
- UNLOCK (&priv->lock);
-
- return ret;
-}
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
-static inline int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
-{
- int i;
-
- local->first_up_child = afr_first_up_child (priv);
-
- local->child_errno = GF_CALLOC (sizeof (*local->child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!local->child_errno) {
- return -ENOMEM;
- }
-
- local->pending = GF_CALLOC (sizeof (*local->pending),
- priv->child_count,
- gf_afr_mt_int32_t);
-
- if (!local->pending) {
- return -ENOMEM;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
- 3, /* data + metadata + entry */
- gf_afr_mt_int32_t);
- if (!local->pending[i])
- return -ENOMEM;
- }
-
- local->transaction.locked_nodes = GF_CALLOC (sizeof (*local->transaction.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
- local->transaction.child_errno = GF_CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
- return 0;
-}
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+void
+afr_remove_eager_lock_stub (afr_local_t *local);
+
+void
+afr_replies_wipe (struct afr_reply *replies, int count);
+gf_boolean_t
+afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2);
+
+gf_boolean_t
+afr_is_xattr_ignorable (char *key);
+
+int
+afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_get_split_brain_status (void *opaque);
+
+int
+afr_get_split_brain_status_cbk (int ret, call_frame_t *frame, void *opaque);
+
+int
+afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this,
+ int spb_choice);
+int
+afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
+ int *spb_choice);
+int
+afr_get_child_index_from_name (xlator_t *this, char *name);
+
+int
+afr_is_split_brain (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb);
+int
+afr_spb_choice_timeout_cancel (xlator_t *this, inode_t *inode);
+
+int
+afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque);
+
+gf_boolean_t
+afr_get_need_heal (xlator_t *this);
+
+void
+afr_set_need_heal (xlator_t *this, afr_local_t *local);
+
+int
+afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd);
+
+int
+afr_get_msg_id (char *op_type);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
new file mode 100644
index 00000000000..ef299ec5855
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.c
@@ -0,0 +1,2470 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <fnmatch.h>
+
+#include "afr-common.c"
+#include "defaults.h"
+#include "glusterfs.h"
+#include "pump.h"
+#include "afr-messages.h"
+
+
+/*
+ * Store a heap-allocated copy of @gfid in @dict under the "gfid-req" key.
+ *
+ * Returns 0 on success, -1 when the copy cannot be allocated, or the
+ * non-zero value reported by dict_set_dynptr(). When dict_set_dynptr()
+ * reports failure the copy is released with GF_FREE before returning.
+ */
+static int
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+{
+        uuid_t *gfid_copy = NULL;
+        int     ret       = 0;
+
+        GF_ASSERT (gfid);
+
+        gfid_copy = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+        if (!gfid_copy)
+                return -1;
+
+        gf_uuid_copy (*gfid_copy, gfid);
+
+        ret = dict_set_dynptr (dict, "gfid-req", gfid_copy, sizeof (uuid_t));
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, -ret,
+                        AFR_MSG_DICT_SET_FAILED, "gfid set failed");
+                GF_FREE (gfid_copy);
+        }
+
+        return ret;
+}
+
+/*
+ * Build a gfid whose first 15 bytes are zero and whose 16th byte is 1,
+ * then store it in @dict through afr_set_dict_gfid().
+ *
+ * Returns whatever afr_set_dict_gfid() returns.
+ */
+static int
+afr_set_root_gfid (dict_t *dict)
+{
+        uuid_t root_gfid;
+
+        memset (root_gfid, 0, 16);
+        root_gfid[15] = 1;
+
+        return afr_set_dict_gfid (dict, root_gfid);
+}
+
+/*
+ * Fill @child with a loc describing the entry @name inside directory
+ * @parent.
+ *
+ * The parent gfid comes from parent->inode->gfid when that is set,
+ * otherwise from parent->gfid; the call fails when both are null.
+ * child->path is allocated as "/<name>" when the parent path is "/",
+ * else "<parent path>/<name>". child->name points just past the last '/'
+ * of that path, child->parent takes a ref on parent->inode, and
+ * child->inode is a new inode from the parent inode's table.
+ *
+ * Returns 0 on success and -1 on failure. On failure a non-NULL @child
+ * is cleaned with loc_wipe(). @this is not used.
+ */
+static int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+        int     ret     = -1;
+        uuid_t  pargfid = {0};
+
+        if (!child)
+                goto out;
+
+        if (!gf_uuid_is_null (parent->inode->gfid))
+                gf_uuid_copy (pargfid, parent->inode->gfid);
+        else if (!gf_uuid_is_null (parent->gfid))
+                gf_uuid_copy (pargfid, parent->gfid);
+
+        if (gf_uuid_is_null (pargfid))
+                goto out;
+
+        if (strcmp (parent->path, "/") == 0)
+                ret = gf_asprintf ((char **)&child->path, "/%s", name);
+        else
+                ret = gf_asprintf ((char **)&child->path, "%s/%s",
+                                   parent->path, name);
+
+        if (-1 == ret) {
+                /*
+                 * No path was produced, so strrchr() below must not run.
+                 * Clear the pointer so loc_wipe() at "out" does not act on
+                 * a value the failed formatting call may have left behind.
+                 */
+                child->path = NULL;
+                goto out;
+        }
+
+        child->name = strrchr (child->path, '/');
+        if (child->name)
+                child->name++;
+
+        child->parent = inode_ref (parent->inode);
+        child->inode = inode_new (parent->inode->table);
+        gf_uuid_copy (child->pargfid, pargfid);
+
+        if (!child->inode) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if ((ret == -1) && child)
+                loc_wipe (child);
+
+        return ret;
+}
+
+/*
+ * Point @loc at the root directory: path "/" (a gf_strdup'd copy), an
+ * empty name, a new ref on priv->root_inode, and loc->gfid copied from
+ * that inode's gfid.
+ */
+static void
+afr_build_root_loc (xlator_t *this, loc_t *loc)
+{
+        afr_private_t *afr_priv = this->private;
+
+        loc->inode = inode_ref (afr_priv->root_inode);
+        gf_uuid_copy (loc->gfid, loc->inode->gfid);
+
+        loc->name = "";
+        loc->path = gf_strdup ("/");
+}
+
+/*
+ * Copy buf->ia_gfid into loc->gfid and, when @postparent is given,
+ * postparent->ia_gfid into loc->pargfid. @loc and @buf are asserted
+ * non-NULL.
+ */
+static void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+{
+        GF_ASSERT (loc);
+        GF_ASSERT (buf);
+
+        if (postparent != NULL)
+                gf_uuid_copy (loc->pargfid, postparent->ia_gfid);
+
+        gf_uuid_copy (loc->gfid, buf->ia_gfid);
+}
+
+/* Passed as the second argument of fd_create() in gf_pump_traverse_directory(). */
+static uint64_t pump_pid = 0;
+/*
+ * Record the gfids from a lookup in @loc: iatt->ia_gfid is copied into
+ * loc->inode->gfid, and afr_update_loc_gfids() fills the gfid fields of
+ * @loc itself from @iatt and @parent.
+ */
+static void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+        gf_uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+        afr_update_loc_gfids (loc, iatt, parent);
+}
+
+/* Set the pump_start_pending flag in the pump private data. Returns 0. */
+static int
+pump_mark_start_pending (xlator_t *this)
+{
+        afr_private_t  *afr_priv = this->private;
+        pump_private_t *pump     = afr_priv->pump_private;
+
+        pump->pump_start_pending = 1;
+
+        return 0;
+}
+
+/* Return the current value of the pump_start_pending flag. */
+static int
+is_pump_start_pending (xlator_t *this)
+{
+        afr_private_t  *afr_priv = this->private;
+        pump_private_t *pump     = afr_priv->pump_private;
+
+        return (pump->pump_start_pending);
+}
+
+/* Clear the pump_start_pending flag in the pump private data. Returns 0. */
+static int
+pump_remove_start_pending (xlator_t *this)
+{
+        afr_private_t  *afr_priv = this->private;
+        pump_private_t *pump     = afr_priv->pump_private;
+
+        pump->pump_start_pending = 0;
+
+        return 0;
+}
+
+/*
+ * Read pump_state from the pump private data of the current xlator
+ * (THIS) while holding pump_state_lock, and return it.
+ */
+static pump_state_t
+pump_get_state ()
+{
+        xlator_t       *xl       = THIS;
+        afr_private_t  *afr_priv = xl->private;
+        pump_private_t *pump     = afr_priv->pump_private;
+        pump_state_t    current;
+
+        LOCK (&pump->pump_state_lock);
+        {
+                current = pump->pump_state;
+        }
+        UNLOCK (&pump->pump_state_lock);
+
+        return current;
+}
+
+/*
+ * Replace pump_state with @state under pump_state_lock, then emit a
+ * debug message with the previous and the new value. Returns 0.
+ */
+int
+pump_change_state (xlator_t *this, pump_state_t state)
+{
+        afr_private_t  *afr_priv = this->private;
+        pump_private_t *pump     = afr_priv->pump_private;
+        pump_state_t    previous;
+
+        GF_ASSERT (pump);
+
+        LOCK (&pump->pump_state_lock);
+        {
+                previous = pump->pump_state;
+                pump->pump_state = state;
+        }
+        UNLOCK (&pump->pump_state_lock);
+
+        gf_msg_debug (this->name, 0,
+                      "Pump changing state from %d to %d",
+                      previous, state);
+
+        return 0;
+}
+
+/*
+ * Copy @path, including its terminating NUL, into pump_priv->resume_path
+ * while holding resume_path_lock.
+ *
+ * The previous strncpy() used strlen (path) + 1 as its bound, which is
+ * the source length and therefore limits nothing. The copy is now
+ * refused when @path needs PATH_MAX bytes or more.
+ * NOTE(review): assumes resume_path holds at least PATH_MAX bytes, the
+ * bound pump_save_file_stats() uses for current_file -- confirm against
+ * the pump_private_t definition.
+ *
+ * Returns 0 when the path was stored, -1 when it was too long.
+ */
+static int
+pump_set_resume_path (xlator_t *this, const char *path)
+{
+        afr_private_t  *priv      = NULL;
+        pump_private_t *pump_priv = NULL;
+        size_t          path_len  = 0;
+
+        priv = this->private;
+        pump_priv = priv->pump_private;
+
+        GF_ASSERT (pump_priv);
+
+        path_len = strlen (path);
+        if (path_len >= PATH_MAX) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_INFO_COMMON,
+                        "resume path too long, not saved");
+                return -1;
+        }
+
+        LOCK (&pump_priv->resume_path_lock);
+        {
+                memcpy (pump_priv->resume_path, path, path_len + 1);
+        }
+        UNLOCK (&pump_priv->resume_path_lock);
+
+        return 0;
+}
+
+/*
+ * Best-effort: store @path under the PUMP_PATH key as an xattr on the
+ * root of the pump source child (PUMP_SOURCE_CHILD), using
+ * syncop_setxattr().
+ *
+ * Nothing is done while the pump state is PUMP_STATE_RESUME. Failures
+ * are only logged; the function returns 0 in every case.
+ *
+ * The dict is now checked after dict_new(), so an allocation failure no
+ * longer passes a NULL dict to dict_set_str(), syncop_setxattr() and
+ * dict_unref().
+ */
+static int
+pump_save_path (xlator_t *this, const char *path)
+{
+        afr_private_t *priv     = NULL;
+        pump_state_t   state;
+        dict_t        *dict     = NULL;
+        loc_t          loc      = {0};
+        int            dict_ret = 0;
+        int            ret      = -1;
+
+        state = pump_get_state ();
+        if (state == PUMP_STATE_RESUME)
+                return 0;
+
+        priv = this->private;
+
+        GF_ASSERT (priv->root_inode);
+
+        /* Allocate before building the loc so this exit owns nothing. */
+        dict = dict_new ();
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        AFR_MSG_DICT_SET_FAILED,
+                        "%s: failed to allocate dict for the key %s",
+                        path, PUMP_PATH);
+                return 0;
+        }
+
+        afr_build_root_loc (this, &loc);
+
+        dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
+        if (dict_ret)
+                gf_msg (this->name, GF_LOG_WARNING,
+                        -dict_ret, AFR_MSG_DICT_SET_FAILED,
+                        "%s: failed to set the key %s", path, PUMP_PATH);
+
+        ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0, NULL,
+                               NULL);
+
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_INFO, -ret, AFR_MSG_INFO_COMMON,
+                        "setxattr failed - could not save path=%s", path);
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "setxattr succeeded - saved path=%s", path);
+        }
+
+        dict_unref (dict);
+
+        loc_wipe (&loc);
+        return 0;
+}
+
+/*
+ * Map the current pump state to a continue/stop decision.
+ *
+ * Returns 0 for PUMP_STATE_RESUME and PUMP_STATE_RUNNING and -1 for
+ * every other state. For PUMP_STATE_ABORT the saved path is first reset
+ * to "/" through pump_save_path(); an unrecognised state is logged at
+ * debug level.
+ */
+static int
+pump_check_and_update_status (xlator_t *this)
+{
+        int ret = -1;
+
+        switch (pump_get_state ()) {
+        case PUMP_STATE_RESUME:
+        case PUMP_STATE_RUNNING:
+                ret = 0;
+                break;
+        case PUMP_STATE_PAUSE:
+                break;
+        case PUMP_STATE_ABORT:
+                pump_save_path (this, "/");
+                break;
+        default:
+                gf_msg_debug (this->name, 0,
+                              "Unknown pump state");
+                break;
+        }
+
+        return ret;
+}
+
+/*
+ * Return the resume_path member of the pump private data. No lock is
+ * taken here.
+ */
+static const char *
+pump_get_resume_path (xlator_t *this)
+{
+        afr_private_t  *afr_priv = this->private;
+        pump_private_t *pump     = afr_priv->pump_private;
+
+        return pump->resume_path;
+}
+
+/*
+ * While the pump is in PUMP_STATE_RESUME, switch it to
+ * PUMP_STATE_RUNNING once the stored resume path is "/" or equals
+ * @path; otherwise only log that the resume path has not been reached.
+ * In any other state nothing happens. Returns 0.
+ */
+static int
+pump_update_resume_state (xlator_t *this, const char *path)
+{
+        const char *saved_path = NULL;
+
+        if (pump_get_state () != PUMP_STATE_RESUME)
+                return 0;
+
+        saved_path = pump_get_resume_path (this);
+
+        if (strcmp (saved_path, "/") == 0) {
+                gf_msg_debug (this->name, 0,
+                              "Reached the resume path (/). Proceeding to change state"
+                              " to running");
+
+                pump_change_state (this, PUMP_STATE_RUNNING);
+                return 0;
+        }
+
+        if (strcmp (saved_path, path) == 0) {
+                gf_msg_debug (this->name, 0,
+                              "Reached the resume path. Proceeding to change state"
+                              " to running");
+
+                pump_change_state (this, PUMP_STATE_RUNNING);
+                return 0;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Not yet hit the resume path:res-path=%s,path=%s",
+                      saved_path, path);
+
+        return 0;
+}
+
+/*
+ * Decide whether the directory at @path should be descended into.
+ *
+ * Outside PUMP_STATE_RESUME the answer is always _gf_true. While
+ * resuming, the answer is _gf_true only when @path occurs as a
+ * substring of the stored resume path (strstr).
+ */
+static gf_boolean_t
+is_pump_traversal_allowed (xlator_t *this, const char *path)
+{
+        const char *saved_path = NULL;
+
+        if (pump_get_state () != PUMP_STATE_RESUME)
+                return _gf_true;
+
+        saved_path = pump_get_resume_path (this);
+
+        if (strstr (saved_path, path)) {
+                gf_msg_debug (this->name, 0,
+                              "On the right path to resumption path");
+                return _gf_true;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Not the right path to resuming=> ignoring traverse");
+        return _gf_false;
+}
+
+/*
+ * Under resume_path_lock, increment number_files_pumped and remember
+ * @path in current_file. Returns 0.
+ *
+ * strncpy() does not write a terminating NUL when the source fills the
+ * whole bound, so the copy is limited to PATH_MAX - 1 bytes and the last
+ * byte is set explicitly. The bytes touched stay within the PATH_MAX
+ * range the previous call already wrote.
+ */
+static int
+pump_save_file_stats (xlator_t *this, const char *path)
+{
+        afr_private_t  *priv      = NULL;
+        pump_private_t *pump_priv = NULL;
+
+        priv = this->private;
+        pump_priv = priv->pump_private;
+
+        LOCK (&pump_priv->resume_path_lock);
+        {
+                pump_priv->number_files_pumped++;
+
+                strncpy (pump_priv->current_file, path,
+                         PATH_MAX - 1);
+                pump_priv->current_file[PATH_MAX - 1] = '\0';
+        }
+        UNLOCK (&pump_priv->resume_path_lock);
+
+        return 0;
+}
+
+/*
+ * Walk the directory at @loc. Entries are read in batches with
+ * syncop_readdirp(); each entry other than "." and ".." is looked up,
+ * passed to afr_selfheal_name() and afr_selfheal(), recorded through
+ * pump_save_path() / pump_save_file_stats(), and, when it is a directory
+ * and is_pump_traversal_allowed() agrees, walked recursively.
+ *
+ * Always returns 0, whatever happened during the walk.
+ *
+ * NOTE(review): every "goto out" below skips syncop_close (fd), and the
+ * out label does nothing with fd -- confirm whether fd is leaked on
+ * those paths.
+ * NOTE(review): xattr_rsp is handed to syncop_lookup() on each entry and
+ * never released in this function -- confirm whether the caller owns a
+ * reference that must be dropped.
+ */
+static int
+gf_pump_traverse_directory (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ loc_t entry_loc = {0};
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t entries;
+ struct iatt iatt = {0};
+ struct iatt parent = {0};
+ dict_t *xattr_rsp = NULL;
+ int ret = 0;
+ gf_boolean_t is_directory_empty = _gf_true;
+ gf_boolean_t free_entries = _gf_false;
+
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+
+ GF_ASSERT (loc->inode);
+
+ fd = fd_create (loc->inode, pump_pid);
+ if (!fd) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_FD_CREATE_FAILED,
+ "Failed to create fd for %s", loc->path);
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0,
+ "opendir failed on %s", loc->path);
+ goto out;
+ }
+
+ gf_msg_trace (this->name, 0,
+ "pump opendir on %s returned=%d",
+ loc->path, ret);
+
+ /* The loop runs while syncop_readdirp() returns any non-zero value; a */
+ /* batch that leaves the list empty ends the walk through "out". */
+ while (syncop_readdirp (this, fd, 131072, offset, &entries, NULL,
+ NULL)) {
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list)) {
+ gf_msg_trace (this->name, 0,
+ "no more entries in directory");
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ gf_msg_debug (this->name, 0,
+ "found readdir entry=%s", entry->d_name);
+
+ /* Remember where to continue the next readdirp batch. */
+ offset = entry->d_off;
+ if (gf_uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_GFID_NULL, "%s/%s: No "
+ "gfid present skipping",
+ loc->path, entry->d_name);
+ continue;
+ }
+ /* Drop the previous entry's loc before building this one. */
+ loc_wipe (&entry_loc);
+ ret = afr_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret)
+ goto out;
+
+ /* "." and ".." are skipped only after their loc has been built. */
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ is_directory_empty = _gf_false;
+ /* iatt is filled by the lookup below, so ia_ino logged here is */
+ /* still the value left by an earlier iteration (or zero). */
+ gf_msg_debug (this->name, 0,
+ "lookup %s => %"PRId64,
+ entry_loc.path,
+ iatt.ia_ino);
+
+ ret = syncop_lookup (this, &entry_loc, &iatt, &parent,
+ NULL, &xattr_rsp);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, AFR_MSG_INFO_COMMON,
+ "%s: lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = afr_selfheal_name (this, loc->gfid, entry->d_name,
+ NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SELF_HEAL_FAILED,
+ "%s: name self-heal failed (%s/%s)",
+ entry_loc.path, uuid_utoa (loc->gfid),
+ entry->d_name);
+ continue;
+ }
+
+ ret = afr_selfheal (this, iatt.ia_gfid);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SELF_HEAL_FAILED,
+ "%s: self-heal failed (%s)",
+ entry_loc.path,
+ uuid_utoa (iatt.ia_gfid));
+ continue;
+ }
+
+ pump_fill_loc_info (&entry_loc, &iatt, &parent);
+
+ pump_update_resume_state (this, entry_loc.path);
+
+ pump_save_path (this, entry_loc.path);
+ pump_save_file_stats (this, entry_loc.path);
+
+ /* A negative result (pause, abort, unknown state) stops the walk. */
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0,
+ "Pump beginning to exit out");
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ if (is_pump_traversal_allowed (this, entry_loc.path)) {
+ gf_msg_trace (this->name, 0,
+ "entering dir=%s",
+ entry->d_name);
+ /* The recursive call's return value is ignored. */
+ gf_pump_traverse_directory (&entry_loc);
+ }
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ gf_msg_trace (this->name, 0, "offset incremented to %d",
+ (int32_t) offset);
+
+ }
+
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_msg_debug (this->name, 0, "closing the fd failed");
+
+ /* Reached only when the loop ended without a "goto out". */
+ if (is_directory_empty && (strcmp (loc->path, "/") == 0)) {
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
+ "Empty source brick. Nothing to be done.");
+ }
+
+out:
+ if (entry_loc.path)
+ loc_wipe (&entry_loc);
+ if (free_entries)
+ gf_dirent_free (&entries);
+ return 0;
+}
+
+/*
+ * Ensure pump has a path to resume from - "/" is installed when
+ * pump_get_resume_path() yields nothing - and switch the pump state to
+ * PUMP_STATE_RESUME.  Always returns 0.
+ */
+static int
+pump_update_resume_path (xlator_t *this)
+{
+        const char *saved_path = pump_get_resume_path (this);
+
+        if (!saved_path) {
+                gf_msg_debug (this->name, 0,
+                              "Did not find a path=> setting to '/'");
+                pump_set_resume_path (this, "/");
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Found a path to resume from: %s",
+                              saved_path);
+        }
+
+        pump_change_state (this, PUMP_STATE_RESUME);
+
+        return 0;
+}
+
+/*
+ * Remove pump's bookkeeping xattrs from the root of the volume and then
+ * answer the pending pump command via pump_command_reply().
+ *
+ * PUMP_PATH is removed from child 0, PUMP_SINK_COMPLETE from child 1 and
+ * PUMP_SOURCE_COMPLETE from every child.  Removal is best effort: a
+ * failure is logged at debug level and never changes the reply.
+ *
+ * The function has the shape of a setxattr callback; @cookie, @op_ret,
+ * @op_errno and @xdata are not used.
+ */
+static int32_t
+pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        afr_private_t *priv   = NULL;
+        loc_t          loc    = {0};
+        int            i      = 0;
+        int            ret    = 0;
+        int            source = 0;
+        int            sink   = 1;
+
+        priv = this->private;
+
+        afr_build_root_loc (this, &loc);
+
+        ret = syncop_removexattr (priv->children[source], &loc,
+                                  PUMP_PATH, 0, NULL);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "removexattr of %s "
+                              "failed with %s", PUMP_PATH, strerror (-ret));
+        }
+
+        ret = syncop_removexattr (priv->children[sink], &loc,
+                                  PUMP_SINK_COMPLETE, 0, NULL);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "removexattr of %s "
+                              "failed with %s", PUMP_SINK_COMPLETE,
+                              strerror (-ret));
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                ret = syncop_removexattr (priv->children[i], &loc,
+                                          PUMP_SOURCE_COMPLETE, 0, NULL);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "removexattr "
+                                      "failed with %s", strerror (-ret));
+                }
+        }
+
+        loc_wipe (&loc);
+        return pump_command_reply (frame, this);
+}
+
+/*
+ * Finish a pump run according to the current pump state.
+ *
+ * PUMP_STATE_RUNNING: mark pump as finished, set PUMP_SOURCE_COMPLETE on
+ * the source child and PUMP_SINK_COMPLETE on the sink child (both on the
+ * root loc) and save "/" as the pump path.
+ * PUMP_STATE_ABORT: resume the stored cleaner stub.
+ *
+ * Always returns 0; failures are only logged.
+ */
+static int
+pump_complete_migration (xlator_t *this)
+{
+        afr_private_t  *priv      = NULL;
+        pump_private_t *pump_priv = NULL;
+        dict_t         *dict      = NULL;
+        pump_state_t    state;
+        loc_t           loc       = {0};
+        int             dict_ret  = 0;
+        int             ret       = -1;
+
+        priv = this->private;
+        pump_priv = priv->pump_private;
+
+        GF_ASSERT (priv->root_inode);
+
+        afr_build_root_loc (this, &loc);
+
+        state = pump_get_state ();
+        if (state == PUMP_STATE_RUNNING) {
+                gf_msg_debug (this->name, 0,
+                              "Pump finished pumping");
+
+                pump_priv->pump_finished = _gf_true;
+
+                /* The dict is only needed to carry the completion markers,
+                 * so it is created here and released before returning. */
+                dict = dict_new ();
+                if (!dict) {
+                        gf_msg_debug (this->name, ENOMEM,
+                                      "Out of memory");
+                } else {
+                        dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE,
+                                                 "jargon");
+                        if (dict_ret)
+                                gf_msg (this->name, GF_LOG_WARNING, -dict_ret,
+                                        AFR_MSG_DICT_SET_FAILED,
+                                        "%s: failed to set the key %s",
+                                        loc.path, PUMP_SOURCE_COMPLETE);
+
+                        ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc,
+                                               dict, 0, NULL, NULL);
+                        if (ret < 0) {
+                                gf_msg_debug (this->name, 0,
+                                              "setxattr failed - while "
+                                              "notifying source complete");
+                        }
+
+                        dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE,
+                                                 "jargon");
+                        if (dict_ret)
+                                gf_msg (this->name, GF_LOG_WARNING, -dict_ret,
+                                        AFR_MSG_DICT_SET_FAILED,
+                                        "%s: failed to set the key %s",
+                                        loc.path, PUMP_SINK_COMPLETE);
+
+                        ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc,
+                                               dict, 0, NULL, NULL);
+                        if (ret < 0) {
+                                gf_msg_debug (this->name, 0,
+                                              "setxattr failed - while "
+                                              "notifying sink complete");
+                        }
+                }
+
+                pump_save_path (this, "/");
+
+        } else if (state == PUMP_STATE_ABORT) {
+                gf_msg_debug (this->name, 0, "Starting cleanup "
+                              "of pump internal xattrs");
+                call_resume (pump_priv->cleaner);
+        }
+
+        if (dict)
+                dict_unref (dict);
+
+        loc_wipe (&loc);
+        return 0;
+}
+
+/*
+ * Look up @loc on the sink child, sending the root gfid in the request.
+ *
+ * Returns 0 when the lookup succeeds and a non-zero value when the request
+ * dict cannot be prepared or the lookup fails (-1 in the latter case).
+ */
+static int
+pump_lookup_sink (loc_t *loc)
+{
+        xlator_t    *this      = NULL;
+        struct iatt  iatt, parent;
+        dict_t      *xattr_rsp = NULL;
+        dict_t      *xattr_req = NULL;
+        int          ret       = 0;
+
+        this = THIS;
+
+        xattr_req = dict_new ();
+        if (!xattr_req) {
+                gf_msg_debug (this->name, ENOMEM,
+                              "Out of memory");
+                ret = -1;
+                goto out;
+        }
+
+        ret = afr_set_root_gfid (xattr_req);
+        if (ret)
+                goto out;
+
+        ret = syncop_lookup (PUMP_SINK_CHILD (this), loc, &iatt, &parent,
+                             xattr_req, &xattr_rsp);
+
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "Lookup on sink child failed");
+                ret = -1;
+                goto out;
+        }
+
+out:
+        /* The response xattrs are never inspected; release them so the
+         * reference handed back by the lookup is not leaked. */
+        if (xattr_rsp)
+                dict_unref (xattr_rsp);
+
+        if (xattr_req)
+                dict_unref (xattr_req);
+
+        return ret;
+}
+
+/*
+ * Body of the pump synctask.
+ *
+ * Looks up the root, checks the pump status, fixes up the resume path,
+ * verifies that the sink answers a root lookup and then traverses the
+ * namespace from the root before completing the migration.
+ *
+ * @data is not used.  Always returns 0.
+ */
+static int
+pump_task (void *data)
+{
+        xlator_t      *this      = NULL;
+        afr_private_t *priv      = NULL;
+
+        loc_t          loc       = {0};
+        struct iatt    iatt, parent;
+        dict_t        *xattr_rsp = NULL;
+        dict_t        *xattr_req = NULL;
+
+        int            ret       = -1;
+
+        this = THIS;
+        priv = this->private;
+
+        GF_ASSERT (priv->root_inode);
+
+        afr_build_root_loc (this, &loc);
+        xattr_req = dict_new ();
+        if (!xattr_req) {
+                gf_msg_debug (this->name, ENOMEM,
+                              "Out of memory");
+                ret = -1;
+                goto out;
+        }
+
+        afr_set_root_gfid (xattr_req);
+        ret = syncop_lookup (this, &loc, &iatt, &parent,
+                             xattr_req, &xattr_rsp);
+        if (ret < 0) {
+                /* The task carries on as before; the failure is merely
+                 * made visible instead of being overwritten below. */
+                gf_msg_debug (this->name, -ret,
+                              "lookup on root failed");
+        }
+
+        gf_msg_trace (this->name, 0,
+                      "lookup: path=%s gfid=%s",
+                      loc.path, uuid_utoa (loc.inode->gfid));
+
+        ret = pump_check_and_update_status (this);
+        if (ret < 0) {
+                goto out;
+        }
+
+        pump_update_resume_path (this);
+
+        afr_set_root_gfid (xattr_req);
+        ret = pump_lookup_sink (&loc);
+        if (ret) {
+                pump_update_resume_path (this);
+                goto out;
+        }
+
+        gf_pump_traverse_directory (&loc);
+
+        pump_complete_migration (this);
+out:
+        /* Response xattrs of the root lookup are unused; drop them. */
+        if (xattr_rsp)
+                dict_unref (xattr_rsp);
+
+        if (xattr_req)
+                dict_unref (xattr_req);
+
+        loc_wipe (&loc);
+        return 0;
+}
+
+
+/*
+ * Completion callback of the pump synctask: drops the reference on the
+ * root inode and destroys the call stack of @sync_frame.
+ * @ret and @data are not used.  Always returns 0.
+ */
+static int
+pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
+{
+        xlator_t      *xl   = THIS;
+        afr_private_t *conf = xl->private;
+
+        inode_unref (conf->root_inode);
+        STACK_DESTROY (sync_frame->root);
+
+        gf_msg_debug (xl->name, 0, "Pump xlator exiting");
+
+        return 0;
+}
+
+/*
+ * Launch the pump synctask on @pump_frame.
+ *
+ * Sets the lock owner of the frame, derives the global pump_pid from the
+ * address of the frame's call stack, creates the synctask and, on success,
+ * flags the translator to route fops through AFR (use_afr_in_pump).
+ *
+ * Returns the value of synctask_new(); -1 is treated as failure.
+ */
+int
+pump_start (call_frame_t *pump_frame, xlator_t *this)
+{
+        afr_private_t  *priv      = NULL;
+        pump_private_t *pump_priv = NULL;
+        char           *owner_str = NULL;
+
+        int             ret       = -1;
+
+        priv = this->private;
+        pump_priv = priv->pump_private;
+
+        afr_set_lk_owner (pump_frame, this, pump_frame->root);
+        pump_pid = (uint64_t) (unsigned long)pump_frame->root;
+
+        /* Render the lock owner before the task is created:
+         * pump_task_completion() destroys the stack of this frame, so
+         * pump_frame->root must not be touched once the task may run.
+         * NOTE(review): relies on lkowner_utoa() returning a buffer that
+         * stays valid across synctask_new() - confirm. */
+        owner_str = lkowner_utoa (&pump_frame->root->lk_owner);
+
+        ret = synctask_new (pump_priv->env, pump_task,
+                            pump_task_completion,
+                            pump_frame, NULL);
+        if (ret == -1) {
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "setting pump as started lk_owner: %s %"PRIu64,
+                      owner_str, pump_pid);
+
+        priv->use_afr_in_pump = 1;
+out:
+        return ret;
+}
+
+/*
+ * Create a fresh frame, move pump to PUMP_STATE_RUNNING and start the pump
+ * synctask on that frame.
+ *
+ * Returns -1 when the frame cannot be created, otherwise the result of
+ * pump_start().
+ */
+static int
+pump_start_synctask (xlator_t *this)
+{
+        call_frame_t *frame = NULL;
+        int           ret   = 0;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                ret = -1;
+                goto out;
+        }
+
+        pump_change_state (this, PUMP_STATE_RUNNING);
+
+        ret = pump_start (frame, this);
+        if (ret < 0) {
+                /* No task was created, so pump_task_completion() will
+                 * never run to destroy this frame; release it here.
+                 * NOTE(review): assumes a failed synctask_new() leaves the
+                 * caller's frame untouched - confirm against syncop.c. */
+                STACK_DESTROY (frame->root);
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Callback of the setxattr that asks the sink to connect to the
+ * destination brick.
+ *
+ * On success pump is marked as "start pending" and GF_EVENT_PARENT_UP is
+ * delivered to the translator recorded in the cookie frame.  In every case
+ * the pump command is answered, with op_ret 0 on success or the failing
+ * @op_ret otherwise.  Always returns 0.
+ */
+int32_t
+pump_cmd_start_setxattr_cbk (call_frame_t *frame,
+                             void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret,
+                             int32_t op_errno, dict_t *xdata)
+{
+        afr_local_t  *local     = frame->local;
+        call_frame_t *sink_call = cookie;
+        int           status    = 0;
+
+        if (op_ret >= 0) {
+                gf_msg_debug (this->name, 0,
+                              "Successfully initiated destination "
+                              "brick connect");
+
+                pump_mark_start_pending (this);
+
+                /* send the PARENT_UP as pump is ready now */
+                if (sink_call && sink_call->this)
+                        sink_call->this->notify (sink_call->this,
+                                                 GF_EVENT_PARENT_UP, this);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        AFR_MSG_INFO_COMMON,
+                        "Could not initiate destination "
+                        "brick connect");
+                status = op_ret;
+        }
+
+        local->op_ret = status;
+        pump_command_reply (frame, this);
+
+        return 0;
+}
+
+/*
+ * Ask the sink child to connect to the destination brick.
+ *
+ * The brick value stored under RB_PUMP_CMD_START in local->dict is copied
+ * into a NUL-terminated string and wound to the sink as a setxattr with
+ * key CLIENT_CMD_CONNECT on the root loc; the answer arrives in
+ * pump_cmd_start_setxattr_cbk().
+ *
+ * Returns 0 once the call has been wound, a negative value otherwise.
+ */
+static int
+pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *local      = frame->local;
+        loc_t          root_loc   = {0};
+        data_t        *brick      = NULL;
+        dict_t        *request    = NULL;
+        char          *connect_to = NULL;
+        int            ret        = -1;
+
+        GF_ASSERT (priv->root_inode);
+
+        afr_build_root_loc (this, &root_loc);
+
+        brick = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
+        if (!brick) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        AFR_MSG_DICT_GET_FAILED,
+                        "Could not get destination brick value");
+                goto out;
+        }
+
+        request = dict_new ();
+        if (!request)
+                goto out;
+
+        connect_to = GF_CALLOC (1, brick->len+1, gf_common_mt_char);
+        if (!connect_to)
+                goto out;
+
+        memcpy (connect_to, brick->data, brick->len);
+        connect_to[brick->len] = '\0';
+        gf_msg_debug (this->name, 0, "Got destination brick %s\n",
+                      connect_to);
+
+        ret = dict_set_dynstr (request, CLIENT_CMD_CONNECT, connect_to);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        AFR_MSG_DICT_SET_FAILED,
+                        "Could not inititiate destination brick "
+                        "connect");
+                goto out;
+        }
+
+        STACK_WIND (frame,
+                    pump_cmd_start_setxattr_cbk,
+                    PUMP_SINK_CHILD(this),
+                    PUMP_SINK_CHILD(this)->fops->setxattr,
+                    &root_loc,
+                    request,
+                    0, NULL);
+
+        ret = 0;
+
+out:
+        if (request)
+                dict_unref (request);
+
+        if (brick)
+                data_unref (brick);
+
+        /* the string is released here only when the function failed */
+        if (ret && connect_to)
+                GF_FREE (connect_to);
+
+        loc_wipe (&root_loc);
+        return ret;
+}
+
+/*
+ * Returns non-zero when the current pump state is PUMP_STATE_ABORT.
+ * @this is not used.
+ */
+static int
+is_pump_aborted (xlator_t *this)
+{
+        pump_state_t current = pump_get_state ();
+
+        return (current == PUMP_STATE_ABORT);
+}
+
+/*
+ * Callback of the PUMP_PATH getxattr issued by pump_execute_start().
+ *
+ * The resume path is taken from the reply ("/" when the getxattr failed or
+ * carried no PUMP_PATH).  A start request is refused while pump is in
+ * PUMP_STATE_RUNNING or PUMP_STATE_RESUME.  Otherwise, when pump is in the
+ * aborted state the sink connect is initiated; in any other state the
+ * synctask is started directly and the command is answered right away.
+ *
+ * Always returns 0.
+ */
+int32_t
+pump_cmd_start_getxattr_cbk (call_frame_t *frame,
+                             void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret,
+                             int32_t op_errno,
+                             dict_t *dict, dict_t *xdata)
+{
+        afr_local_t  *local     = frame->local;
+        char         *resume_at = NULL;
+        pump_state_t  current;
+        int           status    = 0;
+        int           reply_now = 0;
+
+        if (op_ret >= 0) {
+                gf_msg_trace (this->name, 0,
+                              "getxattr succeeded");
+
+                if (dict_get_str (dict, PUMP_PATH, &resume_at) < 0)
+                        resume_at = "/";
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "getxattr failed - changing pump "
+                              "state to RUNNING with '/'");
+                resume_at = "/";
+                status = op_ret;
+        }
+
+        current = pump_get_state ();
+        if ((current == PUMP_STATE_RUNNING) ||
+            (current == PUMP_STATE_RESUME)) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        0, AFR_MSG_PUMP_XLATOR_ERROR,
+                        "Pump is already started");
+                status = -1;
+                goto out;
+        }
+
+        pump_set_resume_path (this, resume_at);
+
+        if (is_pump_aborted (this)) {
+                /* We're re-starting pump afresh */
+                status = pump_initiate_sink_connect (frame, this);
+        } else {
+                /* We're re-starting pump from a previous
+                   pause */
+                gf_msg_debug (this->name, 0,
+                              "about to start synctask");
+                status = pump_start_synctask (this);
+                reply_now = 1;
+        }
+
+out:
+        if ((status < 0) || (reply_now == 1)) {
+                local->op_ret = status;
+                pump_command_reply (frame, this);
+        }
+        return 0;
+}
+
+/*
+ * Answer a pump status query.
+ *
+ * A snapshot of the pumped-file counter and of the current file name is
+ * taken under resume_path_lock and formatted as
+ *     status=<0|1>:no_of_files=<n>[:current_file=<path>]
+ * (the current file is only reported while pump has not finished).  The
+ * string is returned to the caller under RB_PUMP_CMD_STATUS by unwinding
+ * the getxattr call.  Allocation failures unwind with -1 / ENOMEM.
+ *
+ * Always returns 0; the outcome travels through the unwind.
+ */
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t  *priv         = NULL;
+        pump_private_t *pump_priv    = NULL;
+
+        uint64_t        number_files = 0;
+
+        char            filename[PATH_MAX];
+        char            summary[PATH_MAX+256];
+        char           *dict_str     = NULL;
+
+        int32_t         op_ret       = 0;
+        int32_t         op_errno     = 0;
+
+        dict_t         *dict         = NULL;
+        int             ret          = -1;
+
+        priv = this->private;
+        pump_priv = priv->pump_private;
+
+        LOCK (&pump_priv->resume_path_lock);
+        {
+                number_files  = pump_priv->number_files_pumped;
+                /* snprintf always NUL-terminates; strncpy would leave
+                 * filename unterminated for a PATH_MAX-long source. */
+                snprintf (filename, sizeof (filename), "%s",
+                          pump_priv->current_file);
+        }
+        UNLOCK (&pump_priv->resume_path_lock);
+
+        dict_str     = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
+        if (!dict_str) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        if (pump_priv->pump_finished) {
+                snprintf (summary, PATH_MAX+256,
+                          "no_of_files=%"PRIu64, number_files);
+        } else {
+                snprintf (summary, PATH_MAX+256,
+                          "no_of_files=%"PRIu64":current_file=%s",
+                          number_files, filename);
+        }
+        snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+                  (pump_priv->pump_finished)?1:0, summary);
+
+        ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
+        if (ret < 0) {
+                gf_msg_debug (this->name, 0,
+                              "dict_set_dynstr returned negative value");
+        } else {
+                /* the string is no longer freed by this function */
+                dict_str = NULL;
+        }
+
+        op_ret = 0;
+
+out:
+
+        AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+
+        if (dict)
+                dict_unref (dict);
+
+        GF_FREE (dict_str);
+
+        return 0;
+}
+
+/*
+ * Handle the pump "pause" command: switch the state to PUMP_STATE_PAUSE
+ * and answer the command with success.  Always returns 0.
+ */
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *cmd_local = frame->local;
+
+        pump_change_state (this, PUMP_STATE_PAUSE);
+
+        cmd_local->op_ret = 0;
+        pump_command_reply (frame, this);
+
+        return 0;
+}
+
+/*
+ * Handle the pump "start" command.
+ *
+ * Without a root inode the command is answered with op_ret -1.  Otherwise
+ * a getxattr for PUMP_PATH on the root loc is wound to the source child;
+ * processing continues in pump_cmd_start_getxattr_cbk().
+ *
+ * Always returns 0.
+ */
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv     = this->private;
+        afr_local_t   *local    = frame->local;
+        loc_t          root_loc = {0};
+
+        if (priv->root_inode) {
+                GF_ASSERT (priv->root_inode);
+
+                afr_build_root_loc (this, &root_loc);
+
+                STACK_WIND (frame,
+                            pump_cmd_start_getxattr_cbk,
+                            PUMP_SOURCE_CHILD(this),
+                            PUMP_SOURCE_CHILD(this)->fops->getxattr,
+                            &root_loc,
+                            PUMP_PATH, NULL);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        0, AFR_MSG_PUMP_XLATOR_ERROR,
+                        "Pump xlator cannot be started without an initial "
+                        "lookup");
+                local->op_ret = -1;
+                pump_command_reply (frame, this);
+        }
+
+        loc_wipe (&root_loc);
+        return 0;
+}
+
+/*
+ * Synctask body used by the commit and abort commands: runs
+ * pump_xattr_cleaner() on the frame passed in as @data.  Always returns 0.
+ */
+static int
+pump_cleanup_helper (void *data)
+{
+        call_frame_t *cmd_frame = data;
+        xlator_t     *xl        = cmd_frame->this;
+
+        pump_xattr_cleaner (cmd_frame, 0, xl, 0, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * Completion callback of the cleanup synctask: destroys the call stack of
+ * @sync_frame.  @ret and @data are not used.  Always returns 0.
+ */
+static int
+pump_cleanup_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        call_frame_t *task_frame = sync_frame;
+
+        STACK_DESTROY (task_frame->root);
+
+        return 0;
+}
+
+/*
+ * Handle the pump "commit" command.
+ *
+ * While migration is still in progress the command is refused with
+ * EINPROGRESS.  Once pump has finished, the state moves to
+ * PUMP_STATE_COMMIT and a synctask is spawned that cleans up pump's
+ * xattrs and then answers the command.  If that task cannot be set up the
+ * command is answered with -1 / ENOMEM instead of being left pending.
+ *
+ * Always returns 0.
+ */
+int
+pump_execute_commit (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t  *priv       = NULL;
+        pump_private_t *pump_priv  = NULL;
+        afr_local_t    *local      = NULL;
+        call_frame_t   *sync_frame = NULL;
+        int             ret        = 0;
+
+        priv      = this->private;
+        pump_priv = priv->pump_private;
+        local     = frame->local;
+
+        local->op_ret = 0;
+        if (!pump_priv->pump_finished) {
+                gf_msg (this->name, GF_LOG_ERROR, EINPROGRESS,
+                        AFR_MSG_MIGRATION_IN_PROGRESS,
+                        "Commit can't proceed. Migration in progress");
+                local->op_ret = -1;
+                local->op_errno = EINPROGRESS;
+                pump_command_reply (frame, this);
+                return 0;
+        }
+
+        pump_change_state (this, PUMP_STATE_COMMIT);
+
+        sync_frame = create_frame (this, this->ctx->pool);
+        if (!sync_frame)
+                goto fail;
+
+        ret = synctask_new (pump_priv->env, pump_cleanup_helper,
+                            pump_cleanup_done, sync_frame, frame);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Couldn't create "
+                              "synctask for cleaning up xattrs.");
+                /* pump_cleanup_done() will never run for this frame.
+                 * NOTE(review): assumes a failed synctask_new() leaves
+                 * the caller's frame untouched - confirm. */
+                STACK_DESTROY (sync_frame->root);
+                goto fail;
+        }
+
+        return 0;
+
+fail:
+        /* Nothing else will answer the command, so fail it here. */
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        pump_command_reply (frame, this);
+
+        return 0;
+}
+/*
+ * Handle the pump "abort" command.
+ *
+ * Moves pump to PUMP_STATE_ABORT and resets the progress counters under
+ * resume_path_lock.  When pump had already finished, a synctask is spawned
+ * to clean up pump's xattrs and answer the command.  Otherwise a stub for
+ * pump_xattr_cleaner() is stored in pump_priv->cleaner; it is resumed by
+ * pump_complete_migration() when that function sees the abort state.
+ * If neither can be set up the command is answered with -1 / ENOMEM
+ * instead of being left pending.
+ *
+ * Always returns 0.
+ */
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t  *priv       = NULL;
+        pump_private_t *pump_priv  = NULL;
+        afr_local_t    *local      = NULL;
+        call_frame_t   *sync_frame = NULL;
+        int             ret        = 0;
+
+        priv      = this->private;
+        pump_priv = priv->pump_private;
+        local     = frame->local;
+
+        pump_change_state (this, PUMP_STATE_ABORT);
+
+        LOCK (&pump_priv->resume_path_lock);
+        {
+                pump_priv->number_files_pumped = 0;
+                pump_priv->current_file[0] = '\0';
+        }
+        UNLOCK (&pump_priv->resume_path_lock);
+
+        local->op_ret = 0;
+        if (pump_priv->pump_finished) {
+                sync_frame = create_frame (this, this->ctx->pool);
+                if (!sync_frame)
+                        goto fail;
+
+                ret = synctask_new (pump_priv->env, pump_cleanup_helper,
+                                    pump_cleanup_done, sync_frame, frame);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Couldn't create "
+                                      "synctask for cleaning up xattrs.");
+                        /* pump_cleanup_done() will never run for this
+                         * frame.  NOTE(review): assumes a failed
+                         * synctask_new() leaves the caller's frame
+                         * untouched - confirm. */
+                        STACK_DESTROY (sync_frame->root);
+                        goto fail;
+                }
+
+        } else {
+                pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
+                                                            pump_xattr_cleaner,
+                                                            0, 0, NULL);
+                if (!pump_priv->cleaner)
+                        goto fail;
+        }
+
+        return 0;
+
+fail:
+        /* Nothing else will answer the command, so fail it here. */
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        pump_command_reply (frame, this);
+
+        return 0;
+}
+
+/*
+ * Returns _gf_true when @dict carries the RB_PUMP_CMD_STATUS key,
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict)
+{
+        char *value = NULL;
+
+        if (dict_get_str (dict, RB_PUMP_CMD_STATUS, &value) < 0) {
+                gf_msg_debug (this->name, 0,
+                              "Not a pump status command");
+                return _gf_false;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Hit a pump command - status");
+        return _gf_true;
+}
+
+/*
+ * Returns _gf_true when @dict carries the RB_PUMP_CMD_PAUSE key,
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict)
+{
+        char *value = NULL;
+
+        if (dict_get_str (dict, RB_PUMP_CMD_PAUSE, &value) < 0) {
+                gf_msg_debug (this->name, 0,
+                              "Not a pump pause command");
+                return _gf_false;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Hit a pump command - pause");
+        return _gf_true;
+}
+
+/*
+ * Returns _gf_true when @dict carries the RB_PUMP_CMD_COMMIT key,
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+pump_command_commit (xlator_t *this, dict_t *dict)
+{
+        char *value = NULL;
+
+        if (dict_get_str (dict, RB_PUMP_CMD_COMMIT, &value) < 0) {
+                gf_msg_debug (this->name, 0,
+                              "Not a pump commit command");
+                return _gf_false;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Hit a pump command - commit");
+        return _gf_true;
+}
+
+/*
+ * Returns _gf_true when @dict carries the RB_PUMP_CMD_ABORT key,
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict)
+{
+        char *value = NULL;
+
+        if (dict_get_str (dict, RB_PUMP_CMD_ABORT, &value) < 0) {
+                gf_msg_debug (this->name, 0,
+                              "Not a pump abort command");
+                return _gf_false;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Hit a pump command - abort");
+        return _gf_true;
+}
+
+/*
+ * Returns _gf_true when @dict carries the RB_PUMP_CMD_START key,
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict)
+{
+        char *value = NULL;
+
+        if (dict_get_str (dict, RB_PUMP_CMD_START, &value) < 0) {
+                gf_msg_debug (this->name, 0,
+                              "Not a pump start command");
+                return _gf_false;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Hit a pump command - start");
+        return _gf_true;
+}
+
+/*
+ * getxattr fop of the pump translator.
+ *
+ * While AFR is not in use the call is forwarded to the first child.
+ * Otherwise names starting with AFR_XATTR_PREFIX are refused with ENODATA,
+ * RB_PUMP_CMD_STATUS is answered by pump_execute_status() and everything
+ * else goes to afr_getxattr().  Always returns 0.
+ */
+int
+pump_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               const char *name, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (!conf->use_afr_in_pump) {
+                STACK_WIND (frame, default_getxattr_cbk,
+                            FIRST_CHILD (this),
+                            (FIRST_CHILD (this))->fops->getxattr,
+                            loc, name, xdata);
+                return 0;
+        }
+
+        if (name && !strncmp (name, AFR_XATTR_PREFIX,
+                              strlen (AFR_XATTR_PREFIX))) {
+                AFR_STACK_UNWIND (getxattr, frame, -1, ENODATA, NULL, NULL);
+                return 0;
+        }
+
+        if (name && !strcmp (name, RB_PUMP_CMD_STATUS)) {
+                gf_msg_debug (this->name, 0,
+                              "Hit pump command - status");
+                pump_execute_status (frame, this);
+                return 0;
+        }
+
+        afr_getxattr (frame, this, loc, name, xdata);
+
+        return 0;
+}
+
+/*
+ * Answer a pump command: log whether it failed or succeeded, judged by
+ * local->op_ret, and unwind the setxattr call with the op_ret / op_errno
+ * stored in the frame's local.  Always returns 0.
+ */
+int
+pump_command_reply (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *cmd_local = frame->local;
+
+        if (cmd_local->op_ret >= 0) {
+                gf_msg (this->name, GF_LOG_INFO,
+                        0, AFR_MSG_INFO_COMMON,
+                        "Command succeeded");
+        } else {
+                gf_msg (this->name, GF_LOG_INFO,
+                        0, AFR_MSG_INFO_COMMON,
+                        "Command failed");
+        }
+
+        AFR_STACK_UNWIND (setxattr,
+                          frame,
+                          cmd_local->op_ret,
+                          cmd_local->op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Recognise a pump command in @dict and execute it.
+ *
+ * The dict is probed for the start, pause, abort and commit commands, in
+ * that order.  For the first match the frame's local is initialised, a
+ * reference to @dict is stored in it and the matching pump_execute_*()
+ * handler is run.
+ *
+ * Returns the handler's result, or -1 when no command is present or the
+ * frame could not be initialised.  The errno reported by AFR_FRAME_INIT is
+ * stored in *@op_errno_p when that pointer is not NULL.
+ */
+int
+pump_parse_command (call_frame_t *frame, xlator_t *this, dict_t *dict,
+                    int *op_errno_p)
+{
+        int         (*execute) (call_frame_t *, xlator_t *) = NULL;
+        afr_local_t *local    = NULL;
+        int          op_errno = 0;
+        int          ret      = -1;
+
+        if (pump_command_start (this, dict))
+                execute = pump_execute_start;
+        else if (pump_command_pause (this, dict))
+                execute = pump_execute_pause;
+        else if (pump_command_abort (this, dict))
+                execute = pump_execute_abort;
+        else if (pump_command_commit (this, dict))
+                execute = pump_execute_commit;
+
+        if (execute) {
+                local = AFR_FRAME_INIT (frame, op_errno);
+                if (local) {
+                        local->dict = dict_ref (dict);
+                        ret = execute (frame, this);
+                }
+        }
+
+        if (op_errno_p)
+                *op_errno_p = op_errno;
+        return ret;
+}
+
+/*
+ * setxattr fop of the pump translator.
+ *
+ * Keys matching "trusted.glusterfs.pump*" are rejected by
+ * GF_IF_INTERNAL_XATTR_GOTO.  While AFR is not in use the call is
+ * forwarded to the first child.  Otherwise the dict is first offered to
+ * pump_parse_command(); when it holds no pump command the call goes to
+ * afr_setxattr().
+ *
+ * Always returns 0; errors are delivered through the unwind.
+ */
+int
+pump_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        afr_private_t *priv     = NULL;
+        int            ret      = -1;
+        int            op_errno = 0;
+
+        GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict, op_errno, out);
+
+        priv = this->private;
+        if (!priv->use_afr_in_pump) {
+                STACK_WIND (frame, default_setxattr_cbk,
+                            FIRST_CHILD (this),
+                            (FIRST_CHILD (this))->fops->setxattr,
+                            loc, dict, flags, xdata);
+                return 0;
+        }
+
+        ret = pump_parse_command (frame, this, dict, &op_errno);
+        if (ret >= 0)
+                goto out;
+
+        /* A negative result together with an errno means a pump command
+         * was recognised but its frame could not be set up.  Fail the
+         * call rather than handing a pump command to afr_setxattr(). */
+        if (op_errno)
+                goto out;
+
+        afr_setxattr (frame, this, loc, dict, flags, xdata);
+
+        ret = 0;
+out:
+        if (ret < 0) {
+                AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* Defaults */
+/*
+ * lookup fop: handled by afr_lookup() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_lookup (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             dict_t *xattr_req)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_lookup (frame, this, loc, xattr_req);
+        } else {
+                STACK_WIND (frame, default_lookup_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup,
+                            loc, xattr_req);
+        }
+
+        return 0;
+}
+
+
+/*
+ * truncate fop: handled by afr_truncate() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_truncate (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               off_t offset, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_truncate (frame, this, loc, offset, xdata);
+        } else {
+                STACK_WIND (frame, default_truncate_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->truncate,
+                            loc, offset, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop: handled by afr_ftruncate() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_ftruncate (call_frame_t *frame,
+                xlator_t *this,
+                fd_t *fd,
+                off_t offset, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_ftruncate (frame, this, fd, offset, xdata);
+        } else {
+                STACK_WIND (frame, default_ftruncate_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->ftruncate,
+                            fd, offset, xdata);
+        }
+
+        return 0;
+}
+
+
+
+
+/*
+ * mknod fop: handled by afr_mknod() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+int
+pump_mknod (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
+        } else {
+                STACK_WIND (frame, default_mknod_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->mknod,
+                            loc, mode, rdev, umask, xdata);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * mkdir fop: handled by afr_mkdir() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+int
+pump_mkdir (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_mkdir (frame, this, loc, mode, umask, xdata);
+        } else {
+                STACK_WIND (frame, default_mkdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->mkdir,
+                            loc, mode, umask, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * unlink fop: handled by afr_unlink() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_unlink (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc, int xflag, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_unlink (frame, this, loc, xflag, xdata);
+        } else {
+                STACK_WIND (frame, default_unlink_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->unlink,
+                            loc, xflag, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * rmdir fop: handled by afr_rmdir() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int
+pump_rmdir (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int flags, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_rmdir (frame, this, loc, flags, xdata);
+        } else {
+                STACK_WIND (frame, default_rmdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rmdir,
+                            loc, flags, xdata);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * symlink fop: handled by afr_symlink() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+int
+pump_symlink (call_frame_t *frame, xlator_t *this,
+              const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_symlink (frame, this, linkpath, loc, umask, xdata);
+        } else {
+                STACK_WIND (frame, default_symlink_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->symlink,
+                            linkpath, loc, umask, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * rename fop: handled by afr_rename() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_rename (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *oldloc,
+             loc_t *newloc, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_rename (frame, this, oldloc, newloc, xdata);
+        } else {
+                STACK_WIND (frame, default_rename_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rename,
+                            oldloc, newloc, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * link fop: handled by afr_link() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_link (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *oldloc,
+           loc_t *newloc, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_link (frame, this, oldloc, newloc, xdata);
+        } else {
+                STACK_WIND (frame, default_link_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->link,
+                            oldloc, newloc, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * create fop: handled by afr_create() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_create (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, int32_t flags, mode_t mode,
+             mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
+        } else {
+                STACK_WIND (frame, default_create_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->create,
+                            loc, flags, mode, umask, fd, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * open fop: handled by afr_open() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_open (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           int32_t flags, fd_t *fd, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_open (frame, this, loc, flags, fd, xdata);
+        } else {
+                STACK_WIND (frame, default_open_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->open,
+                            loc, flags, fd, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * writev fop: handled by afr_writev() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_writev (call_frame_t *frame,
+             xlator_t *this,
+             fd_t *fd,
+             struct iovec *vector,
+             int32_t count,
+             off_t off, uint32_t flags,
+             struct iobref *iobref, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_writev (frame, this, fd, vector, count, off, flags,
+                            iobref, xdata);
+        } else {
+                STACK_WIND (frame, default_writev_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->writev,
+                            fd, vector, count, off, flags,
+                            iobref, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * flush fop: handled by afr_flush() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_flush (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_flush (frame, this, fd, xdata);
+        } else {
+                STACK_WIND (frame, default_flush_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->flush,
+                            fd, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * fsync fop: handled by afr_fsync() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_fsync (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            int32_t flags, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_fsync (frame, this, fd, flags, xdata);
+        } else {
+                STACK_WIND (frame, default_fsync_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsync,
+                            fd, flags, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * opendir fop: handled by afr_opendir() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_opendir (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_opendir (frame, this, loc, fd, xdata);
+        } else {
+                STACK_WIND (frame, default_opendir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->opendir,
+                            loc, fd, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * fsyncdir fop: handled by afr_fsyncdir() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_fsyncdir (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               int32_t flags, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_fsyncdir (frame, this, fd, flags, xdata);
+        } else {
+                STACK_WIND (frame, default_fsyncdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsyncdir,
+                            fd, flags, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * xattrop fop: handled by afr_xattrop() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_xattrop (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              gf_xattrop_flags_t flags,
+              dict_t *dict, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_xattrop (frame, this, loc, flags, dict, xdata);
+        } else {
+                STACK_WIND (frame, default_xattrop_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->xattrop,
+                            loc, flags, dict, xdata);
+        }
+
+        return 0;
+}
+
+/*
+ * fxattrop fop: handled by afr_fxattrop() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_fxattrop (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               gf_xattrop_flags_t flags,
+               dict_t *dict, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_fxattrop (frame, this, fd, flags, dict, xdata);
+        } else {
+                STACK_WIND (frame, default_fxattrop_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fxattrop,
+                            fd, flags, dict, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * removexattr fop.
+ *
+ * VALIDATE_OR_GOTO and GF_IF_NATIVE_XATTR_GOTO (pattern
+ * "trusted.glusterfs.pump*") may jump to the exit, where the call is
+ * unwound with -1 and the op_errno in force at that point.  Otherwise the
+ * call is handled by afr_removexattr() once use_afr_in_pump is set, or
+ * forwarded to the first child.  Always returns 0.
+ */
+static int32_t
+pump_removexattr (call_frame_t *frame,
+                  xlator_t *this,
+                  loc_t *loc,
+                  const char *name, dict_t *xdata)
+{
+        afr_private_t *conf     = NULL;
+        int            op_errno = -1;
+
+        VALIDATE_OR_GOTO (this, out);
+
+        GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
+                                 name, op_errno, out);
+
+        /* past the checks: nothing is left to report through the exit */
+        op_errno = 0;
+        conf = this->private;
+        if (conf->use_afr_in_pump) {
+                afr_removexattr (frame, this, loc, name, xdata);
+        } else {
+                STACK_WIND (frame, default_removexattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->removexattr,
+                            loc, name, xdata);
+        }
+
+out:
+        if (op_errno)
+                AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+        return 0;
+}
+
+
+
+/*
+ * readdir fop: handled by afr_readdir() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_readdir (call_frame_t *frame,
+              xlator_t *this,
+              fd_t *fd,
+              size_t size,
+              off_t off, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_readdir (frame, this, fd, size, off, xdata);
+        } else {
+                STACK_WIND (frame, default_readdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdir,
+                            fd, size, off, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * readdirp fop: handled by afr_readdirp() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               size_t size, off_t off, dict_t *dict)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_readdirp (frame, this, fd, size, off, dict);
+        } else {
+                STACK_WIND (frame, default_readdirp_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdirp,
+                            fd, size, off, dict);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * releasedir callback: passed on to afr_releasedir() only while
+ * use_afr_in_pump is set.  Always returns 0.
+ */
+static int32_t
+pump_releasedir (xlator_t *this,
+                 fd_t *fd)
+{
+        afr_private_t *conf = this->private;
+
+        if (!conf->use_afr_in_pump)
+                return 0;
+
+        afr_releasedir (this, fd);
+        return 0;
+}
+
+/*
+ * release callback: passed on to afr_release() only while
+ * use_afr_in_pump is set.  Always returns 0.
+ */
+static int32_t
+pump_release (xlator_t *this,
+              fd_t *fd)
+{
+        afr_private_t *conf = this->private;
+
+        if (!conf->use_afr_in_pump)
+                return 0;
+
+        afr_release (this, fd);
+        return 0;
+}
+
+/*
+ * forget callback: passed on to afr_forget() only while
+ * use_afr_in_pump is set.  Always returns 0.
+ */
+static int32_t
+pump_forget (xlator_t *this, inode_t *inode)
+{
+        afr_private_t *conf = this->private;
+
+        if (!conf->use_afr_in_pump)
+                return 0;
+
+        afr_forget (this, inode);
+        return 0;
+}
+
+/*
+ * setattr fop: handled by afr_setattr() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_setattr (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              struct iatt *stbuf,
+              int32_t valid, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_setattr (frame, this, loc, stbuf, valid, xdata);
+        } else {
+                STACK_WIND (frame, default_setattr_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->setattr,
+                            loc, stbuf, valid, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * fsetattr fop: handled by afr_fsetattr() once use_afr_in_pump is set,
+ * otherwise forwarded unchanged to the first child.  Always returns 0.
+ */
+static int32_t
+pump_fsetattr (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               struct iatt *stbuf,
+               int32_t valid, dict_t *xdata)
+{
+        afr_private_t *conf = this->private;
+
+        if (conf->use_afr_in_pump) {
+                afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
+        } else {
+                STACK_WIND (frame, default_fsetattr_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->fsetattr,
+                            fd, stbuf, valid, xdata);
+        }
+
+        return 0;
+}
+
+
+/* End of defaults */
+
+
+/*
+ * Set up memory accounting for this translator with gf_afr_mt_end + 1
+ * types. Returns -1 when this is NULL, otherwise whatever
+ * xlator_mem_acct_init() returns.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        if (this == NULL)
+                return -1;
+
+        return xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+}
+
+/* Non-zero when child is the sink (second) subvolume of the current xlator. */
+static int
+is_xlator_pump_sink (xlator_t *child)
+{
+        xlator_t *sink = PUMP_SINK_CHILD(THIS);
+
+        return (sink == child);
+}
+
+/* Non-zero when child is the source (first) subvolume of the current xlator. */
+static int
+is_xlator_pump_source (xlator_t *child)
+{
+        xlator_t *source = PUMP_SOURCE_CHILD(THIS);
+
+        return (source == child);
+}
+
+/*
+ * Event notification entry point. The event is first passed to
+ * afr_notify(); then:
+ *   - CHILD_DOWN from the source subvolume moves the pump to
+ *     PUMP_STATE_ABORT;
+ *   - CHILD_UP from the sink subvolume starts the pump synctask if a start
+ *     was pending, and clears the pending mark when the start succeeded.
+ *
+ * Returns the afr_notify() result, except when a synctask start was
+ * attempted, in which case the pump_start_synctask() result is returned.
+ */
+int32_t
+notify (xlator_t *this, int32_t event,
+        void *data, ...)
+{
+        xlator_t *child_xl = data;
+        int       ret      = -1;
+
+        ret = afr_notify (this, event, data, NULL);
+
+        switch (event) {
+        case GF_EVENT_CHILD_DOWN:
+                if (is_xlator_pump_source (child_xl)) {
+                        pump_change_state (this, PUMP_STATE_ABORT);
+                }
+                break;
+
+        case GF_EVENT_CHILD_UP:
+                /* only the sink coming up with a start pending matters */
+                if (!is_xlator_pump_sink (child_xl) ||
+                    !is_pump_start_pending (this)) {
+                        break;
+                }
+
+                gf_msg_debug (this->name, 0,
+                              "about to start synctask");
+                ret = pump_start_synctask (this);
+                if (ret < 0) {
+                        gf_msg_debug (this->name, 0,
+                                      "Could not start pump "
+                                      "synctask");
+                } else {
+                        pump_remove_start_pending (this);
+                }
+                break;
+
+        default:
+                break;
+        }
+
+        return ret;
+}
+
+/*
+ * Translator init. Allocates the AFR private structure, fills it with the
+ * fixed pump defaults, records the two subvolumes (source first, sink
+ * second), builds the per-child pending xattr keys and the self-heal domain
+ * name, and attaches the pump-specific state.
+ *
+ * Exactly two children are required.
+ *
+ * Returns 0 on success and -1 on any failure. On failure everything
+ * allocated here is released again and this->private is not modified.
+ */
+int32_t
+init (xlator_t *this)
+{
+        afr_private_t  *priv         = NULL;
+        pump_private_t *pump_priv    = NULL;
+        xlator_list_t  *trav         = NULL;
+        int             child_count  = 0;
+        int             source_child = 0;
+        int             i            = 0;
+        int             ret          = -1;
+
+        if (!this->children) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        0, AFR_MSG_CHILD_MISCONFIGURED,
+                        "pump translator needs a source and sink "
+                        "subvolumes defined.");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        AFR_MSG_VOL_MISCONFIGURED, "Volume is dangling.");
+        }
+
+        priv = GF_CALLOC (1, sizeof (afr_private_t), gf_afr_mt_afr_private_t);
+        if (!priv)
+                goto out;
+
+        LOCK_INIT (&priv->lock);
+
+        child_count = xlator_subvolume_count (this);
+        if (child_count != 2) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        0, AFR_MSG_CHILD_MISCONFIGURED,
+                        "There should be exactly 2 children - one source "
+                        "and one sink");
+                /* leave through out: so priv and its lock are released */
+                ret = -1;
+                goto out;
+        }
+        priv->child_count = child_count;
+
+        priv->read_child = source_child;
+        priv->favorite_child = source_child;
+        priv->background_self_heal_count = 0;
+
+        priv->data_self_heal = "on";
+        priv->metadata_self_heal = 1;
+        priv->entry_self_heal = 1;
+
+        priv->data_self_heal_window_size = 16;
+
+        priv->data_change_log = 1;
+        priv->metadata_change_log = 1;
+        priv->entry_change_log = 1;
+        priv->use_afr_in_pump = 1;
+        priv->sh_readdir_size = 65536;
+
+        /* Locking options */
+
+        /* Lock server count in fact does not matter. Locks are held
+           on all subvolumes, in this case being the source
+           and the sink.
+        */
+
+        priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+                                    gf_afr_mt_char);
+        if (!priv->child_up) {
+                ret = -1;
+                goto out;
+        }
+
+        priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+                                    gf_afr_mt_xlator_t);
+        if (!priv->children) {
+                ret = -1;
+                goto out;
+        }
+
+        priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+                                       child_count,
+                                       gf_afr_mt_char);
+        if (!priv->pending_key) {
+                ret = -1;
+                goto out;
+        }
+
+        trav = this->children;
+        i = 0;
+        while (i < child_count) {
+                priv->children[i] = trav->xlator;
+
+                ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
+                                   AFR_XATTR_PREFIX,
+                                   trav->xlator->name);
+                if (-1 == ret)
+                        goto out;
+
+                trav = trav->next;
+                i++;
+        }
+
+        ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
+        if (-1 == ret)
+                goto out;
+
+        priv->root_inode = NULL;
+
+        /*
+         * From here on ret holds the (positive) gf_asprintf() result, so
+         * every failure below must set it explicitly.
+         */
+        priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+                                      gf_afr_mt_int32_t);
+        if (!priv->last_event) {
+                ret = -1;
+                goto out;
+        }
+
+        pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
+                               gf_afr_mt_pump_priv);
+        if (!pump_priv) {
+                ret = -1;
+                goto out;
+        }
+
+        LOCK_INIT (&pump_priv->resume_path_lock);
+        LOCK_INIT (&pump_priv->pump_state_lock);
+
+        pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
+                                            gf_afr_mt_char);
+        if (!pump_priv->resume_path) {
+                ret = -1;
+                goto out;
+        }
+
+        pump_priv->env = this->ctx->env;
+        if (!pump_priv->env) {
+                ret = -1;
+                goto out;
+        }
+
+        /* keep more local here as we may need them for self-heal etc */
+        this->local_pool = mem_pool_new (afr_local_t, 128);
+        if (!this->local_pool) {
+                ret = -1;
+                goto out;
+        }
+
+        /* ownership moves to the xlator; NULL the locals so out: skips them */
+        priv->pump_private = pump_priv;
+        pump_priv = NULL;
+
+        this->private = priv;
+        priv = NULL;
+
+        pump_change_state (this, PUMP_STATE_ABORT);
+
+        ret = 0;
+out:
+
+        if (pump_priv) {
+                GF_FREE (pump_priv->resume_path);
+                LOCK_DESTROY (&pump_priv->resume_path_lock);
+                LOCK_DESTROY (&pump_priv->pump_state_lock);
+                GF_FREE (pump_priv);
+        }
+
+        if (priv) {
+                /* the key array is zero-filled, so unset slots are NULL */
+                if (priv->pending_key) {
+                        for (i = 0; i < child_count; i++)
+                                GF_FREE (priv->pending_key[i]);
+                }
+                GF_FREE (priv->sh_domain);
+                GF_FREE (priv->child_up);
+                GF_FREE (priv->children);
+                GF_FREE (priv->pending_key);
+                GF_FREE (priv->last_event);
+                LOCK_DESTROY (&priv->lock);
+                GF_FREE (priv);
+        }
+
+        return ret;
+}
+
+/*
+ * Translator teardown. Detaches this->private, releases the pump-specific
+ * state (resume path, both locks, the structure itself) when present, and
+ * then hands the AFR private data to afr_priv_destroy(). Always returns 0.
+ */
+int
+fini (xlator_t *this)
+{
+        afr_private_t  *afr_conf   = this->private;
+        pump_private_t *pump_state = NULL;
+
+        this->private = NULL;
+        if (afr_conf == NULL)
+                return 0;
+
+        pump_state = afr_conf->pump_private;
+        if (pump_state != NULL) {
+                GF_FREE (pump_state->resume_path);
+                LOCK_DESTROY (&pump_state->resume_path_lock);
+                LOCK_DESTROY (&pump_state->pump_state_lock);
+                GF_FREE (pump_state);
+        }
+
+        afr_priv_destroy (afr_conf);
+        return 0;
+}
+
+/* Fop dispatch table of the pump translator. Every operation listed here is
+ * routed to the pump_* wrapper of the same name.
+ */
+struct xlator_fops fops = {
+ .lookup = pump_lookup,
+ .open = pump_open,
+ .flush = pump_flush,
+ .fsync = pump_fsync,
+ .fsyncdir = pump_fsyncdir,
+ .xattrop = pump_xattrop,
+ .fxattrop = pump_fxattrop,
+ .getxattr = pump_getxattr,
+
+ /* inode write */
+ .writev = pump_writev,
+ .truncate = pump_truncate,
+ .ftruncate = pump_ftruncate,
+ .setxattr = pump_setxattr,
+ .setattr = pump_setattr,
+ .fsetattr = pump_fsetattr,
+ .removexattr = pump_removexattr,
+
+ /* dir read */
+ .opendir = pump_opendir,
+ .readdir = pump_readdir,
+ .readdirp = pump_readdirp,
+
+ /* dir write */
+ .create = pump_create,
+ .mknod = pump_mknod,
+ .mkdir = pump_mkdir,
+ .unlink = pump_unlink,
+ .rmdir = pump_rmdir,
+ .link = pump_link,
+ .symlink = pump_symlink,
+ .rename = pump_rename,
+};
+/* Dump operations: the .priv hook is served by afr_priv_dump. */
+struct xlator_dumpops dumpops = {
+ .priv = afr_priv_dump,
+};
+
+/* Callback table: release, releasedir and forget go to their pump_* wrappers. */
+struct xlator_cbks cbks = {
+ .release = pump_release,
+ .releasedir = pump_releasedir,
+ .forget = pump_forget,
+};
+/* Option table: holds a single entry whose key is NULL, i.e. no named options. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
new file mode 100644
index 00000000000..7d5acd02bf6
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __PUMP_H__
+#define __PUMP_H__
+
+#include "syncop.h"
+
+/* FIXME: Needs to be defined in a common file */
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+
+#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
+#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
+
+#define PUMP_PATH "trusted.glusterfs.pump-path"
+
+/* First child is the source, second child is the sink. The argument is
+ * parenthesized so that any pointer expression can be passed safely.
+ */
+#define PUMP_SOURCE_CHILD(xl) ((xl)->children->xlator)
+#define PUMP_SINK_CHILD(xl) ((xl)->children->next->xlator)
+/* States of a pump; pump_private_t keeps the current one in pump_state. */
+typedef enum {
+ PUMP_STATE_RUNNING, /* Pump is running and migrating files */
+ PUMP_STATE_RESUME, /* Pump is resuming from a previous pause */
+ PUMP_STATE_PAUSE, /* Pump is paused */
+ PUMP_STATE_ABORT, /* Pump is aborted */
+ PUMP_STATE_COMMIT, /* Pump is committed */
+} pump_state_t;
+/* Pump-specific private state. Two separate locks guard resume_path and
+ * pump_state respectively (see the per-field notes).
+ */
+typedef struct _pump_private {
+ struct syncenv *env; /* The env pointer to the pump synctask */
+ char *resume_path; /* path to resume from the last pause */
+ gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
+ gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
+ pump_state_t pump_state; /* State of pump */
+ char current_file[PATH_MAX]; /* Current file being pumped */
+ uint64_t number_files_pumped; /* Number of files pumped */
+ gf_boolean_t pump_finished; /* Boolean to indicate pump termination */
+ char pump_start_pending; /* Boolean to mark start pending until
+ CHILD_UP */
+ call_stub_t *cleaner; /* NOTE(review): role not evident from this header - confirm */
+} pump_private_t;
+
+void
+build_root_loc (inode_t *inode, loc_t *loc);
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this);
+
+int
+pump_command_reply (call_frame_t *frame, xlator_t *this);
+
+#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index 4b69aa07100..29be5ce4776 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -1,34 +1,57 @@
-
xlator_LTLIBRARIES = dht.la nufa.la switch.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+if BUILD_GFDB
+ xlator_LTLIBRARIES += tier.la
+endif
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \
- dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c
+dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-dht_la_SOURCES = $(dht_common_source) dht.c
+dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
+tier_la_SOURCES = $(dht_common_source) tier.c tier-common.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/cluster/dht/src/dht.sym
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/cluster/dht/src/nufa.sym
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoidversion
+switch_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/cluster/dht/src/switch.sym
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-common.c dht-mem-types.h
+tier_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/cluster/dht/src/tier.sym
+tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h dht-helper.h tier-common.h tier.h\
+ $(top_builddir)/xlators/lib/src/libxlator.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/libglusterfs/src/gfdb \
+ -I$(top_srcdir)/xlators/lib/src \
+ -DDATADIR=\"$(localstatedir)\" \
+ -DLIBDIR=\"$(libdir)\" \
+ -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\"
+
+CLEANFILES =
+
+EXTRA_DIST = dht.sym nufa.sym switch.sym tier.sym
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/distribute.so
install-data-hook:
- ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so \ No newline at end of file
+ ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
+
+if UNITTEST
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+endif
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 1cfeae690f9..c667266fed8 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1,37 +1,191 @@
/*
- Copyright (c) 2009-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
/* TODO: add NS locking */
#include "glusterfs.h"
#include "xlator.h"
+#include "libxlator.h"
#include "dht-common.h"
#include "defaults.h"
+#include "byte-order.h"
+#include "glusterfs-acl.h"
+#include "quota-common-utils.h"
#include <sys/time.h>
#include <libgen.h>
+#include <signal.h>
+
+int run_defrag = 0;
+
+
+
+int dht_link2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame,
+ int ret);
+
+int
+dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret);
+
+int
+dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret);
+
+
+/* Overwrite ia_blocks and ia_size of stat with the fixed directory values
+ * DHT_DIR_STAT_BLOCKS and DHT_DIR_STAT_SIZE. Intended for directories only;
+ * checking the type is the caller's job.
+ * Returns 0, or -1 when stat is NULL.
+ */
+int32_t dht_set_fixed_dir_stat (struct iatt *stat)
+{
+        if (stat == NULL)
+                return -1;
+
+        stat->ia_blocks = DHT_DIR_STAT_BLOCKS;
+        stat->ia_size = DHT_DIR_STAT_SIZE;
+
+        return 0;
+}
+
+
+int
+dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
+
+/*
+ * Fold one quota xattr value into the running total stored in dst under key.
+ * If dst has no entry for key yet, a zeroed quota_meta_t is allocated and
+ * stored first. All counters are kept in network byte order.
+ *
+ * The shape of the incoming value is chosen by value->len:
+ *   - at most sizeof (int64_t): a bare size, added to the total size;
+ *   - larger: size and file_count are added; dir_count is taken as the
+ *     larger of the two when the value is longer than two int64_t,
+ *     otherwise it is reset to 0.
+ *
+ * Returns 0 on success, -1 on NULL value, allocation or dict failure.
+ */
+int
+dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value)
+{
+        quota_meta_t *total         = NULL;
+        quota_meta_t *incoming      = NULL;
+        int64_t      *bare_size     = NULL;
+        int64_t       total_dirs    = 0;
+        int64_t       incoming_dirs = 0;
+
+        if (value == NULL) {
+                gf_msg ("dht", GF_LOG_WARNING, 0,
+                        DHT_MSG_DATA_NULL, "data value is NULL");
+                return -1;
+        }
+
+        if (dict_get_bin (dst, key, (void **)&total) < 0) {
+                /* first value seen for this key: start from a zeroed total */
+                total = GF_CALLOC (1, sizeof (quota_meta_t),
+                                   gf_common_quota_meta_t);
+                if (total == NULL) {
+                        gf_msg ("dht", GF_LOG_WARNING, ENOMEM,
+                                DHT_MSG_NO_MEMORY,
+                                "Memory allocation failed");
+                        return -1;
+                }
+
+                if (dict_set_bin (dst, key, total,
+                                  sizeof (quota_meta_t)) < 0) {
+                        gf_msg ("dht", GF_LOG_WARNING, EINVAL,
+                                DHT_MSG_DICT_SET_FAILED,
+                                "dht aggregate dict set failed");
+                        GF_FREE (total);
+                        return -1;
+                }
+        }
+
+        if (value->len <= sizeof (int64_t)) {
+                bare_size = data_to_bin (value);
+                total->size = hton64 (ntoh64 (total->size) +
+                                      ntoh64 (*bare_size));
+                return 0;
+        }
+
+        incoming = data_to_bin (value);
+
+        total->size = hton64 (ntoh64 (total->size) +
+                              ntoh64 (incoming->size));
+        total->file_count = hton64 (ntoh64 (total->file_count) +
+                                    ntoh64 (incoming->file_count));
+
+        if (value->len <= (2 * sizeof (int64_t))) {
+                total->dir_count = 0;
+                return 0;
+        }
+
+        total_dirs = ntoh64 (total->dir_count);
+        incoming_dirs = ntoh64 (incoming->dir_count);
+        if (incoming_dirs > total_dirs)
+                total->dir_count = incoming->dir_count;
+
+        return 0;
+}
+
+
+
+/*
+ * dict_foreach() callback used by dht_aggregate_xattr(): merges one
+ * key/value pair into the destination dict passed through data.
+ *
+ *   - QUOTA_SIZE_KEY values are summed via dht_aggregate_quota_xattr();
+ *   - keys matching GF_XATTR_STIME_PATTERN go through gf_get_min_stime();
+ *   - everything else is stored with dict_set(); for "user." keys an
+ *     existing differing value is only reported at debug level.
+ *
+ * Returns 0, or the helper's error when the quota or stime merge fails.
+ * A failing dict_set() is logged but does not change the result.
+ */
+int
+dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+        dict_t  *dst      = data;
+        data_t  *existing = NULL;
+        int32_t  ret      = -1;
+
+        if (strcmp (key, QUOTA_SIZE_KEY) == 0) {
+                ret = dht_aggregate_quota_xattr (dst, key, value);
+                if (ret) {
+                        gf_msg ("dht", GF_LOG_WARNING, 0,
+                                DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+                                "Failed to aggregate quota xattr");
+                        return ret;
+                }
+                return 0;
+        }
+
+        if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+                ret = gf_get_min_stime (THIS, dst, key, value);
+                if (ret < 0)
+                        return ret;
+                return 0;
+        }
+
+        /* compare user xattrs only */
+        if (!strncmp (key, "user.", strlen ("user."))) {
+                ret = dict_lookup (dst, key, &existing);
+                if (!ret && existing && value) {
+                        ret = is_data_equal (existing, value);
+                        if (!ret) {
+                                gf_msg_debug ("dht", 0,
+                                              "xattr mismatch for %s",
+                                              key);
+                        }
+                }
+        }
+
+        ret = dict_set (dst, key, value);
+        if (ret) {
+                gf_msg ("dht", GF_LOG_WARNING, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "Failed to set dictionary value: key = %s",
+                        key);
+        }
+
+        return 0;
+}
+
+
+/* Merge every entry of src into dst through dht_aggregate().
+ * Does nothing when either dict is NULL.
+ */
+void
+dht_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+        if (dst != NULL && src != NULL)
+                dict_foreach (src, dht_aggregate, dst);
+}
/* TODO:
- use volumename in xattr instead of "dht"
@@ -43,40 +197,398 @@
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int op_ret, int op_errno)
+ xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ /* Completion callback passed to dht_selfheal_directory() by the lookup
+ * path: on success it installs the healed layout on the inode, then
+ * unwinds the lookup. Returns the value handed to the unwind, or -1
+ * when an argument check fails.
+ */
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+
+ local = frame->local;
+ ret = op_ret; /* start from the self-heal result */
+
+ FRAME_SU_UNDO (frame, dht_local_t); /* pairs with FRAME_SU_DO issued before the self-heal */
+
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = dht_layout_set (this, local->inode, layout);
+ }
+
+ dht_inode_ctx_time_update (local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->postparent); /* parent gets the fixed blocks/size values */
+
+ DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr, &local->postparent);
+
+out:
+ return ret;
+}
+
+/*
+ * Finish a discover lookup: settle the layout gathered by the per-subvolume
+ * callbacks and unwind the lookup on the original (main) frame.
+ *
+ * The main frame is detached from local under the frame lock, so only the
+ * first caller proceeds; any later caller returns 0 immediately.
+ *
+ * When the result is a directory, the QUOTA_READ_ONLY_KEY flag is not set,
+ * a subvolume without error exists, and either some subvolume reported
+ * ENOENT/ESTALE or layout normalization failed, a synctask running
+ * dht_heal_full_path is spawned on a fresh frame. If the spawn succeeds the
+ * main frame is handed to that task through heal_local->main_frame and
+ * nothing is unwound here. In every other case the lookup is unwound here.
+ *
+ * Returns 0, except on the error path (file/dir type mismatch or layout
+ * preset failure) where the last helper result held in ret is returned.
+ */
+int
+dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+{
+        dht_local_t     *local = NULL;
+        dht_local_t     *heal_local = NULL;
+        call_frame_t    *main_frame = NULL;
+        call_frame_t    *heal_frame = NULL;
+        int              op_errno = 0;
+        int              ret = -1;
+        dht_layout_t    *layout = NULL;
+        dht_conf_t      *conf = NULL;
+        uint32_t         vol_commit_hash = 0;
+        xlator_t        *source = NULL;
+        int              heal_path = 0;
+        int              i = 0;
+        loc_t            loc = {0 };
+        int8_t           is_read_only = 0, layout_anomalies = 0;
+
+        local = discover_frame->local;
+        layout = local->layout;
+        conf = this->private;
+
+        /* claim the main frame; whoever gets it does the unwind */
+        LOCK(&discover_frame->lock);
+        {
+                main_frame = local->main_frame;
+                local->main_frame = NULL;
+        }
+        UNLOCK(&discover_frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        ret = dict_get_int8 (local->xattr_req, QUOTA_READ_ONLY_KEY,
+                             &is_read_only);
+        if (ret < 0)
+                gf_msg_debug (this->name, 0, "key = %s not present in dict",
+                              QUOTA_READ_ONLY_KEY);
+
+        if (local->file_count && local->dir_count) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_FILE_TYPE_MISMATCH,
+                        "path %s exists as a file on one subvolume "
+                        "and directory on another. "
+                        "Please fix it manually",
+                        local->loc.path);
+                op_errno = EIO;
+                goto out;
+        }
+
+        if (local->cached_subvol) {
+                ret = dht_layout_preset (this, local->cached_subvol,
+                                         local->inode);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_LAYOUT_SET_FAILED,
+                                "failed to set layout for subvolume %s",
+                                local->cached_subvol ? local->cached_subvol->name : "<nil>");
+                        op_errno = EINVAL;
+                        goto out;
+                }
+        } else {
+                ret = dht_layout_normalize (this, &local->loc, layout);
+                if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+                        /* either the layout is incorrect or the directory is
+                         * not found even in one subvolume.
+                         */
+                        gf_msg_debug (this->name, 0,
+                                      "normalizing failed on %s "
+                                      "(overlaps/holes present: %s, "
+                                      "ENOENT errors: %d)", local->loc.path,
+                                      (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
+                        layout_anomalies = 1;
+                } else if (local->inode) {
+                        dht_layout_set (this, local->inode, layout);
+                }
+        }
+
+        if (!conf->vch_forced) {
+                ret = dict_get_uint32 (local->xattr,
+                                       conf->commithash_xattr_name,
+                                       &vol_commit_hash);
+                if (ret == 0) {
+                        conf->vol_commit_hash = vol_commit_hash;
+                }
+        }
+
+        /* find a healthy subvolume and note whether any subvolume lacks
+         * the directory */
+        if (IA_ISDIR (local->stbuf.ia_type) && !is_read_only) {
+                for (i = 0; i < layout->cnt; i++) {
+                       if (!source && !layout->list[i].err)
+                                source = layout->list[i].xlator;
+                        if (layout->list[i].err == ENOENT ||
+                            layout->list[i].err == ESTALE) {
+                                heal_path = 1;
+                        }
+
+                        if (source && heal_path)
+                                break;
+                }
+        }
+
+        if (source && (heal_path || layout_anomalies)) {
+                gf_uuid_copy (loc.gfid, local->gfid);
+                if (gf_uuid_is_null (loc.gfid)) {
+                        goto done;
+                }
+
+                if (local->inode)
+                        loc.inode = inode_ref (local->inode);
+                else
+                        goto done;
+
+                heal_frame = create_frame (this, this->ctx->pool);
+                if (!heal_frame) {
+                        /* no heal possible: drop the inode reference taken
+                         * into loc above before unwinding normally */
+                        loc_wipe (&loc);
+                        goto done;
+                }
+
+                heal_local = dht_local_init (heal_frame, &loc,
+                                             NULL, 0);
+                if (!heal_local)
+                        goto cleanup;
+
+                gf_uuid_copy (heal_local->gfid, local->gfid);
+                heal_frame->cookie = source;
+                heal_local->xattr = dict_ref (local->xattr);
+                heal_local->stbuf = local->stbuf;
+                heal_local->postparent = local->postparent;
+                heal_local->inode = inode_ref (loc.inode);
+                heal_local->main_frame = main_frame;
+                FRAME_SU_DO (heal_frame, dht_local_t);
+                ret = synctask_new (this->ctx->env,
+                                    dht_heal_full_path,
+                                    dht_heal_full_path_done,
+                                    heal_frame, heal_frame);
+                if (!ret) {
+                        loc_wipe (&loc);
+                        return 0;
+                }
+                /*
+                 * Failed to spawn the synctask. Returning
+                 * with out doing heal.
+                 */
+cleanup:
+                loc_wipe (&loc);
+                DHT_STACK_DESTROY (heal_frame);
+        }
+done:
+        dht_set_fixed_dir_stat (&local->postparent);
+        DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
+                          local->inode, &local->stbuf, local->xattr,
+                          &local->postparent);
+        return 0;
+out:
+        DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
+                          NULL);
+
+        return ret;
+}
+
+/*
+ * Per-subvolume lookup callback of dht_discover().
+ *
+ * Under the frame lock the reply is merged into the shared layout and, for
+ * successful replies, into the aggregated xattr, inode and stat data kept in
+ * local. Directories and files are counted separately; a reply for a real
+ * (non-linkfile) file records the answering subvolume as cached_subvol and
+ * triggers dht_discover_complete() right away. Otherwise completion runs
+ * when the last reply has arrived. The frame is destroyed by the call that
+ * sees the last reply. Always returns 0.
+ */
+int
+dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno,
+                  inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+                  struct iatt *postparent)
+{
+        dht_local_t  *local                   = NULL;
+        int           this_call_cnt           = 0;
+        call_frame_t *prev                    = NULL;
+        dht_layout_t *layout                  = NULL;
+        int           ret                     = -1;
+        int           is_dir                  = 0;
+        int           is_linkfile             = 0;
+        int           attempt_unwind          = 0;
+        dht_conf_t   *conf                    = NULL;
+        char         gfid_local[GF_UUID_BUF_SIZE]  = {0};
+        char         gfid_node[GF_UUID_BUF_SIZE]  = {0};
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, out);
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        local = frame->local;
+        prev  = cookie;
+        conf = this->private;
+
+        layout = local->layout;
+
+
+        /* Check if the gfid is different for file from other node */
+        if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
+
+                gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+                gf_uuid_unparse(local->gfid, gfid_local);
+
+                /* separator added so the two gfids do not run together */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_GFID_MISMATCH,
+                        "%s: gfid different on %s, gfid local = %s, "
+                        "gfid other = %s",
+                        local->loc.path, prev->this->name,
+                        gfid_local, gfid_node);
+        }
+
+
+        LOCK (&frame->lock);
+        {
+                /* TODO: assert equal mode on stbuf->st_mode and
+                   local->stbuf->st_mode
+
+                   else mkdir/chmod/chown and fix
+                */
+                ret = dht_layout_merge (this, layout, prev->this,
+                                        op_ret, op_errno, xattr);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_LAYOUT_MERGE_FAILED,
+                                "%s: failed to merge layouts for subvol %s",
+                                local->loc.path, prev->this->name);
+
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_msg_debug (this->name, op_errno,
+                                      "lookup of %s on %s returned error",
+                                      local->loc.path, prev->this->name);
+
+                        goto unlock;
+                }
+
+                is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+                                                 conf->link_xattr_name);
+                is_dir = check_is_dir (inode, stbuf, xattr);
+
+                if (is_dir) {
+                        local->dir_count ++;
+                } else {
+                        local->file_count ++;
+
+                        if (!is_linkfile) {
+                                /* real file */
+                                local->cached_subvol = prev->this;
+                                attempt_unwind = 1;
+                        } else {
+                                /* linkfile replies contribute nothing more */
+                                goto unlock;
+                        }
+                }
+
+                local->op_ret = 0;
+
+                if (local->xattr == NULL) {
+                        local->xattr = dict_ref (xattr);
+                } else {
+                        dht_aggregate_xattr (local->xattr, xattr);
+                }
+
+                if (local->inode == NULL)
+                        local->inode = inode_ref (inode);
+
+                dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+                dht_iatt_merge (this, &local->postparent, postparent,
+                                prev->this);
+        }
+unlock:
+        UNLOCK (&frame->lock);
+out:
+        /* Make sure, the thread executing dht_discover_complete is the one
+         * which calls STACK_DESTROY (frame). In the case of "attempt_unwind",
+         * this makes sure that the thread don't call dht_frame_return, till
+         * call to dht_discover_complete is done.
+         */
+        if (attempt_unwind) {
+                dht_discover_complete (this, frame);
+        }
+
+        this_call_cnt = dht_frame_return (frame);
+
+        if (is_last_call (this_call_cnt) && !attempt_unwind) {
+                dht_discover_complete (this, frame);
+        }
+
+        if (is_last_call (this_call_cnt))
+                DHT_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/* Start a discover lookup: wind a lookup for local->loc to every subvolume
+ * in conf->subvolumes, with dht_discover_cbk as the callback. The frame's
+ * local is moved onto a copy of the frame (discover_frame) and the original
+ * frame is remembered as local->main_frame. On allocation failure the lookup
+ * is unwound on the original frame with -1 and ENOMEM. Always returns 0.
+ */
+int
+dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s",
+ loc->path, conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s",
+ loc->path, conf->link_xattr_name);
+
+ if (__is_root_gfid(local->loc.gfid)) { /* commit-hash key is requested for the root gfid only */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
- local = frame->local;
- ret = op_ret;
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- dht_frame_su_undo (frame);
+ gf_uuid_copy (local->gfid, loc->gfid);
- if (ret == 0) {
- layout = local->selfheal.layout;
- ret = dht_layout_set (this, local->inode, layout);
+ discover_frame = copy_frame (frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find hashed subvolume for %s",
- local->loc.path);
- }
+ discover_frame->local = local; /* local now travels with the copied frame */
+ frame->local = NULL;
+ local->main_frame = frame; /* original frame is unwound later via main_frame */
- if (local->loc.parent)
- local->postparent.ia_ino = local->loc.parent->ino;
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (discover_frame, dht_discover_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
- WIPE (&local->postparent);
+ return 0;
- DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
- &local->stbuf, local->xattr, &local->postparent);
+err:
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
@@ -86,65 +598,91 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
inode_t *inode, struct iatt *stbuf, dict_t *xattr,
struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
- int is_dir = 0;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- conf = this->private;
local = frame->local;
prev = cookie;
- layout = local->layout;
+ layout = local->layout;
+
+ if (!op_ret && gf_uuid_is_null (local->gfid))
+ memcpy (local->gfid, stbuf->ia_gfid, 16);
+
+ memcpy (local->loc.gfid, local->gfid, 16);
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
+
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s."
+ " gfid local = %s, gfid subvol = %s",
+ local->loc.path, prev->this->name,
+ gfid_local, gfid_node);
+ }
LOCK (&frame->lock);
{
/* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
-
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
-
- if (op_ret == -1) {
- local->op_errno = ENOENT;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
-
- goto unlock;
- }
-
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (!is_dir) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned non dir 0%o",
- local->loc.path, prev->this->name,
- stbuf->ia_type);
- local->need_selfheal = 1;
- goto unlock;
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "lookup of %s on %s returned error",
+ local->loc.path, prev->this->name);
+
+ goto unlock;
}
- local->op_ret = 0;
- if (local->xattr == NULL)
- local->xattr = dict_ref (xattr);
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (!is_dir) {
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ gf_msg_debug (this->name, 0,
+ "lookup of %s on %s returned non"
+ "dir 0%o"
+ "calling lookup_everywhere",
+ local->loc.path, prev->this->name,
+ stbuf->ia_type);
+
+ local->need_selfheal = 1;
+ goto unlock;
+ }
- if (prev->this == dht_first_up_subvol (this)) {
- local->ia_ino = local->stbuf.ia_ino;
- local->ia_gen = local->stbuf.ia_gen;
+ local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
}
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
}
unlock:
UNLOCK (&frame->lock);
@@ -159,45 +697,42 @@ unlock:
return 0;
}
- if (local->op_ret == 0) {
- ret = dht_layout_normalize (this, &local->loc, layout);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fixing assignment on %s",
- local->loc.path);
- goto selfheal;
- }
-
- dht_layout_set (this, local->inode, layout);
-
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- local->stbuf.ia_gen = local->ia_gen;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find hashed subvol for %s",
- local->loc.path);
- }
-
- if (local->loc.parent)
- local->postparent.ia_ino =
- local->loc.parent->ino;
- }
-
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
+ if (local->op_ret == 0) {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+
+ if (ret != 0) {
+ gf_msg_debug (this->name, 0,
+ "fixing assignment on %s",
+ local->loc.path);
+ goto selfheal;
+ }
+
+ dht_layout_set (this, local->inode, layout);
+ }
+
+ dht_inode_ctx_time_update (local->inode, this,
+ &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
&local->postparent);
}
- return 0;
+ return 0;
selfheal:
- dht_frame_su_do (frame);
- ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
- &local->loc, layout);
-
- return 0;
+ FRAME_SU_DO (frame, dht_local_t);
+ gf_uuid_copy (local->loc.gfid, local->gfid);
+ ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+out:
+ return ret;
}
int
@@ -207,272 +742,1086 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postparent)
{
dht_local_t *local = NULL;
+ int this_call_cnt = 0;
call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int follow_link = 0;
+ call_frame_t *copy = NULL;
+ dht_local_t *copy_local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ uint32_t vol_commit_hash = 0;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, err);
local = frame->local;
prev = cookie;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
- if (op_ret == 0) {
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ if (!conf->vch_forced) {
+ ret = dict_get_uint32 (xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
+ }
+ }
+
+ gf_uuid_unparse (local->loc.gfid, gfid);
+
+ LOCK (&frame->lock);
+ {
+
+ gf_msg_debug (this->name, op_errno,
+ "revalidate lookup of %s "
+ "returned with op_ret %d",
+ local->loc.path, op_ret);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ if ((op_errno != ENOTCONN)
+ && (op_errno != ENOENT)
+ && (op_errno != ESTALE)) {
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_REVALIDATE_CBK_INFO,
+ "Revalidate: subvolume %s for %s "
+ "(gfid = %s) returned -1",
+ prev->this->name, local->loc.path,
+ gfid);
+ }
+ if (op_errno == ESTALE) {
+ /* propagate the ESTALE to parent.
+ * setting local->return_estale would send
+ * ESTALE to parent. */
+ local->return_estale = 1;
+ }
+
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG (local->loc.inode->ia_type)) {
+
+ gf_msg_debug (this->name, 0,
+ "found ENOENT for %s. "
+ "Setting "
+ "need_lookup_everywhere"
+ " flag to 1",
+ local->loc.path);
+
+ local->need_lookup_everywhere = 1;
+ }
+ }
+ goto unlock;
+ }
+
+ if ((!IA_ISINVAL(local->inode->ia_type)) &&
+ stbuf->ia_type != local->inode->ia_type) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_FILE_TYPE_MISMATCH,
+ "mismatching filetypes 0%o v/s 0%o for %s,"
+ " gfid = %s",
+ (stbuf->ia_type), (local->inode->ia_type),
+ local->loc.path, gfid);
+
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+
+ goto unlock;
+
+ }
+
+ layout = local->layout;
+
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (is_linkfile) {
+ follow_link = 1;
+ goto unlock;
+ }
+ if (is_dir) {
+ ret = dht_dir_has_layout (xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->stbuf.ia_ctime,
+ local->stbuf.ia_ctime_nsec,
+ stbuf->ia_ctime,
+ stbuf->ia_ctime_nsec)) {
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+ }
+ }
+ if (local->stbuf.ia_type != IA_INVAL)
+ {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid)) {
+ local->need_selfheal = 1;
+ }
+ }
+ ret = dht_layout_dir_mismatch (this, layout,
+ prev->this, &local->loc,
+ xattr);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_MISMATCH,
+ "Mismatching layouts for %s, gfid = %s",
+ local->loc.path, gfid);
+
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+ }
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
dht_iatt_merge (this, &local->postparent, postparent,
prev->this);
- local->stbuf.ia_ino = local->ia_ino;
- local->stbuf.ia_gen = local->loc.inode->generation;
+ local->op_ret = 0;
- if (local->loc.parent)
- local->postparent.ia_ino = local->loc.parent->ino;
- }
+ if (!local->xattr) {
+ local->xattr = dict_ref (xattr);
+ } else if (is_dir) {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+ if (follow_link) {
+ gf_uuid_copy (local->gfid, stbuf->ia_gfid);
- if (!IA_ISDIR (local->stbuf.ia_type)
- && (local->hashed_subvol != local->cached_subvol)
- && (local->stbuf.ia_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ if (!subvol) {
+ op_errno = ESTALE;
+ local->op_ret = -1;
+ } else {
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+ return 0;
+ }
}
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, &local->stbuf, xattr,
- &local->postparent);
+out:
+ this_call_cnt = dht_frame_return (frame);
- return 0;
+ if (is_last_call (this_call_cnt)) {
+ if (!IA_ISDIR (local->stbuf.ia_type)
+ && (local->hashed_subvol != local->cached_subvol)
+ && (local->stbuf.ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+ if (local->need_selfheal) {
+ local->need_selfheal = 0;
+ gf_uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &local->loc,
+ NULL, 0);
+ if (!copy_local)
+ goto cont;
+ copy_local->stbuf = local->stbuf;
+ copy->local = copy_local;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env,
+ dht_dir_attr_heal,
+ dht_dir_attr_heal_done,
+ copy, copy);
+ }
+ }
+cont:
+ if (local->layout_mismatch) {
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ dht_lookup_directory (frame, this, &local->loc);
+ return 0;
+ }
+
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+
+ /* We know that the current cached subvol is no
+ longer valid, so get the new one */
+ local->cached_subvol = NULL;
+ dht_lookup_everywhere (frame, this, &local->loc);
+ return 0;
+ }
+ if (local->return_estale) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+err:
+ return ret;
}
int
dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+ conf = this->private;
- local = frame->local;
- cached_subvol = local->cached_subvol;
- conf = this->private;
+ gf_uuid_unparse(local->loc.gfid, gfid);
- ret = dht_layout_preset (this, local->cached_subvol, inode);
+ ret = dht_layout_preset (this, local->cached_subvol, local->loc.inode);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvolume %s",
- cached_subvol ? cached_subvol->name : "<nil>");
+ gf_msg_debug (this->name, EINVAL,
+ "Failed to set layout for subvolume %s, "
+ "(gfid = %s)",
+ cached_subvol ? cached_subvol->name : "<nil>",
+ gfid);
local->op_ret = -1;
local->op_errno = EINVAL;
goto unwind;
}
- local->op_ret = 0;
- if ((local->stbuf.ia_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
- }
+ local->op_ret = 0;
+ if ((local->stbuf.ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
- if (local->loc.parent)
- local->postparent.ia_ino = local->loc.parent->ino;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
unwind:
- WIPE (&local->postparent);
+ gf_msg_debug (this->name, 0,
+ "creation of linkto on hashed subvol:%s, "
+ "returned with op_ret %d and op_errno %d: %s",
+ local->hashed_subvol->name,
+ op_ret, op_errno, uuid_utoa (local->loc.gfid));
+
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
+
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
&local->postparent);
- return 0;
+out:
+ return ret;
+}
+
+int
+dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
+
+ local = (dht_local_t*)frame->local;
+ path = local->loc.path;
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_UNLINK_LOOKUP_INFO, "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno,
+ ((path == NULL)? "null" : path ));
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_lookup_everywhere_done (frame, this);
+ }
+
+ return 0;
+}
+
+int
+dht_lookup_unlink_of_false_linkto_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
+
+ local = (dht_local_t*)frame->local;
+ path = local->loc.path;
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_UNLINK_LOOKUP_INFO, "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno,
+ ((path == NULL)? "null" : path ));
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+
+ if (op_ret == 0) {
+ dht_lookup_everywhere_done (frame, this);
+ } else {
+ /* When dht_lookup_everywhere was performed, one cached
+ * and one hashed file were found, and the hashed file did
+ * not point to the above-mentioned cached node. So it
+ * was considered stale and an unlink was performed.
+ * But the unlink failed, so maybe rebalance is in progress.
+ * Now we effectively have two data files: one obtained during
+ * lookup_everywhere and one where the unlink failed. At
+ * this point we cannot decide which one to choose,
+ * because the first cached file may have been
+ * truncated after rebalance, and if it is chosen as the
+ * cached node the application will fail. So return EIO. */
+
+ if (op_errno == EBUSY) {
+
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_UNLINK_FAILED,
+ "Could not unlink the linkto file as "
+ "either fd is open and/or linkto xattr "
+ "is set for %s",
+ ((path == NULL)? "null":path));
+
+ }
+ DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL,
+ NULL, NULL);
+
+ }
+ }
+
+ return 0;
+}
+
+int
+dht_lookup_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+
+ dht_local_t *local = NULL;
+ const char *path = NULL;
+
+ /* NOTE:
+ * If the stale-file unlink fails because there is an open fd or the file
+ * is not a dht linkto file, posix_unlink returns EBUSY, which is
+ * overwritten to ENOENT here.
+ */
+
+ local = frame->local;
+
+ if (local && local->loc.path)
+ path = local->loc.path;
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_UNLINK_LOOKUP_INFO,
+ "Returned with op_ret %d and "
+ "op_errno %d for %s", op_ret, op_errno,
+ ((path==NULL)?"null":path));
+
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
+int
+dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict) {
+
+ int ret = 0;
+ xlator_t *this = NULL;
+ char *linktoskip_key = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ if (dht_is_tier_xlator (this))
+ linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
+ else
+ linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
+
+ ret = dict_set_int32 (dict, linktoskip_key, 1);
+
+ if (ret)
+ goto err;
+
+ ret = dict_set_int32 (dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+
+ if (ret)
+ goto err;
+
+
+ return 0;
+
+err:
+ return -1;
+
+}
+/* Rebalance is performed from cached_node to hashed_node. Initial cached_node
+ * contains a non-linkto file. After migration it is converted to linkto and
+ * then unlinked. And at hashed_subvolume, first a linkto file is present,
+ * then after migration it is converted to a non-linkto file.
+ *
+ * Let's assume a file is present on the cached subvolume, a new brick is added,
+ * and the new brick is the new hashed subvolume. A fresh lookup on the newly
+ * added hashed subvolume will fail and dht_lookup_everywhere gets called. If
+ * rebalance is in progress just before the dht_lookup_everywhere request is sent,
+ *
+ * from cached subvolume it may see: Nonlinkto or linkto or No file
+ * from hashed subvolume it may see: No file or linkto file or non-linkto file
+ *
+ * So this boils down to 9 cases:
+ * at cached_subvol at hashed_subvol
+ * ---------------- -----------------
+ *
+ *a) No file No file
+ * [request reached after [Request reached before
+ * migration] Migration]
+ *
+ *b) No file Linkto File
+ *
+ *c) No file Non-Linkto File
+ *
+ *d) Linkto No-File
+ *
+ *e) Linkto Linkto
+ *
+ *f) Linkto Non-Linkto
+ *
+ *g) NonLinkto No-File
+ *
+ *h) NonLinkto Linkto
+ *
+ *i) NonLinkto NonLinkto
+ *
+ * dht_lookup_everywhere_done decides what to do based on which of the above cases applies
+ */
+
+int
+dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t found_non_linkto_on_hashed = _gf_false;
+
+ local = frame->local;
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ gf_uuid_unparse (local->loc.gfid, gfid);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s (gfid = %s)exists as a file on one "
+ "subvolume and directory on another. "
+ "Please fix it manually",
+ local->loc.path, gfid);
+ DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ if (local->dir_count) {
+ dht_lookup_directory (frame, this, &local->loc);
+ return 0;
+ }
+
+ gf_msg_debug (this->name, 0, "STATUS: hashed_subvol %s "
+ "cached_subvol %s",
+ (hashed_subvol == NULL)?"null":hashed_subvol->name,
+ (cached_subvol == NULL)?"null":cached_subvol->name);
+
+ if (!cached_subvol) {
+
+ if (local->skip_unlink.handle_valid_link && hashed_subvol) {
+
+ /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK":
+ * If this lookup is performed by rebalance and this
+ * rebalance process detected hashed file and by
+ * the time it sends the lookup request to cached node,
+ * file got migrated and now at initial hashed_node,
+ * final migrated file is present. With current logic,
+ * because this process fails to find the cached_node,
+ * it will unlink the file at initial hashed_node.
+ *
+ * So we avoid this by setting key, and checking at the
+ * posix_unlink that unlink the file only if file is a
+ * linkto file and not a migrated_file.
+ */
+
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
+ (local->xattr_req);
+
+ if (ret) {
+ /* If for some reason, setting key in the dict
+ * fails, return with ENOENT, as with respect to
+ * this process, it detected only a stale link
+ * file.
+ *
+ * Next lookup will delete it.
+ *
+ * Deleting the stale link file when setting
+ * the key in the dict fails may cause data
+ * loss because of the above-mentioned race.
+ */
+
+
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT,
+ NULL, NULL, NULL, NULL);
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug (this->name, 0,
+ "No Cached was found and "
+ "unlink on hashed was skipped"
+ " so performing now: %s",
+ local->loc.path);
+
+ STACK_WIND (frame,
+ dht_lookup_unlink_stale_linkto_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
+
+ } else {
+
+ gf_msg_debug (this->name, 0,
+ "There was no cached file and "
+ "unlink on hashed is not skipped %s",
+ local->loc.path);
+
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL,
+ NULL, NULL);
+ }
+ return 0;
+ }
+
+ /* At the time of dht_lookup, no file was found on hashed and that is
+ * why dht_lookup_everywhere is called, but by the time
+ * dht_lookup_everywhere
+ * reached the server, the file might already have migrated. In that case we
+ * will find a migrated file at the hashed_node. In this case store the
+ * layout in context and return successfully.
+ */
+
+ if (hashed_subvol || local->need_lookup_everywhere) {
+
+ if (local->need_lookup_everywhere) {
+
+ found_non_linkto_on_hashed = _gf_true;
+
+ } else if ((local->file_count == 1) &&
+ (hashed_subvol == cached_subvol)) {
+
+ gf_msg_debug (this->name, 0,
+ "found cached file on hashed subvolume "
+ "so store in context and return for %s",
+ local->loc.path);
+
+ found_non_linkto_on_hashed = _gf_true;
+ }
+
+ if (found_non_linkto_on_hashed)
+ goto preset_layout;
+
+ }
+
+
+ if (hashed_subvol) {
+ if (local->skip_unlink.handle_valid_link == _gf_true) {
+ if (cached_subvol == local->skip_unlink.hash_links_to) {
+
+ if (gf_uuid_compare (local->skip_unlink.cached_gfid,
+ local->skip_unlink.hashed_gfid)){
+
+ /*GFID different, return error*/
+ DHT_STACK_UNWIND (lookup, frame, -1,
+ ESTALE, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+ }
+
+ ret = dht_layout_preset (this, cached_subvol,
+ local->loc.inode);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Could not set pre-set layout "
+ "for subvolume %s",
+ cached_subvol->name);
+ }
+
+ local->op_ret = (ret == 0) ? ret : -1;
+ local->op_errno = (ret == 0) ? ret : EINVAL;
+
+ /* Presence of local->cached_subvol validates
+ * that lookup from cached node is successful
+ */
+
+ if (!local->op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update
+ (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ gf_msg_debug (this->name, 0,
+ "Skipped unlinking linkto file "
+ "on the hashed subvolume. "
+ "Returning success as it is a "
+ "valid linkto file. Path:%s"
+ ,local->loc.path);
+
+ goto unwind_hashed_and_cached;
+ } else {
+
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug (this->name, 0,
+ "Linkto file found on hashed "
+ "subvol "
+ "and data file found on cached "
+ "subvolume. But linkto points to "
+ "different cached subvolume (%s) "
+ "path %s",
+ (local->skip_unlink.hash_links_to ?
+ local->skip_unlink.hash_links_to->name :
+ " <nil>"), local->loc.path);
+
+ if (local->skip_unlink.opend_fd_count == 0) {
+
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
+ (local->xattr_req);
+
+
+ if (ret) {
+ DHT_STACK_UNWIND (lookup, frame, -1,
+ EIO, NULL, NULL,
+ NULL, NULL);
+ } else {
+ local->call_cnt = 1;
+ STACK_WIND (frame,
+ dht_lookup_unlink_of_false_linkto_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->unlink,
+ &local->loc, 0,
+ local->xattr_req);
+ }
+
+ return 0;
+
+ }
+ }
+
+ }
+ }
+
+
+preset_layout:
+
+ if (found_non_linkto_on_hashed) {
+
+ if (local->need_lookup_everywhere) {
+ if (gf_uuid_compare (local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT,
+ NULL, NULL, NULL, NULL);
+ return 0;
+ }
+ }
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol (this, cached_subvol);
+ if (!layout) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "%s: no pre-set layout for subvolume %s,"
+ " gfid = %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"), gfid);
+ }
+
+ ret = dht_layout_set (this, local->inode, layout);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "%s: failed to set layout for subvol %s, "
+ "gfid = %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"), gfid);
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ if (!hashed_subvol) {
+
+ gf_msg_debug (this->name, 0,
+ "Cannot create linkfile for %s on %s: "
+ "hashed subvolume cannot be found, gfid = %s.",
+ local->loc.path, cached_subvol->name, gfid);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ ret = dht_layout_preset (frame->this, cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvol %s"
+ ", gfid = %s",
+ cached_subvol ? cached_subvol->name :
+ "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "Creating linkto file on %s(hash) to %s on %s (gfid = %s)",
+ hashed_subvol->name, local->loc.path,
+ cached_subvol->name, gfid);
+
+ ret = dht_linkfile_create (frame,
+ dht_lookup_linkfile_create_cbk, this,
+ cached_subvol, hashed_subvol, &local->loc);
+
+ return ret;
+
+unwind_hashed_and_cached:
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+}
int
dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
+ int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
- int is_linkfile = 0;
- int is_dir = 0;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- xlator_t *link_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- int ret = -1;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *dict_req = {0};
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+
+ local = frame->local;
+ loc = &local->loc;
+ conf = this->private;
- conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
- local = frame->local;
- loc = &local->loc;
+ gf_msg_debug (this->name, 0,
+ "returned with op_ret %d and op_errno %d (%s) "
+ "from subvol %s", op_ret, op_errno, loc->path,
+ subvol->name);
- prev = cookie;
- subvol = prev->this;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ goto unlock;
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- goto unlock;
- }
+ if (gf_uuid_is_null (local->gfid))
+ gf_uuid_copy (local->gfid, buf->ia_gfid);
- is_linkfile = check_is_linkfile (inode, buf, xattr);
- is_dir = check_is_dir (inode, buf, xattr);
+ gf_uuid_unparse(local->gfid, gfid);
- if (is_linkfile) {
- link_subvol = dht_linkfile_subvol (this, inode, buf,
- xattr);
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s linkfile %s (-> %s)",
- subvol->name, loc->path,
- link_subvol ? link_subvol->name : "''");
- goto unlock;
- }
+ if (gf_uuid_compare (local->gfid, buf->ia_gfid)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_GFID_MISMATCH,
+ "%s: gfid differs on subvolume %s,"
+ " gfid local = %s, gfid node = %s",
+ loc->path, prev->this->name, gfid,
+ uuid_utoa(buf->ia_gfid));
+ }
+
+ is_linkfile = check_is_linkfile (inode, buf, xattr,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol (this, inode, buf,
+ xattr);
+ gf_msg_debug (this->name, 0,
+ "found on %s linkfile %s (-> %s)",
+ subvol->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
+ }
+
+ is_dir = check_is_dir (inode, buf, xattr);
+
+ /* non linkfile GFID takes precedence but don't overwrite
+ gfid if we have already found a cached file*/
+ if (!local->cached_subvol)
+ gf_uuid_copy (local->gfid, buf->ia_gfid);
if (is_dir) {
local->dir_count++;
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s directory %s",
- subvol->name, loc->path);
+ gf_msg_debug (this->name, 0,
+ "found on %s directory %s",
+ subvol->name, loc->path);
} else {
local->file_count++;
+ gf_msg_debug (this->name, 0,
+ "found cached file on %s for %s",
+ subvol->name, loc->path);
+
if (!local->cached_subvol) {
/* found one file */
dht_iatt_merge (this, &local->stbuf, buf,
subvol);
local->xattr = dict_ref (xattr);
local->cached_subvol = subvol;
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s file %s",
- subvol->name, loc->path);
-
+
+ gf_msg_debug (this->name, 0,
+ "storing cached on %s file"
+ " %s", subvol->name, loc->path);
+
dht_iatt_merge (this, &local->postparent,
postparent, subvol);
+
+ gf_uuid_copy (local->skip_unlink.cached_gfid,
+ buf->ia_gfid);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
+ /* This is where we need 'rename' both entries logic */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_FILE_ON_MULT_SUBVOL,
"multiple subvolumes (%s and %s) have "
- "file %s", local->cached_subvol->name,
+ "file %s (preferably rename the file "
+ "in the backend,and do a fresh lookup)",
+ local->cached_subvol->name,
subvol->name, local->loc.path);
}
}
- }
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_DEBUG,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- dht_linkfile_unlink (frame, this, subvol, loc);
- }
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- hashed_subvol = local->hashed_subvol;
- cached_subvol = local->cached_subvol;
-
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- loc->path);
- DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
- NULL);
- return 0;
- }
-
- if (local->dir_count) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
-
- if (!cached_subvol) {
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
- NULL);
- return 0;
- }
+ UNLOCK (&frame->lock);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot create linkfile file for %s on %s: "
- "hashed subvolume cannot be found.",
- loc->path, cached_subvol->name);
-
- local->op_ret = 0;
- local->op_errno = 0;
+ if (is_linkfile) {
+ ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+
+ /* Any linkto file found on the non-hashed subvolume should
+ * be unlinked (performed in the "else if" block below)
+ *
+ * But if a linkto file is found on hashed subvolume, it may be
+ * pointing to a valid cached node. So unlinking of linkto
+ * file on hashed subvolume is skipped and inside
+ * dht_lookup_everywhere_done, checks are performed. If this
+ * linkto file is found as stale linkto file, it is deleted
+ * otherwise unlink is skipped.
+ */
+
+ if (local->hashed_subvol && local->hashed_subvol == subvol) {
+
+ local->skip_unlink.handle_valid_link = _gf_true;
+ local->skip_unlink.opend_fd_count = fd_count;
+ local->skip_unlink.hash_links_to = link_subvol;
+ gf_uuid_copy (local->skip_unlink.hashed_gfid,
+ buf->ia_gfid);
+
+ gf_msg_debug (this->name, 0, "Found"
+ " one linkto file on hashed subvol %s "
+ "for %s: Skipping unlinking till "
+ "everywhere_done", subvol->name,
+ loc->path);
+
+ } else if (!ret && (fd_count == 0)) {
+
+ dict_req = dict_new ();
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
+ (dict_req);
+
+ if (ret) {
+
+ /* Skip unlinking on dict failure.
+ * The file was found as a linkto file on a
+ * non-hashed subvolume. In the current implementation,
+ * finding a linkto file on non-hashed does not
+ * always imply that it is stale. So deletion
+ * of the file should be done only when both the fd is
+ * closed and the linkto xattr is set. In case of
+ * dict_set failure, skip unlinking the file.
+ * NOTE: dht_frame_return should get called for
+ * this block.
+ */
+
+ dict_unref (dict_req);
- ret = dht_layout_preset (frame->this, cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvol %s",
- cached_subvol ? cached_subvol->name :
- "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- }
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "attempting deletion of stale linkfile "
+ "%s on %s (hashed subvol is %s)",
+ loc->path, subvol->name,
+ (local->hashed_subvol?
+ local->hashed_subvol->name : "<null>"));
- if (local->loc.parent)
- local->postparent.ia_ino =
- local->loc.parent->ino;
+ STACK_WIND (frame, dht_lookup_unlink_cbk,
+ subvol, subvol->fops->unlink, loc,
+ 0, dict_req);
- WIPE (&local->postparent);
+ dict_unref (dict_req);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
+ return 0;
+ }
}
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "linking file %s existing on %s to %s (hash)",
- loc->path, cached_subvol->name,
- hashed_subvol->name);
-
- dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
- cached_subvol, hashed_subvol, loc);
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_lookup_everywhere_done (frame, this);
+ }
- return 0;
+out:
+ return ret;
}
int
dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
- conf = this->private;
- local = frame->local;
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", loc, out);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ if (!local->inode)
+ local->inode = inode_ref (loc->inode);
- if (!local->inode)
- local->inode = inode_ref (loc->inode);
+ gf_msg_debug (this->name, 0,
+ "winding lookup call to %d subvols", call_cnt);
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_everywhere_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_everywhere_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
- return 0;
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+err:
+ return -1;
}
@@ -483,60 +1832,93 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
struct iatt *postparent)
{
call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ dht_conf_t *conf = NULL;
int ret = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, unwind);
prev = cookie;
- subvol = prev->this;
- conf = this->private;
- local = frame->local;
- loc = &local->loc;
+ subvol = prev->this;
+ conf = this->private;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_uuid_unparse(loc->gfid, gfid);
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s (following linkfile) failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- goto err;
- }
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) failed "
+ ",gfid = %s", local->loc.path, subvol->name, gfid);
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
+ }
if (check_is_dir (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s (following linkfile) reached dir",
- local->loc.path, subvol->name);
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) reached dir,"
+ " gfid = %s", local->loc.path, subvol->name, gfid);
goto err;
}
- if (check_is_linkfile (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s (following linkfile) reached link",
- local->loc.path, subvol->name);
+ if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "lookup of %s on %s (following linkfile) reached link,"
+ "gfid = %s", local->loc.path, subvol->name, gfid);
goto err;
}
- if ((stbuf->ia_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- stbuf->ia_prot.sticky = 1;
- }
- dht_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino);
- if (local->loc.parent)
- postparent->ia_ino = local->loc.parent->ino;
-
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on data file on %s,"
+ " gfid local = %s, gfid node = %s ",
+ local->loc.path, subvol->name, gfid,
+ uuid_utoa(stbuf->ia_gfid));
+ goto err;
+ }
-out:
- WIPE (postparent);
+ if ((stbuf->ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ stbuf->ia_prot.sticky = 1;
+ }
+
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvolume %s,"
+ "gfid = %s", prev->this->name, gfid);
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
+unwind:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ dht_set_fixed_dir_stat (postparent);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
@@ -544,7 +1926,7 @@ out:
err:
dht_lookup_everywhere (frame, this, loc);
-
+out:
return 0;
}
@@ -556,21 +1938,40 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
int i = 0;
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", loc, unwind);
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->call_cnt = call_cnt;
-
+
local->layout = dht_layout_new (this, conf->subvolume_cnt);
if (!local->layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
- return 0;
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref (local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null (local->gfid)) {
+ ret = dict_set_static_bin (local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req", local->loc.path);
}
-
+
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
conf->subvolumes[i],
@@ -578,6 +1979,11 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
&local->loc, local->xattr_req);
}
return 0;
+unwind:
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
+
}
@@ -595,8 +2001,14 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
call_frame_t *prev = NULL;
int ret = 0;
- uint64_t tmp_layout = 0;
dht_layout_t *parent_layout = NULL;
+ uint32_t vol_commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
conf = this->private;
@@ -604,96 +2016,197 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
- (loc->parent)) {
- ret = inode_ctx_get (loc->parent, this, &tmp_layout);
- parent_layout = (dht_layout_t *)(long)tmp_layout;
- if (parent_layout->search_unhashed) {
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (!op_ret && gf_uuid_is_null (local->gfid))
+ memcpy (local->gfid, stbuf->ia_gfid, 16);
+
+ gf_msg_debug (this->name, op_errno,
+ "fresh_lookup returned for %s with op_ret %d",
+ loc->path, op_ret);
+
+ if (!conf->vch_forced) {
+ ret = dict_get_uint32 (xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
+ }
+ }
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ gf_msg_debug (this->name, 0,
+ "Entry %s missing on subvol %s",
+ loc->path, prev->this->name);
+
+ /* lookup-optimize supercedes lookup-unhashed settings,
+ * - so if it is set, do not process search_unhashed
+ * - except, in the case of rebalance deamon, we want to
+ * force the lookup_everywhere behavior */
+ if (!conf->defrag && conf->lookup_optimize && loc->parent) {
+ ret = dht_inode_ctx_layout_get (loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout ||
+ (parent_layout->commit_hash !=
+ conf->vol_commit_hash)) {
+ gf_msg_debug (this->name, 0,
+ "hashes don't match (ret - %d,"
+ " parent_layout - %p, parent_hash - %x,"
+ " vol_hash - %x), do global lookup",
+ ret, parent_layout,
+ (parent_layout ?
+ parent_layout->commit_hash : -1),
+ conf->vol_commit_hash);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ } else {
+ if (conf->search_unhashed ==
+ GF_DHT_LOOKUP_UNHASHED_ON) {
local->op_errno = ENOENT;
dht_lookup_everywhere (frame, this, loc);
return 0;
}
+
+ if ((conf->search_unhashed ==
+ GF_DHT_LOOKUP_UNHASHED_AUTO) &&
+ (loc->parent)) {
+ ret = dht_inode_ctx_layout_get (loc->parent,
+ this,
+ &parent_layout);
+ if (ret || !parent_layout)
+ goto out;
+ if (parent_layout->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this,
+ loc);
+ return 0;
+ }
+ }
}
- }
+ }
- if (op_ret == 0) {
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
- }
- }
+ if (op_ret == 0) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (is_dir) {
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+ }
+ }
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
dht_lookup_directory (frame, this, &local->loc);
return 0;
- }
-
- if (op_ret == -1)
+ }
+
+ if (op_ret == -1) {
+ gf_msg_debug (this->name, op_errno,
+ "Lookup of %s for subvolume"
+ " %s failed", loc->path,
+ prev->this->name);
goto out;
+ }
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
- if (!is_dir && !is_linkfile) {
+ if (!is_linkfile) {
/* non-directory and not a linkfile */
- dht_itransform (this, prev->this, stbuf->ia_ino,
- &stbuf->ia_ino);
- if (loc->parent)
- postparent->ia_ino = loc->parent->ino;
-
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- goto out;
- }
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "could not set pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ goto out;
+ }
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ if (!subvol) {
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO, "linkfile not having link "
+ "subvol for %s", loc->path);
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ gf_msg_debug (this->name, 0,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
+ gf_msg_debug (this->name, 0,
+ "Calling lookup on linkto target %s for path %s",
+ subvol->name, loc->path);
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
return 0;
out:
- /*
- * FIXME: postparent->ia_size and postparent->st_blocks do not have
- * correct values. since, postparent corresponds to a directory these
+ /*
+ * FIXME: postparent->ia_size and postparent->st_blocks do not have
+ * correct values. since, postparent corresponds to a directory these
* two members should have values equal to sum of corresponding values
* from each of the subvolume. See dht_iatt_merge for reference.
*/
- WIPE (postparent);
+ if (!op_ret && local && local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ dht_set_fixed_dir_stat (postparent);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
+err:
return 0;
}
+/* For directories, check if acl xattrs have been requested (by the acl xlator),
+ * if not, request for them. These xattrs are needed for dht dir self-heal to
+ * perform proper self-healing of dirs
+ */
+void
+dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+{
+ int ret = 0;
+
+ GF_ASSERT (inode);
+ GF_ASSERT (xattr_req);
+
+ if (inode->ia_type != IA_IFDIR)
+ return;
+
+ if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_msg (THIS->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
+
+ if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_msg (THIS->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
int
dht_lookup (call_frame_t *frame, xlator_t *this,
@@ -701,124 +2214,231 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+ loc_t new_loc = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
+ if (!conf)
+ goto err;
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
-
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
+ &hashed_subvol);
+ if (ret) {
+ loc_wipe (&local->loc);
+ ret = loc_dup (&new_loc, &local->loc);
+
+ /* we no more need 'new_loc' entries */
+ loc_wipe (&new_loc);
+
+ /* check if loc_dup() is successful */
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg_debug (this->name, errno,
+ "copying location failed for path=%s",
+ loc->path);
+ goto err;
+ }
+ }
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
- if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
+ if (gf_uuid_is_null (loc->pargfid) && !gf_uuid_is_null (loc->gfid) &&
+ !__is_root_gfid (loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_discover (frame, this, loc);
+ return 0;
+ }
+
+ if (__is_root_gfid(loc->gfid)) {
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
+
+ if (!hashed_subvol)
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate (loc)) {
+ layout = local->layout;
if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
+ gf_msg_debug (this->name, 0,
+ "Revalidate lookup without cache."
+ " path=%s", loc->path);
op_errno = EINVAL;
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_TRACE,
- "incomplete layout failure for path=%s",
- loc->path);
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_trace (this->name, 0,
+ "incomplete layout failure for path=%s",
+ loc->path);
dht_layout_unref (this, local->layout);
local->layout = NULL;
- goto do_fresh_lookup;
- }
+ local->cached_subvol = NULL;
+
+ gf_msg_debug(this->name, 0,
+ "Called revalidate lookup for %s, "
+ "but layout->gen (%d) is less than "
+ "conf->gen (%d), calling fresh_lookup",
+ loc->path, layout->gen, conf->gen);
+
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
- local->inode = inode_ref (loc->inode);
- local->ia_ino = loc->inode->ino;
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", conf->xattr_name, loc->path);
+ goto err;
+ }
+ /* need it in case file is not found on cached file
+ * on revalidate path and we may encounter linkto files on
+ * with dht_lookup_everywhere*/
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", conf->link_xattr_name, loc->path);
+ goto err;
+ }
+ if (IA_ISDIR (local->inode->ia_type)) {
+ local->call_cnt = call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_revalidate_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ call_cnt = local->call_cnt = layout->cnt;
+
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ gf_msg_debug (this->name, 0, "calling "
+ "revalidate lookup for %s at %s",
+ loc->path, subvol->name);
- subvol = layout->list[0].xlator;
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
+ }
} else {
do_fresh_lookup:
- /* TODO: remove the hard-coding */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ /* TODO: remove the hard-coding */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", conf->xattr_name, loc->path);
+ goto err;
+ }
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", conf->link_xattr_name, loc->path);
+ goto err;
+ }
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s", GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this,
+
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this,
conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "Found null hashed subvol. Calling lookup"
+ " on all nodes.");
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
}
+ gf_msg_debug (this->name, 0, "Calling fresh lookup for %s on"
+ " %s", loc->path, hashed_subvol->name);
+
STACK_WIND (frame, dht_lookup_cbk,
hashed_subvol, hashed_subvol->fops->lookup,
loc, local->xattr_req);
@@ -827,1302 +2447,2376 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
-
int
-dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if ((op_ret == -1) && !((op_errno == ENOENT) ||
+ (op_errno == ENOTCONN))) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "Unlink link: subvolume %s"
+ " returned -1",
+ prev->this->name);
+ goto unlock;
+ }
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
- local = frame->local;
- prev = cookie;
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+int
+dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *hashed_subvol = NULL;
- dht_iatt_merge (this, &local->prebuf, prebuf, prev->this);
- dht_iatt_merge (this, &local->stbuf, postbuf, prev->this);
+ local = frame->local;
+ prev = cookie;
- if (local->inode) {
- local->stbuf.ia_ino = local->inode->ino;
- local->prebuf.ia_ino = local->inode->ino;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
+ gf_msg_debug (this->name, op_errno,
+ "Unlink: subvolume %s returned -1",
+ prev->this->name);
+ goto unlock;
}
- local->op_ret = 0;
- }
+ local->op_ret = 0;
+
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
+
+ if (!local->op_ret) {
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ if (hashed_subvol &&
+ hashed_subvol != local->cached_subvol) {
+ /*
+ * If hashed and cached are different, then we need
+ * to unlink linkfile from hashed subvol if data
+ * file is deleted successfully
+ */
+ STACK_WIND (frame, dht_unlink_linkfile_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->unlink, &local->loc,
+ local->flags, xdata);
+ return 0;
+ }
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
return 0;
}
+int
+dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
-int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
-
- if (local->inode)
- local->stbuf.ia_ino = local->inode->ino;
- local->op_ret = 0;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto unlock;
+ }
+
+ local->op_ret = 0;
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
return 0;
}
+static void
+fill_layout_info (dht_layout_t *layout, char *buf)
+{
+ int i = 0;
+ char tmp_buf[128] = {0,};
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf (tmp_buf, 128, "(%s %u %u)",
+ layout->list[i].xlator->name,
+ layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat (buf, " ");
+ strcat (buf, tmp_buf);
+ }
+}
+
+void
+dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
+ char *xattr_buf, int32_t alloc_len,
+ int flag, char *layout_buf)
+{
+ if (flag && local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name,
+ layout_buf);
+ else if (local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "(<"DHT_PATHINFO_HEADER"%s> %s)",
+ this->name, local->xattr_val);
+ else if (flag)
+ snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
+ this->name, layout_buf);
+}
int
-dht_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
+ int ret = -1;
+ char *value = NULL;
+ int32_t plen = 0;
+
+ ret = dict_get_str (xattr, local->xsel, &value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED,
+ "Subvolume %s returned -1", this->name);
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+ local->alloc_len += strlen(value);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (!local->xattr_val) {
+ local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (local->xattr_val) {
+ plen = strlen (local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC (local->xattr_val,
+ local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ (void) strcat (local->xattr_val, value);
+ (void) strcat (local->xattr_val, " ");
+ local->op_ret = 0;
+ }
- local->layout = layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ ret = 0;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ out:
+ return ret;
+}
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
+int
+dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {0,};
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc);
- }
+ if (flag)
+ fill_layout_info (local->layout, layout_buf);
- return 0;
+ *dict = dict_new ();
+ if (!*dict)
+ goto out;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we dont have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen (this->name))
+ + strlen (layout_buf)
+ + 40;
+ xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
- return 0;
-}
+ if (XATTR_IS_PATHINFO (local->xsel)) {
+ (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
+ local->alloc_len, flag,
+ layout_buf);
+ } else if (XATTR_IS_NODE_UUID (local->xsel)) {
+ (void) snprintf (xattr_buf, local->alloc_len, "%s",
+ local->xattr_val);
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_GET_XATTR_FAILED,
+ "Unknown local->xsel (%s)", local->xsel);
+ GF_FREE (xattr_buf);
+ goto out;
+ }
+ ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
+ if (ret)
+ GF_FREE (xattr_buf);
+ GF_FREE (local->xattr_val);
+ out:
+ return ret;
+}
+/*
+ * Per-subvolume getxattr callback used to discover which subvolumes are
+ * local to this node.
+ *
+ * Each successful reply carries, under the key held in local->xsel, a
+ * space-separated list of node uuids. Every uuid is parsed and compared
+ * with conf->defrag->node_uuid; on the first match the replying subvolume
+ * (prev->this) is appended to conf->local_subvols and scanning stops.
+ * local->call_cnt is decremented under frame->lock and the frame is
+ * unwound only after the last reply: with this reply's xattr/xdata on
+ * success, or with -1/local->op_errno when local->op_ret is -1.
+ *
+ * NOTE(review): conf->defrag is dereferenced without a NULL check and
+ * conf->local_subvols is appended to without a visible bounds check --
+ * confirm callers guarantee both.
+ * NOTE(review): a successful reply sets local->op_ret back to 0 even if
+ * an earlier reply recorded -1 -- confirm this is intended.
+ *
+ * Always returns 0.
+ */
 int
-dht_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
 {
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ char *uuid_str = NULL;
+ char *uuid_list = NULL;
+ char *next_uuid_str = NULL;
+ char *saveptr = NULL;
+ uuid_t node_uuid = {0,};
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;;
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd);
- }
-
- return 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
-
- return 0;
-}
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED,
+ "getxattr err for dir");
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto unlock;
+ }
-int
-dht_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ ret = dict_get_str (xattr, local->xsel, &uuid_list);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_GET_FAILED,
+ "Failed to get %s", local->xsel);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unlock;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ /* strtok_r writes NUL terminators into uuid_list; the next token
+ * is fetched before the current one is parsed. */
+ for (uuid_str = strtok_r (uuid_list, " ", &saveptr);
+ uuid_str;
+ uuid_str = next_uuid_str) {
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ next_uuid_str = strtok_r (NULL, " ", &saveptr);
+ if (gf_uuid_parse (uuid_str, node_uuid)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_UUID_PARSE_ERROR,
+ "Failed to parse uuid"
+ " for %s", prev->this->name);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unlock;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (gf_uuid_compare (node_uuid, conf->defrag->node_uuid)) {
+ gf_msg_debug (this->name, 0, "subvol %s does not "
+ "belong to this node",
+ prev->this->name);
+ } else {
+ conf->local_subvols[(conf->local_subvols_cnt)++]
+ = prev->this;
+ gf_msg_debug (this->name, 0, "subvol %s belongs to"
+ " this node", prev->this->name);
+ break;
+ }
+ }
+ }
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ local->op_ret = 0;
+ unlock:
+ UNLOCK (&frame->lock);
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->truncate,
- loc, offset);
+ /* Only the last reply unwinds the frame. */
+ if (!is_last_call (this_call_cnt))
+ goto out;
- return 0;
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, xattr, xdata);
+ goto out;
- return 0;
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, xdata);
+ out:
+ return 0;
 }
-
+/*
+ * Per-subvolume getxattr callback for the directory case, where the
+ * request is wound to several subvolumes and the replies are combined.
+ *
+ * Under frame->lock the outstanding-call counter is decremented. A failed
+ * reply is recorded in local->op_ret/op_errno unless the error is
+ * ENOTCONN, which is silently skipped. A successful reply is handed to
+ * dht_vgetxattr_alloc_and_fill(); a failure there is only logged.
+ *
+ * After the last reply the combined value is placed into a dict by
+ * dht_vgetxattr_fill_and_set() (flag _gf_true) and the frame is unwound
+ * with it; if an error was recorded or building the dict fails, the
+ * frame is unwound with -1 and local->op_errno.
+ *
+ * Always returns 0.
+ */
 int
-dht_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
 {
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dict_t *dict = NULL;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local = frame->local;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ /* ENOTCONN replies are skipped without marking the
+ * whole operation as failed. */
+ if (op_errno != ENOTCONN) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED,
+ "getxattr err for dir");
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ goto unlock;
+ }
- local->inode = inode_ref (fd->inode);
- local->call_cnt = 1;
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DICT_SET_FAILED,
+ "alloc or fill failure");
+ }
+ unlock:
+ UNLOCK (&frame->lock);
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset);
+ if (!is_last_call (this_call_cnt))
+ goto out;
- return 0;
+ /* -- last call: do patch ups -- */
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- return 0;
-}
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
+ cleanup:
+ /* Drop this function's reference on the result dict, if one was
+ * created. */
+ if (dict)
+ dict_unref (dict);
+ out:
+ return 0;
+}
+/*
+ * getxattr callback for the single-subvolume (file) case.
+ *
+ * On failure the error is recorded in local and the frame is unwound with
+ * -1. On success the reply is passed to dht_vgetxattr_alloc_and_fill()
+ * and then dht_vgetxattr_fill_and_set() builds the result dict; the flag
+ * given to the latter is true only when local->layout->cnt > 1. The
+ * frame is unwound with the built dict, or with -1/local->op_errno if
+ * either helper fails.
+ *
+ * NOTE(review): frame, frame->local and local->layout are dereferenced
+ * without validation here -- confirm callers guarantee them.
+ *
+ * Always returns 0.
+ */
 int
-dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
 {
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ call_frame_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED,
+ "vgetxattr: Subvolume %s returned -1",
+ prev->this->name);
+ goto unwind;
+ }
- preparent->ia_ino = local->loc.parent->ino;
- postparent->ia_ino = local->loc.parent->ino;
- local->op_ret = 0;
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_NO_MEMORY,
+ "Allocation or fill failure");
+ goto unwind;
+ }
- local->postparent = *postparent;
- local->preparent = *preparent;
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
- WIPE (&local->postparent);
- WIPE (&local->preparent);
- }
-unlock:
- UNLOCK (&frame->lock);
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+ if (ret)
+ goto unwind;
- DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
+ NULL, NULL);
+ cleanup:
+ /* Drop this function's reference on the result dict, if any. */
+ if (dict)
+ dict_unref (dict);
 return 0;
 }
+/*
+ * getxattr callback for a GF_XATTR_LINKINFO_KEY request.
+ *
+ * The request is wound with GF_XATTR_PATHINFO_KEY; on a successful reply
+ * the pathinfo value is re-published in the same dict under
+ * GF_XATTR_LINKINFO_KEY. Failing to fetch or set the value does not
+ * change the result: the frame is always unwound with the op_ret,
+ * op_errno, xattr and xdata received from the subvolume.
+ *
+ * Always returns 0.
+ */
+int
+dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ if (op_ret != -1) {
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
+ /* A non-zero return means the set failed; trace only
+ * in that case. */
+ if (ret)
+ gf_msg_trace (this->name, 0,
+ "failed to set linkinfo");
+ }
+ }
+
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+/*
+ * Generic getxattr/fgetxattr callback that merges the replies of one or
+ * more subvolumes.
+ *
+ * For each successful reply, under frame->lock: the key named by
+ * conf->xattr_name is removed from the reply; for callers with
+ * frame->root->pid >= 0 the "trusted.glusterfs.quota*" and
+ * "trusted.pgfid*" keys are removed as well; the first reply is copied
+ * into local->xattr and later ones are merged into it with
+ * dht_aggregate_xattr(). A failed or empty reply only stores op_ret in
+ * local->op_ret.
+ *
+ * After the last reply the frame is unwound with local->xattr. If any
+ * xattr was collected the result is reported as success. The op_errno
+ * passed up is the one from the last reply processed.
+ *
+ * If the initial validation fails, local is never assigned; in that case
+ * the function returns without touching local (and without unwinding,
+ * because the call counter kept in local is not available).
+ *
+ * Always returns 0.
+ */
 int
-dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
 {
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ conf = this->private;
+ local = frame->local;
- xlator_t *cached_subvol = NULL;
+ LOCK (&frame->lock);
+ {
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto unlock;
+ }
- local = frame->local;
- prev = cookie;
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR
+ ("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ local->op_ret = 0;
- if (op_ret == -1)
- goto err;
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref (xattr, NULL);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s",
- local->loc.path);
- local->op_errno = EINVAL;
- goto err;
 }
+unlock:
+ UNLOCK (&frame->lock);
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ this_call_cnt = dht_frame_return (frame);
+out:
+ /* Reached with local == NULL when validation failed above;
+ * this_call_cnt is still 0 then, so the block below would
+ * otherwise dereference a NULL local. */
+ if (!local)
+ return 0;
+
+ if (is_last_call (this_call_cnt)) {
+
+ /* If we have a valid xattr received from any one of the
+ * subvolume, let's return it */
+ if (local->xattr) {
+ local->op_ret = 0;
+ }
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, NULL);
+ }
 return 0;
+}
-err:
- DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL);
+/*
+ * Thin wrapper that unwinds a getxattr frame with the given result.
+ * It is passed as the unwind callback to
+ * cluster_handle_marker_getxattr() by dht_getxattr().
+ *
+ * Always returns 0.
+ */
+int32_t
+dht_getxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
 return 0;
 }
+/*
+ * Per-subvolume callback for a GF_XATTR_GET_REAL_FILENAME_KEY getxattr
+ * that was wound to every subvolume of a directory's layout.
+ *
+ * Result selection, done under frame->lock:
+ * - once ENODATA or EOPNOTSUPP has been recorded in local->op_errno,
+ * later replies are ignored;
+ * - a reply failing with ENODATA/EOPNOTSUPP discards any result kept
+ * so far and records that error;
+ * - a reply failing with ENOENT changes nothing (the initiator sets
+ * -1/ENOENT as the default);
+ * - any other failure is logged and otherwise ignored;
+ * - a successful reply replaces the kept xattr (local->xattr) and
+ * xdata (held in local->xattr_req) and records success.
+ *
+ * The frame is unwound with the kept result after the last reply.
+ *
+ * Always returns 0.
+ */
 int
-dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf)
+dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
 {
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ LOCK (&frame->lock);
+ {
+ if (local->op_errno == ENODATA ||
+ local->op_errno == EOPNOTSUPP) {
+ /* Nothing to do here, we have already found
+ * a subvol which does not have the get_real_filename
+ * optimization. If condition is for simple logic.
+ */
+ goto unlock;
+ }
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (op_ret == -1) {
+
+ if (op_errno == ENODATA || op_errno == EOPNOTSUPP) {
+ /* This subvol does not have the optimization.
+ * Better let the user know we don't support it.
+ * Remove previous results if any.
+ */
+
+ if (local->xattr) {
+ dict_unref (local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UPGRADE_BRICKS, "At least "
+ "one of the bricks does not support "
+ "this operation. Please upgrade all "
+ "bricks.");
+ goto unlock;
+ }
- if (local && (op_ret == 0)) {
- prebuf->ia_ino = local->ia_ino;
- postbuf->ia_ino = local->ia_ino;
+ if (op_errno == ENOENT) {
+ /* Do nothing, our defaults are set to this.
+ */
+ goto unlock;
+ }
+
+ /* This is a place holder for every other error
+ * case. I am not sure of how to interpret
+ * ENOTCONN etc. As of now, choosing to ignore
+ * down subvol and return a good result(if any)
+ * from other subvol.
+ */
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_XATTR_FAILED,
+ "Failed to get real filename.");
+ goto unlock;
+
+ }
+
+
+ /* This subvol has the required file.
+ * There could be other subvols which have returned
+ * success already, choosing to return the latest good
+ * result.
+ */
+ if (local->xattr)
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
+
+ /* local->xattr_req is reused here to keep the reply's xdata
+ * until the frame is unwound. */
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
+
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ gf_msg_debug (this->name, 0, "Found a matching "
+ "file.");
 }
+unlock:
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- prebuf, postbuf);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->xattr,
+ local->xattr_req);
+ }
 return 0;
 }
-
+/*
+ * Wind a get-real-filename getxattr to every subvolume listed in the
+ * layout stored in frame->local.
+ *
+ * The default result is preset to -1/ENOENT; the callback
+ * dht_getxattr_get_real_filename_cbk overrides it according to the
+ * replies. local->call_cnt is set to the number of subvolumes wound to.
+ *
+ * Expects frame->local and its layout to be set up by the caller.
+ * Always returns 0.
+ */
 int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
 {
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ layout = local->layout;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ cnt = local->call_cnt = layout->cnt;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno);
+ /* The loop bound is the local copy cnt, not local->call_cnt. */
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
 }
 return 0;
 }
-
+/*
+ * Fill subvols[] with the xlator of every entry in the layout stored in
+ * frame->local and return the number of entries written (layout->cnt).
+ * It is passed to cluster_handle_marker_getxattr() by dht_getxattr().
+ *
+ * The type and gauge parameters are not used by this implementation.
+ *
+ * NOTE(review): the capacity of subvols[] is not checked here -- confirm
+ * the caller sizes it for at least layout->cnt entries.
+ */
 int
-dht_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+dht_marker_populate_args (call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
 {
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ local = frame->local;
+ layout = local->layout;
+
+ for (i = 0; i < layout->cnt; i++)
+ subvols[i] = layout->list[i].xlator;
+
+ return layout->cnt;
+}
+
+/*
+ * getxattr entry point of the DHT translator.
+ *
+ * After initialising frame->local, the key selects one of these paths
+ * (DHT_IS_DIR means the layout has more than one entry):
+ * - GF_XATTR_GET_REAL_FILENAME_KEY prefix on a directory: handled by
+ * dht_getxattr_get_real_filename();
+ * - GF_REBAL_FIND_LOCAL_SUBVOL on a directory: GF_XATTR_NODE_UUID_KEY
+ * is requested from every configured subvolume and the replies go
+ * to dht_find_local_subvol_cbk;
+ * - pathinfo or node-uuid on a directory: wound to every layout
+ * subvolume, replies combined by dht_vgetxattr_dir_cbk;
+ * - pathinfo or node-uuid on a file: wound to the cached subvolume;
+ * - GF_XATTR_LINKINFO_KEY: pathinfo is requested from the hashed
+ * subvolume, or ENODATA is returned when hashed == cached;
+ * - quota limit keys: wound to the first subvolume that is up;
+ * - marker keys: delegated to cluster_handle_marker_getxattr();
+ * - anything else (including a NULL key): wound to all layout
+ * subvolumes for a directory, or to the first one otherwise, and
+ * merged by dht_getxattr_cbk.
+ *
+ * A missing cached subvolume (file pathinfo/node-uuid path) fails with
+ * EINVAL, matching the check dht_fsetxattr makes; no subvolume being up
+ * (quota path) fails with ENOTCONN.
+ *
+ * NOTE(review): strncpy() with a length of 256 leaves local->xsel without
+ * a terminating NUL when the key is 256 bytes or longer; the declared
+ * size of local->xsel is not visible here -- confirm.
+ *
+ * On any setup failure the frame is unwound with -1 and an errno.
+ * Always returns 0.
+ */
+int
+dht_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ char *node_uuid_key = NULL;
+ int ret = -1;
 VALIDATE_OR_GOTO (frame, err);
 VALIDATE_OR_GOTO (this, err);
 VALIDATE_OR_GOTO (loc, err);
 VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
- local->call_cnt = 1;
+ goto err;
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->access,
- loc, mask);
+ layout = local->layout;
+ if (!layout) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
- return 0;
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
- return 0;
-}
+ if (key && DHT_IS_DIR(layout) &&
+ (!strcmp (key, GF_REBAL_FIND_LOCAL_SUBVOL))) {
+ ret = gf_asprintf
+ (&node_uuid_key, "%s", GF_XATTR_NODE_UUID_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_NO_MEMORY,
+ "Failed to copy key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void) strncpy (local->xsel, node_uuid_key, 256);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND (frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr,
+ loc, node_uuid_key, xdata);
+ }
+ if (node_uuid_key)
+ GF_FREE (node_uuid_key);
+ return 0;
+ }
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
-int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path, struct iatt *sbuf)
-{
- dht_local_t *local = NULL;
+ if (key && DHT_IS_DIR(layout) &&
+ (XATTR_IS_PATHINFO (key)
+ || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
+ (void) strncpy (local->xsel, key, 256);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_vgetxattr_dir_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
+ return 0;
+ }
- local = frame->local;
- if (op_ret == -1)
- goto err;
+ /* node-uuid or pathinfo for files */
+ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
+ || XATTR_IS_PATHINFO (key))) {
+ cached_subvol = local->cached_subvol;
+ /* Without a cached subvolume there is nothing to wind to. */
+ if (!cached_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for key=%s", key);
+ op_errno = EINVAL;
+ goto err;
+ }
+ (void) strncpy (local->xsel, key, 256);
+
+ local->call_cnt = 1;
+ STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key, xdata);
+
+ return 0;
+ }
+
+ if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- if (local) {
- sbuf->ia_ino = local->ia_ino;
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY, xdata);
+ return 0;
+ }
+
+ if (key && (!strcmp (QUOTA_LIMIT_KEY, key) ||
+ !strcmp (QUOTA_LIMIT_OBJECTS_KEY, key))) {
+ /* quota hardlimit and aggregated size of a directory is stored
+ * in inode contexts of each brick. Hence its good enough that
+ * we send getxattr for this key to any brick.
+ */
+ local->call_cnt = 1;
+ subvol = dht_first_up_subvol (this);
+ /* Guard against no subvolume being available before
+ * dereferencing the result. */
+ if (!subvol) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ STACK_WIND (frame, dht_getxattr_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
+ return 0;
+ }
+
+ if (cluster_handle_marker_getxattr (frame, loc, key, conf->vol_uuid,
+ dht_getxattr_unwind,
+ dht_marker_populate_args) == 0)
+ return 0;
+
+ if (DHT_IS_DIR(layout)) {
+ cnt = local->call_cnt = layout->cnt;
 } else {
- op_ret = -1;
- op_errno = EINVAL;
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
 }
+ return 0;
 err:
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
 return 0;
 }
-
+#undef DHT_IS_DIR
+/*
+ * fgetxattr entry point of the DHT translator.
+ *
+ * For a directory fd with a non-NULL key that does not start with
+ * GF_XATTR_LOCKINFO_KEY the request is wound to every subvolume in the
+ * layout; in every other case it is wound only to the first layout
+ * entry. Replies are merged by dht_getxattr_cbk.
+ *
+ * On setup failure the frame is unwound with -1 and an errno.
+ *
+ * NOTE(review): the caller's xdata is not forwarded -- NULL is passed to
+ * the subvolumes -- whereas dht_getxattr forwards it; confirm this is
+ * intended.
+ *
+ * Always returns 0.
+ */
 int
-dht_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+dht_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
 {
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
 VALIDATE_OR_GOTO (frame, err);
 VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->ia_ino = loc->inode->ino;
-
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size);
-
- return 0;
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
- return 0;
-}
+ goto err;
+ }
+ layout = local->layout;
+ if (!layout) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
-int
-dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
-{
- if (op_ret != -1) {
- if (dict_get (xattr, "trusted.glusterfs.dht")) {
- dict_del (xattr, "trusted.glusterfs.dht");
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
 }
 }
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+ /* Fan out only for directories, and not for lockinfo keys. */
+ if ((fd->inode->ia_type == IA_IFDIR)
+ && key
+ && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY)) != 0)) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->fgetxattr,
+ fd, key, NULL);
+ }
 return 0;
-}
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+/*
+ * Callback for setxattr/fsetxattr wound to a single subvolume for a file.
+ *
+ * The reply's op_errno is always recorded in local->op_errno. The frame
+ * is unwound straight away with the reply's op_ret/op_errno when:
+ * - the call failed with an error for which dht_inode_missing() is
+ * false, or
+ * - local->call_cnt is not 1 (dht_setxattr2 sets it to 2 for the
+ * second attempt), or
+ * - the call succeeded but xdata carries no iatt under
+ * DHT_IATT_IN_XDATA_KEY.
+ * Otherwise the iatt is used to detect file migration: the operation may
+ * be continued through dht_rebalance_complete_check(), dht_setxattr2()
+ * or dht_rebalance_in_progress_check(); when one of those takes over,
+ * this function returns without unwinding.
+ *
+ * The final unwind uses setxattr or fsetxattr depending on local->fop
+ * and passes NULL as xdata.
+ *
+ * NOTE(review): when op_ret is -1, stbuf may still be NULL when it is
+ * handed to the IS_DHT_MIGRATION_PHASE* macros -- confirm they accept
+ * NULL.
+ *
+ * Always returns 0.
+ */
 int
-dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
 {
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local->op_errno = op_errno;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if ((op_ret == -1) && !dht_inode_missing (op_errno)) {
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1.",
+ prev->this->name);
+ goto out;
+ }
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key);
+ /* Only the first attempt goes through the migration checks. */
+ if (local->call_cnt != 1)
+ goto out;
- return 0;
+ ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
+
+ /* Save what dht_setxattr2 needs to report or retry later. */
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_setxattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref (xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1 (stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ ret = dht_inode_ctx_get_mig_info (this, inode,
+ &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid (local->cached_subvol,
+ subvol1, subvol2)) {
+ dht_setxattr2 (this, subvol2, frame, 0);
+ return 0;
+ }
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
- return 0;
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ } else {
+ DHT_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
 }
+
+/*
+ * fsetxattr entry point of the DHT translator.
+ *
+ * When conf->defrag is not set, GF_IF_INTERNAL_XATTR_GOTO is applied
+ * with conf->wild_xattr_name (presumably rejecting requests that carry
+ * DHT-internal keys -- verify against the macro definition).
+ *
+ * Directories: the request is wound to every subvolume in the layout
+ * with dht_err_cbk as callback and NULL xdata.
+ * Files: the request is wound to the cached subvolume only, with
+ * dht_file_setxattr_cbk as callback. The xattr dict and flags are saved
+ * in local->rebalance for a possible retry, and DHT_IATT_IN_XDATA_KEY is
+ * set to "yes" in xdata (a new dict is created when the caller passed
+ * none); failure to set the key is only logged.
+ *
+ * On setup failure the frame is unwound with -1 and an errno.
+ * Always returns 0.
+ */
 int
-dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags)
+dht_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
 {
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int call_cnt = 0;
+ int i = 0;
 VALIDATE_OR_GOTO (frame, err);
 VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
- local->call_cnt = 1;
+ conf = this->private;
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, flags);
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
- return 0;
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
-}
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug (this->name, 0,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->call_cnt = call_cnt = layout->cnt;
-int
-dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ if (IA_ISDIR (fd->inode->ia_type)) {
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_err_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetxattr,
+ fd, xattr, flags, NULL);
+ }
+ } else {
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local->call_cnt = 1;
+ local->rebalance.xattr = dict_ref (xattr);
+ local->rebalance.flags = flags;
+
+ /* Take a reference on the caller's xdata, or create a dict;
+ * it is released after the wind below. */
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+ if (xdata)
+ ret = dict_set_dynstr_with_alloc (xdata,
+ DHT_IATT_IN_XDATA_KEY, "yes");
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
+ }
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND (frame, dht_file_setxattr_cbk, subvol,
+ subvol->fops->fsetxattr, fd, xattr, flags, xdata);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (xdata)
+ dict_unref (xdata);
- local->call_cnt = 1;
+ }
+ return 0;
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->removexattr,
- loc, key);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
+}
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno);
+/*
+ * Pass-through setxattr callback: unwinds the frame with exactly the
+ * op_ret, op_errno and xdata received. Always returns 0.
+ */
+static int
+dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
 }
+/*
+ * Per-subvolume getxattr callback that compares a subvolume's pathinfo
+ * with local->key.
+ *
+ * On a successful reply the GF_XATTR_PATHINFO_KEY value is read; if it
+ * is equal to local->key, the replying subvolume (prev->this) is stored
+ * in conf->decommissioned_bricks at the index it has in
+ * conf->subvolumes. Failed replies and replies without the key are
+ * skipped.
+ *
+ * After the last reply the frame is unwound as a setxattr with
+ * local->op_ret and errno ENOTSUP, whatever the comparison found.
+ *
+ * NOTE(review): conf->decommissioned_bricks is written without taking a
+ * lock here -- confirm this is safe for concurrent replies.
+ *
+ * Always returns 0.
+ */
 int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
 {
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1)
+ goto out;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ if (!strcmp (value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev->this)
+ conf->decommissioned_bricks[i] = prev->this;
+ }
+ }
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ }
 return 0;
+
 }
+/*
+ * Second attempt of a file setxattr/fsetxattr after a migration check.
+ * dht_file_setxattr_cbk installs it as local->rebalance.target_op_fn and
+ * also calls it directly.
+ *
+ * If we_are_not_migrating(ret) holds, the frame is unwound with the
+ * result saved from the first attempt (local->op_ret, local->op_errno,
+ * local->rebalance.xdata). Otherwise the saved xattr dict and flags are
+ * re-sent to subvol, using setxattr on local->loc or fsetxattr on
+ * local->fd depending on local->fop. local->call_cnt is set to 2 so that
+ * dht_file_setxattr_cbk does not run the migration checks again.
+ *
+ * A NULL frame, frame->local or subvol leads to an error unwind.
+ *
+ * NOTE(review): both unwinds in this function use the setxattr fop even
+ * when local->fop is fsetxattr, unlike dht_file_setxattr_cbk -- confirm
+ * this is intended.
+ *
+ * Always returns 0.
+ */
 int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, int wbflags)
+dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
 {
- xlator_t *subvol = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ if (!frame || !frame->local)
+ goto err;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local = frame->local;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_fd_cbk,
- subvol, subvol->fops->open,
- loc, flags, fd, wbflags);
-
- return 0;
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the result and xdata saved from the first
+ * attempt so the higher DHT layer can handle this.
+ */
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, local->rebalance.xdata);
+ return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+ if (subvol == NULL)
+ goto err;
- return 0;
-}
+ op_errno = local->op_errno;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND (frame, dht_file_setxattr_cbk, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ NULL);
+ } else {
+ STACK_WIND (frame, dht_file_setxattr_cbk, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ NULL);
+ }
+
+ return 0;
+err:
+ DHT_STACK_UNWIND (setxattr, frame, (local ? local->op_ret : -1),
+ op_errno, NULL);
+ return 0;
+}
int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref)
+dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
{
- dht_local_t *local = frame->local;
+ if (!IA_ISDIR(loc->inode->ia_type)) {
+ DHT_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL);
+ return 0;
+ }
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ /* Setxattr didn't need the parent, but rmdir does. */
+ loc->parent = inode_parent (loc->inode, NULL, NULL);
+ if (!loc->parent) {
+ DHT_STACK_UNWIND (setxattr, frame, -1, ENOENT, NULL);
+ return 0;
}
+ gf_uuid_copy (loc->pargfid, loc->parent->gfid);
- if (op_ret != -1)
- stbuf->ia_ino = local->ia_ino;
-out:
- DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
+ if (!loc->name && loc->path) {
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name) {
+ ++(loc->name);
+ }
+ }
+
+ /*
+ * We do this instead of calling dht_rmdir_do directly for two reasons.
+ * The first is that we want to reuse all of the initialization that
+ * dht_rmdir does, so if it ever changes we'll just follow along. The
+ * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't
+ * obscure the fact that we came in via this path instead of a genuine
+ * rmdir. That makes debugging just a tiny bit easier.
+ */
+ STACK_WIND (frame, default_rmdir_cbk, this, this->fops->rmdir,
+ loc, 1, NULL);
return 0;
}
-
int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+dht_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int op_errno = EINVAL;
+ int ret = -1;
+ data_t *tmp = NULL;
+ uint32_t dir_spread = 0;
+ char value[4096] = {0,};
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
+ uint32_t new_hash = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, err);
- local = dht_local_init (frame);
+ methods = &(conf->methods);
+
+ /* Rebalance daemon is allowed to set internal keys */
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
op_errno = ENOMEM;
goto err;
}
- local->ia_ino = fd->inode->ino;
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug (this->name, 0,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ tmp = dict_get (xattr, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ /* TODO: need to interpret the 'value' for more meaning
+ (ie, 'target' subvolume given there, etc) */
+ memcpy (value, tmp->data, tmp->len);
+ if (strcmp (value, "force") == 0)
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+
+ if (!loc->path) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!local->loc.name)
+ local->loc.name = strrchr (local->loc.path, '/')+1;
+
+ if (!local->loc.parent)
+ local->loc.parent =
+ inode_parent(local->loc.inode, NULL, NULL);
+
+ if ((!local->loc.name) || (!local->loc.parent)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ methods->migration_get_dst_subvol(this, local);
+
+ if (!local->rebalance.target_node) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.from_subvol = local->cached_subvol;
+
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
+ }
+ if (local->rebalance.target_node) {
+ local->flags = forced_rebalance;
+
+ /* Flag to suggest it's a tiering migration.
+ * The reason for this dict key-value is that
+ * promotions and demotions are multithreaded,
+ * so the original frame from gf_defrag_start()
+ * is not carried. A new frame will be created when
+ * we do syncop_setxattr(). This does not have the
+ * frame->root->pid of the original frame. So we pass
+ * this dict key-value when we do syncop_setxattr() to do
+ * data migration and set the frame->root->pid to
+ * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
+ * calling dht_start_rebalance_task() */
+ tmp = dict_get (xattr, TIERING_MIGRATION_KEY);
+ if (tmp)
+ frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
+ else
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ ret = dht_start_rebalance_task (this, frame);
+ if (!ret)
+ return 0;
+
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_REBALANCE_START_FAILED,
+ "%s: failed to create a new rebalance synctask",
+ loc->path);
+ }
+ op_errno = EINVAL;
+ goto err;
+
+ }
+
+ tmp = dict_get (xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid (loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
+ local->key = gf_strdup (value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0 ; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND (frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr,
+ loc, GF_XATTR_PATHINFO_KEY, NULL);
+ }
+ return 0;
+ }
+
+ tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
+ if (tmp) {
+ ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash);
+ if (ret == 0) {
+ gf_msg_debug (this->name, 0,
+ "updating commit hash for %s from %u to %u",
+ uuid_utoa(loc->gfid),
+ layout->commit_hash, new_hash);
+ layout->commit_hash = new_hash;
+
+ ret = dht_update_commit_hash_for_layout (frame);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_FIX_LAYOUT_INFO,
+ "fixing the layout of %s", loc->path);
+
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+
+ tmp = dict_get (xattr, "distribute.directory-spread-count");
+ if (tmp) {
+ /* Setxattr value is packed as 'binary', not string */
+ memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095));
+ ret = gf_string2uint32 (value, &dir_spread);
+ if (!ret && ((dir_spread <= conf->subvolume_cnt) &&
+ (dir_spread > 0))) {
+ layout->spread_cnt = dir_spread;
+
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_OPERATION_NOT_SUP,
+ "wrong 'directory-spread-count' value (%s)", value);
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get (xattr, "glusterfs.dht.nuke");
+ if (tmp) {
+ return dht_nuke_dir (frame, this, loc, tmp);
+ }
+
+ if (IA_ISDIR (loc->inode->ia_type)) {
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_err_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setxattr,
+ loc, xattr, flags, xdata);
+ }
+
+ } else {
+
+ local->rebalance.xattr = dict_ref (xattr);
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+ if (xdata)
+ ret = dict_set_dynstr_with_alloc (xdata,
+ DHT_IATT_IN_XDATA_KEY, "yes");
+
+ STACK_WIND (frame, dht_file_setxattr_cbk,
+ subvol, subvol->fops->setxattr,
+ loc, xattr, flags, xdata);
+
+ if (xdata)
+ dict_unref (xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+
+
int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
- if (op_ret == -1) {
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+
+ if ((op_ret == -1) && !dht_inode_missing (op_errno)) {
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
goto out;
}
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
+ if (local->call_cnt != 1)
+ goto out;
+
+ ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf);
+
+ if ((!op_ret) && !stbuf) {
goto out;
- }
-
- prebuf->ia_ino = local->ia_ino;
- postbuf->ia_ino = local->ia_ino;
+ }
+
+ local->op_ret = 0;
+
+ local->rebalance.target_op_fn = dht_removexattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref (xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1 (stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ ret = dht_inode_ctx_get_mig_info (this, inode,
+ &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid (local->cached_subvol,
+ subvol1, subvol2)) {
+ dht_removexattr2 (this, subvol2, frame, 0);
+ return 0;
+ }
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND (removexattr, frame, op_ret, op_errno, NULL);
+ } else {
+ DHT_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+
+}
+
+int
+dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local || !subvol)
+ goto err;
+
+ local = frame->local;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (we_are_not_migrating (ret)) {
+
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, local->rebalance.xdata);
+ return 0;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND (frame, dht_file_removexattr_cbk, subvol,
+ subvol->fops->removexattr, &local->loc,
+ local->key, NULL);
+ } else {
+ STACK_WIND (frame, dht_file_removexattr_cbk, subvol,
+ subvol->fops->fremovexattr, local->fd,
+ local->key, NULL);
+ }
+
+ return 0;
+err:
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
+dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local = frame->local;
+ prev = cookie;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto unlock;
+ }
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
+ local->op_ret = 0;
}
+unlock:
+ UNLOCK (&frame->lock);
- local->ia_ino = fd->inode->ino;
-
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
- return 0;
+ return 0;
}
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ int i;
+ int ret = 0;
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug (this->name, 0,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
+
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->removexattr,
+ loc, key, NULL);
+ }
- local->fd = fd_ref (fd);
- local->call_cnt = 1;
+ } else {
+
+ local->call_cnt = 1;
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+ if (xdata)
+ ret = dict_set_dynstr_with_alloc (xdata,
+ DHT_IATT_IN_XDATA_KEY, "yes");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "Failed to "
+ "set dictionary key %s for %s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->flush, fd);
+ STACK_WIND (frame, dht_file_removexattr_cbk,
+ subvol, subvol->fops->removexattr,
+ loc, key, xdata);
- return 0;
+ if (xdata)
+ dict_unref (xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
+dht_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int ret = 0;
+ int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
- local->call_cnt = 1;
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
- local->ia_ino = fd->inode->ino;
+ VALIDATE_OR_GOTO (frame, err);
- STACK_WIND (frame, dht_fsync_cbk,
- subvol, subvol->fops->fsync,
- fd, datasync);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for inode=%s",
+ uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug (this->name, 0,
+ "no layout for inode=%s",
+ uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
+
+ if (IA_ISDIR (fd->inode->ia_type)) {
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fremovexattr,
+ fd, key, NULL);
+ }
+
+ } else {
+
+ local->call_cnt = 1;
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+ if (xdata)
+ ret = dict_set_dynstr_with_alloc (xdata,
+ DHT_IATT_IN_XDATA_KEY, "yes");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "Failed to "
+ "set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
+ }
+
+ STACK_WIND (frame, dht_file_removexattr_cbk,
+ subvol, subvol->fops->fremovexattr,
+ fd, key, xdata);
+
+ if (xdata)
+ dict_unref (xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct flock *flock)
+dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock);
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto unlock;
+ }
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
return 0;
}
+/*
+ * dht_normalize_stats -
+ */
+static void
+dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
+ unsigned long frsize)
+{
+ double factor = 0;
+
+ if (buf->f_bsize != bsize) {
+ buf->f_bsize = bsize;
+ }
+
+ if (buf->f_frsize != frsize) {
+ factor = ((double) buf->f_frsize) / frsize;
+ buf->f_frsize = frsize;
+ buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
+ buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
+ buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
+
+ }
+}
int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct flock *flock)
+dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ gf_boolean_t event = _gf_false;
+ qdstatfs_action_t action = qdstatfs_action_OFF;
+ dht_local_t * local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
+ GF_UNUSED int ret = 0;
+ unsigned long new_usage = 0;
+ unsigned long cur_usage = 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local = frame->local;
+ GF_ASSERT (local);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (xdata)
+ ret = dict_get_int8 (xdata, "quota-deem-statfs",
+ (int8_t *)&event);
- STACK_WIND (frame, dht_lk_cbk,
- subvol, subvol->fops->lk,
- fd, cmd, flock);
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ if (!statvfs) {
+ op_errno = EINVAL;
+ local->op_ret = -1;
+ goto unlock;
+ }
+ local->op_ret = 0;
+
+ switch (local->quota_deem_statfs) {
+ case _gf_true:
+ if (event == _gf_true)
+ action = qdstatfs_action_COMPARE;
+ else
+ action = qdstatfs_action_NEGLECT;
+ break;
- return 0;
+ case _gf_false:
+ if (event == _gf_true) {
+ action = qdstatfs_action_REPLACE;
+ local->quota_deem_statfs = _gf_true;
+ }
+ break;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ default:
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_VALUE,
+ "Encountered third "
+ "value for boolean variable %d",
+ local->quota_deem_statfs);
+ break;
+ }
- return 0;
-}
+ if (local->quota_deem_statfs) {
+ switch (action) {
+ case qdstatfs_action_NEGLECT:
+ goto unlock;
+ case qdstatfs_action_REPLACE:
+ local->statvfs = *statvfs;
+ goto unlock;
-int
-dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ case qdstatfs_action_COMPARE:
+ new_usage = statvfs->f_blocks -
+ statvfs->f_bfree;
+ cur_usage = local->statvfs.f_blocks -
+ local->statvfs.f_bfree;
+ /* Take the max of the usage from subvols */
+ if (new_usage >= cur_usage)
+ local->statvfs = *statvfs;
+ goto unlock;
- local = frame->local;
+ default:
+ break;
+ }
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- local->op_ret = 0;
+ if (local->statvfs.f_bsize != 0) {
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+ dht_normalize_stats(&local->statvfs, bsize, frsize);
+ dht_normalize_stats(statvfs, bsize, frsize);
+ } else {
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+ }
- /* TODO: normalize sizes */
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
- local->statvfs.f_blocks += statvfs->f_blocks;
- local->statvfs.f_bfree += statvfs->f_bfree;
- local->statvfs.f_bavail += statvfs->f_bavail;
- local->statvfs.f_files += statvfs->f_files;
- local->statvfs.f_ffree += statvfs->f_ffree;
- local->statvfs.f_favail += statvfs->f_favail;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
- }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->statvfs, xdata);
return 0;
}
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int i = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
+ conf = this->private;
- local = dht_local_init (frame);
- local->call_cnt = conf->subvolume_cnt;
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (!loc->inode || IA_ISDIR (loc->inode->ia_type)) {
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_statfs_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc,
+ xdata);
+ }
+ return 0;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_statfs_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc);
- }
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_statfs_cbk,
+ subvol, subvol->fops->statfs, loc, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int i = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
- conf = this->private;
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
+ if (!local) {
+ op_errno = ENOMEM;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ goto err;
+ }
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if ((conf->defrag && conf->defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
+ (conf->defrag && conf->defrag->cmd ==
+ GF_DEFRAG_CMD_START_DETACH_TIER) ||
+ (!(conf->local_subvols_cnt) || !conf->defrag)) {
+ local->call_cnt = conf->subvolume_cnt;
- local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fd_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, fd, xdata);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fd_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, fd);
- }
+ }
+ } else {
+ local->call_cnt = conf->local_subvols_cnt;
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ STACK_WIND (frame, dht_fd_cbk,
+ conf->local_subvols[i],
+ conf->local_subvols[i]->fops->opendir,
+ loc, fd, xdata);
+ }
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = 0;
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ xlator_t *subvol = 0;
+ xlator_t *hashed_subvol = 0;
+ int ret = 0;
+ int readdir_optimize = 0;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
+ itable = local->fd ? local->fd->inode->table : NULL;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
- conf = this->private;
+ methods = &(conf->methods);
- if (op_ret < 0)
- goto done;
+ if (op_ret < 0)
+ goto done;
if (!local->layout)
local->layout = dht_layout_get (this, local->fd->inode);
layout = local->layout;
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+ /* We have seen crashes while running "rm -rf" on tier volumes
+ when the layout was NULL on the hot tier. This will skip the
+ entries on the subvol without a layout, hence preventing the crash,
+ but rmdir might fail with "directory not empty" errors. */
+
+ if (layout == NULL)
+ goto done;
+
+ if (conf->readdir_optimize == _gf_true)
+ readdir_optimize = 1;
+
+ list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
- if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL)
- || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
- && (prev->this != dht_first_up_subvol (this)))) {
+ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
+ /*stat failed somewhere- ignore this entry*/
+ gf_msg_debug (this->name, EINVAL,
+ "Invalid stat, ignoring entry "
+ "%s gfid %s", orig_entry->d_name,
+ uuid_utoa (orig_entry->d_stat.ia_gfid));
continue;
}
+ if (check_is_dir (NULL, (&orig_entry->d_stat), NULL)) {
+
+ /*Directory entries filtering :
+ * a) If rebalance is running, pick from first_up_subvol
+ * b) (rebalance not running) hashed subvolume is NULL or
+ * down, then filter in first_up_subvolume. Otherwise the
+ * corresponding hashed subvolume will take care of the
+ * directory entry.
+ */
+ if (readdir_optimize) {
+ if (prev->this == local->first_up_subvol)
+ goto list;
+ else
+ continue;
+
+ }
+
+ hashed_subvol = methods->layout_search (this, layout,
+ orig_entry->d_name);
+
+ if (prev->this == hashed_subvol)
+ goto list;
+ if ((hashed_subvol
+ && dht_subvol_status (conf, hashed_subvol))
+ ||(prev->this != local->first_up_subvol))
+ continue;
+
+ goto list;
+ }
+
+ if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+ orig_entry->dict,
+ conf->link_xattr_name)) {
+ continue;
+ }
+list:
entry = gf_dirent_for_name (orig_entry->d_name);
if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+
goto unwind;
}
/* Do this if conf->search_unhashed is set to "auto" */
if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
- subvol = dht_layout_search (this, layout,
- orig_entry->d_name);
+ subvol = methods->layout_search (this, layout,
+ orig_entry->d_name);
if (!subvol || (subvol != prev->this)) {
/* TODO: Count the number of entries which need
- linkfile to prove its existance in fs */
+ linkfile to prove its existence in fs */
layout->search_unhashed++;
}
}
- entry->d_stat = orig_entry->d_stat;
- dht_itransform (this, prev->this, orig_entry->d_ino,
- &entry->d_ino);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
-
- entry->d_stat.ia_ino = entry->d_ino;
+ entry->d_off = orig_entry->d_off;
+ entry->d_stat = orig_entry->d_stat;
+ entry->d_ino = orig_entry->d_ino;
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
+ if (orig_entry->dict)
+ entry->dict = dict_ref (orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (IA_ISDIR(entry->d_stat.ia_type)) {
+ entry->d_stat.ia_blocks = DHT_DIR_STAT_BLOCKS;
+ entry->d_stat.ia_size = DHT_DIR_STAT_SIZE;
+ if (orig_entry->inode) {
+ dht_inode_ctx_time_update (orig_entry->inode,
+ this, &entry->d_stat,
+ 1);
+ }
+ } else {
+ if (orig_entry->inode) {
+ ret = dht_layout_preset (this, prev->this,
+ orig_entry->inode);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout "
+ "in inode");
+
+ entry->inode = inode_ref (orig_entry->inode);
+ } else if (itable) {
+ /*
+ * orig_entry->inode might be null if any upper
+ * layer xlators below client set it to null, to
+ * force a lookup on the inode even if the inode
+ * is present in the inode table. In that case
+ * we just update the ctx to make sure we didn't
+ * miss anything.
+ */
+ inode = inode_find (itable,
+ orig_entry->d_stat.ia_gfid);
+ if (inode) {
+ ret = dht_layout_preset
+ (this, prev->this,
+ inode);
+ if (ret)
+ gf_msg (this->name,
+ GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout"
+ " in inode");
+ inode_unref (inode);
+ inode = NULL;
+ }
+ }
+ }
list_add_tail (&entry->list, &entries.list);
count++;
- }
- op_ret = count;
+ }
+ op_ret = count;
/* We need to ensure that only the last subvolume's end-of-directory
* notification is respected so that directory reading does not stop
* before all subvolumes have been read. That could happen because the
@@ -2134,7 +4828,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
op_errno = 0;
done:
- if (count == 0) {
+ if (count == 0) {
/* non-zero next_offset means that
EOF is not yet hit on the current subvol
*/
@@ -2144,23 +4838,40 @@ done:
next_subvol = prev->this;
}
- if (!next_subvol) {
- goto unwind;
- }
+ if (!next_subvol) {
+ goto unwind;
+ }
+
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ ":key = %s",
+ GF_READDIR_SKIP_DIRS );
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
- STACK_WIND (frame, dht_readdirp_cbk,
- next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset);
- return 0;
- }
+ STACK_WIND (frame, dht_readdirp_cbk,
+ next_subvol, next_subvol->fops->readdirp,
+ local->fd, local->size, next_offset,
+ local->xattr);
+ return 0;
+ }
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ if (op_ret < 0)
+ op_ret = 0;
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
- gf_dirent_free (&entries);
+ gf_dirent_free (&entries);
return 0;
}
@@ -2169,51 +4880,56 @@ unwind:
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
off_t next_offset = 0;
- int count = 0;
+ int count = 0;
dht_layout_t *layout = 0;
- dht_conf_t *conf = NULL;
xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, done);
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
- conf = this->private;
+ methods = &(conf->methods);
- if (op_ret < 0)
- goto done;
+ if (op_ret < 0)
+ goto done;
if (!local->layout)
local->layout = dht_layout_get (this, local->fd->inode);
layout = local->layout;
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+ list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
- subvol = dht_layout_search (this, layout, orig_entry->d_name);
+ subvol = methods->layout_search (this, layout,
+ orig_entry->d_name);
if (!subvol || (subvol == prev->this)) {
entry = gf_dirent_for_name (orig_entry->d_name);
if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "memory allocation failed :(");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "Memory allocation failed ");
goto unwind;
}
- dht_itransform (this, prev->this, orig_entry->d_ino,
- &entry->d_ino);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
-
+ entry->d_off = orig_entry->d_off;
+ entry->d_ino = orig_entry->d_ino;
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
@@ -2221,7 +4937,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
count++;
}
}
- op_ret = count;
+ op_ret = count;
/* We need to ensure that only the last subvolume's end-of-directory
* notification is respected so that directory reading does not stop
* before all subvolumes have been read. That could happen because the
@@ -2233,7 +4949,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = 0;
done:
- if (count == 0) {
+ if (count == 0) {
/* non-zero next_offset means that
EOF is not yet hit on the current subvol
*/
@@ -2243,23 +4959,23 @@ done:
next_subvol = prev->this;
}
- if (!next_subvol) {
- goto unwind;
- }
+ if (!next_subvol) {
+ goto unwind;
+ }
- STACK_WIND (frame, dht_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset);
- return 0;
- }
+ STACK_WIND (frame, dht_readdir_cbk,
+ next_subvol, next_subvol->fops->readdir,
+ local->fd, local->size, next_offset, NULL);
+ return 0;
+ }
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ if (op_ret < 0)
+ op_ret = 0;
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
- gf_dirent_free (&entries);
+ gf_dirent_free (&entries);
return 0;
}
@@ -2267,61 +4983,98 @@ unwind:
int
dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop)
+ off_t yoff, int whichop, dict_t *dict)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
int op_errno = -1;
- xlator_t *xvol = NULL;
- off_t xoff = 0;
-
+ xlator_t *xvol = NULL;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ conf = this->private;
- local->fd = fd_ref (fd);
- local->size = size;
+ local = dht_local_init (frame, NULL, NULL, whichop);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+ local->fd = fd_ref (fd);
+ local->size = size;
+ local->xattr_req = (dict)? dict_ref (dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol (this);
+
+ dht_deitransform (this, yoff, &xvol);
+
+ /* TODO: do proper readdir */
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref (dict);
+ else
+ local->xattr = dict_new ();
+
+ if (local->xattr) {
+ ret = dict_set_uint32 (local->xattr,
+ conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ " : key = %s",
+ conf->link_xattr_name);
+
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg (this->name,
+ GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value: "
+ "key = %s",
+ GF_READDIR_SKIP_DIRS);
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+ }
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff);
- else
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff);
+ fd, size, yoff, local->xattr);
+ } else {
+ STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, yoff, local->xattr);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *xdata)
{
int op = GF_FOP_READDIR;
dht_conf_t *conf = NULL;
int i = 0;
conf = this->private;
+ if (!conf)
+ goto out;
for (i = 0; i < conf->subvolume_cnt; i++) {
if (!conf->subvolume_status[i]) {
@@ -2330,15 +5083,19 @@ dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
}
- dht_do_readdir (frame, this, fd, size, yoff, op);
+ if (conf->use_readdirp)
+ op = GF_FOP_READDIRP;
+
+out:
+ dht_do_readdir (frame, this, fd, size, yoff, op, 0);
return 0;
}
int
dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *dict)
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
+ dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
return 0;
}
@@ -2346,88 +5103,88 @@ dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int
dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- local->op_errno = op_errno;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
- if (op_ret == 0)
- local->op_ret = 0;
- }
- UNLOCK (&frame->lock);
+ if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, local->op_errno);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
return 0;
}
int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int datasync, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int i = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->fd = fd_ref (fd);
- local->call_cnt = conf->subvolume_cnt;
+ local->fd = fd_ref (fd);
+ local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fsyncdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync);
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fsyncdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir,
+ fd, datasync, xdata);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
+ int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- int ret = -1;
+ xlator_t *prev = NULL;
+ int ret = -1;
dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
+ if (op_ret == -1)
+ goto out;
local = frame->local;
if (!local) {
@@ -2436,38 +5193,55 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- prev = cookie;
+ prev = cookie;
- dht_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino);
if (local->loc.parent) {
- preparent->ia_ino = local->loc.parent->ino;
- postparent->ia_ino = local->loc.parent->ino;
- WIPE (preparent);
- WIPE (postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ ret = dht_layout_preset (this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug (this->name, EINVAL,
+ "could not set pre-set layout for subvolume %s",
+ prev? prev->name: NULL);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
out:
- /*
- * FIXME: ia_size and st_blocks of preparent and postparent do not have
+ /*
+ * FIXME: ia_size and st_blocks of preparent and postparent do not have
* correct values. since, preparent and postparent buffers correspond
* to a directory these two members should have values equal to sum of
* corresponding values from each of the subvolume.
* See dht_iatt_merge for reference.
- */
+ */
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ dht_set_fixed_dir_stat (postparent);
+ dht_set_fixed_dir_stat (preparent);
+
+ if (local && local->lock.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock (frame, this, op_ret, 1);
+
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+ }
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
- return 0;
+ return 0;
}
int
@@ -2475,393 +5249,1121 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->params) {
+ dict_del (local->params, conf->link_xattr_name);
+ dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask,
+ local->params);
+
+ return 0;
+err:
+ if (local && local->lock.locks) {
+ local->refresh_layout_unlock (frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND (mknod, frame, -1,
+ op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ }
+ return 0;
+}
+
+int
+dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, dev_t rdev,
+ mode_t mode, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
+
+ local = frame->local;
+
+ if (!dht_is_subvol_filled (this, subvol)) {
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path,
+ subvol->name);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
+ } else {
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
+
+ if (avail_subvol != subvol) {
+ local->params = dict_ref (params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+
+ dht_linkfile_create (frame,
+ dht_mknod_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
+
+ }
+out:
+ return 0;
+}
+
+int32_t
+dht_mknod_do (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ local = frame->local;
+
+ this = THIS;
+
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe (&local->loc);
+
+ loc_copy (&local->loc, &local->loc2);
+
+ loc_wipe (&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search (this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in "
+ "layout for path=%s", local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_mknod_wind_to_avail_subvol (frame, this, subvol, &local->loc,
+ local->rdev, local->mode,
+ local->umask, local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock (frame, this, -1, 1);
+
+ return 0;
+}
+
+
+int32_t
+dht_mknod_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+int32_t
+dht_mknod_finish (call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_mknod_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ if (op_ret == 0)
+ return 0;
+
+ DHT_STACK_UNWIND (mknod, frame, op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+dht_mknod_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
+ if (op_ret < 0) {
+ gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "mknod lock failed for file: %s", local->loc2.name);
+
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ local->refresh_layout_unlock = dht_mknod_finish;
+
+ local->refresh_layout_done = dht_mknod_do;
+
+ dht_refresh_layout (frame);
+
+ return 0;
+err:
+ dht_mknod_finish (frame, this, -1, 0);
+ return 0;
+}
+
+int32_t
+dht_mknod_lock (call_frame_t *frame, xlator_t *subvol)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+ local = frame->local;
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE, dht_mknod_lock_cbk);
+
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
+}
+
+int
+dht_refresh_parent_layout_resume (call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *parent_local = NULL;
+ call_stub_t *stub = NULL;
+ call_frame_t *parent_frame = NULL;
+
+ local = frame->local;
+
+ stub = local->stub;
+ local->stub = NULL;
+
+ parent_frame = stub->frame;
+ parent_local = parent_frame->local;
+
+ if (ret < 0) {
+ parent_local->op_ret = -1;
+ parent_local->op_errno = local->op_errno
+ ? local->op_errno : EIO;
+ } else {
+ parent_local->op_ret = 0;
+ }
+
+ call_resume (stub);
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_refresh_parent_layout_done (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ ret = -1;
+ goto resume;
+ }
+
+ dht_layout_set (frame->this, local->loc.inode,
+ local->selfheal.refreshed_layout);
+
+resume:
+ dht_refresh_parent_layout_resume (frame, frame->this, ret, 1);
+ return 0;
+}
+
+
+int
+dht_handle_parent_layout_change (xlator_t *this, call_stub_t *stub)
+{
+ call_frame_t *refresh_frame = NULL, *frame = NULL;
+ dht_local_t *refresh_local = NULL, *local = NULL;
+
+ frame = stub->frame;
+ local = frame->local;
+
+ refresh_frame = copy_frame (frame);
+ refresh_local = dht_local_init (refresh_frame, NULL, NULL,
+ stub->fop);
+
+ refresh_local->loc.inode = inode_ref (local->loc.parent);
+ gf_uuid_copy (refresh_local->loc.gfid, local->loc.parent->gfid);
+
+ refresh_local->stub = stub;
+
+ refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume;
+ refresh_local->refresh_layout_done = dht_refresh_parent_layout_done;
+
+ dht_refresh_layout (refresh_frame);
+ return 0;
+}
+
+int32_t
+dht_unlock_parent_layout_during_entry_fop_done (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse (local->lock.locks[0]->loc.inode->gfid, gfid);
+
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "unlock failed on gfid: %s, stale lock might be left "
+ "in DHT_LAYOUT_HEAL_DOMAIN", gfid);
+ }
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+int32_t
+dht_unlock_parent_layout_during_entry_fop (call_frame_t *frame)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse (local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "copy frame failed", pgfid, local->loc.name,
+ local->loc.path);
+ goto done;
+ }
+
+ lock_local = mem_get0 (THIS->local_pool);
+ if (lock_local == NULL) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "local creation failed", pgfid, local->loc.name,
+ local->loc.path);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_unlock_parent_layout_during_entry_fop_done);
+
+done:
+ return 0;
+}
+
+int32_t
+dht_guard_parent_layout_during_entry_fop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+ stub = local->stub;
+ local->stub = NULL;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
+
+ call_resume (stub);
+
+ return 0;
+}
+
+int32_t
+dht_guard_parent_layout_during_entry_fop (xlator_t *subvol, call_stub_t *stub)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+ loc_t *loc = NULL;
+ xlator_t *hashed_subvol = NULL, *this = NULL;;
+ call_frame_t *frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ loc_t parent = {0, };
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", stub, err);
+
+ frame = stub->frame;
+ this = frame->this;
+
+ conf = this->private;
+
+ local = frame->local;
+
+ local->stub = stub;
+
+ /* TODO: recheck whether we should lock on src or dst if we do similar
+ * stale layout checks for rename.
+ */
+ loc = &stub->args.loc;
+
+ gf_uuid_unparse (loc->parent->gfid, pgfid);
+
+ if (local->params == NULL) {
+ local->params = dict_new ();
+ if (local->params == NULL) {
+ local->op_errno = ENOMEM;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "dict allocation failed",
+ gf_fop_list[stub->fop],
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "hashed subvolume not found", gf_fop_list[stub->fop],
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_layout = dht_layout_get (this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol (this, parent_layout,
+ hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ local->op_errno = EINVAL;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ memcpy ((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof (local->parent_disk_layout));
+
+ dht_layout_unref (this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str (local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin (local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+
+ parent.inode = inode_ref (loc->parent);
+ gf_uuid_copy (parent.gfid, loc->parent->gfid);
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+
+ if (lk_array == NULL) {
+ local->op_errno = ENOMEM;
+
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "calloc failure",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+
+ goto err;
+ }
+
+ lk_array[0] = dht_lock_new (frame->this, hashed_subvol, &parent,
+ F_RDLCK, DHT_LAYOUT_HEAL_DOMAIN);
+
+ if (lk_array[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "lock allocation failed",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
- if (op_ret == -1)
goto err;
+ }
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count, FAIL_ON_ANY_ERROR,
+ dht_guard_parent_layout_during_entry_fop_cbk);
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ if (ret < 0) {
+ local->op_errno = EIO;
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "dht_blocking_inodelk failed",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+
+ goto err;
+ }
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
+ loc_wipe (&parent);
return 0;
- err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ loc_wipe (&parent);
+
+ if (parent_disk_layout != NULL)
+ GF_FREE (parent_disk_layout);
+
+ if (parent_layout != NULL)
+ dht_layout_unref (this, parent_layout);
+
+ return -1;
}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
goto err;
}
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
- } else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
- files there */
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
- local->cached_subvol = avail_subvol;
- local->mode = mode;
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence, a mknod call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still pending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+
+ gf_msg_debug (this->name, 0, "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s", subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_mknod_do */
+
+ ret = loc_copy (&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
+ }
+
+ local->params = dict_ref (params);
local->rdev = rdev;
-
- dht_linkfile_create (frame,
- dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
- }
+ local->mode = mode;
+ local->umask = umask;
+
+ loc_wipe (&local->loc);
+
+ ret = dht_build_parent_loc (this, &local->loc, loc,
+ &op_errno);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "parent loc build failed");
+ goto err;
+ }
+
+ ret = dht_mknod_lock (frame, subvol);
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
+
+ goto done;
+ }
+ }
}
- return 0;
+ dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, rdev, mode,
+ umask, params);
+
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "Failed to copy loc");
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
+ if (!local) {
op_errno = ENOMEM;
goto err;
}
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ gf_msg_trace (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- if (hashed_subvol != cached_subvol) {
- STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc);
- } else {
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc);
+ xlator_t *cached_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
}
- return 0;
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->flags = xflag;
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink, loc,
+ xflag, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
+ int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t stbuf_merged = _gf_false;
+ xlator_t *subvol = NULL;
- prev = cookie;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1)
+ if (op_ret == -1) {
+ /* No continuation on DHT inode missing errors, as we should
+ * then have a good stbuf that states P2 happened. We would
+ * get inode missing if the file completed migration between
+ * the lookup and the link call */
goto out;
+ }
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ /* Update parent on success, even if P1/2 checks are positive.
+ * The second call on success will further update the parent */
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
- stbuf->ia_ino = local->loc.inode->ino;
+ /* Update linkto attrs, if this is the first call and non-P2,
+ * if we detect P2 then we need to trust the attrs from the
+ * second call, not the first */
+ if (local->linked == _gf_true &&
+ ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2 (stbuf))
+ || (local->call_cnt != 1 &&
+ IS_DHT_MIGRATION_PHASE2 (&local->stbuf)))) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, NULL);
+ stbuf_merged = _gf_true;
+ dht_linkfile_attr_heal (frame, this);
+ }
- preparent->ia_ino = local->loc2.parent->ino;
- postparent->ia_ino = local->loc2.parent->ino;
+ /* No further P1/2 checks if we are in the second iteration of
+ * the call */
+ if (local->call_cnt != 1) {
+ goto out;
+ } else {
+ /* Preserve the return values, in case the migration decides
+ * to recreate the link on the same subvol that the current
+ * hashed for the link was created on. */
+ dht_iatt_merge (this, &local->preparent,
+ preparent, NULL);
+ dht_iatt_merge (this, &local->postparent,
+ postparent, NULL);
+ if (!stbuf_merged) {
+ dht_iatt_merge (this, &local->stbuf,
+ stbuf, NULL);
+ stbuf_merged = _gf_true;
+ }
- WIPE (preparent);
- WIPE (postparent);
+ local->inode = inode_ref (inode);
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_link2;
+ dht_set_local_rebalance (this, local, stbuf, preparent,
+ postparent, xdata);
+
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL,
+ &subvol);
+ if (!subvol) {
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_link2 (this, subvol, frame, 0);
+ return 0;
+ }
+ }
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (stbuf)) {
+ ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL,
+ &subvol);
+ if (subvol) {
+ dht_link2 (this, subvol, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
- return 0;
+ dht_set_fixed_dir_stat (preparent);
+ dht_set_fixed_dir_stat (postparent);
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, NULL);
+
+ return 0;
}
int
+dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto err;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ DHT_STACK_UNWIND (link, frame, local->op_ret, op_errno,
+ local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+ return 0;
+ }
+
+ if (subvol == NULL) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Second call to create link file could result in EEXIST as the
+ * first call created the linkto in the currently
+ * migrating subvol, which could be the new hashed subvol */
+ if (local->link_subvol == subvol) {
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+ DHT_STACK_UNWIND (link, frame, 0, 0, local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+ }
+
+ local->call_cnt = 2;
+
+ STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link,
+ &local->loc, &local->loc2, NULL);
+
+ return 0;
+err:
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+
+int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
+ int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *srcvol = NULL;
-
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- local = frame->local;
- srcvol = local->linkfile.srcvol;
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
- STACK_WIND (frame, dht_link_cbk,
- srcvol, srcvol->fops->link,
- &local->loc, &local->loc2);
+ STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
+ &local->loc, &local->loc2, xdata);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ dht_set_fixed_dir_stat (preparent);
+ dht_set_fixed_dir_stat (postparent);
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
- return 0;
+ return 0;
}
int
dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- cached_subvol = dht_subvol_get_cached (this, oldloc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- hashed_subvol = dht_subvol_get_hashed (this, newloc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- if (hashed_subvol != cached_subvol) {
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
- cached_subvol, hashed_subvol, newloc);
- } else {
- STACK_WIND (frame, dht_link_cbk,
- cached_subvol, cached_subvol->fops->link,
- oldloc, newloc);
- }
-
- return 0;
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
+
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+ local->call_cnt = 1;
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, newloc);
+ if (!hashed_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (hashed_subvol != cached_subvol) {
+ gf_uuid_copy (local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
+ cached_subvol, hashed_subvol, newloc);
+ } else {
+ STACK_WIND (frame, dht_link_cbk,
+ cached_subvol, cached_subvol->fops->link,
+ oldloc, newloc, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- int ret = -1;
+ call_frame_t *prev = NULL;
+ int ret = -1;
dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
-
local = frame->local;
if (!local) {
op_ret = -1;
@@ -2869,256 +6371,844 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- prev = cookie;
+ if (op_ret == -1)
+ goto out;
+
+ prev = cookie;
- dht_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino);
if (local->loc.parent) {
- preparent->ia_ino = local->loc.parent->ino;
- postparent->ia_ino = local->loc.parent->ino;
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
- WIPE (preparent);
- WIPE (postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
ret = dht_layout_preset (this, prev->this, inode);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set preset layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (ret != 0) {
+ gf_msg_debug (this->name, 0,
+ "could not set preset layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ local->op_errno = op_errno;
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
- DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent);
- return 0;
-}
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ dht_set_fixed_dir_stat (preparent);
+ dht_set_fixed_dir_stat (postparent);
+
+ if (local && local->lock.locks) {
+ /* store op_errno for failure case */
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock (frame, this, op_ret, 1);
+
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode,
+ stbuf, preparent, postparent, xdata);
+ }
+ return 0;
+}
int
dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
- if (op_ret == -1)
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
goto err;
+ }
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->params) {
+ dict_del (local->params, conf->link_xattr_name);
+ dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
STACK_WIND (frame, dht_create_cbk,
cached_subvol, cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
+ &local->loc, local->flags, local->mode,
+ local->umask, local->fd, local->params);
+
+ return 0;
+err:
+ if (local && local->lock.locks) {
+ local->refresh_layout_unlock (frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND (create, frame, -1,
+ op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ }
+ return 0;
+}
+
+int
+dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *params)
+{
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
+
+ local = frame->local;
+
+ if (!dht_is_subvol_filled (this, subvol)) {
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path,
+ subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+
+ } else {
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
+
+ if (avail_subvol != subvol) {
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+ }
+out:
+ return 0;
+}
+
+int
+dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno)
+{
+ inode_table_t *table = NULL;
+ int ret = -1;
+
+ if (!parent || !child) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (child->parent) {
+ parent->inode = inode_ref (child->parent);
+ if (!parent->inode) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+
+ goto out;
+ } else {
+ if (gf_uuid_is_null (child->pargfid)) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->itable;
+
+ if (!table) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ parent->inode = inode_find (table, child->pargfid);
+
+ if (!parent->inode) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ gf_uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+
+int32_t
+dht_create_do (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ local = frame->local;
+
+ this = THIS;
+
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe (&local->loc);
+
+ loc_copy (&local->loc, &local->loc2);
+
+ loc_wipe (&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search (this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in "
+ "layout for path=%s", local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc,
+ local->flags, local->mode,
+ local->umask, local->fd, local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock (frame, this, -1, 1);
+
+ return 0;
+}
+
+int32_t
+dht_create_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+int32_t
+dht_create_finish (call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_create_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ if (op_ret == 0)
+ return 0;
+
+ DHT_STACK_UNWIND (create, frame, op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+dht_create_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
+ if (op_ret < 0) {
+ gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "Create lock failed for file: %s", local->loc2.name);
+
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ local->refresh_layout_unlock = dht_create_finish;
+
+ local->refresh_layout_done = dht_create_do;
+
+ dht_refresh_layout (frame);
+
+ return 0;
+err:
+ dht_create_finish (frame, this, -1, 0);
+ return 0;
+}
+
+int32_t
+dht_create_lock (call_frame_t *frame, xlator_t *subvol)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+ local = frame->local;
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE, dht_create_lock_cbk);
+
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
return 0;
- err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
}
int
dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- int op_errno = -1;
- int ret = -1;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
goto err;
}
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ if (dht_filter_loc_subvol_key (this, loc, &local->loc,
+ &subvol)) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "creating %s on %s (got create on %s)",
+ local->loc.path, subvol->name, loc->path);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
- } else {
- /* Choose the minimum filled volume, and create the
- files there */
- /* TODO */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- local->fd = fd_ref (fd);
+ &local->loc, flags, mode, umask, fd, params);
+ goto done;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in layout for path=%s",
+ loc->path);
+
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence, a create call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still pending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+
+ gf_msg_debug (this->name, 0, "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s", subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_create_do */
+
+ ret = loc_copy (&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
+ }
+
+ local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
+ local->umask = umask;
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, fd);
-
- }
+ loc_wipe (&local->loc);
+
+ ret = dht_build_parent_loc (this, &local->loc, loc,
+ &op_errno);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "parent loc build failed");
+ goto err;
+ }
+
+ ret = dht_create_lock (frame, subvol);
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
+
+ goto done;
+ }
+ }
}
- return 0;
+
+ dht_create_wind_to_avail_subvol (frame, this, subvol, loc, flags, mode,
+ umask, fd, params);
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
}
int
dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ local = frame->local;
+ layout = local->selfheal.layout;
- local = frame->local;
- layout = local->selfheal.layout;
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
- if (op_ret == 0) {
+ if (op_ret == 0) {
dht_layout_set (this, local->inode, layout);
- local->stbuf.ia_ino = local->ia_ino;
- local->stbuf.ia_gen = local->ia_gen;
+
+ dht_inode_ctx_time_update (local->inode, this,
+ &local->stbuf, 1);
if (local->loc.parent) {
- local->preparent.ia_ino = local->loc.parent->ino;
- local->postparent.ia_ino = local->loc.parent->ino;
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
}
- }
+ }
- DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent);
+ DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+ local->inode, &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
- return 0;
+ return 0;
}
int
dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int ret = -1;
- int subvol_filled = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ gf_boolean_t subvol_filled = _gf_false;
+ gf_boolean_t dir_exists = _gf_false;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
- conf = this->private;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
subvol_filled = dht_is_subvol_filled (this, prev->this);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (subvol_filled && (op_ret != -1)) {
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
+ if (op_ret == -1 && op_errno == EEXIST) {
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
+ dir_exists = _gf_true;
+ }
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
}
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s",
+ local->loc.path, prev->this->name);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+
+ if (dir_exists)
+ goto unlock;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
dht_iatt_merge (this, &local->preparent, preparent, prev->this);
dht_iatt_merge (this, &local->postparent, postparent,
prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
- if (prev->this == dht_first_up_subvol (this)) {
- local->ia_ino = local->stbuf.ia_ino;
- local->ia_gen = local->stbuf.ia_gen;
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
+ layout);
+ }
- }
-unlock:
- UNLOCK (&frame->lock);
+ return 0;
+}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
- layout);
- }
+int
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+dht_mkdir_helper (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ gf_uuid_unparse (loc->parent->gfid, pgfid);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): refreshing parent layout "
+ "failed.", pgfid, loc->name,
+ loc->path);
+
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ local->op_ret = -1;
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug (this->name, 0,
+ "mkdir (%s/%s) (path: %s): hashed subvol not "
+ "found", pgfid, loc->name, loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+
+ parent_layout = dht_layout_get (this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol (this, parent_layout,
+ hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, EIO,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ if (memcmp (local->parent_disk_layout, parent_disk_layout,
+ sizeof (local->parent_disk_layout)) == 0) {
+ gf_msg (this->name, GF_LOG_WARNING, EIO,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): loop detected. "
+ "parent layout didn't change even though "
+ "previous attempt of mkdir failed because of "
+ "in-memory layout not matching with that on disk.",
+ pgfid, loc->name, loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ memcpy ((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof (local->parent_disk_layout));
+
+ dht_layout_unref (this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str (params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin (params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "setting parent-layout in params dictionary failed. "
+ "mkdir (%s/%s) (path: %s)", pgfid, loc->name,
+ loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+
+ STACK_WIND (frame, dht_mkdir_hashed_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
+
+ return 0;
+
+err:
+ dht_unlock_parent_layout_during_entry_fop (frame);
+
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ if (parent_disk_layout != NULL)
+ GF_FREE (parent_disk_layout);
+
+ if (parent_layout != NULL)
+ dht_layout_unref (this, parent_layout);
return 0;
}
int
-dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int ret = -1;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- layout = local->layout;
- conf = this->private;
- hashed_subvol = local->hashed_subvol;
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t parent_layout_changed = _gf_false;
+ call_stub_t *stub = NULL;
+
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ gf_uuid_unparse (local->loc.parent->gfid, pgfid);
+
+ if (gf_uuid_is_null (local->loc.gfid) && !op_ret)
+ gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ parent_layout_changed = (xdata && dict_get (xdata, GF_PREOP_CHECK_FAILED))
+ ? 1 : 0;
+ if (parent_layout_changed) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a refresh and then a "
+ "retry", pgfid, local->loc.name,
+ local->loc.path);
+
+ stub = fop_mkdir_stub (frame, dht_mkdir_helper,
+ &local->loc, local->mode,
+ local->umask, local->params);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ dht_handle_parent_layout_change (this, stub);
+ stub = NULL;
+
+ return 0;
+ }
+
+ goto err;
+ }
+
+ dht_unlock_parent_layout_during_entry_fop (frame);
+ dict_del (local->params, GF_PREOP_PARENT_KEY);
+ dict_del (local->params, conf->xattr_name);
if (dht_is_subvol_filled (this, hashed_subvol))
ret = dht_layout_merge (this, layout, prev->this,
@@ -3126,197 +7216,581 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
else
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
- local->op_ret = 0;
-
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s",
+ local->loc.path, prev->this->name);
+
+ local->op_ret = 0;
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
dht_iatt_merge (this, &local->preparent, preparent, prev->this);
dht_iatt_merge (this, &local->postparent, postparent, prev->this);
- local->ia_ino = local->stbuf.ia_ino;
- local->ia_gen = local->stbuf.ia_gen;
-
- local->call_cnt = conf->subvolume_cnt - 1;
-
- if (local->call_cnt == 0) {
- dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
- &local->loc, layout);
- }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == hashed_subvol)
- continue;
- STACK_WIND (frame, dht_mkdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir,
- &local->loc, local->mode);
- }
- return 0;
+ local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (gf_uuid_is_null (local->loc.gfid))
+ gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+ if (local->call_cnt == 0) {
+ dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+ &local->loc, layout);
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND (frame, dht_mkdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->mkdir, &local->loc,
+ local->mode, local->umask, local->params);
+ }
+ return 0;
err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ if (local->op_ret != 0)
+ dht_unlock_parent_layout_during_entry_fop (frame);
+
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ if (stub) {
+ call_stub_destroy (stub);
+ }
+
return 0;
}
int
-dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int ret = -1;
- xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse (loc->parent->gfid, pgfid);
+
+ if (local->op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "Acquiring lock on parent to guard against "
+ "layout-change failed.", pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ local->op_ret = -1;
+
+ STACK_WIND (frame, dht_mkdir_hashed_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
+
+ return 0;
+err:
+ DHT_STACK_UNWIND (mkdir, frame, -1, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+int
+dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ call_stub_t *stub = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ gf_uuid_unparse (loc->parent->gfid, pgfid);
- conf = this->private;
+ conf = this->private;
+
+ if (!params || !dict_get (params, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GFID_NULL, "mkdir: %s is received "
+ "without gfid-req %p", loc->path, params);
+ goto err;
+ }
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug (this->name, 0,
+ "hashed subvol not found for %s",
+ loc->path);
+ local->op_errno = EIO;
+ goto err;
+ }
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (hashed_subvol == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hashed subvol not found for %s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->hashed_subvol = hashed_subvol;
- local->inode = inode_ref (loc->inode);
- ret = loc_copy (&local->loc, loc);
- local->mode = mode;
-
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND (frame, dht_mkdir_hashed_cbk,
- hashed_subvol,
- hashed_subvol->fops->mkdir,
- loc, mode);
-
- return 0;
+ local->hashed_subvol = hashed_subvol;
+ local->mode = mode;
+ local->umask = umask;
+ if (params)
+ local->params = dict_ref (params);
+
+ local->inode = inode_ref (loc->inode);
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* set the newly created directory hash to the commit hash
+ * if the configuration option is set. If configuration option
+ * is not set, the older clients may still be connecting to the
+ * volume and hence we need to preserve the 1 in disk[0] part of the
+ * layout xattr */
+ if (conf->lookup_optimize)
+ local->layout->commit_hash = conf->vol_commit_hash;
+ else
+ local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+
+
+ stub = fop_mkdir_stub (frame, dht_mkdir_guard_parent_layout_cbk, loc,
+ mode, umask, params);
+ if (stub == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "creating stub failed.", pgfid, loc->name, loc->path);
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_guard_parent_layout_during_entry_fop (this, stub);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s) cannot wind lock request to "
+ "guard parent layout", pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_rmdir_selfheal_cbk (call_frame_t *heal_frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ heal_local = heal_frame->local;
+ main_frame = heal_local->main_frame;
+ local = main_frame->local;
+
+ DHT_STACK_DESTROY (heal_frame);
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ DHT_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
+}
+
+
+int
+dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] ={0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (conf->subvolume_cnt != 1) {
+ if (op_errno != ENOENT && op_errno != EACCES
+ && op_errno != ESTALE) {
+ local->need_selfheal = 1;
+ }
+ }
+
+ gf_msg_debug (this->name, op_errno,
+ "rmdir on %s for %s failed "
+ "(gfid = %s)",
+ prev->this->name, local->loc.path,
+ gfid);
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
- if (local->loc.parent) {
- local->preparent.ia_ino = local->loc.parent->ino;
- local->postparent.ia_ino = local->loc.parent->ino;
}
+unlock:
+ UNLOCK (&frame->lock);
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ dht_rmdir_unlock (frame, this);
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ gf_uuid_copy (local->gfid, local->loc.inode->gfid);
+
+ /* Use a different frame or else the rmdir op_ret is
+ * overwritten by that of the selfheal */
+
+ heal_frame = copy_frame (frame);
+
+ if (heal_frame == NULL) {
+ goto err;
+ }
+
+ heal_local = dht_local_init (heal_frame,
+ &local->loc,
+ NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY (heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref (local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy (heal_local->gfid, local->loc.inode->gfid);
+
+ dht_selfheal_restore (heal_frame,
+ dht_rmdir_selfheal_cbk,
+ &heal_local->loc,
+ heal_local->layout);
+ return 0;
+ } else {
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+ }
+
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ dht_rmdir_unlock (frame, this);
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, NULL, NULL, NULL);
+ return 0;
- return 0;
}
int
dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int done = 0;
+ char gfid[GF_UUID_BUF_SIZE] ={0};
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
- if (op_errno != ENOENT)
- local->need_selfheal = 1;
+ if (op_errno != EACCES)
+ local->need_selfheal = 1;
+ }
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
+ gf_msg_debug (this->name, op_errno,
+ "rmdir on %s for %s failed."
+ "(gfid = %s)",
+ prev->this->name, local->loc.path,
+ gfid);
+ goto unlock;
+ }
+ /* Track if rmdir succeeded on atleast one subvol*/
+ local->fop_succeeded = 1;
dht_iatt_merge (this, &local->preparent, preparent, prev->this);
dht_iatt_merge (this, &local->postparent, postparent,
prev->this);
- }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
+
+
+ this_call_cnt = dht_frame_return (frame);
+
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
+ dht_rmdir_unlock (frame, this);
local->layout =
dht_layout_get (this, local->loc.inode);
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ gf_uuid_copy (local->gfid, local->loc.inode->gfid);
+ heal_frame = copy_frame (frame);
+ if (heal_frame == NULL) {
+ goto err;
+ }
+
+ heal_local = dht_local_init (heal_frame, &local->loc,
+ NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY (heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref (local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy (heal_local->gfid, local->loc.inode->gfid);
+ ret = dht_selfheal_restore (heal_frame,
+ dht_rmdir_selfheal_cbk,
+ &heal_local->loc,
+ heal_local->layout);
+ if (ret) {
+ DHT_STACK_DESTROY (heal_frame);
+ goto err;
+ }
+
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+ if (local->op_ret == 0) {
+ /* Delete the dir from the hashed subvol if:
+ * The fop succeeded on at least one subvol
+ * and did not fail on any
+ * or
+ * The fop failed with ENOENT/ESTALE on
+ * all subvols */
+
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else {
+ /* hashed-subvol was non-NULL and rmdir failed on
+ * all non hashed-subvols. Unwind rmdir with
+ * local->op_ret and local->op_errno. */
+ dht_rmdir_unlock (frame, this);
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+
+ }
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else {
if (local->loc.parent) {
- local->preparent.ia_ino =
- local->loc.parent->ino;
- local->postparent.ia_ino =
- local->loc.parent->ino;
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+
}
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent);
- }
- }
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ dht_rmdir_unlock (frame, this);
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (rmdir, frame, -1, local->op_errno, NULL, NULL, NULL);
+ return 0;
+
+}
+
+
+int
+dht_rmdir_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+
+int
+dht_rmdir_unlock (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL)
+ goto done;
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done;
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ return 0;
+}
+
+
+int
+dht_rmdir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed rmdir for %s)",
+ local->loc.path);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->hashed_subvol &&
+ (local->hashed_subvol == conf->subvolumes[i]))
+ continue;
+
+ STACK_WIND (frame, dht_rmdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ }
+
+ return 0;
+
+err:
+ /* No harm in calling an extra rmdir unlock */
+ dht_rmdir_unlock (frame, this);
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -3325,38 +7799,103 @@ unlock:
int
dht_rmdir_do (call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_lock_t **lk_array = NULL;
+ int i = 0, ret = -1;
+ int count = 1;
+ xlator_t *hashed_subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] ={0};
+
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1)
+ goto err;
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+
+ if (!hashed_subvol) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s (gfid = %s)",
+ local->loc.path, gfid);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ return 0;
+ }
+
+ count = conf->subvolume_cnt;
- conf = this->private;
- local = frame->local;
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- if (local->op_ret == -1)
- goto err;
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto err;
+ }
+ }
- local->call_cnt = conf->subvolume_cnt;
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc);
- }
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE,
+ dht_rmdir_lock_cbk);
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ local->op_ret = -1;
+ local->op_errno = errno ? errno : EINVAL;
+ goto err;
+ }
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
- return 0;
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
}
int
dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -3364,6 +7903,8 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
call_frame_t *main_frame = NULL;
dht_local_t *main_local = NULL;
int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] ={0};
+
local = frame->local;
prev = cookie;
@@ -3372,16 +7913,18 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
main_frame = local->main_frame;
main_local = main_frame->local;
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinked linkfile %s on %s",
- local->loc.path, src->name);
+ gf_msg_trace (this->name, 0,
+ "Unlinked linkfile %s on %s, gfid = %s",
+ local->loc.path, src->name, gfid);
} else {
main_local->op_ret = -1;
main_local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink of %s on %s failed (%s)",
- local->loc.path, src->name, strerror (op_errno));
+ gf_msg_debug (this->name, op_errno,
+ "Unlink of %s on %s failed. (gfid = %s)",
+ local->loc.path, src->name, gfid);
}
this_call_cnt = dht_frame_return (main_frame);
@@ -3404,6 +7947,8 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_frame_t *main_frame = NULL;
dht_local_t *main_local = NULL;
int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
local = frame->local;
prev = cookie;
@@ -3415,18 +7960,92 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != 0)
goto err;
- if (check_is_linkfile (inode, stbuf, xattr) == 0) {
+ if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
main_local->op_ret = -1;
main_local->op_errno = ENOTEMPTY;
- gf_log (this->name, GF_LOG_WARNING,
- "%s on %s found to be not a linkfile (type=0%o)",
- local->loc.path, src->name, stbuf->ia_type);
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_NOT_LINK_FILE_ERROR,
+ "%s on %s is not a linkfile (type=0%o, gfid = %s)",
+ local->loc.path, src->name, stbuf->ia_type, gfid);
goto err;
}
STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc);
+ src, src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
+err:
+
+ this_call_cnt = dht_frame_return (main_frame);
+ if (is_last_call (this_call_cnt))
+ dht_rmdir_do (main_frame, this);
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+
+int
+dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *xattrs = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ src = local->hashed_subvol;
+
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ if (op_ret == 0) {
+ main_local->op_ret = -1;
+ main_local->op_errno = ENOTEMPTY;
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_SUBVOL_ERROR,
+ "%s found on cached subvol %s",
+ local->loc.path, src->name);
+ goto err;
+ } else if (op_errno != ENOENT) {
+ main_local->op_ret = -1;
+ main_local->op_errno = op_errno;
+ goto err;
+ }
+
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY, "dict_new failed");
+ goto err;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
+ if (xattrs)
+ dict_unref (xattrs);
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_rmdir_lookup_cbk,
+ src, src->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+
return 0;
err:
@@ -3443,12 +8062,16 @@ int
dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
gf_dirent_t *entries, xlator_t *src)
{
- int ret = 0;
- int build_ret = 0;
- gf_dirent_t *trav = NULL;
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
call_frame_t *lookup_frame = NULL;
dht_local_t *lookup_local = NULL;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
local = frame->local;
@@ -3457,7 +8080,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
continue;
if (strcmp (trav->d_name, "..") == 0)
continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), NULL) == 1) {
+ if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
ret++;
continue;
}
@@ -3469,6 +8093,25 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
return 0;
}
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY, "dict_new failed");
+ return -1;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
+
+ if (xattrs)
+ dict_unref (xattrs);
+ return -1;
+ }
+
list_for_each_entry (trav, &entries->list, list) {
if (strcmp (trav->d_name, ".") == 0)
continue;
@@ -3480,32 +8123,32 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
lookup_frame = copy_frame (frame);
if (!lookup_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of Memory");
/* out of memory, let the rmdir fail
(as non-empty, unfortunately) */
goto err;
}
- lookup_local = GF_CALLOC (sizeof (*local), 1,
- gf_dht_mt_dht_local_t);
+ lookup_local = mem_get0 (this->local_pool);
if (!lookup_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of Memory");
goto err;
}
lookup_frame->local = lookup_local;
lookup_local->main_frame = frame;
+ lookup_local->hashed_subvol = src;
build_ret = dht_build_child_loc (this, &lookup_local->loc,
&local->loc, trav->d_name);
if (build_ret != 0)
goto err;
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- lookup_local->loc.path, src->name);
+ gf_uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+
+ gf_uuid_unparse(lookup_local->loc.gfid, gfid);
+
+ gf_msg_trace (this->name, 0,
+ "looking up %s on subvolume %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
LOCK (&frame->lock);
{
@@ -3513,755 +8156,829 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&frame->lock);
- STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
- src, src->fops->lookup,
- &lookup_local->loc, NULL);
+ subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat,
+ trav->dict);
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_INVALID_LINKFILE,
+ "Linkfile does not have link subvolume. "
+ "path = %s, gfid = %s",
+ lookup_local->loc.path, gfid);
+ STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
+ src, src->fops->lookup,
+ &lookup_local->loc, xattrs);
+ } else {
+ STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk,
+ subvol, subvol->fops->lookup,
+ &lookup_local->loc, xattrs);
+ }
ret++;
}
+ if (xattrs)
+ dict_unref (xattrs);
+
return ret;
err:
- DHT_STACK_DESTROY (lookup_frame);
+ if (xattrs)
+ dict_unref (xattrs);
+
+ if (lookup_frame)
+ DHT_STACK_DESTROY (lookup_frame);
return 0;
}
int
dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
xlator_t *src = NULL;
int ret = 0;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
src = prev->this;
- if (op_ret > 2) {
+ if (op_ret > 2) {
ret = dht_rmdir_is_subvol_empty (frame, this, entries, src);
switch (ret) {
case 0: /* non linkfiles exist */
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
+ gf_msg_trace (this->name, 0,
+ "readdir on %s for %s returned %d "
+ "entries", prev->this->name,
+ local->loc.path, op_ret);
local->op_ret = -1;
local->op_errno = ENOTEMPTY;
break;
default:
/* @ret number of linkfiles are getting unlinked */
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s found %d linkfiles",
- prev->this->name, local->loc.path, ret);
+ gf_msg_trace (this->name, 0,
+ "readdir on %s for %s found %d "
+ "linkfiles", prev->this->name,
+ local->loc.path, ret);
break;
}
- }
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
- return 0;
+ return 0;
}
int
dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- STACK_WIND (frame, dht_rmdir_readdirp_cbk,
- prev->this, prev->this->fops->readdirp,
- local->fd, 4096, 0);
+ gf_msg_debug (this->name, op_errno,
+ "opendir on %s for %s failed, "
+ "gfid = %s,",
+ prev->this->name, local->loc.path, gfid);
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
- return 0;
+ if (!is_last_call (this_call_cnt))
+ return 0;
-err:
- this_call_cnt = dht_frame_return (frame);
+ if (local->op_ret == -1)
+ goto err;
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ fd_bind (fd);
+ dict = dict_new ();
+ if (!dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s",
+ local->loc.path, conf->link_xattr_name);
+
+ local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_readdirp_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->readdirp,
+ local->fd, 4096, 0, dict);
+ }
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+
+err:
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
+
+ return 0;
}
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int i = -1;
- int ret = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+ local->fop_succeeded = 0;
+
+ local->flags = flags;
+
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, local->fd);
- }
-
- return 0;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (flags) {
+ return dht_rmdir_do (frame, this);
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, local->fd, NULL);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
+ NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+dht_entrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
- return 0;
+ DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
+/* TODO
+ * Sending entrylk to cached subvol can result in stale lock
+ * as described in the bug 1311002.
+ */
int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_uuid_unparse(loc->gfid, gfid);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s, "
+ "gfid = %s", loc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict);
+ local->call_cnt = 1;
- return 0;
+ STACK_WIND (frame, dht_entrylk_cbk,
+ subvol, subvol->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict);
- return 0;
+ DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
+ return 0;
}
int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+
+ gf_uuid_unparse(fd->inode->gfid, gfid);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "No cached subvolume for fd=%p,"
+ " gfid = %s", fd, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict);
+ STACK_WIND (frame, dht_fentrylk_cbk,
+ subvol, subvol->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+dht_forget (xlator_t *this, inode_t *inode)
{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- return 0;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_layout_t *layout = NULL;
+
+ inode_ctx_del (inode, this, &ctx_int);
+
+ if (!ctx_int)
+ return 0;
+
+ ctx = (dht_inode_ctx_t *) (long) ctx_int;
+
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref (this, layout);
+ GF_FREE (ctx);
+
+ return 0;
}
-int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+int
+dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
+ dht_methods_t *methods = NULL;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ methods = &(conf->methods);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ /* had all subvolumes reported status once till now? */
+ had_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ subvol = data;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ conf->gen++;
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
- return 0;
+ if (cnt == -1) {
+ gf_msg_debug (this->name, 0,
+ "got GF_EVENT_CHILD_UP bad "
+ "subvolume %s",
+ subvol->name);
+ break;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno);
+ gettimeofday (&time, NULL);
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = time.tv_sec;
+ }
+ UNLOCK (&conf->subvolume_lock);
- return 0;
-}
+ /* one of the node came back up, do a stat update */
+ dht_get_du_info_for_subvol (this, cnt);
+ break;
-int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ case GF_EVENT_CHILD_MODIFIED:
+ subvol = data;
-{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- return 0;
-}
+ conf->gen++;
+ propagate = 1;
+ break;
-int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ case GF_EVENT_SOME_CHILD_DOWN:
+ subvol = data;
+ propagate = 1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ break;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
+ if (conf->assert_no_child_down) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_CHILD_DOWN,
+ "Received CHILD_DOWN. Exiting");
+ if (conf->defrag) {
+ gf_defrag_stop (conf->defrag,
+ GF_DEFRAG_STATUS_FAILED, NULL);
+ } else {
+ kill (getpid(), SIGTERM);
+ }
+ }
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
- return 0;
+ if (cnt == -1) {
+ gf_msg_debug (this->name, 0,
+ "got GF_EVENT_CHILD_DOWN bad "
+ "subvolume %s", subvol->name);
+ break;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno);
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = 0;
+ }
+ UNLOCK (&conf->subvolume_lock);
- return 0;
-}
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (conf->last_event[i] != event)
+ event = GF_EVENT_CHILD_MODIFIED;
+ break;
+ case GF_EVENT_CHILD_CONNECTING:
+ subvol = data;
-int
-dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
-{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- return 0;
-}
+ if (cnt == -1) {
+ gf_msg_debug (this->name, 0,
+ "got GF_EVENT_CHILD_CONNECTING"
+ " bad subvolume %s",
+ subvol->name);
+ break;
+ }
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->last_event[cnt] = event;
+ }
+ UNLOCK (&conf->subvolume_lock);
-int
-dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER))
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
+ gf_defrag_start_detach_tier(defrag);
+ else if (cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER)
+ gf_defrag_stop (defrag,
+ GF_DEFRAG_STATUS_STOPPED, output);
+ else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
+ ret = gf_defrag_pause_tier (this, defrag);
+ else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
+ ret = gf_defrag_resume_tier (this, defrag);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return ret;
+ break;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ default:
+ propagate = 1;
+ break;
+ }
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
- STACK_WIND (frame, dht_entrylk_cbk,
- subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type);
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i])
+ have_heard_from_all = 0;
+ }
- return 0;
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all) {
+ propagate = 1;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno);
+ }
- return 0;
-}
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
-int
-dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
-{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- return 0;
-}
+ if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+
+ /* Rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ *
+ * If there is a graph switch, we should not restart the
+ * rebalance daemon. Use 'run_defrag' to indicate if the
+ * thread has already started.
+ */
+ if (conf->defrag && !run_defrag) {
+ if (methods->migration_needed(this)) {
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th,
+ NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ GF_FREE (conf->defrag);
+ conf->defrag = NULL;
+ kill (getpid(), SIGTERM);
+ }
+ }
+ }
+ }
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+out:
+ return ret;
+}
int
-dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ ret = dht_inode_ctx_get (inode, this, &ctx);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
- STACK_WIND (frame, dht_fentrylk_cbk,
- subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ return ret;
+}
- return 0;
+void
+dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno);
+ char string[2048] = {0};
+ char *output_string = NULL;
+ int len = 0;
+ int off = 0;
+ int i = 0;
+ gf_loglevel_t log_level = gf_log_get_loglevel();
+ int ret = 0;
+ int max_string_len = 0;
- return 0;
-}
+ if (log_level < GF_LOG_INFO)
+ return;
+ if (!layout)
+ return;
-int
-dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
-
- dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
- dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
-
- if (local->inode) {
- local->prebuf.ia_ino = local->inode->ino;
- local->stbuf.ia_ino = local->inode->ino;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (!layout->cnt)
+ return;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
+ if (!loc)
+ return;
- return 0;
-}
+ if (!loc->path)
+ return;
+ max_string_len = sizeof (string);
-int
-dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ ret = snprintf (string, max_string_len, "Setting layout of %s with ",
+ loc->path);
+ if (ret < 0)
+ return;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ len += ret;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
- }
-
- return 0;
+ /* Calculation of total length of the string required to calloc
+ * output_string. Log includes subvolume-name, start-range, end-range and
+ * err value.
+ *
+ * This log will help to debug cases where:
+ * a) Different processes set different layout of a directory.
+ * b) Error captured in lookup, which will be filled in layout->err
+ * (like ENOENT, ESTALE etc)
+ */
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ for (i = 0; i < layout->cnt; i++) {
- return 0;
-}
+ ret = snprintf (string, max_string_len,
+ "[Subvol_name: %s, Err: %d , Start: "
+ "%"PRIu32 " , Stop: %"PRIu32 " , Hash: %"
+ PRIu32 " ], ",
+ layout->list[i].xlator->name,
+ layout->list[i].err, layout->list[i].start,
+ layout->list[i].stop,
+ layout->list[i].commit_hash);
+ if (ret < 0)
+ return;
-int
-dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
-{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ len += ret;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ len++;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->layout = layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
-
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
+ output_string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ if (!output_string)
+ return;
- return 0;
-}
+ ret = snprintf (output_string, len, "Setting layout of %s with ",
+ loc->path);
+ if (ret < 0)
+ goto err;
-int
-dht_forget (xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_layout = 0;
- dht_layout_t *layout = NULL;
+ off += ret;
- inode_ctx_get (inode, this, &tmp_layout);
- if (!tmp_layout)
- return 0;
+ for (i = 0; i < layout->cnt; i++) {
- layout = (dht_layout_t *)(long)tmp_layout;
- dht_layout_unref (this, layout);
+ ret = snprintf (output_string + off, len - off,
+ "[Subvol_name: %s, Err: %d , Start: "
+ "%"PRIu32 " , Stop: %"PRIu32 " , Hash: %"
+ PRIu32 " ], ",
+ layout->list[i].xlator->name,
+ layout->list[i].err, layout->list[i].start,
+ layout->list[i].stop,
+ layout->list[i].commit_hash);
- return 0;
-}
+ if (ret < 0)
+ goto err;
+ off += ret;
+ }
-int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
-{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
+ gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_FIXED_LAYOUT,
+ "%s", output_string);
+err:
+ GF_FREE (output_string);
+}
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
+int32_t dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local)
+{
+ int ret = -1;
- conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->subvolumes) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
- conf->subvolume_cnt = cnt;
+ if (!local)
+ goto out;
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
+ local->rebalance.target_node =
+ dht_subvol_get_hashed (this, &local->loc);
- conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
- gf_dht_mt_char);
- if (!conf->subvolume_status) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
+ if (local->rebalance.target_node)
+ ret = 0;
- return 0;
+out:
+ return ret;
}
-
-int
-dht_notify (xlator_t *this, int event, void *data, ...)
+int32_t dht_migration_needed(xlator_t *this)
{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
+ gf_defrag_info_t *defrag = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+ conf = this->private;
- conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", conf, out);
+ GF_VALIDATE_OR_GOTO ("dht", conf->defrag, out);
- switch (event) {
- case GF_EVENT_CHILD_UP:
- subvol = data;
+ defrag = conf->defrag;
- conf->gen++;
+ if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
+ (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
+ ret = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+out:
+ return ret;
+}
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- subvol->name);
- break;
- }
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 1;
- }
- UNLOCK (&conf->subvolume_lock);
- /* one of the node came back up, do a stat update */
- dht_get_du_info_for_subvol (this, cnt);
+/*
+This function should not be called more then once during a FOP
+handling path. It is valid only for for ops on files
+*/
+int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local,
+ struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
- break;
+ if (!local)
+ return -1;
+
+ if (local->rebalance.set) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_REBAL_STRUCT_SET,
+ "local->rebalance already set");
+ }
- case GF_EVENT_CHILD_DOWN:
- subvol = data;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ if (stbuf)
+ memcpy (&local->rebalance.stbuf, stbuf, sizeof (struct iatt));
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- subvol->name);
- break;
- }
+ if (prebuf)
+ memcpy (&local->rebalance.prebuf, prebuf, sizeof (struct iatt));
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 0;
- }
- UNLOCK (&conf->subvolume_lock);
+ if (postbuf)
+ memcpy (&local->rebalance.postbuf, postbuf,
+ sizeof (struct iatt));
- break;
- }
+ if (xdata)
+ local->rebalance.xdata = dict_ref (xdata);
- ret = default_notify (this, event, data);
+ local->rebalance.set = 1;
- return ret;
+ return 0;
}
+gf_boolean_t
+dht_is_tier_xlator (xlator_t *this)
+{
+
+ if (strcmp (this->type, "cluster/tier") == 0)
+ return _gf_true;
+ return _gf_false;
+}
+
+int32_t
+dht_release (xlator_t *this, fd_t *fd)
+{
+ return dht_fd_ctx_destroy (this, fd);
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b361f14426e..9a71c46c8e4 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1,287 +1,1232 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <regex.h>
+#include <signal.h>
#include "dht-mem-types.h"
+#include "dht-messages.h"
+#include "call-stub.h"
+#include "libxlator.h"
+#include "syncop.h"
+#include "refcount.h"
+#include "timer.h"
#ifndef _DHT_H
#define _DHT_H
-#define GF_DHT_LOOKUP_UNHASHED_ON 1
-#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
+#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
+#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+#define TIERING_MIGRATION_KEY "tiering.migration"
+#define DHT_LAYOUT_HASH_INVALID 1
+
+#define DHT_DIR_STAT_BLOCKS 8
+#define DHT_DIR_STAT_SIZE 4096
+
+#include <fnmatch.h>
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+
+typedef int (*dht_refresh_layout_unlock) (call_frame_t *frame, xlator_t *this,
+ int op_ret, int invoke_cbk);
+typedef int (*dht_refresh_layout_done_handle) (call_frame_t *frame);
struct dht_layout {
- int cnt;
- int preset;
- int gen;
- int type;
- int ref; /* use with dht_conf_t->layout_lock */
- int search_unhashed;
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ /*
+ * The last *configuration* state for which this directory was known
+ * to be in balance. The corresponding vol_commit_hash changes
+ * whenever bricks are added or removed. This value changes when a
+ * (full) rebalance is complete. If they match, it's safe to assume
+ * that every file is where it should be and there's no need to do
+ * lookups for files elsewhere. If they don't, then we have to do a
+ * global lookup to be sure.
+ */
+ uint32_t commit_hash;
+ /*
+ * The *runtime* state of the volume, changes when connections to
+ * bricks are made or lost.
+ */
+ int gen;
+ int type;
+ int ref; /* use with dht_conf_t->layout_lock */
+ gf_boolean_t search_unhashed;
struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[0];
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ uint32_t commit_hash;
+ xlator_t *xlator;
+ } list[];
+};
+typedef struct dht_layout dht_layout_t;
+
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+ xlator_t *lock_subvol;
};
-typedef struct dht_layout dht_layout_t;
+
+typedef struct dht_inode_ctx dht_inode_ctx_t;
typedef enum {
- DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+ dict_t *xattr;
+ int32_t set;
+ struct gf_flock flock;
+ int lock_cmd;
+};
+
+/**
+ * Enum to store decided action based on the qdstatfs (quota-deem-statfs)
+ * events
+ **/
+typedef enum {
+ qdstatfs_action_OFF = 0,
+ qdstatfs_action_REPLACE,
+ qdstatfs_action_NEGLECT,
+ qdstatfs_action_COMPARE,
+} qdstatfs_action_t;
+
+typedef enum {
+ FAIL_ON_ANY_ERROR,
+ IGNORE_ENOENT_ESTALE
+} dht_reaction_type_t;
+
+struct dht_skip_linkto_unlink {
+
+ gf_boolean_t handle_valid_link;
+ int opend_fd_count;
+ xlator_t *hash_links_to;
+ uuid_t cached_gfid;
+ uuid_t hashed_gfid;
+};
+
+typedef struct {
+ xlator_t *xl;
+ loc_t loc; /* contains/points to inode to lock on. */
+ short type; /* read/write lock. */
+ char *domain; /* Only locks within a single domain
+ * contend with each other
+ */
+ gf_lkowner_t lk_owner;
+ gf_boolean_t locked;
+} dht_lock_t;
+
+typedef
+int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout);
+
+typedef
+gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem,
+ dht_layout_t **ondisk);
struct dht_local {
- int call_cnt;
- loc_t loc;
- loc_t loc2;
- int op_ret;
- int op_errno;
- int layout_mismatch;
+ int call_cnt;
+ loc_t loc;
+ loc_t loc2;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
/* Use stbuf as the postbuf, when we require both
* pre and post attrs */
- struct iatt stbuf;
+ struct iatt stbuf;
struct iatt prebuf;
struct iatt preoldparent;
struct iatt postoldparent;
struct iatt preparent;
struct iatt postparent;
- struct statvfs statvfs;
- fd_t *fd;
- inode_t *inode;
- dict_t *xattr;
- dict_t *xattr_req;
- dht_layout_t *layout;
- size_t size;
- ino_t ia_ino;
- ino_t ia_gen;
- xlator_t *src_hashed, *src_cached;
- xlator_t *dst_hashed, *dst_cached;
- xlator_t *cached_subvol;
- xlator_t *hashed_subvol;
- char need_selfheal;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *params;
+ dict_t *xattr;
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t ia_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ char need_selfheal;
int file_count;
int dir_count;
call_frame_t *main_frame;
- struct {
- fop_mknod_cbk_t linkfile_cbk;
- struct iatt stbuf;
- loc_t loc;
- inode_t *inode;
- dict_t *xattr;
- xlator_t *srcvol;
- } linkfile;
- struct {
- uint32_t hole_cnt;
- uint32_t overlaps_cnt;
- uint32_t missing;
- uint32_t down;
- uint32_t misc;
- dht_selfheal_dir_cbk_t dir_cbk;
- dht_layout_t *layout;
- } selfheal;
+ int fop_succeeded;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct iatt stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_selfheal_layout_t healer;
+ dht_need_heal_t should_heal;
+ gf_boolean_t force_mkdir;
+ dht_layout_t *layout, *refreshed_layout;
+ } selfheal;
+
+ dht_refresh_layout_unlock refresh_layout_unlock;
+ dht_refresh_layout_done_handle refresh_layout_done;
+
uint32_t uid;
uint32_t gid;
- /* needed by nufa */
- int32_t flags;
- mode_t mode;
- dev_t rdev;
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
+ char *newpath;
+
+ /* gfid related */
+ uuid_t gfid;
+
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+
+ glusterfs_fop_t fop;
+
+ gf_boolean_t linked;
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
+ gf_boolean_t quota_deem_statfs;
+
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+
+ struct dht_skip_linkto_unlink skip_unlink;
+
+ struct {
+ fop_inodelk_cbk_t inodelk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+ } lock;
+
+ short lock_type;
+
+ call_stub_t *stub;
+ int32_t parent_disk_layout[4];
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
double avail_percent;
+ double avail_inodes;
uint64_t avail_space;
uint32_t log;
+ uint32_t chunks;
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+ GF_DEFRAG_CMD_START_TIER = 1 + 5,
+ GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
+ GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
+ GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
+ GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
+ GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
+
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct dht_container {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent_t *next;
+ struct _gf_dirent_t *prev;
+ };
+ };
+ gf_dirent_t *df_entry;
+ xlator_t *this;
+ loc_t *parent_loc;
+ dict_t *migrate_data;
+};
+
+typedef enum tier_mode_ {
+ TIER_MODE_NONE = 0,
+ TIER_MODE_TEST,
+ TIER_MODE_WM
+} tier_mode_t;
+
+typedef enum tier_pause_state_ {
+ TIER_RUNNING = 0,
+ TIER_REQUEST_PAUSE,
+ TIER_PAUSED
+} tier_pause_state_t;
+
+/* This Structure is only used in tiering fixlayout */
+typedef struct gf_tier_fix_layout_arg {
+ xlator_t *this;
+ dict_t *fix_layout;
+ pthread_t thread_id;
+} gf_tier_fix_layout_arg_t;
+
+typedef struct gf_tier_conf {
+ int is_tier;
+ int watermark_hi;
+ int watermark_low;
+ int watermark_last;
+ fsblkcnt_t blocks_total;
+ fsblkcnt_t blocks_used;
+ int percent_full;
+ uint64_t max_migrate_bytes;
+ int max_migrate_files;
+ tier_mode_t mode;
+ int tier_max_promote_size;
+ int tier_promote_frequency;
+ int tier_demote_frequency;
+ uint64_t st_last_promoted_size;
+ uint64_t st_last_demoted_size;
+ tier_pause_state_t pause_state;
+ struct synctask *pause_synctask;
+ gf_timer_t *pause_timer;
+ pthread_mutex_t pause_mutex;
+ int promote_in_progress;
+ int demote_in_progress;
+ /* This Structure is only used in tiering fixlayout */
+ gf_tier_fix_layout_arg_t tier_fix_layout_arg;
+ /* Indicates the index of the first queryfile picked
+ * in the last cycle of promote or demote */
+ int32_t last_promote_qfile_index;
+ int32_t last_demote_qfile_index;
+} gf_tier_conf_t;
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ struct timeval start_time;
+ gf_boolean_t stats;
+ uint32_t new_commit_hash;
+ gf_defrag_pattern_list_t *defrag_pattern;
+ gf_tier_conf_t tier_conf;
+
+ /*Data Tiering params for scanner*/
+ uint64_t total_files_promoted;
+ uint64_t total_files_demoted;
+ int write_freq_threshold;
+ int read_freq_threshold;
+
+ pthread_cond_t parallel_migration_cond;
+ pthread_mutex_t dfq_mutex;
+ pthread_cond_t rebalance_crawler_alarm;
+ int32_t q_entry_count;
+ int32_t global_error;
+ struct dht_container *queue;
+ int32_t crawl_done;
+ int32_t abort;
+ int32_t wakeup_crawler;
+
+ /*Throttle params*/
+ /*stands for reconfigured thread count*/
+ int32_t recon_thread_count;
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+ pthread_cond_t df_wakeup_thread;
+
+ /* Hard link handle requirement */
+ synclock_t link_lock;
+
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
+struct dht_methods_s {
+ int32_t (*migration_get_dst_subvol)(xlator_t *this,
+ dht_local_t *local);
+ int32_t (*migration_other)(xlator_t *this,
+ gf_defrag_info_t *defrag);
+ int32_t (*migration_needed)(xlator_t *this);
+ xlator_t* (*layout_search)(xlator_t *this,
+ dht_layout_t *layout,
+ const char *name);
+};
+
+typedef struct dht_methods_s dht_methods_t;
+
struct dht_conf {
- gf_lock_t subvolume_lock;
+ gf_lock_t subvolume_lock;
int subvolume_cnt;
xlator_t **subvolumes;
- char *subvolume_status;
- dht_layout_t **file_layouts;
- dht_layout_t **dir_layouts;
- dht_layout_t *default_dir_layout;
- gf_boolean_t search_unhashed;
- int gen;
+ char *subvolume_status;
+ int *last_event;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ unsigned int search_unhashed;
+ gf_boolean_t lookup_optimize;
+ int gen;
dht_du_t *du_stats;
- uint64_t min_free_disk;
+ double min_free_disk;
+ double min_free_inodes;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
+ struct timeval last_stat_fetch;
gf_lock_t layout_lock;
+ dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
dht */
+ gf_boolean_t use_readdirp;
+ char vol_uuid[UUID_SIZE + 1];
+ gf_boolean_t assert_no_child_down;
+ time_t *subvol_up_time;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
+ /* to keep track of nodes which are decommissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ gf_boolean_t rsync_regex_valid;
+ regex_t extra_regex;
+ gf_boolean_t extra_regex_valid;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *link_xattr_name;
+ char *commithash_xattr_name;
+ char *wild_xattr_name;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+ gf_boolean_t randomize_by_gfid;
+ char *dthrottle;
+
+ dht_methods_t methods;
+
+ struct mem_pool *lock_pool;
+
+ /*local subvol storage for rebalance*/
+ xlator_t **local_subvols;
+ int32_t local_subvols_cnt;
+
+ /*
+ * "Commit hash" for this volume topology. Changed whenever bricks
+ * are added or removed.
+ */
+ uint32_t vol_commit_hash;
+ gf_boolean_t vch_forced;
+
+ /* lock migration */
+
+ gf_boolean_t lock_migration_enabled;
};
typedef struct dht_conf dht_conf_t;
+struct dht_dfoffset_ctx {
+ xlator_t *this;
+ off_t offset;
+ int32_t readdir_done;
+};
+typedef struct dht_dfoffset_ctx dht_dfoffset_ctx_t;
struct dht_disk_layout {
- uint32_t cnt;
- uint32_t type;
- struct {
- uint32_t start;
- uint32_t stop;
- } list[1];
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
};
typedef struct dht_disk_layout dht_disk_layout_t;
-#define WIPE(statp) do { typeof(*statp) z = {0,}; if (statp) *statp = z; } while (0)
+typedef enum {
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+} gf_dht_migrate_data_type_t;
-#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
+typedef enum {
+ GF_DHT_EQUAL_DISTRIBUTION,
+ GF_DHT_WEIGHTED_DISTRIBUTION
+} dht_distribution_type_t;
-#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
+struct dir_dfmeta {
+ gf_dirent_t *equeue;
+ dht_dfoffset_ctx_t *offset_var;
+ struct list_head **head;
+ struct list_head **iterator;
+ int *fetch_entries;
+};
+
+typedef struct dht_migrate_info {
+ xlator_t *src_subvol;
+ xlator_t *dst_subvol;
+ GF_REF_DECL;
+} dht_migrate_info_t;
+
+
+
+typedef struct dht_fd_ctx {
+ uint64_t opened_on_dst;
+ GF_REF_DECL;
+} dht_fd_ctx_t;
+
+
+/* True when an (op_ret, op_errno) pair reports failure (-1) with ENOENT.
+ * Both parameters are parenthesized so expression arguments expand
+ * with the intended precedence. */
+#define ENTRY_MISSING(op_ret, op_errno) (((op_ret) == -1) && ((op_errno) == ENOENT))
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
-#define DHT_LINKFILE_MODE (S_ISVTX)
-#define check_is_linkfile(i,s,x) ( \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE) && \
- (s->ia_size == 0))
+#define DHT_MIGRATION_IN_PROGRESS 1
+#define DHT_MIGRATION_COMPLETED 2
+
+#define check_is_linkfile(i,s,x,n) (IS_DHT_LINKFILE_MODE (s) && dict_get (x, n))
+
+/* True when buf describes a regular file whose mode, with the S_IFMT
+ * file-type bits masked off, equals DHT_LINKFILE_MODE exactly. */
+#define IS_DHT_MIGRATION_PHASE2(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
+ ~S_IFMT) == DHT_LINKFILE_MODE))
+
+/* True when buf describes a regular file that has both the sticky and
+ * the sgid protection bits set. */
+#define IS_DHT_MIGRATION_PHASE1(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
+
+/* If buf is non-NULL and satisfies IS_DHT_MIGRATION_PHASE1, clear its
+ * sticky and sgid bits in place; otherwise leave buf untouched. */
+#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
+
+/* True when op_errno is ENOENT, ESTALE or EIO. The argument may be
+ * evaluated up to three times, so it must be free of side effects. */
+/*Bad fix. Please revert the commit after fixing the bug 1329505*/
+#define dht_inode_missing(op_errno) ((op_errno) == ENOENT || \
+ (op_errno) == ESTALE || \
+ (op_errno) == EIO)
#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
+#define we_are_not_migrating(x) ((x) == 1)
+
#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
+ __xl = frame->this; \
+ __local = frame->local; \
frame->local = NULL; \
} \
- STACK_UNWIND_STRICT (fop, frame, params); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
- const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+/* While holding inode->lock, raise (new_sec, new_nsec) to the later of
+ * itself and (ctx_sec, ctx_nsec); when post is non-zero, also store the
+ * result back into (ctx_sec, ctx_nsec). The time arguments are assigned
+ * to, so they must be lvalues. */
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
+ LOCK (&inode->lock); \
+ { \
+ if (ctx_sec == new_sec) \
+ new_nsec = max (new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+/* True when timestamp (a, an) is strictly earlier than (b, bn), i.e. when
+ * the second (sec, nsec) pair is the greater time. */
+#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+
+/* Set GLUSTERFS_INTERNAL_FOP_KEY = "yes" in xattr, first creating the
+ * dict with dict_new() when xattr is NULL (so xattr must be an assignable
+ * lvalue). If dict_new() fails the macro does nothing further; a
+ * dict_set_str() failure is only logged. The expansion refers to `this`
+ * and `local`, which must be in scope at the point of use. */
+#define DHT_MARK_FOP_INTERNAL(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
+ DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, \
+ local->loc.path); \
+ } \
+ } while (0)
+
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+ const char *name);
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local);
+int32_t
+dht_migration_needed(xlator_t *this);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p, uint32_t *no_space_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
- struct iatt *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
+ struct iatt *buf, dict_t *xattr);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
-
-int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw);
+ int op_ret, int op_errno, dict_t *xattr);
+int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw, int disk_layout_len);
+int
+dht_disk_layout_extract_for_subvol (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p);
int dht_frame_return (call_frame_t *frame);
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
-int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
- uint64_t *x);
+int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol);
void dht_local_wipe (xlator_t *this, dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame);
-int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
- xlator_t *subvol);
+dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop);
+int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
+ xlator_t *subvol);
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol,
+ xlator_t *fromvol, loc_t *loc);
+int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+
int
dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- dht_layout_t *layout);
+ dht_layout_t *layout);
int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_layout_sort_volname (dht_layout_t *layout);
-int dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
-
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
+int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
-void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
+int dht_layout_index_for_subvol (dht_layout_t *layout, xlator_t *subvol);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
+void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
-xlator_t *dht_first_up_subvol (xlator_t *this);
-xlator_t *dht_last_up_subvol (xlator_t *this);
-int dht_frame_su_do (call_frame_t *frame);
-int dht_frame_su_undo (call_frame_t *frame);
+xlator_t *dht_first_up_subvol (xlator_t *this);
+xlator_t *dht_last_up_subvol (xlator_t *this);
int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-#endif /* _DHT_H */
+int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol);
+
+int dht_rename_cleanup (call_frame_t *frame);
+int dht_rename_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int dht_update_commit_hash_for_layout (call_frame_t *frame);
+int dht_fix_directory_layout (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+
+int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+
+/* migration/rebalance */
+int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
+
+int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
+int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+
+int
+dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf);
+
+/* FOPS */
+int32_t dht_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req);
+
+int32_t dht_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_access (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t dht_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t dht_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata);
+
+int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t dht_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t dht_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t dht_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t dht_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t dht_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_setxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_getxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t dht_fsetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_fgetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t dht_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_lk (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_lease (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t dht_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *dict);
+
+int32_t dht_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_forget (xlator_t *this, inode_t *inode);
+int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+int
+dht_set_subvol_range(xlator_t *this);
+int32_t dht_init (xlator_t *this);
+void dht_fini (xlator_t *this);
+int dht_reconfigure (xlator_t *this, dict_t *options);
+int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+
+/* definitions for nufa/switch */
+int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+void
+gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state);
+
+tier_pause_state_t
+gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf);
+
+int
+gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag);
+
+tier_pause_state_t
+gf_defrag_check_pause_tier (gf_tier_conf_t *defrag);
+
+int
+gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag);
+
+int
+gf_defrag_start_detach_tier (gf_defrag_info_t *defrag);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+ dict_t *output);
+
+void*
+gf_defrag_start (void *this);
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t* layout_int);
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+void dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat);
+
+int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal (void *data);
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+int
+dht_dir_has_layout (dict_t *xattr, char *name);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix);
+int32_t
+dht_priv_dump (xlator_t *this);
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_get_mig_info (xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol);
+gf_boolean_t
+dht_mig_info_is_invalid (xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol);
+
+int
+dht_subvol_status (dht_conf_t *conf, xlator_t *subvol);
+
+void
+dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc,
+ dht_layout_t *layout);
+int
+dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this);
+
+int
+dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict);
+
+
+/* Acquire non-blocking inodelk on a list of xlators.
+ *
+ * @lk_array: array of lock requests to be acquired.
+ *
+ * @lk_count: number of locks in @lk_array
+ *
+ * @inodelk_cbk: will be called after inodelk replies are received
+ *
+ * @retval: -1 if stack_winding inodelk fails. 0 otherwise.
+ * inodelk_cbk is called with appropriate error on errors.
+ * On failure to acquire lock on all members of list, successful
+ * locks are unlocked before invoking cbk.
+ */
+
+int
+dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+
+/* Same as dht_nonblocking_inodelk, but issues sequential blocking locks on
+ * @lk_array directly. Locks are issued in an order that remains the same
+ * for a given list of xlators (irrespective of their order within the list).
+ */
+int
+dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, dht_reaction_type_t reaction,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+dht_lock_t *
+dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain);
+void
+dht_lock_array_free (dht_lock_t **lk_array, int count);
+
+int32_t
+dht_lock_count (dht_lock_t **lk_array, int lk_count);
+
+int
+dht_layout_sort (dht_layout_t *layout);
+
+int
+dht_heal_full_path (void *data);
+
+int
+dht_heal_full_path_done (int op_ret, call_frame_t *frame, void *data);
+
+int
+dht_layout_missing_dirs (dht_layout_t *layout);
+
+int
+dht_refresh_layout (call_frame_t *frame);
+
+gf_boolean_t
+dht_is_tier_xlator (xlator_t *this);
+
+int
+dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno);
+
+int32_t
+dht_set_local_rebalance (xlator_t *this, dht_local_t *local,
+ struct iatt *stbuf,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc);
+
+gf_boolean_t
+dht_fd_open_on_dst (xlator_t *this, fd_t *fd, xlator_t *dst);
+
+int32_t
+dht_fd_ctx_destroy (xlator_t *this, fd_t *fd);
+
+int32_t
+dht_release (xlator_t *this, fd_t *fd);
+
+
+int32_t
+dht_set_fixed_dir_stat (struct iatt *stat);
+
+xlator_t*
+dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local);
+
+int
+dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret);
+
+#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 51769c04296..1eb9e63c531 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -1,84 +1,106 @@
/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
/* TODO: add NS locking */
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
+#include "dht-messages.h"
#include "defaults.h"
#include <sys/time.h>
-int
+int
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ call_frame_t *prev = NULL;
int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- uint64_t bytes = 0;
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
+ uint32_t bpc; /* blocks per chunk */
+ uint32_t chunks = 0;
- local = frame->local;
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1)
- goto out;
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bfree * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bfree * statvfs->f_bsize);
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- gf_log (this->name, GF_LOG_DEBUG,
- "on subvolume '%s': avail_percent is: "
- "%.2f and avail_space is: %"PRIu64"",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space);
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- out:
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_DISK_INFO_ERROR,
+ "failed to get disk info from %s", prev->this->name);
+ goto out;
+ }
+
+ if (statvfs && statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ /*
+ * A 32-bit count of 1MB chunks allows a maximum brick size of
+ * ~4PB. It's possible that we could see a single local FS
+ * bigger than that some day, but this code is likely to be
+ * irrelevant by then. Meanwhile, it's more important to keep
+ * the chunk size small so the layout-calculation code that
+ * uses this value can be tested on normal machines.
+ */
+ bpc = (1 << 20) / statvfs->f_bsize;
+ chunks = (statvfs->f_blocks + bpc - 1) / bpc;
+ }
+
+ if (statvfs && statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /*
+ * Set percent inodes to 100 for dynamically allocated inode
+ * filesystems. The rationale is that distribute need not
+ * worry about total inodes; rather, let the 'create()' be
+ * scheduled on the hashed subvol regardless of the total
+ * inodes.
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev->this == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ conf->du_stats[i].chunks = chunks;
+ gf_msg_debug (this->name, 0,
+ "subvolume '%s': avail_percent "
+ "is: %.2f and avail_space "
+ "is: %" PRIu64" and avail_inodes"
+ " is: %.2f",
+ prev->this->name,
+ conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ break; /* no point in looping further */
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
@@ -87,177 +109,352 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {0,};
conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc);
-
- return 0;
- err:
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame (this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init (statfs_frame, NULL, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs,
+ &tmp_loc, NULL);
+
+ return 0;
+err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
-
- return -1;
+
+ return -1;
}
int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
+ int i = 0;
+ int ret = -1;
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
+ struct timeval tv = {0,};
+ loc_t tmp_loc = {0,};
conf = this->private;
gettimeofday (&tv, NULL);
- if (tv.tv_sec > (conf->refresh_interval
+
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
+ if (tv.tv_sec > (conf->refresh_interval
+ conf->last_stat_fetch.tv_sec)) {
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ statfs_frame = copy_frame (frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init (statfs_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+ statfs_local->params = dict_new ();
+ if (!statfs_local->params)
goto err;
- }
- loc_copy (&statfs_local->loc, loc);
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc);
+ ret = dict_set_int8 (statfs_local->params,
+ GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
+ goto err;
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs,
+ &tmp_loc, statfs_local->params);
+ }
+
+ conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ }
+ return 0;
err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
- return -1;
+ return -1;
}
-int
+gf_boolean_t
dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int subvol_filled = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
- conf = this->private;
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes <
+ conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more bricks",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_percent));
+ }
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ DHT_MSG_SUBVOL_INSUFF_INODES,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more bricks",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_inodes));
+ }
+ }
+
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
+
+
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_msg_debug (this->name, 0,
+ "Missing layout. path=%s,"
+ " parent gfid = %s", loc->path,
+ uuid_utoa (loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref (this, local->layout);
+ }
- /* Check for values above specified percent or free disk */
LOCK (&conf->subvolume_lock);
- {
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ layout);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol,
+ layout);
+ }
+
+ }
+ UNLOCK (&conf->subvolume_lock);
+out:
+ if (!avail_subvol) {
+ gf_msg_debug (this->name, 0,
+ "No subvolume has enough free space \
+ and/or inodes to create");
+ avail_subvol = subvol;
+ }
+
+ if (layout)
+ dht_layout_unref (this, layout);
+ return avail_subvol;
+}
+
+static inline
+int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,
+ dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* discard decommissioned subvol */
+ if (conf->decommission_subvols_cnt) {
for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- }
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == this) {
+ ret = -1;
+ goto out;
}
}
}
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_log (this->name, GF_LOG_WARNING,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
}
}
- return subvol_filled;
+ return avail_subvol;
}
+
+/* Get subvol which has at least one inode and maximum space */
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
int i = 0;
- double max= 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
conf = this->private;
- avail_subvol = subvol;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent > max) {
- max = conf->du_stats[i].avail_percent;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if (conf->du_stats[i].avail_space > max) {
- max = conf->du_stats[i].avail_space;
- avail_subvol = conf->subvolumes[i];
- }
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it*/
- if (max < conf->min_free_disk)
- avail_subvol = subvol;
+ ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
- if (avail_subvol == subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space to create");
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
}
-
+
return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index dfc1541fa1e..66e3ede736b 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -1,26 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
@@ -32,50 +18,82 @@
int
dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
{
- int ret = 0;
- uint32_t hash = 0;
-
- switch (type) {
- case DHT_HASH_TYPE_DM:
- hash = gf_dm_hashfn (name, strlen (name));
- break;
- default:
- ret = -1;
- break;
- }
-
- if (ret == 0) {
- *hash_p = hash;
- }
-
- return ret;
+ int ret = 0;
+ uint32_t hash = 0;
+
+ switch (type) {
+ case DHT_HASH_TYPE_DM:
+ case DHT_HASH_TYPE_DM_USER:
+ hash = gf_dm_hashfn (name, strlen (name));
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ if (ret == 0) {
+ *hash_p = hash;
+ }
+
+ return ret;
}
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
-
+static
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2];
+ size_t new_len;
+
+ if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy (modified,original+matches[1].rm_so,
+ new_len);
+ modified[new_len] = '\0';
+ return _gf_true;
+ }
+ }
+ }
+
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified,original);
+ return _gf_false;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
-
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
-
- return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = this->private;
+ size_t len = 0;
+ gf_boolean_t munged = _gf_false;
+
+ if (priv->extra_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ gf_msg_trace (this->name, 0, "trying regex for %s", name);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ if (munged) {
+ gf_msg_debug (this->name, 0,
+ "munged down to %s", rsync_friendly_name);
+ }
+ }
+
+ if (!munged) {
+ rsync_friendly_name = (char *)name;
+ }
+
+ return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 767be38b41c..590d0043507 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1,212 +1,711 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
+#include "dht-helper.h"
+
+
+/*
+ * Release callback for a dht_fd_ctx_t: frees the object passed in 'data'.
+ * It is the function handed to GF_REF_INIT when an fd ctx is created.
+ */
+void
+dht_free_fd_ctx (void *data)
+{
+        dht_fd_ctx_t *ctx = (dht_fd_ctx_t *)data;
+
+        GF_FREE (ctx);
+}
+
+
+/*
+ * Detach this xlator's ctx from 'fd' and drop the reference it held.
+ * Returns the result of fd_ctx_del(), or -1 when 'this' or 'fd' is NULL.
+ */
+int32_t
+dht_fd_ctx_destroy (xlator_t *this, fd_t *fd)
+{
+        uint64_t      raw = 0;
+        int32_t       ret = -1;
+        dht_fd_ctx_t *ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        ret = fd_ctx_del (fd, this, &raw);
+        if (ret != 0) {
+                goto out;
+        }
+
+        /* the stored value is the ctx pointer; release it if one was set */
+        ctx = (dht_fd_ctx_t *)raw;
+        if (ctx != NULL) {
+                GF_REF_PUT (ctx);
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Allocate a new dht_fd_ctx_t recording 'dst' and store it in the fd ctx
+ * with __fd_ctx_set(). The visible caller (dht_fd_ctx_set) invokes this
+ * with fd->lock held. Returns the __fd_ctx_set() result, or -1 on invalid
+ * arguments or allocation failure.
+ */
+static int
+__dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *dst)
+{
+        int           ret = -1;
+        dht_fd_ctx_t *ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_fd_ctx_t);
+        if (ctx == NULL) {
+                goto out;
+        }
+
+        ctx->opened_on_dst = (uint64_t) dst;
+        GF_REF_INIT (ctx, dht_free_fd_ctx);
+
+        ret = __fd_ctx_set (fd, this, (uint64_t) ctx);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_FD_CTX_SET_FAILED,
+                        "Failed to set fd ctx in fd=0x%p", fd);
+                /* nothing else references the ctx; drop the initial ref */
+                GF_REF_PUT (ctx);
+        }
+out:
+        return ret;
+}
+
+/*
+ * Record in the fd ctx that 'fd' has been opened on subvolume 'dst'.
+ *
+ * If a ctx already exists it is updated in place (a warning is logged when
+ * it pointed at a different subvolume); otherwise a new ctx is allocated
+ * and stored. Runs under fd->lock.
+ *
+ * Returns 0 on success, a negative value on failure (including NULL
+ * 'this' or 'fd').
+ */
int
-dht_frame_return (call_frame_t *frame)
+dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *dst)
+{
+        dht_fd_ctx_t *fd_ctx = NULL;
+        uint64_t      value = 0;
+        int           ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        {
+                ret = __fd_ctx_get (fd, this, &value);
+                /* A ctx is present only when the lookup succeeded
+                 * (ret == 0, cf. dht_fd_ctx_get which treats ret < 0 as
+                 * "not found") and the stored value is non-zero. Testing
+                 * for a non-zero ret here would make this branch
+                 * unreachable and allocate a second ctx over the first. */
+                if ((ret == 0) && value) {
+
+                        fd_ctx = (dht_fd_ctx_t *) value;
+                        if (fd_ctx->opened_on_dst == (uint64_t) dst) {
+                                /* This could happen due to racing
+                                 * check_progress tasks*/
+                                goto unlock;
+                        } else {
+                                /* This would be a big problem*/
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        DHT_MSG_INVALID_VALUE,
+                                        "Different dst found in the fd ctx");
+
+                                /* Overwrite and hope for the best*/
+                                fd_ctx->opened_on_dst = (uint64_t)dst;
+                                goto unlock;
+                        }
+
+                }
+                ret = __dht_fd_ctx_set (this, fd, dst);
+        }
+unlock:
+        UNLOCK (&fd->lock);
+out:
+        return ret;
+}
+
+
+
+/*
+ * Look up this xlator's ctx on 'fd' and return it with an extra reference
+ * taken under fd->lock. The caller must GF_REF_PUT the result. Returns
+ * NULL when the arguments are invalid or no ctx is stored.
+ */
+static
+dht_fd_ctx_t *
+dht_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        dht_fd_ctx_t *ctx = NULL;
+        uint64_t      raw = 0;
+        int           rc = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        {
+                rc = __fd_ctx_get (fd, this, &raw);
+                if ((rc >= 0) && (raw != 0)) {
+                        ctx = (dht_fd_ctx_t *)raw;
+                        GF_REF_GET (ctx);
+                }
+        }
+        UNLOCK (&fd->lock);
+
+out:
+        return ctx;
+}
+
+/*
+ * Report whether the fd ctx of 'fd' records 'dst' as the subvolume the fd
+ * was opened on. Returns _gf_false when no ctx is found.
+ */
+gf_boolean_t
+dht_fd_open_on_dst (xlator_t *this, fd_t *fd, xlator_t *dst)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
+ dht_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t opened = _gf_false;
+
+ fd_ctx = dht_fd_ctx_get (this, fd);
+
+ if (fd_ctx) {
+ if (fd_ctx->opened_on_dst == (uint64_t) dst) {
+ opened = _gf_true;
+ }
+ /* balance the reference taken by dht_fd_ctx_get() */
+ GF_REF_PUT (fd_ctx);
+ }
+
+ return opened;
+}
- if (!frame)
- return -1;
- local = frame->local;
+/*
+ * Release callback for a dht_migrate_info_t: frees the object passed in
+ * 'data'. It is the function handed to GF_REF_INIT in
+ * dht_inode_ctx_set_mig_info().
+ */
+void
+dht_free_mig_info (void *data)
+{
+ dht_migrate_info_t *miginfo = NULL;
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
- }
- UNLOCK (&frame->lock);
+ miginfo = data;
+ GF_FREE (miginfo);
- return this_call_cnt;
+ return;
+}
+
+/*
+ * Allocate a migration-info record holding 'src_subvol' and 'dst_subvol'
+ * and store it in the inode ctx via inode_ctx_set1(). Returns the
+ * inode_ctx_set1() result, or -1 when the allocation fails. On a negative
+ * result the freshly created record is released again.
+ */
+static int
+dht_inode_ctx_set_mig_info (xlator_t *this, inode_t *inode,
+                            xlator_t *src_subvol, xlator_t *dst_subvol)
+{
+        int                 ret = -1;
+        uint64_t            value = 0;
+        dht_migrate_info_t *info = NULL;
+
+        info = GF_CALLOC (1, sizeof (*info), gf_dht_mt_miginfo_t);
+        if (!info) {
+                return ret;
+        }
+
+        info->src_subvol = src_subvol;
+        info->dst_subvol = dst_subvol;
+        GF_REF_INIT (info, dht_free_mig_info);
+
+        value = (uint64_t) info;
+
+        ret = inode_ctx_set1 (inode, this, &value);
+        if (ret < 0) {
+                GF_REF_PUT (info);
+        }
+
+        return ret;
}
+/*
+ * Fetch the migration info stored in the inode ctx.
+ *
+ * src_subvol / dst_subvol: optional out-parameters; each is written only
+ * when it is non-NULL and a migration-info record was found. They are left
+ * untouched otherwise.
+ *
+ * Returns the __inode_ctx_get1() result; callers in this file also treat
+ * unset out-parameters as "no info".
+ */
int
-dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
+dht_inode_ctx_get_mig_info (xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
+ int ret = -1;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get1 (inode, this, &tmp_miginfo);
+ if ((ret < 0) || (tmp_miginfo == 0)) {
+ UNLOCK (&inode->lock);
+ goto out;
+ }
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
- }
+ miginfo = (dht_migrate_info_t *)tmp_miginfo;
+ /* hold a reference so the record can be read after unlocking */
+ GF_REF_GET (miginfo);
+ }
+ UNLOCK (&inode->lock);
- conf = this->private;
+ if (src_subvol)
+ *src_subvol = miginfo->src_subvol;
- max = conf->subvolume_cnt;
- cnt = dht_subvol_cnt (this, subvol);
+ if (dst_subvol)
+ *dst_subvol = miginfo->dst_subvol;
- y = ((x * max) + cnt);
+ GF_REF_PUT (miginfo);
out:
- if (y_p)
- *y_p = y;
+ return ret;
+}
- return 0;
+/*
+ * Decide whether cached migration info can be trusted for an operation
+ * that was sent to subvolume 'current'. Returns _gf_true when the info is
+ * unset or stale, _gf_false when it is usable.
+ */
+gf_boolean_t
+dht_mig_info_is_invalid (xlator_t *current, xlator_t *src_subvol,
+                         xlator_t *dst_subvol)
+{
+        /* Not set. */
+        if ((src_subvol == NULL) || (dst_subvol == NULL)) {
+                return _gf_true;
+        }
+
+        /* Invalid scenarios:
+         *  - src_subvol differs from the subvol the current op was sent
+         *    to, i.e. the cached subvol changed between the last
+         *    mig_info_set and now.
+         *  - current == dst_subvol: the file was migrated without any FOP
+         *    detecting a P2, so the old dst is now the current subvol.
+         *
+         * One outdated case is still not caught: a file that went through
+         * several migrations and ended up back on the src_subvol on which
+         * the mig_info was first set.
+         */
+        if ((current != src_subvol) || (current == dst_subvol)) {
+                return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Decrement the outstanding-call counter of the frame's local under
+ * frame->lock and return the new value. Returns -1 when 'frame' is NULL.
+ */
+int
+dht_frame_return (call_frame_t *frame)
+{
+        dht_local_t *local = NULL;
+        int          remaining = -1;
+
+        if (frame == NULL) {
+                return remaining;
+        }
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                local->call_cnt--;
+                remaining = local->call_cnt;
+        }
+        UNLOCK (&frame->lock);
+
+        return remaining;
}
+/*
+ * Detect a "<name>@<this->name>:<child-name>" suffix on loc->name.
+ *
+ * For the first child whose pattern matches, 'new_loc' is filled with the
+ * name (and the path, when the path carries the same suffix) stripped of
+ * that suffix plus references to loc's inode and parent, and '*subvol' is
+ * set to the matching child.
+ *
+ * Returns 1 when a child matched, 0 otherwise (including allocation
+ * failure, in which case the partial buffers are freed).
+ *
+ * NOTE(review): loc->path is passed to fnmatch()/strlen() and 'subvol' is
+ * dereferenced without a NULL check - confirm callers always supply them.
+ */
int
-dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
+dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
+ char *new_name = NULL;
+ char *new_path = NULL;
+ xlator_list_t *trav = NULL;
+ char key[1024] = {0,};
+ int ret = 0; /* not found */
+
+ /* Why do other tasks if first required 'char' itself is not there */
+ if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
+ goto out;
+ trav = this->children;
+ while (trav) {
+ /* glob pattern: anything followed by "@<this>:<child>" */
+ snprintf (key, 1024, "*@%s:%s", this->name, trav->xlator->name);
+ if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) {
+ new_name = GF_CALLOC(strlen (loc->name),
+ sizeof (char),
+ gf_common_mt_char);
+ if (!new_name)
+ goto out;
+ if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) {
+ new_path = GF_CALLOC(strlen (loc->path),
+ sizeof (char),
+ gf_common_mt_char);
+ if (!new_path)
+ goto out;
+ /* key is one char ('*') longer than the suffix, so this
+ * copies everything before the '@' suffix */
+ strncpy (new_path, loc->path, (strlen (loc->path) -
+ strlen (key) + 1));
+ }
+ strncpy (new_name, loc->name, (strlen (loc->name) -
+ strlen (key) + 1));
+
+ if (new_loc) {
+ new_loc->path = ((new_path) ? new_path:
+ gf_strdup (loc->path));
+ new_loc->name = new_name;
+ new_loc->inode = inode_ref (loc->inode);
+ new_loc->parent = inode_ref (loc->parent);
+ }
+ *subvol = trav->xlator;
+ ret = 1; /* success */
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ if (!ret) {
+ /* !success */
+ GF_FREE (new_path);
+ GF_FREE (new_name);
+ }
+ return ret;
+}
+
+/*
+ * Map a client id to its subvolume by looking up the decimal string form
+ * of 'client_id' in conf->leaf_to_subvol. Returns NULL when the lookup
+ * fails.
+ */
+static xlator_t *
+dht_get_subvol_from_id(xlator_t *this, int client_id)
+{
+        xlator_t   *xl = NULL;
+        dht_conf_t *conf = NULL;
+        /* Room for any int in decimal: sign, digits and the NUL. A fixed
+         * 6-byte buffer overflows for ids of six or more characters. */
+        char        sid[sizeof (int) * 3 + 2] = { 0 };
+
+        conf = this->private;
+
+        snprintf (sid, sizeof (sid), "%d", client_id);
+        if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl))
+                xl = NULL;
+
+        return xl;
+}
+
+/*
+ * Resolve the subvolume encoded in the transformed value 'y'.
+ *
+ * The client id is extracted with gf_deitransform() and mapped through
+ * dht_get_subvol_from_id(); when that yields nothing, the first configured
+ * subvolume is used instead. The result is stored in '*subvol_p' when
+ * 'subvol_p' is non-NULL.
+ *
+ * Returns 0 on success, -1 when the xlator has no private conf.
+ */
+int
+dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p)
+{
+ int client_id = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
- conf = this->private;
- max = conf->subvolume_cnt;
+ if (!this->private)
+ return -1;
+
+ conf = this->private;
- cnt = y % max;
- x = y / max;
+ client_id = gf_deitransform(this, y);
- subvol = conf->subvolumes[cnt];
+ subvol = dht_get_subvol_from_id(this, client_id);
- if (subvol_p)
- *subvol_p = subvol;
+ /* unknown id: fall back to the first subvolume */
+ if (!subvol)
+ subvol = conf->subvolumes[0];
- if (x_p)
- *x_p = x;
+ if (subvol_p)
+ *subvol_p = subvol;
- return 0;
+ return 0;
}
+/*
+ * Build a "<subvol-name>:<gfid>" description of 'lock'. The string is
+ * allocated by gf_asprintf() and must be released by the caller with
+ * GF_FREE. Returns NULL when 'lock' is NULL or nothing was allocated.
+ */
+char *
+dht_lock_asprintf (dht_lock_t *lock)
+{
+        char  gfid_str[GF_UUID_BUF_SIZE] = {0, };
+        char *text = NULL;
+
+        if (lock != NULL) {
+                uuid_utoa_r (lock->loc.gfid, gfid_str);
+                gf_asprintf (&text, "%s:%s", lock->xl->name, gfid_str);
+        }
+
+        return text;
+}
+
+/*
+ * Log every entry of 'lk_array' (one message per lock, prefixed with its
+ * index) under log domain 'name' at 'log_level'. Does nothing when the
+ * array is NULL or 'count' is 0.
+ */
+void
+dht_log_lk_array (char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
+                  int count)
+{
+        int   i = 0;
+        char *lk_buf = NULL;
+
+        if ((lk_array == NULL) || (count == 0))
+                goto out;
+
+        for (i = 0; i < count; i++) {
+                lk_buf = dht_lock_asprintf (lk_array[i]);
+                /* dht_lock_asprintf() yields NULL for a NULL entry or a
+                 * failed allocation; never hand a NULL pointer to "%s". */
+                gf_msg (name, log_level, 0, DHT_MSG_LK_ARRAY_INFO,
+                        "%d. %s", i, lk_buf ? lk_buf : "(null)");
+                GF_FREE (lk_buf);
+        }
+
+out:
+        return;
+}
+
+/*
+ * Destroy a lock frame after detaching the lock array from its local.
+ * NOTE(review): the array is presumably owned by another frame's local,
+ * so it is cleared here to keep the teardown from freeing it - confirm
+ * against DHT_STACK_DESTROY.
+ */
+void
+dht_lock_stack_destroy (call_frame_t *lock_frame)
+{
+        dht_local_t *lock_local = lock_frame->local;
+
+        lock_local->lock.lk_count = 0;
+        lock_local->lock.locks = NULL;
+
+        DHT_STACK_DESTROY (lock_frame);
+}
+
+/*
+ * Release a single dht_lock_t: wipe its loc, free the domain string and
+ * return the object to its mem-pool. A NULL 'lock' is ignored.
+ */
+void
+dht_lock_free (dht_lock_t *lock)
+{
+        if (lock != NULL) {
+                loc_wipe (&lock->loc);
+                GF_FREE (lock->domain);
+                mem_put (lock);
+        }
+}
+
+/*
+ * Free the first 'count' locks referenced by 'lk_array' and reset each
+ * slot to NULL. The array itself is not freed. A NULL array is ignored.
+ */
+void
+dht_lock_array_free (dht_lock_t **lk_array, int count)
+{
+        int         idx = 0;
+        dht_lock_t *entry = NULL;
+
+        if (lk_array == NULL) {
+                return;
+        }
+
+        for (idx = 0; idx < count; idx++) {
+                /* clear the slot before freeing what it pointed to */
+                entry = lk_array[idx];
+                lk_array[idx] = NULL;
+                dht_lock_free (entry);
+        }
+}
+
+/*
+ * Create a lock request for 'loc' on subvolume 'xl'.
+ *
+ * The object comes from conf->lock_pool; 'domain' is duplicated. Only the
+ * inode (referenced) and gfid of 'loc' are copied into the lock's loc.
+ * Returns the new lock, or NULL on allocation failure.
+ */
+dht_lock_t *
+dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+              const char *domain)
+{
+        dht_conf_t *conf = this->private;
+        dht_lock_t *new_lock = NULL;
+
+        new_lock = mem_get0 (conf->lock_pool);
+        if (!new_lock) {
+                return NULL;
+        }
+
+        new_lock->xl = xl;
+        new_lock->type = type;
+
+        new_lock->domain = gf_strdup (domain);
+        if (!new_lock->domain) {
+                dht_lock_free (new_lock);
+                return NULL;
+        }
+
+        /* Fill only inode and gfid.
+           posix and protocol/server prefer pargfid/basename over
+           gfid/inode for resolution when all three parameters of loc_t
+           are present. That would allow the following hypothetical
+           sequence, which must be avoided:
+
+           1. rebalance did a lookup on a dentry and got a gfid.
+           2. rebalance acquires lock on loc_t which was filled with gfid
+              and path (pargfid/bname) from step 1.
+           3. somebody deleted and recreated the same file
+           4. rename on the same path acquires lock on loc_t which now
+              points to a different inode (and hence gets the lock).
+           5. rebalance continues to migrate file (note that not all fops
+              done by rebalance during migration are inode/gfid based
+              Eg., unlink)
+           6. rename continues.
+        */
+        new_lock->loc.inode = inode_ref (loc->inode);
+        loc_gfid (loc, new_lock->loc.gfid);
+
+        return new_lock;
+}
+
+/*
+ * Attach a lock array and its inodelk callback to the frame's local,
+ * creating the local first when the frame has none, then order the
+ * requests with dht_lock_order_requests().
+ *
+ * Returns 0 on success, -1 when no local could be obtained, or the
+ * negative value reported by dht_lock_order_requests().
+ */
+int
+dht_local_lock_init (call_frame_t *frame, dht_lock_t **lk_array,
+                     int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+{
+        int          ret = -1;
+        dht_local_t *local = frame->local;
+
+        if (!local) {
+                local = dht_local_init (frame, NULL, NULL, 0);
+                if (!local) {
+                        return ret;
+                }
+        }
+
+        local->lock.inodelk_cbk = inodelk_cbk;
+        local->lock.locks = lk_array;
+        local->lock.lk_count = lk_count;
+
+        ret = dht_lock_order_requests (local->lock.locks,
+                                       local->lock.lk_count);
+
+        /* any non-negative result is reported as plain success */
+        return (ret < 0) ? ret : 0;
+}
void
dht_local_wipe (xlator_t *this, dht_local_t *local)
{
- if (!local)
- return;
+ if (!local)
+ return;
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
- if (local->xattr)
- dict_unref (local->xattr);
+ if (local->xattr)
+ dict_unref (local->xattr);
- if (local->inode)
- inode_unref (local->inode);
+ if (local->inode)
+ inode_unref (local->inode);
- if (local->layout) {
- dht_layout_unref (this, local->layout);
+ if (local->layout) {
+ dht_layout_unref (this, local->layout);
local->layout = NULL;
}
- loc_wipe (&local->linkfile.loc);
+ loc_wipe (&local->linkfile.loc);
+
+ if (local->linkfile.xattr)
+ dict_unref (local->linkfile.xattr);
- if (local->linkfile.xattr)
- dict_unref (local->linkfile.xattr);
+ if (local->linkfile.inode)
+ inode_unref (local->linkfile.inode);
- if (local->linkfile.inode)
- inode_unref (local->linkfile.inode);
+ if (local->fd) {
+ fd_unref (local->fd);
+ local->fd = NULL;
+ }
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
+ if (local->params) {
+ dict_unref (local->params);
+ local->params = NULL;
+ }
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
if (local->selfheal.layout) {
dht_layout_unref (this, local->selfheal.layout);
local->selfheal.layout = NULL;
}
- GF_FREE (local);
-}
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref (this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
+ dht_lock_array_free (local->lock.locks, local->lock.lk_count);
+ GF_FREE (local->lock.locks);
-dht_local_t *
-dht_local_init (call_frame_t *frame)
-{
- dht_local_t *local = NULL;
+ GF_FREE (local->newpath);
- /* TODO: use mem-pool */
- local = GF_CALLOC (1, sizeof (*local),
- gf_dht_mt_dht_local_t);
+ GF_FREE (local->key);
- if (!local)
- return NULL;
+ if (local->rebalance.xdata)
+ dict_unref (local->rebalance.xdata);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+ if (local->rebalance.xattr)
+ dict_unref (local->rebalance.xattr);
- frame->local = local;
+ GF_FREE (local->rebalance.vector);
- return local;
+ if (local->rebalance.iobref)
+ iobref_unref (local->rebalance.iobref);
+
+ if (local->stub) {
+ call_stub_destroy (local->stub);
+ local->stub = NULL;
+ }
+
+ mem_put (local);
}
-char *
-basestr (const char *str)
+dht_local_t *
+dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
- char *basestr = NULL;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
+ local = mem_get0 (THIS->local_pool);
+ if (!local)
+ goto out;
- return basestr;
-}
+ if (loc) {
+ ret = loc_copy (&local->loc, loc);
+ if (ret)
+ goto out;
+
+ inode = loc->inode;
+ }
+
+ if (fd) {
+ local->fd = fd_ref (fd);
+ if (!inode)
+ inode = fd->inode;
+ }
+
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
+
+ if (inode) {
+ local->layout = dht_layout_get (frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached (frame->this,
+ inode);
+ }
+ frame->local = local;
+
+out:
+ if (ret) {
+ if (local)
+ mem_put (local);
+ local = NULL;
+ }
+ return local;
+}
xlator_t *
dht_first_up_subvol (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+ time_t time = 0;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolume_status[i]) {
- child = conf->subvolumes[i];
- break;
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvol_up_time[i]) {
+ if (!time) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ } else if (time > conf->subvol_up_time[i]) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
- return child;
+out:
+ return child;
}
xlator_t *
@@ -217,6 +716,9 @@ dht_last_up_subvol (xlator_t *this)
int i = 0;
conf = this->private;
+ if (!conf)
+ goto out;
+
LOCK (&conf->subvolume_lock);
{
for (i = conf->subvolume_cnt-1; i >= 0; i--) {
@@ -228,6 +730,7 @@ dht_last_up_subvol (xlator_t *this)
}
UNLOCK (&conf->subvolume_lock);
+out:
return child;
}
@@ -236,27 +739,40 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
{
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ methods = &(conf->methods);
- if (is_fs_root (loc)) {
+ if (__is_root_gfid (loc->gfid)) {
subvol = dht_first_up_subvol (this);
goto out;
}
+ GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
layout = dht_layout_get (this, loc->parent);
if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout missing path=%s parent=%"PRId64,
- loc->path, loc->parent->ino);
+ gf_msg_debug (this->name, 0,
+ "Missing layout. path=%s, parent gfid =%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
goto out;
}
- subvol = dht_layout_search (this, layout, loc->name);
+ subvol = methods->layout_search (this, layout, loc->name);
if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find subvolume for path=%s",
- loc->path);
+ gf_msg_debug (this->name, 0,
+ "No hashed subvolume for path=%s",
+ loc->path);
goto out;
}
@@ -275,6 +791,8 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
layout = dht_layout_get (this, inode);
@@ -282,7 +800,7 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
goto out;
}
- subvol = layout->list[0].xlator;
+ subvol = layout->list[0].xlator;
out:
if (layout) {
@@ -296,144 +814,1696 @@ out:
xlator_t *
dht_subvol_next (xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- if ((i + 1) < conf->subvolume_cnt)
- next = conf->subvolumes[i + 1];
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
+ }
+ }
- return next;
+out:
+ return next;
}
+/*
+ * Return the subvolume that follows 'prev' in conf->subvolumes, wrapping
+ * around to the first entry when 'prev' is the last one. Returns NULL when
+ * the xlator has no conf or 'prev' is not in the list.
+ */
+xlator_t *
+dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+{
+        dht_conf_t *conf = this->private;
+        int         idx = 0;
+
+        if (!conf) {
+                return NULL;
+        }
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (conf->subvolumes[idx] != prev) {
+                        continue;
+                }
+                /* the modulo maps the slot after the last entry back to
+                 * index 0 */
+                return conf->subvolumes[(idx + 1) % conf->subvolume_cnt];
+        }
+
+        return NULL;
+}
int
dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
-
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- ret = i;
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
+ }
+ }
- return ret;
+out:
+ return ret;
}
-#define set_if_greater(a, b) do { \
- if ((a) < (b)) \
- (a) = (b); \
- } while (0)
+#define set_if_greater(a, b) do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
+
+
+/*
+ * Replace the timestamp (a, an) with (b, bn) when the latter is later:
+ * seconds are compared first, nanoseconds break ties. Arguments are
+ * evaluated more than once, so pass only side-effect-free lvalues.
+ * The last line deliberately has no trailing backslash; one there would
+ * splice whatever follows the macro into its body.
+ */
+#define set_if_greater_time(a, an, b, bn) do {                          \
+                if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) {   \
+                        (a) = (b);                                      \
+                        (an) = (bn);                                    \
+                }                                                       \
+        } while (0)
+
int
dht_iatt_merge (xlator_t *this, struct iatt *to,
- struct iatt *from, xlator_t *subvol)
+ struct iatt *from, xlator_t *subvol)
{
if (!from || !to)
return 0;
- to->ia_dev = from->ia_dev;
+ to->ia_dev = from->ia_dev;
- dht_itransform (this, subvol, from->ia_ino, &to->ia_ino);
- to->ia_gen = from->ia_gen;
+ gf_uuid_copy (to->ia_gfid, from->ia_gfid);
- to->ia_prot = from->ia_prot;
- to->ia_type = from->ia_type;
- to->ia_nlink = from->ia_nlink;
- to->ia_rdev = from->ia_rdev;
- to->ia_size += from->ia_size;
- to->ia_blksize = from->ia_blksize;
- to->ia_blocks += from->ia_blocks;
+ to->ia_ino = from->ia_ino;
+ to->ia_prot = from->ia_prot;
+ to->ia_type = from->ia_type;
+ to->ia_nlink = from->ia_nlink;
+ to->ia_rdev = from->ia_rdev;
+ to->ia_size += from->ia_size;
+ to->ia_blksize = from->ia_blksize;
+ to->ia_blocks += from->ia_blocks;
- set_if_greater (to->ia_uid, from->ia_uid);
- set_if_greater (to->ia_gid, from->ia_gid);
+ if (IA_ISDIR (from->ia_type)) {
+ to->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ to->ia_size = DHT_DIR_STAT_SIZE;
+ }
+ set_if_greater (to->ia_uid, from->ia_uid);
+ set_if_greater (to->ia_gid, from->ia_gid);
- set_if_greater (to->ia_atime, from->ia_atime);
- set_if_greater (to->ia_mtime, from->ia_mtime);
- set_if_greater (to->ia_ctime, from->ia_ctime);
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
+ from->ia_atime, from->ia_atime_nsec);
+ set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
+ from->ia_mtime, from->ia_mtime_nsec);
+ set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
+ from->ia_ctime, from->ia_ctime_nsec);
- return 0;
+ return 0;
}
+/*
+ * Fill 'child' as the loc of entry 'name' inside directory 'parent':
+ * path is "<parent path>/<name>" (or "/<name>" under the root), name
+ * points at the last path component, parent references parent->inode and
+ * inode is a fresh inode from the parent's inode table.
+ *
+ * Returns 0 on success; on failure wipes 'child' and returns -1.
+ *
+ * NOTE(review): the !child path also reaches loc_wipe (child) - confirm
+ * loc_wipe tolerates NULL. 'parent' is used without a NULL check.
+ */
int
-dht_frame_su_do (call_frame_t *frame)
+dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
- dht_local_t *local = NULL;
+ if (!child) {
+ goto err;
+ }
+
+ if (strcmp (parent->path, "/") == 0)
+ gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
+
+ if (!child->path) {
+ goto err;
+ }
+
+ /* name aliases the tail of path; it is not a separate allocation */
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
+
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+
+ if (!child->inode) {
+ goto err;
+ }
+
+ return 0;
+err:
+ loc_wipe (child);
+ return -1;
+}
+
+/*
+ * Allocate conf->local_subvols with one slot per child of 'this' and reset
+ * conf->local_subvols_cnt to 0. Returns 0 on success, -1 when 'conf' is
+ * NULL or the allocation fails.
+ */
+int
+dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+        xlator_list_t *child = NULL;
+        int            nchildren = 0;
+
+        if (conf == NULL) {
+                return -1;
+        }
+
+        child = this->children;
+        while (child != NULL) {
+                nchildren++;
+                child = child->next;
+        }
+
+        conf->local_subvols = GF_CALLOC (nchildren, sizeof (xlator_t *),
+                                         gf_dht_mt_xlator_t);
+        if (conf->local_subvols == NULL) {
+                return -1;
+        }
+
+        conf->local_subvols_cnt = 0;
+
+        return 0;
+}
+
+/*
+ * Populate the per-subvolume tables of 'conf' from the children of 'this':
+ * the subvolume array itself plus the status, last-event, up-time,
+ * disk-usage and decommissioned-brick arrays, each with one slot per
+ * child.
+ *
+ * Returns 0 on success, -1 when 'conf' is NULL or any allocation fails
+ * (arrays allocated before the failure are left in 'conf').
+ */
+int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+        xlator_list_t *child = NULL;
+        int            total = 0;
+        int            idx = 0;
+
+        if (conf == NULL) {
+                return -1;
+        }
+
+        for (child = this->children; child != NULL; child = child->next) {
+                total++;
+        }
+
+        conf->subvolumes = GF_CALLOC (total, sizeof (xlator_t *),
+                                      gf_dht_mt_xlator_t);
+        if (conf->subvolumes == NULL) {
+                return -1;
+        }
+        conf->subvolume_cnt = total;
+
+        conf->local_subvols_cnt = 0;
+
+        dht_set_subvol_range(this);
+
+        for (child = this->children; child != NULL; child = child->next) {
+                conf->subvolumes[idx] = child->xlator;
+                idx++;
+        }
+
+        conf->subvolume_status = GF_CALLOC (idx, sizeof (char),
+                                            gf_dht_mt_char);
+        if (conf->subvolume_status == NULL) {
+                return -1;
+        }
+
+        conf->last_event = GF_CALLOC (idx, sizeof (int),
+                                      gf_dht_mt_char);
+        if (conf->last_event == NULL) {
+                return -1;
+        }
+
+        conf->subvol_up_time = GF_CALLOC (idx, sizeof (time_t),
+                                          gf_dht_mt_subvol_time);
+        if (conf->subvol_up_time == NULL) {
+                return -1;
+        }
+
+        conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
+                                    gf_dht_mt_dht_du_t);
+        if (conf->du_stats == NULL) {
+                return -1;
+        }
+
+        conf->decommissioned_bricks = GF_CALLOC (idx, sizeof (xlator_t *),
+                                                 gf_dht_mt_xlator_t);
+        if (conf->decommissioned_bricks == NULL) {
+                return -1;
+        }
+
+        return 0;
+}
+
+
+/*
+ op_ret values :
+ 0 : Success.
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
+
+/*
+ * Completion callback of the migration-complete check task.
+ *
+ * On op_ret == 0 the (refreshed) local->cached_subvol is handed to the
+ * pending operation through local->rebalance.target_op_fn; for any other
+ * op_ret, or when no cached subvol is set, the callback is invoked with a
+ * NULL subvol. 'op_ret' is passed through unchanged in every case.
+ * Always returns 0.
+ */
+static int
+dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
local = frame->local;
- local->uid = frame->root->uid;
- local->gid = frame->root->gid;
+ if (op_ret != 0)
+ goto out;
+
+ if (local->cached_subvol == NULL) {
+ /* only op_errno is set here; op_ret stays 0 for the callback */
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ subvol = local->cached_subvol;
- frame->root->uid = 0;
- frame->root->gid = 0;
+out:
+ local->rebalance.target_op_fn (THIS, subvol, frame, op_ret);
return 0;
}
+/*
+ * Synctask body run once a fop has seen "migration complete" on a file.
+ *
+ * 'data' is the call frame; its local supplies the loc and/or fd. The task
+ *   1. reads this layer's linkto xattr from the cached subvolume,
+ *   2. looks the file up again to refresh the layout and cached subvol,
+ *   3. drops any migration info kept in the inode ctx, and
+ *   4. if no such info was present, re-opens every non-anonymous fd of the
+ *      inode on the new subvolume and records that in the fd ctx.
+ *
+ * Returns 0 on success, -1 on failure (local->op_errno is set where the
+ * cause is known) and 1 when the linkto xattr is absent, i.e. the file is
+ * being migrated by a different DHT layer.
+ */
int
-dht_frame_su_undo (call_frame_t *frame)
+dht_migration_complete_check_task (void *data)
{
- dht_local_t *local = NULL;
+        int                 ret = -1;
+        xlator_t           *src_node = NULL;
+        xlator_t           *dst_node = NULL, *linkto_target = NULL;
+        dht_local_t        *local = NULL;
+        dict_t             *dict = NULL;
+        struct iatt         stbuf = {0,};
+        xlator_t           *this = NULL;
+        call_frame_t       *frame = NULL;
+        loc_t               tmp_loc = {0,};
+        char               *path = NULL;
+        dht_conf_t         *conf = NULL;
+        inode_t            *inode = NULL;
+        fd_t               *iter_fd = NULL;
+        fd_t               *tmp = NULL;
+        uint64_t            tmp_miginfo = 0;
+        dht_migrate_info_t *miginfo = NULL;
+        int                 open_failed = 0;
+
+        this = THIS;
+        frame = data;
+        local = frame->local;
+        conf = this->private;
+
+        src_node = local->cached_subvol;
+
+        if (!local->loc.inode && !local->fd) {
+                local->op_errno = EINVAL;
+                goto out;
+        }
+
+        inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+        /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+         * as root:root. If a fd is already open, access check wont be done*/
+
+        if (!local->loc.inode) {
+                ret = syncop_fgetxattr (src_node, local->fd, &dict,
+                                        conf->link_xattr_name, NULL, NULL);
+        } else {
+                SYNCTASK_SETID (0, 0);
+                ret = syncop_getxattr (src_node, &local->loc, &dict,
+                                       conf->link_xattr_name, NULL, NULL);
+                SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+        }
+
+
+        /*
+         * Each DHT xlator layer has its own name for the linkto xattr.
+         * If the file mode bits indicate that the file is being migrated
+         * but this layer's linkto xattr is not set, it means that another
+         * DHT layer is migrating the file. In this case, return 1 so
+         * the mode bits can be passed on to the higher layer for appropriate
+         * action.
+         */
+        if (-ret == ENODATA) {
+                /* This DHT translator is not migrating this file */
+
+                ret = inode_ctx_reset1 (inode, this, &tmp_miginfo);
+                if (tmp_miginfo) {
+
+                        /* This can be a problem if the file was
+                         * migrated by two different layers. Raise
+                         * a warning here.
+                         */
+                        /* tmp_loc has not been filled in yet at this
+                         * point, so identify the file from the local loc
+                         * or the inode instead. */
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_HAS_MIGINFO,
+                                "%s: Found miginfo in the inode ctx",
+                                local->loc.path ? local->loc.path :
+                                uuid_utoa (inode->gfid));
+
+                        miginfo = (void *)tmp_miginfo;
+                        GF_REF_PUT (miginfo);
+                }
+                ret = 1;
+                goto out;
+        }
+
+        if (!ret)
+                linkto_target = dht_linkfile_subvol (this, NULL, NULL, dict);
+
+        if (local->loc.inode) {
+                loc_copy (&tmp_loc, &local->loc);
+        } else {
+                tmp_loc.inode = inode_ref (inode);
+                gf_uuid_copy (tmp_loc.gfid, inode->gfid);
+        }
+
+        ret = syncop_lookup (this, &tmp_loc, &stbuf, 0, 0, 0);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        DHT_MSG_FILE_LOOKUP_FAILED,
+                        "%s: failed to lookup the file on %s",
+                        tmp_loc.path ? tmp_loc.path : uuid_utoa (tmp_loc.gfid),
+                        this->name);
+                local->op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        dst_node = dht_subvol_get_cached (this, tmp_loc.inode);
+        if (linkto_target && dst_node != linkto_target) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_INVALID_LINKFILE,
+                        "linkto target (%s) is "
+                        "different from cached-subvol (%s). Treating %s as "
+                        "destination subvol", linkto_target->name,
+                        dst_node->name, dst_node->name);
+        }
+
+        if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_GFID_MISMATCH,
+                        "%s: gfid different on the target file on %s",
+                        tmp_loc.path ? tmp_loc.path :
+                        uuid_utoa (tmp_loc.gfid), dst_node->name);
+                ret = -1;
+                local->op_errno = EIO;
+                goto out;
+        }
+
+        /* update local. A layout is set in inode-ctx in lookup already */
+
+        dht_layout_unref (this, local->layout);
+
+        local->layout = dht_layout_get (frame->this, inode);
+        local->cached_subvol = dst_node;
+
+        ret = 0;
+
+        /* once we detect the migration complete, the inode-ctx2 is no more
+           required.. delete the ctx and also, it means, open() already
+           done on all the fd of inode */
+        ret = inode_ctx_reset1 (inode, this, &tmp_miginfo);
+        if (tmp_miginfo) {
+                miginfo = (void *)tmp_miginfo;
+                GF_REF_PUT (miginfo);
+                goto out;
+        }
+
+        /* perform 'open()' on all the fd's present on the inode */
+        if (tmp_loc.path == NULL) {
+                inode_path (inode, NULL, &path);
+                if (path)
+                        tmp_loc.path = path;
+        }
+
+        LOCK(&inode->lock);
+
+        if (list_empty (&inode->fd_list))
+                goto unlock;
+
+        /* perform open as root:root. There is window between linkfile
+         * creation(root:root) and setattr with the correct uid/gid
+         */
+        SYNCTASK_SETID(0, 0);
+
+        /* It's possible that we are the last user of iter_fd after each
+         * iteration. In this case the fd_unref() of iter_fd at the end of
+         * the loop will cause the destruction of the fd. So we need to
+         * iterate the list safely because iter_fd cannot be trusted.
+         */
+        list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) {
+
+                if (fd_is_anonymous (iter_fd))
+                        continue;
+
+                if (dht_fd_open_on_dst (this, iter_fd, dst_node))
+                        continue;
+
+                /* We need to release the inode->lock before calling
+                 * syncop_open() to avoid possible deadlocks. However this
+                 * can cause the iter_fd to be released by other threads.
+                 * To avoid this, we take a reference before releasing the
+                 * lock.
+                 */
+                __fd_ref(iter_fd);
+
+                UNLOCK(&inode->lock);
+
+                /* flags for open are stripped down to allow following the
+                 * new location of the file, otherwise we can get EEXIST or
+                 * truncate the file again as rebalance is moving the data */
+                ret = syncop_open (dst_node, &tmp_loc,
+                                   (iter_fd->flags &
+                                   ~(O_CREAT | O_EXCL | O_TRUNC)),
+                                   iter_fd, NULL, NULL);
+                if (ret < 0) {
+                        /* 'path' is only set when inode_path() had to be
+                         * called above; tmp_loc.path covers both that case
+                         * and a path inherited through loc_copy(). */
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                DHT_MSG_OPEN_FD_ON_DST_FAILED, "failed"
+                                " to open the fd"
+                                " (%p, flags=0%o) on file %s @ %s",
+                                iter_fd, iter_fd->flags,
+                                tmp_loc.path ? tmp_loc.path :
+                                uuid_utoa (tmp_loc.gfid),
+                                dst_node->name);
+
+                        open_failed = 1;
+                        local->op_errno = -ret;
+                        ret = -1;
+                } else {
+                        dht_fd_ctx_set (this, iter_fd, dst_node);
+                }
+
+                fd_unref(iter_fd);
+
+                LOCK(&inode->lock);
+        }
+
+        SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+        if (open_failed) {
+                ret = -1;
+                goto unlock;
+        }
+        ret = 0;
+
+unlock:
+        UNLOCK(&inode->lock);
+
+out:
+        /* Release the xattr reply returned by (f)getxattr; every exit
+         * path funnels through here. */
+        if (dict) {
+                dict_unref (dict);
+        }
+
+        loc_wipe (&tmp_loc);
+
+        return ret;
+}
+
+/*
+ * Schedule the migration-complete check for 'frame' as a synctask on the
+ * xlator's sync environment, with dht_migration_complete_check_done as
+ * its completion callback. Returns the synctask_new() result.
+ */
+int
+dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+{
+        return synctask_new (this->ctx->env,
+                             dht_migration_complete_check_task,
+                             dht_migration_complete_check_done,
+                             frame, frame);
+}
+
+/* During 'in-progress' state, both nodes should have the file */
+/*
+ op_ret values :
+ 0 : Success
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
+/*
+ * Completion callback of the rebalance-in-progress check task.
+ *
+ * On op_ret == 0 the destination subvolume is taken from the migration
+ * info in the inode ctx; when that info is unset or stale (see
+ * dht_mig_info_is_invalid) the inode's cached subvolume is used instead.
+ * The pending operation is then resumed through
+ * local->rebalance.target_op_fn with that subvolume (NULL when op_ret is
+ * non-zero or none could be determined). Always returns 0.
+ */
+static int
+dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_subvol = NULL, *src_subvol = NULL;
+ inode_t *inode = NULL;
local = frame->local;
- frame->root->uid = local->uid;
- frame->root->gid = local->gid;
+ if (op_ret != 0)
+ goto out;
+
+ /* prefer the loc's inode, fall back to the fd's */
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+
+ dht_inode_ctx_get_mig_info (THIS, inode, &src_subvol, &dst_subvol);
+ if (dht_mig_info_is_invalid (local->cached_subvol,
+ src_subvol, dst_subvol)) {
+ dst_subvol = dht_subvol_get_cached (THIS, inode);
+ if (!dst_subvol) {
+ /* only op_errno is set; op_ret is still passed on as 0 */
+ local->op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ local->rebalance.target_op_fn (THIS, dst_subvol, frame, op_ret);
return 0;
}
+/*
+ * Synctask body of the 'in-progress' migration check.
+ *
+ * Reads this layer's linkto xattr from the cached subvolume to find the
+ * migration destination, looks the file up there and compares gfids, opens
+ * every non-anonymous fd of the inode that is not yet open on the
+ * destination, and finally stores source/destination in the inode ctx.
+ *
+ * @data: the call frame; frame->local provides loc/fd and cached_subvol.
+ *
+ * Returns:
+ *   0 : success
+ *  -1 : failure (no inode/fd, getxattr/lookup/open error, gfid mismatch)
+ *   1 : this layer's linkto xattr is absent (ENODATA), i.e. the file is
+ *       not being migrated by this DHT layer
+ * A non-zero result of dht_inode_ctx_set_mig_info() is returned unchanged.
+ */
+static int
+dht_rebalance_inprogress_task (void *data)
+{
+        int                  ret         = -1;
+        xlator_t            *src_node    = NULL;
+        xlator_t            *dst_node    = NULL;
+        dht_local_t         *local       = NULL;
+        dict_t              *dict        = NULL;
+        call_frame_t        *frame       = NULL;
+        xlator_t            *this        = NULL;
+        char                *path        = NULL;
+        struct iatt          stbuf       = {0,};
+        loc_t                tmp_loc     = {0,};
+        dht_conf_t          *conf        = NULL;
+        inode_t             *inode       = NULL;
+        fd_t                *iter_fd     = NULL;
+        fd_t                *tmp         = NULL;
+        int                  open_failed = 0;
+        uint64_t             tmp_miginfo = 0;
+        dht_migrate_info_t  *miginfo     = NULL;
+        char                 gfid_str[GF_UUID_BUF_SIZE] = {0};
+        const char          *log_name    = NULL;
+
+        this = THIS;
+        frame = data;
+        local = frame->local;
+        conf = this->private;
+
+        src_node = local->cached_subvol;
+
+        if (!local->loc.inode && !local->fd)
+                goto out;
+
+        inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+        /* Name used in log messages: the path when the fop carried one,
+         * otherwise the gfid. fd based fops have no loc.path, and tmp_loc
+         * is not filled in until after the xattr has been read, so neither
+         * may be passed to "%s" directly. A private buffer is used for the
+         * gfid so the string stays valid for the whole function.
+         */
+        if (inode)
+                uuid_utoa_r (inode->gfid, gfid_str);
+        log_name = local->loc.path ? local->loc.path : gfid_str;
+
+        /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+         * as root:root. If a fd is already open, access check wont be done*/
+        if (local->loc.inode) {
+                SYNCTASK_SETID (0, 0);
+                ret = syncop_getxattr (src_node, &local->loc, &dict,
+                                       conf->link_xattr_name, NULL, NULL);
+                SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+        } else {
+                ret = syncop_fgetxattr (src_node, local->fd, &dict,
+                                        conf->link_xattr_name, NULL, NULL);
+        }
+
+        /*
+         * Each DHT xlator layer has its own name for the linkto xattr.
+         * If the file mode bits indicate that the file is being migrated but
+         * this layer's linkto xattr is not present, it means that another
+         * DHT layer is migrating the file. In this case, return 1 so
+         * the mode bits can be passed on to the higher layer for appropriate
+         * action.
+         */
+
+        if (-ret == ENODATA) {
+                /* This DHT layer is not migrating this file */
+                ret = inode_ctx_reset1 (inode, this, &tmp_miginfo);
+                if (tmp_miginfo) {
+                        /* This can be a problem if the file was
+                         * migrated by two different layers. Raise
+                         * a warning here.
+                         */
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_HAS_MIGINFO,
+                                "%s: Found miginfo in the inode ctx",
+                                log_name);
+                        miginfo = (void *)tmp_miginfo;
+                        GF_REF_PUT (miginfo);
+                }
+                ret = 1;
+                goto out;
+        }
+
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        DHT_MSG_GET_XATTR_FAILED,
+                        "%s: failed to get the 'linkto' xattr",
+                        log_name);
+                ret = -1;
+                goto out;
+        }
+
+        dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+        if (!dst_node) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_SUBVOL_NOT_FOUND,
+                        "%s: failed to get the 'linkto' xattr from dict",
+                        log_name);
+                ret = -1;
+                goto out;
+        }
+
+        local->rebalance.target_node = dst_node;
+
+        if (local->loc.inode) {
+                loc_copy (&tmp_loc, &local->loc);
+        } else {
+                tmp_loc.inode = inode_ref (inode);
+                gf_uuid_copy (tmp_loc.gfid, inode->gfid);
+        }
+
+        /* lookup on dst */
+        ret = syncop_lookup (dst_node, &tmp_loc, &stbuf, NULL,
+                             NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
+                        "%s: failed to lookup the file on %s",
+                        tmp_loc.path ? tmp_loc.path : uuid_utoa (tmp_loc.gfid),
+                        dst_node->name);
+                ret = -1;
+                goto out;
+        }
+
+        if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_GFID_MISMATCH,
+                        "%s: gfid different on the target file on %s",
+                        tmp_loc.path ? tmp_loc.path : uuid_utoa (tmp_loc.gfid),
+                        dst_node->name);
+                ret = -1;
+                goto out;
+        }
+        ret = 0;
+
+        if (tmp_loc.path == NULL) {
+                inode_path (inode, NULL, &path);
+                if (path)
+                        tmp_loc.path = path;
+        }
+
+        LOCK(&inode->lock);
+
+        if (list_empty (&inode->fd_list))
+                goto unlock;
+
+        /* perform open as root:root. There is window between linkfile
+         * creation(root:root) and setattr with the correct uid/gid
+         */
+        SYNCTASK_SETID (0, 0);
+
+        /* It's possible that we are the last user of iter_fd after each
+         * iteration. In this case the fd_unref() of iter_fd at the end of
+         * the loop will cause the destruction of the fd. So we need to
+         * iterate the list safely because iter_fd cannot be trusted.
+         */
+        list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) {
+                if (fd_is_anonymous (iter_fd))
+                        continue;
+
+                if (dht_fd_open_on_dst (this, iter_fd, dst_node))
+                        continue;
+
+                /* We need to release the inode->lock before calling
+                 * syncop_open() to avoid possible deadlocks. However this
+                 * can cause the iter_fd to be released by other threads.
+                 * To avoid this, we take a reference before releasing the
+                 * lock.
+                 */
+                __fd_ref(iter_fd);
+
+                UNLOCK(&inode->lock);
+
+                /* flags for open are stripped down to allow following the
+                 * new location of the file, otherwise we can get EEXIST or
+                 * truncate the file again as rebalance is moving the data */
+                ret = syncop_open (dst_node, &tmp_loc,
+                                   (iter_fd->flags &
+                                   ~(O_CREAT | O_EXCL | O_TRUNC)),
+                                   iter_fd, NULL, NULL);
+                if (ret < 0) {
+                        /* 'path' is only set when inode_path() had to be
+                         * used, so log from tmp_loc with a gfid fallback */
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                DHT_MSG_OPEN_FD_ON_DST_FAILED,
+                                "failed to send open "
+                                "the fd (%p, flags=0%o) on file %s @ %s",
+                                iter_fd, iter_fd->flags,
+                                tmp_loc.path ? tmp_loc.path : gfid_str,
+                                dst_node->name);
+                        ret = -1;
+                        open_failed = 1;
+                } else {
+                        /* Potential fd leak if this fails here as it will be
+                           reopened at the next Phase1/2 check */
+                        dht_fd_ctx_set (this, iter_fd, dst_node);
+                }
+
+                fd_unref(iter_fd);
+
+                LOCK(&inode->lock);
+        }
+
+        SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+unlock:
+        UNLOCK(&inode->lock);
+
+        if (open_failed) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dht_inode_ctx_set_mig_info (this, inode, src_node, dst_node);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_SET_INODE_CTX_FAILED,
+                        "%s: failed to set inode-ctx target file at %s",
+                        log_name, dst_node->name);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* the xattr dict returned by (f)getxattr is owned by this function */
+        if (dict)
+                dict_unref (dict);
+
+        loc_wipe (&tmp_loc);
+        return ret;
+}
+
+/*
+ * Schedule the 'in-progress' migration check for @frame as a synctask on the
+ * sync environment of @this->ctx. dht_rebalance_inprogress_task is the task
+ * function and dht_inprogress_check_done its completion callback; @frame is
+ * passed for both trailing arguments of synctask_new().
+ *
+ * Returns whatever synctask_new() returns.
+ */
+int
+dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+{
+        return synctask_new (this->ctx->env,
+                             dht_rebalance_inprogress_task,
+                             dht_inprogress_check_done,
+                             frame, frame);
+}
+/*
+ * Store @layout_int in the DHT inode ctx of @inode.
+ *
+ * If a ctx already exists its layout pointer is overwritten; otherwise a new
+ * zeroed ctx is allocated. The ctx is then (re)registered with
+ * dht_inode_ctx_set(), whose result is returned.
+ *
+ * NOTE(review): when the allocation fails the function returns the result
+ * of the preceding dht_inode_ctx_get(), and a freshly allocated ctx does not
+ * appear to be freed if dht_inode_ctx_set() fails - confirm both.
+ */
int
-dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
{
- if (!child) {
- goto err;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ /* no usable ctx yet: create one holding only the layout */
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
}
- if (strcmp (parent->path, "/") == 0)
- gf_asprintf ((char **)&child->path, "/%s", name);
- else
- gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
+ ret = dht_inode_ctx_set (inode, this, ctx);
- if (!child->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ return ret;
+}
+
+
+/*
+ * Copy the mtime/ctime/atime values (seconds and nanoseconds) of @stat into
+ * the time record of the DHT inode ctx of @inode. Nothing is done when
+ * dht_inode_ctx_get() reports an error.
+ */
+void
+dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat)
+{
+        dht_inode_ctx_t *ictx  = NULL;
+        dht_stat_time_t *stamp = NULL;
+
+        if (dht_inode_ctx_get (inode, this, &ictx) != 0)
+                return;
+
+        stamp = &ictx->time;
+
+        stamp->mtime      = stat->ia_mtime;
+        stamp->mtime_nsec = stat->ia_mtime_nsec;
+
+        stamp->ctime      = stat->ia_ctime;
+        stamp->ctime_nsec = stat->ia_ctime_nsec;
+
+        stamp->atime      = stat->ia_atime;
+        stamp->atime_nsec = stat->ia_atime_nsec;
+}
+
+
+/*
+ * Merge the timestamps of @stat into the time record of the DHT inode ctx of
+ * @inode, creating the ctx when dht_inode_ctx_get() fails. Each of
+ * mtime/ctime/atime is handled by the DHT_UPDATE_TIME macro, which also
+ * receives @inode and @post (its exact comparison rule is defined elsewhere).
+ *
+ * Returns -1 when a new ctx cannot be allocated, 0 otherwise - including
+ * when @stat or @inode is NULL.
+ *
+ * NOTE(review): the result of dht_inode_ctx_set() is assigned to ret but
+ * never returned, and a newly allocated ctx is not released if that call
+ * fails - confirm whether this is intended.
+ */
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, stat, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (ret) {
+ /* no ctx stored yet: start from a zeroed one */
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
}
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ time = &ctx->time;
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
+ stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
+ stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec,
+ stat->ia_atime, stat->ia_atime_nsec, inode, post);
- if (!child->inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ ret = dht_inode_ctx_set (inode, this, ctx);
+out:
+ return 0;
+}
+
+/*
+ * Fetch the DHT inode ctx stored for @this on @inode.
+ *
+ * On success (return 0) the stored value is written to *@ctx when @ctx is
+ * not NULL. Returns -1 when @this or @inode is NULL, otherwise the result of
+ * inode_ctx_get().
+ */
+int
+dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+        int      status = -1;
+        uint64_t raw    = 0;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        status = inode_ctx_get (inode, this, &raw);
+        if (!status && ctx)
+                *ctx = (dht_inode_ctx_t *) raw;
+out:
+        return status;
+}
+
+/*
+ * Register @ctx as the DHT inode ctx of @inode for @this.
+ *
+ * Returns -1 when any argument is NULL, otherwise the result of
+ * inode_ctx_set().
+ */
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+        uint64_t raw    = 0;
+        int      status = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+        /* the inode ctx slot holds the pointer as an integer */
+        raw = (long)ctx;
+        status = inode_ctx_set (inode, this, &raw);
+out:
+        return status;
+}
+
+/*
+ * Copy *@lkowner into the lk_owner field of the first @count entries of
+ * @lk_array. Does nothing when @lk_array or @lkowner is NULL; the individual
+ * array entries are dereferenced without a NULL check.
+ */
+void
+dht_set_lkowner (dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner)
+{
+ int i = 0;
+
+ if (!lk_array || !lkowner)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i]->lk_owner = *lkowner;
}
+out:
+ return;
+}
+
+/*
+ * Return the conf->subvolume_status[] entry that belongs to @subvol, found
+ * by a linear search of conf->subvolumes[]. Returns 0 when @subvol is not
+ * one of the configured subvolumes.
+ */
+int
+dht_subvol_status (dht_conf_t *conf, xlator_t *subvol)
+{
+ int i;
+
+ for (i=0 ; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ return conf->subvolume_status[i];
+ }
+ }
return 0;
-err:
- loc_wipe (child);
+}
+
+/*
+ * Finish a lock/unlock sequence driven by @lock_frame: detach the lock array
+ * and the stored callback from the lock frame's local, report
+ * local->lock.op_ret / op_errno to the main frame through that callback and
+ * destroy the lock frame.
+ */
+void
+dht_inodelk_done (call_frame_t *lock_frame)
+{
+        dht_local_t       *lk_local = lock_frame->local;
+        call_frame_t      *caller   = lk_local->main_frame;
+        fop_inodelk_cbk_t  notify   = lk_local->lock.inodelk_cbk;
+
+        /* drop the references held in local before the frame goes away */
+        lk_local->lock.locks       = NULL;
+        lk_local->lock.lk_count    = 0;
+        lk_local->lock.inodelk_cbk = NULL;
+
+        notify (caller, NULL, caller->this, lk_local->lock.op_ret,
+                lk_local->lock.op_errno, NULL);
+
+        dht_lock_stack_destroy (lock_frame);
+}
+
+/*
+ * inodelk callback used by dht_inodelk_cleanup(): once the unlock requests
+ * have completed, finish the sequence on @frame via dht_inodelk_done().
+ * The unlock result (@op_ret/@op_errno) is not examined. Always returns 0.
+ */
+int
+dht_inodelk_cleanup_cbk (call_frame_t *frame, void *cookie,
+                         xlator_t *this, int32_t op_ret, int32_t op_errno,
+                         dict_t *xdata)
+{
+        dht_inodelk_done (frame);
+
+        return 0;
+}
+
+/*
+ * Count the entries among the first @lk_count elements of @lk_array whose
+ * 'locked' flag is set. Returns 0 for a NULL array or a zero count.
+ */
+int32_t
+dht_lock_count (dht_lock_t **lk_array, int lk_count)
+{
+        int idx  = 0;
+        int held = 0;
+
+        if (!lk_array || !lk_count)
+                return 0;
+
+        for (idx = 0; idx < lk_count; idx++)
+                held += lk_array[idx]->locked ? 1 : 0;
+
+        return held;
+}
+
+/*
+ * Undo a partially successful lock sequence on @lock_frame. If any lock in
+ * local->lock.locks is held, unlock them and finish from
+ * dht_inodelk_cleanup_cbk(); otherwise finish right away with
+ * dht_inodelk_done().
+ */
+void
+dht_inodelk_cleanup (call_frame_t *lock_frame)
+{
+        dht_local_t  *lk_local = lock_frame->local;
+        dht_lock_t  **held     = lk_local->lock.locks;
+        int           total    = lk_local->lock.lk_count;
+
+        if (dht_lock_count (held, total) == 0) {
+                dht_inodelk_done (lock_frame);
+                return;
+        }
+
+        dht_unlock_inodelk (lock_frame, held, total,
+                            dht_inodelk_cleanup_cbk);
+}
+
+/*
+ * Callback of a single unlock request wound by dht_unlock_inodelk().
+ * @cookie carries the index of the lock in local->lock.locks. A failed
+ * unlock is logged as a warning; a successful one clears the 'locked' flag.
+ * When the last outstanding reply arrives the sequence is finished with
+ * dht_inodelk_done(). Always returns 0.
+ */
+int32_t
+dht_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *lk_local  = frame->local;
+        int          idx       = (long) cookie;
+        int          remaining = 0;
+        char         gfid_str[GF_UUID_BUF_SIZE] = {0};
+
+        if (op_ret >= 0) {
+                lk_local->lock.locks[idx]->locked = 0;
+        } else {
+                uuid_utoa_r (lk_local->lock.locks[idx]->loc.gfid,
+                             gfid_str);
+
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        DHT_MSG_UNLOCKING_FAILED,
+                        "unlocking failed on %s:%s",
+                        lk_local->lock.locks[idx]->xl->name,
+                        gfid_str);
+        }
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                dht_inodelk_done (frame);
+
+        return 0;
+}
+
+/*
+ * Create the frame used for lock requests: a copy of @parent_frame whose
+ * root lk_owner is derived from the address of @parent_frame->root.
+ * Returns NULL when copy_frame() fails.
+ */
+call_frame_t *
+dht_lock_frame (call_frame_t *parent_frame)
+{
+        call_frame_t *copy = copy_frame (parent_frame);
+
+        if (copy != NULL)
+                set_lk_owner_from_ptr (&copy->root->lk_owner,
+                                       parent_frame->root);
+
+        return copy;
+}
+
+/*
+ * Release every lock in @lk_array whose 'locked' flag is set.
+ *
+ * A separate lock frame is created, the array is stored in its local through
+ * dht_local_lock_init(), and one F_SETLK/F_UNLCK inodelk is wound per held
+ * lock using that lock's own lk_owner. Replies are collected by
+ * dht_unlock_inodelk_cbk().
+ *
+ * Returns 0 when the unlock requests were wound, or when no lock was held
+ * (in that case @inodelk_cbk is invoked immediately with op_ret 0).
+ * Returns a negative value - without invoking @inodelk_cbk - when an
+ * argument is NULL, the lock frame cannot be allocated, or
+ * dht_local_lock_init() fails; the locks are then left in place and logged.
+ */
+int32_t
+dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {0,};
+ int ret = -1 , i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, done);
+
+ /* number of unlock requests that will be wound */
+ call_cnt = dht_lock_count (lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame (frame);
+ if (lock_frame == NULL) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_UNLOCKING_FAILED,
+ "cannot allocate a frame, not unlocking following "
+ "locks:");
+
+ dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array,
+ lk_count);
+ goto done;
+ }
+
+ ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_UNLOCKING_FAILED,
+ "storing locks in local failed, not unlocking "
+ "following locks:");
+
+ dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array,
+ lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ flock.l_type = F_UNLCK;
+
+ for (i = 0; i < local->lock.lk_count; i++) {
+ if (!local->lock.locks[i]->locked)
+ continue;
+
+ /* unlock with the owner the lock was taken with */
+ lock_frame->root->lk_owner = local->lock.locks[i]->lk_owner;
+ STACK_WIND_COOKIE (lock_frame, dht_unlock_inodelk_cbk,
+ (void *)(long)i,
+ local->lock.locks[i]->xl,
+ local->lock.locks[i]->xl->fops->inodelk,
+ local->lock.locks[i]->domain,
+ &local->lock.locks[i]->loc, F_SETLK,
+ &flock, NULL);
+ /* NOTE(review): stop as soon as the last request is wound;
+  * presumably because the final callback may already have
+  * destroyed lock_frame and its local - confirm */
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy (lock_frame);
+
+ /* no locks acquired, invoke inodelk_cbk */
+ if (ret == 0)
+ inodelk_cbk (frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+/*
+ * Callback of a single request wound by dht_nonblocking_inodelk().
+ * @cookie carries the index of the lock in local->lock.locks.
+ *
+ * A failure (op_ret == -1) is recorded in local->lock.op_ret/op_errno and
+ * logged at debug level; a success marks the lock as held. When the last
+ * outstanding reply arrives, the sequence either finishes with
+ * dht_inodelk_done() or, if any request failed, releases the locks already
+ * taken through dht_inodelk_cleanup(). Always returns 0.
+ */
+int32_t
+dht_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        int          lk_index = 0, call_cnt = 0;
+        char         gfid[GF_UUID_BUF_SIZE] = {0};
+
+        local = frame->local;
+        lk_index = (long) cookie;
+
+        if (op_ret == -1) {
+                local->lock.op_ret = -1;
+                local->lock.op_errno = op_errno;
+
+                /* local has already been dereferenced above (and is used
+                 * unconditionally below), so only the array slot needs a
+                 * NULL check here */
+                if (local->lock.locks[lk_index]) {
+                        uuid_utoa_r (local->lock.locks[lk_index]->loc.inode->gfid,
+                                     gfid);
+
+                        gf_msg_debug (this->name, op_errno,
+                                      "inodelk failed on gfid: %s "
+                                      "subvolume: %s", gfid,
+                                      local->lock.locks[lk_index]->xl->name);
+                }
+
+                goto out;
+        }
+
+        local->lock.locks[lk_index]->locked = _gf_true;
+
+out:
+        call_cnt = dht_frame_return (frame);
+        if (is_last_call (call_cnt)) {
+                if (local->lock.op_ret < 0) {
+                        dht_inodelk_cleanup (frame);
+                        return 0;
+                }
+
+                dht_inodelk_done (frame);
+        }
+
+        return 0;
+}
+
+/*
+ * Request every lock in @lk_array at once with F_SETLK. A dedicated lock
+ * frame is created, its lk_owner is stamped on every lock, and one inodelk
+ * is wound per array entry; replies are collected by
+ * dht_nonblocking_inodelk_cbk(), which eventually invokes @inodelk_cbk.
+ *
+ * Returns 0 once the requests are wound, -1 when an argument is NULL, the
+ * lock frame cannot be created or dht_local_lock_init() fails (in which
+ * case @inodelk_cbk is not invoked).
+ */
+int
+dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
+                         int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+{
+        call_frame_t    *lock_frame = NULL;
+        dht_local_t     *local      = NULL;
+        struct gf_flock  flock      = {0,};
+        int              i          = 0;
+
+        GF_VALIDATE_OR_GOTO ("dht-locks", frame, out);
+        GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out);
+        GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out);
+
+        lock_frame = dht_lock_frame (frame);
+        if (!lock_frame)
+                goto out;
+
+        if (dht_local_lock_init (lock_frame, lk_array, lk_count,
+                                 inodelk_cbk) < 0)
+                goto out;
+
+        dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner);
+
+        local = lock_frame->local;
+        local->call_cnt = lk_count;
+        local->main_frame = frame;
+
+        for (i = 0; i < lk_count; i++) {
+                flock.l_type = local->lock.locks[i]->type;
+
+                STACK_WIND_COOKIE (lock_frame, dht_nonblocking_inodelk_cbk,
+                                   (void *) (long) i,
+                                   local->lock.locks[i]->xl,
+                                   local->lock.locks[i]->xl->fops->inodelk,
+                                   local->lock.locks[i]->domain,
+                                   &local->lock.locks[i]->loc, F_SETLK,
+                                   &flock, NULL);
+        }
+
+        return 0;
+
+out:
+        if (lock_frame)
+                dht_lock_stack_destroy (lock_frame);
+
+        return -1;
+}
+
+/*
+ * Callback of one step of the sequential (blocking) lock chain.
+ * @cookie carries the index of the lock that was just requested.
+ *
+ * Success marks the lock as held. A failure aborts the chain through
+ * dht_inodelk_cleanup() unless the error is ESTALE/ENOENT and the configured
+ * reaction is IGNORE_ENOENT_ESTALE. After the last lock, the sequence is
+ * reported as failed (with the most recent op_errno) if not a single lock
+ * is held, then finished with dht_inodelk_done(); otherwise the next lock is
+ * requested. Always returns 0.
+ */
+int32_t
+dht_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *lk_local = frame->local;
+        int          idx      = (long) cookie;
+        int          scan     = 0;
+
+        if (op_ret == 0) {
+                lk_local->lock.locks[idx]->locked = _gf_true;
+        } else if (((op_errno != ESTALE) && (op_errno != ENOENT)) ||
+                   (lk_local->lock.reaction != IGNORE_ENOENT_ESTALE)) {
+                /* an error that must not be ignored: undo and report */
+                lk_local->lock.op_ret = -1;
+                lk_local->lock.op_errno = op_errno;
+
+                dht_inodelk_cleanup (frame);
+                return 0;
+        }
+
+        if (idx != (lk_local->lock.lk_count - 1)) {
+                dht_blocking_inodelk_rec (frame, idx + 1);
+                return 0;
+        }
+
+        /* last lock of the chain: find the first held lock, if any */
+        while ((scan < lk_local->lock.lk_count) &&
+               (!lk_local->lock.locks[scan]->locked))
+                scan++;
+
+        if (scan == lk_local->lock.lk_count) {
+                lk_local->lock.op_ret = -1;
+                lk_local->lock.op_errno = op_errno;
+        }
+
+        dht_inodelk_done (frame);
+
+        return 0;
+}
+
+/*
+ * Wind a waiting (F_SETLKW) inodelk for entry @i of the lock array stored in
+ * @frame's local. The reply is handled by dht_blocking_inodelk_cbk(), which
+ * receives @i as its cookie.
+ */
+void
+dht_blocking_inodelk_rec (call_frame_t *frame, int i)
+{
+        dht_local_t     *local = frame->local;
+        struct gf_flock  flock = {0,};
+
+        flock.l_type = local->lock.locks[i]->type;
+
+        STACK_WIND_COOKIE (frame, dht_blocking_inodelk_cbk,
+                           (void *) (long) i,
+                           local->lock.locks[i]->xl,
+                           local->lock.locks[i]->xl->fops->inodelk,
+                           local->lock.locks[i]->domain,
+                           &local->lock.locks[i]->loc, F_SETLKW, &flock, NULL);
+}
+
+/*
+ * qsort() comparator for arrays of dht_lock_t pointers: order by subvolume
+ * name first, then by the gfid in the lock's loc. Returns 0 when either
+ * element is a NULL pointer.
+ */
+int
+dht_lock_request_cmp (const void *val1, const void *val2)
+{
+        dht_lock_t *lock1 = *(dht_lock_t **)val1;
+        dht_lock_t *lock2 = *(dht_lock_t **)val2;
+        int         order = 0;
+
+        GF_VALIDATE_OR_GOTO ("dht-locks", lock1, out);
+        GF_VALIDATE_OR_GOTO ("dht-locks", lock2, out);
+
+        order = strcmp (lock1->xl->name, lock2->xl->name);
+        if (order != 0)
+                goto out;
+
+        /* same subvolume: fall back to the gfid */
+        order = gf_uuid_compare (lock1->loc.gfid, lock2->loc.gfid);
+out:
+        return order;
+}
+
+/*
+ * Sort @locks in place using dht_lock_request_cmp(). Returns 0 on success,
+ * -1 when @locks is NULL or @count is 0.
+ */
+int
+dht_lock_order_requests (dht_lock_t **locks, int count)
+{
+        if (!locks || !count)
+                return -1;
+
+        qsort (locks, count, sizeof (*locks), dht_lock_request_cmp);
+
+        return 0;
+}
+
+/*
+ * Acquire the locks in @lk_array one after another, starting with index 0,
+ * each request waiting for its lock (see dht_blocking_inodelk_rec()).
+ * @reaction is stored in the lock frame's local and consulted by
+ * dht_blocking_inodelk_cbk() when a request fails. @inodelk_cbk is invoked
+ * on @frame once the chain has finished.
+ *
+ * Returns 0 once the first request is wound, -1 when an argument is NULL,
+ * the lock frame cannot be created or dht_local_lock_init() fails (in which
+ * case @inodelk_cbk is not invoked).
+ */
+int
+dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, dht_reaction_type_t reaction,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out);
+
+ lock_frame = dht_lock_frame (frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* every lock is requested with the lock frame's owner */
+ dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->lock.reaction = reaction;
+ local->main_frame = frame;
+
+ dht_blocking_inodelk_rec (lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy (lock_frame);
+
return -1;
}
+/*
+ * Walk @path component by component from the root of @itable, making sure
+ * every component has a linked inode.
+ *
+ * A component already present in the inode table (inode_grep() hit) is
+ * descended into without any network operation. Otherwise a new inode is
+ * created, looked up on @this with syncop_lookup() and linked under its
+ * parent with inode_link().
+ *
+ * Returns the inode linked for the last component processed, with an extra
+ * reference taken for the caller, or NULL on failure.
+ *
+ * NOTE(review): linked_inode is reset at the top of every iteration, so NULL
+ * is also returned when the final component was found through inode_grep()
+ * rather than looked up - confirm this is what callers expect.
+ */
+inode_t*
+dht_heal_path (xlator_t *this, char *path, inode_table_t *itable)
+{
+ int ret = -1;
+ struct iatt iatt = {0, };
+ inode_t *linked_inode = NULL;
+ loc_t loc = {0, };
+ char *bname = NULL;
+ char *save_ptr = NULL;
+ uuid_t gfid = {0, };
+ char *tmp_path = NULL;
+
+
+ /* strtok_r() modifies its input, so work on a private copy */
+ tmp_path = gf_strdup (path);
+ if (!tmp_path) {
+ goto out;
+ }
+
+ /* gfid 00..01 is used as the parent gfid of the first component,
+  * whose parent inode is itable->root */
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+
+ gf_uuid_copy (loc.pargfid, gfid);
+ loc.parent = inode_ref (itable->root);
+
+ bname = strtok_r (tmp_path, "/", &save_ptr);
+
+ /* sending a lookup on parent directory,
+ * Eg: if path is like /a/b/c/d/e/f/g/
+ * then we will send a lookup on a first and then b,c,d,etc
+ */
+
+ while (bname) {
+ linked_inode = NULL;
+ loc.inode = inode_grep (itable, loc.parent, bname);
+ if (loc.inode == NULL) {
+ loc.inode = inode_new (itable);
+ if (loc.inode == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ /*
+ * Inode is already populated in the inode table.
+ * Which means we already looked up the inode and
+ * linked with a dentry. So that we will skip
+ * lookup on this entry, and proceed to next.
+ */
+ bname = strtok_r (NULL, "/", &save_ptr);
+ inode_unref (loc.parent);
+ loc.parent = loc.inode;
+ gf_uuid_copy (loc.pargfid, loc.inode->gfid);
+ loc.inode = NULL;
+ continue;
+ }
+
+ loc.name = bname;
+ /* NOTE(review): the result of loc_path() is overwritten by the
+  * lookup below without being checked */
+ ret = loc_path (&loc, bname);
+
+ ret = syncop_lookup (this, &loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_INFO, -ret,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "Healing of path %s failed on subvolume %s for "
+ "directory %s", path, this->name, bname);
+ goto out;
+ }
+
+ linked_inode = inode_link (loc.inode, loc.parent, bname, &iatt);
+ if (!linked_inode)
+ goto out;
+
+ /* the linked inode becomes the parent of the next component */
+ loc_wipe (&loc);
+ gf_uuid_copy (loc.pargfid, linked_inode->gfid);
+ loc.inode = NULL;
+ loc.parent = linked_inode;
+
+ bname = strtok_r (NULL, "/", &save_ptr);
+ }
+out:
+ /* loc.parent may be linked_inode and is released by loc_wipe();
+  * take the caller's reference first */
+ inode_ref (linked_inode);
+ loc_wipe (&loc);
+ GF_FREE (tmp_path);
+
+ return linked_inode;
+}
+
+
+/*
+ * Synctask body that heals the full ancestry of a directory.
+ *
+ * @data is the heal frame; its cookie holds the source subvolume and its
+ * local holds the gfid and loc of the directory. The ancestry path is
+ * fetched from the source subvolume with a GET_ANCESTRY_PATH_KEY getxattr
+ * and handed to dht_heal_path(). If that returns an inode different from
+ * local->inode, local->inode is replaced by it (the old reference is
+ * dropped); otherwise the returned reference is released.
+ *
+ * Always returns 0; failures are only logged.
+ */
+int
+dht_heal_full_path (void *data)
+{
+ call_frame_t *heal_frame = data;
+ dht_local_t *local = NULL;
+ loc_t loc = {0, };
+ dict_t *dict = NULL;
+ char *path = NULL;
+ int ret = -1;
+ xlator_t *source = NULL;
+ xlator_t *this = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ inode_t *tmp_inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("DHT", heal_frame, out);
+
+ local = heal_frame->local;
+ this = heal_frame->this;
+ source = heal_frame->cookie;
+ heal_frame->cookie = NULL;
+ gf_uuid_copy (loc.gfid, local->gfid);
+
+ if (local->loc.inode)
+ loc.inode = inode_ref (local->loc.inode);
+ else
+ goto out;
+
+ itable = loc.inode->table;
+ ret = syncop_getxattr (source, &loc, &dict,
+ GET_ANCESTRY_PATH_KEY, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_INFO, -ret,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "Failed to get path from subvol %s. Aborting "
+ "directory healing.", source->name);
+ goto out;
+ }
+
+ /* the return value is not checked; a missing key leaves path NULL */
+ ret = dict_get_str (dict, GET_ANCESTRY_PATH_KEY, &path);
+ if (path) {
+ inode = dht_heal_path (this, path, itable);
+ if (inode && inode != local->inode) {
+ /*
+ * if inode returned by heal function is different
+ * from what we passed, which means a racing thread
+ * already linked a different inode for dentry.
+ * So we will update our local->inode, so that we can
+ * return proper inode.
+ */
+ tmp_inode = local->inode;
+ local->inode = inode;
+ inode_unref (tmp_inode);
+ tmp_inode = NULL;
+ } else {
+ /* NULL or the inode we already hold: drop the extra
+  * reference taken by dht_heal_path() */
+ inode_unref (inode);
+ }
+ }
+
+out:
+ loc_wipe (&loc);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+}
+
+/*
+ * Completion callback of the dht_heal_full_path() synctask: unwind the
+ * pending lookup on the main frame with success (op_ret 0, op_errno 0) using
+ * the inode, stat, xattr and post-parent values kept in the heal frame's
+ * local, then destroy the heal frame. @op_ret of the task is not consulted.
+ * Always returns 0.
+ */
+int
+dht_heal_full_path_done (int op_ret, call_frame_t *heal_frame, void *data)
+{
+        dht_local_t  *local      = heal_frame->local;
+        call_frame_t *main_frame = local->main_frame;
+
+        /* the main frame is unwound below; drop our pointer to it */
+        local->main_frame = NULL;
+
+        dht_set_fixed_dir_stat (&local->postparent);
+
+        DHT_STACK_UNWIND (lookup, main_frame, 0, 0,
+                          local->inode, &local->stbuf, local->xattr,
+                          &local->postparent);
+
+        DHT_STACK_DESTROY (heal_frame);
+
+        return 0;
+}
+
+/*
+ * Record @lock_subvol in the DHT inode ctx of @inode.
+ *
+ * This function must be called inside an inode lock.
+ *
+ * Returns 0 on success, -1 when @inode is NULL or no ctx is stored for
+ * @this on the inode.
+ */
+int
+__dht_lock_subvol_set (inode_t *inode, xlator_t *this,
+                       xlator_t *lock_subvol)
+{
+        dht_inode_ctx_t *ictx   = NULL;
+        uint64_t         raw    = 0;
+        int              status = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        status = __inode_ctx_get0 (inode, this, &raw);
+        if ((status != 0) || (raw == 0)) {
+                status = -1;
+                goto out;
+        }
+
+        ictx = (dht_inode_ctx_t *) raw;
+        ictx->lock_subvol = lock_subvol;
+out:
+        return status;
+}
+
+/*
+ * Choose the subvolume a lock request described by @lock must be sent to.
+ *
+ * For inodes that are neither directories nor of invalid type the cached
+ * subvolume of @local is returned. For the remaining inodes the lock_subvol
+ * stored in the inode ctx is returned; if none is stored and the request is
+ * not an unlock, the cached subvolume is stored there and returned.
+ *
+ * For non-unlock requests on such inodes a reference is taken on the inode;
+ * it is dropped again here only when no subvolume could be determined.
+ *
+ * Returns NULL when @lock or @local is NULL, when no inode is available, or
+ * when no subvolume could be determined.
+ */
+xlator_t*
+dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local)
+{
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ int32_t ret = -1;
+ uint64_t value = 0;
+ xlator_t *cached_subvol = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO (this->name, lock, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+
+ if (!inode)
+ goto out;
+
+ if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) {
+ /*
+ * We may get non-linked inode for directories as part
+ * of the selfheal code path. So checking for IA_INVAL
+ * type also. This will only happen for directory.
+ */
+ subvol = local->cached_subvol;
+ goto out;
+ }
+
+ if (lock->l_type != F_UNLCK) {
+ /*
+ * inode purging might happen on NFS between a lk
+ * and unlk. Due to this lk and unlk might be sent
+ * to different subvols.
+ * So during a lock request, taking a ref on inode
+ * to prevent inode purging. inode unref will happen
+ * in unlock cbk code path.
+ */
+ inode_ref (inode);
+ }
+
+ LOCK (&inode->lock);
+ ret = __inode_ctx_get0 (inode, this, &value);
+ if (!ret && value) {
+ ctx = (dht_inode_ctx_t *) value;
+ subvol = ctx->lock_subvol;
+ }
+ /* first lock on this inode: remember where it is being sent */
+ if (!subvol && lock->l_type != F_UNLCK && cached_subvol) {
+ ret = __dht_lock_subvol_set (inode, this,
+ cached_subvol);
+ if (ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set lock_subvol in "
+ "inode ctx for gfid %s",
+ gfid);
+ goto unlock;
+ }
+ subvol = cached_subvol;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ /* nothing will be wound, so undo the reference taken above */
+ if (!subvol && inode && lock->l_type != F_UNLCK) {
+ inode_unref (inode);
+ }
+out:
+ return subvol;
+}
+
+/*
+ * Balance the inode reference taken by dht_get_lock_subvolume() once the
+ * reply to a lk request is known.
+ *
+ * Only inodes that are directories or of invalid type are affected. For
+ * those:
+ *   - a failed F_RDLCK/F_WRLCK request drops the reference and returns -1;
+ *   - a successful F_UNLCK request drops the reference and returns 0;
+ *   - a failed F_UNLCK request keeps the reference, logs a warning and
+ *     returns -1.
+ *
+ * Returns 0 in all other cases, and -1 when no inode can be found in the
+ * frame's local.
+ */
+int
+dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret)
+{
+        int          ret   = -1;
+        dht_local_t *local = NULL;
+        inode_t     *inode = NULL;
+        xlator_t    *this  = NULL;
+        char         gfid[GF_UUID_BUF_SIZE] = {0};
+
+        local = frame->local;
+        this = frame->this;
+
+        if (local->loc.inode || local->fd) {
+                inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+        }
+        if (!inode) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_LOCK_INODE_UNREF_FAILED,
+                        "Found a NULL inode. Failed to unref the inode");
+                goto out;
+        }
+
+        /* no reference is taken for other inode types */
+        if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) {
+                ret = 0;
+                goto out;
+        }
+
+        switch (local->lock_type) {
+        case F_RDLCK:
+        case F_WRLCK:
+                if (op_ret) {
+                        gf_uuid_unparse(inode->gfid, gfid);
+                        gf_msg_debug (this->name, 0,
+                                      "lock request failed for gfid %s", gfid);
+                        inode_unref (inode);
+                        goto out;
+                }
+                break;
+
+        case F_UNLCK:
+                if (!op_ret) {
+                        inode_unref (inode);
+                } else {
+                        gf_uuid_unparse(inode->gfid, gfid);
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_LOCK_INODE_UNREF_FAILED,
+                                "Unlock request failed for gfid %s. "
+                                "Failed to unref the inode", gfid);
+                        goto out;
+                }
+                break;
+
+        default:
+                break;
+        }
+        ret = 0;
+out:
+        return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-helper.h b/xlators/cluster/dht/src/dht-helper.h
new file mode 100644
index 00000000000..e3ab9c4d93b
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-helper.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _DHT_HELPER_H
+#define _DHT_HELPER_H
+
+int
+dht_lock_order_requests (dht_lock_t **lk_array, int count);
+
+void
+dht_blocking_inodelk_rec (call_frame_t *frame, int i);
+
+#endif
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
new file mode 100644
index 00000000000..549f1b9ea7e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -0,0 +1,1397 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-common.h"
+
+/* Forward declarations of the "second attempt" handlers. Each one is
+ * installed as local->rebalance.target_op_fn by the matching callback in this
+ * file and either re-winds the fop to the subvolume it is handed or unwinds
+ * with an error. */
+int dht_access2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+int dht_readv2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+int dht_attr2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+int dht_open2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+int dht_flush2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+int dht_lk2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+int dht_fsync2 (xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+
+
+
+/*
+ * Callback for the open fop wound by dht_open() and dht_open2().
+ *
+ * Unwinds the reply to the caller, except when the first attempt
+ * (local->call_cnt == 1) failed with an errno accepted by
+ * dht_inode_missing(): then dht_open2 is registered as the retry handler and
+ * dht_rebalance_complete_check() is invoked. If that check returns 0 this
+ * callback returns without unwinding.
+ * NOTE(review): presumably the check then owns the frame and will call
+ * target_op_fn later - confirm.
+ *
+ * The unwind passes local->fd, not the 'fd' argument. Always returns 0.
+ */
+int
+dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ /* Errors other than "inode missing" are reported as they are. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ /* Success, or a reply to the second attempt: nothing more to try. */
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+
+ return 0;
+}
+
+/*
+ * Retry handler for open; registered as rebalance.target_op_fn by
+ * dht_open_cbk.
+ *
+ * 'ret' is handed to we_are_not_migrating(); when that is true the fop is
+ * unwound with -1 and the errno saved in local->op_errno. Otherwise open is
+ * wound again to 'subvol' with call_cnt set to 2, which makes dht_open_cbk
+ * unwind the next reply instead of retrying. A NULL 'subvol' unwinds with
+ * ENOENT. Always returns 0.
+ *
+ * NOTE(review): when 'frame' is NULL the 'out' path still hands it to
+ * DHT_STACK_UNWIND - confirm the macro tolerates that.
+ */
+int
+dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = ENOENT;
+
+ if (we_are_not_migrating (ret)) {
+ /* This DHT layer is not migrating the file */
+ DHT_STACK_UNWIND (open, frame, -1, local->op_errno,
+ NULL, NULL);
+ return 0;
+
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ /* Mark this as the second attempt. */
+ local->call_cnt = 2;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ NULL);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+
+/*
+ * open fop entry point: winds open to the cached subvolume recorded in the
+ * local created by dht_local_init().
+ *
+ * The open flags are saved in local->rebalance.flags so that dht_open2 can
+ * repeat the call. On any setup failure the fop is unwound with -1; the errno
+ * is ENOMEM when the local cannot be created, EINVAL when there is no cached
+ * subvolume, and the current 'errno' when an argument validation jumped to
+ * 'err'. Always returns 0.
+ */
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.flags = flags;
+ /* First attempt; dht_open_cbk only retries when call_cnt is 1. */
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for stat/fstat on a regular file (wound by dht_stat, dht_fstat and
+ * dht_attr2).
+ *
+ * On the first attempt (local->call_cnt == 1), a failure accepted by
+ * dht_inode_missing() or a reply carrying the migration phase-2 marker
+ * triggers a retry through dht_attr2: directly when the inode context already
+ * holds usable migration info (and, for fd based calls, the fd is open on the
+ * destination), otherwise via dht_rebalance_complete_check(). In every other
+ * case the reply is unwound with the phase-1 flags stripped from 'stbuf'.
+ *
+ * The unwind uses the 'stat' signature for both stat and fstat. Always
+ * returns 0.
+ */
+int
+dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ xlator_t *subvol1 = 0;
+ xlator_t *subvol2 = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* Errors other than "inode missing" are reported as they are. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ /* A reply to the second attempt is never retried again. */
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+
+ local->rebalance.target_op_fn = dht_attr2;
+ dht_set_local_rebalance (this, local, NULL, NULL,
+ stbuf, xdata);
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info (this, inode, &subvol1, &subvol2);
+ if (dht_mig_info_is_invalid (local->cached_subvol,
+ subvol1, subvol2)){
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* it is a non-fd op or it is an fd based Fop and
+ opened on the dst.*/
+ if (local->fd &&
+ !dht_fd_open_on_dst (this, local->fd, subvol2)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_attr2 (this, subvol2, frame, 0);
+ return 0;
+ }
+ }
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+err:
+ return 0;
+}
+
+/*
+ * Retry handler for stat/fstat; registered as rebalance.target_op_fn by
+ * dht_file_attr_cbk.
+ *
+ * When we_are_not_migrating(ret) is true, the saved first reply
+ * (local->op_ret, local->op_errno, rebalance.postbuf, rebalance.xdata) is
+ * unwound unchanged. Otherwise the fop is wound again to 'subvol' with
+ * call_cnt set to 2; local->fop selects between fstat (on local->fd) and stat
+ * (on local->loc). A missing local or a NULL 'subvol' unwinds with -1.
+ * Always returns 0.
+ */
+int
+dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND (stat, frame, local->op_ret, op_errno,
+ &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+
+ if (subvol == NULL)
+ goto out;
+
+ /* Mark this as the second attempt. */
+ local->call_cnt = 2;
+
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, local->fd, NULL);
+ } else {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, &local->loc, NULL);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/*
+ * Callback for stat/fstat fanned out to every subvolume of a layout (wound by
+ * dht_stat and dht_fstat for inodes that are not regular files).
+ *
+ * Under frame->lock, a successful reply is merged into local->stbuf and sets
+ * local->op_ret to 0; a failed reply only records its errno in
+ * local->op_errno. The reply for which is_last_call() is true unwinds the
+ * aggregated result using the 'stat' signature. Always returns 0.
+ */
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+        dht_local_t  *local         = NULL;
+        int           this_call_cnt = 0;
+        call_frame_t *prev          = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+
+        /* Load 'local' before any validation that can jump to 'out': that
+         * label dereferences it when the last reply is unwound. */
+        local = frame->local;
+
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        prev = cookie;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_msg_debug (this->name, op_errno,
+                                      "subvolume %s returned -1",
+                                      prev->this->name);
+
+                        goto unlock;
+                }
+
+                dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+                local->op_ret = 0;
+        }
+unlock:
+        UNLOCK (&frame->lock);
+out:
+        this_call_cnt = dht_frame_return (frame);
+        if (is_last_call (this_call_cnt)) {
+                if (local) {
+                        DHT_STACK_UNWIND (stat, frame, local->op_ret,
+                                          local->op_errno, &local->stbuf,
+                                          xdata);
+                } else {
+                        /* No aggregation state to report from. */
+                        DHT_STACK_UNWIND (stat, frame, -1, EINVAL, NULL,
+                                          xdata);
+                }
+        }
+err:
+        return 0;
+}
+
+/*
+ * stat fop entry point.
+ *
+ * For a regular file the call is wound once, to the cached subvolume, with
+ * dht_file_attr_cbk as callback. For any other inode type it is wound to
+ * every subvolume listed in the layout and the replies are aggregated by
+ * dht_attr_cbk. Setup failures unwind with -1 (ENOMEM without a local, EINVAL
+ * without a layout, the current 'errno' after a failed validation).
+ * Always returns 0.
+ *
+ * NOTE(review): the regular-file branch uses local->cached_subvol without a
+ * NULL check - confirm a non-NULL layout guarantees it.
+ */
+int
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug (this->name, 0,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, loc, xdata);
+
+ return 0;
+ }
+
+ /* The loop bound is a private copy of the count; local->call_cnt is
+ * the value shared with the callbacks. */
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->stat,
+ loc, xdata);
+ }
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+/*
+ * fstat fop entry point; the fd based counterpart of dht_stat.
+ *
+ * For a regular file the call is wound once, to the cached subvolume, with
+ * dht_file_attr_cbk as callback. For any other inode type it is wound to
+ * every subvolume listed in the layout and the replies are aggregated by
+ * dht_attr_cbk. Setup failures unwind with -1 (ENOMEM without a local, EINVAL
+ * without a layout, the current 'errno' after a failed validation).
+ * Always returns 0.
+ *
+ * NOTE(review): fd->inode is dereferenced without validation, and the
+ * regular-file branch uses local->cached_subvol without a NULL check.
+ */
+int
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug (this->name, 0,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, fd, xdata);
+
+ return 0;
+ }
+
+ /* The loop bound is a private copy of the count; local->call_cnt is
+ * the value shared with the callbacks. */
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->fstat,
+ fd, xdata);
+ }
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for readv wound by dht_readv() and dht_readv2().
+ *
+ * On the first attempt (local->call_cnt == 1), a failure accepted by
+ * dht_inode_missing() or a reply carrying the migration phase-2 marker
+ * triggers a retry through dht_readv2: directly when the inode context holds
+ * usable migration info and the fd is already open on the destination,
+ * otherwise via dht_rebalance_complete_check(). In every other case the reply
+ * is unwound with the phase-1 flags stripped from 'stbuf'.
+ * Always returns 0.
+ */
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ /* Errors other than "inode missing" are reported as they are. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno))
+ goto out;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_readv2;
+ dht_set_local_rebalance (this, local, NULL, NULL,
+ stbuf, xdata);
+ /* File would be migrated to other node */
+ ret = dht_inode_ctx_get_mig_info (this, local->fd->inode,
+ &src_subvol,
+ &dst_subvol);
+
+ if (dht_mig_info_is_invalid (local->cached_subvol,
+ src_subvol, dst_subvol)
+ || !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* The destination is already recorded and the fd is
+ open on it, so there is no need to check whether
+ the migration is complete. */
+ dht_readv2 (this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+
+ DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+/*
+ * Retry handler for readv; registered as rebalance.target_op_fn by
+ * dht_readv_cbk.
+ *
+ * When we_are_not_migrating(ret) is true, the saved first reply is unwound
+ * with no data vector. Otherwise readv is wound again to 'subvol' with the
+ * size, offset and flags saved in local->rebalance, and call_cnt set to 2. A
+ * missing local or a NULL 'subvol' unwinds with -1. Always returns 0.
+ */
+int
+dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND (readv, frame, local->op_ret, op_errno,
+ NULL, 0, &local->rebalance.postbuf,
+ NULL, local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ /* Mark this as the second attempt. */
+ local->call_cnt = 2;
+
+ STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+
+/*
+ * readv fop entry point: winds readv to the cached subvolume of the fd.
+ *
+ * Offset, size and flags are saved in local->rebalance so that dht_readv2 can
+ * repeat the call. Setup failures unwind with -1 (ENOMEM without a local,
+ * EINVAL without a cached subvolume, the current 'errno' after a failed
+ * validation). Always returns 0.
+ */
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ /* First attempt; dht_readv_cbk only retries when call_cnt is 1. */
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_readv_cbk,
+ subvol, subvol->fops->readv,
+ fd, size, off, flags, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for access wound by dht_access(), dht_access2() and by this
+ * callback itself.
+ *
+ * Only replies received while local->call_cnt == 1 are considered for a
+ * retry:
+ *  - directory, failing with ENOTCONN or an errno accepted by
+ *    dht_inode_missing(): access is wound to the subvolume returned by
+ *    dht_subvol_next_available(), until that would be the cached subvolume
+ *    again;
+ *  - non-directory, failing with an errno accepted by dht_inode_missing():
+ *    dht_access2 is registered and dht_rebalance_complete_check() is invoked.
+ * Everything else is unwound as received. Always returns 0.
+ */
+int
+dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev || !prev->this)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) && ((op_errno == ENOTCONN) ||
+ dht_inode_missing(op_errno)) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+ subvol = dht_subvol_next_available (this, prev->this);
+ if (!subvol)
+ goto out;
+
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+
+ /* call_cnt is left at 1, so the next reply can move on to yet
+ another subvolume. */
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+ return 0;
+ }
+ if ((op_ret == -1) && dht_inode_missing(op_errno) &&
+ !(IA_ISDIR(local->loc.inode->ia_type))) {
+ /* File would be migrated to other node */
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check (frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/*
+ * Retry handler for access; registered as rebalance.target_op_fn by
+ * dht_access_cbk.
+ *
+ * When we_are_not_migrating(ret) is true, the fop is unwound with -1 and the
+ * errno saved in local->op_errno. Otherwise access is wound again to 'subvol'
+ * with the mask saved in local->rebalance.flags and call_cnt set to 2. A
+ * missing local or a NULL 'subvol' unwinds with -1. Always returns 0.
+ */
+int
+dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind with
+ * the original error so the higher DHT layer can handle
+ * this.
+ */
+
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ /* Mark this as the second attempt. */
+ local->call_cnt = 2;
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+/*
+ * access fop entry point: winds access to the cached subvolume of the inode.
+ *
+ * The mask is saved in local->rebalance.flags for the retries issued from
+ * dht_access_cbk and dht_access2. Setup failures unwind with -1 (ENOMEM
+ * without a local, EINVAL without a cached subvolume, the current 'errno'
+ * after a failed validation). Always returns 0.
+ */
+int
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ /* First attempt; dht_access_cbk only retries when call_cnt is 1. */
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ loc, mask, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+/*
+ * Callback for flush wound by dht_flush() and dht_flush2().
+ *
+ * A reply to the first attempt (local->call_cnt == 1) is saved in the local
+ * and may be followed by a second flush: straight to the migration
+ * destination when the inode context names one and the fd is open there, or
+ * through dht_rebalance_complete_check() when the reply was EREMOTE. Any
+ * other reply is unwound as received. Always returns 0.
+ */
+int
+dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, dict_t *xdata)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *subvol = 0;
+
+        local->op_errno = op_errno;
+
+        if (local->call_cnt == 1) {
+                local->rebalance.target_op_fn = dht_flush2;
+                local->op_ret = op_ret;
+
+                if (xdata)
+                        local->rebalance.xdata = dict_ref (xdata);
+
+                /* If the destination is known and the fd is open on it,
+                 * send the flush there as well. */
+                dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL,
+                                            &subvol);
+                if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) {
+                        dht_flush2 (this, subvol, frame, 0);
+                        return 0;
+                }
+
+                if ((op_errno == EREMOTE) &&
+                    !dht_rebalance_complete_check (this, frame))
+                        return 0;
+        }
+
+        DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * Second-attempt handler for flush; called directly by dht_flush_cbk and
+ * registered there as rebalance.target_op_fn.
+ *
+ * Winds flush to 'subvol' on local->fd with the xdata saved in
+ * local->rebalance.xdata, after setting call_cnt to 2. 'ret' is not examined
+ * here. A missing frame/local or a NULL 'subvol' unwinds with -1.
+ * Always returns 0.
+ */
+int
+dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+
+ op_errno = local->op_errno;
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, local->fd,
+ local->rebalance.xdata);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+/*
+ * flush fop entry point: winds flush to the cached subvolume of the fd.
+ *
+ * Setup failures unwind with -1 (ENOMEM without a local, EINVAL without a
+ * cached subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* First attempt; dht_flush_cbk only retries when call_cnt is 1. */
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, fd, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+/*
+ * Callback for fsync wound by dht_fsync() and dht_fsync2().
+ *
+ * First attempt (local->call_cnt == 1): the reply is saved through
+ * dht_set_local_rebalance(). If 'postbuf' carries the migration phase-1
+ * marker, the attributes are merged into the local and fsync is repeated on
+ * the destination - directly when the migration info is usable and the fd is
+ * open there, otherwise via dht_rebalance_in_progress_check(). If it carries
+ * the phase-2 marker, dht_rebalance_complete_check() is invoked.
+ *
+ * Second attempt: attributes saved from the first reply are merged into the
+ * buffers being returned, when local->stbuf.ia_blocks is non-zero.
+ *
+ * Whatever is not handed over is unwound with the phase-1 flags stripped.
+ * Always returns 0.
+ */
+int
+dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ /* Errors other than "inode missing" are reported as they are. */
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+ inode = local->fd->inode;
+
+ local->rebalance.target_op_fn = dht_fsync2;
+ dht_set_local_rebalance (this, local, NULL, prebuf,
+ postbuf, xdata);
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ dht_inode_ctx_get_mig_info (this, inode, &src_subvol, &dst_subvol);
+
+ if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_fsync2 (this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+
+ if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+/*
+ * Second-attempt handler for fsync; called directly by dht_fsync_cbk and
+ * registered there as rebalance.target_op_fn.
+ *
+ * When we_are_not_migrating(ret) is true, the saved first reply is unwound
+ * unchanged. Otherwise fsync is wound again to 'subvol' on local->fd with
+ * the datasync value saved in local->rebalance.flags, and call_cnt set to 2.
+ * A missing frame/local or a NULL 'subvol' unwinds with -1.
+ * Always returns 0.
+ */
+int
+dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND (fsync, frame, local->op_ret,
+ op_errno, &local->rebalance.prebuf,
+ &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * fsync fop entry point: winds fsync to the cached subvolume of the fd.
+ *
+ * 'datasync' is saved in local->rebalance.flags so that dht_fsync2 can repeat
+ * the call. Setup failures unwind with -1 (ENOMEM without a local, EINVAL
+ * without a cached subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+           dict_t *xdata)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
+        if (!local) {
+                op_errno = ENOMEM;
+
+                goto err;
+        }
+
+        /* First attempt; dht_fsync_cbk only retries when call_cnt is 1. */
+        local->call_cnt = 1;
+        local->rebalance.flags = datasync;
+
+        /* Same guard as dht_flush/dht_readv: never dereference a missing
+         * cached subvolume. */
+        subvol = local->cached_subvol;
+        if (!subvol) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+                    fd, datasync, xdata);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+/*
+ * Callback for lk wound by dht_lk() and dht_lk2().
+ *
+ * A reply to the first attempt (local->call_cnt == 1) is saved in the local.
+ * If it failed with EREMOTE the lock is retried through dht_lk2: directly
+ * when the inode context names a destination and the fd is open on it,
+ * otherwise via dht_rebalance_complete_check(). Everything else is unwound
+ * after dht_lk_inode_unref() has been called. Always returns 0.
+ *
+ * NOTE(review): the '!local' path also reaches dht_lk_inode_unref(frame, ..);
+ * confirm that helper tolerates a frame without a local.
+ */
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* A reply to the second attempt is never retried again. */
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->rebalance.target_op_fn = dht_lk2;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (xdata)
+ local->rebalance.xdata = dict_ref (xdata);
+
+ if (op_errno == EREMOTE) {
+ dht_inode_ctx_get_mig_info (this, local->fd->inode,
+ NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) {
+ dht_lk2 (this, subvol, frame, 0);
+ return 0;
+ } else {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret) {
+ return 0;
+ }
+ }
+ }
+
+out:
+ dht_lk_inode_unref (frame, op_ret);
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+
+ return 0;
+}
+
+/*
+ * Second-attempt handler for lk; called directly by dht_lk_cbk and registered
+ * there as rebalance.target_op_fn.
+ *
+ * Winds lk to 'subvol' on local->fd with the command and flock saved in
+ * local->rebalance by dht_lk(), after setting call_cnt to 2. 'ret' is not
+ * examined here. A missing frame/local or a NULL 'subvol' unwinds with -1.
+ * Always returns 0.
+ */
+int
+dht_lk2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+
+ op_errno = local->op_errno;
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd,
+ local->rebalance.lock_cmd, &local->rebalance.flock,
+ local->rebalance.xdata);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/*
+ * lk fop entry point: winds the lock request to the subvolume returned by
+ * dht_get_lock_subvolume().
+ *
+ * The lock type is recorded in local->lock_type, and the flock and command
+ * are saved in local->rebalance so that dht_lk2 can repeat the request.
+ * Setup failures unwind with -1 (ENOMEM without a local, EINVAL without a
+ * lock subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+        fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        xlator_t    *lock_subvol = NULL;
+        int          op_errno    = -1;
+        dht_local_t *local       = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_LK);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->lock_type = flock->l_type;
+        lock_subvol = dht_get_lock_subvolume (this, flock, local);
+        if (!lock_subvol) {
+                /* The value logged is the fd pointer, so label it as such
+                 * (matches the message in dht_finodelk). */
+                gf_msg_debug (this->name, 0,
+                              "no lock subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local->rebalance.flock = *flock;
+        local->rebalance.lock_cmd = cmd;
+
+        /* First attempt; dht_lk_cbk only retries when call_cnt is 1. */
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd,
+                    cmd, flock, xdata);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/* Callback for lease: passes the reply straight up to the caller.
+ * Always returns 0. */
+int
+dht_lease_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (lease, frame, op_ret, op_errno, lease, xdata);
+
+ return 0;
+}
+
+/*
+ * lease fop entry point: winds lease to the subvolume returned by
+ * dht_subvol_get_cached() for loc->inode. No dht local is created, so the
+ * arguments are not preserved for a retry (see the TODO below).
+ *
+ * Failures unwind with -1 (EINVAL without a cached subvolume, the current
+ * 'errno' after a failed validation). Always returns 0.
+ */
+int
+dht_lease (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct gf_lease *lease, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND (frame, dht_lease_cbk, subvol, subvol->fops->lease,
+ loc, lease, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lease, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Callback for readlink. Symlinks are currently not migrated, so no
+ * rebalance check is made here.
+ *
+ * A failed reply is passed through unchanged; a successful reply arriving on
+ * a frame without a local is turned into -1/EINVAL. The phase-1 flags are
+ * stripped from 'stbuf' before unwinding. Always returns 0.
+ */
+int
+dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, const char *path,
+                  struct iatt *stbuf, dict_t *xdata)
+{
+        dht_local_t *local = frame->local;
+
+        if ((op_ret != -1) && (local == NULL)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+        DHT_STRIP_PHASE1_FLAGS (stbuf);
+        DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * readlink fop entry point: winds readlink to the cached subvolume of the
+ * inode.
+ *
+ * Setup failures unwind with -1 (ENOMEM without a local, EINVAL without a
+ * cached subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_readlink_cbk,
+ subvol, subvol->fops->readlink,
+ loc, size, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/* Currently no translators on top of 'distribute' will be using
+ * the fops below, hence 'migration' related checks are not implemented
+ * for them.
+ */
+
+/* Callback for xattrop: passes the reply straight up to the caller.
+ * Always returns 0. */
+int
+dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+/*
+ * xattrop fop entry point: winds xattrop to the cached subvolume of the
+ * inode.
+ *
+ * Setup failures unwind with -1 (ENOMEM without a local, EINVAL without a
+ * cached subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int
+dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for gfid=%s",
+ uuid_utoa (loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_xattrop_cbk,
+ subvol, subvol->fops->xattrop,
+ loc, flags, dict, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+/* Callback for fxattrop: passes the reply straight up to the caller.
+ * Always returns 0. */
+int
+dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+/*
+ * fxattrop fop entry point: winds fxattrop to the subvolume returned by
+ * dht_subvol_get_cached() for fd->inode. Unlike dht_xattrop, no dht local is
+ * created.
+ *
+ * Failures unwind with -1 (EINVAL without a cached subvolume, the current
+ * 'errno' after a failed validation). Always returns 0.
+ */
+int
+dht_fxattrop (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame,
+ dht_fxattrop_cbk,
+ subvol, subvol->fops->fxattrop,
+ fd, flags, dict, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+/* Callback for inodelk: calls dht_lk_inode_unref() with the result of the
+ * lock request, then passes the reply up to the caller. Always returns 0. */
+int
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ dht_lk_inode_unref (frame, op_ret);
+ DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+/*
+ * inodelk fop entry point: winds the lock request to the subvolume returned
+ * by dht_get_lock_subvolume().
+ *
+ * The lock type is recorded in local->lock_type before the subvolume lookup.
+ * Setup failures unwind with -1 (ENOMEM without a local, EINVAL without a
+ * lock subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->lock_type = lock->l_type;
+ lock_subvol = dht_get_lock_subvolume (this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no lock subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_inodelk_cbk,
+ lock_subvol, lock_subvol->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+/* Callback for finodelk: calls dht_lk_inode_unref() with the result of the
+ * lock request, then passes the reply up to the caller. Always returns 0. */
+int
+dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+
+ dht_lk_inode_unref (frame, op_ret);
+ DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+/*
+ * finodelk fop entry point; the fd based counterpart of dht_inodelk. Winds
+ * the lock request to the subvolume returned by dht_get_lock_subvolume().
+ *
+ * Setup failures unwind with -1 (ENOMEM without a local, EINVAL without a
+ * lock subvolume, the current 'errno' after a failed validation).
+ * Always returns 0.
+ */
+int
+dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *lock_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ /* NOTE(review): the local is created with GF_FOP_INODELK, the same id
+ * dht_inodelk uses, rather than a finodelk-specific one - confirm
+ * this is intentional. */
+ local = dht_local_init (frame, NULL, fd, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->lock_type = lock->l_type;
+
+ lock_subvol = dht_get_lock_subvolume (this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no lock subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ STACK_WIND (frame, dht_finodelk_cbk, lock_subvol,
+ lock_subvol->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+
+ return 0;
+
+err:
+ /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
new file mode 100644
index 00000000000..112685b659e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -0,0 +1,1208 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "dht-common.h"
+
+/*
+ * Second-attempt ("2") entry points, defined further down in this file.
+ * Each matching *_cbk stores its function in local->rebalance.target_op_fn,
+ * and some callbacks also call it directly with ret == 0. The last
+ * argument is tested with we_are_not_migrating().
+ */
+int dht_writev2 (xlator_t *this, xlator_t *subvol,
+ call_frame_t *frame, int ret);
+int dht_truncate2 (xlator_t *this, xlator_t *subvol,
+ call_frame_t *frame, int ret);
+int dht_setattr2 (xlator_t *this, xlator_t *subvol,
+ call_frame_t *frame, int ret);
+int dht_fallocate2 (xlator_t *this, xlator_t *subvol,
+ call_frame_t *frame, int ret);
+int dht_discard2 (xlator_t *this, xlator_t *subvol,
+ call_frame_t *frame, int ret);
+int dht_zerofill2 (xlator_t *this, xlator_t *subvol,
+ call_frame_t *frame, int ret);
+
+/*
+ * Callback for the writev wound by dht_writev() (local->call_cnt == 1) or
+ * by dht_writev2() (local->call_cnt == 2).
+ *
+ * On the first attempt the result is saved in local and, depending on the
+ * migration phase flagged in postbuf (or on a failure accepted by
+ * dht_inode_missing()), the frame is passed to
+ * dht_rebalance_complete_check(), dht_rebalance_in_progress_check() or
+ * straight to dht_writev2(). Whenever one of those returns 0 (or
+ * dht_writev2() is called) this function returns without unwinding;
+ * in every other case it unwinds writev with the values it received.
+ * Always returns 0.
+ */
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL;
+ xlator_t *subvol2 = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* A failure for which dht_inode_missing() is false is final:
+ * record it in local and unwind. */
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug (this->name, 0,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ /* Reply to the retry issued by dht_writev2() (call_cnt == 2): no
+ * further migration checks are made. */
+ if (local->call_cnt != 1) {
+ /* preserve the modes of source */
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_writev2;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
+
+ dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ /* On 0 we return without unwinding; presumably the helper
+ * continues the fop through rebalance.target_op_fn -- confirm
+ * against the helper. */
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ /* The value stored in ret here is overwritten below without
+ * being examined. */
+ ret = dht_inode_ctx_get_mig_info (this, local->fd->inode,
+ &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid (local->cached_subvol,
+ subvol1, subvol2)) {
+ /* Replay the write on subvol2 right away when
+ * dht_fd_open_on_dst() reports success for it. */
+ if (dht_fd_open_on_dst (this, local->fd, subvol2)) {
+ dht_writev2 (this, subvol2, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+/*
+ * Second attempt of a writev, reached through rebalance.target_op_fn or
+ * directly from dht_writev_cbk().
+ *
+ * If we_are_not_migrating (ret) holds, the result saved by the first
+ * attempt is unwound as-is. Otherwise the write saved in local->rebalance
+ * is replayed on subvol with call_cnt set to 2. A missing frame, local or
+ * subvol unwinds with -1 and the saved op_errno (EINVAL when there is no
+ * local to read it from). Always returns 0.
+ */
+int
+dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+        dht_local_t *local    = NULL;
+        int32_t      op_errno = EINVAL;
+
+        if (!frame || !frame->local)
+                goto fail;
+
+        local    = frame->local;
+        op_errno = local->op_errno;
+
+        if (we_are_not_migrating (ret)) {
+                /* The file is not being migrated by this dht instance:
+                 * hand the untouched first-attempt buffers upwards so a
+                 * higher DHT layer can act on them.
+                 */
+                DHT_STACK_UNWIND (writev, frame, local->op_ret,
+                                  local->op_errno, &local->rebalance.prebuf,
+                                  &local->rebalance.postbuf,
+                                  local->rebalance.xdata);
+        } else if (subvol != NULL) {
+                /* Mark the retry so dht_writev_cbk() does not loop. */
+                local->call_cnt = 2;
+
+                STACK_WIND (frame, dht_writev_cbk,
+                            subvol, subvol->fops->writev,
+                            local->fd, local->rebalance.vector,
+                            local->rebalance.count,
+                            local->rebalance.offset, local->rebalance.flags,
+                            local->rebalance.iobref, NULL);
+        } else {
+                goto fail;
+        }
+
+        return 0;
+
+fail:
+        DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * writev fop: send the write to the cached subvolume of fd.
+ *
+ * A copy of the iovec array plus offset, count, flags and a reference on
+ * iobref are kept in local->rebalance so that dht_writev2() can replay
+ * the write on another subvolume if dht_writev_cbk() detects a migration.
+ *
+ * Failures before winding are unwound with op_ret -1 and:
+ *   ENOMEM - local or the iovec copy could not be allocated,
+ *   EINVAL - fd has no cached subvolume,
+ *   errno  - an argument check failed.
+ * Always returns 0.
+ */
+int
+dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+            struct iovec *vector, int count, off_t off, uint32_t flags,
+            struct iobref *iobref, dict_t *xdata)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (!subvol) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local->rebalance.vector = iov_dup (vector, count);
+        if (!local->rebalance.vector && (count > 0)) {
+                /* Without the copy a later replay from dht_writev2() would
+                 * wind a NULL vector with a non-zero count, so fail now.
+                 * An empty request (count <= 0) is let through because a
+                 * zero-sized allocation may legitimately yield NULL.
+                 */
+                op_errno = ENOMEM;
+                goto err;
+        }
+        local->rebalance.offset = off;
+        local->rebalance.count = count;
+        local->rebalance.flags = flags;
+        local->rebalance.iobref = iobref_ref (iobref);
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_writev_cbk,
+                    subvol, subvol->fops->writev,
+                    fd, vector, count, off, flags, iobref, xdata);
+
+        return 0;
+
+err:
+        /* -1 means no explicit error was chosen: fall back to errno. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+
+/*
+ * Shared callback for truncate and ftruncate: it is wound by
+ * dht_truncate(), dht_ftruncate() and dht_truncate2().
+ *
+ * First attempt (call_cnt == 1): the result is saved in local and the
+ * frame may be passed to dht_rebalance_complete_check(),
+ * dht_rebalance_in_progress_check() or dht_truncate2(), in which case the
+ * function returns without unwinding. Otherwise, and for the second
+ * attempt, the received values are unwound. Note that the unwind is always
+ * issued as 'truncate', also for frames started by dht_ftruncate().
+ * Always returns 0.
+ */
+int
+dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ inode_t *inode = NULL;
+
+ /* Without a frame there is nothing to unwind ('err' just returns);
+ * the other checks jump to 'out' and unwind what was received. */
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* A failure for which dht_inode_missing() is false is final. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+
+ goto out;
+ }
+
+ /* Reply to the retry issued by dht_truncate2() (call_cnt == 2). */
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_truncate2;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
+
+ dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ /* On 0 we return without unwinding; presumably the helper
+ * continues the fop through rebalance.target_op_fn -- confirm. */
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ /* Use the fd's inode when local->fd is set, else the loc's. */
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info (this, inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid (local->cached_subvol,
+ src_subvol, dst_subvol)) {
+ /* Without an fd the retry is issued unconditionally; with
+ * an fd only if dht_fd_open_on_dst() succeeds for it. */
+ if ((!local->fd) || ((local->fd) &&
+ dht_fd_open_on_dst (this, local->fd, dst_subvol))) {
+ dht_truncate2 (this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+
+/*
+ * Second attempt of a truncate/ftruncate, reached through
+ * rebalance.target_op_fn or directly from dht_truncate_cbk().
+ *
+ * If we_are_not_migrating (ret) holds, the first-attempt result saved in
+ * local is unwound. Otherwise the operation is replayed on subvol with
+ * call_cnt set to 2: by path when local->fop is GF_FOP_TRUNCATE, by fd
+ * otherwise. A missing frame, local or subvol unwinds with -1.
+ * Always returns 0.
+ */
+int
+dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+        dht_local_t *local    = NULL;
+        int32_t      op_errno = EINVAL;
+
+        if ((frame == NULL) || (frame->local == NULL))
+                goto fail;
+
+        local    = frame->local;
+        op_errno = local->op_errno;
+
+        if (we_are_not_migrating (ret)) {
+                /* Not migrated by this dht instance: report the saved
+                 * first-attempt result unchanged. */
+                DHT_STACK_UNWIND (truncate, frame, local->op_ret,
+                                  local->op_errno, &local->rebalance.prebuf,
+                                  &local->rebalance.postbuf,
+                                  local->rebalance.xdata);
+                return 0;
+        }
+
+        if (subvol == NULL)
+                goto fail;
+
+        /* Mark the retry so dht_truncate_cbk() does not loop. */
+        local->call_cnt = 2;
+
+        if (local->fop != GF_FOP_TRUNCATE) {
+                STACK_WIND (frame, dht_truncate_cbk, subvol,
+                            subvol->fops->ftruncate, local->fd,
+                            local->rebalance.offset, NULL);
+        } else {
+                STACK_WIND (frame, dht_truncate_cbk, subvol,
+                            subvol->fops->truncate, &local->loc,
+                            local->rebalance.offset, NULL);
+        }
+
+        return 0;
+
+fail:
+        DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * truncate fop: send a path-based truncate to the cached subvolume of
+ * loc->inode. The offset is kept in local->rebalance so dht_truncate2()
+ * can replay the call. Failures before winding are unwound with -1 and
+ * ENOMEM (no local), EINVAL (no cached subvolume) or errno (an argument
+ * check failed). Always returns 0.
+ */
+int
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+              dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol == NULL) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for gfid=%s",
+                              uuid_utoa (loc->inode->gfid));
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* State needed by dht_truncate_cbk() / dht_truncate2(). */
+        local->call_cnt = 1;
+        local->rebalance.offset = offset;
+
+        STACK_WIND (frame, dht_truncate_cbk,
+                    subvol, subvol->fops->truncate,
+                    loc, offset, xdata);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * ftruncate fop: send an fd-based truncate to the cached subvolume of fd.
+ * The reply is handled by dht_truncate_cbk(), shared with dht_truncate().
+ * The offset is kept in local->rebalance for a possible replay. Failures
+ * before winding are unwound with -1 and ENOMEM (no local), EINVAL (no
+ * cached subvolume) or errno (an argument check failed).
+ * Always returns 0.
+ */
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol == NULL) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* State needed by dht_truncate_cbk() / dht_truncate2(). */
+        local->call_cnt = 1;
+        local->rebalance.offset = offset;
+
+        STACK_WIND (frame, dht_truncate_cbk,
+                    subvol, subvol->fops->ftruncate,
+                    fd, offset, xdata);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for the fallocate wound by dht_fallocate() (call_cnt == 1) or
+ * by dht_fallocate2() (call_cnt == 2).
+ *
+ * First attempt: the result is saved in local and the frame may be passed
+ * to dht_rebalance_complete_check(), dht_rebalance_in_progress_check() or
+ * dht_fallocate2(); in those cases the function returns without
+ * unwinding. Otherwise, and for the second attempt, fallocate is unwound
+ * with the received values. Always returns 0.
+ */
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ /* Without a frame there is nothing to unwind ('err' just returns);
+ * the other checks jump to 'out' and unwind what was received. */
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* A failure for which dht_inode_missing() is false is final. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+
+ goto out;
+ }
+
+ /* Reply to the retry issued by dht_fallocate2() (call_cnt == 2). */
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fallocate2;
+
+ dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ /* On 0 we return without unwinding; presumably the helper
+ * continues the fop through rebalance.target_op_fn -- confirm. */
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ dht_inode_ctx_get_mig_info (this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid (local->cached_subvol,
+ src_subvol, dst_subvol)) {
+ /* Replay on dst_subvol right away when
+ * dht_fd_open_on_dst() reports success for it. */
+ if (dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
+ dht_fallocate2 (this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+/*
+ * Second attempt of a fallocate, reached through rebalance.target_op_fn
+ * or directly from dht_fallocate_cbk().
+ *
+ * If we_are_not_migrating (ret) holds, the first-attempt result saved in
+ * local is unwound. Otherwise the request saved in local->rebalance
+ * (flags, offset, size) is replayed on subvol with call_cnt set to 2.
+ * A missing frame, local or subvol unwinds with -1. Always returns 0.
+ */
+int
+dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+        dht_local_t *local    = NULL;
+        int32_t      op_errno = EINVAL;
+
+        if ((frame == NULL) || (frame->local == NULL))
+                goto fail;
+
+        local    = frame->local;
+        op_errno = local->op_errno;
+
+        if (we_are_not_migrating (ret)) {
+                /* Not migrated by this dht instance: pass the saved
+                 * first-attempt buffers up so a higher DHT layer can
+                 * deal with them.
+                 */
+                DHT_STACK_UNWIND (fallocate, frame, local->op_ret,
+                                  local->op_errno,
+                                  &local->rebalance.prebuf,
+                                  &local->rebalance.postbuf,
+                                  local->rebalance.xdata);
+        } else if (subvol != NULL) {
+                /* Mark the retry so dht_fallocate_cbk() does not loop. */
+                local->call_cnt = 2;
+
+                STACK_WIND (frame, dht_fallocate_cbk, subvol,
+                            subvol->fops->fallocate, local->fd,
+                            local->rebalance.flags, local->rebalance.offset,
+                            local->rebalance.size, NULL);
+        } else {
+                goto fail;
+        }
+
+        return 0;
+
+fail:
+        DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fallocate fop: send the request to the cached subvolume of fd. mode,
+ * offset and len are kept in local->rebalance (as flags, offset, size) so
+ * dht_fallocate2() can replay the call. Failures before winding are
+ * unwound with -1 and ENOMEM (no local), EINVAL (no cached subvolume) or
+ * errno (an argument check failed). Always returns 0.
+ */
+int
+dht_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+               off_t offset, size_t len, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol == NULL) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* State needed by dht_fallocate_cbk() / dht_fallocate2(). */
+        local->call_cnt = 1;
+        local->rebalance.flags = mode;
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+
+        STACK_WIND (frame, dht_fallocate_cbk,
+                    subvol, subvol->fops->fallocate,
+                    fd, mode, offset, len, xdata);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for the discard wound by dht_discard() (call_cnt == 1) or by
+ * dht_discard2() (call_cnt == 2).
+ *
+ * First attempt: the result is saved in local and the frame may be passed
+ * to dht_rebalance_complete_check(), dht_rebalance_in_progress_check() or
+ * dht_discard2(); in those cases the function returns without unwinding.
+ * Otherwise, and for the second attempt, discard is unwound with the
+ * received values. Always returns 0.
+ */
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ /* Without a frame there is nothing to unwind ('err' just returns);
+ * the other checks jump to 'out' and unwind what was received. */
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* A failure for which dht_inode_missing() is false is final. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+
+ goto out;
+ }
+
+ /* Reply to the retry issued by dht_discard2() (call_cnt == 2). */
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_discard2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ /* On 0 we return without unwinding; presumably the helper
+ * continues the fop through rebalance.target_op_fn -- confirm. */
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ dht_inode_ctx_get_mig_info (this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol,
+ src_subvol, dst_subvol)) {
+ /* Replay on dst_subvol right away when
+ * dht_fd_open_on_dst() reports success for it. */
+ if (dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
+ dht_discard2 (this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+/*
+ * Second attempt of a discard, reached through rebalance.target_op_fn or
+ * directly from dht_discard_cbk().
+ *
+ * If we_are_not_migrating (ret) holds, the first-attempt result saved in
+ * local is unwound. Otherwise the request saved in local->rebalance
+ * (offset, size) is replayed on subvol with call_cnt set to 2. A missing
+ * frame, local or subvol unwinds with -1. Always returns 0.
+ */
+int
+dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+        dht_local_t *local    = NULL;
+        int32_t      op_errno = EINVAL;
+
+        if ((frame == NULL) || (frame->local == NULL))
+                goto fail;
+
+        local    = frame->local;
+        op_errno = local->op_errno;
+
+        if (we_are_not_migrating (ret)) {
+                /* Not migrated by this dht instance: pass the saved
+                 * first-attempt buffers up so a higher DHT layer can
+                 * deal with them.
+                 */
+                DHT_STACK_UNWIND (discard, frame, local->op_ret,
+                                  local->op_errno,
+                                  &local->rebalance.prebuf,
+                                  &local->rebalance.postbuf,
+                                  local->rebalance.xdata);
+        } else if (subvol != NULL) {
+                /* Mark the retry so dht_discard_cbk() does not loop. */
+                local->call_cnt = 2;
+
+                STACK_WIND (frame, dht_discard_cbk, subvol,
+                            subvol->fops->discard, local->fd,
+                            local->rebalance.offset, local->rebalance.size,
+                            NULL);
+        } else {
+                goto fail;
+        }
+
+        return 0;
+
+fail:
+        DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * discard fop: send the request to the cached subvolume of fd. offset
+ * and len are kept in local->rebalance so dht_discard2() can replay the
+ * call. Failures before winding are unwound with -1 and ENOMEM (no
+ * local), EINVAL (no cached subvolume) or errno (an argument check
+ * failed). Always returns 0.
+ */
+int
+dht_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             size_t len, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol == NULL) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* State needed by dht_discard_cbk() / dht_discard2(). */
+        local->call_cnt = 1;
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+
+        STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
+                    fd, offset, len, xdata);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for the zerofill wound by dht_zerofill() (call_cnt == 1) or by
+ * dht_zerofill2() (call_cnt == 2).
+ *
+ * First attempt: the result is saved in local and the frame may be passed
+ * to dht_rebalance_complete_check(), dht_rebalance_in_progress_check() or
+ * dht_zerofill2(); in those cases the function returns without unwinding.
+ * Otherwise, and for the second attempt, zerofill is unwound with the
+ * received values. Always returns 0.
+ */
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
+
+ /* Without a frame there is nothing to unwind ('err' just returns);
+ * the other checks jump to 'out' and unwind what was received. */
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* A failure for which dht_inode_missing() is false is final. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ /* Reply to the retry issued by dht_zerofill2() (call_cnt == 2). */
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_zerofill2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ /* On 0 we return without unwinding; presumably the helper
+ * continues the fop through rebalance.target_op_fn -- confirm. */
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ /* The value stored in ret here is overwritten below without
+ * being examined. */
+ ret = dht_inode_ctx_get_mig_info (this, local->fd->inode,
+ &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid (local->cached_subvol,
+ subvol1, subvol2)) {
+ /* Replay on subvol2 right away when dht_fd_open_on_dst()
+ * reports success for it. */
+ if (dht_fd_open_on_dst (this, local->fd, subvol2)) {
+ dht_zerofill2 (this, subvol2, frame, 0);
+ return 0;
+ }
+ }
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+/*
+ * Second attempt of a zerofill, reached through rebalance.target_op_fn or
+ * directly from dht_zerofill_cbk().
+ *
+ * If we_are_not_migrating (ret) holds, the first-attempt result saved in
+ * local is unwound. Otherwise the request saved in local->rebalance
+ * (offset, size) is replayed on subvol with call_cnt set to 2. A missing
+ * frame, local or subvol unwinds with -1. Always returns 0.
+ */
+int
+dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+        dht_local_t *local    = NULL;
+        int32_t      op_errno = EINVAL;
+
+        if ((frame == NULL) || (frame->local == NULL))
+                goto fail;
+
+        local    = frame->local;
+        op_errno = local->op_errno;
+
+        if (we_are_not_migrating (ret)) {
+                /* Not migrated by this dht instance: pass the saved
+                 * first-attempt buffers up so a higher DHT layer can
+                 * deal with them.
+                 */
+                DHT_STACK_UNWIND (zerofill, frame, local->op_ret,
+                                  local->op_errno,
+                                  &local->rebalance.prebuf,
+                                  &local->rebalance.postbuf,
+                                  local->rebalance.xdata);
+        } else if (subvol != NULL) {
+                /* Mark the retry so dht_zerofill_cbk() does not loop. */
+                local->call_cnt = 2;
+
+                STACK_WIND (frame, dht_zerofill_cbk, subvol,
+                            subvol->fops->zerofill, local->fd,
+                            local->rebalance.offset, local->rebalance.size,
+                            NULL);
+        } else {
+                goto fail;
+        }
+
+        return 0;
+
+fail:
+        DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * zerofill fop: send the request to the cached subvolume of fd. offset
+ * and len are kept in local->rebalance so dht_zerofill2() can replay the
+ * call. Failures before winding are unwound with -1 and ENOMEM (no
+ * local), EINVAL (no cached subvolume) or errno (an argument check
+ * failed). Always returns 0.
+ */
+int
+dht_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              off_t len, dict_t *xdata)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        subvol = local->cached_subvol;
+        if (subvol == NULL) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* State needed by dht_zerofill_cbk() / dht_zerofill2(). */
+        local->call_cnt = 1;
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+
+        STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+                    fd, offset, len, xdata);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+
+/*
+ * Callback for setattr()/fsetattr() on a regular file; it is wound by
+ * dht_setattr(), dht_fsetattr() and dht_setattr2() and handles the case
+ * of a file under migration.
+ *
+ * Only the first attempt (call_cnt == 1) is checked: when it failed with
+ * an errno accepted by dht_inode_missing(), or postbuf carries the
+ * phase-2 flag, the frame is passed to dht_rebalance_complete_check() and
+ * nothing is unwound here if that returns 0. In every other case the
+ * received values are unwound, always as 'setattr'. Always returns 0.
+ */
+int
+dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ /* A failure for which dht_inode_missing() is false is final. */
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ /* Reply to the retry issued by dht_setattr2() (call_cnt == 2). */
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ local->rebalance.target_op_fn = dht_setattr2;
+
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+
+ dht_set_local_rebalance (this, local, NULL, prebuf,
+ postbuf, xdata);
+
+ ret = dht_rebalance_complete_check (this, frame);
+ /* On 0 we return without unwinding; presumably the helper
+ * continues the fop through rebalance.target_op_fn -- confirm. */
+ if (!ret)
+ return 0;
+ }
+
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here (ie, PHASE1) */
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+/*
+ * Second attempt of a setattr/fsetattr on a regular file, reached through
+ * rebalance.target_op_fn.
+ *
+ * If we_are_not_migrating (ret) holds, the first-attempt result saved in
+ * local is unwound. Otherwise the request saved in local->rebalance
+ * (stbuf, flags) is replayed on subvol with call_cnt set to 2: by path
+ * when local->fop is GF_FOP_SETATTR, by fd otherwise. A missing frame,
+ * local or subvol unwinds with -1. Always returns 0.
+ */
+int
+dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+        dht_local_t *local    = NULL;
+        int32_t      op_errno = EINVAL;
+
+        if ((frame == NULL) || (frame->local == NULL))
+                goto fail;
+
+        local    = frame->local;
+        op_errno = local->op_errno;
+
+        if (we_are_not_migrating (ret)) {
+                /* Not migrated by this dht instance: pass the saved
+                 * first-attempt buffers up so a higher DHT layer can
+                 * deal with them.
+                 */
+                DHT_STACK_UNWIND (setattr, frame, local->op_ret,
+                                  local->op_errno,
+                                  &local->rebalance.prebuf,
+                                  &local->rebalance.postbuf,
+                                  local->rebalance.xdata);
+                return 0;
+        }
+
+        if (subvol == NULL)
+                goto fail;
+
+        /* Mark the retry so dht_file_setattr_cbk() does not loop. */
+        local->call_cnt = 2;
+
+        if (local->fop != GF_FOP_SETATTR) {
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->fsetattr, local->fd,
+                            &local->rebalance.stbuf, local->rebalance.flags,
+                            NULL);
+        } else {
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->setattr, &local->loc,
+                            &local->rebalance.stbuf, local->rebalance.flags,
+                            NULL);
+        }
+
+        return 0;
+
+fail:
+        DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for setattr/fsetattr on anything that is not a regular file.
+ * dht_setattr() and dht_fsetattr() wind one call per layout entry, so
+ * this runs once per subvolume: a successful reply is merged into
+ * local->prebuf / local->stbuf and sets local->op_ret to 0, a failed one
+ * only records op_errno. The reply that dht_frame_return() reports as the
+ * last one unwinds setattr with the merged result. Always returns 0.
+ */
+int
+dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct iatt *statpre,
+                 struct iatt *statpost, dict_t *xdata)
+{
+        dht_local_t  *local         = frame->local;
+        call_frame_t *prev          = cookie;
+        int           this_call_cnt = 0;
+
+        /* The replies update the same local, hence the frame lock. */
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        dht_iatt_merge (this, &local->prebuf, statpre,
+                                        prev->this);
+                        dht_iatt_merge (this, &local->stbuf, statpost,
+                                        prev->this);
+
+                        local->op_ret = 0;
+                } else {
+                        local->op_errno = op_errno;
+                        gf_msg_debug (this->name, op_errno,
+                                      "subvolume %s returned -1",
+                                      prev->this->name);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        this_call_cnt = dht_frame_return (frame);
+        if (!is_last_call (this_call_cnt))
+                return 0;
+
+        if (local->op_ret == 0)
+                dht_inode_ctx_time_set (local->loc.inode, this,
+                                        &local->stbuf);
+        DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
+                          &local->prebuf, &local->stbuf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * setattr fop.
+ *
+ * Regular files: the request goes to the cached subvolume only and
+ * dht_file_setattr_cbk() deals with a possible migration; stbuf and valid
+ * are saved in local->rebalance so dht_setattr2() can replay the call.
+ * Everything else: the request is wound to every entry of the layout and
+ * dht_setattr_cbk() collects the replies.
+ *
+ * Failures before winding are unwound with op_ret -1 and:
+ *   ENOMEM - local could not be allocated,
+ *   EINVAL - no layout, layout not sane, or (regular file) no cached
+ *            subvolume,
+ *   errno  - an argument check failed.
+ * Always returns 0.
+ */
+int
+dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        xlator_t     *subvol   = NULL;
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        int           op_errno = -1;
+        int           i        = -1;
+        int           call_cnt = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        layout = local->layout;
+        if (!layout) {
+                gf_msg_debug (this->name, 0,
+                              "no layout for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_msg_debug (this->name, 0,
+                              "layout is not sane for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (IA_ISREG (loc->inode->ia_type)) {
+                /* in the regular file _cbk(), we need to check for
+                   migration possibilities */
+                local->rebalance.stbuf = *stbuf;
+                local->rebalance.flags = valid;
+                local->call_cnt = 1;
+                subvol = local->cached_subvol;
+
+                /* Same guard as the other fops in this file: never
+                 * dereference a missing cached subvolume. */
+                if (!subvol) {
+                        gf_msg_debug (this->name, 0,
+                                      "no cached subvolume for path=%s",
+                                      loc->path);
+                        op_errno = EINVAL;
+                        goto err;
+                }
+
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->setattr,
+                            loc, stbuf, valid, xdata);
+
+                return 0;
+        }
+
+        /* One reply per layout entry is expected by dht_setattr_cbk(). */
+        local->call_cnt = call_cnt = layout->cnt;
+
+        for (i = 0; i < call_cnt; i++) {
+                STACK_WIND (frame, dht_setattr_cbk,
+                            layout->list[i].xlator,
+                            layout->list[i].xlator->fops->setattr,
+                            loc, stbuf, valid, xdata);
+        }
+
+        return 0;
+
+err:
+        /* -1 means no explicit error was chosen: fall back to errno. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fsetattr fop (fd-based counterpart of dht_setattr()).
+ *
+ * Regular files: the request goes to the cached subvolume only and
+ * dht_file_setattr_cbk() deals with a possible migration; stbuf and valid
+ * are saved in local->rebalance so dht_setattr2() can replay the call.
+ * Everything else: the request is wound to every entry of the layout and
+ * dht_setattr_cbk() collects the replies.
+ *
+ * Failures before winding are unwound with op_ret -1 and:
+ *   ENOMEM - local could not be allocated,
+ *   EINVAL - no layout, layout not sane, or (regular file) no cached
+ *            subvolume,
+ *   errno  - an argument check failed.
+ * Always returns 0.
+ */
+int
+dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+              int32_t valid, dict_t *xdata)
+{
+        xlator_t     *subvol   = NULL;
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        int           op_errno = -1;
+        int           i        = -1;
+        int           call_cnt = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        layout = local->layout;
+        if (!layout) {
+                gf_msg_debug (this->name, 0,
+                              "no layout for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_msg_debug (this->name, 0,
+                              "layout is not sane for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (IA_ISREG (fd->inode->ia_type)) {
+                /* in the regular file _cbk(), we need to check for
+                   migration possibilities */
+                local->rebalance.stbuf = *stbuf;
+                local->rebalance.flags = valid;
+                local->call_cnt = 1;
+                subvol = local->cached_subvol;
+
+                /* Same guard as the other fops in this file: never
+                 * dereference a missing cached subvolume. */
+                if (!subvol) {
+                        gf_msg_debug (this->name, 0,
+                                      "no cached subvolume for fd=%p", fd);
+                        op_errno = EINVAL;
+                        goto err;
+                }
+
+                STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+                            subvol->fops->fsetattr,
+                            fd, stbuf, valid, xdata);
+
+                return 0;
+        }
+
+        /* One reply per layout entry is expected by dht_setattr_cbk(). */
+        local->call_cnt = call_cnt = layout->cnt;
+
+        for (i = 0; i < call_cnt; i++) {
+                STACK_WIND (frame, dht_setattr_cbk,
+                            layout->list[i].xlator,
+                            layout->list[i].xlator->fops->fsetattr,
+                            fd, stbuf, valid, xdata);
+        }
+
+        return 0;
+
+err:
+        /* -1 means no explicit error was chosen: fall back to errno. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 41b6896743f..4352ffe5756 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -1,32 +1,21 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
#include "byte-order.h"
+#include "dht-messages.h"
+#include "unittest/unittest.h"
+
#define layout_base_size (sizeof (dht_layout_t))
@@ -34,32 +23,39 @@
#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size))
-
dht_layout_t *
dht_layout_new (xlator_t *this, int cnt)
{
- dht_layout_t *layout = NULL;
+ dht_layout_t *layout = NULL;
dht_conf_t *conf = NULL;
+ REQUIRE(NULL != this);
+ REQUIRE(cnt >= 0);
conf = this->private;
- layout = GF_CALLOC (1, layout_size (cnt),
+ layout = GF_CALLOC (1, layout_size (cnt),
gf_dht_mt_dht_layout_t);
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ if (!layout) {
+ goto out;
+ }
layout->type = DHT_HASH_TYPE_DM;
- layout->cnt = cnt;
- if (conf)
+ layout->cnt = cnt;
+
+ if (conf) {
+ layout->spread_cnt = conf->dir_spread_cnt;
layout->gen = conf->gen;
+ }
layout->ref = 1;
+
+ ENSURE(NULL != layout);
+ ENSURE(layout->type == DHT_HASH_TYPE_DM);
+ ENSURE(layout->cnt == cnt);
+ ENSURE(layout->ref == 1);
out:
- return layout;
+ return layout;
}
@@ -67,21 +63,23 @@ dht_layout_t *
dht_layout_get (xlator_t *this, inode_t *inode)
{
dht_conf_t *conf = NULL;
- uint64_t layout_int = 0;
dht_layout_t *layout = NULL;
- int ret = -1;
+ int ret = 0;
conf = this->private;
+ if (!conf)
+ goto out;
+
LOCK (&conf->layout_lock);
{
- ret = inode_ctx_get (inode, this, &layout_int);
- if (ret == 0) {
- layout = (dht_layout_t *) (unsigned long) layout_int;
+ ret = dht_inode_ctx_layout_get (inode, this, &layout);
+ if ((!ret) && layout) {
layout->ref++;
}
}
UNLOCK (&conf->layout_lock);
+out:
return layout;
}
@@ -91,26 +89,27 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
{
dht_conf_t *conf = NULL;
int oldret = -1;
- int ret = 0;
+ int ret = -1;
dht_layout_t *old_layout;
- uint64_t old_layout_int;
conf = this->private;
+ if (!conf || !layout)
+ goto out;
+
LOCK (&conf->layout_lock);
{
- oldret = inode_ctx_get (inode, this, &old_layout_int);
-
- layout->ref++;
- ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long)
- layout);
+ oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
+ if (layout)
+ layout->ref++;
+ ret = dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
- if (oldret == 0) {
- old_layout = (dht_layout_t *) (unsigned long) old_layout_int;
+ if (!oldret) {
dht_layout_unref (this, old_layout);
}
+out:
return ret;
}
@@ -121,10 +120,11 @@ dht_layout_unref (xlator_t *this, dht_layout_t *layout)
dht_conf_t *conf = NULL;
int ref = 0;
- if (layout->preset)
+ if (!layout || layout->preset || !this->private)
return;
conf = this->private;
+
LOCK (&conf->layout_lock);
{
ref = --layout->ref;
@@ -141,7 +141,7 @@ dht_layout_ref (xlator_t *this, dht_layout_t *layout)
{
dht_conf_t *conf = NULL;
- if (layout->preset)
+ if (layout->preset || !this->private)
return layout;
conf = this->private;
@@ -158,535 +158,689 @@ dht_layout_ref (xlator_t *this, dht_layout_t *layout)
xlator_t *
dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
{
- uint32_t hash = 0;
+ uint32_t hash = 0;
xlator_t *subvol = NULL;
- int i = 0;
- int ret = 0;
-
-
- ret = dht_hash_compute (layout->type, name, &hash);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hash computation failed for type=%d name=%s",
- layout->type, name);
- goto out;
- }
+ int i = 0;
+ int ret = 0;
+
+ ret = dht_hash_compute (this, layout->type, name, &hash);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_COMPUTE_HASH_FAILED,
+ "hash computation failed for type=%d name=%s",
+ layout->type, name);
+ goto out;
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].start <= hash
- && layout->list[i].stop >= hash) {
- subvol = layout->list[i].xlator;
- break;
- }
- }
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash
+ && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume for hash (value) = %u", hash);
- }
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume for hash (value) = %u", hash);
+ }
out:
- return subvol;
+ return subvol;
}
dht_layout_t *
dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- layout = conf->file_layouts[i];
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
+ }
+ }
- return layout;
+out:
+ return layout;
}
int
dht_layouts_init (xlator_t *this, dht_conf_t *conf)
{
- dht_layout_t *layout = NULL;
- int i = 0;
- int ret = -1;
-
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
+
+ if (!conf)
+ goto out;
- conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
- sizeof (dht_layout_t *),
+ conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
+ sizeof (dht_layout_t *),
gf_dht_mt_dht_layout_t);
- if (!conf->file_layouts) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ if (!conf->file_layouts) {
+ goto out;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- layout = dht_layout_new (this, 1);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new (this, 1);
- if (!layout) {
- goto out;
- }
+ if (!layout) {
+ goto out;
+ }
- layout->preset = 1;
+ layout->preset = 1;
- layout->list[0].xlator = conf->subvolumes[i];
+ layout->list[0].xlator = conf->subvolumes[i];
- conf->file_layouts[i] = layout;
- }
+ conf->file_layouts[i] = layout;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p)
+ int pos, int32_t **disk_layout_p)
{
- int ret = -1;
- int32_t *disk_layout = NULL;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
- disk_layout = GF_CALLOC (5, sizeof (int),
+ disk_layout = GF_CALLOC (5, sizeof (int),
gf_dht_mt_int32_t);
- if (!disk_layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ if (!disk_layout) {
+ goto out;
+ }
- disk_layout[0] = hton32 (1);
- disk_layout[1] = hton32 (layout->type);
- disk_layout[2] = hton32 (layout->list[pos].start);
- disk_layout[3] = hton32 (layout->list[pos].stop);
+ disk_layout[0] = hton32 (layout->list[pos].commit_hash);
+ disk_layout[1] = hton32 (layout->type);
+ disk_layout[2] = hton32 (layout->list[pos].start);
+ disk_layout[3] = hton32 (layout->list[pos].stop);
- if (disk_layout_p)
- *disk_layout_p = disk_layout;
- ret = 0;
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ else
+ GF_FREE (disk_layout);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
+int
+dht_disk_layout_extract_for_subvol (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol)
+ break;
+ }
+
+ if (i == layout->cnt)
+ return -1;
+
+ return dht_disk_layout_extract (this, layout, i, disk_layout_p);
+}
int
dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw)
+ int pos, void *disk_layout_raw, int disk_layout_len)
{
- int cnt = 0;
- int type = 0;
- int start_off = 0;
- int stop_off = 0;
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+ int commit_hash = 0;
int disk_layout[4];
- /* TODO: assert disk_layout_ptr is of required length */
-
- memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+ if (!disk_layout_raw) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ DHT_MSG_LAYOUT_MERGE_FAILED,
+ "error no layout on disk for merge");
+ return -1;
+ }
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "disk layout has invalid count %d", cnt);
+ GF_ASSERT (disk_layout_len == sizeof (disk_layout));
+
+ memcpy (disk_layout, disk_layout_raw, disk_layout_len);
+
+ type = ntoh32 (disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_msg_debug (this->name, 0, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
+ default:
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ DHT_MSG_INVALID_DISK_LAYOUT,
+ "Invalid disk layout: "
+ "Catastrophic error layout with unknown type found %d",
+ disk_layout[1]);
return -1;
}
- /* TODO: assert type is compatible */
- type = ntoh32 (disk_layout[1]);
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
+ commit_hash = ntoh32 (disk_layout[0]);
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
- layout->list[pos].start = start_off;
- layout->list[pos].stop = stop_off;
+ layout->list[pos].commit_hash = commit_hash;
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
- gf_log (this->name, GF_LOG_TRACE,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
- layout->list[pos].xlator->name);
+ gf_msg_trace (this->name, 0,
+ "merged to layout: %u - %u (type %d, hash %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
- return 0;
+ return 0;
}
-
int
dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
+
+ if (op_ret != 0) {
+ err = op_errno;
+ }
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
+ }
+ }
- if (op_ret != 0) {
- err = op_errno;
- }
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == NULL) {
- layout->list[i].err = err;
- layout->list[i].xlator = subvol;
- break;
- }
- }
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
+ &disk_layout_raw, &disk_layout_len);
+ }
- if (op_ret != 0) {
- ret = 0;
- goto out;
- }
+ if (ret != 0) {
+ layout->list[i].err = 0;
+ gf_msg_trace (this->name, 0,
+ "Missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
- if (xattr) {
- /* during lookup and not mkdir */
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw);
- }
+ ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
+ disk_layout_len);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_MERGE_FAILED,
+ "layout merge from subvolume %s failed",
+ subvol->name);
+ goto out;
+ }
- if (ret != 0) {
- layout->list[i].err = -1;
- gf_log (this->name, GF_LOG_TRACE,
- "missing disk layout on %s. err = %d",
- subvol->name, err);
- ret = 0;
- goto out;
- }
+ if (layout->commit_hash == 0) {
+ layout->commit_hash = layout->list[i].commit_hash;
+ } else if (layout->commit_hash != layout->list[i].commit_hash) {
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ }
- ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout merge from subvolume %s failed",
- subvol->name);
- goto out;
- }
- layout->list[i].err = 0;
+ layout->list[i].err = 0;
out:
- return ret;
+ return ret;
}
void
dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
- xlator_t *xlator_swap = 0;
- int err_swap = 0;
-
-
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
- xlator_swap = layout->list[i].xlator;
- err_swap = layout->list[i].err;
-
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
- layout->list[i].xlator = layout->list[j].xlator;
- layout->list[i].err = layout->list[j].err;
-
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
- layout->list[j].xlator = xlator_swap;
- layout->list[j].err = err_swap;
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ uint32_t commit_hash_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+ commit_hash_swap = layout->list[i].commit_hash;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+ layout->list[i].commit_hash = layout->list[j].commit_hash;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+ layout->list[j].commit_hash = commit_hash_swap;
+}
+
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
}
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
- return (strcmp (layout->list[i].xlator->name,
- layout->list[j].xlator->name));
+ return (strcmp (layout->list[i].xlator->name,
+ layout->list[j].xlator->name));
+}
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* Check if xlator is already part of layout, and layout is
+ * non-zero. */
+ if (!strcmp (layout->list[i].xlator->name, xlator->name)) {
+ if (layout->list[i].start != layout->list[i].stop)
+ return _gf_true;
+ break;
+ }
+ }
+ return _gf_false;
}
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
- int64_t diff = 0;
+ int64_t diff = 0;
- if (layout->list[i].err || layout->list[j].err)
- diff = layout->list[i].err - layout->list[j].err;
- else
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t) layout->list[i].stop
+ - (int64_t) layout->list[j].stop;
+ goto out;
+ }
+ diff = (int64_t) layout->list[i].start
+ - (int64_t) layout->list[j].start;
- return diff;
+out:
+ return diff;
}
int
dht_layout_sort (dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
-
- /* TODO: O(n^2) -- bad bad */
-
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
- }
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
+
+ /* TODO: O(n^2) -- bad bad */
+
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
- return 0;
+ return 0;
}
int
dht_layout_sort_volname (dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
-
- /* TODO: O(n^2) -- bad bad */
-
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp_volname (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
- }
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- return 0;
+ /* TODO: O(n^2) -- bad bad */
+
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp_volname (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
+
+ return 0;
}
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
- dht_conf_t *conf = NULL;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- uint32_t hole_cnt = 0;
- uint32_t overlap_cnt = 0;
- int i = 0;
- int ret = 0;
- uint32_t prev_stop = 0;
- uint32_t last_stop = 0;
- char is_virgin = 1;
-
-
- conf = this->private;
-
- /* TODO: explain WTF is happening */
-
- last_stop = layout->list[0].start - 1;
- prev_stop = last_stop;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
- }
- continue;
- }
-
- is_virgin = 0;
-
- if ((prev_stop + 1) < layout->list[i].start) {
- hole_cnt++;
- holes += (layout->list[i].start - (prev_stop + 1));
- }
-
- if ((prev_stop + 1) > layout->list[i].start) {
- overlap_cnt++;
- overlaps += ((prev_stop + 1) - layout->list[i].start);
- }
- prev_stop = layout->list[i].stop;
- }
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ int ret = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+ uint32_t no_space = 0;
+
+ /* This function scans through the layout spread of a directory to
+ check if there are any anomalies. Prior to calling this function
+ the layout entries should be sorted in the ascending order.
+
+ If the layout entry has err != 0
+ then increment the corresponding anomaly.
+ else
+ if (start of the current layout entry > stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates a hole in the layout
+ if (start of the current layout entry < stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates an overlap in the layout
+ */
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ case ESTALE:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a
+ * non-participating subvolume (spread-cnt), so do not
+ * check it for anomalies. If start != stop, it is a
+ * regular entry: go on to the hole/overlap checks */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
+ }
+ break;
+ default:
+ misc++;
+ continue;
+ }
+
+ is_virgin = 0;
+
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
+ }
+
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
+
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
- if ((last_stop - prev_stop) || is_virgin)
- hole_cnt++;
- holes += (last_stop - prev_stop);
+ if (holes_p)
+ *holes_p = hole_cnt;
- if (holes_p)
- *holes_p = hole_cnt;
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
- if (overlaps_p)
- *overlaps_p = overlap_cnt;
+ if (missing_p)
+ *missing_p = missing;
- if (missing_p)
- *missing_p = missing;
+ if (down_p)
+ *down_p = down;
- if (down_p)
- *down_p = down;
+ if (misc_p)
+ *misc_p = misc;
- if (misc_p)
- *misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
- return ret;
+ return ret;
+}
+
+
+int
+dht_layout_missing_dirs (dht_layout_t *layout)
+{
+ int i = 0, missing = 0;
+
+ if (layout == NULL)
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if ((layout->list[i].err == ENOENT)
+ || ((layout->list[i].err == -1)
+ && (layout->list[i].start == 0)
+ && (layout->list[i].stop == 0))) {
+ missing++;
+ }
+ }
+
+out:
+ return missing;
}
int
dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
{
- int ret = 0;
- int i = 0;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
-
-
- ret = dht_layout_sort (layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "sort failed?! how the ....");
- goto out;
- }
+ int ret = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0, missing_dirs = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ ret = dht_layout_sort (layout);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED,
+ "sort failed?! how the ....");
+ goto out;
+ }
- ret = dht_layout_anomalies (this, loc, layout,
- &holes, &overlaps,
- &missing, &down, &misc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error while finding anomalies in %s -- not good news",
- loc->path);
- goto out;
- }
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ ret = dht_layout_anomalies (this, loc, layout,
+ &holes, &overlaps,
+ &missing, &down, &misc, NULL);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR,
+ "Error finding anomalies in %s, gfid = %s",
+ loc->path, gfid);
+ goto out;
+ }
- if (holes || overlaps) {
- if (missing == layout->cnt) {
- gf_log (this->name, GF_LOG_DEBUG,
- "directory %s looked up first time",
- loc->path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "found anomalies in %s. holes=%d overlaps=%d",
- loc->path, holes, overlaps);
- }
- ret = 1;
- }
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_msg_debug (this->name, 0,
+ "Directory %s looked up first time"
+ " gfid = %s", loc->path, gfid);
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_ANOMALIES_INFO,
+ "Found anomalies in %s (gfid = %s). "
+ "Holes=%d overlaps=%d",
+ loc->path, gfid, holes, overlaps );
+ }
+ ret = -1;
+ }
- for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place to
- * detect this - probably in dht_layout_anomalies()
- */
- if (layout->list[i].err > 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path, strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name : "<>"));
- if (layout->list[i].err == ENOENT)
- ret = 1;
- }
- }
+ if (ret >= 0) {
+ missing_dirs = dht_layout_missing_dirs (layout);
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
+ */
+ if (missing_dirs > 0)
+ ret += missing_dirs;
+ }
out:
- return ret;
+ return ret;
}
-
int
-dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- loc_t *loc, dict_t *xattr)
+dht_dir_has_layout (dict_t *xattr, char *name)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
+
void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
+ return dict_get_ptr (xattr, name, &disk_layout_raw);
+}
- for (idx = 0; idx < layout->cnt; idx++) {
- if (layout->list[idx].xlator == subvol) {
- pos = idx;
- break;
- }
- }
-
- if (pos == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - no layout info for subvolume %s",
- loc->path, subvol->name);
- ret = 1;
- goto out;
- }
+int
+dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr)
+{
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ uint32_t commit_hash = -1;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ if(loc && loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ if (loc) {
+ gf_msg_debug (this->name, 0,
+ "%s - no layout info for subvolume %s",
+ loc ? loc->path : "path not found",
+ subvol->name);
+ }
+ ret = 1;
+ goto out;
+ }
err = layout->list[pos].err;
- if (!xattr) {
+ if (!xattr) {
if (err == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - xattr dictionary is NULL",
- loc->path);
+ if (loc) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_DICT_GET_FAILED,
+ "%s: xattr dictionary is NULL",
+ loc->path);
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_DICT_GET_FAILED,
+ "path not found: "
+ "xattr dictionary is NULL");
+ }
ret = -1;
}
- goto out;
- }
+ goto out;
+ }
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+ dict_ret = dict_get_ptr (xattr, conf->xattr_name,
&disk_layout_raw);
- if (dict_ret < 0) {
- if (err == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - disk layout missing", loc->path);
+ if (dict_ret < 0) {
+ if (err == 0 && layout->list[pos].stop) {
+ if (loc) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_DISK_LAYOUT_MISSING,
+ "%s: Disk layout missing, gfid = %s",
+ loc->path, gfid);
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found: "
+ "Disk layout missing, gfid = %s",
+ gfid);
+ }
ret = -1;
}
- goto out;
- }
+ goto out;
+ }
memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - disk layout has invalid count %d",
- loc->path, count);
- ret = -1;
- goto out;
- }
-
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvol: %s; inode layout - %"PRId32" - %"PRId32"; "
- "disk layout - %"PRId32" - %"PRId32,
- layout->list[pos].xlator->name,
- layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
- ret = 1;
- } else {
- ret = 0;
- }
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+ commit_hash = ntoh32 (disk_layout[0]);
+
+ if ((layout->list[pos].start != start_off)
+ || (layout->list[pos].stop != stop_off)
+ || (layout->list[pos].commit_hash != commit_hash)) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_INFO,
+ "subvol: %s; inode layout - %"PRIu32" - %"PRIu32
+ " - %"PRIu32"; "
+ "disk layout - %"PRIu32" - %"PRIu32" - %"PRIu32,
+ layout->list[pos].xlator->name,
+ layout->list[pos].start, layout->list[pos].stop,
+ layout->list[pos].commit_hash,
+ start_off, stop_off, commit_hash);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
out:
- return ret;
+ return ret;
}
@@ -698,19 +852,22 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
dht_conf_t *conf = NULL;
conf = this->private;
-
- layout = dht_layout_for_subvol (this, subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
- ret = -1;
- goto out;
- }
+ if (!conf)
+ goto out;
+
+ layout = dht_layout_for_subvol (this, subvol);
+ if (!layout) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "no pre-set layout for subvolume %s",
+ subvol ? subvol->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
LOCK (&conf->layout_lock);
{
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
@@ -718,3 +875,18 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
out:
return ret;
}
+
+int
+dht_layout_index_for_subvol (dht_layout_t *layout, xlator_t *subvol)
+{
+ int i = 0, ret = -1;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ ret = i;
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 3f4630cf4f7..deba2138672 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -1,226 +1,360 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
#include "xlator.h"
#include "compat.h"
#include "dht-common.h"
-
-
+#include "dht-messages.h"
int
-dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_local_t *local = NULL;
-
-
- local = frame->local;
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- local->linkfile.inode,
- &local->linkfile.stbuf, NULL, NULL);
-
- return 0;
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_NOT_LINK_FILE_ERROR,
+ "got non-linkfile %s:%s, gfid = %s",
+ prev->this->name, local->loc.path, gfid);
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, postparent, postparent,
+ xattr);
+ return 0;
}
-
+#define is_equal(a, b) ((a) == (b))
int
dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dict_t *xattr = NULL;
- data_t *str_data = NULL;
- int ret = -1;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1)
- goto err;
-
- xattr = get_new_dict ();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->linkfile.xattr = dict_ref (xattr);
- local->linkfile.inode = inode_ref (inode);
-
- str_data = str_to_data (local->linkfile.srcvol->name);
- if (!str_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to initialize linkfile data");
- op_errno = EINVAL;
- }
- str_data = NULL;
-
- local->linkfile.stbuf = *stbuf;
-
- STACK_WIND (frame, dht_linkfile_xattr_cbk,
- prev->this, prev->this->fops->setxattr,
- &local->linkfile.loc, local->linkfile.xattr, 0);
-
- return 0;
-
-err:
- if (str_data) {
- data_destroy (str_data);
- str_data = NULL;
- }
-
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new ();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value. key : %s",
+ conf->link_xattr_name);
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
+ subvol->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+ }
+out:
local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
- return 0;
+ inode, stbuf, preparent, postparent,
+ xdata);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
}
int
dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+ xlator_t *this,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
{
- dht_local_t *local = NULL;
-
-
- local = frame->local;
- local->linkfile.linkfile_cbk = linkfile_cbk;
- local->linkfile.srcvol = tovol;
- loc_copy (&local->linkfile.loc, loc);
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ int need_unref = 0;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+
+ local->linked = _gf_false;
+
+ dict = local->params;
+ if (!dict) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ need_unref = 1;
+ }
+
+
+ if (!gf_uuid_is_null (local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid);
+
+ ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
+ if (ret)
+ gf_msg ("dht-linkfile", GF_LOG_INFO, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value: "
+ "key = gfid-req, gfid = %s ", loc->path, gfid);
+ } else {
+ gf_uuid_unparse(loc->gfid, gfid);
+ }
+
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_msg ("dht-linkfile", GF_LOG_INFO, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value: key = %s,"
+ " gfid = %s", loc->path,
+ GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+
+ ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
+
+ if (ret < 0) {
+ gf_msg (frame->this->name, GF_LOG_INFO, 0,
+ DHT_MSG_CREATE_LINK_FAILED,
+ "%s: failed to initialize linkfile data, gfid = %s",
+ loc->path, gfid);
+ goto out;
+ }
+
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownership */
+ FRAME_SU_DO (frame, dht_local_t);
+ STACK_WIND (frame, dht_linkfile_create_cbk,
+ fromvol, fromvol->fops->mknod, loc,
+ S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
+
+ return 0;
+out:
+ local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
+ loc->inode, NULL, NULL, NULL, NULL);
- STACK_WIND (frame, dht_linkfile_create_cbk,
- fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0);
+ if (need_unref && dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
int
dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ subvol = prev->this;
+
- local = frame->local;
- prev = cookie;
- subvol = prev->this;
+ if (op_ret == -1) {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking linkfile %s on %s failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- }
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_UNLINK_FAILED,
+ "Unlinking linkfile %s (gfid = %s)on "
+ "subvolume %s failed ",
+ local->loc.path, gfid, subvol->name);
+ }
- DHT_STACK_DESTROY (frame);
+ DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc)
+ xlator_t *subvol, loc_t *loc)
{
- call_frame_t *unlink_frame = NULL;
- dht_local_t *unlink_local = NULL;
-
- unlink_frame = copy_frame (frame);
- if (!unlink_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- unlink_local = dht_local_init (unlink_frame);
- if (!unlink_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- loc_copy (&unlink_local->loc, loc);
-
- STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
- subvol, subvol->fops->unlink,
- &unlink_local->loc);
-
- return 0;
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
+
+ unlink_frame = copy_frame (frame);
+ if (!unlink_frame) {
+ goto err;
+ }
+
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init (unlink_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!unlink_local) {
+ goto err;
+ }
+
+ STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
+ subvol, subvol->fops->unlink,
+ &unlink_local->loc, 0, NULL);
+
+ return 0;
err:
- if (unlink_frame)
- DHT_STACK_DESTROY (unlink_frame);
+ if (unlink_frame)
+ DHT_STACK_DESTROY (unlink_frame);
- return -1;
+ return -1;
}
xlator_t *
dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
- dict_t *xattr)
+ dict_t *xattr)
{
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- void *volname = NULL;
- int i = 0, ret = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
+ conf = this->private;
- conf = this->private;
+ if (!xattr)
+ goto out;
- if (!xattr)
- goto out;
+ ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ if ((-1 == ret) || !volname)
+ goto out;
- if ((-1 == ret) || !volname)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
- subvol = conf->subvolumes[i];
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
+ }
+ }
out:
- return subvol;
+ return subvol;
+}
+
+int
+dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_SETATTR_FAILED,
+ "Failed to set attr uid/gid on %s"
+ " :<gfid:%s> ",
+ (loc->path? loc->path: "NULL"),
+ uuid_utoa(local->gfid));
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
}
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+{
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {0,};
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
+
+ DHT_MARK_FOP_INTERNAL (xattr);
+ gf_uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+
+ copy = copy_frame (frame);
+
+ if (!copy)
+ goto out;
+
+ copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+
+ FRAME_SU_DO (copy, dht_local_t);
+
+ STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
+ subvol->fops->setattr, &copy_local->loc,
+ &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), xattr);
+ ret = 0;
+out:
+ if ((ret < 0) && (copy))
+ DHT_STACK_DESTROY (copy);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 4a7a8bd811a..5de5d1838ad 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -1,21 +1,11 @@
-
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -29,14 +19,25 @@ enum gf_dht_mem_types_ {
gf_dht_mt_dht_conf_t,
gf_dht_mt_char,
gf_dht_mt_int32_t,
- gf_dht_mt_dht_local_t,
gf_dht_mt_xlator_t,
gf_dht_mt_dht_layout_t,
gf_switch_mt_dht_conf_t,
gf_switch_mt_dht_du_t,
gf_switch_mt_switch_sched_array,
gf_switch_mt_switch_struct,
+ gf_dht_mt_subvol_time,
+ gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_ctx_stat_time_t,
+ gf_dht_mt_dirent_t,
+ gf_dht_mt_container_t,
+ gf_dht_mt_octx_t,
+ gf_dht_mt_miginfo_t,
+ gf_tier_mt_bricklist_t,
+ gf_tier_mt_ipc_ctr_params_t,
+ gf_dht_mt_fd_ctx_t,
+ gf_tier_mt_qfile_array_t,
gf_dht_mt_end
};
#endif
-
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
new file mode 100644
index 00000000000..8c0b9103df1
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -0,0 +1,1075 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DHT_MESSAGES_H_
+#define _DHT_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file dht-messages.h
+ * \brief DHT log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
+#define GLFS_DHT_NUM_MESSAGES 116
+#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_DHT_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid 109001
+ * @diagnosis Cached subvolume could not be found for the specified
+ * path
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED (GLFS_DHT_BASE + 1)
+
+/*!
+ * @messageid 109002
+ * @diagnosis Linkfile creation failed
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_CREATE_LINK_FAILED (GLFS_DHT_BASE + 2)
+
+/*!
+ * @messageid 109003
+ * @diagnosis The value could not be set for the specified key in
+ * the dictionary
+ *
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_DICT_SET_FAILED (GLFS_DHT_BASE + 3)
+
+/*!
+ * @messageid 109004
+ * @diagnosis Directory attributes could not be healed
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED (GLFS_DHT_BASE + 4)
+
+/*!
+ * @messageid 109005
+ * @diagnosis Self-heal failed for the specified directory
+ * @recommendedaction Ensure that all subvolumes are online
+ * and reachable and perform a lookup operation
+ * on the directory again.
+ *
+ */
+
+#define DHT_MSG_DIR_SELFHEAL_FAILED (GLFS_DHT_BASE + 5)
+
+/*!
+ * @messageid 109006
+ * @diagnosis The extended attributes could not be healed for
+ * the specified directory on the specified subvolume
+ *
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED (GLFS_DHT_BASE + 6)
+
+/*!
+ * @messageid 109007
+ * @diagnosis A lookup operation found the file with the same path
+ * on multiple subvolumes.
+ * @recommendedaction
+ * 1. Create backups of the file on other subvolumes.
+ * 2. Inspect the content of the files to identify
+ * and retain the most appropriate file.
+ *
+ */
+
+#define DHT_MSG_FILE_ON_MULT_SUBVOL (GLFS_DHT_BASE + 7)
+
+/*!
+ * @messageid 109008
+ * @diagnosis A path resolves to a file on one subvolume and a directory
+ * on another
+ * @recommendedaction
+ * 1. Create a backup of the file with a different name
+ * and delete the original file.
+ * 2. In the newly created back up file, remove the "trusted.gfid"
+ * extended attribute.
+ * - Command: setfattr -x "trusted.gfid" \<path to the newly created backup file\>
+ * 3. Perform a new lookup operation on both the new and old paths.
+ * 4. From the mount point, inspect both the paths and retain the
+ * relevant file or directory.
+ *
+ */
+
+#define DHT_MSG_FILE_TYPE_MISMATCH (GLFS_DHT_BASE + 8)
+
+/*!
+ * @messageid 109009
+ * @diagnosis The GFID of the file/directory is different on different subvolumes
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_GFID_MISMATCH (GLFS_DHT_BASE + 9)
+
+/*!
+ * @messageid 109010
+ * @diagnosis The GFID of the specified file/directory is NULL.
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_GFID_NULL (GLFS_DHT_BASE + 10)
+
+/*
+ * @messageid 109011
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED (GLFS_DHT_BASE + 11)
+
+/*!
+ * @messageid 109012
+ * @diagnosis The Distributed Hash Table Translator could not be initiated as the
+ * system is out of memory.
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_INIT_FAILED (GLFS_DHT_BASE + 12)
+
+/*!
+ * @messageid 109013
+ * @diagnosis Invalid DHT configuration in the volfile
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_INVALID_CONFIGURATION (GLFS_DHT_BASE + 13)
+
+/*!
+ * @messageid 109014
+ * @diagnosis Invalid disk layout
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_INVALID_DISK_LAYOUT (GLFS_DHT_BASE + 14)
+
+/*!
+ * @messageid 109015
+ * @diagnosis Invalid DHT configuration option.
+ * @recommendedaction
+ * 1. Reset the option with a valid value using the volume
+ * set command.
+ * 2. Restart the process that logged the message in the
+ * log file.
+ *
+ */
+
+#define DHT_MSG_INVALID_OPTION (GLFS_DHT_BASE + 15)
+
+/*!
+ * @messageid 109016
+ * @diagnosis The fix layout operation failed
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_LAYOUT_FIX_FAILED (GLFS_DHT_BASE + 16)
+
+/*!
+ * @messageid 109017
+ * @diagnosis Layout merge failed
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_LAYOUT_MERGE_FAILED (GLFS_DHT_BASE + 17)
+
+/*!
+ * @messageid 109018
+ * @diagnosis The layout for the specified directory does not match
+ that on the disk.
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_LAYOUT_MISMATCH (GLFS_DHT_BASE + 18)
+
+/*!
+ * @messageid 109019
+ * @diagnosis No layout is present for the specified file/directory
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_LAYOUT_NULL (GLFS_DHT_BASE + 19)
+
+/*!
+ * @messageid 109020
+ * @diagnosis Informational message: Migration of data from the cached
+ * subvolume to the hashed subvolume is complete
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_MIGRATE_DATA_COMPLETE (GLFS_DHT_BASE + 20)
+
+/*!
+ * @messageid 109021
+ * @diagnosis Migration of data failed during the rebalance operation
+ * \n Cause: Directories could not be read to identify the files for the
+ * migration process.
+ * @recommendedaction
+ * The log message would indicate the reason for the failure and
+ * the corrective action depends on the specific error that is
+ * encountered. The error is one of the standard UNIX errors.
+ *
+ */
+
+#define DHT_MSG_MIGRATE_DATA_FAILED (GLFS_DHT_BASE + 21)
+
+/*!
+ * @messageid 109022
+ * @diagnosis Informational message: The file was migrated successfully during
+ * the rebalance operation.
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_MIGRATE_FILE_COMPLETE (GLFS_DHT_BASE + 22)
+
+/*!
+ * @messageid 109023
+ * @diagnosis File migration failed during the rebalance operation
+ * \n Cause: Rebalance moves data from the cached subvolume to
+ * the hashed subvolume. Migrating a single file is a multi-step operation
+ * which involves opening, reading, and writing the data and metadata.
+ * Any failures in this multi-step operation can result in a file
+ * migration failure.
+ * @recommendedaction The log message would indicate the reason for the failure and the
+ * corrective action depends on the specific error that is encountered.
+ * The error is one of the standard UNIX errors.
+ *
+ */
+
+#define DHT_MSG_MIGRATE_FILE_FAILED (GLFS_DHT_BASE + 23)
+
+/*!
+ * @messageid 109024
+ * @diagnosis Out of memory
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_NO_MEMORY (GLFS_DHT_BASE + 24)
+
+/*!
+ * @messageid 109025
+ * @diagnosis The opendir() call failed on the specified directory
+ * \n Cause: When a directory is renamed, the Distribute Hash
+ * table translator checks whether the destination directory
+ * is empty. This message indicates that the opendir() call
+ * on the destination directory has failed.
+ * @recommendedaction The log message would indicate the reason for the
+ * failure and the corrective action depends on the specific
+ * error that is encountered. The error is one of the standard
+ * UNIX errors.
+ *
+ */
+
+#define DHT_MSG_OPENDIR_FAILED (GLFS_DHT_BASE + 25)
+
+/*!
+ * @messageid 109026
+ * @diagnosis The rebalance operation failed.
+ * @recommendedaction Check the log file for details about the failure.
+ * Possible causes:
+ * - A subvolume is down: Restart the rebalance operation after
+ * bringing up all subvolumes.
+ *
+ */
+
+#define DHT_MSG_REBALANCE_FAILED (GLFS_DHT_BASE + 26)
+
+/*!
+ * @messageid 109027
+ * @diagnosis Failed to start the rebalance process.
+ * @recommendedaction Check the log file for details about the failure.
+ *
+ */
+
+#define DHT_MSG_REBALANCE_START_FAILED (GLFS_DHT_BASE + 27)
+
+/*!
+ * @messageid 109028
+ * @diagnosis Informational message that indicates the status of the
+ * rebalance operation and details as to how many files were
+ * migrated, skipped, failed etc
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_REBALANCE_STATUS (GLFS_DHT_BASE + 28)
+
+/*!
+ * @messageid 109029
+ * @diagnosis The rebalance operation was aborted by the user.
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_REBALANCE_STOPPED (GLFS_DHT_BASE + 29)
+
+/*!
+ * @messageid 109030
+ * @diagnosis The file or directory could not be renamed
+ * @recommendedaction Ensure that all the subvolumes are
+ * online and reachable and try renaming
+ * the file or directory again.
+ *
+ */
+
+#define DHT_MSG_RENAME_FAILED (GLFS_DHT_BASE + 30)
+
+/*!
+ * @messageid 109031
+ * @diagnosis Attributes could not be set for the specified file or
+ * directory.
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_SETATTR_FAILED (GLFS_DHT_BASE + 31)
+
+/*!
+ * @messageid 109032
+ * @diagnosis The specified subvolume is running out of file system inodes.
+ If all subvolumes run out of inodes, then new files cannot be created.
+ * @recommendedaction Consider adding more nodes to the cluster if all subvolumes
+ * run out of inodes
+ *
+ */
+
+#define DHT_MSG_SUBVOL_INSUFF_INODES (GLFS_DHT_BASE + 32)
+
+/*!
+ * @messageid 109033
+ * @diagnosis The specified subvolume is running out of disk space. If all
+ subvolumes run out of space, new files cannot be created.
+ * @recommendedaction Consider adding more bricks to the cluster if all subvolumes
+ * run out of disk space.
+ *
+ */
+
+#define DHT_MSG_SUBVOL_INSUFF_SPACE (GLFS_DHT_BASE + 33)
+
+/*!
+ * @messageid 109034
+ * @diagnosis Failed to unlink the specified file/directory
+ * @recommendedaction The log message would indicate the reason
+ for the failure and the corrective action depends on
+ the specific error that is encountered.
+ */
+
+#define DHT_MSG_UNLINK_FAILED (GLFS_DHT_BASE + 34)
+
+
+
+/*!
+ * @messageid 109035
+ * @diagnosis The layout information could not be set in the inode
+ * @recommendedaction None
+ *
+ */
+
+#define DHT_MSG_LAYOUT_SET_FAILED (GLFS_DHT_BASE + 35)
+
+/*!
+ * @messageid 109036
+ * @diagnosis Informational message regarding layout range distribution
+ * for a directory across subvolumes
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LOG_FIXED_LAYOUT (GLFS_DHT_BASE + 36)
+
+/*
+ * @messageid 109037
+ * @diagnosis Informational message regarding error in tier operation
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LOG_TIER_ERROR (GLFS_DHT_BASE + 37)
+
+/*
+ * @messageid 109038
+ * @diagnosis Informational message regarding tier operation
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LOG_TIER_STATUS (GLFS_DHT_BASE + 38)
+
+/*
+ * @messageid 109039
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_GET_XATTR_FAILED (GLFS_DHT_BASE + 39)
+
+/*
+ * @messageid 109040
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FILE_LOOKUP_FAILED (GLFS_DHT_BASE + 40)
+
+/*
+ * @messageid 109041
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_OPEN_FD_FAILED (GLFS_DHT_BASE + 41)
+
+/*
+ * @messageid 109042
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SET_INODE_CTX_FAILED (GLFS_DHT_BASE + 42)
+
+/*
+ * @messageid 109043
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_UNLOCKING_FAILED (GLFS_DHT_BASE + 43)
+
+/*
+ * @messageid 109044
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_DISK_LAYOUT_NULL (GLFS_DHT_BASE + 44)
+
+/*
+ * @messageid 109045
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_INFO (GLFS_DHT_BASE + 45)
+
+/*
+ * @messageid 109046
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_CHUNK_SIZE_INFO (GLFS_DHT_BASE + 46)
+
+/*
+ * @messageid 109047
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LAYOUT_FORM_FAILED (GLFS_DHT_BASE + 47)
+
+/*
+ * @messageid 109048
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_ERROR (GLFS_DHT_BASE + 48)
+
+/*
+ * @messageid 109049
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LAYOUT_SORT_FAILED (GLFS_DHT_BASE + 49)
+
+/*
+ * @messageid 109050
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_REGEX_INFO (GLFS_DHT_BASE + 50)
+
+/*
+ * @messageid 109051
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FOPEN_FAILED (GLFS_DHT_BASE + 51)
+
+/*
+ * @messageid 109052
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SET_HOSTNAME_FAILED (GLFS_DHT_BASE + 52)
+
+/*
+ * @messageid 109053
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_BRICK_ERROR (GLFS_DHT_BASE + 53)
+
+/*
+ * @messageid 109054
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SYNCOP_FAILED (GLFS_DHT_BASE + 54)
+
+/*
+ * @messageid 109055
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_MIGRATE_INFO (GLFS_DHT_BASE + 55)
+
+/*
+ * @messageid 109056
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SOCKET_ERROR (GLFS_DHT_BASE + 56)
+
+/*
+ * @messageid 109057
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_CREATE_FD_FAILED (GLFS_DHT_BASE + 57)
+
+/*
+ * @messageid 109058
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_READDIR_ERROR (GLFS_DHT_BASE + 58)
+
+/*
+ * @messageid 109059
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_CHILD_LOC_BUILD_FAILED (GLFS_DHT_BASE + 59)
+
+/*
+ * @messageid 109060
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SET_SWITCH_PATTERN_ERROR (GLFS_DHT_BASE + 60)
+
+/*
+ * @messageid 109061
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_COMPUTE_HASH_FAILED (GLFS_DHT_BASE + 61)
+
+/*
+ * @messageid 109062
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR (GLFS_DHT_BASE + 62)
+
+/*
+ * @messageid 109063
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_ANOMALIES_INFO (GLFS_DHT_BASE + 63)
+
+/*
+ * @messageid 109064
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LAYOUT_INFO (GLFS_DHT_BASE + 64)
+
+/*
+ * @messageid 109065
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_INODE_LK_ERROR (GLFS_DHT_BASE + 65)
+
+/*
+ * @messageid 109066
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_RENAME_INFO (GLFS_DHT_BASE + 66)
+
+/*
+ * @messageid 109067
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_DATA_NULL (GLFS_DHT_BASE + 67)
+
+/*
+ * @messageid 109068
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED (GLFS_DHT_BASE + 68)
+
+/*
+ * @messageid 109069
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_UNLINK_LOOKUP_INFO (GLFS_DHT_BASE + 69)
+
+/*
+ * @messageid 109070
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO (GLFS_DHT_BASE + 70)
+
+/*
+ * @messageid 109071
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_OPERATION_NOT_SUP (GLFS_DHT_BASE + 71)
+
+/*
+ * @messageid 109072
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_NOT_LINK_FILE_ERROR (GLFS_DHT_BASE + 72)
+
+/*
+ * @messageid 109073
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_CHILD_DOWN (GLFS_DHT_BASE + 73)
+
+/*
+ * @messageid 109074
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_UUID_PARSE_ERROR (GLFS_DHT_BASE + 74)
+
+/*
+ * @messageid 109075
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_GET_DISK_INFO_ERROR (GLFS_DHT_BASE + 75)
+
+/*
+ * @messageid 109076
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_INVALID_VALUE (GLFS_DHT_BASE + 76)
+
+/*
+ * @messageid 109077
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SWITCH_PATTERN_INFO (GLFS_DHT_BASE + 77)
+
+/*
+ * @messageid 109078
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_OP_FAILED (GLFS_DHT_BASE + 78)
+
+/*
+ * @messageid 109079
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LAYOUT_PRESET_FAILED (GLFS_DHT_BASE + 79)
+
+/*
+ * @messageid 109080
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_INVALID_LINKFILE (GLFS_DHT_BASE + 80)
+
+/*
+ * @messageid 109081
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FIX_LAYOUT_INFO (GLFS_DHT_BASE + 81)
+
+/*
+ * @messageid 109082
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_GET_HOSTNAME_FAILED (GLFS_DHT_BASE + 82)
+
+/*
+ * @messageid 109083
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_WRITE_FAILED (GLFS_DHT_BASE + 83)
+
+/*
+ * @messageid 109084
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED (GLFS_DHT_BASE + 84)
+
+/*
+ * @messageid 109085
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FSYNC_FAILED (GLFS_DHT_BASE + 85)
+
+/*
+ * @messageid 109086
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_DECOMMISSION_INFO (GLFS_DHT_BASE + 86)
+
+/*
+ * @messageid 109087
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_BRICK_QUERY_FAILED (GLFS_DHT_BASE + 87)
+
+/*
+ * @messageid 109088
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO (GLFS_DHT_BASE + 88)
+
+/*
+ * @messageid 109089
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED (GLFS_DHT_BASE + 89)
+
+/*
+ * @messageid 109090
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_NOT_FOUND (GLFS_DHT_BASE + 90)
+
+/*
+ * @messageid 109091
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FILE_LOOKUP_ON_DST_FAILED (GLFS_DHT_BASE + 91)
+
+/*
+ * @messageid 109092
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_DISK_LAYOUT_MISSING (GLFS_DHT_BASE + 92)
+
+/*
+ * @messageid 109093
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_DICT_GET_FAILED (GLFS_DHT_BASE + 93)
+
+/*
+ * @messageid 109094
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_REVALIDATE_CBK_INFO (GLFS_DHT_BASE + 94)
+
+/*
+ * @messageid 109095
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_UPGRADE_BRICKS (GLFS_DHT_BASE + 95)
+
+/*
+ * @messageid 109096
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LK_ARRAY_INFO (GLFS_DHT_BASE + 96)
+
+/*
+ * @messageid 109097
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_RENAME_NOT_LOCAL (GLFS_DHT_BASE + 97)
+
+/*
+ * @messageid 109098
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_RECONFIGURE_INFO (GLFS_DHT_BASE + 98)
+
+/*
+ * @messageid 109099
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_INIT_LOCAL_SUBVOL_FAILED (GLFS_DHT_BASE + 99)
+
+/*
+ * @messageid 109100
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SYS_CALL_GET_TIME_FAILED (GLFS_DHT_BASE + 100)
+
+/*
+ * @messageid 109101
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_NO_DISK_USAGE_STATUS (GLFS_DHT_BASE + 101)
+
+/*
+ * @messageid 109102
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SUBVOL_DOWN_ERROR (GLFS_DHT_BASE + 102)
+
+/*
+ * @messageid 109103
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_REBAL_THROTTLE_INFO (GLFS_DHT_BASE + 103)
+
+/*
+ * @messageid 109104
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_COMMIT_HASH_INFO (GLFS_DHT_BASE + 104)
+
+/*
+ * @messageid 109105
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_REBAL_STRUCT_SET (GLFS_DHT_BASE + 105)
+
+/*
+ * @messageid 109106
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_HAS_MIGINFO (GLFS_DHT_BASE + 106)
+
+/*
+ * @messageid 109107
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_LOG_IPC_TIER_ERROR (GLFS_DHT_BASE + 107)
+
+/*
+ * @messageid 109108
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_TIER_PAUSED (GLFS_DHT_BASE + 108)
+
+/*
+ * @messageid 109109
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_TIER_RESUME (GLFS_DHT_BASE + 109)
+
+
+/* @messageid 109110
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_SETTLE_HASH_FAILED (GLFS_DHT_BASE + 110)
+
+/*
+ * @messageid 109111
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED (GLFS_DHT_BASE + 111)
+
+/*
+ * @messageid 109112
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_FD_CTX_SET_FAILED (GLFS_DHT_BASE + 112)
+
+/*
+ * @messageid 109113
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_STALE_LOOKUP (GLFS_DHT_BASE + 113)
+
+/*
+ * @messageid 109114
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_PARENT_LAYOUT_CHANGED (GLFS_DHT_BASE + 114)
+
+/*
+ * @messageid 109115
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOCK_MIGRATION_FAILED (GLFS_DHT_BASE + 115)
+
+/*
+ * @messageid 109116
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED (GLFS_DHT_BASE + 116)
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
new file mode 100644
index 00000000000..4c83ed478c0
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -0,0 +1,4201 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "tier.h"
+#include "dht-common.h"
+#include "xlator.h"
+#include "syscall.h"
+#include <signal.h>
+#include <fnmatch.h>
+#include <signal.h>
+
+
+#define GF_DISK_SECTOR_SIZE 512
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+#define MAX_MIGRATOR_THREAD_COUNT 40
+#define MAX_MIGRATE_QUEUE_COUNT 500
+#define MIN_MIGRATE_QUEUE_COUNT 200
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b))?(a):(b))
+#endif
+
+
+/*
+ * Advance the crawl index 'idx' by one and wrap it around modulo 'sv_cnt'.
+ * Expands to a braced statement block; 'idx' must be a modifiable lvalue, is
+ * evaluated twice, and neither argument is parenthesized in the expansion.
+ */
+#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) { \
+ idx++; \
+ idx %= sv_cnt; \
+ }
+
+/*
+ * Release a directory defrag-meta structure: frees the head, equeue,
+ * iterator, offset_var and fetch_entries members and then the structure
+ * itself. Does nothing when 'dir_dfmeta' is NULL; the caller's pointer is
+ * not reset. The definition ends with a line continuation, so the line that
+ * follows it must stay blank.
+ */
+#define GF_FREE_DIR_DFMETA(dir_dfmeta) { \
+ if (dir_dfmeta) { \
+ GF_FREE (dir_dfmeta->head); \
+ GF_FREE (dir_dfmeta->equeue); \
+ GF_FREE (dir_dfmeta->iterator); \
+ GF_FREE (dir_dfmeta->offset_var); \
+ GF_FREE (dir_dfmeta->fetch_entries); \
+ GF_FREE (dir_dfmeta); \
+ } \
+ } \
+
+/*
+ * Free a dht_container together with what it holds: the entry in df_entry,
+ * the contents of parent_loc and the parent_loc allocation itself.
+ * A NULL container is ignored.
+ */
+void
+gf_defrag_free_container (struct dht_container *container)
+{
+        if (!container)
+                return;
+
+        gf_dirent_entry_free (container->df_entry);
+
+        if (container->parent_loc)
+                loc_wipe (container->parent_loc);
+        GF_FREE (container->parent_loc);
+
+        GF_FREE (container);
+}
+
+/* Store 'ret' in defrag->global_error while holding defrag->lock. */
+void
+dht_set_global_defrag_error (gf_defrag_info_t *defrag, int ret)
+{
+        LOCK (&defrag->lock);
+        defrag->global_error = ret;
+        UNLOCK (&defrag->lock);
+}
+
+/*
+ * Write the data described by 'vec' to 'fd' on subvolume 'to', leaving out
+ * every GF_DISK_SECTOR_SIZE sector for which mem_0filled() returns 0
+ * (presumably an all-zero sector), so that holes are not filled in on the
+ * destination.
+ *
+ * to     - subvolume to write to
+ * fd     - destination fd
+ * vec    - array of buffers holding the data
+ * count  - number of entries in 'vec'
+ * size   - total number of bytes described by 'vec'
+ * offset - file offset corresponding to the first byte of vec[0]
+ * iobref - passed through to syncop_write
+ *
+ * Returns 'size' on success and -1 as soon as a write fails.
+ */
+static int
+dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
+                      int32_t size, off_t offset, struct iobref *iobref)
+{
+        int    i            = 0;
+        int    ret          = -1;
+        int    start_idx    = 0;
+        int    tmp_offset   = 0;
+        int    write_needed = 0;
+        int    buf_len      = 0;
+        int    size_pending = size;
+        off_t  vec_offset   = offset;
+        char  *buf          = NULL;
+
+        /* loop through each vector */
+        for (i = 0; i < count; i++) {
+                buf = vec[i].iov_base;
+                buf_len = vec[i].iov_len;
+
+                /* tmp_offset and write_needed describe the current buffer
+                 * only; whatever was pending for the previous buffer has
+                 * been flushed by the tail write at the end of its pass. */
+                tmp_offset = 0;
+                write_needed = 0;
+
+                for (start_idx = 0;
+                     (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
+                     start_idx += GF_DISK_SECTOR_SIZE) {
+
+                        if (mem_0filled (buf + start_idx,
+                                         GF_DISK_SECTOR_SIZE) != 0) {
+                                write_needed = 1;
+                                continue;
+                        }
+
+                        /* Reached a sector to skip: flush the data that
+                         * accumulated since tmp_offset, if any. */
+                        if (write_needed) {
+                                ret = syncop_write (to, fd, (buf + tmp_offset),
+                                                    (start_idx - tmp_offset),
+                                                    (vec_offset + tmp_offset),
+                                                    iobref, 0, NULL, NULL);
+                                /* 'path' will be logged in calling function */
+                                if (ret < 0) {
+                                        gf_log (THIS->name, GF_LOG_WARNING,
+                                                "failed to write (%s)",
+                                                strerror (-ret));
+                                        ret = -1;
+                                        goto out;
+                                }
+
+                                write_needed = 0;
+                        }
+                        tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
+                }
+
+                if ((start_idx < buf_len) || write_needed) {
+                        /* This means, last chunk is not yet written.. write it */
+                        ret = syncop_write (to, fd, (buf + tmp_offset),
+                                            (buf_len - tmp_offset),
+                                            (vec_offset + tmp_offset), iobref,
+                                            0, NULL, NULL);
+                        if (ret < 0) {
+                                /* 'path' will be logged in calling function */
+                                gf_log (THIS->name, GF_LOG_WARNING,
+                                        "failed to write (%s)",
+                                        strerror (-ret));
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                /* The next buffer continues where this one ended in the
+                 * file. */
+                vec_offset += buf_len;
+                size_pending -= buf_len;
+                if (size_pending <= 0)
+                        break;
+        }
+
+        ret = size;
+out:
+        return ret;
+
+}
+
+/*
+ return values:
+ -1 : failure
+ -2 : success
+
+Hard link migration is carried out in three stages.
+
+(Say there are n hardlinks)
+Stage 1: Setting the new hashed subvol information on the 1st hardlink
+ encountered (linkto setxattr)
+
+Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th
+ hardlink
+
+Stage 3: Physical migration of the data file for nth hardlink
+
+Why to deem "-2" as success and not "0":
+
+ dht_migrate_file expects return value "0" from _is_file_migratable if
+the file has to be migrated.
+
+ _is_file_migratable returns zero only when it is called with the
+flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS".
+
+ gf_defrag_handle_hardlink calls dht_migrate_file for physical migration
+of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS"
+
+Hence, gf_defrag_handle_hardlink returning "0" for success would force
+"dht_migrate_file" to migrate each of the hardlinks, which is not intended.
+
+For each of the three stages mentioned above "-2" is returned, and it is
+converted to "0" in dht_migrate_file.
+
+*/
+
+/*
+ * Handle one name of a multiply-linked file during migration (see the stage
+ * description above).
+ *
+ * this   - dht xlator; this->private supplies the linkto xattr name
+ * loc    - the hardlink being processed; name, gfid and pargfid must be set
+ * xattrs - xattrs of the file; whether the linkto xattr is present selects
+ *          between stage 1 and the later stages
+ * stbuf  - attributes supplied by the caller; ia_nlink is compared with the
+ *          link count found on the hashed subvol
+ *
+ * Returns -2 on success. On failure returns -1, except that a dict_set_str
+ * error code and a non-zero dht_migrate_file result are returned unchanged.
+ */
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+ gf_loglevel_t loglevel = 0;
+ dict_t *link_xattr = NULL;
+
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (gf_uuid_is_null (loc->pargfid)) {
+ gf_msg ("", GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->pargfid is NULL for %s", loc->path);
+ goto out;
+ }
+
+ if (gf_uuid_is_null (loc->gfid)) {
+ gf_msg ("", GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->gfid is NULL for %s", loc->path);
+ goto out;
+ }
+
+ link_xattr = dict_new ();
+ if (!link_xattr) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /*
+ Parallel migration can lead to migration of the hard link multiple
+ times which can lead to data loss. Hence, adding a fresh lookup to
+ decide whether migration is required or not.
+
+ Elaborating the scenario for let say 10 hardlinks [link{1..10}]:
+ Let say the first hard link "link1" does the setxattr of the
+ new hashed subvolume info on the cached file. As there are multiple
+ threads working, we might have already all the links created on the
+ new hashed by the time we reach hardlink let say link5. Now the
+ number of links on hashed is equal to that of cached. Hence, file
+ migration will happen for link6.
+
+ Cached Hashed
+ --------T link6 rwxrwxrwx link6
+
+ Now post above state all the link file on the cached will be zero
+ byte linkto files. Hence, if we still do migration for the following
+ files link{7..10}, we will end up migrating 0 data leading to data
+ loss.
+ Hence, a lookup can make sure whether we need to migrate the
+ file or not.
+ */
+
+ ret = syncop_lookup (this, loc, NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ /*Ignore ENOENT and ESTALE as file might have been
+ migrated already*/
+ if (-ret == ENOENT || -ret == ESTALE) {
+ ret = -2;
+ goto out;
+ }
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:%s lookup failed with ret = %d",
+ loc->path, ret);
+ ret = -1;
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ /* Cached and hashed subvol are the same: nothing to move, report
+ success. */
+ if (hashed_subvol == cached_subvol) {
+ ret = -2;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (link_xattr, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to set dictionary value:"
+ " key = %s for %s",
+ conf->link_xattr_name, loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, link_xattr, 0, NULL,
+ NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Linkto setxattr failed %s -> %s (%s)",
+ cached_subvol->name,
+ loc->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ /* Stage 1 complete: the linkto xattr is now set on the cached
+ subvol. */
+ ret = -2;
+ goto out;
+ } else {
+ /* A missing linkto subvol is only logged; the hashed subvol
+ computed above is then used as the link target. */
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SUBVOL_ERROR,
+ "Failed to get "
+ "linkto subvol for %s", loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link (hashed_subvol, loc, loc, &iatt, NULL, NULL);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+
+ /* EEXIST is logged at debug level and does not abort:
+ execution continues with the lookup below. */
+ loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : \
+ GF_LOG_ERROR;
+ gf_msg (this->name, loglevel, op_errno,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED,
+ "link of %s -> %s"
+ " failed on subvol %s", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name);
+ if (op_errno != EEXIST)
+ goto out;
+ }
+ }
+ /* Refresh iatt from the target subvol to learn its current link
+ count. */
+ ret = syncop_lookup (hashed_subvol, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :Failed lookup %s on %s ",
+ loc->name, hashed_subvol->name);
+
+ ret = -1;
+ goto out;
+ }
+
+ /* Stage 3: the target has as many links as the caller-supplied stbuf
+ reports, so the data itself is migrated now. */
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = -2;
+out:
+ if (link_xattr)
+ dict_unref (link_xattr);
+ return ret;
+}
+
+/*
+ * Decide whether the file at 'loc' may be migrated.
+ *
+ * Return values:
+ *    0       : the caller should go ahead and migrate the file
+ *   -2       : nothing more to do for this file; comes from
+ *              gf_defrag_handle_hardlink (see its description of "-2")
+ *   -ENOTSUP : the file has hardlinks and 'flags' does not request
+ *              hardlink migration
+ *   -1       : failure (directory, lock count unavailable, file has locks)
+ *
+ * Any other non-zero value is the result of gf_defrag_handle_hardlink,
+ * passed through unchanged.
+ */
+static int
+__is_file_migratable (xlator_t *this, loc_t *loc,
+                      struct iatt *stbuf, dict_t *xattrs, int flags,
+                      gf_defrag_info_t *defrag)
+{
+        int rc       = -1;
+        int nr_locks = 0;
+
+        if (IA_ISDIR (stbuf->ia_type)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "Migrate file failed:"
+                        "%s: migrate-file called on directory", loc->path);
+                return -1;
+        }
+
+        /* Without lock migration, a file holding posix locks is skipped. */
+        if (!defrag->lock_migration_enabled) {
+                rc = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT,
+                                     &nr_locks);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_MIGRATE_FILE_FAILED,
+                                "Migrate file failed:"
+                                "%s: Unable to get lock count for file",
+                                loc->path);
+                        return -1;
+                }
+
+                if (nr_locks) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_MIGRATE_FILE_FAILED,
+                                "Migrate file failed: %s: File has locks."
+                                " Skipping file migration", loc->path);
+                        return -1;
+                }
+        }
+
+        /* Called back from hardlink handling: the data must move now. */
+        if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS)
+                return 0;
+
+        /* A file with a single link needs no special treatment. */
+        if (stbuf->ia_nlink <= 1)
+                return 0;
+
+        if (flags != GF_DHT_MIGRATE_HARDLINK) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "Migrate file failed:"
+                        "%s: file has hardlinks", loc->path);
+                return -ENOTSUP;
+        }
+
+        /* Hardlink migration was requested (support for decommission);
+         * the handler runs under defrag->link_lock. */
+        synclock_lock (&defrag->link_lock);
+        rc = gf_defrag_handle_hardlink (this, loc, xattrs, stbuf);
+        synclock_unlock (&defrag->link_lock);
+
+        /* Zero is never turned into -2 here and -2 is never turned into
+         * zero: returning zero would force the file to be remigrated. */
+        if (rc && rc != -2) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "Migrate file failed:"
+                        "%s: failed to migrate file with link",
+                        loc->path);
+        }
+
+        return rc;
+}
+
+
+/*
+ * Create (or open, when it already exists) the destination copy of the file
+ * at 'loc' on subvolume 'to' and prepare it for data migration.
+ *
+ * to     - destination subvolume
+ * from   - source subvolume; its name is the linkto xattr value handed to
+ *          the create
+ * loc    - file being migrated
+ * stbuf  - attributes of the source file (gfid, size, uid/gid are used)
+ * dst_fd - out parameter; receives the destination fd on success
+ * xattr  - xattrs applied to the destination through fsetxattr
+ *
+ * Returns 0 on success and a non-zero value (normally -1) on failure.
+ * Failures of fsetxattr, ftruncate and fsetattr on the destination are only
+ * logged and do not fail the call.
+ */
+static int
+__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
+                                 fd_t **dst_fd, dict_t *xattr)
+{
+        xlator_t    *this        = NULL;
+        int          ret         = -1;
+        fd_t        *fd          = NULL;
+        struct iatt  new_stbuf   = {0,};
+        struct iatt  check_stbuf = {0,};
+        dht_conf_t  *conf        = NULL;
+        dict_t      *dict        = NULL;
+
+        this = THIS;
+        conf = this->private;
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "%s: failed to set dictionary value: key = gfid-req",
+                        loc->path);
+                goto out;
+        }
+
+        ret = dict_set_str (dict, conf->link_xattr_name, from->name);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "%s: failed to set dictionary value: key = %s ",
+                        loc->path, conf->link_xattr_name);
+                goto out;
+        }
+
+        fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+        if (!fd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "%s: fd create failed (destination) (%s)",
+                        loc->path, strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_lookup (to, loc, &new_stbuf, NULL, NULL, NULL);
+        if (!ret) {
+                /* File exists in the destination, check if gfid matches */
+                if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_GFID_MISMATCH,
+                                "file %s exists in %s with different gfid",
+                                loc->path, to->name);
+                        ret = -1;
+                        goto out;
+                }
+        }
+        if ((ret < 0) && (-ret != ENOENT)) {
+                /* File exists in destination, but not accessible */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "%s: failed to lookup file (%s)",
+                        loc->path, strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        /* Create the destination with LINKFILE mode, and linkto xattr,
+           if the linkfile already exists, just open the file */
+        if (!ret) {
+                /*
+                 * File already present, just open the file.
+                 */
+                ret = syncop_open (to, loc, O_RDWR, fd, NULL, NULL);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                DHT_MSG_MIGRATE_FILE_FAILED,
+                                "failed to open %s on %s",
+                                loc->path, to->name);
+                        ret = -1;
+                        goto out;
+                }
+        } else {
+                ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
+                                     &new_stbuf, dict, NULL);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                DHT_MSG_MIGRATE_FILE_FAILED,
+                                "failed to create %s on %s",
+                                loc->path, to->name);
+                        ret = -1;
+                        goto out;
+                }
+
+        }
+
+        fd_bind (fd);
+
+        /* Reason for doing a lookup again after the create:
+         * In the create there is a time gap between opening the fd at the
+         * server (posix layer) and binding it in the server (incrementing
+         * the fd count). If another process sends an unlink in that gap,
+         * considering the file a linkto file, the file is unlinked at the
+         * backend because the inode's fd count is still 0. Further
+         * operations are performed on the fd, so the migration would
+         * complete but end with no file at the backend.
+         */
+
+        ret = syncop_lookup (to, loc, &check_stbuf, NULL, NULL, NULL);
+        if (!ret) {
+
+                if (gf_uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_GFID_MISMATCH,
+                                "file %s exists in %s with different gfid, "
+                                "found in lookup after create",
+                                loc->path, to->name);
+                        ret = -1;
+                        goto out;
+                }
+
+        }
+
+        /* Only ENOENT is treated as fatal here; any other lookup error is
+         * ignored and overwritten by the calls below. */
+        if (-ret == ENOENT) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exist "
+                        "on %s (%s)", loc->path, to->name, strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL);
+        if (ret < 0)
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "%s: failed to set xattr on %s (%s)",
+                        loc->path, to->name, strerror (-ret));
+
+        /* Size the destination like the source up front. */
+        ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
+        if (ret < 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "ftruncate failed for %s on %s (%s)",
+                        loc->path, to->name, strerror (-ret));
+
+        ret = syncop_fsetattr (to, fd, stbuf,
+                               (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+                               NULL, NULL, NULL, NULL);
+        if (ret < 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "chown failed for %s on %s (%s)",
+                        loc->path, to->name, strerror (-ret));
+
+        /* success */
+        ret = 0;
+
+        if (dst_fd)
+                *dst_fd = fd;
+
+out:
+        /* On failure the fd was never handed to the caller: drop it. */
+        if (ret) {
+                if (fd) {
+                        fd_unref (fd);
+                }
+        }
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+/*
+ * Check whether subvolume 'to' has enough free space to receive the file.
+ *
+ * to    - destination subvolume
+ * from  - source subvolume
+ * loc   - file being migrated (used for statfs and in log messages)
+ * stbuf - attributes of the file; ia_blocks is the space it needs. When it
+ *         is NULL the per-file checks are skipped.
+ * flag  - migration mode; the source/destination balance check is done only
+ *         for GF_DHT_MIGRATE_DATA
+ *
+ * Returns 0 when the migration may proceed and -1 otherwise (statfs failure,
+ * destination left with less space than the source, or not enough room for
+ * the file).
+ */
+static int
+__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
+                        struct iatt *stbuf, int flag)
+{
+        struct statvfs  src_statfs = {0,};
+        struct statvfs  dst_statfs = {0,};
+        int             ret        = -1;
+        xlator_t       *this       = NULL;
+        dict_t         *xdata      = NULL;
+
+        uint64_t        src_statfs_blocks = 1;
+        uint64_t        dst_statfs_blocks = 1;
+
+        this = THIS;
+
+        xdata = dict_new ();
+        if (!xdata) {
+                errno = ENOMEM;
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        DHT_MSG_NO_MEMORY,
+                        "failed to allocate dictionary");
+                goto out;
+        }
+
+        ret = dict_set_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set "
+                        GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_statfs (from, loc, &src_statfs, xdata, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "failed to get statfs of %s on %s (%s)",
+                        loc->path, from->name, strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_statfs (to, loc, &dst_statfs, xdata, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "failed to get statfs of %s on %s (%s)",
+                        loc->path, to->name, strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        /* if force option is given, do not check for space @ dst.
+         * Check only if space is avail for the file */
+        if (flag != GF_DHT_MIGRATE_DATA)
+                goto check_avail_space;
+
+        /* Check:
+           During rebalance `migrate-data` - Destination subvol experiences
+           a `reduction` in 'blocks' of free space, at the same time source
+           subvol gains certain 'blocks' of free space. A valid check is
+           necessary here to avoid an erroneous move to a destination where
+           space is scarce.
+         */
+        if (stbuf) {
+                dst_statfs_blocks = ((dst_statfs.f_bavail *
+                                      dst_statfs.f_bsize) /
+                                     GF_DISK_SECTOR_SIZE);
+                src_statfs_blocks = ((src_statfs.f_bavail *
+                                      src_statfs.f_bsize) /
+                                     GF_DISK_SECTOR_SIZE);
+                /* The subtraction is unsigned: if the destination has fewer
+                 * free blocks than the file needs it wraps around and this
+                 * test does not fire, but the check_avail_space test below
+                 * then rejects the move. */
+                if ((dst_statfs_blocks - stbuf->ia_blocks) <
+                    (src_statfs_blocks + stbuf->ia_blocks)) {
+
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_MIGRATE_FILE_FAILED,
+                                "data movement attempted from node "
+                                "(%s:%"PRIu64") with higher disk space "
+                                "to a node (%s:%"PRIu64") with lesser "
+                                "disk space, file { blocks:%"PRIu64", "
+                                "name:(%s) }", from->name, src_statfs_blocks,
+                                to->name, dst_statfs_blocks,
+                                stbuf->ia_blocks, loc->path);
+
+                        /* this is not a 'failure', but we don't want to
+                           consider this as 'success' too :-/ */
+                        ret = -1;
+                        goto out;
+                }
+        }
+check_avail_space:
+        /* stbuf is treated as optional above, so it must not be
+         * dereferenced unconditionally here either. */
+        if (stbuf &&
+            (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+              GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "data movement attempted from node (%s) to node (%s) "
+                        "which does not have required free space for (%s)",
+                        from->name, to->name, loc->path);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (xdata)
+                dict_unref (xdata);
+        return ret;
+}
+
+/*
+ * Copy 'ia_size' bytes from 'src' (on subvolume 'from') to 'dst' (on
+ * subvolume 'to') in chunks of at most DHT_REBALANCE_BLKSIZE bytes. When
+ * 'hole_exists' is set the data goes through dht_write_with_holes,
+ * otherwise through syncop_writev. A read that returns 0 ends the copy
+ * without error.
+ *
+ * Returns 0 on success and -1 if a read or a write fails.
+ */
+static int
+__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+                              uint64_t ia_size, int hole_exists)
+{
+        int             ret       = 0;
+        int             nvec      = 0;
+        off_t           pos       = 0;
+        struct iovec   *vec       = NULL;
+        struct iobref  *iobref    = NULL;
+        uint64_t        copied    = 0;
+        uint64_t        remaining = 0;
+        size_t          chunk     = 0;
+
+        /* a zero-sized file never enters the loop */
+        while (copied < ia_size) {
+                remaining = ia_size - copied;
+                if (remaining > DHT_REBALANCE_BLKSIZE)
+                        chunk = DHT_REBALANCE_BLKSIZE;
+                else
+                        chunk = remaining;
+
+                ret = syncop_readv (from, src, chunk, pos, 0, &vec, &nvec,
+                                    &iobref, NULL, NULL);
+                if (ret <= 0)
+                        break;
+
+                if (hole_exists) {
+                        ret = dht_write_with_holes (to, dst, vec, nvec,
+                                                    ret, pos, iobref);
+                } else {
+                        ret = syncop_writev (to, dst, vec, nvec,
+                                             pos, iobref, 0, NULL, NULL);
+                }
+                if (ret < 0)
+                        break;
+
+                pos += ret;
+                copied += ret;
+
+                /* release this round's buffers before reading again */
+                GF_FREE (vec);
+                vec = NULL;
+                if (iobref)
+                        iobref_unref (iobref);
+                iobref = NULL;
+        }
+
+        /* buffers still held here belong to a round that broke out */
+        if (iobref)
+                iobref_unref (iobref);
+        GF_FREE (vec);
+
+        return (ret >= 0) ? 0 : -1;
+}
+
+/*
+ * Tiering variant of the data copy: moves 'ia_size' bytes from 'src' (on
+ * 'from') to 'dst' (on 'to') in chunks of at most DHT_REBALANCE_BLKSIZE
+ * bytes. After every write the pause state of defrag->tier_conf is checked;
+ * the copy is abandoned as a failure when it is not TIER_RUNNING. A read
+ * that returns 0 ends the copy without error.
+ *
+ * Returns 0 on success and -1 on a read/write failure or when paused.
+ */
+static int
+__tier_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+                     uint64_t ia_size, int hole_exists)
+{
+        int             ret       = 0;
+        int             nvec      = 0;
+        off_t           pos       = 0;
+        struct iovec   *vec       = NULL;
+        struct iobref  *iobref    = NULL;
+        uint64_t        copied    = 0;
+        uint64_t        remaining = 0;
+        size_t          chunk     = 0;
+
+        /* a zero-sized file never enters the loop */
+        while (copied < ia_size) {
+                remaining = ia_size - copied;
+                if (remaining > DHT_REBALANCE_BLKSIZE)
+                        chunk = DHT_REBALANCE_BLKSIZE;
+                else
+                        chunk = remaining;
+
+                ret = syncop_readv (from, src, chunk, pos, 0, &vec, &nvec,
+                                    &iobref, NULL, NULL);
+                if (ret <= 0)
+                        break;
+
+                if (hole_exists) {
+                        ret = dht_write_with_holes (to, dst, vec, nvec,
+                                                    ret, pos, iobref);
+                } else {
+                        ret = syncop_writev (to, dst, vec, nvec,
+                                             pos, iobref, 0, NULL, NULL);
+                }
+
+                /* the pause check comes after the write and overrides its
+                 * result */
+                if (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING) {
+                        gf_msg ("tier", GF_LOG_INFO, 0,
+                                DHT_MSG_TIER_PAUSED,
+                                "Migrate file paused");
+                        ret = -1;
+                }
+
+                if (ret < 0)
+                        break;
+
+                pos += ret;
+                copied += ret;
+
+                /* release this round's buffers before reading again */
+                GF_FREE (vec);
+                vec = NULL;
+                if (iobref)
+                        iobref_unref (iobref);
+                iobref = NULL;
+        }
+
+        /* buffers still held here belong to a round that broke out */
+        if (iobref)
+                iobref_unref (iobref);
+        GF_FREE (vec);
+
+        return (ret >= 0) ? 0 : -1;
+}
+
+
+/*
+ * Open the source file and mark it as being under migration.
+ *
+ * from      - source subvolume
+ * to        - destination subvolume; its name becomes the linkto xattr value
+ *             set on the source
+ * loc       - file being migrated
+ * stbuf     - current attributes of the source; type and mode bits are the
+ *             base for the mode set here
+ * src_fd    - out parameter; receives the opened source fd
+ * clean_src - out parameter; set to true once the linkto xattr has been set
+ *             on the source, i.e. when the source has to be reverted if the
+ *             migration fails later
+ *
+ * Returns 0 on success and a non-zero value (normally -1) on failure. Once
+ * *src_fd has been filled in it stays set even if a later step fails;
+ * releasing it is then left to the caller.
+ */
+static int
+__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
+                               struct iatt *stbuf, fd_t **src_fd,
+                               gf_boolean_t *clean_src)
+{
+        int          ret   = 0;
+        fd_t        *fd    = NULL;
+        dict_t      *dict  = NULL;
+        xlator_t    *this  = NULL;
+        struct iatt  iatt  = {0,};
+        dht_conf_t  *conf  = NULL;
+
+        this = THIS;
+        conf = this->private;
+
+        *clean_src = _gf_false;
+
+        fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+        if (!fd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "%s: fd create failed (source)", loc->path);
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_open (from, loc, O_RDWR, fd, NULL, NULL);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "failed to open file %s on %s (%s)",
+                        loc->path, from->name, strerror (-ret));
+                /* The fd has not been handed to the caller yet, so nobody
+                 * else can release it: drop our reference here. */
+                fd_unref (fd);
+                fd = NULL;
+                ret = -1;
+                goto out;
+        }
+
+        fd_bind (fd);
+
+        if (src_fd)
+                *src_fd = fd;
+
+        ret = -1;
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_set_str (dict, conf->link_xattr_name, to->name);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set xattr in dict for %s (linkto:%s)",
+                        loc->path, to->name);
+                goto out;
+        }
+
+        /* Once the migration starts, the source should have 'linkto' key set
+           to show which is the target, so other clients can work around it */
+        ret = syncop_setxattr (from, loc, dict, 0, NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "failed to set xattr on %s in %s (%s)",
+                        loc->path, from->name, strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        /* Reset source mode/xattr if migration fails*/
+        *clean_src = _gf_true;
+
+        /* mode should be (+S+T) to indicate migration is in progress */
+        iatt.ia_prot = stbuf->ia_prot;
+        iatt.ia_type = stbuf->ia_type;
+        iatt.ia_prot.sticky = 1;
+        iatt.ia_prot.sgid = 1;
+
+        ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL,
+                              NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "failed to set mode on %s in %s (%s)",
+                        loc->path, from->name, strerror (-ret));
+                ret = -1;
+                goto out;
+        }
+
+        /* success */
+        ret = 0;
+out:
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+/*
+ * Migrate a non-regular file (a symlink, or a node created with mknod) from
+ * 'from' to 'to' by recreating it on the destination and unlinking it from
+ * the source.
+ *
+ * this - dht xlator; this->private supplies the linkto xattr name
+ * from - source subvolume
+ * to   - destination subvolume
+ * loc  - file to migrate
+ * buf  - attributes of the source file (type, gfid, size, rdev, mode, owner)
+ *
+ * Returns 0 on success and a non-zero value (normally -1) on failure.
+ */
+int
+migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *buf)
+{
+ int ret = -1;
+ dict_t *rsp_dict = NULL;
+ dict_t *dict = NULL;
+ char *link = NULL;
+ struct iatt stbuf = {0,};
+ dht_conf_t *conf = this->private;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ /* NOTE(review): presumably asks the lookup below to return the linkto
+ xattr in rsp_dict - confirm */
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* check in the destination if the file is link file */
+ ret = syncop_lookup (to, loc, &stbuf, NULL, dict, &rsp_dict);
+ if ((ret < 0) && (-ret != ENOENT)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: lookup failed (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* file exists in target node, only if it is 'linkfile' its valid,
+ otherwise, error out */
+ if (!ret) {
+ if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file exists in destination", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ /* as file is linkfile, delete it */
+ ret = syncop_unlink (to, loc, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to delete the linkfile (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Set the gfid of the source file in dict */
+ ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ /* Create the file in target */
+ if (IA_ISLNK (buf->ia_type)) {
+ /* Handle symlinks separately */
+ ret = syncop_readlink (from, loc, &link, buf->ia_size, NULL,
+ NULL);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: readlink on symlink failed (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_symlink (to, loc, link, 0, dict, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: creating symlink failed (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ /* Everything that is not a symlink is recreated with mknod, using the
+ source's mode/type and device numbers. */
+ ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
+ buf->ia_type),
+ makedev (ia_major (buf->ia_rdev),
+ ia_minor (buf->ia_rdev)), 0, dict, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: mknod failed (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+done:
+ /* Copy mtime, ownership and mode onto the new file. A failure here is
+ only logged: the ret assigned below is overwritten by the result of
+ the unlink that follows. */
+ ret = syncop_setattr (to, loc, buf,
+ (GF_SET_ATTR_MTIME |
+ GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ }
+
+ /* The destination copy exists now; remove the source. */
+ ret = syncop_unlink (from, loc, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: unlink failed (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ }
+
+out:
+ GF_FREE (link);
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return ret;
+}
+
+/*
+ * Undo the "migration in progress" marking on the source file: restore the
+ * sticky and sgid mode bits to what 'src_ia_prot' recorded and remove the
+ * linkto xattr.
+ *
+ * this        - dht xlator; this->private supplies the linkto xattr name
+ * loc         - file being cleaned up (used in log messages only)
+ * fd          - open fd of the file on 'from'
+ * from        - source subvolume
+ * src_ia_prot - mode bits the source had before the migration started
+ *
+ * Returns 0 on success and -1 on failure, including when 'this', 'fd',
+ * 'from' or 'src_ia_prot' is NULL.
+ */
+static int
+__dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd,
+                                  xlator_t *from, ia_prot_t *src_ia_prot)
+{
+        int          rc       = -1;
+        dht_conf_t  *priv     = NULL;
+        struct iatt  cur_attr = {0,};
+
+        if (!this || !fd || !from || !src_ia_prot)
+                return -1;
+
+        priv = this->private;
+
+        /* Start from the file's current attributes on the source. */
+        rc = syncop_fstat (from, fd, &cur_attr, NULL, NULL);
+        if (rc < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -rc,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "Migrate file cleanup failed: failed to fstat "
+                        "file %s on %s ", loc->path, from->name);
+                return -1;
+        }
+
+        /* Clear sticky/sgid only if the file did not have them before. */
+        if (!src_ia_prot->sticky)
+                cur_attr.ia_prot.sticky = 0;
+        if (!src_ia_prot->sgid)
+                cur_attr.ia_prot.sgid = 0;
+
+        rc = syncop_fsetattr (from, fd, &cur_attr,
+                              (GF_SET_ATTR_GID | GF_SET_ATTR_MODE),
+                              NULL, NULL, NULL, NULL);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_WARNING, -rc,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "Migrate file cleanup failed:"
+                        "%s: failed to perform fsetattr on %s ",
+                        loc->path, from->name);
+                return -1;
+        }
+
+        rc = syncop_fremovexattr (from, fd, priv->link_xattr_name, 0, NULL);
+        if (rc) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "%s: failed to remove linkto xattr on %s (%s)",
+                        loc->path, from->name, strerror (-rc));
+                return -1;
+        }
+
+        return 0;
+}
+
+
+
+/*
+ return values:
+
+ -1 : failure
+ 0 : successfully migrated data
+ 1 : not a failure, but we can't migrate data as of now
+*/
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag)
+{
+ int ret = -1;
+ struct iatt new_stbuf = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt empty_iatt = {0,};
+ ia_prot_t src_ia_prot = {0,};
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
+ int rcvd_enoent_from_src = 0;
+ struct gf_flock flock = {0, };
+ struct gf_flock plock = {0, };
+ loc_t tmp_loc = {0, };
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
+ int lk_ret = -1;
+ gf_defrag_info_t *defrag = NULL;
+ gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
+ int log_level = GF_LOG_INFO;
+ gf_boolean_t delete_src_linkto = _gf_true;
+ lock_migration_info_t locklist;
+ dict_t *meta_dict = NULL;
+ gf_boolean_t meta_locked = _gf_false;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ if (defrag->tier_conf.is_tier)
+ log_level = GF_LOG_TRACE;
+
+ gf_log (this->name,
+ log_level, "%s: attempting to move from %s to %s",
+ loc->path, from->name, to->name);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+
+ /* Do not migrate file in case lock migration is not enabled on the
+ * volume*/
+ if (!defrag->lock_migration_enabled) {
+ ret = dict_set_int32 (dict,
+ GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set "GLUSTERFS_POSIXLK_COUNT" key in dict",
+ loc->path);
+ goto out;
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0, 0, "locks will be migrated"
+ " for file: %s", loc->path);
+ }
+
+ flock.l_type = F_WRLCK;
+
+ tmp_loc.inode = inode_ref (loc->inode);
+ gf_uuid_copy (tmp_loc.gfid, loc->gfid);
+ tmp_loc.path = gf_strdup(loc->path);
+
+ ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW,
+ &flock, NULL, NULL);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate file failed: "
+ "%s: failed to lock file on %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ locked = _gf_true;
+
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup (from, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: lookup failed on %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
+ /* Check if file can be migrated */
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag);
+ if (ret) {
+ if (ret == -2)
+ ret = 0;
+ goto out;
+ }
+ /* Take care of the special files */
+ if (!IA_ISREG (stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files (this, from, to, loc, &stbuf);
+ goto out;
+ }
+
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ }
+
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
+ &dst_fd, xattr);
+ if (ret)
+ goto out;
+
+ clean_dst = _gf_true;
+
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+
+ if (ret) {
+ goto out;
+ }
+
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd,
+ &clean_src);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to open %s on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+
+ ret = syncop_fstat (from, src_fd, &stbuf, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:failed to lookup %s on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
+
+ /* All I/O happens in this function */
+ if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+ ret = __tier_migrate_data (defrag, from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
+ } else {
+ ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
+ }
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to migrate data",
+ loc->path);
+
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: Sync the locks */
+
+ ret = syncop_fsync (to, dst_fd, 0, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ }
+
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat (from, src_fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg ( this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to fstat file %s on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ /* Take meta lock */
+
+ if (defrag->lock_migration_enabled) {
+ meta_dict = dict_new ();
+ if (!meta_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_new failed");
+
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s", GLUSTERFS_INTERNAL_FOP_KEY,
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (meta_dict, GF_META_LOCK_KEY, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr (from, loc, meta_dict, 0, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr metalock failed");
+
+ ret = -1;
+ goto out;
+ } else {
+ meta_locked = _gf_true;
+ }
+ }
+
+ if (!defrag->lock_migration_enabled) {
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
+
+ ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ p_locked = _gf_true;
+
+ } else {
+
+ INIT_LIST_HEAD (&locklist.list);
+
+ ret = syncop_getactivelk (from, loc, &locklist, NULL, NULL);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_INFO, "No active locks on:%s"
+ , loc->path);
+
+ } else if (ret > 0) {
+
+ ret = syncop_setactivelk (to, loc, &locklist, NULL,
+ NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOCK_MIGRATION_FAILED,
+ "write lock failed on:%s", loc->path);
+
+ ret = -1;
+ goto metaunlock;
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOCK_MIGRATION_FAILED,
+ "getactivelk failed for file: %s", loc->path);
+ }
+ }
+
+
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, to->name);
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Because 'futimes' is not portable */
+ ret = syncop_setattr (to, loc, &new_stbuf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
+ NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s ",
+ loc->path, to->name);
+ ret = -1;
+ }
+
+
+ clean_dst = _gf_false;
+
+ /* Posix acls are not set on DHT linkto files as part of the initial
+ * initial xattrs set on the dst file, so these need
+ * to be set on the dst file after the linkto attrs are removed.
+ * TODO: Optimize this.
+ */
+ if (xattr) {
+ dict_unref (xattr);
+ xattr = NULL;
+ }
+
+ ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ } else {
+ ret = syncop_setxattr (to, loc, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ /* Potential problem here where Posix ACLs will
+ * not be set on the target file */
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ }
+ }
+
+ /* store size of previous migrated file */
+ if (defrag->tier_conf.is_tier) {
+ if (from != TIER_HASHED_SUBVOL) {
+ defrag->tier_conf.st_last_promoted_size = stbuf.ia_size;
+ } else {
+ /* Don't delete the linkto file on the hashed subvol */
+ delete_src_linkto = _gf_false;
+ defrag->tier_conf.st_last_demoted_size = stbuf.ia_size;
+ }
+ }
+
+ /* The src file is being unlinked after this so we don't need
+ to clean it up */
+ clean_src = _gf_false;
+
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr (from, src_fd, &empty_iatt,
+ GF_SET_ATTR_MODE, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ }
+
+ /* Do a stat and check the gfid before unlink */
+
+ /*
+ * Cached file changes its state from non-linkto to linkto file after
+ * migrating data. If lookup from any other mount-point is performed,
+ * converted-linkto-cached file will be treated as a stale and will be
+ * unlinked. But by this time, file is already migrated. So further
+ * failure because of ENOENT should not be treated as error
+ */
+
+ ret = syncop_stat (from, loc, &empty_iatt, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to do a stat on %s (%s)",
+ loc->path, from->name, strerror (-ret));
+
+ if (-ret != ENOENT) {
+ ret = -1;
+ goto metaunlock;
+ }
+
+ rcvd_enoent_from_src = 1;
+ }
+
+
+ if ((gf_uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) &&
+ (!rcvd_enoent_from_src) && delete_src_linkto) {
+ /* take out the source from namespace */
+ ret = syncop_unlink (from, loc, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform unlink on %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ goto metaunlock;
+ }
+ }
+
+ ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "%s: failed to lookup the file on subvolumes (%s)",
+ loc->path, strerror (-ret));
+ ret = -1;
+ }
+
+ gf_msg (this->name, log_level, 0,
+ DHT_MSG_MIGRATE_FILE_COMPLETE,
+ "completed migration of %s from subvolume %s to %s",
+ loc->path, from->name, to->name);
+
+ ret = 0;
+
+metaunlock:
+
+ if (defrag->lock_migration_enabled && meta_locked) {
+
+ dict_del (meta_dict, GF_META_LOCK_KEY);
+
+ ret = dict_set_int32 (meta_dict, GF_META_UNLOCK_KEY, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+
+ ret = -1;
+ goto out;
+ }
+
+ if (clean_dst == _gf_false)
+ ret = dict_set_int32 (meta_dict, "status", 1);
+ else
+ ret = dict_set_int32 (meta_dict, "status", 0);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr (from, loc, meta_dict, 0, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr meta unlock failed");
+
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ if (clean_src) {
+ /* Revert source mode and xattr changes*/
+ lk_ret = __dht_migration_cleanup_src_file (this, loc, src_fd,
+ from, &src_ia_prot);
+ if (lk_ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to cleanup source file on %s",
+ loc->path, from->name);
+ }
+ }
+
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ lk_ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
+ if (lk_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
+ }
+ }
+
+ if (locked) {
+ flock.l_type = F_UNLCK;
+
+ lk_ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN,
+ &tmp_loc, F_SETLK, &flock, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s (%s)",
+ loc->path, from->name, strerror (-lk_ret));
+ }
+ }
+
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+
+ if (lk_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s",
+ loc->path, from->name);
+ }
+ }
+
+ if (dict)
+ dict_unref (dict);
+
+ if (xattr)
+ dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ if (dst_fd)
+ syncop_close (dst_fd);
+ if (src_fd)
+ syncop_close (src_fd);
+
+ loc_wipe (&tmp_loc);
+
+ return ret;
+}
+
+/* Synctask body for a single file migration.
+ *
+ * 'data' is the call frame whose dht_local_t carries the loc, the source
+ * and target subvolumes and the migration flags.  dht_migrate_file () is
+ * synchronous, so its return value is the final result of the task.
+ */
+static int
+rebalance_task (void *data)
+{
+        call_frame_t *task_frame = data;
+        dht_local_t  *task_local = task_frame->local;
+
+        return dht_migrate_file (THIS, &task_local->loc,
+                                 task_local->rebalance.from_subvol,
+                                 task_local->rebalance.target_node,
+                                 task_local->flags);
+}
+
+/* Completion callback paired with rebalance_task ().
+ *
+ * When the migration succeeded (op_ret == 0) the layout stored in the inode
+ * ctx is dropped and a preset layout pointing at the migration target is
+ * installed instead.  The result is then translated into the
+ * (op_ret, op_errno) pair used to unwind the originating setxattr:
+ *
+ *   op_ret == -1 -> unwound with ENOSPC (the exact errno is not preserved)
+ *   op_ret ==  1 -> unwound as -1 / EPERM (file was not migrated)
+ *   otherwise    -> op_ret unchanged, op_errno stays EINVAL
+ *
+ * Always returns 0.
+ */
+static int
+rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
+{
+        xlator_t     *this       = THIS;
+        dht_local_t  *local      = sync_frame->local;
+        dht_layout_t *old_layout = NULL;
+        uint64_t      ctx_value  = 0;
+        int32_t       op_errno   = EINVAL;
+        int           rc         = -1;
+
+        if (op_ret == 0) {
+                /* Drop whatever layout is cached in the inode ctx so that
+                   a valid one can be installed below */
+                rc = inode_ctx_del (local->loc.inode, this, &ctx_value);
+                if ((rc == 0) && (ctx_value != 0)) {
+                        old_layout = (dht_layout_t *)(long)ctx_value;
+                        dht_layout_unref (this, old_layout);
+                }
+
+                rc = dht_layout_preset (this, local->rebalance.target_node,
+                                        local->loc.inode);
+                if (rc != 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s: failed to set inode ctx",
+                                local->loc.path);
+                }
+        }
+
+        switch (op_ret) {
+        case -1:
+                /* Migration failed, most likely while writing.  The exact
+                   errno cannot be preserved, so report it as "no space
+                   to migrate data" */
+                op_errno = ENOSPC;
+                break;
+        case 1:
+                /* Migration did not happen but this is not a failure of
+                   the process itself; tell the user the file may not be
+                   migrated */
+                op_ret = -1;
+                op_errno = EPERM;
+                break;
+        default:
+                break;
+        }
+
+        DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/* Schedule rebalance_task () as a synctask on this xlator's sync
+ * environment, with rebalance_task_completion () as its completion callback.
+ * 'frame' is handed to synctask_new () both as the frame and as the opaque
+ * task argument.  Returns whatever synctask_new () returns.
+ */
+int
+dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
+{
+        return synctask_new (this->ctx->env, rebalance_task,
+                             rebalance_task_completion, frame, frame);
+}
+
+/* Remove the listener socket file named in the command-line arguments.
+ *
+ * Returns 0 when no socket file is configured, when the unlink succeeds, or
+ * when the file is already gone (ENOENT).  Any other unlink failure is
+ * logged and its return value is passed back to the caller.
+ */
+int
+gf_listener_stop (xlator_t *this)
+{
+        glusterfs_ctx_t *ctx  = this->ctx;
+        cmd_args_t      *args = NULL;
+        int              rc   = 0;
+
+        GF_ASSERT (ctx);
+        args = &ctx->cmd_args;
+
+        /* Nothing to remove */
+        if (!args->sock_file)
+                return 0;
+
+        rc = sys_unlink (args->sock_file);
+        if (rc == 0)
+                return 0;
+
+        /* A socket file that no longer exists is not an error */
+        if (errno == ENOENT)
+                return 0;
+
+        gf_msg (this->name, GF_LOG_ERROR, errno,
+                DHT_MSG_SOCKET_ERROR,
+                "Failed to unlink listener "
+                "socket %s", args->sock_file);
+
+        return rc;
+}
+
+/* Create a fresh inode table for 'this' and look up the root inode in it.
+ *
+ * The root gfid is all zeroes except for the last byte, which is 1.
+ * '*inode' receives the result of inode_find (); it is left untouched when
+ * the inode table cannot be created.
+ */
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+        uuid_t         root_gfid = {0, };
+        inode_table_t *table     = inode_table_new (0, this);
+
+        if (table) {
+                root_gfid[15] = 1;
+                *inode = inode_find (table, root_gfid);
+        }
+}
+
+/* Fill 'loc' so that it describes the root directory "/": the given inode
+ * (marked as a directory) and the root gfid, i.e. sixteen bytes that are all
+ * zero except for the last one, which is 1.
+ */
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+        /* gfid: 00..00 01 */
+        memset (loc->gfid, 0, 16);
+        loc->gfid[15] = 1;
+
+        inode->ia_type = IA_IFDIR;
+        loc->inode = inode;
+        loc->path = "/";
+}
+
+
+/* Decide whether the rebalance process can carry on after a failed
+ * migration attempt that reported 'op_errno'.
+ *
+ * return values: 1 -> error, but ignore and continue
+ *                0 -> proceed (not produced at present; kept in the
+ *                     contract because callers still test for it)
+ *               -1 -> error, handle it; defrag->defrag_status has been set
+ *                     to GF_DEFRAG_STATUS_FAILED
+ *
+ * The previous condition, (op_errno != ENOSPC) || (op_errno != ENOTCONN),
+ * was true for every value, so the function always returned 1 and a lost
+ * connection never stopped the rebalance.
+ */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+        /* Only ENOTCONN is treated as fatal.  ENOSPC is deliberately left
+           non-fatal: rebalance_task_completion () reports every failed
+           migration as ENOSPC, so failing on it would abort the whole
+           rebalance on the first file that could not be migrated */
+        if (op_errno != ENOTCONN)
+                return 1;
+
+        /* Most probably mount point went missing (mostly due
+           to a brick down), say rebalance failure to user,
+           let him restart it if everything is fine */
+        defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+        return -1;
+}
+
+/* Check a file against the configured defrag pattern list.
+ *
+ * The list is walked in order and the first pattern whose path_pattern
+ * matches 'name' (fnmatch with FNM_NOESCAPE) decides the result: _gf_true
+ * when 'size' is at least that pattern's size, _gf_false otherwise.
+ * _gf_false is also returned when nothing matches or 'defrag' is NULL.
+ */
+static gf_boolean_t
+gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+        gf_defrag_pattern_list_t *pattern = NULL;
+        gf_boolean_t              matched = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+
+        for (pattern = defrag->defrag_pattern; pattern;
+             pattern = pattern->next) {
+                if (fnmatch (pattern->path_pattern, name, FNM_NOESCAPE) != 0)
+                        continue;
+
+                /* Only the first matching pattern is consulted */
+                if (size >= pattern->size)
+                        matched = _gf_true;
+                break;
+        }
+
+out:
+        return matched;
+}
+
+/* Return 1 when every one of the first 'cnt' entries of 'offset_var' has
+ * readdir_done set, 0 as soon as one is found that has not.  An empty
+ * range (cnt <= 0) yields 1.
+ */
+int dht_dfreaddirp_done (dht_dfoffset_ctx_t *offset_var, int cnt) {
+
+        int idx = 0;
+
+        while (idx < cnt) {
+                if (!offset_var[idx].readdir_done)
+                        return 0;
+                idx++;
+        }
+
+        return 1;
+}
+
+/* Initialise one readdir context per local subvolume: bind it to the
+ * subvolume, rewind its offset to 0 and clear its readdir_done flag.
+ * Returns -1 when the xlator has no private conf, 0 otherwise.
+ */
+int static
+gf_defrag_ctx_subvols_init (dht_dfoffset_ctx_t *offset_var, xlator_t *this) {
+
+        dht_conf_t         *conf = this->private;
+        dht_dfoffset_ctx_t *slot = NULL;
+        int                 idx  = 0;
+
+        if (conf == NULL)
+                return -1;
+
+        for (idx = 0; idx < conf->local_subvols_cnt; idx++) {
+                slot = &offset_var[idx];
+
+                slot->this = conf->local_subvols[idx];
+                slot->offset = (off_t) 0;
+                slot->readdir_done = 0;
+        }
+
+        return 0;
+}
+
+/* Migrate one queued directory entry.
+ *
+ * 'opaque' is a struct dht_container carrying the xlator, the parent loc,
+ * the dirent to migrate and the migrate_data dict.  The entry is looked up,
+ * linked into the inode table and then migrated by issuing a setxattr of
+ * migrate_data on it.  Per-file outcomes are accounted in the defrag
+ * counters (skipped / total_failures / total_files / total_data) under
+ * defrag->lock.
+ *
+ * Returns 0 when the rebalance may continue (this includes files that were
+ * skipped or failed non-fatally) and -1 when the container is NULL, the
+ * defrag is no longer in GF_DEFRAG_STATUS_STARTED, or
+ * gf_defrag_handle_migrate_error () classified the error as fatal.
+ */
+int
+gf_defrag_migrate_single_file (void *opaque)
+{
+        xlator_t                *this           = NULL;
+        dht_conf_t              *conf           = NULL;
+        gf_defrag_info_t        *defrag         = NULL;
+        int                      ret            = 0;
+        gf_dirent_t             *entry          = NULL;
+        struct timeval           start          = {0,};
+        loc_t                    entry_loc      = {0,};
+        loc_t                   *loc            = NULL;
+        struct iatt              iatt           = {0,};
+        dict_t                  *migrate_data   = NULL;
+        int32_t                  op_errno       = 0;
+        struct timeval           end            = {0,};
+        double                   elapsed        = {0,};
+        struct dht_container    *rebal_entry    = NULL;
+        inode_t                 *inode          = NULL;
+
+        rebal_entry = (struct dht_container *)opaque;
+        if (!rebal_entry) {
+                /* 'this' is only known once rebal_entry has been read, so
+                 * it is still NULL here; log under a fixed domain instead
+                 * of dereferencing it */
+                gf_log ("DHT", GF_LOG_ERROR, "rebal_entry is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        this = rebal_entry->this;
+
+        conf = this->private;
+
+        defrag = conf->defrag;
+
+        loc = rebal_entry->parent_loc;
+
+        migrate_data = rebal_entry->migrate_data;
+
+        entry = rebal_entry->df_entry;
+
+        /* Another worker (or the crawler) has already stopped the run */
+        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+                ret = -1;
+                goto out;
+        }
+
+        if (defrag->stats == _gf_true) {
+                gettimeofday (&start, NULL);
+        }
+
+        if (defrag->defrag_pattern &&
+            (gf_defrag_pattern_match (defrag, entry->d_name,
+                                      entry->d_stat.ia_size) == _gf_false)) {
+                gf_log (this->name, GF_LOG_ERROR, "pattern_match failed");
+                goto out;
+        }
+
+        memset (&entry_loc, 0, sizeof (entry_loc));
+
+        ret = dht_build_child_loc (this, &entry_loc, loc, entry->d_name);
+        if (ret) {
+                LOCK (&defrag->lock);
+                {
+                        defrag->total_failures += 1;
+                }
+                UNLOCK (&defrag->lock);
+
+                /* Counted as a failure, but not fatal for the rebalance */
+                ret = 0;
+
+                gf_log (this->name, GF_LOG_ERROR, "Child loc build failed");
+
+                goto out;
+        }
+
+        gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+        gf_uuid_copy (entry_loc.pargfid, loc->gfid);
+
+        ret = syncop_lookup (this, &entry_loc, &iatt, NULL, NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "Migrate file failed: %s lookup failed",
+                        entry_loc.name);
+                ret = 0;
+                goto out;
+        }
+
+        inode = inode_link (entry_loc.inode, entry_loc.parent, entry->d_name,
+                            &iatt);
+        inode_unref (entry_loc.inode);
+        /* use the inode returned by inode_link */
+        entry_loc.inode = inode;
+
+        /* The setxattr of migrate_data is what triggers the migration */
+        ret = syncop_setxattr (this, &entry_loc, migrate_data, 0, NULL, NULL);
+        if (ret < 0) {
+                op_errno = -ret;
+                /* errno is overloaded. See
+                 * rebalance_task_completion () */
+                if (op_errno == ENOSPC) {
+                        gf_msg_debug (this->name, 0, "migrate-data skipped for"
+                                      " %s due to space constraints",
+                                      entry_loc.path);
+                        LOCK (&defrag->lock);
+                        {
+                                defrag->skipped += 1;
+                        }
+                        UNLOCK (&defrag->lock);
+                } else if (op_errno == ENOTSUP) {
+                        gf_msg_debug (this->name, 0, "migrate-data skipped for"
+                                      " hardlink %s ", entry_loc.path);
+                        LOCK (&defrag->lock);
+                        {
+                                defrag->skipped += 1;
+                        }
+                        UNLOCK (&defrag->lock);
+                } else if (op_errno != EEXIST) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_MIGRATE_FILE_FAILED,
+                                "migrate-data failed for %s", entry_loc.path);
+
+                        LOCK (&defrag->lock);
+                        {
+                                defrag->total_failures += 1;
+                        }
+                        UNLOCK (&defrag->lock);
+
+                }
+
+                ret = gf_defrag_handle_migrate_error (op_errno, defrag);
+
+                if (!ret) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                               DHT_MSG_MIGRATE_FILE_FAILED,
+                               "migrate-data on %s failed: %s", entry_loc.path,
+                               strerror (op_errno));
+                } else if (ret == 1) {
+                        /* Ignorable error: carry on with the next file */
+                        ret = 0;
+                        goto out;
+                } else if (ret == -1) {
+                        /* Fatal error: propagate to the worker thread */
+                        goto out;
+                }
+        } else if (ret > 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED,
+                        "migrate-data failed for %s", entry_loc.path);
+                ret = 0;
+                LOCK (&defrag->lock);
+                {
+                        defrag->total_failures += 1;
+                }
+                UNLOCK (&defrag->lock);
+        }
+
+        LOCK (&defrag->lock);
+        {
+                defrag->total_files += 1;
+                defrag->total_data += iatt.ia_size;
+        }
+        UNLOCK (&defrag->lock);
+
+        if (defrag->stats == _gf_true) {
+                gettimeofday (&end, NULL);
+                /* elapsed is kept in microseconds and printed in seconds */
+                elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+                          (end.tv_usec - start.tv_usec);
+                gf_log (this->name, GF_LOG_INFO, "Migration of "
+                        "file:%s size:%"PRIu64" bytes took %.2f"
+                        "secs and ret: %d", entry_loc.name,
+                        iatt.ia_size, elapsed/1e6, ret);
+        }
+
+out:
+        loc_wipe (&entry_loc);
+
+        return ret;
+
+}
+
+/* Migration worker thread body.
+ *
+ * 'opaque' is the gf_defrag_info_t shared by the crawler and all workers.
+ * The thread repeatedly takes one container off defrag->queue[0] under
+ * defrag->dfq_mutex and hands it to gf_defrag_migrate_single_file () with
+ * the mutex released.  It terminates when defrag_status is no longer
+ * GF_DEFRAG_STATUS_STARTED, when a migration returns non-zero, or when the
+ * crawl is done and the queue is empty.  Always returns NULL.
+ */
+void *
+gf_defrag_task (void *opaque)
+{
+ struct list_head *q_head = NULL;
+ struct dht_container *iterator = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+
+
+ defrag = (gf_defrag_info_t *)opaque;
+ if (!defrag) {
+ gf_msg ("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL");
+ goto out;
+ }
+
+ q_head = &(defrag->queue[0].list);
+
+ /* The following while loop will dequeue one entry from the defrag->queue
+ under lock. We will update the defrag->global_error only when there
+ is an error which is critical to stop the rebalance process. The stop
+ message will be intimated to other migrator threads by setting the
+ defrag->defrag_status to GF_DEFRAG_STATUS_FAILED.
+
+ In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is
+ maintained so that crawler does not starve the file migration
+ workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that
+ crawler does not go far ahead in filling up the queue.
+ */
+
+ while (_gf_true) {
+
+ /* Rebalance was stopped or has failed: wake everyone who may be
+ waiting on either condition and leave */
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ pthread_cond_broadcast (
+ &defrag->rebalance_crawler_alarm);
+ pthread_cond_broadcast (
+ &defrag->parallel_migration_cond);
+ goto out;
+ }
+
+ pthread_mutex_lock (&defrag->dfq_mutex);
+ {
+
+ /*Throttle down:
+ If the reconfigured count is less than current thread
+ count, then the current thread will sleep */
+
+ /*TODO: Need to refactor the following block to work
+ *under defrag->lock. For now access
+ * defrag->current_thread_count and rthcount under
+ * dfq_mutex lock */
+ while (!defrag->crawl_done &&
+ (defrag->recon_thread_count <
+ defrag->current_thread_count)) {
+ defrag->current_thread_count--;
+ gf_log ("DHT", GF_LOG_INFO,
+ "Thread sleeping. "
+ "defrag->current_thread_count: %d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait (
+ &defrag->df_wakeup_thread,
+ &defrag->dfq_mutex);
+
+ defrag->current_thread_count++;
+
+ gf_log ("DHT", GF_LOG_INFO,
+ "Thread wokeup. "
+ "defrag->current_thread_count: %d",
+ defrag->current_thread_count);
+ }
+
+ if (defrag->q_entry_count) {
+ /* Take the entry at the head of the queue */
+ iterator = list_entry (q_head->next,
+ typeof(*iterator), list);
+
+ gf_msg_debug ("DHT", 0, "picking entry "
+ "%s", iterator->df_entry->d_name);
+
+ list_del_init (&(iterator->list));
+
+ defrag->q_entry_count--;
+
+ /* Queue dropped below the low watermark and the crawler
+ asked to be woken up */
+ if ((defrag->q_entry_count <
+ MIN_MIGRATE_QUEUE_COUNT) &&
+ defrag->wakeup_crawler) {
+ pthread_cond_broadcast (
+ &defrag->rebalance_crawler_alarm);
+ }
+ /* The migration itself runs without dfq_mutex held */
+ pthread_mutex_unlock (&defrag->dfq_mutex);
+ ret = gf_defrag_migrate_single_file
+ ((void *)iterator);
+
+ /*Critical errors: ENOTCONN and ENOSPACE*/
+ /* NOTE(review): this path leaves without calling
+ gf_defrag_free_container (iterator) - looks like the
+ dequeued container is not released here; confirm */
+ if (ret) {
+ dht_set_global_defrag_error
+ (defrag, ret);
+
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+
+ pthread_cond_broadcast (
+ &defrag->rebalance_crawler_alarm);
+
+ pthread_cond_broadcast (
+ &defrag->parallel_migration_cond);
+
+ goto out;
+ }
+
+ gf_defrag_free_container (iterator);
+
+ continue;
+ } else {
+
+ /* defrag->crawl_done flag is set means crawling
+ file system is done and hence a list_empty when
+ the above flag is set indicates there are no more
+ entries to be added to the queue and rebalance is
+ finished */
+
+ if (!defrag->crawl_done) {
+ pthread_cond_wait (
+ &defrag->parallel_migration_cond,
+ &defrag->dfq_mutex);
+ }
+
+ if (defrag->crawl_done &&
+ !defrag->q_entry_count) {
+ pthread_cond_broadcast (
+ &defrag->parallel_migration_cond);
+ goto unlock;
+ } else {
+ pthread_mutex_unlock
+ (&defrag->dfq_mutex);
+ continue;
+ }
+ }
+
+ }
+unlock:
+ /* Reached only with dfq_mutex held: release it and end the loop */
+ pthread_mutex_unlock (&defrag->dfq_mutex);
+ break;
+ }
+out:
+ return NULL;
+}
+
+/* Fetch the next migration candidate from local subvolume 'i' of the
+ * directory open on 'fd'.
+ *
+ * Per-subvolume progress (readdirp offset, buffered dirents, list iterator,
+ * "need to fetch more" flag) lives in 'dir_dfmeta', indexed by 'i'.  When the
+ * buffer is marked for refill a new batch is read with syncop_readdirp ();
+ * the buffered entries are then walked, skipping ".", "..", directories,
+ * names rejected by the defrag pattern, entries without a gfid, linkto files
+ * and files whose hashed and cached subvolumes are the same.  The first
+ * remaining entry is packaged into a newly allocated struct dht_container.
+ *
+ * Returns 0 with '*container' set to the new container, or to NULL when no
+ * candidate was produced by this call (readdir finished for this subvolume,
+ * or the current batch was exhausted and a refill was scheduled).  Returns
+ * -1 on failure or when the defrag is no longer in
+ * GF_DEFRAG_STATUS_STARTED; '*container' is not written in that case.
+ * '*should_commit_hash' is cleared when a lookup fails with an error other
+ * than ENOENT/ESTALE while no decommission is in progress.
+ */
+int static
+gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
+ loc_t *loc, dht_conf_t *conf, gf_defrag_info_t *defrag,
+ fd_t *fd, dict_t *migrate_data,
+ struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
+ int *should_commit_hash)
+{
+ int ret = -1;
+ char is_linkfile = 0;
+ gf_dirent_t *df_entry = NULL;
+ loc_t entry_loc = {0,};
+ dict_t *xattr_rsp = NULL;
+ struct iatt iatt = {0,};
+ struct dht_container *tmp_container = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
+
+ /* This subvolume has already been read to the end */
+ if (dir_dfmeta->offset_var[i].readdir_done == 1) {
+ ret = 0;
+ goto out;
+ }
+
+ /* The buffered batch is used up (or none was read yet): read the
+ next one starting at the saved offset */
+ if (dir_dfmeta->fetch_entries[i] == 1) {
+ ret = syncop_readdirp (conf->local_subvols[i], fd, 131072,
+ dir_dfmeta->offset_var[i].offset,
+ &(dir_dfmeta->equeue[i]),
+ NULL, NULL);
+ if (ret == 0) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_DATA_FAILED,
+ "%s: Migrate data failed: Readdir returned"
+ " %s. Aborting migrate-data", loc->path,
+ strerror(-ret));
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty (&(dir_dfmeta->equeue[i].list))) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
+ dir_dfmeta->fetch_entries[i] = 0;
+ }
+
+ while (1) {
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
+
+ df_entry = list_entry (dir_dfmeta->iterator[i]->next,
+ typeof (*df_entry), list);
+
+ /* Wrapped around to the list head: the batch is exhausted.
+ Free it, reset the iterator and ask for a refill on the
+ next call */
+ if (&df_entry->list == dir_dfmeta->head[i]) {
+ gf_dirent_free (&(dir_dfmeta->equeue[i]));
+ INIT_LIST_HEAD (&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->fetch_entries[i] = 1;
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ ret = 0;
+ goto out;
+ }
+
+ dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next;
+
+ /* Remember where the next readdirp has to resume */
+ dir_dfmeta->offset_var[i].offset = df_entry->d_off;
+ if (!strcmp (df_entry->d_name, ".") ||
+ !strcmp (df_entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (df_entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match (defrag, df_entry->d_name,
+ df_entry->d_stat.ia_size)
+ == _gf_false)) {
+ continue;
+ }
+
+ /* Release the loc built for the previously examined entry */
+ loc_wipe (&entry_loc);
+ ret = dht_build_child_loc (this, &entry_loc, loc,
+ df_entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_is_null (df_entry->d_stat.ia_gfid)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_GFID_NULL,
+ "%s/%s gfid not present", loc->path,
+ df_entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy (entry_loc.gfid, df_entry->d_stat.ia_gfid);
+
+ if (gf_uuid_is_null (loc->gfid)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_GFID_NULL,
+ "%s/%s gfid not present", loc->path,
+ df_entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = df_entry->d_stat.ia_type;
+
+ if (xattr_rsp) {
+ dict_unref (xattr_rsp);
+ xattr_rsp = NULL;
+ }
+
+ /* Look the entry up on the local subvolume only; the returned
+ xattrs are used below to detect linkto files */
+ ret = syncop_lookup (conf->local_subvols[i], &entry_loc,
+ &iatt, NULL, xattr_req, &xattr_rsp);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:%s lookup failed",
+ entry_loc.path);
+
+ if (-ret != ENOENT && -ret != ESTALE) {
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ ret = -1;
+ goto out;
+ } else {
+ *should_commit_hash = 0;
+ continue;
+ }
+ }
+
+ continue;
+ }
+
+
+ is_linkfile = check_is_linkfile (NULL, &iatt, xattr_rsp,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ /* No need to add linkto file to the queue for
+ migration. Only the actual data file need to
+ be checked for migration criteria.
+ */
+ gf_msg_debug (this->name, 0, "Skipping linkfile"
+ " %s on subvol: %s", entry_loc.path,
+ conf->local_subvols[i]->name);
+ continue;
+ }
+
+
+ /* Second lookup, this time through the DHT xlator itself */
+ ret = syncop_lookup (this, &entry_loc, NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:%s lookup failed",
+ entry_loc.path);
+
+ if (-ret != ENOENT && -ret != ESTALE) {
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ ret = -1;
+ goto out;
+ } else {
+ *should_commit_hash = 0;
+ continue;
+ }
+ }
+
+ continue;
+ }
+
+ /* if distribute is present, it will honor this key.
+ * -1, ENODATA is returned if distribute is not present
+ * or file doesn't have a link-file. If file has
+ * link-file, the path of link-file will be the value,
+ * and also that guarantees that file has to be mostly
+ * migrated */
+
+ hashed_subvol = dht_subvol_get_hashed (this, &entry_loc);
+ if (!hashed_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s",
+ loc->path);
+ continue;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, entry_loc.inode);
+ if (!cached_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s",
+ loc->path);
+
+ continue;
+ }
+
+ /* Hashed and cached subvolumes are the same: not queued */
+ if (hashed_subvol == cached_subvol) {
+ continue;
+ }
+
+ /*Build Container Structure */
+
+ tmp_container = GF_CALLOC (1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+ if (!tmp_container) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to allocate "
+ "memory for container");
+ ret = -1;
+ goto out;
+ }
+ tmp_container->df_entry = gf_dirent_for_name (df_entry->d_name);
+ if (!tmp_container->df_entry) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to allocate "
+ "memory for df_entry");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_container->df_entry->d_stat = df_entry->d_stat;
+
+ tmp_container->df_entry->d_ino = df_entry->d_ino;
+
+ tmp_container->df_entry->d_type = df_entry->d_type;
+
+ tmp_container->df_entry->d_len = df_entry->d_len;
+
+ tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc),
+ gf_dht_mt_loc_t);
+ if (!tmp_container->parent_loc) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to allocate "
+ "memory for loc");
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = loc_copy (tmp_container->parent_loc, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "loc_copy failed");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_container->migrate_data = migrate_data;
+
+ tmp_container->this = this;
+
+ if (df_entry->dict)
+ tmp_container->df_entry->dict =
+ dict_ref (df_entry->dict);
+
+ /*Build Container Structure >> END*/
+
+ ret = 0;
+ goto out;
+
+ }
+
+out:
+ loc_wipe (&entry_loc);
+
+ /* Hand the container (possibly NULL) to the caller on success; on
+ failure release a partially built one */
+ if (ret == 0) {
+ *container = tmp_container;
+ } else {
+ if (tmp_container) {
+ gf_defrag_free_container (tmp_container);
+ }
+ }
+
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ return ret;
+}
+
+/*
+ * Crawl one directory and queue its entries for migration.
+ *
+ * Entries are fetched with gf_defrag_get_entry (), moving round-robin
+ * across the local subvolumes (GF_CRAWL_INDEX_MOVE), and appended to
+ * defrag->queue under dfq_mutex; parallel_migration_cond is signalled
+ * for every queued entry.  While the queue holds more than
+ * MAX_MIGRATE_QUEUE_COUNT entries the crawler waits on
+ * rebalance_crawler_alarm.
+ *
+ * @this:         xlator; this->private is read as the dht_conf_t
+ * @defrag:       rebalance state (queue, mutex, condition variables)
+ * @loc:          directory to crawl
+ * @migrate_data: passed through to gf_defrag_get_entry ()
+ *
+ * Returns  0 on success (also when there are no local subvolumes),
+ *          2 when the crawl succeeded but gf_defrag_get_entry () cleared
+ *            should_commit_hash,
+ *         -1 on failure.
+ */
+int
+gf_defrag_process_dir (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+                       dict_t *migrate_data)
+{
+        int                      ret                = -1;
+        fd_t                    *fd                 = NULL;
+        dht_conf_t              *conf               = NULL;
+        gf_dirent_t              entries;  /* only used for sizeof () */
+        dict_t                  *xattr_req          = NULL;
+        struct timeval           dir_start          = {0,};
+        struct timeval           end                = {0,};
+        double                   elapsed            = 0;
+        int                      local_subvols_cnt  = 0;
+        int                      i                  = 0;
+        int                      j                  = 0;
+        struct dht_container    *container          = NULL;
+        int                      ldfq_count         = 0;
+        int                      dfc_index          = 0;
+        int                      throttle_up        = 0;
+        struct dir_dfmeta       *dir_dfmeta         = NULL;
+        int                      should_commit_hash = 1;
+
+        gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
+                loc->path);
+        gettimeofday (&dir_start, NULL);
+
+        conf = this->private;
+        local_subvols_cnt = conf->local_subvols_cnt;
+
+        /* Nothing to crawl from this node */
+        if (!local_subvols_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        fd = fd_create (loc->inode, defrag->pid);
+        if (!fd) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_opendir (this, loc, fd, NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_DATA_FAILED,
+                        "Migrate data failed: Failed to open dir %s",
+                        loc->path);
+                ret = -1;
+                goto out;
+        }
+
+        fd_bind (fd);
+
+        /* Per-directory crawl state: one slot per local subvolume */
+        dir_dfmeta = GF_CALLOC (1, sizeof (*dir_dfmeta),
+                                gf_common_mt_pointer);
+        if (!dir_dfmeta) {
+                gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        dir_dfmeta->head = GF_CALLOC (local_subvols_cnt,
+                                      sizeof (*(dir_dfmeta->head)),
+                                      gf_common_mt_pointer);
+        if (!dir_dfmeta->head) {
+                gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        dir_dfmeta->iterator = GF_CALLOC (local_subvols_cnt,
+                                          sizeof (*(dir_dfmeta->iterator)),
+                                          gf_common_mt_pointer);
+        if (!dir_dfmeta->iterator) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dir_dfmeta->iterator is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        dir_dfmeta->equeue = GF_CALLOC (local_subvols_cnt, sizeof (entries),
+                                        gf_dht_mt_dirent_t);
+        if (!dir_dfmeta->equeue) {
+                gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        dir_dfmeta->offset_var = GF_CALLOC (local_subvols_cnt,
+                                            sizeof (dht_dfoffset_ctx_t),
+                                            gf_dht_mt_octx_t);
+        if (!dir_dfmeta->offset_var) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dir_dfmeta->offset_var is NULL");
+                ret = -1;
+                goto out;
+        }
+        ret = gf_defrag_ctx_subvols_init (dir_dfmeta->offset_var, this);
+        if (ret) {
+                /* the two literals used to be glued together without a
+                 * separating space */
+                gf_log (this->name, GF_LOG_ERROR, "dht_dfoffset_ctx_t "
+                        "initialization failed");
+                ret = -1;
+                goto out;
+        }
+
+        dir_dfmeta->fetch_entries = GF_CALLOC (local_subvols_cnt,
+                                               sizeof (int), gf_common_mt_int);
+        if (!dir_dfmeta->fetch_entries) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dir_dfmeta->fetch_entries is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        for (i = 0; i < local_subvols_cnt ; i++) {
+                INIT_LIST_HEAD (&(dir_dfmeta->equeue[i].list));
+                dir_dfmeta->head[i]          = &(dir_dfmeta->equeue[i].list);
+                dir_dfmeta->iterator[i]      = dir_dfmeta->head[i];
+                dir_dfmeta->fetch_entries[i] = 1;
+        }
+
+        xattr_req = dict_new ();
+        if (!xattr_req) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_uint32 (xattr_req,
+                               conf->link_xattr_name, 256);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to set dict for "
+                        "key: %s", conf->link_xattr_name);
+                ret = -1;
+                goto out;
+        }
+
+        /*
+         Job: Read entries from each local subvol and store the entries
+              in equeue array of linked list. Now pick one entry from the
+              equeue array in a round robin basis and add them to defrag Queue.
+        */
+
+        while (!dht_dfreaddirp_done(dir_dfmeta->offset_var,
+                                    local_subvols_cnt)) {
+
+                pthread_mutex_lock (&defrag->dfq_mutex);
+                {
+
+                        /*Throttle up: If reconfigured count is higher than
+                          current thread count, wake up the sleeping threads
+                          TODO: Need to refactor this. Instead of making the
+                          thread sleep and wake, we should terminate and spawn
+                          threads on-demand*/
+
+                        if (defrag->recon_thread_count >
+                                         defrag->current_thread_count) {
+                                throttle_up =
+                                        (defrag->recon_thread_count -
+                                         defrag->current_thread_count);
+                                for (j = 0; j < throttle_up; j++) {
+                                        pthread_cond_signal (
+                                             &defrag->df_wakeup_thread);
+                                }
+
+                        }
+
+                        /* Back-pressure: sleep until the queue has drained
+                         * below the limit */
+                        while (defrag->q_entry_count >
+                                        MAX_MIGRATE_QUEUE_COUNT) {
+                                defrag->wakeup_crawler = 1;
+                                pthread_cond_wait (
+                                        &defrag->rebalance_crawler_alarm,
+                                        &defrag->dfq_mutex);
+                        }
+
+                        ldfq_count = defrag->q_entry_count;
+
+                        if (defrag->wakeup_crawler) {
+                                defrag->wakeup_crawler = 0;
+                        }
+
+                }
+                pthread_mutex_unlock (&defrag->dfq_mutex);
+
+                while (ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
+                       !dht_dfreaddirp_done(dir_dfmeta->offset_var,
+                                            local_subvols_cnt)) {
+
+                        ret = gf_defrag_get_entry (this, dfc_index, &container,
+                                                   loc, conf, defrag, fd,
+                                                   migrate_data, dir_dfmeta,
+                                                   xattr_req,
+                                                   &should_commit_hash);
+                        if (ret) {
+                                gf_log ("DHT", GF_LOG_INFO, "Found critical "
+                                        "error from gf_defrag_get_entry");
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* Check if we got an entry, else we need to move the
+                           index to the next subvol */
+                        if (!container) {
+                                GF_CRAWL_INDEX_MOVE(dfc_index,
+                                                    local_subvols_cnt);
+                                continue;
+                        }
+
+                        /* Q this entry in the dfq */
+                        pthread_mutex_lock (&defrag->dfq_mutex);
+                        {
+                                list_add_tail (&container->list,
+                                               &(defrag->queue[0].list));
+                                defrag->q_entry_count++;
+                                ldfq_count = defrag->q_entry_count;
+
+                                gf_msg_debug (this->name, 0, "added "
+                                              "file:%s parent:%s to the queue ",
+                                              container->df_entry->d_name,
+                                              container->parent_loc->path);
+
+                                pthread_cond_signal (
+                                        &defrag->parallel_migration_cond);
+                        }
+                        pthread_mutex_unlock (&defrag->dfq_mutex);
+
+                        GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
+                }
+        }
+
+        gettimeofday (&end, NULL);
+        /* elapsed is kept in microseconds and converted when logged */
+        elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+                  (end.tv_usec - dir_start.tv_usec);
+        gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
+                "%.2f secs", loc->path, elapsed/1e6);
+        ret = 0;
+out:
+
+        GF_FREE_DIR_DFMETA (dir_dfmeta);
+
+        if (xattr_req)
+                dict_unref(xattr_req);
+
+        if (fd)
+                fd_unref (fd);
+
+        /* Success, but the caller must not commit the hash for this dir */
+        if (ret == 0 && should_commit_hash == 0) {
+                ret = 2;
+        }
+
+        return ret;
+}
+/*
+ * Update the directory commit hash on @loc so that hash miscompares and
+ * broadcast lookups can stop.
+ *
+ * Nothing is done (and 0 is returned) when:
+ *  - the command is a plain fix-layout or a detach-tier start; in that
+ *    case the miscompares should continue until a real rebalance is
+ *    complete;
+ *  - there are no local subvolumes or lookup-optimize is off; commit
+ *    hash updates are only done on local subvolumes and only when lookup
+ *    optimization is needed (for older client support).
+ *
+ * @fix_layout is shared with the caller: "new-commit-hash" is added for
+ * the duration of the setxattr and removed again before returning,
+ * whether the setxattr succeeded or not.
+ *
+ * Returns 0 on success or when skipped, -1 on failure.
+ */
+int
+gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
+                       loc_t *loc, dict_t *fix_layout)
+{
+        int              ret    = -1;
+        dht_conf_t      *conf   = NULL;
+
+        if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX
+            || defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
+                return 0;
+        }
+
+        conf = this->private;
+        if (!conf) {
+                /*Uh oh
+                 */
+                return -1;
+        }
+
+        if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) {
+                return 0;
+        }
+
+        ret = dict_set_uint32 (fix_layout, "new-commit-hash",
+                               defrag->new_commit_hash);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set new-commit-hash");
+                return -1;
+        }
+
+        ret = syncop_setxattr (this, loc, fix_layout, 0, NULL, NULL);
+
+        /* TBD: find more efficient solution than adding/deleting every time.
+         * The key is dropped on the failure path as well, otherwise it
+         * would ride along on every later setxattr issued with this dict. */
+        dict_del(fix_layout, "new-commit-hash");
+
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fix layout on %s failed", loc->path);
+                return -1;
+        }
+
+        return 0;
+}
+
+
+
+/* Do a named lookup on a file inode during an attach tier, so that a
+ * hardlink lookup heal (i.e. a gfid to parent-gfid lookup heal) happens on
+ * pre-existing data. This is required so that the CTR database has the
+ * hardlinks of all the existing files in the volume. The CTR xlator on the
+ * brick/server side does the db update/insert of the hardlink on a named
+ * lookup.
+ * Currently the named lookup is done synchronously with the fix-layout that
+ * is triggered by attach tier. This is not performant, as it adds more time
+ * to the fix-layout. The performant approach is to record the hardlinks in a
+ * compressed datastore and do the named lookup asynchronously later, giving
+ * the CTR db eventual consistency.
+ *
+ * @this:        xlator; this->private is read as the dht_conf_t
+ * @parent_loc:  loc of the directory holding the file
+ * @file_dentry: directory entry of the file to look up
+ *
+ * Returns 0 on success, non-zero on failure (-1 for local failures, the
+ * syncop_lookup () return value when the lookup itself fails).
+ * */
+int
+gf_fix_layout_tier_attach_lookup (xlator_t *this,
+                                  loc_t *parent_loc,
+                                  gf_dirent_t *file_dentry)
+{
+        int                      ret            = -1;
+        dict_t                  *lookup_xdata   = NULL;
+        dht_conf_t              *conf           = NULL;
+        loc_t                    file_loc       = {0,};
+        struct iatt              iatt           = {0,};
+
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, parent_loc, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, file_dentry, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        if (!parent_loc->inode) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s/%s parent is NULL", parent_loc->path,
+                        file_dentry->d_name);
+                goto out;
+        }
+
+        conf = this->private;
+
+        loc_wipe (&file_loc);
+
+        if (gf_uuid_is_null (file_dentry->d_stat.ia_gfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s/%s gfid not present", parent_loc->path,
+                        file_dentry->d_name);
+                goto out;
+        }
+
+        gf_uuid_copy (file_loc.gfid, file_dentry->d_stat.ia_gfid);
+
+        if (gf_uuid_is_null (parent_loc->gfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s/%s"
+                        " gfid not present", parent_loc->path,
+                        file_dentry->d_name);
+                goto out;
+        }
+
+        gf_uuid_copy (file_loc.pargfid, parent_loc->gfid);
+
+        ret = dht_build_child_loc (this, &file_loc, parent_loc,
+                                   file_dentry->d_name);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Child loc build failed");
+                ret = -1;
+                goto out;
+        }
+
+        lookup_xdata = dict_new ();
+        if (!lookup_xdata) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed creating lookup dict for %s",
+                        file_dentry->d_name);
+                /* ret is still 0 from dht_build_child_loc (); without this
+                 * the allocation failure would be reported as success */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int32 (lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to set lookup flag");
+                goto out;
+        }
+
+        gf_uuid_copy (file_loc.parent->gfid, parent_loc->gfid);
+
+        /* Sending lookup to cold tier only */
+        ret = syncop_lookup (conf->subvolumes[0], &file_loc, &iatt,
+                             NULL, lookup_xdata, NULL);
+        if (ret) {
+                /* If the file does not exist on the cold tier then it must */
+                /* have been discovered on the hot tier. This is not an error. */
+                gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+                        "%s lookup to cold tier on attach heal failed", file_loc.path);
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+
+        loc_wipe (&file_loc);
+
+        if (lookup_xdata)
+                dict_unref (lookup_xdata);
+
+        return ret;
+}
+
+
+/*
+ * Recursively fix the layout of the directory tree rooted at @loc.
+ *
+ * For the directory itself: look it up and, unless the command is
+ * GF_DEFRAG_CMD_START_TIER or GF_DEFRAG_CMD_START_LAYOUT_FIX, run
+ * gf_defrag_process_dir () on it.  Then read the directory with
+ * syncop_readdirp () and for every sub-directory: build its loc, link its
+ * inode, look it up, apply @fix_layout with syncop_setxattr (), recurse, and
+ * finally call gf_defrag_settle_hash () on it (skipped when the recursion
+ * returned 2).  Non-directory entries are skipped, except that for
+ * GF_DEFRAG_CMD_START_TIER a gf_fix_layout_tier_attach_lookup () is issued
+ * for each of them.
+ *
+ * Returns 0 on success,
+ *         2 when the crawl finished but should_commit_hash was cleared,
+ *         1 when defrag->defrag_status is no longer
+ *           GF_DEFRAG_STATUS_STARTED,
+ *         another non-zero value (normally -1) on failure.
+ */
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+ inode_t *linked_inode = NULL, *inode = NULL;
+ dht_conf_t *conf = NULL;
+ int should_commit_hash = 1;
+
+ conf = this->private;
+ if (!conf) {
+ ret = -1;
+ goto out;
+ }
+
+
+
+ ret = syncop_lookup (this, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ /* Process this directory's entries first, except for tier start and
+ * plain fix-layout commands */
+ if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
+ (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ ret = gf_defrag_process_dir (this, defrag, loc, migrate_data);
+
+ if (ret && ret != 2) {
+ defrag->total_failures++;
+
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED,
+ "gf_defrag_process_dir failed for directory: %s"
+ , loc->path);
+
+ if (conf->decommission_in_progress) {
+ goto out;
+ }
+
+ should_commit_hash = 0;
+ } else if (ret == 2) {
+ should_commit_hash = 0;
+ }
+ }
+
+ gf_msg_trace (this->name, 0, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ fd_bind (fd);
+ INIT_LIST_HEAD (&entries.list);
+ /* A readdirp return of 0 ends the crawl; negative values are errors */
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries,
+ NULL, NULL)) != 0)
+ {
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
+ ". Aborting fix-layout",strerror(-ret));
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ /* Stop as soon as the rebalance is no longer in the STARTED state */
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ /* Remember where to resume the next readdirp */
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+ if (!IA_ISDIR (entry->d_stat.ia_type)) {
+
+ /* If its a fix layout during the attach
+ * tier operation do lookups on files
+ * on cold subvolume so that there is a
+ * CTR DB Lookup Heal triggered on existing
+ * data.
+ * */
+ if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+ gf_fix_layout_tier_attach_lookup
+ (this, loc, entry);
+ }
+
+ continue;
+ }
+ /* Release whatever the previous iteration left in entry_loc */
+ loc_wipe (&entry_loc);
+
+ ret = dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed for entry: %s",
+ entry->d_name);
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ } else {
+ should_commit_hash = 0;
+
+ continue;
+ }
+ }
+
+ if (gf_uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+
+ gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ /*In case the gfid stored in the inode by inode_link
+ * and the gfid obtained in the lookup differs, then
+ * client3_3_lookup_cbk will return ESTALE and proper
+ * error will be captured
+ */
+
+ linked_inode = inode_link (entry_loc.inode, loc->inode,
+ entry->d_name,
+ &entry->d_stat);
+
+ /* Swap in the linked inode and drop the reference on the old one */
+ inode = entry_loc.inode;
+ entry_loc.inode = linked_inode;
+ inode_unref (inode);
+
+ if (gf_uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, &iatt, NULL,
+ NULL, NULL);
+ /*Check whether it is ENOENT or ESTALE*/
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed with %d",
+ entry_loc.path, -ret);
+
+ if (!conf->decommission_in_progress &&
+ -ret != ENOENT && -ret != ESTALE) {
+ should_commit_hash = 0;
+ }
+
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+
+ defrag->total_failures++;
+
+ /*Don't go for fix-layout of child subtree if"
+ fix-layout failed*/
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+
+ ret = -1;
+
+ goto out;
+ } else {
+ continue;
+ }
+ }
+
+
+ /* A return value of 2 means, either process_dir or
+ * lookup of a dir failed. Hence, don't commit hash
+ * for the current directory*/
+
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret && ret != 2) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LAYOUT_FIX_FAILED,
+ "Fix layout failed for %s",
+ entry_loc.path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+
+ ret = -1;
+
+ goto out;
+ } else {
+ /* Let's not commit-hash if
+ * gf_defrag_fix_layout failed*/
+ continue;
+ }
+ }
+
+ /* Settle the hash of the sub-directory only when its crawl did not
+ * return 2 */
+ if (ret != 2 &&
+ gf_defrag_settle_hash (this, defrag, &entry_loc,
+ fix_layout) != 0) {
+ defrag->total_failures++;
+
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SETTLE_HASH_FAILED,
+ "Settle hash failed for %s",
+ entry_loc.path);
+
+ ret = -1;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ }
+ }
+ }
+ /* Batch consumed: free it and reset the list head for the next read */
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (fd)
+ fd_unref (fd);
+
+ /* Report "succeeded, but do not commit the hash" to the caller as 2 */
+ if (ret == 0 && should_commit_hash == 0) {
+ ret = 2;
+ }
+
+ return ret;
+
+}
+
+
+
+/******************************************************************************
+ * Tier background Fix layout functions
+ ******************************************************************************/
+/* This is the background tier fixlayout thread.
+ *
+ * @args: a gf_tier_fix_layout_arg_t carrying the xlator and the fix-layout
+ *        dict.
+ *
+ * Looks up the root, runs gf_defrag_fix_layout () and (unless that returned
+ * 2) gf_defrag_settle_hash () on it, and on success marks completion by
+ * setting GF_XATTR_TIER_LAYOUT_FIXED_KEY on the root.  Any failure is
+ * counted in defrag->total_failures when defrag is available.
+ *
+ * Always returns NULL.
+ */
+void *
+gf_tier_do_fix_layout (void *args)
+{
+        gf_tier_fix_layout_arg_t *tier_fix_layout_arg   = args;
+        int                       ret                   = -1;
+        xlator_t                 *this                  = NULL;
+        dht_conf_t               *conf                  = NULL;
+        gf_defrag_info_t         *defrag                = NULL;
+        dict_t                   *dict                  = NULL;
+        loc_t                     loc                   = {0,};
+        struct iatt               iatt                  = {0,};
+        struct iatt               parent                = {0,};
+
+        GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg, out);
+        GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg->this, out);
+        this = tier_fix_layout_arg->this;
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        defrag = conf->defrag;
+        GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+        GF_VALIDATE_OR_GOTO (this->name, defrag->root_inode, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, tier_fix_layout_arg->fix_layout, out);
+
+
+        /* Get Root loc_t */
+        dht_build_root_loc (defrag->root_inode, &loc);
+        ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_REBALANCE_START_FAILED,
+                        "Lookup on root failed.");
+                ret = -1;
+                goto out;
+        }
+
+
+        /* Start the crawl */
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                DHT_MSG_LOG_TIER_STATUS, "Tiering Fixlayout started");
+
+        ret = gf_defrag_fix_layout (this, defrag, &loc,
+                                    tier_fix_layout_arg->fix_layout, NULL);
+        if (ret && ret != 2) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
+                        "Tiering fixlayout failed.");
+                ret = -1;
+                goto out;
+        }
+
+        if (ret != 2 && gf_defrag_settle_hash
+                                (this, defrag, &loc,
+                                 tier_fix_layout_arg->fix_layout) != 0) {
+                defrag->total_failures++;
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_REBALANCE_FAILED,
+                        "Failed to set dictionary value: key = %s",
+                        GF_XATTR_TIER_LAYOUT_FIXED_KEY);
+                ret = -1;
+                goto out;
+        }
+
+        /* Marking the completion of tiering fix layout via a xattr on root */
+        ret = syncop_setxattr (this, &loc, dict, 0, NULL, NULL);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set tiering fix "
+                        "layout completed xattr on %s", loc.path);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* defrag is still NULL when one of the early validations failed */
+        if (ret && defrag)
+                defrag->total_failures++;
+
+        if (dict)
+                dict_unref (dict);
+
+        return NULL;
+}
+
+/*
+ * Start the background tier fix-layout thread unless the root already
+ * carries GF_XATTR_TIER_LAYOUT_FIXED_KEY.
+ *
+ * @this:       xlator
+ * @loc:        loc on which the "layout fixed" xattr is queried
+ * @defrag:     rebalance state; its tier_conf.tier_fix_layout_arg is filled
+ *              in and handed to the thread
+ * @fix_layout: dict passed on to the thread (no reference is taken here)
+ *
+ * Returns 0 when the layout is already fixed or the thread was created,
+ * otherwise the non-zero pthread_create () return value.
+ */
+int
+gf_tier_start_fix_layout (xlator_t *this,
+                         loc_t *loc,
+                         gf_defrag_info_t *defrag,
+                         dict_t *fix_layout)
+{
+        int                       ret                   = -1;
+        dict_t                   *tier_dict             = NULL;
+        gf_tier_fix_layout_arg_t *tier_fix_layout_arg   = NULL;
+
+        /* Check if layout is fixed already.
+         * tier_dict is only an output slot for the reply, as in the other
+         * syncop_getxattr () callers in this file; pre-allocating it with
+         * dict_new () would leak that dict once the slot is overwritten.
+         * NOTE(review): relies on syncop_getxattr () assigning *dict
+         * unconditionally - confirm against syncop.c. */
+        ret = syncop_getxattr (this, loc, &tier_dict,
+                               GF_XATTR_TIER_LAYOUT_FIXED_KEY,
+                               NULL, NULL);
+        if (ret != 0) {
+
+                tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
+
+                /*Fill crawl arguments */
+                tier_fix_layout_arg->this = this;
+                tier_fix_layout_arg->fix_layout = fix_layout;
+
+                /* Spawn the fix layout thread so that its done in the
+                 * background */
+                ret = pthread_create (&tier_fix_layout_arg->thread_id, NULL,
+                                      gf_tier_do_fix_layout,
+                                      tier_fix_layout_arg);
+                if (ret) {
+                        gf_log ("tier", GF_LOG_ERROR, "Thread creation failed. "
+                                "Background fix layout for tiering will not "
+                                "work.");
+                        defrag->total_failures++;
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        if (tier_dict)
+                dict_unref (tier_dict);
+
+        return ret;
+}
+
+/*
+ * Remove the GF_XATTR_TIER_LAYOUT_FIXED_KEY xattr from @loc if it is set.
+ * Failures are only logged; nothing is reported back to the caller.
+ */
+void
+gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
+{
+        dict_t  *fixed_xattr = NULL;
+        int      rc          = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+
+        /* Check if background fixlayout is completed. This is not
+         * multi-process safe i.e there is a possibility that by the time
+         * we move to remove the xattr there it might have been cleared by some
+         * other detach process from other node. We ignore the error if such
+         * a thing happens */
+        rc = syncop_getxattr (this, loc, &fixed_xattr,
+                              GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, NULL);
+        if (rc != 0) {
+                /* Background fixlayout not complete - nothing to clear*/
+                gf_msg (this->name, GF_LOG_WARNING, -rc,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "Unable to retrieve fixlayout xattr."
+                        "Assume background fix layout not complete");
+                goto out;
+        }
+
+        rc = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
+                                 NULL, NULL);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, -rc,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "Failed removing tier fix layout "
+                        "xattr from %s", loc->path);
+        }
+
+out:
+        if (fixed_xattr)
+                dict_unref (fixed_xattr);
+}
+
+/*
+ * Join the background tier fix-layout thread, if a thread id has been
+ * recorded in defrag->tier_conf.tier_fix_layout_arg.
+ */
+void
+gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
+        /* a zero thread id is treated as "no thread to wait for" */
+        if (!defrag->tier_conf.tier_fix_layout_arg.thread_id)
+                return;
+
+        pthread_join (defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL);
+}
+/******************Tier background Fix layout functions END********************/
+
+
+
+/*
+ * Synctask body of a rebalance / tier run.
+ *
+ * @data: the xlator (xlator_t *).
+ *
+ * Steps, in order:
+ *  1. build the root inode/loc and look up '/';
+ *  2. set the commit-hash xattr and then the fix-layout xattr on '/';
+ *  3. unless the command is a plain fix-layout: prepare migrate_data,
+ *     query the local subvolumes, create the global entry queue and spawn
+ *     the migrator threads (gf_defrag_task);
+ *  4. for GF_DEFRAG_CMD_START_TIER start the background tier fix-layout
+ *     and call methods->migration_other (); otherwise crawl the tree with
+ *     gf_defrag_fix_layout () and settle the hash on '/';
+ *  5. mark the crawl done, wake and join all threads, publish the final
+ *     status through ctx->notify, and free the defrag state.
+ *
+ * Returns the final value of ret: -1 on failure or when one of the initial
+ * NULL checks fails.
+ * NOTE(review): a gf_defrag_fix_layout () result of 2 is left in ret and
+ * therefore also marks the run as FAILED at "out" - confirm this is
+ * intended.
+ */
+int
+gf_defrag_start_crawl (void *data)
+{
+        xlator_t                *this                   = NULL;
+        dht_conf_t              *conf                   = NULL;
+        gf_defrag_info_t        *defrag                 = NULL;
+        int                      ret                    = -1;
+        loc_t                    loc                    = {0,};
+        struct iatt              iatt                   = {0,};
+        struct iatt              parent                 = {0,};
+        dict_t                  *fix_layout             = NULL;
+        dict_t                  *migrate_data           = NULL;
+        dict_t                  *status                 = NULL;
+        dict_t                  *dict                   = NULL;
+        glusterfs_ctx_t         *ctx                    = NULL;
+        dht_methods_t           *methods                = NULL;
+        int                      i                      = 0;
+        int                      thread_index           = 0;
+        int                      err                    = 0;
+        int                      thread_spawn_count     = 0;
+        pthread_t                tid[MAX_MIGRATOR_THREAD_COUNT];
+        gf_boolean_t             is_tier_detach         = _gf_false;
+
+        this = data;
+        if (!this)
+                goto exit;
+
+        ctx = this->ctx;
+        if (!ctx)
+                goto exit;
+
+        conf = this->private;
+        if (!conf)
+                goto exit;
+
+        defrag = conf->defrag;
+        if (!defrag)
+                goto exit;
+
+        gettimeofday (&defrag->start_time, NULL);
+        dht_build_root_inode (this, &defrag->root_inode);
+        if (!defrag->root_inode)
+                goto out;
+
+        dht_build_root_loc (defrag->root_inode, &loc);
+
+        /* fix-layout on '/' first */
+
+        ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_REBALANCE_START_FAILED,
+                        "Failed to start rebalance: look up on / failed");
+                ret = -1;
+                goto out;
+        }
+
+        fix_layout = dict_new ();
+        if (!fix_layout) {
+                ret = -1;
+                goto out;
+        }
+
+        /*
+         * Unfortunately, we can't do special xattrs (like fix.layout) and
+         * real ones in the same call currently, and changing it seems
+         * riskier than just doing two calls.
+         */
+
+        gf_log (this->name, GF_LOG_INFO, "%s using commit hash %u",
+                __func__, conf->vol_commit_hash);
+
+        ret = dict_set_uint32 (fix_layout, conf->commithash_xattr_name,
+                               conf->vol_commit_hash);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set %s", conf->commithash_xattr_name);
+                defrag->total_failures++;
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+                        loc.path);
+                defrag->total_failures++;
+                ret = -1;
+                goto out;
+        }
+
+        /* We now return to our regularly scheduled program. */
+
+        ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_REBALANCE_START_FAILED,
+                        "Failed to start rebalance:"
+                        "Failed to set dictionary value: key = %s",
+                        GF_XATTR_FIX_LAYOUT_KEY);
+                defrag->total_failures++;
+                ret = -1;
+                goto out;
+        }
+
+        defrag->new_commit_hash = conf->vol_commit_hash;
+
+        ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_REBALANCE_FAILED,
+                        "fix layout on %s failed",
+                        loc.path);
+                defrag->total_failures++;
+                ret = -1;
+                goto out;
+        }
+
+        if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+                migrate_data = dict_new ();
+                if (!migrate_data) {
+                        defrag->total_failures++;
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_str (migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
+                                    (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+                                    ? "force" : "non-force");
+                if (ret) {
+                        defrag->total_failures++;
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Find local subvolumes */
+                ret = syncop_getxattr (this, &loc, &dict,
+                                       GF_REBAL_FIND_LOCAL_SUBVOL,
+                                       NULL, NULL);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local "
+                                "subvolume determination failed with error: %d",
+                                -ret);
+                        ret = -1;
+                        goto out;
+                }
+
+                for (i = 0 ; i < conf->local_subvols_cnt; i++) {
+                        gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvols "
+                                "are %s", conf->local_subvols[i]->name);
+                }
+
+                /* Initialize global entry queue */
+                defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
+                                           gf_dht_mt_container_t);
+
+                if (!defrag->queue) {
+                        gf_log (this->name, GF_LOG_INFO, "No memory for queue");
+                        ret = -1;
+                        goto out;
+                }
+
+                INIT_LIST_HEAD (&(defrag->queue[0].list));
+
+                thread_spawn_count = MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4);
+
+                /* tid[] has room for MAX_MIGRATOR_THREAD_COUNT ids only;
+                 * never spawn more than that, however many CPUs are
+                 * online, or the array is overrun */
+                if (thread_spawn_count > MAX_MIGRATOR_THREAD_COUNT)
+                        thread_spawn_count = MAX_MIGRATOR_THREAD_COUNT;
+
+                gf_msg_debug (this->name, 0, "thread_spawn_count: %d",
+                              thread_spawn_count);
+
+                defrag->current_thread_count = thread_spawn_count;
+
+                /*Spawn Threads Here*/
+                while (thread_index < thread_spawn_count) {
+                        err = pthread_create(&(tid[thread_index]), NULL,
+                                             &gf_defrag_task, (void *)defrag);
+                        if (err != 0) {
+                                gf_log ("DHT", GF_LOG_ERROR,
+                                        "Thread[%d] creation failed. "
+                                        "Aborting Rebalance",
+                                         thread_index);
+                                ret = -1;
+                                goto out;
+                        } else {
+                                gf_log ("DHT", GF_LOG_INFO, "Thread[%d] "
+                                        "creation successful", thread_index);
+                        }
+                        thread_index++;
+                }
+        }
+
+        if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+                /* Fix layout for attach tier */
+                ret = gf_tier_start_fix_layout (this, &loc, defrag, fix_layout);
+                if (ret) {
+                        goto out;
+                }
+
+                methods = &(conf->methods);
+
+                /* Calling tier_start of tier.c */
+                methods->migration_other(this, defrag);
+                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
+
+                        ret = dict_set_str (migrate_data,
+                                            GF_XATTR_FILE_MIGRATE_KEY,
+                                            "force");
+                        if (ret)
+                                goto out;
+
+                }
+        } else {
+                ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+                                            migrate_data);
+                if (ret && ret != 2) {
+                        defrag->total_failures++;
+                        ret = -1;
+                        goto out;
+                }
+
+                if (ret != 2 && gf_defrag_settle_hash
+                        (this, defrag, &loc, fix_layout) != 0) {
+                        defrag->total_failures++;
+                        ret = -1;
+                        goto out;
+                }
+
+                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
+                        is_tier_detach = _gf_true;
+
+        }
+
+        gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed");
+out:
+
+        /* We are here means crawling the entire file system is done
+           or something failed. Set defrag->crawl_done flag to intimate
+           the migrator threads to exhaust the defrag->queue and terminate*/
+
+        if (ret) {
+                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+        }
+
+        pthread_mutex_lock (&defrag->dfq_mutex);
+        {
+                defrag->crawl_done = 1;
+
+                pthread_cond_broadcast (
+                        &defrag->parallel_migration_cond);
+                pthread_cond_broadcast (
+                        &defrag->df_wakeup_thread);
+        }
+        pthread_mutex_unlock (&defrag->dfq_mutex);
+
+        /*Wait for all the threads to complete their task*/
+        for (i = 0; i < thread_index; i++) {
+                pthread_join (tid[i], NULL);
+        }
+
+        if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+                /* Wait for the tier fixlayout to
+                 * complete if its was started.*/
+                 gf_tier_wait_fix_lookup (defrag);
+        }
+
+        if (is_tier_detach && ret == 0) {
+                /* If it was a detach remove the tier fix-layout
+                * xattr on root. Ignoring the failure, as nothing has to be
+                * done, logging is done in gf_tier_clear_fix_layout */
+                gf_tier_clear_fix_layout (this, &loc, defrag);
+        }
+
+        if (defrag->queue) {
+                gf_dirent_free (defrag->queue[0].df_entry);
+                INIT_LIST_HEAD (&(defrag->queue[0].list));
+        }
+
+        if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+            (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+                defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+        }
+
+        LOCK (&defrag->lock);
+        {
+                status = dict_new ();
+                gf_defrag_status_get (defrag, status);
+                if (ctx && ctx->notify)
+                        ctx->notify (GF_EN_DEFRAG_STATUS, status);
+                if (status)
+                        dict_unref (status);
+                defrag->is_exiting = 1;
+        }
+        UNLOCK (&defrag->lock);
+
+        GF_FREE (defrag->queue);
+
+        GF_FREE (defrag);
+        conf->defrag = NULL;
+
+        if (dict)
+                dict_unref (dict);
+
+        if (migrate_data)
+                dict_unref (migrate_data);
+
+        /* All users of fix_layout (the crawl above and the tier fix-layout
+         * thread, which has been joined) are finished by now */
+        if (fix_layout)
+                dict_unref (fix_layout);
+
+exit:
+        return ret;
+}
+
+
+
+/*
+ * Completion callback passed to synctask_new () for the rebalance crawl:
+ * stops the listener of the frame's xlator, destroys the frame's stack and
+ * sends SIGTERM to the current process.  @ret and @data are not used.
+ * Always returns 0.
+ */
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        gf_listener_stop (sync_frame->this);
+        STACK_DESTROY (sync_frame->root);
+
+        /* terminate this process */
+        kill (getpid(), SIGTERM);
+
+        return 0;
+}
+
+/*
+ * Thread entry point that launches the rebalance crawl as a synctask.
+ *
+ * @data: the xlator (xlator_t *).
+ *
+ * Creates a frame with pid GF_CLIENT_PID_DEFRAG, records that pid in the
+ * defrag state, marks the status as STARTED and schedules
+ * gf_defrag_start_crawl () with gf_defrag_done () as its completion
+ * callback.  THIS is pointed at the xlator while the task is created and
+ * restored afterwards.  Always returns NULL.
+ */
+void *
+gf_defrag_start (void *data)
+{
+        xlator_t         *this       = data;
+        dht_conf_t       *conf       = this->private;
+        gf_defrag_info_t *defrag     = NULL;
+        call_frame_t     *task_frame = NULL;
+        xlator_t         *saved_this = NULL;
+        int               rc         = -1;
+
+        if (!conf)
+                return NULL;
+
+        defrag = conf->defrag;
+        if (!defrag)
+                return NULL;
+
+        task_frame = create_frame (this, this->ctx->pool);
+        if (!task_frame)
+                return NULL;
+
+        task_frame->root->pid = GF_CLIENT_PID_DEFRAG;
+        defrag->pid           = task_frame->root->pid;
+        defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+        saved_this = THIS;
+        THIS = this;
+
+        rc = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+                           gf_defrag_done, task_frame, this);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_REBALANCE_START_FAILED,
+                        "Could not create task for rebalance");
+        }
+
+        THIS = saved_this;
+
+        return NULL;
+}
+
+/*
+ * Report the current rebalance counters.
+ *
+ * @defrag: rebalance state; nothing is done when it is NULL or when the
+ *          status is GF_DEFRAG_STATUS_NOT_STARTED
+ * @dict:   optional; when non-NULL the counters, the status and (if
+ *          non-zero) the run time are stored in it.  A failed store is
+ *          only logged as a warning.
+ *
+ * The status and counters are also written to the log.
+ * Always returns 0.
+ */
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+        int             rc              = 0;
+        uint64_t        nr_files        = 0;
+        uint64_t        nr_bytes        = 0;
+        uint64_t        nr_lookups      = 0;
+        uint64_t        nr_failures     = 0;
+        uint64_t        nr_skipped      = 0;
+        uint64_t        nr_promoted     = 0;
+        uint64_t        nr_demoted      = 0;
+        char           *status_str      = "";
+        double          run_time        = 0;
+        struct timeval  now             = {0,};
+
+        if (!defrag)
+                return 0;
+
+        if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+                return 0;
+
+        /* snapshot the counters */
+        nr_files    = defrag->total_files;
+        nr_bytes    = defrag->total_data;
+        nr_lookups  = defrag->num_files_lookedup;
+        nr_failures = defrag->total_failures;
+        nr_skipped  = defrag->skipped;
+        nr_promoted = defrag->total_files_promoted;
+        nr_demoted  = defrag->total_files_demoted;
+
+        gettimeofday (&now, NULL);
+
+        /* whole seconds since the crawl started */
+        run_time = now.tv_sec - defrag->start_time.tv_sec;
+
+        if (dict) {
+                rc = dict_set_uint64 (dict, "promoted", nr_promoted);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set promoted count");
+
+                rc = dict_set_uint64 (dict, "demoted", nr_demoted);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set demoted count");
+
+                rc = dict_set_uint64 (dict, "files", nr_files);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set file count");
+
+                rc = dict_set_uint64 (dict, "size", nr_bytes);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set size of xfer");
+
+                rc = dict_set_uint64 (dict, "lookups", nr_lookups);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set lookedup file count");
+
+                rc = dict_set_int32 (dict, "status", defrag->defrag_status);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set status");
+
+                if (run_time) {
+                        rc = dict_set_double (dict, "run-time", run_time);
+                        if (rc)
+                                gf_log (THIS->name, GF_LOG_WARNING,
+                                        "failed to set run-time");
+                }
+
+                rc = dict_set_uint64 (dict, "failures", nr_failures);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set failure count");
+
+                rc = dict_set_uint64 (dict, "skipped", nr_skipped);
+                if (rc)
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to set skipped file count");
+        }
+
+        /* human readable status; unknown values log as an empty string */
+        if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+                status_str = "not started";
+        else if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)
+                status_str = "in progress";
+        else if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED)
+                status_str = "stopped";
+        else if (defrag->defrag_status == GF_DEFRAG_STATUS_COMPLETE)
+                status_str = "completed";
+        else if (defrag->defrag_status == GF_DEFRAG_STATUS_FAILED)
+                status_str = "failed";
+
+        gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+                "Rebalance is %s. Time taken is %.2f secs",
+                status_str, run_time);
+        gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+                "Files migrated: %"PRIu64", size: %"
+                PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+                "%"PRIu64, nr_files, nr_bytes, nr_lookups, nr_failures,
+                nr_skipped);
+
+        return 0;
+}
+
+/* Store @state as the tier pause state while holding pause_mutex. */
+void
+gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state)
+{
+        pthread_mutex_lock (&tier_conf->pause_mutex);
+        {
+                tier_conf->pause_state = state;
+        }
+        pthread_mutex_unlock (&tier_conf->pause_mutex);
+}
+
+/*
+ * Return the tier pause state as read while holding
+ * tier_conf->pause_mutex.
+ */
+tier_pause_state_t
+gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf)
+{
+ int snapshot;
+
+ pthread_mutex_lock (&tier_conf->pause_mutex);
+ {
+ snapshot = tier_conf->pause_state;
+ }
+ pthread_mutex_unlock (&tier_conf->pause_mutex);
+
+ return snapshot;
+}
+
+/*
+ * Complete a pending pause request when no migration is in flight.
+ *
+ * Nothing is changed if the state is already TIER_RUNNING or
+ * TIER_PAUSED, or while promote_in_progress / demote_in_progress is set.
+ * Otherwise the state becomes TIER_PAUSED and, if a synctask is recorded
+ * in pause_synctask, it is woken and the field is cleared.
+ *
+ * All of this happens under pause_mutex. Returns the pause state seen
+ * under the mutex on the way out.
+ */
+tier_pause_state_t
+gf_defrag_check_pause_tier (gf_tier_conf_t *tier_conf)
+{
+ int woke = 0;
+ int result = -1;
+
+ pthread_mutex_lock (&tier_conf->pause_mutex);
+
+ if ((tier_conf->pause_state != TIER_RUNNING) &&
+ (tier_conf->pause_state != TIER_PAUSED) &&
+ !(tier_conf->promote_in_progress ||
+ tier_conf->demote_in_progress)) {
+
+ tier_conf->pause_state = TIER_PAUSED;
+
+ if (tier_conf->pause_synctask) {
+ synctask_wake (tier_conf->pause_synctask);
+ tier_conf->pause_synctask = 0;
+ woke = 1;
+ }
+
+ gf_msg ("tier", GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_PAUSED,
+ "woken %d", woke);
+ }
+
+ result = tier_conf->pause_state;
+
+ pthread_mutex_unlock (&tier_conf->pause_mutex);
+
+ return result;
+}
+
+/*
+ * Timer callback armed by gf_defrag_pause_tier(). 'data' is the xlator.
+ * After validating the xlator, its private conf and the defrag info,
+ * it re-runs gf_defrag_check_pause_tier() on the tier configuration.
+ */
+void
+gf_defrag_pause_tier_timeout (void *data)
+{
+ xlator_t *this = data;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("tier", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ defrag = conf->defrag;
+ GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_PAUSED,
+ "Request pause timer timeout");
+
+ gf_defrag_check_pause_tier (&defrag->tier_conf);
+
+out:
+ return;
+}
+
+/*
+ * Ask the tier migration to pause and wait for the outcome.
+ *
+ * Returns 0 when defrag is not in GF_DEFRAG_STATUS_STARTED (nothing is
+ * done) or when the pause state reached TIER_PAUSED. Returns -1 when
+ * the state is still not TIER_PAUSED after the calling synctask has
+ * been resumed; in that case the state is reset to TIER_RUNNING.
+ */
+int
+gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag)
+{
+ int ret = 0;
+ struct timespec delta = {0,};
+ int delay = 2;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
+ goto out;
+
+ /*
+ * Set flag requesting to pause tiering. Wait 'delay' seconds for
+ * tiering to actually stop as indicated by the pause state
+ * before returning success or failure.
+ */
+ gf_defrag_set_pause_state (&defrag->tier_conf, TIER_REQUEST_PAUSE);
+
+ /*
+ * If migration is not underway, can pause immediately.
+ */
+ gf_defrag_check_pause_tier (&defrag->tier_conf);
+ if (gf_defrag_get_pause_state (&defrag->tier_conf) == TIER_PAUSED)
+ goto out;
+
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_PAUSED,
+ "Request pause tier");
+
+ /*
+ * Record the current synctask so gf_defrag_check_pause_tier() can
+ * wake it, and arm a timer that re-runs the check after 'delay'
+ * seconds.
+ * NOTE(review): pause_synctask is written here without holding
+ * pause_mutex, while gf_defrag_check_pause_tier() reads it under
+ * the mutex - confirm this is safe.
+ * NOTE(review): the handle stored in pause_timer is not cancelled
+ * in this function - confirm it is released elsewhere.
+ */
+ defrag->tier_conf.pause_synctask = synctask_get ();
+ delta.tv_sec = delay;
+ delta.tv_nsec = 0;
+ defrag->tier_conf.pause_timer =
+ gf_timer_call_after (this->ctx, delta,
+ gf_defrag_pause_tier_timeout,
+ this);
+
+ synctask_yield (defrag->tier_conf.pause_synctask);
+
+ if (gf_defrag_get_pause_state (&defrag->tier_conf) == TIER_PAUSED)
+ goto out;
+
+ /* Pause did not happen: withdraw the request and report failure. */
+ gf_defrag_set_pause_state (&defrag->tier_conf, TIER_RUNNING);
+
+ ret = -1;
+out:
+
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_PAUSED,
+ "Pause tiering ret=%d", ret);
+
+ return ret;
+}
+
+/*
+ * End a pause: log the event and put the tier pause state back to
+ * TIER_RUNNING. Always returns 0.
+ */
+int
+gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag)
+{
+ gf_tier_conf_t *tier_conf = &defrag->tier_conf;
+
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_RESUME,
+ "Pause end. Resume tiering");
+
+ gf_defrag_set_pause_state (tier_conf, TIER_RUNNING);
+
+ return 0;
+}
+
+/*
+ * Switch the defrag command to GF_DEFRAG_CMD_START_DETACH_TIER.
+ * Always returns 0.
+ */
+int
+gf_defrag_start_detach_tier (gf_defrag_info_t *defrag)
+{
+ defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
+
+ return 0;
+}
+
+/*
+ * Handle a stop command for rebalance.
+ *
+ * If defrag has not started, nothing is changed and -1 is returned.
+ * Otherwise defrag_status is set to 'status', the current status is
+ * written into 'output' when it is non-NULL, and 0 is returned.
+ */
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+ dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_NOT_STARTED) {
+ gf_msg ("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED,
+ "Received stop command on rebalance");
+ defrag->defrag_status = status;
+
+ if (output)
+ gf_defrag_status_get (defrag, output);
+
+ ret = 0;
+ }
+
+ gf_msg_debug ("", 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index d88fc74450a..777c63de685 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1,347 +1,761 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
#include "defaults.h"
+int dht_rename_unlock (call_frame_t *frame, xlator_t *this);
+/*
+ * Per-subvolume callback for a directory rename.
+ *
+ * On failure (op_ret == -1) the error is logged together with the gfid
+ * and stored in local->op_ret / local->op_errno. On success the
+ * returned iatts are merged into the ones kept in local.
+ *
+ * When the last outstanding reply arrives, the four parent iatts in
+ * local are wiped and dht_rename_unlock() is called.
+ */
 int
 dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
 struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
 {
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
- if (op_ret == -1) {
- /* TODO: undo the damage */
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- gf_log (this->name, GF_LOG_DEBUG,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_RENAME_FAILED,
+ "Rename %s -> %s on %s failed, (gfid = %s)",
+ local->loc.path, local->loc2.path,
+ prev->this->name, gfid);
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- } else {
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
- */
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
- }
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- local->stbuf.ia_ino = local->loc.inode->ino;
-
- local->preoldparent.ia_ino = local->loc.parent->ino;
- local->postoldparent.ia_ino = local->loc.parent->ino;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
- local->preparent.ia_ino = local->loc2.parent->ino;
- local->postparent.ia_ino = local->loc2.parent->ino;
+unwind:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
 WIPE (&local->preoldparent);
 WIPE (&local->postoldparent);
 WIPE (&local->preparent);
 WIPE (&local->postparent);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent);
- }
+ dht_rename_unlock (frame, this);
+ }
- return 0;
+ return 0;
 }
+/*
+ * Callback for the first directory rename, which dht_rename_dir_do()
+ * winds to local->dst_hashed only.
+ *
+ * On failure the error is logged, stored in local, and the locks are
+ * released through dht_rename_unlock(). On success the returned iatts
+ * are merged into local and the rename is wound to every other
+ * subvolume with dht_rename_dir_cbk as the callback. If there is no
+ * other subvolume, the unlock path is taken directly.
+ */
+int
+dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_RENAME_FAILED,
+ "rename %s -> %s on %s failed, (gfid = %s) ",
+ local->loc.path, local->loc2.path,
+ prev->this->name, gfid );
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+
+ /* One reply is expected from every subvolume except dst_hashed. */
+ call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (!local->call_cnt)
+ goto unwind;
+
+ /* The loop counts down the private copy call_cnt, so local is not
+ * read for loop control once winds have been issued. */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == local->dst_hashed)
+ continue;
+ STACK_WIND (frame, dht_rename_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+
+ return 0;
+unwind:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ dht_rename_unlock (frame, this);
+ return 0;
+}
+
+/*
+ * Start the actual directory rename once the preliminary checks are
+ * done. If local->op_ret is already -1 the locks are released via
+ * dht_rename_unlock(); otherwise the rename is wound to
+ * local->dst_hashed with dht_rename_hashed_dir_cbk as the callback.
+ */
 int
 dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
 {
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
+ dht_local_t *local = NULL;
- conf = this->private;
- local = frame->local;
+ local = frame->local;
- if (local->op_ret == -1)
- goto err;
+ if (local->op_ret == -1)
+ goto err;
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
+ local->op_ret = 0;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2);
- }
-
- return 0;
+ STACK_WIND (frame, dht_rename_hashed_dir_cbk,
+ local->dst_hashed,
+ local->dst_hashed->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ return 0;
 err:
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
+ dht_rename_unlock (frame, this);
+ return 0;
 }
+/*
+ * Per-subvolume readdir callback used to check the rename destination.
+ * A reply with more than two entries marks the operation as failed
+ * with ENOTEMPTY in local. After the last reply, dht_rename_dir_do()
+ * is called.
+ */
 int
 dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
 {
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret > 2) {
+ gf_msg_trace (this->name, 0,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
- return 0;
+ return 0;
 }
+/*
+ * Per-subvolume opendir callback. On success the fd is bound and a
+ * readdir is wound to the same subvolume using local->fd. On failure
+ * the error is logged and this reply is counted as finished; after
+ * the last reply dht_rename_dir_do() is called.
+ */
 int
 dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
 {
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
- STACK_WIND (frame, dht_rename_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ if (op_ret == -1) {
- return 0;
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_OPENDIR_FAILED,
+ "opendir on %s for %s failed,(gfid = %s) ",
+ prev->this->name, local->loc.path, gfid);
+ goto err;
+ }
+
+ fd_bind (fd);
+ STACK_WIND (frame, dht_rename_readdir_cbk,
+ prev->this, prev->this->fops->readdir,
+ local->fd, 4096, 0, NULL);
+
+ return 0;
 err:
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
- return 0;
+ return 0;
 }
+/*
+ * Callback for the blocking inodelk requested by dht_rename_dir().
+ *
+ * On lock failure the error is logged and stored in local. On success
+ * an fd is created on local->loc.inode. If there is no cached
+ * destination (local->dst_cached is NULL) the rename proceeds via
+ * dht_rename_dir_do(); otherwise opendir on loc2 is wound to every
+ * subvolume.
+ *
+ * Every failure path stores its error in local->op_ret / op_errno
+ * before unlocking, because dht_rename_unlock_cbk() unwinds with those
+ * fields and not with this function's parameters.
+ */
 int
-dht_rename_dir (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
 {
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r (local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ /* Store the failure in local: the op_errno parameter is
+ * never read again after the jump to err. */
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- conf = frame->this->private;
- local = frame->local;
+ local->op_ret = 0;
+
+ if (!local->dst_cached) {
+ dht_rename_dir_do (frame, this);
+ return 0;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rename_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ &local->loc2, local->fd, NULL);
+ }
+
+ return 0;
+
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_unlock (frame, this);
+ return 0;
+}
- local->call_cnt = conf->subvolume_cnt;
+/*
+ * Entry point for renaming a directory.
+ *
+ * Builds an array of write locks in DHT_LAYOUT_HEAL_DOMAIN: one on the
+ * source (local->loc) for every subvolume, plus at most one more on the
+ * destination (or on the destination's parent when the destination does
+ * not exist and the parents differ). It then requests them with
+ * dht_blocking_inodelk(); the rename continues in
+ * dht_rename_dir_lock_cbk().
+ *
+ * Fails with ENOTCONN if any subvolume is down. On any failure here
+ * the lock array is freed and the fop is unwound with -1.
+ *
+ * NOTE(review): dst_layout returned by dht_layout_get() is not released
+ * in this function - confirm whether a dht_layout_unref() is required.
+ */
+int
+dht_rename_dir (call_frame_t *frame, xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_lock_t **lk_array = NULL;
+ dht_layout_t *dst_layout = NULL;
+ xlator_t *first_subvol = NULL;
+ loc_t parent_loc = {0, };
+ int count = 1;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int op_errno = -1;
+
+ conf = frame->this->private;
+ local = frame->local;
+
+ /* We must take a lock on all the subvols with src gfid.
+ * Along with this if dst exists we must take lock on
+ * any one subvol with dst gfid.
+ */
+ count = local->call_cnt = conf->subvolume_cnt;
+ if (local->loc2.inode) {
+ dst_layout = dht_layout_get (this, local->loc2.inode);
+ if (dst_layout)
+ ++count;
+ } else if (gf_uuid_compare (local->loc.parent->gfid,
+ local->loc2.parent->gfid)) {
+ dst_layout = dht_layout_get (this, local->loc2.parent);
+ if (dst_layout)
+ ++count;
+ }
 for (i = 0; i < conf->subvolume_cnt; i++) {
 if (!conf->subvolume_status[i]) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_RENAME_FAILED,
+ "Rename dir failed: subvolume down (%s)",
+ conf->subvolumes[i]->name);
 op_errno = ENOTCONN;
 goto err;
 }
 }
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->op_ret = 0;
+ /* Rename must take locks on src to avoid lookup selfheal from
+ * recreating src on those subvols where the rename was successful.
+ * Rename must take locks on all subvols with src because selfheal
+ * in entry creation phase may not have acquired lock on all subvols.
+ */
+ for (i = 0; i < local->call_cnt; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
- if (!local->dst_cached) {
- dht_rename_dir_do (frame, this);
- return 0;
- }
+ /* If the dst exists, we are going to replace dst layout range with
+ * that of src. This will lead to anomalies in dst layout until the
+ * rename completes. To avoid a lookup selfheal to change dst layout
+ * during this interval we take a lock on one subvol of dst.
+ */
+ for (j = 0; dst_layout && (j < dst_layout->cnt) &&
+ (dst_layout->list[j].err == 0); j++) {
+
+ first_subvol = dst_layout->list[j].xlator;
+ if (local->loc2.inode) {
+ lk_array[i] = dht_lock_new (frame->this, first_subvol,
+ &local->loc2, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ } else {
+ ret = dht_build_parent_loc (this, &parent_loc,
+ &local->loc2, &op_errno);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "parent loc build failed");
+ goto err;
+ }
+
+ lk_array[i] = dht_lock_new (frame->this, first_subvol,
+ &parent_loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd);
- }
+ if (lk_array[i] == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ break;
+ }
- return 0;
+ /* Slot i exists only when a dst layout was found and count was
+ * bumped for it. Without one, i == count and lk_array[i] would be
+ * one element past the allocation. Drop the reserved slot if it
+ * stayed empty. */
+ if ((i < count) && !lk_array[i])
+ --count;
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE,
+ dht_rename_dir_lock_cbk);
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ loc_wipe (&parent_loc);
+ return 0;
 err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ loc_wipe (&parent_loc);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+/*
+ * Attach rename-tracking information to 'xattr' under
+ * DHT_CHANGELOG_RENAME_OP_KEY.
+ *
+ * The value is a dht_changelog_rename_info_t holding the old and new
+ * parent gfids and both name lengths (each including the terminating
+ * NUL). The old name is followed immediately by the new name in the
+ * trailing buffer.
+ *
+ * Returns 0 on success. Returns -1 if any argument or either name is
+ * NULL or the allocation fails; otherwise returns whatever
+ * dict_set_bin() returned. The allocated info is freed here when
+ * dict_set_bin() fails.
+ */
+static int
+dht_rename_track_for_changelog (xlator_t *this, dict_t *xattr,
+ loc_t *oldloc, loc_t *newloc)
+{
+ int ret = -1;
+ dht_changelog_rename_info_t *info = NULL;
+ char *name = NULL;
+ int len1 = 0;
+ int len2 = 0;
+ int size = 0;
+
+ if (!xattr || !oldloc || !newloc || !this)
+ return ret;
+
+ /* strlen (NULL) is undefined: a loc without a name cannot be
+ * tracked. */
+ if (!oldloc->name || !newloc->name)
+ return ret;
+
+ len1 = strlen (oldloc->name) + 1;
+ len2 = strlen (newloc->name) + 1;
+ size = sizeof (dht_changelog_rename_info_t) + len1 + len2;
+
+ info = GF_CALLOC (size, sizeof(char), gf_common_mt_char);
+ if (!info) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "Failed to calloc memory");
+ return ret;
+ }
+
+ gf_uuid_copy (info->old_pargfid, oldloc->pargfid);
+ gf_uuid_copy (info->new_pargfid, newloc->pargfid);
+
+ info->oldname_len = len1;
+ info->newname_len = len2;
+ /* Both lengths include the NUL, so a plain copy keeps the names
+ * terminated. */
+ memcpy (info->buffer, oldloc->name, len1);
+ name = info->buffer + len1;
+ memcpy (name, newloc->name, len2);
+
+ ret = dict_set_bin (xattr, DHT_CHANGELOG_RENAME_OP_KEY,
+ info, size);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s", DHT_CHANGELOG_RENAME_OP_KEY,
+ oldloc->name);
+ GF_FREE (info);
+ }
+
+ return ret;
+}
+
+
+
+/*
+ * Set GLUSTERFS_MARKER_DONT_ACCOUNT_KEY to "yes" in 'xattr'.
+ *
+ * 'xattr' must be a modifiable dict_t pointer variable: when it is NULL
+ * a new dict is created and assigned to it. If that creation fails the
+ * macro does nothing further, and logs nothing. A dict_set_str()
+ * failure is logged.
+ *
+ * The expansion uses 'this' and 'local' from the calling scope.
+ */
+#define DHT_MARKER_DONT_ACCOUNT(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \
+ "yes"); \
+ if (tmp) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
+ DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s",GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \
+ local->loc.path); \
+ } \
+ }while (0)
+
+
+/*
+ * Record a rename from 'oldloc' to 'newloc' in 'xattr' by calling
+ * dht_rename_track_for_changelog().
+ *
+ * 'xattr' must be a modifiable dict_t pointer variable: when it is NULL
+ * a new dict is created and assigned to it. A failure to create the
+ * dict or to store the tracking data is logged; nothing is returned to
+ * the caller.
+ *
+ * The expansion uses 'this' from the calling scope.
+ */
+#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
+ DHT_MSG_DICT_SET_FAILED, \
+ "Failed to create dictionary to " \
+ "track rename"); \
+ break; \
+ } \
+ } \
+ \
+ tmp = dht_rename_track_for_changelog (this, xattr, \
+ oldloc, newloc); \
+ \
+ if (tmp) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
+ DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", DHT_CHANGELOG_RENAME_OP_KEY, \
+ (oldloc)->path); \
+ } \
+ } while (0)
+
+/*
+ * Final step of a rename: runs once the unlock has been attempted.
+ * Applies dht_set_fixed_dir_stat() to the four parent iatts, strips the
+ * phase-1 flags from stbuf for regular files, and unwinds the rename
+ * with the result stored in local. The op_ret / op_errno parameters of
+ * the unlock itself are not used.
+ */
+int
+dht_rename_unlock_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ dht_local_t *local = frame->local;
+
+ /* old parent, before and after */
+ dht_set_fixed_dir_stat (&local->preoldparent);
+ dht_set_fixed_dir_stat (&local->postoldparent);
+
+ /* new parent, before and after */
+ dht_set_fixed_dir_stat (&local->preparent);
+ dht_set_fixed_dir_stat (&local->postparent);
+
+ if (IA_ISREG (local->stbuf.ia_type)) {
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ }
+
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
+}
+
+/*
+ * Release the inodelks recorded in local->lock and finish the rename in
+ * dht_rename_unlock_cbk(). If the unlock request itself cannot be
+ * issued, a warning is logged and the callback is invoked directly so
+ * the fop still unwinds. Always returns 0.
+ */
+int
+dht_rename_unlock (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = frame->local;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+ op_ret = dht_unlock_inodelk (frame, local->lock.locks,
+ local->lock.lk_count,
+ dht_rename_unlock_cbk);
+ if (op_ret >= 0)
+ return 0;
+
+ uuid_utoa_r (local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG (local->stbuf.ia_type)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid,
+ local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ?
+ local->dst_cached->name : NULL);
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid,
+ local->loc2.path, dst_gfid);
+ }
+
+ dht_rename_unlock_cbk (frame, NULL, this, 0, 0, NULL);
+
+ return 0;
 }
+/*
+ * Finish a rename: if local->linked is set, clear it and call
+ * dht_linkfile_attr_heal(); then release the locks through
+ * dht_rename_unlock(). Always returns 0.
+ */
+int
+dht_rename_done (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = frame->local;
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+
+ dht_rename_unlock (frame, this);
+
+ return 0;
+}
+/*
+ * Callback for the unlinks wound during rename cleanup. A failed unlink
+ * is only logged. The parent iatts in local are wiped on every reply,
+ * and after the last reply dht_rename_done() is called.
+ */
 int
 dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
 {
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
 if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_VALUE,
 "!local, should not happen");
 goto out;
 }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLINK_FAILED,
+ "%s: Rename: unlink on %s failed ",
+ local->loc.path, prev->this->name);
+ }
 WIPE (&local->preoldparent);
 WIPE (&local->postoldparent);
 WIPE (&local->preparent);
 WIPE (&local->postparent);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_done (frame, this);
 }
 out:
- return 0;
+ return 0;
 }
int
dht_rename_cleanup (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ this = frame->this;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (src_cached == dst_cached)
+ goto nolinks;
+ if (local->linked && (dst_hashed != src_hashed )&&
+ (dst_hashed != src_cached)) {
+ call_cnt++;
+ }
- local = frame->local;
- this = frame->this;
+ if (local->added_link && (src_cached != dst_hashed)) {
+ call_cnt++;
+ }
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ local->call_cnt = call_cnt;
- if (src_cached == dst_cached)
- goto nolinks;
+ if (!call_cnt)
+ goto nolinks;
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ DHT_MARK_FOP_INTERNAL (xattr);
- if (src_cached != dst_hashed)
- call_cnt++;
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- local->call_cnt = call_cnt;
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ dict_t *xattr_new = NULL;
- if (!call_cnt)
- goto nolinks;
+ gf_msg_trace (this->name, 0,
+ "unlinking linkfile %s @ %s => %s, (gfid = %s)",
+ local->loc.path, dst_hashed->name,
+ src_cached->name, gfid);
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc);
- }
+ &local->loc, 0, xattr_new);
- if (src_cached != dst_hashed) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc2);
- }
+ dict_unref (xattr_new);
+ xattr_new = NULL;
+ }
+
+ if (local->added_link && (src_cached != dst_hashed)) {
+ dict_t *xattr_new = NULL;
+
+ gf_msg_trace (this->name, 0,
+ "unlinking link %s => %s (%s), (gfid = %s)",
+ local->loc.path, local->loc2.path,
+ src_cached->name, gfid);
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+ if (gf_uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc2, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
+ }
+
+ if (xattr)
+ dict_unref (xattr);
return 0;
@@ -351,10 +765,36 @@ nolinks:
WIPE (&local->preparent);
WIPE (&local->postparent);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_unlock (frame, this);
+ return 0;
+}
+
+
+/*
+ * Callback for a link/file creation issued on a separate frame during
+ * rename. A failure is only logged. If local->linked is set it is
+ * cleared and dht_linkfile_attr_heal() is called. The frame is then
+ * destroyed rather than unwound.
+ */
+int
+dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+
+ prev = cookie;
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_CREATE_LINK_FAILED,
+ "link/file %s on %s failed",
+ local->loc.path, prev->this->name);
+ }
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STACK_DESTROY (frame);
 return 0;
 }
@@ -362,104 +802,205 @@ nolinks:
int
dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ /* It is a critical failure iff we fail to rename the cached file.
+ * If the rename of the linkto failed, it is not a critical failure,
+ * and we do not want to lose the created hard link for the new
+ * name as that could have been read by other clients.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations.
+ *
+ * Repercussions of treating this as a non-critical error is that
+ * we could leave behind a stale linkto file and/or not create the new
+ * linkto file, the second case would be rectified by a subsequent
+ * lookup, the first case by a rebalance, like for all stale linkto
+ * files */
+
+ if (op_ret == -1) {
+ /* Critical failure: unable to rename the cached file */
+ if (prev->this == src_cached) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_RENAME_FAILED,
+ "%s: Rename on %s failed, (gfid = %s) ",
+ local->loc.path, prev->this->name,
+ local->loc.inode ?
+ uuid_utoa(local->loc.inode->gfid):"");
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto cleanup;
+ } else {
+ /* Non-critical failure, unable to rename the linkto
+ * file
+ */
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_RENAME_FAILED,
+ "%s: Rename (linkto file) on %s failed, "
+ "(gfid = %s) ",
+ local->loc.path, prev->this->name,
+ local->loc.inode ?
+ uuid_utoa(local->loc.inode->gfid):"");
+ }
+ }
+ if (xdata) {
+ if (!local->xattr)
+ local->xattr = dict_ref (xdata);
+ else
+ local->xattr = dict_copy_with_ref (xdata, local->xattr);
+ }
- local = frame->local;
- prev = cookie;
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+ link_frame = copy_frame (frame);
+ if (!link_frame) {
+ goto err;
+ }
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ /* fop value sent as maxvalue because it is not used
+ anywhere in this case */
+ link_local = dht_local_init (link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto err;
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "rename on %s failed (%s)", prev->this->name,
- strerror (op_errno));
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto cleanup;
- }
+ if (link_local->loc.inode)
+ inode_unref (link_local->loc.inode);
+ link_local->loc.inode = inode_ref (local->loc.inode);
+ gf_uuid_copy (link_local->gfid, local->loc.inode->gfid);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent, prev->this);
-
- local->stbuf.ia_ino = local->loc.inode->ino;
-
- local->preoldparent.ia_ino = local->loc.parent->ino;
- local->postoldparent.ia_ino = local->loc.parent->ino;
-
- local->preparent.ia_ino = local->loc2.parent->ino;
- local->postparent.ia_ino = local->loc2.parent->ino;
-
- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
- * is called. since rename has already happened on rename_subvol,
- * unlink should not be sent for oldpath (either linkfile or cached-file)
- * on rename_subvol. */
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
-
- /* TODO: delete files in background */
-
- if (src_cached != dst_hashed && src_cached != dst_cached)
- local->call_cnt++;
-
- if (src_hashed != rename_subvol && src_hashed != src_cached)
- local->call_cnt++;
-
- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
- local->call_cnt++;
-
- if (local->call_cnt == 0)
- goto unwind;
-
- if (src_cached != dst_hashed && src_cached != dst_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src datafile %s @ %s",
- local->loc.path, src_cached->name);
-
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc);
- }
+ dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
+ this, src_cached, dst_hashed,
+ &link_local->loc);
+ }
- if (src_hashed != rename_subvol && src_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src linkfile %s @ %s",
- local->loc.path, src_hashed->name);
+err:
+ /* Merge attrs only from src_cached. In case of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev->this == src_cached) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+ }
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_hashed, src_hashed->fops->unlink,
- &local->loc);
- }
- if (dst_cached
- && (dst_cached != dst_hashed)
- && (dst_cached != src_cached)) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old dst datafile %s @ %s",
- local->loc2.path, dst_cached->name);
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink should not be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_cached, dst_cached->fops->unlink,
- &local->loc2);
- }
- return 0;
+ /* TODO: delete files in background */
+
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
+
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
+
+ if (local->call_cnt == 0)
+ goto unwind;
+
+ DHT_MARK_FOP_INTERNAL (xattr);
+
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+ gf_msg_trace (this->name, 0,
+ "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
+
+ if (gf_uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
+
+ DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc,
+ &local->loc2);
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
+ }
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+ gf_msg_trace (this->name, 0,
+ "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
+
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_hashed, src_hashed->fops->unlink,
+ &local->loc, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
+ }
+
+ if (dst_cached
+ && (dst_cached != dst_hashed)
+ && (dst_cached != src_cached)) {
+ gf_msg_trace (this->name, 0,
+ "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ dst_cached, dst_cached->fops->unlink,
+ &local->loc2, 0, xattr);
+ }
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
unwind:
WIPE (&local->preoldparent);
@@ -467,12 +1008,9 @@ unwind:
WIPE (&local->preparent);
WIPE (&local->postparent);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
- return 0;
+ return 0;
cleanup:
dht_rename_cleanup (frame);
@@ -484,67 +1022,167 @@ cleanup:
int
dht_do_rename (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
+
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) {
+ DHT_MARKER_DONT_ACCOUNT(local->xattr_req);
+ }
+ if (rename_subvol == src_cached) {
+ DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc,
+ &local->loc2);
+ }
- local = frame->local;
- this = frame->this;
+ gf_msg_trace (this->name, 0,
+ "renaming %s => %s (%s)",
+ local->loc.path, local->loc2.path, rename_subvol->name);
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
- src_cached = local->src_cached;
+ if (local->linked == _gf_true)
+ FRAME_SU_DO (frame, dht_local_t);
+ STACK_WIND (frame, dht_rename_cbk,
+ rename_subvol, rename_subvol->fops->rename,
+ &local->loc, &local->loc2, local->xattr_req);
+ return 0;
+}
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+int
+dht_rename_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_msg_debug (this->name, 0,
+ "link/file on %s failed (%s)",
+ prev->this->name, strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ local->added_link = _gf_false;
+ } else
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s => %s (%s)",
- local->loc.path, local->loc2.path, rename_subvol->name);
+ if (local->op_ret == -1)
+ goto cleanup;
- STACK_WIND (frame, dht_rename_cbk,
- rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2);
+ dht_do_rename (frame);
- return 0;
-}
+ return 0;
+
+cleanup:
+ dht_rename_cleanup (frame);
+ return 0;
+}
int
-dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
+dht_rename_linkto_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+ DHT_MARK_FOP_INTERNAL (xattr);
+ prev = cookie;
+ src_cached = local->src_cached;
+
+ if (op_ret == -1) {
+ gf_msg_debug (this->name, 0,
+ "link/file on %s failed (%s)",
+ prev->this->name, strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+
+ /* If linkto creation failed move to failure cleanup code,
+ * instead of continuing with creating the link file */
+ if (local->op_ret != 0) {
+ goto cleanup;
+ }
+
+ gf_msg_trace (this->name, 0,
+ "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr);
+ }
+
+ local->added_link = _gf_true;
+
+ STACK_WIND (frame, dht_rename_link_cbk,
+ src_cached, src_cached->fops->link,
+ &local->loc, &local->loc2, xattr);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+
+cleanup:
+ dht_rename_cleanup (frame);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+}
+
+int
+dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
- int this_call_cnt = 0;
local = frame->local;
prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_msg_debug (this->name, 0,
+ "unlink of %s on %s failed (%s)",
+ local->loc2.path, prev->this->name,
+ strerror (op_errno));
local->op_ret = -1;
local->op_errno = op_errno;
}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == -1)
- goto cleanup;
-
- dht_do_rename (frame);
- }
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename (frame);
return 0;
@@ -558,155 +1196,386 @@ cleanup:
int
dht_rename_create_links (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
- local = frame->local;
- this = frame->this;
+ local = frame->local;
+ this = frame->this;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (src_cached == dst_cached)
- goto nolinks;
+ DHT_MARK_FOP_INTERNAL (xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ if (src_cached == dst_cached) {
+ dict_t *xattr_new = NULL;
- if (src_cached != dst_hashed)
- call_cnt++;
+ if (dst_hashed == dst_cached)
+ goto nolinks;
- local->call_cnt = call_cnt;
+ xattr_new = dict_copy_with_ref (xattr, NULL);
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- dht_linkfile_create (frame, dht_rename_links_cbk,
- src_cached, dst_hashed, &local->loc);
- }
+ gf_msg_trace (this->name, 0,
+ "unlinking dst linkfile %s @ %s",
+ local->loc2.path, dst_hashed->name);
- if (src_cached != dst_hashed) {
- gf_log (this->name, GF_LOG_TRACE,
- "link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- STACK_WIND (frame, dht_rename_links_cbk,
- src_cached, src_cached->fops->link,
- &local->loc, &local->loc2);
- }
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+
+ STACK_WIND (frame, dht_rename_unlink_links_cbk,
+ dst_hashed, dst_hashed->fops->unlink,
+ &local->loc2, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+ }
+
+ if (src_cached != dst_hashed) {
+ /* needed to create the link file */
+ call_cnt++;
+ if (dst_hashed != src_hashed)
+ /* needed to create the linkto file */
+ call_cnt ++;
+ }
+
+ /* We should not have any failures post the link creation, as this
+ * introduces the newname into the namespace. Clients could have cached
+ * the existence of the newname and may start taking actions based on
+ * the same. Hence create the linkto first, and then attempt the link.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations. */
+ if (dst_hashed != src_hashed && src_cached != dst_hashed) {
+ gf_msg_trace (this->name, 0,
+ "linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name,
+ src_cached->name);
+
+ memcpy (local->gfid, local->loc.inode->gfid, 16);
+ dht_linkfile_create (frame, dht_rename_linkto_cbk, this,
+ src_cached, dst_hashed, &local->loc);
+ } else if (src_cached != dst_hashed) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+ gf_msg_trace (this->name, 0,
+ "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
+
+ local->added_link = _gf_true;
+
+ STACK_WIND (frame, dht_rename_link_cbk,
+ src_cached, src_cached->fops->link,
+ &local->loc, &local->loc2, xattr_new);
+
+ dict_unref (xattr_new);
+ }
nolinks:
- if (!call_cnt) {
- /* skip to next step */
- dht_do_rename (frame);
- }
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename (frame);
+ }
+ if (xattr)
+ dict_unref (xattr);
- return 0;
+ return 0;
+}
+
+/*
+ * Callback for the lookups wound by dht_rename_lock_cbk (one per lock).
+ *
+ * Each reply is inspected to decide whether the locked file is a linkto
+ * file rather than a data file. Once the last reply has arrived the rename
+ * either proceeds to dht_rename_create_links() or is failed through
+ * dht_rename_unlock().
+ *
+ * The errno reported for a failed rename is recorded at the point the
+ * problem is detected, so it does not depend on which reply happens to
+ * arrive last, and is never left at 0 when a lookup succeeded but found a
+ * linkto file.
+ */
+int
+dht_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+                       struct iatt *postparent)
+{
+        dht_local_t *local    = NULL;
+        int          call_cnt = 0;
+        dht_conf_t  *conf     = NULL;
+
+        local = frame->local;
+        conf = this->private;
+
+        if (op_ret < 0) {
+                /* The meaning of is_linkfile is overloaded here. For locking
+                 * to work properly both rebalance and rename should acquire
+                 * the lock on the data file. The reason for sending this
+                 * lookup is to find out whether we've acquired a lock on
+                 * the data file. Between the lookup before rename and this
+                 * rename, the file could have been migrated by a rebalance
+                 * process, and this file might now be a linkto file. We
+                 * verify that by sending this lookup. However, if this
+                 * lookup fails we cannot really say whether we've acquired
+                 * the lock on a data file or a linkto file. So, we act
+                 * conservatively and _assume_ that this is a linkfile and
+                 * fail the rename operation.
+                 */
+                local->is_linkfile = _gf_true;
+                local->op_errno = op_errno;
+        } else if (xattr && check_is_linkfile (inode, stbuf, xattr,
+                                               conf->link_xattr_name)) {
+                /* The lookup itself succeeded, so op_errno carries no
+                 * error; report ENOENT because the data file is not where
+                 * we locked it. */
+                local->is_linkfile = _gf_true;
+                local->op_errno = ENOENT;
+        }
+
+        call_cnt = dht_frame_return (frame);
+        if (is_last_call (call_cnt)) {
+                if (local->is_linkfile) {
+                        /* op_errno was stored when the condition was seen */
+                        local->op_ret = -1;
+                        goto fail;
+                }
+
+                dht_rename_create_links (frame);
+        }
+
+        return 0;
+fail:
+        dht_rename_unlock (frame, this);
+        return 0;
+}
+
+/*
+ * Callback invoked once the inodelk request issued by dht_rename_lock has
+ * completed.
+ *
+ * On failure the error is logged and recorded in the local, and the rename
+ * is finished through dht_rename_unlock(). On success a lookup requesting
+ * the linkto xattr is wound to the subvolume of every lock held; the
+ * replies are collected in dht_rename_lookup_cbk.
+ */
+int32_t
+dht_rename_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+        char         src_gfid[GF_UUID_BUF_SIZE] = {0};
+        char         dst_gfid[GF_UUID_BUF_SIZE] = {0};
+        dict_t      *xattr_req = NULL;
+        dht_conf_t  *conf = NULL;
+        int          i = 0;
+
+        local = frame->local;
+        conf = this->private;
+
+        if (op_ret < 0) {
+                uuid_utoa_r (local->loc.inode->gfid, src_gfid);
+
+                if (local->loc2.inode)
+                        uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
+
+                /* dst_cached is absent when the destination does not exist
+                 * yet; a NULL pointer must never reach a %s conversion, so
+                 * print a placeholder instead. */
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        DHT_MSG_INODE_LK_ERROR,
+                        "acquiring inodelk failed "
+                        "rename (%s:%s:%s %s:%s:%s)",
+                        local->loc.path, src_gfid, local->src_cached->name,
+                        local->loc2.path, dst_gfid,
+                        local->dst_cached ? local->dst_cached->name : "<nul>");
+
+                local->op_ret = -1;
+                local->op_errno = op_errno;
+
+                goto done;
+        }
+
+        xattr_req = dict_new ();
+        if (xattr_req == NULL) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto done;
+        }
+
+        op_ret = dict_set_uint32 (xattr_req,
+                                  conf->link_xattr_name, 256);
+        if (op_ret < 0) {
+                local->op_ret = -1;
+                local->op_errno = -op_ret;
+                goto done;
+        }
+
+        /* One lookup reply is expected per lock. */
+        local->call_cnt = local->lock.lk_count;
+
+        for (i = 0; i < local->lock.lk_count; i++) {
+                STACK_WIND (frame, dht_rename_lookup_cbk,
+                            local->lock.locks[i]->xl,
+                            local->lock.locks[i]->xl->fops->lookup,
+                            &local->lock.locks[i]->loc, xattr_req);
+        }
+
+        dict_unref (xattr_req);
+        return 0;
+
+done:
+        /* Its fine to call unlock even when no locks are acquired, as we check
+         * for lock->locked before winding a unlock call.
+         */
+        dht_rename_unlock (frame, this);
+
+        if (xattr_req)
+                dict_unref (xattr_req);
+
+        return 0;
}
+/*
+ * Build the lock array for a file rename and request blocking inodelks.
+ *
+ * A write lock is always requested for the source on its cached subvolume;
+ * a second one is requested for the destination when it has a cached
+ * subvolume. Returns 0 once the lock request has been issued
+ * (dht_rename_lock_cbk continues the operation) and -1 on failure, in
+ * which case every lock object created so far is released here.
+ */
+int
+dht_rename_lock (call_frame_t *frame)
+{
+        dht_local_t  *local    = frame->local;
+        dht_lock_t  **lk_array = NULL;
+        int           count    = 1;
+        int           ret      = -1;
+
+        if (local->dst_cached)
+                count++;
+
+        lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+        if (lk_array == NULL)
+                goto err;
+
+        lk_array[0] = dht_lock_new (frame->this, local->src_cached, &local->loc,
+                                    F_WRLCK, DHT_FILE_MIGRATE_DOMAIN);
+        if (lk_array[0] == NULL)
+                goto err;
+
+        if (local->dst_cached) {
+                lk_array[1] = dht_lock_new (frame->this, local->dst_cached,
+                                            &local->loc2, F_WRLCK,
+                                            DHT_FILE_MIGRATE_DOMAIN);
+                if (lk_array[1] == NULL)
+                        goto err;
+        }
+
+        local->lock.locks = lk_array;
+        local->lock.lk_count = count;
+
+        ret = dht_blocking_inodelk (frame, lk_array, count,
+                                    FAIL_ON_ANY_ERROR, dht_rename_lock_cbk);
+        if (ret < 0) {
+                /* The array is freed below; do not leave it reachable
+                 * through the local. */
+                local->lock.locks = NULL;
+                local->lock.lk_count = 0;
+                goto err;
+        }
+
+        return 0;
+err:
+        if (lk_array != NULL) {
+                int allocated = 0;
+
+                /* Count the leading entries that were actually created. */
+                while ((allocated < count) && (lk_array[allocated]))
+                        allocated++;
+
+                dht_lock_array_free (lk_array, allocated);
+                GF_FREE (lk_array);
+        }
+
+        return -1;
+}
int
dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- xlator_t *src_cached = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *dst_hashed = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- src_hashed = dht_subvol_get_hashed (this, oldloc);
- if (!src_hashed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- src_cached = dht_subvol_get_cached (this, oldloc->inode);
- if (!src_cached) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
+
+ gf_uuid_unparse(oldloc->inode->gfid, gfid);
+
+ src_hashed = dht_subvol_get_hashed (this, oldloc);
+ if (!src_hashed) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s,"
+ "(gfid = %s)", oldloc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- dst_hashed = dht_subvol_get_hashed (this, newloc);
- if (!dst_hashed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ src_cached = dht_subvol_get_cached (this, oldloc->inode);
+ if (!src_cached) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_RENAME_FAILED,
+ "No cached subvolume for path = %s,"
+ "(gfid = %s)", oldloc->path, gfid);
- if (newloc->inode)
- dst_cached = dht_subvol_get_cached (this, newloc->inode);
+ op_errno = EINVAL;
+ goto err;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ dst_hashed = dht_subvol_get_hashed (this, newloc);
+ if (!dst_hashed) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached (this, newloc->inode);
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
+
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->src_hashed = src_hashed;
- local->src_cached = src_cached;
- local->dst_hashed = dst_hashed;
- local->dst_cached = dst_cached;
-
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
- oldloc->path, src_hashed->name, src_cached->name,
- newloc->path, dst_hashed->name,
- dst_cached ? dst_cached->name : "<nul>");
-
- if (IA_ISDIR (oldloc->inode->ia_type)) {
- dht_rename_dir (frame, this);
- } else {
- local->op_ret = 0;
- dht_rename_create_links (frame);
- }
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_RENAME_INFO,
+ "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
+ oldloc->path, src_hashed->name, src_cached->name,
+ newloc->path, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (IA_ISDIR (oldloc->inode->ia_type)) {
+ dht_rename_dir (frame, this);
+ } else {
+ local->op_ret = 0;
+ ret = dht_rename_lock (frame);
+ if (ret < 0)
+ goto err;
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 9270952e113..d3de1b76795 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1,298 +1,1522 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
+#include "dht-messages.h"
+#include "glusterfs-acl.h"
+
+/*
+ * DHT_SET_LAYOUT_RANGE: give entry `i` of `layout` the hash range
+ * [srt, srt + chunk - 1], copy the layout-wide commit hash into that entry
+ * and trace the assignment together with `path`.
+ *
+ * The expansion refers to `this` (for this->name), so an `xlator_t *this`
+ * must be in scope at the call site.  `layout`, `i` and `srt` are expanded
+ * more than once: pass expressions without side effects.  Every argument
+ * is parenthesised so that compound expressions keep their meaning.
+ */
+#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path) do {                \
+                (layout)->list[(i)].start = (srt);                        \
+                (layout)->list[(i)].stop = (srt) + (chunk) - 1;           \
+                (layout)->list[(i)].commit_hash = (layout)->commit_hash;  \
+                                                                          \
+                gf_msg_trace (this->name, 0,                              \
+                              "gave fix: %u - %u, with commit-hash %u"    \
+                              " on %s for %s",                            \
+                              (layout)->list[(i)].start,                  \
+                              (layout)->list[(i)].stop,                   \
+                              (layout)->list[(i)].commit_hash,            \
+                              (layout)->list[(i)].xlator->name, (path));  \
+        } while (0)
+
+/*
+ * DHT_RESET_LAYOUT_RANGE: zero the start/stop range of every entry in
+ * `layout` (entries 0 .. layout->cnt - 1).  Other fields are left alone.
+ *
+ * The loop index has a macro-specific name so that it cannot shadow a
+ * caller variable used inside the `layout` argument, and `layout` is
+ * parenthesised so that any pointer expression may be passed.
+ */
+#define DHT_RESET_LAYOUT_RANGE(layout) do {                               \
+                int dht_reset_idx = 0;                                    \
+                for (dht_reset_idx = 0;                                   \
+                     dht_reset_idx < (layout)->cnt;                       \
+                     dht_reset_idx++) {                                   \
+                        (layout)->list[dht_reset_idx].start = 0;          \
+                        (layout)->list[dht_reset_idx].stop = 0;           \
+                }                                                         \
+        } while (0)
+
+int
+dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir,
+ dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal);
+
+/*
+ * dht_overlap_calc: size of the intersection between the hash range of
+ * entry `o` in layout `old` and entry `n` in layout `new`.
+ *
+ * Returns 0 when either index is past the end of its layout, when either
+ * entry has a positive err, when either entry has start == stop, or when
+ * the two ranges do not intersect.  Otherwise returns the number of hash
+ * values shared by both (inclusive) ranges.
+ */
+static uint32_t
+dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
+{
+        /* Indices outside the layouts or errored entries never overlap. */
+        if ((o >= old->cnt) || (n >= new->cnt) ||
+            (old->list[o].err > 0) || (new->list[n].err > 0))
+                return 0;
+
+        /* An entry whose start equals its stop contributes nothing. */
+        if ((old->list[o].start == old->list[o].stop) ||
+            (new->list[n].start == new->list[n].stop))
+                return 0;
+
+        /* The ranges intersect only if each one begins before the other
+         * one ends. */
+        if (!((old->list[o].start <= new->list[n].stop) &&
+              (old->list[o].stop >= new->list[n].start)))
+                return 0;
+
+        return min (old->list[o].stop, new->list[n].stop) -
+                max (old->list[o].start, new->list[n].start) + 1;
+}
+
+/*
+ * dht_selfheal_unlock_cbk: completion callback passed to
+ * dht_unlock_inodelk by dht_selfheal_dir_finish.  The result of the unlock
+ * is not inspected; the frame used for the unlock is simply destroyed.
+ * Always returns 0.
+ */
+int
+dht_selfheal_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        /* Nothing to report to anyone: just release the unlock frame. */
+        DHT_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * dht_selfheal_dir_finish: end a directory self-heal on `frame`.
+ *
+ * If the frame's local still holds locks (dht_lock_count() != 0), the lock
+ * array is handed over to a copy of the frame and released through
+ * dht_unlock_inodelk(); dht_selfheal_unlock_cbk destroys that copy.
+ * When `invoke_cbk` is non-zero, local->selfheal.dir_cbk is called with
+ * `ret` and local->op_errno.
+ *
+ * `this` is not used; frame->this is passed to the callback instead.
+ * Always returns 0.
+ *
+ * NOTE(review): when copy_frame() or dht_local_init() fails, no unlock is
+ * issued here and the lock array stays attached to `local` - confirm that
+ * it is released elsewhere.
+ */
+int
+dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ /* The unlock runs on its own frame so that `frame` can be unwound
+ * without waiting for it. */
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ /* Move the lock array to the unlock frame's local. */
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_selfheal_unlock_cbk);
+ /* lock_frame now belongs to the unlock path; do not destroy it below. */
+ lock_frame = NULL;
+done:
+ if (invoke_cbk)
+ local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
+ local->op_errno, NULL);
+ /* Only reached with a live lock_frame when dht_local_init() failed. */
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ return 0;
+}
int
-dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+dht_refresh_layout_done (call_frame_t *frame)
{
- dht_local_t *local = NULL;
+ int ret = -1;
+ dht_layout_t *refreshed = NULL, *heal = NULL;
+ dht_local_t *local = NULL;
+ dht_need_heal_t should_heal = NULL;
+ dht_selfheal_layout_t healer = NULL;
+
+ local = frame->local;
+
+ refreshed = local->selfheal.refreshed_layout;
+ heal = local->selfheal.layout;
+
+ healer = local->selfheal.healer;
+ should_heal = local->selfheal.should_heal;
+
+ ret = dht_layout_sort (refreshed);
+ if (ret == -1) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED,
+ "sorting the layout failed");
+ goto err;
+ }
+
+ if (should_heal (frame, &heal, &refreshed)) {
+ healer (frame, &local->loc, heal);
+ } else {
+ local->selfheal.layout = NULL;
+ local->selfheal.refreshed_layout = NULL;
+ local->selfheal.layout = refreshed;
+ dht_layout_unref (frame->this, heal);
- local = frame->local;
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno);
+ dht_selfheal_dir_finish (frame, frame->this, 0, 1);
+ }
+
+ return 0;
- return 0;
+err:
+ dht_selfheal_dir_finish (frame, frame->this, -1, 1);
+ return 0;
}
+/*
+ * dht_refresh_layout_cbk: lookup callback for the winds issued by
+ * dht_refresh_layout.
+ *
+ * Under frame->lock the reply is merged into
+ * local->selfheal.refreshed_layout (dht_layout_merge) and into local->stbuf
+ * (dht_iatt_merge).  op_ret is overwritten with the result of
+ * dht_layout_merge; if that is -1 the errno is recorded in local->op_errno,
+ * otherwise local->op_ret is set to 0.
+ *
+ * When the last reply has arrived, local->refresh_layout_done is called if
+ * any reply succeeded (local->op_ret == 0); otherwise
+ * local->refresh_layout_unlock is called with -1.
+ *
+ * `cookie` is the frame of the child the lookup was wound to.
+ * Always returns 0.
+ */
+int
+dht_refresh_layout_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, inode_t *inode,
+                        struct iatt *stbuf, dict_t *xattr,
+                        struct iatt *postparent)
+{
+        dht_local_t  *local         = NULL;
+        int           this_call_cnt = 0;
+        call_frame_t *prev          = NULL;
+        dht_layout_t *layout        = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", this, err);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
+        GF_VALIDATE_OR_GOTO ("dht", this->private, err);
+
+        local = frame->local;
+        prev = cookie;
+
+        layout = local->selfheal.refreshed_layout;
+
+        LOCK (&frame->lock);
+        {
+                op_ret = dht_layout_merge (this, layout, prev->this,
+                                           op_ret, op_errno, xattr);
+
+                dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_msg_debug (this->name, op_errno,
+                                      "lookup of %s on %s returned error",
+                                      local->loc.path, prev->this->name);
+
+                        goto unlock;
+                }
+
+                local->op_ret = 0;
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        this_call_cnt = dht_frame_return (frame);
+
+        if (is_last_call (this_call_cnt)) {
+                if (local->op_ret == 0) {
+                        local->refresh_layout_done (frame);
+                } else {
+                        goto err;
+                }
+
+        }
+
+        return 0;
+
+err:
+        /* `local` is still NULL when one of the validations at the top
+         * failed; there is then no unlock handler to reach. */
+        if (local)
+                local->refresh_layout_unlock (frame, this, -1, 1);
+        return 0;
+}
+
+/*
+ * dht_refresh_layout: re-read the on-disk layout of local->loc by winding
+ * a lookup to every subvolume in conf->subvolumes.
+ *
+ * Before winding it
+ *   - sets local->call_cnt to conf->subvolume_cnt and local->op_ret to -1,
+ *   - replaces local->selfheal.refreshed_layout with a fresh layout,
+ *   - removes conf->xattr_name from local->xattr (if any),
+ *   - makes sure local->xattr_req exists and requests conf->xattr_name
+ *     (a failure to set the key is only logged).
+ *
+ * Replies are handled by dht_refresh_layout_cbk.  On a setup failure
+ * local->refresh_layout_unlock is called with -1.  Always returns 0.
+ */
+int
+dht_refresh_layout (call_frame_t *frame)
+{
+        int          call_cnt = 0;
+        int          i        = 0, ret = -1;
+        dht_conf_t  *conf     = NULL;
+        dht_local_t *local    = NULL;
+        xlator_t    *this     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+
+        this = frame->this;
+        conf = this->private;
+        local = frame->local;
+
+        call_cnt = conf->subvolume_cnt;
+        local->call_cnt = call_cnt;
+        local->op_ret = -1;
+
+        /* Drop the result of any earlier refresh. */
+        if (local->selfheal.refreshed_layout) {
+                dht_layout_unref (this, local->selfheal.refreshed_layout);
+                local->selfheal.refreshed_layout = NULL;
+        }
+
+        local->selfheal.refreshed_layout = dht_layout_new (this,
+                                                        conf->subvolume_cnt);
+        if (!local->selfheal.refreshed_layout) {
+                goto out;
+        }
+
+        if (local->xattr != NULL) {
+                dict_del (local->xattr, conf->xattr_name);
+        }
+
+        if (local->xattr_req == NULL) {
+                local->xattr_req = dict_new ();
+                if (local->xattr_req == NULL) {
+                        goto out;
+                }
+        }
+
+        if (dict_get (local->xattr_req, conf->xattr_name) == 0) {
+                ret = dict_set_uint32 (local->xattr_req, conf->xattr_name,
+                                       4 * 4);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_DICT_SET_FAILED,
+                                "%s: Failed to set dictionary value:key = %s",
+                                local->loc.path, conf->xattr_name);
+        }
+
+        for (i = 0; i < call_cnt; i++) {
+                STACK_WIND (frame, dht_refresh_layout_cbk,
+                            conf->subvolumes[i],
+                            conf->subvolumes[i]->fops->lookup,
+                            &local->loc, local->xattr_req);
+        }
+
+        return 0;
+
+out:
+        /* `local` is still NULL when `frame` or frame->local failed
+         * validation; there is then no unlock handler to reach. */
+        if (local)
+                local->refresh_layout_unlock (frame, this, -1, 1);
+        return 0;
+}
+
+
+/*
+ * dht_selfheal_layout_lock_cbk: completion callback for the blocking
+ * inodelk requested by dht_selfheal_layout_lock.
+ *
+ * On success it installs dht_selfheal_dir_finish / dht_refresh_layout_done
+ * as the refresh handlers in the frame's local and starts
+ * dht_refresh_layout.  If the frame has no local, or the lock failed
+ * (op_errno is then stored in local->op_errno), the self-heal is finished
+ * with -1.  Always returns 0.
+ */
+int32_t
+dht_selfheal_layout_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local = frame->local;
+
+        if ((local != NULL) && (op_ret >= 0)) {
+                local->refresh_layout_unlock = dht_selfheal_dir_finish;
+                local->refresh_layout_done = dht_refresh_layout_done;
+
+                dht_refresh_layout (frame);
+                return 0;
+        }
+
+        /* Lock failure: remember why before finishing. */
+        if (local != NULL)
+                local->op_errno = op_errno;
+
+        dht_selfheal_dir_finish (frame, this, -1, 1);
+        return 0;
+}
+
+
+/*
+ * dht_should_heal_layout: decide whether the directory layout needs to be
+ * healed, given the layout computed for healing (*heal) and the layout
+ * just read back from the bricks (*ondisk).
+ *
+ * Returns _gf_true when any argument is NULL or when
+ * dht_layout_anomalies() fails.  Otherwise returns true iff the on-disk
+ * layout has holes or overlaps, or directories are missing
+ * (local->selfheal.force_mkdir, else dht_layout_missing_dirs (*heal)).
+ *
+ * Side effects: fills local->selfheal.{hole_cnt,overlaps_cnt,down,misc},
+ * and may swap *heal and *ondisk (see below).
+ */
+gf_boolean_t
+dht_should_heal_layout (call_frame_t *frame, dht_layout_t **heal,
+                        dht_layout_t **ondisk)
+{
+        dht_local_t  *local        = frame->local;
+        dht_layout_t *swap         = NULL;
+        int           missing_dirs = 0;
+        int           rc           = -1;
+
+        if (!heal || !*heal || !ondisk || !*ondisk)
+                return _gf_true;
+
+        rc = dht_layout_anomalies (frame->this, &local->loc, *ondisk,
+                                   &local->selfheal.hole_cnt,
+                                   &local->selfheal.overlaps_cnt,
+                                   NULL, &local->selfheal.down,
+                                   &local->selfheal.misc, NULL);
+        if (rc < 0)
+                return _gf_true;
+
+        /* Directories might have been created as part of this self-heal.
+         * Non-layout xattrs have to be synced and the range 0-0 set on
+         * the new directories.
+         */
+        if (local->selfheal.force_mkdir)
+                missing_dirs = local->selfheal.force_mkdir;
+        else
+                missing_dirs = dht_layout_missing_dirs (*heal);
+
+        if (!local->selfheal.hole_cnt && !local->selfheal.overlaps_cnt
+            && missing_dirs) {
+                /* A brick was just added and needs the 0-0 range, but the
+                 * on-disk layout is well-formed.  Swap "heal" and
+                 * "ondisk" so that the on-disk layout is the one used for
+                 * healing xattrs.  For subvolumes that do not take part in
+                 * it, dht_selfheal_dir_xattr_persubvol sets 0-0 and the
+                 * non-layout xattrs, so the already well-formed on-disk
+                 * layout is not "corrupted".
+                 */
+                swap = *heal;
+                *heal = *ondisk;
+                *ondisk = swap;
+
+                /* The current self-heal code heals non-layout xattrs only
+                 * after an add-brick, i.e. only when layout xattrs need
+                 * healing too.  That is wrong: e.g. quota can be set while
+                 * the layout is well-formed but a node is down.  Healing
+                 * only non-layout xattrs would not need locking either.
+                 * This issue is _NOT FIXED_ by this patch.
+                 */
+        }
+
+        return (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt
+                || missing_dirs);
+}
+
+/*
+ * dht_layout_span: number of entries in `layout` that have no error
+ * (err == 0) and whose start differs from their stop.
+ */
+int
+dht_layout_span (dht_layout_t *layout)
+{
+        int idx     = 0;
+        int spanned = 0;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                if (!layout->list[idx].err &&
+                    (layout->list[idx].start != layout->list[idx].stop))
+                        spanned++;
+        }
+
+        return spanned;
+}
+
+/*
+ * dht_decommissioned_bricks_in_layout: count the (layout entry,
+ * decommissioned brick) pairs in which the entry's xlator is a non-NULL
+ * member of conf->decommissioned_bricks[0 .. subvolume_cnt - 1].
+ *
+ * Returns 0 when `this` or `layout` is NULL.
+ */
+int
+dht_decommissioned_bricks_in_layout (xlator_t *this, dht_layout_t *layout)
+{
+        dht_conf_t *conf    = NULL;
+        int         matches = 0;
+        int         entry   = 0;
+        int         brick   = 0;
+
+        if (!this || !layout)
+                return matches;
+
+        conf = this->private;
+
+        for (entry = 0; entry < layout->cnt; entry++) {
+                for (brick = 0; brick < conf->subvolume_cnt; brick++) {
+                        /* empty slots never match */
+                        if (!conf->decommissioned_bricks[brick])
+                                continue;
+
+                        if (conf->decommissioned_bricks[brick]
+                            == layout->list[entry].xlator)
+                                matches++;
+                }
+        }
+
+        return matches;
+}
+
+/*
+ * dht_distribution_type: classify `layout` as equally or weighted
+ * distributed.
+ *
+ * The first entry with a non-zero (stop - start) width becomes the
+ * reference.  The layout is reported as GF_DHT_WEIGHTED_DISTRIBUTION as
+ * soon as a later entry has a non-zero width that differs from the
+ * reference by more than layout->cnt.  In every other case, including NULL
+ * arguments and an empty layout, GF_DHT_EQUAL_DISTRIBUTION is returned.
+ */
+dht_distribution_type_t
+dht_distribution_type (xlator_t *this, dht_layout_t *layout)
+{
+        uint32_t reference = 0;
+        uint32_t width     = 0;
+        uint32_t delta     = 0;
+        int      idx       = 0;
+
+        if (!this || !layout || (layout->cnt < 1))
+                return GF_DHT_EQUAL_DISTRIBUTION;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                /* no reference width picked yet */
+                if (reference == 0) {
+                        reference = layout->list[idx].stop
+                                    - layout->list[idx].start;
+                        continue;
+                }
+
+                width = layout->list[idx].stop - layout->list[idx].start;
+
+                if (width >= reference)
+                        delta = width - reference;
+                else
+                        delta = reference - width;
+
+                if ((width != 0) && (delta > layout->cnt))
+                        return GF_DHT_WEIGHTED_DISTRIBUTION;
+        }
+
+        return GF_DHT_EQUAL_DISTRIBUTION;
+}
+
+/*
+ * dht_should_fix_layout: decide whether a fix-layout should rewrite the
+ * directory layout, given the in-memory layout (*inmem) and the layout
+ * read back from the bricks (*ondisk).
+ *
+ * Returns _gf_true when
+ *   - any argument is NULL,
+ *   - the on-disk layout has holes or overlaps,
+ *   - the commit hashes of the two layouts differ, or
+ *   - none of the "nothing to do" conditions below hold.
+ * Returns _gf_false when
+ *   - dht_layout_anomalies() fails,
+ *   - it reports subvolumes that are down or have other errors, or
+ *   - the on-disk layout contains no decommissioned brick, spans exactly
+ *     the non-decommissioned subvolumes, and has the same distribution
+ *     type as the in-memory layout.
+ *
+ * Side effect: fills local->selfheal.{hole_cnt,overlaps_cnt,down,misc}.
+ */
+gf_boolean_t
+dht_should_fix_layout (call_frame_t *frame, dht_layout_t **inmem,
+                       dht_layout_t **ondisk)
+{
+        dht_conf_t              *conf        = frame->this->private;
+        dht_local_t             *local       = frame->local;
+        dht_distribution_type_t  inmem_type  = 0;
+        dht_distribution_type_t  ondisk_type = 0;
+        int                      span        = 0;
+        int                      decommed    = 0;
+        int                      rc          = 0;
+
+        if (!inmem || !*inmem || !ondisk || !*ondisk)
+                return _gf_true;
+
+        rc = dht_layout_anomalies (frame->this, &local->loc, *ondisk,
+                                   &local->selfheal.hole_cnt,
+                                   &local->selfheal.overlaps_cnt, NULL,
+                                   &local->selfheal.down,
+                                   &local->selfheal.misc, NULL);
+        if (rc < 0)
+                return _gf_false;
+
+        if (local->selfheal.down || local->selfheal.misc)
+                return _gf_false;
+
+        if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt)
+                return _gf_true;
+
+        /* If commit hashes are being updated, let it through */
+        if ((*inmem)->commit_hash != (*ondisk)->commit_hash)
+                return _gf_true;
+
+        span        = dht_layout_span (*ondisk);
+        decommed    = dht_decommissioned_bricks_in_layout (frame->this,
+                                                           *ondisk);
+        inmem_type  = dht_distribution_type (frame->this, *inmem);
+        ondisk_type = dht_distribution_type (frame->this, *ondisk);
+
+        if ((decommed == 0)
+            && (span == (conf->subvolume_cnt
+                         - conf->decommission_subvols_cnt))
+            && (inmem_type == ondisk_type))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * dht_selfheal_layout_lock: take the layout-heal inodelk(s) on local->loc
+ * and continue in dht_selfheal_layout_lock_cbk.
+ *
+ * `healer` and `should_heal` are stored in local->selfheal, and
+ * local->selfheal.layout is replaced by a new reference to `layout`.
+ * For a new directory (`newdir` true) a single F_WRLCK lock is requested
+ * on local->hashed_subvol; otherwise one is requested on every subvolume
+ * in conf->subvolumes.  All locks use DHT_LAYOUT_HEAL_DOMAIN.
+ *
+ * Returns 0 once the blocking lock request has been issued, -1 on any
+ * failure (the lock array built so far is freed).
+ */
+int
+dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
+                          gf_boolean_t newdir,
+                          dht_selfheal_layout_t healer,
+                          dht_need_heal_t should_heal)
+{
+        dht_local_t   *local      = NULL;
+        dht_conf_t    *conf       = NULL;
+        dht_lock_t   **lk_array   = NULL;
+        dht_layout_t  *old_layout = NULL;
+        int            count      = 1;
+        int            idx        = 0;
+        int            ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+        local = frame->local;
+        conf = frame->this->private;
+
+        local->selfheal.healer = healer;
+        local->selfheal.should_heal = should_heal;
+
+        /* Take the new reference before dropping the old one. */
+        old_layout = local->selfheal.layout;
+        local->selfheal.layout = dht_layout_ref (frame->this, layout);
+        dht_layout_unref (frame->this, old_layout);
+
+        count = newdir ? 1 : conf->subvolume_cnt;
+
+        lk_array = GF_CALLOC (count, sizeof (*lk_array),
+                              gf_common_mt_char);
+        if (lk_array == NULL)
+                goto err;
+
+        if (newdir) {
+                lk_array[0] = dht_lock_new (frame->this, local->hashed_subvol,
+                                            &local->loc, F_WRLCK,
+                                            DHT_LAYOUT_HEAL_DOMAIN);
+                if (lk_array[0] == NULL)
+                        goto err;
+        } else {
+                for (idx = 0; idx < count; idx++) {
+                        lk_array[idx] = dht_lock_new (frame->this,
+                                                      conf->subvolumes[idx],
+                                                      &local->loc, F_WRLCK,
+                                                      DHT_LAYOUT_HEAL_DOMAIN);
+                        if (lk_array[idx] == NULL)
+                                goto err;
+                }
+        }
+
+        local->lock.locks = lk_array;
+        local->lock.lk_count = count;
+
+        ret = dht_blocking_inodelk (frame, lk_array, count, FAIL_ON_ANY_ERROR,
+                                    dht_selfheal_layout_lock_cbk);
+        if (ret < 0) {
+                /* The array is freed below; detach it from local first. */
+                local->lock.locks = NULL;
+                local->lock.lk_count = 0;
+                goto err;
+        }
+
+        return 0;
+err:
+        if (lk_array != NULL) {
+                dht_lock_array_free (lk_array, count);
+                GF_FREE (lk_array);
+        }
+
+        return -1;
+}
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int err = 0;
- int this_call_cnt = 0;
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == 0)
- err = 0;
- else
- err = op_errno;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = err;
- break;
- }
- }
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt *stbuf = NULL;
+ int i = 0;
+ int ret = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ subvol = cookie;
+
+ if (op_ret == 0)
+ err = 0;
+ else
+ err = op_errno;
+
+ ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf);
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0, "key = %s not present in dict",
+ DHT_IATT_IN_XDATA_KEY);
+ }
- this_call_cnt = dht_frame_return (frame);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
+ }
+ }
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
- }
+ LOCK (&frame->lock);
+ {
+ dht_iatt_merge (this, &local->stbuf, stbuf, subvol);
+ }
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
- return 0;
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_finish (frame, this, 0, 1);
+ }
+
+ return 0;
}
+/*
+ * dht_selfheal_dir_xattr_persubvol: wind one setxattr that writes the
+ * on-disk form of layout entry `i` (under conf->xattr_name) for `loc`.
+ *
+ * The target is `req_subvol` when it is non-NULL, otherwise
+ * layout->list[i].xlator.  If local->xattr carries QUOTA_LIMIT_KEY or
+ * QUOTA_LIMIT_OBJECTS_KEY they are added to the same setxattr (a failure
+ * to add them is only logged).  The request's xdata carries
+ * GLUSTERFS_INTERNAL_FOP_KEY and DHT_IATT_IN_XDATA_KEY.
+ *
+ * The reply goes to dht_selfheal_dir_xattr_cbk with the target subvolume
+ * as cookie.  On any local failure that callback is invoked directly with
+ * -1/ENOMEM, so every call to this function accounts for exactly one
+ * callback.  Always returns 0.
+ */
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+                                  dht_layout_t *layout, int i,
+                                  xlator_t *req_subvol)
{
- xlator_t *subvol = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- xlator_t *this = NULL;
- int32_t *disk_layout = NULL;
+        xlator_t          *subvol = NULL;
+        dict_t            *xattr = NULL;
+        dict_t            *xdata = NULL;
+        int                ret = 0;
+        xlator_t          *this = NULL;
+        int32_t           *disk_layout = NULL;
+        dht_local_t       *local = NULL;
+        dht_conf_t        *conf = NULL;
+        data_t            *data = NULL;
+        char               gfid[GF_UUID_BUF_SIZE] = {0};
+
+        local = frame->local;
+        /* Keep the requested subvolume as cookie even if validation
+         * fails below. */
+        subvol = req_subvol;
+        this = frame->this;
+
+        GF_VALIDATE_OR_GOTO ("", this, err);
+        GF_VALIDATE_OR_GOTO (this->name, layout, err);
+        GF_VALIDATE_OR_GOTO (this->name, local, err);
+        /* `layout` is known to be non-NULL only from here on. */
+        if (!subvol)
+                subvol = layout->list[i].xlator;
+        GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+        VALIDATE_OR_GOTO (this->private, err);
+        conf = this->private;
- subvol = layout->list[i].xlator;
- this = frame->this;
+        xattr = get_new_dict ();
+        if (!xattr) {
+                goto err;
+        }
- xattr = get_new_dict ();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+        xdata = dict_new ();
+        if (!xdata)
+                goto err;
+
+        /* Fill `gfid` before the first message that prints it. */
+        gf_uuid_unparse(loc->inode->gfid, gfid);
+
+        ret = dict_set_str (xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+                        "%s: Failed to set dictionary value: key = %s,"
+                        " gfid = %s", loc->path,
+                        GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+                goto err;
+        }
- ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to extract disk layout");
- goto err;
- }
+        ret = dict_set_dynstr_with_alloc (xdata, DHT_IATT_IN_XDATA_KEY, "yes");
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+                        "%s: Failed to set dictionary value: key = %s,"
+                        " gfid = %s", loc->path,
+                        DHT_IATT_IN_XDATA_KEY, gfid);
+                goto err;
+        }
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set xattr dictionary");
- goto err;
- }
- disk_layout = NULL;
+        ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+                        "Directory self heal xattr failed:"
+                        " %s: (subvol %s) Failed to extract disk layout,"
+                        " gfid = %s", loc->path, subvol->name, gfid);
+                goto err;
+        }
- gf_log (this->name, GF_LOG_TRACE,
- "setting hash range %u - %u (type %d) on subvolume %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->type, subvol->name, loc->path);
+        ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+                        "Directory self heal xattr failed:"
+                        "%s: (subvol %s) Failed to set xattr dictionary,"
+                        " gfid = %s", loc->path, subvol->name, gfid);
+                /* disk_layout is still ours; it is freed exactly once
+                 * under err. */
+                goto err;
+        }
+        /* The dictionary owns the buffer now. */
+        disk_layout = NULL;
+
+        gf_msg_trace (this->name, 0,
+                      "setting hash range %u - %u (type %d) on subvolume %s"
+                      " for %s", layout->list[i].start, layout->list[i].stop,
+                      layout->type, subvol->name, loc->path);
+
+        dict_ref (xattr);
+        if (local->xattr) {
+                data = dict_get (local->xattr, QUOTA_LIMIT_KEY);
+                if (data) {
+                        ret = dict_add (xattr, QUOTA_LIMIT_KEY, data);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_DICT_SET_FAILED,
+                                        "%s: Failed to set dictionary value:"
+                                        " key = %s",
+                                        loc->path, QUOTA_LIMIT_KEY);
+                        }
+                }
+                data = dict_get (local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
+                if (data) {
+                        ret = dict_add (xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_DICT_SET_FAILED,
+                                        "%s: Failed to set dictionary value:"
+                                        " key = %s",
+                                        loc->path, QUOTA_LIMIT_OBJECTS_KEY);
+                        }
+                }
+        }
- dict_ref (xattr);
+        if (!gf_uuid_is_null (local->gfid))
+                gf_uuid_copy (loc->gfid, local->gfid);
- STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, 0);
+        STACK_WIND_COOKIE (frame, dht_selfheal_dir_xattr_cbk,
+                           (void *) subvol, subvol, subvol->fops->setxattr,
+                           loc, xattr, 0, xdata);
- dict_unref (xattr);
+        dict_unref (xattr);
+        dict_unref (xdata);
- return 0;
+        return 0;
err:
- if (xattr)
- dict_destroy (xattr);
+        if (xattr)
+                dict_destroy (xattr);
+
+        if (xdata)
+                dict_unref (xdata);
- if (disk_layout)
- GF_FREE (disk_layout);
+        GF_FREE (disk_layout);
- dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM);
- return 0;
+        dht_selfheal_dir_xattr_cbk (frame, (void *) subvol, frame->this,
+                                    -1, ENOMEM, NULL);
+        return 0;
}
+/*
+ * dht_fix_dir_xattr: write the range of `layout` for `loc` on the
+ * subvolumes, one setxattr per subvolume via
+ * dht_selfheal_dir_xattr_persubvol.
+ *
+ * local->call_cnt is set to conf->subvolume_cnt.  First every entry of
+ * `layout` is written; if calls remain after that, a one-entry dummy
+ * layout carrying layout->commit_hash is written to each configured
+ * subvolume for which dht_is_subvol_in_layout() returns false.
+ * Always returns 0.
+ *
+ * NOTE(review): if dht_layout_new() fails, the remaining calls are not
+ * issued although call_cnt still counts them - confirm how the frame
+ * completes in that case.
+ */
+int
+dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ gf_msg_debug (this->name, 0,
+ "%s: Writing the new range for all subvolumes",
+ loc->path);
+
+ /* `count` tracks how many of the call_cnt calls are still to be made. */
+ local->call_cnt = count = conf->subvolume_cnt;
+
+ dht_log_new_layout_for_dir_selfheal (this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+
+ if (--count == 0)
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ dummy->commit_hash = layout->commit_hash;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
+}
int
dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- * or the directory is itself non existant.
- * !layout->list[i].stop would mean layout absent
- */
- continue;
- }
- missing_xattr++;
- }
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
- }
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ * or the directory is non existent.
+ * !layout->list[i].stop would mean layout absent
+ */
- local->call_cnt = missing_xattr;
+ continue;
+ }
+ missing_xattr++;
+ }
+ /* Also account for subvolumes with no-layout. Used for zero'ing out
+ * the layouts and for setting quota key's if present */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ missing_xattr++;
+ }
+ }
+ gf_msg_trace (this->name, 0,
+ "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish (frame, this, 0, 1);
+ return 0;
+ }
- ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ local->call_cnt = missing_xattr;
- if (--missing_xattr == 0)
- break;
- }
- return 0;
+ dht_log_new_layout_for_dir_selfheal (this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
+
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+
+ if (--missing_xattr == 0)
+ break;
+ }
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
}
+/*
+ * dht_is_subvol_part_of_layout: report whether `layout` has an entry whose
+ * xlator carries the same name as `xlator` (names are compared with
+ * strcmp, not the pointers).  Returns _gf_true on the first match,
+ * _gf_false if there is none.
+ */
+gf_boolean_t
+dht_is_subvol_part_of_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+        int idx = 0;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                if (strcmp (layout->list[idx].xlator->name,
+                            xlator->name) == 0)
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
int
-dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+dht_layout_index_from_conf (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = -1;
+ int j = 0;
+
+ for (j = 0; j < layout->cnt; j++) {
+ if (!strcmp (layout->list[j].xlator->name, xlator->name)) {
+ i = j;
+ break;
+ }
+ }
+
+ return i;
+}
+
+
+/*
+ * dht_selfheal_dir_xattr_for_nameless_lookup: write layout xattrs for
+ * `loc` on the subvolumes that need them.
+ *
+ * A first pass counts the setxattr calls to make:
+ *   - layout entries with err == -1 and a non-zero stop,
+ *   - configured subvolumes that are not part of `layout` (by name),
+ *   - configured subvolumes that are part of `layout` with an err other
+ *     than -1, 0 and ENOENT.
+ * If the count is zero the self-heal is finished with 0.  Otherwise the
+ * count becomes local->call_cnt and a second pass issues the calls through
+ * dht_selfheal_dir_xattr_persubvol: the first group with its own layout
+ * entry, the other two groups with entry 0 of a one-entry dummy layout.
+ * Always returns 0.
+ *
+ * NOTE(review): if dht_layout_new() fails, the calls for the last two
+ * groups are not issued although call_cnt still counts them - confirm how
+ * the frame completes in that case.
+ */
+static int
+dht_selfheal_dir_xattr_for_nameless_lookup (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+ int j = 0;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ or the directory is non existent.
+ !layout->list[i].stop would mean layout absent
+ */
+
+ continue;
+ }
+ missing_xattr++;
+ }
+
+ /* Also account for subvolumes with no-layout. Used for zero'ing out
+ the layouts and for setting quota key's if present */
+
+ /* Send where either the subvol is not part of layout,
+ * or it is part of the layout but error is non-zero but error
+ * is not equal to -1 or ENOENT.
+ */
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i])
+ == _gf_false) {
+ missing_xattr++;
+ continue;
+ }
+
+ j = dht_layout_index_from_conf (layout, conf->subvolumes[i]);
+
+ if ((j != -1) && (layout->list[j].err != -1) &&
+ (layout->list[j].err != 0) &&
+ (layout->list[j].err != ENOENT)) {
+ missing_xattr++;
+ }
+
+ }
+
+
+ gf_msg_trace (this->name, 0,
+ "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
+
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish (frame, this, 0, 1);
+ return 0;
+ }
+
+ /* From here on missing_xattr counts the calls still to be issued. */
+ local->call_cnt = missing_xattr;
+
+ dht_log_new_layout_for_dir_selfheal (this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
+
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+
+ if (--missing_xattr == 0)
+ break;
+ }
+
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+
+ /* Same selection as in the counting pass above. */
+ for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
+ if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i])
+ == _gf_false) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ continue;
+ }
+
+ j = dht_layout_index_from_conf (layout, conf->subvolumes[i]);
+
+ if ((j != -1) && (layout->list[j].err != -1) &&
+ (layout->list[j].err != ENOENT) &&
+ (layout->list[j].err != 0)) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
+
+}
+
+/*
+ * dht_selfheal_dir_setattr_cbk: setattr callback for the winds issued by
+ * dht_selfheal_dir_setattr.  The individual results are not inspected.
+ *
+ * After the last reply it requests the layout-heal locks
+ * (dht_selfheal_layout_lock with dht_selfheal_dir_xattr as healer and
+ * dht_should_heal_layout as the decision function); if that request fails
+ * the self-heal is finished with -1.  Always returns 0.
+ */
+int
+dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int op_ret, int op_errno, struct iatt *statpre,
+                              struct iatt *statpost, dict_t *xdata)
+{
+        dht_local_t  *local     = frame->local;
+        dht_layout_t *layout    = local->selfheal.layout;
+        int           remaining = 0;
+        int           lock_ret  = -1;
+
+        remaining = dht_frame_return (frame);
+        if (!is_last_call (remaining))
+                return 0;
+
+        lock_ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
+                                             dht_selfheal_dir_xattr,
+                                             dht_should_heal_layout);
+        if (lock_ret < 0)
+                dht_selfheal_dir_finish (frame, this, -1, 1);
+
+        return 0;
+}
+
+
+int
+dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- int this_call_cnt = 0;
+ int missing_attr = 0;
+ int i = 0, ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ local = frame->local;
+ this = frame->this;
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+ if (missing_attr == 0) {
+ ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- if (prev->this == local->hashed_subvol)
- local->ia_ino = local->stbuf.ia_ino;
+ if (ret < 0) {
+ dht_selfheal_dir_finish (frame, this, -1, 1);
+ }
+
+ return 0;
+ }
+ if (!gf_uuid_is_null (local->gfid))
+ gf_uuid_copy (loc->gfid, local->gfid);
+
+ local->call_cnt = missing_attr;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1) {
+ gf_msg_trace (this->name, 0,
+ "%s: setattr on subvol %s, gfid = %s",
+ loc->path, layout->list[i].xlator->name,
+ uuid_utoa(loc->gfid));
+
+ STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr,
+ loc, stbuf, valid, NULL);
+ }
+ }
+
+ return 0;
+}
+
+int
+dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0, ret = -1;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
+
+ if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
+ }
+ }
+
+ if (op_ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "Directory selfheal failed: path = %s, gfid = %s",
+ local->loc.path, gfid );
+ goto out;
+ }
dht_iatt_merge (this, &local->preparent, preparent, prev->this);
dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ ret = 0;
- if ((op_ret == 0) || (op_errno == EEXIST)) {
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = -1;
- break;
- }
- }
- }
+out:
+ this_call_cnt = dht_frame_return (frame);
- this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_finish (frame, this, ret, 0);
+ dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
+ }
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- }
+ return 0;
+}
+
+/*
+ * Copy the POSIX default and access ACL xattrs from @xattr into @dict.
+ *
+ * Each ACL is handled independently: when it is absent from @xattr this is
+ * noted at debug level, and when dict_set() fails a warning is logged.
+ * Neither condition is reported to the caller.
+ */
+void
+dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+{
+        xlator_t *this        = NULL;
+        data_t   *default_acl = NULL;
+        data_t   *access_acl  = NULL;
+
+        GF_ASSERT (xattr);
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        default_acl = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+        if (default_acl) {
+                if (dict_set (dict, POSIX_ACL_DEFAULT_XATTR, default_acl)) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_DICT_SET_FAILED,
+                                "Failed to set dictionary value.key = %s",
+                                POSIX_ACL_DEFAULT_XATTR);
+                }
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "ACL_DEFAULT xattr not present");
+        }
+
+        access_acl = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
+        if (access_acl) {
+                if (dict_set (dict, POSIX_ACL_ACCESS_XATTR, access_acl)) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_DICT_SET_FAILED,
+                                "Failed to set dictionary value.key = %s",
+                                POSIX_ACL_ACCESS_XATTR);
+                }
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "ACL_ACCESS xattr not present");
+        }
- return 0;
}
+/*
+ * Wind mkdir to every subvolume whose layout entry is marked ESTALE or
+ * ENOENT (or to all of them when local->selfheal.force_mkdir is set).
+ *
+ * The xdata sent with mkdir is either a new dict carrying "gfid-req"
+ * (when local->gfid is set) or a reference to local->params; ACLs found in
+ * local->xattr are copied into it.
+ *
+ * Always returns 0. Any failure before the first wind finishes the
+ * selfheal through dht_selfheal_dir_finish(), because the callers visible
+ * in this file ignore the return value and would otherwise leave the frame
+ * (and its locks) pending.
+ */
+int
+dht_selfheal_dir_mkdir_lookup_done (call_frame_t *frame, xlator_t *this)
+{
+        dht_local_t  *local  = NULL;
+        int           i      = 0;
+        int           ret    = -1;
+        dict_t       *dict   = NULL;
+        dht_layout_t *layout = NULL;
+        loc_t        *loc    = NULL;
+
+        VALIDATE_OR_GOTO (this->private, err);
+
+        local = frame->local;
+        layout = local->layout;
+        loc = &local->loc;
+
+        if (!gf_uuid_is_null (local->gfid)) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* Previously "return -1": nobody unwound the frame. */
+                        local->op_errno = ENOMEM;
+                        goto err;
+                }
+
+                ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_DICT_SET_FAILED,
+                                "%s: Failed to set dictionary value:"
+                                " key = gfid-req", loc->path);
+        } else if (local->params) {
+                /* Send the dictionary from higher layers directly */
+
+                dict = dict_ref (local->params);
+        }
+        /* Set acls */
+        if (local->xattr && dict)
+                dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
+
+        if (!dict)
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "dict is NULL, need to make sure gfids are same");
+
+        for (i = 0; i < layout->cnt; i++) {
+                if (layout->list[i].err == ESTALE ||
+                    layout->list[i].err == ENOENT ||
+                    local->selfheal.force_mkdir) {
+                        gf_msg_debug (this->name, 0,
+                                      "Creating directory %s on subvol %s",
+                                      loc->path, layout->list[i].xlator->name);
+
+                        STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
+                                    layout->list[i].xlator,
+                                    layout->list[i].xlator->fops->mkdir,
+                                    loc,
+                                    st_mode_from_ia (local->stbuf.ia_prot,
+                                                     local->stbuf.ia_type),
+                                    0, dict);
+                }
+        }
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+
+err:
+        dht_selfheal_dir_finish (frame, this, -1, 1);
+        return 0;
+}
+/*
+ * Callback for the lookups wound by dht_selfheal_dir_mkdir_lock_cbk().
+ *
+ * Each reply counts an ENOENT/ESTALE failure in local->selfheal.hole_cnt or
+ * merges a successful stbuf into local->stbuf. The last reply then decides:
+ * every subvolume reported ENOENT/ESTALE -> finish the selfheal with an
+ * error; no directory is missing -> go straight to setattr; otherwise set
+ * local->call_cnt and issue the mkdirs.
+ *
+ * The call count is decremented only after the per-reply state has been
+ * updated under frame->lock, so the reply that observes "last call" sees
+ * the contributions of all other replies.
+ */
int
-dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
{
- int missing_dirs = 0;
- int i = 0;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
+ int missing_dirs = 0;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+ call_frame_t *prev = NULL;
+
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if ((op_ret < 0) &&
+ (op_errno == ENOENT || op_errno == ESTALE)) {
+ local->selfheal.hole_cnt = !local->selfheal.hole_cnt ? 1
+ : local->selfheal.hole_cnt + 1;
+ }
+ if (!op_ret) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ }
- local = frame->local;
- this = frame->this;
+ }
+ UNLOCK (&frame->lock);
+
+ /* Decrement only after this reply's state is recorded (see header). */
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ if (local->selfheal.hole_cnt == layout->cnt) {
+ gf_msg_debug (this->name, op_errno,
+ "Lookup failed, an rmdir could have "
+ "deleted this entry %s", loc->name);
+ local->op_errno = op_errno;
+ goto err;
+ } else {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE ||
+ local->selfheal.force_mkdir)
+ missing_dirs++;
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
- }
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_finish (frame, this, 0, 0);
+ dht_selfheal_dir_setattr (frame, loc,
+ &local->stbuf,
+ 0xffffffff, layout);
+ return 0;
+ }
- if (missing_dirs == 0) {
- dht_selfheal_dir_xattr (frame, loc, layout);
- return 0;
- }
+ local->call_cnt = missing_dirs;
+ dht_selfheal_dir_mkdir_lookup_done (frame, this);
+ }
+ }
- local->call_cnt = missing_dirs;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating directory %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->mkdir,
- loc,
- st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type));
- }
- }
+ return 0;
- return 0;
+err:
+ dht_selfheal_dir_finish (frame, this, -1, 1);
+ return 0;
}
+/*
+ * Completion callback for the blocking inodelk taken by
+ * dht_selfheal_dir_mkdir().
+ *
+ * On success a lookup is wound to every subvolume, to make sure a racing
+ * rmdir did not delete the directory while the locks were being acquired.
+ * On EINVAL the mkdir phase is entered directly with a call count of 1;
+ * any other failure is logged and ends the selfheal with an error.
+ */
int
+dht_selfheal_dir_mkdir_lock_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this, int32_t op_ret,
+                                 int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+        dht_conf_t  *priv  = NULL;
+        int          idx   = 0;
+
+        VALIDATE_OR_GOTO (this->private, err);
+
+        priv  = this->private;
+        local = frame->local;
+
+        local->call_cnt = priv->subvolume_cnt;
+
+        if (op_ret >= 0) {
+                /* Locks are held: re-check the directory on all subvols. */
+                for (idx = 0; idx < priv->subvolume_cnt; idx++) {
+                        STACK_WIND (frame, dht_selfheal_dir_mkdir_lookup_cbk,
+                                    priv->subvolumes[idx],
+                                    priv->subvolumes[idx]->fops->lookup,
+                                    &local->loc, NULL);
+                }
+
+                return 0;
+        }
+
+        if (op_errno == EINVAL) {
+                /* This error is seen when the directory entry was not
+                 * created on a newly attached tier subvol. Proceed and do
+                 * the mkdir on the tier subvol.
+                 */
+                local->call_cnt = 1;
+                dht_selfheal_dir_mkdir_lookup_done (frame, this);
+                return 0;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                DHT_MSG_INODE_LK_ERROR,
+                "acquiring inodelk failed for %s",
+                local->loc.path);
+
+        local->op_errno = op_errno;
+
+err:
+        dht_selfheal_dir_finish (frame, this, -1, 1);
+        return 0;
+}
+
+/*
+ * Start the mkdir phase of directory selfheal.
+ *
+ * Records @force in local->selfheal.force_mkdir and resets hole_cnt. If no
+ * layout entry is ENOENT and @force is 0, goes directly to
+ * dht_selfheal_dir_setattr(). Otherwise builds one write lock per
+ * subvolume in DHT_LAYOUT_HEAL_DOMAIN and requests them with
+ * dht_blocking_inodelk(); the work continues in
+ * dht_selfheal_dir_mkdir_lock_cbk().
+ *
+ * Returns 0 when setattr was invoked or the lock request was issued.
+ * Returns -1 on allocation or lock-request failure; on that path the lock
+ * array is freed here and no selfheal callback is invoked, so the caller
+ * has to finish the frame itself.
+ */
+int
+dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int force)
+{
+ int missing_dirs = 0;
+ int i = 0;
+ int ret = -1;
+ int count = 1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ dht_lock_t **lk_array = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ local->selfheal.force_mkdir = force;
+ local->selfheal.hole_cnt = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_setattr (frame, loc, &local->stbuf,
+ 0xffffffff, layout);
+ return 0;
+ }
+
+ count = conf->subvolume_cnt;
+
+ /* Locking on all subvols in the mkdir phase of lookup selfheal is
+ done to synchronize with rmdir/rename.
+ */
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->subvolumes[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL)
+ goto err;
+ }
+
+ /* local now references the lock array (used by the later unlock). */
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ IGNORE_ENOENT_ESTALE,
+ dht_selfheal_dir_mkdir_lock_cbk);
+
+ if (ret < 0) {
+ /* Detach the array from local before it is freed below. */
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
+}
+
+int
dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
dht_layout_t *layout)
{
- int start = 0;
- dht_conf_t *conf = NULL;
- uint32_t hashval = 0;
- int ret = 0;
+ int start = 0;
+ uint32_t hashval = 0;
+ int ret = 0;
+ const char *str = NULL;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {0, };
conf = this->private;
- ret = dht_hash_compute (layout->type, loc->path, &hashval);
+ if (conf->randomize_by_gfid) {
+ str = uuid_utoa_r (loc->gfid, buf);
+ } else {
+ str = loc->path;
+ }
+
+ ret = dht_hash_compute (this, layout->type, str, &hashval);
if (ret == 0) {
start = (hashval % layout->cnt);
}
@@ -300,237 +1524,1032 @@ dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
return start;
}
+/*
+ * Count the layout entries that should receive a hash range and mark them.
+ *
+ * Entries whose xlator is listed in conf->decommissioned_bricks are set to
+ * EINVAL first. Entries with err -1, 0 or ENOENT are counted, and err == 0
+ * is rewritten to -1. If nothing was counted, or @new_layout is 0, ENOSPC
+ * entries are also switched to -1 and counted.
+ *
+ * Returns layout->spread_cnt when it is non-zero and not larger than the
+ * count; otherwise the count itself, and never less than 1.
+ */
+static int
+dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
+{
+ int i = 0;
+ int j = 0;
+ int err = 0;
+ int count = 0;
+ dht_conf_t *conf = NULL;
-void
-dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
-{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- uint32_t chunk = 0;
- int i = 0;
- uint32_t start = 0;
- int cnt = 0;
- int err = 0;
- int start_subvol = 0;
-
- this = frame->this;
- conf = this->private;
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
- cnt++;
- }
- }
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Take this with a pinch of salt. The behaviour seems
+ * to be slightly different when this function is
+ * invoked from mkdir codepath. For eg., err == 0 in
+ * mkdir codepath means directory created but xattr
+ * is not set yet.
+ */
+
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+ */
+ count++;
+ if (!err)
+ layout->list[i].err = -1;
+ }
+ }
/* no subvolume has enough space, but can't stop directory creation */
- if (!cnt) {
+ if (!count || !new_layout) {
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
if (err == ENOSPC) {
layout->list[i].err = -1;
- cnt++;
+ count++;
}
}
}
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
+ /* If layout->spread_cnt is set, check that it is <= the number of
+ * available subvolumes (down bricks and decommissioned bricks are
+ * considered unavailable). Else return count (available up bricks). */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <= count)) ?
+ layout->spread_cnt : ((count) ? count : 1));
+
+ return count;
+}
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
- for (i = start_subvol; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- layout->list[i].start = start;
- layout->list[i].stop = start + chunk - 1;
-
- start = start + chunk;
+void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
- gf_log (this->name, GF_LOG_TRACE,
- "gave fix: %u - %u on %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->list[i].xlator->name, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- break;
- }
- }
- }
+void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
- for (i = 0; i < start_subvol; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- layout->list[i].start = start;
- layout->list[i].stop = start + chunk - 1;
-
- start = start + chunk;
-
- gf_log (this->name, GF_LOG_TRACE,
- "gave fix: %u - %u on %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->list[i].xlator->name, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- break;
- }
- }
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ *
+ * Expects "table" and "new" to be in scope at the point of use. The
+ * arguments are parenthesized so that expressions such as OV_ENTRY(i+1,j)
+ * index the intended cell.
+ */
+#define OV_ENTRY(x,y) table[(x)*new->cnt+(y)]
+
+/*
+ * Reorder the hash ranges of @new so that they overlap as much as possible
+ * with the ranges the same subvolumes hold in @old.
+ *
+ * A table of overlaps (dht_overlap_calc) between every new[i]/old[j] pair
+ * is built first. Then, for each usable entry i of @new, the later entry j
+ * whose range swap with i gives the largest gain over the current overlap
+ * is selected, the two ranges are swapped with dht_layout_range_swap(),
+ * and the corresponding table rows are swapped too. Entries with a
+ * positive err are skipped. @old is sorted by volume name as a side
+ * effect.
+ */
+void
+dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname (old);
+ /* Now both old->list[] and new->list[] match the same
+ xlators/subvolumes, i.e. old->list[i] and new->list[i]
+ refer to the same subvolume.
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ /* NOTE(review): alloca() is not documented to return NULL on failure,
+ so the check below looks unreachable - confirm. */
+ /* NOTE(review): the table is sized old->cnt * new->cnt but OV_ENTRY
+ uses new->cnt as the row stride for rows indexed by new[i]; this
+ only lines up when old->cnt == new->cnt - confirm that holds. */
+ table = alloca(sizeof(overlap)*old->cnt*new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table,0,sizeof(overlap)*old->cnt*new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
+ }
+ }
+ }
+
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap (new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i,j);
+ OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
+ OV_ENTRY(max_overlap_idx,j) = overlap;
+ }
+ }
}
}
+/*
+ * Build a replacement layout for @loc and install it as local->layout.
+ *
+ * Layouts of type DHT_HASH_TYPE_DM_USER are left alone. Otherwise a new
+ * layout with priv->subvolume_cnt entries is created, the per-entry err
+ * (ENOSPC mapped to -1) and xlator are copied from @layout, ranges are
+ * assigned as for a new directory, and finally re-ordered to maximize the
+ * overlap with @layout. The new layout is set in the inode context and
+ * the reference on the previous local->layout is dropped.
+ *
+ * Returns local->layout (the new layout, or the unchanged one when nothing
+ * was built), or NULL when dht_layout_anomalies() fails or reports a
+ * subvolume as down.
+ */
+dht_layout_t *
+dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_layout_t *new_layout = NULL;
+ dht_conf_t *priv = NULL;
+ dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
+
+ this = frame->this;
+ priv = this->private;
+ local = frame->local;
+
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_msg_debug (THIS->name, 0, "leaving %s alone",
+ loc->path);
+ goto done;
+ }
+
+ new_layout = dht_layout_new (this, priv->subvolume_cnt);
+ if (!new_layout)
+ goto done;
+
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
+
+ if (subvol_down || (ret == -1)) {
+ /* NOTE(review): this message is also logged when ret == -1 with
+ subvol_down == 0 - confirm the wording is intended. */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_FIX_FAILED,
+ "Layout fix failed: %u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
+ }
+
+ /* NOTE(review): indexes layout->list[] up to new_layout->cnt; this
+ presumes layout->cnt >= priv->subvolume_cnt - confirm. */
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
+
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
+
+ new_layout->commit_hash = layout->commit_hash;
+
+ if (priv->du_stats) {
+ for (i = 0; i < priv->subvolume_cnt; ++i) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "subvolume %d (%s): %u chunks", i,
+ priv->subvolumes[i]->name,
+ priv->du_stats[i].chunks);
+ }
+ }
+ else {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_NO_DISK_USAGE_STATUS, "no du stats ?!?");
+ }
+
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname (new_layout);
+ dht_selfheal_layout_new_directory (frame, loc, new_layout);
+
+ /* Now selectively re-assign ranges only when it helps */
+ dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
+
+done:
+ if (new_layout) {
+ /* Now that the new layout has all the proper layout, change the
+ inode context */
+ dht_layout_set (this, loc->inode, new_layout);
+
+ /* Make sure the extra 'ref' for existing layout is removed */
+ dht_layout_unref (this, local->layout);
+
+ local->layout = new_layout;
+ }
+
+ return local->layout;
+}
+
+
+/*
+ * Return the du_stats "chunks" value recorded for @child, where @child is
+ * located by walking parent->children and using its position as the index
+ * into priv->du_stats[]. Returns 0 when there are no du_stats or @child is
+ * not among the children.
+ *
+ * Having to call this 2x for each entry in the layout is pretty horrible,
+ * but that's what all of this layout-sorting nonsense gets us.
+ */
+uint32_t
+dht_get_chunks_from_xl (xlator_t *parent, xlator_t *child)
+{
+        dht_conf_t    *priv   = parent->private;
+        xlator_list_t *node   = NULL;
+        uint32_t       pos    = 0;
+        uint32_t       chunks = 0;
+
+        if (priv->du_stats) {
+                node = parent->children;
+                while (node) {
+                        if (node->xlator == child) {
+                                chunks = priv->du_stats[pos].chunks;
+                                break;
+                        }
+                        pos++;
+                        node = node->next;
+                }
+        }
+
+        return chunks;
+}
+
+
+/*
+ * Assign hash ranges to the entries of @layout that are marked -1 or
+ * ENOENT, starting at the subvolume chosen by
+ * dht_selfheal_layout_alloc_start() and wrapping around.
+ *
+ * At most dht_get_layout_count() entries receive a range. When
+ * priv->do_weighting is set and every candidate examined has a non-zero
+ * chunk count, each range is proportional to that count; otherwise all
+ * ranges are equal. The last range assigned is extended to end at
+ * 0xffffffff.
+ */
+void
+dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+                                   dht_layout_t *layout)
+{
+        xlator_t     *this = NULL;
+        double        chunk = 0;
+        int           i = 0;
+        uint32_t      start = 0;
+        int           bricks_to_use = 0;
+        int           err = 0;
+        int           start_subvol = 0;
+        uint32_t      curr_size;
+        uint32_t      range_size;
+        uint64_t      total_size = 0;
+        int           real_i;
+        dht_conf_t   *priv;
+        gf_boolean_t  weight_by_size;
+        int           bricks_used = 0;
+
+        this = frame->this;
+        priv = this->private;
+        weight_by_size = priv->do_weighting;
+
+        bricks_to_use = dht_get_layout_count (this, layout, 1);
+        GF_ASSERT (bricks_to_use > 0);
+
+        /* Sum the chunk counts of the bricks that will get a range. A
+         * single brick without a chunk count disables weighting. */
+        bricks_used = 0;
+        for (i = 0; i < layout->cnt; ++i) {
+                err = layout->list[i].err;
+                if ((err != -1) && (err != ENOENT)) {
+                        continue;
+                }
+                curr_size = dht_get_chunks_from_xl (this,
+                                                    layout->list[i].xlator);
+                if (!curr_size) {
+                        weight_by_size = _gf_false;
+                        break;
+                }
+                total_size += curr_size;
+                if (++bricks_used >= bricks_to_use) {
+                        break;
+                }
+        }
+
+        if (weight_by_size && total_size) {
+                /* We know total_size is not zero. */
+                chunk = ((double) 0xffffffff) / ((double) total_size);
+                /* total_size is uint64_t: print it through an explicit
+                 * unsigned long long cast rather than %lu. */
+                gf_msg_debug (this->name, 0,
+                              "chunk size = 0xffffffff / %llu = %f",
+                              (unsigned long long) total_size, chunk);
+        }
+        else {
+                weight_by_size = _gf_false;
+                chunk = ((unsigned long) 0xffffffff) / bricks_to_use;
+        }
+
+        start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+
+        /* clear out the range, as we are re-computing here */
+        DHT_RESET_LAYOUT_RANGE (layout);
+
+        /*
+         * OK, what's this "real_i" stuff about?  This used to be two loops -
+         * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1.
+         * That way is practically an open invitation to bugs when only one
+         * of the loops is updated.  Using real_i and modulo operators to make
+         * it one loop avoids this problem.  Remember, folks: it's everyone's
+         * responsibility to help stamp out copy/paste abuse.
+         */
+        bricks_used = 0;
+        for (real_i = 0; real_i < layout->cnt; real_i++) {
+                i = (real_i + start_subvol) % layout->cnt;
+                err = layout->list[i].err;
+                if ((err != -1) && (err != ENOENT)) {
+                        continue;
+                }
+                if (weight_by_size) {
+                        curr_size = dht_get_chunks_from_xl (this,
+                                                    layout->list[i].xlator);
+                        if (!curr_size) {
+                                continue;
+                        }
+                }
+                else {
+                        curr_size = 1;
+                }
+                range_size = chunk * curr_size;
+                gf_msg_debug (this->name, 0,
+                              "assigning range size 0x%x to %s",
+                              range_size,
+                              layout->list[i].xlator->name);
+                DHT_SET_LAYOUT_RANGE(layout, i, start, range_size,
+                                     loc->path);
+                if (++bricks_used >= bricks_to_use) {
+                        /* Last range: stretch it to the end of the space. */
+                        layout->list[i].stop = 0xffffffff;
+                        break;
+                }
+                start += range_size;
+        }
+
+        return;
+}
+
+/*
+ * Decide whether @layout needs (and can get) a fix.
+ *
+ * If the anomaly counters stored in local->selfheal report holes or
+ * overlaps, the layout's commit_hash is invalidated and fresh ranges are
+ * assigned via dht_selfheal_layout_new_directory(). Returns 0 in that
+ * case, or when any layout entry is ENOENT (directory missing on a
+ * subvolume); returns -1 otherwise.
+ */
int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
-{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
- int ret = -1;
- int i = -1;
- int overlaps = -1;
-
- this = frame->this;
- conf = this->private;
- local = frame->local;
-
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
-
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ int ret = -1;
+ int i = -1;
+ uint32_t overlaps = 0;
+
+ local = frame->local;
+
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
+
+ if (holes || overlaps) {
+ /* If the layout has anomalies which would change the hash
+ * ranges, then we need to reset the commit_hash for this
+ * directory, as the layout would change and things may not
+ * be in place as expected */
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ dht_selfheal_layout_new_directory (frame, loc, layout);
+ ret = 0;
+ }
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
- if (holes || overlaps) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ /* TODO: give a fix to these non-virgins */
- for (i = 0; i < layout->cnt; i++) {
- /* directory not present */
- if (layout->list[i].err == ENOENT) {
- ret = 0;
- break;
- }
- }
+ return ret;
+}
- /* TODO: give a fix to these non-virgins */
+/*
+ * Set up the layout of a newly created directory.
+ *
+ * Links local->loc.inode into the inode table (replacing loc->inode with
+ * the linked inode), stores @dir_cbk and a reference to @layout in
+ * local->selfheal, assigns fresh ranges to the name-sorted layout and then
+ * takes the layout lock, continuing in dht_selfheal_dir_xattr().
+ *
+ * Always returns 0. When inode linking or the lock request fails, @dir_cbk
+ * is invoked directly with op_ret -1 (op_errno EIO for the link failure,
+ * ENOMEM for the lock failure).
+ */
+int
+dht_selfheal_new_directory (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ inode_t *linked_inode = NULL, *inode = NULL;
+ loc_t *loc = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = EIO;
+
+ local = frame->local;
+
+ loc = &local->loc;
+
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ linked_inode = inode_link (loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "linking inode failed (%s/%s) => %s",
+ pgfid, loc->name, gfid);
+ ret = -1;
+ goto out;
+ }
+
+ /* Swap loc->inode for the linked inode and drop the old reference. */
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref (inode);
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
+
+ dht_layout_sort_volname (layout);
+ dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- return ret;
+ op_errno = ENOMEM;
+ ret = dht_selfheal_layout_lock (frame, layout, _gf_true,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+out:
+ if (ret < 0) {
+ dir_cbk (frame, NULL, frame->this, -1, op_errno, NULL);
+ }
+
+ return 0;
}
+/*
+ * Recompute the layout of local->loc and write it out under the layout
+ * lock.
+ *
+ * Stores @dir_cbk and a reference to @layout in local->selfheal, builds
+ * the replacement layout with dht_fix_layout_of_directory(), then requests
+ * the layout lock with dht_fix_dir_xattr as the continuation.
+ *
+ * Returns -1 when no layout could be produced, otherwise the return value
+ * of dht_selfheal_layout_lock().
+ */
int
-dht_selfheal_new_directory (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_fix_directory_layout (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
+
+ /* No layout sorting required here */
+ tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
- dht_layout_sort_volname (layout);
- dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- return 0;
+ ret = dht_selfheal_layout_lock (frame, tmp_layout, _gf_false,
+ dht_fix_dir_xattr,
+ dht_should_fix_layout);
+
+ return ret;
}
+/*
+ * Entry point for healing a directory's layout after lookup.
+ *
+ * Stores @dir_cbk and a reference to @layout in local->selfheal, links the
+ * inode (except for the root gfid), collects the layout anomalies and, if
+ * no subvolume is down or has an unrecoverable error and a fix can be
+ * formed, starts the mkdir phase.
+ *
+ * Always returns 0. Every path that does not hand the frame on to the
+ * mkdir phase ends in dht_selfheal_dir_finish(); this includes the case
+ * where dht_selfheal_dir_mkdir() itself fails, since that function returns
+ * -1 without invoking any callback.
+ */
int
dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
-{
- dht_local_t *local = NULL;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
-
- ret = dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
-
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- misc = local->selfheal.misc;
-
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (this, layout);
-
- if (down) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%d subvolumes down -- not fixing", down);
- ret = 0;
- goto sorry_no_fix;
- }
+ loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ inode_t *linked_inode = NULL, *inode = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (this, layout);
+
+ if (!__is_root_gfid (local->stbuf.ia_gfid)) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ linked_inode = inode_link (loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "linking inode failed (%s/%s) => %s",
+ pgfid, loc->name, gfid);
+ ret = 0;
+ goto sorry_no_fix;
+ }
- if (misc) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%d subvolumes have unrecoverable errors", misc);
- ret = 0;
- goto sorry_no_fix;
- }
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref (inode);
+ }
+
+ dht_layout_anomalies (this, loc, layout,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ if (down) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "Directory selfheal failed: %d subvolumes down."
+ "Not fixing. path = %s, gfid = %s",
+ down, loc->path, gfid);
+ ret = 0;
+ goto sorry_no_fix;
+ }
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ if (misc) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "Directory selfheal failed : %d subvolumes "
+ "have unrecoverable errors. path = %s, gfid = %s",
+ misc, loc->path, gfid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "not able to form layout for the directory");
- goto sorry_no_fix;
- }
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ dht_layout_sort_volname (layout);
+ ret = dht_selfheal_dir_getafix (frame, loc, layout);
- dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_FAILED,
+ "Directory selfheal failed: "
+ "Unable to form layout for directory %s",
+ loc->path);
+ goto sorry_no_fix;
+ }
- return 0;
+ /* dht_selfheal_dir_mkdir() returns -1 without calling back when it
+ * cannot set up or request its locks; finish the frame here in that
+ * case, the same way as the other "no fix" paths. */
+ ret = dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ if (ret < 0) {
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ return 0;
sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish (frame, this, ret, 1);
- return 0;
+ return 0;
}
+/*
+ * Layout selfheal for a directory reached through a nameless lookup.
+ *
+ * Collects the layout anomalies, stores @dir_cbk and a reference to
+ * @layout in local->selfheal, and gives up (finishing the selfheal) when a
+ * subvolume is down, has an unrecoverable error, or no fix can be formed.
+ * Otherwise the layout lock is requested with
+ * dht_selfheal_dir_xattr_for_nameless_lookup as the continuation.
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame,
+                                            dht_selfheal_dir_cbk_t dir_cbk,
+                                            loc_t *loc, dht_layout_t *layout)
+{
+        dht_local_t *local    = frame->local;
+        xlator_t    *this     = frame->this;
+        uint32_t     down_cnt = 0;
+        uint32_t     misc_cnt = 0;
+        int          ret      = 0;
+
+        dht_layout_anomalies (this, loc, layout,
+                              &local->selfheal.hole_cnt,
+                              &local->selfheal.overlaps_cnt,
+                              NULL, &local->selfheal.down,
+                              &local->selfheal.misc, NULL);
+
+        down_cnt = local->selfheal.down;
+        misc_cnt = local->selfheal.misc;
+
+        local->selfheal.dir_cbk = dir_cbk;
+        local->selfheal.layout = dht_layout_ref (this, layout);
+
+        /* ret is still 0 for the two "cannot fix" exits below. */
+        if (down_cnt) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_SUBVOL_DOWN_ERROR,
+                        "%d subvolumes down -- not fixing", down_cnt);
+                goto give_up;
+        }
+
+        if (misc_cnt) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_SUBVOL_ERROR,
+                        "%d subvolumes have unrecoverable errors", misc_cnt);
+                goto give_up;
+        }
+
+        dht_layout_sort_volname (layout);
+        ret = dht_selfheal_dir_getafix (frame, loc, layout);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_LAYOUT_FORM_FAILED,
+                        "not able to form layout for the directory");
+                goto give_up;
+        }
+
+        ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
+                                dht_selfheal_dir_xattr_for_nameless_lookup,
+                                dht_should_heal_layout);
+        if (ret >= 0) {
+                return 0;
+        }
+
+give_up:
+        /* TODO: need to put appropriate local->op_errno */
+        dht_selfheal_dir_finish (frame, this, ret, 1);
+
+        return 0;
+}
+/*
+ * Re-create the directory on all subvolumes: stores @dir_cbk and a
+ * reference to @layout in local->selfheal and runs the mkdir phase with
+ * force set. Returns whatever dht_selfheal_dir_mkdir() returns.
+ */
int
dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+                      loc_t *loc, dht_layout_t *layout)
+{
+        dht_local_t *local = frame->local;
+
+        local->selfheal.dir_cbk = dir_cbk;
+        local->selfheal.layout = dht_layout_ref (frame->this, layout);
+
+        return dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+}
+
+/*
+ * Push the uid/gid from local->stbuf to the directory on every subvolume
+ * except the one returned by dht_first_up_subvol().
+ *
+ * @data is the call frame. A failed setattr on a subvolume is logged and
+ * the remaining subvolumes are still processed. Always returns 0.
+ */
+int
+dht_dir_attr_heal (void *data)
+{
+        call_frame_t *frame      = NULL;
+        dht_local_t  *local      = NULL;
+        xlator_t     *subvol     = NULL;
+        xlator_t     *this       = NULL;
+        dht_conf_t   *conf       = NULL;
+        int           subvol_cnt = 0;
+        int           ret        = -1;
+        int           idx        = 0;
+        char          gfid[GF_UUID_BUF_SIZE] = {0};
+
+        GF_VALIDATE_OR_GOTO ("dht", data, out);
+
+        frame = data;
+        local = frame->local;
+        this = frame->this;
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", local, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO ("dht", conf, out);
+
+        subvol_cnt = conf->subvolume_cnt;
+
+        for (idx = 0; idx < subvol_cnt; idx++) {
+                subvol = conf->subvolumes[idx];
+
+                if (subvol && (subvol != dht_first_up_subvol (this))) {
+                        ret = syncop_setattr (subvol, &local->loc,
+                                              &local->stbuf,
+                                              (GF_SET_ATTR_UID |
+                                               GF_SET_ATTR_GID),
+                                              NULL, NULL, NULL, NULL);
+                        if (!ret)
+                                continue;
+
+                        gf_uuid_unparse(local->loc.gfid, gfid);
+
+                        gf_msg ("dht", GF_LOG_ERROR, -ret,
+                                DHT_MSG_DIR_ATTR_HEAL_FAILED,
+                                "Directory attr heal failed. Failed to set"
+                                " uid/gid on path %s on subvol %s, gfid = %s ",
+                                local->loc.path, subvol->name, gfid);
+                }
+        }
+out:
+        return 0;
+}
+
+/*
+ * Completion handler paired with dht_dir_attr_heal(): destroys the frame
+ * the heal ran on. @ret and @data are not used. Always returns 0.
+ */
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        (void) ret;
+        (void) data;
+
+        DHT_STACK_DESTROY (sync_frame);
+
+        return 0;
+}
+
+/* EXIT: dht_update_commit_hash_for_layout
+ *
+ * Final step of the commit-hash update: records (@op_ret, @op_errno) in
+ * local if no earlier error was stored, then unwinds the setxattr with
+ * local->op_ret / local->op_errno. Always returns 0.
+ */
+int
+dht_update_commit_hash_for_layout_done (call_frame_t *frame, void *cookie,
+                                        xlator_t *this, int32_t op_ret,
+                                        int32_t op_errno, dict_t *xdata)
+{
+        dht_local_t *local = frame->local;
+
+        /* An error recorded earlier wins over this one. */
+        if (!local->op_ret && op_ret) {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+        }
+
+        DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+                          local->op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Release the inode locks held in local->lock, continuing in
+ * dht_update_commit_hash_for_layout_done(). If the unlock cannot be
+ * wound, an error is recorded (unless one is already stored), a warning
+ * about stale locks is logged, and the done handler is called directly.
+ * Always returns 0.
+ */
+int
+dht_update_commit_hash_for_layout_unlock (call_frame_t *frame, xlator_t *this)
+{
+        dht_local_t *local = frame->local;
+        int          ret   = 0;
+
+        ret = dht_unlock_inodelk (frame, local->lock.locks,
+                                  local->lock.lk_count,
+                                  dht_update_commit_hash_for_layout_done);
+        if (ret >= 0) {
+                return 0;
+        }
+
+        /* preserve oldest error, just ... */
+        if (!local->op_ret) {
+                local->op_errno = errno;
+                local->op_ret = -1;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, errno,
+                DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+                "Winding unlock failed: stale locks left on brick"
+                " %s", local->loc.path);
+
+        dht_update_commit_hash_for_layout_done (frame, NULL, this,
+                                                0, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * Per-subvolume callback of the commit-hash update. Stores the first
+ * failure in local under frame->lock and, once the last reply has arrived,
+ * moves on to releasing the locks. Always returns 0.
+ */
+int
+dht_update_commit_hash_for_layout_cbk (call_frame_t *frame, void *cookie,
+                                       xlator_t *this, int op_ret,
+                                       int op_errno, dict_t *xdata)
{
- int ret = 0;
- dht_local_t *local = NULL;
+        dht_local_t *local     = frame->local;
+        int          remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                /* store first failure, just because */
+                if (!local->op_ret && op_ret) {
+                        local->op_ret = op_ret;
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (!is_last_call (remaining)) {
+                return 0;
+        }
+
+        dht_update_commit_hash_for_layout_unlock (frame, this);
+
+        return 0;
+}
+
+/* Continuation passed to dht_blocking_inodelk by
+ * dht_update_commit_hash_for_layout.
+ *
+ * If the lock step failed (op_ret < 0) the operation is finished right
+ * away with op_ret = -1. Otherwise one xattr dict per local subvolume is
+ * prepared up front (that subvolume's disk layout, stamped with
+ * layout->commit_hash) and a setxattr is wound to every local subvolume;
+ * dht_update_commit_hash_for_layout_cbk collects the replies. A failure
+ * while preparing the dicts releases everything prepared so far and goes
+ * through the unlock step with op_ret = -1. Always returns 0.
+ */
+int
+dht_update_commit_hash_for_layout_resume (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0, j = 0;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int32_t *disk_layout = NULL;
+ dict_t **xattr = NULL;
+
+ local = frame->local;
+ conf = frame->this->private;
+ count = conf->local_subvols_cnt;
+ layout = local->layout;
+
+ if (op_ret < 0) {
+ goto err_done;
+ }
+
+ /* We precreate the xattr list as we cannot change call count post the
+ * first wind as we may never continue from there. So we finish prep
+ * work before winding the setxattrs */
+ xattr = GF_CALLOC (count, sizeof (*xattr), gf_common_mt_char);
+ if (!xattr) {
+ local->op_errno = errno;
+
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "Directory commit hash update failed:"
+ " %s: Allocation failed", local->loc.path);
+
+ goto err;
+ }
+
+ for (i = 0; i < count; i++) {
+ /* find the layout index for the subvolume */
+ ret = dht_layout_index_for_subvol (layout,
+ conf->local_subvols[i]);
+ if (ret < 0) {
+ local->op_errno = ENOENT;
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "Directory commit hash update failed:"
+ " %s: (subvol %s) Failed to find disk layout",
+ local->loc.path, conf->local_subvols[i]->name);
+
+ goto err;
+ }
+ j = ret;
+
+ /* update the commit hash for the layout */
+ layout->list[j].commit_hash = layout->commit_hash;
+
+ /* extract the current layout */
+ ret = dht_disk_layout_extract (this, layout, j, &disk_layout);
+ if (ret == -1) {
+ local->op_errno = errno;
+
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "Directory commit hash update failed:"
+ " %s: (subvol %s) Failed to extract disk"
+ " layout", local->loc.path,
+ conf->local_subvols[i]->name);
+
+ goto err;
+ }
+
+ xattr[i] = get_new_dict ();
+ if (!xattr[i]) {
+ local->op_errno = errno;
+
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "Directory commit hash update failed:"
+ " %s: Allocation failed", local->loc.path);
+
+ goto err;
+ }
+ ret = dict_set_bin (xattr[i], conf->xattr_name,
+ disk_layout, 4 * 4);
+ if (ret != 0) {
+ local->op_errno = ENOMEM;
- local = frame->local;
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "Directory self heal xattr failed:"
+ "%s: (subvol %s) Failed to set xattr"
+ " dictionary,", local->loc.path,
+ conf->local_subvols[i]->name);
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ /* disk_layout is released exactly once, under err: */
- ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+ goto err;
+ }
+ /* the buffer was handed to the dict; do not free it under err: */
+ disk_layout = NULL;
+
+ gf_msg_trace (this->name, 0,
+ "setting commit hash %u on subvolume %s"
+ " for %s", layout->list[j].commit_hash,
+ conf->local_subvols[i]->name, local->loc.path);
+ }
+
+ /* wind the setting of the commit hash across the local subvols */
+ local->call_cnt = count;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ for (i = 0; i < count; i++) {
+ dict_ref (xattr[i]);
+
+ STACK_WIND (frame, dht_update_commit_hash_for_layout_cbk,
+ conf->local_subvols[i],
+ conf->local_subvols[i]->fops->setxattr,
+ &local->loc, xattr[i], 0, NULL);
+
+ dict_unref (xattr[i]);
+ }
+
+ /* our dict references were dropped above; release the pointer array */
+ GF_FREE (xattr);
+
+ return 0;
+err:
+ if (xattr) {
+ for (i = 0; i < count; i++) {
+ if (xattr[i])
+ dict_destroy (xattr[i]);
+ }
+
+ GF_FREE (xattr);
+ }
+
+ GF_FREE (disk_layout);
+
+ local->op_ret = -1;
+
+ dht_update_commit_hash_for_layout_unlock (frame, this);
+
+ return 0;
+err_done:
+ local->op_ret = -1;
+
+ dht_update_commit_hash_for_layout_done (frame, NULL, this, 0, 0, NULL);
+
+ return 0;
+}
+
+/* ENTER: dht_update_commit_hash_for_layout (see EXIT above)
+ * This function is invoked from rebalance only.
+ * As a result, the check here is simple enough to see if defrag is present
+ * in the conf, as other data would be populated appropriately if so.
+ * If ever this was to be used in other code paths, checks would need to
+ * change.
+ *
+ * Functional details:
+ * - Lock the inodes on the subvols that we want the commit hash updated
+ * - Update each layout with the inode layout, modified to take in the new
+ * commit hash.
+ * - Unlock and return.
+ */
+int
+dht_update_commit_hash_for_layout (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+ local = frame->local;
+ conf = frame->this->private;
+
+ /* without defrag state in conf there is nothing to do: fail */
+ if (!conf->defrag)
+ goto err;
+
+ count = conf->local_subvols_cnt;
+ lk_array = GF_CALLOC (count, sizeof (*lk_array),
+ gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ /* one F_WRLCK request per local subvolume, all on local->loc */
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new (frame->this,
+ conf->local_subvols[i],
+ &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+ if (lk_array[i] == NULL)
+ goto err;
+ }
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ /* the operation continues in ..._resume, passed here as the callback */
+ ret = dht_blocking_inodelk (frame, lk_array, count, FAIL_ON_ANY_ERROR,
+ dht_update_commit_hash_for_layout_resume);
+ if (ret < 0) {
+ /* detach the array from local before it is freed under err: */
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
- return 0;
+ return -1;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 00000000000..0fea1d58e58
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,1087 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+/* TODO: add NS locking */
+#include "statedump.h"
+#include "dht-common.h"
+#include "dht-messages.h"
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b))?(a):(b))
+#endif
+
+/* Set conf->defrag->recon_thread_count from conf->dthrottle while holding
+ * conf->defrag->dfq_mutex:
+ * "lazy" -> 1
+ * "normal" -> throttle_count / 2
+ * "aggressive" -> throttle_count
+ * with throttle_count = MAX (sysconf(_SC_NPROCESSORS_ONLN) - 4, 4).
+ * Any other dthrottle value leaves recon_thread_count unchanged.
+ * The comparison is case-insensitive. Both arguments are expanded more
+ * than once, and the expansion is a bare { } block rather than
+ * do { } while (0).
+ */
+#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) { \
+ \
+ pthread_mutex_lock (&conf->defrag->dfq_mutex); \
+ \
+ if (!strcasecmp (conf->dthrottle, "lazy")) \
+ conf->defrag->recon_thread_count = 1; \
+ \
+ throttle_count = \
+ MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \
+ \
+ if (!strcasecmp (conf->dthrottle, "normal")) \
+ conf->defrag->recon_thread_count = \
+ (throttle_count / 2); \
+ \
+ if (!strcasecmp (conf->dthrottle, "aggressive")) \
+ conf->defrag->recon_thread_count = \
+ throttle_count; \
+ \
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex); \
+ } \
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+struct volume_options options[];
+
+extern dht_methods_t dht_methods;
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ /* Write a layout to the statedump, every key built from prefix.
+ * Nothing is written unless both a layout and a prefix are given. */
+ if (!layout || !prefix)
+ return;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ /* The per-entry list is dumped for directory layouts only. */
+ if (!IA_ISDIR (layout->type))
+ return;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].stop);
+
+ /* entries without an xlator have no type/name to report */
+ if (!layout->list[i].xlator) {
+ continue;
+ }
+
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->name);
+ }
+}
+
+
+/* Statedump hook for the translator's private state (dht_conf_t).
+ * Returns -1 when this or this->private is missing, the non-zero
+ * TRY_LOCK result when conf->subvolume_lock cannot be taken (nothing is
+ * dumped in that case), and 0 after a completed dump.
+ */
+int32_t
+dht_priv_dump (xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ /* non-blocking: skip the dump rather than wait for the lock */
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+ /* per-subvolume: identity, then any file/dir layouts and status */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ snprintf (key, sizeof (key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]){
+ snprintf (key, sizeof (key), "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ snprintf (key, sizeof (key), "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+
+ snprintf (key, sizeof (key), "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d",
+ (int)conf->subvolume_status[i]);
+ }
+
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp);
+
+ /* disk-usage stats are written only for subvolumes whose status
+ * entry is non-zero */
+ if (conf->du_stats && conf->subvolume_status) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i])
+ continue;
+
+ snprintf (key, sizeof (key), "subvolumes[%d]", i);
+ gf_proc_dump_write (key, "%s",
+ conf->subvolumes[i]->name);
+
+ snprintf (key, sizeof (key),
+ "du_stats[%d].avail_percent", i);
+ gf_proc_dump_write (key, "%lf",
+ conf->du_stats[i].avail_percent);
+
+ snprintf (key, sizeof (key), "du_stats[%d].avail_space",
+ i);
+ gf_proc_dump_write (key, "%lu",
+ conf->du_stats[i].avail_space);
+
+ snprintf (key, sizeof (key),
+ "du_stats[%d].avail_inodes", i);
+ gf_proc_dump_write (key, "%lf",
+ conf->du_stats[i].avail_inodes);
+
+ snprintf (key, sizeof (key), "du_stats[%d].log", i);
+ gf_proc_dump_write (key, "%lu",
+ conf->du_stats[i].log);
+ }
+ }
+
+ if (conf->last_stat_fetch.tv_sec)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch.tv_sec));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+
+ /* Statedump hook for an inode: dumps the layout stored in its
+ * context, if there is one. */
+ if (!this || !inode)
+ return ret;
+
+ ret = dht_inode_ctx_layout_get (inode, this, &layout);
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+ return ret;
+}
+
+void
+dht_fini (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ /* Detach the private state from the xlator, then release it. */
+ conf = this->private;
+ this->private = NULL;
+ if (!conf)
+ goto out;
+
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ dict_destroy(conf->leaf_to_subvol);
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ if (conf->lock_pool)
+ mem_pool_destroy (conf->lock_pool);
+
+ GF_FREE (conf);
+out:
+ return;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ /* Set up memory accounting with gf_dht_mt_end + 1 types; the
+ * result of that call is what gets returned. */
+ ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_NO_MEMORY,
+ "Memory accounting init failed");
+ }
+out:
+ return ret;
+}
+
+
+/* Mark every subvolume named in the comma-separated list bricks as
+ * decommissioned in conf.
+ *
+ * Returns 0 when every name matched a subvolume (and sets
+ * conf->decommission_in_progress), -1 when conf or bricks is NULL, the
+ * list cannot be duplicated, or a name matches no subvolume. Names
+ * processed before a failing one stay marked.
+ */
+int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ /* strtok_r modifies its input, so tokenize a private copy */
+ dup_brick = gf_strdup (bricks);
+ if (!dup_brick)
+ goto out;
+
+ node = strtok_r (dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp (conf->subvolumes[i]->name, node)) {
+ /* Count a slot only when it becomes marked:
+ * dht_decommissioned_remove decrements once
+ * per marked slot, so a repeated name or a
+ * repeated parse must not count it again. */
+ if (!conf->decommissioned_bricks[i])
+ conf->decommission_subvols_cnt++;
+ conf->decommissioned_bricks[i] =
+ conf->subvolumes[i];
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r (NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE (dup_brick);
+
+ return ret;
+}
+
+int
+dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+
+ /* Clear every decommissioned-brick slot, decrementing the
+ * decommissioned count once per slot cleared. */
+ if (!conf)
+ return -1;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->decommissioned_bricks[i])
+ continue;
+
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+
+ return 0;
+}
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+ regex_t *re, gf_boolean_t *re_valid)
+{
+ char *temp_str;
+
+ /* (Re)compile the regex option called name from odict into re;
+ * *re_valid tells the caller whether re holds a compiled regex. */
+ if (dict_get_str (odict, name, &temp_str) != 0) {
+ /* only rsync-hash-regex has a built-in fallback pattern */
+ if (strcmp(name,"rsync-hash-regex") != 0) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ /* release a previously compiled expression before replacing it */
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ /* "none" leaves the regex disabled */
+ if (strcmp(temp_str,"none") == 0) {
+ return;
+ }
+
+ if (regcomp(re,temp_str,REG_EXTENDED) != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_REGEX_INFO,
+ "compiling regex %s failed", temp_str);
+ return;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+}
+
+int
+dht_set_subvol_range(xlator_t *this)
+{
+ dht_conf_t *conf = this->private;
+
+ /* Create conf->leaf_to_subvol and fill it through
+ * glusterfs_reachable_leaves, whose result is returned. */
+ if (!conf)
+ return -1;
+
+ conf->leaf_to_subvol = dict_new();
+ if (!conf->leaf_to_subvol)
+ return -1;
+
+ return glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
+}
+
+/* Apply a changed option dictionary to a running translator.
+ * Returns 0 on success (also when this->private is not set), -1 when an
+ * argument is missing or an option fails to parse; the GF_OPTION_RECONF
+ * invocations are given the out label as their failure target. Options
+ * applied before a failing one stay applied.
+ */
+int
+dht_reconfigure (xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+ int throttle_count = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ /* lookup-unhashed is a string option: "auto" or a boolean spelling */
+ if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp (temp_str, "auto")) {
+ if (!gf_string2boolean (temp_str, &search_unhashed)) {
+ gf_msg_debug(this->name, 0, "Reconfigure: "
+ "lookup-unhashed reconfigured(%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0, "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+ percent, out);
+
+ GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+ bool, out);
+ GF_OPTION_RECONF ("randomize-hash-range-by-gfid",
+ conf->randomize_by_gfid,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("rebal-throttle", conf->dthrottle, options,
+ str, out);
+
+ GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled,
+ options, bool, out);
+
+ /* rebalance-only state: mirror lock-migration into defrag and
+ * recompute the thread count from the (possibly new) throttle */
+ if (conf->defrag) {
+ conf->defrag->lock_migration_enabled =
+ conf->lock_migration_enabled;
+
+ GF_DECIDE_DEFRAG_THROTTLE_COUNT (throttle_count, conf);
+ gf_msg ("DHT", GF_LOG_INFO, 0,
+ DHT_MSG_REBAL_THROTTLE_INFO,
+ "conf->dthrottle: %s, "
+ "conf->defrag->recon_thread_count: %d",
+ conf->dthrottle, conf->defrag->recon_thread_count);
+ }
+
+ if (conf->defrag) {
+ GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
+ options, bool, out);
+ }
+
+ /* option present: (re)parse the list; option absent: clear all marks */
+ if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ ret = dht_decommissioned_remove (this, conf);
+ if (ret == -1)
+ goto out;
+ }
+
+ dht_init_regex (this, options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ GF_OPTION_RECONF ("weighted-rebalance", conf->do_weighting, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("use-readdirp", conf->use_readdirp, options,
+ bool, out);
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Parse the comma-separated filter string data into the singly linked
+ * list defrag->defrag_pattern; each new entry is pushed on the head.
+ *
+ * Each item is "pattern" or "pattern:size". An item with no ':' that
+ * parses as a byte size is stored as pattern "*" with that size.
+ *
+ * data is tokenized in place with strtok_r and is therefore modified.
+ * Returns 0 on success, -1 on a NULL argument, an allocation failure or
+ * an unparsable size; entries added before the failure stay on the list.
+ */
+static int
+gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r (data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup (pattern_str);
+ if (!dup_str)
+ goto out;
+
+ pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
+ 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ /* a lone token that parses as a size means "*:<size>" */
+ if (gf_string2bytesize_uint64(pattern, &pattern_list->size)
+ == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize_uint64 (num, &pattern_list->size) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option. Defrag pattern:"
+ " Invalid number format \"%s\"", num);
+ goto out;
+ }
+ /* Copy by the length of pattern itself (it may be the short
+ * literal "*"), bounded by the destination and always
+ * NUL-terminated.
+ * NOTE(review): relies on path_pattern being an array member,
+ * as the copy into a freshly calloc'ed struct implies. */
+ snprintf (pattern_list->path_pattern,
+ sizeof (pattern_list->path_pattern), "%s", pattern);
+
+ /* push the new entry on the head of the list */
+ pattern_list->next = defrag->defrag_pattern;
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE (pattern_list);
+ GF_FREE (dup_str);
+
+ return ret;
+}
+
+
+
+int
+dht_init_methods (xlator_t *this)
+{
+ dht_methods_t *methods = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ /* Fill the method table embedded in the private conf with the
+ * plain DHT implementations; migration_other stays unset. */
+ conf = this->private;
+ methods = &(conf->methods);
+
+ methods->layout_search = dht_layout_search;
+ methods->migration_other = NULL;
+ methods->migration_needed = dht_migration_needed;
+ methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
+
+ ret = 0;
+err:
+ return ret;
+}
+
+/* Translator init: allocate the private dht_conf_t, read the options,
+ * set up subvolumes, layouts, locks and memory pools, and publish the
+ * conf as this->private.
+ *
+ * When the "rebalance-cmd" option is non-zero a gf_defrag_info_t is also
+ * allocated and initialised, and "node-uuid" becomes mandatory.
+ *
+ * Returns 0 on success and -1 on any failure; on failure the conf and
+ * the members released under err: are freed and this->private is not
+ * left pointing at the freed conf.
+ */
+int
+dht_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+ int throttle_count = 0;
+ uint32_t commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ if (!this->children) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ DHT_MSG_INVALID_CONFIGURATION,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_INVALID_CONFIGURATION,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ /* We get the commit-hash to set only for rebalance process */
+ if (dict_get_uint32 (this->options,
+ "commit-hash", &commit_hash) == 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_COMMIT_HASH_INFO, "%s using commit hash %u",
+ __func__, commit_hash);
+ conf->vol_commit_hash = commit_hash;
+ conf->vch_forced = _gf_true;
+ }
+
+ /* cmd stays 0 when the option is absent, so ret is not checked */
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+
+ ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_CONFIGURATION,
+ "Invalid volume configuration: "
+ "node-uuid not specified");
+ goto err;
+ }
+
+ if (gf_uuid_parse (node_uuid, defrag->node_uuid)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION, "Invalid option:"
+ " Cannot parse glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+
+ defrag->queue = NULL;
+
+ defrag->crawl_done = 0;
+
+ defrag->global_error = 0;
+
+ defrag->q_entry_count = 0;
+
+ defrag->wakeup_crawler = 0;
+
+ synclock_init (&defrag->link_lock, SYNC_LOCK_DEFAULT);
+ pthread_mutex_init (&defrag->dfq_mutex, 0);
+ pthread_cond_init (&defrag->parallel_migration_cond, 0);
+ pthread_cond_init (&defrag->rebalance_crawler_alarm, 0);
+ pthread_cond_init (&defrag->df_wakeup_thread, 0);
+
+ defrag->global_error = 0;
+
+ }
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp (temp_str, "auto")) {
+ ret = gf_string2boolean (temp_str,
+ &conf->search_unhashed);
+ if (ret == -1)
+ goto err;
+ }
+ else
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+
+ GF_OPTION_INIT ("lookup-optimize", conf->lookup_optimize, bool, err);
+
+ GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
+ err);
+
+ GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+ err);
+
+ GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
+ err);
+
+ /* NOTE(review): dht_init_subvolumes has not run yet at this point,
+ * so subvolume_cnt presumably still holds its calloc'ed 0 - confirm
+ * this default is what is intended. */
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
+ uint32, err);
+
+ GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
+ bool, err);
+
+ GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+
+
+ GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled,
+ bool, err);
+
+ if (defrag) {
+ defrag->lock_migration_enabled = conf->lock_migration_enabled;
+
+ GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str (this->options, "rebalance-filter", &temp_str)
+ == 0) {
+ if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
+ == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse rebalance-filter (%s)",
+ temp_str);
+
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (cmd) {
+ ret = dht_init_local_subvolumes (this, conf);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED,
+ "dht_init_local_subvolumes failed");
+ goto err;
+ }
+ }
+
+ if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex (this, this->options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, this->options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+ LOCK_INIT (&conf->layout_lock);
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new (dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ " DHT initialisation failed. "
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT ("randomize-hash-range-by-gfid",
+ conf->randomize_by_gfid, bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT ("rebal-throttle",
+ conf->dthrottle, str, err);
+
+ GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf);
+
+ gf_msg_debug ("DHT", 0, "conf->dthrottle: %s, "
+ "conf->defrag->recon_thread_count: %d",
+ conf->dthrottle,
+ conf->defrag->recon_thread_count);
+ }
+
+ GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf (&conf->link_xattr_name, "%s."DHT_LINKFILE_STR,
+ conf->xattr_name);
+ gf_asprintf (&conf->commithash_xattr_name, "%s."DHT_COMMITHASH_STR,
+ conf->xattr_name);
+ gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ /* all three derived names must have been built */
+ if (!conf->link_xattr_name || !conf->commithash_xattr_name ||
+ !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ GF_OPTION_INIT ("weighted-rebalance", conf->do_weighting, bool, err);
+
+ conf->lock_pool = mem_pool_new (dht_lock_t, 512);
+ if (!conf->lock_pool) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED,
+ "failed to create lock mem_pool, failing "
+ "initialization");
+ goto err;
+ }
+
+ this->private = conf;
+
+ if (dht_set_subvol_range(this))
+ goto err;
+
+ if (dht_init_methods (this))
+ goto err;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf->du_stats);
+
+ GF_FREE (conf->defrag);
+
+ /* NOTE(review): xattr_name is filled by GF_OPTION_INIT (str);
+ * confirm conf owns that string before relying on this free. */
+ GF_FREE (conf->xattr_name);
+ GF_FREE (conf->link_xattr_name);
+ GF_FREE (conf->commithash_xattr_name);
+ GF_FREE (conf->wild_xattr_name);
+
+ if (conf->lock_pool)
+ mem_pool_destroy (conf->lock_pool);
+
+ /* a failure after publishing conf must not leave
+ * this->private pointing at freed memory */
+ if (this->private == conf)
+ this->private = NULL;
+
+ GF_FREE (conf);
+ }
+
+ return -1;
+}
+
+
+/* Option table for this translator, terminated by the { NULL } key entry.
+ * Besides the options read by dht_init / dht_reconfigure it also lists
+ * the NUFA, tier and switch option keys.
+ */
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
+ "on", "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes."
+ },
+ { .key = {"lookup-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON enables the optimization "
+ "of -ve lookups, by not doing a lookup on non-hashed subvolumes for "
+ "files, in case the hashed subvolume does not return any result. "
+ "This option disregards the lookup-unhashed setting, when enabled."
+ },
+ { .key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description = "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ },
+ { .key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ },
+ { .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files."
+ },
+ { .key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."
+ },
+ { .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread. Takes number "
+ "of subvolumes as default value."
+ },
+ { .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description = "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick."
+ },
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"commit-hash"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory."
+ },
+ { .key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries."
+ },
+ { .key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed."
+ },
+ { .key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed."
+ },
+ { .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description = "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it."
+ },
+
+ { .key = {"weighted-rebalance"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "When enabled, files will be allocated to bricks "
+ "with a probability proportional to their size. Otherwise, all "
+ "bricks will have the same probability (legacy behavior)."
+ },
+
+ /* NUFA option */
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+
+ /* tier options */
+ { .key = {"tier-pause"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
+ { .key = {"tier-promote-frequency"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "120",
+ .description = "Frequency to promote files to fast tier"
+ },
+
+ { .key = {"tier-demote-frequency"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "3600",
+ .description = "Frequency to demote files to slow tier"
+ },
+
+ { .key = {"write-freq-threshold"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ },
+
+ { .key = {"read-freq-threshold"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ },
+ { .key = {"watermark-hi"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "90",
+ },
+ { .key = {"watermark-low"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "75",
+ },
+ { .key = {"tier-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "test",
+ },
+ { .key = {"tier-max-mb"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "4000",
+ },
+ { .key = {"tier-max-promote-file-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ },
+ { .key = {"tier-max-files"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "10000",
+ },
+ /* switch option */
+ { .key = {"pattern.switch.case"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+
+ { .key = {"randomize-hash-range-by-gfid"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Use gfid of directory to determine the subvolume "
+ "from which hash ranges are allocated starting with 0. "
+ "Note that we still use a directory/file's name to determine the "
+ "subvolume to which it hashes"
+ },
+
+ /* The limits below mirror GF_DECIDE_DEFRAG_THROTTLE_COUNT:
+ * normal = MAX (nproc - 4, 4) / 2, aggressive = MAX (nproc - 4, 4). */
+ { .key = {"rebal-throttle"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "normal",
+ .description = " Sets the maximum number of parallel file migrations "
+ "allowed on a node during the rebalance operation. The"
+ " default value is normal and allows a max of "
+ "[(($(processing units) - 4) / 2), 2] files to be "
+ "migrated at a time. Lazy will allow only one file to "
+ "be migrated at a time and aggressive will allow "
+ "max of [($(processing units) - 4), 4]"
+ },
+
+ { .key = {"lock-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = " If enabled this feature will migrate the posix locks"
+ " associated with a file during rebalance"
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 0fc5d45a317..afdfd5c80ea 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -1,416 +1,74 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-/* TODO: add NS locking */
-
#include "statedump.h"
-#include "dht-common.c"
-
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- if (!layout)
- return;
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- if (!this)
- return -1;
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
-
- if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to lock dht subvolume %s",
- this->name);
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_build_key(key, key_prefix, "subvolume_cnt");
- gf_proc_dump_write(key,"%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- gf_proc_dump_build_key(key, key_prefix, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- gf_proc_dump_build_key(key, key_prefix,
- "file_layouts[%d]",i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- gf_proc_dump_build_key(key, key_prefix,
- "dir_layouts[%d]",i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
- gf_proc_dump_build_key(key, key_prefix,
- "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_build_key(key, key_prefix,"default_dir_layout");
- dht_layout_dump(conf->default_dir_layout, key);
-
- gf_proc_dump_build_key(key, key_prefix, "search_unhashed");
- gf_proc_dump_write(key, "%d", conf->search_unhashed);
- gf_proc_dump_build_key(key, key_prefix, "gen");
- gf_proc_dump_write(key, "%d", conf->gen);
- gf_proc_dump_build_key(key, key_prefix, "min_free_disk");
- gf_proc_dump_write(key, "%lu", conf->min_free_disk);
- gf_proc_dump_build_key(key, key_prefix, "disk_unit");
- gf_proc_dump_write(key, "%c", conf->disk_unit);
- gf_proc_dump_build_key(key, key_prefix, "refresh_interval");
- gf_proc_dump_write(key, "%d", conf->refresh_interval);
- gf_proc_dump_build_key(key, key_prefix, "unhashed_sticky_bit");
- gf_proc_dump_write(key, "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.avail_percent");
- gf_proc_dump_write(key, "%lf", conf->du_stats->avail_percent);
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.avail_space");
- gf_proc_dump_write(key, "%lu", conf->du_stats->avail_space);
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.log");
- gf_proc_dump_write(key, "%lu", conf->du_stats->log);
- }
- gf_proc_dump_build_key(key, key_prefix, "last_stat_fetch");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
- return 0;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- if (!inode)
- return -1;
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht",
- "%s.inode.%ld", this->name, inode->ino);
- dht_layout_dump(layout, key_prefix);
-
- return 0;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- GF_FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- }
-
- conf->unhashed_sticky_bit = 0;
-
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->disk_unit = 'p';
- conf->min_free_disk = 10;
-
- if (dict_get_str (this->options, "min-free-disk", &temp_str) == 0) {
- if (gf_string2percent (temp_str, &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- }
- } else {
- gf_string2bytesize (temp_str, &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
-
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- GF_FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
-}
+#include "dht-common.h"
+/*
+ * Translator lifecycle table: init, fini, reconfigure and notify are all
+ * routed to the dht_* implementations.
+ */
+class_methods_t class_methods = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
- .lookup = dht_lookup,
- .mknod = dht_mknod,
- .create = dht_create,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .open = dht_open,
+ .statfs = dht_statfs,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+ .lease = dht_lease,
+
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
.fsetattr = dht_fsetattr,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
@@ -420,23 +78,7 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
-// .release = dht_release,
+ .release = dht_release,
// .releasedir = dht_releasedir,
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} },
+ .forget = dht_forget
};
diff --git a/xlators/cluster/dht/src/dht.sym b/xlators/cluster/dht/src/dht.sym
new file mode 100644
index 00000000000..780b5fc0387
--- /dev/null
+++ b/xlators/cluster/dht/src/dht.sym
@@ -0,0 +1,8 @@
+fops
+cbks
+class_methods
+dht_methods
+options
+mem_acct_init
+reconfigure
+dumpops
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 8d181fa507a..56e17d6e884 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -1,35 +1,23 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include "dht-common.c"
+#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
+extern struct volume_options options[];
+
int
nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
+ int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, dict_t *xattr,
struct iatt *postparent)
{
@@ -41,67 +29,61 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
int i = 0;
call_frame_t *prev = NULL;
- int call_cnt = 0;
+ int call_cnt = 0;
int ret = 0;
-
conf = this->private;
prev = cookie;
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
/* non-directory and not a linkfile */
-
- dht_itransform (this, prev->this, stbuf->ia_ino,
- &stbuf->ia_ino);
-
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0,
+ "could not set pre-set layout for subvol"
+ " %s", prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
goto out;
}
if (is_dir) {
call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ local->call_cnt = call_cnt;
local->inode = inode_ref (inode);
local->xattr = dict_ref (xattr);
- local->op_ret = 0;
- local->op_errno = 0;
+ local->op_ret = 0;
+ local->op_errno = 0;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
@@ -115,56 +97,55 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ gf_msg_debug (this->name, 0,
+ "linkfile has no link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
}
return 0;
out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
+ if (!local->hashed_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
local->op_errno = ENOENT;
dht_lookup_everywhere (frame, this, loc);
return 0;
- }
+ }
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
- return 0;
+ return 0;
- err:
+err:
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, NULL);
+ inode, stbuf, xattr, postparent);
return 0;
}
int
nufa_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xattr_req)
{
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
@@ -173,578 +154,532 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ conf = this->private;
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
+ gf_msg_debug (this->name, 0,
+ "revalidate lookup without cache. "
+ "path=%s", loc->path);
op_errno = EINVAL;
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug (this->name, 0,
+ "incomplete layout failure for path=%s",
+ loc->path);
dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
-
- local->inode = inode_ref (loc->inode);
- local->ia_ino = loc->inode->ino;
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
+ goto do_fresh_lookup;
+ }
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ local->inode = inode_ref (loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
- if (!--call_cnt)
- break;
- }
- } else {
+ if (!--call_cnt)
+ break;
+ }
+ } else {
do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
- /* Send it to only local volume */
- STACK_WIND (frame, nufa_local_lookup_cbk,
- (xlator_t *)conf->private,
- ((xlator_t *)conf->private)->fops->lookup,
- loc, local->xattr_req);
- }
+ /* Send it to only local volume */
+ STACK_WIND (frame, nufa_local_lookup_cbk,
+ (xlator_t *)conf->private,
+ ((xlator_t *)conf->private)->fops->lookup,
+ loc, local->xattr_req);
+ }
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
+/*
+ * Callback passed to dht_linkfile_create() by nufa_create().
+ *
+ * When op_ret is not -1, winds the create fop to local->cached_subvol
+ * using the loc, flags, mode, umask, fd and params saved in the frame's
+ * local, with dht_create_cbk as the callback. When op_ret is -1, unwinds
+ * the create fop with -1 and the received op_errno.
+ *
+ * Always returns 0.
+ */
int
nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
+ xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
+ STACK_WIND (frame, dht_create_cbk,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- return 0;
+ return 0;
- err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
+/*
+ * create fop for the nufa translator.
+ *
+ * The target starts as conf->private; if dht_is_subvol_filled() reports
+ * that subvolume as filled, the target becomes whatever
+ * dht_free_disk_available_subvol() returns. When the target differs from
+ * the hashed subvolume of loc, the request parameters are saved in the
+ * frame's local and dht_linkfile_create() is called with
+ * nufa_create_linkfile_create_cbk as its callback. Otherwise the create
+ * is wound directly to the hashed subvolume with dht_create_cbk.
+ *
+ * On failure (argument validation, dht_local_init() returning NULL, or no
+ * hashed subvolume) the create fop is unwound with -1; op_errno falls
+ * back to errno when it was never set.
+ *
+ * Always returns 0.
+ */
int
nufa_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
avail_subvol = conf->private;
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
+ local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
+/*
+ * Callback passed to dht_linkfile_create() by nufa_mknod().
+ *
+ * If the frame has no local, or the local has no cached_subvol, the
+ * request fails with op_ret -1 / op_errno EINVAL. Otherwise, when
+ * op_ret >= 0, the mknod is wound to local->cached_subvol (also passed as
+ * the cookie) using the loc, mode, rdev, umask and params saved in local,
+ * with dht_newfile_cbk as the callback.
+ *
+ * On any failure preparent and postparent are wiped and the request is
+ * unwound. The unwind names the mknod fop, matching nufa_mknod(), because
+ * this callback completes a mknod request.
+ *
+ * Always returns 0.
+ */
int
nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ dht_local_t *local = NULL;
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
- return 0;
- }
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
+ local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev,
+ local->umask, local->params);
+ return 0;
+ }
+err:
WIPE (postparent);
WIPE (preparent);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
- return 0;
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+ return 0;
}
+/*
+ * mknod fop for the nufa translator.
+ *
+ * The target starts as conf->private; if dht_is_subvol_filled() reports
+ * that subvolume as filled, the target becomes whatever
+ * dht_free_disk_available_subvol() returns. When the target differs from
+ * the hashed subvolume of loc, the request parameters are saved in the
+ * frame's local and dht_linkfile_create() is called with
+ * nufa_mknod_linkfile_cbk as its callback. Otherwise the mknod is wound
+ * directly to the hashed subvolume (also passed as the cookie) with
+ * dht_newfile_cbk.
+ *
+ * On failure (argument validation, dht_local_init() returning NULL, or no
+ * hashed subvolume) the mknod fop is unwound with -1; op_errno falls back
+ * to errno when it was never set.
+ *
+ * Always returns 0.
+ */
int
nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
/* Consider the disksize in consideration */
avail_subvol = conf->private;
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
- if (avail_subvol != subvol) {
- /* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->mode = mode;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref (params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
avail_subvol, subvol, loc);
- return 0;
- }
+ return 0;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
+/*
+ * Compare the leading parts of two strings.
+ *
+ * The leading part of str1 ends at the first occurrence of term1 or at the
+ * terminating NUL; likewise str2 with term2. Returns _gf_true when both
+ * leading parts end at the same position and every character before that
+ * position is equal, _gf_false otherwise. Neither pointer is checked for
+ * NULL.
+ */
+gf_boolean_t
+same_first_part (char *str1, char term1, char *str2, char term2)
{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
+
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
+ }
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
}
-void
-fini (xlator_t *this)
+/*
+ * Argument bundle handed to nufa_find_local_brick() through its void *data
+ * parameter.
+ *   this       - xlator whose private dht_conf_t receives the match
+ *   volname    - name compared against each visited xlator's name; with
+ *                addr_match it is also compared as an address against the
+ *                xlator's "remote-host" option
+ *   addr_match - when false, only the name comparison is performed
+ */
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
+
+/*
+ * Per-xlator visitor that records the local subvolume in conf->private.
+ *
+ * data is a nufa_args_t. The visitor is a no-op once conf->private is set.
+ * Otherwise xl is chosen when its name equals args->volname, or - only if
+ * args->addr_match is set - when its "remote-host" option is the same
+ * address as args->volname or is a local address.
+ */
+static void
+nufa_find_local_brick (xlator_t *xl, void *data)
{
- int i = 0;
- dht_conf_t *conf = NULL;
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /* A local subvol was already found; only the first local brick
+ * encountered is used. */
+ if (conf->private)
+ return;
+
+ if (strcmp (xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "Using specified subvol %s",
+ local_volname);
+ return;
+ }
- conf = this->private;
+ if (!addr_match)
+ return;
+
+ ret = dict_get_str (xl->options, "remote-host", &brick_host);
+ if ((ret == 0) &&
+ (gf_is_same_address (local_volname, brick_host) ||
+ gf_is_local_addr (brick_host))) {
+ conf->private = xl;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO, "Using the first local "
+ "subvol %s", xl->name);
+ return;
+ }
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
+}
- if (conf->default_dir_layout)
- GF_FREE (conf->default_dir_layout);
+/*
+ * Replace the lookup, create and mknod entries of this->fops with the
+ * dht_* implementations. Asserts that this and this->fops are non-NULL.
+ */
+static void
+nufa_to_dht (xlator_t *this)
+{
+ GF_ASSERT (this);
+ GF_ASSERT (this->fops);
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
+}
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
+/*
+ * Locate the local subvolume and store it in conf->private.
+ *
+ * fn is run over the graph via xlator_foreach_depth_first() with data as
+ * its argument and is expected to set conf->private to the matching
+ * xlator. From that xlator the function climbs the parent chain, taking
+ * the first entry of each parents list, until it reaches a parent whose
+ * type is "cluster/nufa"; the child of that parent becomes conf->private.
+ *
+ * Returns 0 on success. Returns -1 when fn found nothing (an error is
+ * logged) or when no "cluster/nufa" ancestor is reached (nothing is
+ * logged, and conf->private keeps the xlator set by fn).
+ */
+int
+nufa_find_local_subvol (xlator_t *this,
+ void (*fn) (xlator_t *each, void* data), void *data)
+{
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first (this, fn, data);
+ if (!conf->private) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_BRICK_ERROR, "Couldn't find a local "
+ "brick");
+ return -1;
+ }
+
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
+
+ parent = trav->xlator;
+ if (strcmp (parent->type, "cluster/nufa") == 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO, "Found local subvol, "
+ "%s", candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
+ }
- GF_FREE (conf);
+ candidate = parent;
+ trav = parent->parents;
}
- return;
+ return ret;
}
int
-init (xlator_t *this)
+nufa_init (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
+ data_t *data = NULL;
+ char *local_volname = NULL;
int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_dht_mt_dht_conf_t);
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {0, };
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- }
+ if ((data = dict_get (this->options, "local-volume-name"))) {
+ local_volname = data->data;
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ } else {
+ addr_match = _gf_true;
+ local_volname = "localhost";
+ ret = gethostname (my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
+ else
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_GET_HOSTNAME_FAILED,
+ "could not find hostname");
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
- }
-
- if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
- local_volname = data->data;
- }
-
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->private = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
}
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht (this);
}
-
- this->private = conf;
-
return 0;
+}
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- GF_FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
+dht_methods_t dht_methods = {
+ .migration_get_dst_subvol = dht_migration_get_dst_subvol,
+ .migration_needed = dht_migration_needed,
+ .layout_search = dht_layout_search,
+};
- return -1;
-}
+class_methods_t class_methods = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
- .lookup = nufa_lookup,
- .create = nufa_create,
- .mknod = nufa_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
.setattr = dht_setattr,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
};
struct xlator_cbks cbks = {
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+ .forget = dht_forget
};
diff --git a/xlators/cluster/dht/src/nufa.sym b/xlators/cluster/dht/src/nufa.sym
new file mode 100644
index 00000000000..780b5fc0387
--- /dev/null
+++ b/xlators/cluster/dht/src/nufa.sym
@@ -0,0 +1,8 @@
+fops
+cbks
+class_methods
+dht_methods
+options
+mem_acct_init
+reconfigure
+dumpops
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index 22289b3a1bf..f1e9a399442 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -1,29 +1,15 @@
/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include "dht-common.c"
+#include "dht-common.h"
#include "dht-mem-types.h"
#include <sys/time.h>
@@ -31,21 +17,23 @@
#include <fnmatch.h>
#include <string.h>
+extern struct volume_options options[];
+
struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
+ xlator_t *xl;
+ int32_t eligible;
+ int32_t considered;
};
/* Select one of this struct based on the path's pattern match */
struct switch_struct {
- struct switch_struct *next;
+ struct switch_struct *next;
struct switch_sched_array *array;
- int32_t node_index; /* Index of the node in
+ int32_t node_index; /* Index of the node in
this pattern. */
- int32_t num_child; /* Total num of child nodes
+ int32_t num_child; /* Total num of child nodes
with this pattern. */
- char path_pattern[256];
+ char path_pattern[256];
};
/* TODO: all 'TODO's in dht.c holds good */
@@ -74,31 +62,39 @@ get_switch_matching_subvol (const char *path, dht_conf_t *conf,
xlator_t *hashed_subvol)
{
struct switch_struct *cond = NULL;
- struct switch_struct *trav = NULL;
- char *pathname = NULL;
- int idx = 0;
+ struct switch_struct *trav = NULL;
+ char *pathname = NULL;
+ int idx = 0;
+ xlator_t *subvol = NULL;
cond = conf->private;
+ subvol = hashed_subvol;
if (!cond)
- return hashed_subvol;
+ goto out;
- trav = cond;
pathname = gf_strdup (path);
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
+ if (!pathname)
+ goto out;
+
+ trav = cond;
+ while (trav) {
+ if (fnmatch (trav->path_pattern,
+ pathname, FNM_NOESCAPE) == 0) {
for (idx = 0; idx < trav->num_child; idx++) {
if (trav->array[idx].xl == hashed_subvol)
- return hashed_subvol;
+ goto out;
}
idx = trav->node_index++;
trav->node_index %= trav->num_child;
- return trav->array[idx].xl;
- }
- trav = trav->next;
- }
- GF_FREE (pathname);
- return hashed_subvol;
+ subvol = trav->array[idx].xl;
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ GF_FREE (pathname);
+
+ return subvol;
}
@@ -116,7 +112,7 @@ switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
int i = 0;
call_frame_t *prev = NULL;
- int call_cnt = 0;
+ int call_cnt = 0;
int ret = 0;
conf = this->private;
@@ -125,57 +121,56 @@ switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
/* non-directory and not a linkfile */
- dht_itransform (this, prev->this, stbuf->ia_ino,
- &stbuf->ia_ino);
-
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0,
+ "could not set pre-set layout "
+ "for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
goto out;
}
if (is_dir) {
call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ local->call_cnt = call_cnt;
local->inode = inode_ref (inode);
local->xattr = dict_ref (xattr);
- local->op_ret = 0;
- local->op_errno = 0;
+ local->op_ret = 0;
+ local->op_errno = 0;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg_debug (this->name, 0,
+ "memory allocation failed :(");
+ goto err;
+ }
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
@@ -189,37 +184,37 @@ switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ gf_msg_debug (this->name, 0,
+ "linkfile has no link subvolume.path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
}
return 0;
out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
+ if (!local->hashed_subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
local->op_errno = ENOENT;
dht_lookup_everywhere (frame, this, loc);
return 0;
- }
+ }
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
- return 0;
+ return 0;
- err:
+err:
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
inode, stbuf, xattr, NULL);
return 0;
@@ -227,18 +222,18 @@ out:
int
switch_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xattr_req)
{
xlator_t *hashed_subvol = NULL;
xlator_t *cached_subvol = NULL;
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
@@ -247,113 +242,112 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ conf = this->private;
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ cached_subvol = local->cached_subvol;
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ local->hashed_subvol = hashed_subvol;
if (is_revalidate (loc)) {
- local->layout = layout = dht_layout_get (this, loc->inode);
-
+ layout = local->layout;
if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
+ gf_msg_debug(this->name, 0,
+ "revalidate lookup without cache. path=%s",
+ loc->path);
op_errno = EINVAL;
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug (this->name, 0,
+ "incomplete layout failure for path=%s",
+ loc->path);
dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
+ goto do_fresh_lookup;
+ }
- local->inode = inode_ref (loc->inode);
- local->ia_ino = loc->inode->ino;
+ local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
* attribute, revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s",
+ conf->xattr_name);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
- if (!--call_cnt)
- break;
- }
- } else {
+ if (!--call_cnt)
+ break;
+ }
+ } else {
do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s",
+ conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s",
+ conf->link_xattr_name);
if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this,
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this,
conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
}
- /* */
+ /* */
cached_subvol = get_switch_matching_subvol (loc->path, conf,
hashed_subvol);
if (cached_subvol == hashed_subvol) {
@@ -372,281 +366,233 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+ return 0;
}
int
switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent)
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
+ STACK_WIND (frame, dht_create_cbk,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- return 0;
+ return 0;
- err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
int
switch_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- switch_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
-
- return 0;
- }
-
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
- return 0;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
+
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
+ local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev,
+ local->umask, local->params);
+
+ return 0;
+ }
+err:
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+ return 0;
}
int
-switch_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_msg_debug (this->name, 0,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
/* Consider the disksize in consideration */
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
- if (avail_subvol != subvol) {
- /* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
- local->mode = mode;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
+ local->params = dict_ref (params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
- return 0;
- }
+ dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
+ this, avail_subvol, subvol, loc);
+ return 0;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
void
-fini (xlator_t *this)
+switch_fini (xlator_t *this)
{
- int i = 0;
dht_conf_t *conf = NULL;
struct switch_struct *trav = NULL;
struct switch_struct *prev = NULL;
- conf = this->private;
+ conf = this->private;
if (conf) {
trav = (struct switch_struct *)conf->private;
conf->private = NULL;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
prev = trav;
trav = trav->next;
GF_FREE (prev);
}
-
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- GF_FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
}
- return;
+ dht_fini(this);
}
int
@@ -671,32 +617,32 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
struct switch_struct *switch_buf = NULL;
struct switch_struct *switch_opt = NULL;
struct switch_struct *trav = NULL;
- struct switch_sched_array *switch_buf_array = NULL;
- xlator_list_t *trav_xl = NULL;
+ struct switch_sched_array *switch_buf_array = NULL;
+ xlator_list_t *trav_xl = NULL;
trav_xl = this->children;
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- child_count = index;
- switch_buf_array = GF_CALLOC ((index + 1),
+ while (trav_xl) {
+ index++;
+ trav_xl = trav_xl->next;
+ }
+ child_count = index;
+ switch_buf_array = GF_CALLOC ((index + 1),
sizeof (struct switch_sched_array),
gf_switch_mt_switch_sched_array);
if (!switch_buf_array)
goto err;
- trav_xl = this->children;
- index = 0;
+ trav_xl = this->children;
+ index = 0;
- while (trav_xl) {
- switch_buf_array[index].xl = trav_xl->xlator;
- switch_buf_array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
+ while (trav_xl) {
+ switch_buf_array[index].xl = trav_xl->xlator;
+ switch_buf_array[index].eligible = 1;
+ trav_xl = trav_xl->next;
+ index++;
+ }
- /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
+ /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
/* Get the pattern for considering switch case.
"option block-size *avi:10MB" etc */
@@ -706,20 +652,25 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
dup_str = gf_strdup (switch_str);
switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
gf_switch_mt_switch_struct);
- if (!switch_opt)
+ if (!switch_opt) {
+ GF_FREE (dup_str);
goto err;
+ }
pattern = strtok_r (dup_str, ":", &tmp_str1);
childs = strtok_r (NULL, ":", &tmp_str1);
if (strncmp (pattern, "*", 2) == 0) {
- gf_log ("switch", GF_LOG_NORMAL,
+ gf_msg ("switch", GF_LOG_INFO, 0,
+ DHT_MSG_SWITCH_PATTERN_INFO,
"'*' pattern will be taken by default "
"for all the unconfigured child nodes,"
" hence neglecting current option");
switch_str = strtok_r (NULL, ";", &tmp_str);
+ GF_FREE (switch_opt);
GF_FREE (dup_str);
continue;
}
+ GF_FREE (dup_str);
memcpy (switch_opt->path_pattern, pattern, strlen (pattern));
if (childs) {
dup_childs = gf_strdup (childs);
@@ -729,7 +680,8 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
idx++;
child = strtok_r (NULL, ",", &tmp);
} else {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SUBVOL_ERROR,
"%s is not a subvolume of %s. "
"pattern can only be scheduled "
"only to a subvolume of %s",
@@ -741,7 +693,7 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
child = strtok_r (childs, ",", &tmp1);
switch_opt->num_child = idx;
switch_opt->array = GF_CALLOC (1, (idx *
- sizeof (struct switch_sched_array)),
+ sizeof (struct switch_sched_array)),
gf_switch_mt_switch_sched_array);
if (!switch_opt->array)
goto err;
@@ -750,13 +702,13 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
for (index = 0; index < child_count; index++) {
if (strcmp (switch_buf_array[index].xl->name,
child) == 0) {
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be "
- "scheduled to \"%s\"",
- switch_opt->path_pattern, child);
+ gf_msg_debug ("switch", 0,
+ "'%s' pattern will be "
+ "scheduled to \"%s\"",
+ switch_opt->path_pattern, child);
/*
if (switch_buf_array[index-1].considered) {
- gf_log ("switch", GF_LOG_DEBUG,
+ gf_msg_debug ("switch", 0,
"ambiguity found, exiting");
return -1;
}
@@ -771,13 +723,13 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
}
} else {
/* error */
- gf_log ("switch", GF_LOG_ERROR,
+ gf_msg ("switch", GF_LOG_ERROR, 0,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR,
"Check \"scheduler.switch.case\" "
"option in unify volume. Exiting");
goto err;
}
- GF_FREE (dup_str);
-
+
/* Link it to the main structure */
if (switch_buf) {
/* there are already few entries */
@@ -789,75 +741,79 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
switch_str = strtok_r (NULL, ";", &tmp_str);
}
- /* Now, all the pattern based considerations done, so for all the
- * remaining pattern, '*' to all the remaining child nodes
- */
- {
- for (index=0; index < child_count; index++) {
- /* check for considered flag */
- if (switch_buf_array[index].considered)
- continue;
- flag++;
- }
- if (!flag) {
- gf_log ("switch", GF_LOG_ERROR,
- "No nodes left for pattern '*'. Exiting");
- goto err;
- }
- switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
+ /* Now, all the pattern based considerations done, so for all the
+ * remaining pattern, '*' to all the remaining child nodes
+ */
+ {
+ for (index=0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ flag++;
+ }
+ if (!flag) {
+ gf_msg ("switch", GF_LOG_ERROR, 0,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR,
+ "No nodes left for pattern '*'. Exiting");
+ goto err;
+ }
+ switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
gf_switch_mt_switch_struct);
if (!switch_opt)
goto err;
- /* Add the '*' pattern to the array */
- memcpy (switch_opt->path_pattern, "*", 2);
- switch_opt->num_child = flag;
- switch_opt->array =
- GF_CALLOC (1,
+ /* Add the '*' pattern to the array */
+ memcpy (switch_opt->path_pattern, "*", 2);
+ switch_opt->num_child = flag;
+ switch_opt->array =
+ GF_CALLOC (1,
flag * sizeof (struct switch_sched_array),
gf_switch_mt_switch_sched_array);
if (!switch_opt->array)
goto err;
- flag = 0;
- for (index=0; index < child_count; index++) {
- /* check for considered flag */
- if (switch_buf_array[index].considered)
- continue;
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern,
- switch_buf_array[index].xl->name);
- switch_opt->array[flag].xl =
- switch_buf_array[index].xl;
- switch_buf_array[index].considered = 1;
- flag++;
+ flag = 0;
+ for (index=0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ gf_msg_debug ("switch", 0, "'%s'"
+ " pattern will be scheduled to \"%s\"",
+ switch_opt->path_pattern,
+ switch_buf_array[index].xl->name);
+
+ switch_opt->array[flag].xl =
+ switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ flag++;
}
- if (switch_buf) {
- /* there are already few entries */
- trav = switch_buf;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf = switch_opt;
- }
- }
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ }
/* */
conf->private = switch_buf;
return 0;
err:
+ GF_FREE (switch_buf_array);
+ GF_FREE (switch_opt);
+
if (switch_buf) {
- if (switch_buf_array)
- GF_FREE (switch_buf_array);
trav = switch_buf;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
switch_opt = trav;
trav = trav->next;
GF_FREE (switch_opt);
@@ -867,195 +823,86 @@ err:
}
-int
-init (xlator_t *this)
+int32_t
+switch_init (xlator_t *this)
{
dht_conf_t *conf = NULL;
- data_t *data = NULL;
- char *temp_str = NULL;
+ data_t *data = NULL;
int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "SWITCH needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_switch_mt_dht_conf_t);
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- }
-
- conf->unhashed_sticky_bit = 0;
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
+ conf = this->private;
- data = dict_get (this->options, "pattern.switch.case");
- if (data) {
+ data = dict_get (this->options, "pattern.switch.case");
+ if (data) {
/* TODO: */
ret = set_switch_pattern (this, conf, data->data);
if (ret) {
goto err;
}
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_switch_mt_dht_du_t);
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
}
this->private = conf;
-
return 0;
err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- GF_FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
+ dht_fini(this);
return -1;
}
-struct xlator_fops fops = {
- .lookup = switch_lookup,
- .create = switch_create,
- .mknod = switch_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
+class_methods_t class_methods = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
};
-struct xlator_cbks cbks = {
- .forget = dht_forget
+struct xlator_fops fops = {
+ .lookup = switch_lookup,
+ .create = switch_create,
+ .mknod = switch_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"pattern.switch.case"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+struct xlator_cbks cbks = {
+ .forget = dht_forget
};
diff --git a/xlators/cluster/dht/src/switch.sym b/xlators/cluster/dht/src/switch.sym
new file mode 100644
index 00000000000..780b5fc0387
--- /dev/null
+++ b/xlators/cluster/dht/src/switch.sym
@@ -0,0 +1,8 @@
+fops
+cbks
+class_methods
+dht_methods
+options
+mem_acct_init
+reconfigure
+dumpops
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
new file mode 100644
index 00000000000..20d3f24d3bf
--- /dev/null
+++ b/xlators/cluster/dht/src/tier-common.c
@@ -0,0 +1,1084 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "libxlator.h"
+#include "dht-common.h"
+#include "defaults.h"
+#include "tier-common.h"
+#include "tier.h"
+
+/*
+ * Forward declaration of the DHT link callback.  tier_link() in this file
+ * passes it to STACK_WIND when the hashed and cached subvolumes coincide.
+ */
+int
+dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno,
+              inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata);
+
+
+/*
+ * Callback for the link wound by tier_link() when the hashed and cached
+ * subvolumes differ.
+ *
+ * The first successful reply (local->call_cnt == 1) bumps call_cnt to 2 and
+ * repeats the link on local->cached_subvol with this same callback.  Any
+ * other reply -- a failure, or the reply to that second wind -- is unwound
+ * to the caller with a NULL xdata.
+ *
+ * Always returns 0.
+ */
+int
+tier_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno,
+               inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        dht_local_t *local = frame->local;
+
+        /*
+         * No continuation on DHT inode missing errors, as we should then
+         * have a good stbuf that states P2 happened.  We would get inode
+         * missing if the file completed migration between the lookup and
+         * the link call.
+         */
+        if ((op_ret != -1) && (local->call_cnt == 1)) {
+                local->call_cnt = 2;
+
+                /* Do this on the hot tier now */
+                STACK_WIND (frame, tier_link_cbk, local->cached_subvol,
+                            local->cached_subvol->fops->link,
+                            &local->loc, &local->loc2, xdata);
+
+                return 0;
+        }
+
+        DHT_STRIP_PHASE1_FLAGS (stbuf);
+
+        DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf,
+                          preparent, postparent, NULL);
+
+        return 0;
+}
+
+
+/*
+ * link fop of the tier translator.
+ *
+ * If the file's cached subvolume is also TIER_HASHED_SUBVOL, one link is
+ * wound there and answered through dht_link_cbk.  Otherwise the link is
+ * wound to TIER_HASHED_SUBVOL first and tier_link_cbk repeats it on the
+ * cached subvolume.
+ *
+ * On failure the fop is unwound with op_ret -1; an op_errno that is still
+ * -1 at that point is replaced by the current errno.  Always returns 0.
+ */
+int
+tier_link (call_frame_t *frame, xlator_t *this,
+           loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        dht_conf_t  *conf          = NULL;
+        dht_local_t *local         = NULL;
+        xlator_t    *hashed_subvol = NULL;
+        xlator_t    *cached_subvol = NULL;
+        int          op_errno      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (oldloc, err);
+        VALIDATE_OR_GOTO (newloc, err);
+
+        /* NOTE(review): presumably consumed by TIER_HASHED_SUBVOL below --
+         * confirm against the macro definition. */
+        conf = this->private;
+
+        local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+        local->call_cnt = 1;
+
+        cached_subvol = local->cached_subvol;
+        if (cached_subvol == NULL) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for path=%s", oldloc->path);
+                op_errno = ENOENT;
+                goto err;
+        }
+
+        hashed_subvol = TIER_HASHED_SUBVOL;
+
+        /* Keep a copy of the new location for the second wind. */
+        if (loc_copy (&local->loc2, newloc) == -1) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        if (hashed_subvol != cached_subvol) {
+                /* Create hardlinks to both the data file on the hot tier
+                   and the linkto file on the cold tier */
+                gf_uuid_copy (local->gfid, oldloc->inode->gfid);
+
+                STACK_WIND (frame, tier_link_cbk,
+                            hashed_subvol, hashed_subvol->fops->link,
+                            oldloc, newloc, xdata);
+        } else {
+                STACK_WIND (frame, dht_link_cbk,
+                            cached_subvol, cached_subvol->fops->link,
+                            oldloc, newloc, xdata);
+        }
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+                          NULL);
+        return 0;
+}
+
+
+
+/*
+ * Callback for the unlink that tier_create_cbk() winds to remove the
+ * linkfile after the data-file create failed.
+ *
+ * The outcome of the unlink itself is ignored: the create is always
+ * unwound as a failure carrying the errno saved in local->op_errno.
+ * Always returns 0.
+ */
+int
+tier_create_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie,
+                                     xlator_t *this, int op_ret, int op_errno,
+                                     struct iatt *preparent,
+                                     struct iatt *postparent, dict_t *xdata)
+{
+        dht_local_t *local = frame->local;
+
+        /* Drop the internal-fop marker from the create parameters. */
+        if (local->params != NULL)
+                dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+
+        DHT_STACK_UNWIND (create, frame, -1, local->op_errno,
+                          NULL, NULL, NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for the data-file create wound by tier_create() or by
+ * tier_create_linkfile_create_cbk().
+ *
+ * Failure: if a linkfile was created first (local->linked) and
+ * local->xattr_req exists, the error is saved in local and an unlink of the
+ * linkfile is wound to the hashed subvolume; the create is then unwound by
+ * tier_create_unlink_stale_linkto_cbk().  Otherwise the failure is unwound
+ * directly.
+ *
+ * Success: the parent's time context is updated, the layout of the new
+ * inode is preset to the replying subvolume and, when a linkfile exists,
+ * its attributes are healed from stbuf.
+ *
+ * A missing frame->local is reported as EINVAL.  Always returns 0.
+ */
+int
+tier_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno,
+                 fd_t *fd, inode_t *inode, struct iatt *stbuf,
+                 struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+        call_frame_t *prev          = NULL;
+        int           ret           = -1;
+        dht_local_t  *local         = NULL;
+        xlator_t     *hashed_subvol = NULL;
+        dht_conf_t   *conf          = NULL;
+
+        local = frame->local;
+        conf  = this->private;
+
+        hashed_subvol = TIER_HASHED_SUBVOL;
+
+        if (!local) {
+                op_ret   = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (op_ret == -1) {
+                if (local->linked == _gf_true && local->xattr_req) {
+                        /* Remember the create error; it is what the
+                         * unlink callback reports to the caller. */
+                        local->op_errno = op_errno;
+                        local->op_ret   = op_ret;
+                        ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
+                                (local->xattr_req);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        DHT_MSG_DICT_SET_FAILED,
+                                        "Failed to set dictionary value to "
+                                        "unlink of migrating file");
+                                goto out;
+                        }
+
+                        STACK_WIND (frame,
+                                    tier_create_unlink_stale_linkto_cbk,
+                                    hashed_subvol,
+                                    hashed_subvol->fops->unlink,
+                                    &local->loc, 0, local->xattr_req);
+                        return 0;
+                }
+                goto out;
+        }
+
+        prev = cookie;
+
+        if (local->loc.parent) {
+                dht_inode_ctx_time_update (local->loc.parent, this,
+                                           preparent, 0);
+
+                dht_inode_ctx_time_update (local->loc.parent, this,
+                                           postparent, 1);
+        }
+
+        ret = dht_layout_preset (this, prev->this, inode);
+        if (ret != 0) {
+                gf_msg_debug (this->name, 0,
+                              "could not set preset layout for subvol %s",
+                              prev->this->name);
+                op_ret   = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        local->op_errno = op_errno;
+
+        if (local->linked == _gf_true) {
+                local->stbuf = *stbuf;
+                dht_linkfile_attr_heal (frame, this);
+        }
+out:
+        /* local is NULL when we arrive here from the !local guard above,
+         * so it must be checked before xattr_req is touched. */
+        if (local && local->xattr_req) {
+                dict_del (local->xattr_req, TIER_LINKFILE_GFID);
+        }
+
+        DHT_STRIP_PHASE1_FLAGS (stbuf);
+
+        DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode,
+                          stbuf, preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * Callback for the linkfile creation started by tier_create().
+ *
+ * On success the gfid of the new linkfile is stored in local->xattr_req
+ * under TIER_LINKFILE_GFID and the data-file create is wound to
+ * TIER_UNHASHED_SUBVOL with tier_create_cbk as callback.  On failure
+ * (linkfile create failed, missing local/conf, out of memory) the create
+ * is unwound with op_ret -1.  Always returns 0.
+ */
+int
+tier_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret, int32_t op_errno,
+                                 inode_t *inode, struct iatt *stbuf,
+                                 struct iatt *preparent, struct iatt *postparent,
+                                 dict_t *xdata)
+{
+        dht_local_t   *local         = frame->local;
+        dht_conf_t    *conf          = NULL;
+        xlator_t      *cached_subvol = NULL;
+        unsigned char *gfid          = NULL;
+
+        if (local == NULL) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                goto err;
+        }
+
+        conf = this->private;
+        if (conf == NULL) {
+                op_errno        = EINVAL;
+                local->op_errno = op_errno;
+                goto err;
+        }
+
+        cached_subvol = TIER_UNHASHED_SUBVOL;
+
+        /* The keys that were only needed for the linkfile are removed
+         * before params is reused for the data file. */
+        if (local->params != NULL) {
+                dict_del (local->params, conf->link_xattr_name);
+                dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+        }
+
+        /*
+         * If the data-file create fails we delete the linkfile again.
+         * That deletion can race with a stale-linkfile cleanup triggered
+         * by a lookup from another client:
+         *
+         *   1. client 1 creates the linkfile for foo
+         *   2. client 1's data-file create fails
+         *   3. client 2 starts creating a file with the same name
+         *   4. client 2's lookup deletes client 1's linkfile as stale
+         *   5. client 2 creates a new linkfile with a different gfid
+         *   6. client 1 triggers its linkfile deletion
+         *   7. client 2's linkfile gets deleted
+         *   8. client 2's data file is created
+         *
+         * The result would be a file on the non-hashed subvol without a
+         * linkfile on the hashed subvol.  To avoid this we remember the
+         * gfid of the linkfile we created, so that the deletion can be
+         * validated against the existing file in the posix layer.
+         *
+         * The gfid goes into local->xattr_req because local->params is
+         * also used to create the data file; the linkfile deletion uses
+         * the local->xattr_req dictionary.
+         */
+        if (local->xattr_req == NULL) {
+                local->xattr_req = dict_new ();
+                if (local->xattr_req == NULL) {
+                        op_errno        = ENOMEM;
+                        local->op_errno = op_errno;
+                        goto err;
+                }
+        }
+
+        gfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+        if (gfid == NULL) {
+                op_errno        = ENOMEM;
+                local->op_errno = op_errno;
+                goto err;
+        }
+
+        gf_uuid_copy (gfid, stbuf->ia_gfid);
+
+        /* Failing to store the gfid is only logged; the create goes on. */
+        if (dict_set_dynptr (local->xattr_req, TIER_LINKFILE_GFID,
+                             gfid, sizeof (uuid_t))) {
+                GF_FREE (gfid);
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "Failed to set dictionary value"
+                        " : key = %s", TIER_LINKFILE_GFID);
+        }
+
+        STACK_WIND (frame, tier_create_cbk,
+                    cached_subvol, cached_subvol->fops->create,
+                    &local->loc, local->flags, local->mode,
+                    local->umask, local->fd, local->params);
+
+        return 0;
+
+err:
+        DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+                          NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Report whether the hot tier -- taken to be conf->subvolumes[1] -- appears
+ * among the decommissioned bricks.
+ *
+ * Returns _gf_true on a match, _gf_false otherwise (including when no
+ * subvolume is being decommissioned at all).
+ */
+gf_boolean_t
+tier_is_hot_tier_decommissioned (xlator_t *this)
+{
+        dht_conf_t *conf     = this->private;
+        xlator_t   *hot_tier = conf->subvolumes[1];
+        int         i        = 0;
+
+        /* Nothing is being decommissioned: skip the scan. */
+        if (!conf->decommission_subvols_cnt)
+                return _gf_false;
+
+        while (i < conf->subvolume_cnt) {
+                if (conf->decommissioned_bricks[i] &&
+                    conf->decommissioned_bricks[i] == hot_tier)
+                        return _gf_true;
+                i++;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * create fop of the tier translator.
+ *
+ * The file is created directly on the cold subvolume when the hot
+ * subvolume is filled or the hot tier is decommissioned.  Otherwise a
+ * linkfile is created on the cold subvolume first and
+ * tier_create_linkfile_create_cbk() goes on to create the data file.
+ *
+ * On failure the fop is unwound with op_ret -1; an op_errno that is still
+ * -1 at that point is replaced by the current errno.  Always returns 0.
+ */
+int
+tier_create (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, int32_t flags, mode_t mode,
+             mode_t umask, fd_t *fd, dict_t *params)
+{
+        dht_conf_t  *conf        = NULL;
+        dht_local_t *local       = NULL;
+        xlator_t    *hot_subvol  = NULL;
+        xlator_t    *cold_subvol = NULL;
+        int          op_errno    = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        conf = this->private;
+
+        dht_get_du_info (frame, this, loc);
+
+        local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        cold_subvol = TIER_HASHED_SUBVOL;
+        hot_subvol  = TIER_UNHASHED_SUBVOL;
+
+        if (conf->subvolumes[0] != cold_subvol)
+                hot_subvol = conf->subvolumes[0];
+
+        if (!dht_is_subvol_filled (this, hot_subvol) &&
+            !tier_is_hot_tier_decommissioned (this)) {
+                /* Hot tier is usable: linkfile on cold, data on hot. */
+                local->params        = dict_ref (params);
+                local->flags         = flags;
+                local->mode          = mode;
+                local->umask         = umask;
+                local->cached_subvol = hot_subvol;
+                local->hashed_subvol = cold_subvol;
+
+                gf_msg_debug (this->name, 0,
+                              "creating %s on %s (link at %s)", loc->path,
+                              hot_subvol->name, cold_subvol->name);
+
+                dht_linkfile_create (frame, tier_create_linkfile_create_cbk,
+                                     this, hot_subvol, cold_subvol, loc);
+
+                return 0;
+        }
+
+        /*
+         * Hot tier is full or decommissioned: create the file on the
+         * cold tier.
+         */
+        gf_msg_debug (this->name, 0,
+                      "creating %s on %s", loc->path,
+                      cold_subvol->name);
+
+        STACK_WIND (frame, tier_create_cbk,
+                    cold_subvol, cold_subvol->fops->create,
+                    loc, flags, mode, umask, fd, params);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+                          NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for the linkfile unlink wound by tier_unlink_lookup_cbk().
+ *
+ * A failure other than ENOENT is recorded in local and unwound as an
+ * error with NULL iatts; success and ENOENT are both unwound as success
+ * together with the parent iatts saved in local.  Always returns 0.
+ */
+int
+tier_unlink_nonhashed_linkfile_cbk (call_frame_t *frame, void *cookie,
+                                    xlator_t *this, int op_ret, int op_errno,
+                                    struct iatt *preparent,
+                                    struct iatt *postparent, dict_t *xdata)
+{
+        dht_local_t  *local = frame->local;
+        call_frame_t *prev  = cookie;
+
+        LOCK (&frame->lock);
+        {
+                if ((op_ret == -1) && (op_errno != ENOENT)) {
+                        local->op_errno = op_errno;
+                        local->op_ret   = op_ret;
+                        gf_msg_debug (this->name, op_errno,
+                                      "Unlink link: subvolume %s"
+                                      " returned -1",
+                                      prev->this->name);
+                } else {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (local->op_ret == -1) {
+                DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
+                                  NULL, NULL, NULL);
+                return 0;
+        }
+
+        DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+                          &local->preparent, &local->postparent, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for the hot-tier lookup wound by tier_unlink_cbk().
+ *
+ * If the lookup succeeded, an entry exists on the hot tier and an unlink
+ * is wound there.  If it failed, the unlink is unwound right away: ENOENT
+ * counts as success, any other errno is passed on with the lookup's
+ * op_ret.  Always returns 0.
+ */
+int
+tier_unlink_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, inode_t *inode,
+                        struct iatt *preparent, dict_t *xdata,
+                        struct iatt *postparent)
+{
+        dht_local_t  *local      = frame->local;
+        call_frame_t *prev       = cookie;
+        dht_conf_t   *conf       = this->private;
+        xlator_t     *hot_subvol = NULL;
+
+        hot_subvol = TIER_UNHASHED_SUBVOL;
+
+        if (op_ret == 0) {
+                /*
+                 * linkfile present on hot tier. unlinking the linkfile
+                 */
+                STACK_WIND (frame, tier_unlink_nonhashed_linkfile_cbk,
+                            hot_subvol, hot_subvol->fops->unlink,
+                            &local->loc, local->flags, NULL);
+                return 0;
+        }
+
+        LOCK (&frame->lock);
+        {
+                /* Nothing to remove on the hot tier is not an error. */
+                local->op_ret   = (op_errno == ENOENT) ? 0 : op_ret;
+                local->op_errno = op_errno;
+
+                gf_msg_debug (this->name, op_errno,
+                              "Lookup : subvolume %s returned -1",
+                              prev->this->name);
+        }
+        UNLOCK (&frame->lock);
+
+        DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+                          &local->preparent, &local->postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * Callback for the cold-tier linkfile unlink wound by tier_unlink_cbk().
+ *
+ * ENOENT and EINVAL replies are treated as success; any other failure is
+ * recorded in local and unwound as an error with NULL iatts.  Success is
+ * unwound with the parent iatts saved in local.  Always returns 0.
+ */
+int
+tier_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int op_ret, int op_errno, struct iatt *preparent,
+                          struct iatt *postparent, dict_t *xdata)
+{
+        dht_local_t  *local = frame->local;
+        call_frame_t *prev  = cookie;
+
+        LOCK (&frame->lock);
+        {
+                /* Ignore EINVAL for tier to ignore error when the file
+                   does not exist on the other tier */
+                if ((op_ret == -1) && (op_errno != ENOENT) &&
+                    (op_errno != EINVAL)) {
+                        local->op_errno = op_errno;
+                        local->op_ret   = op_ret;
+                        gf_msg_debug (this->name, op_errno,
+                                      "Unlink link: subvolume %s"
+                                      " returned -1",
+                                      prev->this->name);
+                } else {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (local->op_ret == -1) {
+                DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
+                                  NULL, NULL, NULL);
+                return 0;
+        }
+
+        DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+                          &local->preparent, &local->postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * Callback for the data-file unlink wound by tier_unlink().
+ *
+ * Once local->op_ret is 0 (unlink succeeded, or failed with ENOENT):
+ *   - if the cached subvolume is not the cold tier, the linkfile on the
+ *     cold tier is unlinked next;
+ *   - otherwise, if xdata carries an iatt showing a migration in phase 1
+ *     or phase 2, the hot tier is looked up so that the migration
+ *     destination can be removed as well.
+ * In every other case the unlink is unwound with the result kept in local.
+ * Always returns 0.
+ */
+int32_t
+tier_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        dht_conf_t   *conf      = this->private;
+        struct iatt  *stbuf     = NULL;
+        xlator_t     *hot_tier  = NULL;
+        xlator_t     *cold_tier = NULL;
+        int           ret       = -1;
+
+        cold_tier = TIER_HASHED_SUBVOL;
+        hot_tier  = TIER_UNHASHED_SUBVOL;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        local->op_ret = 0;
+
+                        local->postparent = *postparent;
+                        local->preparent  = *preparent;
+
+                        if (local->loc.parent) {
+                                dht_inode_ctx_time_update (local->loc.parent,
+                                                           this,
+                                                           &local->preparent,
+                                                           0);
+                                dht_inode_ctx_time_update (local->loc.parent,
+                                                           this,
+                                                           &local->postparent,
+                                                           1);
+                        }
+                } else {
+                        /* A file that is already gone is not a failure. */
+                        if (op_errno == ENOENT) {
+                                local->op_ret = 0;
+                        } else {
+                                local->op_ret   = -1;
+                                local->op_errno = op_errno;
+                        }
+                        gf_msg_debug (this->name, op_errno,
+                                      "Unlink: subvolume %s returned -1"
+                                      " with errno = %d",
+                                      prev->this->name, op_errno);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (local->op_ret == 0) {
+                if (cold_tier != local->cached_subvol) {
+                        /*
+                         * File is present in hot tier, so there will be
+                         * a link file on cold tier, deleting the linkfile
+                         * from cold tier
+                         */
+                        STACK_WIND (frame, tier_unlink_linkfile_cbk,
+                                    cold_tier,
+                                    cold_tier->fops->unlink, &local->loc,
+                                    local->flags, xdata);
+                        return 0;
+                }
+
+                ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY,
+                                    (void **) &stbuf);
+                if (!ret && stbuf && ((IS_DHT_MIGRATION_PHASE2 (stbuf)) ||
+                                      IS_DHT_MIGRATION_PHASE1 (stbuf))) {
+                        /*
+                         * File is migrating from cold to hot tier.
+                         * Delete the destination linkfile.
+                         */
+                        STACK_WIND (frame, tier_unlink_lookup_cbk,
+                                    hot_tier,
+                                    hot_tier->fops->lookup,
+                                    &local->loc, NULL);
+                        return 0;
+                }
+        }
+
+        DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+                          &local->preparent, &local->postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * unlink fop of the tier translator.
+ *
+ * The unlink is wound to the file's cached subvolume; tier_unlink_cbk()
+ * then removes whatever is left on the other tier.  For a regular file
+ * whose cached subvolume is TIER_HASHED_SUBVOL, DHT_IATT_IN_XDATA_KEY is
+ * added to xdata (a new dict is created if the caller passed none) so the
+ * callback can tell whether the file is being migrated.
+ *
+ * Reference handling: this function takes its own reference on xdata (or
+ * owns the dict it creates) and releases exactly that reference after the
+ * wind, so the caller's reference is left untouched on every path.
+ *
+ * On failure the fop is unwound with op_ret -1; an op_errno that is still
+ * -1 at that point is replaced by the current errno.  Always returns 0.
+ */
+int
+tier_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+             dict_t *xdata)
+{
+        xlator_t    *cached_subvol = NULL;
+        xlator_t    *hashed_subvol = NULL;
+        dht_conf_t  *conf          = NULL;
+        int          op_errno      = -1;
+        dht_local_t *local         = NULL;
+        int          ret           = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
+        if (!local) {
+                op_errno = ENOMEM;
+
+                goto err;
+        }
+
+        hashed_subvol = TIER_HASHED_SUBVOL;
+
+        cached_subvol = local->cached_subvol;
+        if (!cached_subvol) {
+                gf_msg_debug (this->name, 0,
+                              "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local->flags = xflag;
+
+        /*
+         * Pair the dict_unref() after the wind with a reference of our
+         * own.  Taking it only inside the branch below would make the
+         * unref drop the caller's reference whenever the branch is skipped.
+         */
+        if (xdata)
+                dict_ref (xdata);
+
+        if (IA_ISREG (loc->inode->ia_type) &&
+            (hashed_subvol == cached_subvol)) {
+                /*
+                 * File resides in cold tier. We need to stat
+                 * the file to see if it is being promoted.
+                 * If yes we need to delete the destination
+                 * file as well.
+                 *
+                 * Currently we are doing this check only for
+                 * regular files.
+                 */
+                if (!xdata)
+                        xdata = dict_new ();
+                if (xdata) {
+                        ret = dict_set_dynstr_with_alloc (xdata,
+                                        DHT_IATT_IN_XDATA_KEY, "yes");
+                        if (ret) {
+                                gf_msg_debug (this->name, 0,
+                                        "Failed to set dictionary key %s",
+                                        DHT_IATT_IN_XDATA_KEY);
+                        }
+                }
+        }
+
+        /*
+         * Delete the data file on the cached subvolume first; the
+         * callback takes care of the linkfile on the other tier.
+         */
+        STACK_WIND (frame, tier_unlink_cbk,
+                    cached_subvol, cached_subvol->fops->unlink, loc,
+                    xflag, xdata);
+        if (xdata)
+                dict_unref (xdata);
+        return 0;
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for the readdir wound by tier_do_readdir() (and by itself).
+ *
+ * Every returned entry is copied into a local list which is then unwound.
+ * If nothing was copied but the last offset seen is non-zero, the readdir
+ * is re-wound to the same subvolume from that offset.  A negative op_ret
+ * is reported to the caller as 0 entries.  Always returns 0.
+ */
+int
+tier_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, gf_dirent_t *orig_entries,
+                  dict_t *xdata)
+{
+        dht_local_t  *local       = frame->local;
+        call_frame_t *prev        = cookie;
+        gf_dirent_t   entries;
+        gf_dirent_t  *orig_entry  = NULL;
+        gf_dirent_t  *entry       = NULL;
+        xlator_t     *next_subvol = NULL;
+        off_t         next_offset = 0;
+        int           count       = 0;
+
+        INIT_LIST_HEAD (&entries.list);
+
+        if (op_ret >= 0) {
+                list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+                        next_offset = orig_entry->d_off;
+
+                        entry = gf_dirent_for_name (orig_entry->d_name);
+                        if (entry == NULL) {
+                                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                        DHT_MSG_NO_MEMORY,
+                                        "Memory allocation failed ");
+                                goto unwind;
+                        }
+
+                        entry->d_off  = orig_entry->d_off;
+                        entry->d_ino  = orig_entry->d_ino;
+                        entry->d_type = orig_entry->d_type;
+                        entry->d_len  = orig_entry->d_len;
+
+                        list_add_tail (&entry->list, &entries.list);
+                        count++;
+                }
+                op_ret = count;
+        }
+
+        /* non-zero next_offset means that
+           EOF is not yet hit on the current subvol
+        */
+        if ((count == 0) && (next_offset != 0)) {
+                next_subvol = prev->this;
+
+                STACK_WIND (frame, tier_readdir_cbk,
+                            next_subvol, next_subvol->fops->readdir,
+                            local->fd, local->size, next_offset, NULL);
+                return 0;
+        }
+
+unwind:
+        if (op_ret < 0)
+                op_ret = 0;
+
+        DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * Callback for the readdirp wound by tier_do_readdir() (and by itself).
+ *
+ * Entries with an invalid stat are dropped; every other entry is copied
+ * into a local list, and the layout / time context of the inodes involved
+ * is refreshed on the way.  If nothing was copied but the last offset seen
+ * is non-zero, the readdirp is re-wound to the same subvolume from that
+ * offset.  A negative op_ret is reported to the caller as 0 entries.
+ * Always returns 0.
+ */
+int
+tier_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                   int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+        dht_local_t *local = NULL;
+        gf_dirent_t entries;
+        gf_dirent_t *orig_entry = NULL;
+        gf_dirent_t *entry = NULL;
+        call_frame_t *prev = NULL;
+        xlator_t *next_subvol = NULL;
+        off_t next_offset = 0;
+        int count = 0;
+        dht_conf_t *conf = NULL;
+        int ret = 0;
+        inode_table_t *itable = NULL;
+        inode_t *inode = NULL;
+
+        INIT_LIST_HEAD (&entries.list);
+        prev = cookie;
+        local = frame->local;
+        /* Inode table used for the gfid lookups below; NULL without an fd. */
+        itable = local->fd ? local->fd->inode->table : NULL;
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
+
+        if (op_ret < 0)
+                goto done;
+
+        list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+                /* Remembered even for skipped entries, so that a re-wind
+                 * continues after them. */
+                next_offset = orig_entry->d_off;
+
+                if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
+                        /*stat failed somewhere- ignore this entry*/
+                        continue;
+                }
+
+                entry = gf_dirent_for_name (orig_entry->d_name);
+                if (!entry) {
+
+                        goto unwind;
+                }
+
+                entry->d_off = orig_entry->d_off;
+                entry->d_stat = orig_entry->d_stat;
+                entry->d_ino = orig_entry->d_ino;
+                entry->d_type = orig_entry->d_type;
+                entry->d_len = orig_entry->d_len;
+
+                if (orig_entry->dict)
+                        entry->dict = dict_ref (orig_entry->dict);
+
+                if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+                                       orig_entry->dict,
+                                       conf->link_xattr_name)) {
+                        /* Linkfile: if the inode is already in the table,
+                         * preset its layout to TIER_UNHASHED_SUBVOL. */
+                        inode = inode_find (itable,
+                                            orig_entry->d_stat.ia_gfid);
+                        if (inode) {
+                                ret = dht_layout_preset
+                                                (this, TIER_UNHASHED_SUBVOL,
+                                                 inode);
+                                if (ret)
+                                        gf_msg (this->name,
+                                                GF_LOG_WARNING, 0,
+                                                DHT_MSG_LAYOUT_SET_FAILED,
+                                                "failed to link the layout"
+                                                " in inode");
+                                inode_unref (inode);
+                                inode = NULL;
+                        }
+
+                } else if (IA_ISDIR(entry->d_stat.ia_type)) {
+                        /* Directory: only the time context is updated. */
+                        if (orig_entry->inode) {
+                                dht_inode_ctx_time_update (orig_entry->inode,
+                                                           this, &entry->d_stat,
+                                                           1);
+                        }
+                } else {
+                        /* Any other entry: preset the layout to the
+                         * subvolume that answered this readdirp. */
+                        if (orig_entry->inode) {
+                                ret = dht_layout_preset (this, prev->this,
+                                                         orig_entry->inode);
+                                if (ret)
+                                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                                DHT_MSG_LAYOUT_SET_FAILED,
+                                                "failed to link the layout "
+                                                "in inode");
+
+                                entry->inode = inode_ref (orig_entry->inode);
+                        } else if (itable) {
+                                /*
+                                 * orig_entry->inode might be null if any upper
+                                 * layer xlators below client set to null, to
+                                 * force a lookup on the inode even if the inode
+                                 * is present in the inode table. In that case
+                                 * we just update the ctx to make sure we didn't
+                                 * missed anything.
+                                 */
+                                inode = inode_find (itable,
+                                                    orig_entry->d_stat.ia_gfid);
+                                if (inode) {
+                                        ret = dht_layout_preset
+                                                        (this, TIER_HASHED_SUBVOL,
+                                                         inode);
+                                        if (ret)
+                                                gf_msg (this->name,
+                                                     GF_LOG_WARNING, 0,
+                                                     DHT_MSG_LAYOUT_SET_FAILED,
+                                                     "failed to link the layout"
+                                                     " in inode");
+                                        inode_unref (inode);
+                                        inode = NULL;
+                                }
+                        }
+                }
+                list_add_tail (&entry->list, &entries.list);
+                count++;
+        }
+        op_ret = count;
+
+done:
+        if (count == 0) {
+                /* non-zero next_offset means that
+                   EOF is not yet hit on the current subvol
+                */
+                if (next_offset != 0) {
+                        next_subvol = prev->this;
+                } else {
+                        goto unwind;
+                }
+
+                STACK_WIND (frame, tier_readdirp_cbk,
+                            next_subvol, next_subvol->fops->readdirp,
+                            local->fd, local->size, next_offset,
+                            local->xattr);
+                return 0;
+        }
+
+unwind:
+        /* Errors are reported upwards as an empty listing. */
+        if (op_ret < 0)
+                op_ret = 0;
+
+        DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * Common implementation behind tier_readdir() and tier_readdirp().
+ *
+ * Stores fd, size and the request dict in a fresh local and winds either
+ * readdirp or readdir (selected by whichop) to TIER_HASHED_SUBVOL.  For
+ * readdirp, conf->link_xattr_name is requested through the xattr dict; a
+ * new dict is created when the caller supplied none.
+ *
+ * On failure the fop is unwound as readdir with op_ret -1; an op_errno
+ * that is still -1 at that point is replaced by the current errno.
+ * Always returns 0.
+ */
+int
+tier_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t yoff, int whichop, dict_t *dict)
+{
+        dht_conf_t  *conf          = NULL;
+        dht_local_t *local         = NULL;
+        xlator_t    *hashed_subvol = NULL;
+        int          op_errno      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (this->private, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame, NULL, NULL, whichop);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->fd   = fd_ref (fd);
+        local->size = size;
+        if (dict)
+                local->xattr_req = dict_ref (dict);
+        else
+                local->xattr_req = NULL;
+
+        hashed_subvol = TIER_HASHED_SUBVOL;
+
+        /* TODO: do proper readdir */
+        if (whichop != GF_FOP_READDIRP) {
+                STACK_WIND (frame, tier_readdir_cbk, hashed_subvol,
+                            hashed_subvol->fops->readdir,
+                            fd, size, yoff, local->xattr);
+                return 0;
+        }
+
+        local->xattr = dict ? dict_ref (dict) : dict_new ();
+
+        /* A failure to set the key is only logged; the wind still happens. */
+        if (local->xattr &&
+            dict_set_uint32 (local->xattr, conf->link_xattr_name, 256)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "Failed to set dictionary value"
+                        " : key = %s",
+                        conf->link_xattr_name);
+        }
+
+        STACK_WIND (frame, tier_readdirp_cbk, hashed_subvol,
+                    hashed_subvol->fops->readdirp,
+                    fd, size, yoff, local->xattr);
+
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * readdir fop of the tier translator.
+ *
+ * The request is upgraded to readdirp when any entry of
+ * conf->subvolume_status is unset or when conf->use_readdirp is set; with
+ * no conf at all it stays a plain readdir.  The caller's xdata is not
+ * forwarded.  Always returns 0.
+ */
+int
+tier_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t yoff, dict_t *xdata)
+{
+        dht_conf_t *conf = this->private;
+        int         op   = GF_FOP_READDIR;
+        int         i    = 0;
+
+        if (conf != NULL) {
+                while (i < conf->subvolume_cnt) {
+                        if (!conf->subvolume_status[i]) {
+                                op = GF_FOP_READDIRP;
+                                break;
+                        }
+                        i++;
+                }
+
+                if (conf->use_readdirp)
+                        op = GF_FOP_READDIRP;
+        }
+
+        tier_do_readdir (frame, this, fd, size, yoff, op, 0);
+        return 0;
+}
+
+/*
+ * readdirp fop of the tier translator: delegates to tier_do_readdir() with
+ * GF_FOP_READDIRP and the caller's dict.  Returns 0, which is the only
+ * value tier_do_readdir() returns.
+ */
+int
+tier_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+               off_t yoff, dict_t *dict)
+{
+        return tier_do_readdir (frame, this, fd, size, yoff,
+                                GF_FOP_READDIRP, dict);
+}
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
new file mode 100644
index 00000000000..0ef96aca032
--- /dev/null
+++ b/xlators/cluster/dht/src/tier-common.h
@@ -0,0 +1,62 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TIER_COMMON_H_
+#define _TIER_COMMON_H_
+/* Function declarations */
+int
+tier_create_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+tier_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+tier_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata);
+
+int
+tier_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t
+tier_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t
+tier_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *dict);
+
+int
+tier_readdir (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *xdata);
+
+
+
+int
+tier_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+#endif
+
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
new file mode 100644
index 00000000000..356af021563
--- /dev/null
+++ b/xlators/cluster/dht/src/tier.c
@@ -0,0 +1,2518 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <dlfcn.h>
+
+#include "dht-common.h"
+#include "tier.h"
+#include "tier-common.h"
+#include "syscall.h"
+
+/*Hard coded DB info*/
+static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
+/*Hard coded DB info*/
+
+/*Mutex for updating the data movement stats*/
+static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Stores the path location of promotion query files */
+static char *promotion_qfile;
+/* Stores the path location of demotion query files */
+static char *demotion_qfile;
+
+static void *libhandle;
+static gfdb_methods_t gfdb_methods;
+
+#define DB_QUERY_RECORD_SIZE 4096
+
+/*
+ * Release a query-file array: close every fd slot that is not -1, then free
+ * the fd array and the container itself.
+ * A NULL qfile_array is simply handed to GF_FREE.
+ */
+static void
+qfile_array_free (tier_qfile_array_t *qfile_array)
+{
+        ssize_t slot = 0;
+
+        if (qfile_array != NULL) {
+                /* fd_array is NULL when qfile_array_new() failed to
+                 * allocate it; the loop is skipped in that case */
+                for (slot = 0;
+                     qfile_array->fd_array &&
+                     slot < qfile_array->array_size;
+                     slot++) {
+                        if (qfile_array->fd_array[slot] == -1)
+                                continue;
+                        sys_close (qfile_array->fd_array[slot]);
+                }
+                GF_FREE (qfile_array->fd_array);
+        }
+
+        GF_FREE (qfile_array);
+}
+
+
+/*
+ * Allocate a query-file array with array_size fd slots.
+ *
+ * Every slot starts at -1, next_index at 0 and exhausted_count equals the
+ * array size, so is_qfile_array_empty() reports the new array as empty.
+ *
+ * Returns the new array, or NULL when array_size is not positive or an
+ * allocation fails.
+ */
+static tier_qfile_array_t *
+qfile_array_new (ssize_t array_size)
+{
+        tier_qfile_array_t *new_array = NULL;
+        ssize_t             slot      = 0;
+
+        GF_VALIDATE_OR_GOTO ("tier", (array_size > 0), fail);
+
+        new_array = GF_CALLOC (1, sizeof (tier_qfile_array_t),
+                               gf_tier_mt_qfile_array_t);
+        if (!new_array) {
+                gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to allocate memory for tier_qfile_array_t");
+                goto fail;
+        }
+
+        new_array->fd_array = GF_CALLOC (array_size, sizeof (int),
+                                         gf_dht_mt_int32_t);
+        if (!new_array->fd_array) {
+                gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to allocate memory for "
+                        "tier_qfile_array_t->fd_array");
+                goto fail;
+        }
+
+        /* No file is open yet: mark every slot as unused */
+        while (slot < array_size)
+                new_array->fd_array[slot++] = -1;
+
+        new_array->array_size = array_size;
+        new_array->next_index = 0;
+
+        /* An array without open fds counts as fully exhausted */
+        new_array->exhausted_count = new_array->array_size;
+
+        return new_array;
+
+fail:
+        /* qfile_array_free() copes with NULL and half-built arrays */
+        qfile_array_free (new_array);
+        return NULL;
+}
+
+
+/*
+ * Tell whether the query-file array has nothing left to read, i.e. its
+ * exhausted_count equals its array_size (also the state of a new array).
+ */
+static gf_boolean_t
+is_qfile_array_empty (tier_qfile_array_t *qfile_array)
+{
+        if (qfile_array->exhausted_count != qfile_array->array_size)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+
+/*
+ * Advance next_index in a rotational manner (wrapping to 0 after the last
+ * slot) until it points at a slot whose fd is not -1, giving up after
+ * array_size steps.  Does nothing when the array is empty/exhausted.
+ */
+static void
+shift_next_index (tier_qfile_array_t *qfile_array)
+{
+        int steps = 0;
+
+        if (is_qfile_array_empty (qfile_array))
+                return;
+
+        do {
+                if (qfile_array->next_index == (qfile_array->array_size - 1))
+                        qfile_array->next_index = 0;
+                else
+                        qfile_array->next_index++;
+
+                steps++;
+        } while ((qfile_array->fd_array[qfile_array->next_index] == -1) &&
+                 (steps < qfile_array->array_size));
+}
+
+/*
+ * Read the next query record from a list of query files, visiting the files
+ * in a round-robin manner.  This function is not thread safe.
+ *
+ * A file whose read returns <= 0 (EOF or read error) is closed, its slot is
+ * set to -1 and it is counted as exhausted; reading then continues with the
+ * next file.
+ *
+ * Returns:
+ * > 0 the size of the query record that was read into *query_record.
+ *   0 when every query file of the list is exhausted.
+ * < 0 when qfile_array or qfile_array->fd_array is NULL.
+ */
+static int
+read_query_record_list (tier_qfile_array_t *qfile_array,
+                        gfdb_query_record_t **query_record)
+{
+        int ret    = -1;
+        int cur_fd = 0;
+
+        GF_VALIDATE_OR_GOTO ("tier", qfile_array, out);
+        GF_VALIDATE_OR_GOTO ("tier", qfile_array->fd_array, out);
+
+        for (;;) {
+                if (is_qfile_array_empty (qfile_array)) {
+                        ret = 0;
+                        break;
+                }
+
+                cur_fd = qfile_array->fd_array[qfile_array->next_index];
+                ret = gfdb_methods.gfdb_read_query_record (cur_fd,
+                                                           query_record);
+                if (ret > 0) {
+                        /* Got a record: let the next call start with the
+                         * following file */
+                        shift_next_index (qfile_array);
+                        break;
+                }
+
+                /* EOF or error on this file: retire its slot and move on */
+                sys_close (cur_fd);
+                qfile_array->fd_array[qfile_array->next_index] = -1;
+                qfile_array->exhausted_count++;
+                shift_next_index (qfile_array);
+        }
+out:
+        return ret;
+}
+
+
+/* Check and update the watermark every WM_INTERVAL seconds */
+#define WM_INTERVAL 5
+
+/*
+ * Compare the node uuid reported for loc with defrag->node_uuid.
+ *
+ * The uuid is fetched through the GF_XATTR_NODE_UUID_KEY xattr of loc and
+ * parsed with gf_uuid_parse().
+ *
+ * Returns  0 when both uuids are equal,
+ *          1 when they differ,
+ *         -1 on an invalid argument or when the uuid cannot be fetched or
+ *            parsed.
+ */
+static int
+tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
+{
+        uuid_t   file_node_uuid = {0,};
+        char    *uuid_text      = NULL;
+        dict_t  *xattrs         = NULL;
+        int      ret            = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+
+        if (syncop_getxattr (this, loc, &xattrs, GF_XATTR_NODE_UUID_KEY,
+                             NULL, NULL) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Unable to get NODE_UUID_KEY %s %s\n",
+                        loc->name, loc->path);
+                goto out;
+        }
+
+        if (dict_get_str (xattrs, GF_XATTR_NODE_UUID_KEY, &uuid_text) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to get node-uuid for %s", loc->path);
+                goto out;
+        }
+
+        if (gf_uuid_parse (uuid_text, file_node_uuid) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "uuid_parse failed for %s", loc->path);
+                goto out;
+        }
+
+        if (gf_uuid_compare (file_node_uuid, defrag->node_uuid) == 0) {
+                ret = 0;
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "%s does not belong to this node", loc->path);
+                ret = 1;
+        }
+
+out:
+        if (xattrs != NULL)
+                dict_unref (xattrs);
+
+        return ret;
+}
+
+/*
+ * Refresh the hot tier usage statistics and the current watermark level.
+ *
+ * Only acts when the tier mode is TIER_MODE_WM; in any other mode it returns
+ * 0 without touching the statistics.  It runs statfs on conf->subvolumes[1]
+ * (the hot subvolume), stores blocks_total, blocks_used and percent_full in
+ * defrag->tier_conf under dm_stat_mutex, and updates watermark_last, logging
+ * whenever the level changes.
+ *
+ * Returns 0 on success; -1 when this->private or conf->defrag is missing,
+ * when the request dict cannot be built, or when statfs reports zero total
+ * blocks; otherwise the non-zero value returned by syncop_statfs().
+ */
+int
+tier_check_watermark (xlator_t *this, loc_t *root_loc)
+{
+        tier_watermark_op_t  wm        = TIER_WM_NONE;
+        int                  ret       = -1;
+        gf_defrag_info_t    *defrag    = NULL;
+        dht_conf_t          *conf      = NULL;
+        dict_t              *xdata     = NULL;
+        struct statvfs       statfs    = {0, };
+        gf_tier_conf_t      *tier_conf = NULL;
+
+        conf = this->private;
+        if (!conf)
+                goto exit;
+
+        defrag = conf->defrag;
+        if (!defrag)
+                goto exit;
+
+        tier_conf = &defrag->tier_conf;
+
+        if (tier_conf->mode != TIER_MODE_WM) {
+                ret = 0;
+                goto exit;
+        }
+
+        xdata = dict_new ();
+        if (!xdata) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        DHT_MSG_NO_MEMORY,
+                        "failed to allocate dictionary");
+                ret = -1;
+                goto exit;
+        }
+
+        ret = dict_set_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_DICT_SET_FAILED,
+                        "Failed to set "
+                        GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
+                ret = -1;
+                goto exit;
+        }
+
+        /* Find how much free space is on the hot subvolume, then see if
+         * that value is less than or greater than the user defined
+         * watermarks.  The results are stashed in the tier_conf data
+         * structure. */
+        ret = syncop_statfs (conf->subvolumes[1], root_loc, &statfs,
+                             xdata, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "Unable to obtain statfs.");
+                goto exit;
+        }
+
+        /* percent_full is computed by dividing by f_blocks: refuse to go
+         * on (keeping the previous statistics) rather than divide by 0 */
+        if (statfs.f_blocks == 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "statfs reported zero total blocks. "
+                        "Unable to compute watermark.");
+                ret = -1;
+                goto exit;
+        }
+
+        pthread_mutex_lock (&dm_stat_mutex);
+
+        tier_conf->blocks_total = statfs.f_blocks;
+        tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
+
+        tier_conf->percent_full = (100 * tier_conf->blocks_used) /
+                                  statfs.f_blocks;
+        pthread_mutex_unlock (&dm_stat_mutex);
+
+        if (tier_conf->percent_full < tier_conf->watermark_low) {
+                wm = TIER_WM_LOW;
+
+        } else if (tier_conf->percent_full < tier_conf->watermark_hi) {
+                wm = TIER_WM_MID;
+
+        } else {
+                wm = TIER_WM_HI;
+        }
+
+        if (wm != tier_conf->watermark_last) {
+
+                tier_conf->watermark_last = wm;
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "Tier watermark now %d", wm);
+        }
+
+exit:
+        if (xdata)
+                dict_unref (xdata);
+        return ret;
+}
+
+
+/*
+ * Report whether the hot tier is considered full: tier_conf is non-NULL, the
+ * mode is TIER_MODE_WM and the last recorded watermark is TIER_WM_HI.
+ */
+static gf_boolean_t
+is_hot_tier_full (gf_tier_conf_t *tier_conf)
+{
+        if (!tier_conf)
+                return _gf_false;
+
+        if (tier_conf->mode != TIER_MODE_WM)
+                return _gf_false;
+
+        return (tier_conf->watermark_last == TIER_WM_HI) ? _gf_true
+                                                         : _gf_false;
+}
+
+/*
+ * Decide whether the current file should be migrated.
+ *
+ * Outside TIER_MODE_WM every file is migrated.  In watermark mode the
+ * watermark is refreshed through tier_check_watermark() and the decision
+ * depends on the resulting level:
+ *   TIER_WM_LOW - promote only,
+ *   TIER_WM_HI  - demote only,
+ *   TIER_WM_MID - random draw in [0, 99] compared with percent_full:
+ *                 promote when the draw is greater than percent_full,
+ *                 demote when it is less than or equal to it.
+ *
+ * Returns non-zero to migrate, 0 to skip (also when this->private or
+ * conf->defrag is NULL, or the watermark could not be refreshed).
+ */
+int
+tier_do_migration (xlator_t *this, int promote, loc_t *root_loc)
+{
+        dht_conf_t       *conf      = this->private;
+        gf_defrag_info_t *defrag    = NULL;
+        gf_tier_conf_t   *tier_conf = NULL;
+        long              draw      = 0;
+
+        if (!conf || !conf->defrag)
+                return 0;
+
+        defrag = conf->defrag;
+        tier_conf = &defrag->tier_conf;
+
+        if (tier_conf->mode != TIER_MODE_WM)
+                return 1;
+
+        if (tier_check_watermark (this, root_loc) != 0) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to get watermark");
+                return 0;
+        }
+
+        switch (tier_conf->watermark_last) {
+        case TIER_WM_LOW:
+                return promote ? 1 : 0;
+        case TIER_WM_HI:
+                return promote ? 0 : 1;
+        case TIER_WM_MID:
+                draw = random () % 100;
+                if (promote)
+                        return (draw > tier_conf->percent_full);
+                return (draw <= tier_conf->percent_full);
+        default:
+                break;
+        }
+
+        return 0;
+}
+
+/*
+ * Set the promote_in_progress or demote_in_progress flag of tier_conf
+ * (chosen by is_promotion) to value while holding pause_mutex.
+ */
+static void
+tier_mark_in_progress (gf_tier_conf_t *tier_conf, int is_promotion, int value)
+{
+        pthread_mutex_lock (&tier_conf->pause_mutex);
+        if (is_promotion)
+                tier_conf->promote_in_progress = value;
+        else
+                tier_conf->demote_in_progress = value;
+        pthread_mutex_unlock (&tier_conf->pause_mutex);
+}
+
+/*
+ * Migrate one file by issuing syncop_setxattr() with migrate_data on loc.
+ * The matching in-progress flag of tier_conf is raised for the duration of
+ * the call and cleared afterwards.
+ *
+ * Returns the value returned by syncop_setxattr().
+ */
+int
+tier_migrate (xlator_t *this, int is_promotion, dict_t *migrate_data,
+              loc_t *loc, gf_tier_conf_t *tier_conf)
+{
+        int ret = -1;
+
+        tier_mark_in_progress (tier_conf, is_promotion, 1);
+
+        /* Data migration */
+        ret = syncop_setxattr (this, loc, migrate_data, 0, NULL, NULL);
+
+        tier_mark_in_progress (tier_conf, is_promotion, 0);
+
+        return ret;
+}
+
+/*
+ * Walk the query records of query_cbk_args->qfile_array and migrate every
+ * eligible file (promotion or demotion, per query_cbk_args->is_promotion).
+ *
+ * For each record only the first hard link is considered.  The parent is
+ * looked up by gfid to obtain its path, the file is looked up by name and
+ * the migration itself is triggered through tier_migrate().
+ *
+ * The walk stops when the records are exhausted, the defrag status is no
+ * longer GF_DEFRAG_STATUS_STARTED, tiering is paused, the cycle time or the
+ * per-cycle file/byte limits are reached, or (promotion only) the hot tier
+ * is at the high watermark.
+ *
+ * Per record status: 0 success, -1 failure, 1 ignored (not counted).
+ *
+ * Returns the sum of the per-record statuses accumulated before the walk
+ * ended: 0 when no accounted record failed (also when validation or setup
+ * fails before the walk starts), a negative value otherwise.
+ */
+static int
+tier_migrate_using_query_file (void *_args)
+{
+        int                  ret                  = -1;
+        query_cbk_args_t    *query_cbk_args       = (query_cbk_args_t *) _args;
+        xlator_t            *this                 = NULL;
+        gf_defrag_info_t    *defrag               = NULL;
+        gfdb_query_record_t *query_record         = NULL;
+        gfdb_link_info_t    *link_info            = NULL;
+        struct iatt          par_stbuf            = {0,};
+        struct iatt          current              = {0,};
+        loc_t                p_loc                = {0,};
+        loc_t                loc                  = {0,};
+        dict_t              *migrate_data         = NULL;
+        dict_t              *xdata_request        = NULL;
+        dict_t              *xdata_response       = NULL;
+        char                *parent_path          = NULL;
+        inode_t             *linked_inode         = NULL;
+        /*
+         * per_file_status and per_link_status
+         *  0 : success
+         * -1 : failure
+         *  1 : ignore the status and dont count for migration
+         * */
+        int                  per_file_status      = 0;
+        int                  per_link_status      = 0;
+        int                  total_status         = 0;
+        xlator_t            *src_subvol           = NULL;
+        dht_conf_t          *conf                 = NULL;
+        uint64_t             total_migrated_bytes = 0;
+        int                  total_files          = 0;
+        loc_t                root_loc             = { 0 };
+        gfdb_time_t          start_time           = { 0 };
+        gfdb_time_t          current_time         = { 0 };
+        int                  total_time           = 0;
+        int                  max_time             = 0;
+
+
+        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);
+        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
+        this = query_cbk_args->this;
+        GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out);
+        GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->qfile_array, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        conf = this->private;
+
+        defrag = query_cbk_args->defrag;
+        migrate_data = dict_new ();
+        if (!migrate_data)
+                goto out;
+
+        xdata_request = dict_new ();
+        if (!xdata_request) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to create xdata_request dict");
+                goto out;
+        }
+        ret = dict_set_int32 (xdata_request,
+                              GET_ANCESTRY_PATH_KEY, 42);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to set value to dict : key %s \n",
+                        GET_ANCESTRY_PATH_KEY);
+                goto out;
+        }
+
+        dht_build_root_loc (defrag->root_inode, &root_loc);
+
+        ret = gettimeofday (&start_time, NULL);
+        if (query_cbk_args->is_promotion) {
+                max_time = defrag->tier_conf.tier_promote_frequency;
+        } else {
+                max_time = defrag->tier_conf.tier_demote_frequency;
+        }
+
+        /* Per file */
+        while ((ret = read_query_record_list (query_cbk_args->qfile_array,
+                                              &query_record)) != 0) {
+
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_LOG_TIER_ERROR,
+                                "Failed to fetch query record "
+                                "from query file");
+                        goto out;
+                }
+
+                if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_LOG_TIER_ERROR,
+                                "Exiting tier migration as "
+                                "defrag status is not started");
+                        goto out;
+                }
+
+                ret = gettimeofday (&current_time, NULL);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_LOG_TIER_ERROR,
+                                "Could not get current time.");
+                        goto out;
+                }
+
+                total_time = current_time.tv_sec - start_time.tv_sec;
+                if (total_time > max_time) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                DHT_MSG_LOG_TIER_STATUS,
+                                "Max cycle time reached. Exiting migration.");
+                        goto out;
+                }
+
+                per_file_status = 0;
+                per_link_status = 0;
+
+                /* link_info points into the link list of a query record.
+                 * The previous record has been freed by now, so never
+                 * carry its link_info over to this record. */
+                link_info = NULL;
+
+                dict_del (migrate_data, GF_XATTR_FILE_MIGRATE_KEY);
+
+                dict_del (migrate_data, "from.migrator");
+
+                if (gf_defrag_get_pause_state (&defrag->tier_conf)
+                    != TIER_RUNNING) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                DHT_MSG_LOG_TIER_STATUS,
+                                "Tiering paused. "
+                                "Exiting tier_migrate_using_query_file");
+                        break;
+                }
+
+                if (!tier_do_migration (this, query_cbk_args->is_promotion,
+                                        &root_loc)) {
+                        gfdb_methods.gfdb_query_record_free (query_record);
+                        query_record = NULL;
+
+                        /* We have crossed the high watermark. Stop processing
+                         * files if this is a promotion cycle so demotion gets
+                         * a chance to start if not already running*/
+
+                        if (query_cbk_args->is_promotion &&
+                            is_hot_tier_full (&defrag->tier_conf)) {
+
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        DHT_MSG_LOG_TIER_STATUS,
+                                        "High watermark crossed during "
+                                        "promotion. Exiting "
+                                        "tier_migrate_using_query_file");
+                                break;
+                        }
+                        continue;
+                }
+
+                if (!list_empty (&query_record->link_list)) {
+                        per_file_status =
+                                dict_set_str (migrate_data,
+                                              GF_XATTR_FILE_MIGRATE_KEY,
+                                              "force");
+                        if (per_file_status) {
+                                goto per_file_out;
+                        }
+
+                        /* Flag to suggest the xattr call is from migrator */
+                        per_file_status = dict_set_str (migrate_data,
+                                                        "from.migrator",
+                                                        "yes");
+                        if (per_file_status) {
+                                goto per_file_out;
+                        }
+
+                        /* Flag to suggest its a tiering migration
+                         * The reason for this dic key-value is that
+                         * promotions and demotions are multithreaded
+                         * so the original frame from gf_defrag_start()
+                         * is not carried. A new frame will be created when
+                         * we do syncop_setxattr(). This doesnot have the
+                         * frame->root->pid of the original frame. So we pass
+                         * this dic key-value when we do syncop_setxattr() to
+                         * do data migration and set the frame->root->pid to
+                         * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just
+                         * before calling dht_start_rebalance_task() */
+                        per_file_status = dict_set_str (migrate_data,
+                                                TIERING_MIGRATION_KEY, "yes");
+                        if (per_file_status) {
+                                goto per_file_out;
+                        }
+
+                }
+                per_link_status = 0;
+
+                /* For now we only support single link migration. And we will
+                 * ignore other hard links in the link info list of query
+                 * record
+                 * TODO: Multiple hard links migration */
+                if (!list_empty (&query_record->link_list)) {
+                        link_info = list_first_entry
+                                        (&query_record->link_list,
+                                         gfdb_link_info_t, list);
+                }
+                if (link_info != NULL) {
+
+                        /* Lookup for parent and get the path of parent */
+                        gf_uuid_copy (p_loc.gfid, link_info->pargfid);
+                        p_loc.inode = inode_new (defrag->root_inode->table);
+                        if (!p_loc.inode) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_LOG_TIER_ERROR,
+                                        "Failed to create reference to inode"
+                                        " for %s", uuid_utoa (p_loc.gfid));
+
+                                per_link_status = -1;
+                                goto abort;
+                        }
+
+                        ret = syncop_lookup (this, &p_loc, &par_stbuf, NULL,
+                                             xdata_request, &xdata_response);
+                        /* When the parent gfid is a stale entry, the lookup
+                         * will fail and stop the demotion process.
+                         * The parent gfid can be stale when a huge folder is
+                         * deleted while the files within it are being migrated
+                         */
+                        if (ret == -ESTALE) {
+                                gf_msg (this->name, GF_LOG_WARNING, -ret,
+                                        DHT_MSG_STALE_LOOKUP,
+                                        "Stale entry in parent lookup for %s",
+                                        uuid_utoa (p_loc.gfid));
+                                per_link_status = 1;
+                                goto abort;
+                        } else if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        DHT_MSG_LOG_TIER_ERROR,
+                                        "Error in parent lookup for %s",
+                                        uuid_utoa (p_loc.gfid));
+                                per_link_status = -1;
+                                goto abort;
+                        }
+                        ret = dict_get_str (xdata_response,
+                                            GET_ANCESTRY_PATH_KEY,
+                                            &parent_path);
+                        if (ret || !parent_path) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_LOG_TIER_ERROR,
+                                        "Failed to get parent path for %s",
+                                        uuid_utoa (p_loc.gfid));
+                                per_link_status = -1;
+                                goto abort;
+                        }
+
+                        linked_inode = inode_link (p_loc.inode, NULL, NULL,
+                                                   &par_stbuf);
+                        inode_unref (p_loc.inode);
+                        p_loc.inode = linked_inode;
+
+
+                        /* Preparing File Inode */
+                        gf_uuid_copy (loc.gfid, query_record->gfid);
+                        loc.inode = inode_new (defrag->root_inode->table);
+                        if (!loc.inode) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_LOG_TIER_ERROR,
+                                        "Failed to create reference to inode"
+                                        " for %s",
+                                        uuid_utoa (query_record->gfid));
+                                per_link_status = -1;
+                                goto abort;
+                        }
+                        gf_uuid_copy (loc.pargfid, link_info->pargfid);
+                        loc.parent = inode_ref (p_loc.inode);
+
+                        /* Get filename and Construct file path */
+                        loc.name = gf_strdup (link_info->file_name);
+                        if (!loc.name) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_LOG_TIER_ERROR, "Memory "
+                                        "allocation failed for %s",
+                                        uuid_utoa (query_record->gfid));
+                                per_link_status = -1;
+                                goto abort;
+                        }
+                        ret = gf_asprintf((char **)&(loc.path), "%s/%s",
+                                          parent_path, loc.name);
+                        if (ret < 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        DHT_MSG_LOG_TIER_ERROR, "Failed to "
+                                        "construct file path for %s %s\n",
+                                        parent_path, loc.name);
+                                per_link_status = -1;
+                                goto abort;
+                        }
+
+                        gf_uuid_copy (loc.parent->gfid, link_info->pargfid);
+
+                        /* lookup file inode */
+                        ret = syncop_lookup (this, &loc, &current, NULL,
+                                             NULL, NULL);
+                        /* The file may be deleted even when the parent
+                         * is available and the lookup will
+                         * return a stale entry which would stop the
+                         * migration. so if its a stale entry, then skip
+                         * the file and keep migrating.
+                         */
+                        if (ret == -ESTALE) {
+                                /* Report the file's own gfid, not the
+                                 * parent's */
+                                gf_msg (this->name, GF_LOG_WARNING, -ret,
+                                        DHT_MSG_STALE_LOOKUP,
+                                        "Stale lookup for %s",
+                                        uuid_utoa (loc.gfid));
+                                per_link_status = 1;
+                                goto abort;
+                        } else if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        DHT_MSG_LOG_TIER_ERROR, "Failed to "
+                                        "lookup file %s\n", loc.name);
+                                per_link_status = -1;
+                                goto abort;
+                        }
+
+                        if (query_cbk_args->is_promotion &&
+                            defrag->tier_conf.tier_max_promote_size &&
+                            (current.ia_size >
+                             defrag->tier_conf.tier_max_promote_size)) {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        DHT_MSG_LOG_TIER_STATUS,
+                                        "File size exceeds maxsize for promotion. ");
+                                per_link_status = 1;
+                                goto abort;
+                        }
+
+                        linked_inode = inode_link (loc.inode, NULL, NULL,
+                                                   &current);
+                        inode_unref (loc.inode);
+                        loc.inode = linked_inode;
+
+
+                        /*
+                         * Do not promote/demote if file already is where it
+                         * should be. It means another brick moved the file
+                         * so is not an error. So we set per_link_status = 1
+                         * so that we ignore counting this.
+                         */
+                        src_subvol = dht_subvol_get_cached (this, loc.inode);
+
+                        if (src_subvol == NULL) {
+                                per_link_status = 1;
+                                goto abort;
+                        }
+                        if (query_cbk_args->is_promotion &&
+                             src_subvol == conf->subvolumes[1]) {
+                                per_link_status = 1;
+                                goto abort;
+                        }
+
+                        if (!query_cbk_args->is_promotion &&
+                            src_subvol == conf->subvolumes[0]) {
+                                per_link_status = 1;
+                                goto abort;
+                        }
+
+                        gf_msg_debug (this->name, 0,
+                                      "Tier %s: src_subvol %s file %s",
+                                      (query_cbk_args->is_promotion ?
+                                       "promote" : "demote"),
+                                      src_subvol->name,
+                                      loc.path);
+
+
+                        ret = tier_check_same_node (this, &loc, defrag);
+                        if (ret != 0) {
+                                if (ret < 0) {
+                                        per_link_status = -1;
+                                        goto abort;
+                                }
+                                ret = 0;
+                                /* By setting per_link_status to 1 we are
+                                 * ignoring this status and will not be
+                                 * counting this file for migration */
+                                per_link_status = 1;
+                                goto abort;
+                        }
+
+                        gf_uuid_copy (loc.gfid, loc.inode->gfid);
+
+                        if (gf_defrag_get_pause_state (&defrag->tier_conf)
+                            != TIER_RUNNING) {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        DHT_MSG_LOG_TIER_STATUS,
+                                        "Tiering paused. "
+                                        "Exiting "
+                                        "tier_migrate_using_query_file");
+                                goto abort;
+                        }
+
+                        ret = tier_migrate (this, query_cbk_args->is_promotion,
+                                            migrate_data, &loc,
+                                            &defrag->tier_conf);
+
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        DHT_MSG_LOG_TIER_ERROR, "Failed to "
+                                        "migrate %s ", loc.path);
+                                per_link_status = -1;
+                                goto abort;
+                        }
+
+                        if (query_cbk_args->is_promotion) {
+                                defrag->total_files_promoted++;
+                                total_migrated_bytes +=
+                                        defrag->tier_conf.st_last_promoted_size;
+                                pthread_mutex_lock (&dm_stat_mutex);
+                                defrag->tier_conf.blocks_used +=
+                                        defrag->tier_conf.st_last_promoted_size;
+                                pthread_mutex_unlock (&dm_stat_mutex);
+                        } else {
+                                defrag->total_files_demoted++;
+                                total_migrated_bytes +=
+                                        defrag->tier_conf.st_last_demoted_size;
+                                pthread_mutex_lock (&dm_stat_mutex);
+                                defrag->tier_conf.blocks_used -=
+                                        defrag->tier_conf.st_last_demoted_size;
+                                pthread_mutex_unlock (&dm_stat_mutex);
+                        }
+                        if (defrag->tier_conf.blocks_total) {
+                                pthread_mutex_lock (&dm_stat_mutex);
+                                defrag->tier_conf.percent_full =
+                                        (100 * defrag->tier_conf.blocks_used) /
+                                        defrag->tier_conf.blocks_total;
+                                pthread_mutex_unlock (&dm_stat_mutex);
+                        }
+                        total_files++;
+abort:
+                        /* Per-link cleanup, reached on success and on every
+                         * per-link failure/ignore path */
+                        GF_FREE ((char *) loc.name);
+                        loc.name = NULL;
+                        loc_wipe (&loc);
+                        loc_wipe (&p_loc);
+
+
+                        if (xdata_response) {
+                                dict_unref (xdata_response);
+                                xdata_response = NULL;
+                        }
+
+                        if ((total_files >= defrag->tier_conf.max_migrate_files)
+                            || (total_migrated_bytes >
+                                defrag->tier_conf.max_migrate_bytes)) {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        DHT_MSG_LOG_TIER_STATUS,
+                                        "Reached cycle migration limit. "
+                                        "migrated bytes %"PRIu64" files %d",
+                                        total_migrated_bytes,
+                                        total_files);
+                                goto out;
+                        }
+                }
+                per_file_status = per_link_status;
+per_file_out:
+                if (per_file_status < 0) {/* Failure */
+                        pthread_mutex_lock (&dm_stat_mutex);
+                        defrag->total_failures++;
+                        pthread_mutex_unlock (&dm_stat_mutex);
+                } else if (per_file_status == 0) {/* Success */
+                        pthread_mutex_lock (&dm_stat_mutex);
+                        defrag->total_files++;
+                        pthread_mutex_unlock (&dm_stat_mutex);
+                } else if (per_file_status == 1) {/* Ignore */
+                        per_file_status = 0;
+                        /* Since this attempt was ignored we
+                         * decrement the lookup count*/
+                        pthread_mutex_lock (&dm_stat_mutex);
+                        defrag->num_files_lookedup--;
+                        pthread_mutex_unlock (&dm_stat_mutex);
+                }
+                total_status = total_status + per_file_status;
+                per_link_status = 0;
+                per_file_status = 0;
+
+                gfdb_methods.gfdb_query_record_free (query_record);
+                query_record = NULL;
+        }
+
+out:
+        if (xdata_request) {
+                dict_unref (xdata_request);
+        }
+
+        if (migrate_data)
+                dict_unref (migrate_data);
+
+
+        gfdb_methods.gfdb_query_record_free (query_record);
+        query_record = NULL;
+
+        return total_status;
+}
+
+
+/*
+ * Per record/file callback invoked for every result of a data base query.
+ *
+ * Appends the record to the query file open on query_cbk_args->query_fd and,
+ * on success, bumps defrag->num_files_lookedup under dm_stat_mutex.
+ *
+ * Returns 0 on success, -1 on invalid arguments, otherwise the non-zero
+ * value returned by gfdb_write_query_record().
+ */
+static int
+tier_gf_query_callback (gfdb_query_record_t *gfdb_query_record,
+                        void *_args) {
+        query_cbk_args_t *query_cbk_args = _args;
+        int               ret            = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);
+        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->defrag, out);
+        GF_VALIDATE_OR_GOTO ("tier", (query_cbk_args->query_fd > 0), out);
+
+        ret = gfdb_methods.gfdb_write_query_record (query_cbk_args->query_fd,
+                                                    gfdb_query_record);
+        if (ret == 0) {
+                pthread_mutex_lock (&dm_stat_mutex);
+                query_cbk_args->defrag->num_files_lookedup++;
+                pthread_mutex_unlock (&dm_stat_mutex);
+        } else {
+                gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed writing query record to query file");
+        }
+
+out:
+        return ret;
+}
+
+
+
+
+/*
+ * Create the query file of one local brick from within the tier process.
+ *
+ * Opens a connection to the brick's data base (local_brick->brick_db_path),
+ * runs the demotion query (unchanged files) or the promotion query (recently
+ * changed files) selected by gfdb_brick_info->_gfdb_promote, with or without
+ * the frequency thresholds of defrag, and writes every hit to
+ * local_brick->qfile_path through tier_gf_query_callback().  Afterwards the
+ * brick's CTR xlator is asked over IPC to clear the heat of the DB entries.
+ *
+ * args is a gfdb_brick_info_t *.
+ *
+ * Returns 0 on success, non-zero on failure.  The query file fd and the DB
+ * connection are released on every path.
+ */
+static int
+tier_process_self_query (tier_brick_list_t *local_brick, void *args)
+{
+        int                ret             = -1;
+        char              *db_path         = NULL;
+        query_cbk_args_t  *query_cbk_args  = NULL;
+        xlator_t          *this            = NULL;
+        gfdb_conn_node_t  *conn_node       = NULL;
+        dict_t            *params_dict     = NULL;
+        dict_t            *ctr_ipc_dict    = NULL;
+        gfdb_brick_info_t *gfdb_brick_info = args;
+
+        /*Init of all the essentials*/
+        GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out);
+
+        /* Validate the callback args before the first dereference */
+        GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info->_query_cbk_args, out);
+        query_cbk_args = gfdb_brick_info->_query_cbk_args;
+
+        /* No query file is open yet.  Without this the cleanup at out:
+         * would close whatever stale value query_fd holds when we bail
+         * out before open() */
+        query_cbk_args->query_fd = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
+        this = query_cbk_args->this;
+
+        /* defrag supplies the frequency thresholds used below */
+        GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, local_brick, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out);
+
+        db_path = local_brick->brick_db_path;
+
+        /*Preparing DB parameters before init_db i.e getting db connection*/
+        params_dict = dict_new ();
+        if (!params_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "DB Params cannot initialized");
+                goto out;
+        }
+        SET_DB_PARAM_TO_DICT(this->name, params_dict,
+                             (char *) gfdb_methods.get_db_path_key(),
+                             db_path, ret, out);
+
+        /*Get the db connection*/
+        conn_node = gfdb_methods.init_db ((void *)params_dict,
+                                          dht_tier_db_type);
+        if (!conn_node) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "FATAL: Failed initializing db operations");
+                goto out;
+        }
+
+        /* Query for eligible files from db */
+        query_cbk_args->query_fd = open (local_brick->qfile_path,
+                                         O_WRONLY | O_CREAT | O_APPEND,
+                                         S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (query_cbk_args->query_fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to open query file %s",
+                        local_brick->qfile_path);
+                goto out;
+        }
+        if (!gfdb_brick_info->_gfdb_promote) {
+                /* Demotion: files unchanged since time_stamp */
+                if (query_cbk_args->defrag->write_freq_threshold == 0 &&
+                    query_cbk_args->defrag->read_freq_threshold == 0) {
+                        ret = gfdb_methods.find_unchanged_for_time (
+                                        conn_node,
+                                        tier_gf_query_callback,
+                                        (void *)query_cbk_args,
+                                        gfdb_brick_info->time_stamp);
+                } else {
+                        ret = gfdb_methods.find_unchanged_for_time_freq (
+                                        conn_node,
+                                        tier_gf_query_callback,
+                                        (void *)query_cbk_args,
+                                        gfdb_brick_info->time_stamp,
+                                        query_cbk_args->defrag->
+                                                        write_freq_threshold,
+                                        query_cbk_args->defrag->
+                                                        read_freq_threshold,
+                                        _gf_false);
+                }
+        } else {
+                /* Promotion: files changed since time_stamp */
+                if (query_cbk_args->defrag->write_freq_threshold == 0 &&
+                    query_cbk_args->defrag->read_freq_threshold == 0) {
+                        ret = gfdb_methods.find_recently_changed_files (
+                                        conn_node,
+                                        tier_gf_query_callback,
+                                        (void *)query_cbk_args,
+                                        gfdb_brick_info->time_stamp);
+                } else {
+                        ret = gfdb_methods.find_recently_changed_files_freq (
+                                        conn_node,
+                                        tier_gf_query_callback,
+                                        (void *)query_cbk_args,
+                                        gfdb_brick_info->time_stamp,
+                                        query_cbk_args->defrag->
+                                                        write_freq_threshold,
+                                        query_cbk_args->defrag->
+                                                        read_freq_threshold,
+                                        _gf_false);
+                }
+        }
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "FATAL: query from db failed");
+                goto out;
+        }
+
+        /*Clear the heat on the DB entries*/
+        /*Preparing ctr_ipc_dict*/
+        ctr_ipc_dict = dict_new ();
+        if (!ctr_ipc_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "ctr_ipc_dict cannot initialized");
+                goto out;
+        }
+
+        SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict,
+                             GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_CLEAR_OPS,
+                             ret, out);
+
+        ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
+                          NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR, "Failed clearing the heat "
+                        "on db %s error %d", local_brick->brick_db_path, ret);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (params_dict) {
+                dict_unref (params_dict);
+                params_dict = NULL;
+        }
+
+        if (ctr_ipc_dict) {
+                dict_unref (ctr_ipc_dict);
+                ctr_ipc_dict = NULL;
+        }
+
+        if (query_cbk_args && query_cbk_args->query_fd >= 0) {
+                sys_close (query_cbk_args->query_fd);
+                query_cbk_args->query_fd = -1;
+        }
+        gfdb_methods.fini_db (conn_node);
+
+        return ret;
+}
+
+
+
+
+
+/*
+ * Ask the CTR xlator of a brick to create the query file.
+ *
+ * Packs the query parameters (promotion/demotion, frequency thresholds and
+ * time stamp) together with local_brick->qfile_path into a dict, sends it to
+ * local_brick->xlator via syncop_ipc() and reads the number of records found
+ * from the reply.  On success defrag->num_files_lookedup is set to that
+ * count under dm_stat_mutex.
+ *
+ * args is a gfdb_brick_info_t *.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+tier_process_ctr_query (tier_brick_list_t *local_brick, void *args)
+{
+        int                    ret              = -1;
+        query_cbk_args_t      *query_cbk_args   = NULL;
+        xlator_t              *this             = NULL;
+        dict_t                *ctr_ipc_in_dict  = NULL;
+        dict_t                *ctr_ipc_out_dict = NULL;
+        gfdb_brick_info_t     *gfdb_brick_info  = args;
+        gfdb_ipc_ctr_params_t *ipc_ctr_params   = NULL;
+        int                    count            = 0;
+
+        /*Init of all the essentials*/
+        GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out);
+
+        /* Validate the callback args before the first dereference */
+        GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info->_query_cbk_args, out);
+        query_cbk_args = gfdb_brick_info->_query_cbk_args;
+
+        GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
+        this = query_cbk_args->this;
+
+        /* defrag supplies the frequency thresholds used below */
+        GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, local_brick, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out);
+
+
+        /*Preparing ctr_ipc_in_dict*/
+        ctr_ipc_in_dict = dict_new ();
+        if (!ctr_ipc_in_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "ctr_ipc_in_dict cannot initialized");
+                goto out;
+        }
+
+        ipc_ctr_params = GF_CALLOC (1, sizeof (gfdb_ipc_ctr_params_t),
+                                    gf_tier_mt_ipc_ctr_params_t);
+        if (!ipc_ctr_params) {
+                goto out;
+        }
+
+        /* set all the query params*/
+        ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
+        ipc_ctr_params->write_freq_threshold = query_cbk_args->
+                                                defrag->write_freq_threshold;
+        ipc_ctr_params->read_freq_threshold = query_cbk_args->
+                                                defrag->read_freq_threshold;
+        memcpy (&ipc_ctr_params->time_stamp,
+                gfdb_brick_info->time_stamp,
+                sizeof (gfdb_time_t));
+
+        SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
+                             GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_QUERY_OPS,
+                             ret, out);
+
+
+        SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
+                             GFDB_IPC_CTR_GET_QFILE_PATH,
+                             local_brick->qfile_path,
+                             ret, out);
+
+        ret = dict_set_bin (ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
+                            ipc_ctr_params, sizeof (*ipc_ctr_params));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
+                        "Failed setting %s to params dictionary",
+                        GFDB_IPC_CTR_GET_QUERY_PARAMS);
+                /* ipc_ctr_params is still ours: freed at out */
+                goto out;
+        }
+
+        /* The params were handed to the dict; drop our pointer so the
+         * cleanup at out: does not free them a second time */
+        ipc_ctr_params = NULL;
+
+        ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR,
+                          ctr_ipc_in_dict, &ctr_ipc_out_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_IPC_TIER_ERROR, "Failed query on %s ret %d",
+                        local_brick->brick_db_path, ret);
+                goto out;
+        }
+
+        ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
+                             &count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR, "Failed getting count "
+                        "of records on %s",
+                        local_brick->brick_db_path);
+                goto out;
+        }
+
+        if (count < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR, "Failed query on %s",
+                        local_brick->brick_db_path);
+                ret = -1;
+                goto out;
+        }
+
+        pthread_mutex_lock (&dm_stat_mutex);
+        query_cbk_args->defrag->num_files_lookedup = count;
+        pthread_mutex_unlock (&dm_stat_mutex);
+
+        ret = 0;
+out:
+
+        /* Non-NULL only when the params never reached the dict, e.g. when
+         * one of the SET_DB_PARAM_TO_DICT steps above bailed out */
+        GF_FREE (ipc_ctr_params);
+
+        if (ctr_ipc_in_dict) {
+                dict_unref(ctr_ipc_in_dict);
+                ctr_ipc_in_dict = NULL;
+        }
+
+        if (ctr_ipc_out_dict) {
+                dict_unref(ctr_ipc_out_dict);
+                ctr_ipc_out_dict = NULL;
+        }
+
+        return ret;
+}
+
+
+
+
+/*
+ * Per-brick callback invoked for each brick of the hot/cold brick list.
+ *
+ * When the tier database type is GFDB_SQLITE3 the CTR xlator is first asked,
+ * through an IPC fop, for the "journal_mode" of the brick database. If the
+ * reported mode starts with "wal" the query is run by
+ * tier_process_self_query(), otherwise by tier_process_ctr_query(). For any
+ * other database type tier_process_self_query() is always used.
+ *
+ * local_brick : brick to query; it and its xlator must be non-NULL.
+ * args        : opaque argument passed unchanged to the query helper.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int
+tier_process_brick (tier_brick_list_t *local_brick, void *args) {
+        int     ret              = -1;
+        dict_t *ctr_ipc_in_dict  = NULL;
+        dict_t *ctr_ipc_out_dict = NULL;
+        char   *strval           = NULL;
+
+        GF_VALIDATE_OR_GOTO ("tier", local_brick, out);
+
+        GF_VALIDATE_OR_GOTO ("tier", local_brick->xlator, out);
+
+        if (dht_tier_db_type != GFDB_SQLITE3) {
+                /* Only SQLite needs the journal mode probe */
+                ret = tier_process_self_query (local_brick, args);
+                goto out;
+        }
+
+        /*Preparing ctr_ipc_in_dict*/
+        ctr_ipc_in_dict = dict_new ();
+        if (!ctr_ipc_in_dict) {
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "ctr_ipc_in_dict cannot initialized");
+                goto out;
+        }
+
+        ret = dict_set_str (ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
+                            GFDB_IPC_CTR_GET_DB_PARAM_OPS);
+        if (ret) {
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        LG_MSG_SET_PARAM_FAILED, "Failed to set %s "
+                        "to params dictionary", GFDB_IPC_CTR_KEY);
+                goto out;
+        }
+
+        ret = dict_set_str (ctr_ipc_in_dict,
+                            GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
+        if (ret) {
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        LG_MSG_SET_PARAM_FAILED, "Failed to set %s "
+                        "to params dictionary",
+                        GFDB_IPC_CTR_GET_DB_PARAM_OPS);
+                goto out;
+        }
+
+        ret = dict_set_str (ctr_ipc_in_dict,
+                            GFDB_IPC_CTR_GET_DB_KEY, "journal_mode");
+        if (ret) {
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        LG_MSG_SET_PARAM_FAILED, "Failed to set %s "
+                        "to params dictionary",
+                        GFDB_IPC_CTR_GET_DB_KEY);
+                goto out;
+        }
+
+        ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR,
+                          ctr_ipc_in_dict, &ctr_ipc_out_dict);
+        if (ret || ctr_ipc_out_dict == NULL) {
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR, "Failed to get "
+                        "journal_mode of sql db %s",
+                        local_brick->brick_db_path);
+                /* A "successful" IPC without a reply dict is still a
+                 * failure; do not report it as success to the caller. */
+                if (!ret)
+                        ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (ctr_ipc_out_dict, "journal_mode", &strval);
+        if (ret) {
+                /* strval is not valid here; log the key that was missing */
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        LG_MSG_GET_PARAM_FAILED, "Failed to get %s "
+                        "from params dictionary", "journal_mode");
+                goto out;
+        }
+
+        if (strval && (strncmp (strval, "wal", strlen ("wal")) == 0))
+                ret = tier_process_self_query (local_brick, args);
+        else
+                ret = tier_process_ctr_query (local_brick, args);
+
+out:
+        if (ctr_ipc_in_dict)
+                dict_unref (ctr_ipc_in_dict);
+
+        if (ctr_ipc_out_dict)
+                dict_unref (ctr_ipc_out_dict);
+
+        return ret;
+}
+
+
+
+
+/*
+ * Build the query file of every brick in args->brick_list.
+ *
+ * The query window ends args->freq_time seconds before "now" (microseconds
+ * are dropped). For each brick the query file path is (re)computed, any
+ * stale file at that path is unlinked and tier_process_brick() is run.
+ * A failing brick is logged but does not abort the remaining bricks.
+ *
+ * Returns -1 when the current time cannot be read, 0 otherwise.
+ */
+static int
+tier_build_migration_qfile (migration_args_t *args,
+                            query_cbk_args_t *query_cbk_args,
+                            gf_boolean_t is_promotion)
+{
+        gfdb_time_t        current_time;
+        gfdb_time_t        time_in_past    = { .tv_sec  = args->freq_time,
+                                               .tv_usec = 0 };
+        gfdb_brick_info_t  gfdb_brick_info = {
+                .time_stamp      = &time_in_past,
+                ._gfdb_promote   = is_promotion,
+                ._query_cbk_args = query_cbk_args,
+        };
+        tier_brick_list_t *local_brick     = NULL;
+        int                brick_index     = 0;
+        int                ret             = -1;
+
+        ret = gettimeofday (&current_time, NULL);
+        if (ret == -1) {
+                gf_msg (args->this->name, GF_LOG_ERROR, errno,
+                        DHT_MSG_SYS_CALL_GET_TIME_FAILED,
+                        "Failed to get current time");
+                return ret;
+        }
+
+        /* The migration daemon may run a varying number of usec after the
+         * sleep call triggers. A file may be registered in CTR some number
+         * of usec X after the daemon started and be missed in the
+         * subsequent cycle if the daemon starts Y usec after the period in
+         * seconds where Y>X. Normalize away this problem by always keeping
+         * usec at 0. */
+        time_in_past.tv_sec  = current_time.tv_sec - time_in_past.tv_sec;
+        time_in_past.tv_usec = 0;
+
+        list_for_each_entry (local_brick, args->brick_list, list) {
+
+                /* Query file path for this brick:
+                 *   /var/run/gluster/xlator_name/
+                 *           {promote/demote}-brickname-indexinbricklist
+                 * The index keeps paths unique even when two bricks share
+                 * the same name. */
+                snprintf (local_brick->qfile_path, PATH_MAX , "%s-%s-%d",
+                          GET_QFILE_PATH (is_promotion),
+                          local_brick->brick_name, brick_index);
+                brick_index++;
+
+                /* Drop whatever a previous cycle left behind */
+                sys_unlink (local_brick->qfile_path);
+
+                if (tier_process_brick (local_brick, &gfdb_brick_info)) {
+                        gf_msg (args->this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_BRICK_QUERY_FAILED,
+                                "Brick %s query failed\n",
+                                local_brick->brick_db_path);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Open the query file of every brick in comp->brick_list, hand the
+ * descriptor array to tier_migrate_using_query_file() and release the
+ * array afterwards.
+ *
+ * When the result is non-zero every brick's query file is renamed to
+ * "<qfile>-<timestamp>.err" (timestamp from gmtime_r) for later debugging.
+ *
+ * Returns the result of tier_migrate_using_query_file(), or -1 when the
+ * descriptor array could not be created.
+ */
+static int
+tier_migrate_files_using_qfile (migration_args_t *comp,
+                                query_cbk_args_t *query_cbk_args)
+{
+        const char         *time_format                   = "%Y-%m-%d-%H-%M-%S";
+        char                time_str[256]                 = {0};
+        char                query_file_path_err[PATH_MAX] = "";
+        struct tm           tm                            = {0};
+        gfdb_time_t         current_time                  = {0};
+        tier_brick_list_t  *local_brick                   = NULL;
+        tier_qfile_array_t *qfiles                        = NULL;
+        gf_tier_conf_t     *tier_conf                     = NULL;
+        ssize_t             brick_count                   = 0;
+        int                 slot                          = 0;
+        int                 fd                            = 0;
+        int                 ret                           = -1;
+
+        tier_conf = &(query_cbk_args->defrag->tier_conf);
+
+        /* Timestamp used to name the error query files */
+        gettimeofday (&current_time, NULL);
+        gmtime_r (&current_time.tv_sec, &tm);
+        strftime (time_str, sizeof (time_str), time_format, &tm);
+
+        /* One descriptor slot per brick */
+        list_for_each_entry (local_brick, comp->brick_list, list) {
+                brick_count++;
+        }
+
+        qfiles = qfile_array_new (brick_count);
+        query_cbk_args->qfile_array = qfiles;
+        if (!qfiles) {
+                gf_msg ("tier", GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR, "Failed to create new "
+                        "qfile_array");
+                goto out;
+        }
+
+        /* Open every query file; a file that cannot be opened keeps its
+         * negative descriptor in the array and counts as exhausted. */
+        qfiles->exhausted_count = 0;
+        list_for_each_entry (local_brick, comp->brick_list, list) {
+                fd = open (local_brick->qfile_path, O_RDONLY,
+                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+                if (fd < 0) {
+                        gf_msg ("tier", GF_LOG_ERROR, errno,
+                                DHT_MSG_LOG_TIER_ERROR, "Failed to open "
+                                "%s to the query file",
+                                local_brick->qfile_path);
+                        qfiles->exhausted_count++;
+                }
+                qfiles->fd_array[slot] = fd;
+                slot++;
+        }
+
+        /* Resume from the index remembered by the previous cycle and shift
+         * it, so that the same query file is not always read first. */
+        if (query_cbk_args->is_promotion)
+                qfiles->next_index = tier_conf->last_promote_qfile_index;
+        else
+                qfiles->next_index = tier_conf->last_demote_qfile_index;
+
+        shift_next_index (qfiles);
+
+        if (query_cbk_args->is_promotion)
+                tier_conf->last_promote_qfile_index = qfiles->next_index;
+        else
+                tier_conf->last_demote_qfile_index = qfiles->next_index;
+
+        /* Migrate files using query file list */
+        ret = tier_migrate_using_query_file ((void *)query_cbk_args);
+
+out:
+        qfile_array_free (query_cbk_args->qfile_array);
+        query_cbk_args->qfile_array = NULL;
+
+        if (!ret)
+                return ret;
+
+        /* Keep the query files of a failed run around as .err files */
+        list_for_each_entry (local_brick, comp->brick_list, list) {
+                snprintf (query_file_path_err, PATH_MAX, "%s-%s.err",
+                          local_brick->qfile_path, time_str);
+                sys_rename (local_brick->qfile_path, query_file_path_err);
+        }
+
+        return ret;
+}
+
+
+
+/*
+ * Run one demotion cycle: build the query files for the bricks in
+ * demotion_args->brick_list, then migrate the files they list.
+ *
+ * The result is returned and, when demotion_args is non-NULL, also stored
+ * in demotion_args->return_value.
+ *
+ * Returns 0 on success, non-zero on failure (-1 on invalid arguments).
+ */
+int
+tier_demote (migration_args_t *demotion_args)
+{
+        /* Zero-initialised so fields not set below are never read as
+         * garbage by the helpers */
+        query_cbk_args_t query_cbk_args = { 0 };
+        int              ret            = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", demotion_args, out);
+        GF_VALIDATE_OR_GOTO ("tier", demotion_args->this, out);
+        GF_VALIDATE_OR_GOTO (demotion_args->this->name,
+                             demotion_args->brick_list, out);
+        GF_VALIDATE_OR_GOTO (demotion_args->this->name,
+                             demotion_args->defrag, out);
+
+        THIS = demotion_args->this;
+
+        query_cbk_args.this = demotion_args->this;
+        query_cbk_args.defrag = demotion_args->defrag;
+        query_cbk_args.is_promotion = 0;
+
+        /*Build the query file using bricklist*/
+        ret = tier_build_migration_qfile (demotion_args, &query_cbk_args,
+                                          _gf_false);
+        if (ret)
+                goto out;
+
+        /* Migrate files using the query file */
+        ret = tier_migrate_files_using_qfile (demotion_args,
+                                              &query_cbk_args);
+
+out:
+        /* The first validation jumps here with a NULL demotion_args */
+        if (demotion_args)
+                demotion_args->return_value = ret;
+        return ret;
+}
+
+
+/*
+ * Run one promotion cycle: build the query files for the bricks in
+ * promotion_args->brick_list, then migrate the files they list.
+ *
+ * The result is returned and, when promotion_args is non-NULL, also stored
+ * in promotion_args->return_value.
+ *
+ * Returns 0 on success, non-zero on failure (-1 on invalid arguments).
+ */
+int
+tier_promote (migration_args_t *promotion_args)
+{
+        int              ret            = -1;
+        /* Zero-initialised so fields not set below are never read as
+         * garbage by the helpers */
+        query_cbk_args_t query_cbk_args = { 0 };
+
+        /* Same argument checks as tier_demote() */
+        GF_VALIDATE_OR_GOTO ("tier", promotion_args, out);
+        GF_VALIDATE_OR_GOTO ("tier", promotion_args->this, out);
+        GF_VALIDATE_OR_GOTO (promotion_args->this->name,
+                             promotion_args->brick_list, out);
+        GF_VALIDATE_OR_GOTO (promotion_args->this->name,
+                             promotion_args->defrag, out);
+
+        THIS = promotion_args->this;
+
+        query_cbk_args.this = promotion_args->this;
+        query_cbk_args.defrag = promotion_args->defrag;
+        query_cbk_args.is_promotion = 1;
+
+        /*Build the query file using bricklist*/
+        ret = tier_build_migration_qfile (promotion_args, &query_cbk_args,
+                                          _gf_true);
+        if (ret)
+                goto out;
+
+        /* Migrate files using the query file */
+        ret = tier_migrate_files_using_qfile (promotion_args, &query_cbk_args);
+
+out:
+        /* The first validation jumps here with a NULL promotion_args */
+        if (promotion_args)
+                promotion_args->return_value = ret;
+        return ret;
+}
+
+/*
+ * Recursively walk the xlator graph rooted at xl and append one
+ * tier_brick_list_t entry to local_bricklist_head for every
+ * "protocol/client" xlator whose "remote-host" option is a local address,
+ * i.e. for every brick running on the same node as the tier daemon.
+ *
+ * Each entry records the client xlator, the brick name (last path
+ * component of "remote-subvolume") and the database path
+ * "<remote-subvolume>/<GF_HIDDEN_PATH>/<brickname>.db".
+ *
+ * Returns 0 on success and non-zero on failure. Entries that were already
+ * appended stay on the list; the caller releases them.
+ */
+static int
+tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head)
+{
+        xlator_list_t     *child             = NULL;
+        char              *rv                = NULL;
+        char              *rh                = NULL;
+        char              *brickname         = NULL;
+        char               db_name[PATH_MAX] = "";
+        int                ret               = 0;
+        tier_brick_list_t *local_brick       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("tier", xl, out);
+        GF_VALIDATE_OR_GOTO ("tier", local_bricklist_head, out);
+
+        if (strcmp (xl->type, "protocol/client") == 0) {
+                ret = dict_get_str (xl->options, "remote-host", &rh);
+                if (ret < 0)
+                        goto out;
+
+                if (gf_is_local_addr (rh)) {
+
+                        ret = dict_get_str (xl->options, "remote-subvolume",
+                                            &rv);
+                        if (ret < 0)
+                                goto out;
+
+                        /* Brick name is the last path component; fall back
+                         * to the whole string if it contains no '/' */
+                        brickname = strrchr (rv, '/');
+                        brickname = brickname ? brickname + 1 : rv;
+                        snprintf (db_name, sizeof (db_name), "%s.db",
+                                  brickname);
+
+                        local_brick = GF_CALLOC (1, sizeof (tier_brick_list_t),
+                                                 gf_tier_mt_bricklist_t);
+                        if (!local_brick) {
+                                /* ret is 0 here; report the failure */
+                                ret = -1;
+                                goto out;
+                        }
+
+                        local_brick->brick_db_path =
+                                GF_CALLOC (PATH_MAX, 1, gf_common_mt_char);
+                        if (!local_brick->brick_db_path) {
+                                gf_msg ("tier", GF_LOG_ERROR, 0,
+                                        DHT_MSG_LOG_TIER_STATUS,
+                                        "Failed to allocate memory for"
+                                        " bricklist.");
+                                /* Non-zero ret lets the cleanup below free
+                                 * local_brick instead of leaking it */
+                                ret = -1;
+                                goto out;
+                        }
+
+                        snprintf (local_brick->brick_db_path,
+                                  PATH_MAX, "%s/%s/%s", rv,
+                                  GF_HIDDEN_PATH, db_name);
+
+                        local_brick->xlator = xl;
+
+                        snprintf (local_brick->brick_name,
+                                  NAME_MAX, "%s", brickname);
+
+                        list_add_tail (&(local_brick->list),
+                                       local_bricklist_head);
+
+                        ret = 0;
+                        goto out;
+                }
+        }
+
+        for (child = xl->children; child; child = child->next) {
+                ret = tier_get_bricklist (child->xlator, local_bricklist_head);
+                if (ret) {
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+
+        /* On failure local_brick, if allocated, was never linked into the
+         * list, so it is still owned (and released) here. */
+        if (ret) {
+                if (local_brick) {
+                        GF_FREE (local_brick->brick_db_path);
+                }
+                GF_FREE (local_brick);
+        }
+
+        return ret;
+}
+
+/*
+ * Demotion frequency currently in effect: DEFAULT_DEMOTE_DEGRADED while in
+ * watermark mode with the last watermark at TIER_WM_HI, the configured
+ * tier_demote_frequency otherwise.
+ */
+int
+tier_get_freq_demote (gf_tier_conf_t *tier_conf)
+{
+        if ((tier_conf->mode != TIER_MODE_WM) ||
+            (tier_conf->watermark_last != TIER_WM_HI)) {
+                return tier_conf->tier_demote_frequency;
+        }
+
+        return DEFAULT_DEMOTE_DEGRADED;
+}
+
+/* Promotion frequency as configured in tier_conf. */
+int
+tier_get_freq_promote (gf_tier_conf_t *tier_conf)
+{
+        int promote_freq = tier_conf->tier_promote_frequency;
+
+        return promote_freq;
+}
+
+/*
+ * Tell whether a demotion cycle is due: _gf_true when current_time.tv_sec
+ * is an exact multiple of freq, _gf_false otherwise.
+ *
+ * A frequency of 0 never triggers (and must not reach the modulo below,
+ * which would divide by zero).
+ */
+static int
+tier_check_demote (gfdb_time_t current_time, int freq)
+{
+        if (freq == 0)
+                return _gf_false;
+
+        return ((current_time.tv_sec % freq) == 0) ?
+                _gf_true : _gf_false;
+}
+
+/*
+ * Tell whether a promotion cycle is due.
+ *
+ * Promotion never runs while in watermark mode with the last watermark at
+ * TIER_WM_HI. Otherwise it is due when current_time.tv_sec is an exact
+ * multiple of freq. A frequency of 0 never triggers (and must not reach
+ * the modulo below, which would divide by zero).
+ */
+static gf_boolean_t
+tier_check_promote (gf_tier_conf_t *tier_conf,
+                    gfdb_time_t current_time,
+                    int freq)
+{
+        if ((tier_conf->mode == TIER_MODE_WM) &&
+            (tier_conf->watermark_last == TIER_WM_HI))
+                return _gf_false;
+
+        if (freq == 0)
+                return _gf_false;
+
+        return ((current_time.tv_sec % freq) == 0) ?
+                _gf_true : _gf_false;
+}
+
+
+
+
+/*
+ * Unlink and free every tier_brick_list_t entry on brick_list, including
+ * each entry's brick_db_path. The list head itself is not freed.
+ */
+void
+clear_bricklist (struct list_head *brick_list)
+{
+        tier_brick_list_t *entry = NULL;
+        tier_brick_list_t *next  = NULL;
+
+        if (!list_empty (brick_list)) {
+                list_for_each_entry_safe (entry, next, brick_list, list) {
+                        list_del (&entry->list);
+                        GF_FREE (entry->brick_db_path);
+                        GF_FREE (entry);
+                }
+        }
+}
+
+
+/*
+ * Fill in qfile_path for every brick on brick_list.
+ *
+ * The path is
+ *   /var/run/gluster/xlator_name/{promote/demote}-brickname-indexinbricklist
+ * where the prefix is chosen by GET_QFILE_PATH (is_cold) and the index is
+ * the brick's position in the list, so that two bricks with the same name
+ * still get distinct query files.
+ */
+static void
+set_brick_list_qpath (struct list_head *brick_list, gf_boolean_t is_cold)
+{
+        tier_brick_list_t *local_brick = NULL;
+        int                brick_idx   = 0;
+
+        GF_VALIDATE_OR_GOTO ("tier", brick_list, out);
+
+        list_for_each_entry (local_brick, brick_list, list) {
+                snprintf (local_brick->qfile_path, PATH_MAX , "%s-%s-%d",
+                          GET_QFILE_PATH (is_cold),
+                          local_brick->brick_name, brick_idx);
+                brick_idx++;
+        }
+
+out:
+        return;
+}
+
+/*
+ * Main tiering loop. This is called from the promotion and the
+ * demotion threads spawned in tier_start().
+ *
+ * Every second, wake from sleep to perform tasks.
+ * 1. Check trigger to migrate data.
+ * 2. Check for state changes (pause, unpause, stop).
+ *
+ * in_args is the migration_args_t of the calling thread. The loop status
+ * is reported through args->return_value; the thread result itself is
+ * always NULL.
+ */
+static void
+*tier_run (void *in_args)
+{
+        dht_conf_t       *conf            = NULL;
+        gfdb_time_t       current_time    = { 0 };
+        int               freq            = 0;
+        int               ret             = 0;
+        xlator_t         *any             = NULL;
+        xlator_t         *xlator          = NULL;
+        gf_tier_conf_t   *tier_conf       = NULL;
+        loc_t             root_loc        = { 0 };
+        int               check_watermark = 0;
+        gf_defrag_info_t *defrag          = NULL;
+        xlator_t         *this            = NULL;
+        migration_args_t *args            = in_args;
+
+        GF_VALIDATE_OR_GOTO ("tier", args, out);
+        GF_VALIDATE_OR_GOTO ("tier", args->brick_list, out);
+
+        this = args->this;
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO ("tier", conf, out);
+
+        defrag = conf->defrag;
+        GF_VALIDATE_OR_GOTO ("tier", defrag, out);
+
+        if (list_empty (args->brick_list)) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Brick list for tier is empty. Exiting.");
+                goto out;
+        }
+
+        defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+        tier_conf = &defrag->tier_conf;
+
+        /* NOTE(review): root_loc is built here but never wiped in this
+         * function - confirm whether it needs to be released. */
+        dht_build_root_loc (defrag->root_inode, &root_loc);
+
+        while (1) {
+
+                /*
+                 * Check if a graph switch occurred. If so, stop migration
+                 * thread. It will need to be restarted manually.
+                 */
+                any = THIS->ctx->active->first;
+                xlator = xlator_search_by_name (any, this->name);
+
+                if (xlator != this) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                DHT_MSG_LOG_TIER_STATUS,
+                                "Detected graph switch. Exiting migration daemon.");
+                        goto out;
+                }
+
+                gf_defrag_check_pause_tier (tier_conf);
+
+                sleep(1);
+
+                if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+                        ret = 1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_LOG_TIER_ERROR,
+                                "defrag->defrag_status != "
+                                "GF_DEFRAG_STATUS_STARTED");
+                        goto out;
+                }
+
+                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
+                        ret = 0;
+                        defrag->defrag_status =
+                                        GF_DEFRAG_STATUS_COMPLETE;
+                        gf_msg (this->name, GF_LOG_DEBUG, 0,
+                                DHT_MSG_LOG_TIER_ERROR,
+                                "defrag->defrag_cmd == "
+                                "GF_DEFRAG_CMD_START_DETACH_TIER");
+                        goto out;
+                }
+
+                if (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING)
+                        continue;
+
+
+                /* To have proper synchronization amongst all
+                 * brick holding nodes, so that promotions and demotions
+                 * start atomically w.r.t. the promotion/demotion frequency
+                 * period, all nodes should have their system time
+                 * in sync with each other, either manually set or
+                 * using an NTP server */
+                ret = gettimeofday (&current_time, NULL);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                DHT_MSG_SYS_CALL_GET_TIME_FAILED,
+                                "Failed to get current time");
+                        goto out;
+                }
+
+                check_watermark++;
+
+                if (check_watermark >= WM_INTERVAL) {
+                        check_watermark = 0;
+                        ret = tier_check_watermark (this, &root_loc);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                        DHT_MSG_LOG_TIER_ERROR,
+                                        "Failed to get watermark");
+                                continue;
+                        }
+                }
+
+                if (args->is_promotion) {
+
+                        freq = tier_get_freq_promote (tier_conf);
+
+                        if (tier_check_promote (tier_conf, current_time, freq)) {
+                                args->freq_time = freq;
+                                ret = tier_promote (args);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                DHT_MSG_LOG_TIER_ERROR,
+                                                "Promotion failed");
+                                }
+                        }
+
+                } else {
+
+                        freq = tier_get_freq_demote (tier_conf);
+
+                        if (tier_check_demote (current_time, freq)) {
+                                args->freq_time = freq;
+                                ret = tier_demote (args);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                DHT_MSG_LOG_TIER_ERROR,
+                                                "Demotion failed");
+                                }
+                        }
+
+                }
+
+                /* Check the statfs immediately after the processing threads
+                   return */
+                check_watermark = WM_INTERVAL;
+        }
+
+out:
+
+        /* The first validation jumps here with a NULL args */
+        if (args)
+                args->return_value = ret;
+
+        return NULL;
+}
+
+/*
+ * Start tier migration: collect the local bricks of subvolume 1 (hot) and
+ * subvolume 0 (cold), spawn one tier_run() thread for demotion (hot list)
+ * and one for promotion (cold list), wait for the spawned threads and
+ * finally release both brick lists.
+ *
+ * If a thread cannot be created defrag->defrag_status is set to
+ * GF_DEFRAG_STATUS_FAILED. Returns the result of the last pthread_create().
+ */
+int
+tier_start (xlator_t *this, gf_defrag_info_t *defrag)
+{
+        struct list_head  bricklist_hot  = { 0 };
+        struct list_head  bricklist_cold = { 0 };
+        dht_conf_t       *conf           = this->private;
+        pthread_t         promote_thread;
+        pthread_t         demote_thread;
+        int               ret            = -1;
+        migration_args_t  demotion_args  = {
+                .this         = this,
+                .brick_list   = &bricklist_hot,
+                .defrag       = defrag,
+                .is_promotion = _gf_false,
+        };
+        migration_args_t  promotion_args = {
+                .this         = this,
+                .brick_list   = &bricklist_cold,
+                .defrag       = defrag,
+                .is_promotion = _gf_true,
+        };
+
+        INIT_LIST_HEAD ((&bricklist_hot));
+        INIT_LIST_HEAD ((&bricklist_cold));
+
+        /* Demotion works on the hot bricks */
+        tier_get_bricklist (conf->subvolumes[1], &bricklist_hot);
+        set_brick_list_qpath (&bricklist_hot, _gf_false);
+
+        ret = pthread_create (&demote_thread, NULL, &tier_run,
+                              &demotion_args);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to start demotion thread.");
+                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+                goto cleanup;
+        }
+
+        /* Promotion works on the cold bricks */
+        tier_get_bricklist (conf->subvolumes[0], &bricklist_cold);
+        set_brick_list_qpath (&bricklist_cold, _gf_true);
+
+        ret = pthread_create (&promote_thread, NULL, &tier_run,
+                              &promotion_args);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to start promotion thread.");
+                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+                goto waitforspawned;
+        }
+
+        pthread_join (promote_thread, NULL);
+
+waitforspawned:
+        /* The demotion thread is running in both cases that reach here */
+        pthread_join (demote_thread, NULL);
+
+cleanup:
+        clear_bricklist (&bricklist_cold);
+        clear_bricklist (&bricklist_hot);
+
+        return ret;
+}
+
+/*
+ * Returns 1 when the defrag command of this xlator is
+ * GF_DEFRAG_CMD_START_TIER or GF_DEFRAG_CMD_START_DETACH_TIER, and 0
+ * otherwise (including when conf or conf->defrag is missing).
+ */
+int32_t
+tier_migration_needed (xlator_t *this)
+{
+        dht_conf_t       *conf   = this->private;
+        gf_defrag_info_t *defrag = NULL;
+        int               ret    = 0;
+
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+        GF_VALIDATE_OR_GOTO (this->name, conf->defrag, out);
+
+        defrag = conf->defrag;
+
+        ret = ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
+               (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) ? 1 : 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Choose the migration target for local and store it in
+ * local->rebalance.target_node:
+ *   - detach-tier in progress      -> subvolume 0
+ *   - file cached on subvolume 0   -> subvolume 1
+ *   - otherwise                    -> subvolume 0
+ *
+ * Returns 0 when a target was set, -1 otherwise.
+ */
+int32_t
+tier_migration_get_dst (xlator_t *this, dht_local_t *local)
+{
+        dht_conf_t       *conf   = NULL;
+        gf_defrag_info_t *defrag = NULL;
+        xlator_t         *target = NULL;
+        int32_t           ret    = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        conf   = this->private;
+        defrag = conf->defrag;
+
+        if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
+                target = conf->subvolumes[0];
+        } else if (conf->subvolumes[0] == local->cached_subvol) {
+                target = conf->subvolumes[1];
+        } else {
+                target = conf->subvolumes[0];
+        }
+
+        local->rebalance.target_node = target;
+
+        if (target)
+                ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Layout search method for tier: always answers with TIER_HASHED_SUBVOL.
+ * layout and name are not consulted. Returns NULL when this or
+ * this->private is missing.
+ */
+xlator_t *
+tier_search (xlator_t *this, dht_layout_t *layout, const char *name)
+{
+        dht_conf_t *conf   = NULL;
+        xlator_t   *subvol = NULL;
+
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        /* TIER_HASHED_SUBVOL expands in terms of the local "conf" */
+        conf   = this->private;
+        subvol = TIER_HASHED_SUBVOL;
+
+out:
+        return subvol;
+}
+
+
+/*
+ * Load libgfdb.so.0 from LIBDIR into the file-level libhandle, look up
+ * "get_gfdb_methods" in it and call it to fill gfdb_methods.
+ *
+ * On failure the library handle is closed again and libhandle is reset to
+ * NULL, so that later cleanup does not close it a second time.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+tier_load_externals (xlator_t *this)
+{
+        int                ret         = -1;
+        char              *libpathfull = (LIBDIR "/libgfdb.so.0");
+        get_gfdb_methods_t get_gfdb_methods;
+
+        GF_VALIDATE_OR_GOTO ("this", this, out);
+
+        libhandle = dlopen (libpathfull, RTLD_NOW);
+        if (!libhandle) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       DHT_MSG_LOG_TIER_ERROR,
+                       "Error loading libgfdb.so %s\n", dlerror());
+                ret = -1;
+                goto out;
+        }
+
+        get_gfdb_methods = dlsym (libhandle, "get_gfdb_methods");
+        if (!get_gfdb_methods) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       DHT_MSG_LOG_TIER_ERROR,
+                       "Error loading get_gfdb_methods()");
+                ret = -1;
+                goto out;
+        }
+
+        get_gfdb_methods (&gfdb_methods);
+
+        ret = 0;
+
+out:
+        if (ret && libhandle) {
+                dlclose (libhandle);
+                /* Do not leave a stale handle behind for tier_fini() */
+                libhandle = NULL;
+        }
+
+        return ret;
+}
+
+/*
+ * Map a "tier-mode" option string to its mode constant: "test" gives
+ * TIER_MODE_TEST, every other string gives TIER_MODE_WM.
+ */
+static
+int tier_validate_mode (char *mode)
+{
+        return (strcmp (mode, "test") == 0) ? TIER_MODE_TEST : TIER_MODE_WM;
+}
+
+
+/*
+ * Install the tier implementations of the pluggable dht methods
+ * (destination selection, migration entry point, migration-needed check
+ * and layout search) into conf->methods.
+ *
+ * Returns 0 on success, -1 when this is NULL.
+ */
+int
+tier_init_methods (xlator_t *this)
+{
+        dht_conf_t    *conf    = NULL;
+        dht_methods_t *methods = NULL;
+        int            ret     = -1;
+
+        GF_VALIDATE_OR_GOTO ("tier", this, err);
+
+        conf    = this->private;
+        methods = &(conf->methods);
+
+        methods->layout_search            = tier_search;
+        methods->migration_needed         = tier_migration_needed;
+        methods->migration_other          = tier_start;
+        methods->migration_get_dst_subvol = tier_migration_get_dst;
+
+        ret = 0;
+
+err:
+        return ret;
+}
+
+
+
+/*
+ * Read an int32 option from the option dictionary, falling back to the
+ * supplied default when the key cannot be read.
+ */
+static int
+tier_get_int32_option (dict_t *options, char *key, int fallback)
+{
+        int value = 0;
+
+        if (dict_get_int32 (options, key, &value))
+                value = fallback;
+
+        return value;
+}
+
+/*
+ * init() of the tier xlator.
+ *
+ * Runs dht_init(), installs the tier methods and requires exactly two
+ * subvolumes. When conf->defrag is not set (client side instantiation)
+ * initialization ends there. Otherwise (server side) libgfdb is loaded,
+ * the tier options are read from this->options into defrag /
+ * defrag->tier_conf (with defaults for missing keys), the pause state is
+ * initialised, the per-volume run directory is created and the
+ * promotion/demotion query file prefixes are built.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+tier_init (xlator_t *this)
+{
+        int               ret       = -1;
+        dht_conf_t       *conf      = NULL;
+        gf_defrag_info_t *defrag    = NULL;
+        gf_tier_conf_t   *tier_conf = NULL;
+        char             *voldir    = NULL;
+        char             *mode      = NULL;
+        char             *paused    = NULL;
+
+        ret = dht_init (this);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "tier_init failed");
+                goto out;
+        }
+
+        conf = this->private;
+
+        ret = tier_init_methods (this);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "tier_init_methods failed");
+                goto out;
+        }
+
+        if (conf->subvolume_cnt != 2) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "Invalid number of subvolumes %d", conf->subvolume_cnt);
+                /* ret is 0 from the call above; an invalid graph must not
+                 * be reported as a successful init */
+                ret = -1;
+                goto out;
+        }
+
+        /* if instantiated from client side initialization is complete. */
+        if (!conf->defrag) {
+                ret = 0;
+                goto out;
+        }
+
+        /* if instantiated from server side, load db libraries */
+        ret = tier_load_externals (this);
+        if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       DHT_MSG_LOG_TIER_ERROR,
+                       "Could not load externals. Aborting");
+                goto out;
+        }
+
+        defrag = conf->defrag;
+        tier_conf = &defrag->tier_conf;
+
+        tier_conf->last_demote_qfile_index = 0;
+        tier_conf->last_promote_qfile_index = 0;
+
+        tier_conf->is_tier = 1;
+
+        /* Numeric options; a missing key selects the default */
+        tier_conf->tier_max_promote_size =
+                tier_get_int32_option (this->options,
+                                       "tier-max-promote-file-size", 0);
+
+        tier_conf->tier_promote_frequency =
+                tier_get_int32_option (this->options,
+                                       "tier-promote-frequency",
+                                       DEFAULT_PROMOTE_FREQ_SEC);
+
+        tier_conf->tier_demote_frequency =
+                tier_get_int32_option (this->options,
+                                       "tier-demote-frequency",
+                                       DEFAULT_DEMOTE_FREQ_SEC);
+
+        tier_conf->watermark_hi =
+                tier_get_int32_option (this->options,
+                                       "watermark-hi", DEFAULT_WM_HI);
+
+        tier_conf->watermark_low =
+                tier_get_int32_option (this->options,
+                                       "watermark-low", DEFAULT_WM_LOW);
+
+        defrag->write_freq_threshold =
+                tier_get_int32_option (this->options,
+                                       "write-freq-threshold",
+                                       DEFAULT_WRITE_FREQ_SEC);
+
+        defrag->read_freq_threshold =
+                tier_get_int32_option (this->options,
+                                       "read-freq-threshold",
+                                       DEFAULT_READ_FREQ_SEC);
+
+        /* Option is in MB, the limit is kept in bytes */
+        tier_conf->max_migrate_bytes =
+                (uint64_t) tier_get_int32_option (this->options,
+                                                  "tier-max-mb",
+                                                  DEFAULT_TIER_MAX_MIGRATE_MB)
+                * 1024 * 1024;
+
+        tier_conf->max_migrate_files =
+                tier_get_int32_option (this->options,
+                                       "tier-max-files",
+                                       DEFAULT_TIER_MAX_MIGRATE_FILES);
+
+        ret = dict_get_str (this->options,
+                            "tier-mode", &mode);
+        if (ret) {
+                tier_conf->mode = DEFAULT_TIER_MODE;
+        } else {
+                ret = tier_validate_mode (mode);
+                if (ret < 0) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                               DHT_MSG_LOG_TIER_ERROR,
+                               "tier_init failed - invalid mode");
+                        goto out;
+                }
+                tier_conf->mode = ret;
+        }
+
+        pthread_mutex_init (&tier_conf->pause_mutex, 0);
+
+        gf_defrag_set_pause_state (tier_conf, TIER_RUNNING);
+
+        /* "tier-pause" is optional: paused simply stays NULL when the key
+         * is absent, so the lookup result itself is not needed */
+        ret = dict_get_str (this->options,
+                            "tier-pause", &paused);
+
+        if (paused && strcmp (paused, "on") == 0)
+                gf_defrag_set_pause_state (tier_conf, TIER_REQUEST_PAUSE);
+
+        ret = gf_asprintf(&voldir, "%s/%s",
+                          DEFAULT_VAR_RUN_DIRECTORY,
+                          this->name);
+        if (ret < 0)
+                goto out;
+
+        ret = mkdir_p(voldir, 0777, _gf_true);
+        if (ret == -1 && errno != EEXIST) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_ERROR,
+                        "tier_init failed");
+
+                GF_FREE(voldir);
+                goto out;
+        }
+
+        GF_FREE(voldir);
+
+        ret = gf_asprintf (&promotion_qfile, "%s/%s/promote",
+                           DEFAULT_VAR_RUN_DIRECTORY,
+                           this->name);
+        if (ret < 0)
+                goto out;
+
+        ret = gf_asprintf (&demotion_qfile, "%s/%s/demote",
+                           DEFAULT_VAR_RUN_DIRECTORY,
+                           this->name);
+        if (ret < 0) {
+                GF_FREE (promotion_qfile);
+                /* tier_fini() frees promotion_qfile as well; reset it so
+                 * it is not freed twice */
+                promotion_qfile = NULL;
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                DHT_MSG_LOG_TIER_STATUS,
+                "Promote/demote frequency %d/%d "
+                "Write/Read freq thresholds %d/%d",
+                tier_conf->tier_promote_frequency,
+                tier_conf->tier_demote_frequency,
+                defrag->write_freq_threshold,
+                defrag->read_freq_threshold);
+
+        ret = 0;
+
+out:
+
+        return ret;
+}
+
+
+/*
+ * Completion callback of the pause synctask started from
+ * tier_reconfigure(): logs op_ret and hands it back unchanged.
+ * sync_frame and data are not used.
+ */
+int
+tier_cli_pause_done (int op_ret, call_frame_t *sync_frame, void *data)
+{
+        gf_msg ("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
+                "Migrate file paused with op_ret %d", op_ret);
+
+        return op_ret;
+}
+
+/*
+ * Synctask body used to pause tiering. data is the tier xlator; its
+ * defrag info is handed to gf_defrag_pause_tier().
+ *
+ * Returns 0 once the pause call was made, -1 when the xlator has no
+ * private conf or no defrag info.
+ */
+int
+tier_cli_pause (void *data)
+{
+        xlator_t         *this   = data;
+        dht_conf_t       *conf   = NULL;
+        gf_defrag_info_t *defrag = NULL;
+        int               ret    = -1;
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, exit);
+
+        defrag = conf->defrag;
+        GF_VALIDATE_OR_GOTO (this->name, defrag, exit);
+
+        gf_defrag_pause_tier (this, defrag);
+        ret = 0;
+
+exit:
+        return ret;
+}
+
+/*
+ * reconfigure() of the tier xlator.
+ *
+ * When conf->defrag is set, the tier options are re-read from options into
+ * defrag / defrag->tier_conf through GF_OPTION_RECONF, and the "tier-pause"
+ * option then either schedules tier_cli_pause() as a synctask (on) or calls
+ * gf_defrag_resume_tier() (off). Failures of those two steps are only
+ * logged.
+ *
+ * The function always ends by returning dht_reconfigure (this, options);
+ * the local ret is never returned.
+ */
+int
+tier_reconfigure (xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ char *mode = NULL;
+ int migrate_mb = 0;
+ gf_boolean_t req_pause = _gf_false;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ conf = this->private;
+ /* Tier options are only re-read when defrag info is present */
+ if (conf->defrag) {
+ defrag = conf->defrag;
+ GF_OPTION_RECONF ("tier-max-promote-file-size",
+ defrag->tier_conf.tier_max_promote_size,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("tier-promote-frequency",
+ defrag->tier_conf.tier_promote_frequency,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("tier-demote-frequency",
+ defrag->tier_conf.tier_demote_frequency,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("write-freq-threshold",
+ defrag->write_freq_threshold, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("read-freq-threshold",
+ defrag->read_freq_threshold, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("watermark-hi",
+ defrag->tier_conf.watermark_hi, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("watermark-low",
+ defrag->tier_conf.watermark_low, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("tier-mode",
+ mode, options,
+ str, out);
+ defrag->tier_conf.mode = tier_validate_mode (mode); /* NOTE(review): tier_validate_mode() passes mode to strcmp(); confirm mode cannot still be NULL here */
+
+ GF_OPTION_RECONF ("tier-max-mb",
+ migrate_mb, options,
+ int32, out);
+ defrag->tier_conf.max_migrate_bytes = (uint64_t) migrate_mb * /* option is in MB, limit is kept in bytes */
+ 1024 * 1024;
+
+ GF_OPTION_RECONF ("tier-max-files",
+ defrag->tier_conf.max_migrate_files, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("tier-pause",
+ req_pause, options,
+ bool, out);
+ /* Apply the requested pause state: pause via a synctask, or resume */
+ if (req_pause == _gf_true) {
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+ /* NOTE(review): frame is not destroyed anywhere in this function - confirm who releases it */
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ ret = synctask_new (this->ctx->env, tier_cli_pause,
+ tier_cli_pause_done, frame, this);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "pause tier failed on reconfigure");
+ }
+ } else {
+ ret = gf_defrag_resume_tier (this, defrag);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "resume tier failed on reconfigure");
+ }
+ }
+
+ }
+
+out:
+ return dht_reconfigure (this, options);
+}
+
+/*
+ * fini() of the tier xlator: frees the query file prefixes, closes the
+ * libgfdb handle if one is open, then runs dht_fini().
+ */
+void
+tier_fini (xlator_t *this)
+{
+        GF_FREE (promotion_qfile);
+        GF_FREE (demotion_qfile);
+
+        if (libhandle) {
+                dlclose (libhandle);
+        }
+
+        dht_fini (this);
+}
+
+/* xlator life-cycle entry points; notify is inherited from dht */
+class_methods_t class_methods = {
+        .notify      = dht_notify,
+        .reconfigure = tier_reconfigure,
+        .fini        = tier_fini,
+        .init        = tier_init,
+};
+
+
+/*
+ * File operation table. Only create, readdir, readdirp, unlink and link
+ * have tier specific implementations; everything else is inherited from
+ * dht.
+ */
+struct xlator_fops fops = {
+
+        /* Tier specific operations */
+        .create       = tier_create,
+        .link         = tier_link,
+        .readdir      = tier_readdir,
+        .readdirp     = tier_readdirp,
+        .unlink       = tier_unlink,
+
+        /* Name space and directory operations */
+        .lookup       = dht_lookup,
+        .mkdir        = dht_mkdir,
+        .mknod        = dht_mknod,
+        .open         = dht_open,
+        .opendir      = dht_opendir,
+        .fsyncdir     = dht_fsyncdir,
+        .rename       = dht_rename,
+        .rmdir        = dht_rmdir,
+        .statfs       = dht_statfs,
+        .symlink      = dht_symlink,
+
+        /* Locks */
+        .entrylk      = dht_entrylk,
+        .fentrylk     = dht_fentrylk,
+        .inodelk      = dht_inodelk,
+        .finodelk     = dht_finodelk,
+        .lk           = dht_lk,
+
+        /* Inode read operations */
+        .access       = dht_access,
+        .stat         = dht_stat,
+        .fstat        = dht_fstat,
+        .getxattr     = dht_getxattr,
+        .fgetxattr    = dht_fgetxattr,
+        .readlink     = dht_readlink,
+        .readv        = dht_readv,
+        .flush        = dht_flush,
+        .fsync        = dht_fsync,
+
+        /* Inode write operations */
+        .discard      = dht_discard,
+        .fallocate    = dht_fallocate,
+        .setattr      = dht_setattr,
+        .fsetattr     = dht_fsetattr,
+        .setxattr     = dht_setxattr,
+        .fsetxattr    = dht_fsetxattr,
+        .removexattr  = dht_removexattr,
+        .fremovexattr = dht_fremovexattr,
+        .truncate     = dht_truncate,
+        .ftruncate    = dht_ftruncate,
+        .writev       = dht_writev,
+        .xattrop      = dht_xattrop,
+        .fxattrop     = dht_fxattrop,
+        .zerofill     = dht_zerofill,
+};
+
+
+/* Inode/fd life-cycle callbacks, both inherited from dht */
+struct xlator_cbks cbks = {
+        .forget  = dht_forget,
+        .release = dht_release,
+};
+
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
new file mode 100644
index 00000000000..0807608fda2
--- /dev/null
+++ b/xlators/cluster/dht/src/tier.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TIER_H_
+#define _TIER_H_
+
+
+/******************************************************************************/
+/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
+#include "dht-common.h"
+#include "xlator.h"
+#include <signal.h>
+#include <fnmatch.h>
+#include <signal.h>
+
+/*
+ * Size of timer wheel. We would not promote or demote less
+ * frequently than this number.
+ */
+#define TIMER_SECS 3600
+
+#include "gfdb_data_store.h"
+#include <ctype.h>
+#include <sys/stat.h>
+
+#define PROMOTION_QFILE "promotequeryfile"
+#define DEMOTION_QFILE "demotequeryfile"
+
+#define TIER_HASHED_SUBVOL conf->subvolumes[0]
+#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
+
+/*
+ * Evaluates to promotion_qfile when is_promotion is non-zero and to
+ * demotion_qfile otherwise.  Both names must be visible at the point of
+ * use.  The whole expansion is parenthesised so that the conditional
+ * cannot be split up by operators surrounding the macro invocation.
+ */
+#define GET_QFILE_PATH(is_promotion)\
+        ((is_promotion) ? promotion_qfile : demotion_qfile)
+
+typedef struct tier_qfile_array { /* a set of file descriptors plus bookkeeping counters */
+ int *fd_array; /* the file descriptors */
+ ssize_t array_size; /* presumably the number of entries in fd_array - TODO confirm */
+ ssize_t next_index; /* presumably the next fd_array slot to use - TODO confirm */
+ /* Indicates the number of exhausted FDs */
+ ssize_t exhausted_count;
+} tier_qfile_array_t;
+
+
+typedef struct _query_cbk_args { /* typedef'd as query_cbk_args_t */
+ xlator_t *this;
+ gf_defrag_info_t *defrag;
+ /* File descriptor used for writing */
+ int query_fd;
+ int is_promotion; /* presumably non-zero for promotion, zero for demotion - TODO confirm */
+ /* File descriptor array used for reading */
+ tier_qfile_array_t *qfile_array;
+} query_cbk_args_t;
+
+int
+gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
+
+typedef struct gfdb_brick_info { /* bundles a timestamp, a promote flag and query callback arguments */
+ gfdb_time_t *time_stamp;
+ gf_boolean_t _gfdb_promote;
+ query_cbk_args_t *_query_cbk_args; /* not owned by this struct as far as this header shows - TODO confirm */
+} gfdb_brick_info_t;
+
+typedef struct brick_list { /* list node, typedef'd as tier_brick_list_t; presumably one per brick - TODO confirm */
+ xlator_t *xlator;
+ char *brick_db_path;
+ char brick_name[NAME_MAX]; /* fixed-size buffer of NAME_MAX bytes */
+ char qfile_path[PATH_MAX]; /* fixed-size buffer of PATH_MAX bytes */
+ struct list_head list; /* linkage into the list this node belongs to */
+} tier_brick_list_t;
+
+typedef struct _dm_thread_args { /* typedef'd as migration_args_t; presumably the argument block of a migration thread - TODO confirm */
+ xlator_t *this;
+ gf_defrag_info_t *defrag;
+ struct list_head *brick_list; /* list head; presumably of tier_brick_list_t nodes - TODO confirm */
+ int freq_time;
+ int return_value;
+ int is_promotion; /* same flag name as in query_cbk_args_t */
+} migration_args_t;
+
+typedef enum tier_watermark_op_ { /* values are 0..3 in declaration order; note MID comes after HI */
+ TIER_WM_NONE = 0,
+ TIER_WM_LOW,
+ TIER_WM_HI,
+ TIER_WM_MID
+} tier_watermark_op_t;
+
+#define DEFAULT_PROMOTE_FREQ_SEC 120
+#define DEFAULT_DEMOTE_FREQ_SEC 120
+#define DEFAULT_DEMOTE_DEGRADED 10
+#define DEFAULT_WRITE_FREQ_SEC 0
+#define DEFAULT_READ_FREQ_SEC 0
+#define DEFAULT_WM_LOW 75
+#define DEFAULT_WM_HI 90
+#define DEFAULT_TIER_MODE TIER_MODE_TEST
+#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
+#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
+
+#endif
diff --git a/xlators/cluster/dht/src/tier.sym b/xlators/cluster/dht/src/tier.sym
new file mode 100644
index 00000000000..60205d145b6
--- /dev/null
+++ b/xlators/cluster/dht/src/tier.sym
@@ -0,0 +1,9 @@
+fops
+cbks
+class_methods
+dht_methods
+tier_methods
+options
+mem_acct_init
+reconfigure
+dumpops
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
new file mode 100644
index 00000000000..6544f4208f5
--- /dev/null
+++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "byte-order.h"
+
+/* Mock: reports success without looking at its arguments; *hash_p is not written. */
+int
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/* Mock: reports success without looking at its arguments; *layout is not written. */
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/* Mock: reports success without storing anything. */
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+                          dht_layout_t *layout_int)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/* Mock: reports success without looking at the dictionary; *ptr is not written. */
+int
+dict_get_ptr (dict_t *this, char *key, void **ptr)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/* Mock: reports success without looking at the dictionary; *ptr and *len are not written. */
+int
+dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/* Mock: swallows the log message and reports success. */
+int
+_gf_log (const char *domain, const char *file, const char *function,
+         int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/* Mock: swallows the log message and reports success. */
+int
+_gf_log_callingfn (const char *domain, const char *file, const char *function,
+                   int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+        const int ret = 0;
+
+        return ret;
+}
+
+/*
+ * Mock: does nothing, so 'out' is left exactly as the caller passed it.
+ * A will-return function could be called here to place the correct data
+ * in *out.
+ */
+void
+gf_uuid_unparse (const uuid_t uu, char *out)
+{
+        return;
+}
+
+/* Mock: swallows the message and reports success. */
+int
+_gf_msg (const char *domain, const char *file, const char *function,
+         int32_t line, gf_loglevel_t level, int errnum, int trace,
+         uint64_t msgid, const char *fmt, ...)
+{
+        const int ret = 0;
+
+        return ret;
+}
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
new file mode 100644
index 00000000000..84a89160e38
--- /dev/null
+++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-common.h"
+#include "logging.h"
+#include "xlator.h"
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka_pbc.h>
+#include <cmocka.h>
+
+/*
+ * Helper functions
+ */
+
+/*
+ * Build a minimal xlator_t for the layout tests: the xlator itself, a
+ * memory-accounting area with num_types records (each record's lock
+ * initialised) and an empty glusterfs context.
+ *
+ * num_types must be greater than zero.  The result is released with
+ * helper_xlator_destroy().
+ */
+static xlator_t *
+helper_xlator_init(uint32_t num_types)
+{
+    xlator_t *xl;
+    int i, ret;
+
+    REQUIRE(num_types > 0);
+
+    xl = test_calloc(1, sizeof(xlator_t));
+    assert_non_null(xl);
+
+    /* xl->mem_acct is a pointer and the records are reached through
+     * xl->mem_acct->rec[i], so header and records share one allocation:
+     * one header plus num_types records.
+     * NOTE(review): assumes 'rec' is the trailing array of struct mem_acct;
+     * confirm against its definition. */
+    xl->mem_acct = test_calloc(1, sizeof(struct mem_acct)
+                                  + sizeof(struct mem_acct_rec) * num_types);
+    assert_non_null(xl->mem_acct);
+    /* Only write through mem_acct once it has actually been allocated. */
+    xl->mem_acct->num_types = num_types;
+
+    xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t));
+    assert_non_null(xl->ctx);
+
+    for (i = 0; i < num_types; i++) {
+        ret = LOCK_INIT(&(xl->mem_acct->rec[i].lock));
+        assert_false(ret);
+    }
+
+    ENSURE(num_types == xl->mem_acct->num_types);
+    ENSURE(NULL != xl);
+
+    return xl;
+}
+
+/*
+ * Undo helper_xlator_init(): destroy the lock of every accounting record,
+ * then release the accounting area, the context and the xlator itself.
+ * Always returns 0; a failing LOCK_DESTROY fails the running test.
+ */
+static int
+helper_xlator_destroy(xlator_t *xl)
+{
+    int i, ret;
+
+    for (i = 0; i < xl->mem_acct->num_types; i++) {
+        ret = LOCK_DESTROY(&(xl->mem_acct->rec[i].lock));
+        assert_int_equal(ret, 0);
+    }
+
+    /* Everything here came from test_calloc(), so it is handed back with
+     * test_free().  The accounting area is freed through the pointer that
+     * was allocated, not through its 'rec' member. */
+    test_free(xl->mem_acct);
+    test_free(xl->ctx);
+    test_free(xl);
+    return 0;
+}
+
+/*
+ * Unit tests
+ */
+/*
+ * dht_layout_new(): invalid arguments must trip an assertion; a valid call
+ * must return a layout whose fields are initialised from the requested
+ * count and, when the xlator has a private dht_conf_t, from that
+ * configuration.
+ */
+static void
+test_dht_layout_new(void **state)
+{
+    xlator_t     *this;
+    dht_layout_t *result;
+    dht_conf_t   *priv;
+    int           wanted;
+
+    /* Argument validation */
+    expect_assert_failure(dht_layout_new(NULL, 0));
+    expect_assert_failure(dht_layout_new((xlator_t *)0x12345, -1));
+
+    this = helper_xlator_init(10);
+
+    /* Case 1: the xlator has no private configuration */
+    assert_null(this->private);
+    wanted = 100;
+    result = dht_layout_new(this, wanted);
+    assert_non_null(result);
+    assert_int_equal(result->type, DHT_HASH_TYPE_DM);
+    assert_int_equal(result->cnt, wanted);
+    assert_int_equal(result->ref, 1);
+    assert_int_equal(result->gen, 0);
+    assert_int_equal(result->spread_cnt, 0);
+    free(result);
+
+    /* Case 2: gen and spread_cnt are taken from the private configuration */
+    wanted = 110;
+    priv = (dht_conf_t *)test_calloc(1, sizeof(dht_conf_t));
+    assert_non_null(priv);
+    priv->dir_spread_cnt = 12345;
+    priv->gen = -123;
+    this->private = priv;
+
+    result = dht_layout_new(this, wanted);
+    assert_non_null(result);
+    assert_int_equal(result->type, DHT_HASH_TYPE_DM);
+    assert_int_equal(result->cnt, wanted);
+    assert_int_equal(result->ref, 1);
+    assert_int_equal(result->gen, priv->gen);
+    assert_int_equal(result->spread_cnt, priv->dir_spread_cnt);
+    free(result);
+
+    free(priv);
+    helper_xlator_destroy(this);
+}
+
+/* Run every dht layout unit test as one cmocka group. */
+int main(void) {
+    /* struct CMUnitTest entries are built with cmocka_unit_test(); the
+     * older unit_test() macro initialises the legacy UnitTest type. */
+    const struct CMUnitTest xlator_dht_layout_tests[] = {
+        cmocka_unit_test(test_dht_layout_new),
+    };
+
+    return cmocka_run_group_tests(xlator_dht_layout_tests, NULL, NULL);
+}
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/cluster/ec/Makefile.am
index d471a3f9243..d471a3f9243 100644
--- a/xlators/cluster/unify/Makefile.am
+++ b/xlators/cluster/ec/Makefile.am
diff --git a/xlators/cluster/ec/src/Makefile.am b/xlators/cluster/ec/src/Makefile.am
new file mode 100644
index 00000000000..cbdceefdbe0
--- /dev/null
+++ b/xlators/cluster/ec/src/Makefile.am
@@ -0,0 +1,53 @@
+xlator_LTLIBRARIES = ec.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+ec_sources := ec.c
+ec_sources += ec-data.c
+ec_sources += ec-helpers.c
+ec_sources += ec-common.c
+ec_sources += ec-generic.c
+ec_sources += ec-locks.c
+ec_sources += ec-dir-read.c
+ec_sources += ec-dir-write.c
+ec_sources += ec-inode-read.c
+ec_sources += ec-inode-write.c
+ec_sources += ec-combine.c
+ec_sources += ec-gf.c
+ec_sources += ec-method.c
+ec_sources += ec-heal.c
+ec_sources += ec-heald.c
+
+ec_headers := ec.h
+ec_headers += ec-mem-types.h
+ec_headers += ec-helpers.h
+ec_headers += ec-data.h
+ec_headers += ec-fops.h
+ec_headers += ec-common.h
+ec_headers += ec-combine.h
+ec_headers += ec-gf.h
+ec_headers += ec-method.h
+ec_headers += ec-heald.h
+ec_headers += ec-messages.h
+
+ec_ext_sources = $(top_builddir)/xlators/lib/src/libxlator.c
+
+ec_ext_headers = $(top_builddir)/xlators/lib/src/libxlator.h
+
+ec_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+ec_la_SOURCES = $(ec_sources) $(ec_headers) $(ec_ext_sources) $(ec_ext_headers)
+ec_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS += -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+# After installing data, make the translator also available as disperse.so (a symlink to ec.so).
+install-data-hook:
+ ln -sf ec.so $(DESTDIR)$(xlatordir)/disperse.so
+# On uninstall, remove that disperse.so symlink again.
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/disperse.so
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
new file mode 100644
index 00000000000..379d703bf35
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -0,0 +1,916 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "libxlator.h"
+#include "byte-order.h"
+
+#include "ec-data.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-messages.h"
+#include "quota-common-utils.h"
+
+#define EC_QUOTA_PREFIX "trusted.glusterfs.quota."
+
+struct _ec_dict_info;
+typedef struct _ec_dict_info ec_dict_info_t;
+
+struct _ec_dict_combine;
+typedef struct _ec_dict_combine ec_dict_combine_t;
+
+struct _ec_dict_info /* a dictionary paired with a counter */
+{
+ dict_t * dict;
+ int32_t count; /* NOTE(review): meaning not evident from the code shown here - confirm */
+};
+
+struct _ec_dict_combine /* context handed to ec_dict_data_combine() through dict_foreach() */
+{
+ ec_cbk_data_t * cbk; /* answer whose dictionary is being combined */
+ int32_t which; /* EC_COMBINE_XDATA selects cbk->xdata, anything else cbk->dict */
+};
+
+/*
+ * Combine callback for write-like fops: decides whether two answers (dst
+ * and src) are compatible and, if so, merges src's iatt data into dst.
+ *
+ * The number of iatt entries that get compared depends on the fop; the
+ * xattr set/remove fops are accepted without any iatt comparison.
+ *
+ * Returns 1 when the answers can be combined and 0 otherwise, which
+ * includes NULL arguments and fops not handled here.
+ */
+int32_t
+ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
+                  ec_cbk_data_t *src)
+{
+        int iatt_count = 0;
+
+        if ((fop == NULL) || (dst == NULL) || (src == NULL)) {
+                return 0;
+        }
+
+        switch (fop->id) {
+        /* No iatt comparison for these */
+        case GF_FOP_REMOVEXATTR:
+        case GF_FOP_FREMOVEXATTR:
+        case GF_FOP_SETXATTR:
+        case GF_FOP_FSETXATTR:
+                return 1;
+
+        case GF_FOP_SYMLINK:
+        case GF_FOP_LINK:
+        case GF_FOP_CREATE:
+        case GF_FOP_MKNOD:
+        case GF_FOP_MKDIR:
+                iatt_count = 3;
+                break;
+
+        case GF_FOP_UNLINK:
+        case GF_FOP_RMDIR:
+        case GF_FOP_SETATTR:
+        case GF_FOP_FSETATTR:
+        case GF_FOP_TRUNCATE:
+        case GF_FOP_FTRUNCATE:
+        case GF_FOP_WRITE:
+        case GF_FOP_FALLOCATE:
+        case GF_FOP_DISCARD:
+        case GF_FOP_ZEROFILL:
+                iatt_count = 2;
+                break;
+
+        case GF_FOP_RENAME:
+                iatt_count = 5;
+                break;
+
+        default:
+                gf_msg_callingfn (fop->xl->name, GF_LOG_WARNING, EINVAL,
+                                  EC_MSG_INVALID_FOP,
+                                  "Invalid fop %d", fop->id);
+                return 0;
+        }
+
+        if (ec_iatt_combine (fop, dst->iatt, src->iatt, iatt_count)) {
+                return 1;
+        }
+
+        gf_msg (fop->xl->name, GF_LOG_NOTICE, 0,
+                EC_MSG_IATT_MISMATCH,
+                "Mismatching iatt in "
+                "answers of '%s'", gf_fop_list[fop->id]);
+
+        return 0;
+}
+
+/*
+ * Keep the later of two timestamps in (*dst_sec, *dst_nsec): the pair is
+ * overwritten with (src_sec, src_nsec) only when src is strictly later.
+ */
+void ec_iatt_time_merge(uint32_t * dst_sec, uint32_t * dst_nsec,
+                        uint32_t src_sec, uint32_t src_nsec)
+{
+    /* dst is later in whole seconds: nothing to do */
+    if (*dst_sec > src_sec) {
+        return;
+    }
+    /* same second and dst is not earlier within it: nothing to do */
+    if ((*dst_sec == src_sec) && (*dst_nsec >= src_nsec)) {
+        return;
+    }
+
+    *dst_sec = src_sec;
+    *dst_nsec = src_nsec;
+}
+
+/*
+ * Tell whether differences found in 'iatt' have to be taken seriously.
+ *
+ * The check is made against the top-level fop (the one reached by
+ * following ->parent), because only that one has its locks filled in.
+ * Returns _gf_true when that fop is a lookup, or when iatt->ia_ino matches
+ * the inode of one of the locks it holds; _gf_false otherwise.
+ */
+static
+gf_boolean_t
+ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt)
+{
+    ec_fop_data_t *top;
+    int32_t idx;
+
+    /* Climb to the top-level fop. */
+    for (top = fop; top->parent != NULL; top = top->parent) {
+    }
+
+    /* Lookups run without locks but still need to see every difference
+     * between bricks (lookup-specific code deals with them), so all of
+     * their iatt structures count as trusted. */
+    if (top->id == GF_FOP_LOOKUP) {
+        return _gf_true;
+    }
+
+    /* Does the iatt belong to an inode locked by this fop? */
+    idx = 0;
+    while (idx < top->lock_count) {
+        if (gfid_to_ino(top->locks[idx].lock->loc.inode->gfid) ==
+            iatt->ia_ino) {
+            return _gf_true;
+        }
+        idx++;
+    }
+
+    return _gf_false;
+}
+
+/*
+ * Compare 'count' iatt entries of two answers and, if they are compatible,
+ * fold src into dst.
+ *
+ * For every index the inode number, the gfid and (for block and character
+ * devices) the rdev must be equal.  uid, gid, mode and (for regular files)
+ * size are compared as well, but a difference in those only counts as a
+ * failure when ec_iatt_is_trusted() accepts the iatt; otherwise it is
+ * logged at debug level and ignored.
+ *
+ * When all entries pass, dst accumulates src's block count and keeps the
+ * larger block size and the later atime, mtime and ctime.
+ *
+ * Returns 1 when the entries were combined.  Returns 0 on the first
+ * mismatch, after logging both sides; dst has not been modified then.
+ */
+int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+                        int32_t count)
+{
+    int32_t i;
+    gf_boolean_t failed = _gf_false;
+
+    for (i = 0; i < count; i++)
+    {
+        /* Check for basic fields. These fields must be equal always, even if
+         * the inode is not locked because in these cases the parent inode
+         * will be locked and differences in these fields require changes in
+         * the parent directory. */
+        if ((dst[i].ia_ino != src[i].ia_ino) ||
+            (((dst[i].ia_type == IA_IFBLK) || (dst[i].ia_type == IA_IFCHR)) &&
+             (dst[i].ia_rdev != src[i].ia_rdev)) ||
+            (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0)) {
+            failed = _gf_true;
+        }
+        /* Check for not so stable fields. These fields can change if the
+         * inode is not locked. */
+        if (!failed && ((dst[i].ia_uid != src[i].ia_uid) ||
+                        (dst[i].ia_gid != src[i].ia_gid) ||
+                        ((dst[i].ia_type == IA_IFREG) &&
+                         (dst[i].ia_size != src[i].ia_size)) ||
+                        (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
+                         st_mode_from_ia(src[i].ia_prot, src[i].ia_type)))) {
+            /* NOTE(review): 'dst' (i.e. &dst[0]) is passed here for every
+             * index rather than &dst[i]; confirm that this is intended. */
+            if (ec_iatt_is_trusted(fop, dst)) {
+                /* If the iatt contains information from an inode that is
+                 * locked, these differences are real problems, so we need to
+                 * report them. Otherwise we ignore them and don't care which
+                 * data is returned. */
+                failed = _gf_true;
+            } else {
+                gf_msg_debug (fop->xl->name, 0,
+                        "Ignoring iatt differences because inode is not "
+                        "locked");
+            }
+        }
+        if (failed) {
+            /* Each pair is printed as dst-src.  The src mode is built from
+             * src's own type, matching the comparison made above. */
+            gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
+                    EC_MSG_IATT_COMBINE_FAIL,
+                    "Failed to combine iatt (inode: %lu-%lu, links: %u-%u, "
+                    "uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, "
+                    "mode: %o-%o)",
+                    dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
+                    src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid,
+                    dst[i].ia_gid, src[i].ia_gid, dst[i].ia_rdev,
+                    src[i].ia_rdev, dst[i].ia_size, src[i].ia_size,
+                    st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
+                    st_mode_from_ia(src[i].ia_prot, src[i].ia_type));
+
+            return 0;
+        }
+    }
+
+    /* Everything matched: merge src into dst. */
+    while (count-- > 0)
+    {
+        dst[count].ia_blocks += src[count].ia_blocks;
+        if (dst[count].ia_blksize < src[count].ia_blksize)
+        {
+            dst[count].ia_blksize = src[count].ia_blksize;
+        }
+
+        ec_iatt_time_merge(&dst[count].ia_atime, &dst[count].ia_atime_nsec,
+                           src[count].ia_atime, src[count].ia_atime_nsec);
+        ec_iatt_time_merge(&dst[count].ia_mtime, &dst[count].ia_mtime_nsec,
+                           src[count].ia_mtime, src[count].ia_mtime_nsec);
+        ec_iatt_time_merge(&dst[count].ia_ctime, &dst[count].ia_ctime_nsec,
+                           src[count].ia_ctime, src[count].ia_ctime_nsec);
+    }
+
+    return 1;
+}
+
+/*
+ * Rewrite ia_blocks of 'count' iatt entries: each value is multiplied by
+ * ec->fragments and divided by 'answers', rounding up.
+ */
+void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
+                     int32_t answers)
+{
+    uint64_t scaled;
+    int32_t idx;
+
+    for (idx = count - 1; idx >= 0; idx--)
+    {
+        /* adding answers - 1 before dividing rounds the result up */
+        scaled = iatt[idx].ia_blocks * ec->fragments + answers - 1;
+        scaled /= answers;
+        iatt[idx].ia_blocks = scaled;
+    }
+}
+
+/*
+ * Key filter for dictionary comparison: returns _gf_false for keys that
+ * match GF_XATTR_STIME_PATTERN and for GLUSTERFS_OPEN_FD_COUNT, _gf_true
+ * for every other key.  dict, value and arg are not used.
+ */
+gf_boolean_t
+ec_xattr_match (dict_t *dict, char *key, data_t *value, void *arg)
+{
+        if (fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) {
+                return _gf_false;
+        }
+        if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) {
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Returns _gf_true for the keys listed below and _gf_false for any other
+ * key.  It is passed to are_dicts_equal() together with ec_xattr_match().
+ */
+gf_boolean_t
+ec_value_ignore (char *key)
+{
+        /* Exact key names */
+        if (strcmp(key, GF_CONTENT_KEY) == 0)
+                return _gf_true;
+        if (strcmp(key, GF_XATTR_PATHINFO_KEY) == 0)
+                return _gf_true;
+        if (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)
+                return _gf_true;
+        if (strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0)
+                return _gf_true;
+        if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)
+                return _gf_true;
+        if (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0)
+                return _gf_true;
+        if (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0)
+                return _gf_true;
+
+        /* Keys starting with the clear-locks command */
+        if (strncmp(key, GF_XATTR_CLRLK_CMD,
+                    strlen (GF_XATTR_CLRLK_CMD)) == 0)
+                return _gf_true;
+
+        if (strcmp(key, DHT_IATT_IN_XDATA_KEY) == 0)
+                return _gf_true;
+
+        /* Anything under the quota prefix */
+        if (strncmp(key, EC_QUOTA_PREFIX, strlen(EC_QUOTA_PREFIX)) == 0)
+                return _gf_true;
+
+        /* Marker xtime and marker keys */
+        if (fnmatch(MARKER_XATTR_PREFIX ".*." XTIME, key, 0) == 0)
+                return _gf_true;
+        if (fnmatch(GF_XATTR_MARKER_KEY ".*", key, 0) == 0)
+                return _gf_true;
+
+        if (XATTR_IS_NODE_UUID(key))
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Returns 1 when are_dicts_equal() considers the two dictionaries equal
+ * under the ec_xattr_match / ec_value_ignore rules, 0 otherwise.
+ */
+int32_t
+ec_dict_compare (dict_t *dict1, dict_t *dict2)
+{
+        return are_dicts_equal (dict1, dict2, ec_xattr_match,
+                                ec_value_ignore) ? 1 : 0;
+}
+
+/*
+ * Collect the values stored under 'key' in the dictionaries of cbk and of
+ * every answer chained to it through ->next.
+ *
+ * 'which' selects the dictionary of each answer: xdata for
+ * EC_COMBINE_XDATA, dict otherwise.  On entry *count is the capacity of
+ * 'list'; on success it is the number of values stored.  Answers that do
+ * not have the key are skipped.
+ *
+ * Returns 0 on success, or -EINVAL (after logging) when another answer is
+ * reached with the list already full.
+ */
+int32_t ec_dict_list(data_t ** list, int32_t * count, ec_cbk_data_t * cbk,
+                     int32_t which, char * key)
+{
+    ec_cbk_data_t *ans = cbk;
+    dict_t *dict;
+    int32_t capacity = *count;
+    int32_t found = 0;
+
+    while (ans != NULL) {
+        if (found >= capacity) {
+            gf_msg (cbk->fop->xl->name, GF_LOG_ERROR, EINVAL,
+                    EC_MSG_INVALID_DICT_NUMS,
+                    "Unexpected number of "
+                    "dictionaries");
+
+            return -EINVAL;
+        }
+
+        if (which == EC_COMBINE_XDATA) {
+            dict = ans->xdata;
+        } else {
+            dict = ans->dict;
+        }
+
+        list[found] = dict_get(dict, key);
+        if (list[found] != NULL) {
+            found++;
+        }
+
+        ans = ans->next;
+    }
+
+    *count = found;
+
+    return 0;
+}
+
+/*
+ * Expand fmt/args into a newly allocated string and split it in place into
+ * three NUL-terminated pieces laid out as "<prefix>{<separator>}<suffix>".
+ *
+ * On success returns 0 with *str pointing to the prefix (this is also the
+ * start of the buffer, to be released with GF_FREE), *sep to the text
+ * between the first '{' and the following '}', and *post to what comes
+ * after that '}'.
+ *
+ * Returns -ENOMEM when gf_vasprintf() fails.  Returns -EINVAL, after
+ * logging, when either brace is missing; the buffer is freed and *str is
+ * reset to NULL so the caller is not left holding a dangling pointer.
+ * *sep and *post must not be used after a failure.
+ */
+int32_t ec_concat_prepare(xlator_t *xl, char **str, char **sep, char **post,
+                          const char *fmt, va_list args)
+{
+    char *tmp;
+    int32_t len;
+
+    len = gf_vasprintf(str, fmt, args);
+    if (len < 0) {
+        return -ENOMEM;
+    }
+
+    tmp = strchr(*str, '{');
+    if (tmp == NULL) {
+        goto out;
+    }
+    /* terminate the prefix; the separator starts right after the brace */
+    *tmp++ = 0;
+    *sep = tmp;
+    tmp = strchr(tmp, '}');
+    if (tmp == NULL) {
+        goto out;
+    }
+    /* terminate the separator; the suffix starts right after the brace */
+    *tmp++ = 0;
+    *post = tmp;
+
+    return 0;
+
+out:
+    gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
+            EC_MSG_INVALID_FORMAT,
+            "Invalid concat format");
+
+    GF_FREE(*str);
+    *str = NULL;
+
+    return -EINVAL;
+}
+
+/*
+ * Replace the value of 'key' in cbk's dictionary with the concatenation of
+ * the values found for that key in all combined answers.
+ *
+ * fmt and the variadic arguments describe the result as
+ * "<prefix>{<separator>}<suffix>" (see ec_concat_prepare()): the values are
+ * joined with the separator and wrapped in prefix and suffix.  Every value
+ * is treated as a NUL-terminated string whose stored length includes the
+ * terminator.
+ *
+ * 'which' selects xdata (EC_COMBINE_XDATA) or dict.
+ *
+ * Returns 0 on success, also when no answer holds the key (the dictionary
+ * is left untouched then), or a negative error code.
+ */
+int32_t ec_dict_data_concat(const char * fmt, ec_cbk_data_t * cbk,
+                            int32_t which, char * key, ...)
+{
+    data_t * data[cbk->count];
+    char * str = NULL, * pre = NULL, * sep, * post;
+    dict_t * dict;
+    va_list args;
+    int32_t i, num, len, prelen, postlen, seplen, tmp;
+    int32_t err;
+
+    num = cbk->count;
+    err = ec_dict_list(data, &num, cbk, which, key);
+    if (err != 0) {
+        return err;
+    }
+
+    /* Nothing to join.  Without this check the size computed below would be
+     * one separator short of the prefix and suffix that are always copied
+     * into the buffer. */
+    if (num == 0) {
+        return 0;
+    }
+
+    va_start(args, key);
+    err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
+    va_end(args);
+
+    if (err != 0) {
+        return err;
+    }
+
+    prelen = strlen(pre);
+    seplen = strlen(sep);
+    postlen = strlen(post);
+
+    /* prefix + (num - 1) separators + suffix + terminating NUL ... */
+    len = prelen + (num - 1) * seplen + postlen + 1;
+    /* ... plus every value without its own terminator */
+    for (i = 0; i < num; i++) {
+        len += data[i]->len - 1;
+    }
+
+    err = -ENOMEM;
+
+    str = GF_MALLOC(len, gf_common_mt_char);
+    if (str == NULL) {
+        goto out;
+    }
+
+    memcpy(str, pre, prelen);
+    len = prelen;
+    for (i = 0; i < num; i++) {
+        if (i > 0) {
+            memcpy(str + len, sep, seplen);
+            len += seplen;
+        }
+        tmp = data[i]->len - 1;
+        memcpy(str + len, data[i]->data, tmp);
+        len += tmp;
+    }
+    /* the suffix is copied together with its terminating NUL */
+    memcpy(str + len, post, postlen + 1);
+
+    dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+    err = dict_set_dynstr(dict, key, str);
+    if (err != 0) {
+        goto out;
+    }
+
+    /* the dictionary owns the string now */
+    str = NULL;
+
+out:
+    GF_FREE(str);
+    GF_FREE(pre);
+
+    return err;
+}
+
+/*
+ * Replace the value of 'key' in cbk's dictionary with the merge of the
+ * serialized dictionaries stored under that key in all combined answers.
+ *
+ * Every value is unserialized; the first one is the base and the others
+ * are copied into it with dict_copy().  The result is serialized again and
+ * stored under 'key'.  'which' selects xdata (EC_COMBINE_XDATA) or dict.
+ *
+ * Returns 0 on success, also when no answer holds the key (the dictionary
+ * is left untouched then), or a non-zero error code.
+ */
+int32_t ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+    data_t *data[cbk->count];
+    dict_t *dict, *lockinfo, *tmp = NULL;
+    char *ptr = NULL;
+    int32_t i, num, len;
+    int32_t err;
+
+    num = cbk->count;
+    err = ec_dict_list(data, &num, cbk, which, key);
+    if (err != 0) {
+        return err;
+    }
+
+    /* data[0] is only valid when at least one value was found. */
+    if (num == 0) {
+        return 0;
+    }
+
+    lockinfo = dict_new();
+    if (lockinfo == NULL) {
+        return -ENOMEM;
+    }
+
+    err = dict_unserialize(data[0]->data, data[0]->len, &lockinfo);
+    if (err != 0) {
+        goto out;
+    }
+
+    for (i = 1; i < num; i++)
+    {
+        tmp = dict_new();
+        if (tmp == NULL) {
+            err = -ENOMEM;
+
+            goto out;
+        }
+        err = dict_unserialize(data[i]->data, data[i]->len, &tmp);
+        if (err != 0) {
+            goto out;
+        }
+        if (dict_copy(tmp, lockinfo) == NULL) {
+            err = -ENOMEM;
+
+            goto out;
+        }
+
+        dict_unref(tmp);
+    }
+
+    /* the last temporary has already been released inside the loop */
+    tmp = NULL;
+
+    len = dict_serialized_length(lockinfo);
+    if (len < 0) {
+        err = len;
+
+        goto out;
+    }
+    ptr = GF_MALLOC(len, gf_common_mt_char);
+    if (ptr == NULL) {
+        err = -ENOMEM;
+
+        goto out;
+    }
+    err = dict_serialize(lockinfo, ptr);
+    if (err != 0) {
+        goto out;
+    }
+    dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+    err = dict_set_dynptr(dict, key, ptr, len);
+    if (err != 0) {
+        goto out;
+    }
+
+    /* the dictionary owns the buffer now */
+    ptr = NULL;
+
+out:
+    GF_FREE(ptr);
+    dict_unref(lockinfo);
+    if (tmp != NULL) {
+        dict_unref(tmp);
+    }
+
+    return err;
+}
+
+/*
+ * Make cbk's value for 'key' the one reported by the answer with the
+ * lowest idx among cbk and the answers chained to it.
+ *
+ * Nothing is done when cbk itself has the lowest idx.  'which' selects
+ * xdata (EC_COMBINE_XDATA) or dict.
+ *
+ * Returns 0 on success, -ENOENT if the chosen answer does not have the key
+ * and -ENOMEM if the value cannot be stored.
+ */
+int32_t ec_dict_data_uuid(ec_cbk_data_t * cbk, int32_t which, char * key)
+{
+    ec_cbk_data_t *lowest = cbk;
+    ec_cbk_data_t *cur;
+    dict_t *from, *to;
+    data_t *value;
+
+    for (cur = cbk->next; cur != NULL; cur = cur->next) {
+        if (cur->idx < lowest->idx) {
+            lowest = cur;
+        }
+    }
+
+    if (lowest == cbk) {
+        return 0;
+    }
+
+    if (which == EC_COMBINE_XDATA) {
+        from = lowest->xdata;
+        to = cbk->xdata;
+    } else {
+        from = lowest->dict;
+        to = cbk->dict;
+    }
+
+    value = dict_get(from, key);
+    if (value == NULL) {
+        return -ENOENT;
+    }
+    if (dict_set(to, key, value) != 0) {
+        return -ENOMEM;
+    }
+
+    return 0;
+}
+
+/*
+ * Replace the value of 'key' in cbk's dictionary with the largest 32-bit
+ * unsigned value found for that key in all combined answers.  'which'
+ * selects xdata (EC_COMBINE_XDATA) or dict.
+ *
+ * Returns 0 when no answer holds the key (the dictionary is left untouched
+ * then), otherwise the result of storing the maximum, or the error
+ * reported by ec_dict_list().
+ */
+int32_t ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+    data_t * data[cbk->count];
+    dict_t * dict;
+    int32_t i, num, err;
+    uint32_t max, tmp;
+
+    num = cbk->count;
+    err = ec_dict_list(data, &num, cbk, which, key);
+    if (err != 0) {
+        return err;
+    }
+
+    /* data[0] is only valid when at least one value was found; this is the
+     * same guard ec_dict_data_quota() applies. */
+    if (num == 0) {
+        return 0;
+    }
+
+    max = data_to_uint32(data[0]);
+    for (i = 1; i < num; i++) {
+        tmp = data_to_uint32(data[i]);
+        if (max < tmp) {
+            max = tmp;
+        }
+    }
+
+    dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+    return dict_set_uint32(dict, key, max);
+}
+
+/*
+ * Replace the value of 'key' in cbk's dictionary with the largest 64-bit
+ * unsigned value found for that key in all combined answers.  'which'
+ * selects xdata (EC_COMBINE_XDATA) or dict.
+ *
+ * Returns 0 when no answer holds the key (the dictionary is left untouched
+ * then), otherwise the result of storing the maximum, or the error
+ * reported by ec_dict_list().
+ */
+int32_t ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+    data_t *data[cbk->count];
+    dict_t *dict;
+    int32_t i, num, err;
+    uint64_t max, tmp;
+
+    num = cbk->count;
+    err = ec_dict_list(data, &num, cbk, which, key);
+    if (err != 0) {
+        return err;
+    }
+
+    /* data[0] is only valid when at least one value was found; this is the
+     * same guard ec_dict_data_quota() applies. */
+    if (num == 0) {
+        return 0;
+    }
+
+    max = data_to_uint64(data[0]);
+    for (i = 1; i < num; i++) {
+        tmp = data_to_uint64(data[i]);
+        if (max < tmp) {
+            max = tmp;
+        }
+    }
+
+    dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+    return dict_set_uint64(dict, key, max);
+}
+
+/*
+ * Combine the quota size values reported for 'key' by all answers.
+ *
+ * The quota size xattr is maintained outside of the ec xlator, so bricks
+ * may report slightly different values.  The maximum of each field (size,
+ * file count, directory count) is taken, the size is multiplied by the
+ * number of fragments, and the result is stored under 'key' in the
+ * dictionary selected by 'which'.  Values that cannot be decoded are
+ * skipped.
+ *
+ * Returns 0 when no answer holds the key, the error from ec_dict_list(),
+ * or the result of quota_dict_set_meta().
+ */
+int32_t ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+        data_t       *values[cbk->count];
+        quota_meta_t  current = {0, };
+        quota_meta_t  highest = {0, };
+        dict_t       *target  = NULL;
+        ec_t         *ec      = NULL;
+        int32_t       found   = cbk->count;
+        int32_t       err     = 0;
+        int32_t       idx     = 0;
+
+        err = ec_dict_list(values, &found, cbk, which, key);
+        if (err != 0) {
+                return err;
+        }
+
+        if (found == 0) {
+                return 0;
+        }
+
+        for (idx = 0; idx < found; idx++) {
+                if (quota_data_to_meta (values[idx], QUOTA_SIZE_KEY,
+                                        &current) >= 0) {
+                        if (current.size > highest.size)
+                                highest.size = current.size;
+                        if (current.file_count > highest.file_count)
+                                highest.file_count = current.file_count;
+                        if (current.dir_count > highest.dir_count)
+                                highest.dir_count = current.dir_count;
+                }
+        }
+
+        ec = cbk->fop->xl->private;
+        highest.size *= ec->fragments;
+
+        if (which == EC_COMBINE_XDATA) {
+                target = cbk->xdata;
+        } else {
+                target = cbk->dict;
+        }
+
+        return quota_dict_set_meta (target, key, &highest, IA_IFDIR);
+}
+
+/*
+ * Combine the stime values reported for 'key': every value after the first
+ * one is passed to gf_get_max_stime() together with the dictionary of cbk
+ * selected by 'which'.
+ *
+ * Returns 0 on success, or the first error (after logging it).
+ */
+int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key)
+{
+    data_t *values[cbk->count];
+    dict_t *target;
+    int32_t found = cbk->count;
+    int32_t idx;
+    int32_t err;
+
+    err = ec_dict_list(values, &found, cbk, which, key);
+    if (err != 0) {
+        return err;
+    }
+
+    if (which == EC_COMBINE_XDATA) {
+        target = cbk->xdata;
+    } else {
+        target = cbk->dict;
+    }
+
+    for (idx = 1; idx < found; idx++) {
+        err = gf_get_max_stime(cbk->fop->xl, target, key, values[idx]);
+        if (err == 0) {
+            continue;
+        }
+
+        gf_msg (cbk->fop->xl->name, GF_LOG_ERROR, -err,
+                EC_MSG_STIME_COMBINE_FAIL, "STIME combination failed");
+
+        return err;
+    }
+
+    return 0;
+}
+
+/*
+ * dict_foreach() callback that picks the combination strategy for one key.
+ * 'arg' is an ec_dict_combine_t naming the answer and which of its
+ * dictionaries is being processed; dict and value are not used.
+ *
+ * Keys that match none of the rules below are left alone.  Returns 0 in
+ * that case, otherwise whatever the selected helper returns.
+ */
+int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
+                             void * arg)
+{
+    ec_dict_combine_t *ctx = arg;
+    ec_cbk_data_t *cbk = ctx->cbk;
+    int32_t which = ctx->which;
+
+    /* path info: concatenate, tagged with the xlator name */
+    if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
+        (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
+        return ec_dict_data_concat("(<EC:%s> { })", cbk, which, key,
+                                   cbk->fop->xl->name);
+    }
+
+    /* clear-locks output: one answer per line */
+    if (strncmp(key, GF_XATTR_CLRLK_CMD, strlen(GF_XATTR_CLRLK_CMD)) == 0) {
+        return ec_dict_data_concat("{\n}", cbk, which, key);
+    }
+
+    /* lock info: merge the serialized dictionaries */
+    if (strncmp(key, GF_XATTR_LOCKINFO_KEY,
+                strlen(GF_XATTR_LOCKINFO_KEY)) == 0) {
+        return ec_dict_data_merge(cbk, which, key);
+    }
+
+    /* counters: keep the highest value */
+    if ((strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) ||
+        (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+        (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0)) {
+        return ec_dict_data_max32(cbk, which, key);
+    }
+
+    if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
+        return ec_dict_data_quota(cbk, which, key);
+    }
+    /* Ignore all other quota attributes */
+    if (strncmp(key, EC_QUOTA_PREFIX, strlen(EC_QUOTA_PREFIX)) == 0) {
+        return 0;
+    }
+
+    if (XATTR_IS_NODE_UUID(key)) {
+        return ec_dict_data_uuid(cbk, which, key);
+    }
+
+    if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+        return ec_dict_data_stime(cbk, which, key);
+    }
+
+    if (fnmatch(MARKER_XATTR_PREFIX ".*." XTIME, key, FNM_NOESCAPE) == 0) {
+        return ec_dict_data_max64(cbk, which, key);
+    }
+
+    return 0;
+}
+
+/*
+ * Run ec_dict_data_combine() over every key of the dictionary of cbk
+ * selected by 'which' (xdata for EC_COMBINE_XDATA, dict otherwise).
+ *
+ * Returns 0 on success or when that dictionary is NULL; otherwise the
+ * error reported by dict_foreach(), after logging it.
+ */
+int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which)
+{
+    ec_dict_combine_t ctx;
+    dict_t *target;
+    int32_t err;
+
+    ctx.cbk = cbk;
+    ctx.which = which;
+
+    if (which == EC_COMBINE_XDATA) {
+        target = cbk->xdata;
+    } else {
+        target = cbk->dict;
+    }
+
+    if (target == NULL) {
+        return 0;
+    }
+
+    err = dict_foreach(target, ec_dict_data_combine, &ctx);
+    if (err == 0) {
+        return 0;
+    }
+
+    gf_msg (cbk->fop->xl->name, GF_LOG_ERROR, -err,
+            EC_MSG_DICT_COMBINE_FAIL,
+            "Dictionary combination failed");
+
+    return err;
+}
+
+/*
+ * Returns non-zero when both vectors describe the same total number of
+ * bytes.  Only the lengths are compared, not the contents; a vector with a
+ * count of zero or less counts as length 0.
+ */
+int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
+                          struct iovec * src_vector, int32_t src_count)
+{
+    int32_t dst_size = 0;
+    int32_t src_size = 0;
+
+    if (dst_count > 0) {
+        dst_size = iov_length(dst_vector, dst_count);
+    }
+
+    if (src_count > 0) {
+        src_size = iov_length(src_vector, src_count);
+    }
+
+    if (dst_size == src_size) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Returns 1 when both lock descriptions have the same type, whence, start,
+ * length, pid and lock owner; 0 otherwise.
+ */
+int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src)
+{
+    if ((dst->l_type == src->l_type) &&
+        (dst->l_whence == src->l_whence) &&
+        (dst->l_start == src->l_start) &&
+        (dst->l_len == src->l_len) &&
+        (dst->l_pid == src->l_pid) &&
+        is_same_lkowner(&dst->l_owner, &src->l_owner)) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Re-express the three block counters of 'buf', currently counted in units
+ * of buf->f_frsize, in units of 'frsize'.  buf->f_frsize is not changed.
+ */
+static void
+ec_statvfs_rescale(struct statvfs *buf, unsigned long frsize)
+{
+    buf->f_blocks *= buf->f_frsize;
+    buf->f_blocks /= frsize;
+
+    buf->f_bfree *= buf->f_frsize;
+    buf->f_bfree /= frsize;
+
+    buf->f_bavail *= buf->f_frsize;
+    buf->f_bavail /= frsize;
+}
+
+/*
+ * Fold the file system statistics of src into dst.
+ *
+ * Block size and fragment size end up as the larger of both; whichever
+ * side has the smaller fragment size gets its block counters converted
+ * first (so src's counters may be modified too).  Block counts, free file
+ * counts and the name length limit keep the smaller value, the total file
+ * count keeps the larger one, and the flags are and-ed together (a
+ * mismatch is logged at debug level).
+ */
+void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src)
+{
+    if (dst->f_bsize < src->f_bsize) {
+        dst->f_bsize = src->f_bsize;
+    }
+
+    /* Bring both sides to the larger fragment size. */
+    if (dst->f_frsize < src->f_frsize) {
+        ec_statvfs_rescale(dst, src->f_frsize);
+        dst->f_frsize = src->f_frsize;
+    } else if (dst->f_frsize > src->f_frsize) {
+        ec_statvfs_rescale(src, dst->f_frsize);
+    }
+
+    /* Capacity: keep the smaller figures. */
+    if (dst->f_blocks > src->f_blocks) {
+        dst->f_blocks = src->f_blocks;
+    }
+    if (dst->f_bfree > src->f_bfree) {
+        dst->f_bfree = src->f_bfree;
+    }
+    if (dst->f_bavail > src->f_bavail) {
+        dst->f_bavail = src->f_bavail;
+    }
+
+    /* Inodes: larger total, smaller free counts. */
+    if (dst->f_files < src->f_files) {
+        dst->f_files = src->f_files;
+    }
+    if (dst->f_ffree > src->f_ffree) {
+        dst->f_ffree = src->f_ffree;
+    }
+    if (dst->f_favail > src->f_favail) {
+        dst->f_favail = src->f_favail;
+    }
+
+    if (dst->f_namemax > src->f_namemax) {
+        dst->f_namemax = src->f_namemax;
+    }
+
+    if (dst->f_flag != src->f_flag) {
+        gf_msg_debug (THIS->name, 0,
+                      "Mismatching file system flags "
+                      "(%lX, %lX)",
+                      dst->f_flag, src->f_flag);
+    }
+    dst->f_flag &= src->f_flag;
+}
+
+/*
+ * Decide whether two answers of the same fop can be grouped together.
+ *
+ * They must have the same return code, the same errno when that return
+ * code is negative, and equal xdata according to ec_dict_compare().  For
+ * successful answers the fop-specific 'combine' callback, if there is one,
+ * has the final word.
+ *
+ * Returns non-zero when the answers match, 0 otherwise.
+ */
+int32_t ec_combine_check(ec_cbk_data_t * dst, ec_cbk_data_t * src,
+                         ec_combine_f combine)
+{
+    ec_fop_data_t *fop = dst->fop;
+
+    if (dst->op_ret != src->op_ret) {
+        gf_msg_debug (fop->xl->name, 0, "Mismatching return code in "
+                      "answers of '%s': %d <-> %d",
+                      ec_fop_name(fop->id), dst->op_ret, src->op_ret);
+
+        return 0;
+    }
+
+    if ((dst->op_ret < 0) && (dst->op_errno != src->op_errno)) {
+        gf_msg_debug (fop->xl->name, 0, "Mismatching errno code in "
+                      "answers of '%s': %d <-> %d",
+                      ec_fop_name(fop->id), dst->op_errno, src->op_errno);
+
+        return 0;
+    }
+
+    if (!ec_dict_compare(dst->xdata, src->xdata)) {
+        gf_msg (fop->xl->name, GF_LOG_DEBUG, 0,
+                EC_MSG_XDATA_MISMATCH,
+                "Mismatching xdata in answers "
+                "of '%s'", ec_fop_name(fop->id));
+
+        return 0;
+    }
+
+    /* Failed answers, and fops without a callback, need no further check. */
+    if ((dst->op_ret < 0) || (combine == NULL)) {
+        return 1;
+    }
+
+    return combine(fop, dst, src);
+}
+/*
+ * Register a new answer (newcbk) in fop->cbk_list, working under fop->lock.
+ *
+ * newcbk's mask is added to fop->received.  If ec_combine_check() accepts
+ * newcbk together with an entry already in the list, newcbk takes over
+ * that entry's count and mask, the entry is unlinked and chained through
+ * newcbk->next, and newcbk is inserted at the position found by walking
+ * back from that entry.  Otherwise newcbk is list_add()ed relative to the
+ * current last entry.
+ *
+ * After the lock is released, ec_dispatch_next() is called when 'needed'
+ * came out positive (see below).
+ */
+void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
+{
+ ec_fop_data_t *fop = newcbk->fop;
+ ec_cbk_data_t *cbk = NULL, *tmp = NULL;
+ struct list_head *item = NULL;
+ int32_t needed = 0;
+ char str[32];
+
+ LOCK(&fop->lock);
+
+ fop->received |= newcbk->mask;
+ /* 'item' is the entry newcbk will be list_add()ed to; start with the current last one. */
+ item = fop->cbk_list.prev;
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (ec_combine_check(newcbk, cbk, combine))
+ {
+ newcbk->count += cbk->count;
+ newcbk->mask |= cbk->mask;
+ /* Walk back from cbk until an entry with at least the merged count, or the list head, is reached. */
+ item = cbk->list.prev;
+ while (item != &fop->cbk_list)
+ {
+ tmp = list_entry(item, ec_cbk_data_t, list);
+ if (tmp->count >= newcbk->count)
+ {
+ break;
+ }
+ item = item->prev;
+ }
+ list_del(&cbk->list);
+ /* cbk is no longer a list member; it hangs off newcbk->next instead. */
+ newcbk->next = cbk;
+
+ break;
+ }
+ }
+ list_add(&newcbk->list, item);
+
+ ec_trace("ANSWER", fop, "combine=%s[%d]",
+ ec_bin(str, sizeof(str), newcbk->mask, 0), newcbk->count);
+ /* 'needed' is only computed when (mask ^ remaining) equals received: how far the first entry's count is below fop->minimum. */
+ cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
+ if ((fop->mask ^ fop->remaining) == fop->received) {
+ needed = fop->minimum - cbk->count;
+ }
+
+ UNLOCK(&fop->lock);
+ /* The dispatch happens after fop->lock has been released. */
+ if (needed > 0) {
+ ec_dispatch_next(fop, newcbk->idx);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-combine.h b/xlators/cluster/ec/src/ec-combine.h
new file mode 100644
index 00000000000..19a42ded706
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-combine.h
@@ -0,0 +1,38 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_COMBINE_H__
+#define __EC_COMBINE_H__
+/* Values for the 'which' argument: EC_COMBINE_XDATA selects an answer's xdata, EC_COMBINE_DICT its dict. */
+#define EC_COMBINE_DICT 0
+#define EC_COMBINE_XDATA 1
+/* Fop-specific combine callback: given the fop and two of its answers, returns non-zero if they can be combined. */
+typedef int32_t (* ec_combine_f)(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+ ec_cbk_data_t * src);
+
+void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
+ int32_t answers);
+
+int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count);
+int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2);
+int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
+ struct iovec * src_vector, int32_t src_count);
+int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src);
+void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src);
+
+int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which);
+
+void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine);
+
+int32_t
+ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
+ ec_cbk_data_t *src);
+#endif /* __EC_COMBINE_H__ */
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
new file mode 100644
index 00000000000..175f6dfa71f
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -0,0 +1,2264 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "byte-order.h"
+#include "hashfn.h"
+
+#include "ec-mem-types.h"
+#include "ec-data.h"
+#include "ec-helpers.h"
+#include "ec-combine.h"
+#include "ec-common.h"
+#include "ec-fops.h"
+#include "ec-method.h"
+#include "ec.h"
+#include "ec-messages.h"
+
+/* Picks the index of the first subvolume to try, according to the read
+ * policy configured in 'ec'. */
+uint32_t
+ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
+{
+    /* Round-robin: use the index currently stored in the ec instance. */
+    if (ec->read_policy == EC_ROUND_ROBIN) {
+        return ec->idx;
+    }
+
+    /* Any policy other than gfid-hash selects subvolume 0. */
+    if (ec->read_policy != EC_GFID_HASH) {
+        return 0;
+    }
+
+    /* fd based fops hash the gfid of the inode attached to the fd. */
+    if (fop->use_fd) {
+        return SuperFastHash((char *)fop->fd->inode->gfid,
+                             sizeof(fop->fd->inode->gfid)) % ec->nodes;
+    }
+
+    /* Path based fops hash loc[0].gfid, calling loc_gfid() first when that
+     * gfid is null. */
+    if (gf_uuid_is_null (fop->loc[0].gfid)) {
+        loc_gfid (&fop->loc[0], fop->loc[0].gfid);
+    }
+
+    return SuperFastHash((char *)fop->loc[0].gfid,
+                         sizeof(fop->loc[0].gfid)) % ec->nodes;
+}
+
+/* Returns 1 when 'idx' is below ec->nodes and its bit is still set in
+ * fop->remaining, 0 otherwise. */
+int32_t ec_child_valid(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    if (idx >= ec->nodes) {
+        return 0;
+    }
+
+    return ((fop->remaining >> idx) & 1) == 1;
+}
+
+/* Starting at 'idx', walks the subvolume indices circularly and returns the
+ * first one accepted by ec_child_valid(). Returns -1 if the walk reaches
+ * fop->first before finding one. */
+int32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    for (;;) {
+        if (ec_child_valid(ec, fop, idx)) {
+            return idx;
+        }
+
+        /* Advance, wrapping around at the number of nodes. */
+        idx++;
+        if (idx >= ec->nodes) {
+            idx = 0;
+        }
+
+        /* Back at the starting subvolume of the fop: nothing left. */
+        if (idx == fop->first) {
+            return -1;
+        }
+    }
+}
+
+/* Heal completion callback: only logs the outcome. Always returns 0. */
+int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
+                       int32_t op_ret, int32_t op_errno, uintptr_t mask,
+                       uintptr_t good, uintptr_t bad, dict_t * xdata)
+{
+    uintptr_t not_good;
+
+    if (op_ret < 0) {
+        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                EC_MSG_HEAL_FAIL, "Heal failed");
+
+        return 0;
+    }
+
+    /* Nothing is reported when every subvolume in 'mask' was good. */
+    not_good = mask & ~good;
+    if (not_good != 0) {
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                EC_MSG_HEAL_SUCCESS, "Heal succeeded on %d/%d "
+                "subvolumes",
+                ec_bits_count(mask & ~(good | bad)),
+                ec_bits_count(not_good));
+    }
+
+    return 0;
+}
+
+/* Returns non-zero when some subvolume that is up (ec->xl_up) is neither in
+ * fop->remaining nor in fop->good. */
+int32_t ec_fop_needs_heal(ec_fop_data_t *fop)
+{
+    ec_t *ec = fop->xl->private;
+
+    if ((ec->xl_up & ~(fop->remaining | fop->good)) != 0) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/* When ec_fop_needs_heal() reports a problem, logs a warning with the
+ * relevant masks and requests a heal of the fd or of the loc(s) used by the
+ * fop. */
+void ec_check_status(ec_fop_data_t * fop)
+{
+    ec_t *ec = fop->xl->private;
+    int32_t partial = 0;
+
+    if (!ec_fop_needs_heal(fop)) {
+        return;
+    }
+
+    /* 'partial' is forwarded to the heal request: it is set for directories
+     * seen by lookup/stat/fstat and always for opendir. */
+    if (fop->answer->op_ret >= 0) {
+        if (fop->id == GF_FOP_OPENDIR) {
+            partial = 1;
+        } else if ((fop->id == GF_FOP_LOOKUP) || (fop->id == GF_FOP_STAT) ||
+                   (fop->id == GF_FOP_FSTAT)) {
+            partial = fop->answer->iatt[0].ia_type == IA_IFDIR;
+        }
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
+            EC_MSG_OP_FAIL_ON_SUBVOLS,
+            "Operation failed on some "
+            "subvolumes (up=%lX, mask=%lX, "
+            "remaining=%lX, good=%lX, bad=%lX)",
+            ec->xl_up, fop->mask, fop->remaining, fop->good,
+            ec->xl_up & ~(fop->remaining | fop->good));
+
+    /* fd based fops heal through the fd (when there is one). */
+    if (fop->use_fd) {
+        if (fop->fd != NULL) {
+            ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+                     fop->fd, partial, NULL);
+        }
+
+        return;
+    }
+
+    /* Path based fops heal loc[0], and loc[1] too when it has an inode. */
+    ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+            &fop->loc[0], partial, NULL);
+
+    if (fop->loc[1].inode != NULL) {
+        ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+                &fop->loc[1], partial, NULL);
+    }
+}
+
+/* Records the mask of good subvolumes in the fop and, for top-level fops
+ * executed on more than one brick, checks whether healing is needed. */
+void ec_update_good(ec_fop_data_t *fop, uintptr_t good)
+{
+    fop->good = good;
+
+    /* A fop executed on a single brick does not carry enough information to
+     * decide whether a heal is required. */
+    if (fop->expected == 1) {
+        return;
+    }
+
+    /* Fops that have a parent do not trigger the check either. */
+    if (fop->parent != NULL) {
+        return;
+    }
+
+    ec_check_status(fop);
+}
+
+/* Removes from lock->good_mask the bricks where 'fop' was executed and
+ * failed. */
+void ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop)
+{
+    /* A fop executed on only one brick cannot say anything about the global
+     * mask of good bricks, so it leaves the mask untouched.
+     *
+     * For the rest, only the bits of the bricks where the fop actually ran
+     * are considered (bricks still in fop->remaining are kept). Bits are
+     * only ever cleared here: once a brick is marked bad it stays bad until
+     * the lock is released and reacquired. */
+    if (fop->expected != 1) {
+        lock->good_mask &= fop->good | fop->remaining;
+    }
+}
+
+/* Stores 'error' in the fop unless it is 0 or an error is already recorded.
+ * This variant does not take fop->lock itself. */
+void __ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
+{
+    if (error == 0) {
+        return;
+    }
+
+    /* The first recorded error wins. */
+    if (fop->error != 0) {
+        return;
+    }
+
+    fop->error = error;
+}
+
+/* Locked variant of __ec_fop_set_error(): records the first non-zero error
+ * on the fop while holding fop->lock. */
+void ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
+{
+    LOCK(&fop->lock);
+
+    if ((error != 0) && (fop->error == 0)) {
+        fop->error = error;
+    }
+
+    UNLOCK(&fop->lock);
+}
+
+/* Turns a successful answer into a failed one when 'error' is non-zero.
+ * Returns whether the answer is (now) a failure, i.e. cbk->op_ret < 0. */
+gf_boolean_t
+ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro)
+{
+    /* Nothing to change when there is no error or the answer had already
+     * failed. */
+    if ((error == 0) || (cbk->op_ret < 0)) {
+        return (cbk->op_ret < 0);
+    }
+
+    /* The fop itself succeeded and the error was produced while processing
+     * its answer. A read-only operation can simply report that error. An
+     * operation that modified something must report EIO instead, to signal
+     * that it has been partially executed. */
+    if (ro) {
+        cbk->op_errno = error;
+    } else {
+        cbk->op_errno = EIO;
+    }
+    cbk->op_ret = -1;
+
+    ec_fop_set_error(cbk->fop, cbk->op_errno);
+
+    return (cbk->op_ret < 0);
+}
+
+/* Returns the answer selected for the fop after combining its xdata, or NULL
+ * (with an error recorded on the fop) when there is no answer or the answer
+ * ends up being a failure. 'ro' tells whether the fop is read-only. */
+ec_cbk_data_t *
+ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro)
+{
+    ec_cbk_data_t *answer = fop->answer;
+    int32_t ret;
+
+    /* No answer at all is reported as EIO. */
+    if (answer == NULL) {
+        ec_fop_set_error(fop, EIO);
+
+        return NULL;
+    }
+
+    /* Propagate the errno of a failed answer to the fop. */
+    if (answer->op_ret < 0) {
+        ec_fop_set_error(fop, answer->op_errno);
+    }
+
+    ret = ec_dict_combine(answer, EC_COMBINE_XDATA);
+    if (ec_cbk_set_error(answer, -ret, ro)) {
+        return NULL;
+    }
+
+    return answer;
+}
+
+/* Registers one more pending job on the fop, taking an extra reference for
+ * it. Both counters are updated under fop->lock. */
+void ec_sleep(ec_fop_data_t *fop)
+{
+    LOCK(&fop->lock);
+
+    /* The caller must already own a reference. */
+    GF_ASSERT (fop->refs > 0);
+    fop->refs += 1;
+    fop->jobs += 1;
+
+    UNLOCK(&fop->lock);
+}
+
+/* Drops one pending job. If jobs remain, 'resume' is stored to be invoked
+ * later and -1 is returned. Otherwise the error accumulated on the fop is
+ * returned and cleared. */
+int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
+{
+    int32_t result;
+
+    LOCK(&fop->lock);
+
+    GF_ASSERT(fop->resume == NULL);
+
+    fop->jobs--;
+    if (fop->jobs == 0) {
+        /* Everything finished: hand over the recorded error and reset it. */
+        result = fop->error;
+        fop->error = 0;
+    } else {
+        ec_trace("WAIT", fop, "resume=%p", resume);
+
+        fop->resume = resume;
+        result = -1;
+    }
+
+    UNLOCK(&fop->lock);
+
+    return result;
+}
+
+/* Signals the completion of one pending job of the fop. When it was the last
+ * one and a resume function had been stored, that function is called
+ * (outside fop->lock) with the first error recorded on the fop, if any.
+ * One reference of the fop is always released. */
+void ec_resume(ec_fop_data_t * fop, int32_t error)
+{
+    ec_resume_f callback = NULL;
+
+    LOCK(&fop->lock);
+
+    __ec_fop_set_error(fop, error);
+
+    fop->jobs--;
+    if (fop->jobs == 0) {
+        callback = fop->resume;
+        fop->resume = NULL;
+        if (callback != NULL) {
+            ec_trace("RESUME", fop, "error=%d", error);
+
+            /* An error stored in the fop overrides the one received. */
+            if (fop->error != 0) {
+                error = fop->error;
+            }
+            fop->error = 0;
+        }
+    }
+
+    UNLOCK(&fop->lock);
+
+    if (callback != NULL) {
+        callback(fop, error);
+    }
+
+    ec_fop_data_release(fop);
+}
+
+/* Detaches the fop from its parent (if it has one) and resumes the parent
+ * with 'error'. Does nothing for fops without a parent. */
+void ec_resume_parent(ec_fop_data_t * fop, int32_t error)
+{
+    ec_fop_data_t * parent;
+
+    parent = fop->parent;
+    if (parent != NULL)
+    {
+        /* 'error' is a signed int32_t, so it is traced with %d. */
+        ec_trace("RESUME_PARENT", fop, "error=%d", error);
+        fop->parent = NULL;
+        ec_resume(parent, error);
+    }
+}
+
+/* Tells whether 'op_errno' is one of the errors treated as recoverable:
+ * ENOTCONN, ESTALE, ENOENT, EBADFD (fd opened but brick disconnected) and
+ * EIO (backend filesystem crash, e.g. XFS/ext4). */
+gf_boolean_t
+ec_is_recoverable_error (int32_t op_errno)
+{
+    if ((op_errno == ENOTCONN) || (op_errno == ESTALE) ||
+        (op_errno == ENOENT) || (op_errno == EBADFD) ||
+        (op_errno == EIO)) {
+        return _gf_true;
+    }
+
+    return _gf_false;
+}
+
+/* Accounts for the completion of one wind. When it is the last one and no
+ * answer has been chosen yet, the first group in fop->cbk_list becomes the
+ * answer if it has enough non-healing copies, and the fop is resumed. One
+ * reference of the fop is always released. */
+void ec_complete(ec_fop_data_t * fop)
+{
+    ec_cbk_data_t *first = NULL;
+    int32_t do_resume = 0, do_update = 0;
+    int healing = 0;
+
+    LOCK(&fop->lock);
+
+    ec_trace("COMPLETE", fop, "");
+
+    fop->winds--;
+    if ((fop->winds == 0) && (fop->answer == NULL)) {
+        if (!list_empty(&fop->cbk_list)) {
+            first = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
+            healing = ec_bits_count (first->mask & fop->healing);
+            /* The fop is not a success unless it succeeded on at least
+             * fop->minimum good (non-healing) copies. */
+            if ((first->count - healing) >= fop->minimum) {
+                fop->answer = first;
+
+                do_update = 1;
+            }
+        }
+
+        do_resume = 1;
+    }
+
+    UNLOCK(&fop->lock);
+
+    /* ec_update_good() locks inode->lock, which could deadlock with
+     * fop->lock if taken in another order. As it is called at most once per
+     * fop, it is safe to call it after releasing fop->lock. */
+    if (do_update) {
+        ec_update_good(fop, first->mask);
+    }
+
+    if (do_resume) {
+        ec_resume(fop, 0);
+    }
+
+    ec_fop_data_release(fop);
+}
+
+/* Locks may already have been granted on the bricks, so their unlock
+ * requests must be wound at all costs. Returns true for inodelk/finodelk/lk
+ * with F_UNLCK and for entrylk/fentrylk with ENTRYLK_UNLOCK. */
+static gf_boolean_t
+ec_must_wind (ec_fop_data_t *fop)
+{
+    if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) ||
+        (fop->id == GF_FOP_LK)) {
+        return (fop->flock.l_type == F_UNLCK) ? _gf_true : _gf_false;
+    }
+
+    if ((fop->id == GF_FOP_ENTRYLK) || (fop->id == GF_FOP_FENTRYLK)) {
+        return (fop->entrylk_cmd == ENTRYLK_UNLOCK) ? _gf_true : _gf_false;
+    }
+
+    return _gf_false;
+}
+
+/* Returns true for unlock requests (see ec_must_wind()) and for xattrop /
+ * fxattrop fops. */
+static gf_boolean_t
+ec_internal_op (ec_fop_data_t *fop)
+{
+    if (ec_must_wind (fop) ||
+        (fop->id == GF_FOP_XATTROP) ||
+        (fop->id == GF_FOP_FXATTROP)) {
+        return _gf_true;
+    }
+
+    return _gf_false;
+}
+
+/* Computes the set of subvolumes the fop will be sent to (fop->mask and
+ * fop->remaining) and resolves the symbolic value of fop->minimum.
+ *
+ * Returns 1 after calling ec_sleep() on the fop when it can be dispatched,
+ * or 0 (after logging an error) when too few children are available. */
+int32_t ec_child_select(ec_fop_data_t * fop)
+{
+ ec_t * ec = fop->xl->private;
+ int32_t first = 0, num = 0;
+
+ ec_fop_cleanup(fop);
+
+ fop->mask &= ec->node_mask;
+ /* Wind the fop on same subvols as parent for any internal extra fops like
+ * head/tail read in case of writev fop. Unlocks shouldn't do this because
+ * unlock should go on all subvols where lock is performed*/
+ if (fop->parent && !ec_internal_op (fop)) {
+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ }
+
+ /* Drop (and report) the requested subvolumes that are not up. */
+ if ((fop->mask & ~ec->xl_up) != 0)
+ {
+ gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
+ EC_MSG_OP_EXEC_UNAVAIL,
+ "Executing operation with "
+ "some subvolumes unavailable "
+ "(%lX)", fop->mask & ~ec->xl_up);
+
+ fop->mask &= ec->xl_up;
+ }
+
+ /* Translate the symbolic EC_MINIMUM_* request into a brick count. */
+ switch (fop->minimum)
+ {
+ case EC_MINIMUM_ALL:
+ fop->minimum = ec_bits_count(fop->mask);
+ if (fop->minimum >= ec->fragments)
+ {
+ break;
+ }
+ /* Intentional fall through: with fewer selected bricks than
+ * ec->fragments, the minimum is raised to ec->fragments. */
+ case EC_MINIMUM_MIN:
+ fop->minimum = ec->fragments;
+ break;
+ case EC_MINIMUM_ONE:
+ fop->minimum = 1;
+ }
+
+ /* With the round-robin policy, advance the index kept in 'ec', wrapping
+ * at ec->nodes. */
+ if (ec->read_policy == EC_ROUND_ROBIN) {
+ first = ec->idx;
+ if (++first >= ec->nodes) {
+ first = 0;
+ }
+ ec->idx = first;
+ }
+
+ /*Unconditionally wind on healing subvolumes*/
+ fop->mask |= fop->healing;
+ fop->remaining = fop->mask;
+ fop->received = 0;
+
+ ec_trace("SELECT", fop, "");
+
+ /* Fail only when the selection is below both the requested minimum and
+ * ec->fragments. */
+ num = ec_bits_count(fop->mask);
+ if ((num < fop->minimum) && (num < ec->fragments))
+ {
+ gf_msg (ec->xl->name, GF_LOG_ERROR, 0,
+ EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children "
+ "for this request (have %d, need "
+ "%d)", num, fop->minimum);
+
+ return 0;
+ }
+
+ ec_sleep(fop);
+
+ return 1;
+}
+
+/* Winds the fop to the next valid subvolume found from 'idx' onwards. The
+ * subvolume is removed from fop->remaining and the wind/ref counters are
+ * increased under fop->lock; the wind itself happens outside the lock.
+ * Returns the index used, or a negative value if none was available. */
+int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx)
+{
+    ec_t *ec = fop->xl->private;
+
+    LOCK(&fop->lock);
+
+    idx = ec_child_next(ec, fop, idx);
+    if (idx >= 0) {
+        fop->remaining ^= 1ULL << idx;
+
+        ec_trace("EXECUTE", fop, "idx=%d", idx);
+
+        fop->winds++;
+        fop->refs++;
+    }
+
+    UNLOCK(&fop->lock);
+
+    if (idx < 0) {
+        return idx;
+    }
+
+    fop->wind(ec, fop, idx);
+
+    return idx;
+}
+
+/* Winds the fop to every subvolume whose bit is set in 'mask'. The mask is
+ * toggled in fop->remaining and the wind/ref counters grow by the number of
+ * bits set, all under fop->lock, before the winds are issued. */
+void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
+{
+    ec_t *ec = fop->xl->private;
+    int32_t bits, idx;
+
+    bits = ec_bits_count(mask);
+
+    LOCK(&fop->lock);
+
+    ec_trace("EXECUTE", fop, "mask=%lX", mask);
+
+    fop->remaining ^= mask;
+
+    fop->winds += bits;
+    fop->refs += bits;
+
+    UNLOCK(&fop->lock);
+
+    /* Walk the mask from the lowest bit upwards. */
+    for (idx = 0; mask != 0; idx++, mask >>= 1) {
+        if ((mask & 1) != 0) {
+            fop->wind(ec, fop, idx);
+        }
+    }
+}
+
+/* Resets the per-dispatch state of the fop (answer, good mask and callback
+ * list). When the fop holds locks, the lock owner of the request frame is
+ * copied into the fop's frame. */
+void ec_dispatch_start(ec_fop_data_t * fop)
+{
+    fop->good = 0;
+    fop->answer = NULL;
+
+    INIT_LIST_HEAD(&fop->cbk_list);
+
+    if (fop->lock_count <= 0) {
+        return;
+    }
+
+    ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner);
+}
+
+/* Dispatches the fop to a single subvolume, starting the search at the one
+ * chosen by the read policy. */
+void ec_dispatch_one(ec_fop_data_t * fop)
+{
+    ec_dispatch_start(fop);
+
+    if (!ec_child_select(fop)) {
+        return;
+    }
+
+    fop->expected = 1;
+    fop->first = ec_select_first_by_read_policy (fop->xl->private, fop);
+
+    ec_dispatch_next(fop, fop->first);
+}
+
+/* Prepares the answer of a fop sent to one subvolume (stored in '*cbk' when
+ * 'cbk' is not NULL). If the answer failed with a recoverable error, the
+ * subvolume that answered is removed from fop->mask. Returns true when the
+ * mask still has candidates after that removal, false in any other case. */
+gf_boolean_t
+ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk)
+{
+    ec_cbk_data_t *answer;
+
+    answer = ec_fop_prepare_answer(fop, _gf_true);
+    if (cbk != NULL) {
+        *cbk = answer;
+    }
+
+    if ((answer == NULL) || (answer->op_ret >= 0) ||
+        !ec_is_recoverable_error (answer->op_errno)) {
+        return _gf_false;
+    }
+
+    /* The failing subvolume must have been part of the mask. */
+    GF_ASSERT (fop->mask & (1ULL << answer->idx));
+    fop->mask ^= (1ULL << answer->idx);
+
+    return fop->mask ? _gf_true : _gf_false;
+}
+
+/* Starts an incremental dispatch: every selected subvolume is expected to
+ * answer, but only the first valid one (from index 0) is wound here. */
+void ec_dispatch_inc(ec_fop_data_t * fop)
+{
+    ec_dispatch_start(fop);
+
+    if (!ec_child_select(fop)) {
+        return;
+    }
+
+    fop->expected = ec_bits_count(fop->remaining);
+    fop->first = 0;
+
+    ec_dispatch_next(fop, 0);
+}
+
+/* Dispatches the fop to all the selected subvolumes at once. */
+void
+ec_dispatch_all (ec_fop_data_t *fop)
+{
+    ec_dispatch_start(fop);
+
+    if (!ec_child_select(fop)) {
+        return;
+    }
+
+    fop->expected = ec_bits_count(fop->remaining);
+    fop->first = 0;
+
+    ec_dispatch_mask(fop, fop->remaining);
+}
+
+/* Dispatches the fop to ec->fragments subvolumes, chosen circularly starting
+ * at the subvolume selected by the read policy.
+ *
+ * ec_child_select() may succeed with fewer than ec->fragments candidates
+ * (it only fails when the selection is below both fop->minimum and
+ * ec->fragments). In that case ec_child_next() returns -1 once the walk is
+ * back at fop->first; the search stops there instead of shifting by a
+ * negative amount, which is undefined behaviour. */
+void ec_dispatch_min(ec_fop_data_t * fop)
+{
+    ec_t * ec = fop->xl->private;
+    uintptr_t mask;
+    int32_t idx, count;
+
+    ec_dispatch_start(fop);
+
+    if (ec_child_select(fop))
+    {
+        fop->expected = count = ec->fragments;
+        fop->first = ec_select_first_by_read_policy (fop->xl->private, fop);
+        idx = fop->first - 1;
+        mask = 0;
+        while (count-- > 0)
+        {
+            idx = ec_child_next(ec, fop, idx + 1);
+            if (idx < 0)
+            {
+                /* A full turn has been completed: every valid child is
+                 * already present in 'mask'. */
+                break;
+            }
+            mask |= 1ULL << idx;
+        }
+
+        ec_dispatch_mask(fop, mask);
+    }
+}
+
+/* Allocates and initializes a lock object for 'loc' from ec->lock_pool.
+ * Returns NULL when the loc has no usable inode/gfid (EINVAL is recorded on
+ * the fop), when the pool allocation fails, or when the loc cannot be copied
+ * (the error from ec_loc_from_loc() is recorded on the fop). */
+ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
+{
+    ec_t *ec = fop->xl->private;
+    ec_lock_t *new_lock;
+    int32_t ret;
+
+    if ((loc->inode == NULL) ||
+        (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))) {
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_INVALID_INODE,
+                "Trying to lock based on an invalid "
+                "inode");
+
+        __ec_fop_set_error(fop, EINVAL);
+
+        return NULL;
+    }
+
+    new_lock = mem_get0(ec->lock_pool);
+    if (new_lock == NULL) {
+        return NULL;
+    }
+
+    /* All bricks are considered good until proven otherwise. */
+    new_lock->good_mask = -1ULL;
+    INIT_LIST_HEAD(&new_lock->owners);
+    INIT_LIST_HEAD(&new_lock->waiting);
+    INIT_LIST_HEAD(&new_lock->frozen);
+
+    ret = ec_loc_from_loc(fop->xl, &new_lock->loc, loc);
+    if (ret == 0) {
+        return new_lock;
+    }
+
+    mem_put(new_lock);
+
+    __ec_fop_set_error(fop, -ret);
+
+    return NULL;
+}
+
+/* Releases everything held by the lock (its loc and, when present, its fd
+ * reference) and returns the object to its memory pool. */
+void ec_lock_destroy(ec_lock_t * lock)
+{
+    fd_t *attached;
+
+    loc_wipe(&lock->loc);
+
+    attached = lock->fd;
+    if (attached != NULL) {
+        fd_unref(attached);
+    }
+
+    mem_put(lock);
+}
+
+/* Orders two locks by the gfid stored in their locs, returning the result
+ * of gf_uuid_compare(). */
+int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
+{
+    int32_t order;
+
+    order = gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
+
+    return order;
+}
+
+/* Appends 'lock' to the list of locks of the fop, filling the new link with
+ * the update flags and the base loc, and counts it as pending on the lock.
+ * On failure an error is recorded on the fop and nothing is appended. */
+void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
+                    loc_t *base)
+{
+    ec_lock_link_t *slot;
+    int32_t sorts_after;
+
+    /* This check only supports up to 2 locks per fop. It must be changed if
+     * more locks are ever needed. */
+    sorts_after = (fop->lock_count > 0) &&
+                  (ec_lock_compare(fop->locks[0].lock, lock) < 0);
+    if (sorts_after) {
+        fop->first_lock = fop->lock_count;
+    } else {
+        /* When the first lock is added to the fop, ask the locks xlator for
+         * lock counts so that contention can be detected and the lock
+         * released sooner. */
+        if (fop->xdata == NULL) {
+            fop->xdata = dict_new();
+            if (fop->xdata == NULL) {
+                ec_fop_set_error(fop, ENOMEM);
+                return;
+            }
+        }
+        if (dict_set_str(fop->xdata, GLUSTERFS_INODELK_DOM_COUNT,
+                         fop->xl->name) != 0) {
+            ec_fop_set_error(fop, ENOMEM);
+            return;
+        }
+    }
+
+    slot = &fop->locks[fop->lock_count];
+    fop->lock_count++;
+
+    slot->lock = lock;
+    slot->fop = fop;
+    slot->update[EC_DATA_TXN] = (flags & EC_UPDATE_DATA) != 0;
+    slot->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
+    slot->base = base;
+
+    lock->refs_pending++;
+}
+
+/* Attaches to the fop the lock object associated to loc->inode, creating it
+ * if the inode context does not have one yet. All the work is done while
+ * holding loc->inode->lock.
+ *
+ * Nothing is done for fops that have a parent, that already have an error
+ * recorded, or whose loc has no inode. 'flags' carries the EC_UPDATE_* and
+ * EC_QUERY_INFO bits and 'base' is stored in the lock link. */
+void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
+ uint32_t flags, loc_t *base)
+{
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ctx;
+
+ if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL)) {
+ return;
+ }
+
+ LOCK(&loc->inode->lock);
+
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx == NULL) {
+ __ec_fop_set_error(fop, ENOMEM);
+
+ goto unlock;
+ }
+
+ /* Reuse the lock object already attached to the inode context. */
+ if (ctx->inode_lock != NULL) {
+ lock = ctx->inode_lock;
+
+ /* If there's another lock, make sure that it's not the same. Otherwise
+ * do not insert it.
+ *
+ * This can only happen on renames where source and target names are
+ * in the same directory. */
+ if ((fop->lock_count > 0) && (fop->locks[0].lock == lock)) {
+ /* Combine data/meta updates */
+ fop->locks[0].update[EC_DATA_TXN] |= (flags & EC_UPDATE_DATA) != 0;
+ fop->locks[0].update[EC_METADATA_TXN] |=
+ (flags & EC_UPDATE_META) != 0;
+
+ /* Only one base inode is allowed per fop, so there shouldn't be
+ * overwrites here. */
+ if (base != NULL) {
+ fop->locks[0].base = base;
+ }
+
+ goto update_query;
+ }
+
+ ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already "
+ "acquired", lock, loc->inode);
+
+ goto insert;
+ }
+
+ /* No lock object yet: create one. ec_lock_allocate() records the error
+ * on the fop when it fails. */
+ lock = ec_lock_allocate(fop, loc);
+ if (lock == NULL) {
+ goto unlock;
+ }
+
+ ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
+
+ lock->flock.l_type = F_WRLCK;
+ lock->flock.l_whence = SEEK_SET;
+
+ /* Link the lock and the inode context to each other. */
+ lock->ctx = ctx;
+ ctx->inode_lock = lock;
+
+insert:
+ ec_lock_insert(fop, lock, flags, base);
+update_query:
+ /* Remember that some fop asked for the inode information. */
+ lock->query |= (flags & EC_QUERY_INFO) != 0;
+unlock:
+ UNLOCK(&loc->inode->lock);
+}
+
+/* Prepares the lock on the inode of 'loc' without any base loc. */
+void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags)
+{
+    loc_t *no_base = NULL;
+
+    ec_lock_prepare_inode_internal(fop, loc, flags, no_base);
+}
+
+/* Prepares the lock on the parent directory of 'loc'. When EC_INODE_SIZE is
+ * present in 'flags', that bit is toggled off and 'loc' itself is handed
+ * over as the base loc of the lock link. */
+void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc,
+                                  uint32_t flags)
+{
+    loc_t parent_loc;
+    loc_t *base = NULL;
+    int32_t ret;
+
+    /* Do nothing if the fop has already failed. */
+    if (fop->error != 0) {
+        return;
+    }
+
+    ret = ec_loc_parent(fop->xl, loc, &parent_loc);
+    if (ret != 0) {
+        ec_fop_set_error(fop, -ret);
+
+        return;
+    }
+
+    if ((flags & EC_INODE_SIZE) != 0) {
+        flags ^= EC_INODE_SIZE;
+        base = loc;
+    }
+
+    ec_lock_prepare_inode_internal(fop, &parent_loc, flags, base);
+
+    loc_wipe(&parent_loc);
+}
+
+/* Prepares the lock on the inode referenced by 'fd', through a temporary
+ * loc built from the fd. */
+void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
+{
+    loc_t fd_loc;
+    int32_t ret;
+
+    /* Do nothing if the fop has already failed. */
+    if (fop->error != 0) {
+        return;
+    }
+
+    ret = ec_loc_from_fd(fop->xl, &fd_loc, fd);
+    if (ret != 0) {
+        ec_fop_set_error(fop, -ret);
+
+        return;
+    }
+
+    ec_lock_prepare_inode_internal(fop, &fd_loc, flags, NULL);
+
+    loc_wipe(&fd_loc);
+}
+
+/* Verifies that 'config' matches the configuration of this volume. Returns
+ * true when it does. Otherwise it logs whether the config looks corrupted or
+ * merely unsupported, and returns false. */
+gf_boolean_t
+ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
+{
+    ec_t *ec = fop->xl->private;
+    uint32_t data_bricks;
+
+    if ((config->version == EC_CONFIG_VERSION) &&
+        (config->algorithm == EC_CONFIG_ALGORITHM) &&
+        (config->gf_word_size == EC_GF_BITS) &&
+        (config->bricks == ec->nodes) &&
+        (config->redundancy == ec->redundancy) &&
+        (config->chunk_size == EC_METHOD_CHUNK_SIZE)) {
+        return _gf_true;
+    }
+
+    /* This combination of version/algorithm requires the following values.
+       Incorrect values for these fields are a sign of corruption:
+
+         redundancy > 0
+         redundancy * 2 < bricks
+         gf_word_size must be a power of 2
+         chunk_size (in bits) must be a multiple of gf_word_size *
+         (bricks - redundancy) */
+
+    data_bricks = config->bricks - config->redundancy;
+    if ((config->redundancy < 1) ||
+        (config->redundancy * 2 >= config->bricks) ||
+        !ec_is_power_of_2(config->gf_word_size) ||
+        ((config->chunk_size * 8) % (config->gf_word_size * data_bricks)
+         != 0)) {
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_INVALID_CONFIG,
+                "Invalid or corrupted config");
+    } else {
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_INVALID_CONFIG,
+                "Unsupported config "
+                "(V=%u, A=%u, W=%u, "
+                "N=%u, R=%u, S=%u)",
+                config->version, config->algorithm,
+                config->gf_word_size, config->bricks,
+                config->redundancy, config->chunk_size);
+    }
+
+    return _gf_false;
+}
+
+/* Callback of the [f]xattrop issued by ec_get_size_version().
+ *
+ * 'cookie' is the xattrop fop; its 'data' field holds the lock link that
+ * requested the information. The version, size and config values found in
+ * 'dict' are stored in the inode context of the lock, lock->getting_size is
+ * cleared, and every owner fop flagged with EC_FLAG_WAITING_SIZE is resumed.
+ * Always returns 0; failures are recorded on the affected fops. */
+int32_t
+ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ struct list_head list;
+ ec_fop_data_t *fop = cookie, *parent, *tmp;
+ ec_lock_link_t *link = fop->data;
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ctx;
+
+ lock = link->lock;
+ parent = link->fop;
+ ctx = lock->ctx;
+
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* Collect the owners that were waiting for this answer. They are chained
+ * through their cbk_list member and resumed after the lock is dropped. */
+ list_for_each_entry(link, &lock->owners, owner_list) {
+ if ((link->fop->flags & EC_FLAG_WAITING_SIZE) != 0) {
+ link->fop->flags ^= EC_FLAG_WAITING_SIZE;
+
+ list_add_tail(&link->fop->cbk_list, &list);
+ }
+ }
+
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ EC_MSG_SIZE_VERS_GET_FAIL,
+ "Failed to get size and version");
+
+ goto unlock;
+ }
+
+ /* From here on op_errno holds a positive errno (helpers return negative
+ * values) or 0 on success. */
+ op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version,
+ EC_VERSION_SIZE);
+ if (op_errno != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_VER_XATTR_GET_FAIL,
+ "Unable to get version xattr");
+
+ goto unlock;
+ }
+ ctx->post_version[0] += ctx->pre_version[0];
+ ctx->post_version[1] += ctx->pre_version[1];
+
+ ctx->have_version = _gf_true;
+
+ /* Size and config are only read for regular files and for inodes whose
+ * type is not known yet (IA_INVAL). */
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size);
+ if (op_errno != 0) {
+ /* A missing size is only fatal for regular files. */
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_SIZE_XATTR_GET_FAIL,
+ "Unable to get size xattr");
+
+ goto unlock;
+ }
+ } else {
+ ctx->post_size = ctx->pre_size;
+
+ ctx->have_size = _gf_true;
+ }
+
+ op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config);
+ if (op_errno != 0) {
+ /* For IA_INVAL inodes only ENODATA is tolerated. */
+ if ((lock->loc.inode->ia_type == IA_IFREG) ||
+ (op_errno != ENODATA)) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_CONFIG_XATTR_GET_FAIL,
+ "Unable to get config xattr");
+
+ goto unlock;
+ }
+ } else {
+ if (!ec_config_check(parent, &ctx->config)) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_CONFIG_XATTR_INVALID,
+ "Invalid config xattr");
+
+ op_errno = EINVAL;
+
+ goto unlock;
+ }
+
+ ctx->have_config = _gf_true;
+ }
+ }
+
+ ctx->have_info = _gf_true;
+
+ /* Tolerated failures above may have left a non-zero value here. */
+ op_errno = 0;
+
+unlock:
+ lock->getting_size = _gf_false;
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ if (op_errno == 0) {
+ /* We don't allow the main fop to be executed on bricks that have not
+ * succeeded the initial xattrop. */
+ parent->mask &= fop->good;
+ ec_lock_update_good (lock, fop);
+
+ /*As of now only data healing marks bricks as healing*/
+ lock->healing |= fop->healing;
+ if (ec_is_data_fop (parent->id)) {
+ parent->healing |= fop->healing;
+ }
+ } else {
+ ec_fop_set_error(parent, op_errno);
+ }
+
+ /* Apply the same outcome to every fop that was waiting, then resume it. */
+ while (!list_empty(&list)) {
+ tmp = list_entry(list.next, ec_fop_data_t, cbk_list);
+ list_del_init(&tmp->cbk_list);
+
+ if (op_errno == 0) {
+ tmp->mask &= fop->good;
+
+ /*As of now only data healing marks bricks as healing*/
+ if (ec_is_data_fop (tmp->id)) {
+ tmp->healing |= fop->healing;
+ }
+ } else {
+ ec_fop_set_error(tmp, op_errno);
+ }
+
+ ec_resume(tmp, 0);
+ }
+
+ return 0;
+}
+
+/* Retrieves the ec metadata (version, dirty, and for regular/unknown inodes
+ * also size and config) of the inode protected by link->lock, unless it is
+ * already cached in the inode context or not needed.
+ *
+ * The data is requested with an [f]xattrop answered by
+ * ec_prepare_update_cbk(). If another fop is already fetching it, this fop
+ * is flagged with EC_FLAG_WAITING_SIZE and put to sleep until that callback
+ * resumes it. Errors are recorded on the fop. */
+void ec_get_size_version(ec_lock_link_t *link)
+{
+ loc_t loc;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ ec_fop_data_t *fop;
+ dict_t *dict = NULL;
+ int32_t error = -ENOMEM;
+ gf_boolean_t getting_size;
+ uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
+
+ lock = link->lock;
+ ctx = lock->ctx;
+ fop = link->fop;
+
+ /* If ec metadata has already been retrieved, do not try again. */
+ if (ctx->have_info) {
+ if (ec_is_data_fop (fop->id)) {
+ fop->healing |= lock->healing;
+ }
+ return;
+ }
+
+ /* Determine if there's something we need to retrieve for the current
+ * operation. */
+ if (!lock->query &&
+ (lock->loc.inode->ia_type != IA_IFREG) &&
+ (lock->loc.inode->ia_type != IA_INVAL)) {
+ return;
+ }
+
+ /* Zeroed so that the loc_wipe() at 'out' is safe on every path. */
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* Only the first fop to get here issues the request; the others wait. */
+ getting_size = lock->getting_size;
+ lock->getting_size = _gf_true;
+ if (getting_size) {
+ fop->flags |= EC_FLAG_WAITING_SIZE;
+
+ ec_sleep(fop);
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ if (getting_size) {
+ error = 0;
+
+ goto out;
+ }
+
+ /* NOTE(review): every 'goto out' taken with an error from here on leaves
+ * lock->getting_size set, since it is only cleared in
+ * ec_prepare_update_cbk(). Confirm whether later fops could end up
+ * waiting for a callback that is never issued. */
+ dict = dict_new();
+ if (dict == NULL) {
+ goto out;
+ }
+
+ /* Once we know that an xattrop will be needed, we try to get all available
+ * information in a single call. */
+ error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
+ EC_VERSION_SIZE);
+ if (error == 0) {
+ error = ec_dict_set_array(dict, EC_XATTR_DIRTY, allzero,
+ EC_VERSION_SIZE);
+ }
+ if (error != 0) {
+ goto out;
+ }
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
+ if (error == 0) {
+ error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
+ }
+ if (error != 0) {
+ goto out;
+ }
+ }
+
+ /* The xattrop is sent with uid/gid 0; the values saved in the fop are
+ * restored at 'out'. */
+ fop->frame->root->uid = 0;
+ fop->frame->root->gid = 0;
+
+ /* For normal fops, ec_[f]xattrop() must succeed on at least
+ * EC_MINIMUM_MIN bricks, however when this is called as part of a
+ * self-heal operation the mask of target bricks (fop->mask) could
+ * contain less than EC_MINIMUM_MIN bricks, causing the xattrop to
+ * always fail. Thus we always use the same minimum used for the main
+ * fop.
+ */
+ if (lock->fd == NULL) {
+ error = ec_loc_from_loc(fop->xl, &loc, &lock->loc);
+ if (error != 0) {
+ goto out;
+ }
+ /* Without a parent gfid, drop the parent/path/name fields of the loc
+ * copy before sending the request. */
+ if (gf_uuid_is_null(loc.pargfid)) {
+ if (loc.parent != NULL) {
+ inode_unref(loc.parent);
+ loc.parent = NULL;
+ }
+ GF_FREE((char *)loc.path);
+ loc.path = NULL;
+ loc.name = NULL;
+ }
+
+ ec_xattrop (fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, &loc,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ } else {
+ ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ }
+
+ error = 0;
+
+out:
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ loc_wipe(&loc);
+
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+
+ /* 'error' holds a negative errno here, hence the sign change. */
+ if (error != 0) {
+ ec_fop_set_error(fop, -error);
+ }
+}
+
+/* Copies into '*size' the size cached in the inode context (post_size).
+ * Returns true when a cached size exists, false otherwise; in that case
+ * '*size' is left untouched. The context is read under inode->lock. */
+gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+                               uint64_t *size)
+{
+    ec_inode_t *ictx;
+    gf_boolean_t have = _gf_false;
+
+    LOCK(&inode->lock);
+
+    ictx = __ec_inode_get(inode, fop->xl);
+    if ((ictx != NULL) && ictx->have_size) {
+        *size = ictx->post_size;
+        have = _gf_true;
+    }
+
+    UNLOCK(&inode->lock);
+
+    return have;
+}
+
+/* Stores 'size' as the current size (post_size) in the inode context.
+ * Returns false only when the inode context cannot be obtained. The context
+ * is updated under inode->lock. */
+gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+                               uint64_t size)
+{
+    ec_inode_t *ictx;
+    gf_boolean_t stored = _gf_false;
+
+    LOCK(&inode->lock);
+
+    ictx = __ec_inode_get(inode, fop->xl);
+    if (ictx != NULL) {
+        /* Normal fops always find have_size already set. Self-heal calls
+         * this to prepare the inode, so have_size is false; then pre_size
+         * is initialized as well and both have_size and have_info are
+         * raised. */
+        if (!ictx->have_size) {
+            ictx->pre_size = size;
+            ictx->have_size = ictx->have_info = _gf_true;
+        }
+        ictx->post_size = size;
+
+        stored = _gf_true;
+    }
+
+    UNLOCK(&inode->lock);
+
+    return stored;
+}
+
+/* Forgets all the ec metadata cached in the inode context: the have_* flags
+ * are cleared and config, versions, sizes and dirty counters are zeroed.
+ * Does nothing if the context cannot be obtained. */
+void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
+{
+    ec_inode_t *ictx;
+
+    LOCK(&inode->lock);
+
+    ictx = __ec_inode_get(inode, fop->xl);
+    if (ictx != NULL) {
+        ictx->have_info = _gf_false;
+        ictx->have_config = _gf_false;
+        ictx->have_version = _gf_false;
+        ictx->have_size = _gf_false;
+
+        memset(&ictx->config, 0, sizeof(ictx->config));
+        memset(ictx->pre_version, 0, sizeof(ictx->pre_version));
+        memset(ictx->post_version, 0, sizeof(ictx->post_version));
+        ictx->pre_size = ictx->post_size = 0;
+        memset(ictx->dirty, 0, sizeof(ictx->dirty));
+    }
+
+    UNLOCK(&inode->lock);
+}
+
+/* Callback of the lookup sent by ec_get_real_size(). On success the size of
+ * the returned iatt is stored in the lock link found in fop->data. On
+ * failure the error of the lookup fop is cleared. Always returns 0. */
+int32_t ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, inode_t *inode,
+                             struct iatt *buf, dict_t *xdata,
+                             struct iatt *postparent)
+{
+    ec_fop_data_t *fop = cookie;
+    ec_lock_link_t *link;
+
+    if (op_ret < 0) {
+        /* Prevent failure of parent fop. */
+        fop->error = 0;
+
+        return 0;
+    }
+
+    link = fop->data;
+    link->size = buf->ia_size;
+
+    return 0;
+}
+
+/* Obtains the trusted.ec.size xattr of a file when no lock is needed on its
+ * inode. This is only required to maintain iatt structs on fops that
+ * manipulate directory entries without operating directly on the inode, like
+ * link, rename, ...
+ *
+ * Any error while processing this request is ignored. In the worst case, an
+ * invalid or outdated value in the iatt could cause some cache invalidation.
+ */
+void ec_get_real_size(ec_lock_link_t *link)
+{
+    ec_fop_data_t *fop;
+    dict_t *req;
+
+    /* Only links with a base loc that has an inode are considered. */
+    if ((link->base == NULL) || (link->base->inode == NULL)) {
+        return;
+    }
+
+    /* Only regular files are queried. */
+    if (link->base->inode->ia_type != IA_IFREG) {
+        return;
+    }
+
+    fop = link->fop;
+
+    /* Use the size cached in the inode context when there is one. */
+    if (ec_get_inode_size(fop, link->base->inode, &link->size)) {
+        return;
+    }
+
+    req = dict_new();
+    if (req == NULL) {
+        return;
+    }
+
+    if (ec_dict_set_number(req, EC_XATTR_SIZE, 0) == 0) {
+        /* Send a simple lookup. A single answer is enough because the value
+         * is only used to return an iatt struct of an inode that is not
+         * locked and has not suffered any operation. */
+        ec_lookup(fop->frame, fop->xl, fop->mask, 1, ec_get_real_size_cbk,
+                  link, link->base, req);
+    }
+
+    dict_unref(req);
+}
+
+/* Attaches the fd of the fop to the lock when the fop uses one and the lock
+ * has none yet, so that fxattrop can be used instead of xattrop. */
+static void
+ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop)
+{
+    if (!fop->use_fd) {
+        return;
+    }
+
+    /* Keep the fd that is already attached. */
+    if (lock->fd != NULL) {
+        return;
+    }
+
+    lock->fd = __fd_ref(fop->fd);
+}
+
+/* Promotes fops from the head of lock->waiting to owners of the lock for as
+ * long as the sharing rules allow it. Every promoted link is also moved to
+ * 'list' (through its wait_list member) so that the caller can resume it.
+ * The caller visible in this file (ec_lock_acquired) invokes it while
+ * holding lock->loc.inode->lock. */
+static void
+ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
+{
+ ec_fop_data_t *fop;
+ ec_lock_link_t *link;
+ gf_boolean_t exclusive = _gf_false;
+
+ /* Stop as soon as an exclusive owner has been assigned. */
+ while (!exclusive && !list_empty(&lock->waiting)) {
+ link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
+ fop = link->fop;
+
+ /* If lock is not acquired, at most one fop can be assigned as owner.
+ * The following fops will need to wait in the lock->waiting queue
+ * until the lock has been fully acquired. */
+ exclusive = !lock->acquired;
+
+ /* If the fop is not shareable, only this fop can be assigned as owner.
+ * Other fops will need to wait until this one finishes. */
+ if ((fop->flags & EC_FLAG_LOCK_SHARED) == 0) {
+ exclusive = _gf_true;
+
+ /* Avoid other requests to be assigned as owners. */
+ lock->exclusive = 1;
+ }
+
+ /* If only one fop is allowed, it can be assigned as the owner of the
+ * lock only if there weren't any other owner. */
+ if (exclusive && !list_empty(&lock->owners)) {
+ break;
+ }
+
+ /* Hand the link over to the caller's list and register it as owner. */
+ list_move_tail(&link->wait_list, list);
+
+ list_add_tail(&link->owner_list, &lock->owners);
+ lock->refs_owners++;
+
+ ec_lock_update_fd(lock, fop);
+ }
+}
+
+/* Applies an acquired lock to the fop of the link: the fop is restricted to
+ * the bricks still good for the lock, its locked counter is increased, and
+ * the size/version information is requested. */
+static void
+ec_lock_apply(ec_lock_link_t *link)
+{
+    ec_lock_t *lock = link->lock;
+    ec_fop_data_t *owner = link->fop;
+
+    owner->mask &= lock->good_mask;
+    owner->locked++;
+
+    ec_get_size_version(link);
+    ec_get_real_size(link);
+}
+
+gf_boolean_t ec_lock_acquire(ec_lock_link_t *link);
+
+/* Resumes every fop whose link was queued in 'list' (through wait_list). If
+ * the lock is already acquired it is applied to the fop and ec_lock() is
+ * called. Otherwise the link must be the only one in the list and the
+ * acquisition of the lock is started. */
+static void
+ec_lock_resume_shared(struct list_head *list)
+{
+    ec_lock_link_t *next;
+
+    while (!list_empty(list)) {
+        next = list_entry(list->next, ec_lock_link_t, wait_list);
+        list_del_init(&next->wait_list);
+
+        if (!next->lock->acquired) {
+            GF_ASSERT(list_empty(list));
+
+            ec_lock_acquire(next);
+        } else {
+            ec_lock_apply(next);
+            ec_lock(next->fop);
+        }
+
+        ec_resume(next->fop, 0);
+    }
+}
+
+/* Marks the lock of the link as acquired and applies it to the fop of the
+ * link. If that fop allows sharing, waiting fops that can share the lock are
+ * promoted to owners and resumed afterwards. */
+void ec_lock_acquired(ec_lock_link_t *link)
+{
+    struct list_head wakeup;
+    ec_lock_t *lock = link->lock;
+    ec_fop_data_t *fop = link->fop;
+
+    ec_trace("LOCKED", fop, "lock=%p", lock);
+
+    INIT_LIST_HEAD(&wakeup);
+
+    LOCK(&lock->loc.inode->lock);
+
+    lock->acquired = _gf_true;
+
+    ec_lock_update_fd(lock, fop);
+    if ((fop->flags & EC_FLAG_LOCK_SHARED) != 0) {
+        ec_lock_wake_shared(lock, &wakeup);
+    }
+
+    UNLOCK(&lock->loc.inode->lock);
+
+    /* The fops are processed without holding the inode lock. */
+    ec_lock_apply(link);
+
+    ec_lock_resume_shared(&wakeup);
+}
+
+/* Callback of the inodelk sent by ec_lock_acquire(). On success the bricks
+ * where the lock was taken become both the mask and the good mask of the
+ * lock, and the parent fop continues through ec_lock(). On failure only a
+ * warning is logged. Always returns 0. */
+int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    ec_fop_data_t *fop = cookie;
+    ec_lock_link_t *link;
+    ec_lock_t *lock;
+
+    if (op_ret < 0) {
+        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                EC_MSG_PREOP_LOCK_FAILED,
+                "Failed to complete preop lock");
+
+        return 0;
+    }
+
+    link = fop->data;
+    lock = link->lock;
+    lock->mask = lock->good_mask = fop->good;
+    lock->healing = 0;
+
+    ec_lock_acquired(link);
+    ec_lock(fop->parent);
+
+    return 0;
+}
+
+/* Makes sure the lock of the link is acquired. If it already is, it is
+ * reused right away and true is returned. Otherwise a blocking write inodelk
+ * is sent to all the bricks (answered by ec_locked()) and false is
+ * returned. */
+gf_boolean_t ec_lock_acquire(ec_lock_link_t *link)
+{
+    ec_lock_t *lock = link->lock;
+    ec_fop_data_t *fop = link->fop;
+
+    if (lock->acquired) {
+        ec_trace("LOCK_REUSE", fop, "lock=%p", lock);
+
+        ec_lock_acquired(link);
+
+        return _gf_true;
+    }
+
+    ec_owner_set(fop->frame, lock);
+
+    ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock,
+             lock->loc.inode);
+
+    lock->flock.l_type = F_WRLCK;
+    ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked,
+               link, fop->xl->name, &lock->loc, F_SETLKW, &lock->flock,
+               NULL);
+
+    return _gf_false;
+}
+
+/* Tries to make 'link' an owner of its lock.
+ *
+ * The pending reference held by the link is dropped and the link ends up in
+ * exactly one place: the owners list (return _gf_true), or the waiting or
+ * frozen list (return _gf_false, with the fop put to sleep via ec_sleep()).
+ *
+ * If the lock had a delayed-unlock timer armed, the timer is cancelled (or
+ * neutralized by clearing lock->timer) and, when cancellation succeeded, the
+ * fop that was waiting for the timer is resumed after the inode mutex has
+ * been released. */
+static gf_boolean_t
+ec_lock_assign_owner(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop;
+ ec_lock_t *lock;
+ ec_lock_link_t *timer_link = NULL;
+ gf_boolean_t assigned = _gf_false;
+
+ /* The link cannot be in any list because we have just finished preparing
+ * it. */
+ GF_ASSERT(list_empty(&link->wait_list));
+
+ fop = link->fop;
+ lock = link->lock;
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* Since the link has just been prepared but it's not active yet, the
+ * refs_pending must be one at least (the ref owned by this link). */
+ GF_ASSERT (lock->refs_pending > 0);
+ /* The link is not pending any more. It will be assigned to the owner,
+ * waiting or frozen list. */
+ lock->refs_pending--;
+
+ if (lock->release) {
+ ec_trace("LOCK_QUEUE_FREEZE", fop, "lock=%p", lock);
+
+ /* When lock->release is set, we'll unlock the lock as soon as
+ * possible, meaning that we won't use a timer. */
+ GF_ASSERT(lock->timer == NULL);
+
+ /* The lock is marked to be released. We can still have owners and fops
+ * in the waiting list if they have been added before the lock has been
+ * marked to be released. However new fops are put into the frozen list
+ * to wait for the next unlock/lock cycle. */
+ list_add_tail(&link->wait_list, &lock->frozen);
+
+ goto unlock;
+ }
+
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
+
+ if (lock->timer != NULL) {
+ /* We are trying to acquire a lock that has an unlock timer active.
+ * This means that the lock must be idle, i.e. no fop can be in the
+ * owner, waiting or frozen lists. It also means that the lock cannot
+ * have been marked as being released (this is done without timers)
+ * and it must not be exclusive. There should only be one owner
+ * reference, but it's possible that some fops are being prepared to
+ * use this lock. */
+ GF_ASSERT ((lock->exclusive == 0) && (lock->refs_owners == 1) &&
+ list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+ /* We take the timer_link before cancelling the timer, since a
+ * successful cancellation will destroy it. It must not be NULL
+ * because it references the fop responsible for the delayed unlock
+ * that we are currently trying to cancel. */
+ timer_link = lock->timer->data;
+ GF_ASSERT(timer_link != NULL);
+
+ if (gf_timer_call_cancel(fop->xl->ctx, lock->timer) < 0) {
+ /* It's too late to avoid the execution of the timer callback.
+ * Since we need to be sure that the callback has access to all
+ * needed resources, we cannot resume the execution of the timer
+ * fop now. This will be done in the callback.
+ */
+ timer_link = NULL;
+ } else {
+ /* The timer has been cancelled, so we need to release the owner
+ * reference that was held by the fop waiting for the timer. This
+ * can be the last reference, but we'll immediately increment it
+ * for the current fop, so no need to check it.
+ */
+ lock->refs_owners--;
+
+ ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
+ }
+
+ /* We have two options here:
+ *
+ * 1. The timer has been successfully cancelled.
+ *
+ * This is the easiest case and we can continue with the currently
+ * acquired lock.
+ *
+ * 2. The timer callback has already been fired.
+ *
+ * In this case we have not been able to cancel the timer before
+ * the timer callback has been fired, but we also know that
+ * lock->timer != NULL. This means that the timer callback is still
+ * trying to acquire the inode mutex that we currently own. We are
+ * safe until we release it. In this case we can safely clear
+ * lock->timer. This will cause that the timer callback does nothing
+ * once it acquires the mutex.
+ */
+ lock->timer = NULL;
+ }
+
+ /* 'exclusive' works as a counter: it is incremented here for every
+ * non-shared fop and decremented again in ec_lock_next_owner(). */
+ lock->exclusive |= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
+
+ if (!list_empty(&lock->owners)) {
+ /* There are other owners of this lock. We can only take ownership if
+ * the lock is already acquired and can be shared. Otherwise we need
+ * to wait. */
+ if (!lock->acquired || (lock->exclusive != 0)) {
+ ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
+
+ list_add_tail(&link->wait_list, &lock->waiting);
+
+ goto unlock;
+ }
+ }
+
+ list_add_tail(&link->owner_list, &lock->owners);
+ lock->refs_owners++;
+
+ assigned = _gf_true;
+
+unlock:
+ if (!assigned) {
+ /* We have not been able to take ownership of this lock. The fop must
+ * be put to sleep. */
+ ec_sleep(fop);
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ /* If we have cancelled the timer, we need to resume the fop that was
+ * waiting for it. */
+ if (timer_link != NULL) {
+ ec_resume(timer_link->fop, 0);
+ }
+
+ return assigned;
+}
+
+/* Removes 'link' from the owners list of its lock once the fop has finished
+ * its main operation, and hands the lock over to waiting fops if no owner
+ * is left.
+ *
+ * 'cbk' is the answer selected for the fop (may be NULL). When the fop
+ * succeeded, the in-memory post-operation versions are bumped for every
+ * counter the link is flagged to update, and the matching dirty counter is
+ * bumped if some node is not in the fop's good mask.
+ *
+ * 'release' requests that the lock be marked for release. */
+static void
+ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
+                   gf_boolean_t release)
+{
+    struct list_head wakeup;
+    ec_lock_t *lock = link->lock;
+    ec_fop_data_t *fop = link->fop;
+    ec_inode_t *ctx = lock->ctx;
+    ec_t *ec = fop->xl->private;
+    gf_boolean_t succeeded;
+    int32_t idx;
+
+    INIT_LIST_HEAD(&wakeup);
+
+    LOCK(&lock->loc.inode->lock);
+
+    ec_trace("LOCK_DONE", fop, "lock=%p", lock);
+
+    /* The link has to be in the owners list at this point. The owner
+     * reference (lock->refs_owners) is intentionally kept: the inode mutex
+     * is dropped before ec_unlock() runs, and that counter is what tells
+     * when the last owner unlocks so that cleanup can be done. */
+    GF_ASSERT((lock->refs_owners > 0) && !list_empty(&link->owner_list));
+    list_del_init(&link->owner_list);
+
+    lock->release |= release;
+
+    succeeded = (fop->error == 0) && (cbk != NULL) && (cbk->op_ret >= 0);
+    if (succeeded) {
+        for (idx = 0; idx < 2; idx++) {
+            if (!link->update[idx]) {
+                continue;
+            }
+
+            ctx->post_version[idx]++;
+            if (ec->node_mask & ~fop->good) {
+                ctx->dirty[idx]++;
+            }
+        }
+    }
+
+    ec_lock_update_good(lock, fop);
+
+    /* Undo the exclusive accounting done when ownership was assigned. */
+    lock->exclusive -= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
+    if (list_empty(&lock->owners)) {
+        ec_lock_wake_shared(lock, &wakeup);
+    }
+
+    UNLOCK(&lock->loc.inode->lock);
+
+    ec_lock_resume_shared(&wakeup);
+}
+
+/* Takes, in order, every lock the fop has prepared and not yet acquired.
+ *
+ * The loop stops as soon as one lock cannot be owned or acquired
+ * immediately; the fop will be resumed later from the corresponding
+ * callback or wake-up path. */
+void ec_lock(ec_fop_data_t *fop)
+{
+    ec_lock_link_t *link;
+
+    /* ec_resume() may be called on this fop before the matching ec_sleep()
+     * has happened, which could drop its refs to 0 and cause a use after
+     * free in this function. To avoid that, ec_sleep() is called on entry
+     * and ec_resume() on exit. */
+    ec_sleep (fop);
+
+    while (fop->locked < fop->lock_count) {
+        /* A fop has at most 2 locks, so xor-ing with fop->first_lock
+         * swaps their order when first_lock is 1. */
+        link = &fop->locks[fop->locked ^ fop->first_lock];
+
+        if (!ec_lock_assign_owner(link)) {
+            break;
+        }
+        if (!ec_lock_acquire(link)) {
+            break;
+        }
+    }
+
+    ec_resume(fop, 0);
+}
+
+/* Finishes the release of a lock: resets its state, moves the frozen fops
+ * to the waiting list and either wakes them up or, if nobody is waiting and
+ * no fop is preparing to use the lock, destroys it. */
+void
+ec_lock_unfreeze(ec_lock_link_t *link)
+{
+    struct list_head wakeup;
+    ec_lock_t *lock = link->lock;
+    gf_boolean_t destroy = _gf_false;
+
+    INIT_LIST_HEAD(&wakeup);
+
+    LOCK(&lock->loc.inode->lock);
+
+    /* The lock has just been released, so it must be marked as such and any
+     * fop that tried to use it in the meantime must be in the frozen list.
+     * The only active reference is the one processing this unfreeze. */
+    GF_ASSERT(lock->release && (lock->refs_owners == 1));
+    lock->release = _gf_false;
+    lock->refs_owners = 0;
+
+    lock->acquired = _gf_false;
+
+    /* A released lock shouldn't be exclusive, have a pending timer or have
+     * any owner, and its waiting list should be empty. Only the frozen list
+     * can contain fops. */
+    GF_ASSERT((lock->exclusive == 0) && (lock->timer == NULL) &&
+              list_empty(&lock->waiting) && list_empty(&lock->owners));
+
+    /* All frozen fops become waiting fops. */
+    list_splice_init(&lock->frozen, &lock->waiting);
+
+    if (list_empty(&lock->waiting) && (lock->refs_pending == 0)) {
+        /* Nobody waits for the lock and no fop is being prepared to use
+         * it: it can be disposed. */
+        destroy = _gf_true;
+
+        ec_trace("LOCK_DESTROY", link->fop, "lock=%p", lock);
+
+        lock->ctx->inode_lock = NULL;
+    } else {
+        ec_trace("LOCK_UNFREEZE", link->fop, "lock=%p", lock);
+
+        ec_lock_wake_shared(lock, &wakeup);
+    }
+
+    UNLOCK(&lock->loc.inode->lock);
+
+    ec_lock_resume_shared(&wakeup);
+
+    if (destroy) {
+        ec_lock_destroy(lock);
+    }
+}
+
+/* Callback of the unlock inodelk sent by ec_unlock_lock().
+ *
+ * 'cookie' is the inodelk sub-fop; its 'data' field carries the lock link.
+ * The result is only logged/traced: the lock is unfrozen in both cases.
+ *
+ * Always returns 0. */
+int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    ec_fop_data_t *fop = cookie;
+    ec_lock_link_t *link = fop->data;
+
+    if (op_ret >= 0) {
+        ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
+    } else {
+        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                EC_MSG_UNLOCK_FAILED,
+                "entry/inode unlocking failed (%s)",
+                ec_fop_name(link->fop->id));
+    }
+
+    ec_lock_unfreeze(link);
+
+    return 0;
+}
+
+/* Releases the lock referenced by 'link'.
+ *
+ * The cached inode information is cleared first. If the lock is acquired
+ * on at least one brick, an unlock inodelk is sent and ec_unlocked() will
+ * complete the release; otherwise the lock is unfrozen directly. */
+void ec_unlock_lock(ec_lock_link_t *link)
+{
+    ec_lock_t *lock = link->lock;
+    ec_fop_data_t *fop = link->fop;
+
+    ec_clear_inode_info(fop, lock->loc.inode);
+
+    if ((lock->mask == 0) || !lock->acquired) {
+        /* Nothing to unlock on the bricks. */
+        ec_lock_unfreeze(link);
+
+        return;
+    }
+
+    ec_owner_set(fop->frame, lock);
+
+    lock->flock.l_type = F_UNLCK;
+    ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
+             lock->loc.inode);
+
+    ec_inodelk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ONE,
+               ec_unlocked, link, fop->xl->name, &lock->loc, F_SETLK,
+               &lock->flock, NULL);
+}
+
+/* Callback of the (f)xattrop sent by ec_update_size_version().
+ *
+ * 'cookie' is the xattrop sub-fop: its parent is the fop that owns the lock
+ * link and its 'data' field is the link itself.
+ *
+ * On success the values returned by the bricks replace the cached
+ * version/size (pre and post values become equal) and the corresponding
+ * have_* flags are set. Unless the parent fop is a flush, fsync or
+ * fsyncdir, the lock is released afterwards, whatever the result was.
+ *
+ * Always returns 0. */
+int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
+                                    xlator_t * this, int32_t op_ret,
+                                    int32_t op_errno, dict_t * xattr,
+                                    dict_t * xdata)
+{
+    ec_fop_data_t *fop = cookie;
+    ec_lock_link_t *link;
+    ec_lock_t *lock;
+    ec_inode_t *ctx;
+
+    if (op_ret >= 0) {
+        fop->parent->good &= fop->good;
+        link = fop->data;
+        lock = link->lock;
+        ctx = lock->ctx;
+
+        ec_lock_update_good(lock, fop);
+
+        if (ec_dict_del_array(xattr, EC_XATTR_VERSION, ctx->post_version,
+                              EC_VERSION_SIZE) == 0) {
+            ctx->pre_version[0] = ctx->post_version[0];
+            ctx->pre_version[1] = ctx->post_version[1];
+
+            ctx->have_version = _gf_true;
+        }
+        if (ec_dict_del_number(xattr, EC_XATTR_SIZE, &ctx->post_size) == 0) {
+            ctx->pre_size = ctx->post_size;
+
+            ctx->have_size = _gf_true;
+        }
+        /* NOTE(review): the config is looked up in 'xdata' while version
+         * and size come from 'xattr' - confirm this is intended. */
+        if ((ec_dict_del_config(xdata, EC_XATTR_CONFIG, &ctx->config) == 0) &&
+            ec_config_check(fop->parent, &ctx->config)) {
+            ctx->have_config = _gf_true;
+        }
+
+        ctx->have_info = _gf_true;
+    } else {
+        gf_msg(fop->xl->name, fop_log_level (fop->id, op_errno), op_errno,
+               EC_MSG_SIZE_VERS_UPDATE_FAIL,
+               "Failed to update version and size");
+    }
+
+    switch (fop->parent->id) {
+    case GF_FOP_FLUSH:
+    case GF_FOP_FSYNC:
+    case GF_FOP_FSYNCDIR:
+        /* These fops only flush the size/version: the lock is kept. */
+        break;
+    default:
+        ec_unlock_lock(fop->data);
+        break;
+    }
+
+    return 0;
+}
+
+/* Sends an (f)xattrop that adds the given deltas to the version, size and
+ * dirty xattrs of the inode protected by the lock of 'link'.
+ *
+ * link    - lock link of the fop that triggers the update.
+ * version - array of 2 version deltas.
+ * size    - size delta (0 means the size is not sent).
+ * dirty   - array of 2 dirty deltas.
+ *
+ * The request is sent as uid/gid 0 and the identity of the fop is restored
+ * afterwards. ec_update_size_version_done() handles the answer and, unless
+ * the fop is a flush/fsync/fsyncdir, releases the lock.
+ *
+ * If the request cannot be built, the error is recorded in the fop and the
+ * lock is released here under the same rule, because the callback will
+ * never run. */
+void
+ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
+                       uint64_t size, uint64_t *dirty)
+{
+    ec_fop_data_t *fop;
+    ec_lock_t *lock;
+    ec_inode_t *ctx;
+    dict_t * dict;
+    int32_t err = -ENOMEM;
+
+    fop = link->fop;
+
+    ec_trace("UPDATE", fop, "version=%ld/%ld, size=%ld, dirty=%ld/%ld",
+             version[0], version[1], size, dirty[0], dirty[1]);
+
+    dict = dict_new();
+    if (dict == NULL) {
+        goto out;
+    }
+
+    lock = link->lock;
+    ctx = lock->ctx;
+
+    /* If we don't have version information or it has been modified, we
+     * update it. */
+    if (!ctx->have_version || (version[0] != 0) || (version[1] != 0)) {
+        err = ec_dict_set_array(dict, EC_XATTR_VERSION, version,
+                                EC_VERSION_SIZE);
+        if (err != 0) {
+            goto out;
+        }
+    }
+
+    if (size != 0) {
+        /* If size has been changed, we should already know the previous size
+         * of the file. */
+        GF_ASSERT(ctx->have_size);
+
+        err = ec_dict_set_number(dict, EC_XATTR_SIZE, size);
+        if (err != 0) {
+            goto out;
+        }
+    }
+
+    /* Dirty counters are only sent when they have been modified. */
+    if ((dirty[0] != 0) || (dirty[1] != 0)) {
+        err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+        if (err != 0) {
+            goto out;
+        }
+    }
+
+    /* If config information is not known, we request it now. */
+    if ((lock->loc.inode->ia_type == IA_IFREG) && !ctx->have_config) {
+        /* A failure requesting this xattr is ignored because it's not
+         * absolutely required right now. */
+        ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
+    }
+
+    /* The xattrop is issued with root credentials. */
+    fop->frame->root->uid = 0;
+    fop->frame->root->gid = 0;
+
+    if (link->lock->fd == NULL) {
+        ec_xattrop(fop->frame, fop->xl, lock->good_mask, EC_MINIMUM_MIN,
+                   ec_update_size_version_done, link, &link->lock->loc,
+                   GF_XATTROP_ADD_ARRAY64, dict, NULL);
+    } else {
+        ec_fxattrop(fop->frame, fop->xl, lock->good_mask, EC_MINIMUM_MIN,
+                    ec_update_size_version_done, link, link->lock->fd,
+                    GF_XATTROP_ADD_ARRAY64, dict, NULL);
+    }
+
+    fop->frame->root->uid = fop->uid;
+    fop->frame->root->gid = fop->gid;
+
+    dict_unref(dict);
+
+    return;
+
+out:
+    if (dict != NULL) {
+        dict_unref(dict);
+    }
+
+    ec_fop_set_error(fop, -err);
+
+    gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
+            "Unable to update version and size");
+
+    /* Here 'fop' is the fop that owns the link, i.e. the one that would
+     * have been the parent of the xattrop. The check done by
+     * ec_update_size_version_done() on 'fop->parent->id' therefore applies
+     * to 'fop->id', and the link to unlock is 'link' itself: 'fop->parent'
+     * may be NULL and 'fop->data' is not the link for this fop. */
+    if ((fop->id != GF_FOP_FLUSH) &&
+        (fop->id != GF_FOP_FSYNC) &&
+        (fop->id != GF_FOP_FSYNCDIR)) {
+        ec_unlock_lock(link);
+    }
+}
+
+/* Computes the pending version, size and dirty deltas of the inode
+ * protected by the lock of 'link' and, if any version or dirty counter has
+ * changed, sends them to the bricks.
+ *
+ * The in-memory dirty counters are always reset.
+ *
+ * Returns _gf_true if an update has been started, _gf_false if there was
+ * nothing to update. */
+gf_boolean_t
+ec_update_info(ec_lock_link_t *link)
+{
+    ec_inode_t *ctx = link->lock->ctx;
+    uint64_t version[2];
+    uint64_t dirty[2];
+    uint64_t size;
+    int32_t idx;
+
+    for (idx = 0; idx < 2; idx++) {
+        /* pre_version[*] will be 0 if have_version is false */
+        version[idx] = ctx->post_version[idx] - ctx->pre_version[idx];
+        dirty[idx] = ctx->dirty[idx];
+    }
+
+    size = ctx->post_size - ctx->pre_size;
+
+    /* Dirty is not combined so just reset it right here */
+    memset(ctx->dirty, 0, sizeof(ctx->dirty));
+
+    if ((version[0] == 0) && (version[1] == 0) &&
+        (dirty[0] == 0) && (dirty[1] == 0)) {
+        return _gf_false;
+    }
+
+    ec_update_size_version(link, version, size, dirty);
+
+    return _gf_true;
+}
+
+/* Releases the lock of 'link' without delay. If there is pending
+ * version/size/dirty information, it is flushed first and the lock is
+ * released from the update path; otherwise the lock is released right
+ * away. The fop owning the link is resumed in both cases. */
+void
+ec_unlock_now(ec_lock_link_t *link)
+{
+    gf_boolean_t updating;
+
+    ec_trace("UNLOCK_NOW", link->fop, "lock=%p", link->lock);
+
+    updating = ec_update_info(link);
+    if (!updating) {
+        ec_unlock_lock(link);
+    }
+
+    ec_resume(link->fop, 0);
+}
+
+void ec_unlock_timer_add(ec_lock_link_t *link);
+
+/* Body of the delayed-unlock timer callback.
+ *
+ * If the timer is still registered in the lock (nobody cancelled it), the
+ * lock is marked for release and released immediately. Otherwise another
+ * fop has taken the lock in the meantime: the owner reference held on
+ * behalf of the timer is dropped through ec_unlock_timer_add() and the fop
+ * that was waiting for the timer is resumed. */
+void
+ec_unlock_timer_del(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ inode_t *inode;
+ gf_boolean_t now = _gf_false;
+
+ /* If we are here, it means that the timer has expired before having
+ * been cancelled. This guarantees that 'link' is still valid because
+ * the fop that contains it must be pending (if timer cancellation in
+ * ec_lock_assign_owner() fails, the fop is left sleeping).
+ *
+ * At the same time, the fop still has a reference to the lock, so
+ * it must also be valid.
+ */
+ lock = link->lock;
+
+ /* 'lock' must have a valid inode since it can only be destroyed
+ * when the lock itself is destroyed, but we have a reference to the
+ * lock to avoid this.
+ */
+ inode = lock->loc.inode;
+
+ LOCK(&inode->lock);
+
+ if (lock->timer != NULL) {
+ ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
+
+ /* The unlock timer has expired without anyone cancelling it.
+ * This means that it shouldn't have any owner, and the
+ * waiting and frozen lists should be empty. It shouldn't have
+ * been marked as release nor be exclusive either. It must have
+ * only one owner reference, but there can be fops being
+ * prepared though. */
+ GF_ASSERT(!lock->release && (lock->exclusive == 0) &&
+ (lock->refs_owners == 1) &&
+ list_empty(&lock->owners) &&
+ list_empty(&lock->waiting) &&
+ list_empty(&lock->frozen));
+
+ gf_timer_call_cancel(link->fop->xl->ctx, lock->timer);
+ lock->timer = NULL;
+
+ /* Any fop being processed from now on, will need to wait
+ * until the next unlock/lock cycle. */
+ lock->release = now = _gf_true;
+ }
+
+ UNLOCK(&inode->lock);
+
+ if (now) {
+ ec_unlock_now(link);
+ } else {
+ /* The timer has been cancelled just after firing it but before
+ * getting here. This means that another fop has used the lock
+ * and everything should be handled as if this callback had
+ * not been executed. However we still have an owner
+ * reference.
+ *
+ * We need to release our reference. If this is not the last
+ * reference (the most common case because another fop has
+ * taken another ref) we only need to decrement the counter.
+ * Otherwise we have been delayed enough so that the other fop
+ * has had time to acquire the reference, do its operation and
+ * release it. At the time of releasing it, the fop found
+ * that the ref counter was > 1 (our reference), so the delayed
+ * unlock timer wasn't started. We need to start it again if we
+ * are the last reference.
+ *
+ * ec_unlock_timer_add() handles both cases.
+ */
+ ec_unlock_timer_add(link);
+
+ /* We need to resume the fop that was waiting for the delayed
+ * unlock.
+ */
+ ec_resume(link->fop, 0);
+ }
+}
+
+/* Timer callback registered by ec_unlock_timer_add(). 'data' is the lock
+ * link whose delayed unlock has expired. */
+void ec_unlock_timer_cbk(void *data)
+{
+    ec_lock_link_t *link = data;
+
+    ec_unlock_timer_del(link);
+}
+
+/* Drops the owner reference that 'link' holds on its lock.
+ *
+ * Three outcomes are possible:
+ * - other owner references exist: the counter is just decremented;
+ * - this is the last reference and the lock is acquired: the unlock is
+ *   delayed with a 1 second timer, or done immediately if the lock is
+ *   marked for release, ec is shutting down or the timer cannot be created;
+ * - this is the last reference and the lock is not acquired: the lock is
+ *   marked for release and unfrozen. */
+void ec_unlock_timer_add(ec_lock_link_t *link)
+{
+ struct timespec delay;
+ ec_fop_data_t *fop = link->fop;
+ ec_lock_t *lock = link->lock;
+ gf_boolean_t now = _gf_false;
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* We are trying to unlock the lock. We can have multiple scenarios here,
+ * but all of them need to have lock->timer == NULL:
+ *
+ * 1. There are other owners currently running that can call ec_unlock().
+ *
+ * None of them can have started the timer until the last one. But this
+ * call should be the consequence of this last one.
+ *
+ * 2. There are fops in the waiting or frozen lists.
+ *
+ * These fops cannot call ec_unlock(). So we should be here.
+ *
+ * We must reach here with at least one owner reference.
+ */
+ GF_ASSERT((lock->timer == NULL) && (lock->refs_owners > 0));
+
+ /* If the fop detects that a heal is needed, we mark the lock to be
+ * released as soon as possible. */
+ lock->release |= ec_fop_needs_heal(fop);
+
+ if (lock->refs_owners > 1) {
+ ec_trace("UNLOCK_SKIP", fop, "lock=%p", lock);
+
+ /* If there are other owners we cannot do anything else with the lock.
+ * Note that the current fop has already been removed from the owners
+ * list in ec_lock_reuse(). */
+ lock->refs_owners--;
+
+ UNLOCK(&lock->loc.inode->lock);
+ } else if (lock->acquired) {
+ /* There are no other owners and the lock is acquired. If there were
+ * fops waiting, at least one of them should have been promoted to an
+ * owner, so the waiting list should be empty. */
+ GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+ ec_t *ec = fop->xl->private;
+
+ /* If everything goes as expected this fop will be put to sleep until
+ * the timer callback is executed. */
+ ec_sleep(fop);
+
+ /* If the lock needs to be released, or ec is shutting down, do not
+ * delay lock release. */
+ if (!lock->release && !ec->shutdown) {
+ ec_trace("UNLOCK_DELAY", fop, "lock=%p, release=%d", lock,
+ lock->release);
+
+ delay.tv_sec = 1;
+ delay.tv_nsec = 0;
+ lock->timer = gf_timer_call_after(fop->xl->ctx, delay,
+ ec_unlock_timer_cbk, link);
+ if (lock->timer == NULL) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
+ EC_MSG_UNLOCK_DELAY_FAILED,
+ "Unable to delay an unlock");
+
+ /* We are unable to create a new timer. We immediately release
+ * the lock. */
+ lock->release = now = _gf_true;
+ }
+ } else {
+ ec_trace("UNLOCK_FORCE", fop, "lock=%p, release=%d", lock,
+ lock->release);
+ lock->release = now = _gf_true;
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ /* The immediate release is done outside the inode mutex. */
+ if (now) {
+ ec_unlock_now(link);
+ }
+ } else {
+ /* There are no owners and the lock is not acquired. This can only
+ * happen if a lock attempt has failed and we get to the unlock step
+ * of the fop. As in the previous case, the waiting list must be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+ /* We need to mark the lock to be released to correctly handle fops
+ * that may get in after we release the inode mutex but before
+ * ec_lock_unfreeze() is processed. */
+ lock->release = _gf_true;
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_unfreeze(link);
+ }
+}
+
+/* Drops the owner reference of every lock used by the fop. Each lock is
+ * either released (possibly after a delay) or left to its other owners. */
+void ec_unlock(ec_fop_data_t *fop)
+{
+    int32_t idx = 0;
+
+    while (idx < fop->lock_count) {
+        ec_unlock_timer_add(&fop->locks[idx]);
+        idx++;
+    }
+}
+
+/* Sends any pending version/size/dirty update for the single lock held by
+ * the fop. The fop is expected to use exactly one lock. */
+void ec_flush_size_version(ec_fop_data_t * fop)
+{
+    ec_lock_link_t *link;
+
+    GF_ASSERT(fop->lock_count == 1);
+
+    link = &fop->locks[0];
+    ec_update_info(link);
+}
+
+/* Called once the fop has finished its main operation: decides whether its
+ * locks must be released as soon as possible and passes ownership of each
+ * lock to the next fop.
+ *
+ * The locks are marked for release when eager locking is disabled, when
+ * the fop has no answer, or when the answer reports more than one inodelk
+ * on the inode (lock contention). */
+void ec_lock_reuse(ec_fop_data_t *fop)
+{
+    ec_cbk_data_t *cbk = fop->answer;
+    ec_t *ec = fop->xl->private;
+    int32_t idx, count;
+    gf_boolean_t release = _gf_false;
+
+    if (!ec->eager_lock || (cbk == NULL)) {
+        /* If eager lock is disabled or if we haven't got
+         * an answer with enough quorum, we always release
+         * the lock. */
+        release = _gf_true;
+    } else if ((cbk->xdata != NULL) &&
+               (dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT,
+                               &count) == 0) &&
+               (count > 1)) {
+        release = _gf_true;
+
+        gf_msg_debug (fop->xl->name, 0,
+                      "Lock contention detected");
+    }
+
+    for (idx = 0; idx < fop->lock_count; idx++) {
+        ec_lock_next_owner(&fop->locks[idx], cbk, release);
+    }
+}
+
+/* State machine driver of a fop.
+ *
+ * Runs fop->handler for the current state and keeps going for as long as
+ * ec_check_complete() returns a non-negative value, which is used as the
+ * error for the next iteration. A non-zero error is stored in the fop and
+ * flips the sign of the current state. The loop ends, releasing one
+ * reference of the fop, when the END state is reached. */
+void __ec_manager(ec_fop_data_t * fop, int32_t error)
+{
+    ec_t *ec = fop->xl->private;
+
+    for (;;) {
+        ec_trace("MANAGER", fop, "error=%d", error);
+
+        if (!ec_must_wind (fop) && (ec->xl_up_count < ec->fragments)) {
+            error = ENOTCONN;
+        }
+
+        if (error != 0) {
+            fop->error = error;
+            fop->state = -fop->state;
+        }
+
+        if ((fop->state == EC_STATE_END) || (fop->state == -EC_STATE_END)) {
+            ec_fop_data_release(fop);
+
+            break;
+        }
+
+        /* At each state, fop must not be used anywhere else and there
+         * shouldn't be any pending subfop going on. */
+        GF_ASSERT(fop->jobs == 0);
+
+        /* Sub-fops launched by the handler could finish and call
+         * ec_resume() before the handler itself returns, which would let
+         * two threads run the same manager concurrently. This extra job
+         * prevents it; ec_check_complete() takes care of removing it. */
+        fop->jobs = 1;
+
+        fop->state = fop->handler(fop, fop->state);
+        GF_ASSERT (fop->state >= 0);
+
+        error = ec_check_complete(fop, __ec_manager);
+        if (error < 0) {
+            break;
+        }
+    }
+}
+
+/* Entry point of the fop state machine. The fop must be idle (no jobs, no
+ * winds, no error). A fop still in the START state is moved to INIT before
+ * the state machine is run with the given initial error. */
+void ec_manager(ec_fop_data_t * fop, int32_t error)
+{
+    GF_ASSERT(fop->jobs == 0);
+    GF_ASSERT(fop->winds == 0);
+    GF_ASSERT(fop->error == 0);
+
+    if (fop->state == EC_STATE_START) {
+        fop->state = EC_STATE_INIT;
+    }
+
+    __ec_manager(fop, error);
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
new file mode 100644
index 00000000000..8e724a81380
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -0,0 +1,120 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_COMMON_H__
+#define __EC_COMMON_H__
+
+#include "xlator.h"
+
+#include "ec-data.h"
+
+/* Kind of transaction. NOTE(review): presumably selects between the data
+ * and metadata counters (index 0 / index 1 of the version and dirty
+ * arrays) - confirm against the users of this type. */
+typedef enum {
+ EC_DATA_TXN,
+ EC_METADATA_TXN
+} ec_txn_t;
+
+#define EC_FOP_HEAL -1
+#define EC_FOP_FHEAL -2
+
+#define EC_CONFIG_VERSION 0
+
+#define EC_CONFIG_ALGORITHM 0
+
+#define EC_FLAG_LOCK_SHARED 0x0001
+#define EC_FLAG_WAITING_SIZE 0x0002
+
+#define EC_SELFHEAL_BIT 62
+
+#define EC_MINIMUM_ONE -1
+#define EC_MINIMUM_MIN -2
+#define EC_MINIMUM_ALL -3
+
+#define EC_UPDATE_DATA 1
+#define EC_UPDATE_META 2
+#define EC_QUERY_INFO 4
+#define EC_INODE_SIZE 8
+
+/* Generic states of the fop state machine run by __ec_manager().
+ *
+ * EC_STATE_START and EC_STATE_END intentionally share the value 0:
+ * ec_manager() moves a fop that is in START to INIT before running it, and
+ * __ec_manager() stops when the state is END. When an error is reported,
+ * __ec_manager() negates the current state before calling the handler. */
+#define EC_STATE_START 0
+#define EC_STATE_END 0
+#define EC_STATE_INIT 1
+#define EC_STATE_LOCK 2
+#define EC_STATE_DISPATCH 3
+#define EC_STATE_PREPARE_ANSWER 4
+#define EC_STATE_REPORT 5
+#define EC_STATE_LOCK_REUSE 6
+#define EC_STATE_UNLOCK 7
+
+#define EC_STATE_DELAYED_START 100
+
+#define EC_STATE_HEAL_ENTRY_LOOKUP 200
+#define EC_STATE_HEAL_ENTRY_PREPARE 201
+#define EC_STATE_HEAL_PRE_INODELK_LOCK 202
+#define EC_STATE_HEAL_PRE_INODE_LOOKUP 203
+#define EC_STATE_HEAL_XATTRIBUTES_REMOVE 204
+#define EC_STATE_HEAL_XATTRIBUTES_SET 205
+#define EC_STATE_HEAL_ATTRIBUTES 206
+#define EC_STATE_HEAL_OPEN 207
+#define EC_STATE_HEAL_REOPEN_FD 208
+#define EC_STATE_HEAL_UNLOCK 209
+#define EC_STATE_HEAL_UNLOCK_ENTRY 210
+#define EC_STATE_HEAL_DATA_LOCK 211
+#define EC_STATE_HEAL_DATA_COPY 212
+#define EC_STATE_HEAL_DATA_UNLOCK 213
+#define EC_STATE_HEAL_POST_INODELK_LOCK 214
+#define EC_STATE_HEAL_POST_INODE_LOOKUP 215
+#define EC_STATE_HEAL_SETATTR 216
+#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217
+#define EC_STATE_HEAL_DISPATCH 218
+
+gf_boolean_t ec_dispatch_one_retry (ec_fop_data_t *fop, ec_cbk_data_t **cbk);
+int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx);
+
+void ec_complete(ec_fop_data_t *fop);
+
+void ec_update_good(ec_fop_data_t *fop, uintptr_t good);
+
+void ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
+ec_cbk_data_t *
+ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
+
+gf_boolean_t
+ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro);
+
+void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags);
+void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc,
+ uint32_t flags);
+void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags);
+void ec_lock(ec_fop_data_t * fop);
+void ec_lock_reuse(ec_fop_data_t *fop);
+void ec_unlock(ec_fop_data_t * fop);
+
+gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t *size);
+gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t size);
+void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode);
+
+void ec_flush_size_version(ec_fop_data_t * fop);
+
+void ec_dispatch_all(ec_fop_data_t * fop);
+void ec_dispatch_inc(ec_fop_data_t * fop);
+void ec_dispatch_min(ec_fop_data_t * fop);
+void ec_dispatch_one(ec_fop_data_t * fop);
+
+void ec_sleep(ec_fop_data_t *fop);
+void ec_resume(ec_fop_data_t * fop, int32_t error);
+void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
+
+void ec_manager(ec_fop_data_t * fop, int32_t error);
+gf_boolean_t ec_is_recoverable_error (int32_t op_errno);
+void ec_handle_healers_done (ec_fop_data_t *fop);
+
+#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
new file mode 100644
index 00000000000..28bf988d09d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -0,0 +1,317 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec-mem-types.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-data.h"
+#include "ec-messages.h"
+
+/* Allocates an answer (cbk) for 'fop' and appends it to the fop's list of
+ * answers.
+ *
+ * frame, this, id - identify the request the answer belongs to; they must
+ *                   match the frame, xlator and id stored in 'fop'.
+ * idx             - index of the subvolume that answered; the answer mask
+ *                   is the bit at that position.
+ * op_ret/op_errno - result reported by the subvolume.
+ *
+ * Returns the new answer, or NULL if the request does not match the fop or
+ * memory cannot be allocated (an error is logged in both cases). */
+ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
+                                     ec_fop_data_t * fop, int32_t id,
+                                     int32_t idx, int32_t op_ret,
+                                     int32_t op_errno)
+{
+    ec_cbk_data_t * cbk;
+    ec_t * ec = this->private;
+
+    if (fop->xl != this)
+    {
+        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_XLATOR_MISMATCH, "Mismatching xlators between request "
+                "and answer (req=%s, ans=%s).", fop->xl->name, this->name);
+
+        return NULL;
+    }
+    if (fop->frame != frame)
+    {
+        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_FRAME_MISMATCH, "Mismatching frames between request "
+                "and answer (req=%p, ans=%p).",
+                fop->frame, frame);
+
+        return NULL;
+    }
+    if (fop->id != id)
+    {
+        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_FOP_MISMATCH, "Mismatching fops between request "
+                "and answer (req=%d, ans=%d).",
+                fop->id, id);
+
+        return NULL;
+    }
+
+    cbk = mem_get0(ec->cbk_pool);
+    if (cbk == NULL)
+    {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_NO_MEMORY, "Failed to allocate memory for an "
+                "answer.");
+
+        /* Without this return the NULL pointer would be dereferenced
+         * just below. */
+        return NULL;
+    }
+
+    cbk->fop = fop;
+    cbk->idx = idx;
+    cbk->mask = 1ULL << idx;
+    cbk->count = 1;
+    cbk->op_ret = op_ret;
+    cbk->op_errno = op_errno;
+    INIT_LIST_HEAD (&cbk->entries.list);
+
+    /* The list of answers is protected by the fop lock. */
+    LOCK(&fop->lock);
+
+    list_add_tail(&cbk->answer_list, &fop->answer_list);
+
+    UNLOCK(&fop->lock);
+
+    return cbk;
+}
+
+/* Releases every resource referenced by an answer and returns the answer
+ * itself to its memory pool. The answer is not unlinked from any list
+ * here. */
+void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
+{
+    /* Reference-counted objects: only dropped when present. */
+    if (cbk->xdata != NULL) {
+        dict_unref(cbk->xdata);
+    }
+    if (cbk->dict != NULL) {
+        dict_unref(cbk->dict);
+    }
+    if (cbk->inode != NULL) {
+        inode_unref(cbk->inode);
+    }
+    if (cbk->fd != NULL) {
+        fd_unref(cbk->fd);
+    }
+    if (cbk->buffers != NULL) {
+        iobref_unref(cbk->buffers);
+    }
+
+    /* Plain allocations owned by the answer. */
+    GF_FREE(cbk->vector);
+    gf_dirent_free (&cbk->entries);
+    GF_FREE (cbk->str);
+
+    mem_put(cbk);
+}
+
+/* Tells whether a fop must be tracked in the list of pending fops.
+ *
+ * On a graph switch, PARENT_DOWN is only notified to the children once the
+ * tracked fops have completed. Heal fops are left out because healing a
+ * big file or directory can take a long time, and waiting for it would
+ * hang the mount.
+ */
+static gf_boolean_t
+ec_needs_graceful_completion (ec_fop_data_t *fop)
+{
+    switch (fop->id) {
+    case EC_FOP_HEAL:
+    case EC_FOP_FHEAL:
+        return _gf_false;
+    default:
+        return _gf_true;
+    }
+}
+
+/* Allocates and initializes the descriptor of a new fop.
+ *
+ * frame   - frame of the request, or NULL for internally generated fops.
+ *           When the frame already belongs to another fop (frame->local),
+ *           that fop becomes the parent and is put to sleep.
+ * id      - fop identifier.
+ * flags, target, minimum, wind, handler, cbks, data - stored verbatim in
+ *           the descriptor ('target' becomes the initial mask).
+ *
+ * The fop starts with one reference. Unless it is a heal fop, it is also
+ * added to the list of pending fops of the xlator.
+ *
+ * Returns the new fop, or NULL on allocation failure (an error is
+ * logged). */
+ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
+                                     int32_t id, uint32_t flags,
+                                     uintptr_t target, int32_t minimum,
+                                     ec_wind_f wind, ec_handler_f handler,
+                                     ec_cbk_t cbks, void * data)
+{
+    ec_fop_data_t * fop, * parent;
+    ec_t * ec = this->private;
+
+    fop = mem_get0(ec->fop_pool);
+    if (fop == NULL) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_NO_MEMORY, "Failed to allocate memory for a "
+                "request.");
+
+        return NULL;
+    }
+
+    INIT_LIST_HEAD(&fop->cbk_list);
+    INIT_LIST_HEAD(&fop->healer);
+    INIT_LIST_HEAD(&fop->answer_list);
+    INIT_LIST_HEAD(&fop->pending_list);
+    INIT_LIST_HEAD(&fop->locks[0].owner_list);
+    INIT_LIST_HEAD(&fop->locks[0].wait_list);
+    INIT_LIST_HEAD(&fop->locks[1].owner_list);
+    INIT_LIST_HEAD(&fop->locks[1].wait_list);
+
+    fop->xl = this;
+    fop->req_frame = frame;
+
+    /* fops need a private frame to be able to execute some postop operations
+     * even if the original fop has completed and reported back to the upper
+     * xlator and it has destroyed the base frame.
+     *
+     * TODO: minimize usage of private frames. Reuse req_frame as much as
+     *       possible.
+     */
+    fop->frame = (frame != NULL) ? copy_frame(frame)
+                                 : create_frame(this, this->ctx->pool);
+    if (fop->frame == NULL) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_NO_MEMORY, "Failed to create a private frame "
+                "for a request");
+
+        mem_put(fop);
+
+        return NULL;
+    }
+
+    fop->id = id;
+    fop->refs = 1;
+
+    fop->flags = flags;
+    fop->minimum = minimum;
+    fop->mask = target;
+
+    fop->wind = wind;
+    fop->handler = handler;
+    fop->cbks = cbks;
+    fop->data = data;
+
+    /* Remember the identity of the caller. */
+    fop->uid = fop->frame->root->uid;
+    fop->gid = fop->frame->root->gid;
+
+    LOCK_INIT(&fop->lock);
+
+    fop->frame->local = fop;
+
+    if (frame != NULL) {
+        parent = frame->local;
+        if (parent != NULL) {
+            /* The parent has to wait for this sub-fop. */
+            ec_sleep(parent);
+        }
+
+        fop->parent = parent;
+    }
+
+    if (ec_needs_graceful_completion (fop)) {
+        LOCK(&ec->lock);
+        {
+            list_add_tail(&fop->pending_list, &ec->pending_fops);
+        }
+        UNLOCK(&ec->lock);
+    }
+
+    return fop;
+}
+
+/* Takes an additional reference on the fop, under the fop lock. */
+void ec_fop_data_acquire(ec_fop_data_t * fop)
+{
+    LOCK(&fop->lock);
+    {
+        ec_trace("ACQUIRE", fop, "");
+
+        fop->refs += 1;
+    }
+    UNLOCK(&fop->lock);
+}
+
+/* Removes the fop from the list of pending fops of the xlator, if it is in
+ * it. In that case '*notify' is set to whether the list has become empty;
+ * otherwise '*notify' is left untouched. */
+static void
+ec_handle_last_pending_fop_completion (ec_fop_data_t *fop, gf_boolean_t *notify)
+{
+    ec_t *ec = fop->xl->private;
+
+    if (list_empty (&fop->pending_list)) {
+        /* The fop was never tracked (e.g. heal fops). */
+        return;
+    }
+
+    LOCK(&ec->lock);
+
+    list_del_init (&fop->pending_list);
+    *notify = list_empty (&ec->pending_fops);
+
+    UNLOCK(&ec->lock);
+}
+
+/* Destroys all answers received by the fop and resets its answer state
+ * (list of grouped answers and selected answer). */
+void
+ec_fop_cleanup(ec_fop_data_t *fop)
+{
+    ec_cbk_data_t *cbk;
+
+    /* Pop and destroy answers until none is left. */
+    while (!list_empty(&fop->answer_list)) {
+        cbk = list_entry(fop->answer_list.next, ec_cbk_data_t, answer_list);
+        list_del_init(&cbk->answer_list);
+
+        ec_cbk_data_destroy(cbk);
+    }
+
+    INIT_LIST_HEAD(&fop->cbk_list);
+    fop->answer = NULL;
+}
+
+/* Drops one reference of the fop. When the last reference is gone, the
+ * private frame and every resource held by the fop are released, the
+ * parent fop (if any) is resumed with the fop's error, the fop is removed
+ * from the pending list and its memory is returned to the pool. If it was
+ * the last pending fop of the xlator, ec_pending_fops_completed() is
+ * called. */
+void ec_fop_data_release(ec_fop_data_t * fop)
+{
+    ec_t *ec = NULL;
+    int32_t remaining;
+    gf_boolean_t notify = _gf_false;
+
+    LOCK(&fop->lock);
+    {
+        ec_trace("RELEASE", fop, "");
+
+        GF_ASSERT (fop->refs > 0);
+        remaining = --fop->refs;
+    }
+    UNLOCK(&fop->lock);
+
+    if (remaining != 0) {
+        return;
+    }
+
+    fop->frame->local = NULL;
+    STACK_DESTROY(fop->frame->root);
+
+    LOCK_DESTROY(&fop->lock);
+
+    if (fop->xdata != NULL) {
+        dict_unref(fop->xdata);
+    }
+    if (fop->dict != NULL) {
+        dict_unref(fop->dict);
+    }
+    if (fop->inode != NULL) {
+        inode_unref(fop->inode);
+    }
+    if (fop->fd != NULL) {
+        fd_unref(fop->fd);
+    }
+    if (fop->buffers != NULL) {
+        iobref_unref(fop->buffers);
+    }
+    GF_FREE(fop->vector);
+    GF_FREE(fop->str[0]);
+    GF_FREE(fop->str[1]);
+    loc_wipe(&fop->loc[0]);
+    loc_wipe(&fop->loc[1]);
+
+    ec_resume_parent(fop, fop->error);
+
+    ec_fop_cleanup(fop);
+
+    /* 'ec' is read before the fop memory is returned to the pool because
+     * it is still needed afterwards. */
+    ec = fop->xl->private;
+    ec_handle_last_pending_fop_completion (fop, &notify);
+    ec_handle_healers_done (fop);
+    mem_put(fop);
+    if (notify) {
+        ec_pending_fops_completed(ec);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
new file mode 100644
index 00000000000..4a2a11f4ccd
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -0,0 +1,335 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_DATA_H__
+#define __EC_DATA_H__
+
+#include "xlator.h"
+
+#include "ec.h"
+/* Forward declarations: the structures defined below reference each other through pointers. */
+struct _ec_config;
+typedef struct _ec_config ec_config_t;
+
+struct _ec_fd;
+typedef struct _ec_fd ec_fd_t;
+
+struct _ec_inode;
+typedef struct _ec_inode ec_inode_t;
+
+union _ec_cbk;
+typedef union _ec_cbk ec_cbk_t;
+
+struct _ec_lock;
+typedef struct _ec_lock ec_lock_t;
+
+struct _ec_lock_link;
+typedef struct _ec_lock_link ec_lock_link_t;
+
+struct _ec_fop_data;
+typedef struct _ec_fop_data ec_fop_data_t;
+
+struct _ec_cbk_data;
+typedef struct _ec_cbk_data ec_cbk_data_t;
+
+struct _ec_heal;
+typedef struct _ec_heal ec_heal_t;
+
+typedef void (* ec_wind_f)(ec_t *, ec_fop_data_t *, int32_t); /* winds a fop to one subvolume; last argument is its index */
+typedef int32_t (* ec_handler_f)(ec_fop_data_t *, int32_t); /* state-machine step: receives a state, returns the next one */
+typedef void (* ec_resume_f)(ec_fop_data_t *, int32_t); /* type of ec_fop_data_t.resume */
+/* Configuration record; ec_manager_create() fills one and stores it under EC_XATTR_CONFIG in the create xdata. */
+struct _ec_config
+{
+ uint32_t version; /* EC_CONFIG_VERSION on create */
+ uint8_t algorithm; /* EC_CONFIG_ALGORITHM on create */
+ uint8_t gf_word_size; /* EC_GF_BITS on create */
+ uint8_t bricks; /* ec->nodes on create */
+ uint8_t redundancy; /* ec->redundancy on create */
+ uint32_t chunk_size; /* EC_METHOD_CHUNK_SIZE on create */
+};
+/* Per-fd context, obtained through ec_fd_get() / __ec_fd_get(). */
+struct _ec_fd
+{
+ loc_t loc; /* handed to ec_loc_from_loc() together with the fop's loc by opendir and create */
+ uintptr_t open; /* mask of subvolumes that opened the fd (cbk->mask is OR-ed in); readdir fails with EINVAL while it is 0 */
+ int32_t flags; /* flags recorded by create (fop->int32) */
+};
+/* Per-inode context. NOTE(review): the have_* flags presumably tell which of the cached fields are valid - confirm. */
+struct _ec_inode
+{
+ ec_lock_t *inode_lock;
+ gf_boolean_t have_info;
+ gf_boolean_t have_config;
+ gf_boolean_t have_version;
+ gf_boolean_t have_size;
+ ec_config_t config;
+ uint64_t pre_version[2];
+ uint64_t post_version[2];
+ uint64_t pre_size;
+ uint64_t post_size;
+ uint64_t dirty[2];
+ struct list_head heal;
+};
+/* Callback types of the heal and fheal fops; both share the same signature. */
+typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
+ int32_t, int32_t, uintptr_t, uintptr_t,
+ uintptr_t, dict_t *);
+typedef int32_t (* fop_fheal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
+ int32_t, int32_t, uintptr_t, uintptr_t,
+ uintptr_t, dict_t *);
+/* Callback of the caller of a fop, one member per fop type; each entry point initializes the member of its own fop (e.g. .opendir). */
+union _ec_cbk
+{
+ fop_access_cbk_t access;
+ fop_create_cbk_t create;
+ fop_discard_cbk_t discard;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_fallocate_cbk_t fallocate;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_heal_cbk_t heal;
+ fop_fheal_cbk_t fheal;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_link_cbk_t link;
+ fop_lk_cbk_t lk;
+ fop_lookup_cbk_t lookup;
+ fop_mkdir_cbk_t mkdir;
+ fop_mknod_cbk_t mknod;
+ fop_open_cbk_t open;
+ fop_opendir_cbk_t opendir;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_readlink_cbk_t readlink;
+ fop_readv_cbk_t readv;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_rename_cbk_t rename;
+ fop_rmdir_cbk_t rmdir;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_setxattr_cbk_t setxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_statfs_cbk_t statfs;
+ fop_symlink_cbk_t symlink;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_unlink_cbk_t unlink;
+ fop_writev_cbk_t writev;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_zerofill_cbk_t zerofill;
+ fop_seek_cbk_t seek;
+};
+/* A lock shared by fops: 'owners' run concurrently, 'waiting' and 'frozen' fops are queued behind them (see the per-list notes). */
+struct _ec_lock
+{
+ ec_inode_t *ctx;
+ gf_timer_t *timer;
+
+ /* List of owners of this lock. All fops added to this list are running
+ * concurrently. */
+ struct list_head owners;
+
+ /* List of fops waiting to be an owner of the lock. Fops are added to this
+ * list when the current owner has an incompatible access (shared vs
+ * exclusive) or the lock is not acquired yet. */
+ struct list_head waiting;
+
+ /* List of fops that will wait until the next unlock/lock cycle. This
+ * happens when the currently acquired lock is decided to be released as
+ * soon as possible. In this case, all frozen fops will be continued only
+ * after the lock is reacquired. */
+ struct list_head frozen;
+
+ int32_t exclusive;
+ uintptr_t mask;
+ uintptr_t good_mask;
+ uintptr_t healing;
+ uint32_t refs_owners; /* Refs for fops owning the lock */
+ uint32_t refs_pending; /* Refs assigned to fops being prepared */
+ gf_boolean_t acquired;
+ gf_boolean_t getting_size;
+ gf_boolean_t release;
+ gf_boolean_t query;
+ fd_t *fd;
+ loc_t loc;
+ union
+ {
+ entrylk_type type;
+ struct gf_flock flock;
+ };
+};
+/* Connects one fop to one lock; ec_fop_data_t embeds two of them (locks[2]). */
+struct _ec_lock_link
+{
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ struct list_head owner_list;
+ struct list_head wait_list;
+ gf_boolean_t update[2];
+ loc_t *base;
+ uint64_t size; /* ec_manager_link() reports it as ia_size of regular files */
+};
+/* State of one in-flight fop: bookkeeping for the state machine, its answers, its locks, and a copy of the fop arguments. */
+struct _ec_fop_data
+{
+ int32_t id; /* GF_FOP_* identifier of the operation */
+ int32_t refs; /* reference count, modified under 'lock' */
+ int32_t state;
+ int32_t minimum;
+ int32_t expected;
+ int32_t winds;
+ int32_t jobs;
+ int32_t error; /* positive errno value; non-zero on the error path */
+ ec_fop_data_t *parent;
+ xlator_t *xl;
+ call_frame_t *req_frame; /* frame of the calling xlator */
+ call_frame_t *frame; /* frame used by this fop */
+ struct list_head cbk_list; /* sorted list of groups of answers */
+ struct list_head answer_list; /* list of answers */
+ struct list_head pending_list; /* member of ec_t.pending_fops */
+ ec_cbk_data_t *answer; /* accepted answer */
+ int32_t lock_count;
+ int32_t locked;
+ ec_lock_link_t locks[2];
+ int32_t first_lock;
+ gf_lock_t lock;
+
+ uint32_t flags;
+ uint32_t first;
+ uintptr_t mask;
+ uintptr_t healing; /*Dispatch is done but call is successful only
+ if fop->minimum number of subvolumes succeed
+ which are not healing*/
+ uintptr_t remaining;
+ uintptr_t received; /* Mask of responses */
+ uintptr_t good;
+
+ uid_t uid;
+ gid_t gid;
+
+ ec_wind_f wind;
+ ec_handler_f handler;
+ ec_resume_f resume;
+ ec_cbk_t cbks; /* callback of the caller; checked against NULL before use */
+ void *data;
+ ec_heal_t *heal;
+ struct list_head healer;
+
+ uint64_t user_size;
+ uint32_t head;
+
+ int32_t use_fd; /* set to 1 by readdir and readdirp */
+ /* Fop arguments; references and buffers held here are released by ec_fop_data_release(). */
+ dict_t *xdata;
+ dict_t *dict;
+ int32_t int32; /* create: open flags */
+ uint32_t uint32;
+ uint64_t size;
+ off_t offset;
+ mode_t mode[2]; /* create: [0] = mode, [1] = umask */
+ entrylk_cmd entrylk_cmd;
+ entrylk_type entrylk_type;
+ gf_xattrop_flags_t xattrop_flags;
+ dev_t dev;
+ inode_t *inode;
+ fd_t *fd;
+ struct iatt iatt;
+ char *str[2];
+ loc_t loc[2];
+ struct gf_flock flock;
+ struct iovec *vector;
+ struct iobref *buffers;
+ gf_seek_what_t seek;
+};
+/* One answer received from a subvolume for a fop. */
+struct _ec_cbk_data
+{
+ struct list_head list; // item in the sorted list of groups
+ struct list_head answer_list; // item in the list of answers
+ ec_fop_data_t * fop;
+ ec_cbk_data_t * next; // next answer in the same group
+ int32_t idx; /* index of the subvolume that answered */
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t count;
+ uintptr_t mask;
+ uint64_t dirty[2];
+
+ dict_t * xdata;
+ dict_t * dict;
+ int32_t int32;
+ uintptr_t uintptr[3];
+ uint64_t size;
+ uint64_t version[2];
+ inode_t * inode;
+ fd_t * fd;
+ struct statvfs statvfs;
+ struct iatt iatt[5]; /* filled in order by ec_dir_write_cbk() */
+ struct gf_flock flock;
+ struct iovec * vector;
+ struct iobref * buffers;
+ char *str;
+ gf_dirent_t entries; /* readdir(p) entries, spliced in by ec_common_readdir_cbk() */
+ off_t offset;
+ gf_seek_what_t what;
+};
+/* State of a heal operation. NOTE(review): field semantics are not visible in this part of the patch - confirm against the heal code. */
+struct _ec_heal
+{
+ struct list_head list;
+ gf_lock_t lock;
+ xlator_t *xl;
+ ec_fop_data_t *fop;
+ void *data;
+ ec_fop_data_t *lookup;
+ loc_t loc;
+ struct iatt iatt;
+ char *symlink;
+ fd_t *fd;
+ int32_t partial;
+ int32_t done;
+ int32_t error;
+ gf_boolean_t nameheal;
+ uintptr_t available;
+ uintptr_t good;
+ uintptr_t bad;
+ uintptr_t open;
+ uintptr_t fixed;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t total_size;
+ uint64_t version[2];
+ uint64_t raw_size;
+};
+/* Allocation and reference counting of answers and fops. */
+ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
+ ec_fop_data_t * fop, int32_t id,
+ int32_t idx, int32_t op_ret,
+ int32_t op_errno);
+ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
+ int32_t id, uint32_t flags,
+ uintptr_t target, int32_t minimum,
+ ec_wind_f wind, ec_handler_f handler,
+ ec_cbk_t cbks, void * data);
+void ec_fop_data_acquire(ec_fop_data_t * fop);
+void ec_fop_data_release(ec_fop_data_t * fop);
+/* Destroys all answers stored in a fop and clears fop->answer. */
+void ec_fop_cleanup(ec_fop_data_t *fop);
+
+#endif /* __EC_DATA_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
new file mode 100644
index 00000000000..fc8b38b22a4
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -0,0 +1,625 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+
+/* FOP: opendir */
+
+/* Answer comparator for opendir: returns 1 when both answers carry the same
+ * fd; otherwise logs the mismatch and returns 0. */
+int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+                           ec_cbk_data_t * src)
+{
+    if (dst->fd == src->fd) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0,
+            EC_MSG_FD_MISMATCH, "Mismatching fd in answers "
+            "of 'GF_FOP_OPENDIR': %p <-> %p",
+            dst->fd, src->fd);
+
+    return 0;
+}
+
+/* Callback of an opendir wound to one subvolume. The subvolume index comes
+ * back in 'cookie'. The answer is recorded (with references on fd and xdata
+ * when present) and combined with the other answers; the fop is always
+ * notified of the completion, even when recording the answer failed. */
+int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                       int32_t op_ret, int32_t op_errno, fd_t * fd,
+                       dict_t * xdata)
+{
+    ec_fop_data_t * fop = NULL;
+    ec_cbk_data_t * answer = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    answer = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPENDIR, idx,
+                                  op_ret, op_errno);
+    if (answer == NULL) {
+        goto out;
+    }
+
+    /* The fd is only kept for successful answers. */
+    if ((op_ret >= 0) && (fd != NULL)) {
+        answer->fd = fd_ref(fd);
+        if (answer->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        answer->xdata = dict_ref(xdata);
+        if (answer->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(answer, ec_combine_opendir);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+/* Winds the opendir to subvolume 'idx'; the index travels as the cookie and is read back in ec_opendir_cbk(). */
+void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_opendir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->opendir,
+ &fop->loc[0], fop->fd, fop->xdata);
+}
+/* State machine of the opendir fop: handles 'state' and returns the next state to run. */
+int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+ ec_fd_t *ctx;
+ int32_t err;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ LOCK(&fop->fd->lock);
+ /* The fd context and its loc are set up while holding fd->lock. */
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err; /* fop->error holds positive errno values, hence the negation */
+
+ return EC_STATE_REPORT;
+ }
+
+ UNLOCK(&fop->fd->lock);
+
+ /* Fall through */
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ /* Save which subvolumes successfully opened the directory.
+ * If ctx->open is 0, it means that readdir cannot be
+ * processed in this directory.
+ */
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
+ }
+
+ UNLOCK(&fop->fd->lock);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.opendir != NULL)
+ {
+ fop->cbks.opendir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->fd, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+ /* Negative states are the error path: the callback gets -1 and fop->error. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.opendir != NULL)
+ {
+ fop->cbks.opendir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the opendir fop.
+ *
+ * Allocates the fop, copies 'loc' and takes references on 'fd' and 'xdata'
+ * (each only when non-NULL), then starts the state machine with
+ * ec_manager(). If one of those steps fails after the fop was allocated,
+ * ec_manager() is started with ENOMEM instead of 0.
+ *
+ * When no fop is available (validation or allocation failed) the error is
+ * reported directly through 'func'. The callback is optional:
+ * ec_manager_opendir() checks fop->cbks.opendir before calling it, so
+ * 'func' is checked here as well instead of being called unconditionally.
+ */
+void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                int32_t minimum, fop_opendir_cbk_t func, void * data,
+                loc_t * loc, fd_t * fd, dict_t * xdata)
+{
+    ec_cbk_t callback = { .opendir = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(OPENDIR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_opendir, ec_manager_opendir, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive the state machine: report the failure here. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
+
+/*
+ * Maps a directory offset to the index of the child subvolume (in xl_list)
+ * that generated it.
+ *
+ * The client id encoded in 'offset' is extracted with gf_deitransform() and
+ * looked up, as a decimal string, in ec->leaf_to_subvolid.
+ *
+ * Returns the subvolume index (>= 0) on success. If the lookup fails or
+ * yields a negative index, an error is logged and -EINVAL is returned;
+ * ec_manager_readdir() negates this value to fill fop->error.
+ */
+int
+ec_deitransform (xlator_t *this, off_t offset)
+{
+    int idx = -1;
+    int client_id = -1;
+    ec_t *ec = this->private;
+    char id[32] = {0};
+    int err;
+
+    client_id = gf_deitransform (this, offset);
+    /* Bounded formatting: never write past 'id', whatever the width of int. */
+    snprintf (id, sizeof (id), "%d", client_id);
+    err = dict_get_int32 (ec->leaf_to_subvolid, id, &idx);
+    if (err < 0) {
+        idx = err;
+    }
+
+    if (idx < 0) {
+        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_INVALID_REQUEST,
+                "Invalid index %d in readdirp request", client_id);
+        idx = -EINVAL;
+    }
+    return idx;
+}
+
+/* FOP: readdir */
+
+/* Post-processes the entries of a readdirp answer. Only regular files that
+ * carry an inode are touched: when ec_dict_del_number() succeeds for
+ * EC_XATTR_SIZE on the entry's dict, the iatt is rebuilt with
+ * ec_iatt_rebuild(); when the dict is missing or that call fails, the
+ * entry's inode is unreferenced and cleared. 'idx' is not used. */
+void ec_adjust_readdirp (ec_t *ec, int32_t idx, gf_dirent_t *entries)
+{
+    gf_dirent_t * dirent;
+
+    list_for_each_entry(dirent, &entries->list, list) {
+        if ((dirent->inode == NULL) ||
+            (dirent->d_stat.ia_type != IA_IFREG)) {
+            continue;
+        }
+
+        if ((dirent->dict != NULL) &&
+            (ec_dict_del_number(dirent->dict, EC_XATTR_SIZE,
+                                &dirent->d_stat.ia_size) == 0)) {
+            ec_iatt_rebuild(ec, &dirent->d_stat, 1, 1);
+            continue;
+        }
+
+        /* No usable size for this file: drop the inode from the entry. */
+        inode_unref (dirent->inode);
+        dirent->inode = NULL;
+    }
+}
+
+/* Callback shared by readdir and readdirp for one subvolume (its index comes
+ * back in 'cookie'). On a non-negative op_ret the returned entries are
+ * moved into the answer; the answer is combined without a comparator and
+ * the fop is always notified of the completion. */
+int32_t
+ec_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno,
+                       gf_dirent_t *entries, dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *answer = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    answer = ec_cbk_data_allocate (frame, this, fop, fop->id,
+                                   idx, op_ret, op_errno);
+    if (answer == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        answer->xdata = dict_ref (xdata);
+    }
+    if (answer->op_ret >= 0) {
+        /* Take ownership of the entries; the source list is left empty. */
+        list_splice_init (&entries->list, &answer->entries.list);
+    }
+    ec_combine (answer, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+/* Winds the readdir to subvolume 'idx'; the index travels as the cookie and is read back in ec_common_readdir_cbk(). */
+void ec_wind_readdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->readdir,
+ fop->fd, fop->size, fop->offset, fop->xdata);
+}
+/* State machine shared by readdir and readdirp (fop->id tells them apart): handles 'state' and returns the next one. */
+int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
+{
+ ec_fd_t *ctx = NULL;
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ /* Return error if opendir has not been successfully called on
+ * any subvolume. */
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if ((ctx == NULL) || (ctx->open == 0)) {
+ fop->error = EINVAL;
+
+ return EC_STATE_REPORT;
+ }
+ /* readdirp also requests EC_XATTR_SIZE through xdata; ec_adjust_readdirp() looks for it in each entry. */
+ if (fop->id == GF_FOP_READDIRP) {
+ int32_t err;
+
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
+ /* A continued listing (offset != 0) is restricted to the subvolume encoded in the offset and takes no lock; a fresh listing locks the fd. */
+ if (fop->offset != 0)
+ {
+ /* Non-zero offset is irrecoverable error as the offset may not be
+ * valid on other bricks*/
+ int32_t idx = -1;
+
+ idx = ec_deitransform (fop->xl, fop->offset);
+
+ if (idx < 0) {
+ fop->error = -idx;
+ return EC_STATE_REPORT;
+ }
+ fop->mask &= 1ULL << idx;
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock(fop);
+ }
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_one(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+
+ if ((cbk != NULL) && (cbk->op_ret > 0) &&
+ (fop->id == GF_FOP_READDIRP)) {
+ ec_adjust_readdirp (fop->xl->private, cbk->idx, &cbk->entries);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT (cbk);
+ if (fop->id == GF_FOP_READDIR) {
+ if (fop->cbks.readdir != NULL) {
+ fop->cbks.readdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->entries, cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.readdirp != NULL) {
+ fop->cbks.readdirp(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno,
+ &cbk->entries, cbk->xdata);
+ }
+ }
+ if (fop->offset == 0)
+ return EC_STATE_LOCK_REUSE;
+ else
+ return EC_STATE_END;
+ /* Error path: the callback matching fop->id gets -1 and fop->error. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ if (fop->id == GF_FOP_READDIR) {
+ if (fop->cbks.readdir != NULL) {
+ fop->cbks.readdir(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ } else {
+ if (fop->cbks.readdirp != NULL) {
+ fop->cbks.readdirp(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ }
+ if (fop->offset == 0)
+ return EC_STATE_LOCK_REUSE;
+ else
+ return EC_STATE_END;
+ /* The lock states are only entered for listings started at offset 0, the only ones that took a lock. */
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ GF_ASSERT (fop->offset == 0);
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ GF_ASSERT (fop->offset == 0);
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the readdir fop.
+ *
+ * Allocates the fop, stores 'size' and 'offset', takes references on 'fd'
+ * and 'xdata' (each only when non-NULL) and starts the state machine with
+ * ec_manager(). If taking a reference fails, ec_manager() is started with
+ * ENOMEM instead of 0.
+ *
+ * When no fop is available (validation or allocation failed) the error is
+ * reported directly through 'func'. The callback is optional:
+ * ec_manager_readdir() checks fop->cbks.readdir before calling it, so
+ * 'func' is checked here as well instead of being called unconditionally.
+ */
+void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                int32_t minimum, fop_readdir_cbk_t func, void * data,
+                fd_t * fd, size_t size, off_t offset, dict_t * xdata)
+{
+    ec_cbk_t callback = { .readdir = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(READDIR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_readdir, ec_manager_readdir, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->size = size;
+    fop->offset = offset;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive the state machine: report the failure here. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
+
+/* FOP: readdirp */
+/* Winds the readdirp to subvolume 'idx'; the answer is handled by the callback shared with readdir. */
+void ec_wind_readdirp(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->readdirp,
+ fop->fd, fop->size, fop->offset, fop->xdata);
+}
+
+/*
+ * Entry point of the readdirp fop. It shares its state machine
+ * (ec_manager_readdir) with readdir and differs in the fop id, the wind
+ * function and the callback type.
+ *
+ * Allocates the fop, stores 'size' and 'offset', takes references on 'fd'
+ * and 'xdata' (each only when non-NULL) and starts the state machine with
+ * ec_manager(). If taking a reference fails, ec_manager() is started with
+ * ENOMEM instead of 0.
+ *
+ * When no fop is available (validation or allocation failed) the error is
+ * reported directly through 'func'. The callback is optional:
+ * ec_manager_readdir() checks fop->cbks.readdirp before calling it, so
+ * 'func' is checked here as well instead of being called unconditionally.
+ */
+void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                 int32_t minimum, fop_readdirp_cbk_t func, void * data,
+                 fd_t * fd, size_t size, off_t offset, dict_t * xdata)
+{
+    ec_cbk_t callback = { .readdirp = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(READDIRP) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIRP,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_readdirp, ec_manager_readdir, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->size = size;
+    fop->offset = offset;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive the state machine: report the failure here. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
new file mode 100644
index 00000000000..e181170650d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -0,0 +1,1498 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+
+/* Common callback of the directory-modifying fops. Records the answer of
+ * one subvolume (its index comes back in 'cookie'): xdata is referenced
+ * when present and, on success, every non-NULL iatt argument is copied into
+ * consecutive slots of cbk->iatt, in parameter order. The answer is combined
+ * with ec_combine_write even when op_ret is negative, and the fop is always
+ * notified of the completion. */
+int
+ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
+                  void *cookie, int op_ret, int op_errno,
+                  struct iatt *poststat, struct iatt *preparent,
+                  struct iatt *postparent, struct iatt *preparent2,
+                  struct iatt *postparent2, dict_t *xdata)
+{
+    struct iatt *stats[] = { poststat, preparent, postparent,
+                             preparent2, postparent2 };
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *answer = NULL;
+    int filled = 0;
+    int idx = 0;
+    int k = 0;
+
+    VALIDATE_OR_GOTO (this, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+    fop = frame->local;
+    idx = (long) cookie;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    answer = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
+                                   op_errno);
+    if (answer != NULL) {
+        if (xdata != NULL) {
+            answer->xdata = dict_ref (xdata);
+        }
+
+        if (op_ret >= 0) {
+            /* Pack the iatts that were provided, skipping NULL ones. */
+            for (k = 0; k < 5; k++) {
+                if (stats[k] != NULL) {
+                    answer->iatt[filled++] = *stats[k];
+                }
+            }
+        }
+
+        ec_combine (answer, ec_combine_write);
+    }
+
+out:
+    if (fop != NULL) {
+        ec_complete (fop);
+    }
+    return 0;
+}
+
+/* FOP: create */
+/* Callback of a create wound to one subvolume: forwards to ec_dir_write_cbk(); 'fd' and 'inode' are not used. */
+int32_t ec_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
+ buf, preparent, postparent, NULL, NULL, xdata);
+}
+/* Winds the create to subvolume 'idx' (passed as the cookie) with the flags, mode and umask stored in the fop. */
+void ec_wind_create(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_create_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->create,
+ &fop->loc[0], fop->int32, fop->mode[0], fop->mode[1],
+ fop->fd, fop->xdata);
+}
+/* State machine of the create fop: handles 'state' and returns the next state to run. */
+int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
+{
+ ec_config_t config;
+ ec_t *ec;
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx;
+ uint64_t version[2] = {0, 0};
+ int32_t err;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ LOCK(&fop->fd->lock);
+ /* The fd context (loc and flags) is set up while holding fd->lock. */
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+
+ ctx->flags = fop->int32; /* recorded before O_APPEND is stripped below */
+
+ UNLOCK(&fop->fd->lock);
+ /* The config, version and size xattrs of the new file are sent to the subvolumes through xdata. */
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ ec = fop->xl->private;
+
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ err = ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG, &config);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+
+ /* We need to write to specific offsets on the bricks, so we
+ * need to remove O_APPEND from flags (if present) */
+ fop->int32 &= ~O_APPEND;
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err; /* NOTE(review): shadows the function-scope 'err' */
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ if (!ec_cbk_set_error(cbk, -err, _gf_false)) {
+ LOCK(&fop->fd->lock);
+ /* Remember which subvolumes have the fd open. */
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
+ }
+
+ UNLOCK(&fop->fd->lock);
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.create != NULL)
+ {
+ fop->cbks.create (fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, fop->fd, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+ /* Negative states are the error path: the callback gets -1 and fop->error. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.create != NULL)
+ {
+ fop->cbks.create(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the create fop.
+ *
+ * Allocates the fop, stores 'flags', 'mode' and 'umask', copies 'loc',
+ * references 'fd' and takes a private copy of 'xdata' (the state machine
+ * adds keys to it), each only when non-NULL, then starts the state machine
+ * with ec_manager(). If one of those steps fails after the fop was
+ * allocated, ec_manager() is started with ENOMEM instead of 0.
+ *
+ * When no fop is available (validation or allocation failed) the error is
+ * reported directly through 'func'. The callback is optional:
+ * ec_manager_create() checks fop->cbks.create before calling it, so 'func'
+ * is checked here as well instead of being called unconditionally.
+ */
+void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
+               int32_t minimum, fop_create_cbk_t func, void * data,
+               loc_t * loc, int32_t flags, mode_t mode, mode_t umask,
+               fd_t * fd, dict_t * xdata)
+{
+    ec_cbk_t callback = { .create = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(CREATE) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum,
+                               ec_wind_create, ec_manager_create, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = flags;
+    fop->mode[0] = mode;
+    fop->mode[1] = umask;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive the state machine: report the failure here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: link */
+/* Callback of a link wound to one subvolume: forwards to ec_dir_write_cbk(); 'inode' is not used. */
+int32_t ec_link_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+ int32_t op_ret, int32_t op_errno, inode_t * inode,
+ struct iatt * buf, struct iatt * preparent,
+ struct iatt * postparent, dict_t * xdata)
+{
+ return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
+ buf, preparent, postparent, NULL, NULL, xdata);
+}
+/* Winds the link to subvolume 'idx' (passed as the cookie); loc[0] is the existing name, loc[1] the new one. */
+void ec_wind_link(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_link_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->link,
+ &fop->loc[0], &fop->loc[1], fop->xdata);
+}
+/* State machine of the link fop: handles 'state' and returns the next state to run. */
+int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[1], EC_UPDATE_DATA |
+ EC_UPDATE_META |
+ EC_INODE_SIZE);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+ /* Regular files report the size held in the fop's first lock link. */
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->iatt[0].ia_size = fop->locks[0].size;
+ }
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.link != NULL)
+ {
+ fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+ /* Negative states are the error path: the callback gets -1 and fop->error. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.link != NULL)
+ {
+ fop->cbks.link(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_link - entry point of the LINK operation.
+ *
+ * Allocates a GF_FOP_LINK fop driven by ec_wind_link/ec_manager_link, stores
+ * private copies of oldloc, newloc and xdata in it and hands it to
+ * ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * oldloc, newloc, xdata: optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,
+             int32_t minimum, fop_link_cbk_t func, void * data, loc_t * oldloc,
+             loc_t * newloc, dict_t * xdata)
+{
+    ec_cbk_t callback = { .link = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(LINK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum,
+                               ec_wind_link, ec_manager_link, callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (oldloc != NULL) {
+        if (loc_copy(&fop->loc[0], oldloc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (newloc != NULL) {
+        if (loc_copy(&fop->loc[1], newloc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_link tests fop->cbks.link before calling it, so a NULL
+         * callback is a valid input; do not dereference it here either. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: mkdir */
+
+/* Per-child MKDIR callback: hands the answer over to the shared
+ * ec_dir_write_cbk(). The inode argument is not forwarded and the last two
+ * iatt slots are passed as NULL. */
+int32_t ec_mkdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                     int32_t op_ret, int32_t op_errno, inode_t * inode,
+                     struct iatt * buf, struct iatt * preparent,
+                     struct iatt * postparent, dict_t * xdata)
+{
+    int32_t result;
+
+    result = ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, buf,
+                               preparent, postparent, NULL, NULL, xdata);
+
+    return result;
+}
+
+void ec_wind_mkdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{ /* Winds the MKDIR request held in fop to the child xlator ec->xl_list[idx].
+   * idx travels as the cookie with ec_mkdir_cbk as the callback. Forwarded
+   * arguments: fop->loc[0], fop->mode[0] (mode), fop->mode[1] (umask) and
+   * fop->xdata, as stored by ec_mkdir(). */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_mkdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->mkdir,
+ &fop->loc[0], fop->mode[0], fop->mode[1], fop->xdata);
+}
+
+int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
+{ /* State handler of the MKDIR fop: executes the step selected by `state`
+   * and returns the EC_STATE_* value to run next. The negated state labels
+   * form the failure path, which reports fop->error through the callback. */
+ ec_cbk_data_t * cbk;
+ uint64_t version[2] = {0, 0}; /* initial (all zero) value stored under EC_XATTR_VERSION */
+ int32_t err;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ if (fop->xdata == NULL) { /* a dictionary is needed to carry the version xattr */
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err; /* err is negated; presumably a negative errno - TODO confirm */
+ return EC_STATE_REPORT;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err; /* NOTE(review): shadows the function-scope err declared above */
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); /* 3 iatts: the ones reported below */
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.mkdir != NULL) /* the callback is optional */
+ {
+ fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* failure path: any step up to REPORT ends here */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ cbk = fop->answer; /* may be NULL; only its xdata is reported */
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.mkdir != NULL)
+ {
+ fop->cbks.mkdir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL,
+ ((cbk) ? cbk->xdata : NULL));
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock handling is identical on success and failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_mkdir - entry point of the MKDIR operation.
+ *
+ * Allocates a GF_FOP_MKDIR fop driven by ec_wind_mkdir/ec_manager_mkdir,
+ * records mode and umask in fop->mode[0]/fop->mode[1], stores private copies
+ * of loc and xdata and hands the fop to ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * loc, xdata           : optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+              int32_t minimum, fop_mkdir_cbk_t func, void * data, loc_t * loc,
+              mode_t mode, mode_t umask, dict_t * xdata)
+{
+    ec_cbk_t callback = { .mkdir = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(MKDIR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum,
+                               ec_wind_mkdir, ec_manager_mkdir, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->mode[0] = mode;
+    fop->mode[1] = umask;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_mkdir tests fop->cbks.mkdir before calling it, so a
+         * NULL callback is a valid input; do not dereference it here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: mknod */
+
+/* Per-child MKNOD callback: hands the answer over to the shared
+ * ec_dir_write_cbk(). The inode argument is not forwarded and the last two
+ * iatt slots are passed as NULL. */
+int32_t ec_mknod_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                     int32_t op_ret, int32_t op_errno, inode_t * inode,
+                     struct iatt * buf, struct iatt * preparent,
+                     struct iatt * postparent, dict_t * xdata)
+{
+    int32_t result;
+
+    result = ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, buf,
+                               preparent, postparent, NULL, NULL, xdata);
+
+    return result;
+}
+
+void ec_wind_mknod(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{ /* Winds the MKNOD request held in fop to the child xlator ec->xl_list[idx].
+   * idx travels as the cookie with ec_mknod_cbk as the callback. Forwarded
+   * arguments: fop->loc[0], fop->mode[0] (mode), fop->dev (rdev),
+   * fop->mode[1] (umask) and fop->xdata, as stored by ec_mknod(). */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_mknod_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->mknod,
+ &fop->loc[0], fop->mode[0], fop->dev, fop->mode[1],
+ fop->xdata);
+}
+
+int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
+{ /* State handler of the MKNOD fop: executes the step selected by `state`
+   * and returns the EC_STATE_* value to run next. The negated state labels
+   * form the failure path, which reports fop->error through the callback. */
+ ec_config_t config;
+ ec_t *ec;
+ ec_cbk_data_t * cbk;
+ uint64_t version[2] = {0, 0}; /* initial (all zero) value stored under EC_XATTR_VERSION */
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ if (S_ISREG(fop->mode[0])) { /* only regular files get the config/version/size xattrs */
+ int32_t err;
+
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ ec = fop->xl->private;
+
+ config.version = EC_CONFIG_VERSION; /* configuration built from constants plus the volume's nodes/redundancy */
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ err = ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG, &config);
+ if (err != 0) {
+ fop->error = -err; /* err is negated; presumably a negative errno - TODO confirm */
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0); /* the new file starts with size 0 */
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); /* 3 iatts: the ones reported below */
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.mknod != NULL) /* the callback is optional */
+ {
+ fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* failure path: any step up to REPORT ends here */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.mknod != NULL)
+ {
+ fop->cbks.mknod(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock handling is identical on success and failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_mknod - entry point of the MKNOD operation.
+ *
+ * Allocates a GF_FOP_MKNOD fop driven by ec_wind_mknod/ec_manager_mknod,
+ * records mode, rdev and umask in fop->mode[0], fop->dev and fop->mode[1],
+ * stores private copies of loc and xdata and hands the fop to ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * loc, xdata           : optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
+              int32_t minimum, fop_mknod_cbk_t func, void * data, loc_t * loc,
+              mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata)
+{
+    ec_cbk_t callback = { .mknod = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(MKNOD) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum,
+                               ec_wind_mknod, ec_manager_mknod, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->mode[0] = mode;
+    fop->dev = rdev;
+    fop->mode[1] = umask;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_mknod tests fop->cbks.mknod before calling it, so a
+         * NULL callback is a valid input; do not dereference it here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: rename */
+
+/* Per-child RENAME callback: hands the answer over to the shared
+ * ec_dir_write_cbk(), forwarding buf and the pre/post attributes of both the
+ * old and the new parent. */
+int32_t ec_rename_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                      int32_t op_ret, int32_t op_errno, struct iatt * buf,
+                      struct iatt * preoldparent, struct iatt * postoldparent,
+                      struct iatt * prenewparent, struct iatt * postnewparent,
+                      dict_t * xdata)
+{
+    int32_t result;
+
+    result = ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, buf,
+                               preoldparent, postoldparent,
+                               prenewparent, postnewparent, xdata);
+
+    return result;
+}
+
+void ec_wind_rename(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{ /* Winds the RENAME request held in fop to the child xlator
+   * ec->xl_list[idx]. idx travels as the cookie with ec_rename_cbk as the
+   * callback. Forwarded arguments: fop->loc[0] (old location), fop->loc[1]
+   * (new location) and fop->xdata, as stored by ec_rename(). */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_rename_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->rename,
+ &fop->loc[0], &fop->loc[1], fop->xdata);
+}
+
+int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
+{ /* State handler of the RENAME fop: executes the step selected by `state`
+   * and returns the EC_STATE_* value to run next. The negated state labels
+   * form the failure path, which reports fop->error through the callback. */
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT: /* no RENAME-specific initialization: same work as LOCK */
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], EC_UPDATE_DATA |
+ EC_UPDATE_META |
+ EC_INODE_SIZE);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[1],
+ EC_UPDATE_DATA | EC_UPDATE_META); /* second lock request, for the new location; no EC_INODE_SIZE here */
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 5, cbk->count); /* 5 iatts: the ones reported below */
+
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->iatt[0].ia_size = fop->locks[0].size; /* regular files: size taken from the first lock */
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.rename != NULL) /* the callback is optional */
+ {
+ fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* failure path: any step up to REPORT ends here */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.rename != NULL)
+ {
+ fop->cbks.rename(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock handling is identical on success and failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_rename - entry point of the RENAME operation.
+ *
+ * Allocates a GF_FOP_RENAME fop driven by ec_wind_rename/ec_manager_rename,
+ * stores private copies of oldloc, newloc and xdata in it and hands it to
+ * ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * oldloc, newloc, xdata: optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target,
+               int32_t minimum, fop_rename_cbk_t func, void * data,
+               loc_t * oldloc, loc_t * newloc, dict_t * xdata)
+{
+    ec_cbk_t callback = { .rename = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(RENAME) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum,
+                               ec_wind_rename, ec_manager_rename, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (oldloc != NULL) {
+        if (loc_copy(&fop->loc[0], oldloc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (newloc != NULL) {
+        if (loc_copy(&fop->loc[1], newloc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_rename tests fop->cbks.rename before calling it, so a
+         * NULL callback is a valid input; do not dereference it here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: rmdir */
+
+/* Per-child RMDIR callback: hands the answer over to the shared
+ * ec_dir_write_cbk(). RMDIR has no entry attributes, so the buf slot and the
+ * last two iatt slots are passed as NULL. */
+int32_t ec_rmdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                     int32_t op_ret, int32_t op_errno, struct iatt * preparent,
+                     struct iatt * postparent, dict_t * xdata)
+{
+    int32_t result;
+
+    result = ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, NULL,
+                               preparent, postparent, NULL, NULL, xdata);
+
+    return result;
+}
+
+void ec_wind_rmdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{ /* Winds the RMDIR request held in fop to the child xlator ec->xl_list[idx].
+   * idx travels as the cookie with ec_rmdir_cbk as the callback. Forwarded
+   * arguments: fop->loc[0], fop->int32 (the xflags stored by ec_rmdir()) and
+   * fop->xdata. */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_rmdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->rmdir,
+ &fop->loc[0], fop->int32, fop->xdata);
+}
+
+int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
+{ /* State handler of the RMDIR fop: executes the step selected by `state`
+   * and returns the EC_STATE_* value to run next. The negated state labels
+   * form the failure path, which reports fop->error through the callback. */
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT: /* no RMDIR-specific initialization: same work as LOCK */
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false); /* return value unused: REPORT reads fop->answer instead */
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.rmdir != NULL) /* the callback is optional */
+ {
+ fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* failure path: any step up to REPORT ends here */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.rmdir != NULL)
+ {
+ fop->cbks.rmdir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock handling is identical on success and failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_rmdir - entry point of the RMDIR operation.
+ *
+ * Allocates a GF_FOP_RMDIR fop driven by ec_wind_rmdir/ec_manager_rmdir,
+ * records xflags in fop->int32, stores private copies of loc and xdata and
+ * hands the fop to ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * loc, xdata           : optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+              int32_t minimum, fop_rmdir_cbk_t func, void * data, loc_t * loc,
+              int xflags, dict_t * xdata)
+{
+    ec_cbk_t callback = { .rmdir = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(RMDIR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum,
+                               ec_wind_rmdir, ec_manager_rmdir, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = xflags;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_rmdir tests fop->cbks.rmdir before calling it, so a
+         * NULL callback is a valid input; do not dereference it here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: symlink */
+
+/* Per-child SYMLINK callback: hands the answer over to the shared
+ * ec_dir_write_cbk(). The inode argument is not forwarded and the last two
+ * iatt slots are passed as NULL. */
+int32_t ec_symlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                       int32_t op_ret, int32_t op_errno, inode_t * inode,
+                       struct iatt * buf, struct iatt * preparent,
+                       struct iatt * postparent, dict_t * xdata)
+{
+    int32_t result;
+
+    result = ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, buf,
+                               preparent, postparent, NULL, NULL, xdata);
+
+    return result;
+}
+
+void ec_wind_symlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{ /* Winds the SYMLINK request held in fop to the child xlator
+   * ec->xl_list[idx]. idx travels as the cookie with ec_symlink_cbk as the
+   * callback. Forwarded arguments: fop->str[0] (link target), fop->loc[0],
+   * fop->mode[0] (umask) and fop->xdata, as stored by ec_symlink(). */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_symlink_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->symlink,
+ fop->str[0], &fop->loc[0], fop->mode[0], fop->xdata);
+}
+
+int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
+{ /* State handler of the SYMLINK fop: executes the step selected by `state`
+   * and returns the EC_STATE_* value to run next. The negated state labels
+   * form the failure path, which reports fop->error through the callback. */
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT: /* no SYMLINK-specific initialization: same work as LOCK */
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); /* 3 iatts: the ones reported below */
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false); /* err is negated before being stored; presumably a negative errno - TODO confirm */
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.symlink != NULL) /* the callback is optional */
+ {
+ fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* failure path: any step up to REPORT ends here */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.symlink != NULL)
+ {
+ fop->cbks.symlink(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock handling is identical on success and failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_symlink - entry point of the SYMLINK operation.
+ *
+ * Allocates a GF_FOP_SYMLINK fop driven by ec_wind_symlink/ec_manager_symlink,
+ * records umask in fop->mode[0], stores a duplicate of linkname in
+ * fop->str[0] plus private copies of loc and xdata, and hands the fop to
+ * ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * linkname, loc, xdata : optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                int32_t minimum, fop_symlink_cbk_t func, void * data,
+                const char * linkname, loc_t * loc, mode_t umask,
+                dict_t * xdata)
+{
+    ec_cbk_t callback = { .symlink = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(SYMLINK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum,
+                               ec_wind_symlink, ec_manager_symlink, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->mode[0] = umask;
+
+    if (linkname != NULL) {
+        fop->str[0] = gf_strdup(linkname);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY, "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_symlink tests fop->cbks.symlink before calling it, so a
+         * NULL callback is a valid input; do not dereference it here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: unlink */
+
+/* Per-child UNLINK callback: hands the answer over to the shared
+ * ec_dir_write_cbk(). UNLINK has no entry attributes, so the buf slot and
+ * the last two iatt slots are passed as NULL. */
+int32_t ec_unlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                      int32_t op_ret, int32_t op_errno,
+                      struct iatt * preparent, struct iatt * postparent,
+                      dict_t * xdata)
+{
+    int32_t result;
+
+    result = ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, NULL,
+                               preparent, postparent, NULL, NULL, xdata);
+
+    return result;
+}
+
+void ec_wind_unlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{ /* Winds the UNLINK request held in fop to the child xlator
+   * ec->xl_list[idx]. idx travels as the cookie with ec_unlink_cbk as the
+   * callback. Forwarded arguments: fop->loc[0], fop->int32 (the xflags
+   * stored by ec_unlink()) and fop->xdata. */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_unlink_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->unlink,
+ &fop->loc[0], fop->int32, fop->xdata);
+}
+
+int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
+{ /* State handler of the UNLINK fop: executes the step selected by `state`
+   * and returns the EC_STATE_* value to run next. The negated state labels
+   * form the failure path, which reports fop->error through the callback. */
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT: /* no UNLINK-specific initialization: same work as LOCK */
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false); /* return value unused: REPORT reads fop->answer instead */
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.unlink != NULL) /* the callback is optional */
+ {
+ fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* failure path: any step up to REPORT ends here */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.unlink != NULL)
+ {
+ fop->cbks.unlink(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock handling is identical on success and failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * ec_unlink - entry point of the UNLINK operation.
+ *
+ * Allocates a GF_FOP_UNLINK fop driven by ec_wind_unlink/ec_manager_unlink,
+ * records xflags in fop->int32, stores private copies of loc and xdata and
+ * hands the fop to ec_manager().
+ *
+ * frame, this          : caller frame and xlator; this->private must be set.
+ * target, minimum, data: forwarded unchanged to ec_fop_data_allocate().
+ * func                 : completion callback; may be NULL.
+ * loc, xdata           : optional; each one is copied only when not NULL.
+ *
+ * When a copy fails the fop is still passed to ec_manager() with ENOMEM.
+ * When no fop could be created, func (if any) is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
+               int32_t minimum, fop_unlink_cbk_t func, void * data,
+               loc_t * loc, int xflags, dict_t * xdata)
+{
+    ec_cbk_t callback = { .unlink = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(UNLINK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum,
+                               ec_wind_unlink, ec_manager_unlink, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = xflags;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* ec_manager_unlink tests fop->cbks.unlink before calling it, so a
+         * NULL callback is a valid input; do not dereference it here. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
new file mode 100644
index 00000000000..8d938427a18
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -0,0 +1,202 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_FOPS_H__
+#define __EC_FOPS_H__
+
+#include "xlator.h"
+
+#include "ec-data.h"
+#include "ec-common.h"
+
+void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_access_cbk_t func, void *data, loc_t * loc,
+ int32_t mask, dict_t * xdata);
+
+void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_create_cbk_t func, void *data, loc_t * loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t * fd,
+ dict_t * xdata);
+
+void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ const char * volume, loc_t * loc, const char * basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t * xdata);
+
+void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ const char * volume, fd_t * fd, const char * basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t * xdata);
+
+void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_flush_cbk_t func, void *data, fd_t * fd,
+ dict_t * xdata);
+
+void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fsync_cbk_t func, void *data, fd_t * fd,
+ int32_t datasync, dict_t * xdata);
+
+void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fsyncdir_cbk_t func, void *data,
+ fd_t * fd, int32_t datasync, dict_t * xdata);
+
+void ec_getxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_getxattr_cbk_t func, void *data,
+ loc_t * loc, const char * name, dict_t * xdata);
+
+void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fgetxattr_cbk_t func, void *data,
+ fd_t * fd, const char * name, dict_t * xdata);
+
+void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_heal_cbk_t func, void *data, loc_t * loc,
+ int32_t partial, dict_t *xdata);
+
+void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd,
+ int32_t partial, dict_t *xdata);
+
+void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_inodelk_cbk_t func, void *data,
+ const char * volume, loc_t * loc, int32_t cmd,
+ struct gf_flock * flock, dict_t * xdata);
+
+void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_finodelk_cbk_t func, void *data,
+ const char * volume, fd_t * fd, int32_t cmd,
+ struct gf_flock * flock, dict_t * xdata);
+
+void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_link_cbk_t func, void *data, loc_t * oldloc,
+ loc_t * newloc, dict_t * xdata);
+
+void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_lk_cbk_t func, void *data, fd_t * fd,
+ int32_t cmd, struct gf_flock * flock, dict_t * xdata);
+
+void ec_lookup(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t * loc,
+ dict_t * xdata);
+
+void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_mkdir_cbk_t func, void *data, loc_t * loc,
+ mode_t mode, mode_t umask, dict_t * xdata);
+
+void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_mknod_cbk_t func, void *data, loc_t * loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata);
+
+void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_open_cbk_t func, void *data, loc_t * loc,
+ int32_t flags, fd_t * fd, dict_t * xdata);
+
+void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_opendir_cbk_t func, void *data,
+ loc_t * loc, fd_t * fd, dict_t * xdata);
+
+void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t * fd,
+ size_t size, off_t offset, dict_t * xdata);
+
+void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_readdirp_cbk_t func, void *data,
+ fd_t * fd, size_t size, off_t offset, dict_t * xdata);
+
+void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_readlink_cbk_t func, void *data,
+ loc_t * loc, size_t size, dict_t * xdata);
+
+void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_readv_cbk_t func, void *data, fd_t * fd,
+ size_t size, off_t offset, uint32_t flags, dict_t * xdata);
+
+void ec_removexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ loc_t * loc, const char * name, dict_t * xdata);
+
+void ec_fremovexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ fd_t * fd, const char * name, dict_t * xdata);
+
+void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_rename_cbk_t func, void *data,
+ loc_t * oldloc, loc_t * newloc, dict_t * xdata);
+
+void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_rmdir_cbk_t func, void *data, loc_t * loc,
+ int xflags, dict_t * xdata);
+
+void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_setattr_cbk_t func, void *data,
+ loc_t * loc, struct iatt * stbuf, int32_t valid,
+ dict_t * xdata);
+
+void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fsetattr_cbk_t func, void *data,
+ fd_t * fd, struct iatt * stbuf, int32_t valid,
+ dict_t * xdata);
+
+void ec_setxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_setxattr_cbk_t func, void *data,
+ loc_t * loc, dict_t * dict, int32_t flags, dict_t * xdata);
+
+void ec_fsetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fsetxattr_cbk_t func, void *data,
+ fd_t * fd, dict_t * dict, int32_t flags, dict_t * xdata);
+
+void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_stat_cbk_t func, void *data, loc_t * loc,
+ dict_t * xdata);
+
+void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fstat_cbk_t func, void *data, fd_t * fd,
+ dict_t * xdata);
+
+void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t * loc,
+ dict_t * xdata);
+
+void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_symlink_cbk_t func, void *data,
+ const char * linkname, loc_t * loc, mode_t umask,
+ dict_t * xdata);
+
+void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_truncate_cbk_t func, void *data,
+ loc_t * loc, off_t offset, dict_t * xdata);
+
+void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_ftruncate_cbk_t func, void *data,
+ fd_t * fd, off_t offset, dict_t * xdata);
+
+void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t * loc,
+ int xflags, dict_t * xdata);
+
+void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_writev_cbk_t func, void *data, fd_t * fd,
+ struct iovec * vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref * iobref, dict_t * xdata);
+
+void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_xattrop_cbk_t func, void *data,
+ loc_t * loc, gf_xattrop_flags_t optype, dict_t * xattr,
+ dict_t * xdata);
+
+void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
+ int32_t minimum, fop_fxattrop_cbk_t func, void *data,
+ fd_t * fd, gf_xattrop_flags_t optype, dict_t * xattr,
+ dict_t * xdata);
+
+void ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+#endif /* __EC_FOPS_H__ */
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
new file mode 100644
index 00000000000..0ad514908aa
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -0,0 +1,1448 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+#include "byte-order.h"
+
+/* FOP: flush */
+
+/*
+ * Per-child callback of a flush request.
+ *
+ * 'cookie' carries the index of the child that answered. The answer is
+ * recorded in a freshly allocated ec_cbk_data_t (taking a reference on
+ * 'xdata' when one is supplied) and passed to ec_combine(). Whenever the
+ * fop could be read from frame->local, ec_complete() is called on it before
+ * returning. Always returns 0.
+ */
+int32_t ec_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FLUSH, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Winds the flush request stored in 'fop' to the child found at position
+ * 'idx' of ec->xl_list. 'idx' is also passed as the cookie so that
+ * ec_flush_cbk() can tell which child produced each answer.
+ */
+void ec_wind_flush(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_flush_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->flush, fop->fd,
+ fop->xdata);
+}
+
+/*
+ * State handler of the flush fop.
+ *
+ * Executes the action associated with 'state' and returns the state to
+ * process next. Positive states form the normal sequence (lock, flush of
+ * size/version, dispatch, answer preparation, report, lock reuse, unlock).
+ * Negative states are the error counterparts: they report fop->error
+ * through the caller's callback (when one was given) and then continue
+ * with lock reuse and unlock. Unknown states are logged and end the fop.
+ */
+int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ /* The caller's callback is optional. */
+ if (fop->cbks.flush != NULL)
+ {
+ fop->cbks.flush(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ /* Error path: report fop->error instead of an answer. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DELAYED_START:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.flush != NULL)
+ {
+ fop->cbks.flush(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the flush fop.
+ *
+ * Allocates the fop (wind function ec_wind_flush, state handler
+ * ec_manager_flush), takes references on 'fd' and 'xdata' when they are
+ * given and starts the state machine through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * fd, xdata     - flush arguments; a reference is taken on each one.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+              int32_t minimum, fop_flush_cbk_t func, void *data, fd_t *fd,
+              dict_t *xdata)
+{
+    ec_cbk_t callback = { .flush = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FLUSH) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum,
+                               ec_wind_flush, ec_manager_flush, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_flush checks it for NULL as
+         * well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: fsync */
+
+/*
+ * Combine function for fsync answers: two answers are compatible when
+ * their two iatt structures can be combined by ec_iatt_combine().
+ * Returns 1 when they are compatible, 0 (after logging a notice) otherwise.
+ */
+int32_t ec_combine_fsync(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+                         ec_cbk_data_t *src)
+{
+    if (ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+            "Mismatching iatt in answers of 'GF_FOP_FSYNC'");
+
+    return 0;
+}
+
+/*
+ * Per-child callback of an fsync request.
+ *
+ * 'cookie' carries the index of the child that answered. On success the
+ * pre and post iatt structures are copied into the answer (slots 0 and 1).
+ * A reference on 'xdata' is taken when it is supplied, and the answer is
+ * then passed to ec_combine() with ec_combine_fsync(). Whenever the fop
+ * could be read from frame->local, ec_complete() is called on it before
+ * returning. Always returns 0.
+ */
+int32_t ec_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNC, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    /* The iatt buffers are only meaningful for successful answers. */
+    if ((op_ret >= 0) && (prebuf != NULL)) {
+        cbk->iatt[0] = *prebuf;
+    }
+    if ((op_ret >= 0) && (postbuf != NULL)) {
+        cbk->iatt[1] = *postbuf;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_fsync);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Winds the fsync request stored in 'fop' to the child found at position
+ * 'idx' of ec->xl_list. fop->int32 holds the 'datasync' flag saved by
+ * ec_fsync(). 'idx' is passed as the cookie for ec_fsync_cbk().
+ */
+void ec_wind_fsync(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fsync_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fsync, fop->fd,
+ fop->int32, fop->xdata);
+}
+
+/*
+ * State handler of the fsync fop.
+ *
+ * Executes the action associated with 'state' and returns the state to
+ * process next. Positive states form the normal sequence (lock, flush of
+ * size/version, dispatch, answer preparation, report, lock reuse, unlock).
+ * Negative states are the error counterparts: they report fop->error
+ * through the caller's callback (when one was given) and then continue
+ * with lock reuse and unlock. Unknown states are logged and end the fop.
+ */
+int32_t ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk;
+
+    switch (state)
+    {
+        case EC_STATE_INIT:
+        case EC_STATE_LOCK:
+            ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+            ec_lock(fop);
+
+            return EC_STATE_DISPATCH;
+
+        case EC_STATE_DISPATCH:
+            ec_flush_size_version(fop);
+
+            return EC_STATE_DELAYED_START;
+
+        case EC_STATE_DELAYED_START:
+            ec_dispatch_all(fop);
+
+            return EC_STATE_PREPARE_ANSWER;
+
+        case EC_STATE_PREPARE_ANSWER:
+            cbk = ec_fop_prepare_answer(fop, _gf_false);
+            if (cbk != NULL) {
+                gf_boolean_t have_size;
+
+                ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+                                cbk->count);
+
+                /* The call fills iatt[0].ia_size, so it is made outside of
+                 * the assertion: it must run even in builds where the
+                 * assertion expression is not evaluated.
+                 * This shouldn't fail because we have the inode locked. */
+                have_size = ec_get_inode_size(fop, fop->fd->inode,
+                                              &cbk->iatt[0].ia_size);
+                GF_ASSERT(have_size);
+                cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+            }
+
+            return EC_STATE_REPORT;
+
+        case EC_STATE_REPORT:
+            cbk = fop->answer;
+
+            GF_ASSERT(cbk != NULL);
+
+            /* The caller's callback is optional. */
+            if (fop->cbks.fsync != NULL)
+            {
+                fop->cbks.fsync(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                                cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+                                cbk->xdata);
+            }
+
+            return EC_STATE_LOCK_REUSE;
+
+        /* Error path: report fop->error instead of an answer. */
+        case -EC_STATE_INIT:
+        case -EC_STATE_LOCK:
+        case -EC_STATE_DISPATCH:
+        case -EC_STATE_PREPARE_ANSWER:
+        case -EC_STATE_REPORT:
+        case -EC_STATE_DELAYED_START:
+            GF_ASSERT(fop->error != 0);
+
+            if (fop->cbks.fsync != NULL)
+            {
+                fop->cbks.fsync(fop->req_frame, fop, fop->xl, -1, fop->error,
+                                NULL, NULL, NULL);
+            }
+
+            return EC_STATE_LOCK_REUSE;
+
+        case -EC_STATE_LOCK_REUSE:
+        case EC_STATE_LOCK_REUSE:
+            ec_lock_reuse(fop);
+
+            return EC_STATE_UNLOCK;
+
+        case -EC_STATE_UNLOCK:
+        case EC_STATE_UNLOCK:
+            ec_unlock(fop);
+
+            return EC_STATE_END;
+
+        default:
+            gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                    EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+                    state, ec_fop_name(fop->id));
+
+            return EC_STATE_END;
+    }
+}
+
+/*
+ * Entry point of the fsync fop.
+ *
+ * Allocates the fop (wind function ec_wind_fsync, state handler
+ * ec_manager_fsync), stores 'datasync' in fop->int32, takes references on
+ * 'fd' and 'xdata' when they are given and starts the state machine
+ * through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * fd, datasync,
+ * xdata         - fsync arguments.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+              int32_t minimum, fop_fsync_cbk_t func, void *data, fd_t *fd,
+              int32_t datasync, dict_t *xdata)
+{
+    ec_cbk_t callback = { .fsync = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FSYNC) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum,
+                               ec_wind_fsync, ec_manager_fsync, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->int32 = datasync;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_fsync checks it for NULL as
+         * well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: fsyncdir */
+
+/*
+ * Per-child callback of an fsyncdir request.
+ *
+ * 'cookie' carries the index of the child that answered. The answer is
+ * recorded in a freshly allocated ec_cbk_data_t (taking a reference on
+ * 'xdata' when one is supplied) and passed to ec_combine(). Whenever the
+ * fop could be read from frame->local, ec_complete() is called on it before
+ * returning. Always returns 0.
+ */
+int32_t ec_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNCDIR, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Winds the fsyncdir request stored in 'fop' to the child found at
+ * position 'idx' of ec->xl_list. fop->int32 holds the 'datasync' flag saved
+ * by ec_fsyncdir(). 'idx' is passed as the cookie for ec_fsyncdir_cbk().
+ */
+void ec_wind_fsyncdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fsyncdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fsyncdir,
+ fop->fd, fop->int32, fop->xdata);
+}
+
+/*
+ * State handler of the fsyncdir fop.
+ *
+ * Executes the action associated with 'state' and returns the state to
+ * process next. Positive states form the normal sequence (lock, flush of
+ * size/version, dispatch, answer preparation, report, lock reuse, unlock).
+ * Negative states are the error counterparts: they report fop->error
+ * through the caller's callback (when one was given) and then continue
+ * with lock reuse and unlock. Unknown states are logged and end the fop.
+ */
+int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ /* The caller's callback is optional. */
+ if (fop->cbks.fsyncdir != NULL)
+ {
+ fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ /* Error path: report fop->error instead of an answer. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ case -EC_STATE_DELAYED_START:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.fsyncdir != NULL)
+ {
+ fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the fsyncdir fop.
+ *
+ * Allocates the fop (wind function ec_wind_fsyncdir, state handler
+ * ec_manager_fsyncdir), stores 'datasync' in fop->int32, takes references
+ * on 'fd' and 'xdata' when they are given and starts the state machine
+ * through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * fd, datasync,
+ * xdata         - fsyncdir arguments.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                 int32_t minimum, fop_fsyncdir_cbk_t func, void *data,
+                 fd_t *fd, int32_t datasync, dict_t *xdata)
+{
+    ec_cbk_t callback = { .fsyncdir = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FSYNCDIR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
+                               minimum, ec_wind_fsyncdir, ec_manager_fsyncdir,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->int32 = datasync;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_fsyncdir checks it for NULL
+         * as well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: lookup */
+
+/*
+ * Adjusts a successful lookup answer before it is reported.
+ *
+ * Nothing is done for failed answers. Otherwise:
+ *  - the version xattr is moved from cbk->xdata into cbk->version;
+ *  - fop->loc[0] is updated from the answer; if that sets an error on the
+ *    answer, processing stops;
+ *  - under the inode lock, the version and size cached in the inode
+ *    context (when flagged as available) are read; the cached version
+ *    replaces cbk->version;
+ *  - for regular files, the iatt size is saved in cbk->size, the size
+ *    xattr is moved from cbk->xdata into the iatt, and the cached size, if
+ *    any, takes precedence over it.
+ *
+ * The 'ec' argument is not used by this function.
+ */
+void ec_lookup_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk)
+{
+    ec_inode_t *ctx = NULL;
+    uint64_t cached_size = 0;
+    int32_t size_known = 0;
+    int32_t err;
+
+    if (cbk->op_ret < 0) {
+        return;
+    }
+
+    ec_dict_del_array(cbk->xdata, EC_XATTR_VERSION, cbk->version,
+                      EC_VERSION_SIZE);
+
+    err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]);
+    if (ec_cbk_set_error(cbk, -err, _gf_true)) {
+        return;
+    }
+
+    LOCK(&cbk->inode->lock);
+
+    ctx = __ec_inode_get(cbk->inode, fop->xl);
+    if (ctx != NULL) {
+        if (ctx->have_version) {
+            cbk->version[0] = ctx->post_version[0];
+            cbk->version[1] = ctx->post_version[1];
+        }
+        if (ctx->have_size) {
+            cached_size = ctx->post_size;
+            size_known = 1;
+        }
+    }
+
+    UNLOCK(&cbk->inode->lock);
+
+    /* Size handling only applies to regular files. */
+    if (cbk->iatt[0].ia_type != IA_IFREG) {
+        return;
+    }
+
+    cbk->size = cbk->iatt[0].ia_size;
+    ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size);
+    if (size_known) {
+        cbk->iatt[0].ia_size = cached_size;
+    }
+}
+
+/*
+ * Combine function for lookup answers: two answers are compatible when
+ * their two iatt structures can be combined by ec_iatt_combine().
+ * Returns 1 when they are compatible, 0 (after logging a notice) otherwise.
+ */
+int32_t ec_combine_lookup(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+                          ec_cbk_data_t *src)
+{
+    if (ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+            "Mismatching iatt in answers of 'GF_FOP_LOOKUP'");
+
+    return 0;
+}
+
+/*
+ * Per-child callback of a lookup request.
+ *
+ * 'cookie' carries the index of the child that answered. For successful
+ * answers a reference on 'inode' is taken and the iatt of the entry and of
+ * its parent are copied into slots 0 and 1 of the answer. When 'xdata' is
+ * supplied a reference is taken on it and the dirty xattr is moved from the
+ * dictionary into cbk->dirty. The answer is then passed to ec_combine()
+ * with ec_combine_lookup(). Whenever the fop could be read from
+ * frame->local, ec_complete() is called on it before returning.
+ * Always returns 0.
+ */
+int32_t ec_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, inode_t *inode,
+                      struct iatt *buf, dict_t *xdata,
+                      struct iatt *postparent)
+{
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LOOKUP, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (op_ret >= 0) {
+        if (inode != NULL) {
+            cbk->inode = inode_ref(inode);
+            if (cbk->inode == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_INODE_REF_FAIL,
+                        "Failed to reference an inode.");
+
+                goto out;
+            }
+        }
+        if (buf != NULL) {
+            cbk->iatt[0] = *buf;
+        }
+        if (postparent != NULL) {
+            cbk->iatt[1] = *postparent;
+        }
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+        ec_dict_del_array (xdata, EC_XATTR_DIRTY, cbk->dirty,
+                           EC_VERSION_SIZE);
+    }
+
+    ec_combine(cbk, ec_combine_lookup);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Winds the lookup request stored in 'fop' (location fop->loc[0]) to the
+ * child found at position 'idx' of ec->xl_list. 'idx' is passed as the
+ * cookie for ec_lookup_cbk().
+ */
+void ec_wind_lookup(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_lookup_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->lookup,
+ &fop->loc[0], fop->xdata);
+}
+
+/*
+ * State handler of the lookup fop.
+ *
+ * Executes the action associated with 'state' and returns the state to
+ * process next. No lock states are used by this handler. The INIT state
+ * makes sure the request dictionary exists and asks for the size, version
+ * and dirty xattrs, then falls through to the dispatch. Negative states
+ * are the error counterparts: they report fop->error through the caller's
+ * callback (when one was given). Unknown states are logged and end the fop.
+ */
+int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ int32_t err;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ gf_msg (fop->xl->name, GF_LOG_ERROR, ENOMEM,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL, "Unable to prepare "
+ "lookup request");
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ } else {
+ /*TODO: To be handled once we have 'syndromes' */
+ dict_del (fop->xdata, GF_CONTENT_KEY);
+ }
+ /* Request the internal xattrs; stop at the first failure. */
+ err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err == 0) {
+ err = dict_set_uint64(fop->xdata, EC_XATTR_VERSION, 0);
+ }
+ if (err == 0) {
+ err = dict_set_uint64(fop->xdata, EC_XATTR_DIRTY, 0);
+ }
+ if (err != 0) {
+ gf_msg (fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL, "Unable to prepare lookup "
+ "request");
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ /*
+ * Lookup happens without any lock, so there is a chance that it
+ * will have answers before modification happened and after
+ * modification happened in the same response. So choose the next
+ * best answer when the answers don't match for EC_MINIMUM_MIN
+ */
+
+ if (!fop->answer && !list_empty(&fop->cbk_list)) {
+ fop->answer = list_entry (fop->cbk_list.next, ec_cbk_data_t,
+ list);
+ }
+
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ ec_lookup_rebuild(fop->xl->private, fop, cbk);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ /* The caller's callback is optional. */
+ if (fop->cbks.lookup != NULL)
+ {
+ fop->cbks.lookup(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->inode, &cbk->iatt[0],
+ cbk->xdata, &cbk->iatt[1]);
+ }
+
+ return EC_STATE_END;
+
+ /* Error path: report fop->error instead of an answer. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.lookup != NULL)
+ {
+ fop->cbks.lookup(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the lookup fop.
+ *
+ * Allocates the fop (flag EC_FLAG_LOCK_SHARED, wind function
+ * ec_wind_lookup, state handler ec_manager_lookup), copies 'loc' into
+ * fop->loc[0], stores a copy of 'xdata' obtained with dict_copy_with_ref()
+ * and starts the state machine through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * loc, xdata    - lookup arguments; both may be NULL.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+               int32_t minimum, fop_lookup_cbk_t func, void *data,
+               loc_t *loc, dict_t *xdata)
+{
+    ec_cbk_t callback = { .lookup = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(LOOKUP) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_lookup,
+                               ec_manager_lookup, callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        /* Do not log failures here as a memory problem would have already
+         * been logged by the corresponding alloc functions */
+        if (fop->xdata == NULL) {
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_lookup checks it for NULL as
+         * well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: statfs */
+
+/*
+ * Combine function for statfs answers: merges the statvfs data of 'src'
+ * into 'dst' with ec_statvfs_combine(). Answers are never rejected, so the
+ * result is always 1. The 'fop' argument is not used.
+ */
+int32_t ec_combine_statfs(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+                          ec_cbk_data_t *src)
+{
+    struct statvfs *merged = &dst->statvfs;
+    struct statvfs *other = &src->statvfs;
+
+    ec_statvfs_combine(merged, other);
+
+    return 1;
+}
+
+/*
+ * Per-child callback of a statfs request.
+ *
+ * 'cookie' carries the index of the child that answered. For successful
+ * answers the statvfs data is copied into the answer. A reference on
+ * 'xdata' is taken when it is supplied, and the answer is then passed to
+ * ec_combine() with ec_combine_statfs(). Whenever the fop could be read
+ * from frame->local, ec_complete() is called on it before returning.
+ * Always returns 0.
+ */
+int32_t ec_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+                      dict_t *xdata)
+{
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STATFS, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (buf != NULL)) {
+        cbk->statvfs = *buf;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_statfs);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Winds the statfs request stored in 'fop' (location fop->loc[0]) to the
+ * child found at position 'idx' of ec->xl_list. 'idx' is passed as the
+ * cookie for ec_statfs_cbk().
+ */
+void ec_wind_statfs(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_statfs_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->statfs,
+ &fop->loc[0], fop->xdata);
+}
+
+/*
+ * State handler of the statfs fop.
+ *
+ * Executes the action associated with 'state' and returns the state to
+ * process next. No lock states are used by this handler. While preparing
+ * the answer, the block counters of the combined statvfs are multiplied by
+ * the number of fragments unless the answer's xdata carries a non-zero
+ * "quota-deem-statfs" value. Negative states are the error counterparts:
+ * they report fop->error through the caller's callback (when one was
+ * given). Unknown states are logged and end the fop.
+ */
+int32_t ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk = NULL;
+    /* dict_get_int8() stores exactly one byte, so the value is read into a
+     * real int8_t instead of writing through a casted pointer to a wider
+     * boolean object. */
+    int8_t deem_statfs = 0;
+    int32_t err = 0;
+
+    switch (state)
+    {
+        case EC_STATE_INIT:
+        case EC_STATE_DISPATCH:
+            ec_dispatch_all(fop);
+
+            return EC_STATE_PREPARE_ANSWER;
+
+        case EC_STATE_PREPARE_ANSWER:
+            cbk = ec_fop_prepare_answer(fop, _gf_true);
+            if (cbk != NULL) {
+                ec_t *ec = fop->xl->private;
+
+                if (cbk->xdata) {
+                    err = dict_get_int8 (cbk->xdata, "quota-deem-statfs",
+                                         &deem_statfs);
+                    /* A missing key is not an error. */
+                    if (err != -ENOENT) {
+                        ec_cbk_set_error(cbk, -err, _gf_true);
+                    }
+                }
+
+                if (err != 0 || deem_statfs == 0) {
+                    cbk->statvfs.f_blocks *= ec->fragments;
+                    cbk->statvfs.f_bfree *= ec->fragments;
+                    cbk->statvfs.f_bavail *= ec->fragments;
+                }
+            }
+
+            return EC_STATE_REPORT;
+
+        case EC_STATE_REPORT:
+            cbk = fop->answer;
+
+            GF_ASSERT(cbk != NULL);
+
+            /* The caller's callback is optional. */
+            if (fop->cbks.statfs != NULL)
+            {
+                fop->cbks.statfs(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                                 cbk->op_errno, &cbk->statvfs, cbk->xdata);
+            }
+
+            return EC_STATE_END;
+
+        /* Error path: report fop->error instead of an answer. */
+        case -EC_STATE_INIT:
+        case -EC_STATE_DISPATCH:
+        case -EC_STATE_PREPARE_ANSWER:
+        case -EC_STATE_REPORT:
+            GF_ASSERT(fop->error != 0);
+
+            if (fop->cbks.statfs != NULL)
+            {
+                fop->cbks.statfs(fop->req_frame, fop, fop->xl, -1, fop->error,
+                                 NULL, NULL);
+            }
+
+            return EC_STATE_END;
+
+        default:
+            gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                    EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+                    state, ec_fop_name(fop->id));
+
+            return EC_STATE_END;
+    }
+}
+
+/*
+ * Entry point of the statfs fop.
+ *
+ * Allocates the fop (flag EC_FLAG_LOCK_SHARED, wind function
+ * ec_wind_statfs, state handler ec_manager_statfs), copies 'loc' into
+ * fop->loc[0], takes a reference on 'xdata' when it is given and starts
+ * the state machine through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * loc, xdata    - statfs arguments; both may be NULL.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+               int32_t minimum, fop_statfs_cbk_t func, void *data,
+               loc_t *loc, dict_t *xdata)
+{
+    ec_cbk_t callback = { .statfs = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(STATFS) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_statfs,
+                               ec_manager_statfs, callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_statfs checks it for NULL as
+         * well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
+
+/* FOP: xattrop */
+
+/*
+ * Combine function for xattrop/fxattrop answers: two answers are
+ * compatible when ec_dict_compare() accepts their result dictionaries.
+ * Returns 1 when they are compatible, 0 (after a debug log) otherwise.
+ */
+int32_t ec_combine_xattrop(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+                           ec_cbk_data_t *src)
+{
+    if (ec_dict_compare(dst->dict, src->dict)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_DICT_MISMATCH,
+            "Mismatching dictionary in answers of 'GF_FOP_XATTROP'");
+
+    return 0;
+}
+
+/*
+ * Per-child callback shared by xattrop and fxattrop requests (the fop id
+ * stored in the fop is used for the answer).
+ *
+ * 'cookie' carries the index of the child that answered. For successful
+ * answers a reference on 'xattr' is stored in the answer; if the version
+ * xattr is present, at least 64 bits long and has the EC_SELFHEAL_BIT set
+ * in its first word, the child is flagged in fop->healing under the fop
+ * lock. The dirty xattr is then moved from 'xattr' into cbk->dirty. A
+ * reference on 'xdata' is taken when it is supplied and the answer is
+ * passed to ec_combine() with ec_combine_xattrop(). Whenever the fop could
+ * be read from frame->local, ec_complete() is called on it before
+ * returning. Always returns 0.
+ */
+int32_t
+ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                dict_t *xdata)
+{
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    data_t *entry = NULL;
+    uint64_t *words = NULL;
+
+    VALIDATE_OR_GOTO (this, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace ("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+              frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
+                                op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (op_ret >= 0) {
+        cbk->dict = dict_ref (xattr);
+
+        entry = dict_get (cbk->dict, EC_XATTR_VERSION);
+        if ((entry != NULL) && (entry->len >= sizeof (uint64_t))) {
+            words = (uint64_t *)entry->data;
+
+            if ((ntoh64 (words[0]) >> EC_SELFHEAL_BIT) & 1) {
+                LOCK(&fop->lock);
+
+                fop->healing |= 1ULL << idx;
+
+                UNLOCK(&fop->lock);
+            }
+        }
+
+        ec_dict_del_array (xattr, EC_XATTR_DIRTY, cbk->dirty,
+                           EC_VERSION_SIZE);
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref (xdata);
+    }
+
+    ec_combine (cbk, ec_combine_xattrop);
+
+out:
+    if (fop != NULL) {
+        ec_complete (fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Winds the xattrop request stored in 'fop' (location fop->loc[0], flags
+ * fop->xattrop_flags, dictionary fop->dict) to the child found at position
+ * 'idx' of ec->xl_list. 'idx' is passed as the cookie for ec_xattrop_cbk().
+ */
+void ec_wind_xattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_xattrop_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->xattrop,
+ &fop->loc[0], fop->xattrop_flags, fop->dict, fop->xdata);
+}
+
+/*
+ * State handler shared by the xattrop and fxattrop fops.
+ *
+ * Executes the action associated with 'state' and returns the state to
+ * process next. The lock is prepared on the inode of fop->loc[0] when the
+ * fop has no fd, and on the fd otherwise. Reporting uses the xattrop or the
+ * fxattrop callback depending on fop->id. Negative states are the error
+ * counterparts: they report fop->error through the caller's callback (when
+ * one was given) and then continue with lock reuse and unlock. Unknown
+ * states are logged and end the fop.
+ */
+int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_UPDATE_META);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_dict_combine(cbk, EC_COMBINE_DICT);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ /* The caller's callback is optional in both variants. */
+ if (fop->id == GF_FOP_XATTROP)
+ {
+ if (fop->cbks.xattrop != NULL)
+ {
+ fop->cbks.xattrop(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->dict,
+ cbk->xdata);
+ }
+ }
+ else
+ {
+ if (fop->cbks.fxattrop != NULL)
+ {
+ fop->cbks.fxattrop(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->dict,
+ cbk->xdata);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ /* Error path: report fop->error instead of an answer. */
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_XATTROP)
+ {
+ if (fop->cbks.xattrop != NULL)
+ {
+ fop->cbks.xattrop(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ }
+ else
+ {
+ if (fop->cbks.fxattrop != NULL)
+ {
+ fop->cbks.fxattrop(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the xattrop fop.
+ *
+ * Allocates the fop (wind function ec_wind_xattrop, state handler
+ * ec_manager_xattrop), stores 'optype' in fop->xattrop_flags, copies 'loc'
+ * into fop->loc[0], takes references on 'xattr' and 'xdata' when they are
+ * given and starts the state machine through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * loc, optype,
+ * xattr, xdata  - xattrop arguments.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                int32_t minimum, fop_xattrop_cbk_t func, void *data,
+                loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr,
+                dict_t *xdata)
+{
+    ec_cbk_t callback = { .xattrop = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(XATTROP) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum,
+                               ec_wind_xattrop, ec_manager_xattrop, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->xattrop_flags = optype;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xattr != NULL) {
+        fop->dict = dict_ref(xattr);
+        if (fop->dict == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_xattrop checks it for NULL
+         * as well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
+
+/*
+ * Winds the fxattrop request stored in 'fop' (fd fop->fd, flags
+ * fop->xattrop_flags, dictionary fop->dict) to the child found at position
+ * 'idx' of ec->xl_list. The callback is ec_xattrop_cbk(), the same one the
+ * xattrop variant uses; 'idx' is passed to it as the cookie.
+ */
+void ec_wind_fxattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_xattrop_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fxattrop,
+ fop->fd, fop->xattrop_flags, fop->dict, fop->xdata);
+}
+
+/*
+ * Entry point of the fxattrop fop.
+ *
+ * Allocates the fop (wind function ec_wind_fxattrop, state handler
+ * ec_manager_xattrop, shared with xattrop), stores 'optype' in
+ * fop->xattrop_flags, takes references on 'fd', 'xattr' and 'xdata' when
+ * they are given and starts the state machine through ec_manager().
+ *
+ * frame, this   - request frame and xlator; both are validated.
+ * target,
+ * minimum, data - forwarded untouched to ec_fop_data_allocate().
+ * func          - completion callback; may be NULL.
+ * fd, optype,
+ * xattr, xdata  - fxattrop arguments.
+ *
+ * If anything fails after the fop exists, ec_manager() is started with
+ * ENOMEM so that the state machine reports the failure. If no fop exists,
+ * the failure is reported directly through 'func'.
+ */
+void ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                 int32_t minimum, fop_fxattrop_cbk_t func, void *data,
+                 fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr,
+                 dict_t *xdata)
+{
+    ec_cbk_t callback = { .fxattrop = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FXATTROP) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
+                               minimum, ec_wind_fxattrop, ec_manager_xattrop,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->xattrop_flags = optype;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xattr != NULL) {
+        fop->dict = dict_ref(xattr);
+        if (fop->dict == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* The callback is optional (ec_manager_xattrop checks it for NULL
+         * as well), so it must not be called blindly on this path. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-gf.c b/xlators/cluster/ec/src/ec-gf.c
new file mode 100644
index 00000000000..1ae8928f20b
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-gf.c
@@ -0,0 +1,11635 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "ec-gf.h"
+
+/*
+ * Copies 8 * 'width' 64-bit words from 'in' to 'out', overwriting 'out'.
+ * Presumably the multiplier-0 case of out = out * c + in -- confirm.
+ */
+static void gf8_muladd_00(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const size_t bytes = sizeof(uint64_t) * 8 * width;
+
+    memcpy(out, in, bytes);
+}
+
+/*
+ * XORs 'in' into 'out' in place. Both buffers are accessed as 8 rows of
+ * 'width' 64-bit words; every word of 'out' is XORed with the word at the
+ * same position of 'in'. Presumably out = out * 0x01 + in in GF(2^8).
+ */
+static void gf8_muladd_01(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] ^= src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x02 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_02(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[7];
+        r[1] = a[0];
+        r[7] = a[6];
+        r[5] = a[4];
+        r[6] = a[5];
+        r[3] = a[2] ^ a[7];
+        r[4] = a[3] ^ a[7];
+        r[2] = a[1] ^ a[7];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x03 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_03(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[0] ^ a[7];
+        t0 = a[2] ^ a[7];
+        r[1] = a[0] ^ a[1];
+        r[7] = a[6] ^ a[7];
+        r[5] = a[4] ^ a[5];
+        r[6] = a[5] ^ a[6];
+        r[4] = a[3] ^ a[4] ^ a[7];
+        r[2] = t0 ^ a[1];
+        r[3] = t0 ^ a[3];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x04 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_04(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[6];
+        r[1] = a[7];
+        r[7] = a[5];
+        r[6] = a[4];
+        t0 = a[6] ^ a[7];
+        r[2] = a[0] ^ a[6];
+        r[5] = a[3] ^ a[7];
+        r[3] = t0 ^ a[1];
+        r[4] = t0 ^ a[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x05 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_05(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[0] ^ a[6];
+        r[1] = a[1] ^ a[7];
+        r[7] = a[5] ^ a[7];
+        r[6] = a[4] ^ a[6];
+        r[2] = r[0] ^ a[2];
+        r[3] = r[1] ^ a[3] ^ a[6];
+        r[5] = r[7] ^ a[3];
+        r[4] = r[6] ^ a[2] ^ a[7];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x06 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_06(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[6] ^ a[7];
+        t0 = a[1] ^ a[6];
+        r[1] = a[0] ^ a[7];
+        r[7] = a[5] ^ a[6];
+        r[6] = a[4] ^ a[5];
+        r[4] = a[2] ^ a[3] ^ a[6];
+        r[5] = a[3] ^ a[4] ^ a[7];
+        r[3] = t0 ^ a[2];
+        r[2] = t0 ^ r[1];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x07 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_07(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[2] ^ a[6];
+        t1 = a[5] ^ a[6];
+        t2 = a[0] ^ a[7];
+        t3 = t0 ^ a[3];
+        r[6] = t1 ^ a[4];
+        r[7] = t1 ^ a[7];
+        r[0] = t2 ^ a[6];
+        r[1] = t2 ^ a[1];
+        r[3] = t3 ^ a[1];
+        r[4] = t3 ^ a[4];
+        r[5] = r[4] ^ r[7] ^ a[2];
+        r[2] = t0 ^ r[1];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x08 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_08(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[5];
+        r[1] = a[6];
+        r[7] = a[4];
+        r[6] = a[3] ^ a[7];
+        r[3] = a[0] ^ a[5] ^ a[6];
+        r[5] = a[2] ^ a[6] ^ a[7];
+        r[2] = a[5] ^ a[7];
+        r[4] = r[2] ^ a[1] ^ a[6];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x09 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_09(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[0] ^ a[5];
+        t0 = a[3] ^ a[6];
+        r[1] = a[1] ^ a[6];
+        r[7] = a[4] ^ a[7];
+        r[2] = a[2] ^ a[5] ^ a[7];
+        r[3] = t0 ^ r[0];
+        r[6] = t0 ^ a[7];
+        r[4] = r[1] ^ r[7] ^ a[5];
+        r[5] = r[2] ^ a[6];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x0A + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_0A(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[5] ^ a[7];
+        r[1] = a[0] ^ a[6];
+        r[7] = a[4] ^ a[6];
+        r[2] = a[1] ^ a[5];
+        r[6] = r[0] ^ a[3];
+        r[3] = r[0] ^ r[1] ^ a[2];
+        r[5] = r[7] ^ a[2] ^ a[7];
+        r[4] = r[2] ^ a[3] ^ a[6];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x0B + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_0B(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[2] ^ a[5];
+        t1 = a[0] ^ a[6];
+        t2 = a[4] ^ a[7];
+        r[0] = a[0] ^ a[5] ^ a[7];
+        r[2] = t0 ^ a[1];
+        r[1] = t1 ^ a[1];
+        r[6] = t1 ^ r[0] ^ a[3];
+        r[7] = t2 ^ a[6];
+        r[4] = t2 ^ r[6] ^ a[1];
+        r[3] = r[6] ^ a[0] ^ a[2];
+        r[5] = t0 ^ r[7];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x0C + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_0C(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[5] ^ a[6];
+        r[1] = a[6] ^ a[7];
+        r[7] = a[4] ^ a[5];
+        t0 = a[1] ^ a[5];
+        t1 = a[0] ^ a[7];
+        r[5] = a[2] ^ a[3] ^ a[6];
+        r[6] = a[3] ^ a[4] ^ a[7];
+        r[2] = t1 ^ r[0];
+        r[4] = t0 ^ a[2];
+        r[3] = t0 ^ t1;
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x0D + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_0D(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[4] ^ a[5];
+        t1 = a[5] ^ a[6];
+        r[1] = a[1] ^ a[6] ^ a[7];
+        r[7] = t0 ^ a[7];
+        r[4] = t0 ^ a[1] ^ a[2];
+        r[0] = t1 ^ a[0];
+        t2 = t1 ^ a[3];
+        r[6] = t2 ^ r[7];
+        r[2] = r[0] ^ a[2] ^ a[7];
+        r[3] = r[0] ^ r[1] ^ a[3];
+        r[5] = t2 ^ a[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x0E + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_0E(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[0] ^ a[1];
+        t1 = a[2] ^ a[5];
+        t2 = a[5] ^ a[6];
+        r[1] = a[0] ^ a[6] ^ a[7];
+        r[3] = t0 ^ t1;
+        r[2] = t0 ^ t2;
+        t3 = t1 ^ a[3];
+        r[7] = t2 ^ a[4];
+        r[0] = t2 ^ a[7];
+        r[4] = t3 ^ a[1] ^ a[7];
+        r[5] = t3 ^ r[7];
+        r[6] = r[0] ^ r[5] ^ a[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x0F + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_0F(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[6] ^ a[7];
+        t1 = t0 ^ a[1];
+        t2 = t0 ^ a[5];
+        r[1] = t1 ^ a[0];
+        r[7] = t2 ^ a[4];
+        r[0] = t2 ^ a[0];
+        r[6] = r[7] ^ a[3];
+        r[5] = r[6] ^ a[2] ^ a[7];
+        t3 = t1 ^ r[0] ^ a[2];
+        r[4] = t1 ^ r[5];
+        r[2] = t3 ^ a[6];
+        r[3] = t3 ^ a[3];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x10 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_10(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[4];
+        r[1] = a[5];
+        r[7] = a[3] ^ a[7];
+        t0 = a[6] ^ a[7];
+        r[2] = a[4] ^ a[6];
+        t1 = r[2] ^ a[5];
+        r[6] = t0 ^ a[2];
+        r[3] = t0 ^ t1;
+        r[5] = r[2] ^ r[3] ^ a[1];
+        r[4] = t1 ^ a[0];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x11 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_11(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[7] = a[3];
+        r[0] = a[0] ^ a[4];
+        r[1] = a[1] ^ a[5];
+        r[6] = a[2] ^ a[7];
+        r[4] = a[0] ^ a[5] ^ a[6];
+        r[5] = a[1] ^ a[6] ^ a[7];
+        r[2] = a[2] ^ a[4] ^ a[6];
+        r[3] = a[3] ^ a[4] ^ a[5] ^ a[7];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x12 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_12(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[4] ^ a[7];
+        r[1] = a[0] ^ a[5];
+        r[3] = a[2] ^ a[4] ^ a[5];
+        t0 = r[0] ^ a[6];
+        r[2] = t0 ^ a[1];
+        t1 = t0 ^ a[3];
+        r[6] = t0 ^ r[3];
+        r[5] = r[2] ^ a[5];
+        r[7] = t1 ^ a[4];
+        r[4] = t1 ^ r[1];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x13 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_13(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[7] = a[3] ^ a[6];
+        t0 = a[0] ^ a[5];
+        t1 = a[4] ^ a[7];
+        r[6] = a[2] ^ a[5] ^ a[7];
+        r[4] = t0 ^ r[7] ^ a[7];
+        r[1] = t0 ^ a[1];
+        r[0] = t1 ^ a[0];
+        r[5] = t1 ^ a[1] ^ a[6];
+        r[3] = t1 ^ r[6] ^ a[3];
+        r[2] = r[5] ^ a[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x14 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_14(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[4] ^ a[6];
+        r[1] = a[5] ^ a[7];
+        r[2] = a[0] ^ a[4];
+        t0 = r[0] ^ a[5];
+        r[7] = r[1] ^ a[3];
+        t1 = r[1] ^ a[2];
+        r[3] = t0 ^ a[1];
+        r[6] = t0 ^ t1;
+        r[4] = t1 ^ r[2];
+        r[5] = r[3] ^ a[3] ^ a[4];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x15 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_15(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[7] = a[3] ^ a[5];
+        t0 = a[0] ^ a[4];
+        r[1] = a[1] ^ a[5] ^ a[7];
+        r[5] = a[1] ^ a[3] ^ a[6];
+        r[0] = t0 ^ a[6];
+        r[2] = t0 ^ a[2];
+        r[3] = r[5] ^ a[4] ^ a[5];
+        r[6] = r[2] ^ a[0] ^ a[7];
+        r[4] = t0 ^ r[6] ^ a[5];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x16 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_16(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[0] ^ a[5];
+        t1 = a[4] ^ a[7];
+        t2 = a[2] ^ a[3] ^ a[4];
+        r[1] = t0 ^ a[7];
+        r[4] = t0 ^ t2;
+        r[0] = t1 ^ a[6];
+        t3 = t1 ^ a[1];
+        r[6] = r[0] ^ a[2] ^ a[5];
+        r[2] = t3 ^ a[0];
+        r[3] = r[6] ^ a[1];
+        r[7] = t2 ^ r[6];
+        r[5] = t3 ^ r[7];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x17 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_17(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[2] ^ a[5];
+        t1 = a[3] ^ a[6];
+        t2 = t0 ^ a[4];
+        r[4] = t0 ^ a[0] ^ a[3];
+        r[7] = t1 ^ a[5];
+        t3 = t1 ^ a[1];
+        r[6] = t2 ^ a[7];
+        r[5] = t3 ^ a[4];
+        r[3] = t3 ^ r[6];
+        r[0] = r[3] ^ r[4] ^ a[1];
+        r[2] = r[3] ^ r[7] ^ a[0];
+        r[1] = t2 ^ r[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x18 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_18(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[0] = a[4] ^ a[5];
+        r[1] = a[5] ^ a[6];
+        t0 = a[4] ^ a[7];
+        r[5] = a[1] ^ a[2] ^ a[5];
+        r[6] = a[2] ^ a[3] ^ a[6];
+        r[2] = t0 ^ r[1];
+        r[7] = t0 ^ a[3];
+        t1 = t0 ^ a[0];
+        r[3] = t1 ^ a[6];
+        r[4] = t1 ^ a[1];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x19 + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_19(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        r[5] = a[1] ^ a[2];
+        r[7] = a[3] ^ a[4];
+        t0 = a[0] ^ a[7];
+        r[6] = a[2] ^ a[3];
+        r[1] = a[1] ^ a[5] ^ a[6];
+        r[0] = a[0] ^ a[4] ^ a[5];
+        r[4] = t0 ^ a[1];
+        t1 = t0 ^ a[6];
+        r[2] = t1 ^ r[0] ^ a[2];
+        r[3] = t1 ^ r[7];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x1A + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_1A(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[4] ^ a[5];
+        t1 = a[5] ^ a[6];
+        t2 = t0 ^ a[1];
+        r[0] = t0 ^ a[7];
+        r[1] = t1 ^ a[0];
+        t3 = t1 ^ a[3];
+        r[5] = t2 ^ a[2];
+        r[2] = t2 ^ a[6];
+        r[7] = t3 ^ r[0];
+        r[6] = t3 ^ a[2];
+        r[4] = t3 ^ r[2] ^ a[0];
+        r[3] = t0 ^ r[1] ^ a[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+/*
+ * Updates 'out' in place from 'out' and 'in', both accessed as 8 rows of
+ * 'width' 64-bit words: per column, the 8 row words of 'out' are mixed by
+ * the XOR network below and each result is stored XORed with the matching
+ * word of 'in'. Presumably out = out * 0x1B + in in GF(2^8) -- confirm.
+ */
+static void gf8_muladd_1B(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    const uint64_t * src = (const uint64_t *)in;
+    uint64_t * dst = (uint64_t *)out;
+    unsigned int col, bit;
+
+    for (col = 0; col < width; col++, src++, dst++)
+    {
+        uint64_t a[8], r[8];
+        uint64_t t0, t1, t2, t3, t4;
+
+        /* Snapshot the current column of 'out' before overwriting it. */
+        for (bit = 0; bit < 8; bit++)
+        {
+            a[bit] = dst[width * bit];
+        }
+
+        t0 = a[2] ^ a[4];
+        t1 = a[2] ^ a[5];
+        t2 = a[3] ^ a[6];
+        r[5] = t0 ^ a[1];
+        t3 = t0 ^ a[0];
+        r[6] = t1 ^ a[3];
+        r[0] = t1 ^ t3 ^ a[7];
+        r[7] = t2 ^ a[4];
+        t4 = r[5] ^ a[6];
+        r[3] = t2 ^ t3;
+        r[2] = t4 ^ a[5];
+        r[4] = t4 ^ r[3];
+        r[1] = t3 ^ r[2];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            dst[width * bit] = r[bit] ^ src[width * bit];
+        }
+    }
+}
+
+static void gf8_muladd_1C(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x1C * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in3; /* XOR network; later lines reuse earlier results */
+ tmp1 = in4 ^ in6;
+ tmp2 = in5 ^ in7;
+ out6 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in5;
+ out1 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in1;
+ tmp4 = tmp2 ^ in4;
+ out2 = tmp4 ^ in0;
+ out7 = tmp4 ^ in3;
+ out5 = tmp0 ^ tmp3;
+ out3 = tmp3 ^ out2;
+ out4 = out3 ^ in2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_1D(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x1D * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in3; /* XOR network; later lines reuse earlier results */
+ tmp1 = in0 ^ in4;
+ tmp2 = in3 ^ in4;
+ tmp3 = in2 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out5 = tmp0 ^ tmp3;
+ tmp4 = tmp1 ^ in5;
+ out6 = tmp2 ^ in2;
+ out7 = tmp2 ^ in5;
+ out2 = tmp3 ^ tmp4;
+ out4 = out3 ^ out6 ^ in6;
+ out0 = tmp4 ^ in6;
+ out1 = out2 ^ out4 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_1E(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x1E * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in4; /* XOR network; later lines reuse earlier results */
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in1;
+ out3 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in5;
+ out4 = out3 ^ in3 ^ in6;
+ tmp3 = out4 ^ in7;
+ out6 = tmp3 ^ out2 ^ in4;
+ out7 = tmp1 ^ out6;
+ out0 = out7 ^ in3;
+ out1 = tmp0 ^ out0;
+ out5 = tmp3 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_1F(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x1F * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in4 ^ in6; /* XOR network; later lines reuse earlier results */
+ tmp1 = tmp0 ^ in5;
+ out7 = tmp1 ^ in3;
+ out0 = tmp1 ^ in0 ^ in7;
+ out6 = out7 ^ in2 ^ in6;
+ out1 = out0 ^ in1 ^ in4;
+ out4 = out0 ^ out6 ^ in1;
+ out3 = tmp0 ^ out4;
+ out2 = out4 ^ out7 ^ in7;
+ out5 = out3 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_20(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x20 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out1 = in4; /* XOR network; later lines reuse earlier results */
+ out0 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in6 ^ in7;
+ out2 = out0 ^ in5;
+ out4 = tmp0 ^ in5;
+ out3 = tmp0 ^ tmp1;
+ out7 = tmp1 ^ in2;
+ out6 = tmp1 ^ in1 ^ in5;
+ out5 = out2 ^ out3 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_21(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x21 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out1 = in1 ^ in4; /* XOR network; later lines reuse earlier results */
+ tmp0 = in4 ^ in6;
+ out4 = in3 ^ in5;
+ out7 = in2 ^ in6;
+ out0 = in0 ^ in3 ^ in7;
+ out6 = in1 ^ in5 ^ in7;
+ out3 = tmp0 ^ in7;
+ out5 = tmp0 ^ in0;
+ out2 = out4 ^ in2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_22(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x22 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in3; /* XOR network; every output here uses inputs only */
+ out1 = in0 ^ in4;
+ out7 = in2 ^ in7;
+ out4 = in4 ^ in5 ^ in7;
+ out5 = in0 ^ in5 ^ in6;
+ out6 = in1 ^ in6 ^ in7;
+ out3 = in2 ^ in3 ^ in4 ^ in6;
+ out2 = in1 ^ in3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_23(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x23 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out7 = in2; /* XOR network; later lines reuse earlier results */
+ out0 = in0 ^ in3;
+ out4 = in5 ^ in7;
+ out5 = in0 ^ in6;
+ out6 = in1 ^ in7;
+ out3 = in2 ^ in4 ^ in6;
+ out1 = in0 ^ in1 ^ in4;
+ out2 = out4 ^ out6 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_24(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x24 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out1 = in4 ^ in7; /* XOR network; later lines reuse earlier results */
+ tmp0 = in3 ^ in4;
+ out0 = in3 ^ in6 ^ in7;
+ out3 = tmp0 ^ in1;
+ tmp1 = out0 ^ in5;
+ out6 = tmp1 ^ out3;
+ out2 = tmp1 ^ in0;
+ out7 = tmp1 ^ in2 ^ in3;
+ out5 = out2 ^ in4;
+ out4 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_25(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x25 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in1 ^ in4; /* XOR network; later lines reuse earlier results */
+ tmp0 = in2 ^ in5;
+ out1 = out3 ^ in7;
+ out7 = tmp0 ^ in6;
+ out6 = out1 ^ in5;
+ out4 = out7 ^ in3 ^ in7;
+ out2 = out4 ^ in0;
+ out0 = tmp0 ^ out2;
+ out5 = out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_26(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x26 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in3 ^ in6; /* XOR network; later lines reuse earlier results */
+ tmp0 = in4 ^ in7;
+ out7 = in2 ^ in5 ^ in7;
+ tmp1 = out0 ^ in0 ^ in5;
+ out1 = tmp0 ^ in0;
+ tmp2 = tmp0 ^ in6;
+ out2 = tmp1 ^ in1;
+ out5 = tmp1 ^ in7;
+ out6 = tmp2 ^ in1;
+ out4 = tmp2 ^ out7;
+ out3 = out0 ^ out6 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_27(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x27 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out7 = in2 ^ in5; /* XOR network; later lines reuse earlier results */
+ out0 = in0 ^ in3 ^ in6;
+ out6 = in1 ^ in4 ^ in7;
+ out4 = out7 ^ in6;
+ out2 = out0 ^ out7 ^ in1;
+ out5 = out0 ^ in7;
+ out1 = out6 ^ in0;
+ out3 = out6 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_28(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x28 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in3; /* XOR network; later lines reuse earlier results */
+ out1 = in4 ^ in6;
+ out0 = in3 ^ in5 ^ in7;
+ tmp0 = out1 ^ in7;
+ tmp1 = out0 ^ in4;
+ out7 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in1;
+ out3 = tmp1 ^ in0;
+ out6 = tmp1 ^ tmp2;
+ out4 = tmp2 ^ in3;
+ out5 = out3 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_29(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x29 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in2 ^ in3; /* XOR network; later lines reuse earlier results */
+ tmp0 = in1 ^ in3;
+ tmp1 = in4 ^ in6;
+ tmp2 = in0 ^ in4 ^ in7;
+ out6 = tmp0 ^ in5;
+ out4 = tmp0 ^ in6 ^ in7;
+ out1 = tmp1 ^ in1;
+ out7 = tmp1 ^ in2;
+ out3 = tmp2 ^ in5;
+ out5 = tmp2 ^ in2;
+ out0 = out3 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_2A(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x2A * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in3 ^ in5; /* XOR network; later lines reuse earlier results */
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ out7 = in2 ^ in4 ^ in7;
+ out3 = tmp1 ^ out0 ^ in2;
+ out2 = tmp0 ^ in7;
+ out6 = tmp0 ^ in6;
+ out1 = tmp1 ^ in6;
+ out5 = tmp1 ^ out7 ^ in5;
+ out4 = out1 ^ in0 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_2B(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x2B * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in1 ^ in6; /* XOR network; later lines reuse earlier results */
+ out7 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in2 ^ in7;
+ out6 = in1 ^ in3;
+ out1 = out4 ^ in0 ^ in4;
+ out3 = tmp0 ^ out7;
+ out0 = tmp0 ^ in3;
+ out5 = tmp1 ^ in0;
+ out2 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_2C(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x2C * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in5; /* XOR network; later lines reuse earlier results */
+ tmp1 = in2 ^ in3 ^ in4;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp1 ^ in1;
+ out5 = tmp1 ^ in0 ^ in5;
+ tmp3 = tmp2 ^ in4;
+ out6 = tmp2 ^ out4;
+ out7 = tmp3 ^ in7;
+ out2 = tmp3 ^ out5;
+ out3 = out6 ^ in0;
+ out0 = tmp1 ^ out7;
+ out1 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_2D(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x2D * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in3; /* XOR network; later lines reuse earlier results */
+ out4 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in0;
+ out2 = tmp1 ^ in6;
+ out5 = tmp1 ^ in4;
+ tmp2 = out2 ^ in2;
+ tmp3 = tmp2 ^ in5;
+ out0 = tmp3 ^ in7;
+ out7 = tmp3 ^ out5;
+ out6 = out4 ^ out7 ^ in6;
+ out3 = tmp2 ^ out6;
+ out1 = out0 ^ out6 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_2E(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x2E * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in4 ^ in7; /* XOR network; later lines reuse earlier results */
+ out0 = in3 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ in0;
+ tmp2 = tmp0 ^ in2;
+ out1 = tmp1 ^ in6;
+ out4 = tmp2 ^ in1;
+ out7 = tmp2 ^ in5;
+ out3 = out0 ^ out4 ^ in0;
+ out2 = out3 ^ out7 ^ in7;
+ out6 = tmp1 ^ out2;
+ out5 = tmp1 ^ out7 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_2F(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x2F * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in3; /* XOR network; later lines reuse earlier results */
+ tmp1 = in2 ^ in5;
+ out4 = in1 ^ in2 ^ in7;
+ out6 = in1 ^ in3 ^ in4;
+ out5 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in6;
+ out7 = tmp1 ^ in4;
+ out0 = tmp2 ^ in5;
+ out2 = tmp2 ^ out4;
+ out1 = tmp2 ^ out6 ^ in7;
+ out3 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_30(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x30 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out1 = in4 ^ in5; /* XOR network; later lines reuse earlier results */
+ tmp0 = in3 ^ in6;
+ tmp1 = in4 ^ in7;
+ out6 = in1 ^ in2 ^ in5;
+ out3 = tmp0 ^ in5;
+ out4 = tmp0 ^ in0;
+ out7 = tmp0 ^ in2;
+ out0 = tmp1 ^ in3;
+ out2 = tmp1 ^ out3;
+ out5 = tmp1 ^ in0 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_31(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x31 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in5 ^ in6; /* XOR network; later lines reuse earlier results */
+ tmp0 = in4 ^ in5;
+ tmp1 = in0 ^ in3 ^ in4;
+ tmp2 = out3 ^ in2;
+ out1 = tmp0 ^ in1;
+ out0 = tmp1 ^ in7;
+ out4 = tmp1 ^ in6;
+ out6 = tmp2 ^ in1;
+ out2 = tmp2 ^ out0 ^ in0;
+ out5 = out1 ^ in0 ^ in7;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_32(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x32 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in3 ^ in4; /* XOR network; later lines reuse earlier results */
+ out7 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in0 ^ in7;
+ out6 = in1 ^ in2;
+ out1 = in0 ^ in4 ^ in5;
+ out2 = tmp0 ^ out0 ^ in1;
+ out3 = tmp0 ^ out7 ^ in7;
+ out4 = tmp1 ^ in6;
+ out5 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_33(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x33 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in3; /* XOR network; later lines reuse earlier results */
+ tmp1 = in0 ^ in4;
+ tmp2 = in1 ^ in5;
+ out6 = in1 ^ in2 ^ in6;
+ out7 = tmp0 ^ in7;
+ out0 = tmp1 ^ in3;
+ out1 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in7;
+ tmp4 = tmp2 ^ in4 ^ in6;
+ out5 = tmp3 ^ in0;
+ out3 = tmp3 ^ out6;
+ out4 = tmp4 ^ out5;
+ out2 = tmp0 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_34(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x34 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in4; /* XOR network; later lines reuse earlier results */
+ tmp1 = in4 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in6;
+ out1 = tmp1 ^ in7;
+ tmp4 = tmp1 ^ in2;
+ out5 = tmp2 ^ in0;
+ out3 = tmp2 ^ out1;
+ out0 = tmp3 ^ in7;
+ out7 = tmp3 ^ tmp4;
+ out6 = tmp4 ^ in1;
+ out2 = out3 ^ out5 ^ in3;
+ out4 = tmp4 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_35(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Rewrites 'out' in place: the eight words taken from 'out' at each
+   * position are mixed by a fixed XOR network, XORed with the matching
+   * words of 'in', and stored back.
+   *
+   * out, in: accessed as uint64_t arrays of 8 * width words, laid out as
+   *          eight planes of 'width' words (plane k starts at word index
+   *          width * k); 'in' is only read.
+   * width:   words per plane; also the number of loop iterations.
+   *
+   * NOTE(review): the name suggests out = 0x35 * out + in over GF(2^8) on
+   * bit-sliced data; inferred from the name only, confirm.
+   * NOTE(review): presumably callers pass buffers aligned for uint64_t
+   * access; confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 come from 'out', not 'in' */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in6; /* XOR network; later lines reuse earlier results */
+ tmp1 = in5 ^ in7;
+ out7 = tmp0 ^ tmp1 ^ in3;
+ out3 = tmp1 ^ in1;
+ out1 = out3 ^ in4;
+ tmp2 = out1 ^ in7;
+ out5 = tmp2 ^ in0 ^ in3;
+ out6 = tmp0 ^ tmp2;
+ out0 = out3 ^ out5 ^ in6;
+ out4 = tmp0 ^ out0;
+ out2 = out4 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with 'in', store into 'out' */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* advance to the next word of every plane */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_36(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x36 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out4 = in0 ^ in2; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in1 ^ in3;
+ out0 = in3 ^ in4 ^ in6;
+ out6 = in1 ^ in2 ^ in4;
+ out5 = tmp0 ^ in0;
+ tmp1 = out5 ^ in5;
+ out2 = tmp1 ^ in4;
+ out3 = tmp1 ^ out4;
+ out1 = tmp0 ^ out2 ^ in7;
+ out7 = out3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_37(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x37 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2, tmp3; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in1 ^ in2; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = in2 ^ in4;
+ tmp2 = tmp0 ^ in6;
+ out3 = tmp0 ^ in5;
+ out4 = tmp1 ^ in0;
+ out6 = tmp2 ^ in4;
+ out1 = out3 ^ out4 ^ in7;
+ tmp3 = out4 ^ in1 ^ in3;
+ out7 = tmp3 ^ out1;
+ out2 = tmp3 ^ in5;
+ out5 = tmp1 ^ out2;
+ out0 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_38(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x38 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out3 = in0 ^ in3; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in3 ^ in4;
+ tmp1 = in5 ^ in7;
+ tmp2 = out3 ^ in1;
+ out2 = tmp0 ^ in6;
+ out0 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ tmp2;
+ out7 = out2 ^ in2;
+ out1 = out2 ^ in3 ^ in5;
+ out6 = out4 ^ in0 ^ in2;
+ out5 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_39(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x39 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out3 = in0; /* row 3 starts as a plain copy of old row 0 */
+ tmp0 = in1 ^ in5; /* XOR-only network from here; statements feed later ones, so keep their order */
+ tmp1 = tmp0 ^ in4;
+ out1 = tmp1 ^ in6;
+ out5 = out1 ^ in0 ^ in2;
+ tmp2 = tmp0 ^ out5;
+ out2 = tmp2 ^ in0 ^ in3;
+ out7 = out2 ^ in7;
+ out6 = tmp1 ^ out7;
+ out4 = tmp2 ^ out6;
+ out0 = out4 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_3A(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x3A + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in0 ^ in1; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = in0 ^ in2;
+ tmp2 = in3 ^ in4;
+ tmp3 = in1 ^ in6;
+ tmp4 = in3 ^ in7;
+ out4 = tmp0 ^ in5;
+ out5 = tmp1 ^ tmp3;
+ out3 = tmp1 ^ tmp4;
+ out0 = tmp2 ^ in5;
+ out7 = tmp2 ^ in2;
+ tmp5 = tmp3 ^ in4;
+ out2 = tmp4 ^ tmp5;
+ out1 = tmp5 ^ out4;
+ out6 = tmp0 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_3B(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x3B + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in1 ^ in6; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in3;
+ out3 = tmp1 ^ in0;
+ out6 = tmp1 ^ tmp2;
+ out2 = out6 ^ in4;
+ out7 = tmp0 ^ out2;
+ out0 = out3 ^ out7 ^ in5;
+ out5 = out0 ^ out2 ^ in7;
+ out1 = tmp2 ^ out0;
+ out4 = out1 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_3C(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x3C + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in0 ^ in3; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = in2 ^ in7;
+ tmp2 = in1 ^ in6 ^ in7;
+ out2 = tmp0 ^ in4;
+ out3 = tmp0 ^ tmp2;
+ out4 = tmp1 ^ out3 ^ in5;
+ out5 = tmp2 ^ out2 ^ in2;
+ out1 = out4 ^ out5 ^ in6;
+ out0 = out1 ^ in3;
+ out7 = tmp1 ^ out0;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_3D(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x3D + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in0 ^ in2; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = tmp0 ^ in3;
+ out2 = tmp1 ^ in4;
+ tmp2 = out2 ^ in5;
+ out4 = tmp2 ^ in1 ^ in6;
+ out5 = out4 ^ in7;
+ out6 = out5 ^ in0;
+ out7 = out6 ^ in1;
+ out0 = tmp0 ^ out7;
+ out1 = tmp1 ^ out5;
+ out3 = tmp2 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_3E(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x3E + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in3 ^ in5; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp1 ^ in6;
+ out7 = tmp1 ^ in2;
+ out6 = out7 ^ in1 ^ in5 ^ in7;
+ out2 = out6 ^ in0 ^ in2;
+ out4 = out0 ^ out6 ^ in0;
+ out5 = tmp0 ^ out4;
+ out3 = out5 ^ in7;
+ out1 = out3 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_3F(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x3F + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in0 ^ in1; /* XOR-only network; statements feed later ones, so keep their order */
+ out3 = tmp0 ^ in2 ^ in6;
+ tmp1 = out3 ^ in5 ^ in7;
+ out4 = tmp1 ^ in4;
+ out5 = tmp1 ^ in3;
+ out1 = out4 ^ in2;
+ out7 = out1 ^ out3 ^ in3;
+ out2 = tmp0 ^ out7 ^ in5;
+ tmp2 = out2 ^ in0;
+ out6 = tmp2 ^ in6;
+ out0 = tmp1 ^ tmp2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_40(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x40 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out1 = in3 ^ in7; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in3 ^ in4;
+ tmp1 = in6 ^ in7;
+ out4 = tmp0 ^ in2;
+ out5 = tmp0 ^ in5;
+ out0 = tmp1 ^ in2;
+ out7 = tmp1 ^ in1 ^ in5;
+ out2 = out0 ^ in4;
+ out3 = out2 ^ out5 ^ in7;
+ out6 = out3 ^ out4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_41(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x41 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out4 = in2 ^ in3; /* XOR-only network; tmp0/tmp1 must be computed before the outputs that use them */
+ tmp0 = in5 ^ in6;
+ tmp1 = in6 ^ in7;
+ out5 = in3 ^ in4;
+ out1 = in1 ^ in3 ^ in7;
+ out6 = in0 ^ in4 ^ in5;
+ out3 = tmp0 ^ in2;
+ out7 = tmp0 ^ in1;
+ out2 = tmp1 ^ in4;
+ out0 = tmp1 ^ in0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_42(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x42 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out0 = in2 ^ in6; /* XOR-only network; out2 and out3 below reuse out0 and out5, so keep the order */
+ out5 = in3 ^ in5;
+ out1 = in0 ^ in3 ^ in7;
+ out7 = in1 ^ in5 ^ in7;
+ out4 = in2 ^ in4 ^ in7;
+ out6 = in0 ^ in4 ^ in6;
+ out2 = out0 ^ in1 ^ in4;
+ out3 = out5 ^ in6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_43(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x43 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out5 = in3; /* row 5 starts as a plain copy of old row 3 */
+ out7 = in1 ^ in5; /* every output in this network is built from in0..in7 only */
+ out4 = in2 ^ in7;
+ out6 = in0 ^ in4;
+ out0 = in0 ^ in2 ^ in6;
+ out3 = in5 ^ in6 ^ in7;
+ out2 = in1 ^ in4 ^ in6;
+ out1 = in0 ^ in1 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_44(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x44 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0; /* XOR term shared by out4 and out5 */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out1 = in3; /* row 1 starts as a plain copy of old row 3 */
+ out0 = in2 ^ in7; /* XOR-only network from here; out0 and tmp0 are reused below, so keep the order */
+ tmp0 = in4 ^ in7;
+ out7 = in1 ^ in6 ^ in7;
+ out6 = in0 ^ in5 ^ in6;
+ out4 = tmp0 ^ in3 ^ in6;
+ out3 = out0 ^ in1 ^ in3 ^ in5;
+ out2 = out0 ^ in0 ^ in4;
+ out5 = tmp0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_45(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x45 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out1 = in1 ^ in3; /* XOR-only network; out2 and out3 below reuse earlier outputs, so keep the order */
+ out7 = in1 ^ in6;
+ out5 = in4 ^ in7;
+ out6 = in0 ^ in5;
+ out0 = in0 ^ in2 ^ in7;
+ out4 = in3 ^ in6 ^ in7;
+ out2 = out5 ^ in0;
+ out3 = out0 ^ out6 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_46(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x46 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out0 = in2; /* row 0 starts as a plain copy of old row 2 */
+ out1 = in0 ^ in3; /* XOR-only network from here; out2 below reuses out4 and out6, so keep the order */
+ out7 = in1 ^ in7;
+ out4 = in4 ^ in6;
+ out5 = in5 ^ in7;
+ out6 = in0 ^ in6;
+ out3 = in1 ^ in3 ^ in5;
+ out2 = out4 ^ out6 ^ in1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_47(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x47 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0; /* XOR term shared by out1 and out2 */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out4 = in6; /* rows 4, 7, 5 and 6 start as plain copies of old rows 6, 1, 7 and 0 */
+ out7 = in1;
+ out5 = in7;
+ out6 = in0;
+ tmp0 = in0 ^ in1; /* must precede out1 and out2, which both use it */
+ out3 = in1 ^ in5;
+ out0 = in0 ^ in2;
+ out1 = tmp0 ^ in3;
+ out2 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_48(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x48 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in2 ^ in3; /* XOR-only network; statements feed later ones, so keep their order */
+ out1 = in3 ^ in6 ^ in7;
+ out3 = tmp0 ^ in0;
+ out0 = tmp0 ^ out1 ^ in5;
+ tmp1 = out0 ^ in4;
+ out2 = tmp1 ^ in7;
+ out5 = tmp1 ^ in3;
+ out4 = out5 ^ in1;
+ out7 = tmp0 ^ out4;
+ out6 = tmp1 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_49(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x49 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out3 = in0 ^ in2; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in2 ^ in5;
+ out2 = in4 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ out2 ^ in3;
+ out7 = out2 ^ in1;
+ out5 = tmp1 ^ in7;
+ out4 = out5 ^ out7 ^ in6;
+ out1 = tmp0 ^ out4;
+ out6 = out1 ^ out7 ^ in0;
+ out0 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_4A(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x4A + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in2 ^ in6; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = in3 ^ in7;
+ out0 = tmp0 ^ in5;
+ out3 = tmp1 ^ in0;
+ out5 = tmp1 ^ out0;
+ out4 = out0 ^ in1 ^ in4;
+ out1 = out3 ^ in6;
+ out2 = out4 ^ in7;
+ out6 = out1 ^ in4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_4B(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x4B + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2, tmp3; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out3 = in0 ^ in7; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in1 ^ in5;
+ tmp1 = in2 ^ in6;
+ tmp2 = out3 ^ in3;
+ out7 = tmp0 ^ in4;
+ out4 = tmp0 ^ tmp1;
+ tmp3 = tmp1 ^ in0;
+ out6 = tmp2 ^ in4;
+ out5 = tmp2 ^ tmp3;
+ out1 = tmp2 ^ in1 ^ in6;
+ out2 = out7 ^ in6 ^ in7;
+ out0 = tmp3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_4C(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x4C + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out1 = in3 ^ in6; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in2 ^ in5;
+ tmp1 = out1 ^ in5 ^ in7;
+ out0 = tmp0 ^ in7;
+ tmp2 = tmp0 ^ in4;
+ out6 = tmp1 ^ in0;
+ out2 = tmp2 ^ in0;
+ out5 = tmp2 ^ in6;
+ out3 = tmp0 ^ out6 ^ in1;
+ out7 = out0 ^ out5 ^ in1;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_4D(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x4D + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ tmp0 = in0 ^ in5; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp1 = in1 ^ in6;
+ out4 = in1 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in7;
+ out2 = tmp0 ^ in4;
+ out1 = tmp1 ^ in3;
+ out7 = tmp1 ^ in4;
+ out0 = tmp2 ^ in2;
+ out6 = tmp2 ^ in3;
+ out5 = out7 ^ in1 ^ in2;
+ out3 = tmp1 ^ out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_4E(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x4E + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out0 = in2 ^ in5; /* XOR-only network; later outputs reuse earlier ones, so keep their order */
+ out7 = in1 ^ in4 ^ in7;
+ out1 = in0 ^ in3 ^ in6;
+ out5 = out0 ^ in6;
+ out4 = out7 ^ in5;
+ out3 = out1 ^ in1;
+ out6 = out1 ^ in7;
+ out2 = out4 ^ in0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_4F(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x4F + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out5 = in2 ^ in6; /* XOR-only network; out1 and out2 below reuse out3 and out4, so keep the order */
+ out7 = in1 ^ in4;
+ out3 = in0 ^ in1 ^ in6;
+ out4 = in1 ^ in5 ^ in7;
+ out0 = in0 ^ in2 ^ in5;
+ out6 = in0 ^ in3 ^ in7;
+ out1 = out3 ^ in3;
+ out2 = out4 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_50(uint8_t * out, uint8_t * in, unsigned int width) /* Rewrites `out` in place from the old `out` words and `in`; nothing is written through `in` here. */
+{ /* NOTE(review): the name suggests out = out * 0x50 + in over GF(2^8); not verifiable from this block -- confirm. */
+ unsigned int i; /* plain iteration counter; addressing is done by advancing the pointers */
+ uint64_t * in_ptr = (uint64_t *)in; /* both byte buffers are walked as 64-bit words */
+ uint64_t * out_ptr = (uint64_t *)out; /* NOTE(review): assumes uint64_t-aligned buffers -- confirm with callers */
+
+ for (i = 0; i < width; i++) /* over all iterations, word indexes 0 .. 8 * width - 1 of each buffer are touched */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7; /* new values for the eight rows */
+ uint64_t tmp0, tmp1, tmp2; /* XOR terms reused by several outputs */
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 snapshot the current `out` words (not `in`) before they are overwritten */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7]; /* eight rows spaced `width` words apart */
+
+ out2 = in2 ^ in7; /* XOR-only network; statements feed later ones, so keep their order */
+ tmp0 = in3 ^ in5;
+ out0 = out2 ^ in4 ^ in6;
+ out1 = tmp0 ^ in7;
+ tmp1 = tmp0 ^ in6;
+ out3 = out0 ^ in3;
+ out7 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in0;
+ out5 = out3 ^ in1 ^ in2;
+ out4 = tmp2 ^ in2;
+ out6 = tmp2 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* store each result XORed with the same-position word of `in` */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both cursors to the next 64-bit column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_51(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 51 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in7; /* fixed XOR schedule; later lines reuse earlier results */
+ out3 = in2 ^ in4 ^ in6 ^ in7;
+ out0 = out3 ^ in0;
+ out6 = out0 ^ in5;
+ out4 = out6 ^ in3 ^ in7;
+ out1 = out0 ^ out4 ^ in1;
+ out7 = out1 ^ in6;
+ out5 = out7 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_52(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 52 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in1 ^ in2; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in2 ^ in4;
+ tmp1 = in3 ^ in5;
+ tmp2 = in3 ^ in6;
+ tmp3 = in0 ^ in7;
+ out0 = tmp0 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ out7 = tmp1 ^ in1;
+ out1 = tmp1 ^ tmp3;
+ out3 = tmp2 ^ in4;
+ out5 = tmp2 ^ in1 ^ in7;
+ out4 = tmp2 ^ out1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_53(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 53 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in1; /* fixed XOR schedule; later lines reuse earlier results */
+ out3 = in4 ^ in6;
+ out0 = out3 ^ in0 ^ in2;
+ out6 = out0 ^ in7;
+ out4 = out6 ^ in5;
+ out7 = out0 ^ out4 ^ in1 ^ in3;
+ out1 = out7 ^ in0;
+ out5 = out7 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_54(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 54 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out1 = in3 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in1 ^ in3;
+ tmp1 = in2 ^ in4;
+ tmp2 = in0 ^ in7;
+ out5 = in1 ^ in4 ^ in6;
+ out4 = tmp2 ^ out1;
+ out7 = tmp0 ^ in6;
+ out3 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in7;
+ tmp3 = tmp2 ^ in2;
+ out2 = tmp3 ^ in6;
+ out6 = tmp3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_55(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 55 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in3; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in1 ^ in4;
+ tmp2 = in6 ^ in7;
+ out7 = tmp0 ^ tmp2;
+ out1 = tmp0 ^ in5;
+ out3 = tmp1 ^ in2;
+ out5 = tmp1 ^ in5 ^ in6;
+ out2 = tmp2 ^ in0;
+ out4 = out5 ^ out7 ^ in0;
+ out6 = out2 ^ in2 ^ in5;
+ out0 = out5 ^ out6 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_56(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 56 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in2 ^ in4; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in0 ^ in2;
+ out4 = in0 ^ in5;
+ out7 = in1 ^ in3;
+ out5 = in1 ^ in6;
+ out6 = tmp0 ^ in7;
+ out2 = tmp0 ^ out5;
+ out1 = out4 ^ in3;
+ out3 = out7 ^ in4 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_57(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 57 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in1 ^ in7;
+ out0 = in0 ^ in2 ^ in4;
+ out5 = in1 ^ in5 ^ in6;
+ out4 = tmp0 ^ in4;
+ out1 = tmp0 ^ in1 ^ in3;
+ out2 = tmp0 ^ out5;
+ out3 = tmp1 ^ in4;
+ out7 = tmp1 ^ in3;
+ out6 = tmp1 ^ out2 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_58(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 58 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in2 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in2 ^ in3 ^ in4;
+ out5 = tmp0 ^ in1;
+ out6 = tmp0 ^ in0 ^ in5;
+ out3 = out6 ^ in7;
+ tmp1 = out2 ^ out5;
+ out7 = tmp1 ^ in6;
+ out4 = tmp1 ^ out3 ^ in3;
+ out0 = out4 ^ out7 ^ in0;
+ out1 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_59(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 59 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in0 ^ in5 ^ in7;
+ out3 = tmp0 ^ in2 ^ in4;
+ out0 = out3 ^ in6;
+ tmp1 = out0 ^ in7;
+ out6 = tmp1 ^ in3;
+ out5 = out6 ^ in0 ^ in1 ^ in6;
+ out4 = tmp0 ^ out5;
+ out1 = tmp1 ^ out4;
+ out7 = out1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_5A(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 5A suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in2; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in2 ^ in5;
+ out5 = tmp0 ^ in3;
+ out4 = tmp0 ^ in0;
+ tmp2 = tmp1 ^ in4;
+ out2 = tmp1 ^ in1 ^ in7;
+ out7 = tmp2 ^ out5;
+ out6 = out4 ^ out7 ^ in5;
+ out0 = tmp2 ^ in6;
+ out1 = out0 ^ out6 ^ in7;
+ out3 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_5B(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 5B suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in3; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in0 ^ in4;
+ tmp2 = in1 ^ in5;
+ out5 = tmp0 ^ tmp2;
+ tmp3 = tmp1 ^ in6;
+ out3 = tmp1 ^ in5;
+ out2 = tmp2 ^ in7;
+ tmp4 = out3 ^ in2;
+ out7 = out2 ^ in3 ^ in4;
+ out0 = tmp4 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ out4 = tmp2 ^ tmp4;
+ out1 = tmp3 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_5C(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 5C suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in6; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in0 ^ in2 ^ in5;
+ out1 = tmp0 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ out2 = tmp1 ^ in6;
+ out6 = tmp1 ^ in3;
+ out4 = tmp2 ^ in0;
+ out7 = tmp2 ^ in4;
+ out3 = tmp1 ^ out7;
+ out0 = out3 ^ out4 ^ in7;
+ out5 = out0 ^ in1 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_5D(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 5D suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in1; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in0 ^ in6;
+ out2 = tmp1 ^ in5;
+ tmp2 = out2 ^ in3;
+ out6 = tmp2 ^ in2;
+ out1 = tmp0 ^ tmp2;
+ tmp3 = out1 ^ in4 ^ in5;
+ out4 = tmp3 ^ in0;
+ out7 = tmp3 ^ in7;
+ tmp4 = out4 ^ out6;
+ out5 = tmp4 ^ in7;
+ out0 = tmp0 ^ out5;
+ out3 = tmp1 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_5E(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 5E suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in3 ^ in5;
+ tmp2 = in1 ^ in7;
+ out7 = in1 ^ in3 ^ in4;
+ out0 = tmp0 ^ in4;
+ tmp3 = tmp1 ^ in0;
+ out5 = tmp2 ^ in2;
+ out1 = tmp3 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ tmp4 = tmp2 ^ out1;
+ out3 = tmp4 ^ in4;
+ out4 = tmp1 ^ tmp4;
+ out2 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_5F(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 5F suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in0 ^ in6;
+ tmp2 = tmp0 ^ in7;
+ tmp3 = tmp1 ^ in3;
+ out2 = tmp1 ^ tmp2;
+ out5 = tmp2 ^ in2;
+ out6 = tmp3 ^ in2;
+ out3 = out2 ^ in4;
+ out4 = out3 ^ in5;
+ out1 = tmp0 ^ tmp3;
+ out7 = tmp3 ^ out4;
+ out0 = out4 ^ out5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_60(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 60 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in2 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in3 ^ in6;
+ out1 = in3 ^ in4 ^ in7;
+ out7 = out4 ^ in1;
+ tmp1 = out4 ^ in4;
+ out0 = tmp0 ^ in2;
+ out5 = tmp0 ^ in0;
+ out2 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in7;
+ out6 = out3 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_61(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 61 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ out4 = tmp0 ^ in4;
+ tmp1 = out4 ^ in3;
+ out3 = tmp1 ^ in7;
+ out2 = tmp1 ^ in2 ^ in6;
+ out1 = tmp0 ^ out3 ^ in1;
+ out0 = out2 ^ out4 ^ in0;
+ out7 = tmp1 ^ out1;
+ out6 = out0 ^ out1 ^ in2;
+ out5 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_62(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 62 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in4 ^ in5; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp0 = in0 ^ in3 ^ in4;
+ out1 = tmp0 ^ in7;
+ out5 = tmp0 ^ in6;
+ tmp1 = out1 ^ in0;
+ tmp2 = tmp1 ^ out3;
+ out4 = tmp2 ^ in2;
+ tmp3 = tmp2 ^ in1;
+ out0 = out4 ^ in5 ^ in6;
+ out7 = tmp3 ^ out0;
+ out6 = tmp0 ^ tmp3;
+ out2 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_63(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 63 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in4; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in1 ^ in7;
+ out3 = tmp0 ^ in5;
+ tmp2 = out3 ^ in6;
+ out4 = out3 ^ in2 ^ in7;
+ out5 = tmp2 ^ in0;
+ tmp3 = out5 ^ in3;
+ out0 = tmp3 ^ out4;
+ out2 = tmp1 ^ tmp2;
+ out6 = tmp1 ^ tmp3;
+ tmp4 = tmp0 ^ out2;
+ out1 = tmp4 ^ out5;
+ out7 = tmp4 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_64(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 64 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in2 ^ in3; /* fixed XOR schedule; later lines reuse earlier results */
+ out1 = in3 ^ in4;
+ out7 = in1 ^ in2;
+ tmp0 = in4 ^ in5;
+ tmp1 = in0 ^ in7;
+ out4 = in5 ^ in6 ^ in7;
+ out2 = tmp0 ^ out0 ^ in0;
+ out3 = tmp0 ^ out7 ^ in6;
+ out5 = tmp1 ^ in6;
+ out6 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_65(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 65 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in3; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in4 ^ in5;
+ tmp2 = in6 ^ in7;
+ out7 = in1 ^ in2 ^ in7;
+ out1 = in1 ^ in3 ^ in4;
+ out0 = tmp0 ^ in2;
+ out2 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in0;
+ out3 = out4 ^ out7 ^ in3;
+ out5 = tmp3 ^ in5;
+ out6 = tmp3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_66(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 66 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in2; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in2 ^ in3;
+ tmp2 = in0 ^ in4;
+ out7 = tmp0 ^ in6;
+ out0 = tmp1 ^ in7;
+ out1 = tmp2 ^ in3;
+ tmp3 = tmp2 ^ in6;
+ tmp4 = out1 ^ in5;
+ out5 = tmp3 ^ in7;
+ out4 = tmp3 ^ tmp4;
+ out2 = tmp0 ^ tmp4 ^ in7;
+ out6 = tmp1 ^ out2 ^ in4;
+ out3 = tmp3 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_67(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 67 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in3; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in7;
+ out1 = tmp1 ^ in4;
+ out0 = tmp2 ^ in2;
+ tmp3 = out1 ^ in7;
+ out2 = tmp3 ^ in5;
+ out3 = out2 ^ in0 ^ in6;
+ out7 = tmp1 ^ out0 ^ in6;
+ out5 = tmp1 ^ out3;
+ out4 = tmp2 ^ out5;
+ out6 = tmp3 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_68(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 68 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in4; /* fixed XOR schedule; later lines reuse earlier results */
+ tmp1 = in2 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in6;
+ out0 = tmp1 ^ in6;
+ out6 = tmp2 ^ in0;
+ out7 = tmp1 ^ tmp2;
+ out1 = tmp3 ^ in7;
+ out2 = out1 ^ in2;
+ out4 = tmp2 ^ out2;
+ out3 = out4 ^ out6 ^ in3;
+ out5 = tmp3 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_69(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* In-place update of out from out and in, one 64-bit column per pass.
+   * out, in: both are read as uint64_t words at index c + k * width
+   *          (c in [0, width), k in [0, 8)), i.e. 8 * width words each;
+   *          only out is written.
+   * width:   number of columns, i.e. loop iterations.
+   * NOTE(review): the 69 suffix presumably names the GF(2^8) constant
+   * that out is multiplied by before in is added - confirm against the
+   * code generator. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* NOTE(review): assumes buffers are aligned for uint64_t access - confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are loaded from out, not from in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in6 ^ in7; /* fixed XOR schedule; later lines reuse earlier results */
+ out2 = tmp0 ^ in3 ^ in4;
+ out1 = out2 ^ in1;
+ out3 = out2 ^ in0 ^ in2;
+ out4 = out1 ^ in2 ^ in3;
+ out6 = out1 ^ in0 ^ in7;
+ out7 = out4 ^ in5 ^ in6;
+ out5 = out4 ^ out6 ^ in5;
+ out0 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR with the matching word of in, store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both pointers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_6A(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x6A in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in6; /* later lines reuse earlier results, so order matters */
+ out3 = in0 ^ in4 ^ in6;
+ tmp1 = tmp0 ^ in3;
+ out4 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in7;
+ out2 = out4 ^ in4;
+ out0 = tmp2 ^ in5;
+ out5 = tmp2 ^ out3;
+ out7 = out2 ^ in3 ^ in5;
+ out1 = tmp0 ^ out5;
+ out6 = tmp1 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_6B(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x6B in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in4 ^ in6; /* later lines reuse earlier results, so order matters */
+ out2 = tmp0 ^ in1 ^ in3;
+ out4 = out2 ^ in2;
+ tmp1 = out2 ^ in0;
+ out7 = out4 ^ in3 ^ in5 ^ in7;
+ out1 = tmp1 ^ in7;
+ out3 = tmp1 ^ in1;
+ out6 = tmp1 ^ in5;
+ out0 = tmp1 ^ out7 ^ in6;
+ out5 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_6C(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x6C in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in1; /* later lines reuse earlier results, so order matters */
+ tmp0 = in2 ^ in3;
+ out5 = in0 ^ in2;
+ out1 = in3 ^ in4 ^ in6;
+ tmp1 = out5 ^ in1;
+ out0 = tmp0 ^ in5;
+ out6 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in4;
+ out7 = out3 ^ in0;
+ out2 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_6D(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x6D in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in1 ^ in4; /* later lines reuse earlier results, so order matters */
+ tmp0 = in0 ^ in2;
+ tmp1 = out4 ^ in3;
+ out7 = out4 ^ in2 ^ in7;
+ out5 = tmp0 ^ in5;
+ out3 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in6;
+ out0 = out5 ^ in3;
+ out2 = out3 ^ out7 ^ in4;
+ out6 = out1 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_6E(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x6E in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in3; /* later lines reuse earlier results, so order matters */
+ tmp1 = in0 ^ in4;
+ out4 = tmp0 ^ in7;
+ out6 = tmp0 ^ in0 ^ in5;
+ out5 = tmp1 ^ in2;
+ tmp2 = tmp1 ^ in3;
+ out3 = tmp2 ^ out4;
+ out1 = tmp2 ^ in6;
+ out2 = tmp0 ^ out5;
+ out0 = out2 ^ out3 ^ in5;
+ out7 = out1 ^ out2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_6F(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x6F in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in7; /* later lines reuse earlier results, so order matters */
+ tmp1 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in0 ^ in2;
+ out4 = tmp1 ^ in1;
+ out0 = tmp2 ^ in5;
+ out3 = out4 ^ in0;
+ out2 = out3 ^ in7;
+ out1 = out2 ^ in6;
+ out6 = out1 ^ in4 ^ in5;
+ out7 = tmp2 ^ out1;
+ out5 = tmp1 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_70(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x70 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in2; /* later lines reuse earlier results, so order matters */
+ tmp0 = in2 ^ in4;
+ out2 = in2 ^ in3 ^ in5;
+ tmp1 = tmp0 ^ in6;
+ tmp2 = out2 ^ in7;
+ out0 = tmp1 ^ in3;
+ out4 = tmp1 ^ in0;
+ out7 = tmp2 ^ in1;
+ out6 = out4 ^ in1;
+ out5 = out7 ^ in0 ^ in2;
+ out1 = tmp0 ^ tmp2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_71(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x71 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in3 ^ in5; /* later lines reuse earlier results, so order matters */
+ out3 = in2 ^ in3;
+ tmp0 = in0 ^ in2;
+ tmp1 = out2 ^ in1;
+ out4 = tmp0 ^ in6;
+ tmp2 = tmp0 ^ in1;
+ out7 = tmp1 ^ in2;
+ out1 = tmp1 ^ in4 ^ in7;
+ out0 = out4 ^ in3 ^ in4;
+ out6 = tmp2 ^ in4;
+ out5 = tmp2 ^ out3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_72(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x72 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in7; /* later lines reuse earlier results, so order matters */
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in3 ^ in7;
+ out1 = tmp1 ^ in5;
+ out5 = out1 ^ in1;
+ tmp2 = tmp0 ^ out5;
+ out2 = tmp2 ^ in2;
+ out7 = out2 ^ in6;
+ out6 = tmp1 ^ out7;
+ out4 = tmp2 ^ out6;
+ out0 = out4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_73(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x73 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in3 ^ in7; /* later lines reuse earlier results, so order matters */
+ out2 = out3 ^ in1 ^ in5;
+ out1 = out2 ^ in0 ^ in4;
+ out5 = out1 ^ in5;
+ out6 = out1 ^ out3 ^ in2;
+ out0 = out2 ^ out6 ^ in6;
+ out7 = out0 ^ out1 ^ in3;
+ out4 = out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_74(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x74 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in4; /* later lines reuse earlier results, so order matters */
+ tmp1 = in1 ^ in2 ^ in6;
+ out4 = in0 ^ in4 ^ in7;
+ out5 = in0 ^ in1 ^ in5;
+ out0 = tmp0 ^ in2;
+ out1 = tmp0 ^ in5;
+ out3 = tmp1 ^ in7;
+ out6 = tmp1 ^ in0;
+ out2 = tmp1 ^ out5 ^ in3;
+ out7 = out3 ^ in3 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_75(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x75 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in0 ^ in7; /* later lines reuse earlier results, so order matters */
+ tmp0 = in1 ^ in3;
+ out5 = in0 ^ in1;
+ out7 = tmp0 ^ in2;
+ tmp1 = tmp0 ^ in4;
+ out6 = out5 ^ in2;
+ tmp2 = out7 ^ in6;
+ out1 = tmp1 ^ in5;
+ out0 = tmp1 ^ out6;
+ out3 = tmp2 ^ in7;
+ out2 = tmp2 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_76(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x76 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in1 ^ in6; /* later lines reuse earlier results, so order matters */
+ tmp0 = in0 ^ in5;
+ tmp1 = in3 ^ in7;
+ tmp2 = tmp0 ^ in4;
+ tmp3 = tmp1 ^ in2;
+ out5 = tmp2 ^ in1;
+ out1 = tmp2 ^ in3;
+ out0 = tmp3 ^ in4;
+ out4 = out1 ^ in5;
+ out7 = tmp3 ^ out3;
+ out2 = tmp0 ^ out7;
+ out6 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_77(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x77 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in0 ^ in3; /* later lines reuse earlier results, so order matters */
+ tmp0 = in1 ^ in4;
+ tmp1 = in1 ^ in6;
+ tmp2 = out4 ^ in5;
+ out5 = tmp0 ^ in0;
+ out1 = tmp0 ^ tmp2;
+ out3 = tmp1 ^ in3;
+ out2 = tmp1 ^ tmp2 ^ in7;
+ out7 = out3 ^ in2;
+ tmp3 = out7 ^ in6;
+ out6 = tmp2 ^ tmp3;
+ out0 = tmp3 ^ out5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_78(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x78 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in3; /* later lines reuse earlier results, so order matters */
+ tmp1 = in2 ^ in7;
+ tmp2 = in0 ^ in5 ^ in6;
+ out2 = tmp1 ^ in3;
+ out3 = tmp2 ^ in2;
+ out5 = out3 ^ in1 ^ in3;
+ out0 = tmp0 ^ out3 ^ in4;
+ out1 = tmp1 ^ out0;
+ out4 = out1 ^ out5 ^ in5;
+ out7 = tmp0 ^ out4;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_79(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x79 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in3 ^ in7; /* later lines reuse earlier results, so order matters */
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in5;
+ tmp2 = tmp1 ^ in2;
+ out4 = tmp2 ^ in0 ^ in7;
+ tmp3 = out4 ^ in5;
+ out5 = tmp3 ^ out2 ^ in6;
+ out7 = tmp0 ^ tmp2;
+ out6 = tmp0 ^ tmp3;
+ out3 = tmp1 ^ out5;
+ out0 = out3 ^ in4;
+ out1 = tmp3 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_7A(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x7A in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in2; /* later lines reuse earlier results, so order matters */
+ out2 = tmp0 ^ in3;
+ tmp1 = out2 ^ in4;
+ out4 = tmp1 ^ in0 ^ in5;
+ out5 = out4 ^ in6;
+ out6 = out5 ^ in7;
+ out7 = out6 ^ in0;
+ out0 = out7 ^ in1;
+ out1 = tmp0 ^ out6;
+ out3 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_7B(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x7B in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in1 ^ in3; /* later lines reuse earlier results, so order matters */
+ tmp0 = in0 ^ in5;
+ out4 = tmp0 ^ out2 ^ in2;
+ tmp1 = out4 ^ in4;
+ out6 = tmp1 ^ in7;
+ out5 = tmp1 ^ in5 ^ in6;
+ out0 = out6 ^ in1 ^ in6;
+ tmp2 = out0 ^ in2;
+ out1 = tmp2 ^ in1;
+ out3 = tmp2 ^ in4;
+ out7 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_7C(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x7C in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in5; /* later lines reuse earlier results, so order matters */
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp1 ^ in2;
+ out1 = tmp1 ^ in6;
+ out7 = out0 ^ in1 ^ in5 ^ in7;
+ out5 = out1 ^ out7 ^ in0;
+ out3 = out5 ^ in6;
+ out6 = tmp0 ^ out5;
+ out2 = out6 ^ in1;
+ out4 = out2 ^ out7 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_7D(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x7D in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in2; /* later lines reuse earlier results, so order matters */
+ tmp1 = tmp0 ^ in3;
+ tmp2 = tmp0 ^ in6;
+ out7 = tmp1 ^ in4;
+ tmp3 = tmp2 ^ in0;
+ out5 = tmp3 ^ in7;
+ out4 = tmp3 ^ in2 ^ in5;
+ out2 = tmp1 ^ out5;
+ out6 = tmp2 ^ out2;
+ out0 = out4 ^ out7 ^ in6;
+ out1 = tmp3 ^ out0;
+ out3 = out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_7E(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x7E in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in3 ^ in4; /* later lines reuse earlier results, so order matters */
+ tmp1 = in0 ^ in5;
+ out1 = tmp0 ^ tmp1 ^ in6;
+ out3 = tmp1 ^ in1;
+ out4 = out1 ^ in1 ^ in7;
+ tmp2 = out4 ^ in3;
+ out5 = tmp2 ^ in2;
+ out6 = tmp0 ^ out5;
+ out7 = tmp1 ^ out4 ^ in2;
+ out2 = out6 ^ in5 ^ in7;
+ out0 = tmp2 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_7F(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x7F in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in7; /* later lines reuse earlier results, so order matters */
+ tmp1 = tmp0 ^ in3 ^ in5;
+ tmp2 = tmp1 ^ in0;
+ out0 = tmp2 ^ in4;
+ out6 = tmp2 ^ in1;
+ out3 = tmp0 ^ out6;
+ tmp3 = out3 ^ in6;
+ out1 = tmp3 ^ in4;
+ out2 = tmp3 ^ in5;
+ out4 = tmp3 ^ in7;
+ out5 = tmp1 ^ out1;
+ out7 = out0 ^ out4 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_80(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x80 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in3; /* later lines reuse earlier results, so order matters */
+ tmp1 = in4 ^ in5;
+ out1 = in2 ^ in6 ^ in7;
+ out5 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in1;
+ out6 = tmp1 ^ in3;
+ out7 = tmp1 ^ in0 ^ in6;
+ out4 = tmp2 ^ in7;
+ out3 = tmp2 ^ out6;
+ out2 = out3 ^ out5 ^ in6;
+ out0 = out2 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_81(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x81 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in4 ^ in6; /* later lines reuse earlier results, so order matters */
+ tmp1 = tmp0 ^ in3;
+ out6 = tmp1 ^ in5;
+ out5 = out6 ^ in2 ^ in6;
+ out3 = out5 ^ in1;
+ out2 = tmp0 ^ out3;
+ out1 = out3 ^ out6 ^ in7;
+ out4 = tmp1 ^ out1;
+ out7 = out2 ^ out4 ^ in0;
+ out0 = out7 ^ in1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_82(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x82 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in1 ^ in2; /* later lines reuse earlier results, so order matters */
+ tmp0 = in6 ^ in7;
+ out5 = in2 ^ in3;
+ out6 = in3 ^ in4;
+ out7 = in0 ^ in4 ^ in5;
+ out0 = in1 ^ in5 ^ in6;
+ out1 = tmp0 ^ in0 ^ in2;
+ out2 = tmp0 ^ in3 ^ in5;
+ out3 = tmp0 ^ out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_83(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Replaces out with T(out) ^ in, where T is the fixed XOR network
+   * computed below. The name suggests T is multiplication by 0x83 in
+   * GF(2^8) -- TODO confirm; the origin of the equations is not visible
+   * here. out and in are each accessed as 8 rows of `width` uint64_t
+   * words, row k starting at word index width * k.
+   * NOTE(review): the uint64_t casts assume suitably aligned buffers --
+   * verify at the call sites. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < width; i++) /* one 64-bit column of all 8 rows per pass */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the current rows of out, not of in */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in1; /* later lines reuse earlier results, so order matters */
+ tmp1 = in2 ^ in5;
+ tmp2 = in3 ^ in6;
+ out4 = in1 ^ in2 ^ in4;
+ out0 = tmp0 ^ in5 ^ in6;
+ out5 = tmp1 ^ in3;
+ tmp3 = tmp1 ^ in7;
+ out6 = tmp2 ^ in4;
+ out2 = tmp2 ^ tmp3;
+ tmp4 = tmp3 ^ out4;
+ out1 = tmp3 ^ out0;
+ out3 = tmp4 ^ in3;
+ out7 = tmp0 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* accumulate: transformed row XOR same row of in */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* step both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_84(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x84 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; out2 and out3 reuse out6 and out5, so statement order matters. */
+ out1 = in2 ^ in6;
+ out6 = in3 ^ in5;
+ out0 = in1 ^ in5 ^ in7;
+ out7 = in0 ^ in4 ^ in6;
+ out4 = in1 ^ in3 ^ in6;
+ out5 = in2 ^ in4 ^ in7;
+ out2 = out6 ^ in0 ^ in1;
+ out3 = out5 ^ in5 ^ in6;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_85(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x85 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in1 ^ in6;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out1 = tmp0 ^ in2;
+ out6 = tmp1 ^ in5;
+ out4 = tmp2 ^ in3;
+ tmp3 = out1 ^ out6;
+ out2 = tmp3 ^ in0;
+ out3 = tmp2 ^ tmp3 ^ in7;
+ out7 = out2 ^ out3 ^ in1;
+ out5 = tmp1 ^ out3;
+ out0 = tmp2 ^ out7 ^ in5;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_86(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x86 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; every outN here is a direct XOR of the saved inN words. */
+ out6 = in3;
+ out7 = in0 ^ in4;
+ out0 = in1 ^ in5;
+ out5 = in2 ^ in7;
+ out3 = in4 ^ in5 ^ in6;
+ out1 = in0 ^ in2 ^ in6;
+ out4 = in1 ^ in6 ^ in7;
+ out2 = in0 ^ in3 ^ in5 ^ in7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_87(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x87 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out6 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ out7 = in0 ^ in4 ^ in7;
+ out5 = in2 ^ in5 ^ in7;
+ out3 = out6 ^ in4 ^ in5;
+ out0 = tmp0 ^ in5;
+ tmp1 = tmp0 ^ in6;
+ out2 = out5 ^ in0 ^ in3;
+ out1 = tmp1 ^ in2;
+ out4 = tmp1 ^ out7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_88(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x88 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out1 = in2 ^ in7;
+ tmp0 = in5 ^ in6;
+ out0 = in1 ^ in6 ^ in7;
+ out6 = in4 ^ in5 ^ in7;
+ out3 = out0 ^ out1 ^ in0 ^ in4;
+ out7 = tmp0 ^ in0;
+ tmp1 = tmp0 ^ in3;
+ out2 = out0 ^ in3;
+ out4 = tmp1 ^ in2;
+ out5 = tmp1 ^ out6;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_89(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x89 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in0 ^ in7;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in6;
+ out1 = tmp1 ^ in1;
+ out7 = tmp2 ^ in5;
+ out0 = tmp2 ^ in1;
+ out2 = out1 ^ in3 ^ in6;
+ out6 = out7 ^ in0 ^ in4;
+ out5 = out6 ^ in3;
+ out3 = tmp0 ^ out2 ^ in4;
+ out4 = tmp1 ^ out5;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_8A(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x8A in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; out3, out4 and out5 reuse already-computed outputs, so statement order matters. */
+ out0 = in1 ^ in6;
+ out7 = in0 ^ in5;
+ out2 = in3 ^ in6;
+ out6 = in4 ^ in7;
+ out1 = in0 ^ in2 ^ in7;
+ out3 = out0 ^ out6 ^ in0;
+ out4 = out1 ^ out7 ^ in6;
+ out5 = out2 ^ in7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_8B(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x8B in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in0 ^ in1;
+ tmp1 = in3 ^ in6;
+ tmp2 = in5 ^ in7;
+ tmp3 = tmp0 ^ in7;
+ out0 = tmp0 ^ in6;
+ out2 = tmp1 ^ in2;
+ out5 = tmp1 ^ tmp2;
+ out7 = tmp2 ^ in0;
+ tmp4 = tmp3 ^ in4;
+ out1 = tmp3 ^ in2;
+ out6 = tmp4 ^ out0;
+ out4 = out6 ^ in2 ^ in5;
+ out3 = tmp1 ^ tmp4;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_8C(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x8C in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; out2, out3 and out4 reuse already-computed outputs, so statement order matters. */
+ out1 = in2;
+ out0 = in1 ^ in7;
+ out7 = in0 ^ in6;
+ out5 = in4 ^ in6;
+ out6 = in5 ^ in7;
+ out2 = out0 ^ in0 ^ in3;
+ out3 = out5 ^ out7 ^ in2 ^ in7;
+ out4 = out6 ^ in3;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_8D(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x8D in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out1 = in1 ^ in2;
+ tmp0 = in6 ^ in7;
+ out0 = in0 ^ in1 ^ in7;
+ out5 = in4 ^ in5 ^ in6;
+ out6 = tmp0 ^ in5;
+ out7 = tmp0 ^ in0;
+ out4 = tmp0 ^ out5 ^ in3;
+ out2 = out0 ^ in2 ^ in3;
+ out3 = out2 ^ in1 ^ in4;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_8E(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x8E in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T: outN takes in(N+1) for N = 0..6 and out7 takes in0, with in0 also XORed into out1, out2 and out3. */
+ out0 = in1;
+ out4 = in5;
+ out7 = in0;
+ out5 = in6;
+ out6 = in7;
+ out3 = in0 ^ in4;
+ out1 = in0 ^ in2;
+ out2 = in0 ^ in3;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_8F(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x8F in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out0 = in0 ^ in1;
+ tmp0 = in0 ^ in3;
+ out4 = in4 ^ in5;
+ out7 = in0 ^ in7;
+ out5 = in5 ^ in6;
+ out6 = in6 ^ in7;
+ out1 = out0 ^ in2;
+ out2 = tmp0 ^ in2;
+ out3 = tmp0 ^ in4;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_90(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x90 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in6 ^ in7;
+ out3 = tmp0 ^ in7;
+ out1 = tmp1 ^ in5;
+ tmp2 = out1 ^ in4;
+ out6 = tmp2 ^ in3;
+ out5 = out6 ^ in1;
+ out4 = out5 ^ in0;
+ out0 = tmp0 ^ tmp2;
+ out7 = tmp0 ^ out4;
+ out2 = tmp1 ^ out5;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_91(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x91 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in2 ^ in4;
+ tmp1 = tmp0 ^ in3 ^ in5;
+ out2 = tmp1 ^ in1;
+ out6 = tmp1 ^ in7;
+ tmp2 = out2 ^ in5 ^ in7;
+ out3 = tmp2 ^ in4;
+ out5 = tmp2 ^ in6;
+ out1 = tmp1 ^ out5 ^ in2;
+ tmp3 = out1 ^ in0;
+ out4 = tmp3 ^ in3;
+ out0 = tmp0 ^ tmp3;
+ out7 = tmp2 ^ tmp3;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_92(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x92 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out3 = in1;
+ tmp0 = in4 ^ in5;
+ tmp1 = tmp0 ^ in1;
+ out2 = tmp0 ^ in3 ^ in7;
+ out0 = tmp1 ^ in6;
+ out7 = out2 ^ in0;
+ out4 = out0 ^ in0 ^ in2;
+ out5 = out4 ^ out7 ^ in5;
+ out6 = tmp1 ^ out5;
+ out1 = out6 ^ out7 ^ in7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_93(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x93 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out3 = in1 ^ in3;
+ tmp0 = in2 ^ in7;
+ tmp1 = out3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out5 = tmp0 ^ tmp1;
+ out6 = tmp2 ^ in3;
+ out2 = out6 ^ in5;
+ out0 = out2 ^ out5 ^ in0;
+ out7 = tmp1 ^ out0;
+ out1 = tmp2 ^ out0;
+ out4 = out1 ^ in7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_94(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x94 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out3 = in2 ^ in6;
+ tmp0 = in1 ^ in4 ^ in5;
+ out1 = out3 ^ in5;
+ out5 = tmp0 ^ out3;
+ out0 = tmp0 ^ in7;
+ out4 = tmp0 ^ in0 ^ in3;
+ out6 = out1 ^ in3 ^ in7;
+ out2 = out4 ^ in6;
+ out7 = out0 ^ out2 ^ in4;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_95(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x95 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in2 ^ in3;
+ out3 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in7;
+ tmp2 = out3 ^ in0;
+ out6 = tmp1 ^ in5;
+ tmp3 = tmp2 ^ in4;
+ out7 = tmp3 ^ in2;
+ tmp4 = tmp3 ^ in5;
+ out2 = tmp4 ^ in1;
+ tmp5 = out2 ^ in6;
+ out0 = tmp1 ^ tmp5;
+ out1 = tmp5 ^ out7;
+ out4 = tmp2 ^ out1;
+ out5 = tmp4 ^ out4;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_96(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x96 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out3 = in6 ^ in7;
+ tmp0 = in1 ^ in5;
+ tmp1 = in5 ^ in6;
+ out6 = out3 ^ in2 ^ in3;
+ out0 = tmp0 ^ in4;
+ tmp2 = tmp1 ^ in2;
+ out4 = out0 ^ in0 ^ in7;
+ out1 = tmp2 ^ in0;
+ out5 = tmp2 ^ in1;
+ out7 = tmp0 ^ out4 ^ in3;
+ out2 = tmp1 ^ out7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_97(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x97 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in0 ^ in4;
+ tmp1 = in2 ^ in6;
+ out3 = in3 ^ in6 ^ in7;
+ out7 = tmp0 ^ in3;
+ tmp2 = tmp0 ^ in5;
+ out5 = tmp1 ^ in1;
+ out6 = tmp1 ^ out3;
+ out0 = tmp2 ^ in1;
+ out2 = tmp2 ^ out3 ^ in2;
+ tmp3 = out0 ^ in4;
+ out4 = tmp3 ^ in7;
+ out1 = tmp1 ^ tmp3;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_98(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x98 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in5 ^ in7;
+ tmp1 = in1 ^ in4 ^ in7;
+ out1 = tmp0 ^ in2;
+ out0 = tmp1 ^ in6;
+ out2 = tmp1 ^ in3;
+ out6 = out0 ^ out1 ^ in1;
+ out5 = tmp0 ^ out2;
+ out3 = tmp1 ^ out6 ^ in0;
+ out7 = out0 ^ out5 ^ in0;
+ out4 = out6 ^ out7 ^ in7;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_99(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x99 in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ tmp0 = in0 ^ in3;
+ out5 = in1 ^ in3 ^ in4;
+ out6 = in2 ^ in4 ^ in5;
+ out4 = tmp0 ^ in2;
+ tmp1 = tmp0 ^ in6;
+ tmp2 = out5 ^ in7;
+ out7 = tmp1 ^ in5;
+ out0 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in2;
+ out3 = out0 ^ out6 ^ in3;
+ out1 = tmp1 ^ out3;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_9A(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x9A in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out2 = in3 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out5 = in1 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in4;
+ out7 = tmp2 ^ in3;
+ out1 = tmp2 ^ in2;
+ out6 = out0 ^ in1 ^ in2;
+ out4 = out1 ^ in4 ^ in5;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_9B(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x9B in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out5 = in1 ^ in3;
+ tmp0 = in3 ^ in5;
+ out6 = in2 ^ in4;
+ out4 = in0 ^ in2 ^ in7;
+ out7 = tmp0 ^ in0;
+ out2 = out6 ^ in3;
+ out1 = out4 ^ in1 ^ in5;
+ out3 = out7 ^ in1 ^ in6;
+ out0 = tmp0 ^ out3 ^ in4;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_9C(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x9C in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out1 = in2 ^ in5;
+ tmp0 = in0 ^ in3 ^ in6;
+ out3 = out1 ^ in0;
+ out6 = out1 ^ in6;
+ out7 = tmp0 ^ in7;
+ out4 = out7 ^ in4;
+ out2 = out4 ^ in1;
+ out0 = tmp0 ^ out2;
+ out5 = out0 ^ in5;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_9D(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x9D in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out6 = in2 ^ in5;
+ tmp0 = in0 ^ in3;
+ out5 = in1 ^ in4 ^ in7;
+ out1 = out6 ^ in1;
+ out3 = tmp0 ^ out6;
+ out7 = tmp0 ^ in6;
+ out0 = out5 ^ in0;
+ out4 = out7 ^ in7;
+ out2 = out5 ^ out7 ^ in2;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_9E(uint8_t * out, uint8_t * in, unsigned int width) /* In place: out = T(out) ^ in, with T the fixed XOR network in the loop body. */
+{ /* NOTE(review): name suggests T is multiplication by 0x9E in GF(2^8) -- not verifiable from this block alone, confirm. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* bytes reinterpreted as 64-bit words; assumes suitably aligned buffers -- TODO confirm with callers */
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Each buffer is addressed as 8 planes of width words (plane k starts at word k * width). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is the old word of plane N of out (read from out_ptr, not in_ptr), saved before any store. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* XOR network T; later lines reuse earlier results (tmpN and/or outN), so statement order matters. */
+ out0 = in1 ^ in4;
+ tmp0 = in0 ^ in5;
+ out6 = in2 ^ in6;
+ out7 = in0 ^ in3 ^ in7;
+ out4 = in0 ^ in4 ^ in6;
+ out5 = in1 ^ in5 ^ in7;
+ out1 = tmp0 ^ in2;
+ out3 = tmp0 ^ in7;
+ out2 = out4 ^ in3;
+ /* Store T(out) XORed with the matching word of in, plane by plane. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_9F(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0x9F * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out6 = in2; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out7 = in0 ^ in3;
+ tmp0 = in0 ^ in1;
+ out4 = in0 ^ in6;
+ out5 = in1 ^ in7;
+ out1 = tmp0 ^ in2 ^ in5;
+ out2 = out7 ^ in2 ^ in4 ^ in6;
+ out3 = out7 ^ in5 ^ in7;
+ out0 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A0(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA0 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in6; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out2 = tmp0 ^ in7;
+ tmp1 = tmp0 ^ in5;
+ out6 = out2 ^ in3 ^ in4;
+ out0 = tmp1 ^ in3;
+ tmp2 = out0 ^ in2;
+ out3 = tmp2 ^ in7;
+ tmp3 = tmp2 ^ in1;
+ out5 = tmp3 ^ in0;
+ out4 = tmp3 ^ out6;
+ out7 = out5 ^ out6 ^ in1;
+ out1 = tmp1 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A1(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA1 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in5; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in4;
+ out4 = tmp1 ^ in7;
+ out7 = tmp2 ^ in0;
+ out6 = tmp2 ^ out4 ^ in3;
+ out3 = out4 ^ in6;
+ out2 = out3 ^ in5;
+ out1 = out2 ^ in4;
+ out5 = out1 ^ out6 ^ in0;
+ out0 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A2(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA2 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in6; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in1 ^ in3 ^ in5;
+ out3 = tmp0 ^ in6;
+ out4 = tmp0 ^ in2 ^ in4;
+ out0 = out3 ^ in7;
+ out6 = out0 ^ in4;
+ out1 = out0 ^ out4 ^ in0;
+ out7 = out1 ^ in5;
+ out5 = out7 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A3(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA3 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in2 ^ in6; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out3 = in1 ^ in5 ^ in6;
+ tmp0 = out2 ^ in0;
+ out4 = out2 ^ out3 ^ in3;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp0 ^ out4 ^ in7;
+ out5 = tmp1 ^ in3;
+ out7 = tmp1 ^ in5;
+ out1 = tmp1 ^ in1 ^ in7;
+ out6 = tmp1 ^ out0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A4(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA4 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in3; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = in2 ^ in4;
+ tmp2 = in2 ^ in5;
+ tmp3 = in0 ^ in7;
+ out0 = tmp0 ^ in5;
+ out6 = tmp0 ^ in6 ^ in7;
+ out1 = tmp1 ^ in6;
+ out7 = tmp1 ^ tmp3;
+ out3 = tmp2 ^ in3;
+ tmp4 = tmp2 ^ out1;
+ out2 = tmp3 ^ in1;
+ out5 = tmp4 ^ out7;
+ out4 = tmp4 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A5(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA5 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in2 ^ in5; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in1 ^ in6;
+ tmp1 = in0 ^ in1;
+ tmp2 = in2 ^ in4;
+ out6 = in1 ^ in3 ^ in7;
+ out4 = tmp0 ^ in5;
+ out1 = tmp0 ^ tmp2;
+ out0 = tmp1 ^ in3 ^ in5;
+ out2 = tmp1 ^ in2 ^ in7;
+ out7 = tmp2 ^ in0;
+ out5 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A6(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA6 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in0; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out3 = in3 ^ in5 ^ in7;
+ out1 = in0 ^ in2 ^ in4 ^ in6;
+ out0 = out3 ^ in1;
+ out7 = out1 ^ in7;
+ out6 = out0 ^ in6;
+ out5 = out7 ^ in5;
+ out4 = out6 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A7(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA7 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in0 ^ in2; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out3 = in5 ^ in7;
+ out7 = out2 ^ in4 ^ in6;
+ out6 = out3 ^ in1 ^ in3;
+ out1 = out7 ^ in1;
+ out5 = out7 ^ in7;
+ out0 = out6 ^ in0;
+ out4 = out6 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A8(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA8 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in2 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = in1 ^ in6;
+ tmp2 = in0 ^ in2 ^ in7;
+ out1 = tmp0 ^ in7;
+ out4 = tmp0 ^ in6;
+ out0 = tmp1 ^ in3;
+ out2 = tmp1 ^ in5;
+ out6 = tmp1 ^ in4;
+ out7 = tmp2 ^ in5;
+ out3 = tmp2 ^ out0 ^ in6;
+ out5 = out7 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_A9(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xA9 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in2 ^ in6; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out6 = in1 ^ in4;
+ out7 = in0 ^ in2 ^ in5;
+ out5 = in0 ^ in3 ^ in7;
+ out2 = out4 ^ in1 ^ in5;
+ out1 = out6 ^ in2 ^ in7;
+ out0 = out2 ^ out7 ^ in3;
+ out3 = out1 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_AA(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xAA * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in2; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = in1 ^ in3;
+ tmp2 = in6 ^ in7;
+ out1 = tmp0 ^ in4 ^ in7;
+ out3 = tmp1 ^ in0;
+ out0 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in5;
+ out7 = tmp0 ^ out2;
+ out6 = out1 ^ out7 ^ in1;
+ out5 = out0 ^ out6 ^ in0;
+ out4 = out5 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_AB(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xAB * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in0 ^ in1; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in1 ^ in4;
+ tmp1 = in0 ^ in7;
+ out6 = tmp0 ^ in5;
+ out1 = tmp0 ^ tmp1 ^ in2;
+ out5 = tmp1 ^ in3 ^ in4;
+ out0 = tmp0 ^ out5 ^ in6;
+ out4 = out0 ^ out3 ^ in2;
+ out2 = out4 ^ in3 ^ in5;
+ out7 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_AC(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xAC * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out0 = in1 ^ in3; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out1 = in2 ^ in4;
+ tmp0 = in0 ^ in2;
+ out4 = in4 ^ in7;
+ out5 = in0 ^ in5;
+ out6 = in1 ^ in6;
+ out7 = tmp0 ^ in7;
+ out3 = tmp0 ^ in3 ^ in6;
+ out2 = out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_AD(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xAD * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in7; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out5 = in0;
+ out6 = in1;
+ out7 = in0 ^ in2;
+ out0 = in0 ^ in1 ^ in3;
+ out2 = out7 ^ in1 ^ in5;
+ out1 = in1 ^ in2 ^ in4;
+ out3 = out7 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_AE(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xAE * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in3 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in0 ^ in4;
+ tmp1 = in0 ^ in7;
+ out0 = in1 ^ in3 ^ in7;
+ out1 = tmp0 ^ in2;
+ out5 = tmp0 ^ in5;
+ tmp2 = tmp1 ^ in6;
+ out2 = tmp1 ^ in5;
+ out3 = tmp2 ^ in3;
+ out7 = tmp2 ^ in2;
+ out6 = tmp2 ^ out2 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_AF(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xAF * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in3; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in0 ^ in7;
+ out5 = in0 ^ in4;
+ out6 = in1 ^ in5;
+ out7 = in0 ^ in2 ^ in6;
+ out0 = tmp0 ^ in1 ^ in3;
+ out3 = tmp0 ^ in6;
+ out2 = tmp0 ^ in2 ^ in5;
+ out1 = out5 ^ in1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B0(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB0 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = in3 ^ in6;
+ out2 = tmp0 ^ in7;
+ tmp2 = tmp0 ^ tmp1;
+ out0 = tmp2 ^ in5;
+ out3 = tmp2 ^ in2;
+ out6 = out3 ^ in6;
+ tmp3 = out6 ^ in0 ^ in1;
+ out7 = tmp3 ^ in5;
+ out5 = tmp3 ^ out2;
+ out1 = out0 ^ out5 ^ in0;
+ out4 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B1(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB1 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out2 = tmp0 ^ in2 ^ in7;
+ tmp1 = out2 ^ in6;
+ out1 = tmp1 ^ in5;
+ out3 = tmp1 ^ in7;
+ out4 = tmp1 ^ in0;
+ out6 = out3 ^ in3;
+ out0 = out6 ^ in0 ^ in2 ^ in5;
+ out5 = tmp1 ^ out0 ^ in1;
+ out7 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B2(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB2 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in4 ^ in7;
+ tmp1 = in1 ^ in3 ^ in6;
+ out3 = tmp0 ^ tmp1;
+ tmp2 = tmp1 ^ in0;
+ out0 = out3 ^ in5;
+ out4 = tmp2 ^ in2;
+ tmp3 = out4 ^ in6;
+ out5 = tmp0 ^ tmp3;
+ out1 = tmp3 ^ out0;
+ tmp4 = out1 ^ in7;
+ out7 = tmp4 ^ in3;
+ out6 = tmp2 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B3(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB3 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out2 = in2 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out3 = tmp1 ^ in4 ^ in7;
+ tmp2 = tmp0 ^ out3;
+ out0 = tmp2 ^ in3;
+ out1 = tmp2 ^ in2;
+ out5 = out0 ^ in2 ^ in6;
+ out7 = tmp1 ^ out5;
+ out4 = out7 ^ in1 ^ in5 ^ in7;
+ out6 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B4(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB4 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out4 = in0 ^ in1; /* XOR network: later lines reuse earlier results, so statement order matters */
+ out5 = out4 ^ in2;
+ tmp0 = out4 ^ in4;
+ out6 = out5 ^ in0 ^ in3;
+ out7 = tmp0 ^ out6;
+ out2 = tmp0 ^ in6 ^ in7;
+ out3 = out7 ^ in0 ^ in7;
+ out0 = out5 ^ out7 ^ in5;
+ out1 = out0 ^ out6 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B5(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB5 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in0 ^ in1; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = in2 ^ in4;
+ out4 = tmp0 ^ in4;
+ out3 = tmp1 ^ in7;
+ tmp2 = out4 ^ in5;
+ out7 = out3 ^ in0 ^ in3;
+ out0 = tmp2 ^ in3;
+ out2 = tmp0 ^ out3 ^ in6;
+ out5 = tmp1 ^ tmp2;
+ out6 = out2 ^ out7 ^ in2;
+ out1 = tmp0 ^ out0 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B6(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB6 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in3 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in1 ^ in2;
+ tmp1 = in0 ^ in4;
+ tmp2 = in3 ^ in5;
+ tmp3 = out3 ^ in1 ^ in7;
+ out5 = tmp0 ^ tmp1;
+ out6 = tmp0 ^ tmp2;
+ out2 = tmp1 ^ in6;
+ out4 = tmp1 ^ tmp3;
+ out0 = tmp3 ^ in5;
+ out1 = out2 ^ in2 ^ in5;
+ out7 = tmp2 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B7(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB7 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ out3 = in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp0 = in0 ^ in4;
+ out2 = tmp0 ^ in2 ^ in6;
+ tmp1 = out2 ^ in7;
+ out1 = out2 ^ in1 ^ in5;
+ out7 = tmp1 ^ in3;
+ out5 = out1 ^ in6;
+ out6 = tmp0 ^ out1 ^ in3;
+ out0 = tmp1 ^ out6;
+ out4 = out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_B8(uint8_t * out, uint8_t * in, unsigned int width) /* In-place update: each 64-bit word of the 8 rows of out (rows are width words apart) becomes an XOR mix of the old out rows, XORed with the same word of in. */
+{ /* Name suggests out = 0xB8 * out + in over GF(2^8), one bit per row -- TODO confirm against the generator and its field polynomial. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in; /* in is only read; both buffers are indexed up to word 8 * width - 1. NOTE(review): cast presumes 8-byte-aligned buffers -- confirm callers. */
+ uint64_t * out_ptr = (uint64_t *)out; /* out is read first, then overwritten */
+
+ for (i = 0; i < width; i++) /* one pass per 64-bit column */
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0]; /* in0..in7 are the OLD out rows, not words of the in buffer */
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+
+ tmp0 = in1 ^ in4; /* XOR network: later lines reuse earlier results, so statement order matters */
+ tmp1 = in2 ^ in5;
+ out2 = tmp0 ^ in5;
+ out4 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in7;
+ out6 = tmp2 ^ out2;
+ out7 = out4 ^ in3;
+ out1 = tmp2 ^ in4;
+ out3 = tmp0 ^ out7;
+ out0 = out3 ^ out4 ^ in6;
+ out5 = out0 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0]; /* XOR in the matching row of in and store back into out */
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+
+ in_ptr++; /* move both buffers to the next column */
+ out_ptr++;
+ }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xB9 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_B9(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x2;
+        t1 = x4 ^ x5;
+        y4 = t0 ^ t1;
+        t2 = t0 ^ x3 ^ x7;
+        y3 = y4 ^ x1;
+        y7 = t2 ^ x5;
+        y2 = y3 ^ x0;
+        y1 = y2 ^ x7;
+        y6 = y1 ^ x5 ^ x6;
+        y0 = t2 ^ y6;
+        y5 = t1 ^ y0;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xBA + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_BA(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x5 ^ x7;
+        y2 = t0 ^ x4;
+        t1 = y2 ^ x2;
+        y1 = t1 ^ x0;
+        y6 = t1 ^ x1;
+        y4 = y1 ^ x3 ^ x4;
+        t2 = y4 ^ y6;
+        y7 = y4 ^ x6 ^ x7;
+        y5 = t2 ^ x6;
+        y3 = t0 ^ t2;
+        y0 = y6 ^ y7 ^ x0;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xBB + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_BB(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y2 = x2 ^ x4 ^ x5 ^ x7;
+        t0 = y2 ^ x1;
+        y4 = y2 ^ x0 ^ x3;
+        y1 = t0 ^ x0;
+        y6 = t0 ^ x6;
+        y3 = y1 ^ x2;
+        t1 = y4 ^ y6 ^ x4;
+        y0 = t1 ^ x7;
+        y5 = t1 ^ x5;
+        y7 = t0 ^ t1;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xBC + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_BC(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x2;
+        t1 = x2 ^ x4;
+        y0 = x1 ^ x3 ^ x4;
+        y6 = x1 ^ x2 ^ x7;
+        y7 = t0 ^ x3;
+        y5 = t0 ^ y6 ^ x6;
+        y1 = t1 ^ x5;
+        t2 = y1 ^ y5 ^ x1;
+        y3 = t2 ^ x3;
+        y4 = t1 ^ t2;
+        y2 = t2 ^ y6;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xBD + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_BD(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x3;
+        t1 = x1 ^ x4;
+        y0 = t0 ^ t1;
+        y7 = t0 ^ x2 ^ x7;
+        y1 = t1 ^ x2 ^ x5;
+        t2 = y1 ^ x0;
+        y2 = t2 ^ x6;
+        y3 = y2 ^ x1 ^ x7;
+        y4 = y3 ^ x2;
+        y5 = t1 ^ y4;
+        y6 = t2 ^ y4;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xBE + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_BE(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x3 ^ x6;
+        y4 = t0 ^ x5;
+        y7 = t0 ^ x2;
+        y3 = y4 ^ x4;
+        y1 = y3 ^ y7 ^ x0;
+        y2 = y3 ^ x3 ^ x7;
+        y0 = y2 ^ y4 ^ x1;
+        y5 = t0 ^ y0;
+        y6 = y1 ^ y5 ^ x6;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xBF + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_BF(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x4;
+        y3 = t0 ^ x5 ^ x6;
+        y4 = y3 ^ x3;
+        t1 = y3 ^ x7;
+        y2 = t1 ^ x2;
+        y5 = t1 ^ x1;
+        t2 = y2 ^ x5;
+        y7 = t2 ^ x3 ^ x4;
+        t3 = t0 ^ y5;
+        y0 = t3 ^ y4;
+        y1 = t2 ^ t3;
+        y6 = t3 ^ x2;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC0 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C0(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y5 = x2 ^ x5;
+        t0 = x1 ^ x4;
+        t1 = x3 ^ x6;
+        y0 = y5 ^ x1;
+        y4 = t0 ^ x7;
+        y3 = t0 ^ t1;
+        y1 = t1 ^ x2;
+        y6 = t1 ^ x0;
+        y7 = y4 ^ x0;
+        y2 = y4 ^ y5 ^ x3;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC1 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C1(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y5 = x2;
+        t0 = x0 ^ x1;
+        y4 = x1 ^ x7;
+        y6 = x0 ^ x3;
+        y3 = x1 ^ x4 ^ x6;
+        t1 = t0 ^ x2;
+        y7 = t0 ^ x4;
+        y0 = t1 ^ x5;
+        y1 = t1 ^ y6 ^ x6;
+        y2 = y6 ^ y7 ^ x5 ^ x7;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC2 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C2(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y4 = x1 ^ x3 ^ x4;
+        t0 = x0 ^ x3 ^ x6;
+        y5 = x2 ^ x4 ^ x5;
+        t1 = y4 ^ x7;
+        y1 = t0 ^ x2;
+        y6 = t0 ^ x5;
+        y2 = y5 ^ x3;
+        y7 = t0 ^ t1;
+        y3 = t1 ^ x2 ^ x6;
+        y0 = t1 ^ y2;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC3 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C3(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y4 = x1 ^ x3;
+        t0 = x0 ^ x2;
+        t1 = x3 ^ x5;
+        y5 = x2 ^ x4;
+        t2 = t0 ^ y4;
+        y2 = t1 ^ x4;
+        y6 = t1 ^ x0;
+        y0 = t1 ^ t2 ^ x7;
+        y1 = t2 ^ x6;
+        y7 = y1 ^ y5 ^ x3;
+        y3 = t0 ^ y7 ^ x7;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC4 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C4(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x3 ^ x7;
+        y3 = t0 ^ x4;
+        t1 = t0 ^ x2;
+        y1 = t1 ^ x6;
+        y5 = t1 ^ x5;
+        y4 = y1 ^ y3 ^ x1;
+        y0 = y4 ^ x4 ^ x5;
+        y2 = y0 ^ y3 ^ x0;
+        y7 = y1 ^ y2 ^ x7;
+        y6 = t1 ^ y0 ^ y7;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC5 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C5(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y3 = x4 ^ x7;
+        t0 = x3 ^ x7;
+        y4 = x1 ^ x2 ^ x6;
+        y6 = x0 ^ x3 ^ x4;
+        y5 = t0 ^ x2;
+        y1 = t0 ^ y4;
+        y0 = y4 ^ x0 ^ x5;
+        y2 = y0 ^ y5 ^ x4;
+        y7 = t0 ^ y2 ^ x6;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC6 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C6(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2, t3, t4, t5;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x5 ^ x6;
+        t1 = x1 ^ x7;
+        t2 = t0 ^ x0;
+        t3 = t0 ^ t1;
+        t4 = t2 ^ x4;
+        y0 = t3 ^ x2;
+        y6 = t4 ^ x3;
+        y2 = y6 ^ x2;
+        y7 = t1 ^ t4;
+        y3 = t2 ^ y2;
+        t5 = y3 ^ x5;
+        y5 = t5 ^ x7;
+        y4 = t3 ^ t5;
+        y1 = t4 ^ y5;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC7 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C7(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y3 = x2 ^ x4;
+        t0 = x3 ^ x5;
+        t1 = y3 ^ x7;
+        y6 = t0 ^ x0 ^ x4;
+        y5 = t1 ^ x3;
+        y2 = y6 ^ x6;
+        y7 = y2 ^ x1 ^ x3;
+        y0 = t1 ^ y7;
+        y1 = t0 ^ y0;
+        y4 = y1 ^ x0;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC8 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C8(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y0 = x1 ^ x2;
+        y1 = x2 ^ x3;
+        t0 = x5 ^ x6;
+        t1 = x0 ^ x7;
+        y2 = y1 ^ x1 ^ x4;
+        y4 = t0 ^ x4;
+        y5 = t0 ^ x7;
+        y6 = t1 ^ x6;
+        y7 = t1 ^ x1;
+        y3 = y2 ^ x0 ^ x2 ^ x5;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xC9 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_C9(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y4 = x5 ^ x6;
+        y7 = x0 ^ x1;
+        t0 = x1 ^ x3;
+        y5 = x6 ^ x7;
+        y6 = x0 ^ x7;
+        y0 = y7 ^ x2;
+        y3 = y7 ^ x4 ^ x5;
+        y1 = t0 ^ x2;
+        y2 = t0 ^ x4;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xCA + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_CA(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x7;
+        t1 = x2 ^ x7;
+        t2 = t0 ^ x6;
+        y0 = t1 ^ x1;
+        t3 = t1 ^ x3;
+        y6 = t2 ^ x5;
+        y7 = t2 ^ x1;
+        y2 = t3 ^ x4;
+        y5 = y6 ^ x0 ^ x4;
+        y4 = y5 ^ x3;
+        y1 = t0 ^ t3;
+        y3 = t3 ^ y5 ^ y7;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xCB + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_CB(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x4 ^ x7;
+        t1 = x5 ^ x7;
+        y7 = x0 ^ x1 ^ x6;
+        y5 = t0 ^ x6;
+        y2 = t0 ^ x3;
+        y6 = t1 ^ x0;
+        y4 = t1 ^ x3 ^ x6;
+        t2 = y5 ^ y7 ^ x2;
+        y1 = t2 ^ y2;
+        y0 = t2 ^ x4;
+        y3 = t2 ^ x5;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xCC + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_CC(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2, t3;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x3 ^ x5;
+        t1 = x1 ^ x6;
+        y1 = x2 ^ x3 ^ x7;
+        y5 = t0 ^ x6;
+        y0 = t1 ^ x2;
+        t2 = y5 ^ x0 ^ x7;
+        y3 = t2 ^ x4;
+        y6 = t0 ^ y3;
+        y7 = t1 ^ t2 ^ x3;
+        t3 = y1 ^ y6;
+        y4 = t2 ^ t3;
+        y2 = t3 ^ x1;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xCD + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_CD(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        y5 = x3 ^ x6;
+        t0 = x0 ^ x1;
+        t1 = x2 ^ x7;
+        y6 = x0 ^ x4 ^ x7;
+        y2 = t0 ^ y5 ^ x4;
+        y7 = t0 ^ x5;
+        y0 = t0 ^ x2 ^ x6;
+        y4 = t1 ^ x5;
+        y1 = t1 ^ x1 ^ x3;
+        y3 = y6 ^ x5 ^ x6;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xCE + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_CE(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x2 ^ x5;
+        t1 = t0 ^ x3;
+        y4 = t1 ^ x4;
+        t2 = y4 ^ x6;
+        y3 = t2 ^ x0;
+        y5 = t2 ^ x2;
+        y2 = y3 ^ x5 ^ x7;
+        y6 = t1 ^ y2;
+        y7 = y2 ^ y4 ^ x1;
+        y1 = t2 ^ y6;
+        y0 = t0 ^ y7 ^ x0;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xCF + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_CF(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x3 ^ x6;
+        t1 = x0 ^ x1 ^ x5;
+        y4 = x2 ^ x3 ^ x5;
+        y5 = t0 ^ x4;
+        y7 = t1 ^ x6;
+        y1 = t1 ^ y4 ^ x7;
+        t2 = y5 ^ x0;
+        y2 = t2 ^ x7;
+        y3 = t2 ^ y4;
+        y6 = t0 ^ y2 ^ x5;
+        y0 = t0 ^ y1;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xD0 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_D0(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2, t3, t4;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x0 ^ x3;
+        t1 = x1 ^ x4;
+        t2 = x2 ^ x5;
+        y7 = t0 ^ t1;
+        y0 = t1 ^ t2;
+        t3 = t2 ^ x3;
+        y1 = t3 ^ x6;
+        t4 = y1 ^ x1;
+        y2 = t4 ^ x7;
+        y3 = y2 ^ x2;
+        y4 = t0 ^ y3;
+        y5 = t3 ^ y3;
+        y6 = t4 ^ y4;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xD1 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_D1(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0, t1, t2;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x3 ^ x5 ^ x6;
+        t1 = t0 ^ x1;
+        y1 = t1 ^ x2;
+        y2 = t1 ^ x7;
+        y3 = y2 ^ x3;
+        y5 = y3 ^ x2;
+        t2 = y3 ^ x0;
+        y4 = t2 ^ x4;
+        y7 = t0 ^ y4;
+        y6 = t2 ^ y1 ^ x6;
+        y0 = y2 ^ y6 ^ x4;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+/*
+ * For each of the 'width' columns, mixes the eight 64-bit rows of 'out'
+ * (row r of column c is word c + r * width) through the fixed XOR network
+ * below, XORs in the matching rows of 'in' and stores the result in 'out'.
+ *
+ * NOTE(review): presumably the bit-sliced GF(2^8) step out = out * 0xD2 + in,
+ * as the name suggests -- confirm against the code generator.
+ */
+static void gf8_muladd_D2(uint8_t * out, uint8_t * in, unsigned int width)
+{
+    uint64_t *src = (uint64_t *)in;
+    uint64_t *dst = (uint64_t *)out;
+    unsigned int col;
+
+    for (col = 0; col < width; col++)
+    {
+        uint64_t *d = dst + col;
+        uint64_t *s = src + col;
+        uint64_t y0, y1, y2, y3, y4, y5, y6, y7;
+        uint64_t t0;
+
+        /* Snapshot the current rows of 'out' before overwriting them. */
+        uint64_t x0 = d[0], x1 = d[width];
+        uint64_t x2 = d[width * 2], x3 = d[width * 3];
+        uint64_t x4 = d[width * 4], x5 = d[width * 5];
+        uint64_t x6 = d[width * 6], x7 = d[width * 7];
+
+        t0 = x5 ^ x6;
+        y2 = t0 ^ x2 ^ x3;
+        y1 = y2 ^ x0;
+        y3 = y2 ^ x1;
+        y4 = y1 ^ x1 ^ x2;
+        y6 = y1 ^ x6 ^ x7;
+        y7 = y4 ^ x4 ^ x5;
+        y5 = y4 ^ y6 ^ x4;
+        y0 = t0 ^ y5;
+
+        /* Accumulate the rows of 'in' and write the column back. */
+        d[0] = y0 ^ s[0];
+        d[width] = y1 ^ s[width];
+        d[width * 2] = y2 ^ s[width * 2];
+        d[width * 3] = y3 ^ s[width * 3];
+        d[width * 4] = y4 ^ s[width * 4];
+        d[width * 5] = y5 ^ s[width * 5];
+        d[width * 6] = y6 ^ s[width * 6];
+        d[width * 7] = y7 ^ s[width * 7];
+    }
+}
+
+static void gf8_muladd_D3(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD3 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out2 = in3 ^ in5 ^ in6;
+ tmp0 = out2 ^ in2;
+ tmp1 = tmp0 ^ in1;
+ out1 = tmp1 ^ in0;
+ out3 = tmp1 ^ in3;
+ out4 = out1 ^ in2 ^ in4;
+ tmp2 = out4 ^ in5;
+ out7 = tmp2 ^ in7;
+ out0 = tmp0 ^ out7;
+ tmp3 = out0 ^ in0;
+ out5 = tmp3 ^ in6;
+ out6 = tmp2 ^ tmp3;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_D4(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD4 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out3 = in3 ^ in5;
+ tmp0 = in1 ^ in5;
+ tmp1 = tmp0 ^ in2;
+ out4 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in6;
+ out2 = out4 ^ in3 ^ in7;
+ out0 = tmp2 ^ in4;
+ out5 = tmp2 ^ out3;
+ out1 = tmp0 ^ out5 ^ in7;
+ out6 = tmp0 ^ out2 ^ in4;
+ out7 = tmp1 ^ out6 ^ in7;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_D5(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD5 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out3 = in5;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in1 ^ in5;
+ out4 = tmp1 ^ in2;
+ out0 = out4 ^ in6;
+ tmp2 = tmp0 ^ out0;
+ out5 = tmp2 ^ in3;
+ out1 = out5 ^ in7;
+ out6 = tmp1 ^ out1;
+ out7 = tmp2 ^ out6;
+ out2 = out7 ^ in4;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_D6(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD6 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in1 ^ in2 ^ in4 ^ in6;
+ out5 = tmp0 ^ in3;
+ out0 = tmp0 ^ in5 ^ in7;
+ out3 = out0 ^ out5 ^ in2;
+ tmp1 = out3 ^ in0;
+ out1 = tmp1 ^ in6;
+ out2 = tmp1 ^ in7;
+ out4 = tmp1 ^ in1;
+ out6 = tmp1 ^ in4;
+ out7 = tmp0 ^ out2;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_D7(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD7 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in0 ^ in3;
+ out3 = in2 ^ in5 ^ in7;
+ out2 = tmp0 ^ in5;
+ tmp1 = tmp0 ^ out3 ^ in1;
+ out1 = tmp1 ^ in6;
+ out4 = tmp1 ^ in4;
+ tmp2 = out1 ^ in4;
+ out6 = tmp2 ^ in1;
+ out7 = tmp2 ^ in2;
+ out0 = tmp2 ^ in3;
+ out5 = tmp2 ^ in0 ^ in7;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_D8(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD8 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out4 = in0;
+ out5 = in1;
+ tmp0 = in1 ^ in2;
+ out6 = in0 ^ in2;
+ out0 = tmp0 ^ in4;
+ tmp1 = tmp0 ^ in3;
+ out7 = tmp1 ^ out6;
+ out2 = tmp1 ^ in6;
+ out3 = out7 ^ in7;
+ out1 = tmp1 ^ in1 ^ in5;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_D9(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xD9 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN values. */
+ out4 = in0 ^ in4;
+ out5 = in1 ^ in5;
+ out2 = in1 ^ in3 ^ in6;
+ out3 = in0 ^ in1 ^ in7;
+ out6 = in0 ^ in2 ^ in6;
+ out0 = out4 ^ in1 ^ in2;
+ out1 = out5 ^ in2 ^ in3;
+ out7 = out3 ^ in3;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_DA(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xDA + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out5 = in1 ^ in4;
+ tmp0 = in2 ^ in7;
+ tmp1 = in0 ^ in2 ^ in3;
+ out0 = tmp0 ^ out5;
+ out4 = tmp0 ^ tmp1;
+ out2 = tmp0 ^ in3 ^ in6;
+ out1 = tmp1 ^ in5;
+ out3 = tmp1 ^ in1;
+ out6 = out1 ^ in3;
+ out7 = out3 ^ in2 ^ in6;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_DB(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xDB + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in0 ^ in1;
+ tmp1 = in1 ^ in5;
+ tmp2 = in3 ^ in7;
+ out3 = tmp0 ^ in2;
+ out5 = tmp1 ^ in4;
+ out6 = tmp1 ^ out3 ^ in6;
+ out2 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in4;
+ tmp4 = out3 ^ in3;
+ out4 = tmp3 ^ in0;
+ out1 = tmp4 ^ in5;
+ out0 = tmp3 ^ tmp4;
+ out7 = tmp0 ^ out2;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_DC(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xDC + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in0 ^ in2;
+ tmp1 = in0 ^ in3;
+ out6 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in7;
+ out3 = tmp1 ^ in6;
+ tmp3 = tmp1 ^ in1;
+ out1 = tmp1 ^ tmp2 ^ in5;
+ out4 = tmp2 ^ in6;
+ out2 = tmp3 ^ in2;
+ out7 = tmp3 ^ in5;
+ out5 = tmp2 ^ out2;
+ out0 = out2 ^ out3 ^ in4;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_DD(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xDD + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN values. */
+ out3 = in0 ^ in6;
+ out2 = in0 ^ in1 ^ in3;
+ out6 = out3 ^ in2 ^ in4;
+ out7 = out2 ^ in5 ^ in7;
+ out0 = out6 ^ in1;
+ out4 = out6 ^ in7;
+ out5 = out7 ^ in0;
+ out1 = out5 ^ in2;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_DE(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xDE + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in2 ^ in3 ^ in6;
+ tmp1 = in3 ^ in4 ^ in7;
+ out4 = tmp0 ^ in0;
+ out5 = tmp1 ^ in1;
+ out3 = out4 ^ in7;
+ out2 = out3 ^ in6;
+ out1 = out2 ^ in5;
+ out6 = tmp1 ^ out1;
+ out0 = tmp0 ^ out5;
+ out7 = out0 ^ out1 ^ in4;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_DF(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xDF + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out2 = in0 ^ in3 ^ in7;
+ tmp0 = out2 ^ in1 ^ in5;
+ out1 = tmp0 ^ in2;
+ out7 = tmp0 ^ in6;
+ out5 = tmp0 ^ in0 ^ in4;
+ tmp1 = out1 ^ out5 ^ in6;
+ out4 = tmp1 ^ in3;
+ out6 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in7;
+ out0 = tmp2 ^ in1;
+ out3 = tmp2 ^ in4;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E0(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE0 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out3 = in1 ^ in7;
+ tmp0 = in2 ^ in4;
+ out4 = out3 ^ in3 ^ in5;
+ out2 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in6;
+ out0 = out4 ^ in2;
+ out6 = out4 ^ in0;
+ out1 = tmp1 ^ in3;
+ out5 = tmp1 ^ in0;
+ out7 = out5 ^ in1;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E1(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE1 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out2 = in1 ^ in4;
+ tmp0 = in1 ^ in7;
+ out3 = tmp0 ^ in3;
+ tmp1 = out3 ^ in5;
+ out4 = tmp1 ^ in4;
+ tmp2 = tmp1 ^ in0;
+ out0 = tmp2 ^ in2;
+ out6 = tmp2 ^ in6;
+ tmp3 = out0 ^ out4 ^ in6;
+ out5 = tmp3 ^ in5;
+ out7 = tmp0 ^ tmp3;
+ out1 = tmp2 ^ out5 ^ in7;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E2(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE2 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN values. */
+ out3 = in1 ^ in2;
+ out4 = in1 ^ in5;
+ out2 = in2 ^ in4 ^ in7;
+ out5 = in0 ^ in2 ^ in6;
+ out0 = out3 ^ in3 ^ in5;
+ out7 = out3 ^ in0 ^ in4;
+ out6 = out2 ^ out7 ^ in3;
+ out1 = out5 ^ in3 ^ in4;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E3(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE3 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out2 = in4 ^ in7;
+ tmp0 = in1 ^ in3;
+ out3 = tmp0 ^ in2;
+ tmp1 = out3 ^ in0;
+ out0 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in4;
+ out1 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in3;
+ out7 = tmp3 ^ in7;
+ out6 = out1 ^ out2 ^ in2;
+ tmp4 = tmp0 ^ out0;
+ out5 = tmp4 ^ in6;
+ out4 = tmp3 ^ tmp4;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E4(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE4 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out3 = in6;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in2 ^ in6;
+ out2 = tmp1 ^ in1;
+ out7 = out2 ^ in5;
+ tmp2 = tmp0 ^ out7;
+ out4 = tmp2 ^ in3;
+ out0 = out4 ^ in7;
+ out6 = tmp1 ^ out0;
+ out5 = tmp2 ^ out6;
+ out1 = out5 ^ in0;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E5(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE5 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out3 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ tmp1 = in5 ^ in7;
+ out2 = tmp0 ^ in4 ^ in6;
+ tmp2 = tmp1 ^ out2;
+ out6 = tmp2 ^ in3;
+ out7 = tmp2 ^ in2;
+ out0 = out6 ^ in2 ^ in4;
+ out5 = out6 ^ in1 ^ in2;
+ out1 = tmp0 ^ out5 ^ in5;
+ out4 = tmp1 ^ out1;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E6(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE6 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN values. */
+ out3 = in2 ^ in6 ^ in7;
+ out2 = out3 ^ in0 ^ in4;
+ out4 = out3 ^ in1 ^ in5;
+ out1 = out2 ^ in3;
+ out7 = out2 ^ out4 ^ in2;
+ out0 = out4 ^ in3 ^ in7;
+ out5 = out1 ^ in4;
+ out6 = out0 ^ out2 ^ in5;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E7(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE7 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in2 ^ in3;
+ out3 = tmp0 ^ in6 ^ in7;
+ tmp1 = out3 ^ in0;
+ out5 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in4;
+ tmp3 = out5 ^ in7;
+ out1 = tmp2 ^ in1;
+ out0 = tmp3 ^ in1;
+ out6 = out1 ^ in2;
+ out2 = tmp0 ^ tmp2;
+ tmp4 = tmp3 ^ out6;
+ out4 = tmp4 ^ in6;
+ out7 = tmp4 ^ in0;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E8(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE8 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out4 = in3 ^ in6;
+ tmp0 = in4 ^ in7;
+ out1 = in2 ^ in3 ^ in4;
+ out5 = tmp0 ^ in0;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp1 ^ in5;
+ out0 = tmp1 ^ out1;
+ out2 = tmp2 ^ in2;
+ out6 = tmp2 ^ out5;
+ tmp3 = out6 ^ in6;
+ out3 = tmp3 ^ in7;
+ out7 = tmp3 ^ in2 ^ in5;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_E9(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xE9 + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ tmp0 = in0 ^ in1;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp1 ^ in4;
+ out6 = tmp2 ^ in5;
+ out7 = tmp2 ^ in2 ^ in7;
+ out3 = out6 ^ in3 ^ in7;
+ out0 = tmp1 ^ out7;
+ out2 = out3 ^ out4 ^ in0;
+ out5 = tmp0 ^ out2;
+ out1 = out0 ^ out5 ^ in5;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_EA(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xEA + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN values. */
+ out4 = in6 ^ in7;
+ out5 = in0 ^ in7;
+ out6 = in0 ^ in1;
+ out0 = in1 ^ in2 ^ in3;
+ out2 = in2 ^ in4 ^ in5;
+ out7 = out6 ^ in2;
+ out1 = out0 ^ out6 ^ in4;
+ out3 = out7 ^ in5 ^ in6;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_EB(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xEB + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN/tmpN values. */
+ out2 = in4 ^ in5;
+ tmp0 = in0 ^ in1;
+ out4 = in4 ^ in6 ^ in7;
+ out5 = in0 ^ in5 ^ in7;
+ out6 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out0 = tmp1 ^ in3;
+ out7 = tmp1 ^ in7;
+ out1 = out0 ^ in4;
+ out3 = out0 ^ in5 ^ in6;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_EC(uint8_t * out, uint8_t * in, unsigned int width)
+{
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /*
+  * Treats `out` and `in` as uint64_t arrays laid out as 8 rows of `width`
+  * words (row k starts at word width * k). Each loop pass takes one word
+  * per row from `out`, recombines the 8 words with a fixed XOR network,
+  * XORs in the matching words of `in` and stores the result in `out`.
+  * NOTE(review): by its name this is presumably out = out * 0xEC + in
+  * in GF(2^8) on bit-sliced data -- confirm against the table generator.
+  */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Inputs of the network come from out (its current contents), not in. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Order matters: later terms reuse earlier outN values. */
+ out3 = in0 ^ in5;
+ out4 = in2 ^ in3 ^ in7;
+ out5 = in0 ^ in3 ^ in4;
+ out6 = out3 ^ in1 ^ in4;
+ out1 = out4 ^ in4;
+ out0 = out4 ^ in1 ^ in6;
+ out2 = out0 ^ out5 ^ in5;
+ out7 = out2 ^ in4 ^ in7;
+ /* Write back, XORing each result with the matching word of in. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Move both buffers on to the next word of every row. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_ED(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xED on bit-sliced GF(2^8) data: out = (0xED * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,2,3,5,6,7 = 0xED); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in2 ^ in4;
+ tmp1 = in3 ^ in5;
+ out4 = tmp0 ^ in3 ^ in7;
+ out3 = tmp1 ^ in0;
+ out1 = out4 ^ in1;
+ out5 = out3 ^ in4;
+ out7 = out1 ^ out5 ^ in6;
+ out2 = tmp0 ^ out7;
+ out0 = tmp1 ^ out7;
+ out6 = out2 ^ in7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_EE(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xEE on bit-sliced GF(2^8) data: out = (0xEE * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 1,2,3,5,6,7 = 0xEE; in1 feeds bits 0,6,7 = 0xEE * 2 mod 0x11D); order matters. */
+ out4 = in2;
+ tmp0 = in0 ^ in1;
+ out5 = in0 ^ in3;
+ tmp1 = tmp0 ^ in2;
+ out6 = tmp0 ^ in4;
+ tmp2 = tmp1 ^ out5;
+ out7 = tmp1 ^ in5;
+ out1 = tmp2 ^ out6 ^ in7;
+ out0 = tmp2 ^ in6;
+ tmp3 = out7 ^ in1;
+ out3 = tmp3 ^ in7;
+ out2 = tmp3 ^ in4 ^ in6;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_EF(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xEF on bit-sliced GF(2^8) data: out = (0xEF * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,1,2,3,5,6,7 = 0xEF); tmpN hold shared sub-sums, so the order matters. */
+ out4 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in6;
+ out5 = tmp0 ^ in3;
+ out2 = tmp0 ^ tmp1;
+ out6 = tmp1 ^ in0 ^ in1;
+ out3 = out5 ^ in2 ^ in7;
+ out7 = out3 ^ in1 ^ in3;
+ out0 = out4 ^ out6 ^ in3;
+ out1 = tmp1 ^ out0 ^ in7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F0(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF0 on bit-sliced GF(2^8) data: out = (0xF0 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 4,5,6,7 = 0xF0); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in1 ^ in2;
+ tmp1 = in4 ^ in5;
+ out2 = tmp0 ^ in6;
+ out3 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in7;
+ out1 = out2 ^ out3 ^ in3;
+ tmp3 = tmp0 ^ tmp2;
+ out0 = tmp3 ^ in3;
+ out5 = tmp3 ^ in0;
+ out4 = out1 ^ out5 ^ in4;
+ out7 = out4 ^ in2;
+ out6 = tmp2 ^ out7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F1(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF1 on bit-sliced GF(2^8) data: out = (0xF1 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,4,5,6,7 = 0xF1); tmpN hold shared sub-sums, so the order matters. */
+ out2 = in1 ^ in6;
+ tmp0 = in3 ^ in5;
+ out3 = tmp0 ^ in1 ^ in4;
+ tmp1 = out3 ^ in2;
+ out1 = tmp1 ^ in6;
+ tmp2 = tmp1 ^ in0;
+ tmp3 = out1 ^ in5;
+ out0 = tmp2 ^ in7;
+ out6 = tmp2 ^ in4;
+ out7 = tmp3 ^ in0;
+ out5 = tmp0 ^ out0;
+ out4 = tmp3 ^ out5 ^ in1;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F2(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF2 on bit-sliced GF(2^8) data: out = (0xF2 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 1,4,5,6,7 = 0xF2); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in4 ^ in5;
+ out2 = in2 ^ in6 ^ in7;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp1 ^ in2;
+ out0 = tmp2 ^ in3;
+ out3 = tmp2 ^ in7;
+ out5 = out3 ^ in0 ^ in4;
+ tmp3 = tmp0 ^ out5;
+ out7 = tmp3 ^ in3;
+ out4 = tmp3 ^ out2;
+ out1 = out0 ^ out4 ^ in4;
+ out6 = tmp1 ^ out1;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F3(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF3 on bit-sliced GF(2^8) data: out = (0xF3 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,1,4,5,6,7 = 0xF3); tmpN hold shared sub-sums, so the order matters. */
+ out2 = in6 ^ in7;
+ tmp0 = in0 ^ in1;
+ out4 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out5 = tmp1 ^ in7;
+ out6 = tmp1 ^ in3;
+ out7 = out6 ^ in4;
+ out0 = out7 ^ in5;
+ out1 = out0 ^ in6;
+ out3 = out0 ^ in0 ^ in7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F4(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF4 on bit-sliced GF(2^8) data: out = (0xF4 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 2,4,5,6,7 = 0xF4); each line builds on the previous one, so the order matters. */
+ out2 = in0 ^ in1 ^ in2;
+ tmp0 = out2 ^ in3;
+ out4 = tmp0 ^ in4;
+ out5 = out4 ^ in5;
+ out6 = out5 ^ in6;
+ out7 = out6 ^ in7;
+ out0 = out7 ^ in0;
+ out1 = out0 ^ in1;
+ out3 = tmp0 ^ out7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F5(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF5 on bit-sliced GF(2^8) data: out = (0xF5 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,2,4,5,6,7 = 0xF5); each line builds on the previous one, so the order matters. */
+ out2 = in0 ^ in1;
+ tmp0 = out2 ^ in2;
+ out4 = tmp0 ^ in3;
+ out5 = out4 ^ in4;
+ out6 = out5 ^ in5;
+ out7 = out6 ^ in6;
+ out0 = out7 ^ in7;
+ out1 = out0 ^ in0;
+ out3 = tmp0 ^ out0;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F6(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF6 on bit-sliced GF(2^8) data: out = (0xF6 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 1,2,4,5,6,7 = 0xF6); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in0 ^ in7;
+ out2 = tmp0 ^ in2;
+ out4 = out2 ^ in1 ^ in4;
+ out7 = out4 ^ in3 ^ in5;
+ out5 = out7 ^ in4 ^ in7;
+ out0 = tmp0 ^ out7 ^ in6;
+ tmp1 = out0 ^ in1;
+ out6 = out0 ^ in0 ^ in5;
+ out3 = tmp1 ^ in3;
+ out1 = tmp0 ^ tmp1;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F7(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF7 on bit-sliced GF(2^8) data: out = (0xF7 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,1,2,4,5,6,7 = 0xF7); each line builds on the previous one, so the order matters. */
+ out2 = in0 ^ in7;
+ tmp0 = out2 ^ in1;
+ out4 = tmp0 ^ in2;
+ out5 = out4 ^ in3 ^ in7;
+ out6 = out5 ^ in4;
+ out7 = out6 ^ in5;
+ out0 = out7 ^ in6;
+ out1 = out0 ^ in7;
+ out3 = tmp0 ^ out1;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F8(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF8 on bit-sliced GF(2^8) data: out = (0xF8 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 3,4,5,6,7 = 0xF8); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in0 ^ in4;
+ tmp1 = in3 ^ in5;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in2 ^ in4;
+ out3 = tmp2 ^ in1;
+ out5 = out3 ^ in5;
+ out7 = out1 ^ out5 ^ in7;
+ out6 = tmp1 ^ out7;
+ out0 = tmp2 ^ out7;
+ out2 = out6 ^ in0;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_F9(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xF9 on bit-sliced GF(2^8) data: out = (0xF9 * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,3,4,5,6,7 = 0xF9); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in3 ^ in5;
+ tmp1 = in0 ^ in6;
+ out4 = tmp0 ^ in0;
+ tmp2 = tmp1 ^ in4;
+ tmp3 = tmp1 ^ in2;
+ out5 = tmp2 ^ in1;
+ out3 = out5 ^ in3;
+ tmp4 = tmp3 ^ out3;
+ out1 = tmp4 ^ in5;
+ out0 = tmp4 ^ in0 ^ in7;
+ out6 = tmp0 ^ out0 ^ in4;
+ out7 = tmp2 ^ tmp4;
+ out2 = tmp3 ^ out6;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_FA(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xFA on bit-sliced GF(2^8) data: out = (0xFA * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 1,3,4,5,6,7 = 0xFA); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in0 ^ in1;
+ tmp1 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in5;
+ tmp3 = tmp1 ^ in7;
+ out5 = tmp2 ^ in6;
+ out6 = tmp3 ^ in6;
+ out7 = tmp3 ^ in3;
+ out3 = out6 ^ in4;
+ out2 = tmp1 ^ out5;
+ out4 = out2 ^ out3 ^ in1;
+ out0 = out4 ^ out7 ^ in5;
+ out1 = tmp2 ^ out0;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_FB(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xFB on bit-sliced GF(2^8) data: out = (0xFB * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,1,3,4,5,6,7 = 0xFB); tmpN hold shared sub-sums, so the order matters. */
+ out2 = in5 ^ in6;
+ tmp0 = in0 ^ in1;
+ out4 = in0 ^ in5 ^ in7;
+ out5 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out6 = tmp1 ^ in7;
+ out7 = tmp1 ^ in3;
+ out0 = out7 ^ in4;
+ out1 = out0 ^ in5;
+ out3 = out0 ^ in6 ^ in7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_FC(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xFC on bit-sliced GF(2^8) data: out = (0xFC * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 2,3,4,5,6,7 = 0xFC); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in1 ^ in2;
+ tmp1 = in0 ^ in7;
+ out2 = tmp0 ^ tmp1 ^ in5;
+ out3 = tmp1 ^ in4;
+ tmp2 = out2 ^ in6;
+ out6 = tmp2 ^ in4;
+ out7 = tmp2 ^ in3;
+ out4 = out6 ^ in1 ^ in3;
+ tmp3 = out4 ^ in0;
+ out1 = tmp3 ^ in6;
+ out0 = tmp3 ^ in1 ^ in5;
+ out5 = tmp0 ^ out4;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_FD(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xFD on bit-sliced GF(2^8) data: out = (0xFD * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 0,2,3,4,5,6,7 = 0xFD); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in7;
+ out2 = tmp0 ^ tmp1;
+ out6 = out2 ^ in2 ^ in4;
+ tmp2 = out6 ^ in0;
+ out1 = tmp2 ^ in3;
+ out0 = tmp0 ^ out1 ^ in6;
+ out5 = out0 ^ in2;
+ tmp3 = out5 ^ in1;
+ out3 = tmp3 ^ in6;
+ out7 = tmp2 ^ tmp3;
+ out4 = tmp1 ^ out7;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_FE(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xFE on bit-sliced GF(2^8) data: out = (0xFE * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds exactly out bits 1,2,3,4,5,6,7 = 0xFE); tmpN hold shared sub-sums, so the order matters. */
+ tmp0 = in0 ^ in2;
+ out2 = tmp0 ^ in5;
+ out3 = tmp0 ^ in4;
+ tmp1 = out3 ^ in6;
+ out4 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in1;
+ out6 = tmp2 ^ in7;
+ tmp3 = tmp2 ^ in0;
+ out0 = tmp3 ^ in3;
+ tmp4 = out0 ^ out4 ^ in7;
+ out5 = tmp4 ^ in6;
+ out7 = tmp4 ^ in2;
+ out1 = tmp3 ^ out5;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void gf8_muladd_FF(uint8_t * out, uint8_t * in, unsigned int width)
+{ /* Multiply-accumulate by the constant 0xFF on bit-sliced GF(2^8) data: out = (0xFF * out) ^ in, updated in place. */
+ unsigned int i;
+ uint64_t * in_ptr = (uint64_t *)in;
+ uint64_t * out_ptr = (uint64_t *)out;
+ /* Both buffers are accessed as 8 planes of 'width' uint64_t words each (plane k starts at word width * k). */
+ for (i = 0; i < width; i++)
+ {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ /* inN is word i of plane N of the OLD contents of out (the multiplicand), not of 'in'; plane N presumably holds bit N of 64 elements. */
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[width];
+ uint64_t in2 = out_ptr[width * 2];
+ uint64_t in3 = out_ptr[width * 3];
+ uint64_t in4 = out_ptr[width * 4];
+ uint64_t in5 = out_ptr[width * 5];
+ uint64_t in6 = out_ptr[width * 6];
+ uint64_t in7 = out_ptr[width * 7];
+ /* Constant-specific XOR network (in0 feeds all eight out bits = 0xFF); tmpN hold shared sub-sums, so the order matters. */
+ out2 = in0 ^ in5;
+ tmp0 = in4 ^ in7;
+ tmp1 = out2 ^ in2;
+ out4 = tmp1 ^ in6;
+ out7 = tmp1 ^ in1 ^ in3;
+ out1 = tmp0 ^ out7;
+ tmp2 = out1 ^ in5;
+ out6 = tmp2 ^ in3;
+ tmp3 = tmp2 ^ in7;
+ out0 = tmp3 ^ in6;
+ out3 = tmp3 ^ in1;
+ out5 = tmp0 ^ out0 ^ in2;
+ /* Fold in the matching word of 'in' and store the result back into out. */
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[width] = out1 ^ in_ptr[width];
+ out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
+ out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
+ out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
+ out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
+ out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
+ out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
+ /* Advance both cursors to the next word of every plane. */
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+void (* ec_gf_muladd[])(uint8_t * out, uint8_t * in, unsigned int width) =
+{ /* Dispatch table with 256 entries in ascending order: ec_gf_muladd[c] is gf8_muladd_<c in two hex digits>, for c = 0x00..0xFF. */
+ gf8_muladd_00, gf8_muladd_01, gf8_muladd_02, gf8_muladd_03,
+ gf8_muladd_04, gf8_muladd_05, gf8_muladd_06, gf8_muladd_07,
+ gf8_muladd_08, gf8_muladd_09, gf8_muladd_0A, gf8_muladd_0B,
+ gf8_muladd_0C, gf8_muladd_0D, gf8_muladd_0E, gf8_muladd_0F,
+ gf8_muladd_10, gf8_muladd_11, gf8_muladd_12, gf8_muladd_13,
+ gf8_muladd_14, gf8_muladd_15, gf8_muladd_16, gf8_muladd_17,
+ gf8_muladd_18, gf8_muladd_19, gf8_muladd_1A, gf8_muladd_1B,
+ gf8_muladd_1C, gf8_muladd_1D, gf8_muladd_1E, gf8_muladd_1F,
+ gf8_muladd_20, gf8_muladd_21, gf8_muladd_22, gf8_muladd_23,
+ gf8_muladd_24, gf8_muladd_25, gf8_muladd_26, gf8_muladd_27,
+ gf8_muladd_28, gf8_muladd_29, gf8_muladd_2A, gf8_muladd_2B,
+ gf8_muladd_2C, gf8_muladd_2D, gf8_muladd_2E, gf8_muladd_2F,
+ gf8_muladd_30, gf8_muladd_31, gf8_muladd_32, gf8_muladd_33,
+ gf8_muladd_34, gf8_muladd_35, gf8_muladd_36, gf8_muladd_37,
+ gf8_muladd_38, gf8_muladd_39, gf8_muladd_3A, gf8_muladd_3B,
+ gf8_muladd_3C, gf8_muladd_3D, gf8_muladd_3E, gf8_muladd_3F,
+ gf8_muladd_40, gf8_muladd_41, gf8_muladd_42, gf8_muladd_43,
+ gf8_muladd_44, gf8_muladd_45, gf8_muladd_46, gf8_muladd_47,
+ gf8_muladd_48, gf8_muladd_49, gf8_muladd_4A, gf8_muladd_4B,
+ gf8_muladd_4C, gf8_muladd_4D, gf8_muladd_4E, gf8_muladd_4F,
+ gf8_muladd_50, gf8_muladd_51, gf8_muladd_52, gf8_muladd_53,
+ gf8_muladd_54, gf8_muladd_55, gf8_muladd_56, gf8_muladd_57,
+ gf8_muladd_58, gf8_muladd_59, gf8_muladd_5A, gf8_muladd_5B,
+ gf8_muladd_5C, gf8_muladd_5D, gf8_muladd_5E, gf8_muladd_5F,
+ gf8_muladd_60, gf8_muladd_61, gf8_muladd_62, gf8_muladd_63,
+ gf8_muladd_64, gf8_muladd_65, gf8_muladd_66, gf8_muladd_67,
+ gf8_muladd_68, gf8_muladd_69, gf8_muladd_6A, gf8_muladd_6B,
+ gf8_muladd_6C, gf8_muladd_6D, gf8_muladd_6E, gf8_muladd_6F,
+ gf8_muladd_70, gf8_muladd_71, gf8_muladd_72, gf8_muladd_73,
+ gf8_muladd_74, gf8_muladd_75, gf8_muladd_76, gf8_muladd_77,
+ gf8_muladd_78, gf8_muladd_79, gf8_muladd_7A, gf8_muladd_7B,
+ gf8_muladd_7C, gf8_muladd_7D, gf8_muladd_7E, gf8_muladd_7F,
+ gf8_muladd_80, gf8_muladd_81, gf8_muladd_82, gf8_muladd_83,
+ gf8_muladd_84, gf8_muladd_85, gf8_muladd_86, gf8_muladd_87,
+ gf8_muladd_88, gf8_muladd_89, gf8_muladd_8A, gf8_muladd_8B,
+ gf8_muladd_8C, gf8_muladd_8D, gf8_muladd_8E, gf8_muladd_8F,
+ gf8_muladd_90, gf8_muladd_91, gf8_muladd_92, gf8_muladd_93,
+ gf8_muladd_94, gf8_muladd_95, gf8_muladd_96, gf8_muladd_97,
+ gf8_muladd_98, gf8_muladd_99, gf8_muladd_9A, gf8_muladd_9B,
+ gf8_muladd_9C, gf8_muladd_9D, gf8_muladd_9E, gf8_muladd_9F,
+ gf8_muladd_A0, gf8_muladd_A1, gf8_muladd_A2, gf8_muladd_A3,
+ gf8_muladd_A4, gf8_muladd_A5, gf8_muladd_A6, gf8_muladd_A7,
+ gf8_muladd_A8, gf8_muladd_A9, gf8_muladd_AA, gf8_muladd_AB,
+ gf8_muladd_AC, gf8_muladd_AD, gf8_muladd_AE, gf8_muladd_AF,
+ gf8_muladd_B0, gf8_muladd_B1, gf8_muladd_B2, gf8_muladd_B3,
+ gf8_muladd_B4, gf8_muladd_B5, gf8_muladd_B6, gf8_muladd_B7,
+ gf8_muladd_B8, gf8_muladd_B9, gf8_muladd_BA, gf8_muladd_BB,
+ gf8_muladd_BC, gf8_muladd_BD, gf8_muladd_BE, gf8_muladd_BF,
+ gf8_muladd_C0, gf8_muladd_C1, gf8_muladd_C2, gf8_muladd_C3,
+ gf8_muladd_C4, gf8_muladd_C5, gf8_muladd_C6, gf8_muladd_C7,
+ gf8_muladd_C8, gf8_muladd_C9, gf8_muladd_CA, gf8_muladd_CB,
+ gf8_muladd_CC, gf8_muladd_CD, gf8_muladd_CE, gf8_muladd_CF,
+ gf8_muladd_D0, gf8_muladd_D1, gf8_muladd_D2, gf8_muladd_D3,
+ gf8_muladd_D4, gf8_muladd_D5, gf8_muladd_D6, gf8_muladd_D7,
+ gf8_muladd_D8, gf8_muladd_D9, gf8_muladd_DA, gf8_muladd_DB,
+ gf8_muladd_DC, gf8_muladd_DD, gf8_muladd_DE, gf8_muladd_DF,
+ gf8_muladd_E0, gf8_muladd_E1, gf8_muladd_E2, gf8_muladd_E3,
+ gf8_muladd_E4, gf8_muladd_E5, gf8_muladd_E6, gf8_muladd_E7,
+ gf8_muladd_E8, gf8_muladd_E9, gf8_muladd_EA, gf8_muladd_EB,
+ gf8_muladd_EC, gf8_muladd_ED, gf8_muladd_EE, gf8_muladd_EF,
+ gf8_muladd_F0, gf8_muladd_F1, gf8_muladd_F2, gf8_muladd_F3,
+ gf8_muladd_F4, gf8_muladd_F5, gf8_muladd_F6, gf8_muladd_F7,
+ gf8_muladd_F8, gf8_muladd_F9, gf8_muladd_FA, gf8_muladd_FB,
+ gf8_muladd_FC, gf8_muladd_FD, gf8_muladd_FE, gf8_muladd_FF
+};
diff --git a/xlators/cluster/ec/src/ec-gf.h b/xlators/cluster/ec/src/ec-gf.h
new file mode 100644
index 00000000000..23bca91e3b5
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-gf.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_GF8_H__
+#define __EC_GF8_H__
+ /* Field parameters: 8-bit symbols; 0x11D is x^8 + x^4 + x^3 + x^2 + 1 (presumably the reduction polynomial - confirm against the implementation). */
+#define EC_GF_BITS 8
+#define EC_GF_MOD 0x11D
+ /* Derived sizes: number of field elements (1 << 8 = 256) and the byte size of one uint64_t processing word. */
+#define EC_GF_SIZE (1 << EC_GF_BITS)
+#define EC_GF_WORD_SIZE sizeof(uint64_t)
+ /* Table of multiply-accumulate routines, defined elsewhere. NOTE(review): uint8_t/uint64_t are used without including <stdint.h>; the includer must provide them. */
+extern void (* ec_gf_muladd[])(uint8_t * out, uint8_t * in,
+ unsigned int width);
+
+#endif /* __EC_GF8_H__ */
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
new file mode 100644
index 00000000000..94ff4757b4d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -0,0 +1,2616 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "compat-errno.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+
+#include "ec-mem-types.h"
+#include "ec-data.h"
+#include "byte-order.h"
+#include "ec-messages.h"
+#include "syncop.h"
+#include "syncop-utils.h"
+#include "cluster-syncop.h"
+/*
+ * Helper macros used by the heal code in this file.
+ *
+ * alloca0(size)            - alloca() a buffer and zero it; evaluates to the
+ *                            buffer. "size" is expanded twice.
+ * EC_COUNT(array, max)     - number of non-zero entries among array[0..max-1].
+ * EC_INTERSECT(dst, a, b, max)
+ *                          - dst[i] = a[i] && b[i] for every i below max.
+ * EC_ADJUST_SOURCE(source, sources, max)
+ *                          - when sources[source] is 0, sets source to the
+ *                            highest index whose sources[] entry is non-zero,
+ *                            or to -1 when there is none.
+ * IA_EQUAL(f, s, field)    - memcmp() equality of the ia_<field> members of
+ *                            two struct iatt values.
+ * EC_REPLIES_ALLOC(replies, numsubvols)
+ *                          - points "replies" at a zeroed alloca() array of
+ *                            numsubvols elements and initialises the
+ *                            entries.list head of each one.
+ *
+ * The arguments are not parenthesised inside the expansions, so pass simple
+ * expressions only. Memory obtained through alloca() is released when the
+ * calling function returns.
+ */
+#define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr; })
+#define EC_COUNT(array, max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res; })
+#define EC_INTERSECT(dst, src1, src2, max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i]; })
+#define EC_ADJUST_SOURCE(source, sources, max) ({int __i; if (sources[source] == 0) {source = -1; for (__i = 0; __i < max; __i++) if (sources[__i]) source = __i; } })
+#define IA_EQUAL(f, s, field) (memcmp (&(f.ia_##field), &(s.ia_##field), sizeof (s.ia_##field)) == 0)
+#define EC_REPLIES_ALLOC(replies, numsubvols) do { \
+ int __i = 0; \
+ replies = alloca0(numsubvols * sizeof (*replies)); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD (&replies[__i].entries.list); \
+ } while (0)
+
+/*
+ * Context handed to the name-heal callbacks in this file. The unsigned char
+ * arrays are flag arrays with one entry per subvolume.
+ */
+struct ec_name_data {
+ call_frame_t *frame; /* frame used to issue the cluster_* operations */
+ unsigned char *participants; /* subvolumes still taking part in the heal */
+ unsigned char *failed_on; /* set by ec_name_heal_handler where a name heal failed */
+ unsigned char *gfidless; /* subvolumes that answered the lookup with a null gfid */
+ unsigned char *enoent; /* subvolumes on which the name does not exist */
+ unsigned char *same; /* flag array stored in the gfid database for one gfid */
+ char *name; /* entry name being healed */
+ inode_t *parent; /* parent directory inode */
+ default_args_cbk_t *replies; /* lookup replies for the name */
+};
+/* NULL-terminated list of xattr keys that ec_ignorable_key_match() treats as ignorable, in addition to keys starting with EC_XATTR_PREFIX. */
+static char *ec_ignore_xattrs[] = {
+ GF_SELINUX_XATTR_KEY,
+ QUOTA_SIZE_KEY,
+ NULL
+};
+
+/*
+ * Dictionary match callback: tells whether @key names an xattr that the
+ * heal code ignores.
+ *
+ * A key is ignorable when it starts with EC_XATTR_PREFIX or is equal to one
+ * of the entries of ec_ignore_xattrs[]. A NULL key is never ignorable.
+ * @dict, @val and @mdata are not used.
+ */
+static gf_boolean_t
+ec_ignorable_key_match (dict_t *dict, char *key, data_t *val, void *mdata)
+{
+        char **ignored = NULL;
+
+        if (key == NULL)
+                return _gf_false;
+
+        if (strncmp (key, EC_XATTR_PREFIX, strlen (EC_XATTR_PREFIX)) == 0)
+                return _gf_true;
+
+        for (ignored = ec_ignore_xattrs; *ignored != NULL; ignored++) {
+                if (strcmp (key, *ignored) == 0)
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Dictionary match callback: the inverse of ec_ignorable_key_match(), i.e.
+ * true for every key that is not ignorable.
+ */
+static gf_boolean_t
+ec_sh_key_match (dict_t *dict, char *key, data_t *val, void *mdata)
+{
+        if (ec_ignorable_key_match (dict, key, val, mdata))
+                return _gf_false;
+
+        return _gf_true;
+}
+/* FOP: heal */
+
+/*
+ * Splits the answers collected in fop->cbk_list by outcome.
+ *
+ * Returns the OR of the masks of all answers with op_ret < 0. When @pgood is
+ * not NULL it receives the OR of the masks of all answers with op_ret >= 0.
+ */
+uintptr_t ec_heal_check(ec_fop_data_t * fop, uintptr_t * pgood)
+{
+        ec_cbk_data_t *answer = NULL;
+        uintptr_t failed = 0;
+        uintptr_t succeeded = 0;
+
+        list_for_each_entry(answer, &fop->cbk_list, list) {
+                if (answer->op_ret >= 0)
+                        succeeded |= answer->mask;
+                else
+                        failed |= answer->mask;
+        }
+
+        if (pgood != NULL)
+                *pgood = succeeded;
+
+        return failed;
+}
+
+/*
+ * Folds the outcome of @fop into the heal state stored in fop->data.
+ *
+ * Under heal->lock, the bits of the failed answers are cleared from
+ * heal->bad and, when @is_open is non-zero, the bits of the successful
+ * answers are added to heal->open. The error of @fop is then reset to 0.
+ */
+void ec_heal_update(ec_fop_data_t * fop, int32_t is_open)
+{
+        ec_heal_t *heal = fop->data;
+        uintptr_t ok_mask = 0;
+        uintptr_t failed_mask = 0;
+
+        failed_mask = ec_heal_check(fop, &ok_mask);
+
+        LOCK(&heal->lock);
+        {
+                heal->bad &= ~failed_mask;
+                if (is_open)
+                        heal->open |= ok_mask;
+        }
+        UNLOCK(&heal->lock);
+
+        fop->error = 0;
+}
+
+/*
+ * Removes from heal->good (under heal->lock) the bits of every answer of
+ * @fop that failed.
+ */
+void ec_heal_avoid(ec_fop_data_t * fop)
+{
+        ec_heal_t *heal = fop->data;
+        uintptr_t failed_mask = ec_heal_check(fop, NULL);
+
+        LOCK(&heal->lock);
+        {
+                heal->good &= ~failed_mask;
+        }
+        UNLOCK(&heal->lock);
+}
+
+/*
+ * Callback of the lock request issued by ec_heal_lock().
+ *
+ * When the lock succeeded (op_ret >= 0) the size information of the inode
+ * being healed is updated with heal->total_size. The update is performed
+ * outside of GF_ASSERT() so that it still happens in builds where the
+ * assertion macro does not evaluate its argument; only the check of the
+ * result is left to the assertion.
+ *
+ * Always returns 0.
+ */
+int32_t ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        ec_fop_data_t *fop = cookie;
+        ec_heal_t *heal = fop->data;
+        gf_boolean_t size_set = _gf_false;
+
+        if (op_ret >= 0) {
+                size_set = ec_set_inode_size(heal->fop, heal->fd->inode,
+                                             heal->total_size);
+                GF_ASSERT(size_set);
+                /* Keeps the variable referenced if the assertion is compiled out. */
+                (void) size_set;
+        }
+
+        return 0;
+}
+
+/*
+ * Issues an inode lock request of the given @type covering @size bytes from
+ * @offset, in the domain named after the translator (heal->xl->name).
+ *
+ * @fd:  when not NULL the request is sent with ec_finodelk() on this fd,
+ *       otherwise ec_inodelk() is used on @loc.
+ *
+ * For F_UNLCK the cached inode information is cleared before the request is
+ * sent and no callback is installed. For any other type ec_heal_lock_cbk()
+ * is installed to update the size information once the lock is granted.
+ */
+void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc,
+                  off_t offset, size_t size)
+{
+        /* Zero the whole structure: fields not assigned below (for example
+         * the owner data) must not carry indeterminate stack contents. */
+        struct gf_flock flock = {0};
+        fop_inodelk_cbk_t cbk = NULL;
+
+        flock.l_type = type;
+        flock.l_whence = SEEK_SET;
+        flock.l_start = offset;
+        flock.l_len = size;
+        flock.l_pid = 0;
+        flock.l_owner.len = 0;
+
+        if (type == F_UNLCK) {
+                /* Remove inode size information before unlocking it. */
+                if (fd == NULL) {
+                        ec_clear_inode_info(heal->fop, heal->loc.inode);
+                } else {
+                        ec_clear_inode_info(heal->fop, heal->fd->inode);
+                }
+        } else {
+                /* Otherwise use the callback to update size information. */
+                cbk = ec_heal_lock_cbk;
+        }
+
+        if (fd != NULL) {
+                ec_finodelk(heal->fop->frame, heal->xl, heal->fop->mask,
+                            EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd,
+                            F_SETLKW, &flock, NULL);
+        } else {
+                ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask,
+                           EC_MINIMUM_ALL, cbk, heal, heal->xl->name, loc,
+                           F_SETLKW, &flock, NULL);
+        }
+}
+
+/*
+ * Convenience wrapper around ec_heal_lock(): locks through heal->fd when
+ * @use_fd is non-zero and through heal->loc otherwise.
+ */
+void ec_heal_inodelk(ec_heal_t *heal, int32_t type, int32_t use_fd,
+                     off_t offset, size_t size)
+{
+        fd_t *fd = NULL;
+
+        if (use_fd)
+                fd = heal->fd;
+
+        ec_heal_lock(heal, type, fd, &heal->loc, offset, size);
+}
+
+/*
+ * dict_foreach() callback that prunes @dict.
+ *
+ * @key is deleted from @dict when it is an ignorable xattr or when it is
+ * also present in the dictionary passed through @arg. Always returns 0.
+ */
+int32_t
+ec_heal_xattr_clean (dict_t *dict, char *key, data_t *data,
+                     void *arg)
+{
+        dict_t *reference = arg;
+
+        if (ec_ignorable_key_match (NULL, key, NULL, NULL) ||
+            (dict_get (reference, key) != NULL)) {
+                dict_del (dict, key);
+        }
+
+        return 0;
+}
+
+/*
+ * Callback of the write issued by ec_heal_readv_cbk(): traces and logs the
+ * result, then updates the heal state through ec_heal_update().
+ * Always returns 0.
+ */
+int32_t
+ec_heal_writev_cbk (call_frame_t *frame, void *cookie,
+                    xlator_t *this, int32_t op_ret, int32_t op_errno,
+                    struct iatt *prebuf, struct iatt *postbuf,
+                    dict_t *xdata)
+{
+        ec_fop_data_t *write_fop = cookie;
+        ec_heal_t *heal = write_fop->data;
+
+        ec_trace("WRITE_CBK", write_fop, "ret=%d, errno=%d", op_ret,
+                 op_errno);
+
+        gf_msg_debug (write_fop->xl->name, 0, "%s: write op_ret %d, op_errno %s"
+                      " at %"PRIu64, uuid_utoa (heal->fd->inode->gfid), op_ret,
+                      strerror (op_errno), heal->offset);
+
+        ec_heal_update(write_fop, 0);
+
+        return 0;
+}
+
+/*
+ * Callback of the read issued by ec_heal_data_block().
+ *
+ * Subvolumes whose read failed are removed from heal->good. When data was
+ * read (op_ret > 0) it is written to the bad subvolumes at the same offset.
+ * Otherwise the heal is flagged as done; on a read error heal->bad is also
+ * cleared. Always returns 0.
+ */
+int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                          int32_t op_ret, int32_t op_errno,
+                          struct iovec * vector, int32_t count,
+                          struct iatt * stbuf, struct iobref * iobref,
+                          dict_t * xdata)
+{
+        ec_fop_data_t *read_fop = cookie;
+        ec_heal_t *heal = read_fop->data;
+
+        ec_trace("READ_CBK", read_fop, "ret=%d, errno=%d", op_ret, op_errno);
+
+        ec_heal_avoid(read_fop);
+
+        if (op_ret > 0) {
+                gf_msg_debug (read_fop->xl->name, 0, "%s: read succeeded, proceeding "
+                              "to write at %"PRIu64, uuid_utoa (heal->fd->inode->gfid),
+                              heal->offset);
+                ec_writev(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE,
+                          ec_heal_writev_cbk, heal, heal->fd, vector, count,
+                          heal->offset, 0, iobref, NULL);
+                return 0;
+        }
+
+        if (op_ret < 0) {
+                gf_msg_debug (read_fop->xl->name, 0, "%s: read failed %s, failing "
+                              "to heal block at %"PRIu64,
+                              uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
+                              heal->offset);
+                heal->bad = 0;
+        }
+
+        /* Nothing was read: either an error or a zero-length read. */
+        heal->done = 1;
+
+        return 0;
+}
+
+/*
+ * Starts the heal of one data block: reads heal->size bytes at heal->offset
+ * from the good subvolumes; ec_heal_readv_cbk() continues the work.
+ *
+ * Nothing is done unless there is at least one good and one bad subvolume
+ * and the file is a regular file.
+ */
+void ec_heal_data_block(ec_heal_t *heal)
+{
+        ec_trace("DATA", heal->fop, "good=%lX, bad=%lX", heal->good, heal->bad);
+
+        if (heal->good == 0)
+                return;
+        if (heal->bad == 0)
+                return;
+        if (heal->iatt.ia_type != IA_IFREG)
+                return;
+
+        ec_readv(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN,
+                 ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset,
+                 0, NULL);
+}
+
+/* FOP: fheal */
+
+/*
+ * fd based variant of ec_heal(): forwards the request to ec_heal() using the
+ * loc stored in the ec context of @fd. Nothing is done when @fd has no such
+ * context.
+ */
+void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
+              int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd,
+              int32_t partial, dict_t *xdata)
+{
+        ec_fd_t *fd_ctx = ec_fd_get(fd, this);
+
+        if (fd_ctx == NULL)
+                return;
+
+        gf_msg_trace ("ec", 0, "FHEAL ctx: flags=%X, open=%lX", fd_ctx->flags,
+                      fd_ctx->open);
+        ec_heal(frame, this, target, minimum, func, data, &fd_ctx->loc, partial,
+                xdata);
+}
+
+/* Common heal code */
+/*
+ * Expands the lowest @numsubvols bits of @mask into @array: array[i] becomes
+ * 1 when bit i is set and 0 otherwise.
+ */
+void
+ec_mask_to_char_array (uintptr_t mask, unsigned char *array, int numsubvols)
+{
+        int bit = 0;
+
+        while (bit < numsubvols) {
+                array[bit] = (mask >> bit) & 1;
+                bit++;
+        }
+}
+
+/*
+ * Packs the first @numsubvols entries of @array into a bit mask: bit i of
+ * the result is set when array[i] is non-zero.
+ */
+uintptr_t
+ec_char_array_to_mask (unsigned char *array, int numsubvols)
+{
+        uintptr_t packed = 0;
+        int bit = numsubvols;
+
+        while (bit-- > 0) {
+                if (array[bit] != 0)
+                        packed |= (1ULL << bit);
+        }
+
+        return packed;
+}
+/*
+ * Chooses the source subvolume of an entry heal from the lookup replies.
+ *
+ * For every valid reply that did not fail, the EC_XATTR_VERSION and
+ * EC_XATTR_DIRTY arrays are read (and removed) from the reply xdata and their
+ * EC_DATA_TXN element is stored in versions[i] and dirty[i]. The source is
+ * the first usable subvolume holding the highest version seen, or the first
+ * usable subvolume when no version above zero was found.
+ *
+ * Usable subvolumes whose version equals that of the source are flagged in
+ * sources[]; the remaining usable ones are flagged in healed_sinks[].
+ *
+ * Returns the index of the source, or -1 when no reply is usable.
+ */
+int
+ec_heal_entry_find_direction (ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ int source = -1;
+ uint64_t max_version = 0;
+ int ret = 0;
+ int i = 0;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (source == -1) /* default to the first usable subvolume */
+ source = i;
+
+ ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_VERSION,
+ xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_DATA_TXN];
+ if (max_version < versions[i]) { /* strictly greater: ties keep the earlier subvolume */
+ max_version = versions[i];
+ source = i;
+ }
+ }
+
+ memset (xattr, 0, sizeof(xattr));
+ ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_DIRTY,
+ xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_DATA_TXN];
+ }
+ }
+
+ if (source < 0)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) { /* classify every usable subvolume against the source */
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (versions[i] == versions[source])
+ sources[i] = 1;
+ else
+ healed_sinks[i] = 1;
+ }
+
+out:
+ return source;
+}
+
+/*
+ * Brings the version xattr of every healed subvolume in line with the one
+ * of the source and, when all subvolumes are either sources or healed sinks,
+ * also erases their dirty counters.
+ *
+ * @type selects which element of the version/dirty arrays is adjusted.
+ * For every subvolume flagged in @sources or @healed_sinks an
+ * GF_XATTROP_ADD_ARRAY64 xattrop is sent that adds
+ * (versions[source] - versions[i]) to the version and, when erasing, adds
+ * -dirty[i] to the dirty counter. Subvolumes for which both deltas are zero
+ * are skipped.
+ *
+ * Returns 0 on success and a negative errno otherwise: -ENOMEM when the
+ * xattr dictionary cannot be allocated, -ENOTCONN when it cannot be filled,
+ * or the error of the last xattrop that failed. A failure on one subvolume
+ * does not stop the processing of the remaining ones.
+ */
+int
+ec_adjust_versions (call_frame_t *frame, ec_t *ec, ec_txn_t type,
+                    inode_t *inode, int source, unsigned char *sources,
+                    unsigned char *healed_sinks, uint64_t *versions,
+                    uint64_t *dirty)
+{
+        int i = 0;
+        int ret = 0;
+        dict_t *xattr = NULL;
+        int op_ret = 0;
+        loc_t loc = {0};
+        gf_boolean_t erase_dirty = _gf_false;
+        uint64_t versions_xattr[2] = {0};
+        uint64_t dirty_xattr[2] = {0};
+        uint64_t allzero[2] = {0};
+
+        loc.inode = inode_ref (inode);
+        gf_uuid_copy (loc.gfid, inode->gfid);
+        xattr = dict_new ();
+        if (!xattr) {
+                /* Report the failure instead of returning success. */
+                op_ret = -ENOMEM;
+                goto out;
+        }
+
+        /* dirty xattr represents if the file/dir needs heal. Unless all the
+         * copies are healed, don't erase it */
+        if (EC_COUNT (sources, ec->nodes) +
+            EC_COUNT (healed_sinks, ec->nodes) == ec->nodes)
+                erase_dirty = _gf_true;
+
+        for (i = 0; i < ec->nodes; i++) {
+                if (!sources[i] && !healed_sinks[i])
+                        continue;
+                versions_xattr[type] = hton64(versions[source] - versions[i]);
+                ret = dict_set_static_bin (xattr, EC_XATTR_VERSION,
+                                           versions_xattr,
+                                           sizeof (versions_xattr));
+                if (ret < 0) {
+                        op_ret = -ENOTCONN;
+                        continue;
+                }
+
+                if (erase_dirty) {
+                        dirty_xattr[type] = hton64(-dirty[i]);
+                        ret = dict_set_static_bin (xattr, EC_XATTR_DIRTY,
+                                                   dirty_xattr,
+                                                   sizeof (dirty_xattr));
+                        if (ret < 0) {
+                                op_ret = -ENOTCONN;
+                                continue;
+                        }
+                }
+
+                /* Nothing to add on this subvolume. */
+                if ((memcmp (versions_xattr, allzero, sizeof (allzero)) == 0) &&
+                    (memcmp (dirty_xattr, allzero, sizeof (allzero)) == 0))
+                        continue;
+
+                ret = syncop_xattrop (ec->xl_list[i], &loc,
+                                      GF_XATTROP_ADD_ARRAY64, xattr, NULL,
+                                      NULL);
+                if (ret < 0) {
+                        /* ret is already a negative errno; keep its sign so
+                         * that the result is consistent with the other
+                         * failure paths of this function. */
+                        op_ret = ret;
+                        continue;
+                }
+        }
+
+out:
+        if (xattr)
+                dict_unref (xattr);
+        loc_wipe (&loc);
+        return op_ret;
+}
+int
+ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ unsigned char *sources, unsigned char *healed_sinks)
+{ /* Chooses the source subvolume of a metadata heal.
+   *
+   * Usable replies (valid and not failed) are grouped: two subvolumes fall
+   * in the same group when gfid, type, prot, uid and gid of their iatt are
+   * equal and their xdata dictionaries match on the keys accepted by
+   * ec_sh_key_match. The largest group must contain at least ec->fragments
+   * members, otherwise -EIO is returned. Members of that group are flagged
+   * in sources[], the other usable subvolumes in healed_sinks[].
+   *
+   * versions[i] and dirty[i] receive the EC_METADATA_TXN element of the
+   * EC_XATTR_VERSION and EC_XATTR_DIRTY arrays, which are removed from the
+   * reply xdata.
+   *
+   * Returns the index of the source with the highest version (or of the
+   * first member of the largest group when no source version is above
+   * zero), or -EIO.
+   */
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ uint64_t max_version = 0;
+ int same_count = 0;
+ int max_same_count = 0;
+ int same_source = -1;
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ int *groups = NULL;
+ struct iatt source_ia = {0};
+ struct iatt child_ia = {0};
+
+ groups = alloca0 (ec->nodes * sizeof(*groups));
+ for (i = 0; i < ec->nodes; i++)
+ groups[i] = -1; /* -1: not assigned to any group yet */
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret < 0)
+ continue;
+ ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_VERSION,
+ xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_METADATA_TXN];
+ }
+
+ memset (xattr, 0, sizeof (xattr));
+ ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_DIRTY,
+ xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_METADATA_TXN];
+ }
+ if (groups[i] >= 0) /*Already part of group*/
+ continue;
+ groups[i] = i; /* subvolume i starts a new group identified by its own index */
+ same_count = 1;
+ source_ia = replies[i].stat;
+ for (j = i + 1; j < ec->nodes; j++) {
+ if (!replies[j].valid || replies[j].op_ret < 0)
+ continue;
+ child_ia = replies[j].stat;
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid))
+ continue;
+ if (!are_dicts_equal(replies[i].xdata, replies[j].xdata,
+ ec_sh_key_match, NULL))
+ continue;
+ groups[j] = i; /*If iatts match put them into a group*/
+ same_count++;
+ }
+
+ if (max_same_count < same_count) { /* remember the largest group; ties keep the earlier one */
+ max_same_count = same_count;
+ same_source = i;
+ }
+ }
+
+ if (max_same_count < ec->fragments) { /* largest group is smaller than ec->fragments */
+ ret = -EIO;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (groups[i] == groups[same_source])
+ sources[i] = 1;
+ else if (replies[i].valid && replies[i].op_ret >= 0)
+ healed_sinks[i] = 1;
+ }
+ for (i = 0; i < ec->nodes; i++) { /* among the sources prefer the one with the highest version */
+ if (sources[i] && (versions[i] > max_version)) {
+ same_source = i;
+ max_version = versions[i];
+ }
+ }
+ ret = same_source;
+out:
+ return ret;
+}
+
+/*
+ * Collects the information needed to decide the direction of a metadata
+ * heal of @inode.
+ *
+ * A lookup is sent to the subvolumes flagged in @locked_on (results in
+ * @replies), followed by a getxattr on the subvolumes that answered the
+ * lookup. Subvolumes on which the getxattr failed are invalidated in
+ * @replies; for the others the lookup xdata is replaced by the getxattr
+ * result, when there is one. ec_heal_metadata_find_direction() then fills
+ * @versions, @dirty, @sources and @healed_sinks.
+ *
+ * Returns the index of the source subvolume, -ENOTCONN when the lookup did
+ * not succeed on more than ec->fragments subvolumes, or -EIO when no source
+ * could be chosen.
+ */
+int
+__ec_heal_metadata_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+ unsigned char *lookup_on = NULL;
+ int ret = 0;
+ int source = 0;
+ default_args_cbk_t *greplies = NULL;
+ int i = 0;
+ EC_REPLIES_ALLOC (greplies, ec->nodes);
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+ output = alloca0 (ec->nodes);
+ lookup_on = alloca0 (ec->nodes);
+ ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies,
+ output, frame, ec->xl, &loc, NULL);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ memcpy (lookup_on, output, ec->nodes); /* remember where the lookup succeeded; output is reused below */
+ /*Use getxattr to get the filtered xattrs which filter internal xattrs*/
+ ret = cluster_getxattr (ec->xl_list, lookup_on, ec->nodes, greplies,
+ output, frame, ec->xl, &loc, NULL, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (lookup_on[i] && !output[i]) { /* lookup worked but getxattr did not: drop this subvolume */
+ replies[i].valid = 0;
+ continue;
+ }
+ if (replies[i].xdata) {
+ dict_unref (replies[i].xdata);
+ replies[i].xdata = NULL;
+ if (greplies[i].xattr)
+ replies[i].xdata = dict_ref (greplies[i].xattr);
+ }
+ }
+
+ source = ec_heal_metadata_find_direction (ec, replies, versions,
+ dirty, sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ ret = source;
+out:
+ cluster_replies_wipe (greplies, ec->nodes);
+ loc_wipe (&loc);
+ return ret;
+}
+
+/* Metadata heal
+ *
+ * __ec_removexattr_sinks: removes from every subvolume other than @source
+ * that is flagged in @sources or @healed_sinks the xattrs that are not
+ * present on the source.
+ *
+ * replies[i].xdata is first reduced (ec_heal_xattr_clean) to the keys that
+ * are neither ignorable nor present in replies[source].xdata; what is left
+ * is passed to removexattr. A subvolume listed as source that still has such
+ * keys is turned into a sink. Subvolumes on which the cleanup or the
+ * removexattr fails are dropped from the arrays.
+ *
+ * Returns 0, or -ENOTCONN when no healed sink is left.
+ */
+int
+__ec_removexattr_sinks (call_frame_t *frame, ec_t *ec, inode_t *inode,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks,
+ default_args_cbk_t *replies)
+{
+ int i = 0;
+ int ret = 0;
+ loc_t loc = {0};
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (i == source)
+ continue;
+ if (!sources[i] && !healed_sinks[i])
+ continue;
+ ret = dict_foreach (replies[i].xdata, ec_heal_xattr_clean,
+ replies[source].xdata);
+ if (ret < 0) {
+ sources[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
+ }
+
+ if (replies[i].xdata->count == 0) { /* no extra xattrs on this subvolume */
+ continue;
+ } else if (sources[i]) {
+ /* This can happen if setxattr/removexattr succeeds on
+ * the bricks but fails to update the version. This
+ * will make sure that the xattrs are made equal after
+ * heal*/
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+
+ ret = syncop_removexattr (ec->xl_list[i], &loc, "",
+ replies[i].xdata, NULL);
+ if (ret < 0)
+ healed_sinks[i] = 0;
+ }
+
+ loc_wipe (&loc);
+ if (EC_COUNT (healed_sinks, ec->nodes) == 0)
+ return -ENOTCONN;
+ return 0;
+}
+/*
+ * Heals the metadata of @inode on the subvolumes flagged in @locked_on.
+ * ec_heal_metadata() calls it after taking the inode lock.
+ *
+ * Steps: choose the source (__ec_heal_metadata_prepare); when every
+ * subvolume is a source only the versions/dirty counters are adjusted.
+ * Otherwise mode, uid and gid of the source are applied to the sinks, xattrs
+ * missing on the source are removed, the non-ignorable xattrs of the source
+ * are set on the sinks and finally the versions are adjusted. Sinks on which
+ * a step fails are removed from @healed_sinks.
+ *
+ * On return @sources and @healed_sinks describe the outcome. Returns
+ * -EIO, -ENOTCONN or -ENOMEM on failure, otherwise the result of
+ * ec_adjust_versions().
+ */
+int
+__ec_heal_metadata (call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ int ret = 0;
+ int source = 0;
+ default_args_cbk_t *replies = NULL;
+ default_args_cbk_t *sreplies = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ unsigned char *output = NULL;
+ dict_t *source_dict = NULL;
+ struct iatt source_buf = {0};
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ EC_REPLIES_ALLOC (sreplies, ec->nodes);
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+ output = alloca0 (ec->nodes);
+ versions = alloca0 (ec->nodes * sizeof (*versions));
+ dirty = alloca0 (ec->nodes * sizeof (*dirty));
+ source = __ec_heal_metadata_prepare (frame, ec, inode, locked_on, replies,
+ versions, dirty, sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (EC_COUNT (sources, ec->nodes) == ec->nodes) { /* metadata already agrees everywhere */
+ ret = 0;
+ goto erase_dirty;
+ }
+
+ if (EC_COUNT (healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+ source_buf = replies[source].stat;
+ ret = cluster_setattr (ec->xl_list, healed_sinks, ec->nodes, sreplies,
+ output, frame, ec->xl, &loc,
+ &source_buf, GF_SET_ATTR_MODE |
+ GF_SET_ATTR_UID | GF_SET_ATTR_GID, NULL);
+ /*In case the operation fails on some of the subvols*/
+ memcpy (healed_sinks, output, ec->nodes);
+ if (EC_COUNT (healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ ret = __ec_removexattr_sinks (frame, ec, inode, source, sources,
+ healed_sinks, replies);
+ if (ret < 0)
+ goto out;
+
+ source_dict = dict_ref (replies[source].xdata);
+ if (dict_foreach_match (source_dict, ec_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL) == -1) { /* strip the ignorable keys before copying the xattrs */
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = cluster_setxattr (ec->xl_list, healed_sinks, ec->nodes,
+ replies, output, frame, ec->xl, &loc,
+ source_dict, 0, NULL);
+
+ EC_INTERSECT (healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT (healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+erase_dirty:
+ ret = ec_adjust_versions (frame, ec, EC_METADATA_TXN, inode, source,
+ sources, healed_sinks, versions, dirty);
+out:
+ if (source_dict)
+ dict_unref (source_dict);
+
+ loc_wipe (&loc);
+ cluster_replies_wipe (replies, ec->nodes);
+ cluster_replies_wipe (sreplies, ec->nodes);
+ return ret;
+}
+/*
+ * Entry point of the metadata heal of @inode.
+ *
+ * Takes an inode lock (offset 0, length 0, domain ec->xl->name) on the
+ * subvolumes set in ec->xl_up, runs __ec_heal_metadata() on the subvolumes
+ * that were locked and releases the lock. The heal is skipped with -ENOTCONN
+ * unless more than ec->fragments subvolumes were locked.
+ *
+ * @sources and @healed_sinks are filled by __ec_heal_metadata(); its result
+ * is returned.
+ */
+int
+ec_heal_metadata (call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes);
+ ret = cluster_inodelk (ec->xl_list, up_subvols, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, inode, 0,
+ 0);
+ { /* section executed between lock and unlock */
+ if (ret <= ec->fragments) {
+ gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked", uuid_utoa (inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_metadata (frame, ec, inode, locked_on, sources,
+ healed_sinks);
+ }
+unlock:
+ cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, ec->xl->name, inode, 0, 0);
+ cluster_replies_wipe (replies, ec->nodes);
+ return ret;
+}
+
+/* Entry heal
+ *
+ * __ec_heal_entry_prepare: looks up @inode on the subvolumes flagged in
+ * @locked_on, requesting the EC_XATTR_VERSION and EC_XATTR_DIRTY xattrs, and
+ * lets ec_heal_entry_find_direction() fill @versions, @dirty, @sources and
+ * @healed_sinks.
+ *
+ * Returns the index of the source subvolume, -ENOMEM when the request
+ * dictionary cannot be built, -ENOTCONN when the lookup did not succeed on
+ * more than ec->fragments subvolumes, or -EIO when no source was found.
+ */
+int
+__ec_heal_entry_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, uint64_t *versions,
+ uint64_t *dirty, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ int source = 0;
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ dict_t *xdata = NULL;
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+ xdata = dict_new ();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_uint64(xdata, EC_XATTR_VERSION, 0) ||
+ dict_set_uint64(xdata, EC_XATTR_DIRTY, 0)) { /* keys to be returned by the lookup */
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ output = alloca0 (ec->nodes);
+ ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies,
+ output, frame, ec->xl, &loc, xdata);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ source = ec_heal_entry_find_direction (ec, replies, versions,
+ dirty, sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ ret = source;
+out:
+ if (xdata)
+ dict_unref (xdata);
+ loc_wipe (&loc);
+ cluster_replies_wipe (replies, ec->nodes);
+ return ret;
+}
+int32_t
+ec_set_new_entry_dirty (ec_t *ec, loc_t *loc, struct iatt *ia,
+ call_frame_t *frame, xlator_t *this, unsigned char *on)
+{ /* Increments the EC_XATTR_DIRTY counters of the entry identified by
+   * ia->ia_gfid on the subvolumes flagged in @on, through a
+   * GF_XATTROP_ADD_ARRAY64 xattrop. Both counters are incremented by one,
+   * except for symlinks, whose data counter is left untouched.
+   *
+   * Returns -ENOMEM when the dictionary cannot be allocated, the error of
+   * ec_dict_set_array() when it fails, -ENOTCONN when the xattrop succeeded
+   * on fewer than ec->fragments subvolumes, and otherwise the value returned
+   * by cluster_xattrop().
+   */
+ dict_t *xattr = NULL;
+ int32_t ret = -1;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ uint64_t dirty[EC_VERSION_SIZE] = {1, 1};
+ loc_t newloc = {0};
+
+ /*Symlinks don't have any data to be healed*/
+ if (ia->ia_type == IA_IFLNK)
+ dirty[EC_DATA_TXN] = 0;
+
+ newloc.inode = inode_ref (loc->inode);
+ gf_uuid_copy (newloc.gfid, ia->ia_gfid);
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ output = alloca0 (ec->nodes);
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = ec_dict_set_array (xattr, EC_XATTR_DIRTY, dirty,
+ EC_VERSION_SIZE);
+ if (ret)
+ goto out;
+
+ ret = cluster_xattrop (ec->xl_list, on, ec->nodes, replies, output,
+ frame, ec->xl, &newloc,
+ GF_XATTROP_ADD_ARRAY64, xattr, NULL);
+
+ if (ret < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+out:
+ if (xattr)
+ dict_unref (xattr);
+ cluster_replies_wipe (replies, ec->nodes);
+ loc_wipe (&newloc);
+ return ret;
+}
+
+/* Name heal
+ *
+ * ec_delete_stale_name: dict_foreach() callback run on every entry of the
+ * gfid database built by __ec_heal_name(). @key is a gfid in string form,
+ * @d holds the flag array of the subvolumes on which the name resolved to
+ * that gfid, and @data is the struct ec_name_data of the heal.
+ *
+ * Nothing is done when the gfid is seen on at least ec->fragments
+ * subvolumes. Otherwise the gfid is looked up on the participants; when more
+ * than ec->redundancy of them answer ESTALE or ENOENT the name is removed
+ * (rmdir for directories, unlink otherwise) from the subvolumes holding it
+ * and the gfid is deleted from the database.
+ *
+ * enoent[], participants[] and the stored flag array are updated to reflect
+ * the outcome. Returns 0, or a negative errno on failure.
+ */
+int
+ec_delete_stale_name (dict_t *gfid_db, char *key, data_t *d, void *data)
+{
+ struct ec_name_data *name_data = data;
+ struct iatt *ia = NULL;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ unsigned char *same = data_to_bin (d);
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ int estale_count = 0;
+ int i = 0;
+ call_frame_t *frame = name_data->frame;
+
+ ec = name_data->frame->this->private;
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ if (EC_COUNT (same, ec->nodes) >= ec->fragments) { /* enough subvolumes agree on this gfid */
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new (name_data->parent->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ gf_uuid_parse (key, loc.gfid);
+ output = alloca0(ec->nodes);
+ ret = cluster_lookup (ec->xl_list, name_data->participants, ec->nodes,
+ replies, output, name_data->frame, ec->xl, &loc,
+ NULL);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1) {
+ if (replies[i].op_errno == ESTALE ||
+ replies[i].op_errno == ENOENT)
+ estale_count++;
+ else
+ name_data->participants[i] = 0;
+ }
+ }
+
+ if (estale_count <= ec->redundancy) {
+ /* We have at least ec->fragments number of fragments, so the
+ * file is recoverable, so don't delete it*/
+
+ /* Please note that the lookup call above could fail with
+ * ENOTCONN on all subvolumes and still this branch will be
+ * true, but in those cases conservatively we decide to not
+ * delete the file until we are sure*/
+ ret = 0;
+ goto out;
+ }
+
+ /*No way to recover, delete the name*/
+ loc_wipe (&loc);
+ loc.parent = inode_ref (name_data->parent);
+ gf_uuid_copy (loc.pargfid, loc.parent->gfid);
+ loc.name = name_data->name;
+ for (i = 0; i < ec->nodes; i++) { /* find a successful reply to learn the type of the entry */
+ if (same[i] && replies[i].valid && (replies[i].op_ret == 0)) {
+ ia = &replies[i].stat;
+ break;
+ }
+ }
+
+ if (!ia) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ if (IA_ISDIR (ia->ia_type)) {
+ ret = cluster_rmdir (ec->xl_list, same, ec->nodes, replies,
+ output, frame, ec->xl, &loc, 1, NULL);
+ } else {
+ ret = cluster_unlink (ec->xl_list, same, ec->nodes, replies,
+ output, frame, ec->xl, &loc, 0, NULL);
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i]) { /* name removed on this subvolume */
+ same[i] = 0;
+ name_data->enoent[i] = 1;
+ } else {
+ /*op failed*/
+ if (same[i])
+ name_data->participants[i] = 0;
+ }
+ }
+ ret = 0;
+ /*This will help in making decisions about creating names*/
+ dict_del (gfid_db, key);
+out:
+ if (ret < 0) {
+ gf_msg_debug (ec->xl->name, 0, "%s/%s: heal failed %s",
+ uuid_utoa (name_data->parent->gfid), name_data->name,
+ strerror (-ret));
+ }
+ cluster_replies_wipe (replies, ec->nodes);
+ loc_wipe (&loc);
+ return ret;
+}
+
+/*
+ * Runs ec_delete_stale_name() on every gfid recorded in @gfid_db for the
+ * entry @name of directory @parent.
+ *
+ * The arguments are bundled into a struct ec_name_data that is handed to the
+ * callback. Returns the result of dict_foreach().
+ */
+int
+ec_delete_stale_names (call_frame_t *frame, ec_t *ec, inode_t *parent,
+                       char *name, default_args_cbk_t *replies, dict_t *gfid_db,
+                       unsigned char *enoent, unsigned char *gfidless,
+                       unsigned char *participants)
+{
+        struct ec_name_data ctx = {
+                .frame        = frame,
+                .participants = participants,
+                .gfidless     = gfidless,
+                .enoent       = enoent,
+                .name         = name,
+                .parent       = parent,
+                .replies      = replies,
+        };
+
+        return dict_foreach (gfid_db, ec_delete_stale_name, &ctx);
+}
+
+/*
+ * dict_foreach() callback: stores the binary value of the visited entry in
+ * the "same" member of the struct ec_name_data passed through @data.
+ * Always returns 0.
+ */
+int
+_assign_same (dict_t *dict, char *key, data_t *value, void *data)
+{
+        struct ec_name_data *ctx = data;
+        unsigned char *flags = data_to_bin (value);
+
+        ctx->same = flags;
+
+        return 0;
+}
+/*
+ * Recreates the entry @name of directory @parent on the subvolumes flagged
+ * in @enoent.
+ *
+ * @gfid_db must contain exactly one gfid; the iatt of the first subvolume
+ * flagged for that gfid in @lookup_replies describes the entry to create.
+ * Directories are created with mkdir. Symlinks are hard-linked where the
+ * gfid already exists and created with symlink (after reading the link
+ * target from a good subvolume) where it does not. Everything else is
+ * created with mknod; regular files additionally get the ec configuration
+ * xattr. Before creating, the new entry is marked dirty through
+ * ec_set_new_entry_dirty().
+ *
+ * Subvolumes of @enoent on which the creation failed are removed from
+ * @participants. Returns 0, or a negative errno when the preparation fails.
+ */
+int
+ec_create_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ default_args_cbk_t *lookup_replies, dict_t *gfid_db,
+ unsigned char *enoent, unsigned char *participants)
+{
+ int ret = 0;
+ int i = 0;
+ struct ec_name_data name_data = {0};
+ struct iatt *ia = NULL;
+ unsigned char *output = 0;
+ unsigned char *output1 = 0;
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ loc_t loc = {0};
+ loc_t srcloc = {0};
+ unsigned char *link = NULL;
+ unsigned char *create = NULL;
+ dict_t *xdata = NULL;
+ char *linkname = NULL;
+ ec_config_t config;
+ /* There should be just one gfid key */
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ if (gfid_db->count != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_foreach (gfid_db, _assign_same, &name_data);
+ if (ret < 0)
+ goto out;
+ /*There should at least be one valid success reply with gfid*/
+ for (i = 0; i < ec->nodes; i++)
+ if (name_data.same[i])
+ break;
+
+ if (i == ec->nodes) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ia = &lookup_replies[i].stat;
+ xdata = dict_new ();
+ loc.parent = inode_ref (parent);
+ gf_uuid_copy (loc.pargfid, parent->gfid);
+ loc.inode = inode_new (parent->table);
+ if (loc.inode)
+ srcloc.inode = inode_ref (loc.inode);
+ gf_uuid_copy (srcloc.gfid, ia->ia_gfid);
+ if (!loc.inode || !xdata || dict_set_static_bin (xdata, "gfid-req",
+ ia->ia_gfid,
+ sizeof (ia->ia_gfid))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ link = alloca0 (ec->nodes);
+ create = alloca0 (ec->nodes);
+ on = alloca0 (ec->nodes);
+ output = alloca0 (ec->nodes);
+ output1 = alloca0 (ec->nodes);
+
+ for (i = 0; i < ec->nodes; i++) { /* on[]: subvolumes whose lookup of the name succeeded */
+ if (!lookup_replies[i].valid)
+ continue;
+ if (lookup_replies[i].op_ret)
+ continue;
+ on[i] = 1;
+ }
+ switch (ia->ia_type) {
+ case IA_IFDIR:
+ ec_set_new_entry_dirty (ec, &loc, ia, frame, ec->xl, on);
+ ret = cluster_mkdir (ec->xl_list, enoent, ec->nodes,
+ replies, output, frame, ec->xl, &loc,
+ st_mode_from_ia (ia->ia_prot,
+ ia->ia_type), 0, xdata);
+ break;
+
+ case IA_IFLNK:
+ /*Check for hard links and create/link*/
+ ret = cluster_lookup (ec->xl_list, enoent, ec->nodes,
+ replies, output, frame, ec->xl,
+ &srcloc, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i]) { /* gfid exists there: a hard link is enough */
+ link[i] = 1;
+ } else {
+ if (replies[i].op_errno == ENOENT ||
+ replies[i].op_errno == ESTALE) {
+ create[i] = 1;
+ }
+ }
+ }
+
+ if (EC_COUNT (link, ec->nodes)) {
+ cluster_link (ec->xl_list, link, ec->nodes,
+ replies, output1, frame, ec->xl,
+ &srcloc, &loc, NULL);
+ }
+
+ if (EC_COUNT (create, ec->nodes)) {
+ cluster_readlink (ec->xl_list, name_data.same,
+ ec->nodes, replies, output,
+ frame, ec->xl, &srcloc, 4096,
+ NULL);
+ if (EC_COUNT (output, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) { /* first subvolume that returned the link target */
+ if (output[i])
+ break;
+ }
+ linkname = alloca0 (strlen(replies[i].buf) + 1);
+ strcpy (linkname, replies[i].buf);
+ ec_set_new_entry_dirty (ec, &loc, ia, frame,
+ ec->xl, on);
+ cluster_symlink (ec->xl_list, create, ec->nodes,
+ replies, output, frame, ec->xl,
+ linkname, &loc, 0, xdata);
+ }
+ for (i = 0; i < ec->nodes; i++) /* merge the results of the link operation */
+ if (output1[i])
+ output[i] = 1;
+ break;
+ case IA_IFREG:
+ ec_set_new_entry_dirty (ec, &loc, ia,
+ frame, ec->xl, on);
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ ret = ec_dict_set_config(xdata, EC_XATTR_CONFIG, &config);
+ if (ret != 0) {
+ goto out;
+ } /* fall through: regular files are created by the mknod below */
+ default:
+ ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY,
+ 1);
+ if (ret)
+ goto out;
+ ret = cluster_mknod (ec->xl_list, enoent, ec->nodes,
+ replies, output, frame, ec->xl,
+ &loc, st_mode_from_ia (ia->ia_prot,
+ ia->ia_type),
+ ia->ia_rdev, 0, xdata);
+ break;
+ }
+
+ for (i = 0; i < ec->nodes; i++) { /* creation failed there: stop considering the subvolume */
+ if (enoent[i] && !output[i])
+ participants[i] = 0;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ gf_msg_debug (ec->xl->name, 0, "%s/%s: heal failed %s",
+ uuid_utoa (parent->gfid), name, strerror (-ret));
+ cluster_replies_wipe (replies, ec->nodes);
+ loc_wipe (&loc);
+ loc_wipe (&srcloc);
+ if (xdata)
+ dict_unref (xdata);
+ return ret;
+}
+/*
+ * Heals the entry @name of directory @parent on the subvolumes flagged in
+ * @participants. ec_heal_name() calls it after locking the parent.
+ *
+ * The name is looked up on every participant. Subvolumes answering ENOENT
+ * are recorded in enoent[]; other failures remove the subvolume from
+ * @participants. Successful answers are grouped by gfid in a dictionary
+ * (gfid string -> flag array of subvolumes). Stale names are then deleted
+ * (ec_delete_stale_names) and, when exactly one gfid remains, the name is
+ * created where it is missing (ec_create_name). When several gfids remain
+ * the entry cannot be healed and @participants is cleared.
+ *
+ * Returns 0 or a negative errno.
+ */
+int
+__ec_heal_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ unsigned char *participants)
+{
+ unsigned char *output = NULL;
+ unsigned char *enoent = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *xdata = NULL;
+ dict_t *gfid_db = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int i = 0;
+ struct iatt *ia = NULL;
+ char gfid[64] = {0};
+ unsigned char *same = NULL;
+ unsigned char *gfidless = NULL;
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ loc.parent = inode_ref (parent);
+ loc.inode = inode_new (parent->table);
+ gf_uuid_copy (loc.pargfid, parent->gfid);
+ loc.name = name;
+ xdata = dict_new ();
+ gfid_db = dict_new ();
+ if (!xdata || !gfid_db || !loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int32 (xdata, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ output = alloca0 (ec->nodes);
+ gfidless = alloca0 (ec->nodes);
+ enoent = alloca0 (ec->nodes);
+ ret = cluster_lookup (ec->xl_list, participants, ec->nodes, replies,
+ output, frame, ec->xl, &loc, NULL); /* NOTE(review): xdata built above is not passed here -- confirm this is intended */
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1) {
+ /*If ESTALE comes here, that means parent dir is not
+ * present, nothing to do there, so reset participants
+ * for that brick*/
+ if (replies[i].op_errno == ENOENT)
+ enoent[i] = 1;
+ else
+ participants[i] = 0;
+ continue;
+ }
+ ia = &replies[i].stat;
+ if (gf_uuid_is_null (ia->ia_gfid)) {
+ if (IA_ISDIR (ia->ia_type) || ia->ia_size == 0)
+ gfidless[i] = 1;
+ else
+ participants[i] = 0;
+ } else {
+ uuid_utoa_r (ia->ia_gfid, gfid);
+ ret = dict_get_bin (gfid_db, gfid, (void **)&same);
+ if (ret < 0) { /* first time this gfid is seen: start a new flag array */
+ same = alloca0(ec->nodes);
+ }
+ same[i] = 1;
+ if (ret < 0) {
+ ret = dict_set_static_bin (gfid_db, gfid, same,
+ ec->nodes);
+ }
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ ret = ec_delete_stale_names (frame, ec, parent, name, replies, gfid_db,
+ enoent, gfidless, participants);
+
+ if (gfid_db->count == 0) {
+ /* All entries seem to be stale entries and deleted,
+ * nothing more to do.*/
+ goto out;
+ }
+
+ if (gfid_db->count > 1) { /* conflicting gfids for the same name */
+ gf_msg (ec->xl->name, GF_LOG_INFO, 0,
+ EC_MSG_HEAL_FAIL, "%s/%s: Not able to heal",
+ uuid_utoa (parent->gfid), name);
+ memset (participants, 0, ec->nodes);
+ goto out;
+ }
+
+ EC_INTERSECT (enoent, enoent, participants, ec->nodes);
+ if (EC_COUNT (enoent, ec->nodes) == 0) { /* the name is not missing on any participant */
+ ret = 0;
+ goto out;
+ }
+
+ ret = ec_create_name (frame, ec, parent, name, replies, gfid_db, enoent,
+ participants);
+out:
+ cluster_replies_wipe (replies, ec->nodes);
+ loc_wipe (&loc);
+ if (xdata)
+ dict_unref (xdata);
+ if (gfid_db)
+ dict_unref (gfid_db);
+ return ret;
+}
+
+/*
+ * Entry point of the heal of the entry @name of directory @parent.
+ *
+ * Takes an inode lock (offset 0, length 0, domain ec->xl->name) on @parent
+ * on the subvolumes flagged in @participants, restricts @participants to
+ * the subvolumes that were locked, runs __ec_heal_name() and releases the
+ * lock. The heal is skipped with -ENOTCONN unless more than ec->fragments
+ * subvolumes were locked.
+ *
+ * Returns -ENOMEM when the inode for the entry cannot be allocated,
+ * -ENOTCONN as described above, or the result of __ec_heal_name().
+ */
+int
+ec_heal_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+              unsigned char *participants)
+{
+        int ret = 0;
+        default_args_cbk_t *replies = NULL;
+        unsigned char *output = NULL;
+        unsigned char *locked_on = NULL;
+        loc_t loc = {0};
+
+        /* Allocate the replies before the first "goto out": the cleanup
+         * code wipes them unconditionally and must not be handed a NULL
+         * array. */
+        EC_REPLIES_ALLOC (replies, ec->nodes);
+
+        loc.parent = inode_ref (parent);
+        loc.name = name;
+        loc.inode = inode_new (parent->table);
+        if (!loc.inode) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        output = alloca0 (ec->nodes);
+        locked_on = alloca0 (ec->nodes);
+        ret = cluster_inodelk (ec->xl_list, participants, ec->nodes, replies,
+                               locked_on, frame, ec->xl, ec->xl->name, parent,
+                               0, 0);
+        {
+                if (ret <= ec->fragments) {
+                        gf_msg_debug (ec->xl->name, 0, "%s/%s: Skipping "
+                                      "heal as only %d number of subvolumes could "
+                                      "be locked", uuid_utoa (parent->gfid), name,
+                                      ret);
+                        ret = -ENOTCONN;
+                        goto unlock;
+                }
+                EC_INTERSECT (participants, participants, locked_on, ec->nodes);
+                ret = __ec_heal_name (frame, ec, parent, name, participants);
+        }
+unlock:
+        cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+                           frame, ec->xl, ec->xl->name, parent, 0, 0);
+out:
+        cluster_replies_wipe (replies, ec->nodes);
+        loc_wipe (&loc);
+        return ret;
+}
+
+/*
+ * Directory-scan callback (registered through syncop_dir_scan() by
+ * ec_heal_names()): heal one directory entry.
+ *
+ * 'data' is the struct ec_name_data shared by the scan. The entry is healed
+ * via ec_heal_name() on a private copy of the participant mask; every
+ * participant that is no longer set in that copy afterwards (all of them
+ * when the heal fails) is recorded in ->failed_on.
+ *
+ * Always returns 0.
+ */
+int
+ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                      void *data)
+{
+        struct ec_name_data *scan      = data;
+        xlator_t            *this      = THIS;
+        ec_t                *ec        = this->private;
+        unsigned char       *healed_on = alloca0 (ec->nodes);
+        int                  idx       = 0;
+
+        memcpy (healed_on, scan->participants, ec->nodes);
+        if (ec_heal_name (scan->frame, ec, parent->inode, entry->d_name,
+                          healed_on) < 0)
+                memset (healed_on, 0, ec->nodes);
+
+        for (idx = 0; idx < ec->nodes; idx++) {
+                if (!scan->participants[idx])
+                        continue;
+                if (!healed_on[idx])
+                        scan->failed_on[idx] = 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Heal every name found in directory 'inode'.
+ *
+ * The directory is scanned once on each participating brick with
+ * ec_name_heal_handler() as the per-entry callback. After each scan, bricks
+ * on which a name heal failed are removed from 'participants' (the caller's
+ * array is modified in place).
+ *
+ * Returns 0 on completion, or -ENOTCONN as soon as no more than
+ * ec->fragments participants remain. The reference taken on 'inode' is
+ * released on every path.
+ */
+int
+ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
+               unsigned char *participants)
+{
+        int i = 0;
+        int j = 0;
+        int ret = 0;
+        loc_t loc = {0};
+        struct ec_name_data name_data = {0};
+
+        loc.inode = inode_ref (inode);
+        gf_uuid_copy (loc.gfid, inode->gfid);
+        name_data.frame = frame;
+        name_data.participants = participants;
+        name_data.failed_on = alloca0 (ec->nodes);
+
+        for (i = 0; i < ec->nodes; i++) {
+                if (!participants[i])
+                        continue;
+                syncop_dir_scan (ec->xl_list[i], &loc,
+                                 GF_CLIENT_PID_SELF_HEALD, &name_data,
+                                 ec_name_heal_handler);
+                for (j = 0; j < ec->nodes; j++)
+                        if (name_data.failed_on[j])
+                                participants[j] = 0;
+
+                if (EC_COUNT (participants, ec->nodes) <= ec->fragments) {
+                        /* Leave through the common exit so that the inode
+                         * reference held by 'loc' is dropped. */
+                        ret = -ENOTCONN;
+                        break;
+                }
+        }
+
+        loc_wipe (&loc);
+        return ret;
+}
+
+/*
+ * Entry (directory) heal of 'inode'.
+ *
+ * Phase 1, under an inodelk on 'inode' taken on the 'heal_on' bricks:
+ * __ec_heal_entry_prepare() fills versions/dirty/sources/healed_sinks and
+ * its return value is remembered as 'source'.
+ * Phase 2, after that lock is released: every brick that is a source or a
+ * healed sink becomes a participant of ec_heal_names(); bricks dropped by
+ * the name heal are removed from 'sources' and 'healed_sinks', then
+ * ec_adjust_versions() is called.
+ *
+ * Returns -ENOTCONN when no more than ec->fragments bricks could be locked,
+ * a negative __ec_heal_entry_prepare() result, or otherwise the value
+ * returned by ec_heal_names().
+ */
+int
+__ec_heal_entry (call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *heal_on, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ unsigned char *participants = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+ int source = 0;
+ int i = 0;
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ versions = alloca0 (ec->nodes * sizeof (*versions));
+ dirty = alloca0 (ec->nodes * sizeof (*dirty));
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ ret = cluster_inodelk (ec->xl_list, heal_on, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, inode,
+ 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked", uuid_utoa (inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_entry_prepare (frame, ec, inode, locked_on,
+ versions, dirty, sources,
+ healed_sinks);
+ source = ret;
+ }
+unlock:
+ /* The unlock is issued on both the success and the failure path */
+ cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, ec->xl->name, inode, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ participants = alloca0 (ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i] || healed_sinks[i])
+ participants[i] = 1;
+ }
+ ret = ec_heal_names (frame, ec, inode, participants);
+
+ /* Too few bricks survived the name heal: skip the version update and
+ * return whatever ec_heal_names() reported */
+ if (EC_COUNT (participants, ec->nodes) <= ec->fragments)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!participants[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 0;
+ }
+ }
+
+ ec_adjust_versions (frame, ec, EC_DATA_TXN, inode, source,
+ sources, healed_sinks, versions, dirty);
+out:
+ cluster_replies_wipe (replies, ec->nodes);
+ return ret;
+}
+
+/*
+ * Entry heal entry point for directory 'inode'.
+ *
+ * Serialises healers by taking an inodelk in the "<xlator-name>:self-heal"
+ * domain on every brick that is currently up, then runs __ec_heal_entry()
+ * on the bricks that granted the lock. 'sources' and 'healed_sinks' are
+ * output arrays of ec->nodes bytes filled by the heal.
+ *
+ * Returns the __ec_heal_entry() result, or -ENOTCONN when no more than
+ * ec->fragments bricks could be locked.
+ */
+int
+ec_heal_entry (call_frame_t *frame, ec_t *ec, inode_t *inode,
+               unsigned char *sources, unsigned char *healed_sinks)
+{
+        unsigned char *locked_on = NULL;
+        unsigned char *up_subvols = NULL;
+        unsigned char *output = NULL;
+        char selfheal_domain[1024] = {0};
+        int ret = 0;
+        default_args_cbk_t *replies = NULL;
+
+        EC_REPLIES_ALLOC (replies, ec->nodes);
+        locked_on = alloca0 (ec->nodes);
+        output = alloca0 (ec->nodes);
+        up_subvols = alloca0 (ec->nodes);
+
+        /* Bounded formatting: an over-long xlator name is truncated instead
+         * of overflowing the on-stack buffer. */
+        snprintf (selfheal_domain, sizeof (selfheal_domain), "%s:self-heal",
+                  ec->xl->name);
+        ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes);
+        /* This uses cluster_inodelk() (not the try-lock variant), so the
+         * call waits if another process is already healing this inode. */
+        ret = cluster_inodelk (ec->xl_list, up_subvols, ec->nodes, replies,
+                               locked_on, frame, ec->xl, selfheal_domain, inode,
+                               0, 0);
+        {
+                if (ret <= ec->fragments) {
+                        gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal "
+                                      "as only %d number of subvolumes could "
+                                      "be locked", uuid_utoa (inode->gfid), ret);
+                        ret = -ENOTCONN;
+                        goto unlock;
+                }
+                ret = __ec_heal_entry (frame, ec, inode, locked_on,
+                                       sources, healed_sinks);
+        }
+unlock:
+        cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+                           frame, ec->xl, selfheal_domain, inode, 0, 0);
+        cluster_replies_wipe (replies, ec->nodes);
+        return ret;
+}
+
+/*Data heal*/
+/*
+ * Decide which bricks are sources and which are sinks for a data heal.
+ *
+ * For every valid, successful reply the data-transaction version, dirty
+ * counter and size are extracted from the reply xattrs into versions[],
+ * dirty[] and size[]. Bricks are grouped by the string "<version>-<size>";
+ * the largest group becomes 'sources' and every other brick with a
+ * successful reply becomes a healed sink.
+ *
+ * Returns the index of a brick belonging to the winning group, -ENOMEM on
+ * allocation/dict failure, -EIO when the largest group has fewer than
+ * ec->fragments members, or the negative dict_get_bin() result if the
+ * winning group cannot be looked up again.
+ */
+int
+ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ uint64_t *size, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ char version_size[64] = {0};
+ dict_t *version_size_db = NULL;
+ unsigned char *same = NULL;
+ int max_same_count = 0;
+ int source = 0;
+ int i = 0;
+ int ret = 0;
+
+ version_size_db = dict_new ();
+ if (!version_size_db) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret < 0)
+ continue;
+ ret = ec_dict_del_array (replies[i].xattr, EC_XATTR_VERSION,
+ xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_DATA_TXN];
+ }
+
+ memset (xattr, 0, sizeof (xattr));
+ ret = ec_dict_del_array (replies[i].xattr, EC_XATTR_DIRTY,
+ xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_DATA_TXN];
+ }
+ /* The result of this lookup is overwritten below, so a missing size
+ * xattr leaves size[i] at whatever the caller initialised it to */
+ ret = ec_dict_del_number (replies[i].xattr, EC_XATTR_SIZE,
+ &size[i]);
+ /*Build a db of same version, size*/
+ snprintf (version_size, sizeof (version_size),
+ "%"PRIu64"-%"PRIu64, versions[i], size[i]);
+ ret = dict_get_bin (version_size_db, version_size,
+ (void **)&same);
+ if (ret < 0) {
+ /* First brick with this version/size pair: start a new group */
+ same = alloca0 (ec->nodes);
+ }
+
+ same[i] = 1;
+ /* Strict '<' means a later group only wins when it is larger */
+ if (max_same_count < EC_COUNT (same, ec->nodes)) {
+ max_same_count = EC_COUNT (same, ec->nodes);
+ source = i;
+ }
+
+ if (ret < 0) {
+ ret = dict_set_static_bin (version_size_db,
+ version_size, same, ec->nodes);
+ }
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ /* If we don't have ec->fragments number of same version,size it is not
+ * recoverable*/
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
+ } else {
+ snprintf (version_size, sizeof (version_size),
+ "%"PRIu64"-%"PRIu64, versions[source], size[source]);
+ ret = dict_get_bin (version_size_db, version_size,
+ (void **)&same);
+ if (ret < 0)
+ goto out;
+ memcpy (sources, same, ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (replies[i].valid && (replies[i].op_ret == 0) &&
+ !sources[i])
+ healed_sinks[i] = 1;
+ }
+ }
+
+ ret = source;
+out:
+ if (version_size_db)
+ dict_unref (version_size_db);
+ return ret;
+}
+
+/*
+ * Gather the state needed for a data heal of 'fd' on the 'locked_on' bricks.
+ *
+ * Steps:
+ *  1. fxattrop (GF_XATTROP_ADD_ARRAY64) with all-zero version, dirty and
+ *     size values; the replies are handed to ec_heal_data_find_direction().
+ *     NOTE(review): adding zero presumably just reads the current xattrs -
+ *     confirm against the xattrop implementation.
+ *  2. ec_heal_data_find_direction() fills versions/dirty/size and splits
+ *     the bricks into sources and healed_sinks.
+ *  3. fstat on the locked bricks; bricks that did not answer are dropped
+ *     and sources whose on-disk size differs from the expected fragment
+ *     size are demoted to sinks.
+ *  4. Sinks with a non-zero on-disk size are flagged in 'trim'.
+ *
+ * When 'stbuf' is non-NULL it receives the iatt of the last matching source
+ * and the returned source index is that brick.
+ *
+ * Returns the source brick index, -ENOMEM, -ENOTCONN, or a negative result
+ * from ec_heal_data_find_direction().
+ */
+int
+__ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *locked_on, uint64_t *versions,
+ uint64_t *dirty, uint64_t *size, unsigned char *sources,
+ unsigned char *healed_sinks, unsigned char *trim,
+ struct iatt *stbuf)
+{
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ dict_t *xattrs = NULL;
+ uint64_t zero_array[2] = {0};
+ int source = 0;
+ int ret = 0;
+ uint64_t zero_value = 0;
+ uint64_t source_size = 0;
+ int i = 0;
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ xattrs = dict_new ();
+ if (!xattrs ||
+ dict_set_static_bin (xattrs, EC_XATTR_VERSION, zero_array,
+ sizeof (zero_array)) ||
+ dict_set_static_bin (xattrs, EC_XATTR_DIRTY, zero_array,
+ sizeof (zero_array)) ||
+ dict_set_static_bin (xattrs, EC_XATTR_SIZE, &zero_value,
+ sizeof (zero_value))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = cluster_fxattrop (ec->xl_list, locked_on, ec->nodes,
+ replies, output, frame, ec->xl, fd,
+ GF_XATTROP_ADD_ARRAY64, xattrs, NULL);
+ if (EC_COUNT (output, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ source = ec_heal_data_find_direction (ec, replies, versions, dirty,
+ size, sources, healed_sinks);
+ ret = source;
+ if (ret < 0)
+ goto out;
+
+ /* There could be files with versions, size same but on disk ia_size
+ * could be different because of disk crashes, mark them as sinks as
+ * well*/
+ ret = cluster_fstat (ec->xl_list, locked_on, ec->nodes, replies,
+ output, frame, ec->xl, fd, NULL);
+ /* Keep only the bricks that answered the fstat */
+ EC_INTERSECT (sources, sources, output, ec->nodes);
+ EC_INTERSECT (healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT (sources, ec->nodes) < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ source_size = ec_adjust_size (ec, size[source], 1);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ if (replies[i].stat.ia_size != source_size) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ } else if (stbuf) {
+ source = i;
+ *stbuf = replies[i].stat;
+ }
+ }
+
+ if (healed_sinks[i]) {
+ if (replies[i].stat.ia_size)
+ trim[i] = 1;
+ }
+ }
+
+ /* Demoting sources above may have left too few of them */
+ if (EC_COUNT(sources, ec->nodes) < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ ret = source;
+out:
+ if (xattrs)
+ dict_unref (xattrs);
+ cluster_replies_wipe (replies, ec->nodes);
+ if (ret < 0) {
+ gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa (fd->inode->gfid), strerror (-ret));
+ } else {
+ gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "
+ "%d", uuid_utoa (fd->inode->gfid),
+ EC_COUNT (sources, ec->nodes),
+ EC_COUNT (healed_sinks, ec->nodes));
+ }
+ return ret;
+}
+
+/*
+ * Flag the sinks of 'fd' as being under self-heal.
+ *
+ * Every healed sink whose data version does not yet carry EC_SELFHEAL_BIT
+ * gets (1 << EC_SELFHEAL_BIT) added to its version xattr via fxattrop.
+ * Sinks on which the fxattrop did not succeed are removed from
+ * 'healed_sinks'; for the bricks that answered, versions[] is updated to
+ * include the bit.
+ *
+ * Returns 0 on success (including when nothing needed marking), -ENOMEM on
+ * allocation failure, or -ENOTCONN when no healed sink is left.
+ */
+int
+__ec_heal_mark_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd,
+                      uint64_t *versions, unsigned char *healed_sinks)
+{
+        int i = 0;
+        int ret = 0;
+        unsigned char *mark = NULL;
+        dict_t *xattrs = NULL;
+        default_args_cbk_t *replies = NULL;
+        unsigned char *output = NULL;
+        uint64_t versions_xattr[2] = {0};
+
+        EC_REPLIES_ALLOC (replies, ec->nodes);
+        xattrs = dict_new ();
+        if (!xattrs) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        mark = alloca0 (ec->nodes);
+        for (i = 0; i < ec->nodes; i++) {
+                if (!healed_sinks[i])
+                        continue;
+                if ((versions[i] >> EC_SELFHEAL_BIT) & 1)
+                        continue;
+                mark[i] = 1;
+        }
+
+        if (EC_COUNT (mark, ec->nodes) == 0) {
+                /* Nothing to mark. Go through the common exit so that the
+                 * dict and the replies are released. */
+                ret = 0;
+                goto out;
+        }
+
+        versions_xattr[EC_DATA_TXN] = hton64(1ULL<<EC_SELFHEAL_BIT);
+        if (dict_set_static_bin (xattrs, EC_XATTR_VERSION, versions_xattr,
+                                 sizeof (versions_xattr))) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        output = alloca0 (ec->nodes);
+        ret = cluster_fxattrop (ec->xl_list, mark, ec->nodes,
+                                replies, output, frame, ec->xl, fd,
+                                GF_XATTROP_ADD_ARRAY64, xattrs, NULL);
+        for (i = 0; i < ec->nodes; i++) {
+                if (!output[i]) {
+                        if (mark[i])
+                                healed_sinks[i] = 0;
+                        continue;
+                }
+                versions[i] |= (1ULL<<EC_SELFHEAL_BIT);
+        }
+
+        if (EC_COUNT (healed_sinks, ec->nodes) == 0) {
+                ret = -ENOTCONN;
+                goto out;
+        }
+        ret = 0;
+
+out:
+        cluster_replies_wipe (replies, ec->nodes);
+        if (xattrs)
+                dict_unref (xattrs);
+        if (ret < 0)
+                gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s",
+                              uuid_utoa (fd->inode->gfid), strerror (-ret));
+        return ret;
+}
+
+/*
+ * State machine that heals one block of data for the ec_heal_t stored in
+ * fop->data.
+ *
+ *   INIT             -> request the write inodelk, go to HEAL_DATA_COPY
+ *   HEAL_DATA_COPY   -> ec_heal_data_block(), go to HEAL_DATA_UNLOCK
+ *   HEAL_DATA_UNLOCK -> release the inodelk, go to REPORT (the negative
+ *                       COPY/UNLOCK states are handled by the same case)
+ *   REPORT           -> invoke the heal callback with success
+ *   -REPORT          -> invoke the heal callback with -1 / fop->error
+ *
+ * Returns the next state, EC_STATE_END once the callback has been invoked
+ * or for an unhandled state.
+ */
+int32_t
+ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
+{
+ ec_heal_t *heal = fop->data;
+ heal->fop = fop;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ ec_owner_set(fop->frame, fop->frame->root);
+
+ ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0);
+
+ return EC_STATE_HEAL_DATA_COPY;
+
+ case EC_STATE_HEAL_DATA_COPY:
+ gf_msg_debug (fop->xl->name, 0, "%s: read/write starting",
+ uuid_utoa (heal->fd->inode->gfid));
+ ec_heal_data_block (heal);
+
+ return EC_STATE_HEAL_DATA_UNLOCK;
+
+ /* The failure variants of copy/unlock share the unlock path */
+ case -EC_STATE_HEAL_DATA_COPY:
+ case -EC_STATE_HEAL_DATA_UNLOCK:
+ case EC_STATE_HEAL_DATA_UNLOCK:
+ ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+ fop->cbks.heal (fop->req_frame, fop, fop->xl, 0,
+ 0, (heal->good | heal->bad),
+ heal->good, heal->bad, NULL);
+ }
+
+ return EC_STATE_END;
+ case -EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+ fop->cbks.heal (fop->req_frame, fop, fop->xl, -1,
+ fop->error, 0, 0, 0, NULL);
+ }
+
+ return EC_STATE_END;
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, 0,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Start the heal of a single block described by 'heal'.
+ *
+ * Allocates an EC_FOP_HEAL fop driven by ec_manager_heal_block() (which
+ * takes and releases the inodelk itself) and hands it to ec_manager().
+ * When validation or the allocation fails, 'func' is invoked directly with
+ * op_ret -1 and op_errno ENOMEM.
+ */
+void
+ec_heal_block (call_frame_t *frame, xlator_t *this, uintptr_t target,
+               int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
+{
+        ec_cbk_t       callback = { .heal = func };
+        ec_fop_data_t *fop      = NULL;
+
+        gf_msg_trace("ec", 0, "EC(HEAL) %p", frame);
+
+        VALIDATE_OR_GOTO(this, out);
+        GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+        fop = ec_fop_data_allocate (frame, this, EC_FOP_HEAL, 0, target, minimum,
+                                    NULL, ec_manager_heal_block, callback,
+                                    heal);
+        if (fop != NULL) {
+                ec_manager(fop, 0);
+                return;
+        }
+
+out:
+        /* Reached only when no fop exists */
+        func(frame, NULL, this, -1, ENOMEM, 0, 0, 0, NULL);
+}
+
+/*
+ * Completion callback for ec_heal_block().
+ *
+ * Detaches the heal descriptor from the fop, records the outcome in
+ * heal->error (op_errno on failure, 0 otherwise) and wakes the sync barrier
+ * stored in heal->data. Always returns 0.
+ */
+int32_t
+ec_heal_block_done (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, uintptr_t mask,
+                    uintptr_t good, uintptr_t bad, dict_t *xdata)
+{
+        ec_fop_data_t *done_fop = cookie;
+        ec_heal_t     *heal     = done_fop->data;
+
+        done_fop->heal = NULL;
+        heal->fop = NULL;
+
+        if (op_ret < 0)
+                heal->error = op_errno;
+        else
+                heal->error = 0;
+
+        syncbarrier_wake (heal->data);
+        return 0;
+}
+
+/*
+ * Heal one block and wait for it to finish.
+ *
+ * Launches ec_heal_block() on the union of the good and bad bricks and
+ * waits on the barrier in heal->data until ec_heal_block_done() wakes it.
+ *
+ * Returns -heal->error when the block heal failed, -ENOTCONN when no bad
+ * brick is left in heal->bad, 0 otherwise.
+ */
+int
+ec_sync_heal_block (call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
+{
+        int ret = 0;
+
+        ec_heal_block (frame, this, heal->bad|heal->good, EC_MINIMUM_ONE,
+                       ec_heal_block_done, heal);
+        syncbarrier_wait (heal->data, 1);
+
+        if (heal->error != 0)
+                ret = -heal->error;
+        else if (heal->bad == 0)
+                ret = -ENOTCONN;
+
+        return ret;
+}
+
+/*
+ * Rebuild the first 'size' bytes of 'fd' on the healed sinks.
+ *
+ * The file is healed block by block through ec_sync_heal_block(). The block
+ * size is the default iobuf page size rounded down to a multiple of the
+ * stripe size. On return 'healed_sinks' is rewritten from heal->bad, i.e.
+ * it only contains the sinks that are still considered healed.
+ *
+ * Returns 0 or a positive value on success (whatever the last
+ * ec_sync_heal_block() returned), -ENOMEM when the barrier cannot be
+ * initialised, or the negative error of the block heal that failed.
+ *
+ * NOTE(review): if the default page size were smaller than
+ * ec->stripe_size, heal->size would become 0 and the offset would never
+ * advance - confirm this cannot happen with supported configurations.
+ */
+int
+ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
+                 unsigned char *sources, unsigned char *healed_sinks)
+{
+        ec_heal_t *heal = NULL;
+        int ret = 0;
+        syncbarrier_t barrier;
+        struct iobuf_pool *pool = NULL;
+
+        if (syncbarrier_init (&barrier))
+                return -ENOMEM;
+
+        heal = alloca0(sizeof (*heal));
+        heal->fd = fd_ref (fd);
+        heal->xl = ec->xl;
+        /* The barrier is already initialised above; initialising it a second
+         * time through heal->data would re-init a live object. */
+        heal->data = &barrier;
+        pool = ec->xl->ctx->iobuf_pool;
+        heal->total_size = size;
+        heal->size = iobpool_default_pagesize (pool);
+        /* We need to adjust the size to a multiple of the stripe size of the
+         * volume. Otherwise writes would need to fill gaps (head and/or tail)
+         * with existent data from the bad bricks. This could be garbage on a
+         * damaged file or it could fail if there aren't enough bricks. */
+        heal->size -= heal->size % ec->stripe_size;
+        heal->bad = ec_char_array_to_mask (healed_sinks, ec->nodes);
+        heal->good = ec_char_array_to_mask (sources, ec->nodes);
+        heal->iatt.ia_type = IA_IFREG;
+        LOCK_INIT(&heal->lock);
+
+        for (heal->offset = 0; (heal->offset < size) && !heal->done;
+             heal->offset += heal->size) {
+                gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "
+                              "%d, offset: %"PRIu64" bsize: %"PRIu64,
+                              uuid_utoa (fd->inode->gfid),
+                              EC_COUNT (sources, ec->nodes),
+                              EC_COUNT (healed_sinks, ec->nodes), heal->offset,
+                              heal->size);
+                ret = ec_sync_heal_block (frame, ec->xl, heal);
+                if (ret < 0)
+                        break;
+
+        }
+        memset (healed_sinks, 0, ec->nodes);
+        ec_mask_to_char_array (heal->bad, healed_sinks, ec->nodes);
+        fd_unref (heal->fd);
+        LOCK_DESTROY (&heal->lock);
+        syncbarrier_destroy (heal->data);
+        if (ret < 0)
+                gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s",
+                              uuid_utoa (fd->inode->gfid), strerror (-ret));
+        return ret;
+}
+
+/*
+ * Truncate to zero length the sinks flagged in 'trim'.
+ *
+ * Sinks on which the ftruncate did not succeed are removed from
+ * 'healed_sinks'.
+ *
+ * Returns 0 when nothing had to be trimmed, the cluster_ftruncate() result
+ * when at least one healed sink remains, or -ENOTCONN when none is left.
+ */
+int
+__ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd,
+                      unsigned char *healed_sinks, unsigned char *trim)
+{
+        default_args_cbk_t *replies   = NULL;
+        unsigned char      *truncated = NULL;
+        int                 ret       = 0;
+        int                 idx       = 0;
+
+        EC_REPLIES_ALLOC (replies, ec->nodes);
+        truncated = alloca0 (ec->nodes);
+
+        if (EC_COUNT (trim, ec->nodes) != 0) {
+                ret = cluster_ftruncate (ec->xl_list, trim, ec->nodes, replies,
+                                         truncated, frame, ec->xl, fd, 0, NULL);
+
+                for (idx = 0; idx < ec->nodes; idx++) {
+                        if (trim[idx] && !truncated[idx])
+                                healed_sinks[idx] = 0;
+                }
+
+                if (EC_COUNT (healed_sinks, ec->nodes) == 0)
+                        ret = -ENOTCONN;
+        }
+
+        cluster_replies_wipe (replies, ec->nodes);
+        if (ret < 0)
+                gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s",
+                              uuid_utoa (fd->inode->gfid), strerror (-ret));
+        return ret;
+}
+
+/*
+ * Bring the version/size (and optionally dirty) xattrs of brick 'idx' in
+ * line with those of brick 'source'.
+ *
+ * The differences between the source's and the brick's values are stored
+ * in 'xattr' and applied with a GF_XATTROP_ADD_ARRAY64 fxattrop. With
+ * 'erase_dirty' set, the negated dirty counter of the brick is added as
+ * well, which brings it back to zero. When every delta is zero the
+ * fxattrop is skipped.
+ *
+ * 'xattr' is a caller-provided scratch dict; it is loaded with pointers to
+ * this function's local buffers and must not be read after the call
+ * returns.
+ *
+ * Returns 0 on success or when nothing had to change, otherwise the
+ * negative result of the failing dict or fxattrop call.
+ */
+int
+ec_data_undo_pending (call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr,
+                      uint64_t *versions, uint64_t *dirty, uint64_t *size,
+                      int source, gf_boolean_t erase_dirty, int idx)
+{
+        uint64_t versions_xattr[2] = {0};
+        uint64_t dirty_xattr[2] = {0};
+        uint64_t allzero[2] = {0};
+        uint64_t size_xattr = 0;
+        int ret = 0;
+
+        versions_xattr[EC_DATA_TXN] = hton64(versions[source] - versions[idx]);
+        ret = dict_set_static_bin (xattr, EC_XATTR_VERSION,
+                                   versions_xattr,
+                                   sizeof (versions_xattr));
+        if (ret < 0)
+                goto out;
+
+        size_xattr = hton64(size[source] - size[idx]);
+        ret = dict_set_static_bin (xattr, EC_XATTR_SIZE,
+                                   &size_xattr, sizeof (size_xattr));
+        if (ret < 0)
+                goto out;
+
+        if (erase_dirty) {
+                dirty_xattr[EC_DATA_TXN] = hton64(-dirty[idx]);
+                ret = dict_set_static_bin (xattr, EC_XATTR_DIRTY,
+                                           dirty_xattr,
+                                           sizeof (dirty_xattr));
+                if (ret < 0)
+                        goto out;
+        }
+
+        /* Compare the size delta, not the 'size' array pointer (which is
+         * never NULL here and made this shortcut unreachable). */
+        if ((memcmp (versions_xattr, allzero, sizeof (allzero)) == 0) &&
+            (memcmp (dirty_xattr, allzero, sizeof (allzero)) == 0) &&
+            (size_xattr == 0)) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = syncop_fxattrop (ec->xl_list[idx], fd,
+                               GF_XATTROP_ADD_ARRAY64, xattr, NULL, NULL);
+out:
+        return ret;
+}
+
+/*
+ * Update the version/size/dirty xattrs of 'fd' after a data heal.
+ *
+ * Each healed sink is aligned with the first source through
+ * ec_data_undo_pending(). The dirty counters are only erased - on sinks and
+ * then also on sources - when sources and healed sinks together cover all
+ * ec->nodes bricks, since dirty marks a file that still needs heal.
+ *
+ * Returns -ENOMEM when the scratch dict cannot be allocated, 0 otherwise.
+ *
+ * NOTE(review): a failure on a sink stops the processing but is not
+ * reported to the caller, and failures on sources are ignored. Confirm
+ * whether this best-effort behaviour is intended.
+ */
+int
+__ec_fd_data_adjust_versions (call_frame_t *frame, ec_t *ec, fd_t *fd,
+                            unsigned char *sources, unsigned char *healed_sinks,
+                            uint64_t *versions, uint64_t *dirty, uint64_t *size)
+{
+        dict_t       *scratch     = NULL;
+        gf_boolean_t  erase_dirty = _gf_false;
+        int           source      = -1;
+        int           idx         = 0;
+
+        scratch = dict_new ();
+        if (scratch == NULL)
+                return -ENOMEM;
+
+        if ((EC_COUNT (sources, ec->nodes) +
+             EC_COUNT (healed_sinks, ec->nodes)) == ec->nodes)
+                erase_dirty = _gf_true;
+
+        /* The first source acts as the reference brick; 'source' stays -1
+         * when there is none. */
+        for (idx = 0; idx < ec->nodes; idx++) {
+                if (!sources[idx])
+                        continue;
+                source = idx;
+                break;
+        }
+
+        for (idx = 0; idx < ec->nodes; idx++) {
+                if (!healed_sinks[idx])
+                        continue;
+                if (ec_data_undo_pending (frame, ec, fd, scratch, versions,
+                                          dirty, size, source, erase_dirty,
+                                          idx) < 0)
+                        goto out;
+        }
+
+        if (erase_dirty) {
+                for (idx = 0; idx < ec->nodes; idx++) {
+                        if (!sources[idx])
+                                continue;
+                        /* Best effort: the result is deliberately unused */
+                        (void) ec_data_undo_pending (frame, ec, fd, scratch,
+                                                     versions, dirty, size,
+                                                     source, erase_dirty, idx);
+                }
+        }
+
+out:
+        dict_unref (scratch);
+        return 0;
+}
+
+/*
+ * Final step of a data heal: restore timestamps on the healed sinks and
+ * update the version xattrs.
+ *
+ * Under an inodelk on fd->inode taken on all sources and healed sinks:
+ *  - __ec_heal_data_prepare() is run again into the postsh_* scratch arrays
+ *    to obtain the iatt of a source brick (source_buf);
+ *  - atime/mtime of the healed sinks are set from source_buf; sinks on
+ *    which the setattr did not succeed are dropped from 'healed_sinks';
+ *  - __ec_fd_data_adjust_versions() is called with the caller's versions,
+ *    dirty and size arrays.
+ *
+ * Returns -ENOTCONN when too few bricks could be locked or no healed sink
+ * is left, a negative __ec_heal_data_prepare() result, or the
+ * __ec_fd_data_adjust_versions() result.
+ */
+int
+ec_restore_time_and_adjust_versions (call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *sources,
+ unsigned char *healed_sinks,
+ uint64_t *versions, uint64_t *dirty,
+ uint64_t *size)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *participants = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *postsh_sources = NULL;
+ unsigned char *postsh_healed_sinks = NULL;
+ unsigned char *postsh_trim = NULL;
+ uint64_t *postsh_versions = NULL;
+ uint64_t *postsh_dirty = NULL;
+ uint64_t *postsh_size = NULL;
+ int ret = 0;
+ int i = 0;
+ struct iatt source_buf = {0};
+ loc_t loc = {0};
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ participants = alloca0(ec->nodes);
+ postsh_sources = alloca0(ec->nodes);
+ postsh_healed_sinks = alloca0(ec->nodes);
+ postsh_trim = alloca0(ec->nodes);
+ postsh_versions = alloca0(ec->nodes * sizeof (*postsh_versions));
+ postsh_dirty = alloca0(ec->nodes * sizeof (*postsh_dirty));
+ postsh_size = alloca0(ec->nodes * sizeof (*postsh_size));
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i] || sources[i])
+ participants[i] = 1;
+ }
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ ret = cluster_inodelk (ec->xl_list, participants, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name,
+ fd->inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked", uuid_utoa (fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ /* The postsh_* results are scratch; only source_buf is used below */
+ ret = __ec_heal_data_prepare (frame, ec, fd, locked_on,
+ postsh_versions, postsh_dirty,
+ postsh_size, postsh_sources,
+ postsh_healed_sinks, postsh_trim,
+ &source_buf);
+ if (ret < 0)
+ goto unlock;
+
+ loc.inode = inode_ref (fd->inode);
+ gf_uuid_copy (loc.gfid, fd->inode->gfid);
+ ret = cluster_setattr (ec->xl_list, healed_sinks, ec->nodes,
+ replies, output, frame, ec->xl, &loc,
+ &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME,
+ NULL);
+ EC_INTERSECT (healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT (healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_fd_data_adjust_versions (frame, ec, fd, sources,
+ healed_sinks, versions, dirty, size);
+ }
+unlock:
+ cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, ec->xl->name, fd->inode, 0, 0);
+ cluster_replies_wipe (replies, ec->nodes);
+ loc_wipe (&loc);
+ return ret;
+}
+
+/*
+ * Data heal of the file open on 'fd'.
+ *
+ * Phase 1, under an inodelk on fd->inode taken on the 'heal_on' bricks:
+ * find sources and sinks (__ec_heal_data_prepare), and if there are sinks,
+ * mark them as under heal and truncate the ones flagged for trimming. With
+ * no sinks at all only the version xattrs are adjusted.
+ * Phase 2, after that lock is released: rebuild the data on the sinks
+ * (ec_rebuild_data) and then restore times and versions
+ * (ec_restore_time_and_adjust_versions).
+ *
+ * Returns -ENOTCONN when too few bricks could be locked, otherwise the
+ * result of the last step that ran.
+ */
+int
+__ec_heal_data (call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ uint64_t *size = NULL;
+ unsigned char *trim = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+ int source = 0;
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ trim = alloca0 (ec->nodes);
+ versions = alloca0 (ec->nodes * sizeof (*versions));
+ dirty = alloca0 (ec->nodes * sizeof (*dirty));
+ size = alloca0 (ec->nodes * sizeof (*size));
+
+ EC_REPLIES_ALLOC (replies, ec->nodes);
+ ret = cluster_inodelk (ec->xl_list, heal_on, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name,
+ fd->inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked", uuid_utoa (fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __ec_heal_data_prepare (frame, ec, fd, locked_on,
+ versions, dirty, size, sources,
+ healed_sinks, trim, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ /* No sinks: there is no data to rebuild, only xattrs to adjust */
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = __ec_fd_data_adjust_versions (frame, ec, fd,
+ sources,
+ healed_sinks, versions, dirty, size);
+ goto unlock;
+ }
+
+ /* Remember the source index before 'ret' is reused */
+ source = ret;
+ ret = __ec_heal_mark_sinks (frame, ec, fd, versions,
+ healed_sinks);
+ if (ret < 0)
+ goto unlock;
+
+ ret = __ec_heal_trim_sinks (frame, ec, fd, healed_sinks, trim);
+ }
+unlock:
+ cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, ec->xl->name, fd->inode, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0)
+ goto out;
+
+ gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "
+ "%d", uuid_utoa (fd->inode->gfid),
+ EC_COUNT (sources, ec->nodes),
+ EC_COUNT (healed_sinks, ec->nodes));
+
+ /* The rebuild runs without the lock taken above */
+ ret = ec_rebuild_data (frame, ec, fd, size[source], sources,
+ healed_sinks);
+ if (ret < 0)
+ goto out;
+
+ ret = ec_restore_time_and_adjust_versions (frame, ec, fd, sources,
+ healed_sinks, versions,
+ dirty, size);
+out:
+ cluster_replies_wipe (replies, ec->nodes);
+ return ret;
+}
+
+/*
+ * Data heal entry point for regular file 'inode'.
+ *
+ * Opens the file (O_RDWR|O_LARGEFILE) on every brick that is up, takes an
+ * inodelk in the "<xlator-name>:self-heal" domain on the bricks where the
+ * open succeeded and runs __ec_heal_data() on the bricks that granted the
+ * lock. With 'block' set the lock request waits for other healers;
+ * otherwise a try-lock is used so that a heal already in progress
+ * elsewhere makes this one give up.
+ *
+ * 'sources' and 'healed_sinks' are output arrays of ec->nodes bytes.
+ *
+ * Returns the __ec_heal_data() result, -ENOMEM when the fd cannot be
+ * created, or -ENOTCONN when no more than ec->fragments bricks could be
+ * opened or locked.
+ */
+int
+ec_heal_data (call_frame_t *frame, ec_t *ec, gf_boolean_t block, inode_t *inode,
+              unsigned char *sources, unsigned char *healed_sinks)
+{
+        unsigned char *locked_on = NULL;
+        unsigned char *up_subvols = NULL;
+        unsigned char *output = NULL;
+        default_args_cbk_t *replies = NULL;
+        fd_t *fd = NULL;
+        loc_t loc = {0};
+        char selfheal_domain[1024] = {0};
+        int ret = 0;
+
+        EC_REPLIES_ALLOC (replies, ec->nodes);
+
+        locked_on = alloca0 (ec->nodes);
+        output = alloca0 (ec->nodes);
+        up_subvols = alloca0 (ec->nodes);
+        loc.inode = inode_ref (inode);
+        gf_uuid_copy (loc.gfid, inode->gfid);
+
+        fd = fd_create (inode, 0);
+        if (!fd) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes);
+
+        ret = cluster_open (ec->xl_list, up_subvols, ec->nodes, replies, output,
+                            frame, ec->xl, &loc, O_RDWR|O_LARGEFILE, fd, NULL);
+        if (ret <= ec->fragments) {
+                ret = -ENOTCONN;
+                goto out;
+        }
+
+        fd_bind (fd);
+        /* Bounded formatting: an over-long xlator name is truncated instead
+         * of overflowing the on-stack buffer. */
+        snprintf (selfheal_domain, sizeof (selfheal_domain), "%s:self-heal",
+                  ec->xl->name);
+        /*If other processes are already doing the heal, don't block*/
+        if (block) {
+                ret = cluster_inodelk (ec->xl_list, output, ec->nodes, replies,
+                                       locked_on, frame, ec->xl,
+                                       selfheal_domain, inode, 0, 0);
+        } else {
+                ret = cluster_tryinodelk (ec->xl_list, output, ec->nodes,
+                                          replies, locked_on, frame, ec->xl,
+                                          selfheal_domain, inode, 0, 0);
+        }
+        {
+                if (ret <= ec->fragments) {
+                        gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal "
+                                      "as only %d number of subvolumes could "
+                                      "be locked", uuid_utoa (inode->gfid), ret);
+                        ret = -ENOTCONN;
+                        goto unlock;
+                }
+                ret = __ec_heal_data (frame, ec, fd, locked_on, sources,
+                                      healed_sinks);
+        }
+unlock:
+        cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,
+                           frame, ec->xl, selfheal_domain, inode, 0, 0);
+out:
+        if (fd)
+                fd_unref (fd);
+        loc_wipe (&loc);
+        cluster_replies_wipe (replies, ec->nodes);
+        return ret;
+}
+
+/*
+ * Run a complete heal of 'loc' on behalf of the heal fop passed in 'data'.
+ *
+ * Sequence, executed on a freshly created frame owned by root and tagged
+ * with GF_CLIENT_PID_SELF_HEALD:
+ *  1. name heal, when loc->name is a non-empty string (its outcome is only
+ *     logged);
+ *  2. metadata heal;
+ *  3. data heal for regular files, entry heal for directories unless
+ *     'partial' is set; for anything else all participants are reported as
+ *     both sources and healed sinks.
+ *
+ * The result is delivered through fop->cbks.heal: op_ret is -1 when the
+ * metadata or the data/entry heal failed (op_errno comes from the last one
+ * that failed), and the good/bad masks are the intersection of the
+ * metadata and the data/entry results.
+ *
+ * If the frame cannot be created the function returns without invoking the
+ * callback.
+ */
+void
+ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
+{
+ call_frame_t *frame = NULL;
+ unsigned char *participants = NULL;
+ unsigned char *msources = NULL;
+ unsigned char *mhealed_sinks = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ ec_t *ec = NULL;
+ int ret = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ intptr_t mgood = 0;
+ intptr_t mbad = 0;
+ intptr_t good = 0;
+ intptr_t bad = 0;
+ ec_fop_data_t *fop = data;
+ gf_boolean_t blocking = _gf_false;
+
+ ec = this->private;
+
+ /* If it is heal request from getxattr, complete the heal and then
+ * unwind, if it is ec_heal with NULL as frame then no need to block
+ * the heal as the caller doesn't care about its completion*/
+ if (fop->req_frame)
+ blocking = _gf_true;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ return;
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ participants = alloca0(ec->nodes);
+ ec_mask_to_char_array (ec->xl_up, participants, ec->nodes);
+ if (loc->name && strlen (loc->name)) {
+ ret = ec_heal_name (frame, ec, loc->parent, (char *)loc->name,
+ participants);
+ if (ret == 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ EC_MSG_HEAL_SUCCESS, "%s: name heal "
+ "successful on %lX", loc->path,
+ ec_char_array_to_mask (participants,
+ ec->nodes));
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, -ret,
+ EC_MSG_HEAL_FAIL, "%s: name heal "
+ "failed", loc->path);
+ }
+ }
+
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata (frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask (msources, ec->nodes);
+ mbad = ec_char_array_to_mask (mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ /* The data/entry heal is attempted even if the metadata heal failed */
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ if (IA_ISREG (loc->inode->ia_type)) {
+ ret = ec_heal_data (frame, ec, blocking, loc->inode, sources,
+ healed_sinks);
+ } else if (IA_ISDIR (loc->inode->ia_type) && !partial) {
+ ret = ec_heal_entry (frame, ec, loc->inode, sources,
+ healed_sinks);
+ } else {
+ ret = 0;
+ memcpy (sources, participants, ec->nodes);
+ memcpy (healed_sinks, participants, ec->nodes);
+ }
+
+ if (ret == 0) {
+ good = ec_char_array_to_mask (sources, ec->nodes);
+ bad = ec_char_array_to_mask (healed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+
+
+ if (fop->cbks.heal) {
+ fop->cbks.heal (fop->req_frame, fop, fop->xl, op_ret,
+ op_errno, ec_char_array_to_mask (participants,
+ ec->nodes),
+ mgood & good, mbad & bad, NULL);
+ }
+ STACK_DESTROY (frame->root);
+ return;
+}
+
+/*
+ * Synctask body for a heal: unpacks the heal fop passed as 'opaque' and
+ * runs ec_heal_do() on its first loc. Always returns 0.
+ */
+int
+ec_synctask_heal_wrap (void *opaque)
+{
+        ec_fop_data_t *heal_fop = opaque;
+        loc_t         *target   = &heal_fop->loc[0];
+
+        ec_heal_do (heal_fop->xl, heal_fop, target, heal_fop->int32);
+
+        return 0;
+}
+
+/*
+ * Synctask completion callback for a heal: releases the heal fop passed as
+ * 'opaque', if any. Always returns 0.
+ */
+int
+ec_heal_done (int ret, call_frame_t *heal, void *opaque)
+{
+        if (opaque != NULL) {
+                ec_fop_data_release (opaque);
+        }
+
+        return 0;
+}
+
+/*
+ * Move the oldest waiting heal to the list of running heals.
+ *
+ * Nothing is dequeued when the wait queue is empty or when
+ * ec->background_heals is positive and that many healers are already
+ * running. The callers visible in this file invoke it with ec->lock held.
+ *
+ * Returns the fop to launch, or NULL when nothing can be started.
+ */
+ec_fop_data_t*
+__ec_dequeue_heals (ec_t *ec)
+{
+        ec_fop_data_t *next = NULL;
+
+        if (list_empty (&ec->heal_waiting) ||
+            ((ec->background_heals > 0) &&
+             (ec->healers >= ec->background_heals))) {
+                gf_msg_debug (ec->xl->name, 0, "Num healers: %d, Num Waiters: %d",
+                              ec->healers, ec->heal_waiters);
+                return NULL;
+        }
+
+        next = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer);
+
+        /* waiting -> healing */
+        ec->heal_waiters--;
+        list_del_init(&next->healer);
+        list_add(&next->healer, &ec->healing);
+        ec->healers++;
+
+        return next;
+}
+
+/*
+ * Fail a heal request that will not be run.
+ *
+ * Invokes the heal callback of 'fop' (when one is set) with op_ret -1 and
+ * op_errno fop->error, then releases the fop. A NULL 'fop' is ignored.
+ */
+void
+ec_heal_fail (ec_t *ec, ec_fop_data_t *fop)
+{
+        /* Check before the first dereference, not after it */
+        if (fop == NULL)
+                return;
+
+        if (fop->cbks.heal) {
+                fop->cbks.heal (fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0,
+                                0, NULL);
+        }
+        ec_fop_data_release (fop);
+}
+
+/*
+ * Start the heal described by 'fop' in a new synctask
+ * (ec_synctask_heal_wrap, completed by ec_heal_done). If the task cannot
+ * be created the fop is failed with ENOMEM.
+ */
+void
+ec_launch_heal (ec_t *ec, ec_fop_data_t *fop)
+{
+        if (synctask_new (ec->xl->ctx->env, ec_synctask_heal_wrap,
+                          ec_heal_done, NULL, fop) >= 0)
+                return;
+
+        ec_fop_set_error(fop, ENOMEM);
+        ec_heal_fail (ec, fop);
+}
+
+/*
+ * Bookkeeping for a heal fop that has finished.
+ *
+ * A fop that is linked on a healer list is unlinked and the running-healer
+ * count is decremented under ec->lock; the next waiting heal, if any can
+ * run, is then launched outside the lock. Fops that are not on a healer
+ * list are left untouched.
+ */
+void
+ec_handle_healers_done (ec_fop_data_t *fop)
+{
+        ec_t          *ec   = fop->xl->private;
+        ec_fop_data_t *next = NULL;
+
+        if (!list_empty (&fop->healer)) {
+                LOCK (&ec->lock);
+                {
+                        list_del_init (&fop->healer);
+                        ec->healers--;
+                        next = __ec_dequeue_heals (ec);
+                }
+                UNLOCK (&ec->lock);
+
+                if (next != NULL)
+                        ec_launch_heal (ec, next);
+        }
+}
+
+/*
+ * Admit, queue or reject a heal request.
+ *
+ * Requests that carry a request frame are launched right away. Requests
+ * without one go through the wait queue: the fop is appended to it when
+ * background heals are enabled and the queue plus the running healers are
+ * below (heal_wait_qlen + background_heals), and whichever fop
+ * __ec_dequeue_heals() hands back (possibly none) is launched. Otherwise
+ * the request is failed with EBUSY.
+ */
+void
+ec_heal_throttle (xlator_t *this, ec_fop_data_t *fop)
+{
+        ec_t         *ec       = this->private;
+        gf_boolean_t  rejected = _gf_false;
+
+        if (fop->req_frame == NULL) {
+                LOCK (&ec->lock);
+                {
+                        if ((ec->background_heals <= 0) ||
+                            ((ec->heal_wait_qlen + ec->background_heals) <=
+                             (ec->heal_waiters + ec->healers))) {
+                                rejected = _gf_true;
+                        } else {
+                                list_add_tail(&fop->healer, &ec->heal_waiting);
+                                ec->heal_waiters++;
+                                /* May return another fop, or NULL when all
+                                 * healer slots are busy */
+                                fop = __ec_dequeue_heals (ec);
+                        }
+                }
+                UNLOCK (&ec->lock);
+        }
+
+        if (rejected) {
+                gf_msg_debug (this->name, 0, "Max number of heals are "
+                              "pending, background self-heal rejected");
+                ec_fop_set_error(fop, EBUSY);
+                ec_heal_fail (ec, fop);
+                return;
+        }
+
+        if (fop != NULL)
+                ec_launch_heal (ec, fop);
+}
+
+/*
+ * Entry point of the heal fop.
+ *
+ * Validates the arguments, allocates an EC_FOP_HEAL fop carrying a copy of
+ * 'loc', the 'partial' flag and a reference to 'xdata', and hands it to
+ * ec_heal_throttle().
+ *
+ * On failure 'func' (when non-NULL) is invoked with op_ret -1; op_errno is
+ * EINVAL for invalid arguments (including a frame that already has
+ * frame->local set) and ENOMEM for allocation or loc_copy() failures.
+ */
+void
+ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
+         int32_t minimum, fop_heal_cbk_t func, void *data, loc_t *loc,
+         int32_t partial, dict_t *xdata)
+{
+        ec_cbk_t       callback = { .heal = func };
+        ec_fop_data_t *fop      = NULL;
+        int32_t        err      = EINVAL;
+
+        gf_msg_trace ("ec", 0, "EC(HEAL) %p", frame);
+
+        VALIDATE_OR_GOTO(this, fail);
+        GF_VALIDATE_OR_GOTO(this->name, this->private, fail);
+
+        if ((loc == NULL) || (loc->inode == NULL))
+                goto fail;
+        if (gf_uuid_is_null (loc->inode->gfid))
+                goto fail;
+
+        if ((frame != NULL) && (frame->local != NULL))
+                goto fail;
+
+        fop = ec_fop_data_allocate (frame, this, EC_FOP_HEAL, 0, target, minimum,
+                                    NULL, NULL, callback, data);
+
+        /* From here on every failure is an allocation failure */
+        err = ENOMEM;
+        if (fop == NULL)
+                goto fail;
+
+        fop->int32 = partial;
+
+        /* 'loc' was verified to be non-NULL above */
+        if (loc_copy(&fop->loc[0], loc) != 0)
+                goto fail;
+
+        if (xdata != NULL)
+                fop->xdata = dict_ref(xdata);
+
+        ec_heal_throttle (this, fop);
+        return;
+
+fail:
+        if (fop != NULL)
+                ec_fop_data_release (fop);
+        if (func != NULL)
+                func (frame, NULL, this, -1, err, 0, 0, 0, NULL);
+}
+
+/*
+ * Synctask completion callback for the replace-brick heal: logs the task
+ * result at debug level. 'opaque' is the ec_t the task was started for.
+ * Always returns 0.
+ */
+int
+ec_replace_heal_done (int ret, call_frame_t *heal, void *opaque)
+{
+        ec_t     *ec = opaque;
+        xlator_t *xl = ec->xl;
+
+        gf_msg_debug (xl->name, 0,
+                      "getxattr on bricks is done ret %d", ret);
+
+        return 0;
+}
+
+/*
+ * Issue a getxattr of EC_XATTR_HEAL on 'inode' through the ec xlator
+ * itself and log a debug message if it fails.
+ *
+ * Returns the syncop_getxattr() result.
+ */
+int32_t
+ec_replace_heal (ec_t *ec, inode_t *inode)
+{
+        loc_t target = {0};
+        int   result = 0;
+
+        target.inode = inode_ref (inode);
+        gf_uuid_copy (target.gfid, inode->gfid);
+
+        result = syncop_getxattr (ec->xl, &target, NULL, EC_XATTR_HEAL,
+                                  NULL, NULL);
+        if (result < 0) {
+                gf_msg_debug (ec->xl->name, 0,
+                              "Heal failed for replace brick ret = %d", result);
+        }
+
+        loc_wipe (&target);
+        return result;
+}
+
+/*
+ * Synctask body for the replace-brick heal: runs ec_replace_heal() on the
+ * root inode of the xlator's inode table.
+ *
+ * Returns -1 when the xlator has no inode table, otherwise the
+ * ec_replace_heal() result.
+ */
+int32_t
+ec_replace_brick_heal_wrap (void *opaque)
+{
+        ec_t          *ec     = opaque;
+        inode_table_t *itable = ec->xl->itable;
+
+        if (itable == NULL)
+                return -1;
+
+        return ec_replace_heal (ec, itable->root);
+}
+
+/*
+ * Start the replace-brick heal in a new synctask.
+ *
+ * Returns -1 when 'ec' is NULL, otherwise the synctask_new() result (a
+ * negative value is also logged at debug level).
+ */
+int32_t
+ec_launch_replace_heal (ec_t *ec)
+{
+        int status = -1;
+
+        if (ec == NULL)
+                return status;
+
+        status = synctask_new (ec->xl->ctx->env, ec_replace_brick_heal_wrap,
+                               ec_replace_heal_done, NULL, ec);
+        if (status < 0)
+                gf_msg_debug (ec->xl->name, 0,
+                              "Heal failed for replace brick ret = %d", status);
+
+        return status;
+}
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
new file mode 100644
index 00000000000..0e8076826c6
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -0,0 +1,607 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "compat-errno.h"
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-heald.h"
+#include "ec-mem-types.h"
+#include "syncop.h"
+#include "syncop-utils.h"
+#include "protocol-common.h"
+
+#define ASSERT_LOCAL(this, healer) \
+ do { \
+ if (!ec_shd_is_subvol_local (this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break (healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ } \
+ } while (0);
+
+
+#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n])) /* address of the n-th index healer of xlator 'this' */
+#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n])) /* address of the n-th full healer of xlator 'this' */
+
+/*
+ * Ask syncop_is_subvol_local() whether child number @subvol is local,
+ * using the root inode as the probe location.
+ *
+ * The syncop result code is ignored; if the call does not set the flag
+ * the function returns _gf_false.
+ */
+gf_boolean_t
+ec_shd_is_subvol_local (xlator_t *this, int subvol)
+{
+        ec_t         *priv        = this->private;
+        loc_t         root        = {0, };
+        gf_boolean_t  local_brick = _gf_false;
+
+        root.inode = this->itable->root;
+        syncop_is_subvol_local (priv->xl_list[subvol], &root, &local_brick);
+
+        return local_brick;
+}
+
+/*
+ * Return the name of child xlator number @subvol, or NULL when the index
+ * is out of range.
+ *
+ * Valid indices are 0 .. ec->nodes - 1 (the healer arrays and every loop
+ * over the children use 'i < ec->nodes'), so an index equal to ec->nodes
+ * must be rejected as well.
+ */
+char *
+ec_subvol_name (xlator_t *this, int subvol)
+{
+        ec_t *ec = NULL;
+
+        ec = this->private;
+        if (subvol < 0 || subvol >= ec->nodes)
+                return NULL;
+
+        return ec->xl_list[subvol]->name;
+}
+
+/*
+ * Wait on healer->cond until a rerun is requested or roughly 60 seconds
+ * have elapsed, then consume the rerun flag.
+ *
+ * The whole wait is repeated for as long as the self-heal daemon is
+ * disabled or the disperse volume is down. Callers in this file invoke it
+ * with healer->mutex held. Returns the value the rerun flag had when the
+ * final wait ended.
+ */
+int
+__ec_shd_healer_wait (struct subvol_healer *healer)
+{
+        ec_t            *priv     = healer->this->private;
+        struct timespec  deadline = {0, };
+        int              rerun    = 0;
+
+        do {
+                deadline.tv_sec = time (NULL) + 60;
+
+                while (!healer->rerun) {
+                        rerun = pthread_cond_timedwait (&healer->cond,
+                                                        &healer->mutex,
+                                                        &deadline);
+                        if (rerun == ETIMEDOUT) {
+                                break;
+                        }
+                }
+
+                rerun = healer->rerun;
+                healer->rerun = 0;
+        } while (!priv->shd.enabled || !priv->up);
+
+        return rerun;
+}
+
+
+/*
+ * Locked wrapper around __ec_shd_healer_wait(): takes healer->mutex for
+ * the duration of the wait and returns the consumed rerun flag.
+ */
+int
+ec_shd_healer_wait (struct subvol_healer *healer)
+{
+        int rerun = 0;
+
+        pthread_mutex_lock (&healer->mutex);
+        rerun = __ec_shd_healer_wait (healer);
+        pthread_mutex_unlock (&healer->mutex);
+
+        return rerun;
+}
+
+
+/*
+ * Decide, under healer->mutex, whether the healer may stop.
+ *
+ * If no rerun is pending, healer->running is cleared and _gf_true is
+ * returned. If a rerun has been requested nothing is changed and
+ * _gf_false is returned.
+ */
+gf_boolean_t
+safe_break (struct subvol_healer *healer)
+{
+        gf_boolean_t may_stop = _gf_false;
+
+        pthread_mutex_lock (&healer->mutex);
+        if (!healer->rerun) {
+                healer->running = _gf_false;
+                may_stop = _gf_true;
+        }
+        pthread_mutex_unlock (&healer->mutex);
+
+        return may_stop;
+}
+
+
+/*
+ * Obtain the inode for @gfid.
+ *
+ * The inode table of @this is consulted first. On a miss a new inode is
+ * created, looked up by gfid on @subvol and linked with the returned
+ * iatt. *inode receives the result (NULL on failure).
+ *
+ * Returns 0 on a table hit, the syncop_lookup() result otherwise, or
+ * -ENOMEM when the inode cannot be created or linked.
+ */
+int
+ec_shd_inode_find (xlator_t *this, xlator_t *subvol,
+                   uuid_t gfid, inode_t **inode)
+{
+        int          ret   = 0;
+        loc_t        loc   = {0, };
+        struct iatt  iatt  = {0, };
+        inode_t     *found = NULL;
+
+        found = inode_find (this->itable, gfid);
+        if (found == NULL) {
+                loc.inode = inode_new (this->itable);
+                if (loc.inode == NULL) {
+                        ret = -ENOMEM;
+                } else {
+                        gf_uuid_copy (loc.gfid, gfid);
+
+                        ret = syncop_lookup (subvol, &loc, &iatt,
+                                             NULL, NULL, NULL);
+                        if (ret >= 0) {
+                                found = inode_link (loc.inode, NULL, NULL,
+                                                    &iatt);
+                                if (found == NULL) {
+                                        ret = -ENOMEM;
+                                }
+                        }
+                }
+        }
+
+        *inode = found;
+        loc_wipe (&loc);
+
+        return ret;
+}
+
+
+/*
+ * Resolve the inode of the index directory of @subvol.
+ *
+ * The directory's gfid is read from the GF_XATTROP_INDEX_GFID xattr of the
+ * root and then turned into an inode with ec_shd_inode_find().
+ * *inode is NULL unless the whole sequence succeeds.
+ *
+ * Returns a negative value on failure (-EINVAL if no xattr dictionary
+ * came back), otherwise the result of ec_shd_inode_find().
+ */
+int
+ec_shd_index_inode (xlator_t *this, xlator_t *subvol, inode_t **inode)
+{
+        loc_t   root  = {0, };
+        dict_t *reply = NULL;
+        void   *gfid  = NULL;
+        int     ret   = 0;
+
+        *inode = NULL;
+
+        root.inode = inode_ref (this->itable->root);
+        gf_uuid_copy (root.gfid, root.inode->gfid);
+
+        ret = syncop_getxattr (subvol, &root, &reply,
+                               GF_XATTROP_INDEX_GFID, NULL, NULL);
+        if (ret < 0) {
+                goto cleanup;
+        }
+        if (reply == NULL) {
+                ret = -EINVAL;
+                goto cleanup;
+        }
+
+        ret = dict_get_ptr (reply, GF_XATTROP_INDEX_GFID, &gfid);
+        if (ret != 0) {
+                goto cleanup;
+        }
+
+        gf_msg_debug (this->name, 0, "index-dir gfid for %s: %s",
+                      subvol->name, uuid_utoa (gfid));
+
+        /* gfid points into 'reply', so use it before the dict is released */
+        ret = ec_shd_inode_find (this, subvol, gfid, inode);
+
+cleanup:
+        loc_wipe (&root);
+
+        if (reply != NULL) {
+                dict_unref (reply);
+        }
+
+        return ret;
+}
+
+/*
+ * Unlink entry @name from directory @inode on @subvol.
+ * Returns the syncop_unlink() result.
+ */
+int
+ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+{
+        loc_t entry  = {0, };
+        int   op_ret = 0;
+
+        entry.name   = name;
+        entry.parent = inode_ref (inode);
+
+        op_ret = syncop_unlink (subvol, &entry, NULL, NULL);
+
+        loc_wipe (&entry);
+
+        return op_ret;
+}
+
+/*
+ * Request the EC_XATTR_HEAL xattr for @loc through the healer's own
+ * xlator. @child is not used. Returns the syncop_getxattr() result.
+ */
+int
+ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc)
+{
+        int op_ret = 0;
+
+        op_ret = syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL,
+                                  NULL, NULL);
+
+        return op_ret;
+}
+
+
+/*
+ * Per-entry callback of the index sweep (see ec_shd_index_sweep()).
+ *
+ * The entry name is parsed as a gfid; names that do not parse are skipped.
+ * The gfid is resolved to a path and an inode, and a heal is requested for
+ * it. When resolution fails with ENOENT or ESTALE the index entry is
+ * considered stale and is removed from the index directory.
+ *
+ * Returns -EBUSY when the self-heal daemon is disabled, 0 otherwise
+ * (individual failures do not stop the scan).
+ */
+int
+ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                   void *data)
+{
+        struct subvol_healer *healer = data;
+        xlator_t             *this   = healer->this;
+        ec_t                 *priv   = this->private;
+        loc_t                 loc    = {0};
+        int                   ret    = 0;
+
+        if (!priv->shd.enabled) {
+                return -EBUSY;
+        }
+
+        gf_msg_debug (this->name, 0, "got entry: %s",
+                      entry->d_name);
+
+        if (gf_uuid_parse (entry->d_name, loc.gfid) != 0) {
+                return 0;
+        }
+
+        /* ENOENT/ESTALE from either resolution step means a stale index */
+        ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid,
+                                   (char **)&loc.path);
+        if (ret >= 0) {
+                ret = ec_shd_inode_find (this, this, loc.gfid, &loc.inode);
+        }
+
+        if (ret >= 0) {
+                ec_shd_selfheal (healer, healer->subvol, &loc);
+        }
+
+        if ((ret == -ENOENT) || (ret == -ESTALE)) {
+                gf_msg (this->name, GF_LOG_DEBUG, 0,
+                        EC_MSG_HEAL_FAIL, "Purging index for gfid %s:",
+                        uuid_utoa(loc.gfid));
+                ec_shd_index_purge (subvol, parent->inode, entry->d_name);
+        }
+
+        loc_wipe (&loc);
+
+        return 0;
+}
+
+/*
+ * Run one index sweep for @healer: locate the index directory of the
+ * healer's subvolume and scan it, calling ec_shd_index_heal() for every
+ * entry.
+ *
+ * Returns a negative value if the index directory cannot be resolved,
+ * otherwise the syncop_dir_scan() result.
+ */
+int
+ec_shd_index_sweep (struct subvol_healer *healer)
+{
+        loc_t loc = {0};
+        ec_t *ec = NULL;
+        int ret = 0;
+        xlator_t *subvol = NULL;
+
+        ec = healer->this->private;
+        subvol = ec->xl_list[healer->subvol];
+
+        ret = ec_shd_index_inode (healer->this, subvol, &loc.inode);
+        if (ret < 0) {
+                /* ec_shd_index_inode() reports failures through a negative
+                 * return value (e.g. -EINVAL, -ENOMEM), so log that code
+                 * rather than whatever the global errno happens to hold. */
+                gf_msg (healer->this->name, GF_LOG_WARNING, -ret,
+                        EC_MSG_INDEX_DIR_GET_FAIL,
+                        "unable to get index-dir on %s", subvol->name);
+                goto out;
+        }
+
+        ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+                               healer, ec_shd_index_heal);
+out:
+        loc_wipe (&loc);
+
+        return ret;
+}
+
+/*
+ * Per-entry callback of the full sweep (see ec_shd_full_sweep()).
+ *
+ * Builds a loc for @entry under @parent, resolves its path and inode from
+ * the entry's gfid and requests a heal for it.
+ *
+ * Returns -EBUSY when the self-heal daemon is disabled, the negative
+ * resolution error if the path or inode cannot be obtained, and 0 once
+ * the heal request has been issued (its result is not checked).
+ */
+int
+ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                  void *data)
+{
+        struct subvol_healer *healer = data;
+        xlator_t             *this   = healer->this;
+        ec_t                 *priv   = this->private;
+        loc_t                 loc    = {0};
+        int                   ret    = 0;
+
+        if (!priv->shd.enabled) {
+                return -EBUSY;
+        }
+
+        loc.parent = inode_ref (parent->inode);
+        loc.name = entry->d_name;
+        gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+
+        ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid,
+                                   (char **)&loc.path);
+        if (ret >= 0) {
+                ret = ec_shd_inode_find (this, this, loc.gfid, &loc.inode);
+        }
+
+        if (ret >= 0) {
+                ec_shd_selfheal (healer, healer->subvol, &loc);
+                ret = 0;
+        }
+
+        loc_wipe (&loc);
+
+        return ret;
+}
+
+/*
+ * Walk the tree rooted at @inode on the healer's subvolume with
+ * syncop_ftw(), calling ec_shd_full_heal() for each entry.
+ * Returns the syncop_ftw() result.
+ */
+int
+ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
+{
+        ec_t  *priv  = healer->this->private;
+        loc_t  start = {0};
+
+        start.inode = inode;
+
+        return syncop_ftw (priv->xl_list[healer->subvol], &start,
+                           GF_CLIENT_PID_SELF_HEALD, healer,
+                           ec_shd_full_heal);
+}
+
+
+/*
+ * Thread function of an index healer.
+ *
+ * Each iteration waits for a rerun request (or the wait timeout), applies
+ * ASSERT_LOCAL to refresh healer->local, and runs an index sweep on the
+ * healer's subvolume. @data is the struct subvol_healer of this thread.
+ */
+void *
+ec_shd_index_healer (void *data)
+{
+        struct subvol_healer *healer = data;
+        xlator_t             *this   = healer->this;
+
+        THIS = this;
+
+        while (1) {
+                ec_shd_healer_wait (healer);
+
+                ASSERT_LOCAL(this, healer);
+
+                gf_msg_debug (this->name, 0,
+                              "starting index sweep on subvol %s",
+                              ec_subvol_name (this, healer->subvol));
+
+                ec_shd_index_sweep (healer);
+
+                gf_msg_debug (this->name, 0,
+                              "finished index sweep on subvol %s",
+                              ec_subvol_name (this, healer->subvol));
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Thread function of a full healer.
+ *
+ * Each iteration waits under healer->mutex; if the wait ends without a
+ * rerun request the healer is marked as not running and the thread
+ * returns. Otherwise ASSERT_LOCAL is applied, a heal is requested for the
+ * root inode and the whole tree is swept from the root.
+ */
+void *
+ec_shd_full_healer (void *data)
+{
+        struct subvol_healer *healer = data;
+        xlator_t             *this   = healer->this;
+        loc_t                 root   = {0};
+        int                   rerun  = 0;
+
+        THIS = this;
+
+        root.inode = this->itable->root;
+
+        while (1) {
+                pthread_mutex_lock (&healer->mutex);
+                rerun = __ec_shd_healer_wait (healer);
+                if (rerun == 0) {
+                        healer->running = _gf_false;
+                }
+                pthread_mutex_unlock (&healer->mutex);
+
+                if (rerun == 0) {
+                        break;
+                }
+
+                ASSERT_LOCAL(this, healer);
+
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        EC_MSG_FULL_SWEEP_START,
+                        "starting full sweep on subvol %s",
+                        ec_subvol_name (this, healer->subvol));
+
+                ec_shd_selfheal (healer, healer->subvol, &root);
+                ec_shd_full_sweep (healer, this->itable->root);
+
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        EC_MSG_FULL_SWEEP_STOP,
+                        "finished full sweep on subvol %s",
+                        ec_subvol_name (this, healer->subvol));
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Initialise the mutex, the condition variable and the state flags of
+ * @healer and bind it to @this.
+ *
+ * Returns 0 on success or the non-zero pthread error code; on failure the
+ * remaining fields are left untouched.
+ */
+int
+ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
+{
+        int rc = 0;
+
+        rc = pthread_mutex_init (&healer->mutex, NULL);
+        if (rc == 0) {
+                rc = pthread_cond_init (&healer->cond, NULL);
+        }
+
+        if (rc == 0) {
+                healer->this = this;
+                healer->running = _gf_false;
+                healer->rerun = _gf_false;
+                healer->local = _gf_false;
+        }
+
+        return rc;
+}
+
+
+/*
+ * Make sure a healer thread is working for @healer and request a run.
+ *
+ * If the healer is already running its condition variable is signalled;
+ * otherwise a thread executing @threadfn is created and the healer is
+ * marked as running. In both cases the rerun flag is set, except when
+ * thread creation fails.
+ *
+ * Returns 0 on success or the gf_thread_create() error.
+ */
+int
+ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
+                     void *(threadfn)(void *))
+{
+        int rc = 0;
+
+        pthread_mutex_lock (&healer->mutex);
+
+        if (healer->running) {
+                pthread_cond_signal (&healer->cond);
+                healer->rerun = 1;
+        } else {
+                rc = gf_thread_create (&healer->thread, NULL,
+                                       threadfn, healer);
+                if (rc == 0) {
+                        healer->running = 1;
+                        healer->rerun = 1;
+                }
+        }
+
+        pthread_mutex_unlock (&healer->mutex);
+
+        return rc;
+}
+
+/*
+ * Start (or wake up) the full healer of child number @subvol.
+ * Returns the ec_shd_healer_spawn() result.
+ */
+int
+ec_shd_full_healer_spawn (xlator_t *this, int subvol)
+{
+        struct subvol_healer *healer = NTH_FULL_HEALER (this, subvol);
+
+        return ec_shd_healer_spawn (this, healer, ec_shd_full_healer);
+}
+
+
+/*
+ * Start (or wake up) the index healer of child number @subvol.
+ * Returns the ec_shd_healer_spawn() result.
+ */
+int
+ec_shd_index_healer_spawn (xlator_t *this, int subvol)
+{
+        struct subvol_healer *healer = NTH_INDEX_HEALER (this, subvol);
+
+        return ec_shd_healer_spawn (this, healer, ec_shd_index_healer);
+}
+
+/*
+ * Trigger the index healer of child number @child.
+ * Does nothing unless ec->shd.iamshd is set.
+ */
+void
+ec_selfheal_childup (ec_t *ec, int child)
+{
+        if (ec->shd.iamshd) {
+                ec_shd_index_healer_spawn (ec->xl, child);
+        }
+}
+
+/*
+ * Allocate one zeroed healer per child into *healers and initialise each
+ * of them with its child index.
+ *
+ * Returns 0 on success, -1 if the allocation fails, or the error returned
+ * by ec_shd_healer_init().
+ */
+static int
+ec_shd_healers_setup (xlator_t *this, int count,
+                      struct subvol_healer **healers)
+{
+        int idx = 0;
+        int rc  = 0;
+
+        *healers = GF_CALLOC (sizeof(**healers), count,
+                              ec_mt_subvol_healer_t);
+        if (*healers == NULL) {
+                return -1;
+        }
+
+        for (idx = 0; idx < count; idx++) {
+                (*healers)[idx].subvol = idx;
+                rc = ec_shd_healer_init (this, &(*healers)[idx]);
+                if (rc != 0) {
+                        return rc;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Create the index and full healer arrays of the self-heal daemon, one
+ * entry per child of the disperse volume.
+ *
+ * Returns 0 on success, -1 if an allocation fails, or the error returned
+ * by ec_shd_healer_init(). The full healers are not set up if setting up
+ * the index healers fails.
+ */
+int
+ec_selfheal_daemon_init (xlator_t *this)
+{
+        ec_t            *priv = this->private;
+        ec_self_heald_t *shd  = &priv->shd;
+        int              rc   = -1;
+
+        rc = ec_shd_healers_setup (this, priv->nodes, &shd->index_healers);
+        if (rc == 0) {
+                rc = ec_shd_healers_setup (this, priv->nodes,
+                                           &shd->full_healers);
+        }
+
+        return rc;
+}
+
+
+/*
+ * Handle a heal request for every child of the volume.
+ *
+ * For each child a "<xl_id>-<child>-status" string is stored in @output
+ * describing why healing was not started (brick not connected, volume
+ * down, brick remote) or that it was started. When it is started, the
+ * full or index healer is spawned according to @op.
+ *
+ * Returns 0 if healing was started for at least one child, -1 otherwise.
+ * Failures of dict_set_str() are ignored.
+ */
+int
+ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id)
+{
+        char           key[64] = {0};
+        char          *status  = NULL;
+        ec_t          *priv    = this->private;
+        gf_boolean_t   start   = _gf_false;
+        int            result  = -1;
+        int            child   = 0;
+        GF_UNUSED int  ret     = 0;
+
+        for (child = 0; child < priv->nodes; child++) {
+                snprintf (key, sizeof (key), "%d-%d-status", xl_id, child);
+
+                start = _gf_false;
+                if (((priv->xl_up >> child) & 1) == 0) {
+                        status = "Brick is not connected";
+                } else if (!priv->up) {
+                        status = "Disperse subvolume is not up";
+                } else if (!ec_shd_is_subvol_local (this, child)) {
+                        status = "Brick is remote";
+                } else {
+                        status = "Started self-heal";
+                        start = _gf_true;
+                }
+
+                ret = dict_set_str (output, key, status);
+
+                if (start) {
+                        if (op == GF_SHD_OP_HEAL_FULL) {
+                                ec_shd_full_healer_spawn (this, child);
+                        } else if (op == GF_SHD_OP_HEAL_INDEX) {
+                                ec_shd_index_healer_spawn (this, child);
+                        }
+                        result = 0;
+                }
+        }
+
+        return result;
+}
+
+/*
+ * Entry point for xlator operations sent to the self-heal daemon.
+ *
+ * Reads the operation ("xl-op") and this xlator's id (keyed by the xlator
+ * name) from @input, temporarily stores the id in @output and dispatches
+ * full and index heal requests to ec_heal_op(). Any other operation
+ * yields -1. The id key is removed from @output before returning, on
+ * every path.
+ */
+int
+ec_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+        gf_xl_afr_op_t op    = GF_SHD_OP_INVALID;
+        int            xl_id = 0;
+        int            rc    = 0;
+
+        rc = dict_get_int32 (input, "xl-op", (int32_t *)&op);
+        if (rc != 0) {
+                goto done;
+        }
+
+        rc = dict_get_int32 (input, this->name, &xl_id);
+        if (rc != 0) {
+                goto done;
+        }
+
+        rc = dict_set_int32 (output, this->name, xl_id);
+        if (rc != 0) {
+                goto done;
+        }
+
+        switch (op) {
+        case GF_SHD_OP_HEAL_FULL:
+        case GF_SHD_OP_HEAL_INDEX:
+                rc = ec_heal_op (this, output, op, xl_id);
+                break;
+
+        default:
+                rc = -1;
+                break;
+        }
+
+done:
+        dict_del (output, this->name);
+
+        return rc;
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
new file mode 100644
index 00000000000..0f27a8ec776
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_HEALD_H__
+#define __EC_HEALD_H__
+
+#include "xlator.h"
+
+struct _ec;
+typedef struct _ec ec_t;
+
+struct subvol_healer { /* state of one healer thread bound to one child subvolume */
+ xlator_t *this; /* owning xlator, set by ec_shd_healer_init() */
+ int subvol; /* index of the child in ec->xl_list */
+ gf_boolean_t local; /* result of the last locality check (ASSERT_LOCAL) */
+ gf_boolean_t running; /* set when a healer thread has been created; cleared when it decides to stop */
+ gf_boolean_t rerun; /* a new run has been requested; consumed by __ec_shd_healer_wait() */
+ pthread_mutex_t mutex; /* held while running/rerun are read or changed and while waiting on cond */
+ pthread_cond_t cond; /* signalled by ec_shd_healer_spawn() to wake the thread */
+ pthread_t thread; /* thread handle filled in by gf_thread_create() */
+};
+
+struct _ec_self_heald;
+typedef struct _ec_self_heald ec_self_heald_t;
+
+struct _ec_self_heald { /* self-heal daemon state embedded in ec_t as 'shd' */
+ gf_boolean_t iamshd; /* ec_selfheal_childup() spawns index healers only when this is set */
+ gf_boolean_t enabled; /* healers idle and heal callbacks return -EBUSY while this is false */
+ int timeout; /* NOTE(review): not referenced in ec-heald.c; presumably a heal timeout - confirm */
+ struct subvol_healer *index_healers; /* array with one index healer per child (ec->nodes entries) */
+ struct subvol_healer *full_healers; /* array with one full healer per child (ec->nodes entries) */
+};
+
+int
+ec_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+int
+ec_selfheal_daemon_init (xlator_t *this);
+void ec_selfheal_childup (ec_t *ec, int child);
+#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
new file mode 100644
index 00000000000..c8f904ac51d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -0,0 +1,848 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+
+#include "byte-order.h"
+
+#include "ec-mem-types.h"
+#include "ec-fops.h"
+#include "ec-helpers.h"
+#include "ec-messages.h"
+
+#ifndef ffsll
+#define ffsll(x) __builtin_ffsll(x)
+#endif
+
+static const char * ec_fop_list[] = /* names of EC-private fops, indexed by the negated fop id (see ec_fop_name) */
+{
+ [-EC_FOP_HEAL] = "HEAL"
+};
+
+/*
+ * Render 'value' in binary into the buffer 'str' of 'size' bytes.
+ *
+ * The digits are written right-aligned at the end of the buffer, using at
+ * least 'digits' positions (zero padded) and as many more as the value
+ * needs. Returns a pointer to the first digit inside 'str', or the
+ * constant string "<buffer too small>" if the text plus its terminator
+ * does not fit.
+ */
+const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits)
+{
+    char *cursor = str + size;
+    size_t room = size;
+
+    if (room == 0)
+    {
+        return "<buffer too small>";
+    }
+    room--;
+    cursor--;
+    *cursor = 0;
+
+    while ((digits > 0) || (value != 0))
+    {
+        if (room == 0)
+        {
+            return "<buffer too small>";
+        }
+        room--;
+        cursor--;
+        *cursor = '0' + (value & 1);
+        value >>= 1;
+        digits--;
+    }
+
+    return cursor;
+}
+
+/*
+ * Translate a fop id into its name: non-negative ids are looked up in
+ * gf_fop_list, negative ids in the EC-private ec_fop_list using the
+ * negated id. No range check is made on 'id'.
+ */
+const char * ec_fop_name(int32_t id)
+{
+    return (id >= 0) ? gf_fop_list[id] : ec_fop_list[-id];
+}
+
+/*
+ * Emit a trace-level log line describing 'fop' at the point 'event'.
+ *
+ * The printf-style 'fmt' and its arguments are formatted into a
+ * temporary string that is appended to the fixed part of the message
+ * (identifiers, counters and the mask/remaining/good bitmaps in binary).
+ * If the temporary string cannot be allocated a fixed placeholder text is
+ * logged instead.
+ */
+void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
+{
+    char mask_bits[32], remaining_bits[32], good_bits[32];
+    char *text = NULL;
+    ec_t *ec = fop->xl->private;
+    va_list ap;
+    int32_t formatted;
+
+    va_start(ap, fmt);
+    formatted = vasprintf(&text, fmt, ap);
+    va_end(ap);
+
+    if (formatted < 0)
+    {
+        text = "<memory allocation error>";
+    }
+
+    gf_msg_trace ("ec", 0, "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] "
+                  "frame=%p/%p, min/exp=%d/%d, err=%d state=%d "
+                  "{%s:%s:%s} %s",
+                  event, ec_fop_name(fop->id), fop, fop->parent, fop->refs,
+                  fop->winds, fop->jobs, fop->req_frame, fop->frame,
+                  fop->minimum, fop->expected, fop->error, fop->state,
+                  ec_bin(mask_bits, sizeof(mask_bits), fop->mask, ec->nodes),
+                  ec_bin(remaining_bits, sizeof(remaining_bits),
+                         fop->remaining, ec->nodes),
+                  ec_bin(good_bits, sizeof(good_bits), fop->good, ec->nodes),
+                  text);
+
+    /* only a string produced by vasprintf() may be released */
+    if (formatted >= 0)
+    {
+        free(text);
+    }
+}
+
+/*
+ * Return the number of bits set in 'n' (0..64).
+ *
+ * The count is built in parallel: first per 2-bit pair, then per nibble,
+ * then per byte; the eight byte counts are finally added together with a
+ * multiplication that accumulates them in the top byte.
+ */
+int32_t ec_bits_count(uint64_t n)
+{
+    uint64_t pairs, nibbles, bytes;
+
+    pairs = n - ((n >> 1) & 0x5555555555555555ULL);
+    nibbles = (pairs & 0x3333333333333333ULL) +
+              ((pairs >> 2) & 0x3333333333333333ULL);
+    bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
+
+    return (int32_t)((bytes * 0x0101010101010101ULL) >> 56);
+}
+
+/*
+ * Return the zero-based position of the lowest bit set in 'n', or -1 when
+ * 'n' is 0.
+ */
+int32_t ec_bits_index(uint64_t n)
+{
+    int32_t position = ffsll(n);
+
+    return position - 1;
+}
+
+/*
+ * Clear the lowest bit set in '*n' and return its zero-based position.
+ * When '*n' is 0 it is left unchanged and -1 is returned.
+ */
+int32_t ec_bits_consume(uint64_t * n)
+{
+    uint64_t current = *n;
+    /* x & -x isolates the lowest set bit */
+    uint64_t lowest = current & -current;
+
+    *n = current ^ lowest;
+
+    return ffsll(lowest) - 1;
+}
+
+/*
+ * Copy up to 'size' bytes from the scattered buffers in 'vector' ('count'
+ * entries) into 'dst', starting 'offset' bytes into the concatenated
+ * data.
+ *
+ * Returns the number of bytes copied, which is smaller than 'size' when
+ * the vectors end first (0 if 'offset' lies beyond all the data).
+ */
+size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
+                      off_t offset, size_t size)
+{
+    int32_t idx = 0;
+    size_t copied = 0;
+    size_t chunk = 0;
+
+    /* Skip the vectors that lie entirely before 'offset'. */
+    while ((idx < count) && !(offset < vector[idx].iov_len))
+    {
+        offset -= vector[idx].iov_len;
+        idx++;
+    }
+
+    /* Copy from the first useful vector onwards; only that first one is
+     * entered at a non-zero offset. */
+    while ((idx < count) && (size > 0))
+    {
+        chunk = vector[idx].iov_len - offset;
+        if (chunk > size)
+        {
+            chunk = size;
+        }
+        memcpy(dst, vector[idx].iov_base + offset, chunk);
+        idx++;
+        offset = 0;
+        dst += chunk;
+        copied += chunk;
+        size -= chunk;
+    }
+
+    return copied;
+}
+
+/*
+ * Store 'size' 64-bit numbers from 'value' under 'key' in 'dict', each one
+ * converted with hton64().
+ *
+ * Returns -EINVAL if 'value' is NULL, -ENOMEM if the copy cannot be
+ * allocated, otherwise the dict_set_bin() result. The copy is released
+ * here when dict_set_bin() fails.
+ */
+int32_t ec_dict_set_array(dict_t *dict, char *key, uint64_t value[],
+                          int32_t size)
+{
+    uint64_t *encoded = NULL;
+    int32_t idx = 0;
+    int rc = -1;
+
+    if (value == NULL) {
+        return -EINVAL;
+    }
+
+    encoded = GF_MALLOC(sizeof(uint64_t) * size, gf_common_mt_char);
+    if (encoded == NULL) {
+        return -ENOMEM;
+    }
+
+    for (idx = 0; idx < size; idx++) {
+        encoded[idx] = hton64(value[idx]);
+    }
+
+    rc = dict_set_bin(dict, key, encoded, sizeof(uint64_t) * size);
+    if (rc != 0) {
+        GF_FREE (encoded);
+    }
+
+    return rc;
+}
+
+
+/*
+ * Read the array of 64-bit numbers stored under 'key' into 'value' ('size'
+ * entries, converted with ntoh64()) and remove the key from 'dict'.
+ *
+ * The stored array may be shorter than 'size'; the missing trailing
+ * entries are then filled with the last stored one.
+ *
+ * Returns 0 on success, -EINVAL if 'dict' is NULL or the stored length is
+ * larger than requested, not a multiple of 8 bytes, or empty while
+ * entries are expected, or the error returned by dict_get_ptr_and_len().
+ * On error 'value' is not modified and the key is not removed.
+ */
+int32_t ec_dict_del_array(dict_t *dict, char *key, uint64_t value[],
+                          int32_t size)
+{
+    void *ptr;
+    int32_t len;
+    int32_t vindex;
+    int32_t old_size = 0;
+    int32_t err;
+
+    if (dict == NULL) {
+        return -EINVAL;
+    }
+    err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+    if (err != 0) {
+        return err;
+    }
+
+    if (len > (size * sizeof(uint64_t)) || (len % sizeof (uint64_t))) {
+        return -EINVAL;
+    }
+
+    /* 3.6 version ec would have stored version in 64 bit. In that case treat
+     * metadata versions same as data*/
+    old_size = min (size, len/sizeof(uint64_t));
+
+    /* An empty stored value has no last entry to replicate: filling from
+     * value[old_size - 1] would read before the start of the array. */
+    if ((old_size == 0) && (size > 0)) {
+        return -EINVAL;
+    }
+
+    memset (value, 0, size * sizeof(uint64_t));
+    for (vindex = 0; vindex < old_size; vindex++) {
+        value[vindex] = ntoh64(*((uint64_t *)ptr + vindex));
+    }
+
+    if (old_size < size) {
+        for (vindex = old_size; vindex < size; vindex++) {
+            value[vindex] = value[old_size-1];
+        }
+    }
+
+    dict_del(dict, key);
+
+    return 0;
+}
+
+
+/*
+ * Store 'value', converted with hton64(), under 'key' in 'dict'.
+ *
+ * Returns -ENOMEM if the 8-byte buffer cannot be allocated, otherwise the
+ * dict_set_bin() result. The buffer is released here when dict_set_bin()
+ * fails.
+ */
+int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value)
+{
+    uint64_t *encoded = NULL;
+    int rc = -1;
+
+    encoded = GF_MALLOC(sizeof(value), gf_common_mt_char);
+    if (encoded == NULL) {
+        return -ENOMEM;
+    }
+    *encoded = hton64(value);
+
+    rc = dict_set_bin(dict, key, encoded, sizeof(value));
+    if (rc != 0) {
+        GF_FREE (encoded);
+    }
+
+    return rc;
+}
+
+/*
+ * Read the 64-bit number stored under 'key' into '*value' (converted with
+ * ntoh64()) and remove the key from 'dict'.
+ *
+ * Returns 0 on success, -EINVAL if 'dict' is NULL or the stored value is
+ * not exactly 8 bytes long, or the error from dict_get_ptr_and_len().
+ * The key is only removed on success.
+ */
+int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value)
+{
+    void *raw = NULL;
+    int32_t raw_len = 0;
+    int32_t rc = 0;
+
+    if (dict == NULL) {
+        return -EINVAL;
+    }
+
+    rc = dict_get_ptr_and_len(dict, key, &raw, &raw_len);
+    if (rc != 0) {
+        return rc;
+    }
+
+    if (raw_len != sizeof(uint64_t)) {
+        return -EINVAL;
+    }
+
+    *value = ntoh64(*(uint64_t *)raw);
+    dict_del(dict, key);
+
+    return 0;
+}
+
+/*
+ * Pack 'config' into one 64-bit word and store it, converted with
+ * hton64(), under 'key' in 'dict'.
+ *
+ * Layout, from the most significant byte: version, algorithm,
+ * gf_word_size, bricks, redundancy; chunk_size is OR-ed into the low
+ * bits.
+ *
+ * Returns -EINVAL (and logs an error) if the version is newer than
+ * EC_CONFIG_VERSION, -ENOMEM if the buffer cannot be allocated, otherwise
+ * the dict_set_bin() result.
+ */
+int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config)
+{
+    uint64_t *encoded = NULL;
+    uint64_t packed = 0;
+    int rc = -1;
+
+    if (config->version > EC_CONFIG_VERSION)
+    {
+        gf_msg ("ec", GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNSUPPORTED_VERSION,
+                "Trying to store an unsupported config "
+                "version (%u)", config->version);
+
+        return -EINVAL;
+    }
+
+    encoded = GF_MALLOC(sizeof(uint64_t), gf_common_mt_char);
+    if (encoded == NULL)
+    {
+        return -ENOMEM;
+    }
+
+    packed = (((uint64_t)config->version) << 56) |
+             (((uint64_t)config->algorithm) << 48) |
+             (((uint64_t)config->gf_word_size) << 40) |
+             (((uint64_t)config->bricks) << 32) |
+             (((uint64_t)config->redundancy) << 24) |
+             config->chunk_size;
+
+    *encoded = hton64(packed);
+
+    rc = dict_set_bin(dict, key, encoded, sizeof(uint64_t));
+    if (rc != 0)
+    {
+        GF_FREE (encoded);
+    }
+
+    return rc;
+}
+
+/*
+ * Read the packed configuration stored under 'key', unpack it into
+ * 'config' and remove the key from 'dict'.
+ *
+ * Returns 0 on success, -EINVAL if 'dict' is NULL, the stored value is
+ * not 8 bytes long or its version is newer than EC_CONFIG_VERSION,
+ * -ENODATA if the stored word is 0, or the error from
+ * dict_get_ptr_and_len(). The key is only removed on success. Note that
+ * config->version is already overwritten when the version check fails.
+ */
+int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config)
+{
+    void *raw = NULL;
+    uint64_t packed = 0;
+    int32_t raw_len = 0;
+    int32_t rc = 0;
+
+    if (dict == NULL) {
+        return -EINVAL;
+    }
+
+    rc = dict_get_ptr_and_len(dict, key, &raw, &raw_len);
+    if (rc != 0) {
+        return rc;
+    }
+
+    if (raw_len != sizeof(uint64_t)) {
+        return -EINVAL;
+    }
+
+    packed = ntoh64(*(uint64_t *)raw);
+
+    /* The config xattr is also requested for entries of type IA_INVAL.
+     * Such entries can later turn out to be directories (after
+     * inode_link()), which have no config xattr. Because the xattr is
+     * requested with an xattrop() fop, the answer in that case is a config
+     * made of zeros rather than an error saying it does not exist.
+     *
+     * An all-zero config is therefore treated exactly like a non-existent
+     * xattr; otherwise ec_config_check() would fail on it.
+     */
+    if (packed == 0) {
+        return -ENODATA;
+    }
+
+    config->version = (packed >> 56) & 0xff;
+    if (config->version > EC_CONFIG_VERSION)
+    {
+        gf_msg ("ec", GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNSUPPORTED_VERSION,
+                "Found an unsupported config version (%u)",
+                config->version);
+
+        return -EINVAL;
+    }
+
+    config->algorithm = (packed >> 48) & 0xff;
+    config->gf_word_size = (packed >> 40) & 0xff;
+    config->bricks = (packed >> 32) & 0xff;
+    config->redundancy = (packed >> 24) & 0xff;
+    config->chunk_size = packed & 0xffffff;
+
+    dict_del(dict, key);
+
+    return 0;
+}
+
+/*
+ * Reconcile the gfid 'dst' with the gfid 'src'.
+ *
+ * A null 'src' is always accepted. A null 'dst' is filled in from 'src'.
+ * When both are set they must be equal; a mismatch is logged as a warning
+ * and _gf_false is returned. Every other case returns _gf_true.
+ */
+gf_boolean_t ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src)
+{
+    if (!gf_uuid_is_null(src)) {
+        if (gf_uuid_is_null(dst)) {
+            gf_uuid_copy(dst, src);
+        } else if (gf_uuid_compare(dst, src) != 0) {
+            gf_msg (xl->name, GF_LOG_WARNING, 0,
+                    EC_MSG_GFID_MISMATCH,
+                    "Mismatching GFID's in loc");
+
+            return _gf_false;
+        }
+    }
+
+    return _gf_true;
+}
+
+/*
+ * Make loc->inode and loc->gfid consistent.
+ *
+ * If the loc already has an inode, its gfid is reconciled with the
+ * inode's gfid. Otherwise, when an inode table is given, the inode is
+ * searched by gfid or, failing that, resolved from a path that contains a
+ * '/'. Not finding an inode is not an error.
+ *
+ * Returns 0, or -EINVAL when the loc's gfid conflicts with its inode.
+ */
+int32_t ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc)
+{
+    if (loc->inode != NULL) {
+        if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) {
+            return -EINVAL;
+        }
+
+        return 0;
+    }
+
+    if (table == NULL) {
+        return 0;
+    }
+
+    if (!gf_uuid_is_null(loc->gfid)) {
+        loc->inode = inode_find(table, loc->gfid);
+    } else if (loc->path && strchr (loc->path, '/')) {
+        loc->inode = inode_resolve(table, (char *)loc->path);
+    }
+
+    return 0;
+}
+
+/*
+ * Make loc->parent and loc->pargfid consistent.
+ *
+ * If the loc already has a parent inode, pargfid is reconciled with it.
+ * Otherwise, when an inode table is given, the parent is searched by
+ * pargfid or, failing that, resolved from the directory part of a path
+ * that contains a '/'. If pargfid is still unknown at the end, loc->name
+ * is cleared.
+ *
+ * Returns 0, -EINVAL on a gfid conflict, or -ENOMEM if the path cannot be
+ * duplicated.
+ */
+int32_t ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc)
+{
+    char *path_copy = NULL;
+    char *dir = NULL;
+
+    if (loc->parent != NULL) {
+        if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) {
+            return -EINVAL;
+        }
+    } else if (table != NULL) {
+        if (!gf_uuid_is_null(loc->pargfid)) {
+            loc->parent = inode_find(table, loc->pargfid);
+        } else if (loc->path && strchr (loc->path, '/')) {
+            path_copy = gf_strdup(loc->path);
+            if (path_copy == NULL) {
+                gf_msg (xl->name, GF_LOG_ERROR, ENOMEM,
+                        EC_MSG_NO_MEMORY,
+                        "Unable to duplicate path '%s'",
+                        loc->path);
+
+                return -ENOMEM;
+            }
+            /* dirname() works on the private copy, never on loc->path */
+            dir = dirname(path_copy);
+            loc->parent = inode_resolve(table, dir);
+            if (loc->parent != NULL) {
+                gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+            }
+            GF_FREE(path_copy);
+        }
+    }
+
+    /* Without a known 'pargfid' the name must not be used, to avoid
+       resolutions based on <gfid:pargfid>/name. */
+    if (gf_uuid_is_null(loc->pargfid)) {
+        loc->name = NULL;
+    }
+
+    return 0;
+}
+
+/*
+ * Validate loc->path and derive loc->name from it.
+ *
+ * A NULL path is accepted as is. A path without any '/' is accepted only
+ * in the form "<gfid:...>", and then name is left alone. For "/" the
+ * loc's gfid must be compatible with the root gfid; for an entry directly
+ * under "/" its pargfid must be. The last path component must match
+ * loc->name if that is already set, otherwise loc->name is pointed at it.
+ *
+ * Returns 0 on success and -EINVAL on any inconsistency.
+ */
+int32_t ec_loc_setup_path(xlator_t *xl, loc_t *loc)
+{
+    uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    char *last = NULL;
+
+    if (loc->path == NULL) {
+        return 0;
+    }
+
+    last = strrchr(loc->path, '/');
+    if (last == NULL) {
+        /* Only gfid paths may lack a '/': <gfid:...> */
+        if (strncmp(loc->path, "<gfid:", 6) == 0) {
+            return 0;
+        }
+
+        return -EINVAL;
+    }
+
+    if (last == loc->path) {
+        /* The only '/' is the leading one: root itself or a child of it */
+        if (last[1] == 0) {
+            if (!ec_loc_gfid_check(xl, loc->gfid, root)) {
+                return -EINVAL;
+            }
+        } else if (!ec_loc_gfid_check(xl, loc->pargfid, root)) {
+            return -EINVAL;
+        }
+    }
+    last++;
+
+    if (loc->name == NULL) {
+        loc->name = last;
+    } else if (strcmp(loc->name, last) != 0) {
+        gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_INVALID_LOC_NAME,
+                "Invalid name '%s' in loc",
+                loc->name);
+
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Build in 'parent' a loc describing the parent directory of 'loc'.
+ *
+ * 'parent' is cleared first. Its inode comes from loc->parent, its gfid
+ * from loc->pargfid and its path from the directory part of loc->path,
+ * whichever of them are available; the result is then completed with
+ * ec_loc_setup_path(), ec_loc_setup_inode() and ec_loc_setup_parent().
+ *
+ * Returns 0 on success, -ENOMEM if a path cannot be duplicated, -EINVAL
+ * if nothing identifies the parent (no inode, path or gfid), or the error
+ * from one of the setup steps. On failure 'parent' is wiped.
+ */
+int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent)
+{
+    inode_table_t *itable = NULL;
+    char *path_copy = NULL;
+    int32_t rc = -ENOMEM;
+
+    memset(parent, 0, sizeof(loc_t));
+
+    if (loc->parent != NULL) {
+        itable = loc->parent->table;
+        parent->inode = inode_ref(loc->parent);
+    } else if (loc->inode != NULL) {
+        itable = loc->inode->table;
+    }
+
+    if (!gf_uuid_is_null(loc->pargfid)) {
+        gf_uuid_copy(parent->gfid, loc->pargfid);
+    }
+
+    if (loc->path && strchr (loc->path, '/')) {
+        path_copy = gf_strdup(loc->path);
+        if (path_copy == NULL) {
+            gf_msg (xl->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Unable to duplicate path '%s'",
+                    loc->path);
+
+            goto done;
+        }
+        parent->path = gf_strdup(dirname(path_copy));
+        if (parent->path == NULL) {
+            gf_msg (xl->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Unable to duplicate path '%s'",
+                    dirname(path_copy));
+
+            goto done;
+        }
+    }
+
+    rc = ec_loc_setup_path(xl, parent);
+    if (rc == 0) {
+        rc = ec_loc_setup_inode(xl, itable, parent);
+    }
+    if (rc == 0) {
+        rc = ec_loc_setup_parent(xl, itable, parent);
+    }
+
+    if ((rc == 0) && (parent->inode == NULL) && (parent->path == NULL) &&
+        gf_uuid_is_null(parent->gfid)) {
+        gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_LOC_PARENT_INODE_MISSING,
+                "Parent inode missing for loc_t");
+
+        rc = -EINVAL;
+    }
+
+done:
+    GF_FREE(path_copy);
+
+    if (rc != 0) {
+        loc_wipe(parent);
+    }
+
+    return rc;
+}
+
+/*
+ * Refresh 'loc' with the optional 'inode' and 'iatt' and complete its
+ * path, inode and parent information.
+ *
+ * When 'inode' is given and differs from loc->inode, the loc is switched
+ * to it (dropping the old reference) and takes its gfid. When 'iatt' is
+ * given, its gfid must be compatible with the loc's gfid. The inode table
+ * used for lookups comes from 'inode', loc->inode or loc->parent, in that
+ * order of preference.
+ *
+ * Returns 0 on success, -EINVAL on a gfid conflict, or the error from one
+ * of the setup steps.
+ */
+int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode,
+                      struct iatt *iatt)
+{
+    inode_table_t *itable = NULL;
+    int32_t rc = 0;
+
+    if (inode != NULL) {
+        itable = inode->table;
+        if (loc->inode != inode) {
+            if (loc->inode != NULL) {
+                inode_unref(loc->inode);
+            }
+            loc->inode = inode_ref(inode);
+            gf_uuid_copy(loc->gfid, inode->gfid);
+        }
+    } else if (loc->inode != NULL) {
+        itable = loc->inode->table;
+    } else if (loc->parent != NULL) {
+        itable = loc->parent->table;
+    }
+
+    if ((iatt != NULL) &&
+        !ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid)) {
+        return -EINVAL;
+    }
+
+    rc = ec_loc_setup_path(xl, loc);
+    if (rc == 0) {
+        rc = ec_loc_setup_inode(xl, itable, loc);
+    }
+    if (rc == 0) {
+        rc = ec_loc_setup_parent(xl, itable, loc);
+    }
+
+    return rc;
+}
+
+/*
+ * Build 'loc' for the file behind 'fd'.
+ *
+ * 'loc' is cleared, seeded with a copy of the loc kept in the fd's EC
+ * context (when a context is available) and then updated with fd->inode.
+ *
+ * Returns 0 on success, -ENOMEM if the copy fails, or the error from
+ * ec_loc_update(). On failure 'loc' is wiped.
+ */
+int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd)
+{
+    ec_fd_t *fd_ctx = NULL;
+    int32_t rc = -ENOMEM;
+
+    memset(loc, 0, sizeof(*loc));
+
+    fd_ctx = ec_fd_get(fd, xl);
+    if ((fd_ctx == NULL) || (loc_copy(loc, &fd_ctx->loc) == 0)) {
+        rc = ec_loc_update(xl, loc, fd->inode, NULL);
+    }
+
+    if (rc != 0) {
+        loc_wipe(loc);
+    }
+
+    return rc;
+}
+
+/*
+ * Build 'dst' as a copy of 'src' and complete it with ec_loc_update().
+ *
+ * Returns 0 on success, -ENOMEM if the copy fails, or the error from
+ * ec_loc_update(). On failure 'dst' is wiped.
+ */
+int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src)
+{
+    int32_t rc = -ENOMEM;
+
+    memset(dst, 0, sizeof(*dst));
+
+    if (loc_copy(dst, src) == 0) {
+        rc = ec_loc_update(xl, dst, NULL, NULL);
+    }
+
+    if (rc != 0) {
+        loc_wipe(dst);
+    }
+
+    return rc;
+}
+
+/*
+ * Set the lock owner of the frame's call stack from the pointer 'owner'.
+ */
+void ec_owner_set(call_frame_t * frame, void * owner)
+{
+    gf_lkowner_t *lk_owner = &frame->root->lk_owner;
+
+    set_lk_owner_from_ptr(lk_owner, owner);
+}
+
+/*
+ * Copy the lock owner 'owner' (length and data) into the lock owner of the
+ * frame's call stack.
+ */
+void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner)
+{
+    gf_lkowner_t *target = &frame->root->lk_owner;
+
+    target->len = owner->len;
+    memcpy(target->data, owner->data, owner->len);
+}
+
+/*
+ * Return the EC context attached to 'inode' for xlator 'xl', creating a
+ * zeroed one (with an initialised 'heal' list) if none is stored yet.
+ *
+ * Returns NULL if the context cannot be allocated or stored. The leading
+ * underscores follow the file's convention for the variant that does not
+ * take the lock itself; ec_inode_get() calls it under inode->lock.
+ */
+ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl)
+{
+    ec_inode_t *ctx = NULL;
+    uint64_t value = 0;
+
+    if ((__inode_ctx_get(inode, xl, &value) == 0) && (value != 0))
+    {
+        return (ec_inode_t *)(uintptr_t)value;
+    }
+
+    ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t);
+    if (ctx == NULL)
+    {
+        return NULL;
+    }
+
+    memset(ctx, 0, sizeof(*ctx));
+    INIT_LIST_HEAD(&ctx->heal);
+
+    value = (uint64_t)(uintptr_t)ctx;
+    if (__inode_ctx_set(inode, xl, &value) != 0)
+    {
+        GF_FREE(ctx);
+
+        return NULL;
+    }
+
+    return ctx;
+}
+
+/*
+ * Locked wrapper around __ec_inode_get(): fetches (or creates) the EC
+ * context of 'inode' while holding inode->lock.
+ */
+ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
+{
+    ec_inode_t *result = NULL;
+
+    LOCK(&inode->lock);
+    result = __ec_inode_get(inode, xl);
+    UNLOCK(&inode->lock);
+
+    return result;
+}
+
+/*
+ * Return the EC context attached to 'fd' for xlator 'xl', creating a
+ * zeroed one if none is stored yet.
+ *
+ * For anonymous fds the context is additionally marked as open on all
+ * subvolumes and its loc is refreshed from fd->inode (best effort: the
+ * result of ec_loc_update() is not checked).
+ *
+ * Returns NULL if the context cannot be allocated or stored. The leading
+ * underscores follow the file's convention for the variant that does not
+ * take the lock itself; ec_fd_get() calls it under fd->lock.
+ */
+ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
+{
+    ec_fd_t * ctx = NULL;
+    uint64_t value = 0;
+
+    if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
+    {
+        ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
+        if (ctx != NULL)
+        {
+            memset(ctx, 0, sizeof(*ctx));
+
+            value = (uint64_t)(uintptr_t)ctx;
+            if (__fd_ctx_set(fd, xl, value) != 0)
+            {
+                GF_FREE(ctx);
+
+                return NULL;
+            }
+        }
+    }
+    else
+    {
+        ctx = (ec_fd_t *)(uintptr_t)value;
+    }
+
+    /* Treat anonymous fd specially. ctx is NULL here when the allocation
+     * above failed, so it must be checked before being dereferenced. */
+    if (fd->anonymous && (ctx != NULL)) {
+        /* Mark the fd open for all subvolumes. */
+        ctx->open = -1;
+        /* Try to populate ctx->loc with fd->inode information. */
+        ec_loc_update(xl, &ctx->loc, fd->inode, NULL);
+    }
+
+    return ctx;
+}
+
+/*
+ * Locked wrapper around __ec_fd_get(): fetches (or creates) the EC context
+ * of 'fd' while holding fd->lock.
+ */
+ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl)
+{
+    ec_fd_t *result = NULL;
+
+    LOCK(&fd->lock);
+    result = __ec_fd_get(fd, xl);
+    UNLOCK(&fd->lock);
+
+    return result;
+}
+
+/*
+ * Round '*offset' down to a multiple of the stripe size and, if 'scale' is
+ * non-zero, divide the rounded value by the number of fragments.
+ *
+ * Returns the amount that was rounded off, i.e. the distance of the
+ * original offset from the start of its stripe.
+ */
+uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale)
+{
+    off_t aligned = *offset;
+    off_t head = aligned % ec->stripe_size;
+
+    aligned -= head;
+    if (scale)
+    {
+        aligned /= ec->fragments;
+    }
+    *offset = aligned;
+
+    return head;
+}
+
+/*
+ * Round 'size' up to a multiple of the stripe size and, if 'scale' is
+ * non-zero, divide the rounded value by the number of fragments.
+ * Returns the adjusted size.
+ */
+uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale)
+{
+    uint64_t rounded = size;
+
+    rounded += ec->stripe_size - 1;
+    rounded -= rounded % ec->stripe_size;
+
+    return scale ? (rounded / ec->fragments) : rounded;
+}
+
+/*
+ * Dictionary match callback: tells whether 'key' is a non-NULL name that
+ * starts with EC_XATTR_PREFIX. The remaining arguments are not used.
+ */
+gf_boolean_t
+ec_is_internal_xattr (dict_t *dict, char *key, data_t *value, void *data)
+{
+        if (key == NULL) {
+                return _gf_false;
+        }
+
+        if (strncmp (key, EC_XATTR_PREFIX, strlen (EC_XATTR_PREFIX)) != 0) {
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Remove from 'xattr' every entry selected by ec_is_internal_xattr(),
+ * i.e. every key that starts with EC_XATTR_PREFIX.
+ */
+void
+ec_filter_internal_xattrs (dict_t *xattr)
+{
+        dict_foreach_match (xattr,
+                            ec_is_internal_xattr, NULL,
+                            dict_remove_foreach_fn, NULL);
+}
+
+/*
+ * Tell whether 'fop' is one of the operations classified here as data
+ * fops: write, truncate, ftruncate, fallocate, discard or zerofill.
+ */
+gf_boolean_t
+ec_is_data_fop (glusterfs_fop_t fop)
+{
+        gf_boolean_t is_data = _gf_false;
+
+        switch (fop) {
+        case GF_FOP_WRITE:
+        case GF_FOP_TRUNCATE:
+        case GF_FOP_FTRUNCATE:
+        case GF_FOP_FALLOCATE:
+        case GF_FOP_DISCARD:
+        case GF_FOP_ZEROFILL:
+                is_data = _gf_true;
+                break;
+        default:
+                break;
+        }
+
+        return is_data;
+}
+/*
+gf_boolean_t
+ec_is_metadata_fop (int32_t lock_kind, glusterfs_fop_t fop)
+{
+ if (lock_kind == EC_LOCK_ENTRY) {
+ return _gf_false;
+ }
+
+ switch (fop) {
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}*/
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
new file mode 100644
index 00000000000..1f39da2c09f
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_HELPERS_H__
+#define __EC_HELPERS_H__
+
+#include "ec-data.h"
+
+const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);
+const char * ec_fop_name(int32_t id);
+void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...);
+int32_t ec_bits_count(uint64_t n);
+int32_t ec_bits_index(uint64_t n);
+int32_t ec_bits_consume(uint64_t * n);
+size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
+ off_t offset, size_t size);
+
+int32_t ec_dict_set_array(dict_t *dict, char *key,
+ uint64_t *value, int32_t size);
+int32_t ec_dict_del_array(dict_t *dict, char *key,
+ uint64_t *value, int32_t size);
+int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value);
+int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value);
+int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config);
+int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config);
+
+int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent);
+int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode,
+ struct iatt *iatt);
+
+int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd);
+int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src);
+
+void ec_owner_set(call_frame_t * frame, void * owner);
+void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner);
+
+ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl);
+ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl);
+ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl);
+ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl);
+
+uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale);
+uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale);
+
+/* Returns 1 if 'value' has exactly one bit set (a power of two), else 0. */
+static inline int32_t ec_is_power_of_2(uint32_t value)
+{
+    if (value == 0)
+    {
+        return 0;
+    }
+
+    /* clearing the lowest set bit leaves nothing only for powers of two */
+    return (value & (value - 1)) == 0;
+}
+
+gf_boolean_t
+ec_is_internal_xattr (dict_t *dict, char *key, data_t *value, void *data);
+
+void
+ec_filter_internal_xattrs (dict_t *xattr);
+
+gf_boolean_t
+ec_is_data_fop (glusterfs_fop_t fop);
+
+int32_t
+ec_launch_replace_heal (ec_t *ec);
+/*
+gf_boolean_t
+ec_is_metadata_fop (glusterfs_fop_t fop);
+*/
+#endif /* __EC_HELPERS_H__ */
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
new file mode 100644
index 00000000000..c3d9c879eb7
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -0,0 +1,2046 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+
+/* FOP: access */
+
+/*
+ * Callback run when a child answers an ACCESS request.
+ *
+ * 'cookie' holds the index of the child the request was wound to. The answer
+ * is recorded in a newly allocated ec_cbk_data_t (taking a reference on
+ * 'xdata' when present) and passed to ec_combine(). ec_complete() is called
+ * whenever the fop could be recovered from frame->local. Always returns 0.
+ */
+int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                      int32_t op_ret, int32_t op_errno, dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ACCESS, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+    }
+    ec_combine(cbk, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the ACCESS request stored in 'fop' to the child found at position
+ * 'idx' of ec->xl_list. The index travels as the cookie so that
+ * ec_access_cbk() can tell which child answered. */
+void ec_wind_access(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    xlator_t *child = ec->xl_list[idx];
+
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_access_cbk, (void *)(uintptr_t)idx,
+                      child, ec->xl_list[idx]->fops->access,
+                      &fop->loc[0], fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_access(ec_fop_data_t *fop, int32_t state)
+{ /* Runs one step of the ACCESS fop: performs the work attached to 'state'
+   * and returns the state to execute next. Negative case labels are the
+   * failure counterparts of the positive ones (presumably selected by the
+   * caller once fop->error is set - confirm in ec_manager()). */
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK: /* prepare and request the inode lock for loc[0] */
+ ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock (fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_one (fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, NULL)) { /* non-zero: dispatch again */
+ return EC_STATE_DISPATCH;
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT: /* hand the selected answer to the caller */
+ cbk = fop->answer;
+ GF_ASSERT (cbk);
+ if (fop->cbks.access != NULL) {
+ if (cbk) { /* the callback is skipped when there is no answer */
+ fop->cbks.access(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno,
+ cbk->xdata);
+ }
+ }
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT: /* failure: report -1 with fop->error */
+ if (fop->cbks.access != NULL) {
+ fop->cbks.access(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE: /* same handling on success and failure */
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the ACCESS fop.
+ *
+ * Allocates the fop descriptor, stores 'mask' in fop->int32, copies 'loc'
+ * and references 'xdata' (when given) and starts the state machine through
+ * ec_manager(). If any preparation step fails after the descriptor exists,
+ * ec_manager() is started with ENOMEM; if the descriptor could not be
+ * created at all, 'func' is called directly with op_ret -1 and ENOMEM.
+ */
+void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
+               int32_t minimum, fop_access_cbk_t func, void * data,
+               loc_t * loc, int32_t mask, dict_t * xdata)
+{
+    ec_cbk_t callback = { .access = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(ACCESS) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_access, ec_manager_access, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = mask;
+
+    if ((loc != NULL) && (loc_copy(&fop->loc[0], loc) != 0)) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+                "Failed to copy a location.");
+
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop == NULL) {
+        func(frame, NULL, this, -1, error, NULL);
+
+        return;
+    }
+
+    ec_manager(fop, error);
+}
+
+/* FOP: getxattr */
+
+/* Combine check for (f)getxattr answers: two answers can be grouped only
+ * when ec_dict_compare() accepts their dictionaries. Returns 1 if they
+ * match, 0 (after logging a notice) if they do not. */
+int32_t ec_combine_getxattr(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+                            ec_cbk_data_t * src)
+{
+    if (ec_dict_compare(dst->dict, src->dict)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_DICT_MISMATCH,
+            "Mismatching dictionary in answers of 'GF_FOP_GETXATTR'");
+
+    return 0;
+}
+
+/*
+ * Callback run when a child answers a GETXATTR request.
+ *
+ * 'cookie' holds the index of the answering child. On a successful answer
+ * the returned dictionary is referenced into the callback data; 'xdata' is
+ * referenced regardless of the result. If taking a reference fails the
+ * answer is not combined. ec_complete() is called whenever the fop could be
+ * recovered from frame->local. Always returns 0.
+ */
+int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                        int32_t op_ret, int32_t op_errno, dict_t * dict,
+                        dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_GETXATTR, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (dict != NULL)) {
+        cbk->dict = dict_ref(dict);
+        if (cbk->dict == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_getxattr);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the GETXATTR request stored in 'fop' (location loc[0], attribute
+ * name str[0]) to the child at position 'idx' of ec->xl_list, using the
+ * index as the cookie for ec_getxattr_cbk(). */
+void ec_wind_getxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    xlator_t *child = ec->xl_list[idx];
+
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_getxattr_cbk, (void *)(uintptr_t)idx,
+                      child, ec->xl_list[idx]->fops->getxattr,
+                      &fop->loc[0], fop->str[0], fop->xdata);
+}
+
+/*
+ * Special answer selection for stime xattrs.
+ *
+ * Stime may not be present on every brick, so when the requested name
+ * matches GF_XATTR_STIME_PATTERN and the fop has no successful answer yet,
+ * the first successful entry of fop->cbk_list is taken as the answer.
+ */
+void
+ec_handle_special_xattrs (ec_fop_data_t *fop)
+{
+    ec_cbk_data_t *cbk = NULL;
+
+    if ((fop->str[0] == NULL) ||
+        (fnmatch (GF_XATTR_STIME_PATTERN, fop->str[0], 0) != 0)) {
+        return;
+    }
+
+    /* A good answer is already in place: nothing to override. */
+    if ((fop->answer != NULL) && (fop->answer->op_ret >= 0)) {
+        return;
+    }
+
+    list_for_each_entry (cbk, &fop->cbk_list, list) {
+        if (cbk->op_ret >= 0) {
+            fop->answer = cbk;
+            break;
+        }
+    }
+}
+
+int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
+{ /* Runs one step of the GETXATTR fop (ec_fgetxattr() registers this same
+   * handler): performs the work attached to 'state' and returns the state
+   * to execute next. Negative case labels are the failure counterparts of
+   * the positive ones.
+   * NOTE(review): the report states read fop->cbks.getxattr while
+   * ec_fgetxattr() fills the .fgetxattr member - presumably ec_cbk_t is a
+   * union of compatible pointers; confirm. */
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ /* clear-locks commands must be done without any locks acquired
+ to avoid interferences. */
+ if ((fop->str[0] == NULL) ||
+ (strncmp(fop->str[0], GF_XATTR_CLRLK_CMD,
+ strlen(GF_XATTR_CLRLK_CMD)) != 0)) {
+ if (fop->fd == NULL) { /* path based request: lock through loc[0] */
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO);
+ } else { /* fd based request */
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ }
+ ec_lock(fop);
+ }
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_handle_special_xattrs (fop); /* may pick a partial answer (stime) */
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_dict_combine(cbk, EC_COMBINE_DICT);
+ if (!ec_cbk_set_error(cbk, -err, _gf_true)) { /* no error recorded */
+ if (cbk->xdata != NULL)
+ ec_filter_internal_xattrs (cbk->xdata);
+
+ if (cbk->dict != NULL)
+ ec_filter_internal_xattrs (cbk->dict);
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT: /* hand the selected answer to the caller */
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.getxattr != NULL)
+ {
+ fop->cbks.getxattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->dict, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT: /* failure: report -1 with fop->error */
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.getxattr != NULL)
+ {
+ fop->cbks.getxattr(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE: /* same handling on success and failure */
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Completion callback of the heal started by ec_getxattr() for the
+ * EC_XATTR_HEAL name.
+ *
+ * 'cookie' is a fop whose 'data' member holds the getxattr callback to
+ * invoke. On success a dictionary with a single EC_XATTR_HEAL entry of the
+ * form "Good: <bits>, Bad: <bits>" is built from the masks and passed to
+ * that callback; any allocation failure is reported as -1/ENOMEM with no
+ * dictionary. Always returns 0.
+ */
+int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+                             int32_t op_ret, int32_t op_errno, uintptr_t mask,
+                             uintptr_t good, uintptr_t bad, dict_t *xdata)
+{
+    ec_fop_data_t *fop = cookie;
+    fop_getxattr_cbk_t func = fop->data;
+    ec_t *ec = xl->private;
+    dict_t *dict = NULL;
+    char *text = NULL;
+    char good_bits[65], other_bits[65];
+
+    if (op_ret < 0) {
+        goto out;
+    }
+
+    dict = dict_new();
+    if (dict == NULL) {
+        goto fail;
+    }
+
+    if (gf_asprintf(&text, "Good: %s, Bad: %s",
+                    ec_bin(good_bits, sizeof(good_bits), good, ec->nodes),
+                    ec_bin(other_bits, sizeof(other_bits),
+                           mask & ~(good | bad), ec->nodes)) < 0) {
+        goto fail;
+    }
+
+    if (dict_set_dynstr(dict, EC_XATTR_HEAL, text) != 0) {
+        GF_FREE(text);
+
+        goto fail;
+    }
+
+    goto out;
+
+fail:
+    /* Every failure above is reported as out of memory, without dict. */
+    if (dict != NULL) {
+        dict_unref(dict);
+        dict = NULL;
+    }
+    op_ret = -1;
+    op_errno = ENOMEM;
+
+out:
+    func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+
+    if (dict != NULL) {
+        dict_unref(dict);
+    }
+
+    return 0;
+}
+
+/*
+ * Entry point of the GETXATTR fop.
+ *
+ * A request for the EC_XATTR_HEAL name is turned into an explicit heal
+ * (ec_heal) whose result is formatted by ec_getxattr_heal_cbk(). Any other
+ * request allocates a fop descriptor, copies 'loc', duplicates 'name' and
+ * references 'xdata' (each only when given) and starts the state machine
+ * through ec_manager(). A failed preparation step starts it with ENOMEM; if
+ * no descriptor could be created, 'func' is called directly with -1/ENOMEM.
+ */
+void
+ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
+             int32_t minimum, fop_getxattr_cbk_t func, void *data,
+             loc_t *loc, const char *name, dict_t *xdata)
+{
+    ec_cbk_t callback = { .getxattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(GETXATTR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    /* Special handling of an explicit self-heal request */
+    if ((name != NULL) && (strcmp(name, EC_XATTR_HEAL) == 0)) {
+        ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk,
+                func, loc, 0, NULL);
+
+        return;
+    }
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_GETXATTR,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_getxattr, ec_manager_getxattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if ((loc != NULL) && (loc_copy(&fop->loc[0], loc) != 0)) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+                "Failed to copy a location.");
+
+        goto out;
+    }
+
+    if (name != NULL) {
+        fop->str[0] = gf_strdup(name);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop == NULL) {
+        func (frame, NULL, this, -1, error, NULL, NULL);
+
+        return;
+    }
+
+    ec_manager (fop, error);
+}
+
+/* FOP: fgetxattr */
+
+/*
+ * Callback run when a child answers an FGETXATTR request.
+ *
+ * 'cookie' holds the index of the answering child. On a successful answer
+ * the returned dictionary is referenced into the callback data; 'xdata' is
+ * referenced regardless of the result. If taking a reference fails the
+ * answer is not combined. ec_complete() is called whenever the fop could be
+ * recovered from frame->local. Always returns 0.
+ */
+int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                         int32_t op_ret, int32_t op_errno, dict_t * dict,
+                         dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FGETXATTR, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (dict != NULL)) {
+        cbk->dict = dict_ref(dict);
+        if (cbk->dict == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_getxattr);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the FGETXATTR request stored in 'fop' (descriptor fop->fd,
+ * attribute name str[0]) to the child at position 'idx' of ec->xl_list,
+ * using the index as the cookie for ec_fgetxattr_cbk(). */
+void
+ec_wind_fgetxattr (ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    xlator_t *child = ec->xl_list[idx];
+
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_fgetxattr_cbk, (void *)(uintptr_t)idx,
+                      child, ec->xl_list[idx]->fops->fgetxattr,
+                      fop->fd, fop->str[0], fop->xdata);
+}
+
+/*
+ * Entry point of the FGETXATTR fop.
+ *
+ * Allocates the fop descriptor (sharing ec_manager_getxattr with the path
+ * based variant), marks it as fd based, references 'fd', duplicates 'name'
+ * and references 'xdata' (each only when given) and starts the state
+ * machine through ec_manager(). A failed preparation step starts it with
+ * ENOMEM; if no descriptor could be created, 'func' is called directly with
+ * -1/ENOMEM.
+ */
+void
+ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
+              int32_t minimum, fop_fgetxattr_cbk_t func, void *data,
+              fd_t *fd, const char *name, dict_t *xdata)
+{
+    ec_cbk_t callback = { .fgetxattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FGETXATTR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FGETXATTR,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_fgetxattr, ec_manager_getxattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (name != NULL) {
+        fop->str[0] = gf_strdup(name);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop == NULL) {
+        func (frame, NULL, this, -1, error, NULL, NULL);
+
+        return;
+    }
+
+    ec_manager (fop, error);
+}
+
+/* FOP: open */
+
+/* Combine check for OPEN answers: two answers can be grouped only when they
+ * refer to the same fd object. Returns 1 if they do, 0 (after logging a
+ * notice with both pointers) if they do not. */
+int32_t ec_combine_open(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+                        ec_cbk_data_t * src)
+{
+    if (dst->fd == src->fd) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH,
+            "Mismatching fd in answers of 'GF_FOP_OPEN': %p <-> %p",
+            dst->fd, src->fd);
+
+    return 0;
+}
+
+/*
+ * Callback run when a child answers an OPEN request.
+ *
+ * 'cookie' holds the index of the answering child. On a successful answer
+ * the returned fd is referenced into the callback data; 'xdata' is
+ * referenced regardless of the result. If taking a reference fails the
+ * answer is not combined. ec_complete() is called whenever the fop could be
+ * recovered from frame->local. Always returns 0.
+ */
+int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                    int32_t op_ret, int32_t op_errno, fd_t * fd,
+                    dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPEN, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (fd != NULL)) {
+        cbk->fd = fd_ref(fd);
+        if (cbk->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_open);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the OPEN request stored in 'fop' (location loc[0], flags int32,
+ * descriptor fop->fd) to the child at position 'idx' of ec->xl_list, using
+ * the index as the cookie for ec_open_cbk(). */
+void ec_wind_open(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    xlator_t *child = ec->xl_list[idx];
+
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_open_cbk, (void *)(uintptr_t)idx,
+                      child, ec->xl_list[idx]->fops->open,
+                      &fop->loc[0], fop->int32, fop->fd, fop->xdata);
+}
+
+/*
+ * Completion callback of the ftruncate issued by ec_manager_open() to
+ * emulate O_TRUNC.
+ *
+ * 'cookie' is the fop of the nested request; its 'data' member is the open
+ * fop that was put to sleep. On success the post-operation iatt is stored
+ * in the open answer and the open fop is resumed without error; otherwise
+ * it is resumed with 'op_errno'. Always returns 0.
+ */
+int32_t ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iatt *prebuf, struct iatt *postbuf,
+                             dict_t *xdata)
+{
+    ec_fop_data_t *nested = cookie;
+    ec_fop_data_t *open_fop = nested->data;
+
+    if (op_ret < 0) {
+        ec_resume(open_fop, op_errno);
+
+        return 0;
+    }
+
+    open_fop->answer->iatt[0] = *postbuf;
+    ec_resume(open_fop, 0);
+
+    return 0;
+}
+
+int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
+{ /* Runs one step of the OPEN fop: performs the work attached to 'state'
+   * and returns the state to execute next. This handler has no lock,
+   * lock-reuse or unlock states: both report states return EC_STATE_END.
+   * Negative case labels are the failure counterparts of the positive
+   * ones. */
+ ec_cbk_data_t * cbk;
+ ec_fd_t * ctx;
+ int32_t err;
+
+ switch (state)
+ {
+ case EC_STATE_INIT: /* record location and flags in the fd context */
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err; /* err is negated before being stored */
+
+ return EC_STATE_REPORT;
+ }
+
+ ctx->flags = fop->int32; /* saved before O_APPEND/O_TRUNC are removed */
+
+ UNLOCK(&fop->fd->lock);
+
+ /* We need to write to specific offsets on the bricks, so we
+ need to remove O_APPEND from flags (if present).
+ If O_TRUNC is specified, we remove it from open and an
+ ftruncate will be executed later, which will correctly update
+ the file size taking appropriate locks. O_TRUNC flag is saved
+ into fop->uint32 to use it later.*/
+ fop->uint32 = fop->int32 & O_TRUNC;
+ fop->int32 &= ~(O_APPEND | O_TRUNC);
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err; /* NOTE(review): shadows the function-level 'err' */
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->fd->inode,
+ NULL);
+ if (!ec_cbk_set_error(cbk, -err, _gf_true)) {
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask; /* add the answer mask to ctx->open */
+ }
+
+ UNLOCK(&fop->fd->lock);
+
+ /* If O_TRUNC was specified, call ftruncate to
+ effectively trunc the file with appropriate locks
+ acquired. We don't use ctx->flags because self-heal
+ can use the same fd with different flags. */
+ if (fop->uint32 != 0) {
+ ec_sleep(fop); /* ec_open_truncate_cbk() calls ec_resume() */
+ ec_ftruncate(fop->req_frame, fop->xl, cbk->mask,
+ fop->minimum, ec_open_truncate_cbk,
+ fop, cbk->fd, 0, NULL);
+ }
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT: /* hand the selected answer to the caller */
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.open != NULL)
+ {
+ fop->cbks.open(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->fd, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT: /* failure: report -1 with fop->error */
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.open != NULL)
+ {
+ fop->cbks.open(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the OPEN fop.
+ *
+ * Allocates the fop descriptor, stores 'flags' in fop->int32, copies 'loc'
+ * and references 'fd' and 'xdata' (each only when given) and starts the
+ * state machine through ec_manager(). A failed preparation step starts it
+ * with ENOMEM; if no descriptor could be created, 'func' is called directly
+ * with -1/ENOMEM.
+ */
+void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target,
+             int32_t minimum, fop_open_cbk_t func, void * data, loc_t * loc,
+             int32_t flags, fd_t * fd, dict_t * xdata)
+{
+    ec_cbk_t callback = { .open = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(OPEN) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_open, ec_manager_open,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = flags;
+
+    if ((loc != NULL) && (loc_copy(&fop->loc[0], loc) != 0)) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+                "Failed to copy a location.");
+
+        goto out;
+    }
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop == NULL) {
+        func(frame, NULL, this, -1, error, NULL, NULL);
+
+        return;
+    }
+
+    ec_manager(fop, error);
+}
+
+/* FOP: readlink */
+
+/* Combine check for READLINK answers: two answers can be grouped only when
+ * ec_iatt_combine() accepts their (single) iatt. Returns 1 if it does, 0
+ * (after logging a notice) if it does not. */
+int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+                            ec_cbk_data_t * src)
+{
+    if (ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+            "Mismatching iatt in answers of 'GF_FOP_READLINK'");
+
+    return 0;
+}
+
+/*
+ * Callback run when a child answers a READLINK request.
+ *
+ * 'cookie' holds the index of the answering child. 'xdata' is referenced
+ * when present; on a successful answer the iatt is copied and the link
+ * target duplicated, turning the answer into an ENOMEM error if the
+ * duplication fails. The answer is then passed to ec_combine() without a
+ * combine function. ec_complete() is called whenever the fop could be
+ * recovered from frame->local. Always returns 0.
+ */
+int32_t
+ec_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, const char *path,
+                 struct iatt *buf, dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
+                                op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref (xdata);
+    }
+
+    if (cbk->op_ret >= 0) {
+        cbk->iatt[0] = *buf;
+        cbk->str = gf_strdup (path);
+        if (cbk->str == NULL) {
+            ec_cbk_set_error(cbk, ENOMEM, _gf_true);
+        }
+    }
+
+    ec_combine (cbk, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the READLINK request stored in 'fop' (location loc[0], buffer size
+ * fop->size) to the child at position 'idx' of ec->xl_list, using the index
+ * as the cookie for ec_readlink_cbk(). */
+void ec_wind_readlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    xlator_t *child = ec->xl_list[idx];
+
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_readlink_cbk, (void *)(uintptr_t)idx,
+                      child, ec->xl_list[idx]->fops->readlink,
+                      &fop->loc[0], fop->size, fop->xdata);
+}
+
+int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state)
+{ /* Runs one step of the READLINK fop: performs the work attached to
+   * 'state' and returns the state to execute next. Negative case labels
+   * are the failure counterparts of the positive ones. */
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK: /* prepare and request the inode lock for loc[0] */
+ ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock (fop);
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_one (fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) { /* non-zero: dispatch again */
+ return EC_STATE_DISPATCH;
+ }
+
+ if ((cbk != NULL) && (cbk->op_ret >= 0)) { /* good answer: fix its iatt */
+ ec_iatt_rebuild(fop->xl->private, &cbk->iatt[0], 1, 1);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT: /* hand the selected answer to the caller */
+ cbk = fop->answer;
+ GF_ASSERT (cbk);
+ if (fop->cbks.readlink != NULL) {
+ fop->cbks.readlink (fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->str, &cbk->iatt[0],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT: /* failure: report -1 with fop->error */
+ if (fop->cbks.readlink != NULL) {
+ fop->cbks.readlink(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE: /* same handling on success and failure */
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the READLINK fop.
+ *
+ * Allocates the fop descriptor, stores 'size' in fop->size, copies 'loc'
+ * and references 'xdata' (each only when given) and starts the state
+ * machine through ec_manager(). A failed preparation step starts it with
+ * ENOMEM; if no descriptor could be created, 'func' is called directly with
+ * -1/ENOMEM.
+ */
+void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                 int32_t minimum, fop_readlink_cbk_t func, void * data,
+                 loc_t * loc, size_t size, dict_t * xdata)
+{
+    ec_cbk_t callback = { .readlink = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(READLINK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_READLINK,
+                               EC_FLAG_LOCK_SHARED, target, minimum,
+                               ec_wind_readlink, ec_manager_readlink,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->size = size;
+
+    if ((loc != NULL) && (loc_copy(&fop->loc[0], loc) != 0)) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+                "Failed to copy a location.");
+
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop == NULL) {
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+
+        return;
+    }
+
+    ec_manager(fop, error);
+}
+
+/* FOP: readv */
+
+int32_t ec_readv_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
+{ /* Rebuilds the user-visible data of a READ from the answers chained on
+   * 'cbk' (cbk, cbk->next, ...): the fragments are gathered into one
+   * buffer, decoded into a new iobuf and the result is trimmed to the
+   * requested range. On success cbk->op_ret, cbk->int32, cbk->buffers and
+   * cbk->vector describe the decoded data. Returns 0 on success; a failed
+   * answer yields -cbk->op_errno and allocation failures -ENOMEM.
+   * NOTE(review): a non-zero iobref_add() result is returned unchanged -
+   * confirm it is a negative errno as the caller expects. */
+ ec_cbk_data_t * ans = NULL;
+ struct iobref * iobref = NULL;
+ struct iobuf * iobuf = NULL;
+ uint8_t * buff = NULL, * ptr;
+ size_t fsize = 0, size = 0, max = 0;
+ int32_t i = 0, err = -ENOMEM;
+
+ if (cbk->op_ret < 0) {
+ err = -cbk->op_errno;
+
+ goto out;
+ }
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &cbk->iatt[0].ia_size)); /* NOTE(review): the call that fills ia_size lives inside the assertion macro; verify GF_ASSERT always evaluates its argument */
+
+ if (cbk->op_ret > 0) {
+ struct iovec vector[1];
+ uint8_t * blocks[cbk->count]; /* start of each fragment inside buff */
+ uint32_t values[cbk->count]; /* ans->idx of each fragment */
+
+ fsize = cbk->op_ret; /* bytes taken from each answer */
+ size = fsize * ec->fragments; /* size of gather and decode buffers */
+ buff = GF_MALLOC(size, gf_common_mt_char);
+ if (buff == NULL) {
+ goto out;
+ }
+ ptr = buff;
+ for (i = 0, ans = cbk; ans != NULL; i++, ans = ans->next) {
+ values[i] = ans->idx;
+ blocks[i] = ptr;
+ ptr += ec_iov_copy_to(ptr, ans->vector, ans->int32, 0, fsize);
+ }
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ err = iobref_add(iobref, iobuf);
+ if (err != 0) {
+ goto out;
+ }
+
+ vector[0].iov_base = iobuf->ptr;
+ vector[0].iov_len = ec_method_decode(fsize, ec->fragments, values,
+ blocks, iobuf->ptr);
+
+ iobuf_unref(iobuf); /* drop the local reference taken by iobuf_get2() */
+
+ GF_FREE(buff);
+ buff = NULL;
+
+ vector[0].iov_base += fop->head; /* skip the bytes before the user offset */
+ vector[0].iov_len -= fop->head;
+
+ max = fop->offset * ec->fragments + size; /* end offset of decoded data */
+ if (max > cbk->iatt[0].ia_size) { /* do not go past the file size */
+ max = cbk->iatt[0].ia_size;
+ }
+ max -= fop->offset * ec->fragments + fop->head; /* bytes available from the user offset */
+ if (max > fop->user_size) { /* nor past what the user asked for */
+ max = fop->user_size;
+ }
+ size -= fop->head;
+ if (size > max) {
+ vector[0].iov_len -= size - max;
+ size = max;
+ }
+
+ cbk->op_ret = size;
+ cbk->int32 = 1; /* the result is a single iovec */
+
+ iobref_unref(cbk->buffers);
+ cbk->buffers = iobref; /* cbk now owns the new iobref */
+
+ GF_FREE(cbk->vector);
+ cbk->vector = iov_dup(vector, 1);
+ if (cbk->vector == NULL) {
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+
+out: /* error path: release whatever was allocated locally */
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+ GF_FREE(buff);
+
+ return err;
+}
+
+/* Combine check for READ answers: two answers can be grouped only when
+ * ec_vector_compare() accepts their vectors and ec_iatt_combine() accepts
+ * their (single) iatt, checked in that order. Returns 1 when both checks
+ * pass; otherwise logs a notice for the first failing check and returns 0. */
+int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+                         ec_cbk_data_t * src)
+{
+    if (!ec_vector_compare(dst->vector, dst->int32, src->vector,
+                           src->int32)) {
+        gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_VECTOR_MISMATCH,
+                "Mismatching vector in answers of 'GF_FOP_READ'");
+
+        return 0;
+    }
+
+    if (ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+            "Mismatching iatt in answers of 'GF_FOP_READ'");
+
+    return 0;
+}
+
+/*
+ * Callback run when a child answers a READ request.
+ *
+ * 'cookie' holds the index of the answering child. On a successful answer
+ * the iovec array is duplicated, the iatt copied and the iobref referenced
+ * into the callback data; 'xdata' is referenced regardless of the result.
+ * A positive op_ret that is not a multiple of ec->fragment_size turns the
+ * answer into an EIO error before it is combined. If a duplication or
+ * reference fails the answer is not combined. ec_complete() is called
+ * whenever the fop could be recovered from frame->local. Always returns 0.
+ */
+int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                     int32_t op_ret, int32_t op_errno, struct iovec * vector,
+                     int32_t count, struct iatt * stbuf,
+                     struct iobref * iobref, dict_t * xdata)
+{
+    ec_fop_data_t * fop = NULL;
+    ec_cbk_data_t * cbk = NULL;
+    ec_t * ec = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    /* 'this' must only be dereferenced after it has been validated. */
+    ec = this->private;
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_READ, idx, op_ret,
+                               op_errno);
+    if (cbk != NULL) {
+        if (op_ret >= 0) {
+            cbk->int32 = count;
+
+            if (count > 0) {
+                cbk->vector = iov_dup(vector, count);
+                if (cbk->vector == NULL) {
+                    gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                            EC_MSG_NO_MEMORY, "Failed to duplicate a "
+                                              "vector list.");
+
+                    goto out;
+                }
+            }
+            if (stbuf != NULL) {
+                cbk->iatt[0] = *stbuf;
+            }
+            if (iobref != NULL) {
+                cbk->buffers = iobref_ref(iobref);
+                if (cbk->buffers == NULL) {
+                    gf_msg (this->name, GF_LOG_ERROR, 0,
+                            EC_MSG_BUF_REF_FAIL, "Failed to reference a "
+                                                 "buffer.");
+
+                    goto out;
+                }
+            }
+        }
+        if (xdata != NULL) {
+            cbk->xdata = dict_ref(xdata);
+            if (cbk->xdata == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                                              "dictionary.");
+
+                goto out;
+            }
+        }
+
+        /* A brick must return whole fragments; anything else is an error. */
+        if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
+            ec_cbk_set_error(cbk, EIO, _gf_true);
+        }
+
+        ec_combine(cbk, ec_combine_readv);
+    }
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the READ request stored in 'fop' (descriptor fop->fd, size, offset
+ * and flags from fop->size, fop->offset and fop->uint32) to the child at
+ * position 'idx' of ec->xl_list, using the index as the cookie for
+ * ec_readv_cbk(). */
+void ec_wind_readv(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+    xlator_t *child = ec->xl_list[idx];
+
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_readv_cbk, (void *)(uintptr_t)idx,
+                      child, ec->xl_list[idx]->fops->readv, fop->fd,
+                      fop->size, fop->offset, fop->uint32, fop->xdata);
+}
+
+int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
+{ /* Runs one step of the READ fop: performs the work attached to 'state'
+   * and returns the state to execute next. Negative case labels are the
+   * failure counterparts of the positive ones. */
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT: /* remember the user request, then adjust offset/size */
+ fop->user_size = fop->size;
+ fop->head = ec_adjust_offset(fop->xl->private, &fop->offset, 1);
+ fop->size = ec_adjust_size(fop->xl->private, fop->size + fop->head,
+ 1);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK: /* prepare and request the lock through fop->fd */
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_min(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1,
+ cbk->count);
+
+ err = ec_readv_rebuild(fop->xl->private, fop, cbk); /* decode the data */
+ if (err != 0) {
+ ec_cbk_set_error(cbk, -err, _gf_true); /* err is negated first */
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT: /* hand the selected answer to the caller */
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.readv != NULL)
+ {
+ fop->cbks.readv(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->vector, cbk->int32,
+ &cbk->iatt[0], cbk->buffers, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT: /* failure: report -1 with fop->error */
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.readv != NULL)
+ {
+ fop->cbks.readv(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, 0, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE: /* same handling on success and failure */
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and finish the fop */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the READ fop.
+ *
+ * Allocates the fop descriptor, marks it as fd based, stores 'size',
+ * 'offset' and 'flags' (in fop->uint32), references 'fd' and 'xdata' (each
+ * only when given) and starts the state machine through ec_manager(). A
+ * failed preparation step starts it with ENOMEM; if no descriptor could be
+ * created, 'func' is called directly with -1/ENOMEM.
+ */
+void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
+              int32_t minimum, fop_readv_cbk_t func, void * data, fd_t * fd,
+              size_t size, off_t offset, uint32_t flags, dict_t * xdata)
+{
+    ec_cbk_t callback = { .readv = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(READ) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_readv,
+                               ec_manager_readv, callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->size = size;
+    fop->offset = offset;
+    fop->uint32 = flags;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop == NULL) {
+        func(frame, NULL, this, -1, error, NULL, 0, NULL, NULL, NULL);
+
+        return;
+    }
+
+    ec_manager(fop, error);
+}
+
+/* FOP: seek */
+
+/*
+ * Callback run for every answer of a SEEK request wound by ec_wind_seek().
+ *
+ * 'cookie' carries the index of the answering child.  The answer is recorded
+ * in a newly allocated cbk (the offset only when op_ret >= 0, plus a
+ * reference to xdata) and handed to ec_combine().  A positive op_ret whose
+ * offset is not a multiple of ec->fragment_size is converted into an EIO
+ * failure first.  ec_complete() is called whenever the fop could be recovered
+ * from the frame.  Always returns 0.
+ */
+int32_t ec_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, off_t offset,
+                    dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    ec_t *ec = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    /* 'this' is only dereferenced after it has been validated above. */
+    ec = this->private;
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_SEEK, idx, op_ret,
+                               op_errno);
+    if (cbk != NULL) {
+        if (op_ret >= 0) {
+            cbk->offset = offset;
+        }
+        if (xdata != NULL) {
+            cbk->xdata = dict_ref(xdata);
+        }
+
+        /* An offset that does not fall on a fragment boundary cannot be
+         * used: flag the answer as failed. */
+        if ((op_ret > 0) && ((cbk->offset % ec->fragment_size) != 0)) {
+            cbk->op_ret = -1;
+            cbk->op_errno = EIO;
+        }
+
+        ec_combine(cbk, NULL);
+    }
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Sends the SEEK request stored in 'fop' to the child at position 'idx' of
+ * ec->xl_list.  The index is passed as the cookie, which is where
+ * ec_seek_cbk() reads it back from.
+ */
+void ec_wind_seek(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_seek_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->seek,
+                      fop->fd, fop->offset, fop->seek, fop->xdata);
+}
+
+/*
+ * State machine of the SEEK fop: executes the work of 'state' and returns the
+ * state to run next.
+ *
+ * The negated states up to -EC_STATE_REPORT report fop->error through the
+ * seek callback; lock reuse and unlock are performed for both signs.
+ */
+int32_t ec_manager_seek(ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk;
+    ec_t *ec;
+
+    switch (state) {
+    case EC_STATE_INIT:
+        /* Keep the offset requested by the caller: ec_adjust_offset() gets
+         * a pointer to fop->offset and may rewrite it. */
+        fop->user_size = fop->offset;
+        fop->head = ec_adjust_offset(fop->xl->private, &fop->offset, 1);
+
+        /* Fall through */
+
+    case EC_STATE_LOCK:
+        ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+        ec_lock(fop);
+
+        return EC_STATE_DISPATCH;
+
+    case EC_STATE_DISPATCH:
+        ec_dispatch_one(fop);
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_PREPARE_ANSWER:
+        cbk = fop->answer;
+        if (cbk == NULL) {
+            /* No answer at all. */
+            ec_fop_set_error(fop, EIO);
+
+            return EC_STATE_REPORT;
+        }
+
+        if (ec_dispatch_one_retry(fop, &cbk)) {
+            return EC_STATE_DISPATCH;
+        }
+
+        if (cbk->op_ret < 0) {
+            ec_fop_set_error(fop, cbk->op_errno);
+
+            return EC_STATE_REPORT;
+        }
+
+        /* Scale the answered offset by the number of fragments, but never
+         * report a position before the one the caller asked for. */
+        ec = fop->xl->private;
+        cbk->offset *= ec->fragments;
+        if (cbk->offset < fop->user_size) {
+            cbk->offset = fop->user_size;
+        }
+
+        return EC_STATE_REPORT;
+
+    case EC_STATE_REPORT:
+        cbk = fop->answer;
+
+        GF_ASSERT(cbk != NULL);
+
+        if (fop->cbks.seek != NULL) {
+            fop->cbks.seek(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                           cbk->op_errno, cbk->offset, cbk->xdata);
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_INIT:
+    case -EC_STATE_LOCK:
+    case -EC_STATE_DISPATCH:
+    case -EC_STATE_PREPARE_ANSWER:
+    case -EC_STATE_REPORT:
+        GF_ASSERT(fop->error != 0);
+
+        if (fop->cbks.seek != NULL) {
+            fop->cbks.seek(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
+                           NULL);
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_LOCK_REUSE:
+    case EC_STATE_LOCK_REUSE:
+        ec_lock_reuse(fop);
+
+        return EC_STATE_UNLOCK;
+
+    case -EC_STATE_UNLOCK:
+    case EC_STATE_UNLOCK:
+        ec_unlock(fop);
+
+        return EC_STATE_END;
+
+    default:
+        gf_msg (fop->xl->name, GF_LOG_ERROR, 0,
+                EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", state,
+                ec_fop_name(fop->id));
+
+        return EC_STATE_END;
+    }
+}
+
+/*
+ * Entry point of the SEEK fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_SEEK, shared lock flag) wound through
+ * ec_wind_seek() and driven by ec_manager_seek(), stores the offset, the seek
+ * type and references to fd and xdata in it, and starts it with ec_manager().
+ * 'error' is EIO until the fop has been fully set up.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_manager_seek() only invokes it when it is not
+ * NULL - so it is checked here too instead of being called unconditionally.
+ */
+void ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+             int32_t minimum, fop_seek_cbk_t func, void *data, fd_t *fd,
+             off_t offset, gf_seek_what_t what, dict_t *xdata)
+{
+    ec_cbk_t callback = { .seek = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = EIO;
+
+    gf_msg_trace ("ec", 0, "EC(SEEK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_seek,
+                               ec_manager_seek, callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->offset = offset;
+    fop->seek = what;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* 'error' still holds EIO here: it is only cleared once a fop
+         * exists. */
+        func(frame, NULL, this, -1, error, 0, NULL);
+    }
+}
+
+/* FOP: stat */
+
+/*
+ * Combine function for STAT/FSTAT answers.
+ *
+ * Returns 1 when ec_iatt_combine() accepts the first iatt of 'src' and 'dst',
+ * otherwise logs the mismatch and returns 0.
+ */
+int32_t ec_combine_stat(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+                        ec_cbk_data_t *src)
+{
+    if (ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0,
+            EC_MSG_IATT_MISMATCH, "Mismatching iatt in "
+            "answers of 'GF_FOP_STAT'");
+
+    return 0;
+}
+
+/*
+ * Callback run for every answer of a STAT request wound by ec_wind_stat().
+ *
+ * 'cookie' carries the index of the answering child.  The answer (iatt on
+ * success, reference to xdata) is stored in a newly allocated cbk and handed
+ * to ec_combine() with ec_combine_stat().  If the xdata reference cannot be
+ * taken the answer is not combined.  ec_complete() is called whenever the fop
+ * could be recovered from the frame.  Always returns 0.
+ */
+int32_t ec_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                    dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STAT, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (buf != NULL)) {
+        cbk->iatt[0] = *buf;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_stat);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Sends the STAT request stored in 'fop' (first location plus xdata) to the
+ * child at position 'idx' of ec->xl_list.  The index is passed as the cookie,
+ * which is where ec_stat_cbk() reads it back from.
+ */
+void ec_wind_stat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_stat_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->stat,
+                      &fop->loc[0], fop->xdata);
+}
+
+/*
+ * State machine shared by the STAT and FSTAT fops: executes the work of
+ * 'state' and returns the state to run next.
+ *
+ * fop->fd selects between inode and fd based locking, fop->id selects which
+ * of the two callbacks receives the result.  The negated states up to
+ * -EC_STATE_REPORT report fop->error through that callback; lock reuse and
+ * unlock are performed for both signs.
+ */
+int32_t ec_manager_stat(ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk;
+
+    switch (state) {
+    case EC_STATE_INIT:
+    case EC_STATE_LOCK:
+        if (fop->fd == NULL) {
+            ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO);
+        } else {
+            ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+        }
+        ec_lock(fop);
+
+        return EC_STATE_DISPATCH;
+
+    case EC_STATE_DISPATCH:
+        ec_dispatch_all(fop);
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_PREPARE_ANSWER:
+        cbk = ec_fop_prepare_answer(fop, _gf_true);
+        if ((cbk != NULL) && (cbk->iatt[0].ia_type == IA_IFREG)) {
+            int32_t inode_size_known;
+
+            ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count);
+
+            /* The size lookup fills in ia_size, so it must run no matter
+             * how GF_ASSERT() is compiled: perform the call first and
+             * assert on its result only.
+             *
+             * This shouldn't fail because we have the inode locked. */
+            inode_size_known =
+                (ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+                                   &cbk->iatt[0].ia_size) != 0);
+            GF_ASSERT(inode_size_known);
+        }
+
+        return EC_STATE_REPORT;
+
+    case EC_STATE_REPORT:
+        cbk = fop->answer;
+
+        GF_ASSERT(cbk != NULL);
+
+        if (fop->id == GF_FOP_STAT) {
+            if (fop->cbks.stat != NULL) {
+                fop->cbks.stat(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                               cbk->op_errno, &cbk->iatt[0], cbk->xdata);
+            }
+        } else {
+            if (fop->cbks.fstat != NULL) {
+                fop->cbks.fstat(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                                cbk->op_errno, &cbk->iatt[0], cbk->xdata);
+            }
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_INIT:
+    case -EC_STATE_LOCK:
+    case -EC_STATE_DISPATCH:
+    case -EC_STATE_PREPARE_ANSWER:
+    case -EC_STATE_REPORT:
+        GF_ASSERT(fop->error != 0);
+
+        if (fop->id == GF_FOP_STAT) {
+            if (fop->cbks.stat != NULL) {
+                fop->cbks.stat(fop->req_frame, fop, fop->xl, -1,
+                               fop->error, NULL, NULL);
+            }
+        } else {
+            if (fop->cbks.fstat != NULL) {
+                fop->cbks.fstat(fop->req_frame, fop, fop->xl, -1,
+                                fop->error, NULL, NULL);
+            }
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_LOCK_REUSE:
+    case EC_STATE_LOCK_REUSE:
+        ec_lock_reuse(fop);
+
+        return EC_STATE_UNLOCK;
+
+    case -EC_STATE_UNLOCK:
+    case EC_STATE_UNLOCK:
+        ec_unlock(fop);
+
+        return EC_STATE_END;
+
+    default:
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
+                state, ec_fop_name(fop->id));
+
+        return EC_STATE_END;
+    }
+}
+
+/*
+ * Entry point of the STAT fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_STAT, shared lock flag) wound through
+ * ec_wind_stat() and driven by ec_manager_stat(), copies the location and
+ * references xdata, then starts it with ec_manager().  'error' remains ENOMEM
+ * until every setup step has succeeded, so a partially initialised fop is
+ * started with a non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_manager_stat() only invokes it when it is not
+ * NULL - so it is checked here too instead of being called unconditionally.
+ */
+void ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+             int32_t minimum, fop_stat_cbk_t func, void *data, loc_t *loc,
+             dict_t *xdata)
+{
+    ec_cbk_t callback = { .stat = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(STAT) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_stat, ec_manager_stat,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL, "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
+
+/* FOP: fstat */
+
+/*
+ * Callback run for every answer of an FSTAT request wound by
+ * ec_wind_fstat().
+ *
+ * 'cookie' carries the index of the answering child.  The answer (iatt on
+ * success, reference to xdata) is stored in a newly allocated cbk and handed
+ * to ec_combine() with ec_combine_stat().  If the xdata reference cannot be
+ * taken the answer is not combined.  ec_complete() is called whenever the fop
+ * could be recovered from the frame.  Always returns 0.
+ */
+int32_t ec_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                     dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSTAT, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (buf != NULL)) {
+        cbk->iatt[0] = *buf;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_stat);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Sends the FSTAT request stored in 'fop' (fd plus xdata) to the child at
+ * position 'idx' of ec->xl_list.  The index is passed as the cookie, which is
+ * where ec_fstat_cbk() reads it back from.
+ */
+void ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_fstat_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->fstat,
+                      fop->fd, fop->xdata);
+}
+
+/*
+ * Entry point of the FSTAT fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_FSTAT, shared lock flag) wound through
+ * ec_wind_fstat() and driven by ec_manager_stat(), references fd and xdata,
+ * then starts it with ec_manager().  'error' remains ENOMEM until every setup
+ * step has succeeded, so a partially initialised fop is started with a
+ * non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_manager_stat() only invokes it when it is not
+ * NULL - so it is checked here too instead of being called unconditionally.
+ */
+void ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+              int32_t minimum, fop_fstat_cbk_t func, void *data, fd_t *fd,
+              dict_t *xdata)
+{
+    ec_cbk_t callback = { .fstat = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FSTAT) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
+                               target, minimum, ec_wind_fstat, ec_manager_stat,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL, "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
new file mode 100644
index 00000000000..6aeda5a2481
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -0,0 +1,1678 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+
+/*
+ * Common answer handler used by the inode write callbacks of this file.
+ *
+ * 'cookie' carries the index of the answering child.  A cbk is allocated for
+ * the answer; for successful answers it also receives a reference to xdata
+ * and the pre/post attributes that were supplied (stored consecutively from
+ * iatt[0]).  Every allocated cbk, successful or not, is handed to
+ * ec_combine() with ec_combine_write(), and ec_complete() is called whenever
+ * the fop could be recovered from the frame.  Always returns 0.
+ */
+int
+ec_inode_write_cbk (call_frame_t *frame, xlator_t *this, void *cookie,
+                    int op_ret, int op_errno, struct iatt *prestat,
+                    struct iatt *poststat, dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int stored = 0;
+    int idx = 0;
+
+    VALIDATE_OR_GOTO (this, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+    fop = frame->local;
+    idx = (int32_t)(uintptr_t) cookie;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
+                                op_errno);
+
+    /* Only successful answers carry data worth keeping. */
+    if ((cbk != NULL) && (op_ret >= 0)) {
+        if (xdata != NULL) {
+            cbk->xdata = dict_ref (xdata);
+        }
+        if (prestat != NULL) {
+            cbk->iatt[stored++] = *prestat;
+        }
+        if (poststat != NULL) {
+            cbk->iatt[stored++] = *poststat;
+        }
+    }
+
+out:
+    if (cbk != NULL) {
+        ec_combine (cbk, ec_combine_write);
+    }
+
+    if (fop != NULL) {
+        ec_complete (fop);
+    }
+
+    return 0;
+}
+/* FOP: removexattr */
+
+/*
+ * Answer callback of REMOVEXATTR: forwards to ec_inode_write_cbk() with no
+ * pre/post attributes and returns its result.
+ */
+int32_t ec_removexattr_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *xdata)
+{
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               NULL, NULL, xdata);
+}
+
+/*
+ * Sends the REMOVEXATTR request stored in 'fop' (first location, attribute
+ * name in str[0], xdata) to the child at position 'idx' of ec->xl_list, with
+ * the index as cookie.
+ */
+void ec_wind_removexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_removexattr_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->removexattr,
+                      &fop->loc[0], fop->str[0], fop->xdata);
+}
+
+/*
+ * Delivers the result of a (f)setxattr / (f)removexattr fop to the callback
+ * registered for it.
+ *
+ * 'cookie' is the fop itself: fop->id selects the callback and the fop is
+ * passed on as the cookie of that callback.  Nothing happens when the
+ * selected callback is not set or the fop id is none of the four handled
+ * here.
+ */
+void
+ec_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, dict_t *xdata)
+{
+    ec_fop_data_t *fop = cookie;
+
+    switch (fop->id) {
+    case GF_FOP_SETXATTR:
+        if (fop->cbks.setxattr) {
+            fop->cbks.setxattr (frame, cookie, this, op_ret,
+                                op_errno, xdata);
+        }
+        break;
+
+    case GF_FOP_REMOVEXATTR:
+        if (fop->cbks.removexattr) {
+            fop->cbks.removexattr (frame, cookie, this, op_ret,
+                                   op_errno, xdata);
+        }
+        break;
+
+    case GF_FOP_FSETXATTR:
+        if (fop->cbks.fsetxattr) {
+            fop->cbks.fsetxattr (frame, cookie, this, op_ret,
+                                 op_errno, xdata);
+        }
+        break;
+
+    case GF_FOP_FREMOVEXATTR:
+        if (fop->cbks.fremovexattr) {
+            fop->cbks.fremovexattr (frame, cookie, this, op_ret,
+                                    op_errno, xdata);
+        }
+        break;
+
+    default:
+        /* Not an xattr fop: nothing to report. */
+        break;
+    }
+}
+
+/*
+ * State machine shared by the setxattr, fsetxattr, removexattr and
+ * fremovexattr fops: executes the work of 'state' and returns the state to
+ * run next.
+ *
+ * fop->fd selects between fd and inode based locking; results are delivered
+ * through ec_xattr_cbk().  The negated states up to -EC_STATE_REPORT report
+ * fop->error; lock reuse and unlock are performed for both signs.
+ */
+int32_t
+ec_manager_xattr (ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk;
+
+    switch (state) {
+    case EC_STATE_INIT:
+    case EC_STATE_LOCK:
+        if (fop->fd != NULL) {
+            ec_lock_prepare_fd(fop, fop->fd,
+                               EC_UPDATE_META | EC_QUERY_INFO);
+        } else {
+            ec_lock_prepare_inode(fop, &fop->loc[0],
+                                  EC_UPDATE_META | EC_QUERY_INFO);
+        }
+        ec_lock(fop);
+
+        return EC_STATE_DISPATCH;
+
+    case EC_STATE_DISPATCH:
+        ec_dispatch_all(fop);
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_PREPARE_ANSWER:
+        /* The returned cbk is not needed here; REPORT reads fop->answer. */
+        ec_fop_prepare_answer(fop, _gf_false);
+
+        return EC_STATE_REPORT;
+
+    case EC_STATE_REPORT:
+        cbk = fop->answer;
+
+        GF_ASSERT(cbk != NULL);
+
+        ec_xattr_cbk (fop->req_frame, fop, fop->xl, cbk->op_ret,
+                      cbk->op_errno, cbk->xdata);
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_INIT:
+    case -EC_STATE_LOCK:
+    case -EC_STATE_DISPATCH:
+    case -EC_STATE_PREPARE_ANSWER:
+    case -EC_STATE_REPORT:
+        GF_ASSERT(fop->error != 0);
+
+        ec_xattr_cbk (fop->req_frame, fop, fop->xl, -1, fop->error,
+                      NULL);
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_LOCK_REUSE:
+    case EC_STATE_LOCK_REUSE:
+        ec_lock_reuse(fop);
+
+        return EC_STATE_UNLOCK;
+
+    case -EC_STATE_UNLOCK:
+    case EC_STATE_UNLOCK:
+        ec_unlock(fop);
+
+        return EC_STATE_END;
+
+    default:
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNHANDLED_STATE,
+                "Unhandled state %d for %s",
+                state, ec_fop_name(fop->id));
+
+        return EC_STATE_END;
+    }
+}
+
+/*
+ * Entry point of the REMOVEXATTR fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_REMOVEXATTR) wound through
+ * ec_wind_removexattr() and driven by ec_manager_xattr(), stores a copy of
+ * the location, a duplicate of the attribute name and a copy of xdata in it,
+ * then starts it with ec_manager().  'error' remains ENOMEM until every setup
+ * step has succeeded, so a partially initialised fop is started with a
+ * non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_xattr_cbk() only invokes it when it is set - so
+ * it is checked here too instead of being called unconditionally.
+ */
+void
+ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
+                int32_t minimum, fop_removexattr_cbk_t func, void *data,
+                loc_t *loc, const char *name, dict_t *xdata)
+{
+    ec_cbk_t callback = { .removexattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(REMOVEXATTR) %p", frame);
+
+    VALIDATE_OR_GOTO (this, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame, out);
+    GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
+                               minimum, ec_wind_removexattr, ec_manager_xattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL,
+                    "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (name != NULL) {
+        fop->str[0] = gf_strdup(name);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref (xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager (fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func (frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: fremovexattr */
+
+/*
+ * Answer callback of FREMOVEXATTR: forwards to ec_inode_write_cbk() with no
+ * pre/post attributes and returns its result.
+ */
+int32_t ec_fremovexattr_cbk (call_frame_t *frame, void *cookie,
+                             xlator_t *this, int32_t op_ret, int32_t op_errno,
+                             dict_t *xdata)
+{
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               NULL, NULL, xdata);
+}
+
+/*
+ * Sends the FREMOVEXATTR request stored in 'fop' (fd, attribute name in
+ * str[0], xdata) to the child at position 'idx' of ec->xl_list, with the
+ * index as cookie.
+ */
+void ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_fremovexattr_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->fremovexattr,
+                      fop->fd, fop->str[0], fop->xdata);
+}
+
+/*
+ * Entry point of the FREMOVEXATTR fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_FREMOVEXATTR) wound through
+ * ec_wind_fremovexattr() and driven by ec_manager_xattr(), stores a
+ * reference to fd, a duplicate of the attribute name and a copy of xdata in
+ * it, then starts it with ec_manager().  'error' remains ENOMEM until every
+ * setup step has succeeded, so a partially initialised fop is started with a
+ * non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_xattr_cbk() only invokes it when it is set - so
+ * it is checked here too instead of being called unconditionally.
+ */
+void
+ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
+                 int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+                 fd_t *fd, const char *name, dict_t *xdata)
+{
+    ec_cbk_t callback = { .fremovexattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FREMOVEXATTR) %p", frame);
+
+    VALIDATE_OR_GOTO (this, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame, out);
+    GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
+                               minimum, ec_wind_fremovexattr, ec_manager_xattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (name != NULL) {
+        fop->str[0] = gf_strdup(name);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager (fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func (frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: setattr */
+
+/*
+ * Answer callback of SETATTR: forwards everything, including the pre/post
+ * attributes, to ec_inode_write_cbk() and returns its result.
+ */
+int32_t ec_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno,
+                        struct iatt *prestat, struct iatt *poststat,
+                        dict_t *xdata)
+{
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               prestat, poststat, xdata);
+}
+
+/*
+ * Sends the SETATTR request stored in 'fop' (first location, iatt, the
+ * 'valid' mask kept in int32, xdata) to the child at position 'idx' of
+ * ec->xl_list, with the index as cookie.
+ */
+void ec_wind_setattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_setattr_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->setattr,
+                      &fop->loc[0], &fop->iatt, fop->int32, fop->xdata);
+}
+
+/*
+ * State machine shared by the SETATTR and FSETATTR fops: executes the work
+ * of 'state' and returns the state to run next.
+ *
+ * fop->fd selects between inode and fd based locking, fop->id selects which
+ * of the two callbacks receives the result.  The negated states up to
+ * -EC_STATE_REPORT report fop->error through that callback; lock reuse and
+ * unlock are performed for both signs.
+ */
+int32_t ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk;
+
+    switch (state) {
+    case EC_STATE_INIT:
+    case EC_STATE_LOCK:
+        if (fop->fd == NULL) {
+            ec_lock_prepare_inode(fop, &fop->loc[0],
+                                  EC_UPDATE_META | EC_QUERY_INFO);
+        } else {
+            ec_lock_prepare_fd(fop, fop->fd,
+                               EC_UPDATE_META | EC_QUERY_INFO);
+        }
+        ec_lock(fop);
+
+        return EC_STATE_DISPATCH;
+
+    case EC_STATE_DISPATCH:
+        ec_dispatch_all(fop);
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_PREPARE_ANSWER:
+        cbk = ec_fop_prepare_answer(fop, _gf_false);
+        if ((cbk != NULL) && (cbk->iatt[0].ia_type == IA_IFREG)) {
+            int32_t inode_size_known;
+
+            ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+            /* The size lookup fills in ia_size, so it must run no matter
+             * how GF_ASSERT() is compiled: perform the call first and
+             * assert on its result only.
+             *
+             * This shouldn't fail because we have the inode locked. */
+            inode_size_known =
+                (ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+                                   &cbk->iatt[0].ia_size) != 0);
+            GF_ASSERT(inode_size_known);
+
+            /* Pre and post attributes report the same size. */
+            cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+        }
+
+        return EC_STATE_REPORT;
+
+    case EC_STATE_REPORT:
+        cbk = fop->answer;
+
+        GF_ASSERT(cbk != NULL);
+
+        if (fop->id == GF_FOP_SETATTR) {
+            if (fop->cbks.setattr != NULL) {
+                fop->cbks.setattr(fop->req_frame, fop, fop->xl,
+                                  cbk->op_ret, cbk->op_errno,
+                                  &cbk->iatt[0], &cbk->iatt[1],
+                                  cbk->xdata);
+            }
+        } else {
+            if (fop->cbks.fsetattr != NULL) {
+                fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
+                                   cbk->op_ret, cbk->op_errno,
+                                   &cbk->iatt[0], &cbk->iatt[1],
+                                   cbk->xdata);
+            }
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_INIT:
+    case -EC_STATE_LOCK:
+    case -EC_STATE_DISPATCH:
+    case -EC_STATE_PREPARE_ANSWER:
+    case -EC_STATE_REPORT:
+        GF_ASSERT(fop->error != 0);
+
+        if (fop->id == GF_FOP_SETATTR) {
+            if (fop->cbks.setattr != NULL) {
+                fop->cbks.setattr(fop->req_frame, fop, fop->xl, -1,
+                                  fop->error, NULL, NULL, NULL);
+            }
+        } else {
+            if (fop->cbks.fsetattr != NULL) {
+                fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, -1,
+                                   fop->error, NULL, NULL, NULL);
+            }
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_LOCK_REUSE:
+    case EC_STATE_LOCK_REUSE:
+        ec_lock_reuse(fop);
+
+        return EC_STATE_UNLOCK;
+
+    case -EC_STATE_UNLOCK:
+    case EC_STATE_UNLOCK:
+        ec_unlock(fop);
+
+        return EC_STATE_END;
+
+    default:
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNHANDLED_STATE,
+                "Unhandled state %d for %s",
+                state, ec_fop_name(fop->id));
+
+        return EC_STATE_END;
+    }
+}
+
+/*
+ * Entry point of the SETATTR fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_SETATTR) wound through
+ * ec_wind_setattr() and driven by ec_manager_setattr(), stores the 'valid'
+ * mask, a copy of the location, the attributes and a copy of xdata in it,
+ * then starts it with ec_manager().  'error' remains ENOMEM until every setup
+ * step has succeeded, so a partially initialised fop is started with a
+ * non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_manager_setattr() only invokes it when it is not
+ * NULL - so it is checked here too instead of being called unconditionally.
+ */
+void ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                int32_t minimum, fop_setattr_cbk_t func, void *data,
+                loc_t *loc, struct iatt *stbuf, int32_t valid,
+                dict_t *xdata)
+{
+    ec_cbk_t callback = { .setattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(SETATTR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum,
+                               ec_wind_setattr, ec_manager_setattr, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = valid;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL,
+                    "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (stbuf != NULL) {
+        fop->iatt = *stbuf;
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: fsetattr */
+
+/*
+ * Answer callback of FSETATTR: forwards everything, including the pre/post
+ * attributes, to ec_inode_write_cbk() and returns its result.
+ */
+int32_t ec_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno,
+                         struct iatt *prestat, struct iatt *poststat,
+                         dict_t *xdata)
+{
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               prestat, poststat, xdata);
+}
+
+/*
+ * Sends the FSETATTR request stored in 'fop' (fd, iatt, the 'valid' mask
+ * kept in int32, xdata) to the child at position 'idx' of ec->xl_list, with
+ * the index as cookie.
+ */
+void ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_fsetattr_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->fsetattr,
+                      fop->fd, &fop->iatt, fop->int32, fop->xdata);
+}
+
+/*
+ * Entry point of the FSETATTR fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_FSETATTR) wound through
+ * ec_wind_fsetattr() and driven by ec_manager_setattr(), stores the 'valid'
+ * mask, a reference to fd, the attributes and a copy of xdata in it, then
+ * starts it with ec_manager().  'error' remains ENOMEM until every setup
+ * step has succeeded, so a partially initialised fop is started with a
+ * non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_manager_setattr() only invokes it when it is not
+ * NULL - so it is checked here too instead of being called unconditionally.
+ */
+void ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                 int32_t minimum, fop_fsetattr_cbk_t func, void *data,
+                 fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+    ec_cbk_t callback = { .fsetattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FSETATTR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
+                               minimum, ec_wind_fsetattr, ec_manager_setattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->int32 = valid;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (stbuf != NULL) {
+        fop->iatt = *stbuf;
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+    }
+}
+
+/* FOP: setxattr */
+
+/*
+ * Answer callback of SETXATTR: forwards to ec_inode_write_cbk() with no
+ * pre/post attributes and returns its result.
+ */
+int32_t ec_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               NULL, NULL, xdata);
+}
+
+/*
+ * Sends the SETXATTR request stored in 'fop' (first location, attribute
+ * dictionary, flags kept in int32, xdata) to the child at position 'idx' of
+ * ec->xl_list, with the index as cookie.
+ */
+void ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_setxattr_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->setxattr,
+                      &fop->loc[0], fop->dict, fop->int32, fop->xdata);
+}
+
+/*
+ * Entry point of the SETXATTR fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_SETXATTR) wound through
+ * ec_wind_setxattr() and driven by ec_manager_xattr(), stores the flags, a
+ * copy of the location and copies of the attribute dictionary and of xdata
+ * in it, then starts it with ec_manager().  'error' remains ENOMEM until
+ * every setup step has succeeded, so a partially initialised fop is started
+ * with a non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_xattr_cbk() only invokes it when it is set - so
+ * it is checked here too instead of being called unconditionally.
+ */
+void
+ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
+             int32_t minimum, fop_setxattr_cbk_t func, void *data,
+             loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+    ec_cbk_t callback = { .setxattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(SETXATTR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
+                               minimum, ec_wind_setxattr, ec_manager_xattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = flags;
+
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL,
+                    "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (dict != NULL) {
+        fop->dict = dict_copy_with_ref(dict, NULL);
+        if (fop->dict == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager (fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func (frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: fsetxattr */
+
+/*
+ * Callback run for every answer of an FSETXATTR request wound by
+ * ec_wind_fsetxattr().
+ *
+ * 'cookie' carries the index of the answering child.  A cbk is allocated for
+ * the answer, a reference to xdata is stored in it and it is handed to
+ * ec_combine() without a combine function.  If the xdata reference cannot be
+ * taken the answer is not combined.  ec_complete() is called whenever the fop
+ * could be recovered from the frame.  Always returns 0.
+ */
+int32_t
+ec_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO (this, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame, out);
+    GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSETXATTR, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/*
+ * Sends the FSETXATTR request stored in 'fop' (fd, attribute dictionary,
+ * flags kept in int32, xdata) to the child at position 'idx' of ec->xl_list.
+ * The index is passed as the cookie, which is where ec_fsetxattr_cbk() reads
+ * it back from.
+ */
+void ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_fsetxattr_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->fsetxattr,
+                      fop->fd, fop->dict, fop->int32, fop->xdata);
+}
+
+/*
+ * Entry point of the FSETXATTR fop.
+ *
+ * Allocates a fop descriptor (GF_FOP_FSETXATTR) wound through
+ * ec_wind_fsetxattr() and driven by ec_manager_xattr(), stores the flags, a
+ * reference to fd and copies of the attribute dictionary and of xdata in it,
+ * then starts it with ec_manager().  'error' remains ENOMEM until every setup
+ * step has succeeded, so a partially initialised fop is started with a
+ * non-zero error.
+ *
+ * When no fop exists the failure is reported directly through 'func'.  The
+ * callback is optional - ec_xattr_cbk() only invokes it when it is set - so
+ * it is checked here too instead of being called unconditionally.
+ */
+void
+ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
+              int32_t minimum, fop_fsetxattr_cbk_t func, void *data,
+              fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+    ec_cbk_t callback = { .fsetxattr = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FSETXATTR) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
+                               minimum, ec_wind_fsetxattr, ec_manager_xattr,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->int32 = flags;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (dict != NULL) {
+        fop->dict = dict_copy_with_ref(dict, NULL);
+        if (fop->dict == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager (fop, error);
+    } else if (func != NULL) {
+        /* No fop to drive: answer the caller directly. */
+        func (frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: truncate */
+
+/*
+ * Issues a write of zeros covering the range from fop->user_size up to
+ * fop->offset * ec->fragments, sent to the children selected by 'mask'
+ * through ec_writev() (no callback, no xdata).
+ *
+ * Returns 0 once the write has been submitted, -ENOMEM when the iobref or
+ * the iobuf cannot be obtained, or the value returned by iobref_add() when
+ * that call fails.
+ *
+ * NOTE(review): nothing here checks that the zeroed length fits in the
+ * buffer returned by iobuf_get() - confirm the callers guarantee it.
+ */
+int32_t ec_truncate_write(ec_fop_data_t *fop, uintptr_t mask)
+{
+    ec_t *ec = fop->xl->private;
+    struct iobref *refs = NULL;
+    struct iobuf *buf = NULL;
+    struct iovec zeros;
+    int32_t ret = -ENOMEM;
+
+    refs = iobref_new();
+    if (refs == NULL) {
+        goto out;
+    }
+
+    buf = iobuf_get(fop->xl->ctx->iobuf_pool);
+    if (buf == NULL) {
+        goto out;
+    }
+
+    ret = iobref_add(refs, buf);
+    if (ret != 0) {
+        goto out;
+    }
+
+    zeros.iov_base = buf->ptr;
+    zeros.iov_len = fop->offset * ec->fragments - fop->user_size;
+    memset(zeros.iov_base, 0, zeros.iov_len);
+
+    /* Drop the local reference to the buffer; it was added to 'refs' above. */
+    iobuf_unref (buf);
+    buf = NULL;
+
+    ec_writev(fop->frame, fop->xl, mask, fop->minimum, NULL, NULL, fop->fd,
+              &zeros, 1, fop->user_size, 0, refs, NULL);
+
+    ret = 0;
+
+out:
+    if (buf != NULL) {
+        iobuf_unref(buf);
+    }
+    if (refs != NULL) {
+        iobref_unref(refs);
+    }
+
+    return ret;
+}
+
+/*
+ * Callback of the open issued by ec_truncate_clean(). 'cookie' is the open
+ * fop. On success the fd is bound and the zero-fill write is started on the
+ * parent fop; a failure of that write is stored in the open fop's error
+ * field. Always returns 0.
+ */
+int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
+                             xlator_t * this, int32_t op_ret, int32_t op_errno,
+                             fd_t * fd, dict_t * xdata)
+{
+    ec_fop_data_t *open_fop = cookie;
+    int32_t rc;
+
+    if (op_ret < 0) {
+        return 0;
+    }
+
+    fd_bind (fd);
+
+    rc = ec_truncate_write(open_fop->parent, open_fop->answer->mask);
+    if (rc != 0) {
+        open_fop->error = -rc;
+    }
+
+    return 0;
+}
+
+/*
+ * Starts the zero-fill write that follows a truncate.
+ *
+ * When the fop already has an fd the write is issued right away and its
+ * result is returned. Otherwise an fd is created and opened with O_RDWR, and
+ * the write is issued from ec_truncate_open_cbk(); in that case 0 is returned
+ * once the open has been submitted, or -ENOMEM if the fd cannot be created.
+ */
+int32_t ec_truncate_clean(ec_fop_data_t * fop)
+{
+    if (fop->fd != NULL) {
+        return ec_truncate_write(fop, fop->answer->mask);
+    }
+
+    fop->fd = fd_create(fop->loc[0].inode, fop->frame->root->pid);
+    if (fop->fd == NULL) {
+        return -ENOMEM;
+    }
+
+    ec_open(fop->frame, fop->xl, fop->answer->mask, fop->minimum,
+            ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd,
+            NULL);
+
+    return 0;
+}
+/*
+ * Callback used when winding truncate (see ec_wind_truncate). It hands all of
+ * its arguments to ec_inode_write_cbk() and returns that function's result.
+ */
+int32_t ec_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
+{
+ return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+ prestat, poststat, xdata);
+}
+/*
+ * Winds the truncate request of 'fop' to the subvolume ec->xl_list[idx],
+ * using the fop's first loc, its offset and its xdata. 'idx' is passed as the
+ * cookie delivered to ec_truncate_cbk().
+ */
+void ec_wind_truncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_truncate_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->truncate,
+ &fop->loc[0], fop->offset, fop->xdata);
+}
+/*
+ * State machine shared by truncate and ftruncate (both ec_truncate() and
+ * ec_ftruncate() register it as their manager).
+ *
+ * 'state' is the step to execute; the return value is the next state.
+ * Positive states are the normal flow: lock, dispatch, prepare the answer,
+ * report it to the caller's callback, then lock reuse and unlock. The
+ * negative states up to -EC_STATE_REPORT report fop->error to the callback
+ * with op_ret -1 and continue with lock reuse/unlock.
+ */
+int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ fop->user_size = fop->offset; /* keep the size requested by the caller */
+ fop->offset = ec_adjust_size(fop->xl->private, fop->offset, 1);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ if (fop->id == GF_FOP_TRUNCATE) { /* truncate locks by loc, ftruncate by fd */
+ ec_lock_prepare_inode(fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META |
+ EC_QUERY_INFO);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META |
+ EC_QUERY_INFO);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+ cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = fop->user_size; /* post-op size is the requested size */
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_set_inode_size(fop, fop->locks[0].lock->loc.inode,
+ fop->user_size));
+ if ((cbk->iatt[0].ia_size > cbk->iatt[1].ia_size) &&
+ (fop->user_size != fop->offset)) {
+ err = ec_truncate_clean(fop); /* file shrank and the requested size differs from the adjusted one */
+ if (err != 0) {
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_TRUNCATE)
+ {
+ if (fop->cbks.truncate != NULL)
+ {
+ fop->cbks.truncate(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+ }
+ else
+ {
+ if (fop->cbks.ftruncate != NULL)
+ {
+ fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* error variants: report fop->error to the caller */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_TRUNCATE)
+ {
+ if (fop->cbks.truncate != NULL)
+ {
+ fop->cbks.truncate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ }
+ else
+ {
+ if (fop->cbks.ftruncate != NULL)
+ {
+ fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the truncate fop.
+ *
+ * Allocates the fop descriptor, stores 'offset' and captures private copies
+ * of 'loc' and 'xdata'. The descriptor is handed to ec_manager() with 0 on
+ * success or ENOMEM if a preparation step failed. When no descriptor is
+ * available, 'func' is called directly with op_ret -1 and ENOMEM.
+ */
+void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                 int32_t minimum, fop_truncate_cbk_t func, void * data,
+                 loc_t * loc, off_t offset, dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_t callback = { .truncate = func };
+    int32_t op_errno = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(TRUNCATE) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
+                               minimum, ec_wind_truncate, ec_manager_truncate,
+                               callback, data);
+    if (!fop) {
+        goto out;
+    }
+
+    fop->offset = offset;
+
+    if (loc && (loc_copy(&fop->loc[0], loc) != 0)) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_LOC_COPY_FAIL,
+                "Failed to copy a location.");
+
+        goto out;
+    }
+
+    if (xdata) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (!fop->xdata) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    /* Everything needed by the fop has been captured. */
+    op_errno = 0;
+
+out:
+    if (fop) {
+        ec_manager(fop, op_errno);
+        return;
+    }
+
+    func(frame, NULL, this, -1, op_errno, NULL, NULL, NULL);
+}
+
+/* FOP: ftruncate */
+/*
+ * Callback used when winding ftruncate (see ec_wind_ftruncate). It hands all
+ * of its arguments to ec_inode_write_cbk() and returns that function's result.
+ */
+int32_t ec_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t *xdata)
+{
+ return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+ prestat, poststat, xdata);
+}
+/*
+ * Winds the ftruncate request of 'fop' to the subvolume ec->xl_list[idx],
+ * using the fop's fd, offset and xdata. 'idx' is passed as the cookie
+ * delivered to ec_ftruncate_cbk().
+ */
+void ec_wind_ftruncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_ftruncate_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->ftruncate,
+ fop->fd, fop->offset, fop->xdata);
+}
+
+/*
+ * Entry point of the ftruncate fop.
+ *
+ * Allocates the fop descriptor (driven by ec_manager_truncate), marks it as
+ * fd based, stores 'offset' and takes private references to 'fd' and 'xdata'.
+ * The descriptor is handed to ec_manager() with 0 on success or ENOMEM if a
+ * preparation step failed. When no descriptor is available, 'func' is called
+ * directly with op_ret -1 and ENOMEM.
+ */
+void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                  int32_t minimum, fop_ftruncate_cbk_t func, void * data,
+                  fd_t * fd, off_t offset, dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_t callback = { .ftruncate = func };
+    int32_t op_errno = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FTRUNCATE) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
+                               minimum, ec_wind_ftruncate, ec_manager_truncate,
+                               callback, data);
+    if (!fop) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+    fop->offset = offset;
+
+    if (fd) {
+        fop->fd = fd_ref(fd);
+        if (!fop->fd) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (xdata) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (!fop->xdata) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    /* Everything needed by the fop has been captured. */
+    op_errno = 0;
+
+out:
+    if (fop) {
+        ec_manager(fop, op_errno);
+        return;
+    }
+
+    func(frame, NULL, this, -1, op_errno, NULL, NULL, NULL);
+}
+
+/* FOP: writev */
+
+/*
+ * Read callback used by ec_writev_start() to complete the end of the write
+ * buffer. The last (fop->size - fop->user_size - fop->head) bytes of
+ * fop->vector[0] are filled with the bytes read past the matching position
+ * of the stripe; whatever the read did not provide is set to zero. Nothing
+ * is done when the read failed. Always returns 0.
+ */
+int32_t ec_writev_merge_tail(call_frame_t * frame, void * cookie,
+                             xlator_t * this, int32_t op_ret, int32_t op_errno,
+                             struct iovec * vector, int32_t count,
+                             struct iatt * stbuf, struct iobref * iobref,
+                             dict_t * xdata)
+{
+    ec_t *ec = this->private;
+    ec_fop_data_t *fop = frame->local;
+    size_t pending, skip, copied;
+
+    if (op_ret < 0) {
+        return 0;
+    }
+
+    /* Bytes still missing at the end of the buffer, and where they start
+     * inside the stripe that was read. */
+    pending = fop->size - fop->user_size - fop->head;
+    skip = ec->stripe_size - pending;
+
+    if (op_ret > skip) {
+        copied = min(op_ret - skip, pending);
+        ec_iov_copy_to(fop->vector[0].iov_base + fop->size - pending, vector,
+                       count, skip, copied);
+
+        pending -= copied;
+    }
+
+    if (pending > 0) {
+        memset(fop->vector[0].iov_base + fop->size - pending, 0, pending);
+    }
+
+    return 0;
+}
+
+/*
+ * Read callback used by ec_writev_start() to complete the beginning of the
+ * write buffer. The first fop->head bytes of fop->vector[0] are filled with
+ * the data read; whatever the read did not provide is set to zero. When the
+ * whole write fits in a single stripe and also has a tail, the same read is
+ * reused to fill the tail through ec_writev_merge_tail(). Nothing is done
+ * when the read failed. Always returns 0.
+ */
+int32_t ec_writev_merge_head(call_frame_t * frame, void * cookie,
+                             xlator_t * this, int32_t op_ret, int32_t op_errno,
+                             struct iovec * vector, int32_t count,
+                             struct iatt * stbuf, struct iobref * iobref,
+                             dict_t * xdata)
+{
+    ec_t *ec = this->private;
+    ec_fop_data_t *fop = frame->local;
+    size_t pending, filled;
+
+    if (op_ret < 0) {
+        return 0;
+    }
+
+    pending = fop->head;
+    filled = 0;
+
+    if (op_ret > 0) {
+        filled = min(op_ret, pending);
+        ec_iov_copy_to(fop->vector[0].iov_base, vector, count, 0, filled);
+
+        pending -= filled;
+    }
+
+    if (pending > 0) {
+        memset(fop->vector[0].iov_base + filled, 0, pending);
+    }
+
+    pending = fop->size - fop->user_size - fop->head;
+    if ((pending > 0) && (fop->size == ec->stripe_size)) {
+        ec_writev_merge_tail(frame, cookie, this, op_ret, op_errno, vector,
+                             count, stbuf, iobref, xdata);
+    }
+
+    return 0;
+}
+
+/*
+ * Creates a dictionary containing GLUSTERFS_INTERNAL_FOP_KEY set to "yes"
+ * and stores it in *xdata. Returns 0 on success. On failure returns -1,
+ * releases anything that was allocated and leaves *xdata untouched.
+ */
+static int
+ec_make_internal_fop_xdata (dict_t **xdata)
+{
+    dict_t *marker = dict_new();
+
+    if (marker == NULL) {
+        return -1;
+    }
+
+    if (dict_set_str (marker, GLUSTERFS_INTERNAL_FOP_KEY, "yes") != 0) {
+        dict_unref (marker);
+
+        return -1;
+    }
+
+    *xdata = marker;
+
+    return 0;
+}
+
+/*
+ * Prepares a write before it is dispatched.
+ *
+ * The user data is copied into a new buffer of fop->size bytes, placed
+ * fop->head bytes after its start, and fop->vector[0] / fop->buffers are
+ * switched to that buffer. If the buffer has a head and/or a tail not covered
+ * by user data, reads are issued on an anonymous fd (with uid/gid forced to
+ * 0) so that ec_writev_merge_head()/ec_writev_merge_tail() can fill them; a
+ * tail lying beyond the current file size is simply zeroed.
+ *
+ * Errors are reported through ec_fop_set_error().
+ *
+ * Ownership notes for the error path: once our iobuf reference has been
+ * dropped and the iobref has been stored in fop->buffers, the local pointers
+ * are cleared so that 'out' does not release them a second time. Each
+ * internal xdata dictionary is released as soon as the read that uses it has
+ * been submitted, so creating a second one does not leak the first.
+ */
+void ec_writev_start(ec_fop_data_t *fop)
+{
+    ec_t *ec = fop->xl->private;
+    struct iobref *iobref = NULL;
+    struct iobuf *iobuf = NULL;
+    void *ptr = NULL;
+    ec_fd_t *ctx;
+    fd_t *fd;
+    size_t tail;
+    uint64_t current;
+    int32_t err = -ENOMEM;
+    dict_t *xdata = NULL;
+
+    /* This shouldn't fail because we have the inode locked. */
+    GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &current));
+
+    fd = fd_anonymous(fop->fd->inode);
+    if (fd == NULL) {
+        ec_fop_set_error(fop, ENOMEM);
+
+        return;
+    }
+
+    fop->frame->root->uid = 0;
+    fop->frame->root->gid = 0;
+
+    ctx = ec_fd_get(fop->fd, fop->xl);
+    if (ctx != NULL) {
+        if ((ctx->flags & O_APPEND) != 0) {
+            /* Appending writes always start at the current end of file. */
+            fop->offset = current;
+        }
+    }
+
+    fop->user_size = iov_length(fop->vector, fop->int32);
+    fop->head = ec_adjust_offset(ec, &fop->offset, 0);
+    fop->size = ec_adjust_size(ec, fop->user_size + fop->head, 0);
+
+    iobref = iobref_new();
+    if (iobref == NULL) {
+        goto out;
+    }
+    iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, fop->size);
+    if (iobuf == NULL) {
+        goto out;
+    }
+    err = iobref_add(iobref, iobuf);
+    if (err != 0) {
+        goto out;
+    }
+
+    ptr = iobuf->ptr + fop->head;
+    ec_iov_copy_to(ptr, fop->vector, fop->int32, 0, fop->user_size);
+
+    fop->vector[0].iov_base = iobuf->ptr;
+    fop->vector[0].iov_len = fop->size;
+
+    /* The iobref keeps the buffer from now on; forget our pointer so the
+     * error path cannot unref it again. */
+    iobuf_unref(iobuf);
+    iobuf = NULL;
+
+    /* The new iobref now belongs to the fop; it must not be released by the
+     * error path below. */
+    iobref_unref(fop->buffers);
+    fop->buffers = iobref;
+    iobref = NULL;
+
+    if (fop->head > 0) {
+        if (ec_make_internal_fop_xdata (&xdata)) {
+            err = -ENOMEM;
+            goto out;
+        }
+        ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, ec_writev_merge_head,
+                 NULL, fd, ec->stripe_size, fop->offset, 0, xdata);
+
+        /* Done with this dictionary; a tail read gets a fresh one. */
+        dict_unref (xdata);
+        xdata = NULL;
+    }
+    tail = fop->size - fop->user_size - fop->head;
+    if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
+        if (current > fop->offset + fop->head + fop->user_size) {
+            if (ec_make_internal_fop_xdata (&xdata)) {
+                err = -ENOMEM;
+                goto out;
+            }
+            ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
+                     ec_writev_merge_tail, NULL, fd, ec->stripe_size,
+                     fop->offset + fop->size - ec->stripe_size, 0, xdata);
+        } else {
+            memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+        }
+    }
+
+    fd_unref(fd);
+    if (xdata)
+        dict_unref (xdata);
+
+    return;
+
+out:
+    if (iobuf != NULL) {
+        iobuf_unref(iobuf);
+    }
+    if (iobref != NULL) {
+        iobref_unref(iobref);
+    }
+
+    fd_unref(fd);
+    if (xdata)
+        dict_unref (xdata);
+
+    ec_fop_set_error(fop, -err);
+}
+
+/*
+ * Callback used when winding writev. A positive op_ret that is not a
+ * multiple of ec->fragment_size is turned into a failure (-1, EIO) before
+ * the answer is handed to ec_inode_write_cbk(). The check is skipped when
+ * 'this' or its private data is not available.
+ */
+int32_t ec_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+                       struct iatt *poststat, dict_t *xdata)
+{
+    ec_t *ec = (this != NULL) ? this->private : NULL;
+
+    if ((ec != NULL) && (op_ret > 0) &&
+        ((op_ret % ec->fragment_size) != 0)) {
+        op_ret = -1;
+        op_errno = EIO;
+    }
+
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               prestat, poststat, xdata);
+}
+/*
+ * Builds the fragment for subvolume 'idx' and winds the write to it.
+ *
+ * A buffer of (fop->vector[0].iov_len / ec->fragments) bytes is allocated,
+ * filled by ec_method_encode() from the fop's buffer and sent to
+ * ec->xl_list[idx] at offset fop->offset / ec->fragments. If a buffer cannot
+ * be obtained, ec_writev_cbk() is called directly with op_ret -1 and the
+ * error as op_errno.
+ */
+void ec_wind_writev(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ struct iovec vector[1];
+ struct iobref * iobref = NULL;
+ struct iobuf * iobuf = NULL;
+ ssize_t size = 0, bufsize = 0;
+ int32_t err = -ENOMEM;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+
+ size = fop->vector[0].iov_len;
+ bufsize = size / ec->fragments;
+
+ iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, bufsize);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ err = iobref_add(iobref, iobuf);
+ if (err != 0) {
+ goto out;
+ }
+
+ ec_method_encode(size, ec->fragments, idx, fop->vector[0].iov_base,
+ iobuf->ptr);
+
+ vector[0].iov_base = iobuf->ptr;
+ vector[0].iov_len = bufsize;
+
+ iobuf_unref(iobuf); /* iobref was given the buffer by iobref_add() above */
+
+ STACK_WIND_COOKIE(fop->frame, ec_writev_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->writev,
+ fop->fd, vector, 1, fop->offset / ec->fragments,
+ fop->uint32, iobref, fop->xdata);
+
+ iobref_unref(iobref);
+
+ return;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ ec_writev_cbk(fop->frame, (void *)(uintptr_t)idx, fop->xl, -1, -err, NULL,
+ NULL, NULL); /* err is negative here, so -err is the positive errno */
+}
+/*
+ * State machine of the writev fop.
+ *
+ * 'state' is the step to execute; the return value is the next state. The
+ * normal flow is: lock the fd, run ec_writev_start() (which may issue
+ * partial reads), dispatch the write once in EC_STATE_DELAYED_START,
+ * prepare the combined answer, report it to the caller's callback, then
+ * lock reuse and unlock. The negative states up to -EC_STATE_REPORT report
+ * fop->error to the callback with op_ret -1.
+ */
+int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META |
+ EC_QUERY_INFO);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_writev_start(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ /* Restore uid, gid if they were changed to do some partial
+ * reads. */
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_t *ec = fop->xl->private;
+ size_t size;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+ cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ size = fop->offset + fop->head + fop->user_size; /* end position of the user's data */
+ if (size > cbk->iatt[0].ia_size) {
+ /* Only update inode size if this is a top level fop.
+ * Otherwise this is an internal write and the top
+ * level fop should take care of the real inode size.
+ */
+ if (fop->parent == NULL) {
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(ec_set_inode_size(fop, fop->fd->inode,
+ size));
+ }
+ cbk->iatt[1].ia_size = size;
+ }
+ if (fop->error == 0) {
+ cbk->op_ret *= ec->fragments; /* scale the per-subvolume count by the number of fragments */
+ if (cbk->op_ret < fop->head) { /* the head bytes were not part of the user's data */
+ cbk->op_ret = 0;
+ } else {
+ cbk->op_ret -= fop->head;
+ }
+ if (cbk->op_ret > fop->user_size) { /* never report more than the user asked to write */
+ cbk->op_ret = fop->user_size;
+ }
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.writev != NULL)
+ {
+ fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_DELAYED_START:
+ /* We have failed while doing partial reads. We need to restore
+ * original uid, gid. */
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ /* Fall through */
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.writev != NULL)
+ {
+ fop->cbks.writev(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the writev fop.
+ *
+ * Allocates the fop descriptor, stores 'count', 'offset' and 'flags', marks
+ * it as fd based and takes private references/copies of 'fd', 'vector',
+ * 'iobref' and 'xdata'. The descriptor is handed to ec_manager() with 0 on
+ * success or ENOMEM if a preparation step failed.
+ *
+ * 'func' may be NULL: internal writes are issued without a completion
+ * callback. When no descriptor is available the failure (op_ret -1, ENOMEM)
+ * is therefore reported only if a callback was supplied.
+ */
+void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
+               int32_t minimum, fop_writev_cbk_t func, void * data, fd_t * fd,
+               struct iovec * vector, int32_t count, off_t offset,
+               uint32_t flags, struct iobref * iobref, dict_t * xdata)
+{
+    ec_cbk_t callback = { .writev = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(WRITE) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum,
+                               ec_wind_writev, ec_manager_writev, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = count;
+    fop->offset = offset;
+    fop->uint32 = flags;
+
+    fop->use_fd = 1;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (count > 0) {
+        /* fop->int32 already holds 'count'; only the vector is missing. */
+        fop->vector = iov_dup(vector, count);
+        if (fop->vector == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a "
+                    "vector list.");
+
+            goto out;
+        }
+    }
+    if (iobref != NULL) {
+        fop->buffers = iobref_ref(iobref);
+        if (fop->buffers == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_BUF_REF_FAIL,
+                    "Failed to reference a "
+                    "buffer.");
+
+            goto out;
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_copy_with_ref(xdata, NULL);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* Without a fop nobody else will notify the caller; skip the call
+         * when the write was issued without a callback. */
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
new file mode 100644
index 00000000000..0253b51bf5e
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -0,0 +1,1169 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+
+#define EC_LOCK_MODE_NONE 0
+#define EC_LOCK_MODE_INC 1
+#define EC_LOCK_MODE_ALL 2
+/*
+ * Evaluates the answers collected in fop->cbk_list for a lock request.
+ *
+ * *mask receives the union of the masks of all successful answers. The
+ * return value is:
+ *   0       enough subvolumes answered and none reported EAGAIN; fop->answer
+ *           is set (if it was empty) and the good mask is updated.
+ *   EAGAIN  some subvolume reported EAGAIN in mode EC_LOCK_MODE_NONE.
+ *   -1      some subvolume reported EAGAIN in mode EC_LOCK_MODE_ALL; the
+ *           mode is switched to EC_LOCK_MODE_INC and the error is left
+ *           negative.
+ *   EIO     successful answers fall in more than one group, or the mode is
+ *           neither of the above.
+ *   other   not enough subvolumes answered: the errno of fop->answer if it
+ *           is a failure, EIO otherwise.
+ */
+int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
+{
+ ec_t *ec = fop->xl->private;
+ ec_cbk_data_t *ans = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ uintptr_t locked = 0, notlocked = 0;
+ int32_t error = -1;
+
+ list_for_each_entry(ans, &fop->cbk_list, list) {
+ if (ans->op_ret >= 0) {
+ if (locked != 0) { /* a second group of successful answers */
+ error = EIO;
+ }
+ locked |= ans->mask;
+ cbk = ans;
+ } else {
+ if (ans->op_errno == EAGAIN) {
+ switch (fop->uint32) {
+ case EC_LOCK_MODE_NONE:
+ case EC_LOCK_MODE_ALL:
+ /* Goal is to treat non-blocking lock as failure
+ * even if there is a single EAGAIN */
+ notlocked |= ans->mask;
+ break;
+ }
+ }
+ }
+ }
+
+ if (error == -1) {
+ if (ec_bits_count(locked | notlocked) >= ec->fragments) {
+ if (notlocked == 0) {
+ if (fop->answer == NULL) {
+ fop->answer = cbk;
+ }
+
+ ec_update_good(fop, locked);
+
+ error = 0;
+ } else {
+ switch (fop->uint32) {
+ case EC_LOCK_MODE_NONE:
+ error = EAGAIN;
+ break;
+
+ case EC_LOCK_MODE_ALL:
+ fop->uint32 = EC_LOCK_MODE_INC; /* error stays -1 */
+ break;
+
+ default:
+ error = EIO;
+ break;
+ }
+ }
+ } else {
+ if (fop->answer && fop->answer->op_ret < 0)
+ error = fop->answer->op_errno;
+ else
+ error = EIO;
+ }
+ }
+
+ *mask = locked;
+
+ return error;
+}
+
+/*
+ * Completion callback for the unlock requests issued to undo a partially
+ * acquired entry/inode lock. A failure is only logged as a warning. Always
+ * returns 0.
+ */
+int32_t ec_lock_unlocked(call_frame_t * frame, void * cookie,
+                         xlator_t * this, int32_t op_ret, int32_t op_errno,
+                         dict_t * xdata)
+{
+    if (op_ret >= 0) {
+        return 0;
+    }
+
+    gf_msg (this->name, GF_LOG_WARNING, op_errno,
+            EC_MSG_UNLOCK_FAILED,
+            "Failed to unlock an entry/inode");
+
+    return 0;
+}
+
+/*
+ * Completion callback with the lk signature for unlock requests. A failure
+ * is only logged as a warning. Always returns 0.
+ */
+int32_t ec_lock_lk_unlocked(call_frame_t * frame, void * cookie,
+                            xlator_t * this, int32_t op_ret, int32_t op_errno,
+                            struct gf_flock * flock, dict_t * xdata)
+{
+    if (op_ret >= 0) {
+        return 0;
+    }
+
+    gf_msg(this->name, GF_LOG_WARNING, op_errno,
+           EC_MSG_LK_UNLOCK_FAILED,
+           "Failed to unlock an lk");
+
+    return 0;
+}
+
+/* FOP: entrylk */
+/*
+ * Callback used when winding entrylk. 'cookie' carries the index of the
+ * subvolume that answered. The answer is recorded in a new ec_cbk_data_t
+ * (keeping a reference to 'xdata' when present) and merged with
+ * ec_combine(). ec_complete() is called for the fop whenever the fop could be
+ * retrieved from frame->local. Always returns 0.
+ */
+int32_t ec_entrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+ int32_t op_ret, int32_t op_errno, dict_t * xdata)
+{
+ ec_fop_data_t * fop = NULL;
+ ec_cbk_data_t * cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+ frame, op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ENTRYLK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL)
+ {
+ if (xdata != NULL)
+ {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL)
+ {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out; /* this answer is not combined */
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL)
+ {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+/*
+ * Winds the entrylk request of 'fop' to the subvolume ec->xl_list[idx].
+ * fop->str[0] and fop->str[1] hold the volume and basename stored by
+ * ec_entrylk(). 'idx' is passed as the cookie delivered to ec_entrylk_cbk().
+ */
+void ec_wind_entrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_entrylk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->entrylk,
+ fop->str[0], &fop->loc[0], fop->str[1], fop->entrylk_cmd,
+ fop->entrylk_type, fop->xdata);
+}
+/*
+ * State machine shared by entrylk and fentrylk (both ec_entrylk() and
+ * ec_fentrylk() register it as their manager).
+ *
+ * 'state' is the step to execute; the return value is the next state. A
+ * blocking ENTRYLK_LOCK request is first attempted as ENTRYLK_LOCK_NB on all
+ * subvolumes (mode EC_LOCK_MODE_ALL). When the answers are checked, any
+ * subvolume that did get locked is unlocked if the overall result is an
+ * error; if ec_lock_check() left a negative error, the request is repeated
+ * as a blocking ENTRYLK_LOCK through ec_dispatch_inc().
+ */
+int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ if (fop->entrylk_cmd == ENTRYLK_LOCK)
+ {
+ fop->uint32 = EC_LOCK_MODE_ALL;
+ fop->entrylk_cmd = ENTRYLK_LOCK_NB;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->entrylk_cmd != ENTRYLK_UNLOCK) {
+ uintptr_t mask;
+
+ ec_fop_set_error (fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) { /* undo the locks that were acquired */
+ if (fop->id == GF_FOP_ENTRYLK) {
+ ec_entrylk(fop->frame, fop->xl, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ &fop->loc[0], fop->str[1],
+ ENTRYLK_UNLOCK, fop->entrylk_type,
+ fop->xdata);
+ } else {
+ ec_fentrylk(fop->frame, fop->xl, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ fop->fd, fop->str[1], ENTRYLK_UNLOCK,
+ fop->entrylk_type, fop->xdata);
+ }
+ }
+ if (fop->error < 0) { /* negative error: retry with a blocking lock */
+ fop->error = 0;
+
+ fop->entrylk_cmd = ENTRYLK_LOCK;
+
+ ec_dispatch_inc(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+ }
+ }
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_ENTRYLK)
+ {
+ if (fop->cbks.entrylk != NULL)
+ {
+ fop->cbks.entrylk(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->xdata);
+ }
+ }
+ else
+ {
+ if (fop->cbks.fentrylk != NULL)
+ {
+ fop->cbks.fentrylk(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->xdata);
+ }
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT: /* error variants: report fop->error to the caller */
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_ENTRYLK)
+ {
+ if (fop->cbks.entrylk != NULL)
+ {
+ fop->cbks.entrylk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ }
+ else
+ {
+ if (fop->cbks.fentrylk != NULL)
+ {
+ fop->cbks.fentrylk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*
+ * Entry point of the entrylk fop.
+ *
+ * Allocates the fop descriptor, stores 'cmd' and 'type' and captures private
+ * copies of 'volume', 'loc' and 'basename' plus a reference to 'xdata'. The
+ * descriptor is handed to ec_manager() with 0 on success or ENOMEM if a
+ * preparation step failed. When no descriptor is available, 'func' is called
+ * directly with op_ret -1 and ENOMEM.
+ */
+void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                int32_t minimum, fop_entrylk_cbk_t func, void * data,
+                const char * volume, loc_t * loc, const char * basename,
+                entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_t callback = { .entrylk = func };
+    int32_t op_errno = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(ENTRYLK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum,
+                               ec_wind_entrylk, ec_manager_entrylk, callback,
+                               data);
+    if (!fop) {
+        goto out;
+    }
+
+    fop->entrylk_cmd = cmd;
+    fop->entrylk_type = type;
+
+    if (volume) {
+        fop->str[0] = gf_strdup(volume);
+        if (!fop->str[0]) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+
+    if (loc && (loc_copy(&fop->loc[0], loc) != 0)) {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_LOC_COPY_FAIL,
+                "Failed to copy a location.");
+
+        goto out;
+    }
+
+    if (basename) {
+        fop->str[1] = gf_strdup(basename);
+        if (!fop->str[1]) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+
+    if (xdata) {
+        fop->xdata = dict_ref(xdata);
+        if (!fop->xdata) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    /* Everything needed by the fop has been captured. */
+    op_errno = 0;
+
+out:
+    if (fop) {
+        ec_manager(fop, op_errno);
+        return;
+    }
+
+    func(frame, NULL, this, -1, op_errno, NULL);
+}
+
+/* FOP: fentrylk */
+/*
+ * Callback used when winding fentrylk. 'cookie' carries the index of the
+ * subvolume that answered. The answer is recorded in a new ec_cbk_data_t
+ * (keeping a reference to 'xdata' when present) and merged with
+ * ec_combine(). ec_complete() is called for the fop whenever the fop could be
+ * retrieved from frame->local. Always returns 0.
+ */
+int32_t ec_fentrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+ int32_t op_ret, int32_t op_errno, dict_t * xdata)
+{
+ ec_fop_data_t * fop = NULL;
+ ec_cbk_data_t * cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+ frame, op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FENTRYLK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL)
+ {
+ if (xdata != NULL)
+ {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL)
+ {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out; /* this answer is not combined */
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL)
+ {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+/*
+ * Winds the fentrylk request of 'fop' to the subvolume ec->xl_list[idx].
+ * fop->str[0] and fop->str[1] hold the volume and basename stored by
+ * ec_fentrylk(). 'idx' is passed as the cookie delivered to
+ * ec_fentrylk_cbk().
+ */
+void ec_wind_fentrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fentrylk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fentrylk,
+ fop->str[0], fop->fd, fop->str[1], fop->entrylk_cmd,
+ fop->entrylk_type, fop->xdata);
+}
+
+/*
+ * Entry point of the fentrylk fop.
+ *
+ * Allocates the fop descriptor (driven by ec_manager_entrylk), marks it as
+ * fd based, stores 'cmd' and 'type' and captures private copies of 'volume'
+ * and 'basename' plus references to 'fd' and 'xdata'. The descriptor is
+ * handed to ec_manager() with 0 on success or ENOMEM if a preparation step
+ * failed. When no descriptor is available, 'func' is called directly with
+ * op_ret -1 and ENOMEM.
+ */
+void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                 int32_t minimum, fop_fentrylk_cbk_t func, void * data,
+                 const char * volume, fd_t * fd, const char * basename,
+                 entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_t callback = { .fentrylk = func };
+    int32_t op_errno = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FENTRYLK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
+                               minimum, ec_wind_fentrylk, ec_manager_entrylk,
+                               callback, data);
+    if (!fop) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+    fop->entrylk_cmd = cmd;
+    fop->entrylk_type = type;
+
+    if (volume) {
+        fop->str[0] = gf_strdup(volume);
+        if (!fop->str[0]) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+
+    if (fd) {
+        fop->fd = fd_ref(fd);
+        if (!fop->fd) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+
+    if (basename) {
+        fop->str[1] = gf_strdup(basename);
+        if (!fop->str[1]) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+
+    if (xdata) {
+        fop->xdata = dict_ref(xdata);
+        if (!fop->xdata) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    /* Everything needed by the fop has been captured. */
+    op_errno = 0;
+
+out:
+    if (fop) {
+        ec_manager(fop, op_errno);
+        return;
+    }
+
+    func(frame, NULL, this, -1, op_errno, NULL);
+}
+
+/* FOP: inodelk */
+/*
+ * Callback used when winding inodelk. 'cookie' carries the index of the
+ * subvolume that answered. The answer is recorded in a new ec_cbk_data_t
+ * (keeping a reference to 'xdata' when present) and merged with
+ * ec_combine(). ec_complete() is called for the fop whenever the fop could be
+ * retrieved from frame->local. Always returns 0.
+ */
+int32_t ec_inodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+ int32_t op_ret, int32_t op_errno, dict_t * xdata)
+{
+ ec_fop_data_t * fop = NULL;
+ ec_cbk_data_t * cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+ frame, op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_INODELK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL)
+ {
+ if (xdata != NULL)
+ {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL)
+ {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out; /* this answer is not combined */
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL)
+ {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+/*
+ * Winds the inodelk request of 'fop' to the subvolume ec->xl_list[idx],
+ * using fop->str[0], the fop's first loc, the command in fop->int32 and the
+ * lock description in fop->flock. 'idx' is passed as the cookie delivered to
+ * ec_inodelk_cbk().
+ */
+void ec_wind_inodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_inodelk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->inodelk,
+ fop->str[0], &fop->loc[0], fop->int32, &fop->flock,
+ fop->xdata);
+}
+/*
+ * State machine for inodelk requests; the GF_FOP_INODELK checks below
+ * select between the inodelk and finodelk callbacks and unlock calls.
+ *
+ * 'state' is the step to execute; the return value is the next state. The
+ * lock range is adjusted first with ec_adjust_offset()/ec_adjust_size(). A
+ * blocking F_SETLKW request that is not an unlock is first attempted as
+ * F_SETLK on all subvolumes (mode EC_LOCK_MODE_ALL). When the answers are
+ * checked, any subvolume that did get locked is unlocked if the overall
+ * result is an error; if ec_lock_check() left a negative error, the request
+ * is repeated as F_SETLKW through ec_dispatch_inc().
+ */
+int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)
+{
+ ec_cbk_data_t * cbk;
+
+ switch (state)
+ {
+ case EC_STATE_INIT:
+ fop->flock.l_len += ec_adjust_offset(fop->xl->private,
+ &fop->flock.l_start, 1);
+ fop->flock.l_len = ec_adjust_size(fop->xl->private,
+ fop->flock.l_len, 1);
+ if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK))
+ {
+ fop->uint32 = EC_LOCK_MODE_ALL;
+ fop->int32 = F_SETLK;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->flock.l_type != F_UNLCK) {
+ uintptr_t mask;
+
+ ec_fop_set_error (fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) { /* undo the locks that were acquired */
+ ec_t *ec = fop->xl->private;
+ struct gf_flock flock;
+
+ flock.l_type = F_UNLCK;
+ flock.l_whence = fop->flock.l_whence;
+ flock.l_start = fop->flock.l_start * ec->fragments; /* NOTE(review): presumably undoes the scaling applied in EC_STATE_INIT - confirm */
+ flock.l_len = fop->flock.l_len * ec->fragments;
+ flock.l_pid = 0;
+ flock.l_owner.len = 0;
+
+ if (fop->id == GF_FOP_INODELK) {
+ ec_inodelk(fop->frame, fop->xl, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ &fop->loc[0], F_SETLK, &flock,
+ fop->xdata);
+ } else {
+ ec_finodelk(fop->frame, fop->xl, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ fop->fd, F_SETLK, &flock, fop->xdata);
+ }
+ }
+ if (fop->error < 0) { /* negative error: retry with a blocking lock */
+ fop->error = 0;
+
+ fop->int32 = F_SETLKW;
+
+ ec_dispatch_inc(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+ }
+ }
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_INODELK)
+ {
+ if (fop->cbks.inodelk != NULL)
+ {
+ fop->cbks.inodelk(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->xdata);
+ }
+ }
+ else
+ {
+ if (fop->cbks.finodelk != NULL)
+ {
+ fop->cbks.finodelk(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->xdata);
+ }
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT: /* error variants: report fop->error to the caller */
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_INODELK)
+ {
+ if (fop->cbks.inodelk != NULL)
+ {
+ fop->cbks.inodelk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ }
+ else
+ {
+ if (fop->cbks.finodelk != NULL)
+ {
+ fop->cbks.finodelk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/* Entry point of the inodelk fop.
+ *
+ * Allocates the fop data (forwarding 'target', 'minimum', 'func' and 'data'
+ * to ec_fop_data_allocate), stores private copies/references of 'volume',
+ * 'loc', 'flock' and 'xdata' in it and starts the state machine through
+ * ec_manager(). If any copy fails the manager is started with ENOMEM.
+ *
+ * When no fop could be allocated (or the basic arguments are invalid) the
+ * failure is reported directly through 'func'. 'func' may be NULL, as the
+ * manager also accepts fops without a callback, so it is only invoked when
+ * present. */
+void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                int32_t minimum, fop_inodelk_cbk_t func, void * data,
+                const char * volume, loc_t * loc, int32_t cmd,
+                struct gf_flock * flock, dict_t * xdata)
+{
+    ec_cbk_t callback = { .inodelk = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(INODELK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum,
+                               ec_wind_inodelk, ec_manager_inodelk, callback,
+                               data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->int32 = cmd;
+
+    if (volume != NULL) {
+        fop->str[0] = gf_strdup(volume);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+    if (loc != NULL) {
+        if (loc_copy(&fop->loc[0], loc) != 0) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_LOC_COPY_FAIL,
+                    "Failed to copy a location.");
+
+            goto out;
+        }
+    }
+    if (flock != NULL) {
+        fop->flock.l_type = flock->l_type;
+        fop->flock.l_whence = flock->l_whence;
+        fop->flock.l_start = flock->l_start;
+        fop->flock.l_len = flock->l_len;
+        fop->flock.l_pid = flock->l_pid;
+        fop->flock.l_owner.len = flock->l_owner.len;
+        /* Only the used part of the owner buffer is copied. */
+        if (flock->l_owner.len > 0) {
+            memcpy(fop->flock.l_owner.data, flock->l_owner.data,
+                   flock->l_owner.len);
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop exists, so the manager cannot report the error for us. */
+        func(frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: finodelk */
+
+/* Callback for the answer of one subvolume to a finodelk request.
+ *
+ * 'cookie' carries the index of the answering subvolume. The answer is
+ * recorded in a newly allocated cbk (taking a reference on 'xdata' when it
+ * is present) and handed to ec_combine(). ec_complete() is called for the
+ * fop whenever the fop could be recovered from the frame. Always returns 0. */
+int32_t ec_finodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                        int32_t op_ret, int32_t op_errno, dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FINODELK, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, NULL);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the finodelk request described by 'fop' to the subvolume stored at
+ * ec->xl_list[idx]. 'idx' is passed as the wind cookie, which
+ * ec_finodelk_cbk converts back into the subvolume index. The arguments
+ * sent are the ones saved in the fop: str[0], fd, int32, flock and xdata. */
+void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_finodelk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->finodelk,
+ fop->str[0], fop->fd, fop->int32, &fop->flock,
+ fop->xdata);
+}
+
+/* Entry point of the finodelk fop.
+ *
+ * Allocates the fop data (forwarding 'target', 'minimum', 'func' and 'data'
+ * to ec_fop_data_allocate), marks it as fd based, stores private
+ * copies/references of 'volume', 'fd', 'flock' and 'xdata' in it and starts
+ * the state machine through ec_manager(). The state machine is the one
+ * shared with inodelk (ec_manager_inodelk). If any copy fails the manager
+ * is started with ENOMEM.
+ *
+ * When no fop could be allocated (or the basic arguments are invalid) the
+ * failure is reported directly through 'func'. 'func' may be NULL, as the
+ * manager also accepts fops without a callback, so it is only invoked when
+ * present. */
+void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+                 int32_t minimum, fop_finodelk_cbk_t func, void * data,
+                 const char * volume, fd_t * fd, int32_t cmd,
+                 struct gf_flock * flock, dict_t * xdata)
+{
+    ec_cbk_t callback = { .finodelk = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(FINODELK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
+                               minimum, ec_wind_finodelk, ec_manager_inodelk,
+                               callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->int32 = cmd;
+
+    if (volume != NULL) {
+        fop->str[0] = gf_strdup(volume);
+        if (fop->str[0] == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                    EC_MSG_NO_MEMORY,
+                    "Failed to duplicate a string.");
+
+            goto out;
+        }
+    }
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            /* Use the file descriptor message ID, as ec_lk does for the
+             * same failure, instead of the dictionary one. */
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (flock != NULL) {
+        fop->flock.l_type = flock->l_type;
+        fop->flock.l_whence = flock->l_whence;
+        fop->flock.l_start = flock->l_start;
+        fop->flock.l_len = flock->l_len;
+        fop->flock.l_pid = flock->l_pid;
+        fop->flock.l_owner.len = flock->l_owner.len;
+        /* Only the used part of the owner buffer is copied. */
+        if (flock->l_owner.len > 0) {
+            memcpy(fop->flock.l_owner.data, flock->l_owner.data,
+                   flock->l_owner.len);
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop exists, so the manager cannot report the error for us. */
+        func(frame, NULL, this, -1, error, NULL);
+    }
+}
+
+/* FOP: lk */
+
+/* Combine callback for lk answers: two answers can be grouped only when
+ * ec_flock_compare() accepts their locks. Returns 1 when they match;
+ * otherwise logs the mismatch and returns 0. */
+int32_t ec_combine_lk(ec_fop_data_t * fop, ec_cbk_data_t * dst,
+                      ec_cbk_data_t * src)
+{
+    if (ec_flock_compare(&dst->flock, &src->flock)) {
+        return 1;
+    }
+
+    gf_msg (fop->xl->name, GF_LOG_NOTICE, 0,
+            EC_MSG_LOCK_MISMATCH,
+            "Mismatching lock in "
+            "answers of 'GF_FOP_LK'");
+
+    return 0;
+}
+
+/* Callback for the answer of one subvolume to an lk request.
+ *
+ * 'cookie' carries the index of the answering subvolume. The answer is
+ * recorded in a newly allocated cbk: the returned lock is copied only for
+ * successful answers (op_ret >= 0) and a reference is taken on 'xdata' when
+ * it is present. The cbk is then combined using ec_combine_lk.
+ * ec_complete() is called for the fop whenever the fop could be recovered
+ * from the frame. Always returns 0. */
+int32_t ec_lk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
+                  int32_t op_ret, int32_t op_errno, struct gf_flock * flock,
+                  dict_t * xdata)
+{
+    ec_fop_data_t *fop = NULL;
+    ec_cbk_data_t *cbk = NULL;
+    int32_t idx = (int32_t)(uintptr_t)cookie;
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = frame->local;
+
+    ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
+             frame, op_ret, op_errno);
+
+    cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LK, idx, op_ret,
+                               op_errno);
+    if (cbk == NULL) {
+        goto out;
+    }
+
+    if ((op_ret >= 0) && (flock != NULL)) {
+        cbk->flock.l_type = flock->l_type;
+        cbk->flock.l_whence = flock->l_whence;
+        cbk->flock.l_start = flock->l_start;
+        cbk->flock.l_len = flock->l_len;
+        cbk->flock.l_pid = flock->l_pid;
+        cbk->flock.l_owner.len = flock->l_owner.len;
+        if (flock->l_owner.len > 0) {
+            memcpy(cbk->flock.l_owner.data, flock->l_owner.data,
+                   flock->l_owner.len);
+        }
+    }
+
+    if (xdata != NULL) {
+        cbk->xdata = dict_ref(xdata);
+        if (cbk->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    ec_combine(cbk, ec_combine_lk);
+
+out:
+    if (fop != NULL) {
+        ec_complete(fop);
+    }
+
+    return 0;
+}
+
+/* Winds the lk request described by 'fop' to the subvolume stored at
+ * ec->xl_list[idx]. 'idx' is passed as the wind cookie, which ec_lk_cbk
+ * converts back into the subvolume index. The arguments sent are the ones
+ * saved in the fop: fd, int32, flock and xdata. */
+void ec_wind_lk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_lk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->lk, fop->fd,
+ fop->int32, &fop->flock, fop->xdata);
+}
+
+/* Sends an F_UNLCK request to the subvolumes in 'mask' to undo a lock that
+ * could not be acquired everywhere. The range is the fop's range multiplied
+ * by the number of fragments. */
+static void ec_manager_lk_rollback(ec_fop_data_t *fop, uintptr_t mask)
+{
+    ec_t *ec = fop->xl->private;
+    struct gf_flock unlock;
+
+    unlock.l_type = F_UNLCK;
+    unlock.l_whence = fop->flock.l_whence;
+    unlock.l_start = fop->flock.l_start * ec->fragments;
+    unlock.l_len = fop->flock.l_len * ec->fragments;
+    unlock.l_pid = 0;
+    unlock.l_owner.len = 0;
+
+    ec_lk(fop->frame, fop->xl, mask, 1, ec_lock_lk_unlocked, NULL, fop->fd,
+          F_SETLK, &unlock, fop->xdata);
+}
+
+/* State machine for the lk fop.
+ *
+ * INIT adjusts the lock range and turns a blocking lock request (F_SETLKW)
+ * into a non-blocking one sent with EC_LOCK_MODE_ALL, then dispatches it.
+ * PREPARE_ANSWER checks the result of a lock request: on failure the
+ * subvolumes reported in the mask are unlocked again and, when the error is
+ * negative, the request is re-dispatched as F_SETLKW. Unlock requests only
+ * prepare the answer. REPORT and the negative states invoke the callback.
+ *
+ * Returns the next state to run. */
+int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state)
+{
+    ec_cbk_data_t *cbk;
+    uintptr_t mask;
+
+    switch (state) {
+    case EC_STATE_INIT:
+        fop->flock.l_len += ec_adjust_offset(fop->xl->private,
+                                             &fop->flock.l_start, 1);
+        fop->flock.l_len = ec_adjust_size(fop->xl->private,
+                                          fop->flock.l_len, 1);
+        if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) {
+            fop->uint32 = EC_LOCK_MODE_ALL;
+            fop->int32 = F_SETLK;
+        }
+
+    /* Fall through */
+
+    case EC_STATE_DISPATCH:
+        ec_dispatch_all(fop);
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_PREPARE_ANSWER:
+    case -EC_STATE_PREPARE_ANSWER:
+        if (fop->flock.l_type == F_UNLCK) {
+            ec_fop_prepare_answer(fop, _gf_true);
+
+            return EC_STATE_REPORT;
+        }
+
+        ec_fop_set_error (fop, ec_lock_check(fop, &mask));
+        if (fop->error == 0) {
+            return EC_STATE_REPORT;
+        }
+
+        /* Release whatever was locked before deciding how to continue. */
+        if (mask != 0) {
+            ec_manager_lk_rollback(fop, mask);
+        }
+
+        if (fop->error >= 0) {
+            return EC_STATE_REPORT;
+        }
+
+        /* Negative error: retry the request, this time as a blocking lock. */
+        fop->error = 0;
+        fop->int32 = F_SETLKW;
+
+        ec_dispatch_inc(fop);
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_REPORT:
+        cbk = fop->answer;
+
+        GF_ASSERT(cbk != NULL);
+
+        if (fop->cbks.lk != NULL) {
+            fop->cbks.lk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                         cbk->op_errno, &cbk->flock, cbk->xdata);
+        }
+
+        return EC_STATE_END;
+
+    case -EC_STATE_INIT:
+    case -EC_STATE_DISPATCH:
+    case -EC_STATE_REPORT:
+        GF_ASSERT(fop->error != 0);
+
+        if (fop->cbks.lk != NULL) {
+            fop->cbks.lk(fop->req_frame, fop, fop->xl, -1, fop->error,
+                         NULL, NULL);
+        }
+
+        return EC_STATE_END;
+
+    default:
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNHANDLED_STATE,
+                "Unhandled state %d for %s",
+                state, ec_fop_name(fop->id));
+
+        return EC_STATE_END;
+    }
+}
+
+/* Entry point of the lk fop.
+ *
+ * Allocates the fop data (forwarding 'target', 'minimum', 'func' and 'data'
+ * to ec_fop_data_allocate), marks it as fd based, stores private
+ * copies/references of 'fd', 'flock' and 'xdata' in it and starts the state
+ * machine through ec_manager(). If any reference fails the manager is
+ * started with ENOMEM.
+ *
+ * When no fop could be allocated (or the basic arguments are invalid) the
+ * failure is reported directly through 'func'. 'func' may be NULL, as the
+ * manager also accepts fops without a callback, so it is only invoked when
+ * present. */
+void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
+           int32_t minimum, fop_lk_cbk_t func, void * data, fd_t * fd,
+           int32_t cmd, struct gf_flock * flock, dict_t * xdata)
+{
+    ec_cbk_t callback = { .lk = func };
+    ec_fop_data_t * fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(LK) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum,
+                               ec_wind_lk, ec_manager_lk, callback, data);
+    if (fop == NULL) {
+        goto out;
+    }
+
+    fop->use_fd = 1;
+
+    fop->int32 = cmd;
+
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+        if (fop->fd == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_FILE_DESC_REF_FAIL,
+                    "Failed to reference a "
+                    "file descriptor.");
+
+            goto out;
+        }
+    }
+    if (flock != NULL) {
+        fop->flock.l_type = flock->l_type;
+        fop->flock.l_whence = flock->l_whence;
+        fop->flock.l_start = flock->l_start;
+        fop->flock.l_len = flock->l_len;
+        fop->flock.l_pid = flock->l_pid;
+        fop->flock.l_owner.len = flock->l_owner.len;
+        /* Only the used part of the owner buffer is copied. */
+        if (flock->l_owner.len > 0) {
+            memcpy(fop->flock.l_owner.data, flock->l_owner.data,
+                   flock->l_owner.len);
+        }
+    }
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+        if (fop->xdata == NULL) {
+            gf_msg (this->name, GF_LOG_ERROR, 0,
+                    EC_MSG_DICT_REF_FAIL,
+                    "Failed to reference a "
+                    "dictionary.");
+
+            goto out;
+        }
+    }
+
+    error = 0;
+
+out:
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else if (func != NULL) {
+        /* No fop exists, so the manager cannot report the error for us. */
+        func(frame, NULL, this, -1, error, NULL, NULL);
+    }
+}
diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h
new file mode 100644
index 00000000000..df65a031590
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_MEM_TYPES_H__
+#define __EC_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory type identifiers private to the EC translator. Numbering starts
+ * right after gf_common_mt_end so the values do not overlap the common
+ * ones. ec_mt_end is the last enumerator; presumably it is used as the
+ * number of types when they are registered (TODO confirm at the caller). */
+enum gf_ec_mem_types_
+{
+ ec_mt_ec_t = gf_common_mt_end + 1,
+ ec_mt_xlator_t,
+ ec_mt_ec_inode_t,
+ ec_mt_ec_fd_t,
+ ec_mt_ec_heal_t,
+ ec_mt_subvol_healer_t,
+ ec_mt_end
+};
+
+#endif /* __EC_MEM_TYPES_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
new file mode 100644
index 00000000000..76678f8f836
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -0,0 +1,526 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EC_MESSAGES_H_
+#define _EC_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file ec-messages.h
+ * \brief EC translator log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_EC_COMP_BASE GLFS_MSGID_COMP_EC
+#define GLFS_NUM_MESSAGES 66
+#define GLFS_MSGID_END (GLFS_EC_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_EC_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_CONFIG (GLFS_EC_COMP_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_HEAL_FAIL (GLFS_EC_COMP_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_DICT_COMBINE_FAIL (GLFS_EC_COMP_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_STIME_COMBINE_FAIL (GLFS_EC_COMP_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_DICT_NUMS (GLFS_EC_COMP_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_IATT_COMBINE_FAIL (GLFS_EC_COMP_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_FORMAT (GLFS_EC_COMP_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_DICT_GET_FAILED (GLFS_EC_COMP_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_UNHANDLED_STATE (GLFS_EC_COMP_BASE + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FILE_DESC_REF_FAIL (GLFS_EC_COMP_BASE + 10)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_LOC_COPY_FAIL (GLFS_EC_COMP_BASE + 11)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_BUF_REF_FAIL (GLFS_EC_COMP_BASE + 12)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_DICT_REF_FAIL (GLFS_EC_COMP_BASE + 13)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_LK_UNLOCK_FAILED (GLFS_EC_COMP_BASE + 14)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_UNLOCK_FAILED (GLFS_EC_COMP_BASE + 15)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_LOC_PARENT_INODE_MISSING (GLFS_EC_COMP_BASE + 16)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_LOC_NAME (GLFS_EC_COMP_BASE + 17)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_NO_MEMORY (GLFS_EC_COMP_BASE + 18)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_GFID_MISMATCH (GLFS_EC_COMP_BASE + 19)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_UNSUPPORTED_VERSION (GLFS_EC_COMP_BASE + 20)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FD_CREATE_FAIL (GLFS_EC_COMP_BASE + 21)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_READDIRP_REQ_PREP_FAIL (GLFS_EC_COMP_BASE + 22)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_LOOKUP_REQ_PREP_FAIL (GLFS_EC_COMP_BASE + 23)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INODE_REF_FAIL (GLFS_EC_COMP_BASE + 24)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_LOOKUP_READAHEAD_FAIL (GLFS_EC_COMP_BASE + 25)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FRAME_MISMATCH (GLFS_EC_COMP_BASE + 26)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_XLATOR_MISMATCH (GLFS_EC_COMP_BASE + 27)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_VECTOR_MISMATCH (GLFS_EC_COMP_BASE + 28)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_IATT_MISMATCH (GLFS_EC_COMP_BASE + 29)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FD_MISMATCH (GLFS_EC_COMP_BASE + 30)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_DICT_MISMATCH (GLFS_EC_COMP_BASE + 31)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INDEX_DIR_GET_FAIL (GLFS_EC_COMP_BASE + 32)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_PREOP_LOCK_FAILED (GLFS_EC_COMP_BASE + 33)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_CHILDS_INSUFFICIENT (GLFS_EC_COMP_BASE + 34)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_OP_EXEC_UNAVAIL (GLFS_EC_COMP_BASE + 35)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_UNLOCK_DELAY_FAILED (GLFS_EC_COMP_BASE + 36)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_SIZE_VERS_UPDATE_FAIL (GLFS_EC_COMP_BASE + 37)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_REQUEST (GLFS_EC_COMP_BASE + 38)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_LOCK_TYPE (GLFS_EC_COMP_BASE + 39)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_SIZE_VERS_GET_FAIL (GLFS_EC_COMP_BASE + 40)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FILE_SIZE_GET_FAIL (GLFS_EC_COMP_BASE + 41)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FOP_MISMATCH (GLFS_EC_COMP_BASE + 42)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_SUBVOL_ID_DICT_SET_FAIL (GLFS_EC_COMP_BASE + 43)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_SUBVOL_BUILD_FAIL (GLFS_EC_COMP_BASE + 44)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_XLATOR_INIT_FAIL (GLFS_EC_COMP_BASE + 45)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_NO_PARENTS (GLFS_EC_COMP_BASE + 46)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_TIMER_CREATE_FAIL (GLFS_EC_COMP_BASE + 47)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_TOO_MANY_SUBVOLS (GLFS_EC_COMP_BASE + 48)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_DATA_UNAVAILABLE (GLFS_EC_COMP_BASE + 49)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INODE_REMOVE_FAIL (GLFS_EC_COMP_BASE + 50)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_REDUNDANCY (GLFS_EC_COMP_BASE + 51)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_XLATOR_PARSE_OPT_FAIL (GLFS_EC_COMP_BASE + 52)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_OP_FAIL_ON_SUBVOLS (GLFS_EC_COMP_BASE + 53)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_INODE (GLFS_EC_COMP_BASE + 54)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_LOCK_MISMATCH (GLFS_EC_COMP_BASE + 55)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_XDATA_MISMATCH (GLFS_EC_COMP_BASE + 56)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_HEALING_INFO (GLFS_EC_COMP_BASE + 57)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_HEAL_SUCCESS (GLFS_EC_COMP_BASE + 58)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_FULL_SWEEP_START (GLFS_EC_COMP_BASE + 59)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ * NOTE(review): this ID has the same value (GLFS_EC_COMP_BASE + 59) as
+ * EC_MSG_FULL_SWEEP_START, so the two messages cannot be told apart by ID.
+ * Confirm whether this is intended. Following the addition rules at the top
+ * of this file, a unique ID would have to be appended at the end of the
+ * list together with an increment of GLFS_NUM_MESSAGES.
+ */
+#define EC_MSG_FULL_SWEEP_STOP (GLFS_EC_COMP_BASE + 59)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_INVALID_FOP (GLFS_EC_COMP_BASE + 60)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_EC_UP (GLFS_EC_COMP_BASE + 61)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_EC_DOWN (GLFS_EC_COMP_BASE + 62)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_SIZE_XATTR_GET_FAIL (GLFS_EC_COMP_BASE + 63)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_VER_XATTR_GET_FAIL (GLFS_EC_COMP_BASE + 64)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_CONFIG_XATTR_GET_FAIL (GLFS_EC_COMP_BASE + 65)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define EC_MSG_CONFIG_XATTR_INVALID (GLFS_EC_COMP_BASE + 66)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec-method.c b/xlators/cluster/ec/src/ec-method.c
new file mode 100644
index 00000000000..faab0115cdd
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-method.c
@@ -0,0 +1,159 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+#include <inttypes.h>
+
+#include "ec-gf.h"
+#include "ec-method.h"
+
+static uint32_t GfPow[EC_GF_SIZE << 1];
+static uint32_t GfLog[EC_GF_SIZE << 1];
+
+/* Fills the GfPow and GfLog lookup tables used by ec_method_mul() and
+ * ec_method_div().
+ *
+ * GfPow[e] holds the value obtained by doubling 1 'e' times, reducing with
+ * EC_GF_MOD whenever the result reaches EC_GF_SIZE. GfLog is the reverse
+ * mapping. Each entry is also stored EC_GF_SIZE - 1 positions further on,
+ * so sums of two logarithms can index GfPow without a modulo operation. */
+void ec_method_initialize(void)
+{
+    uint32_t exp;
+    uint32_t value = 1;
+
+    GfPow[0] = value;
+    GfLog[0] = EC_GF_SIZE;
+    for (exp = 1; exp < EC_GF_SIZE; exp++) {
+        value <<= 1;
+        if (value >= EC_GF_SIZE) {
+            value ^= EC_GF_MOD;
+        }
+
+        GfPow[exp] = value;
+        GfPow[exp + EC_GF_SIZE - 1] = value;
+
+        GfLog[value] = exp;
+        GfLog[value + EC_GF_SIZE - 1] = exp;
+    }
+}
+
+/* Multiplies two field elements using the GfLog/GfPow tables. The product
+ * is 0 as soon as one of the operands is 0. */
+static uint32_t ec_method_mul(uint32_t a, uint32_t b)
+{
+    if ((a == 0) || (b == 0)) {
+        return 0;
+    }
+
+    return GfPow[GfLog[a] + GfLog[b]];
+}
+
+/* Divides field element 'a' by 'b' using the GfLog/GfPow tables.
+ * A division by 0 returns EC_GF_SIZE; 0 divided by a non-zero value is 0. */
+static uint32_t ec_method_div(uint32_t a, uint32_t b)
+{
+    if (b == 0) {
+        return EC_GF_SIZE;
+    }
+    if (a == 0) {
+        return 0;
+    }
+
+    /* The EC_GF_SIZE - 1 offset keeps the index from going negative. */
+    return GfPow[EC_GF_SIZE - 1 + GfLog[a] - GfLog[b]];
+}
+
+/* Encodes the data in 'in' into one fragment written to 'out'.
+ *
+ * 'size' is the number of input bytes; it is processed in groups of
+ * 'columns' chunks of EC_METHOD_CHUNK_SIZE bytes (a trailing partial group
+ * is ignored by the integer division). Each group produces one output
+ * chunk: the first input chunk is fed through ec_gf_muladd[0] and the
+ * remaining ones through ec_gf_muladd[row + 1].
+ *
+ * Returns the number of bytes produced in 'out'. */
+size_t ec_method_encode(size_t size, uint32_t columns, uint32_t row,
+                        uint8_t * in, uint8_t * out)
+{
+    uint32_t group, col;
+    uint32_t factor = row + 1;
+
+    size /= EC_METHOD_CHUNK_SIZE * columns;
+    for (group = 0; group < size; group++) {
+        ec_gf_muladd[0](out, in, EC_METHOD_WIDTH);
+        in += EC_METHOD_CHUNK_SIZE;
+
+        for (col = 1; col < columns; col++) {
+            ec_gf_muladd[factor](out, in, EC_METHOD_WIDTH);
+            in += EC_METHOD_CHUNK_SIZE;
+        }
+
+        out += EC_METHOD_CHUNK_SIZE;
+    }
+
+    return size * EC_METHOD_CHUNK_SIZE;
+}
+
+/* Rebuilds the original data from 'columns' fragments.
+ *
+ * 'size' is the number of bytes of each fragment, 'rows' holds the row
+ * number of each fragment (the same value given as 'row' to
+ * ec_method_encode()) and 'in' the fragment buffers. The decoded data is
+ * written to 'out'.
+ *
+ * The function builds the matrix whose row i contains the powers of
+ * rows[i] + 1, inverts it and then applies the inverse to every chunk of
+ * the fragments.
+ *
+ * Returns the number of bytes written to 'out'. */
+size_t ec_method_decode(size_t size, uint32_t columns, uint32_t * rows,
+ uint8_t ** in, uint8_t * out)
+{
+ uint32_t i, j, k, off, last, value;
+ uint32_t f;
+ uint8_t inv[EC_METHOD_MAX_FRAGMENTS][EC_METHOD_MAX_FRAGMENTS + 1];
+ uint8_t mtx[EC_METHOD_MAX_FRAGMENTS][EC_METHOD_MAX_FRAGMENTS];
+ uint8_t dummy[EC_METHOD_CHUNK_SIZE];
+
+ /* From here on 'size' counts chunks per fragment, not bytes. */
+ size /= EC_METHOD_CHUNK_SIZE;
+
+ memset(inv, 0, sizeof(inv));
+ memset(mtx, 0, sizeof(mtx));
+ memset(dummy, 0, sizeof(dummy));
+ /* 'inv' starts as the identity matrix. The extra column 'columns' is set
+ * to 1 and never modified below: it is the sentinel that stops the
+ * unbounded zero-skipping loop of the decoding phase. */
+ for (i = 0; i < columns; i++)
+ {
+ inv[i][i] = 1;
+ inv[i][columns] = 1;
+ }
+ /* Row i of 'mtx': last column is 1 and each column to the left is the
+ * previous one multiplied by rows[i] + 1. */
+ for (i = 0; i < columns; i++)
+ {
+ mtx[i][columns - 1] = 1;
+ for (j = columns - 1; j > 0; j--)
+ {
+ mtx[i][j - 1] = ec_method_mul(mtx[i][j], rows[i] + 1);
+ }
+ }
+
+ /* Reduce 'mtx' to the identity, applying the same row operations to
+ * 'inv', which ends up holding the inverse. Rows are never exchanged, so
+ * a zero pivot is not handled specially here. */
+ for (i = 0; i < columns; i++)
+ {
+ /* Scale row i so that its pivot becomes 1. */
+ f = mtx[i][i];
+ for (j = 0; j < columns; j++)
+ {
+ mtx[i][j] = ec_method_div(mtx[i][j], f);
+ inv[i][j] = ec_method_div(inv[i][j], f);
+ }
+ /* Cancel column i in every other row. */
+ for (j = 0; j < columns; j++)
+ {
+ if (i != j)
+ {
+ f = mtx[j][i];
+ for (k = 0; k < columns; k++)
+ {
+ mtx[j][k] ^= ec_method_mul(mtx[i][k], f);
+ inv[j][k] ^= ec_method_mul(inv[i][k], f);
+ }
+ }
+ }
+ }
+ /* Decoding phase: 'f' now iterates over chunks and 'off' is the byte
+ * offset of the current chunk inside every fragment. */
+ off = 0;
+ for (f = 0; f < size; f++)
+ {
+ for (i = 0; i < columns; i++)
+ {
+ /* Combine the fragments using the non-zero coefficients of row i
+ * of the inverse. The factor given to ec_gf_muladd is the ratio
+ * between the previous coefficient and the current one, and a
+ * final step with the last coefficient and an all-zero input
+ * completes the chain.
+ * NOTE(review): this presumably relies on ec_gf_muladd[x]
+ * computing out = out * x + in - confirm in ec-gf. */
+ last = 0;
+ j = 0;
+ do
+ {
+ /* No bound check needed: inv[i][columns] is always 1. */
+ while (inv[i][j] == 0)
+ {
+ j++;
+ }
+ if (j < columns)
+ {
+ value = ec_method_div(last, inv[i][j]);
+ last = inv[i][j];
+ ec_gf_muladd[value](out, in[j] + off, EC_METHOD_WIDTH);
+ j++;
+ }
+ } while (j < columns);
+ ec_gf_muladd[last](out, dummy, EC_METHOD_WIDTH);
+ out += EC_METHOD_CHUNK_SIZE;
+ }
+ off += EC_METHOD_CHUNK_SIZE;
+ }
+
+ return size * EC_METHOD_CHUNK_SIZE * columns;
+}
diff --git a/xlators/cluster/ec/src/ec-method.h b/xlators/cluster/ec/src/ec-method.h
new file mode 100644
index 00000000000..29b46e10443
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-method.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_METHOD_H__
+#define __EC_METHOD_H__
+
+#include "ec-gf.h"
+
+/* Determines the maximum size of the matrix used to encode/decode data */
+#define EC_METHOD_MAX_FRAGMENTS 16
+/* Determines the maximum number of usable elements in the Galois Field */
+#define EC_METHOD_MAX_NODES (EC_GF_SIZE - 1)
+
+/* Base size from which the chunk size and the width below are derived */
+#define EC_METHOD_WORD_SIZE 64
+
+/* Number of bytes that ec_method_encode() and ec_method_decode() advance
+ * their buffers by for each processed chunk */
+#define EC_METHOD_CHUNK_SIZE (EC_METHOD_WORD_SIZE * EC_GF_BITS)
+/* Value passed as the last argument to the ec_gf_muladd[] routines */
+#define EC_METHOD_WIDTH (EC_METHOD_WORD_SIZE / EC_GF_WORD_SIZE)
+
+void ec_method_initialize(void);
+size_t ec_method_encode(size_t size, uint32_t columns, uint32_t row,
+ uint8_t * in, uint8_t * out);
+size_t ec_method_decode(size_t size, uint32_t columns, uint32_t * rows,
+ uint8_t ** in, uint8_t * out);
+
+#endif /* __EC_METHOD_H__ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
new file mode 100644
index 00000000000..c803ebfa796
--- /dev/null
+++ b/xlators/cluster/ec/src/ec.c
@@ -0,0 +1,1378 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "defaults.h"
+#include "statedump.h"
+#include "compat-errno.h"
+
+#include "ec-mem-types.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-fops.h"
+#include "ec-method.h"
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-heald.h"
+
+/* Textual names of the "read-policy" option values, indexed by
+ * ec_read_policy_t. The extra EC_READ_POLICY_MAX slot is explicitly NULL
+ * (presumably the terminator expected by gf_get_index_by_elem - confirm). */
+static char *ec_read_policies[EC_READ_POLICY_MAX + 1] = {
+ [EC_ROUND_ROBIN] = "round-robin",
+ [EC_GFID_HASH] = "gfid-hash",
+ [EC_READ_POLICY_MAX] = NULL
+};
+/* Maximum number of data fragments; taken directly from the encoding
+ * method's limit. */
+#define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS
+/* The maximum number of nodes is derived from the maximum allowed fragments
+ * using the rule that redundancy cannot be equal or greater than the number
+ * of fragments (so at most EC_MAX_FRAGMENTS - 1 redundancy bricks can be
+ * added), and it is additionally capped by the method's own node limit.
+ */
+#define EC_MAX_NODES min(EC_MAX_FRAGMENTS * 2 - 1, EC_METHOD_MAX_NODES)
+
+/* Rejects requests that touch xattrs internal to ec.
+ *
+ * - If ec_is_internal_xattr() reports 'name' as internal, 'op_errno' is set
+ *   to EPERM and control jumps to 'label'.
+ * - If 'name' is the empty string and 'xattr' is non-NULL (the bulk
+ *   [f]setxattr/[f]removexattr case), the check is delegated to
+ *   GF_IF_INTERNAL_XATTR_GOTO with the pattern EC_XATTR_PREFIX"*".
+ *
+ * 'name' and 'xattr' are evaluated more than once, so callers must pass
+ * expressions without side effects. Both are parenthesized in the expansion
+ * so that arbitrary expressions can be passed safely.
+ */
+#define EC_INTERNAL_XATTR_OR_GOTO(name, xattr, op_errno, label)                \
+    do {                                                                       \
+        if (ec_is_internal_xattr (NULL, (char *)(name), NULL, NULL)) {         \
+            op_errno = EPERM;                                                  \
+            goto label;                                                        \
+        }                                                                      \
+        if ((name) && (strlen (name) == 0) && (xattr)) {                       \
+            /* Bulk [f]removexattr/[f]setxattr */                              \
+            GF_IF_INTERNAL_XATTR_GOTO (EC_XATTR_PREFIX"*", (xattr),            \
+                                       op_errno, label);                       \
+        }                                                                      \
+    } while (0)
+
+/* Reads the "redundancy" option and derives the volume geometry from it.
+ *
+ * ec->nodes must already hold the number of subvolumes (it is filled by
+ * ec_prepare_childs). On success the following fields are set:
+ *   fragments      = nodes - redundancy
+ *   bits_for_nodes = smallest b >= 1 such that 2^b >= nodes
+ *   node_mask      = bitmask with the lowest 'nodes' bits set
+ *   fragment_size  = EC_METHOD_CHUNK_SIZE
+ *   stripe_size    = fragment_size * fragments
+ *
+ * Returns 0 on success. Returns EINVAL when the redundancy is out of range
+ * (it must satisfy 1 <= redundancy < fragments <= EC_MAX_FRAGMENTS) or when
+ * GF_OPTION_INIT branches to 'out'.
+ */
+int32_t ec_parse_options(xlator_t * this)
+{
+    ec_t * ec = this->private;
+    int32_t error = EINVAL;
+    uintptr_t mask;
+
+    GF_OPTION_INIT("redundancy", ec->redundancy, int32, out);
+    ec->fragments = ec->nodes - ec->redundancy;
+    if ((ec->redundancy < 1) || (ec->redundancy >= ec->fragments) ||
+        (ec->fragments > EC_MAX_FRAGMENTS))
+    {
+        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_INVALID_REDUNDANCY,
+                "Invalid redundancy (must be between "
+                "1 and %d)", (ec->nodes - 1) / 2);
+
+        goto out;
+    }
+
+    /* Count how many bits are needed to represent a node index. The
+     * validation above guarantees nodes is positive, so the cast is safe. */
+    ec->bits_for_nodes = 1;
+    mask = 2;
+    while ((uintptr_t)ec->nodes > mask)
+    {
+        ec->bits_for_nodes++;
+        mask <<= 1;
+    }
+    ec->node_mask = (1ULL << ec->nodes) - 1ULL;
+    ec->fragment_size = EC_METHOD_CHUNK_SIZE;
+    ec->stripe_size = ec->fragment_size * ec->fragments;
+
+    /* nodes/fragments are int32_t and node_mask is uintptr_t: use matching
+     * conversions, and log under the xlator's own name like the rest of the
+     * file does. */
+    gf_msg_debug (this->name, 0, "Initialized with: nodes=%d, fragments=%d, "
+                  "stripe_size=%u, node_mask=%lX",
+                  ec->nodes, ec->fragments, ec->stripe_size,
+                  (unsigned long)ec->node_mask);
+
+    error = 0;
+
+out:
+    return error;
+}
+
+/* Builds ec->xl_list from the xlator's children.
+ *
+ * Stores the number of children in ec->nodes, allocates an array with one
+ * slot per child, fills it in the order of this->children and resets the
+ * "up" bookkeeping (xl_up / xl_up_count).
+ *
+ * Returns 0 on success, EINVAL if there are more than EC_MAX_NODES children
+ * and ENOMEM if the array cannot be allocated.
+ */
+int32_t ec_prepare_childs(xlator_t * this)
+{
+    ec_t * ec = this->private;
+    xlator_list_t * item = this->children;
+    int32_t total = 0;
+    int32_t pos = 0;
+
+    while (item != NULL)
+    {
+        total++;
+        item = item->next;
+    }
+
+    if (total > EC_MAX_NODES)
+    {
+        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_TOO_MANY_SUBVOLS, "Too many subvolumes");
+
+        return EINVAL;
+    }
+    ec->nodes = total;
+
+    ec->xl_list = GF_CALLOC(total, sizeof(ec->xl_list[0]), ec_mt_xlator_t);
+    if (ec->xl_list == NULL)
+    {
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_NO_MEMORY, "Allocation of xlator list failed");
+
+        return ENOMEM;
+    }
+
+    /* No child has reported itself as up yet. */
+    ec->xl_up = 0;
+    ec->xl_up_count = 0;
+
+    for (item = this->children; item != NULL; item = item->next)
+    {
+        ec->xl_list[pos] = item->xlator;
+        pos++;
+    }
+
+    return 0;
+}
+
+/* dict_foreach callback: replaces the xlator pointer stored under 'key' with
+ * the index of that xlator inside ec->xl_list.
+ *
+ * 'data' is the ec_t. Returns the result of dict_set_int32 (any negative
+ * value is normalized to -1), or -1 if the pointer is not one of the
+ * children. */
+static int
+_subvol_to_subvolid (dict_t *this, char *key, data_t *value, void *data)
+{
+        ec_t     *ec     = data;
+        xlator_t *target = data_to_ptr (value);
+        int       pos    = 0;
+        int       rc     = 0;
+
+        while (pos < ec->nodes) {
+                if (ec->xl_list[pos] == target) {
+                        rc = dict_set_int32 (this, key, pos);
+                        /* -1 stops dict_foreach and returns -1*/
+                        return (rc < 0) ? -1 : rc;
+                }
+                pos++;
+        }
+
+        /* Not one of our children. */
+        return -1;
+}
+
+/* Runs _subvol_to_subvolid over every entry of 'leaf_to_subvolid', turning
+ * each stored subvolume pointer into its index in ec->xl_list. Returns
+ * whatever dict_foreach returns. */
+int
+ec_subvol_to_subvol_id_transform (ec_t *ec, dict_t *leaf_to_subvolid)
+{
+ return dict_foreach (leaf_to_subvolid, _subvol_to_subvolid, ec);
+}
+
+/* Releases everything hanging from this->private and clears the pointer.
+ * Does nothing when this->private is already NULL. Used both by fini() and
+ * by the failure path of init(). */
+void __ec_destroy_private(xlator_t * this)
+{
+    ec_t * priv = this->private;
+
+    if (priv == NULL)
+    {
+        return;
+    }
+
+    /* Cancel the pending notification timer, if any, under the lock. */
+    LOCK(&priv->lock);
+
+    if (priv->timer != NULL)
+    {
+        gf_timer_call_cancel(this->ctx, priv->timer);
+        priv->timer = NULL;
+    }
+
+    UNLOCK(&priv->lock);
+
+    /* There is a race with timer because there is no way to know if
+     * timer callback has really been cancelled or it has been scheduled
+     * for execution. If it has been scheduled, it will crash if we
+     * destroy ec too fast.
+     *
+     * Not sure how this can be solved without using global variables or
+     * having support from gf_timer_call_cancel()
+     */
+    sleep(2);
+
+    this->private = NULL;
+
+    if (priv->xl_list != NULL)
+    {
+        GF_FREE(priv->xl_list);
+        priv->xl_list = NULL;
+    }
+
+    /* Memory pools may be missing if init() failed half way. */
+    if (priv->fop_pool != NULL)
+    {
+        mem_pool_destroy(priv->fop_pool);
+    }
+    if (priv->cbk_pool != NULL)
+    {
+        mem_pool_destroy(priv->cbk_pool);
+    }
+    if (priv->lock_pool != NULL)
+    {
+        mem_pool_destroy(priv->lock_pool);
+    }
+
+    LOCK_DESTROY(&priv->lock);
+
+    if (priv->leaf_to_subvolid)
+    {
+        dict_unref (priv->leaf_to_subvolid);
+    }
+
+    GF_FREE(priv);
+}
+
+/* Sets up memory accounting for this xlator with ec_mt_end + 1 memory
+ * types. Returns 0 on success, -1 (after logging) on failure. */
+int32_t mem_acct_init(xlator_t * this)
+{
+    if (xlator_mem_acct_init(this, ec_mt_end + 1) == 0)
+    {
+        return 0;
+    }
+
+    gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+            EC_MSG_NO_MEMORY, "Memory accounting initialization "
+            "failed.");
+
+    return -1;
+}
+
+/* Stores the background heal limits in 'ec'. When background heals are
+ * disabled (background_heals == 0) the wait queue length is forced to 0
+ * regardless of 'heal_wait_qlen'. */
+void
+ec_configure_background_heal_opts (ec_t *ec, int background_heals,
+                                   int heal_wait_qlen)
+{
+        ec->heal_wait_qlen = (background_heals != 0) ? heal_wait_qlen : 0;
+        ec->background_heals = background_heals;
+}
+
+/* Translates the textual 'read_policy' into its ec_read_policy_t value via
+ * the ec_read_policies table and stores it in ec->read_policy.
+ * Returns 0 on success, -1 if the name maps to no valid policy. */
+int
+ec_assign_read_policy (ec_t *ec, char *read_policy)
+{
+        int pos = gf_get_index_by_elem (ec_read_policies, read_policy);
+
+        if (pos >= 0 && pos < EC_READ_POLICY_MAX) {
+                ec->read_policy = pos;
+                return 0;
+        }
+
+        return -1;
+}
+
+/* Applies changed volume options at runtime.
+ *
+ * Each GF_OPTION_RECONF receives the label 'failed' as its error target.
+ * Options are written into 'ec' one by one, so a failure part way through
+ * leaves the options processed so far already applied.
+ *
+ * Returns 0 on success and -1 on failure. */
+int32_t
+reconfigure (xlator_t *this, dict_t *options)
+{
+ ec_t *ec = this->private;
+ char *read_policy = NULL;
+ uint32_t heal_wait_qlen = 0;
+ uint32_t background_heals = 0;
+
+ GF_OPTION_RECONF ("self-heal-daemon", ec->shd.enabled, options, bool,
+ failed);
+ GF_OPTION_RECONF ("iam-self-heal-daemon", ec->shd.iamshd, options,
+ bool, failed);
+ GF_OPTION_RECONF ("eager-lock", ec->eager_lock, options,
+ bool, failed);
+ /* The two heal limits are read into locals first because the stored
+ * queue length depends on whether background heals are enabled. */
+ GF_OPTION_RECONF ("background-heals", background_heals, options,
+ uint32, failed);
+ GF_OPTION_RECONF ("heal-wait-qlength", heal_wait_qlen, options,
+ uint32, failed);
+ GF_OPTION_RECONF ("heal-timeout", ec->shd.timeout, options,
+ int32, failed);
+ ec_configure_background_heal_opts (ec, background_heals,
+ heal_wait_qlen);
+ GF_OPTION_RECONF ("read-policy", read_policy, options, str, failed);
+ if (ec_assign_read_policy (ec, read_policy))
+ goto failed;
+
+ return 0;
+failed:
+ return -1;
+}
+
+/* Computes which event (if any) describes the current state of the volume.
+ *
+ *  - GF_EVENT_CHILD_UP:   at least 'fragments' children are up and either
+ *                         no timer is pending or every child has notified.
+ *  - GF_EVENT_CHILD_DOWN: fewer than 'fragments' children are up and more
+ *                         than 'redundancy' of the notified ones are down.
+ *  - GF_EVENT_MAXVAL:     nothing can be decided yet.
+ */
+glusterfs_event_t
+ec_get_event_from_state (ec_t *ec)
+{
+        int missing = 0;
+
+        if (ec->xl_up_count < ec->fragments) {
+                missing = ec->xl_notify_count - ec->xl_up_count;
+
+                return (missing > ec->redundancy) ? GF_EVENT_CHILD_DOWN
+                                                  : GF_EVENT_MAXVAL;
+        }
+
+        /* If ec is up but some subvolumes are yet to notify, give
+         * grace time for other subvols to notify to prevent start of
+         * I/O which may result in self-heals */
+        if (ec->timer && ec->xl_notify_count < ec->nodes) {
+                return GF_EVENT_MAXVAL;
+        }
+
+        return GF_EVENT_CHILD_UP;
+}
+
+/* Marks the volume as up: cancels the pending notification timer (if any),
+ * sets ec->up to 1 and logs the transition. Callers in this file invoke it
+ * with ec->lock held. */
+void
+ec_up (xlator_t *this, ec_t *ec)
+{
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel (this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ ec->up = 1;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ EC_MSG_EC_UP, "Going UP");
+}
+
+/* Marks the volume as down: cancels the pending notification timer (if
+ * any), sets ec->up to 0 and logs the transition. Called from ec_notify with
+ * ec->lock held. */
+void
+ec_down (xlator_t *this, ec_t *ec)
+{
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ ec->up = 0;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ EC_MSG_EC_DOWN, "Going DOWN");
+}
+
+/* Timer callback armed by ec_launch_notify_timer; 'data' is the ec_t.
+ *
+ * When the grace period expires without the volume having reported its
+ * state to the parent, this decides the state from what is known so far and
+ * propagates it. The decision is taken under ec->lock; the notification to
+ * the parent is sent after the lock has been released. */
+void
+ec_notify_cbk (void *data)
+{
+ ec_t *ec = data;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+
+ LOCK(&ec->lock);
+ {
+ if (!ec->timer) {
+ /*
+ * Either child_up or child_down has already been sent to
+ * the parent (ec_up/ec_down clear ec->timer), so this is
+ * a spurious wake up.
+ */
+ goto unlock;
+ }
+
+ gf_timer_call_cancel (ec->xl->ctx, ec->timer);
+ ec->timer = NULL;
+
+ event = ec_get_event_from_state (ec);
+ /* If event is still MAXVAL then enough subvolumes didn't
+ * notify, treat it as CHILD_DOWN. All children are marked as
+ * having notified so later events are evaluated normally. */
+ if (event == GF_EVENT_MAXVAL) {
+ event = GF_EVENT_CHILD_DOWN;
+ ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
+ ec->xl_notify_count = ec->nodes;
+ } else if (event == GF_EVENT_CHILD_UP) {
+ /* Rest of the bricks are still not coming up,
+ * notify that ec is up. Files/directories will be
+ * healed as and when they come up. */
+ ec_up (ec->xl, ec);
+ }
+
+ /* CHILD_DOWN should not come here as no grace period is given
+ * for notifying CHILD_DOWN. */
+
+ propagate = _gf_true;
+ }
+unlock:
+ UNLOCK(&ec->lock);
+
+ if (propagate) {
+ default_notify (ec->xl, event, NULL);
+ }
+}
+
+/* Arms a 10 second timer that will run ec_notify_cbk with 'ec' as argument
+ * and stores it in ec->timer. A failure to create the timer is only logged;
+ * ec->timer is NULL in that case. */
+void
+ec_launch_notify_timer (xlator_t *this, ec_t *ec)
+{
+        struct timespec delay = { .tv_sec = 10, .tv_nsec = 0 };
+
+        gf_msg_debug (this->name, 0, "Initiating child-down timer");
+
+        ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_cbk, ec);
+        if (ec->timer != NULL) {
+                return;
+        }
+
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                EC_MSG_TIMER_CREATE_FAIL, "Cannot create timer "
+                "for delayed initialization");
+}
+
+/* Records a CHILD_UP from child 'idx': the child is flagged as having
+ * notified and as being up. Counters only change on the first occurrence,
+ * so repeated events for the same child are harmless. */
+void
+ec_handle_up (xlator_t *this, ec_t *ec, int32_t idx)
+{
+        const uintptr_t bit = (uintptr_t)(1ULL << idx);
+
+        /* First notification of any kind from this child. */
+        if ((ec->xl_notify & bit) == 0) {
+                ec->xl_notify |= bit;
+                ec->xl_notify_count++;
+        }
+
+        /* Child was not up before; a duplicate event skips this branch. */
+        if ((ec->xl_up & bit) == 0) {
+                ec->xl_up |= bit;
+                ec->xl_up_count++;
+        }
+}
+
+/* Records a CHILD_DOWN from child 'idx': the child is flagged as having
+ * notified and, if it was up, its "up" bit is cleared and the up counter
+ * decremented. Repeated events for an already-down child are harmless. */
+void
+ec_handle_down (xlator_t *this, ec_t *ec, int32_t idx)
+{
+        const uintptr_t bit = (uintptr_t)(1ULL << idx);
+
+        /* First notification of any kind from this child. */
+        if ((ec->xl_notify & bit) == 0) {
+                ec->xl_notify |= bit;
+                ec->xl_notify_count++;
+        }
+
+        /* Child was up until now; a duplicate event skips this branch. */
+        if ((ec->xl_up & bit) != 0) {
+                gf_msg_debug (this->name, 0, "Child %d is DOWN", idx);
+
+                /* The bit is known to be set, so the xor clears it. */
+                ec->xl_up ^= bit;
+                ec->xl_up_count--;
+        }
+}
+
+/* Flags the xlator as shutting down (ec->shutdown) and reports whether the
+ * list of pending fops is empty. ec_notify uses the result to decide
+ * whether PARENT_DOWN can be propagated immediately. */
+gf_boolean_t
+ec_disable_delays(ec_t *ec)
+{
+ ec->shutdown = _gf_true;
+
+ return list_empty (&ec->pending_fops);
+}
+
+/* If a shutdown has been requested (ec->shutdown is set), propagates
+ * GF_EVENT_PARENT_DOWN through default_notify; otherwise does nothing.
+ * Judging by its name it is meant to be called once the last pending fop
+ * has finished - the caller is outside this file. */
+void
+ec_pending_fops_completed(ec_t *ec)
+{
+ if (ec->shutdown) {
+ default_notify(ec->xl, GF_EVENT_PARENT_DOWN, NULL);
+ }
+}
+
+/* Central event handler of the xlator.
+ *
+ *  - GF_EVENT_TRANSLATOR_OP: 'data'/'data2' are the input/output dicts; the
+ *    request is handed to ec_xl_op, or fails with -1 while ec is down.
+ *  - GF_EVENT_PARENT_UP: arms the notification timer and propagates.
+ *  - GF_EVENT_PARENT_DOWN: flags shutdown; propagated now only if no fops
+ *    are pending.
+ *  - Events whose 'data' is one of the children: the up/notify bookkeeping
+ *    is updated and the event actually propagated is recomputed from the
+ *    aggregated state (or suppressed while the state is undecided).
+ *  - Any other event is propagated unchanged.
+ *
+ * Returns the result of ec_xl_op or default_notify, or 0 when nothing is
+ * propagated. */
+int32_t
+ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
+{
+ ec_t *ec = this->private;
+ int32_t idx = 0;
+ int32_t error = 0;
+ glusterfs_event_t old_event = GF_EVENT_MAXVAL;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t propagate = _gf_true;
+
+ gf_msg_trace (this->name, 0, "NOTIFY(%d): %p, %p",
+ event, data, data2);
+
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ if (!ec->up) {
+ error = -1;
+ } else {
+ input = data;
+ output = data2;
+ error = ec_xl_op (this, input, output);
+ }
+ goto out;
+ }
+
+ /* Find which child (if any) sent the event. idx == ec->nodes after the
+ * loop means 'data' is not one of our children. */
+ for (idx = 0; idx < ec->nodes; idx++) {
+ if (ec->xl_list[idx] == data) {
+ if (event == GF_EVENT_CHILD_UP)
+ ec_selfheal_childup (ec, idx);
+ break;
+ }
+ }
+
+ LOCK (&ec->lock);
+
+ if (event == GF_EVENT_PARENT_UP) {
+ /*
+ * Start a timer which sends appropriate event to parent
+ * xlator to prevent the 'mount' syscall from hanging.
+ */
+ ec_launch_notify_timer (this, ec);
+ goto unlock;
+ } else if (event == GF_EVENT_PARENT_DOWN) {
+ /* If there aren't pending fops running after we have waken up
+ * them, we immediately propagate the notification. */
+ propagate = ec_disable_delays(ec);
+ goto unlock;
+ }
+
+ if (idx < ec->nodes) { /* CHILD_* events */
+ old_event = ec_get_event_from_state (ec);
+
+ if (event == GF_EVENT_CHILD_UP) {
+ ec_handle_up (this, ec, idx);
+ } else if (event == GF_EVENT_CHILD_DOWN) {
+ ec_handle_down (this, ec, idx);
+ }
+
+ /* From here on 'event' is the state of the whole volume, not
+ * the event received from the child. */
+ event = ec_get_event_from_state (ec);
+
+ if (event == GF_EVENT_CHILD_UP && !ec->up) {
+ ec_up (this, ec);
+ } else if (event == GF_EVENT_CHILD_DOWN && ec->up) {
+ ec_down (this, ec);
+ }
+
+ if (event != GF_EVENT_MAXVAL) {
+ /* The aggregated state did not change: tell the parent
+ * only that a child was modified. */
+ if (event == old_event) {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+ } else {
+ /* State still undecided: nothing to report yet. */
+ propagate = _gf_false;
+ }
+ }
+unlock:
+ UNLOCK (&ec->lock);
+
+ if (propagate) {
+ error = default_notify (this, event, data);
+ }
+
+ /* NOTE(review): these fields are read after the lock was released. */
+ if (ec->shd.iamshd &&
+ ec->xl_notify_count == ec->nodes &&
+ event == GF_EVENT_CHILD_UP) {
+ ec_launch_replace_heal (ec);
+ }
+out:
+ return error;
+}
+
+/* xlator notify entry point. Extracts the first variadic argument as
+ * 'data2' and forwards everything to ec_notify, returning its result.
+ * NOTE(review): the variadic argument is read unconditionally, so callers
+ * are expected to always supply one - confirm against the callers. */
+int32_t
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
+
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = ec_notify (this, event, data, data2);
+
+ return ret;
+}
+
+/* xlator init entry point.
+ *
+ * Allocates and zeroes the private ec_t, creates the memory pools, builds
+ * the list of children, parses the options, creates the inode table,
+ * optionally starts the self-heal daemon support and finally builds the
+ * leaf -> subvolume index dictionary.
+ *
+ * Returns 0 on success. On any failure after the private structure has been
+ * allocated, everything is released through __ec_destroy_private and -1 is
+ * returned. */
+int32_t
+init (xlator_t *this)
+{
+ ec_t *ec = NULL;
+ char *read_policy = NULL;
+
+ if (this->parents == NULL)
+ {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ EC_MSG_NO_PARENTS, "Volume does not have parents.");
+ }
+
+ ec = GF_MALLOC(sizeof(*ec), ec_mt_ec_t);
+ if (ec == NULL)
+ {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ EC_MSG_NO_MEMORY, "Failed to allocate private memory.");
+
+ return -1;
+ }
+ /* Zeroing matters: the cleanup path relies on unset pointers being
+ * NULL. */
+ memset(ec, 0, sizeof(*ec));
+
+ this->private = ec;
+
+ ec->xl = this;
+ LOCK_INIT(&ec->lock);
+
+ INIT_LIST_HEAD(&ec->pending_fops);
+ INIT_LIST_HEAD(&ec->heal_waiting);
+ INIT_LIST_HEAD(&ec->healing);
+
+ ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
+ ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
+ ec->lock_pool = mem_pool_new(ec_lock_t, 1024);
+ if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL) ||
+ (ec->lock_pool == NULL))
+ {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ EC_MSG_NO_MEMORY, "Failed to create memory pools.");
+
+ goto failed;
+ }
+
+ /* Must run before ec_parse_options, which needs ec->nodes. */
+ if (ec_prepare_childs(this) != 0)
+ {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_XLATOR_INIT_FAIL, "Failed to initialize xlator");
+
+ goto failed;
+ }
+
+ if (ec_parse_options(this) != 0)
+ {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_XLATOR_PARSE_OPT_FAIL, "Failed to parse xlator options");
+
+ goto failed;
+ }
+
+ ec_method_initialize();
+ GF_OPTION_INIT ("self-heal-daemon", ec->shd.enabled, bool, failed);
+ GF_OPTION_INIT ("iam-self-heal-daemon", ec->shd.iamshd, bool, failed);
+ GF_OPTION_INIT ("eager-lock", ec->eager_lock, bool, failed);
+ GF_OPTION_INIT ("background-heals", ec->background_heals, uint32, failed);
+ GF_OPTION_INIT ("heal-wait-qlength", ec->heal_wait_qlen, uint32, failed);
+ /* Re-applied through the helper so that a zero "background-heals" also
+ * zeroes the wait queue length. */
+ ec_configure_background_heal_opts (ec, ec->background_heals,
+ ec->heal_wait_qlen);
+ GF_OPTION_INIT ("read-policy", read_policy, str, failed);
+ if (ec_assign_read_policy (ec, read_policy))
+ goto failed;
+
+ this->itable = inode_table_new (EC_SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto failed;
+
+ /* NOTE(review): the result of ec_selfheal_daemon_init is not checked
+ * here - confirm whether it can fail. */
+ if (ec->shd.iamshd)
+ ec_selfheal_daemon_init (this);
+ gf_msg_debug (this->name, 0, "Disperse translator initialized.");
+
+ /* Build the dictionary of reachable leaves, then replace each stored
+ * subvolume pointer by its index in ec->xl_list. */
+ ec->leaf_to_subvolid = dict_new ();
+ if (!ec->leaf_to_subvolid)
+ goto failed;
+ if (glusterfs_reachable_leaves (this, ec->leaf_to_subvolid)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_SUBVOL_BUILD_FAIL, "Failed to build subvol "
+ "dictionary");
+ goto failed;
+ }
+
+ if (ec_subvol_to_subvol_id_transform (ec, ec->leaf_to_subvolid) < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_SUBVOL_ID_DICT_SET_FAIL, "Failed to build subvol-id "
+ "dictionary");
+ goto failed;
+ }
+
+ return 0;
+
+failed:
+ __ec_destroy_private(this);
+
+ return -1;
+}
+
+/* xlator fini entry point: releases the private data through
+ * __ec_destroy_private. */
+void fini(xlator_t * this)
+{
+ __ec_destroy_private(this);
+}
+
+/* xlator_fops.access handler: hands the request to ec_access() with
+ * EC_MINIMUM_ONE and default_access_cbk as callback. Always returns 0. */
+int32_t ec_gf_access(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ int32_t mask, dict_t * xdata)
+{
+ ec_access(frame, this, -1, EC_MINIMUM_ONE, default_access_cbk, NULL, loc,
+ mask, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.create handler: hands the request to ec_create() with
+ * EC_MINIMUM_MIN and default_create_cbk as callback. Always returns 0. */
+int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t * fd,
+ dict_t * xdata)
+{
+ ec_create(frame, this, -1, EC_MINIMUM_MIN, default_create_cbk, NULL, loc,
+ flags, mode, umask, fd, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.discard handler: discard is not supported by ec, so the
+ * request is failed immediately with ENOTSUP. Always returns 0. */
+int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ off_t offset, size_t len, dict_t * xdata)
+{
+ default_discard_failure_cbk(frame, ENOTSUP);
+
+ return 0;
+}
+
+/* xlator_fops.entrylk handler: hands the request to ec_entrylk() with
+ * default_entrylk_cbk as callback. Unlock requests use EC_MINIMUM_ONE; every
+ * other command uses EC_MINIMUM_ALL. Always returns 0. */
+int32_t ec_gf_entrylk(call_frame_t * frame, xlator_t * this,
+                      const char * volume, loc_t * loc, const char * basename,
+                      entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+{
+    int32_t minimum = (cmd == ENTRYLK_UNLOCK) ? EC_MINIMUM_ONE
+                                              : EC_MINIMUM_ALL;
+
+    ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL,
+               volume, loc, basename, cmd, type, xdata);
+
+    return 0;
+}
+
+/* xlator_fops.fentrylk handler: hands the request to ec_fentrylk() with
+ * default_fentrylk_cbk as callback. Unlock requests use EC_MINIMUM_ONE;
+ * every other command uses EC_MINIMUM_ALL. Always returns 0. */
+int32_t ec_gf_fentrylk(call_frame_t * frame, xlator_t * this,
+                       const char * volume, fd_t * fd, const char * basename,
+                       entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+{
+    int32_t minimum = (cmd == ENTRYLK_UNLOCK) ? EC_MINIMUM_ONE
+                                              : EC_MINIMUM_ALL;
+
+    ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL,
+                volume, fd, basename, cmd, type, xdata);
+
+    return 0;
+}
+
+/* xlator_fops.fallocate handler: fallocate is not supported by ec, so the
+ * request is failed immediately with ENOTSUP. Always returns 0. */
+int32_t ec_gf_fallocate(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ int32_t keep_size, off_t offset, size_t len,
+ dict_t * xdata)
+{
+ default_fallocate_failure_cbk(frame, ENOTSUP);
+
+ return 0;
+}
+
+/* xlator_fops.flush handler: hands the request to ec_flush() with
+ * EC_MINIMUM_MIN and default_flush_cbk as callback. Always returns 0. */
+int32_t ec_gf_flush(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ dict_t * xdata)
+{
+ ec_flush(frame, this, -1, EC_MINIMUM_MIN, default_flush_cbk, NULL, fd,
+ xdata);
+
+ return 0;
+}
+
+/* xlator_fops.fsync handler: hands the request to ec_fsync() with
+ * EC_MINIMUM_MIN and default_fsync_cbk as callback. Always returns 0. */
+int32_t ec_gf_fsync(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ int32_t datasync, dict_t * xdata)
+{
+ ec_fsync(frame, this, -1, EC_MINIMUM_MIN, default_fsync_cbk, NULL, fd,
+ datasync, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.fsyncdir handler: hands the request to ec_fsyncdir() with
+ * EC_MINIMUM_MIN and default_fsyncdir_cbk as callback. Always returns 0. */
+int32_t ec_gf_fsyncdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ int32_t datasync, dict_t * xdata)
+{
+ ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL,
+ fd, datasync, xdata);
+
+ return 0;
+}
+
+/* Callback handed to cluster_handle_marker_getxattr: copies the list of
+ * child xlators into 'subvols' (which must have room for ec->nodes entries)
+ * and, for MARKER_XTIME_TYPE requests, zeroes the MCNT_NOTFOUND and
+ * MCNT_ENOTCONN gauges. Returns the number of subvolumes copied. */
+int
+ec_marker_populate_args (call_frame_t *frame, int type, int *gauge,
+                         xlator_t **subvols)
+{
+        ec_t *ec = frame->this->private;
+        int   i  = 0;
+
+        for (i = 0; i < ec->nodes; i++) {
+                subvols[i] = ec->xl_list[i];
+        }
+
+        if (type == MARKER_XTIME_TYPE) {
+                /*Don't error out on ENOENT/ENOTCONN */
+                gauge[MCNT_NOTFOUND] = 0;
+                gauge[MCNT_ENOTCONN] = 0;
+        }
+
+        return ec->nodes;
+}
+
+/* Answers a getxattr of GF_HEAL_INFO directly, without going to the bricks.
+ *
+ * Returns -1 (without touching 'frame') when 'name' is NULL or is not
+ * GF_HEAL_INFO, so the caller continues with normal processing. Otherwise
+ * the frame is unwound with a dict holding "heal-info" = "heal" (or with
+ * op_ret -1 if the dict cannot be built) and 0 is returned. */
+int32_t
+ec_handle_heal_commands (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                         const char *name, dict_t *xdata)
+{
+        dict_t *rsp      = NULL;
+        int     op_ret   = -1;
+        int     op_errno = ENOMEM;
+
+        if (!name || strcmp (name, GF_HEAL_INFO)) {
+                return -1;
+        }
+
+        rsp = dict_new ();
+        if ((rsp != NULL) && (dict_set_str (rsp, "heal-info", "heal") == 0)) {
+                op_ret = 0;
+        }
+
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, rsp, NULL);
+
+        if (rsp != NULL) {
+                dict_unref (rsp);
+        }
+
+        return 0;
+}
+
+/* xlator_fops.getxattr handler.
+ *
+ * Processing order:
+ *  1. Internal ec xattrs (except EC_XATTR_HEAL) are hidden: the request is
+ *     failed with ENODATA.
+ *  2. GF_HEAL_INFO is answered locally by ec_handle_heal_commands.
+ *  3. Marker xattrs are handled by cluster_handle_marker_getxattr.
+ *  4. Everything else goes to ec_getxattr; names matching
+ *     GF_XATTR_STIME_PATTERN use EC_MINIMUM_ALL instead of EC_MINIMUM_MIN.
+ *
+ * Always returns 0. */
+int32_t
+ec_gf_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+ ec_t *ec = this->private;
+ int32_t minimum = EC_MINIMUM_MIN;
+
+ if (name && strcmp (name, EC_XATTR_HEAL) != 0) {
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+ }
+
+ if (ec_handle_heal_commands (frame, this, loc, name, xdata) == 0)
+ return 0;
+
+ if (cluster_handle_marker_getxattr (frame, loc, name, ec->vol_uuid,
+ NULL, ec_marker_populate_args) == 0)
+ return 0;
+
+ if (name && (fnmatch (GF_XATTR_STIME_PATTERN, name, 0) == 0))
+ minimum = EC_MINIMUM_ALL;
+
+ ec_getxattr (frame, this, -1, minimum, default_getxattr_cbk,
+ NULL, loc, name, xdata);
+
+ return 0;
+out:
+ /* The EPERM set by the macro is replaced on purpose: to the caller an
+ * internal xattr simply does not exist. */
+ error = ENODATA;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, error, NULL, NULL);
+ return 0;
+}
+
+/* xlator_fops.fgetxattr handler: internal ec xattrs are hidden (the request
+ * fails with ENODATA); anything else is handed to ec_fgetxattr() with
+ * EC_MINIMUM_MIN and default_fgetxattr_cbk as callback. Always returns 0. */
+int32_t
+ec_gf_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+
+ ec_fgetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fgetxattr_cbk,
+ NULL, fd, name, xdata);
+ return 0;
+out:
+ /* The EPERM set by the macro is replaced on purpose: to the caller an
+ * internal xattr simply does not exist. */
+ error = ENODATA;
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, error, NULL, NULL);
+ return 0;
+}
+
+/* xlator_fops.inodelk handler: hands the request to ec_inodelk() with
+ * default_inodelk_cbk as callback. Unlock requests (l_type == F_UNLCK) use
+ * EC_MINIMUM_ONE; everything else uses EC_MINIMUM_ALL. Always returns 0. */
+int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this,
+                      const char * volume, loc_t * loc, int32_t cmd,
+                      struct gf_flock * flock, dict_t * xdata)
+{
+    int32_t minimum = (flock->l_type == F_UNLCK) ? EC_MINIMUM_ONE
+                                                 : EC_MINIMUM_ALL;
+
+    ec_inodelk(frame, this, -1, minimum, default_inodelk_cbk, NULL,
+               volume, loc, cmd, flock, xdata);
+
+    return 0;
+}
+
+/* xlator_fops.finodelk handler: hands the request to ec_finodelk() with
+ * default_finodelk_cbk as callback. Unlock requests (l_type == F_UNLCK) use
+ * EC_MINIMUM_ONE; everything else uses EC_MINIMUM_ALL. Always returns 0. */
+int32_t ec_gf_finodelk(call_frame_t * frame, xlator_t * this,
+                       const char * volume, fd_t * fd, int32_t cmd,
+                       struct gf_flock * flock, dict_t * xdata)
+{
+    int32_t minimum = (flock->l_type == F_UNLCK) ? EC_MINIMUM_ONE
+                                                 : EC_MINIMUM_ALL;
+
+    ec_finodelk(frame, this, -1, minimum, default_finodelk_cbk, NULL,
+                volume, fd, cmd, flock, xdata);
+
+    return 0;
+}
+
+/* xlator_fops.link handler: hands the request to ec_link() with
+ * EC_MINIMUM_MIN and default_link_cbk as callback. Always returns 0. */
+int32_t ec_gf_link(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
+ loc_t * newloc, dict_t * xdata)
+{
+ ec_link(frame, this, -1, EC_MINIMUM_MIN, default_link_cbk, NULL, oldloc,
+ newloc, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.lk handler: hands the request to ec_lk() with default_lk_cbk
+ * as callback. Unlock requests (l_type == F_UNLCK) use EC_MINIMUM_ONE;
+ * everything else uses EC_MINIMUM_ALL. Always returns 0. */
+int32_t ec_gf_lk(call_frame_t * frame, xlator_t * this, fd_t * fd,
+                 int32_t cmd, struct gf_flock * flock, dict_t * xdata)
+{
+    int32_t minimum = (flock->l_type == F_UNLCK) ? EC_MINIMUM_ONE
+                                                 : EC_MINIMUM_ALL;
+
+    ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd,
+          flock, xdata);
+
+    return 0;
+}
+
+/* xlator_fops.lookup handler: hands the request to ec_lookup() with
+ * EC_MINIMUM_MIN and default_lookup_cbk as callback. Always returns 0. */
+int32_t ec_gf_lookup(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ dict_t * xdata)
+{
+ ec_lookup(frame, this, -1, EC_MINIMUM_MIN, default_lookup_cbk, NULL, loc,
+ xdata);
+
+ return 0;
+}
+
+/* xlator_fops.mkdir handler: hands the request to ec_mkdir() with
+ * EC_MINIMUM_MIN and default_mkdir_cbk as callback. Always returns 0. */
+int32_t ec_gf_mkdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ mode_t mode, mode_t umask, dict_t * xdata)
+{
+ ec_mkdir(frame, this, -1, EC_MINIMUM_MIN, default_mkdir_cbk, NULL, loc,
+ mode, umask, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.mknod handler: hands the request to ec_mknod() with
+ * EC_MINIMUM_MIN and default_mknod_cbk as callback. Always returns 0. */
+int32_t ec_gf_mknod(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata)
+{
+ ec_mknod(frame, this, -1, EC_MINIMUM_MIN, default_mknod_cbk, NULL, loc,
+ mode, rdev, umask, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.open handler: hands the request to ec_open() with
+ * EC_MINIMUM_MIN and default_open_cbk as callback. Always returns 0. */
+int32_t ec_gf_open(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ int32_t flags, fd_t * fd, dict_t * xdata)
+{
+ ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL, loc,
+ flags, fd, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.opendir handler: hands the request to ec_opendir() with
+ * EC_MINIMUM_MIN and default_opendir_cbk as callback. Always returns 0. */
+int32_t ec_gf_opendir(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ fd_t * fd, dict_t * xdata)
+{
+ ec_opendir(frame, this, -1, EC_MINIMUM_MIN, default_opendir_cbk, NULL, loc,
+ fd, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.readdir handler: hands the request to ec_readdir() with
+ * EC_MINIMUM_ONE and default_readdir_cbk as callback. Always returns 0. */
+int32_t ec_gf_readdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ size_t size, off_t offset, dict_t * xdata)
+{
+ ec_readdir(frame, this, -1, EC_MINIMUM_ONE, default_readdir_cbk, NULL, fd,
+ size, offset, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.readdirp handler: hands the request to ec_readdirp() with
+ * EC_MINIMUM_ONE and default_readdirp_cbk as callback. Always returns 0. */
+int32_t ec_gf_readdirp(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ size_t size, off_t offset, dict_t * xdata)
+{
+ ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL,
+ fd, size, offset, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.readlink handler: hands the request to ec_readlink() with
+ * EC_MINIMUM_ONE and default_readlink_cbk as callback. Always returns 0. */
+int32_t ec_gf_readlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ size_t size, dict_t * xdata)
+{
+ ec_readlink(frame, this, -1, EC_MINIMUM_ONE, default_readlink_cbk, NULL,
+ loc, size, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.readv handler: hands the request to ec_readv() with
+ * EC_MINIMUM_MIN and default_readv_cbk as callback. Always returns 0. */
+int32_t ec_gf_readv(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ size_t size, off_t offset, uint32_t flags, dict_t * xdata)
+{
+ ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL, fd,
+ size, offset, flags, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.removexattr handler: refuses to remove internal ec xattrs
+ * (the request fails with the errno set by EC_INTERNAL_XATTR_OR_GOTO);
+ * anything else is handed to ec_removexattr() with EC_MINIMUM_MIN and
+ * default_removexattr_cbk as callback. Always returns 0. */
+int32_t
+ec_gf_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+
+ /* xdata is passed as the dict to inspect in the bulk (empty name)
+ * case. */
+ EC_INTERNAL_XATTR_OR_GOTO (name, xdata, error, out);
+
+ ec_removexattr (frame, this, -1, EC_MINIMUM_MIN,
+ default_removexattr_cbk, NULL, loc, name, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (removexattr, frame, -1, error, NULL);
+ return 0;
+}
+
+/* xlator_fops.fremovexattr handler: refuses to remove internal ec xattrs
+ * (the request fails with the errno set by EC_INTERNAL_XATTR_OR_GOTO);
+ * anything else is handed to ec_fremovexattr() with EC_MINIMUM_MIN and
+ * default_fremovexattr_cbk as callback. Always returns 0. */
+int32_t
+ec_gf_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+
+ /* xdata is passed as the dict to inspect in the bulk (empty name)
+ * case. */
+ EC_INTERNAL_XATTR_OR_GOTO (name, xdata, error, out);
+
+ ec_fremovexattr (frame, this, -1, EC_MINIMUM_MIN,
+ default_fremovexattr_cbk, NULL, fd, name, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, error, NULL);
+ return 0;
+}
+
+/* xlator_fops.rename handler: hands the request to ec_rename() with
+ * EC_MINIMUM_MIN and default_rename_cbk as callback. Always returns 0. */
+int32_t ec_gf_rename(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
+ loc_t * newloc, dict_t * xdata)
+{
+ ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL,
+ oldloc, newloc, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.rmdir handler: hands the request to ec_rmdir() with
+ * EC_MINIMUM_MIN and default_rmdir_cbk as callback. Always returns 0. */
+int32_t ec_gf_rmdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ int xflags, dict_t * xdata)
+{
+ ec_rmdir(frame, this, -1, EC_MINIMUM_MIN, default_rmdir_cbk, NULL, loc,
+ xflags, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.setattr handler: hands the request to ec_setattr() with
+ * EC_MINIMUM_MIN and default_setattr_cbk as callback. Always returns 0. */
+int32_t ec_gf_setattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ struct iatt * stbuf, int32_t valid, dict_t * xdata)
+{
+ ec_setattr(frame, this, -1, EC_MINIMUM_MIN, default_setattr_cbk, NULL, loc,
+ stbuf, valid, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.fsetattr handler: hands the request to ec_fsetattr() with
+ * EC_MINIMUM_MIN and default_fsetattr_cbk as callback. Always returns 0. */
+int32_t ec_gf_fsetattr(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ struct iatt * stbuf, int32_t valid, dict_t * xdata)
+{
+ ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL,
+ fd, stbuf, valid, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.setxattr handler: refuses requests whose 'dict' touches
+ * internal ec xattrs (the request fails with the errno set by
+ * EC_INTERNAL_XATTR_OR_GOTO); anything else is handed to ec_setxattr() with
+ * EC_MINIMUM_MIN and default_setxattr_cbk as callback. Always returns 0. */
+int32_t
+ec_gf_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ int error = 0;
+
+ /* The empty name selects the macro's bulk branch, so the keys of
+ * 'dict' are the ones being checked. */
+ EC_INTERNAL_XATTR_OR_GOTO ("", dict, error, out);
+
+ ec_setxattr (frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk,
+ NULL, loc, dict, flags, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (setxattr, frame, -1, error, NULL);
+ return 0;
+}
+
+/* xlator_fops.fsetxattr handler: refuses requests whose 'dict' touches
+ * internal ec xattrs (the request fails with the errno set by
+ * EC_INTERNAL_XATTR_OR_GOTO); anything else is handed to ec_fsetxattr() with
+ * EC_MINIMUM_MIN and default_fsetxattr_cbk as callback. Always returns 0. */
+int32_t
+ec_gf_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ int error = 0;
+
+ /* The empty name selects the macro's bulk branch, so the keys of
+ * 'dict' are the ones being checked. */
+ EC_INTERNAL_XATTR_OR_GOTO ("", dict, error, out);
+
+ ec_fsetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk,
+ NULL, fd, dict, flags, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, error, NULL);
+ return 0;
+}
+
+/* xlator_fops.stat handler: hands the request to ec_stat() with
+ * EC_MINIMUM_MIN and default_stat_cbk as callback. Always returns 0. */
+int32_t ec_gf_stat(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ dict_t * xdata)
+{
+ ec_stat(frame, this, -1, EC_MINIMUM_MIN, default_stat_cbk, NULL, loc,
+ xdata);
+
+ return 0;
+}
+
+/* xlator_fops.fstat handler: hands the request to ec_fstat() with
+ * EC_MINIMUM_MIN and default_fstat_cbk as callback. Always returns 0. */
+int32_t ec_gf_fstat(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ dict_t * xdata)
+{
+ ec_fstat(frame, this, -1, EC_MINIMUM_MIN, default_fstat_cbk, NULL, fd,
+ xdata);
+
+ return 0;
+}
+
+/* xlator_fops.statfs handler: hands the request to ec_statfs() with
+ * EC_MINIMUM_MIN and default_statfs_cbk as callback. Always returns 0. */
+int32_t ec_gf_statfs(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ dict_t * xdata)
+{
+ ec_statfs(frame, this, -1, EC_MINIMUM_MIN, default_statfs_cbk, NULL, loc,
+ xdata);
+
+ return 0;
+}
+
+/* xlator_fops.symlink handler: hands the request to ec_symlink() with
+ * EC_MINIMUM_MIN and default_symlink_cbk as callback. Always returns 0. */
+int32_t ec_gf_symlink(call_frame_t * frame, xlator_t * this,
+ const char * linkname, loc_t * loc, mode_t umask,
+ dict_t * xdata)
+{
+ ec_symlink(frame, this, -1, EC_MINIMUM_MIN, default_symlink_cbk, NULL,
+ linkname, loc, umask, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.truncate handler: hands the request to ec_truncate() with
+ * EC_MINIMUM_MIN and default_truncate_cbk as callback. Always returns 0. */
+int32_t ec_gf_truncate(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ off_t offset, dict_t * xdata)
+{
+ ec_truncate(frame, this, -1, EC_MINIMUM_MIN, default_truncate_cbk, NULL,
+ loc, offset, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.ftruncate handler: hands the request to ec_ftruncate() with
+ * EC_MINIMUM_MIN and default_ftruncate_cbk as callback. Always returns 0. */
+int32_t ec_gf_ftruncate(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ off_t offset, dict_t * xdata)
+{
+ ec_ftruncate(frame, this, -1, EC_MINIMUM_MIN, default_ftruncate_cbk, NULL,
+ fd, offset, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.unlink handler: hands the request to ec_unlink() with
+ * EC_MINIMUM_MIN and default_unlink_cbk as callback. Always returns 0. */
+int32_t ec_gf_unlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ int xflags, dict_t * xdata)
+{
+ ec_unlink(frame, this, -1, EC_MINIMUM_MIN, default_unlink_cbk, NULL, loc,
+ xflags, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.writev handler: hands the request to ec_writev() with
+ * EC_MINIMUM_MIN and default_writev_cbk as callback. Always returns 0. */
+int32_t ec_gf_writev(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ struct iovec * vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref * iobref, dict_t * xdata)
+{
+ ec_writev(frame, this, -1, EC_MINIMUM_MIN, default_writev_cbk, NULL, fd,
+ vector, count, offset, flags, iobref, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.xattrop handler: hands the request to ec_xattrop() with
+ * EC_MINIMUM_MIN and default_xattrop_cbk as callback. Always returns 0. */
+int32_t ec_gf_xattrop(call_frame_t * frame, xlator_t * this, loc_t * loc,
+ gf_xattrop_flags_t optype, dict_t * xattr,
+ dict_t * xdata)
+{
+ ec_xattrop(frame, this, -1, EC_MINIMUM_MIN, default_xattrop_cbk, NULL, loc,
+ optype, xattr, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.fxattrop handler: hands the request to ec_fxattrop() with
+ * EC_MINIMUM_MIN and default_fxattrop_cbk as callback. Always returns 0. */
+int32_t ec_gf_fxattrop(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ gf_xattrop_flags_t optype, dict_t * xattr,
+ dict_t * xdata)
+{
+ ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL,
+ fd, optype, xattr, xdata);
+
+ return 0;
+}
+
+/* xlator_fops.zerofill handler: zerofill is not supported by ec, so the
+ * request is failed immediately with ENOTSUP. Always returns 0. */
+int32_t ec_gf_zerofill(call_frame_t * frame, xlator_t * this, fd_t * fd,
+ off_t offset, off_t len, dict_t * xdata)
+{
+ default_zerofill_failure_cbk(frame, ENOTSUP);
+
+ return 0;
+}
+
+/* xlator_fops.seek handler: hands the request to ec_seek() with
+ * EC_MINIMUM_ONE and default_seek_cbk as callback. Always returns 0. */
+int32_t ec_gf_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ ec_seek(frame, this, -1, EC_MINIMUM_ONE, default_seek_cbk, NULL, fd,
+ offset, what, xdata);
+
+ return 0;
+}
+
+/* xlator_cbks.forget handler: detaches the ec context stored in 'inode' for
+ * this xlator and frees it. Nothing is freed when no context is attached.
+ * Always returns 0. */
+int32_t ec_gf_forget(xlator_t * this, inode_t * inode)
+{
+    uint64_t raw = 0;
+    ec_inode_t * ctx = NULL;
+
+    if (inode_ctx_del(inode, this, &raw) != 0)
+    {
+        return 0;
+    }
+    if (raw == 0)
+    {
+        return 0;
+    }
+
+    /* The context pointer is stored as a 64-bit integer. */
+    ctx = (ec_inode_t *)(uintptr_t)raw;
+    GF_FREE(ctx);
+
+    return 0;
+}
+
+/* Detaches the ec context stored in 'fd' for this xlator, wipes the loc it
+ * holds and frees it. Does nothing when no context is attached. Shared by
+ * the release and releasedir callbacks. */
+void ec_gf_release_fd(xlator_t * this, fd_t * fd)
+{
+    uint64_t raw = 0;
+    ec_fd_t * ctx = NULL;
+
+    if (fd_ctx_del(fd, this, &raw) != 0)
+    {
+        return;
+    }
+    if (raw == 0)
+    {
+        return;
+    }
+
+    /* The context pointer is stored as a 64-bit integer. */
+    ctx = (ec_fd_t *)(uintptr_t)raw;
+    loc_wipe(&ctx->loc);
+    GF_FREE(ctx);
+}
+
+/* xlator_cbks.release handler: frees the ec context attached to 'fd'.
+ * Always returns 0. */
+int32_t ec_gf_release(xlator_t * this, fd_t * fd)
+{
+ ec_gf_release_fd(this, fd);
+
+ return 0;
+}
+
+/* xlator_cbks.releasedir handler: frees the ec context attached to the
+ * directory 'fd'. Always returns 0. */
+int32_t ec_gf_releasedir(xlator_t * this, fd_t * fd)
+{
+ ec_gf_release_fd(this, fd);
+
+ return 0;
+}
+
+/* State-dump callback (dumpops.priv): writes the main configuration and
+ * runtime counters of the xlator under a "<type>.<name>" section.
+ *
+ * Each conversion specifier matches the signedness of the field it prints:
+ * nodes, redundancy, healers and heal_waiters are int32_t (%d), while
+ * fragment_size, stripe_size, xl_up_count, background_heals and
+ * heal_wait_qlen are uint32_t (%u).
+ *
+ * Always returns 0. */
+int32_t ec_dump_private(xlator_t *this)
+{
+    ec_t *ec = NULL;
+    char key_prefix[GF_DUMP_MAX_BUF_LEN];
+    /* Scratch buffer for the textual bitmask produced by ec_bin. */
+    char tmp[65];
+
+    GF_ASSERT(this);
+
+    ec = this->private;
+    GF_ASSERT(ec);
+
+    snprintf(key_prefix, sizeof(key_prefix), "%s.%s", this->type, this->name);
+    gf_proc_dump_add_section(key_prefix);
+    gf_proc_dump_write("nodes", "%d", ec->nodes);
+    gf_proc_dump_write("redundancy", "%d", ec->redundancy);
+    gf_proc_dump_write("fragment_size", "%u", ec->fragment_size);
+    gf_proc_dump_write("stripe_size", "%u", ec->stripe_size);
+    gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);
+    gf_proc_dump_write("childs_up_mask", "%s",
+                       ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes));
+    gf_proc_dump_write("background-heals", "%u", ec->background_heals);
+    gf_proc_dump_write("heal-wait-qlength", "%u", ec->heal_wait_qlen);
+    gf_proc_dump_write("healers", "%d", ec->healers);
+    gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
+    gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+
+    return 0;
+}
+
+/* File operation table exported by the xlator: every fop is routed to the
+ * corresponding ec_gf_* handler defined in this file. */
+struct xlator_fops fops =
+{
+ .lookup = ec_gf_lookup,
+ .stat = ec_gf_stat,
+ .fstat = ec_gf_fstat,
+ .truncate = ec_gf_truncate,
+ .ftruncate = ec_gf_ftruncate,
+ .access = ec_gf_access,
+ .readlink = ec_gf_readlink,
+ .mknod = ec_gf_mknod,
+ .mkdir = ec_gf_mkdir,
+ .unlink = ec_gf_unlink,
+ .rmdir = ec_gf_rmdir,
+ .symlink = ec_gf_symlink,
+ .rename = ec_gf_rename,
+ .link = ec_gf_link,
+ .create = ec_gf_create,
+ .open = ec_gf_open,
+ .readv = ec_gf_readv,
+ .writev = ec_gf_writev,
+ .flush = ec_gf_flush,
+ .fsync = ec_gf_fsync,
+ .opendir = ec_gf_opendir,
+ .readdir = ec_gf_readdir,
+ .readdirp = ec_gf_readdirp,
+ .fsyncdir = ec_gf_fsyncdir,
+ .statfs = ec_gf_statfs,
+ .setxattr = ec_gf_setxattr,
+ .getxattr = ec_gf_getxattr,
+ .fsetxattr = ec_gf_fsetxattr,
+ .fgetxattr = ec_gf_fgetxattr,
+ .removexattr = ec_gf_removexattr,
+ .fremovexattr = ec_gf_fremovexattr,
+ .lk = ec_gf_lk,
+ .inodelk = ec_gf_inodelk,
+ .finodelk = ec_gf_finodelk,
+ .entrylk = ec_gf_entrylk,
+ .fentrylk = ec_gf_fentrylk,
+ .xattrop = ec_gf_xattrop,
+ .fxattrop = ec_gf_fxattrop,
+ .setattr = ec_gf_setattr,
+ .fsetattr = ec_gf_fsetattr,
+ /* The next three are answered with ENOTSUP by their handlers. */
+ .fallocate = ec_gf_fallocate,
+ .discard = ec_gf_discard,
+ .zerofill = ec_gf_zerofill,
+ .seek = ec_gf_seek
+};
+
+/* Lifetime callbacks exported by the xlator: they free the per-inode and
+ * per-fd contexts kept by ec. */
+struct xlator_cbks cbks =
+{
+ .forget = ec_gf_forget,
+ .release = ec_gf_release,
+ .releasedir = ec_gf_releasedir
+};
+
+/* State-dump operations exported by the xlator; only the private data dump
+ * is implemented. */
+struct xlator_dumpops dumpops = {
+ .priv = ec_dump_private
+};
+
+/* Options understood by the disperse xlator. The table ends with an empty
+ * entry. Adjacent string literals are concatenated by the compiler, so each
+ * fragment of a description must carry its own separating space. */
+struct volume_options options[] =
+{
+    {
+        .key = { "redundancy" },
+        .type = GF_OPTION_TYPE_INT,
+        .description = "Maximum number of bricks that can fail "
+                       "simultaneously without losing data."
+    },
+    {
+        .key = { "self-heal-daemon" },
+        .type = GF_OPTION_TYPE_BOOL,
+        .description = "self-heal daemon enable/disable",
+        .default_value = "enable",
+    },
+    { .key = {"iam-self-heal-daemon"},
+      .type = GF_OPTION_TYPE_BOOL,
+      .default_value = "off",
+      .description = "This option differentiates if the disperse "
+                     "translator is running as part of self-heal-daemon "
+                     "or not."
+    },
+    { .key = {"eager-lock"},
+      .type = GF_OPTION_TYPE_BOOL,
+      .default_value = "on",
+      .description = "Enable/Disable eager lock for disperse volume. "
+                     "If a fop takes a lock and completes its operation, "
+                     "it waits for next 1 second before releasing the lock, "
+                     "to see if the lock can be reused for next fop from "
+                     "the same client. If ec finds any lock contention within "
+                     "1 second it releases the lock immediately before time "
+                     "expires. This improves the performance of file operations. "
+                     "However, as it takes lock on first brick, for few operations "
+                     "like read, discovery of lock contention might take long time "
+                     "and can actually degrade the performance. "
+                     "If eager lock is disabled, lock will be released as soon as fop "
+                     "completes. "
+    },
+    { .key = {"background-heals"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 0,/*Disabling background heals*/
+      .max = 256,
+      .default_value = "8",
+      .description = "This option can be used to control number of parallel"
+                     " heals",
+    },
+    { .key = {"heal-wait-qlength"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 0,
+      .max = 65536, /*Around 100MB as of now with sizeof(ec_fop_data_t) at 1800*/
+      .default_value = "128",
+      .description = "This option can be used to control number of heals"
+                     " that can wait",
+    },
+    { .key = {"heal-timeout"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 60,
+      .max = INT_MAX,
+      .default_value = "600",
+      .description = "time interval for checking the need to self-heal "
+                     "in self-heal-daemon"
+    },
+    { .key = {"read-policy" },
+      .type = GF_OPTION_TYPE_STR,
+      .value = {"round-robin", "gfid-hash"},
+      .default_value = "round-robin",
+      .description = "inode-read fops happen only on 'k' number of bricks in"
+                     " n=k+m disperse subvolume. 'round-robin' selects the read"
+                     " subvolume using round-robin algo. 'gfid-hash' selects read"
+                     " subvolume based on hash of the gfid of that file/directory.",
+    },
+    { }
+};
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
new file mode 100644
index 00000000000..49af5c2daf2
--- /dev/null
+++ b/xlators/cluster/ec/src/ec.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_H__
+#define __EC_H__
+
+#include "xlator.h"
+#include "timer.h"
+#include "ec-heald.h"
+#include "libxlator.h"
+
+#define EC_XATTR_PREFIX "trusted.ec."
+#define EC_XATTR_CONFIG EC_XATTR_PREFIX"config"
+#define EC_XATTR_SIZE EC_XATTR_PREFIX"size"
+#define EC_XATTR_VERSION EC_XATTR_PREFIX"version"
+#define EC_XATTR_HEAL EC_XATTR_PREFIX"heal"
+#define EC_XATTR_DIRTY EC_XATTR_PREFIX"dirty"
+
+#define EC_VERSION_SIZE 2
+#define EC_SHD_INODE_LRU_LIMIT 10
+
+/*
+ * Selector stored in struct _ec::read_policy.
+ * NOTE(review): presumably mirrors the "read-policy" option values
+ * "round-robin" and "gfid-hash"; EC_READ_POLICY_MAX looks like a count
+ * sentinel rather than a usable policy -- confirm against the option parser.
+ */
+typedef enum {
+ EC_ROUND_ROBIN,
+ EC_GFID_HASH,
+ EC_READ_POLICY_MAX
+} ec_read_policy_t;
+
+/*
+ * State block of the ec xlator.
+ * NOTE(review): the meaning of individual fields is not visible in this
+ * header; the comments below are assumptions based on names and must be
+ * confirmed against ec.c before being relied upon.
+ */
+struct _ec
+{
+ xlator_t * xl;
+ int32_t healers;
+ int32_t heal_waiters;
+ int32_t nodes;
+ int32_t bits_for_nodes;
+ int32_t fragments;
+ int32_t redundancy;
+ uint32_t fragment_size;
+ uint32_t stripe_size;
+ int32_t up;
+ uint32_t idx;
+ uint32_t xl_up_count;
+ uintptr_t xl_up;
+ uint32_t xl_notify_count;
+ uintptr_t xl_notify;
+ uintptr_t node_mask;
+ xlator_t ** xl_list;
+ gf_lock_t lock;
+ gf_timer_t * timer;
+ gf_boolean_t shutdown;
+ /* presumably backs the "eager-lock" option -- TODO confirm */
+ gf_boolean_t eager_lock;
+ /* presumably backs the "background-heals" option -- TODO confirm */
+ uint32_t background_heals;
+ /* presumably backs the "heal-wait-qlength" option -- TODO confirm */
+ uint32_t heal_wait_qlen;
+ struct list_head pending_fops;
+ struct list_head heal_waiting;
+ struct list_head healing;
+ struct mem_pool * fop_pool;
+ struct mem_pool * cbk_pool;
+ struct mem_pool * lock_pool;
+ ec_self_heald_t shd;
+ char vol_uuid[UUID_SIZE + 1];
+ dict_t *leaf_to_subvolid;
+ /* presumably backs the "read-policy" option -- TODO confirm */
+ ec_read_policy_t read_policy;
+};
+
+void ec_pending_fops_completed(ec_t *ec);
+
+#endif /* __EC_H__ */
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
index 5f78a296533..740a6b840d7 100644
--- a/xlators/cluster/ha/src/Makefile.am
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -1,15 +1,17 @@
xlator_LTLIBRARIES = ha.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-ha_la_LDFLAGS = -module -avoidversion
+ha_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
ha_la_SOURCES = ha-helpers.c ha.c
ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = ha.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
index fb6593101a4..19be1ed2773 100644
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
index bdbfcb52b54..e5e97d237dc 100644
--- a/xlators/cluster/ha/src/ha-mem-types.h
+++ b/xlators/cluster/ha/src/ha-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __HA_MEM_TYPES_H__
#define __HA_MEM_TYPES_H__
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
index 54b41858eee..6071eab55ee 100644
--- a/xlators/cluster/ha/src/ha.c
+++ b/xlators/cluster/ha/src/ha.c
@@ -1,31 +1,16 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* generate errors randomly, code is simple now, better alogorithm
* can be written to decide what error to be returned and when
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
@@ -1876,13 +1861,9 @@ err:
}
if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- }
+ GF_FREE (hafdp->fdstate);
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- }
+ GF_FREE (hafdp->path);
GF_FREE (hafdp);
}
@@ -2708,7 +2689,12 @@ ha_statfs_cbk (call_frame_t *frame,
ha_local_t *local = NULL;
ha_private_t *priv = NULL;
+ GF_ASSERT (this);
+
local = frame->local;
+ priv = this->private;
+ GF_ASSERT (priv);
+
if (-1 == op_ret) {
local->active = (local->active + 1) % priv->child_count;
local->tries--;
@@ -2725,7 +2711,6 @@ ha_statfs_cbk (call_frame_t *frame,
out:
loc_wipe (&local->loc);
STACK_UNWIND (frame, op_ret, op_errno, buf);
-
return 0;
}
@@ -3066,7 +3051,7 @@ ha_lk_setlk_unlck_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
int cnt = 0;
@@ -3099,7 +3084,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -3155,7 +3140,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
cnt++;
}
if (cnt) {
- struct flock lock;
+ struct gf_flock lock;
lock = local->stub->args.lk.lock;
for (i = 0; i < child_count; i++) {
if (state[i]) {
@@ -3189,7 +3174,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -3244,7 +3229,7 @@ ha_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -3378,7 +3363,7 @@ ha_inodelk (call_frame_t *frame,
const char *volume,
loc_t *loc,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
int op_errno = 0;
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
index 5e06b7e0201..e2ed7eaa68a 100644
--- a/xlators/cluster/ha/src/ha.h
+++ b/xlators/cluster/ha/src/ha.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __HA_H_
#define __HA_H_
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
index 26e19137a8b..209cafa7c1b 100644
--- a/xlators/cluster/map/src/Makefile.am
+++ b/xlators/cluster/map/src/Makefile.am
@@ -1,15 +1,17 @@
xlator_LTLIBRARIES = map.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-map_la_LDFLAGS = -module -avoidversion
+map_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
map_la_SOURCES = map.c map-helper.c
map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = map.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
index ad01b2102dc..c5f828ae6d1 100644
--- a/xlators/cluster/map/src/map-helper.c
+++ b/xlators/cluster/map/src/map-helper.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2009-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "map.h"
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
index f41ab420ace..3e89f4736e4 100644
--- a/xlators/cluster/map/src/map-mem-types.h
+++ b/xlators/cluster/map/src/map-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_MEM_TYPES_H__
#define __MAP_MEM_TYPES_H__
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
index dd89d0ebee9..fbb56dbc554 100644
--- a/xlators/cluster/map/src/map.c
+++ b/xlators/cluster/map/src/map.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "map.h"
@@ -438,7 +423,7 @@ map_lk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
STACK_UNWIND (frame, op_ret, op_errno, lock);
return 0;
@@ -580,7 +565,7 @@ map_statfs_cbk (call_frame_t *frame,
}
local->op_ret = 0;
- /* when a call is successfull, add it to local->dict */
+ /* when a call is successful, add it to local->dict */
dict_buf = &local->statvfs;
if (dict_buf->f_bsize != 0) {
@@ -1770,7 +1755,7 @@ map_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1799,7 +1784,7 @@ map_lk (call_frame_t *frame,
int32_t
map_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1829,7 +1814,7 @@ map_inodelk (call_frame_t *frame, xlator_t *this,
int32_t
map_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -2375,8 +2360,7 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xlarray)
- GF_FREE (priv->xlarray);
+ GF_FREE (priv->xlarray);
trav_map = priv->map;
while (trav_map) {
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
index eb549eb06e7..7703a543e28 100644
--- a/xlators/cluster/map/src/map.h
+++ b/xlators/cluster/map/src/map.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_H__
#define __MAP_H__
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
index 6d4fae4d268..e732c52423c 100644
--- a/xlators/cluster/stripe/src/Makefile.am
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -1,16 +1,20 @@
-
xlator_LTLIBRARIES = stripe.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-stripe_la_LDFLAGS = -module -avoidversion
+stripe_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+
+stripe_la_SOURCES = stripe.c stripe-helpers.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-stripe_la_SOURCES = stripe.c
stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = stripe.h stripe-mem-types.h
+noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
new file mode 100644
index 00000000000..02ee6a43d7c
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -0,0 +1,677 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "stripe.h"
+#include "byte-order.h"
+#include "mem-types.h"
+#include "logging.h"
+
+/*
+ * Drop the references held inside a stripe_local_t: both locs are wiped and
+ * the fd, inode, xattr and xdata members are unref'd when set.  The structure
+ * itself is not freed here.  A NULL @local is a no-op.
+ */
+void
+stripe_local_wipe (stripe_local_t *local)
+{
+        if (local == NULL)
+                return;
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->loc2);
+
+        if (local->fd != NULL)
+                fd_unref (local->fd);
+
+        if (local->inode != NULL)
+                inode_unref (local->inode);
+
+        if (local->xattr != NULL)
+                dict_unref (local->xattr);
+
+        if (local->xdata != NULL)
+                dict_unref (local->xdata);
+}
+
+
+
+/*
+ * Per-key callback handed to dict_foreach() by stripe_aggregate_xattr().
+ *
+ * @this  : dictionary being iterated (unused here)
+ * @key   : key of the current entry
+ * @value : value of the current entry
+ * @data  : destination dictionary
+ *
+ * QUOTA_SIZE_KEY values are summed into the destination (the stored value is
+ * converted with ntoh64/hton64 around the addition).  GF_CONTENT_KEY entries
+ * are skipped.  Every other key is set in the destination as is.
+ *
+ * Always returns 0; failures are only logged.
+ */
+int
+stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+        dict_t  *target = data;
+        int64_t *total  = NULL;
+        int64_t *addend = NULL;
+
+        if (strcmp (key, QUOTA_SIZE_KEY) != 0) {
+                /* No need to aggregate 'CONTENT' data */
+                if (strcmp (key, GF_CONTENT_KEY) != 0) {
+                        if (dict_set (target, key, value))
+                                gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
+                }
+                return 0;
+        }
+
+        if (dict_get_bin (target, key, (void **)&total) < 0) {
+                /* first contribution for this key: start from a zeroed sum */
+                total = GF_CALLOC (1, sizeof (int64_t),
+                                   gf_common_mt_char);
+                if (total == NULL) {
+                        gf_log ("stripe", GF_LOG_WARNING,
+                                "memory allocation failed");
+                        return 0;
+                }
+
+                if (dict_set_bin (target, key, total, sizeof (int64_t)) < 0) {
+                        gf_log ("stripe", GF_LOG_WARNING,
+                                "stripe aggregate dict set failed");
+                        GF_FREE (total);
+                        return 0;
+                }
+        }
+
+        addend = data_to_bin (value);
+        if (addend == NULL) {
+                gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
+                return 0;
+        }
+
+        *total = hton64 (ntoh64 (*total) + ntoh64 (*addend));
+
+        return 0;
+}
+
+
+/*
+ * Fold every entry of @src into @dst by running stripe_aggregate() over
+ * @src.  Nothing happens when either dictionary is NULL.
+ */
+void
+stripe_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+        if (dst != NULL && src != NULL)
+                dict_foreach (src, stripe_aggregate, dst);
+}
+
+
+/*
+ * Concatenate the collected xattr values of @local into @buffer, separated
+ * by single spaces, and NUL-terminate the result.
+ *
+ * @buffer : destination; the caller must have sized it for all values
+ * @local  : supplies xattr_list / nallocs
+ * @total  : optional; receives the length of the text written
+ *
+ * The trailing separator is replaced by the terminator.  When no entry is
+ * copied, the terminator is therefore written one byte *before* @buffer and
+ * *total becomes -1; the caller must make sure that byte is writable.
+ *
+ * Returns 0 on success, -1 when @buffer, @local or its xattr_list is NULL.
+ */
+int32_t
+stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+{
+        int32_t              idx    = 0;
+        int32_t              chunk  = 0;
+        char                *cursor = NULL;
+        stripe_xattr_sort_t *entry  = NULL;
+
+        if (!buffer || !local || !local->xattr_list)
+                return -1;
+
+        cursor = buffer;
+
+        for (idx = 0; idx < local->nallocs; idx++) {
+                entry = &local->xattr_list[idx];
+                chunk = entry->xattr_len;
+
+                if (!chunk || !entry->xattr_value)
+                        continue;
+
+                memcpy (cursor, entry->xattr_value, chunk);
+                cursor[chunk] = ' ';
+                cursor += chunk + 1;
+        }
+
+        /* step back over the last separator and terminate there */
+        --cursor;
+        *cursor = '\0';
+
+        if (total)
+                *total = cursor - buffer;
+
+        return 0;
+}
+
+/*
+ * Free the xattr_value buffer of every entry in local->xattr_list.
+ *
+ * The list array itself is not freed.  Each freed pointer is reset to NULL so
+ * a second call (or a later cleanup path) cannot free it twice.
+ *
+ * Returns 0 on success, -1 when @local or its xattr_list is NULL.
+ */
+int32_t
+stripe_free_xattr_str (stripe_local_t *local)
+{
+        int32_t              i     = 0;
+        int32_t              ret   = -1;
+        stripe_xattr_sort_t *xattr = NULL;
+
+        if (!local || !local->xattr_list)
+                goto out;
+
+        for (i = 0; i < local->nallocs; i++) {
+                xattr = local->xattr_list + i;
+
+                /* GF_FREE is called unguarded elsewhere in the tree */
+                GF_FREE (xattr->xattr_value);
+                xattr->xattr_value = NULL;
+        }
+
+        ret = 0;
+ out:
+        return ret;
+}
+
+
+/*
+ * Merge the serialized dictionaries collected in local->xattr_list into one
+ * dictionary and hand back its serialized form.
+ *
+ * @this       : xlator, used for log messages
+ * @local      : supplies xattr_list / nallocs
+ * @xattr_serz : out; set to a newly allocated buffer when the merged
+ *               dictionary has a positive serialized length, otherwise left
+ *               untouched.  The caller owns the buffer.
+ *
+ * Returns 0 on success and -1 on failure (NULL @xattr_serz or @local,
+ * allocation failure, unserialize/copy/serialize error).
+ */
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+                            void **xattr_serz)
+{
+        int32_t              ret   = -1, i = 0, len = 0;
+        dict_t              *tmp1  = NULL, *tmp2 = NULL;
+        char                *buf   = NULL;
+        stripe_xattr_sort_t *xattr = NULL;
+
+        /* local is dereferenced below, so reject NULL up front */
+        if (xattr_serz == NULL || local == NULL) {
+                goto out;
+        }
+
+        tmp2 = dict_new ();
+
+        if (tmp2 == NULL) {
+                goto out;
+        }
+
+        for (i = 0; i < local->nallocs; i++) {
+                xattr = local->xattr_list + i;
+                len = xattr->xattr_len;
+
+                if (!len || !xattr->xattr_value)
+                        continue;
+
+                /* tmp2 is a scratch dict reused for every entry */
+                ret = dict_reset (tmp2);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "dict_reset failed (%s)",
+                                strerror (-ret));
+                }
+
+                ret = dict_unserialize (xattr->xattr_value,
+                                        xattr->xattr_len,
+                                        &tmp2);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "dict_unserialize failed (%s)",
+                                strerror (-ret));
+                        ret = -1;
+                        goto out;
+                }
+
+                tmp1 = dict_copy (tmp2, tmp1);
+                if (tmp1 == NULL) {
+                        /* ret holds no error code here, so none is printed */
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "dict_copy failed");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        len = dict_serialized_length (tmp1);
+        if (len > 0) {
+                buf = GF_CALLOC (1, len, gf_common_mt_dict_t);
+                if (buf == NULL) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_serialize (tmp1, buf);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "dict_serialize failed (%s)", strerror (-ret));
+                        GF_FREE(buf);
+                        ret = -1;
+                        goto out;
+                }
+
+                *xattr_serz = buf;
+        }
+
+        ret = 0;
+out:
+        if (tmp1 != NULL) {
+                dict_unref (tmp1);
+        }
+
+        if (tmp2 != NULL) {
+                dict_unref (tmp2);
+        }
+
+        return ret;
+}
+
+
+/*
+ * Build the pathinfo string for this stripe xlator:
+ *
+ *   (<STRIPE_PATHINFO_HEADER name:[stripe-size]> child-info ... )
+ *
+ * @this       : xlator whose name goes into the header
+ * @local      : supplies fctx (for the stripe size) and the collected child
+ *               strings; local->xattr_total_len is increased by the size of
+ *               the decorations as a side effect
+ * @xattr_serz : out; receives the newly allocated string on success, the
+ *               caller owns it
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int32_t
+stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+                            char **xattr_serz)
+{
+        int      ret                 = -1;
+        int32_t  padding             = 0;
+        int32_t  tlen                = 0;
+        char     stripe_size_str[20] = {0,};
+        char    *pathinfo_serz       = NULL;
+
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+                goto out;
+        }
+
+        /*
+         * The cast must cover the whole conditional; applied to local->fctx
+         * alone it would convert the pointer instead of the value printed.
+         */
+        (void) snprintf (stripe_size_str, sizeof (stripe_size_str), "%"PRId64,
+                         (int64_t) (local->fctx ? local->fctx->stripe_size : 0));
+
+        /* extra bytes for decorations (brackets and <>'s) */
+        padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
+                + strlen (stripe_size_str) + 7;
+        local->xattr_total_len += (padding + 2);
+
+        pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
+                                   gf_common_mt_char);
+        if (!pathinfo_serz)
+                goto out;
+
+        /* xlator info; this prefix is exactly 'padding' characters long */
+        (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
+                        this->name, stripe_size_str);
+
+        ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot aggregate pathinfo list");
+                GF_FREE(pathinfo_serz);
+                goto out;
+        }
+
+        /* close the bracket right after the aggregated child strings */
+        *(pathinfo_serz + padding + tlen) = ')';
+        *(pathinfo_serz + padding + tlen + 1) = '\0';
+
+        *xattr_serz = pathinfo_serz;
+
+        ret = 0;
+ out:
+        return ret;
+}
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ *
+ * Under priv->lock, starts from priv->block_size and walks priv->pattern;
+ * the first entry whose path_pattern matches @path (fnmatch with
+ * FNM_NOESCAPE) supplies the block size instead.
+ *
+ * The value is computed as uint64_t but returned through int32_t, so it is
+ * narrowed on return.  The result stays 0 when the validation macros bail
+ * out (presumably on a NULL @priv or @path -- see GF_VALIDATE_OR_GOTO).
+ */
+int32_t
+stripe_get_matching_bs (const char *path, stripe_private_t *priv)
+{
+        struct stripe_options *opt   = NULL;
+        uint64_t               bsize = 0;
+
+        GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+        GF_VALIDATE_OR_GOTO ("stripe", path, out);
+
+        LOCK (&priv->lock);
+        {
+                bsize = priv->block_size;
+
+                for (opt = priv->pattern; opt != NULL; opt = opt->next) {
+                        if (fnmatch (opt->path_pattern, path,
+                                     FNM_NOESCAPE) == 0) {
+                                bsize = opt->block_size;
+                                break;
+                        }
+                }
+        }
+        UNLOCK (&priv->lock);
+
+out:
+        return bsize;
+}
+
+/*
+ * Populate local->fctx from the stripe xattrs found in @dict.
+ *
+ * @this  : stripe xlator; its name forms the xattr keys
+ *          ("trusted.<name>.stripe-size" and friends)
+ * @prev  : frame of the child that answered; prev->this is recorded in the
+ *          xl_array slot given by the stripe-index xattr
+ * @local : receives the fd context; allocated here when not yet present
+ * @dict  : xattr dictionary returned by the child
+ *
+ * Always returns 0.  Problems are reported through @local instead:
+ * op_ret/op_errno for hard errors (ENOMEM, EIO) and xattr_self_heal_needed
+ * for missing or mismatching attributes.
+ */
+int32_t
+stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
+                   dict_t *dict)
+{
+        char              key[256] = {0,};
+        data_t           *data     = NULL;
+        int32_t           index    = 0;
+        stripe_private_t *priv     = NULL;
+
+        priv = this->private;
+
+
+        if (!local->fctx) {
+                local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
+                                         gf_stripe_mt_stripe_fd_ctx_t);
+                if (!local->fctx) {
+                        local->op_errno = ENOMEM;
+                        local->op_ret = -1;
+                        goto out;
+                }
+
+                local->fctx->static_array = 0;
+        }
+        /* Stripe block size */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-size", this->name);
+        data = dict_get (dict, key);
+        if (!data) {
+                local->xattr_self_heal_needed = 1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get stripe-size");
+                goto out;
+        } else {
+                /* first answer sets the size, later ones are compared */
+                if (!local->fctx->stripe_size) {
+                        local->fctx->stripe_size =
+                                data_to_int64 (data);
+                }
+
+                if (local->fctx->stripe_size != data_to_int64 (data)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "stripe-size mismatch in blocks");
+                        local->xattr_self_heal_needed = 1;
+                }
+        }
+
+        /* Stripe count */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-count", this->name);
+        data = dict_get (dict, key);
+
+        if (!data) {
+                local->xattr_self_heal_needed = 1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get stripe-count");
+                goto out;
+        }
+        if (!local->fctx->xl_array) {
+                local->fctx->stripe_count = data_to_int32 (data);
+                if (!local->fctx->stripe_count) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error with stripe-count xattr");
+                        local->op_ret   = -1;
+                        local->op_errno = EIO;
+                        goto out;
+                }
+
+                local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
+                                                   sizeof (xlator_t *),
+                                                   gf_stripe_mt_xlator_t);
+
+                if (!local->fctx->xl_array) {
+                        local->op_errno = ENOMEM;
+                        local->op_ret   = -1;
+                        goto out;
+                }
+        }
+        if (local->fctx->stripe_count != data_to_int32 (data)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error with stripe-count xattr (%d != %d)",
+                        local->fctx->stripe_count, data_to_int32 (data));
+                local->op_ret   = -1;
+                local->op_errno = EIO;
+                goto out;
+        }
+
+        /* index */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-index", this->name);
+        data = dict_get (dict, key);
+        if (!data) {
+                local->xattr_self_heal_needed = 1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get stripe-index");
+                goto out;
+        }
+        index = data_to_int32 (data);
+        /*
+         * The index is used as an array subscript below, so it must lie in
+         * [0, child_count); both a negative value and index == child_count
+         * would step outside the array.
+         */
+        if (index < 0 || index >= priv->child_count) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error with stripe-index xattr (%d)", index);
+                local->op_ret   = -1;
+                local->op_errno = EIO;
+                goto out;
+        }
+        if (local->fctx->xl_array) {
+                /* keep the first child that claimed this slot */
+                if (!local->fctx->xl_array[index])
+                        local->fctx->xl_array[index] = prev->this;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-coalesce", this->name);
+        data = dict_get(dict, key);
+        if (!data) {
+                /*
+                 * The file was probably created prior to coalesce support.
+                 * Assume non-coalesce mode for this file to maintain backwards
+                 * compatibility.
+                 */
+                gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+                       "attr, assume non-coalesce mode");
+                local->fctx->stripe_coalesce = 0;
+        } else {
+                local->fctx->stripe_coalesce = data_to_int32(data);
+        }
+
+
+out:
+        return 0;
+}
+
+/*
+ * Store the stripe layout of a file in @dict under the keys
+ * "trusted.<xlator-name>.stripe-{size,count,index,coalesce}".
+ *
+ * Returns 0 when all four values were set; otherwise the non-zero result of
+ * the failing dict_set_*() call, after logging a warning.  Values set before
+ * the failure stay in @dict.
+ */
+int32_t
+stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
+                            uint32_t stripe_count, uint32_t stripe_index,
+                            uint32_t stripe_coalesce)
+{
+        char    key[256] = {0,};
+        int32_t ret      = -1;
+
+        /* snprintf keeps an over-long xlator name from overrunning key[] */
+        snprintf (key, sizeof (key), "trusted.%s.stripe-size", this->name);
+        ret = dict_set_int64 (dict, key, stripe_size);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set %s in xattr_req dict", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-count", this->name);
+        ret = dict_set_int32 (dict, key, stripe_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set %s in xattr_req dict", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-index", this->name);
+        ret = dict_set_int32 (dict, key, stripe_index);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set %s in xattr_req dict", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "trusted.%s.stripe-coalesce", this->name);
+        ret = dict_set_int32(dict, key, stripe_coalesce);
+        if (ret) {
+                gf_log(this->name, GF_LOG_WARNING,
+                       "failed to set %s in xattr_req_dict", key);
+                goto out;
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Parse the byte-size string @num into priv->block_size.
+ *
+ * Returns 0 when the string was parsed, -1 otherwise (including when one of
+ * the GF_VALIDATE_OR_GOTO checks on THIS, @priv or @num bails out).
+ */
+static int
+set_default_block_size (stripe_private_t *priv, char *num)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
+        GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+        GF_VALIDATE_OR_GOTO (THIS->name, num, out);
+
+        if (gf_string2bytesize_uint64 (num, &priv->block_size) == 0) {
+                ret = 0;
+        } else {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "invalid number format \"%s\"", num);
+        }
+
+ out:
+        return ret;
+}
+
+
+/*
+ * Parse the block-size option string into priv->pattern.
+ *
+ * @data is a comma separated list of "<pattern>:<size>" items; an item
+ * without ':' is a bare size that is used for the pattern "*" and is also
+ * stored as the default via set_default_block_size().  Every size must be at
+ * least STRIPE_MIN_BLOCK_SIZE and a multiple of 512.  @data is modified by
+ * strtok_r().
+ *
+ * Returns 0 when every item was accepted, -1 otherwise.
+ */
+int
+set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+{
+        int                    ret = -1;
+        char                  *tmp_str = NULL;
+        char                  *tmp_str1 = NULL;
+        char                  *dup_str = NULL;
+        char                  *stripe_str = NULL;
+        char                  *pattern = NULL;
+        char                  *num = NULL;
+        struct stripe_options *temp_stripeopt = NULL;
+        struct stripe_options *stripe_opt = NULL;
+
+        if (!this || !priv || !data)
+                goto out;
+
+        /* Get the pattern for striping.
+           "option block-size *avi:10MB" etc */
+        stripe_str = strtok_r (data, ",", &tmp_str);
+        while (stripe_str) {
+                /* every item starts out as "failed" until fully validated */
+                ret = -1;
+
+                dup_str = gf_strdup (stripe_str);
+                if (!dup_str)
+                        goto out;
+
+                stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
+                                        gf_stripe_mt_stripe_options);
+                if (!stripe_opt) {
+                        goto out;
+                }
+
+                pattern = strtok_r (dup_str, ":", &tmp_str1);
+                num = strtok_r (NULL, ":", &tmp_str1);
+                if (!num) {
+                        num = pattern;
+                        pattern = "*";
+                        ret = set_default_block_size (priv, num);
+                        if (ret)
+                                goto out;
+                        /*
+                         * ret is 0 now; put it back to -1 so the validation
+                         * failures below are not reported as success.
+                         */
+                        ret = -1;
+                }
+                if (gf_string2bytesize_uint64 (num, &stripe_opt->block_size) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\"", num);
+                        goto out;
+                }
+
+                if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
+                        gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
+                                "%s. Should be atleast %llu bytes", num,
+                                STRIPE_MIN_BLOCK_SIZE);
+                        goto out;
+                }
+                if (stripe_opt->block_size % 512) {
+                        gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
+                                " be a multiple of 512 bytes", num);
+                        goto out;
+                }
+
+                memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "block-size : pattern %s : size %"PRId64,
+                        stripe_opt->path_pattern, stripe_opt->block_size);
+
+                /*
+                 * NOTE(review): both branches leave temp_stripeopt NULL, so
+                 * the new entry replaces priv->pattern and any entries that
+                 * were already linked there are dropped.  Left as is because
+                 * the intended list order cannot be determined from here --
+                 * confirm against stripe_get_matching_bs() and reconfigure.
+                 */
+                if (priv->pattern)
+                        temp_stripeopt = NULL;
+                else
+                        temp_stripeopt = priv->pattern;
+
+                stripe_opt->next = temp_stripeopt;
+
+                priv->pattern = stripe_opt;
+                stripe_opt = NULL;
+
+                GF_FREE (dup_str);
+                dup_str = NULL;
+
+                stripe_str = strtok_r (NULL, ",", &tmp_str);
+        }
+
+        ret = 0;
+out:
+
+        GF_FREE (dup_str);
+
+        GF_FREE (stripe_opt);
+
+        return ret;
+}
+
+/*
+ * Raise the size and the m/c/a-time fields of @to to those of @from wherever
+ * @from holds the larger value.  Other fields are untouched.  Always
+ * returns 0.
+ */
+int32_t
+stripe_iatt_merge (struct iatt *from, struct iatt *to)
+{
+        if (from->ia_size > to->ia_size)
+                to->ia_size = from->ia_size;
+
+        if (from->ia_mtime > to->ia_mtime)
+                to->ia_mtime = from->ia_mtime;
+
+        if (from->ia_ctime > to->ia_ctime)
+                to->ia_ctime = from->ia_ctime;
+
+        if (from->ia_atime > to->ia_atime)
+                to->ia_atime = from->ia_atime;
+
+        return 0;
+}
+
+/*
+ * Translate a file offset into the offset inside one stripe member when the
+ * member stores its chunks back to back.
+ *
+ * A "row" is stripe_size * stripe_count bytes of the file; the result is the
+ * number of complete rows before @offset times stripe_size, plus the
+ * position of @offset inside its chunk.
+ */
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+        size_t   row_bytes = 0;
+        uint64_t row       = 0;
+        off_t    result    = 0;
+
+        row_bytes = stripe_size * stripe_count;
+        row = offset / row_bytes;
+
+        result = (row * stripe_size) + (offset % stripe_size);
+
+        return result;
+}
+
+/*
+ * Estimate the overall file size from the size of one stripe member whose
+ * chunks are stored back to back.
+ *
+ * @size         : size of the member's local file
+ * @stripe_size  : chunk size
+ * @stripe_count : number of stripe members
+ * @stripe_index : position of this member within a stripe
+ *
+ * A @size of 0 is returned unchanged.
+ */
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+                 int stripe_index)
+{
+        uint64_t chunks    = 0;
+        uint64_t remainder = 0;
+
+        if (size == 0)
+                return size;
+
+        /*
+         * Every stripe_size chunk held locally stands for one full stripe
+         * of the file, i.e. stripe_count chunks overall.
+         */
+        chunks = (size / stripe_size) * stripe_count;
+        remainder = size % stripe_size;
+
+        if (remainder) {
+                /*
+                 * A partial chunk means this member holds the tail of the
+                 * file: count the members before it in the last stripe and
+                 * add the partial bytes for the exact size.
+                 */
+                chunks += stripe_index;
+                size = chunks * stripe_size + remainder;
+        } else {
+                /*
+                 * No partial chunk, so the estimate may be too large.  Drop
+                 * the chunks of the members that follow this one; they will
+                 * report a larger size themselves if they hold data.
+                 */
+                chunks -= stripe_count - (stripe_index + 1);
+                size = chunks * stripe_size;
+        }
+
+        return size;
+}
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
index 06667107d65..e9ac9cf4648 100644
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ b/xlators/cluster/stripe/src/stripe-mem-types.h
@@ -1,21 +1,11 @@
-
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,15 +15,16 @@
#include "mem-types.h"
enum gf_stripe_mem_types_ {
- gf_stripe_mt_stripe_local_t = gf_common_mt_end + 1,
- gf_stripe_mt_iovec,
- gf_stripe_mt_readv_replies,
+ gf_stripe_mt_iovec = gf_common_mt_end + 1,
+ gf_stripe_mt_stripe_replies,
gf_stripe_mt_stripe_fd_ctx_t,
gf_stripe_mt_char,
gf_stripe_mt_int8_t,
+ gf_stripe_mt_int32_t,
gf_stripe_mt_xlator_t,
gf_stripe_mt_stripe_private_t,
gf_stripe_mt_stripe_options,
+ gf_stripe_mt_xattr_sort_t,
gf_stripe_mt_end
};
#endif
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index 4826d80a8f2..ae175faf811 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -1,25 +1,16 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/**
* xlators/cluster/stripe:
- * Stripe translator, stripes the data accross its child nodes,
+ * Stripe translator, stripes the data across its child nodes,
* as per the options given in the volfile. The striping works
* fairly simple. It writes files at different offset as per
* calculation. So, 'ls -l' output at the real posix level will
@@ -32,65 +23,19 @@
* very much necessary, or else, use it in combination with AFR, to have a
* backup copy.
*/
+#include <fnmatch.h>
#include "stripe.h"
+#include "libxlator.h"
+#include "byte-order.h"
+#include "statedump.h"
-void
-stripe_local_wipe (stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- if (local->loc.path)
- loc_wipe (&local->loc);
- if (local->loc2.path)
- loc_wipe (&local->loc2);
-out:
- return;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, struct stripe_options *opts,
- uint64_t default_bs)
-{
- struct stripe_options *trav = NULL;
- char *pathname = NULL;
- uint64_t block_size = 0;
-
- block_size = default_bs;
-
- if (!path || !opts)
- goto out;
-
- /* FIXME: is a strdup really necessary? */
- pathname = gf_strdup (path);
- if (!pathname)
- goto out;
-
- trav = opts;
- while (trav) {
- if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
-
- GF_FREE (pathname);
-
-out:
- return block_size;
-}
-
-
+struct volume_options options[];
int32_t
stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int callcnt = -1;
stripe_local_t *local = NULL;
@@ -109,8 +54,7 @@ stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (!callcnt) {
- stripe_local_wipe (local);
- STACK_DESTROY (frame->root);
+ STRIPE_STACK_DESTROY (frame);
}
out:
return 0;
@@ -120,7 +64,7 @@ int32_t
stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -135,7 +79,7 @@ stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
prev->this->fops->setattr, &local->loc,
- &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID));
+ &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
out:
return 0;
@@ -149,6 +93,8 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
stripe_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
if (!local || !this || !frame) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -165,8 +111,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
if (!rframe) {
goto out;
}
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
goto out;
}
@@ -175,37 +120,58 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
loc_copy (&rlocal->loc, &local->loc);
memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
+ xdata = dict_new ();
+ if (!xdata)
+ goto out;
+
+ ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid-req", local->loc.path);
+
while (trav) {
if (IA_ISREG (local->stbuf.ia_type)) {
STACK_WIND (rframe, stripe_sh_make_entry_cbk,
trav->xlator, trav->xlator->fops->mknod,
&local->loc,
st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type), 0);
+ local->stbuf.ia_type),
+ 0, 0, xdata);
}
if (IA_ISDIR (local->stbuf.ia_type)) {
STACK_WIND (rframe, stripe_sh_make_entry_cbk,
trav->xlator, trav->xlator->fops->mkdir,
- &local->loc, st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type));
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, xdata);
}
trav = trav->next;
}
+ if (xdata)
+ dict_unref (xdata);
+ return 0;
+
out:
+ if (rframe)
+ STRIPE_STACK_DESTROY (rframe);
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
+
int32_t
stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int32_t callcnt = 0;
- dict_t *tmp_dict = NULL;
- inode_t *tmp_inode = NULL;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -220,14 +186,15 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
callcnt = --local->call_count;
if (op_ret == -1) {
- if (op_errno != ENOENT)
+ /* Skip the debug log for ENOENT and ESTALE. The test must use '&&':
+  * joined with '||' it is true for every errno value. */
+ if ((op_errno != ENOENT) && (op_errno != ESTALE))
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
prev->this->name,
strerror (op_errno));
if (local->op_errno != ESTALE)
local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
+ if (((op_errno != ENOENT) && (op_errno != ENOTCONN)
+ && (op_errno != ESTALE)) ||
(prev->this == FIRST_CHILD (this)))
local->failed = 1;
if (op_errno == ENOENT)
@@ -236,51 +203,81 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG (buf->ia_type)) {
+ ret = stripe_ctx_handle (this, prev, local,
+ xdata);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error getting fctx info from"
+ " dict");
+ }
if (FIRST_CHILD(this) == prev->this) {
local->stbuf = *buf;
local->postparent = *postparent;
local->inode = inode_ref (inode);
- local->dict = dict_ref (dict);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ if (local->xattr) {
+ stripe_aggregate_xattr (local->xdata,
+ local->xattr);
+ dict_unref (local->xattr);
+ local->xattr = NULL;
+ }
+ }
+
+ if (!local->xdata && !local->xattr) {
+ local->xattr = dict_ref (xdata);
+ } else if (local->xdata) {
+ stripe_aggregate_xattr (local->xdata, xdata);
+ } else if (local->xattr) {
+ stripe_aggregate_xattr (local->xattr, xdata);
}
+
local->stbuf_blocks += buf->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->postparent_size < postparent->ia_size)
local->postparent_size = postparent->ia_size;
+
+ if (gf_uuid_is_null (local->ia_gfid))
+ gf_uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+ /* Make sure the gfid on all the nodes are same */
+ if (gf_uuid_compare (local->ia_gfid, buf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on subvolume %s",
+ local->loc.path, prev->this->name);
+ }
}
}
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->op_ret == 0 && local->entry_self_heal_needed)
+ if (local->op_ret == 0 && local->entry_self_heal_needed &&
+ !gf_uuid_is_null (local->loc.inode->gfid))
stripe_entry_self_heal (frame, this, local);
if (local->failed)
local->op_ret = -1;
- tmp_dict = local->dict;
- tmp_inode = local->inode;
-
if (local->op_ret != -1) {
local->stbuf.ia_blocks = local->stbuf_blocks;
local->stbuf.ia_size = local->stbuf_size;
local->postparent.ia_blocks = local->postparent_blocks;
local->postparent.ia_size = local->postparent_size;
+ inode_ctx_put (local->inode, this,
+ (uint64_t) (long)local->fctx);
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (lookup, frame, local->op_ret,
+ STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
local->op_errno, local->inode,
- &local->stbuf, local->dict,
+ &local->stbuf, local->xdata,
&local->postparent);
-
- if (tmp_inode)
- inode_unref (tmp_inode);
- if (tmp_dict)
- dict_unref (tmp_dict);
}
out:
return 0;
@@ -288,25 +285,26 @@ out:
int32_t
stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
int32_t op_errno = EINVAL;
+ int64_t filesize = 0;
+ int ret = 0;
+ uint64_t tmpctx = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -315,26 +313,60 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
frame->local = local;
loc_copy (&local->loc, loc);
- /* Everytime in stripe lookup, all child nodes
+ inode_ctx_get (local->inode, this, &tmpctx);
+ if (tmpctx)
+ local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
+
+ /* quick-read friendly changes */
+ if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
+ ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
+ if (!ret && (filesize > priv->block_size))
+ dict_del (xdata, GF_CONTENT_KEY);
+ }
+
+ /* get stripe-size xattr on lookup. This would be required for
+ * open/read/write/pathinfo calls. Hence we send down the request
+ * even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (xdata && (IA_ISREG (loc->inode->ia_type) ||
+ (loc->inode->ia_type == IA_INVAL))) {
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
+ if (ret)
+ gf_log (this->name , GF_LOG_ERROR, "Failed to build"
+ " xattr request for %s", loc->path);
+
+ }
+
+ /* Every time in stripe lookup, all child nodes
should be looked up */
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup,
- loc, xattr_req);
+ trav->xlator->fops->lookup, loc, xdata);
trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
err:
- STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -369,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -384,20 +419,20 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_blocks = local->stbuf_blocks;
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (stat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
+ local->op_errno, &local->stbuf, NULL);
}
out:
return 0;
}
int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -415,8 +450,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -425,23 +459,30 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc);
+ trav->xlator->fops->stat, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
{
stripe_local_t *local = NULL;
int32_t callcnt = 0;
@@ -478,15 +519,15 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (!callcnt) {
- STACK_UNWIND_STRICT (statfs, frame, local->op_ret,
- local->op_errno, &local->statvfs_buf);
+ STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
+ local->op_errno, &local->statvfs_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -501,8 +542,7 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -514,13 +554,13 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
+ trav->xlator->fops->statfs, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -529,7 +569,7 @@ err:
int32_t
stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -567,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -587,22 +630,23 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->post_buf.ia_size = local->postbuf_size;
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (truncate, frame, local->op_ret,
+ STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -611,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -619,8 +662,7 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -629,15 +671,55 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset);
- trav = trav->next;
- }
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
return 0;
err:
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -645,7 +727,7 @@ err:
int32_t
stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -684,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += preop->ia_blocks;
local->postbuf_blocks += postop->ia_blocks;
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
if (local->prebuf_size < preop->ia_size)
local->prebuf_size = preop->ia_size;
if (local->postbuf_size < postop->ia_size)
@@ -703,10 +788,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->post_buf.ia_size = local->postbuf_size;
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (setattr, frame, local->op_ret,
+ STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -715,11 +799,12 @@ out:
int32_t
stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -737,33 +822,47 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->call_count = priv->child_count;
+ if (!IA_ISDIR (loc->inode->ia_type) &&
+ !IA_ISREG (loc->inode->ia_type)) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, NULL);
+ return 0;
+ }
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+ local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk,
trav->xlator, trav->xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -779,8 +878,7 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -791,13 +889,13 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid);
+ trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -805,7 +903,8 @@ int32_t
stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -842,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->pre_buf.ia_blocks += prenewparent->ia_blocks;
local->post_buf.ia_blocks += postnewparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf.ia_size < buf->ia_size)
local->stbuf.ia_size = buf->ia_size;
@@ -864,11 +965,10 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (rename, frame, local->op_ret, local->op_errno,
+ STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preparent,
&local->postparent, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -878,7 +978,8 @@ int32_t
stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -909,25 +1010,25 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
while (trav) {
STACK_WIND (frame, stripe_stack_rename_cbk,
trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
trav = trav->next;
}
return 0;
unwind:
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, NULL);
return 0;
}
int32_t
stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -947,36 +1048,79 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
+
+ frame->local = local;
+
local->op_ret = -1;
loc_copy (&local->loc, oldloc);
loc_copy (&local->loc2, newloc);
local->call_count = priv->child_count;
- frame->local = local;
+ if (IA_ISREG(oldloc->inode->ia_type)) {
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc);
+ trav->xlator->fops->rename, oldloc, newloc, NULL);
return 0;
err:
- STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+/*
+ * Callback for the unlink that stripe_unlink_cbk winds to FIRST_CHILD.
+ *
+ * On failure (op_ret == -1) the error is logged and unlink is unwound
+ * with -1 and the received op_errno. On success op_ret is set to 0, the
+ * parent iatts are copied into the local, and unlink is unwound with the
+ * local's op_ret/op_errno, the copied iatts and the received xdata.
+ *
+ * Always returns 0.
+ */
+int32_t
+stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ /* NOTE(review): this path jumps to 'out', which unwinds using 'frame'
+  * even when 'frame' itself was the NULL argument -- confirm that
+  * STRIPE_STACK_UNWIND tolerates a NULL frame. */
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+ local->op_ret = 0;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+ /* The block totals are accumulated here but are not read again in
+  * this function before the unwind. */
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+ return 0;
+out:
+ /* Failure path: report -1 with the op_errno passed to this callback. */
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
+
int32_t
stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -998,50 +1142,33 @@ stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
prev->this->name, strerror (op_errno));
local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
+ if (op_errno != ENOENT) {
local->failed = 1;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->preparent = *preparent;
- local->postparent = *postparent;
+ local->op_ret = op_ret;
}
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
-
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
}
}
UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
+ if (callcnt == 1) {
+ if (local->failed) {
+ op_errno = local->op_errno;
+ goto out;
}
-
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (unlink, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent);
+ STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->unlink, &local->loc,
+ local->xflag, local->xdata);
}
+ return 0;
out:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1069,26 +1196,32 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
frame->local = local;
local->call_count = priv->child_count;
+ trav = trav->next; /* Skip the first child */
while (trav) {
STACK_WIND (frame, stripe_unlink_cbk,
trav->xlator, trav->xlator->fops->unlink,
- loc);
+ loc, xflag, xdata);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1096,10 +1229,8 @@ err:
int32_t
stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
if (!this || !frame || !frame->local) {
@@ -1112,11 +1243,10 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
- trav = this->children;
local = frame->local;
+ local->op_ret = 0;
local->call_count--; /* First child successful */
- trav = trav->next; /* Skip first child */
local->preparent = *preparent;
local->postparent = *postparent;
@@ -1125,21 +1255,60 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
- while (trav) {
- STACK_WIND (frame, stripe_unlink_cbk, trav->xlator,
- trav->xlator->fops->rmdir, &local->loc);
- trav = trav->next;
- }
-
+ STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+/*
+ * Callback for the rmdir calls wound by stripe_rmdir() to every child
+ * except the first one.
+ *
+ * Each reply decrements local->call_count.  A failure other than ENOENT
+ * marks the operation as failed and remembers its errno in
+ * local->op_errno.  When only one call is left to account for
+ * (callcnt == 1, i.e. the first child, which has not been wound yet) the
+ * rmdir is either wound to FIRST_CHILD with stripe_first_rmdir_cbk, or,
+ * if any child failed, unwound with -1 and the recorded errno.
+ *
+ * Always returns 0.
+ */
+stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        int32_t         callcnt = 0;
+        stripe_local_t *local   = NULL;
+        call_frame_t   *prev    = NULL;
+
+        if (!this || !frame || !frame->local || !cookie) {
+                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+                goto out;
+        }
+
+        prev  = cookie;
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+                                prev->this->name, strerror (op_errno));
+                        if (op_errno != ENOENT) {
+                                local->failed = 1;
+                                /* Keep the errno of the failing child: the
+                                 * reply that reaches callcnt == 1 may be a
+                                 * successful one whose op_errno is 0. */
+                                local->op_errno = op_errno;
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 1) {
+                if (local->failed) {
+                        /* Report the recorded failure, not the errno of
+                         * whichever reply happened to arrive last. */
+                        op_errno = local->op_errno;
+                        goto out;
+                }
+                STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->rmdir, &local->loc,
+                            local->flags, NULL);
+        }
+        return 0;
+out:
+        STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+int32_t
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1162,8 +1331,7 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1171,14 +1339,19 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->op_ret = -1;
frame->local = local;
loc_copy (&local->loc, loc);
+ local->flags = flags;
local->call_count = priv->child_count;
+ trav = trav->next; /* skip the first child */
- STACK_WIND (frame, stripe_first_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc);
+ while (trav) {
+ STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
+ trav->xlator->fops->rmdir, loc, flags, NULL);
+ trav = trav->next;
+ }
return 0;
err:
- STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1187,7 +1360,7 @@ int32_t
stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1206,10 +1379,9 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
UNLOCK (&frame->lock);
if (!callcnt) {
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (mknod, frame, local->op_ret, local->op_errno,
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1221,7 +1393,7 @@ out:
int32_t
stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1260,16 +1432,15 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (mknod, frame, local->op_ret, local->op_errno,
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1279,13 +1450,13 @@ int32_t
stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- int ret = 0;
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1293,7 +1464,7 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
prev = cookie;
- priv = this->private;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
@@ -1309,20 +1480,24 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->failed = 1;
local->op_errno = op_errno;
}
-
if (op_ret >= 0) {
local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
+ /* Can be used as a mechanism to understand if mknod
+ was successful in at least one place */
+ if (gf_uuid_is_null (local->ia_gfid))
+ gf_uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1337,6 +1512,23 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
+ if ((local->op_ret == -1) && !gf_uuid_is_null (local->ia_gfid)) {
+ /* ia_gfid set means, at least on one node 'mknod'
+ is successful */
+ local->call_count = priv->child_count;
+ trav = this->children;
+ while (trav) {
+ STACK_WIND (frame,
+ stripe_mknod_ifreg_fail_unlink_cbk,
+ trav->xlator,
+ trav->xlator->fops->unlink,
+ &local->loc, 0, NULL);
+ trav = trav->next;
+ }
+ return 0;
+ }
+
+
if (local->op_ret != -1) {
local->preparent.ia_blocks = local->preparent_blocks;
local->preparent.ia_size = local->preparent_size;
@@ -1344,61 +1536,109 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent.ia_size = local->postparent_size;
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
+ inode_ctx_put (local->inode, this,
+ (uint64_t)(long) local->fctx);
+
}
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
+ }
+out:
+ return 0;
+}
- if ((local->op_ret != -1) && priv->xattr_supported) {
- /* Send a setxattr request to nodes where the
- files are created */
- int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
- dict_t *dict = NULL;
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
+/*
+ * Callback for the mknod wound to the first child by stripe_mknod().
+ *
+ * On failure the error is recorded in local and the mknod is unwound
+ * with the child's op_ret/op_errno.  On success the first child's iatts
+ * and gfid are stored in local and mknod is wound to every remaining
+ * child with stripe_mknod_ifreg_cbk.  When priv->xattr_supported is set,
+ * each remaining child gets its own dict: a copy of local->xattr extended
+ * by stripe_xattr_request_build() with that child's index (i starts at 1
+ * because the first child was handled by stripe_mknod()).
+ *
+ * Always returns 0.
+ */
+int32_t
+stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ int ret = 0;
+ int need_unref = 0;
- local->call_count = priv->child_count;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- dict = get_new_dict ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict");
- }
+ prev = cookie;
+ priv = this->private;
+ local = frame->local;
+ trav = this->children;
- dict_ref (dict);
- /* TODO: check return value */
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- ret = dict_set_int32 (dict, index_key, i);
+ local->call_count--;
- STACK_WIND (frame,
- stripe_mknod_ifreg_setxattr_cbk,
- priv->xl_array[i],
- priv->xl_array[i]->fops->setxattr,
- &local->loc, dict, 0);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
+ goto out;
+ }
- dict_unref (dict);
+ local->op_ret = op_ret;
+
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+
+ if (gf_uuid_is_null (local->ia_gfid))
+ gf_uuid_copy (local->ia_gfid, buf->ia_gfid);
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+
+ /* skip the first child, it has already answered */
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", local->loc.path);
 }
- return 0;
+ need_unref = 1;
+
+ dict_copy (local->xattr, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count, i,
+ priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to build xattr request");
+
+ } else {
+ dict = local->xattr;
 }
- /* Create itself has failed.. so return
- without setxattring */
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ /* Forward the caller's umask saved by stripe_mknod(), so the
+ remaining children get the same umask as the first child */
+ STACK_WIND (frame, stripe_mknod_ifreg_cbk,
+ trav->xlator, trav->xlator->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask, dict);
+ trav = trav->next;
+ i++;
+
+ if (dict && need_unref)
+ dict_unref (dict);
 }
-out:
+
 return 0;
+
+out:
+
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
 }
@@ -1406,21 +1646,25 @@ int32_t
stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
+
+int
stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = EINVAL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ int32_t i = 0;
+ dict_t *dict = NULL;
+ int ret = 0;
+ int need_unref = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1429,7 +1673,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -1448,43 +1691,63 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
- local->inode = loc->inode;
+ local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
+ local->xattr = dict_copy_with_ref (xdata, NULL);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
- /* Everytime in stripe lookup, all child nodes should
+ /* Every time in stripe lookup, all child nodes should
be looked up */
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_mknod_ifreg_cbk,
- trav->xlator, trav->xlator->fops->mknod,
- loc, mode, rdev);
- trav = trav->next;
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
+
+ dict_copy (xdata, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
}
- /* This case is handled, no need to continue further. */
+ STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, rdev, umask, dict);
+
+ if (dict && need_unref)
+ dict_unref (dict);
return 0;
}
STACK_WIND (frame, stripe_single_mknod_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
+ loc, mode, rdev, umask, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1493,11 +1756,10 @@ int32_t
stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
- inode_t *local_inode = NULL;
call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
@@ -1525,12 +1787,6 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
- if (FIRST_CHILD(this) == prev->this) {
- local->inode = inode_ref (inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
- }
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
@@ -1546,12 +1802,7 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- local_inode = local->inode;
-
- if (local->op_ret != -1) {
+ if (local->failed != -1) {
local->preparent.ia_blocks = local->preparent_blocks;
local->preparent.ia_size = local->preparent_size;
local->postparent.ia_blocks = local->postparent_blocks;
@@ -1559,20 +1810,79 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
}
- STACK_UNWIND_STRICT (mkdir, frame, local->op_ret,
+ STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
-
- if (local_inode)
- inode_unref (local_inode);
+ &local->postparent, NULL);
}
out:
return 0;
}
+
int32_t
-stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+/*
+ * Callback for the mkdir wound to the first child by stripe_mkdir().
+ *
+ * If the first child failed, its errno is stored in local and the mkdir
+ * is unwound with -1.  Otherwise the first child's inode and iatts are
+ * recorded in local, the block/size accumulators are seeded from them,
+ * and mkdir is wound to each remaining child with stripe_mkdir_cbk.
+ *
+ * Always returns 0.
+ */
+stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, inode_t *inode,
+                        struct iatt *buf, struct iatt *preparent,
+                        struct iatt *postparent, dict_t *xdata)
+{
+        stripe_local_t *local   = NULL;
+        call_frame_t   *prev    = NULL;
+        xlator_list_t  *sibling = NULL;
+
+        if (!this || !frame || !frame->local || !cookie) {
+                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+                goto fail;
+        }
+
+        local = frame->local;
+        prev  = cookie;
+
+        /* the first child has answered; the rest start at its successor */
+        local->call_count--;
+        sibling = this->children->next;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+                        prev->this->name, strerror (op_errno));
+                local->op_errno = op_errno;
+                goto fail;
+        }
+
+        local->op_ret = 0;
+        local->inode  = inode_ref (inode);
+
+        /* snapshot of the first child's attributes */
+        local->stbuf      = *buf;
+        local->postparent = *postparent;
+        local->preparent  = *preparent;
+
+        /* seed the per-child accumulators */
+        local->stbuf_blocks      += buf->ia_blocks;
+        local->stbuf_size         = buf->ia_size;
+        local->preparent_blocks  += preparent->ia_blocks;
+        local->preparent_size     = preparent->ia_size;
+        local->postparent_blocks += postparent->ia_blocks;
+        local->postparent_size    = postparent->ia_size;
+
+        for (; sibling != NULL; sibling = sibling->next) {
+                STACK_WIND (frame, stripe_mkdir_cbk, sibling->xlator,
+                            sibling->xlator->fops->mkdir, &local->loc,
+                            local->mode, local->umask, local->xdata);
+        }
+        return 0;
+
+fail:
+        STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+                             NULL, NULL);
+        return 0;
+}
+
+
+int
+stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -1594,27 +1904,27 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->call_count = priv->child_count;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ local->mode = mode;
+ local->umask = umask;
+ loc_copy (&local->loc, loc);
frame->local = local;
- /* Everytime in stripe lookup, all child nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_mkdir_cbk,
- trav->xlator, trav->xlator->fops->mkdir,
- loc, mode);
- trav = trav->next;
- }
+ /* Every time in stripe lookup, all child nodes should be looked up */
+ STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, loc, mode, umask, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1623,12 +1933,12 @@ int32_t
stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
- inode_t *local_inode = NULL;
call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1655,6 +1965,16 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG(inode->ia_type)) {
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get stripe context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ }
+
if (FIRST_CHILD(this) == prev->this) {
local->inode = inode_ref (inode);
local->stbuf = *buf;
@@ -1665,6 +1985,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1679,8 +2001,6 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- local_inode = local->inode;
-
if (local->op_ret != -1) {
local->preparent.ia_blocks = local->preparent_blocks;
local->preparent.ia_size = local->preparent_size;
@@ -1689,20 +2009,17 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
}
- STACK_UNWIND_STRICT (link, frame, local->op_ret,
+ STRIPE_STACK_UNWIND (link, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
-
- if (local_inode)
- inode_unref (local_inode);
+ &local->postparent, NULL);
}
out:
return 0;
}
int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1725,8 +2042,7 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1735,18 +2051,18 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
frame->local = local;
local->call_count = priv->child_count;
- /* Everytime in stripe lookup, all child
+ /* Every time in stripe lookup, all child
nodes should be looked up */
while (trav) {
STACK_WIND (frame, stripe_link_cbk,
trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
+ oldloc, newloc, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1754,12 +2070,10 @@ int32_t
stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
- fd_t *lfd = NULL;
stripe_local_t *local = NULL;
- inode_t *local_inode = NULL;
if (!this || !frame || !frame->local) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1775,18 +2089,9 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
UNLOCK (&frame->lock);
if (!callcnt) {
- local_inode = local->inode;
- lfd = local->fd;
-
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (create, frame, local->op_ret, local->op_errno,
+ STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
-
- if (local_inode)
- inode_unref (local_inode);
- if (lfd)
- fd_unref (lfd);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1794,16 +2099,16 @@ out:
int32_t
-stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- inode_t *local_inode = NULL;
- fd_t *lfd = NULL;
+ int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
- int32_t callcnt = 0;
- call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1811,7 +2116,7 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
prev = cookie;
- priv = this->private;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
@@ -1822,13 +2127,40 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
prev->this->name, strerror (op_errno));
- local->op_ret = -1;
+ local->failed = 1;
local->op_errno = op_errno;
}
+
+ if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
+ local->op_ret = op_ret;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
}
UNLOCK (&frame->lock);
if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
if (local->op_ret == -1) {
local->call_count = priv->child_count;
trav = this->children;
@@ -1837,44 +2169,57 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_create_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
}
- lfd = local->fd;
- local_inode = local->inode;
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
+ }
- if (local_inode)
- inode_unref (local_inode);
- if (lfd)
- fd_unref (lfd);
+ /* Create itself has failed.. so return
+ without setxattring */
+ STRIPE_STACK_UNWIND (create, frame, local->op_ret,
+ local->op_errno, local->fd,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+
out:
return 0;
}
+
+
int32_t
-stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- fd_t *lfd = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- inode_t *local_inode = NULL;
call_frame_t *prev = NULL;
- int ret = 0;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ loc_t *loc = NULL;
+ int32_t need_unref = 0;
+ int32_t ret = -1;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1884,136 +2229,89 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
priv = this->private;
local = frame->local;
+ trav = this->children;
+ loc = &local->loc;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
+ --local->call_count;
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- /* */
- if (local->op_ret >= 0) {
- fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!fctx) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
+ local->op_ret = 0;
+ /* Get the mapping in inode private */
+ /* Get the stat buf right */
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)fctx);
- }
-
- if ((local->op_ret != -1) &&
- local->stripe_size && priv->xattr_supported) {
- /* Send a setxattr request to nodes where
- the files are created */
- int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
- dict_t *dict = NULL;
-
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
- local->call_count = priv->child_count;
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
- for (i = 0; i < priv->child_count; i++) {
- dict = get_new_dict ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "error allocating dict");
- }
- dict_ref (dict);
+ if (local->failed)
+ local->op_ret = -1;
- /* TODO: check return values */
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- ret = dict_set_int32 (dict, index_key, i);
+ if (local->op_ret == -1) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_create_fail_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ &local->loc, 0, NULL);
+ return 0;
+ }
- STACK_WIND (frame, stripe_create_setxattr_cbk,
- priv->xl_array[i],
- priv->xl_array[i]->fops->setxattr,
- &local->loc, dict, 0);
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
- dict_unref (dict);
+ /* Send a setxattr request to nodes where the
+ files are created */
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
}
- return 0;
- }
+ need_unref = 1;
-unwind:
- /* Create itself has failed.. so return
- without setxattring */
- lfd = local->fd;
- local_inode = local->inode;
+ dict_copy (local->xattr, dict);
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (create, frame, local->op_ret,
- local->op_errno, local->fd,
- local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = local->xattr;
+ }
- if (local_inode)
- inode_unref (local_inode);
- if (lfd)
- fd_unref (lfd);
+ STACK_WIND (frame, stripe_create_cbk, trav->xlator,
+ trav->xlator->fops->create, &local->loc,
+ local->flags, local->mode, local->umask, local->fd,
+ dict);
+ trav = trav->next;
+ if (need_unref && dict)
+ dict_unref (dict);
+ i++;
}
out:
@@ -2021,6 +2319,7 @@ out:
}
+
/**
* stripe_create - If a block-size is specified for the 'name', create the
* file in all the child nodes. If not, create it in only first child.
@@ -2029,12 +2328,15 @@ out:
*/
int32_t
stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
int32_t op_errno = EINVAL;
+ int ret = 0;
+ int need_unref = 0;
+ int i = 0;
+ dict_t *dict = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2055,45 +2357,71 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ if (xdata)
+ local->xattr = dict_ref (xdata);
local->call_count = priv->child_count;
+ /* Send a setxattr request to nodes where the
+ files are created */
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, loc, flags, mode, fd);
- trav = trav->next;
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
+
+ dict_copy (xdata, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
}
+
+ STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->create, loc, flags, mode,
+ umask, fd, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
+
+
return 0;
err:
- STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, xdata);
return 0;
}
int32_t
stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
- fd_t *lfd = NULL;
call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
@@ -2128,228 +2456,20 @@ stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- if (local->op_ret == -1) {
- if (local->fctx) {
- if (!local->fctx->static_array)
- GF_FREE (local->fctx->xl_array);
- GF_FREE (local->fctx);
- }
- } else {
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)local->fctx);
- }
-
- lfd = local->fd;
-
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (open, frame, local->op_ret,
- local->op_errno, local->fd);
- if (lfd)
- fd_unref (lfd);
-
+ STRIPE_STACK_UNWIND (open, frame, local->op_ret,
+ local->op_errno, local->fd, xdata);
}
out:
return 0;
}
-int32_t
-stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- char key[256] = {0,};
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- call_frame_t *prev = NULL;
- fd_t *lfd = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = (call_frame_t *)cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- if (local->op_errno != EIO)
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- goto unlock;
- }
-
- if (!dict)
- goto unlock;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- local->fctx->xl_array =
- GF_CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32 (data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- if (local->fctx->xl_array) {
- if (local->fctx->xl_array[index]) {
- gf_log (this->name, GF_LOG_ERROR,
- "duplicate entry @ index (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- local->fctx->xl_array[index] = prev->this;
- }
- local->entry_count++;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* TODO: if self-heal flag is set, do it */
- if (local->xattr_self_heal_needed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: stripe info need to be healed",
- local->loc.path);
- }
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret)
- goto err;
-
- if (local->entry_count != local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "entry-count (%d) != stripe-count (%d)",
- local->entry_count, local->fctx->stripe_count);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
- if (!local->fctx->stripe_size) {
- gf_log (this->name, GF_LOG_ERROR, "stripe size not set");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
-
- local->call_count = local->fctx->stripe_count;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc,
- local->flags, local->fd, 0);
- trav = trav->next;
- }
- }
-
- return 0;
-err:
- lfd = local->fd;
-
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (open, frame, local->op_ret, local->op_errno,
- local->fd);
- if (lfd)
- fd_unref (lfd);
-out:
- return 0;
-}
-
/**
* stripe_open -
*/
int32_t
stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2371,8 +2491,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2388,52 +2507,28 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
/* Striped files */
local->flags = flags;
local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
-
- if (priv->xattr_supported) {
- while (trav) {
- STACK_WIND (frame, stripe_open_getxattr_cbk,
- trav->xlator, trav->xlator->fops->getxattr,
- loc, NULL);
- trav = trav->next;
- }
- return 0;
- }
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fctx->static_array = 1;
- local->fctx->stripe_size = local->stripe_size;
- local->fctx->stripe_count = priv->child_count;
- local->fctx->xl_array = priv->xl_array;
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
while (trav) {
STACK_WIND (frame, stripe_open_cbk, trav->xlator,
trav->xlator->fops->open,
&local->loc, local->flags, local->fd,
- wbflags);
+ xdata);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
- fd_t *local_fd = NULL;
call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
@@ -2462,11 +2557,8 @@ stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&frame->lock);
if (!callcnt) {
- local_fd = local->fd;
- STACK_UNWIND_STRICT (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- if (local_fd)
- fd_unref (local_fd);
+ STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
}
out:
return 0;
@@ -2474,7 +2566,7 @@ out:
int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -2496,8 +2588,7 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2508,19 +2599,19 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
+ trav->xlator->fops->opendir, loc, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2559,8 +2650,8 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
if (local->failed)
local->op_ret = -1;
- STACK_UNWIND_STRICT (lk, frame, local->op_ret,
- local->op_errno, &local->lock);
+ STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
+ local->op_errno, &local->lock, NULL);
}
out:
return 0;
@@ -2568,7 +2659,7 @@ out:
int32_t
stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -2584,8 +2675,7 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2596,20 +2686,20 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
while (trav) {
STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock);
+ trav->xlator->fops->lk, fd, cmd, lock, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2645,16 +2735,15 @@ stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (flush, frame, local->op_ret,
- local->op_errno);
+ STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2674,8 +2763,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto err;
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2686,13 +2774,13 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd);
+ trav->xlator->fops->flush, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2701,7 +2789,7 @@ err:
int32_t
stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2737,6 +2825,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -2757,21 +2848,21 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->post_buf.ia_size = local->postbuf_size;
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (fsync, frame, local->op_ret,
+ STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2783,31 +2874,38 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- local->op_ret = -1;
+
frame->local = local;
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+ local->op_ret = -1;
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags);
+ trav->xlator->fops->fsync, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2842,6 +2940,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -2857,9 +2958,8 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_blocks = local->stbuf_blocks;
}
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (fstat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
+ local->op_errno, &local->stbuf, NULL);
}
out:
@@ -2869,11 +2969,12 @@ out:
int32_t
stripe_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2885,8 +2986,7 @@ stripe_fstat (call_frame_t *frame,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2895,26 +2995,35 @@ stripe_fstat (call_frame_t *frame,
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd);
+ trav->xlator->fops->fstat, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2922,11 +3031,9 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2935,22 +3042,60 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset);
- trav = trav->next;
- }
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
err:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2986,16 +3131,15 @@ stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- stripe_local_wipe (local);
- STACK_UNWIND_STRICT (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -3011,8 +3155,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3023,20 +3166,20 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
while (trav) {
STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags);
+ trav->xlator->fops->fsyncdir, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
int32_t
stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t i = 0;
int32_t callcnt = 0;
@@ -3046,6 +3189,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt tmp_stbuf = {0,};
struct iobref *tmp_iobref = NULL;
struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3053,13 +3197,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local = frame->local;
+ prev = cookie;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
- if (op_ret != -1)
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
+ }
}
UNLOCK (&frame->lock);
@@ -3088,7 +3235,8 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
vec[count].iov_len =
(local->replies[i].requested_size -
local->replies[i].op_ret);
- iobuf = iobuf_get (this->ctx->iobuf_pool);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ vec[count].iov_len);
if (!iobuf) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
@@ -3097,15 +3245,22 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto done;
}
memset (iobuf->ptr, 0, vec[count].iov_len);
- iobref_add (local->iobref, iobuf);
vec[count].iov_base = iobuf->ptr;
+ iobref_add (local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
op_ret += vec[count].iov_len;
count++;
}
GF_FREE (local->replies[i].vector);
}
+ /* ENOENT signals EOF to the NFS-server */
+ if (op_ret != -1 && op_ret < local->readv_size &&
+ (local->offset + op_ret == buf->ia_size))
+ op_errno = ENOENT;
+
/* FIXME: notice that st_ino, and st_dev (gen) will be
* different than what inode will have. Make sure this doesn't
* cause any bugs at higher levels */
@@ -3116,13 +3271,11 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
done:
GF_FREE (local->replies);
tmp_iobref = local->iobref;
- fd_unref (local->fd);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vec,
- count, &tmp_stbuf, tmp_iobref);
+ STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
+ count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (vec)
- GF_FREE (vec);
+ GF_FREE (vec);
}
out:
return 0;
@@ -3135,7 +3288,7 @@ out:
int32_t
stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
int32_t index = 0;
int32_t callcnt = 0;
@@ -3146,8 +3299,10 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_local_t *local = NULL;
struct iovec *final_vec = NULL;
struct iatt tmp_stbuf = {0,};
+ struct iatt *tmp_stbuf_p = NULL; //need it for a warning
struct iobref *tmp_iobref = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3156,6 +3311,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
index = local->node_index;
+ prev = cookie;
mframe = local->orig_frame;
if (!mframe)
goto out;
@@ -3176,6 +3332,12 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
mlocal->replies[index].count = count;
mlocal->replies[index].vector = iov_dup (vector, count);
+ correct_file_size(stbuf, fctx, prev);
+
+ if (local->stbuf_size < stbuf->ia_size)
+ local->stbuf_size = stbuf->ia_size;
+ local->stbuf_blocks += stbuf->ia_blocks;
+
if (!mlocal->iobref)
mlocal->iobref = iobref_new ();
iobref_merge (mlocal->iobref, iobref);
@@ -3232,18 +3394,21 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* cause any bugs at higher levels */
memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf,
sizeof (struct iatt));
+ tmp_stbuf.ia_size = local->stbuf_size;
+ tmp_stbuf.ia_blocks = local->stbuf_blocks;
done:
/* */
GF_FREE (mlocal->replies);
tmp_iobref = mlocal->iobref;
- fd_unref (mlocal->fd);
- STACK_UNWIND_STRICT (readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref);
+ /* work around for nfs truncated read. Bug 3774 */
+ tmp_stbuf_p = &tmp_stbuf;
+ WIPE (tmp_stbuf_p);
+ STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
+ final_count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (final_vec)
- GF_FREE (final_vec);
+ GF_FREE (final_vec);
}
goto out;
@@ -3255,11 +3420,11 @@ check_size:
STACK_WIND (mframe, stripe_readv_fstat_cbk,
(fctx->xl_array[index]),
(fctx->xl_array[index])->fops->fstat,
- mlocal->fd);
+ mlocal->fd, NULL);
}
out:
- STACK_DESTROY (frame->root);
+ STRIPE_STACK_DESTROY (frame);
end:
return 0;
}
@@ -3267,7 +3432,7 @@ end:
int32_t
stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int32_t op_errno = EINVAL;
int32_t idx = 0;
@@ -3280,10 +3445,10 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
- xlator_list_t *trav = NULL;
stripe_fd_ctx_t *fctx = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -3291,9 +3456,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- trav = this->children;
-
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EBADFD;
goto err;
@@ -3301,6 +3464,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
if (!stripe_size) {
gf_log (this->name, GF_LOG_DEBUG,
"Wrong stripe size for the file");
@@ -3315,8 +3480,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rounded_end = roof (offset+size, stripe_size);
num_stripe = (rounded_end- rounded_start)/stripe_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3324,8 +3488,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
frame->local = local;
/* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC (num_stripe, sizeof (struct readv_replies),
- gf_stripe_mt_readv_replies);
+ local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
if (!local->replies) {
op_errno = ENOMEM;
goto err;
@@ -3340,8 +3504,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
for (index = off_index; index < (num_stripe + off_index); index++) {
rframe = copy_frame (frame);
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
op_errno = ENOMEM;
goto err;
@@ -3355,19 +3518,26 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset);
+ fd, frame_size, dest_offset, flags, xdata);
frame_offset += frame_size;
}
return 0;
err:
- if (local && local->fd)
- fd_unref (local->fd);
+ if (rframe)
+ STRIPE_STACK_DESTROY (rframe);
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -3375,11 +3545,15 @@ err:
int32_t
stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+ struct stripe_replies *reply = NULL;
+ int32_t i = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3388,44 +3562,85 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = ++local->call_count;
+ callcnt = ++mlocal->call_count;
+
+ mlocal->replies[local->node_index].op_ret = op_ret;
+ mlocal->replies[local->node_index].op_errno = op_errno;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->post_buf = *postbuf;
- local->pre_buf = *prebuf;
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
- if ((callcnt == local->wind_count) && local->unwind) {
- STACK_UNWIND_STRICT (writev, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ /*
+ * Only return the number of consecutively written bytes up until
+ * the first error. Only return an error if it occurs first.
+ *
+ * When a short write occurs, the application should retry at the
+ * appropriate offset, at which point we'll potentially pass back
+ * the error.
+ */
+ for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
+ i++, reply++) {
+ if (reply->op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "reply %d "
+ "returned error %s", i,
+ strerror(reply->op_errno));
+ if (!mlocal->op_ret) {
+ mlocal->op_ret = -1;
+ mlocal->op_errno = reply->op_errno;
+ }
+ break;
+ }
+
+ mlocal->op_ret += reply->op_ret;
+
+ if (reply->op_ret < reply->requested_size)
+ break;
+ }
+
+ GF_FREE(mlocal->replies);
+
+ STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
}
out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
int32_t
stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- struct iovec *tmp_vec = vector;
- stripe_private_t *priv = NULL;
+ struct iovec *tmp_vec = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
int32_t idx = 0;
@@ -3436,15 +3651,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ off_t rounded_start = 0;
+ off_t rounded_end = 0;
+ int32_t total_chunks = 0;
+ call_frame_t *wframe = NULL;
+ stripe_local_t *wlocal = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- priv = this->private;
-
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EINVAL;
goto err;
@@ -3452,26 +3671,53 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
/* File has to be stripped across the child nodes */
for (idx = 0; idx< count; idx ++) {
- total_size += tmp_vec[idx].iov_len;
+ total_size += vector[idx].iov_len;
}
remaining_size = total_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ rounded_start = floor(offset, stripe_size);
+ rounded_end = roof(offset + total_size, stripe_size);
+ total_chunks = (rounded_end - rounded_start) / stripe_size;
+ local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
+ if (!local->replies) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ total_chunks = 0;
while (1) {
+ wframe = copy_frame(frame);
+ wlocal = mem_get0(this->local_pool);
+ if (!wlocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ wlocal->orig_frame = frame;
+ wframe->local = wlocal;
+
/* Send striped chunk of the vector to child
nodes appropriately. */
- trav = this->children;
-
idx = (((offset + offset_offset) /
local->stripe_size) % fctx->stripe_count);
@@ -3497,32 +3743,565 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (remaining_size == 0)
local->unwind = 1;
- STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],
+ /*
+ * Store off the request index (with respect to the chunk of the
+ * initial offset) and the size of the request. This is required
+ * in the callback to calculate an appropriate return value in
+ * the event of a write failure in one or more requests.
+ */
+ wlocal->node_index = total_chunks;
+ local->replies[total_chunks].requested_size = fill_size;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, iobref);
+ tmp_count, dest_offset, flags, iobref,
+ xdata);
+
GF_FREE (tmp_vec);
offset_offset += fill_size;
+ total_chunks++;
if (remaining_size == 0)
break;
}
return 0;
err:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ if (wframe)
+ STRIPE_STACK_DESTROY(wframe);
+
+ STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_release (xlator_t *this, fd_t *fd)
+stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send fallocate request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single fallocate per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->fallocate, fd, mode,
+ dest_offset, fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send discard request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single discard per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->discard, fd, dest_offset,
+ fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ GF_ASSERT (frame);
+
+ if (!this || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_del (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size,
+ fctx->stripe_count);
+
+ STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->zerofill, fd,
+ dest_offset, fill_size, xdata);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ /* TBD */
+ gf_log (this->name, GF_LOG_INFO, "seek called on %s.",
+ uuid_utoa (fd->inode->gfid));
+ STRIPE_STACK_UNWIND (seek, frame, -1, ENOTSUP, 0, NULL);
+ return 0;
+}
+
+int32_t
+stripe_release (xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int
+stripe_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_fctx = 0;
+ stripe_fd_ctx_t *fctx = NULL;
+
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (inode, err);
+
+ (void) inode_ctx_del (inode, this, &tmp_fctx);
if (!tmp_fctx) {
goto err;
}
@@ -3533,18 +4312,17 @@ stripe_release (xlator_t *this, fd_t *fd)
GF_FREE (fctx->xl_array);
GF_FREE (fctx);
-
err:
- return 0;
+ return 0;
}
-
int32_t
notify (xlator_t *this, int32_t event, void *data, ...)
{
stripe_private_t *priv = NULL;
int down_client = 0;
int i = 0;
+ gf_boolean_t heard_from_all_children = _gf_false;
if (!this)
return 0;
@@ -3562,23 +4340,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
if (data == FIRST_CHILD (this))
priv->first_child_down = 0;
- if (!priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
break;
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ // 'CONNECTING' doesn't ensure it's CHILD_UP, so do nothing
+ goto out;
+ }
case GF_EVENT_CHILD_DOWN:
{
/* get an index number to set */
@@ -3586,20 +4369,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
if (data == FIRST_CHILD (this))
priv->first_child_down = 1;
- if (priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
@@ -3609,68 +4391,252 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
/* */
default_notify (this, event, data);
+ goto out;
}
break;
}
+ // Consider child as down if its last_event is not CHILD_UP
+ for (i = 0, down_client = 0; i < priv->child_count; i++)
+ if (priv->last_event[i] != GF_EVENT_CHILD_UP)
+ down_client++;
+
+ LOCK (&priv->lock);
+ {
+ priv->nodes_down = down_client;
+ }
+ UNLOCK (&priv->lock);
+
+ heard_from_all_children = _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (!priv->last_event[i])
+ heard_from_all_children = _gf_false;
+
+ if (heard_from_all_children)
+ default_notify (this, event, data);
+out:
return 0;
}
int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup (stripe_str);
- stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
- gf_stripe_mt_stripe_options);
- if (!stripe_opt) {
- GF_FREE (dup_str);
- goto out;
- }
+ int ret = -1;
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
+
+ local = frame->local;
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+
+ /**
+ * We overwrite ->op_* values here for subsequent failure
+ * conditions, hence we propagate the last errno down the
+ * stack.
+ */
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unlock;
}
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
- goto out;
+ }
+
+ unlock:
+ UNLOCK (&frame->lock);
+
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BD_XLATOR
+int
+stripe_is_bd (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *is_bd = data;
+
+ if (data == NULL)
+ return 0;
+
+ if (XATTR_IS_BD (key))
+ *is_bd = _gf_true;
+
+ return 0;
+}
+
+static gf_boolean_t
+stripe_setxattr_is_bd (dict_t *dict)
+{
+ gf_boolean_t is_bd = _gf_false;
+
+ if (dict == NULL)
+ goto out;
+
+ dict_foreach (dict, stripe_is_bd, &is_bd);
+out:
+ return is_bd;
+}
+#else
+#define stripe_setxattr_is_bd(dict) _gf_false
+#endif
+
+int
+stripe_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int i = 0;
+ gf_boolean_t is_bd = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ local->wind_count = priv->child_count;
+ local->op_ret = local->op_errno = 0;
+
+ is_bd = stripe_setxattr_is_bd (dict);
+
+ /**
+ * Set xattrs for directories on all subvolumes. Additionally
+ * this power is only given to a special client. Bd xlator
+ * also needs xattrs for regular files (ie LVs)
+ */
+ if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
+ IA_ISDIR (loc->inode->ia_type)) || is_bd) {
+ for (i = 0; i < priv->child_count; i++, trav = trav->next) {
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ trav->xlator, trav->xlator->fops->setxattr,
+ loc, dict, flags, xdata);
}
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+ } else {
+ local->wind_count = 1;
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- if (!priv->pattern) {
- priv->pattern = stripe_opt;
- } else {
- temp_stripeopt = priv->pattern;
- while (temp_stripeopt->next)
- temp_stripeopt = temp_stripeopt->next;
- temp_stripeopt->next = stripe_opt;
+
+int
+stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+stripe_is_special_key (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data)
+{
+ gf_boolean_t *is_special = NULL;
+
+ if (data == NULL) {
+ goto out;
+ }
+
+ is_special = data;
+
+ if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key))
+ *is_special = _gf_true;
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ stripe_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->wind_count;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ stripe_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ goto out;
+ }
+
+ frame->local = local;
+
+ local->wind_count = priv->child_count;
+
+ trav = this->children;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_fsetxattr_everyone_cbk,
+ trav->xlator, trav->xlator->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ trav = trav->next;
}
ret = 0;
@@ -3678,6 +4644,377 @@ out:
return ret;
}
+static gf_boolean_t
+stripe_fsetxattr_is_special (dict_t *dict)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (dict == NULL) {
+ goto out;
+ }
+
+ dict_foreach (dict, stripe_is_special_key, &is_spl);
+
+out:
+ return is_spl;
+}
+
+int
+stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
+ gf_boolean_t is_spl = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ is_spl = stripe_fsetxattr_is_special (dict);
+ if (is_spl) {
+ ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict,
+ flags, xdata);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+out:
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, stripe_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, stripe_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *parent)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ stripe_local_t *main_local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ int done = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ entry = local->dirent;
+
+ main_frame = local->orig_frame;
+ main_local = main_frame->local;
+ LOCK (&frame->lock);
+ {
+
+ local->call_count--;
+ if (!local->call_count)
+ done = 1;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ goto unlock;
+ }
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
+ stripe_iatt_merge (stbuf, &entry->d_stat);
+ local->stbuf_blocks += stbuf->ia_blocks;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (done) {
+ inode_ctx_put (entry->inode, this,
+ (uint64_t) (long)local->fctx);
+
+ done = 0;
+ LOCK (&main_frame->lock);
+ {
+ main_local->wind_count--;
+ if (!main_local->wind_count)
+ done = 1;
+ if (local->op_ret == -1) {
+ main_local->op_errno = local->op_errno;
+ main_local->op_ret = local->op_ret;
+ }
+ entry->d_stat.ia_blocks = local->stbuf_blocks;
+ }
+ UNLOCK (&main_frame->lock);
+ if (done) {
+ main_frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, main_frame,
+ main_local->op_ret,
+ main_local->op_errno,
+ &main_local->entries, NULL);
+ gf_dirent_free (&main_local->entries);
+ stripe_local_wipe (main_local);
+ mem_put (main_local);
+ }
+ frame->local = NULL;
+ stripe_local_wipe (local);
+ mem_put (local);
+ STRIPE_STACK_DESTROY (frame);
+ }
+
+ return 0;
+}
+
+int32_t
+stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ gf_dirent_t *local_entry = NULL;
+ gf_dirent_t *tmp_entry = NULL;
+ xlator_list_t *trav = NULL;
+ loc_t loc = {0, };
+ int32_t count = 0;
+ stripe_private_t *priv = NULL;
+ int32_t subvols = 0;
+ dict_t *xattrs = NULL;
+ call_frame_t *local_frame = NULL;
+ stripe_local_t *local_ent = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
+ local = frame->local;
+ trav = this->children;
+ priv = this->private;
+
+ subvols = priv->child_count;
+
+ LOCK (&frame->lock);
+ {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+
+ if (op_ret != -1) {
+ list_splice_init (&orig_entries->list,
+ &local->entries.list);
+ local->wind_count = op_ret;
+ }
+
+ }
+ UNLOCK (&frame->lock);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ xattrs = dict_new ();
+ if (xattrs)
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
+ count = op_ret;
+ list_for_each_entry_safe (local_entry, tmp_entry,
+ (&local->entries.list), list) {
+
+ if (!local_entry)
+ break;
+ if (!IA_ISREG (local_entry->d_stat.ia_type) || !local_entry->inode) {
+ LOCK (&frame->lock);
+ {
+ local->wind_count--;
+ count = local->wind_count;
+ }
+ UNLOCK (&frame->lock);
+ continue;
+ }
+
+ local_frame = copy_frame (frame);
+
+ if (!local_frame) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
+ }
+
+ local_ent = mem_get0 (this->local_pool);
+ if (!local_ent) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
+ }
+
+ loc.inode = inode_ref (local_entry->inode);
+
+ gf_uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
+
+ local_ent->orig_frame = frame;
+
+ local_ent->call_count = subvols;
+
+ local_ent->dirent = local_entry;
+
+ local_frame->local = local_ent;
+
+ trav = this->children;
+ while (trav) {
+ STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
+ trav->xlator, trav->xlator->fops->lookup,
+ &loc, xattrs);
+ trav = trav->next;
+ }
+ loc_wipe (&loc);
+ }
+out:
+ if (!count) {
+ /* all entries are directories */
+ frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, frame,
+ (local ? local->op_ret : -1),
+ (local ? local->op_errno : EINVAL),
+ (local ? &local->entries : NULL),
+ NULL);
+ gf_dirent_free (&local->entries);
+ stripe_local_wipe (local);
+ mem_put (local);
+ }
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+
+}
+int32_t
+stripe_readdirp (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ if (priv->first_child_down) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->fd = fd_ref (fd);
+
+ local->wind_count = 0;
+
+ local->count = 0;
+ local->op_ret = -1;
+ INIT_LIST_HEAD(&local->entries);
+
+ if (!trav)
+ goto err;
+
+ STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
+ trav->xlator->fops->readdirp, fd, size, off, xdata);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+
+}
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -3698,6 +5035,89 @@ out:
return ret;
}
+/*
+ * Free every node of the priv->pattern list and leave the list empty.
+ *
+ * Returns 0 on success and -1 when @priv fails validation.
+ */
+static int
+clear_pattern_list (stripe_private_t *priv)
+{
+        struct stripe_options *node = NULL;
+        struct stripe_options *next = NULL;
+        int                    ret  = -1;
+
+        GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+
+        /* Detach the list first so priv never points at a freed node. */
+        node = priv->pattern;
+        priv->pattern = NULL;
+
+        for (; node != NULL; node = next) {
+                next = node->next;
+                GF_FREE (node);
+        }
+
+        ret = 0;
+ out:
+        return ret;
+}
+
+
+/**
+ * reconfigure - apply changed volume options to a live stripe translator.
+ *
+ * With priv->lock held, the current pattern list is dropped and rebuilt from
+ * the "block-size" entry of @options; when that entry is absent the default
+ * value declared for the option is parsed into priv->block_size instead.
+ * The "coalesce" option is then re-read.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        stripe_private_t *priv = NULL;
+        data_t           *data = NULL;
+        volume_option_t  *opt  = NULL;
+        int               ret  = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (this->private);
+
+        priv = this->private;
+
+        LOCK (&priv->lock);
+        {
+                ret = clear_pattern_list (priv);
+                if (ret)
+                        goto unlock;
+
+                data = dict_get (options, "block-size");
+                if (data) {
+                        ret = set_stripe_block_size (this, priv, data->data);
+                        if (ret)
+                                goto unlock;
+                } else {
+                        opt = xlator_volume_option_get (this, "block-size");
+                        if (!opt) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "option 'block-size' not found");
+                                ret = -1;
+                                goto unlock;
+                        }
+
+                        if (gf_string2bytesize_uint64 (opt->default_value,
+                                                       &priv->block_size)) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Unable to set default block-size ");
+                                ret = -1;
+                                goto unlock;
+                        }
+                }
+
+                /*
+                 * GF_OPTION_RECONF is only handed a label, not 'ret', so a
+                 * jump to 'unlock' from inside it would otherwise leave the
+                 * success value from the block-size step in place.  Assume
+                 * failure until the macro has completed.
+                 * NOTE(review): relies on the macro jumping to the label on
+                 * error - confirm against its definition.
+                 */
+                ret = -1;
+                GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+                                 unlock);
+                ret = 0;
+        }
+ unlock:
+        UNLOCK (&priv->lock);
+
+        return ret;
+}
+
/**
* init - This function is called when xlator-graph gets initialized.
* The option given in volfiles are parsed here.
@@ -3707,6 +5127,7 @@ int32_t
init (xlator_t *this)
{
stripe_private_t *priv = NULL;
+ volume_option_t *opt = NULL;
xlator_list_t *trav = NULL;
data_t *data = NULL;
int32_t count = 0;
@@ -3750,9 +5171,9 @@ init (xlator_t *this)
if (!priv->xl_array)
goto out;
- priv->state = GF_CALLOC (count, sizeof (int8_t),
- gf_stripe_mt_int8_t);
- if (!priv->state)
+ priv->last_event = GF_CALLOC (count, sizeof (int),
+ gf_stripe_mt_int32_t);
+ if (!priv->last_event)
goto out;
priv->child_count = count;
@@ -3772,41 +5193,56 @@ init (xlator_t *this)
goto out;
}
- priv->block_size = (128 * GF_UNIT_KB);
- /* option stripe-pattern *avi:1GB,*pdf:4096 */
- data = dict_get (this->options, "block-size");
- if (!data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No \"option block-size <x>\" given, defaulting "
- "to 128KB");
- } else {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto out;
- }
-
- priv->xattr_supported = 1;
- data = dict_get (this->options, "use-xattr");
- if (data) {
- if (gf_string2boolean (data->data,
- &priv->xattr_supported) == -1) {
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+ if (gf_string2bytesize_uint64 (opt->default_value, &priv->block_size)){
gf_log (this->name, GF_LOG_ERROR,
- "error setting hard check for extended "
- "attribute");
- //return -1;
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ /* option stripe-pattern *avi:1GB,*pdf:16K */
+ data = dict_get (this->options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
}
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
+ GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
/* notify related */
priv->nodes_down = priv->child_count;
+
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
+
+ this->local_pool = mem_pool_new (stripe_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
this->private = priv;
ret = 0;
out:
if (ret) {
if (priv) {
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
GF_FREE (priv);
}
}
@@ -3829,8 +5265,8 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ this->private = NULL;
+ GF_FREE (priv->xl_array);
trav = priv->pattern;
while (trav) {
@@ -3838,6 +5274,7 @@ fini (xlator_t *this)
trav = trav->next;
GF_FREE (prev);
}
+ GF_FREE (priv->last_event);
LOCK_DESTROY (&priv->lock);
GF_FREE (priv);
}
@@ -3846,44 +5283,493 @@ out:
return;
}
+/*
+ * Unwind a getxattr call through STRIPE_STACK_UNWIND with the given result.
+ * Passed to cluster_handle_marker_getxattr as its unwind routine.
+ * Always returns 0.
+ */
+int32_t
+stripe_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+                        dict_t *dict, dict_t *xdata)
+{
+        STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+        return 0;
+}
+
+/**
+ * stripe_internal_getxattr_cbk - getxattr/fgetxattr reply filter.
+ *
+ * Removes the translator's own bookkeeping keys
+ * (trusted.<xlator-name>.stripe-{size,count,index,coalesce}) from a
+ * successful reply and then unwinds it unchanged otherwise.
+ *
+ * This callback never reads frame->local, and stripe_fgetxattr winds to it
+ * without attaching one, so frame->local is deliberately not validated
+ * here; requiring it would skip the filtering on that path.
+ *
+ * Always returns 0.
+ */
+int
+stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int op_ret, int op_errno, dict_t *xattr,
+                              dict_t *xdata)
+{
+        /* Same keys, in the same order, as before. */
+        const char *suffixes[] = {
+                "stripe-size",
+                "stripe-count",
+                "stripe-index",
+                "stripe-coalesce",
+        };
+        char        key[256] = {0,};
+        size_t      i        = 0;
+        int         len      = 0;
+
+        /* Without a frame there is nothing that could be unwound. */
+        VALIDATE_OR_GOTO (frame, bail);
+
+        if (!this || !xattr || (op_ret == -1))
+                goto out;
+
+        for (i = 0; i < sizeof (suffixes) / sizeof (suffixes[0]); i++) {
+                len = snprintf (key, sizeof (key), "trusted.%s.%s",
+                                this->name, suffixes[i]);
+                /* A truncated key cannot name a real entry; skip it. */
+                if ((len < 0) || ((size_t) len >= sizeof (key)))
+                        continue;
+
+                dict_del (xattr, key);
+        }
+
+out:
+        STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+bail:
+        return 0;
+
+}
+
+/**
+ * stripe_getxattr_cbk - collect getxattr replies wound to several children.
+ *
+ * Each successful reply is folded into local->xattr (the first one is
+ * referenced, later ones are merged with stripe_aggregate_xattr) and marks
+ * the overall result as success.  The reply that brings local->wind_count
+ * to zero unwinds the call with the aggregated dictionary.
+ *
+ * The counter update and the aggregation happen in one critical section:
+ * replies may arrive concurrently, and the final reply must not unwind
+ * while another one is still modifying local->xattr.
+ *
+ * Always returns 0.
+ */
+int
+stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+        int             call_cnt = 0;
+        stripe_local_t *local    = NULL;
+
+        /*
+         * Without the frame or its local there is no wind counter, so it
+         * is impossible to tell which reply is the last one; bail out
+         * rather than dereference a NULL local below.
+         */
+        VALIDATE_OR_GOTO (frame, bail);
+        VALIDATE_OR_GOTO (frame->local, bail);
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                call_cnt = --local->wind_count;
+
+                if (xattr && (op_ret >= 0)) {
+                        local->op_ret = 0;
+
+                        if (!local->xattr) {
+                                local->xattr = dict_ref (xattr);
+                        } else {
+                                stripe_aggregate_xattr (local->xattr, xattr);
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (!call_cnt) {
+                STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+                                     local->xattr, xdata);
+        }
+
+bail:
+        return 0;
+}
+
+/**
+ * stripe_vgetxattr_cbk - gather one virtual xattr from every child.
+ *
+ * The cookie carries the index of the replying child.  Each successful
+ * reply stores a copy of the value found under local->xsel in
+ * local->xattr_list[index].  The reply that drops local->wind_count to zero
+ * serializes the collected values with the filler matching local->xsel,
+ * places the result in a fresh dictionary and unwinds.
+ *
+ * Returns -1 on the early validation failures; otherwise the value of the
+ * last helper call made (which is stripe_free_xattr_str on the final reply).
+ */
+int32_t
+stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
+                      xlator_t *this, int32_t op_ret, int32_t op_errno,
+                      dict_t *dict, dict_t *xdata)
+{
+        stripe_local_t      *local      = NULL;
+        stripe_xattr_sort_t *slot       = NULL;
+        dict_t              *reply      = NULL;
+        void                *value      = NULL;
+        void                *serialized = NULL;
+        long                 child_idx  = 0;
+        int32_t              pending    = 0;
+        int32_t              ret        = -1;
+
+        if (!frame || !frame->local || !this) {
+                gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+                return ret;
+        }
+
+        local     = frame->local;
+        child_idx = (long) cookie;
+
+        if (local->xsel[0] == '\0') {
+                gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
+                return ret;
+        }
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->wind_count;
+
+                if ((op_ret < 0) || !dict)
+                        goto unlock;
+
+                /* The per-child table is allocated by the first good reply. */
+                if (!local->xattr_list)
+                        local->xattr_list = (stripe_xattr_sort_t *)
+                                GF_CALLOC (local->nallocs,
+                                           sizeof (stripe_xattr_sort_t),
+                                           gf_stripe_mt_xattr_sort_t);
+
+                if (!local->xattr_list)
+                        goto unlock;
+
+                slot = local->xattr_list + (int32_t) child_idx;
+
+                ret = dict_get_ptr_and_len (dict, local->xsel,
+                                            &value, &slot->xattr_len);
+                if (slot->xattr_len == 0)
+                        goto unlock;
+
+                slot->pos         = child_idx;
+                slot->xattr_value = gf_memdup (value, slot->xattr_len);
+
+                if (slot->xattr_value != NULL)
+                        local->xattr_total_len += slot->xattr_len + 1;
+        }
+ unlock:
+        UNLOCK (&frame->lock);
+
+        /* Only the last reply goes on to build the answer and unwind. */
+        if (pending)
+                return ret;
+
+        if (!local->xattr_total_len)
+                goto unwind;
+
+        reply = dict_new ();
+        if (!reply)
+                goto unwind;
+
+        /* select filler based on ->xsel */
+        if (XATTR_IS_PATHINFO (local->xsel)) {
+                ret = stripe_fill_pathinfo_xattr (this, local,
+                                                  (char **)&serialized);
+        } else if (XATTR_IS_LOCKINFO (local->xsel)) {
+                ret = stripe_fill_lockinfo_xattr (this, local, &serialized);
+        } else {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Unknown xattr in xattr request");
+                goto unwind;
+        }
+
+        if (!ret) {
+                ret = dict_set_dynptr (reply, local->xsel, serialized,
+                                       local->xattr_total_len);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Can't set %s key in dict",
+                                local->xsel);
+        }
+
+ unwind:
+        /*
+         * STRIPE_STACK_UNWIND releases "local", so it must not be touched
+         * after the unwind.  The result already lives in the reply
+         * dictionary, so the per-child copies are cleaned up first.
+         */
+        ret = stripe_free_xattr_str (local);
+        GF_FREE (local->xattr_list);
+        local->xattr_list = NULL;
+
+        STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+                             reply, NULL);
+
+        if (reply)
+                dict_unref (reply);
+
+        return ret;
+}
+
+/*
+ * Fill @subvols with the children a marker getxattr should be sent to and
+ * return how many were written.  All priv->child_count children are used,
+ * except for an xtime request on an inode that is neither file nor
+ * directory, where only the first entry of priv->xl_array is used.
+ * @gauge is not touched here.
+ */
+int
+stripe_marker_populate_args (call_frame_t *frame, int type, int *gauge,
+                             xlator_t **subvols)
+{
+        stripe_private_t *priv   = frame->this->private;
+        stripe_local_t   *local  = frame->local;
+        int               nsubs  = priv->child_count;
+
+        if ((MARKER_XTIME_TYPE == type) &&
+            !IA_FILE_OR_DIR (local->loc.inode->ia_type))
+                nsubs = 1;
+
+        memcpy (subvols, priv->xl_array, sizeof (*subvols) * nsubs);
+
+        return nsubs;
+}
+
+/**
+ * stripe_getxattr - getxattr entry point of the stripe translator.
+ *
+ * Depending on @name the request takes one of four routes:
+ *  - names starting with QUOTA_SIZE_KEY are wound to every child and the
+ *    replies aggregated in stripe_getxattr_cbk;
+ *  - pathinfo names are wound to every child with the child index as
+ *    cookie and assembled in stripe_vgetxattr_cbk;
+ *  - requests accepted by cluster_handle_marker_getxattr are left to it;
+ *  - everything else goes to the first child, with the stripe-private
+ *    keys filtered out of the reply by stripe_internal_getxattr_cbk.
+ *
+ * Always returns 0; failures are reported by unwinding with op_ret -1.
+ */
+int32_t
+stripe_getxattr (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, const char *name, dict_t *xdata)
+{
+        stripe_local_t   *local    = NULL;
+        xlator_list_t    *trav     = NULL;
+        stripe_private_t *priv     = NULL;
+        int32_t           op_errno = EINVAL;
+        int               i        = 0;
+        int               ret      = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        priv = this->private;
+
+        /* Initialization */
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+        local->op_ret = -1;
+        frame->local = local;
+        loc_copy (&local->loc, loc);
+
+
+        if (name && strncmp (name, QUOTA_SIZE_KEY,
+                             strlen (QUOTA_SIZE_KEY)) == 0) {
+                local->wind_count = priv->child_count;
+
+                for (i = 0, trav=this->children; i < priv->child_count; i++,
+                     trav = trav->next) {
+                        STACK_WIND (frame, stripe_getxattr_cbk,
+                                    trav->xlator, trav->xlator->fops->getxattr,
+                                    loc, name, xdata);
+                }
+
+                return 0;
+        }
+
+        if (name && (XATTR_IS_PATHINFO (name))) {
+                /*
+                 * ->xsel is a fixed-size buffer; refuse a name that does
+                 * not fit (terminator included) instead of writing past
+                 * its end.
+                 */
+                if (strlen (name) >= sizeof (local->xsel)) {
+                        op_errno = ENAMETOOLONG;
+                        goto err;
+                }
+
+                if (IA_ISREG (loc->inode->ia_type)) {
+                        ret = inode_ctx_get (loc->inode, this,
+                                             (uint64_t *) &local->fctx);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "stripe size unavailable from fctx"
+                                        " relying on pathinfo could lead to"
+                                        " wrong results");
+                }
+
+                local->nallocs = local->wind_count = priv->child_count;
+                (void) snprintf (local->xsel, sizeof (local->xsel), "%s",
+                                 name);
+
+                /**
+                 * for xattrs that need info from all children, fill ->xsel
+                 * as above and call the filler function in cbk based on
+                 * it
+                 */
+                for (i = 0, trav = this->children; i < priv->child_count; i++,
+                     trav = trav->next) {
+                        STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+                                           (void *) (long) i, trav->xlator,
+                                           trav->xlator->fops->getxattr,
+                                           loc, name, xdata);
+                }
+
+                return 0;
+        }
+
+        if (cluster_handle_marker_getxattr (frame, loc, name, priv->vol_uuid,
+                                            stripe_getxattr_unwind,
+                                            stripe_marker_populate_args) == 0)
+                return 0;
+
+        STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+
+        return 0;
+
+err:
+        /* Also releases 'local' when it was already attached to the frame. */
+        STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Tell whether @name is one of the xattrs that must be collected from
+ * every child: a name starting with GF_XATTR_LOCKINFO_KEY or one matching
+ * XATTR_IS_PATHINFO.  A NULL name is never special.
+ */
+static gf_boolean_t
+stripe_is_special_xattr (const char *name)
+{
+        if (name == NULL)
+                return _gf_false;
+
+        if (strncmp (name, GF_XATTR_LOCKINFO_KEY,
+                     strlen (GF_XATTR_LOCKINFO_KEY)) == 0)
+                return _gf_true;
+
+        if (XATTR_IS_PATHINFO (name))
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/**
+ * stripe_fgetxattr_from_everyone - wind fgetxattr for @name to all children.
+ *
+ * Records @name in local->xsel and winds to every child with the child
+ * index as cookie; stripe_vgetxattr_cbk assembles the replies.
+ *
+ * Returns 0 once the winds have been issued.  On failure the call is
+ * unwound with op_ret -1 and -1 is returned.
+ */
+int32_t
+stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                                const char *name, dict_t *xdata)
+{
+        stripe_local_t   *local    = NULL;
+        stripe_private_t *priv     = NULL;
+        int32_t           ret      = -1;
+        int32_t           op_errno = 0;
+        int               i        = 0;
+        xlator_list_t    *trav     = NULL;
+
+        priv = this->private;
+
+        if (!name) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->op_ret = -1;
+        frame->local = local;
+
+        /*
+         * ->xsel is a fixed-size buffer; refuse a name that does not fit
+         * (terminator included) instead of writing past its end.
+         */
+        if (strlen (name) >= sizeof (local->xsel)) {
+                op_errno = ENAMETOOLONG;
+                goto err;
+        }
+        (void) snprintf (local->xsel, sizeof (local->xsel), "%s", name);
+
+        local->nallocs = local->wind_count = priv->child_count;
+
+        for (i = 0, trav = this->children; i < priv->child_count; i++,
+             trav = trav->next) {
+                STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+                                   (void *) (long) i, trav->xlator,
+                                   trav->xlator->fops->fgetxattr,
+                                   fd, name, xdata);
+        }
+
+        return 0;
+
+err:
+        /* Releases 'local' too when it was already attached to the frame. */
+        STRIPE_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+        return ret;
+}
+
+/*
+ * fgetxattr entry point.  Special names (see stripe_is_special_xattr) are
+ * fetched from every child; anything else is wound to the first child and
+ * filtered by stripe_internal_getxattr_cbk.  Always returns 0.
+ */
+int32_t
+stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  const char *name, dict_t *xdata)
+{
+        if (!stripe_is_special_xattr (name)) {
+                STACK_WIND (frame, stripe_internal_getxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fgetxattr,
+                            fd, name, xdata);
+                return 0;
+        }
+
+        stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata);
+
+        return 0;
+}
+
+
+
+/**
+ * stripe_priv_dump - write the translator's private state to the process
+ * state dump.
+ *
+ * priv->lock is taken with TRY_LOCK; when it cannot be acquired nothing is
+ * written.
+ *
+ * Returns 0 after the section has been written, -1 when @this or its
+ * private data is missing, or the non-zero TRY_LOCK result when the lock
+ * is unavailable.
+ */
+int32_t
+stripe_priv_dump (xlator_t *this)
+{
+        char                    key[GF_DUMP_MAX_BUF_LEN];
+        int                     i = 0;
+        stripe_private_t       *priv = NULL;
+        int                     ret = -1;
+        struct stripe_options  *options = NULL;
+
+        GF_VALIDATE_OR_GOTO ("stripe", this, out);
+
+        priv = this->private;
+        if (!priv)
+                goto out;
+
+        ret = TRY_LOCK (&priv->lock);
+        if (ret != 0)
+                goto out;
+
+        gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
+        gf_proc_dump_write("child_count","%d", priv->child_count);
+
+        for (i = 0; i < priv->child_count; i++) {
+                snprintf (key, sizeof (key), "subvolumes[%d]", i);
+                gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type,
+                                    priv->xl_array[i]->name);
+        }
+
+        for (options = priv->pattern; options != NULL;
+             options = options->next) {
+                /* Dump the node being visited, not the head of the list. */
+                gf_proc_dump_write ("path_pattern", "%s",
+                                    options->path_pattern);
+                gf_proc_dump_write ("options_block_size", "%llu",
+                                    (unsigned long long) options->block_size);
+        }
+
+        /* Block sizes are 64-bit; print them with a matching conversion. */
+        gf_proc_dump_write ("block_size", "%llu",
+                            (unsigned long long) priv->block_size);
+        gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down);
+        gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down);
+        gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported);
+
+        UNLOCK (&priv->lock);
+
+out:
+        return ret;
+}
struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
+ .stat = stripe_stat,
+ .unlink = stripe_unlink,
+ .rename = stripe_rename,
+ .link = stripe_link,
+ .truncate = stripe_truncate,
+ .create = stripe_create,
+ .open = stripe_open,
+ .readv = stripe_readv,
+ .writev = stripe_writev,
+ .statfs = stripe_statfs,
+ .flush = stripe_flush,
+ .fsync = stripe_fsync,
+ .ftruncate = stripe_ftruncate,
+ .fstat = stripe_fstat,
+ .mkdir = stripe_mkdir,
+ .rmdir = stripe_rmdir,
+ .lk = stripe_lk,
+ .opendir = stripe_opendir,
+ .fsyncdir = stripe_fsyncdir,
+ .setattr = stripe_setattr,
+ .fsetattr = stripe_fsetattr,
+ .lookup = stripe_lookup,
+ .mknod = stripe_mknod,
+ .setxattr = stripe_setxattr,
+ .fsetxattr = stripe_fsetxattr,
+ .getxattr = stripe_getxattr,
+ .fgetxattr = stripe_fgetxattr,
+ .removexattr = stripe_removexattr,
+ .fremovexattr = stripe_fremovexattr,
+ .readdirp = stripe_readdirp,
+ .fallocate = stripe_fallocate,
+ .discard = stripe_discard,
+ .zerofill = stripe_zerofill,
+ .seek = stripe_seek,
};
struct xlator_cbks cbks = {
.release = stripe_release,
+ .forget = stripe_forget,
};
+struct xlator_dumpops dumpops = {
+ .priv = stripe_priv_dump,
+};
struct volume_options options[] = {
{ .key = {"block-size"},
- .type = GF_OPTION_TYPE_ANY
+ .type = GF_OPTION_TYPE_SIZE_LIST,
+ .default_value = "128KB",
+ .min = STRIPE_MIN_BLOCK_SIZE,
+ .description = "Size of the stripe unit that would be read "
+ "from or written to the striped servers."
},
{ .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Enable/Disable coalesce mode to flatten striped "
+ "files as stored on the server (i.e., eliminate holes "
+ "caused by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index 8afc6aa9a58..1e2fcb4e659 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -1,31 +1,17 @@
/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _STRIPE_H_
#define _STRIPE_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "xlator.h"
#include "logging.h"
#include "defaults.h"
@@ -33,9 +19,59 @@
#include "compat.h"
#include "compat-errno.h"
#include "stripe-mem-types.h"
+#include "libxlator.h"
#include <fnmatch.h>
#include <signal.h>
+#define STRIPE_PATHINFO_HEADER "STRIPE:"
+#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
+
+/*
+ * Unwind 'frame' for 'fop' and release the stripe_local_t attached to it.
+ * The local is detached from the frame before the unwind and wiped and
+ * returned with mem_put afterwards, so 'params' may still refer to it but
+ * the caller must not touch it once the macro has run.
+ */
+#define STRIPE_STACK_UNWIND(fop, frame, params ...) do {           \
+                stripe_local_t *__local = NULL;                     \
+                if (frame) {                                        \
+                        __local = (frame)->local;                   \
+                        (frame)->local = NULL;                      \
+                }                                                   \
+                STACK_UNWIND_STRICT (fop, frame, params);           \
+                if (__local) {                                      \
+                        stripe_local_wipe(__local);                 \
+                        mem_put (__local);                          \
+                }                                                   \
+        } while (0)
+
+/*
+ * Destroy the call stack that 'frame' belongs to and release the
+ * stripe_local_t attached to the frame.  'frame' must not be NULL.
+ */
+#define STRIPE_STACK_DESTROY(frame) do {                        \
+                stripe_local_t *__local = NULL;                 \
+                __local = (frame)->local;                       \
+                (frame)->local = NULL;                          \
+                STACK_DESTROY ((frame)->root);                  \
+                if (__local) {                                  \
+                        stripe_local_wipe (__local);            \
+                        mem_put (__local);                      \
+                }                                               \
+        } while (0)
+
+/*
+ * Check that 'fctx' exists and that each of its stripe_count xl_array
+ * slots is populated; otherwise set op_errno (EINVAL for a missing fctx,
+ * ESTALE for an empty slot) and jump to 'label'.
+ * Expects 'this' and 'op_errno' to be in scope at the point of use.
+ */
+#define STRIPE_VALIDATE_FCTX(fctx, label) do {                  \
+        int     idx = 0;                                        \
+        if (!(fctx)) {                                          \
+                op_errno = EINVAL;                              \
+                goto label;                                     \
+        }                                                       \
+        for (idx = 0; idx < (fctx)->stripe_count; idx++) {      \
+                if (!(fctx)->xl_array[idx]) {                   \
+                        gf_log (this->name, GF_LOG_ERROR,       \
+                                "fctx->xl_array[%d] is NULL",   \
+                                idx);                           \
+                        op_errno = ESTALE;                      \
+                        goto label;                             \
+                }                                               \
+        }                                                       \
+        } while (0)
+
+/*
+ * One child's reply to a virtual xattr request; stripe_vgetxattr_cbk keeps
+ * an array of these, indexed by the wind cookie, in local->xattr_list.
+ */
+typedef struct stripe_xattr_sort {
+        int pos;            /* wind cookie (child index) of the reply */
+        int xattr_len;      /* length reported by dict_get_ptr_and_len */
+        char *xattr_value;  /* gf_memdup'd copy of the reply's value */
+} stripe_xattr_sort_t;
/**
* struct stripe_options : This keeps the pattern and the block-size
@@ -57,15 +93,17 @@ struct stripe_private {
gf_lock_t lock;
uint8_t nodes_down;
int8_t first_child_down;
+ int *last_event;
int8_t child_count;
- int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
+ char vol_uuid[UUID_SIZE + 1];
};
/**
- * Used to keep info about the replies received from fops->readv calls
+ * Used to keep info about the replies received from readv/writev calls
*/
-struct readv_replies {
+struct stripe_replies {
struct iovec *vector;
int32_t count; //count of vector
int32_t op_ret; //op_ret of readv
@@ -77,6 +115,7 @@ struct readv_replies {
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -112,9 +151,9 @@ struct stripe_local {
blkcnt_t preparent_blocks;
blkcnt_t postparent_blocks;
- struct readv_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
+ struct stripe_replies *replies;
+ struct statvfs statvfs_buf;
+ dir_entry_t *entry;
int8_t revalidate;
int8_t failed;
@@ -136,8 +175,15 @@ struct stripe_local {
loc_t loc;
loc_t loc2;
+ mode_t mode;
+ dev_t rdev;
/* For File I/O fops */
- dict_t *dict;
+ dict_t *xdata;
+
+ stripe_xattr_sort_t *xattr_list;
+ int32_t xattr_total_len;
+ int32_t nallocs;
+ char xsel[256];
/* General usage */
off_t offset;
@@ -147,13 +193,89 @@ struct stripe_local {
int entry_self_heal_needed;
int8_t *list;
- struct flock lock;
+ struct gf_flock lock;
fd_t *fd;
void *value;
struct iobref *iobref;
+ gf_dirent_t entries;
+ gf_dirent_t *dirent;
+ dict_t *xattr;
+ uuid_t ia_gfid;
+
+ int xflag;
+ mode_t umask;
};
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Map the translator behind a reply (prev->this) to its position in
+ * fctx->xl_array.  Returns the first matching index, or -1 when that
+ * translator is not among the first fctx->stripe_count entries.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+                                             call_frame_t *prev)
+{
+        int32_t pos = 0;
+
+        while (pos < fctx->stripe_count) {
+                if (prev->this == fctx->xl_array[pos])
+                        return pos;
+                pos++;
+        }
+
+        return -1;
+}
+
+/*
+ * Copy the first 'count' translator pointers from src to dst, in order.
+ * Nothing is copied when count is zero or negative.
+ */
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+                                        int count)
+{
+        int n = 0;
+
+        while (n < count) {
+                dst[n] = src[n];
+                n++;
+        }
+}
+
+void stripe_local_wipe (stripe_local_t *local);
+int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
+ stripe_local_t *local, dict_t *dict);
+void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
+int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
+ uint64_t stripe_size, uint32_t stripe_count,
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
+int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
+int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
+int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
+int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz);
+int32_t stripe_free_xattr_str (stripe_local_t *local);
+int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
+ int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz);
+
+/*
+ * Adjust the size attribute for files if coalesce is enabled.
+ *
+ * Only regular files with a coalescing fd context are touched.  The size
+ * reported by the child behind 'prev' is converted with uncoalesced_size
+ * using that child's stripe index.  When the child cannot be found in
+ * fctx->xl_array the size is left as reported, since no valid stripe index
+ * exists to convert it with.
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+                                     call_frame_t *prev)
+{
+        int idx = -1;
+
+        if (!IA_ISREG(buf->ia_type))
+                return;
+
+        if (!fctx || !fctx->stripe_coalesce)
+                return;
+
+        idx = stripe_get_frame_index(fctx, prev);
+        if (idx < 0)
+                return;
+
+        buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
+                                        fctx->stripe_count, idx);
+}
+
#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index 2a1fe837263..00000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-mem-types.h b/xlators/cluster/unify/src/unify-mem-types.h
deleted file mode 100644
index 3b4abc8e9b8..00000000000
--- a/xlators/cluster/unify/src/unify-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __UNIFY_MEM_TYPES_H__
-#define __UNIFY_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_unify_mem_types_ {
- gf_unify_mt_char = gf_common_mt_end + 1,
- gf_unify_mt_int16_t,
- gf_unify_mt_xlator_t,
- gf_unify_mt_unify_private_t,
- gf_unify_mt_xlator_list_t,
- gf_unify_mt_dir_entry_t,
- gf_unify_mt_off_t,
- gf_unify_mt_int,
- gf_unify_mt_unify_self_heal_struct,
- gf_unify_mt_unify_local_t,
- gf_unify_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index 88145af9a77..00000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- GF_FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- GF_FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- GF_FREE (local->sh_struct->count_list);
-
- GF_FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict,
- &local->oldpostparent);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index e50d3274f3b..00000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4589 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'ia_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!IA_ISDIR (loc->inode->ia_type)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = buf->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (buf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!IA_ISDIR (buf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.ia_type && local->stbuf.ia_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (IA_ISDIR (local->stbuf.ia_type) &&
- !IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!IA_ISDIR (local->stbuf.ia_type) &&
- IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !IA_ISDIR (buf->ia_type)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = GF_CALLOC (1, 2 * (priv->child_count + 2),
- gf_unify_mt_int16_t);
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && IA_ISDIR (buf->ia_type)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->ia_ino = buf->ia_ino;
- if (IA_ISDIR (buf->ia_type) ||
- !(local->stbuf.ia_blksize)) {
- local->stbuf = *buf;
- local->oldpostparent = *postparent;
- }
- } else if (!IA_ISDIR (buf->ia_type)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->ia_nlink < buf->ia_nlink) {
- local->ia_nlink = buf->ia_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.ia_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !IA_ISDIR (local->stbuf.ia_type)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = GF_CALLOC (1, 2 * (local->index + 1),
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- GF_FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (IA_ISDIR(local->loc1.inode->ia_type)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.ia_ino = local->ia_ino;
- }
-
- local->stbuf.ia_nlink = local->ia_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- GF_FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- IA_ISDIR(local->stbuf.ia_type))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && IA_ISDIR (loc->inode->ia_type)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->ia_type &&
- !IA_ISDIR (loc->inode->ia_type)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->ia_type) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->ia_ino = loc->inode->ino;
- if (IA_ISDIR (loc->inode->ia_type)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (IA_ISDIR (buf->ia_type))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, local->wbflags);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = gf_strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = gf_strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = GF_CALLOC (1, ZR_PATH_MAX, gf_unify_mt_char);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- GF_FREE (tmp_str);
- }
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- local->wbflags = wbflags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (IA_ISLNK (loc->inode->ia_type)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, ZR_PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd, wbflags);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->ia_ino = buf->ia_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL,
- NULL, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, 0);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.ia_ino = local->ia_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-int32_t
-unify_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = statpost->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (statpost->ia_type) ||
- !local->stpost.ia_blksize) {
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
-
- if ((!IA_ISDIR (statpost->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stpre.ia_ino = local->ia_ino;
- local->stpost.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stpre, &local->stpost);
- }
-
- return 0;
-}
-
-
-int32_t
-unify_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- if (!(loc && loc->inode)) {
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setattr_cbk,
- NS (this),
- NS (this)->fops->setattr,
- loc, stbuf, valid);
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_setattr_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->setattr,
- loc, stbuf, valid);
-
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_setattr_cbk, child,
- child->fops->fsetattr, fd, stbuf, valid);
-
- STACK_WIND (frame, unify_setattr_cbk, NS(this),
- NS(this)->fops->fsetattr, fd, stbuf, valid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_setattr_cbk,
- NS(this), NS(this)->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->ia_ino = postbuf->ia_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (IA_ISDIR (postbuf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
-
- if ((!IA_ISDIR (postbuf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- local->poststbuf.ia_ino = local->ia_ino;
- } else {
- local->op_ret = -1;
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->poststbuf);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->ia_ino = loc->inode->ino;
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_truncate_cbk,
- NS(this),
- NS(this)->fops->truncate,
- loc,
- 0);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send offset to NS truncate */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->truncate, loc, 0);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- if (((call_frame_t *)cookie)->this == NS(this)) {
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- unify_local_t *local = NULL;
-
- local = frame->local;
-
- local->stbuf = *prebuf;
- local->stbuf.ia_ino = local->ia_ino;
-
- local->poststbuf = *postbuf;
- local->poststbuf.ia_ino = local->ia_ino;
-
- STACK_UNWIND (frame, op_ret, op_errno,
- &local->stbuf, &local->poststbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
- unify_local_t *local = NULL;
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->ftruncate,
- fd, 0);
-
- return 0;
-}
-
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdirp_cbk, NS(this),
- NS(this)->fops->readdirp, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = gf_strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Start the mapping list */
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = gf_strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!IA_ISDIR (buf->ia_type))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- if (IA_ISDIR (local->loc1.inode->ia_type)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->oldpreparent,
- &local->oldpostparent, &local->newpreparent,
- &local->newpostparent);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = GF_CALLOC (1, index * 2,
- gf_unify_mt_int16_t);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- GF_FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- GF_FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent,
- &local->newpreparent, &local->newpostparent);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
- }
-
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preoldparent;
- local->oldpostparent = *postoldparent;
- local->newpreparent = *prenewparent;
- local->newpostparent = *postnewparent;
-
- /* Everything is fine. */
- if (IA_ISDIR (buf->ia_type)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->ia_type && (!IA_ISDIR(inode->ia_type))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- GF_FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_unify_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_unify_mt_unify_private_t);
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = GF_CALLOC (1,
- sizeof (xlator_t) * (count + 1),
- gf_unify_mt_xlator_t);
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- ret = _private->sched_ops->mem_acct_init (this);
-
- if (ret == -1) {
- return -1;
- }
-
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- GF_FREE (_private);
- return -1;
- }
-
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = GF_CALLOC (1, sizeof (*xlparent),
- gf_unify_mt_xlator_list_t);
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv->xl_array);
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .readdirp = unify_readdirp,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop,
- .setattr = unify_setattr,
- .fsetattr = unify_fsetattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index 8dcf1659821..00000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-#include "unify-mem-types.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = GF_CALLOC (1, sizeof (unify_local_t), gf_unify_mt_unify_local_t); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct iatt stbuf;
- struct iatt stpre;
- struct iatt stpost;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t ia_ino;
- nlink_t ia_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-
- struct iatt poststbuf;
- /* When not used for rename, old*
- * are used as the attrs for the current
- * parent directory.
- */
- struct iatt oldpreparent;
- struct iatt oldpostparent;
- struct iatt newpreparent;
- struct iatt newpostparent;
- int32_t wbflags;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index f353b61e69c..8baf15612bd 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,15 +2,16 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = error-gen.h
+noinst_HEADERS = error-gen.h error-gen-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
new file mode 100644
index 00000000000..f02280535df
--- /dev/null
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ERROR_GEN_MEM_TYPES_H__
+#define __ERROR_GEN_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_error_gen_mem_types_ {
+ gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
+ gf_error_gen_mt_end
+};
+#endif
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index 3ac2e22d3a5..b6b17baa87f 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,180 +1,173 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "error-gen.h"
+#include "statedump.h"
sys_error_t error_no_list[] = {
- [ERR_LOOKUP] = { .error_no_count = 4,
+ [GF_FOP_LOOKUP] = { .error_no_count = 4,
.error_no = {ENOENT,ENOTDIR,
ENAMETOOLONG,EAGAIN}},
- [ERR_STAT] = { .error_no_count = 7,
+ [GF_FOP_STAT] = { .error_no_count = 7,
.error_no = {EACCES,EBADF,EFAULT,
ENAMETOOLONG,ENOENT,
ENOMEM,ENOTDIR}},
- [ERR_READLINK] = { .error_no_count = 8,
+ [GF_FOP_READLINK] = { .error_no_count = 8,
.error_no = {EACCES,EFAULT,EINVAL,EIO,
ENAMETOOLONG,ENOENT,ENOMEM,
ENOTDIR}},
- [ERR_MKNOD] = { .error_no_count = 11,
+ [GF_FOP_MKNOD] = { .error_no_count = 11,
.error_no = {EACCES,EEXIST,EFAULT,
EINVAL,ENAMETOOLONG,
ENOENT,ENOMEM,ENOSPC,
ENOTDIR,EPERM,EROFS}},
- [ERR_MKDIR] = { .error_no_count = 10,
+ [GF_FOP_MKDIR] = { .error_no_count = 10,
.error_no = {EACCES,EEXIST,EFAULT,
ENAMETOOLONG,ENOENT,
ENOMEM,ENOSPC,ENOTDIR,
EPERM,EROFS}},
- [ERR_UNLINK] = { .error_no_count = 10,
+ [GF_FOP_UNLINK] = { .error_no_count = 10,
.error_no = {EACCES,EBUSY,EFAULT,EIO,
EISDIR,ENAMETOOLONG,
ENOENT,ENOMEM,ENOTDIR,
EPERM,EROFS}},
- [ERR_RMDIR] = { .error_no_count = 8,
+ [GF_FOP_RMDIR] = { .error_no_count = 8,
.error_no = {EACCES,EBUSY,EFAULT,
ENOMEM,ENOTDIR,ENOTEMPTY,
EPERM,EROFS}},
- [ERR_SYMLINK] = { .error_no_count = 11,
+ [GF_FOP_SYMLINK] = { .error_no_count = 11,
.error_no = {EACCES,EEXIST,EFAULT,EIO,
ENAMETOOLONG,ENOENT,ENOMEM,
ENOSPC,ENOTDIR,EPERM,
EROFS}},
- [ERR_RENAME] = { .error_no_count = 13,
+ [GF_FOP_RENAME] = { .error_no_count = 13,
.error_no = {EACCES,EBUSY,EFAULT,
EINVAL,EISDIR,EMLINK,
ENAMETOOLONG,ENOENT,ENOMEM,
ENOSPC,ENOTDIR,EEXIST,
EXDEV}},
- [ERR_LINK] = { .error_no_count = 13,
+ [GF_FOP_LINK] = { .error_no_count = 13,
.error_no = {EACCES,EFAULT,EEXIST,EIO,
EMLINK,ENAMETOOLONG,
ENOENT,ENOMEM,ENOSPC,
ENOTDIR,EPERM,EROFS,
EXDEV}},
- [ERR_TRUNCATE] = { .error_no_count = 10,
+ [GF_FOP_TRUNCATE] = { .error_no_count = 10,
.error_no = {EACCES,EFAULT,EFBIG,
EINTR,EINVAL,EIO,EISDIR,
ENAMETOOLONG,ENOENT,
EISDIR}},
- [ERR_CREATE] = {.error_no_count = 10,
+ [GF_FOP_CREATE] = {.error_no_count = 10,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,ENAMETOOLONG,
ENFILE,ENODEV,ENOENT,
ENODEV}},
- [ERR_OPEN] = { .error_no_count = 10,
+ [GF_FOP_OPEN] = { .error_no_count = 10,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,
ENAMETOOLONG,ENFILE,
ENODEV,ENOENT,ENOMEM}},
- [ERR_READV] = { .error_no_count = 5,
+ [GF_FOP_READ] = { .error_no_count = 5,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
ENAMETOOLONG}},
- [ERR_WRITEV] = { .error_no_count = 5,
+ [GF_FOP_WRITE] = { .error_no_count = 7,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
- ENAMETOOLONG}},
- [ERR_STATFS] = {.error_no_count = 10,
+ ENAMETOOLONG,ENOSPC,
+ GF_ERROR_SHORT_WRITE}},
+ [GF_FOP_STATFS] = {.error_no_count = 10,
.error_no = {EACCES,EBADF,EFAULT,EINTR,
EIO,ENAMETOOLONG,ENOENT,
ENOMEM,ENOSYS,ENOTDIR}},
- [ERR_FLUSH] = { .error_no_count = 5,
+ [GF_FOP_FLUSH] = { .error_no_count = 5,
.error_no = {EACCES,EFAULT,
ENAMETOOLONG,ENOSYS,
ENOENT}},
- [ERR_FSYNC] = { .error_no_count = 4,
+ [GF_FOP_FSYNC] = { .error_no_count = 4,
.error_no = {EBADF,EIO,EROFS,EINVAL}},
- [ERR_SETXATTR] = { .error_no_count = 4,
+ [GF_FOP_SETXATTR] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,EINTR,
ENAMETOOLONG}},
- [ERR_GETXATTR] = { .error_no_count = 4,
+ [GF_FOP_GETXATTR] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,ENAMETOOLONG,
EINTR}},
- [ERR_REMOVEXATTR] = { .error_no_count = 4,
+ [GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,ENAMETOOLONG,
EINTR}},
- [ERR_OPENDIR] = { .error_no_count = 8,
+ [GF_FOP_FSETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_OPENDIR] = { .error_no_count = 8,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,
ENAMETOOLONG,ENFILE,
ENODEV}},
- [ERR_READDIR] = { .error_no_count = 5,
+ [GF_FOP_READDIR] = { .error_no_count = 5,
.error_no = {EINVAL,EACCES,EBADF,
EMFILE,ENOENT}},
- [ERR_READDIRP] = { .error_no_count = 5,
+ [GF_FOP_READDIRP] = { .error_no_count = 5,
.error_no = {EINVAL,EACCES,EBADF,
EMFILE,ENOENT}},
- [ERR_FSYNCDIR] = { .error_no_count = 4,
+ [GF_FOP_FSYNCDIR] = { .error_no_count = 4,
.error_no = {EBADF,EIO,EROFS,EINVAL}},
- [ERR_ACCESS] = { .error_no_count = 8,
+ [GF_FOP_ACCESS] = { .error_no_count = 8,
.error_no = {EACCES,ENAMETOOLONG,
ENOENT,ENOTDIR,EROFS,
EFAULT,EINVAL,EIO}},
- [ERR_FTRUNCATE] = { .error_no_count = 9,
+ [GF_FOP_FTRUNCATE] = { .error_no_count = 9,
.error_no = {EACCES,EFAULT,EFBIG,
EINTR,EINVAL,EIO,EISDIR,
ENAMETOOLONG,ENOENT}},
- [ERR_FSTAT] = { .error_no_count = 7,
+ [GF_FOP_FSTAT] = { .error_no_count = 7,
.error_no = {EACCES,EBADF,EFAULT,
ENAMETOOLONG,ENOENT,
ENOMEM,ENOTDIR}},
- [ERR_LK] = { .error_no_count = 4,
+ [GF_FOP_LK] = { .error_no_count = 4,
.error_no = {EACCES,EFAULT,ENOENT,
EINTR}},
- [ERR_CHECKSUM] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,
- ENAMETOOLONG,EINTR}},
- [ERR_XATTROP] = { .error_no_count = 5,
+ [GF_FOP_XATTROP] = { .error_no_count = 5,
.error_no = {EACCES,EFAULT,
ENAMETOOLONG,ENOSYS,
ENOENT}},
- [ERR_FXATTROP] = { .error_no_count = 4,
+ [GF_FOP_FXATTROP] = { .error_no_count = 4,
.error_no = {EBADF,EIO,EROFS,EINVAL}},
- [ERR_INODELK] = { .error_no_count = 4,
+ [GF_FOP_INODELK] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,EINTR,
ENAMETOOLONG}},
- [ERR_FINODELK] = { .error_no_count = 4,
+ [GF_FOP_FINODELK] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,EINTR,
ENAMETOOLONG}},
- [ERR_ENTRYLK] = { .error_no_count = 4,
+ [GF_FOP_ENTRYLK] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,
ENAMETOOLONG,EINTR}},
- [ERR_FENTRYLK] = { .error_no_count = 10,
+ [GF_FOP_FENTRYLK] = { .error_no_count = 10,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,
ENAMETOOLONG,ENFILE,
ENODEV,ENOENT,ENOMEM}},
- [ERR_SETATTR] = {.error_no_count = 11,
+ [GF_FOP_SETATTR] = {.error_no_count = 11,
.error_no = {EACCES,EFAULT,EIO,
ENAMETOOLONG,ENOENT,
ENOMEM,ENOTDIR,EPERM,
EROFS,EBADF,EIO}},
- [ERR_FSETATTR] = { .error_no_count = 11,
+ [GF_FOP_FSETATTR] = { .error_no_count = 11,
.error_no = {EACCES,EFAULT,EIO,
ENAMETOOLONG,ENOENT,
ENOMEM,ENOTDIR,EPERM,
EROFS,EBADF,EIO}},
- [ERR_GETSPEC] = { .error_no_count = 4,
+ [GF_FOP_GETSPEC] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,ENAMETOOLONG,
EINTR}}
};
@@ -184,7 +177,7 @@ generate_rand_no (int op_no)
{
int rand_no = 0;
- if (op_no < NO_OF_FOPS)
+ if (op_no < GF_FOP_MAXVALUE)
rand_no = rand () % error_no_list[op_no].error_no_count;
return rand_no;
}
@@ -240,6 +233,8 @@ conv_errno_to_int (char **error_no)
return EINTR;
else if (!strcmp ((*error_no), "EFBIG"))
return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
else
return EAGAIN;
}
@@ -248,83 +243,87 @@ int
get_fop_int (char **op_no_str)
{
if (!strcmp ((*op_no_str), "lookup"))
- return ERR_LOOKUP;
+ return GF_FOP_LOOKUP;
else if (!strcmp ((*op_no_str), "stat"))
- return ERR_STAT;
+ return GF_FOP_STAT;
else if (!strcmp ((*op_no_str), "readlink"))
- return ERR_READLINK;
+ return GF_FOP_READLINK;
else if (!strcmp ((*op_no_str), "mknod"))
- return ERR_MKNOD;
+ return GF_FOP_MKNOD;
else if (!strcmp ((*op_no_str), "mkdir"))
- return ERR_MKDIR;
+ return GF_FOP_MKDIR;
else if (!strcmp ((*op_no_str), "unlink"))
- return ERR_UNLINK;
+ return GF_FOP_UNLINK;
else if (!strcmp ((*op_no_str), "rmdir"))
- return ERR_RMDIR;
+ return GF_FOP_RMDIR;
else if (!strcmp ((*op_no_str), "symlink"))
- return ERR_SYMLINK;
+ return GF_FOP_SYMLINK;
else if (!strcmp ((*op_no_str), "rename"))
- return ERR_RENAME;
+ return GF_FOP_RENAME;
else if (!strcmp ((*op_no_str), "link"))
- return ERR_LINK;
+ return GF_FOP_LINK;
else if (!strcmp ((*op_no_str), "truncate"))
- return ERR_TRUNCATE;
+ return GF_FOP_TRUNCATE;
else if (!strcmp ((*op_no_str), "create"))
- return ERR_CREATE;
+ return GF_FOP_CREATE;
else if (!strcmp ((*op_no_str), "open"))
- return ERR_OPEN;
+ return GF_FOP_OPEN;
else if (!strcmp ((*op_no_str), "readv"))
- return ERR_READV;
+ return GF_FOP_READ;
else if (!strcmp ((*op_no_str), "writev"))
- return ERR_WRITEV;
+ return GF_FOP_WRITE;
else if (!strcmp ((*op_no_str), "statfs"))
- return ERR_STATFS;
+ return GF_FOP_STATFS;
else if (!strcmp ((*op_no_str), "flush"))
- return ERR_FLUSH;
+ return GF_FOP_FLUSH;
else if (!strcmp ((*op_no_str), "fsync"))
- return ERR_FSYNC;
+ return GF_FOP_FSYNC;
else if (!strcmp ((*op_no_str), "setxattr"))
- return ERR_SETXATTR;
+ return GF_FOP_SETXATTR;
else if (!strcmp ((*op_no_str), "getxattr"))
- return ERR_GETXATTR;
+ return GF_FOP_GETXATTR;
else if (!strcmp ((*op_no_str), "removexattr"))
- return ERR_REMOVEXATTR;
+ return GF_FOP_REMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "fsetxattr"))
+ return GF_FOP_FSETXATTR;
+ else if (!strcmp ((*op_no_str), "fgetxattr"))
+ return GF_FOP_FGETXATTR;
+ else if (!strcmp ((*op_no_str), "fremovexattr"))
+ return GF_FOP_FREMOVEXATTR;
else if (!strcmp ((*op_no_str), "opendir"))
- return ERR_OPENDIR;
+ return GF_FOP_OPENDIR;
else if (!strcmp ((*op_no_str), "readdir"))
- return ERR_READDIR;
+ return GF_FOP_READDIR;
else if (!strcmp ((*op_no_str), "readdirp"))
- return ERR_READDIRP;
+ return GF_FOP_READDIRP;
else if (!strcmp ((*op_no_str), "fsyncdir"))
- return ERR_FSYNCDIR;
+ return GF_FOP_FSYNCDIR;
else if (!strcmp ((*op_no_str), "access"))
- return ERR_ACCESS;
+ return GF_FOP_ACCESS;
else if (!strcmp ((*op_no_str), "ftruncate"))
- return ERR_FTRUNCATE;
+ return GF_FOP_FTRUNCATE;
else if (!strcmp ((*op_no_str), "fstat"))
- return ERR_FSTAT;
+ return GF_FOP_FSTAT;
else if (!strcmp ((*op_no_str), "lk"))
- return ERR_LK;
- else if (!strcmp ((*op_no_str), "checksum"))
- return ERR_CHECKSUM;
+ return GF_FOP_LK;
else if (!strcmp ((*op_no_str), "xattrop"))
- return ERR_XATTROP;
+ return GF_FOP_XATTROP;
else if (!strcmp ((*op_no_str), "fxattrop"))
- return ERR_FXATTROP;
+ return GF_FOP_FXATTROP;
else if (!strcmp ((*op_no_str), "inodelk"))
- return ERR_INODELK;
+ return GF_FOP_INODELK;
else if (!strcmp ((*op_no_str), "finodelk"))
- return ERR_FINODELK;
+ return GF_FOP_FINODELK;
else if (!strcmp ((*op_no_str), "etrylk"))
- return ERR_ENTRYLK;
+ return GF_FOP_ENTRYLK;
else if (!strcmp ((*op_no_str), "fentrylk"))
- return ERR_FENTRYLK;
+ return GF_FOP_FENTRYLK;
else if (!strcmp ((*op_no_str), "setattr"))
- return ERR_SETATTR;
+ return GF_FOP_SETATTR;
else if (!strcmp ((*op_no_str), "fsetattr"))
- return ERR_FSETATTR;
+ return GF_FOP_FSETATTR;
else if (!strcmp ((*op_no_str), "getspec"))
- return ERR_GETSPEC;
+ return GF_FOP_GETSPEC;
else
return -1;
}
@@ -361,12 +360,14 @@ error_gen (xlator_t *this, int op_no)
else {
rand_no = generate_rand_no (op_no);
- if (op_no >= NO_OF_FOPS)
+ if (op_no >= GF_FOP_MAXVALUE)
op_no = 0;
if (rand_no >= error_no_list[op_no].error_no_count)
rand_no = 0;
ret = error_no_list[op_no].error_no[rand_no];
}
+ if (egp->random_failure == _gf_true)
+ egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
}
return ret;
}
@@ -375,306 +376,296 @@ error_gen (xlator_t *this, int op_no)
int
error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
- buf, dict, postparent);
- return 0;
+ buf, xdata, postparent);
+ return 0;
}
int
error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_LOOKUP];
+ enable = egp->enable[GF_FOP_LOOKUP];
if (enable)
- op_errno = error_gen (this, ERR_LOOKUP);
+ op_errno = error_gen (this, GF_FOP_LOOKUP);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
STACK_WIND (frame, error_gen_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
- return 0;
-}
-
-
-int
-error_gen_forget (xlator_t *this, inode_t *inode)
-{
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_STAT];
+ enable = egp->enable[GF_FOP_STAT];
if (enable)
- op_errno = error_gen (this, ERR_STAT);
+ op_errno = error_gen (this, GF_FOP_STAT);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
-
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
int
error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_SETATTR];
+ enable = egp->enable[GF_FOP_SETATTR];
if (enable)
- op_errno = error_gen (this, ERR_SETATTR);
+ op_errno = error_gen (this, GF_FOP_SETATTR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
+ loc, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FSETATTR];
+ enable = egp->enable[GF_FOP_FSETATTR];
if (enable)
- op_errno = error_gen (this, ERR_FSETATTR);
+ op_errno = error_gen (this, GF_FOP_FSETATTR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
+ fd, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_TRUNCATE];
+ enable = egp->enable[GF_FOP_TRUNCATE];
if (enable)
- op_errno = error_gen (this, ERR_TRUNCATE);
+ op_errno = error_gen (this, GF_FOP_TRUNCATE);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
+ loc, offset, xdata);
+ return 0;
}
int
error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp =NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FTRUNCATE];
+ enable = egp->enable[GF_FOP_FTRUNCATE];
if (enable)
- op_errno = error_gen (this, ERR_FTRUNCATE);
+ op_errno = error_gen (this, GF_FOP_FTRUNCATE);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
+ fd, offset, xdata);
+ return 0;
}
int
error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_ACCESS];
+ enable = egp->enable[GF_FOP_ACCESS];
if (enable)
- op_errno = error_gen (this, ERR_ACCESS);
+ op_errno = error_gen (this, GF_FOP_ACCESS);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (access, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
- return 0;
+ loc, mask, xdata);
+ return 0;
}
int
error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
+ const char *path, struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
+ return 0;
}
int
error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_READLINK];
+ enable = egp->enable[GF_FOP_READLINK];
if (enable)
- op_errno = error_gen (this, ERR_READLINK);
+ op_errno = error_gen (this, GF_FOP_READLINK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
- return 0;
+ loc, size, xdata);
+ return 0;
}
@@ -682,41 +673,41 @@ int
error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev)
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_MKNOD];
+ enable = egp->enable[GF_FOP_MKNOD];
if (enable)
- op_errno = error_gen (this, ERR_MKNOD);
+ op_errno = error_gen (this, GF_FOP_MKNOD);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
@@ -724,116 +715,120 @@ int
error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_MKDIR];
+ enable = egp->enable[GF_FOP_MKDIR];
if (enable)
- op_errno = error_gen (this, ERR_MKDIR);
+ op_errno = error_gen (this, GF_FOP_MKDIR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
int
error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_UNLINK];
+ enable = egp->enable[GF_FOP_UNLINK];
if (enable)
- op_errno = error_gen (this, ERR_UNLINK);
+ op_errno = error_gen (this, GF_FOP_UNLINK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+ xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
int
error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_RMDIR];
+ enable = egp->enable[GF_FOP_RMDIR];
if (enable)
- op_errno = error_gen (this, ERR_RMDIR);
+ op_errno = error_gen (this, GF_FOP_RMDIR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
@@ -841,40 +836,40 @@ int
error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_SYMLINK];
+ enable = egp->enable[GF_FOP_SYMLINK];
if (enable)
- op_errno = error_gen (this, ERR_SYMLINK);
+ op_errno = error_gen (this, GF_FOP_SYMLINK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL); /* pre & post parent attr */
return 0;
}
STACK_WIND (frame, error_gen_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
@@ -882,41 +877,42 @@ int
error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
+ prenewparent, postnewparent, xdata);
+ return 0;
}
int
error_gen_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_RENAME];
+ enable = egp->enable[GF_FOP_RENAME];
if (enable)
- op_errno = error_gen (this, ERR_RENAME);
+ op_errno = error_gen (this, GF_FOP_RENAME);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -924,40 +920,40 @@ int
error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_LINK];
+ enable = egp->enable[GF_FOP_LINK];
if (enable)
- op_errno = error_gen (this, ERR_LINK);
+ op_errno = error_gen (this, GF_FOP_LINK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -965,77 +961,78 @@ int
error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_CREATE];
+ enable = egp->enable[GF_FOP_CREATE];
if (enable)
- op_errno = error_gen (this, ERR_CREATE);
+ op_errno = error_gen (this, GF_FOP_CREATE);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL); /* pre & post attr */
+ NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
int
error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_OPEN];
+ enable = egp->enable[GF_FOP_OPEN];
if (enable)
- op_errno = error_gen (this, ERR_OPEN);
+ op_errno = error_gen (this, GF_FOP_OPEN);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ loc, flags, fd, xdata);
+ return 0;
}
@@ -1043,116 +1040,132 @@ int
error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
- return 0;
+ vector, count, stbuf, iobref, xdata);
+ return 0;
}
int
error_gen_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_READV];
+ enable = egp->enable[GF_FOP_READ];
if (enable)
- op_errno = error_gen (this, ERR_READV);
+ op_errno = error_gen (this, GF_FOP_READ);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t off, struct iobref *iobref)
+ off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_WRITEV];
+ enable = egp->enable[GF_FOP_WRITE];
if (enable)
- op_errno = error_gen (this, ERR_WRITEV);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ op_errno = error_gen (this, GF_FOP_WRITE);
+
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ struct iovec *shortvec;
+
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+
+ STACK_WIND(frame, error_gen_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, shortvec, count,
+ off, flags, iobref, xdata);
+ GF_FREE(shortvec);
return 0;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
int
error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FLUSH];
+ enable = egp->enable[GF_FOP_FLUSH];
if (enable)
- op_errno = error_gen (this, ERR_FLUSH);
+ op_errno = error_gen (this, GF_FOP_FLUSH);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
@@ -1160,564 +1173,664 @@ int
error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
-error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FSYNC];
+ enable = egp->enable[GF_FOP_FSYNC];
if (enable)
- op_errno = error_gen (this, ERR_FSYNC);
+ op_errno = error_gen (this, GF_FOP_FSYNC);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FSTAT];
+ enable = egp->enable[GF_FOP_FSTAT];
if (enable)
- op_errno = error_gen (this, ERR_FSTAT);
+ op_errno = error_gen (this, GF_FOP_FSTAT);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
int
error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
-error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_OPENDIR];
+ enable = egp->enable[GF_FOP_OPENDIR];
if (enable)
- op_errno = error_gen (this, ERR_OPENDIR);
+ op_errno = error_gen (this, GF_FOP_OPENDIR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
int
error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FSYNCDIR];
+ enable = egp->enable[GF_FOP_FSYNCDIR];
if (enable)
- op_errno = error_gen (this, ERR_FSYNCDIR);
+ op_errno = error_gen (this, GF_FOP_FSYNCDIR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_STATFS];
+ enable = egp->enable[GF_FOP_STATFS];
if (enable)
- op_errno = error_gen (this, ERR_STATFS);
+ op_errno = error_gen (this, GF_FOP_STATFS);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_SETXATTR];
+ enable = egp->enable[GF_FOP_SETXATTR];
if (enable)
- op_errno = error_gen (this, ERR_SETXATTR);
+ op_errno = error_gen (this, GF_FOP_SETXATTR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
- return 0;
+ loc, dict, flags, xdata);
+ return 0;
}
int
error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_GETXATTR];
+ enable = egp->enable[GF_FOP_GETXATTR];
if (enable)
- op_errno = error_gen (this, ERR_GETXATTR);
+ op_errno = error_gen (this, GF_FOP_GETXATTR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- return 0;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FGETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_XATTROP];
+ enable = egp->enable[GF_FOP_XATTROP];
if (enable)
- op_errno = error_gen (this, ERR_XATTROP);
+ op_errno = error_gen (this, GF_FOP_XATTROP);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
- return 0;
+ loc, flags, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
-
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FXATTROP];
+ enable = egp->enable[GF_FOP_FXATTROP];
if (enable)
- op_errno = error_gen (this, ERR_FXATTROP);
+ op_errno = error_gen (this, GF_FOP_FXATTROP);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
- return 0;
+ fd, flags, dict, xdata);
+ return 0;
}
int
error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_REMOVEXATTR];
+ enable = egp->enable[GF_FOP_REMOVEXATTR];
if (enable)
- op_errno = error_gen (this, ERR_REMOVEXATTR);
+ op_errno = error_gen (this, GF_FOP_REMOVEXATTR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
}
int
error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int
error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_LK];
+ enable = egp->enable[GF_FOP_LK];
if (enable)
- op_errno = error_gen (this, ERR_LK);
+ op_errno = error_gen (this, GF_FOP_LK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
- return 0;
+ fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_INODELK];
+ enable = egp->enable[GF_FOP_INODELK];
if (enable)
- op_errno = error_gen (this, ERR_INODELK);
+ op_errno = error_gen (this, GF_FOP_INODELK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FINODELK];
+ enable = egp->enable[GF_FOP_FINODELK];
if (enable)
- op_errno = error_gen (this, ERR_FINODELK);
+ op_errno = error_gen (this, GF_FOP_FINODELK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_ENTRYLK];
+ enable = egp->enable[GF_FOP_ENTRYLK];
if (enable)
- op_errno = error_gen (this, ERR_ENTRYLK);
+ op_errno = error_gen (this, GF_FOP_ENTRYLK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
int
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_FENTRYLK];
+ enable = egp->enable[GF_FOP_FENTRYLK];
if (enable)
- op_errno = error_gen (this, ERR_FENTRYLK);
+ op_errno = error_gen (this, GF_FOP_FENTRYLK);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
@@ -1728,9 +1841,8 @@ int
error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, char *spec_data)
{
- STACK_UNWIND (frame, op_ret, op_errno, spec_data);
-
- return 0;
+ STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
+ return 0;
}
@@ -1743,14 +1855,14 @@ error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_GETSPEC];
+ enable = egp->enable[GF_FOP_GETSPEC];
if (enable)
- op_errno = error_gen (this, ERR_GETSPEC);
+ op_errno = error_gen (this, GF_FOP_GETSPEC);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (getspec, frame, -1, op_errno, NULL);
return 0;
}
@@ -1763,144 +1875,216 @@ error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
int
-error_gen_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint8_t *file_checksum, uint8_t *dir_checksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno,
- file_checksum, dir_checksum);
- return 0;
-}
-
-
-int
-error_gen_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[ERR_CHECKSUM];
-
- if (enable)
- op_errno = error_gen (this, ERR_CHECKSUM);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc, flag);
- return 0;
-}
-
-
-int
error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off)
+ size_t size, off_t off, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_READDIR];
+ enable = egp->enable[GF_FOP_READDIR];
if (enable)
- op_errno = error_gen (this, ERR_READDIR);
+ op_errno = error_gen (this, GF_FOP_READDIR);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
return 0;
}
STACK_WIND (frame, error_gen_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
int
error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
+ off_t off, dict_t *dict)
{
int op_errno = 0;
eg_t *egp = NULL;
int enable = 1;
egp = this->private;
- enable = egp->enable[ERR_READDIRP];
+ enable = egp->enable[GF_FOP_READDIRP];
if (enable)
- op_errno = error_gen (this, ERR_READDIRP);
+ op_errno = error_gen (this, GF_FOP_READDIRP);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
+static void
+error_gen_set_failure (eg_t *pvt, int percent)
+{
+ GF_ASSERT (pvt);
-int
-error_gen_closedir (xlator_t *this, fd_t *fd)
+ if (percent)
+ pvt->failure_iter_no = 100/percent;
+ else
+ pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
+}
+
+static void
+error_gen_parse_fill_fops (eg_t *pvt, char *enable_fops)
{
- return 0;
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
+
+ GF_ASSERT (pvt);
+ GF_ASSERT (this);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
+
+ if (!enable_fops) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r (enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = get_fop_int (&op_no_str);
+ if (op_no == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Wrong option value %s", op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r (NULL, ",", &saveptr);
+ }
+ }
}
+int32_t
+error_gen_priv_dump (xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
-int
-error_gen_close (xlator_t *this, fd_t *fd)
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.debug.error-gen","%s.priv",
+ this->name);
+
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
+
+ UNLOCK(&conf->lock);
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
{
- return 0;
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_error_gen_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ pvt = this->private;
+
+ GF_OPTION_RECONF ("error-no", pvt->error_no, options, str, out);
+
+ GF_OPTION_RECONF ("failure", failure_percent_int, options, int32,
+ out);
+
+ GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out);
+
+ GF_OPTION_RECONF ("random-failure", pvt->random_failure, options,
+ bool, out);
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
+}
int
init (xlator_t *this)
{
eg_t *pvt = NULL;
- data_t *error_no = NULL;
- data_t *failure_percent = NULL;
- data_t *enable = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
- char *op_no_str = NULL;
- int op_no = -1;
- int i = 0;
int32_t failure_percent_int = 0;
if (!this->children || this->children->next) {
@@ -1915,71 +2099,34 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- error_no = dict_get (this->options, "error-no");
- failure_percent = dict_get (this->options, "failure");
- enable = dict_get (this->options, "enable");
-
- pvt = CALLOC (1, sizeof (eg_t));
+ pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
if (!pvt) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory.");
ret = -1;
goto out;
}
LOCK_INIT (&pvt->lock);
- for (i = 0; i < NO_OF_FOPS; i++)
- pvt->enable[i] = 0;
- if (!error_no) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error-no not specified.");
- } else {
- pvt->error_no = data_to_str (error_no);
- }
+ GF_OPTION_INIT ("error-no", pvt->error_no, str, out);
- if (!failure_percent) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failure percent not specified.");
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- } else {
- failure_percent_int = data_to_int32 (failure_percent);
- if (failure_percent_int)
- pvt->failure_iter_no = 100/failure_percent_int;
- else
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- }
+ GF_OPTION_INIT ("failure", failure_percent_int, int32, out);
+
+ GF_OPTION_INIT ("enable", error_enable_fops, str, out);
+
+ GF_OPTION_INIT ("random-failure", pvt->random_failure, bool, out);
+
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
- if (!enable) {
- gf_log (this->name, GF_LOG_WARNING,
- "All fops are enabled.");
- for (i = 0; i < NO_OF_FOPS; i++)
- pvt->enable[i] = 1;
- } else {
- error_enable_fops = data_to_str (enable);
- op_no_str = error_enable_fops;
- while ((*error_enable_fops) != '\0') {
- error_enable_fops++;
- if (((*error_enable_fops) == ',') ||
- ((*error_enable_fops) == '\0')) {
- if ((*error_enable_fops) != '\0') {
- (*error_enable_fops) = '\0';
- error_enable_fops++;
- }
- op_no = get_fop_int (&op_no_str);
- if (op_no == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Wrong option value %s",
- op_no_str);
- } else
- pvt->enable[op_no] = 1;
- op_no_str = error_enable_fops;
- }
- }
- }
this->private = pvt;
+
+ /* Give some seed value here */
+ srand (time(NULL));
out:
+ if (ret)
+ GF_FREE (pvt);
return ret;
}
@@ -1995,12 +2142,18 @@ fini (xlator_t *this)
if (pvt) {
LOCK_DESTROY (&pvt->lock);
- FREE (pvt);
+ GF_FREE (pvt);
gf_log (this->name, GF_LOG_DEBUG, "fini called");
}
return;
}
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
+};
+
+/* error-gen installs no callbacks; the table is left zero-initialized. */
+struct xlator_cbks cbks;
+
struct xlator_fops fops = {
.lookup = error_gen_lookup,
.stat = error_gen_stat,
@@ -2023,6 +2176,9 @@ struct xlator_fops fops = {
.setxattr = error_gen_setxattr,
.getxattr = error_gen_getxattr,
.removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
.opendir = error_gen_opendir,
.readdir = error_gen_readdir,
.readdirp = error_gen_readdirp,
@@ -2032,7 +2188,6 @@ struct xlator_fops fops = {
.fstat = error_gen_fstat,
.lk = error_gen_lk,
.lookup_cbk = error_gen_lookup_cbk,
- .checksum = error_gen_checksum,
.xattrop = error_gen_xattrop,
.fxattrop = error_gen_fxattrop,
.inodelk = error_gen_inodelk,
@@ -2044,22 +2199,29 @@ struct xlator_fops fops = {
.getspec = error_gen_getspec,
};
-struct xlator_cbks cbks = {
- .release = error_gen_close,
- .releasedir = error_gen_closedir,
-};
-
struct volume_options options[] = {
{ .key = {"failure"},
- .type = GF_OPTION_TYPE_INT },
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
{ .key = {"error-no"},
.value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
"EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
"EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
"ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
- "EFBIG","EAGAIN"},
- .type = GF_OPTION_TYPE_STR },
+ "EFBIG","EAGAIN","GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"random-failure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
{ .key = {"enable"},
- .type = GF_OPTION_TYPE_STR },
+ .type = GF_OPTION_TYPE_STR,
+ },
+
{ .key = {NULL} }
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index 7fb5fdfb56c..351f5dc99d6 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -1,83 +1,37 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _ERROR_GEN_H
#define _ERROR_GEN_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "error-gen-mem-types.h"
#define GF_FAILURE_DEFAULT 10
-#define NO_OF_FOPS 42
-enum {
- ERR_LOOKUP,
- ERR_STAT,
- ERR_READLINK,
- ERR_MKNOD,
- ERR_MKDIR,
- ERR_UNLINK,
- ERR_RMDIR,
- ERR_SYMLINK,
- ERR_RENAME,
- ERR_LINK,
- ERR_TRUNCATE,
- ERR_CREATE,
- ERR_OPEN,
- ERR_READV,
- ERR_WRITEV,
- ERR_STATFS,
- ERR_FLUSH,
- ERR_FSYNC,
- ERR_SETXATTR,
- ERR_GETXATTR,
- ERR_REMOVEXATTR,
- ERR_OPENDIR,
- ERR_READDIR,
- ERR_READDIRP,
- ERR_GETDENTS,
- ERR_FSYNCDIR,
- ERR_ACCESS,
- ERR_FTRUNCATE,
- ERR_FSTAT,
- ERR_LK,
- ERR_SETDENTS,
- ERR_CHECKSUM,
- ERR_XATTROP,
- ERR_FXATTROP,
- ERR_INODELK,
- ERR_FINODELK,
- ERR_ENTRYLK,
- ERR_FENTRYLK,
- ERR_SETATTR,
- ERR_FSETATTR,
- ERR_STATS,
- ERR_GETSPEC
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX
};
typedef struct {
- int enable[NO_OF_FOPS];
+ int enable[GF_FOP_MAXVALUE];
int op_count;
int failure_iter_no;
char *error_no;
+ gf_boolean_t random_failure;
gf_lock_t lock;
} eg_t;
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index b894e79c3fe..c5df598549a 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,14 +2,18 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = io-stats-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index d9b434d579f..9dde9373264 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -1,32 +1,27 @@
-
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
#include "mem-types.h"
+extern const char *__progname;
+
enum gf_io_stats_mem_types_ {
gf_io_stats_mt_ios_conf = gf_common_mt_end + 1,
gf_io_stats_mt_ios_fd,
+ gf_io_stats_mt_ios_stat,
+ gf_io_stats_mt_ios_stat_list,
+ gf_io_stats_mt_ios_sample_buf,
+ gf_io_stats_mt_ios_sample,
gf_io_stats_mt_end
};
#endif
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index ee9c29e69a0..5e82fb4c029 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,26 +1,14 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "syscall.h"
/**
* xlators/debug/io_stats :
@@ -33,7 +21,8 @@
* d) counts of write IO block size - since process start, last interval and per fd
* e) counts of all FOP types passing through it
*
- * Usage: setfattr -n io-stats-dump /tmp/filename /mnt/gluster
+ * Usage: setfattr -n trusted.io-stats-dump /tmp/filename /mnt/gluster
+ * output is written to /tmp/filename.<iostats xlator instance name>
*
*/
@@ -42,7 +31,88 @@
#include "glusterfs.h"
#include "xlator.h"
#include "io-stats-mem-types.h"
+#include <stdarg.h>
+#include "defaults.h"
+#include "logging.h"
+#include "cli1-xdr.h"
+#include "statedump.h"
+#include <pwd.h>
+#include <grp.h>
+
+#define MAX_LIST_MEMBERS 100
+#define DEFAULT_PWD_BUF_SZ 16384
+#define DEFAULT_GRP_BUF_SZ 16384
+
+/*
+ * Per-file statistic categories. Values of this enum index
+ * ios_stat.counters[] and ios_conf.list[]; IOS_STATS_TYPE_MAX is the
+ * array size.
+ */
+typedef enum {
+ IOS_STATS_TYPE_NONE,
+ IOS_STATS_TYPE_OPEN,
+ IOS_STATS_TYPE_READ,
+ IOS_STATS_TYPE_WRITE,
+ IOS_STATS_TYPE_OPENDIR,
+ IOS_STATS_TYPE_READDIRP,
+ IOS_STATS_TYPE_READ_THROUGHPUT,
+ IOS_STATS_TYPE_WRITE_THROUGHPUT,
+ IOS_STATS_TYPE_MAX
+}ios_stats_type_t;
+
+/*
+ * Throughput directions. Values of this enum index
+ * ios_stat.thru_counters[] and ios_conf.thru_list[];
+ * IOS_STATS_THRU_MAX is the array size.
+ */
+typedef enum {
+ IOS_STATS_THRU_READ,
+ IOS_STATS_THRU_WRITE,
+ IOS_STATS_THRU_MAX,
+}ios_stats_thru_t;
+
+/*
+ * Throughput record for one file and direction. BUMP_THROUGHPUT overwrites
+ * 'throughput' whenever a new value is >= the stored one and stamps 'time'
+ * with gettimeofday() at that moment.
+ */
+struct ios_stat_lat {
+ struct timeval time;
+ double throughput;
+};
+
+/*
+ * Per-file statistics object, stored in the inode context
+ * (ios_inode_ctx_set/ios_inode_ctx_get) and referenced from the
+ * ios_stat_list entries. It is reference counted via
+ * ios_stat_ref()/ios_stat_unref() and freed when refcnt drops to zero.
+ */
+struct ios_stat {
+ gf_lock_t lock; /* taken around refcnt and counter updates */
+ uuid_t gfid; /* identity used to match list entries */
+ char *filename; /* freed by ios_stat_unref() on the last unref */
+ uint64_t counters [IOS_STATS_TYPE_MAX];
+ struct ios_stat_lat thru_counters [IOS_STATS_THRU_MAX];
+ int refcnt;
+};
+
+/*
+ * One node of a statistics list: links an ios_stat (on which the node
+ * holds a reference) with the value it was inserted under.
+ */
+struct ios_stat_list {
+ struct list_head list;
+ struct ios_stat *iosstat;
+ double value;
+};
+/*
+ * Head of one statistics list, maintained by ios_stat_add_to_list(),
+ * which never grows it beyond MAX_LIST_MEMBERS entries.
+ */
+struct ios_stat_head {
+ gf_lock_t lock; /* held while the list is walked or modified */
+ double min_cnt; /* smallest value tracked; smaller values are rejected once the list is full */
+ uint64_t members; /* current number of entries */
+ struct ios_stat_list *iosstats; /* its embedded 'list' member is the list anchor */
+};
+
+/*
+ * One FOP sample, stored as an element of ios_sample_buf_t.ios_samples.
+ * NOTE(review): field meanings below are inferred from their names and
+ * types; the code that fills them is not in this part of the file.
+ */
+typedef struct _ios_sample_t {
+ uid_t uid;
+ gid_t gid;
+ char identifier[UNIX_PATH_MAX];
+ glusterfs_fop_t fop_type;
+ struct timeval timestamp;
+ double elapsed; /* presumably the FOP latency; unit to be confirmed */
+} ios_sample_t;
+
+
+/*
+ * Pre-allocated buffer of FOP samples, created by ios_create_sample_buf()
+ * with 'size' elements and released by ios_destroy_sample_buf().
+ */
+typedef struct _ios_sample_buf_t {
+ uint64_t pos; /* Position in write buffer */
+ uint64_t size; /* Size of ring buffer */
+ uint64_t collected; /* Number of samples we've collected */
+ uint64_t observed; /* Number of FOPs we've observed */
+ ios_sample_t *ios_samples; /* Our list of samples */
+} ios_sample_buf_t;
+
+
+/*
+ * Latency summary kept per FOP type in ios_global_stats.latency[].
+ * NOTE(review): presumably min/max/avg latency plus a running total;
+ * the unit and the update rule live in update_ios_latency() - confirm there.
+ */
+struct ios_lat {
+ double min;
+ double max;
+ double avg;
+ uint64_t total;
+};
struct ios_global_stats {
uint64_t data_written;
@@ -51,15 +121,31 @@ struct ios_global_stats {
uint64_t block_count_read[32];
uint64_t fop_hits[GF_FOP_MAXVALUE];
struct timeval started_at;
+ struct ios_lat latency[GF_FOP_MAXVALUE];
+ uint64_t nr_opens;
+ uint64_t max_nr_opens;
+ struct timeval max_openfd_time;
};
-
struct ios_conf {
gf_lock_t lock;
struct ios_global_stats cumulative;
uint64_t increment;
struct ios_global_stats incremental;
gf_boolean_t dump_fd_stats;
+ gf_boolean_t count_fop_hits;
+ gf_boolean_t measure_latency;
+ struct ios_stat_head list[IOS_STATS_TYPE_MAX];
+ struct ios_stat_head thru_list[IOS_STATS_THRU_MAX];
+ int32_t ios_dump_interval;
+ pthread_t dump_thread;
+ gf_boolean_t dump_thread_should_die;
+ gf_lock_t ios_sampling_lock;
+ int32_t ios_sample_interval;
+ int32_t ios_sample_buf_size;
+ ios_sample_buf_t *ios_sample_buf;
+ struct dnscache *dnscache;
+ int32_t ios_dnscache_ttl_sec;
};
@@ -72,78 +158,213 @@ struct ios_fd {
struct timeval opened_at;
};
+/*
+ * Kind of destination or format of a statistics dump; stored in
+ * ios_dump_args.type.
+ */
+typedef enum {
+ IOS_DUMP_TYPE_NONE = 0,
+ IOS_DUMP_TYPE_FILE = 1,
+ IOS_DUMP_TYPE_DICT = 2,
+ IOS_DUMP_TYPE_JSON_FILE = 3,
+ IOS_DUMP_TYPE_SAMPLES = 4,
+ IOS_DUMP_TYPE_MAX = 5
+} ios_dump_type_t;
+
+/*
+ * Arguments describing where a dump goes.
+ * NOTE(review): presumably 'type' selects the active union member
+ * (u.logfp for file-style dumps, u.dict for IOS_DUMP_TYPE_DICT) -
+ * confirm against the dump routines.
+ */
+struct ios_dump_args {
+ ios_dump_type_t type;
+ union {
+ FILE *logfp;
+ dict_t *dict;
+ } u;
+};
+
+typedef int (*block_dump_func) (xlator_t *, struct ios_dump_args*,
+ int , int , uint64_t ) ;
struct ios_local {
struct timeval wind_at;
struct timeval unwind_at;
};
+struct volume_options options[];
-#define BUMP_FOP(op) \
+/*
+ * Tell whether latency measurement was started for this frame.
+ *
+ * Returns non-zero when frame->begin differs from an all-zero timeval
+ * (START_FOP_LATENCY zeroes it when latency measurement is off) and 0
+ * when it is still all zero.
+ */
+static int
+is_fop_latency_started (call_frame_t *frame)
+{
+        GF_ASSERT (frame);
+        struct timeval unset = {0,};
+        int            differs = 0;
+
+        differs = memcmp (&frame->begin, &unset, sizeof (unset));
+        return differs;
+}
+
+#define _IOS_SAMP_DIR DEFAULT_LOG_FILE_DIRECTORY "/samples"
+#ifdef GF_LINUX_HOST_OS
+#define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats"
+#else
+#define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats"
+#endif
+
+#define END_FOP_LATENCY(frame, op) \
do { \
struct ios_conf *conf = NULL; \
\
conf = this->private; \
- LOCK (&conf->lock); \
- { \
- conf->cumulative.fop_hits[GF_FOP_##op]++; \
- conf->incremental.fop_hits[GF_FOP_##op]++; \
+ if (conf && conf->measure_latency) { \
+ gettimeofday (&frame->end, NULL); \
+ update_ios_latency (conf, frame, GF_FOP_##op); \
} \
- UNLOCK (&conf->lock); \
+ } while (0)
+
+#define START_FOP_LATENCY(frame) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ gettimeofday (&frame->begin, NULL); \
+ } else { \
+ memset (&frame->begin, 0, sizeof (frame->begin));\
+ } \
} while (0)
-#define BUMP_READ(fd, len) \
+#define BUMP_FOP(op) \
do { \
struct ios_conf *conf = NULL; \
- struct ios_fd *iosfd = NULL; \
- int lb2 = 0; \
\
conf = this->private; \
- lb2 = log_base2 (len); \
- ios_fd_ctx_get (fd, this, &iosfd); \
- \
- LOCK (&conf->lock); \
- { \
- conf->cumulative.data_read += len; \
- conf->incremental.data_read += len; \
- conf->cumulative.block_count_read[lb2]++; \
- conf->incremental.block_count_read[lb2]++; \
- \
- if (iosfd) { \
- iosfd->data_read += len; \
- iosfd->block_count_read[lb2]++; \
- } \
- } \
- UNLOCK (&conf->lock); \
+ if (!conf) \
+ break; \
+ conf->cumulative.fop_hits[GF_FOP_##op]++; \
+ conf->incremental.fop_hits[GF_FOP_##op]++; \
+ } while (0)
+
+#if defined(HAVE_ATOMIC_BUILTINS)
+#define STATS_LOCK(x)
+#define STATS_UNLOCK(x)
+#define STATS_ADD(x,i) __sync_add_and_fetch (&x, i)
+#else
+#define STATS_LOCK(x) LOCK (x)
+#define STATS_UNLOCK(x) UNLOCK (x)
+#define STATS_ADD(x,i) (x) += (i)
+#endif
+
+/*
+ * UPDATE_PROFILE_STATS(frame, op) - account one completed FOP.
+ *
+ * Does nothing unless latency measurement was started for 'frame'. When
+ * both measure_latency and count_fop_hits are enabled it bumps the hit
+ * counters for GF_FOP_<op>, stamps frame->end and updates the latency
+ * statistics. Expects an xlator_t *this in the expanding scope.
+ */
+#define UPDATE_PROFILE_STATS(frame, op)                                       \
+        do {                                                                  \
+                struct ios_conf *conf = NULL;                                 \
+                                                                              \
+                if (!is_fop_latency_started (frame))                          \
+                        break;                                                \
+                conf = this->private;                                         \
+                /* conf may be NULL: bail out before touching conf->lock */   \
+                if (!conf)                                                    \
+                        break;                                                \
+                STATS_LOCK (&conf->lock);                                     \
+                {                                                             \
+                        if (conf->measure_latency &&                          \
+                            conf->count_fop_hits) {                           \
+                                BUMP_FOP(op);                                 \
+                                gettimeofday (&frame->end, NULL);             \
+                                update_ios_latency (conf, frame,              \
+                                                    GF_FOP_##op);             \
+                        }                                                     \
+                }                                                             \
+                STATS_UNLOCK (&conf->lock);                                   \
        } while (0)
+#define BUMP_READ(fd, len) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ struct ios_fd *iosfd = NULL; \
+ int lb2 = 0; \
+ \
+ conf = this->private; \
+ lb2 = log_base2 (len); \
+ ios_fd_ctx_get (fd, this, &iosfd); \
+ if (!conf) \
+ break; \
+ \
+ STATS_LOCK (&conf->lock); \
+ { \
+ STATS_ADD (conf->cumulative.data_read, len); \
+ STATS_ADD (conf->incremental.data_read, len); \
+ STATS_ADD (conf->cumulative.block_count_read[lb2], 1); \
+ STATS_ADD (conf->incremental.block_count_read[lb2], 1);\
+ \
+ if (iosfd) { \
+ STATS_ADD (iosfd->data_read, len); \
+ STATS_ADD (iosfd->block_count_read[lb2], 1); \
+ } \
+ } \
+ STATS_UNLOCK (&conf->lock); \
+ } while (0)
-#define BUMP_WRITE(fd, len) \
+#define BUMP_WRITE(fd, len) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ struct ios_fd *iosfd = NULL; \
+ int lb2 = 0; \
+ \
+ conf = this->private; \
+ lb2 = log_base2 (len); \
+ ios_fd_ctx_get (fd, this, &iosfd); \
+ if (!conf) \
+ break; \
+ STATS_LOCK (&conf->lock); \
+ { \
+ STATS_ADD (conf->cumulative.data_written, len); \
+ STATS_ADD (conf->incremental.data_written, len); \
+ STATS_ADD (conf->cumulative.block_count_write[lb2], 1);\
+ STATS_ADD (conf->incremental.block_count_write[lb2], 1);\
+ \
+ if (iosfd) { \
+ STATS_ADD (iosfd->data_written, len); \
+ STATS_ADD (iosfd->block_count_write[lb2], 1); \
+ } \
+ } \
+ STATS_UNLOCK (&conf->lock); \
+ } while (0)
+
+#define BUMP_STATS(iosstat, type) \
do { \
- struct ios_conf *conf = NULL; \
- struct ios_fd *iosfd = NULL; \
- int lb2 = 0; \
+ struct ios_conf *conf = NULL; \
+ uint64_t value = 0; \
\
conf = this->private; \
- lb2 = log_base2 (len); \
- ios_fd_ctx_get (fd, this, &iosfd); \
\
- LOCK (&conf->lock); \
+ LOCK(&iosstat->lock); \
{ \
- conf->cumulative.data_written += len; \
- conf->incremental.data_written += len; \
- conf->cumulative.block_count_write[lb2]++; \
- conf->incremental.block_count_write[lb2]++; \
- \
- if (iosfd) { \
- iosfd->data_written += len; \
- iosfd->block_count_write[lb2]++; \
- } \
+ value = STATS_ADD (iosstat->counters[type], 1); \
} \
- UNLOCK (&conf->lock); \
+ UNLOCK (&iosstat->lock); \
+ ios_stat_add_to_list (&conf->list[type], \
+ value, iosstat); \
} while (0)
+#define BUMP_THROUGHPUT(iosstat, type) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timeval *begin, *end; \
+ double throughput; \
+ int flag = 0; \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
+ + (end->tv_usec - begin->tv_usec); \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ STATS_LOCK (&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput \
+ <= throughput) { \
+ iosstat->thru_counters[type].throughput = \
+ throughput; \
+ gettimeofday (&iosstat-> \
+ thru_counters[type].time, NULL); \
+ flag = 1; \
+ } \
+ } \
+ STATS_UNLOCK (&iosstat->lock); \
+ if (flag) \
+ ios_stat_add_to_list (&conf->thru_list[type], \
+ throughput, iosstat); \
+ } while (0)
int
ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
@@ -174,6 +395,252 @@ ios_fd_ctx_set (fd_t *fd, xlator_t *this, struct ios_fd *iosfd)
return ret;
}
+/*
+ * Take a reference on an ios_stat.
+ *
+ * Returns the reference count as it was immediately after the increment.
+ * The value is captured while the lock is still held, so it cannot be
+ * changed by a concurrent ref/unref before it is returned.
+ */
+int
+ios_stat_ref (struct ios_stat *iosstat)
+{
+        int refcnt = 0;
+
+        LOCK (&iosstat->lock);
+        {
+                refcnt = ++iosstat->refcnt;
+        }
+        UNLOCK (&iosstat->lock);
+
+        return refcnt;
+}
+
+/*
+ * Drop a reference on an ios_stat. When the count reaches zero the
+ * filename is released under the lock, then the lock is destroyed and
+ * the object itself is freed. Always returns 0.
+ */
+int
+ios_stat_unref (struct ios_stat *iosstat)
+{
+        int last_ref = 0;
+
+        LOCK (&iosstat->lock);
+        {
+                if (--iosstat->refcnt == 0) {
+                        last_ref = 1;
+                        if (iosstat->filename) {
+                                GF_FREE (iosstat->filename);
+                                iosstat->filename = NULL;
+                        }
+                }
+        }
+        UNLOCK (&iosstat->lock);
+
+        if (!last_ref)
+                return 0;
+
+        /* Nobody else holds a reference: tear the object down. */
+        LOCK_DESTROY (&iosstat->lock);
+        GF_FREE (iosstat);
+
+        return 0;
+}
+
+/*
+ * Store 'iosstat' in the inode context of 'inode' for this xlator.
+ * A reference is taken on 'iosstat' before it is stored. Returns the
+ * result of inode_ctx_put().
+ */
+int
+ios_inode_ctx_set (inode_t *inode, xlator_t *this, struct ios_stat *iosstat)
+{
+        uint64_t ctx_value = 0;
+
+        ios_stat_ref (iosstat);
+
+        /* The context slot is a 64-bit integer; store the pointer in it. */
+        ctx_value = (unsigned long) iosstat;
+
+        return inode_ctx_put (inode, this, ctx_value);
+}
+
+/*
+ * Fetch the ios_stat stored in the inode context of 'inode'.
+ * '*iosstat' is written only when inode_ctx_get() does not return -1.
+ * Returns the result of inode_ctx_get().
+ */
+int
+ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
+{
+        uint64_t ctx_value = 0;
+        int      ret = 0;
+
+        ret = inode_ctx_get (inode, this, &ctx_value);
+        if (ret == -1)
+                return ret;
+
+        *iosstat = (void *) (unsigned long) ctx_value;
+        return ret;
+}
+
+/*
+ * So why go to all this trouble? Why not just queue up some samples in
+ * a big list and malloc away? Well, malloc is expensive relative
+ * to what we are measuring, so we cannot have any mallocs (or worse,
+ * callocs) in our measurement code paths. Instead, we are going to
+ * pre-allocate a circular buffer and collect a maximum number of samples.
+ * Prior to dumping them all we'll create a new buffer and swap the
+ * old buffer with the new, and then proceed to dump the statistics
+ * in our dump thread.
+ *
+ */
+/* Allocate a sample buffer with room for 'buf_size' samples and all
+ * bookkeeping counters at zero. Returns NULL when either allocation
+ * fails. */
+ios_sample_buf_t *
+ios_create_sample_buf (size_t buf_size)
+{
+        ios_sample_buf_t *buf = NULL;
+
+        buf = GF_CALLOC (1, sizeof (*buf), gf_io_stats_mt_ios_sample_buf);
+        if (buf)
+                buf->ios_samples = GF_CALLOC (buf_size,
+                                              sizeof (*buf->ios_samples),
+                                              gf_io_stats_mt_ios_sample);
+
+        if (!buf || !buf->ios_samples) {
+                GF_FREE (buf);
+                return NULL;
+        }
+
+        buf->size      = buf_size;
+        buf->pos       = 0;
+        buf->observed  = 0;
+        buf->collected = 0;
+
+        return buf;
+}
+
+/*
+ * Release a sample buffer created by ios_create_sample_buf(): first the
+ * sample array, then the buffer header. A NULL argument is a no-op.
+ */
+void
+ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf)
+{
+        if (!ios_sample_buf)
+                return;
+
+        GF_FREE (ios_sample_buf->ios_samples);
+        GF_FREE (ios_sample_buf);
+}
+
+/*
+ * Allocate conf->ios_sample_buf with conf->ios_sample_buf_size entries
+ * while holding conf->lock. Returns 0 on success, -1 if the allocation
+ * failed.
+ */
+static int
+ios_init_sample_buf (struct ios_conf *conf)
+{
+        int32_t ret = 0;
+
+        GF_ASSERT (conf);
+
+        LOCK (&conf->lock);
+        {
+                conf->ios_sample_buf =
+                        ios_create_sample_buf (conf->ios_sample_buf_size);
+                if (!conf->ios_sample_buf)
+                        ret = -1;
+        }
+        UNLOCK (&conf->lock);
+
+        return ret;
+}
+
+/*
+ * Record 'value' for 'iosstat' in the bounded list 'list_head'.
+ *
+ * The list holds at most MAX_LIST_MEMBERS entries and list_head->min_cnt
+ * tracks the smallest value kept; once the list is full, values below
+ * min_cnt are ignored. If the file (matched by gfid) is already listed its
+ * value is updated and the entry is moved when it now outranks an earlier
+ * one; otherwise a new entry is inserted and, if the list was full, the
+ * last entry is evicted. (The list appears to be kept in descending order
+ * of value - the walk stops at the first entry whose value is <= 'value'.)
+ *
+ * Each list entry holds a reference on its ios_stat. Insertion is
+ * best-effort: if a new entry cannot be allocated the list is left
+ * unchanged. Always returns 0.
+ */
+int
+ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
+                      struct ios_stat *iosstat)
+{
+        struct ios_stat_list *new = NULL;
+        struct ios_stat_list *entry = NULL;
+        struct ios_stat_list *t = NULL;
+        struct ios_stat_list *list_entry = NULL;
+        struct ios_stat_list *tmp = NULL;
+        struct ios_stat_list *last = NULL;
+        struct ios_stat      *stat = NULL;
+        int                   cnt = 0;
+        int                   found = 0;
+        int                   reposition = 0;
+        double                min_count = 0;
+
+        LOCK (&list_head->lock);
+        {
+
+                if (list_head->min_cnt == 0)
+                        list_head->min_cnt = value;
+                /* Full list and the value does not beat the minimum:
+                 * nothing to do. */
+                if ((list_head->members == MAX_LIST_MEMBERS) &&
+                    (list_head->min_cnt > value))
+                        goto out;
+
+                list_for_each_entry_safe (entry, t,
+                                          &list_head->iosstats->list, list) {
+                        cnt++;
+                        if (cnt == list_head->members)
+                                last = entry;
+
+                        if (!gf_uuid_compare (iosstat->gfid,
+                                              entry->iosstat->gfid)) {
+                                /* Already listed: refresh its value. */
+                                list_entry = entry;
+                                found = cnt;
+                                entry->value = value;
+                                if (!reposition) {
+                                        /* No earlier entry is outranked,
+                                         * so the position stays. */
+                                        if (cnt == list_head->members)
+                                                list_head->min_cnt = value;
+                                        goto out;
+                                }
+                                break;
+                        } else if (entry->value <= value && !reposition) {
+                                /* First entry that 'value' outranks:
+                                 * remember it as the insertion point. */
+                                reposition = cnt;
+                                tmp = entry;
+                                if (cnt == list_head->members - 1)
+                                        min_count = entry->value;
+                        }
+                }
+                if (found) {
+                        /* Move the existing entry in front of 'tmp'. */
+                        list_del (&list_entry->list);
+                        list_add_tail (&list_entry->list, &tmp->list);
+                        if (min_count)
+                                list_head->min_cnt = min_count;
+                        goto out;
+                } else if (list_head->members == MAX_LIST_MEMBERS && reposition) {
+                        new = GF_CALLOC (1, sizeof (*new),
+                                         gf_io_stats_mt_ios_stat_list);
+                        if (!new)
+                                goto out;
+                        new->iosstat = iosstat;
+                        new->value = value;
+                        ios_stat_ref (iosstat);
+                        list_add_tail (&new->list, &tmp->list);
+                        /* The list was full: evict the last entry and
+                         * drop the reference it held. */
+                        if (last) {
+                                stat = last->iosstat;
+                                last->iosstat = NULL;
+                                ios_stat_unref (stat);
+                                list_del (&last->list);
+                                GF_FREE (last);
+                        }
+                        if (reposition == MAX_LIST_MEMBERS)
+                                list_head->min_cnt = value;
+                        else if (min_count) {
+                                list_head->min_cnt = min_count;
+                        }
+                } else if (list_head->members < MAX_LIST_MEMBERS) {
+                        new = GF_CALLOC (1, sizeof (*new),
+                                         gf_io_stats_mt_ios_stat_list);
+                        if (!new)
+                                goto out;
+                        new->iosstat = iosstat;
+                        new->value = value;
+                        ios_stat_ref (iosstat);
+                        if (reposition) {
+                                list_add_tail (&new->list, &tmp->list);
+                        } else {
+                                /* The walk ran to completion, so 'entry'
+                                 * is back at the list anchor: append. */
+                                list_add_tail (&new->list, &entry->list);
+                        }
+                        list_head->members++;
+                        if (list_head->min_cnt > value)
+                                list_head->min_cnt = value;
+                }
+        }
+out:
+        UNLOCK (&list_head->lock);
+        return 0;
+}
+
+/*
+ * Remove this xlator's context from 'inode' and drop the reference on
+ * the ios_stat that was stored there. Logs a warning when no context
+ * value was present. Always returns 0.
+ */
+static int
+ios_stats_cleanup (xlator_t *this, inode_t *inode)
+{
+        uint64_t         ctx_value = 0;
+        struct ios_stat *iosstat = NULL;
+
+        inode_ctx_del (inode, this, &ctx_value);
+        if (ctx_value == 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not get inode ctx");
+                return 0;
+        }
+
+        iosstat = (void *) (long)ctx_value;
+        if (iosstat != NULL)
+                ios_stat_unref (iosstat);
+
+        return 0;
+}
#define ios_log(this, logfp, fmt ...) \
do { \
@@ -181,82 +648,898 @@ ios_fd_ctx_set (fd_t *fd, xlator_t *this, struct ios_fd *iosfd)
fprintf (logfp, fmt); \
fprintf (logfp, "\n"); \
} \
- gf_log (this->name, GF_LOG_NORMAL, fmt); \
+ gf_log (this->name, GF_LOG_DEBUG, fmt); \
} while (0)
+/*
+ * Emit one "<value> <filename>" line through ios_log for every entry of
+ * the given stat list. The list lock is held for the whole walk.
+ * Always returns 0.
+ */
+int
+ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this,
+                     FILE *logfp)
+{
+        struct ios_stat_list *cur = NULL;
+
+        LOCK (&list_head->lock);
+        list_for_each_entry (cur, &list_head->iosstats->list, list) {
+                ios_log (this, logfp, "%-12.0f %s",
+                         cur->value, cur->iosstat->filename);
+        }
+        UNLOCK (&list_head->lock);
+
+        return 0;
+}
int
-io_stats_dump_global (xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, FILE *logfp)
+ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
+                           FILE *logfp, ios_stats_thru_t type)
+{
+        /*
+         * Emit one "<timestamp> <value> <filename>" line through ios_log for
+         * every entry of the given throughput list. The timestamp comes from
+         * the entry's thru_counters[type].time; the list lock is held for the
+         * whole walk. Always returns 0.
+         */
+        struct ios_stat_list *entry        = NULL;
+        char                  timestr[256] = {0, };
+        size_t                used         = 0;
+
+        LOCK (&list_head->lock);
+        {
+                list_for_each_entry (entry, &list_head->iosstats->list, list) {
+                        gf_time_fmt (timestr, sizeof timestr,
+                                     entry->iosstat->thru_counters[type].time.tv_sec,
+                                     gf_timefmt_FT);
+
+                        /* Append the sub-second part of the SAME timestamp;
+                         * the seconds above and the microseconds here must
+                         * come from one timeval. */
+                        used = strlen (timestr);
+                        snprintf (timestr + used, sizeof timestr - used,
+                                  ".%"GF_PRI_SUSECONDS,
+                                  entry->iosstat->thru_counters[type].time.tv_usec);
+
+                        ios_log (this, logfp, "%s \t %-10.2f \t %s",
+                                 timestr, entry->value,
+                                 entry->iosstat->filename);
+                }
+        }
+        UNLOCK (&list_head->lock);
+        return 0;
+}
+
+/*
+ * Build the dotted prefix used for stat keys and hand it back through
+ * *key_prefix:
+ *
+ *     "<root>.<xlator>"              when no instance name is available
+ *     "<root>.<xlator>.<instance>"   otherwise
+ *
+ * <root> is "gluster", or "gluster.brick" when the program name is
+ * "glusterfsd". <xlator> is this->name with every '/' turned into '_',
+ * except that it becomes "shd" when this->name is "glustershd" and "nfsd"
+ * when the previous xlator is named "nfs-server" (in which case that
+ * xlator's instance name, if set, is used).
+ *
+ * this->name must not be NULL: it is duplicated unconditionally.
+ *
+ * Returns 0 on success; the caller releases *key_prefix with GF_FREE.
+ * Returns -ENOMEM when the buffer cannot be allocated and -EINVAL when the
+ * formatted length does not match the computed one; *key_prefix is NULL in
+ * both cases.
+ */
+int
+_io_stats_get_key_prefix (xlator_t *this, char **key_prefix)
+{
+        char   *key_root      = "gluster";
+        char   *xlator_name   = NULL;
+        char   *instance_name = NULL;
+        char   *cursor        = NULL;
+        size_t  key_len       = 0;
+        int     bytes_written = 0;
+
+        *key_prefix = NULL;
+
+        xlator_name = strdupa (this->name);
+        for (cursor = xlator_name; *cursor; cursor++) {
+                if (*cursor == '/')
+                        *cursor = '_';
+        }
+
+        instance_name = this->instance_name;
+        if (strcmp (this->name, "glustershd") == 0) {
+                xlator_name = "shd";
+        } else if (this->prev &&
+                   strcmp (this->prev->name, "nfs-server") == 0) {
+                xlator_name = "nfsd";
+                if (this->prev->instance_name)
+                        instance_name = strdupa (this->prev->instance_name);
+        }
+
+        if (strcmp (__progname, "glusterfsd") == 0)
+                key_root = "gluster.brick";
+
+        /* root + "." + xlator + NUL, plus "." + instance when present */
+        key_len = strlen (key_root) + strlen (xlator_name) + 2;
+        if (instance_name)
+                key_len += strlen (instance_name) + 1;
+
+        *key_prefix = GF_CALLOC (key_len, sizeof (char), gf_common_mt_char);
+        if (!*key_prefix)
+                return -ENOMEM;
+
+        if (instance_name)
+                bytes_written = snprintf (*key_prefix, key_len, "%s.%s.%s",
+                                          key_root, xlator_name,
+                                          instance_name);
+        else
+                bytes_written = snprintf (*key_prefix, key_len, "%s.%s",
+                                          key_root, xlator_name);
+
+        /* Anything but an exact fit means the length computed above and the
+         * formatted text disagree. */
+        if (bytes_written < 0 || (size_t) bytes_written != key_len - 1) {
+                GF_FREE (*key_prefix);
+                *key_prefix = NULL;
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+int
+io_stats_dump_global_to_json_logfp (xlator_t *this,
+ struct ios_global_stats *stats, struct timeval *now, int interval,
+ FILE *logfp)
{
- int i = 0;
+ int i = 0;
+ int j = 0;
+ struct ios_conf *conf = NULL;
+ char *key_prefix = NULL;
+ char *str_prefix = NULL;
+ char *lc_fop_name = NULL;
+ int ret = 1; /* Default to error */
+ int rw_size;
+ char *rw_unit = NULL;
+ long fop_hits;
+ float fop_lat_ave;
+ float fop_lat_min;
+ float fop_lat_max;
+ double interval_sec;
+
+ interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
+ (stats->started_at.tv_sec * 1000000.0 +
+ stats->started_at.tv_usec)) / 1000000.0;
+
+ conf = this->private;
+
+ ret = _io_stats_get_key_prefix (this, &key_prefix);
+ if (ret) {
+ goto out;
+ }
+
+ if (interval == -1) {
+ str_prefix = "aggr";
+
+ } else {
+ str_prefix = "inter";
+ }
+ ios_log (this, logfp, "{");
+
+ for (i = 0; i < 31; i++) {
+ rw_size = (1 << i);
+ if (rw_size >= 1024 * 1024) {
+ rw_size = rw_size / (1024 * 1024);
+ rw_unit = "mb";
+ } else if (rw_size >= 1024) {
+ rw_size = rw_size / 1024;
+ rw_unit = "kb";
+ } else {
+ rw_unit = "b";
+ }
+
+ if (interval == -1) {
+ ios_log (this, logfp,
+ "\"%s.%s.read_%d%s\": \"%"PRId64"\",",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ stats->block_count_read[i]);
+ ios_log (this, logfp,
+ "\"%s.%s.write_%d%s\": \"%"PRId64"\",",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ stats->block_count_write[i]);
+ } else {
+ ios_log (this, logfp,
+ "\"%s.%s.read_%d%s_per_sec\": \"%0.2lf\",",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ (double)(stats->block_count_read[i] /
+ interval_sec));
+ ios_log (this, logfp,
+ "\"%s.%s.write_%d%s_per_sec\": \"%0.2lf\",",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ (double)(stats->block_count_write[i] /
+ interval_sec));
+ }
+ }
+
+ if (interval == -1) {
+ ios_log (this, logfp, "\"%s.%s.fds.open_count\": \"%"PRId64
+ "\",", key_prefix, str_prefix,
+ conf->cumulative.nr_opens);
+ ios_log (this, logfp,
+ "\"%s.%s.fds.max_open_count\": \"%"PRId64"\",",
+ key_prefix, str_prefix, conf->cumulative.max_nr_opens);
+ }
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ lc_fop_name = strdupa (gf_fop_list[i]);
+ for (j = 0; lc_fop_name[j]; j++) {
+ lc_fop_name[j] = tolower (lc_fop_name[j]);
+ }
+
+ fop_hits = 0;
+ fop_lat_ave = 0.0;
+ fop_lat_min = 0.0;
+ fop_lat_max = 0.0;
+ if (stats->fop_hits[i]) {
+ fop_hits = stats->fop_hits[i];
+ if (stats->latency[i].avg) {
+ fop_lat_ave = stats->latency[i].avg;
+ fop_lat_min = stats->latency[i].min;
+ fop_lat_max = stats->latency[i].max;
+ }
+ }
+ if (interval == -1) {
+ ios_log (this, logfp,
+ "\"%s.%s.fop.%s.count\": \"%"PRId64"\",",
+ key_prefix, str_prefix, lc_fop_name,
+ fop_hits);
+ } else {
+ ios_log (this, logfp,
+ "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",",
+ key_prefix, str_prefix, lc_fop_name,
+ (double)(fop_hits / interval_sec));
+ }
+
+ ios_log (this, logfp,
+ "\"%s.%s.fop.%s.latency_ave_usec\": \"%0.2lf\",",
+ key_prefix, str_prefix, lc_fop_name, fop_lat_ave);
+ ios_log (this, logfp,
+ "\"%s.%s.fop.%s.latency_min_usec\": \"%0.2lf\",",
+ key_prefix, str_prefix, lc_fop_name, fop_lat_min);
+ ios_log (this, logfp,
+ "\"%s.%s.fop.%s.latency_max_usec\": \"%0.2lf\",",
+ key_prefix, str_prefix, lc_fop_name, fop_lat_max);
+ }
+ if (interval == -1) {
+ ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",",
+ key_prefix, str_prefix,
+ (uint64_t) (now->tv_sec - stats->started_at.tv_sec));
+ ios_log (this, logfp, "\"%s.%s.bytes_read\": \"%"PRId64"\",",
+ key_prefix, str_prefix, stats->data_read);
+ ios_log (this, logfp, "\"%s.%s.bytes_written\": \"%"PRId64"\"",
+ key_prefix, str_prefix, stats->data_written);
+ } else {
+ ios_log (this, logfp,
+ "\"%s.%s.sample_interval_sec\": \"%0.2lf\",",
+ key_prefix, str_prefix,
+ interval_sec);
+ ios_log (this, logfp,
+ "\"%s.%s.bytes_read_per_sec\": \"%0.2lf\",",
+ key_prefix, str_prefix,
+ (double)(stats->data_read / interval_sec));
+ ios_log (this, logfp,
+ "\"%s.%s.bytes_written_per_sec\": \"%0.2lf\"",
+ key_prefix, str_prefix,
+ (double)(stats->data_written / interval_sec));
+ }
+
+ ios_log (this, logfp, "}");
+ ret = 0;
+out:
+ GF_FREE (key_prefix);
+ return ret;
+}
+
+/*
+ * Translate a numeric uid into a user name via getpwuid_r.
+ *
+ * Returns a gf_strdup'ed copy of the name, which the caller releases with
+ * GF_FREE, or NULL when the lookup fails, no entry exists, or the copy
+ * cannot be allocated (the last case is logged).
+ */
+char *
+_resolve_username (xlator_t *this, uid_t uid)
+{
+        struct passwd   pwd;
+        struct passwd  *pwd_result  = NULL;
+        long            size_hint   = -1;
+        size_t          pwd_buf_len = 0;
+        char           *pwd_buf     = NULL;
+        char           *ret         = NULL;
+
+        /* Ask the system how large the getpw*_r scratch buffer should be.
+         * sysconf() reports "no limit / unknown" as -1, so keep the answer
+         * signed until it has been checked. */
+#ifdef _SC_GETPW_R_SIZE_MAX
+        size_hint = sysconf (_SC_GETPW_R_SIZE_MAX);
+#endif
+        if (size_hint > 0)
+                pwd_buf_len = (size_t) size_hint;
+        else
+                pwd_buf_len = DEFAULT_PWD_BUF_SZ; /* per the man page */
+
+        /* alloca() has no failure return, so there is nothing to test. */
+        pwd_buf = alloca (pwd_buf_len);
+
+        if (getpwuid_r (uid, &pwd, pwd_buf, pwd_buf_len, &pwd_result) != 0 ||
+            !pwd_result)
+                return NULL;
+
+        ret = gf_strdup (pwd.pw_name);
+        if (!ret)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "gf_strdup failed, failing username "
+                        "resolution.");
+        return ret;
+}
+
+/*
+ * Translate a numeric gid into a group name via getgrgid_r.
+ *
+ * Returns a gf_strdup'ed copy of the name, which the caller releases with
+ * GF_FREE, or NULL when the lookup fails, no entry exists, or the copy
+ * cannot be allocated (the last case is logged).
+ */
+char *
+_resolve_group_name (xlator_t *this, gid_t gid)
+{
+        struct group    grp;
+        struct group   *grp_result  = NULL;
+        long            size_hint   = -1;
+        size_t          grp_buf_len = 0;
+        char           *grp_buf     = NULL;
+        char           *ret         = NULL;
+
+        /* Ask the system how large the getgr*_r scratch buffer should be.
+         * sysconf() reports "no limit / unknown" as -1, so keep the answer
+         * signed until it has been checked. */
+#ifdef _SC_GETGR_R_SIZE_MAX
+        size_hint = sysconf (_SC_GETGR_R_SIZE_MAX);
+#endif
+        if (size_hint > 0)
+                grp_buf_len = (size_t) size_hint;
+        else
+                grp_buf_len = DEFAULT_GRP_BUF_SZ; /* per the man page */
+
+        /* alloca() has no failure return, so there is nothing to test. */
+        grp_buf = alloca (grp_buf_len);
+
+        if (getgrgid_r (gid, &grp, grp_buf, grp_buf_len, &grp_result) != 0 ||
+            !grp_result)
+                return NULL;
+
+        ret = gf_strdup (grp.gr_name);
+        if (!ret)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "gf_strdup failed, failing group name "
+                        "resolution.");
+        return ret;
+}
+
+
+/*
+ * Write one latency sample through ios_log as a comma-separated line:
+ *
+ *   timestamp (seconds.microseconds), fop_enum_to_pri_string (fop),
+ *   fop_enum_to_string (fop), elapsed, xlator name, instance name,
+ *   user, group, host, port
+ *
+ * The sample identifier is split at its last ':' into a host part, which
+ * is passed to gf_rev_dns_lookup_cached, and a port part. An empty
+ * identifier yields "Unknown" for both; an identifier with no ':' or with
+ * nothing after it is logged as an error and the sample is not written.
+ * User and group fall back to the numeric id when the name cannot be
+ * resolved.
+ */
+void
+_io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample,
+                                FILE *logfp)
+{
+        double           epoch_time    = 0.00;
+        char            *xlator_name   = NULL;
+        char            *instance_name = NULL;
+        char            *hostname      = NULL;
+        char            *identifier    = NULL;
+        char            *port          = NULL;
+        char            *port_pos      = NULL;
+        char            *group_name    = NULL;
+        char            *username      = NULL;
+        char             uid_str[32]   = {0, };
+        char             gid_str[32]   = {0, };
+        struct ios_conf *conf          = NULL;
+
+        conf = this->private;
+
+        epoch_time = (sample->timestamp).tv_sec +
+                     ((sample->timestamp).tv_usec / 1000000.0);
+
+        if (sample->identifier[0] == '\0') {
+                hostname = "Unknown";
+                port = "Unknown";
+        } else {
+                /* Work on a stack copy: the ':' is overwritten below. */
+                identifier = strdupa (sample->identifier);
+                port_pos = strrchr (identifier, ':');
+                if (!port_pos || port_pos[1] == '\0')
+                        goto err;
+                port = strdupa (port_pos + 1);
+                *port_pos = '\0';
+                hostname = gf_rev_dns_lookup_cached (identifier,
+                                                     conf->dnscache);
+                if (!hostname)
+                        hostname = "Unknown";
+        }
+
+        xlator_name = this->name;
+        if (!xlator_name || xlator_name[0] == '\0')
+                xlator_name = "Unknown";
+
+        instance_name = this->instance_name;
+        if (!instance_name || instance_name[0] == '\0')
+                instance_name = "N/A";
+
+        /* Resolve the UID to a user name; when that fails, print the number
+         * from a stack buffer so no allocation is needed on this path. */
+        username = _resolve_username (this, sample->uid);
+        if (!username)
+                snprintf (uid_str, sizeof (uid_str), "%d",
+                          (int32_t)sample->uid);
+
+        /* Same for the GID. */
+        group_name = _resolve_group_name (this, sample->gid);
+        if (!group_name)
+                snprintf (gid_str, sizeof (gid_str), "%d",
+                          (int32_t)sample->gid);
+
+        ios_log (this, logfp,
+                 "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s",
+                 epoch_time, fop_enum_to_pri_string (sample->fop_type),
+                 fop_enum_to_string (sample->fop_type),
+                 sample->elapsed, xlator_name, instance_name,
+                 username ? username : uid_str,
+                 group_name ? group_name : gid_str,
+                 hostname, port);
+        goto out;
+err:
+        gf_log (this->name, GF_LOG_ERROR,
+                "Error parsing socket identifier");
+out:
+        /* Only the resolved names are heap copies; both may be NULL here. */
+        GF_FREE (group_name);
+        GF_FREE (username);
+}
+
+/*
+ * Dump the collected latency samples to 'logfp'.
+ *
+ * The current buffer in conf->ios_sample_buf is kept aside, a new buffer
+ * is installed with ios_init_sample_buf, and the samples of the saved
+ * buffer are then written out and the buffer destroyed.
+ *
+ * Returns 0 when there was nothing to dump or the dump completed, and 1
+ * when there is no sampling buffer or a replacement buffer could not be
+ * set up.
+ */
+int
+io_stats_dump_latency_samples_logfp (xlator_t *this, FILE *logfp)
+{
+        uint64_t          i          = 0;
+        struct ios_conf  *conf       = NULL;
+        ios_sample_buf_t *sample_buf = NULL;
+        int               ret        = 1; /* Default to error */
+
+        conf = this->private;
+
+        /* Save the pointer to the old buffer before it gets replaced. */
+        sample_buf = conf->ios_sample_buf;
+        if (!sample_buf) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Sampling buffer is null, bailing!");
+                goto out;
+        }
+
+        /* Empty case, nothing to do, exit. */
+        if (sample_buf->collected == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "No samples, dump not required.");
+                ret = 0;
+                goto out;
+        }
+
+        /* Init a new buffer, so we are free to work on the one we saved a
+         * reference to above.
+         */
+        if (ios_init_sample_buf (conf) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Failed to init new sampling buffer, out of memory?");
+                goto out;
+        }
+
+        /* Wrap-around case: more samples were collected than fit before
+         * 'pos', so dump from pos to size - 1 first; the range 0 to pos - 1
+         * is covered by the simple case below.
+         */
+        if (sample_buf->collected > sample_buf->pos + 1) {
+                for (i = sample_buf->pos; i < sample_buf->size; i++) {
+                        _io_stats_write_latency_sample (this,
+                                &(sample_buf->ios_samples[i]), logfp);
+                }
+        }
+
+        /* Simple case: dump from 0 to pos - 1 */
+        for (i = 0; i < sample_buf->pos; i++) {
+                _io_stats_write_latency_sample (this,
+                        &(sample_buf->ios_samples[i]), logfp);
+        }
+        ios_destroy_sample_buf (sample_buf);
+
+        /* Everything was written: do not leave the default error code. */
+        ret = 0;
+out:
+        return ret;
+}
+
+int
+io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval, FILE *logfp)
+{
+ int i = 0;
+ int per_line = 0;
+ int index = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_conf *conf = NULL;
+ char timestr[256] = {0, };
+ char str_header[128] = {0};
+ char str_read[128] = {0};
+ char str_write[128] = {0};
+
+ conf = this->private;
if (interval == -1)
- ios_log (this, logfp, "=== Cumulative stats ===");
+ ios_log (this, logfp, "\n=== Cumulative stats ===");
else
- ios_log (this, logfp, "=== Interval %d stats ===",
+ ios_log (this, logfp, "\n=== Interval %d stats ===",
interval);
- ios_log (this, logfp, " Duration : %"PRId64"secs",
+ ios_log (this, logfp, " Duration : %"PRId64" secs",
(uint64_t) (now->tv_sec - stats->started_at.tv_sec));
ios_log (this, logfp, " BytesRead : %"PRId64,
stats->data_read);
- ios_log (this, logfp, " BytesWritten : %"PRId64,
+ ios_log (this, logfp, " BytesWritten : %"PRId64"\n",
stats->data_written);
+ snprintf (str_header, sizeof (str_header), "%-12s %c", "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c", "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c", "Write Count", ':');
+ index = 14;
for (i = 0; i < 32; i++) {
+ if ((stats->block_count_read[i] == 0) &&
+ (stats->block_count_write[i] == 0))
+ continue;
+ per_line++;
+
+ snprintf (str_header+index, sizeof (str_header)-index,
+ "%16dB+", (1<<i));
if (stats->block_count_read[i])
- ios_log (this, logfp, " Read %06db+ : %"PRId64,
- (1 << i), stats->block_count_read[i]);
+ snprintf (str_read+index, sizeof (str_read)-index,
+ "%18"PRId64, stats->block_count_read[i]);
+ else snprintf (str_read+index, sizeof (str_read)-index,
+ "%18s", "0");
+ if (stats->block_count_write[i])
+ snprintf (str_write+index, sizeof (str_write)-index,
+ "%18"PRId64, stats->block_count_write[i]);
+ else snprintf (str_write+index, sizeof (str_write)-index,
+ "%18s", "0");
+
+ index += 18;
+ if (per_line == 3) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+
+ memset (str_header, 0, sizeof (str_header));
+ memset (str_read, 0, sizeof (str_read));
+ memset (str_write, 0, sizeof (str_write));
+
+ snprintf (str_header, sizeof (str_header), "%-12s %c",
+ "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c",
+ "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c",
+ "Write Count", ':');
+
+ index = 14;
+ per_line = 0;
+ }
+ }
+
+ if (per_line != 0) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+ }
+
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "Fop",
+ "Call Count", "Avg-Latency", "Min-Latency",
+ "Max-Latency");
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "---", "----------",
+ "-----------", "-----------", "-----------");
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (stats->fop_hits[i] && !stats->latency[i].avg)
+ ios_log (this, logfp, "%-13s %10"PRId64" %11s "
+ "us %11s us %11s us", gf_fop_list[i],
+ stats->fop_hits[i], "0", "0", "0");
+ else if (stats->fop_hits[i] && stats->latency[i].avg)
+ ios_log (this, logfp, "%-13s %10"PRId64" %11.2lf us "
+ "%11.2lf us %11.2lf us", gf_fop_list[i],
+ stats->fop_hits[i], stats->latency[i].avg,
+ stats->latency[i].min, stats->latency[i].max);
+ }
+ ios_log (this, logfp, "------ ----- ----- ----- ----- ----- ----- ----- "
+ " ----- ----- ----- -----\n");
+
+ if (interval == -1) {
+ LOCK (&conf->lock);
+ {
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+ ios_log (this, logfp, "Current open fd's: %"PRId64
+ " Max open fd's: %"PRId64" time %s",
+ conf->cumulative.nr_opens,
+ conf->cumulative.max_nr_opens, timestr);
+ }
+ UNLOCK (&conf->lock);
+ ios_log (this, logfp, "\n==========Open File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ ios_dump_file_stats (list_head, this, logfp);
+
+
+ ios_log (this, logfp, "\n==========Read File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n==========Write File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n==========Directory open stats========");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n========Directory readdirp Stats=======");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n========Read Throughput File Stats=====");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ ios_dump_throughput_stats(list_head, this, logfp,
+ IOS_STATS_THRU_READ);
+
+ ios_log (this, logfp, "\n======Write Throughput File Stats======");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ ios_dump_throughput_stats (list_head, this, logfp,
+ IOS_STATS_THRU_WRITE);
+ }
+ return 0;
+}
+
+int
+io_stats_dump_global_to_dict (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval, dict_t *dict)
+{
+ int ret = 0;
+ char key[256] = {0};
+ uint64_t sec = 0;
+ int i = 0;
+ uint64_t count = 0;
+
+ GF_ASSERT (stats);
+ GF_ASSERT (now);
+ GF_ASSERT (dict);
+ GF_ASSERT (this);
+
+ if (interval == -1)
+ snprintf (key, sizeof (key), "cumulative");
+ else
+ snprintf (key, sizeof (key), "interval");
+ ret = dict_set_int32 (dict, key, interval);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "failed to set "
+ "interval %d", interval);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-duration", interval);
+ sec = (uint64_t) (now->tv_sec - stats->started_at.tv_sec);
+ ret = dict_set_uint64 (dict, key, sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set "
+ "duration(%d) - %"PRId64, interval, sec);
+ goto out;
}
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-total-read", interval);
+ ret = dict_set_uint64 (dict, key, stats->data_read);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set total "
+ "read(%d) - %"PRId64, interval, stats->data_read);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-total-write", interval);
+ ret = dict_set_uint64 (dict, key, stats->data_written);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set total "
+ "write(%d) - %"PRId64, interval, stats->data_written);
+ goto out;
+ }
for (i = 0; i < 32; i++) {
- if (stats->block_count_write[i])
- ios_log (this, logfp, "Write %06db+ : %"PRId64,
- (1 << i), stats->block_count_write[i]);
+ if (stats->block_count_read[i]) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-read-%d", interval,
+ (1 << i));
+ count = stats->block_count_read[i];
+ ret = dict_set_uint64 (dict, key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set read-%db+, with: %"PRId64,
+ (1<<i), count);
+ goto out;
+ }
+ }
}
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- if (stats->fop_hits[i])
- ios_log (this, logfp, "%14s : %"PRId64,
- gf_fop_list[i], stats->fop_hits[i]);
+ for (i = 0; i < 32; i++) {
+ if (stats->block_count_write[i]) {
+ snprintf (key, sizeof (key), "%d-write-%d", interval,
+ (1<<i));
+ count = stats->block_count_write[i];
+ ret = dict_set_uint64 (dict, key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set write-%db+, with: %"PRId64,
+ (1<<i), count);
+ goto out;
+ }
+ }
+ }
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (stats->fop_hits[i] == 0)
+ continue;
+ snprintf (key, sizeof (key), "%d-%d-hits", interval, i);
+ ret = dict_set_uint64 (dict, key, stats->fop_hits[i]);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set %s-fop-hits: %"PRIu64, gf_fop_list[i],
+ stats->fop_hits[i]);
+ goto out;
+ }
- return 0;
+ if (stats->latency[i].avg == 0)
+ continue;
+ snprintf (key, sizeof (key), "%d-%d-avglatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].avg);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "avglatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].avg);
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-%d-minlatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].min);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "minlatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].min);
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-%d-maxlatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].max);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "maxlatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].max);
+ goto out;
+ }
+ }
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
}
+/*
+ * Dispatch a stats dump to the writer that matches args->type: the JSON
+ * writer or the plain-text writer (both on args->u.logfp), or the dict
+ * writer (on args->u.dict). Returns the writer's result, or -1 for an
+ * unknown type.
+ */
+int
+io_stats_dump_global (xlator_t *this, struct ios_global_stats *stats,
+                      struct timeval *now, int interval,
+                      struct ios_dump_args *args)
+{
+        GF_ASSERT (args);
+        GF_ASSERT (now);
+        GF_ASSERT (stats);
+        GF_ASSERT (this);
+
+        switch (args->type) {
+        case IOS_DUMP_TYPE_JSON_FILE:
+                return io_stats_dump_global_to_json_logfp (this, stats, now,
+                                                           interval,
+                                                           args->u.logfp);
+        case IOS_DUMP_TYPE_FILE:
+                return io_stats_dump_global_to_logfp (this, stats, now,
+                                                      interval,
+                                                      args->u.logfp);
+        case IOS_DUMP_TYPE_DICT:
+                return io_stats_dump_global_to_dict (this, stats, now,
+                                                     interval, args->u.dict);
+        default:
+                break;
+        }
+
+        /* Unknown dump type. */
+        GF_ASSERT (0);
+        return -1;
+}
int
-io_stats_dump (xlator_t *this, char *filename, inode_t *inode,
- const char *path)
+ios_dump_args_init (struct ios_dump_args *args, ios_dump_type_t type,
+                    void *output)
+{
+        /*
+         * Fill 'args' for a dump of the given type: 'output' is stored as
+         * the log file pointer for the two file types and as the dict for
+         * the dict type. Returns 0, or -1 for any other type.
+         */
+        GF_ASSERT (args);
+        GF_ASSERT (type > IOS_DUMP_TYPE_NONE && type < IOS_DUMP_TYPE_MAX);
+        GF_ASSERT (output);
+
+        args->type = type;
+
+        if (type == IOS_DUMP_TYPE_JSON_FILE || type == IOS_DUMP_TYPE_FILE) {
+                args->u.logfp = output;
+                return 0;
+        }
+
+        if (type == IOS_DUMP_TYPE_DICT) {
+                args->u.dict = output;
+                return 0;
+        }
+
+        GF_ASSERT (0);
+        return -1;
+}
+
+/*
+ * Zero every field of 'stats' and record 'now' as its start time.
+ */
+static void
+ios_global_stats_clear (struct ios_global_stats *stats, struct timeval *now)
+{
+        GF_ASSERT (stats);
+        GF_ASSERT (now);
+
+        memset (stats, 0, sizeof (*stats));
+        memcpy (&stats->started_at, now, sizeof (stats->started_at));
+}
+
+int
+io_stats_dump (xlator_t *this, struct ios_dump_args *args,
+ gf1_cli_info_op op, gf_boolean_t is_peek)
{
struct ios_conf *conf = NULL;
struct ios_global_stats cumulative = {0, };
struct ios_global_stats incremental = {0, };
int increment = 0;
struct timeval now;
- FILE *logfp = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (args);
+ GF_ASSERT (args->type > IOS_DUMP_TYPE_NONE);
+ GF_ASSERT (args->type < IOS_DUMP_TYPE_MAX);
conf = this->private;
gettimeofday (&now, NULL);
LOCK (&conf->lock);
{
- cumulative = conf->cumulative;
- incremental = conf->incremental;
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_CUMULATIVE)
+ cumulative = conf->cumulative;
- increment = conf->increment++;
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_INCREMENTAL) {
+ incremental = conf->incremental;
+ increment = conf->increment;
- memset (&conf->incremental, 0, sizeof (conf->incremental));
- conf->incremental.started_at = now;
+ if (!is_peek) {
+ increment = conf->increment++;
+
+ ios_global_stats_clear (&conf->incremental,
+ &now);
+ }
+ }
}
UNLOCK (&conf->lock);
- logfp = fopen (filename, "w+");
- io_stats_dump_global (this, &cumulative, &now, -1, logfp);
- io_stats_dump_global (this, &incremental, &now, increment, logfp);
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_CUMULATIVE)
+ io_stats_dump_global (this, &cumulative, &now, -1, args);
+
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_INCREMENTAL)
+ io_stats_dump_global (this, &incremental, &now, increment, args);
- if (logfp)
- fclose (logfp);
return 0;
}
@@ -288,53 +1571,259 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)
sec = now.tv_sec - iosfd->opened_at.tv_sec;
usec = now.tv_usec - iosfd->opened_at.tv_usec;
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"--- fd stats ---");
if (iosfd->filename)
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
" Filename : %s",
iosfd->filename);
if (sec)
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
" Lifetime : %"PRId64"secs, %"PRId64"usecs",
sec, usec);
if (iosfd->data_read)
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
" BytesRead : %"PRId64" bytes",
iosfd->data_read);
if (iosfd->data_written)
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
" BytesWritten : %"PRId64" bytes",
iosfd->data_written);
for (i = 0; i < 32; i++) {
if (iosfd->block_count_read[i])
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
" Read %06db+ : %"PRId64,
(1 << i), iosfd->block_count_read[i]);
}
for (i = 0; i < 32; i++) {
if (iosfd->block_count_write[i])
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"Write %06db+ : %"PRId64,
(1 << i), iosfd->block_count_write[i]);
}
return 0;
}
+/*
+ * Count one observed fop and, on every ios_sample_interval-th observation,
+ * record it in the slot at the buffer's current position: elapsed time,
+ * fop type, uid/gid and identifier of the call stack root, and the frame's
+ * begin time. The position wraps to 0 after the last slot. Sampling is
+ * off when ios_sample_interval is 0. The counters and the slot are
+ * updated under conf->ios_sampling_lock.
+ */
+void
+collect_ios_latency_sample (struct ios_conf *conf,
+                            glusterfs_fop_t fop_type, double elapsed,
+                            call_frame_t *frame)
+{
+        ios_sample_buf_t *buf   = NULL;
+        ios_sample_t     *slot  = NULL;
+        call_stack_t     *stack = NULL;
+
+        buf = conf->ios_sample_buf;
+
+        LOCK (&conf->ios_sampling_lock);
+
+        /* The interval test comes first so the modulo never sees a zero. */
+        if (conf->ios_sample_interval != 0 &&
+            buf->observed % conf->ios_sample_interval == 0) {
+                stack = frame->root;
+                slot = &(buf->ios_samples[buf->pos]);
+
+                slot->elapsed = elapsed;
+                slot->fop_type = fop_type;
+                slot->uid = stack->uid;
+                slot->gid = stack->gid;
+                (slot->timestamp).tv_sec = frame->begin.tv_sec;
+                (slot->timestamp).tv_usec = frame->begin.tv_usec;
+                memcpy (&slot->identifier, &stack->identifier,
+                        sizeof (stack->identifier));
+
+                /* Circular buffer: step forward, wrapping after the last
+                 * slot. */
+                buf->pos = (buf->pos == (buf->size - 1)) ? 0 : buf->pos + 1;
+                buf->collected++;
+        }
+
+        buf->observed++;
+        UNLOCK (&conf->ios_sampling_lock);
+}
+
+/*
+ * Fold one latency measurement for fop 'op' into 'stats': add it to the
+ * total, widen min/max, and move the average towards it by
+ * 1 / fop_hits[op] of the difference. A min of 0 is treated as "not set
+ * yet" and replaced by the measurement.
+ */
+static void
+update_ios_latency_stats (struct ios_global_stats *stats, double elapsed,
+                          glusterfs_fop_t op)
+{
+        double prev_avg;
+
+        GF_ASSERT (stats);
+
+        stats->latency[op].total += elapsed;
+
+        if (!stats->latency[op].min || stats->latency[op].min > elapsed)
+                stats->latency[op].min = elapsed;
+
+        if (stats->latency[op].max < elapsed)
+                stats->latency[op].max = elapsed;
+
+        prev_avg = stats->latency[op].avg;
+        stats->latency[op].avg =
+                prev_avg + (elapsed - prev_avg) / stats->fop_hits[op];
+}
+
+/*
+ * Compute the frame's elapsed time in microseconds (end - begin) and feed
+ * it to the cumulative stats, the incremental stats and the latency
+ * sampler. Always returns 0.
+ */
+int
+update_ios_latency (struct ios_conf *conf, call_frame_t *frame,
+                    glusterfs_fop_t op)
+{
+        struct timeval *started  = &frame->begin;
+        struct timeval *finished = &frame->end;
+        double          elapsed;
+
+        elapsed = (finished->tv_sec - started->tv_sec) * 1e6
+                + (finished->tv_usec - started->tv_usec);
+
+        update_ios_latency_stats (&conf->cumulative, elapsed, op);
+        update_ios_latency_stats (&conf->incremental, elapsed, op);
+        collect_ios_latency_sample (conf, op, elapsed, frame);
+
+        return 0;
+}
+
+/*
+ * Store the open-fd counters in 'resp' under "current-open" and
+ * "max-open", and the formatted time of the maximum under
+ * "max-openfd-time". conf->lock is held while the counters are read.
+ * Returns 0 on success and non-zero as soon as one step fails.
+ */
+static int
+ios_set_open_fd_stats (struct ios_conf *conf, dict_t *resp)
+{
+        char    timestr[256] = {0, };
+        char   *dict_timestr = NULL;
+        size_t  used         = 0;
+        int     ret          = -1;
+
+        LOCK (&conf->lock);
+        {
+                ret = dict_set_uint64 (resp, "current-open",
+                                       conf->cumulative.nr_opens);
+                if (ret)
+                        goto unlock;
+
+                ret = dict_set_uint64 (resp, "max-open",
+                                       conf->cumulative.max_nr_opens);
+                if (ret)
+                        goto unlock;
+
+                gf_time_fmt (timestr, sizeof timestr,
+                             conf->cumulative.max_openfd_time.tv_sec,
+                             gf_timefmt_FT);
+                if (conf->cumulative.max_openfd_time.tv_sec) {
+                        used = strlen (timestr);
+                        snprintf (timestr + used, sizeof timestr - used,
+                                  ".%"GF_PRI_SUSECONDS,
+                                  conf->cumulative.max_openfd_time.tv_usec);
+                }
+
+                dict_timestr = gf_strdup (timestr);
+                if (!dict_timestr) {
+                        /* ret is still 0 from the last dict call; make the
+                         * failure visible to the caller. */
+                        ret = -1;
+                        goto unlock;
+                }
+                ret = dict_set_dynstr (resp, "max-openfd-time",
+                                       dict_timestr);
+        }
+unlock:
+        UNLOCK (&conf->lock);
+        return ret;
+}
+
+/*
+ * Fill 'resp' with the "top" list selected by 'flags'.
+ *
+ * Always sets "top-op" to 'flags'. For IOS_STATS_TYPE_OPEN the open-fd
+ * counters are added first. Then, unless list_cnt is 0, the entries of
+ * the selected list are written as "filename-<n>" / "value-<n>" (plus
+ * "time-sec-<n>" / "time-usec-<n>" for the two throughput lists), with n
+ * counting from 1 and stopping once it equals list_cnt; "members" receives
+ * the number of entries written.
+ *
+ * Returns 0 on success, -1 for an unknown 'flags' value, and the failing
+ * call's result when a dict operation fails.
+ */
+int32_t
+io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
+                             ios_stats_type_t flags, int32_t list_cnt)
+{
+        struct ios_conf      *conf      = NULL;
+        int                   cnt       = 0;
+        char                  key[256];
+        struct ios_stat_head *list_head = NULL;
+        struct ios_stat_list *entry     = NULL;
+        int                   ret       = -1;
+        ios_stats_thru_t      index     = IOS_STATS_THRU_MAX;
+
+        conf = this->private;
+
+        switch (flags) {
+        case IOS_STATS_TYPE_OPEN:
+                list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+                ret = ios_set_open_fd_stats (conf, resp);
+                /* Do not proceed if a dict operation failed. */
+                if (ret)
+                        goto out;
+                break;
+        case IOS_STATS_TYPE_READ:
+                list_head = &conf->list[IOS_STATS_TYPE_READ];
+                break;
+        case IOS_STATS_TYPE_WRITE:
+                list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+                break;
+        case IOS_STATS_TYPE_OPENDIR:
+                list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+                break;
+        case IOS_STATS_TYPE_READDIRP:
+                list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+                break;
+        case IOS_STATS_TYPE_READ_THROUGHPUT:
+                list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+                index = IOS_STATS_THRU_READ;
+                break;
+        case IOS_STATS_TYPE_WRITE_THROUGHPUT:
+                list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+                index = IOS_STATS_THRU_WRITE;
+                break;
+
+        default:
+                goto out;
+        }
+
+        ret = dict_set_int32 (resp, "top-op", flags);
+        if (ret || !list_cnt)
+                goto out;
+
+        LOCK (&list_head->lock);
+        {
+                list_for_each_entry (entry, &list_head->iosstats->list, list) {
+
+                        cnt++;
+                        snprintf (key, sizeof (key), "%s-%d", "filename", cnt);
+                        ret = dict_set_str (resp, key,
+                                            entry->iosstat->filename);
+                        if (ret)
+                                goto unlock_list_head;
+                        snprintf (key, sizeof (key), "%s-%d", "value", cnt);
+                        ret = dict_set_uint64 (resp, key, entry->value);
+                        if (ret)
+                                goto unlock_list_head;
+                        /* Only the throughput lists carry a timestamp. */
+                        if (index != IOS_STATS_THRU_MAX) {
+                                snprintf (key, sizeof (key), "%s-%d",
+                                          "time-sec", cnt);
+                                ret = dict_set_int32 (resp, key,
+                                        entry->iosstat->thru_counters[index].time.tv_sec);
+                                if (ret)
+                                        goto unlock_list_head;
+                                snprintf (key, sizeof (key), "%s-%d",
+                                          "time-usec", cnt);
+                                ret = dict_set_int32 (resp, key,
+                                        entry->iosstat->thru_counters[index].time.tv_usec);
+                                if (ret)
+                                        goto unlock_list_head;
+                        }
+                        if (cnt == list_cnt)
+                                break;
+
+                }
+        }
+unlock_list_head:
+        UNLOCK (&list_head->lock);
+        /* ret is != 0 if some dict operation in the above critical region
+         * failed. */
+        if (ret)
+                goto out;
+        ret = dict_set_int32 (resp, "members", cnt);
+ out:
+        return ret;
+}
int
io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
path = frame->local;
frame->local = NULL;
@@ -357,21 +1846,44 @@ io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gettimeofday (&iosfd->opened_at, NULL);
ios_fd_ctx_set (fd, this, iosfd);
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
+ }
+ }
+ UNLOCK (&conf->lock);
+
+ iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
+ if (!iosstat) {
+ GF_FREE (path);
+ goto unwind;
+ }
+ iosstat->filename = gf_strdup (path);
+ gf_uuid_copy (iosstat->gfid, buf->ia_gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
unwind:
+ UPDATE_PROFILE_STATS (frame, CREATE);
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+ conf = this->private;
path = frame->local;
frame->local = NULL;
@@ -394,17 +1906,46 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_fd_ctx_set (fd, this, iosfd);
+ ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = GF_CALLOC (1, sizeof (*iosstat),
+ gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ iosstat->filename = gf_strdup (path);
+ gf_uuid_copy (iosstat->gfid, fd->inode->gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+ }
+ }
+
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
+ }
+ }
+ UNLOCK (&conf->lock);
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_OPEN);
+ iosstat = NULL;
+ }
unwind:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ UPDATE_PROFILE_STATS (frame, OPEN);
+
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
+
}
int
io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, STAT);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -413,13 +1954,11 @@ int
io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *buf, struct iobref *iobref)
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
- struct ios_conf *conf = NULL;
int len = 0;
fd_t *fd = NULL;
-
- conf = this->private;
+ struct ios_stat *iosstat = NULL;
fd = frame->local;
frame->local = NULL;
@@ -429,19 +1968,46 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
BUMP_READ (fd, len);
}
+ UPDATE_PROFILE_STATS (frame, READ);
+ ios_inode_ctx_get (fd->inode, this, &iosstat);
+
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_READ);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_READ);
+ iosstat = NULL;
+ }
+
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
return 0;
+
}
int
io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = NULL;
+
+ UPDATE_PROFILE_STATS (frame, WRITE);
+ if (frame->local){
+ inode = frame->local;
+ frame->local = NULL;
+ ios_inode_ctx_get (inode, this, &iosstat);
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
+ inode = NULL;
+ iosstat = NULL;
+ }
+ }
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
+
}
@@ -449,18 +2015,33 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = frame->local;
+
+ frame->local = NULL;
+
+ UPDATE_PROFILE_STATS (frame, READDIRP);
+
+ ios_inode_ctx_get (inode, this, &iosstat);
+
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP);
+ iosstat = NULL;
+ }
+
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, READDIR);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -468,9 +2049,10 @@ io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ UPDATE_PROFILE_STATS (frame, FSYNC);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -478,9 +2060,10 @@ io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
+ UPDATE_PROFILE_STATS (frame, SETATTR);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
return 0;
}
@@ -488,11 +2071,13 @@ io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, UNLINK);
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
+
}
@@ -500,11 +2085,12 @@ int
io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, RENAME);
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
+ prenewparent, postnewparent, xdata);
return 0;
}
@@ -512,9 +2098,10 @@ io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *sbuf)
+ struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf);
+ UPDATE_PROFILE_STATS (frame, READLINK);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
return 0;
}
@@ -523,9 +2110,10 @@ int
io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr,
+ UPDATE_PROFILE_STATS (frame, LOOKUP);
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
}
@@ -535,10 +2123,11 @@ int
io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, SYMLINK);
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -547,10 +2136,11 @@ int
io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, MKNOD);
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -559,10 +2149,33 @@ int
io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
+ struct ios_stat *iosstat = NULL;
+ char *path = frame->local;
+
+ UPDATE_PROFILE_STATS (frame, MKDIR); /* account the fop even when no path was saved */
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ LOCK_INIT (&iosstat->lock);
+ iosstat->filename = gf_strdup(path);
+ gf_uuid_copy (iosstat->gfid, buf->ia_gfid);
+ ios_inode_ctx_set (inode, this, iosstat);
+ }
+
+unwind:
+ /* local is assigned with path */
+ GF_FREE (frame->local);
+ frame->local = NULL;
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -571,31 +2184,44 @@ int
io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, LINK);
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FLUSH);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- if (op_ret >= 0)
- ios_fd_ctx_set (fd, this, 0);
+ struct ios_stat *iosstat = NULL;
+ int ret = -1;
+
+ UPDATE_PROFILE_STATS (frame, OPENDIR);
+ if (op_ret < 0)
+ goto unwind;
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ ios_fd_ctx_set (fd, this, 0);
+
+ ret = ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!ret && iosstat) /* guard the pointer too, as the other callbacks do */
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
+
+unwind:
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -603,10 +2229,13 @@ io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+
+ UPDATE_PROFILE_STATS (frame, RMDIR);
+
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -614,64 +2243,100 @@ io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, TRUNCATE);
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, STATFS);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, SETXATTR);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS (frame, GETXATTR);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FSETXATTR);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FGETXATTR);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FSYNCDIR);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, ACCESS);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -679,372 +2344,408 @@ io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS (frame, FTRUNCATE);
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, FSTAT);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ UPDATE_PROFILE_STATS (frame, LK);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
int
io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, ENTRYLK);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS (frame, XATTROP);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS (frame, FXATTROP);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, INODELK);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
io_stats_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- BUMP_FOP (ENTRYLK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
int
io_stats_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- BUMP_FOP (INODELK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
int
io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FINODELK);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-io_stats_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- BUMP_FOP (FINODELK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
int
-io_stats_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- BUMP_FOP (XATTROP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
-
+ loc, flags, dict, xdata);
return 0;
}
int
-io_stats_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- BUMP_FOP (FXATTROP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
+ fd, flags, dict, xdata);
return 0;
}
int
io_stats_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
- BUMP_FOP (LOOKUP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
+ loc, xdata);
return 0;
}
int
-io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- BUMP_FOP (STAT);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
-
+ loc, xdata);
return 0;
}
int
io_stats_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
- BUMP_FOP (READLINK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
-
+ loc, size, xdata);
return 0;
}
int
-io_stats_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev)
+io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
- BUMP_FOP (MKNOD);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev);
-
+ loc, mode, dev, umask, xdata);
return 0;
}
int
io_stats_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- BUMP_FOP (MKDIR);
+ if (loc->path)
+ frame->local = gf_strdup (loc->path);
+
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
+ loc, mode, umask, xdata);
return 0;
}
int
io_stats_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
- BUMP_FOP (UNLINK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
int
io_stats_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int flags, dict_t *xdata)
{
- BUMP_FOP (RMDIR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
-
+ loc, flags, xdata);
return 0;
}
int
-io_stats_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc)
+io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- BUMP_FOP (SYMLINK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
-
+ linkpath, loc, umask, xdata);
return 0;
}
int
io_stats_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- BUMP_FOP (RENAME);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
-
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- BUMP_FOP (LINK);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid)
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- BUMP_FOP (SETATTR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
-
+ loc, stbuf, valid, xdata);
return 0;
}
int
io_stats_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- BUMP_FOP (TRUNCATE);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
-
+ loc, offset, xdata);
return 0;
}
int
-io_stats_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, int32_t wbflags)
+io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- BUMP_FOP (OPEN);
+ if (loc->path)
+ frame->local = gf_strdup (loc->path);
- frame->local = gf_strdup (loc->path);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
int
io_stats_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
- BUMP_FOP (CREATE);
+ if (loc->path)
+ frame->local = gf_strdup (loc->path);
- frame->local = gf_strdup (loc->path);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
int
io_stats_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- BUMP_FOP (READ);
-
frame->local = fd;
+ START_FOP_LATENCY (frame);
+
STACK_WIND (frame, io_stats_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -1053,69 +2754,69 @@ int
io_stats_writev (call_frame_t *frame, xlator_t *this,
fd_t *fd, struct iovec *vector,
int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- struct ios_conf *conf = NULL;
int len = 0;
- conf = this->private;
-
+ if (fd->inode)
+ frame->local = fd->inode;
len = iov_length (vector, count);
- BUMP_FOP (WRITE);
BUMP_WRITE (fd, len);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
+
}
int
io_stats_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
- BUMP_FOP (STATFS);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
int
io_stats_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- BUMP_FOP (FLUSH);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
int
io_stats_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags)
+ fd_t *fd, int32_t flags, dict_t *xdata)
{
- BUMP_FOP (FSYNC);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-void
+int
conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
{
struct {
@@ -1123,249 +2824,487 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
inode_t *inode;
const char *path;
} *stub;
- xlator_t *this = NULL;
- inode_t *inode = NULL;
- const char *path = NULL;
- char *filename = NULL;
+ xlator_t *this = NULL;
+ char *filename = NULL;
+ FILE *logfp = NULL;
+ struct ios_dump_args args = {0};
+ int pid, namelen;
+ char dump_key[100];
+ char *slash_ptr = NULL;
stub = data;
this = stub->this;
- inode = stub->inode;
- path = stub->path;
- filename = alloca (value->len + 1);
- memset (filename, 0, value->len + 1);
+ /* Create a file name that is appended with the io-stats instance
+ name as well. This helps when there is more than a single io-stats
+ instance in the graph, or the client and server processes are running
+ on the same node */
+ /* hmmm... no check for this */
+ /* name format: <passed in path/filename>.<xlator name slashes to -> */
+ namelen = value->len + strlen (this->name) + 2; /* '.' and '\0' */
+ filename = alloca (namelen);
+ memset (filename, 0, namelen);
memcpy (filename, data_to_str (value), value->len);
+ memcpy (filename + value->len, ".", 1);
+ memcpy (filename + value->len + 1, this->name, strlen(this->name));
+
+ /* convert any slashes to '-' so that fopen works correctly */
+ slash_ptr = strchr (filename + value->len + 1, '/');
+ while (slash_ptr) {
+ *slash_ptr = '-';
+ slash_ptr = strchr (slash_ptr, '/');
+ }
+
+ pid = getpid ();
- if (fnmatch ("*io*stat*dump", key, 0) == 0) {
- io_stats_dump (this, filename, inode, path);
+ if (!strncmp (filename, "", 1)) {
+ gf_log (this->name, GF_LOG_ERROR, "No filename given");
+ return -1;
}
+ logfp = fopen (filename, "w+");
+ if (!logfp) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
+ "for writing", filename);
+ return -1;
+ }
+ sprintf (dump_key, "*io*stat*%d_json_dump", pid);
+ if (fnmatch (dump_key, key, 0) == 0) {
+ (void) ios_dump_args_init (
+ &args, IOS_DUMP_TYPE_JSON_FILE,
+ logfp);
+ } else {
+ (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
+ logfp);
+ }
+ io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
+ fclose (logfp);
+ return 0;
}
+int
+_ios_destroy_dump_thread (struct ios_conf *conf) {
+ conf->dump_thread_should_die = _gf_true;
+ if (conf->ios_dump_interval > 0) {
+ (void) pthread_cancel (conf->dump_thread);
+ (void) pthread_join (conf->dump_thread, NULL);
+ }
+ return 0;
+}
+
+void *
+_ios_dump_thread (xlator_t *this) {
+ struct ios_conf *conf = NULL;
+ FILE *stats_logfp = NULL;
+ FILE *samples_logfp = NULL;
+ struct ios_dump_args args = {0};
+ int i;
+ int stats_bytes_written = 0;
+ int samples_bytes_written = 0;
+ char stats_filename[PATH_MAX];
+ char samples_filename[PATH_MAX];
+ char *xlator_name;
+ char *instance_name;
+ gf_boolean_t log_stats_fopen_failure = _gf_true;
+ gf_boolean_t log_samples_fopen_failure = _gf_true;
+ int old_cancel_type;
+
+ conf = this->private;
+ gf_log (this->name, GF_LOG_INFO, "IO stats dump thread started, "
+ "polling IO stats every %d seconds", conf->ios_dump_interval);
+ xlator_name = strdupa (this->name);
+ for (i = 0; i < strlen (xlator_name); i++) {
+ if (xlator_name[i] == '/')
+ xlator_name[i] = '_';
+ }
+ instance_name = this->instance_name;
+ if (this->name && strcmp (this->name, "glustershd") == 0) {
+ xlator_name = "shd";
+ } else if (this->prev &&
+ strcmp (this->prev->name, "nfs-server") == 0) {
+ xlator_name = "nfsd";
+ instance_name = this->prev->instance_name;
+ }
+ if (sys_mkdir (_IOS_DUMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) {
+ if (errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create stats-dump directory %s",
+ _IOS_DUMP_DIR);
+ goto out;
+ }
+ }
+ if (sys_mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) {
+ if (errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create stats-sample directory %s",
+ _IOS_SAMP_DIR);
+ goto out;
+ }
+ }
+ if (instance_name) {
+ stats_bytes_written = snprintf (stats_filename, PATH_MAX,
+ "%s/%s_%s_%s.dump", _IOS_DUMP_DIR,
+ __progname, xlator_name, instance_name);
+ samples_bytes_written = snprintf (samples_filename, PATH_MAX,
+ "%s/%s_%s_%s.samp", _IOS_SAMP_DIR,
+ __progname, xlator_name, instance_name);
+ } else {
+ stats_bytes_written = snprintf (stats_filename, PATH_MAX,
+ "%s/%s_%s.dump", _IOS_DUMP_DIR, __progname,
+ xlator_name);
+ samples_bytes_written = snprintf (samples_filename, PATH_MAX,
+ "%s/%s_%s.samp", _IOS_SAMP_DIR, __progname,
+ xlator_name);
+ }
+ if ((stats_bytes_written >= PATH_MAX) ||
+ (samples_bytes_written >= PATH_MAX)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid path for stats dump (%s) and/or latency "
+ "samples (%s)", stats_filename, samples_filename);
+ goto out;
+ }
+ while (1) {
+ if (conf->dump_thread_should_die)
+ break;
+ (void) pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS,
+ &old_cancel_type);
+ sleep (conf->ios_dump_interval);
+ (void) pthread_setcanceltype (PTHREAD_CANCEL_DEFERRED,
+ &old_cancel_type);
+ /*
+ * It's not clear whether we should reopen this each time, or
+ * just hold it open and rewind/truncate on each iteration.
+ * Leaving it alone for now.
+ */
+ stats_logfp = fopen (stats_filename, "w+");
+ if (stats_logfp) {
+ (void) ios_dump_args_init (&args,
+ IOS_DUMP_TYPE_JSON_FILE,
+ stats_logfp);
+ io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
+ fclose (stats_logfp);
+ log_stats_fopen_failure = _gf_true;
+ } else if (log_stats_fopen_failure) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not open stats-dump file %s (%s)",
+ stats_filename, strerror(errno));
+ log_stats_fopen_failure = _gf_false;
+ }
+ samples_logfp = fopen (samples_filename, "w+");
+ if (samples_logfp) {
+ io_stats_dump_latency_samples_logfp (this,
+ samples_logfp);
+ fclose (samples_logfp);
+ log_samples_fopen_failure = _gf_true;
+ } else if (log_samples_fopen_failure) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not open samples-dump file %s (%s)",
+ samples_filename, strerror(errno));
+ log_samples_fopen_failure = _gf_false;
+ }
+ }
+out:
+ gf_log (this->name, GF_LOG_INFO, "IO stats dump thread terminated");
+ return NULL;
+}
+
+static gf_boolean_t
+match_special_xattr (dict_t *d, char *k, data_t *val, void *mdata)
+{
+ gf_boolean_t ret = _gf_false;
+ if (fnmatch ("*io*stat*dump", k, 0) == 0) {
+ ret = _gf_true;
+ }
+
+ return ret;
+}
int
io_stats_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
+ int ret = 0;
struct {
xlator_t *this;
inode_t *inode;
const char *path;
} stub;
- BUMP_FOP (SETXATTR);
-
stub.this = this;
stub.inode = loc->inode;
stub.path = loc->path;
- dict_foreach (dict, conditional_dump, &stub);
+ ret = dict_foreach_match (dict, match_special_xattr, NULL,
+ conditional_dump, &stub);
+
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
int
io_stats_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- BUMP_FOP (GETXATTR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
int
io_stats_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- BUMP_FOP (REMOVEXATTR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
+ return 0;
+}
+
+int
+io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
}
int
io_stats_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
- BUMP_FOP (OPENDIR);
+
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- BUMP_FOP (READDIRP);
+ frame->local = fd->inode;
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
-
+ fd, size, offset, dict);
return 0;
}
int
io_stats_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata)
{
- BUMP_FOP (READDIR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
-
+ fd, size, offset, xdata);
return 0;
}
int
io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
- BUMP_FOP (FSYNCDIR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
int
io_stats_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
- BUMP_FOP (ACCESS);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
int
io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- BUMP_FOP (FTRUNCATE);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
-
+ fd, offset, xdata);
return 0;
}
int
io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- BUMP_FOP (FSETATTR);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
int
io_stats_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- BUMP_FOP (FSTAT);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
return 0;
}
int
-io_stats_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct flock *lock)
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- BUMP_FOP (LK);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
- return 0;
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
}
int
-io_stats_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint8_t *fchecksum, uint8_t *dchecksum)
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- STACK_UNWIND_STRICT (checksum, frame, op_ret, op_errno,
- fchecksum, dchecksum);
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
return 0;
}
int
-io_stats_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flag)
+io_stats_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_WIND (frame, io_stats_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc, flag);
+ START_FOP_LATENCY (frame);
+ STACK_WIND (frame, io_stats_lk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
return 0;
}
-
int
io_stats_release (xlator_t *this, fd_t *fd)
{
struct ios_fd *iosfd = NULL;
+ struct ios_conf *conf = NULL;
BUMP_FOP (RELEASE);
+ conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens--;
+ }
+ UNLOCK (&conf->lock);
+
ios_fd_ctx_get (fd, this, &iosfd);
if (iosfd) {
io_stats_dump_fd (this, iosfd);
- if (iosfd->filename)
- GF_FREE (iosfd->filename);
+ GF_FREE (iosfd->filename);
GF_FREE (iosfd);
}
@@ -1386,10 +3325,266 @@ int
io_stats_forget (xlator_t *this, inode_t *inode)
{
BUMP_FOP (FORGET);
+ ios_stats_cleanup (this, inode);
+ return 0;
+}
+
+static int
+ios_init_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+
+ GF_ASSERT (conf);
+
+ for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC (1,
+ sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT (&conf->list[i].lock);
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
+ conf->thru_list[i].iosstats = GF_CALLOC (1,
+ sizeof (*conf->thru_list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->thru_list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT (&conf->thru_list[i].lock);
+ }
+
+ return 0;
+}
+
+static void
+ios_destroy_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
+
+ GF_ASSERT (conf);
+
+ LOCK (&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ UNLOCK (&conf->lock);
+
+ return;
+}
+
+static int
+io_stats_clear (struct ios_conf *conf)
+{
+ struct timeval now;
+ int ret = -1;
+
+ GF_ASSERT (conf);
+
+ if (!gettimeofday (&now, NULL))
+ {
+ LOCK (&conf->lock);
+ {
+ ios_global_stats_clear (&conf->cumulative, &now);
+ ios_global_stats_clear (&conf->incremental, &now);
+ conf->increment = 0;
+ }
+ UNLOCK (&conf->lock);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int32_t
+io_priv (xlator_t *this)
+{
+ int i;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ char key_prefix_cumulative[GF_DUMP_MAX_BUF_LEN];
+ char key_prefix_incremental[GF_DUMP_MAX_BUF_LEN];
+ double min, max, avg;
+ uint64_t count, total;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ return -1;
+
+ if(!conf->count_fop_hits || !conf->measure_latency)
+ return -1;
+
+ gf_proc_dump_write("cumulative.data_read", "%"PRIu64,
+ conf->cumulative.data_read);
+ gf_proc_dump_write("cumulative.data_written", "%"PRIu64,
+ conf->cumulative.data_written);
+
+ gf_proc_dump_write("incremental.data_read", "%"PRIu64,
+ conf->incremental.data_read);
+ gf_proc_dump_write("incremental.data_written", "%"PRIu64,
+ conf->incremental.data_written);
+
+ snprintf (key_prefix_cumulative, GF_DUMP_MAX_BUF_LEN, "%s.cumulative",
+ this->name);
+ snprintf (key_prefix_incremental, GF_DUMP_MAX_BUF_LEN, "%s.incremental",
+ this->name);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ count = conf->cumulative.fop_hits[i];
+ total = conf->cumulative.latency[i].total;
+ min = conf->cumulative.latency[i].min;
+ max = conf->cumulative.latency[i].max;
+ avg = conf->cumulative.latency[i].avg;
+
+ gf_proc_dump_build_key (key, key_prefix_cumulative,
+ (char *)gf_fop_list[i]);
+
+ gf_proc_dump_write (key,"%"PRId64",%"PRId64",%.03f,%.03f,%.03f",
+ count, total, min, max, avg);
+
+ count = conf->incremental.fop_hits[i];
+ total = conf->incremental.latency[i].total;
+ min = conf->incremental.latency[i].min;
+ max = conf->incremental.latency[i].max;
+ avg = conf->incremental.latency[i].avg;
+
+ gf_proc_dump_build_key (key, key_prefix_incremental,
+ (char *)gf_fop_list[i]);
+
+ gf_proc_dump_write (key,"%"PRId64",%"PRId64",%.03f,%.03f,%.03f",
+ count, total, min, max, avg);
+
+ }
return 0;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ struct ios_conf *conf = NULL;
+ int ret = -1;
+ char *sys_log_str = NULL;
+ char *log_format_str = NULL;
+ char *logger_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int log_format = -1;
+ int logger = -1;
+ uint32_t log_buf_size = 0;
+ uint32_t log_flush_timeout = 0;
+ int32_t old_dump_interval;
+
+ if (!this || !this->private)
+ goto out;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("dump-fd-stats", conf->dump_fd_stats, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("count-fop-hits", conf->count_fop_hits, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("latency-measurement", conf->measure_latency,
+ options, bool, out);
+
+ old_dump_interval = conf->ios_dump_interval;
+ GF_OPTION_RECONF ("ios-dump-interval", conf->ios_dump_interval, options,
+ int32, out);
+ if ((old_dump_interval <= 0) && (conf->ios_dump_interval > 0)) {
+ pthread_create (&conf->dump_thread, NULL,
+ (void *) &_ios_dump_thread, this);
+ }
+
+ GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval,
+ options, int32, out);
+ GF_OPTION_RECONF ("ios-sample-buf-size", conf->ios_sample_buf_size,
+ options, int32, out);
+ GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
+ }
+
+ GF_OPTION_RECONF ("log-level", log_str, options, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
+ }
+
+ GF_OPTION_RECONF ("logger", logger_str, options, str, out);
+ if (logger_str) {
+ logger = gf_check_logger (logger_str);
+ gf_log_set_logger (logger);
+ }
+
+ GF_OPTION_RECONF ("log-format", log_format_str, options, str, out);
+ if (log_format_str) {
+ log_format = gf_check_log_format (log_format_str);
+ gf_log_set_logformat (log_format);
+ }
+
+ GF_OPTION_RECONF ("log-buf-size", log_buf_size, options, uint32, out);
+ gf_log_set_log_buf_size (log_buf_size);
+
+ GF_OPTION_RECONF ("log-flush-timeout", log_flush_timeout, options,
+ time, out);
+ gf_log_set_log_flush_timeout (log_flush_timeout);
+
+ ret = 0;
+out:
+ gf_log (this ? this->name : "io-stats",
+ GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
+}
+
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -1399,7 +3594,7 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_io_stats_mt_end + 1);
-
+
if (ret != 0) {
gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
" failed");
@@ -1409,64 +3604,142 @@ mem_acct_init (xlator_t *this)
return ret;
}
+void
+ios_conf_destroy (struct ios_conf *conf)
+{
+ if (!conf)
+ return;
+
+ ios_destroy_top_stats (conf);
+ _ios_destroy_dump_thread (conf);
+ LOCK_DESTROY (&conf->lock);
+ GF_FREE(conf);
+}
+
int
init (xlator_t *this)
{
- dict_t *options = NULL;
struct ios_conf *conf = NULL;
- char *str = NULL;
- int ret = 0;
+ char *sys_log_str = NULL;
+ char *logger_str = NULL;
+ char *log_format_str = NULL;
+ int logger = -1;
+ int log_format = -1;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int ret = -1;
+ uint32_t log_buf_size = 0;
+ uint32_t log_flush_timeout = 0;
if (!this)
return -1;
- if (!this->children || this->children->next) {
+ if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
- "io_stats translator requires one subvolume");
+ "io_stats translator requires atleast one subvolume");
return -1;
}
if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
+ /* This is very much valid as io-stats currently is loaded
+ * on top of volumes on both client and server, hence this is
+ * not an warning message */
+ gf_log (this->name, GF_LOG_DEBUG,
"dangling volume. check volfile ");
}
- options = this->options;
-
conf = GF_CALLOC (1, sizeof(*conf), gf_io_stats_mt_ios_conf);
- if (!conf) {
+ if (!conf)
+ goto out;
+
+ /*
+ * Init it just after calloc, so that we are sure the lock is inited
+ * in case of error paths.
+ */
+ LOCK_INIT (&conf->lock);
+ LOCK_INIT (&conf->ios_sampling_lock);
+
+ gettimeofday (&conf->cumulative.started_at, NULL);
+ gettimeofday (&conf->incremental.started_at, NULL);
+
+ ret = ios_init_top_stats (conf);
+ if (ret)
+ goto out;
+
+ GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
+
+ GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out);
+
+ GF_OPTION_INIT ("latency-measurement", conf->measure_latency,
+ bool, out);
+
+ GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval,
+ int32, out);
+
+ GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval,
+ int32, out);
+
+ GF_OPTION_INIT ("ios-sample-buf-size", conf->ios_sample_buf_size,
+ int32, out);
+
+ if (ios_init_sample_buf (conf) != 0) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
return -1;
}
- LOCK_INIT (&conf->lock);
+ GF_OPTION_INIT ("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec,
+ int32, out);
+ conf->dnscache = gf_dnscache_init (conf->ios_dnscache_ttl_sec);
- gettimeofday (&conf->cumulative.started_at, NULL);
- gettimeofday (&conf->incremental.started_at, NULL);
+ GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
+ }
- ret = dict_get_str (options, "dump-fd-stats", &str);
- if (ret == 0) {
- ret = gf_string2boolean (str, &conf->dump_fd_stats);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'dump-fd-stats' takes only boolean arguments");
- return -1;
- }
+ GF_OPTION_INIT ("log-level", log_str, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
+ }
- if (conf->dump_fd_stats) {
- gf_log (this->name, GF_LOG_DEBUG,
- "enabling dump-fd-stats");
- }
+ GF_OPTION_INIT ("logger", logger_str, str, out);
+ if (logger_str) {
+ logger = gf_check_logger (logger_str);
+ gf_log_set_logger (logger);
}
+ GF_OPTION_INIT ("log-format", log_format_str, str, out);
+ if (log_format_str) {
+ log_format = gf_check_log_format (log_format_str);
+ gf_log_set_logformat (log_format);
+ }
+
+ GF_OPTION_INIT ("log-buf-size", log_buf_size, uint32, out);
+ gf_log_set_log_buf_size (log_buf_size);
+
+ GF_OPTION_INIT ("log-flush-timeout", log_flush_timeout, time, out);
+ gf_log_set_log_flush_timeout (log_flush_timeout);
+
+
this->private = conf;
+ if (conf->ios_dump_interval > 0) {
+ pthread_create (&conf->dump_thread, NULL,
+ (void *) &_ios_dump_thread, this);
+ }
+ ret = 0;
+out:
+ if (!this->private) {
+ ios_conf_destroy (conf);
+ ret = -1;
+ }
- return 0;
+ return ret;
}
-
void
fini (xlator_t *this)
{
@@ -1477,13 +3750,133 @@ fini (xlator_t *this)
conf = this->private;
- GF_FREE(conf);
-
- gf_log (this->name, GF_LOG_NORMAL,
+ ios_conf_destroy (conf);
+ this->private = NULL;
+ gf_log (this->name, GF_LOG_INFO,
"io-stats translator unloaded");
return;
}
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = 0;
+ struct ios_dump_args args = {0};
+ dict_t *output = NULL;
+ dict_t *dict = NULL;
+ int32_t op = 0;
+ int32_t list_cnt = 0;
+ double throughput = 0;
+ double time = 0;
+ gf_boolean_t is_peek = _gf_false;
+ va_list ap;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+ va_end (ap);
+ switch (event) {
+ case GF_EVENT_TRANSLATOR_INFO:
+ ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32 (output, "top-op", op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats (this->private);
+ ret = ios_init_top_stats (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "top-op", &op);
+ if (!ret) {
+ ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
+ if (op > IOS_STATS_TYPE_NONE &&
+ op < IOS_STATS_TYPE_MAX)
+ ret = io_stats_dump_stats_to_dict (this, output,
+ op, list_cnt);
+ if (op == IOS_STATS_TYPE_READ_THROUGHPUT ||
+ op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
+ ret = dict_get_double (dict, "throughput",
+ &throughput);
+ if (!ret) {
+ ret = dict_get_double (dict, "time",
+ &time);
+ if (ret)
+ goto out;
+ ret = dict_set_double (output,
+ "throughput", throughput);
+ if (ret)
+ goto out;
+ ret = dict_set_double (output, "time",
+ time);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+
+ }
+ } else {
+ ret = dict_get_int32 (dict, "info-op", &op);
+ if (ret || op < GF_CLI_INFO_ALL ||
+ GF_CLI_INFO_CLEAR < op)
+ op = GF_CLI_INFO_ALL;
+
+ ret = dict_set_int32 (output, "info-op", op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set info-op in dict");
+ goto out;
+ }
+
+ if (GF_CLI_INFO_CLEAR == op) {
+ ret = io_stats_clear (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to clear info stats");
+
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ }
+ else {
+ ret = dict_get_str_boolean (dict, "peek",
+ _gf_false);
+ if (-1 != ret)
+ is_peek = ret;
+
+ (void) ios_dump_args_init (&args,
+ IOS_DUMP_TYPE_DICT, output);
+ ret = io_stats_dump (this, &args, op, is_peek);
+ }
+ }
+ break;
+ default:
+ default_notify (this, event, data);
+ break;
+
+ }
+out:
+ return ret;
+}
+
+struct xlator_dumpops dumpops = {
+ .priv = io_priv
+};
struct xlator_fops fops = {
.stat = io_stats_stat,
@@ -1505,6 +3898,9 @@ struct xlator_fops fops = {
.setxattr = io_stats_setxattr,
.getxattr = io_stats_getxattr,
.removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
.opendir = io_stats_opendir,
.readdir = io_stats_readdir,
.readdirp = io_stats_readdirp,
@@ -1518,11 +3914,13 @@ struct xlator_fops fops = {
.finodelk = io_stats_finodelk,
.entrylk = io_stats_entrylk,
.lookup = io_stats_lookup,
- .checksum = io_stats_checksum,
.xattrop = io_stats_xattrop,
.fxattrop = io_stats_fxattrop,
.setattr = io_stats_setattr,
.fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
};
struct xlator_cbks cbks = {
@@ -1534,6 +3932,160 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{ .key = {"dump-fd-stats"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to file-operations would be "
+ "tracked inside GlusterFS data-structures."
+ },
+ { .key = { "ios-dump-interval" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 3600,
+ .default_value = "0",
+ .description = "Interval (in seconds) at which to auto-dump "
+ "statistics. Zero disables automatic dumping."
+ },
+ { .key = { "ios-sample-interval" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 65535,
+ .default_value = "0",
+ .description = "Interval in which we want to collect FOP latency "
+ "samples. 2 means collect a sample every 2nd FOP."
+ },
+ { .key = { "ios-sample-buf-size" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1024,
+ .max = 1024*1024,
+ .default_value = "65535",
+ .description = "The maximum size of our FOP sampling ring buffer."
+ },
+ { .key = { "ios-dnscache-ttl-sec" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 3600 * 72,
+ .default_value = "86400",
+ .description = "The interval after wish a cached DNS entry will be "
+ "re-validated. Default: 24 hrs"
+ },
+ { .key = { "latency-measurement" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to the latency of each operation "
+ "would be tracked inside GlusterFS data-structures. "
+ },
+ { .key = {"count-fop-hits"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
+ },
+
+ /* These are synthetic entries to assist validation of CLI's *
+ * volume set command */
+ { .key = {"client-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "INFO",
+ .description = "Changes the log-level of the clients",
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
+ },
+ { .key = {"sys-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "CRITICAL",
+ .description = "Gluster's syslog log-level",
+ .value = { "WARNING", "ERROR", "INFO", "CRITICAL"}
+ },
+ { .key = {"brick-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "INFO",
+ .description = "Changes the log-level of the bricks",
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
+ },
+ { .key = {"logger"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}
+ },
+ { .key = {"client-logger"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = GF_LOGGER_GLUSTER_LOG,
+ .description = "Changes the logging sub-system to log to, for the "
+ "clients",
+ .value = { GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}
+ },
+ { .key = {"brick-logger"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = GF_LOGGER_GLUSTER_LOG,
+ .description = "Changes the logging sub-system to log to, for the "
+ "bricks",
+ .value = { GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}
+ },
+ { .key = {"log-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}
+ },
+ { .key = {"client-log-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = GF_LOG_FORMAT_WITH_MSG_ID,
+ .description = "Changes log format for the clients",
+ .value = { GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}
+ },
+ { .key = {"brick-log-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = GF_LOG_FORMAT_WITH_MSG_ID,
+ .description = "Changes the log format for the bricks",
+ .value = { GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}
+ },
+ { .key = {"log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+ .max = GF_LOG_LRU_BUFSIZE_MAX,
+ .default_value = "5",
+ },
+ { .key = {"client-log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+ .max = GF_LOG_LRU_BUFSIZE_MAX,
+ .default_value = "5",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option client-log-flush-timeout."
+ },
+ { .key = {"brick-log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+ .max = GF_LOG_LRU_BUFSIZE_MAX,
+ .default_value = "5",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option brick-log-flush-timeout."
+ },
+ { .key = {"log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+ .max = GF_LOG_FLUSH_TIMEOUT_MAX,
+ .default_value = "120",
+ },
+ { .key = {"client-log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+ .max = GF_LOG_FLUSH_TIMEOUT_MAX,
+ .default_value = "120",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option client-log-flush-timeout."
+ },
+ { .key = {"brick-log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+ .max = GF_LOG_FLUSH_TIMEOUT_MAX,
+ .default_value = "120",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option brick-log-flush-timeout."
},
{ .key = {NULL} },
+
};
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a049d..9bd53c89bfe 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,15 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 00000000000..9fa7d97c2ca
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trace_mem_types_ { /* mem-type ids declared for the trace translator */
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1, /* numbering continues right after gf_common_mt_end */
+ gf_trace_mt_end /* last enumerator; presumably an end-of-range marker -- TODO confirm */
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 31a1489b5d0..03e92184dcd 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,26 +1,15 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
@@ -28,1908 +17,2934 @@
* their _cbk functions, which later passes the call to next layer.
* Very helpful translator for debugging.
*/
+#define TRACE_STAT_TO_STR(buf, str) trace_stat_to_str (buf, str, sizeof (str)) /* str must be a char array, not a pointer, so that sizeof (str) is the buffer size */
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
+static void
+trace_stat_to_str(struct iatt *buf, char *str, size_t len) /* render the fields of *buf as text into str, writing at most len bytes */
+{
+ char atime_buf[256] = {0,};
+ char mtime_buf[256] = {0,};
+ char ctime_buf[256] = {0,};
-#define ERR_EINVAL_NORETURN(cond) \
- do \
- { \
- if ((cond)) \
- { \
- gf_log ("ERROR", GF_LOG_ERROR, \
- "%s: %s: (%s) is true", \
- __FILE__, __FUNCTION__, #cond); \
- } \
- } while (0)
+ if (!buf)
+ return; /* no iatt supplied: str is left exactly as the caller passed it */
+ gf_time_fmt (atime_buf, sizeof atime_buf, buf->ia_atime, /* presumably formats the timestamp as text; gf_time_fmt is defined elsewhere */
+ gf_timefmt_dirent);
-typedef struct trace_private {
- int32_t debug_flag;
-} trace_private_t;
+ gf_time_fmt (mtime_buf, sizeof mtime_buf, buf->ia_mtime,
+ gf_timefmt_dirent);
+ gf_time_fmt (ctime_buf, sizeof ctime_buf, buf->ia_ctime,
+ gf_timefmt_dirent);
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
+ snprintf (str, len, "gfid=%s ino=%"PRIu64", mode=%o, " /* bounded by len; each time is printed twice: formatted text, then raw sec/nsec */
+ "nlink=%"GF_PRI_NLINK", uid=%u, gid=%u, size=%"PRIu64", "
+ "blocks=%"PRIu64", atime=%s mtime=%s ctime=%s "
+ "atime_sec=%"PRIu32", atime_nsec=%"PRIu32","
+ " mtime_sec=%"PRIu32", mtime_nsec=%"PRIu32", "
+ "ctime_sec=%"PRIu32", ctime_nsec=%"PRIu32"",
+ uuid_utoa (buf->ia_gfid), buf->ia_ino,
+ st_mode_from_ia (buf->ia_prot, buf->ia_type), buf->ia_nlink,
+ buf->ia_uid, buf->ia_gid, buf->ia_size, buf->ia_blocks,
+ atime_buf, mtime_buf, ctime_buf,
+ buf->ia_atime, buf->ia_atime_nsec, /* NOTE(review): printed with PRIu32 -- confirm these iatt fields are 32-bit */
+ buf->ia_mtime, buf->ia_mtime_nsec,
+ buf->ia_ctime, buf->ia_ctime_nsec);
+}
-int trace_log_level = GF_LOG_NORMAL;
-static char *
-trace_stat_to_str (struct iatt *stbuf)
+int
+dump_history_trace (circular_buffer_t *cb, void *data) /* writes one buffered entry as a TIME line and a FOP line; data is unused; always returns 0 */
{
- char *statstr = NULL;
- char atime_buf[256] = {0,};
- char mtime_buf[256] = {0,};
- char ctime_buf[256] = {0,};
- int asprint_ret_value = 0;
+ char timestr[256] = {0,};
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_ctime));
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
- asprint_ret_value = gf_asprintf (&statstr,
- "ia_ino=%"PRIu64", ia_gen=%"PRIu64
- ", st_mode=%o, ia_nlink=%"GF_PRI_NLINK", "
- "ia_uid=%d, ia_gid=%d, ia_size=%"PRId64", ia_blocks=%"PRId64
- ", ia_atime=%s, ia_mtime=%s, ia_ctime=%s",
- stbuf->ia_ino, stbuf->ia_gen,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- stbuf->ia_nlink, stbuf->ia_uid,
- stbuf->ia_gid, stbuf->ia_size,
- stbuf->ia_blocks, atime_buf,
- mtime_buf, ctime_buf);
+ gf_time_fmt (timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_Ymd_T);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr), /* append ".<usec>"; bound follows the array size, not a literal */
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
- if (asprint_ret_value < 0)
- statstr = NULL;
+ gf_proc_dump_write ("FOP", "%s\n", cb->data); /* cb->data is printed with %s, i.e. treated as a NUL-terminated string */
- return statstr;
+ return 0;
}
-
int
trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata) /* xdata is not inspected here; it is only passed on to the unwind */
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting, still unwind */
if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, fd=%p, ino=%"PRIu64" "
- "*stbuf {%s}, *preparent {%s}, *postparent = "
- "{%s})",
- frame->root->unique, op_ret, fd, inode->ino,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (preparentstr)
- GF_FREE (preparentstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string), /* three 4096-byte stat strings go into one 4096-byte line, so snprintf may truncate */
+ "%"PRId64": gfid=%s (op_ret=%d, fd=%p, "
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
+
+ /* for 'release' log */
+ fd_ctx_set (fd, this, 0); /* NOTE(review): only reached when create tracing is enabled; trace_open_cbk does this after its out: label */
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) /* xdata is only passed on to the unwind */
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, *fd=%p)",
- frame->root->unique, op_ret, op_errno, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno, /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ fd);
+
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+out:
+ /* for 'release' log */
+ if (op_ret >= 0) /* sits after out:, so it runs whether or not anything was logged above */
+ fd_ctx_set (fd, this, 0);
+
+ TRACE_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char atime_buf[256];
- char mtime_buf[256];
- char ctime_buf[256];
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_STAT].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, buf {ia_gen=%"PRIu64", "
- "ia_ino=%"PRIu64", st_mode=%o, ia_nlink=%"GF_PRI_NLINK", "
- "ia_uid=%d, ia_gid=%d, ia_rdev=%"PRIu64", ia_size=%"PRId64
- ", ia_blksize=%"GF_PRI_BLKSIZE", ia_blocks=%"PRId64", "
- "ia_atime=%s, ia_mtime=%s, ia_ctime=%s})",
- frame->root->unique, op_ret, buf->ia_gen, buf->ia_ino,
- st_mode_from_ia (buf->ia_prot, buf->ia_type),
- buf->ia_nlink, buf->ia_uid, buf->ia_gid,
- buf->ia_rdev, buf->ia_size, buf->ia_blksize,
- buf->ia_blocks, atime_buf, mtime_buf, ctime_buf);
+ char string[4096] = {0,};
+ if (op_ret == 0) { /* the removed code tested op_ret >= 0; only exactly 0 takes the stat-printing branch now */
+ TRACE_STAT_TO_STR (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ statstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", /* NOTE(review): closing ')' has no matching '(' in this format */
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref)
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char atime_buf[256];
- char mtime_buf[256];
- char ctime_buf[256];
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, *buf {ia_gen=%"PRIu64", "
- "ia_ino=%"PRIu64", st_mode=%o, ia_nlink=%"GF_PRI_NLINK", "
- "ia_uid=%d, ia_gid=%d, ia_rdev=%"PRIu64", "
- "ia_size=%"PRId64", ia_blksize=%"GF_PRI_BLKSIZE", "
- "ia_blocks=%"PRId64", ia_atime=%s, ia_mtime=%s, ia_ctime=%s})",
- frame->root->unique, op_ret, op_errno, buf->ia_gen, buf->ia_ino,
- st_mode_from_ia (buf->ia_prot, buf->ia_type),
- buf->ia_nlink, buf->ia_uid, buf->ia_gid,
- buf->ia_rdev, buf->ia_size, buf->ia_blksize, buf->ia_blocks,
- atime_buf, mtime_buf, ctime_buf);
+ TRACE_STAT_TO_STR (buf, statstr);
+ snprintf (string, sizeof (string), /* only the stat is logged; vector, count and iobref are not printed */
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ statstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", /* NOTE(review): closing ')' has no matching '(' in this format */
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
return 0;
}
-
int
trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- preopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino = %"PRIu64
- ", *prebuf = {%s}, *postbuf = {%s})",
- frame->root->unique, op_ret, postbuf->ia_ino,
- preopstr, postopstr);
-
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ TRACE_STAT_TO_STR (prebuf, preopstr);
+ TRACE_STAT_TO_STR (postbuf, postopstr); /* the removed code assigned preopstr twice and never filled postopstr */
+
+ snprintf (string, sizeof (string), /* NOTE(review): unlike the failure branch, this line carries no gfid= field */
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
-
int
trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata) /* xdata is only passed on to the unwind */
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64" :(op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string), /* same line for success and failure; the entries in buf are not printed */
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local), /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ op_ret, op_errno);
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
+ }
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata) /* xdata is only passed on to the unwind */
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64" :(op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string), /* same line for success and failure; the entries in buf are not printed */
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local), /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ op_ret, op_errno);
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
+ }
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- preopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino = %"PRIu64
- ", *prebuf = {%s}, *postbuf = {%s}",
- frame->root->unique, op_ret, postbuf->ia_ino,
- preopstr, postopstr);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preopstr)
- GF_FREE (preopstr);
+ conf = this->private;
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) { /* the removed code tested op_ret >= 0; only exactly 0 takes the stat-printing branch now */
+ TRACE_STAT_TO_STR (prebuf, preopstr);
+ TRACE_STAT_TO_STR (postbuf, postopstr); /* the removed code assigned preopstr twice and never filled postopstr */
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}", /* NOTE(review): the '(' opened above is never closed in this format */
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ op_errno);
+
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char atime_pre[256] = {0,};
- char mtime_pre[256] = {0,};
- char ctime_pre[256] = {0,};
- char atime_post[256] = {0,};
- char mtime_post[256] = {0,};
- char ctime_post[256] = {0,};
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_SETATTR].enabled) {
- if (op_ret >= 0) {
- strftime (atime_pre, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpre->ia_atime));
- strftime (mtime_pre, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpre->ia_mtime));
- strftime (ctime_pre, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpre->ia_ctime));
-
- strftime (atime_post, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpost->ia_atime));
- strftime (mtime_post, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpost->ia_mtime));
- strftime (ctime_post, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpost->ia_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *statpre "
- "{ia_ino=%"PRIu64", st_mode=%o, ia_uid=%d, "
- "ia_gid=%d, ia_atime=%s, ia_mtime=%s, "
- "ia_ctime=%s}, *statpost {ia_ino=%"PRIu64", "
- "st_mode=%o, ia_uid=%d, ia_gid=%d, ia_atime=%s,"
- " ia_mtime=%s, ia_ctime=%s})",
- frame->root->unique, op_ret, statpre->ia_ino,
- st_mode_from_ia (statpre->ia_prot, statpre->ia_type),
- statpre->ia_uid,
- statpre->ia_gid, atime_pre, mtime_pre,
- ctime_pre, statpost->ia_ino,
- st_mode_from_ia (statpost->ia_prot, statpost->ia_type),
- statpost->ia_uid, statpost->ia_gid, atime_post,
- mtime_post, ctime_post);
+ char string[4096] = {0,};
+ if (op_ret == 0) { /* the removed code tested op_ret >= 0; only exactly 0 takes the stat-printing branch now */
+ TRACE_STAT_TO_STR (statpre, preopstr);
+ TRACE_STAT_TO_STR (statpost, postopstr);
+
+ snprintf (string, sizeof (string), /* two 4096-byte stat strings go into one 4096-byte line, so snprintf may truncate */
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique, /* NOTE(review): closing ')' has no matching '(' in this format */
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
return 0;
}
-
int
trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char atime_pre[256] = {0,};
- char mtime_pre[256] = {0,};
- char ctime_pre[256] = {0,};
- char atime_post[256] = {0,};
- char mtime_post[256] = {0,};
- char ctime_post[256] = {0,};
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
- if (op_ret >= 0) {
- strftime (atime_pre, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpre->ia_atime));
- strftime (mtime_pre, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpre->ia_mtime));
- strftime (ctime_pre, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpre->ia_ctime));
-
- strftime (atime_post, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpost->ia_atime));
- strftime (mtime_post, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpost->ia_mtime));
- strftime (ctime_post, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&statpost->ia_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *statpre "
- "{ia_ino=%"PRIu64", st_mode=%o, ia_uid=%d, "
- "ia_gid=%d, ia_atime=%s, ia_mtime=%s, "
- "ia_ctime=%s}, *statpost {ia_ino=%"PRIu64", "
- "st_mode=%o, ia_uid=%d, ia_gid=%d, ia_atime=%s,"
- " ia_mtime=%s, ia_ctime=%s})",
- frame->root->unique, op_ret, statpre->ia_ino,
- st_mode_from_ia (statpre->ia_prot, statpre->ia_type),
- statpre->ia_uid,
- statpre->ia_gid, atime_pre, mtime_pre,
- ctime_pre, statpost->ia_ino,
- st_mode_from_ia (statpost->ia_prot, statpost->ia_type),
- statpost->ia_uid, statpost->ia_gid, atime_post,
- mtime_post, ctime_post);
+ char string[4096] = {0,};
+ if (op_ret == 0) { /* the removed code tested op_ret >= 0; only exactly 0 takes the stat-printing branch now */
+ TRACE_STAT_TO_STR (statpre, preopstr);
+ TRACE_STAT_TO_STR (statpost, postopstr);
+
+ snprintf (string, sizeof (string), /* two 4096-byte stat strings go into one 4096-byte line, so snprintf may truncate */
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)", /* NOTE(review): closing ')' has no matching '(' in this format */
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- statpre, statpost);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
return 0;
}
-
int
trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata) /* xdata is only passed on to the unwind */
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, op_ret, preparentstr,
- postparentstr);
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out; /* both log_file and log_history are off: skip formatting */
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) { /* the removed code tested op_ret >= 0; only exactly 0 takes the stat-printing branch now */
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})", /* NOTE(review): closing ')' has no matching '(' in this format */
+ frame->root->unique,
+ uuid_utoa (frame->local), /* frame->local is printed as a gfid; presumably stored by the matching fop -- TODO confirm */
+ op_ret, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string); /* LOG_ELEMENT is defined elsewhere (presumably trace.h) */
}
-
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+out: /* reached on every path: callback arguments are passed on unchanged */
+ TRACE_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preoldparentstr = NULL;
- char *postoldparentstr = NULL;
- char *prenewparentstr = NULL;
- char *postnewparentstr = NULL;
+ char statstr[4096] = {0, };
+ char preoldparentstr[4096] = {0, };
+ char postoldparentstr[4096] = {0, };
+ char prenewparentstr[4096] = {0, };
+ char postnewparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preoldparentstr = trace_stat_to_str (preoldparent);
- postoldparentstr = trace_stat_to_str (postoldparent);
-
- prenewparentstr = trace_stat_to_str (prenewparent);
- postnewparentstr = trace_stat_to_str (postnewparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- "*preoldparent = {%s}, *postoldparent = {%s}"
- " *prenewparent = {%s}, *postnewparent = {%s})",
- frame->root->unique, op_ret, statstr,
- preoldparentstr, postoldparentstr,
- prenewparentstr, postnewparentstr);
-
- if (preoldparentstr)
- GF_FREE (preoldparentstr);
-
- if (postoldparentstr)
- GF_FREE (postoldparentstr);
+ conf = this->private;
- if (prenewparentstr)
- GF_FREE (prenewparentstr);
-
- if (postnewparentstr)
- GF_FREE (postnewparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (preoldparent, preoldparentstr);
+ TRACE_STAT_TO_STR (postoldparent, postoldparentstr);
+ TRACE_STAT_TO_STR (prenewparent, prenewparentstr);
+ TRACE_STAT_TO_STR (postnewparent, postnewparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr,
+ preoldparentstr, postoldparentstr,
+ prenewparentstr, postnewparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+
}
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf {ia_ino=%"PRIu64"})",
- frame->root->unique, op_ret, op_errno,
- (buf? buf->ia_ino : 0));
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
+out:
+ TRACE_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
return 0;
}
-
int
trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *stbuf)
+ const char *buf, struct iatt *stbuf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
if (op_ret == 0) {
- statstr = trace_stat_to_str (stbuf);
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s, "
- "stbuf = { %s })",
- frame->root->unique, op_ret, op_errno, buf,
- statstr);
- } else
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
+ TRACE_STAT_TO_STR (stbuf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d,"
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno,
+ buf, statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
- if (statstr)
- GF_FREE (statstr);
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, stbuf);
+out:
+ TRACE_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, stbuf,
+ xdata);
return 0;
}
-
int
trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
- char *statstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*buf {%s}, *postparent {%s}",
- frame->root->unique, op_ret, inode->ino,
- statstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+ /* Print buf->ia_gfid rather than inode->gfid:
+ * on a fresh lookup the inode is not yet linked
+ * to the inode table, so printing inode->gfid
+ * would show a null gfid.
+ */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique,
+ uuid_utoa (buf->ia_gfid),
+ op_ret, statstr, postparentstr);
+
+ /* For 'forget' */
+ inode_ctx_put (inode, this, 0);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xattr, postparent);
+out:
+ TRACE_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
-
int
trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, op_ret, inode->ino,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, op_errno=%d",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, op_ret, inode->ino,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- preparentstr = trace_stat_to_str (postparent);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino = %"PRIu64
- ", *stbuf = {%s}, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret, buf->ia_ino,
- statstr, preparentstr, postparentstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino = %"PRIu64
- ", *stbuf = {%s}, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret, buf->ia_ino,
- statstr, preparentstr, postparentstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ statstr, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, fd=%p)",
- frame->root->unique, op_ret, op_errno, fd);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno, fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
+ /* for 'releasedir' log */
+ if (op_ret >= 0)
+ fd_ctx_set (fd, this, 0);
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, op_ret, preparentstr,
- postparentstr);
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (preparent, preparentstr);
+ TRACE_STAT_TO_STR (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret, preopstr,
- postopstr);
-
- if (preopstr)
- GF_FREE (preopstr);
+ conf = this->private;
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (prebuf, preopstr);
+ TRACE_STAT_TO_STR (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-
int
trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%"GF_PRI_FSBLK
+ ", f_bfree=%"GF_PRI_FSBLK", "
+ "f_bavail=%"GF_PRI_FSBLK", "
+ "f_files=%"GF_PRI_FSBLK", "
+ "f_ffree=%"GF_PRI_FSBLK", "
+ "f_favail=%"GF_PRI_FSBLK", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize,
+ buf->f_frsize, buf->f_blocks,
+ buf->f_bfree, buf->f_bavail,
+ buf->f_files, buf->f_ffree,
+ buf->f_favail, buf->f_fsid,
+ buf->f_flag, buf->f_namemax, op_ret);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, dict=%p)",
- frame->root->unique, op_ret, op_errno, dict);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT (conf, string);
}
+out:
+ TRACE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+int
+trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
+int
+trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
int
trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *prebufstr = NULL;
- char *postbufstr = NULL;
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- prebufstr = trace_stat_to_str (prebuf);
- postbufstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret,
- prebufstr, postbufstr);
-
- if (prebufstr)
- GF_FREE (prebufstr);
+ char prebufstr[4096] = {0, };
+ char postbufstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (postbufstr)
- GF_FREE (postbufstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (prebuf, prebufstr);
+ TRACE_STAT_TO_STR (postbuf, postbufstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ prebufstr, postbufstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- char atime_buf[256];
- char mtime_buf[256];
- char ctime_buf[256];
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&buf->ia_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {ia_gen=%"PRIu64", "
- "ia_ino=%"PRIu64", st_mode=%o, ia_nlink=%"GF_PRI_NLINK", "
- "ia_uid=%d, ia_gid=%d, ia_rdev=%"PRIu64", ia_size=%"PRId64", "
- "ia_blksize=%"GF_PRI_BLKSIZE", ia_blocks=%"PRId64", ia_atime=%s, "
- "ia_mtime=%s, ia_ctime=%s})",
- frame->root->unique, op_ret, buf->ia_gen, buf->ia_ino,
- st_mode_from_ia (buf->ia_prot, buf->ia_type),
- buf->ia_nlink, buf->ia_uid, buf->ia_gid,
- buf->ia_rdev, buf->ia_size, buf->ia_blksize,
- buf->ia_blocks, atime_buf, mtime_buf, ctime_buf);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d "
+ "buf=%s", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len,
+ lock->l_pid);
} else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
- }
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
-
-
int
trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
+int
+trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
}
+out:
+ TRACE_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
+int
+trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ TRACE_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+
+ return 0;
+}
+
+/* *_cbk section over <----------> fop section start */
int
trace_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (loc= {path=%s, ino=%"PRIu64"} basename=%s, cmd=%s, type=%s)",
- frame->root->unique, volume, loc->path, loc->inode->ino, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ volume, loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
-
int
trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *flock)
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (loc {path=%s, ino=%"PRIu64"}, cmd=%s)",
- frame->root->unique, volume, loc->path, loc->inode->ino,
- ((cmd == F_SETLK)? "F_SETLK" : "unknown"));
+ char string[4096] = {0,};
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), volume,
+ loc->path, cmd_str, type_str,
+ (unsigned long long)flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
-
int
-trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
- return 0;
-}
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
-int
-trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *flock)
-{
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (fd=%p, cmd=%s)",
- frame->root->unique, volume, fd,
- ((cmd == F_SETLK) ? "F_SETLK" : "unknown"));
- }
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ cmd_str, type_str,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
-
int
trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (path=%s, ino=%"PRIu64" flags=%d)",
- frame->root->unique, loc->path, loc->inode->ino, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (path=%s flags=%d)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
+ loc, flags, dict, xdata);
return 0;
}
-
int
trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, flags=%d)",
- frame->root->unique, fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
+ frame->local = fd->inode->gfid;
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
+ fd, flags, dict, xdata);
return 0;
}
-
int
trace_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
/* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path,
- loc->inode->ino);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
-
int
-trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, size=%"GF_PRI_SIZET")",
- frame->root->unique, loc->path, loc->inode->ino, size);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "size=%"GF_PRI_SIZET")", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ size);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
-
int
trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev)
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%d, dev=%"GF_PRI_DEV")",
- frame->root->unique, loc->path, loc->inode->ino, mode, dev);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%"GF_PRI_DEV")",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask, dev);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev);
+ loc, mode, dev, umask, xdata);
return 0;
}
-
int
-trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (path=%s, ino=%"PRIu64", mode=%d)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
+ loc, mode, umask, xdata);
return 0;
}
-
int
-trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flag=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ xflag);
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
-
int
-trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
+ loc, flags, xdata);
return 0;
}
-
int
trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (linkpath=%s, loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, linkpath, loc->path,
- ((loc->inode)? loc->inode->ino : 0));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), linkpath,
+ loc->path, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
+ linkpath, loc, umask, xdata);
return 0;
}
-
int
-trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RENAME].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
- "newloc{path=%s, ino=%"PRIu64"})",
- frame->root->unique, oldloc->path, oldloc->ino,
- newloc->path, newloc->ino);
+ char string[4096] = {0,};
+ if (newloc->inode)
+ uuid_utoa_r (newloc->inode->gfid, newgfid);
+ else
+ strcpy (newgfid, "0");
+
+ uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid,
+ oldloc->path, newgfid, newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
-trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
- "newloc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, oldloc->path, oldloc->inode->ino,
- newloc->path, newloc->inode->ino);
+ char string[4096] = {0,};
+ if (newloc->inode)
+ uuid_utoa_r (newloc->inode->gfid, newgfid);
+ else
+ strcpy (newgfid, "0");
+
+ uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s", frame->root->unique,
+ oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"},"
- " mode=%o)", frame->root->unique, loc->path,
- loc->inode->ino,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%o)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"},"
- " uid=%o, gid=%o)",
- frame->root->unique, loc->path, loc->inode->ino,
- stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s uid=%o,"
+ " gid=%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, stbuf->ia_uid,
+ stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
- strftime (actime_str, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_atime));
- strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_mtime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, "
- "*stbuf=%p {ia_atime=%s, ia_mtime=%s})",
- frame->root->unique, loc->path, loc->inode->ino,
- stbuf, actime_str, modtime_str);
+ gf_time_fmt (actime_str, sizeof actime_str,
+ stbuf->ia_atime, gf_timefmt_bdT);
+
+ gf_time_fmt (modtime_str, sizeof modtime_str,
+ stbuf->ia_mtime, gf_timefmt_bdT);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
+ frame->local = loc->inode->gfid;
}
+out:
STACK_WIND (frame, trace_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
-
int
trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, mode=%o)",
- frame->root->unique, fd,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, mode=%o",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, uid=%o, gid=%o)",
- frame->root->unique, fd,
- stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, uid=%o, "
+ "gid=%o", frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, stbuf->ia_uid, stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
- strftime (actime_str, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_atime));
- strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&stbuf->ia_mtime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p"
- "*stbuf=%p {ia_atime=%s, ia_mtime=%s})",
- frame->root->unique, fd, stbuf, actime_str,
- modtime_str);
+ gf_time_fmt (actime_str, sizeof actime_str,
+ stbuf->ia_atime, gf_timefmt_bdT);
+
+ gf_time_fmt (modtime_str, sizeof modtime_str,
+ stbuf->ia_mtime, gf_timefmt_bdT);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
+ frame->local = fd->inode->gfid;
}
+out:
STACK_WIND (frame, trace_fsetattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
-
int
trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, offset=%"PRId64")",
- frame->root->unique, loc->path, loc->inode->ino, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "offset=%"PRId64"", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ offset);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
}
-
int
trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=%d, "
- "fd=%p, wbflags=%d)",
- frame->root->unique, loc->path, loc->inode->ino, flags,
- fd, wbflags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags, fd);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
-
int
trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=0%o mode=0%o)",
- frame->root->unique, loc->path, loc->inode->ino, flags, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
-
int
trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"
+ GF_PRI_SIZET"offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
-
int
trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
+ off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+ int i = 0;
+ size_t total_size = 0;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, *vector=%p, count=%d, offset=%"PRId64")",
- frame->root->unique, fd, vector, count, offset);
+ char string[4096] = {0,};
+ for (i = 0; i < count; i++)
+ total_size += vector[i].iov_len;
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, count=%d, "
+ " offset=%"PRId64" flags=0%x write_size=%zu",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, count,
+ offset, flags, total_size);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
-
int
-trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique, (loc->inode)?
+ uuid_utoa (loc->inode->gfid):"0", loc->path);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p)",
- frame->root->unique, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
-
int
-trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (flags=%d, *fd=%p)",
- frame->root->unique, flags, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), flags, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-
int
trace_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, dict=%p, flags=%d)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), dict, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
-
int
trace_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}), name=%s",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
trace_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, name=%s)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
-trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64":( loc {path=%s, ino=%"PRIu64"}, fd=%p)",
- frame->root->unique, loc->path, loc->inode->ino, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path, fd);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64" dict=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, dict);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
+ fd, size, offset, dict);
return 0;
}
-
int
trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64,
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
+ fd, size, offset, xdata);
return 0;
}
-
int
trace_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (datasync=%d, *fd=%p)",
- frame->root->unique, datasync, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s datasync=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), datasync, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
-
int
-trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*loc {path=%s, ino=%"PRIu64"}, mask=0%o)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), mask);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, mask);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
+ return 0;
+}
+
+int32_t
+trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64
+ "len=%u fd=%p", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, len, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rchecksum_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum,
+ fd, offset, len, xdata);
+
return 0;
+
}
+int32_t
+trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fentrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
+
+}
+
+int32_t
+trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p name=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, name);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+int32_t
+trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
int
trace_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (offset=%"PRId64", *fd=%p)",
- frame->root->unique, offset, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
return 0;
}
-
int
-trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p)",
- frame->root->unique, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
return 0;
}
-
int
trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct flock *lock)
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, fd, cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len, lock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
-
-int
-trace_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint8_t *fchecksum, uint8_t *dchecksum)
+int32_t
+trace_forget (xlator_t *this, inode_t *inode)
{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret (%d), op_errno(%d)",
- frame->root->unique, op_ret, op_errno);
+ trace_conf_t *conf = NULL;
- STACK_UNWIND_STRICT (checksum, frame, op_ret, op_errno,
- fchecksum, dchecksum);
+ conf = this->private;
+ /* If the user wants to understand when a lookup happens,
+ they should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s", uuid_utoa (inode->gfid));
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
return 0;
}
+int32_t
+trace_releasedir (xlator_t *this, fd_t *fd)
+{
+ trace_conf_t *conf = NULL;
-int
-trace_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ return 0;
+}
+
+int32_t
+trace_release (xlator_t *this, fd_t *fd)
{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": loc->path (%s) flag (%d)",
- frame->root->unique, loc->path, flag);
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc, flag);
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled ||
+ trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
return 0;
}
@@ -1943,7 +2958,6 @@ enable_all_calls (int enabled)
trace_fop_names[i].enabled = enabled;
}
-
void
enable_call (const char *name, int enabled)
{
@@ -1971,6 +2985,105 @@ process_call_list (const char *list, int include)
}
}
+int32_t
+trace_dump_history (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trace", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if the log-history option is off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key (key_prefix, "xlator.debug.trace",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_trace_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF ("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF ("log-history", conf->log_history, options, bool, out);
+
+ ret = 0;
+
+out:
+ return ret;
+}
int32_t
init (xlator_t *this)
@@ -1978,6 +3091,10 @@ init (xlator_t *this)
dict_t *options = NULL;
char *includes = NULL, *excludes = NULL;
char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ size_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
if (!this)
return -1;
@@ -1992,6 +3109,12 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
+ conf = GF_CALLOC (1, sizeof (trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot allocate "
+ "xl->private");
+ return -1;
+ }
options = this->options;
includes = data_to_str (dict_get (options, "include-ops"));
@@ -2000,8 +3123,13 @@ init (xlator_t *this)
{
int i;
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
trace_fop_names[i].enabled = 1;
}
}
@@ -2009,40 +3137,78 @@ init (xlator_t *this)
if (includes && excludes) {
gf_log (this->name,
GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
return -1;
}
+
if (includes)
process_call_list (includes, 1);
if (excludes)
process_call_list (excludes, 0);
+
+ GF_OPTION_INIT ("history-size", conf->history_size, size, out);
+
+ gf_log (this->name, GF_LOG_INFO, "history size %"GF_PRI_SIZET,
+ history_size);
+
+ GF_OPTION_INIT ("log-file", conf->log_file, bool, out);
+
+ gf_log (this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true)?"enabled":"disabled");
+
+ GF_OPTION_INIT ("log-history", conf->log_history, bool, out);
+
+ gf_log (this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true)?"enabled":"disabled");
+
+ history = eh_new (history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log (this->name, GF_LOG_ERROR, "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
if (dict_get (options, "force-log-level")) {
forced_loglevel = data_to_str (dict_get (options,
- "force-log-level"));
+ "force-log-level"));
if (!forced_loglevel)
goto setloglevel;
- if (strcmp (forced_loglevel, "NORMAL") == 0)
- trace_log_level = GF_LOG_NORMAL;
+ if (strcmp (forced_loglevel, "INFO") == 0)
+ conf->trace_log_level = GF_LOG_INFO;
else if (strcmp (forced_loglevel, "TRACE") == 0)
- trace_log_level = GF_LOG_TRACE;
+ conf->trace_log_level = GF_LOG_TRACE;
else if (strcmp (forced_loglevel, "ERROR") == 0)
- trace_log_level = GF_LOG_ERROR;
+ conf->trace_log_level = GF_LOG_ERROR;
else if (strcmp (forced_loglevel, "DEBUG") == 0)
- trace_log_level = GF_LOG_DEBUG;
+ conf->trace_log_level = GF_LOG_DEBUG;
else if (strcmp (forced_loglevel, "WARNING") == 0)
- trace_log_level = GF_LOG_WARNING;
+ conf->trace_log_level = GF_LOG_WARNING;
else if (strcmp (forced_loglevel, "CRITICAL") == 0)
- trace_log_level = GF_LOG_CRITICAL;
+ conf->trace_log_level = GF_LOG_CRITICAL;
else if (strcmp (forced_loglevel, "NONE") == 0)
- trace_log_level = GF_LOG_NONE;
+ conf->trace_log_level = GF_LOG_NONE;
}
setloglevel:
- gf_log_set_loglevel (trace_log_level);
+ gf_log_set_loglevel (conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE (history);
+ if (conf)
+ GF_FREE (conf);
+ }
- return 0;
+ return ret;
}
void
@@ -2051,7 +3217,10 @@ fini (xlator_t *this)
if (!this)
return;
- gf_log (this->name, GF_LOG_NORMAL,
+ if (this->history)
+ eh_destroy (this->history);
+
+ gf_log (this->name, GF_LOG_INFO,
"trace translator unloaded");
return;
}
@@ -2075,6 +3244,8 @@ struct xlator_fops fops = {
.fsync = trace_fsync,
.setxattr = trace_setxattr,
.getxattr = trace_getxattr,
+ .fsetxattr = trace_fsetxattr,
+ .fgetxattr = trace_fgetxattr,
.removexattr = trace_removexattr,
.opendir = trace_opendir,
.readdir = trace_readdir,
@@ -2088,16 +3259,19 @@ struct xlator_fops fops = {
.inodelk = trace_inodelk,
.finodelk = trace_finodelk,
.entrylk = trace_entrylk,
+ .fentrylk = trace_fentrylk,
.lookup = trace_lookup,
- .checksum = trace_checksum,
+ .rchecksum = trace_rchecksum,
.xattrop = trace_xattrop,
.fxattrop = trace_fxattrop,
.setattr = trace_setattr,
.fsetattr = trace_fsetattr,
};
-
struct xlator_cbks cbks = {
+ .release = trace_release,
+ .releasedir = trace_releasedir,
+ .forget = trace_forget,
};
struct volume_options options[] = {
@@ -2109,5 +3283,21 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR
/*.value = { ""} */
},
+ { .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ { .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
{ .key = {NULL} },
};
+
+struct xlator_dumpops dumpops = {
+ .history = trace_dump_history
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 00000000000..3b5f7891d00
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "event-history.h"
+#include "logging.h"
+#include "circ-buff.h"
+#include "statedump.h"
+#include "options.h"
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ size_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params ...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh ("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log (THIS->name, _conf->trace_log_level, \
+ "%s", _string); \
+ } \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
index 2cbde680fac..36efc6698bd 100644
--- a/xlators/encryption/Makefile.am
+++ b/xlators/encryption/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = rot-13
+SUBDIRS = rot-13 crypt
CLEANFILES =
diff --git a/xlators/protocol/legacy/client/Makefile.am b/xlators/encryption/crypt/Makefile.am
index d471a3f9243..d471a3f9243 100644
--- a/xlators/protocol/legacy/client/Makefile.am
+++ b/xlators/encryption/crypt/Makefile.am
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
new file mode 100644
index 00000000000..5e45a5da98a
--- /dev/null
+++ b/xlators/encryption/crypt/src/Makefile.am
@@ -0,0 +1,24 @@
+if ENABLE_CRYPT_XLATOR
+
+xlator_LTLIBRARIES = crypt.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+crypt_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
+crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -lssl -lcrypto
+
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+else
+
+noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+endif
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
new file mode 100644
index 00000000000..21d63e5d6d6
--- /dev/null
+++ b/xlators/encryption/crypt/src/atom.c
@@ -0,0 +1,957 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Glossary
+ *
+ *
+ * cblock (or cipher block). A logical unit in a file.
+ * cblock size is defined as the number of bits
+ * in an input (or output) block of the block
+ * cipher (*). Cipher block size is a property of
+ * cipher algorithm. E.g. cblock size is 64 bits
+ * for DES, 128 bits for AES, etc.
+ *
+ * atomic cipher algorithm
+ *     A cipher algorithm, which requires some chunks of
+ *     text to be padded at left and(or) right sides before
+ *     cipher transform.
+ *
+ *
+ * block (atom) Minimal chunk of file's data, which doesn't require
+ * padding. We'll consider logical units in a file of
+ * block size (atom size).
+ *
+ * cipher algorithm with EOF issue
+ *     Atomic cipher algorithm, which requires the last
+ *     incomplete cblock in a file to be padded with some
+ *     data (usually zeros).
+ *
+ *
+ * operation, which forms a gap at the beginning
+ *     reading/writing from offset, which is not aligned
+ *     to atom size
+ *
+ *
+ * operation, which forms a gap at the end
+ *     reading/writing count bytes starting from offset off,
+ *     so that off+count is not aligned to atom_size
+ *
+ * head block the first atom affected by an operation, which forms
+ * a gap at the beginning, or(and) at the end.
+ * Comment: Head block has at least one gap (either at
+ * the beginning, or at the end).
+ *
+ *
+ * tail block the last atom different from head, affected by an
+ * operation, which forms a gap at the end.
+ * Comment: Tail block has exactly one gap (at the end).
+ *
+ *
+ * partial block head or tail block
+ *
+ *
+ * full block block without gaps.
+ *
+ *
+ * (*) Recommendation for Block Cipher Modes of Operation
+ * Methods and Techniques
+ * NIST Special Publication 800-38A Edition 2001
+ */
+
+/*
+ * atom->offset_at()
+ */
+/* File offset of the head block: the aligned offset stored in @conf. */
+static off_t offset_at_head(struct avec_config *conf)
+{
+	off_t head_start = conf->aligned_offset;
+
+	return head_start;
+}
+
+/* offset_at_head() applied to the frame's hole config; @object is unused. */
+static off_t offset_at_hole_head(call_frame_t *frame,
+				 struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return offset_at_head(hole);
+}
+
+/* offset_at_head() applied to the frame's data config; @object is unused. */
+static off_t offset_at_data_head(call_frame_t *frame,
+				 struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return offset_at_head(data);
+}
+
+
+/*
+ * File offset of the tail block: the aligned offset, plus one atom when
+ * off_in_head is non-zero, plus the span of all full blocks.
+ */
+static off_t offset_at_tail(struct avec_config *conf,
+			    struct object_cipher_info *object)
+{
+	off_t tail_start = conf->aligned_offset;
+
+	if (conf->off_in_head)
+		tail_start += get_atom_size(object);
+	tail_start += conf->nr_full_blocks << get_atom_bits(object);
+
+	return tail_start;
+}
+
+/* offset_at_tail() applied to the frame's hole config. */
+static off_t offset_at_hole_tail(call_frame_t *frame,
+				 struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return offset_at_tail(hole, object);
+}
+
+
+/* offset_at_tail() applied to the frame's data config. */
+static off_t offset_at_data_tail(call_frame_t *frame,
+				 struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return offset_at_tail(data, object);
+}
+
+/*
+ * File offset of the first full block: the aligned offset, plus one atom
+ * when off_in_head is non-zero.
+ */
+static off_t offset_at_full(struct avec_config *conf,
+			    struct object_cipher_info *object)
+{
+	off_t full_start = conf->aligned_offset;
+
+	if (conf->off_in_head)
+		full_start += get_atom_size(object);
+
+	return full_start;
+}
+
+/* offset_at_full() applied to the frame's data config. */
+static off_t offset_at_data_full(call_frame_t *frame,
+				 struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return offset_at_full(data, object);
+}
+
+/* offset_at_full() applied to the frame's hole config. */
+static off_t offset_at_hole_full(call_frame_t *frame,
+				 struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return offset_at_full(hole, object);
+}
+
+/*
+ * atom->io_size_nopad()
+ */
+
+/*
+ * Number of bytes in the head block that are not gaps: the atom size
+ * minus the gap at the beginning (off_in_head) and the gap at the end.
+ * The end gap is non-zero only when there is no tail block, no full
+ * blocks, and off_in_tail is non-zero.
+ */
+static uint32_t io_size_nopad_head(struct avec_config *conf,
+				   struct object_cipher_info *object)
+{
+	uint32_t head_gap;
+	uint32_t end_gap = 0;
+
+	check_head_block(conf);
+
+	head_gap = conf->off_in_head;
+
+	if (!has_tail_block(conf) &&
+	    !has_full_blocks(conf) &&
+	    conf->off_in_tail != 0)
+		end_gap = get_atom_size(object) - conf->off_in_tail;
+
+	return get_atom_size(object) - (head_gap + end_gap);
+}
+
+/* Unpadded I/O size of the tail block: off_in_tail; @object is unused. */
+static uint32_t io_size_nopad_tail(struct avec_config *conf,
+				   struct object_cipher_info *object)
+{
+	uint32_t tail_bytes;
+
+	check_tail_block(conf);
+	tail_bytes = conf->off_in_tail;
+
+	return tail_bytes;
+}
+
+/* Unpadded I/O size of a full block: always one whole atom. */
+static uint32_t io_size_nopad_full(struct avec_config *conf,
+				   struct object_cipher_info *object)
+{
+	uint32_t atom_bytes;
+
+	check_full_block(conf);
+	atom_bytes = get_atom_size(object);
+
+	return atom_bytes;
+}
+
+/* io_size_nopad_head() applied to the frame's data config. */
+static uint32_t io_size_nopad_data_head(call_frame_t *frame,
+					struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return io_size_nopad_head(data, object);
+}
+
+/* io_size_nopad_head() applied to the frame's hole config. */
+static uint32_t io_size_nopad_hole_head(call_frame_t *frame,
+					struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return io_size_nopad_head(hole, object);
+}
+
+/* io_size_nopad_tail() applied to the frame's data config. */
+static uint32_t io_size_nopad_data_tail(call_frame_t *frame,
+					struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return io_size_nopad_tail(data, object);
+}
+
+/* io_size_nopad_tail() applied to the frame's hole config. */
+static uint32_t io_size_nopad_hole_tail(call_frame_t *frame,
+					struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return io_size_nopad_tail(hole, object);
+}
+
+/* io_size_nopad_full() applied to the frame's data config. */
+static uint32_t io_size_nopad_data_full(call_frame_t *frame,
+					struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return io_size_nopad_full(data, object);
+}
+
+/* io_size_nopad_full() applied to the frame's hole config. */
+static uint32_t io_size_nopad_hole_full(call_frame_t *frame,
+					struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return io_size_nopad_full(hole, object);
+}
+
+/* Offset inside the head block: off_in_head of @conf. */
+static uint32_t offset_in_head(struct avec_config *conf)
+{
+	uint32_t head_off;
+
+	check_cursor_head(conf);
+	head_off = conf->off_in_head;
+
+	return head_off;
+}
+
+/*
+ * Offset inside the tail block: always zero. Both parameters are unused;
+ * the signature matches the atom->offset_in() slot this is stored in.
+ */
+static uint32_t offset_in_tail(call_frame_t *frame,
+			       struct object_cipher_info *object)
+{
+	const uint32_t tail_off = 0;
+
+	return tail_off;
+}
+
+/*
+ * Byte offset of the full block under the cursor, counted from the first
+ * full block. When a head block exists it occupies cursor position 0, so
+ * the cursor is reduced by one before scaling by the atom size.
+ */
+static uint32_t offset_in_full(struct avec_config *conf,
+			       struct object_cipher_info *object)
+{
+	check_cursor_full(conf);
+
+	return (has_head_block(conf) ? conf->cursor - 1 : conf->cursor)
+		<< get_atom_bits(object);
+}
+
+/* offset_in_head() applied to the frame's data config; @object is unused. */
+static uint32_t offset_in_data_head(call_frame_t *frame,
+				    struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return offset_in_head(data);
+}
+
+/* offset_in_head() applied to the frame's hole config; @object is unused. */
+static uint32_t offset_in_hole_head(call_frame_t *frame,
+				    struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return offset_in_head(hole);
+}
+
+/* offset_in_full() applied to the frame's data config. */
+static uint32_t offset_in_data_full(call_frame_t *frame,
+				    struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return offset_in_full(data, object);
+}
+
+/* offset_in_full() applied to the frame's hole config. */
+static uint32_t offset_in_hole_full(call_frame_t *frame,
+				    struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return offset_in_full(hole, object);
+}
+
+/*
+ * atom->rmw()
+ */
+/*
+ * Pre-conditions:
+ * @vec contains plain text of the latest
+ * version.
+ *
+ * Uptodate gaps of the @partial block with
+ * this plain text, encrypt the whole block
+ * and write the result to disk.
+ */
+/*
+ * Common read-completion handler behind the rmw_{data,hole}_{head,tail}
+ * callbacks.
+ *
+ * @op_ret/@op_errno  result of the read; stored in the frame's local.
+ *                    A negative @op_ret ends processing immediately.
+ * @vec/@count        plain text that was read (used to fill the gaps).
+ * @atom              descriptor of the partial block being written back.
+ * @cookie, @stbuf, @iobref are not used here.
+ *
+ * Always returns 0; failures are reported through local->op_ret and
+ * local->op_errno.
+ */
+static int32_t rmw_partial_block(call_frame_t *frame,
+				 void *cookie,
+				 xlator_t *this,
+				 int32_t op_ret,
+				 int32_t op_errno,
+				 struct iovec *vec,
+				 int32_t count,
+				 struct iatt *stbuf,
+				 struct iobref *iobref,
+				 struct rmw_atom *atom)
+{
+	size_t was_read = 0;
+	uint64_t file_size;
+	crypt_local_t *local = frame->local;
+	struct object_cipher_info *object = &local->info->cinfo;
+
+	struct iovec *partial = atom->get_iovec(frame, 0);
+	struct avec_config *conf = atom->get_config(frame);
+	end_writeback_handler_t end_writeback_partial_block;
+#if DEBUG_CRYPT
+	gf_boolean_t check_last_cblock = _gf_false;
+#endif
+	local->op_ret = op_ret;
+	local->op_errno = op_errno;
+
+	if (op_ret < 0)
+		goto exit;
+
+	file_size = local->cur_file_size;
+	was_read = op_ret;
+
+	if (atom->locality == HEAD_ATOM && conf->off_in_head) {
+		/*
+		 * head atom with a non-uptodate gap
+		 * at the beginning
+		 *
+		 * fill the gap with plain text of the
+		 * latest version. Convert a part of hole
+		 * (if any) to zeros.
+		 */
+		int32_t i;
+		int32_t copied = 0;
+		int32_t to_gap; /* amount of data needed to uptodate
+				   the gap at the beginning */
+#if 0
+		int32_t hole = 0; /* The part of the hole which
+				   * got in the head block */
+#endif /* 0 */
+		to_gap = conf->off_in_head;
+
+		if (was_read < to_gap) {
+			if (file_size >
+			    offset_at_head(conf) + was_read) {
+				/*
+				 * It is impossible to uptodate
+				 * head block: too few bytes have
+				 * been read from disk, so that
+				 * partial write is impossible.
+				 *
+				 * It could happen because of many
+				 * reasons: IO errors, (meta)data
+				 * corruption in the local file system,
+				 * etc.
+				 */
+				gf_log(this->name, GF_LOG_WARNING,
+				       "Can not uptodate a gap at the beginning");
+				local->op_ret = -1;
+				local->op_errno = EIO;
+				goto exit;
+			}
+#if 0
+			hole = to_gap - was_read;
+#endif /* 0 */
+			to_gap = was_read;
+		}
+		/*
+		 * uptodate the gap at the beginning.
+		 *
+		 * Each source vector is appended after the bytes already
+		 * copied; without the "+ copied" offset a read returned
+		 * in several vectors would overwrite the start of the
+		 * block again and again.
+		 */
+		for (i = 0; i < count && copied < to_gap; i++) {
+			int32_t to_copy;
+
+			to_copy = vec[i].iov_len;
+			if (to_copy > to_gap - copied)
+				to_copy = to_gap - copied;
+
+			memcpy(partial->iov_base + copied,
+			       vec[i].iov_base,
+			       to_copy);
+			copied += to_copy;
+		}
+#if 0
+		/*
+		 * If possible, convert part of the
+		 * hole, which got in the head block
+		 */
+		ret = TRY_LOCK(&local->hole_lock);
+		if (!ret) {
+			if (local->hole_handled)
+				/*
+				 * already converted by
+				 * crypt_writev_cbk()
+				 */
+				UNLOCK(&local->hole_lock);
+			else {
+				/*
+				 * convert the part of the hole
+				 * which got in the head block
+				 * to zeros.
+				 *
+				 * Update the orig_offset to make
+				 * sure writev_cbk() won't care
+				 * about this part of the hole.
+				 *
+				 */
+				memset(partial->iov_base + to_gap, 0, hole);
+
+				conf->orig_offset -= hole;
+				conf->orig_size += hole;
+				UNLOCK(&local->hole_lock);
+			}
+		}
+		else /*
+		      * conversion is being performed
+		      * by crypt_writev_cbk()
+		      */
+			;
+#endif /* 0 */
+	}
+	if (atom->locality == TAIL_ATOM ||
+	    (!has_tail_block(conf) && conf->off_in_tail)) {
+		/*
+		 * tail atom, or head atom with a non-uptodate
+		 * gap at the end.
+		 *
+		 * fill the gap at the end of the block
+		 * with plain text of the latest version.
+		 * Pad the result, (if needed)
+		 */
+		int32_t i;
+		int32_t to_gap;
+		int copied;
+		off_t off_in_tail;
+		int32_t to_copy;
+
+		off_in_tail = conf->off_in_tail;
+		to_gap = conf->gap_in_tail;
+
+		if (to_gap && was_read < off_in_tail + to_gap) {
+			/*
+			 * It is impossible to uptodate
+			 * the gap at the end: too few bytes
+			 * have been read from disk, so that
+			 * partial write is impossible.
+			 *
+			 * It could happen because of many
+			 * reasons: IO errors, (meta)data
+			 * corruption in the local file system,
+			 * etc.
+			 */
+			gf_log(this->name, GF_LOG_WARNING,
+			       "Can not uptodate a gap at the end");
+			local->op_ret = -1;
+			local->op_errno = EIO;
+			goto exit;
+		}
+		/*
+		 * uptodate the gap at the end: walk the source vectors
+		 * backwards, filling the gap from its last byte towards
+		 * off_in_tail.
+		 */
+		copied = 0;
+		to_copy = to_gap;
+		for(i = count - 1; i >= 0 && to_copy > 0; i--) {
+			uint32_t from_vec, off_in_vec;
+
+			off_in_vec = 0;
+			from_vec = vec[i].iov_len;
+			if (from_vec > to_copy) {
+				off_in_vec = from_vec - to_copy;
+				from_vec = to_copy;
+			}
+			memcpy(partial->iov_base +
+			       off_in_tail + to_gap - copied - from_vec,
+			       vec[i].iov_base + off_in_vec,
+			       from_vec);
+
+			gf_log(this->name, GF_LOG_DEBUG,
+			       "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
+			       (int)from_vec,
+			       (int)off_in_tail + to_gap - copied - from_vec,
+			       (int)off_in_vec);
+
+			copied += from_vec;
+			to_copy -= from_vec;
+		}
+		partial->iov_len = off_in_tail + to_gap;
+
+		if (object_alg_should_pad(object)) {
+			int32_t resid = 0;
+			resid = partial->iov_len & (object_alg_blksize(object) - 1);
+			if (resid) {
+				/*
+				 * append a new EOF padding
+				 *
+				 * NOTE(review): the padding bytes are set
+				 * to the value 1, while the debug message
+				 * below speaks of zeros -- confirm which
+				 * one is intended.
+				 */
+				local->eof_padding_size =
+					object_alg_blksize(object) - resid;
+
+				gf_log(this->name, GF_LOG_DEBUG,
+				       "set padding size %d",
+				       local->eof_padding_size);
+
+				memset(partial->iov_base + partial->iov_len,
+				       1,
+				       local->eof_padding_size);
+				partial->iov_len += local->eof_padding_size;
+#if DEBUG_CRYPT
+				gf_log(this->name, GF_LOG_DEBUG,
+				       "pad cblock with %d zeros:",
+				       local->eof_padding_size);
+				dump_cblock(this,
+					    (unsigned char *)partial->iov_base +
+					    partial->iov_len - object_alg_blksize(object));
+				check_last_cblock = _gf_true;
+#endif
+			}
+		}
+	}
+	/*
+	 * encrypt the whole block
+	 */
+	encrypt_aligned_iov(object,
+			    partial,
+			    1,
+			    atom->offset_at(frame, object));
+#if DEBUG_CRYPT
+	if (check_last_cblock == _gf_true) {
+		gf_log(this->name, GF_LOG_DEBUG,
+		       "encrypt last cblock with offset %llu",
+		       (unsigned long long)atom->offset_at(frame, object));
+		dump_cblock(this, (unsigned char *)partial->iov_base +
+			    partial->iov_len - object_alg_blksize(object));
+	}
+#endif
+	set_local_io_params_writev(frame, object, atom,
+				   atom->offset_at(frame, object),
+				   iovec_get_size(partial, 1));
+	/*
+	 * write the whole block to disk
+	 */
+	end_writeback_partial_block = dispatch_end_writeback(local->fop);
+	conf->cursor ++;
+	/*
+	 * Log before winding: once STACK_WIND has been issued the write
+	 * may complete at any time, so the frame and the block buffer
+	 * must not be touched afterwards.
+	 */
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "submit partial block: %d bytes from %d offset",
+	       (int)iovec_get_size(partial, 1),
+	       (int)atom->offset_at(frame, object));
+
+	STACK_WIND(frame,
+		   end_writeback_partial_block,
+		   FIRST_CHILD(this),
+		   FIRST_CHILD(this)->fops->writev,
+		   local->fd,
+		   partial,
+		   1,
+		   atom->offset_at(frame, object),
+		   local->flags,
+		   local->iobref_data,
+		   local->xdata);
+ exit:
+	return 0;
+}
+
+/*
+ * Perform a (read-)modify-write sequence.
+ * This should be performed only after approval
+ * of upper server-side manager, i.e. the caller
+ * needs to make sure this is his turn to rmw.
+ */
+/*
+ * Start the read-modify-write sequence for the partial block of locality
+ * @ltype in the currently active setup.
+ *
+ * The "read" step winds to this translator's own readv with the current
+ * file size attached under FSIZE_XATTR_PREFIX; atom->rmw is the callback.
+ * The dictionary reference taken here is dropped exactly once on every
+ * path after dict_new() succeeds.
+ */
+void submit_partial(call_frame_t *frame,
+		    xlator_t *this,
+		    fd_t *fd,
+		    atom_locality_type ltype)
+{
+	int32_t ret;
+	dict_t *dict;
+	struct rmw_atom *atom;
+	crypt_local_t *local = frame->local;
+	struct object_cipher_info *object = &local->info->cinfo;
+
+	atom = atom_by_types(local->active_setup, ltype);
+	/*
+	 * To perform the "read" component of the read-modify-write
+	 * sequence the crypt translator does stack_wind to itself.
+	 *
+	 * Pass current file size to crypt_readv()
+	 */
+	dict = dict_new();
+	if (!dict) {
+		/*
+		 * FIXME: Handle the error
+		 */
+		gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+		return;
+	}
+	ret = dict_set(dict,
+		       FSIZE_XATTR_PREFIX,
+		       data_from_uint64(local->cur_file_size));
+	if (ret) {
+		/*
+		 * FIXME: Handle the error
+		 *
+		 * The reference is released at the exit label; releasing
+		 * it here as well would drop it twice.
+		 */
+		gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+		goto exit;
+	}
+	STACK_WIND(frame,
+		   atom->rmw,
+		   this,
+		   this->fops->readv, /* crypt_readv */
+		   fd,
+		   atom->count_to_uptodate(frame, object), /* count */
+		   atom->offset_at(frame, object), /* offset to read from */
+		   0,
+		   dict);
+ exit:
+	dict_unref(dict);
+}
+
+/*
+ * submit blocks of FULL_ATOM type
+ */
+void submit_full(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
+ uint32_t count; /* total number of full blocks to submit */
+ uint32_t granularity; /* number of blocks to submit in one iteration */
+
+ uint64_t off_in_file; /* start offset in the file, bytes */
+ uint32_t off_in_atom; /* start offset in the atom, blocks */
+ uint32_t blocks_written = 0; /* blocks written for this submit */
+
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_full_block;
+ /*
+ * Write full blocks by groups of granularity size.
+ */
+ end_writeback_full_block = dispatch_end_writeback(local->fop);
+
+ if (is_ordered_mode(frame)) {
+ /*
+ * in ordered mode: exactly one block, the one under
+ * the cursor, is submitted per call
+ */
+ uint32_t skip = has_head_block(conf) ? 1 : 0;
+ count = 1;
+ granularity = 1;
+ /*
+ * calculate start offset using cursor value;
+ * here we should take into account head block,
+ * which corresponds to cursor value 0.
+ */
+ off_in_file = atom->offset_at(frame, object) +
+ ((conf->cursor - skip) << get_atom_bits(object));
+ off_in_atom = conf->cursor - skip;
+ }
+ else {
+ /*
+ * in parallel mode: all full blocks are submitted,
+ * at most MAX_IOVEC of them per write
+ */
+ count = conf->nr_full_blocks;
+ granularity = MAX_IOVEC;
+ off_in_file = atom->offset_at(frame, object);
+ off_in_atom = 0;
+ }
+ while (count) {
+ uint32_t blocks_to_write = count;
+
+ if (blocks_to_write > granularity)
+ blocks_to_write = granularity;
+ if (conf->type == HOLE_ATOM)
+ /*
+ * reset iovec before encryption
+ */
+ memset(atom->get_iovec(frame, 0)->iov_base,
+ 0,
+ get_atom_size(object));
+ /*
+ * encrypt the group
+ */
+ encrypt_aligned_iov(object,
+ atom->get_iovec(frame,
+ off_in_atom +
+ blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written <<
+ get_atom_bits(object)));
+
+ set_local_io_params_writev(frame, object, atom,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ blocks_to_write << get_atom_bits(object));
+
+ /* the cursor is advanced before the write is wound */
+ conf->cursor += blocks_to_write;
+
+ /*
+ * write the group; iobref_data is used when set,
+ * otherwise local->iobref
+ */
+ STACK_WIND(frame,
+ end_writeback_full_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ atom->get_iovec(frame, off_in_atom + blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ local->flags,
+ local->iobref_data ? local->iobref_data : local->iobref,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
+ blocks_to_write,
+ (int)(off_in_file + (blocks_written << get_atom_bits(object))));
+
+ count -= blocks_to_write;
+ blocks_written += blocks_to_write;
+ }
+ return;
+}
+
+/* readv callback: rmw_partial_block() for the (DATA_ATOM, HEAD_ATOM) atom. */
+static int32_t rmw_data_head(call_frame_t *frame,
+			     void *cookie,
+			     xlator_t *this,
+			     int32_t op_ret,
+			     int32_t op_errno,
+			     struct iovec *vec,
+			     int32_t count,
+			     struct iatt *stbuf,
+			     struct iobref *iobref,
+			     dict_t *xdata)
+{
+	struct rmw_atom *target = atom_by_types(DATA_ATOM, HEAD_ATOM);
+
+	return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+				 vec, count, stbuf, iobref, target);
+}
+
+/* readv callback: rmw_partial_block() for the (DATA_ATOM, TAIL_ATOM) atom. */
+static int32_t rmw_data_tail(call_frame_t *frame,
+			     void *cookie,
+			     xlator_t *this,
+			     int32_t op_ret,
+			     int32_t op_errno,
+			     struct iovec *vec,
+			     int32_t count,
+			     struct iatt *stbuf,
+			     struct iobref *iobref,
+			     dict_t *xdata)
+{
+	struct rmw_atom *target = atom_by_types(DATA_ATOM, TAIL_ATOM);
+
+	return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+				 vec, count, stbuf, iobref, target);
+}
+
+/* readv callback: rmw_partial_block() for the (HOLE_ATOM, HEAD_ATOM) atom. */
+static int32_t rmw_hole_head(call_frame_t *frame,
+			     void *cookie,
+			     xlator_t *this,
+			     int32_t op_ret,
+			     int32_t op_errno,
+			     struct iovec *vec,
+			     int32_t count,
+			     struct iatt *stbuf,
+			     struct iobref *iobref,
+			     dict_t *xdata)
+{
+	struct rmw_atom *target = atom_by_types(HOLE_ATOM, HEAD_ATOM);
+
+	return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+				 vec, count, stbuf, iobref, target);
+}
+
+/* readv callback: rmw_partial_block() for the (HOLE_ATOM, TAIL_ATOM) atom. */
+static int32_t rmw_hole_tail(call_frame_t *frame,
+			     void *cookie,
+			     xlator_t *this,
+			     int32_t op_ret,
+			     int32_t op_errno,
+			     struct iovec *vec,
+			     int32_t count,
+			     struct iatt *stbuf,
+			     struct iobref *iobref,
+			     dict_t *xdata)
+{
+	struct rmw_atom *target = atom_by_types(HOLE_ATOM, TAIL_ATOM);
+
+	return rmw_partial_block(frame, cookie, this, op_ret, op_errno,
+				 vec, count, stbuf, iobref, target);
+}
+
+/*
+ * atom->count_to_uptodate()
+ */
+/*
+ * Bytes to read in order to uptodate the head block: the whole atom when
+ * the config has exactly one block (acount == 1) and off_in_tail is
+ * non-zero, otherwise only the first off_in_head bytes.
+ */
+static uint32_t count_to_uptodate_head(struct avec_config *conf,
+				       struct object_cipher_info *object)
+{
+	if (conf->acount != 1 || !conf->off_in_tail)
+		/* there is no need to read the whole head block */
+		return conf->off_in_head;
+
+	return get_atom_size(object);
+}
+
+/* Bytes to read in order to uptodate the tail block; @conf is unused. */
+static uint32_t count_to_uptodate_tail(struct avec_config *conf,
+				       struct object_cipher_info *object)
+{
+	/* the whole tail block has to be read */
+	uint32_t whole_atom = get_atom_size(object);
+
+	return whole_atom;
+}
+
+/* count_to_uptodate_head() applied to the frame's data config. */
+static uint32_t count_to_uptodate_data_head(call_frame_t *frame,
+					    struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return count_to_uptodate_head(data, object);
+}
+
+/* count_to_uptodate_tail() applied to the frame's data config. */
+static uint32_t count_to_uptodate_data_tail(call_frame_t *frame,
+					    struct object_cipher_info *object)
+{
+	struct avec_config *data = get_data_conf(frame);
+
+	return count_to_uptodate_tail(data, object);
+}
+
+/* count_to_uptodate_head() applied to the frame's hole config. */
+static uint32_t count_to_uptodate_hole_head(call_frame_t *frame,
+					    struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return count_to_uptodate_head(hole, object);
+}
+
+/* count_to_uptodate_tail() applied to the frame's hole config. */
+static uint32_t count_to_uptodate_hole_tail(call_frame_t *frame,
+					    struct object_cipher_info *object)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return count_to_uptodate_tail(hole, object);
+}
+
+/* atom->get_config() */
+
+/* Address of the data_conf member of the frame's crypt_local_t. */
+static struct avec_config *get_config_data(call_frame_t *frame)
+{
+	crypt_local_t *local = (crypt_local_t *)frame->local;
+
+	return &local->data_conf;
+}
+
+/* Address of the hole_conf member of the frame's crypt_local_t. */
+static struct avec_config *get_config_hole(call_frame_t *frame)
+{
+	crypt_local_t *local = (crypt_local_t *)frame->local;
+
+	return &local->hole_conf;
+}
+
+/*
+ * atom->get_iovec()
+ */
+/* Head vector of the hole config: the first entry of avec; @count is unused. */
+static struct iovec *get_iovec_hole_head(call_frame_t *frame,
+					 uint32_t count)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+	struct iovec *head_vec = hole->avec;
+
+	return head_vec;
+}
+
+/*
+ * Full-block vector of the hole config: the avec entry right after the
+ * head entry when off_in_head is non-zero, otherwise the first entry.
+ * @count is unused.
+ */
+static struct iovec *get_iovec_hole_full(call_frame_t *frame,
+					 uint32_t count)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+	struct iovec *full_vec = hole->avec;
+
+	if (hole->off_in_head)
+		full_vec += 1;
+
+	return full_vec;
+}
+
+/* Tail vector of the hole config: avec entry blocks_in_pool - 1; @count is unused. */
+static struct iovec *get_iovec_hole_tail(call_frame_t *frame,
+					 uint32_t count)
+{
+	struct avec_config *hole = get_hole_conf(frame);
+
+	return &hole->avec[hole->blocks_in_pool - 1];
+}
+
+/* Head vector of the data config: the first entry of avec; @count is unused. */
+static struct iovec *get_iovec_data_head(call_frame_t *frame,
+					 uint32_t count)
+{
+	struct avec_config *data = get_data_conf(frame);
+	struct iovec *head_vec = data->avec;
+
+	return head_vec;
+}
+
+/*
+ * Vector of the @count-th full block of the data config: skip the head
+ * entry when off_in_head is non-zero, then advance by @count entries.
+ */
+static struct iovec *get_iovec_data_full(call_frame_t *frame,
+					 uint32_t count)
+{
+	struct avec_config *data = get_data_conf(frame);
+	struct iovec *first_full = data->avec + (data->off_in_head ? 1 : 0);
+
+	return first_full + count;
+}
+
+/*
+ * Tail vector of the data config: the avec entry that follows the head
+ * entry (present when off_in_head is non-zero) and all full-block
+ * entries. @count is unused.
+ */
+static struct iovec *get_iovec_data_tail(call_frame_t *frame,
+					 uint32_t count)
+{
+	struct avec_config *data = get_data_conf(frame);
+	struct iovec *first_full = data->avec + (data->off_in_head ? 1 : 0);
+
+	return first_full + data->nr_full_blocks;
+}
+
+/*
+ * Method table indexed by [data type][locality]; atom_by_types() returns
+ * pointers into it.
+ *
+ * HEAD_ATOM and TAIL_ATOM entries provide every method. FULL_ATOM entries
+ * leave .rmw and .count_to_uptodate unset, i.e. NULL. Both TAIL_ATOM
+ * entries share offset_in_tail for .offset_in.
+ */
+static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
+ [DATA_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_data_head,
+ .offset_at = offset_at_data_head,
+ .offset_in = offset_in_data_head,
+ .get_iovec = get_iovec_data_head,
+ .io_size_nopad = io_size_nopad_data_head,
+ .count_to_uptodate = count_to_uptodate_data_head,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_data_tail,
+ .offset_at = offset_at_data_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_data_tail,
+ .io_size_nopad = io_size_nopad_data_tail,
+ .count_to_uptodate = count_to_uptodate_data_tail,
+ .get_config = get_config_data
+ },
+ /* full data blocks: no .rmw, no .count_to_uptodate */
+ [DATA_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_data_full,
+ .offset_in = offset_in_data_full,
+ .get_iovec = get_iovec_data_full,
+ .io_size_nopad = io_size_nopad_data_full,
+ .get_config = get_config_data
+ },
+ [HOLE_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_hole_head,
+ .offset_at = offset_at_hole_head,
+ .offset_in = offset_in_hole_head,
+ .get_iovec = get_iovec_hole_head,
+ .io_size_nopad = io_size_nopad_hole_head,
+ .count_to_uptodate = count_to_uptodate_hole_head,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_hole_tail,
+ .offset_at = offset_at_hole_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_hole_tail,
+ .io_size_nopad = io_size_nopad_hole_tail,
+ .count_to_uptodate = count_to_uptodate_hole_tail,
+ .get_config = get_config_hole
+ },
+ /* full hole blocks: no .rmw, no .count_to_uptodate */
+ [HOLE_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_hole_full,
+ .offset_in = offset_in_hole_full,
+ .get_iovec = get_iovec_hole_full,
+ .io_size_nopad = io_size_nopad_hole_full,
+ .get_config = get_config_hole
+ }
+};
+
+/*
+ * Look up the method table entry for the given data type and locality.
+ * The indices are used as is, without range checking.
+ */
+struct rmw_atom *atom_by_types(atom_data_type data,
+			       atom_locality_type locality)
+{
+	struct rmw_atom *row = atoms[data];
+
+	return row + locality;
+}
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
new file mode 100644
index 00000000000..7c212ad5d25
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-common.h
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_COMMON_H__
+#define __CRYPT_COMMON_H__
+
+#define INVAL_SUBVERSION_NUMBER (0xff)
+#define CRYPT_INVAL_OP (GF_FOP_NULL)
+
+/*
+ * Key strings, all under "trusted.glusterfs.crypt.": the ".att." names
+ * and the ".msg." names form two groups. FSIZE_XATTR_PREFIX is the
+ * dictionary key under which submit_partial() in atom.c passes the
+ * current file size to readv.
+ */
+#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
+#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
+#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
+#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
+#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
+#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
+#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
+
+
+/* messages for crypt_open() */
+#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
+#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
+
+#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
+
+/* statement that does nothing */
+#define noop do {; } while (0)
+/*
+ * Compile-time assertion: when @cond is a constant equal to 0 the two
+ * case labels collide and compilation fails. @cond has to be an integer
+ * constant expression, since it is used as a case label.
+ */
+#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
+/*
+ * round_up(x, y): smallest multiple of y that is >= x. The bit trick is
+ * only valid when y is a power of two.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/*
+ * Format of file's metadata
+ */
+/*
+ * One byte identifying the metadata loader, immediately followed by the
+ * version-specific metadata. The struct is packed, and "versioned" is a
+ * zero-length array, so it adds nothing to sizeof(struct crypt_format).
+ */
+struct crypt_format {
+ uint8_t loader_id; /* version of metadata loader */
+ uint8_t versioned[0]; /* file's metadata of specific version */
+} __attribute__((packed));
+
+/*
+ * Cipher algorithm identifiers. LAST_CIPHER_ALG is the final enumerator,
+ * so its value equals the number of algorithms listed before it.
+ */
+typedef enum {
+ AES_CIPHER_ALG,
+ LAST_CIPHER_ALG
+} cipher_alg_t;
+
+/*
+ * Cipher mode identifiers. LAST_CIPHER_MODE is the final enumerator,
+ * so its value equals the number of modes listed before it.
+ */
+typedef enum {
+ XTS_CIPHER_MODE,
+ LAST_CIPHER_MODE
+} cipher_mode_t;
+
+/*
+ * Metadata loader identifiers. LAST_MTD_LOADER is the final enumerator,
+ * so its value equals the number of loaders listed before it.
+ * NOTE(review): presumably the value kept in crypt_format.loader_id --
+ * confirm.
+ */
+typedef enum {
+ MTD_LOADER_V1,
+ LAST_MTD_LOADER
+} mtd_loader_id;
+
+/* Turn on the "take read lock and don't unlock" request bit in *@flags. */
+static inline void msgflags_set_mtd_rlock(uint32_t *flags)
+{
+	*flags = *flags | MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+/* Turn on the "take write lock and don't unlock" request bit in *@flags. */
+static inline void msgflags_set_mtd_wlock(uint32_t *flags)
+{
+	*flags = *flags | MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+/*
+ * Report whether the "take read lock and don't unlock" request bit is
+ * set in *@flags. The masked value is normalized, so the result is always
+ * exactly _gf_true or _gf_false.
+ */
+static inline gf_boolean_t msgflags_check_mtd_rlock(uint32_t *flags)
+{
+	return (*flags & MSGFLAGS_REQUEST_MTD_RLOCK) ? _gf_true : _gf_false;
+}
+
+/*
+ * Report whether the "take write lock and don't unlock" request bit is
+ * set in *@flags.
+ *
+ * The mask is 2, so returning the raw masked value would hand callers a
+ * gf_boolean_t equal to 2, which never compares equal to _gf_true. The
+ * result is normalized to exactly _gf_true or _gf_false instead.
+ */
+static inline gf_boolean_t msgflags_check_mtd_wlock(uint32_t *flags)
+{
+	return (*flags & MSGFLAGS_REQUEST_MTD_WLOCK) ? _gf_true : _gf_false;
+}
+
+/* Non-zero when either the read-lock or the write-lock request bit is set. */
+static inline gf_boolean_t msgflags_check_mtd_lock(uint32_t *flags)
+{
+	if (msgflags_check_mtd_rlock(flags))
+		return 1;
+
+	return msgflags_check_mtd_wlock(flags) ? 1 : 0;
+}
+
+/*
+ * returns number of logical blocks occupied
+ * (maybe partially) by @count bytes
+ * at offset @start.
+ */
+static inline off_t logical_blocks_occupied(uint64_t start, off_t count,
+					    int blkbits)
+{
+	/* index of the block holding the first byte */
+	uint64_t first_blk = start >> blkbits;
+	/*
+	 * index of the block holding the last byte; @count == 0 is
+	 * not treated specially
+	 */
+	uint64_t last_blk = (start + count - 1) >> blkbits;
+
+	return last_blk - first_blk + 1;
+}
+
+/*
+ * are two bytes (represented by offsets @off1
+ * and @off2 respectively) in the same logical
+ * block.
+ */
+static inline int in_same_lblock(uint64_t off1, uint64_t off2,
+				 int blkbits)
+{
+	/* block indices of the two byte offsets */
+	uint64_t blk1 = off1 >> blkbits;
+	uint64_t blk2 = off2 >> blkbits;
+
+	return blk1 == blk2;
+}
+
+/*
+ * Log the first 16 bytes at @buf in hex at debug level under this->name.
+ * @buf must point to at least 16 readable bytes.
+ */
+static inline void dump_cblock(xlator_t *this, unsigned char *buf)
+{
+	gf_log(this->name, GF_LOG_DEBUG,
+	       "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
+	       buf[0], buf[1], buf[2], buf[3],
+	       buf[4], buf[5], buf[6], buf[7],
+	       buf[8], buf[9], buf[10], buf[11],
+	       buf[12], buf[13], buf[14], buf[15]);
+}
+
+#endif /* __CRYPT_COMMON_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
new file mode 100644
index 00000000000..1954c579423
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-mem-types.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CRYPT_MEM_TYPES_H__
+#define __CRYPT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/*
+ * Allocation type tags of the crypt translator. Numbering starts right
+ * after gf_common_mt_end; gf_crypt_mt_end closes the range. These tags
+ * are passed to the GF_* allocation wrappers, e.g. gf_crypt_mt_local to
+ * GF_CALLOC in crypt_alloc_local().
+ */
+enum gf_crypt_mem_types_ {
+ gf_crypt_mt_priv = gf_common_mt_end + 1,
+ gf_crypt_mt_inode,
+ gf_crypt_mt_data,
+ gf_crypt_mt_mtd,
+ gf_crypt_mt_loc,
+ gf_crypt_mt_iatt,
+ gf_crypt_mt_key,
+ gf_crypt_mt_iovec,
+ gf_crypt_mt_char,
+ gf_crypt_mt_local,
+ gf_crypt_mt_end,
+};
+
+#endif /* __CRYPT_MEM_TYPES_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
+
+
+
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
new file mode 100644
index 00000000000..2982bb26db0
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -0,0 +1,4525 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Forward declarations of file-local helpers which are used before
+ * their definitions.
+ */
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+/* converts a hole of @size bytes starting at offset @from to zeros */
+static int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t from, off_t size);
+static int32_t load_file_size(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+/* per-fop "drop one call reference" helpers */
+static void put_one_call_open(call_frame_t *frame);
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this);
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this);
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
+/* release of the aligned vectors set up for data and hole atoms */
+static void free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
+static void free_avec_data(crypt_local_t *local);
+static void free_avec_hole(crypt_local_t *local);
+
+/*
+ * Allocate a zeroed per-call context for @fop, initialize its locks
+ * and attach it to @frame->local.
+ *
+ * Returns the new context, or NULL (after logging) when the allocation
+ * fails; @frame is not modified in that case.
+ */
+static crypt_local_t *crypt_alloc_local(call_frame_t *frame, xlator_t *this,
+					glusterfs_fop_t fop)
+{
+	crypt_local_t *ctx = GF_CALLOC (1, sizeof (*ctx), gf_crypt_mt_local);
+
+	if (ctx == NULL) {
+		gf_log(this->name, GF_LOG_ERROR, "out of memory");
+		return NULL;
+	}
+
+	ctx->fop = fop;
+	LOCK_INIT(&ctx->hole_lock);
+	LOCK_INIT(&ctx->call_lock);
+	LOCK_INIT(&ctx->rw_count_lock);
+
+	frame->local = ctx;
+	return ctx;
+}
+
+/*
+ * Look up the crypt-specific info stored in the context of @inode for
+ * translator @this.
+ *
+ * Returns the stored pointer, or NULL (with a warning logged) when the
+ * context lookup fails or the stored value is zero.
+ */
+struct crypt_inode_info *get_crypt_inode_info(inode_t *inode, xlator_t *this)
+{
+	uint64_t ctx = 0;
+	struct crypt_inode_info *found;
+
+	if (inode_ctx_get(inode, this, &ctx) == -1) {
+		gf_log (this->name, GF_LOG_WARNING,
+			"Can not get inode info");
+		return NULL;
+	}
+
+	found = (struct crypt_inode_info *)(long)ctx;
+	if (!found)
+		gf_log (this->name, GF_LOG_WARNING,
+			"Can not obtain inode info");
+	return found;
+}
+
+/*
+ * Return the inode info cached in @local; on first use fetch it from
+ * the context of @local->fd->inode and cache the result (which may be
+ * NULL when the lookup fails).
+ */
+static struct crypt_inode_info *local_get_inode_info(crypt_local_t *local,
+						     xlator_t *this)
+{
+	if (!local->info)
+		local->info = get_crypt_inode_info(local->fd->inode, this);
+
+	return local->info;
+}
+
+/*
+ * Allocate a zeroed inode info and cache it in @local->info.
+ * With DEBUG_CRYPT a private copy of @loc is attached to the info.
+ *
+ * Returns the new info, or NULL on failure; every failure path records
+ * -1/ENOMEM in @local->op_ret/@local->op_errno.
+ */
+static struct crypt_inode_info *alloc_inode_info(crypt_local_t *local,
+						 loc_t *loc)
+{
+	struct crypt_inode_info *info;
+
+	/* GF_CALLOC hands out zeroed memory: no extra memset is needed */
+	info = GF_CALLOC(1, sizeof(*info), gf_crypt_mt_inode);
+	if (!info) {
+		gf_log ("crypt", GF_LOG_WARNING,
+			"Can not allocate inode info");
+		goto enomem;
+	}
+#if DEBUG_CRYPT
+	info->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+	if (!info->loc) {
+		gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
+		GF_FREE(info);
+		goto enomem;
+	}
+	if (loc_copy(info->loc, loc)){
+		GF_FREE(info->loc);
+		GF_FREE(info);
+		goto enomem;
+	}
+#endif /* DEBUG_CRYPT */
+
+	local->info = info;
+	return info;
+ enomem:
+	/* the debug-only failure paths used to leave the status untouched */
+	local->op_ret = -1;
+	local->op_errno = ENOMEM;
+	return NULL;
+}
+
+/*
+ * Scrub and release an inode info. A NULL @info is ignored, so callers
+ * which obtained a zero context value (see crypt_forget()) are safe.
+ *
+ * NOTE(review): a plain memset() right before the release may be
+ * elided by an optimizing compiler; if the intent is to wipe key
+ * material, confirm that a non-elidable zeroing primitive is not
+ * needed here.
+ */
+static void free_inode_info(struct crypt_inode_info *info)
+{
+	if (!info)
+		return;
+#if DEBUG_CRYPT
+	loc_wipe(info->loc);
+	GF_FREE(info->loc);
+#endif
+	memset(info, 0, sizeof(*info));
+	GF_FREE(info);
+}
+
+/*
+ * ->forget(): detach the crypt info from the context of @inode and
+ * release it. Always returns 0.
+ */
+int crypt_forget (xlator_t *this, inode_t *inode)
+{
+	uint64_t stored = 0;
+
+	if (inode_ctx_del (inode, this, &stored) == 0)
+		free_inode_info((struct crypt_inode_info *)(long)stored);
+
+	return 0;
+}
+
+#if DEBUG_CRYPT
+/*
+ * Debug-only sanity checks of a completed read: @read is the op_ret
+ * of the read, @vec/@count the returned vector, @stbuf the returned
+ * attributes. Only logs at GF_LOG_DEBUG level, never fails.
+ */
+static void check_read(call_frame_t *frame, xlator_t *this, int32_t read,
+		       struct iovec *vec, int32_t count, struct iatt *stbuf)
+{
+	crypt_local_t *local = frame->local;
+	struct object_cipher_info *object = get_object_cinfo(local->info);
+	struct avec_config *conf = &local->data_conf;
+	uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
+
+	/* nothing to verify for failed or empty reads */
+	if (read <= 0)
+		return;
+
+	/* op_ret has to match the total length of the vector */
+	if (read != iovec_get_size(vec, count)) {
+		gf_log ("crypt", GF_LOG_DEBUG,
+			"op_ret differs from amount of read bytes");
+	}
+
+	/* padded objects are read in whole cipher blocks */
+	if (object_alg_should_pad(object) &&
+	    (read & (object_alg_blksize(object) - 1))) {
+		gf_log ("crypt", GF_LOG_DEBUG,
+			"bad amount of read bytes (!= 0 mod(cblock size))");
+	}
+
+	/* the read must not go beyond the block-aligned end of file */
+	if (conf->aligned_offset + read >
+	    stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0)) {
+		gf_log ("crypt", GF_LOG_DEBUG,
+			"bad amount of read bytes (too large))");
+	}
+}
+
+#define PT_BYTES_TO_DUMP (32)
+/*
+ * Debug helper: log up to PT_BYTES_TO_DUMP leading bytes of @avec as a
+ * C string. Does nothing when @avec is NULL.
+ */
+static void dump_plain_text(crypt_local_t *local, struct iovec *avec)
+{
+	size_t to_dump;
+	char str[PT_BYTES_TO_DUMP + 1];
+
+	if (!avec)
+		return;
+	to_dump = avec->iov_len;
+	if (to_dump > PT_BYTES_TO_DUMP)
+		to_dump = PT_BYTES_TO_DUMP;
+	if (to_dump)
+		memcpy(str, avec->iov_base, to_dump);
+	/*
+	 * Terminate with a NUL byte: the original stored the character
+	 * '0', which left the string unterminated for the "%s" below.
+	 */
+	str[to_dump] = '\0';
+	gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
+}
+
+/*
+ * Invariant of a data config: the number of vector components equals
+ * the number of full blocks plus one for each of head and tail block,
+ * if present. Returns non-zero when the invariant holds.
+ */
+static int32_t data_conf_invariant(struct avec_config *conf)
+{
+	return (!!has_head_block(conf) +
+		!!has_tail_block(conf) +
+		conf->nr_full_blocks) == conf->acount;
+}
+
+/*
+ * Invariant of a hole config: the pool holds one block for each of
+ * head block, tail block and "any full blocks", if present.
+ * Returns non-zero when the invariant holds.
+ */
+static int32_t hole_conf_invariant(struct avec_config *conf)
+{
+	return (!!has_head_block(conf) +
+		!!has_tail_block(conf) +
+		!!has_full_blocks(conf)) == conf->blocks_in_pool;
+}
+
+/*
+ * Debug-only check of the invariant matching the type of @conf.
+ * A config of unknown type is always reported as bad.
+ */
+static void crypt_check_conf(struct avec_config *conf)
+{
+	int32_t ok = 0;
+	const char *kind = "unknown";
+
+	if (conf->type == DATA_ATOM) {
+		kind = "data";
+		ok = data_conf_invariant(conf);
+	} else if (conf->type == HOLE_ATOM) {
+		kind = "hole";
+		ok = hole_conf_invariant(conf);
+	}
+
+	if (!ok)
+		gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", kind);
+}
+
+/*
+ * Debug-only check that the size reported in @buf equals the locally
+ * tracked file size rounded up to the cipher block size.
+ * Skipped for ftruncate and for reads issued by the crypt translator
+ * itself; any other fop except read and write is logged as bad.
+ */
+static void check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
+{
+	crypt_local_t *local = frame->local;
+	struct object_cipher_info *object = &local->info->cinfo;
+	uint64_t local_file_size;
+	uint64_t expected;
+
+	switch(local->fop) {
+	case GF_FOP_FTRUNCATE:
+		return;
+	case GF_FOP_WRITE:
+		local_file_size = local->new_file_size;
+		break;
+	case GF_FOP_READ:
+		if (parent_is_crypt_xlator(frame, this))
+			return;
+		local_file_size = local->cur_file_size;
+		break;
+	default:
+		gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
+		return;
+	}
+
+	expected = round_up(local_file_size, object_alg_blksize(object));
+	if (buf->ia_size == expected)
+		return;
+
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "bad ia_size in buf (%llu), should be %llu",
+	       (unsigned long long)buf->ia_size,
+	       (unsigned long long)expected);
+}
+
+#else
+/*
+ * Without DEBUG_CRYPT the debug helpers expand to "noop", so the call
+ * sites need no conditional compilation of their own.
+ */
+#define check_read(frame, this, op_ret, vec, count, stbuf) noop
+#define dump_plain_text(local, avec) noop
+#define crypt_check_conf(conf) noop
+#define check_buf(frame, this, buf) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Callback of the ciphertext read.
+ *
+ * Pre-conditions:
+ * @vec represents a ciphertext of expanded size and
+ * aligned offset.
+ *
+ * Compose a temporary vector @avec with block-aligned
+ * components, decrypt it and fix it up to represent the chunk
+ * of data corresponding to the original size and offset.
+ * The result is handed over via put_one_call_readv().
+ *
+ * On a failed read the child may pass NULL for @stbuf and @iobref
+ * (this translator does so in its own error unwinds), hence neither
+ * of them is touched before it has been checked.
+ */
+int32_t crypt_readv_cbk(call_frame_t *frame,
+			void *cookie,
+			xlator_t *this,
+			int32_t op_ret,
+			int32_t op_errno,
+			struct iovec *vec,
+			int32_t count,
+			struct iatt *stbuf,
+			struct iobref *iobref,
+			dict_t *xdata)
+{
+	crypt_local_t *local = frame->local;
+	struct avec_config *conf = &local->data_conf;
+	struct object_cipher_info *object = &local->info->cinfo;
+
+	struct iovec *avec;
+	uint32_t i;
+	uint32_t to_vec;
+	uint32_t to_user;
+
+	if (stbuf) {
+		check_buf(frame, this, stbuf);
+		check_read(frame, this, op_ret, vec, count, stbuf);
+	}
+
+	local->op_ret = op_ret;
+	local->op_errno = op_errno;
+	if (iobref)
+		local->iobref = iobref_ref(iobref);
+
+	if (stbuf)
+		local->buf = *stbuf;
+	/* report the size of the plain text, not of the padded object */
+	local->buf.ia_size = local->cur_file_size;
+
+	if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
+		goto put_one_call;
+
+	if (conf->orig_offset >= local->cur_file_size) {
+		/* read at or beyond EOF: nothing to return */
+		local->op_ret = 0;
+		goto put_one_call;
+	}
+	/*
+	 * correct config params with real file size
+	 * and actual amount of bytes read
+	 */
+	set_config_offsets(frame, this,
+			   conf->orig_offset, op_ret, DATA_ATOM, 0);
+
+	if (conf->orig_offset + conf->orig_size > local->cur_file_size)
+		conf->orig_size = local->cur_file_size - conf->orig_offset;
+	/*
+	 * calculate amount of data to be returned
+	 * to user.
+	 */
+	to_user = op_ret;
+	if (conf->aligned_offset + to_user <= conf->orig_offset) {
+		gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
+		local->op_ret = -1;
+		local->op_errno = EIO;
+		goto put_one_call;
+	}
+	/*
+	 * NOTE(review): the adjustments below use
+	 * (aligned_offset - orig_offset); confirm the intended sign
+	 * against set_config_offsets()/set_config_avec_data().
+	 */
+	to_user -= (conf->aligned_offset - conf->orig_offset);
+
+	if (to_user > conf->orig_size)
+		to_user = conf->orig_size;
+	local->rw_count = to_user;
+
+	op_errno = set_config_avec_data(this, local,
+					conf, object, vec, count);
+	if (op_errno) {
+		local->op_ret = -1;
+		local->op_errno = op_errno;
+		goto put_one_call;
+	}
+	avec = conf->avec;
+#if DEBUG_CRYPT
+	if (conf->off_in_tail != 0 &&
+	    conf->off_in_tail < object_alg_blksize(object) &&
+	    object_alg_should_pad(object))
+		gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
+		       conf->off_in_tail);
+	if (iovec_get_size(vec, count) != 0 &&
+	    in_same_lblock(conf->orig_offset + iovec_get_size(vec, count) - 1,
+			   local->cur_file_size - 1,
+			   object_alg_blkbits(object))) {
+		gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
+		dump_cblock(this,
+			    (unsigned char *)(avec[conf->acount - 1].iov_base) +
+			    avec[conf->acount - 1].iov_len - object_alg_blksize(object));
+		dump_cblock(this,
+			    (unsigned char *)(vec[count - 1].iov_base) +
+			    vec[count - 1].iov_len - object_alg_blksize(object));
+	}
+#endif
+	decrypt_aligned_iov(object, avec,
+			    conf->acount, conf->aligned_offset);
+	/*
+	 * pass proper plain data to user
+	 */
+	avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
+	avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
+
+	/* trim the components so that they sum up to @to_user bytes */
+	to_vec = to_user;
+	for (i = 0; i < conf->acount; i++) {
+		if (avec[i].iov_len > to_vec)
+			avec[i].iov_len = to_vec;
+		to_vec -= avec[i].iov_len;
+	}
+ put_one_call:
+	put_one_call_readv(frame, this);
+	return 0;
+}
+
+/*
+ * Callback of the fgetxattr() issued by crypt_readv_finodelk_cbk().
+ *
+ * Extracts the regular file size from @dict (key FSIZE_XATTR_PREFIX),
+ * stores it in local->cur_file_size and winds the read of the
+ * expanded, aligned region to the first child.
+ * On failure the error is recorded in local and the call is completed
+ * via put_one_call_readv().
+ */
+static int32_t do_readv(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->cur_file_size = data_to_uint64(data);
+
+ /* one call reference per wound read; dropped in crypt_readv_cbk() */
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ local->flags,
+ local->xdata);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ /* take a reference just to drop it: completes the call */
+ get_one_call(frame);
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+/*
+ * Callback of the finodelk() issued by crypt_readv().
+ *
+ * On success asks the first child for the file size xattr
+ * (continued in do_readv()); on failure drops the references taken
+ * by crypt_readv() and unwinds the read with the lock error.
+ */
+static int32_t crypt_readv_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size
+ */
+ STACK_WIND(frame,
+ do_readv,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ /* no lock is held here, so there is nothing to unlock */
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ op_errno,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Callback of the fstat() issued by crypt_readv() for zero-sized
+ * reads: saves the returned attributes and asks for the file size
+ * xattr (continued in load_file_size()); unwinds on failure.
+ *
+ * NOTE(review): the getxattr below is wound with local->loc, which
+ * crypt_readv() does not set up on this path - confirm that
+ * load_file_size()/getxattr cope with that, or whether fgetxattr on
+ * the fd was intended.
+ */
+static int32_t readv_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "stat failed (%d)", op_errno);
+ goto error;
+ }
+ local->buf = *buf;
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * ->readv() of the crypt translator.
+ *
+ * A zero-sized read is completed by a plain fstat(). Otherwise the
+ * request is expanded to block-aligned offset and size; then
+ *  - if the caller is the crypt translator itself (the "read" part of
+ *    a read-modify-write or read-prune-write sequence), the file size
+ *    is taken from @xdata and the read is wound right away;
+ *  - else a read lock on the whole file is requested first and the
+ *    file size is retrieved when the lock has been granted.
+ *
+ * Every failure unwinds with -1 and the error code; the reference on
+ * @fd taken here is dropped at the single error exit.
+ */
+int32_t crypt_readv(call_frame_t *frame,
+		    xlator_t *this,
+		    fd_t *fd,
+		    size_t size,
+		    off_t offset,
+		    uint32_t flags, dict_t *xdata)
+{
+	int32_t ret;
+	crypt_local_t *local;
+	struct crypt_inode_info *info;
+	struct gf_flock lock = {0, };
+
+#if DEBUG_CRYPT
+	gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
+	       (int)size, (long long)offset);
+	if (parent_is_crypt_xlator(frame, this))
+		gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
+#endif
+	local = crypt_alloc_local(frame, this, GF_FOP_READ);
+	if (!local) {
+		ret = ENOMEM;
+		goto error;
+	}
+	if (size == 0)
+		goto trivial;
+
+	local->fd = fd_ref(fd);
+	local->flags = flags;
+
+	info = local_get_inode_info(local, this);
+	if (info == NULL) {
+		ret = EINVAL;
+		goto error;
+	}
+	if (!object_alg_atomic(&info->cinfo)) {
+		ret = EINVAL;
+		goto error;
+	}
+	set_config_offsets(frame, this, offset, size,
+			   DATA_ATOM, 0);
+	if (parent_is_crypt_xlator(frame, this)) {
+		data_t *data;
+		/*
+		 * We are called by crypt_writev (or crypt_ftruncate)
+		 * to perform the "read" component of the read-modify-write
+		 * (or read-prune-write) sequence for some atom;
+		 *
+		 * don't ask for access:
+		 * it has already been acquired
+		 *
+		 * Retrieve current file size
+		 */
+		if (!xdata) {
+			gf_log("crypt", GF_LOG_WARNING,
+			       "Regular file size hasn't been passed");
+			ret = EIO;
+			goto error;
+		}
+		data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+		if (!data) {
+			gf_log("crypt", GF_LOG_WARNING,
+			       "Regular file size not found");
+			ret = EIO;
+			goto error;
+		}
+		local->old_file_size =
+			local->cur_file_size = data_to_uint64(data);
+
+		get_one_call(frame);
+		STACK_WIND(frame,
+			   crypt_readv_cbk,
+			   FIRST_CHILD(this),
+			   FIRST_CHILD(this)->fops->readv,
+			   local->fd,
+			   /*
+			    * FIXME: read amount can be reduced
+			    */
+			   local->data_conf.expanded_size,
+			   local->data_conf.aligned_offset,
+			   flags,
+			   NULL);
+		return 0;
+	}
+	if (xdata)
+		local->xdata = dict_ref(xdata);
+
+	/* read lock over the whole file */
+	lock.l_len = 0;
+	lock.l_start = 0;
+	lock.l_type = F_RDLCK;
+	lock.l_whence = SEEK_SET;
+
+	STACK_WIND(frame,
+		   crypt_readv_finodelk_cbk,
+		   FIRST_CHILD(this),
+		   FIRST_CHILD(this)->fops->finodelk,
+		   this->name,
+		   fd,
+		   F_SETLKW,
+		   &lock,
+		   NULL);
+	return 0;
+ trivial:
+	STACK_WIND(frame,
+		   readv_trivial_completion,
+		   FIRST_CHILD(this),
+		   FIRST_CHILD(this)->fops->fstat,
+		   fd,
+		   NULL);
+	return 0;
+ error:
+	/*
+	 * Single place to drop the fd reference: the missing-size
+	 * errors above used to leak it.
+	 */
+	if (local && local->fd) {
+		fd_unref(local->fd);
+		local->fd = NULL;
+	}
+	STACK_UNWIND_STRICT(readv,
+			    frame,
+			    -1,
+			    ret,
+			    NULL,
+			    0,
+			    NULL,
+			    NULL,
+			    NULL);
+	return 0;
+}
+
+/*
+ * Set up the per-iteration write parameters in frame->local:
+ * padded offset and size (@io_offset, @io_size), their unpadded
+ * counterparts as reported by @atom, and the resulting file size.
+ * The file size on disk is marked for update only when the unpadded
+ * write extends beyond the current file size.
+ */
+void set_local_io_params_writev(call_frame_t *frame,
+				struct object_cipher_info *object,
+				struct rmw_atom *atom,
+				off_t io_offset,
+				uint32_t io_size)
+{
+	crypt_local_t *ctx = frame->local;
+
+	ctx->io_offset = io_offset;
+	ctx->io_size = io_size;
+
+	ctx->io_offset_nopad =
+		atom->offset_at(frame, object) + atom->offset_in(frame, object);
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "set nopad offset to %llu",
+	       (unsigned long long)ctx->io_offset_nopad);
+
+	ctx->io_size_nopad = atom->io_size_nopad(frame, object);
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "set nopad size to %llu",
+	       (unsigned long long)ctx->io_size_nopad);
+
+	/*
+	 * NOTE: eof_padding_size is 0 for all full atoms;
+	 * For head and tail atoms it will be set up at rmw_partial block()
+	 */
+	if (ctx->io_offset_nopad + ctx->io_size_nopad <= ctx->cur_file_size) {
+		/* the write stays within the file: size is unchanged */
+		ctx->update_disk_file_size = 0;
+		ctx->new_file_size = ctx->cur_file_size;
+		return;
+	}
+
+	/* the write extends the file */
+	ctx->new_file_size = ctx->io_offset_nopad + ctx->io_size_nopad;
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "set new file size to %llu",
+	       (unsigned long long)ctx->new_file_size);
+	ctx->update_disk_file_size = 1;
+}
+
+/*
+ * Set up the truncate parameters in frame->local, depending on
+ * whether the new end of file (data_conf.orig_offset) is aligned to
+ * the cipher block size.
+ */
+void set_local_io_params_ftruncate(call_frame_t *frame,
+				   struct object_cipher_info *object)
+{
+	crypt_local_t *ctx = frame->local;
+	struct avec_config *conf = &ctx->data_conf;
+	uint32_t resid = conf->orig_offset & (object_alg_blksize(object) - 1);
+
+	if (!resid) {
+		/*
+		 * block-aligned: file size will be updated
+		 * in this ->ftruncate stack
+		 */
+		ctx->eof_padding_size = 0;
+		ctx->new_file_size = conf->orig_offset;
+		ctx->update_disk_file_size = 1;
+		return;
+	}
+	/*
+	 * not aligned: file size will be updated
+	 * in the ->writev() stack,
+	 * when submitting file tail
+	 */
+	ctx->eof_padding_size = object_alg_blksize(object) - resid;
+	ctx->new_file_size = conf->aligned_offset;
+	ctx->update_disk_file_size = 0;
+}
+
+/* Submit the partial head block of the active config */
+static void submit_head(call_frame_t *frame, xlator_t *this)
+{
+	submit_partial(frame, this,
+		       ((crypt_local_t *)frame->local)->fd, HEAD_ATOM);
+}
+
+/* Submit the partial tail block of the active config */
+static void submit_tail(call_frame_t *frame, xlator_t *this)
+{
+	submit_partial(frame, this,
+		       ((crypt_local_t *)frame->local)->fd, TAIL_ATOM);
+}
+
+/* Submit the next piece of the hole being converted to zeros */
+static void submit_hole(call_frame_t *frame, xlator_t *this)
+{
+ /*
+ * hole conversion always means
+ * appended write and goes in ordered fashion
+ */
+ do_ordered_submit(frame, this, HOLE_ATOM);
+}
+
+/*
+ * Submit user data: one atom at a time in ordered mode; otherwise
+ * (logged as a bad mode) all the atoms in parallel.
+ */
+static void submit_data(call_frame_t *frame, xlator_t *this)
+{
+	if (!is_ordered_mode(frame)) {
+		gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
+		get_nr_calls(frame, nr_calls_data(frame));
+		do_parallel_submit(frame, this, DATA_ATOM);
+		return;
+	}
+	do_ordered_submit(frame, this, DATA_ATOM);
+}
+
+/*
+ * helpers called by writev_cbk, ftruncate_cbk in ordered mode
+ */
+
+/* Non-zero when a hole config has been set up (its avec exists) */
+static int32_t should_submit_hole(crypt_local_t *local)
+{
+	return local->hole_conf.avec != NULL;
+}
+
+/*
+ * Non-zero when there are still hole components to submit.
+ * For writes, the tail block of the hole is never submitted here.
+ */
+static int32_t should_resume_submit_hole(crypt_local_t *local)
+{
+	struct avec_config *conf = &local->hole_conf;
+
+	if (local->fop != GF_FOP_WRITE || !has_tail_block(conf))
+		return conf->cursor < conf->acount;
+	/*
+	 * Don't submit a part of hole, which
+	 * fits into a data block:
+	 * this part of hole will be converted
+	 * as a gap filled by zeros in data head
+	 * block.
+	 */
+	return conf->cursor < conf->acount - 1;
+}
+
+/*
+ * Non-zero when, in ordered mode, there are still data components to
+ * submit. Parallel writes are never resumed.
+ */
+static int32_t should_resume_submit_data(call_frame_t *frame)
+{
+	crypt_local_t *local = frame->local;
+	struct avec_config *conf = &local->data_conf;
+
+	if (!is_ordered_mode(frame)) {
+		/*
+		 * parallel writes
+		 */
+		return 0;
+	}
+	return conf->cursor < conf->acount;
+}
+
+/* Non-zero when a data config has been set up (its avec exists) */
+static int32_t should_submit_data_after_hole(crypt_local_t *local)
+{
+	struct avec_config *conf = &local->data_conf;
+
+	return conf->avec != NULL;
+}
+
+/*
+ * Save the attributes returned by a completed write/truncate step,
+ * replacing the reported sizes with the plain-text sizes tracked in
+ * frame->local, and advance the current file size to the new one.
+ * @prebuf and @postbuf must not be NULL.
+ */
+static void update_local_file_params(call_frame_t *frame,
+				     xlator_t *this,
+				     struct iatt *prebuf,
+				     struct iatt *postbuf)
+{
+	crypt_local_t *ctx = frame->local;
+
+	check_buf(frame, this, postbuf);
+
+	ctx->prebuf = *prebuf;
+	ctx->prebuf.ia_size = ctx->cur_file_size;
+
+	ctx->postbuf = *postbuf;
+	ctx->postbuf.ia_size = ctx->new_file_size;
+
+	ctx->cur_file_size = ctx->new_file_size;
+}
+
+/*
+ * Callback of one write iteration issued on behalf of ->writev()
+ * (also used under the name crypt_writev_cbk).
+ *
+ * Records the result, accounts the written user data and, in ordered
+ * mode, submits the next atom: more of the hole, the data after the
+ * hole, or more data. Finally drops one call reference.
+ */
+static int32_t end_writeback_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret <= 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "writev iteration failed");
+ goto put_one_call;
+ }
+ /*
+ * op_ret includes paddings (atom's head, atom's tail and EOF)
+ */
+ if (op_ret < local->io_size) {
+ /*
+ * NOTE(review): local->op_ret keeps the short, positive
+ * count here - confirm this is what the caller expects
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Incomplete writev iteration");
+ goto put_one_call;
+ }
+ op_ret -= local->eof_padding_size;
+ local->op_ret = op_ret;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local)) {
+
+ /* user data: account it in the total returned to the caller */
+ LOCK(&local->rw_count_lock);
+ local->rw_count += op_ret;
+ UNLOCK(&local->rw_count_lock);
+
+ if (should_resume_submit_data(frame))
+ submit_data(frame, this);
+ }
+ else {
+ /*
+ * hole conversion is going on;
+ * don't take into account written zeros
+ */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+
+ else if (should_submit_data_after_hole(local))
+ submit_data(frame, this);
+ }
+ put_one_call:
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+#define crypt_writev_cbk end_writeback_writev
+
+#define HOLE_WRITE_CHUNK_BITS 12
+#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
+
+/*
+ * Set up the hole config (local->hole_conf) for converting the hole
+ * of @size bytes at offset @off to zeros, and prepare the respective
+ * iovecs for submit.
+ * The hole lock should be held.
+ *
+ * Returns the result of set_config_avec_hole(); the callers treat a
+ * non-zero value as an errno.
+ */
+int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+				uint64_t off, off_t size)
+{
+	crypt_local_t *ctx = frame->local;
+	struct avec_config *hole = &ctx->hole_conf;
+	int32_t rc;
+
+	set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
+
+	rc = set_config_avec_hole(this, ctx, hole,
+				  &ctx->info->cinfo, ctx->fop);
+	crypt_check_conf(hole);
+
+	return rc;
+}
+
+/*
+ * Set up the data config (local->data_conf) for submitting @size
+ * bytes of @vec (@vec_count components) at offset @from.
+ * @setup_gap is passed through to set_config_offsets().
+ *
+ * Returns the result of set_config_avec_data(); the callers treat a
+ * non-zero value as an errno.
+ */
+int32_t prepare_for_submit_data(call_frame_t *frame, xlator_t *this,
+				off_t from, int32_t size, struct iovec *vec,
+				int32_t vec_count, int32_t setup_gap)
+{
+	/* signed, to match the return type of this function */
+	int32_t ret;
+	crypt_local_t *local = frame->local;
+	struct object_cipher_info *object = &local->info->cinfo;
+
+	set_config_offsets(frame, this, from, size,
+			   DATA_ATOM, setup_gap);
+
+	ret = set_config_avec_data(this, local,
+				   &local->data_conf, object, vec, vec_count);
+	crypt_check_conf(&local->data_conf);
+
+	return ret;
+}
+
+/*
+ * Release an aligned vector @avec together with the array @pool of
+ * block pointers. Nothing is released when @avec is NULL.
+ * @blocks_in_pool is not used: the blocks themselves are not freed
+ * here (presumably they are owned elsewhere - TODO confirm).
+ */
+static void free_avec(struct iovec *avec,
+		      char **pool, int blocks_in_pool)
+{
+	if (avec) {
+		GF_FREE(pool);
+		GF_FREE(avec);
+	}
+}
+
+/* Release the aligned vector and the pool of the data config */
+static void free_avec_data(crypt_local_t *local)
+{
+	/* no "return <expr>;" here: this function returns void */
+	free_avec(local->data_conf.avec,
+		  local->data_conf.pool,
+		  local->data_conf.blocks_in_pool);
+}
+
+/* Release the aligned vector and the pool of the hole config */
+static void free_avec_hole(crypt_local_t *local)
+{
+	/* no "return <expr>;" here: this function returns void */
+	free_avec(local->hole_conf.avec,
+		  local->hole_conf.pool,
+		  local->hole_conf.blocks_in_pool);
+}
+
+
+/*
+ * Submit all the atoms (head, full blocks, tail) of the config of
+ * type @dtype at once. The call references are taken by the caller.
+ */
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+			       atom_data_type dtype)
+{
+	crypt_local_t *ctx = frame->local;
+	struct avec_config *conf;
+
+	ctx->active_setup = dtype;
+	conf = conf_by_type(frame, dtype);
+
+	if (has_head_block(conf)) {
+		submit_head(frame, this);
+	}
+	if (has_full_blocks(conf)) {
+		submit_full(frame, this);
+	}
+	if (has_tail_block(conf)) {
+		submit_tail(frame, this);
+	}
+}
+
+/*
+ * Submit the next atom of the config of type @dtype: the first one
+ * of head, full blocks and tail which is due. One call reference is
+ * taken for the submitted atom; if nothing is due, this is logged.
+ */
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+			      atom_data_type dtype)
+{
+	crypt_local_t *ctx = frame->local;
+	struct avec_config *conf;
+
+	ctx->active_setup = dtype;
+	conf = conf_by_type(frame, dtype);
+
+	if (should_submit_head_block(conf)) {
+		get_one_call_nolock(frame);
+		submit_head(frame, this);
+		return;
+	}
+	if (should_submit_full_block(conf)) {
+		get_one_call_nolock(frame);
+		submit_full(frame, this);
+		return;
+	}
+	if (should_submit_tail_block(conf)) {
+		get_one_call_nolock(frame);
+		submit_tail(frame, this);
+		return;
+	}
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "nothing has been submitted in ordered mode");
+}
+
+/*
+ * Callback of the fgetxattr() issued by crypt_writev_finodelk_cbk().
+ *
+ * Extracts the regular file size from @dict (key FSIZE_XATTR_PREFIX);
+ * if the write starts beyond the end of file, sets up the conversion
+ * of the hole in between. Then submits the hole, or the data.
+ *
+ * Every failure (including a failed fgetxattr) is recorded in local
+ * before the call is completed via put_one_call_writev(); otherwise
+ * the successful status of the preceding lock request would be
+ * reported for a write which never happened.
+ */
+static int32_t do_writev(call_frame_t *frame,
+			 void *cookie,
+			 xlator_t *this,
+			 int32_t op_ret,
+			 int32_t op_errno,
+			 dict_t *dict,
+			 dict_t *xdata)
+{
+	data_t *data;
+	crypt_local_t *local = frame->local;
+	struct object_cipher_info *object = &local->info->cinfo;
+
+	if (op_ret < 0) {
+		if (!op_errno)
+			op_errno = EIO;
+		goto error;
+	}
+	/*
+	 * extract regular file size
+	 */
+	data = dict_get(dict, FSIZE_XATTR_PREFIX);
+	if (!data) {
+		gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+		op_errno = EIO;
+		goto error;
+	}
+	local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+	set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
+
+	if (local->cur_file_size < local->data_conf.orig_offset) {
+		/*
+		 * Set up hole config
+		 */
+		op_errno = prepare_for_submit_hole(frame,
+			this,
+			local->cur_file_size,
+			local->data_conf.orig_offset - local->cur_file_size);
+		if (op_errno)
+			goto error;
+	}
+	if (should_submit_hole(local))
+		submit_hole(frame, this);
+	else
+		submit_data(frame, this);
+	return 0;
+ error:
+	local->op_ret = -1;
+	local->op_errno = op_errno;
+
+	/* take a reference just to drop it: completes the call */
+	get_one_call_nolock(frame);
+	put_one_call_writev(frame, this);
+	return 0;
+}
+
+/*
+ * Callback of the finodelk() issued by crypt_writev().
+ *
+ * Records the result of the lock request; on success asks the first
+ * child for the file size xattr (continued in do_writev()), on
+ * failure completes the call via put_one_call_writev().
+ */
+static int32_t crypt_writev_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_writev,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ /* take a reference just to drop it: completes the call */
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+/*
+ * Callback of the fstat() issued by crypt_writev() for zero-sized
+ * writes: records the result and the returned attributes (with the
+ * locally tracked file size) and completes the call.
+ *
+ * @buf is copied only when it is present: a failed fstat may be
+ * unwound without attributes.
+ */
+static int32_t writev_trivial_completion(call_frame_t *frame,
+					 void *cookie,
+					 xlator_t *this,
+					 int32_t op_ret,
+					 int32_t op_errno,
+					 struct iatt *buf,
+					 dict_t *dict)
+{
+	crypt_local_t *local = frame->local;
+
+	local->op_ret = op_ret;
+	local->op_errno = op_errno;
+	if (buf) {
+		local->prebuf = *buf;
+		local->postbuf = *buf;
+	}
+
+	local->prebuf.ia_size = local->cur_file_size;
+	local->postbuf.ia_size = local->cur_file_size;
+
+	get_one_call(frame);
+	put_one_call_writev(frame, this);
+	return 0;
+}
+
+/*
+ * ->writev() of the crypt translator.
+ *
+ * A zero-sized write is completed by a plain fstat(). Otherwise the
+ * data config is set up for @vec at @offset; then
+ *  - if the caller is the crypt translator itself (shrinking
+ *    truncate), the file size is taken from @xdata and the data is
+ *    submitted right away;
+ *  - else a write lock on the whole file is requested first and the
+ *    file size is retrieved when the lock has been granted.
+ *
+ * Every failure unwinds with -1 and the error code after dropping the
+ * references taken here. The inode info is NOT released on failure:
+ * it has been fetched from the inode context, which still refers to
+ * it and releases it in crypt_forget().
+ */
+int crypt_writev(call_frame_t *frame,
+		 xlator_t *this,
+		 fd_t *fd,
+		 struct iovec *vec,
+		 int32_t count,
+		 off_t offset,
+		 uint32_t flags,
+		 struct iobref *iobref,
+		 dict_t *xdata)
+{
+	int32_t ret;
+	crypt_local_t *local;
+	struct crypt_inode_info *info;
+	struct gf_flock lock = {0, };
+#if DEBUG_CRYPT
+	gf_log ("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
+		(int)iovec_get_size(vec, count), (long long)offset);
+#endif
+	local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
+	if (!local) {
+		ret = ENOMEM;
+		goto error;
+	}
+	local->fd = fd_ref(fd);
+
+	if (iobref)
+		local->iobref = iobref_ref(iobref);
+	/*
+	 * to update real file size on the server
+	 */
+	local->xattr = dict_new();
+	if (!local->xattr) {
+		ret = ENOMEM;
+		goto error;
+	}
+	local->flags = flags;
+
+	info = local_get_inode_info(local, this);
+	if (info == NULL) {
+		ret = EINVAL;
+		goto error;
+	}
+	if (!object_alg_atomic(&info->cinfo)) {
+		ret = EINVAL;
+		goto error;
+	}
+	if (iovec_get_size(vec, count) == 0)
+		goto trivial;
+
+	ret = prepare_for_submit_data(frame, this, offset,
+				      iovec_get_size(vec, count),
+				      vec, count, 0 /* don't setup gap
+						       in tail: we don't
+						       know file size yet */);
+	if (ret)
+		goto error;
+
+	if (parent_is_crypt_xlator(frame, this)) {
+		data_t *data;
+		/*
+		 * we are called by shrinking crypt_ftruncate(),
+		 * which doesn't perform hole conversion;
+		 *
+		 * don't ask for access:
+		 * it has already been acquired
+		 */
+
+		/*
+		 * extract file size
+		 */
+		if (!xdata) {
+			gf_log("crypt", GF_LOG_WARNING,
+			       "Regular file size hasn't been passed");
+			ret = EIO;
+			goto error;
+		}
+		data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+		if (!data) {
+			gf_log("crypt", GF_LOG_WARNING,
+			       "Regular file size not found");
+			ret = EIO;
+			goto error;
+		}
+		local->old_file_size =
+			local->cur_file_size = data_to_uint64(data);
+
+		submit_data(frame, this);
+		return 0;
+	}
+	if (xdata)
+		local->xdata = dict_ref(xdata);
+	/*
+	 * lock the file and retrieve its size
+	 */
+	lock.l_len = 0;
+	lock.l_start = 0;
+	lock.l_type = F_WRLCK;
+	lock.l_whence = SEEK_SET;
+
+	STACK_WIND(frame,
+		   crypt_writev_finodelk_cbk,
+		   FIRST_CHILD(this),
+		   FIRST_CHILD(this)->fops->finodelk,
+		   this->name,
+		   fd,
+		   F_SETLKW,
+		   &lock,
+		   NULL);
+	return 0;
+ trivial:
+	STACK_WIND(frame,
+		   writev_trivial_completion,
+		   FIRST_CHILD(this),
+		   FIRST_CHILD(this)->fops->fstat,
+		   fd,
+		   NULL);
+	return 0;
+ error:
+	if (local) {
+		/* drop only the references which were taken above */
+		if (local->fd) {
+			fd_unref(local->fd);
+			local->fd = NULL;
+		}
+		if (local->iobref) {
+			iobref_unref(local->iobref);
+			local->iobref = NULL;
+		}
+		if (local->xdata) {
+			dict_unref(local->xdata);
+			local->xdata = NULL;
+		}
+		if (local->xattr) {
+			dict_unref(local->xattr);
+			local->xattr = NULL;
+		}
+		/*
+		 * local->info stays: it is owned by the inode context
+		 * (see local_get_inode_info() and crypt_forget())
+		 */
+	}
+
+	STACK_UNWIND_STRICT(writev, frame, -1, ret, NULL, NULL, NULL);
+	return 0;
+}
+
+/*
+ * Set up the data config offsets for pruning the file at @offset
+ * (zero-sized data atom, no gap set-up). Always returns 0.
+ */
+int32_t prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ set_config_offsets(frame, this,
+ offset,
+ 0, /* count */
+ DATA_ATOM,
+ 0 /* since we prune, there is no
+ gap in tail to uptodate */);
+ return 0;
+}
+
+/*
+ * Finish the read-prune-write sequence
+ *
+ * Can be invoked as
+ * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
+ * 2) ->writev_cbk() for non-cblock-aligned prune
+ *
+ * The attributes are taken over only from a successful operation:
+ * failures are unwound without them (crypt_writev() passes NULL for
+ * both @prebuf and @postbuf in its error path).
+ */
+
+static int32_t prune_complete(call_frame_t *frame,
+			      void *cookie,
+			      xlator_t *this,
+			      int32_t op_ret,
+			      int32_t op_errno,
+			      struct iatt *prebuf,
+			      struct iatt *postbuf,
+			      dict_t *xdata)
+{
+	crypt_local_t *local = frame->local;
+
+	local->op_ret = op_ret;
+	local->op_errno = op_errno;
+
+	if (op_ret >= 0 && prebuf && postbuf)
+		update_local_file_params(frame, this, prebuf, postbuf);
+
+	put_one_call_ftruncate(frame, this);
+	return 0;
+}
+
+/*
+ * This is called as ->ftruncate_cbk()
+ *
+ * Perform the "write" component of the
+ * read-prune-write sequence.
+ *
+ * submit the rest of the file
+ *
+ * Every failure, including the failure of the ftruncate itself, is
+ * recorded in local before the call is completed; otherwise the
+ * status of the preceding read would be reported to the caller.
+ */
+static int32_t prune_submit_file_tail(call_frame_t *frame,
+				      void *cookie,
+				      xlator_t *this,
+				      int32_t op_ret,
+				      int32_t op_errno,
+				      struct iatt *prebuf,
+				      struct iatt *postbuf,
+				      dict_t *xdata)
+{
+	crypt_local_t *local = frame->local;
+	struct avec_config *conf = &local->data_conf;
+	dict_t *dict;
+
+	if (op_ret < 0)
+		goto error;
+
+	if (local->xdata) {
+		dict_unref(local->xdata);
+		local->xdata = NULL;
+	}
+	if (xdata)
+		local->xdata = dict_ref(xdata);
+
+	dict = dict_new();
+	if (!dict) {
+		op_errno = ENOMEM;
+		goto error;
+	}
+
+	update_local_file_params(frame, this, prebuf, postbuf);
+	local->new_file_size = conf->orig_offset;
+
+	/*
+	 * The rest of the file is a partial block and, hence,
+	 * should be written via RMW sequence, so the crypt xlator
+	 * does STACK_WIND to itself.
+	 *
+	 * Pass current file size to crypt_writev()
+	 */
+	if (dict_set(dict,
+		     FSIZE_XATTR_PREFIX,
+		     data_from_uint64(local->cur_file_size))) {
+		gf_log("crypt", GF_LOG_WARNING,
+		       "can not set key to update file size");
+		dict_unref(dict);
+		/* the return value of dict_set() is not an errno */
+		op_errno = ENOMEM;
+		goto error;
+	}
+	gf_log("crypt", GF_LOG_DEBUG,
+	       "passing current file size (%llu) to crypt_writev",
+	       (unsigned long long)local->cur_file_size);
+	/*
+	 * Padding will be filled with
+	 * zeros by rmw_partial_block()
+	 */
+	STACK_WIND(frame,
+		   prune_complete,
+		   this,
+		   this->fops->writev, /* crypt_writev */
+		   local->fd,
+		   &local->vec,
+		   1,
+		   conf->aligned_offset, /* offset to write from */
+		   0,
+		   local->iobref,
+		   dict);
+
+	dict_unref(dict);
+	return 0;
+ error:
+	local->op_ret = -1;
+	local->op_errno = op_errno;
+
+	put_one_call_ftruncate(frame, this);
+	return 0;
+}
+
+/*
+ * This is called as a callback of ->writev() invoked on behalf
+ * of ftruncate(): it can be
+ * 1) ordered writes issued by hole conversion in the case of
+ * expanded truncate, or
+ * 2) an rmw partial data block issued by non-cblock-aligned
+ * prune.
+ *
+ * Records the result, updates the local file parameters and, in
+ * case (1), submits the next piece of the hole if there is one.
+ * Finally drops one call reference.
+ */
+int32_t end_writeback_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /*
+ * if nothing has been written,
+ * then it must be an error
+ */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local))
+ /* case (2) */
+ goto put_one_call;
+ /* case (1) */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+ /*
+ * case of hole, when we shouldn't resume
+ */
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform prune and write components of the
+ * read-prune-write sequence.
+ *
+ * Called as ->readv_cbk()
+ *
+ * Pre-conditions:
+ * @vec contains the latest atom of the file
+ * (plain text)
+ *
+ * Saves the first data_conf.off_in_head bytes of @vec in local->vec
+ * (the part of the block which survives the prune) and truncates the
+ * file at the aligned offset; the saved bytes are written back in
+ * prune_submit_file_tail().
+ */
+static int32_t prune_write(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t i;
+ size_t to_copy;
+ size_t copied = 0;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ /* NOTE(review): other callbacks test op_ret < 0 - confirm -1 only */
+ if (op_ret == -1)
+ goto put_one_call;
+
+ /*
+ * At first, uptodate head block
+ */
+ if (iovec_get_size(vec, count) < conf->off_in_head) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to uptodate head block for prune");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ local->vec.iov_len = conf->off_in_head;
+ local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len,
+ gf_crypt_mt_data);
+
+ if (local->vec.iov_base == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to calloc head block for prune");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto put_one_call;
+ }
+ /* gather the leading off_in_head bytes of @vec into local->vec */
+ for (i = 0; i < count; i++) {
+ to_copy = vec[i].iov_len;
+ if (to_copy > local->vec.iov_len - copied)
+ to_copy = local->vec.iov_len - copied;
+
+ memcpy((char *)local->vec.iov_base + copied,
+ vec[i].iov_base,
+ to_copy);
+ copied += to_copy;
+ if (copied == local->vec.iov_len)
+ break;
+ }
+ /*
+ * perform prune with aligned offset
+ * (i.e. at this step we prune a bit
+ * more than is needed)
+ */
+ STACK_WIND(frame,
+ prune_submit_file_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->aligned_offset,
+ local->xdata);
+ return 0;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform a read-prune-write sequence
+ */
+int32_t read_prune_write(call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_local_io_params_ftruncate(frame, object);
+ get_one_call_nolock(frame);
+
+ if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
+ /*
+ * cblock-aligned prune:
+ * we don't need read and write components,
+ * just cut file body
+ */
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune without RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+
+ STACK_WIND(frame,
+ prune_complete,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->orig_offset,
+ local->xdata);
+ return 0;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune with RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+ /*
+ * We are about to perform the "read" component of the
+ * read-prune-write sequence. It means that we need to
+ * read encrypted data from disk and decrypt it.
+ * So, the crypt translator does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_readv()
+
+ */
+ dict = dict_new();
+ if (!dict) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ ret = ENOMEM;
+ goto exit;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ prune_write,
+ this,
+ this->fops->readv, /* crypt_readv */
+ local->fd,
+ get_atom_size(object), /* bytes to read */
+ conf->aligned_offset, /* offset to read from */
+ 0,
+ dict);
+ exit:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*
+ * File prune is more complicated than expand.
+ * First we need to read the latest atom to not lose info
+ * needed for proper update. Also we need to make sure that
+ * every component of read-prune-write sequence leaves data
+ * consistent
+ *
+ * Non-cblock aligned prune is performed as read-prune-write
+ * sequence:
+ *
+ * 1) read the latest atom;
+ * 2) perform cblock-aligned prune
+ * 3) issue a write request for the end-of-file
+ */
+int32_t prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ int32_t ret;
+
+ ret = prepare_for_prune(frame, this, offset);
+ if (ret)
+ return ret;
+ return read_prune_write(frame, this);
+}
+
+ /*
+  * Grow the file to @offset by submitting a hole that starts at
+  * the old file size. Returns 0 once the hole has been
+  * submitted, or the error reported by the preparation step.
+  */
+ int32_t expand_file(call_frame_t *frame, xlator_t *this,
+                     uint64_t offset)
+ {
+     crypt_local_t *local = frame->local;
+     int32_t ret;
+
+     ret = prepare_for_submit_hole(frame, this,
+                                   local->old_file_size,
+                                   offset - local->old_file_size);
+     if (!ret)
+         submit_hole(frame, this);
+     return ret;
+ }
+
+ /*
+  * ->fstat_cbk() of the trivial ftruncate (the requested size
+  * equals the current one): report the fstat attributes, with
+  * the size replaced by the regular file size, as both the
+  * pre- and the post-operation state.
+  */
+ static int32_t ftruncate_trivial_completion(call_frame_t *frame,
+                                             void *cookie,
+                                             xlator_t *this,
+                                             int32_t op_ret,
+                                             int32_t op_errno,
+                                             struct iatt *buf,
+                                             dict_t *dict)
+ {
+     crypt_local_t *local = frame->local;
+
+     local->op_ret = op_ret;
+     local->op_errno = op_errno;
+     /*
+      * A failed fop may be unwound with a NULL iatt (this
+      * translator does so itself), so copy it only if present.
+      */
+     if (buf) {
+         local->prebuf = *buf;
+         local->postbuf = *buf;
+     }
+     local->prebuf.ia_size = local->cur_file_size;
+     local->postbuf.ia_size = local->cur_file_size;
+
+     get_one_call(frame);
+     put_one_call_ftruncate(frame, this);
+     return 0;
+ }
+
+ /*
+ * Second step of ftruncate, called as ->fgetxattr_cbk():
+ * extract the regular file size from @dict and, depending on
+ * how it compares with the requested offset, prune the file,
+ * expand it, or (sizes are equal) just fstat it.
+ * On any failure the fop is finished with op_ret == -1.
+ */
+ static int32_t do_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+ {
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ /* nothing to cut or to add: only the attributes are needed */
+ if (local->data_conf.orig_offset == local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG,
+ "trivial ftruncate (current file size %llu)",
+ (unsigned long long)local->cur_file_size);
+#endif
+ goto trivial;
+ }
+ else if (local->data_conf.orig_offset < local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = prune_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ else {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = expand_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ /* prune_file()/expand_file() return non-zero on failure */
+ if (op_errno)
+ goto error;
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ ftruncate_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ /*
+ * finish with ftruncate
+ */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+ }
+
+ /*
+  * ->finodelk_cbk() of the lock request issued by
+  * crypt_ftruncate(): once the lock is held, ask the child for
+  * the regular file size; otherwise finish the fop.
+  */
+ static int32_t crypt_ftruncate_finodelk_cbk(call_frame_t *frame,
+                                             void *cookie,
+                                             xlator_t *this,
+                                             int32_t op_ret,
+                                             int32_t op_errno,
+                                             dict_t *xdata)
+ {
+     crypt_local_t *local = frame->local;
+
+     local->op_ret = op_ret;
+     local->op_errno = op_errno;
+
+     if (op_ret >= 0) {
+         /* access has been granted, retrieve the file size first */
+         STACK_WIND(frame,
+                    do_ftruncate,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fgetxattr,
+                    local->fd,
+                    FSIZE_XATTR_PREFIX,
+                    NULL);
+         return 0;
+     }
+     get_one_call_nolock(frame);
+     put_one_call_ftruncate(frame, this);
+     return 0;
+ }
+
+/*
+ * ftruncate is performed in 2 steps:
+ * . receive file size;
+ * . expand or prune file.
+ */
+static int32_t crypt_ftruncate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ local->data_conf.orig_offset = offset;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_ftruncate_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+ /*
+  * ->flush_cbk(): last step of truncate. Unwinds with the flush
+  * status and the attributes saved in the local context.
+  */
+ int32_t truncate_end(call_frame_t *frame,
+                      void *cookie,
+                      xlator_t *this,
+                      int32_t op_ret,
+                      int32_t op_errno,
+                      dict_t *xdata)
+ {
+     crypt_local_t *ctx = frame->local;
+
+     STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno,
+                         &ctx->prebuf, &ctx->postbuf, ctx->xdata);
+     return 0;
+ }
+
+ /*
+  * ->ftruncate_cbk() of the truncate sequence: save the
+  * attributes for the final unwind and flush the fd that was
+  * opened on behalf of truncate.
+  *
+  * NOTE(review): op_ret/op_errno of ftruncate are not recorded
+  * here; truncate_end() reports the status of the flush.
+  */
+ int32_t truncate_flush(call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno,
+                        struct iatt *prebuf,
+                        struct iatt *postbuf,
+                        dict_t *xdata)
+ {
+     crypt_local_t *local = frame->local;
+     fd_t *fd = local->fd;
+
+     /*
+      * crypt_ftruncate() unwinds with NULL attributes when it
+      * fails early, so copy them only when they are supplied
+      */
+     if (prebuf)
+         local->prebuf = *prebuf;
+     if (postbuf)
+         local->postbuf = *postbuf;
+
+     STACK_WIND(frame,
+                truncate_end,
+                FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->flush,
+                fd,
+                NULL);
+     fd_unref(fd);
+     return 0;
+ }
+
+/*
+ * is called as ->open_cbk()
+ */
+static int32_t truncate_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0) {
+ fd_unref(fd);
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno, NULL, NULL, NULL);
+ return 0;
+ } else {
+ fd_bind (fd);
+ }
+ /*
+ * crypt_truncate() is implemented via crypt_ftruncate(),
+ * so the crypt xlator does STACK_WIND to itself here
+ */
+ STACK_WIND(frame,
+ truncate_flush,
+ this,
+ this->fops->ftruncate, /* crypt_ftruncate */
+ fd,
+ local->offset,
+ NULL);
+ return 0;
+}
+
+/*
+ * crypt_truncate() is implemented via crypt_ftruncate() as a
+ * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
+ */
+int32_t crypt_truncate(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
+{
+ fd_t *fd;
+ crypt_local_t *local;
+
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "truncate file %s at offset %llu",
+ loc->path, (unsigned long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
+ if (!local)
+ goto error;
+
+ fd = fd_create(loc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->offset = offset;
+ local->xdata = xdata;
+ STACK_WIND(frame,
+ truncate_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ loc,
+ O_RDWR,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+ /*
+  * Pick the ->writev() completion handler for the fop on whose
+  * behalf the write was issued; NULL (with a warning) for any
+  * fop other than write and ftruncate.
+  */
+ end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop)
+ {
+     if (fop == GF_FOP_WRITE)
+         return end_writeback_writev;
+     if (fop == GF_FOP_FTRUNCATE)
+         return end_writeback_ftruncate;
+
+     gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
+     return NULL;
+ }
+
+/*
+ * true, if the caller needs metadata string
+ */
+static int32_t is_custom_mtd(dict_t *xdata)
+{
+ data_t *data;
+ uint32_t flags;
+
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, MSGFLAGS_PREFIX);
+ if (!data)
+ return 0;
+ if (data->len != sizeof(uint32_t)) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Bad msgflags size (%d)", data->len);
+ return -1;
+ }
+ flags = *((uint32_t *)data->data);
+ return msgflags_check_mtd_lock(&flags);
+}
+
+ /*
+  * ->finodelk_cbk() of the unlock request issued by
+  * crypt_open_tail(); completes the open.
+  *
+  * load_mtd_open() also gets here after it has recorded a
+  * failure in the local context, so the unlock status must not
+  * replace an error that is already stored there.
+  */
+ static int32_t crypt_open_done(call_frame_t *frame,
+                                void *cookie,
+                                xlator_t *this,
+                                int32_t op_ret,
+                                int32_t op_errno, dict_t *xdata)
+ {
+     crypt_local_t *local = frame->local;
+
+     if (op_ret < 0)
+         gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+                op_errno);
+     /* keep the first failure; otherwise report the unlock status */
+     if (local->op_ret >= 0) {
+         local->op_ret = op_ret;
+         local->op_errno = op_errno;
+     }
+     put_one_call_open(frame);
+     return 0;
+ }
+
+ /*
+  * Final step of open: send an F_UNLCK finodelk request for the
+  * file; crypt_open_done() handles the reply.
+  */
+ static void crypt_open_tail(call_frame_t *frame, xlator_t *this)
+ {
+     crypt_local_t *local = frame->local;
+     struct gf_flock unlock = {0, };
+
+     unlock.l_pid = 0;
+     unlock.l_len = 0;
+     unlock.l_start = 0;
+     unlock.l_whence = SEEK_SET;
+     unlock.l_type = F_UNLCK;
+
+     STACK_WIND(frame,
+                crypt_open_done,
+                FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->finodelk,
+                this->name,
+                local->fd,
+                F_SETLKW,
+                &unlock,
+                NULL);
+ }
+
+/*
+ * load private inode info at open time
+ * called as ->fgetxattr_cbk()
+ */
+static int load_mtd_open(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret;
+ gf_boolean_t upload_info;
+ data_t *mtd;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ /*
+ * first, check for cached info
+ */
+ ret = inode_ctx_get(local->fd->inode, this, &value);
+ if (ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Inode info expected, but not found");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * info has been found in the cache
+ */
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info hasn't been found in the cache.
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto exit;
+ }
+ init_inode_info_head(info, local->fd);
+ upload_info = _gf_true;
+ }
+ /*
+ * extract metadata
+ */
+ mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
+ if (!mtd) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Format string wasn't found");
+ goto exit;
+ }
+ /*
+ * authenticate metadata against the path
+ */
+ ret = open_format((unsigned char *)mtd->data,
+ mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ if (upload_info) {
+ ret = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ ret = inode_ctx_put(local->fd->inode,
+ this, (uint64_t)(long)info);
+ if (ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ }
+ if (local->custom_mtd) {
+ /*
+ * pass the metadata string to the customer
+ */
+ ret = dict_set_static_bin(local->xdata,
+ CRYPTO_FORMAT_PREFIX,
+ mtd->data,
+ mtd->len);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ }
+ exit:
+ if (!local->custom_mtd)
+ crypt_open_tail(frame, this);
+ else
+ put_one_call_open(frame);
+ return 0;
+}
+
+ /*
+  * ->finodelk_cbk() of the lock request issued at open time:
+  * with the lock held, fetch the format string of the file.
+  */
+ static int32_t crypt_open_finodelk_cbk(call_frame_t *frame,
+                                        void *cookie,
+                                        xlator_t *this,
+                                        int32_t op_ret,
+                                        int32_t op_errno,
+                                        dict_t *xdata)
+ {
+     crypt_local_t *local = frame->local;
+
+     local->op_ret = op_ret;
+     local->op_errno = op_errno;
+
+     if (op_ret >= 0) {
+         STACK_WIND(frame,
+                    load_mtd_open,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fgetxattr,
+                    local->fd,
+                    CRYPTO_FORMAT_PREFIX,
+                    NULL);
+         return 0;
+     }
+     gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
+     put_one_call_open(frame);
+     return 0;
+ }
+
+/*
+ * verify metadata against the specified pathname
+ */
+static int32_t crypt_open_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ else if (local->custom_mtd){
+ local->xdata = dict_new();
+ if (!local->xdata) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_ERROR,
+ "Can not get new dict for mtd string");
+ goto exit;
+ }
+ }
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_open_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+ /*
+  * ->open() entry point.
+  *
+  * Keeps a private copy of @loc and a reference to @fd in the
+  * local context, adjusts @flags (O_WRONLY becomes O_RDWR,
+  * O_APPEND is cleared) and winds open to the child.
+  * On failure unwinds with op_ret == -1 and a positive errno.
+  */
+ static int32_t crypt_open(call_frame_t *frame,
+                           xlator_t *this,
+                           loc_t *loc,
+                           int32_t flags,
+                           fd_t *fd,
+                           dict_t *xdata)
+ {
+     int32_t ret = ENOMEM;
+     crypt_local_t *local;
+
+     local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
+     if (!local)
+         goto error;
+     local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+     if (!local->loc) {
+         ret = ENOMEM;
+         goto error;
+     }
+     memset(local->loc, 0, sizeof(*local->loc));
+     ret = loc_copy(local->loc, loc);
+     if (ret) {
+         GF_FREE(local->loc);
+         /* don't leave a dangling pointer in the local context */
+         local->loc = NULL;
+         /*
+          * the return value of loc_copy() is not an errno
+          * (presumably -1 on allocation failure - TODO confirm)
+          */
+         ret = ENOMEM;
+         goto error;
+     }
+     local->fd = fd_ref(fd);
+
+     ret = is_custom_mtd(xdata);
+     if (ret < 0) {
+         loc_wipe(local->loc);
+         GF_FREE(local->loc);
+         local->loc = NULL;
+         /* release the reference taken above */
+         fd_unref(local->fd);
+         local->fd = NULL;
+         ret = EINVAL;
+         goto error;
+     }
+     local->custom_mtd = ret;
+
+     if ((flags & O_ACCMODE) == O_WRONLY)
+         /*
+          * we can't open O_WRONLY, because
+          * we need to do read-modify-write
+          */
+         flags = (flags & ~O_ACCMODE) | O_RDWR;
+     /*
+      * Make sure that our translated offsets
+      * and counts won't be ignored
+      */
+     flags &= ~O_APPEND;
+     get_one_call_nolock(frame);
+     STACK_WIND(frame,
+                crypt_open_cbk,
+                FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->open,
+                loc,
+                flags,
+                fd,
+                xdata);
+     return 0;
+  error:
+     STACK_UNWIND_STRICT(open,
+                         frame,
+                         -1,
+                         ret,
+                         NULL,
+                         NULL);
+     return 0;
+ }
+
+ /*
+  * Finish setting up @info by calling the set_private() method
+  * of the cipher algorithm/mode pair recorded in it.
+  * Returns 0 on success, otherwise what set_private() returned.
+  */
+ static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+                                     struct master_cipher_info *master)
+ {
+     struct object_cipher_info *object = &info->cinfo;
+     int32_t ret;
+
+#if DEBUG_CRYPT
+     gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
+            uuid_utoa(info->oid));
+#endif
+     ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
+                                                                        master);
+     if (ret)
+         gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
+     return ret;
+ }
+
+/*
+ * Init inode info at ->create() time
+ */
+static void init_inode_info_create(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ data_t *data)
+{
+ struct object_cipher_info *object;
+
+ info->nr_minor = CRYPT_XLATOR_ID;
+ memcpy(info->oid, data->data, data->len);
+
+ object = &info->cinfo;
+
+ object->o_alg = master->m_alg;
+ object->o_mode = master->m_mode;
+ object->o_block_bits = master->m_block_bits;
+ object->o_dkey_size = master->m_dkey_size;
+}
+
+ /*
+  * Set the object id of @info to the gfid of the inode @fd
+  * refers to.
+  */
+ static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
+ {
+     inode_t *inode = fd->inode;
+
+     memcpy(info->oid, inode->gfid, sizeof(uuid_t));
+ }
+
+ /*
+ * Last step of create, called as ->finodelk_cbk() of the unlock
+ * request issued by crypt_create_tail(): complete the inode
+ * info, store it in the inode context and unwind.
+ * The inode info is freed on every failure path.
+ */
+ static int32_t crypt_create_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+ {
+ crypt_private_t *priv = this->private;
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+ /*
+ * these are saved in local variables and released only
+ * after the unwind below
+ */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0) {
+ free_inode_info(info);
+ goto unwind;
+ }
+ op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (op_errno) {
+ op_ret = -1;
+ free_inode_info(info);
+ goto unwind;
+ }
+ /*
+ * FIXME: drop major subversion number
+ */
+ op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
+ if (op_ret == -1) {
+ op_errno = EIO;
+ free_inode_info(info);
+ goto unwind;
+ }
+ unwind:
+ free_format(local);
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+ }
+
+ /*
+ * Called as ->fsetxattr_cbk() once the format xattr has been
+ * sent to the child: drop the xattr dict and send an F_UNLCK
+ * finodelk request, or unwind with the error if storing the
+ * xattr failed.
+ */
+ static int crypt_create_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+ {
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ /* saved here, released after the unwind on the error path */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ /* the xattr dict is no longer needed, whatever the outcome */
+ dict_unref(local->xattr);
+
+ if (op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_create_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ /* NOTE(review): no unlock request is sent on this path */
+ free_inode_info(local->info);
+ free_format(local);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+ }
+
+ /*
+  * ->finodelk_cbk() of the lock request issued by
+  * crypt_create_cbk(): with the lock held, store the format
+  * xattr on the new file. On failure everything acquired so
+  * far is released and create is unwound with the error.
+  */
+ static int32_t crypt_create_finodelk_cbk(call_frame_t *frame,
+                                          void *cookie,
+                                          xlator_t *this,
+                                          int32_t op_ret,
+                                          int32_t op_errno,
+                                          dict_t *xdata)
+ {
+     crypt_local_t *local = frame->local;
+     struct crypt_inode_info *info = local->info;
+
+     if (op_ret < 0)
+         goto error;
+
+     STACK_WIND(frame,
+                crypt_create_tail,
+                FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->fsetxattr,
+                local->fd,
+                local->xattr, /* CRYPTO_FORMAT_PREFIX */
+                0,
+                NULL);
+     return 0;
+  error:
+     free_inode_info(info);
+     free_format(local);
+     fd_unref(local->fd);
+     dict_unref(local->xattr);
+     if (local->xdata)
+         dict_unref(local->xdata);
+     /*
+      * crypt_create_cbk() took a reference on the inode before
+      * requesting the lock; release it like the later error
+      * paths of create do
+      */
+     if (local->inode)
+         inode_unref(local->inode);
+
+     STACK_UNWIND_STRICT(create,
+                         frame,
+                         op_ret,
+                         op_errno,
+                         NULL,
+                         NULL,
+                         NULL,
+                         NULL,
+                         NULL,
+                         NULL);
+     return 0;
+ }
+
+/*
+ * Create and store crypt-specific format on disk;
+ * Populate cache with private inode info
+ */
+static int32_t crypt_create_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_create_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+ /*
+  * ->create() entry point.
+  *
+  * Builds the inode info and the format string from the
+  * "gfid-req" entry of @xdata and the master cipher info, puts
+  * the format and a zero file size into local->xattr, and winds
+  * create to the child.
+  * On failure unwinds with op_ret == -1 and a non-zero errno.
+  */
+ static int32_t crypt_create(call_frame_t *frame,
+                             xlator_t *this,
+                             loc_t *loc,
+                             int32_t flags,
+                             mode_t mode,
+                             mode_t umask,
+                             fd_t *fd,
+                             dict_t *xdata)
+ {
+     int ret;
+     data_t *data;
+     crypt_local_t *local;
+     crypt_private_t *priv;
+     struct master_cipher_info *master;
+     struct crypt_inode_info *info;
+
+     priv = this->private;
+     master = get_master_cinfo(priv);
+
+     if (master_alg_atomic(master)) {
+         /*
+          * We can't open O_WRONLY, because we
+          * need to do read-modify-write.
+          */
+         if ((flags & O_ACCMODE) == O_WRONLY)
+             flags = (flags & ~O_ACCMODE) | O_RDWR;
+         /*
+          * Make sure that our translated offsets
+          * and counts won't be ignored
+          */
+         flags &= ~O_APPEND;
+     }
+     local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
+     if (!local) {
+         ret = ENOMEM;
+         goto error;
+     }
+     data = dict_get(xdata, "gfid-req");
+     if (!data) {
+         ret = EINVAL;
+         gf_log("crypt", GF_LOG_WARNING, "gfid not found");
+         goto error;
+     }
+     if (data->len != sizeof(uuid_t)) {
+         ret = EINVAL;
+         gf_log("crypt", GF_LOG_WARNING,
+                "bad gfid size (%d), should be %d",
+                (int)data->len, (int)sizeof(uuid_t));
+         goto error;
+     }
+     info = alloc_inode_info(local, loc);
+     if (!info){
+         ret = ENOMEM;
+         goto error;
+     }
+     /*
+      * NOTE:
+      * format has to be created BEFORE
+      * proceeding to the untrusted server
+      */
+     ret = alloc_format_create(local);
+     if (ret) {
+         free_inode_info(info);
+         goto error;
+     }
+     init_inode_info_create(info, master, data);
+
+     ret = create_format(local->format,
+                         loc,
+                         info,
+                         master);
+     if (ret) {
+         free_inode_info(info);
+         /* the format buffer is already allocated: release it too */
+         free_format(local);
+         goto error;
+     }
+     local->xattr = dict_new();
+     if (!local->xattr) {
+         /* ret is 0 here; don't unwind a failure with errno 0 */
+         ret = ENOMEM;
+         free_inode_info(info);
+         free_format(local);
+         goto error;
+     }
+     ret = dict_set_static_bin(local->xattr,
+                               CRYPTO_FORMAT_PREFIX,
+                               local->format,
+                               new_format_size());
+     if (ret) {
+         dict_unref(local->xattr);
+         free_inode_info(info);
+         free_format(local);
+         goto error;
+     }
+     ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+     if (ret) {
+         dict_unref(local->xattr);
+         free_inode_info(info);
+         free_format(local);
+         goto error;
+     }
+     local->fd = fd_ref(fd);
+
+     STACK_WIND(frame,
+                crypt_create_cbk,
+                FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->create,
+                loc,
+                flags,
+                mode,
+                umask,
+                fd,
+                xdata);
+     return 0;
+  error:
+     gf_log("crypt", GF_LOG_WARNING, "can not create file");
+     STACK_UNWIND_STRICT(create,
+                         frame,
+                         -1,
+                         ret,
+                         NULL, NULL, NULL,
+                         NULL, NULL, NULL);
+     return 0;
+ }
+
+/*
+ * FIXME: this should depends on the version of format string
+ */
+static int32_t filter_crypt_xattr(dict_t *dict,
+ char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
+ /*
+  * ->fsetxattr() entry point: remove every key matching
+  * "trusted.glusterfs.crypt*" from @dict, then pass the request
+  * on to the child.
+  */
+ static int32_t crypt_fsetxattr(call_frame_t *frame,
+                                xlator_t *this,
+                                fd_t *fd,
+                                dict_t *dict,
+                                int32_t flags, dict_t *xdata)
+ {
+     xlator_t *child = FIRST_CHILD(this);
+
+     dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+                          filter_crypt_xattr, NULL);
+     STACK_WIND(frame, default_fsetxattr_cbk, child,
+                child->fops->fsetxattr, fd, dict, flags, xdata);
+     return 0;
+ }
+
+/*
+ * TBD: verify file metadata before wind
+ */
+static int32_t crypt_setxattr(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t linkop_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0 &&
+ op_errno == ENOENT &&
+ local->loc->inode->ia_type == IA_IFLNK) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * unpin inode on the server
+ */
+static int32_t link_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ link_unwind(frame);
+ return 0;
+}
+
+ /*
+ * Release the resources held in the local context of a link
+ * operation and unwind it with the status stored in local.
+ * Without a local context link is unwound with ENOMEM.
+ */
+ void link_unwind(call_frame_t *frame)
+ {
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ inode_t *inode;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(link,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ /* copied to local variables; these are released after the unwind */
+ xdata = local->xdata;
+ xattr = local->xattr;
+ inode = local->inode;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc) {
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(link,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (inode)
+ inode_unref(inode);
+ }
+
+ /*
+  * Wind link to the child with the locations and xdata kept in
+  * the local context; link_flush() handles the reply.
+  */
+ void link_wind(call_frame_t *frame, xlator_t *this)
+ {
+     crypt_local_t *ctx = frame->local;
+
+     STACK_WIND(frame, link_flush,
+                FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+                ctx->loc, ctx->newloc, ctx->xdata);
+ }
+
+/*
+ * unlink()
+ */
+static int32_t unlink_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unlink_unwind(frame);
+ return 0;
+}
+
+ /*
+ * Release the resources held in the local context of an unlink
+ * operation and unwind it with the status stored in local.
+ * Without a local context unlink is unwound with ENOMEM.
+ */
+ void unlink_unwind(call_frame_t *frame)
+ {
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ /* copied to local variables; these are released after the unwind */
+ xdata = local->xdata;
+ xattr = local->xattr;
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ }
+
+ /*
+  * Wind unlink to the child with the location, flags and xdata
+  * kept in the local context; unlink_flush() handles the reply.
+  */
+ void unlink_wind(call_frame_t *frame, xlator_t *this)
+ {
+     crypt_local_t *ctx = frame->local;
+
+     STACK_WIND(frame, unlink_flush,
+                FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                ctx->loc, ctx->flags, ctx->xdata);
+ }
+
+ /*
+ * Release the resources held in the local context of a rename
+ * operation and unwind it with the status stored in local.
+ * Without a local context rename is unwound with ENOMEM.
+ */
+ void rename_unwind(call_frame_t *frame)
+ {
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ /* copied to local variables; these are released after the unwind */
+ xdata = local->xdata;
+ xattr = local->xattr;
+ prenewparent = local->prenewparent;
+ postnewparent = local->postnewparent;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc){
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ prenewparent,
+ postnewparent,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ /* heap copies of the new-parent attributes, made by rename_flush() */
+ if (prenewparent)
+ GF_FREE(prenewparent);
+ if (postnewparent)
+ GF_FREE(postnewparent);
+ }
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t rename_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ rename_unwind(frame);
+ return 0;
+}
+
+ /*
+  * ->rename_cbk(): save the results of rename in the local
+  * context (the new-parent attributes are copied to the heap)
+  * and flush the fd; rename_end() handles the reply.
+  * On failure rename is unwound with op_ret == -1.
+  */
+ static int32_t rename_flush(call_frame_t *frame,
+                             void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret,
+                             int32_t op_errno,
+                             struct iatt *buf,
+                             struct iatt *preoldparent,
+                             struct iatt *postoldparent,
+                             struct iatt *prenewparent,
+                             struct iatt *postnewparent,
+                             dict_t *xdata)
+ {
+     crypt_local_t *local = frame->local;
+
+     if (op_ret < 0)
+         goto error;
+     /*
+      * replace the request xdata with the one of the reply;
+      * there may be no request xdata at all, so check before
+      * dropping the reference (as link_flush() and
+      * unlink_flush() do)
+      */
+     if (local->xdata) {
+         dict_unref(local->xdata);
+         local->xdata = NULL;
+     }
+     if (xdata)
+         local->xdata = dict_ref(xdata);
+
+     local->buf = *buf;
+     local->prebuf = *preoldparent;
+     local->postbuf = *postoldparent;
+     if (prenewparent) {
+         local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
+                                         gf_crypt_mt_iatt);
+         if (!local->prenewparent) {
+             op_errno = ENOMEM;
+             goto error;
+         }
+         *local->prenewparent = *prenewparent;
+     }
+     if (postnewparent) {
+         local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
+                                          gf_crypt_mt_iatt);
+         if (!local->postnewparent) {
+             op_errno = ENOMEM;
+             goto error;
+         }
+         *local->postnewparent = *postnewparent;
+     }
+     STACK_WIND(frame,
+                rename_end,
+                FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->flush,
+                local->fd,
+                NULL);
+     return 0;
+  error:
+     local->op_ret = -1;
+     local->op_errno = op_errno;
+     rename_unwind(frame);
+     return 0;
+ }
+
+/*
+ * Wind the rename request to the first child translator using the
+ * old/new locations and xdata saved in the frame-local context.
+ * rename_flush is registered as the callback.
+ */
+void rename_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ rename_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * Callback of the finodelk(F_UNLCK) wound by do_linkop().
+ * Records the unlock status; on success the fop-specific wind
+ * handler is called, on failure the fop-specific unwind handler.
+ */
+static int32_t __do_linkop(call_frame_t *frame,
+                           void *cookie,
+                           xlator_t *this,
+                           int32_t op_ret,
+                           int32_t op_errno, dict_t *xdata)
+{
+        crypt_local_t *ctx = frame->local;
+        linkop_wind_handler_t do_wind = linkop_wind_dispatch(ctx->fop);
+        linkop_unwind_handler_t do_unwind = linkop_unwind_dispatch(ctx->fop);
+
+        ctx->op_ret = op_ret;
+        ctx->op_errno = op_errno;
+
+        if (op_ret < 0) {
+                gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+                       op_errno);
+                do_unwind(frame);
+                return 0;
+        }
+        do_wind(frame, this);
+        return 0;
+}
+
+/*
+ * Callback of the setxattr wound by linkop_begin().
+ * Records the status; on failure the fop is unwound through its
+ * unwind handler, otherwise a whole-file F_UNLCK finodelk on
+ * local->fd is wound with __do_linkop() as the callback.
+ */
+static int32_t do_linkop(call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         dict_t *xdata)
+{
+        struct gf_flock unlock = {0, };
+        crypt_local_t *ctx = frame->local;
+        linkop_unwind_handler_t do_unwind;
+
+        do_unwind = linkop_unwind_dispatch(ctx->fop);
+        ctx->op_ret = op_ret;
+        ctx->op_errno = op_errno;
+
+        if (op_ret < 0) {
+                do_unwind(frame);
+                return 0;
+        }
+
+        unlock.l_pid = 0;
+        unlock.l_len = 0;
+        unlock.l_start = 0;
+        unlock.l_whence = SEEK_SET;
+        unlock.l_type = F_UNLCK;
+
+        STACK_WIND(frame,
+                   __do_linkop,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->finodelk,
+                   this->name,
+                   ctx->fd,
+                   F_SETLKW,
+                   &unlock,
+                   NULL);
+        return 0;
+}
+
+/*
+ * Update the metadata string (against the new pathname);
+ * submit the result.
+ *
+ * Callback of the open wound by linkop(). The old metadata string is
+ * taken from @xdata under CRYPTO_FORMAT_PREFIX. The inode info comes
+ * from the inode context when present; otherwise it is allocated,
+ * initialized and stored in the inode context. The updated format
+ * string is placed in local->xattr and a setxattr is wound with
+ * do_linkop() as the callback.
+ *
+ * On any failure op_ret = -1 and op_errno are recorded in the local
+ * context and the fop-specific unwind handler is called.
+ * Always returns 0.
+ */
+static int32_t linkop_begin(call_frame_t *frame,
+                            void *cookie,
+                            xlator_t *this,
+                            int32_t op_ret,
+                            int32_t op_errno,
+                            fd_t *fd,
+                            dict_t *xdata)
+{
+        gf_boolean_t upload_info;
+        crypt_local_t *local = frame->local;
+        crypt_private_t *priv = this->private;
+        struct crypt_inode_info *info;
+        data_t *old_mtd;
+        uint32_t new_mtd_size;
+        uint64_t value = 0;
+        void (*unwind_fn)(call_frame_t *frame);
+        mtd_op_t mop;
+
+        unwind_fn = linkop_unwind_dispatch(local->fop);
+        mop = linkop_mtdop_dispatch(local->fop);
+
+        if (op_ret < 0) {
+                /*
+                 * verification failed
+                 */
+                goto error;
+        } else {
+                fd_bind (fd);
+        }
+
+        old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
+        if (!old_mtd) {
+                op_errno = EIO;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Metadata string wasn't found");
+                goto error;
+        }
+        new_mtd_size = format_size(mop, old_mtd->len);
+        op_errno = alloc_format(local, new_mtd_size);
+        if (op_errno)
+                goto error;
+        /*
+         * check for cached info
+         */
+        op_ret = inode_ctx_get(fd->inode, this, &value);
+        if (op_ret != -1) {
+                info = (struct crypt_inode_info *)(long)value;
+                if (info == NULL) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "Inode info was not found");
+                        op_errno = EINVAL;
+                        goto error;
+                }
+                /*
+                 * info was found in the cache
+                 */
+                local->info = info;
+                upload_info = _gf_false;
+        }
+        else {
+                /*
+                 * info wasn't found in the cache;
+                 */
+                info = alloc_inode_info(local, local->loc);
+                if (!info) {
+                        /*
+                         * op_errno is still 0 from alloc_format();
+                         * report the failure explicitly so the fop
+                         * is not unwound with -1 and errno 0
+                         */
+                        op_errno = ENOMEM;
+                        goto error;
+                }
+                init_inode_info_head(info, fd);
+                local->info = info;
+                upload_info = _gf_true;
+        }
+        op_errno = open_format((unsigned char *)old_mtd->data,
+                               old_mtd->len,
+                               local->loc,
+                               info,
+                               get_master_cinfo(priv),
+                               local,
+                               upload_info);
+        if (op_errno)
+                goto error;
+        if (upload_info == _gf_true) {
+                op_errno = init_inode_info_tail(info,
+                                                get_master_cinfo(priv));
+                if (op_errno)
+                        goto error;
+                op_errno = inode_ctx_put(fd->inode, this,
+                                         (uint64_t)(long)(info));
+                if (op_errno == -1) {
+                        op_errno = EIO;
+                        goto error;
+                }
+        }
+        /*
+         * update the format string (append/update/cut a MAC)
+         */
+        op_errno = update_format(local->format,
+                                 (unsigned char *)old_mtd->data,
+                                 old_mtd->len,
+                                 local->mac_idx,
+                                 mop,
+                                 local->newloc,
+                                 info,
+                                 get_master_cinfo(priv),
+                                 local);
+        if (op_errno)
+                goto error;
+        /*
+         * store the new format string on the server
+         */
+        if (new_mtd_size) {
+                op_errno = dict_set_static_bin(local->xattr,
+                                               CRYPTO_FORMAT_PREFIX,
+                                               local->format,
+                                               new_mtd_size);
+                if (op_errno)
+                        goto error;
+        }
+        STACK_WIND(frame,
+                   do_linkop,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->setxattr,
+                   local->loc,
+                   local->xattr,
+                   0,
+                   NULL);
+        return 0;
+ error:
+        local->op_ret = -1;
+        local->op_errno = op_errno;
+        unwind_fn(frame);
+        return 0;
+}
+
+/*
+ * Allocate the frame-local context for a link operation and fill it:
+ * a reference on @xdata (when given), a new fd for oldloc->inode,
+ * @flags, private copies of @oldloc and (when given) @newloc, and an
+ * empty xattr dict.
+ *
+ * Returns 0 on success and ENOMEM on any failure. On failure every
+ * resource acquired here is released and the matching pointer in the
+ * local context is reset, so a later unwind handler cannot release
+ * it a second time; op_ret = -1 and op_errno are recorded when the
+ * local context exists.
+ */
+static int32_t linkop_grab_local(call_frame_t *frame,
+                                 xlator_t *this,
+                                 loc_t *oldloc,
+                                 loc_t *newloc,
+                                 int flags, dict_t *xdata,
+                                 glusterfs_fop_t op)
+{
+        /* never overwritten, so every error path reports ENOMEM */
+        int32_t ret = ENOMEM;
+        fd_t *fd;
+        crypt_local_t *local;
+
+        local = crypt_alloc_local(frame, this, op);
+        if (!local)
+                goto error;
+        if (xdata)
+                local->xdata = dict_ref(xdata);
+
+        fd = fd_create(oldloc->inode, frame->root->pid);
+        if (!fd) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+                goto error;
+        }
+        local->fd = fd;
+        local->flags = flags;
+        local->loc = GF_CALLOC(1, sizeof(*oldloc), gf_crypt_mt_loc);
+        if (!local->loc)
+                goto error;
+        memset(local->loc, 0, sizeof(*local->loc));
+        if (loc_copy(local->loc, oldloc)) {
+                /* freed without loc_wipe(), as the original code did */
+                GF_FREE(local->loc);
+                local->loc = NULL;
+                goto error;
+        }
+        if (newloc) {
+                local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
+                if (!local->newloc)
+                        goto error;
+                memset(local->newloc, 0, sizeof(*local->newloc));
+                if (loc_copy(local->newloc, newloc)) {
+                        GF_FREE(local->newloc);
+                        local->newloc = NULL;
+                        goto error;
+                }
+        }
+        local->xattr = dict_new();
+        if (!local->xattr) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+                goto error;
+        }
+        return 0;
+
+error:
+        if (local) {
+                if (local->xdata) {
+                        dict_unref(local->xdata);
+                        local->xdata = NULL;
+                }
+                if (local->fd) {
+                        fd_unref(local->fd);
+                        local->fd = NULL;
+                }
+                /* only fully copied locations are still set here */
+                if (local->loc) {
+                        loc_wipe(local->loc);
+                        GF_FREE(local->loc);
+                        local->loc = NULL;
+                }
+                if (local->newloc) {
+                        loc_wipe(local->newloc);
+                        GF_FREE(local->newloc);
+                        local->newloc = NULL;
+                }
+                local->op_ret = -1;
+                local->op_errno = ret;
+        }
+
+        return ret;
+}
+
+/*
+ * read and verify locked metadata against the old pathname (via open);
+ * update the metadata string in accordance with the new pathname;
+ * submit modified metadata;
+ * wind;
+ *
+ * Common entry point of link, unlink and rename. Symbolic links skip
+ * the metadata handling and are wound straight to the child. On a
+ * set-up failure the fop is unwound through its unwind handler.
+ * Always returns 0.
+ */
+static int32_t linkop(call_frame_t *frame,
+                      xlator_t *this,
+                      loc_t *oldloc,
+                      loc_t *newloc,
+                      int flags,
+                      dict_t *xdata,
+                      glusterfs_fop_t op)
+{
+        int32_t ret;
+        dict_t *dict;
+        crypt_local_t *local;
+        void (*unwind_fn)(call_frame_t *frame);
+        void (*wind_fn)(call_frame_t *frame, xlator_t *this);
+
+        wind_fn = linkop_wind_dispatch(op);
+        unwind_fn = linkop_unwind_dispatch(op);
+
+        ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
+        local = frame->local;
+        if (ret)
+                goto error;
+
+        if (local->fd->inode->ia_type == IA_IFLNK)
+                goto wind;
+
+        dict = dict_new();
+        if (!dict) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+                ret = ENOMEM;
+                goto error;
+        }
+        /*
+         * Set a message to crypt_open() that we need
+         * locked metadata string.
+         * All link operations (link, unlink, rename)
+         * need write lock
+         */
+        msgflags_set_mtd_wlock(&local->msgflags);
+        ret = dict_set_static_bin(dict,
+                                  MSGFLAGS_PREFIX,
+                                  &local->msgflags,
+                                  sizeof(local->msgflags));
+        if (ret) {
+                gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
+                dict_unref(dict);
+                goto error;
+        }
+        /*
+         * verify metadata against the old pathname
+         * and retrieve locked metadata string
+         */
+        STACK_WIND(frame,
+                   linkop_begin,
+                   this,
+                   this->fops->open, /* crypt_open() */
+                   oldloc,
+                   O_RDWR,
+                   local->fd,
+                   dict);
+        dict_unref(dict);
+        return 0;
+
+wind:
+        wind_fn(frame, this);
+        return 0;
+
+error:
+        /*
+         * local is NULL when the local context could not be
+         * allocated; rename_unwind() copes with a missing context
+         * (NOTE(review): confirm the link/unlink handlers do too)
+         */
+        if (local) {
+                local->op_ret = -1;
+                local->op_errno = ret;
+        }
+        unwind_fn(frame);
+        return 0;
+}
+
+/* link fop: handled by the common link-operation path, no flags */
+static int32_t crypt_link(call_frame_t *frame, xlator_t *this,
+                          loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        int32_t ret;
+
+        ret = linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
+        return ret;
+}
+
+/* unlink fop: handled by the common link-operation path, no new location */
+static int32_t crypt_unlink(call_frame_t *frame, xlator_t *this,
+                            loc_t *loc, int flags, dict_t *xdata)
+{
+        int32_t ret;
+
+        ret = linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
+        return ret;
+}
+
+/* rename fop: handled by the common link-operation path, no flags */
+static int32_t crypt_rename(call_frame_t *frame, xlator_t *this,
+                            loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        int32_t ret;
+
+        ret = linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
+        return ret;
+}
+
+/*
+ * Drop one call reference of an open request. When put_one_call()
+ * returns non-zero the open fop is unwound with the status stored in
+ * the local context, and the fd, xdata and location held by the
+ * local context are released.
+ */
+static void put_one_call_open(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ /* copies used for the releases that follow the unwind */
+ fd_t *fd = local->fd;
+ loc_t *loc = local->loc;
+ dict_t *xdata = local->xdata;
+
+ STACK_UNWIND_STRICT(open,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ fd,
+ xdata);
+ fd_unref(fd);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(loc);
+ GF_FREE(loc);
+ }
+}
+
+/*
+ * Final step of readv: unwind to the caller and release resources.
+ *
+ * Used as the finodelk (unlock) callback and also called directly by
+ * crypt_readv_done(). A failed unlock is logged and replaces the
+ * stored status only if that status was not already a failure.
+ * The value returned to the user is local->rw_count when positive,
+ * local->op_ret otherwise.
+ */
+static int32_t __crypt_readv_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /* copies used for the releases that follow the unwind */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ /* read deals with data configs only */
+ struct iovec *avec = local->data_conf.avec;
+ char **pool = local->data_conf.pool;
+ int blocks_in_pool = local->data_conf.blocks_in_pool;
+ struct iobref *iobref = local->iobref;
+ struct iobref *iobref_data = local->iobref_data;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "readv unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ dump_plain_text(local, avec);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
+ (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
+ (int)(local->rw_count > 0 ? iovec_get_size(avec, local->data_conf.acount) : 0),
+ (unsigned long long)local->buf.ia_size);
+
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ local->rw_count > 0 ? local->rw_count : local->op_ret,
+ local->op_errno,
+ avec,
+ avec ? local->data_conf.acount : 0,
+ &local->buf,
+ local->iobref,
+ local_xdata);
+
+ free_avec(avec, pool, blocks_in_pool);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobref_data)
+ iobref_unref(iobref_data);
+ return 0;
+}
+
+/*
+ * Finish a readv. When the parent frame belongs to the crypt
+ * translator the request is completed at once (the parent does the
+ * unlock); otherwise a whole-file F_UNLCK finodelk on local->fd is
+ * wound with __crypt_readv_done() as the callback.
+ */
+static void crypt_readv_done(call_frame_t *frame, xlator_t *this)
+{
+        crypt_local_t *ctx;
+        struct gf_flock unlock = {0, };
+
+        if (parent_is_crypt_xlator(frame, this)) {
+                /*
+                 * don't unlock (it will be done by the parent)
+                 */
+                __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
+                return;
+        }
+        ctx = frame->local;
+
+        unlock.l_pid = 0;
+        unlock.l_len = 0;
+        unlock.l_start = 0;
+        unlock.l_whence = SEEK_SET;
+        unlock.l_type = F_UNLCK;
+
+        STACK_WIND(frame,
+                   __crypt_readv_done,
+                   FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->finodelk,
+                   this->name,
+                   ctx->fd,
+                   F_SETLKW,
+                   &unlock,
+                   NULL);
+}
+
+/*
+ * Drop one call reference of a readv request and finish the
+ * request when put_one_call() returns non-zero.
+ */
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this)
+{
+        crypt_local_t *ctx = frame->local;
+
+        if (!put_one_call(ctx))
+                return;
+        crypt_readv_done(frame, this);
+}
+
+/*
+ * Final step of writev: compute the byte count reported to the
+ * user, release the buffers held by the local context and unwind.
+ *
+ * Used as the finodelk (unlock) callback and also called directly
+ * by crypt_writev_done(). The op_ret/op_errno arguments are not
+ * examined here.
+ */
+static int32_t __crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /* copies used for the releases that follow the unwind */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ int32_t ret_to_user;
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+ /*
+ * Calculate amount of bytes to be returned
+ * to user. We need to subtract paddings that
+ * have been written as a part of atom.
+ */
+ /*
+ * subtract head padding
+ */
+ if (local->rw_count == 0)
+ /*
+ * Nothing has been written, it must be an error
+ */
+ ret_to_user = local->op_ret;
+ else if (local->rw_count <= local->data_conf.off_in_head) {
+ gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
+ ret_to_user = 0;
+ }
+ else
+ ret_to_user = local->rw_count -
+ local->data_conf.off_in_head;
+ /*
+ * subtract tail padding
+ */
+ if (ret_to_user > local->data_conf.orig_size)
+ ret_to_user = local->data_conf.orig_size;
+
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "writev: ret_to_user: %d", ret_to_user);
+
+ STACK_UNWIND_STRICT(writev,
+ frame,
+ ret_to_user,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+/*
+ * Called once all parts of a writev have completed; also used as
+ * the callback of the fsetxattr that stores the file size.
+ *
+ * A negative op_ret is only logged. When the parent frame belongs to
+ * the crypt translator the request is completed at once; otherwise a
+ * whole-file F_UNLCK finodelk on local->fd is wound with
+ * __crypt_writev_done() as the callback. Always returns 0.
+ */
+static int32_t crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+ return 0;
+}
+
+/*
+ * Drop one call reference of a writev request. When put_one_call()
+ * returns non-zero the request is finished: if the on-disk file size
+ * has to be updated, local->cur_file_size is stored under
+ * FSIZE_XATTR_PREFIX via fsetxattr before crypt_writev_done() runs;
+ * otherwise crypt_writev_done() is called directly.
+ */
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ /* finish the request without the size update */
+ crypt_writev_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* holds FSIZE_XATTR_PREFIX set above */
+ 0,
+ NULL);
+ }
+ else
+ crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * Final step of ftruncate; callback of the finodelk (unlock) wound
+ * by crypt_ftruncate_done().
+ *
+ * A failed unlock is logged and replaces the stored status only if
+ * that status was not already a failure. Buffers held by the local
+ * context are released, the fop is unwound with -1 or 0, and the fd,
+ * xdata and the local->vec base buffer are released afterwards.
+ */
+static int32_t __crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /* copies used for the releases that follow the unwind */
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ char *iobase = local->vec.iov_base;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "ftruncate unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "ftruncate, return to user: presize=%llu, postsize=%llu",
+ (unsigned long long)local->prebuf.ia_size,
+ (unsigned long long)local->postbuf.ia_size);
+
+ STACK_UNWIND_STRICT(ftruncate,
+ frame,
+ local->op_ret < 0 ? -1 : 0,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobase)
+ GF_FREE(iobase);
+ return 0;
+}
+
+/*
+ * Called once all parts of an ftruncate have completed; also used
+ * as the callback of the fsetxattr that stores the file size.
+ *
+ * Releases local->xattr, logs a negative op_ret, and winds a
+ * whole-file F_UNLCK finodelk on local->fd with
+ * __crypt_ftruncate_done() as the callback. Always returns 0.
+ */
+static int32_t crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ /* NOTE(review): unlike the writev path this unref is not NULL-guarded */
+ dict_unref(local->xattr);
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+}
+
+/*
+ * Drop one call reference of an ftruncate request. When
+ * put_one_call() returns non-zero the request is finished: if the
+ * on-disk file size has to be updated, local->cur_file_size is
+ * stored under FSIZE_XATTR_PREFIX via fsetxattr before
+ * crypt_ftruncate_done() runs; otherwise crypt_ftruncate_done() is
+ * called directly.
+ */
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ /* finish the request without the size update */
+ crypt_ftruncate_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* holds FSIZE_XATTR_PREFIX set above */
+ 0,
+ NULL);
+ }
+ else
+ crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * load regular file size for some FOPs
+ *
+ * Callback of the (f)getxattr that fetches FSIZE_XATTR_PREFIX.
+ * On success the value replaces local->buf.ia_size. Then the fd and
+ * location held by the local context are released and the original
+ * fop (fstat, stat, lookup or readv, chosen by local->fop) is
+ * unwound; result buffers are passed only when op_ret >= 0.
+ * A missing size attribute is reported as -1/EIO.
+ *
+ * The references on local->xdata and local->inode are dropped
+ * exactly once, after the unwind. Always returns 0.
+ */
+static int32_t load_file_size(call_frame_t *frame,
+                              void *cookie,
+                              xlator_t *this,
+                              int32_t op_ret,
+                              int32_t op_errno,
+                              dict_t *dict,
+                              dict_t *xdata)
+{
+        data_t *data;
+        crypt_local_t *local = frame->local;
+
+        /* copies used for the releases that follow the unwind */
+        dict_t *local_xdata = local->xdata;
+        inode_t *local_inode = local->inode;
+
+        if (op_ret < 0)
+                goto unwind;
+        /*
+         * load regular file size
+         */
+        data = dict_get(dict, FSIZE_XATTR_PREFIX);
+        if (!data) {
+                /*
+                 * local->xdata is still handed to the unwind below
+                 * and released afterwards through local_xdata, so it
+                 * must not be released here
+                 */
+                gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+                op_ret = -1;
+                op_errno = EIO;
+                goto unwind;
+        }
+        local->buf.ia_size = data_to_uint64(data);
+
+        gf_log(this->name, GF_LOG_DEBUG,
+               "FOP %d: Translate regular file to %llu",
+               local->fop,
+               (unsigned long long)local->buf.ia_size);
+ unwind:
+        if (local->fd)
+                fd_unref(local->fd);
+        if (local->loc) {
+                loc_wipe(local->loc);
+                GF_FREE(local->loc);
+        }
+        switch (local->fop) {
+        case GF_FOP_FSTAT:
+                STACK_UNWIND_STRICT(fstat,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    local->xdata);
+                break;
+        case GF_FOP_STAT:
+                STACK_UNWIND_STRICT(stat,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    local->xdata);
+                break;
+        case GF_FOP_LOOKUP:
+                STACK_UNWIND_STRICT(lookup,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    op_ret >= 0 ? local->inode : NULL,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    local->xdata,
+                                    op_ret >= 0 ? &local->postbuf : NULL);
+                break;
+        case GF_FOP_READ:
+                STACK_UNWIND_STRICT(readv,
+                                    frame,
+                                    op_ret,
+                                    op_errno,
+                                    NULL,
+                                    0,
+                                    op_ret >= 0 ? &local->buf : NULL,
+                                    NULL,
+                                    NULL);
+                break;
+        default:
+                gf_log(this->name, GF_LOG_WARNING,
+                       "Improper file operation %d", local->fop);
+        }
+        if (local_xdata)
+                dict_unref(local_xdata);
+        if (local_inode)
+                inode_unref(local_inode);
+        return 0;
+}
+
+/*
+ * Common callback of stat and fstat.
+ *
+ * For a successful result on a regular file the iatt and xdata are
+ * saved in the local context and the FSIZE_XATTR_PREFIX attribute is
+ * requested (fgetxattr for fstat, getxattr for stat) with
+ * load_file_size() as the callback. In all other cases the fd and
+ * location held by the local context are released and the child's
+ * result is unwound (buf and xdata only when op_ret >= 0).
+ * Always returns 0.
+ */
+static int32_t crypt_stat_common_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->buf = *buf;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ default:
+ /* NOTE(review): nothing is wound or unwound on this path */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+}
+
+/*
+ * fstat fop: allocate a local context holding a reference on @fd and
+ * wind fstat to the child with crypt_stat_common_cbk() as the
+ * callback. Unwinds with -1/ENOMEM when the local context cannot be
+ * allocated. Always returns 0.
+ */
+static int32_t crypt_fstat(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
+ if (!local)
+ goto error;
+ local->fd = fd_ref(fd);
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * stat fop: allocate a local context holding a private copy of
+ * @loc and wind stat to the child with crypt_stat_common_cbk() as
+ * the callback. Any set-up failure is unwound as -1/ENOMEM.
+ * Always returns 0.
+ */
+static int32_t crypt_stat(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_STAT);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ /* NOTE(review): local->loc keeps the freed pointer here */
+ GF_FREE(local->loc);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Callback of lookup.
+ *
+ * For a successful lookup of a regular file the inode, iatts and
+ * xdata are saved in the local context, the gfid from @buf is copied
+ * into local->loc, and FSIZE_XATTR_PREFIX is requested via getxattr
+ * with load_file_size() as the callback. Otherwise the private
+ * location is released and the child's result is unwound unchanged.
+ * Always returns 0.
+ */
+static int32_t crypt_lookup_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->postbuf = *postparent;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ gf_uuid_copy(local->loc->gfid, buf->ia_gfid);
+
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ unwind:
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+/*
+ * lookup fop: allocate a local context holding a private copy of
+ * @loc and wind lookup to the child with crypt_lookup_cbk() as the
+ * callback. Any set-up failure is unwound as -1/ENOMEM.
+ * Always returns 0.
+ */
+static int32_t crypt_lookup(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ /* NOTE(review): local->loc keeps the freed pointer here */
+ GF_FREE(local->loc);
+ goto error;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
+ STACK_WIND(frame,
+ crypt_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * readdirp callback: replace the stat size of every regular-file
+ * entry with the value stored under FSIZE_XATTR_PREFIX in the
+ * entry's dict. The first regular entry lacking that value stops
+ * the walk and turns the result into -1/EIO.
+ */
+static int32_t crypt_readdirp_cbk(call_frame_t *frame,
+                                  void *cookie,
+                                  xlator_t *this,
+                                  int32_t op_ret,
+                                  int32_t op_errno,
+                                  gf_dirent_t *entries, dict_t *xdata)
+{
+        gf_dirent_t *dent = NULL;
+
+        if (op_ret >= 0) {
+                list_for_each_entry (dent, (&entries->list), list) {
+                        data_t *fsize;
+
+                        if (IA_ISREG(dent->d_stat.ia_type)) {
+                                fsize = dict_get(dent->dict,
+                                                 FSIZE_XATTR_PREFIX);
+                                if (fsize == NULL) {
+                                        gf_log("crypt", GF_LOG_WARNING,
+                                               "Regular file size of direntry not found");
+                                        op_errno = EIO;
+                                        op_ret = -1;
+                                        break;
+                                }
+                                dent->d_stat.ia_size = data_to_uint64(fsize);
+                        }
+                }
+        }
+        STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+        return 0;
+}
+
+/*
+ * ->readdirp() fills in-core inodes, so we need to set proper
+ * file sizes for all directory entries of the parent @fd.
+ * Actual updates take place in ->crypt_readdirp_cbk()
+ *
+ * A FSIZE_XATTR_PREFIX key is added to @xdata (a new dict is created
+ * when the caller passed none) before the request is wound. On
+ * failure the fop is unwound with -1 and @ret as the errno value.
+ * Always returns 0.
+ */
+static int32_t crypt_readdirp(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto error;
+ }
+ else
+ dict_ref(xdata);
+ /*
+ * make sure that we'll have real file sizes at ->readdirp_cbk()
+ */
+ ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ /* NOTE(review): ret is dict_set()'s status, not checked to be an errno */
+ dict_unref(xdata);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd,
+ size,
+ offset,
+ xdata);
+ /* drop the reference taken (or created) above */
+ dict_unref(xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ret, NULL, NULL);
+ return 0;
+}
+
+/*
+ * access fop: always refused. A warning is logged and the fop is
+ * unwound with -1/EPERM.
+ */
+static int32_t crypt_access(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ gf_log(this->name, GF_LOG_WARNING,
+ "NFS mounts of encrypted volumes are unsupported");
+ STACK_UNWIND_STRICT(access, frame, -1, EPERM, NULL);
+ return 0;
+}
+
+/*
+ * Read the "block-size" option (from @options on reconfigure, from
+ * the translator's own options when @options is NULL) and store its
+ * base-2 logarithm in the master cipher info.
+ * Only 512, 1024, 2048 and 4096 are accepted.
+ * Returns 0 on success, -1 otherwise.
+ */
+int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv,
+                               dict_t *options)
+{
+        uint64_t block_size = 0;
+        int bits;
+        struct master_cipher_info *master = get_master_cinfo(priv);
+
+        if (options == NULL) {
+                GF_OPTION_INIT("block-size", block_size, size_uint64, error);
+        } else {
+                GF_OPTION_RECONF("block-size", block_size, options,
+                                 size_uint64, error);
+        }
+        /* 2^9 = 512 ... 2^12 = 4096 */
+        for (bits = 9; bits <= 12; bits++) {
+                if (block_size == ((uint64_t)1 << bits)) {
+                        master->m_block_bits = bits;
+                        return 0;
+                }
+        }
+        gf_log("crypt", GF_LOG_ERROR,
+               "FATAL: unsupported block size %llu",
+               (unsigned long long)block_size);
+ error:
+        return -1;
+}
+
+/* Select the cipher algorithm of the master info: always AES. */
+int32_t master_set_alg(xlator_t *this, crypt_private_t *priv)
+{
+        get_master_cinfo(priv)->m_alg = AES_CIPHER_ALG;
+        return 0;
+}
+
+/* Select the cipher mode of the master info: always XTS. */
+int32_t master_set_mode(xlator_t *this, crypt_private_t *priv)
+{
+        get_master_cinfo(priv)->m_mode = XTS_CIPHER_MODE;
+        return 0;
+}
+
+/*
+ * set key size in bits to the master info
+ * Pre-conditions: cipher mode in the master info is uptodate.
+ *
+ * The "data-key-size" option is read from @options on reconfigure,
+ * or from the translator's own options when @options is NULL, and
+ * validated by the check_key() hook of the configured
+ * algorithm/mode pair. Returns 0 on success, -1 otherwise.
+ */
+static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ int32_t ret;
+ uint64_t key_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("data-key-size", key_size, options,
+ uint64, error);
+ else
+ GF_OPTION_INIT("data-key-size", key_size, uint64, error);
+
+ /* m_alg and m_mode index the table of supported data ciphers */
+ ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: wrong bin key size %llu for alg %d mode %d",
+ (unsigned long long)key_size,
+ (int)master->m_alg,
+ (int)master->m_mode);
+ goto error;
+ }
+ master->m_dkey_size = key_size;
+ return 0;
+ error:
+ return -1;
+}
+
+/*
+ * Return 1 when the first character of @s is a decimal digit or a
+ * lower-case hex letter ('a'..'f'), 0 otherwise.
+ */
+static int is_hex(char *s) {
+        const char c = *s;
+
+        if (c >= '0' && c <= '9')
+                return 1;
+        return c >= 'a' && c <= 'f';
+}
+
+/* Value of one hex digit that has already passed is_hex(). */
+static unsigned char hex_digit_value(char c)
+{
+        return (unsigned char)(c <= '9' ? c - '0' : c - 'a' + 10);
+}
+
+/*
+ * Convert @hex_size hex characters at @src into hex_size / 2 bytes
+ * at @dst. Only the digits accepted by is_hex() are valid.
+ *
+ * @src does not have to be NUL-terminated: exactly
+ * 2 * (hex_size / 2) characters are read and no string function is
+ * applied to the buffer.
+ *
+ * Returns 0 on success, -1 when a non-hex character is met.
+ */
+static int parse_hex_buf(xlator_t *this, char *src, unsigned char *dst,
+                         int hex_size)
+{
+        int i;
+
+        for (i = 0; i < (hex_size / 2); i++) {
+                char *pair = src + i*2;
+
+                if (!is_hex(pair) || !is_hex(pair + 1)) {
+                        gf_log("crypt", GF_LOG_ERROR,
+                               "FATAL: not hex symbol in key");
+                        return -1;
+                }
+                dst[i] = (unsigned char)((hex_digit_value(pair[0]) << 4) |
+                                         hex_digit_value(pair[1]));
+        }
+        return 0;
+}
+
+/*
+ * Parse options;
+ * install master volume key
+ *
+ * The "master-key" option names a file whose first
+ * 2 * MASTER_VOL_KEY_SIZE characters are the key in lower-case hex.
+ * The decoded key is copied into the master cipher info.
+ * Both temporary key buffers are wiped on every path that leaves
+ * this function after the file has been read.
+ * Returns 0 on success, -1 otherwise.
+ */
+int32_t master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+        int32_t ret;
+        FILE *file = NULL;
+
+        size_t key_size;
+        char *opt_key_file_pathname = NULL;
+
+        unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
+        char hex_buf[2 * MASTER_VOL_KEY_SIZE];
+
+        struct master_cipher_info *master = get_master_cinfo(priv);
+        /*
+         * extract master key passed via option
+         */
+        GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
+
+        if (!opt_key_file_pathname) {
+                gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
+                return -1;
+        }
+        gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
+               opt_key_file_pathname);
+
+        file = fopen(opt_key_file_pathname, "r");
+        if (file == NULL) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "FATAL: can not open file with master key");
+                return -1;
+        }
+        /*
+         * extract hex key
+         */
+        key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
+        if (key_size < sizeof(hex_buf)) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "FATAL: master key is too short");
+                goto bad_key;
+        }
+        ret = parse_hex_buf(this, hex_buf, bin_buf, (int)key_size);
+        if (ret)
+                goto bad_key;
+        memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
+        memset(hex_buf, 0, sizeof(hex_buf));
+        fclose(file);
+
+        memset(bin_buf, 0, sizeof(bin_buf));
+        return 0;
+ bad_key:
+        gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
+        if (file)
+                fclose(file);
+        /* a partially read or rejected key is key material too */
+        memset(hex_buf, 0, sizeof(hex_buf));
+        memset(bin_buf, 0, sizeof(bin_buf));
+        return -1;
+}
+
+/*
+ * Derive volume key for object-id authentication
+ * (delegates to get_nmtd_vol_key() on the master cipher info).
+ */
+int32_t master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+        struct master_cipher_info *master = get_master_cinfo(priv);
+
+        return get_nmtd_vol_key(master);
+}
+
+/*
+ * Fill the master cipher info: algorithm, mode, block size, data key
+ * size, master volume key and the derived volume key, in that order.
+ * Stops at the first step that fails and returns its status;
+ * returns 0 when every step succeeds.
+ */
+int32_t crypt_init_xlator(xlator_t *this)
+{
+        crypt_private_t *priv = this->private;
+        int32_t ret = master_set_alg(this, priv);
+
+        if (!ret)
+                ret = master_set_mode(this, priv);
+        if (!ret)
+                ret = master_set_block_size(this, priv, NULL);
+        if (!ret)
+                ret = master_set_data_key_size(this, priv, NULL);
+        if (!ret)
+                ret = master_set_master_vol_key(this, priv);
+        if (!ret)
+                ret = master_set_nmtd_vol_key(this, priv);
+        return ret;
+}
+
+/*
+ * Allocate the translator's private data and attach it to @this.
+ * Returns 0 on success, ENOMEM when the allocation fails.
+ */
+static int32_t crypt_alloc_private(xlator_t *this)
+{
+        this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
+        if (this->private)
+                return 0;
+
+        gf_log("crypt", GF_LOG_ERROR,
+               "Can not allocate memory for private data");
+        return ENOMEM;
+}
+
+/*
+ * Wipe and free the translator's private data (it holds key
+ * material). this->private is reset afterwards, so a second call,
+ * for example fini() after a failed init(), is a harmless no-op
+ * instead of a double free.
+ */
+static void crypt_free_private(xlator_t *this)
+{
+        crypt_private_t *priv = this->private;
+        if (priv) {
+                memset(priv, 0, sizeof(*priv));
+                GF_FREE(priv);
+                this->private = NULL;
+        }
+}
+
+/*
+ * Initialize memory accounting for this translator with
+ * gf_crypt_mt_end memory types.
+ * Returns -1 when @this is NULL, otherwise the status of
+ * xlator_mem_acct_init().
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_crypt_mt_end);
+
+        if (ret != 0) {
+                /* the split literal used to read "initfailed" */
+                gf_log(this->name, GF_LOG_ERROR, "Memory accounting init "
+                       "failed");
+                return ret;
+        }
+
+        return ret;
+}
+
+/*
+ * Apply changed options at run time: block size and data key size
+ * are re-read from @options.
+ * Returns 0 on success; -1 when an argument is missing or an option
+ * is rejected.
+ */
+int32_t reconfigure (xlator_t *this, dict_t *options)
+{
+        int32_t ret = -1;
+        crypt_private_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("crypt", this, error);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, error);
+        GF_VALIDATE_OR_GOTO (this->name, options, error);
+
+        priv = this->private;
+
+        ret = master_set_block_size(this, priv, options);
+        if (ret) {
+                /* log under the translator's name, not the text "this->name" */
+                gf_log(this->name, GF_LOG_ERROR,
+                       "Failed to reconfigure block size");
+                goto error;
+        }
+        ret = master_set_data_key_size(this, priv, options);
+        if (ret) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "Failed to reconfigure data key size");
+                goto error;
+        }
+        return 0;
+ error:
+        return ret;
+}
+
+/*
+ * Translator entry point. Requires exactly one child; a missing
+ * parent is only warned about. Allocates the private data, fills the
+ * master cipher info and creates the memory pool for local contexts.
+ * Returns 0 on success. On failure returns EINVAL, ENOMEM or the
+ * status of the failing set-up step; private data is released when
+ * it had been allocated.
+ */
+int32_t init(xlator_t *this)
+{
+ int32_t ret;
+
+ if (!this->children || this->children->next) {
+ gf_log ("crypt", GF_LOG_ERROR,
+ "FATAL: crypt should have exactly one child");
+ return EINVAL;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+ ret = crypt_alloc_private(this);
+ if (ret)
+ return ret;
+ ret = crypt_init_xlator(this);
+ if (ret)
+ goto error;
+ this->local_pool = mem_pool_new(crypt_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto error;
+ }
+ gf_log ("crypt", GF_LOG_INFO, "crypt xlator loaded");
+ return 0;
+ error:
+ crypt_free_private(this);
+ return ret;
+}
+
+/*
+ * Translator exit point: releases the private data.
+ * NOTE(review): this->local_pool created in init() is not released
+ * here - confirm whether the framework owns it.
+ */
+void fini (xlator_t *this)
+{
+ crypt_free_private(this);
+}
+
+/*
+ * File operations implemented by the crypt translator.
+ */
+struct xlator_fops fops = {
+ .readv = crypt_readv,
+ .writev = crypt_writev,
+ .truncate = crypt_truncate,
+ .ftruncate = crypt_ftruncate,
+ .setxattr = crypt_setxattr,
+ .fsetxattr = crypt_fsetxattr,
+ .link = crypt_link,
+ .unlink = crypt_unlink,
+ .rename = crypt_rename,
+ .open = crypt_open,
+ .create = crypt_create,
+ .stat = crypt_stat,
+ .fstat = crypt_fstat,
+ .lookup = crypt_lookup,
+ .readdirp = crypt_readdirp,
+ .access = crypt_access
+};
+
+/*
+ * Translator callbacks: only the inode forget hook is provided.
+ */
+struct xlator_cbks cbks = {
+ .forget = crypt_forget
+};
+
+/*
+ * Options accepted by the crypt translator; the table ends with an
+ * entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ /* read once at init by master_set_master_vol_key() */
+ { .key = {"master-key"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "Pathname of regular file which contains master volume key"
+ },
+ /* read by master_set_data_key_size() */
+ { .key = {"data-key-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Data key size (bits)",
+ .min = 256,
+ .max = 512,
+ .default_value = "256",
+ },
+ /*
+ * read by master_set_block_size(), which accepts 512, 1024,
+ * 2048 and 4096 and stores log2 of the value.
+ * NOTE(review): the description says "bits" although the stored
+ * value is the bit count 9..12 - confirm the intended unit.
+ */
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Atom size (bits)",
+ .min = 512,
+ .max = 4096,
+ .default_value = "4096"
+ },
+ { .key = {NULL} },
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
new file mode 100644
index 00000000000..c1bfe3fcd0c
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.h
@@ -0,0 +1,900 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_H__
+#define __CRYPT_H__
+
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/cmac.h>
+#include <openssl/modes.h>
+#include "crypt-mem-types.h"
+#include "compat.h"
+
+#define CRYPT_XLATOR_ID (0)
+
+#define MAX_IOVEC_BITS (3)
+#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
+#define KEY_FACTOR_BITS (6)
+
+#define DEBUG_CRYPT (0)
+#define TRIVIAL_TFM (0)
+
+#define CRYPT_MIN_BLOCK_BITS (9)
+#define CRYPT_MAX_BLOCK_BITS (12)
+
+#define MASTER_VOL_KEY_SIZE (32)
+#define NMTD_VOL_KEY_SIZE (16)
+
+#if !defined(GF_LINUX_HOST_OS)
+typedef off_t loff_t;
+#endif
+
+/*
+ * Descriptor of one key type; crypt_keys[] holds one per crypt_key_type.
+ */
+struct crypt_key {
+ uint32_t len; /* key length; master_key_size() converts it with
+                  ">> 3", so presumably in bits - TODO confirm */
+ const char *label;
+};
+
+/*
+ * Key types; the values are used as indexes into crypt_keys[].
+ *
+ * Add new key types to the end of this
+ * enumeration but before LAST_KEY_TYPE
+ */
+typedef enum {
+ MASTER_VOL_KEY,
+ NMTD_VOL_KEY,
+ NMTD_LINK_KEY,
+ EMTD_FILE_KEY,
+ DATA_FILE_KEY_256,
+ DATA_FILE_KEY_512,
+ LAST_KEY_TYPE /* number of key types, dimension of crypt_keys[] */
+}crypt_key_type;
+
+/*
+ * Working state of one key derivation: a parent key and fixed input
+ * data go in, child keying material comes out.
+ */
+struct kderive_context {
+ const unsigned char *pkey;/* parent key */
+ uint32_t pkey_len; /* parent key size, bits */
+ uint32_t ckey_len; /* child key size, bits */
+ unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
+ uint32_t fid_len; /* fid len, bytes */
+ unsigned char *out; /* contains child keying material */
+ uint32_t out_len; /* out len, bytes */
+};
+
+/*
+ * What an atom carries; conf_by_type() uses it to select between the
+ * data and the hole configuration of a frame.
+ */
+typedef enum {
+ DATA_ATOM,
+ HOLE_ATOM,
+ LAST_DATA_TYPE
+}atom_data_type;
+
+/*
+ * Locality of an atom (head, tail or full), as stored in
+ * struct rmw_atom and passed to atom_by_types()/submit_partial().
+ */
+typedef enum {
+ HEAD_ATOM,
+ TAIL_ATOM,
+ FULL_ATOM,
+ LAST_LOCALITY_TYPE
+}atom_locality_type;
+
+/*
+ * Operations on the metadata format string.
+ * linkop_mtdop_dispatch() maps link to MTD_APPEND, unlink to MTD_CUT
+ * and rename to MTD_OVERWRITE; MTD_LAST_OP is its "bad operation" value.
+ */
+typedef enum {
+ MTD_CREATE,
+ MTD_APPEND,
+ MTD_OVERWRITE,
+ MTD_CUT,
+ MTD_LAST_OP
+} mtd_op_t;
+
+/*
+ * Two keys, each paired with the block function that uses it.
+ * NOTE(review): presumably this has to match the structure behind
+ * OpenSSL's XTS128_CONTEXT typedef - confirm.
+ */
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+/*
+ * Cipher parameters of one object, embedded in struct crypt_inode_info.
+ */
+struct object_cipher_info {
+ cipher_alg_t o_alg;
+ cipher_mode_t o_mode;
+ uint32_t o_block_bits; /* atom size is 1 << o_block_bits,
+                            see get_atom_size() */
+ uint32_t o_dkey_size; /* raw data key size in bits */
+ union {
+ struct {
+ unsigned char ivec[16];
+ AES_KEY dkey[2];
+ AES_KEY tkey; /* key used for tweaking */
+ XTS128_CONTEXT xts;
+ } aes_xts;
+ } u;
+};
+
+/*
+ * Volume-wide cipher settings and keys; kept in crypt_private_t.
+ */
+struct master_cipher_info {
+ /*
+ * attributes inherited by newly created regular files
+ */
+ cipher_alg_t m_alg;
+ cipher_mode_t m_mode;
+ uint32_t m_block_bits;
+ uint32_t m_dkey_size; /* raw key size in bits */
+ /*
+ * master key
+ */
+ unsigned char m_key[MASTER_VOL_KEY_SIZE];
+ /*
+ * volume key for oid authentication
+ */
+ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
+};
+
+/*
+ * Per-inode crypt information.
+ * This info is not changed during the file's life.
+ */
+struct crypt_inode_info {
+#if DEBUG_CRYPT
+ loc_t *loc; /* pathname that the file has been
+ opened, or created with */
+#endif
+ uint16_t nr_minor;
+ uuid_t oid;
+ struct object_cipher_info cinfo; /* returned by get_object_cinfo() */
+};
+
+/*
+ * Private state of the translator: the master cipher info.
+ * This should be located in secure memory.
+ */
+typedef struct {
+ struct master_cipher_info master;
+} crypt_private_t;
+
+/* Return the master cipher info embedded in @priv. */
+static inline struct master_cipher_info *
+get_master_cinfo(crypt_private_t *priv)
+{
+        struct master_cipher_info *master = &priv->master;
+
+        return master;
+}
+
+/* Return the object cipher info embedded in @info. */
+static inline struct object_cipher_info *
+get_object_cinfo(struct crypt_inode_info *info)
+{
+        struct object_cipher_info *object = &info->cinfo;
+
+        return object;
+}
+
+/*
+ * this describes layouts and properties
+ * of atoms in an aligned vector
+ *
+ * A frame carries two of these, one for data and one for holes
+ * (see get_data_conf() and get_hole_conf()).
+ */
+struct avec_config {
+ uint32_t atom_size;
+ atom_data_type type;
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head; /* non-zero means there is a head block,
+                           see has_head_block() */
+ uint32_t off_in_tail;
+ uint32_t gap_in_tail;
+ uint32_t nr_full_blocks;
+
+ struct iovec *avec; /* aligned vector */
+ uint32_t acount; /* number of avec components. The same
+ * as number of occupied logical blocks */
+ char **pool;
+ uint32_t blocks_in_pool;
+ uint32_t cursor; /* makes sense only for ordered writes,
+ * so there is no races on this counter.
+ *
+ * Cursor is per-config object, we don't
+ * reset cursor for atoms of different
+ * localities (head, tail, full)
+ */
+};
+
+
+/*
+ * Per-frame state of a crypt fop; the helpers in this header reach it
+ * through frame->local.
+ */
+typedef struct {
+ glusterfs_fop_t fop; /* code of FOP this local info built for */
+ fd_t *fd;
+ inode_t *inode;
+ loc_t *loc;
+ int32_t mac_idx;
+ loc_t *newloc;
+ int32_t flags;
+ int32_t wbflags;
+ struct crypt_inode_info *info;
+ struct iobref *iobref;
+ struct iobref *iobref_data;
+ off_t offset;
+
+ uint64_t old_file_size; /* per FOP, retrieved under lock held */
+ uint64_t cur_file_size; /* per iteration, before issuing IOs */
+ uint64_t new_file_size; /* per iteration, after issuing IOs */
+
+ uint64_t io_offset; /* offset of IOs issued per iteration */
+ uint64_t io_offset_nopad; /* offset of user's data in the atom */
+ uint32_t io_size; /* size of IOs issued per iteration */
+ uint32_t io_size_nopad; /* size of user's data in the IOs */
+ uint32_t eof_padding_size; /* size of EOF padding in the IOs */
+
+ gf_lock_t call_lock; /* protect nr_calls from many cbks */
+ int32_t nr_calls;
+
+ atom_data_type active_setup; /* which setup (hole or data)
+ is currently active */
+ /* data setup */
+ struct avec_config data_conf;
+
+ /* hole setup */
+ int hole_conv_in_proggress;
+ gf_lock_t hole_lock; /* protect hole config from many cbks */
+ int hole_handled;
+ struct avec_config hole_conf;
+ struct iatt buf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t rw_count; /* total read or written */
+ gf_lock_t rw_count_lock; /* protect the counter above */
+ unsigned char *format; /* for create, update format string */
+ uint32_t format_size;
+ uint32_t msgflags; /* messages for crypt_open() */
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iovec vec; /* contains last file's atom for
+ read-prune-write sequence */
+ gf_boolean_t custom_mtd;
+ /*
+ * the next 3 fields are used by readdir and friends
+ */
+ gf_dirent_t *de; /* directory entry */
+ char *de_path; /* pathname of directory entry */
+ uint32_t de_prefix_len; /* length of the parent's pathname */
+ gf_dirent_t *entries;
+
+ uint32_t update_disk_file_size:1;
+} crypt_local_t;
+
+/*
+ * This represents a (read)modify-write atom: its locality plus the
+ * set of methods used to process an atom of that locality.
+ */
+struct rmw_atom {
+ atom_locality_type locality;
+ /*
+ * read-modify-write sequence of the atom
+ */
+ int32_t (*rmw)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata);
+ /*
+ * offset of the logical block in a file
+ */
+ loff_t (*offset_at)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * IO offset in an atom
+ */
+ uint32_t (*offset_in)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * number of bytes of plain text of this atom that user
+ * wants to read/write.
+ * It can be smaller than atom_size in the case of head
+ * or tail atoms.
+ */
+ uint32_t (*io_size_nopad)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * which iovec represents the atom
+ */
+ struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
+ /*
+ * how many bytes of a partial block should be brought up to date
+ * by reading from disk.
+ * This is used to perform a read component of RMW (read-modify-write).
+ */
+ uint32_t (*count_to_uptodate)(call_frame_t *frame, struct object_cipher_info *object);
+ /*
+ * configuration (struct avec_config) this atom works with
+ */
+ struct avec_config *(*get_config)(call_frame_t *frame);
+};
+
+/*
+ * Description of one (algorithm, mode) pair; the table
+ * data_cipher_algs[][] is indexed by cipher_alg_t and cipher_mode_t.
+ */
+struct data_cipher_alg {
+ gf_boolean_t atomic; /* true means that algorithm requires
+ to pad data before cipher transform */
+ gf_boolean_t should_pad; /* true means that algorithm requires
+ to pad the end of file with extra-data */
+ uint32_t blkbits; /* blksize = 1 << blkbits */
+ /*
+ * any preliminary sanity checks go here
+ */
+ int32_t (*init)(void);
+ /*
+ * set alg-mode specific inode info
+ */
+ int32_t (*set_private)(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * check alg-mode specific data key
+ */
+ int32_t (*check_key)(uint32_t key_size);
+ /*
+ * set the initialization vector for data at @offset
+ */
+ void (*set_iv)(off_t offset, struct object_cipher_info *object);
+ /*
+ * cipher @length bytes of @from into @to
+ */
+ int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
+ size_t length, off_t offset, const int enc,
+ struct object_cipher_info *object);
+};
+
+/*
+ * version-dependent metadata loader; the available loaders are kept
+ * in mtd_loaders[]
+ */
+struct crypt_mtd_loader {
+ /*
+ * return core format size
+ */
+ size_t (*format_size)(mtd_op_t op, size_t old_size);
+ /*
+ * pack version-specific metadata of an object
+ * at ->create()
+ */
+ int32_t (*create_format)(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * extract version-specific metadata of an object
+ * at ->open() time
+ */
+ int32_t (*open_format)(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info);
+ /*
+ * build the @new format from the @old one for operation @op
+ */
+ int32_t (*update_format)(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+};
+
+typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata);
+typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
+typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
+
+
+/* Declarations */
+
+/* keys.c */
+extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
+int32_t get_nmtd_vol_key(struct master_cipher_info *master);
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key);
+/* data.c */
+extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE];
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf);
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count);
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop);
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype);
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t setup_gap_in_tail);
+
+/* metadata.c */
+extern struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER];
+
+int32_t alloc_format(crypt_local_t *local, size_t size);
+int32_t alloc_format_create(crypt_local_t *local);
+void free_format(crypt_local_t *local);
+size_t format_size(mtd_op_t op, size_t old_size);
+size_t new_format_size(void);
+int32_t open_format(unsigned char *str, int32_t len, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master, crypt_local_t *local,
+ gf_boolean_t load_info);
+int32_t update_format(unsigned char *new, unsigned char *old,
+ size_t old_len, int32_t mac_idx, mtd_op_t op, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+
+/* atom.c */
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality);
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype);
+void submit_full(call_frame_t *frame, xlator_t *this);
+
+/* crypt.c */
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop);
+static size_t iovec_get_size(struct iovec *vec, uint32_t count);
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom, off_t io_offset,
+ uint32_t io_size);
+void link_wind(call_frame_t *frame, xlator_t *this);
+void unlink_wind(call_frame_t *frame, xlator_t *this);
+void link_unwind(call_frame_t *frame);
+void unlink_unwind(call_frame_t *frame);
+void rename_wind(call_frame_t *frame, xlator_t *this);
+void rename_unwind(call_frame_t *frame);
+
+/* Inline functions */
+
+/*
+ * Return the total number of bytes described by the first @count
+ * components of @vec (0 when @count is 0).
+ */
+static inline size_t iovec_get_size(struct iovec *vec, uint32_t count)
+{
+        size_t size = 0;
+        uint32_t i; /* same type as @count: no signed/unsigned compare */
+
+        for (i = 0; i < count; i++)
+                size += vec[i].iov_len;
+        return size;
+}
+
+/* Identifier of this translator (CRYPT_XLATOR_ID). */
+static inline int32_t
+crypt_xlator_id(void)
+{
+        const int32_t id = CRYPT_XLATOR_ID;
+
+        return id;
+}
+
+/* Metadata loader currently in use: MTD_LOADER_V1. */
+static inline mtd_loader_id
+current_mtd_loader(void)
+{
+        const mtd_loader_id loader = MTD_LOADER_V1;
+
+        return loader;
+}
+
+/* Length of the master volume key from crypt_keys[], divided by 8. */
+static inline uint32_t
+master_key_size (void)
+{
+        const struct crypt_key *key = &crypt_keys[MASTER_VOL_KEY];
+
+        return key->len >> 3;
+}
+
+/* Length of the NMTD volume key from crypt_keys[], divided by 8. */
+static inline uint32_t
+nmtd_vol_key_size (void)
+{
+        const struct crypt_key *key = &crypt_keys[NMTD_VOL_KEY];
+
+        return key->len >> 3;
+}
+
+/* Block size, as a power of two, of the (@alg, @mode) pair. */
+static inline uint32_t
+alg_mode_blkbits(cipher_alg_t alg, cipher_mode_t mode)
+{
+        struct data_cipher_alg *desc = &data_cipher_algs[alg][mode];
+
+        return desc->blkbits;
+}
+
+/* Block size of the (@alg, @mode) pair: 1 << alg_mode_blkbits(). */
+static inline uint32_t
+alg_mode_blksize(cipher_alg_t alg, cipher_mode_t mode)
+{
+        uint32_t bits = alg_mode_blkbits(alg, mode);
+
+        return 1 << bits;
+}
+
+/* "atomic" attribute of the (@alg, @mode) pair. */
+static inline gf_boolean_t
+alg_mode_atomic(cipher_alg_t alg, cipher_mode_t mode)
+{
+        struct data_cipher_alg *desc = &data_cipher_algs[alg][mode];
+
+        return desc->atomic;
+}
+
+/* "should_pad" attribute of the (@alg, @mode) pair. */
+static inline gf_boolean_t
+alg_mode_should_pad(cipher_alg_t alg, cipher_mode_t mode)
+{
+        struct data_cipher_alg *desc = &data_cipher_algs[alg][mode];
+
+        return desc->should_pad;
+}
+
+/* Block size of the algorithm/mode configured in the master info. */
+static inline uint32_t
+master_alg_blksize(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_blksize(alg, mode);
+}
+
+/* Block bits of the algorithm/mode configured in the master info. */
+static inline uint32_t
+master_alg_blkbits(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_blkbits(alg, mode);
+}
+
+/* "atomic" attribute of the algorithm/mode of the master info. */
+static inline gf_boolean_t
+master_alg_atomic(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_atomic(alg, mode);
+}
+
+/* "should_pad" attribute of the algorithm/mode of the master info. */
+static inline gf_boolean_t
+master_alg_should_pad(struct master_cipher_info *mr)
+{
+        cipher_alg_t alg = mr->m_alg;
+        cipher_mode_t mode = mr->m_mode;
+
+        return alg_mode_should_pad(alg, mode);
+}
+
+/* Block size of the algorithm/mode recorded for the object. */
+static inline uint32_t
+object_alg_blksize(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_blksize(alg, mode);
+}
+
+/* Block bits of the algorithm/mode recorded for the object. */
+static inline uint32_t
+object_alg_blkbits(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_blkbits(alg, mode);
+}
+
+/* "atomic" attribute of the algorithm/mode recorded for the object. */
+static inline gf_boolean_t
+object_alg_atomic(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_atomic(alg, mode);
+}
+
+/* "should_pad" attribute of the algorithm/mode recorded for the object. */
+static inline gf_boolean_t
+object_alg_should_pad(struct object_cipher_info *ob)
+{
+        cipher_alg_t alg = ob->o_alg;
+        cipher_mode_t mode = ob->o_mode;
+
+        return alg_mode_should_pad(alg, mode);
+}
+
+/* Raw data key size of @master (kept in bits) divided by 8. */
+static inline uint32_t
+aes_raw_key_size(struct master_cipher_info *master)
+{
+        uint32_t bits = master->m_dkey_size;
+
+        return bits >> 3;
+}
+
+/* Hole configuration kept in the local of @frame. */
+static inline struct avec_config *
+get_hole_conf(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+
+        return &local->hole_conf;
+}
+
+/* Data configuration kept in the local of @frame. */
+static inline struct avec_config *
+get_data_conf(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+
+        return &local->data_conf;
+}
+
+/* Atom size of @object as a power of two. */
+static inline int32_t
+get_atom_bits (struct object_cipher_info *object)
+{
+        int32_t bits = object->o_block_bits;
+
+        return bits;
+}
+
+/* Atom size of @object: 1 << get_atom_bits(). */
+static inline int32_t
+get_atom_size (struct object_cipher_info *object)
+{
+        int32_t bits = get_atom_bits(object);
+
+        return 1 << bits;
+}
+
+/*
+ * Non-zero if @conf has a head block: either there is an offset in
+ * the head, or the only atom has an offset in the tail.
+ */
+static inline int32_t
+has_head_block(struct avec_config *conf)
+{
+        if (conf->off_in_head)
+                return 1;
+        return conf->acount == 1 && conf->off_in_tail;
+}
+
+/*
+ * Non-zero if @conf has a tail block: there is an offset in the tail
+ * and more than one atom.
+ */
+static inline int32_t
+has_tail_block(struct avec_config *conf)
+{
+        if (!conf->off_in_tail)
+                return 0;
+        return conf->acount > 1;
+}
+
+/* Number of full blocks of @conf; non-zero means "has full blocks". */
+static inline int32_t
+has_full_blocks(struct avec_config *conf)
+{
+        uint32_t nr_full = conf->nr_full_blocks;
+
+        return nr_full;
+}
+
+/* Non-zero if @conf has a head block and the cursor points at it. */
+static inline int32_t
+should_submit_head_block(struct avec_config *conf)
+{
+        if (!has_head_block(conf))
+                return 0;
+        return conf->cursor == 0;
+}
+
+/* Non-zero if @conf has a tail block and the cursor is at the last atom. */
+static inline int32_t
+should_submit_tail_block(struct avec_config *conf)
+{
+        if (!has_tail_block(conf))
+                return 0;
+        return conf->cursor == conf->acount - 1;
+}
+
+/*
+ * Non-zero if the cursor of @conf points into the run of full blocks,
+ * which starts right after the head block (if there is one).
+ */
+static inline int32_t
+should_submit_full_block(struct avec_config *conf)
+{
+        uint32_t first = 0;
+
+        if (has_head_block(conf))
+                first = 1;
+        if (!has_full_blocks(conf))
+                return 0;
+        if (conf->cursor < first)
+                return 0;
+        return conf->cursor < first + conf->nr_full_blocks;
+}
+
+#if DEBUG_CRYPT
+/*
+ * Debug check: log when a padding algorithm is given an input whose
+ * length is not a multiple of the algorithm's block size.
+ */
+static inline void
+crypt_check_input_len(size_t len, struct object_cipher_info *object)
+{
+        if (!object_alg_should_pad(object))
+                return;
+        if (len & (object_alg_blksize(object) - 1))
+                gf_log ("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
+}
+
+/* Debug check: log when @conf has no head block. */
+static inline void
+check_head_block(struct avec_config *conf)
+{
+        if (has_head_block(conf))
+                return;
+        gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
+}
+
+/* Debug check: log when @conf has no tail block. */
+static inline void
+check_tail_block(struct avec_config *conf)
+{
+        if (has_tail_block(conf))
+                return;
+        gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
+}
+
+/* Debug check: log when @conf has no full blocks. */
+static inline void
+check_full_block(struct avec_config *conf)
+{
+        if (has_full_blocks(conf))
+                return;
+        gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
+}
+
+/*
+ * Debug check for head atom methods: log when @conf has no head
+ * block, otherwise log when the cursor is not at the head atom.
+ */
+static inline void
+check_cursor_head(struct avec_config *conf)
+{
+        if (!has_head_block(conf)) {
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Illegal call of head atom method");
+                return;
+        }
+        if (conf->cursor != 0)
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
+                       conf->cursor);
+}
+
+/*
+ * Debug check for full atom methods; the two conditions are tested
+ * independently, so both messages may be logged.
+ */
+static inline void
+check_cursor_full(struct avec_config *conf)
+{
+        int no_full = !has_full_blocks(conf);
+
+        if (no_full)
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Illegal call of full atom method");
+        if ((conf->cursor == 0) && has_head_block(conf))
+                gf_log("crypt",
+                       GF_LOG_DEBUG, "Cursor is not at full atom");
+}
+
+/*
+ * Placeholder: always returns 0 and does not look at @local.
+ * Only defined when DEBUG_CRYPT is set.
+ *
+ * FIXME: use avec->iov_len to check setup
+ */
+static inline int data_local_invariant(crypt_local_t *local)
+{
+ return 0;
+}
+
+#else
+/*
+ * Without DEBUG_CRYPT the debug checkers expand to "noop"
+ * (defined elsewhere), so their arguments are not evaluated.
+ */
+#define crypt_check_input_len(len, object) noop
+#define check_head_block(conf) noop
+#define check_tail_block(conf) noop
+#define check_full_block(conf) noop
+#define check_cursor_head(conf) noop
+#define check_cursor_full(conf) noop
+
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Return the configuration of @frame that corresponds to @dtype
+ * (hole or data). For any other type a debug message is logged and
+ * NULL is returned.
+ */
+static inline struct avec_config *
+conf_by_type(call_frame_t *frame, atom_data_type dtype)
+{
+        if (dtype == HOLE_ATOM)
+                return get_hole_conf(frame);
+        if (dtype == DATA_ATOM)
+                return get_data_conf(frame);
+
+        gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
+        return NULL;
+}
+
+/* Number of calls needed for the head block of @conf: 1 or 0. */
+static inline uint32_t
+nr_calls_head(struct avec_config *conf)
+{
+        if (has_head_block(conf))
+                return 1;
+        return 0;
+}
+
+/* Number of calls needed for the tail block of @conf: 1 or 0. */
+static inline uint32_t
+nr_calls_tail(struct avec_config *conf)
+{
+        if (has_tail_block(conf))
+                return 1;
+        return 0;
+}
+
+/*
+ * Number of calls needed for the full blocks of @conf.
+ * For holes this is whatever has_full_blocks() returns; for data it
+ * is computed by logical_blocks_occupied() with MAX_IOVEC_BITS, or 0
+ * when there are no full blocks. Unknown types are logged and give 0.
+ */
+static inline uint32_t
+nr_calls_full(struct avec_config *conf)
+{
+        if (conf->type == HOLE_ATOM)
+                return has_full_blocks(conf);
+
+        if (conf->type == DATA_ATOM) {
+                if (!has_full_blocks(conf))
+                        return 0;
+                return logical_blocks_occupied(0,
+                                               conf->nr_full_blocks,
+                                               MAX_IOVEC_BITS);
+        }
+        gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
+        return 0;
+}
+
+/* Total number of calls for @conf: head plus tail plus full blocks. */
+static inline uint32_t
+nr_calls(struct avec_config *conf)
+{
+        uint32_t total = nr_calls_head(conf);
+
+        total += nr_calls_tail(conf);
+        total += nr_calls_full(conf);
+        return total;
+}
+
+/* nr_calls() for the data configuration of @frame. */
+static inline uint32_t
+nr_calls_data(call_frame_t *frame)
+{
+        struct avec_config *conf = get_data_conf(frame);
+
+        return nr_calls(conf);
+}
+
+/* nr_calls() for the hole configuration of @frame. */
+static inline uint32_t
+nr_calls_hole(call_frame_t *frame)
+{
+        struct avec_config *conf = get_hole_conf(frame);
+
+        return nr_calls(conf);
+}
+
+/* Increment the call counter of @frame; does not take call_lock. */
+static inline void
+get_one_call_nolock(call_frame_t *frame)
+{
+        crypt_local_t *local = frame->local;
+
+        local->nr_calls += 1;
+}
+
+/* Increment the call counter of @frame under call_lock. */
+static inline void
+get_one_call(call_frame_t *frame)
+{
+        gf_lock_t *lock = &((crypt_local_t *)frame->local)->call_lock;
+
+        LOCK(lock);
+        get_one_call_nolock(frame);
+        UNLOCK(lock);
+}
+
+/* Add @nr to the call counter of @frame; does not take call_lock. */
+static inline void
+get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
+{
+        crypt_local_t *local = frame->local;
+
+        local->nr_calls = local->nr_calls + nr;
+}
+
+/* Add @nr to the call counter of @frame under call_lock. */
+static inline void
+get_nr_calls(call_frame_t *frame, int32_t nr)
+{
+        gf_lock_t *lock = &((crypt_local_t *)frame->local)->call_lock;
+
+        LOCK(lock);
+        get_nr_calls_nolock(frame, nr);
+        UNLOCK(lock);
+}
+
+/*
+ * Decrement the call counter of @local under call_lock.
+ * Returns 1 if the counter dropped to zero, 0 otherwise.
+ */
+static inline int
+put_one_call(crypt_local_t *local)
+{
+        int was_last;
+
+        LOCK(&local->call_lock);
+        local->nr_calls--;
+        was_last = (local->nr_calls == 0) ? 1 : 0;
+        UNLOCK(&local->call_lock);
+
+        return was_last;
+}
+
+/*
+ * Non-zero if the data described by the data configuration of @frame
+ * ends beyond the old file size.
+ */
+static inline int
+is_appended_write(call_frame_t *frame)
+{
+        struct avec_config *conf = get_data_conf(frame);
+        crypt_local_t *local = frame->local;
+        uint64_t end = conf->orig_offset + conf->orig_size;
+
+        return end > local->old_file_size;
+}
+
+/*
+ * Always returns 1: every frame is handled in ordered mode.
+ * (A disabled variant used to restrict this to ftruncate and to
+ * appended writes.)
+ */
+static inline int
+is_ordered_mode(call_frame_t *frame)
+{
+        const int ordered = 1;
+
+        return ordered;
+}
+
+/* Non-zero when the cursor of the hole configuration reached acount. */
+static inline int32_t
+hole_conv_completed(crypt_local_t *local)
+{
+        uint32_t cursor = local->hole_conf.cursor;
+        uint32_t acount = local->hole_conf.acount;
+
+        return cursor == acount;
+}
+
+/* Non-zero when the active setup of @local is the data setup. */
+static inline int32_t
+data_write_in_progress(crypt_local_t *local)
+{
+        atom_data_type active = local->active_setup;
+
+        return active == DATA_ATOM;
+}
+
+/* Non-zero when the parent frame of @frame belongs to @this. */
+static inline int32_t
+parent_is_crypt_xlator(call_frame_t *frame, xlator_t *this)
+{
+        xlator_t *owner = frame->parent->this;
+
+        return owner == this;
+}
+
+/*
+ * Return the wind handler for link operation @fop (link, unlink or
+ * rename). Any other fop is logged as an error and yields NULL.
+ */
+static inline linkop_wind_handler_t
+linkop_wind_dispatch(glusterfs_fop_t fop)
+{
+        if (fop == GF_FOP_LINK)
+                return link_wind;
+        if (fop == GF_FOP_UNLINK)
+                return unlink_wind;
+        if (fop == GF_FOP_RENAME)
+                return rename_wind;
+
+        gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+        return NULL;
+}
+
+/*
+ * Return the unwind handler for link operation @fop (link, unlink or
+ * rename). Any other fop is logged as an error and yields NULL.
+ */
+static inline linkop_unwind_handler_t
+linkop_unwind_dispatch(glusterfs_fop_t fop)
+{
+        if (fop == GF_FOP_LINK)
+                return link_unwind;
+        if (fop == GF_FOP_UNLINK)
+                return unlink_unwind;
+        if (fop == GF_FOP_RENAME)
+                return rename_unwind;
+
+        gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+        return NULL;
+}
+
+/*
+ * Map link operation @fop to the metadata operation it needs:
+ * link appends, unlink cuts, rename overwrites. Any other fop is
+ * logged as a warning and yields MTD_LAST_OP.
+ */
+static inline mtd_op_t
+linkop_mtdop_dispatch(glusterfs_fop_t fop)
+{
+        if (fop == GF_FOP_LINK)
+                return MTD_APPEND;
+        if (fop == GF_FOP_UNLINK)
+                return MTD_CUT;
+        if (fop == GF_FOP_RENAME)
+                return MTD_OVERWRITE;
+
+        gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
+        return MTD_LAST_OP;
+}
+
+#endif /* __CRYPT_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
new file mode 100644
index 00000000000..2f96ed2bab5
--- /dev/null
+++ b/xlators/encryption/crypt/src/data.c
@@ -0,0 +1,764 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+#include <openssl/crypto.h>
+
+/*
+ * Set the XTS tweak of @object for the data at file offset @offset.
+ *
+ * The offset is stored as a little-endian 64-bit value in the first
+ * 8 bytes of the ivec (IEEE P1619/D16, May 2007, section 5.1).
+ * The remaining bytes are not touched here; set_private_aes_xts()
+ * zeroes the whole ivec when the object is set up.
+ */
+static void set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
+{
+        unsigned char *ivec = object->u.aes_xts.ivec;
+        uint64_t tweak = htole64(offset);
+
+        /*
+         * Copy byte-wise: storing through a casted uint64_t pointer
+         * would access the byte array via an incompatible type and
+         * assumes an alignment the array does not guarantee.
+         */
+        memcpy(ivec, &tweak, sizeof(tweak));
+}
+
+/*
+ * Expand @raw_key of @key_size bits into an encryption and a
+ * decryption schedule, stored at keys[AES_ENCRYPT] and
+ * keys[AES_DECRYPT]. Returns 0 on success, otherwise the non-zero
+ * result of the OpenSSL call that failed.
+ */
+static int32_t aes_set_keys_common(unsigned char *raw_key, uint32_t key_size,
+                                   AES_KEY *keys)
+{
+        int32_t ret = AES_set_encrypt_key(raw_key, key_size,
+                                          &keys[AES_ENCRYPT]);
+
+        if (ret != 0) {
+                gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
+                return ret;
+        }
+
+        ret = AES_set_decrypt_key(raw_key, key_size, &keys[AES_DECRYPT]);
+        if (ret != 0)
+                gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
+
+        return ret;
+}
+
+/*
+ * set private cipher info for xts mode
+ *
+ * Zeroes the tweak, retrieves the data keying material of the object
+ * and installs the two halves of the compound XTS key: the first half
+ * as the data key (both directions), the second half as the key used
+ * to encrypt tweaks.
+ *
+ * Returns 0 on success, ENOMEM if the temporary key buffer cannot be
+ * allocated, otherwise the non-zero result of the failed step.
+ */
+static int32_t set_private_aes_xts(struct crypt_inode_info *info,
+                                   struct master_cipher_info *master)
+{
+        int ret;
+        struct object_cipher_info *object = get_object_cinfo(info);
+        unsigned char *data_key;
+        uint32_t subkey_size;
+
+        /* init tweak value */
+        memset(object->u.aes_xts.ivec, 0, 16);
+
+        data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
+        if (!data_key)
+                return ENOMEM;
+
+        /*
+         * retrieve data keying material
+         */
+        ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
+        if (ret) {
+                gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
+                goto out;
+        }
+        /*
+         * parse compound xts key
+         */
+        subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
+        /*
+         * install key for data encryption
+         */
+        ret = aes_set_keys_common(data_key,
+                                  subkey_size << 3, object->u.aes_xts.dkey);
+        if (ret)
+                goto out;
+        /*
+         * set up key used to encrypt tweaks
+         */
+        ret = AES_set_encrypt_key(data_key + subkey_size,
+                                  object->o_dkey_size / 2,
+                                  &object->u.aes_xts.tkey);
+        if (ret < 0)
+                gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
+ out:
+        /*
+         * the buffer holds raw key material: wipe it before it goes
+         * back to the allocator (a plain memset() before free may be
+         * optimized away)
+         */
+        OPENSSL_cleanse(data_key, object->o_dkey_size);
+        GF_FREE(data_key);
+        return ret;
+}
+
+/*
+ * Sanity check for AES-XTS: AES_BLOCK_SIZE must be equal to
+ * 1 << AES_BLOCK_BITS. Always returns 0.
+ */
+static int32_t
+aes_xts_init(void)
+{
+        cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
+
+        return 0;
+}
+
+/*
+ * Check a data key size for AES-XTS.
+ * Returns 0 if @keysize is 256 or 512, -1 otherwise.
+ */
+static int32_t
+check_key_aes_xts(uint32_t keysize)
+{
+        if (keysize == 256 || keysize == 512)
+                return 0;
+        return -1;
+}
+
+/*
+ * AES-XTS transform of @length bytes from @from to @to.
+ * Non-zero @enc selects encryption, zero selects decryption; the
+ * tweak is taken from the ivec of @object and is always processed
+ * with AES_encrypt and the tweak key. @offset is not used here.
+ * Returns the result of CRYPTO_xts128_encrypt().
+ */
+static int32_t encrypt_aes_xts(const unsigned char *from,
+                               unsigned char *to, size_t length,
+                               off_t offset, const int enc,
+                               struct object_cipher_info *object)
+{
+        XTS128_CONTEXT ctx;
+        const int dir = enc ? AES_ENCRYPT : AES_DECRYPT;
+
+        /* tweak part */
+        ctx.key2 = &object->u.aes_xts.tkey;
+        ctx.block2 = (block128_f)AES_encrypt;
+
+        /* data part, depends on the direction */
+        ctx.key1 = &object->u.aes_xts.dkey[dir];
+        ctx.block1 = enc ? (block128_f)AES_encrypt : (block128_f)AES_decrypt;
+
+        return CRYPTO_xts128_encrypt(&ctx,
+                                     object->u.aes_xts.ivec,
+                                     from,
+                                     to,
+                                     length, enc);
+}
+
+/*
+ * Cipher one input chunk.
+ *
+ * @from: input chunk of length @len;
+ * @to:   result of the cipher transform;
+ * @off:  offset in a file (must be cblock-aligned);
+ * @enc:  direction, passed through to the algorithm's encrypt method.
+ *
+ * The IV is set from @off before the transform. The value returned by
+ * the encrypt method is not checked.
+ */
+static void cipher_data(struct object_cipher_info *object,
+                        char *from,
+                        char *to,
+                        off_t off,
+                        size_t len,
+                        const int enc)
+{
+        struct data_cipher_alg *alg;
+
+        crypt_check_input_len(len, object);
+
+#if TRIVIAL_TFM && DEBUG_CRYPT
+        return;
+#endif
+        alg = &data_cipher_algs[object->o_alg][object->o_mode];
+
+        alg->set_iv(off, object);
+        alg->encrypt((const unsigned char *)from,
+                     (unsigned char *)to,
+                     len,
+                     off,
+                     enc,
+                     object);
+}
+
+#define MAX_CIPHER_CHUNK (1 << 30)
+
+/*
+ * Cipher (encrypt or decrypt) a continuous region of memory, at most
+ * MAX_CIPHER_CHUNK bytes per cipher_data() call.
+ *
+ * @from: data to transform;
+ * @to:   where to put the result;
+ * @off:  offset in a file, should be block-aligned
+ *        for atomic cipher modes and ksize-aligned
+ *        for other modes;
+ * @len:  number of bytes to transform;
+ * @dir:  direction of transform (encrypt/decrypt).
+ */
+static void cipher_region(struct object_cipher_info *object,
+                          char *from,
+                          char *to,
+                          off_t off,
+                          size_t len,
+                          int dir)
+{
+        size_t chunk;
+
+        for (; len > 0; len -= chunk) {
+                chunk = (len > MAX_CIPHER_CHUNK) ? MAX_CIPHER_CHUNK : len;
+
+                /* this will reset IV */
+                cipher_data(object, from, to, off, chunk, dir);
+
+                from += chunk;
+                to += chunk;
+                off += chunk;
+        }
+}
+
+/*
+ * Do cipher transform (encryption/decryption) of
+ * plaintext/ciphertext represented by @vec, in place.
+ *
+ * Pre-conditions: @vec represents a continuous piece
+ * of data in a file at offset @off to be ciphered
+ * (encrypted/decrypted).
+ * @count is the number of vec's components. All the
+ * components must be block-aligned, the caller is
+ * responsible for this. @dir is "direction" of
+ * transform (encrypt/decrypt).
+ */
+static void cipher_aligned_iov(struct object_cipher_info *object,
+                               struct iovec *vec,
+                               int count,
+                               off_t off,
+                               int32_t dir)
+{
+        int i;
+        /*
+         * bytes processed so far; kept in off_t because it is added to
+         * a file offset and accumulates size_t lengths (an int would
+         * be narrowed on every addition)
+         */
+        off_t done = 0;
+
+        for (i = 0; i < count; i++) {
+                cipher_region(object,
+                              vec[i].iov_base,
+                              vec[i].iov_base,
+                              off + done,
+                              vec[i].iov_len,
+                              dir);
+                done += vec[i].iov_len;
+        }
+}
+
+/*
+ * Encrypt, in place, the block-aligned vector @vec of @count
+ * components that holds the data at file offset @off.
+ */
+void encrypt_aligned_iov(struct object_cipher_info *object,
+                         struct iovec *vec,
+                         int count,
+                         off_t off)
+{
+        const int32_t encrypt = 1;
+
+        cipher_aligned_iov(object, vec, count, off, encrypt);
+}
+
+/*
+ * Decrypt, in place, the block-aligned vector @vec of @count
+ * components that holds the data at file offset @off.
+ */
+void decrypt_aligned_iov(struct object_cipher_info *object,
+                         struct iovec *vec,
+                         int count,
+                         off_t off)
+{
+        const int32_t decrypt = 0;
+
+        cipher_aligned_iov(object, vec, count, off, decrypt);
+}
+
+#if DEBUG_CRYPT
+/*
+ * Debug helper: copy the @count components of @vec one after another
+ * into @buf, leaving out the first @skip bytes of the first component.
+ */
+static void compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
+{
+        int off = 0;
+        int i = 0;
+
+        while (i < count) {
+                memcpy(buf + off,
+                       vec[i].iov_base + skip,
+                       vec[i].iov_len - skip);
+                off += (vec[i].iov_len - skip);
+
+                /* only the first component is skipped into */
+                skip = 0;
+                i++;
+        }
+}
+
+/*
+ * Debug helper: check that @vec and the aligned vector @avec (with
+ * the first @off_in_head bytes left out) describe the same bytes.
+ * Mismatches and allocation failures are only logged.
+ */
+static void check_iovecs(struct iovec *vec, int cnt,
+                         struct iovec *avec, int acnt, uint32_t off_in_head)
+{
+        char *plain = NULL;
+        char *aligned = NULL;
+        uint32_t size = iovec_get_size(vec, cnt);
+        uint32_t asize = iovec_get_size(avec, acnt) - off_in_head;
+
+        if (size != asize) {
+                gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d",
+                       size, asize);
+                return;
+        }
+        plain = GF_CALLOC(1, size, gf_crypt_mt_data);
+        if (!plain) {
+                gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+                return;
+        }
+        aligned = GF_CALLOC(1, asize, gf_crypt_mt_data);
+        if (!aligned) {
+                GF_FREE(plain);
+                gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+                return;
+        }
+
+        compound_stream(vec, cnt, plain, 0);
+        compound_stream(avec, acnt, aligned, off_in_head);
+
+        if (memcmp(plain, aligned, size))
+                gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
+
+        GF_FREE(plain);
+        GF_FREE(aligned);
+}
+
+#else
+#define check_iovecs(vec, count, avec, avecn, off) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Get an iobuf of @block_size bytes from the iobuf pool of @this, add
+ * it to local->iobref_data (which is created on first use) and return
+ * the buffer's memory. Returns NULL if the iobuf or the iobref cannot
+ * be obtained.
+ *
+ * NOTE(review): the result of iobref_add() is not checked, and the
+ * reference obtained from iobuf_get2() is not dropped after the
+ * iobuf has been added to the iobref - confirm against the iobuf API
+ * that this does not leave an extra reference behind.
+ */
+static char *data_alloc_block(xlator_t *this, crypt_local_t *local,
+                              int32_t block_size)
+{
+        struct iobuf *buf = iobuf_get2(this->ctx->iobuf_pool, block_size);
+
+        if (buf == NULL) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "Failed to get iobuf");
+                return NULL;
+        }
+
+        if (local->iobref_data == NULL)
+                local->iobref_data = iobref_new();
+
+        if (local->iobref_data == NULL) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "Failed to get iobref");
+                iobuf_unref(buf);
+                return NULL;
+        }
+
+        iobref_add(local->iobref_data, buf);
+        return buf->ptr;
+}
+
+/*
+ * Compound @avec, which represent the same data
+ * chunk as @vec, but has aligned components of
+ * specified block size. Alloc blocks, if needed.
+ * In particular, incomplete head and tail blocks
+ * must be allocated.
+ * Every allocated block is stored in @blocks and
+ * counted in @blocks_allocated.
+ *
+ * Returns 0 on success, -ENOMEM if a block can not
+ * be allocated.
+ *
+ * Example:
+ *
+ * input: data chunk represented by 4 components
+ * [AB],[BC],[CD],[DE];
+ * output: 5 logical blocks (0, 1, 2, 3, 4).
+ *
+ * A B C D E
+ * *-----*+------*-+---*----+--------+-*
+ * | || | | | | | |
+ * *-+-----+*------+-*---+----*--------*-+------*
+ * 0 1 2 3 4
+ *
+ * 0 - incomplete compound (head);
+ * 1, 2 - full compound;
+ * 3 - full non-compound (the case of reuse);
+ * 4 - incomplete non-compound (tail).
+ */
+int32_t align_iov_by_atoms(xlator_t *this,
+                           crypt_local_t *local,
+                           struct object_cipher_info *object,
+                           struct iovec *vec /* input vector */,
+                           int32_t count /* number of vec components */,
+                           struct iovec *avec /* aligned vector */,
+                           char **blocks /* pool of blocks */,
+                           uint32_t *blocks_allocated,
+                           struct avec_config *conf)
+{
+        int vecn = 0; /* number of the current component in vec */
+        int avecn = 0; /* number of the current component in avec */
+        off_t vec_off = 0; /* offset in the current vec component,
+                            * i.e. the number of bytes have already
+                            * been copied */
+        int32_t block_size = get_atom_size(object);
+        size_t to_process; /* number of vec's bytes to copy and(or) re-use */
+        int32_t off_in_head = conf->off_in_head;
+
+        to_process = iovec_get_size(vec, count);
+
+        while (to_process > 0) {
+                if (off_in_head ||
+                    vec[vecn].iov_len - vec_off < block_size) {
+                        /*
+                         * less than block_size:
+                         * the case of incomplete (head or tail),
+                         * or compound block
+                         */
+                        size_t copied = 0;
+                        /* payload capacity of the block behind the head gap */
+                        size_t room = block_size - off_in_head;
+                        char *block;
+                        /*
+                         * populate the pool with a new block
+                         */
+                        block = data_alloc_block(this, local, block_size);
+                        blocks[*blocks_allocated] = block;
+                        if (!block)
+                                return -ENOMEM;
+                        memset(block, 0, off_in_head);
+                        /*
+                         * fill the block with vec components
+                         */
+                        do {
+                                size_t to_copy;
+
+                                to_copy = vec[vecn].iov_len - vec_off;
+                                /*
+                                 * never copy more than what is still
+                                 * free in the block: the limit has to
+                                 * account for the bytes that previous
+                                 * components have already put there,
+                                 * otherwise a compound block would be
+                                 * written past its end
+                                 */
+                                if (to_copy > room - copied)
+                                        to_copy = room - copied;
+
+                                memcpy(block + off_in_head + copied,
+                                       (char *)vec[vecn].iov_base + vec_off,
+                                       to_copy);
+
+                                copied += to_copy;
+                                to_process -= to_copy;
+
+                                vec_off += to_copy;
+                                if (vec_off == vec[vecn].iov_len) {
+                                        /* finished with this vecn */
+                                        vec_off = 0;
+                                        vecn++;
+                                }
+                        } while (copied < room && to_process > 0);
+                        /*
+                         * update avec
+                         */
+                        avec[avecn].iov_len = off_in_head + copied;
+                        avec[avecn].iov_base = block;
+
+                        (*blocks_allocated)++;
+                        /* only the first block has a head gap */
+                        off_in_head = 0;
+                } else {
+                        /*
+                         * the rest of the current vec component
+                         * is not less than block_size, so reuse
+                         * the memory buffer of the component.
+                         */
+                        size_t to_reuse;
+                        to_reuse = (to_process > block_size ?
+                                    block_size :
+                                    to_process);
+                        avec[avecn].iov_len = to_reuse;
+                        avec[avecn].iov_base =
+                                (char *)vec[vecn].iov_base + vec_off;
+
+                        vec_off += to_reuse;
+                        if (vec_off == vec[vecn].iov_len) {
+                                /* finished with this vecn */
+                                vec_off = 0;
+                                vecn++;
+                        }
+                        to_process -= to_reuse;
+                }
+                avecn++;
+        }
+        check_iovecs(vec, count, avec, avecn, conf->off_in_head);
+        return 0;
+}
+
+/*
+ * allocate and setup aligned vector for data submission
+ * Pre-condition: @conf is set.
+ */
+int32_t set_config_avec_data(xlator_t *this,
+                             crypt_local_t *local,
+                             struct avec_config *conf,
+                             struct object_cipher_info *object,
+                             struct iovec *vec,
+                             int32_t vec_count)
+{
+        /*
+         * Allocates conf->acount iovec slots and as many pool slots,
+         * fills them from @vec (or with one block when @vec is NULL)
+         * and stores the result in @conf.
+         *
+         * Returns 0 on success, ENOMEM when an allocation made here
+         * fails, or the value returned by align_iov_by_atoms().
+         */
+        int32_t ret = ENOMEM;
+        struct iovec *avec;
+        char **pool;
+        uint32_t blocks_in_pool = 0;
+
+        conf->type = DATA_ATOM;
+
+        avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
+        if (!avec)
+                return ret;
+        /* the pool is an array of block pointers: size by element type */
+        pool = GF_CALLOC(conf->acount, sizeof(*pool), gf_crypt_mt_char);
+        if (!pool) {
+                GF_FREE(avec);
+                return ret;
+        }
+        if (!vec) {
+                /*
+                 * degenerated case: no data
+                 */
+                pool[0] = data_alloc_block(this, local, get_atom_size(object));
+                if (!pool[0])
+                        goto free;
+                blocks_in_pool = 1;
+                avec->iov_base = pool[0];
+                avec->iov_len = conf->off_in_tail;
+        }
+        else {
+                ret = align_iov_by_atoms(this, local, object, vec, vec_count,
+                                         avec, pool, &blocks_in_pool, conf);
+                if (ret)
+                        goto free;
+        }
+        conf->avec = avec;
+        conf->pool = pool;
+        conf->blocks_in_pool = blocks_in_pool;
+        return 0;
+ free:
+        /* only the two arrays are released here */
+        GF_FREE(avec);
+        GF_FREE(pool);
+        return ret;
+}
+
+/*
+ * allocate and setup aligned vector for hole submission
+ *
+ * Returns 0 on success and also when nothing has to be set up
+ * (a write hole which fits into a data atom, or an unexpected @fop);
+ * returns ENOMEM when an allocation fails.
+ * On success @conf->avec, @conf->pool and @conf->blocks_in_pool are set.
+ */
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop)
+{
+ uint32_t i, idx;
+ struct iovec *avec;
+ char **pool;
+ uint32_t num_blocks;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = HOLE_ATOM;
+
+ /*
+ * all full blocks share a single buffer (see the comment in the
+ * has_full_blocks() branch below), so only one slot is counted
+ * for them
+ */
+ num_blocks = conf->acount -
+ (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ /*
+ * hole goes before data
+ */
+ if (num_blocks == 1 && conf->off_in_tail != 0)
+ /*
+ * we won't submit a hole which fits into
+ * a data atom: this part of hole will be
+ * submitted with data write
+ */
+ return 0;
+ break;
+ case GF_FOP_FTRUNCATE:
+ /*
+ * expanding truncate, hole goes after data,
+ * and will be submitted in any case.
+ */
+ break;
+ default:
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad file operation %d", fop);
+ return 0;
+ }
+ avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ENOMEM;
+ /* NOTE(review): sizeof(pool) is the size of a char **; the elements are char * */
+ pool = GF_CALLOC(num_blocks, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ENOMEM;
+ }
+ for (i = 0; i < num_blocks; i++) {
+ pool[i] = data_alloc_block(this, local, get_atom_size(object));
+ if (pool[i] == NULL)
+ goto free;
+ blocks_in_pool++;
+ }
+ if (has_head_block(conf)) {
+ /* set head block */
+ idx = 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ /* zero the part of the head block starting at off_in_head */
+ memset(avec[idx].iov_base + conf->off_in_head,
+ 0,
+ get_atom_size(object) - conf->off_in_head);
+ }
+ if (has_tail_block(conf)) {
+ /* set tail block */
+ idx = num_blocks - 1;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ /* zero the first off_in_tail bytes of the tail block */
+ memset(avec[idx].iov_base, 0, conf->off_in_tail);
+ }
+ if (has_full_blocks(conf)) {
+ /* set full block */
+ idx = conf->off_in_head ? 1 : 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ /*
+ * since we re-use the buffer,
+ * zeroes will be set every time
+ * before encryption, see submit_full()
+ */
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ /* only the two arrays are released here */
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ENOMEM;
+}
+
+/* A helper for setting up config of partial atoms (which
+ * participate in read-modify-write sequence).
+ *
+ * Calculate and setup precise amount of "extra-bytes"
+ * that should be uptodated at the end of partial (not
+ * necessarily tail!) block.
+ *
+ * Pre-condition: local->old_file_size is valid!
+ * @conf contains setup, which is enough for correct calculation
+ * of has_tail_block(), ->get_offset().
+ */
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+                    struct avec_config *conf, atom_data_type dtype)
+{
+        /*
+         * to_block: number of bytes of the old file which fall into
+         * the partial block (0 .. atom size).
+         * conf->gap_in_tail is set to the part of them which lies
+         * behind conf->off_in_tail.
+         */
+        uint32_t to_block;
+        crypt_local_t *local = frame->local;
+        uint64_t old_file_size = local->old_file_size;
+        struct rmw_atom *partial = atom_by_types(dtype,
+                                                 has_tail_block(conf) ?
+                                                 TAIL_ATOM : HEAD_ATOM);
+
+        if (old_file_size <= partial->offset_at(frame, object))
+                to_block = 0;
+        else {
+                /*
+                 * Keep the distance in 64 bits until it is capped by
+                 * the atom size: narrowing it to 32 bits first yields
+                 * a wrong value when the old file ends more than
+                 * 4G behind the start of the partial block.
+                 */
+                uint64_t past_start;
+
+                past_start = old_file_size - partial->offset_at(frame, object);
+                if (past_start > get_atom_size(object))
+                        past_start = get_atom_size(object);
+                to_block = past_start;
+        }
+        if (to_block > conf->off_in_tail)
+                conf->gap_in_tail = to_block - conf->off_in_tail;
+        else
+                /*
+                 * nothing to uptodate
+                 */
+                conf->gap_in_tail = 0;
+}
+
+/*
+ * fill struct avec_config with offsets layouts
+ */
+void set_config_offsets(call_frame_t *frame,
+                        xlator_t *this,
+                        uint64_t offset,
+                        uint64_t count,
+                        atom_data_type dtype,
+                        int32_t set_gap)
+{
+        /*
+         * Describes the byte range [@offset, @offset + @count) in
+         * terms of cipher atoms and stores the layout in the data or
+         * hole config of @frame (selected by @dtype).
+         * When @set_gap is non-zero and the range has a partial tail,
+         * the gap at the end is calculated as well.
+         */
+        crypt_local_t *local;
+        struct object_cipher_info *object;
+        struct avec_config *conf;
+        uint32_t resid;
+
+        uint32_t atom_size;
+        uint32_t atom_bits;
+
+        size_t orig_size;
+        off_t orig_offset;
+        size_t expanded_size;
+        off_t aligned_offset;
+
+        uint32_t off_in_head = 0;
+        uint32_t off_in_tail = 0;
+        uint32_t nr_full_blocks;
+        /*
+         * signed, because it is compared with zero below; 64-bit,
+         * because expanded_size of a large range (e.g. a hole) does
+         * not fit into 32 bits
+         */
+        int64_t size_full_blocks;
+
+        uint32_t acount; /* number of aligned components to write.
+                          * The same as number of occupied logical
+                          * blocks (atoms)
+                          */
+        local = frame->local;
+        object = &local->info->cinfo;
+        conf = (dtype == DATA_ATOM ?
+                get_data_conf(frame) : get_hole_conf(frame));
+
+        orig_offset = offset;
+        orig_size = count;
+
+        atom_size = get_atom_size(object);
+        atom_bits = get_atom_bits(object);
+
+        /*
+         * Round-down the start,
+         * round-up the end.
+         */
+        resid = offset & (uint64_t)(atom_size - 1);
+
+        if (resid)
+                off_in_head = resid;
+        aligned_offset = offset - off_in_head;
+        expanded_size = orig_size + off_in_head;
+
+        /* calculate tail,
+           expand size forward */
+        resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
+
+        if (resid) {
+                off_in_tail = resid;
+                expanded_size += (atom_size - off_in_tail);
+        }
+        /*
+         * calculate number of occupied blocks
+         */
+        acount = expanded_size >> atom_bits;
+        /*
+         * calculate number of full blocks
+         */
+        size_full_blocks = expanded_size;
+        if (off_in_head)
+                size_full_blocks -= atom_size;
+        /* head and tail may share one block: don't subtract it twice */
+        if (off_in_tail && size_full_blocks > 0)
+                size_full_blocks -= atom_size;
+        nr_full_blocks = size_full_blocks >> atom_bits;
+
+        conf->atom_size = atom_size;
+        conf->orig_size = orig_size;
+        conf->orig_offset = orig_offset;
+        conf->expanded_size = expanded_size;
+        conf->aligned_offset = aligned_offset;
+
+        conf->off_in_head = off_in_head;
+        conf->off_in_tail = off_in_tail;
+        conf->nr_full_blocks = nr_full_blocks;
+        conf->acount = acount;
+        /*
+         * Finally, calculate precise amount of
+         * "extra-bytes" that should be uptodated
+         * at the end.
+         * Only if RMW is expected.
+         */
+        if (off_in_tail && set_gap)
+                set_gap_at_end(frame, object, conf, dtype);
+}
+
+/*
+ * Table of data cipher implementations, indexed by
+ * [cipher algorithm][cipher mode]. Only the AES/XTS entry is
+ * populated here; the remaining entries are zero-initialized.
+ */
+struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
+ [AES_CIPHER_ALG][XTS_CIPHER_MODE] =
+ { .atomic = _gf_true,
+ .should_pad = _gf_true,
+ .blkbits = AES_BLOCK_BITS,
+ .init = aes_xts_init,
+ .set_private = set_private_aes_xts,
+ .check_key = check_key_aes_xts,
+ .set_iv = set_iv_aes_xts,
+ .encrypt = encrypt_aes_xts
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
new file mode 100644
index 00000000000..0b243d3e827
--- /dev/null
+++ b/xlators/encryption/crypt/src/keys.c
@@ -0,0 +1,297 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/* Key hierarchy
+
+ +----------------+
+ | MASTER_VOL_KEY |
+ +-------+--------+
+ |
+ |
+ +----------------+----------------+
+ | | |
+ | | |
+ +-------+------+ +-------+-------+ +------+--------+
+ | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
+ +-------+------+ +---------------+ +---------------+
+ |
+ |
+ +-------+-------+
+ | NMTD_LINK_KEY |
+ +---------------+
+
+ */
+
+#if DEBUG_CRYPT
+/* debugging aid: complain in the log about zero number of PRF iterations */
+static void check_prf_iters(uint32_t num_iters)
+{
+        if (num_iters != 0)
+                return;
+        gf_log ("crypt", GF_LOG_DEBUG,
+                "bad number of prf iterations : %d", num_iters);
+}
+#else
+#define check_prf_iters(num_iters) noop
+#endif /* DEBUG_CRYPT */
+
+/* all-zero 16-byte object id, used as id-context of the per-volume key */
+unsigned char crypt_fake_oid[16] = {0};
+
+/*
+ * derive key in the counter mode using
+ * sha256-based HMAC as PRF (see
+ * NIST Special Publication 800-108, 5.1)
+ */
+
+#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
+
+static int32_t kderive_init(struct kderive_context *ctx,
+                            const unsigned char *pkey, /* parent key */
+                            uint32_t pkey_size, /* parent key size */
+                            const unsigned char *idctx, /* id-context */
+                            uint32_t idctx_size,
+                            crypt_key_type type /* type of child key */)
+{
+        /*
+         * Allocates ctx->fid (fixed input data) and ctx->out (PRF
+         * output, rounded up to a multiple of PRF_OUTPUT_SIZE) and
+         * fills everything in ctx->fid except the leading counter.
+         * Both buffers are released by kderive_final().
+         * Returns 0 on success, ENOMEM on allocation failure.
+         */
+        unsigned char *pos;
+        uint32_t llen = strlen(crypt_keys[type].label);
+        uint32_t ckey_len_be;
+        /*
+         * Compound the fixed input data for KDF:
+         * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
+         * NIST SP 800-108, 5.1
+         */
+        ctx->fid_len =
+                sizeof(uint32_t) +
+                llen +
+                1 +
+                idctx_size +
+                sizeof(uint32_t);
+
+        ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
+        if (!ctx->fid)
+                return ENOMEM;
+        ctx->out_len = round_up(crypt_keys[type].len >> 3,
+                                PRF_OUTPUT_SIZE);
+        ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
+        if (!ctx->out) {
+                GF_FREE(ctx->fid);
+                return ENOMEM;
+        }
+        ctx->pkey = pkey;
+        ctx->pkey_len = pkey_size;
+        ctx->ckey_len = crypt_keys[type].len;
+
+        pos = ctx->fid;
+
+        /* counter will be set up in kderive_rfn() */
+        pos += sizeof(uint32_t);
+
+        memcpy(pos, crypt_keys[type].label, llen);
+        pos += llen;
+
+        /* set up zero octet */
+        *pos = 0;
+        pos += 1;
+
+        memcpy(pos, idctx, idctx_size);
+        pos += idctx_size;
+
+        /*
+         * @pos sits at an arbitrary byte offset now (it depends on
+         * the label and id-context lengths), so store the big-endian
+         * length bytewise rather than through a uint32_t pointer.
+         */
+        ckey_len_be = htobe32(ctx->ckey_len);
+        memcpy(pos, &ckey_len_be, sizeof(ckey_len_be));
+
+        return 0;
+}
+
+/*
+ * Fill ctx->out with the PRF output: one HMAC-SHA256 over ctx->fid per
+ * PRF_OUTPUT_SIZE bytes of ctx->out, with the big-endian iteration
+ * number written to the first 4 bytes of ctx->fid before every round.
+ * Return values of the HMAC_* calls are not checked.
+ */
+static void kderive_update(struct kderive_context *ctx)
+{
+ uint32_t i;
+ HMAC_CTX hctx;
+ unsigned char *pos = ctx->out;
+ uint32_t *p_iter = (uint32_t *)ctx->fid;
+ uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
+
+ check_prf_iters(num_iters);
+
+ HMAC_CTX_init(&hctx);
+ /*
+ * NOTE(review): the counter runs from 0 here; changing it would
+ * change every derived key, so confirm before touching it.
+ */
+ for (i = 0; i < num_iters; i++) {
+ /*
+ * update the iteration number in the fid
+ */
+ *p_iter = htobe32(i);
+ /* pkey_len is kept in bits, HMAC_Init_ex() is given bytes */
+ HMAC_Init_ex(&hctx,
+ ctx->pkey, ctx->pkey_len >> 3,
+ EVP_sha256(),
+ NULL);
+ HMAC_Update(&hctx, ctx->fid, ctx->fid_len);
+ HMAC_Final(&hctx, pos, NULL);
+
+ pos += PRF_OUTPUT_SIZE;
+ }
+ HMAC_CTX_cleanup(&hctx);
+}
+
+/*
+ * Hand the derived key (ctx->ckey_len bits) over to @child,
+ * release the buffers of @ctx and wipe the context itself.
+ */
+static void kderive_final(struct kderive_context *ctx, unsigned char *child)
+{
+        const size_t ckey_bytes = ctx->ckey_len >> 3;
+
+        /* fixed input data is not needed anymore */
+        GF_FREE(ctx->fid);
+
+        memcpy(child, ctx->out, ckey_bytes);
+        GF_FREE(ctx->out);
+
+        memset(ctx, 0, sizeof(*ctx));
+}
+
+/*
+ * derive per-volume key for object ids authentication;
+ * the result is stored in master->m_nmtd_key.
+ * Returns 0 on success, or the error of kderive_init().
+ */
+int32_t get_nmtd_vol_key(struct master_cipher_info *master)
+{
+        struct kderive_context kctx;
+        int32_t rc;
+
+        rc = kderive_init(&kctx, master->m_key, master_key_size(),
+                          crypt_fake_oid, sizeof(uuid_t), NMTD_VOL_KEY);
+        if (rc == 0) {
+                kderive_update(&kctx);
+                kderive_final(&kctx, master->m_nmtd_key);
+        }
+        return rc;
+}
+
+/*
+ * derive per-link key for authentication of non-encrypted
+ * meta-data (nmtd); the pathname of @loc is the id-context.
+ * Returns 0 on success, or the error of kderive_init().
+ */
+int32_t get_nmtd_link_key(loc_t *loc,
+                          struct master_cipher_info *master,
+                          unsigned char *result)
+{
+        struct kderive_context kctx;
+        int32_t rc;
+
+        rc = kderive_init(&kctx, master->m_nmtd_key, nmtd_vol_key_size(),
+                          (const unsigned char *)loc->path,
+                          strlen(loc->path), NMTD_LINK_KEY);
+        if (rc == 0) {
+                kderive_update(&kctx);
+                kderive_final(&kctx, result);
+        }
+        return rc;
+}
+
+/*
+ * derive per-file key for encryption and authentication
+ * of encrypted part of metadata (emtd); the object id of
+ * @info is the id-context.
+ * Returns 0 on success, or the error of kderive_init().
+ */
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+                          struct master_cipher_info *master,
+                          unsigned char *result)
+{
+        struct kderive_context kctx;
+        int32_t rc;
+
+        rc = kderive_init(&kctx, master->m_key, master_key_size(),
+                          info->oid, sizeof(uuid_t), EMTD_FILE_KEY);
+        if (rc == 0) {
+                kderive_update(&kctx);
+                kderive_final(&kctx, result);
+        }
+        return rc;
+}
+
+/*
+ * Map data key size in bits to the key type.
+ * Returns 0 and sets @type for 256 and 512;
+ * returns ENOTSUP (leaving @type untouched) otherwise.
+ */
+static int32_t data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
+{
+        if (keysize == 256) {
+                *type = DATA_FILE_KEY_256;
+                return 0;
+        }
+        if (keysize == 512) {
+                *type = DATA_FILE_KEY_512;
+                return 0;
+        }
+        gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
+               keysize);
+        return ENOTSUP;
+}
+
+/*
+ * derive per-file key of @keysize bits for data encryption.
+ * Returns 0 on success, the error of data_key_type_by_size()
+ * for an unsupported size, or the error of kderive_init().
+ */
+int32_t get_data_file_key(struct crypt_inode_info *info,
+                          struct master_cipher_info *master,
+                          uint32_t keysize,
+                          unsigned char *key)
+{
+        struct kderive_context kctx;
+        crypt_key_type ktype;
+        int32_t rc;
+
+        rc = data_key_type_by_size(keysize, &ktype);
+        if (rc != 0)
+                return rc;
+
+        rc = kderive_init(&kctx, master->m_key, master_key_size(),
+                          info->oid, sizeof(uuid_t), ktype);
+        if (rc != 0)
+                return rc;
+
+        kderive_update(&kctx);
+        kderive_final(&kctx, key);
+        return 0;
+}
+
+/*
+ * NOTE: Don't change existing keys: it will break compatibility;
+ */
+/*
+ * Key descriptors indexed by key type.
+ * .len is the key length in bits (the kderive_* helpers shift it
+ * right by 3 to get bytes); .label is the "Label" part of the fixed
+ * input data built in kderive_init().
+ */
+struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
+ [MASTER_VOL_KEY] =
+ { .len = MASTER_VOL_KEY_SIZE << 3,
+ .label = "volume-master",
+ },
+ [NMTD_VOL_KEY] =
+ { .len = NMTD_VOL_KEY_SIZE << 3,
+ .label = "volume-nmtd-key-generation"
+ },
+ [NMTD_LINK_KEY] =
+ { .len = 128,
+ .label = "link-nmtd-authentication"
+ },
+ [EMTD_FILE_KEY] =
+ { .len = 128,
+ .label = "file-emtd-encryption-and-auth"
+ },
+ [DATA_FILE_KEY_256] =
+ { .len = 256,
+ .label = "file-data-encryption-256"
+ },
+ [DATA_FILE_KEY_512] =
+ { .len = 512,
+ .label = "file-data-encryption-512"
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
new file mode 100644
index 00000000000..1364f825a98
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.c
@@ -0,0 +1,614 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+#include "metadata.h"
+
+/*
+ * Allocate zeroed buffer of @size bytes for the format string and
+ * record @size in local->format_size. Nothing is allocated for zero
+ * @size. Returns 0 on success, ENOMEM on allocation failure.
+ */
+int32_t alloc_format(crypt_local_t *local, size_t size)
+{
+        if (size == 0) {
+                local->format_size = size;
+                return 0;
+        }
+        local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
+        if (local->format == NULL)
+                return ENOMEM;
+        local->format_size = size;
+        return 0;
+}
+
+/* allocate format string of the size needed for a newly created file */
+int32_t alloc_format_create(crypt_local_t *local)
+{
+        size_t fresh_size = new_format_size();
+
+        return alloc_format(local, fresh_size);
+}
+
+/*
+ * Release the format string of @local.
+ * local->format and local->format_size are left as they are.
+ */
+void free_format(crypt_local_t *local)
+{
+ GF_FREE(local->format);
+}
+
+/*
+ * Check compatibility with extracted metadata
+ */
+static int32_t check_file_metadata(struct crypt_inode_info *info)
+{
+        /*
+         * Returns 0 if the loaded attributes are supported,
+         * EINVAL (with a warning in the log) otherwise.
+         */
+        struct object_cipher_info *object = &info->cinfo;
+
+        if (info->nr_minor != CRYPT_XLATOR_ID) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "unsupported minor subversion %d", info->nr_minor);
+                return EINVAL;
+        }
+        /*
+         * LAST_CIPHER_ALG and LAST_CIPHER_MODE are the dimensions of
+         * data_cipher_algs[][], so they are not valid ids themselves.
+         */
+        if (object->o_alg >= LAST_CIPHER_ALG) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "unsupported cipher algorithm %d",
+                       object->o_alg);
+                return EINVAL;
+        }
+        if (object->o_mode >= LAST_CIPHER_MODE) {
+                gf_log("crypt", GF_LOG_WARNING,
+                       "unsupported cipher mode %d",
+                       object->o_mode);
+                return EINVAL;
+        }
+        if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
+            object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
+                gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
+                       object->o_block_bits);
+                return EINVAL;
+        }
+        /* TBD: check data key size */
+        return 0;
+}
+
+/*
+ * Size of the v1 format string after operation @op is applied to a
+ * string of @old_size bytes. Zero is returned for a cut which would
+ * not leave more than the base structure, and for unknown @op.
+ */
+static size_t format_size_v1(mtd_op_t op, size_t old_size)
+{
+        if (op == MTD_CREATE)
+                return sizeof(struct mtd_format_v1);
+        if (op == MTD_OVERWRITE)
+                return old_size;
+        if (op == MTD_APPEND)
+                return old_size + NMTD_8_MAC_SIZE;
+        if (op == MTD_CUT)
+                return old_size > sizeof(struct mtd_format_v1) ?
+                        old_size - NMTD_8_MAC_SIZE : 0;
+
+        gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
+        return 0;
+}
+
+/*
+ * Size of the updated format string: the size reported by the current
+ * loader plus the common header (struct crypt_format).
+ * Zero means that the format string doesn't need to be updated.
+ */
+size_t format_size(mtd_op_t op, size_t old_size)
+{
+        const size_t header = sizeof(struct crypt_format);
+        size_t body;
+
+        body = mtd_loaders[current_mtd_loader()].format_size(op,
+                                                            old_size - header);
+        return body != 0 ? body + header : 0;
+}
+
+/*
+ * format string size of a file which has just been created
+ * (nr_links = 1)
+ */
+size_t new_format_size(void)
+{
+        size_t created = format_size(MTD_CREATE, 0);
+
+        return created;
+}
+
+/*
+ * Calculate per-link MAC by pathname: AES-128 CMAC of NMTD of @info
+ * keyed with the link key derived for @loc.
+ * The MAC is written to @result. Returns 0 on success, -1 on errors.
+ */
+static int32_t calc_link_mac_v1(struct mtd_format_v1 *fmt,
+                                loc_t *loc,
+                                unsigned char *result,
+                                struct crypt_inode_info *info,
+                                struct master_cipher_info *master)
+{
+        int32_t rc = -1;
+        unsigned char link_key[16];
+        CMAC_CTX *mac_ctx;
+        size_t mac_len;
+
+        if (get_nmtd_link_key(loc, master, link_key)) {
+                gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
+                return -1;
+        }
+        mac_ctx = CMAC_CTX_new();
+        if (mac_ctx == NULL) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
+                return -1;
+        }
+        /* CMAC_* calls report failure with zero */
+        if (!CMAC_Init(mac_ctx, link_key, sizeof(link_key),
+                       EVP_aes_128_cbc(), 0)) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
+                goto release;
+        }
+        if (!CMAC_Update(mac_ctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1)) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
+                goto release;
+        }
+        if (!CMAC_Final(mac_ctx, result, &mac_len)) {
+                gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
+                goto release;
+        }
+        rc = 0;
+ release:
+        CMAC_CTX_free(mac_ctx);
+        return rc;
+}
+
+/*
+ * Calculate per-link MAC by pathname and store its first
+ * SIZE_OF_NMTD_V1_MAC bytes in the slot of index @idx.
+ * Returns 0 on success, -1 on errors.
+ */
+static int32_t create_link_mac_v1(struct mtd_format_v1 *fmt,
+                                  uint32_t idx,
+                                  loc_t *loc,
+                                  struct crypt_inode_info *info,
+                                  struct master_cipher_info *master)
+{
+        unsigned char full_mac[16];
+        unsigned char *slot = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+        if (calc_link_mac_v1(fmt, loc, full_mac, info, master) != 0)
+                return -1;
+
+        memcpy(slot, full_mac, SIZE_OF_NMTD_V1_MAC);
+        return 0;
+}
+
+static int32_t create_format_v1(unsigned char *wire,
+                                loc_t *loc,
+                                struct crypt_inode_info *info,
+                                struct master_cipher_info *master)
+{
+        /*
+         * Pack version-specific part of metadata to @wire:
+         * fill the attributes, encrypt them in place with AES-GCM,
+         * store the GCM tag, and create the first per-link MAC.
+         * Returns 0 on success, non-zero on errors.
+         */
+        int32_t ret;
+        struct mtd_format_v1 *fmt;
+        unsigned char mtd_key[16];
+        AES_KEY EMTD_KEY;
+        unsigned char nmtd_link_key[16];
+        uint32_t ad;
+        GCM128_CONTEXT *gctx;
+
+        fmt = (struct mtd_format_v1 *)wire;
+
+        fmt->minor_id = info->nr_minor;
+        fmt->alg_id = AES_CIPHER_ALG;
+        fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
+        fmt->block_bits = master->m_block_bits;
+        fmt->mode_id = master->m_mode;
+        /*
+         * retrieve keys for the parts of metadata
+         */
+        ret = get_emtd_file_key(info, master, mtd_key);
+        if (ret)
+                return ret;
+        /*
+         * the link key itself is not used below, only the
+         * status of its derivation is
+         */
+        ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+        if (ret)
+                return ret;
+
+        ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+        if (ret < 0) {
+                gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+                return EIO;
+        }
+        gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+        if (!gctx) {
+                /* don't pass NULL context to CRYPTO_gcm128_*() */
+                gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+                return ENOMEM;
+        }
+        CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+        /* loader version is authenticated as additional data */
+        ad = htole32(MTD_LOADER_V1);
+        ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+        if (ret) {
+                gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+                CRYPTO_gcm128_release(gctx);
+                return ret;
+        }
+        ret = CRYPTO_gcm128_encrypt(gctx,
+                                    get_EMTD_V1(fmt),
+                                    get_EMTD_V1(fmt),
+                                    SIZE_OF_EMTD_V1);
+        if (ret) {
+                gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
+                CRYPTO_gcm128_release(gctx);
+                return ret;
+        }
+        /*
+         * set MAC of encrypted part of metadata
+         */
+        CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
+        CRYPTO_gcm128_release(gctx);
+        /*
+         * set the first MAC of non-encrypted part of metadata
+         */
+        return create_link_mac_v1(fmt, 0, loc, info, master);
+}
+
+/*
+ * Called by fops:
+ * ->create();
+ * ->link();
+ *
+ * Pack the common header (id of the current loader) to @wire and let
+ * the loader pack its version-specific part right behind it.
+ * Pre-conditions: @info contains valid object-id.
+ */
+int32_t create_format(unsigned char *wire,
+                      loc_t *loc,
+                      struct crypt_inode_info *info,
+                      struct master_cipher_info *master)
+{
+        struct crypt_format *header = (struct crypt_format *)wire;
+        unsigned char *versioned = wire + sizeof(struct crypt_format);
+
+        header->loader_id = current_mtd_loader();
+
+        return mtd_loaders[current_mtd_loader()].create_format(versioned,
+                                                               loc,
+                                                               info,
+                                                               master);
+}
+
+/*
+ * Copy the old format string to @new, then append or overwrite
+ * per-link mac of @mac_idx index in accordance with the new pathname
+ */
+int32_t appov_link_mac_v1(unsigned char *new,
+                          unsigned char *old,
+                          uint32_t old_size,
+                          int32_t mac_idx,
+                          loc_t *loc,
+                          struct crypt_inode_info *info,
+                          struct master_cipher_info *master,
+                          crypt_local_t *local)
+{
+        struct mtd_format_v1 *updated = (struct mtd_format_v1 *)new;
+
+        memcpy(new, old, old_size);
+
+        return create_link_mac_v1(updated, mac_idx, loc, info, master);
+}
+
+/*
+ * Cut per-link mac of @mac_idx index: copy to @new everything what
+ * precedes the mac, then everything what follows it.
+ * Always returns 0.
+ */
+static int32_t cut_link_mac_v1(unsigned char *new,
+                               unsigned char *old,
+                               uint32_t old_size,
+                               int32_t mac_idx,
+                               loc_t *loc,
+                               struct crypt_inode_info *info,
+                               struct master_cipher_info *master,
+                               crypt_local_t *local)
+{
+        /* offset of the mac to cut (struct mtd_format_v1 ends with mac #0) */
+        size_t cut_start =
+                sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1);
+        /* offset of the first byte behind the mac to cut */
+        size_t cut_end =
+                sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx;
+
+        memcpy(new, old, cut_start);
+        memcpy(new + cut_start, old + cut_end, old_size - cut_end);
+        return 0;
+}
+
+/*
+ * Build updated v1 format string in @new from @old:
+ * MTD_APPEND adds a mac behind the last one, MTD_OVERWRITE replaces
+ * the mac of @mac_idx, MTD_CUT removes the mac of @mac_idx.
+ * Returns -1 for any other operation.
+ */
+int32_t update_format_v1(unsigned char *new,
+                         unsigned char *old,
+                         size_t old_len,
+                         int32_t mac_idx, /* of old name */
+                         mtd_op_t op,
+                         loc_t *loc,
+                         struct crypt_inode_info *info,
+                         struct master_cipher_info *master,
+                         crypt_local_t *local)
+{
+        if (op == MTD_APPEND)
+                /* index of the slot right behind the last mac */
+                mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1))/8;
+
+        if (op == MTD_APPEND || op == MTD_OVERWRITE)
+                return appov_link_mac_v1(new, old, old_len, mac_idx,
+                                         loc, info, master, local);
+        if (op == MTD_CUT)
+                return cut_link_mac_v1(new, old, old_len, mac_idx,
+                                       loc, info, master, local);
+
+        gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
+        return -1;
+}
+
+/*
+ * Called by fops:
+ *
+ * ->link()
+ * ->unlink()
+ * ->rename()
+ *
+ * Copy the common header from @old to @new and let the current
+ * loader update the version-specific part which follows it.
+ * Nothing is done (0 is returned) if @new is NULL.
+ */
+int32_t update_format(unsigned char *new,
+                      unsigned char *old,
+                      size_t old_len,
+                      int32_t mac_idx,
+                      mtd_op_t op,
+                      loc_t *loc,
+                      struct crypt_inode_info *info,
+                      struct master_cipher_info *master,
+                      crypt_local_t *local)
+{
+        const size_t header = sizeof(struct crypt_format);
+
+        if (new == NULL)
+                return 0;
+
+        memcpy(new, old, header);
+
+        return mtd_loaders[current_mtd_loader()].update_format(new + header,
+                                                               old + header,
+                                                               old_len - header,
+                                                               mac_idx, op,
+                                                               loc, info,
+                                                               master, local);
+}
+
+/*
+ * Perform preliminary checks of found metadata
+ * Return < 0 on errors;
+ * Return number of object-id MACs (>= 1) on success
+ */
+int32_t check_format_v1(uint32_t len, unsigned char *wire)
+{
+        /*
+         * @len is the size of the version-specific part of metadata.
+         * It must hold struct mtd_format_v1 (which includes the first
+         * per-link MAC) followed by a whole number of further MACs.
+         * @wire is not inspected.
+         */
+        uint32_t nr_links;
+
+        if (len < sizeof(struct mtd_format_v1)) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "v1-loader: bad metadata size %d", len);
+                goto error;
+        }
+        len -= sizeof(struct mtd_format_v1);
+        if (len % sizeof(nmtd_8_mac_t)) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "v1-loader: bad metadata format");
+                goto error;
+        }
+        nr_links = 1 + len / sizeof(nmtd_8_mac_t);
+        if (nr_links > _POSIX_LINK_MAX)
+                goto error;
+        return nr_links;
+ error:
+        /*
+         * Errors must be negative: a positive errno value is not
+         * distinguishable from a valid number of MACs.
+         */
+        return -EIO;
+}
+
+/*
+ * Check whether the stored per-link MAC of index @idx matches
+ * the MAC calculated for the pathname of @loc
+ *
+ * return:
+ * -1 on errors;
+ * 0 on failed verification;
+ * 1 on successful verification
+ */
+static int32_t verify_link_mac_v1(struct mtd_format_v1 *fmt,
+                                  uint32_t idx /* index of the mac to verify */,
+                                  loc_t *loc,
+                                  struct crypt_inode_info *info,
+                                  struct master_cipher_info *master)
+{
+        unsigned char expected[16];
+        unsigned char *stored = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+        if (calc_link_mac_v1(fmt, loc, expected, info, master) != 0)
+                return -1;
+
+        /* only the stored part of the MAC is compared */
+        return memcmp(expected, stored, SIZE_OF_NMTD_V1_MAC) == 0 ? 1 : 0;
+}
+
+/*
+ * Lookup per-link MAC by pathname: try the stored MACs one by one
+ * until one of them is verified.
+ *
+ * return index of the MAC, if it was found;
+ * return < 0 on errors, or if the MAC wasn't found (-ENOENT)
+ */
+static int32_t lookup_link_mac_v1(struct mtd_format_v1 *fmt,
+                                  uint32_t nr_macs,
+                                  loc_t *loc,
+                                  struct crypt_inode_info *info,
+                                  struct master_cipher_info *master)
+{
+        uint32_t slot = 0;
+
+        while (slot < nr_macs) {
+                int32_t verdict = verify_link_mac_v1(fmt, slot, loc,
+                                                     info, master);
+                if (verdict < 0)
+                        return verdict;
+                if (verdict > 0)
+                        return slot;
+                slot++;
+        }
+        return -ENOENT;
+}
+
+/*
+ * Extract version-specific part of metadata
+ */
+/*
+ * Steps:
+ * 1) sanity check of the length, lookup of the per-link MAC matching
+ * @loc; its index is stored in local->mac_idx;
+ * 2) if @load_info is set: decrypt EMTD in a private copy of @wire,
+ * verify its MAC and load the attributes to @info.
+ *
+ * Returns 0 on success, positive errno value (EIO, EINVAL, ENOMEM) or
+ * the error of get_emtd_file_key()/check_file_metadata() otherwise.
+ */
+static int32_t open_format_v1(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ int32_t ret;
+ int32_t num_nmtd_macs;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ GCM128_CONTEXT *gctx;
+ uint32_t ad;
+ emtd_8_mac_t gmac;
+ struct object_cipher_info *object;
+
+ /* relies on check_format_v1() reporting errors as values <= 0 */
+ num_nmtd_macs = check_format_v1(len, wire);
+ if (num_nmtd_macs <= 0)
+ return EIO;
+
+ ret = lookup_link_mac_v1((struct mtd_format_v1 *)wire,
+ num_nmtd_macs, loc, info, master);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
+ return EINVAL;
+ }
+
+ local->mac_idx = ret;
+ if (load_info == _gf_false)
+ /* the case of partial open */
+ return 0;
+
+ /* decryption is done in place, so work on a copy of @wire */
+ fmt = GF_CALLOC(1, len, gf_crypt_mt_mtd);
+ if (!fmt)
+ return ENOMEM;
+ memcpy(fmt, wire, len);
+
+ object = &info->cinfo;
+
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
+ goto out;
+ }
+ /*
+ * decrypt encrypted meta-data
+ */
+ ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+ ret = EIO;
+ goto out;
+ }
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+ if (!gctx) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+ ret = ENOMEM;
+ goto out;
+ }
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ /* the same additional data as in create_format_v1() */
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ ret = EIO;
+ goto out;
+ }
+ ret = CRYPTO_gcm128_decrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ ret = EIO;
+ goto out;
+ }
+ /*
+ * verify metadata
+ */
+ CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
+ CRYPTO_gcm128_release(gctx);
+ if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
+ gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
+ ret = EINVAL;
+ goto out;
+ }
+ /*
+ * load verified metadata to the private part of inode
+ */
+ info->nr_minor = fmt->minor_id;
+
+ object->o_alg = fmt->alg_id;
+ object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
+ object->o_block_bits = fmt->block_bits;
+ object->o_mode = fmt->mode_id;
+
+ ret = check_file_metadata(info);
+ out:
+ /* the private copy is released on every path from here */
+ GF_FREE(fmt);
+ return ret;
+}
+
+/*
+ * perform metadata authentication against @loc->path;
+ * extract crypt-specific attributes and populate @info
+ * with them (optional)
+ *
+ * @str, @len: the whole format string (common header followed by the
+ * version-specific part).
+ * Returns EIO if the string is shorter than the common header, EINVAL
+ * for unknown loader id, otherwise the result of the loader.
+ */
+int32_t open_format(unsigned char *str,
+                    int32_t len,
+                    loc_t *loc,
+                    struct crypt_inode_info *info,
+                    struct master_cipher_info *master,
+                    crypt_local_t *local,
+                    gf_boolean_t load_info)
+{
+        struct crypt_format *fmt;
+        /*
+         * @len is signed: check the sign explicitly, since in the
+         * comparison with sizeof() a negative value is converted to a
+         * large unsigned one and passes the check.
+         */
+        if (len < 0 || (size_t)len < sizeof(*fmt)) {
+                gf_log("crypt", GF_LOG_ERROR, "Bad core format");
+                return EIO;
+        }
+        fmt = (struct crypt_format *)str;
+
+        if (fmt->loader_id >= LAST_MTD_LOADER) {
+                gf_log("crypt", GF_LOG_ERROR,
+                       "Unsupported loader id %d", fmt->loader_id);
+                return EINVAL;
+        }
+        /* skip the common header */
+        str += sizeof(*fmt);
+        len -= sizeof(*fmt);
+
+        return mtd_loaders[fmt->loader_id].open_format(str,
+                                                       len,
+                                                       loc,
+                                                       info,
+                                                       master,
+                                                       local,
+                                                       load_info);
+}
+
+/*
+ * Metadata loaders indexed by loader id (the loader_id field of
+ * struct crypt_format); only the v1 loader is defined here.
+ */
+struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER] = {
+ [MTD_LOADER_V1] =
+ {.format_size = format_size_v1,
+ .create_format = create_format_v1,
+ .open_format = open_format_v1,
+ .update_format = update_format_v1
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
new file mode 100644
index 00000000000..b67ae25b58c
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __METADATA_H__
+#define __METADATA_H__
+
+#define NMTD_8_MAC_SIZE (8)
+#define EMTD_8_MAC_SIZE (8)
+
+typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
+typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE] ;
+
+/*
+ * Version "v1" of file's metadata.
+ * Metadata of this version has 4 components:
+ *
+ * 1) EMTD (Encrypted part of MeTaData);
+ * 2) NMTD (Non-encrypted part of MeTaData);
+ * 3) EMTD_MAC; (EMTD Message Authentication Code);
+ * 4) Array of per-link NMTD MACs (for every (hard)link it includes
+ * exactly one MAC)
+ */
+struct mtd_format_v1 {
+ /* EMTD, encrypted part of meta-data */
+ uint8_t alg_id; /* cipher algorithm id (only AES for now) */
+ uint8_t mode_id; /* cipher mode id; (only XTS for now) */
+ uint8_t block_bits; /* encoded block size */
+ uint8_t minor_id; /* client translator id */
+ uint8_t dkey_factor; /* encoded size of the data key */
+ /* MACs */
+ emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
+ nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
+ * meta-data: at least one such MAC is always
+ * present */
+} __attribute__((packed));
+
+/*
+ * NMTD, the non-encrypted part of metadata of version "v1"
+ * is file's gfid, which is generated on trusted machines.
+ */
+#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
+#define SIZE_OF_EMTD_V1 (offsetof(struct mtd_format_v1, gmac) - \
+ offsetof(struct mtd_format_v1, alg_id))
+#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
+#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
+
+/*
+ * Returns a pointer to the start of the EMTD (encrypted part of the
+ * metadata): ->alg_id, the first EMTD field of @format.
+ */
+static inline unsigned char *get_EMTD_V1(struct mtd_format_v1 *format)
+{
+ return &format->alg_id;
+}
+
+/*
+ * Returns @info->oid as the NMTD (non-encrypted part of the metadata).
+ * NMTD of version "v1" is documented in this header as the file's gfid,
+ * so ->oid presumably holds it (TODO: confirm against the definition of
+ * struct crypt_inode_info).
+ */
+static inline unsigned char *get_NMTD_V1(struct crypt_inode_info *info)
+{
+ return info->oid;
+}
+
+/*
+ * Returns the EMTD MAC (->gmac) stored in @format.
+ */
+static inline unsigned char *get_EMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->gmac;
+}
+
+/*
+ * Returns ->omac of @format: the per-link NMTD MAC slot declared in
+ * struct mtd_format_v1 (at least one such MAC is always present).
+ */
+static inline unsigned char *get_NMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->omac;
+}
+
+#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am
index ba5e623d8e2..cc4b7c13005 100644
--- a/xlators/encryption/rot-13/src/Makefile.am
+++ b/xlators/encryption/rot-13/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = rot-13.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-rot_13_la_LDFLAGS = -module -avoidversion
+rot_13_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
rot_13_la_SOURCES = rot-13.c
rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = rot-13.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
index a19ef5deadf..6ec1b47c87b 100644
--- a/xlators/encryption/rot-13/src/rot-13.c
+++ b/xlators/encryption/rot-13/src/rot-13.c
@@ -1,30 +1,15 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <ctype.h>
#include <sys/uio.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "xlator.h"
#include "logging.h"
@@ -32,13 +17,13 @@
#include "rot-13.h"
/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
* This xlator is meant as an example, NOT FOR PRODUCTION
* USE ;) (hence no error-checking)
*/
-void
+void
rot13 (char *buf, int len)
{
int i;
@@ -68,14 +53,15 @@ rot13_readv_cbk (call_frame_t *frame,
struct iovec *vector,
int32_t count,
struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
-
+
if (priv->decrypt_read)
rot13_iovec (vector, count);
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
return 0;
}
@@ -84,13 +70,13 @@ rot13_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame,
rot13_readv_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -101,9 +87,10 @@ rot13_writev_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -112,20 +99,20 @@ rot13_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
if (priv->encrypt_write)
rot13_iovec (vector, count);
- STACK_WIND (frame,
+ STACK_WIND (frame,
rot13_writev_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset,
- iobref);
+ fd, vector, count, offset, flags,
+ iobref, xdata);
return 0;
}
@@ -136,7 +123,7 @@ init (xlator_t *this)
rot_13_private_t *priv = NULL;
if (!this->children || this->children->next) {
- gf_log ("rot13", GF_LOG_ERROR,
+ gf_log ("rot13", GF_LOG_ERROR,
"FATAL: rot13 should have exactly one child");
return -1;
}
@@ -145,9 +132,11 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_WARNING,
"dangling volume. check volfile ");
}
-
- priv = CALLOC (sizeof (rot_13_private_t), 1);
- ERR_ABORT (priv);
+
+ priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
+ if (!priv)
+ return -1;
+
priv->decrypt_read = 1;
priv->encrypt_write = 1;
@@ -156,6 +145,7 @@ init (xlator_t *this)
if (gf_string2boolean (data->data, &priv->encrypt_write) == -1) {
gf_log (this->name, GF_LOG_ERROR,
"encrypt-write takes only boolean options");
+ GF_FREE (priv);
return -1;
}
}
@@ -165,6 +155,7 @@ init (xlator_t *this)
if (gf_string2boolean (data->data, &priv->decrypt_read) == -1) {
gf_log (this->name, GF_LOG_ERROR,
"decrypt-read takes only boolean options");
+ GF_FREE (priv);
return -1;
}
}
@@ -174,13 +165,16 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
rot_13_private_t *priv = this->private;
-
- FREE (priv);
-
+
+ if (!priv)
+ return;
+ this->private = NULL;
+ GF_FREE (priv);
+
return;
}
@@ -189,15 +183,14 @@ struct xlator_fops fops = {
.writev = rot13_writev
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {"encrypt-write"},
+ { .key = {"encrypt-write"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"decrypt-read"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
},
{ .key = {NULL} },
};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
index 35a2df2da29..a2017513437 100644
--- a/xlators/encryption/rot-13/src/rot-13.h
+++ b/xlators/encryption/rot-13/src/rot-13.h
@@ -1,30 +1,15 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ROT_13_H__
#define __ROT_13_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
typedef struct {
gf_boolean_t encrypt_write;
gf_boolean_t decrypt_read;
diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am
new file mode 100644
index 00000000000..a530845c4c0
--- /dev/null
+++ b/xlators/experimental/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = jbr-client jbr-server fdl dht2 posix2
+
+CLEANFILES =
diff --git a/xlators/experimental/README.md b/xlators/experimental/README.md
new file mode 100644
index 00000000000..b00f24e114b
--- /dev/null
+++ b/xlators/experimental/README.md
@@ -0,0 +1,107 @@
+# Purpose of this directory
+
+This directory is created to host experimental gluster translators. A new
+translator that is *experimental* in nature, would need to create its own
+subdirectory under this directory, to host/publish its work.
+
+Example:
+ The first commit should include the following changes
+ 1. xlators/experimental/Makefile.am
+ NOTE: Add foobar to the list of SUBDIRS here
+ 2. xlators/experimental/foobar
+ 3. xlators/experimental/foobar/Makefile.am
+ NOTE: Can be empty initially in the first commit
+ 4. configure.ac
+ NOTE: Include your experimental Makefile under AC_CONFIG_FILES
+ 5. xlators/experimental/foobar/README.md
+ NOTE: The readme should cover details as required for the translator to be
+ accepted as experimental, primarily including a link to the specification
+ under the gluster-specs repository [1]. Later the readme should suffice
+ as an entry point for developers and users alike, who wish to experiment
+ with the xlator under development
+ 6. xlators/experimental/foobar/TODO.md
+ NOTE: This is a list of TODO's identified during the development process
+ that needs addressing over time. These include exceptions granted during
+ the review process, for things not addressed when commits are merged into
+ the repository
+
+# Why is it provided
+
+Quite often translator development that happens out of tree, does not get
+enough eyeballs early in its development phase, has not undergone CI
+(regression/continuous integration testing), and at times is not well integrated
+with the rest of gluster stack.
+
+Also, when such out of tree translators are submitted for acceptance, it is a
+bulk commit that makes review difficult and inefficient. Such submissions also
+have to be merged forward, and depending on the time spent in developing the
+translator the master branch could have moved far ahead, making this a painful
+activity.
+
+Experimental is born out of such needs, to provide xlator developers,
+ - Early access to CI
+ - Ability to adapt to ongoing changes in other parts of gluster
+ - More eye balls on the code and design aspects of the translator
+ - TBD: What else?
+
+and for maintainers,
+ - Ability to look at smaller change sets in the review process
+ - Ability to verify/check implementation against the specification provided
+
+# General rules
+
+1. If a new translator is added under here it should, at the very least, pass
+compilation.
+
+2. All translators under the experimental directory are shipped as a part of
+gluster-experimental RPMs.
+TBD: Spec file and other artifacts for the gluster-experimental RPM needs to be
+fleshed out.
+
+3. Experimental translators can leverage the CI framework as needed. Tests need
+to be hosted under xlators/experimental/tests initially, and later moved to the
+appropriate tests/ directory as the xlator matures. It is encouraged to provide
+tests for each commit or series of commits, so that code and tests can be
+inspected together.
+
+4. If any experimental translator breaks CI, it is quarantined till demonstrable
+proof towards the contrary is provided. This is applicable as tests are moved
+out of experimental tests directory to the CI framework directory, as otherwise
+experimental tests are not a part of regular CI regression runs.
+
+5. An experimental translator need not function at all, as a result commits can
+be merged pretty much at will as long as other rules as stated are not violated.
+
+6. Experimental submissions will be assigned an existing maintainer, to aid
+merging commits and ensure aspects of gluster code submissions are respected.
+When an experimental xlator is proposed and the first commit is posted,
+a mail to gluster-devel@gluster.org requesting attention will get a
+maintainer buddy assigned for the submission.
+NOTE: As we scale, this may change.
+
+7. More?
+
+# Getting out of the experimental jail
+
+So you now think your xlator is ready to leave experimental and become part of
+mainline!
+- TBD: guidelines pending.
+
+# FAQs
+
+1. How do I submit/commit experimental framework changes outside of my
+experimental xlator?
+ - Provide such framework changes as a separate commit
+ - Conditionally ensure these are built or activated only when the experimental
+ feature is activated, so that the normal gluster workflow continues to
+ function as before
+ - TBD: guidelines and/or examples pending.
+
+2. Ask your question either on gluster-devel@gluster.org or as a change request
+to this file in gluster gerrit [2] for an answer that will be assimilated into
+this readme.
+
+# Links
+[1] http://review.gluster.org/#/q/project:glusterfs-specs
+
+[2] http://review.gluster.org/#/q/project:glusterfs
diff --git a/xlators/experimental/dht2/Makefile.am b/xlators/experimental/dht2/Makefile.am
new file mode 100644
index 00000000000..9d910a66056
--- /dev/null
+++ b/xlators/experimental/dht2/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = dht2-client dht2-server
+
+CLEANFILES =
diff --git a/xlators/experimental/dht2/README.md b/xlators/experimental/dht2/README.md
new file mode 100644
index 00000000000..8f249a83673
--- /dev/null
+++ b/xlators/experimental/dht2/README.md
@@ -0,0 +1,47 @@
+# DHT2 Experimental README
+
+DHT2 is the new distribution scheme being developed for Gluster, that
+aims to remove the subdirectory spread across all DHT subvolumes.
+
+As a result of this work, the Gluster backend file layouts and on disk
+representation of directories and files are modified, thus making DHT2
+volumes incompatible to existing DHT based Gluster deployments.
+
+This document presents interested users with relevant data to play around
+with DHT2 volumes and provide feedback towards the same.
+
+REMOVEME: Design details currently under review here,
+ - http://review.gluster.org/#/c/13395/
+
+TODO: Add more information as relevant code is pulled in
+
+# Directory structure elaborated
+
+## dht2-server
+This directory contains code for the server side DHT2 xlator. This xlator is
+intended to run on the brick graph, and is responsible for FOP synchronization,
+redirection, transactions, and journal replays.
+
+NOTE: The server side code also handles changes to volume/cluster map and
+also any rebalance activities.
+
+## dht2-client
+This directory contains code for the client side DHT2 xlator. This xlator is
+intended to run on the client/access protocol/mount graph, and is responsible
+for FOP routing to the right DHT2 subvolume. It uses a volume/cluster wide map
+of the routing (layout), to achieve the same.
+
+## dht2-common
+This directory contains code that is used in common across other parts of DHT2.
+For example, FOP routing store/consult abstractions that are common across the
+client and server side of DHT2.
+
+## Issue: How to build dht2-common?
+ 1. Build a shared object
+ - We cannot ship this as a part of both the client and server xlator RPMs
+ 2. Build an archive
+ - Symbol clashes? when both the client and server xlators are loaded as a
+ part of the same graph
+ 3. Compile with other parts of the code that needs it
+ - Not very different from (2) above
+ - This is what is chosen at present, and maybe would be revised later
diff --git a/xlators/experimental/dht2/TODO.md b/xlators/experimental/dht2/TODO.md
new file mode 100644
index 00000000000..1e2c53c5b36
--- /dev/null
+++ b/xlators/experimental/dht2/TODO.md
@@ -0,0 +1,3 @@
+# DHT2 TODO list
+
+<Items will be added as code is pulled into the repository>
diff --git a/xlators/features/access-control/Makefile.am b/xlators/experimental/dht2/dht2-client/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/features/access-control/Makefile.am
+++ b/xlators/experimental/dht2/dht2-client/Makefile.am
diff --git a/xlators/experimental/dht2/dht2-client/src/Makefile.am b/xlators/experimental/dht2/dht2-client/src/Makefile.am
new file mode 100644
index 00000000000..39132994d08
--- /dev/null
+++ b/xlators/experimental/dht2/dht2-client/src/Makefile.am
@@ -0,0 +1,19 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+xlator_LTLIBRARIES = dht2c.la
+
+dht2c_sources = dht2-client-main.c
+
+dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
+
+dht2c_la_SOURCES = $(dht2c_sources) $(dht2common_sources)
+dht2c_la_LDFLAGS = -module -avoid-version
+dht2c_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/
+AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
+
+CLEANFILES =
diff --git a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c
new file mode 100644
index 00000000000..bd1d446e2b5
--- /dev/null
+++ b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: dht2-client-main.c
+ * This file contains the xlator loading functions, FOP entry points
+ * and options.
+ * The entire functionality including comments is TODO.
+ */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "statedump.h"
+
+/*
+ * init method of the DHT2 client xlator.
+ *
+ * The only check done so far is that @this has children in the volume
+ * graph. Returns 0 when it does, otherwise logs an error and returns -1.
+ */
+int32_t
+dht2_client_init (xlator_t *this)
+{
+ /* guard clause: with children configured there is nothing else to do */
+ if (this->children)
+ return 0;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Missing children in volume graph, this (%s) is"
+ " not a leaf translator", this->name);
+
+ return -1;
+}
+
+/*
+ * fini method of the DHT2 client xlator.
+ * Nothing to tear down: dht2_client_init() does not allocate any state.
+ */
+void
+dht2_client_fini (xlator_t *this)
+{
+ return;
+}
+
+/* xlator entry points: the init/fini routines of the DHT2 client. */
+class_methods_t class_methods = {
+ .init = dht2_client_init,
+ .fini = dht2_client_fini,
+};
+
+/* No FOPs are implemented yet: the table is empty. */
+struct xlator_fops fops = {
+};
+
+/* No callbacks are implemented yet: the table is empty. */
+struct xlator_cbks cbks = {
+};
+
+/*
+struct xlator_dumpops dumpops = {
+};
+*/
+
+/* No options are defined yet; the table holds only the NULL-key entry. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c b/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
new file mode 100644
index 00000000000..d959483b8a4
--- /dev/null
+++ b/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: dht2-common-map.c
+ * This file contains helper routines to store, consult, the volume map
+ * for subvolume to GFID relations.
+ * The entire functionality including comments is TODO.
+ */
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "statedump.h"
diff --git a/xlators/nfs/lib/Makefile.am b/xlators/experimental/dht2/dht2-server/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/nfs/lib/Makefile.am
+++ b/xlators/experimental/dht2/dht2-server/Makefile.am
diff --git a/xlators/experimental/dht2/dht2-server/src/Makefile.am b/xlators/experimental/dht2/dht2-server/src/Makefile.am
new file mode 100644
index 00000000000..4f721551020
--- /dev/null
+++ b/xlators/experimental/dht2/dht2-server/src/Makefile.am
@@ -0,0 +1,19 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+xlator_LTLIBRARIES = dht2s.la
+
+dht2s_sources = dht2-server-main.c
+
+dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
+
+dht2s_la_SOURCES = $(dht2s_sources) $(dht2common_sources)
+dht2s_la_LDFLAGS = -module -avoid-version
+dht2s_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/
+AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
+
+CLEANFILES =
diff --git a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c
new file mode 100644
index 00000000000..1f232cc3430
--- /dev/null
+++ b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: dht2-server-main.c
+ * This file contains the xlator loading functions, FOP entry points
+ * and options.
+ * The entire functionality including comments is TODO.
+ */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "statedump.h"
+
+/*
+ * init method of the DHT2 server xlator.
+ *
+ * The only check done so far is that @this has children in the volume
+ * graph. Returns 0 when it does, otherwise logs an error and returns -1.
+ */
+int32_t
+dht2_server_init (xlator_t *this)
+{
+ /* guard clause: with children configured there is nothing else to do */
+ if (this->children)
+ return 0;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Missing children in volume graph, this (%s) is"
+ " not a leaf translator", this->name);
+
+ return -1;
+}
+
+/*
+ * fini method of the DHT2 server xlator.
+ * Nothing to tear down: dht2_server_init() does not allocate any state.
+ */
+void
+dht2_server_fini (xlator_t *this)
+{
+ return;
+}
+
+/* xlator entry points: the init/fini routines of the DHT2 server. */
+class_methods_t class_methods = {
+ .init = dht2_server_init,
+ .fini = dht2_server_fini,
+};
+
+/* No FOPs are implemented yet: the table is empty. */
+struct xlator_fops fops = {
+};
+
+/* No callbacks are implemented yet: the table is empty. */
+struct xlator_cbks cbks = {
+};
+
+/*
+struct xlator_dumpops dumpops = {
+};
+*/
+
+/* No options are defined yet; the table holds only the NULL-key entry. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/storage/bdb/Makefile.am b/xlators/experimental/fdl/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/xlators/storage/bdb/Makefile.am
+++ b/xlators/experimental/fdl/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am
new file mode 100644
index 00000000000..aed0204284f
--- /dev/null
+++ b/xlators/experimental/fdl/src/Makefile.am
@@ -0,0 +1,43 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+xlator_LTLIBRARIES = fdl.la
+
+noinst_HEADERS = jnl-types.h
+
+nodist_fdl_la_SOURCES = fdl.c
+fdl_la_LDFLAGS = -module -avoid-version
+fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+sbin_PROGRAMS = gf_logdump gf_recon
+gf_logdump_SOURCES = logdump.c
+nodist_gf_logdump_SOURCES = libfdl.c
+gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
+ $(top_builddir)/api/src/libgfapi.la
+
+# Eventually recon(ciliation) code will move elsewhere, but for now it's
+# easier to have it next to the similar logdump code.
+gf_recon_SOURCES = recon.c
+nodist_gf_recon_SOURCES = librecon.c
+gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
+ $(top_builddir)/api/src/libgfapi.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/api/src -fPIC \
+ -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py
+EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c
+
+CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \
+ $(nodist_gf_recon_SOURCES)
+
+fdl.c: fdl-tmpl.c gen_fdl.py
+ $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@
+
+libfdl.c: dump-tmpl.c gen_dumper.py
+ $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@
+
+librecon.c: recon-tmpl.c gen_recon.py
+ $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@
diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c
new file mode 100644
index 00000000000..cac1071a9c1
--- /dev/null
+++ b/xlators/experimental/fdl/src/dump-tmpl.c
@@ -0,0 +1,156 @@
+#pragma fragment PROLOG
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs.h"
+#include "iatt.h"
+#include "xlator.h"
+#include "jnl-types.h"
+
+#pragma fragment DICT
+ {
+ int key_len, data_len;
+ char *key_ptr;
+ printf ("@ARGNAME@ = dict {\n");
+ for (;;) {
+ key_len = *((int *)new_meta);
+ new_meta += sizeof(int);
+ if (!key_len) {
+ break;
+ }
+ key_ptr = new_meta;
+ new_meta += key_len;
+ data_len = *((int *)new_meta);
+ new_meta += sizeof(int) + data_len;
+ printf (" %s = <%d bytes>\n", key_ptr, data_len);
+ }
+ printf ("}\n");
+ }
+
+#pragma fragment DOUBLE
+ printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta),
+ *((uint64_t *)new_meta));
+ new_meta += sizeof(uint64_t);
+
+#pragma fragment GFID
+ printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta)));
+ new_meta += 16;
+
+#pragma fragment INTEGER
+ printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta),
+ *((uint32_t *)new_meta));
+ new_meta += sizeof(uint32_t);
+
+#pragma fragment LOC
+ printf ("@ARGNAME@ = loc {\n");
+ printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
+ new_meta += 16;
+ printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
+ new_meta += 16;
+ if (*(new_meta++)) {
+ printf (" name = %s\n", new_meta);
+ new_meta += (strlen(new_meta) + 1);
+ }
+ printf ("}\n");
+
+#pragma fragment STRING
+ if (*(new_meta++)) {
+ printf ("@ARGNAME@ = %s\n", new_meta);
+ new_meta += (strlen(new_meta) + 1);
+ }
+
+#pragma fragment VECTOR
+ {
+ size_t len = *((size_t *)new_meta);
+ new_meta += sizeof(len);
+ printf ("@ARGNAME@ = <%zu bytes>\n", len);
+ new_data += len;
+ }
+
+#pragma fragment IATT
+ {
+ ia_prot_t *myprot = ((ia_prot_t *)new_meta);
+ printf ("@ARGNAME@ = iatt {\n");
+ printf (" ia_prot = %c%c%c",
+ myprot->suid ? 'S' : '-',
+ myprot->sgid ? 'S' : '-',
+ myprot->sticky ? 'T' : '-');
+ printf ("%c%c%c",
+ myprot->owner.read ? 'r' : '-',
+ myprot->owner.write ? 'w' : '-',
+ myprot->owner.exec ? 'x' : '-');
+ printf ("%c%c%c",
+ myprot->group.read ? 'r' : '-',
+ myprot->group.write ? 'w' : '-',
+ myprot->group.exec ? 'x' : '-');
+ printf ("%c%c%c\n",
+ myprot->other.read ? 'r' : '-',
+ myprot->other.write ? 'w' : '-',
+ myprot->other.exec ? 'x' : '-');
+ new_meta += sizeof(ia_prot_t);
+ uint32_t *myints = (uint32_t *)new_meta;
+ printf (" ia_uid = %u\n", myints[0]);
+ printf (" ia_gid = %u\n", myints[1]);
+ printf (" ia_atime = %u.%09u\n", myints[2], myints[3]);
+ printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]);
+ new_meta += sizeof(*myints) * 6;
+ }
+
+#pragma fragment FOP
+void
+fdl_dump_@NAME@ (char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+
+ /* TBD: word size/endianness */
+@FUNCTION_BODY@
+
+ *old_meta = new_meta;
+ *old_data = new_data;
+}
+
+#pragma fragment CASE
+ case GF_FOP_@UPNAME@:
+ printf ("=== GF_FOP_@UPNAME@\n");
+ fdl_dump_@NAME@ (&new_meta, &new_data);
+ break;
+
+#pragma fragment EPILOG
+/*
+ * Dump a single journal event.
+ *
+ * An event_header_t is read from *old_meta and the meta cursor is
+ * advanced past it; the switch then dispatches on the header's fop_type
+ * (presumably to the per-fop dump routines generated from this
+ * template - confirm against gen_dumper.py).
+ *
+ * *old_meta and *old_data are updated to the new cursor positions even
+ * when the fop is unknown (in which case only the header has been
+ * consumed here).
+ *
+ * Returns 1 if the fop type was recognized, 0 otherwise.
+ */
+int
+fdl_dump (char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+ static glfs_t *fs = NULL;
+ int recognized = 1;
+ event_header_t *eh;
+
+ /*
+ * We don't really call anything else in GFAPI, but this is the most
+ * convenient way to satisfy all of the spurious dependencies on how it
+ * or glusterfsd initialize (e.g. setting up THIS).
+ */
+ if (!fs) {
+ fs = glfs_new ("dummy");
+ }
+
+ eh = (event_header_t *)new_meta;
+ new_meta += sizeof (*eh);
+
+ /* TBD: check event_type instead of assuming NEW_REQUEST */
+
+ switch (eh->fop_type) {
+@SWITCH_BODY@
+
+ default:
+ printf ("unknown fop %u\n", eh->fop_type);
+ recognized = 0;
+ }
+
+ *old_meta = new_meta;
+ *old_data = new_data;
+ return recognized;
+}
diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c
new file mode 100644
index 00000000000..fdcfafbac31
--- /dev/null
+++ b/xlators/experimental/fdl/src/fdl-tmpl.c
@@ -0,0 +1,506 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "call-stub.h"
+#include "iatt.h"
+#include "defaults.h"
+#include "syscall.h"
+#include "xlator.h"
+#include "jnl-types.h"
+
+/* TBD: make tunable */
+#define META_FILE_SIZE (1 << 20)
+#define DATA_FILE_SIZE (1 << 24)
+
+/*
+ * Memory-accounting type identifiers for this translator, numbered after the
+ * common ones.  gf_fdl_mt_end marks the end of the list and is used to size
+ * the accounting table in mem_acct_init.
+ */
+enum gf_fdl {
+ gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1,
+ gf_fdl_mt_end
+};
+
+/* State for one journal file (the worker keeps one for meta, one for data). */
+typedef struct {
+ /* Short label ("meta" or "data") used in file names and log messages. */
+ char *type;
+ /* Number of bytes to allocate and map for the file. */
+ off_t size;
+ /* Allocated path of the currently open file, or NULL. */
+ char *path;
+ /* Descriptor of the currently open file, or -1. */
+ int fd;
+ /* Base address of the mapping, or NULL when not mapped. */
+ void * ptr;
+ /* Bytes written so far; the file is truncated to this length on close. */
+ off_t max_offset;
+} log_obj_t;
+
+/* Per-translator state shared by the fop entry points and the worker. */
+typedef struct {
+ /* Queue of call stubs waiting to be journaled, in arrival order. */
+ struct list_head reqs;
+ /* Taken around every access to reqs. */
+ pthread_mutex_t req_lock;
+ /* Signalled on enqueue, on a change-term request, and on shutdown. */
+ pthread_cond_t req_cond;
+ /* Directory for journal files (the "log-path" option). */
+ char *log_dir;
+ /* Thread running fdl_worker. */
+ pthread_t worker;
+ /* Set by fdl_fini to ask the worker to exit. */
+ gf_boolean_t should_stop;
+ /* Set by the CHANGE_TERM ipc to ask the worker to start a new term. */
+ gf_boolean_t change_term;
+ log_obj_t meta_log;
+ log_obj_t data_log;
+ /* Current term number; incremented each time new journal files open. */
+ int term;
+ /* Term that was current when the worker started. */
+ int first_term;
+} fdl_private_t;
+
+/*
+ * Hand a call stub to the journaling worker: append it to the request queue
+ * under req_lock, then signal req_cond once the lock has been dropped.
+ */
+void
+fdl_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        fdl_private_t   *conf = this->private;
+
+        pthread_mutex_lock (&conf->req_lock);
+        {
+                list_add_tail (&stub->list, &conf->reqs);
+        }
+        pthread_mutex_unlock (&conf->req_lock);
+
+        pthread_cond_signal (&conf->req_cond);
+}
+
+#pragma generate
+
+/*
+ * Create, size, and map the journal file for one log object and term.
+ *
+ * The path is built from the log directory, the process's log identifier
+ * (or "fubar" when there is none), the object's type label and the term
+ * number.  On success obj->path, obj->fd and obj->ptr describe the new file
+ * and obj->max_offset is reset to zero.
+ *
+ * Returns the base address of the mapping, or NULL on any failure (in which
+ * case the descriptor and path have been released again).
+ */
+char *
+fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term)
+{
+        fdl_private_t   *priv   = this->private;
+        int             ret;
+        char *          ptr     = NULL;
+
+        /*
+         * Use .jnl instead of .log so that we don't get test info (mistakenly)
+         * appended to our journal files.
+         */
+        if (this->ctx->cmd_args.log_ident) {
+                ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl",
+                                   priv->log_dir, this->ctx->cmd_args.log_ident,
+                                   obj->type, term);
+        }
+        else {
+                ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl",
+                                   priv->log_dir, obj->type, term);
+        }
+        if ((ret <= 0) || !obj->path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to construct log-file path");
+                goto err;
+        }
+
+        /* off_t need not be long, so convert explicitly for %ld. */
+        gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)",
+                obj->path, (long) obj->size);
+
+        obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666);
+        if (obj->fd < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to open log file (%s)", strerror(errno));
+                goto err;
+        }
+
+#if !defined(GF_BSD_HOST_OS)
+        /*
+         * NetBSD can just go die in a fire.  Even though it claims to support
+         * fallocate/posix_fallocate they don't actually *do* anything so the
+         * file size remains zero.  Then mmap succeeds anyway, but any access
+         * to the mmap'ed region will segfault.  It would be acceptable for
+         * fallocate to do what it says, for mmap to fail, or for access to
+         * extend the file.  NetBSD managed to hit the trifecta of Getting
+         * Everything Wrong, and debugging in that environment to get this far
+         * has already been painful enough (systems I worked on in 1990 were
+         * better that way).  We'll fall through to the lseek/write method, and
+         * performance will be worse, and TOO BAD.
+         */
+        if (sys_fallocate(obj->fd,0,0,obj->size) < 0)
+#endif
+        {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to fallocate space for log file");
+                /* Have to do this the ugly page-faulty way. */
+                (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET);
+                (void) sys_write (obj->fd, "", 1);
+        }
+
+        ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0);
+        if (ptr == MAP_FAILED) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)",
+                        strerror(errno));
+                goto err;
+        }
+
+        obj->ptr = ptr;
+        obj->max_offset = 0;
+        return ptr;
+
+err:
+        if (obj->fd >= 0) {
+                sys_close (obj->fd);
+                obj->fd = (-1);
+        }
+        if (obj->path) {
+                GF_FREE (obj->path);
+                obj->path = NULL;
+        }
+        /*
+         * Always report failure as NULL.  After a failed mmap the local
+         * pointer holds MAP_FAILED, which is not NULL, and callers only
+         * test the result against NULL.
+         */
+        return NULL;
+}
+
+/*
+ * Release everything fdl_open_term_log set up for one journal file: drop the
+ * mapping, trim the file to the bytes actually written, close it, and free
+ * the path.  Each step is skipped if that resource is not currently held, so
+ * calling this on an already-closed object is harmless.
+ */
+void
+fdl_close_term_log (xlator_t *this, log_obj_t *obj)
+{
+        fdl_private_t   *conf = this->private;
+        int             rc;
+
+        if (obj->ptr != NULL) {
+                (void) munmap (obj->ptr, obj->size);
+                obj->ptr = NULL;
+        }
+
+        if (!(obj->fd < 0)) {
+                gf_log (this->name, GF_LOG_INFO,
+                        "truncating term %d %s journal to %ld",
+                        conf->term, obj->type, obj->max_offset);
+                rc = sys_ftruncate (obj->fd, obj->max_offset);
+                if (rc < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "failed to truncate journal (%s)",
+                                strerror(errno));
+                }
+                sys_close (obj->fd);
+                obj->fd = (-1);
+        }
+
+        if (obj->path != NULL) {
+                GF_FREE (obj->path);
+                obj->path = NULL;
+        }
+}
+
+/*
+ * Move to the next term: close both current journal files, bump the term
+ * number, and open a fresh meta/data pair.  The new mapping addresses are
+ * stored through meta_ptr/data_ptr.
+ *
+ * Returns _gf_true when both files were opened.  If the meta file cannot be
+ * opened the data file is not attempted.
+ */
+gf_boolean_t
+fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr)
+{
+        fdl_private_t   *conf   = this->private;
+        char            *mapped = NULL;
+
+        fdl_close_term_log (this, &conf->meta_log);
+        fdl_close_term_log (this, &conf->data_log);
+
+        conf->term += 1;
+
+        mapped = fdl_open_term_log (this, &conf->meta_log, conf->term);
+        *meta_ptr = mapped;
+        if (mapped == NULL) {
+                return _gf_false;
+        }
+
+        mapped = fdl_open_term_log (this, &conf->data_log, conf->term);
+        *data_ptr = mapped;
+
+        return (mapped != NULL) ? _gf_true : _gf_false;
+}
+
+/*
+ * Flush the pages that cover [start, start+len) of a journal mapping.
+ * msync needs a page-aligned address, so the range is widened downwards to
+ * the enclosing page boundary.  Failure is logged but not fatal; "what"
+ * names the journal ("meta" or "data") in the warning.
+ */
+static void
+fdl_msync_range (xlator_t *this, char *start, size_t len,
+                 unsigned long page_mask, const char *what)
+{
+        unsigned long   base_as_ul      = (unsigned long) start;
+        void            *msync_ptr      = (void *) (base_as_ul & ~page_mask);
+        size_t          lead_in         = (size_t) (base_as_ul & page_mask);
+
+        if (msync (msync_ptr, lead_in + len, MS_SYNC) < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to log request %s (%s)",
+                        what, strerror(errno));
+        }
+}
+
+/*
+ * Journaling thread.  Opens the first pair of journal files, then loops:
+ * take one stub off the queue, serialize it into the meta/data mappings
+ * (switching to a new term first if it would not fit), msync what was
+ * written, and resume the stub.
+ *
+ * The thread exits (returning NULL) when asked to stop, or when journal
+ * files cannot be opened; both journal files are closed on the way out.
+ */
+void *
+fdl_worker (void *arg)
+{
+        xlator_t        *this           = arg;
+        fdl_private_t   *priv           = this->private;
+        call_stub_t     *stub;
+        char *          meta_ptr        = NULL;
+        off_t           *meta_offset    = &priv->meta_log.max_offset;
+        char *          data_ptr        = NULL;
+        off_t           *data_offset    = &priv->data_log.max_offset;
+        long            page_size;
+        unsigned long   page_mask;
+        gf_boolean_t    recycle;
+
+        /* Don't assume 4K pages; fall back to that only if we can't ask. */
+        page_size = sysconf (_SC_PAGESIZE);
+        if (page_size <= 0) {
+                page_size = 4096;
+        }
+        page_mask = (unsigned long) page_size - 1;
+
+        priv->meta_log.type = "meta";
+        priv->meta_log.size = META_FILE_SIZE;
+        priv->meta_log.path = NULL;
+        priv->meta_log.fd = (-1);
+        priv->meta_log.ptr = NULL;
+
+        priv->data_log.type = "data";
+        priv->data_log.size = DATA_FILE_SIZE;
+        priv->data_log.path = NULL;
+        priv->data_log.fd = (-1);
+        priv->data_log.ptr = NULL;
+
+        /* TBD: initial term should come from persistent storage (e.g. etcd) */
+        priv->first_term = ++(priv->term);
+        meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term);
+        if (!meta_ptr) {
+                goto err_unlocked;
+        }
+        data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term);
+        if (!data_ptr) {
+                /* The exit path closes the meta log we already opened. */
+                goto err_unlocked;
+        }
+
+        for (;;) {
+                pthread_mutex_lock (&priv->req_lock);
+                while (list_empty(&priv->reqs)) {
+                        /*
+                         * Look at the control flags *before* sleeping as
+                         * well as after each wakeup.  A stop or change-term
+                         * request that arrived while we were busy with a
+                         * stub has already sent its signal; if we went
+                         * straight to sleep here we would never see it.
+                         */
+                        if (priv->should_stop) {
+                                goto err_locked;
+                        }
+                        if (priv->change_term) {
+                                if (!fdl_change_term(this, &meta_ptr,
+                                                           &data_ptr)) {
+                                        goto err_locked;
+                                }
+                                priv->change_term = _gf_false;
+                                continue;
+                        }
+                        pthread_cond_wait (&priv->req_cond, &priv->req_lock);
+                }
+                stub = list_entry (priv->reqs.next, call_stub_t, list);
+                list_del_init (&stub->list);
+                pthread_mutex_unlock (&priv->req_lock);
+                /*
+                 * TBD: batch requests
+                 *
+                 * What we should do here is gather up *all* of the requests
+                 * that have accumulated since we were last at this point,
+                 * blast them all out in one big writev, and then dispatch them
+                 * all before coming back for more.  That maximizes throughput,
+                 * at some cost to latency (due to queuing effects at the log
+                 * stage).  Note that we're likely to be above io-threads, so
+                 * the dispatch itself will be parallelized (at further cost to
+                 * latency).  For now, we just do the simplest thing and handle
+                 * one request all the way through before fetching the next.
+                 *
+                 * So, why mmap/msync instead of writev/fdatasync?  Because it's
+                 * faster.  Much faster.  So much faster that I half-suspect
+                 * cheating, but it's more convenient for now than having to
+                 * ensure that everything's page-aligned for O_DIRECT (the only
+                 * alternative that still might avoid ridiculous levels of
+                 * local-FS overhead).
+                 *
+                 * TBD: check that msync really does get our data to disk.
+                 */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "logging %u+%u bytes for op %d",
+                        stub->jnl_meta_len, stub->jnl_data_len, stub->fop);
+                /* Start a new term if either journal can't hold this stub. */
+                recycle = _gf_false;
+                if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) {
+                        recycle = _gf_true;
+                }
+                if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) {
+                        recycle = _gf_true;
+                }
+                if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) {
+                        goto err_unlocked;
+                }
+                meta_ptr = priv->meta_log.ptr;
+                data_ptr = priv->data_log.ptr;
+                gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p",
+                        meta_ptr + *meta_offset, data_ptr + *data_offset);
+                stub->serialize (stub, meta_ptr + *meta_offset,
+                                       data_ptr + *data_offset);
+                if (stub->jnl_meta_len > 0) {
+                        fdl_msync_range (this, meta_ptr + *meta_offset,
+                                         stub->jnl_meta_len, page_mask,
+                                         "meta");
+                        *meta_offset += stub->jnl_meta_len;
+                }
+                if (stub->jnl_data_len > 0) {
+                        fdl_msync_range (this, data_ptr + *data_offset,
+                                         stub->jnl_data_len, page_mask,
+                                         "data");
+                        *data_offset += stub->jnl_data_len;
+                }
+                call_resume (stub);
+        }
+
+err_locked:
+        pthread_mutex_unlock (&priv->req_lock);
+err_unlocked:
+        fdl_close_term_log (this, &priv->meta_log);
+        fdl_close_term_log (this, &priv->data_log);
+        return NULL;
+}
+
+/*
+ * ipc fop for the journaling translator.
+ *
+ * FDL_IPC_CHANGE_TERM asks the worker to switch to a new pair of journal
+ * files and unwinds with success straight away.  FDL_IPC_GET_TERMS unwinds
+ * with a dict holding the "first" and "last" term numbers, or with -1 and
+ * ENOMEM/EIO if the dict cannot be built.  Any other op is passed down to
+ * the first child unchanged.
+ */
+int32_t
+fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        fdl_private_t   *priv   = this->private;
+        dict_t          *tdict  = NULL;
+        int32_t         gt_err  = EIO;
+
+        switch (op) {
+
+        case FDL_IPC_CHANGE_TERM:
+                gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op");
+                /*
+                 * Set the flag under the same lock the worker holds while
+                 * deciding whether to sleep, so the request can't slip in
+                 * between its check and its wait.
+                 */
+                pthread_mutex_lock (&priv->req_lock);
+                priv->change_term = _gf_true;
+                pthread_mutex_unlock (&priv->req_lock);
+                pthread_cond_signal (&priv->req_cond);
+                STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL);
+                break;
+
+        case FDL_IPC_GET_TERMS:
+                gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op");
+                tdict = dict_new ();
+                if (!tdict) {
+                        gt_err = ENOMEM;
+                        goto gt_done;
+                }
+                if (dict_set_int32(tdict,"first",priv->first_term) != 0) {
+                        goto gt_done;
+                }
+                if (dict_set_int32(tdict,"last",priv->term) != 0) {
+                        goto gt_done;
+                }
+                gt_err = 0;
+        gt_done:
+                if (gt_err) {
+                        STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL);
+                } else {
+                        STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict);
+                }
+                /* Drop our own reference whether or not we unwound with it. */
+                if (tdict) {
+                        dict_unref (tdict);
+                }
+                break;
+
+        default:
+                STACK_WIND_TAIL (frame,
+                                 FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->ipc,
+                                 op, xdata);
+        }
+
+        return 0;
+}
+
+/*
+ * Translator init: allocate the private state, set up the request queue and
+ * its lock/condition, read the "log-path" option, install the hand-written
+ * ipc fop, and start the journaling worker thread.
+ *
+ * Returns 0 on success.  On failure returns -1 with everything allocated
+ * here released again and this->private no longer pointing at it.
+ */
+int
+fdl_init (xlator_t *this)
+{
+        fdl_private_t   *priv           = NULL;
+        gf_boolean_t    lock_ready      = _gf_false;
+        gf_boolean_t    cond_ready      = _gf_false;
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t);
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to allocate fdl_private");
+                goto err;
+        }
+
+        INIT_LIST_HEAD (&priv->reqs);
+        if (pthread_mutex_init (&priv->req_lock, NULL) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to initialize req_lock");
+                goto err;
+        }
+        lock_ready = _gf_true;
+        if (pthread_cond_init (&priv->req_cond, NULL) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to initialize req_cond");
+                goto err;
+        }
+        cond_ready = _gf_true;
+
+        GF_OPTION_INIT ("log-path", priv->log_dir, path, err);
+
+        this->private = priv;
+        /*
+         * The rest of the fop table is automatically generated, so this is a
+         * bit cleaner than messing with the generation to add a hand-written
+         * exception.
+         */
+        this->fops->ipc = fdl_ipc;
+
+        if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to start fdl_worker");
+                goto err;
+        }
+
+        return 0;
+
+err:
+        if (priv) {
+                /* Don't leave a pointer to freed memory behind for fini. */
+                if (this->private == priv) {
+                        this->private = NULL;
+                }
+                if (cond_ready) {
+                        (void) pthread_cond_destroy (&priv->req_cond);
+                }
+                if (lock_ready) {
+                        (void) pthread_mutex_destroy (&priv->req_lock);
+                }
+                GF_FREE(priv);
+        }
+        return -1;
+}
+
+/*
+ * Translator teardown: ask the worker thread to stop, wait for it to exit,
+ * then release the synchronization objects and the private state.  Does
+ * nothing if there is no private state.
+ */
+void
+fdl_fini (xlator_t *this)
+{
+        fdl_private_t   *priv = this->private;
+
+        if (!priv) {
+                return;
+        }
+
+        /*
+         * Raise the flag under req_lock so the request can't land between
+         * the worker's check of it and the worker going to sleep.
+         */
+        pthread_mutex_lock (&priv->req_lock);
+        priv->should_stop = _gf_true;
+        pthread_cond_signal (&priv->req_cond);
+        pthread_mutex_unlock (&priv->req_lock);
+
+        pthread_join (priv->worker, NULL);
+
+        /* The worker is gone, so nothing else is using these any more. */
+        (void) pthread_cond_destroy (&priv->req_cond);
+        (void) pthread_mutex_destroy (&priv->req_lock);
+
+        this->private = NULL;
+        GF_FREE(priv);
+}
+
+/*
+ * Pick up a changed journal directory.  The option is looked up under
+ * "log-path", the key the option table declares and fdl_init reads.
+ * Always returns 0.
+ */
+int
+fdl_reconfigure (xlator_t *this, dict_t *options)
+{
+        fdl_private_t   *priv = this->private;
+
+        GF_OPTION_RECONF ("log-path", priv->log_dir, options, path, out);
+        /* TBD: react if it changed */
+
+out:
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this translator, sized to cover all of the
+ * gf_fdl memory types.  Returns the result of xlator_mem_acct_init, or -1
+ * if "this" is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int     ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("fdl", this, out);
+
+        ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1);
+
+        if (ret != 0) {
+                /* Keep the space: adjacent literals are joined as-is. */
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting init "
+                        "failed");
+        }
+out:
+        return ret;
+}
+
+/* Translator lifecycle entry points; notify uses the stock default. */
+class_methods_t class_methods = {
+ .init = fdl_init,
+ .fini = fdl_fini,
+ .reconfigure = fdl_reconfigure,
+ .notify = default_notify,
+};
+
+/*
+ * Options accepted by this translator.  "log-path" is the only one; it is
+ * read into fdl_private_t.log_dir by fdl_init.  The entry with a NULL key
+ * ends the table.
+ */
+struct volume_options options[] = {
+ { .key = {"log-path"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = DEFAULT_LOG_FILE_DIRECTORY,
+ .description = "Directory for FDL files."
+ },
+ { .key = {NULL} },
+};
+
+/* No translator-specific callbacks: all three use the stock defaults. */
+struct xlator_cbks cbks = {
+ .release = default_release,
+ .releasedir = default_releasedir,
+ .forget = default_forget,
+};
diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py
new file mode 100755
index 00000000000..42db55d2cb3
--- /dev/null
+++ b/xlators/experimental/fdl/src/gen_dumper.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+
+import os
+import re
+import sys
+
+curdir = os.path.dirname (sys.argv[0])
+gendir = os.path.join (curdir, '../../../../libglusterfs/src')
+sys.path.append (gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# See the big header comment at the start of gen_fdl.py to see how the stages
+# fit together. The big difference here is that *all* of the C code is in the
+# template file as labelled fragments, instead of as Python strings. That
+# makes it much easier to edit in one place, with proper syntax highlighting
+# and indentation.
+#
+# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
+# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
+#
+# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
+# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
+# in the middle of each function) is emitted immediately; the expanded CASE
+# code is saved for the next stage.
+#
+# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
+# in the middle of EPILOG, to generate the whole output file.
+#
+# Another way of looking at it is to consider how the fragments appear in
+# the final output:
+#
+# PROLOG
+# FOP (expanded for CREATE)
+# FOP before FUNCTION_BODY
+# LOC, INTEGER, GFID, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# FOP (expanded for WRITEV)
+# FOP before FUNCTION_BODY
+# GFID, VECTOR, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# (more FOPs)
+# EPILOG
+# EPILOG before CASE
+# CASE statements (one per fop)
+# EPILOG after CASE
+
+# Maps the C type of a fop argument to a pair: the name of the template
+# fragment used to dump it, and the printf-style format substituted for
+# @FORMAT@ in that fragment (empty where the fragment needs none).
+typemap = {
+ 'dict_t *': ( "DICT", ""),
+ 'fd_t *': ( "GFID", ""),
+ 'dev_t': ( "DOUBLE", "%ld (0x%lx)"),
+ 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"),
+ 'int32_t': ( "INTEGER", "%d (0x%x)"),
+ 'mode_t': ( "INTEGER", "%d (0x%x)"),
+ 'off_t': ( "DOUBLE", "%ld (0x%lx)"),
+ 'size_t': ( "DOUBLE", "%ld (0x%lx)"),
+ 'uint32_t': ( "INTEGER", "%d (0x%x)"),
+ 'loc_t *': ( "LOC", ""),
+ 'const char *': ( "STRING", ""),
+ 'struct iovec *': ( "VECTOR", ""),
+ 'struct iatt *': ( "IATT", ""),
+}
+
+def get_special_subs (args):
+ code = ""
+ for arg in args:
+ if (arg[0] != 'fop-arg') or (len(arg) < 4):
+ continue
+ recon_type, recon_fmt = typemap[arg[2]]
+ code += fragments[recon_type].replace("@ARGNAME@",arg[3]) \
+ .replace("@FORMAT@",recon_fmt)
+ return code
+
+def gen_functions ():
+ code = ""
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value)
+ # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
+ code += generate(fragments["FOP"],name,fop_subs)
+ return code
+
+def gen_cases ():
+ code = ""
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ # Add the CASE fragment for this fop.
+ code += generate(fragments["CASE"],name,fop_subs)
+ return code
+
+def load_fragments (path="recon-tmpl.c"):
+ pragma_re = re.compile('pragma fragment (.*)')
+ cur_symbol = None
+ cur_value = ""
+ result = {}
+ for line in open(path,"r").readlines():
+ m = pragma_re.search(line)
+ if m:
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ cur_symbol = m.group(1)
+ cur_value = ""
+ else:
+ cur_value += line
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ return result
+
+# Script entry point: argv[1] is the fragment template file.  The output is
+# the PROLOG fragment, the expanded per-fop functions, then the EPILOG
+# fragment with @SWITCH_BODY@ replaced by the expanded CASE code.
+if __name__ == "__main__":
+ fragments = load_fragments(sys.argv[1])
+ print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+ print fragments["PROLOG"]
+ print gen_functions()
+ print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases())
+ print "/* END GENERATED CODE */"
diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py
new file mode 100755
index 00000000000..7f6b1aaaeaa
--- /dev/null
+++ b/xlators/experimental/fdl/src/gen_fdl.py
@@ -0,0 +1,328 @@
+#!/usr/bin/python
+
+import os
+import sys
+
+curdir = os.path.dirname (sys.argv[0])
+gendir = os.path.join (curdir, '../../../../libglusterfs/src')
+sys.path.append (gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# Generation occurs in three stages. In this case, it actually makes more
+# sense to discuss them in the *opposite* order of that in which they
+# actually happen.
+#
+# Stage 3 is to insert all of the generated code into a file, replacing the
+# "#pragma generate" that's already there. The file can thus contain all
+# sorts of stuff that's not specific to one fop, either before or after the
+# generated code as appropriate.
+#
+# Stage 2 is to generate all of the code *for a particular fop*, using a
+# string-valued template plus a table of substitution values. Most of these
+# are built in to the generator itself. However, we also add a couple that
+# are specific to this particular translator - LEN_CODE and SER_CODE. These
+# are per-fop functions to get the length or the contents (respectively) of
+# what we'll put in the log. As with stage 3 allowing per-file boilerplate
+# before and after generated code, this allows per-fop boilerplate before and
+# after generated code.
+#
+# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for
+# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS
+# already are. We do this by looking at the fop-description table in the
+# generator module, then doing our own template substitution to plug each
+# specific argument name into another string-valued template.
+#
+# So, what does this leave us with in terms of variables and files?
+#
+# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE
+# strings, which are used to generate the length and serialization code for
+# each argument type.
+#
+# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_
+# prefix), which are used (along with the output from stage 1) to generate
+# whole functions.
+#
+# For stage 3, we have a whole separate file (fdl-tmpl.c) into which we insert
+# the collection of all functions defined in stage 2.
+
+
+# Per-fop function that works out how many journal bytes a stub needs.
+# @LEN_CODE@ is the concatenated per-argument length code; the totals are
+# stored in the stub's jnl_meta_len and jnl_data_len fields.
+LEN_TEMPLATE = """
+void
+fdl_len_@NAME@ (call_stub_t *stub)
+{
+ uint32_t meta_len = sizeof (event_header_t);
+ uint32_t data_len = 0;
+
+ /* TBD: global stuff, e.g. uid/gid */
+@LEN_CODE@
+
+ /* TBD: pad extension length */
+ stub->jnl_meta_len = meta_len;
+ stub->jnl_data_len = data_len;
+}
+"""
+
+# Per-fop function that writes the event header followed by the serialized
+# arguments (@SER_CODE@) and records the number of bytes written after the
+# header in ext_length.
+SER_TEMPLATE = """
+void
+fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf)
+{
+ event_header_t *eh;
+ unsigned long offset = 0;
+
+ /* TBD: word size/endianness */
+ eh = (event_header_t *)meta_buf;
+ eh->event_type = NEW_REQUEST;
+ eh->fop_type = GF_FOP_@UPNAME@;
+ eh->request_id = 0; // TBD
+ meta_buf += sizeof (*eh);
+@SER_CODE@
+ /* TBD: pad extension length */
+ eh->ext_length = offset;
+}
+"""
+
+# Per-fop callback that simply unwinds with whatever the child returned.
+CBK_TEMPLATE = """
+int32_t
+fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+# Per-fop function that winds the call to the first child, with the matching
+# fdl_@NAME@_cbk as the callback.
+CONTINUE_TEMPLATE = """
+int32_t
+fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ STACK_WIND (frame, fdl_@NAME@_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+
+"""
+
+# Per-fop entry point: wrap the call in a stub, compute its journal lengths,
+# attach its serializer, and queue it for the worker thread.
+# NOTE(review): the stub is used without a NULL check, and it resumes into
+# default_@NAME@ rather than fdl_@NAME@_continue - confirm both are intended.
+FOP_TEMPLATE = """
+int32_t
+fdl_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ call_stub_t *stub;
+
+ stub = fop_@NAME@_stub (frame, default_@NAME@,
+ @SHORT_ARGS@);
+ fdl_len_@NAME@ (stub);
+ stub->serialize = fdl_serialize_@NAME@;
+ fdl_enqueue (this, stub);
+
+ return 0;
+}
+"""
+
+# Length of a serialized dict: for each member, an int key length, the key
+# with its NUL, an int value length, and the value bytes; then one final int
+# for the terminating zero.
+LEN_DICT_TEMPLATE = """
+ if (@SRC@) {
+ data_pair_t *memb;
+ for (memb = @SRC@->members_list; memb; memb = memb->next) {
+ meta_len += sizeof(int);
+ meta_len += strlen(memb->key) + 1;
+ meta_len += sizeof(int);
+ meta_len += memb->value->len;
+ }
+ }
+ meta_len += sizeof(int);
+"""
+
+# A GFID is always stored as 16 bytes.
+LEN_GFID_TEMPLATE = """
+ meta_len += 16;
+"""
+
+# Integer-like arguments are stored at their native size.
+LEN_INTEGER_TEMPLATE = """
+ meta_len += sizeof (@SRC@);
+"""
+
+# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL
+LEN_LOC_TEMPLATE = """
+ if (@SRC@.name) {
+ meta_len += (strlen (@SRC@.name) + 34);
+ } else {
+ meta_len += 33;
+ }
+"""
+
+LEN_STRING_TEMPLATE = """
+ if (@SRC@) {
+ meta_len += (strlen (@SRC@) + 1);
+ } else {
+ meta_len += 1;
+ }
+"""
+
+LEN_VECTOR_TEMPLATE = """
+ meta_len += sizeof(size_t);
+ data_len += iov_length (@VEC@, @CNT@);
+"""
+
+LEN_IATT_TEMPLATE = """
+ meta_len += sizeof(@SRC@.ia_prot);
+ meta_len += sizeof(@SRC@.ia_uid);
+ meta_len += sizeof(@SRC@.ia_gid);
+ meta_len += sizeof(@SRC@.ia_atime);
+ meta_len += sizeof(@SRC@.ia_atime_nsec);
+ meta_len += sizeof(@SRC@.ia_mtime);
+ meta_len += sizeof(@SRC@.ia_mtime_nsec);
+"""
+
+SERLZ_DICT_TEMPLATE = """
+ if (@SRC@) {
+ data_pair_t *memb;
+ for (memb = @SRC@->members_list; memb; memb = memb->next) {
+ *((int *)(meta_buf+offset)) = strlen(memb->key) + 1;
+ offset += sizeof(int);
+ strcpy (meta_buf+offset, memb->key);
+ offset += strlen(memb->key) + 1;
+ *((int *)(meta_buf+offset)) = memb->value->len;
+ offset += sizeof(int);
+ memcpy (meta_buf+offset, memb->value->data, memb->value->len);
+ offset += memb->value->len;
+ }
+ }
+ *((int *)(meta_buf+offset)) = 0;
+ offset += sizeof(int);
+"""
+
+SERLZ_GFID_TEMPLATE = """
+ memcpy (meta_buf+offset, @SRC@->inode->gfid, 16);
+ offset += 16;
+"""
+
+SERLZ_INTEGER_TEMPLATE = """
+ memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@));
+ offset += sizeof(@SRC@);
+"""
+
+SERLZ_LOC_TEMPLATE = """
+ memcpy (meta_buf+offset, @SRC@.gfid, 16);
+ offset += 16;
+ memcpy (meta_buf+offset, @SRC@.pargfid, 16);
+ offset += 16;
+ if (@SRC@.name) {
+ *(meta_buf+offset) = 1;
+ ++offset;
+ strcpy (meta_buf+offset, @SRC@.name);
+ offset += (strlen (@SRC@.name) + 1);
+ } else {
+ *(meta_buf+offset) = 0;
+ ++offset;
+ }
+"""
+
+SERLZ_STRING_TEMPLATE = """
+ if (@SRC@) {
+ *(meta_buf+offset) = 1;
+ ++offset;
+ strcpy (meta_buf+offset, @SRC@);
+ offset += strlen(@SRC@);
+ } else {
+ *(meta_buf+offset) = 0;
+ ++offset;
+ }
+"""
+
+# The total payload length goes into the meta journal; the payload bytes
+# themselves are copied, vector element by element, into the data journal.
+SERLZ_VECTOR_TEMPLATE = """
+ *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@);
+ offset += sizeof(size_t);
+ int32_t i;
+ for (i = 0; i < @CNT@; ++i) {
+ memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len);
+ data_buf += @VEC@[i].iov_len;
+ }
+"""
+
+# We don't need to save all of the fields - only those affected by chown,
+# chgrp, chmod, and utime.
+# NOTE(review): every value after ia_prot is stored through a uint32_t
+# pointer while the offset advances by sizeof the field - this assumes those
+# fields are all 32 bits wide; confirm against struct iatt.
+SERLZ_IATT_TEMPLATE = """
+ *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot;
+ offset += sizeof(@SRC@.ia_prot);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid;
+ offset += sizeof(@SRC@.ia_uid);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid;
+ offset += sizeof(@SRC@.ia_gid);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime;
+ offset += sizeof(@SRC@.ia_atime);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec;
+ offset += sizeof(@SRC@.ia_atime_nsec);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime;
+ offset += sizeof(@SRC@.ia_mtime);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec;
+ offset += sizeof(@SRC@.ia_mtime_nsec);
+"""
+
+# Maps the C type of a fop argument to its (length template, serialization
+# template) pair.  There is no 'struct iovec *' entry: get_special_subs
+# handles the argument named "vector" as a special case before consulting
+# this table.
+typemap = {
+ 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE),
+ 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE),
+ 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE),
+ 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE),
+ 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE),
+}
+
+def get_special_subs (args):
+ len_code = ""
+ ser_code = ""
+ for arg in args:
+ if (arg[0] != 'fop-arg') or (len(arg) < 4):
+ continue
+ # Let this throw an exception if we get an unknown field name. The
+ # broken build will remind whoever messed with the stub code that a
+ # corresponding update is needed here.
+ if arg[3] == "vector":
+ # Make it as obvious as possible that this is a special case.
+ len_code += LEN_VECTOR_TEMPLATE \
+ .replace("@VEC@","stub->args.vector") \
+ .replace("@CNT@","stub->args.count")
+ ser_code += SERLZ_VECTOR_TEMPLATE \
+ .replace("@VEC@","stub->args.vector") \
+ .replace("@CNT@","stub->args.count")
+ else:
+ len_tmpl, ser_tmpl = typemap[arg[2]]
+ src = "stub->args.%s" % arg[3]
+ len_code += len_tmpl.replace("@SRC@",src)
+ ser_code += ser_tmpl.replace("@SRC@",src)
+ return len_code, ser_code
+
+def gen_fdl ():
+ entrypoints = []
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ len_code, ser_code = get_special_subs(value)
+ fop_subs[name]["@LEN_CODE@"] = len_code[:-1]
+ fop_subs[name]["@SER_CODE@"] = ser_code[:-1]
+ print generate(LEN_TEMPLATE,name,fop_subs)
+ print generate(SER_TEMPLATE,name,fop_subs)
+ print generate(CBK_TEMPLATE,name,cbk_subs)
+ print generate(CONTINUE_TEMPLATE,name,fop_subs)
+ print generate(FOP_TEMPLATE,name,fop_subs)
+ entrypoints.append(name)
+ print "struct xlator_fops fops = {"
+ for ep in entrypoints:
+ print "\t.%s = fdl_%s," % (ep, ep)
+ print "};"
+
+for l in open(sys.argv[1],'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+ gen_fdl()
+ print "/* END GENERATED CODE */"
+ else:
+ print l[:-1]
diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py
new file mode 100755
index 00000000000..67f9ea9ebd3
--- /dev/null
+++ b/xlators/experimental/fdl/src/gen_recon.py
@@ -0,0 +1,213 @@
+#!/usr/bin/python
+
+import os
+import re
+import string
+import sys
+
+curdir = os.path.dirname (sys.argv[0])
+gendir = os.path.join (curdir, '../../../../libglusterfs/src')
+sys.path.append (gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# See the big header comment at the start of gen_fdl.py to see how the stages
+# fit together. The big difference here is that *all* of the C code is in the
+# template file as labelled fragments, instead of as Python strings. That
+# makes it much easier to edit in one place, with proper syntax highlighting
+# and indentation.
+#
+# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
+# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
+#
+# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
+# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
+# in the middle of each function) is emitted immediately; the expanded CASE
+# code is saved for the next stage.
+#
+# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
+# in the middle of EPILOG, to generate the whole output file.
+#
+# Another way of looking at it is to consider how the fragments appear in
+# the final output:
+#
+# PROLOG
+# FOP (expanded for CREATE)
+# FOP before FUNCTION_BODY
+# LOC, INTEGER, GFID, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# FOP (expanded for WRITEV)
+# FOP before FUNCTION_BODY
+# GFID, VECTOR, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# (more FOPs)
+# EPILOG
+# EPILOG before CASE
+# CASE statements (one per fop)
+# EPILOG after CASE
+
+# Maps the C type of a fop argument to the name of the template fragment
+# used for it.  get_special_subs overrides the result in a few cases (NEW_FD
+# and PARENT_LOC).
+typemap = {
+ 'dict_t *': "DICT",
+ 'fd_t *': "FD",
+ 'dev_t': "DOUBLE",
+ 'gf_xattrop_flags_t': "INTEGER",
+ 'int32_t': "INTEGER",
+ 'mode_t': "INTEGER",
+ 'off_t': "DOUBLE",
+ 'size_t': "DOUBLE",
+ 'uint32_t': "INTEGER",
+ 'loc_t *': "LOC",
+ 'const char *': "STRING",
+ 'struct iovec *': "VECTOR",
+ 'struct iatt *': "IATT",
+ 'struct iobref *': "IOBREF",
+}
+
+# Build the pieces needed to expand the FOP fragment for one fop.
+#
+# name     - fop name (e.g. "writev")
+# args     - that fop's entries from the generator's ops table
+# fop_type - the value taken from the fop's "journal" entry; "entry-op" is
+#            the only value tested here
+#
+# Returns a tuple (code, links, s_args_str, cleanups):
+#   code       - declarations plus the per-argument fragments
+#   links      - expanded LINK fragments, one per 'link' entry
+#   s_args_str - comma-separated argument list for the syncop call
+#   cleanups   - expanded *_CLEANUP fragments, in reverse argument order
+def get_special_subs (name, args, fop_type):
+ code = ""
+ cleanups = ""
+ links = ""
+ s_args = []
+ for arg in args:
+ # 'extra' entries declare a local and pass arg[3] straight to the syncop.
+ if arg[0] == 'extra':
+ code += "\t%s %s;\n\n" % (arg[2], arg[1])
+ s_args.append(arg[3])
+ continue
+ if arg[0] == 'link':
+ links += fragments["LINK"].replace("@INODE_ARG@",arg[1]) \
+ .replace("@IATT_ARG@",arg[2])
+ continue
+ if arg[0] != 'fop-arg':
+ continue
+ if (name, arg[1]) == ('writev', 'count'):
+ # Special case: just skip this. We can't mark it as 'nosync'
+ # because of the way the translator and dumper generators look for
+ # that after 'stub-name' which we don't define. Instead of adding a
+ # bunch of generic infrastructure for this one case, just pound it
+ # here.
+ continue
+ recon_type = typemap[arg[2]]
+ # print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type)
+ if (name == "create") and (arg[1] == "fd"):
+ # Special case: fd for create is new, not looked up.
+ # print "/* change to NEW_FD */"
+ recon_type = "NEW_FD"
+ elif (recon_type == "LOC") and (fop_type == "entry-op"):
+ # Need to treat this differently for inode vs. entry ops.
+ # Special case: link source is treated like inode-op.
+ if (name != "link") or (arg[1] != "oldloc"):
+ # print "/* change to PARENT_LOC */"
+ recon_type = "PARENT_LOC"
+ code += fragments[recon_type].replace("@ARGNAME@",arg[1]) \
+ .replace("@ARGTYPE@",arg[2])
+ cleanup_key = recon_type + "_CLEANUP"
+ if fragments.has_key(cleanup_key):
+ new_frag = fragments[cleanup_key].replace("@ARGNAME@",arg[1])
+ # Make sure these get added in *reverse* order. Otherwise, a
+ # failure for an earlier argument might goto a label that falls
+ # through to the cleanup code for a variable associated with a
+ # later argument, but that variable might not even have been
+ # *declared* (let alone initialized) yet. Consider the following
+ # case.
+ #
+ # process argument A (on failure goto cleanup_A)
+ # set error label to cleanup_A
+ #
+ # declare pointer variable for argument B
+ # process argument B (on failure goto cleanup_B)
+ #
+ # cleanup_A:
+ # /* whatever */
+ # cleanup_B:
+ # free pointer variable <= "USED BUT NOT SET" error here
+ #
+ # By adding these in reverse order, we ensure that cleanup_B is
+ # actually *before* cleanup_A, and nothing will try to do the free
+ # until we've actually attempted processing of B.
+ cleanups = new_frag + cleanups
+ # 'nosync' arguments are read from the journal but not passed on.
+ if 'nosync' in arg[4:]:
+ code += "\t(void)%s;\n" % arg[1];
+ continue
+ if arg[2] in ("loc_t *", "struct iatt *"):
+ # These are passed as pointers to the syncop, but they're actual
+ # structures in the generated code.
+ s_args.append("&"+arg[1]);
+ else:
+ s_args.append(arg[1])
+ # We have to handle a couple of special cases here, because some n00b
+ # defined the syncops with a different argument order than the fops they're
+ # based on.
+ if name == 'writev':
+ # Swap 'flags' and 'iobref'. Also, we need to add the iov count, which
+ # is not stored in or read from the journal. There are other ways to
+ # do that, but this is the only place we need anything similar and we
+ # already have to treat it as a special case so this is simplest.
+ s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata'
+ elif name == 'symlink':
+ # Swap 'linkpath' and 'loc'.
+ s_args_str = '&loc, linkpath, &iatt, xdata'
+ else:
+ s_args_str = string.join (s_args, ", ")
+ return code, links, s_args_str, cleanups
+
+# TBD: probably need to generate type-specific cleanup code as well - e.g.
+# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these
+# generated CLEANUP fragments will go at the end of the function, with goto
+# labels. Meanwhile, the error-checking part of each type-specific fragment
+# (e.g. LOC or FD) will need to update the indirect label that we jump to when
+# an error is detected. This will probably get messy.
+def gen_functions ():
+ code = ""
+ for name, value in ops.iteritems():
+ fop_type = [ x[1] for x in value if x[0] == "journal" ]
+ if not fop_type:
+ continue
+ body, links, syncop_args, cleanups = get_special_subs (name, value,
+ fop_type[0])
+ fop_subs[name]["@FUNCTION_BODY@"] = body
+ fop_subs[name]["@LINKS@"] = links
+ fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args
+ fop_subs[name]["@CLEANUPS@"] = cleanups
+ if name == "writev":
+ # Take advantage of the fact that, *during reconciliation*, the
+ # vector is always a single element. In normal I/O it's not.
+ fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len"
+ else:
+ fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS"
+ # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
+ code += generate(fragments["FOP"],name,fop_subs)
+ return code
+
+def gen_cases ():
+ code = ""
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ # Add the CASE fragment for this fop.
+ code += generate(fragments["CASE"],name,fop_subs)
+ return code
+
+def load_fragments (path="recon-tmpl.c"):
+ pragma_re = re.compile('pragma fragment (.*)')
+ cur_symbol = None
+ cur_value = ""
+ result = {}
+ for line in open(path,"r").readlines():
+ m = pragma_re.search(line)
+ if m:
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ cur_symbol = m.group(1)
+ cur_value = ""
+ else:
+ cur_value += line
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ return result
+
+# Script entry point: argv[1] is the fragment template file.  The output is
+# the PROLOG fragment, the expanded per-fop functions, then the EPILOG
+# fragment with @SWITCH_BODY@ replaced by the expanded CASE code.
+if __name__ == "__main__":
+ fragments = load_fragments(sys.argv[1])
+ print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+ print fragments["PROLOG"]
+ print gen_functions()
+ print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases())
+ print "/* END GENERATED CODE */"
diff --git a/xlators/experimental/fdl/src/jnl-types.h b/xlators/experimental/fdl/src/jnl-types.h
new file mode 100644
index 00000000000..8cb39d01a25
--- /dev/null
+++ b/xlators/experimental/fdl/src/jnl-types.h
@@ -0,0 +1,14 @@
+/* Value stored in event_header_t.event_type for a new request. */
+#define NEW_REQUEST (uint8_t)'N'
+
+/*
+ * Fixed-size header found at the start of a journal event. The replay code
+ * casts the metadata cursor to this type, steps past it, and dispatches on
+ * fop_type.
+ */
+typedef struct {
+ uint8_t event_type; /* e.g. NEW_REQUEST */
+ uint8_t fop_type; /* e.g. GF_FOP_SETATTR */
+ uint16_t request_id;
+ uint32_t ext_length;
+} event_header_t;
+
+/*
+ * IPC operation codes. FDL_IPC_BASE fixes the starting value; the enumerators
+ * after it take the next consecutive values.
+ */
+enum {
+ FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */
+ FDL_IPC_CHANGE_TERM,
+ FDL_IPC_GET_TERMS,
+};
diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c
new file mode 100644
index 00000000000..7c979c32a04
--- /dev/null
+++ b/xlators/experimental/fdl/src/logdump.c
@@ -0,0 +1,50 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+extern int fdl_dump (char **, char **);
+
+/*
+ * Dump the contents of a journal.
+ *
+ * argv[1] is the metadata file and argv[2] is the data file.  Both are
+ * mapped read-only and the two cursors are handed to fdl_dump repeatedly
+ * until it returns zero.
+ *
+ * Returns EXIT_SUCCESS once fdl_dump stops, or EXIT_FAILURE if an argument
+ * is missing or either file cannot be opened or mapped.
+ */
+int
+main (int argc, char **argv)
+{
+        int      meta_fd  = (-1);
+        char    *meta_buf = NULL;
+        int      data_fd  = (-1);
+        char    *data_buf = NULL;
+
+        /* Both paths are required; do not touch argv[1]/argv[2] without them. */
+        if (argc < 3) {
+                fprintf (stderr, "Usage: %s <meta-file> <data-file>\n",
+                         (argc > 0) ? argv[0] : "logdump");
+                return EXIT_FAILURE;
+        }
+
+        meta_fd = open (argv[1], O_RDONLY);
+        if (meta_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
+        if (meta_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        data_fd = open (argv[2], O_RDONLY);
+        if (data_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
+        if (data_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        /* fdl_dump advances both cursors; a zero return ends the dump. */
+        for (;;) {
+                if (!fdl_dump(&meta_buf,&data_buf)) {
+                        break;
+                }
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c
new file mode 100644
index 00000000000..523bda39418
--- /dev/null
+++ b/xlators/experimental/fdl/src/recon-tmpl.c
@@ -0,0 +1,305 @@
+#pragma fragment PROLOG
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "fd.h"
+#include "iatt.h"
+#include "syncop.h"
+#include "xlator.h"
+#include "glfs-internal.h"
+
+#include "jnl-types.h"
+
+#define GFAPI_SUCCESS 0
+
+/*
+ * Return the inode for gfid on the active subvolume.
+ *
+ * The inode table is consulted first.  If the gfid is not there, a new
+ * inode is created, looked up by gfid with syncop_lookup, and linked into
+ * the table.
+ *
+ * Returns the inode from inode_find or inode_link, or NULL if the inode
+ * cannot be allocated, the lookup fails, or linking fails.
+ */
+inode_t *
+recon_get_inode (glfs_t *fs, uuid_t gfid)
+{
+        inode_t         *inode;
+        loc_t            loc = {NULL,};
+        struct iatt      iatt;
+        int              ret;
+        inode_t         *newinode;
+
+        inode = inode_find (fs->active_subvol->itable, gfid);
+        if (inode) {
+                printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
+                return inode;
+        }
+
+        loc.inode = inode_new (fs->active_subvol->itable);
+        if (!loc.inode) {
+                return NULL;
+        }
+        gf_uuid_copy (loc.inode->gfid, gfid);
+        gf_uuid_copy (loc.gfid, gfid);
+
+        printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
+
+        ret = syncop_lookup (fs->active_subvol, &loc, &iatt,
+                             NULL, NULL, NULL);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "syncop_lookup failed (%d)\n", ret);
+                /*
+                 * The inode was never linked, so the reference taken by
+                 * inode_new is the only one; drop it instead of leaking.
+                 */
+                inode_unref (loc.inode);
+                return NULL;
+        }
+
+        newinode = inode_link (loc.inode, NULL, NULL, &iatt);
+        if (newinode) {
+                inode_lookup (newinode);
+        }
+
+        return newinode;
+}
+
+#pragma fragment DICT
+ /*
+ * Rebuild a dict argument from the metadata cursor. The loop below reads
+ * records laid out as: int key length, key bytes, int data length, data
+ * bytes; a zero key length ends the list. Keys and values are handed to
+ * dict_set_static_bin as pointers into the metadata buffer.
+ */
+ dict_t *@ARGNAME@;
+
+ @ARGNAME@ = dict_new();
+ if (!@ARGNAME@) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+
+ {
+ int key_len, data_len;
+ char *key_ptr;
+ int garbage;
+ for (;;) {
+ key_len = *((int *)new_meta);
+ new_meta += sizeof(int);
+ if (!key_len) {
+ break;
+ }
+ key_ptr = new_meta;
+ new_meta += key_len;
+ data_len = *((int *)new_meta);
+ new_meta += sizeof(int);
+ garbage = dict_set_static_bin (@ARGNAME@, key_ptr,
+ new_meta, data_len);
+ /* TBD: check error from dict_set_static_bin */
+ (void)garbage;
+ new_meta += data_len;
+ }
+ }
+
+#pragma fragment DICT_CLEANUP
+/* Cleanup entry point for the dict created above. */
+cleanup_@ARGNAME@:
+ dict_unref (@ARGNAME@);
+
+#pragma fragment DOUBLE
+ /*
+ * Read one value of the argument's type from the metadata cursor.
+ * NOTE(review): the cursor advances by sizeof(uint64_t) whatever the
+ * argument type is -- confirm this matches what the journal writer emits.
+ */
+ @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
+ new_meta += sizeof(uint64_t);
+
+#pragma fragment FD
+ /*
+ * Read a 16-byte gfid from the metadata cursor, resolve it with
+ * recon_get_inode and get an anonymous fd on the result. err_label is
+ * advanced after each successful step so that a later failure jumps to
+ * the matching cleanup label.
+ */
+ inode_t *@ARGNAME@_ino;
+ fd_t *@ARGNAME@;
+
+ @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta));
+ new_meta += 16;
+ if (!@ARGNAME@_ino) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@_ino;
+
+ @ARGNAME@ = fd_anonymous (@ARGNAME@_ino);
+ if (!@ARGNAME@) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+
+#pragma fragment FD_CLEANUP
+/* Entering at the fd label falls through to the inode unref as well. */
+cleanup_@ARGNAME@:
+ fd_unref (@ARGNAME@);
+cleanup_@ARGNAME@_ino:
+ inode_unref (@ARGNAME@_ino);
+
+#pragma fragment NEW_FD
+        /*
+         * This pseudo-type is only used for create, and in that case we know
+         * we'll be using loc.inode, so it's not worth generalizing to take an
+         * extra argument.
+         */
+        fd_t    *@ARGNAME@ = fd_anonymous (loc.inode);
+
+        /* Test the variable declared above, whatever the argument is called. */
+        if (!@ARGNAME@) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+        /* Skip 16 bytes of metadata (presumably the journaled gfid - confirm). */
+        new_meta += 16;
+
+#pragma fragment NEW_FD_CLEANUP
+/* Cleanup entry point for the anonymous fd. */
+cleanup_@ARGNAME@:
+ fd_unref (@ARGNAME@);
+
+#pragma fragment INTEGER
+ /* Read one value of the argument's type and step past it. */
+ @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
+
+ new_meta += sizeof(@ARGTYPE@);
+
+#pragma fragment LOC
+ /*
+ * Build a loc for an existing object: resolve the 16-byte gfid at the
+ * cursor with recon_get_inode, skip the following 16-byte parent gfid,
+ * then read one flag byte. A non-zero flag means a NUL-terminated name
+ * follows, and the loc's name is pointed at it in place.
+ */
+ loc_t @ARGNAME@ = { NULL, };
+
+ @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta));
+ if (!@ARGNAME@.inode) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+ gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
+ new_meta += 16;
+ new_meta += 16; /* skip over pargfid */
+ if (*(new_meta++)) {
+ @ARGNAME@.name = new_meta;
+ new_meta += strlen(new_meta) + 1;
+ }
+
+#pragma fragment LOC_CLEANUP
+/* Cleanup entry point for the loc built above. */
+cleanup_@ARGNAME@:
+ loc_wipe (&@ARGNAME@);
+
+#pragma fragment PARENT_LOC
+ /*
+ * Build a loc described by parent and name: skip the 16-byte gfid,
+ * resolve the 16-byte parent gfid with recon_get_inode, then read one
+ * flag byte followed by a NUL-terminated name. A zero flag (no name) is
+ * treated as an error. The loc's own inode is a newly allocated one.
+ */
+ loc_t @ARGNAME@ = { NULL, };
+
+ new_meta += 16; /* skip over gfid */
+ @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta));
+ if (!@ARGNAME@.parent) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+ gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
+ new_meta += 16;
+ if (!*(new_meta++)) {
+ goto *err_label;
+ }
+ @ARGNAME@.name = new_meta;
+ new_meta += strlen(new_meta) + 1;
+
+ @ARGNAME@.inode = inode_new (fs->active_subvol->itable);
+ if (!@ARGNAME@.inode) {
+ goto *err_label;
+ }
+
+#pragma fragment PARENT_LOC_CLEANUP
+/* Cleanup entry point for the loc built above. */
+cleanup_@ARGNAME@:
+ loc_wipe (&@ARGNAME@);
+
+#pragma fragment STRING
+ /*
+ * Read one flag byte; non-zero means a NUL-terminated string follows and
+ * the argument is pointed at it in place. A zero flag is an error.
+ */
+ char *@ARGNAME@;
+ if (*(new_meta++)) {
+ @ARGNAME@ = new_meta;
+ new_meta += (strlen(new_meta) + 1);
+ }
+ else {
+ goto *err_label;
+ }
+
+#pragma fragment VECTOR
+ /*
+ * Single-element I/O vector: the length comes from the metadata cursor
+ * and the payload is taken in place from the data cursor, which is then
+ * advanced by that length.
+ */
+ struct iovec @ARGNAME@;
+
+ @ARGNAME@.iov_len = *((size_t *)new_meta);
+ new_meta += sizeof(@ARGNAME@.iov_len);
+ @ARGNAME@.iov_base = new_data;
+ new_data += @ARGNAME@.iov_len;
+
+#pragma fragment IATT
+ /*
+ * Fill in part of an iatt: ia_prot first, then six 32-bit values in the
+ * order uid, gid, atime, atime_nsec, mtime, mtime_nsec. No other iatt
+ * fields are assigned here.
+ */
+ struct iatt @ARGNAME@;
+ {
+ @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
+ new_meta += sizeof(ia_prot_t);
+ uint32_t *myints = (uint32_t *)new_meta;
+ @ARGNAME@.ia_uid = myints[0];
+ @ARGNAME@.ia_gid = myints[1];
+ @ARGNAME@.ia_atime = myints[2];
+ @ARGNAME@.ia_atime_nsec = myints[3];
+ @ARGNAME@.ia_mtime = myints[4];
+ @ARGNAME@.ia_mtime_nsec = myints[5];
+ new_meta += sizeof(*myints) * 6;
+ }
+
+#pragma fragment IOBREF
+ /* Allocate a new iobref; nothing is read from either cursor. */
+ struct iobref *@ARGNAME@;
+
+ @ARGNAME@ = iobref_new();
+ if (!@ARGNAME@) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+
+#pragma fragment IOBREF_CLEANUP
+/* Cleanup entry point for the iobref allocated above. */
+cleanup_@ARGNAME@:
+ iobref_unref (@ARGNAME@);
+
+#pragma fragment LINK
+ /* TBD: check error */
+ /* Link the inode using the given iatt, then call inode_lookup on it. */
+ inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@);
+ if (new_inode) {
+ inode_lookup (new_inode);
+ }
+
+#pragma fragment FOP
+/*
+ * Replay one journaled fop. Arguments are decoded from the metadata and data
+ * cursors by the generated body, then the matching syncop is issued.
+ *
+ * err_label always holds the address of the cleanup label to jump to on
+ * failure; it starts at "done" and is moved by the body as resources are
+ * acquired. Both cursors are written back to the caller on every path.
+ *
+ * Returns 0 on success; 0xbad if the body or the syncop failed.
+ */
+int
+fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+ int ret;
+ int status = 0xbad;
+ void *err_label = &&done;
+
+@FUNCTION_BODY@
+
+ ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL);
+ if (ret != @SUCCESS_VALUE@) {
+ fprintf (stderr, "syncop_@NAME@ returned %d", ret);
+ goto *err_label;
+ }
+
+@LINKS@
+
+ status = 0;
+
+@CLEANUPS@
+
+done:
+ *old_meta = new_meta;
+ *old_data = new_data;
+ return status;
+}
+
+#pragma fragment CASE
+ /*
+ * Dispatch one fop type to its replay function. A non-zero return jumps
+ * to "done" with recognized still 0.
+ */
+ case GF_FOP_@UPNAME@:
+ printf ("=== GF_FOP_@UPNAME@\n");
+ if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) {
+ goto done;
+ }
+ recognized = 1;
+ break;
+
+#pragma fragment EPILOG
+/*
+ * Replay the single journal event at the metadata cursor.
+ *
+ * The event header is read and skipped, then the fop type selects a replay
+ * function. Both cursors are written back to the caller on every path.
+ *
+ * Returns 1 if the fop type was recognized and replayed successfully, 0 if
+ * the type is unknown or the replay function reported failure.
+ */
+int
+recon_execute (glfs_t *fs, char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+ int recognized = 0;
+ event_header_t *eh;
+
+ eh = (event_header_t *)new_meta;
+ new_meta += sizeof (*eh);
+
+ /* TBD: check event_type instead of assuming NEW_REQUEST */
+
+ switch (eh->fop_type) {
+@SWITCH_BODY@
+
+ default:
+ printf ("unknown fop %u\n", eh->fop_type);
+ }
+
+done:
+ *old_meta = new_meta;
+ *old_data = new_data;
+ return recognized;
+}
diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c
new file mode 100644
index 00000000000..14168a011e0
--- /dev/null
+++ b/xlators/experimental/fdl/src/recon.c
@@ -0,0 +1,89 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "glusterfs.h"
+#include "fd.h"
+#include "syncop.h"
+#include "glfs-internal.h"
+
+#define GFAPI_SUCCESS 0
+
+extern int recon_execute (glfs_t *, char **, char **);
+
+/*
+ * Replay a journal against a volume.
+ *
+ * argv[1] is the volfile, argv[2] the journal metadata file and argv[3] the
+ * journal data file.  Logging goes to /dev/stderr at level 7 when the
+ * RECON_DEBUG environment variable is set, and to /dev/null otherwise.
+ * Both journal files are mapped read-only and the two cursors are handed to
+ * recon_execute repeatedly until it returns zero.
+ *
+ * Returns EXIT_SUCCESS once recon_execute stops, or EXIT_FAILURE if an
+ * argument is missing or any setup step fails.
+ */
+int
+main (int argc, char **argv)
+{
+        glfs_t  *fs;
+        int      ret;
+        int      meta_fd  = (-1);
+        char    *meta_buf = NULL;
+        int      data_fd  = (-1);
+        char    *data_buf = NULL;
+
+        /* All three paths are required; do not read argv[1..3] without them. */
+        if (argc < 4) {
+                fprintf (stderr,
+                         "Usage: %s <volfile> <meta-file> <data-file>\n",
+                         (argc > 0) ? argv[0] : "recon");
+                return EXIT_FAILURE;
+        }
+
+        fs = glfs_new ("whocares");
+        if (!fs) {
+                fprintf (stderr, "glfs_new failed\n");
+                return EXIT_FAILURE;
+        }
+
+        if (getenv("RECON_DEBUG")) {
+                ret = glfs_set_logging (fs, "/dev/stderr", 7);
+        }
+        else {
+                ret = glfs_set_logging (fs, "/dev/null", 0);
+        }
+
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_set_logging failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        ret = glfs_set_volfile (fs, argv[1]);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        ret = glfs_init (fs);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_init failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        meta_fd = open (argv[2], O_RDONLY);
+        if (meta_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
+        if (meta_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        data_fd = open (argv[3], O_RDONLY);
+        if (data_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
+        if (data_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        /* recon_execute advances both cursors; a zero return ends the replay. */
+        for (;;) {
+                if (!recon_execute(fs,&meta_buf,&data_buf)) {
+                        break;
+                }
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/xlators/experimental/jbr-client/Makefile.am b/xlators/experimental/jbr-client/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/experimental/jbr-client/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am
new file mode 100644
index 00000000000..58f399f0607
--- /dev/null
+++ b/xlators/experimental/jbr-client/src/Makefile.am
@@ -0,0 +1,32 @@
+xlator_LTLIBRARIES = jbrc.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+
+nodist_jbrc_la_SOURCES = jbrc-cg.c
+CLEANFILES = $(nodist_jbrc_la_SOURCES)
+
+jbrc_la_LDFLAGS = -module -avoid-version
+jbrc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = \
+ $(top_srcdir)/xlators/lib/src/libxlator.h \
+ $(top_srcdir)/glusterfsd/src/glusterfsd.h \
+ jbrc.h jbr-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src
+JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py
+JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c
+JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c
+noinst_PYTHON = $(JBRC_GEN_FOPS)
+EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER)
+
+jbrc-cg.c: $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER)
+ $(PYTHON) $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) > $@
+
+# This directory builds jbrc.la as a module, so the installed file is jbrc.so.
+uninstall-local:
+	rm -f $(DESTDIR)$(xlatordir)/jbrc.so
diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c
new file mode 100644
index 00000000000..7719f511f01
--- /dev/null
+++ b/xlators/experimental/jbr-client/src/fop-template.c
@@ -0,0 +1,113 @@
+/* template-name fop */
+/*
+ * Entry point for the fop. A stub of the call is saved in the frame's local
+ * so that it can be re-issued to another child later, then the fop is wound
+ * to the currently active child with that child as the cookie.
+ *
+ * If the local or the stub cannot be allocated, the fop is unwound with
+ * -1/ENOMEM.
+ */
+int32_t
+jbrc_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ jbrc_local_t *local = NULL;
+ xlator_t *target_xl = ACTIVE_CHILD(this);
+
+ local = mem_get(this->local_pool);
+ if (!local) {
+ goto err;
+ }
+
+ local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue,
+ @SHORT_ARGS@);
+ if (!local->stub) {
+ goto err;
+ }
+ local->curr_xl = target_xl;
+ local->scars = 0;
+
+ frame->local = local;
+ STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl,
+ target_xl, target_xl->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+err:
+ /* local has not been attached to the frame yet, so release it here. */
+ if (local) {
+ mem_put(local);
+ }
+ STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM,
+ @ERROR_ARGS@);
+ return 0;
+}
+
+/* template-name cbk */
+/*
+ * Callback for the wound fop; cookie is the child that answered.
+ *
+ * Any result other than -1 records that child as the active one and unwinds.
+ * Errors other than EREMOTE and ENOTCONN are unwound as they are. For those
+ * two errors the next child is selected and the saved stub is scheduled to
+ * run again from a one-second timer, until SCAR_LIMIT retries have been
+ * used; if the timer cannot be set up the error is unwound instead.
+ */
+int32_t
+jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ @LONG_ARGS@)
+{
+ jbrc_local_t *local = frame->local;
+ xlator_t *last_xl = cookie;
+ xlator_t *next_xl;
+ jbrc_private_t *priv = this->private;
+ struct timespec spec;
+
+ if (op_ret != (-1)) {
+ if (local->scars) {
+ gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG,
+ HILITE("retried %p OK"), frame->local);
+ }
+ priv->active = last_xl;
+ goto unwind;
+ }
+ if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) {
+ goto unwind;
+ }
+
+ /* TBD: get leader ID from xdata? */
+ next_xl = next_xlator(this, last_xl);
+ /*
+ * We can't just give up after we've tried all bricks, because it's
+ * quite likely that a new leader election just hasn't finished yet.
+ * We also shouldn't retry endlessly, and especially not at a high
+ * rate, but that's good enough while we work on other things.
+ *
+ * TBD: implement slow/finite retry via a worker thread
+ */
+ if (!next_xl || (local->scars >= SCAR_LIMIT)) {
+ gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG,
+ HILITE("ran out of retries for %p"), frame->local);
+ goto unwind;
+ }
+
+ local->curr_xl = next_xl;
+ local->scars += 1;
+ spec.tv_sec = 1;
+ spec.tv_nsec = 0;
+ /*
+ * WARNING
+ *
+ * Just calling gf_timer_call_after like this leaves open the
+ * possibility that writes will get reordered, if a first write is
+ * rescheduled and then a second comes along to find an updated
+ * priv->active before the first actually executes. We might need to
+ * implement a stricter (and more complicated) queuing mechanism to
+ * ensure absolute consistency in this case.
+ */
+ if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) {
+ return 0;
+ }
+
+unwind:
+ /* The stub is no longer needed once the result goes back to the caller. */
+ call_stub_destroy(local->stub);
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+
+/* template-name cont-func */
+/*
+ * Continuation registered in the stub: winds the fop again, this time to the
+ * child recorded in local->curr_xl, with the same callback.
+ */
+int32_t
+jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ jbrc_local_t *local = frame->local;
+
+ STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl,
+ local->curr_xl, local->curr_xl->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
diff --git a/xlators/experimental/jbr-client/src/gen-fops.py b/xlators/experimental/jbr-client/src/gen-fops.py
new file mode 100755
index 00000000000..4d9451f7177
--- /dev/null
+++ b/xlators/experimental/jbr-client/src/gen-fops.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+
+import os
+import re
+import string
+import sys
+
+curdir = os.path.dirname(sys.argv[0])
+gendir = os.path.join(curdir,'../../../../libglusterfs/src')
+sys.path.append(gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# We really want the callback argument list, even when we're generating fop
+# code, so we propagate here.
+# TBD: this should probably be right in generate.py
+for k, v in cbk_subs.iteritems():
+ fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
+
+# Stolen from old codegen.py
+def load_templates (path):
+ # Split the file at path into named templates. A line that starts with
+ # "/* template-name NAME */" begins template NAME; the lines after it, up
+ # to the next such marker or end of file, become its text. Lines before
+ # the first marker are ignored. Returns a dict of name -> text.
+ templates = {}
+ tmpl_re = re.compile("/\* template-name (.*) \*/")
+ templates = {}
+ t_name = None
+ for line in open(path,"r").readlines():
+ if not line:
+ break
+ m = tmpl_re.match(line)
+ if m:
+ # A new marker closes the template collected so far, if any.
+ if t_name:
+ templates[t_name] = string.join(t_contents,'')
+ t_name = m.group(1).strip()
+ t_contents = []
+ elif t_name:
+ t_contents.append(line)
+ # Store the final template, which has no marker after it.
+ if t_name:
+ templates[t_name] = string.join(t_contents,'')
+ return templates
+
+# Stolen from gen_fdl.py
+def gen_client (templates):
+ # Print the generated code for every fop in ops except getspec: first the
+ # callback, then the continuation function, then the fop entry point.
+ for name, value in ops.iteritems():
+ if name == 'getspec':
+ # It's not real if it doesn't have a stub function.
+ continue
+ print generate(templates['cbk'],name,cbk_subs)
+ print generate(templates['cont-func'],name,fop_subs)
+ print generate(templates['fop'],name,fop_subs)
+
+# sys.argv[1] is the template file and sys.argv[2] the wrapper source. The
+# wrapper is copied to stdout line by line, except that any line containing
+# "#pragma generate" is replaced by the generated fop code.
+tmpl = load_templates(sys.argv[1])
+for l in open(sys.argv[2],'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+ gen_client(tmpl)
+ print "/* END GENERATED CODE */"
+ else:
+ # Drop the last character (the newline); print adds its own.
+ print l[:-1]
diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h
new file mode 100644
index 00000000000..626c4fd3eaa
--- /dev/null
+++ b/xlators/experimental/jbr-client/src/jbr-messages.h
@@ -0,0 +1,113 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _JBR_MESSAGES_H_
+#define _JBR_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define JBR_COMP_BASE GLFS_MSGID_COMP_JBR
+/* Must equal the number of J_MSG_* IDs defined below (currently 10). */
+#define GLFS_NUM_MESSAGES 10
+#define GLFS_MSGID_END (JBR_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_INIT_FAIL (JBR_COMP_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_RETRY_MSG (JBR_COMP_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_MEM_ERR (JBR_COMP_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_DICT_FLR (JBR_COMP_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_GENERIC (JBR_COMP_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_INVALID (JBR_COMP_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_NO_DATA (JBR_COMP_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_SYS_CALL_FAILURE (JBR_COMP_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_QUORUM_NOT_MET (JBR_COMP_BASE + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_LOCK_FAILURE (JBR_COMP_BASE + 10)
+
+
+#endif /* _JBR_MESSAGES_H_ */
diff --git a/xlators/experimental/jbr-client/src/jbrc.c b/xlators/experimental/jbr-client/src/jbrc.c
new file mode 100644
index 00000000000..9bb9346c5c0
--- /dev/null
+++ b/xlators/experimental/jbr-client/src/jbrc.c
@@ -0,0 +1,320 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "call-stub.h"
+#include "defaults.h"
+#include "timer.h"
+#include "xlator.h"
+#include "jbr-messages.h"
+#include "jbrc.h"
+#include "statedump.h"
+
+#define SCAR_LIMIT 20
+#define HILITE(x) (""x"")
+
+/*
+ * The fops are actually generated by gen-fops.py; the rest was mostly copied
+ * from defaults.c (commit cd253754 on 27 August 2013).
+ */
+
+enum gf_dht_mem_types_ {
+ gf_mt_jbrc_private_t = gf_common_mt_end + 1,
+ gf_mt_jbrc_end
+};
+
+char *JBRC_XATTR = "user.jbr.active";
+
+/*
+ * Pick the child that fops should be sent to: the one recorded as active in
+ * the private state, or the first child when there is no private state.
+ */
+static inline
+xlator_t *
+ACTIVE_CHILD (xlator_t *parent)
+{
+        jbrc_private_t  *conf = parent->private;
+
+        if (!conf) {
+                return FIRST_CHILD(parent);
+        }
+
+        return conf->active;
+}
+
+/*
+ * Return the child that follows prev in this xlator's child list, wrapping
+ * around to the first child after the last one.  Returns NULL if prev is
+ * not one of the children.
+ */
+xlator_t *
+next_xlator (xlator_t *this, xlator_t *prev)
+{
+        xlator_list_t   *node = this->children;
+
+        /* Find the list entry that holds prev. */
+        while (node && (node->xlator != prev)) {
+                node = node->next;
+        }
+        if (!node) {
+                return NULL;
+        }
+
+        if (node->next) {
+                return node->next->xlator;
+        }
+        return this->children->xlator;
+}
+
+/*
+ * Timer callback for a retry: cb_arg is the jbrc_local_t of the request.
+ * Logs the retry and resumes the saved stub.
+ */
+void
+jbrc_retry_cb (void *cb_arg)
+{
+        jbrc_local_t    *retry = cb_arg;
+        call_stub_t     *stub  = retry->stub;
+
+        gf_msg (__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG,
+                HILITE("retrying %p"), retry);
+        call_resume_wind(stub);
+}
+
+#pragma generate
+
+/*
+ * Placeholder for the forget callback: logs a warning naming the calling
+ * function and returns 0. The cbks table in this file is empty, so nothing
+ * in this file installs it.
+ * NOTE(review): the message ID is J_MSG_INIT_FAIL although this is not an
+ * init failure -- confirm that is intended.
+ */
+int32_t
+jbrc_forget (xlator_t *this, inode_t *inode)
+{
+ gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
+ "xlator does not implement forget_cbk");
+ return 0;
+}
+
+
+/* Placeholder for the releasedir callback; logs a warning and returns 0. */
+int32_t
+jbrc_releasedir (xlator_t *this, fd_t *fd)
+{
+ gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
+ "xlator does not implement releasedir_cbk");
+ return 0;
+}
+
+/* Placeholder for the release callback; logs a warning and returns 0. */
+int32_t
+jbrc_release (xlator_t *this, fd_t *fd)
+{
+ gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
+ "xlator does not implement release_cbk");
+ return 0;
+}
+
+struct xlator_fops fops = {
+ .lookup = jbrc_lookup,
+ .stat = jbrc_stat,
+ .fstat = jbrc_fstat,
+ .truncate = jbrc_truncate,
+ .ftruncate = jbrc_ftruncate,
+ .access = jbrc_access,
+ .readlink = jbrc_readlink,
+ .mknod = jbrc_mknod,
+ .mkdir = jbrc_mkdir,
+ .unlink = jbrc_unlink,
+ .rmdir = jbrc_rmdir,
+ .symlink = jbrc_symlink,
+ .rename = jbrc_rename,
+ .link = jbrc_link,
+ .create = jbrc_create,
+ .open = jbrc_open,
+ .readv = jbrc_readv,
+ .writev = jbrc_writev,
+ .flush = jbrc_flush,
+ .fsync = jbrc_fsync,
+ .opendir = jbrc_opendir,
+ .readdir = jbrc_readdir,
+ .readdirp = jbrc_readdirp,
+ .fsyncdir = jbrc_fsyncdir,
+ .statfs = jbrc_statfs,
+ .setxattr = jbrc_setxattr,
+ .getxattr = jbrc_getxattr,
+ .fsetxattr = jbrc_fsetxattr,
+ .fgetxattr = jbrc_fgetxattr,
+ .removexattr = jbrc_removexattr,
+ .fremovexattr = jbrc_fremovexattr,
+ .lk = jbrc_lk,
+ .inodelk = jbrc_inodelk,
+ .finodelk = jbrc_finodelk,
+ .entrylk = jbrc_entrylk,
+ .fentrylk = jbrc_fentrylk,
+ .rchecksum = jbrc_rchecksum,
+ .xattrop = jbrc_xattrop,
+ .fxattrop = jbrc_fxattrop,
+ .setattr = jbrc_setattr,
+ .fsetattr = jbrc_fsetattr,
+ .fallocate = jbrc_fallocate,
+ .discard = jbrc_discard,
+};
+
+struct xlator_cbks cbks = {
+};
+
+
+/*
+ * Set up memory accounting for this xlator, sized from gf_mt_jbrc_end.
+ *
+ * Returns the result of xlator_mem_acct_init (0 on success), or -1 if this
+ * is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int     status = -1;
+
+        GF_VALIDATE_OR_GOTO ("jbrc", this, out);
+
+        status = xlator_mem_acct_init (this, gf_mt_jbrc_end + 1);
+        if (status) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
+                        "Memory accounting init failed");
+        }
+
+out:
+        return status;
+}
+
+
+/*
+ * Initialize the xlator: create the pool for per-request locals, allocate
+ * the private state, count the children and make the first child active.
+ *
+ * Returns 0 on success.  On failure returns -1 with everything allocated
+ * here released again.
+ */
+int32_t
+jbrc_init (xlator_t *this)
+{
+        jbrc_private_t  *priv = NULL;
+        xlator_list_t   *trav = NULL;
+
+        this->local_pool = mem_pool_new (jbrc_local_t, 128);
+        if (!this->local_pool) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
+                        "failed to create jbrc_local_t pool");
+                goto err;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_mt_jbrc_private_t);
+        if (!priv) {
+                goto err;
+        }
+
+        for (trav = this->children; trav; trav = trav->next) {
+                ++(priv->n_children);
+        }
+
+        priv->active = FIRST_CHILD(this);
+        this->private = priv;
+        return 0;
+
+err:
+        if (priv) {
+                GF_FREE(priv);
+        }
+        /* Do not leave a pool behind when init as a whole has failed. */
+        if (this->local_pool) {
+                mem_pool_destroy (this->local_pool);
+                this->local_pool = NULL;
+        }
+        return -1;
+}
+
+/* Tear down the xlator by releasing its private state. */
+void
+jbrc_fini (xlator_t *this)
+{
+        jbrc_private_t  *priv = this->private;
+
+        GF_FREE(priv);
+}
+
+/*
+ * Return the zero-based position of kid in this xlator's child list, or -1
+ * if kid is not one of the children.
+ */
+int
+jbrc_get_child_index (xlator_t *this, xlator_t *kid)
+{
+        xlator_list_t   *node  = NULL;
+        int              index = 0;
+
+        for (node = this->children; node; node = node->next, ++index) {
+                if (node->xlator == kid) {
+                        return index;
+                }
+        }
+
+        return -1;
+}
+
+/*
+ * Count how many children are currently up, i.e. how many of the low
+ * n_children bits of kid_state are set.
+ *
+ * kid_state has a fixed number of bits, so children beyond that width can
+ * never be recorded as up and are not examined; shifting by the full width
+ * or more would be undefined behaviour.
+ */
+uint8_t
+jbrc_count_up_kids (jbrc_private_t *priv)
+{
+        uint8_t retval = 0;
+        uint8_t i;
+        uint8_t limit  = priv->n_children;
+
+        if (limit > (sizeof (priv->kid_state) * 8)) {
+                limit = (uint8_t)(sizeof (priv->kid_state) * 8);
+        }
+
+        for (i = 0; i < limit; ++i) {
+                /* Unsigned shift: bit 31 must not go through a signed int. */
+                if (priv->kid_state & ((uint32_t)1 << i)) {
+                        ++retval;
+                }
+        }
+
+        return retval;
+}
+
+/*
+ * Track which children are up and pass events on.
+ *
+ * CHILD_UP sets the child's bit in kid_state, recounts up_children and then
+ * forwards the event with default_notify.  CHILD_DOWN clears the bit and
+ * recounts, but is not forwarded.  Every other event is forwarded unchanged.
+ *
+ * A child whose index does not fit in kid_state is not tracked; shifting by
+ * that index would be undefined behaviour.
+ *
+ * Returns the result of default_notify where it is called, otherwise 0.
+ */
+int32_t
+jbrc_notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        int32_t          ret      = 0;
+        int32_t          index    = 0;
+        jbrc_private_t  *priv     = NULL;
+        /* Number of children that kid_state can represent, one bit each. */
+        const int32_t    max_kids = (int32_t)(sizeof (priv->kid_state) * 8);
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        switch (event) {
+        case GF_EVENT_CHILD_UP:
+                index = jbrc_get_child_index(this, data);
+                if ((index >= 0) && (index < max_kids)) {
+                        priv->kid_state |= ((uint32_t)1 << index);
+                        priv->up_children = jbrc_count_up_kids(priv);
+                        gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
+                                "got CHILD_UP for %s, now %u kids",
+                                ((xlator_t *)data)->name,
+                                priv->up_children);
+                }
+                ret = default_notify (this, event, data);
+                break;
+        case GF_EVENT_CHILD_DOWN:
+                index = jbrc_get_child_index(this, data);
+                if ((index >= 0) && (index < max_kids)) {
+                        priv->kid_state &= ~((uint32_t)1 << index);
+                        priv->up_children = jbrc_count_up_kids(priv);
+                        gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
+                                "got CHILD_DOWN for %s, now %u kids",
+                                ((xlator_t *)data)->name,
+                                priv->up_children);
+                }
+                break;
+        default:
+                ret = default_notify (this, event, data);
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Statedump hook: writes a section named "<type>.<name>" containing the
+ * up_children count and one "child_<n>" entry per child with its name.
+ * Always returns 0.
+ */
+int
+jbrc_priv_dump (xlator_t *this)
+{
+        jbrc_private_t  *conf = NULL;
+        char             key[GF_DUMP_MAX_BUF_LEN];
+        xlator_list_t   *node = NULL;
+        int32_t          pos  = -1;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        snprintf(key, sizeof (key), "%s.%s", this->type, this->name);
+        gf_proc_dump_add_section(key);
+
+        gf_proc_dump_write("up_children", "%u", conf->up_children);
+
+        pos = 0;
+        node = this->children;
+        while (node) {
+                snprintf(key, sizeof (key), "child_%d", pos);
+                gf_proc_dump_write(key, "%s", node->xlator->name);
+                node = node->next;
+                pos++;
+        }
+
+out:
+        return 0;
+}
+
+struct xlator_dumpops dumpops = {
+ .priv = jbrc_priv_dump,
+};
+
+class_methods_t class_methods = {
+ .init = jbrc_init,
+ .fini = jbrc_fini,
+ .notify = jbrc_notify,
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/experimental/jbr-client/src/jbrc.h b/xlators/experimental/jbr-client/src/jbrc.h
new file mode 100644
index 00000000000..c83259ca1bd
--- /dev/null
+++ b/xlators/experimental/jbr-client/src/jbrc.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _JBRC_H_
+#define _JBRC_H_
+
+/* Per-xlator private state of the jbr client. */
+typedef struct {
+ /* Child that new fops are sent to. */
+ xlator_t *active;
+ /* Number of children currently marked up in kid_state. */
+ uint8_t up_children;
+ /* Total number of children. */
+ uint8_t n_children;
+ /* Bitmask of up children: bit i is set while child i is up. */
+ uint32_t kid_state;
+} jbrc_private_t;
+
+/* Per-request state, kept in the call frame's local. */
+typedef struct {
+ /* Saved copy of the call, resumed when the fop is retried. */
+ call_stub_t *stub;
+ /* Child that the current attempt is sent to. */
+ xlator_t *curr_xl;
+ /* Number of retries made so far for this request. */
+ uint16_t scars;
+} jbrc_local_t;
+
+#endif /* _JBRC_H_ */
diff --git a/xlators/experimental/jbr-server/Makefile.am b/xlators/experimental/jbr-server/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/experimental/jbr-server/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am
new file mode 100644
index 00000000000..66f73ba8c96
--- /dev/null
+++ b/xlators/experimental/jbr-server/src/Makefile.am
@@ -0,0 +1,35 @@
+xlator_LTLIBRARIES = jbr.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+
+nodist_jbr_la_SOURCES = jbr-cg.c
+CLEANFILES = $(nodist_jbr_la_SOURCES)
+
+jbr_la_LDFLAGS = -module -avoid-version
+jbr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/api/src/libgfapi.la
+
+noinst_HEADERS = jbr-internal.h \
+ $(top_srcdir)/xlators/lib/src/libxlator.h \
+ $(top_srcdir)/glusterfsd/src/glusterfsd.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src -DSBIN_DIR=\"$(sbindir)\" \
+ -I$(top_srcdir)/api/src -DJBR_SCRIPT_PREFIX=\"$(jbrdir)\" \
+ -I$(top_srcdir)/xlators/experimental/jbr-client/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src
+JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py
+JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c
+JBR_WRAPPER = $(JBR_PREFIX)/jbr.c
+noinst_PYTHON = $(JBR_GEN_FOPS)
+EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER)
+
+jbr-cg.c: $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER)
+ $(PYTHON) $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) > $@
+
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/jbr.so
diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c
new file mode 100644
index 00000000000..7314701029c
--- /dev/null
+++ b/xlators/experimental/jbr-server/src/all-templates.c
@@ -0,0 +1,431 @@
+/*
+ * You can put anything here - it doesn't even have to be a comment - and it
+ * will be ignored until we reach the first template-name comment.
+ */
+
+
+/* template-name read-fop */
+/*
+ * Entry point for the read-type fop @NAME@.
+ *
+ * The call is passed to the first child when this brick is the leader, or
+ * when xdata carries both RECON_TERM_XATTR and RECON_INDEX_XATTR.  Anything
+ * else is unwound with -1/EREMOTE.  If argument validation fails the fop is
+ * unwound with -1 and an op_errno of 0.
+ */
+int32_t
+jbr_@NAME@ (call_frame_t *frame, xlator_t *this,
+            @LONG_ARGS@)
+{
+        int32_t          term_val;
+        int32_t          index_val;
+        int32_t          op_errno       = 0;
+        gf_boolean_t     from_recon     = _gf_false;
+        jbr_private_t   *priv           = NULL;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, err);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, err);
+        GF_VALIDATE_OR_GOTO (this->name, frame, err);
+
+        op_errno = EREMOTE;
+
+        /*
+         * Reads are allowed during reconciliation.
+         * TBD: allow "dirty" reads on non-leaders
+         */
+        if (xdata) {
+                if (dict_get_int32 (xdata, RECON_TERM_XATTR,
+                                    &term_val) == 0) {
+                        if (dict_get_int32 (xdata, RECON_INDEX_XATTR,
+                                            &index_val) == 0) {
+                                from_recon = _gf_true;
+                        }
+                }
+        }
+
+        /* Neither the leader nor a reconciliation read: refuse. */
+        if (!(priv->leader || from_recon)) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_@NAME@_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+                    @SHORT_ARGS@);
+        return 0;
+
+err:
+        STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno,
+                             @ERROR_ARGS@);
+        return 0;
+}
+
+/* template-name read-perform_local_op */
+/* No "perform_local_op" function needed for @NAME@ */
+
+/* template-name read-dispatch */
+/* No "dispatch" function needed for @NAME@ */
+
+/* template-name read-call_dispatch */
+/* No "call_dispatch" function needed for @NAME@ */
+
+/* template-name read-fan-in */
+/* No "fan-in" function needed for @NAME@ */
+
+/* template-name read-continue */
+/* No "continue" function needed for @NAME@ */
+
+/* template-name read-complete */
+/* No "complete" function needed for @NAME@ */
+
+/* template-name write-fop */
+/*
+ * Entry point for the write-type fop @NAME@.
+ *
+ * jbr_leader_checks_and_init() decides whether the request may proceed and
+ * allocates frame->local.  On a non-leader the fop is wound straight to the
+ * first child with jbr_@NAME@_complete as callback.  On the leader the
+ * term/index xattrs are added to xdata, a stub resuming at
+ * jbr_@NAME@_continue is stored in local->stub, and the request is handed to
+ * jbr_@NAME@_perform_local_op.
+ *
+ * On any failure the fop is unwound with -1 and op_errno (ENOMEM unless a
+ * helper set something more specific).
+ */
+int32_t
+jbr_@NAME@ (call_frame_t *frame, xlator_t *this,
+            @LONG_ARGS@)
+{
+        jbr_local_t     *local          = NULL;
+        jbr_private_t   *priv           = NULL;
+        int32_t          ret            = -1;
+        int              op_errno       = ENOMEM;
+        gf_boolean_t     own_xdata      = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, err);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, err);
+        GF_VALIDATE_OR_GOTO (this->name, frame, err);
+
+#if defined(JBR_CG_NEED_FD)
+        ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd);
+#else
+        ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL);
+#endif
+        if (ret)
+                goto err;
+
+        local = frame->local;
+
+        /*
+         * If we let it through despite not being the leader, then we just want
+         * to pass it on down without all of the additional xattrs, queuing, and
+         * so on.  However, jbr_*_complete does depend on the initialization
+         * immediately above this.
+         */
+        if (!priv->leader) {
+                STACK_WIND (frame, jbr_@NAME@_complete,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+                            @SHORT_ARGS@);
+                return 0;
+        }
+
+        /*
+         * When the caller passed no xdata, the helper below creates one with
+         * dict_new().  That reference belongs to this function and must be
+         * dropped before returning, on success and on failure alike.
+         */
+        own_xdata = (xdata == NULL) ? _gf_true : _gf_false;
+        ret = jbr_initialize_xdata_set_attrs (this, &xdata);
+        if (ret)
+                goto err;
+
+        local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue,
+                                       @SHORT_ARGS@);
+        if (!local->stub) {
+                goto err;
+        }
+
+        /*
+         * Can be used to just call_dispatch or be customised per fop to
+         * perform ops specific to that particular fop.
+         */
+        ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno,
+                                           @SHORT_ARGS@);
+        if (ret)
+                goto err;
+
+        /* Do not touch frame/local past this point; only our own dict ref. */
+        if (own_xdata)
+                dict_unref (xdata);
+
+        return ret;
+err:
+        if (local) {
+                if (local->stub) {
+                        call_stub_destroy(local->stub);
+                }
+                if (local->qstub) {
+                        call_stub_destroy(local->qstub);
+                }
+                if (local->fd) {
+                        fd_unref(local->fd);
+                }
+                /*
+                 * Detach local from the frame before releasing it so the
+                 * frame is not left holding a pointer to freed memory.
+                 */
+                frame->local = NULL;
+                mem_put(local);
+        }
+        if (own_xdata && xdata)
+                dict_unref (xdata);
+        STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno,
+                             @ERROR_ARGS@);
+        return 0;
+}
+
+/* template-name write-perform_local_op */
+/*
+ * Default "perform local op" step for @NAME@: validates its arguments and
+ * forwards the request to jbr_@NAME@_call_dispatch.
+ *
+ * Returns the result of jbr_@NAME@_call_dispatch, or -1 when an argument
+ * fails validation.
+ */
+int32_t
+jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno,
+                             @LONG_ARGS@)
+{
+        int32_t status = -1;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        status = jbr_@NAME@_call_dispatch (frame, this, op_errno,
+                                           @SHORT_ARGS@);
+
+out:
+        return status;
+}
+
+/* template-name write-call_dispatch */
+/*
+ * Decide whether @NAME@ can be dispatched now or has to wait.
+ *
+ * With JBR_CG_QUEUE defined, the request is serialized per inode: if another
+ * request is already active on fd->inode, a stub resuming at
+ * jbr_@NAME@_dispatch is saved in local->qstub, the request is appended to
+ * the pending queue and 0 is returned.  Otherwise the request is put on the
+ * active queue and dispatched immediately.  Without JBR_CG_QUEUE the request
+ * is always dispatched immediately.
+ *
+ * Returns 0 on success (dispatched or queued) and -1 on failure; *op_errno
+ * is set to EIO when the inode context cannot be obtained.
+ */
+int32_t
+jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno,
+                          @LONG_ARGS@)
+{
+        jbr_local_t     *local  = NULL;
+        jbr_private_t   *priv   = NULL;
+        int32_t          ret    = -1;
+#if defined(JBR_CG_QUEUE)
+        jbr_inode_ctx_t *ictx   = NULL;
+#endif
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+#if defined(JBR_CG_QUEUE)
+        ictx = jbr_get_inode_ctx(this, fd->inode);
+        if (!ictx) {
+                *op_errno = EIO;
+                goto out;
+        }
+
+        LOCK(&ictx->lock);
+        if (ictx->active) {
+                gf_msg_debug (this->name, 0,
+                              "queuing request due to conflict");
+                /*
+                 * TBD: enqueue only for real conflict
+                 *
+                 * Currently we just act like all writes are in
+                 * conflict with one another.  What we should really do
+                 * is check the active/pending queues and defer only if
+                 * there's a conflict there.
+                 *
+                 * It's important to check the pending queue because we
+                 * might have an active request X which conflicts with
+                 * a pending request Y, and this request Z might
+                 * conflict with Y but not X.  If we checked only the
+                 * active queue then Z could jump ahead of Y, which
+                 * would be incorrect.
+                 */
+                local->qstub = fop_@NAME@_stub (frame,
+                                                jbr_@NAME@_dispatch,
+                                                @SHORT_ARGS@);
+                if (!local->qstub) {
+                        UNLOCK(&ictx->lock);
+                        goto out;
+                }
+                list_add_tail(&local->qlinks, &ictx->pqueue);
+                ++(ictx->pending);
+                UNLOCK(&ictx->lock);
+                ret = 0;
+                goto out;
+        } else {
+                list_add_tail(&local->qlinks, &ictx->aqueue);
+                ++(ictx->active);
+        }
+        UNLOCK(&ictx->lock);
+#endif
+        ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@);
+
+out:
+        return ret;
+}
+
+/* template-name write-dispatch */
+/*
+ * Fan @NAME@ out to every child after the first one.
+ *
+ * local->call_count is primed with n_children - 1 and local->successful_acks
+ * is reset before any wind, so jbr_@NAME@_fan_in can count the replies.
+ *
+ * NOTE(review): when there is no child beyond the first, nothing is wound
+ * and therefore no fan-in callback runs - confirm how local->stub is meant
+ * to be resumed in that configuration.
+ *
+ * Returns 0 once all winds have been issued, -1 on a validation failure.
+ */
+int32_t
+jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this,
+                     @LONG_ARGS@)
+{
+        jbr_local_t     *local          = NULL;
+        jbr_private_t   *priv           = NULL;
+        xlator_list_t   *follower       = NULL;
+        int32_t          ret            = -1;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        /*
+         * TBD: unblock pending request(s) if we fail after this point but
+         * before we get to jbr_@NAME@_complete (where that code currently
+         * resides).
+         */
+
+        local->call_count = priv->n_children - 1;
+        local->successful_acks = 0;
+
+        follower = this->children->next;
+        while (follower) {
+                STACK_WIND (frame, jbr_@NAME@_fan_in,
+                            follower->xlator, follower->xlator->fops->@NAME@,
+                            @SHORT_ARGS@);
+                follower = follower->next;
+        }
+
+        /* TBD: variable Issue count */
+        ret = 0;
+out:
+        return ret;
+}
+
+/* template-name write-fan-in */
+/*
+ * Callback for the winds issued by jbr_@NAME@_dispatch.
+ *
+ * Under frame->lock it decrements local->call_count and, for a reply whose
+ * op_ret is not -1, increments local->successful_acks and records op_ret in
+ * local->successful_op_ret.  When the last outstanding reply has arrived
+ * (count reaches zero) local->stub is resumed.
+ *
+ * Returns 0, or -1 on a validation failure.
+ */
+int32_t
+jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   @LONG_ARGS@)
+{
+        jbr_local_t     *local          = NULL;
+        int32_t          ret            = -1;
+        /* Same width as local->call_count so the copy cannot be truncated. */
+        uint32_t         call_count     = 0;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n",
+                      op_ret, op_errno);
+
+        LOCK(&frame->lock);
+        call_count = --(local->call_count);
+        if (op_ret != -1) {
+                /*
+                 * Increment the number of successful acks
+                 * received for the operation.
+                 */
+                (local->successful_acks)++;
+                local->successful_op_ret = op_ret;
+        }
+        /* Arguments follow the order of the placeholders in the message. */
+        gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
+                      local->successful_acks, op_ret, op_errno);
+        UNLOCK(&frame->lock);
+
+        /* TBD: variable Completion count */
+        if (call_count == 0) {
+                call_resume(local->stub);
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* template-name write-continue */
+/*
+ * Resumed from local->stub once every follower has answered.
+ *
+ * Checks whether quorum could still be met if the leader's own operation
+ * succeeded (follower acks + 1).  If not, the fop is failed with EROFS
+ * without being tried on the leader; otherwise it is wound to the first
+ * child with jbr_@NAME@_complete as callback.
+ *
+ * Returns 0, or -1 on a validation failure.
+ */
+int32_t
+jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this,
+                     @LONG_ARGS@)
+{
+        int32_t          ret    = -1;
+        gf_boolean_t     result = _gf_false;
+        jbr_local_t     *local  = NULL;
+        jbr_private_t   *priv   = NULL;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        priv = this->private;
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        /*
+         * Perform quorum check to see if the leader needs
+         * to perform the operation.  If the operation will not
+         * meet quorum irrespective of the leader's result
+         * there is no point in the leader performing the fop.
+         */
+        result = fop_quorum_check (this, (double)priv->n_children,
+                                   (double)local->successful_acks + 1);
+        if (result == _gf_false) {
+                gf_msg (this->name, GF_LOG_ERROR, EROFS,
+                        J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks "
+                        "to meet quorum. Failing the operation without trying "
+                        "it on the leader.");
+
+                /*
+                 * jbr_@NAME@_complete never runs on this path, so the
+                 * cleanup it normally does has to happen here: leave the
+                 * inode queue (letting the next pending request proceed)
+                 * and drop the fd reference held in local.  The queue is
+                 * left first because dequeueing reads local->fd->inode.
+                 */
+#if defined(JBR_CG_QUEUE)
+                (void) jbr_remove_from_queue (frame, this);
+#endif
+#if defined(JBR_CG_NEED_FD)
+                if (local->fd) {
+                        fd_unref(local->fd);
+                        local->fd = NULL;
+                }
+#endif
+                STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,
+                                     @ERROR_ARGS@);
+        } else {
+                STACK_WIND (frame, jbr_@NAME@_complete,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+                            @SHORT_ARGS@);
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* template-name write-complete */
+/*
+ * Callback for the local (first-child) execution of @NAME@.
+ *
+ * Performs the per-request cleanup selected by the JBR_CG_* symbols (leave
+ * the inode queue, mark the fd dirty, drop the fd reference) and unwinds.
+ * On the leader the result reported upward is decided by a quorum check over
+ * the follower acks plus the leader's own outcome; on a follower op_ret and
+ * op_errno are passed through unchanged.
+ *
+ * NOTE(review): the err path unwinds with -1 and an op_errno of 0, and skips
+ * the fd_unref - confirm whether that is intended.
+ */
+int32_t
+jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     @LONG_ARGS@)
+{
+        int32_t          ret    = -1;
+        gf_boolean_t     result = _gf_false;
+        jbr_private_t   *priv   = NULL;
+        jbr_local_t     *local  = NULL;
+        uint32_t         acks   = 0;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, err);
+        GF_VALIDATE_OR_GOTO (this->name, frame, err);
+        priv = this->private;
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, priv, err);
+        GF_VALIDATE_OR_GOTO (this->name, local, err);
+
+        /*
+         * Total successful acks = follower acks, plus one if the fop also
+         * succeeded on the leader.  This is computed by addition rather
+         * than by decrementing the unsigned counter on failure: with zero
+         * follower acks a decrement would wrap around to a huge value and
+         * make the quorum check below pass.
+         */
+        LOCK(&frame->lock);
+        acks = local->successful_acks;
+        if (op_ret != -1)
+                acks++;
+        UNLOCK(&frame->lock);
+
+#if defined(JBR_CG_QUEUE)
+        ret = jbr_remove_from_queue (frame, this);
+        if (ret)
+                goto err;
+#endif
+
+#if defined(JBR_CG_FSYNC)
+        jbr_mark_fd_dirty(this, local);
+#endif
+
+#if defined(JBR_CG_NEED_FD)
+        fd_unref(local->fd);
+#endif
+
+        /*
+         * After the leader completes the fop, a quorum check is
+         * performed, taking into account the outcome of the fop
+         * on the leader.  Irrespective of the fop being successful
+         * or failing on the leader, the result of the quorum will
+         * determine if the overall fop is successful or not.  For
+         * example, a fop might have succeeded on every node except
+         * the leader, in which case as quorum is being met, the fop
+         * will be treated as a successful fop, even though it failed
+         * on the leader.  On follower nodes, no quorum check should
+         * be done, and the result is returned to the leader as is.
+         */
+        if (priv->leader) {
+                result = fop_quorum_check (this, (double)priv->n_children,
+                                           (double)acks);
+                if (result == _gf_false) {
+                        op_ret = -1;
+                        op_errno = EROFS;
+                        gf_msg (this->name, GF_LOG_ERROR, EROFS,
+                                J_MSG_QUORUM_NOT_MET, "Quorum is not met. "
+                                "The operation has failed.");
+                } else {
+#if defined(JBR_CG_NEED_FD)
+                        op_ret = local->successful_op_ret;
+#else
+                        op_ret = 0;
+#endif
+                        op_errno = 0;
+                        gf_msg_debug (this->name, 0,
+                                      "Quorum has met. The operation has succeeded.");
+                }
+        }
+
+        STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+                             @SHORT_ARGS@);
+
+
+        return 0;
+
+err:
+        STACK_UNWIND_STRICT (@NAME@, frame, -1, 0,
+                             @SHORT_ARGS@);
+
+        return 0;
+}
diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py
new file mode 100755
index 00000000000..8a2b47c5345
--- /dev/null
+++ b/xlators/experimental/jbr-server/src/gen-fops.py
@@ -0,0 +1,178 @@
+#!/usr/bin/python
+
+# This script generates the boilerplate versions of most fops and cbks in the
+# server. This allows the details of leadership-status checking, sequencing
+# between leader and followers (including fan-out), and basic error checking
+# to be centralized in one place, with per-operation code kept to a minimum.
+
+import os
+import re
+import string
+import sys
+
+curdir = os.path.dirname(sys.argv[0])
+gendir = os.path.join(curdir,'../../../../libglusterfs/src')
+sys.path.append(gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# We really want the callback argument list, even when we're generating fop
+# code, so we propagate here.
+# TBD: this should probably be right in generate.py
+# Copy each callback's @ERROR_ARGS@ substitution into the matching fop entry.
+# items() is used instead of iteritems() so this runs on Python 2 and 3 alike.
+for k, v in cbk_subs.items():
+    fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
+
+# Stolen from old codegen.py
+def load_templates (path):
+    """Split the template file at `path` into named templates.
+
+    A line starting with "/* template-name NAME */" opens a new template;
+    the lines that follow, up to the next such marker or the end of the
+    file, form its text.  Anything before the first marker is ignored.
+
+    Returns a dict mapping each template name to its text.
+    """
+    tmpl_re = re.compile(r"/\* template-name (.*) \*/")
+    templates = {}
+    t_name = None
+    t_contents = []
+    # 'with' closes the file even if reading fails part-way.
+    with open(path, "r") as tmpl_file:
+        for line in tmpl_file:
+            m = tmpl_re.match(line)
+            if m:
+                # A new marker ends the template collected so far.
+                if t_name:
+                    templates[t_name] = ''.join(t_contents)
+                t_name = m.group(1).strip()
+                t_contents = []
+            elif t_name:
+                t_contents.append(line)
+    if t_name:
+        templates[t_name] = ''.join(t_contents)
+    return templates
+
+# We need two types of templates. The first, for pure read operations, just
+# needs to do a simple am-i-leader check (augmented to allow dirty reads).
+# The second, for pure writes, needs to do fan-out to followers between those
+# initial checks and local execution. There are other operations that don't
+# fit neatly into either category - e.g. lock ops or fsync - so we'll just have
+# to handle those manually. The table thus includes entries only for those we
+# can categorize. The special cases, plus any new operations we've never even
+# heard of, aren't in there.
+#
+# Various keywords can be used to define/undefine preprocessor symbols used
+# in the templates, on a per-function basis. For example, if the keyword here
+# is "fsync" (lowercase word or abbreviation) that will cause JBR_CG_FSYNC
+# (prefix plus uppercase version) to be defined above all of the generated code
+# for that fop.
+
+fop_table = {
+ "access": "read",
+ "create": "write",
+ "discard": "write",
+# "entrylk": "read",
+ "fallocate": "write",
+# "fentrylk": "read",
+ "fgetxattr": "read",
+# "finodelk": "read",
+# "flush": "read",
+ "fremovexattr": "write",
+ "fsetattr": "write",
+ "fsetxattr": "write",
+ "fstat": "read",
+# "fsync": "read",
+# "fsyncdir": "read",
+ "ftruncate": "write",
+ "fxattrop": "write",
+ "getxattr": "read",
+# "inodelk": "read",
+ "link": "write",
+ "lk": "write,queue",
+# "lookup": "read",
+ "mkdir": "write",
+ "mknod": "write",
+ "open": "write",
+ "opendir": "read",
+ "rchecksum": "read",
+ "readdir": "read",
+ "readdirp": "read",
+ "readlink": "read",
+ "readv": "read",
+ "removexattr": "write",
+ "rename": "write",
+ "rmdir": "write",
+ "setattr": "write",
+ "setxattr": "write",
+ "stat": "read",
+ "statfs": "read",
+ "symlink": "write",
+ "truncate": "write",
+ "unlink": "write",
+ "writev": "write,fsync,queue",
+ "xattrop": "write",
+}
+
+# Mention those fops in the selective_generate table, for which
+# only a few common functions will be generated, and mention those
+# functions. Rest of the functions can be customized
+selective_generate = {
+ "lk": "fop,dispatch,call_dispatch",
+}
+
+# Stolen from gen_fdl.py
+def gen_server (templates):
+    """Print the generated C code for every fop listed in fop_table.
+
+    For each fop the flags from fop_table are emitted as JBR_CG_<FLAG>
+    defines (plus JBR_CG_NEED_FD when "fsync" or "queue" is present), the
+    selected templates are expanded between them, and the defines are
+    undone again.  Fops named in selective_generate get only the functions
+    listed there; all others get the full set.  Finally the xlator_fops
+    table referencing every generated fop is printed.
+    """
+    # Always a list, so the membership tests below are exact-name matches
+    # for the default set just as they are for selective_generate entries.
+    all_funcs = ["fop", "complete", "continue", "fan-in", "dispatch",
+                 "call_dispatch", "perform_local_op"]
+    fops_done = []
+    for name in fop_table.keys():
+        info = fop_table[name].split(",")
+        kind = info[0]
+        flags = info[1:]
+
+        # generate all functions for the fops in fop_table
+        # except for the ones in selective_generate for which
+        # generate only the functions mentioned in the
+        # selective_generate table
+        gen_funcs = all_funcs
+        if name in selective_generate:
+            gen_funcs = selective_generate[name].split(",")
+
+        if ("fsync" in flags) or ("queue" in flags):
+            flags.append("need_fd")
+        for fname in flags:
+            print("#define JBR_CG_%s" % fname.upper())
+
+        if 'complete' in gen_funcs:
+            print(generate(templates[kind+"-complete"],
+                           name,cbk_subs))
+
+        if 'continue' in gen_funcs:
+            print(generate(templates[kind+"-continue"],
+                           name,fop_subs))
+
+        if 'fan-in' in gen_funcs:
+            print(generate(templates[kind+"-fan-in"],
+                           name,cbk_subs))
+
+        if 'dispatch' in gen_funcs:
+            print(generate(templates[kind+"-dispatch"],
+                           name,fop_subs))
+
+        if 'call_dispatch' in gen_funcs:
+            print(generate(templates[kind+"-call_dispatch"],
+                           name,fop_subs))
+
+        if 'perform_local_op' in gen_funcs:
+            print(generate(templates[kind+"-perform_local_op"],
+                           name, fop_subs))
+
+        if 'fop' in gen_funcs:
+            print(generate(templates[kind+"-fop"],name,fop_subs))
+
+        for fname in flags:
+            print("#undef JBR_CG_%s" % fname.upper())
+        fops_done.append(name)
+    # Just for fun, emit the fops table too.
+    print("struct xlator_fops fops = {")
+    for x in fops_done:
+        print(" .%s = jbr_%s,"%(x,x))
+    print("};")
+
+# Driver: argv[1] is the template file, argv[2] the wrapper source.  The
+# wrapper is copied to stdout line by line, except that a line containing
+# '#pragma generate' is replaced by the generated code.
+tmpl = load_templates(sys.argv[1])
+with open(sys.argv[2],'r') as wrapper:
+    for l in wrapper:
+        if l.find('#pragma generate') != -1:
+            print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+            gen_server(tmpl)
+            print("/* END GENERATED CODE */")
+        else:
+            # Strip only the newline, so a final line without one is not
+            # shortened by a real character.
+            print(l.rstrip('\n'))
diff --git a/xlators/experimental/jbr-server/src/jbr-internal.h b/xlators/experimental/jbr-server/src/jbr-internal.h
new file mode 100644
index 00000000000..ab1dfc16de2
--- /dev/null
+++ b/xlators/experimental/jbr-server/src/jbr-internal.h
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define LEADER_XATTR "user.jbr.leader"
+#define SECOND_CHILD(xl) (xl->children->next->xlator)
+#define RECONCILER_PATH JBR_SCRIPT_PREFIX"/reconciler.py"
+#define CHANGELOG_ENTRY_SIZE 128
+
+enum {
+ gf_mt_jbr_private_t = gf_common_mt_end + 1,
+ gf_mt_jbr_fd_ctx_t,
+ gf_mt_jbr_inode_ctx_t,
+ gf_mt_jbr_dirty_t,
+ gf_mt_jbr_end
+};
+
+typedef enum jbr_recon_notify_ev_id_t {
+ JBR_RECON_SET_LEADER = 1,
+ JBR_RECON_ADD_CHILD = 2
+} jbr_recon_notify_ev_id_t;
+
+typedef struct _jbr_recon_notify_ev_s {
+ jbr_recon_notify_ev_id_t id;
+ uint32_t index; /* in case of add */
+ struct list_head list;
+} jbr_recon_notify_ev_t;
+
+typedef struct {
+ /*
+ * This is a hack to allow a non-leader to accept requests while the
+ * leader is down, and it only works for n=2. The way it works is that
+ * "config_leader" indicates the state from our options (via init or
+ * reconfigure) but "leader" is what the fop code actually looks at. If
+ * config_leader is true, then leader will *always* be true as well,
+ * giving that brick precedence. If config_leader is false, then
+ * leader will only be true if there is no connection to the other
+ * brick (tracked in jbr_notify).
+ *
+ * TBD: implement real leader election
+ */
+ gf_boolean_t config_leader;
+ gf_boolean_t leader;
+ uint8_t up_children;
+ uint8_t n_children;
+ char *vol_file;
+ uint32_t current_term;
+ uint32_t kid_state;
+ gf_lock_t dirty_lock;
+ struct list_head dirty_fds;
+ uint32_t index;
+ gf_lock_t index_lock;
+ double quorum_pct;
+ int term_fd;
+ long term_total;
+ long term_read;
+ /*
+ * This is a super-duper hack, but it will do for now. The reason it's
+ * a hack is that we pass this to dict_set_static_bin, so we don't have
+ * to mess around with allocating and freeing it on every single IPC
+ * request, but it's totally not thread-safe. On the other hand, there
+ * should only be one reconciliation thread running and calling these
+ * functions at a time, so maybe that doesn't matter.
+ *
+ * TBD: re-evaluate how to manage this
+ */
+ char term_buf[CHANGELOG_ENTRY_SIZE];
+ gf_boolean_t child_up; /* To maintain the state of *
+ * the translator */
+} jbr_private_t;
+
+/* Per-request state, stored in frame->local by jbr_leader_checks_and_init. */
+typedef struct {
+ /* Stub created by the write fop; resumed once the last fan-in arrives. */
+ call_stub_t *stub;
+ /* Stub created when the request is queued; resumed on dequeue. */
+ call_stub_t *qstub;
+ /* Replies still outstanding; set by dispatch, decremented by fan-in. */
+ uint32_t call_count;
+ /* Number of replies counted as successful (op_ret != -1). */
+ uint32_t successful_acks;
+ /* op_ret of the most recent successful fan-in reply. */
+ uint32_t successful_op_ret;
+ /* Referenced fd for the request, or NULL when none was supplied. */
+ fd_t *fd;
+ /* Links this request into an inode context's active/pending queue. */
+ struct list_head qlinks;
+} jbr_local_t;
+
+/*
+ * This should match whatever changelog returns on the pre-op for us to pass
+ * when we're ready for our post-op.
+ */
+typedef uint32_t log_id_t;
+
+typedef struct {
+ struct list_head links;
+ log_id_t id;
+} jbr_dirty_list_t;
+
+typedef struct {
+ fd_t *fd;
+ struct list_head dirty_list;
+ struct list_head fd_list;
+} jbr_fd_ctx_t;
+
+/* Per-inode queueing state, created on demand by jbr_get_inode_ctx. */
+typedef struct {
+ /* Taken around updates of the counters and queues below. */
+ gf_lock_t lock;
+ /* Count of requests on aqueue. */
+ uint32_t active;
+ /* Requests currently being processed (linked via jbr_local_t.qlinks). */
+ struct list_head aqueue;
+ /* Count of requests on pqueue. */
+ uint32_t pending;
+ /* Requests waiting for an active one to finish. */
+ struct list_head pqueue;
+} jbr_inode_ctx_t;
+
+void jbr_start_reconciler (xlator_t *this);
diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c
new file mode 100644
index 00000000000..afdbc5d9f4a
--- /dev/null
+++ b/xlators/experimental/jbr-server/src/jbr.c
@@ -0,0 +1,1675 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fnmatch.h>
+#include "call-stub.h"
+#include "defaults.h"
+#include "xlator.h"
+#include "glfs.h"
+#include "glfs-internal.h"
+#include "run.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "syscall.h"
+#include "compat-errno.h"
+
+#include "jbr-internal.h"
+#include "jbr-messages.h"
+
+#define JBR_FLUSH_INTERVAL 5
+
+enum {
+ /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */
+ JBR_SERVER_IPC_BASE = 0x0e2d66a5,
+ JBR_SERVER_TERM_RANGE,
+ JBR_SERVER_OPEN_TERM,
+ JBR_SERVER_NEXT_ENTRY
+};
+
+/*
+ * Need to declare jbr_lk_call_dispatch as jbr_lk_continue and *
+ * jbr_lk_perform_local_op call it, before code is generated. *
+ */
+int32_t
+jbr_lk_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata);
+
+int32_t
+jbr_lk_dispatch (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata);
+
+/* Used to check the quorum of acks received after the fop
+ * confirming the status of the fop on all the brick processes
+ * for this particular subvolume
+ */
+/*
+ * Quorum test shared by the fop paths.
+ *
+ * The requirement is n_children * priv->quorum_pct and the achieved value is
+ * current_state * 100; quorum holds unless the achieved value is below the
+ * requirement.  Callers pass the ack count they want evaluated: before the
+ * leader executes a fop they add one for its assumed success, afterwards
+ * they pass the count that reflects the leader's real outcome.
+ *
+ * Returns _gf_true when quorum is met, _gf_false when it is not (an error
+ * is logged) or when `this` or its private data is missing.
+ */
+gf_boolean_t
+fop_quorum_check (xlator_t *this, double n_children,
+                  double current_state)
+{
+        jbr_private_t   *priv   = NULL;
+        double           needed = 0;
+        double           have   = 0;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, fail);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, fail);
+
+        needed = n_children * priv->quorum_pct;
+        have = current_state * 100.0;
+
+        if (!(have < needed))
+                return _gf_true;
+
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                J_MSG_QUORUM_NOT_MET,
+                "Quorum not met. quorum_pct = %f "
+                "Current State = %f, Required State = %f",
+                priv->quorum_pct, have,
+                needed);
+
+fail:
+        return _gf_false;
+}
+
+/*
+ * Return this translator's context for `inode`, creating it on first use.
+ *
+ * A new context is fully initialized (lock and both queue heads) before it
+ * is stored in the inode, so it is never visible half-built.
+ *
+ * NOTE(review): the unlocked __inode_ctx_get/__inode_ctx_set variants are
+ * used and the callers in this file take no inode lock first - confirm that
+ * concurrent first use of the same inode cannot happen.
+ *
+ * Returns the context, or NULL if it could not be allocated or stored.
+ */
+jbr_inode_ctx_t *
+jbr_get_inode_ctx (xlator_t *this, inode_t *inode)
+{
+        uint64_t                 ctx_int = 0LL;
+        jbr_inode_ctx_t         *ctx_ptr = NULL;
+
+        if (__inode_ctx_get(inode, this, &ctx_int) == 0) {
+                return (jbr_inode_ctx_t *)(long)ctx_int;
+        }
+
+        ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr),
+                             gf_mt_jbr_inode_ctx_t);
+        if (!ctx_ptr) {
+                return NULL;
+        }
+
+        LOCK_INIT(&ctx_ptr->lock);
+        INIT_LIST_HEAD(&ctx_ptr->aqueue);
+        INIT_LIST_HEAD(&ctx_ptr->pqueue);
+
+        ctx_int = (uint64_t)(long)ctx_ptr;
+        if (__inode_ctx_set(inode, this, &ctx_int) != 0) {
+                LOCK_DESTROY(&ctx_ptr->lock);
+                GF_FREE(ctx_ptr);
+                ctx_ptr = NULL;
+        }
+
+        return ctx_ptr;
+}
+
+/*
+ * Return this translator's context for `fd`, creating it on first use.
+ *
+ * Uses the unlocked __fd_ctx_get/__fd_ctx_set variants; the caller in this
+ * file (jbr_mark_fd_dirty) holds fd->lock around the call.
+ *
+ * Returns the context, or NULL if it could not be allocated or stored.
+ */
+jbr_fd_ctx_t *
+jbr_get_fd_ctx (xlator_t *this, fd_t *fd)
+{
+        uint64_t         ctx_int = 0LL;
+        jbr_fd_ctx_t    *ctx_ptr;
+
+        if (__fd_ctx_get(fd, this, &ctx_int) == 0) {
+                return (jbr_fd_ctx_t *)(long)ctx_int;
+        }
+
+        ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t);
+        if (!ctx_ptr) {
+                return NULL;
+        }
+
+        if (__fd_ctx_set(fd, this, (uint64_t)ctx_ptr) != 0) {
+                GF_FREE(ctx_ptr);
+                return NULL;
+        }
+
+        INIT_LIST_HEAD(&ctx_ptr->dirty_list);
+        INIT_LIST_HEAD(&ctx_ptr->fd_list);
+
+        return ctx_ptr;
+}
+
+/*
+ * Record that local->fd has un-synced data.
+ *
+ * Under fd->lock a new dirty entry is appended to the fd context's
+ * dirty_list.  If the fd context is not yet on priv->dirty_fds, an extra fd
+ * reference is taken and the context is added to that list under
+ * priv->dirty_lock.  On allocation failure an error is logged and nothing
+ * is recorded.
+ */
+void
+jbr_mark_fd_dirty (xlator_t *this, jbr_local_t *local)
+{
+        fd_t                    *fd             = local->fd;
+        jbr_fd_ctx_t            *ctx_ptr;
+        jbr_dirty_list_t        *dirty;
+        jbr_private_t           *priv           = this->private;
+
+        /*
+         * TBD: don't do any of this for O_SYNC/O_DIRECT writes.
+         * Unfortunately, that optimization requires that we distinguish
+         * between writev and other "write" calls, saving the original flags
+         * and checking them in the callback.  Too much work for too little
+         * gain right now.
+         */
+
+        LOCK(&fd->lock);
+        ctx_ptr = jbr_get_fd_ctx(this, fd);
+        dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t);
+        if (ctx_ptr && dirty) {
+                gf_msg_trace (this->name, 0,
+                              "marking fd %p as dirty (%p)", fd, dirty);
+                /* TBD: fill dirty->id from what changelog gave us */
+                list_add_tail(&dirty->links, &ctx_ptr->dirty_list);
+                if (list_empty(&ctx_ptr->fd_list)) {
+                        /* Add a ref so _release doesn't get called. */
+                        ctx_ptr->fd = fd_ref(fd);
+                        LOCK(&priv->dirty_lock);
+                        list_add_tail (&ctx_ptr->fd_list,
+                                       &priv->dirty_fds);
+                        UNLOCK(&priv->dirty_lock);
+                }
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        J_MSG_MEM_ERR, "could not mark %p dirty", fd);
+                /*
+                 * ctx_ptr is deliberately not freed: jbr_get_fd_ctx either
+                 * fetched it from the fd or stored it there, so the fd still
+                 * points at it.  Only the unused dirty entry is released.
+                 */
+                if (dirty) {
+                        GF_FREE(dirty);
+                }
+        }
+        UNLOCK(&fd->lock);
+}
+
+#define JBR_TERM_XATTR "trusted.jbr.term"
+#define JBR_INDEX_XATTR "trusted.jbr.index"
+#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count"
+#define RECON_TERM_XATTR "trusted.jbr.recon-term"
+#define RECON_INDEX_XATTR "trusted.jbr.recon-index"
+
+/*
+ * Admission check and per-request setup for write-type fops.
+ *
+ * Leader: fails with *op_errno = EROFS unless enough peers are up.
+ * Non-leader: the request is accepted only if xdata shows it came from the
+ * leader (JBR_TERM_XATTR present) or from reconciliation (RECON_TERM_XATTR
+ * and RECON_INDEX_XATTR both present); otherwise *op_errno = EREMOTE.
+ *
+ * On acceptance a zeroed jbr_local_t is allocated, given a reference to fd
+ * (if one was passed) and attached to frame->local.
+ *
+ * Returns 0 on success, -1 on failure.  *op_errno is left untouched when
+ * the failure is a validation or allocation error.
+ */
+int32_t
+jbr_leader_checks_and_init (call_frame_t *frame, xlator_t *this, int *op_errno,
+                            dict_t *xdata, fd_t *fd)
+{
+        jbr_local_t     *local          = NULL;
+        jbr_private_t   *priv           = NULL;
+        int32_t          ret            = -1;
+        gf_boolean_t     quorate        = _gf_false;
+        int              from_leader    = _gf_false;
+        int              from_recon     = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+
+        /*
+         * Our first goal here is to avoid "split brain surprise" for users who
+         * specify exactly 50% with two- or three-way replication.  That means
+         * either a more-than check against half the total replicas or an
+         * at-least check against half of our peers (one less).  Of the two,
+         * only an at-least check supports the intuitive use of 100% to mean
+         * all replicas must be present, because "more than 100%" will never
+         * succeed regardless of which count we use.  This leaves us with a
+         * slightly non-traditional definition of quorum ("at least X% of peers
+         * not including ourselves") but one that's useful enough to be worth
+         * it.
+         *
+         * Note that n_children and up_children *do* include the local
+         * subvolume, so we need to subtract one in each case.
+         */
+        if (priv->leader) {
+                quorate = fop_quorum_check (this,
+                                            (double)(priv->n_children - 1),
+                                            (double)(priv->up_children - 1));
+
+                if (quorate == _gf_false) {
+                        /* Emulate the AFR client-side-quorum behavior. */
+                        gf_msg (this->name, GF_LOG_ERROR, EROFS,
+                                J_MSG_QUORUM_NOT_MET, "Sufficient number of "
+                                "subvolumes are not up to meet quorum.");
+                        *op_errno = EROFS;
+                        goto out;
+                }
+        } else {
+                if (xdata) {
+                        if (dict_get(xdata, JBR_TERM_XATTR)) {
+                                from_leader = _gf_true;
+                        }
+                        if (dict_get(xdata, RECON_TERM_XATTR) &&
+                            dict_get(xdata, RECON_INDEX_XATTR)) {
+                                from_recon = _gf_true;
+                        }
+                }
+
+                /*
+                 * follower/recon path
+                 * just send it to local node
+                 */
+                if (!(from_leader || from_recon)) {
+                        *op_errno = EREMOTE;
+                        goto out;
+                }
+        }
+
+        local = mem_get0(this->local_pool);
+        if (!local) {
+                goto out;
+        }
+
+        local->fd = fd ? fd_ref(fd) : NULL;
+
+        INIT_LIST_HEAD(&local->qlinks);
+        frame->local = local;
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Make sure *xdata exists and stamp it with the current term and the next
+ * index.
+ *
+ * If *xdata is NULL a new dict is created and stored there; the caller is
+ * responsible for it from then on, including when this function fails at a
+ * later step.  JBR_TERM_XATTR is set to priv->current_term and
+ * JBR_INDEX_XATTR to priv->index after incrementing it under
+ * priv->index_lock.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int32_t
+jbr_initialize_xdata_set_attrs (xlator_t *this, dict_t **xdata)
+{
+        jbr_private_t   *priv   = NULL;
+        int32_t          ret    = -1;
+        uint32_t         ti     = 0;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, xdata, out);
+
+        if (!*xdata) {
+                *xdata = dict_new();
+                if (!*xdata) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                J_MSG_MEM_ERR, "failed to allocate xdata");
+                        goto out;
+                }
+        }
+
+        if (dict_set_int32(*xdata, JBR_TERM_XATTR, priv->current_term) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_DICT_FLR, "failed to set jbr-term");
+                goto out;
+        }
+
+        /* The index is shared by all requests, hence the lock. */
+        LOCK(&priv->index_lock);
+        ti = ++(priv->index);
+        UNLOCK(&priv->index_lock);
+        if (dict_set_int32(*xdata, JBR_INDEX_XATTR, ti) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_DICT_FLR, "failed to set index");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Take the request in frame->local off its inode queue and, if another
+ * request is pending on that inode, promote it to the active queue and
+ * resume it.
+ *
+ * The unlink of our own entry is done under ictx->lock because the queues
+ * are shared with other requests on the same inode.  The promoted request
+ * is resumed only after the lock has been released, so the resumed code can
+ * take the same lock again, and its qstub pointer is cleared first because
+ * the stub is consumed by call_resume.
+ *
+ * Does nothing if the request is not on a queue.  Returns 0, or -1 on a
+ * validation failure.
+ */
+int32_t
+jbr_remove_from_queue (call_frame_t *frame, xlator_t *this)
+{
+        int32_t          ret            = -1;
+        jbr_inode_ctx_t *ictx           = NULL;
+        jbr_local_t     *local          = NULL;
+        jbr_local_t     *next           = NULL;
+        call_stub_t     *to_resume      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        if (local->qlinks.next != &local->qlinks) {
+                ictx = jbr_get_inode_ctx(this, local->fd->inode);
+                if (!ictx) {
+                        /* No context to lock; just unlink ourselves. */
+                        list_del(&local->qlinks);
+                } else {
+                        LOCK(&ictx->lock);
+                        list_del(&local->qlinks);
+                        if (ictx->pending) {
+                                /*
+                                 * TBD: dequeue *all* non-conflicting
+                                 * reqs
+                                 *
+                                 * With the stub implementation there
+                                 * can only be one request active at a
+                                 * time (zero here) so it's not an
+                                 * issue.  In a real implementation
+                                 * there might still be other active
+                                 * requests to check against, and
+                                 * multiple pending requests that could
+                                 * continue.
+                                 */
+                                gf_msg_debug (this->name, 0,
+                                              "unblocking next request");
+                                --(ictx->pending);
+                                next = list_entry (ictx->pqueue.next,
+                                                   jbr_local_t, qlinks);
+                                list_del(&next->qlinks);
+                                list_add_tail(&next->qlinks,
+                                              &ictx->aqueue);
+                                to_resume = next->qstub;
+                                next->qstub = NULL;
+                        } else {
+                                --(ictx->active);
+                        }
+                        UNLOCK(&ictx->lock);
+                }
+        }
+
+        if (to_resume) {
+                call_resume(to_resume);
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Callback for the local execution of lk.
+ *
+ * Follower: drops the fd reference and unwinds with the op_ret/op_errno it
+ * received.
+ * Leader, unlock (F_UNLCK): counts its own success, runs the quorum check
+ * over local->successful_acks and unwinds with 0 on quorum or -1/EROFS
+ * otherwise.
+ * Leader, lock: a local failure is unwound as is; on success local->stub is
+ * resumed to carry on with the operation.
+ *
+ * A request is taken off the inode queue here only for unlock operations.
+ * Any validation failure, queue-removal failure or missing stub releases
+ * local and unwinds with -1.
+ */
+int32_t
+jbr_lk_complete (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct gf_flock *flock, dict_t *xdata)
+{
+        int32_t          ret    = -1;
+        jbr_private_t   *priv   = NULL;
+        jbr_local_t     *local  = NULL;
+        gf_boolean_t     result = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, err);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, err);
+        GF_VALIDATE_OR_GOTO (this->name, frame, err);
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, local, err);
+        GF_VALIDATE_OR_GOTO (this->name, flock, err);
+        GF_VALIDATE_OR_GOTO (this->name, xdata, err);
+
+        /*
+         * Remove from queue for unlock operation only.
+         * For lock operation, it will be done in fan-in.
+         */
+        if (flock->l_type == F_UNLCK) {
+                ret = jbr_remove_from_queue (frame, this);
+                if (ret)
+                        goto err;
+        }
+
+        /*
+         * On a follower, unwind with the op_ret and op_errno.  On a
+         * leader, if the fop is a locking fop, and it's a failure,
+         * send fail, else call stub which will dispatch the fop to
+         * the followers.
+         *
+         * If the fop is an unlocking fop, check quorum.  If quorum
+         * is met, then send success.  Else rollback on leader,
+         * followed by followers, and then send -ve ack to client.
+         */
+        if (priv->leader) {
+
+                /* Increase the successful acks if it's a success. */
+                LOCK(&frame->lock);
+                if (op_ret != -1)
+                        (local->successful_acks)++;
+                UNLOCK(&frame->lock);
+
+                if (flock->l_type == F_UNLCK) {
+                        result = fop_quorum_check (this,
+                                            (double)priv->n_children,
+                                            (double)local->successful_acks);
+                        if (result == _gf_false) {
+                                op_ret = -1;
+                                op_errno = EROFS;
+                                gf_msg (this->name, GF_LOG_ERROR, EROFS,
+                                        J_MSG_QUORUM_NOT_MET,
+                                        "Quorum is not met. "
+                                        "The operation has failed.");
+
+                                /*
+                                 * TODO: PERFORM UNLOCK ROLLBACK ON LEADER
+                                 * FOLLOWED BY FOLLOWERS.
+                                 */
+                        } else {
+                                op_ret = 0;
+                                op_errno = 0;
+                        }
+
+                        fd_unref(local->fd);
+                        STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno,
+                                             flock, xdata);
+                } else {
+                        if (op_ret == -1) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        J_MSG_LOCK_FAILURE,
+                                        "The lock operation failed on "
+                                        "the leader.");
+
+                                fd_unref(local->fd);
+                                STACK_UNWIND_STRICT (lk, frame, op_ret,
+                                                     op_errno, flock, xdata);
+                        } else {
+                                if (!local->stub) {
+                                        goto err;
+                                }
+
+                                call_resume(local->stub);
+                        }
+                }
+        } else {
+                fd_unref(local->fd);
+                STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno,
+                                     flock, xdata);
+        }
+
+        return 0;
+
+err:
+        if (local) {
+                if (local->stub) {
+                        call_stub_destroy(local->stub);
+                }
+                if (local->qstub) {
+                        call_stub_destroy(local->qstub);
+                }
+                if (local->fd) {
+                        fd_unref(local->fd);
+                }
+                /*
+                 * Detach local from the frame before releasing it so the
+                 * frame is not left holding a pointer to freed memory.
+                 */
+                frame->local = NULL;
+                mem_put(local);
+        }
+        STACK_UNWIND_STRICT (lk, frame, -1, op_errno,
+                             flock, xdata);
+        return 0;
+}
+
+/*
+ * jbr_lk_fan_in: callback that collects one lk reply per outstanding call.
+ *
+ * Each invocation decrements local->call_count under frame->lock and, when
+ * op_ret is not -1, records a successful ack.  The invocation that brings
+ * the count to zero finishes the operation:
+ *   - unlock (F_UNLCK): resume local->stub;
+ *   - lock: remove the request from the queue, drop the fd reference and
+ *     unwind with success, or with EROFS when fop_quorum_check fails.
+ *
+ * Returns 0 on the normal path, -1 when argument validation fails, or the
+ * non-zero result of jbr_remove_from_queue.
+ */
+int32_t
+jbr_lk_fan_in (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+               dict_t *xdata)
+{
+        uint8_t          call_count = -1;
+        int32_t          ret        = -1;
+        gf_boolean_t     result     = _gf_false;
+        jbr_local_t     *local      = NULL;
+        jbr_private_t   *priv       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        priv = this->private;
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n",
+                      op_ret, op_errno);
+
+        LOCK(&frame->lock);
+        call_count = --(local->call_count);
+        if (op_ret != -1) {
+                /* Increment the number of successful acks *
+                 * received for the operation.             *
+                 */
+                (local->successful_acks)++;
+                local->successful_op_ret = op_ret;
+        }
+        /* Arguments follow the order named in the format string. */
+        gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
+                      local->successful_acks, op_ret, op_errno);
+        UNLOCK(&frame->lock);
+
+        if (call_count == 0) {
+                /*
+                 * If the fop is a locking fop, then check quorum. If quorum *
+                 * is met, send successful ack to the client. If quorum is   *
+                 * not met, then rollback locking on followers, followed by  *
+                 * rollback of locking on leader, and then sending -ve ack   *
+                 * to the client.                                            *
+                 *                                                           *
+                 * If the fop is a unlocking fop, then call stub.            *
+                 */
+                if (flock->l_type == F_UNLCK) {
+                        call_resume(local->stub);
+                } else {
+                        /*
+                         * Remove from queue for locking fops, for unlocking *
+                         * fops, it is taken care of in jbr_lk_complete      *
+                         */
+                        ret = jbr_remove_from_queue (frame, this);
+                        if (ret)
+                                goto out;
+
+                        fd_unref(local->fd);
+
+                        result = fop_quorum_check (this,
+                                          (double)priv->n_children,
+                                          (double)local->successful_acks);
+                        if (result == _gf_false) {
+                                gf_msg (this->name, GF_LOG_ERROR, EROFS,
+                                        J_MSG_QUORUM_NOT_MET,
+                                        "Didn't receive enough acks to meet "
+                                        "quorum. Failing the locking "
+                                        "operation and initiating rollback on "
+                                        "followers and the leader "
+                                        "respectively.");
+
+                                /* TODO: PERFORM ROLLBACK OF LOCKING ON
+                                 * FOLLOWERS, FOLLOWED BY ROLLBACK ON
+                                 * LEADER.
+                                 */
+
+                                STACK_UNWIND_STRICT (lk, frame, -1, EROFS,
+                                                     flock, xdata);
+                        } else {
+                                STACK_UNWIND_STRICT (lk, frame, 0, 0,
+                                                     flock, xdata);
+                        }
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * jbr_perform_lk_on_leader: wind the lk fop to the first child, with
+ * jbr_lk_complete as the callback.  It is written as a separate function
+ * so that it can also be used as the resume function of a queued stub.
+ *
+ * Returns 0 once the fop has been wound, -1 if this, frame, flock or fd
+ * is NULL (nothing is wound in that case).
+ */
+int32_t
+jbr_perform_lk_on_leader (call_frame_t *frame, xlator_t *this,
+                          fd_t *fd, int32_t cmd, struct gf_flock *flock,
+                          dict_t *xdata)
+{
+        int32_t status = -1;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, done);
+        GF_VALIDATE_OR_GOTO (this->name, frame, done);
+        GF_VALIDATE_OR_GOTO (this->name, flock, done);
+        GF_VALIDATE_OR_GOTO (this->name, fd, done);
+
+        STACK_WIND (frame, jbr_lk_complete,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+                    fd, cmd, flock, xdata);
+
+        status = 0;
+done:
+        return status;
+}
+
+/*
+ * jbr_lk_perform_local_op: start an lk request.
+ *
+ * An unlock (F_UNLCK) is handed to jbr_lk_call_dispatch.  A lock is either
+ * queued on the inode's pending queue (when another request is active on
+ * that inode) or put on the active queue and wound through
+ * jbr_perform_lk_on_leader.
+ *
+ * @op_errno: out parameter, set when this function itself detects an error
+ *            (EIO: no inode context, ENOMEM: the queue stub could not be
+ *            created).
+ *
+ * Returns 0 when the request was dispatched or queued, non-zero otherwise.
+ */
+int32_t
+jbr_lk_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno,
+                         fd_t *fd, int32_t cmd, struct gf_flock *flock,
+                         dict_t *xdata)
+{
+        int32_t          ret   = -1;
+        jbr_local_t     *local = NULL;
+        jbr_inode_ctx_t *ictx  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        local = frame->local;
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+        GF_VALIDATE_OR_GOTO (this->name, flock, out);
+
+        /*
+         * Check if the fop is a locking fop or unlocking fop, and
+         * handle it accordingly. If it is a locking fop, take the
+         * lock on leader first, and then send it to the followers.
+         * If it is a unlocking fop, unlock the followers first,
+         * and then on meeting quorum perform the unlock on the leader.
+         */
+        if (flock->l_type == F_UNLCK) {
+                ret = jbr_lk_call_dispatch (frame, this, op_errno,
+                                            fd, cmd, flock, xdata);
+                if (ret)
+                        goto out;
+        } else {
+                ictx = jbr_get_inode_ctx(this, fd->inode);
+                if (!ictx) {
+                        *op_errno = EIO;
+                        goto out;
+                }
+
+                LOCK(&ictx->lock);
+                if (ictx->active) {
+                        gf_msg_debug (this->name, 0,
+                                      "queuing request due to conflict");
+
+                        local->qstub = fop_lk_stub (frame,
+                                                    jbr_perform_lk_on_leader,
+                                                    fd, cmd, flock, xdata);
+                        if (!local->qstub) {
+                                UNLOCK(&ictx->lock);
+                                /*
+                                 * Report a cause like the EIO path above
+                                 * does; the arguments were validated, so an
+                                 * allocation failure is the presumed reason.
+                                 */
+                                *op_errno = ENOMEM;
+                                goto out;
+                        }
+                        list_add_tail(&local->qlinks, &ictx->pqueue);
+                        ++(ictx->pending);
+                        UNLOCK(&ictx->lock);
+                        ret = 0;
+                        goto out;
+                } else {
+                        list_add_tail(&local->qlinks, &ictx->aqueue);
+                        ++(ictx->active);
+                }
+                UNLOCK(&ictx->lock);
+                /*
+                 * jbr_perform_lk_on_leader only re-validates this, frame,
+                 * flock and fd, all of which were checked above, so its
+                 * result is not inspected (it is overwritten just below).
+                 */
+                ret = jbr_perform_lk_on_leader (frame, this, fd, cmd,
+                                                flock, xdata);
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+/*
+ * jbr_lk_continue: second stage of an lk request.
+ *
+ * An unlock (F_UNLCK) is wound to FIRST_CHILD(this) with jbr_lk_complete as
+ * the callback.  A lock is handed to jbr_lk_dispatch; if that fails the
+ * frame is unwound with op_ret -1.
+ *
+ * Returns 0 on success, the non-zero jbr_lk_dispatch result on dispatch
+ * failure, or -1 when an argument fails validation (in which case nothing
+ * is wound or unwound here).
+ */
+int32_t
+jbr_lk_continue (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t ret = -1;
+ jbr_local_t *local = NULL;
+ jbr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ priv = this->private;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+ GF_VALIDATE_OR_GOTO (this->name, flock, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, xdata, out);
+
+ /*
+ * If it's a locking fop, then call dispatch to followers *
+ * If it's a unlock fop, then perform the unlock operation *
+ */
+ if (flock->l_type == F_UNLCK) {
+ STACK_WIND (frame, jbr_lk_complete,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+ fd, cmd, flock, xdata);
+ } else {
+ /*
+ * Directly call jbr_lk_dispatch instead of appending *
+ * in queue, which is done at jbr_lk_perform_local_op *
+ * for locking fops *
+ */
+ ret = jbr_lk_dispatch (frame, this, fd, cmd,
+ flock, xdata);
+ if (ret) {
+ STACK_UNWIND_STRICT (lk, frame, -1, 0,
+ flock, xdata); /* NOTE(review): failure is unwound with op_errno 0 - confirm intended */
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+#pragma generate
+
+/*
+ * jbr_count_up_kids: count how many of the first priv->n_children bits
+ * are set in priv->kid_state.
+ */
+uint8_t
+jbr_count_up_kids (jbr_private_t *priv)
+{
+        uint8_t up  = 0;
+        uint8_t bit = 0;
+
+        while (bit < priv->n_children) {
+                if (priv->kid_state & (1 << bit)) {
+                        ++up;
+                }
+                ++bit;
+        }
+
+        return up;
+}
+
+/*
+ * The fsync machinery looks a lot like that for any write call, but there are
+ * some important differences that are easy to miss. First, we don't care
+ * about the xdata that shows whether the call came from a leader or
+ * reconciliation process. If we're the leader we fan out; if we're not we
+ * don't. Second, we don't wait for followers before we issue the local call.
+ * The code generation system could be updated to handle this, and still might
+ * if we need to implement other "almost identical" paths (e.g. for open), but
+ * a copy is more readable as long as it's just one.
+ */
+
+/*
+ * jbr_fsync_cbk: per-reply fsync callback.  Decrements local->call_count
+ * under frame->lock; the reply that brings it to zero unwinds the fsync
+ * with that reply's own result.  Always returns 0.
+ */
+int32_t
+jbr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+        jbr_local_t     *local      = frame->local;
+        gf_boolean_t     last_reply = _gf_false;
+
+        LOCK(&frame->lock);
+        --(local->call_count);
+        if (local->call_count == 0) {
+                last_reply = _gf_true;
+        }
+        UNLOCK(&frame->lock);
+
+        if (!last_reply) {
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+        return 0;
+}
+
+/*
+ * jbr_fsync_local_cbk: callback for the local fsync.  Frees every dirty
+ * entry that jbr_fsync moved onto local->qlinks, then continues with the
+ * common reply handling in jbr_fsync_cbk and returns its result.
+ */
+int32_t
+jbr_fsync_local_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        jbr_local_t      *local = frame->local;
+        jbr_dirty_list_t *cur;
+        jbr_dirty_list_t *next;
+        int32_t           rc;
+
+        list_for_each_entry_safe (cur, next, &local->qlinks, links) {
+                gf_msg_trace (this->name, 0,
+                              "sending post-op on %p (%p)", local->fd, cur);
+                GF_FREE(cur);
+        }
+
+        rc = jbr_fsync_cbk (frame, cookie, this, op_ret, op_errno,
+                            prebuf, postbuf, xdata);
+        return rc;
+}
+
+/*
+ * jbr_fsync: fsync entry point (patched into this->fops by jbr_init).
+ *
+ * Moves the fd's dirty list onto a freshly allocated local, issues the
+ * fsync on the first (local) child and, when this node is the leader, on
+ * every remaining child as well.  local->call_count is set to the number
+ * of replies expected so that jbr_fsync_cbk unwinds on the last one.
+ *
+ * Always returns 0; allocation failure is reported by unwinding with
+ * ENOMEM.
+ */
+int32_t
+jbr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+           dict_t *xdata)
+{
+        jbr_private_t   *priv      = this->private;
+        jbr_local_t     *local;
+        uint64_t         ctx_int   = 0LL;
+        jbr_fd_ctx_t    *ctx_ptr;
+        xlator_list_t   *trav;
+        gf_boolean_t     is_leader = _gf_false;
+
+        local = mem_get0(this->local_pool);
+        if (!local) {
+                STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM,
+                                    NULL, NULL, xdata);
+                return 0;
+        }
+        INIT_LIST_HEAD(&local->qlinks);
+        frame->local = local;
+
+        /* Move the dirty list from the fd to the fsync request. */
+        LOCK(&fd->lock);
+        if (__fd_ctx_get(fd, this, &ctx_int) == 0) {
+                ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int;
+                list_splice_init (&ctx_ptr->dirty_list,
+                                  &local->qlinks);
+        }
+        UNLOCK(&fd->lock);
+
+        /*
+         * Sample the role once so that the expected reply count and the
+         * number of calls actually wound cannot disagree.
+         */
+        is_leader = priv->leader ? _gf_true : _gf_false;
+
+        /* Issue the local call. */
+        local->call_count = is_leader ? priv->n_children : 1;
+        STACK_WIND (frame, jbr_fsync_local_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+                    fd, flags, xdata);
+
+        /* Issue remote calls if we're the leader. */
+        if (is_leader) {
+                for (trav = this->children->next; trav; trav = trav->next) {
+                        /* Each remote call goes to its own child. */
+                        STACK_WIND (frame, jbr_fsync_cbk,
+                                    trav->xlator,
+                                    trav->xlator->fops->fsync,
+                                    fd, flags, xdata);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * jbr_getxattr_special: getxattr entry point (patched into this->fops by
+ * jbr_init).
+ *
+ * A non-leader refuses with EREMOTE.  On the leader, any name other than
+ * JBR_REP_COUNT_XATTR is passed straight down to the first child; for
+ * JBR_REP_COUNT_XATTR the current count of up children is recomputed,
+ * stored in priv->up_children and returned in a new dict.
+ *
+ * Always returns 0; failures are reported through the unwind.
+ */
+int32_t
+jbr_getxattr_special (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                      const char *name, dict_t *xdata)
+{
+        dict_t          *result;
+        jbr_private_t   *priv = this->private;
+
+        if (!priv->leader) {
+                STACK_UNWIND_STRICT (getxattr, frame, -1, EREMOTE, NULL, NULL);
+                return 0;
+        }
+
+        if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) {
+                STACK_WIND_TAIL (frame,
+                                 FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->getxattr,
+                                 loc, name, xdata);
+                return 0;
+        }
+
+        result = dict_new();
+        if (!result) {
+                goto dn_failed;
+        }
+
+        priv->up_children = jbr_count_up_kids(this->private);
+        if (dict_set_uint32(result, JBR_REP_COUNT_XATTR,
+                            priv->up_children) != 0) {
+                goto dsu_failed;
+        }
+
+        STACK_UNWIND_STRICT (getxattr, frame, 0, 0, result, NULL);
+        /*
+         * Drop only our own reference: the dict was just handed up the
+         * stack, so it must not be torn down unconditionally.  This also
+         * matches how the ipc handlers in this file release reply dicts.
+         */
+        dict_unref(result);
+        return 0;
+
+dsu_failed:
+        dict_unref(result);
+dn_failed:
+        STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * jbr_flush_fd: free every entry on fd_ctx->dirty_list and leave the list
+ * head re-initialized (empty).
+ */
+void
+jbr_flush_fd (xlator_t *this, jbr_fd_ctx_t *fd_ctx)
+{
+        struct list_head *head = &fd_ctx->dirty_list;
+        jbr_dirty_list_t *cur;
+        jbr_dirty_list_t *next;
+
+        list_for_each_entry_safe (cur, next, head, links) {
+                gf_msg_trace (this->name, 0,
+                              "sending post-op on %p (%p)", fd_ctx->fd, cur);
+                GF_FREE(cur);
+        }
+
+        /* Entries were freed without unlinking, so reset the head. */
+        INIT_LIST_HEAD(head);
+}
+/*
+ * jbr_flush_thread: body of the flush thread that jbr_init starts with
+ * pthread_create; ctx is the xlator.
+ *
+ * Loops forever.  Each pass detaches priv->dirty_fds under dirty_lock,
+ * then for every fd on the detached list: fsyncs it on the first child,
+ * frees its dirty entries and unlinks it from the list under the fd lock,
+ * and drops one fd reference.  It then sleeps for JBR_FLUSH_INTERVAL.
+ */
+void *
+jbr_flush_thread (void *ctx)
+{
+ xlator_t *this = ctx;
+ jbr_private_t *priv = this->private;
+ struct list_head dirty_fds;
+ jbr_fd_ctx_t *fd_ctx;
+ jbr_fd_ctx_t *fd_tmp;
+ int ret;
+
+ for (;;) {
+ /*
+ * We have to be very careful to avoid lock inversions here, so
+ * we can't just hold priv->dirty_lock while we take and
+ * release locks for each fd. Instead, we only hold dirty_lock
+ * at the beginning of each iteration, as we (effectively) make
+ * a copy of the current list head and then clear the original.
+ * This leads to four scenarios for adding the first entry to
+ * an fd and potentially putting it on the global list.
+ *
+ * (1) While we're asleep. No lock contention, it just gets
+ * added and will be processed on the next iteration.
+ *
+ * (2) After we've made a local copy, but before we've started
+ * processing that fd. The new entry will be added to the
+ * fd (under its lock), and we'll process it on the current
+ * iteration.
+ *
+ * (3) While we're processing the fd. They'll block on the fd
+ * lock, then see that the list is empty and put it on the
+ * global list. We'll process it here on the next
+ * iteration.
+ *
+ * (4) While we're working, but after we've processed that fd.
+ * Same as (1) as far as that fd is concerned.
+ */
+ INIT_LIST_HEAD(&dirty_fds);
+ LOCK(&priv->dirty_lock);
+ list_splice_init(&priv->dirty_fds, &dirty_fds);
+ UNLOCK(&priv->dirty_lock);
+
+ list_for_each_entry_safe (fd_ctx, fd_tmp, &dirty_fds, fd_list) {
+ ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0,
+ NULL, NULL);
+ if (ret) { /* failure is only logged; the fd is still cleaned up below */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ J_MSG_SYS_CALL_FAILURE,
+ "failed to fsync %p (%d)",
+ fd_ctx->fd, -ret);
+ }
+
+ LOCK(&fd_ctx->fd->lock);
+ jbr_flush_fd(this, fd_ctx);
+ list_del_init(&fd_ctx->fd_list);
+ UNLOCK(&fd_ctx->fd->lock);
+ fd_unref(fd_ctx->fd); /* NOTE(review): presumably pairs with a ref taken when the fd was put on the dirty list - confirm */
+ }
+
+ sleep(JBR_FLUSH_INTERVAL);
+ }
+
+ return NULL; /* not reached: the loop above has no exit */
+}
+
+
+/*
+ * jbr_get_changelog_dir: locate the changelog directory.
+ *
+ * Walks down the first-child chain starting at this translator until one
+ * of type "features/changelog" is found, then reads its "changelog-dir"
+ * option into *cl_dir_p.
+ *
+ * Returns 0 on success, ENOENT when no changelog translator is found on
+ * the chain, ENODATA when it has no "changelog-dir" option.  Both failures
+ * are logged here.
+ */
+int32_t
+jbr_get_changelog_dir (xlator_t *this, char **cl_dir_p)
+{
+        xlator_t        *cl_xl;
+
+        /* Find our changelog translator. */
+        cl_xl = this;
+        while (cl_xl) {
+                if (strcmp(cl_xl->type, "features/changelog") == 0) {
+                        break;
+                }
+                /*
+                 * A translator without children ends the chain; stop there
+                 * so the "not found" branch below is reached instead of
+                 * dereferencing a NULL child list.
+                 */
+                cl_xl = cl_xl->children ? cl_xl->children->xlator : NULL;
+        }
+        if (!cl_xl) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_INIT_FAIL,
+                        "failed to find changelog translator");
+                return ENOENT;
+        }
+
+        /* Find the actual changelog directory. */
+        if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_INIT_FAIL,
+                        "failed to find changelog-dir for %s", cl_xl->name);
+                return ENODATA;
+        }
+
+        return 0;
+}
+
+/*
+ * jbr_get_terms: handler for the JBR_SERVER_TERM_RANGE ipc op (called from
+ * jbr_ipc).
+ *
+ * Scans the changelog directory for entries matching "TERM.*" and unwinds
+ * the ipc fop with a dict holding "term-first" (lowest term number seen),
+ * "term-last" (highest) and "term-contig" (see the comment in the body).
+ * On any failure it unwinds with op_ret -1 and the op_errno in effect.
+ */
+void
+jbr_get_terms (call_frame_t *frame, xlator_t *this)
+{
+ int32_t op_errno = 0;
+ char *cl_dir = NULL;
+ int32_t term_first = -1;
+ int32_t term_contig = -1;
+ int32_t term_last = -1;
+ int term_num = 0;
+ char *probe_str = NULL;
+ dict_t *my_xdata = NULL;
+ DIR *fp = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},}; /* scratch space handed to sys_readdir */
+
+ op_errno = jbr_get_changelog_dir(this, &cl_dir);
+ if (op_errno) {
+ goto err; /* Error was already logged. */
+ }
+ op_errno = ENODATA; /* Most common error after this. */
+
+ fp = sys_opendir (cl_dir);
+ if (!fp) {
+ op_errno = errno;
+ goto err;
+ }
+
+ /* Find first and last terms. */
+ for (;;) {
+ errno = 0; /* cleared so a NULL entry can be told apart from an error */
+ entry = sys_readdir (fp, scratch);
+ if (!entry || errno != 0) {
+ if (errno != 0) {
+ op_errno = errno;
+ goto err;
+ }
+ break;
+ }
+
+ if (fnmatch("TERM.*", entry->d_name, FNM_PATHNAME) != 0) {
+ continue;
+ }
+ /* +5 points to the character after the period */
+ term_num = atoi(entry->d_name+5);
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ J_MSG_GENERIC,
+ "%s => %d", entry->d_name, term_num);
+ if (term_num < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_INVALID,
+ "invalid term file name %s", entry->d_name);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if ((term_first < 0) || (term_first > term_num)) {
+ term_first = term_num;
+ }
+ if ((term_last < 0) || (term_last < term_num)) {
+ term_last = term_num;
+ }
+ }
+ if ((term_first < 0) || (term_last < 0)) {
+ /* TBD: are we *sure* there should always be at least one? */
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_NO_DATA, "no terms found");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ (void) sys_closedir (fp);
+ fp = NULL; /* keeps the err path from closing it a second time */
+
+ /*
+ * Find term_contig, which is the earliest term for which there are
+ * no gaps between it and term_last.
+ */
+ for (term_contig = term_last; term_contig > 0; --term_contig) {
+ if (gf_asprintf(&probe_str, "%s/TERM.%d",
+ cl_dir, term_contig-1) <= 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_MEM_ERR,
+ "failed to format term %d", term_contig-1);
+ goto err; /* op_errno is still ENODATA here */
+ }
+ if (sys_access(probe_str, F_OK) != 0) {
+ GF_FREE(probe_str);
+ break;
+ }
+ GF_FREE(probe_str);
+ }
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ J_MSG_GENERIC,
+ "found terms %d-%d (%d)",
+ term_first, term_last, term_contig);
+
+ /* Return what we've found */
+ my_xdata = dict_new();
+ if (!my_xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_MEM_ERR,
+ "failed to allocate reply dictionary");
+ goto err;
+ }
+ if (dict_set_int32(my_xdata, "term-first", term_first) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_DICT_FLR,
+ "failed to set term-first");
+ goto err;
+ }
+ if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_DICT_FLR,
+ "failed to set term-contig");
+ goto err;
+ }
+ if (dict_set_int32(my_xdata, "term-last", term_last) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_DICT_FLR,
+ "failed to set term-last");
+ goto err;
+ }
+
+ /* Finally! */
+ STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata);
+ dict_unref(my_xdata);
+ return;
+
+err: /* release whatever was acquired, then fail the fop */
+ if (fp) {
+ (void) sys_closedir (fp);
+ }
+ if (my_xdata) {
+ dict_unref(my_xdata);
+ }
+ STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL);
+}
+
+
+/*
+ * get_entry_count: binary-search an open term file for the first
+ * CHANGELOG_ENTRY_SIZE-sized record that does not start with "_P".
+ *
+ * Returns the index of that record (the file's record count when no such
+ * record is found by the search), or -1 if fstat, lseek or read fails.
+ * On success the file offset is moved back to 0; failure to do so is only
+ * logged.
+ */
+long
+get_entry_count (xlator_t *this, int fd)
+{
+        struct stat     st;
+        long            lo;     /* last entry not known to be empty */
+        long            hi;     /* first entry known to be empty */
+        long            mid;
+        char            rec[CHANGELOG_ENTRY_SIZE];
+
+        if (sys_fstat (fd, &st) < 0) {
+                return -1;
+        }
+
+        lo = 0;
+        hi = st.st_size / CHANGELOG_ENTRY_SIZE;
+
+        while ((lo + 1) < hi) {
+                mid = (lo + hi) / 2;
+
+                if (sys_lseek(fd, mid*CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) {
+                        return -1;
+                }
+                if (sys_read(fd, rec, sizeof(rec)) != sizeof(rec)) {
+                        return -1;
+                }
+
+                if ((rec[0] != '_') || (rec[1] != 'P')) {
+                        hi = mid;
+                } else {
+                        lo = mid;
+                }
+        }
+
+        if (sys_lseek(fd, 0, SEEK_SET) < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        J_MSG_SYS_CALL_FAILURE,
+                        "failed to reset offset");
+        }
+
+        return hi;
+}
+
+
+/*
+ * jbr_open_term: handler for the JBR_SERVER_OPEN_TERM ipc op (called from
+ * jbr_ipc).
+ *
+ * Builds "<changelog-dir>/TERM.<term>" from the "term" string in xdata,
+ * closes any previously opened term file, opens the new one read-only and
+ * records its entry count in priv->term_total (priv->term_read is reset to
+ * 0).  Unwinds the ipc fop with 0 on success or -1 and an errno on
+ * failure.
+ */
+void
+jbr_open_term (call_frame_t *frame, xlator_t *this, dict_t *xdata)
+{
+        int32_t          op_errno;
+        char            *cl_dir = NULL;
+        char            *term   = NULL;
+        /* Must start out NULL: the err path releases it on every failure. */
+        char            *path   = NULL;
+        jbr_private_t   *priv   = this->private;
+
+        op_errno = jbr_get_changelog_dir(this, &cl_dir);
+        if (op_errno) {
+                goto err;
+        }
+
+        if (dict_get_str(xdata, "term", &term) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_NO_DATA, "missing term");
+                op_errno = ENODATA;
+                goto err;
+        }
+
+        if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_MEM_ERR, "failed to construct path");
+                /* Do not trust whatever a failed format left behind. */
+                path = NULL;
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        if (priv->term_fd >= 0) {
+                sys_close (priv->term_fd);
+        }
+        priv->term_fd = open(path, O_RDONLY);
+        if (priv->term_fd < 0) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_SYS_CALL_FAILURE,
+                        "failed to open term file");
+                goto err;
+        }
+
+        priv->term_total = get_entry_count(this, priv->term_fd);
+        if (priv->term_total < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_NO_DATA, "failed to get entry count");
+                sys_close (priv->term_fd);
+                priv->term_fd = -1;
+                op_errno = EIO;
+                goto err;
+        }
+        priv->term_read = 0;
+
+        /* Success! */
+        STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL);
+        GF_FREE (path);
+        return;
+
+err:
+        STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL);
+        if (path) {
+                GF_FREE (path);
+        }
+}
+
+
+/*
+ * jbr_next_entry: handler for the JBR_SERVER_NEXT_ENTRY ipc op (called
+ * from jbr_ipc).
+ *
+ * Reads the next CHANGELOG_ENTRY_SIZE bytes of the open term file into
+ * priv->term_buf and unwinds the ipc fop with a dict whose "data" key
+ * refers to that buffer.  Fails with EBADFD when no term file is open,
+ * ENODATA when all entries have been read, the read's errno or EIO on a
+ * failed/short read, and ENOMEM when the reply dict cannot be built.
+ */
+void
+jbr_next_entry (call_frame_t *frame, xlator_t *this)
+{
+        int32_t          op_errno = ENOMEM;
+        jbr_private_t   *priv     = this->private;
+        ssize_t          nbytes;
+        dict_t          *my_xdata = NULL;
+
+        if (priv->term_fd < 0) {
+                op_errno = EBADFD;
+                goto err;
+        }
+
+        if (priv->term_read >= priv->term_total) {
+                op_errno = ENODATA;
+                goto err;
+        }
+
+        nbytes = sys_read (priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE);
+        if (nbytes < CHANGELOG_ENTRY_SIZE) {
+                if (nbytes < 0) {
+                        op_errno = errno;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                J_MSG_SYS_CALL_FAILURE,
+                                "error reading next entry: %s",
+                                strerror(errno));
+                } else {
+                        op_errno = EIO;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                J_MSG_SYS_CALL_FAILURE,
+                                "got %zd/%d bytes for next entry",
+                                nbytes, CHANGELOG_ENTRY_SIZE);
+                }
+                goto err;
+        }
+        ++(priv->term_read);
+
+        my_xdata = dict_new();
+        if (!my_xdata) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_MEM_ERR, "failed to allocate reply xdata");
+                goto err;
+        }
+
+        if (dict_set_static_bin(my_xdata, "data",
+                                priv->term_buf, CHANGELOG_ENTRY_SIZE) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        J_MSG_DICT_FLR, "failed to assign reply xdata");
+                goto err;
+        }
+
+        STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata);
+        dict_unref(my_xdata);
+        return;
+
+err:
+        /* The reply dict may already exist when filling it in failed. */
+        if (my_xdata) {
+                dict_unref(my_xdata);
+        }
+        STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL);
+}
+
+
+/*
+ * jbr_ipc: ipc entry point (patched into this->fops by jbr_init).  The
+ * three JBR_SERVER_* ops are served by their handlers; every other op is
+ * passed down to the first child.  Always returns 0.
+ */
+int32_t
+jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        if (op == JBR_SERVER_TERM_RANGE) {
+                jbr_get_terms(frame, this);
+        } else if (op == JBR_SERVER_OPEN_TERM) {
+                jbr_open_term(frame, this, xdata);
+        } else if (op == JBR_SERVER_NEXT_ENTRY) {
+                jbr_next_entry(frame, this);
+        } else {
+                STACK_WIND_TAIL (frame,
+                                 FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->ipc,
+                                 op, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * jbr_forget: detach this translator's context from the inode and free it
+ * if one was stored.  Always returns 0.
+ */
+int32_t
+jbr_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t        raw = 0LL;
+
+        if (inode_ctx_del(inode, this, &raw) != 0) {
+                return 0;
+        }
+        if (!raw) {
+                return 0;
+        }
+
+        GF_FREE((void *)(long)raw);
+        return 0;
+}
+
+/*
+ * jbr_release: detach this translator's context from the fd and free it
+ * if one was stored.  Always returns 0.
+ */
+int32_t
+jbr_release (xlator_t *this, fd_t *fd)
+{
+        uint64_t        raw = 0LL;
+
+        if (fd_ctx_del(fd, this, &raw) != 0) {
+                return 0;
+        }
+        if (!raw) {
+                return 0;
+        }
+
+        GF_FREE((void *)(long)raw);
+        return 0;
+}
+
+struct xlator_cbks cbks = { /* callback table exported by this translator */
+ .forget = jbr_forget, /* frees the per-inode context */
+ .release = jbr_release, /* frees the per-fd context */
+};
+
+/*
+ * jbr_reconfigure: re-read the "leader" and "quorum-percent" options and
+ * make the configured leader setting the current role.
+ *
+ * Returns 0 on success, -1 if either option cannot be re-read.
+ */
+int
+jbr_reconfigure (xlator_t *this, dict_t *options)
+{
+        jbr_private_t   *priv = this->private;
+
+        GF_OPTION_RECONF ("leader",
+                          priv->config_leader, options, bool, err);
+        GF_OPTION_RECONF ("quorum-percent",
+                          priv->quorum_pct, options, percent, err);
+        /* Log the value just read, which is what the message labels. */
+        gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
+                "reconfigure called, config_leader = %d, quorum_pct = %.1f\n",
+                priv->config_leader, priv->quorum_pct);
+
+        priv->leader = priv->config_leader;
+
+        return 0;
+
+err:
+        return -1;
+}
+
+/*
+ * jbr_get_child_index: return the zero-based position of kid in
+ * this->children, or -1 if it is not one of the children.
+ */
+int
+jbr_get_child_index (xlator_t *this, xlator_t *kid)
+{
+        xlator_list_t   *node = this->children;
+        int              idx  = 0;
+
+        while (node) {
+                if (node->xlator == kid) {
+                        return idx;
+                }
+                ++idx;
+                node = node->next;
+        }
+
+        return -1;
+}
+
+/*
+ * Child notify handling is unreasonably FUBAR. Sometimes we'll get a
+ * CHILD_DOWN for a protocol/client child before we ever got a CHILD_UP for it.
+ * Other times we won't. Because it's effectively random (probably racy), we
+ * can't just maintain a count. We actually have to keep track of the state
+ * for each child separately, to filter out the bogus CHILD_DOWN events, and
+ * then generate counts on demand.
+ *
+ * jbr_notify keeps one bit per child in priv->kid_state, recomputes
+ * priv->up_children on every CHILD_UP/CHILD_DOWN, and (when "leader" is not
+ * configured) switches priv->leader according to how many children are up.
+ * priv->child_up remembers whether a CHILD_UP has been passed to
+ * default_notify, so each transition is forwarded at most once.  All other
+ * events go straight to default_notify.
+ *
+ * Returns the default_notify result when an event is forwarded; on the
+ * paths that do not forward, ret keeps its initial value of -1.
+ */
+int
+jbr_notify (xlator_t *this, int event, void *data, ...)
+{
+ jbr_private_t *priv = this->private;
+ int index = -1;
+ int ret = -1;
+ gf_boolean_t result = _gf_false;
+ gf_boolean_t relevant = _gf_false;
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ index = jbr_get_child_index(this, data);
+ if (index >= 0) {
+ /* Check if the child was previously down
+ * and it's not a false CHILD_UP
+ */
+ if (!(priv->kid_state & (1 << index))) {
+ relevant = _gf_true;
+ }
+
+ priv->kid_state |= (1 << index);
+ priv->up_children = jbr_count_up_kids(priv);
+ gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
+ "got CHILD_UP for %s, now %u kids",
+ ((xlator_t *)data)->name,
+ priv->up_children);
+ if (!priv->config_leader && (priv->up_children > 1)) {
+ priv->leader = _gf_false;
+ }
+
+ /* If it's not relevant, or we have already *
+ * sent CHILD_UP just break */
+ if (!relevant || priv->child_up)
+ break;
+
+ /* If it's not a leader, just send the notify up */
+ if (!priv->leader) {
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_true;
+ break;
+ }
+
+ result = fop_quorum_check (this,
+ (double)(priv->n_children - 1),
+ (double)(priv->up_children - 1)); /* both counts exclude one child */
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ J_MSG_GENERIC, "Not enough children "
+ "are up to meet quorum. Waiting to "
+ "send CHILD_UP from leader");
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ J_MSG_GENERIC, "Enough children are up "
+ "to meet quorum. Sending CHILD_UP "
+ "from leader");
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_true;
+ }
+ }
+ break;
+ case GF_EVENT_CHILD_DOWN:
+ index = jbr_get_child_index(this, data);
+ if (index >= 0) {
+ /* Check if the child was previously up
+ * and it's not a false CHILD_DOWN
+ */
+ if (priv->kid_state & (1 << index)) {
+ relevant = _gf_true;
+ }
+ priv->kid_state &= ~(1 << index);
+ priv->up_children = jbr_count_up_kids(priv);
+ gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
+ "got CHILD_DOWN for %s, now %u kids",
+ ((xlator_t *)data)->name,
+ priv->up_children);
+ if (!priv->config_leader && (priv->up_children < 2)
+ && relevant) {
+ priv->leader = _gf_true;
+ }
+
+ /* If it's not relevant, or we have already *
+ * sent CHILD_DOWN just break */
+ if (!relevant || !priv->child_up)
+ break;
+
+ /* If it's not a leader, just break coz we shouldn't *
+ * propagate the failure from the failure till it *
+ * itself goes down *
+ */
+ if (!priv->leader) {
+ break;
+ }
+
+ result = fop_quorum_check (this,
+ (double)(priv->n_children - 1),
+ (double)(priv->up_children - 1));
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ J_MSG_GENERIC, "Enough children are "
+ "to down to fail quorum. "
+ "Sending CHILD_DOWN from leader");
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_false;
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ J_MSG_GENERIC, "Not enough children "
+ "are down to fail quorum. Waiting to "
+ "send CHILD_DOWN from leader");
+ }
+ }
+ break;
+ default:
+ ret = default_notify(this, event, data);
+ }
+
+ return ret;
+}
+
+
+/*
+ * mem_acct_init: set up memory accounting for this translator with
+ * gf_mt_jbr_end + 1 types.
+ *
+ * Returns the xlator_mem_acct_init result (0 on success), or -1 when this
+ * is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int     ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("jbr", this, out);
+
+        ret = xlator_mem_acct_init (this, gf_mt_jbr_end + 1);
+
+        if (ret != 0) {
+                /* Single literal: the split one lost the space before "failed". */
+                gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
+                        "Memory accounting init failed");
+                return ret;
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * jbr_deallocate_priv: free the private structure; a NULL pointer is
+ * accepted and ignored.
+ */
+void
+jbr_deallocate_priv (jbr_private_t *priv)
+{
+        if (priv) {
+                GF_FREE(priv);
+        }
+}
+
+
+/*
+ * jbr_init: translator init.
+ *
+ * Installs the hand-written getxattr/fsync/ipc fops, checks that there is
+ * a local child and at least one remote child, creates the jbr_local_t
+ * pool and the private structure, reads the "leader" and "quorum-percent"
+ * options and starts the flush thread.
+ *
+ * Returns 0 on success, -1 on failure.  Failure to start the flush thread
+ * is only logged.
+ */
+int32_t
+jbr_init (xlator_t *this)
+{
+        xlator_list_t   *remote;
+        xlator_list_t   *local;
+        jbr_private_t   *priv = NULL;
+        xlator_list_t   *trav;
+        pthread_t        kid;
+        extern xlator_t  global_xlator;
+        glusterfs_ctx_t *oldctx = global_xlator.ctx;
+
+        /*
+         * Any fop that gets special treatment has to be patched in here,
+         * because the compiled-in table is produced by the code generator and
+         * only contains generated functions.  Note that we have to go through
+         * this->fops because of some dynamic-linking strangeness; modifying
+         * the static table doesn't work.
+         */
+        this->fops->getxattr = jbr_getxattr_special;
+        this->fops->fsync = jbr_fsync;
+        this->fops->ipc = jbr_ipc;
+
+        local = this->children;
+        if (!local) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
+                        "no local subvolume");
+                goto err;
+        }
+
+        remote = local->next;
+        if (!remote) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
+                        "no remote subvolumes");
+                goto err;
+        }
+
+        this->local_pool = mem_pool_new (jbr_local_t, 128);
+        if (!this->local_pool) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
+                        "failed to create jbr_local_t pool");
+                goto err;
+        }
+
+        priv = GF_CALLOC (1, sizeof(*priv), gf_mt_jbr_private_t);
+        if (!priv) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
+                        "could not allocate priv");
+                goto err;
+        }
+
+        for (trav = this->children; trav; trav = trav->next) {
+                ++(priv->n_children);
+        }
+
+        LOCK_INIT(&priv->dirty_lock);
+        LOCK_INIT(&priv->index_lock);
+        INIT_LIST_HEAD(&priv->dirty_fds);
+        priv->term_fd = -1;
+
+        this->private = priv;
+
+        GF_OPTION_INIT ("leader", priv->config_leader, bool, err);
+        GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err);
+
+        priv->leader = priv->config_leader;
+        priv->child_up = _gf_false;
+
+        if (pthread_create(&kid, NULL, jbr_flush_thread,
+                           this) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
+                        "could not start flush thread");
+                /* TBD: treat this as a fatal error? */
+        }
+
+        /*
+         * Calling glfs_new changes old->ctx, even if THIS still points
+         * to global_xlator.  That causes problems later in the main
+         * thread, when gf_log_dump_graph tries to use the FILE after
+         * we've mucked with it and gets a segfault in __fprintf_chk.
+         * We can avoid all that by undoing the damage before we
+         * continue.
+         */
+        global_xlator.ctx = oldctx;
+
+        return 0;
+
+err:
+        if (priv) {
+                /*
+                 * priv may already be published in this->private (the
+                 * option parsing above fails after that assignment);
+                 * clear it so nothing is left pointing at freed memory.
+                 */
+                this->private = NULL;
+        }
+        /* NOTE(review): this->local_pool is not released here - confirm. */
+        jbr_deallocate_priv(priv);
+        return -1;
+}
+
+
+/*
+ * jbr_fini: translator teardown; releases the private structure.
+ */
+void
+jbr_fini (xlator_t *this)
+{
+        jbr_private_t   *priv = this->private;
+
+        jbr_deallocate_priv(priv);
+}
+
+class_methods_t class_methods = { /* lifecycle entry points of this translator */
+ .init = jbr_init, /* allocates priv, patches fops, starts the flush thread */
+ .fini = jbr_fini, /* frees priv */
+ .reconfigure = jbr_reconfigure, /* re-reads "leader" and "quorum-percent" */
+ .notify = jbr_notify, /* tracks child state and filters CHILD_UP/CHILD_DOWN */
+};
+
+/*
+ * Option table for this translator.  Only "leader" and "quorum-percent"
+ * are read by the code in this file (jbr_init / jbr_reconfigure).
+ */
+struct volume_options options[] = {
+        { .key = {"leader"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "false",
+          .description = "Start in the leader role.  This is only for "
+                         "bootstrapping the code, and should go away when we "
+                         "have real leader election."
+        },
+        { .key = {"vol-name"},
+          .type = GF_OPTION_TYPE_STR,
+          .description = "volume name"
+        },
+        { .key = {"my-name"},
+          .type = GF_OPTION_TYPE_STR,
+          .description = "brick name in form of host:/path"
+        },
+        { .key = {"etcd-servers"},
+          .type = GF_OPTION_TYPE_STR,
+          /* Description now names etcd, matching the option key. */
+          .description = "list of comma separated etcd servers"
+        },
+        { .key = {"subvol-uuid"},
+          .type = GF_OPTION_TYPE_STR,
+          .description = "UUID for this JBR (sub)volume"
+        },
+        { .key = {"quorum-percent"},
+          .type = GF_OPTION_TYPE_PERCENT,
+          .default_value = "50.0",
+          .description = "percentage of rep_count-1 that must be up"
+        },
+        { .key = {NULL} },
+};
diff --git a/xlators/experimental/posix2/Makefile.am b/xlators/experimental/posix2/Makefile.am
new file mode 100644
index 00000000000..74e5ab0f5bc
--- /dev/null
+++ b/xlators/experimental/posix2/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = common mds ds
+
+CLEANFILES =
diff --git a/xlators/experimental/posix2/README.md b/xlators/experimental/posix2/README.md
new file mode 100644
index 00000000000..955a98d061e
--- /dev/null
+++ b/xlators/experimental/posix2/README.md
@@ -0,0 +1,7 @@
+# POSIX2 Experimental README
+
+POSIX2 is an implementation of a modified storage translator that caters to
+DHT2's on-disk needs.
+
+For further details regarding POSIX2, refer to
+xlators/experimental/dht2/README.md.
diff --git a/xlators/experimental/posix2/TODO.md b/xlators/experimental/posix2/TODO.md
new file mode 100644
index 00000000000..20cd1e89c1d
--- /dev/null
+++ b/xlators/experimental/posix2/TODO.md
@@ -0,0 +1,3 @@
+# POSIX2 TODO List
+
+<Items will be added as code is pulled into the repository> \ No newline at end of file
diff --git a/xlators/experimental/posix2/common/Makefile.am b/xlators/experimental/posix2/common/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/experimental/posix2/common/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/experimental/posix2/common/src/Makefile.am b/xlators/experimental/posix2/common/src/Makefile.am
new file mode 100644
index 00000000000..465f2f2ba32
--- /dev/null
+++ b/xlators/experimental/posix2/common/src/Makefile.am
@@ -0,0 +1,13 @@
+lib_LTLIBRARIES = libposix2common.la
+
+posix2_common_sources = posix2-common.c
+
+libposix2common_la_SOURCES = $(posix2_common_sources)
+libposix2common_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+libposix2common_la_CFLAGS = -Wall $(GF_CFLAGS)
+
+libposix2common_la_CPPFLAGS = $(GF_CPPFLAGS)
+libposix2common_la_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+
+CLEANFILES = \ No newline at end of file
diff --git a/xlators/experimental/posix2/common/src/posix2-common.c b/xlators/experimental/posix2/common/src/posix2-common.c
new file mode 100644
index 00000000000..14b51d538b2
--- /dev/null
+++ b/xlators/experimental/posix2/common/src/posix2-common.c
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: posix2-common.c
+ * This file contains common routines across ds and mds posix xlators
+ * The entire functionality including comments is TODO.
+ */
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "statedump.h"
diff --git a/xlators/experimental/posix2/ds/Makefile.am b/xlators/experimental/posix2/ds/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/experimental/posix2/ds/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/experimental/posix2/ds/src/Makefile.am b/xlators/experimental/posix2/ds/src/Makefile.am
new file mode 100644
index 00000000000..d29c5e135a2
--- /dev/null
+++ b/xlators/experimental/posix2/ds/src/Makefile.am
@@ -0,0 +1,18 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+xlator_LTLIBRARIES = posix2-ds.la
+
+posix2_ds_sources = posix2-ds-main.c
+
+posix2_ds_la_SOURCES = $(posix2_ds_sources)
+posix2_ds_la_LDFLAGS = -module -avoid-version
+posix2_ds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+posix2_ds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+# Header search paths. The posix2 common sources live under
+# xlators/experimental/posix2, the same tree the LIBADD entry above links from.
+AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/posix2/common/src
+AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
+
+CLEANFILES = \ No newline at end of file
diff --git a/xlators/experimental/posix2/ds/src/posix2-ds-main.c b/xlators/experimental/posix2/ds/src/posix2-ds-main.c
new file mode 100644
index 00000000000..675c4d7c9da
--- /dev/null
+++ b/xlators/experimental/posix2/ds/src/posix2-ds-main.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: posix2-ds-main.c
+ * This file contains the xlator loading functions, FOP entry points
+ * and options.
+ * The entire functionality including comments is TODO.
+ */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "statedump.h"
+
+/* Initialise the posix2 DS xlator.
+ *
+ * The xlator must sit at the bottom of the graph: if @this has any children
+ * an error is logged and -1 is returned, otherwise 0.
+ */
+int32_t
+posix2_ds_init (xlator_t *this)
+{
+        if (!this->children)
+                return 0;
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "This (%s) is a leaf xlator, but found children",
+                this->name);
+
+        return -1;
+}
+
+/* Tear-down hook of the posix2 DS xlator; it currently does nothing. */
+void
+posix2_ds_fini (xlator_t *this)
+{
+}
+
+/* Init and fini handlers exported by the posix2 DS xlator. */
+class_methods_t class_methods = {
+ .init = posix2_ds_init,
+ .fini = posix2_ds_fini,
+};
+
+/* FOP table of the posix2 DS xlator; no entries are filled in yet. */
+struct xlator_fops fops = {
+};
+
+/* Callback table of the posix2 DS xlator; no entries are filled in yet. */
+struct xlator_cbks cbks = {
+};
+
+/*
+struct xlator_dumpops dumpops = {
+};
+*/
+
+/* Volume options of the posix2 DS xlator: only a single entry with a NULL
+ * key (presumably the list terminator -- TODO confirm). */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/experimental/posix2/mds/Makefile.am b/xlators/experimental/posix2/mds/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/experimental/posix2/mds/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/experimental/posix2/mds/src/Makefile.am b/xlators/experimental/posix2/mds/src/Makefile.am
new file mode 100644
index 00000000000..ddd49ef0012
--- /dev/null
+++ b/xlators/experimental/posix2/mds/src/Makefile.am
@@ -0,0 +1,18 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+xlator_LTLIBRARIES = posix2-mds.la
+
+posix2_mds_sources = posix2-mds-main.c
+
+posix2_mds_la_SOURCES = $(posix2_mds_sources)
+posix2_mds_la_LDFLAGS = -module -avoid-version
+posix2_mds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+posix2_mds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+# Header search paths. The posix2 common sources live under
+# xlators/experimental/posix2, the same tree the LIBADD entry above links from.
+AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/posix2/common/src
+AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
+
+CLEANFILES = \ No newline at end of file
diff --git a/xlators/experimental/posix2/mds/src/posix2-mds-main.c b/xlators/experimental/posix2/mds/src/posix2-mds-main.c
new file mode 100644
index 00000000000..71ff4e0089c
--- /dev/null
+++ b/xlators/experimental/posix2/mds/src/posix2-mds-main.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: posix2-mds-main.c
+ * This file contains the xlator loading functions, FOP entry points
+ * and options.
+ * The entire functionality including comments is TODO.
+ */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "statedump.h"
+
+/* Initialise the posix2 MDS xlator.
+ *
+ * The xlator must sit at the bottom of the graph: if @this has any children
+ * an error is logged and -1 is returned, otherwise 0.
+ */
+int32_t
+posix2_mds_init (xlator_t *this)
+{
+        if (!this->children)
+                return 0;
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "This (%s) is a leaf xlator, but found children",
+                this->name);
+
+        return -1;
+}
+
+/* Tear-down hook of the posix2 MDS xlator; it currently does nothing. */
+void
+posix2_mds_fini (xlator_t *this)
+{
+}
+
+/* Init and fini handlers exported by the posix2 MDS xlator. */
+class_methods_t class_methods = {
+ .init = posix2_mds_init,
+ .fini = posix2_mds_fini,
+};
+
+/* FOP table of the posix2 MDS xlator; no entries are filled in yet. */
+struct xlator_fops fops = {
+};
+
+/* Callback table of the posix2 MDS xlator; no entries are filled in yet. */
+struct xlator_cbks cbks = {
+};
+
+/*
+struct xlator_dumpops dumpops = {
+};
+*/
+
+/* Volume options of the posix2 MDS xlator: only a single entry with a NULL
+ * key (presumably the list terminator -- TODO confirm). */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 6496f3ab0c9..c63eb75a7c3 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,3 +1,6 @@
-SUBDIRS = locks trash quota read-only access-control mac-compat #path-converter # filter
+SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \
+ arbiter protect compress changelog changetimerecorder ganesha \
+ gfid-access $(GLUPY_SUBDIR) upcall snapview-client snapview-server \
+ trash shard bit-rot leases
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/access-control/src/Makefile.am b/xlators/features/access-control/src/Makefile.am
deleted file mode 100644
index 6ab8cc4ec4e..00000000000
--- a/xlators/features/access-control/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-# access-control is built as a loadable libtool module without a version
-# suffix (-module -avoid-version).
-xlator_LTLIBRARIES = access-control.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-access_control_la_LDFLAGS = -module -avoid-version
-access_control_la_SOURCES = access-control.c
-access_control_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = access-control.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\
- -L$(xlatordir)/
-
-CLEANFILES =
diff --git a/xlators/features/access-control/src/access-control.c b/xlators/features/access-control/src/access-control.c
deleted file mode 100644
index 3735992435c..00000000000
--- a/xlators/features/access-control/src/access-control.c
+++ /dev/null
@@ -1,1841 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "access-control.h"
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "iatt.h"
-
-/* Detach and return the call stub stored in fr->local.
- *
- * fr->local is reset to NULL, so do not call this while a later callback
- * still needs to find the stub on the frame. It belongs at the end of the
- * access-control checks, in the function that calls call_resume and hands
- * the frame over to the actual fop and its default callback.
- *
- * NOTE: the reset is required because FRAME_DESTROY tries to free
- * frame->local if it finds it to be non-NULL.
- *
- * Returns NULL when fr is NULL.
- */
-call_stub_t *
-__get_frame_stub (call_frame_t *fr)
-{
-        call_stub_t *detached = NULL;
-
-        if (fr) {
-                detached = fr->local;
-                fr->local = NULL;
-        }
-
-        return detached;
-}
-
-
-/* Test the owner permission bits of @ia for the caller @uid.
- *
- * Returns 0 when @uid owns the file and every permission requested in
- * @accesstest (read, write, exec) is granted by the owner bits, or when the
- * caller passed a don't-care test. Returns -1 when @ia is NULL, when @uid
- * is not the owner, or when any requested permission is missing.
- */
-int
-ac_test_owner_access (struct iatt *ia, uid_t uid, int accesstest)
-{
-        int granted = 1;
-
-        if (!ia)
-                return -1;
-
-        /* The owner bits only apply to the owner. */
-        if (ia->ia_uid != uid)
-                return -1;
-
-        /* The uid matches; a caller that does not care about the
-         * permission bits is done here.
-         */
-        if (ac_test_dontcare (accesstest))
-                return 0;
-
-        /* Every requested permission must be present. The tests are
-         * combined so that a later one cannot overwrite the failure of an
-         * earlier one when several are requested at once.
-         */
-        if (ac_test_read (accesstest) && !IA_PROT_RUSR (ia->ia_prot))
-                granted = 0;
-
-        if (ac_test_write (accesstest) && !IA_PROT_WUSR (ia->ia_prot))
-                granted = 0;
-
-        if (ac_test_exec (accesstest) && !IA_PROT_XUSR (ia->ia_prot))
-                granted = 0;
-
-        return granted ? 0 : -1;
-}
-
-
-/* Test the group permission bits of @ia for a caller whose primary group is
- * @gid and whose @auxcount supplementary groups are listed in @auxgids.
- *
- * Returns 0 when one of the caller's groups equals ia->ia_gid and every
- * permission requested in @accesstest is granted by the group bits, or when
- * a group matches and the caller passed a don't-care test. Returns -1 when
- * @ia is NULL, when no group matches, or when a requested permission is
- * missing.
- */
-int
-ac_test_group_access (struct iatt *ia, gid_t gid, gid_t *auxgids, int auxcount,
-                      int accesstest)
-{
-        int member = 0;
-        int granted = 1;
-        int x = 0;
-
-        if (!ia)
-                return -1;
-
-        /* Find out whether any of the caller's groups is the file's group.
-         * A flag is used instead of a -1 gid sentinel so that no valid
-         * gid_t value can be mistaken for "no match".
-         */
-        if (ia->ia_gid == gid)
-                member = 1;
-
-        /* Do not depend on @auxgids being NULL to mean "no supplementary
-         * groups": callers may pass a statically allocated array together
-         * with auxcount == 0. A NULL array is simply skipped.
-         */
-        for (x = 0; !member && auxgids && (x < auxcount); ++x) {
-                if (ia->ia_gid == auxgids[x])
-                        member = 1;
-        }
-
-        /* None of the gids match the gid in the stat. */
-        if (!member)
-                return -1;
-
-        /* A group matches; a caller that does not care about the
-         * permission bits is done here.
-         */
-        if (ac_test_dontcare (accesstest))
-                return 0;
-
-        /* Every requested permission must be present; see the matching
-         * logic for owner bits.
-         */
-        if (ac_test_read (accesstest) && !IA_PROT_RGRP (ia->ia_prot))
-                granted = 0;
-
-        if (ac_test_write (accesstest) && !IA_PROT_WGRP (ia->ia_prot))
-                granted = 0;
-
-        if (ac_test_exec (accesstest) && !IA_PROT_XGRP (ia->ia_prot))
-                granted = 0;
-
-        return granted ? 0 : -1;
-}
-
-
-/* Test the "other" permission bits of @ia.
- *
- * Returns 0 when at least one of read, write or exec is requested in
- * @accesstest and every requested permission is granted by the other bits.
- * Returns -1 when @ia is NULL, when a requested permission is missing, or
- * when none of the three tests is requested.
- */
-int
-ac_test_other_access (struct iatt *ia, int accesstest)
-{
-        int requested = 0;
-        int granted = 1;
-
-        if (!ia)
-                return -1;
-
-        /* Each requested permission is checked on its own so that a later
-         * test cannot hide the failure of an earlier one.
-         */
-        if (ac_test_read (accesstest)) {
-                requested = 1;
-                if (!IA_PROT_ROTH (ia->ia_prot))
-                        granted = 0;
-        }
-
-        if (ac_test_write (accesstest)) {
-                requested = 1;
-                if (!IA_PROT_WOTH (ia->ia_prot))
-                        granted = 0;
-        }
-
-        if (ac_test_exec (accesstest)) {
-                requested = 1;
-                if (!IA_PROT_XOTH (ia->ia_prot))
-                        granted = 0;
-        }
-
-        /* NOTE(review): a request naming none of read/write/exec is denied
-         * here, whereas the owner and group tests allow it once the id
-         * matches -- confirm which behaviour is intended.
-         */
-        if (!requested || !granted)
-                return -1;
-
-        return 0;
-}
-
-
-/* Run the owner, group and other access tests selected by @testwho, in that
- * order, and stop at the first one that grants @accesstest.
- *
- * A caller with uid 0 and gid 0 is always granted access. Returns 0 on
- * success. Returns -1 on a failed access test with *@operrno set to EPERM,
- * and -1 without touching @operrno when @ia or @operrno is NULL.
- */
-int
-ac_test_access (struct iatt *ia, uid_t uid, gid_t gid, gid_t *auxgids,
-                int auxcount, int accesstest, int testwho, int *operrno)
-{
-        int verdict = -1;
-
-        if (!ia || !operrno)
-                return -1;
-
-        if ((uid == 0) && (gid == 0)) {
-                gf_log (ACTRL, GF_LOG_TRACE, "Root has access");
-                return 0;
-        }
-
-        if (ac_test_owner (testwho)) {
-                gf_log (ACTRL, GF_LOG_TRACE, "Testing owner access");
-                verdict = ac_test_owner_access (ia, uid, accesstest);
-                if (verdict == 0) {
-                        gf_log (ACTRL, GF_LOG_TRACE, "Owner has access");
-                        return verdict;
-                }
-        }
-
-        if (ac_test_group (testwho)) {
-                gf_log (ACTRL, GF_LOG_TRACE, "Testing group access");
-                verdict = ac_test_group_access (ia, gid, auxgids, auxcount,
-                                                accesstest);
-                if (verdict == 0) {
-                        gf_log (ACTRL, GF_LOG_TRACE, "Group has access");
-                        return verdict;
-                }
-        }
-
-        if (ac_test_other (testwho)) {
-                gf_log (ACTRL, GF_LOG_TRACE, "Testing other access");
-                verdict = ac_test_other_access (ia, accesstest);
-                if (verdict == 0)
-                        gf_log (ACTRL, GF_LOG_TRACE, "Other has access");
-        }
-
-        if (verdict == -1) {
-                gf_log (ACTRL, GF_LOG_TRACE, "No access allowed");
-                *operrno = EPERM;
-        }
-
-        return verdict;
-}
-
-
-/* Populate @loc from @inode, @parent and @path.
- *
- * References are taken on @inode and @parent when they are non-NULL, @path
- * is duplicated into loc->path and loc->name is set to the component after
- * the last '/'. Returns 0 on success. Returns -EFAULT when @loc is NULL;
- * when the duplication fails or the path contains no '/', @loc is wiped and
- * -EFAULT is returned.
- */
-int
-ac_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
-{
-        if (!loc)
-                return -EFAULT;
-
-        if (inode) {
-                loc->inode = inode_ref (inode);
-                loc->ino = inode->ino;
-        }
-
-        if (parent)
-                loc->parent = inode_ref (parent);
-
-        loc->path = gf_strdup (path);
-        if (!loc->path) {
-                gf_log (ACTRL, GF_LOG_ERROR, "strdup failed");
-                loc_wipe (loc);
-                return -EFAULT;
-        }
-
-        loc->name = strrchr (loc->path, '/');
-        if (!loc->name) {
-                loc_wipe (loc);
-                return -EFAULT;
-        }
-
-        /* Step past the '/' to the basename. */
-        loc->name++;
-
-        return 0;
-}
-
-
-/* Build @loc for @inode by resolving its parent and its path.
- *
- * An inode whose ino is 1 is filled in without a parent. Returns the result
- * of ac_loc_fill on success, the negative result of inode_path when path
- * resolution fails, and -EFAULT when an argument is NULL or the parent
- * cannot be found. The temporary parent reference and the resolved path are
- * released before returning.
- */
-int
-ac_inode_loc_fill (inode_t *inode, loc_t *loc)
-{
-        char    *resolvedpath = NULL;
-        inode_t *parent = NULL;
-        int      ret = -EFAULT;
-        int      parentless = 0;
-
-        if (!inode || !loc)
-                return ret;
-
-        parentless = (inode->ino == 1);
-        if (!parentless)
-                parent = inode_parent (inode, 0, NULL);
-
-        if (parentless || parent) {
-                ret = inode_path (inode, NULL, &resolvedpath);
-                if (ret >= 0)
-                        ret = ac_loc_fill (loc, inode, parent, resolvedpath);
-        }
-
-        if (parent)
-                inode_unref (parent);
-
-        if (resolvedpath)
-                GF_FREE (resolvedpath);
-
-        return ret;
-}
-
-
-/* Fill @parentloc with the location of the parent inode of @childloc.
- *
- * Returns -1 when either argument is NULL, otherwise the result of
- * ac_inode_loc_fill on childloc->parent.
- */
-int
-ac_parent_loc_fill (loc_t *parentloc, loc_t *childloc)
-{
-        if (parentloc && childloc)
-                return ac_inode_loc_fill (childloc->parent, parentloc);
-
-        return -1;
-}
-
-
-/* Resume handler stored in the call stub created by ac_truncate: winds the
- * truncate fop to the first child with default_truncate_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- return 0;
-}
-
-
-/* Callback for the stat wound by ac_truncate.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed truncate is resumed. Otherwise the truncate is unwound
- * with -1 and op_errno, and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                      int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret != -1)
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* truncate fop: stat @loc first so that ac_truncate_stat_cbk can check write
- * permission. The request is kept as a call stub in frame->local and is
- * resumed from the callback. If the stub cannot be created the fop is
- * unwound with -1 and ENOMEM. Always returns 0.
- */
-int32_t
-ac_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_truncate_stub (frame, ac_truncate_resume, loc, offset);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_truncate_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, loc);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_access: winds the
- * access fop to the first child with default_access_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_access_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
- return 0;
-}
-
-
-/* Callback for the stat wound by ac_access.
- *
- * When the stat failed, the access fop is unwound with the errno reported
- * by the stat. Otherwise every permission named in the stubbed access mask
- * (R_OK, W_OK, X_OK) is tested against @buf; the stubbed access is resumed
- * only if all of them are granted, else the fop is unwound with -1 and the
- * errno set by ac_test_access and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_access_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                    int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *stub = NULL;
-        int32_t      mask = 0;
-        int          tests[3] = {0, 0, 0};
-        int          ntests = 0;
-        int          i = 0;
-
-        stub = __get_frame_stub (frame);
-
-        /* Propagate the errno reported by the stat itself. The global
-         * errno says nothing about a callback result, and a test against
-         * F_OK can never select a branch because F_OK is 0.
-         */
-        if (op_ret == -1)
-                goto out;
-
-        if (!stub) {
-                op_ret = -1;
-                op_errno = EINVAL;
-                goto out;
-        }
-
-        mask = stub->args.access.mask;
-
-        /* Collect every requested permission; a mask may combine them. */
-        if (R_OK & mask)
-                tests[ntests++] = ACCTEST_READ;
-        if (W_OK & mask)
-                tests[ntests++] = ACCTEST_WRITE;
-        if (X_OK & mask)
-                tests[ntests++] = ACCTEST_EXEC;
-
-        /* No permission bit requested: run a single test with an empty
-         * access test, as before.
-         */
-        if (ntests == 0)
-                tests[ntests++] = 0;
-
-        /* Each permission is tested separately so that all of them have
-         * to be granted.
-         */
-        for (i = 0; i < ntests; i++) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, tests[i],
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        goto out;
-        }
-
-        call_resume (stub);
-out:
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (access, frame, -1, op_errno);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* access fop: stat @loc first so that ac_access_stat_cbk can evaluate @mask
- * against the file's permissions. The request is kept as a call stub in
- * frame->local and is resumed from the callback. If the stub cannot be
- * created the fop is unwound with -1 and ENOMEM. Always returns 0.
- */
-int32_t
-ac_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_access_stub (frame, ac_access_resume, loc, mask);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (access, frame, -1, ENOMEM);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_access_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, loc);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_readlink: winds the
- * readlink fop to the first child with default_readlink_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_readlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
-{
- STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
- return 0;
-}
-
-
-/* Callback for the stat wound by ac_readlink.
- *
- * When the stat succeeded and the caller passes the read-access test on
- * @buf, the stubbed readlink is resumed. Otherwise the readlink is unwound
- * with -1 and op_errno, and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_readlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                      int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_READ,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret != -1)
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* readlink fop: stat @loc first so that ac_readlink_stat_cbk can check read
- * permission. The request is kept as a call stub in frame->local and is
- * resumed from the callback. If the stub cannot be created the fop is
- * unwound with -1 and ENOMEM. Always returns 0.
- */
-int32_t
-ac_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_readlink_stub (frame, ac_readlink_resume, loc, size);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_readlink_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, loc);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_mknod: winds the
- * mknod fop to the first child with default_mknod_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev);
- return 0;
-}
-
-
-/* Callback for the stat of the parent directory wound by ac_mknod.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed mknod is resumed. A failed access test is reported as
- * EACCES; a failed stat keeps its own op_errno. On failure the mknod is
- * unwound with -1 and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_mknod_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                   int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* mknod fop: stat the parent directory of @loc first so that
- * ac_mknod_stat_cbk can check write permission on it. The request is kept
- * as a call stub in frame->local and is resumed from the callback. When the
- * stub or the parent loc cannot be set up, any stub left on the frame is
- * destroyed and the fop is unwound with -1 and the negated error code.
- * Always returns 0.
- */
-int32_t
-ac_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
-          dev_t rdev)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_mknod_stub (frame, ac_mknod_resume, loc, mode, rdev);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, loc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_mknod_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase any stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_mkdir: winds the
- * mkdir fop to the first child with default_mkdir_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode);
- return 0;
-}
-
-
-/* Callback for the stat of the parent directory wound by ac_mkdir.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed mkdir is resumed. On failure the mkdir is unwound with
- * -1 and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_mkdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                   int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1) {
-                        /* A failed write test on the parent dir must be
-                         * reported as EACCES, not as the EPERM that
-                         * ac_test_access sets by default.
-                         */
-                        op_errno = EACCES;
-                } else {
-                        call_resume (stub);
-                }
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* mkdir fop: stat the parent directory of @loc first so that
- * ac_mkdir_stat_cbk can check write permission on it. The request is kept
- * as a call stub in frame->local and is resumed from the callback. When the
- * stub or the parent loc cannot be set up, any stub left on the frame is
- * destroyed and the fop is unwound with -1 and the negated error code.
- * Always returns 0.
- */
-int32_t
-ac_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_mkdir_stub (frame, ac_mkdir_resume, loc, mode);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, loc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_mkdir_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase the stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_unlink: winds the
- * unlink fop to the first child with default_unlink_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
- return 0;
-}
-
-
-/* Callback for the stat of the parent directory wound by ac_unlink.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed unlink is resumed. A failed access test is reported as
- * EACCES; a failed stat keeps its own op_errno. On failure the unlink is
- * unwound with -1 and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                    int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* unlink fop: stat the parent directory of @loc first so that
- * ac_unlink_stat_cbk can check write permission on it. The request is kept
- * as a call stub in frame->local and is resumed from the callback. When the
- * stub or the parent loc cannot be set up, any stub left on the frame is
- * destroyed and the fop is unwound with -1 and the negated error code.
- * Always returns 0.
- */
-int32_t
-ac_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_unlink_stub (frame, ac_unlink_resume, loc);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, loc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_unlink_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase the stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_rmdir: winds the
- * rmdir fop to the first child with default_rmdir_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc);
- return 0;
-}
-
-
-/* Callback for the stat of the parent directory wound by ac_rmdir.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed rmdir is resumed. A failed access test is reported as
- * EACCES; a failed stat keeps its own op_errno. On failure the rmdir is
- * unwound with -1 and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                   int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* rmdir fop: stat the parent directory of @loc first so that
- * ac_rmdir_stat_cbk can check write permission on it. The request is kept
- * as a call stub in frame->local and is resumed from the callback. When the
- * stub or the parent loc cannot be set up, any stub left on the frame is
- * destroyed and the fop is unwound with -1 and the negated error code.
- * Always returns 0.
- */
-int32_t
-ac_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_rmdir_stub (frame, ac_rmdir_resume, loc);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, loc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_rmdir_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase the stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_symlink: winds the
- * symlink fop to the first child with default_symlink_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_symlink_resume (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
-{
- STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkname, loc);
- return 0;
-}
-
-
-/* Callback for the stat of the parent directory wound by ac_symlink.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed symlink is resumed. A failed access test is reported as
- * EACCES; a failed stat keeps its own op_errno. On failure the symlink is
- * unwound with -1 and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_symlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                     int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* symlink fop: stat the parent directory of @loc first so that
- * ac_symlink_stat_cbk can check write permission on it. The request is kept
- * as a call stub in frame->local and is resumed from the callback. When the
- * stub or the parent loc cannot be set up, any stub left on the frame is
- * destroyed and the fop is unwound with -1 and the negated error code.
- * Always returns 0.
- */
-int32_t
-ac_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
-            loc_t *loc)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_symlink_stub (frame, ac_symlink_resume, linkname, loc);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, loc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_symlink_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase the stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_rename: winds the
- * rename fop to the first child with default_rename_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
- return 0;
-}
-
-
-/* Callback for the stat of the destination's parent directory wound by
- * ac_rename_src_stat_cbk.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed rename is resumed. A failed access test is reported as
- * EACCES; a failed stat keeps its own op_errno. On failure the rename is
- * unwound with -1 and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_rename_dst_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                        int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (stub);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* Callback for the stat of the source's parent directory wound by ac_rename.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, a second stat is wound on the parent directory of the rename
- * destination, with ac_rename_dst_stat_cbk as callback. The stub stays on
- * the frame for that callback. On any failure the stub is destroyed and the
- * rename is unwound with -1: EACCES for a failed access test, EFAULT when
- * the destination's parent loc cannot be built, and the stat's own op_errno
- * when the stat failed. Always returns 0.
- */
-int32_t
-ac_rename_src_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                        int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-
-        /* Only peek at the stub: the next callback still needs it. */
-        stub = frame->local;
-        if (op_ret == -1)
-                goto out;
-
-        op_ret = ac_test_access (buf, frame->root->uid,
-                                 frame->root->gid, frame->root->groups,
-                                 frame->root->ngrps, ACCTEST_WRITE,
-                                 ACCTEST_ANY, &op_errno);
-        if (op_ret == -1) {
-                op_errno = EACCES;
-                goto out;
-        }
-
-        /* ac_parent_loc_fill reports failures as negative values other than
-         * -1 as well (e.g. -EFAULT), so every negative result must stop
-         * here; winding with an unfilled loc and then also unwinding below
-         * would complete the fop twice. op_errno is a positive errno.
-         */
-        op_ret = ac_parent_loc_fill (&parentloc, &stub->args.rename.new);
-        if (op_ret < 0) {
-                op_ret = -1;
-                op_errno = EFAULT;
-                goto out;
-        }
-
-        STACK_WIND (frame, ac_rename_dst_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, &parentloc);
-        loc_wipe (&parentloc);
-
-out:
-        if (op_ret < 0) {
-                /* Erase the stored stub before unwinding. */
-                stub = __get_frame_stub (frame);
-                if (stub)
-                        call_stub_destroy (stub);
-                STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL, NULL);
-        }
-
-        return 0;
-}
-
-
-/* rename fop: stat the parent directory of @oldloc first so that
- * ac_rename_src_stat_cbk can check write permission on it. The request is
- * kept as a call stub in frame->local. When the stub or the parent loc
- * cannot be set up, any stub left on the frame is destroyed and the fop is
- * unwound with -1 and the negated error code. Always returns 0.
- */
-int32_t
-ac_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_rename_stub (frame, ac_rename_resume, oldloc, newloc);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, oldloc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_rename_src_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase the stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL,
-                             NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume handler stored in the call stub created by ac_link: winds the
- * link fop to the first child with default_link_cbk as callback.
- * Always returns 0.
- */
-int32_t
-ac_link_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
- return 0;
-}
-
-
-/* Callback for the stat of the new link's parent directory wound by ac_link.
- *
- * When the stat succeeded and the caller passes the write-access test on
- * @buf, the stubbed link is resumed. On failure the link is unwound with -1
- * and the stub is destroyed. Always returns 0.
- */
-int32_t
-ac_link_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                  int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        /* Detach the stub from the frame before it is resumed or freed. */
-        call_stub_t *stub = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid, frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1) {
-                        /* ac_test_access sets op_errno to EPERM by default,
-                         * but link has to return EACCES to meet posix
-                         * requirements when write permission is not
-                         * available for the new directory.
-                         */
-                        op_errno = EACCES;
-                } else {
-                        call_resume (stub);
-                }
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* link fop: stat the parent directory of @newloc first so that
- * ac_link_stat_cbk can check write permission on it. The request is kept as
- * a call stub in frame->local and is resumed from the callback. When the
- * stub or the parent loc cannot be set up, any stub left on the frame is
- * destroyed and the fop is unwound with -1 and the negated error code.
- * Always returns 0.
- */
-int32_t
-ac_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
-        call_stub_t *stub = NULL;
-        loc_t        parentloc = {0, };
-        int          ret = -EFAULT;
-
-        stub = fop_link_stub (frame, ac_link_resume, oldloc, newloc);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, newloc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_link_stat_cbk,
-                                    FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        return 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        /* Erase the stored stub before unwinding. */
-        stub = __get_frame_stub (frame);
-        if (stub)
-                call_stub_destroy (stub);
-        STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL, NULL);
-
-        return 0;
-}
-
-
-/* Resume step for create: winds the stubbed create call to the first child
- * with the default create callback. Always returns 0. */
-int32_t
-ac_create_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
-{
- STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd);
- return 0;
-}
-
-
-/* Callback for the stat of the parent directory of a file being created.
- * Tests the caller's write access to that directory; on success the stored
- * create stub is resumed, otherwise the create fop is unwound (EACCES when
- * the access test fails) and the stub is destroyed. Always returns 0. */
-int32_t
-ac_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                    int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *pending = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid,
-                                         frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (pending);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
-                                     NULL, NULL, NULL);
-                if (pending)
-                        call_stub_destroy (pending);
-        }
-
-        return 0;
-}
-
-
-/* create fop entry point.
- * Stores a resume stub in frame->local and winds a stat on the parent
- * directory of loc, with ac_create_stat_cbk as the callback. On setup failure
- * any stored stub is destroyed and the fop is unwound with the positive
- * errno. Always returns 0. */
-int32_t
-ac_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
-           mode_t mode, fd_t *fd)
-{
-        call_stub_t *stub      = NULL;
-        int          ret       = -EFAULT;
-        loc_t        parentloc = {0, };
-
-        stub = fop_create_stub (frame, ac_create_resume, loc, flags, mode, fd);
-        if (stub) {
-                frame->local = stub;
-                ret = ac_parent_loc_fill (&parentloc, loc);
-                if (ret >= 0) {
-                        STACK_WIND (frame, ac_create_stat_cbk, FIRST_CHILD (this),
-                                    FIRST_CHILD (this)->fops->stat, &parentloc);
-                        loc_wipe (&parentloc);
-                        ret = 0;
-                }
-        } else {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-        }
-
-        if (ret < 0) {
-                /* Erase the stored stub before unwinding. */
-                stub = __get_frame_stub (frame);
-                if (stub)
-                        call_stub_destroy (stub);
-                STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL,
-                                     NULL, NULL);
-        }
-
-        return 0;
-}
-
-
-/* Resume step for open: winds the stubbed open call to the first child with
- * the default open callback. Always returns 0. */
-int32_t
-ac_open_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
-{
- STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
- return 0;
-}
-
-
-/* Callback for the parent-directory stat wound by ac_open_create.
- * Tests the caller's write access to the directory; on success the stored
- * open stub is resumed, otherwise the open fop is unwound (EACCES when the
- * access test fails) and the stub is destroyed. Always returns 0. */
-int32_t
-ac_open_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                         int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *pending = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid,
-                                         frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (pending);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
-                if (pending)
-                        call_stub_destroy (pending);
-        }
-
-        return 0;
-}
-
-
-/* Starts the permission check for an open by winding a stat on the parent
- * directory of the stub's loc (callback: ac_open_create_stat_cbk).
- * Returns -EFAULT for a NULL stub, the negative value from
- * ac_parent_loc_fill on failure, and 0 once the stat has been wound. */
-int
-ac_open_create (call_stub_t *stub)
-{
-        int       ret       = 0;
-        loc_t     parentloc = {0, };
-        xlator_t *this      = NULL;
-
-        if (!stub)
-                return -EFAULT;
-
-        ret = ac_parent_loc_fill (&parentloc, &stub->args.open.loc);
-        if (ret < 0)
-                return ret;
-
-        this = stub->frame->this;
-        STACK_WIND (stub->frame, ac_open_create_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, &parentloc);
-        loc_wipe (&parentloc);
-
-        return 0;
-}
-
-
-/* Callback for the stat wound by ac_open_only on the file being opened.
- * Chooses the permission to test from the open access mode (read for
- * O_RDONLY, write for O_RDWR / O_WRONLY) and resumes the stored open stub
- * when the access test passes; otherwise unwinds open with op_errno and
- * destroys the stub. Always returns 0. */
-int32_t
-ac_open_only_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                       int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *stub    = __get_frame_stub (frame);
-        int          acctest = 0;
-
-        if (op_ret == -1)
-                goto out;
-
-        /* The permissions we test for depend on how the open needs to be
-         * performed. */
-        switch (stub->args.open.flags & O_ACCMODE) {
-        case O_RDONLY:
-                acctest = ACCTEST_READ;
-                break;
-        case O_RDWR:
-        case O_WRONLY:
-                acctest = ACCTEST_WRITE;
-                break;
-        default:
-                break;
-        }
-
-        op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid,
-                                 frame->root->groups, frame->root->ngrps,
-                                 acctest, ACCTEST_ANY, &op_errno);
-        if (op_ret != -1)
-                call_resume (stub);
-
-out:
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* Starts the permission check for an open by winding a stat on the file
- * itself (callback: ac_open_only_stat_cbk).
- * Returns -EFAULT for a NULL stub, 0 otherwise. */
-int
-ac_open_only (call_stub_t *stub)
-{
-        xlator_t *this = NULL;
-
-        if (!stub)
-                return -EFAULT;
-
-        this = stub->frame->this;
-        STACK_WIND (stub->frame, ac_open_only_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, &stub->args.open.loc);
-
-        return 0;
-}
-
-/* open fop entry point.
- * Stores a resume stub in frame->local, then starts the permission check:
- *   - with O_CREAT, via ac_open_create (stat of the parent directory);
- *   - without O_CREAT, via ac_open_only (stat of the file itself).
- * If the stub cannot be created or the check cannot be started, any stored
- * stub is destroyed and open is unwound with the positive errno.
- * Always returns 0. */
-int32_t
-ac_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
-         fd_t *fd, int32_t wbflags)
-{
-        call_stub_t *stub = NULL;
-        int ret = -EFAULT;
-
-        stub = fop_open_stub (frame, ac_open_resume, loc, flags, fd, wbflags);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                ret = -ENOMEM;
-                goto out;
-        }
-
-        frame->local = stub;
-        /* If we are not supposed to create the file then there is no need to
-         * check the parent dir permissions. The test was previously inverted,
-         * so creating opens skipped the parent-directory check. */
-        if (flags & O_CREAT)
-                ret = ac_open_create (stub);
-        else
-                ret = ac_open_only (stub);
-
-out:
-        if (ret < 0) {
-                /* Erase the stored stub before unwinding. */
-                stub = __get_frame_stub (frame);
-                if (stub)
-                        call_stub_destroy (stub);
-                STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL);
-        }
-
-        return 0;
-}
-
-
-/* Resume step for readv: winds the stubbed readv call to the first child
- * with the default readv callback. Always returns 0. */
-int32_t
-ac_readv_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size, offset);
- return 0;
-}
-
-
-/* Callback for the fstat wound by ac_readv.
- * Tests the caller's read access to the file; on success the stored readv
- * stub is resumed, otherwise readv is unwound (EACCES when the access test
- * fails) and the stub is destroyed. Always returns 0. */
-int32_t
-ac_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                    int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *pending = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid,
-                                         frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_READ,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (pending);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL,
-                                     NULL);
-                if (pending)
-                        call_stub_destroy (pending);
-        }
-
-        return 0;
-}
-
-
-/* readv fop entry point.
- * Stores a resume stub in frame->local and winds an fstat on fd, with
- * ac_readv_fstat_cbk as the callback. If the stub cannot be created, readv
- * is unwound with ENOMEM. Always returns 0. */
-int32_t
-ac_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
-          off_t offset)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_readv_stub (frame, ac_readv_resume, fd, size, offset);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL,
-                                     NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_readv_fstat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->fstat, fd);
-
-        return 0;
-}
-
-
-/* Resume step for writev: winds the stubbed writev call to the first child
- * with the default writev callback. Always returns 0. */
-int32_t
-ac_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count, offset,
- iobref);
- return 0;
-}
-
-
-/* Callback for the fstat wound by ac_writev.
- * Tests the caller's write access to the file; on success the stored writev
- * stub is resumed, otherwise writev is unwound (EACCES when the access test
- * fails) and the stub is destroyed. Always returns 0. */
-int32_t
-ac_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                     int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *pending = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid,
-                                         frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_WRITE,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret == -1)
-                        op_errno = EACCES;
-                else
-                        call_resume (pending);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
-                if (pending)
-                        call_stub_destroy (pending);
-        }
-
-        return 0;
-}
-
-
-/* writev fop entry point.
- * Stores a resume stub in frame->local and winds an fstat on fd, with
- * ac_writev_fstat_cbk as the callback. If the stub cannot be created, writev
- * is unwound with ENOMEM. Always returns 0. */
-int32_t
-ac_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
-           int32_t count, off_t offset, struct iobref *iobref)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_writev_stub (frame, ac_writev_resume, fd, vector, count,
-                                offset, iobref);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_writev_fstat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->fstat, fd);
-
-        return 0;
-}
-
-
-/* Resume step for opendir: winds the stubbed opendir call to the first child
- * with the default opendir callback. Always returns 0. */
-int32_t
-ac_opendir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd);
- return 0;
-}
-
-
-/* Callback for the stat wound by ac_opendir.
- * Tests the caller's read access to the directory; on success the stored
- * opendir stub is resumed, otherwise opendir is unwound with the op_errno
- * left by the stat or the access test, and the stub is destroyed.
- * Always returns 0. */
-int32_t
-ac_opendir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                     int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *pending = __get_frame_stub (frame);
-
-        if (op_ret != -1) {
-                op_ret = ac_test_access (buf, frame->root->uid,
-                                         frame->root->gid,
-                                         frame->root->groups,
-                                         frame->root->ngrps, ACCTEST_READ,
-                                         ACCTEST_ANY, &op_errno);
-                if (op_ret != -1)
-                        call_resume (pending);
-        }
-
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
-                if (pending)
-                        call_stub_destroy (pending);
-        }
-
-        return 0;
-}
-
-/* opendir fop entry point.
- * Stores a resume stub in frame->local and winds a stat on loc, with
- * ac_opendir_stat_cbk as the callback. If the stub cannot be created,
- * opendir is unwound with ENOMEM. Always returns 0. */
-int32_t
-ac_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_opendir_stub (frame, ac_opendir_resume, loc, fd);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_opendir_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, loc);
-
-        return 0;
-}
-
-
-/* Resume step for setattr: winds the stubbed setattr call to the first child
- * with the default setattr callback. Always returns 0. */
-int32_t
-ac_setattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *buf, int32_t valid)
-{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, buf, valid);
- return 0;
-}
-
-
-/* Callback for the stat wound by ac_setattr.
- * First runs an ownership test (ACCTEST_DONTCARE / ACCTEST_OWNER) on the
- * stat result. If the request sets uid or gid, the additional chown rules
- * below are applied for non-root callers. When every check passes the stored
- * setattr stub is resumed; otherwise setattr is unwound with op_errno and
- * the stub is destroyed. Always returns 0. */
-int32_t
-ac_setattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
- call_stub_t *stub = NULL;
- int32_t valid = 0;
- struct iatt *setbuf = NULL;
-
- stub = __get_frame_stub (frame);
- if (op_ret == -1)
- goto out;
-
- op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid,
- frame->root->groups, frame->root->ngrps,
- ACCTEST_DONTCARE, ACCTEST_OWNER,
- &op_errno);
- if (op_ret == -1)
- goto out;
-
- /* valid: which attributes the request sets; setbuf: the requested
- * values (buf holds the file's current attributes). */
- valid = stub->args.setattr.valid;
- setbuf = &stub->args.setattr.stbuf;
- if (gf_attr_uid_set (valid) || gf_attr_gid_set (valid)) {
- /* chown returns EPERM if the operation would change the
- * ownership, but the effective user ID is not the
- * super-user and the process is not an owner of the file.
- * Ref: posix-testsuite/chown/07.t
- */
- if ((frame->root->uid != 0) && (gf_attr_uid_set (valid))) {
- if (buf->ia_uid != setbuf->ia_uid) {
- op_ret = -1;
- op_errno = EPERM;
- goto out;
- }
- }
-
- /* non-super-user can modify file group if he is owner of a
- * file and gid he is setting is in his groups list.
- * Ref: posix-testsuite/chown/00.t
- */
- if ((frame->root->uid != 0) && (gf_attr_gid_set (valid))) {
- if (frame->root->uid != buf->ia_uid) {
- op_ret = -1;
- op_errno = EPERM;
- goto out;
- }
-
- /* Group test against the requested gid (setbuf). */
- op_ret = ac_test_access (setbuf, 0, frame->root->gid,
- frame->root->groups,
- frame->root->ngrps,
- ACCTEST_DONTCARE,
- ACCTEST_GROUP, &op_errno);
- if (op_ret == -1)
- goto out;
- }
- }
-
- call_resume (stub);
-out:
- if (op_ret < 0) {
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
- if (stub)
- call_stub_destroy (stub);
- }
-
- return 0;
-}
-
-/* setattr fop entry point.
- * Stores a resume stub in frame->local and winds a stat on loc, with
- * ac_setattr_stat_cbk as the callback. If the stub cannot be created,
- * setattr is unwound with ENOMEM. Always returns 0. */
-int32_t
-ac_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
-            int32_t valid)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_setattr_stub (frame, ac_setattr_resume, loc, buf, valid);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_setattr_stat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->stat, loc);
-
-        return 0;
-}
-
-
-/* Resume step for fsetattr: winds the stubbed fsetattr call to the first
- * child with the default fsetattr callback. Always returns 0. */
-int32_t
-ac_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *buf, int32_t valid)
-{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid);
- return 0;
-}
-
-
-/* Callback for the fstat wound by ac_fsetattr.
- * Mirrors ac_setattr_stat_cbk: runs an ownership test on the fstat result,
- * then, if the request sets uid or gid, applies the chown rules below for
- * non-root callers. When every check passes the stored fsetattr stub is
- * resumed; otherwise fsetattr is unwound with op_errno and the stub is
- * destroyed. Always returns 0.
- *
- * buf    - current attributes of the file (from fstat).
- * setbuf - attributes requested by the caller (from the stub). */
-int32_t
-ac_fsetattr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                       int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
-        call_stub_t *stub = NULL;
-        int32_t valid = 0;
-        struct iatt *setbuf = NULL;
-
-        stub = __get_frame_stub (frame);
-        if (op_ret == -1)
-                goto out;
-
-        op_ret = ac_test_access (buf, frame->root->uid, frame->root->gid,
-                                 frame->root->groups, frame->root->ngrps,
-                                 ACCTEST_DONTCARE, ACCTEST_OWNER,
-                                 &op_errno);
-        if (op_ret == -1)
-                goto out;
-
-        valid = stub->args.fsetattr.valid;
-        setbuf = &stub->args.fsetattr.stbuf;
-        /* Either a uid change or a gid change must trigger the chown checks
-         * (same condition as ac_setattr_stat_cbk); requiring both let a
-         * uid-only or gid-only request bypass them. */
-        if (gf_attr_uid_set (valid) || gf_attr_gid_set (valid)) {
-                /* chown returns EPERM if the operation would change the
-                 * ownership, but the effective user ID is not the
-                 * super-user and the process is not an owner of the file.
-                 * Ref: posix-testsuite/chown/07.t
-                 */
-                if ((frame->root->uid != 0) && (gf_attr_uid_set (valid))) {
-                        if (buf->ia_uid != setbuf->ia_uid) {
-                                op_ret = -1;
-                                op_errno = EPERM;
-                                goto out;
-                        }
-                }
-
-                /* non-super-user can modify file group if he is owner of a
-                 * file and gid he is setting is in his groups list.
-                 * Ref: posix-testsuite/chown/00.t
-                 */
-                if ((frame->root->uid != 0) && (gf_attr_gid_set (valid))) {
-                        if (frame->root->uid != buf->ia_uid) {
-                                op_ret = -1;
-                                op_errno = EPERM;
-                                goto out;
-                        }
-
-                        /* Test the gid being set (setbuf), not the file's
-                         * current gid. */
-                        op_ret = ac_test_access (setbuf, 0, frame->root->gid,
-                                                 frame->root->groups,
-                                                 frame->root->ngrps,
-                                                 ACCTEST_DONTCARE,
-                                                 ACCTEST_GROUP, &op_errno);
-                        if (op_ret == -1)
-                                goto out;
-                }
-        }
-
-        call_resume (stub);
-out:
-        if (op_ret < 0) {
-                STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
-                if (stub)
-                        call_stub_destroy (stub);
-        }
-
-        return 0;
-}
-
-
-/* fsetattr fop entry point.
- * Stores a resume stub in frame->local and winds an fstat on fd, with
- * ac_fsetattr_fstat_cbk as the callback. If the stub cannot be created,
- * fsetattr is unwound with ENOMEM. Always returns 0. */
-int32_t
-ac_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
-             int32_t valid)
-{
-        call_stub_t *stub = NULL;
-
-        stub = fop_fsetattr_stub (frame, ac_fsetattr_resume, fd, buf, valid);
-        if (!stub) {
-                gf_log (this->name, GF_LOG_ERROR, "cannot create call stub: "
-                        "(out of memory)");
-                STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL);
-                return 0;
-        }
-
-        frame->local = stub;
-        STACK_WIND (frame, ac_fsetattr_fstat_cbk, FIRST_CHILD (this),
-                    FIRST_CHILD (this)->fops->fstat, fd);
-
-        return 0;
-}
-
-
-/* Fop table of the access-control translator: each listed fop is routed to
- * its ac_* entry point. */
-struct xlator_fops fops = {
- .truncate = ac_truncate,
- .access = ac_access,
- .readlink = ac_readlink,
- .mknod = ac_mknod,
- .mkdir = ac_mkdir,
- .unlink = ac_unlink,
- .rmdir = ac_rmdir,
- .symlink = ac_symlink,
- .rename = ac_rename,
- .link = ac_link,
- .create = ac_create,
- .open = ac_open,
- .readv = ac_readv,
- .writev = ac_writev,
- .opendir = ac_opendir,
- .setattr = ac_setattr,
- .fsetattr = ac_fsetattr,
-};
-
-/* Translator init hook: does nothing and reports success (0). */
-int
-init (xlator_t *this)
-{
- return 0;
-}
-
-/* Translator fini hook: nothing to release. */
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-/* Callback table of the translator; no callbacks are registered. */
-struct xlator_cbks cbks = {
-};
diff --git a/xlators/features/access-control/src/access-control.h b/xlators/features/access-control/src/access-control.h
deleted file mode 100644
index bfc0d775270..00000000000
--- a/xlators/features/access-control/src/access-control.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __ACCESS_CONTROL_H_
-#define __ACCESS_CONTROL_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-/* Translator name and the permission bits that can be tested. */
-#define ACTRL "access-control"
-#define ACCTEST_READ 0x1
-#define ACCTEST_WRITE 0x2
-#define ACCTEST_EXEC 0x4
-#define ACCTEST_DONTCARE 0x8
-
-/* Note if the caller is only interested in ownership test i.e. one of the below
- * in combination with ACCTEST_DONTCARE, then only one type of user's
- * ownership can be tested with one call to ac_test_access, i.e. we can only
- * check either owner or group; if both need to be tested for a specific
- * (uid, gid) pair then two calls will be needed.
- */
-#define ACCTEST_OWNER 0x1
-#define ACCTEST_GROUP 0x2
-#define ACCTEST_OTHER 0x4
-
-/* Signifies any user, as long as we get access. */
-#define ACCTEST_ANY (ACCTEST_OWNER | ACCTEST_GROUP | ACCTEST_OTHER)
-
-/* Predicates: non-zero when the corresponding bit is set in acc. */
-#define ac_test_owner(acc) ((acc) & ACCTEST_OWNER)
-#define ac_test_group(acc) ((acc) & ACCTEST_GROUP)
-#define ac_test_other(acc) ((acc) & ACCTEST_OTHER)
-#define ac_test_dontcare(acc) ((acc) & ACCTEST_DONTCARE)
-#define ac_test_read(acc) ((acc) & ACCTEST_READ)
-#define ac_test_write(acc) ((acc) & ACCTEST_WRITE)
-#define ac_test_exec(acc) ((acc) & ACCTEST_EXEC)
-#endif
diff --git a/xlators/features/arbiter/Makefile.am b/xlators/features/arbiter/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/arbiter/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/Makefile.am b/xlators/features/arbiter/src/Makefile.am
new file mode 100644
index 00000000000..dd262c3d6dc
--- /dev/null
+++ b/xlators/features/arbiter/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = arbiter.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+arbiter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+arbiter_la_SOURCES = arbiter.c
+arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = arbiter.h arbiter-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h
new file mode 100644
index 00000000000..200b59de695
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARBITER_MEM_TYPES_H__
+#define __ARBITER_MEM_TYPES_H__
+#include "mem-types.h"
+
+/* Memory-accounting type ids for the arbiter translator, numbered after the
+ * common types (gf_common_mt_end). */
+typedef enum gf_arbiter_mem_types_ {
+ gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1,
+ gf_arbiter_mt_iatt,
+ gf_arbiter_mt_end
+} gf_arbiter_mem_types_t;
+#endif
diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c
new file mode 100644
index 00000000000..786f60b7bc9
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.c
@@ -0,0 +1,360 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "arbiter.h"
+#include "arbiter-mem-types.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+
+/* Frees an arbiter inode context together with its iatt buffer.
+ * A NULL ctx is ignored. */
+void
+arbiter_inode_ctx_destroy (arbiter_inode_ctx_t *ctx)
+{
+        if (ctx) {
+                GF_FREE (ctx->iattbuf);
+                GF_FREE (ctx);
+        }
+}
+
+/* Returns the arbiter context stored on the inode, creating and storing a
+ * zeroed one (with its iatt buffer) if none exists yet.
+ * Returns NULL if an allocation fails or the context cannot be stored.
+ * The only visible caller, arbiter_inode_ctx_get, holds inode->lock. */
+static arbiter_inode_ctx_t *
+__arbiter_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        uint64_t             ctx_addr = 0;
+        arbiter_inode_ctx_t *ctx      = NULL;
+
+        /* Fast path: a context is already attached to the inode. */
+        if (__inode_ctx_get (inode, this, &ctx_addr) == 0)
+                return (arbiter_inode_ctx_t *) (long) ctx_addr;
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_arbiter_mt_inode_ctx_t);
+        if (!ctx)
+                return NULL;
+
+        ctx->iattbuf = GF_CALLOC (1, sizeof (*ctx->iattbuf),
+                                  gf_arbiter_mt_iatt);
+        if (!ctx->iattbuf) {
+                arbiter_inode_ctx_destroy (ctx);
+                return NULL;
+        }
+
+        if (__inode_ctx_put (inode, this, (uint64_t)ctx) != 0) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+                                  "set the inode ctx (%s)",
+                                  uuid_utoa (inode->gfid));
+                arbiter_inode_ctx_destroy (ctx);
+                return NULL;
+        }
+
+        return ctx;
+}
+
+/* Locked wrapper around __arbiter_inode_ctx_get: takes inode->lock for the
+ * lookup/creation of the context. Returns the context, or NULL on failure. */
+static arbiter_inode_ctx_t *
+arbiter_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __arbiter_inode_ctx_get (inode, this);
+ }
+ UNLOCK(&inode->lock);
+ return ctx;
+}
+
+/* Lookup callback: on a successful lookup, copies the returned iatt into the
+ * inode's arbiter context. If the context cannot be obtained the lookup is
+ * failed with ENOMEM. The (possibly updated) result is then unwound.
+ * Always returns 0. */
+int32_t
+arbiter_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        arbiter_inode_ctx_t *ctx = NULL;
+
+        if (op_ret == 0) {
+                ctx = arbiter_inode_ctx_get (inode, this);
+                if (ctx) {
+                        memcpy (ctx->iattbuf, buf, sizeof (*ctx->iattbuf));
+                } else {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+                             xdata, postparent);
+        return 0;
+}
+
+/* lookup fop: winds the lookup to the first child with arbiter_lookup_cbk as
+ * the callback. Always returns 0. */
+int32_t
+arbiter_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND (frame, arbiter_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+/* readv fop: never winds to the child; every read is unwound immediately as
+ * a failure with ENOTCONN. Always returns 0. */
+int32_t
+arbiter_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+/* truncate fop: not wound to the child. Unwinds success with the iatt held
+ * in the inode's arbiter context as both pre and post buffers, or ENOMEM if
+ * the context cannot be obtained. Always returns 0. */
+int32_t
+arbiter_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+                  dict_t *xdata)
+{
+        struct iatt         *buf      = NULL;
+        int32_t              op_ret   = 0;
+        int32_t              op_errno = 0;
+        arbiter_inode_ctx_t *ctx      = arbiter_inode_ctx_get (loc->inode, this);
+
+        if (ctx) {
+                buf = ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, buf, NULL);
+        return 0;
+}
+
+/* ftruncate fop: not wound to the child. Unwinds success with the iatt held
+ * in the inode's arbiter context as both pre and post buffers, or ENOMEM if
+ * the context cannot be obtained. Always returns 0. */
+int32_t
+arbiter_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                   dict_t *xdata)
+{
+        struct iatt         *buf      = NULL;
+        int32_t              op_ret   = 0;
+        int32_t              op_errno = 0;
+        arbiter_inode_ctx_t *ctx      = arbiter_inode_ctx_get (fd->inode, this);
+
+        if (ctx) {
+                buf = ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, buf, buf,
+                             NULL);
+        return 0;
+}
+
+/* Builds the response xdata for a writev handled by the arbiter.
+ * For each of GLUSTERFS_OPEN_FD_COUNT and GLUSTERFS_WRITE_IS_APPEND present
+ * in the request xdata, the same key is set in the response (the inode's
+ * fd_count and 1 respectively); a failed set is only logged at debug level.
+ * Returns a new dict owned by the caller, or NULL when fd/inode/gfid or
+ * xdata is missing or the dict cannot be allocated. */
+dict_t*
+arbiter_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this)
+{
+        dict_t  *reply       = NULL;
+        int32_t  ret         = 0;
+        int      append_flag = 1;
+
+        if (!fd || !fd->inode || gf_uuid_is_null (fd->inode->gfid))
+                return NULL;
+
+        if (!xdata)
+                return NULL;
+
+        reply = dict_new();
+        if (!reply)
+                return NULL;
+
+        if (dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) {
+                ret = dict_set_uint32 (reply, GLUSTERFS_OPEN_FD_COUNT,
+                                       fd->inode->fd_count);
+                if (ret < 0)
+                        gf_msg_debug (this->name, 0, "Failed to set dict value"
+                                      " for GLUSTERFS_OPEN_FD_COUNT");
+        }
+
+        if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+                ret = dict_set_uint32 (reply, GLUSTERFS_WRITE_IS_APPEND,
+                                       append_flag);
+                if (ret < 0)
+                        gf_msg_debug (this->name, 0, "Failed to set dict value"
+                                      " for GLUSTERFS_WRITE_IS_APPEND");
+        }
+
+        return reply;
+}
+
+/* writev fop: not wound to the child. Reports the full length of the
+ * vector as written, with the iatt from the inode's arbiter context as both
+ * pre and post buffers and the response xdata from
+ * arbiter_fill_writev_xdata; fails with ENOMEM if the context cannot be
+ * obtained. The response dict is unreffed after the unwind.
+ * Always returns 0. */
+int32_t
+arbiter_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+                struct iobref *iobref, dict_t *xdata)
+{
+        struct iatt         *buf       = NULL;
+        dict_t              *rsp_xdata = NULL;
+        int                  op_ret    = 0;
+        int                  op_errno  = 0;
+        arbiter_inode_ctx_t *ctx       = arbiter_inode_ctx_get (fd->inode, this);
+
+        if (ctx) {
+                buf = ctx->iattbuf;
+                op_ret = iov_length (vector, count);
+                rsp_xdata = arbiter_fill_writev_xdata (fd, xdata, this);
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, buf, buf,
+                             rsp_xdata);
+        if (rsp_xdata)
+                dict_unref (rsp_xdata);
+        return 0;
+}
+
+/* fallocate fop: not wound to the child. Unwinds success with the iatt held
+ * in the inode's arbiter context as both pre and post buffers, or ENOMEM if
+ * the context cannot be obtained. Always returns 0. */
+int32_t
+arbiter_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+        arbiter_inode_ctx_t *ctx      = arbiter_inode_ctx_get (fd->inode, this);
+
+        if (ctx) {
+                buf = ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, buf, buf, NULL);
+        return 0;
+}
+
+/* discard fop: not wound to the child. Unwinds success with the iatt held
+ * in the inode's arbiter context as both pre and post buffers, or ENOMEM if
+ * the context cannot be obtained. Always returns 0. */
+int32_t
+arbiter_discard (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+        arbiter_inode_ctx_t *ctx      = arbiter_inode_ctx_get (fd->inode, this);
+
+        if (ctx) {
+                buf = ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, buf, buf, NULL);
+        return 0;
+}
+
+/* zerofill fop: not wound to the child. Unwinds success with the iatt held
+ * in the inode's arbiter context as both pre and post buffers, or ENOMEM if
+ * the context cannot be obtained. Always returns 0. */
+int32_t
+arbiter_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  off_t offset, off_t len, dict_t *xdata)
+{
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+        arbiter_inode_ctx_t *ctx      = arbiter_inode_ctx_get (fd->inode, this);
+
+        if (ctx) {
+                buf = ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, buf, buf, NULL);
+        return 0;
+}
+
+/* Initialises memory accounting for this translator with room for the
+ * arbiter memory types. Logs an error on failure.
+ * Returns the result of xlator_mem_acct_init. */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = xlator_mem_acct_init (this, gf_arbiter_mt_end + 1);
+
+        if (status != 0)
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting "
+                        "initialization failed.");
+
+        return status;
+}
+
+/* Reconfigure hook: the options dict is not examined; always returns 0. */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+
+ return 0;
+}
+
+/* forget callback: detaches the arbiter context from the inode and releases
+ * it. Does nothing if no context was stored. Always returns 0. */
+int
+arbiter_forget (xlator_t *this, inode_t *inode)
+{
+        arbiter_inode_ctx_t *ctx = NULL;
+        uint64_t ctx_addr = 0;
+
+        inode_ctx_del (inode, this, &ctx_addr);
+        if (!ctx_addr)
+                return 0;
+        ctx = (arbiter_inode_ctx_t *) (long) ctx_addr;
+        /* Use the destroy helper so ctx->iattbuf is freed as well; freeing
+         * only ctx leaked the iatt buffer on every forgotten inode. */
+        arbiter_inode_ctx_destroy (ctx);
+        return 0;
+}
+
+/* Translator init hook.
+ * Fails (-1) unless the translator has exactly one child. A missing parent
+ * is only logged; init still succeeds (0) in that case. */
+int32_t
+init (xlator_t *this)
+{
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'arbiter' not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dangling volume. check volfile ");
+
+ return 0;
+}
+
+/* Translator fini hook: nothing to release. */
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+/* Fop table of the arbiter translator: each listed fop is routed to its
+ * arbiter_* handler. */
+struct xlator_fops fops = {
+ .lookup = arbiter_lookup,
+ .readv = arbiter_readv,
+ .truncate = arbiter_truncate,
+ .writev = arbiter_writev,
+ .ftruncate = arbiter_ftruncate,
+ .fallocate = arbiter_fallocate,
+ .discard = arbiter_discard,
+ .zerofill = arbiter_zerofill,
+};
+
+/* Callback table: arbiter_forget releases the per-inode context. */
+struct xlator_cbks cbks = {
+ .forget = arbiter_forget,
+};
+
+/* Option table: contains only the terminating entry (NULL key). */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h
new file mode 100644
index 00000000000..6ccc3add3b3
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _ARBITER_H
+#define _ARBITER_H
+
+#include "locking.h"
+#include "common-utils.h"
+
+/* Per-inode context of the arbiter translator. */
+typedef struct arbiter_inode_ctx_ {
+ struct iatt *iattbuf; /* separately allocated iatt; filled from lookup replies */
+} arbiter_inode_ctx_t;
+
+#endif /* _ARBITER_H */
diff --git a/xlators/features/barrier/Makefile.am b/xlators/features/barrier/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/barrier/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/barrier/src/Makefile.am b/xlators/features/barrier/src/Makefile.am
new file mode 100644
index 00000000000..4e909c8aad8
--- /dev/null
+++ b/xlators/features/barrier/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = barrier.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+barrier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+barrier_la_SOURCES = barrier.c
+
+barrier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = barrier.h barrier-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/barrier/src/barrier-mem-types.h b/xlators/features/barrier/src/barrier-mem-types.h
new file mode 100644
index 00000000000..36647a66966
--- /dev/null
+++ b/xlators/features/barrier/src/barrier-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BARRIER_MEM_TYPES_H__
+#define __BARRIER_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory-accounting type ids for the barrier translator, numbered after the
+ * common types (gf_common_mt_end). */
+enum gf_barrier_mem_types_ {
+ gf_barrier_mt_priv_t = gf_common_mt_end + 1,
+ gf_barrier_mt_end
+};
+#endif
diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c
new file mode 100644
index 00000000000..ce3a255d93e
--- /dev/null
+++ b/xlators/features/barrier/src/barrier.c
@@ -0,0 +1,799 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "barrier.h"
+#include "defaults.h"
+#include "call-stub.h"
+
+#include "statedump.h"
+
+/*
+ * Store a heap-allocated copy of 'gfid' in frame->local.
+ *
+ * Nothing is stored when 'gfid' is NULL. If the allocation fails a warning is
+ * logged and frame->local is left untouched.
+ */
+void
+barrier_local_set_gfid (call_frame_t *frame, uuid_t gfid, xlator_t *this)
+{
+        uuid_t *gfid_copy = NULL;
+
+        if (!gfid)
+                return;
+
+        gfid_copy = GF_MALLOC (sizeof (uuid_t), gf_common_mt_uuid_t);
+        if (!gfid_copy) {
+                gf_log (this->name, GF_LOG_WARNING, "Could not set gfid"
+                        ". gfid will not be dumped in statedump file.");
+                return;
+        }
+
+        gf_uuid_copy (*gfid_copy, gfid);
+        frame->local = gfid_copy;
+}
+
+/*
+ * Release whatever is stored in frame->local and clear the pointer.
+ * Does nothing when frame->local is already NULL.
+ */
+void
+barrier_local_free_gfid (call_frame_t *frame)
+{
+        if (!frame->local)
+                return;
+
+        GF_FREE (frame->local);
+        frame->local = NULL;
+}
+
+/*
+ * Resume handler handed to fop_truncate_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the truncate reply with the arguments it
+ * is called with. Always returns 0.
+ */
+int32_t
+barrier_truncate_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_ftruncate_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the ftruncate reply with the arguments it
+ * is called with. Always returns 0.
+ */
+int32_t
+barrier_ftruncate_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_unlink_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the unlink reply with the arguments it is
+ * called with. Always returns 0.
+ */
+int32_t
+barrier_unlink_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_rmdir_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the rmdir reply with the arguments it is
+ * called with. Always returns 0.
+ */
+int32_t
+barrier_rmdir_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_rename_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the rename reply with the arguments it is
+ * called with. Always returns 0.
+ */
+int32_t
+barrier_rename_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_writev_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the writev reply with the arguments it is
+ * called with. Always returns 0.
+ */
+int32_t
+barrier_writev_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_fsync_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the fsync reply with the arguments it is
+ * called with. Always returns 0.
+ */
+int32_t
+barrier_fsync_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_removexattr_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the removexattr reply with the arguments
+ * it is called with. Always returns 0.
+ */
+int32_t
+barrier_removexattr_cbk_resume (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/*
+ * Resume handler handed to fop_fremovexattr_cbk_stub() by BARRIER_FOP_CBK.
+ * Frees frame->local and unwinds the fremovexattr reply with the arguments
+ * it is called with. Always returns 0.
+ */
+int32_t
+barrier_fremovexattr_cbk_resume (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ barrier_local_free_gfid (frame);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/*
+ * Reply callback for writev, wound from barrier_writev. BARRIER_FOP_CBK
+ * either queues the reply as a call stub (barrier enabled) or unwinds it
+ * immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ BARRIER_FOP_CBK (writev, out, frame, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for fremovexattr, wound from barrier_fremovexattr.
+ * BARRIER_FOP_CBK either queues the reply as a call stub (barrier enabled)
+ * or unwinds it immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (fremovexattr, out, frame, this, op_ret, op_errno,
+ xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for removexattr, wound from barrier_removexattr.
+ * BARRIER_FOP_CBK either queues the reply as a call stub (barrier enabled)
+ * or unwinds it immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (removexattr, out, frame, this, op_ret, op_errno,
+ xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for truncate, wound from barrier_truncate. BARRIER_FOP_CBK
+ * either queues the reply as a call stub (barrier enabled) or unwinds it
+ * immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (truncate, out, frame, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for ftruncate, wound from barrier_ftruncate.
+ * BARRIER_FOP_CBK either queues the reply as a call stub (barrier enabled)
+ * or unwinds it immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (ftruncate, out, frame, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for rename, wound from barrier_rename. BARRIER_FOP_CBK
+ * either queues the reply as a call stub (barrier enabled) or unwinds it
+ * immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ BARRIER_FOP_CBK (rename, out, frame, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for rmdir, wound from barrier_rmdir. BARRIER_FOP_CBK
+ * either queues the reply as a call stub (barrier enabled) or unwinds it
+ * immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (rmdir, out, frame, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for unlink, wound from barrier_unlink. BARRIER_FOP_CBK
+ * either queues the reply as a call stub (barrier enabled) or unwinds it
+ * immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (unlink, out, frame, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+out:
+ return 0;
+}
+
+/*
+ * Reply callback for fsync, wound from barrier_fsync. BARRIER_FOP_CBK
+ * either queues the reply as a call stub (barrier enabled) or unwinds it
+ * immediately; both paths continue at 'out'. Always returns 0.
+ */
+int32_t
+barrier_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK (fsync, out, frame, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+out:
+ return 0;
+}
+
+/*
+ * writev fop. Writes that carry neither O_SYNC nor O_DSYNC (in the call
+ * flags or in fd->flags) are tail-wound to the first child without a
+ * barrier callback. All other writes record fd->inode->gfid in frame->local
+ * and are wound with barrier_writev_cbk as the reply callback.
+ */
+int32_t
+barrier_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ /* not a synchronous write: pass through, reply is never barriered */
+ if (!((flags | fd->flags) & (O_SYNC | O_DSYNC))) {
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, off, flags, iobref, xdata);
+
+ return 0;
+ }
+
+ barrier_local_set_gfid (frame, fd->inode->gfid, this);
+ STACK_WIND (frame, barrier_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ off, flags, iobref, xdata);
+ return 0;
+}
+
+/*
+ * fremovexattr fop: records fd->inode->gfid in frame->local and winds the
+ * call to the first child with barrier_fremovexattr_cbk as reply callback.
+ */
+int32_t
+barrier_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, fd->inode->gfid, this);
+ STACK_WIND (frame, barrier_fremovexattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+
+/*
+ * removexattr fop: records loc->inode->gfid in frame->local and winds the
+ * call to the first child with barrier_removexattr_cbk as reply callback.
+ */
+int32_t
+barrier_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, loc->inode->gfid, this);
+ STACK_WIND (frame, barrier_removexattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+/*
+ * truncate fop: records loc->inode->gfid in frame->local and winds the call
+ * to the first child with barrier_truncate_cbk as reply callback.
+ */
+int32_t
+barrier_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, loc->inode->gfid, this);
+ STACK_WIND (frame, barrier_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+}
+
+
+/*
+ * rename fop: records oldloc->inode->gfid in frame->local and winds the
+ * call to the first child with barrier_rename_cbk as reply callback.
+ */
+int32_t
+barrier_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, oldloc->inode->gfid, this);
+ STACK_WIND (frame, barrier_rename_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/*
+ * rmdir fop: records loc->inode->gfid in frame->local and winds the call to
+ * the first child with barrier_rmdir_cbk as reply callback.
+ */
+int
+barrier_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, loc->inode->gfid, this);
+ STACK_WIND (frame, barrier_rmdir_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+}
+
+/*
+ * unlink fop: records loc->inode->gfid in frame->local and winds the call
+ * to the first child with barrier_unlink_cbk as reply callback.
+ */
+int32_t
+barrier_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, loc->inode->gfid, this);
+ STACK_WIND (frame, barrier_unlink_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
+
+/*
+ * ftruncate fop: records fd->inode->gfid in frame->local and winds the call
+ * to the first child with barrier_ftruncate_cbk as reply callback.
+ */
+int32_t
+barrier_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, fd->inode->gfid, this);
+ STACK_WIND (frame, barrier_ftruncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+}
+
+/*
+ * fsync fop: records fd->inode->gfid in frame->local and winds the call to
+ * the first child with barrier_fsync_cbk as reply callback.
+ */
+int32_t
+barrier_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ barrier_local_set_gfid (frame, fd->inode->gfid, this);
+ STACK_WIND (frame, barrier_fsync_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
+}
+
+/*
+ * Detach and return the first call stub on 'queue'.
+ *
+ * Returns NULL when the queue is empty. this->private is only asserted to
+ * be set; priv->queue_size is not modified here.
+ */
+call_stub_t *
+__barrier_dequeue (xlator_t *this, struct list_head *queue)
+{
+        barrier_priv_t *priv = this->private;
+        call_stub_t *head = NULL;
+
+        GF_ASSERT (priv);
+
+        if (list_empty (queue))
+                return NULL;
+
+        head = list_entry (queue->next, call_stub_t, list);
+        list_del_init (&head->list);
+
+        return head;
+}
+
+/*
+ * Drain 'queue': every stub is removed with __barrier_dequeue() and resumed
+ * with call_resume(), in queue order. Logs before and after the drain.
+ */
+void
+barrier_dequeue_all (xlator_t *this, struct list_head *queue)
+{
+        call_stub_t *next = NULL;
+
+        gf_log (this->name, GF_LOG_INFO, "Dequeuing all the barriered fops");
+
+        /* TODO: Start the below task in a new thread */
+        for (next = __barrier_dequeue (this, queue); next;
+             next = __barrier_dequeue (this, queue)) {
+                call_resume (next);
+        }
+
+        gf_log (this->name, GF_LOG_INFO, "Dequeuing the barriered fops is "
+                "finished");
+}
+
+/*
+ * Timer callback registered by __barrier_enable(); 'data' is the xlator.
+ *
+ * Disables the barrier under priv->lock, moving the queued stubs onto a
+ * local list, then resumes them after the lock has been dropped.
+ */
+void
+barrier_timeout (void *data)
+{
+        xlator_t *this = data;
+        barrier_priv_t *priv = NULL;
+        struct list_head queue = {0,};
+
+        THIS = this;
+        priv = this->private;
+
+        INIT_LIST_HEAD (&queue);
+
+        gf_log (this->name, GF_LOG_CRITICAL, "Disabling barrier because of "
+                "the barrier timeout.");
+
+        LOCK (&priv->lock);
+        __barrier_disable (this, &queue);
+        UNLOCK (&priv->lock);
+
+        barrier_dequeue_all (this, &queue);
+}
+
+/*
+ * Append 'stub' to the tail of priv->queue and bump priv->queue_size.
+ */
+void
+__barrier_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        barrier_priv_t *priv = this->private;
+
+        GF_ASSERT (priv);
+
+        list_add_tail (&stub->list, &priv->queue);
+        priv->queue_size += 1;
+}
+
+/*
+ * Turn the barrier off.
+ *
+ * Cancels the pending timeout timer (if any), moves every queued stub from
+ * priv->queue onto the caller-supplied 'queue', and resets the queue size
+ * and the enabled flag. The stubs are not resumed here.
+ */
+void
+__barrier_disable (xlator_t *this, struct list_head *queue)
+{
+        barrier_priv_t *priv = this->private;
+        GF_UNUSED int ret = 0;
+
+        GF_ASSERT (priv);
+
+        if (priv->timer != NULL) {
+                ret = gf_timer_call_cancel (this->ctx, priv->timer);
+                priv->timer = NULL;
+        }
+
+        list_splice_init (&priv->queue, queue);
+        priv->barrier_enabled = _gf_false;
+        priv->queue_size = 0;
+}
+
+/*
+ * Turn the barrier on.
+ *
+ * Arms a timer that calls barrier_timeout() after priv->timeout and, if that
+ * succeeds, sets priv->barrier_enabled. Returns 0 on success, -1 (with the
+ * flag untouched) when the timer could not be registered.
+ */
+int
+__barrier_enable (xlator_t *this, barrier_priv_t *priv)
+{
+        priv->timer = gf_timer_call_after (this->ctx, priv->timeout,
+                                           barrier_timeout, (void *) this);
+        if (priv->timer) {
+                priv->barrier_enabled = _gf_true;
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_CRITICAL, "Couldn't add barrier "
+                "timeout event.");
+        return -1;
+}
+
+/*
+ * Event handler.
+ *
+ * GF_EVENT_TRANSLATOR_OP: 'data' is a dict whose "barrier" key requests the
+ * barrier to be switched on or off. Requesting the state the barrier is
+ * already in is logged as an error and leaves ret at -1. Stubs released by a
+ * disable are resumed after priv->lock has been dropped.
+ *
+ * Any other event is handed to default_notify() and 0 is returned.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ barrier_priv_t *priv = NULL;
+ dict_t *dict = NULL;
+ gf_boolean_t past = _gf_false;
+ int ret = -1;
+ int barrier_enabled = _gf_false;
+ struct list_head queue = {0,};
+
+ priv = this->private;
+ GF_ASSERT (priv);
+ INIT_LIST_HEAD (&queue);
+
+ switch (event) {
+ case GF_EVENT_TRANSLATOR_OP:
+ {
+ dict = data;
+ /* -1 is the fallback passed to dict_get_str_boolean */
+ barrier_enabled = dict_get_str_boolean (dict, "barrier", -1);
+
+ if (barrier_enabled == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not fetch "
+ " barrier key from the dictionary.");
+ goto out;
+ }
+
+ LOCK (&priv->lock);
+ {
+ /* 'past' is the state before this request */
+ past = priv->barrier_enabled;
+
+ switch (past) {
+ case _gf_false:
+ if (barrier_enabled) {
+ ret = __barrier_enable (this,priv);
+ if (ret)
+ goto unlock;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Already disabled.");
+ goto unlock;
+ }
+ break;
+
+ case _gf_true:
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Already enabled");
+ goto unlock;
+ }
+ break;
+ }
+ ret = 0;
+ }
+unlock:
+ UNLOCK (&priv->lock);
+
+ /* resume released stubs outside the lock */
+ if (!list_empty (&queue))
+ barrier_dequeue_all (this, &queue);
+
+ break;
+ }
+ default:
+ {
+ default_notify (this, event, data);
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+/*
+ * Apply changed "barrier" / "barrier-timeout" options at runtime.
+ *
+ * The new timeout is stored before the barrier is (possibly) enabled, so a
+ * timer armed by this very reconfigure already uses the new value instead of
+ * the previous one. If enabling fails the previous timeout is restored and
+ * -1 is returned, leaving priv as it was.
+ *
+ * Stubs released by a disable are resumed after priv->lock is dropped.
+ * Returns 0 on success, -1 if an option could not be read or the barrier
+ * could not be enabled.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        barrier_priv_t *priv = NULL;
+        gf_boolean_t past = _gf_false;
+        int ret = -1;
+        gf_boolean_t barrier_enabled = _gf_false;
+        uint32_t timeout = 0;
+        time_t old_timeout = 0;
+        struct list_head queue = {0,};
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_OPTION_RECONF ("barrier", barrier_enabled, options, bool, out);
+        GF_OPTION_RECONF ("barrier-timeout", timeout, options, time, out);
+
+        INIT_LIST_HEAD (&queue);
+
+        LOCK (&priv->lock);
+        {
+                past = priv->barrier_enabled;
+
+                /* install the new timeout first: __barrier_enable() reads
+                 * priv->timeout when it arms the timer */
+                old_timeout = priv->timeout.tv_sec;
+                priv->timeout.tv_sec = timeout;
+
+                switch (past) {
+                case _gf_false:
+                        if (barrier_enabled) {
+                                ret = __barrier_enable (this, priv);
+                                if (ret) {
+                                        /* keep the failure path side-effect
+                                         * free, as before */
+                                        priv->timeout.tv_sec = old_timeout;
+                                        goto unlock;
+                                }
+                        }
+                        break;
+
+                case _gf_true:
+                        if (!barrier_enabled) {
+                                __barrier_disable (this, &queue);
+                        }
+                        break;
+                }
+                ret = 0;
+        }
+unlock:
+        UNLOCK (&priv->lock);
+
+        /* resume released stubs outside the lock */
+        if (!list_empty (&queue))
+                barrier_dequeue_all (this, &queue);
+
+out:
+        return ret;
+}
+
+/*
+ * Register the barrier memory types (up to gf_barrier_mt_end) with the
+ * xlator memory accounting. Returns the result of xlator_mem_acct_init()
+ * and logs an error when it is non-zero.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = xlator_mem_acct_init (this, gf_barrier_mt_end + 1);
+
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting "
+                        "initialization failed.");
+        }
+
+        return ret;
+}
+
+/*
+ * Translator constructor.
+ *
+ * Requires exactly one child. Allocates the private state, reads the
+ * "barrier" and "barrier-timeout" options and, if the barrier is configured
+ * on, enables it right away. this->private is only set on success.
+ *
+ * Returns 0 on success and -1 on failure. On failure the private state and
+ * its lock are released again so nothing leaks.
+ */
+int
+init (xlator_t *this)
+{
+        int ret = -1;
+        barrier_priv_t *priv = NULL;
+        uint32_t timeout = 0;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'barrier' not configured with exactly one child");
+                goto out;
+        }
+
+        if (!this->parents)
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_barrier_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        LOCK_INIT (&priv->lock);
+
+        GF_OPTION_INIT ("barrier", priv->barrier_enabled, bool, out);
+        GF_OPTION_INIT ("barrier-timeout", timeout, time, out);
+        priv->timeout.tv_sec = timeout;
+
+        INIT_LIST_HEAD (&priv->queue);
+
+        if (priv->barrier_enabled) {
+                ret = __barrier_enable (this, priv);
+                if (ret == -1)
+                        goto out;
+        }
+
+        this->private = priv;
+        ret = 0;
+out:
+        /* priv is not yet reachable through this->private on failure, so
+         * fini() would never free it */
+        if (ret && priv) {
+                LOCK_DESTROY (&priv->lock);
+                GF_FREE (priv);
+        }
+        return ret;
+}
+
+/*
+ * Translator destructor.
+ *
+ * Disables the barrier, resumes every reply that was still queued, then
+ * destroys the lock and frees the private state. A NULL this->private is a
+ * no-op.
+ */
+void
+fini (xlator_t *this)
+{
+        barrier_priv_t *priv = this->private;
+        struct list_head queue = {0,};
+
+        if (!priv)
+                return;
+
+        INIT_LIST_HEAD (&queue);
+
+        gf_log (this->name, GF_LOG_INFO, "Disabling barriering and dequeuing "
+                "all the queued fops");
+
+        LOCK (&priv->lock);
+        __barrier_disable (this, &queue);
+        UNLOCK (&priv->lock);
+
+        if (!list_empty (&queue))
+                barrier_dequeue_all (this, &queue);
+
+        this->private = NULL;
+
+        LOCK_DESTROY (&priv->lock);
+        GF_FREE (priv);
+}
+
+/*
+ * Write one queued stub to the statedump under 'prefix': always the fop
+ * name, plus the gfid stored in frame->local, loc.path and loc.name when
+ * each of them is set.
+ */
+static void
+barrier_dump_stub (call_stub_t *stub, char *prefix)
+{
+        char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+        const char *path = stub->args.loc.path;
+        const char *name = stub->args.loc.name;
+
+        gf_proc_dump_build_key (key, prefix, "fop");
+        gf_proc_dump_write (key, "%s", gf_fop_list[stub->fop]);
+
+        if (stub->frame->local != NULL) {
+                gf_proc_dump_build_key (key, prefix, "gfid");
+                gf_proc_dump_write (key, "%s",
+                                    uuid_utoa (*(uuid_t*)(stub->frame->local)));
+        }
+
+        if (path != NULL) {
+                gf_proc_dump_build_key (key, prefix, "path");
+                gf_proc_dump_write (key, "%s", path);
+        }
+
+        if (name != NULL) {
+                gf_proc_dump_build_key (key, prefix, "name");
+                gf_proc_dump_write (key, "%s", name);
+        }
+}
+
+/*
+ * Dump every stub on priv->queue as its own statedump section named
+ * "stub.<index>", with indexes starting at 0 in queue order.
+ */
+static void
+__barrier_dump_queue (barrier_priv_t *priv)
+{
+        char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+        call_stub_t *stub = NULL;
+        int idx = 0;
+
+        GF_VALIDATE_OR_GOTO ("barrier", priv, out);
+
+        list_for_each_entry (stub, &priv->queue, list) {
+                snprintf (key, sizeof (key), "stub.%d", idx);
+                idx++;
+
+                gf_proc_dump_add_section (key);
+                barrier_dump_stub (stub, key);
+        }
+
+out:
+        return;
+}
+
+/*
+ * Statedump hook for the translator's private state.
+ *
+ * Writes the enabled flag and timeout and, while the barrier is enabled,
+ * the queue size and every queued stub. All reads happen under priv->lock.
+ *
+ * Returns 0 when the dump was written or when there is no private state,
+ * and -1 only when 'this' is NULL.
+ */
+int
+barrier_dump_priv (xlator_t *this)
+{
+        int ret = -1;
+        char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+        barrier_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("barrier", this, out);
+
+        priv = this->private;
+        if (!priv)
+                return 0;
+
+        gf_proc_dump_build_key (key, "xlator.features.barrier", "priv");
+        gf_proc_dump_add_section (key);
+
+        LOCK (&priv->lock);
+        {
+                gf_proc_dump_build_key (key, "barrier", "enabled");
+                gf_proc_dump_write (key, "%d", priv->barrier_enabled);
+                gf_proc_dump_build_key (key, "barrier", "timeout");
+                /* time_t is not guaranteed to be 64 bits wide; cast so the
+                 * argument matches PRId64 */
+                gf_proc_dump_write (key, "%"PRId64,
+                                    (int64_t) priv->timeout.tv_sec);
+                if (priv->barrier_enabled) {
+                        gf_proc_dump_build_key (key, "barrier", "queue_size");
+                        gf_proc_dump_write (key, "%d", priv->queue_size);
+                        __barrier_dump_queue (priv);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        /* the dump was written: report success instead of the initial -1 */
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * File operations implemented by this translator. Every entry points at a
+ * barrier_* wrapper defined above; fops not listed here are left unset in
+ * this table.
+ */
+struct xlator_fops fops = {
+
+ /* Barrier Class fops */
+ .rmdir = barrier_rmdir,
+ .unlink = barrier_unlink,
+ .rename = barrier_rename,
+ .removexattr = barrier_removexattr,
+ .fremovexattr = barrier_fremovexattr,
+ .truncate = barrier_truncate,
+ .ftruncate = barrier_ftruncate,
+ .fsync = barrier_fsync,
+
+ /* Writes with only O_SYNC flag */
+ .writev = barrier_writev,
+};
+
+/* Statedump hooks: only the private-state dump is provided. */
+struct xlator_dumpops dumpops = {
+ .priv = barrier_dump_priv,
+};
+
+/* No callbacks are set: the table has no initializer. */
+struct xlator_cbks cbks;
+
+/*
+ * Options understood by this translator: "barrier" (boolean, default "off")
+ * and "barrier-timeout" (time, default BARRIER_TIMEOUT). The list is
+ * terminated by an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"barrier"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When \"on\", blocks acknowledgements to application "
+ "for file operations such as rmdir, rename, unlink, "
+ "removexattr, fremovexattr, truncate, ftruncate, "
+ "write (with O_SYNC), fsync. It is turned \"off\" by "
+ "default."
+ },
+ { .key = {"barrier-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = BARRIER_TIMEOUT,
+ .description = "After 'timeout' seconds since the time 'barrier' "
+ "option was set to \"on\", acknowledgements to file "
+ "operations are no longer blocked and previously "
+ "blocked acknowledgements are sent to the application"
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h
new file mode 100644
index 00000000000..0d646f90474
--- /dev/null
+++ b/xlators/features/barrier/src/barrier.h
@@ -0,0 +1,82 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BARRIER_H__
+#define __BARRIER_H__
+
+#include "barrier-mem-types.h"
+#include "xlator.h"
+#include "timer.h"
+#include "call-stub.h"
+
+/*
+ * Common body of every barrier_<fop>_cbk callback.
+ *
+ * fop_name - fop identifier; selects fop_<fop_name>_cbk_stub and
+ *            barrier_<fop_name>_cbk_resume by token pasting
+ * label    - label in the calling function that every path jumps to
+ * frame    - call frame of the reply
+ * this     - the barrier xlator
+ * params   - the reply arguments (op_ret, op_errno, ...)
+ *
+ * Under priv->lock: when the barrier is enabled the reply is wrapped in a
+ * call stub and queued. If the stub cannot be created the barrier is
+ * disabled and the already queued stubs are moved to a local list.
+ *
+ * After the lock is dropped: a queued reply just jumps to 'label'. Otherwise
+ * the stubs released by a failed stub creation are resumed (with a CRITICAL
+ * log), frame->local is freed and the reply is unwound immediately.
+ *
+ * The macro defines the label 'unlock' and a local named 'queue', so it can
+ * be expanded only once per function and the caller must not use those
+ * names itself.
+ */
+#define BARRIER_FOP_CBK(fop_name, label, frame, this, params ...) \
+ do { \
+ barrier_priv_t *_priv = NULL; \
+ call_stub_t *_stub = NULL; \
+ gf_boolean_t _barrier_enabled= _gf_false; \
+ struct list_head queue = {0, }; \
+ \
+ INIT_LIST_HEAD (&queue); \
+ \
+ _priv = this->private; \
+ GF_ASSERT (_priv); \
+ \
+ LOCK (&_priv->lock); \
+ { \
+ if (_priv->barrier_enabled) { \
+ _barrier_enabled = _priv->barrier_enabled;\
+ \
+ _stub = fop_##fop_name##_cbk_stub \
+ (frame, \
+ barrier_##fop_name##_cbk_resume,\
+ params); \
+ if (!_stub) { \
+ __barrier_disable (this, &queue);\
+ goto unlock; \
+ } \
+ \
+ __barrier_enqueue (this, _stub); \
+ } \
+ } \
+unlock: \
+ UNLOCK (&_priv->lock); \
+ \
+ if (_stub) \
+ goto label; \
+ \
+ if (_barrier_enabled && !_stub) { \
+ gf_log (this->name, GF_LOG_CRITICAL, \
+ "Failed to barrier FOPs, disabling " \
+ "barrier. FOP: %s, ERROR: %s", \
+ #fop_name, strerror (ENOMEM)); \
+ barrier_dequeue_all (this, &queue); \
+ } \
+ barrier_local_free_gfid (frame); \
+ STACK_UNWIND_STRICT (fop_name, frame, params); \
+ goto label; \
+ } while (0)
+
+/* Private state of one barrier translator instance. */
+typedef struct {
+ /* timeout timer armed by __barrier_enable, cancelled by __barrier_disable */
+ gf_timer_t *timer;
+ /* whether replies are currently being held back */
+ gf_boolean_t barrier_enabled;
+ /* taken around enable/disable/enqueue and the statedump of this struct */
+ gf_lock_t lock;
+ /* call stubs of the held-back replies, appended by __barrier_enqueue */
+ struct list_head queue;
+ /* delay handed to the timer; tv_sec is set from "barrier-timeout" */
+ struct timespec timeout;
+ /* incremented per enqueued stub, reset to 0 by __barrier_disable */
+ uint32_t queue_size;
+} barrier_priv_t;
+
+/* Arm the timeout timer and set the enabled flag; 0 on success, -1 on failure. */
+int __barrier_enable (xlator_t *this, barrier_priv_t *priv);
+/* Append a call stub to the private queue. */
+void __barrier_enqueue (xlator_t *this, call_stub_t *stub);
+/* Cancel the timer, move all queued stubs onto 'queue', clear the flag. */
+void __barrier_disable (xlator_t *this, struct list_head *queue);
+/* Timer callback: disables the barrier and resumes the queued stubs. */
+void barrier_timeout (void *data);
+/* Resume every stub on 'queue'. */
+void barrier_dequeue_all (xlator_t *this, struct list_head *queue);
+/* Detach and return the first stub on 'queue', or NULL when it is empty. */
+call_stub_t *__barrier_dequeue (xlator_t *this, struct list_head *queue);
+
+#endif
diff --git a/xlators/protocol/legacy/transport/ib-verbs/Makefile.am b/xlators/features/bit-rot/Makefile.am
index f963effea22..f963effea22 100644
--- a/xlators/protocol/legacy/transport/ib-verbs/Makefile.am
+++ b/xlators/features/bit-rot/Makefile.am
diff --git a/xlators/features/bit-rot/src/Makefile.am b/xlators/features/bit-rot/src/Makefile.am
new file mode 100644
index 00000000000..b5e4a7d62a0
--- /dev/null
+++ b/xlators/features/bit-rot/src/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = stub bitd
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
new file mode 100644
index 00000000000..dfa29fd72d9
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -0,0 +1,22 @@
+xlator_LTLIBRARIES = bit-rot.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bit_rot_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src/ \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/timer-wheel \
+ -I$(top_srcdir)/xlators/features/bit-rot/src/stub
+
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-ssm.c \
+ bit-rot-scrub-status.c
+bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
+
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-bitd-messages.h bit-rot-ssm.h \
+ bit-rot-scrub-status.h
+
+AM_CFLAGS = -Wall -DBR_RATE_LIMIT_SIGNER $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
new file mode 100644
index 00000000000..c6b6a4afa05
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
@@ -0,0 +1,448 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _BITROT_BITD_MESSAGES_H_
+#define _BITROT_BITD_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/* file bit-rot-bitd-messages.h
+ * brief BIT-ROT log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+/*
+ * Message IDs of this component are BASE + 1 .. BASE + NUM_MESSAGES.
+ * GLFS_MSGID_END is the first value past that range.
+ */
+#define GLFS_BITROT_BITD_BASE GLFS_MSGID_COMP_BITROT_BITD
+#define GLFS_BITROT_BITD_NUM_MESSAGES 55
+#define GLFS_MSGID_END (GLFS_BITROT_BITD_BASE + \
+ GLFS_BITROT_BITD_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_BITROT_BITD_BASE, "Invalid: Start of messages"
+/*------------*/
+
+
+#define BRB_MSG_FD_CREATE_FAILED (GLFS_BITROT_BITD_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define BRB_MSG_READV_FAILED (GLFS_BITROT_BITD_BASE + 2)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define BRB_MSG_BLOCK_READ_FAILED (GLFS_BITROT_BITD_BASE + 3)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_CALC_CHECKSUM_FAILED (GLFS_BITROT_BITD_BASE + 4)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_NO_MEMORY (GLFS_BITROT_BITD_BASE + 5)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_GET_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 6)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SET_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 7)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_OP_FAILED (GLFS_BITROT_BITD_BASE + 8)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_READ_AND_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 9)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 10)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_GET_SUBVOL_FAILED (GLFS_BITROT_BITD_BASE + 11)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SET_TIMER_FAILED (GLFS_BITROT_BITD_BASE + 12)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_GET_INFO_FAILED (GLFS_BITROT_BITD_BASE + 13)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_PATH_FAILED (GLFS_BITROT_BITD_BASE + 14)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_MARK_BAD_FILE (GLFS_BITROT_BITD_BASE + 15)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_TRIGGER_SIGN (GLFS_BITROT_BITD_BASE + 16)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_REGISTER_FAILED (GLFS_BITROT_BITD_BASE + 17)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_CRAWLING_START (GLFS_BITROT_BITD_BASE + 18)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SPAWN_FAILED (GLFS_BITROT_BITD_BASE + 19)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_INVALID_SUBVOL_CHILD (GLFS_BITROT_BITD_BASE + 20)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SKIP_OBJECT (GLFS_BITROT_BITD_BASE + 21)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_NO_CHILD (GLFS_BITROT_BITD_BASE + 22)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_CHECKSUM_MISMATCH (GLFS_BITROT_BITD_BASE + 23)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_MARK_CORRUPTED (GLFS_BITROT_BITD_BASE + 24)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_CRAWLING_FINISH (GLFS_BITROT_BITD_BASE + 25)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_CALC_ERROR (GLFS_BITROT_BITD_BASE + 26)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_LOOKUP_FAILED (GLFS_BITROT_BITD_BASE + 27)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_PARTIAL_VERSION_PRESENCE (GLFS_BITROT_BITD_BASE + 28)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_MEM_ACNT_FAILED (GLFS_BITROT_BITD_BASE + 29)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE (GLFS_BITROT_BITD_BASE + 30)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_BITROT_LOADED (GLFS_BITROT_BITD_BASE + 31)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCALE_DOWN_FAILED (GLFS_BITROT_BITD_BASE + 32)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCALE_UP_FAILED (GLFS_BITROT_BITD_BASE + 33)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCALE_DOWN_SCRUBBER (GLFS_BITROT_BITD_BASE + 34)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCALING_UP_SCRUBBER (GLFS_BITROT_BITD_BASE + 35)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define BRB_MSG_UNKNOWN_THROTTLE (GLFS_BITROT_BITD_BASE + 36)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_RATE_LIMIT_INFO (GLFS_BITROT_BITD_BASE + 37)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCRUB_INFO (GLFS_BITROT_BITD_BASE + 38)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_CONNECTED_TO_BRICK (GLFS_BITROT_BITD_BASE + 39)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_BRICK_INFO (GLFS_BITROT_BITD_BASE + 40)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SUBVOL_CONNECT_FAILED (GLFS_BITROT_BITD_BASE + 41)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_INVALID_SUBVOL (GLFS_BITROT_BITD_BASE + 42)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED (GLFS_BITROT_BITD_BASE + 43)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define BRB_MSG_SCRUB_START (GLFS_BITROT_BITD_BASE + 44)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCRUB_FINISH (GLFS_BITROT_BITD_BASE + 45)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCRUB_RUNNING (GLFS_BITROT_BITD_BASE + 46)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCRUB_RESCHEDULED (GLFS_BITROT_BITD_BASE + 47)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRB_MSG_SCRUB_TUNABLE (GLFS_BITROT_BITD_BASE + 48)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_SCRUB_THREAD_CLEANUP (GLFS_BITROT_BITD_BASE + 49)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_SCRUBBER_CLEANED (GLFS_BITROT_BITD_BASE + 50)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_GENERIC_SSM_INFO (GLFS_BITROT_BITD_BASE + 51)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_ZERO_TIMEOUT_BUG (GLFS_BITROT_BITD_BASE + 52)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_BAD_OBJ_READDIR_FAIL (GLFS_BITROT_BITD_BASE + 53)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_SSM_FAILED (GLFS_BITROT_BITD_BASE + 54)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_SCRUB_WAIT_FAILED (GLFS_BITROT_BITD_BASE + 55)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_BITROT_BITD_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
new file mode 100644
index 00000000000..0afd7ea05b1
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -0,0 +1,73 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+
+#include "bit-rot-scrub-status.h"
+
+/**
+ * Bump the unsigned-file counter of @scrub_stat by one while holding
+ * scrub_stat->lock. A NULL @scrub_stat is silently ignored.
+ */
+void
+br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat)
+{
+        if (scrub_stat != NULL) {
+                pthread_mutex_lock (&scrub_stat->lock);
+                scrub_stat->unsigned_files += 1;
+                pthread_mutex_unlock (&scrub_stat->lock);
+        }
+}
+
+/**
+ * Bump the scrubbed-file counter of @scrub_stat by one while holding
+ * scrub_stat->lock. A NULL @scrub_stat is silently ignored.
+ */
+void
+br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat)
+{
+        if (scrub_stat != NULL) {
+                pthread_mutex_lock (&scrub_stat->lock);
+                scrub_stat->scrubbed_files += 1;
+                pthread_mutex_unlock (&scrub_stat->lock);
+        }
+}
+
+/**
+ * Record the start time of a scrub run.
+ *
+ * Only the seconds component of @tv is stored (scrub_start_tv.tv_sec);
+ * the update is done under scrub_stat->lock. The call is a no-op when
+ * either @scrub_stat or @tv is NULL.
+ */
+void
+br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv)
+{
+        if (!scrub_stat || !tv)
+                return;
+
+        pthread_mutex_lock (&scrub_stat->lock);
+        {
+                scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+        }
+        pthread_mutex_unlock (&scrub_stat->lock);
+}
+
+/**
+ * Record the completion of a scrub run.
+ *
+ * Under scrub_stat->lock this stores the seconds component of @tv as
+ * the end time, derives scrub_duration as (end - start) in seconds and
+ * copies @timestr into last_scrub_time.
+ *
+ * The copy is bounded to the buffer size minus one and explicitly
+ * NUL-terminated: strncpy() alone does not terminate the destination
+ * when the source is at least as long as the limit.
+ *
+ * The call is a no-op when any of the arguments is NULL.
+ */
+void
+br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr,
+                             struct timeval *tv)
+{
+        if (!scrub_stat || !timestr || !tv)
+                return;
+
+        pthread_mutex_lock (&scrub_stat->lock);
+        {
+                scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+
+                scrub_stat->scrub_duration =
+                                scrub_stat->scrub_end_tv.tv_sec -
+                                scrub_stat->scrub_start_tv.tv_sec;
+
+                strncpy (scrub_stat->last_scrub_time, timestr,
+                         sizeof (scrub_stat->last_scrub_time) - 1);
+                scrub_stat->last_scrub_time
+                        [sizeof (scrub_stat->last_scrub_time) - 1] = '\0';
+        }
+        pthread_mutex_unlock (&scrub_stat->lock);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
new file mode 100644
index 00000000000..02bd0fab04e
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_STATUS_H__
+#define __BIT_ROT_SCRUB_STATUS_H__
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+/*
+ * Scrub statistics. The helper routines declared in this header update
+ * the counters and timestamps while holding @lock.
+ */
+struct br_scrub_stats {
+ uint64_t scrubbed_files; /* Total number of scrubbed files */
+
+ uint64_t unsigned_files; /* Total number of unsigned files */
+
+ uint64_t scrub_duration; /* Duration of last scrub, in seconds
+ (end tv_sec - start tv_sec) */
+
+ char last_scrub_time[1024]; /* Last scrub completion time, as a
+ formatted string */
+
+ struct timeval scrub_start_tv; /* Scrubbing start time */
+
+ struct timeval scrub_end_tv; /* Scrubbing finish time */
+
+ int8_t scrub_running; /* Scrub running or not */
+
+ pthread_mutex_t lock; /* Serializes updates done by the helpers */
+};
+
+typedef struct br_scrub_stats br_scrub_stats_t;
+
+/* Increment scrub_stat->unsigned_files under scrub_stat->lock. */
+void
+br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat);
+/* Increment scrub_stat->scrubbed_files under scrub_stat->lock. */
+void
+br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat);
+/* Store tv->tv_sec as the scrub start time. */
+void
+br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv);
+/*
+ * Store tv->tv_sec as the scrub end time, recompute scrub_duration and
+ * copy timestr into last_scrub_time.
+ */
+void
+br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr,
+ struct timeval *tv);
+
+#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
new file mode 100644
index 00000000000..cb04235cb03
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -0,0 +1,1984 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <math.h>
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "bit-rot-scrub.h"
+#include <pthread.h>
+#include "bit-rot-bitd-messages.h"
+#include "bit-rot-scrub-status.h"
+
+/* One scrubber thread, linkable into a list of scrubbers. */
+struct br_scrubbers {
+ pthread_t scrubthread; /* thread handle */
+
+ struct list_head list; /* list linkage */
+};
+
+/*
+ * A directory entry collected by the filesystem scanner and queued for
+ * scrubbing. Allocated and filled in br_fsscanner_handle_entry().
+ */
+struct br_fsscan_entry {
+ void *data; /* the br_child_t the entry belongs to */
+
+ loc_t parent; /* copy of the parent directory's loc */
+
+ gf_dirent_t *entry; /* copy of the directory entry */
+
+ struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+ struct list_head list; /* linkage on fsscan->queued / ->ready */
+};
+
+/**
+ * Fetch the signature extended attribute from an object's fd.
+ *
+ * @this:  bit-rot xlator (used for logging)
+ * @child: subvolume the fgetxattr is issued on (child->xl)
+ * @fd:    open fd of the object
+ * @xattr: out - dictionary returned by fgetxattr
+ * @sign:  out - pointer to the signature stored inside @xattr
+ *
+ * Returns 0 on success, -1 on failure.
+ *
+ * NOTE: On success @xattr is not unref'd as @sign points to the
+ * dictionary value; the caller unrefs it once done with @sign. When
+ * the signature cannot be extracted, the dictionary is unref'd here
+ * and *@xattr is reset to NULL so the caller is not left holding a
+ * dangling pointer.
+ */
+static int32_t
+bitd_fetch_signature (xlator_t *this, br_child_t *child,
+                      fd_t *fd, dict_t **xattr, br_isignature_out_t **sign)
+{
+        int32_t ret = -1;
+
+        ret = syncop_fgetxattr (child->xl, fd, xattr,
+                                GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL);
+        if (ret < 0) {
+                br_log_object (this, "fgetxattr", fd->inode->gfid, -ret);
+                goto out;
+        }
+
+        ret = dict_get_ptr
+                (*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **) sign);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+                        "failed to extract signature info [GFID: %s]",
+                        uuid_utoa (fd->inode->gfid));
+                goto unref_dict;
+        }
+
+        return 0;
+
+ unref_dict:
+        dict_unref (*xattr);
+        *xattr = NULL;
+ out:
+        return -1;
+
+}
+
+/**
+ * POST COMPUTE CHECK
+ *
+ * Checks to be performed after the checksum has been calculated and
+ * before it is verified against the stored signature.
+ * Object is skipped if:
+ *  - it has a stale signature
+ *  - its version mismatches the one cached in the pre-compute check
+ *
+ * On success a heap copy of the signature is returned in @signature
+ * (to be released by the caller with GF_FREE) and 0 is returned. On
+ * failure -1 is returned and @signature is not usable. Unless
+ * @skip_stat is set, skipped objects are accounted as unsigned files
+ * in @scrub_stat.
+ */
+
+int32_t
+bitd_scrub_post_compute_check (xlator_t *this,
+                               br_child_t *child,
+                               fd_t *fd, unsigned long version,
+                               br_isignature_out_t **signature,
+                               br_scrub_stats_t *scrub_stat,
+                               gf_boolean_t skip_stat)
+{
+        int32_t              ret     = 0;
+        size_t               signlen = 0;
+        dict_t              *xattr   = NULL;
+        br_isignature_out_t *signptr = NULL;
+
+        ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr);
+        if (ret < 0) {
+                if (!skip_stat)
+                        br_inc_unsigned_file_count (scrub_stat);
+                goto out;
+        }
+
+        /**
+         * Either the object got dirtied during the time the signature was
+         * calculated OR the version we saved during pre-compute check does
+         * not match now, implying that the object got dirtied and signed in
+         * between scrubs pre & post compute checks (checksum window).
+         *
+         * The log entry looks pretty ugly, but helps in debugging..
+         */
+        if (signptr->stale || (signptr->version != version)) {
+                if (!skip_stat)
+                        br_inc_unsigned_file_count (scrub_stat);
+                gf_msg_debug (this->name, 0, "<STAGE: POST> Object [GFID: %s] "
+                              "either has a stale signature OR underwent "
+                              "signing during checksumming {Stale: %d | "
+                              "Version: %lu,%lu}", uuid_utoa (fd->inode->gfid),
+                              (signptr->stale) ? 1 : 0, version,
+                              signptr->version);
+                ret = -1;
+                goto unref_dict;
+        }
+
+        signlen = signptr->signaturelen;
+        *signature = GF_CALLOC (1, sizeof (br_isignature_out_t) + signlen,
+                                gf_common_mt_char);
+        if (!*signature) {
+                /* allocation failed: do not memcpy() into NULL */
+                ret = -1;
+                goto unref_dict;
+        }
+
+        (void) memcpy (*signature, signptr,
+                       sizeof (br_isignature_out_t) + signlen);
+
+ unref_dict:
+        dict_unref (xattr);
+ out:
+        return ret;
+
+}
+
+/**
+ * Fetch the object's signature and report its staleness flag and
+ * version through @stale and @version.
+ *
+ * The version is saved for validation in the post compute stage
+ * (c.f. bitd_scrub_post_compute_check()). When the signature cannot be
+ * fetched the object is accounted as unsigned (unless @skip_stat) and
+ * the fetch error is returned.
+ */
+static int32_t
+bitd_signature_staleness (xlator_t *this,
+                          br_child_t *child, fd_t *fd,
+                          int *stale, unsigned long *version,
+                          br_scrub_stats_t *scrub_stat, gf_boolean_t skip_stat)
+{
+        dict_t              *dict   = NULL;
+        br_isignature_out_t *cursig = NULL;
+        int32_t              rc     = -1;
+
+        rc = bitd_fetch_signature (this, child, fd, &dict, &cursig);
+        if (rc >= 0) {
+                *stale   = cursig->stale ? 1 : 0;
+                *version = cursig->version;
+
+                dict_unref (dict);
+                return rc;
+        }
+
+        if (!skip_stat)
+                br_inc_unsigned_file_count (scrub_stat);
+
+        return rc;
+}
+
+/**
+ * PRE COMPUTE CHECK
+ *
+ * Gatekeeper run before an object's checksum is calculated. The object
+ * is rejected (non-zero return) when:
+ *  - it is already marked corrupted
+ *  - its signature could not be fetched
+ *  - its signature is stale
+ *
+ * On success 0 is returned and @version holds the signed version.
+ */
+int32_t
+bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child,
+                              fd_t *fd, unsigned long *version,
+                              br_scrub_stats_t *scrub_stat,
+                              gf_boolean_t skip_stat)
+{
+        int     is_stale = 0;
+        int32_t rc       = -1;
+
+        if (bitd_is_bad_file (this, child, NULL, fd)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT,
+                        "Object [GFID: %s] is marked corrupted, skipping..",
+                        uuid_utoa (fd->inode->gfid));
+                return rc;
+        }
+
+        rc = bitd_signature_staleness (this, child, fd, &is_stale, version,
+                                       scrub_stat, skip_stat);
+        if (rc || !is_stale)
+                return rc;
+
+        /* signature fetched fine, but it is stale */
+        if (!skip_stat)
+                br_inc_unsigned_file_count (scrub_stat);
+        gf_msg_debug (this->name, 0, "<STAGE: PRE> Object [GFID: %s] "
+                      "has stale signature",
+                      uuid_utoa (fd->inode->gfid));
+
+        return -1;
+}
+
+/**
+ * Compare the stored signature @sign against the freshly calculated
+ * digest @md and, on mismatch, mark the object as bad by setting
+ * BITROT_OBJECT_BAD_KEY through fsetxattr on @fd.
+ *
+ * Returns 0 when the checksums match or when the object was
+ * successfully marked as corrupted; non-zero on invalid arguments or
+ * when marking failed.
+ *
+ * NOTE(review): the comparison uses strncmp() bounded by
+ * strlen (sign->signature), so it stops at the first zero byte of the
+ * stored signature. If the signature is a raw binary digest this can
+ * compare only a prefix - confirm the encoding and consider a
+ * length-based memcmp().
+ */
+/* static int */
+int
+bitd_compare_ckum (xlator_t *this,
+                   br_isignature_out_t *sign,
+                   unsigned char *md, inode_t *linked_inode,
+                   gf_dirent_t *entry, fd_t *fd, br_child_t *child, loc_t *loc)
+{
+        int     ret   = -1;
+        dict_t *xattr = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, sign, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+        GF_VALIDATE_OR_GOTO (this->name, linked_inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, md, out);
+        GF_VALIDATE_OR_GOTO (this->name, entry, out);
+        /* loc->path is used by every log message below */
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        if (strncmp
+            (sign->signature, (char *) md, strlen (sign->signature)) == 0) {
+                gf_msg_debug (this->name, 0, "%s [GFID: %s | Brick: %s] "
+                              "matches calculated checksum", loc->path,
+                              uuid_utoa (linked_inode->gfid),
+                              child->brick_path);
+                return 0;
+        }
+
+        gf_msg (this->name, GF_LOG_DEBUG, 0, BRB_MSG_CHECKSUM_MISMATCH,
+                "Object checksum mismatch: %s [GFID: %s | Brick: %s]",
+                loc->path, uuid_utoa (linked_inode->gfid), child->brick_path);
+        gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_CHECKSUM_MISMATCH,
+                "CORRUPTION DETECTED: Object %s {Brick: %s | GFID: %s}",
+                loc->path, child->brick_path, uuid_utoa (linked_inode->gfid));
+
+        /* Perform bad-file marking */
+        xattr = dict_new ();
+        if (!xattr) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int32 (xattr, BITROT_OBJECT_BAD_KEY, _gf_true);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE,
+                        "Error setting bad-file marker for %s [GFID: %s | "
+                        "Brick: %s]", loc->path, uuid_utoa (linked_inode->gfid),
+                        child->brick_path);
+                goto dictfree;
+        }
+
+        gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_MARK_CORRUPTED, "Marking"
+                " %s [GFID: %s | Brick: %s] as corrupted..", loc->path,
+                uuid_utoa (linked_inode->gfid), child->brick_path);
+        ret = syncop_fsetxattr (child->xl, fd, xattr, 0, NULL, NULL);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE,
+                        "Error marking object %s [GFID: %s] as corrupted",
+                        loc->path, uuid_utoa (linked_inode->gfid));
+
+ dictfree:
+        dict_unref (xattr);
+ out:
+        return ret;
+}
+
+/**
+ * "The Scrubber"
+ *
+ * Perform signature validation for a given object with the assumption
+ * that the signature is SHA256 (because signer as of now _always_
+ * signs with SHA256).
+ *
+ * Steps: prepare a loc for the entry, look it up and link the inode,
+ * skip anything that is not a regular file (or is a DHT link file),
+ * open an fd, run the pre-compute check, calculate the checksum, run
+ * the post-compute check and finally compare the checksums (marking
+ * the object bad on mismatch).
+ *
+ * Entries whose parent is the shard root are scrubbed but not counted
+ * in the scrub statistics.
+ *
+ * Returns 0 when the object was scrubbed or legitimately skipped as a
+ * non-regular / link file; non-zero otherwise.
+ */
+int
+br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+        int32_t              ret             = -1;
+        fd_t                *fd              = NULL;
+        loc_t                loc             = {0, };
+        struct iatt          iatt            = {0, };
+        struct iatt          parent_buf      = {0, };
+        pid_t                pid             = 0;
+        br_child_t          *child           = NULL;
+        unsigned char       *md              = NULL;
+        inode_t             *linked_inode    = NULL;
+        br_isignature_out_t *sign            = NULL;
+        unsigned long        signedversion   = 0;
+        gf_dirent_t         *entry           = NULL;
+        br_private_t        *priv            = NULL;
+        loc_t               *parent          = NULL;
+        gf_boolean_t         skip_stat       = _gf_false;
+        uuid_t               shard_root_gfid = {0,};
+
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out);
+
+        entry = fsentry->entry;
+        parent = &fsentry->parent;
+        child = fsentry->data;
+
+        priv = this->private;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", entry, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", parent, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", child, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", priv, out);
+
+        pid = GF_CLIENT_PID_SCRUB;
+
+        /*
+         * A zero return makes us leave with ret == 0.
+         * NOTE(review): presumably "nothing to scrub for this entry" -
+         * confirm against br_prepare_loc().
+         */
+        ret = br_prepare_loc (this, child, parent, entry, &loc);
+        if (!ret)
+                goto out;
+
+        syncopctx_setfspid (&pid);
+
+        ret = syncop_lookup (child->xl, &loc, &iatt, &parent_buf, NULL, NULL);
+        if (ret) {
+                br_log_object_path (this, "lookup", loc.path, -ret);
+                goto out;
+        }
+
+        linked_inode = inode_link (loc.inode, parent->inode, loc.name, &iatt);
+        if (!linked_inode) {
+                /* nothing below can work without a linked inode */
+                gf_msg_debug (this->name, 0, "failed to link inode for %s",
+                              entry->d_name);
+                ret = -1;
+                goto out;
+        }
+        inode_lookup (linked_inode);
+
+        gf_msg_debug (this->name, 0, "Scrubbing object %s [GFID: %s]",
+                      entry->d_name, uuid_utoa (linked_inode->gfid));
+
+        if (iatt.ia_type != IA_IFREG) {
+                gf_msg_debug (this->name, 0, "%s is not a regular file",
+                              entry->d_name);
+                ret = 0;
+                goto unref_inode;
+        }
+
+        if (IS_DHT_LINKFILE_MODE ((&iatt))) {
+                gf_msg_debug (this->name, 0, "%s is a dht sticky bit file",
+                              entry->d_name);
+                ret = 0;
+                goto unref_inode;
+        }
+
+        /* skip updating scrub statistics for shard entries */
+        gf_uuid_parse (SHARD_ROOT_GFID, shard_root_gfid);
+        if (gf_uuid_compare (loc.pargfid, shard_root_gfid) == 0)
+                skip_stat = _gf_true;
+
+        /**
+         * open() an fd for subsequent operations
+         */
+        fd = fd_create (linked_inode, 0);
+        if (!fd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+                        "failed to create fd for inode %s",
+                        uuid_utoa (linked_inode->gfid));
+                /* ret still holds 0 from the lookup: report the failure */
+                ret = -1;
+                goto unref_inode;
+        }
+
+        ret = syncop_open (child->xl, &loc, O_RDWR, fd, NULL, NULL);
+        if (ret) {
+                br_log_object (this, "open", linked_inode->gfid, -ret);
+                ret = -1;
+                goto unrefd;
+        }
+
+        fd_bind (fd);
+
+        /**
+         * perform pre compute checks before initiating checksum
+         * computation
+         *  - presence of bad object
+         *  - signature staleness
+         */
+        ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion,
+                                            &priv->scrub_stat, skip_stat);
+        if (ret)
+                goto unrefd; /* skip this object */
+
+        /* if all's good, proceed to calculate the hash */
+        md = GF_CALLOC (SHA256_DIGEST_LENGTH, sizeof (*md),
+                        gf_common_mt_char);
+        if (!md) {
+                /* ret is 0 from the pre-compute check: report the failure */
+                ret = -1;
+                goto unrefd;
+        }
+
+        ret = br_calculate_obj_checksum (md, child, fd, &iatt);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_ERROR,
+                        "error calculating hash for object [GFID: %s]",
+                        uuid_utoa (fd->inode->gfid));
+                ret = -1;
+                goto free_md;
+        }
+
+        /**
+         * perform post compute checks as an object's signature may have
+         * become stale while scrubber calculated checksum.
+         */
+        ret = bitd_scrub_post_compute_check (this, child, fd, signedversion,
+                                             &sign, &priv->scrub_stat,
+                                             skip_stat);
+        if (ret)
+                goto free_md;
+
+        ret = bitd_compare_ckum (this, sign, md,
+                                 linked_inode, entry, fd, child, &loc);
+
+        if (!skip_stat)
+                br_inc_scrubbed_file (&priv->scrub_stat);
+
+        GF_FREE (sign); /* alloced on post-compute */
+
+        /** fd_unref() takes care of closing fd.. like syncop_close() */
+
+ free_md:
+        GF_FREE (md);
+ unrefd:
+        fd_unref (fd);
+ unref_inode:
+        inode_unref (linked_inode);
+ out:
+        loc_wipe (&loc);
+        return ret;
+}
+
+/**
+ * Thread cancellation cleanup handler: @arg is the pthread mutex to be
+ * released.
+ */
+static void
+_br_lock_cleaner (void *arg)
+{
+        pthread_mutex_unlock ((pthread_mutex_t *) arg);
+}
+
+/**
+ * Hand the entries collected on fsscan->queued over to the scrubber
+ * threads and block until all of them have been processed.
+ *
+ * With fsscan->waitlock held, the queued list is moved onto
+ * fsscan->ready (under fsscrub->mutex) and fsscrub->cond is broadcast
+ * to wake up the scrubbers; the caller then sleeps on fsscan->waitcond
+ * until fsscan->entries drops to zero. Cleanup handlers are pushed for
+ * both mutexes so they get released if the thread is cancelled while
+ * holding them.
+ */
+static void
+wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan)
+{
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+
+ pthread_cleanup_push (_br_lock_cleaner, &fsscan->waitlock);
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex);
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_replace_init (&fsscan->queued, &fsscan->ready);
+
+ /* wake up scrubbers */
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+ pthread_cleanup_pop (0);
+
+ /* sleep until the scrubbers have drained every entry */
+ while (fsscan->entries != 0)
+ pthread_cond_wait
+ (&fsscan->waitcond, &fsscan->waitlock);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+ pthread_cleanup_pop (0);
+}
+
+/* Account for one more entry awaiting scrub on @fsscan. */
+static void
+_br_fsscan_inc_entry_count (struct br_scanfs *fsscan)
+{
+        fsscan->entries += 1;
+}
+
+/**
+ * Account for one entry less on @fsscan; once the count reaches zero,
+ * signal fsscan->waitcond (under fsscan->waitlock) to wake the waiter.
+ */
+static void
+_br_fsscan_dec_entry_count (struct br_scanfs *fsscan)
+{
+        fsscan->entries -= 1;
+        if (fsscan->entries != 0)
+                return;
+
+        /* last entry done: wake up whoever waits for completion */
+        pthread_mutex_lock (&fsscan->waitlock);
+        pthread_cond_signal (&fsscan->waitcond);
+        pthread_mutex_unlock (&fsscan->waitlock);
+}
+
+/**
+ * Append @fsentry to the tail of fsscan->queued and bump the entry
+ * count. Callers in this file invoke it with fsscan->entrylock held.
+ */
+static void
+_br_fsscan_collect_entry (struct br_scanfs *fsscan,
+ struct br_fsscan_entry *fsentry)
+{
+ list_add_tail (&fsentry->list, &fsscan->queued);
+ _br_fsscan_inc_entry_count (fsscan);
+}
+
+#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */
+
+/**
+ * Per-entry callback handed to syncop_ftw() by br_fsscanner().
+ *
+ * Allocates a br_fsscan_entry holding copies of @parent and @entry,
+ * queues it on the child's scanner (under fsscan->entrylock) and, once
+ * at least NR_ENTRIES entries are pending, hands them to the scrubbers
+ * and waits for completion via wait_for_scrubbing().
+ *
+ * @data is the br_child_t being scanned.
+ *
+ * Returns 0 on success, -1 on invalid arguments or allocation/copy
+ * failure.
+ *
+ * NOTE(review): the error paths return without a matching
+ * _unmask_cancellation() call - confirm whether that is intended.
+ */
+int
+br_fsscanner_handle_entry (xlator_t *subvol,
+ gf_dirent_t *entry, loc_t *parent, void *data)
+{
+ int32_t ret = -1;
+ int scrub = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return);
+ GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return);
+
+ child = data;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ _mask_cancellation ();
+
+ fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t);
+ if (!fsentry)
+ goto error_return;
+
+ {
+ fsentry->data = data;
+ fsentry->fsscan = &child->fsscan;
+
+ /* copy parent loc */
+ ret = loc_copy (&fsentry->parent, parent);
+ if (ret)
+ goto dealloc;
+
+ /* copy child entry */
+ fsentry->entry = entry_copy (entry);
+ if (!fsentry->entry)
+ goto locwipe;
+
+ INIT_LIST_HEAD (&fsentry->list);
+ }
+
+ LOCK (&fsscan->entrylock);
+ {
+ _br_fsscan_collect_entry (fsscan, fsentry);
+
+ /**
+ * need not be a equality check as entries may be pushed
+ * back onto the scanned queue when thread(s) are cleaned.
+ */
+ if (fsscan->entries >= NR_ENTRIES)
+ scrub = 1;
+ }
+ UNLOCK (&fsscan->entrylock);
+
+ _unmask_cancellation ();
+
+ /* batch is full: let the scrubbers drain it before scanning on */
+ if (scrub)
+ wait_for_scrubbing (this, fsscan);
+
+ return 0;
+
+ locwipe:
+ loc_wipe (&fsentry->parent);
+ dealloc:
+ GF_FREE (fsentry);
+ error_return:
+ return -1;
+}
+
+/**
+ * Pause scrubbing: delete the scrub monitor's timer from the timer
+ * wheel and record the resulting scrub state.
+ *
+ * When gf_tw_del_timer() returns 0 the state becomes
+ * BR_SCRUB_STATE_STALLED (logged as "volume is under active
+ * scrubbing"); otherwise it becomes BR_SCRUB_STATE_PAUSED.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_fsscan_deactivate (xlator_t *this)
+{
+ int ret = 0;
+ br_private_t *priv = NULL;
+ br_scrub_state_t nstate = 0;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ ret = gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer);
+ if (ret == 0) {
+ nstate = BR_SCRUB_STATE_STALLED;
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Volume is under active scrubbing. Pausing scrub..");
+ } else {
+ nstate = BR_SCRUB_STATE_PAUSED;
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubber paused");
+ }
+
+ _br_monitor_set_scrub_state (scrub_monitor, nstate);
+
+ return 0;
+}
+
+/**
+ * Log the current time as the moment scrubbing @sfx ("started" or
+ * anything else, treated as finished) and update the scrub statistics
+ * accordingly: the start time for "started", the finish time / last
+ * scrub time otherwise.
+ */
+static void
+br_scrubber_log_time (xlator_t *this, const char *sfx)
+{
+        br_private_t   *priv        = this->private;
+        struct timeval  now         = {0,};
+        char            stamp[1024] = {0,};
+
+        gettimeofday (&now, NULL);
+        gf_time_fmt (stamp, sizeof (stamp), now.tv_sec, gf_timefmt_FT);
+
+        if (strcasecmp (sfx, "started") != 0) {
+                br_update_scrub_finish_time (&priv->scrub_stat, stamp, &now);
+                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
+                        "Scrubbing %s at %s", sfx, stamp);
+                return;
+        }
+
+        br_update_scrub_start_time (&priv->scrub_stat, &now);
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
+                "Scrubbing %s at %s", sfx, stamp);
+}
+
+/**
+ * Emit a debug log stating that scrubbing of @child's brick @sfx
+ * (e.g. "started" / "finished") at the current time.
+ *
+ * The message is the same regardless of @sfx, hence no branching on
+ * it.
+ */
+static void
+br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx)
+{
+        char            timestr[1024] = {0,};
+        struct timeval  tv            = {0,};
+
+        gettimeofday (&tv, NULL);
+        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT);
+
+        gf_msg_debug (this->name, 0, "Scrubbing \"%s\" %s at %s",
+                      child->brick_path, sfx, timestr);
+}
+
+/**
+ * Set child->active_scrubbing to @state. No locking is done here;
+ * callers in this file invoke it with child->lock held.
+ */
+void
+br_child_set_scrub_state (br_child_t *child, gf_boolean_t state)
+{
+ child->active_scrubbing = state;
+}
+
+/**
+ * Block the scanner thread of @child until the scrub monitor's ->kick
+ * flag is set, then register the child as actively scrubbing.
+ *
+ * The wait happens on scrub_monitor->wakecond with
+ * scrub_monitor->wakelock held. Once kicked, active_child_count is
+ * incremented and the child's scrub state is set to true under
+ * child->lock. Cleanup handlers are pushed for both mutexes so they
+ * get released if the thread is cancelled.
+ */
+static void
+br_fsscanner_wait_until_kicked (xlator_t *this, br_child_t *child)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock);
+ pthread_mutex_lock (&scrub_monitor->wakelock);
+ {
+ while (!scrub_monitor->kick)
+ pthread_cond_wait (&scrub_monitor->wakecond,
+ &scrub_monitor->wakelock);
+
+ /* Child lock is to synchronize with disconnect events */
+ pthread_cleanup_push (_br_lock_cleaner, &child->lock);
+ pthread_mutex_lock (&child->lock);
+ {
+ scrub_monitor->active_child_count++;
+ br_child_set_scrub_state (child, _gf_true);
+ }
+ pthread_mutex_unlock (&child->lock);
+ pthread_cleanup_pop (0);
+ }
+ pthread_mutex_unlock (&scrub_monitor->wakelock);
+ pthread_cleanup_pop (0);
+}
+
+/**
+ * Bookkeeping done when a scrub run begins: under scrub_monitor->lock,
+ * move the monitor state from PENDING to ACTIVE (other states are left
+ * untouched), log/record the start time and flag the scrub as running
+ * in the statistics.
+ */
+static void
+br_scrubber_entry_control (xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ LOCK (&scrub_monitor->lock);
+ {
+ /* Move the state to BR_SCRUB_STATE_ACTIVE */
+ if (scrub_monitor->state == BR_SCRUB_STATE_PENDING)
+ scrub_monitor->state = BR_SCRUB_STATE_ACTIVE;
+ br_scrubber_log_time (this, "started");
+ priv->scrub_stat.scrub_running = 1;
+ }
+ UNLOCK (&scrub_monitor->lock);
+}
+
+/**
+ * Bookkeeping done when a scrub run ends: under scrub_monitor->lock,
+ * log/record the finish time and clear the running flag in the
+ * statistics. If the monitor is still in the ACTIVE state the next run
+ * is scheduled through br_fsscan_activate(); otherwise only an
+ * informational message is logged.
+ */
+static void
+br_scrubber_exit_control (xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ LOCK (&scrub_monitor->lock);
+ {
+ br_scrubber_log_time (this, "finished");
+ priv->scrub_stat.scrub_running = 0;
+
+ if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) {
+ (void) br_fsscan_activate (this);
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Volume waiting to get rescheduled..");
+ }
+ }
+ UNLOCK (&scrub_monitor->lock);
+}
+
+/* Precursor for scanning @child: logs the scan start time. */
+static void
+br_fsscanner_entry_control (xlator_t *this, br_child_t *child)
+{
+ br_fsscanner_log_time (this, child, "started");
+}
+
+/**
+ * Exit criteria for a child's scan.
+ *
+ * Warns if the brick got disconnected meanwhile, logs the finish time
+ * and then, with scrub_monitor->wakelock held, drops the child from the
+ * set of actively scrubbing children. The last child to finish resets
+ * ->kick, wakes up the children waiting on wakecond and signals the
+ * monitor through donecond (with ->done set under donelock). Every
+ * other child waits on wakecond until active_child_count reaches zero.
+ */
+static void
+br_fsscanner_exit_control (xlator_t *this, br_child_t *child)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (!_br_is_child_connected (child)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO,
+ "Brick [%s] disconnected while scrubbing. Scrubbing "
+ "might be incomplete", child->brick_path);
+ }
+
+ br_fsscanner_log_time (this, child, "finished");
+
+ pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock);
+ pthread_mutex_lock (&scrub_monitor->wakelock);
+ {
+ scrub_monitor->active_child_count--;
+ pthread_cleanup_push (_br_lock_cleaner, &child->lock);
+ pthread_mutex_lock (&child->lock);
+ {
+ br_child_set_scrub_state (child, _gf_false);
+ }
+ pthread_mutex_unlock (&child->lock);
+ pthread_cleanup_pop (0);
+
+ if (scrub_monitor->active_child_count == 0) {
+ /* The last child has finished scrubbing.
+ * Set the kick to false and wake up other
+ * children who are waiting for the last
+ * child to complete scrubbing.
+ */
+ scrub_monitor->kick = _gf_false;
+ pthread_cond_broadcast (&scrub_monitor->wakecond);
+
+ /* Signal monitor thread waiting for the all
+ * the children to finish scrubbing.
+ */
+ pthread_cleanup_push (_br_lock_cleaner,
+ &scrub_monitor->donelock);
+ pthread_mutex_lock (&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_true;
+ pthread_cond_signal (&scrub_monitor->donecond);
+ }
+ pthread_mutex_unlock (&scrub_monitor->donelock);
+ pthread_cleanup_pop (0);
+ } else {
+ while (scrub_monitor->active_child_count)
+ pthread_cond_wait (&scrub_monitor->wakecond,
+ &scrub_monitor->wakelock);
+ }
+ }
+ pthread_mutex_unlock (&scrub_monitor->wakelock);
+ pthread_cleanup_pop (0);
+}
+
+/**
+ * Thread routine of a child's filesystem scanner; @arg is the
+ * br_child_t to scan.
+ *
+ * Loops forever: waits to be kicked, walks the child's tree from its
+ * inode table root with syncop_ftw() (queueing entries through
+ * br_fsscanner_handle_entry()), flushes whatever is still queued to
+ * the scrubbers and finally runs the exit control. The return
+ * statement after the loop is never reached.
+ */
+void *
+br_fsscanner (void *arg)
+{
+ loc_t loc = {0,};
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+
+ child = arg;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ THIS = this;
+ loc.inode = child->table->root;
+
+ while (1) {
+ br_fsscanner_wait_until_kicked (this, child);
+ {
+ /* precursor for scrub */
+ br_fsscanner_entry_control (this, child);
+
+ /* scrub */
+ (void) syncop_ftw (child->xl,
+ &loc, GF_CLIENT_PID_SCRUB,
+ child, br_fsscanner_handle_entry);
+ /* leftover entries of a partially filled batch */
+ if (!list_empty (&fsscan->queued))
+ wait_for_scrubbing (this, fsscan);
+
+ /* scrub exit criteria */
+ br_fsscanner_exit_control (this, child);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Timer callback (installed by br_fsscan_schedule()) that starts a
+ * scrub run; @data is the scrub monitor.
+ *
+ * Keep this routine extremely simple and do not ever try to acquire
+ * child->lock here: it may lead to deadlock. Scrubber state is
+ * modified in br_fsscanner(). An intermediate state change to pause
+ * changes the scrub state to the _correct_ state by identifying a
+ * non-pending timer.
+ *
+ * NOTE(review): the statistics counters are reset here without taking
+ * scrub_stat.lock - confirm that no scrubber can be updating them at
+ * this point.
+ */
+void
+br_kickstart_scanner (struct gf_tw_timer_list *timer,
+ void *data, unsigned long calltime)
+{
+ xlator_t *this = NULL;
+ struct br_monitor *scrub_monitor = data;
+ br_private_t *priv = NULL;
+
+ THIS = this = scrub_monitor->this;
+ priv = this->private;
+
+ /* Reset scrub statistics */
+ priv->scrub_stat.scrubbed_files = 0;
+ priv->scrub_stat.unsigned_files = 0;
+
+ /* Moves state from PENDING to ACTIVE */
+ (void) br_scrubber_entry_control (this);
+
+ /* kickstart scanning.. */
+ pthread_mutex_lock (&scrub_monitor->wakelock);
+ {
+ scrub_monitor->kick = _gf_true;
+ GF_ASSERT (scrub_monitor->active_child_count == 0);
+ pthread_cond_broadcast (&scrub_monitor->wakecond);
+ }
+ pthread_mutex_unlock (&scrub_monitor->wakelock);
+
+ return;
+}
+
+/* Identity mapping: the delta is the given interval @times itself. */
+static uint32_t
+br_fsscan_calculate_delta (uint32_t times)
+{
+ return times;
+}
+
+#define BR_SCRUB_MINUTE (60)
+#define BR_SCRUB_HOURLY (60 * 60)
+#define BR_SCRUB_DAILY (1 * 24 * 60 * 60)
+#define BR_SCRUB_WEEKLY (7 * 24 * 60 * 60)
+#define BR_SCRUB_BIWEEKLY (14 * 24 * 60 * 60)
+#define BR_SCRUB_MONTHLY (30 * 24 * 60 * 60)
+
+/**
+ * Map a scrub frequency onto its timeout (one of the BR_SCRUB_*
+ * interval constants, passed through br_fsscan_calculate_delta()).
+ * Returns 0 for an unknown frequency.
+ */
+static unsigned int
+br_fsscan_calculate_timeout (scrub_freq_t freq)
+{
+        uint32_t interval = 0;
+
+        switch (freq) {
+        case BR_FSSCRUB_FREQ_MINUTE:
+                interval = BR_SCRUB_MINUTE;
+                break;
+        case BR_FSSCRUB_FREQ_HOURLY:
+                interval = BR_SCRUB_HOURLY;
+                break;
+        case BR_FSSCRUB_FREQ_DAILY:
+                interval = BR_SCRUB_DAILY;
+                break;
+        case BR_FSSCRUB_FREQ_WEEKLY:
+                interval = BR_SCRUB_WEEKLY;
+                break;
+        case BR_FSSCRUB_FREQ_BIWEEKLY:
+                interval = BR_SCRUB_BIWEEKLY;
+                break;
+        case BR_FSSCRUB_FREQ_MONTHLY:
+                interval = BR_SCRUB_MONTHLY;
+                break;
+        default:
+                /* unknown frequency: no timeout */
+                return 0;
+        }
+
+        return br_fsscan_calculate_delta (interval);
+}
+
+/**
+ * Schedule the first scrub run.
+ *
+ * Records the current time as scrub_monitor->boot, allocates the
+ * monitor's timer, arms it on the timer wheel with the timeout derived
+ * from the configured frequency (callback: br_kickstart_scanner()) and
+ * moves the monitor to BR_SCRUB_STATE_PENDING.
+ *
+ * Returns 0 on success, -1 on a zero timeout or allocation failure.
+ */
+int32_t
+br_fsscan_schedule (xlator_t *this)
+{
+ uint32_t timo = 0;
+ br_private_t *priv = NULL;
+ struct timeval tv = {0,};
+ char timestr[1024] = {0,};
+ struct br_scrubber *fsscrub = NULL;
+ struct gf_tw_timer_list *timer = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ (void) gettimeofday (&tv, NULL);
+ scrub_monitor->boot = tv.tv_sec;
+
+ timo = br_fsscan_calculate_timeout (fsscrub->frequency);
+ if (timo == 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+ "BUG: Zero schedule timeout");
+ goto error_return;
+ }
+
+ scrub_monitor->timer = GF_CALLOC (1, sizeof (*scrub_monitor->timer),
+ gf_br_stub_mt_br_scanner_freq_t);
+ if (!scrub_monitor->timer)
+ goto error_return;
+
+ timer = scrub_monitor->timer;
+ INIT_LIST_HEAD (&timer->entry);
+
+ timer->data = scrub_monitor;
+ timer->expires = timo;
+ timer->function = br_kickstart_scanner;
+
+ gf_tw_add_timer (priv->timer_wheel, timer);
+ _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);
+
+ /* the logged time is boot + timeout */
+ gf_time_fmt (timestr, sizeof (timestr),
+ (scrub_monitor->boot + timo), gf_timefmt_FT);
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is "
+ "scheduled to run at %s", timestr);
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/**
+ * (Re)arm the scrub timer for the next run.
+ *
+ * Clears scrub_monitor->done under donelock, modifies the existing
+ * timer to fire after the timeout derived from the configured
+ * frequency and moves the monitor to BR_SCRUB_STATE_PENDING.
+ *
+ * Returns 0 on success, -1 when the computed timeout is zero.
+ */
+int32_t
+br_fsscan_activate (xlator_t *this)
+{
+ uint32_t timo = 0;
+ char timestr[1024] = {0,};
+ struct timeval now = {0,};
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ (void) gettimeofday (&now, NULL);
+ timo = br_fsscan_calculate_timeout (fsscrub->frequency);
+ if (timo == 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+ "BUG: Zero schedule timeout");
+ return -1;
+ }
+
+ pthread_mutex_lock (&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_false;
+ }
+ pthread_mutex_unlock (&scrub_monitor->donelock);
+
+ gf_time_fmt (timestr, sizeof (timestr),
+ (now.tv_sec + timo), gf_timefmt_FT);
+ (void) gf_tw_mod_timer (priv->timer_wheel, scrub_monitor->timer, timo);
+
+ _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is "
+ "rescheduled to run at %s", timestr);
+
+ return 0;
+}
+
+/**
+ * Reschedule scrubbing after the frequency has been reconfigured.
+ *
+ * Does nothing (returns 0) unless fsscrub->frequency_reconf is set.
+ * Otherwise clears scrub_monitor->done and tries to modify the timer
+ * through gf_tw_mod_timer_pending(): a zero return is logged as "the
+ * scrubber is currently running" (rescheduling happens on completion),
+ * any other return moves the monitor to BR_SCRUB_STATE_PENDING.
+ *
+ * Returns 0 on success, -1 when the computed timeout is zero.
+ */
+int32_t
+br_fsscan_reschedule (xlator_t *this)
+{
+ int32_t ret = 0;
+ uint32_t timo = 0;
+ char timestr[1024] = {0,};
+ struct timeval now = {0,};
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (!fsscrub->frequency_reconf)
+ return 0;
+
+ (void) gettimeofday (&now, NULL);
+ timo = br_fsscan_calculate_timeout (fsscrub->frequency);
+ if (timo == 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+ "BUG: Zero schedule timeout");
+ return -1;
+ }
+
+ gf_time_fmt (timestr, sizeof (timestr),
+ (now.tv_sec + timo), gf_timefmt_FT);
+
+ pthread_mutex_lock (&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_false;
+ }
+ pthread_mutex_unlock (&scrub_monitor->donelock);
+
+ ret = gf_tw_mod_timer_pending (priv->timer_wheel, scrub_monitor->timer, timo);
+ if (ret == 0)
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubber is currently running and would be "
+ "rescheduled after completion");
+ else {
+ _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubbing rescheduled to run at %s", timestr);
+ }
+
+ return 0;
+}
+
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * Compute the scale for a throttle setting: the child count multiplied
+ * by e raised to a fixed, per-throttle exponent. The VOID and STALLED
+ * throttles scale to zero; an unknown throttle is logged and also
+ * yields zero.
+ *
+ * This is just a simple exponential scale; we probably need to be more
+ * smart and select the scale based on the number of processor cores
+ * too.
+ */
+static unsigned int
+br_scrubber_calc_scale (xlator_t *this,
+                        br_private_t *priv, scrub_throttle_t throttle)
+{
+        unsigned int nr     = 0;
+        double       factor = 0;
+
+        switch (throttle) {
+        case BR_SCRUB_THROTTLE_VOID:
+        case BR_SCRUB_THROTTLE_STALLED:
+                return 0;
+        case BR_SCRUB_THROTTLE_LAZY:
+                factor = pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY);
+                break;
+        case BR_SCRUB_THROTTLE_NORMAL:
+                factor = pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL);
+                break;
+        case BR_SCRUB_THROTTLE_AGGRESSIVE:
+                factor = pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE);
+                break;
+        default:
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_UNKNOWN_THROTTLE,
+                        "Unknown throttle %d", throttle);
+                return 0;
+        }
+
+        nr = priv->child_count * factor;
+
+        return nr;
+
+}
+
+/**
+ * Return the child at the head of fsscrub->scrublist and rotate the
+ * list left. The list is not checked for emptiness here; the caller in
+ * this file waits for it to become non-empty first.
+ */
+static br_child_t *
+_br_scrubber_get_next_child (struct br_scrubber *fsscrub)
+{
+ br_child_t *child = NULL;
+
+ child = list_first_entry (&fsscrub->scrublist, br_child_t, list);
+ list_rotate_left (&fsscrub->scrublist);
+
+ return child;
+}
+
+/**
+ * Detach the first entry of @child's ready list and hand it back via
+ * @fsentry. @fsentry is left untouched when the ready list is empty.
+ */
+static void
+_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+        struct br_scanfs *scanner = &child->fsscan;
+
+        if (!list_empty (&scanner->ready)) {
+                *fsentry = list_first_entry
+                        (&scanner->ready, struct br_fsscan_entry, list);
+                list_del_init (&(*fsentry)->list);
+        }
+}
+
+/**
+ * Find an entry to scrub, blocking until one is available.
+ *
+ * Waits (on fsscrub->cond) for the scrub list to become non-empty,
+ * then visits the children in rotation, starting from the list head,
+ * until one yields a ready entry or the rotation comes back to the
+ * first child visited. If nothing was found it waits on fsscrub->cond
+ * and retries.
+ *
+ * Must be called with fsscrub->mutex held (it is the mutex passed to
+ * pthread_cond_wait()); *@fsentry is expected to be NULL on entry.
+ */
+static void
+_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ br_child_t *child = NULL;
+ br_child_t *firstchild = NULL;
+
+ while (1) {
+ while (list_empty (&fsscrub->scrublist))
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+
+ firstchild = NULL;
+ for (child = _br_scrubber_get_next_child (fsscrub);
+ child != firstchild;
+ child = _br_scrubber_get_next_child (fsscrub)) {
+
+ /* remember where the rotation started */
+ if (!firstchild)
+ firstchild = child;
+
+ _br_scrubber_get_entry (child, fsentry);
+ if (*fsentry)
+ break;
+ }
+
+ if (*fsentry)
+ break;
+
+ /* nothing to work on.. wait till available */
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+ }
+}
+
+/**
+ * Fetch the next entry to scrub into @fsentry, blocking under
+ * fsscrub->mutex until one is available.
+ *
+ * _br_lock_cleaner is registered as a cancellation cleanup handler with
+ * the mutex as its argument for the duration of the wait; it is popped
+ * without being executed on the normal path.
+ */
+static void
+br_scrubber_pick_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex);
+
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ /* the finder loops until *fsentry becomes non-NULL */
+ *fsentry = NULL;
+ _br_scrubber_find_scrubbable_entry (fsscrub, fsentry);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ pthread_cleanup_pop (0);
+}
+
+/**
+ * Argument handed to the scrub cleanup handler (br_scrubber_entry_handle).
+ */
+struct br_scrub_entry {
+ /* set once scrubbing of the entry returned; selects free vs. requeue */
+ gf_boolean_t scrubbed;
+ /* the filesystem scan entry being scrubbed */
+ struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to cancellations
+ * when threads are scaled down (depending on the throttling value configured)
+ * and pausing scrub. A thread can get cancelled while it's waiting for entries
+ * in the ->pending queue or when an object is undergoing scrubbing.
+ */
+/**
+ * Cleanup handler run after an attempt to scrub an entry.
+ *
+ * @arg is a struct br_scrub_entry. Under the scanner's entry lock, an
+ * entry that was scrubbed is accounted for, torn down and freed; one that
+ * was not is queued again so that it gets scrubbed later.
+ */
+static void
+br_scrubber_entry_handle (void *arg)
+{
+        struct br_scrub_entry  *sentry  = arg;
+        struct br_fsscan_entry *fsentry = sentry->fsentry;
+        struct br_scanfs       *fsscan  = fsentry->fsscan;
+
+        LOCK (&fsscan->entrylock);
+        {
+                if (!sentry->scrubbed) {
+                        /* (re)queue the entry again for scrub */
+                        _br_fsscan_collect_entry (fsscan, sentry->fsentry);
+                } else {
+                        _br_fsscan_dec_entry_count (fsscan);
+
+                        /* release everything the entry holds, then it */
+                        fsentry->data = NULL;
+                        fsentry->fsscan = NULL;
+                        loc_wipe (&fsentry->parent);
+                        gf_dirent_entry_free (fsentry->entry);
+
+                        GF_FREE (sentry->fsentry);
+                }
+        }
+        UNLOCK (&fsscan->entrylock);
+}
+
+/**
+ * Scrub a single entry with br_scrubber_entry_handle registered as the
+ * cleanup handler.
+ *
+ * The handler always runs on the normal path (pthread_cleanup_pop (1)).
+ * `scrubbed` only becomes 1 after br_scrubber_scrub_begin returns, so a
+ * handler invoked before that point sees it as 0 and requeues the entry.
+ * The return value of br_scrubber_scrub_begin is deliberately ignored.
+ */
+static void
+br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+ struct br_scrub_entry sentry = {0, };
+
+ sentry.scrubbed = 0;
+ sentry.fsentry = fsentry;
+
+ pthread_cleanup_push (br_scrubber_entry_handle, &sentry);
+ {
+ (void) br_scrubber_scrub_begin (this, fsentry);
+ sentry.scrubbed = 1;
+ }
+ pthread_cleanup_pop (1);
+}
+
+/**
+ * Scrubber thread body: forever pick the next available entry, scrub it
+ * and sleep for a second. @arg is the struct br_scrubber the thread
+ * serves; THIS is set from it before any work is done.
+ */
+void *br_scrubber_proc (void *arg)
+{
+        struct br_scrubber     *fsscrub = arg;
+        xlator_t               *this    = NULL;
+        struct br_fsscan_entry *picked  = NULL;
+
+        THIS = this = fsscrub->this;
+
+        for (;;) {
+                br_scrubber_pick_entry (fsscrub, &picked);
+                br_scrubber_scrub_entry (this, picked);
+                sleep (1);
+        }
+
+        return NULL;
+}
+
+/**
+ * Spawn scrubber threads to grow the pool from @v1 to @v2 threads.
+ *
+ * Each new thread gets one tracking structure, linked into
+ * fsscrub->scrubbers once the thread has been created.
+ *
+ * @return 0 when all threads were spawned, and also when thread creation
+ *         failed part way (degraded scaling, logged as a warning);
+ *         -1 when a tracking structure could not be allocated.
+ */
+static int32_t
+br_scrubber_scale_up (xlator_t *this,
+                      struct br_scrubber *fsscrub,
+                      unsigned int v1, unsigned int v2)
+{
+        int                  i     = 0;
+        int32_t              ret   = -1;
+        int                  diff  = 0;
+        struct br_scrubbers *scrub = NULL;
+
+        diff = (int)(v2 - v1);
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCALING_UP_SCRUBBER,
+                "Scaling up scrubbers [%d => %d]", v1, v2);
+
+        for (i = 0; i < diff; i++) {
+                /* exactly one tracker per thread, not `diff` of them */
+                scrub = GF_CALLOC (1, sizeof (*scrub),
+                                   gf_br_mt_br_scrubber_t);
+                if (!scrub)
+                        goto error_return;
+
+                INIT_LIST_HEAD (&scrub->list);
+                ret = gf_thread_create (&scrub->scrubthread,
+                                        NULL, br_scrubber_proc, fsscrub);
+                if (ret) {
+                        /* never linked into the list: release it here */
+                        GF_FREE (scrub);
+                        scrub = NULL;
+                        break;
+                }
+
+                fsscrub->nr_scrubbers++;
+                list_add_tail (&scrub->list, &fsscrub->scrubbers);
+        }
+
+        if (i != diff) /* degraded scaling.. */
+                gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_UP_FAILED,
+                        "Could not fully scale up to %d scrubber(s). Spawned "
+                        "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i));
+
+        return 0;
+
+ error_return:
+        return -1;
+}
+
+/**
+ * Terminate scrubber threads to shrink the pool from @v1 to @v2 threads.
+ *
+ * Threads are taken from the head of fsscrub->scrubbers. A tracking
+ * structure is unlinked and freed only after its thread has been cleaned
+ * up, so a thread that could not be terminated stays on the list and
+ * remains counted in nr_scrubbers.
+ *
+ * @return always 0; a partial scale down is logged as a warning.
+ */
+static int32_t
+br_scrubber_scale_down (xlator_t *this,
+                        struct br_scrubber *fsscrub,
+                        unsigned int v1, unsigned int v2)
+{
+        int                  i     = 0;
+        int                  diff  = 0;
+        int32_t              ret   = -1;
+        struct br_scrubbers *scrub = NULL;
+
+        diff = (int)(v1 - v2);
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCALE_DOWN_SCRUBBER,
+                "Scaling down scrubbers [%d => %d]", v1, v2);
+
+        for (i = 0 ; i < diff; i++) {
+                scrub = list_first_entry
+                            (&fsscrub->scrubbers, struct br_scrubbers, list);
+
+                ret = gf_thread_cleanup_xint (scrub->scrubthread);
+                if (ret)
+                        break;
+
+                list_del_init (&scrub->list);
+                GF_FREE (scrub);
+
+                fsscrub->nr_scrubbers--;
+        }
+
+        if (ret) {
+                /* target is v2; (v1 - i) threads are still around */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        BRB_MSG_SCALE_DOWN_FAILED, "Could not fully scale down "
+                        "to %d scrubber(s). Terminated %d/%d [total "
+                        "scrubber(s): %d]", v2, i, diff, (v1 - i));
+                ret = 0;
+        }
+
+        return ret;
+}
+
+/**
+ * Resize the scrubber thread pool to match throttle @nthrottle.
+ *
+ * Compares the current thread count with the one computed for the new
+ * throttle and scales up or down accordingly; nothing is done when the
+ * two are equal.
+ *
+ * @return 0 when no change is needed, otherwise the scaling result.
+ */
+static int32_t
+br_scrubber_configure (xlator_t *this, br_private_t *priv,
+                       struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+        unsigned int current = fsscrub->nr_scrubbers;
+        unsigned int wanted  = br_scrubber_calc_scale (this, priv, nthrottle);
+
+        if (current == wanted)
+                return 0;
+
+        if (current < wanted)
+                return br_scrubber_scale_up (this, fsscrub, current, wanted);
+
+        return br_scrubber_scale_down (this, fsscrub, current, wanted);
+}
+
+/**
+ * Read string option @opt into *@value.
+ *
+ * With a non-NULL @options dict the reconfigure variant of the option
+ * macro is used, otherwise the init variant. Both macros are given the
+ * error_return label as their failure target.
+ *
+ * @return 0 on success, -1 when the option macro jumps to error_return.
+ */
+static int32_t
+br_scrubber_fetch_option (xlator_t *this,
+ char *opt, dict_t *options, char **value)
+{
+ if (options)
+ GF_OPTION_RECONF (opt, *value, options, str, error_return);
+ else
+ GF_OPTION_INIT (opt, *value, str, error_return);
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/* internal "throttle" override */
+#define BR_SCRUB_STALLED "STALLED"
+
+/* TODO: token bucket spec */
+/**
+ * Apply the "scrub-throttle" option.
+ *
+ * The option value ("lazy", "normal" or "aggressive", case-insensitive)
+ * is overridden by the internal STALLED throttle when @scrubstall is set.
+ * The thread pool is resized for the new throttle; throttle_reconf records
+ * whether the throttle differs from the previous one.
+ *
+ * @return 0 on success; -1 when the option cannot be fetched, holds an
+ *         unknown value, or resizing fails (the stored throttle is then
+ *         left as it was).
+ */
+static int32_t
+br_scrubber_handle_throttle (xlator_t *this, br_private_t *priv,
+                             dict_t *options, gf_boolean_t scrubstall)
+{
+        struct br_scrubber *fsscrub   = &priv->fsscrub;
+        char               *value     = NULL;
+        scrub_throttle_t    nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+        fsscrub->throttle_reconf = _gf_false;
+
+        if (br_scrubber_fetch_option (this, "scrub-throttle", options, &value))
+                return -1;
+
+        if (scrubstall)
+                value = BR_SCRUB_STALLED;
+
+        if (!strcasecmp (value, "lazy")) {
+                nthrottle = BR_SCRUB_THROTTLE_LAZY;
+        } else if (!strcasecmp (value, "normal")) {
+                nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+        } else if (!strcasecmp (value, "aggressive")) {
+                nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+        } else if (!strcasecmp (value, BR_SCRUB_STALLED)) {
+                nthrottle = BR_SCRUB_THROTTLE_STALLED;
+        } else {
+                return -1;
+        }
+
+        /* on failure old throttling value is preserved */
+        if (br_scrubber_configure (this, priv, fsscrub, nthrottle))
+                return -1;
+
+        if (nthrottle != fsscrub->throttle)
+                fsscrub->throttle_reconf = _gf_true;
+
+        fsscrub->throttle = nthrottle;
+        return 0;
+}
+
+/**
+ * Read the "scrub-state" option and set *@scrubstall to true when it is
+ * "pause" (case-insensitive). Any other value leaves *@scrubstall as is.
+ *
+ * @return 0 on success, -1 when the option cannot be fetched.
+ */
+static int32_t
+br_scrubber_handle_stall (xlator_t *this, br_private_t *priv,
+                          dict_t *options, gf_boolean_t *scrubstall)
+{
+        char *state = NULL;
+
+        if (br_scrubber_fetch_option (this, "scrub-state", options, &state))
+                return -1;
+
+        /* anything else is active */
+        if (!strcasecmp (state, "pause"))
+                *scrubstall = _gf_true;
+
+        return 0;
+}
+
+/**
+ * Apply the "scrub-freq" option.
+ *
+ * The option value is matched case-insensitively against the known
+ * frequency names; it is overridden by the internal STALLED value when
+ * @scrubstall is set. frequency_reconf ends up false only when the
+ * frequency is unchanged; it is left true on every error path.
+ *
+ * @return 0 on success, -1 when the option cannot be fetched or holds an
+ *         unknown value.
+ */
+static int32_t
+br_scrubber_handle_freq (xlator_t *this, br_private_t *priv,
+                         dict_t *options, gf_boolean_t scrubstall)
+{
+        static const struct {
+                const char   *name;
+                scrub_freq_t  freq;
+        } known[] = {
+                { "hourly",         BR_FSSCRUB_FREQ_HOURLY   },
+                { "daily",          BR_FSSCRUB_FREQ_DAILY    },
+                { "weekly",         BR_FSSCRUB_FREQ_WEEKLY   },
+                { "biweekly",       BR_FSSCRUB_FREQ_BIWEEKLY },
+                { "monthly",        BR_FSSCRUB_FREQ_MONTHLY  },
+                { "minute",         BR_FSSCRUB_FREQ_MINUTE   },
+                { BR_SCRUB_STALLED, BR_FSSCRUB_FREQ_STALLED  },
+        };
+        const size_t        nknown  = sizeof (known) / sizeof (known[0]);
+        struct br_scrubber *fsscrub = &priv->fsscrub;
+        char               *value   = NULL;
+        size_t              idx     = 0;
+
+        fsscrub->frequency_reconf = _gf_true;
+
+        if (br_scrubber_fetch_option (this, "scrub-freq", options, &value))
+                return -1;
+
+        if (scrubstall)
+                value = BR_SCRUB_STALLED;
+
+        for (idx = 0; idx < nknown; idx++) {
+                if (strcasecmp (value, known[idx].name) == 0)
+                        break;
+        }
+
+        if (idx == nknown)
+                return -1;
+
+        if (fsscrub->frequency == known[idx].freq)
+                fsscrub->frequency_reconf = _gf_false;
+        else
+                fsscrub->frequency = known[idx].freq;
+
+        return 0;
+}
+
+/**
+ * Log the effective scrub frequency and throttle when either of them was
+ * reconfigured. Nothing is logged when the scrubber is stalled (that is
+ * logged as a pause elsewhere).
+ */
+static void br_scrubber_log_option (xlator_t *this,
+                                    br_private_t *priv, gf_boolean_t scrubstall)
+{
+        struct br_scrubber *fsscrub = &priv->fsscrub;
+        char *throttle_names[] = {
+                [BR_SCRUB_THROTTLE_LAZY]       = "lazy",
+                [BR_SCRUB_THROTTLE_NORMAL]     = "normal",
+                [BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive",
+        };
+        char *freq_names[] = {
+                [BR_FSSCRUB_FREQ_HOURLY]   = "hourly",
+                [BR_FSSCRUB_FREQ_DAILY]    = "daily",
+                [BR_FSSCRUB_FREQ_WEEKLY]   = "weekly",
+                [BR_FSSCRUB_FREQ_BIWEEKLY] = "biweekly",
+                [BR_FSSCRUB_FREQ_MONTHLY]  = "monthly (30 days)",
+                [BR_FSSCRUB_FREQ_MINUTE]   = "every minute",
+        };
+
+        if (scrubstall)
+                return; /* logged as pause */
+
+        if (!fsscrub->frequency_reconf && !fsscrub->throttle_reconf)
+                return; /* nothing changed */
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE,
+                "SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]",
+                freq_names[fsscrub->frequency],
+                throttle_names[fsscrub->throttle]);
+}
+
+/**
+ * Process all scrubber options: scrub state first (it decides whether the
+ * scrubber is stalled), then throttle, then frequency, and finally log the
+ * resulting tunables.
+ *
+ * @return 0 on success, -1 as soon as one of the handlers fails.
+ */
+int32_t
+br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+        gf_boolean_t stalled = _gf_false; /* not as dangerous as it sounds */
+
+        if (br_scrubber_handle_stall (this, priv, options, &stalled))
+                return -1;
+
+        if (br_scrubber_handle_throttle (this, priv, options, stalled))
+                return -1;
+
+        if (br_scrubber_handle_freq (this, priv, options, stalled))
+                return -1;
+
+        br_scrubber_log_option (this, priv, stalled);
+
+        return 0;
+}
+
+/**
+ * Look up the bad objects directory identified by @gfid on @child and
+ * link the resulting inode into the child's inode table.
+ *
+ * @return the linked inode, or NULL when validation, inode allocation or
+ *         the lookup fails. The temporary loc is wiped on every path.
+ */
+inode_t *
+br_lookup_bad_obj_dir (xlator_t *this, br_child_t *child, uuid_t gfid)
+{
+        struct iatt    statbuf      = {0, };
+        inode_table_t *table        = NULL;
+        int32_t        ret          = -1;
+        loc_t          loc          = {0, };
+        inode_t       *linked_inode = NULL;
+        int32_t        op_errno     = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-scrubber", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+
+        table = child->table;
+
+        loc.inode = inode_new (table);
+        if (!loc.inode) {
+                /* literals are concatenated: keep the separating space */
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        BRB_MSG_NO_MEMORY, "failed to allocate a new inode for "
+                        "bad object directory");
+                goto out;
+        }
+
+        gf_uuid_copy (loc.gfid, gfid);
+
+        ret = syncop_lookup (child->xl, &loc, &statbuf, NULL, NULL, NULL);
+        if (ret < 0) {
+                /* a negative return value is treated as -errno */
+                op_errno = -ret;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_LOOKUP_FAILED, "failed to lookup the bad "
+                        "objects directory (gfid: %s (%s))", uuid_utoa (gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        linked_inode = inode_link (loc.inode, NULL, NULL, &statbuf);
+        if (linked_inode)
+                inode_lookup (linked_inode);
+
+out:
+        loc_wipe (&loc);
+        return linked_inode;
+}
+
+/**
+ * Read the bad objects directory open on @fd and record every entry name
+ * in @dict under the keys "quarantine-0", "quarantine-1", ... The number
+ * of names stored is saved under the key "count".
+ *
+ * @return the result of storing "count" on success, or the negative value
+ *         returned by a failing readdir (in which case "count" is not set).
+ */
+int32_t
+br_read_bad_object_dir (xlator_t *this, br_child_t *child, fd_t *fd,
+                        dict_t *dict)
+{
+        gf_dirent_t  entries;
+        gf_dirent_t *entry          = NULL;
+        int32_t      ret            = -1;
+        int32_t      set_ret        = 0;
+        off_t        offset         = 0;
+        int32_t      count          = 0;
+        char         key[PATH_MAX]  = {0, };
+
+        INIT_LIST_HEAD (&entries.list);
+
+        /* a readdir returning 0 ends the listing; negative is an error */
+        while ((ret = syncop_readdir (child->xl, fd, 131072, offset, &entries,
+                                      NULL, NULL)) > 0) {
+                list_for_each_entry (entry, &entries.list, list) {
+                        /* resume the next readdir after this entry */
+                        offset = entry->d_off;
+
+                        snprintf (key, sizeof (key), "quarantine-%d", count);
+
+                        /*
+                         * ignore the dict_set errors for now. The intention is
+                         * to get as many bad objects as possible instead of
+                         * erroring out at the first failure.
+                         */
+                        set_ret = dict_set_dynstr_with_alloc (dict, key,
+                                                              entry->d_name);
+                        if (!set_ret)
+                                count++;
+                }
+
+                gf_dirent_free (&entries);
+        }
+
+        if (ret < 0)
+                goto out;
+
+        ret = dict_set_int32 (dict, "count", count);
+
+out:
+        return ret;
+}
+
+/**
+ * Fill @dict with the names of the bad objects quarantined on @child.
+ *
+ * The bad objects directory inode is taken from the child's inode table,
+ * or looked up when it is not there yet; the directory is then opened and
+ * read via br_read_bad_object_dir.
+ *
+ * @return 0 on success, a negative value on failure.
+ */
+int32_t
+br_get_bad_objects_from_child (xlator_t *this, dict_t *dict, br_child_t *child)
+{
+        inode_t       *inode    = NULL;
+        inode_table_t *table    = NULL;
+        fd_t          *fd       = NULL;
+        int32_t        ret      = -1;
+        loc_t          loc      = {0, };
+        int32_t        op_errno = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-scrubber", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        table = child->table;
+
+        inode = inode_find (table, BR_BAD_OBJ_CONTAINER);
+        if (!inode) {
+                inode = br_lookup_bad_obj_dir (this, child,
+                                               BR_BAD_OBJ_CONTAINER);
+                if (!inode)
+                        goto out;
+        }
+
+        /*
+         * Hand the inode over to loc right away so that loc_wipe() at
+         * "out" releases it on every path, including fd_create failure.
+         */
+        loc.inode = inode;
+        gf_uuid_copy (loc.gfid, inode->gfid);
+
+        fd = fd_create (inode, 0);
+        if (!fd) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        BRB_MSG_FD_CREATE_FAILED, "fd creation for the bad "
+                        "objects directory failed (gfid: %s)",
+                        uuid_utoa (BR_BAD_OBJ_CONTAINER));
+                goto out;
+        }
+
+        ret = syncop_opendir (child->xl, &loc, fd, NULL, NULL);
+        if (ret < 0) {
+                op_errno = -ret;
+                fd_unref (fd);
+                fd = NULL;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        BRB_MSG_FD_CREATE_FAILED, "failed to open the bad "
+                        "objects directory %s",
+                        uuid_utoa (BR_BAD_OBJ_CONTAINER));
+                goto out;
+        }
+
+        fd_bind (fd);
+
+        ret = br_read_bad_object_dir (this, child, fd, dict);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_BAD_OBJ_READDIR_FAIL, "readdir of the bad "
+                        "objects directory (%s) failed ",
+                        uuid_utoa (BR_BAD_OBJ_CONTAINER));
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        loc_wipe (&loc);
+        if (fd)
+                fd_unref (fd);
+        return ret;
+}
+
+/**
+ * Copy the "quarantine-N" entries of @child_dict into @dict, renumbering
+ * them to continue from @total_count. Entries that cannot be read or
+ * stored are skipped.
+ *
+ * @return the new running total, or the non-zero result of the failed
+ *         "count" lookup on @child_dict.
+ */
+int32_t
+br_collect_bad_objects_of_child (xlator_t *this, br_child_t *child,
+                                 dict_t *dict, dict_t *child_dict,
+                                 int32_t total_count)
+{
+        char     src_key[PATH_MAX] = {0, };
+        char     dst_key[PATH_MAX] = {0, };
+        char    *object            = NULL;
+        int32_t  nobjects          = 0;
+        int32_t  running           = total_count;
+        int32_t  idx               = 0;
+        int32_t  ret               = -1;
+
+        ret = dict_get_int32 (child_dict, "count", &nobjects);
+        if (ret)
+                return ret;
+
+        for (idx = 0; idx < nobjects; idx++) {
+                snprintf (src_key, PATH_MAX, "quarantine-%d", idx);
+                if (dict_get_str (child_dict, src_key, &object))
+                        continue;
+
+                snprintf (dst_key, PATH_MAX, "quarantine-%d", running);
+                if (!dict_set_dynstr_with_alloc (dict, dst_key, object))
+                        running++;
+        }
+
+        return running;
+}
+
+/**
+ * Gather the bad objects of every connected child into @dict and store
+ * the grand total under the key "total-count".
+ *
+ * A failure on one child (dict allocation, listing or merging) does not
+ * stop the remaining children from being asked.
+ *
+ * @return the result of storing "total-count".
+ */
+int32_t
+br_collect_bad_objects_from_children (xlator_t *this, dict_t *dict)
+{
+        br_private_t *priv        = this->private;
+        br_child_t   *child       = NULL;
+        dict_t       *child_dict  = NULL;
+        int32_t       total_count = 0;
+        int32_t       idx         = 0;
+        int32_t       ret         = -1;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                child = &priv->children[idx];
+                GF_ASSERT (child);
+                if (!_br_is_child_connected (child))
+                        continue;
+
+                child_dict = dict_new ();
+                if (!child_dict) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                BRB_MSG_NO_MEMORY, "failed to allocate dict");
+                        continue;
+                }
+
+                /*
+                 * Merge only when this child's list could be fetched, and
+                 * advance the total only when the merge worked; either
+                 * way move on to the next child.
+                 */
+                ret = br_get_bad_objects_from_child (this, child_dict, child);
+                if (!ret) {
+                        ret = br_collect_bad_objects_of_child (this, child,
+                                                               dict,
+                                                               child_dict,
+                                                               total_count);
+                        if (ret >= 0)
+                                total_count = ret;
+                }
+
+                dict_unref (child_dict);
+                child_dict = NULL;
+        }
+
+        return dict_set_int32 (dict, "total-count", total_count);
+}
+
+/**
+ * Collect the bad objects of all children into *@dict, allocating the
+ * dict (and storing it in *@dict) when the caller passed in NULL.
+ *
+ * @return the result of br_collect_bad_objects_from_children, or -1 on
+ *         invalid arguments or allocation failure.
+ */
+int32_t
+br_get_bad_objects_list (xlator_t *this, dict_t **dict)
+{
+        int32_t  ret      = -1;
+        dict_t  *tmp_dict = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-scrubber", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        tmp_dict = *dict;
+        if (!tmp_dict) {
+                tmp_dict = dict_new ();
+                if (!tmp_dict) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                BRB_MSG_NO_MEMORY, "failed to allocate dict");
+                        goto out;
+                }
+                *dict = tmp_dict;
+        }
+
+        ret = br_collect_bad_objects_from_children (this, tmp_dict);
+
+out:
+        return ret;
+}
+
+/**
+ * Block until the scrub monitor's `done` flag is set.
+ *
+ * Waits on donecond under donelock. Arguments are validated before
+ * anything is dereferenced.
+ *
+ * @return 0 once `done` is observed, -1 on invalid arguments.
+ */
+static int
+wait_for_scrub_to_finish (xlator_t *this)
+{
+        int                ret           = -1;
+        br_private_t      *priv          = NULL;
+        struct br_monitor *scrub_monitor = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+                "Waiting for all children to start and finish scrub");
+
+        pthread_mutex_lock (&scrub_monitor->donelock);
+        {
+                /* loop guards against spurious wakeups */
+                while (!scrub_monitor->done)
+                        pthread_cond_wait (&scrub_monitor->donecond,
+                                           &scrub_monitor->donelock);
+        }
+        pthread_mutex_unlock (&scrub_monitor->donelock);
+        ret = 0;
+out:
+        return ret;
+}
+
+/**
+ * This function is executed in a separate thread. This is the scrubber
+ * monitor thread that takes care of the scrub state machine.
+ *
+ * @arg is the bit-rot xlator. The thread waits until the monitor is marked
+ * inited, runs the state machine once under priv->lock, and then loops
+ * forever: wait for the scrub to finish, then run the scrub exit control.
+ * It returns NULL (ending the thread) if the state machine or the wait
+ * fails.
+ */
+void *
+br_monitor_thread (void *arg)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ this = arg;
+ priv = this->private;
+
+ /*
+ * Since, this is the topmost xlator, THIS has to be set by bit-rot
+ * xlator itself (STACK_WIND wont help in this case). Also it has
+ * to be done for each thread that gets spawned. Otherwise, a new
+ * thread will get global_xlator's pointer when it does "THIS".
+ */
+ THIS = this;
+
+ scrub_monitor = &priv->scrub_monitor;
+
+ /* hold off until the monitor has been flagged as inited */
+ pthread_mutex_lock (&scrub_monitor->mutex);
+ {
+ while (!scrub_monitor->inited)
+ pthread_cond_wait (&scrub_monitor->cond,
+ &scrub_monitor->mutex);
+ }
+ pthread_mutex_unlock (&scrub_monitor->mutex);
+
+ /* this needs to be serialized with reconfigure() */
+ pthread_mutex_lock (&priv->lock);
+ {
+ ret = br_scrub_state_machine (this);
+ }
+ pthread_mutex_unlock (&priv->lock);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_SSM_FAILED,
+ "Scrub state machine failed");
+ goto out;
+ }
+
+ while (1) {
+ /* Wait for all children to finish scrubbing */
+ ret = wait_for_scrub_to_finish (this);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_SCRUB_WAIT_FAILED,
+ "Scrub wait failed");
+ goto out;
+ }
+
+ /* scrub exit criteria: Move the state to PENDING */
+ br_scrubber_exit_control (this);
+ }
+
+out:
+ return NULL;
+}
+
+/**
+ * Set the monitor's scrub state to @state while holding the monitor lock;
+ * a locked wrapper around _br_monitor_set_scrub_state.
+ */
+static void
+br_set_scrub_state (struct br_monitor *scrub_monitor, br_scrub_state_t state)
+{
+ LOCK (&scrub_monitor->lock);
+ {
+ _br_monitor_set_scrub_state (scrub_monitor, state);
+ }
+ UNLOCK (&scrub_monitor->lock);
+}
+
+/**
+ * Initialise the scrub monitor embedded in @priv and start its thread.
+ *
+ * Sets up the monitor lock and three mutex/condvar pairs (init, wake and
+ * done), clears their flags, puts the state machine in INACTIVE and spawns
+ * br_monitor_thread with @this as its argument.
+ *
+ * @return 0 on success; -1 when the thread cannot be created, in which
+ *         case all locks and condition variables are destroyed again.
+ */
+int32_t
+br_scrubber_monitor_init (xlator_t *this, br_private_t *priv)
+{
+ struct br_monitor *scrub_monitor = NULL;
+ int ret = 0;
+
+ scrub_monitor = &priv->scrub_monitor;
+
+ LOCK_INIT (&scrub_monitor->lock);
+ scrub_monitor->this = this;
+
+ /* `inited` gate: the monitor thread waits on this pair at startup */
+ scrub_monitor->inited = _gf_false;
+ pthread_mutex_init (&scrub_monitor->mutex, NULL);
+ pthread_cond_init (&scrub_monitor->cond, NULL);
+
+ scrub_monitor->kick = _gf_false;
+ scrub_monitor->active_child_count = 0;
+ pthread_mutex_init (&scrub_monitor->wakelock, NULL);
+ pthread_cond_init (&scrub_monitor->wakecond, NULL);
+
+ /* `done` flag: waited upon by wait_for_scrub_to_finish */
+ scrub_monitor->done = _gf_false;
+ pthread_mutex_init (&scrub_monitor->donelock, NULL);
+ pthread_cond_init (&scrub_monitor->donecond, NULL);
+
+ /* Set the state to INACTIVE */
+ br_set_scrub_state (&priv->scrub_monitor, BR_SCRUB_STATE_INACTIVE);
+
+ /* Start the monitor thread */
+ ret = gf_thread_create (&scrub_monitor->thread, NULL, br_monitor_thread, this);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_SPAWN_FAILED, "monitor thread creation failed");
+ ret = -1;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* undo every initialisation done above */
+ pthread_mutex_destroy (&scrub_monitor->mutex);
+ pthread_cond_destroy (&scrub_monitor->cond);
+
+ pthread_mutex_destroy (&scrub_monitor->wakelock);
+ pthread_cond_destroy (&scrub_monitor->wakecond);
+
+ pthread_mutex_destroy (&scrub_monitor->donelock);
+ pthread_cond_destroy (&scrub_monitor->donecond);
+
+ LOCK_DESTROY (&scrub_monitor->lock);
+
+ return ret;
+}
+
+/**
+ * Initialise the scrubber state kept in @priv: the tbf handle, the scrub
+ * monitor (and its thread), and the scrubber bookkeeping (mutex, condvar,
+ * empty thread and scrub lists, throttle set to VOID).
+ *
+ * @return 0 on success, -1 when tbf_init or the monitor init fails.
+ *
+ * NOTE(review): priv->tbf is not released when the monitor init fails -
+ * confirm whether a matching cleanup call is needed here.
+ */
+int32_t
+br_scrubber_init (xlator_t *this, br_private_t *priv)
+{
+ struct br_scrubber *fsscrub = NULL;
+ int ret = 0;
+
+ priv->tbf = tbf_init (NULL, 0);
+ if (!priv->tbf)
+ return -1;
+
+ ret = br_scrubber_monitor_init (this, priv);
+ if (ret)
+ return -1;
+
+ fsscrub = &priv->fsscrub;
+
+ fsscrub->this = this;
+ fsscrub->throttle = BR_SCRUB_THROTTLE_VOID;
+
+ pthread_mutex_init (&fsscrub->mutex, NULL);
+ pthread_cond_init (&fsscrub->cond, NULL);
+
+ /* no scrubber threads yet; they are spawned on throttle configuration */
+ fsscrub->nr_scrubbers = 0;
+ INIT_LIST_HEAD (&fsscrub->scrubbers);
+ INIT_LIST_HEAD (&fsscrub->scrublist);
+
+ return 0;
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
new file mode 100644
index 00000000000..63169068ed4
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_H__
+#define __BIT_ROT_SCRUB_H__
+
+#include "xlator.h"
+#include "bit-rot.h"
+
+/* thread-style entry point (void * in, void * out); defined elsewhere */
+void *br_fsscanner (void *);
+
+/* scan scheduling actions; used as entries of the scrub state machine */
+int32_t br_fsscan_schedule (xlator_t *);
+int32_t br_fsscan_reschedule (xlator_t *);
+int32_t br_fsscan_activate (xlator_t *);
+int32_t br_fsscan_deactivate (xlator_t *);
+
+/* apply scrub-state, scrub-throttle and scrub-freq options */
+int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *);
+
+/* set up the scrub monitor and spawn its thread */
+int32_t
+br_scrubber_monitor_init (xlator_t *, br_private_t *);
+
+/* set up scrubber state, including the scrub monitor */
+int32_t br_scrubber_init (xlator_t *, br_private_t *);
+
+/* merge the bad object lists of all connected children into dict */
+int32_t br_collect_bad_objects_from_children (xlator_t *this, dict_t *dict);
+
+void
+br_child_set_scrub_state (br_child_t *, gf_boolean_t);
+
+#endif /* __BIT_ROT_SCRUB_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
new file mode 100644
index 00000000000..d304fc804ee
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
@@ -0,0 +1,114 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-ssm.h"
+#include "bit-rot-scrub.h"
+#include "bit-rot-bitd-messages.h"
+
+/* State machine action that does nothing and reports success. */
+int br_scrub_ssm_noop (xlator_t *this)
+{
+ return 0;
+}
+
+/**
+ * State machine action: log that the scrubber is paused and move the
+ * monitor to the PAUSED state. Always returns 0.
+ */
+int
+br_scrub_ssm_state_pause (xlator_t *this)
+{
+        br_private_t *priv = this->private;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+                "Scrubber paused");
+        _br_monitor_set_scrub_state (&priv->scrub_monitor,
+                                     BR_SCRUB_STATE_PAUSED);
+
+        return 0;
+}
+
+/**
+ * State machine action: log that the scrubber is paused and move the
+ * monitor to the IPAUSED state. Always returns 0.
+ */
+int
+br_scrub_ssm_state_ipause (xlator_t *this)
+{
+        br_private_t *priv = this->private;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+                "Scrubber paused");
+        _br_monitor_set_scrub_state (&priv->scrub_monitor,
+                                     BR_SCRUB_STATE_IPAUSED);
+
+        return 0;
+}
+
+/**
+ * State machine action for resuming a stalled scrub.
+ *
+ * When the monitor's `done` flag is set the scan is activated via
+ * br_fsscan_activate (its result is ignored); otherwise the resume is
+ * logged and the monitor goes back to the ACTIVE state. Always returns 0.
+ */
+int
+br_scrub_ssm_state_active (xlator_t *this)
+{
+        br_private_t      *priv    = this->private;
+        struct br_monitor *monitor = &priv->scrub_monitor;
+
+        if (!monitor->done) {
+                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+                        "Scrubbing resumed");
+                _br_monitor_set_scrub_state (monitor, BR_SCRUB_STATE_ACTIVE);
+                return 0;
+        }
+
+        (void) br_fsscan_activate (this);
+        return 0;
+}
+
+/**
+ * State machine action: log that an active scrub is being paused and move
+ * the monitor to the STALLED state. Always returns 0.
+ */
+int
+br_scrub_ssm_state_stall (xlator_t *this)
+{
+        br_private_t *priv = this->private;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+                "Volume is under active scrubbing. Pausing scrub..");
+        _br_monitor_set_scrub_state (&priv->scrub_monitor,
+                                     BR_SCRUB_STATE_STALLED);
+
+        return 0;
+}
+
+/**
+ * Scrub state machine dispatch table, indexed as [state][event].
+ * Rows follow br_scrub_state_t; the two columns follow br_scrub_event_t
+ * (first SCHEDULE, then PAUSE). Each cell is the action to run.
+ */
+static br_scrub_ssm_call *
+br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = {
+ {br_fsscan_schedule, br_scrub_ssm_state_ipause}, /* INACTIVE */
+ {br_fsscan_reschedule, br_fsscan_deactivate}, /* PENDING */
+ {br_scrub_ssm_noop, br_scrub_ssm_state_stall}, /* ACTIVE */
+ {br_fsscan_activate, br_scrub_ssm_noop}, /* PAUSED */
+ {br_fsscan_schedule, br_scrub_ssm_noop}, /* IPAUSED */
+ {br_scrub_ssm_state_active, br_scrub_ssm_noop}, /* STALLED */
+};
+
+/**
+ * Run one step of the scrub state machine: look up the action for the
+ * monitor's current state and the scrubber's current event, and invoke it.
+ *
+ * @return whatever the selected action returns.
+ */
+int32_t
+br_scrub_state_machine (xlator_t *this)
+{
+        br_private_t      *priv    = this->private;
+        struct br_monitor *monitor = &priv->scrub_monitor;
+        br_scrub_state_t   state   = monitor->state;
+        br_scrub_event_t   event   = _br_child_get_scrub_event (&priv->fsscrub);
+        br_scrub_ssm_call *action  = br_scrub_ssm[state][event];
+
+        return action (this);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
new file mode 100644
index 00000000000..936ee4d837c
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SSM_H__
+#define __BIT_ROT_SSM_H__
+
+#include "xlator.h"
+
+/*
+ * States of the scrub state machine. Values are contiguous from zero;
+ * BR_SCRUB_MAXSTATES is the number of states, not a state itself.
+ */
+typedef enum br_scrub_state {
+ BR_SCRUB_STATE_INACTIVE = 0,
+ BR_SCRUB_STATE_PENDING,
+ BR_SCRUB_STATE_ACTIVE,
+ BR_SCRUB_STATE_PAUSED,
+ BR_SCRUB_STATE_IPAUSED,
+ BR_SCRUB_STATE_STALLED,
+ BR_SCRUB_MAXSTATES,
+} br_scrub_state_t;
+
+/*
+ * Events fed to the scrub state machine. BR_SCRUB_MAXEVENTS is the number
+ * of events, not an event itself.
+ */
+typedef enum br_scrub_event {
+ BR_SCRUB_EVENT_SCHEDULE = 0,
+ BR_SCRUB_EVENT_PAUSE,
+ BR_SCRUB_MAXEVENTS,
+} br_scrub_event_t;
+
+/* forward declaration; the structure is defined elsewhere */
+struct br_monitor;
+
+/* run the state machine action for the current state and event */
+int32_t br_scrub_state_machine (xlator_t *);
+
+#endif /* __BIT_ROT_SSM_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
new file mode 100644
index 00000000000..ca3fc273e9f
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -0,0 +1,2148 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "compat-errno.h"
+
+#include "bit-rot.h"
+#include "bit-rot-scrub.h"
+#include <pthread.h>
+#include "bit-rot-bitd-messages.h"
+
+#include "tw.h"
+
+#define BR_HASH_CALC_READ_SIZE (128 * 1024)
+
+/* signature of a per-child handler: takes the xlator and the child */
+typedef int32_t (br_child_handler)(xlator_t *, br_child_t *);
+
+/**
+ * A list-linkable record that pairs a child with the handler to invoke
+ * for it.
+ */
+struct br_child_event {
+ xlator_t *this; /* xlator the event belongs to */
+
+ br_child_t *child; /* child the handler applies to */
+
+ br_child_handler *call; /* handler to run */
+
+ struct list_head list; /* list linkage */
+};
+
+/**
+ * Return the position of the subvolume @child inside priv->children[],
+ * or -1 when it is not present or an argument is missing.
+ */
+static int
+br_find_child_index (xlator_t *this, xlator_t *child)
+{
+        br_private_t *conf = NULL;
+        int           idx  = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, notfound);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, notfound);
+        GF_VALIDATE_OR_GOTO (this->name, child, notfound);
+
+        conf = this->private;
+
+        for (idx = 0; idx < conf->child_count; idx++) {
+                if (conf->children[idx].xl == child)
+                        return idx;
+        }
+
+notfound:
+        return -1;
+}
+
+/**
+ * Find the child whose brick_path equals @brick_path. The children array
+ * is scanned with priv->lock held. Returns the first match, or NULL when
+ * nothing matches or an argument is missing.
+ */
+br_child_t *
+br_get_child_from_brick_path (xlator_t *this, char *brick_path)
+{
+        br_private_t *conf  = NULL;
+        br_child_t   *match = NULL;
+        int           idx   = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, done);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, done);
+        GF_VALIDATE_OR_GOTO (this->name, brick_path, done);
+
+        conf = this->private;
+
+        pthread_mutex_lock (&conf->lock);
+        {
+                /* stop at the first child with an identical path */
+                for (idx = 0; (match == NULL) && (idx < conf->child_count);
+                     idx++) {
+                        if (strcmp (conf->children[idx].brick_path,
+                                    brick_path) == 0)
+                                match = &conf->children[idx];
+                }
+        }
+        pthread_mutex_unlock (&conf->lock);
+
+done:
+        return match;
+}
+
+/**
+ * Per-brick init hook; br_fill_brick_spec() installs it as brick->init.
+ * For now it simply hands back the brick spec it was given. We'll
+ * probably encapsulate the brick inside our own structure when
+ * needed -- later.
+ *
+ * @xl:    not used here
+ * @brick: brick spec being initialized
+ *
+ * Returns @brick unchanged.
+ */
+void *
+br_brick_init (void *xl, struct gf_brick_spec *brick)
+{
+ return brick;
+}
+
+/**
+ * Per-brick cleanup hook; br_fill_brick_spec() installs it as brick->fini.
+ * It is a no-op today because br_brick_init() allocates nothing; anything
+ * br_brick_init() starts allocating should be released here.
+ *
+ * @xl, @brick, @data: not used here
+ */
+void
+br_brick_fini (void *xl, char *brick, void *data)
+{
+ return;
+}
+
+/**
+ * Allocate and fill a br_isignature_t carrying the hash @sign.
+ *
+ * @sign:     raw hash bytes
+ * @hashlen:  number of bytes in @sign
+ * @hashtype: signature type stored in the result
+ * @object:   object being signed; its signedversion is copied over
+ *
+ * Returns the new signature (caller releases it with GF_FREE) or NULL if
+ * the allocation fails.
+ *
+ * TODO: Signature can contain null terminators which causes bitrot
+ * stub to store truncated hash as it depends on string length of
+ * the hash.
+ *
+ * FIX: Send the string length as part of the signature struct and
+ * change stub to handle this change.
+ */
+static br_isignature_t *
+br_prepare_signature (const unsigned char *sign,
+                      unsigned long hashlen,
+                      int8_t hashtype, br_object_t *object)
+{
+        br_isignature_t *signature = NULL;
+
+        /* TODO: use mem-pool */
+        signature = GF_CALLOC (1, signature_size (hashlen + 1),
+                               gf_br_stub_mt_signature_t);
+        if (!signature)
+                return NULL;
+
+        /* object version */
+        signature->signedversion = object->signedversion;
+
+        /* signature length & type */
+        signature->signaturelen = hashlen;
+        signature->signaturetype = hashtype;
+
+        /* signature itself */
+        memcpy (signature->signature, (char *)sign, hashlen);
+
+        /*
+         * hashlen + 1 payload bytes were requested above, so the last
+         * valid index is [hashlen]; writing at [hashlen + 1] would land
+         * one byte past the reserved space.
+         */
+        signature->signature[hashlen] = '\0';
+
+        return signature;
+}
+
+/**
+ * Tell whether an object carries the BITROT_OBJECT_BAD_KEY xattr.
+ *
+ * The xattr is fetched through @fd when one is given, otherwise through
+ * @loc. A successful fetch (return 0) means the object is marked bad.
+ *
+ * Returns _gf_true when marked bad, _gf_false otherwise (including when
+ * @this is NULL or the fetch fails).
+ */
+gf_boolean_t
+bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)
+{
+        int32_t  status = -1;
+        dict_t  *reply  = NULL;
+        inode_t *target = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, invalid);
+
+        /* inode is only needed for the debug message below */
+        if (loc)
+                target = loc->inode;
+        else
+                target = fd->inode;
+
+        if (fd) {
+                status = syncop_fgetxattr (child->xl, fd, &reply,
+                                           BITROT_OBJECT_BAD_KEY, NULL, NULL);
+        } else if (loc) {
+                status = syncop_getxattr (child->xl, loc,
+                                          &reply, BITROT_OBJECT_BAD_KEY, NULL,
+                                          NULL);
+        }
+
+        if (status == 0) {
+                gf_msg_debug (this->name, 0, "[GFID: %s] is marked corrupted",
+                              uuid_utoa (target->gfid));
+        }
+
+        if (reply)
+                dict_unref (reply);
+
+        return (status == 0) ? _gf_true : _gf_false;
+
+invalid:
+        return _gf_false;
+}
+
+/**
+ * Do a lookup on the gfid present within the object.
+ *
+ * @this:         bit-rot xlator
+ * @object:       object whose gfid is looked up on object->child->xl
+ * @iatt:         filled by syncop_lookup()
+ * @linked_inode: receives the inode returned by inode_link() on success
+ *
+ * Returns the syncop_lookup() result (negative on failure), -EINVAL when
+ * @this or @object is NULL, or -ENOMEM when no inode could be obtained.
+ *
+ * NOTE(review): *linked_inode is stored without checking it for NULL;
+ * confirm whether inode_link() can return NULL here, since callers use
+ * it unconditionally.
+ */
+static int32_t
+br_object_lookup (xlator_t *this, br_object_t *object,
+ struct iatt *iatt, inode_t **linked_inode)
+{
+ int ret = -EINVAL;
+ loc_t loc = {0, };
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, object, out);
+
+ /* reuse the inode if the table already knows this gfid */
+ inode = inode_find (object->child->table, object->gfid);
+
+ if (inode)
+ loc.inode = inode;
+ else
+ loc.inode = inode_new (object->child->table);
+
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy (loc.gfid, object->gfid);
+
+ ret = syncop_lookup (object->child->xl, &loc, iatt, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * The file might have been deleted by the application
+ * after getting the event, but before doing a lookup.
+ * So use linked_inode after inode_link is done.
+ */
+ *linked_inode = inode_link (loc.inode, NULL, NULL, iatt);
+ if (*linked_inode)
+ inode_lookup (*linked_inode);
+
+out:
+ /* loc_wipe() runs on every path, including the early validation exits */
+ loc_wipe (&loc);
+ return ret;
+}
+
+/**
+ * open the object with O_RDONLY flags and return the fd. How to let brick
+ * know that open is being done by bitd because syncop framework does not allow
+ * passing xdata -- may be use frame->root->pid itself.
+ *
+ * @this:   bit-rot xlator
+ * @object: object being opened; the open goes to object->child->xl
+ * @inode:  inode to create the fd on
+ * @openfd: receives the bound fd on success; left untouched on failure
+ *
+ * Returns 0 on success, -1 when an argument is NULL, -EINVAL when
+ * fd_create() fails, otherwise the (non-zero) syncop_open() result.
+ */
+static int32_t
+br_object_open (xlator_t *this,
+ br_object_t *object, inode_t *inode, fd_t **openfd)
+{
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ loc_t loc = {0, };
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, object, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = -EINVAL;
+ fd = fd_create (inode, 0);
+ if (!fd) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "failed to create fd for the inode %s",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
+
+ /* loc holds its own inode reference; dropped by loc_wipe() below */
+ loc.inode = inode_ref (inode);
+ gf_uuid_copy (loc.gfid, inode->gfid);
+
+ ret = syncop_open (object->child->xl, &loc, O_RDONLY, fd, NULL, NULL);
+ if (ret) {
+ /* ret is a negated errno, hence -ret */
+ br_log_object (this, "open", inode->gfid, -ret);
+ fd_unref (fd);
+ fd = NULL;
+ } else {
+ fd_bind (fd);
+ *openfd = fd;
+ }
+
+ loc_wipe (&loc);
+
+out:
+ return ret;
+}
+
+/**
+ * Read up to @size bytes of the object behind @fd starting at @offset and
+ * feed the data that was read into the running SHA256 context @sha256.
+ * No buffer is handed back to the caller.
+ *
+ * @this:   bit-rot xlator (its private->tbf throttles the hashing)
+ * @fd:     open fd on the object
+ * @child:  child whose xlator services the read
+ * @offset: where to start reading
+ * @size:   maximum number of bytes to request
+ * @sha256: hash context to update
+ *
+ * Returns the syncop_readv() result when it is >= 0 (the caller advances
+ * its offset by this value and treats 0 as "nothing more to read"), or -1
+ * on a missing argument or a failed read.
+ */
+static int32_t
+br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
+                               off_t offset, size_t size, SHA256_CTX *sha256)
+{
+        int32_t        ret    = -1;
+        tbf_t         *tbf    = NULL;
+        struct iovec  *iovec  = NULL;
+        struct iobref *iobref = NULL;
+        br_private_t  *priv   = NULL;
+        int            count  = 0;
+        int            i      = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        priv = this->private;
+
+        GF_VALIDATE_OR_GOTO (this->name, priv->tbf, out);
+        tbf = priv->tbf;
+
+        ret = syncop_readv (child->xl, fd,
+                            size, offset, 0, &iovec, &count, &iobref, NULL,
+                            NULL);
+
+        if (ret < 0) {
+                /*
+                 * syncop calls report failure as a negated errno (the rest
+                 * of this file logs -ret for them); the global errno is not
+                 * a reliable source for the failure reason.
+                 */
+                gf_msg (this->name, GF_LOG_ERROR, -ret, BRB_MSG_READV_FAILED,
+                        "readv on %s failed", uuid_utoa (fd->inode->gfid));
+                ret = -1;
+                goto out;
+        }
+
+        if (ret == 0)
+                goto out;
+
+        for (i = 0; i < count; i++) {
+                TBF_THROTTLE_BEGIN (tbf, TBF_OP_HASH, iovec[i].iov_len);
+                {
+                        SHA256_Update (sha256, (const unsigned char *)
+                                       (iovec[i].iov_base), iovec[i].iov_len);
+                }
+                /*
+                 * NOTE(review): a second TBF_THROTTLE_BEGIN closes the
+                 * bracket where a TBF_THROTTLE_END would be expected --
+                 * confirm against the TBF macro definitions whether this is
+                 * intentional (it looks like each vector is throttled
+                 * twice).
+                 */
+                TBF_THROTTLE_BEGIN (tbf, TBF_OP_HASH, iovec[i].iov_len);
+        }
+
+ out:
+        if (iovec)
+                GF_FREE (iovec);
+
+        if (iobref)
+                iobref_unref (iobref);
+
+        return ret;
+}
+
+/**
+ * Compute the SHA256 digest of the object behind @fd into @md by reading
+ * it in BR_HASH_CALC_READ_SIZE sized requests from offset 0 until a read
+ * returns 0.
+ *
+ * Returns 0 with @md filled in, or a negative value when an argument is
+ * missing or a block read fails (@md is not finalized in that case).
+ */
+int32_t
+br_calculate_obj_checksum (unsigned char *md,
+                           br_child_t *child, fd_t *fd, struct iatt *iatt)
+{
+        int32_t     nread  = -1;
+        off_t       offset = 0;
+        size_t      chunk  = BR_HASH_CALC_READ_SIZE;
+        xlator_t   *this   = NULL;
+        SHA256_CTX  sha256;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", child, done);
+        GF_VALIDATE_OR_GOTO ("bit-rot", iatt, done);
+        GF_VALIDATE_OR_GOTO ("bit-rot", fd, done);
+
+        this = child->this;
+
+        SHA256_Init (&sha256);
+
+        /* keep reading while blocks come back non-empty */
+        do {
+                nread = br_object_read_block_and_sign (this, fd, child,
+                                                       offset, chunk, &sha256);
+                if (nread > 0)
+                        offset += nread;
+        } while (nread > 0);
+
+        if (nread < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_BLOCK_READ_FAILED, "reading block with "
+                        "offset %lu of object %s failed", offset,
+                        uuid_utoa (fd->inode->gfid));
+        } else {
+                SHA256_Final (md, &sha256);
+        }
+
+ done:
+        return nread;
+}
+
+/**
+ * Signer-side convenience wrapper: compute the checksum of @object's data
+ * by forwarding to br_calculate_obj_checksum() with object->child.
+ * Returns whatever br_calculate_obj_checksum() returns.
+ */
+static int32_t
+br_object_checksum (unsigned char *md,
+ br_object_t *object, fd_t *fd, struct iatt *iatt)
+{
+ return br_calculate_obj_checksum (md, object->child, fd, iatt);
+}
+
+/**
+ * Hash the object behind @fd and store the resulting signature on it.
+ *
+ * Steps: compute the SHA256 digest, wrap it with br_prepare_signature(),
+ * put it in a dict under GLUSTERFS_SET_OBJECT_SIGNATURE and send it with
+ * syncop_fsetxattr() to object->child->xl.
+ *
+ * @linked_inode: inode of the object (used for logging)
+ * @fd:           open fd on the object
+ * @object:       object being signed
+ * @iatt:         passed through to the checksum routine
+ *
+ * Returns 0 on success and non-zero on any failure.
+ */
+static int32_t
+br_object_read_sign (inode_t *linked_inode, fd_t *fd, br_object_t *object,
+                     struct iatt *iatt)
+{
+        int32_t          ret   = -1;
+        xlator_t        *this  = NULL;
+        dict_t          *xattr = NULL;
+        unsigned char   *md    = NULL;
+        br_isignature_t *sign  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", object, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", linked_inode, out);
+        GF_VALIDATE_OR_GOTO ("bit-rot", fd, out);
+
+        this = object->this;
+
+        md = GF_CALLOC (SHA256_DIGEST_LENGTH, sizeof (*md), gf_common_mt_char);
+        if (!md) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                        "failed to allocate memory for saving hash of the "
+                        "object %s", uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        ret = br_object_checksum (md, object, fd, iatt);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_CALC_CHECKSUM_FAILED, "calculating checksum "
+                        "for the object %s failed",
+                        uuid_utoa (linked_inode->gfid));
+                goto free_signature;
+        }
+
+        sign = br_prepare_signature (md, SHA256_DIGEST_LENGTH,
+                                     BR_SIGNATURE_TYPE_SHA256, object);
+        if (!sign) {
+                /* ret is 0 from the checksum step; report the failure */
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+                        "failed to get the signature for the object %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto free_signature;
+        }
+
+        xattr = dict_for_key_value
+                (GLUSTERFS_SET_OBJECT_SIGNATURE,
+                 (void *)sign, signature_size (SHA256_DIGEST_LENGTH));
+
+        if (!xattr) {
+                /* same here: ret would otherwise still read as success */
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+                        "dict allocation for signing failed for the object %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto free_isign;
+        }
+
+        ret = syncop_fsetxattr (object->child->xl, fd, xattr, 0, NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+                        "fsetxattr of signature to the object %s failed",
+                        uuid_utoa (fd->inode->gfid));
+                goto unref_dict;
+        }
+
+        ret = 0;
+
+ unref_dict:
+        dict_unref (xattr);
+ free_isign:
+        GF_FREE (sign);
+ free_signature:
+        GF_FREE (md);
+ out:
+        return ret;
+}
+
+/**
+ * Classify an errno: ENOENT, ESTALE and ENODATA count as "soft" errors.
+ * Returns 1 for those and 0 for everything else.
+ */
+static int br_object_sign_softerror (int32_t op_errno)
+{
+        switch (op_errno) {
+        case ENOENT:
+        case ESTALE:
+        case ENODATA:
+                return 1;
+        default:
+                return 0;
+        }
+}
+
+/**
+ * Log a failed operation @op on the object identified by @gfid. Soft
+ * errors (see br_object_sign_softerror()) only produce a debug message
+ * with the strerror() text; anything else is logged at error level.
+ */
+void
+br_log_object (xlator_t *this, char *op, uuid_t gfid, int32_t op_errno)
+{
+        if (!br_object_sign_softerror (op_errno)) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED,
+                        "%s() failed on object %s", op, uuid_utoa (gfid));
+                return;
+        }
+
+        gf_msg_debug (this->name, 0, "%s() failed on object %s "
+                      "[reason: %s]", op, uuid_utoa (gfid),
+                      strerror (op_errno));
+}
+
+/**
+ * Same as br_log_object() but identifies the object by @path instead of
+ * by gfid: soft errors go to the debug log, the rest to the error log.
+ */
+void
+br_log_object_path (xlator_t *this, char *op,
+                    const char *path, int32_t op_errno)
+{
+        if (!br_object_sign_softerror (op_errno)) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED,
+                        "%s() failed on object %s", op, path);
+                return;
+        }
+
+        gf_msg_debug (this->name, 0, "%s() failed on object %s "
+                      "[reason: %s]", op, path, strerror (op_errno));
+}
+
+/**
+ * Ask the brick to (re)sign an object: open it O_RDWR on child->xl and
+ * send an fsetxattr carrying BR_REOPEN_SIGN_HINT_KEY. The hint value is
+ * BR_OBJECT_REOPEN when @need_reopen is true, BR_OBJECT_RESIGN otherwise.
+ *
+ * @this:         bit-rot xlator
+ * @child:        child hosting the object
+ * @linked_inode: inode the fd is created on
+ * @loc:          location passed to syncop_open()
+ * @need_reopen:  selects the hint value
+ *
+ * Nothing is returned; any failure is reported with a warning.
+ */
+static void
+br_trigger_sign (xlator_t *this, br_child_t *child,
+ inode_t *linked_inode, loc_t *loc, gf_boolean_t need_reopen)
+{
+ fd_t *fd = NULL;
+ int32_t ret = -1;
+ uint32_t val = 0;
+ dict_t *dict = NULL;
+ pid_t pid = GF_CLIENT_PID_BITD;
+
+ /* issue the following syncops with the BitD client pid */
+ syncopctx_setfspid (&pid);
+
+ val = (need_reopen == _gf_true) ? BR_OBJECT_REOPEN : BR_OBJECT_RESIGN;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, val);
+ if (ret)
+ goto cleanup_dict;
+
+ /* reset so that an fd_create() failure is reported at out: */
+ ret = -1;
+ fd = fd_create (linked_inode, 0);
+ if (!fd) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "Failed to create fd [GFID %s]",
+ uuid_utoa (linked_inode->gfid));
+ goto cleanup_dict;
+ }
+
+ ret = syncop_open (child->xl, loc, O_RDWR, fd, NULL, NULL);
+ if (ret) {
+ br_log_object (this, "open", linked_inode->gfid, -ret);
+ goto unref_fd;
+ }
+
+ fd_bind (fd);
+
+ ret = syncop_fsetxattr (child->xl, fd, dict, 0, NULL, NULL);
+ if (ret)
+ br_log_object (this, "fsetxattr", linked_inode->gfid, -ret);
+
+ /* passthough: fd_unref() */
+
+ unref_fd:
+ fd_unref (fd);
+ cleanup_dict:
+ dict_unref (dict);
+ out:
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN,
+ "Could not trigger signingd for %s (reopen hint: %d)",
+ uuid_utoa (linked_inode->gfid), val);
+ }
+}
+
+/**
+ * Trigger re-signing of @object: build a gfid-based loc for
+ * @linked_inode and call br_trigger_sign() with need_reopen set to
+ * _gf_false.
+ */
+static void
+br_object_resign (xlator_t *this,
+ br_object_t *object, inode_t *linked_inode)
+{
+ loc_t loc = {0, };
+
+ /* the loc takes its own inode reference, released by loc_wipe() */
+ loc.inode = inode_ref (linked_inode);
+ gf_uuid_copy (loc.gfid, linked_inode->gfid);
+
+ br_trigger_sign (this, object->child, linked_inode, &loc, _gf_false);
+
+ loc_wipe (&loc);
+}
+
+/**
+ * Sign a given object. This routine runs full throttle. There needs to be
+ * some form of priority scheduling and/or read burstness to avoid starving
+ * (or kicking) client I/O's.
+ *
+ * Flow: look the object up by gfid; if its sign_info says
+ * BR_SIGN_REOPEN_WAIT only trigger a re-sign on the brick, otherwise open
+ * the object, hash it and store the signature.
+ *
+ * Returns 0 on success (including the re-sign path) and non-zero when the
+ * lookup, open or read-and-sign step fails.
+ *
+ * NOTE(review): linked_inode comes from inode_link() inside
+ * br_object_lookup() and is used and unref'd here without a NULL check;
+ * confirm that it cannot be NULL after a successful lookup.
+ */
+static int32_t br_sign_object (br_object_t *object)
+{
+ int32_t ret = -1;
+ inode_t *linked_inode = NULL;
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ struct iatt iatt = {0, };
+ pid_t pid = GF_CLIENT_PID_BITD;
+ br_sign_state_t sign_info = BR_SIGN_NORMAL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", object, out);
+
+ this = object->this;
+
+ /**
+ * FIXME: This is required as signing an object is restricted to
+ * clients with special frame->root->pid. Change the way client
+ * pid is set.
+ */
+ syncopctx_setfspid (&pid);
+
+ ret = br_object_lookup (this, object, &iatt, &linked_inode);
+ if (ret) {
+ br_log_object (this, "lookup", object->gfid, -ret);
+ goto out;
+ }
+
+ /**
+ * For fd's that have notified for reopening, we send an explicit
+ * open() followed by a dummy write() call. This triggers the
+ * actual signing of the object.
+ */
+ /* sign_info is kept in network byte order inside the object */
+ sign_info = ntohl (object->sign_info);
+ if (sign_info == BR_SIGN_REOPEN_WAIT) {
+ br_object_resign (this, object, linked_inode);
+ goto unref_inode;
+ }
+
+ ret = br_object_open (this, object, linked_inode, &fd);
+ if (!fd) {
+ /* br_object_open() leaves fd NULL on every failure path */
+ br_log_object (this, "open", object->gfid, -ret);
+ goto unref_inode;
+ }
+
+ /**
+ * we have an open file descriptor on the object. from here on,
+ * do not be generous to file operation errors.
+ */
+ gf_msg_debug (this->name, 0, "Signing object [%s]",
+ uuid_utoa (linked_inode->gfid));
+
+ ret = br_object_read_sign (linked_inode, fd, object, &iatt);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_READ_AND_SIGN_FAILED, "reading and signing of "
+ "the object %s failed", uuid_utoa (linked_inode->gfid));
+ goto unref_fd;
+ }
+
+ ret = 0;
+
+ unref_fd:
+ fd_unref (fd);
+ unref_inode:
+ inode_unref (linked_inode);
+ out:
+ return ret;
+}
+
+/**
+ * Dequeue the first object from priv->obj_queue->objects, waiting on
+ * priv->object_cond (with priv->lock as the associated mutex) while the
+ * queue is empty. The caller must therefore hold priv->lock, as
+ * br_process_object() does.
+ *
+ * Returns the removed object; never returns NULL.
+ */
+static br_object_t *__br_pick_object (br_private_t *priv)
+{
+ br_object_t *object = NULL;
+
+ /* loop re-checks the predicate after every wakeup */
+ while (list_empty (&priv->obj_queue->objects)) {
+ pthread_cond_wait (&priv->object_cond, &priv->lock);
+ }
+
+ object = list_first_entry
+ (&priv->obj_queue->objects, br_object_t, list);
+ list_del_init (&object->list);
+
+ return object;
+}
+
+/**
+ * This is the place where the signing of the objects is triggered.
+ *
+ * Thread body; @arg is the bit-rot xlator. It loops forever: take the
+ * next object off the queue (blocking under priv->lock until one is
+ * available), sign it with br_sign_object() and free it. A failure is
+ * logged unless its errno is one of the soft errors recognised by
+ * br_object_sign_softerror(). The loop has no exit, so the final return
+ * is not reached.
+ */
+void *
+br_process_object (void *arg)
+{
+ xlator_t *this = NULL;
+ br_object_t *object = NULL;
+ br_private_t *priv = NULL;
+ int32_t ret = -1;
+
+ this = arg;
+ priv = this->private;
+
+ THIS = this;
+
+ for (;;) {
+ pthread_mutex_lock (&priv->lock);
+ {
+ object = __br_pick_object (priv);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ /* signing happens outside the lock */
+ ret = br_sign_object (object);
+ if (ret && !br_object_sign_softerror (-ret))
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_SIGN_FAILED, "SIGNING FAILURE [%s]",
+ uuid_utoa (object->gfid));
+ GF_FREE (object);
+ }
+
+ return NULL;
+}
+
+/**
+ * This function gets kicked in once the object is expired from the
+ * timer wheel. This actually adds the object received via notification
+ * from the changelog to the queue from where the objects gets picked
+ * up for signing.
+ *
+ * This routine can be made lightweight by introducing an alternate
+ * timer-wheel API that dispatches _all_ expired objects in one-shot
+ * rather than an object at-a-time. This routine can then just simply
+ * be a call to list_splice_tail().
+ *
+ * NOTE: use call_time to instrument signing time in br_sign_object().
+ *
+ * @timer:     expired timer, returned to its pool with mem_put(); NULL
+ *             when called directly (see br_object_quicksign())
+ * @data:      the br_object_t to enqueue
+ * @call_time: currently unused
+ */
+void
+br_add_object_to_queue (struct gf_tw_timer_list *timer,
+ void *data, unsigned long call_time)
+{
+ br_object_t *object = NULL;
+ xlator_t *this = NULL;
+ br_private_t *priv = NULL;
+
+ object = data;
+ this = object->this;
+ priv = this->private;
+
+ THIS = this;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ list_add_tail (&object->list, &priv->obj_queue->objects);
+ /* wake the threads waiting in __br_pick_object() */
+ pthread_cond_broadcast (&priv->object_cond);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ if (timer)
+ mem_put (timer);
+ return;
+}
+
+/**
+ * Allocate a br_object_t for a release event and fill it from
+ * ev->u.releasebr (gfid, version, sign_info) together with @this and
+ * @child.
+ *
+ * Returns the new object (freed later with GF_FREE) or NULL when the
+ * allocation fails.
+ */
+static br_object_t *
+br_initialize_object (xlator_t *this, br_child_t *child, changelog_event_t *ev)
+{
+ br_object_t *object = NULL;
+
+ object = GF_CALLOC (1, sizeof (*object), gf_br_mt_br_object_t);
+ if (!object)
+ goto out;
+ INIT_LIST_HEAD (&object->list);
+
+ object->this = this;
+ object->child = child;
+ gf_uuid_copy (object->gfid, ev->u.releasebr.gfid);
+
+ /* NOTE: it's BE, but no worry */
+ /* values are copied as received; users of sign_info apply ntohl() */
+ object->signedversion = ev->u.releasebr.version;
+ object->sign_info = ev->u.releasebr.sign_info;
+
+out:
+ return object;
+}
+
+/**
+ * Take a timer from child->timer_pool, arm it so that
+ * br_add_object_to_queue() runs with @object as its data, and add it to
+ * priv->timer_wheel. The expiry is priv->expiry_time, or 1 when that
+ * is zero. @ev is not used here.
+ *
+ * Returns the timer, or NULL when the pool allocation fails.
+ */
+static struct gf_tw_timer_list *
+br_initialize_timer (xlator_t *this, br_object_t *object, br_child_t *child,
+ changelog_event_t *ev)
+{
+ br_private_t *priv = NULL;
+ struct gf_tw_timer_list *timer = NULL;
+
+ priv = this->private;
+
+ timer = mem_get0 (child->timer_pool);
+ if (!timer)
+ goto out;
+ INIT_LIST_HEAD (&timer->entry);
+
+ timer->expires = priv->expiry_time;
+ /* never arm a timer with a zero expiry */
+ if (!timer->expires)
+ timer->expires = 1;
+
+ timer->data = object;
+ timer->function = br_add_object_to_queue;
+ gf_tw_add_timer (priv->timer_wheel, timer);
+
+out:
+ return timer;
+}
+
+/**
+ * Put @object on the timer wheel so it is queued for signing once the
+ * expiry time passes. Returns 0 when the timer was set up, -1 (after
+ * logging an error) when it could not be allocated.
+ */
+static int32_t
+br_schedule_object_reopen (xlator_t *this, br_object_t *object,
+                           br_child_t *child, changelog_event_t *ev)
+{
+        if (br_initialize_timer (this, object, child, ev) != NULL)
+                return 0;
+
+        gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED,
+                "Failed to allocate object expiry timer [GFID: %s]",
+                uuid_utoa (object->gfid));
+        return -1;
+}
+
+/**
+ * Queue @object for signing right away, bypassing the timer wheel, by
+ * calling br_add_object_to_queue() directly with no timer. Always
+ * returns 0.
+ */
+static int32_t
+br_object_quicksign (xlator_t *this, br_object_t *object)
+{
+ br_add_object_to_queue (NULL, object, 0ULL);
+ return 0;
+}
+
+/**
+ * This callback function registered with the changelog is executed
+ * whenever a notification from the changelog is received. This should
+ * add the object (or the gfid) on which the notification has come to
+ * the timer-wheel with some expiry time.
+ *
+ * @xl:    the bit-rot xlator
+ * @brick: path of the brick the event came from
+ * @data:  not used here
+ * @ev:    changelog event, expected to be a BR_RELEASE event
+ *
+ * Objects whose sign_info is BR_SIGN_REOPEN_WAIT are scheduled on the
+ * timer wheel; everything else is queued for signing immediately.
+ *
+ * TODO: use mem-pool for allocations and maybe allocate timer and
+ * object as a single alloc and bifurcate their respective pointers.
+ */
+void
+br_brick_callback (void *xl, char *brick,
+                   void *data, changelog_event_t *ev)
+{
+        int32_t          ret       = 0;
+        uuid_t           gfid      = {0,};
+        xlator_t        *this      = NULL;
+        br_object_t     *object    = NULL;
+        br_child_t      *child     = NULL;
+        br_sign_state_t  sign_info = BR_SIGN_INVALID;
+
+        this = xl;
+
+        /*
+         * @this must be checked first: the remaining checks read
+         * this->name, which would dereference a NULL xlator.
+         */
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, ev, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        GF_ASSERT (ev->ev_type == CHANGELOG_OP_TYPE_BR_RELEASE);
+        GF_ASSERT (!gf_uuid_is_null (ev->u.releasebr.gfid));
+
+        gf_uuid_copy (gfid, ev->u.releasebr.gfid);
+
+        gf_msg_debug (this->name, 0, "RELEASE EVENT [GFID %s]",
+                      uuid_utoa (gfid));
+
+        child = br_get_child_from_brick_path (this, brick);
+        if (!child) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
+                        "failed to get the subvolume for the brick %s", brick);
+                goto out;
+        }
+
+        object = br_initialize_object (this, child, ev);
+        if (!object) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                        "failed to allocate object memory [GFID: %s]",
+                        uuid_utoa (gfid));
+                goto out;
+        }
+
+        /* sanity check */
+        sign_info = ntohl (object->sign_info);
+        GF_ASSERT (sign_info != BR_SIGN_NORMAL);
+
+        if (sign_info == BR_SIGN_REOPEN_WAIT)
+                ret = br_schedule_object_reopen (this, object, child, ev);
+        else
+                ret = br_object_quicksign (this, object);
+
+        /* on success the object is owned by the timer or the queue */
+        if (ret)
+                goto free_object;
+
+        gf_msg_debug (this->name, 0, "->callback: brick [%s], type [%d]\n",
+                      brick, ev->ev_type);
+        return;
+
+ free_object:
+        GF_FREE (object);
+ out:
+        return;
+}
+
+/**
+ * Populate a changelog brick spec: duplicate @path as the brick path,
+ * subscribe to CHANGELOG_OP_TYPE_BR_RELEASE events only and install the
+ * bit-rot init/fini/callback hooks. No connect/disconnect hooks are set.
+ *
+ * NOTE(review): the gf_strdup() result is not checked, so brick_path may
+ * be NULL after this call if the allocation fails.
+ */
+void
+br_fill_brick_spec (struct gf_brick_spec *brick, char *path)
+{
+ brick->brick_path = gf_strdup (path);
+ brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE;
+
+ brick->init = br_brick_init;
+ brick->fini = br_brick_fini;
+ brick->callback = br_brick_callback;
+ brick->connected = NULL;
+ brick->disconnected = NULL;
+}
+
+/**
+ * Decide from a getxattr reply whether an object needs signing.
+ *
+ * The reply dict @xattr is expected to carry a br_isignature_out_t under
+ * GLUSTERFS_GET_OBJECT_SIGNATURE; the object needs signing when that
+ * record has its stale flag set.
+ *
+ * Returns _gf_true when signing is needed, _gf_false otherwise (also
+ * when an argument is NULL or the key is missing from the dict).
+ */
+static gf_boolean_t
+br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child)
+{
+ int32_t ret = -1;
+ gf_boolean_t need_sign = _gf_false;
+ br_isignature_out_t *sign = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, xattr, out);
+ GF_VALIDATE_OR_GOTO (this->name, child, out);
+
+ ret = dict_get_ptr (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ (void **)&sign);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "failed to get object signature info");
+ goto out;
+ }
+
+ /* A stale signature means the object has to be signed again. */
+ if (sign->stale)
+ need_sign = _gf_true;
+
+out:
+ return need_sign;
+}
+
+
+
+/**
+ * Fill @loc for the directory entry @entry found under @parent.
+ *
+ * If the inode table already has an inode for the entry it is reused,
+ * otherwise a new one is created. Parent, parent gfid, path and name are
+ * then set from @parent and entry->d_name.
+ *
+ * Returns 1 when @loc is ready for use, 0 when the cached inode is known
+ * not to be a regular file (nothing more is filled in), otherwise the
+ * inode_path() result (expected to be negative) after logging an error.
+ * The caller is responsible for wiping @loc in every case.
+ *
+ * NOTE(review): the inode_new() result is not checked for NULL.
+ */
+int32_t
+br_prepare_loc (xlator_t *this, br_child_t *child, loc_t *parent,
+ gf_dirent_t *entry, loc_t *loc)
+{
+ int32_t ret = -1;
+ inode_t *inode = NULL;
+
+ inode = inode_grep (child->table, parent->inode, entry->d_name);
+ if (!inode)
+ loc->inode = inode_new (child->table);
+ else {
+ loc->inode = inode;
+ if (loc->inode->ia_type != IA_IFREG) {
+ gf_msg_debug (this->name, 0, "%s is not a regular "
+ "file", entry->d_name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ loc->parent = inode_ref (parent->inode);
+ gf_uuid_copy (loc->pargfid, parent->inode->gfid);
+
+ ret = inode_path (parent->inode, entry->d_name, (char **)&loc->path);
+ if (ret < 0 || !loc->path) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
+ "inode_path on %s (parent: %s) failed", entry->d_name,
+ uuid_utoa (parent->inode->gfid));
+ goto out;
+ }
+
+ /* name is the last path component, pointing into loc->path */
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+
+ ret = 1;
+
+out:
+ return ret;
+}
+
+/**
+ * Oneshot crawler
+ * ---------------
+ * This is a catchup mechanism. Objects that remained unsigned from the
+ * last run for whatever reason (node crashes, reboots, etc..) become
+ * candidates for signing. This allows the signature to "catch up" with
+ * the current state of the object. Triggering signing is easy: perform
+ * an open() followed by a close() thereby resulting in call boomerang.
+ * (though not back to itself :))
+ *
+ * Called once per directory entry by the file-tree walk started in
+ * br_oneshot_signer().
+ *
+ * @subvol: subvolume being crawled (only checked for NULL here)
+ * @entry:  directory entry to examine
+ * @parent: loc of the directory containing @entry
+ * @data:   the br_child_t being crawled
+ *
+ * Returns 0 when the entry was skipped or signing was triggered, -1 on a
+ * NULL @subvol / @data, otherwise the negative result of the failing
+ * lookup or getxattr.
+ */
+int
+bitd_oneshot_crawl (xlator_t *subvol,
+ gf_dirent_t *entry, loc_t *parent, void *data)
+{
+ int op_errno = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ struct iatt parent_buf = {0, };
+ dict_t *xattr = NULL;
+ int32_t ret = -1;
+ inode_t *linked_inode = NULL;
+ gf_boolean_t need_signing = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+
+ child = data;
+ this = child->this;
+
+ /*
+ * NOTE(review): only the "not a regular file" result (0) stops here;
+ * a negative result from br_prepare_loc() falls through to the
+ * lookup below -- confirm that this is intended.
+ */
+ ret = br_prepare_loc (this, child, parent, entry, &loc);
+ if (!ret)
+ goto out;
+
+ ret = syncop_lookup (child->xl, &loc, &iatt, &parent_buf, NULL, NULL);
+ if (ret) {
+ br_log_object_path (this, "lookup", loc.path, -ret);
+ goto out;
+ }
+
+ /*
+ * NOTE(review): linked_inode is dereferenced and unref'd below without
+ * a NULL check; confirm inode_link() cannot return NULL here.
+ */
+ linked_inode = inode_link (loc.inode, parent->inode, loc.name, &iatt);
+ if (linked_inode)
+ inode_lookup (linked_inode);
+
+ if (iatt.ia_type != IA_IFREG) {
+ gf_msg_debug (this->name, 0, "%s is not a regular file, "
+ "skipping..", entry->d_name);
+ ret = 0;
+ goto unref_inode;
+ }
+
+ /**
+ * As of now, 2 cases are possible and handled.
+ * 1) GlusterFS is upgraded from a previous version which does not
+ * have any idea about bit-rot and have data in the filesystem.
+ * In this case syncop_getxattr fails with ENODATA and the object
+ * is signed. (In real, when crawler sends lookup, bit-rot-stub
+ * creates the xattrs before returning lookup reply)
+ * 2) Bit-rot was not enabled or BitD was down for some reasons, during
+ * which some files were created, but since BitD was down, were not
+ * signed.
+ * If the file was just created and was being written some data when
+ * the down BitD came up, then bit-rot stub should be intelligent to
+ * identify this case (by comparing the ongoing version or by checking
+ * if there are any fds present for that inode) and handle properly.
+ */
+
+ /* ret is still 0 from the lookup, so a bad object is a clean skip */
+ if (bitd_is_bad_file (this, child, &loc, NULL)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT,
+ "Entry [%s] is marked corrupted.. skipping.", loc.path);
+ goto unref_inode;
+ }
+
+ ret = syncop_getxattr (child->xl, &loc, &xattr,
+ GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL);
+ if (ret < 0) {
+ op_errno = -ret;
+ br_log_object (this, "getxattr", linked_inode->gfid, op_errno);
+
+ /**
+ * No need to sign the zero byte objects as the signing
+ * happens upon first modification of the object.
+ */
+ if (op_errno == ENODATA && (iatt.ia_size != 0))
+ need_signing = _gf_true;
+ if (op_errno == EINVAL)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ BRB_MSG_PARTIAL_VERSION_PRESENCE, "Partial "
+ "version xattr presence detected, ignoring "
+ "[GFID: %s]", uuid_utoa (linked_inode->gfid));
+ } else {
+ need_signing = br_check_object_need_sign (this, xattr, child);
+ }
+
+ /* when nothing is to be signed, ret keeps the getxattr result */
+ if (!need_signing)
+ goto unref_dict;
+
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
+ "Triggering signing for %s [GFID: %s | Brick: %s]",
+ loc.path, uuid_utoa (linked_inode->gfid), child->brick_path);
+ br_trigger_sign (this, child, linked_inode, &loc, _gf_true);
+
+ ret = 0;
+
+ unref_dict:
+ if (xattr)
+ dict_unref (xattr);
+ unref_inode:
+ inode_unref (linked_inode);
+ out:
+ loc_wipe (&loc);
+
+ return ret;
+}
+
+#define BR_CRAWL_THROTTLE_COUNT 50
+#define BR_CRAWL_THROTTLE_ZZZ 5
+
+/**
+ * Thread body of the one-shot crawler; @arg is the br_child_t to crawl.
+ *
+ * Walks the brick from the root inode of child->table using
+ * syncop_ftw_throttle() with bitd_oneshot_crawl() as the per-entry
+ * callback and the BR_CRAWL_THROTTLE_* values as throttle parameters.
+ * The result of the walk is ignored. Always returns NULL.
+ */
+void *
+br_oneshot_signer (void *arg)
+{
+ loc_t loc = {0,};
+ xlator_t *this = NULL;
+ br_child_t *child = NULL;
+
+ child = arg;
+ this = child->this;
+
+ THIS = this;
+
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START,
+ "Crawling brick [%s], scanning for unsigned objects",
+ child->brick_path);
+
+ /* the root inode is used as is: no extra reference, no loc_wipe() */
+ loc.inode = child->table->root;
+ (void) syncop_ftw_throttle
+ (child->xl, &loc,
+ GF_CLIENT_PID_BITD, child, bitd_oneshot_crawl,
+ BR_CRAWL_THROTTLE_COUNT, BR_CRAWL_THROTTLE_ZZZ);
+
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
+ "Completed crawling brick [%s]", child->brick_path);
+
+ return NULL;
+}
+
+/**
+ * Set @child's state to @state while holding child->lock. Callers that
+ * already hold the lock use _br_set_child_state() directly.
+ */
+static void
+br_set_child_state (br_child_t *child, br_child_state_t state)
+{
+ pthread_mutex_lock (&child->lock);
+ {
+ _br_set_child_state (child, state);
+ }
+ pthread_mutex_unlock (&child->lock);
+}
+
+/**
+ * At this point a thread is spawned to crawl the filesystem (in
+ * tortoise pace) to sign objects that were not signed in previous run(s).
+ * Such objects are identified by examining its dirtiness and timestamp.
+ *
+ * pick object:
+ * signature_is_stale() && (object_timestamp() <= stub_init_time())
+ *
+ * Also, we register to the changelog library to subscribe for event
+ * notifications.
+ *
+ * @this:  bit-rot xlator
+ * @child: child to start signing for
+ * @stub:  stub info; only stub->export (the brick path) is used here
+ *
+ * Returns 0 once the changelog registration succeeded (even if the
+ * crawler thread could not be spawned) and -1 when the brick spec could
+ * not be allocated or the registration failed.
+ */
+static int32_t
+br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+ int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct gf_brick_spec *brick = NULL;
+
+ priv = this->private;
+
+ brick = GF_CALLOC (1, sizeof (struct gf_brick_spec),
+ gf_common_mt_gf_brick_spec_t);
+ if (!brick)
+ goto error_return;
+
+ br_fill_brick_spec (brick, stub->export);
+ ret = gf_changelog_register_generic
+ (brick, 1, 1, this->ctx->cmd_args.log_file, -1, this);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ BRB_MSG_REGISTER_FAILED, "Register to changelog "
+ "failed");
+ goto dealloc;
+ }
+
+ /* threadrunning records whether the crawler thread was started */
+ child->threadrunning = 0;
+ ret = gf_thread_create (&child->thread, NULL, br_oneshot_signer, child);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
+ "failed to spawn FS crawler thread");
+ else
+ child->threadrunning = 1;
+
+ /* it's OK to continue, "old" objects would be signed when modified */
+ list_add_tail (&child->list, &priv->signing);
+ return 0;
+
+ dealloc:
+ /*
+ * NOTE(review): brick->brick_path was gf_strdup()'d by
+ * br_fill_brick_spec() and is not released here -- confirm whether
+ * the changelog library takes ownership on a failed registration.
+ */
+ GF_FREE (brick);
+ error_return:
+ return -1;
+}
+
+/**
+ * Start scrubbing for @child: spawn the br_fsscanner thread, wake the
+ * scrub monitor the first time this happens, and add the child to the
+ * scrubber's list.
+ *
+ * @this:    bit-rot xlator
+ * @child:   child to scrub
+ * @fsscan:  not used in this function
+ * @fsscrub: scrubber whose scrublist the child joins
+ *
+ * Returns 0 on success, -1 when the thread could not be created.
+ */
+static int32_t
+br_launch_scrubber (xlator_t *this, br_child_t *child,
+ struct br_scanfs *fsscan, struct br_scrubber *fsscrub)
+{
+ int32_t ret = -1;
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+
+ scrub_monitor = &priv->scrub_monitor;
+ ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
+ "failed to spawn bitrot scrubber daemon [Brick: %s]",
+ child->brick_path);
+ goto error_return;
+ }
+
+ /* Signal monitor to kick off state machine*/
+ pthread_mutex_lock (&scrub_monitor->mutex);
+ {
+ /* only the first launch signals; later ones find inited set */
+ if (!scrub_monitor->inited)
+ pthread_cond_signal (&scrub_monitor->cond);
+ scrub_monitor->inited = _gf_true;
+ }
+ pthread_mutex_unlock (&scrub_monitor->mutex);
+
+ /**
+ * Everything has been setup.. add this subvolume to scrubbers
+ * list.
+ */
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_add_tail (&child->list, &fsscrub->scrublist);
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/**
+ * Prepare @child for scrubbing and launch its scrubber.
+ *
+ * On the child's first connection the per-child scan state (entry lock,
+ * wait mutex/condvar, entry counter and the queued/ready lists) is
+ * initialized before launching; on later connections the scrubber is
+ * launched directly.
+ *
+ * Returns 0 on success and -1 when launching fails; on a failed first
+ * launch the locks and condvar initialized here are destroyed again.
+ */
+static int32_t
+br_enact_scrubber (xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ /**
+ * if this child already witnesses a successfull connection earlier
+ * there's no need to initialize mutexes, condvars, etc..
+ */
+ if (_br_child_witnessed_connection (child))
+ return br_launch_scrubber (this, child, fsscan, fsscrub);
+
+ LOCK_INIT (&fsscan->entrylock);
+ pthread_mutex_init (&fsscan->waitlock, NULL);
+ pthread_cond_init (&fsscan->waitcond, NULL);
+
+ fsscan->entries = 0;
+ INIT_LIST_HEAD (&fsscan->queued);
+ INIT_LIST_HEAD (&fsscan->ready);
+
+ ret = br_launch_scrubber (this, child, fsscan, fsscrub);
+ if (ret)
+ goto error_return;
+
+ return 0;
+
+ error_return:
+ /* undo the initialization done above */
+ LOCK_DESTROY (&fsscan->entrylock);
+ pthread_mutex_destroy (&fsscan->waitlock);
+ pthread_cond_destroy (&fsscan->waitcond);
+
+ return -1;
+}
+
+/**
+ * Bring @child into service under child->lock: start the scrubber when
+ * this process is a scrubber (priv->iamscrubber), otherwise start the
+ * signer. On success the child is marked as having witnessed a
+ * connection and its state becomes BR_CHILD_STATE_CONNECTED.
+ *
+ * Returns the result of br_enact_scrubber() / br_enact_signer().
+ */
+static int32_t
+br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+ int32_t ret = -1;
+ br_private_t *priv = this->private;
+
+ pthread_mutex_lock (&child->lock);
+ {
+ if (priv->iamscrubber)
+ ret = br_enact_scrubber (this, child);
+ else
+ ret = br_enact_signer (this, child, stub);
+
+ if (!ret) {
+ child->witnessed = 1;
+ /* lock already held: use the unlocked setter */
+ _br_set_child_state (child, BR_CHILD_STATE_CONNECTED);
+ gf_msg (this->name, GF_LOG_INFO,
+ 0, BRB_MSG_CONNECTED_TO_BRICK,
+ "Connected to brick %s..", child->brick_path);
+ }
+ }
+ pthread_mutex_unlock (&child->lock);
+
+ return ret;
+}
+
+/**
+ * This routine fetches various attributes associated with a child which
+ * is basically a subvolume. Attributes include brick path and the stub
+ * birth time. This is done by performing a lookup on the root followed
+ * by getxattr() on a virtual key. Depending on the configuration, the
+ * process either acts as a signer or a scrubber.
+ *
+ * @this:  bit-rot xlator
+ * @child: child (subvolume) that just connected
+ *
+ * Returns 0 on success. On any failure a non-zero value is returned and,
+ * when @child is available, its state is set to
+ * BR_CHILD_STATE_CONNFAILED.
+ */
+int32_t
+br_brick_connect (xlator_t *this, br_child_t *child)
+{
+        int32_t         ret      = -1;
+        loc_t           loc      = {0, };
+        struct iatt     buf      = {0, };
+        struct iatt     parent   = {0, };
+        br_stub_init_t *stub     = NULL;
+        dict_t         *xattr    = NULL;
+        int             op_errno = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, child, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        br_child_set_scrub_state (child, _gf_false);
+        br_set_child_state (child, BR_CHILD_STATE_INITIALIZING);
+
+        loc.inode = inode_ref (child->table->root);
+        gf_uuid_copy (loc.gfid, loc.inode->gfid);
+        loc.path = gf_strdup ("/");
+
+        ret = syncop_lookup (child->xl, &loc, &buf, &parent, NULL, NULL);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        BRB_MSG_LOOKUP_FAILED, "lookup on root failed");
+                goto wipeloc;
+        }
+
+        ret = syncop_getxattr (child->xl, &loc, &xattr,
+                               GLUSTERFS_GET_BR_STUB_INIT_TIME, NULL, NULL);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        BRB_MSG_GET_INFO_FAILED, "failed to get stub info");
+                goto wipeloc;
+        }
+
+        ret = dict_get_ptr (xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                            (void **)&stub);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED,
+                        "failed to extract stub information");
+                goto free_dict;
+        }
+
+        /*
+         * NOTE(review): the copy is bounded only by the length of
+         * stub->export; confirm it always fits child->brick_path.
+         */
+        memcpy (child->brick_path, stub->export, strlen (stub->export) + 1);
+        child->tv.tv_sec = ntohl (stub->timebuf[0]);
+        child->tv.tv_usec = ntohl (stub->timebuf[1]);
+
+        ret = br_child_enaction (this, child, stub);
+
+ free_dict:
+        dict_unref (xattr);
+ wipeloc:
+        loc_wipe (&loc);
+ out:
+        /*
+         * The validation above can jump here with child == NULL;
+         * br_set_child_state() locks child->lock, so skip it then.
+         */
+        if (ret && child)
+                br_set_child_state (child, BR_CHILD_STATE_CONNFAILED);
+        return ret;
+}
+
+/**
+ * TODO: cleanup signer
+ *
+ * Signer side counterpart of br_cleanup_scrubber(), called from
+ * br_brick_disconnect(). Currently a placeholder that does nothing and
+ * always returns 0.
+ */
+static int32_t
+br_cleanup_signer (xlator_t *this, br_child_t *child)
+{
+ return 0;
+}
+
+/**
+ * Take a disconnected child out of scrubbing: drop it from the active
+ * scrub accounting (if it was actively scrubbing), unlink it from the
+ * scrubber's list and clean up its scanner thread.
+ *
+ * br_brick_disconnect() calls this with the monitor's wakelock and the
+ * child's lock held. A failure to clean up the scanner thread is only
+ * logged; the function always returns 0.
+ */
+static int32_t
+br_cleanup_scrubber (xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (_br_is_child_scrub_active (child)) {
+ scrub_monitor->active_child_count--;
+ br_child_set_scrub_state (child, _gf_false);
+ }
+
+ /**
+ * 0x0: child (brick) goes out of rotation
+ *
+ * This is fully safe w.r.t. entries for this child being actively
+ * scrubbed. Each of the scrubber thread(s) would finish scrubbing
+ * the entry (probably failing due to disconnection) and either
+ * putting the entry back into the queue or continuing further.
+ * Either way, pending entries for this child's queue need not be
+ * drained; entries just sit there in the queued/ready list to be
+ * consumed later upon re-connection.
+ */
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_del_init (&child->list);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ /**
+ * 0x1: cleanup scanner thread
+ *
+ * The pending timer needs to be removed _after_ cleaning up the
+ * filesystem scanner (scheduling the next scrub time is not a
+ * cancellation point).
+ */
+ ret = gf_thread_cleanup_xint (child->thread);
+ if (ret)
+ gf_msg (this->name, GF_LOG_INFO,
+ 0, BRB_MSG_SCRUB_THREAD_CLEANUP,
+ "Error cleaning up scanner thread");
+
+ gf_msg (this->name, GF_LOG_INFO,
+ 0, BRB_MSG_SCRUBBER_CLEANED,
+ "Cleaned up scrubber for brick [%s]", child->brick_path);
+
+ return 0;
+}
+
+/**
+ * OK.. this child has made up its mind to go down the drain. So,
+ * let's clean up what it touched. (NOTE: there's no need to clean
+ * the inode table, it's just reused taking care of stale inodes)
+ *
+ * Nothing is done unless the child is currently in the CONNECTED
+ * state. Returns the result of the role specific cleanup routine, or
+ * 0 when the child was not connected.
+ */
+int32_t
+br_brick_disconnect (xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ struct br_monitor *scrub_monitor = NULL;
+ br_private_t *priv = this->private;
+
+ scrub_monitor = &priv->scrub_monitor;
+
+ /* Lock order should be wakelock and then child lock to avoid
+ * deadlocks.
+ */
+ pthread_mutex_lock (&scrub_monitor->wakelock);
+ {
+ pthread_mutex_lock (&child->lock);
+ {
+ if (!_br_is_child_connected (child))
+ goto unblock;
+
+ /* child is on death row.. */
+ _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
+
+ if (priv->iamscrubber)
+ ret = br_cleanup_scrubber (this, child);
+ else
+ ret = br_cleanup_signer (this, child);
+ }
+ unblock:
+ pthread_mutex_unlock (&child->lock);
+ }
+ pthread_mutex_unlock (&scrub_monitor->wakelock);
+
+ return ret;
+}
+
+/**
+ * Event dispatcher thread. It sleeps until notify() queues a child
+ * event on priv->bricks, dequeues the oldest event and runs the handler
+ * recorded in it (br_brick_connect or br_brick_disconnect) for that
+ * child. Handler failures are logged; the loop never terminates.
+ */
+void *
+br_handle_events (void *arg)
+{
+        xlator_t              *this  = arg;
+        br_private_t          *conf  = this->private;
+        br_child_t            *child = NULL;
+        struct br_child_event *event = NULL;
+        int32_t                rc    = 0;
+
+        /*
+         * bit-rot is the topmost xlator, hence nobody sets THIS on our
+         * behalf (there is no STACK_WIND into us). Each spawned thread
+         * has to set it, otherwise "THIS" resolves to global_xlator.
+         */
+        THIS = this;
+
+        for (;;) {
+                pthread_mutex_lock (&conf->lock);
+
+                while (list_empty (&conf->bricks))
+                        pthread_cond_wait (&conf->cond, &conf->lock);
+
+                event = list_first_entry (&conf->bricks,
+                                          struct br_child_event, list);
+                list_del_init (&event->list);
+
+                pthread_mutex_unlock (&conf->lock);
+
+                child = event->child;
+                rc = event->call (this, child);
+                if (rc != 0)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                BRB_MSG_SUBVOL_CONNECT_FAILED,
+                                "callback handler for subvolume [%s] failed",
+                                child->xl->name);
+                GF_FREE (event);
+        }
+
+        return NULL;
+}
+
+/**
+ * Initialize memory accounting for this xlator.
+ *
+ * Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init() (a failure is logged as a warning).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int32_t status = -1;
+
+        if (this == NULL)
+                return status;
+
+        status = xlator_mem_acct_init (this, gf_br_stub_mt_end + 1);
+        if (status != 0)
+                gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED,
+                        "Memory accounting init failed");
+
+        return status;
+}
+
+/**
+ * Queue an event for @child at the tail of priv->bricks; @call is the
+ * handler br_handle_events() will run for it. No locking is done here:
+ * notify() invokes this with priv->lock held. If the event cannot be
+ * allocated the failure is logged and the event is dropped.
+ */
+static void
+_br_qchild_event (xlator_t *this, br_child_t *child, br_child_handler *call)
+{
+        br_private_t          *conf  = this->private;
+        struct br_child_event *event = NULL;
+
+        event = GF_CALLOC (1, sizeof (*event), gf_br_mt_br_child_event_t);
+        if (event == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                        "Event unhandled for child.. [Brick: %s]",
+                        child->xl->name);
+                return;
+        }
+
+        INIT_LIST_HEAD (&event->list);
+        event->this  = this;
+        event->child = child;
+        event->call  = call;
+
+        list_add_tail (&event->list, &conf->bricks);
+}
+
+/**
+ * Fill *dict with the scrubber's status: the bad object list (via
+ * br_get_bad_objects_list()) plus the counters kept in priv->scrub_stat.
+ *
+ * Individual failures are only logged at debug level and do not stop
+ * the remaining keys from being set. The value returned is the result
+ * of the last dict set call ("last-scrub-time"), or -1 when
+ * this->private is NULL.
+ */
+int
+br_scrubber_status_get (xlator_t *this, dict_t **dict)
+{
+
+ int ret = -1;
+ br_private_t *priv = NULL;
+ struct br_scrub_stats *scrub_stats = NULL;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", priv, out);
+
+ scrub_stats = &priv->scrub_stat;
+
+ ret = br_get_bad_objects_list (this, dict);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Failed to collect corrupt "
+ "files");
+ }
+
+ ret = dict_set_int8 (*dict, "scrub-running",
+ scrub_stats->scrub_running);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Failed setting scrub_running "
+ "entry to the dictionary");
+ }
+
+ ret = dict_set_uint64 (*dict, "scrubbed-files",
+ scrub_stats->scrubbed_files);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Failed to setting scrubbed file "
+ "entry to the dictionary");
+ }
+
+ ret = dict_set_uint64 (*dict, "unsigned-files",
+ scrub_stats->unsigned_files);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Failed to set unsigned file count"
+ " entry to the dictionary");
+ }
+
+ ret = dict_set_uint64 (*dict, "scrub-duration",
+ scrub_stats->scrub_duration);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Failed to set scrub duration"
+ " entry to the dictionary");
+ }
+
+ ret = dict_set_dynstr_with_alloc (*dict, "last-scrub-time",
+ scrub_stats->last_scrub_time);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Failed to set "
+ "last scrub time value");
+ }
+
+out:
+ return ret;
+}
+
+/**
+ * Event handler of the bit-rot daemon xlator.
+ *
+ * GF_EVENT_CHILD_UP / GF_EVENT_CHILD_DOWN: update the child's up/down
+ * bookkeeping under priv->lock and queue br_brick_connect /
+ * br_brick_disconnect for the event thread (br_handle_events). The
+ * event is passed on through default_notify() once all children are up
+ * (CHILD_UP) or none is up anymore (CHILD_DOWN). Events from a
+ * subvolume that is not one of our children are logged and dropped.
+ *
+ * GF_EVENT_SCRUB_STATUS: the first variadic argument is a dict_t *
+ * that gets filled by br_scrubber_status_get().
+ *
+ * Any other event is handed to default_notify(). Always returns 0.
+ */
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        int           idx    = -1;
+        int           ret    = -1;
+        xlator_t     *subvol = NULL;
+        br_child_t   *child  = NULL;
+        br_private_t *priv   = NULL;
+        dict_t       *output = NULL;
+        va_list       ap;
+
+        subvol = (xlator_t *)data;
+        priv = this->private;
+
+        gf_msg_trace (this->name, 0, "Notification received: %d", event);
+
+        idx = br_find_child_index (this, subvol);
+
+        switch (event) {
+        case GF_EVENT_CHILD_UP:
+                if (idx < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                BRB_MSG_INVALID_SUBVOL, "Got event %d from "
+                                "invalid subvolume", event);
+                        goto out;
+                }
+
+                pthread_mutex_lock (&priv->lock);
+                {
+                        child = &priv->children[idx];
+                        if (child->child_up == 1)
+                                goto unblock_0;
+                        priv->up_children++;
+
+                        child->child_up = 1;
+                        child->xl = subvol;
+                        if (!child->table)
+                                child->table = inode_table_new (4096, subvol);
+
+                        _br_qchild_event (this, child, br_brick_connect);
+                        pthread_cond_signal (&priv->cond);
+                }
+        unblock_0:
+                pthread_mutex_unlock (&priv->lock);
+
+                if (priv->up_children == priv->child_count)
+                        default_notify (this, event, data);
+                break;
+
+        case GF_EVENT_CHILD_DOWN:
+                if (idx < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                BRB_MSG_INVALID_SUBVOL_CHILD,
+                                "Got event %d from invalid subvolume", event);
+                        goto out;
+                }
+
+                pthread_mutex_lock (&priv->lock);
+                {
+                        child = &priv->children[idx];
+                        if (child->child_up == 0)
+                                goto unblock_1;
+
+                        child->child_up = 0;
+                        priv->up_children--;
+
+                        _br_qchild_event (this, child, br_brick_disconnect);
+                        pthread_cond_signal (&priv->cond);
+                }
+        unblock_1:
+                pthread_mutex_unlock (&priv->lock);
+
+                if (priv->up_children == 0)
+                        default_notify (this, event, data);
+                break;
+
+        case GF_EVENT_SCRUB_STATUS:
+                /* second argument is an errno, not a log level: there is
+                 * no error to report here, so pass 0 like every other
+                 * gf_msg_debug() call in this file */
+                gf_msg_debug (this->name, 0, "BitRot scrub status "
+                              "called");
+                va_start (ap, data);
+                output = va_arg (ap, dict_t *);
+                va_end (ap);
+
+                ret = br_scrubber_status_get (this, &output);
+                gf_msg_debug (this->name, 0, "returning %d", ret);
+                break;
+        default:
+                default_notify (this, event, data);
+        }
+
+ out:
+        return 0;
+}
+
+/**
+ * Signer specific structures and worker threads: teardown
+ * (br_fini_signer) followed by initialization, which spawns the worker
+ * threads (br_init_signer).
+ */
+
+/* Stop every signer worker thread and destroy the object condvar. */
+static void
+br_fini_signer (xlator_t *this, br_private_t *priv)
+{
+        int idx = 0;
+
+        while (idx < BR_WORKERS) {
+                (void) gf_thread_cleanup_xint (priv->obj_queue->workers[idx]);
+                idx++;
+        }
+
+        pthread_cond_destroy (&priv->object_cond);
+}
+
+/**
+ * Set up the signer: initialize the gfchangelog context, the object
+ * condition variable and the object queue, then spawn BR_WORKERS
+ * worker threads running br_process_object().
+ *
+ * Returns 0 on success. On failure the worker threads spawned so far
+ * are cleaned up, the queue is freed, the condition variable is
+ * destroyed and -1 is returned.
+ */
+static int32_t
+br_init_signer (xlator_t *this, br_private_t *priv)
+{
+ int i = 0;
+ int32_t ret = -1;
+
+ /* initialize gfchangelog xlator context */
+ ret = gf_changelog_init (this);
+ if (ret)
+ goto out;
+
+ pthread_cond_init (&priv->object_cond, NULL);
+
+ priv->obj_queue = GF_CALLOC (1, sizeof (*priv->obj_queue),
+ gf_br_mt_br_ob_n_wk_t);
+ if (!priv->obj_queue)
+ goto cleanup_cond;
+ INIT_LIST_HEAD (&priv->obj_queue->objects);
+
+ for (i = 0; i < BR_WORKERS; i++) {
+ ret = gf_thread_create (&priv->obj_queue->workers[i], NULL,
+ br_process_object, this);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_SPAWN_FAILED, "thread creation"
+ " failed");
+ ret = -1;
+ goto cleanup_threads;
+ }
+ }
+
+ return 0;
+
+ cleanup_threads:
+ /* i is the index of the worker that failed to start; only the
+ * workers before it exist */
+ for (i--; i >= 0; i--) {
+ (void) gf_thread_cleanup_xint (priv->obj_queue->workers[i]);
+ }
+
+ GF_FREE (priv->obj_queue);
+
+ cleanup_cond:
+ /* that's explicit */
+ pthread_cond_destroy (&priv->object_cond);
+ out:
+ return -1;
+}
+
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ *
+ * @child_count and @numbricks are only used when BR_RATE_LIMIT_SIGNER
+ * is defined, to scale the token rate. The resulting token bucket
+ * filter is stored in priv->tbf.
+ *
+ * Returns 0 when tbf_init() yields a filter, -1 otherwise.
+ */
+static int32_t
+br_rate_limit_signer (xlator_t *this, int child_count, int numbricks)
+{
+ br_private_t *priv = NULL;
+ tbf_opspec_t spec = {0,};
+
+ priv = this->private;
+
+ spec.op = TBF_OP_HASH;
+ spec.rate = 0;
+ spec.maxlimit = 0;
+
+/**
+ * OK. Most implementations of TBF I've come across generate tokens
+ * every second (UML, etc..) and some chose sub-second granularity
+ * (blk-iothrottle cgroups). TBF algorithm itself does not enforce
+ * any logic for choosing generation interval and it seems pretty
+ * logical as one could jack up token count per interval w.r.t.
+ * generation rate.
+ *
+ * Value used here is chosen based on a series of test(s) performed
+ * to balance object signing time and not maxing out on all available
+ * CPU cores. It's obvious to have seconds granularity and jack up
+ * token count per interval, thereby achieving close to similar
+ * results. Let's stick to this as it seems to be working fine for
+ * the set of ops that are throttled.
+ **/
+ spec.token_gen_interval = 600000; /* In usec */
+
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+ double contribution = 0;
+ contribution = ((double)1 - ((double)child_count / (double)numbricks));
+ /* all bricks belong to this volume: fall back to the full rate */
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+ spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+ /* rate is left at 0 unless BR_RATE_LIMIT_SIGNER set it above */
+ if (!spec.rate)
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "[Rate Limit Info] \"FULL THROTTLE\"");
+ else
+ gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "[Rate Limit Info] \"tokens/sec (rate): %lu, "
+ "maxlimit: %lu\"", spec.rate, spec.maxlimit);
+
+ priv->tbf = tbf_init (&spec, 1);
+ return priv->tbf ? 0 : -1;
+}
+
+/**
+ * Load the signer's "expiry-time" option into priv->expiry_time: from
+ * the xlator's own options when @options is NULL (init time), from the
+ * supplied dictionary otherwise (reconfigure).
+ *
+ * Returns 0 on success, -1 when the option could not be processed.
+ */
+static int32_t
+br_signer_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+        if (!options) {
+                GF_OPTION_INIT ("expiry-time", priv->expiry_time,
+                                uint32, bad_option);
+        } else {
+                GF_OPTION_RECONF ("expiry-time", priv->expiry_time,
+                                  options, uint32, bad_option);
+        }
+
+        return 0;
+
+bad_option:
+        return -1;
+}
+
+/**
+ * Signer initialization: read the "expiry-time" and "brick-count"
+ * options, set up the token bucket filter (br_rate_limit_signer) and
+ * then the signer itself (br_init_signer).
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int32_t
+br_signer_init (xlator_t *this, br_private_t *priv)
+{
+ int32_t ret = 0;
+ int numbricks = 0;
+
+ GF_OPTION_INIT ("expiry-time", priv->expiry_time, uint32, error_return);
+ GF_OPTION_INIT ("brick-count", numbricks, int32, error_return);
+
+ ret = br_rate_limit_signer (this, priv->child_count, numbricks);
+ if (ret)
+ goto error_return;
+
+ ret = br_init_signer (this, priv);
+ if (ret)
+ goto cleanup_tbf;
+
+ return 0;
+
+ cleanup_tbf:
+ /* cleanup TBF */
+ /* TODO: nothing is released here yet; priv->tbf set up by
+ * br_rate_limit_signer() stays allocated on this path */
+ error_return:
+ return -1;
+
+}
+
+/**
+ * Tear down the scrub monitor: remove and free its pending timer (if
+ * any), clean up the monitor thread and destroy every mutex, condition
+ * variable and lock it owns.
+ */
+static void
+br_free_scrubber_monitor (xlator_t *this, br_private_t *priv)
+{
+        struct br_monitor *mon = &priv->scrub_monitor;
+
+        if (mon->timer != NULL) {
+                (void) gf_tw_del_timer (priv->timer_wheel, mon->timer);
+
+                GF_FREE (mon->timer);
+                mon->timer = NULL;
+        }
+
+        (void) gf_thread_cleanup_xint (mon->thread);
+
+        /* monitor start-up pair */
+        pthread_mutex_destroy (&mon->mutex);
+        pthread_cond_destroy (&mon->cond);
+
+        /* wake-up pair */
+        pthread_mutex_destroy (&mon->wakelock);
+        pthread_cond_destroy (&mon->wakecond);
+
+        /* completion pair */
+        pthread_mutex_destroy (&mon->donelock);
+        pthread_cond_destroy (&mon->donecond);
+
+        LOCK_DESTROY (&mon->lock);
+}
+
+/**
+ * Release the per-child resources (timer mem-pool and lock) of the
+ * first @count children, walking from the last one down to the first,
+ * then free the children array itself.
+ */
+static void
+br_free_children (xlator_t *this, br_private_t *priv, int count)
+{
+        br_child_t *child = NULL;
+        int         idx   = count - 1;
+
+        while (idx >= 0) {
+                child = &priv->children[idx];
+                mem_pool_destroy (child->timer_pool);
+                pthread_mutex_destroy (&child->lock);
+                idx--;
+        }
+
+        GF_FREE (priv->children);
+        priv->children = NULL;
+}
+
+/**
+ * Allocate and initialize one br_child_t per subvolume of this xlator:
+ * lock, initial (DISCONNECTED) state, back pointers and the timer
+ * mem-pool.
+ *
+ * Returns 0 on success. On failure everything initialized so far is
+ * released (priv->children is reset to NULL) and -1 is returned.
+ */
+static int
+br_init_children (xlator_t *this, br_private_t *priv)
+{
+        int            i     = 0;
+        br_child_t    *child = NULL;
+        xlator_list_t *trav  = NULL;
+
+        priv->child_count = xlator_subvolume_count (this);
+        priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
+                                    gf_br_mt_br_child_t);
+        if (!priv->children)
+                goto err;
+
+        trav = this->children;
+        while (trav) {
+                child = &priv->children[i];
+
+                pthread_mutex_init (&child->lock, NULL);
+                child->witnessed = 0;
+
+                br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
+
+                child->this = this;
+                child->xl = trav->xlator;
+
+                child->timer_pool = mem_pool_new
+                                    (struct gf_tw_timer_list, 4096);
+                if (!child->timer_pool) {
+                        gf_msg (this->name, GF_LOG_ERROR,
+                                ENOMEM, BRB_MSG_NO_MEMORY,
+                                "failed to allocate mem-pool for timer");
+                        /* br_free_children() below only covers children
+                         * [0, i); this child's lock is already
+                         * initialized, so release it here */
+                        pthread_mutex_destroy (&child->lock);
+                        errno = ENOMEM;
+                        goto freechild;
+                }
+
+                INIT_LIST_HEAD (&child->list);
+
+                i++;
+                trav = trav->next;
+        }
+
+        return 0;
+
+ freechild:
+        br_free_children (this, priv, i);
+ err:
+        return -1;
+}
+
+/**
+ * xlator init: allocate the private structure, set up the children,
+ * pick the role (signer or scrubber) from the "scrubber" option,
+ * initialize that role and finally spawn the event thread
+ * (br_handle_events).
+ *
+ * Returns 0 on success. On failure the private structure and the
+ * children are released, this->private is reset to NULL and -1 is
+ * returned.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int32_t       ret  = -1;
+        br_private_t *priv = NULL;
+
+        if (!this->children) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD,
+                        "FATAL: no children");
+                goto out;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t);
+        if (!priv) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+                        "failed to allocate memory (->priv)");
+                goto out;
+        }
+
+        /* priv is already allocated: leave through free_priv so that a
+         * bad option value does not leak it */
+        GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, free_priv);
+
+        ret = br_init_children (this, priv);
+        if (ret)
+                goto free_priv;
+
+        pthread_mutex_init (&priv->lock, NULL);
+        pthread_cond_init (&priv->cond, NULL);
+
+        INIT_LIST_HEAD (&priv->bricks);
+        INIT_LIST_HEAD (&priv->signing);
+
+        priv->timer_wheel = glusterfs_global_timer_wheel (this);
+        if (!priv->timer_wheel) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
+                        "global timer wheel unavailable");
+                goto cleanup;
+        }
+
+        /* the role specific init routines read this->private */
+        this->private = priv;
+
+        if (!priv->iamscrubber) {
+                ret = br_signer_init (this, priv);
+                if (!ret)
+                        ret = br_signer_handle_options (this, priv, NULL);
+        } else {
+                ret = br_scrubber_init (this, priv);
+                if (!ret)
+                        ret = br_scrubber_handle_options (this, priv, NULL);
+        }
+
+        if (ret)
+                goto cleanup;
+
+        ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        BRB_MSG_SPAWN_FAILED, "thread creation failed");
+                ret = -1;
+        }
+
+        if (!ret) {
+                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED,
+                        "bit-rot xlator loaded in \"%s\" mode",
+                        (priv->iamscrubber) ? "SCRUBBER" : "SIGNER");
+                return 0;
+        }
+
+ cleanup:
+        (void) pthread_cond_destroy (&priv->cond);
+        (void) pthread_mutex_destroy (&priv->lock);
+
+        br_free_children (this, priv, priv->child_count);
+
+ free_priv:
+        GF_FREE (priv);
+ out:
+        this->private = NULL;
+        return -1;
+}
+
+/**
+ * xlator fini: undo the role specific setup (signer workers or scrub
+ * monitor), release the children and free the private structure.
+ * Does nothing when the xlator has no private structure.
+ */
+void
+fini (xlator_t *this)
+{
+        br_private_t *conf = this->private;
+
+        if (conf == NULL)
+                return;
+
+        if (conf->iamscrubber)
+                (void) br_free_scrubber_monitor (this, conf);
+        else
+                br_fini_signer (this, conf);
+
+        br_free_children (this, conf, conf->child_count);
+
+        this->private = NULL;
+        GF_FREE (conf);
+}
+
+/**
+ * Re-run the scrub state machine after a reconfigure; a failure is
+ * only logged.
+ */
+static void
+br_reconfigure_monitor (xlator_t *this)
+{
+        if (br_scrub_state_machine (this) == 0)
+                return;
+
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
+                "Could not reschedule scrubber for the volume. Scrubbing "
+                "will continue according to old frequency.");
+}
+
+/**
+ * Apply reconfigured scrubber options and, when that succeeds, let the
+ * monitor pick up the new settings. Both steps run under priv->lock
+ * (taken separately for each step).
+ *
+ * Returns the result of br_scrubber_handle_options().
+ */
+static int
+br_reconfigure_scrubber (xlator_t *this, dict_t *options)
+{
+        br_private_t *conf   = this->private;
+        int32_t       status = -1;
+
+        pthread_mutex_lock (&conf->lock);
+        status = br_scrubber_handle_options (this, conf, options);
+        pthread_mutex_unlock (&conf->lock);
+
+        if (status != 0)
+                return status;
+
+        /* change state for all _up_ subvolume(s) */
+        pthread_mutex_lock (&conf->lock);
+        br_reconfigure_monitor (this);
+        pthread_mutex_unlock (&conf->lock);
+
+        return status;
+}
+
+/* Apply reconfigured signer options; see br_signer_handle_options(). */
+static int
+br_reconfigure_signer (xlator_t *this, dict_t *options)
+{
+        br_private_t *conf = this->private;
+        int           rc   = 0;
+
+        rc = br_signer_handle_options (this, conf, options);
+
+        return rc;
+}
+
+/**
+ * xlator reconfigure: dispatch to the scrubber or signer specific
+ * reconfigure routine depending on the role of this process.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        br_private_t *conf = this->private;
+
+        if (conf->iamscrubber)
+                return br_reconfigure_scrubber (this, options);
+
+        return br_reconfigure_signer (this, options);
+}
+
+struct xlator_fops fops;
+
+struct xlator_cbks cbks;
+
+/* Options understood by the bit-rot daemon xlator; the table is
+ * terminated by an entry with a NULL key. */
+struct volume_options options[] = {
+ /* NOTE(review): declared as INT but read with the uint32
+ * conversion in br_signer_init()/br_signer_handle_options() --
+ * confirm this is intended */
+ { .key = {"expiry-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = SIGNING_TIMEOUT,
+ .description = "Waiting time for an object on which it waits "
+ "before it is signed",
+ },
+ /* NOTE(review): declared as STR but read with the int32
+ * conversion in br_signer_init() -- confirm this is intended */
+ { .key = {"brick-count"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Total number of bricks for the current node for "
+ "all volumes in the trusted storage pool.",
+ },
+ /* selects the role in init(): scrubber when true, signer otherwise */
+ { .key = {"scrubber"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "option to run as a scrubber",
+ },
+ { .key = {"scrub-throttle"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Scrub-throttle value is a measure of how fast "
+ "or slow the scrubber scrubs the filesystem for "
+ "volume <VOLNAME>",
+ },
+ { .key = {"scrub-freq"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "biweekly",
+ .description = "Scrub frequency for volume <VOLNAME>",
+ },
+ { .key = {"scrub-state"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "active",
+ .description = "Pause/Resume scrub. Upon resume, scrubber "
+ "continues from where it left off.",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
new file mode 100644
index 00000000000..b5448f76d52
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -0,0 +1,307 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_H__
+#define __BIT_ROT_H__
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "syncop.h"
+#include "syncop-utils.h"
+#include "changelog.h"
+#include "timer-wheel.h"
+
+#include "throttle-tbf.h"
+#include "bit-rot-ssm.h"
+
+#include "bit-rot-common.h"
+#include "bit-rot-stub-mem-types.h"
+#include "bit-rot-scrub-status.h"
+
+#include <openssl/sha.h>
+
+/**
+ * TODO: make this configurable. As a best practice, set this to the
+ * number of processor cores.
+ */
+#define BR_WORKERS 4
+
+/* Scrub throttle levels; the current one is kept in
+ * struct br_scrubber (throttle). */
+typedef enum scrub_throttle {
+ BR_SCRUB_THROTTLE_VOID = -1,
+ BR_SCRUB_THROTTLE_LAZY = 0,
+ BR_SCRUB_THROTTLE_NORMAL = 1,
+ BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
+ BR_SCRUB_THROTTLE_STALLED = 3,
+} scrub_throttle_t;
+
+/* Scrub frequencies; the current one is kept in struct br_scrubber
+ * (frequency). BR_FSSCRUB_FREQ_STALLED makes
+ * _br_child_get_scrub_event() report a pause instead of a schedule
+ * event. */
+typedef enum scrub_freq {
+ BR_FSSCRUB_FREQ_HOURLY = 1,
+ BR_FSSCRUB_FREQ_DAILY,
+ BR_FSSCRUB_FREQ_WEEKLY,
+ BR_FSSCRUB_FREQ_BIWEEKLY,
+ BR_FSSCRUB_FREQ_MONTHLY,
+ BR_FSSCRUB_FREQ_MINUTE,
+ BR_FSSCRUB_FREQ_STALLED,
+} scrub_freq_t;
+
+/* Size of a signature record for a hash of length hl; the argument is
+ * parenthesized so that expressions (e.g. a ? b : c) expand safely. */
+#define signature_size(hl) (sizeof (br_isignature_t) + (hl) + 1)
+
+/* Per subvolume filesystem scanner state (embedded in br_child_t as
+ * fsscan). */
+struct br_scanfs {
+ gf_lock_t entrylock;
+
+ pthread_mutex_t waitlock;
+ pthread_cond_t waitcond;
+
+ unsigned int entries;
+ /* entries of a disconnected child stay on these lists and are
+ * consumed after re-connection (see br_cleanup_scrubber) */
+ struct list_head queued;
+ struct list_head ready;
+};
+
+/* the four states used to track child status */
+typedef enum br_child_state {
+ BR_CHILD_STATE_CONNECTED = 1,
+ BR_CHILD_STATE_INITIALIZING,
+ BR_CHILD_STATE_CONNFAILED,
+ BR_CHILD_STATE_DISCONNECTED,
+} br_child_state_t;
+
+/* Per subvolume (brick) state of the bit-rot daemon. */
+struct br_child {
+ pthread_mutex_t lock; /* protects child state */
+ char witnessed; /* witnessed at least one successful
+ connection */
+ br_child_state_t c_state; /* current state of this child */
+
+ char child_up; /* Indicates whether this child is
+ up or not */
+ xlator_t *xl; /* client xlator corresponding to
+ this child */
+ inode_table_t *table; /* inode table for this child */
+ char brick_path[PATH_MAX]; /* brick export directory of this
+ child */
+ struct list_head list; /* hook to attach to the list of
+ UP children */
+ xlator_t *this; /* Bit rot xlator */
+
+ pthread_t thread; /* initial crawler for unsigned
+ object(s) or scrub crawler */
+ int threadrunning; /* active thread */
+
+ struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
+
+ struct timeval tv;
+
+ struct br_scanfs fsscan; /* per subvolume FS scanner */
+
+ gf_boolean_t active_scrubbing; /* Actively scrubbing or not */
+};
+
+typedef struct br_child br_child_t;
+
+/* Signer work queue together with the worker threads serving it. */
+struct br_obj_n_workers {
+ struct list_head objects; /* queue of objects expired from the
+ timer wheel and ready to be picked
+ up for signing */
+ pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects
+ from the above queue and start
+ signing each object */
+};
+
+/* Scrubber wide state shared by all children (br_private_t::fsscrub). */
+struct br_scrubber {
+ xlator_t *this;
+
+ scrub_throttle_t throttle;
+
+ /**
+ * frequency of scanning for this subvolume. this should
+ * normally be per-child, but since all children follow the
+ * same frequency for a volume, this option ends up here
+ * instead of br_child_t.
+ */
+ scrub_freq_t frequency;
+
+ gf_boolean_t frequency_reconf;
+ gf_boolean_t throttle_reconf;
+
+ /* held by br_cleanup_scrubber() while unlinking a child's list
+ * hook */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ unsigned int nr_scrubbers;
+ struct list_head scrubbers;
+
+ /**
+ * list of "rotatable" subvolume(s) undergoing scrubbing
+ */
+ struct list_head scrublist;
+};
+
+/* State of the scrub monitor (br_private_t::scrub_monitor); torn down
+ * by br_free_scrubber_monitor(). */
+struct br_monitor {
+ gf_lock_t lock;
+ pthread_t thread; /* Monitor thread */
+
+ gf_boolean_t inited;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond; /* Thread starts and will be waiting on cond.
+ First child which is up wakes this up */
+
+ xlator_t *this;
+ /* scheduler */
+ uint32_t boot;
+
+ int32_t active_child_count; /* Number of children currently scrubbing */
+ gf_boolean_t kick; /* This variable tracks the scrubber is
+ * kicked or not. Both 'kick' and
+ * 'active_child_count' uses the same pair
+ * of mutex-cond variable, i.e, wakelock and
+ * wakecond. */
+
+ /* taken before child->lock (see br_brick_disconnect) */
+ pthread_mutex_t wakelock;
+ pthread_cond_t wakecond;
+
+ gf_boolean_t done;
+ pthread_mutex_t donelock;
+ pthread_cond_t donecond;
+
+ struct gf_tw_timer_list *timer;
+ br_scrub_state_t state; /* current scrub state */
+};
+
+typedef struct br_obj_n_workers br_obj_n_workers_t;
+
+typedef struct br_private br_private_t;
+
+typedef void (*br_scrubbed_file_update) (br_private_t *priv);
+
+/* Private state of the bit-rot daemon xlator (this->private). */
+struct br_private {
+ pthread_mutex_t lock;
+
+ struct list_head bricks; /* list of bricks from which events
+ have been received */
+
+ struct list_head signing;
+
+ pthread_cond_t object_cond; /* handling signing of objects */
+ int child_count;
+ br_child_t *children; /* list of subvolumes */
+ int up_children;
+
+ pthread_cond_t cond; /* handling CHILD_UP notifications */
+ pthread_t thread; /* thread for connecting each UP
+ child with changelog */
+
+ struct tvec_base *timer_wheel; /* timer wheel where the objects which
+ changelog has sent sits and waits
+ for expiry */
+ br_obj_n_workers_t *obj_queue; /* place holder for all the objects
+ that are expired from timer wheel
+ and ready to be picked up for
+ signing and the workers which sign
+ the objects */
+
+ uint32_t expiry_time; /* objects "wait" time */
+
+ tbf_t *tbf; /* token bucket filter */
+
+ gf_boolean_t iamscrubber; /* function as a fs scrubber */
+
+ struct br_scrub_stats scrub_stat; /* statistics of scrub*/
+
+ struct br_scrubber fsscrub; /* scrubbers for this subvolume */
+
+ struct br_monitor scrub_monitor; /* scrubber monitor */
+};
+
+/* An object waiting to be signed. */
+struct br_object {
+ xlator_t *this;
+
+ uuid_t gfid;
+
+ unsigned long signedversion; /* version against which this object will
+ be signed */
+ br_child_t *child; /* object's subvolume */
+
+ int sign_info;
+
+ struct list_head list; /* hook to add to the queue once the
+ object is expired from timer wheel */
+ void *data;
+};
+
+typedef struct br_object br_object_t;
+typedef int32_t (br_scrub_ssm_call) (xlator_t *);
+
+void
+br_log_object (xlator_t *, char *, uuid_t, int32_t);
+
+void
+br_log_object_path (xlator_t *, char *, const char *, int32_t);
+
+int32_t
+br_calculate_obj_checksum (unsigned char *,
+ br_child_t *, fd_t *, struct iatt *);
+
+int32_t
+br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
+
+gf_boolean_t
+bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *);
+
+/* Set the child's state. No locking is done here: br_child_enaction()
+ * and br_brick_disconnect() call this with child->lock held. */
+static inline void
+_br_set_child_state (br_child_t *child, br_child_state_t state)
+{
+ child->c_state = state;
+}
+
+/* Non-zero when the child is in the CONNECTED state (no locking is
+ * done here). */
+static inline int
+_br_is_child_connected (br_child_t *child)
+{
+ return (child->c_state == BR_CHILD_STATE_CONNECTED);
+}
+
+/* Value of the child's active_scrubbing flag (no locking is done
+ * here). */
+static inline int
+_br_is_child_scrub_active (br_child_t *child)
+{
+ return child->active_scrubbing;
+}
+
+/* Non-zero when the child is in the CONNFAILED state (no locking is
+ * done here). */
+static inline int
+_br_child_failed_conn (br_child_t *child)
+{
+ return (child->c_state == BR_CHILD_STATE_CONNFAILED);
+}
+
+/* Non-zero once the child has been marked as having witnessed a
+ * connection (child->witnessed is set to 1 by br_child_enaction()). */
+static inline int
+_br_child_witnessed_connection (br_child_t *child)
+{
+ return (child->witnessed == 1);
+}
+
+/* scrub state: record the monitor's current scrub state (no locking is
+ * done here) */
+static inline void
+_br_monitor_set_scrub_state (struct br_monitor *scrub_monitor,
+ br_scrub_state_t state)
+{
+ scrub_monitor->state = state;
+}
+
+/* Scrub event implied by the configured frequency: a stalled frequency
+ * maps to a pause event, anything else to a schedule event. */
+static inline br_scrub_event_t
+_br_child_get_scrub_event (struct br_scrubber *fsscrub)
+{
+        if (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED)
+                return BR_SCRUB_EVENT_PAUSE;
+
+        return BR_SCRUB_EVENT_SCHEDULE;
+}
+
+int32_t
+br_get_bad_objects_list (xlator_t *this, dict_t **dict);
+
+
+#endif /* __BIT_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/stub/Makefile.am b/xlators/features/bit-rot/src/stub/Makefile.am
new file mode 100644
index 00000000000..7e4b6837eec
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = bitrot-stub.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bitrot_stub_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+bitrot_stub_la_SOURCES = bit-rot-stub-helpers.c bit-rot-stub.c
+bitrot_stub_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = bit-rot-stub.h bit-rot-common.h bit-rot-stub-mem-types.h \
+ bit-rot-object-version.h bit-rot-stub-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
new file mode 100644
index 00000000000..2afc9f47c29
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -0,0 +1,179 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_COMMON_H__
+#define __BIT_ROT_COMMON_H__
+
+#include "glusterfs.h"
+#include "bit-rot-object-version.h"
+
+/*
+ * Bit flags accumulated by br_version_xattr_state(): a bit is set when the
+ * corresponding xattr could NOT be fetched from the dict.
+ */
+#define BR_VXATTR_VERSION (1 << 0)
+#define BR_VXATTR_SIGNATURE (1 << 1)
+
+/* Flag combinations that br_version_xattr_state() maps to a status. */
+#define BR_VXATTR_SIGN_MISSING (BR_VXATTR_SIGNATURE)
+#define BR_VXATTR_ALL_MISSING \
+ (BR_VXATTR_VERSION | BR_VXATTR_SIGNATURE)
+
+/*
+ * Fixed gfid (all zero bytes except a trailing 8) that br_stub_dir_create()
+ * copies into priv->bad_object_dir_gfid.
+ */
+#define BR_BAD_OBJ_CONTAINER (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8}
+
+/*
+ * Result of br_version_xattr_state():
+ * FULL - both version and signature xattrs were found
+ * MISSING - neither was found
+ * UNSIGNED - version found, signature not found
+ * INVALID - any other combination (signature found without version)
+ */
+typedef enum br_vxattr_state {
+ BR_VXATTR_STATUS_FULL = 0,
+ BR_VXATTR_STATUS_MISSING = 1,
+ BR_VXATTR_STATUS_UNSIGNED = 2,
+ BR_VXATTR_STATUS_INVALID = 3,
+} br_vxattr_status_t;
+
+/*
+ * Signing states. No user of these values is visible in this header.
+ * NOTE(review): meanings inferred from the names only -- confirm at the
+ * call sites.
+ */
+typedef enum br_sign_state {
+ BR_SIGN_INVALID = -1,
+ BR_SIGN_NORMAL = 0,
+ BR_SIGN_REOPEN_WAIT = 1,
+ BR_SIGN_QUICK = 2,
+} br_sign_state_t;
+
+/**
+ * Inspect @xattr for the bit-rot keys and classify what is present.
+ *
+ * @xattr:  dict to look the keys up in
+ * @obuf:   receives the current-version blob when that key is present
+ * @sbuf:   receives the signing-version blob when that key is present
+ * @objbad: set to _gf_true when the bad-object key is present (its mere
+ *          presence is enough), _gf_false otherwise
+ *
+ * Returns FULL when both version and signature are present, UNSIGNED when
+ * only the signature is missing, MISSING when both are missing and INVALID
+ * for the remaining combination.
+ */
+static inline br_vxattr_status_t
+br_version_xattr_state (dict_t *xattr, br_version_t **obuf,
+                        br_signature_t **sbuf, gf_boolean_t *objbad)
+{
+        void    *badmark = NULL;
+        int32_t  missing = 0;
+
+        if (dict_get_bin (xattr, BITROT_OBJECT_BAD_KEY,
+                          (void **)&badmark) == 0)
+                *objbad = _gf_true;
+        else
+                *objbad = _gf_false;
+
+        if (dict_get_bin (xattr, BITROT_CURRENT_VERSION_KEY,
+                          (void **)obuf) != 0)
+                missing |= BR_VXATTR_VERSION;
+
+        if (dict_get_bin (xattr, BITROT_SIGNING_VERSION_KEY,
+                          (void **)sbuf) != 0)
+                missing |= BR_VXATTR_SIGNATURE;
+
+        if (missing == 0)
+                return BR_VXATTR_STATUS_FULL;
+        if (missing == BR_VXATTR_SIGN_MISSING)
+                return BR_VXATTR_STATUS_UNSIGNED;
+        if (missing == BR_VXATTR_ALL_MISSING)
+                return BR_VXATTR_STATUS_MISSING;
+
+        return BR_VXATTR_STATUS_INVALID;
+}
+
+/**
+ * in-memory representation of signature used by signer for object
+ * signing.
+ *
+ * The signature bytes trail the fixed part of the structure (zero-length
+ * array member); br_set_signature() copies them into the on-disk format.
+ */
+typedef struct br_isignature_in {
+ int8_t signaturetype; /* signature type */
+
+ unsigned long signedversion; /* version against which the
+ object was signed */
+
+ size_t signaturelen; /* signature length */
+ char signature[0]; /* object signature */
+} br_isignature_t;
+
+/**
+ * in-memory representation of signature used by scrubber for object
+ * verification.
+ *
+ * As above, the signature bytes trail the fixed part of the structure.
+ */
+typedef struct br_isignature_out {
+ char stale; /* stale signature? */
+
+ unsigned long version; /* current signed version */
+
+ uint32_t time[2]; /* time when the object
+ got dirtied */
+
+ int8_t signaturetype; /* hash type */
+ size_t signaturelen; /* signature length */
+ char signature[0]; /* signature (hash) */
+} br_isignature_out_t;
+
+/*
+ * NOTE(review): presumably the payload returned for the stub-init virtual
+ * xattr defined below (a two-word time stamp plus the export path) --
+ * confirm at the point of use.
+ */
+typedef struct br_stub_init {
+ uint32_t timebuf[2];
+ char export[PATH_MAX];
+} br_stub_init_t;
+
+/*
+ * Signature types. ZERO and MAX are exclusive bounds: a type is accepted by
+ * br_is_signature_type_valid() only when it lies strictly between them.
+ */
+typedef enum {
+ BR_SIGNATURE_TYPE_VOID = -1, /* object is not signed */
+ BR_SIGNATURE_TYPE_ZERO = 0, /* min boundary */
+ BR_SIGNATURE_TYPE_SHA256 = 1, /* signed with SHA256 */
+ BR_SIGNATURE_TYPE_MAX = 2, /* max boundary */
+} br_signature_type;
+
+/* BitRot stub start time (virtual xattr) */
+#define GLUSTERFS_GET_BR_STUB_INIT_TIME "trusted.glusterfs.bit-rot.stub-init"
+
+/* signing/reopen hint */
+#define BR_OBJECT_RESIGN 0
+#define BR_OBJECT_REOPEN 1
+#define BR_REOPEN_SIGN_HINT_KEY "trusted.glusterfs.bit-rot.reopen-hint"
+
+/*
+ * A signature type is valid when it lies strictly between the ZERO and MAX
+ * boundary markers. Returns non-zero for valid, 0 otherwise.
+ */
+static inline int
+br_is_signature_type_valid (int8_t signaturetype)
+{
+        if (signaturetype <= BR_SIGNATURE_TYPE_ZERO)
+                return 0;
+
+        return (signaturetype < BR_SIGNATURE_TYPE_MAX);
+}
+
+/*
+ * Initialise @buf with the default current version and the two-word time
+ * stamp taken from @tv.
+ */
+static inline void
+br_set_default_ongoingversion (br_version_t *buf, uint32_t *tv)
+{
+        int slot = 0;
+
+        buf->ongoingversion = BITROT_DEFAULT_CURRENT_VERSION;
+
+        for (slot = 0; slot < 2; slot++)
+                buf->timebuf[slot] = tv[slot];
+}
+
+/*
+ * Fill @buf with the "not signed" defaults and report through @size the
+ * number of bytes that make up the record: just the fixed header, since no
+ * signature bytes follow.
+ */
+static inline void
+br_set_default_signature (br_signature_t *buf, size_t *size)
+{
+        *size = sizeof (br_signature_t);
+
+        buf->signedversion = BITROT_DEFAULT_SIGNING_VERSION;
+        buf->signaturetype = (int8_t) BR_SIGNATURE_TYPE_VOID;
+}
+
+/*
+ * Store @version and the two-word time stamp from @tv in @buf.
+ */
+static inline void
+br_set_ongoingversion (br_version_t *buf,
+                       unsigned long version, uint32_t *tv)
+{
+        const uint32_t first  = tv[0];
+        const uint32_t second = tv[1];
+
+        buf->ongoingversion = version;
+        buf->timebuf[0] = first;
+        buf->timebuf[1] = second;
+}
+
+/*
+ * Convert the signer's in-memory signature @sign into the on-disk record
+ * @buf: copy the type, byte-swap the signed version with ntohl() and copy
+ * @signaturelen signature bytes. @size receives the total record length
+ * (fixed header plus signature bytes).
+ *
+ * NOTE(review): ntohl() works on 32-bit values while signedversion is an
+ * unsigned long -- confirm the sender encodes it with htonl().
+ */
+static inline void
+br_set_signature (br_signature_t *buf,
+                  br_isignature_t *sign, size_t signaturelen, size_t *size)
+{
+        const size_t header = sizeof (br_signature_t);
+
+        buf->signaturetype = sign->signaturetype;
+        buf->signedversion = ntohl (sign->signedversion);
+        memcpy (buf->signature, sign->signature, signaturelen);
+
+        *size = header + signaturelen;
+}
+
+#endif /* __BIT_ROT_COMMON_H__ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-object-version.h b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h
new file mode 100644
index 00000000000..1f2497aebe9
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_OBJECT_VERSION_H
+#define __BIT_ROT_OBJECT_VERSION_H
+
+/**
+ * on-disk formats for ongoing version and object signature.
+ *
+ * NOTE(review): both records store the version as "unsigned long", whose
+ * width depends on the platform ABI -- confirm that bricks are never moved
+ * between ABIs with a different long size.
+ */
+typedef struct br_version {
+ unsigned long ongoingversion;
+ uint32_t timebuf[2];
+} br_version_t;
+
+/*
+ * Packed, so no padding sits between the one-byte type and the version.
+ * The signature bytes trail the header (zero-length array member); callers
+ * compute the record size as sizeof (br_signature_t) + signature length.
+ */
+typedef struct __attribute__ ((__packed__)) br_signature {
+ int8_t signaturetype;
+
+ unsigned long signedversion;
+
+ char signature[0];
+} br_signature_t;
+
+#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
new file mode 100644
index 00000000000..09f556db10b
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
@@ -0,0 +1,633 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-stub.h"
+
+/*
+ * Allocate one zero-filled br_stub_fd_t. Returns NULL when the allocation
+ * fails; the caller owns the result.
+ */
+br_stub_fd_t *
+br_stub_fd_new (void)
+{
+        return GF_CALLOC (1, sizeof (br_stub_fd_t),
+                          gf_br_stub_mt_br_stub_fd_t);
+}
+
+/*
+ * Attach @br_stub_fd to @fd as this xlator's fd context using the
+ * double-underscore variant of fd_ctx_set (br_stub_fd_ctx_set() is the
+ * wrapper that takes fd->lock). Returns -1 when any argument is NULL,
+ * otherwise whatever __fd_ctx_set() returns.
+ */
+int
+__br_stub_fd_ctx_set (xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd)
+{
+        int status = -1;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, br_stub_fd, out);
+
+        status = __fd_ctx_set (fd, this, (uint64_t)(long) br_stub_fd);
+
+out:
+        return status;
+}
+
+/*
+ * Fetch this xlator's context from @fd using the double-underscore variant
+ * of fd_ctx_get (br_stub_fd_ctx_get() is the wrapper that takes fd->lock).
+ * Returns NULL when an argument is NULL or no context is stored.
+ */
+br_stub_fd_t *
+__br_stub_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        br_stub_fd_t *ctx = NULL;
+        uint64_t      raw = 0;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        if (__fd_ctx_get (fd, this, &raw) == 0)
+                ctx = (br_stub_fd_t *) ((long) raw);
+
+out:
+        return ctx;
+}
+
+/*
+ * Locked wrapper around __br_stub_fd_ctx_get(): takes fd->lock for the
+ * duration of the lookup. Returns NULL on bad arguments or when no context
+ * is stored.
+ */
+br_stub_fd_t *
+br_stub_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        br_stub_fd_t *ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        ctx = __br_stub_fd_ctx_get (this, fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return ctx;
+}
+
+/*
+ * Locked wrapper around __br_stub_fd_ctx_set(): takes fd->lock while the
+ * context is stored. Returns -1 on bad arguments, otherwise the result of
+ * the unlocked helper.
+ */
+int32_t
+br_stub_fd_ctx_set (xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd)
+{
+        int32_t status = -1;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, br_stub_fd, out);
+
+        LOCK (&fd->lock);
+        status = __br_stub_fd_ctx_set (this, fd, br_stub_fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return status;
+}
+
+
+/**
+ * Build "<stub_basepath>/<filename>" in the caller's buffer.
+ * @priv: xlator private, supplies stub_basepath
+ * @filename: entry name inside the bad objects directory
+ * @file_path: output buffer
+ * @len: size of @file_path; output is truncated to fit (snprintf)
+ */
+static void
+br_stub_link_path (br_stub_private_t *priv, const char *filename,
+                   char *file_path, size_t len)
+{
+        const char *base = priv->stub_basepath;
+
+        snprintf (file_path, len, "%s/%s", base, filename);
+}
+
+/**
+ * Build the path of the container object that every bad-object entry is
+ * hard linked to: "<stub_basepath>/stub-<bad_object_dir_gfid>".
+ * @priv: xlator's private, supplies stub_basepath and bad_object_dir_gfid
+ * @stub_gfid_path: output buffer
+ * @len: size of @stub_gfid_path; output is truncated to fit (snprintf)
+ */
+static void
+br_stub_container_entry (br_stub_private_t *priv, char *stub_gfid_path,
+                         size_t len)
+{
+        const char *container = uuid_utoa (priv->bad_object_dir_gfid);
+
+        snprintf (stub_gfid_path, len, "%s/stub-%s", priv->stub_basepath,
+                  container);
+}
+
+/**
+ * Build "<stub_basepath>/<gfid>" in the caller's buffer.
+ * @priv: xlator private, supplies stub_basepath
+ * @gfid_path: output buffer
+ * @gfid: gfid of the bad object
+ * @len: size of @gfid_path; output is truncated to fit (snprintf)
+ *
+ * Same result as br_stub_link_path(), except that the entry name is given
+ * as a uuid_t and formatted here rather than passed in as a string.
+ */
+static void
+br_stub_linked_entry (br_stub_private_t *priv, char *gfid_path, uuid_t gfid,
+                      size_t len)
+{
+        const char *name = uuid_utoa (gfid);
+
+        snprintf (gfid_path, len, "%s/%s", priv->stub_basepath, name);
+}
+
+/**
+ * Adds an entry to the bad objects directory.
+ * @this: the stub xlator; this->private supplies the directory paths
+ * @gfid: gfid of the bad object being added to the bad objects directory
+ *
+ * The entry is a hard link named after @gfid pointing at the container
+ * object. Returns 0 when the entry exists afterwards, or when linking
+ * failed with ENOENT/EMLINK/EEXIST (tolerated, see below). Returns -1 for a
+ * null gfid (errno is set to EINVAL) and for any other link failure.
+ */
+int
+br_stub_add (xlator_t *this, uuid_t gfid)
+{
+        char               gfid_path[PATH_MAX]     = {0};
+        char               bad_gfid_path[PATH_MAX] = {0};
+        int                ret                     = 0;
+        br_stub_private_t *priv                    = NULL;
+        struct stat        st                      = {0};
+
+        priv = this->private;
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid),
+                                       out, errno, EINVAL);
+
+        br_stub_linked_entry (priv, gfid_path, gfid, sizeof (gfid_path));
+
+        /*
+         * Already recorded: nothing left to do. This is the same outcome
+         * as link() failing with EEXIST below, which is treated as success.
+         */
+        ret = sys_stat (gfid_path, &st);
+        if (!ret)
+                return 0;
+
+        br_stub_container_entry (priv, bad_gfid_path, sizeof (bad_gfid_path));
+
+        ret = sys_link (bad_gfid_path, gfid_path);
+        if (ret) {
+                if ((errno != ENOENT) && (errno != EMLINK) && (errno != EEXIST))
+                        goto out;
+
+                /*
+                 * Continue with success. At least we'll have half of the
+                 * functionality, in the sense, object is marked bad and
+                 * would be inaccessible. It's only scrub status that would
+                 * show up less number of objects. That's fine as we'll have
+                 * the log files that will have the missing information.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL,
+                        "failed to record gfid [%s]", uuid_utoa (gfid));
+        }
+
+        return 0;
+out:
+        return -1;
+}
+
+/**
+ * Removes the entry for @gfid from the bad objects directory.
+ * @this: the stub xlator; this->private supplies the directory path
+ * @gfid: gfid of the object whose entry is to be removed
+ *
+ * Returns 0 when the entry is gone afterwards (a missing entry counts as
+ * success), -EINVAL for a null gfid and -errno when unlink() fails for any
+ * reason other than ENOENT.
+ */
+int
+br_stub_del (xlator_t *this, uuid_t gfid)
+{
+        int32_t            op_errno __attribute__((unused)) = 0;
+        br_stub_private_t *priv                = NULL;
+        /* value reported when the gfid check below jumps straight to out */
+        int                ret                 = -EINVAL;
+        int                saved_errno         = 0;
+        char               gfid_path[PATH_MAX] = {0};
+
+        priv = this->private;
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid),
+                                       out, op_errno, EINVAL);
+        br_stub_linked_entry (priv, gfid_path, gfid,
+                              sizeof (gfid_path));
+        ret = sys_unlink (gfid_path);
+        if (ret && (errno != ENOENT)) {
+                /* keep the unlink errno; logging may overwrite errno */
+                saved_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, saved_errno,
+                        BRS_MSG_BAD_OBJ_UNLINK_FAIL,
+                        "%s: failed to delete bad object link from quarantine "
+                        "directory", gfid_path);
+                ret = -saved_errno;
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/**
+ * Make sure @fullpath exists and is a directory, creating it (and missing
+ * parents, via mkdir_p) when it does not exist yet.
+ *
+ * Returns 0 on success. Returns -1 when the path exists but is not a
+ * directory or when stat() fails for a reason other than ENOENT; returns
+ * the mkdir_p() result when creation fails. Every failure is logged.
+ *
+ * NOTE(review): the directory is created with mode 0600, which carries no
+ * search (x) bit -- confirm this is intended.
+ */
+static int
+br_stub_check_stub_directory (xlator_t *this, char *fullpath)
+{
+        int         ret = 0;
+        struct stat st  = {0,};
+
+        ret = sys_stat (fullpath, &st);
+        if (!ret && !S_ISDIR (st.st_mode)) {
+                /* stat succeeded, so errno is stale; say what is wrong */
+                errno = ENOTDIR;
+                goto error_return;
+        }
+        if (ret) {
+                if (errno != ENOENT)
+                        goto error_return;
+                ret = mkdir_p (fullpath, 0600, _gf_true);
+        }
+
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        BRS_MSG_BAD_OBJECT_DIR_FAIL,
+                        "failed to create stub directory [%s]", fullpath);
+        return ret;
+
+error_return:
+        gf_msg (this->name, GF_LOG_ERROR, errno,
+                BRS_MSG_BAD_OBJECT_DIR_FAIL,
+                "Failed to verify stub directory [%s]", fullpath);
+        return -1;
+}
+
+/**
+ * Function to create the container for the bad objects within the bad objects
+ * directory.
+ *
+ * Succeeds (returns 0) when @path already is a regular file or when it did
+ * not exist and could be created; the file is created with mode 0 and the
+ * descriptor is closed right away. Returns -1 when @path exists but is not
+ * a regular file, when stat() fails for a reason other than ENOENT, or when
+ * creation fails. Every failure is logged.
+ */
+static int
+br_stub_check_stub_file (xlator_t *this, char *path)
+{
+        int         fd = -1;
+        struct stat st = {0,};
+
+        if (sys_stat (path, &st) == 0) {
+                if (S_ISREG (st.st_mode))
+                        return 0;
+                /* stat succeeded, so errno is stale; report a real cause */
+                errno = EINVAL;
+                goto error_return;
+        }
+
+        if (errno != ENOENT)
+                goto error_return;
+
+        fd = sys_creat (path, 0);
+        if (fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        BRS_MSG_BAD_OBJECT_DIR_FAIL,
+                        "Failed to create stub file [%s]", path);
+                return -1;
+        }
+
+        sys_close (fd);
+        return 0;
+
+error_return:
+        gf_msg (this->name, GF_LOG_ERROR, errno,
+                BRS_MSG_BAD_OBJECT_DIR_FAIL, "Failed to verify stub file [%s]", path);
+        return -1;
+}
+
+/**
+ * Set up the bad objects directory: record the fixed container gfid in
+ * @priv, make sure priv->stub_basepath exists as a directory and make sure
+ * the container file "stub-<gfid>" exists inside it.
+ *
+ * Returns 0 when both checks succeed, -1 otherwise (the helpers log the
+ * reason).
+ */
+int
+br_stub_dir_create (xlator_t *this, br_stub_private_t *priv)
+{
+        int  ret                      = -1;
+        char fullpath[PATH_MAX]       = {0};
+        char stub_gfid_path[PATH_MAX] = {0, };
+
+        gf_uuid_copy (priv->bad_object_dir_gfid, BR_BAD_OBJ_CONTAINER);
+
+        snprintf (fullpath, sizeof (fullpath), "%s", priv->stub_basepath);
+
+        /* needs bad_object_dir_gfid, which was set just above */
+        br_stub_container_entry (priv, stub_gfid_path, sizeof (stub_gfid_path));
+
+        ret = br_stub_check_stub_directory (this, fullpath);
+        if (ret)
+                goto out;
+        ret = br_stub_check_stub_file (this, stub_gfid_path);
+        if (ret)
+                goto out;
+
+        return 0;
+
+out:
+        return -1;
+}
+
+/*
+ * Detach and return the first call stub queued on @callstubs, or NULL when
+ * the list is empty. No lock is taken here; br_stub_worker() calls this
+ * with container.bad_lock held.
+ */
+call_stub_t *
+__br_stub_dequeue (struct list_head *callstubs)
+{
+        call_stub_t *first = NULL;
+
+        if (list_empty (callstubs))
+                return NULL;
+
+        first = list_entry (callstubs->next, call_stub_t, list);
+        list_del_init (&first->list);
+
+        return first;
+}
+
+/*
+ * Append @stub to the tail of @callstubs. No lock is taken here;
+ * br_stub_worker_enqueue() calls this with container.bad_lock held.
+ */
+void
+__br_stub_enqueue (struct list_head *callstubs, call_stub_t *stub)
+{
+        struct list_head *node = &stub->list;
+
+        list_add_tail (node, callstubs);
+}
+
+/*
+ * Queue @stub for the bad-object worker thread: append it under
+ * container.bad_lock and signal container.bad_cond while still holding
+ * the lock.
+ */
+void
+br_stub_worker_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        br_stub_private_t *priv = this->private;
+
+        pthread_mutex_lock (&priv->container.bad_lock);
+        __br_stub_enqueue (&priv->container.bad_queue, stub);
+        pthread_cond_signal (&priv->container.bad_cond);
+        pthread_mutex_unlock (&priv->container.bad_lock);
+}
+
+/*
+ * Worker thread body. @data is the stub xlator. Loops forever: waits on
+ * container.bad_cond until container.bad_queue is non-empty, dequeues one
+ * call stub under container.bad_lock and resumes it with the lock dropped.
+ * The wait is re-checked in a loop, so a wakeup without queued work simply
+ * goes back to waiting.
+ */
+void *
+br_stub_worker (void *data)
+{
+        xlator_t          *this = data;
+        br_stub_private_t *priv = NULL;
+        call_stub_t       *stub = NULL;
+
+        THIS = data;
+        priv = this->private;
+
+        while (1) {
+                pthread_mutex_lock (&priv->container.bad_lock);
+                while (list_empty (&priv->container.bad_queue))
+                        (void) pthread_cond_wait (&priv->container.bad_cond,
+                                                  &priv->container.bad_lock);
+                stub = __br_stub_dequeue (&priv->container.bad_queue);
+                pthread_mutex_unlock (&priv->container.bad_lock);
+
+                if (stub != NULL)
+                        call_resume (stub);
+        }
+
+        return NULL;
+}
+
+/**
+ * Lookup handler for the bad-object container.
+ *
+ * Succeeds only when loc->gfid equals priv->bad_object_dir_gfid and
+ * priv->stub_basepath is an existing directory. The reply iatt is built
+ * from lstat() of that directory with ia_gfid replaced by the container
+ * gfid; postparent is returned zeroed and the reply dict is freshly
+ * allocated and empty. @xattr_req is not consulted.
+ *
+ * The frame is always unwound, with op_ret -1 and an errno on failure
+ * (EINVAL for a NULL loc or a gfid mismatch, the lstat errno, ENOTDIR, or
+ * ENOMEM). The function itself always returns 0.
+ */
+int32_t
+br_stub_lookup_wrapper (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, dict_t *xattr_req)
+{
+        br_stub_private_t *priv       = NULL;
+        struct stat        lstatbuf   = {0};
+        int                ret        = 0;
+        int32_t            op_errno   = EINVAL;
+        int32_t            op_ret     = -1;
+        struct iatt        stbuf      = {0, };
+        struct iatt        postparent = {0,};
+        dict_t            *xattr      = NULL;
+
+        priv = this->private;
+
+        VALIDATE_OR_GOTO (loc, done);
+        if (gf_uuid_compare (loc->gfid, priv->bad_object_dir_gfid))
+                goto done;
+
+        ret = sys_lstat (priv->stub_basepath, &lstatbuf);
+        if (ret) {
+                /* capture errno first; logging may overwrite it */
+                op_errno = errno;
+                gf_msg_debug (this->name, op_errno, "Stat failed on stub bad "
+                              "object dir");
+                goto done;
+        } else if (!S_ISDIR (lstatbuf.st_mode)) {
+                /* lstat succeeded, so errno carries nothing useful here */
+                op_errno = ENOTDIR;
+                gf_msg_debug (this->name, op_errno, "bad object container is not "
+                              "a directory");
+                goto done;
+        }
+
+        iatt_from_stat (&stbuf, &lstatbuf);
+        gf_uuid_copy (stbuf.ia_gfid, priv->bad_object_dir_gfid);
+
+        op_ret = op_errno = 0;
+        xattr = dict_new ();
+        if (!xattr) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+done:
+        /* loc is NULL when the validation above jumped here */
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+                             loc ? loc->inode : NULL, &stbuf, xattr,
+                             &postparent);
+        if (xattr)
+                dict_unref (xattr);
+        return 0;
+}
+
+/*
+ * Returns non-zero when @filename is exactly "stub-<gfid>", i.e. the name
+ * of the container entry for @gfid, and 0 otherwise.
+ */
+static int
+is_bad_gfid_file_current (char *filename, uuid_t gfid)
+{
+        char expected[GF_UUID_BUF_SIZE + 16] = {0, };
+
+        snprintf (expected, sizeof (expected), "stub-%s", uuid_utoa (gfid));
+
+        if (strcmp (filename, expected) == 0)
+                return 1;
+
+        return 0;
+}
+
+/*
+ * Remove a leftover "stub-*" entry from the bad objects directory. The
+ * entry named after the current container gfid is never touched; any other
+ * entry is unlinked only when stat() succeeds and reports a link count of
+ * exactly 1. Failures are ignored.
+ */
+static void
+check_delete_stale_bad_file (xlator_t *this, char *filename)
+{
+        br_stub_private_t *priv               = this->private;
+        char               filepath[PATH_MAX] = {0};
+        struct stat        st                 = {0};
+
+        if (is_bad_gfid_file_current (filename, priv->bad_object_dir_gfid))
+                return;
+
+        br_stub_link_path (priv, filename, filepath, sizeof (filepath));
+
+        if (sys_stat (filepath, &st) != 0)
+                return;
+
+        if (st.st_nlink == 1)
+                sys_unlink (filepath);
+}
+
+/**
+ * Read entries from the bad objects directory stream @dir, starting at
+ * offset @off, and append one gf_dirent_t per entry to @entries until the
+ * next entry would push the accumulated size past @size.
+ *
+ * "." and ".." are skipped. Names starting with "stub-" are not returned
+ * either; each is handed to check_delete_stale_bad_file() instead.
+ *
+ * Returns the number of entries appended. On telldir failure, EBADF from
+ * readdir, or a failed gf_dirent allocation, the entries gathered so far
+ * are returned. On non-Linux hosts a failed seekdir validation returns -1
+ * with errno set to EINVAL. End of directory is signalled by leaving errno
+ * set to ENOENT, and the last returned offset is remembered in
+ * fctx->bad_object.dir_eof.
+ *
+ * @fd is not used by the body.
+ */
+static int
+br_stub_fill_readdir (fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries)
+{
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+
+ this = THIS;
+ /* position the stream: offset 0 means start from the beginning */
+ if (!off) {
+ rewinddir (dir);
+ } else {
+ seekdir (dir, off);
+#ifndef GF_LINUX_HOST_OS
+ /* verify the seek took effect, unless off is the remembered EOF */
+ if ((u_long)telldir(dir) != off &&
+ off != fctx->bad_object.dir_eof) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)", off, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ }
+
+ while (filled <= size) {
+ /* remember where this entry starts so we can seek back to it */
+ in_case = (u_long)telldir (dir);
+
+ if (in_case == -1) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL,
+ "telldir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+
+ /* cleared so a NULL entry with errno still 0 can be told from an error */
+ errno = 0;
+ entry = sys_readdir (dir, scratch);
+ if (!entry || errno != 0) {
+ if (errno == EBADF) {
+ gf_msg (THIS->name, GF_LOG_WARNING, 0,
+ BRS_MSG_BAD_OBJECT_DIR_READ_FAIL,
+ "readdir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+ break;
+ }
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ /* container entries are internal: never listed, stale ones removed */
+ if (!strncmp (entry->d_name, "stub-",
+ strlen ("stub-"))) {
+ check_delete_stale_bad_file (this, entry->d_name);
+ continue;
+ }
+
+ this_size = max (sizeof (gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry->d_name) + 1;
+
+ /* entry does not fit: rewind to its start so the next call re-reads it */
+ if (this_size + filled > size) {
+ seekdir (dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case &&
+ in_case != fctx->bad_object.dir_eof) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ in_case, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ break;
+ }
+
+ this_entry = gf_dirent_for_name (entry->d_name);
+
+ if (!this_entry) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_NO_MEMORY,
+ "could not create gf_dirent for entry %s: (%s)",
+ entry->d_name, strerror (errno));
+ goto out;
+ }
+ /*
+ * we store the offset of next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail (&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count++;
+ }
+
+ /*
+ * Probe once more to detect end of directory.
+ * NOTE(review): presumably sys_readdir wraps readdir(3), so this probe
+ * advances the stream when entries remain; every call to this function
+ * repositions the stream first -- confirm.
+ */
+ if ((!sys_readdir (dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ fctx->bad_object.dir_eof = last_off;
+ }
+out:
+ return count;
+}
+
+/**
+ * readdir handler for the bad-object container.
+ *
+ * Looks up the stub fd context of @fd, reads entries from the DIR stream
+ * stored in it via br_stub_fill_readdir() and unwinds the frame with the
+ * entry count as op_ret and the resulting errno as op_errno (ENOENT marks
+ * end of directory). A missing fd context or a NULL DIR stream unwinds
+ * with op_ret -1 and EINVAL. The function itself always returns 0.
+ */
+int32_t
+br_stub_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+                         fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        br_stub_fd_t *fctx     = NULL;
+        DIR          *dir      = NULL;
+        int32_t       op_ret   = -1;
+        int32_t       op_errno = 0;
+        int           count    = 0;
+        gf_dirent_t   entries;
+
+        INIT_LIST_HEAD (&entries.list);
+
+        fctx = br_stub_fd_ctx_get (this, fd);
+        if (!fctx) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        BRS_MSG_GET_FD_CONTEXT_FAILED,
+                        "pfd is NULL, fd=%p", fd);
+                /*
+                 * br_stub_fd_ctx_get() reports no error code, so use the
+                 * same errno as the NULL-dir case below.
+                 */
+                op_errno = EINVAL;
+                goto done;
+        }
+
+        dir = fctx->bad_object.dir;
+
+        if (!dir) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        BRS_MSG_BAD_HANDLE_DIR_NULL,
+                        "dir is NULL for fd=%p", fd);
+                op_errno = EINVAL;
+                goto done;
+        }
+
+        count = br_stub_fill_readdir (fd, fctx, dir, off, size, &entries);
+
+        /* pick ENOENT to indicate EOF */
+        op_errno = errno;
+        op_ret = count;
+done:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+        gf_dirent_free (&entries);
+        return 0;
+}
+
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
new file mode 100644
index 00000000000..a33577cf598
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BR_MEM_TYPES_H
+#define _BR_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/*
+ * Memory accounting types for the bit-rot translators, numbered from just
+ * past the common types. gf_br_stub_mt_end must stay last: mem_acct_init()
+ * in bit-rot-stub.c sizes the accounting table from it.
+ */
+enum br_mem_types {
+ gf_br_stub_mt_private_t = gf_common_mt_end + 1,
+ gf_br_stub_mt_version_t,
+ gf_br_stub_mt_inode_ctx_t,
+ gf_br_stub_mt_signature_t,
+ gf_br_mt_br_private_t,
+ gf_br_mt_br_child_t,
+ gf_br_mt_br_object_t,
+ gf_br_mt_br_ob_n_wk_t,
+ gf_br_mt_br_scrubber_t,
+ gf_br_mt_br_fsscan_entry_t,
+ gf_br_stub_mt_br_stub_fd_t,
+ gf_br_stub_mt_br_scanner_freq_t,
+ gf_br_stub_mt_sigstub_t,
+ gf_br_mt_br_child_event_t,
+ gf_br_stub_mt_end,
+};
+
+#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
new file mode 100644
index 00000000000..c0fcfd324a5
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -0,0 +1,271 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _BITROT_STUB_MESSAGES_H_
+#define _BITROT_STUB_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/* file bit-rot-stub-messages.h
+ * brief BIT-ROT log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+/*
+ * Message ID range of the bit-rot stub component. NUM_MESSAGES equals the
+ * highest offset used below (BRS_MSG_BAD_OBJ_UNLINK_FAIL is BASE + 31) and
+ * must be bumped whenever a message is appended.
+ */
+#define GLFS_BITROT_STUB_BASE GLFS_MSGID_COMP_BITROT_STUB
+#define GLFS_BITROT_STUB_NUM_MESSAGES 31
+#define GLFS_MSGID_END (GLFS_BITROT_STUB_BASE + \
+ GLFS_BITROT_STUB_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_BITROT_STUB_BASE, "Invalid: Start of messages"
+/*------------*/
+
+
+#define BRS_MSG_NO_MEMORY (GLFS_BITROT_STUB_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_SET_EVENT_FAILED (GLFS_BITROT_STUB_BASE + 2)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_MEM_ACNT_FAILED (GLFS_BITROT_STUB_BASE + 3)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_CREATE_FRAME_FAILED (GLFS_BITROT_STUB_BASE + 4)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_SET_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 5)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_CHANGE_VERSION_FAILED (GLFS_BITROT_STUB_BASE + 6)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_ADD_FD_TO_LIST_FAILED (GLFS_BITROT_STUB_BASE + 7)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_SET_FD_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 8)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED (GLFS_BITROT_STUB_BASE + 9)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_NO_CHILD (GLFS_BITROT_STUB_BASE + 10)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_STUB_ALLOC_FAILED (GLFS_BITROT_STUB_BASE + 11)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_GET_INODE_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 12)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED (GLFS_BITROT_STUB_BASE + 13)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_ADD_FD_TO_INODE (GLFS_BITROT_STUB_BASE + 14)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_SIGN_VERSION_ERROR (GLFS_BITROT_STUB_BASE + 15)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJ_MARK_FAIL (GLFS_BITROT_STUB_BASE + 16)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK (GLFS_BITROT_STUB_BASE + 17)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_REMOVE_INTERNAL_XATTR (GLFS_BITROT_STUB_BASE + 18)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_SET_INTERNAL_XATTR (GLFS_BITROT_STUB_BASE + 19)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJECT_ACCESS (GLFS_BITROT_STUB_BASE + 20)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_CONTAINER_FAIL (GLFS_BITROT_STUB_BASE + 21)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJECT_DIR_FAIL (GLFS_BITROT_STUB_BASE + 22)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL (GLFS_BITROT_STUB_BASE + 23)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL (GLFS_BITROT_STUB_BASE + 24)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL (GLFS_BITROT_STUB_BASE + 25)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_GET_FD_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 26)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_HANDLE_DIR_NULL (GLFS_BITROT_STUB_BASE + 27)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJ_THREAD_FAIL (GLFS_BITROT_STUB_BASE + 28)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL (GLFS_BITROT_STUB_BASE + 29)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_LINK_FAIL (GLFS_BITROT_STUB_BASE + 30)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define BRS_MSG_BAD_OBJ_UNLINK_FAIL (GLFS_BITROT_STUB_BASE + 31)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
new file mode 100644
index 00000000000..67103f6b5e1
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -0,0 +1,3245 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "changelog.h"
+#include "compat-errno.h"
+#include "call-stub.h"
+
+#include "bit-rot-stub.h"
+#include "bit-rot-stub-mem-types.h"
+#include "bit-rot-stub-messages.h"
+#include "bit-rot-common.h"
+
+#define BR_STUB_REQUEST_COOKIE 0x1
+
+void *br_stub_signth (void *);
+
+/*
+ * One queued object-signing request. Entries are linked into the private
+ * squeue and consumed by br_stub_signth().
+ */
+struct br_stub_signentry {
+ unsigned long v; /* ordering key, set from ntohl (sign->signedversion) */
+
+ call_stub_t *stub; /* fsetxattr stub resumed by the signing thread */
+
+ struct list_head list; /* linkage into the private squeue */
+};
+
+/**
+ * Set up memory accounting for this translator.
+ *
+ * Returns -1 when @this is NULL; otherwise returns the result of
+ * xlator_mem_acct_init(), logging a warning when that result is non-zero.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int32_t status = -1;
+
+        if (this != NULL) {
+                status = xlator_mem_acct_init (this, gf_br_stub_mt_end + 1);
+                if (status != 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                BRS_MSG_MEM_ACNT_FAILED,
+                                "Memory accounting init failed");
+                }
+        }
+
+        return status;
+}
+
+/**
+ * Initialise the bad object container: its condition variable, mutex and
+ * queue, create the directory via br_stub_dir_create() and start the
+ * worker thread (br_stub_worker).
+ *
+ * Returns 0 on success and -1 on failure. On failure everything that was
+ * initialised here is torn down again, including the thread attributes.
+ */
+int32_t
+br_stub_bad_object_container_init (xlator_t *this, br_stub_private_t *priv)
+{
+        pthread_attr_t w_attr;
+        int32_t ret = -1;
+
+        ret = pthread_cond_init (&priv->container.bad_cond, NULL);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                        "pthread_cond_init failed (%d)", ret);
+                goto out;
+        }
+
+        ret = pthread_mutex_init (&priv->container.bad_lock, NULL);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                        "pthread_mutex_init failed (%d)", ret);
+                goto cleanup_cond;
+        }
+
+        ret = pthread_attr_init (&w_attr);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                        "pthread_attr_init failed (%d)", ret);
+                goto cleanup_lock;
+        }
+
+        /* a rejected stack size is not fatal: the default size is used */
+        ret = pthread_attr_setstacksize (&w_attr, BAD_OBJECT_THREAD_STACK_SIZE);
+        if (ret == EINVAL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        BRS_MSG_BAD_OBJ_THREAD_FAIL,
+                        "Using default thread stack size");
+        }
+
+        INIT_LIST_HEAD (&priv->container.bad_queue);
+        ret = br_stub_dir_create (this, priv);
+        if (ret < 0)
+                goto cleanup_attr; /* w_attr is initialised by now */
+
+        ret = gf_thread_create (&priv->container.thread, &w_attr, br_stub_worker, this);
+        if (ret)
+                goto cleanup_attr;
+
+        /* the attribute object is only needed while creating the thread */
+        pthread_attr_destroy (&w_attr);
+
+        return 0;
+
+cleanup_attr:
+        pthread_attr_destroy (&w_attr);
+cleanup_lock:
+        pthread_mutex_destroy (&priv->container.bad_lock);
+cleanup_cond:
+        pthread_cond_destroy (&priv->container.bad_cond);
+out:
+        return -1;
+}
+
+#define BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH"/quanrantine"
+
+/**
+ * Translator init: allocates the private structure and its local pool,
+ * reads the "bitrot" and "export" options, records the boot time, starts
+ * the signing serializer thread (br_stub_signth) and initialises the bad
+ * object container.
+ *
+ * Returns 0 on success and -1 on failure; on failure this->private is
+ * left NULL once the private structure has been released.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int32_t ret = 0;
+        char *tmp = NULL;
+        struct timeval tv = {0,};
+        br_stub_private_t *priv = NULL;
+
+        if (!this->children) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD,
+                        "FATAL: no children");
+                goto error_return;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_br_stub_mt_private_t);
+        if (!priv)
+                goto error_return;
+
+        priv->local_pool = mem_pool_new (br_stub_local_t, 512);
+        if (!priv->local_pool)
+                goto free_priv;
+
+        GF_OPTION_INIT ("bitrot", priv->go, bool, free_mempool);
+
+        GF_OPTION_INIT ("export", tmp, str, free_mempool);
+        /* NOTE(review): the copy is not bounded by the size of
+         * priv->export -- confirm the option length is limited elsewhere.
+         */
+        memcpy (priv->export, tmp, strlen (tmp) + 1);
+
+        (void) snprintf (priv->stub_basepath, PATH_MAX,
+                         "%s/%s", priv->export, BR_STUB_QUARANTINE_DIR);
+
+        (void) gettimeofday (&tv, NULL);
+
+        /* boot time is in network endian format */
+        priv->boot[0] = htonl (tv.tv_sec);
+        priv->boot[1] = htonl (tv.tv_usec);
+
+        pthread_mutex_init (&priv->lock, NULL);
+        pthread_cond_init (&priv->cond, NULL);
+        INIT_LIST_HEAD (&priv->squeue);
+
+        /* Thread creations need 'this' to be passed so that THIS can be
+         * assigned inside the thread. So setting this->private here.
+         */
+        this->private = priv;
+
+        ret = gf_thread_create (&priv->signth, NULL, br_stub_signth, this);
+        if (ret != 0)
+                goto cleanup_lock;
+
+        ret = br_stub_bad_object_container_init (this, priv);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
+                        "failed to launch the thread for storing bad gfids");
+                /* the signing thread is already running and uses
+                 * priv->lock/priv->cond: stop it before tearing them down
+                 */
+                goto cleanup_signth;
+        }
+
+        gf_msg_debug (this->name, 0, "bit-rot stub loaded");
+
+        return 0;
+
+ cleanup_signth:
+        (void) gf_thread_cleanup_xint (priv->signth);
+ cleanup_lock:
+        pthread_cond_destroy (&priv->cond);
+        pthread_mutex_destroy (&priv->lock);
+ free_mempool:
+        mem_pool_destroy (priv->local_pool);
+ free_priv:
+        GF_FREE (priv);
+        this->private = NULL;
+ error_return:
+        return -1;
+}
+
+/**
+ * Translator fini: stops the signing serializer thread and the bad object
+ * container thread, destroys any stubs still queued for them, destroys the
+ * associated locks and condition variables, and releases the local pool
+ * and the private structure.
+ *
+ * If either thread cannot be stopped the function returns early and the
+ * remaining state (including this->private) is left in place.
+ */
+void
+fini (xlator_t *this)
+{
+        int32_t ret = 0;
+        br_stub_private_t *priv = this->private;
+        struct br_stub_signentry *sigstub = NULL;
+        call_stub_t *stub = NULL;
+
+        if (!priv)
+                return;
+
+        ret = gf_thread_cleanup_xint (priv->signth);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+                        "Could not cancel sign serializer thread");
+                goto out;
+        }
+
+        /* drain signing requests that were never dispatched */
+        while (!list_empty (&priv->squeue)) {
+                sigstub = list_first_entry (&priv->squeue,
+                                            struct br_stub_signentry, list);
+                list_del_init (&sigstub->list);
+
+                call_stub_destroy (sigstub->stub);
+                GF_FREE (sigstub);
+        }
+
+        pthread_mutex_destroy (&priv->lock);
+        pthread_cond_destroy (&priv->cond);
+
+        ret = gf_thread_cleanup_xint (priv->container.thread);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+                        "Could not cancel the thread for storing bad gfids");
+                goto out;
+        }
+
+        /* drain stubs still queued for the bad object container thread */
+        while (!list_empty (&priv->container.bad_queue)) {
+                stub = list_first_entry (&priv->container.bad_queue, call_stub_t,
+                                         list);
+                list_del_init (&stub->list);
+                call_stub_destroy (stub);
+        }
+
+        pthread_mutex_destroy (&priv->container.bad_lock);
+        pthread_cond_destroy (&priv->container.bad_cond);
+
+        /* the pool was created in init() and is owned by priv */
+        mem_pool_destroy (priv->local_pool);
+        priv->local_pool = NULL;
+
+        this->private = NULL;
+        GF_FREE (priv);
+
+ out:
+        return;
+}
+
+/**
+ * Allocate one zeroed buffer big enough for the requested pieces: a
+ * br_version_t when @obuf is non-NULL, and a br_signature_t followed by
+ * @signaturelen bytes when @sbuf is non-NULL. When both are requested the
+ * signature immediately follows the version inside the same allocation.
+ *
+ * Returns 0 on success, -1 when the allocation fails.
+ */
+static int
+br_stub_alloc_versions (br_version_t **obuf,
+                        br_signature_t **sbuf, size_t signaturelen)
+{
+        char   *cursor = NULL;
+        size_t  total  = 0;
+
+        if (obuf != NULL)
+                total += sizeof (br_version_t);
+        if (sbuf != NULL)
+                total += sizeof (br_signature_t) + signaturelen;
+
+        cursor = GF_CALLOC (1, total, gf_br_stub_mt_version_t);
+        if (cursor == NULL)
+                return -1;
+
+        if (obuf != NULL) {
+                *obuf = (br_version_t *) cursor;
+                cursor += sizeof (br_version_t);
+        }
+        if (sbuf != NULL)
+                *sbuf = (br_signature_t *) cursor;
+
+        return 0;
+}
+
+/* Release a version/signature buffer by handing @mem to GF_FREE(). */
+static void
+br_stub_dealloc_versions (void *mem)
+{
+ GF_FREE (mem);
+}
+
+/* Fetch a local from the translator's local pool using mem_get0(). */
+static br_stub_local_t *
+br_stub_alloc_local (xlator_t *this)
+{
+        br_stub_private_t *private = this->private;
+        br_stub_local_t   *local   = NULL;
+
+        local = mem_get0 (private->local_pool);
+
+        return local;
+}
+
+/* Give a local back by handing @ptr to mem_put(). */
+static void
+br_stub_dealloc_local (br_stub_local_t *ptr)
+{
+ mem_put (ptr);
+}
+
+/**
+ * Fill @obuf with @oversion and the translator's boot time, then attach it
+ * to @dict under BITROT_CURRENT_VERSION_KEY as a static binary value.
+ *
+ * Returns the result of dict_set_static_bin().
+ */
+static int
+br_stub_prepare_version_request (xlator_t *this, dict_t *dict,
+                                 br_version_t *obuf, unsigned long oversion)
+{
+        br_stub_private_t *priv   = this->private;
+        int                status = 0;
+
+        br_set_ongoingversion (obuf, oversion, priv->boot);
+
+        status = dict_set_static_bin (dict, BITROT_CURRENT_VERSION_KEY,
+                                      (void *)obuf, sizeof (br_version_t));
+        return status;
+}
+
+/**
+ * Fill @sbuf from @sign via br_set_signature() and attach it to @dict
+ * under BITROT_SIGNING_VERSION_KEY as a static binary value whose length
+ * is the size reported by br_set_signature().
+ *
+ * Returns the result of dict_set_static_bin().
+ */
+static int
+br_stub_prepare_signing_request (dict_t *dict,
+                                 br_signature_t *sbuf,
+                                 br_isignature_t *sign, size_t signaturelen)
+{
+        size_t objsize = 0;
+        int    status  = 0;
+
+        br_set_signature (sbuf, sign, signaturelen, &objsize);
+
+        status = dict_set_static_bin (dict, BITROT_SIGNING_VERSION_KEY,
+                                      (void *)sbuf, objsize);
+        return status;
+}
+
+/**
+ * Create and attach a fresh inode context for @inode.
+ *
+ * The context starts with @version as its ongoing version, is marked
+ * dirty or synced according to @markdirty, and is flagged as a bad object
+ * when @bad_object is set. When @fd is non-NULL it is added to the
+ * context before the context is stored on the inode.
+ *
+ * Returns 0 on success, -1 on any failure (the context is freed).
+ */
+static int
+br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode,
+                             unsigned long version, gf_boolean_t markdirty,
+                             gf_boolean_t bad_object)
+{
+        br_stub_inode_ctx_t *ictx = NULL;
+
+        ictx = GF_CALLOC (1, sizeof (br_stub_inode_ctx_t),
+                          gf_br_stub_mt_inode_ctx_t);
+        if (ictx == NULL)
+                return -1;
+
+        INIT_LIST_HEAD (&ictx->fd_list);
+
+        if (markdirty) {
+                __br_stub_mark_inode_dirty (ictx);
+        } else {
+                __br_stub_mark_inode_synced (ictx);
+        }
+        __br_stub_set_ongoing_version (ictx, version);
+
+        if (bad_object) {
+                __br_stub_mark_object_bad (ictx);
+        }
+
+        if (fd != NULL) {
+                if (br_stub_add_fd_to_inode (this, fd, ictx))
+                        goto discard;
+        }
+
+        if (br_stub_set_inode_ctx (this, inode, ictx))
+                goto discard;
+
+        return 0;
+
+ discard:
+        GF_FREE (ictx);
+        return -1;
+}
+
+/**
+ * Under the inode lock, look up the inode's context and, if the inode is
+ * still dirty, record @version as its ongoing version and mark it synced.
+ *
+ * Returns 0 when a context was found, -1 otherwise.
+ */
+static int
+br_stub_mod_inode_versions (xlator_t *this,
+                            fd_t *fd, inode_t *inode, unsigned long version)
+{
+        int32_t              status = -1;
+        br_stub_inode_ctx_t *ictx   = NULL;
+
+        LOCK (&inode->lock);
+        {
+                ictx = __br_stub_get_ongoing_version_ctx (this, inode, NULL);
+                if (ictx != NULL) {
+                        if (__br_stub_is_inode_dirty (ictx)) {
+                                __br_stub_set_ongoing_version (ictx, version);
+                                __br_stub_mark_inode_synced (ictx);
+                        }
+                        status = 0;
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        return status;
+}
+
+/**
+ * Populate @local for a versioning operation: the fop stub, versioning
+ * type and in-memory version, the gfid, plus new references on @fd and
+ * @inode when they are non-NULL.
+ */
+static void
+br_stub_fill_local (br_stub_local_t *local,
+                    call_stub_t *stub, fd_t *fd, inode_t *inode, uuid_t gfid,
+                    int versioningtype, unsigned long memversion)
+{
+        local->fopstub        = stub;
+        local->versioningtype = versioningtype;
+
+        local->u.context.version = memversion;
+        if (fd != NULL) {
+                local->u.context.fd = fd_ref (fd);
+        }
+        if (inode != NULL) {
+                local->u.context.inode = inode_ref (inode);
+        }
+        gf_uuid_copy (local->u.context.gfid, gfid);
+}
+
+/**
+ * Reset @local to its empty state: drop the fd and inode references it
+ * holds (if any) and clear the stub pointer, versioning type, version and
+ * gfid. The local itself is not released here.
+ */
+static void
+br_stub_cleanup_local (br_stub_local_t *local)
+{
+        local->fopstub           = NULL;
+        local->versioningtype    = 0;
+        local->u.context.version = 0;
+
+        if (local->u.context.fd != NULL) {
+                fd_unref (local->u.context.fd);
+                local->u.context.fd = NULL;
+        }
+
+        if (local->u.context.inode != NULL) {
+                inode_unref (local->u.context.inode);
+                local->u.context.inode = NULL;
+        }
+
+        memset (local->u.context.gfid, '\0', sizeof (uuid_t));
+}
+
+/**
+ * Report, for the inode behind @fd, whether it is dirty (@versioning) and
+ * whether it has been modified (@modified). Both outputs are reset to
+ * false first. When @ctx is non-NULL it receives the inode context.
+ *
+ * Returns 0 on success, -1 when the inode context cannot be fetched.
+ */
+static int
+br_stub_need_versioning (xlator_t *this,
+                         fd_t *fd, gf_boolean_t *versioning,
+                         gf_boolean_t *modified, br_stub_inode_ctx_t **ctx)
+{
+        uint64_t             addr = 0;
+        br_stub_inode_ctx_t *ictx = NULL;
+
+        *versioning = _gf_false;
+        *modified   = _gf_false;
+
+        if (br_stub_get_inode_ctx (this, fd->inode, &addr) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the "
+                        "inode context for the inode %s",
+                        uuid_utoa (fd->inode->gfid));
+                return -1;
+        }
+
+        ictx = (br_stub_inode_ctx_t *) (long) addr;
+
+        LOCK (&fd->inode->lock);
+        {
+                if (__br_stub_is_inode_dirty (ictx)) {
+                        *versioning = _gf_true;
+                }
+                if (__br_stub_is_inode_modified (ictx)) {
+                        *modified = _gf_true;
+                }
+        }
+        UNLOCK (&fd->inode->lock);
+
+        if (ctx != NULL) {
+                *ctx = ictx;
+        }
+
+        return 0;
+}
+
+/**
+ * Make sure @fd has a stub fd context: if br_stub_fd_ctx_get() finds
+ * none, the fd is added to the inode context @ctx.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * br_stub_add_fd_to_inode() on failure.
+ */
+static int32_t
+br_stub_anon_fd_ctx (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+        int32_t status = -1;
+
+        if (br_stub_fd_ctx_get (this, fd) != NULL)
+                return 0;
+
+        status = br_stub_add_fd_to_inode (this, fd, ctx);
+        if (status) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_ADD_FD_TO_INODE, "failed to add fd to "
+                        "the inode (gfid: %s)",
+                        uuid_utoa (fd->inode->gfid));
+                return status;
+        }
+
+        return 0;
+}
+
+/**
+ * Prepare @frame for a versioning operation: allocate a local, ensure an
+ * anonymous @fd has a stub fd context, and store the local in
+ * frame->local.
+ *
+ * Returns 0 on success, -1 on failure (frame->local is left untouched and
+ * the local, if allocated, is released).
+ */
+static int
+br_stub_versioning_prep (call_frame_t *frame,
+                         xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+        br_stub_local_t *local = br_stub_alloc_local (this);
+
+        if (local == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY,
+                        "local allocation failed (gfid: %s)",
+                        uuid_utoa (fd->inode->gfid));
+                return -1;
+        }
+
+        if (fd_is_anonymous (fd) && br_stub_anon_fd_ctx (this, fd, ctx)) {
+                br_stub_dealloc_local (local);
+                return -1;
+        }
+
+        frame->local = local;
+
+        return 0;
+}
+
+/**
+ * Flag the inode behind the fd stored in @local as modified, under the
+ * inode lock.
+ *
+ * Returns 0 on success, -1 when the inode context cannot be fetched.
+ */
+static int
+br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local)
+{
+        fd_t                *fd   = local->u.context.fd;
+        uint64_t             addr = 0;
+        br_stub_inode_ctx_t *ictx = NULL;
+
+        if (br_stub_get_inode_ctx (this, fd->inode, &addr) < 0)
+                return -1;
+
+        ictx = (br_stub_inode_ctx_t *) (long) addr;
+
+        LOCK (&fd->inode->lock);
+        {
+                __br_stub_set_inode_modified (ictx);
+        }
+        UNLOCK (&fd->inode->lock);
+
+        return 0;
+}
+
+/**
+ * Translate the result of br_stub_is_bad_object() into fop error values.
+ *
+ *   0  => the inode context does not mark the object as bad; @op_ret and
+ *         @op_errno are left untouched
+ *  -1  => the inode context could not be fetched; -1/EINVAL is reported
+ *  -2  => the inode context marks the object as bad; -1/EIO is reported
+ *
+ * The value from br_stub_is_bad_object() is returned unchanged; a negative
+ * value means the calling fop has to fail.
+ */
+static int
+br_stub_check_bad_object (xlator_t *this, inode_t *inode, int32_t *op_ret,
+                          int32_t *op_errno)
+{
+        int state = br_stub_is_bad_object (this, inode);
+
+        switch (state) {
+        case -2:
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
+                        "%s is a bad object. Returning",
+                        uuid_utoa (inode->gfid));
+                *op_ret = -1;
+                *op_errno = EIO;
+                break;
+        case -1:
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED, "could not get inode"
+                        " context for %s", uuid_utoa (inode->gfid));
+                *op_ret = -1;
+                *op_errno = EINVAL;
+                break;
+        default:
+                break;
+        }
+
+        return state;
+}
+
+/**
+ * callback for inode/fd versioning
+ *
+ * On success of the wound operation the in-memory inode version is updated
+ * from the values saved in frame->local and the parked fop stub is resumed;
+ * the local stays attached to the frame in that case. On failure (of the
+ * wound operation or of the in-memory update) the stub is unwound with an
+ * error and the local is cleaned up and released here.
+ *
+ * Always returns 0.
+ */
+int
+br_stub_fd_incversioning_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ inode_t *inode = NULL;
+ unsigned long version = 0;
+ br_stub_local_t *local = NULL;
+
+ local = (br_stub_local_t *)frame->local;
+ if (op_ret < 0)
+ goto done;
+ fd = local->u.context.fd;
+ inode = local->u.context.inode;
+ version = local->u.context.version;
+
+ /* a failure to update the in-memory version is reported as EINVAL */
+ op_ret = br_stub_mod_inode_versions (this, fd, inode, version);
+ if (op_ret < 0)
+ op_errno = EINVAL;
+
+ done:
+ if (op_ret < 0) {
+ /* detach the local before unwinding, then release it */
+ frame->local = NULL;
+ call_unwind_error (local->fopstub, -1, op_errno);
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+ } else {
+ call_resume (local->fopstub);
+ }
+ return 0;
+}
+
+/**
+ * Initial object versioning
+ *
+ * Version persists two (2) extended attributes as explained below:
+ * 1. Current (ongoing) version: This is incremented on a writev ()
+ * or truncate () and is the running version for an object.
+ * 2. Signing version: This is the version against which an object
+ * was signed (checksummed).
+ *
+ * During initial versioning, both ongoing and signing versions are
+ * set to one and zero respectively. A write() call increments the
+ * ongoing version as an indication of modification to the object.
+ * Additionally this needs to be persisted on disk and needs to be
+ * durable: fsync().. :-/
+ * As an optimization only the first write() synchronizes the ongoing
+ * version to disk, subsequent write()s before the *last* release()
+ * are no-op's.
+ *
+ * create(), just like lookup() initializes the object versions to
+ * the default. As an optimization this is not a durable operation:
+ * in case of a crash, hard reboot etc.. absence of versioning xattrs
+ * is ignored in scrubber along with the one time crawler explicitly
+ * triggering signing for such objects.
+ *
+ * c.f. br_stub_writev() / br_stub_truncate()
+ */
+
+/**
+ * perform full or incremental versioning on an inode pointed to by an
+ * fd. incremental versioning is done when an inode is dirty and a
+ * writeback is triggered.
+ *
+ * The version xattrs in @dict are sent to the first child as an fsetxattr
+ * with @callback as its callback. frame->local must already be set by the
+ * caller; it is filled here with @stub, @fd, the fd's inode and gfid,
+ * @versioningtype and @memversion.
+ *
+ * Returns 0 once the fsetxattr has been wound, -1 if the xdata dict cannot
+ * be created, or the non-zero result of dict_set_int32() when populating
+ * xdata fails. Nothing is wound in the failure cases.
+ */
+
+int
+br_stub_fd_versioning (xlator_t *this, call_frame_t *frame,
+ call_stub_t *stub, dict_t *dict, fd_t *fd,
+ br_stub_version_cbk *callback, unsigned long memversion,
+ int versioningtype, int durable)
+{
+ int32_t ret = -1;
+ int flags = 0;
+ dict_t *xdata = NULL;
+ br_stub_local_t *local = NULL;
+
+ xdata = dict_new ();
+ if (!xdata)
+ goto done;
+
+ /* tag the request with GLUSTERFS_INTERNAL_FOP_KEY */
+ ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto dealloc_xdata;
+
+ if (durable) {
+ ret = dict_set_int32 (xdata, GLUSTERFS_DURABLE_OP, 0);
+ if (ret)
+ goto dealloc_xdata;
+ }
+
+ local = frame->local;
+
+ br_stub_fill_local (local, stub, fd,
+ fd->inode, fd->inode->gfid,
+ versioningtype, memversion);
+
+ STACK_WIND (frame, callback,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+
+ ret = 0;
+
+ /* the reference taken by dict_new() is dropped on every path */
+ dealloc_xdata:
+ dict_unref (xdata);
+ done:
+ return ret;
+}
+
+/**
+ * Send the writeback version of the inode context @ctx down as an
+ * incremental versioning request for @fd, parking @stub until the request
+ * completes (see br_stub_fd_incversioning_cbk).
+ *
+ * On failure @stub is unwound with an error, and frame->local (if set) is
+ * detached, cleaned up and released.
+ *
+ * Returns 0 when the request was wound, non-zero otherwise.
+ */
+static int
+br_stub_perform_incversioning (xlator_t *this,
+ call_frame_t *frame, call_stub_t *stub,
+ fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ br_version_t *obuf = NULL;
+ unsigned long writeback_version = 0;
+ int op_errno = 0;
+ br_stub_local_t *local = NULL;
+
+ /* NOTE(review): this EINVAL is overwritten with ENOMEM below before
+ * any failure path can observe it */
+ op_errno = EINVAL;
+ local = frame->local;
+
+ writeback_version = __br_stub_writeback_version (ctx);
+
+ op_errno = ENOMEM;
+ dict = dict_new ();
+ if (!dict)
+ goto done;
+ ret = br_stub_alloc_versions (&obuf, NULL, 0);
+ if (ret)
+ goto dealloc_dict;
+ ret = br_stub_prepare_version_request (this, dict,
+ obuf, writeback_version);
+ if (ret)
+ goto dealloc_versions;
+
+ ret = br_stub_fd_versioning
+ (this, frame, stub, dict,
+ fd, br_stub_fd_incversioning_cbk, writeback_version,
+ BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
+
+ /* NOTE(review): obuf is attached to dict as a static bin and freed here
+ * right after the wind; presumably the fsetxattr has consumed it by
+ * then -- confirm */
+ dealloc_versions:
+ br_stub_dealloc_versions (obuf);
+ dealloc_dict:
+ dict_unref (dict);
+ done:
+ if (ret) {
+ if (local)
+ frame->local = NULL;
+ call_unwind_error (stub, -1, op_errno);
+ if (local) {
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+ }
+ }
+
+ return ret;
+}
+
+/** {{{ */
+
+/* fsetxattr() */
+
+/**
+ * Resume function of the signing stub queued by
+ * br_stub_handle_object_signature(): winds the fsetxattr to the first
+ * child with the default callback, then drops one reference on @xdata.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_stub_perform_objsign (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fsetxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd,
+ dict, flags, xdata);
+
+ /* NOTE(review): presumably pairs with the reference obtained in
+ * br_stub_handle_object_signature() -- confirm */
+ dict_unref (xdata);
+ return 0;
+}
+
+/**
+ * Signing serializer thread. @arg is the translator (xlator_t *).
+ *
+ * Waits on priv->cond until priv->squeue is non-empty, removes the first
+ * entry while holding priv->lock, and resumes that entry's stub with the
+ * lock released. The loop has no exit condition, so the trailing return
+ * is not reached from it; fini() stops the thread through
+ * gf_thread_cleanup_xint().
+ */
+void *
+br_stub_signth (void *arg)
+{
+ xlator_t *this = arg;
+ br_stub_private_t *priv = this->private;
+ struct br_stub_signentry *sigstub = NULL;
+
+ THIS = this;
+ while (1) {
+ pthread_mutex_lock (&priv->lock);
+ {
+ /* loop guards against wakeups with an empty queue */
+ while (list_empty (&priv->squeue))
+ pthread_cond_wait (&priv->cond, &priv->lock);
+
+ sigstub = list_first_entry
+ (&priv->squeue, struct br_stub_signentry, list);
+ list_del_init (&sigstub->list);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ call_resume (sigstub->stub);
+
+ GF_FREE (sigstub);
+ }
+
+ return NULL;
+}
+
+/**
+ * Ordering predicate for the signing queue (used with list_add_order()).
+ *
+ * Returns 1 when the entry containing @elem1 has a larger 'v' than the
+ * entry containing @elem2, and 0 otherwise.
+ */
+int
+orderq (struct list_head *elem1, struct list_head *elem2)
+{
+        unsigned long first  = 0;
+        unsigned long second = 0;
+
+        first  = list_entry (elem1, struct br_stub_signentry, list)->v;
+        second = list_entry (elem2, struct br_stub_signentry, list)->v;
+
+        return (first > second) ? 1 : 0;
+}
+
+/**
+ * Compare the version being signed (@sbuf->signedversion) with the
+ * current version held in the inode context of @inode.
+ *
+ *  - signed version newer than the current version: invalid request, a
+ *    warning is logged and -1 is returned
+ *  - signed version older than the current version: *@fakesuccess is set
+ *    to 1 and 0 is returned
+ *  - versions equal: 0 is returned
+ *
+ * If the inode context cannot be fetched, BITROT_SIGNING_VERSION_KEY is
+ * removed from @dict and the lookup's error value is returned. -1 is
+ * returned when any argument is NULL.
+ */
+static int
+br_stub_compare_sign_version (xlator_t *this,
+                              inode_t *inode,
+                              br_signature_t *sbuf,
+                              dict_t *dict, int *fakesuccess)
+{
+        int32_t ret = -1;
+        uint64_t tmp_ctx = 0;
+        gf_boolean_t invalid = _gf_false;
+        br_stub_inode_ctx_t *ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, sbuf, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        ret = br_stub_get_inode_ctx (this, inode, &tmp_ctx);
+        if (ret) {
+                dict_del (dict, BITROT_SIGNING_VERSION_KEY);
+                goto out;
+        }
+
+        ctx = (br_stub_inode_ctx_t *)(long)tmp_ctx;
+
+        LOCK (&inode->lock);
+        {
+                if (ctx->currentversion < sbuf->signedversion) {
+                        invalid = _gf_true;
+                } else if (ctx->currentversion > sbuf->signedversion) {
+                        /* arguments follow the order of the message text:
+                         * signing version first, current version second
+                         */
+                        gf_msg_debug (this->name, 0, "\"Signing version\" "
+                                      "(%lu) lower than \"Current version \" "
+                                      "(%lu)", sbuf->signedversion,
+                                      ctx->currentversion);
+                        *fakesuccess = 1;
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        if (invalid) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        BRS_MSG_SIGN_VERSION_ERROR, "Signing version exceeds "
+                        "current version [%lu > %lu]", sbuf->signedversion,
+                        ctx->currentversion);
+        }
+
+ out:
+        return ret;
+}
+
+/**
+ * Build the on-disk signature for @inode from the signing request @sign
+ * and attach it to @dict, then validate the signed version against the
+ * inode's current version (see br_stub_compare_sign_version()).
+ *
+ * Returns 0 on success (with *@fakesuccess possibly set by the version
+ * comparison) and -1 on failure. On failure the signature buffer is
+ * released and BITROT_SIGNING_VERSION_KEY is removed from @dict so the
+ * dict never refers to the freed buffer.
+ */
+static int
+br_stub_prepare_signature (xlator_t *this,
+                           dict_t *dict, inode_t *inode,
+                           br_isignature_t *sign, int *fakesuccess)
+{
+        int32_t ret = 0;
+        size_t signaturelen = 0;
+        br_signature_t *sbuf = NULL;
+
+        if (!br_is_signature_type_valid (sign->signaturetype))
+                goto error_return;
+
+        signaturelen = sign->signaturelen;
+        ret = br_stub_alloc_versions (NULL, &sbuf, signaturelen);
+        if (ret)
+                goto error_return;
+        ret = br_stub_prepare_signing_request (dict, sbuf, sign, signaturelen);
+        if (ret)
+                goto dealloc_versions;
+
+        ret = br_stub_compare_sign_version (this, inode,
+                                            sbuf, dict, fakesuccess);
+        if (ret)
+                goto dealloc_versions;
+
+        return 0;
+
+ dealloc_versions:
+        /* sbuf was attached with dict_set_static_bin(), i.e. the dict only
+         * borrows the pointer: detach it before the buffer is freed
+         */
+        dict_del (dict, BITROT_SIGNING_VERSION_KEY);
+        br_stub_dealloc_versions (sbuf);
+ error_return:
+        return -1;
+}
+
+/**
+ * Handle an object signing request (GLUSTERFS_SET_OBJECT_SIGNATURE).
+ *
+ * Requests whose pid is not GF_CLIENT_PID_BITD are unwound with EINVAL.
+ * Otherwise the signature is prepared in @dict; if the signed version is
+ * older than the current one the request is answered with success without
+ * doing anything. In the normal case an fsetxattr stub is queued on
+ * priv->squeue, ordered by signed version, for br_stub_signth() to
+ * dispatch.
+ *
+ * Every path that does not queue a stub ends at the 'dofop' label, which
+ * unwinds the frame with the current op_ret/op_errno.
+ */
+static void
+br_stub_handle_object_signature (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, dict_t *dict,
+ br_isignature_t *sign, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int fakesuccess = 0;
+ br_stub_private_t *priv = NULL;
+ struct br_stub_signentry *sigstub = NULL;
+
+ priv = this->private;
+
+ if (frame->root->pid != GF_CLIENT_PID_BITD)
+ goto dofop;
+
+ ret = br_stub_prepare_signature (this, dict,
+ fd->inode, sign, &fakesuccess);
+ if (ret)
+ goto dofop;
+ if (fakesuccess) {
+ op_ret = op_errno = 0;
+ goto dofop;
+ }
+
+ /* the request key itself is not sent down */
+ dict_del (dict, GLUSTERFS_SET_OBJECT_SIGNATURE);
+
+ /* from here on this function owns one reference on xdata */
+ ret = -1;
+ if (!xdata) {
+ xdata = dict_new ();
+ if (!xdata)
+ goto dofop;
+ } else {
+ dict_ref (xdata);
+ }
+
+ ret = dict_set_int32 (xdata, GLUSTERFS_DURABLE_OP, 0);
+ if (ret)
+ goto unref_dict;
+
+ /* prepare dispatch stub to order object signing */
+ sigstub = GF_CALLOC (1, sizeof (*sigstub), gf_br_stub_mt_sigstub_t);
+ if (!sigstub)
+ goto unref_dict;
+
+ INIT_LIST_HEAD (&sigstub->list);
+ sigstub->v = ntohl (sign->signedversion);
+ sigstub->stub = fop_fsetxattr_stub (frame, br_stub_perform_objsign,
+ fd, dict, 0, xdata);
+ if (!sigstub->stub)
+ goto cleanup_stub;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ list_add_order (&sigstub->list, &priv->squeue, orderq);
+ pthread_cond_signal (&priv->cond);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ return;
+
+ cleanup_stub:
+ GF_FREE (sigstub);
+ unref_dict:
+ dict_unref (xdata);
+ dofop:
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+}
+
+/**
+ * Resume function for an fsetxattr that had to wait for versioning:
+ * marks the inode as modified, unwinds the fsetxattr (with -1/EINVAL if
+ * the marking failed) and releases the local.
+ *
+ * Always returns 0.
+ */
+int32_t
+br_stub_fsetxattr_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        br_stub_local_t *local = frame->local;
+
+        frame->local = NULL;
+
+        if (br_stub_mark_inode_modified (this, local) != 0) {
+                op_ret   = -1;
+                op_errno = EINVAL;
+        }
+
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+
+        br_stub_cleanup_local (local);
+        br_stub_dealloc_local (local);
+
+        return 0;
+}
+
+/**
+ * Handles object reopens. Object reopens can be of 3 types. 2 are from
+ * oneshot crawler and 1 from the regular signer.
+ * ONESHOT CRAWLER:
+ * For those objects which were created before bitrot was enabled. oneshot
+ * crawler crawls the namespace and signs all the objects. It has to do
+ * the versioning before making bit-rot-stub send a sign notification.
+ * So it sends fsetxattr with BR_OBJECT_REOPEN as the value. And bit-rot-stub
+ * upon getting BR_OBJECT_REOPEN value checks if the version has to be
+ * increased or not. By default the version will be increased. But if the
+ * object is modified before BR_OBJECT_REOPEN from oneshot crawler, then
+ * versioning need not be done. In that case simply a success is returned.
+ * SIGNER:
+ * Signer waits for 2 minutes upon getting the notification from bit-rot-stub
+ * and then it sends a dummy write (in reality a fsetxattr) call, to change
+ * the state of the inode from REOPEN_WAIT to SIGN_QUICK. The funny part here
+ * is though the inode's state is REOPEN_WAIT, the call sent by signer is
+ * BR_OBJECT_RESIGN. Once the state is changed to SIGN_QUICK, then yet another
+ * notification is sent upon release (RESIGN would have happened via fsetxattr,
+ * so a fd is needed) and the object is signed truly this time.
+ * There is a challenge in the above RESIGN method by signer. After sending
+ * the 1st notification, the inode could be forgotten before RESIGN request
+ * is received. In that case, the inode's context (the newly looked up inode)
+ * would not indicate the inode as being modified (it would be in the default
+ * state) and because of this, a SIGN_QUICK notification to truly sign the
+ * object would not be sent. So, this is how its handled.
+ * if (request == RESIGN) {
+ * if (inode->sign_info == NORMAL) {
+ * mark_inode_non_dirty;
+ * mark_inode_modified;
+ * }
+ * GOBACK (means unwind without doing versioning)
+ * }
+ */
+/*
+ * Handle an fsetxattr carrying BR_REOPEN_SIGN_HINT_KEY with value @val.
+ *
+ * The frame is unwound with success without versioning unless @val is
+ * BR_OBJECT_REOPEN and the inode is dirty; in that case incremental
+ * versioning is started and the fsetxattr completes later through
+ * br_stub_fsetxattr_resume(). Failures unwind with -1/EINVAL.
+ */
+static void
+br_stub_handle_object_reopen (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, uint32_t val)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ call_stub_t *stub = NULL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ br_stub_local_t *local = NULL;
+ gf_boolean_t goback = _gf_true;
+
+ ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto unwind;
+
+ LOCK (&fd->inode->lock);
+ {
+ /* only a REOPEN on a dirty inode needs versioning */
+ if ((val == BR_OBJECT_REOPEN) && inc_version)
+ goback = _gf_false;
+ /* RESIGN on an inode in the default sign state: mark it
+ * synced and modified */
+ if (val == BR_OBJECT_RESIGN &&
+ ctx->info_sign == BR_SIGN_NORMAL) {
+ __br_stub_mark_inode_synced (ctx);
+ __br_stub_set_inode_modified (ctx);
+ }
+ (void) __br_stub_inode_sign_state (ctx, GF_FOP_FSETXATTR, fd);
+ }
+ UNLOCK (&fd->inode->lock);
+
+ if (goback) {
+ op_ret = op_errno = 0;
+ goto unwind;
+ }
+
+ ret = br_stub_versioning_prep (frame, this, fd, ctx);
+ if (ret)
+ goto unwind;
+ local = frame->local;
+
+ stub = fop_fsetxattr_cbk_stub (frame, br_stub_fsetxattr_resume,
+ 0, 0, NULL);
+ if (!stub) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "failed to allocate stub for fsetxattr fop (gfid: %s),"
+ " unwinding", uuid_utoa (fd->inode->gfid));
+ goto cleanup_local;
+ }
+
+ /* on failure br_stub_perform_incversioning() unwinds the stub and
+ * releases the local itself, hence the ignored result */
+ (void) br_stub_perform_incversioning (this, frame, stub, fd, ctx);
+ return;
+
+ cleanup_local:
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+
+ unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+}
+
+/**
+ * This function only handles bad file identification. Instead of checking in
+ * fops like open, readv, writev whether the object is bad or not by doing
+ * getxattr calls, better to catch them when scrubber marks it as bad.
+ * So this callback is called only when the fsetxattr is sent by the scrubber
+ * to mark the object as bad.
+ *
+ * On success of the fsetxattr the inode is marked bad in its context and
+ * its gfid is passed to br_stub_add(). The fsetxattr result is unwound
+ * unchanged in every case and the local is released. Always returns 0.
+ */
+int
+br_stub_fsetxattr_bad_object_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ int32_t ret = -1;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ /*
+ * What to do if marking the object as bad fails? (i.e. in memory
+ * marking within the inode context. If we are here means fsetxattr
+ * fop has succeeded on disk and the bad object xattr has been set).
+ * We can return failure to scrubber, but there is nothing the scrubber
+ * can do with it (it might assume that the on disk setxattr itself has
+ * failed). The main purpose of this operation is to help identify the
+ * bad object by checking the inode context itself (thus avoiding the
+ * necessity of doing a getxattr fop on the disk).
+ *
+ * So as of now, success itself is being returned even though inode
+ * context set operation fails.
+ * In future if there is any change in the policy which can handle this,
+ * then appropriate response should be sent (i.e. success or error).
+ */
+ ret = br_stub_mark_object_bad (this, local->u.context.inode);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
+ "failed to mark object %s as bad",
+ uuid_utoa (local->u.context.inode->gfid));
+
+ /* NOTE(review): the result of br_stub_add() is stored but never
+ * checked or logged -- confirm that ignoring it is intended */
+ ret = br_stub_add (this, local->u.context.inode->gfid);
+
+unwind:
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+ return 0;
+}
+
+/**
+ * Handle an fsetxattr carrying BITROT_OBJECT_BAD_KEY.
+ *
+ * Only requests whose pid is GF_CLIENT_PID_SCRUB are accepted; anything
+ * else is unwound with EINVAL. Accepted requests are wound to the first
+ * child with br_stub_fsetxattr_bad_object_cbk() as callback, carrying a
+ * local that references @fd and its inode. A failed local allocation is
+ * unwound with ENOMEM.
+ *
+ * Always returns 0.
+ */
+static int32_t
+br_stub_handle_bad_object_key (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                               dict_t *dict, int flags, dict_t *xdata)
+{
+        int32_t          op_ret   = -1;
+        int32_t          op_errno = EINVAL;
+        br_stub_local_t *local    = NULL;
+
+        if (frame->root->pid != GF_CLIENT_PID_SCRUB) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, "bad object marking "
+                        "on %s is not from the scrubber",
+                        uuid_utoa (fd->inode->gfid));
+                goto reject;
+        }
+
+        local = br_stub_alloc_local (this);
+        if (local == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
+                        "failed to allocate memory for fsetxattr on %s",
+                        uuid_utoa (fd->inode->gfid));
+                op_errno = ENOMEM;
+                goto reject;
+        }
+
+        br_stub_fill_local (local, NULL, fd, fd->inode,
+                            fd->inode->gfid, BR_STUB_NO_VERSIONING, 0);
+        frame->local = local;
+
+        STACK_WIND (frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags,
+                    xdata);
+        return 0;
+
+ reject:
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+
+/**
+ * As of now, versioning is done by the stub (though as a setxattr
+ * operation) as part of inode modification operations such as writev,
+ * truncate, ftruncate. And signing is done by BitD by a fsetxattr call.
+ * So any kind of setxattr coming on the versioning and the signing xattr is
+ * not allowed (i.e. BITROT_CURRENT_VERSION_KEY and BITROT_SIGNING_VERSION_KEY).
+ * In future if BitD/scrubber are allowed to change the versioning
+ * xattrs (though I cannot see a reason for it as of now), then the below
+ * function can be modified to block setxattr on version for only applications.
+ *
+ * NOTE: BitD sends sign request on GLUSTERFS_SET_OBJECT_SIGNATURE key.
+ * BITROT_SIGNING_VERSION_KEY is the xattr used to save the signature.
+ *
+ */
+/* Log and reject (-1/EINVAL) an fsetxattr on the internal xattr @key. */
+static int32_t
+br_stub_handle_internal_xattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                               char *key)
+{
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                BRS_MSG_SET_INTERNAL_XATTR, "setxattr called"
+                " on the internal xattr %s for inode %s", key,
+                uuid_utoa (fd->inode->gfid));
+
+        STACK_UNWIND_STRICT (fsetxattr, frame, -1, EINVAL, NULL);
+
+        return 0;
+}
+
+/**
+ * fsetxattr entry point. For regular files the request is dispatched on
+ * the first matching key, in this order:
+ *   1. GLUSTERFS_SET_OBJECT_SIGNATURE  - object signing request
+ *   2. BITROT_SIGNING_VERSION_KEY, BITROT_CURRENT_VERSION_KEY,
+ *      GLUSTERFS_GET_OBJECT_SIGNATURE  - internal keys, rejected
+ *   3. BR_REOPEN_SIGN_HINT_KEY         - object reopen request
+ *   4. BITROT_OBJECT_BAD_KEY           - bad object marking
+ * Everything else, and every request on a non-regular file, is wound to
+ * the first child unchanged.
+ *
+ * Always returns 0.
+ */
+int
+br_stub_fsetxattr (call_frame_t *frame, xlator_t *this,
+                   fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
+{
+        uint32_t         val  = 0;
+        br_isignature_t *sign = NULL;
+        size_t           idx  = 0;
+        char            *internal_keys[] = {
+                BITROT_SIGNING_VERSION_KEY,     /* signing xattr */
+                BITROT_CURRENT_VERSION_KEY,     /* version xattr */
+                GLUSTERFS_GET_OBJECT_SIGNATURE,
+        };
+
+        if (IA_ISREG (fd->inode->ia_type)) {
+                /* object signature request */
+                if (dict_get_bin (dict, GLUSTERFS_SET_OBJECT_SIGNATURE,
+                                  (void **) &sign) == 0) {
+                        br_stub_handle_object_signature (frame, this,
+                                                         fd, dict, sign, xdata);
+                        return 0;
+                }
+
+                /* internal xattrs may not be set from outside */
+                for (idx = 0;
+                     idx < sizeof (internal_keys) / sizeof (internal_keys[0]);
+                     idx++) {
+                        if (dict_get (dict, internal_keys[idx])) {
+                                br_stub_handle_internal_xattr (frame, this, fd,
+                                                               internal_keys[idx]);
+                                return 0;
+                        }
+                }
+
+                /* object reopen request */
+                if (dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val) == 0) {
+                        br_stub_handle_object_reopen (frame, this, fd, val);
+                        return 0;
+                }
+
+                /* handle bad object */
+                if (dict_get (dict, BITROT_OBJECT_BAD_KEY)) {
+                        br_stub_handle_bad_object_key (frame, this, fd,
+                                                       dict, flags, xdata);
+                        return 0;
+                }
+        }
+
+        STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags,
+                    xdata);
+        return 0;
+}
+
+
+/**
+ * Currently BitD and scrubber are doing fsetxattr to either sign the object
+ * or to mark it as bad. Hence setxattr on any of those keys is denied directly
+ * without checking from where the fop is coming.
+ * Later, if BitD or Scrubber does setxattr of those keys, then appropriate
+ * check has to be added below.
+ *
+ * A request carrying any of the bit-rot internal keys is logged (with a
+ * dump of the dict) and unwound with -1/EINVAL; every other request is
+ * passed to the first child. Always returns 0.
+ */
+int
+br_stub_setxattr (call_frame_t *frame, xlator_t *this,
+                  loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+        char *format = "(%s:%s)";
+
+        if (dict_get (dict, GLUSTERFS_SET_OBJECT_SIGNATURE) ||
+            dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE) ||
+            dict_get (dict, BR_REOPEN_SIGN_HINT_KEY) ||
+            dict_get (dict, BITROT_OBJECT_BAD_KEY) ||
+            dict_get (dict, BITROT_SIGNING_VERSION_KEY) ||
+            dict_get (dict, BITROT_CURRENT_VERSION_KEY)) {
+                /* The dump buffer is only needed on this rejection path;
+                 * keeping it here avoids zero-filling 64KiB of stack on
+                 * every ordinary setxattr.
+                 */
+                char dump[64*1024] = {0,};
+
+                dict_dump_to_str (dict, dump, sizeof(dump), format);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_SET_INTERNAL_XATTR, "setxattr called on "
+                        "internal xattr %s", dump);
+                goto unwind;
+        }
+
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->setxattr, loc, dict, flags,
+                         xdata);
+        return 0;
+unwind:
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/** }}} */
+
+
+/** {{{ */
+
+/* {f}removexattr() */
+
+/**
+ * removexattr() entry point: removal of the bad-object, signing-version and
+ * current-version xattrs is refused with -1 / EINVAL (and logged); any other
+ * name is wound to the first child. Always returns 0.
+ */
+int32_t
+br_stub_removexattr (call_frame_t *frame, xlator_t *this,
+                     loc_t *loc, const char *name, dict_t *xdata)
+{
+        /* none of the internal names matched: plain pass-through */
+        if (strcmp (BITROT_OBJECT_BAD_KEY, name) &&
+            strcmp (BITROT_SIGNING_VERSION_KEY, name) &&
+            strcmp (BITROT_CURRENT_VERSION_KEY, name)) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->removexattr,
+                                 loc, name, xdata);
+                return 0;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0,
+                BRS_MSG_REMOVE_INTERNAL_XATTR, "removexattr called"
+                " on internal xattr %s for file %s", name, loc->path);
+
+        STACK_UNWIND_STRICT (removexattr, frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+/**
+ * fremovexattr() entry point: same policy as the path based variant --
+ * the bad-object, signing-version and current-version xattrs cannot be
+ * removed (-1 / EINVAL, logged with the inode's gfid); everything else is
+ * wound to the first child. Always returns 0.
+ */
+int32_t
+br_stub_fremovexattr (call_frame_t *frame, xlator_t *this,
+                      fd_t *fd, const char *name, dict_t *xdata)
+{
+        /* none of the internal names matched: plain pass-through */
+        if (strcmp (BITROT_OBJECT_BAD_KEY, name) &&
+            strcmp (BITROT_SIGNING_VERSION_KEY, name) &&
+            strcmp (BITROT_CURRENT_VERSION_KEY, name)) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->fremovexattr,
+                                 fd, name, xdata);
+                return 0;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0,
+                BRS_MSG_REMOVE_INTERNAL_XATTR, "removexattr called"
+                " on internal xattr %s for inode %s", name,
+                uuid_utoa (fd->inode->gfid));
+
+        STACK_UNWIND_STRICT (fremovexattr, frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* {f}getxattr() */
+
+/**
+ * Callback used when getxattr()/fgetxattr() was issued without a name
+ * (i.e. a listing): on success br_stub_remove_vxattrs() is applied to the
+ * returned dictionary before it is unwound to the caller. Failures are
+ * unwound untouched.
+ */
+int
+br_stub_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+        if (op_ret >= 0)
+                br_stub_remove_vxattrs (xattr);
+
+        STACK_UNWIND (frame, op_ret, op_errno, xattr, xdata);
+        return 0;
+}
+
+/**
+ * ONE SHOT CRAWLER from BitD signs the objects that it encounters while
+ * crawling, if the object is identified as stale by the stub. Stub follows
+ * the below logic to mark an object as stale or not.
+ * If the ongoing version and the signed_version match, then the object is not
+ * stale. Just return. Otherwise, if they do not match, then it means one
+ * of the below things.
+ * 1) If the inode does not need write back of the version and the sign state
+ * is NORMAL, then some active i/o is going on the object. So skip it.
+ * A notification will be sent to trigger the sign once the release is
+ * received on the object.
+ * 2) If inode does not need writeback of the version and the sign state is
+ * either reopen wait or quick sign, then it means:
+ * A) BitD restarted and it is not sure whether the object it encountered
+ * while crawling is in its timer wheel or not. Since there is no way to
+ * scan the timer wheel as of now, ONE SHOT CRAWLER just goes ahead and
+ * signs the object. Since the inode does not need writeback, version will
+ * not be incremented and directly the object will be signed.
+ * 3) If the inode needs writeback, then it means the inode was forgotten after
+ * the versioning and it has to be signed now.
+ *
+ * This is the algorithm followed:
+ * if (ongoing_version == signed_version); then
+ * object_is_not_stale;
+ * return;
+ * else; then
+ * if (!inode_needs_writeback && inode_sign_state != NORMAL); then
+ * object_is_stale;
+ * if (inode_needs_writeback); then
+ * object_is_stale;
+ *
+ * For SCRUBBER, no need to check for the sign state and inode writeback.
+ * If the ondisk ongoingversion and the ondisk signed version does not match,
+ * then treat the object as stale.
+ */
+char
+br_stub_is_object_stale (xlator_t *this, call_frame_t *frame, inode_t *inode,
+                         br_version_t *obuf, br_signature_t *sbuf)
+{
+        br_stub_inode_ctx_t *ctx      = NULL;
+        uint64_t             ctx_addr = 0;
+        char                 stale    = 0;
+
+        /* versions agree: nothing is stale */
+        if (obuf->ongoingversion == sbuf->signedversion)
+                return 0;
+
+        /* the scrubber only cares about the version mismatch */
+        if (frame->root->pid == GF_CLIENT_PID_SCRUB)
+                return 1;
+
+        if (br_stub_get_inode_ctx (this, inode, &ctx_addr)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the "
+                        "inode context for %s", uuid_utoa (inode->gfid));
+                return 0;
+        }
+
+        ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+        LOCK (&inode->lock);
+        {
+                /*
+                 * stale when the inode needs writeback, or when it does not
+                 * but the sign state is something other than NORMAL
+                 */
+                if (__br_stub_is_inode_dirty (ctx) ||
+                    ctx->info_sign != BR_SIGN_NORMAL)
+                        stale = 1;
+        }
+        UNLOCK (&inode->lock);
+
+        return stale;
+}
+
+/**
+ * Callback for getxattr()/fgetxattr().
+ *
+ * Requests that were not wound with BR_STUB_REQUEST_COOKIE, and failed
+ * requests, are unwound as they came. For an object signature request the
+ * version/signature xattrs returned by the child are turned into a
+ * br_isignature_out_t that is stored in @xattr under
+ * GLUSTERFS_GET_OBJECT_SIGNATURE; on success op_ret is the size of that
+ * structure. The raw versioning xattrs are stripped from @xattr before
+ * unwinding.
+ *
+ * Error mapping for signature requests: bad object -> EIO, invalid xattr
+ * state or unusable size -> EINVAL, missing/unsigned -> ENODATA, allocation
+ * failure -> ENOMEM.
+ *
+ * The local attached by the call path is detached and released on every
+ * exit, including the early failure exit.
+ */
+int
+br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+        int32_t              ret          = 0;
+        size_t               totallen     = 0;
+        size_t               signaturelen = 0;
+        uint32_t             xattrsize    = 0;
+        br_version_t        *obuf         = NULL;
+        br_signature_t      *sbuf         = NULL;
+        br_isignature_out_t *sign         = NULL;
+        br_vxattr_status_t   status;
+        br_stub_local_t     *local        = NULL;
+        inode_t             *inode        = NULL;
+        gf_boolean_t         bad_object   = _gf_false;
+
+        /*
+         * Only cookie requests carry a local. Take it off the frame before
+         * any early exit so that br_stub_cleanup_local() runs even when the
+         * child failed the fop (previously it was left on the frame and
+         * never cleaned up on that path).
+         */
+        if (cookie == (void *) BR_STUB_REQUEST_COOKIE) {
+                local = frame->local;
+                frame->local = NULL;
+        }
+
+        if (op_ret < 0)
+                goto unwind;
+        if (cookie != (void *) BR_STUB_REQUEST_COOKIE)
+                goto unwind;
+
+        inode = local->u.context.inode;
+
+        op_ret = -1;
+        status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object);
+
+        op_errno = EIO;
+        if (bad_object)
+                goto delkeys;
+
+        op_errno = EINVAL;
+        if (status == BR_VXATTR_STATUS_INVALID)
+                goto delkeys;
+
+        op_errno = ENODATA;
+        if ((status == BR_VXATTR_STATUS_MISSING)
+            || (status == BR_VXATTR_STATUS_UNSIGNED))
+                goto delkeys;
+
+        /**
+         * okay.. we have enough information to satisfy the request,
+         * namely: version and signing extended attribute. what's
+         * pending is the signature length -- that's figured out
+         * indirectly via the size of the _whole_ xattr and the
+         * on-disk signing xattr header size.
+         */
+        op_errno = EINVAL;
+        /* read into a real uint32_t; never alias a size_t as uint32_t */
+        ret = dict_get_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+                               &xattrsize);
+        if (ret)
+                goto delkeys;
+
+        /* an xattr shorter than its own header would underflow below */
+        if (xattrsize < sizeof (br_signature_t))
+                goto delkeys;
+
+        signaturelen = (size_t) xattrsize - sizeof (br_signature_t);
+        totallen = sizeof (br_isignature_out_t) + signaturelen;
+
+        op_errno = ENOMEM;
+        sign = GF_CALLOC (1, totallen, gf_br_stub_mt_signature_t);
+        if (!sign)
+                goto delkeys;
+
+        sign->time[0] = obuf->timebuf[0];
+        sign->time[1] = obuf->timebuf[1];
+
+        /* Object's dirty state & current signed version */
+        sign->version = sbuf->signedversion;
+        sign->stale = br_stub_is_object_stale (this, frame, inode, obuf, sbuf);
+
+        /* Object's signature */
+        sign->signaturelen = signaturelen;
+        sign->signaturetype = sbuf->signaturetype;
+        (void) memcpy (sign->signature, sbuf->signature, signaturelen);
+
+        op_errno = EINVAL;
+        ret = dict_set_bin (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                            (void *)sign, totallen);
+        if (ret < 0) {
+                GF_FREE (sign);
+                goto delkeys;
+        }
+        op_errno = 0;
+        op_ret = totallen;
+
+ delkeys:
+        br_stub_remove_vxattrs (xattr);
+
+ unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, xattr, xdata);
+        if (local) {
+                br_stub_cleanup_local (local);
+                br_stub_dealloc_local (local);
+        }
+        return 0;
+}
+
+/**
+ * Answer a GLUSTERFS_GET_BR_STUB_INIT_TIME request directly from this
+ * translator: a br_stub_init_t holding priv->boot[] and priv->export is
+ * placed in a fresh dictionary and unwound. op_ret is the size of the
+ * structure on success; on failure it is negative with ENOMEM (no
+ * dictionary) or EINVAL (could not store the value).
+ */
+static void
+br_stub_send_stub_init_time (call_frame_t *frame, xlator_t *this)
+{
+        br_stub_private_t *priv     = this->private;
+        br_stub_init_t     stub     = {{0,},};
+        dict_t            *xattr    = NULL;
+        int                op_errno = 0;
+        int                op_ret   = 0;
+
+        xattr = dict_new ();
+        if (xattr == NULL) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        } else {
+                stub.timebuf[0] = priv->boot[0];
+                stub.timebuf[1] = priv->boot[1];
+                /* copy the export string including its terminator */
+                memcpy (stub.export, priv->export,
+                        strlen (priv->export) + 1);
+
+                op_ret = dict_set_static_bin (xattr,
+                                              GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                                              (void *) &stub,
+                                              sizeof (br_stub_init_t));
+                if (op_ret < 0)
+                        op_errno = EINVAL;
+                else
+                        op_ret = sizeof (br_stub_init_t);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, xattr, NULL);
+
+        if (xattr != NULL)
+                dict_unref (xattr);
+}
+
+/**
+ * getxattr() entry point.
+ *
+ *  - no @name: wound with br_stub_listxattr_cbk.
+ *  - node-uuid on a regular file that br_stub_check_bad_object() reports as
+ *    bad: unwound with the error it filled in.
+ *  - internal xattr names: refused with -1 / EINVAL.
+ *  - GLUSTERFS_GET_BR_STUB_INIT_TIME on the root gfid: answered locally.
+ *  - GLUSTERFS_GET_OBJECT_SIGNATURE on a regular file: wound with
+ *    BR_STUB_REQUEST_COOKIE and a local holding the inode, so that
+ *    br_stub_getxattr_cbk() builds the signature reply.
+ *  - anything else: wound with br_stub_getxattr_cbk and no cookie.
+ *
+ * Always returns 0.
+ */
+int
+br_stub_getxattr (call_frame_t *frame, xlator_t *this,
+                  loc_t *loc, const char *name, dict_t *xdata)
+{
+        br_stub_local_t    *local    = NULL;
+        int32_t             op_errno = EINVAL;
+        int32_t             op_ret   = -1;
+        fop_getxattr_cbk_t  cbk      = br_stub_getxattr_cbk;
+        void               *cookie   = NULL;
+        uuid_t              rootgfid = {0, };
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
+
+        rootgfid[15] = 1;
+
+        if (name == NULL) {
+                cbk = br_stub_listxattr_cbk;
+                goto wind;
+        }
+
+        /**
+         * node-uuid asked on an inode that is marked bad: fail the request
+         * (the original note says EIO is returned) so that AFR picks the
+         * node-uuid of the subvolume holding the good copy of the file.
+         */
+        if (IA_ISREG (loc->inode->ia_type) && XATTR_IS_NODE_UUID (name) &&
+            br_stub_check_bad_object (this, loc->inode, &op_ret, &op_errno))
+                goto unwind;
+
+        if (br_stub_is_internal_xattr (name))
+                goto unwind;
+
+        /* the init-time xattr is served only for the root */
+        if (!strncmp (name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                      strlen (GLUSTERFS_GET_BR_STUB_INIT_TIME))
+            && (!gf_uuid_compare (loc->gfid, rootgfid)
+                || !gf_uuid_compare (loc->inode->gfid, rootgfid))) {
+                br_stub_send_stub_init_time (frame, this);
+                return 0;
+        }
+
+        /* signature requests make sense only for regular files */
+        if (IA_ISREG (loc->inode->ia_type)
+            && !strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                         strlen (GLUSTERFS_GET_OBJECT_SIGNATURE))) {
+                local = br_stub_alloc_local (this);
+                if (local == NULL) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                br_stub_fill_local (local, NULL, NULL, loc->inode,
+                                    loc->inode->gfid,
+                                    BR_STUB_NO_VERSIONING, 0);
+                frame->local = local;
+                cookie = (void *) BR_STUB_REQUEST_COOKIE;
+        }
+
+ wind:
+        STACK_WIND_COOKIE
+                (frame, cbk, cookie, FIRST_CHILD (this),
+                 FIRST_CHILD (this)->fops->getxattr, loc, name, xdata);
+        return 0;
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+}
+
+/**
+ * fgetxattr() entry point; fd based twin of br_stub_getxattr().
+ *
+ *  - no @name: wound with br_stub_listxattr_cbk.
+ *  - node-uuid on a regular file that br_stub_check_bad_object() reports as
+ *    bad: unwound with the error it filled in.
+ *  - internal xattr names: refused with -1 / EINVAL.
+ *  - GLUSTERFS_GET_BR_STUB_INIT_TIME on the root gfid: answered locally.
+ *  - GLUSTERFS_GET_OBJECT_SIGNATURE on a regular file: wound with
+ *    BR_STUB_REQUEST_COOKIE and a local holding fd and inode.
+ *  - anything else: wound with br_stub_getxattr_cbk and no cookie.
+ *
+ * Always returns 0.
+ */
+int
+br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
+                   fd_t *fd, const char *name, dict_t *xdata)
+{
+        br_stub_local_t     *local    = NULL;
+        int32_t              op_errno = EINVAL;
+        int32_t              op_ret   = -1;
+        fop_fgetxattr_cbk_t  cbk      = br_stub_getxattr_cbk;
+        void                *cookie   = NULL;
+        uuid_t               rootgfid = {0, };
+
+        rootgfid[15] = 1;
+
+        if (name == NULL) {
+                cbk = br_stub_listxattr_cbk;
+                goto wind;
+        }
+
+        /**
+         * node-uuid asked on an inode that is marked bad: fail the request
+         * (the original note says EIO is returned) so that AFR picks the
+         * node-uuid of the subvolume holding the good copy of the file.
+         */
+        if (IA_ISREG (fd->inode->ia_type) && XATTR_IS_NODE_UUID (name) &&
+            br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno))
+                goto unwind;
+
+        if (br_stub_is_internal_xattr (name))
+                goto unwind;
+
+        /* the init-time xattr is served only for the root */
+        if (!strncmp (name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+                      strlen (GLUSTERFS_GET_BR_STUB_INIT_TIME))
+            && !gf_uuid_compare (fd->inode->gfid, rootgfid)) {
+                br_stub_send_stub_init_time (frame, this);
+                return 0;
+        }
+
+        /* signature requests make sense only for regular files */
+        if (IA_ISREG (fd->inode->ia_type)
+            && !strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                         strlen (GLUSTERFS_GET_OBJECT_SIGNATURE))) {
+                local = br_stub_alloc_local (this);
+                if (local == NULL) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                br_stub_fill_local (local, NULL, fd, fd->inode,
+                                    fd->inode->gfid,
+                                    BR_STUB_NO_VERSIONING, 0);
+                frame->local = local;
+                cookie = (void *) BR_STUB_REQUEST_COOKIE;
+        }
+
+ wind:
+        STACK_WIND_COOKIE
+                (frame, cbk, cookie, FIRST_CHILD (this),
+                 FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata);
+        return 0;
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+}
+
+/**
+ * readv() entry point: reads are wound to the first child unless
+ * br_stub_check_bad_object() returns non-zero for the inode, in which case
+ * the fop is unwound with the op_ret/op_errno it was given to fill.
+ * Argument validation failures unwind with -1 / EINVAL. Always returns 0.
+ */
+int32_t
+br_stub_readv (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+        int32_t op_errno = EINVAL;
+        int32_t op_ret   = -1;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
+
+        if (br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno))
+                goto unwind;
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->readv, fd, size, offset,
+                         flags, xdata);
+        return 0;
+
+unwind:
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, NULL, 0, NULL,
+                             NULL, NULL);
+        return 0;
+}
+
+/**
+ * The first write response on the first fd in the list of fds sets the flag
+ * that records the inode as modified. Subsequent write responses, on the
+ * same or on another fd, do not change it. The inode-modified flag is unset
+ * only upon release of all the fds.
+ *
+ * A successful write whose inode could not be marked modified is reported
+ * upwards as -1 / EINVAL. The local is released after unwinding.
+ */
+int32_t
+br_stub_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                    struct iatt *postbuf, dict_t *xdata)
+{
+        br_stub_local_t *local = frame->local;
+
+        frame->local = NULL;
+
+        if (op_ret >= 0 && br_stub_mark_inode_modified (this, local)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+        STACK_UNWIND_STRICT (writev, frame,
+                             op_ret, op_errno, prebuf, postbuf, xdata);
+
+        br_stub_cleanup_local (local);
+        br_stub_dealloc_local (local);
+
+        return 0;
+}
+
+/**
+ * Resume routine of the stub created in br_stub_writev(): winds the write
+ * to the first child with br_stub_writev_cbk as its callback.
+ */
+int32_t
+br_stub_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                       struct iovec *vector, int32_t count, off_t offset,
+                       uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+        STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, offset, flags, iobref, xdata);
+
+        return 0;
+}
+
+/**
+ * This is probably the most crucial part about the whole versioning thing.
+ * There's absolutely no differentiation as such between an anonymous fd
+ * and a regular fd except the fd context initialization. Object versioning
+ * is performed when the inode is dirty. Parallel write operations are no
+ * special with each write performing object versioning followed by marking
+ * the inode as non-dirty (synced). This is followed by the actual operation
+ * (writev() in this case) which on a success marks the inode as modified.
+ * This prevents signing of objects that have not been modified.
+ *
+ * Flow of this routine:
+ *  - @this, @frame and @fd are validated; br_stub_need_versioning() is asked
+ *    for @inc_version, @modified and the inode context. A non-zero return
+ *    unwinds with -1 / EINVAL.
+ *  - br_stub_check_bad_object() gets &op_ret/&op_errno; a non-zero return
+ *    unwinds with whatever those hold afterwards.
+ *  - !inc_version && modified: wound with default_writev_cbk.
+ *  - !inc_version && !modified: local is filled and the write is wound with
+ *    br_stub_writev_cbk.
+ *  - inc_version: a call stub resuming in br_stub_writev_resume() is created
+ *    and handed to br_stub_perform_incversioning(), whose return value is
+ *    returned.
+ *
+ * All other paths return 0.
+ */
+int32_t
+br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ fop_writev_cbk_t cbk = default_writev_cbk;
+ br_stub_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto unwind;
+
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+ /**
+ * The inode is not dirty and also witnessed at least one successful
+ * modification operation. Therefore, subsequent operations need not
+ * perform any special tracking.
+ */
+ if (!inc_version && modified)
+ goto wind;
+
+ /**
+ * okay.. so, either the inode needs versioning or the modification
+ * needs to be tracked. ->cbk is set to the appropriate callback
+ * routine for this.
+ * NOTE: ->local needs to be deallocated on failures from here on.
+ * (frame->local is read right after br_stub_versioning_prep() --
+ * presumably that call attaches it; confirm against its definition.)
+ */
+ ret = br_stub_versioning_prep (frame, this, fd, ctx);
+ if (ret)
+ goto unwind;
+
+ local = frame->local;
+ if (!inc_version) {
+ br_stub_fill_local (local, NULL, fd, fd->inode,
+ fd->inode->gfid, BR_STUB_NO_VERSIONING, 0);
+ cbk = br_stub_writev_cbk;
+ goto wind;
+ }
+
+ stub = fop_writev_stub (frame, br_stub_writev_resume, fd, vector, count,
+ offset, flags, iobref, xdata);
+
+ if (!stub) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "failed to allocate stub for write fop (gfid: %s), "
+ "unwinding", uuid_utoa (fd->inode->gfid));
+ goto cleanup_local;
+ }
+
+ /* Perform Versioning; the write itself continues from the stub */
+ return br_stub_perform_incversioning (this, frame, stub, fd, ctx);
+
+ wind:
+ STACK_WIND (frame, cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
+
+ cleanup_local:
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+
+ unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+/**
+ * ftruncate() callback: on success the inode is marked as modified; if that
+ * marking fails the fop is reported upwards as -1 / EINVAL. The local is
+ * detached from the frame up front and released after unwinding.
+ */
+int32_t
+br_stub_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                       struct iatt *postbuf, dict_t *xdata)
+{
+        br_stub_local_t *local = frame->local;
+
+        frame->local = NULL;
+
+        if (op_ret >= 0 && br_stub_mark_inode_modified (this, local)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+        STACK_UNWIND_STRICT (ftruncate, frame,
+                             op_ret, op_errno, prebuf, postbuf, xdata);
+
+        br_stub_cleanup_local (local);
+        br_stub_dealloc_local (local);
+
+        return 0;
+}
+
+/**
+ * Resume routine of the stub created in br_stub_ftruncate(): winds the
+ * ftruncate to the first child with br_stub_ftruncate_cbk as its callback.
+ */
+int32_t
+br_stub_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                          off_t offset, dict_t *xdata)
+{
+        STACK_WIND (frame, br_stub_ftruncate_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->ftruncate,
+                    fd, offset, xdata);
+
+        return 0;
+}
+
+/* c.f. br_stub_writev() for explanation
+ *
+ * ftruncate() entry point; same decision tree as the write path:
+ *  - @this, @frame and @fd are validated; br_stub_need_versioning() is asked
+ *    for @inc_version, @modified and the inode context. A non-zero return
+ *    unwinds with -1 / EINVAL.
+ *  - br_stub_check_bad_object() gets &op_ret/&op_errno; a non-zero return
+ *    unwinds with whatever those hold afterwards.
+ *  - !inc_version && modified: wound with default_ftruncate_cbk.
+ *  - otherwise br_stub_versioning_prep() runs first (frame->local is read
+ *    right after it -- presumably attached by that call; confirm). Without
+ *    a version bump the local is filled and the fop is wound with
+ *    br_stub_ftruncate_cbk; with a version bump a call stub resuming in
+ *    br_stub_ftruncate_resume() is handed to br_stub_perform_incversioning()
+ *    and its return value is returned.
+ *  - stub allocation failure releases the local and unwinds.
+ *
+ * All other paths return 0.
+ */
+int32_t
+br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ fop_ftruncate_cbk_t cbk = default_ftruncate_cbk;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto unwind;
+
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+ if (!inc_version && modified)
+ goto wind;
+
+ ret = br_stub_versioning_prep (frame, this, fd, ctx);
+ if (ret)
+ goto unwind;
+
+ local = frame->local;
+ if (!inc_version) {
+ br_stub_fill_local (local, NULL, fd, fd->inode,
+ fd->inode->gfid, BR_STUB_NO_VERSIONING, 0);
+ cbk = br_stub_ftruncate_cbk;
+ goto wind;
+ }
+
+ stub = fop_ftruncate_stub (frame, br_stub_ftruncate_resume, fd, offset,
+ xdata);
+ if (!stub) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "failed to allocate stub for ftruncate fop (gfid: %s),"
+ " unwinding", uuid_utoa (fd->inode->gfid));
+ goto cleanup_local;
+ }
+
+ return br_stub_perform_incversioning (this, frame, stub, fd, ctx);
+
+ wind:
+ STACK_WIND (frame, cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+ cleanup_local:
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+
+ unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+/**
+ * truncate() callback: on success the inode is marked as modified; if that
+ * marking fails the fop is reported upwards as -1 / EINVAL. The local is
+ * detached from the frame up front and released after unwinding.
+ */
+int32_t
+br_stub_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        br_stub_local_t *local = frame->local;
+
+        frame->local = NULL;
+
+        if (op_ret >= 0 && br_stub_mark_inode_modified (this, local)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf,
+                             xdata);
+
+        br_stub_cleanup_local (local);
+        br_stub_dealloc_local (local);
+
+        return 0;
+}
+
+/**
+ * Resume routine of the stub created in br_stub_truncate().
+ *
+ * Drops one reference on the fd stored in the local and winds the truncate
+ * to the first child. The callback is br_stub_truncate_cbk so that the fop
+ * is unwound as a truncate (it used to be wound with the ftruncate callback,
+ * which has the same signature but unwinds the frame as an ftruncate).
+ */
+int32_t
+br_stub_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                         off_t offset, dict_t *xdata)
+{
+        br_stub_local_t *local = frame->local;
+
+        fd_unref (local->u.context.fd);
+        STACK_WIND (frame, br_stub_truncate_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+        return 0;
+}
+
+/**
+ * Bit-rot-stub depends heavily on fd based operations for doing versioning
+ * and sending notifications. It starts tracking the operation upon getting
+ * the first fd based modify operation by doing versioning, and sends the
+ * notification when the last fd through which the inode was modified is
+ * released.
+ * For truncate there is no fd, which makes versioning and notification
+ * difficult. It is handled by doing the versioning on an anonymous fd. The
+ * fd stays valid till the completion of the truncate call, which guarantees
+ * that the release on this anonymous fd happens after the truncate call and
+ * that the notification is sent after the truncate call.
+ *
+ * c.f. br_stub_writev() for explanation of the versioning decisions.
+ *
+ * Every failure after fd_anonymous() succeeded goes through cleanup_fd so
+ * that the anonymous fd reference taken here is always dropped (the
+ * bad-object path used to skip it).
+ *
+ * Returns 0, or the return value of br_stub_perform_incversioning() when a
+ * version increment is required.
+ */
+int32_t
+br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                  off_t offset, dict_t *xdata)
+{
+        br_stub_local_t     *local       = NULL;
+        call_stub_t         *stub        = NULL;
+        int32_t              op_ret      = -1;
+        int32_t              op_errno    = EINVAL;
+        gf_boolean_t         inc_version = _gf_false;
+        gf_boolean_t         modified    = _gf_false;
+        br_stub_inode_ctx_t *ctx         = NULL;
+        int32_t              ret         = -1;
+        fd_t                *fd          = NULL;
+        fop_truncate_cbk_t   cbk         = default_truncate_cbk;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
+
+        fd = fd_anonymous (loc->inode);
+        if (!fd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, "failed to create "
+                        "anonymous fd for the inode %s",
+                        uuid_utoa (loc->inode->gfid));
+                goto unwind;
+        }
+
+        ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx);
+        if (ret)
+                goto cleanup_fd;
+
+        /* op_ret/op_errno are filled by the check when the object is bad */
+        ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
+        if (ret)
+                goto cleanup_fd;
+
+        if (!inc_version && modified)
+                goto wind;
+
+        ret = br_stub_versioning_prep (frame, this, fd, ctx);
+        if (ret)
+                goto cleanup_fd;
+
+        local = frame->local;
+        if (!inc_version) {
+                br_stub_fill_local (local, NULL, fd, fd->inode,
+                                    fd->inode->gfid, BR_STUB_NO_VERSIONING, 0);
+                cbk = br_stub_truncate_cbk;
+                goto wind;
+        }
+
+        stub = fop_truncate_stub (frame, br_stub_truncate_resume, loc, offset,
+                                  xdata);
+        if (!stub) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+                        "failed to allocate stub for truncate fop (gfid: %s), "
+                        "unwinding", uuid_utoa (fd->inode->gfid));
+                goto cleanup_local;
+        }
+
+        return br_stub_perform_incversioning (this, frame, stub, fd, ctx);
+
+ wind:
+        STACK_WIND (frame, cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+        /* the reference taken by fd_anonymous() above is no longer needed */
+        fd_unref (fd);
+        return 0;
+
+ cleanup_local:
+        br_stub_cleanup_local (local);
+        br_stub_dealloc_local (local);
+ cleanup_fd:
+        fd_unref (fd);
+ unwind:
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, NULL, NULL,
+                             NULL);
+
+        return 0;
+}
+
+/** }}} */
+
+
+/** {{{ */
+
+/* open() */
+
+/**
+ * It's probably worth mentioning a bit about why some of the housekeeping
+ * work is done in open() call path, rather than the callback path.
+ * Two (or more) open()'s in parallel can race and lead to a situation
+ * where a release() gets triggered (possibly after a series of write()
+ * calls) when *other* open()'s have still not reached callback path
+ * thereby having an active fd on an inode that is in process of getting
+ * signed with the current version.
+ *
+ * Maintaining fd list in the call path ensures that a release() would
+ * not be triggered if an open() call races ahead (followed by a close())
+ * thereby finding non-empty fd list.
+ */
+
+/**
+ * open() entry point.
+ *
+ * The inode context must exist and br_stub_check_bad_object() must return
+ * zero, otherwise the open is unwound (with the error filled by the check,
+ * or -1 / EINVAL). Opens from the scrubber and opens whose @flags equal
+ * O_RDONLY are wound as they are; for every other open the fd is first added
+ * to the inode's fd tracking list. Always returns 0.
+ */
+int
+br_stub_open (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
+{
+        br_stub_inode_ctx_t *ctx      = NULL;
+        uint64_t             ctx_addr = 0;
+        int32_t              op_errno = EINVAL;
+        int32_t              op_ret   = -1;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
+
+        if (br_stub_get_inode_ctx (this, fd->inode, &ctx_addr)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the "
+                        "inode context for the file %s (gfid: %s)", loc->path,
+                        uuid_utoa (fd->inode->gfid));
+                goto unwind;
+        }
+
+        ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+        if (br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno))
+                goto unwind;
+
+        /* scrubber opens and O_RDONLY opens are not tracked */
+        if ((frame->root->pid != GF_CLIENT_PID_SCRUB) &&
+            (flags != O_RDONLY) &&
+            br_stub_add_fd_to_inode (this, fd, ctx)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_ADD_FD_TO_LIST_FAILED,
+                        "failed add fd to the list (gfid: %s)",
+                        uuid_utoa (fd->inode->gfid));
+                goto unwind;
+        }
+
+        STACK_WIND (frame, default_open_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata);
+        return 0;
+unwind:
+        STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+}
+
+/** }}} */
+
+
+/** {{{ */
+
+/* creat() */
+
+/**
+ * This routine registers a release callback for the given fd and adds the
+ * fd to the inode context fd tracking list.
+ *
+ * @this: the bit-rot-stub translator
+ * @fd:   fd to be tracked
+ * @ctx:  inode context whose fd_list is the head of the tracking list
+ *
+ * The fd's node is appended to ctx->fd_list under the inode lock. The
+ * arguments of list_add_tail() are (new entry, list head): passing them the
+ * other way round re-links the inode's list head into every new fd node and
+ * drops the fds that were tracked before.
+ *
+ * Returns 0 on success, the non-zero return of
+ * br_stub_require_release_call() otherwise.
+ */
+int32_t
+br_stub_add_fd_to_inode (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+        int32_t       ret        = -1;
+        br_stub_fd_t *br_stub_fd = NULL;
+
+        ret = br_stub_require_release_call (this, fd, &br_stub_fd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        BRS_MSG_SET_FD_CONTEXT_FAILED, "failed to set the fd "
+                        "context for the file (gfid: %s)",
+                        uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        LOCK (&fd->inode->lock);
+        {
+                list_add_tail (&br_stub_fd->list, &ctx->fd_list);
+        }
+        UNLOCK (&fd->inode->lock);
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/**
+ * create() callback. On success:
+ *  - if the inode has no context yet, one is initialised with
+ *    BITROT_DEFAULT_CURRENT_VERSION; a failure there turns the reply into
+ *    -1 / EINVAL;
+ *  - if a context already exists, the fd is added to its tracking list (the
+ *    outcome of that call does not affect the reply).
+ * Failures from below are unwound unchanged.
+ */
+int
+br_stub_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int op_ret, int op_errno, fd_t *fd, inode_t *inode,
+                    struct iatt *stbuf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+        unsigned long        initial_version = BITROT_DEFAULT_CURRENT_VERSION;
+        br_stub_inode_ctx_t *ctx             = NULL;
+        uint64_t             ctx_addr        = 0;
+
+        if (op_ret < 0)
+                goto unwind;
+
+        if (br_stub_get_inode_ctx (this, fd->inode, &ctx_addr) >= 0) {
+                ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+                (void) br_stub_add_fd_to_inode (this, fd, ctx);
+        } else if (br_stub_init_inode_versions (this, fd, inode,
+                                                initial_version,
+                                                _gf_true, _gf_false)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+unwind:
+        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,
+                             fd, inode, stbuf, preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * create() entry point: after validating @this, @loc, @loc->inode, @fd and
+ * @fd->inode the fop is wound to the first child with br_stub_create_cbk.
+ * A validation failure unwinds with -1 / EINVAL. Always returns 0.
+ */
+int
+br_stub_create (call_frame_t *frame,
+                xlator_t *this, loc_t *loc, int32_t flags,
+                mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
+
+        STACK_WIND (frame, br_stub_create_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+
+        return 0;
+
+unwind:
+        STACK_UNWIND_STRICT (create, frame, -1, EINVAL,
+                             NULL, NULL, NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/**
+ * mknod() callback: on success the new inode's context is initialised with
+ * BITROT_DEFAULT_CURRENT_VERSION. Like lookup, a failure to initialise the
+ * inode versions is reported upwards as -1 / EINVAL.
+ */
+int
+br_stub_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int op_ret, int op_errno, inode_t *inode,
+                   struct iatt *stbuf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        unsigned long initial_version = BITROT_DEFAULT_CURRENT_VERSION;
+
+        if (op_ret >= 0 &&
+            br_stub_init_inode_versions (this, NULL, inode, initial_version,
+                                         _gf_true, _gf_false)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+        }
+
+        STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
+                             inode, stbuf, preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * mknod() entry point: after validating @this, @loc and @loc->inode the fop
+ * is wound to the first child with br_stub_mknod_cbk. A validation failure
+ * unwinds with -1 / EINVAL. Always returns 0.
+ */
+int
+br_stub_mknod (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+        GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
+
+        STACK_WIND (frame, br_stub_mknod_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->mknod,
+                    loc, mode, dev, umask, xdata);
+
+        return 0;
+
+unwind:
+        STACK_UNWIND_STRICT (mknod, frame, -1, EINVAL,
+                             NULL, NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/** }}} */
+
+/**
+ * As of now, only lookup searches for bad object xattr and marks the
+ * object as bad in its inode context if the xattr is present. But there
+ * is a possibility that, at the time of the lookup the object was not
+ * marked bad (i.e. bad object xattr was not set), and later its marked
+ * as bad. In this case, object is not bad, so when a fop such as open or
+ * readv or writev comes on the object, the fop will be sent downward instead
+ * of sending as error upwards.
+ * The solution for this is to do a getxattr for the below list of fops.
+ * lookup, readdirp, open, readv, writev.
+ * But doing getxattr for each of the above fops might be costly.
+ * So another method followed is to catch the bad file marking by the scrubber
+ * and set that info within the object's inode context. In this way getxattr
+ * calls can be avoided and bad objects can be caught instantly. Fetching the
+ * xattr is needed only in lookups when there is a brick restart or inode
+ * forget.
+ *
+ * If the dict (@xattr) is NULL, then how should that be handled? Fail the
+ * lookup operation? Or let it continue with version being initialized to
+ * BITROT_DEFAULT_CURRENT_VERSION. But what if the version was different
+ * on disk (and also a right signature was there), but posix failed to
+ * successfully allocate the dict? Posix does not treat callback xdata
+ * creation failure as a lookup failure.
+ */
+static int32_t
+br_stub_lookup_version (xlator_t *this,
+                        uuid_t gfid, inode_t *inode, dict_t *xattr)
+{
+        gf_boolean_t        bad_object = _gf_false;
+        br_signature_t     *sbuf       = NULL;
+        br_version_t       *obuf       = NULL;
+        unsigned long       version    = BITROT_DEFAULT_CURRENT_VERSION;
+        br_vxattr_status_t  status;
+
+        /**
+         * The versioning xattrs were requested from POSIX. When they are
+         * available the on-disk ongoing version seeds the inode context,
+         * otherwise the default version is used. Versions are not persisted
+         * at this point: the inode is marked dirty so that the first
+         * operation (such as write(), etc..) triggers synchronization to
+         * disk.
+         */
+        status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object);
+
+        /**
+         * A signature without a version is reported as INVALID; the inode
+         * context must not be initialised from such a state.
+         */
+        if (status == BR_VXATTR_STATUS_INVALID)
+                return -1;
+
+        if ((status == BR_VXATTR_STATUS_FULL) ||
+            (status == BR_VXATTR_STATUS_UNSIGNED))
+                version = obuf->ongoingversion;
+
+        return br_stub_init_inode_versions (this, NULL, inode, version,
+                                            _gf_true, bad_object);
+}
+
+
+/** {{{ */
+/**
+ * opendir() handler.
+ *
+ * When gf_uuid_compare() returns non-zero for the gfid of the fd's inode and
+ * priv->bad_object_dir_gfid (presumably: the directory is not the bad-object
+ * directory), the call is wound unchanged to the first child.
+ *
+ * Otherwise the directory at priv->stub_basepath is opened locally with
+ * sys_opendir() and the DIR handle is stored in a freshly allocated fd
+ * context (released later by br_stub_releasedir()).
+ *
+ * Unwinds with op_ret 0 once the fd context is stored. Failures unwind with
+ * ENOMEM (context allocation), the errno left by sys_opendir(), or EINVAL
+ * together with the non-zero result of br_stub_fd_ctx_set().
+ */
+int32_t
+br_stub_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ br_stub_private_t *priv = NULL;
+ br_stub_fd_t *fd_ctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ priv = this->private;
+ if (gf_uuid_compare (fd->inode->gfid, priv->bad_object_dir_gfid))
+ goto normal; /* not handled here: pass the call through */
+
+ fd_ctx = br_stub_fd_new ();
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ fd_ctx->bad_object.dir_eof = -1; /* NOTE(review): presumably "end offset not known yet" - confirm */
+ fd_ctx->bad_object.dir = sys_opendir (priv->stub_basepath);
+ if (!fd_ctx->bad_object.dir) {
+ op_errno = errno;
+ goto err_freectx;
+ }
+
+ op_ret = br_stub_fd_ctx_set (this, fd, fd_ctx);
+ if (!op_ret)
+ goto unwind; /* success: fd_ctx stays attached to the fd */
+
+ sys_closedir (fd_ctx->bad_object.dir); /* context could not be stored: undo sys_opendir() */
+
+err_freectx:
+ GF_FREE (fd_ctx);
+unwind:
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
+ return 0;
+
+normal:
+ STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+/**
+ * readdir() handler.
+ *
+ * A non-zero gf_uuid_compare() between the fd's inode gfid and
+ * priv->bad_object_dir_gfid sends the call straight to the first child.
+ * Otherwise the request is packaged as a call stub around
+ * br_stub_readdir_wrapper() and queued with br_stub_worker_enqueue().
+ * If the stub cannot be created the fop is unwound with ENOMEM.
+ */
+int32_t
+br_stub_readdir (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        br_stub_private_t *priv   = this->private;
+        call_stub_t       *queued = NULL;
+
+        if (gf_uuid_compare (fd->inode->gfid,
+                             priv->bad_object_dir_gfid) != 0) {
+                STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdir, fd, size, off,
+                            xdata);
+                return 0;
+        }
+
+        queued = fop_readdir_stub (frame, br_stub_readdir_wrapper, fd, size,
+                                   off, xdata);
+        if (queued == NULL) {
+                STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        br_stub_worker_enqueue (this, queued);
+        return 0;
+}
+/**
+ * readdirp() callback.
+ *
+ * On a successful reply, walks the returned entries and, for every regular
+ * file other than "." and "..", makes sure its inode carries a bit-rot
+ * inode context: entries whose inode already has one are left alone, the
+ * others are initialized from the version xattrs in entry->dict through
+ * br_stub_lookup_version(). In both cases the version xattrs are stripped
+ * from entry->dict before the reply travels upwards.
+ *
+ * If initialization fails for any entry the walk stops and the whole
+ * request is failed with EINVAL. A failed reply from below is unwound
+ * unchanged.
+ */
+int
+br_stub_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *dict)
+{
+ int32_t ret = 0;
+ uint64_t ctxaddr = 0;
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if ((strcmp (entry->d_name, ".") == 0)
+ || (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ if (!IA_ISREG (entry->d_stat.ia_type))
+ continue; /* only regular files are versioned here */
+
+ ret = br_stub_get_inode_ctx (this, entry->inode, &ctxaddr);
+ if (ret < 0)
+ ctxaddr = 0; /* treat a failed fetch as "no context yet" */
+ if (ctxaddr) { /* already has the context */
+ br_stub_remove_vxattrs (entry->dict);
+ continue;
+ }
+
+ ret = br_stub_lookup_version
+ (this, entry->inode->gfid, entry->inode, entry->dict);
+ br_stub_remove_vxattrs (entry->dict);
+ if (ret) {
+ /**
+ * there's no per-file granularity support in case of
+ * failure. let's fail the entire request for now..
+ */
+ break;
+ }
+ }
+
+ if (ret) { /* non-zero only when br_stub_lookup_version() failed above */
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, dict);
+
+ return 0;
+}
+
+/**
+ * readdirp() handler.
+ *
+ * Adds the bit-rot version, signature and bad-object keys to the request
+ * dictionary so that the reply carries them for every entry, then winds
+ * the call to the first child with br_stub_readdirp_cbk() as callback.
+ *
+ * A caller-supplied @dict is referenced (a new one is created when it is
+ * NULL); that private reference is dropped on every exit path, including
+ * the error path where one of the keys could not be set.
+ *
+ * Unwinds with ENOMEM when no dictionary can be allocated and with EINVAL
+ * when a key cannot be set. Always returns 0.
+ */
+int
+br_stub_readdirp (call_frame_t *frame, xlator_t *this,
+                  fd_t *fd, size_t size, off_t offset, dict_t *dict)
+{
+        int32_t       ret      = -1;
+        int           op_errno = 0;
+        gf_boolean_t  xref     = _gf_false;
+
+        op_errno = ENOMEM;
+        if (!dict) {
+                dict = dict_new ();
+                if (!dict)
+                        goto unwind;
+        } else {
+                dict = dict_ref (dict);
+        }
+
+        /* from here on we hold our own reference on dict */
+        xref = _gf_true;
+
+        op_errno = EINVAL;
+        ret = dict_set_uint32 (dict, BITROT_CURRENT_VERSION_KEY, 0);
+        if (ret)
+                goto unwind;
+        ret = dict_set_uint32 (dict, BITROT_SIGNING_VERSION_KEY, 0);
+        if (ret)
+                goto unwind;
+        ret = dict_set_uint32 (dict, BITROT_OBJECT_BAD_KEY, 0);
+        if (ret)
+                goto unwind;
+
+        STACK_WIND (frame, br_stub_readdirp_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD(this)->fops->readdirp, fd, size,
+                    offset, dict);
+        goto unref_dict;
+
+ unwind:
+        STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
+        /* fall through: the reference taken above must be dropped here too */
+
+ unref_dict:
+        if (xref)
+                dict_unref (dict);
+        return 0;
+}
+
+/** }}} */
+
+
+/** {{{ */
+
+/* lookup() */
+
+/**
+ * This function mainly handles the ENOENT error for bad objects. Though
+ * br_stub_forget () is described as handling removal of the link for the
+ * bad object from the quarantine directory, it is better to handle it in
+ * lookup as well: a lookup on a bad object that fails with ENOENT triggers
+ * deletion of the link for the bad object from the quarantine directory.
+ * So whoever comes first, forget () or lookup (), takes care of removing
+ * the link.
+ *
+ * NOTE(review): br_stub_forget () as it appears in this file only frees the
+ * inode context; confirm where the forget-side removal actually happens.
+ *
+ * Nothing is done unless @op_errno is ENOENT, @inode is linked, and an
+ * inode context can be fetched for it.
+ */
+void
+br_stub_handle_lookup_error (xlator_t *this, inode_t *inode, int32_t op_errno)
+{
+ int32_t ret = -1;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ if (op_errno != ENOENT)
+ goto out;
+
+ if (!inode_is_linked (inode))
+ goto out;
+
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
+ if (ret)
+ goto out; /* no context: nothing is known about this object */
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK (&inode->lock);
+ {
+ if (__br_stub_is_bad_object (ctx))
+ (void) br_stub_del (this, inode->gfid); /* result deliberately ignored */
+ }
+ UNLOCK (&inode->lock);
+
+out:
+ return;
+}
+/**
+ * lookup() callback.
+ *
+ * Failed lookups are passed to br_stub_handle_lookup_error() and unwound
+ * unchanged; objects that are not regular files are unwound unchanged too.
+ *
+ * For regular files there are two cases, told apart by @cookie:
+ *  - @cookie is not BR_STUB_REQUEST_COOKIE (br_stub_lookup() did not ask
+ *    for the version keys): only the "bad inode" marker is added to @xattr
+ *    when the object is bad.
+ *  - @cookie is BR_STUB_REQUEST_COOKIE: the inode context is initialized
+ *    from @xattr via br_stub_lookup_version() and then the "bad inode"
+ *    marker is added when applicable.
+ *
+ * The lookup is failed with EINVAL when version initialization fails and
+ * with EIO when the marker cannot be set. The version xattrs are removed
+ * from @xattr on the paths that go through the delkey label.
+ */
+int
+br_stub_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr, struct iatt *postparent)
+{
+ int32_t ret = 0;
+
+ if (op_ret < 0) {
+ (void) br_stub_handle_lookup_error (this, inode, op_errno);
+ goto unwind;
+ }
+
+ if (!IA_ISREG (stbuf->ia_type))
+ goto unwind;
+
+ /**
+ * If the object is bad, then the "bad inode" marker has to be sent back
+ * in the response for revalidated lookups as well. Some xlators such as
+ * quick-read might cache the data in a revalidated lookup, as the fresh
+ * lookup would anyway have sent the "bad inode" marker.
+ * In general send the bad inode marker for every lookup operation on the
+ * bad object.
+ */
+ if (cookie != (void *) BR_STUB_REQUEST_COOKIE) {
+ ret = br_stub_mark_xdata_bad_object (this, inode, xattr);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ }
+
+ goto delkey;
+ }
+
+ ret = br_stub_lookup_version (this, stbuf->ia_gfid, inode, xattr);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto delkey;
+ }
+
+ /**
+ * If the object is bad, send "bad inode" marker back in response
+ * for xlator(s) to act accordingly (such as quick-read, etc..)
+ */
+ ret = br_stub_mark_xdata_bad_object (this, inode, xattr);
+ if (ret) {
+ /**
+ * aaha! bad object, but sorry we would not
+ * satisfy the request on allocation failures.
+ */
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ }
+
+delkey:
+ br_stub_remove_vxattrs (xattr); /* internal keys must not leak to upper xlators */
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame,
+ op_ret, op_errno, inode, stbuf, xattr, postparent);
+
+ return 0;
+}
+/**
+ * lookup() handler.
+ *
+ * Three paths:
+ *  - @loc's gfid or parent gfid compares equal (gf_uuid_compare() == 0) to
+ *    priv->bad_object_dir_gfid: the lookup is queued as a call stub around
+ *    br_stub_lookup_wrapper() via br_stub_worker_enqueue().
+ *  - the inode already has a bit-rot inode context: the lookup is wound
+ *    as-is with a NULL cookie.
+ *  - otherwise (fresh lookup): the version, signature and bad-object keys
+ *    are added to the request dictionary and the lookup is wound with
+ *    BR_STUB_REQUEST_COOKIE so that br_stub_lookup_cbk() knows the keys
+ *    were requested.
+ *
+ * Unwinds with ENOMEM on allocation failures and EINVAL when a key cannot
+ * be set. The private reference taken on @xdata is dropped on every path
+ * that took it.
+ */
+int
+br_stub_lookup (call_frame_t *frame,
+ xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t ret = 0;
+ int op_errno = 0;
+ void *cookie = NULL;
+ uint64_t ctx_addr = 0;
+ gf_boolean_t xref = _gf_false;
+ br_stub_private_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
+
+ priv = this->private;
+
+ if (!gf_uuid_compare (loc->gfid, priv->bad_object_dir_gfid) ||
+ !gf_uuid_compare (loc->pargfid, priv->bad_object_dir_gfid)) {
+
+ stub = fop_lookup_stub (frame, br_stub_lookup_wrapper, loc,
+ xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ br_stub_worker_enqueue (this, stub);
+ return 0;
+ }
+
+ ret = br_stub_get_inode_ctx (this, loc->inode, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0; /* treat a failed fetch as "no context yet" */
+ if (ctx_addr != 0)
+ goto wind; /* context exists: no need to request the version keys */
+
+ /**
+ * fresh lookup: request version keys from POSIX
+ */
+ op_errno = ENOMEM;
+ if (!xdata) {
+ xdata = dict_new ();
+ if (!xdata)
+ goto unwind;
+ } else {
+ xdata = dict_ref (xdata);
+ }
+
+ xref = _gf_true; /* we now hold our own reference on xdata */
+
+ /**
+ * Requesting both xattrs provides a way of sanity checking the
+ * object. Anomaly checking is done in cbk by examining absence
+ * of either or both xattrs.
+ */
+ op_errno = EINVAL;
+ ret = dict_set_uint32 (xdata, BITROT_CURRENT_VERSION_KEY, 0);
+ if (ret)
+ goto unwind;
+ ret = dict_set_uint32 (xdata, BITROT_SIGNING_VERSION_KEY, 0);
+ if (ret)
+ goto unwind;
+ ret = dict_set_uint32 (xdata, BITROT_OBJECT_BAD_KEY, 0);
+ if (ret)
+ goto unwind;
+ cookie = (void *) BR_STUB_REQUEST_COOKIE;
+
+ wind:
+ STACK_WIND_COOKIE (frame, br_stub_lookup_cbk, cookie,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+ goto dealloc_dict;
+
+ unwind:
+ STACK_UNWIND_STRICT (lookup, frame,
+ -1, op_errno, NULL, NULL, NULL, NULL);
+ dealloc_dict: /* reached from both the wind and the unwind path */
+ if (xref)
+ dict_unref (xdata);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/**
+ * stat() handler.
+ *
+ * For regular files br_stub_check_bad_object() is consulted first; when it
+ * returns non-zero the fop is unwound with the op_ret/op_errno pair it was
+ * given to fill in (they start out as -1/EINVAL). In every other case the
+ * call is tail-wound to the first child.
+ */
+int
+br_stub_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        int32_t op_ret   = -1;
+        int32_t op_errno = EINVAL;
+
+        if (IA_ISREG (loc->inode->ia_type) &&
+            br_stub_check_bad_object (this, loc->inode,
+                                      &op_ret, &op_errno)) {
+                STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno,
+                                     NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->stat, loc, xdata);
+        return 0;
+}
+
+/**
+ * fstat() handler.
+ *
+ * Same policy as br_stub_stat(), keyed on the fd's inode: regular files
+ * are checked with br_stub_check_bad_object() and the fop is unwound with
+ * the op_ret/op_errno it fills in when the check returns non-zero;
+ * otherwise the call is tail-wound to the first child.
+ */
+int
+br_stub_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        int32_t op_ret   = -1;
+        int32_t op_errno = EINVAL;
+
+        if (IA_ISREG (fd->inode->ia_type) &&
+            br_stub_check_bad_object (this, fd->inode,
+                                      &op_ret, &op_errno)) {
+                STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno,
+                                     NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* unlink() */
+/**
+ * unlink() callback.
+ *
+ * After a successful unlink of a regular file whose inode context marks it
+ * as a bad object, br_stub_del() is called for its gfid (its result is
+ * ignored). A missing inode context is only logged as a warning.
+ *
+ * The fop is always unwound with the values received from below, after
+ * which the frame-local data set up by br_stub_unlink() is cleaned up and
+ * released.
+ */
+int
+br_stub_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ inode_t *inode = NULL;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+
+ local = frame->local;
+ frame->local = NULL; /* detach: local is released explicitly at the end */
+
+ if (op_ret < 0)
+ goto unwind;
+
+ inode = local->u.context.inode;
+ if (!IA_ISREG (inode->ia_type))
+ goto unwind;
+
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
+ if (ret) {
+ /**
+ * If the inode is bad AND context is not there, then there
+ * is a possibility of the gfid of the object being listed
+ * in the quarantine directory and will be shown in the
+ * bad objects list. So continuing with the fop with a
+ * warning log. The entry from the quarantine directory
+ * has to be removed manually. It's not a good idea to fail
+ * the fop, as the object has already been deleted.
+ */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get the context for the inode %s",
+ uuid_utoa (inode->gfid));
+ goto unwind;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK (&inode->lock);
+ {
+ /**
+ * Ignoring the return value of br_stub_del ().
+ * There is not much that can be done if unlinking
+ * of the entry in the quarantine directory fails.
+ * The failure is logged.
+ */
+ if (__br_stub_is_bad_object (ctx))
+ (void) br_stub_del (this, inode->gfid);
+ }
+ UNLOCK (&inode->lock);
+
+unwind:
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ br_stub_cleanup_local (local);
+ br_stub_dealloc_local (local);
+ return 0;
+}
+
+/**
+ * unlink() handler.
+ *
+ * Records the inode and gfid of @loc in a frame-local structure so that
+ * br_stub_unlink_cbk() can examine the object once the unlink has been
+ * performed, then winds the call to the first child. If the local
+ * structure cannot be allocated the failure is logged and the fop is
+ * unwound with ENOMEM.
+ */
+int
+br_stub_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+                dict_t *xdata)
+{
+        br_stub_local_t *local = br_stub_alloc_local (this);
+
+        if (local == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY,
+                        "failed to allocate memory for local (path: %s, gfid: %s)",
+                        loc->path, uuid_utoa (loc->inode->gfid));
+                STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+        br_stub_fill_local (local, NULL, NULL, loc->inode,
+                            loc->inode->gfid,
+                            BR_STUB_NO_VERSIONING, 0);
+        frame->local = local;
+
+        STACK_WIND (frame, br_stub_unlink_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->unlink, loc, flag, xdata);
+        return 0;
+}
+
+
+/** }}} */
+
+/** {{{ */
+
+/* forget() */
+
+/**
+ * forget() callback: detach this translator's context from @inode and
+ * free it. Nothing is freed when no context was stored. Always returns 0.
+ */
+int
+br_stub_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t stored = 0;
+
+        inode_ctx_del (inode, this, &stored);
+        if (stored != 0)
+                GF_FREE ((br_stub_inode_ctx_t *) (long) stored);
+
+        return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+/**
+ * Callback that ignores the result and only destroys the call stack of
+ * @frame. br_stub_send_ipc_fop() uses it for the frame it creates itself.
+ */
+int32_t
+br_stub_noop (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+/**
+ * Send a CHANGELOG_OP_TYPE_BR_RELEASE event for @fd's inode downwards as an
+ * ipc fop addressed to GF_IPC_TARGET_CHANGELOG.
+ *
+ * The event carries @releaseversion, @sign_info and the inode's gfid and is
+ * attached to a new dictionary under the "RELEASE-EVENT" key. The fop is
+ * wound on a frame created here, with br_stub_noop() as callback.
+ *
+ * Failures (dict allocation, dict set, frame creation) are logged as
+ * warnings and the event is not sent; nothing is reported to the caller.
+ */
+static void
+br_stub_send_ipc_fop (xlator_t *this, fd_t *fd, unsigned long releaseversion,
+ int sign_info)
+{
+ int32_t op = 0;
+ int32_t ret = 0;
+ dict_t *xdata = NULL;
+ call_frame_t *frame = NULL;
+ changelog_event_t ev = {0,};
+
+ ev.ev_type = CHANGELOG_OP_TYPE_BR_RELEASE;
+ ev.u.releasebr.version = releaseversion;
+ ev.u.releasebr.sign_info = sign_info;
+ gf_uuid_copy (ev.u.releasebr.gfid, fd->inode->gfid);
+
+ xdata = dict_new ();
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_NO_MEMORY,
+ "dict allocation failed: cannot send IPC FOP "
+ "to changelog");
+ goto out;
+ }
+
+ ret = dict_set_static_bin (xdata,
+ "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE); /* NOTE(review): ev lives on this stack frame; assumes the event is consumed before this function returns - confirm */
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED,
+ "cannot set release event in dict");
+ goto dealloc_dict;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_CREATE_FRAME_FAILED,
+ "create_frame() failure");
+ goto dealloc_dict;
+ }
+
+ op = GF_IPC_TARGET_CHANGELOG;
+ STACK_WIND (frame, br_stub_noop, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ipc, op, xdata);
+
+ dealloc_dict: /* reached after the wind as well as on failures */
+ dict_unref (xdata);
+ out:
+ return;
+}
+
+/**
+ * This is how the state machine of sign info works:
+ * 3 states:
+ * 1) BR_SIGN_NORMAL => The default State of the inode
+ * 2) BR_SIGN_REOPEN_WAIT => A release has been sent and is waiting for reopen
+ * 3) BR_SIGN_QUICK => reopen has happened and this release should trigger sign
+ * 2 events:
+ * 1) GF_FOP_RELEASE
+ * 2) GF_FOP_WRITE (actually a dummy write from BitD)
+ *
+ * This is how states are changed based on events:
+ * EVENT: GF_FOP_RELEASE:
+ * if (state == BR_SIGN_NORMAL) ; then
+ * set state = BR_SIGN_REOPEN_WAIT;
+ * if (state == BR_SIGN_QUICK); then
+ * set state = BR_SIGN_NORMAL;
+ * EVENT: GF_FOP_WRITE:
+ * if (state == BR_SIGN_REOPEN_WAIT); then
+ * set state = BR_SIGN_QUICK;
+ *
+ * NOTE(review): the code below reacts to GF_FOP_FSETXATTR, not GF_FOP_WRITE,
+ * and sets BR_SIGN_QUICK without looking at the current state - confirm
+ * which description is the intended one.
+ *
+ * Updates ctx->info_sign and returns the sign state to report for this
+ * event; BR_SIGN_INVALID is returned for any other fop. @fd is not used
+ * in the body.
+ */
+br_sign_state_t
+__br_stub_inode_sign_state (br_stub_inode_ctx_t *ctx,
+ glusterfs_fop_t fop, fd_t *fd)
+{
+ br_sign_state_t sign_info = BR_SIGN_INVALID;
+
+ switch (fop) {
+
+ case GF_FOP_FSETXATTR:
+ sign_info = ctx->info_sign = BR_SIGN_QUICK;
+ break;
+
+ case GF_FOP_RELEASE:
+ GF_ASSERT (ctx->info_sign != BR_SIGN_REOPEN_WAIT);
+
+ if (ctx->info_sign == BR_SIGN_NORMAL) {
+ sign_info = ctx->info_sign = BR_SIGN_REOPEN_WAIT;
+ } else {
+ sign_info = ctx->info_sign; /* report the state we are leaving */
+ ctx->info_sign = BR_SIGN_NORMAL;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return sign_info;
+}
+/**
+ * release() callback for an fd.
+ *
+ * Under inode->lock: the fd's context is unlinked from the list it is on
+ * and __br_stub_can_trigger_release() decides whether a release
+ * notification is due. If so, the sign state machine is advanced for
+ * GF_FOP_RELEASE and, when the inode is back in BR_SIGN_NORMAL, the inode
+ * is marked dirty and its "modified" flag is cleared.
+ *
+ * Outside the lock: when a notification is due it is sent through
+ * br_stub_send_ipc_fop(), with the version (converted with htonl() by
+ * __br_stub_can_trigger_release()) and the sign info (converted with
+ * htonl() here). Finally the fd context is deleted from the fd and freed.
+ *
+ * Nothing is sent when the inode has no bit-rot context. Always returns 0.
+ */
+int32_t
+br_stub_release (xlator_t *this, fd_t *fd)
+{
+ int32_t ret = 0;
+ int32_t flags = 0; /* never modified: always logged as 0 below */
+ inode_t *inode = NULL;
+ unsigned long releaseversion = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ uint64_t tmp = 0;
+ br_stub_fd_t *br_stub_fd = NULL;
+ int32_t signinfo = 0;
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __br_stub_get_ongoing_version_ctx (this, inode, NULL);
+ if (ctx == NULL)
+ goto unblock;
+ br_stub_fd = br_stub_fd_ctx_get (this, fd);
+ if (br_stub_fd) {
+ list_del_init (&br_stub_fd->list);
+ }
+
+ ret = __br_stub_can_trigger_release
+ (inode, ctx, &releaseversion);
+ if (!ret)
+ goto unblock;
+
+ signinfo = __br_stub_inode_sign_state (ctx, GF_FOP_RELEASE, fd);
+ signinfo = htonl (signinfo);
+
+ /* inode back to initial state: mark dirty */
+ if (ctx->info_sign == BR_SIGN_NORMAL) {
+ __br_stub_mark_inode_dirty (ctx);
+ __br_stub_unset_inode_modified (ctx);
+ }
+ }
+ unblock:
+ UNLOCK (&inode->lock);
+
+ if (ret) { /* non-zero only when a release may be triggered */
+ gf_msg_debug (this->name, 0, "releaseversion: %lu | flags: %d "
+ "| signinfo: %d",
+ (unsigned long) ntohl (releaseversion), flags,
+ ntohl(signinfo));
+ br_stub_send_ipc_fop (this, fd, releaseversion, signinfo);
+ }
+
+ ret = fd_ctx_del (fd, this, &tmp);
+ br_stub_fd = (br_stub_fd_t *)(long)tmp;
+
+ GF_FREE (br_stub_fd);
+
+ return 0;
+}
+/**
+ * releasedir() callback: remove this translator's context from @fd, close
+ * the DIR handle stored in it (if any; a closedir failure is only logged)
+ * and free the context. Nothing is done when fd_ctx_del() reports a
+ * negative result. Always returns 0.
+ */
+int32_t
+br_stub_releasedir (xlator_t *this, fd_t *fd)
+{
+ br_stub_fd_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = fd_ctx_del (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fctx = (br_stub_fd_t *) (long) ctx; /* NOTE(review): assumes a non-negative fd_ctx_del() implies a non-zero ctx - confirm */
+ if (fctx->bad_object.dir) {
+ ret = sys_closedir (fctx->bad_object.dir);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
+ "closedir error: %s", strerror (errno));
+ }
+
+ GF_FREE (fctx);
+out:
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/**
+ * ictxmerge
+ *
+ * Moves the fd entry tracked in the context of @inode over to the fd list
+ * in the context of @linked_inode. The move is done with
+ * linked_inode->lock held. Nothing happens when either inode has no
+ * bit-rot context.
+ */
+
+void
+br_stub_ictxmerge (xlator_t *this, fd_t *fd,
+ inode_t *inode, inode_t *linked_inode)
+{
+ int32_t ret = 0;
+ uint64_t ctxaddr = 0;
+ uint64_t lctxaddr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ br_stub_inode_ctx_t *lctx = NULL;
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ ret = br_stub_get_inode_ctx (this, inode, &ctxaddr);
+ if (ret < 0)
+ goto done;
+ ctx = (br_stub_inode_ctx_t *) ctxaddr;
+
+ LOCK (&linked_inode->lock);
+ {
+ ret = __br_stub_get_inode_ctx (this, linked_inode, &lctxaddr);
+ if (ret < 0)
+ goto unblock;
+ lctx = (br_stub_inode_ctx_t *) lctxaddr;
+
+ GF_ASSERT (list_is_singular (&ctx->fd_list)); /* exactly one fd is expected on the source inode */
+ br_stub_fd = list_first_entry (&ctx->fd_list, br_stub_fd_t,
+ list);
+ if (br_stub_fd) { /* NOTE(review): list_first_entry() is conventionally derived from the list head and never NULL, so this test presumably cannot fail - confirm */
+ GF_ASSERT (br_stub_fd->fd == fd);
+ list_move_tail (&br_stub_fd->list, &lctx->fd_list);
+ }
+ }
+unblock:
+ UNLOCK (&linked_inode->lock);
+
+ done:
+ return;
+}
+
+/** }}} */
+
+/**
+ * File operations intercepted by this translator; every entry refers to
+ * the br_stub_* handler for that fop.
+ */
+struct xlator_fops fops = {
+ .lookup = br_stub_lookup,
+ .stat = br_stub_stat,
+ .fstat = br_stub_fstat,
+ .open = br_stub_open,
+ .create = br_stub_create,
+ .readdirp = br_stub_readdirp,
+ .getxattr = br_stub_getxattr,
+ .fgetxattr = br_stub_fgetxattr,
+ .fsetxattr = br_stub_fsetxattr,
+ .writev = br_stub_writev,
+ .truncate = br_stub_truncate,
+ .ftruncate = br_stub_ftruncate,
+ .mknod = br_stub_mknod,
+ .readv = br_stub_readv,
+ .removexattr = br_stub_removexattr,
+ .fremovexattr = br_stub_fremovexattr,
+ .setxattr = br_stub_setxattr,
+ .opendir = br_stub_opendir,
+ .readdir = br_stub_readdir,
+ .unlink = br_stub_unlink,
+};
+/**
+ * Inode/fd life-cycle callbacks implemented by this translator.
+ * NOTE(review): br_stub_releasedir() is defined in this file but is not
+ * registered here - confirm whether a .releasedir entry is intended.
+ */
+struct xlator_cbks cbks = {
+ .forget = br_stub_forget,
+ .release = br_stub_release,
+ .ictxmerge = br_stub_ictxmerge,
+};
+/**
+ * Volume options accepted by this translator: a boolean "bitrot" switch
+ * (default "on") and the "export" brick path. The table ends with an
+ * entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"bitrot"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "enable/disable bitrot stub"
+ },
+ { .key = {"export"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path for versioning"
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
new file mode 100644
index 00000000000..2d515417059
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -0,0 +1,463 @@
+ /*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __BIT_ROT_STUB_H__
+#define __BIT_ROT_STUB_H__
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "call-stub.h"
+#include "bit-rot-stub-mem-types.h"
+#include "syscall.h"
+#include "bit-rot-common.h"
+#include "bit-rot-stub-messages.h"
+#include "glusterfs3-xdr.h"
+
+#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024*1024))
+
+typedef int (br_stub_version_cbk) (call_frame_t *, void *,
+ xlator_t *, int32_t, int32_t, dict_t *);
+
+typedef struct br_stub_inode_ctx {
+ int need_writeback; /* does the inode need
+ a writeback to disk?
+ (bitmask of I_DIRTY and
+ I_MODIFIED) */
+ unsigned long currentversion; /* ongoing version */
+
+ int info_sign; /* sign state, holds BR_SIGN_* values */
+ struct list_head fd_list; /* list of open fds or fds participating in
+ write operations */
+ gf_boolean_t bad_object; /* set once the object has been marked bad */
+} br_stub_inode_ctx_t;
+
+typedef struct br_stub_fd { /* per-fd context kept by this translator */
+ fd_t *fd; /* the fd this context belongs to */
+ struct list_head list; /* links the context into an inode context's fd_list */
+ struct bad_object_dir {
+ DIR *dir; /* handle obtained with sys_opendir() */
+ off_t dir_eof; /* NOTE(review): presumably the end-of-directory offset; initialized to -1 - confirm */
+ } bad_object;
+} br_stub_fd_t;
+
+#define I_DIRTY (1<<0) /* inode needs writeback */
+#define I_MODIFIED (1<<1)
+#define WRITEBACK_DURABLE 1 /* writeback is durable */
+
+/**
+ * Frame-local data of this translator.
+ *
+ * This could just have been a plain struct without unions and all,
+ * but we may need additional things in the future.
+ */
+typedef struct br_stub_local {
+ call_stub_t *fopstub; /* stub for original fop */
+
+ int versioningtype; /* not much used atm */
+
+ union {
+ struct br_stub_ctx {
+ fd_t *fd;
+ uuid_t gfid;
+ inode_t *inode; /* inode the fop operates on */
+ unsigned long version;
+ } context;
+ } u;
+} br_stub_local_t;
+
+#define BR_STUB_NO_VERSIONING (1 << 0)
+#define BR_STUB_INCREMENTAL_VERSIONING (1 << 1)
+
+typedef struct br_stub_private { /* per-translator private state (this->private) */
+ gf_boolean_t go;
+
+ uint32_t boot[2];
+ char export[PATH_MAX]; /* NOTE(review): presumably the value of the "export" option - confirm */
+
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+
+ struct list_head squeue; /* ordered signing queue */
+ pthread_t signth;
+ struct bad_objects_container {
+ pthread_t thread;
+ pthread_mutex_t bad_lock;
+ pthread_cond_t bad_cond;
+ struct list_head bad_queue;
+ } container;
+ struct mem_pool *local_pool;
+
+ char stub_basepath[PATH_MAX]; /* directory opened locally for fds on the bad-object directory */
+
+ uuid_t bad_object_dir_gfid; /* gfid used to recognize the bad-object directory */
+} br_stub_private_t;
+
+br_stub_fd_t *
+br_stub_fd_new (void); /* allocates an fd context; callers check for NULL */
+
+
+int
+__br_stub_fd_ctx_set (xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd); /* NOTE(review): by naming convention the caller presumably holds the lock - confirm */
+
+br_stub_fd_t *
+__br_stub_fd_ctx_get (xlator_t *this, fd_t *fd); /* NOTE(review): presumably the lock-held variant - confirm */
+
+br_stub_fd_t *
+br_stub_fd_ctx_get (xlator_t *this, fd_t *fd);
+
+int32_t
+br_stub_fd_ctx_set (xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd); /* callers treat 0 as success */
+
+static inline gf_boolean_t
+__br_stub_is_bad_object (br_stub_inode_ctx_t *ctx)
+{
+ return ctx->bad_object; /* plain read; callers in this file take inode->lock around it */
+}
+
+static inline void
+__br_stub_mark_object_bad (br_stub_inode_ctx_t *ctx)
+{
+ ctx->bad_object = _gf_true; /* flag the object as bad in the inode context */
+}
+
+/* inode writeback helpers */
+static inline void
+__br_stub_mark_inode_dirty (br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback |= I_DIRTY; /* set the "needs writeback" bit */
+}
+
+static inline void
+__br_stub_mark_inode_synced (br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback &= ~I_DIRTY; /* clear the "needs writeback" bit, leave other bits alone */
+}
+
+static inline int
+__br_stub_is_inode_dirty (br_stub_inode_ctx_t *ctx)
+{
+ return (ctx->need_writeback & I_DIRTY); /* non-zero when the dirty bit is set */
+}
+
+/* inode modification markers */
+static inline void
+__br_stub_set_inode_modified (br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback |= I_MODIFIED; /* set the "modified" bit */
+}
+
+static inline void
+__br_stub_unset_inode_modified (br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback &= ~I_MODIFIED; /* clear the "modified" bit, leave other bits alone */
+}
+
+static inline int
+__br_stub_is_inode_modified (br_stub_inode_ctx_t *ctx)
+{
+ return (ctx->need_writeback & I_MODIFIED); /* non-zero when the modified bit is set */
+}
+
+
+/**
+ * Allocate an fd context for @fd and store it on the fd (the warning
+ * logged on failure states that this is done for the release callback).
+ *
+ * On success returns 0 and hands the new context back through @fd_ctx.
+ * Returns -1 when the context cannot be allocated. When
+ * br_stub_fd_ctx_set() fails, a warning is logged, the context is freed
+ * (it would otherwise be leaked, as nothing else refers to it), @fd_ctx is
+ * left untouched and the non-zero result is returned.
+ */
+static inline int
+br_stub_require_release_call (xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx)
+{
+        int32_t       ret        = 0;
+        br_stub_fd_t *br_stub_fd = NULL;
+
+        br_stub_fd = br_stub_fd_new ();
+        if (!br_stub_fd)
+                return -1;
+
+        br_stub_fd->fd = fd;
+        INIT_LIST_HEAD (&br_stub_fd->list);
+
+        ret = br_stub_fd_ctx_set (this, fd, br_stub_fd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        BRS_MSG_SET_CONTEXT_FAILED,
+                        "could not set fd context (for release callback");
+                /* same cleanup br_stub_opendir() performs in this case */
+                GF_FREE (br_stub_fd);
+                return ret;
+        }
+
+        *fd_ctx = br_stub_fd;
+        return ret;
+}
+
+/* get/set inode context helpers */
+
+static inline int
+__br_stub_get_inode_ctx (xlator_t *this,
+ inode_t *inode, uint64_t *ctx)
+{
+ return __inode_ctx_get (inode, this, ctx); /* no locking here; br_stub_get_inode_ctx() is the locking wrapper */
+}
+
+static inline int
+br_stub_get_inode_ctx (xlator_t *this,
+ inode_t *inode, uint64_t *ctx)
+{
+ int ret = -1;
+
+ LOCK (&inode->lock); /* fetch the context value with inode->lock held */
+ {
+ ret = __br_stub_get_inode_ctx (this, inode, ctx);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret; /* result of __inode_ctx_get(); callers treat 0 as "context found" */
+}
+
+static inline int
+br_stub_set_inode_ctx (xlator_t *this,
+ inode_t *inode, br_stub_inode_ctx_t *ctx)
+{
+ uint64_t ctx_addr = (uint64_t) ctx; /* the context pointer is stored as an integer value */
+ return inode_ctx_set (inode, this, &ctx_addr);
+}
+
+/* version get/set helpers */
+
+static inline unsigned long
+__br_stub_writeback_version (br_stub_inode_ctx_t *ctx)
+{
+ return (ctx->currentversion + 1); /* the version after the current one; ctx is not modified */
+}
+
+/**
+ * Raise the ongoing version stored in @ctx to @version.
+ *
+ * The stored version only moves forward: when @version is not strictly
+ * greater than the current one, @ctx is left unchanged and a warning
+ * carrying both values is logged.
+ */
+static inline void
+__br_stub_set_ongoing_version (br_stub_inode_ctx_t *ctx, unsigned long version)
+{
+        if (ctx->currentversion < version)
+                ctx->currentversion = version;
+        else
+                /* the two literals are concatenated: keep a separator
+                 * between the values so the message stays readable */
+                gf_msg ("bit-rot-stub", GF_LOG_WARNING, 0,
+                        BRS_MSG_CHANGE_VERSION_FAILED, "current version: %lu, "
+                        "new version: %lu", ctx->currentversion, version);
+}
+
+static inline int
+__br_stub_can_trigger_release (inode_t *inode,
+ br_stub_inode_ctx_t *ctx, unsigned long *version)
+{
+ /**
+ * If the inode is modified, then it has to be dirty. An inode is
+ * marked dirty once version is increased. It is marked as modified
+ * when the modification call (write/truncate) which triggered
+ * the versioning is successful.
+ *
+ * A release can be triggered (return 1) only when the inode is
+ * modified, no fd is left on its fd_list and the sign state is not
+ * BR_SIGN_REOPEN_WAIT; otherwise 0 is returned and @version is not
+ * written.
+ *
+ * NOTE(review): the assert below requires the dirty bit to be clear,
+ * which reads differently from the first sentence above - confirm.
+ */
+ if (__br_stub_is_inode_modified (ctx)
+ && list_empty (&ctx->fd_list)
+ && (ctx->info_sign != BR_SIGN_REOPEN_WAIT)) {
+
+ GF_ASSERT (__br_stub_is_inode_dirty (ctx) == 0);
+
+ if (version)
+ *version = htonl (ctx->currentversion); /* handed out after htonl() conversion */
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int32_t
+br_stub_get_ongoing_version (xlator_t *this,
+ inode_t *inode, unsigned long *version)
+{
+ int32_t ret = 0;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock); /* read the version with inode->lock held */
+ {
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ goto unblock; /* no context: *version is left untouched */
+ ctx = (br_stub_inode_ctx_t *) (long) ctx_addr;
+ *version = ctx->currentversion;
+ }
+ unblock:
+ UNLOCK (&inode->lock);
+
+ return ret; /* result of __inode_ctx_get() */
+}
+
+/**
+ * Fetch the current version from the inode and return the context.
+ * inode->lock should be held before invoking this, as the context
+ * *needs* to be valid in the caller.
+ *
+ * Returns NULL when no context can be fetched. @version may be NULL, in
+ * which case only the context is returned.
+ */
+static inline br_stub_inode_ctx_t *
+__br_stub_get_ongoing_version_ctx (xlator_t *this,
+ inode_t *inode, unsigned long *version)
+{
+ int32_t ret = 0;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ return NULL;
+ ctx = (br_stub_inode_ctx_t *) (long) ctx_addr;
+ if (version)
+ *version = ctx->currentversion;
+
+ return ctx;
+}
+
+/**
+ * Filter for xattr fetch.
+ *
+ * Returns 1 when @name starts with the current-version key or with the
+ * signing-version key, and 0 otherwise (including when @name is NULL).
+ */
+static inline int
+br_stub_is_internal_xattr (const char *name)
+{
+        if (name == NULL)
+                return 0;
+
+        if (strncmp (name, BITROT_CURRENT_VERSION_KEY,
+                     strlen (BITROT_CURRENT_VERSION_KEY)) == 0)
+                return 1;
+
+        if (strncmp (name, BITROT_SIGNING_VERSION_KEY,
+                     strlen (BITROT_SIGNING_VERSION_KEY)) == 0)
+                return 1;
+
+        return 0;
+}
+
+/**
+ * Remove the bit-rot internal keys (bad-object marker, current version,
+ * signing version and signing xattr size) from @xattr. A NULL dictionary
+ * is accepted and ignored.
+ */
+static inline void
+br_stub_remove_vxattrs (dict_t *xattr)
+{
+        if (xattr == NULL)
+                return;
+
+        dict_del (xattr, BITROT_OBJECT_BAD_KEY);
+        dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
+        dict_del (xattr, BITROT_SIGNING_VERSION_KEY);
+        dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
+}
+
+/**
+ * This function returns the below values for different situations:
+ * 0 => as per the inode context the object is not bad
+ * -1 => failed to get the inode context itself
+ * -2 => as per the inode context the object is bad
+ * Both negative values mean the fop which called this function is failed
+ * and the error is returned upwards.
+ * In future, if needed or if more errors have to be handled, these
+ * values can be made into enums.
+ */
+static inline int
+br_stub_is_bad_object (xlator_t *this, inode_t *inode)
+{
+ int bad_object = 0;
+ gf_boolean_t tmp = _gf_false;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get the inode context for the inode %s",
+ uuid_utoa (inode->gfid));
+ bad_object = -1;
+ goto out;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK (&inode->lock); /* the flag is read with inode->lock held */
+ {
+ tmp = __br_stub_is_bad_object (ctx);
+ if (tmp)
+ bad_object = -2;
+ }
+ UNLOCK (&inode->lock);
+
+out:
+ return bad_object;
+}
+/**
+ * Mark the object behind @inode as bad in its inode context, with
+ * inode->lock held. Returns the result of br_stub_get_inode_ctx(): 0 when
+ * the context was found and marked, non-zero (after logging an error)
+ * when it could not be fetched.
+ */
+static inline int32_t
+br_stub_mark_object_bad (xlator_t *this, inode_t *inode)
+{
+ int32_t ret = -1;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the "
+ "inode context for the inode %s",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK (&inode->lock);
+ {
+ __br_stub_mark_object_bad (ctx);
+ }
+ UNLOCK (&inode->lock);
+
+out:
+ return ret;
+}
+
+/**
+ * Add the GLUSTERFS_BAD_INODE marker (value 1) to @xdata when the inode
+ * context says the object is bad (br_stub_is_bad_object() == -2).
+ *
+ * Setting the key may fail; the result of dict_set_int32() is passed on
+ * so that the caller can decide what to do. 0 is returned when the object
+ * is not known to be bad and nothing had to be set.
+ */
+static inline int32_t
+br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata)
+{
+        if (br_stub_is_bad_object (this, inode) != -2)
+                return 0;
+
+        return dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1);
+}
+
+int32_t
+br_stub_add_fd_to_inode (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx);
+
+br_sign_state_t
+__br_stub_inode_sign_state (br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop,
+ fd_t *fd); /* advances ctx->info_sign for the given fop */
+
+int
+br_stub_dir_create (xlator_t *this, br_stub_private_t *priv);
+
+int
+br_stub_add (xlator_t *this, uuid_t gfid);
+
+int32_t
+br_stub_create_stub_gfid (xlator_t *this, char *stub_gfid_path, uuid_t gfid);
+
+int
+br_stub_dir_create (xlator_t *this, br_stub_private_t *priv); /* NOTE(review): repeats the declaration a few lines above */
+
+call_stub_t *
+__br_stub_dequeue (struct list_head *callstubs);
+
+void
+__br_stub_enqueue (struct list_head *callstubs, call_stub_t *stub);
+
+void
+br_stub_worker_enqueue (xlator_t *this, call_stub_t *stub); /* used to queue lookup/readdir stubs for the bad-object directory */
+
+void *
+br_stub_worker (void *data);
+
+int32_t
+br_stub_lookup_wrapper (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req);
+
+int32_t
+br_stub_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata);
+
+int
+br_stub_del (xlator_t *this, uuid_t gfid); /* called for bad objects after unlink and after ENOENT lookups */
+
+#endif /* __BIT_ROT_STUB_H__ */
diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am
new file mode 100644
index 00000000000..153bb685076
--- /dev/null
+++ b/xlators/features/changelog/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src lib
+
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/Makefile.am b/xlators/features/changelog/lib/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/changelog/lib/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/examples/c/get-changes-multi.c b/xlators/features/changelog/lib/examples/c/get-changes-multi.c
new file mode 100644
index 00000000000..3741bdf6edc
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes-multi.c
@@ -0,0 +1,88 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * Compile it using:
+ * gcc -o getchanges-multi `pkg-config --cflags libgfchangelog` \
+ * get-changes-multi.c `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+/* Brick init hook installed by fill_brick_spec(): returns the brick spec
+ * pointer it was given, unchanged. */
+void *brick_init (void *xl, struct gf_brick_spec *brick)
+{
+ return brick;
+}
+
+/* Brick fini hook installed by fill_brick_spec(): intentionally a no-op. */
+void brick_fini (void *xl, char *brick, void *data)
+{
+ return;
+}
+
+/* Event callback installed by fill_brick_spec(): prints the brick name and
+ * the numeric type (ev->ev_type) of the event it was handed. */
+void brick_callback (void *xl, char *brick,
+ void *data, changelog_event_t *ev)
+{
+ printf ("->callback: (brick,type) [%s:%d]\n", brick, ev->ev_type);
+}
+
+/* Populate one brick spec: store a heap copy of @path, select the
+ * CHANGELOG_OP_TYPE_BR_RELEASE filter and wire up the example hooks above.
+ * The connected/disconnected hooks are left unset. */
+void fill_brick_spec (struct gf_brick_spec *brick, char *path)
+{
+ /* NOTE(review): the strdup() result is not checked and is never freed
+  * in this example. */
+ brick->brick_path = strdup (path);
+ brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE;
+
+ brick->init = brick_init;
+ brick->fini = brick_fini;
+ brick->callback = brick_callback;
+ brick->connected = NULL;
+ brick->disconnected = NULL;
+}
+
+/*
+ * Register two bricks with libgfchangelog and then block in select(),
+ * leaving all work to the registered callbacks.
+ *
+ * Every path through this function ends at error_return, so the process
+ * exit value is always -1, including when select() returns.
+ */
+int
+main (int argc, char **argv)
+{
+ int ret = 0;
+ void *bricks = NULL;
+ struct gf_brick_spec *brick = NULL;
+
+ /* zeroed array holding the two brick specs */
+ bricks = calloc (2, sizeof (struct gf_brick_spec));
+ if (!bricks)
+ goto error_return;
+
+ brick = (struct gf_brick_spec *)bricks;
+ fill_brick_spec (brick, "/export/z1/zwoop");
+
+ /* advance to the second element of the array */
+ brick++;
+ fill_brick_spec (brick, "/export/z2/zwoop");
+
+ ret = gf_changelog_init (NULL);
+ if (ret)
+ goto error_return;
+
+ ret = gf_changelog_register_generic ((struct gf_brick_spec *)bricks, 2,
+ 0, "/tmp/multi-changes.log", 9,
+ NULL);
+ if (ret)
+ goto error_return;
+
+ /* let callbacks do the job */
+ select (0, NULL, NULL, NULL, NULL);
+
+ error_return:
+ return -1;
+}
diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c
new file mode 100644
index 00000000000..ef766c566b6
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes.c
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) \
+ printf ("%s (reason: %s)\n", fn, strerror (errno))
+
+/**
+ * Poll a brick's changelogs forever: scan, print every changelog file
+ * found, mark it as processed, then sleep 10 seconds and repeat.
+ *
+ * Returns the non-zero result of gf_changelog_init()/gf_changelog_register()
+ * when either fails. The polling loop only ends when gf_changelog_scan()
+ * reports an error, in which case the last value stored in `ret` is
+ * returned.
+ */
+int
+main (int argc, char ** argv)
+{
+        int     i          = 0;
+        int     ret        = 0;
+        ssize_t nr_changes = 0;
+        ssize_t changes    = 0;
+        char    fbuf[PATH_MAX] = {0,};
+
+        ret = gf_changelog_init (NULL);
+        if (ret) {
+                handle_error ("Init failed");
+                goto out;
+        }
+
+        /* get changes for brick "/export/z1/zwoop" */
+        ret = gf_changelog_register ("/export/z1/zwoop",
+                                     "/tmp/scratch", "/tmp/change.log", 9, 5);
+        if (ret) {
+                handle_error ("register failed");
+                goto out;
+        }
+
+        while (1) {
+                i = 0;
+                nr_changes = gf_changelog_scan ();
+                if (nr_changes < 0) {
+                        handle_error ("scan(): ");
+                        break;
+                }
+
+                if (nr_changes == 0)
+                        goto next;
+
+                /* ssize_t needs the 'z' length modifier, not 'l' */
+                printf ("Got %zd changelog files\n", nr_changes);
+
+                while ( (changes =
+                         gf_changelog_next_change (fbuf, PATH_MAX)) > 0) {
+                        printf ("changelog file [%d]: %s\n", ++i, fbuf);
+
+                        /* process changelog */
+                        /* ... */
+                        /* ... */
+                        /* ... */
+                        /* done processing */
+
+                        ret = gf_changelog_done (fbuf);
+                        if (ret)
+                                handle_error ("gf_changelog_done");
+                }
+
+                if (changes == -1)
+                        handle_error ("gf_changelog_next_change");
+
+        next:
+                sleep (10);
+        }
+
+ out:
+        return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/c/get-history.c b/xlators/features/changelog/lib/examples/c/get-history.c
new file mode 100644
index 00000000000..ee3ec0ad100
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-history.c
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get the history changelogs of a brick for a start/end pair read from
+ * stdin (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) \
+ printf ("%s (reason: %s)\n", fn, strerror (errno))
+
+/**
+ * Read a start and an end value from stdin, request the history changelogs
+ * for that range and print every changelog file returned, marking each one
+ * as done.
+ *
+ * Returns the non-zero result of a failing init/register/history call,
+ * -1 when the two numbers cannot be read, otherwise the last value stored
+ * in `ret` when scanning finishes or fails.
+ */
+int
+main (int argc, char ** argv)
+{
+        int           i          = 0;
+        int           ret        = 0;
+        int           a          = 0;
+        int           b          = 0;
+        ssize_t       nr_changes = 0;
+        ssize_t       changes    = 0;
+        char          fbuf[PATH_MAX] = {0,};
+        unsigned long end_ts     = 0;
+
+        ret = gf_changelog_init (NULL);
+        if (ret) {
+                handle_error ("init failed");
+                goto out;
+        }
+
+        ret = gf_changelog_register ("/export/z1/zwoop",
+                                     "/tmp/scratch_v1", "/tmp/changes.log",
+                                     9, 5);
+        if (ret) {
+                handle_error ("register failed");
+                goto out;
+        }
+
+        printf ("give the two numbers start and end\t");
+        /* without this check a and b would be used without valid input */
+        if (scanf ("%d%d", &a, &b) != 2) {
+                printf ("invalid start/end input\n");
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_history_changelog ("/export/z1/zwoop/.glusterfs/changelogs",
+                                    a, b, 3, &end_ts);
+        if (ret == -1) {
+                printf ("history failed");
+                goto out;
+        }
+
+        /* end_ts is unsigned long: %lu, not %d */
+        printf ("end time till when changelog available : %lu , ret(%d) \t", end_ts, ret);
+        fflush(stdout);
+
+        while (1) {
+                nr_changes = gf_history_changelog_scan ();
+                /* nr_changes is ssize_t: %zd, not %d */
+                printf ("scanned, nr_changes : %zd\n", nr_changes);
+                if (nr_changes < 0) {
+                        handle_error ("scan(): ");
+                        break;
+                }
+
+                if (nr_changes == 0) {
+                        printf ("done scanning \n");
+                        goto out;
+                }
+
+                printf ("Got %zd changelog files\n", nr_changes);
+
+                while ( (changes =
+                         gf_history_changelog_next_change (fbuf, PATH_MAX)) > 0) {
+                        printf ("changelog file [%d]: %s\n", ++i, fbuf);
+
+                        /* process changelog */
+                        /* ... */
+                        /* ... */
+                        /* ... */
+                        /* done processing */
+
+                        ret = gf_history_changelog_done (fbuf);
+                        if (ret)
+                                handle_error ("gf_changelog_done");
+                }
+        }
+
+out:
+        return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py
new file mode 100644
index 00000000000..221df642a36
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/changes.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+
+import os
+import sys
+import time
+import libgfchangelog
+
+cl = libgfchangelog.Changes()
+
+def get_changes(brick, scratch_dir, log_file, log_level, interval):
+    """Poll `brick` for changelogs forever, printing and acknowledging each.
+
+    Initialises and registers with libgfchangelog, then loops: scan, fetch
+    the list of changelogs, print it, mark every entry as done and sleep
+    `interval` seconds.  An OSError raised by the wrapper ends the loop and
+    is printed.
+    """
+    try:
+        cl.cl_init()
+        cl.cl_register(brick, scratch_dir, log_file, log_level)
+        while True:
+            cl.cl_scan()
+            change_list = cl.cl_getchanges()
+            if change_list:
+                print(change_list)
+            for change in change_list:
+                print('done with %s' % (change))
+                cl.cl_done(change)
+            time.sleep(interval)
+    except OSError:
+        # sys.exc_info() keeps the original spelling that works on old
+        # interpreters
+        ex = sys.exc_info()[1]
+        print(ex)
+
+if __name__ == '__main__':
+    # program name + the four positional arguments listed in the usage text
+    if len(sys.argv) != 5:
+        print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>"
+              % (sys.argv[0]))
+        sys.exit(1)
+    # log level is fixed at 9 for this example
+    get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4]))
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
new file mode 100644
index 00000000000..10e73c02b34
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -0,0 +1,70 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+    """Thin ctypes wrapper around the libgfchangelog C API.
+
+    Every method is a classmethod that calls into the shared library handle
+    `libgfc`.  A C return value of -1 is turned into an OSError built from
+    the errno captured by ctypes.
+    """
+    libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True)
+
+    @classmethod
+    def geterrno(cls):
+        """Return the errno recorded by the last ctypes call."""
+        return get_errno()
+
+    @classmethod
+    def raise_oserr(cls):
+        """Raise OSError carrying the current ctypes errno and its message."""
+        errn = cls.geterrno()
+        raise OSError(errn, os.strerror(errn))
+
+    @classmethod
+    def _get_api(cls, call):
+        """Look up the C function named `call` in the library handle."""
+        return getattr(cls.libgfc, call)
+
+    @classmethod
+    def cl_init(cls):
+        """Call gf_changelog_init(NULL); raise OSError on -1."""
+        ret = cls._get_api('gf_changelog_init')(None)
+        if ret == -1:
+            # the class has no raise_changelog_err(); use the shared helper
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+        """Call gf_changelog_register(); raise OSError on -1."""
+        ret = cls._get_api('gf_changelog_register')(brick, path,
+                                                    log_file, log_level, retries)
+        if ret == -1:
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_scan(cls):
+        """Call gf_changelog_scan(); raise OSError on -1."""
+        ret = cls._get_api('gf_changelog_scan')()
+        if ret == -1:
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_startfresh(cls):
+        """Call gf_changelog_start_fresh(); raise OSError on -1."""
+        ret = cls._get_api('gf_changelog_start_fresh')()
+        if ret == -1:
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_getchanges(cls):
+        """Drain gf_changelog_next_change() and return the sorted entries.
+
+        Entries are sorted by the text after the last '.' in their name.
+        Raises OSError when the C call returns -1.
+        """
+        # TODO: remove hardcoding for path name length
+        def clsort(f):
+            return f.split('.')[-1]
+        changes = []
+        buf = create_string_buffer('\0', 4096)
+        call = cls._get_api('gf_changelog_next_change')
+
+        while True:
+            ret = call(buf, 4096)
+            if ret in (0, -1):
+                break
+            # ret counts the line terminator, which is not part of the name
+            changes.append(buf.raw[:ret-1])
+        if ret == -1:
+            cls.raise_oserr()
+        # cleanup tracker
+        cls.cl_startfresh()
+        return sorted(changes, key=clsort)
+
+    @classmethod
+    def cl_done(cls, clfile):
+        """Call gf_changelog_done(clfile); raise OSError on -1."""
+        ret = cls._get_api('gf_changelog_done')(clfile)
+        if ret == -1:
+            cls.raise_oserr()
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
new file mode 100644
index 00000000000..8d3edb4d63f
--- /dev/null
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -0,0 +1,31 @@
+libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/features/changelog/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/rpc-transport/socket/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS) -version-info $(LIBGFCHANGELOG_LT_VERSION)
+
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
+lib_LTLIBRARIES = libgfchangelog.la
+
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-journal-handler.c gf-changelog-helpers.c \
+ gf-changelog-api.c gf-history-changelog.c gf-changelog-rpc.c gf-changelog-reborp.c \
+ $(top_srcdir)/xlators/features/changelog/src/changelog-rpc-common.c
+
+noinst_HEADERS = gf-changelog-helpers.h gf-changelog-rpc.h gf-changelog-journal.h changelog-lib-messages.h
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/changelog/lib/src/changelog-lib-messages.h b/xlators/features/changelog/lib/src/changelog-lib-messages.h
new file mode 100644
index 00000000000..976c67f61a9
--- /dev/null
+++ b/xlators/features/changelog/lib/src/changelog-lib-messages.h
@@ -0,0 +1,287 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CHANGELOG_LIB_MESSAGES_H_
+#define _CHANGELOG_LIB_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file changelog-lib-messages.h
+ * \brief CHANGELOG_LIB log-message IDs and their descriptions.
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE_CHANGELOG_LIB GLFS_MSGID_COMP_CHANGELOG_LIB
+/* Keep this at least as large as the highest message offset defined in
+ * this file, so that GLFS_MSGID_END lies past every message ID. */
+#define GLFS_NUM_MESSAGES 31
+#define GLFS_MSGID_END (GLFS_COMP_BASE_CHANGELOG_LIB + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_CHANGELOG_LIB,\
+ "Invalid: Start of messages"
+
+/*!
+ * @messageid
+ * @diagnosis open/opendir failed on a brick.
+ * @recommended action Error number in the log should give the reason why it
+ * failed. Also observe brick logs for more information.
+ */
+#define CHANGELOG_LIB_MSG_OPEN_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR (GLFS_COMP_BASE_CHANGELOG_LIB + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_SCRATCH_DIR_ENTRIES_CREATION_ERROR \
+(GLFS_COMP_BASE_CHANGELOG_LIB + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_OPENDIR_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_RENAME_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_READ_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_HTIME_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_GET_TIME_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_WRITE_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 10)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_PTHREAD_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 11)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_MMAP_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 12)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_MUNMAP_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 13)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_ASCII_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 14)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_STAT_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 15)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 16)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_PUBLISH_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 17)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_PARSE_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 18)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_TOTAL_LOG_INFO (GLFS_COMP_BASE_CHANGELOG_LIB + 19)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_CLEANUP_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 20)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_UNLINK_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 21)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED\
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 22)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED\
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 23)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO\
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 24)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 25)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 26)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_XDR_DECODING_FAILED \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 27)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 28)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 29)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 30)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+/* Offset 20 already belongs to CHANGELOG_LIB_MSG_CLEANUP_ERROR; message IDs
+ * must be unique, so this one takes the next free offset. */
+#define CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED \
+ (GLFS_COMP_BASE_CHANGELOG_LIB + 31)
+
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_CHANGELOG_LIB_MESSAGES_H_ */
diff --git a/xlators/features/changelog/lib/src/gf-changelog-api.c b/xlators/features/changelog/lib/src/gf-changelog-api.c
new file mode 100644
index 00000000000..d2a28bc6d52
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-api.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "compat-uuid.h"
+#include "globals.h"
+#include "glusterfs.h"
+#include "syscall.h"
+
+#include "gf-changelog-helpers.h"
+#include "gf-changelog-journal.h"
+#include "changelog-mem-types.h"
+#include "changelog-lib-messages.h"
+
+/**
+ * @API
+ * Mark a changelog as consumed: resolve @file, check that the resolved
+ * path starts with the journal's working directory and rename it into the
+ * processed directory.
+ *
+ * Returns 0 on success. Returns -1 when the xlator, journal, argument or
+ * path check fails, otherwise the non-zero sys_rename() result. errno is
+ * set to EINVAL before any check is made.
+ */
+int
+gf_changelog_done (char *file)
+{
+        int                     ret      = -1;
+        char                   *resolved = NULL;
+        xlator_t               *this     = NULL;
+        gf_changelog_journal_t *jnl      = NULL;
+        char                    dest[PATH_MAX] = {0,};
+
+        errno = EINVAL;
+
+        this = THIS;
+        if (this == NULL)
+                goto out;
+
+        jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (jnl == NULL)
+                goto out;
+
+        if (file == NULL || file[0] == '\0')
+                goto out;
+
+        /* the resolved path must start with ->jnl_working_dir */
+        resolved = realpath (file, NULL);
+        if (resolved == NULL)
+                goto out;
+
+        if (strncmp (jnl->jnl_working_dir, resolved,
+                     strlen (jnl->jnl_working_dir)) != 0)
+                goto out;
+
+        (void) snprintf (dest, PATH_MAX, "%s%s",
+                         jnl->jnl_processed_dir, basename (resolved));
+        gf_msg_debug (this->name, 0,
+                      "moving %s to processed directory", file);
+
+        ret = sys_rename (resolved, dest);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_RENAME_FAILED,
+                        "cannot move %s to %s",
+                        file, dest);
+        }
+
+ out:
+        free (resolved); /* allocated by realpath(); free(NULL) is a no-op */
+        return ret;
+}
+
+/**
+ * @API
+ * for a set of changelogs, start from the beginning
+ *
+ * Truncates the journal's tracker fd to zero length.
+ * Returns 0 on success and -1 on failure.
+ */
+int
+gf_changelog_start_fresh ()
+{
+        xlator_t               *this = THIS;
+        gf_changelog_journal_t *jnl  = NULL;
+
+        if (this == NULL)
+                return -1;
+
+        errno = EINVAL;
+
+        jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (jnl == NULL)
+                return -1;
+
+        return (gf_ftruncate (jnl->jnl_fd, 0) == 0) ? 0 : -1;
+}
+
+/**
+ * @API
+ * return the next changelog file entry. zero means all changelogs
+ * consumed.
+ *
+ * @bufptr: caller buffer that receives the NUL-terminated entry
+ * @maxlen: size of @bufptr in bytes; must be between 1 and PATH_MAX
+ *
+ * Returns the length reported by gf_readline() for the entry, 0 when no
+ * entry is left, or -1 on error. errno is EINVAL for a NULL buffer or a
+ * zero @maxlen and ENAMETOOLONG when @maxlen exceeds PATH_MAX.
+ */
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen)
+{
+        ssize_t                 size       = -1;
+        int                     tracker_fd = 0;
+        xlator_t               *this       = NULL;
+        gf_changelog_journal_t *jnl        = NULL;
+        char                    buffer[PATH_MAX] = {0,};
+
+        errno = EINVAL;
+
+        this = THIS;
+        if (!this)
+                goto out;
+
+        jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (!jnl)
+                goto out;
+
+        /* a zero-sized buffer cannot even hold the terminating NUL */
+        if (!bufptr || maxlen == 0)
+                goto out;
+
+        /* gf_readline() fills the local staging buffer with up to maxlen
+         * bytes, so a larger maxlen would overflow it */
+        if (maxlen > sizeof (buffer)) {
+                errno = ENAMETOOLONG;
+                goto out;
+        }
+
+        tracker_fd = jnl->jnl_fd;
+
+        size = gf_readline (tracker_fd, buffer, maxlen);
+        if (size < 0) {
+                size = -1;
+                goto out;
+        }
+
+        if (size == 0)
+                goto out;
+
+        /* copy everything but the last byte and terminate in its place */
+        memcpy (bufptr, buffer, size - 1);
+        bufptr[size - 1] = '\0';
+
+out:
+        return size;
+}
+
+/**
+ * @API
+ * gf_changelog_scan() - scan and generate a list of change entries
+ *
+ * calling this api multiple times (without calling gf_changelog_done())
+ * would result in new changelog(s) being refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ *
+ * Returns the number of entries written to the tracker file, or -1 on
+ * failure (errno is ENOTCONN when the journal API is disconnected).
+ */
+ssize_t
+gf_changelog_scan ()
+{
+        int                     tracker_fd = 0;
+        size_t                  off        = 0;
+        xlator_t               *this       = NULL;
+        size_t                  nr_entries = 0;
+        gf_changelog_journal_t *jnl        = NULL;
+        struct dirent          *entry      = NULL;
+        struct dirent           scratch[2] = {{0,},};
+        char                    buffer[PATH_MAX] = {0,};
+
+        this = THIS;
+        if (!this)
+                goto out;
+
+        jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (!jnl)
+                goto out;
+        if (JNL_IS_API_DISCONNECTED (jnl)) {
+                errno = ENOTCONN;
+                goto out;
+        }
+
+        errno = EINVAL;
+
+        /* start with an empty tracker file */
+        tracker_fd = jnl->jnl_fd;
+        if (gf_ftruncate (tracker_fd, 0))
+                goto out;
+
+        rewinddir (jnl->jnl_dir);
+
+        for (;;) {
+                /* cleared so a NULL return can be told apart from an error */
+                errno = 0;
+                entry = sys_readdir (jnl->jnl_dir, scratch);
+                if (!entry || errno != 0)
+                        break;
+
+                if (!strcmp (basename (entry->d_name), ".")
+                    || !strcmp (basename (entry->d_name), ".."))
+                        continue;
+
+                nr_entries++;
+
+                /* one tracker line: <processing dir><entry name>\n */
+                GF_CHANGELOG_FILL_BUFFER (jnl->jnl_processing_dir,
+                                          buffer, off,
+                                          strlen (jnl->jnl_processing_dir));
+                GF_CHANGELOG_FILL_BUFFER (entry->d_name, buffer,
+                                          off, strlen (entry->d_name));
+                GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1);
+
+                if (gf_changelog_write (tracker_fd, buffer, off) != off) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_LIB_MSG_WRITE_FAILED,
+                                "error writing changelog filename"
+                                " to tracker file");
+                        break;
+                }
+                off = 0;
+        }
+
+        /* success only when the directory was read to its end; a NULL entry
+         * with errno set is a read failure, not end-of-directory */
+        if (!entry && errno == 0) {
+                if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1)
+                        return nr_entries;
+        }
+ out:
+        return -1;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
new file mode 100644
index 00000000000..8b35f4e9416
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -0,0 +1,219 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-mem-types.h"
+#include "gf-changelog-helpers.h"
+#include "changelog-lib-messages.h"
+#include "syscall.h"
+
+/* Read up to @bufsize bytes from @fd into @buffer; returns whatever
+ * sys_read() returns. */
+ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize)
+{
+ return sys_read (fd, buffer, bufsize);
+}
+
+/**
+ * Write @len bytes from @buffer to @fd, retrying after short writes.
+ *
+ * Returns the number of bytes actually written; this is less than @len
+ * when sys_write() fails or reports that nothing was written.
+ */
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len)
+{
+        size_t  done = 0;
+        ssize_t nw   = 0;
+
+        for (done = 0; done < len; done += nw) {
+                nw = sys_write (fd, buffer + done, len - done);
+                if (nw <= 0)
+                        break;
+        }
+
+        return done;
+}
+
+/**
+ * Encode the NUL-terminated string @s into @enc.
+ *
+ * @estr is a lookup table indexed by byte value: a non-zero entry is
+ * emitted in place of the input byte, a zero entry makes the byte come out
+ * as "%XX" (two upper-case hex digits). @enc must be large enough for the
+ * result; nothing is written to it when @s is empty.
+ */
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr)
+{
+        unsigned char *in  = s;
+        char          *out = enc;
+
+        while (*in != '\0') {
+                char mapped = estr[*in];
+
+                /* sprintf() returns the number of characters it wrote, so
+                 * `out` always ends up on the terminating NUL */
+                if (mapped != '\0')
+                        out += sprintf (out, "%c", mapped);
+                else
+                        out += sprintf (out, "%%%02X", *in);
+
+                in++;
+        }
+}
+
+/**
+ * thread safe version of readline with buffering
+ * (taken from Unix Network Programming Volume I, W.R. Stevens)
+ *
+ * This is favoured over fgets() as we'd need to ftruncate()
+ * (see gf_changelog_scan() API) to record new changelog files.
+ * stream open functions does have a truncate like api (although
+ * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp),
+ * but this involves mixing POSIX file descriptors and stream FILE *).
+ *
+ * NOTE: This implementation does not work with more than one fd
+ * used to perform gf_readline() in the same thread (the read buffer is
+ * kept per thread, not per fd). For this very reason it's not
+ * made a part of libglusterfs.
+ */
+
+static pthread_key_t rl_key;
+static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
+
+/* Destructor registered for rl_key by readline_once(): releases the buffer
+ * stored under the key with GF_FREE(). */
+static void
+readline_destructor (void *ptr)
+{
+ GF_FREE (ptr);
+}
+
+/* pthread_once() routine: creates rl_key with readline_destructor as its
+ * destructor. NOTE(review): the pthread_key_create() result is ignored. */
+static void
+readline_once (void)
+{
+ pthread_key_create (&rl_key, readline_destructor);
+}
+
+/**
+ * Hand out one byte from the buffered reader @tsd, refilling the buffer
+ * from @fd with a single sys_read() of up to MAXLINE bytes when it is
+ * empty.
+ *
+ * Returns 1 with the byte stored in *@ptr, 0 when sys_read() returned 0,
+ * and -1 when sys_read() failed.
+ */
+static ssize_t
+my_read (read_line_t *tsd, int fd, char *ptr)
+{
+        if (tsd->rl_cnt <= 0) {
+                tsd->rl_cnt = sys_read (fd, tsd->rl_buf, MAXLINE);
+                if (tsd->rl_cnt <= 0)
+                        return (tsd->rl_cnt < 0) ? -1 : 0;
+
+                tsd->rl_bufptr = tsd->rl_buf;
+        }
+
+        *ptr = *tsd->rl_bufptr;
+        tsd->rl_bufptr++;
+        tsd->rl_cnt--;
+
+        return 1;
+}
+
+/**
+ * Fetch the calling thread's read_line_t, allocating it and storing it
+ * under rl_key on first use.
+ *
+ * Returns 0 with *@tsd set, or -1 on failure. When the new buffer cannot
+ * be stored under the key it is released and *@tsd is reset to NULL.
+ */
+static int
+gf_readline_init_once (read_line_t **tsd)
+{
+        if (pthread_once (&rl_once, readline_once) != 0)
+                return -1;
+
+        *tsd = pthread_getspecific (rl_key);
+        if (*tsd)
+                goto out;
+
+        *tsd = GF_CALLOC (1, sizeof (**tsd),
+                          gf_changelog_mt_libgfchangelog_rl_t);
+        if (!*tsd)
+                return -1;
+
+        if (pthread_setspecific (rl_key, *tsd) != 0) {
+                /* nothing else references the buffer: release it here,
+                 * otherwise it is leaked */
+                GF_FREE (*tsd);
+                *tsd = NULL;
+                return -1;
+        }
+
+ out:
+        return 0;
+}
+
+/**
+ * Read one line from @fd into @vptr through the per-thread buffer.
+ *
+ * At most @maxlen - 1 bytes are stored, followed by a NUL. Reading stops
+ * after a '\n', which is kept in the buffer.
+ *
+ * Returns the number of bytes stored (newline included) when a newline was
+ * seen, @maxlen when the loop ran out of room first, the number of bytes
+ * read so far (possibly 0) when my_read() reported end of input, and -1 on
+ * failure.
+ */
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen)
+{
+        size_t       n   = 0;
+        ssize_t      rc  = 0;   /* signed: my_read() can return -1 */
+        char         c   = ' ';
+        char        *ptr = NULL;
+        read_line_t *tsd = NULL;
+
+        if (gf_readline_init_once (&tsd))
+                return -1;
+
+        ptr = vptr;
+        for (n = 1; n < maxlen; n++) {
+                rc = my_read (tsd, fd, &c);
+                if (rc == 1) {
+                        *ptr++ = c;
+                        if (c == '\n')
+                                break;
+                } else if (rc == 0) {
+                        *ptr = '\0';
+                        return (n - 1);
+                } else
+                        return -1;
+        }
+
+        *ptr = '\0';
+        return n;
+
+}
+
+/**
+ * Reposition @fd with sys_lseek() and, on success, discard whatever the
+ * calling thread's readline buffer still holds.
+ *
+ * Returns the new offset, or -1 on failure.
+ */
+off_t
+gf_lseek (int fd, off_t offset, int whence)
+{
+        read_line_t *tsd = NULL;
+        off_t        pos = 0;
+
+        if (gf_readline_init_once (&tsd) != 0)
+                return -1;
+
+        pos = sys_lseek (fd, offset, whence);
+        if (pos != -1) {
+                /* buffered bytes belong to the old file position */
+                tsd->rl_bufptr = tsd->rl_buf;
+                tsd->rl_cnt    = 0;
+        }
+
+        return pos;
+}
+
+/**
+ * Truncate @fd to @length bytes and discard whatever the calling thread's
+ * readline buffer still holds.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+gf_ftruncate (int fd, off_t length)
+{
+        read_line_t *tsd = NULL;
+
+        if (gf_readline_init_once (&tsd))
+                return -1;
+
+        /* honour the requested length; it used to be ignored in favour of a
+         * hard-coded 0 */
+        if (sys_ftruncate (fd, length))
+                return -1;
+
+        tsd->rl_cnt = 0;
+        tsd->rl_bufptr = tsd->rl_buf;
+
+        return 0;
+}
+
+/**
+ * Cancel @thread and wait for it to finish.
+ *
+ * Returns 0 when the thread was cancelled and joined and its exit status
+ * is PTHREAD_CANCELED. Returns -1, after logging a warning, when the
+ * cancel or the join fails or the exit status is anything else.
+ */
+int
+gf_thread_cleanup (xlator_t *this, pthread_t thread)
+{
+        void *exit_status = NULL;
+
+        if (pthread_cancel (thread) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+                        "Failed to send cancellation to thread");
+                return -1;
+        }
+
+        if (pthread_join (thread, &exit_status) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+                        "failed to join thread");
+                return -1;
+        }
+
+        if (exit_status != PTHREAD_CANCELED) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+                        "Thread could not be cleaned up");
+                return -1;
+        }
+
+        return 0;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
new file mode 100644
index 00000000000..bd21e4df035
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -0,0 +1,259 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_HELPERS_H
+#define _GF_CHANGELOG_HELPERS_H
+
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include "locking.h"
+
+#include <xlator.h>
+
+#include "changelog.h"
+
+#include "changelog-rpc-common.h"
+#include "gf-changelog-journal.h"
+
+#define GF_CHANGELOG_TRACKER "tracker"
+
+#define GF_CHANGELOG_CURRENT_DIR ".current"
+#define GF_CHANGELOG_PROCESSED_DIR ".processed"
+#define GF_CHANGELOG_PROCESSING_DIR ".processing"
+#define GF_CHANGELOG_HISTORY_DIR ".history"
+#define TIMESTAMP_LENGTH 10
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+/**
+ * Append @len bytes from @ptr to the buffer @ascii at offset @off, then
+ * advance @off (which must be an lvalue) by @len. No bounds checking is
+ * performed; the caller must make sure @ascii is large enough.
+ *
+ * Arguments are parenthesized and @len is evaluated exactly once, so
+ * expressions with side effects (or a strlen() call) are safe to pass.
+ */
+#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do {              \
+                size_t gf_fill_len = (len);                              \
+                memcpy ((ascii) + (off), (ptr), gf_fill_len);            \
+                (off) += gf_fill_len;                                    \
+        } while (0)
+
+/**
+ * State of the buffered line reader, obtained by gf_readline() and
+ * friends through gf_readline_init_once(). gf_lseek() and
+ * gf_ftruncate() reset it by zeroing ->rl_cnt and pointing ->rl_bufptr
+ * back at ->rl_buf.
+ */
+typedef struct read_line {
+ int rl_cnt; /* presumably bytes still unread in rl_buf -- confirm in my_read() */
+ char *rl_bufptr; /* cursor into rl_buf */
+ char rl_buf[MAXLINE]; /* backing storage for the reader */
+} read_line_t;
+
+struct gf_changelog;
+struct gf_event;
+
+/**
+ * Event list for ordered event notification
+ *
+ * ->next_seq holds the next _expected_ sequence number.
+ */
+struct gf_event_list {
+ pthread_mutex_t lock; /* protects this structure */
+ pthread_cond_t cond;
+
+ pthread_t invoker;
+
+ unsigned long next_seq; /* next sequence number expected:
+ zero during bootstrap */
+
+ struct gf_changelog *entry; /* backpointer to it's brick
+ encapsulator (entry) */
+ struct list_head events; /* list of events */
+};
+
+/**
+ * include a refcount if it's of use by additional layers
+ */
+struct gf_event {
+ int count;
+
+ unsigned long seq;
+
+ struct list_head list;
+
+ struct iovec iov[0];
+};
+/**
+ * Bytes needed for a struct gf_event carrying @cnt iovec slots followed
+ * by @len bytes of payload. Arguments are parenthesized so that
+ * expressions such as "a + b" expand with the intended precedence.
+ */
+#define GF_EVENT_CALLOC_SIZE(cnt, len)                                  \
+        (sizeof (struct gf_event) + ((cnt) * sizeof (struct iovec)) + (len))
+
+/**
+ * assign the base address of the IO vector to the correct memory
+ * area and set its addressable length.
+ *
+ * The payload area starts right after the event header and its
+ * @event->count iovec slots; @pos is the running offset inside that
+ * area and is advanced by @len (it must be an lvalue). All arguments
+ * are parenthesized so arbitrary expressions can be passed.
+ */
+#define GF_EVENT_ASSIGN_IOVEC(vec, event, len, pos)                     \
+        do {                                                            \
+                (vec)->iov_base = ((char *)(event)) +                   \
+                        sizeof (struct gf_event) +                      \
+                        ((event)->count * sizeof (struct iovec)) +      \
+                        (pos);                                          \
+                (vec)->iov_len = (len);                                 \
+                (pos) += (len);                                         \
+        } while (0)
+
+typedef enum gf_changelog_conn_state {
+ GF_CHANGELOG_CONN_STATE_PENDING = 0,
+ GF_CHANGELOG_CONN_STATE_ACCEPTED,
+ GF_CHANGELOG_CONN_STATE_DISCONNECTED,
+} gf_changelog_conn_state_t;
+
+/**
+ * An instance of this structure is allocated for each brick for which
+ * notifications are streamed.
+ */
+typedef struct gf_changelog {
+ gf_lock_t statelock;
+ gf_changelog_conn_state_t connstate;
+
+ xlator_t *this;
+
+ struct list_head list; /* list of instances */
+
+ char brick[PATH_MAX]; /* brick path for this end-point */
+
+ changelog_rpc_t grpc; /* rpc{-clnt,svc} for this brick */
+#define RPC_PROBER(ent) ent->grpc.rpc
+#define RPC_REBORP(ent) ent->grpc.svc
+#define RPC_SOCK(ent) ent->grpc.sock
+
+ unsigned int notify; /* notification flag(s) */
+
+ FINI *fini; /* destructor callback */
+ CALLBACK *callback; /* event callback dispatcher */
+ CONNECT *connected; /* connect callback */
+ DISCONNECT *disconnected; /* disconnection callback */
+
+ void *ptr; /* owner specific private data */
+ xlator_t *invokerxl; /* consumers _this_, if valid,
+ assigned to THIS before cbk is
+ invoked */
+
+ gf_boolean_t ordered;
+
+ void (*queueevent) (struct gf_event_list *, struct gf_event *);
+ void (*pickevent) (struct gf_event_list *, struct gf_event **);
+
+ struct gf_event_list event;
+} gf_changelog_t;
+
+/**
+ * Tell whether @event is of a type this brick entry subscribed to:
+ * returns 1 when the event type shares a bit with @entry->notify,
+ * 0 otherwise.
+ */
+static inline int
+gf_changelog_filter_check (gf_changelog_t *entry, changelog_event_t *event)
+{
+        return (event->ev_type & entry->notify) ? 1 : 0;
+}
+
+/* true when the brick entry @ent asked for ordered event delivery */
+#define GF_NEED_ORDERED_EVENTS(ent) ((ent)->ordered == _gf_true)
+
+/** private structure */
+typedef struct gf_private {
+ pthread_mutex_t lock; /* protects ->connections, cleanups */
+ pthread_cond_t cond;
+
+ void *api; /* pointer for API access */
+
+ pthread_t poller; /* event poller thread */
+ pthread_t connectionjanitor; /* connection cleaner */
+
+ struct list_head connections; /* list of connections */
+ struct list_head cleanups; /* list of connection to be
+ cleaned up */
+} gf_private_t;
+
+/* fetch the ->api pointer stored in the gf_private_t hanging off @this */
+#define GF_CHANGELOG_GET_API_PTR(this) (((gf_private_t *) (this)->private)->api)
+
+/**
+ * upcall: invoke callback with _correct_ THIS
+ *
+ * THIS is switched to the consumer's xlator (entry->invokerxl) when one
+ * is set, @cbk is called as cbk (invokerxl, brick, args...), and THIS is
+ * then restored to @this.
+ *
+ * NOTE: `entry` is not a macro parameter; a variable with that name
+ * (with an ->invokerxl member) must be in scope at the expansion site.
+ */
+#define GF_CHANGELOG_INVOKE_CBK(this, cbk, brick, args ...) \
+ do { \
+ xlator_t *old_this = NULL; \
+ xlator_t *invokerxl = NULL; \
+ \
+ invokerxl = entry->invokerxl; \
+ old_this = this; \
+ \
+ if (invokerxl) { \
+ THIS = invokerxl; \
+ } \
+ \
+ cbk (invokerxl, brick, args); \
+ THIS = old_this; \
+ \
+ } while (0)
+
+/*
+ * Remember @xl in `old_this` and switch THIS to `master`.
+ * NOTE: both `old_this` and `master` are taken from the scope where the
+ * macro is expanded; they are not parameters.
+ */
+#define SAVE_THIS(xl) \
+ do { \
+ old_this = xl; \
+ THIS = master; \
+ } while (0)
+
+/*
+ * Counterpart of SAVE_THIS(): put THIS back to `old_this` (a variable in
+ * the expanding scope) unless it is NULL.
+ */
+#define RESTORE_THIS() \
+ do { \
+ if (old_this) \
+ THIS = old_this; \
+ } while (0)
+
+/** APIs and the rest */
+
+void *
+gf_changelog_process (void *data);
+
+ssize_t
+gf_changelog_read_path (int fd, char *buffer, size_t bufsize);
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr);
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len);
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen);
+
+int
+gf_ftruncate (int fd, off_t length);
+
+off_t
+gf_lseek (int fd, off_t offset, int whence);
+
+int
+gf_changelog_consume (xlator_t *this,
+ gf_changelog_journal_t *jnl,
+ char *from_path, gf_boolean_t no_publish);
+int
+gf_changelog_publish (xlator_t *this,
+ gf_changelog_journal_t *jnl, char *from_path);
+int
+gf_thread_cleanup (xlator_t *this, pthread_t thread);
+void *
+gf_changelog_callback_invoker (void *arg);
+
+int
+gf_cleanup_event (xlator_t *, struct gf_event_list *);
+
+/* (un)ordered event queueing */
+void
+queue_ordered_event (struct gf_event_list *, struct gf_event *);
+
+void
+queue_unordered_event (struct gf_event_list *, struct gf_event *);
+
+/* (un)ordered event picking */
+void
+pick_event_ordered (struct gf_event_list *, struct gf_event **);
+
+void
+pick_event_unordered (struct gf_event_list *, struct gf_event **);
+
+/* connection janitor thread */
+void *
+gf_changelog_connection_janitor (void *);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
new file mode 100644
index 00000000000..6ea7cac88da
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
@@ -0,0 +1,1065 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "compat-uuid.h"
+#include "globals.h"
+#include "glusterfs.h"
+#include "syscall.h"
+#include "compat-errno.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+#include "gf-changelog-journal.h"
+#include "changelog-lib-messages.h"
+
+extern int byebye;
+
+enum changelog_versions {
+ VERSION_1_1 = 0,
+ VERSION_1_2 = 1
+};
+
+/**
+ * number of gfid records after fop number
+ */
+int nr_gfids[2][GF_FOP_MAXVALUE] = {
+ {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 1,
+ [GF_FOP_RMDIR] = 1,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+ },
+ {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 2,
+ [GF_FOP_RMDIR] = 2,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+ }
+};
+
+/**
+ * number of additional fields that follow the fop number in an entry
+ * ('E') record, indexed by [changelog version][fop].
+ * gf_changelog_parse_ascii() copies each of them to the output,
+ * preceded by a space, before handling the gfid/basename records
+ * counted by nr_gfids. Fops not listed here default to 0.
+ */
+int nr_extra_recs[2][GF_FOP_MAXVALUE] = {
+ {
+ [GF_FOP_MKNOD] = 3,
+ [GF_FOP_MKDIR] = 3,
+ [GF_FOP_UNLINK] = 0,
+ [GF_FOP_RMDIR] = 0,
+ [GF_FOP_SYMLINK] = 0,
+ [GF_FOP_RENAME] = 0,
+ [GF_FOP_LINK] = 0,
+ [GF_FOP_CREATE] = 3,
+ },
+ {
+ [GF_FOP_MKNOD] = 3,
+ [GF_FOP_MKDIR] = 3,
+ [GF_FOP_UNLINK] = 0,
+ [GF_FOP_RMDIR] = 0,
+ [GF_FOP_SYMLINK] = 0,
+ [GF_FOP_RENAME] = 0,
+ [GF_FOP_LINK] = 0,
+ [GF_FOP_CREATE] = 3,
+ }
+};
+
+/* convert a binary gfid to its textual form via uuid_utoa() */
+static char *
+binary_to_ascii (uuid_t uuid)
+{
+        char *text = uuid_utoa (uuid);
+
+        return text;
+}
+
+/* identity conversion: used where the gfid is already in ascii form */
+static char *
+conv_noop (char *ptr)
+{
+        return ptr;
+}
+
+/*
+ * Parser helpers. Several of them contain a bare `break`, so they may
+ * only be expanded directly inside the switch or loop that should be
+ * left when an error is detected; the error flag (@perr) must be
+ * checked by the caller afterwards.
+ */
+
+/* set @perr and break unless the byte at @ptr + @plen is a NUL */
+#define VERIFY_SEPARATOR(ptr, plen, perr) \
+ { \
+ if (*(ptr + plen) != '\0') { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+/*
+ * advance @mover by @bytes and shrink @nleft accordingly.
+ * NOTE: the definition ends with a line continuation, so the line
+ * following it belongs to the macro and must stay blank.
+ */
+#define MOVER_MOVE(mover, nleft, bytes) \
+ { \
+ mover += bytes; \
+ nleft -= bytes; \
+ } \
+
+/*
+ * check that the field at @mov is @le bytes long (NUL at @mov + @le) and
+ * convert it with @fn into @ptr; set @perr and break when the separator
+ * is missing or @fn returns NULL
+ */
+#define PARSE_GFID(mov, ptr, le, fn, perr) \
+ { \
+ VERIFY_SEPARATOR (mov, le, perr); \
+ ptr = fn (mov); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+/*
+ * append the NUL-terminated string @pt to @buf at offset @of, then move
+ * the input cursor @mo forward by @le bytes (adjusting @nl)
+ */
+#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \
+ { \
+ GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \
+ MOVER_MOVE (mo, nl, le); \
+ }
+
+
+/*
+ * copy sizeof (uuid_t) raw bytes from @mover into @uuid, convert them to
+ * text in @ptr (set @perr and break on NULL) and step over them.
+ * NOTE: ends with a line continuation like MOVER_MOVE.
+ */
+#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \
+ { \
+ memcpy (uuid, mover, sizeof (uuid_t)); \
+ ptr = binary_to_ascii (uuid); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \
+ } \
+
+#define LINE_BUFSIZE (3*PATH_MAX) /* enough buffer for extra chars too */
+
+/**
+ * using mmap() makes parsing easy. fgets() cannot be used here as
+ * the binary gfid could contain a line-feed (0x0A), in that case fgets()
+ * would read an incomplete line and parsing would fail. using POSIX fds
+ * would result in additional code to maintain state in case of partial
+ * reads of data (where multiple entries do not fit entirely in the buffer).
+ *
+ * mmap() gives the flexibility of pointing to an offset in the file
+ * without us worrying about reading it in memory (VM does that for us for
+ * free).
+ */
+
+/**
+ * binary decoder: convert the records of a binary changelog into text.
+ *
+ * @from_fd is mapped read-only for @stbuf->st_size bytes and parsing
+ * starts @start_offset bytes in. Every record ('D', 'M' or 'E' followed
+ * by a raw gfid and, for 'E', a basename terminated by '\n') is written
+ * to @to_fd as "<type> <gfid><basename>\n".
+ *
+ * All reads are bounded by the number of bytes left in the mapping and
+ * the output line is checked against the size of the local buffer, so
+ * a truncated or corrupt changelog results in a parse error rather
+ * than an out-of-bounds access.
+ *
+ * Returns 0 when the whole file was consumed without parse or write
+ * errors, -1 otherwise. @jnl and @version_idx are unused here.
+ */
+static int
+gf_changelog_parse_binary (xlator_t *this,
+                           gf_changelog_journal_t *jnl,
+                           int from_fd, int to_fd,
+                           size_t start_offset, struct stat *stbuf,
+                           int version_idx)
+
+{
+        int     ret                 = -1;
+        off_t   off                 = 0;
+        off_t   nleft               = 0;
+        uuid_t  uuid                = {0,};
+        char   *ptr                 = NULL;
+        char   *bname_start         = NULL;
+        char   *bname_end           = NULL;
+        char   *mover               = NULL;
+        void   *start               = NULL;
+        char    current_mover       = ' ';
+        size_t  blen                = 0;
+        int     parse_err           = 0;
+        int     write_err           = 0;
+        char    ascii[LINE_BUFSIZE] = {0,};
+
+        nleft = stbuf->st_size;
+
+        start = mmap (NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0);
+        if (start == MAP_FAILED) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_MMAP_FAILED,
+                        "mmap() error");
+                goto out;
+        }
+
+        mover = start;
+
+        MOVER_MOVE (mover, nleft, start_offset);
+
+        while (nleft > 0) {
+
+                off = blen = 0;
+                ptr = bname_start = bname_end = NULL;
+
+                current_mover = *mover;
+
+                switch (current_mover) {
+                case 'D':
+                case 'M':
+                        MOVER_MOVE (mover, nleft, 1);
+                        /* a full raw gfid must still be inside the map */
+                        if (nleft < (off_t) sizeof (uuid_t)) {
+                                parse_err = 1;
+                                break;
+                        }
+                        PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+                        break;
+
+                case 'E':
+                        MOVER_MOVE (mover, nleft, 1);
+                        if (nleft < (off_t) sizeof (uuid_t)) {
+                                parse_err = 1;
+                                break;
+                        }
+                        PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+                        /*
+                         * the mapping is not NUL terminated, so search
+                         * for the end of the basename within the bytes
+                         * that are left; as before, a NUL ahead of the
+                         * newline is treated as a parse error.
+                         */
+                        bname_start = mover;
+                        bname_end = memchr (mover, '\n', (size_t) nleft);
+                        if ((bname_end == NULL) ||
+                            (memchr (mover, '\0',
+                                     bname_end - bname_start) != NULL)) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        blen = bname_end - bname_start;
+                        MOVER_MOVE (mover, nleft, blen);
+
+                        break;
+
+                default:
+                        parse_err = 1;
+                }
+
+                if (parse_err)
+                        break;
+
+                /* type + ' ' + gfid + basename + '\n' has to fit */
+                if ((strlen (ptr) + blen + 3) > sizeof (ascii)) {
+                        parse_err = 1;
+                        break;
+                }
+
+                GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+                GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr));
+                if (blen)
+                        GF_CHANGELOG_FILL_BUFFER (bname_start,
+                                                  ascii, off, blen);
+                GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+                if (gf_changelog_write (to_fd, ascii, off) != off) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_LIB_MSG_ASCII_ERROR,
+                                "processing binary changelog failed due to "
+                                " error in writing ascii change");
+                        /* never report success after a short write */
+                        write_err = 1;
+                        break;
+                }
+
+                MOVER_MOVE (mover, nleft, 1);
+        }
+
+        if ((nleft == 0) && (!parse_err) && (!write_err))
+                ret = 0;
+
+        if (munmap (start, stbuf->st_size))
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_MUNMAP_FAILED,
+                        "munmap() error");
+ out:
+        return ret;
+}
+
+/**
+ * ascii decoder:
+ *  - separate out one entry from another
+ *  - use fop name rather than fop number
+ *
+ * @from_fd is mapped read-only for @stbuf->st_size bytes and parsing
+ * starts @start_offset bytes in. Each record ('D', 'M' or 'E') is
+ * rewritten as one space separated, newline terminated line on @to_fd;
+ * basenames of 'E' records are percent-encoded with @jnl->rfc3986.
+ *
+ * The fop number read from the file is validated against
+ * GF_FOP_MAXVALUE before it is used as an index into gf_fop_list,
+ * nr_extra_recs and nr_gfids.
+ *
+ * NOTE(review): the strlen() calls below rely on every field being NUL
+ * terminated inside the mapping, and the output line is not checked
+ * against LINE_BUFSIZE.
+ *
+ * Returns 0 when the whole file was consumed without parse or write
+ * errors, -1 otherwise.
+ */
+static int
+gf_changelog_parse_ascii (xlator_t *this,
+                          gf_changelog_journal_t *jnl,
+                          int from_fd, int to_fd,
+                          size_t start_offset, struct stat *stbuf,
+                          int version_idx)
+{
+        int           ng                  = 0;
+        int           ret                 = -1;
+        int           fop                 = 0;
+        int           len                 = 0;
+        off_t         off                 = 0;
+        off_t         nleft               = 0;
+        char         *ptr                 = NULL;
+        char         *eptr                = NULL;
+        void         *start               = NULL;
+        char         *mover               = NULL;
+        int           parse_err           = 0;
+        int           write_err           = 0;
+        char          current_mover       = ' ';
+        char          ascii[LINE_BUFSIZE] = {0,};
+        const char   *fopname             = NULL;
+
+        nleft = stbuf->st_size;
+
+        start = mmap (NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0);
+        if (start == MAP_FAILED) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_MMAP_FAILED,
+                        "mmap() error");
+                goto out;
+        }
+
+        mover = start;
+
+        MOVER_MOVE (mover, nleft, start_offset);
+
+        while (nleft > 0) {
+                off = 0;
+                current_mover = *mover;
+
+                GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+                switch (current_mover) {
+                case 'D':
+                        MOVER_MOVE (mover, nleft, 1);
+
+                        /* target gfid */
+                        PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+                                    conv_noop, parse_err);
+                        FILL_AND_MOVE(ptr, ascii, off,
+                                      mover, nleft, UUID_CANONICAL_FORM_LEN);
+                        break;
+                case 'M':
+                        MOVER_MOVE (mover, nleft, 1);
+
+                        /* target gfid */
+                        PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+                                    conv_noop, parse_err);
+                        FILL_AND_MOVE (ptr, ascii, off,
+                                       mover, nleft, UUID_CANONICAL_FORM_LEN);
+                        FILL_AND_MOVE (" ", ascii, off, mover, nleft, 1);
+
+                        /* fop */
+                        len = strlen (mover);
+                        VERIFY_SEPARATOR (mover, len, parse_err);
+
+                        fop = atoi (mover);
+                        /* the number comes from the file: never trust it
+                           as an array index */
+                        if (fop < 0 || fop >= GF_FOP_MAXVALUE) {
+                                parse_err = 1;
+                                break;
+                        }
+                        fopname = gf_fop_list[fop];
+                        if (fopname == NULL) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        MOVER_MOVE (mover, nleft, len);
+
+                        len = strlen (fopname);
+                        GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+                        break;
+
+                case 'E':
+                        MOVER_MOVE (mover, nleft, 1);
+
+                        /* target gfid */
+                        PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+                                    conv_noop, parse_err);
+                        FILL_AND_MOVE (ptr, ascii, off,
+                                       mover, nleft, UUID_CANONICAL_FORM_LEN);
+                        FILL_AND_MOVE (" ", ascii, off,
+                                       mover, nleft, 1);
+
+                        /* fop */
+                        len = strlen (mover);
+                        VERIFY_SEPARATOR (mover, len, parse_err);
+
+                        fop = atoi (mover);
+                        if (fop < 0 || fop >= GF_FOP_MAXVALUE) {
+                                parse_err = 1;
+                                break;
+                        }
+                        fopname = gf_fop_list[fop];
+                        if (fopname == NULL) {
+                                parse_err = 1;
+                                break;
+                        }
+
+                        MOVER_MOVE (mover, nleft, len);
+
+                        len = strlen (fopname);
+                        GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+                        /* extra fields following the fop number */
+                        ng = nr_extra_recs[version_idx][fop];
+                        for (; ng > 0; ng--) {
+                                MOVER_MOVE (mover, nleft, 1);
+                                len = strlen (mover);
+                                VERIFY_SEPARATOR (mover, len, parse_err);
+
+                                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+                                FILL_AND_MOVE (mover, ascii,
+                                               off, mover, nleft, len);
+                        }
+
+                        /* pargfid + bname */
+                        ng = nr_gfids[version_idx][fop];
+                        while (ng-- > 0) {
+                                MOVER_MOVE (mover, nleft, 1);
+                                len = strlen (mover);
+                                if (!len) {
+                                        MOVER_MOVE (mover, nleft, 1);
+                                        continue;
+                                }
+
+                                GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+                                PARSE_GFID (mover, ptr, len,
+                                            conv_noop, parse_err);
+                                /*
+                                 * worst case for percent-encoding is three
+                                 * output bytes per input byte; the extra
+                                 * byte keeps the result NUL terminated for
+                                 * the strlen() in FILL_AND_MOVE.
+                                 */
+                                eptr = calloc (1, (3 * strlen (ptr)) + 1);
+                                if (!eptr) {
+                                        parse_err = 1;
+                                        break;
+                                }
+
+                                gf_rfc3986_encode ((unsigned char *) ptr,
+                                                   eptr, jnl->rfc3986);
+                                FILL_AND_MOVE (eptr, ascii, off,
+                                               mover, nleft, len);
+                                free (eptr);
+                        }
+
+                        break;
+                default:
+                        parse_err = 1;
+                }
+
+                if (parse_err)
+                        break;
+
+                GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+                if (gf_changelog_write (to_fd, ascii, off) != off) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_LIB_MSG_ASCII_ERROR,
+                                "processing ascii changelog failed due to "
+                                " error in writing change");
+                        /* never report success after a short write */
+                        write_err = 1;
+                        break;
+                }
+
+                MOVER_MOVE (mover, nleft, 1);
+
+        }
+
+        if ((nleft == 0) && (!parse_err) && (!write_err))
+                ret = 0;
+
+        if (munmap (start, stbuf->st_size))
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_MUNMAP_FAILED,
+                        "munmap() error");
+
+ out:
+        return ret;
+}
+
+#define COPY_BUFSIZE  8192
+/**
+ * Copy everything that can still be read from @from_fd to @to_fd in
+ * COPY_BUFSIZE sized chunks.
+ *
+ * Returns 0 once end-of-file is reached, -1 when a read fails or a
+ * chunk could not be written completely (the latter is logged).
+ */
+static int
+gf_changelog_copy (xlator_t *this, int from_fd, int to_fd)
+{
+        ssize_t nread                 = 0;
+        char    chunk[COPY_BUFSIZE+1] = {0,};
+
+        for (;;) {
+                nread = sys_read (from_fd, chunk, COPY_BUFSIZE);
+                if (nread <= 0)
+                        return (nread < 0 ? -1 : 0);
+
+                if (gf_changelog_write (to_fd, chunk, nread) == nread)
+                        continue;
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED,
+                        "error processing ascii changlog");
+                return -1;
+        }
+}
+
+/**
+ * Read the changelog header from @from_fd and hand the file over to the
+ * decoder matching its encoding, which writes the result to @to_fd.
+ *
+ * @stbuf must describe @from_fd (its st_size is used by the decoders
+ * and to detect header-only files). When the header accounts for the
+ * whole file, *@zerob is set to 1 and -1 is returned without decoding.
+ *
+ * Returns the decoder's result (0 on success), or -1 for an unknown
+ * encoding/version or a header-only file.
+ */
+static int
+gf_changelog_decode (xlator_t *this, gf_changelog_journal_t *jnl,
+ int from_fd, int to_fd, struct stat *stbuf, int *zerob)
+{
+ int ret = -1;
+ int encoding = -1;
+ int major_version = -1;
+ int minor_version = -1;
+ int version_idx = -1;
+ size_t elen = 0;
+ char buffer[1024] = {0,};
+
+ /* fills encoding, versions and elen (presumably the header length --
+    confirm against the macro in changelog-misc.h) */
+ CHANGELOG_GET_HEADER_INFO (from_fd, buffer, 1024, encoding,
+ major_version, minor_version, elen);
+ if (encoding == -1) /* unknown encoding */
+ goto out;
+
+ if (major_version == -1) /* unknown major version */
+ goto out;
+
+ if (minor_version == -1) /* unknown minor version */
+ goto out;
+
+ if (!CHANGELOG_VALID_ENCODING (encoding))
+ goto out;
+
+ /* nothing but the header: report it through @zerob, ret stays -1 */
+ if (elen == stbuf->st_size) {
+ *zerob = 1;
+ goto out;
+ }
+
+ /* map the on-disk version to an index into the nr_* tables */
+ if (major_version == 1 && minor_version == 1) {
+ version_idx = VERSION_1_1;
+ } else if (major_version == 1 && minor_version == 2) {
+ version_idx = VERSION_1_2;
+ }
+
+ if (version_idx == -1) /* unknown version number */
+ goto out;
+
+ /**
+ * start processing after the header
+ */
+ sys_lseek (from_fd, elen, SEEK_SET);
+
+ switch (encoding) {
+ case CHANGELOG_ENCODE_BINARY:
+ /**
+ * this ideally should have been a part of changelog-encoders.c
+ * (ie. part of the changelog translator).
+ */
+ ret = gf_changelog_parse_binary (this, jnl, from_fd,
+ to_fd, elen, stbuf,
+ version_idx);
+ break;
+
+ case CHANGELOG_ENCODE_ASCII:
+ ret = gf_changelog_parse_ascii (this, jnl, from_fd,
+ to_fd, elen, stbuf,
+ version_idx);
+ break;
+ default:
+ /* any other encoding accepted above is copied verbatim */
+ ret = gf_changelog_copy (this, from_fd, to_fd);
+ }
+
+ out:
+ return ret;
+}
+
+/**
+ * Move the decoded changelog for @from_path from the journal's
+ * "current" directory to its "processing" directory.
+ *
+ * A file that does not exist in "current" is not an error (0 is
+ * returned); any other stat failure is returned as is. A failing
+ * rename is logged and its result returned.
+ */
+int
+gf_changelog_publish (xlator_t *this,
+                      gf_changelog_journal_t *jnl, char *from_path)
+{
+        int         ret               = 0;
+        struct stat st                = {0,};
+        char        current[PATH_MAX] = {0,};
+        char        target[PATH_MAX]  = {0,};
+
+        (void) snprintf (current, PATH_MAX, "%s%s",
+                         jnl->jnl_current_dir, basename (from_path));
+
+        /* handle zerob file that wont exist in current */
+        ret = sys_stat (current, &st);
+        if (ret != 0)
+                return (errno == ENOENT) ? 0 : ret;
+
+        (void) snprintf (target, PATH_MAX, "%s%s",
+                         jnl->jnl_processing_dir, basename (from_path));
+
+        ret = sys_rename (current, target);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_RENAME_FAILED,
+                        "error moving %s to processing dir",
+                        current);
+
+        return ret;
+}
+
+/**
+ * Decode the changelog at @from_path into the journal's "current"
+ * directory and, unless @no_publish is set, move the result on to the
+ * "processing" directory.
+ *
+ * A header-only (zero byte) changelog leaves nothing behind: the file
+ * created in "current" is unlinked and the unlink result is returned.
+ *
+ * Returns 0 on success and non-zero on failure. Failing to open either
+ * the source or the destination file is reported as -1 (previously the
+ * successful stat result leaked through and 0 was returned).
+ */
+int
+gf_changelog_consume (xlator_t *this,
+                      gf_changelog_journal_t *jnl,
+                      char *from_path, gf_boolean_t no_publish)
+{
+        int         ret               = -1;
+        int         fd1               = -1;
+        int         fd2               = -1;
+        int         zerob             = 0;
+        struct stat stbuf             = {0,};
+        char        dest[PATH_MAX]    = {0,};
+        char        to_path[PATH_MAX] = {0,};
+
+        ret = sys_stat (from_path, &stbuf);
+        if (ret || !S_ISREG(stbuf.st_mode)) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_STAT_FAILED,
+                        "stat failed on changelog file: %s", from_path);
+                goto out;
+        }
+
+        fd1 = open (from_path, O_RDONLY);
+        if (fd1 < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_OPEN_FAILED,
+                        "cannot open changelog file: %s",
+                        from_path);
+                /* ret still holds 0 from the stat above */
+                ret = -1;
+                goto out;
+        }
+
+        (void) snprintf (to_path, PATH_MAX, "%s%s",
+                         jnl->jnl_current_dir, basename (from_path));
+        (void) snprintf (dest, PATH_MAX, "%s%s",
+                         jnl->jnl_processing_dir, basename (from_path));
+
+        fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR,
+                    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (fd2 < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_OPEN_FAILED,
+                        "cannot create ascii changelog file %s",
+                        to_path);
+                ret = -1;
+                goto close_fd;
+        }
+
+        ret = gf_changelog_decode (this, jnl, fd1,
+                                   fd2, &stbuf, &zerob);
+
+        sys_close (fd2);
+
+        if (!ret) {
+                /* move it to processing on a successful
+                   decode */
+                if (no_publish == _gf_true)
+                        goto close_fd;
+                ret = sys_rename (to_path, dest);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_LIB_MSG_RENAME_FAILED,
+                                "error moving %s to processing dir",
+                                to_path);
+        }
+
+        /* remove it from .current if it's an empty file */
+        if (zerob) {
+                /* zerob changelogs must be unlinked */
+                ret = sys_unlink (to_path);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_LIB_MSG_UNLINK_FAILED,
+                                "could not unlink %s",
+                                to_path);
+        }
+
+ close_fd:
+        sys_close (fd1);
+
+ out:
+        return ret;
+}
+
+/**
+ * Journal processor thread: @data is the gf_changelog_journal_t to
+ * serve. Sleeps on the processor's condition variable while the entry
+ * list is empty, then dequeues the first entry, consumes (decodes and
+ * publishes) the changelog it names and frees it. The loop never
+ * terminates on its own.
+ */
+void *
+gf_changelog_process (void *data)
+{
+        gf_changelog_journal_t   *jnl   = data;
+        gf_changelog_processor_t *proc  = jnl->jnl_proc;
+        xlator_t                 *this  = jnl->this;
+        gf_changelog_entry_t     *entry = NULL;
+
+        THIS = this;
+
+        for (;;) {
+                pthread_mutex_lock (&proc->lock);
+                {
+                        while (list_empty (&proc->entries)) {
+                                proc->waiting = _gf_true;
+                                pthread_cond_wait (&proc->cond, &proc->lock);
+                        }
+
+                        entry = list_first_entry (&proc->entries,
+                                                  gf_changelog_entry_t, list);
+                        list_del (&entry->list);
+                        proc->waiting = _gf_false;
+                }
+                pthread_mutex_unlock (&proc->lock);
+
+                if (!entry)
+                        continue;
+
+                (void) gf_changelog_consume (this, jnl,
+                                             entry->path, _gf_false);
+                GF_FREE (entry);
+        }
+
+        return NULL;
+}
+
+/**
+ * Queue the changelog path carried by @event for the processor thread.
+ *
+ * A new list entry holding a copy of the path is appended to
+ * @jnl_proc->entries under the processor lock, and the processor is
+ * signalled if it flagged itself as waiting. The event is silently
+ * dropped when the entry cannot be allocated.
+ */
+void
+gf_changelog_queue_journal (gf_changelog_processor_t *jnl_proc,
+                            changelog_event_t *event)
+{
+        gf_changelog_entry_t *node = NULL;
+
+        node = GF_CALLOC (1, sizeof (gf_changelog_entry_t),
+                          gf_changelog_mt_libgfchangelog_entry_t);
+        if (node == NULL)
+                return;
+
+        INIT_LIST_HEAD (&node->list);
+
+        /* copy the path including its terminating NUL */
+        (void)memcpy (node->path, event->u.journal.path,
+                      strlen (event->u.journal.path) + 1);
+
+        pthread_mutex_lock (&jnl_proc->lock);
+        {
+                list_add_tail (&node->list, &jnl_proc->entries);
+                if (jnl_proc->waiting)
+                        pthread_cond_signal (&jnl_proc->cond);
+        }
+        pthread_mutex_unlock (&jnl_proc->lock);
+}
+
+/**
+ * Event callback: @cbkdata is the journal the event belongs to; the
+ * event is forwarded to that journal's processor queue. @xl and @brick
+ * are not used.
+ */
+void
+gf_changelog_handle_journal (void *xl, char *brick,
+                             void *cbkdata, changelog_event_t *event)
+{
+        gf_changelog_journal_t *jnl = cbkdata;
+
+        gf_changelog_queue_journal (jnl->jnl_proc, event);
+}
+
+/**
+ * Disconnect callback: mark the journal passed in @data as
+ * JNL_API_DISCONNECTED while holding its spinlock.
+ */
+void
+gf_changelog_journal_disconnect (void *xl, char *brick, void *data)
+{
+        gf_changelog_journal_t *jnl = data;
+
+        pthread_spin_lock (&jnl->lock);
+        JNL_SET_API_STATE (jnl, JNL_API_DISCONNECTED);
+        pthread_spin_unlock (&jnl->lock);
+}
+
+/**
+ * Connect callback: mark the journal passed in @data as
+ * JNL_API_CONNECTED while holding its spinlock.
+ */
+void
+gf_changelog_journal_connect (void *xl, char *brick, void *data)
+{
+        gf_changelog_journal_t *jnl = data;
+
+        pthread_spin_lock (&jnl->lock);
+        JNL_SET_API_STATE (jnl, JNL_API_CONNECTED);
+        pthread_spin_unlock (&jnl->lock);
+}
+
+/**
+ * Stop the journal processor thread of @jnl and release its resources
+ * (mutex, condition variable and the processor structure itself).
+ *
+ * Nothing is done when THIS, @jnl or its processor is missing. If the
+ * thread cannot be cleaned up an error is logged and the processor is
+ * left untouched.
+ */
+void
+gf_changelog_cleanup_processor (gf_changelog_journal_t *jnl)
+{
+        xlator_t                 *this = THIS;
+        gf_changelog_processor_t *proc = NULL;
+
+        if (this == NULL || jnl == NULL || jnl->jnl_proc == NULL)
+                return;
+
+        proc = jnl->jnl_proc;
+
+        if (gf_thread_cleanup (this, proc->processor) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_LIB_MSG_CLEANUP_ERROR,
+                        "failed to cleanup processor thread");
+                return;
+        }
+
+        (void)pthread_mutex_destroy (&proc->lock);
+        (void)pthread_cond_destroy (&proc->cond);
+
+        GF_FREE (proc);
+}
+
+/**
+ * Allocate the journal processor for @jnl, initialise its lock,
+ * condition variable and entry list, publish it in @jnl->jnl_proc and
+ * start the processing thread.
+ *
+ * Returns 0 on success. On failure everything set up so far is undone,
+ * @jnl->jnl_proc is left NULL if the thread could not be created, and
+ * -1 is returned.
+ */
+int
+gf_changelog_init_processor (gf_changelog_journal_t *jnl)
+{
+        gf_changelog_processor_t *proc = NULL;
+
+        proc = GF_CALLOC (1, sizeof (gf_changelog_processor_t),
+                          gf_changelog_mt_libgfchangelog_t);
+        if (proc == NULL)
+                return -1;
+
+        if (pthread_mutex_init (&proc->lock, NULL) != 0)
+                goto free_proc;
+
+        if (pthread_cond_init (&proc->cond, NULL) != 0)
+                goto destroy_mutex;
+
+        INIT_LIST_HEAD (&proc->entries);
+        proc->waiting = _gf_false;
+        /* the thread reads jnl->jnl_proc, so publish it first */
+        jnl->jnl_proc = proc;
+
+        if (pthread_create (&proc->processor,
+                            NULL, gf_changelog_process, jnl) == 0)
+                return 0;
+
+        jnl->jnl_proc = NULL;
+        (void) pthread_cond_destroy (&proc->cond);
+ destroy_mutex:
+        (void) pthread_mutex_destroy (&proc->lock);
+ free_proc:
+        GF_FREE (proc);
+        return -1;
+}
+
+/**
+ * Release the OS level resources held by @jnl: the tracker file
+ * descriptor (unless it is -1), the "processing" directory stream (if
+ * open) and the working directory string.
+ */
+static void
+gf_changelog_cleanup_fds (gf_changelog_journal_t *jnl)
+{
+        /* tracker fd */
+        if (jnl->jnl_fd != -1)
+                sys_close (jnl->jnl_fd);
+
+        /* processing dir */
+        if (jnl->jnl_dir != NULL)
+                sys_closedir (jnl->jnl_dir);
+
+        /* allocated by realpath; free (NULL) is a no-op */
+        free (jnl->jnl_working_dir);
+}
+
+/**
+ * Prepare the scratch area below @jnl->jnl_working_dir.
+ *
+ * ".current" and ".processing" are wiped and recreated, ".processed" is
+ * created if missing, the ".processing" directory is opened
+ * (@jnl->jnl_dir) and the tracker file is opened/created
+ * (@jnl->jnl_fd).
+ *
+ * Returns 0 on success and non-zero on failure. On failure no directory
+ * stream is left behind in @jnl->jnl_dir; in particular a failing
+ * opendir() is now reported as -1 instead of returning the stale 0 of
+ * the preceding mkdir.
+ */
+static int
+gf_changelog_open_dirs (xlator_t *this, gf_changelog_journal_t *jnl)
+{
+        int  ret                    = -1;
+        DIR *dir                    = NULL;
+        int  tracker_fd             = -1;
+        char tracker_path[PATH_MAX] = {0,};
+
+        /* .current */
+        (void) snprintf (jnl->jnl_current_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_CURRENT_DIR"/",
+                         jnl->jnl_working_dir);
+        ret = recursive_rmdir (jnl->jnl_current_dir);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_FAILED_TO_RMDIR,
+                        "Failed to rmdir: %s",
+                        jnl->jnl_current_dir);
+                goto out;
+        }
+        ret = mkdir_p (jnl->jnl_current_dir, 0600, _gf_false);
+        if (ret)
+                goto out;
+
+        /* .processed */
+        (void) snprintf (jnl->jnl_processed_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_PROCESSED_DIR"/",
+                         jnl->jnl_working_dir);
+        ret = mkdir_p (jnl->jnl_processed_dir, 0600, _gf_false);
+        if (ret)
+                goto out;
+
+        /* .processing */
+        (void) snprintf (jnl->jnl_processing_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_PROCESSING_DIR"/",
+                         jnl->jnl_working_dir);
+        ret = recursive_rmdir (jnl->jnl_processing_dir);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_FAILED_TO_RMDIR,
+                        "Failed to rmdir: %s",
+                        jnl->jnl_processing_dir);
+                goto out;
+        }
+
+        ret = mkdir_p (jnl->jnl_processing_dir, 0600, _gf_false);
+        if (ret)
+                goto out;
+
+        dir = sys_opendir (jnl->jnl_processing_dir);
+        if (!dir) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+                        "opendir() error");
+                /* ret is 0 from the mkdir above: make the failure visible */
+                ret = -1;
+                goto out;
+        }
+
+        jnl->jnl_dir = dir;
+
+        (void) snprintf (tracker_path, PATH_MAX,
+                         "%s/"GF_CHANGELOG_TRACKER, jnl->jnl_working_dir);
+
+        tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR,
+                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (tracker_fd < 0) {
+                sys_closedir (jnl->jnl_dir);
+                /* do not leave a closed stream behind for later cleanup */
+                jnl->jnl_dir = NULL;
+                ret = -1;
+                goto out;
+        }
+
+        jnl->jnl_fd = tracker_fd;
+        ret = 0;
+ out:
+        return ret;
+}
+
+/**
+ * Create the history journal of @jnl (@jnl->hist_jnl).
+ *
+ * The history journal works in "<working dir>/.history/", gets its own
+ * scratch directories and tracker file, a copy of @brick_path and the
+ * RFC 3986 pass-through table.
+ *
+ * Returns 0 on success. On failure the history journal is released
+ * again, including the working directory string allocated by
+ * realpath(), @jnl->hist_jnl is reset to NULL and -1 is returned.
+ */
+int
+gf_changelog_init_history (xlator_t *this,
+                           gf_changelog_journal_t *jnl,
+                           char *brick_path)
+{
+        int  i                          = 0;
+        int  ret                        = 0;
+        char hist_scratch_dir[PATH_MAX] = {0,};
+
+        jnl->hist_jnl = GF_CALLOC (1, sizeof (*jnl),
+                                   gf_changelog_mt_libgfchangelog_t);
+        if (!jnl->hist_jnl)
+                goto error_return;
+
+        jnl->hist_jnl->jnl_dir = NULL;
+        jnl->hist_jnl->jnl_fd = -1;
+
+        (void) snprintf (hist_scratch_dir, PATH_MAX,
+                         "%s/"GF_CHANGELOG_HISTORY_DIR"/",
+                         jnl->jnl_working_dir);
+
+        ret = mkdir_p (hist_scratch_dir, 0600, _gf_false);
+        if (ret)
+                goto dealloc_hist;
+
+        jnl->hist_jnl->jnl_working_dir = realpath (hist_scratch_dir, NULL);
+        if (!jnl->hist_jnl->jnl_working_dir)
+                goto dealloc_hist;
+
+        ret = gf_changelog_open_dirs (this, jnl->hist_jnl);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+                        "could not create entries in history scratch dir");
+                goto dealloc_hist;
+        }
+
+        (void) strncpy (jnl->hist_jnl->jnl_brickpath, brick_path, PATH_MAX-1);
+        jnl->hist_jnl->jnl_brickpath[PATH_MAX-1] = 0;
+
+        /* unreserved characters map to themselves, everything else to 0 */
+        for (i = 0; i < 256; i++) {
+                jnl->hist_jnl->rfc3986[i] =
+                        (isalnum(i) || i == '~' ||
+                         i == '-' || i == '.' || i == '_') ? i : 0;
+        }
+
+        return 0;
+
+ dealloc_hist:
+        /* allocated by realpath (or still NULL): plain free, not GF_FREE */
+        free (jnl->hist_jnl->jnl_working_dir);
+        GF_FREE (jnl->hist_jnl);
+        jnl->hist_jnl = NULL;
+ error_return:
+        return -1;
+}
+
+/**
+ * Destructor callback for a journal created by
+ * gf_changelog_journal_init(); @data is that journal.
+ *
+ * Stops the processor thread, releases the descriptors and strings of
+ * the journal and of its history journal, and frees both structures
+ * (the history journal used to be leaked). @xl and @brick are unused.
+ */
+void
+gf_changelog_journal_fini (void *xl, char *brick, void *data)
+{
+        gf_changelog_journal_t *jnl = data;
+
+        gf_changelog_cleanup_processor (jnl);
+
+        gf_changelog_cleanup_fds (jnl);
+        if (jnl->hist_jnl) {
+                gf_changelog_cleanup_fds (jnl->hist_jnl);
+                /* allocated with GF_CALLOC in gf_changelog_init_history */
+                GF_FREE (jnl->hist_jnl);
+                jnl->hist_jnl = NULL;
+        }
+
+        GF_FREE (jnl);
+}
+
+/**
+ * Init callback: build the journal for the brick described by @brick,
+ * whose ->ptr names the scratch directory to work in.
+ *
+ * The scratch directory is created if missing and resolved with
+ * realpath(), the scratch sub-directories and tracker file are set up,
+ * the history journal is created and the processor thread is started.
+ *
+ * Returns the new journal, or NULL on failure. Every failure path
+ * releases what was acquired before it, including the realpath()
+ * allocated working directory and the history journal structure.
+ */
+void *
+gf_changelog_journal_init (void *xl, struct gf_brick_spec *brick)
+{
+        int                     i           = 0;
+        int                     ret         = 0;
+        xlator_t               *this        = NULL;
+        struct stat             buf         = {0,};
+        char                   *scratch_dir = NULL;
+        gf_changelog_journal_t *jnl         = NULL;
+
+        this = xl;
+        scratch_dir = (char *) brick->ptr;
+
+        jnl = GF_CALLOC (1, sizeof (gf_changelog_journal_t),
+                         gf_changelog_mt_libgfchangelog_t);
+        if (!jnl)
+                goto error_return;
+
+        /* no tracker file yet; 0 would be a valid descriptor */
+        jnl->jnl_fd = -1;
+
+        if (sys_stat (scratch_dir, &buf) && errno == ENOENT) {
+                ret = mkdir_p (scratch_dir, 0600, _gf_true);
+                if (ret)
+                        goto dealloc_private;
+        }
+
+        jnl->jnl_working_dir = realpath (scratch_dir, NULL);
+        if (!jnl->jnl_working_dir)
+                goto dealloc_private;
+
+        ret = gf_changelog_open_dirs (this, jnl);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+                        "could not create entries in scratch dir");
+                goto free_working_dir;
+        }
+
+        (void) strncpy (jnl->jnl_brickpath, brick->brick_path, PATH_MAX-1);
+        jnl->jnl_brickpath[PATH_MAX-1] = 0;
+
+        /* RFC 3986 {de,en}coding */
+        for (i = 0; i < 256; i++) {
+                jnl->rfc3986[i] =
+                        (isalnum(i) || i == '~' ||
+                         i == '-' || i == '.' || i == '_') ? i : 0;
+        }
+
+        ret = gf_changelog_init_history (this, jnl, brick->brick_path);
+        if (ret)
+                goto cleanup_fds;
+
+        /* initialize journal processor */
+        jnl->this = this;
+        ret = gf_changelog_init_processor (jnl);
+        if (ret)
+                goto cleanup_fds;
+
+        JNL_SET_API_STATE (jnl, JNL_API_CONN_INPROGESS);
+        ret = pthread_spin_init (&jnl->lock, 0);
+        if (ret != 0)
+                goto cleanup_processor;
+        return jnl;
+
+ cleanup_processor:
+        gf_changelog_cleanup_processor (jnl);
+ cleanup_fds:
+        /* also releases jnl->jnl_working_dir */
+        gf_changelog_cleanup_fds (jnl);
+        if (jnl->hist_jnl) {
+                gf_changelog_cleanup_fds (jnl->hist_jnl);
+                GF_FREE (jnl->hist_jnl);
+        }
+        goto dealloc_private;
+ free_working_dir:
+        /* allocated by realpath: plain free, not GF_FREE */
+        free (jnl->jnl_working_dir);
+ dealloc_private:
+        GF_FREE (jnl);
+ error_return:
+        return NULL;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal.h b/xlators/features/changelog/lib/src/gf-changelog-journal.h
new file mode 100644
index 00000000000..e91807c80b6
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-journal.h
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GF_CHANGELOG_JOURNAL_H
+#define __GF_CHANGELOG_JOURNAL_H
+
+#include <unistd.h>
+#include <pthread.h>
+
+#include "changelog.h"
+
+enum api_conn {
+ JNL_API_CONNECTED,
+ JNL_API_CONN_INPROGESS,
+ JNL_API_DISCONNECTED,
+};
+
+/**
+ * One queued changelog: created by gf_changelog_queue_journal() and
+ * consumed (then freed) by the processor thread.
+ */
+typedef struct gf_changelog_entry {
+ char path[PATH_MAX]; /* changelog path copied from the journal event */
+
+ struct list_head list; /* link in gf_changelog_processor_t->entries */
+} gf_changelog_entry_t;
+
+/**
+ * Work queue shared between the event callback (producer) and the
+ * journal processing thread (consumer).
+ */
+typedef struct gf_changelog_processor {
+ pthread_mutex_t lock; /* protects ->entries */
+ pthread_cond_t cond; /* waiter during empty list */
+ gf_boolean_t waiting; /* set by the processor before it sleeps on
+ ->cond; producers signal only if set */
+
+ pthread_t processor; /* thread-id of journal processing thread */
+
+ struct list_head entries; /* queued gf_changelog_entry_t items */
+} gf_changelog_processor_t;
+
+typedef struct gf_changelog_journal {
+ DIR *jnl_dir; /* 'processing' directory stream */
+
+ int jnl_fd; /* fd to the tracker file */
+
+ char jnl_brickpath[PATH_MAX]; /* brick path for this end-point */
+
+ gf_changelog_processor_t *jnl_proc;
+
+ char *jnl_working_dir; /* scratch directory */
+
+ char jnl_current_dir[PATH_MAX];
+ char jnl_processed_dir[PATH_MAX];
+ char jnl_processing_dir[PATH_MAX];
+
+ char rfc3986[256]; /* RFC 3986 string encoding */
+
+ struct gf_changelog_journal *hist_jnl;
+ int hist_done; /* holds 0 done scanning,
+ 1 keep scanning and -1 error */
+
+ pthread_spinlock_t lock;
+ int connected;
+ xlator_t *this;
+} gf_changelog_journal_t;
+
+/*
+ * Accessors for the journal's API connection state (->connected holds an
+ * enum api_conn value). Arguments are parenthesized so expressions such
+ * as "&journal" can be passed safely.
+ */
+#define JNL_SET_API_STATE(jnl, state) ((jnl)->connected = (state))
+#define JNL_IS_API_DISCONNECTED(jnl) ((jnl)->connected == JNL_API_DISCONNECTED)
+
+/* History API */
+typedef struct gf_changelog_history_data {
+ int len;
+
+ int htime_fd;
+
+ /* parallelism count */
+ int n_parallel;
+
+ /* history from, to indexes */
+ unsigned long from;
+ unsigned long to;
+ xlator_t *this;
+} gf_changelog_history_data_t;
+
+typedef struct gf_changelog_consume_data {
+ /** set of inputs */
+
+ /* fd to read from */
+ int fd;
+
+ /* from @offset */
+ off_t offset;
+
+ xlator_t *this;
+
+ gf_changelog_journal_t *jnl;
+
+ /** set of outputs */
+
+ /* return value */
+ int retval;
+
+ /* journal processed */
+ char changelog[PATH_MAX];
+} gf_changelog_consume_data_t;
+
+/* event handler */
+CALLBACK gf_changelog_handle_journal;
+
+/* init, connect & disconnect handler */
+INIT gf_changelog_journal_init;
+FINI gf_changelog_journal_fini;
+CONNECT gf_changelog_journal_connect;
+DISCONNECT gf_changelog_journal_disconnect;
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-reborp.c b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
new file mode 100644
index 00000000000..8fd01d0c77a
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
@@ -0,0 +1,425 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+#include "gf-changelog-helpers.h"
+#include "changelog-rpc-common.h"
+#include "changelog-lib-messages.h"
+
+#include "syscall.h"
+
+/**
+ * Reverse socket: actual data transfer handler. Connection
+ * initiator is PROBER, data transfer is REBORP.
+ */
+
+struct rpcsvc_program *gf_changelog_reborp_programs[];
+
+/**
+ * Janitor thread body: waits for brick entries to show up on
+ * priv->cleanups and tears each of them down (disable the prober RPC
+ * client, stop the callback invoker thread, drain queued events and
+ * finally free the entry).
+ *
+ * @arg is the library xlator; the thread never returns.
+ */
+void *
+gf_changelog_connection_janitor (void *arg)
+{
+        int32_t               ret     = 0;
+        xlator_t             *this    = NULL;
+        gf_private_t         *priv    = NULL;
+        gf_changelog_t       *entry   = NULL;
+        struct gf_event      *event   = NULL;
+        struct gf_event_list *ev      = NULL;
+        unsigned long         drained = 0;
+
+        this = arg;
+        THIS = this;
+
+        priv = this->private;
+
+        while (1) {
+                pthread_mutex_lock (&priv->lock);
+                {
+                        while (list_empty (&priv->cleanups))
+                                pthread_cond_wait (&priv->cond, &priv->lock);
+
+                        entry = list_first_entry (&priv->cleanups,
+                                                  gf_changelog_t, list);
+                        list_del_init (&entry->list);
+                }
+                pthread_mutex_unlock (&priv->lock);
+
+                drained = 0;
+                ev = &entry->event;
+
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO,
+                        "Cleaning brick entry for brick %s", entry->brick);
+
+                /* 0x0: disable rpc-clnt */
+                rpc_clnt_disable (RPC_PROBER (entry));
+
+                /* 0x1: cleanup callback invoker thread */
+                ret = gf_cleanup_event (this, ev);
+                if (ret) {
+                        /* the invoker may still touch the entry, so it is
+                         * deliberately left allocated */
+                        continue;
+                }
+
+                /* 0x2: drain pending events */
+                while (!list_empty (&ev->events)) {
+                        event = list_first_entry (&ev->events,
+                                                  struct gf_event, list);
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO,
+                                "Draining event [Seq: %lu, Payload: %d]",
+                                event->seq, event->count);
+
+                        /* unlink before freeing: otherwise the list head
+                         * keeps pointing at freed memory and this loop
+                         * never terminates */
+                        list_del (&event->list);
+                        GF_FREE (event);
+                        drained++;
+                }
+
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO,
+                        "Drained %lu events", drained);
+
+                /* 0x3: freeup brick entry */
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO,
+                        "freeing entry %p", entry);
+                LOCK_DESTROY (&entry->statelock);
+                GF_FREE (entry);
+        }
+
+        return NULL;
+}
+
+/**
+ * rpcsvc notification hook for the reverse (REBORP) listener.
+ *
+ * Only ACCEPT and DISCONNECT are acted upon: on ACCEPT the reverse
+ * socket file is unlinked and the consumer's "connected" callback is
+ * invoked (when registered); on DISCONNECT the "disconnected" callback
+ * is invoked (when registered). Always returns 0.
+ */
+int
+gf_changelog_reborp_rpcsvc_notify (rpcsvc_t *rpc, void *mydata,
+                                   rpcsvc_event_t event, void *data)
+{
+        xlator_t       *this  = NULL;
+        gf_changelog_t *entry = mydata;
+
+        if (event == RPCSVC_EVENT_ACCEPT) {
+                this = entry->this;
+
+                if (sys_unlink (RPC_SOCK(entry)) != 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                CHANGELOG_LIB_MSG_UNLINK_FAILED,
+                                "failed to unlink "
+                                "reverse socket %s", RPC_SOCK (entry));
+                }
+
+                if (entry->connected) {
+                        GF_CHANGELOG_INVOKE_CBK (this, entry->connected,
+                                                 entry->brick, entry->ptr);
+                }
+        } else if (event == RPCSVC_EVENT_DISCONNECT) {
+                this = entry->this;
+
+                if (entry->disconnected) {
+                        GF_CHANGELOG_INVOKE_CBK (this, entry->disconnected,
+                                                 entry->brick, entry->ptr);
+                }
+        }
+
+        /* every other event is ignored */
+        return 0;
+}
+
+/**
+ * Set up the reverse-connection RPC listener for a brick.
+ *
+ * @sock is filled by CHANGELOG_MAKE_TMP_SOCKET_PATH (presumably with a
+ * socket path derived from @path - confirm against the macro) and is then
+ * used as the listening address. @cbkdata is handed to
+ * changelog_rpc_server_init() together with the notify handler and the
+ * REBORP program table. Returns whatever changelog_rpc_server_init()
+ * returns.
+ */
+rpcsvc_t *
+gf_changelog_reborp_init_rpc_listner (xlator_t *this,
+ char *path, char *sock, void *cbkdata)
+{
+ CHANGELOG_MAKE_TMP_SOCKET_PATH (path, sock, UNIX_PATH_MAX);
+ return changelog_rpc_server_init (this, sock, cbkdata,
+ gf_changelog_reborp_rpcsvc_notify,
+ gf_changelog_reborp_programs);
+}
+
+/**
+ * Walk every event contained in the payload vectors and invoke the
+ * consumer callback for those that pass gf_changelog_filter_check().
+ *
+ * This is a stop-gap until there is event filtering in the server: the
+ * entire event buffer is scanned and interesting events are picked,
+ * whereas we _should_ only be notified of the events selected at probe
+ * time.
+ *
+ * @vec points to a pointer to the first of @payloadcnt contiguous
+ * struct iovec elements (the caller passes the address of a pointer to
+ * event->iov), hence the array is indexed as (*vec)[i].
+ *
+ * @FIXME: clean this up once the server filters events.
+ */
+void
+gf_changelog_invoke_callback (gf_changelog_t *entry,
+                              struct iovec **vec, int payloadcnt)
+{
+        int                i      = 0;
+        int                evsize = 0;
+        xlator_t          *this   = NULL;
+        changelog_event_t *event  = NULL;
+        struct iovec      *iov    = NULL;
+
+        if (!vec || payloadcnt <= 0)
+                return;
+
+        this = entry->this;
+        iov = *vec;
+
+        for (i = 0; i < payloadcnt; i++) {
+                /* each vector holds a whole number of fixed-size events */
+                event = (changelog_event_t *) iov[i].iov_base;
+                evsize = iov[i].iov_len / CHANGELOG_EV_SIZE;
+
+                for (; evsize > 0; evsize--, event++) {
+                        if (gf_changelog_filter_check (entry, event)) {
+                                GF_CHANGELOG_INVOKE_CBK (this,
+                                                         entry->callback,
+                                                         entry->brick,
+                                                         entry->ptr, event);
+                        }
+                }
+        }
+}
+
+/**
+ * Ordered event handler is self-adaptive.. if the event sequence number
+ * is what's expected (->next_seq) there is no ordering list that's
+ * maintained. On out-of-order event notifications, event buffers are
+ * dynamically allocated and ordered.
+ */
+
+/* Returns 1 when @event carries the sequence number the list is waiting
+ * for (ev->next_seq), 0 otherwise. */
+int
+__is_expected_sequence (struct gf_event_list *ev, struct gf_event *event)
+{
+        if (event->seq == ev->next_seq)
+                return 1;
+
+        return 0;
+}
+
+/**
+ * Look at the head of the (non-empty) event list and store it in @event.
+ * When it is the expected event it is unlinked, ->next_seq is advanced
+ * and 1 is returned; otherwise the list is left untouched and 0 is
+ * returned.
+ */
+int
+__can_process_event (struct gf_event_list *ev, struct gf_event **event)
+{
+        struct gf_event *head = NULL;
+
+        head = list_first_entry (&ev->events, struct gf_event, list);
+        *event = head;
+
+        if (!__is_expected_sequence (ev, head))
+                return 0;
+
+        list_del (&head->list);
+        ev->next_seq++;
+
+        return 1;
+}
+
+/**
+ * Block until the event at the head of the list is the next one in
+ * sequence, then hand it back (already unlinked) through @event.
+ */
+void
+pick_event_ordered (struct gf_event_list *ev, struct gf_event **event)
+{
+        pthread_mutex_lock (&ev->lock);
+
+        for (;;) {
+                /* the head is only inspected when the list is non-empty */
+                if (!list_empty (&ev->events)
+                    && __can_process_event (ev, event))
+                        break;
+
+                pthread_cond_wait (&ev->cond, &ev->lock);
+        }
+
+        pthread_mutex_unlock (&ev->lock);
+}
+
+/**
+ * Block until any event is queued, then unlink the head of the list and
+ * return it through @event.
+ */
+void
+pick_event_unordered (struct gf_event_list *ev, struct gf_event **event)
+{
+        struct gf_event *head = NULL;
+
+        pthread_mutex_lock (&ev->lock);
+
+        while (list_empty (&ev->events))
+                pthread_cond_wait (&ev->cond, &ev->lock);
+
+        head = list_first_entry (&ev->events, struct gf_event, list);
+        list_del (&head->list);
+        *event = head;
+
+        pthread_mutex_unlock (&ev->lock);
+}
+
+/**
+ * Callback invoker thread body. @arg is the struct gf_event_list of a
+ * brick entry; the thread repeatedly picks an event through
+ * entry->pickevent(), dispatches it to the consumer callback and frees
+ * it. The loop never terminates on its own.
+ */
+void *
+gf_changelog_callback_invoker (void *arg)
+{
+        xlator_t             *this  = NULL;
+        gf_changelog_t       *entry = NULL;
+        struct iovec         *vec   = NULL;
+        struct gf_event      *event = NULL;
+        struct gf_event_list *ev    = arg;
+
+        entry = ev->entry;
+        this = entry->this;
+        THIS = this;
+
+        for (;;) {
+                entry->pickevent (ev, &event);
+
+                vec = (struct iovec *) &event->iov;
+                gf_changelog_invoke_callback (entry, &vec, event->count);
+
+                GF_FREE (event);
+        }
+
+        return NULL;
+}
+
+/* list_add_order() comparator: 1 when the first event has the larger
+ * sequence number, -1 otherwise (including when they are equal). */
+static int
+orderfn (struct list_head *pos1, struct list_head *pos2)
+{
+        struct gf_event *lhs = list_entry (pos1, struct gf_event, list);
+        struct gf_event *rhs = list_entry (pos2, struct gf_event, list);
+
+        return (lhs->seq > rhs->seq) ? 1 : -1;
+}
+
+/**
+ * Insert @event into the sequence-ordered list. The first event ever
+ * queued bootstraps ->next_seq; the consumer is only signalled when the
+ * event just queued is the one it is waiting for.
+ */
+void
+queue_ordered_event (struct gf_event_list *ev, struct gf_event *event)
+{
+        pthread_mutex_lock (&ev->lock);
+
+        list_add_order (&event->list, &ev->events, orderfn);
+
+        if (ev->next_seq == 0)
+                ev->next_seq = event->seq;
+
+        if (event->seq == ev->next_seq)
+                pthread_cond_signal (&ev->cond);
+
+        pthread_mutex_unlock (&ev->lock);
+}
+
+/* Append @event to the tail of the queue and wake up a waiting consumer. */
+void
+queue_unordered_event (struct gf_event_list *ev, struct gf_event *event)
+{
+        pthread_mutex_lock (&ev->lock);
+
+        list_add_tail (&event->list, &ev->events);
+        pthread_cond_signal (&ev->cond);
+
+        pthread_mutex_unlock (&ev->lock);
+}
+
+/**
+ * Handle one incoming changelog event RPC for @entry.
+ *
+ * The request header is XDR-decoded, the remaining payload bytes are
+ * deep-copied into a newly allocated struct gf_event and the event is
+ * handed to entry->queueevent(). A reply is always submitted: op_ret 0
+ * with the acknowledged sequence number on success, op_ret -1 with
+ * sequence 0 on decode or allocation failure.
+ *
+ * Returns the result of changelog_rpc_sumbit_reply().
+ */
+int
+gf_changelog_event_handler (rpcsvc_request_t *req,
+ xlator_t *this, gf_changelog_t *entry)
+{
+ int i = 0;
+ size_t payloadlen = 0;
+ ssize_t len = 0;
+ int payloadcnt = 0;
+ changelog_event_req rpc_req = {0,};
+ changelog_event_rsp rpc_rsp = {0,};
+ struct iovec *vec = NULL;
+ struct gf_event *event = NULL;
+ struct gf_event_list *ev = NULL;
+
+ ev = &entry->event;
+
+ len = xdr_to_generic (req->msg[0],
+ &rpc_req, (xdrproc_t)xdr_changelog_event_req);
+ if (len < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_XDR_DECODING_FAILED,
+ "xdr decoding failed");
+ req->rpc_err = GARBAGE_ARGS;
+ goto handle_xdr_error;
+ }
+
+ /* bytes of msg[0] past the decoded header count as the first payload
+ vector; every further msg[] element is one more payload vector */
+ if (len < req->msg[0].iov_len) {
+ payloadcnt = 1;
+ payloadlen = (req->msg[0].iov_len - len);
+ }
+ for (i = 1; i < req->count; i++) {
+ payloadcnt++;
+ payloadlen += req->msg[i].iov_len;
+ }
+
+ event = GF_CALLOC (1, GF_EVENT_CALLOC_SIZE (payloadcnt, payloadlen),
+ gf_changelog_mt_libgfchangelog_event_t);
+ if (!event)
+ goto handle_xdr_error;
+ INIT_LIST_HEAD (&event->list);
+
+ /* payloadlen is reused as the running offset for the copies below */
+ payloadlen = 0;
+ event->seq = rpc_req.seq;
+ event->count = payloadcnt;
+
+ /* deep copy IO vectors */
+ /* NOTE(review): iov[0] is filled unconditionally and the loop below
+ indexes iov[i] up to req->count - 1, whereas payloadcnt only counts
+ msg[0] when it has trailing bytes - confirm the allocation has room
+ for iov[req->count - 1] when msg[0] carries no payload */
+ vec = &event->iov[0];
+ GF_EVENT_ASSIGN_IOVEC (vec, event,
+ (req->msg[0].iov_len - len), payloadlen);
+ (void) memcpy (vec->iov_base,
+ req->msg[0].iov_base + len, vec->iov_len);
+
+ for (i = 1; i < req->count; i++) {
+ vec = &event->iov[i];
+ GF_EVENT_ASSIGN_IOVEC (vec, event,
+ req->msg[i].iov_len, payloadlen);
+ (void) memcpy (event->iov[i].iov_base,
+ req->msg[i].iov_base, req->msg[i].iov_len);
+ }
+
+ /* NOTE(review): payloadlen is a size_t but is printed with %zd */
+ gf_msg_debug (this->name, 0,
+ "seq: %lu [%s] (time: %lu.%lu), (vec: %d, len: %zd)",
+ rpc_req.seq, entry->brick, rpc_req.tv_sec,
+ rpc_req.tv_usec, payloadcnt, payloadlen);
+
+ /* dispatch event */
+ entry->queueevent (ev, event);
+
+ /* ack sequence number */
+ rpc_rsp.op_ret = 0;
+ rpc_rsp.seq = rpc_req.seq;
+
+ goto submit_rpc;
+
+ handle_xdr_error:
+ rpc_rsp.op_ret = -1;
+ rpc_rsp.seq = 0; /* invalid */
+ submit_rpc:
+ return changelog_rpc_sumbit_reply (req, &rpc_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_changelog_event_rsp);
+}
+
+/**
+ * RPC actor for CHANGELOG_REV_PROC_EVENT: recovers the brick entry from
+ * the service's ->mydata, switches THIS to the entry's xlator and
+ * delegates to gf_changelog_event_handler().
+ */
+int
+gf_changelog_reborp_handle_event (rpcsvc_request_t *req)
+{
+        rpcsvc_t       *svc   = rpcsvc_request_service (req);
+        gf_changelog_t *entry = svc->mydata;
+        xlator_t       *this  = entry->this;
+
+        THIS = this;
+
+        return gf_changelog_event_handler (req, this, entry);
+}
+
+/* Actor table of the REBORP program: a single procedure that receives
+ * changelog events. */
+rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
+ [CHANGELOG_REV_PROC_EVENT] = {
+ "CHANGELOG EVENT HANDLER", CHANGELOG_REV_PROC_EVENT,
+ gf_changelog_reborp_handle_event, NULL, 0, DRC_NA
+ },
+};
+
+/**
+ * Do not use synctask as the RPC layer dereferences ->mydata as THIS.
+ * In gf_changelog_setup_rpc(), @cbkdata is of type @gf_changelog_t,
+ * and that's required to invoke the callback with the appropriate
+ * brick path and it's private data.
+ */
+struct rpcsvc_program gf_changelog_reborp_prog = {
+ .progname = "LIBGFCHANGELOG REBORP",
+ .prognum = CHANGELOG_REV_RPC_PROCNUM,
+ .progver = CHANGELOG_REV_RPC_PROCVER,
+ .numactors = CHANGELOG_REV_PROC_MAX,
+ .actors = gf_changelog_reborp_actors,
+ .synctask = _gf_false,
+};
+
+/* NULL-terminated list of programs passed to changelog_rpc_server_init()
+ * by gf_changelog_reborp_init_rpc_listner(). */
+struct rpcsvc_program *gf_changelog_reborp_programs[] = {
+ &gf_changelog_reborp_prog,
+ NULL,
+};
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.c b/xlators/features/changelog/lib/src/gf-changelog-rpc.c
new file mode 100644
index 00000000000..270632bc71b
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gf-changelog-rpc.h"
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+struct rpc_clnt_program gf_changelog_clnt;
+
+/**
+ * rpc-clnt notification hook for the prober connection. Only CONNECT is
+ * acted upon (the connection is flagged as connected); every other event
+ * is ignored. Always returns 0.
+ *
+ * TODO: piggyback reconnect to called (upcall)
+ */
+int
+gf_changelog_rpc_notify (struct rpc_clnt *rpc,
+                         void *mydata, rpc_clnt_event_t event, void *data)
+{
+        if (event == RPC_CLNT_CONNECT)
+                rpc_clnt_set_connected (&rpc->conn);
+
+        return 0;
+}
+
+/**
+ * Create the prober RPC client for @entry. The socket path is produced by
+ * CHANGELOG_MAKE_SOCKET_PATH from entry->brick (presumably the brick's
+ * changelog socket - confirm against the macro). Returns whatever
+ * changelog_rpc_client_init() returns.
+ */
+struct rpc_clnt *
+gf_changelog_rpc_init (xlator_t *this, gf_changelog_t *entry)
+{
+ char sockfile[UNIX_PATH_MAX] = {0,};
+
+ CHANGELOG_MAKE_SOCKET_PATH (entry->brick,
+ sockfile, UNIX_PATH_MAX);
+ return changelog_rpc_client_init (this, entry,
+ sockfile, gf_changelog_rpc_notify);
+}
+
+/**
+ * remote procedure calls declarations.
+ */
+
+/* Reply callback of the probe RPC: the response is not inspected and 0
+ * is returned unconditionally. */
+int
+gf_probe_changelog_cbk (struct rpc_req *req,
+ struct iovec *iovec, int count, void *myframe)
+{
+ return 0;
+}
+
+/**
+ * Send the PROBE FILTER request for the brick entry passed in @data: the
+ * request carries the entry's reverse socket path and its notification
+ * filter. Returns the result of changelog_rpc_sumbit_req().
+ */
+int
+gf_probe_changelog_filter (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ char *sock = NULL;
+ gf_changelog_t *entry = NULL;
+ changelog_probe_req req = {0,};
+
+ entry = data;
+ sock = RPC_SOCK (entry);
+
+ /* NOTE(review): the copy is bounded only by strlen (sock); this assumes
+ req.sock is at least as large as the buffer behind RPC_SOCK - confirm.
+ The terminating NUL comes from the zero-initialised req. */
+ (void) memcpy (&req.sock, sock, strlen (sock));
+ req.filter = entry->notify;
+
+ /* invoke RPC */
+ return changelog_rpc_sumbit_req (RPC_PROBER (entry), (void *) &req,
+ frame, &gf_changelog_clnt,
+ CHANGELOG_RPC_PROBE_FILTER, NULL, 0,
+ NULL, this, gf_probe_changelog_cbk,
+ (xdrproc_t) xdr_changelog_probe_req);
+}
+
+/* Invoke procedure @procidx of the libgfchangelog client program on the
+ * entry's prober connection, passing @entry as the call's data. */
+int
+gf_changelog_invoke_rpc (xlator_t *this, gf_changelog_t *entry, int procidx)
+{
+ return changelog_invoke_rpc (this, RPC_PROBER (entry),
+ &gf_changelog_clnt, procidx, entry);
+}
+
+/* Client-side procedure table: the NULL procedure has no handler, PROBE
+ * FILTER is served by gf_probe_changelog_filter(). */
+struct rpc_clnt_procedure gf_changelog_procs[CHANGELOG_RPC_PROC_MAX] = {
+ [CHANGELOG_RPC_PROC_NULL] = {"NULL", NULL},
+ [CHANGELOG_RPC_PROBE_FILTER] = {
+ "PROBE FILTER", gf_probe_changelog_filter
+ },
+};
+
+/* RPC client program descriptor used for calls made over the prober
+ * connection. */
+struct rpc_clnt_program gf_changelog_clnt = {
+ .progname = "LIBGFCHANGELOG",
+ .prognum = CHANGELOG_RPC_PROGNUM,
+ .progver = CHANGELOG_RPC_PROGVER,
+ .numproc = CHANGELOG_RPC_PROC_MAX,
+ .proctable = gf_changelog_procs,
+};
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.h b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
new file mode 100644
index 00000000000..1c982eef809
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GF_CHANGELOG_RPC_H
+#define __GF_CHANGELOG_RPC_H
+
+#include "xlator.h"
+
+#include "gf-changelog-helpers.h"
+#include "changelog-rpc-common.h"
+
+struct rpc_clnt *gf_changelog_rpc_init (xlator_t *, gf_changelog_t *);
+
+int gf_changelog_invoke_rpc (xlator_t *, gf_changelog_t *, int);
+
+rpcsvc_t *
+gf_changelog_reborp_init_rpc_listner (xlator_t *, char *, char *, void *);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
new file mode 100644
index 00000000000..75891635827
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -0,0 +1,623 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "logging.h"
+#include "defaults.h"
+#include "syncop.h"
+
+#include "gf-changelog-rpc.h"
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+#include "changelog-lib-messages.h"
+
+/**
+ * Global singleton xlator pointer for the library, initialized
+ * during library load. This should probably be hidden inside
+ * an initialized object which is an handle for the consumer.
+ *
+ * TODO: do away with the global..
+ */
+xlator_t *master = NULL;
+
+/**
+ * Allocate and initialise the library private structure: both lists are
+ * emptied, the mutex and condition variable are initialised and ->api is
+ * cleared. Returns NULL on any failure, with everything that had been
+ * set up undone.
+ */
+static inline
+gf_private_t *gf_changelog_alloc_priv ()
+{
+        gf_private_t *priv = NULL;
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t);
+        if (!priv)
+                return NULL;
+
+        INIT_LIST_HEAD (&priv->connections);
+        INIT_LIST_HEAD (&priv->cleanups);
+
+        if (pthread_mutex_init (&priv->lock, NULL) != 0) {
+                GF_FREE (priv);
+                return NULL;
+        }
+
+        if (pthread_cond_init (&priv->cond, NULL) != 0) {
+                (void) pthread_mutex_destroy (&priv->lock);
+                GF_FREE (priv);
+                return NULL;
+        }
+
+        priv->api = NULL;
+
+        return priv;
+}
+
+#define GF_CHANGELOG_EVENT_POOL_SIZE 16384
+#define GF_CHANGELOG_EVENT_THREAD_COUNT 4
+
+/**
+ * Populate a freshly created glusterfs context with the defaults the
+ * library needs: memory accounting for THIS, process uuid, iobuf and
+ * event pools, the call pool with its frame/stack pools, the stub, dict
+ * and log-buffer pools, locks and the xlator option list. The core file
+ * size limit is raised to RLIM_INFINITY at the end.
+ *
+ * Returns 0 on success and -1 on the first failure.
+ *
+ * NOTE(review): nothing allocated before a failing step is released here;
+ * in particular the local call pool is only attached to @ctx at the very
+ * end, so it is unreachable after an early return.
+ */
+static int
+gf_changelog_ctx_defaults_init (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {0, };
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (THIS, gf_changelog_mt_end);
+ if (ret != 0)
+ return -1;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
+ if (!ctx->process_uuid)
+ return -1;
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new ();
+ if (!ctx->iobuf_pool)
+ return -1;
+
+ ctx->event_pool = event_pool_new (GF_CHANGELOG_EVENT_POOL_SIZE,
+ GF_CHANGELOG_EVENT_THREAD_COUNT);
+ if (!ctx->event_pool)
+ return -1;
+
+ pool = GF_CALLOC (1, sizeof (call_pool_t),
+ gf_changelog_mt_libgfchangelog_call_pool_t);
+ if (!pool)
+ return -1;
+
+ /* frame_mem_pool size 112 * 64 */
+ pool->frame_mem_pool = mem_pool_new (call_frame_t, 32);
+ if (!pool->frame_mem_pool)
+ return -1;
+
+ /* stack_mem_pool size 256 * 128 */
+ pool->stack_mem_pool = mem_pool_new (call_stack_t, 16);
+
+ if (!pool->stack_mem_pool)
+ return -1;
+
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 16);
+ if (!ctx->stub_mem_pool)
+ return -1;
+
+ ctx->dict_pool = mem_pool_new (dict_t, 32);
+ if (!ctx->dict_pool)
+ return -1;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t, 512);
+ if (!ctx->dict_pair_pool)
+ return -1;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, 512);
+ if (!ctx->dict_data_pool)
+ return -1;
+
+ ctx->logbuf_pool = mem_pool_new (log_buf_t, 256);
+ if (!ctx->logbuf_pool)
+ return -1;
+
+ INIT_LIST_HEAD (&pool->all_frames);
+ LOCK_INIT (&pool->lock);
+ ctx->pool = pool;
+
+ LOCK_INIT (&ctx->lock);
+
+ cmd_args = &ctx->cmd_args;
+
+ INIT_LIST_HEAD (&cmd_args->xlator_options);
+
+ /* the result of setrlimit() is not checked */
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit (RLIMIT_CORE, &lim);
+
+ return 0;
+}
+
+/**
+ * Release the context attached to @this and detach both ->ctx and
+ * ->private from it. A NULL @this, or an xlator without a context, is
+ * tolerated.
+ *
+ * TODO: cleanup ctx defaults
+ */
+void
+gf_changelog_cleanup_this (xlator_t *this)
+{
+        glusterfs_ctx_t *ctx = NULL;
+
+        if (!this)
+                return;
+
+        ctx = this->ctx;
+        if (ctx) {
+                /* ->ctx is NULL when context initialisation failed */
+                syncenv_destroy (ctx->env);
+                free (ctx);
+        }
+
+        this->private = NULL;
+        this->ctx = NULL;
+}
+
+/**
+ * Create a glusterfs context for the library, initialise the globals and
+ * the context defaults, attach it to THIS and create the syncop
+ * environment. Returns 0 on success; on failure the context is freed,
+ * THIS->ctx is reset to NULL and -1 is returned.
+ *
+ * NOTE(review): the failure path only free()s the context itself; pools
+ * already created by gf_changelog_ctx_defaults_init() are not released.
+ */
+static int
+gf_changelog_init_context ()
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ goto error_return;
+
+ if (glusterfs_globals_init (ctx))
+ goto free_ctx;
+
+ THIS->ctx = ctx;
+ if (gf_changelog_ctx_defaults_init (ctx))
+ goto free_ctx;
+
+ ctx->env = syncenv_new (0, 0, 0);
+ if (!ctx->env)
+ goto free_ctx;
+ return 0;
+
+ free_ctx:
+ free (ctx);
+ THIS->ctx = NULL;
+ error_return:
+ return -1;
+}
+
+/* Initialise the master xlator's environment; currently this is just the
+ * glusterfs context set up by gf_changelog_init_context(). */
+static int
+gf_changelog_init_master ()
+{
+ return gf_changelog_init_context ();
+}
+
+/**
+ * Establish both RPC end-points for @entry and send procedure @proc to
+ * the changelog translator.
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * TODO: cleanup clnt/svc on failure
+ */
+int
+gf_changelog_setup_rpc (xlator_t *this,
+                        gf_changelog_t *entry, int proc)
+{
+        rpcsvc_t        *svc = NULL;
+        struct rpc_clnt *rpc = NULL;
+
+        /*
+         * The listener comes first: a probe() RPC call to the server
+         * triggers a reverse connect to this socket.
+         */
+        svc = gf_changelog_reborp_init_rpc_listner (this, entry->brick,
+                                                    RPC_SOCK (entry), entry);
+        if (!svc)
+                return -1;
+        RPC_REBORP (entry) = svc;
+
+        /* the client used to send the probe */
+        rpc = gf_changelog_rpc_init (this, entry);
+        if (!rpc)
+                return -1;
+        RPC_PROBER (entry) = rpc;
+
+        /*
+         * @FIXME
+         * there is no connection state machine yet, so the RPC call is
+         * simply delayed for now.
+         */
+        sleep (2);
+
+        /*
+         * Probe the changelog translator for the reverse connection. The
+         * client is of little use after a successful call, but it is left
+         * connected for any future RPC calls.
+         */
+        if (gf_changelog_invoke_rpc (this, entry, proc)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED,
+                        "Could not initiate probe RPC, bailing out!!!");
+                return -1;
+        }
+
+        return 0;
+}
+
+/**
+ * Stop the callback invoker thread of @ev. On success the back pointer
+ * to the owning entry is cleared and 0 is returned; when the thread
+ * cannot be cleaned up a warning is logged and -1 is returned.
+ */
+int
+gf_cleanup_event (xlator_t *this, struct gf_event_list *ev)
+{
+        int ret = gf_thread_cleanup (this, ev->invoker);
+
+        if (ret == 0) {
+                ev->entry = NULL;
+                return 0;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, -ret,
+                CHANGELOG_LIB_MSG_CLEANUP_ERROR,
+                "cannot cleanup callback invoker thread."
+                " Not freeing resources");
+
+        return -1;
+}
+
+/**
+ * Prepare the event machinery of @entry: lock, condition variable, empty
+ * event list, the pick/queue strategy (ordered or unordered) and the
+ * callback invoker thread. Returns 0 on success; on failure everything
+ * initialised so far is undone and -1 is returned.
+ */
+static int
+gf_init_event (gf_changelog_t *entry)
+{
+        struct gf_event_list *ev = &entry->event;
+
+        ev->entry = entry;
+
+        if (pthread_mutex_init (&ev->lock, NULL) != 0)
+                return -1;
+
+        if (pthread_cond_init (&ev->cond, NULL) != 0) {
+                (void) pthread_mutex_destroy (&ev->lock);
+                return -1;
+        }
+
+        INIT_LIST_HEAD (&ev->events);
+
+        /* bootstrap sequencing */
+        ev->next_seq = 0;
+
+        if (GF_NEED_ORDERED_EVENTS (entry)) {
+                entry->pickevent = pick_event_ordered;
+                entry->queueevent = queue_ordered_event;
+        } else {
+                entry->pickevent = pick_event_unordered;
+                entry->queueevent = queue_unordered_event;
+        }
+
+        if (gf_thread_create (&ev->invoker, NULL,
+                              gf_changelog_callback_invoker, ev) == 0)
+                return 0;
+
+        /* no invoker thread: roll everything back */
+        entry->pickevent = NULL;
+        entry->queueevent = NULL;
+        (void) pthread_cond_destroy (&ev->cond);
+        (void) pthread_mutex_destroy (&ev->lock);
+
+        return -1;
+}
+
+/**
+ * Placeholder: performs no cleanup yet and always returns 0.
+ *
+ * TODO:
+ * - cleanup invoker thread
+ * - cleanup event list
+ * - destroy rpc{-clnt, svc}
+ */
+int
+gf_cleanup_brick_connection (xlator_t *this, gf_changelog_t *entry)
+{
+ return 0;
+}
+
+/* Placeholder: no connection is torn down yet; always returns 0. */
+int
+gf_cleanup_connections (xlator_t *this)
+{
+ return 0;
+}
+
+/**
+ * Register one brick with the library: allocate its entry, start the
+ * event machinery, run the consumer's init() hook, publish the entry on
+ * priv->connections and set up the RPC end-points.
+ *
+ * @brick must provide callback, init and fini hooks. Returns 0 on
+ * success and -1 on failure.
+ *
+ * On failure the entry is unlinked from priv->connections under
+ * priv->lock and its state lock is destroyed before it is freed. When
+ * the invoker thread cannot be stopped the entry is intentionally left
+ * allocated, since that thread may still reference it.
+ *
+ * NOTE(review): the object returned by brick->init() is not handed to
+ * brick->fini() on the failure path, and priv->api keeps pointing at it.
+ */
+static int
+gf_setup_brick_connection (xlator_t *this,
+                           struct gf_brick_spec *brick,
+                           gf_boolean_t ordered, void *xl)
+{
+        int             ret   = 0;
+        gf_private_t   *priv  = NULL;
+        gf_changelog_t *entry = NULL;
+
+        priv = this->private;
+
+        if (!brick->callback || !brick->init || !brick->fini)
+                goto error_return;
+
+        entry = GF_CALLOC (1, sizeof (*entry),
+                           gf_changelog_mt_libgfchangelog_t);
+        if (!entry)
+                goto error_return;
+        INIT_LIST_HEAD (&entry->list);
+
+        LOCK_INIT (&entry->statelock);
+        entry->connstate = GF_CHANGELOG_CONN_STATE_PENDING;
+
+        entry->notify = brick->filter;
+        (void) strncpy (entry->brick, brick->brick_path, PATH_MAX-1);
+        entry->brick[PATH_MAX-1] = 0;
+
+        entry->this = this;
+        entry->invokerxl = xl;
+
+        entry->ordered = ordered;
+        ret = gf_init_event (entry);
+        if (ret)
+                goto free_entry;
+
+        entry->fini         = brick->fini;
+        entry->callback     = brick->callback;
+        entry->connected    = brick->connected;
+        entry->disconnected = brick->disconnected;
+
+        entry->ptr = brick->init (this, brick);
+        if (!entry->ptr)
+                goto cleanup_event;
+        priv->api = entry->ptr;  /* pointer to API, if required */
+
+        pthread_mutex_lock (&priv->lock);
+        {
+                list_add_tail (&entry->list, &priv->connections);
+        }
+        pthread_mutex_unlock (&priv->lock);
+
+        ret = gf_changelog_setup_rpc (this, entry, CHANGELOG_RPC_PROBE_FILTER);
+        if (ret)
+                goto unlink_entry;
+        return 0;
+
+ unlink_entry:
+        /* the entry is visible to other threads: unlink it under the lock */
+        pthread_mutex_lock (&priv->lock);
+        {
+                list_del_init (&entry->list);
+        }
+        pthread_mutex_unlock (&priv->lock);
+ cleanup_event:
+        if (gf_cleanup_event (this, &entry->event)) {
+                /* invoker thread still alive: leak rather than free
+                 * memory it may dereference */
+                goto error_return;
+        }
+ free_entry:
+        gf_msg_debug (this->name, 0, "freeing entry %p", entry);
+        LOCK_DESTROY (&entry->statelock);
+        GF_FREE (entry);
+ error_return:
+        return -1;
+}
+
+/* Public wrapper around gf_setup_brick_connection(); arguments and the
+ * return value are passed through unchanged. */
+int
+gf_changelog_register_brick (xlator_t *this,
+ struct gf_brick_spec *brick,
+ gf_boolean_t ordered, void *xl)
+{
+ return gf_setup_brick_connection (this, brick, ordered, xl);
+}
+
+/**
+ * Initialise logging for the library context on @logfile and set the log
+ * level; a @loglevel of -1 selects GF_LOG_INFO. Returns 0 on success and
+ * -1 when gf_log_init() fails.
+ */
+static int
+gf_changelog_setup_logging (xlator_t *this, char *logfile, int loglevel)
+{
+        /* a NULL ident makes syslog use its default ident */
+        if (gf_log_init (this->ctx, logfile, NULL) != 0)
+                return -1;
+
+        if (loglevel == -1)
+                gf_log_set_loglevel (GF_LOG_INFO);
+        else
+                gf_log_set_loglevel (loglevel);
+
+        return 0;
+}
+
+/**
+ * Wire up the library's master xlator.
+ *
+ * When @xl is NULL or has no context, a fresh context is created and
+ * THIS is used as its source. The master then shares that context, gets
+ * memory accounting and a private structure, and - only when @xl is
+ * NULL - a poller thread is spawned.
+ *
+ * Returns 0 on success and non-zero on failure; on failure
+ * master->private is left untouched.
+ */
+static int
+gf_changelog_set_master (xlator_t *master, void *xl)
+{
+        int32_t       ret      = 0;
+        xlator_t     *this     = NULL;
+        xlator_t     *old_this = NULL;
+        gf_private_t *priv     = NULL;
+
+        this = xl;
+        if (!this || !this->ctx) {
+                ret = gf_changelog_init_master ();
+                if (ret)
+                        return -1;
+                this = THIS;
+        }
+
+        master->ctx = this->ctx;
+
+        INIT_LIST_HEAD (&master->volume_options);
+        SAVE_THIS (THIS);
+
+        ret = xlator_mem_acct_init (THIS, gf_changelog_mt_end);
+        if (ret != 0)
+                goto restore_this;
+
+        priv = gf_changelog_alloc_priv ();
+        if (!priv) {
+                ret = -1;
+                goto restore_this;
+        }
+
+        if (!xl) {
+                /* poller thread */
+                ret = gf_thread_create (&priv->poller,
+                                        NULL, changelog_rpc_poller, THIS);
+                if (ret != 0) {
+                        /* undo gf_changelog_alloc_priv() completely: the
+                         * mutex and condvar were initialised there */
+                        (void) pthread_cond_destroy (&priv->cond);
+                        (void) pthread_mutex_destroy (&priv->lock);
+                        GF_FREE (priv);
+                        gf_msg (master->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED,
+                                "failed to spawn poller thread");
+                        goto restore_this;
+                }
+        }
+
+        master->private = priv;
+
+ restore_this:
+        RESTORE_THIS ();
+
+        return ret;
+}
+
+/**
+ * One-time library initialisation: allocates the global master xlator,
+ * names it "gfchangelog", sets it up via gf_changelog_set_master() and
+ * starts the connection janitor thread.
+ *
+ * Returns 0 on success (or when the library is already initialised) and
+ * -1 on failure, in which case the global is reset to NULL.
+ *
+ * NOTE(review): the "already initialised" check on the global is not
+ * protected by any lock.
+ */
+int
+gf_changelog_init (void *xl)
+{
+ int ret = 0;
+ gf_private_t *priv = NULL;
+
+ if (master)
+ return 0;
+
+ master = calloc (1, sizeof (*master));
+ if (!master)
+ goto error_return;
+
+ master->name = strdup ("gfchangelog");
+ if (!master->name)
+ goto dealloc_master;
+
+ ret = gf_changelog_set_master (master, xl);
+ if (ret)
+ goto dealloc_name;
+
+ priv = master->private;
+ ret = gf_thread_create (&priv->connectionjanitor, NULL,
+ gf_changelog_connection_janitor, master);
+ if (ret != 0) {
+ /* TODO: cleanup priv, mutex (poller thread for !xl) */
+ /* NOTE(review): master is freed below while a poller thread
+ spawned by gf_changelog_set_master() may still be running */
+ goto dealloc_name;
+ }
+
+ return 0;
+
+ dealloc_name:
+ free (master->name);
+ dealloc_master:
+ free (master);
+ master = NULL;
+ error_return:
+ return -1;
+}
+
+/**
+ * Generic, callback based registration API.
+ *
+ * Registers @count brick specifications from the @bricks array, after
+ * setting up logging on @logfile at level @lvl. A non-zero @ordered
+ * requests in-sequence event delivery. @xl is the caller's xlator (may
+ * be NULL) and is restored as THIS before returning.
+ *
+ * A @count of zero or less registers nothing. Returns 0 on success and
+ * -1 on failure, including a NULL @bricks with a positive @count.
+ * Registration stops at the first brick that fails.
+ */
+int
+gf_changelog_register_generic (struct gf_brick_spec *bricks, int count,
+                               int ordered, char *logfile, int lvl, void *xl)
+{
+        int                   ret        = 0;
+        int                   i          = 0;
+        xlator_t             *this       = NULL;
+        xlator_t             *old_this   = NULL;
+        struct gf_brick_spec *brick      = NULL;
+        gf_boolean_t          need_order = _gf_false;
+
+        SAVE_THIS (xl);
+
+        this = THIS;
+        if (!this)
+                goto error_return;
+
+        /* the array is dereferenced below whenever count is positive */
+        if (count > 0 && !bricks)
+                goto error_return;
+
+        ret = gf_changelog_setup_logging (this, logfile, lvl);
+        if (ret)
+                goto error_return;
+
+        need_order = (ordered) ? _gf_true : _gf_false;
+
+        /* bounded loop: a negative count must not walk off the array */
+        for (i = 0; i < count; i++) {
+                brick = &bricks[i];
+
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO,
+                        "Registering brick: %s [notify filter: %d]",
+                        brick->brick_path, brick->filter);
+
+                ret = gf_changelog_register_brick (this, brick, need_order, xl);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED,
+                                "Error registering with changelog xlator");
+                        break;
+                }
+        }
+
+        if (ret != 0)
+                goto cleanup_inited_bricks;
+
+        RESTORE_THIS();
+        return 0;
+
+ cleanup_inited_bricks:
+        gf_cleanup_connections (this);
+ error_return:
+        RESTORE_THIS();
+        return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_register()
+ *
+ * This is _NOT_ a generic register API. It's a special API to handle
+ * updates at a journal granularity. This is used by consumers wanting
+ * to process persistent journals, such as geo-replication, via a set of
+ * APIs. All of this is required to maintain backward compatibility.
+ * Owner specific private data is stored in ->api (in gf_private_t),
+ * which is used by APIs to access its private data. This limits
+ * the library access to a single brick, but that's how it used to
+ * be anyway. Furthermore, this API solely _owns_ "this", therefore
+ * callers already having a notion of "this" are expected to use the
+ * newer API.
+ *
+ * Newer applications wanting to use this library need not face this
+ * limitation and can rely on the much more feature rich generic register
+ * API, which is purely callback based.
+ *
+ * NOTE: @max_reconnects is not used but required for backward compat.
+ *
+ * Returns -1 when the library has not been initialised (no master
+ * xlator), otherwise the result of gf_changelog_register_generic().
+ *
+ * For generic API, refer gf_changelog_register_generic().
+ */
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_level, int max_reconnects)
+{
+ struct gf_brick_spec brick = {0,};
+
+ if (master)
+ THIS = master;
+ else
+ return -1;
+
+ brick.brick_path = brick_path;
+ brick.filter = CHANGELOG_OP_TYPE_JOURNAL;
+
+ brick.init = gf_changelog_journal_init;
+ brick.fini = gf_changelog_journal_fini;
+ brick.callback = gf_changelog_handle_journal;
+ brick.connected = gf_changelog_journal_connect;
+ brick.disconnected = gf_changelog_journal_disconnect;
+
+ /* the scratch directory travels to the init hook via ->ptr */
+ brick.ptr = scratch_dir;
+
+ /* a single brick, registered for ordered delivery, with no caller
+ xlator */
+ return gf_changelog_register_generic (&brick, 1, 1,
+ log_file, log_level, NULL);
+}
diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c
new file mode 100644
index 00000000000..5ed50390a7c
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-history-changelog.c
@@ -0,0 +1,991 @@
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "logging.h"
+#include "syscall.h"
+
+#include "gf-changelog-helpers.h"
+#include "gf-changelog-journal.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-lib-messages.h"
+#include "changelog-mem-types.h"
+
+/**
+ * @API
+ * gf_history_changelog_done:
+ * Move processed history changelog file from .processing
+ * to .processed
+ *
+ * The path is first canonicalised with realpath() and must start with
+ * the history journal's working directory; anything else is rejected.
+ *
+ * ARGUMENTS:
+ * file(IN): path to processed history changelog file in
+ * .processing directory.
+ *
+ * RETURN VALUE:
+ * 0: On success.
+ * -1: On error. errno is preset to EINVAL before validation, so the
+ * early rejections report EINVAL unless a failing call (realpath(),
+ * rename) overwrote it.
+ */
+int
+gf_history_changelog_done (char *file)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ char to_path[PATH_MAX] = {0,};
+
+ errno = EINVAL; /* default error for every validation failure below */
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+ if (!jnl)
+ goto out;
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+ if (!file || !strlen (file))
+ goto out;
+
+ /* make sure 'file' is inside ->jnl_working_dir */
+ buffer = realpath (file, NULL);
+ if (!buffer)
+ goto out;
+
+ /* plain prefix comparison against the working directory string */
+ if (strncmp (hist_jnl->jnl_working_dir,
+ buffer, strlen (hist_jnl->jnl_working_dir)))
+ goto out;
+
+ /* destination = processed dir + file name; the two are concatenated
+ * without a separator and truncation is not checked */
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ hist_jnl->jnl_processed_dir, basename (buffer));
+ gf_msg_debug (this->name, 0,
+ "moving %s to processed directory", file);
+ ret = sys_rename (buffer, to_path);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_RENAME_FAILED,
+ "cannot move %s to %s",
+ file, to_path);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (buffer)
+ free (buffer); /* allocated by realpath() */
+ return ret;
+}
+
+/**
+ * @API
+ * gf_history_changelog_start_fresh:
+ *   Restart consumption of the current set of history changelogs from
+ *   the beginning by truncating the history tracker file to length 0.
+ *
+ * RETURN VALUES:
+ *    0: On success.
+ *   -1: On error. errno is set to EINVAL once the calling xlator has
+ *       been resolved; a failing truncate may overwrite it.
+ */
+int
+gf_history_changelog_start_fresh ()
+{
+        xlator_t               *this    = THIS;
+        gf_changelog_journal_t *journal = NULL;
+        gf_changelog_journal_t *history = NULL;
+
+        if (this == NULL)
+                return -1;
+
+        errno = EINVAL;
+
+        journal = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (journal == NULL)
+                return -1;
+
+        history = journal->hist_jnl;
+        if (history == NULL)
+                return -1;
+
+        /* any non-zero result from the truncate is reported as -1 */
+        return gf_ftruncate (history->jnl_fd, 0) ? -1 : 0;
+}
+
+/**
+ * @API
+ * gf_history_changelog_next_change:
+ * Return the next history changelog file entry. Zero means all
+ * history changelogs are consumed.
+ *
+ * ARGUMENTS:
+ * bufptr(OUT): Path to unprocessed history changelog file
+ * from tracker file.
+ * maxlen(IN): Usually PATH_MAX. Values larger than PATH_MAX are
+ * rejected with errno = ENAMETOOLONG.
+ *
+ * RETURN VALUES:
+ * size: On success (the value returned by gf_readline(); @bufptr
+ * receives size - 1 bytes followed by a terminating NUL).
+ * 0 : Nothing was read; @bufptr is left untouched.
+ * -1 : On error.
+ */
+ssize_t
+gf_history_changelog_next_change (char *bufptr, size_t maxlen)
+{
+ ssize_t size = -1;
+ int tracker_fd = 0;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ /* the line is staged in a PATH_MAX sized local buffer */
+ if (maxlen > PATH_MAX) {
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ errno = EINVAL; /* default error for the lookups below */
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+ if (!jnl)
+ goto out;
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+ tracker_fd = hist_jnl->jnl_fd;
+
+ size = gf_readline (tracker_fd, buffer, maxlen);
+ if (size < 0) {
+ size = -1; /* normalise any negative result to -1 */
+ goto out;
+ }
+
+ if (size == 0)
+ goto out;
+
+ /* last byte of the line is replaced by NUL - presumably the newline
+ * written by the scan routine; confirm against gf_readline() */
+ memcpy (bufptr, buffer, size - 1);
+ bufptr[size - 1] = '\0';
+
+out:
+ return size;
+}
+
+/**
+ * @API
+ * gf_history_changelog_scan:
+ * Scan and generate a list of change entries.
+ * Calling this api multiple times (without calling
+ * gf_history_changelog_done()) would result in the changelog(s) being
+ * refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ *
+ * The tracker file is truncated, then one line per directory entry
+ * (processing directory path followed by the entry name) is written
+ * to it and the tracker is rewound to offset 0. When the directory is
+ * empty the call sleeps for a second and retries.
+ *
+ * RETURN VALUES:
+ * +ve integer : success and keep scanning.(count of changelogs)
+ * 0 : success and done scanning.
+ * -1 : error.
+ *
+ * NOTE: After the first 0 return, call the next-change API once more
+ * to empty the tracker.
+ *
+ */
+ssize_t
+gf_history_changelog_scan ()
+{
+ int ret = 0; /* not used in this function */
+ int tracker_fd = 0;
+ size_t len = 0; /* assigned below but never read */
+ size_t off = 0;
+ xlator_t *this = NULL;
+ size_t nr_entries = 0;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+ char buffer[PATH_MAX] = {0,};
+ static int is_last_scan; /* persists across calls; never reset here */
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+ if (!jnl)
+ goto out;
+ if (JNL_IS_API_DISCONNECTED (jnl)) {
+ errno = ENOTCONN;
+ goto out;
+ }
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+ retry:
+ if (is_last_scan == 1)
+ return 0;
+ /* hist_done == 0 is set by the consumer thread once publishing
+ * finished successfully: this pass is then the final one */
+ if (hist_jnl->hist_done == 0)
+ is_last_scan = 1;
+
+ errno = EINVAL;
+ if (hist_jnl->hist_done == -1) /* background parsing/publishing failed */
+ goto out;
+
+ tracker_fd = hist_jnl->jnl_fd;
+
+ if (gf_ftruncate (tracker_fd, 0))
+ goto out;
+
+ len = offsetof (struct dirent, d_name)
+ + pathconf (hist_jnl->jnl_processing_dir, _PC_NAME_MAX) + 1;
+
+ rewinddir (hist_jnl->jnl_dir);
+
+ for (;;) {
+ errno = 0; /* lets a failed read be told apart from end of directory */
+ entry = sys_readdir (hist_jnl->jnl_dir, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (strcmp (basename (entry->d_name), ".") == 0 ||
+ strcmp (basename (entry->d_name), "..") == 0)
+ continue;
+
+ nr_entries++;
+
+ /* assemble "<processing dir><name>\n" in buffer; off tracks the length */
+ GF_CHANGELOG_FILL_BUFFER (hist_jnl->jnl_processing_dir,
+ buffer, off,
+ strlen (hist_jnl->jnl_processing_dir));
+ GF_CHANGELOG_FILL_BUFFER (entry->d_name, buffer,
+ off, strlen (entry->d_name));
+ GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1);
+
+ if (gf_changelog_write (tracker_fd, buffer, off) != off) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_WRITE_FAILED,
+ "error writing changelog filename"
+ " to tracker file");
+ break; /* entry stays non-NULL, so the call ends in -1 */
+ }
+ off = 0;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "hist_done %d, is_last_scan: %d",
+ hist_jnl->hist_done, is_last_scan);
+
+ /* entry == NULL: the directory walk stopped without a write failure */
+ if (!entry) {
+ if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1) {
+ if (nr_entries > 0)
+ return nr_entries;
+ else {
+ sleep(1);
+ goto retry;
+ }
+ }
+ }
+ out:
+ return -1;
+}
+
+/*
+ * Gets timestamp value at the changelog path at index.
+ *
+ * The htime file is treated as an array of fixed-size records of
+ * (@len + 1) bytes each; record @index is read and the decimal number
+ * held in its last TIMESTAMP_LENGTH bytes is parsed.
+ *
+ * @fd    : open htime file
+ * @index : zero-based record index (must not be negative)
+ * @len   : length of one changelog path, excluding its terminator;
+ *          must be in [TIMESTAMP_LENGTH, PATH_MAX - 1]
+ * @ts    : (OUT) parsed timestamp, written only on success
+ *
+ * Returns 0 on success(updates given time-stamp), -1 on failure
+ * (invalid arguments set errno to EINVAL).
+ */
+int
+gf_history_get_timestamp (int fd, int index, int len,
+                          unsigned long *ts)
+{
+        xlator_t        *this               = NULL;
+        ssize_t          n_read             = -1;
+        char             path_buf[PATH_MAX] = {0,};
+        size_t           offset             = 0;
+        unsigned long    value              = 0;
+
+        this = THIS;
+        if (!this) {
+                return -1;
+        }
+
+        /* a short @len would move the parse pointer in front of path_buf,
+         * a long one would overflow it (and lose the trailing NUL) */
+        if (!ts || index < 0 ||
+            len < TIMESTAMP_LENGTH || len >= PATH_MAX) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        /* widen before multiplying so large indexes cannot overflow int */
+        offset = (size_t) index * ((size_t) len + 1);
+
+        n_read = sys_pread (fd, path_buf, len, offset);
+        if (n_read < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_READ_ERROR,
+                        "could not read from htime file");
+                return -1;
+        }
+
+        /* an empty or malformed record must not be reported as timestamp 0 */
+        if (sscanf (path_buf + (len - TIMESTAMP_LENGTH), "%lu", &value) != 1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_LIB_MSG_READ_ERROR,
+                        "could not parse timestamp from htime file");
+                return -1;
+        }
+
+        *ts = value;
+        return 0;
+}
+
+/*
+ * Sanity check for the result of the htime search.
+ *
+ * @target_index is accepted when @value is not greater than the
+ * timestamp stored at @target_index and, for any index other than 0,
+ * strictly greater than the timestamp stored just before it.
+ *
+ * Returns 0 when the index is consistent with @value, -1 otherwise
+ * (including when a timestamp could not be read).
+ */
+int
+gf_history_check ( int fd, int target_index, unsigned long value, int len)
+{
+        unsigned long   ts_at   = 0;    /* timestamp at target_index */
+        unsigned long   ts_prev = 0;    /* timestamp at target_index - 1 */
+
+        if (gf_history_get_timestamp (fd, target_index, len, &ts_at) == -1)
+                return -1;
+
+        /* first record: there is no predecessor to compare against */
+        if (target_index == 0)
+                return (value <= ts_at) ? 0 : -1;
+
+        if (gf_history_get_timestamp (fd, target_index - 1, len,
+                                      &ts_prev) == -1)
+                return -1;
+
+        return ((value > ts_prev) && (value <= ts_at)) ? 0 : -1;
+}
+
+/*
+ * This is a "binary search" based search function which checks neighbours
+ * for in-range availability of the value to be searched and provides the
+ * index at which the changelog file nearest to the requested timestamp(value)
+ * can be read from.
+ *
+ * Actual offset can be calculated as (index* (len+1) ).
+ * "1" is because the changelog paths are null terminated.
+ *
+ * @fd : open htime file to search in
+ * @value : time stamp to search
+ * @from : start index to search
+ * @to : end index to search
+ * @len : length of the fixed length strings separated by null
+ *
+ * Returns the index found, or -1 when a timestamp could not be read.
+ * Note that indexes are taken as unsigned long but the result is an int.
+ */
+
+int
+gf_history_b_search (int fd, unsigned long value,
+ unsigned long from, unsigned long to, int len)
+{
+ int m_index = -1;
+ unsigned long cur_value = 0;
+ unsigned long ts1 = 0;
+ int ret = 0;
+
+ m_index = (from + to)/2;
+
+ /* unsigned subtraction: assumes from <= to */
+ if ( (to - from) <=1 ) {
+ /* either one or 2 changelogs left */
+ if ( to != from ) {
+ /* check if value is less or greater than to
+ * return accordingly
+ */
+ ret = gf_history_get_timestamp (fd, from, len, &ts1);
+ if (ret ==-1)
+ goto out;
+ if ( ts1 >= value) {
+ /* actually comparison should be
+ * exactly == but considering
+ *
+ * case of only 2 changelogs in htime file
+ */
+ return from;
+ }
+ else
+ return to;
+ }
+ else
+ return to;
+ }
+
+ ret = gf_history_get_timestamp (fd, m_index, len, &cur_value);
+ if (ret == -1)
+ goto out;
+ if (cur_value == value) {
+ return m_index;
+ }
+ else if (value > cur_value) {
+ /* value lies to the right: peek at the right-hand neighbour first */
+ ret = gf_history_get_timestamp (fd, m_index+1, len, &cur_value);
+ if (ret == -1)
+ goto out;
+ if (value < cur_value)
+ return m_index + 1;
+ else
+ return gf_history_b_search (fd, value,
+ m_index+1, to, len);
+ }
+ else {
+ if (m_index ==0) {
+ /* we are sure that values exists
+ * in this htime file
+ */
+ return 0;
+ }
+ else {
+ /* value lies to the left: peek at the left-hand neighbour first */
+ ret = gf_history_get_timestamp (fd, m_index-1, len,
+ &cur_value);
+ if (ret == -1)
+ goto out;
+ if (value > cur_value) {
+ return m_index;
+ }
+ else
+ return gf_history_b_search (fd, value, from,
+ m_index-1, len);
+ }
+ }
+out:
+ return -1;
+}
+
+/*
+ * Description: Tells whether a changelog path is usable. The decision
+ *              is made on the file name only (directory components are
+ *              ignored): a name containing the lower-case string
+ *              "changelog" is not usable, any other name is.
+ *
+ * Returns:
+ *    1 : Yes, usable     (file name does not contain "changelog")
+ *    0 : No, not usable  (file name contains "changelog")
+ */
+int
+gf_is_changelog_usable (char *cl_path)
+{
+        const char      marker[] = "changelog";
+        char           *fname    = basename (cl_path);
+
+        return (strstr (fname, marker) == NULL) ? 1 : 0;
+}
+
+/*
+ * Thread entry point used by the history consumer: reads one changelog
+ * path from the htime file (at ccd->offset) and, when the path is
+ * usable, parses that changelog.
+ *
+ * @data is a gf_changelog_consume_data_t; the outcome is reported in
+ * its ->retval field (0 on success, -1 on failure). The thread's own
+ * return value is always NULL.
+ */
+void *
+gf_changelog_consume_wrap (void* data)
+{
+        gf_changelog_consume_data_t *ccd  = (gf_changelog_consume_data_t *) data;
+        xlator_t                    *this = ccd->this;
+
+        /* assume failure until every step has gone through */
+        ccd->retval = -1;
+
+        if (sys_pread (ccd->fd, ccd->changelog, PATH_MAX, ccd->offset) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_READ_ERROR,
+                        "cannot read from history metadata file");
+                return NULL;
+        }
+
+        /* TODO: handle short reads and EOF. */
+        if ((gf_is_changelog_usable (ccd->changelog) == 1) &&
+            gf_changelog_consume (ccd->this,
+                                  ccd->jnl, ccd->changelog, _gf_true)) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        0, CHANGELOG_LIB_MSG_PARSE_ERROR,
+                        "could not parse changelog: %s",
+                        ccd->changelog);
+                return NULL;
+        }
+
+        /* unusable changelogs are skipped and still count as success */
+        ccd->retval = 0;
+        return NULL;
+}
+
+/**
+ * "gf_history_consume" is a worker function for history.
+ * parses and moves changelogs files from index "from"
+ * to index "to" in open htime file whose fd is "fd".
+ *
+ * Changelogs are parsed in batches of up to n_parallel threads; after
+ * each batch the threads are joined in order and their changelogs are
+ * published in that same order. The first join, parse or publish
+ * failure stops all further publishing.
+ *
+ * @data is a heap allocated gf_changelog_history_data_t; this function
+ * takes ownership of it and of the htime fd it carries (both are
+ * released before returning). The outcome is reported through
+ * hist_jnl->hist_done (0: everything published, -1: failure).
+ */
+
+#define MAX_PARALLELS  10
+
+void *
+gf_history_consume (void * data)
+{
+        xlator_t                    *this              = NULL;
+        gf_changelog_journal_t      *jnl               = NULL;
+        gf_changelog_journal_t      *hist_jnl          = NULL;
+        int                          ret               = 0;
+        int                          iter              = 0;
+        int                          fd                = -1;
+        int                          from              = -1;
+        int                          to                = -1;
+        int                          len               = -1;
+        int                          n_parallel        = 0;
+        int                          n_envoked         = 0;
+        gf_boolean_t                 publish           = _gf_true;
+        pthread_t                    th_id[MAX_PARALLELS] = {0,};
+        gf_changelog_history_data_t *hist_data         = NULL;
+        gf_changelog_consume_data_t  ccd[MAX_PARALLELS] = {{0},};
+        gf_changelog_consume_data_t *curr              = NULL;
+
+        hist_data = (gf_changelog_history_data_t *) data;
+        if (hist_data == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        fd         = hist_data->htime_fd;
+        from       = hist_data->from;
+        to         = hist_data->to;
+        len        = hist_data->len;
+        n_parallel = hist_data->n_parallel;
+
+        THIS = hist_data->this;
+        this = hist_data->this;
+        if (!this) {
+                ret = -1;
+                goto out;
+        }
+
+        jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (!jnl) {
+                ret = -1;
+                goto out;
+        }
+
+        hist_jnl = jnl->hist_jnl;
+        if (!hist_jnl) {
+                ret = -1;
+                goto out;
+        }
+
+        while (from <= to) {
+                n_envoked = 0;
+
+                /* spawn one parser thread per changelog in this batch */
+                for (iter = 0 ; (iter < n_parallel) && (from <= to); iter++) {
+                        curr = &ccd[iter];
+
+                        curr->this   = this;
+                        curr->jnl    = hist_jnl;
+                        curr->fd     = fd;
+                        curr->offset = from * (len + 1);
+
+                        curr->retval = 0;
+                        memset (curr->changelog, '\0', PATH_MAX);
+
+                        ret = pthread_create (&th_id[iter], NULL,
+                                              gf_changelog_consume_wrap, curr);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, ret,
+                                        CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED
+                                        , "could not create consume-thread");
+                                ret = -1;
+                                goto sync;
+                        } else
+                                n_envoked++;
+
+                        from++;
+                }
+
+        sync:
+                /* join in spawn order so publishing keeps changelog order */
+                for (iter = 0; iter < n_envoked; iter++) {
+                        ret = pthread_join (th_id[iter], NULL);
+                        if (ret) {
+                                publish = _gf_false;
+                                gf_msg (this->name, GF_LOG_ERROR, ret,
+                                        CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED,
+                                        "pthread_join() error");
+                                /* try to join the rest */
+                                continue;
+                        }
+
+                        if (publish == _gf_false)
+                                continue;
+
+                        curr = &ccd[iter];
+                        /* check the result of THIS thread; testing
+                         * ccd->retval would only ever look at slot 0 */
+                        if (curr->retval) {
+                                publish = _gf_false;
+                                gf_msg (this->name, GF_LOG_ERROR,
+                                        0, CHANGELOG_LIB_MSG_PARSE_ERROR,
+                                        "parsing error, ceased publishing...");
+                                continue;
+                        }
+
+                        ret = gf_changelog_publish (curr->this,
+                                                    curr->jnl, curr->changelog);
+                        if (ret) {
+                                publish = _gf_false;
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_LIB_MSG_PUBLISH_ERROR,
+                                        "publish error, ceased publishing...");
+                        }
+                }
+        }
+
+        /* informing "parsing done". */
+        hist_jnl->hist_done = (publish == _gf_true) ? 0 : -1;
+
+out:
+        if (fd != -1)
+                (void) sys_close (fd);
+        GF_FREE (hist_data);
+        return NULL;
+}
+
+/**
+ * @API
+ * gf_history_changelog() : Get/parse historical changelogs and get them ready
+ * for consumption.
+ *
+ * Arguments:
+ * @changelog_dir : Directory location from where history changelogs are
+ * supposed to be consumed.
+ * @start: Unix timestamp FROM where changelogs should be consumed.
+ * @end: Unix timestamp TO where changelogsshould be consumed.
+ * @n_parallel : degree of parallelism while changelog parsing.
+ * @actual_end : the end time till where changelogs are available.
+ *
+ * Return:
+ * Returns <timestamp> on success, the last time till where changelogs are
+ * available.
+ * Returns -1 on failure(error).
+ */
+
+/**
+ * Extract timestamp range from a historical metadata file
+ *
+ * The minimum timestamp is parsed from the last TIMESTAMP_LENGTH
+ * characters of the htime file name; the maximum timestamp and the
+ * number of changelogs come from the HTIME_KEY extended attribute,
+ * which is expected to look like "<max_ts>:<total>".
+ *
+ * @dname     : name of the htime file inside @htime_dir
+ * @htime_dir : directory holding the htime files
+ * @fd        : (OUT) descriptor of the opened htime file; it is left
+ *              open for the caller, also when the xattr lookup fails
+ * @total     : (OUT) number of changelogs recorded in the htime file
+ * @min_ts    : (OUT) first timestamp covered
+ * @max_ts    : (OUT) last timestamp covered
+ *
+ * Returns:
+ *    0  : Success ({min,max}_ts with the appropriate values)
+ *   -1  : Failure
+ *   -2  : Ignore this metadata file and process next
+ */
+int
+gf_changelog_extract_min_max (const char *dname, const char *htime_dir,
+                              int *fd, unsigned long *total,
+                              unsigned long *min_ts, unsigned long *max_ts)
+{
+        int          ret                  = -1;
+        xlator_t    *this                 = NULL;
+        char         htime_file[PATH_MAX] = {0,};
+        struct stat  stbuf                = {0,};
+        char        *iter                 = NULL;
+        char         x_value[30]          = {0,};
+
+        this = THIS;
+
+        snprintf (htime_file, PATH_MAX, "%s/%s", htime_dir, dname);
+
+        iter = (htime_file + strlen (htime_file) - TIMESTAMP_LENGTH);
+        sscanf (iter ,"%lu",min_ts);
+
+        ret = sys_stat (htime_file, &stbuf);
+        if (ret) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_HTIME_ERROR,
+                        "stat() failed on htime file %s",
+                        htime_file);
+                goto out;
+        }
+
+        /* ignore everything except regular files */
+        if (!S_ISREG (stbuf.st_mode)) {
+                ret = -2;
+                goto out;
+        }
+
+        *fd = open (htime_file, O_RDONLY);
+        if (*fd < 0) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_HTIME_ERROR,
+                        "open() failed for htime %s",
+                        htime_file);
+                goto out;
+        }
+
+        /* Looks good, extract max timestamp.
+         * One byte of the zero-filled buffer is held back so the value
+         * is always NUL terminated before sscanf() walks over it. */
+        ret = sys_fgetxattr (*fd, HTIME_KEY, x_value, sizeof (x_value) - 1);
+        if (ret < 0) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_GET_XATTR_FAILED,
+                        "error extracting max timstamp from htime file"
+                        " %s", htime_file);
+                goto out;
+        }
+
+        sscanf (x_value, "%lu:%lu", max_ts, total);
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
+                "MIN: %lu, MAX: %lu, TOTAL CHANGELOGS: %lu",
+                *min_ts, *max_ts, *total);
+
+        ret = 0;
+
+ out:
+        return ret;
+}
+
+/*
+ * Locate the htime file whose range contains @start, translate @start
+ * and min(@end, max timestamp of that file) into record indexes and
+ * spawn a detached thread (gf_history_consume) that parses and
+ * publishes the changelogs in that index range.
+ *
+ * @changelog_dir : directory from which the htime directory is derived
+ * @start         : timestamp from where changelogs should be consumed
+ * @end           : timestamp up to where changelogs should be consumed
+ * @n_parallel    : degree of parallelism (> 0, capped at MAX_PARALLELS)
+ * @actual_end    : (OUT) timestamp of the last changelog handed over
+ *
+ * Returns a negative value on failure; on success the background
+ * thread owns the htime fd and the request data.
+ */
+int
+gf_history_changelog (char* changelog_dir, unsigned long start,
+                      unsigned long end, int n_parallel,
+                      unsigned long *actual_end)
+{
+        int                             ret                 = 0;
+        int                             len                 = -1;
+        int                             fd                  = -1;
+        int                             n_read              = -1;
+        unsigned long                   min_ts              = 0;
+        unsigned long                   max_ts              = 0;
+        unsigned long                   end2                = 0;
+        unsigned long                   ts1                 = 0;
+        unsigned long                   ts2                 = 0;
+        unsigned long                   to                  = 0;
+        unsigned long                   from                = 0;
+        unsigned long                   total_changelog     = 0;
+        xlator_t                       *this                = NULL;
+        gf_changelog_journal_t         *jnl                 = NULL;
+        gf_changelog_journal_t         *hist_jnl            = NULL;
+        gf_changelog_history_data_t    *hist_data           = NULL;
+        DIR                            *dirp                = NULL;
+        struct dirent                  *entry               = NULL;
+        struct dirent                   scratch[2]          = {{0,},};
+        pthread_t                       consume_th          = 0;
+        char                            htime_dir[PATH_MAX] = {0,};
+        char                            buffer[PATH_MAX]    = {0,};
+
+        pthread_attr_t attr;
+
+        ret = pthread_attr_init (&attr);
+        if (ret != 0) {
+                return -1;
+        }
+
+        this = THIS;
+        if (!this) {
+                ret = -1;
+                goto out;
+        }
+
+        jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this);
+        if (!jnl) {
+                ret = -1;
+                goto out;
+        }
+
+        hist_jnl = (gf_changelog_journal_t *) jnl->hist_jnl;
+        if (!hist_jnl) {
+                ret = -1;
+                goto out;
+        }
+
+        /* basic sanity check */
+        if (start > end || n_parallel <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        /* cap parallelism count */
+        if (n_parallel > MAX_PARALLELS)
+                n_parallel = MAX_PARALLELS;
+
+        CHANGELOG_FILL_HTIME_DIR (changelog_dir, htime_dir);
+
+        dirp = sys_opendir (htime_dir);
+        if (dirp == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_LIB_MSG_HTIME_ERROR,
+                        "open dir on htime failed : %s",
+                        htime_dir);
+                ret = -1;
+                goto out;
+        }
+
+        for (;;) {
+
+                errno = 0;
+
+                entry = sys_readdir (dirp, scratch);
+
+                if (!entry || errno != 0)
+                        break;
+
+                ret = gf_changelog_extract_min_max (entry->d_name, htime_dir,
+                                                    &fd, &total_changelog,
+                                                    &min_ts, &max_ts);
+                if (ret) {
+                        if (-2 == ret)
+                                continue;
+                        goto out;
+                }
+
+                if (start >= min_ts && start < max_ts) {
+                        /**
+                         * TODO: handle short reads later...
+                         * One byte of the zero-filled buffer is held back
+                         * so strlen() below always finds a terminator.
+                         */
+                        n_read = sys_read (fd, buffer, PATH_MAX - 1);
+                        if (n_read < 0) {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        CHANGELOG_LIB_MSG_READ_ERROR,
+                                        "unable to read htime file");
+                                goto out;
+                        }
+
+                        /* every record has the length of the first path */
+                        len = strlen (buffer);
+
+                        /**
+                         * search @start in the htime file returning it's index
+                         * (@from)
+                         */
+                        from = gf_history_b_search (fd, start, 0,
+                                                    total_changelog - 1, len);
+
+                        /* ensuring correctness of gf_b_search */
+                        if (gf_history_check (fd, from, start, len) != 0) {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_LIB_MSG_GET_TIME_ERROR,
+                                        "wrong result for start: %lu idx: %lu",
+                                        start, from);
+                                goto out;
+                        }
+
+                        end2 = (end <= max_ts) ? end : max_ts;
+
+                        /**
+                         * search @end2 in htime file returning it's index (@to)
+                         */
+                        to = gf_history_b_search (fd, end2,
+                                                  0, total_changelog - 1, len);
+
+                        if (gf_history_check (fd, to, end2, len) != 0) {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_LIB_MSG_GET_TIME_ERROR,
+                                        "wrong result for end: %lu idx: %lu",
+                                        end2, to);
+                                goto out;
+                        }
+
+                        ret = gf_history_get_timestamp (fd, from, len, &ts1);
+                        if (ret == -1)
+                                goto out;
+
+                        ret = gf_history_get_timestamp (fd, to, len, &ts2);
+                        if (ret == -1)
+                                goto out;
+
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
+                                "FINAL: from: %lu, to: %lu, changes: %lu",
+                                ts1, ts2, (to - from + 1));
+
+                        hist_data =  GF_CALLOC (1,
+                                     sizeof (gf_changelog_history_data_t),
+                                     gf_changelog_mt_history_data_t);
+                        if (!hist_data) {
+                                /* allocation failed: nothing to hand over */
+                                ret = -1;
+                                goto out;
+                        }
+
+                        hist_data->htime_fd = fd;
+                        hist_data->from = from;
+                        hist_data->to = to;
+                        hist_data->len = len;
+                        hist_data->n_parallel = n_parallel;
+                        hist_data->this = this;
+
+                        ret = pthread_attr_setdetachstate
+                                (&attr, PTHREAD_CREATE_DETACHED);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, ret,
+                                        CHANGELOG_LIB_MSG_PTHREAD_ERROR,
+                                        "unable to sets the detach"
+                                        " state attribute");
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* spawn a thread for background parsing & publishing */
+                        ret = pthread_create (&consume_th, &attr,
+                                              gf_history_consume, hist_data);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, ret,
+                                        CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED
+                                        , "creation of consume parent-thread"
+                                        " failed.");
+                                ret = -1;
+                                goto out;
+                        }
+
+                        goto out;
+
+                } /* end of range check */
+
+                /* this htime file does not cover @start: release its
+                 * descriptor before looking at the next one */
+                (void) sys_close (fd);
+                fd = -1;
+
+        } /* end of readdir() */
+
+        if (!from || !to)
+                ret = -1;
+
+out:
+        if (dirp != NULL)
+                (void) sys_closedir (dirp);
+
+        /* the attribute object is needed only while creating the thread */
+        (void) pthread_attr_destroy (&attr);
+
+        if (ret < 0) {
+                /* no thread took ownership: clean up fd and request data */
+                if (fd != -1)
+                        (void) sys_close (fd);
+                GF_FREE (hist_data);
+
+                return ret;
+        }
+
+        hist_jnl->hist_done = 1;
+        *actual_end = ts2;
+
+        return ret;
+}
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am
new file mode 100644
index 00000000000..27af7a5ebd3
--- /dev/null
+++ b/xlators/features/changelog/src/Makefile.am
@@ -0,0 +1,28 @@
+xlator_LTLIBRARIES = changelog.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \
+	changelog-rpc-common.h changelog-misc.h changelog-encoders.h \
+	changelog-rpc.h changelog-ev-handle.h changelog-messages.h
+
+changelog_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \
+ changelog-encoders.c changelog-rpc.c changelog-barrier.c \
+ changelog-rpc-common.c changelog-ev-handle.c
+changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/rpc-transport/socket/src \
+ -I$(top_srcdir)/xlators/features/changelog/lib/src/ \
+ -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c
new file mode 100644
index 00000000000..ac1eb0e4397
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-barrier.c
@@ -0,0 +1,134 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-helpers.h"
+#include "changelog-messages.h"
+#include "call-stub.h"
+
+/* Append @stub to the tail of the barrier queue and account for it in
+ * the queue size. */
+void
+__chlog_barrier_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        changelog_priv_t *priv = this->private;
+
+        GF_ASSERT (priv);
+
+        list_add_tail (&stub->list, &priv->queue);
+        priv->queue_size += 1;
+}
+
+/* Detach and return the first stub of @queue, or NULL when @queue is
+ * empty. */
+call_stub_t *
+__chlog_barrier_dequeue (xlator_t *this, struct list_head *queue)
+{
+        changelog_priv_t *priv  = this->private;
+        call_stub_t      *first = NULL;
+
+        GF_ASSERT (priv);
+
+        if (!list_empty (queue)) {
+                first = list_entry (queue->next, call_stub_t, list);
+                list_del_init (&first->list);
+        }
+
+        return first;
+}
+
+/* Drain @queue: every stub is removed and resumed in queue order. */
+void
+chlog_barrier_dequeue_all (xlator_t *this, struct list_head *queue)
+{
+        call_stub_t *next = NULL;
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                CHANGELOG_MSG_BARRIER_INFO,
+                "Dequeuing all the changelog barriered fops");
+
+        for (next = __chlog_barrier_dequeue (this, queue);
+             next != NULL;
+             next = __chlog_barrier_dequeue (this, queue)) {
+                call_resume (next);
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                CHANGELOG_MSG_BARRIER_INFO,
+                "Dequeuing changelog barriered fops is finished");
+}
+
+/* Timer callback for the changelog barrier: switches the barrier off
+ * under the private lock and then resumes every fop that was held back.
+ * @data is the changelog xlator. */
+void
+chlog_barrier_timeout (void *data)
+{
+        xlator_t         *this    = data;
+        changelog_priv_t *priv    = NULL;
+        struct list_head  pending = {0,};
+
+        THIS = this;
+        priv = this->private;
+
+        INIT_LIST_HEAD (&pending);
+
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                CHANGELOG_MSG_BARRIER_ERROR,
+                "Disabling changelog barrier because of the timeout.");
+
+        /* only the hand-over of the queued stubs happens under the lock */
+        LOCK (&priv->lock);
+        {
+                __chlog_barrier_disable (this, &pending);
+        }
+        UNLOCK (&priv->lock);
+
+        /* the stubs are resumed after the lock has been dropped */
+        chlog_barrier_dequeue_all (this, &pending);
+}
+
+/* Turn the changelog barrier off: cancel a pending timeout timer, move
+ * all queued stubs over to @queue and reset the barrier bookkeeping. */
+void
+__chlog_barrier_disable (xlator_t *this, struct list_head *queue)
+{
+        changelog_priv_t *priv = NULL;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (priv->timer != NULL) {
+                gf_timer_call_cancel (this->ctx, priv->timer);
+                priv->timer = NULL;
+        }
+
+        /* the caller gets the stubs; the private queue is left empty */
+        list_splice_init (&priv->queue, queue);
+        priv->queue_size      = 0;
+        priv->barrier_enabled = _gf_false;
+}
+
+/* Enable the changelog barrier and arm its timeout timer.
+ * Returns 0 on success, -1 when the timer could not be registered (the
+ * barrier is then left disabled). */
+int
+__chlog_barrier_enable (xlator_t *this, changelog_priv_t *priv)
+{
+        priv->timer = gf_timer_call_after (this->ctx, priv->timeout,
+                                           chlog_barrier_timeout,
+                                           (void *)this);
+        if (priv->timer == NULL) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        CHANGELOG_MSG_BARRIER_ERROR,
+                        "Couldn't add changelog barrier timeout event.");
+                return -1;
+        }
+
+        priv->barrier_enabled = _gf_true;
+        return 0;
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c
new file mode 100644
index 00000000000..95030236636
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.c
@@ -0,0 +1,236 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-encoders.h"
+
+/*
+ * Serialise an entry record into @buffer as "<gfid>/<basename>".
+ * @data is a struct changelog_entry_fields. With @encode set the gfid
+ * is written in its textual form, otherwise as the raw uuid_t bytes.
+ * Returns the number of bytes written.
+ */
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+        struct changelog_entry_fields *fields   = data;
+        size_t                         written  = 0;
+        char                          *gfid_str = NULL;
+
+        if (!encode) {
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       fields->cef_uuid, sizeof (uuid_t));
+        } else {
+                gfid_str = uuid_utoa (fields->cef_uuid);
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       gfid_str, strlen (gfid_str));
+        }
+
+        CHANGELOG_FILL_BUFFER (buffer, written, "/", 1);
+        CHANGELOG_FILL_BUFFER (buffer, written,
+                               fields->cef_bname, strlen (fields->cef_bname));
+
+        return written;
+}
+
+/*
+ * Serialise a delete-entry record into @buffer:
+ * "<gfid>/<basename>", a NUL byte, then the recorded path (or a single
+ * NUL byte when the path is empty). @data is a struct
+ * changelog_entry_fields. With @encode set the gfid is written in its
+ * textual form, otherwise as the raw uuid_t bytes.
+ * Returns the number of bytes written.
+ */
+size_t
+del_entry_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+        struct changelog_entry_fields *fields   = data;
+        size_t                         written  = 0;
+        char                          *gfid_str = NULL;
+
+        if (!encode) {
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       fields->cef_uuid, sizeof (uuid_t));
+        } else {
+                gfid_str = uuid_utoa (fields->cef_uuid);
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       gfid_str, strlen (gfid_str));
+        }
+
+        CHANGELOG_FILL_BUFFER (buffer, written, "/", 1);
+        CHANGELOG_FILL_BUFFER (buffer, written,
+                               fields->cef_bname, strlen (fields->cef_bname));
+        CHANGELOG_FILL_BUFFER (buffer, written, "\0", 1);
+
+        if (fields->cef_path[0] != '\0') {
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       fields->cef_path,
+                                       strlen (fields->cef_path));
+        } else {
+                CHANGELOG_FILL_BUFFER (buffer, written, "\0", 1);
+        }
+
+        return written;
+}
+
+/*
+ * Serialise a fop number into @buffer. @data points to a
+ * glusterfs_fop_t. With @encode set the number is written as decimal
+ * text, otherwise as the raw bytes of the value.
+ * Returns the number of bytes written.
+ */
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+        glusterfs_fop_t fop      = *(glusterfs_fop_t *) data;
+        size_t          written  = 0;
+        char            text[10] = {0,};
+
+        if (!encode) {
+                CHANGELOG_FILL_BUFFER (buffer, written, &fop, sizeof (fop));
+                return written;
+        }
+
+        (void) snprintf (text, sizeof (text), "%d", fop);
+        CHANGELOG_FILL_BUFFER (buffer, written, text, strlen (text));
+
+        return written;
+}
+
+/*
+ * Serialise an unsigned number into @buffer. @data points to an
+ * unsigned int. With @encode set the number is written as decimal
+ * text, otherwise as the raw bytes of the value.
+ * Returns the number of bytes written.
+ */
+size_t
+number_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+        unsigned int number   = *(unsigned int *) data;
+        size_t       written  = 0;
+        char         text[20] = {0,};
+
+        if (!encode) {
+                CHANGELOG_FILL_BUFFER (buffer, written,
+                                       &number, sizeof (unsigned int));
+                return written;
+        }
+
+        (void) snprintf (text, sizeof (text), "%u", number);
+        CHANGELOG_FILL_BUFFER (buffer, written, text, strlen (text));
+
+        return written;
+}
+
+/* Release the basename held by an entry record; @data is a
+ * changelog_opt_t and may be NULL. */
+void
+entry_free_fn (void *data)
+{
+        changelog_opt_t *opt = data;
+
+        if (opt != NULL)
+                GF_FREE (opt->co_entry.cef_bname);
+}
+
+/* Release the basename and the path held by a delete-entry record;
+ * @data is a changelog_opt_t and may be NULL. */
+void
+del_entry_free_fn (void *data)
+{
+        changelog_opt_t *opt = data;
+
+        if (opt != NULL) {
+                GF_FREE (opt->co_entry.cef_bname);
+                GF_FREE (opt->co_entry.cef_path);
+        }
+}
+
+/**
+ * try to write all data in one shot
+ *
+ * Appends the cld->cld_xtra_records optional records found at
+ * cld->cld_ptr to @buffer, each one preceded by a NUL separator.
+ * A record is either run through its ->co_convert callback (which
+ * returns the number of bytes it produced) or copied verbatim using
+ * ->co_len. @off is the write position in @buffer on entry and is
+ * updated to the new position on return.
+ */
+
+static void
+changelog_encode_write_xtra (changelog_log_data_t *cld,
+ char *buffer, size_t *off, gf_boolean_t encode)
+{
+ int i = 0;
+ size_t offset = 0;
+ void *data = NULL;
+ changelog_opt_t *co = NULL;
+
+ offset = *off;
+
+ co = (changelog_opt_t *) cld->cld_ptr;
+
+ for (; i < cld->cld_xtra_records; i++, co++) {
+ CHANGELOG_FILL_BUFFER (buffer, offset, "\0", 1);
+
+ /* no default case: for any other co_type "data" keeps the value
+ * left by the previous iteration (NULL on the first one) */
+ switch (co->co_type) {
+ case CHANGELOG_OPT_REC_FOP:
+ data = &co->co_fop;
+ break;
+ case CHANGELOG_OPT_REC_ENTRY:
+ data = &co->co_entry;
+ break;
+ case CHANGELOG_OPT_REC_UINT32:
+ data = &co->co_uint32;
+ break;
+ }
+
+ if (co->co_convert)
+ offset += co->co_convert (data,
+ buffer + offset, encode);
+ else /* no conversion: write it out as it is */
+ CHANGELOG_FILL_BUFFER (buffer, offset,
+ data, co->co_len);
+ }
+
+ *off = offset;
+}
+
+/*
+ * Encode one change record in ASCII form (type marker, textual gfid,
+ * optional extra records, terminating NUL) on the stack and hand it to
+ * changelog_write_change(), whose result is returned.
+ */
+int
+changelog_encode_ascii (xlator_t *this, changelog_log_data_t *cld)
+{
+        changelog_priv_t *priv     = this->private;
+        char             *gfid_str = uuid_utoa (cld->cld_gfid);
+        size_t            gfid_len = strlen (gfid_str);
+        size_t            off      = 0;
+        char             *buffer   = NULL;
+
+        /* extra bytes for decorations */
+        buffer = alloca (gfid_len + cld->cld_ptr_len + 10);
+
+        CHANGELOG_STORE_ASCII (priv, buffer,
+                               off, gfid_str, gfid_len, cld);
+
+        if (cld->cld_xtra_records != 0)
+                changelog_encode_write_xtra (cld, buffer, &off, _gf_true);
+
+        CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+        return changelog_write_change (priv, buffer, off);
+}
+
+/*
+ * Encode one change record in binary form (type marker, raw gfid
+ * bytes, optional extra records, terminating NUL) on the stack and
+ * hand it to changelog_write_change(), whose result is returned.
+ */
+int
+changelog_encode_binary (xlator_t *this, changelog_log_data_t *cld)
+{
+        changelog_priv_t *priv   = this->private;
+        size_t            off    = 0;
+        char             *buffer = NULL;
+
+        /* extra bytes for decorations */
+        buffer = alloca (sizeof (uuid_t) + cld->cld_ptr_len + 10);
+
+        CHANGELOG_STORE_BINARY (priv, buffer, off, cld->cld_gfid, cld);
+
+        if (cld->cld_xtra_records != 0)
+                changelog_encode_write_xtra (cld, buffer, &off, _gf_false);
+
+        CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+        return changelog_write_change (priv, buffer, off);
+}
+
+static struct changelog_encoder
+cb_encoder[] = { /* encoder table, indexed by the CHANGELOG_ENCODE_* mode */
+ [CHANGELOG_ENCODE_BINARY] =
+ {
+ .encoder = CHANGELOG_ENCODE_BINARY,
+ .encode = changelog_encode_binary, /* raw gfid bytes */
+ },
+ [CHANGELOG_ENCODE_ASCII] =
+ {
+ .encoder = CHANGELOG_ENCODE_ASCII,
+ .encode = changelog_encode_ascii, /* textual gfid */
+ },
+};
+
+/* Select the encoder matching priv->encode_mode and store it in
+ * priv->ce. The mode is used as a table index without a range check. */
+void
+changelog_encode_change(changelog_priv_t *priv)
+{
+        priv->ce = cb_encoder + priv->encode_mode;
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
new file mode 100644
index 00000000000..d6a50cc9ef7
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_ENCODERS_H
+#define _CHANGELOG_ENCODERS_H
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "changelog-helpers.h"
+
+/*
+ * Emit the ASCII record prefix into @buf at offset @off: one byte taken
+ * from priv->maps[cld->cld_type], then @gfid_len bytes of @gfid.
+ * @off must be an lvalue; CHANGELOG_FILL_BUFFER (defined elsewhere) is
+ * presumably what advances it - confirm against changelog-helpers.h.
+ *
+ * The expansion uses the @buf argument rather than a caller-scope
+ * variable that happens to be called `buffer`.
+ */
+#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) do { \
+                CHANGELOG_FILL_BUFFER (buf, off,                        \
+                                       (priv)->maps[(cld)->cld_type], 1); \
+                CHANGELOG_FILL_BUFFER (buf,                             \
+                                       off, gfid, gfid_len);            \
+        } while (0)
+
+/*
+ * Emit the binary record prefix into @buf at offset @off: one byte taken
+ * from priv->maps[cld->cld_type], then sizeof (uuid_t) bytes of @gfid.
+ * @off must be an lvalue; CHANGELOG_FILL_BUFFER (defined elsewhere) is
+ * presumably what advances it - confirm against changelog-helpers.h.
+ *
+ * The expansion uses the @buf argument rather than a caller-scope
+ * variable that happens to be called `buffer`.
+ */
+#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) do {          \
+                CHANGELOG_FILL_BUFFER (buf, off,                        \
+                                       (priv)->maps[(cld)->cld_type], 1); \
+                CHANGELOG_FILL_BUFFER (buf,                             \
+                                       off, gfid, sizeof (uuid_t));     \
+        } while (0)
+
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode);
+size_t
+del_entry_fn (void *data, char *buffer, gf_boolean_t encode);
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode);
+size_t
+number_fn (void *data, char *buffer, gf_boolean_t encode);
+void
+entry_free_fn (void *data);
+void
+del_entry_free_fn (void *data);
+int
+changelog_encode_binary (xlator_t *, changelog_log_data_t *);
+int
+changelog_encode_ascii (xlator_t *, changelog_log_data_t *);
+void
+changelog_encode_change(changelog_priv_t *);
+
+#endif /* _CHANGELOG_ENCODERS_H */
diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c
new file mode 100644
index 00000000000..77637c7beec
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-ev-handle.c
@@ -0,0 +1,398 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-ev-handle.h"
+#include "changelog-rpc-common.h"
+#include "changelog-helpers.h"
+
+struct rpc_clnt_program changelog_ev_program;
+
+/*
+ * Number of event I/O vectors carried by a single dispatch RPC.
+ * NOTE(review): the "- 3" presumably reserves MAX_IOVEC slots for the
+ * RPC layer's own headers - confirm.
+ */
+#define NR_IOVEC (MAX_IOVEC - 3)
+/* One batch of I/O vectors sent with a single event dispatch RPC. */
+struct ev_rpc_vec {
+ int count; /* number of valid entries in vector[] */
+ struct iovec vector[NR_IOVEC];
+
+ /* sequence number */
+ unsigned long seq;
+};
+
+/*
+ * Per-dispatch context handed to changelog_event_dispatch_rpc(): the
+ * buffer list to drain, the client to send to, and the scratch batch.
+ */
+struct ev_rpc {
+ rbuf_list_t *rlist; /* list of buffered events to send */
+ struct rpc_clnt *rpc; /* destination RPC client */
+ struct ev_rpc_vec vec; /* batch currently being filled */
+};
+
+/**
+ * Reply callback for an event dispatch RPC.
+ *
+ * As of now this does the bare minimum: it ignores the reply and
+ * returns 0. Going further, unacknowledged sequence numbers could be
+ * retransmitted and other intelligence could be built into the server.
+ */
+int
+changelog_event_dispatch_cbk (struct rpc_req *req,
+ struct iovec *iov, int count, void *myframe)
+{
+ return 0;
+}
+
+/* dispatcher RPC */
+/*
+ * Send one batch of event vectors (@vec) to @rpc.
+ *
+ * The request header carries the batch sequence number, which allows
+ * the receiver to order requests before processing them, and the
+ * current wall-clock time. Returns the result of
+ * changelog_rpc_sumbit_req().
+ */
+int
+changelog_dispatch_vec (call_frame_t *frame, xlator_t *this,
+                        struct rpc_clnt *rpc, struct ev_rpc_vec *vec)
+{
+        struct timeval      now     = {0,};
+        changelog_event_req request = {0,};
+
+        (void) gettimeofday (&now, NULL);
+
+        request.seq     = vec->seq;
+        request.tv_sec  = now.tv_sec;
+        request.tv_usec = now.tv_usec;
+
+        return changelog_rpc_sumbit_req (rpc, (void *)&request,
+                                         frame, &changelog_ev_program,
+                                         CHANGELOG_REV_PROC_EVENT,
+                                         vec->vector, vec->count, NULL,
+                                         this, changelog_event_dispatch_cbk,
+                                         (xdrproc_t) xdr_changelog_event_req);
+}
+
+ /*
+  * Drain the buffer list in @data (a struct ev_rpc) to its RPC client,
+  * NR_IOVEC I/O vectors at a time. Every full batch, and the trailing
+  * partial batch if any, is sent with its own sequence number starting
+  * from the one stored on the list.
+  *
+  * Returns 0 on success or the first non-zero value returned by
+  * changelog_dispatch_vec().
+  */
+ int
+ changelog_event_dispatch_rpc (call_frame_t *frame, xlator_t *this, void *data)
+ {
+         struct ev_rpc     *erpc     = data;
+         struct rlist_iter  riter    = {{0,},};
+         rbuf_iovec_t      *rvec     = NULL;
+         unsigned long      sequence = 0;
+         unsigned long      range    = 0;
+         int                filled   = 0;
+         int                ret      = 0;
+
+         RLIST_GET_SEQ (erpc->rlist, sequence, range);
+         rlist_iter_init (&riter, erpc->rlist);
+
+         rvec_for_each_entry (rvec, &riter) {
+                 erpc->vec.vector[filled++] = rvec->iov;
+                 if (filled < NR_IOVEC)
+                         continue;
+
+                 /* batch is full: flush it and start a new one */
+                 erpc->vec.seq   = sequence++;
+                 erpc->vec.count = NR_IOVEC;
+                 ret = changelog_dispatch_vec (frame, this,
+                                               erpc->rpc, &erpc->vec);
+                 if (ret)
+                         break;
+                 filled = 0;
+         }
+
+         if (ret)
+                 return ret;
+
+         /* send whatever is left over in a final, partial batch */
+         if (filled) {
+                 erpc->vec.seq   = sequence;
+                 erpc->vec.count = filled;
+                 ret = changelog_dispatch_vec (frame, this,
+                                               erpc->rpc, &erpc->vec);
+         }
+
+         return ret;
+ }
+
+/*
+ * RPC client notification handler for reverse (event) connections.
+ * @mydata is the changelog_rpc_clnt_t the connection belongs to.
+ *
+ * CONNECT:    bump the event selection refs for the client's filter and
+ *             move the client to the active list.
+ * DISCONNECT: disable and unref the rpc, drop the selection refs and
+ *             flag the client as disconnected.
+ * MSG/DESTROY: drop a reference on the client.
+ *
+ * Always returns 0.
+ */
+int
+changelog_rpc_notify (struct rpc_clnt *rpc,
+ void *mydata, rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ changelog_rpc_clnt_t *crpc = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_ev_selector_t *selection = NULL;
+
+ crpc = mydata;
+ this = crpc->this;
+ c_clnt = crpc->c_clnt;
+
+ priv = this->private;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ rpc_clnt_set_connected (&rpc->conn);
+ selection = &priv->ev_selection;
+
+ /* lock order: wait_lock first, then active_lock */
+ LOCK (&c_clnt->wait_lock);
+ {
+ LOCK (&c_clnt->active_lock);
+ {
+ changelog_select_event (this, selection,
+ crpc->filter);
+ list_move_tail (&crpc->list, &c_clnt->active);
+ }
+ UNLOCK (&c_clnt->active_lock);
+ }
+ UNLOCK (&c_clnt->wait_lock);
+
+ break;
+ case RPC_CLNT_DISCONNECT:
+ rpc_clnt_disable (crpc->rpc);
+
+ /* rpc_clnt_disable doesn't unref the rpc. It just marks
+ * the rpc as disabled and cancels reconnection timer.
+ * Hence unref the rpc object to free it.
+ */
+ rpc_clnt_unref (crpc->rpc);
+
+ selection = &priv->ev_selection;
+
+ LOCK (&crpc->lock);
+ {
+ changelog_deselect_event (this, selection,
+ crpc->filter);
+ changelog_set_disconnect_flag (crpc, _gf_true);
+ }
+ UNLOCK (&crpc->lock);
+
+ break;
+ /* NOTE(review): RPC_CLNT_MSG falls through and drops a client
+ * reference just like DESTROY - confirm this is intended. */
+ case RPC_CLNT_MSG:
+ case RPC_CLNT_DESTROY:
+ /* Free up mydata */
+ changelog_rpc_clnt_unref (crpc);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Connector thread: waits for clients queued on ->pending, creates the
+ * reverse RPC connection for the first one and moves it to ->waitq
+ * (changelog_rpc_notify() later moves it to ->active on CONNECT).
+ *
+ * @data is the changelog_clnt_t that owns the lists. The loop never
+ * terminates on its own.
+ */
+void *
+changelog_ev_connector (void *data)
+{
+        xlator_t             *this   = NULL;
+        changelog_clnt_t     *c_clnt = NULL;
+        changelog_rpc_clnt_t *crpc   = NULL;
+
+        c_clnt = data;
+        this = c_clnt->this;
+
+        while (1) {
+                pthread_mutex_lock (&c_clnt->pending_lock);
+                {
+                        while (list_empty (&c_clnt->pending))
+                                pthread_cond_wait (&c_clnt->pending_cond,
+                                                   &c_clnt->pending_lock);
+                        crpc = list_first_entry (&c_clnt->pending,
+                                                 changelog_rpc_clnt_t, list);
+                        crpc->rpc =
+                                changelog_rpc_client_init (this, crpc,
+                                                           crpc->sock,
+                                                           changelog_rpc_notify);
+                        if (!crpc->rpc) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_MSG_RPC_CONNECT_ERROR,
+                                        "failed to connect back.. <%s>",
+                                        crpc->sock);
+                                /*
+                                 * Take the entry off ->pending before it
+                                 * is cleaned up; otherwise the next
+                                 * iteration would pick the same (by then
+                                 * cleaned up) entry again.
+                                 */
+                                list_del (&crpc->list);
+                                crpc->cleanup (crpc);
+                                goto mutex_unlock;
+                        }
+
+                        LOCK (&c_clnt->wait_lock);
+                        {
+                                list_move_tail (&crpc->list, &c_clnt->waitq);
+                        }
+                        UNLOCK (&c_clnt->wait_lock);
+                }
+        mutex_unlock:
+                pthread_mutex_unlock (&c_clnt->pending_lock);
+        }
+
+        return NULL;
+}
+
+/*
+ * Disable the RPC endpoint of every client on the active list. The
+ * list is walked under ->active_lock.
+ */
+void
+changelog_ev_cleanup_connections (xlator_t *this, changelog_clnt_t *c_clnt)
+{
+        changelog_rpc_clnt_t *client = NULL;
+
+        /* cleanup active connections */
+        LOCK (&c_clnt->active_lock);
+        {
+                list_for_each_entry (client, &c_clnt->active, list)
+                        rpc_clnt_disable (client->rpc);
+        }
+        UNLOCK (&c_clnt->active_lock);
+}
+
+/**
+ * TODO: granularize lock
+ *
+ * If we have multiple threads dispatching events, doing it this way is
+ * a performance bottleneck.
+ */
+
+/*
+ * Fetch the active client at list position *next and advance *next.
+ *
+ * Under ->active_lock: returns NULL when *next is the list head (end of
+ * list); otherwise takes a reference on both the client and its rpc,
+ * moves *next to the following node and returns the client. Release
+ * the references with put_client().
+ */
+static changelog_rpc_clnt_t *
+get_client (changelog_clnt_t *c_clnt, struct list_head **next)
+{
+ changelog_rpc_clnt_t *crpc = NULL;
+
+ LOCK (&c_clnt->active_lock);
+ {
+ if (*next == &c_clnt->active)
+ goto unblock;
+ crpc = list_entry (*next, changelog_rpc_clnt_t, list);
+ /* ref rpc as DISCONNECT might unref the rpc asynchronously */
+ changelog_rpc_clnt_ref (crpc);
+ rpc_clnt_ref (crpc->rpc);
+ *next = (*next)->next;
+ }
+ unblock:
+ UNLOCK (&c_clnt->active_lock);
+
+ return crpc;
+}
+
+/*
+ * Drop the rpc and client references taken by get_client(). Both are
+ * released while holding ->active_lock.
+ */
+static void
+put_client (changelog_clnt_t *c_clnt, changelog_rpc_clnt_t *crpc)
+{
+ LOCK (&c_clnt->active_lock);
+ {
+ rpc_clnt_unref (crpc->rpc);
+ changelog_rpc_clnt_unref (crpc);
+ }
+ UNLOCK (&c_clnt->active_lock);
+}
+
+/*
+ * Send the buffered events in @rlist to every client on the active
+ * list, one client at a time. @arg is the owning changelog_clnt_t.
+ *
+ * NOTE(review): `tmp` is unused and the value stored in `ret` is never
+ * examined, so a failed dispatch to one client is silently ignored.
+ * NOTE(review): the initial `c_clnt->active.next` is read before
+ * get_client() takes ->active_lock - confirm this is safe.
+ */
+void
+_dispatcher (rbuf_list_t *rlist, void *arg)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ changelog_rpc_clnt_t *crpc = NULL;
+ changelog_rpc_clnt_t *tmp = NULL;
+ struct ev_rpc erpc = {0,};
+ struct list_head *next = NULL;
+
+ c_clnt = arg;
+ this = c_clnt->this;
+
+ erpc.rlist = rlist;
+ next = c_clnt->active.next;
+
+ while (1) {
+ /* returns NULL once the end of the active list is reached */
+ crpc = get_client (c_clnt, &next);
+ if (!crpc)
+ break;
+ erpc.rpc = crpc->rpc;
+ ret = changelog_invoke_rpc (this, crpc->rpc,
+ &changelog_ev_program,
+ CHANGELOG_REV_PROC_EVENT, &erpc);
+ put_client (c_clnt, crpc);
+ }
+}
+
+/**
+ * Reserve sequence numbers for a buffer list; this is called under
+ * rotbuff's lock.
+ *
+ * The list needs one sequence number per dispatch of NR_IOVEC entries
+ * (rounded up). The starting number and the count are stored on the
+ * list and the client's running sequence is advanced by that count.
+ */
+void
+sequencer (rbuf_list_t *rlist, void *mydata)
+{
+ unsigned long range = 0;
+ changelog_clnt_t *c_clnt = 0;
+
+ c_clnt = mydata;
+
+ /* range = ceil (entry count / NR_IOVEC) */
+ range = (RLIST_ENTRY_COUNT (rlist)) / NR_IOVEC;
+ if ((RLIST_ENTRY_COUNT (rlist)) % NR_IOVEC)
+ range++;
+ RLIST_STORE_SEQ (rlist, c_clnt->sequence, range);
+
+ c_clnt->sequence += range;
+}
+
+/*
+ * Dispatcher thread: once a second, try to grab a consumable buffer
+ * from the rot-buffs (assigning it sequence numbers via sequencer())
+ * and, when one is available, have _dispatcher() send it to the active
+ * clients. @data is the changelog_clnt_t. The loop never terminates on
+ * its own.
+ */
+void *
+changelog_ev_dispatch (void *data)
+{
+ int ret = 0;
+ void *opaque = NULL;
+ xlator_t *this = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ struct timeval tv = {0,};
+
+ c_clnt = data;
+ this = c_clnt->this;
+
+ while (1) {
+ /* TODO: change this to be pthread cond based.. later */
+ /* select() with no fds is used as a one second sleep */
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ select (0, NULL, NULL, NULL, &tv);
+
+ ret = rbuf_get_buffer (c_clnt->rbuf,
+ &opaque, sequencer, c_clnt);
+ if (ret != RBUF_CONSUMABLE) {
+ /* an empty buffer is the normal idle case: stay quiet */
+ if (ret != RBUF_EMPTY)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_BUFFER_STARVATION_ERROR,
+ "Failed to get buffer for RPC dispatch "
+ "[rbuf retval: %d]", ret);
+ continue;
+ }
+
+ ret = rbuf_wait_for_completion (c_clnt->rbuf,
+ opaque, _dispatcher, c_clnt);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_PUT_BUFFER_FAILED,
+ "failed to put buffer after consumption");
+ }
+
+ return NULL;
+}
+
+/*
+ * Queue @crpc on the pending list and wake the connector thread
+ * (changelog_ev_connector) waiting on ->pending_cond.
+ */
+void
+changelog_ev_queue_connection (changelog_clnt_t *c_clnt,
+ changelog_rpc_clnt_t *crpc)
+{
+ pthread_mutex_lock (&c_clnt->pending_lock);
+ {
+ list_add_tail (&crpc->list, &c_clnt->pending);
+ pthread_cond_signal (&c_clnt->pending_cond);
+ }
+ pthread_mutex_unlock (&c_clnt->pending_lock);
+}
+
+/* Procedure table of the event dispatch program, indexed by procnum. */
+struct rpc_clnt_procedure changelog_ev_procs[CHANGELOG_REV_PROC_MAX] = {
+ [CHANGELOG_REV_PROC_NULL] = {"NULL", NULL},
+ [CHANGELOG_REV_PROC_EVENT] = {
+ "EVENT DISPATCH", changelog_event_dispatch_rpc
+ },
+};
+
+/* RPC client program descriptor used for all event dispatch requests. */
+struct rpc_clnt_program changelog_ev_program = {
+ .progname = "CHANGELOG EVENT DISPATCHER",
+ .prognum = CHANGELOG_REV_RPC_PROCNUM,
+ .progver = CHANGELOG_REV_RPC_PROCVER,
+ .numproc = CHANGELOG_REV_PROC_MAX,
+ .proctable = changelog_ev_procs,
+};
diff --git a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h
new file mode 100644
index 00000000000..eef0492a9ee
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-ev-handle.h
@@ -0,0 +1,140 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHANGELOG_EV_HANDLE_H
+#define __CHANGELOG_EV_HANDLE_H
+
+#include "list.h"
+#include "xlator.h"
+#include "rpc-clnt.h"
+
+#include "rot-buffs.h"
+
+struct changelog_clnt;
+
+/*
+ * One reverse-connection client: the RPC endpoint events are sent to,
+ * together with its event filter and lifetime bookkeeping.
+ */
+typedef struct changelog_rpc_clnt {
+ xlator_t *this;
+
+ gf_lock_t lock; /* guards ->ref (see the ref/unref helpers) */
+
+ unsigned long ref; /* reference count */
+ gf_boolean_t disconnected; /* set on RPC_CLNT_DISCONNECT */
+
+ unsigned int filter; /* event selection bitmask */
+ char sock[UNIX_PATH_MAX]; /* socket path to connect back to */
+
+ struct changelog_clnt *c_clnt; /* back pointer to list holder */
+
+ struct rpc_clnt *rpc; /* RPC client endpoint */
+
+ struct list_head list; /* ->pending, ->waitq, ->active */
+
+ void (*cleanup)
+ (struct changelog_rpc_clnt *); /* cleanup handler */
+} changelog_rpc_clnt_t;
+
+/* Take one reference on @crpc; the counter is updated under crpc->lock. */
+static inline void
+changelog_rpc_clnt_ref (changelog_rpc_clnt_t *crpc)
+{
+        LOCK (&crpc->lock);
+        {
+                crpc->ref += 1;
+        }
+        UNLOCK (&crpc->lock);
+}
+
+/*
+ * Record whether @crpc has been disconnected. No lock is taken here;
+ * the caller in changelog_rpc_notify() holds crpc->lock.
+ */
+static inline void
+changelog_set_disconnect_flag (changelog_rpc_clnt_t *crpc, gf_boolean_t flag)
+{
+ crpc->disconnected = flag;
+}
+
+/* Return non-zero when @crpc has been flagged as disconnected. */
+static inline int
+changelog_rpc_clnt_is_disconnected (changelog_rpc_clnt_t *crpc)
+{
+ return (crpc->disconnected == _gf_true);
+}
+
+/*
+ * Drop one reference on @crpc. When the count reaches zero and the
+ * client is flagged disconnected, it is unlinked from its list and its
+ * cleanup handler is invoked (after crpc->lock has been released).
+ *
+ * NOTE(review): list_del() here runs under crpc->lock only; put_client()
+ * additionally holds ->active_lock but other callers may not - confirm.
+ */
+static inline void
+changelog_rpc_clnt_unref (changelog_rpc_clnt_t *crpc)
+{
+ gf_boolean_t gone = _gf_false;
+
+ LOCK (&crpc->lock);
+ {
+ if (!(--crpc->ref)
+ && changelog_rpc_clnt_is_disconnected (crpc)) {
+ list_del (&crpc->list);
+ gone = _gf_true;
+ }
+ }
+ UNLOCK (&crpc->lock);
+
+ /* cleanup is called outside the lock */
+ if (gone)
+ crpc->cleanup (crpc);
+}
+
+/**
+ * This structure holds pending and active clients. On a probe RPC,
+ * an instance of the above structure (@changelog_rpc_clnt) is placed
+ * in ->pending and gets moved to ->active on a successful connect.
+ *
+ * locking rules:
+ *
+ * Manipulating ->pending
+ * ->pending_lock
+ * ->pending
+ *
+ * Manipulating ->active
+ * ->active_lock
+ * ->active
+ *
+ * Moving object from ->pending to ->active
+ * ->pending_lock
+ * ->active_lock
+ *
+ * Objects are _never_ moved from ->active to ->pending, i.e., during
+ * disconnection, the object is destroyed. Well, we could have tried
+ * to reconnect, but that's pure waste.. let the other end reconnect.
+ */
+
+/* Holder of all reverse-connection clients and the event consumer state. */
+typedef struct changelog_clnt {
+ xlator_t *this;
+
+ /* pending connections */
+ pthread_mutex_t pending_lock;
+ pthread_cond_t pending_cond; /* signalled when ->pending gains an entry */
+ struct list_head pending;
+
+ /* current active connections */
+ gf_lock_t active_lock;
+ struct list_head active;
+
+ /* connections initiated but not yet reported as connected */
+ gf_lock_t wait_lock;
+ struct list_head waitq;
+
+ /* consumer part of rot-buffs */
+ rbuf_t *rbuf;
+ unsigned long sequence; /* next dispatch sequence number */
+} changelog_clnt_t;
+
+void *changelog_ev_connector (void *);
+
+void *changelog_ev_dispatch (void *);
+
+/* APIs */
+void
+changelog_ev_queue_connection (changelog_clnt_t *, changelog_rpc_clnt_t *);
+
+void
+changelog_ev_cleanup_connections (xlator_t *, changelog_clnt_t *);
+
+#endif
+
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
new file mode 100644
index 00000000000..0cb68587e57
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -0,0 +1,1979 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+#include "syscall.h"
+
+#include "changelog-helpers.h"
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+#include "changelog-messages.h"
+
+#include "changelog-encoders.h"
+#include "changelog-rpc-common.h"
+#include <pthread.h>
+
+/*
+ * Unlock the mutex passed as @arg_mutex; a NULL argument is ignored.
+ * The void * signature matches what a thread cleanup handler takes.
+ */
+static void
+changelog_cleanup_free_mutex (void *arg_mutex)
+{
+        if (arg_mutex == NULL)
+                return;
+
+        pthread_mutex_unlock ((pthread_mutex_t *) arg_mutex);
+}
+
+/*
+ * Cancel thread @thr_id and wait for it to terminate.
+ *
+ * Returns 0 when both pthread_cancel() and pthread_join() succeed,
+ * otherwise the error number returned by the failing call. A thread
+ * that exited by other means than cancellation is logged but still
+ * reported as 0.
+ */
+int
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id)
+{
+        int   ret    = 0;
+        void *retval = NULL;
+
+        /* send a cancel request to the thread */
+        ret = pthread_cancel (thr_id);
+        if (ret != 0) {
+                /* pthread calls return the error number; errno is not set */
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_CANCEL_FAILED,
+                        "could not cancel thread");
+                goto out;
+        }
+
+        ret = pthread_join (thr_id, &retval);
+        if ((ret != 0) || (retval != PTHREAD_CANCELED)) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_CANCEL_FAILED,
+                        "cancel request not adhered as expected");
+        }
+
+ out:
+        return ret;
+}
+
+/*
+ * Return the data pointer of the iobuf attached to @local's log data,
+ * or NULL when @local is NULL or has no iobuf.
+ */
+void *
+changelog_get_usable_buffer (changelog_local_t *local)
+{
+        if (local && local->cld.cld_iobuf)
+                return local->cld.cld_iobuf->ptr;
+
+        return NULL;
+}
+
+/*
+ * Map a selector bitmask to the zero-based index of its lowest set
+ * bit. Returns -1 when no bit is set.
+ */
+static int
+changelog_selector_index (unsigned int selector)
+{
+        int lowest_set = ffs (selector); /* 1-based; 0 when no bit is set */
+
+        return lowest_set - 1;
+}
+
+/*
+ * Tell whether any client is currently interested in the event type
+ * @selector (a single-bit mask).
+ *
+ * Returns non-zero when the reference count for that event is positive,
+ * 0 otherwise, including when @selector is 0 or maps outside the
+ * selection range. The count is read without taking the lock.
+ */
+int
+changelog_ev_selected (xlator_t *this,
+                       changelog_ev_selector_t *selection,
+                       unsigned int selector)
+{
+        int idx = 0;
+
+        idx = changelog_selector_index (selector);
+        /* validate before touching ->ref[]: idx is -1 for an empty mask */
+        if ((idx < 0) || (idx >= CHANGELOG_EV_SELECTION_RANGE)) {
+                gf_msg_debug (this->name, 0,
+                              "selector %d (idx: %d) outside selection range",
+                              selector, idx);
+                return 0;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "selector ref count for %d (idx: %d): %d",
+                      selector, idx, selection->ref[idx]);
+        /* this can be lockless */
+        return (selection->ref[idx] > 0);
+}
+
+/*
+ * Take one reference on every event type whose bit is set in
+ * @selector. Bits at or beyond CHANGELOG_EV_SELECTION_RANGE are
+ * skipped. All updates happen under selection->reflock.
+ */
+void
+changelog_select_event (xlator_t *this,
+                        changelog_ev_selector_t *selection,
+                        unsigned int selector)
+{
+        int idx = 0;
+
+        LOCK (&selection->reflock);
+        {
+                while (selector) {
+                        idx = changelog_selector_index (selector);
+                        if (idx < CHANGELOG_EV_SELECTION_RANGE) {
+                                selection->ref[idx]++;
+                                gf_msg_debug (this->name, 0,
+                                              "selecting event %d", idx);
+                        }
+                        /* unsigned shift: bit 31 must not overflow an int */
+                        selector &= ~(1U << idx);
+                }
+        }
+        UNLOCK (&selection->reflock);
+}
+
+/*
+ * Drop one reference on every event type whose bit is set in
+ * @selector. Bits at or beyond CHANGELOG_EV_SELECTION_RANGE are
+ * skipped. All updates happen under selection->reflock.
+ */
+void
+changelog_deselect_event (xlator_t *this,
+                          changelog_ev_selector_t *selection,
+                          unsigned int selector)
+{
+        int idx = 0;
+
+        LOCK (&selection->reflock);
+        {
+                while (selector) {
+                        idx = changelog_selector_index (selector);
+                        if (idx < CHANGELOG_EV_SELECTION_RANGE) {
+                                selection->ref[idx]--;
+                                gf_msg_debug (this->name, 0,
+                                              "de-selecting event %d", idx);
+                        }
+                        /* unsigned shift: bit 31 must not overflow an int */
+                        selector &= ~(1U << idx);
+                }
+        }
+        UNLOCK (&selection->reflock);
+}
+
+/*
+ * Initialise the selection lock and reset every event reference count
+ * to zero. Returns 0 on success, -1 when the lock cannot be created.
+ */
+int
+changelog_init_event_selection (xlator_t *this,
+                                changelog_ev_selector_t *selection)
+{
+        int j = 0;
+
+        if (LOCK_INIT (&selection->reflock) != 0)
+                return -1;
+
+        LOCK (&selection->reflock);
+        {
+                for (j = 0; j < CHANGELOG_EV_SELECTION_RANGE; j++)
+                        selection->ref[j] = 0;
+        }
+        UNLOCK (&selection->reflock);
+
+        return 0;
+}
+
+/*
+ * Tear down the selection lock. A warning is logged for each event
+ * type that still has a positive reference count. Returns the result
+ * of LOCK_DESTROY().
+ */
+int
+changelog_cleanup_event_selection (xlator_t *this,
+                                   changelog_ev_selector_t *selection)
+{
+        int j = 0;
+
+        LOCK (&selection->reflock);
+        {
+                for (j = 0; j < CHANGELOG_EV_SELECTION_RANGE; j++) {
+                        if (selection->ref[j] <= 0)
+                                continue;
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF,
+                                "changelog event selection cleaning up "
+                                " on active references");
+                }
+        }
+        UNLOCK (&selection->reflock);
+
+        return LOCK_DESTROY (&selection->reflock);
+}
+
+/*
+ * Copy @size bytes from @mem into a write area reserved in priv->rbuf
+ * and mark the write complete. When no area can be reserved a warning
+ * is logged and the event is dropped.
+ */
+static void
+changelog_perform_dispatch (xlator_t *this,
+                            changelog_priv_t *priv, void *mem, size_t size)
+{
+        void *opaque = NULL;
+        char *slot   = NULL;
+
+        slot = rbuf_reserve_write_area (priv->rbuf, size, &opaque);
+        if (slot) {
+                memcpy (slot, mem, size);
+                rbuf_write_complete (opaque);
+                return;
+        }
+
+        gf_msg_callingfn (this->name,
+                          GF_LOG_WARNING, 0,
+                          CHANGELOG_MSG_DISPATCH_EVENT_FAILED,
+                          "failed to dispatch event");
+}
+
+/*
+ * Queue event @ev for delivery, but only when its type is currently
+ * selected by at least one client.
+ */
+void
+changelog_dispatch_event (xlator_t *this,
+                          changelog_priv_t *priv, changelog_event_t *ev)
+{
+        if (!changelog_ev_selected (this, &priv->ev_selection, ev->ev_type))
+                return;
+
+        changelog_perform_dispatch (this, priv, ev, CHANGELOG_EV_SIZE);
+}
+
+/*
+ * Record the payload length (@len) and the number of extra records
+ * (@xr) in @local's log data.
+ */
+void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+                                        size_t len, int xr)
+{
+        local->cld.cld_ptr_len      = len;
+        local->cld.cld_xtra_records = xr;
+}
+
+/*
+ * Release everything held by @local and return it to its memory pool.
+ * A NULL @local is ignored.
+ */
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local)
+{
+ int i = 0;
+ changelog_opt_t *co = NULL;
+ changelog_log_data_t *cld = NULL;
+
+ if (!local)
+ return;
+
+ cld = &local->cld;
+
+ /* cleanup dynamic allocation for extra records */
+ if (cld->cld_xtra_records) {
+ /* cld_ptr is treated as an array of cld_xtra_records options */
+ co = (changelog_opt_t *) cld->cld_ptr;
+ for (; i < cld->cld_xtra_records; i++, co++)
+ if (co->co_free)
+ co->co_free (co);
+ }
+
+ CHANGELOG_IOBUF_UNREF (cld->cld_iobuf);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ mem_put (local);
+}
+
+/*
+ * Write all @len bytes of @buffer to @fd, retrying after short writes.
+ *
+ * Returns 0 when everything was written and 1 otherwise. Note that the
+ * failure value is 1, never negative.
+ */
+int
+changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t written = 0;
+
+ while (written < len) {
+ size = sys_write (fd,
+ buffer + written, len - written);
+ /* stop on error (< 0) or when no progress is made (0) */
+ if (size <= 0)
+ break;
+
+ written += size;
+ }
+
+ return (written != len);
+}
+
+/*
+ * Append the changelog path in @buffer (including its terminating NUL)
+ * to the htime file and refresh the HTIME_KEY xattr with
+ * "<ts>:<rollover count>".
+ *
+ * Returns 0 on success and -1 on failure; priv->rollover_count is
+ * incremented only on success.
+ */
+int
+htime_update (xlator_t *this,
+              changelog_priv_t *priv, unsigned long ts,
+              char * buffer)
+{
+        char changelog_path[PATH_MAX+1] = {0,};
+        int  len                        = -1;
+        char x_value[25]                = {0,};
+        /* time stamp(10) + : (1) + rolltime (12 ) + buffer (2) */
+        int  ret                        = 0;
+
+        if (priv->htime_fd ==-1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_HTIME_ERROR,
+                        "Htime fd not available for updation");
+                ret = -1;
+                goto out;
+        }
+        /* the array is zero-filled and one byte larger than the copy
+         * bound, so the copy is always NUL-terminated */
+        strncpy (changelog_path, buffer, PATH_MAX);
+        len = strlen (changelog_path);
+
+        /* changelog_write() reports failure as a non-zero (positive)
+         * value, so "< 0" would never detect it */
+        if (changelog_write (priv->htime_fd, (void*) changelog_path, len+1 ) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_HTIME_ERROR,
+                        "Htime file content write failed");
+                ret =-1;
+                goto out;
+        }
+
+        snprintf (x_value, sizeof x_value, "%lu:%d",
+                  ts, priv->rollover_count);
+
+        /* prefer replacing the existing xattr; fall back to a plain set */
+        if (sys_fsetxattr (priv->htime_fd, HTIME_KEY, x_value,
+                           strlen (x_value), XATTR_REPLACE)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_HTIME_ERROR,
+                        "Htime xattr updation failed with XATTR_REPLACE "
+                        "Changelog: %s", changelog_path);
+
+                if (sys_fsetxattr (priv->htime_fd, HTIME_KEY, x_value,
+                                   strlen (x_value), 0)) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_MSG_HTIME_ERROR,
+                                "Htime xattr updation failed "
+                                "Changelog: %s", changelog_path);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        priv->rollover_count +=1;
+
+out:
+        return ret;
+}
+
+/*
+ * Description: Check if the changelog to rollover is empty or not.
+ * It is assumed that fd passed is already verified.
+ *
+ * Returns:
+ * 1 : If found empty, changed path from "CHANGELOG.<TS>" to "changelog.<TS>"
+ * 0 : If NOT empty, proceed usual.
+ */
+/*
+ * Returns 1 when the changelog behind @fd holds nothing but its header,
+ * 0 when it holds more, and -1 when fstat or lseek fails.
+ */
+int
+cl_is_empty (xlator_t *this, int fd)
+{
+ int ret = -1;
+ size_t elen = 0;
+ int encoding = -1;
+ char buffer[1024] = {0,};
+ struct stat stbuf = {0,};
+ int major_version = -1;
+ int minor_version = -1;
+
+ ret = sys_fstat (fd, &stbuf);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSTAT_OP_FAILED,
+ "Could not stat (CHANGELOG)");
+ goto out;
+ }
+
+ /* rewind so the header is read from the start of the file */
+ ret = sys_lseek (fd, 0, SEEK_SET);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_LSEEK_OP_FAILED,
+ "Could not lseek (CHANGELOG)");
+ goto out;
+ }
+
+ /* NOTE(review): the macro is defined elsewhere; presumably it reads
+ * the header from fd and sets elen to the header length - confirm. */
+ CHANGELOG_GET_HEADER_INFO (fd, buffer, 1024, encoding,
+ major_version, minor_version, elen);
+
+ /* file size equal to elen means nothing follows the header */
+ if (elen == stbuf.st_size) {
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Description: Updates "CHANGELOG" to "changelog" for writing changelog path
+ * to htime file.
+ *
+ * Returns:
+ * 0 : Success
+ * -1 : Error
+ */
+/*
+ * Overwrite the first "CHANGELOG" in @cl_path with "changelog", in
+ * place. Returns 0 on success, -1 when the marker is not found.
+ */
+int
+update_path (xlator_t *this, char *cl_path)
+{
+        const char  low_cl[] = "changelog";
+        const char  up_cl[]  = "CHANGELOG";
+        char       *found    = NULL;
+
+        found = strstr (cl_path, up_cl);
+        if (found == NULL) {
+                /* strstr() does not set errno, so none is reported */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_LSEEK_OP_FAILED,
+                        "Could not find CHANGELOG in changelog path");
+                return -1;
+        }
+
+        /* same length as the marker: copy the bytes without the NUL */
+        memcpy (found, low_cl, sizeof (low_cl) - 1);
+
+        return 0;
+}
+
+/*
+ * Roll the current changelog over at timestamp @ts.
+ *
+ * The open changelog is synced and closed. An empty one (header only)
+ * is unlinked; otherwise it is renamed to CHANGELOG_FILE_NAME.<ts>. The
+ * resulting path is recorded in the htime file, and for non-empty
+ * changelogs a journal event is dispatched. If the rollover was
+ * requested explicitly, the waiter on priv->bn is always signalled,
+ * with bnotify_error set on failure.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+changelog_rollover_changelog (xlator_t *this,
+ changelog_priv_t *priv, unsigned long ts)
+{
+ int ret = -1;
+ int notify = 0;
+ int cl_empty_flag = 0;
+ char ofile[PATH_MAX] = {0,};
+ char nfile[PATH_MAX] = {0,};
+ changelog_event_t ev = {0,};
+
+ if (priv->changelog_fd != -1) {
+ ret = sys_fsync (priv->changelog_fd);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED,
+ "fsync failed");
+ }
+ ret = cl_is_empty (this, priv->changelog_fd);
+ if (ret == 1) {
+ cl_empty_flag = 1;
+ } else if (ret == -1) {
+ /* Log error but proceed as usual */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED,
+ "Error detecting empty changelog");
+ }
+ sys_close (priv->changelog_fd);
+ priv->changelog_fd = -1;
+ }
+
+ /* ofile: the live changelog; nfile: its timestamped name */
+ (void) snprintf (ofile, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME, priv->changelog_dir);
+ (void) snprintf (nfile, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME".%lu",
+ priv->changelog_dir, ts);
+
+ if (cl_empty_flag == 1) {
+ ret = sys_unlink (ofile);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_UNLINK_OP_FAILED,
+ "error unlinking(empty cl) %s)",
+ ofile);
+ ret = 0; /* Error in unlinking empty changelog should
+ not break further changelog operation, so
+ reset return value to 0*/
+ }
+ } else {
+ ret = sys_rename (ofile, nfile);
+
+ /* nothing to roll over: not an error, but skip htime/notify */
+ if (ret && (errno == ENOENT)) {
+ ret = 0;
+ goto out;
+ }
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_RENAME_ERROR,
+ "error renaming %s -> %s",
+ ofile, nfile);
+ }
+ }
+
+ /* consumers are only notified about non-empty changelogs */
+ if (!ret && (cl_empty_flag == 0)) {
+ notify = 1;
+ }
+
+ if (!ret) {
+ /* an empty changelog is recorded with a lower-case name */
+ if (cl_empty_flag) {
+ update_path (this, nfile);
+ }
+ ret = htime_update (this, priv, ts, nfile);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ 0, CHANGELOG_MSG_HTIME_ERROR,
+ "could not update htime file");
+ goto out;
+ }
+ }
+
+ if (notify) {
+ ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL;
+ memcpy (ev.u.journal.path, nfile, strlen (nfile) + 1);
+ changelog_dispatch_event (this, priv, &ev);
+ }
+ out:
+ /* If this is explicit rollover initiated by snapshot,
+ * wakeup reconfigure thread waiting for changelog to
+ * rollover. This should happen even in failure cases as
+ * well otherwise snapshot will timeout and fail. Hence
+ * moved under out.
+ */
+ if (priv->explicit_rollover) {
+ priv->explicit_rollover = _gf_false;
+
+ pthread_mutex_lock (&priv->bn.bnotify_mutex);
+ {
+ if (ret) {
+ priv->bn.bnotify_error = _gf_true;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED,
+ "Fail snapshot because of "
+ "previous errors");
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BNOTIFY_INFO, "Explicit "
+ "rollover changelog: %s signaling "
+ "bnotify", nfile);
+ }
+ priv->bn.bnotify = _gf_false;
+ pthread_cond_signal (&priv->bn.bnotify_cond);
+ }
+ pthread_mutex_unlock (&priv->bn.bnotify_mutex);
+ }
+ return ret;
+}
+
+/*
+ * scandir() filter: keep every entry except "." and "..".
+ * Returns 1 to keep the entry, 0 to drop it (also for a NULL entry).
+ */
+int
+filter_cur_par_dirs (const struct dirent *entry)
+{
+        const char *name = NULL;
+
+        if (!entry)
+                return 0;
+
+        name = entry->d_name;
+
+        return !((strcmp (name, ".") == 0) || (strcmp (name, "..") == 0));
+}
+
+/*
+ * find_current_htime:
+ * It finds the latest htime file and sets the HTIME_CURRENT
+ * xattr.
+ * RETURN VALUE:
+ * -1 : Error
+ * ret: Number of directory entries;
+ */
+
+/*
+ * Scan @ht_dir_path, copy the name of the alphabetically last entry
+ * into @ht_file_bname and store it as the HTIME_CURRENT xattr on
+ * @ht_dir_fd.
+ *
+ * Returns the number of entries found (0 when the directory is empty),
+ * or -1 when scandir, fsetxattr or fsync fails.
+ * @ht_file_bname must have room for at least NAME_MAX bytes.
+ */
+int
+find_current_htime (int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
+{
+ struct dirent **namelist = NULL;
+ int ret = 0;
+ int cnt = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (ht_dir_path);
+
+ cnt = scandir (ht_dir_path, &namelist, filter_cur_par_dirs, alphasort);
+ if (cnt < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_SCAN_DIR_FAILED,
+ "scandir failed");
+ } else if (cnt > 0) {
+ /* entries are sorted, so the last one is the latest */
+ strncpy (ht_file_bname, namelist[cnt - 1]->d_name, NAME_MAX);
+ ht_file_bname[NAME_MAX - 1] = 0;
+
+ if (sys_fsetxattr (ht_dir_fd, HTIME_CURRENT, ht_file_bname,
+ strlen (ht_file_bname), 0)) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSETXATTR_FAILED,
+ "fsetxattr failed: HTIME_CURRENT");
+ ret = -1;
+ goto out;
+ }
+
+ if (sys_fsync (ht_dir_fd) < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED,
+ "fsync failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ out:
+ /* when scandir failed cnt is negative and namelist is still NULL */
+ for (i = 0; i < cnt; i++)
+ free (namelist[i]);
+ free (namelist);
+
+ if (ret)
+ cnt = ret;
+
+ return cnt;
+}
+
+/* Returns 0 on successful open of htime file
+ * returns -1 on failure or error
+ */
+/*
+ * Open the current htime file in append mode and resume its rollover
+ * count.
+ *
+ * The file name comes from the HTIME_CURRENT xattr on the htime
+ * directory. When the xattr is missing, the latest file in the
+ * directory is used instead; when there is none, a new htime file is
+ * created through htime_create().
+ *
+ * On success priv->htime_fd and priv->rollover_count are set.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+htime_open (xlator_t *this,
+            changelog_priv_t *priv, unsigned long ts)
+{
+        int           ht_file_fd              = -1;
+        int           ht_dir_fd               = -1;
+        int           ret                     = 0;
+        int           cnt                     = 0;
+        char          ht_dir_path[PATH_MAX]   = {0,};
+        char          ht_file_path[PATH_MAX]  = {0,};
+        char          ht_file_bname[NAME_MAX] = {0,};
+        char          x_value[NAME_MAX]       = {0,};
+        int           flags                   = 0;
+        unsigned long min_ts                  = 0;
+        unsigned long max_ts                  = 0;
+        unsigned long total                   = 0;
+        ssize_t       size                    = 0;
+
+        CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
+
+        /* Open htime directory to get HTIME_CURRENT */
+        ht_dir_fd = open (ht_dir_path, O_RDONLY);
+        if (ht_dir_fd == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_OPEN_FAILED, "open failed: %s",
+                        ht_dir_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* xattr values carry no terminator: keep the last (zeroed)
+         * byte of the buffer for it */
+        size = sys_fgetxattr (ht_dir_fd, HTIME_CURRENT, ht_file_bname,
+                              sizeof (ht_file_bname) - 1);
+        if (size < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_FGETXATTR_FAILED, "Error extracting"
+                        " HTIME_CURRENT.");
+
+                /* If upgrade scenario, find the latest HTIME.TSTAMP file
+                 * and use the same. If error, create a new HTIME.TSTAMP
+                 * file.
+                 */
+                cnt = find_current_htime (ht_dir_fd, ht_dir_path,
+                                          ht_file_bname);
+                if (cnt <= 0) {
+                        gf_msg (this->name, GF_LOG_INFO, errno,
+                                CHANGELOG_MSG_HTIME_INFO,
+                                "HTIME_CURRENT not found. Changelog enabled"
+                                " before init");
+                        /* leave through "out" so ht_dir_fd is closed */
+                        ret = htime_create (this, priv, ts);
+                        goto out;
+                }
+
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_HTIME_ERROR, "Error extracting"
+                        " HTIME_CURRENT.");
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
+                "HTIME_CURRENT: %s", ht_file_bname);
+        (void) snprintf (ht_file_path, PATH_MAX, "%s/%s",
+                         ht_dir_path, ht_file_bname);
+
+        /* Open in append mode as existing htime file is used */
+        flags |= (O_RDWR | O_SYNC | O_APPEND);
+        ht_file_fd = open (ht_file_path, flags,
+                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (ht_file_fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_OPEN_FAILED,
+                        "unable to open htime file: %s",
+                        ht_file_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* save this htime_fd in priv->htime_fd */
+        priv->htime_fd = ht_file_fd;
+
+        /* Initialize rollover-number in priv to current number */
+        size = sys_fgetxattr (ht_file_fd, HTIME_KEY, x_value,
+                              sizeof (x_value) - 1);
+        if (size < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_FGETXATTR_FAILED, "error extracting max"
+                        " timstamp from htime file %s",
+                        ht_file_path);
+                ret = -1;
+                goto out;
+        }
+
+        sscanf (x_value, "%lu:%lu", &max_ts, &total);
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                CHANGELOG_MSG_TOTAL_LOG_INFO,
+                "INIT CASE: MIN: %lu, MAX: %lu,"
+                " TOTAL CHANGELOGS: %lu", min_ts, max_ts, total);
+        priv->rollover_count = total + 1;
+
+out:
+        if (ht_dir_fd != -1)
+                sys_close (ht_dir_fd);
+        return ret;
+}
+
+/* Returns 0 on successful creation of htime file
+ * returns -1 on failure or error
+ */
+/*
+ * Create a fresh htime file named HTIME_FILE_NAME.<ts>, initialise its
+ * HTIME_KEY xattr and record its name in the HTIME_CURRENT xattr of the
+ * htime directory.
+ *
+ * On success priv->htime_fd holds the new file descriptor and
+ * priv->rollover_count is 1. On failure the descriptor is closed and
+ * priv is left untouched. Returns 0 on success, negative on failure.
+ */
+int
+htime_create (xlator_t *this,
+              changelog_priv_t *priv, unsigned long ts)
+{
+        int  ht_file_fd                  = -1;
+        int  ht_dir_fd                   = -1;
+        int  ret                         = 0;
+        char ht_dir_path[PATH_MAX]       = {0,};
+        char ht_file_path[PATH_MAX]      = {0,};
+        char ht_file_bname[NAME_MAX + 1] = {0,};
+        int  flags                       = 0;
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                CHANGELOG_MSG_HTIME_INFO, "Changelog enable: Creating new "
+                "HTIME.%lu file", ts);
+
+        CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
+
+        /* get the htime file name in ht_file_path */
+        (void) snprintf (ht_file_path,PATH_MAX,"%s/%s.%lu",ht_dir_path,
+                         HTIME_FILE_NAME, ts);
+
+        flags |= (O_CREAT | O_RDWR | O_SYNC);
+        ht_file_fd = open (ht_file_path, flags,
+                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (ht_file_fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_OPEN_FAILED,
+                        "unable to create htime file: %s",
+                        ht_file_path);
+                ret = -1;
+                goto out;
+        }
+
+        if (sys_fsetxattr (ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE,
+                           sizeof (HTIME_INITIAL_VALUE)-1, 0)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_FSETXATTR_FAILED,
+                        "Htime xattr initialization failed");
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_fsync (ht_file_fd);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_FSYNC_OP_FAILED,
+                        "fsync failed");
+                goto out;
+        }
+
+        /* Set xattr HTIME_CURRENT on htime directory to htime filename */
+        ht_dir_fd = open (ht_dir_path, O_RDONLY);
+        if (ht_dir_fd == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_OPEN_FAILED, "open of %s failed",
+                        ht_dir_path);
+                ret = -1;
+                goto out;
+        }
+
+        (void) snprintf (ht_file_bname, sizeof (ht_file_bname), "%s.%lu",
+                         HTIME_FILE_NAME, ts);
+        if (sys_fsetxattr (ht_dir_fd, HTIME_CURRENT, ht_file_bname,
+                           strlen (ht_file_bname), 0)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_FSETXATTR_FAILED, "fsetxattr failed:"
+                        " HTIME_CURRENT");
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_fsync (ht_dir_fd);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_FSYNC_OP_FAILED,
+                        "fsync failed");
+                goto out;
+        }
+
+        /* save this htime_fd in priv->htime_fd */
+        priv->htime_fd = ht_file_fd;
+        /* initialize rollover-number in priv to 1 */
+        priv->rollover_count = 1;
+
+out:
+        if (ht_dir_fd != -1)
+                sys_close (ht_dir_fd);
+        /* on failure the file fd was never handed over to priv */
+        if (ret && (ht_file_fd != -1))
+                sys_close (ht_file_fd);
+        return ret;
+}
+
+/*
+ * changelog_snap_open:
+ *     Creates (or truncates) the snap changelog, named CSNAP_FILE_NAME,
+ *     inside the csnap directory derived from priv->changelog_dir, stores
+ *     the descriptor in priv->c_snap_fd and writes the changelog header
+ *     into the file. If the header cannot be written the descriptor is
+ *     closed and priv->c_snap_fd is reset to -1.
+ *
+ * Returns:
+ *     0  : On success.
+ *     -1 : If the file could not be opened; otherwise the (negative)
+ *          result of the failed header write.
+ */
+int
+changelog_snap_open (xlator_t *this,
+                     changelog_priv_t *priv)
+{
+        int  snap_fd              = -1;
+        int  ret                  = 0;
+        char header[1024]         = {0,};
+        char snap_file[PATH_MAX]  = {0,};
+        char snap_dir[PATH_MAX]   = {0,};
+
+        CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, snap_dir);
+
+        (void) snprintf (snap_file, PATH_MAX,
+                         "%s/"CSNAP_FILE_NAME,
+                         snap_dir);
+
+        snap_fd = open (snap_file, O_CREAT | O_RDWR | O_TRUNC,
+                        S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (snap_fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_OPEN_FAILED, "unable to open %s file ",
+                        snap_file);
+                return -1;
+        }
+        priv->c_snap_fd = snap_fd;
+
+        /* every journal starts with a version/encoder header line */
+        (void) snprintf (header, 1024, CHANGELOG_HEADER,
+                         CHANGELOG_VERSION_MAJOR,
+                         CHANGELOG_VERSION_MINOR,
+                         priv->ce->encoder);
+
+        ret = changelog_snap_write_change (priv, header, strlen (header));
+        if (ret < 0) {
+                sys_close (priv->c_snap_fd);
+                priv->c_snap_fd = -1;
+        }
+
+        return ret;
+}
+
+/*
+ * Description:
+ *     Starts logging fop details in CSNAP journal by opening the snap
+ *     changelog through changelog_snap_open(). The informational message
+ *     is emitted only when the journal was actually opened, so the log
+ *     never claims that call-path logging started after a failure.
+ * Returns:
+ *     0  : On success.
+ *     -1 : On Failure (value propagated from changelog_snap_open()).
+ */
+int
+changelog_snap_logging_start (xlator_t *this,
+                              changelog_priv_t *priv)
+{
+        int ret = 0;
+
+        ret = changelog_snap_open (this, priv);
+        if (ret == 0)
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        CHANGELOG_MSG_SNAP_INFO,
+                        "Now starting to log in call path");
+
+        return ret;
+}
+
+/*
+ * Description:
+ *     Stops logging fop details in CSNAP journal: closes priv->c_snap_fd,
+ *     marks it invalid (-1) and logs that call-path logging has stopped.
+ * Returns:
+ *     Always 0; the result of the close is not inspected.
+ */
+int
+changelog_snap_logging_stop (xlator_t *this,
+                             changelog_priv_t *priv)
+{
+        sys_close (priv->c_snap_fd);
+        priv->c_snap_fd = -1;
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                CHANGELOG_MSG_SNAP_INFO,
+                "Stopped to log in call path");
+
+        return 0;
+}
+
+/**
+ * Opens (creating if necessary) the active changelog file
+ * "<priv->changelog_dir>/CHANGELOG_FILE_NAME", stores the descriptor in
+ * priv->changelog_fd and writes the changelog header to it. O_SYNC is
+ * requested when priv->fsync_interval is 0.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened, or the non-zero
+ * result of the header write (in which case the descriptor is closed and
+ * priv->changelog_fd is reset to -1).
+ */
+int
+changelog_open_journal (xlator_t *this,
+                        changelog_priv_t *priv)
+{
+        int  journal_fd              = 0;
+        int  ret                     = -1;
+        int  open_flags              = O_CREAT | O_RDWR;
+        char header[1024]            = {0,};
+        char journal_path[PATH_MAX]  = {0,};
+
+        (void) snprintf (journal_path, PATH_MAX,
+                         "%s/"CHANGELOG_FILE_NAME,
+                         priv->changelog_dir);
+
+        if (priv->fsync_interval == 0)
+                open_flags |= O_SYNC;
+
+        journal_fd = open (journal_path, open_flags,
+                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (journal_fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_OPEN_FAILED,
+                        "unable to open/create changelog file %s."
+                        " change-logging will be"
+                        " inactive", journal_path);
+                return ret;
+        }
+
+        priv->changelog_fd = journal_fd;
+
+        (void) snprintf (header, 1024, CHANGELOG_HEADER,
+                         CHANGELOG_VERSION_MAJOR,
+                         CHANGELOG_VERSION_MINOR,
+                         priv->ce->encoder);
+
+        ret = changelog_write_change (priv, header, strlen (header));
+        if (ret == 0)
+                return 0;
+
+        /* header could not be written: do not keep a half-set-up journal */
+        sys_close (priv->changelog_fd);
+        priv->changelog_fd = -1;
+        return ret;
+}
+
+/**
+ * Rolls over the current changelog at timestamp @ts and, unless @finale is
+ * set, opens a fresh journal afterwards.
+ *
+ * Returns the result of the rollover if it failed (or if @finale is set),
+ * otherwise the result of changelog_open_journal().
+ */
+int
+changelog_start_next_change (xlator_t *this,
+                             changelog_priv_t *priv,
+                             unsigned long ts, gf_boolean_t finale)
+{
+        int rollover_ret = changelog_rollover_changelog (this, priv, ts);
+
+        if (rollover_ret || finale)
+                return rollover_ret;
+
+        return changelog_open_journal (this, priv);
+}
+
+/**
+ * Size, in bytes, of one changelog entry (changelog_log_data_t).
+ */
+size_t
+changelog_entry_length ()
+{
+        const size_t entry_len = sizeof (changelog_log_data_t);
+
+        return entry_len;
+}
+
+/**
+ * Prepares @cld as a rollover event: the type is set to
+ * CHANGELOG_TYPE_ROLLOVER, the roll time to the current wall-clock second
+ * and the finale flag to @is_last.
+ *
+ * Returns 0 on success, -1 if the current time cannot be obtained (the type
+ * field has already been set in that case).
+ */
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last)
+{
+        struct timeval now = {0,};
+
+        cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
+
+        if (gettimeofday (&now, NULL) != 0)
+                return -1;
+
+        cld->cld_finale    = is_last;
+        cld->cld_roll_time = (unsigned long) now.tv_sec;
+
+        return 0;
+}
+
+/**
+ * Writes @len bytes of @buffer to the snap journal (priv->c_snap_fd) and
+ * returns the result of changelog_write().
+ */
+int
+changelog_snap_write_change (changelog_priv_t *priv, char *buffer, size_t len)
+{
+        int snap_fd = priv->c_snap_fd;
+
+        return changelog_write (snap_fd, buffer, len);
+}
+
+/**
+ * Writes @len bytes of @buffer to the active changelog
+ * (priv->changelog_fd) and returns the result of changelog_write().
+ */
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len)
+{
+        int journal_fd = priv->changelog_fd;
+
+        return changelog_write (journal_fd, buffer, len);
+}
+
+/*
+ * Descriptions:
+ *     Writes fop details in ascii format to CSNAP: the gfid of @cld and its
+ *     optional records are serialised with CHANGELOG_STORE_ASCII into a
+ *     stack buffer, a terminating NUL record is appended and the result is
+ *     written to the snap journal.
+ * Issues:
+ *     Not Encoding agnostic.
+ * Returns:
+ *     0  : On Success.
+ *     -1 : On Failure (missing arguments), or the negative result of the
+ *          failed write. A failed write is reported to the caller instead
+ *          of being logged as a success.
+ */
+int
+changelog_snap_handle_ascii_change (xlator_t *this,
+                                    changelog_log_data_t *cld)
+{
+        size_t            off      = 0;
+        size_t            gfid_len = 0;
+        char             *gfid_str = NULL;
+        char             *buffer   = NULL;
+        changelog_priv_t *priv     = NULL;
+        int               ret      = 0;
+
+        if (this == NULL || cld == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        priv = this->private;
+
+        if (priv == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        gfid_str = uuid_utoa (cld->cld_gfid);
+        gfid_len = strlen (gfid_str);
+
+        /* extra bytes for decorations */
+        buffer = alloca (gfid_len + cld->cld_ptr_len + 10);
+        CHANGELOG_STORE_ASCII (priv, buffer,
+                               off, gfid_str, gfid_len, cld);
+
+        CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+        ret = changelog_snap_write_change (priv, buffer, off);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_WRITE_FAILED,
+                        "error writing csnap to disk");
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
+                "Successfully wrote to csnap");
+        ret = 0;
+out:
+        return ret;
+}
+
+/**
+ * Processes one dispatched changelog event:
+ *   - rollover events encode the pending change and start the next
+ *     changelog;
+ *   - when no changelog is open (priv->changelog_fd == -1) any other event
+ *     is silently accepted;
+ *   - fsync events flush the changelog descriptor;
+ *   - everything else is handed to the configured encoder.
+ *
+ * Returns the result of the step that was executed (0 on success).
+ */
+int
+changelog_handle_change (xlator_t *this,
+                         changelog_priv_t *priv, changelog_log_data_t *cld)
+{
+        int ret = 0;
+
+        if (CHANGELOG_TYPE_IS_ROLLOVER (cld->cld_type)) {
+                changelog_encode_change (priv);
+                ret = changelog_start_next_change (this, priv,
+                                                   cld->cld_roll_time,
+                                                   cld->cld_finale);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_GET_TIME_OP_FAILED,
+                                "Problem rolling over changelog(s)");
+                return ret;
+        }
+
+        /**
+         * changelog was disabled through reconfigure while some fops
+         * still had updates in flight: nothing to record.
+         */
+        if (priv->changelog_fd == -1)
+                return 0;
+
+        if (CHANGELOG_TYPE_IS_FSYNC (cld->cld_type)) {
+                ret = sys_fsync (priv->changelog_fd);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_MSG_FSYNC_OP_FAILED,
+                                "fsync failed");
+                }
+                return ret;
+        }
+
+        ret = priv->ce->encode (this, cld);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_WRITE_FAILED,
+                        "error writing changelog to disk");
+        }
+
+        return ret;
+}
+
+/**
+ * Allocates and initialises a changelog_local_t for a fop.
+ *
+ * @inode may be NULL only when @update_flag is true; callers must then not
+ * rely on local->inode. When @xtra_records is non-zero an iobuf large enough
+ * for that many optional records is attached; the record count itself is
+ * left at 0 for the caller to fill in.
+ *
+ * Returns the new local, or NULL when the inode is missing or an
+ * allocation fails.
+ */
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode,
+                      uuid_t gfid, int xtra_records,
+                      gf_boolean_t update_flag)
+{
+        changelog_local_t *new_local  = NULL;
+        struct iobuf      *record_buf = NULL;
+
+        if (!update_flag && !inode) {
+                gf_msg_callingfn (this->name, GF_LOG_WARNING, 0,
+                                  CHANGELOG_MSG_INODE_NOT_FOUND,
+                                  "inode needed for version checking !!!");
+                return NULL;
+        }
+
+        if (xtra_records) {
+                record_buf = iobuf_get2 (this->ctx->iobuf_pool,
+                                         xtra_records * CHANGELOG_OPT_RECORD_LEN);
+                if (!record_buf)
+                        return NULL;
+        }
+
+        new_local = mem_get0 (this->local_pool);
+        if (!new_local) {
+                /* give back the record buffer obtained above */
+                CHANGELOG_IOBUF_UNREF (record_buf);
+                return NULL;
+        }
+
+        new_local->update_no_check = update_flag;
+
+        gf_uuid_copy (new_local->cld.cld_gfid, gfid);
+
+        new_local->cld.cld_iobuf = record_buf;
+        new_local->cld.cld_xtra_records = 0; /* set by the caller */
+
+        if (inode)
+                new_local->inode = inode_ref (inode);
+
+        return new_local;
+}
+
+/**
+ * Removes this xlator's context from @inode and frees it when one was
+ * attached. Always returns 0.
+ */
+int
+changelog_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t               stored  = 0;
+        changelog_inode_ctx_t *old_ctx = NULL;
+
+        inode_ctx_del (inode, this, &stored);
+        if (stored) {
+                old_ctx = (changelog_inode_ctx_t *) (long) stored;
+                GF_FREE (old_ctx);
+        }
+
+        return 0;
+}
+
+/**
+ * Hands a single event (@cld) to the dispatcher configured in priv->cd.
+ * The second event slot of the dispatch function is passed as NULL.
+ * Returns whatever the dispatch function returns.
+ */
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld)
+{
+ return priv->cd.dispatchfn (this, priv, priv->cd.cd_data, cld, NULL);
+}
+
+/**
+ * Wait till all the black fops are drained.
+ *
+ * Blocks on priv->dm.drain_black_cond (under drain_black_mutex) until
+ * priv->dm.black_fop_cnt drops to zero. drain_wait_black is raised while
+ * waiting and cleared again before the mutex is released.
+ *
+ * pthread functions report failures through their return value and do not
+ * set errno, so the returned code is what gets logged.
+ */
+void
+changelog_drain_black_fops (xlator_t *this, changelog_priv_t *priv)
+{
+        int ret = 0;
+
+        /* clean up framework of pthread_mutex is required here as
+         * 'reconfigure' terminates the changelog_rollover thread
+         * on graph change.
+         */
+        pthread_cleanup_push (changelog_cleanup_free_mutex,
+                              &priv->dm.drain_black_mutex);
+        ret = pthread_mutex_lock (&priv->dm.drain_black_mutex);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:"
+                        " Error:%d", ret);
+        while (priv->dm.black_fop_cnt > 0) {
+                gf_msg_debug (this->name, 0,
+                              "Condtional wait on black fops: %ld",
+                              priv->dm.black_fop_cnt);
+                priv->dm.drain_wait_black = _gf_true;
+                ret = pthread_cond_wait (&priv->dm.drain_black_cond,
+                                         &priv->dm.drain_black_mutex);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, ret,
+                                CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED,
+                                "pthread cond wait failed: Error:%d",
+                                ret);
+        }
+        priv->dm.drain_wait_black = _gf_false;
+        ret = pthread_mutex_unlock (&priv->dm.drain_black_mutex);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:"
+                        " Error:%d", ret);
+        pthread_cleanup_pop (0);
+        gf_msg_debug (this->name, 0,
+                      "Woke up: Conditional wait on black fops");
+}
+
+/**
+ * Wait till all the white fops are drained.
+ *
+ * Blocks on priv->dm.drain_white_cond (under drain_white_mutex) until
+ * priv->dm.white_fop_cnt drops to zero. drain_wait_white is raised while
+ * waiting and cleared again before the mutex is released.
+ *
+ * pthread functions report failures through their return value and do not
+ * set errno, so the returned code is what gets logged.
+ */
+void
+changelog_drain_white_fops (xlator_t *this, changelog_priv_t *priv)
+{
+        int ret = 0;
+
+        /* clean up framework of pthread_mutex is required here as
+         * 'reconfigure' terminates the changelog_rollover thread
+         * on graph change.
+         */
+        pthread_cleanup_push (changelog_cleanup_free_mutex,
+                              &priv->dm.drain_white_mutex);
+        ret = pthread_mutex_lock (&priv->dm.drain_white_mutex);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:"
+                        " Error:%d", ret);
+        while (priv->dm.white_fop_cnt > 0) {
+                gf_msg_debug (this->name, 0,
+                              "Condtional wait on white fops : %ld",
+                              priv->dm.white_fop_cnt);
+                priv->dm.drain_wait_white = _gf_true;
+                ret = pthread_cond_wait (&priv->dm.drain_white_cond,
+                                         &priv->dm.drain_white_mutex);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, ret,
+                                CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED,
+                                "pthread cond wait failed: Error:%d",
+                                ret);
+        }
+        priv->dm.drain_wait_white = _gf_false;
+        ret = pthread_mutex_unlock (&priv->dm.drain_white_mutex);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:"
+                        " Error:%d", ret);
+        pthread_cleanup_pop (0);
+        gf_msg_debug (this->name, 0,
+                      "Woke up: Conditional wait on white fops");
+}
+
+/**
+ * TODO: these threads have many thing in common (wake up after
+ * a certain time etc..). move them into separate routine.
+ */
+/**
+ * changelog_rollover: thread body of the rollover thread.
+ *
+ * @data is the changelog_priv_t of the xlator. Each iteration:
+ *   1. waits on priv->cr.cond until either priv->rollover_time seconds
+ *      have elapsed or priv->cr.notify has been set (explicit rollover);
+ *   2. flips priv->current_color and waits for the fops of the previous
+ *      color to drain;
+ *   3. sleeps one extra second for explicit rollovers (see below);
+ *   4. injects a rollover event through the dispatcher and, on success,
+ *      bumps the slice version under priv->lock.
+ *
+ * The loop never terminates on its own; the trailing return is never
+ * reached from within the loop.
+ */
+void *
+changelog_rollover (void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timespec tv = {0,};
+ changelog_log_data_t cld = {0,};
+ changelog_time_slice_t *slice = NULL;
+ changelog_priv_t *priv = data;
+
+ this = priv->cr.this;
+ slice = &priv->slice;
+
+ while (1) {
+ (void) pthread_testcancel();
+
+ /* absolute deadline for the timed wait below */
+ tv.tv_sec = time (NULL) + priv->rollover_time;
+ tv.tv_nsec = 0;
+ ret = 0; /* Reset ret to zero */
+
+ /* The race between actual rollover and explicit rollover is
+ * handled. If actual rollover is being done and the
+ * explicit rollover event comes, the event is not missed.
+ * Since explicit rollover sets 'cr.notify' to true, this
+ * thread doesn't wait on 'pthread_cond_timedwait'.
+ */
+ pthread_cleanup_push (changelog_cleanup_free_mutex,
+ &priv->cr.lock);
+ pthread_mutex_lock (&priv->cr.lock);
+ {
+ /* loop guards against wakeups that did not set notify */
+ while (ret == 0 && !priv->cr.notify)
+ ret = pthread_cond_timedwait (&priv->cr.cond,
+ &priv->cr.lock,
+ &tv);
+ /* ret == 0 here means notify was set: consume it */
+ if (ret == 0)
+ priv->cr.notify = _gf_false;
+ }
+ pthread_mutex_unlock (&priv->cr.lock);
+ pthread_cleanup_pop (0);
+
+ if (ret == 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_INFO,
+ "Explicit wakeup on barrier notify");
+ /* NOTE(review): not cleared in this function; presumably
+ * reset by the rollover code path - confirm. */
+ priv->explicit_rollover = _gf_true;
+ } else if (ret && ret != ETIMEDOUT) {
+ /* NOTE(review): pthread_cond_timedwait reports errors via
+ * its return value; errno logged here may be unrelated. */
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_SELECT_FAILED,
+ "pthread_cond_timedwait failed");
+ continue;
+ } else if (ret && ret == ETIMEDOUT) {
+ gf_msg_debug (this->name, 0, "Wokeup on timeout");
+ }
+
+ /* Reading curent_color without lock is fine here
+ * as it is only modified here and is next to reading.
+ */
+ if (priv->current_color == FOP_COLOR_BLACK) {
+ LOCK(&priv->lock);
+ priv->current_color = FOP_COLOR_WHITE;
+ UNLOCK(&priv->lock);
+ gf_msg_debug (this->name, 0, "Black fops"
+ " to be drained:%ld",
+ priv->dm.black_fop_cnt);
+ changelog_drain_black_fops (this, priv);
+ } else {
+ LOCK(&priv->lock);
+ priv->current_color = FOP_COLOR_BLACK;
+ UNLOCK(&priv->lock);
+ gf_msg_debug (this->name, 0, "White fops"
+ " to be drained:%ld",
+ priv->dm.white_fop_cnt);
+ changelog_drain_white_fops (this, priv);
+ }
+
+ /* Adding delay of 1 second only during explicit rollover:
+ *
+ * Changelog rollover can happen either due to actual
+ * or the explict rollover during snapshot. Actual
+ * rollover is controlled by tuneable called 'rollover-time'.
+ * The minimum granularity for rollover-time is 1 second.
+ * Explicit rollover is asynchronous in nature and happens
+ * during snapshot.
+ *
+ * Basically, rollover renames the current CHANGELOG file
+ * to CHANGELOG.TIMESTAMP. Let's assume, at time 't1',
+ * actual and explicit rollover raced against each
+ * other and actual rollover won the race renaming the
+ * CHANGELOG file to CHANGELOG.t1 and opens a new
+ * CHANGELOG file. There is high chance that, an immediate
+ * explicit rollover at time 't1' can happen with in the same
+ * second to rename CHANGELOG file to CHANGELOG.t1 resulting in
+ * purging the earlier CHANGELOG.t1 file created by actual
+ * rollover. So adding a delay of 1 second guarantees unique
+ * CHANGELOG.TIMESTAMP during explicit rollover.
+ */
+ if (priv->explicit_rollover == _gf_true)
+ sleep (1);
+
+ ret = changelog_fill_rollover_data (&cld, _gf_false);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_GET_TIME_OP_FAILED,
+ "failed to fill rollover data");
+ continue;
+ }
+
+ /* cancellation is masked while the event is injected and
+ * the slice version is bumped under priv->lock */
+ _mask_cancellation ();
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (!ret)
+ SLICE_VERSION_UPDATE (slice);
+ }
+ UNLOCK (&priv->lock);
+
+ _unmask_cancellation ();
+ }
+
+ return NULL;
+}
+
+/**
+ * changelog_fsync_thread: thread body of the periodic fsync thread.
+ *
+ * @data is the changelog_priv_t of the xlator. The thread sleeps for
+ * priv->fsync_interval seconds (using select() as a timer) and then
+ * injects a CHANGELOG_TYPE_FSYNC event with cancellation masked. A failed
+ * injection is only logged. The loop does not terminate on its own.
+ */
+void *
+changelog_fsync_thread (void *data)
+{
+        changelog_priv_t     *priv       = data;
+        xlator_t             *this       = priv->cf.this;
+        struct timeval        nap        = {0,};
+        changelog_log_data_t  cld        = {0,};
+        int                   ret        = 0;
+
+        cld.cld_type = CHANGELOG_TYPE_FSYNC;
+
+        for (;;) {
+                (void) pthread_testcancel();
+
+                nap.tv_sec  = priv->fsync_interval;
+                nap.tv_usec = 0;
+
+                /* anything but a clean timeout: start the nap again */
+                ret = select (0, NULL, NULL, NULL, &nap);
+                if (ret != 0)
+                        continue;
+
+                _mask_cancellation ();
+
+                ret = changelog_inject_single_event (this, priv, &cld);
+                if (ret != 0)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_INJECT_FSYNC_FAILED,
+                                "failed to inject fsync event");
+
+                _unmask_cancellation ();
+        }
+
+        return NULL;
+}
+
+/* macros for inode/changelog version checks */
+
+/*
+ * INODE_VERSION_UPDATE: store the slice's current version for @type into
+ * *iver. inode->lock is taken first and priv->lock is nested inside it.
+ * Arguments are expanded unparenthesised, so pass plain identifiers.
+ */
+#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) do { \
+ LOCK (&inode->lock); \
+ { \
+ LOCK (&priv->lock); \
+ { \
+ *iver = slice->changelog_version[type]; \
+ } \
+ UNLOCK (&priv->lock); \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+/*
+ * INODE_VERSION_EQUALS_SLICE: under priv->lock, compare @ver with the
+ * slice version for @type. @upd is set to _gf_false when they are equal
+ * (no update needed) and to _gf_true otherwise.
+ */
+#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) do { \
+ LOCK (&priv->lock); \
+ { \
+ upd = (ver == slice->changelog_version[type]) \
+ ? _gf_false : _gf_true; \
+ } \
+ UNLOCK (&priv->lock); \
+ } while (0)
+
+/**
+ * Stores @ctx as this xlator's context on @inode via __inode_ctx_set()
+ * and returns its result. The double-underscore prefix follows the
+ * convention of the locked helpers in this file; the caller is presumably
+ * expected to hold inode->lock.
+ */
+static int
+__changelog_inode_ctx_set (xlator_t *this,
+                           inode_t *inode, changelog_inode_ctx_t *ctx)
+{
+        uint64_t value = (uint64_t) ctx;
+
+        return __inode_ctx_set (inode, this, &value);
+}
+
+/**
+ * One shot routine to fetch (creating on first use) the changelog context
+ * of @inode and, optionally, the address and current value of its version
+ * for @type.
+ *
+ * When @iver and @version are both non-NULL and a context is available,
+ * *iver points at the per-type version inside the context and *version
+ * holds its value.
+ *
+ * Returns the context, or NULL if it could not be allocated or attached.
+ */
+changelog_inode_ctx_t *
+__changelog_inode_ctx_get (xlator_t *this,
+                           inode_t *inode, unsigned long **iver,
+                           unsigned long *version, changelog_log_type type)
+{
+        uint64_t               stored = 0;
+        changelog_inode_ctx_t *ctx    = NULL;
+
+        if (__inode_ctx_get (inode, this, &stored) >= 0 && stored != 0) {
+                /* context already attached to the inode */
+                ctx = (changelog_inode_ctx_t *) (long)stored;
+        } else {
+                ctx = GF_CALLOC (1, sizeof (*ctx), gf_changelog_mt_inode_ctx_t);
+                if (ctx && __changelog_inode_ctx_set (this, inode, ctx)) {
+                        GF_FREE (ctx);
+                        ctx = NULL;
+                }
+        }
+
+        if (ctx && iver && version) {
+                *iver = CHANGELOG_INODE_VERSION_TYPE (ctx, type);
+                *version = **iver;
+        }
+
+        return ctx;
+}
+
+/**
+ * Locked wrapper: runs __changelog_inode_ctx_get() with inode->lock held
+ * and returns its result.
+ */
+static changelog_inode_ctx_t *
+changelog_inode_ctx_get (xlator_t *this,
+                         inode_t *inode, unsigned long **iver,
+                         unsigned long *version, changelog_log_type type)
+{
+        changelog_inode_ctx_t *found = NULL;
+
+        LOCK (&inode->lock);
+        found = __changelog_inode_ctx_get (this, inode, iver, version, type);
+        UNLOCK (&inode->lock);
+
+        return found;
+}
+
+/**
+ * This is the main update routine. Locking has been made granular so as to
+ * maximize parallelism of fops - I'll try to explain it below using execution
+ * timelines.
+ *
+ * Basically, the contention is between multiple execution threads of this
+ * routine and the roll-over thread. So, instead of having a big lock, we hold
+ * granular locks: inode->lock and priv->lock. Now I'll explain what happens
+ * when there is an update and a roll-over at just about the same time.
+ * NOTE:
+ * - the dispatcher itself synchronizes updates via its own lock
+ * - the slice version is incremented by the roll-over thread
+ *
+ * Case 1: When the rollover thread wins before the inode version can be
+ * compared with the slice version.
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 1, 1, 1> |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 1 <-> S: 2 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * |
+ * <if update == true> |
+ * <dispatch-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Therefore, the change gets recorded in the next change (no lost change). If
+ * the slice version was ahead of the inode version (say I:1, S: 2), then
+ * anyway the comparison would result in a update (I: 1, S: 3).
+ *
+ * If the rollover time is too short, then there is another contention when the
+ * updater tries to bring up inode version to the slice version (this is also
+ * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE).
+ *
+ * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2>
+ * |
+ * |
+ * <dispatch-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 3, 3, 3>
+ * | UNLOCK (&priv->lock)
+ *
+ *
+ * Case 2: When the fop thread wins
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 0, 0, 0> |
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 0 <-> S: 1 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * <if update == true> |
+ * <dispatch-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 0, 0> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Here again, if the inode version was equal to the slice version (I: 1, S: 1)
+ * then there is no need to record an update (as the equality of the two version
+ * signifies an update was recorded in the current time slice).
+ */
+void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+                  changelog_local_t *local, changelog_log_type type)
+{
+        int                     ret      = 0;
+        unsigned long          *iver     = NULL;
+        unsigned long           version  = 0;
+        inode_t                *inode    = NULL;
+        changelog_time_slice_t *slice    = &priv->slice;
+        changelog_log_data_t   *first    = NULL;
+        changelog_log_data_t   *second   = NULL;
+        changelog_local_t      *chained  = NULL;
+        gf_boolean_t            need_upd = _gf_true;
+
+        /**
+         * Version checking is skipped for fops flagged update_no_check
+         * and when no inode context can be obtained; in both cases the
+         * update is dispatched unconditionally.
+         */
+        if (!local->update_no_check) {
+                inode = local->inode;
+
+                if (changelog_inode_ctx_get (this, inode,
+                                             &iver, &version, type)) {
+                        INODE_VERSION_EQUALS_SLICE (priv, version, slice,
+                                                    type, need_upd);
+                }
+        }
+
+        if (!need_upd)
+                return;
+
+        first = &local->cld;
+        first->cld_type = type;
+
+        /* a chained local contributes a second record to the dispatch */
+        chained = local->prev_entry;
+        if (chained != NULL) {
+                second = &chained->cld;
+                second->cld_type = type;
+        }
+
+        ret = priv->cd.dispatchfn (this, priv,
+                                   priv->cd.cd_data, first, second);
+
+        /**
+         * bring the inode version up to the slice version only once
+         * the dispatcher has done its job successfully.
+         */
+        if (!local->update_no_check && iver && !ret)
+                INODE_VERSION_UPDATE (priv, inode, iver, slice, type);
+
+        return;
+}
+
+/* Begin: Geo-rep snapshot dependency changes */
+
+/* changelog_color_fop_and_inc_cnt: Assign color and inc fop cnt.
+ *
+ * The fop takes the current color and the matching fop counter is
+ * incremented while priv->lock is held, so that there is no window between
+ * the two steps. With such a window, a drain of (say) black fops could see
+ * the counter reach zero and declare the drain complete while a fop that
+ * was already colored black had not yet been counted.
+ *
+ * Does nothing when @priv or @local is NULL.
+ */
+
+void
+changelog_color_fop_and_inc_cnt (xlator_t *this, changelog_priv_t *priv,
+                                 changelog_local_t *local)
+{
+        if (priv && local) {
+                LOCK (&priv->lock);
+                {
+                        local->color = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, local);
+                }
+                UNLOCK (&priv->lock);
+        }
+}
+
+/* Increments the fop counter matching local->color (black or white) under
+ * the corresponding drain mutex. A NULL @local is ignored. pthread
+ * failures are routed through CHANGELOG_PTHREAD_ERROR_HANDLE_0 with the
+ * 'out' label.
+ */
+void
+changelog_inc_fop_cnt (xlator_t *this, changelog_priv_t *priv,
+                       changelog_local_t *local)
+{
+        int ret = 0;
+
+        if (!local)
+                goto out;
+
+        if (local->color != FOP_COLOR_BLACK) {
+                ret = pthread_mutex_lock (&priv->dm.drain_white_mutex);
+                CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+                {
+                        priv->dm.white_fop_cnt++;
+                }
+                ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
+                CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+        } else {
+                ret = pthread_mutex_lock (&priv->dm.drain_black_mutex);
+                CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+                {
+                        priv->dm.black_fop_cnt++;
+                }
+                ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
+                CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+        }
+
+ out:
+        return;
+}
+
+/* Decrements the respective fop counter based on the fop color.
+ *
+ * The counter matching local->color is decremented under its drain mutex.
+ * When it reaches zero while a drainer is waiting (drain_wait_black /
+ * drain_wait_white is set) the matching condition variable is signalled.
+ * A NULL @local is ignored.
+ */
+void
+changelog_dec_fop_cnt (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local)
+{
+ int ret = 0;
+
+ if (local) {
+ if (local->color == FOP_COLOR_BLACK) {
+ ret = pthread_mutex_lock (&priv->dm.drain_black_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+ {
+ priv->dm.black_fop_cnt--;
+ if (priv->dm.black_fop_cnt == 0 &&
+ priv->dm.drain_wait_black == _gf_true) {
+ ret = pthread_cond_signal (
+ &priv->dm.drain_black_cond);
+ /* NOTE(review): if this macro jumps to 'out' on
+ * error, the drain mutex stays locked - confirm. */
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret,
+ out);
+ gf_msg_debug (this->name, 0,
+ "Signalled "
+ "draining of black");
+ }
+ }
+ ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+ } else {
+ ret = pthread_mutex_lock (&priv->dm.drain_white_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+ {
+ priv->dm.white_fop_cnt--;
+ if (priv->dm.white_fop_cnt == 0 &&
+ priv->dm.drain_wait_white == _gf_true) {
+ ret = pthread_cond_signal (
+ &priv->dm.drain_white_cond);
+ /* NOTE(review): same concern as in the black
+ * branch - mutex may remain held on error. */
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret,
+ out);
+ gf_msg_debug (this->name, 0,
+ "Signalled "
+ "draining of white");
+ }
+ }
+ ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+ }
+ }
+ out:
+ return;
+}
+
+/* Wakes up the changelog rollover thread to initiate an explicit rollover
+ * of the changelog journal: under priv->cr.lock the rollover condition
+ * variable is signalled and priv->cr.notify is raised.
+ *
+ * @buf is not used by this function.
+ *
+ * Returns the result of pthread_cond_signal().
+ */
+int
+changelog_barrier_notify (changelog_priv_t *priv, char *buf)
+{
+        int signal_ret = 0;
+
+        pthread_mutex_lock (&priv->cr.lock);
+
+        signal_ret = pthread_cond_signal (&priv->cr.cond);
+        priv->cr.notify = _gf_true;
+
+        pthread_mutex_unlock (&priv->cr.lock);
+
+        return signal_ret;
+}
+
+/* Clean up flags set on barrier notification.
+ *
+ * Clears bflags.barrier_ext and bn.bnotify (each under its own lock), then,
+ * under priv->lock, disables the changelog barrier if it is enabled. The
+ * queued fops in @queue are dequeued only when the barrier was actually
+ * disabled here (ret is still 0 at that point).
+ */
+void
+changelog_barrier_cleanup (xlator_t *this, changelog_priv_t *priv,
+ struct list_head *queue)
+{
+ int ret = 0;
+
+ LOCK (&priv->bflags.lock);
+ priv->bflags.barrier_ext = _gf_false;
+ UNLOCK (&priv->bflags.lock);
+
+ ret = pthread_mutex_lock (&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+ {
+ priv->bn.bnotify = _gf_false;
+ }
+ ret = pthread_mutex_unlock (&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out);
+
+ /* Disable changelog barrier and dequeue fops */
+ LOCK (&priv->lock);
+ {
+ if (priv->barrier_enabled == _gf_true)
+ __chlog_barrier_disable (this, queue);
+ else
+ /* barrier was not enabled: skip the dequeue below */
+ ret = -1;
+ }
+ UNLOCK (&priv->lock);
+ if (ret == 0)
+ chlog_barrier_dequeue_all(this, queue);
+
+ out:
+ return;
+}
+/* End: Geo-Rep snapshot dependency changes */
+
+/**
+ * changelog_fill_entry_buf:
+ *     Initialises *local for loc->inode and fills five optional records
+ *     describing the entry: the fop number (MKDIR for directories, CREATE
+ *     otherwise), a default mode, the uid and gid of the caller taken from
+ *     @frame, and the <parent gfid, basename> entry record.
+ *
+ *     The basename is taken from a private copy of loc->path; a failure to
+ *     duplicate the path is reported as an error instead of being passed
+ *     on to basename().
+ *
+ * Returns 0 on success and -1 on failure. On failure *local may already
+ * have been initialised; it is not released here.
+ */
+int32_t
+changelog_fill_entry_buf (call_frame_t *frame, xlator_t *this,
+                          loc_t *loc, changelog_local_t **local)
+{
+        changelog_opt_t *co       = NULL;
+        size_t           xtra_len = 0;
+        char            *dup_path = NULL;
+        char            *bname    = NULL;
+        inode_t         *parent   = NULL;
+        int32_t          ret      = -1;
+
+        GF_ASSERT (this);
+
+        parent = inode_parent (loc->inode, 0, 0);
+        if (!parent) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_INODE_NOT_FOUND, "Parent inode not found"
+                        " for gfid: %s", uuid_utoa (loc->inode->gfid));
+                goto out;
+        }
+
+        CHANGELOG_INIT_NOCHECK (this, *local, loc->inode, loc->inode->gfid, 5);
+        if (!(*local)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_LOCAL_INIT_FAILED, "changelog local"
+                        " initiatilization failed");
+                goto out;
+        }
+
+        co = changelog_get_usable_buffer (*local);
+        if (!co) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to get buffer");
+                goto out;
+        }
+
+        if (loc->inode->ia_type == IA_IFDIR) {
+                CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_MKDIR, fop_fn, xtra_len);
+                co++;
+                CHANGELOG_FILL_UINT32 (co, S_IFDIR|0755, number_fn, xtra_len);
+                co++;
+        } else {
+                CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_CREATE, fop_fn, xtra_len);
+                co++;
+                CHANGELOG_FILL_UINT32 (co, S_IFREG|0644, number_fn, xtra_len);
+                co++;
+        }
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+        co++;
+
+        /* basename() may modify its argument, so work on a copy */
+        dup_path = gf_strdup (loc->path);
+        if (!dup_path) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to duplicate path");
+                goto out;
+        }
+        bname = basename (dup_path);
+
+        CHANGELOG_FILL_ENTRY (co, parent->gfid, bname, entry_fn, entry_free_fn,
+                              xtra_len, out);
+        changelog_set_usable_record_and_length (*local, xtra_len, 5);
+
+        ret = 0;
+
+out:
+        if (dup_path)
+                GF_FREE (dup_path);
+        if (parent)
+                inode_unref (parent);
+        return ret;
+}
+
+/*
+ * resolve_pargfid_to_path:
+ *     It converts given pargfid to path by doing recursive readlinks at the
+ *     backend. If bname is given, it suffixes bname to pargfid to form the
+ *     complete path else it doesn't. It allocates memory for the path and is
+ *     caller's responsibility to free the same. If bname is NULL and pargfid
+ *     is ROOT, then it returns "."
+ *
+ *     Each gfid handle under "<brick>/.glusterfs/" is expected to be a
+ *     symlink of the form "../../xx/yy/<parent-gfid>/<dir-name>"; the loop
+ *     walks up through the parents until the root gfid is reached, building
+ *     the path from the collected directory names.
+ *
+ * Returns 0 on success. Returns -1 when @path is NULL, the gfid is null, a
+ * handle cannot be read or has an unexpected target, the resulting path
+ * does not fit in PATH_MAX, or memory for the result cannot be allocated.
+ */
+
+int
+resolve_pargfid_to_path (xlator_t *this, const uuid_t pgfid,
+                         char **path, char *bname)
+{
+        static const char  link_prefix[]           = "../../00/00/";
+        const size_t       prefix_len              = sizeof (link_prefix) - 1;
+        char              *pgfidstr                = NULL;
+        char              *dir_name                = NULL;
+        char              *saveptr                 = NULL;
+        ssize_t            len                     = 0;
+        size_t             used                    = 0;
+        int                written                 = 0;
+        int                ret                     = 0;
+        uuid_t             tmp_gfid                = {0, };
+        uuid_t             pargfid                 = {0, };
+        changelog_priv_t  *priv                    = NULL;
+        char               gpath[PATH_MAX]         = {0,};
+        char               result[PATH_MAX]        = {0,};
+        char               pre_dir_name[PATH_MAX]  = {0,};
+        char               dir_handle[PATH_MAX]    = {0,};
+        char               linkname[PATH_MAX]      = {0,};
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        gf_uuid_copy (pargfid, pgfid);
+        if (!path || gf_uuid_is_null (pargfid)) {
+                ret = -1;
+                goto out;
+        }
+
+        if (__is_root_gfid (pargfid)) {
+                *path = gf_strdup (bname ? bname : ".");
+                if (!*path)
+                        ret = -1;
+                goto out;
+        }
+
+        (void) snprintf (gpath, PATH_MAX, "%s/.glusterfs/",
+                         priv->changelog_brick);
+
+        while (!(__is_root_gfid (pargfid))) {
+                snprintf (dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath,
+                          pargfid[0], pargfid[1], uuid_utoa (pargfid));
+
+                /* readlink does not NUL-terminate: keep one byte spare */
+                len = sys_readlink (dir_handle, linkname,
+                                    sizeof (linkname) - 1);
+                if (len < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                CHANGELOG_MSG_READLINK_OP_FAILED,
+                                "could not read the "
+                                "link from the gfid handle %s", dir_handle);
+                        ret = -1;
+                        goto out;
+                }
+
+                linkname[len] = '\0';
+
+                /* split "<prefix><parent-gfid>/<dir-name>" */
+                pgfidstr = NULL;
+                dir_name = NULL;
+                if ((size_t) len > prefix_len) {
+                        pgfidstr = strtok_r (linkname + prefix_len, "/",
+                                             &saveptr);
+                        if (pgfidstr)
+                                dir_name = strtok_r (NULL, "/", &saveptr);
+                }
+
+                if (!pgfidstr || !dir_name ||
+                    gf_uuid_parse (pgfidstr, tmp_gfid)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_READLINK_OP_FAILED,
+                                "unexpected link target for the gfid "
+                                "handle %s", dir_handle);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* prepend this component to what has been collected */
+                written = snprintf (result, sizeof (result), "%s/%s",
+                                    dir_name, pre_dir_name);
+                if (written < 0 || (size_t) written >= sizeof (result)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_READLINK_OP_FAILED,
+                                "resolved path too long for the gfid "
+                                "handle %s", dir_handle);
+                        ret = -1;
+                        goto out;
+                }
+                (void) snprintf (pre_dir_name, sizeof (pre_dir_name), "%s",
+                                 result);
+
+                gf_uuid_copy (pargfid, tmp_gfid);
+        }
+
+        if (bname) {
+                used = strlen (result);
+                if (used + strlen (bname) >= sizeof (result)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_READLINK_OP_FAILED,
+                                "resolved path too long for the gfid "
+                                "handle %s", dir_handle);
+                        ret = -1;
+                        goto out;
+                }
+                /* the bound is the space left in result, not strlen(bname) */
+                strncat (result, bname, sizeof (result) - used - 1);
+        }
+
+        *path = gf_strdup (result);
+        if (!*path)
+                ret = -1;
+
+out:
+        return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
new file mode 100644
index 00000000000..4fdba244aa1
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -0,0 +1,680 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_HELPERS_H
+#define _CHANGELOG_HELPERS_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+#include "iobuf.h"
+#include "rot-buffs.h"
+
+#include "changelog-misc.h"
+#include "call-stub.h"
+
+#include "rpcsvc.h"
+#include "changelog-ev-handle.h"
+
+#include "changelog.h"
+#include "changelog-messages.h"
+
+/**
+ * the changelog entry
+ */
+typedef struct changelog_log_data {
+ /* rollover related */
+ unsigned long cld_roll_time;
+
+ /* reopen changelog? */
+ gf_boolean_t cld_finale;
+
+ changelog_log_type cld_type;
+
+ /**
+ * since gfid is _always_ a necessity, it's not a part
+ * of the iobuf. by doing this we do not add any overhead
+ * for data and metadata related fops.
+ */
+ uuid_t cld_gfid;
+
+ /**
+ * iobufs are used for optional records: pargfid, path,
+ * write offsets etc. It's the fop implementer's job
+ * to allocate it (iobuf_get() in the fop) and to have it unref'ed
+ * in the callback (CHANGELOG_STACK_UNWIND).
+ */
+ struct iobuf *cld_iobuf;
+
+/* field alias: <obj>.cld_ptr expands to <obj>.cld_iobuf->ptr */
+#define cld_ptr cld_iobuf->ptr
+
+ /**
+ * after allocation you can point this to the length of
+ * usable data, but make sure it does not exceed the
+ * size of the requested iobuf.
+ */
+ size_t cld_iobuf_len;
+
+/* field alias: <obj>.cld_ptr_len expands to <obj>.cld_iobuf_len */
+#define cld_ptr_len cld_iobuf_len
+
+ /**
+ * number of optional records
+ */
+ int cld_xtra_records;
+} changelog_log_data_t;
+
+/**
+ * holder for dispatch function and private data
+ */
+
+typedef struct changelog_priv changelog_priv_t;
+
+typedef struct changelog_dispatcher {
+ void *cd_data;
+ int (*dispatchfn) (xlator_t *, changelog_priv_t *, void *,
+ changelog_log_data_t *, changelog_log_data_t *);
+} changelog_dispatcher_t;
+
+struct changelog_bootstrap {
+ changelog_mode_t mode;
+ int (*ctor) (xlator_t *, changelog_dispatcher_t *);
+ int (*dtor) (xlator_t *, changelog_dispatcher_t *);
+};
+
+struct changelog_encoder {
+ changelog_encoder_t encoder;
+ int (*encode) (xlator_t *, changelog_log_data_t *);
+};
+
+
+/* xlator private */
+
+typedef struct changelog_time_slice {
+ /**
+ * just in case we need nanosecond granularity some day.
+ * field is unused as of now (maybe we'd need it later).
+ */
+ struct timeval tv_start;
+
+ /**
+ * version of changelog file, incremented each time changes
+ * rollover.
+ */
+ unsigned long changelog_version[CHANGELOG_MAX_TYPE];
+} changelog_time_slice_t;
+
+typedef struct changelog_rollover {
+ /* rollover thread */
+ pthread_t rollover_th;
+
+ xlator_t *this;
+
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ gf_boolean_t notify;
+} changelog_rollover_t;
+
+typedef struct changelog_fsync {
+ /* fsync() thread */
+ pthread_t fsync_th;
+
+ xlator_t *this;
+} changelog_fsync_t;
+
+/* Draining during changelog rollover (for geo-rep snapshot dependency):
+ * --------------------------------------------------------------------
+ * The introduction of draining of in-transit fops during changelog rollover
+ * (both explicit/timeout triggered) requires coloring of fops. Basically the
+ * implementation requires two counters, one counter which keeps the count of
+ * current intransit fops which should end up in current changelog and the other
+ * counter to keep track of incoming fops which should be drained as part of
+ * next changelog rollover event. The fops are colored w.r.t these counters.
+ * The fops that are to be drained as part of current changelog rollover is
+ * given one color and the fops which keep incoming during this and not
+ * necessarily should end up in current changelog and should be drained as part
+ * of next changelog rollover are given other color. The color switching
+ * continues with each changelog rollover. Two colors (black and white) are
+ * used here, and black is the initial default.
+ */
+
+typedef enum chlog_fop_color {
+ FOP_COLOR_BLACK,
+ FOP_COLOR_WHITE
+} chlog_fop_color_t;
+
+/* Barrier notify variable */
+typedef struct barrier_notify {
+ pthread_mutex_t bnotify_mutex;
+ pthread_cond_t bnotify_cond;
+ gf_boolean_t bnotify;
+ gf_boolean_t bnotify_error;
+} barrier_notify_t;
+
+/* Two separate mutex and conditional variable set is used
+ * to drain white and black fops. */
+
+typedef struct drain_mgmt {
+ pthread_mutex_t drain_black_mutex;
+ pthread_cond_t drain_black_cond;
+ pthread_mutex_t drain_white_mutex;
+ pthread_cond_t drain_white_cond;
+ /* Represents black fops count in-transit */
+ unsigned long black_fop_cnt;
+ /* Represents white fops count in-transit */
+ unsigned long white_fop_cnt;
+ gf_boolean_t drain_wait_black;
+ gf_boolean_t drain_wait_white;
+} drain_mgmt_t;
+
+/* External barrier as a result of snap on/off indicating flag*/
+typedef struct barrier_flags {
+ gf_lock_t lock;
+ gf_boolean_t barrier_ext;
+} barrier_flags_t;
+
+/* Event selection */
+typedef struct changelog_ev_selector {
+ gf_lock_t reflock;
+
+ /**
+ * Array of references for each selection bit.
+ */
+ unsigned int ref[CHANGELOG_EV_SELECTION_RANGE];
+} changelog_ev_selector_t;
+
+
+/* changelog's private structure */
+struct changelog_priv {
+ gf_boolean_t active;
+
+ /* to generate unique socket file per brick */
+ char *changelog_brick;
+
+ /* logging directory */
+ char *changelog_dir;
+
+ /* htime directory */
+ char *htime_dir;
+
+ /* one file for all changelog types */
+ int changelog_fd;
+
+ /* htime fd for current changelog session */
+ int htime_fd;
+
+ /* c_snap_fd is fd for call-path changelog */
+ int c_snap_fd;
+
+ /* rollover_count used by htime */
+ int rollover_count;
+
+ gf_lock_t lock;
+
+ /* lock to synchronize CSNAP updation */
+ gf_lock_t c_snap_lock;
+
+ /* written end of the pipe */
+ int wfd;
+
+ /* rollover time */
+ int32_t rollover_time;
+
+ /* fsync() interval */
+ int32_t fsync_interval;
+
+ /* changelog type maps */
+ const char *maps[CHANGELOG_MAX_TYPE];
+
+ /* time slicer */
+ changelog_time_slice_t slice;
+
+ /* context of the updater */
+ changelog_dispatcher_t cd;
+
+ /* context of the rollover thread */
+ changelog_rollover_t cr;
+
+ /* context of fsync thread */
+ changelog_fsync_t cf;
+
+ /* operation mode */
+ changelog_mode_t op_mode;
+
+ /* bootstrap routine for 'current' logger */
+ struct changelog_bootstrap *cb;
+
+ /* encoder mode */
+ changelog_encoder_t encode_mode;
+
+ /* encoder */
+ struct changelog_encoder *ce;
+
+ /**
+ * snapshot dependency changes
+ */
+
+ /* Draining of fops*/
+ drain_mgmt_t dm;
+
+ /* Represents the active color. Initially by default black */
+ chlog_fop_color_t current_color;
+
+ /* flag to determine explicit rollover is triggered */
+ gf_boolean_t explicit_rollover;
+
+ /* barrier notification variable protected by mutex */
+ barrier_notify_t bn;
+
+ /* barrier on/off indicating flags */
+ barrier_flags_t bflags;
+
+ /* changelog barrier on/off indicating flag */
+ gf_boolean_t barrier_enabled;
+ struct list_head queue;
+ uint32_t queue_size;
+ gf_timer_t *timer;
+ struct timespec timeout;
+
+ /**
+ * buffers, RPC, event selection, notifications and other
+ * beasts.
+ */
+
+ /* epoll pthread */
+ pthread_t poller;
+
+ /* rotational buffer */
+ rbuf_t *rbuf;
+
+ /* changelog RPC server */
+ rpcsvc_t *rpc;
+
+ /* event selection */
+ changelog_ev_selector_t ev_selection;
+
+ /* client handling (reverse connection) */
+ pthread_t connector;
+
+ int nr_dispatchers;
+ pthread_t *ev_dispatcher;
+
+ changelog_clnt_t connections;
+
+ /* glusterfind dependency to capture paths on deleted entries*/
+ gf_boolean_t capture_del_path;
+};
+
+struct changelog_local {
+ inode_t *inode;
+ gf_boolean_t update_no_check;
+
+ changelog_log_data_t cld;
+
+ /**
+ * ->prev_entry is used in cases when there needs to be
+ * additional changelog entry for the parent (eg. rename)
+ * It's analogous to ->next in single linked list world,
+ * but we call it as ->prev_entry... ha ha ha
+ */
+ struct changelog_local *prev_entry;
+
+ /* snap dependency changes */
+ chlog_fop_color_t color;
+};
+
+typedef struct changelog_local changelog_local_t;
+
+/* inode version is stored in inode ctx */
+typedef struct changelog_inode_ctx {
+ unsigned long iversion[CHANGELOG_MAX_TYPE];
+} changelog_inode_ctx_t;
+
+/* Address of the iversion[] slot selected by @type in inode ctx pointer @ctx.
+ * Both arguments and the whole expansion are parenthesized so expression
+ * arguments (e.g. &obj) and postfix use of the result bind as expected. */
+#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) (&((ctx)->iversion[(type)]))
+
+/**
+ * Optional Records:
+ * fops that need to save additional information request a array of
+ * @changelog_opt_t struct. The array is allocated via @iobufs.
+ */
+typedef enum {
+ CHANGELOG_OPT_REC_FOP,
+ CHANGELOG_OPT_REC_ENTRY,
+ CHANGELOG_OPT_REC_UINT32,
+} changelog_optional_rec_type_t;
+
+struct changelog_entry_fields {
+ uuid_t cef_uuid;
+ char *cef_bname;
+ char *cef_path;
+};
+
+typedef struct {
+ /**
+ * @co_convert can be used to do post-processing of the record before
+ * it's persisted to the CHANGELOG. If this is NULL, then the record
+ * is persisted as per its in-memory format.
+ */
+ size_t (*co_convert) (void *data, char *buffer, gf_boolean_t encode);
+
+ /* release routines */
+ void (*co_free) (void *data);
+
+ /* type of the field */
+ changelog_optional_rec_type_t co_type;
+
+ /**
+ * size of the 'valid' field in the union. This field is not used if
+ * @co_convert is specified.
+ */
+ size_t co_len;
+
+ /* record payload; the CHANGELOG_FILL_* macros set co_type together
+ * with the matching member (UINT32 -> co_uint32, FOP -> co_fop,
+ * ENTRY -> co_entry) */
+ union {
+ unsigned int co_uint32;
+ glusterfs_fop_t co_fop;
+ struct changelog_entry_fields co_entry;
+ };
+} changelog_opt_t;
+
+#define CHANGELOG_OPT_RECORD_LEN sizeof (changelog_opt_t)
+
+/**
+ * helpers routines
+ */
+
+int
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id);
+
+void *
+changelog_get_usable_buffer (changelog_local_t *local);
+
+void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+ size_t len, int xr);
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local);
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode, uuid_t gfid,
+ int xtra_records, gf_boolean_t update_flag);
+int
+changelog_start_next_change (xlator_t *this,
+ changelog_priv_t *priv,
+ unsigned long ts, gf_boolean_t finale);
+int
+changelog_open_journal (xlator_t *this, changelog_priv_t *priv);
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last);
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld);
+/* (void) makes this a real prototype: an empty () leaves the parameters
+ * unspecified in C, so calls passing arguments would go undiagnosed. */
+size_t
+changelog_entry_length (void);
+int
+changelog_write (int fd, char *buffer, size_t len);
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len);
+int
+changelog_handle_change (xlator_t *this,
+ changelog_priv_t *priv, changelog_log_data_t *cld);
+void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type);
+void *
+changelog_rollover (void *data);
+void *
+changelog_fsync_thread (void *data);
+int
+changelog_forget (xlator_t *this, inode_t *inode);
+int
+htime_update (xlator_t *this, changelog_priv_t *priv,
+ unsigned long ts, char * buffer);
+int
+htime_open (xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+int
+htime_create (xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+
+/* Geo-Rep snapshot dependency changes */
+void
+changelog_color_fop_and_inc_cnt (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local);
+void
+changelog_inc_fop_cnt (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local);
+void
+changelog_dec_fop_cnt (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local);
+int
+changelog_barrier_notify (changelog_priv_t *priv, char* buf);
+void
+changelog_barrier_cleanup (xlator_t *this, changelog_priv_t *priv,
+ struct list_head *queue);
+void
+changelog_drain_white_fops (xlator_t *this, changelog_priv_t *priv);
+void
+changelog_drain_black_fops (xlator_t *this, changelog_priv_t *priv);
+
+/* Crash consistency of changelog wrt snapshot */
+int
+changelog_snap_logging_stop ( xlator_t *this, changelog_priv_t *priv);
+int
+changelog_snap_logging_start ( xlator_t *this, changelog_priv_t *priv);
+int
+changelog_snap_open ( xlator_t *this, changelog_priv_t *priv);
+int
+changelog_snap_handle_ascii_change (xlator_t *this,
+ changelog_log_data_t *cld);
+int
+changelog_snap_write_change (changelog_priv_t *priv, char *buffer, size_t len);
+
+/* Changelog barrier routines */
+void __chlog_barrier_enqueue (xlator_t *this, call_stub_t *stub);
+void __chlog_barrier_disable (xlator_t *this, struct list_head *queue);
+void chlog_barrier_dequeue_all (xlator_t *this, struct list_head *queue);
+call_stub_t *__chlog_barrier_dequeue (xlator_t *this, struct list_head *queue);
+int __chlog_barrier_enable (xlator_t *this, changelog_priv_t *priv);
+
+int32_t
+changelog_fill_entry_buf (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, changelog_local_t **local);
+
+/* event selection routines */
+void changelog_select_event (xlator_t *,
+ changelog_ev_selector_t *, unsigned int);
+void changelog_deselect_event (xlator_t *,
+ changelog_ev_selector_t *, unsigned int);
+int changelog_init_event_selection (xlator_t *,
+ changelog_ev_selector_t *);
+int changelog_cleanup_event_selection (xlator_t *,
+ changelog_ev_selector_t *);
+int changelog_ev_selected (xlator_t *,
+ changelog_ev_selector_t *, unsigned int);
+void
+changelog_dispatch_event (xlator_t *, changelog_priv_t *, changelog_event_t *);
+
+changelog_inode_ctx_t *
+__changelog_inode_ctx_get (xlator_t *, inode_t *, unsigned long **,
+ unsigned long *, changelog_log_type);
+int
+resolve_pargfid_to_path (xlator_t *this, const uuid_t gfid, char **path,
+ char *bname);
+
+/* macros */
+
+/*
+ * Unwind @frame for @fop and then release the changelog local attached to it.
+ * frame->local is detached (set to NULL) before STACK_UNWIND_STRICT runs and
+ * is cleaned up only afterwards; a chained ->prev_entry, if present, is
+ * cleaned up before the local itself.
+ * NOTE(review): when @frame is NULL, STACK_UNWIND_STRICT and
+ * changelog_local_cleanup are still invoked (with NULL xlator/local) -
+ * confirm both tolerate that.
+ */
+#define CHANGELOG_STACK_UNWIND(fop, frame, params ...) do { \
+ changelog_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __xl = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local && __local->prev_entry) \
+ changelog_local_cleanup (__xl, \
+ __local->prev_entry); \
+ changelog_local_cleanup (__xl, __local); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_REF(iobuf) do { \
+ if (iobuf) \
+ iobuf_ref (iobuf); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_UNREF(iobuf) do { \
+ if (iobuf) \
+ iobuf_unref (iobuf); \
+ } while (0)
+
+/* Copy @len bytes from @val into @buffer at offset @off, then advance @off.
+ * @off must be a modifiable lvalue; @len is evaluated twice.  Arguments are
+ * parenthesized so an expression passed as @buffer is not re-associated
+ * with the "+ off". */
+#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) do {               \
+                memcpy ((buffer) + (off), (val), (len));                \
+                (off) += (len);                                         \
+        } while (0)
+
+/* Increment every changelog_version[] entry of @slice, a pointer to a
+ * structure carrying that array (e.g. changelog_time_slice_t).  @slice is
+ * parenthesized so that &obj works, and the loop index has a macro-private
+ * name so it cannot capture an `i` used in the caller's argument. */
+#define SLICE_VERSION_UPDATE(slice) do {                                \
+                int __sv_idx = 0;                                       \
+                for (; __sv_idx < CHANGELOG_MAX_TYPE; __sv_idx++) {     \
+                        (slice)->changelog_version[__sv_idx]++;         \
+                }                                                       \
+        } while (0)
+
+#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_UINT32; \
+ co->co_uint32 = number; \
+ xlen += sizeof (unsigned int); \
+ } while (0)
+
+#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_FOP; \
+ co->co_fop = fop; \
+ xlen += sizeof (fop); \
+ } while (0)
+
+#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, \
+ converter, freefn, xlen, label) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = freefn; \
+ co->co_type = CHANGELOG_OPT_REC_ENTRY; \
+ gf_uuid_copy (co->co_entry.cef_uuid, pargfid); \
+ co->co_entry.cef_bname = gf_strdup(bname); \
+ if (!co->co_entry.cef_bname) \
+ goto label; \
+ xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \
+ } while (0)
+
+/*
+ * Fill an ENTRY optional record and additionally record a path in cef_path:
+ * when @capture_del is set and resolve_pargfid_to_path() succeeds, the
+ * resolved path is stored; otherwise an empty string is stored.  @xlen is
+ * grown by the lengths accounted for here.  Relies on a variable named
+ * `this` being in scope at the expansion site.  Jumps to @label when the
+ * basename or the path could not be allocated.
+ * NOTE(review): on a path allocation failure cef_bname is already set; this
+ * assumes the code at @label releases it (e.g. through co_free) - confirm.
+ */
+#define CHANGELOG_FILL_ENTRY_DIR_PATH(co, pargfid, bname, converter,         \
+                                      del_freefn, xlen, label, capture_del)  \
+        do {                                                                 \
+                co->co_convert = converter;                                  \
+                co->co_free = del_freefn;                                    \
+                co->co_type = CHANGELOG_OPT_REC_ENTRY;                       \
+                gf_uuid_copy (co->co_entry.cef_uuid, pargfid);               \
+                co->co_entry.cef_bname = gf_strdup(bname);                   \
+                if (!co->co_entry.cef_bname)                                 \
+                        goto label;                                          \
+                xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname));          \
+                if (!capture_del || resolve_pargfid_to_path (this, pargfid,  \
+                    &(co->co_entry.cef_path), co->co_entry.cef_bname)) {     \
+                        co->co_entry.cef_path = gf_strdup ("\0");            \
+                        if (!co->co_entry.cef_path)                          \
+                                goto label;                                  \
+                        xlen += 1;                                           \
+                } else {                                                     \
+                        /* the resolver returns 0 even when its own */       \
+                        /* gf_strdup() of the result yielded NULL   */       \
+                        if (!co->co_entry.cef_path)                          \
+                                goto label;                                  \
+                        xlen += (strlen (co->co_entry.cef_path));            \
+                }                                                            \
+        } while (0)
+
+#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_false)
+
+#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_true)
+
+#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) do { \
+ if (!priv->active) \
+ goto label; \
+ /* ignore rebalance process's activity. */ \
+ if ((frame->root->pid == GF_CLIENT_PID_DEFRAG) || \
+ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)) \
+ goto label; \
+ } while (0)
+
+/* If it is a METADATA entry and fop num being GF_FOP_NULL, don't
+ * log in the changelog as it is of no use. And also if it is
+ * logged, since slicing version checking is done for metadata
+ * entries, the subsequent entries with valid fop num which falls
+ * to same changelog will be missed. Hence check for boundary
+ * condition.
+ */
+#define CHANGELOG_OP_BOUNDARY_CHECK(frame, label) do { \
+ if (frame->root->op <= GF_FOP_NULL || \
+ frame->root->op >= GF_FOP_MAXVALUE) \
+ goto label; \
+ } while (0)
+
+/**
+ * ignore internal fops for all clients except AFR self-heal daemon
+ */
+#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) do { \
+ if ((frame->root->pid != GF_CLIENT_PID_SELF_HEALD) \
+ && dict \
+ && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+/* Jump to @label when changelog is not active or @cond holds.  @cond is
+ * parenthesized so that a conditional or assignment expression passed by the
+ * caller is not re-associated with the `||`. */
+#define CHANGELOG_COND_GOTO(priv, cond, label) do {                     \
+                if (!(priv)->active || (cond))                          \
+                        goto label;                                     \
+        } while (0)
+
+/* Begin: Geo-Rep snapshot dependency changes */
+
+#define DICT_ERROR -1
+#define BARRIER_OFF 0
+#define BARRIER_ON 1
+#define DICT_DEFAULT 2
+
+#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) do { \
+ if (!priv->active) { \
+ gf_msg (this->name, GF_LOG_WARNING, 0, \
+ CHANGELOG_MSG_NOT_ACTIVE, \
+ "Changelog is not active, return success"); \
+ ret = 0; \
+ goto label; \
+ } \
+ } while (0)
+
+/* Log pthread error and goto label.  When @ret is non-zero it is logged,
+ * overwritten with -1, and control jumps to @label.  Relies on `this` being
+ * in scope at the expansion site.  There is deliberately no ';' after
+ * while (0): the caller supplies it, which keeps the macro usable as the
+ * body of an if/else. */
+#define CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, label) do {               \
+                if (ret) {                                              \
+                        gf_msg (this->name, GF_LOG_ERROR,               \
+                                0, CHANGELOG_MSG_PTHREAD_ERROR,         \
+                                "pthread error: Error: %d", ret);       \
+                        ret = -1;                                       \
+                        goto label;                                     \
+                }                                                       \
+        } while (0)
+
+/* Log pthread error, set flag and goto label */
+#define CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, label, flag) do { \
+ if (ret) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
+ CHANGELOG_MSG_PTHREAD_ERROR, \
+ "pthread error: Error: %d", ret); \
+ ret = -1; \
+ flag = _gf_true; \
+ goto label; \
+ } \
+ } while (0)
+/* End: Geo-Rep snapshot dependency changes */
+
+#endif /* _CHANGELOG_HELPERS_H */
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
new file mode 100644
index 00000000000..33fea31b979
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MEM_TYPES_H
+#define _CHANGELOG_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* changelog's mem-type identifiers: numbering starts right after
+ * gf_common_mt_end and increases by one per entry, so gf_changelog_mt_end
+ * is one past the last real type. */
+enum gf_changelog_mem_types {
+        gf_changelog_mt_priv_t = gf_common_mt_end + 1,
+        gf_changelog_mt_str_t,
+        gf_changelog_mt_batch_t,
+        gf_changelog_mt_rt_t,
+        gf_changelog_mt_inode_ctx_t,
+        gf_changelog_mt_rpc_clnt_t,
+        gf_changelog_mt_libgfchangelog_t,
+        gf_changelog_mt_libgfchangelog_entry_t,
+        gf_changelog_mt_libgfchangelog_rl_t,
+        gf_changelog_mt_changelog_buffer_t,
+        gf_changelog_mt_history_data_t,
+        gf_changelog_mt_libgfchangelog_call_pool_t,
+        gf_changelog_mt_libgfchangelog_event_t,
+        gf_changelog_mt_ev_dispatcher_t,
+        gf_changelog_mt_end
+};
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
new file mode 100644
index 00000000000..e65a457b7c0
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -0,0 +1,450 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CHANGELOG_MESSAGES_H_
+#define _CHANGELOG_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file changelog-messages.h
+ * \brief CHANGELOG log-message IDs and their descriptions.
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE_CHANGELOG GLFS_MSGID_COMP_CHANGELOG
+#define GLFS_NUM_MESSAGES 55 /* highest message ID offset defined in this file; keeps GLFS_MSGID_END past it */
+#define GLFS_MSGID_END (GLFS_COMP_BASE_CHANGELOG + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_CHANGELOG, "Invalid: Start of messages"
+
+/*!
+ * @messageid
+ * @diagnosis open/opendir failed on a brick.
+ * @recommended action Error number in the log should give the reason why it
+ * failed. Also observe brick logs for more information.
+ */
+#define CHANGELOG_MSG_OPEN_FAILED (GLFS_COMP_BASE_CHANGELOG + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_NO_MEMORY (GLFS_COMP_BASE_CHANGELOG + 2)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_VOL_MISCONFIGURED (GLFS_COMP_BASE_CHANGELOG + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_RENAME_ERROR (GLFS_COMP_BASE_CHANGELOG + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_READ_ERROR (GLFS_COMP_BASE_CHANGELOG + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_HTIME_ERROR (GLFS_COMP_BASE_CHANGELOG + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_CHILD_MISCONFIGURED (GLFS_COMP_BASE_CHANGELOG + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET (GLFS_COMP_BASE_CHANGELOG + 10)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_CLOSE_ERROR (GLFS_COMP_BASE_CHANGELOG + 11)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PIPE_CREATION_ERROR (GLFS_COMP_BASE_CHANGELOG + 12)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_DICT_GET_FAILED (GLFS_COMP_BASE_CHANGELOG + 13)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_BARRIER_INFO (GLFS_COMP_BASE_CHANGELOG + 14)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_BARRIER_ERROR (GLFS_COMP_BASE_CHANGELOG + 15)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_GET_TIME_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 16)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_WRITE_FAILED (GLFS_COMP_BASE_CHANGELOG + 17)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PTHREAD_ERROR (GLFS_COMP_BASE_CHANGELOG + 18)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_INODE_NOT_FOUND (GLFS_COMP_BASE_CHANGELOG + 19)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_FSYNC_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 20)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_TOTAL_LOG_INFO (GLFS_COMP_BASE_CHANGELOG + 21)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_SNAP_INFO (GLFS_COMP_BASE_CHANGELOG + 22)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_SELECT_FAILED (GLFS_COMP_BASE_CHANGELOG + 23)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_FCNTL_FAILED (GLFS_COMP_BASE_CHANGELOG + 24)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_BNOTIFY_INFO (GLFS_COMP_BASE_CHANGELOG + 25)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_ENTRY_BUF_INFO (GLFS_COMP_BASE_CHANGELOG + 26)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_NOT_ACTIVE (GLFS_COMP_BASE_CHANGELOG + 27)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_LOCAL_INIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 28)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+ * NOTE(review): this ID (+28) is the same value that
+ * CHANGELOG_MSG_LOCAL_INIT_FAILED uses, so the two messages cannot be told
+ * apart by ID - confirm whether that is intended.
+*/
+#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED (GLFS_COMP_BASE_CHANGELOG + 28)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED (GLFS_COMP_BASE_CHANGELOG + 29)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_HANDLE_PROBE_ERROR (GLFS_COMP_BASE_CHANGELOG + 30)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_SET_FD_CONTEXT (GLFS_COMP_BASE_CHANGELOG + 31)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_FREEUP_FAILED (GLFS_COMP_BASE_CHANGELOG + 32)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_HTIME_INFO (GLFS_COMP_BASE_CHANGELOG + 33)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED (GLFS_COMP_BASE_CHANGELOG + 34)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_RPC_BUILD_ERROR (GLFS_COMP_BASE_CHANGELOG + 35)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_RPC_CONNECT_ERROR (GLFS_COMP_BASE_CHANGELOG + 36)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_RPC_START_ERROR (GLFS_COMP_BASE_CHANGELOG + 37)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+ * ID 38 fills the slot between CHANGELOG_MSG_RPC_START_ERROR (37) and
+ * CHANGELOG_MSG_SCAN_DIR_FAILED (39) and keeps this message distinct from
+ * CHANGELOG_MSG_VOL_MISCONFIGURED (3).
+*/
+#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR (GLFS_COMP_BASE_CHANGELOG + 38)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_SCAN_DIR_FAILED (GLFS_COMP_BASE_CHANGELOG + 39)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_FSETXATTR_FAILED (GLFS_COMP_BASE_CHANGELOG + 40)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_FGETXATTR_FAILED (GLFS_COMP_BASE_CHANGELOG + 41)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF \
+ (GLFS_COMP_BASE_CHANGELOG + 42)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_DISPATCH_EVENT_FAILED (GLFS_COMP_BASE_CHANGELOG + 43)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PUT_BUFFER_FAILED (GLFS_COMP_BASE_CHANGELOG + 44)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 45)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED (GLFS_COMP_BASE_CHANGELOG + 46)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_INJECT_FSYNC_FAILED (GLFS_COMP_BASE_CHANGELOG + 47)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_CREATE_FRAME_FAILED (GLFS_COMP_BASE_CHANGELOG + 48)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_FSTAT_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 49)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_LSEEK_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 50)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_STRSTR_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 51)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_UNLINK_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 52)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED \
+ (GLFS_COMP_BASE_CHANGELOG + 53)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_READLINK_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 54)
+
+/*!
+ @messageid
+ * @diagnosis
+ * @recommended action
+*/
+#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED (GLFS_COMP_BASE_CHANGELOG + 55)
+
+
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
new file mode 100644
index 00000000000..778f79c82c5
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MISC_H
+#define _CHANGELOG_MISC_H
+
+#include "glusterfs.h"
+#include "common-utils.h"
+
+#define CHANGELOG_MAX_TYPE 3
+#define CHANGELOG_FILE_NAME "CHANGELOG"
+#define HTIME_FILE_NAME "HTIME"
+#define CSNAP_FILE_NAME "CHANGELOG.SNAP"
+#define HTIME_KEY "trusted.glusterfs.htime"
+#define HTIME_CURRENT "trusted.glusterfs.current_htime"
+#define HTIME_INITIAL_VALUE "0:0"
+
+#define CHANGELOG_VERSION_MAJOR 1
+#define CHANGELOG_VERSION_MINOR 2
+
+#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock"
+#define CHANGELOG_TMP_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/.%s%lu.sock"
+
+/**
+ * The header starts with the version and the encoding format of the
+ * changelog. 'version' is not of much use at present.
+ */
+#define CHANGELOG_HEADER \
+ "GlusterFS Changelog | version: v%d.%d | encoding : %d\n"
+
+/**
+ * Format the brick's changelog socket path into 'sockpath' (at most
+ * 'len' bytes, via snprintf): CHANGELOG_UNIX_SOCK with its "%s" replaced
+ * by the string md5_wrapper() produces for 'brick_path'.
+ *
+ * The snprintf result is discarded, so truncation is not reported.
+ * The macro declares a local named 'md5_sum'; do not pass an argument
+ * with that name.
+ */
+#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \
+ char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \
+ md5_wrapper((unsigned char *) brick_path, \
+ strlen(brick_path), \
+ md5_sum); \
+ (void) snprintf (sockpath, len, \
+ CHANGELOG_UNIX_SOCK, md5_sum); \
+ } while (0)
+
+/**
+ * Same as CHANGELOG_MAKE_SOCKET_PATH but uses CHANGELOG_TMP_UNIX_SOCK,
+ * whose format additionally embeds the calling process id (getpid ()).
+ *
+ * The macro declares locals named 'pid' and 'md5_sum'; do not pass
+ * arguments with those names. Truncation is not reported.
+ */
+#define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) do { \
+ unsigned long pid = 0; \
+ char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \
+ pid = (unsigned long) getpid (); \
+ md5_wrapper((unsigned char *) brick_path, \
+ strlen(brick_path), \
+ md5_sum); \
+ (void) snprintf (sockpath, \
+ len, CHANGELOG_TMP_UNIX_SOCK, \
+ md5_sum, pid); \
+ } while (0)
+
+
+/**
+ * ... used by libgfchangelog.
+ *
+ * Read the first line of the changelog open on 'fd' into 'buffer'
+ * (at most 'len' bytes, via fgets) and parse it against CHANGELOG_HEADER.
+ *
+ * Outputs (all must be assignable lvalues):
+ *   enc, maj, min - preset to -1, then filled by sscanf with the
+ *                   encoding, major and minor version respectively;
+ *   elen          - strlen of the line that was read; left untouched
+ *                   when nothing could be read.
+ *
+ * 'fd' itself is not closed: the macro works on a dup () of it, which
+ * is closed through fclose () (or sys_close () when fdopen () fails).
+ * The sscanf result is not checked, so callers should validate the
+ * outputs (e.g. enc still being -1).
+ *
+ * NOTE(review): a dup'ed descriptor shares its file offset with 'fd',
+ * and stdio may read beyond the first line -- confirm callers
+ * reposition 'fd' before reading records.
+ */
+#define CHANGELOG_GET_HEADER_INFO(fd, buffer, len, enc, maj, min, elen) do { \
+ FILE *fp; \
+ int fd_dup; \
+ \
+ enc = -1; \
+ maj = -1; \
+ min = -1; \
+ fd_dup = dup (fd); \
+ \
+ if (fd_dup != -1) { \
+ fp = fdopen (fd_dup, "r"); \
+ if (fp) { \
+ if (fgets (buffer, len, fp)) { \
+ elen = strlen (buffer); \
+ sscanf (buffer, \
+ CHANGELOG_HEADER, \
+ &maj, &min, &enc); \
+ } \
+ fclose (fp); \
+ } else { \
+ sys_close (fd_dup); \
+ } \
+ } \
+ } while (0)
+
+/**
+ * Build "<changelog_dir>/htime" in 'path'.
+ *
+ * 'path' must be a char ARRAY (not a pointer): sizeof (path) is used as
+ * the bound. The result is always NUL-terminated and is truncated, never
+ * overflowed, when the directory name is too long for 'path'.
+ */
+#define CHANGELOG_FILL_HTIME_DIR(changelog_dir, path) do {              \
+                (void) snprintf ((path), sizeof (path), "%s/htime",     \
+                                 (changelog_dir));                      \
+        } while(0)
+
+/**
+ * Build "<changelog_dir>/csnap" in 'path'; same contract as
+ * CHANGELOG_FILL_HTIME_DIR.
+ */
+#define CHANGELOG_FILL_CSNAP_DIR(changelog_dir, path) do {              \
+                (void) snprintf ((path), sizeof (path), "%s/csnap",     \
+                                 (changelog_dir));                      \
+        } while(0)
+/**
+ * everything after 'CHANGELOG_TYPE_ENTRY' are internal types
+ * (ie. none of the fops trigger this type of event), hence
+ * CHANGELOG_MAX_TYPE = 3
+ */
+typedef enum {
+        CHANGELOG_TYPE_DATA = 0,
+        CHANGELOG_TYPE_METADATA,
+        CHANGELOG_TYPE_ENTRY,
+        CHANGELOG_TYPE_ROLLOVER,
+        CHANGELOG_TYPE_FSYNC,
+} changelog_log_type;
+
+/* operation modes - RT for now */
+typedef enum {
+        CHANGELOG_MODE_RT = 0,
+} changelog_mode_t;
+
+/**
+ * encoder types: MIN and MAX are exclusive sentinels, the usable
+ * encodings are the values strictly between them.
+ */
+typedef enum {
+        CHANGELOG_ENCODE_MIN = 0,
+        CHANGELOG_ENCODE_BINARY,
+        CHANGELOG_ENCODE_ASCII,
+        CHANGELOG_ENCODE_MAX,
+} changelog_encoder_t;
+
+/**
+ * Predicates over the enums above. Arguments are parenthesized so that
+ * expressions such as (a | b) or (c ? x : y) are compared as a whole.
+ * CHANGELOG_VALID_ENCODING evaluates its argument twice: avoid side
+ * effects in it.
+ */
+#define CHANGELOG_VALID_ENCODING(enc)                                   \
+        (((enc) > CHANGELOG_ENCODE_MIN) && ((enc) < CHANGELOG_ENCODE_MAX))
+
+#define CHANGELOG_TYPE_IS_ENTRY(type) ((type) == CHANGELOG_TYPE_ENTRY)
+#define CHANGELOG_TYPE_IS_ROLLOVER(type) ((type) == CHANGELOG_TYPE_ROLLOVER)
+#define CHANGELOG_TYPE_IS_FSYNC(type) ((type) == CHANGELOG_TYPE_FSYNC)
+
+#endif /* _CHANGELOG_MISC_H */
diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
new file mode 100644
index 00000000000..4525923d34d
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc-common.c
@@ -0,0 +1,349 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-rpc-common.h"
+#include "changelog-messages.h"
+
+#include "syscall.h"
+/**
+*****************************************************
+ Client Interface
+*****************************************************
+*/
+
+/**
+ * Client-side helpers: the event poller thread body, followed by the
+ * routine that initializes and returns an RPC client object for a
+ * given unix domain socket.
+ */
+
+/**
+ * Thread entry point: run event_dispatch () on the event pool of the
+ * xlator passed in 'arg'. The dispatch result is ignored and the thread
+ * always returns NULL.
+ */
+void *
+changelog_rpc_poller (void *arg)
+{
+        xlator_t *xl = (xlator_t *) arg;
+
+        (void) event_dispatch (xl->ctx->event_pool);
+
+        return NULL;
+}
+
+/**
+ * Create and start an RPC client for the unix domain socket 'sockfile'.
+ *
+ * @param this     owning xlator (its name is used for the client and logs)
+ * @param cbkdata  data handed to 'fn'; 'this' is used when it is NULL
+ * @param sockfile path of the unix domain socket to connect to
+ * @param fn       notification callback registered on the client
+ *
+ * @return the started client, or NULL on any failure.
+ *
+ * NOTE(review): on success the local reference on the options dict is
+ * not dropped here -- confirm ownership passes to the client/transport.
+ */
+struct rpc_clnt *
+changelog_rpc_client_init (xlator_t *this, void *cbkdata,
+                           char *sockfile, rpc_clnt_notify_t fn)
+{
+        struct rpc_clnt *clnt = NULL;
+        dict_t          *opts = NULL;
+        void            *data = cbkdata ? cbkdata : this;
+
+        opts = dict_new ();
+        if (opts == NULL)
+                return NULL;
+
+        if (rpc_transport_unix_options_build (&opts, sockfile, 0) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_RPC_BUILD_ERROR,
+                        "failed to build rpc options");
+                goto unref_opts;
+        }
+
+        clnt = rpc_clnt_new (opts, this, this->name, 16);
+        if (clnt == NULL)
+                goto unref_opts;
+
+        if (rpc_clnt_register_notify (clnt, fn, data) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
+                        "failed to register notify");
+                goto unref_clnt;
+        }
+
+        if (rpc_clnt_start (clnt) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_RPC_START_ERROR,
+                        "failed to start rpc");
+                goto unref_clnt;
+        }
+
+        return clnt;
+
+ unref_clnt:
+        rpc_clnt_unref (clnt);
+ unref_opts:
+        dict_unref (opts);
+        return NULL;
+}
+
+/**
+ * Generic RPC client routine to dispatch a request to an
+ * RPC server.
+ *
+ * When 'req' is non-NULL it is XDR-encoded with 'xdrproc' into an iobuf
+ * taken from this->ctx->iobuf_pool and handed to rpc_clnt_submit ()
+ * ahead of 'payload'; when it is NULL only 'payload' is submitted.
+ * If the caller supplies no 'iobref', a temporary one is created for
+ * the call and released before returning.
+ *
+ * @return the result of rpc_clnt_submit (), or -1 when the request could
+ *         not be prepared (iobuf/iobref allocation or XDR encoding
+ *         failed) and nothing was submitted.
+ */
+int
+changelog_rpc_sumbit_req (struct rpc_clnt *rpc, void *req,
+                          call_frame_t *frame, rpc_clnt_prog_t *prog,
+                          int procnum, struct iovec *payload, int payloadcnt,
+                          struct iobref *iobref, xlator_t *this,
+                          fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+        /* stays -1 unless rpc_clnt_submit () is actually reached */
+        int           ret        = -1;
+        int           count      = 0;
+        struct iovec  iov        = {0, };
+        struct iobuf *iobuf      = NULL;
+        char          new_iobref = 0;
+        ssize_t       xdr_size   = 0;
+
+        GF_ASSERT (this);
+
+        if (req) {
+                xdr_size = xdr_sizeof (xdrproc, req);
+
+                iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+                if (!iobuf)
+                        goto out;
+
+                if (!iobref) {
+                        iobref = iobref_new ();
+                        if (!iobref)
+                                goto out;
+
+                        new_iobref = 1;
+                }
+
+                iobref_add (iobref, iobuf);
+
+                iov.iov_base = iobuf->ptr;
+                iov.iov_len  = iobuf_size (iobuf);
+
+                /* Create the xdr payload */
+                ret = xdr_serialize_generic (iov, req, xdrproc);
+                if (ret == -1)
+                        goto out;
+
+                /* shrink the vector to the bytes actually encoded */
+                iov.iov_len = ret;
+                count = 1;
+        }
+
+        ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, &iov, count,
+                               payload, payloadcnt, iobref, frame, NULL,
+                               0, NULL, 0, NULL);
+
+ out:
+        if (new_iobref)
+                iobref_unref (iobref);
+        if (iobuf)
+                iobuf_unref (iobuf);
+        return ret;
+}
+
+/**
+ * Entry point to perform a remote procedure call: create a call frame
+ * on 'this', run the handler stored at prog->proctable[procidx] with
+ * 'arg', then destroy the frame's stack.
+ *
+ * @return the handler's result; 0 when the table entry has no handler;
+ *         -1 when 'this' or 'prog' is NULL or the frame cannot be
+ *         created.
+ *
+ * NOTE(review): 'rpc' is not used here and 'procidx' is not range
+ * checked against the procedure table -- callers must pass a valid index.
+ */
+int
+changelog_invoke_rpc (xlator_t *this, struct rpc_clnt *rpc,
+                      rpc_clnt_prog_t *prog, int procidx, void *arg)
+{
+        int                   op_ret = 0;
+        call_frame_t         *frame  = NULL;
+        rpc_clnt_procedure_t *entry  = NULL;
+
+        if (this == NULL || prog == NULL)
+                return -1;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (frame == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_CREATE_FRAME_FAILED,
+                        "failed to create frame");
+                return -1;
+        }
+
+        entry = &prog->proctable[procidx];
+        if (entry->fn != NULL)
+                op_ret = entry->fn (frame, this, arg);
+
+        STACK_DESTROY (frame->root);
+        return op_ret;
+}
+
+/**
+*****************************************************
+ Server Interface
+*****************************************************
+*/
+
+/**
+ * XDR-encode 'arg' with 'xdrproc' into a freshly obtained iobuf.
+ *
+ * On success 'outmsg' points at the buffer with iov_len set to the
+ * encoded length, and the iobuf is returned (the caller must unref it).
+ * Returns NULL when no buffer is available or encoding fails; in the
+ * latter case the buffer is released here.
+ */
+struct iobuf *
+__changelog_rpc_serialize_reply (rpcsvc_request_t *req, void *arg,
+                                 struct iovec *outmsg, xdrproc_t xdrproc)
+{
+        struct iobuf *buf      = NULL;
+        ssize_t       encoded  = 0;
+        ssize_t       rsp_size = xdr_sizeof (xdrproc, arg);
+
+        buf = iobuf_get2 (req->svc->ctx->iobuf_pool, rsp_size);
+        if (buf == NULL)
+                return NULL;
+
+        iobuf_to_iovec (buf, outmsg);
+
+        encoded = xdr_serialize_generic (*outmsg, arg, xdrproc);
+        if (encoded == -1) {
+                iobuf_unref (buf);
+                return NULL;
+        }
+
+        outmsg->iov_len = encoded;
+        return buf;
+}
+
+/**
+ * Serialize 'arg' with 'xdrproc' and submit it, together with the
+ * optional 'payload', as the reply to 'req'.
+ *
+ * A temporary iobref is created (and released before returning) when
+ * the caller passes none. A serialization failure is logged but the
+ * reply is still submitted with whatever the reply vector holds.
+ *
+ * @return the result of rpcsvc_submit_generic (), or -1 when 'req' is
+ *         NULL or the temporary iobref cannot be allocated.
+ */
+int
+changelog_rpc_sumbit_reply (rpcsvc_request_t *req,
+                            void *arg, struct iovec *payload, int payloadcount,
+                            struct iobref *iobref, xdrproc_t xdrproc)
+{
+        int           ret     = -1;
+        int           own_ref = 0;
+        struct iobuf *rsp_buf = NULL;
+        struct iovec  rsp_vec = {0,};
+
+        if (req == NULL)
+                return ret;
+
+        if (iobref == NULL) {
+                iobref = iobref_new ();
+                if (iobref == NULL)
+                        return ret;
+                own_ref = 1;
+        }
+
+        rsp_buf = __changelog_rpc_serialize_reply (req, arg,
+                                                   &rsp_vec, xdrproc);
+        if (rsp_buf != NULL) {
+                iobref_add (iobref, rsp_buf);
+        } else {
+                gf_msg ("", GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+                        "failed to serialize reply");
+        }
+
+        ret = rpcsvc_submit_generic (req, &rsp_vec,
+                                     1, payload, payloadcount, iobref);
+
+        if (own_ref)
+                iobref_unref (iobref);
+        if (rsp_buf != NULL)
+                iobuf_unref (rsp_buf);
+
+        return ret;
+}
+
+/**
+ * Tear down an RPC server created by changelog_rpc_server_init ():
+ * unregister every program in the NULL-terminated 'progs' list, destroy
+ * all listeners, unregister the notify callback 'fn', unlink 'sockfile'
+ * and free 'rpc'.
+ *
+ * Does nothing when 'rpc' is NULL (no server was created).
+ */
+void
+changelog_rpc_server_destroy (xlator_t *this, rpcsvc_t *rpc, char *sockfile,
+                              rpcsvc_notify_t fn, struct rpcsvc_program **progs)
+{
+        rpcsvc_listener_t     *listener = NULL;
+        rpcsvc_listener_t     *next     = NULL;
+        struct rpcsvc_program *prog     = NULL;
+
+        if (!rpc)
+                return;
+
+        /* walk the list: the cursor must advance or this never ends */
+        while (progs && *progs) {
+                prog = *progs;
+                (void) rpcsvc_program_unregister (rpc, prog);
+                progs++;
+        }
+
+        list_for_each_entry_safe (listener, next, &rpc->listeners, list) {
+                rpcsvc_listener_destroy (listener);
+        }
+
+        (void) rpcsvc_unregister_notify (rpc, fn, this);
+        sys_unlink (sockfile);
+
+        GF_FREE (rpc);
+}
+
+/**
+ * Create an RPC server listening on the unix domain socket 'sockfile'
+ * and register every program of the NULL-terminated 'progs' list.
+ *
+ * @param this     owning xlator
+ * @param sockfile path of the unix domain socket to listen on
+ * @param cbkdata  data handed to 'fn'; 'this' is used when it is NULL
+ * @param fn       notification callback registered on the server
+ * @param progs    NULL-terminated array of programs to register
+ *
+ * @return the server object, or NULL on any failure.
+ *
+ * NOTE(review): the failure path only frees 'rpc'; listeners or
+ * programs registered before the failure are not undone here.
+ */
+rpcsvc_t *
+changelog_rpc_server_init (xlator_t *this, char *sockfile, void *cbkdata,
+                           rpcsvc_notify_t fn, struct rpcsvc_program **progs)
+{
+        int                    ret     = 0;
+        rpcsvc_t              *rpc     = NULL;
+        dict_t                *options = NULL;
+        struct rpcsvc_program *prog    = NULL;
+
+        if (!cbkdata)
+                cbkdata = this;
+
+        options = dict_new ();
+        if (!options)
+                goto error_return;
+
+        ret = rpcsvc_transport_unix_options_build (&options, sockfile);
+        if (ret)
+                goto dealloc_dict;
+
+        rpc = rpcsvc_init (this, this->ctx, options, 8);
+        if (rpc == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_RPC_START_ERROR,
+                        "failed to init rpc");
+                goto dealloc_dict;
+        }
+
+        ret = rpcsvc_register_notify (rpc, fn, cbkdata);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
+                        "failed to register notify function");
+                goto dealloc_rpc;
+        }
+
+        /* exactly one listener is expected to be created */
+        ret = rpcsvc_create_listeners (rpc, options, this->name);
+        if (ret != 1) {
+                gf_msg_debug (this->name,
+                              0, "failed to create listeners");
+                goto dealloc_rpc;
+        }
+
+        while (*progs) {
+                prog = *progs;
+                ret = rpcsvc_program_register (rpc, prog);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED,
+                                "cannot register program "
+                                "(name: %s, prognum: %d, progver: %d)",
+                                prog->progname, prog->prognum, prog->progver);
+                        goto dealloc_rpc;
+                }
+
+                progs++;
+        }
+
+        dict_unref (options);
+        return rpc;
+
+ dealloc_rpc:
+        GF_FREE (rpc);
+ dealloc_dict:
+        dict_unref (options);
+ error_return:
+        return NULL;
+}
diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h
new file mode 100644
index 00000000000..95c850c9400
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc-common.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHANGELOG_RPC_COMMON_H
+#define __CHANGELOG_RPC_COMMON_H
+
+#include "rpcsvc.h"
+#include "rpc-clnt.h"
+#include "event.h"
+#include "call-stub.h"
+
+#include "changelog-xdr.h"
+#include "xdr-generic.h"
+
+#include "changelog.h"
+
+/**
+ * Let's keep this non-configurable for now.
+ * NR_DISPATCHERS is always one less than NR_ROTT_BUFFS.
+ */
+#define NR_ROTT_BUFFS 4
+#define NR_DISPATCHERS (NR_ROTT_BUFFS - 1)
+
+/**
+ * Procedure numbers of the changelog RPC program.
+ * CHANGELOG_RPC_PROC_MAX is a count, not a procedure.
+ */
+enum changelog_rpc_procnum {
+ CHANGELOG_RPC_PROC_NULL = 0,
+ CHANGELOG_RPC_PROBE_FILTER = 1,
+ CHANGELOG_RPC_PROC_MAX = 2,
+};
+
+/* program number and version of the changelog RPC program */
+#define CHANGELOG_RPC_PROGNUM 1885957735
+#define CHANGELOG_RPC_PROGVER 1
+
+/**
+ * reverse connection: data xfer path
+ * CHANGELOG_REV_PROC_MAX is a count, not a procedure.
+ */
+enum changelog_reverse_rpc_procnum {
+ CHANGELOG_REV_PROC_NULL = 0,
+ CHANGELOG_REV_PROC_EVENT = 1,
+ CHANGELOG_REV_PROC_MAX = 2,
+};
+
+/*
+ * NOTE(review): despite the PROCNUM/PROCVER spelling these look like the
+ * program number and version of the reverse program -- confirm at the
+ * point of use.
+ */
+#define CHANGELOG_REV_RPC_PROCNUM 1886350951
+#define CHANGELOG_REV_RPC_PROCVER 1
+
+/* bundle of a server object, a client object and a socket path */
+typedef struct changelog_rpc {
+ rpcsvc_t *svc;
+ struct rpc_clnt *rpc;
+ char sock[UNIX_PATH_MAX]; /* tied to server */
+} changelog_rpc_t;
+
+/* event poller: thread body, takes the xlator as its argument */
+void *changelog_rpc_poller (void *);
+
+/* CLIENT API */
+
+/* (xlator, callback data, socket path, notify fn) -> started client or NULL */
+struct rpc_clnt *
+changelog_rpc_client_init (xlator_t *, void *, char *, rpc_clnt_notify_t);
+
+/*
+ * (client, request, frame, program, procedure number, payload,
+ *  payload count, iobref, xlator, reply callback, xdr encoder)
+ */
+int
+changelog_rpc_sumbit_req (struct rpc_clnt *, void *, call_frame_t *,
+ rpc_clnt_prog_t *, int , struct iovec *, int,
+ struct iobref *, xlator_t *, fop_cbk_fn_t, xdrproc_t);
+
+/*
+ * (xlator, client, program, index into the program's procedure table,
+ *  argument passed to the procedure); -1 on error
+ */
+int
+changelog_invoke_rpc (xlator_t *, struct rpc_clnt *,
+ rpc_clnt_prog_t *, int , void *);
+
+/* SERVER API */
+
+/* (request, reply, payload, payload count, iobref, xdr encoder) */
+int
+changelog_rpc_sumbit_reply (rpcsvc_request_t *, void *,
+ struct iovec *, int, struct iobref *, xdrproc_t);
+/*
+ * (xlator, socket path, callback data, notify fn, NULL-terminated
+ *  program list) -> server or NULL
+ */
+rpcsvc_t *
+changelog_rpc_server_init (xlator_t *, char *, void*,
+ rpcsvc_notify_t, struct rpcsvc_program **);
+/* (xlator, server, socket path, notify fn, NULL-terminated program list) */
+void
+changelog_rpc_server_destroy (xlator_t *, rpcsvc_t *, char *,
+ rpcsvc_notify_t, struct rpcsvc_program **);
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c
new file mode 100644
index 00000000000..4bc24203118
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc.c
@@ -0,0 +1,305 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-rpc.h"
+#include "changelog-mem-types.h"
+#include "changelog-ev-handle.h"
+
+struct rpcsvc_program *changelog_programs[];
+
+/**
+ * Clean up dispatcher threads priv->ev_dispatcher[count] down to
+ * priv->ev_dispatcher[0]. 'count' is the highest index to clean
+ * (inclusive); a negative value cleans nothing.
+ */
+static void
+changelog_cleanup_dispatchers (xlator_t *this,
+                               changelog_priv_t *priv, int count)
+{
+        int idx = count;
+
+        while (idx >= 0) {
+                (void) changelog_thread_cleanup (this,
+                                                 priv->ev_dispatcher[idx]);
+                idx--;
+        }
+}
+
+/**
+ * Stop the connector and dispatcher threads, drop the tracked
+ * connections and destroy the synchronization objects kept in
+ * priv->connections.
+ *
+ * @return 0 on success; -1 when the connector thread cannot be cleaned
+ *         up or a lock/condition destroy call fails (whatever follows
+ *         the failing step is left untouched).
+ */
+static int
+changelog_cleanup_rpc_threads (xlator_t *this, changelog_priv_t *priv)
+{
+        int               ret  = 0;
+        changelog_clnt_t *conn = &priv->connections;
+
+        /** terminate RPC thread(s) */
+        ret = changelog_thread_cleanup (this, priv->connector);
+        if (ret != 0)
+                goto error_return;
+
+        /**
+         * terminate dispatcher thread(s): the helper takes the highest
+         * index to clean (inclusive), so pass nr_dispatchers - 1 to stay
+         * within ev_dispatcher[0 .. nr_dispatchers - 1].
+         */
+        changelog_cleanup_dispatchers (this, priv, priv->nr_dispatchers - 1);
+
+        /* TODO: what about pending and waiting connections? */
+        changelog_ev_cleanup_connections (this, conn);
+
+        /* destroy locks */
+        ret = pthread_mutex_destroy (&conn->pending_lock);
+        if (ret != 0)
+                goto error_return;
+        ret = pthread_cond_destroy (&conn->pending_cond);
+        if (ret != 0)
+                goto error_return;
+        ret = LOCK_DESTROY (&conn->active_lock);
+        if (ret != 0)
+                goto error_return;
+        ret = LOCK_DESTROY (&conn->wait_lock);
+        if (ret != 0)
+                goto error_return;
+        return 0;
+
+ error_return:
+        return -1;
+}
+
+/**
+ * Initialize priv->connections (lists, locks, condition variable) and
+ * spawn the reverse-connection thread plus 'nr_dispatchers' dispatcher
+ * threads, all of which receive the connection object as argument.
+ *
+ * @return 0 on success with priv->nr_dispatchers set; -1 on failure,
+ *         after undoing what had been set up (the dispatcher array is
+ *         freed and priv->ev_dispatcher reset to NULL).
+ *
+ * NOTE(review): on failure the connector thread is only cancelled, not
+ * joined, before the locks it may use are destroyed.
+ */
+static int
+changelog_init_rpc_threads (xlator_t *this, changelog_priv_t *priv,
+                            rbuf_t *rbuf, int nr_dispatchers)
+{
+        int               j    = 0;
+        int               ret  = 0;
+        changelog_clnt_t *conn = NULL;
+
+        conn = &priv->connections;
+
+        conn->this = this;
+        conn->rbuf = rbuf;
+        conn->sequence = 1; /* start with sequence number one */
+
+        INIT_LIST_HEAD (&conn->pending);
+        INIT_LIST_HEAD (&conn->active);
+        INIT_LIST_HEAD (&conn->waitq);
+
+        ret = pthread_mutex_init (&conn->pending_lock, NULL);
+        if (ret)
+                goto error_return;
+        ret = pthread_cond_init (&conn->pending_cond, NULL);
+        if (ret)
+                goto cleanup_pending_lock;
+
+        ret = LOCK_INIT (&conn->active_lock);
+        if (ret)
+                goto cleanup_pending_cond;
+        ret = LOCK_INIT (&conn->wait_lock);
+        if (ret)
+                goto cleanup_active_lock;
+
+        /* spawn reverse connection thread */
+        ret = pthread_create (&priv->connector,
+                              NULL, changelog_ev_connector, conn);
+        if (ret != 0)
+                goto cleanup_wait_lock;
+
+        /* one thread id slot per dispatcher */
+        priv->ev_dispatcher = GF_CALLOC (nr_dispatchers, sizeof(pthread_t),
+                                         gf_changelog_mt_ev_dispatcher_t);
+        if (!priv->ev_dispatcher)
+                goto cleanup_connector;
+
+        /* spawn dispatcher threads */
+        for (j = 0; j < nr_dispatchers; j++) {
+                ret = pthread_create (&priv->ev_dispatcher[j],
+                                      NULL, changelog_ev_dispatch, conn);
+                if (ret != 0) {
+                        /* stop the threads [0 .. j - 1] that did start */
+                        changelog_cleanup_dispatchers (this, priv, j - 1);
+                        goto cleanup_dispatcher_array;
+                }
+        }
+
+        priv->nr_dispatchers = nr_dispatchers;
+        return 0;
+
+ cleanup_dispatcher_array:
+        /* do not leak the thread id array on a partial start */
+        GF_FREE (priv->ev_dispatcher);
+        priv->ev_dispatcher = NULL;
+ cleanup_connector:
+        (void) pthread_cancel (priv->connector);
+ cleanup_wait_lock:
+        LOCK_DESTROY (&conn->wait_lock);
+ cleanup_active_lock:
+        LOCK_DESTROY (&conn->active_lock);
+ cleanup_pending_cond:
+        (void) pthread_cond_destroy (&conn->pending_cond);
+ cleanup_pending_lock:
+        (void) pthread_mutex_destroy (&conn->pending_lock);
+ error_return:
+        return -1;
+}
+
+/**
+ * Server notification callback: every event is ignored and 0 is
+ * returned.
+ */
+int
+changelog_rpcsvc_notify (rpcsvc_t *rpc,
+                         void *xl, rpcsvc_event_t event, void *data)
+{
+        (void) rpc;
+        (void) xl;
+        (void) event;
+        (void) data;
+
+        return 0;
+}
+
+/**
+ * Destroy the changelog RPC server of this brick, then stop the RPC
+ * helper threads. The socket path is recomputed from
+ * priv->changelog_brick; the thread cleanup result is ignored.
+ */
+void
+changelog_destroy_rpc_listner (xlator_t *this, changelog_priv_t *priv)
+{
+        char path[UNIX_PATH_MAX] = {0,};
+
+        /* sockfile path could have been saved to avoid this */
+        CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick,
+                                    path, sizeof (path));
+
+        changelog_rpc_server_destroy (this, priv->rpc, path,
+                                      changelog_rpcsvc_notify,
+                                      changelog_programs);
+
+        (void) changelog_cleanup_rpc_threads (this, priv);
+}
+
+/**
+ * Start the RPC helper threads, then create the changelog RPC server
+ * on the brick's socket path (derived from priv->changelog_brick).
+ *
+ * @return the server object, or NULL when the threads or the server
+ *         cannot be set up.
+ *
+ * NOTE(review): when server creation fails the threads started here
+ * are left running -- confirm the caller tears them down.
+ */
+rpcsvc_t *
+changelog_init_rpc_listner (xlator_t *this, changelog_priv_t *priv,
+                            rbuf_t *rbuf, int nr_dispatchers)
+{
+        char path[UNIX_PATH_MAX] = {0,};
+
+        if (changelog_init_rpc_threads (this, priv, rbuf, nr_dispatchers))
+                return NULL;
+
+        CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick,
+                                    path, sizeof (path));
+
+        return changelog_rpc_server_init (this, path, NULL,
+                                          changelog_rpcsvc_notify,
+                                          changelog_programs);
+}
+
+/**
+ * Release a reverse-connection client object: detach it from its
+ * connection set, destroy its lock and free it. NULL is accepted and
+ * ignored.
+ */
+void
+changelog_rpc_clnt_cleanup (changelog_rpc_clnt_t *crpc)
+{
+        if (crpc != NULL) {
+                crpc->c_clnt = NULL;
+                LOCK_DESTROY (&crpc->lock);
+                GF_FREE (crpc);
+        }
+}
+
+/**
+ * Allocate and fill a reverse-connection client object from a probe
+ * request: filter and socket path are copied from 'rpc_req', the object
+ * starts with one reference, the disconnect flag cleared and
+ * changelog_rpc_clnt_cleanup as its cleanup routine.
+ *
+ * @return the new object, or NULL on allocation or lock-init failure.
+ *
+ * NOTE(review): the socket path is copied with strlen () of the
+ * request's string and no bound on the destination -- confirm both
+ * buffers have the same fixed size and the source is NUL-terminated.
+ */
+static changelog_rpc_clnt_t *
+changelog_rpc_clnt_init (xlator_t *this,
+                         changelog_probe_req *rpc_req, changelog_clnt_t *c_clnt)
+{
+        changelog_rpc_clnt_t *clnt = NULL;
+
+        clnt = GF_CALLOC (1, sizeof (*clnt), gf_changelog_mt_rpc_clnt_t);
+        if (clnt == NULL)
+                return NULL;
+
+        INIT_LIST_HEAD (&clnt->list);
+
+        /* Take a ref, the last unref will be on RPC_CLNT_DESTROY
+         * which comes as a result of last rpc_clnt_unref.
+         */
+        clnt->ref = 1;
+        changelog_set_disconnect_flag (clnt, _gf_false);
+
+        clnt->filter = rpc_req->filter;
+        (void) memcpy (clnt->sock, rpc_req->sock, strlen (rpc_req->sock));
+
+        clnt->this    = this;
+        clnt->c_clnt  = c_clnt;
+        clnt->cleanup = changelog_rpc_clnt_cleanup;
+
+        if (LOCK_INIT (&clnt->lock) != 0) {
+                GF_FREE (clnt);
+                return NULL;
+        }
+
+        return clnt;
+}
+
+/**
+ * Actor declarations
+ */
+
+/**
+ * @probe_handler
+ * A probe RPC call spawns a connect back to the caller. Caller also
+ * passes a hint which acts as a filter for selecting updates.
+ *
+ * The request is decoded, turned into a client object and queued on the
+ * xlator's connection set. A reply is always submitted: op_ret is 0
+ * once the connection is queued and -1 when decoding or allocation
+ * fails. The function itself always returns 0.
+ */
+
+int
+changelog_handle_probe (rpcsvc_request_t *req)
+{
+        xlator_t             *this   = NULL;
+        rpcsvc_t             *svc    = NULL;
+        changelog_priv_t     *priv   = NULL;
+        changelog_clnt_t     *c_clnt = NULL;
+        changelog_rpc_clnt_t *crpc   = NULL;
+
+        changelog_probe_req   rpc_req = {0,};
+        changelog_probe_rsp   rpc_rsp = {0,};
+
+        /* assume failure until the connection has been queued */
+        rpc_rsp.op_ret = -1;
+
+        if (xdr_to_generic (req->msg[0], &rpc_req,
+                            (xdrproc_t)xdr_changelog_probe_req) < 0) {
+                gf_msg ("", GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_HANDLE_PROBE_ERROR,
+                        "xdr decoding error");
+                req->rpc_err = GARBAGE_ARGS;
+                goto submit_rpc;
+        }
+
+        /* ->xl hidden in rpcsvc */
+        svc    = rpcsvc_request_service (req);
+        this   = svc->xl;
+        priv   = this->private;
+        c_clnt = &priv->connections;
+
+        crpc = changelog_rpc_clnt_init (this, &rpc_req, c_clnt);
+        if (crpc == NULL)
+                goto submit_rpc;
+
+        changelog_ev_queue_connection (c_clnt, crpc);
+        rpc_rsp.op_ret = 0;
+
+ submit_rpc:
+        (void) changelog_rpc_sumbit_reply (req, &rpc_rsp, NULL, 0, NULL,
+                                           (xdrproc_t)xdr_changelog_probe_rsp);
+        return 0;
+}
+
+/**
+ * RPC declarations
+ */
+
+/*
+ * Actor table, indexed by procedure number. Only the probe procedure
+ * has an entry; the remaining slot is left zero-initialized.
+ */
+rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = {
+ [CHANGELOG_RPC_PROBE_FILTER] = {
+ "CHANGELOG PROBE FILTER", CHANGELOG_RPC_PROBE_FILTER,
+ changelog_handle_probe, NULL, 0, DRC_NA
+ },
+};
+
+/* the changelog RPC program served through the actor table above */
+struct rpcsvc_program changelog_svc_prog = {
+ .progname = CHANGELOG_RPC_PROGNAME,
+ .prognum = CHANGELOG_RPC_PROGNUM,
+ .progver = CHANGELOG_RPC_PROGVER,
+ .numactors = CHANGELOG_RPC_PROC_MAX,
+ .actors = changelog_svc_actors,
+ .synctask = _gf_true,
+};
+
+/*
+ * NULL-terminated program list handed to changelog_rpc_server_init ()
+ * and changelog_rpc_server_destroy ().
+ */
+struct rpcsvc_program *changelog_programs[] = {
+ &changelog_svc_prog,
+ NULL,
+};
diff --git a/xlators/features/changelog/src/changelog-rpc.h b/xlators/features/changelog/src/changelog-rpc.h
new file mode 100644
index 00000000000..0df96684b6c
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHANGELOG_RPC_H
+#define __CHANGELOG_RPC_H
+
+#include "xlator.h"
+#include "changelog-helpers.h"
+
+/* one time */
+#include "socket.h"
+#include "changelog-rpc-common.h"
+
+/* program name used for the changelog RPC program */
+#define CHANGELOG_RPC_PROGNAME "GlusterFS Changelog"
+
+/*
+ * (xlator, private data, ring buffer, number of dispatcher threads)
+ * -> RPC server object, or NULL on failure
+ */
+rpcsvc_t *
+changelog_init_rpc_listner (xlator_t *, changelog_priv_t *, rbuf_t *, int);
+
+/* tear down the server created above and stop its helper threads */
+void
+changelog_destroy_rpc_listner (xlator_t *, changelog_priv_t *);
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
new file mode 100644
index 00000000000..c262820c64c
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "changelog-rt.h"
+#include "changelog-mem-types.h"
+
+/**
+ * Constructor of the real-time (RT) dispatcher: allocate its private
+ * state, initialize its lock and install it, together with
+ * changelog_rt_enqueue as dispatch function, on 'cd'.
+ *
+ * @return 0 on success; -1 when the allocation or the lock
+ *         initialization fails ('cd' is left untouched in that case).
+ */
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd)
+{
+        changelog_rt_t *crt = NULL;
+
+        crt = GF_CALLOC (1, sizeof (*crt),
+                         gf_changelog_mt_rt_t);
+        if (!crt)
+                return -1;
+
+        /* never publish a dispatcher whose lock is unusable */
+        if (LOCK_INIT (&crt->lock) != 0) {
+                GF_FREE (crt);
+                return -1;
+        }
+
+        cd->cd_data = crt;
+        cd->dispatchfn = &changelog_rt_enqueue;
+
+        return 0;
+}
+
+/**
+ * Destructor of the real-time (RT) dispatcher: destroy the lock held in
+ * cd->cd_data and free that state. Always returns 0.
+ */
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd)
+{
+        changelog_rt_t *state = cd->cd_data;
+
+        LOCK_DESTROY (&state->lock);
+        GF_FREE (state);
+
+        return 0;
+}
+
+/**
+ * Dispatch function of the RT mode: under the dispatcher lock, hand
+ * 'cld_0' to changelog_handle_change () and, when that succeeds and
+ * 'cld_1' is given, hand over 'cld_1' as well.
+ *
+ * @param cbatch  the changelog_rt_t state set up by changelog_rt_init ()
+ * @return the result of the last changelog_handle_change () call made.
+ */
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+                      changelog_log_data_t *cld_0, changelog_log_data_t *cld_1)
+{
+        int             ret   = 0;
+        changelog_rt_t *state = (changelog_rt_t *) cbatch;
+
+        LOCK (&state->lock);
+        {
+                ret = changelog_handle_change (this, priv, cld_0);
+                if (ret == 0 && cld_1 != NULL)
+                        ret = changelog_handle_change (this, priv, cld_1);
+        }
+        UNLOCK (&state->lock);
+
+        return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
new file mode 100644
index 00000000000..1fc2bbc5bb9
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_RT_H
+#define _CHANGELOG_RT_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+
+#include "changelog-helpers.h"
+
+/*
+ * Private state of the real-time (RT) dispatcher: a single lock.
+ * (Originally marked "unused as of now"; it may be needed later.)
+ */
+typedef struct changelog_rt {
+ gf_lock_t lock;
+} changelog_rt_t;
+
+/* constructor: allocates the RT state and installs it on 'cd' */
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd);
+/* destructor: destroys the lock and frees the RT state held by 'cd' */
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd);
+/* dispatch function: handles 'cld_0', then 'cld_1' if given, under the lock */
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1);
+
+#endif /* _CHANGELOG_RT_H */
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
new file mode 100644
index 00000000000..f8f95cf0e81
--- /dev/null
+++ b/xlators/features/changelog/src/changelog.c
@@ -0,0 +1,2988 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "syscall.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-rt.h"
+
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+#include "changelog-messages.h"
+
+#include <pthread.h>
+
+#include "changelog-rpc.h"
+#include "errno.h"
+
+/*
+ * Table of dispatcher bootstrap entries: each entry names a changelog mode
+ * together with its constructor and destructor.  Only the real-time (RT)
+ * mode is listed here.
+ */
+static struct changelog_bootstrap
+cb_bootstrap[] = {
+ {
+ .mode = CHANGELOG_MODE_RT,
+ .ctor = changelog_rt_init,
+ .dtor = changelog_rt_fini,
+ },
+};
+
+/* Entry operations - TYPE III */
+
+/**
+ * entry operations do not undergo inode version checking.
+ */
+
+/* {{{ */
+
+/* rmdir */
+
+/**
+ * rmdir callback: log the entry change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                     struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+                                preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_rmdir() hands to fop_rmdir_stub(): logs the
+ * dequeue, calls changelog_color_fop_and_inc_cnt() on the frame's local and
+ * winds rmdir to the first child.
+ */
+int32_t
+changelog_rmdir_resume (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, int xflags, dict_t *xdata)
+{
+        changelog_priv_t *priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeue rmdir");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_rmdir_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir,
+                    loc, xflags, xdata);
+        return 0;
+}
+
+/**
+ * rmdir fop: prepare a two-record changelog entry (fop number + parent
+ * gfid/basename), then either queue the fop behind the changelog barrier or
+ * wind it to the first child.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_rmdir_cbk().
+ */
+int32_t
+changelog_rmdir (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, int xflags, dict_t *xdata)
+{
+        size_t            xtra_len        = 0;
+        changelog_priv_t *priv            = this->private;
+        changelog_opt_t  *co              = NULL;
+        call_stub_t      *stub            = NULL;
+        struct list_head  queue           = {0, };
+        gf_boolean_t      barrier_enabled = _gf_false;
+
+        INIT_LIST_HEAD (&queue);
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        /* 2 == fop + entry */
+        CHANGELOG_INIT_NOCHECK (this, frame->local,
+                                NULL, loc->inode->gfid, 2);
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (co == NULL)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        co++;
+        if (priv->capture_del_path) {
+                CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name,
+                                               del_entry_fn, del_entry_free_fn,
+                                               xtra_len, wind, _gf_true);
+        } else {
+                CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name,
+                                               del_entry_fn, del_entry_free_fn,
+                                               xtra_len, wind, _gf_false);
+        }
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+        /* ---- changelog barrier ---- */
+        /*
+         * For rmdir/unlink/rename, when the barrier is not enabled the color
+         * assignment and the fop_cnt increment are done under priv->lock.
+         * Otherwise draining could wake up before the counter is bumped and
+         * publish the changelog, while the fop still reaches the disk later
+         * and shows up in the snapped volume - which is exactly what must
+         * not happen.
+         */
+        LOCK (&priv->lock);
+        {
+                barrier_enabled = priv->barrier_enabled;
+                if (barrier_enabled) {
+                        stub = fop_rmdir_stub (frame, changelog_rmdir_resume,
+                                               loc, xflags, xdata);
+                        if (stub)
+                                __chlog_barrier_enqueue (this, stub);
+                        else
+                                __chlog_barrier_disable (this, &queue);
+                } else {
+                        ((changelog_local_t *) frame->local)->color =
+                                priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled) {
+                if (stub) {
+                        /* queued: the resume function winds it later */
+                        gf_msg_debug (this->name, 0, "Enqueue rmdir");
+                        goto out;
+                }
+
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: rmdir");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+        /* ---- changelog barrier ---- */
+
+wind:
+        STACK_WIND (frame, changelog_rmdir_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir,
+                    loc, xflags, xdata);
+out:
+        return 0;
+}
+
+/* unlink */
+
+/**
+ * unlink callback: log the entry change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                      struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+                                preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_unlink() hands to fop_unlink_stub(): logs
+ * the dequeue, calls changelog_color_fop_and_inc_cnt() on the frame's local
+ * and winds unlink to the first child.
+ */
+int32_t
+changelog_unlink_resume (call_frame_t *frame, xlator_t *this,
+                         loc_t *loc, int xflags, dict_t *xdata)
+{
+        changelog_priv_t *priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeue unlink");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_unlink_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+                    loc, xflags, xdata);
+        return 0;
+}
+
+/**
+ * unlink fop: prepare the changelog record, then either queue the fop behind
+ * the changelog barrier or wind it to the first child.
+ *
+ * Two record layouts are produced:
+ *  - if xdata carries DHT_CHANGELOG_RENAME_OP_KEY the unlink is logged as a
+ *    rename (fop + old entry + new entry), with both names taken from the
+ *    dht_changelog_rename_info_t payload;
+ *  - otherwise a plain unlink (fop + entry) is logged, unless the fop is
+ *    filtered out by CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_unlink_cbk().
+ */
+int32_t
+changelog_unlink (call_frame_t *frame, xlator_t *this,
+                  loc_t *loc, int xflags, dict_t *xdata)
+{
+        size_t                       xtra_len        = 0;
+        size_t                       name_len        = 0;
+        changelog_priv_t            *priv            = NULL;
+        changelog_opt_t             *co              = NULL;
+        call_stub_t                 *stub            = NULL;
+        struct list_head             queue           = {0, };
+        gf_boolean_t                 barrier_enabled = _gf_false;
+        dht_changelog_rename_info_t *info            = NULL;
+        int                          ret             = 0;
+        /* +1 so that a NAME_MAX-long name still has room for its NUL */
+        char                         old_name[NAME_MAX + 1] = {0};
+        char                         new_name[NAME_MAX + 1] = {0};
+        char                        *nname           = NULL;
+
+        INIT_LIST_HEAD (&queue);
+        priv = this->private;
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+        if (!ret) {     /* special case: unlink considered as rename */
+                /* 3 == fop + oldloc + newloc */
+                CHANGELOG_INIT_NOCHECK (this, frame->local,
+                                        NULL, loc->inode->gfid, 3);
+
+                co = changelog_get_usable_buffer (frame->local);
+                if (!co)
+                        goto wind;
+
+                CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_RENAME, fop_fn, xtra_len);
+
+                co++;
+                /*
+                 * The lengths come from the xdata payload, so never copy
+                 * more than NAME_MAX bytes: the buffers are zero-filled and
+                 * one byte larger, which keeps the result NUL-terminated.
+                 * NOTE(review): oldname_len/newname_len presumably count the
+                 * trailing NUL - confirm against the DHT producer.
+                 */
+                name_len = (info->oldname_len > 0) ? info->oldname_len : 0;
+                if (name_len > NAME_MAX)
+                        name_len = NAME_MAX;
+                strncpy (old_name, info->buffer, name_len);
+                CHANGELOG_FILL_ENTRY (co, info->old_pargfid, old_name,
+                                      entry_fn, entry_free_fn, xtra_len, wind);
+
+                co++;
+                /* new name resides just after old name */
+                nname = info->buffer + info->oldname_len;
+                name_len = (info->newname_len > 0) ? info->newname_len : 0;
+                if (name_len > NAME_MAX)
+                        name_len = NAME_MAX;
+                strncpy (new_name, nname, name_len);
+                CHANGELOG_FILL_ENTRY (co, info->new_pargfid, new_name,
+                                      entry_fn, entry_free_fn, xtra_len, wind);
+
+                changelog_set_usable_record_and_length (frame->local,
+                                                        xtra_len, 3);
+        } else {        /* default unlink */
+                CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+                /* 2 == fop + entry */
+                CHANGELOG_INIT_NOCHECK (this, frame->local, NULL,
+                                        loc->inode->gfid, 2);
+
+                co = changelog_get_usable_buffer (frame->local);
+                if (!co)
+                        goto wind;
+
+                CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op,
+                                          fop_fn, xtra_len);
+
+                co++;
+                if (priv->capture_del_path) {
+                        CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid,
+                                loc->name, del_entry_fn, del_entry_free_fn,
+                                xtra_len, wind, _gf_true);
+                } else {
+                        CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid,
+                                loc->name, del_entry_fn, del_entry_free_fn,
+                                xtra_len, wind, _gf_false);
+                }
+
+                changelog_set_usable_record_and_length (frame->local,
+                                                        xtra_len, 2);
+        }
+
+        /* ---- changelog barrier ---- */
+        LOCK (&priv->lock);
+        {
+                if ((barrier_enabled = priv->barrier_enabled)) {
+                        stub = fop_unlink_stub (frame, changelog_unlink_resume,
+                                                loc, xflags, xdata);
+                        if (!stub)
+                                __chlog_barrier_disable (this, &queue);
+                        else
+                                __chlog_barrier_enqueue (this, stub);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *)frame->local)->color
+                                = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled && stub) {
+                /* queued: changelog_unlink_resume() winds it later */
+                gf_msg_debug (this->name, 0, "Enqueue unlink");
+                goto out;
+        }
+        if (barrier_enabled && !stub) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: unlink");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+        /* ---- changelog barrier ---- */
+
+ wind:
+        STACK_WIND (frame, changelog_unlink_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+                    loc, xflags, xdata);
+ out:
+        return 0;
+}
+
+/* rename */
+
+/**
+ * rename callback: log the entry change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_rename_cbk (call_frame_t *frame,
+                      void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno,
+                      struct iatt *buf, struct iatt *preoldparent,
+                      struct iatt *postoldparent, struct iatt *prenewparent,
+                      struct iatt *postnewparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno,
+                                buf, preoldparent, postoldparent,
+                                prenewparent, postnewparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_rename() hands to fop_rename_stub(): logs
+ * the dequeue, calls changelog_color_fop_and_inc_cnt() on the frame's local
+ * and winds rename to the first child.
+ */
+int32_t
+changelog_rename_resume (call_frame_t *frame, xlator_t *this,
+                         loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        changelog_priv_t *priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeue rename");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_rename_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename,
+                    oldloc, newloc, xdata);
+        return 0;
+}
+
+/**
+ * rename fop: prepare a three-record changelog entry (fop number, old entry,
+ * new entry), then either queue the fop behind the changelog barrier or wind
+ * it to the first child.
+ *
+ * A rename of a non-directory whose xdata lacks
+ * DHT_CHANGELOG_RENAME_OP_KEY is wound without being logged.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_rename_cbk().
+ */
+int32_t
+changelog_rename (call_frame_t *frame, xlator_t *this,
+                  loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        size_t                       xtra_len        = 0;
+        changelog_priv_t            *priv            = this->private;
+        changelog_opt_t             *co              = NULL;
+        call_stub_t                 *stub            = NULL;
+        struct list_head             queue           = {0, };
+        gf_boolean_t                 barrier_enabled = _gf_false;
+        dht_changelog_rename_info_t *info            = NULL;
+        int                          ret             = 0;
+
+        INIT_LIST_HEAD (&queue);
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+        if (ret && oldloc->inode->ia_type != IA_IFDIR) {
+                /* key "NOT" set for a non-directory:
+                 * special rename => do not log it */
+                goto wind;
+        }
+
+        /* 3 == fop + oldloc + newloc */
+        CHANGELOG_INIT_NOCHECK (this, frame->local,
+                                NULL, oldloc->inode->gfid, 3);
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (co == NULL)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        co++;
+        CHANGELOG_FILL_ENTRY (co, oldloc->pargfid, oldloc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        co++;
+        CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 3);
+
+        /* ---- changelog barrier ---- */
+        LOCK (&priv->lock);
+        {
+                barrier_enabled = priv->barrier_enabled;
+                if (barrier_enabled) {
+                        stub = fop_rename_stub (frame, changelog_rename_resume,
+                                                oldloc, newloc, xdata);
+                        if (stub)
+                                __chlog_barrier_enqueue (this, stub);
+                        else
+                                __chlog_barrier_disable (this, &queue);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *) frame->local)->color =
+                                priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled) {
+                if (stub) {
+                        /* queued: the resume function winds it later */
+                        gf_msg_debug (this->name, 0, "Enqueue rename");
+                        goto out;
+                }
+
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: rename");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+        /* ---- changelog barrier ---- */
+
+wind:
+        STACK_WIND (frame, changelog_rename_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename,
+                    oldloc, newloc, xdata);
+out:
+        return 0;
+}
+
+/* link */
+
+/**
+ * link callback: log the entry change when the fop succeeded and a changelog
+ * local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_link_cbk (call_frame_t *frame,
+                    void *cookie, xlator_t *this, int32_t op_ret,
+                    int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (link, frame, op_ret, op_errno,
+                                inode, buf, preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_link() hands to fop_link_stub(): validates
+ * its arguments, logs the dequeue, calls changelog_color_fop_and_inc_cnt()
+ * on the frame's local and winds link to the first child.
+ *
+ * Returns 0 after winding, -1 when a validation fails.
+ */
+int32_t
+changelog_link_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, out);
+        GF_VALIDATE_OR_GOTO ("changelog", this->fops, out);
+        GF_VALIDATE_OR_GOTO ("changelog", frame, out);
+
+        priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeuing link");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_link_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->link,
+                    oldloc, newloc, xdata);
+        return 0;
+
+out:
+        return -1;
+}
+/**
+ * link fop: prepare a two-record changelog entry (fop number + new entry),
+ * then either queue the fop behind the changelog barrier or wind it to the
+ * first child.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_link_cbk().
+ */
+int32_t
+changelog_link (call_frame_t *frame,
+                xlator_t *this, loc_t *oldloc,
+                loc_t *newloc, dict_t *xdata)
+{
+        size_t            xtra_len        = 0;
+        changelog_priv_t *priv            = NULL;
+        changelog_opt_t  *co              = NULL;
+        call_stub_t      *stub            = NULL;
+        struct list_head  queue           = {0, };
+        gf_boolean_t      barrier_enabled = _gf_false;
+
+        /*
+         * queue is handed to __chlog_barrier_disable() and
+         * chlog_barrier_dequeue_all() when stub creation fails, so it has to
+         * be a valid empty list first - the rmdir/unlink/rename paths
+         * initialise theirs the same way.
+         */
+        INIT_LIST_HEAD (&queue);
+
+        priv = this->private;
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+        CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+
+        /* 2 == fop + entry */
+        CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->gfid, 2);
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        co++;
+        CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+        LOCK (&priv->lock);
+        {
+                if ((barrier_enabled = priv->barrier_enabled)) {
+                        stub = fop_link_stub (frame, changelog_link_resume,
+                                              oldloc, newloc, xdata);
+                        if (!stub)
+                                __chlog_barrier_disable (this, &queue);
+                        else
+                                __chlog_barrier_enqueue (this, stub);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *)frame->local)->color
+                                = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled && stub) {
+                /* queued: changelog_link_resume() winds it later */
+                gf_msg_debug (this->name, 0, "Enqueued link");
+                goto out;
+        }
+
+        if (barrier_enabled && !stub) {
+                /* ENOMEM, as reported by the other barriered fops */
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: link");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+
+ wind:
+        STACK_WIND (frame, changelog_link_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->link,
+                    oldloc, newloc, xdata);
+out:
+        return 0;
+}
+
+/* mkdir */
+
+/**
+ * mkdir callback: log the entry change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_mkdir_cbk (call_frame_t *frame,
+                     void *cookie, xlator_t *this, int32_t op_ret,
+                     int32_t op_errno, inode_t *inode,
+                     struct iatt *buf, struct iatt *preparent,
+                     struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+                                inode, buf, preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_mkdir() hands to fop_mkdir_stub():
+ * validates its arguments, logs the dequeue, calls
+ * changelog_color_fop_and_inc_cnt() on the frame's local and winds mkdir to
+ * the first child.
+ *
+ * Returns 0 after winding, -1 when a validation fails.
+ */
+int32_t
+changelog_mkdir_resume (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, mode_t mode,
+                        mode_t umask, dict_t *xdata)
+{
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, out);
+        GF_VALIDATE_OR_GOTO ("changelog", this->fops, out);
+        GF_VALIDATE_OR_GOTO ("changelog", frame, out);
+
+        priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeuing mkdir");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_mkdir_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir,
+                    loc, mode, umask, xdata);
+        return 0;
+
+out:
+        return -1;
+}
+
+/**
+ * mkdir fop: prepare a five-record changelog entry (fop number, mode, uid,
+ * gid, entry), then either queue the fop behind the changelog barrier or
+ * wind it to the first child.
+ *
+ * The gfid logged is the one found under "gfid-req" in xdata; when that key
+ * is missing the fop is wound without being logged.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_mkdir_cbk().
+ */
+int32_t
+changelog_mkdir (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+        int               ret             = -1;
+        uuid_t            gfid            = {0,};
+        void             *uuid_req        = NULL;
+        size_t            xtra_len        = 0;
+        changelog_priv_t *priv            = NULL;
+        changelog_opt_t  *co              = NULL;
+        call_stub_t      *stub            = NULL;
+        struct list_head  queue           = {0, };
+        gf_boolean_t      barrier_enabled = _gf_false;
+
+        /*
+         * queue is handed to __chlog_barrier_disable() and
+         * chlog_barrier_dequeue_all() when stub creation fails, so it has to
+         * be a valid empty list first - the rmdir/unlink/rename paths
+         * initialise theirs the same way.
+         */
+        INIT_LIST_HEAD (&queue);
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "failed to get gfid from dict");
+                goto wind;
+        }
+        gf_uuid_copy (gfid, uuid_req);
+
+        /* 5 == fop + mode + uid + gid + entry */
+        CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5);
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, S_IFDIR | mode, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 5);
+
+        LOCK (&priv->lock);
+        {
+                if ((barrier_enabled = priv->barrier_enabled)) {
+                        stub = fop_mkdir_stub (frame, changelog_mkdir_resume,
+                                               loc, mode, umask, xdata);
+                        if (!stub)
+                                __chlog_barrier_disable (this, &queue);
+                        else
+                                __chlog_barrier_enqueue (this, stub);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *)frame->local)->color
+                                = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled && stub) {
+                /* queued: changelog_mkdir_resume() winds it later */
+                gf_msg_debug (this->name, 0, "Enqueued mkdir");
+                goto out;
+        }
+
+        if (barrier_enabled && !stub) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: mkdir");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+
+ wind:
+        STACK_WIND (frame, changelog_mkdir_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir,
+                    loc, mode, umask, xdata);
+out:
+        return 0;
+}
+
+/* symlink */
+
+/**
+ * symlink callback: log the entry change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_symlink_cbk (call_frame_t *frame,
+                       void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno,
+                       inode_t *inode, struct iatt *buf, struct iatt *preparent,
+                       struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (symlink, frame, op_ret, op_errno,
+                                inode, buf, preparent, postparent, xdata);
+        return 0;
+}
+
+
+/**
+ * Resume function that changelog_symlink() hands to fop_symlink_stub():
+ * validates its arguments, logs the dequeue, calls
+ * changelog_color_fop_and_inc_cnt() on the frame's local and winds symlink
+ * to the first child.
+ *
+ * Returns 0 after winding, -1 when a validation fails.
+ */
+int32_t
+changelog_symlink_resume (call_frame_t *frame, xlator_t *this,
+                          const char *linkname, loc_t *loc,
+                          mode_t umask, dict_t *xdata)
+{
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, out);
+        GF_VALIDATE_OR_GOTO ("changelog", this->fops, out);
+        GF_VALIDATE_OR_GOTO ("changelog", frame, out);
+
+        priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeuing symlink");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_symlink_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink,
+                    linkname, loc, umask, xdata);
+        return 0;
+
+out:
+        return -1;
+}
+
+/**
+ * symlink fop: prepare a two-record changelog entry (fop number + entry),
+ * then either queue the fop behind the changelog barrier or wind it to the
+ * first child.
+ *
+ * The gfid logged is the one found under "gfid-req" in xdata; when that key
+ * is missing the fop is wound without being logged.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_symlink_cbk().
+ */
+int32_t
+changelog_symlink (call_frame_t *frame, xlator_t *this,
+                   const char *linkname, loc_t *loc,
+                   mode_t umask, dict_t *xdata)
+{
+        int               ret             = -1;
+        size_t            xtra_len        = 0;
+        uuid_t            gfid            = {0,};
+        void             *uuid_req        = NULL;
+        changelog_priv_t *priv            = NULL;
+        changelog_opt_t  *co              = NULL;
+        call_stub_t      *stub            = NULL;
+        struct list_head  queue           = {0, };
+        gf_boolean_t      barrier_enabled = _gf_false;
+
+        /*
+         * queue is handed to __chlog_barrier_disable() and
+         * chlog_barrier_dequeue_all() when stub creation fails, so it has to
+         * be a valid empty list first - the rmdir/unlink/rename paths
+         * initialise theirs the same way.
+         */
+        INIT_LIST_HEAD (&queue);
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "failed to get gfid from dict");
+                goto wind;
+        }
+        gf_uuid_copy (gfid, uuid_req);
+
+        /* 2 == fop + entry */
+        CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+        LOCK (&priv->lock);
+        {
+                if ((barrier_enabled = priv->barrier_enabled)) {
+                        stub = fop_symlink_stub (frame,
+                                                 changelog_symlink_resume,
+                                                 linkname, loc, umask, xdata);
+                        if (!stub)
+                                __chlog_barrier_disable (this, &queue);
+                        else
+                                __chlog_barrier_enqueue (this, stub);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *)frame->local)->color
+                                = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled && stub) {
+                /* queued: changelog_symlink_resume() winds it later */
+                gf_msg_debug (this->name, 0, "Enqueued symlink");
+                goto out;
+        }
+
+        if (barrier_enabled && !stub) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: symlink");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+
+ wind:
+        STACK_WIND (frame, changelog_symlink_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink,
+                    linkname, loc, umask, xdata);
+out:
+        return 0;
+}
+
+/* mknod */
+
+/**
+ * mknod callback: log the entry change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_mknod_cbk (call_frame_t *frame,
+                     void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, inode_t *inode,
+                     struct iatt *buf, struct iatt *preparent,
+                     struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+                                inode, buf, preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_mknod() hands to fop_mknod_stub():
+ * validates its arguments, logs the dequeue, calls
+ * changelog_color_fop_and_inc_cnt() on the frame's local and winds mknod to
+ * the first child.
+ *
+ * Returns 0 after winding, -1 when a validation fails.
+ */
+int32_t
+changelog_mknod_resume (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, mode_t mode, dev_t rdev,
+                        mode_t umask, dict_t *xdata)
+{
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, out);
+        GF_VALIDATE_OR_GOTO ("changelog", this->fops, out);
+        GF_VALIDATE_OR_GOTO ("changelog", frame, out);
+
+        priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeuing mknod");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_mknod_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+                    loc, mode, rdev, umask, xdata);
+        return 0;
+
+out:
+        return -1;
+}
+
+/**
+ * mknod fop: prepare a five-record changelog entry (fop number, mode, uid,
+ * gid, entry), then either queue the fop behind the changelog barrier or
+ * wind it to the first child.
+ *
+ * The fop is wound without being logged when the changelog is inactive, when
+ * it comes from GF_CLIENT_PID_DEFRAG, when it is filtered as internal or
+ * comes from GF_CLIENT_PID_TIER_DEFRAG (both only checked if xdata lacks
+ * "trusted.tier.tier-dht.linkto"), or when xdata has no "gfid-req".
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_mknod_cbk().
+ */
+int32_t
+changelog_mknod (call_frame_t *frame,
+                 xlator_t *this, loc_t *loc,
+                 mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+        int               ret             = -1;
+        uuid_t            gfid            = {0,};
+        void             *uuid_req        = NULL;
+        size_t            xtra_len        = 0;
+        changelog_priv_t *priv            = NULL;
+        changelog_opt_t  *co              = NULL;
+        call_stub_t      *stub            = NULL;
+        struct list_head  queue           = {0, };
+        gf_boolean_t      barrier_enabled = _gf_false;
+
+        /*
+         * queue is handed to __chlog_barrier_disable() and
+         * chlog_barrier_dequeue_all() when stub creation fails, so it has to
+         * be a valid empty list first - the rmdir/unlink/rename paths
+         * initialise theirs the same way.
+         */
+        INIT_LIST_HEAD (&queue);
+
+        priv = this->private;
+
+        /* Check whether changelog active */
+        if (!(priv->active))
+                goto wind;
+
+        /* Check whether rebalance activity */
+        if (frame->root->pid == GF_CLIENT_PID_DEFRAG)
+                goto wind;
+
+        /* If the tier-dht linkto key is set, skip both of these checks:
+         * 1. whether this is an internal fop, and
+         * 2. whether the caller is the tier rebalance process
+         * so that a mknod issued by the tier rebalance process is still
+         * recorded. */
+        if (!(dict_get (xdata, "trusted.tier.tier-dht.linkto"))) {
+                CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+                if (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
+                        goto wind;
+        }
+
+        ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "failed to get gfid from dict");
+                goto wind;
+        }
+        gf_uuid_copy (gfid, uuid_req);
+
+        /* 5 == fop + mode + uid + gid + entry */
+        CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5);
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 5);
+
+        LOCK (&priv->lock);
+        {
+                if ((barrier_enabled = priv->barrier_enabled)) {
+                        stub = fop_mknod_stub (frame, changelog_mknod_resume,
+                                               loc, mode, dev, umask, xdata);
+                        if (!stub)
+                                __chlog_barrier_disable (this, &queue);
+                        else
+                                __chlog_barrier_enqueue (this, stub);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *)frame->local)->color
+                                = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled && stub) {
+                /* queued: changelog_mknod_resume() winds it later */
+                gf_msg_debug (this->name, 0, "Enqueued mknod");
+                goto out;
+        }
+
+        if (barrier_enabled && !stub) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: mknod");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+
+ wind:
+        STACK_WIND (frame, changelog_mknod_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+                    loc, mode, dev, umask, xdata);
+out:
+        return 0;
+}
+
+/* creat */
+
+/**
+ * create callback.  When the fop succeeded and a changelog local is attached
+ * to the frame it dispatches a CHANGELOG_OP_TYPE_CREATE event carrying the
+ * new gfid and the fd flags, sets an fd context if RELEASE events are
+ * selected, and logs the entry change.  It then unwinds to the caller;
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_create_cbk (call_frame_t *frame,
+                      void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno,
+                      fd_t *fd, inode_t *inode, struct iatt *buf,
+                      struct iatt *preparent,
+                      struct iatt *postparent, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+        changelog_event_t  ev    = {0,};
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        /* build and dispatch the event, much like open() does */
+        ev.ev_type        = CHANGELOG_OP_TYPE_CREATE;
+        ev.u.create.flags = fd->flags;
+        gf_uuid_copy (ev.u.create.gfid, buf->ia_gfid);
+        changelog_dispatch_event (this, priv, &ev);
+
+        if (changelog_ev_selected (this, &priv->ev_selection,
+                                   CHANGELOG_OP_TYPE_RELEASE)) {
+                /* tag the fd (for the release cbk) */
+                if (fd_ctx_set (fd, this, (uint64_t)(long) 0x1) != 0)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                CHANGELOG_MSG_SET_FD_CONTEXT,
+                                "could not set fd context (for release cbk)");
+        }
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (create, frame,
+                                op_ret, op_errno, fd, inode,
+                                buf, preparent, postparent, xdata);
+        return 0;
+}
+
+/**
+ * Resume function that changelog_create() hands to fop_create_stub():
+ * validates its arguments, logs the dequeue, calls
+ * changelog_color_fop_and_inc_cnt() on the frame's local and winds create to
+ * the first child.
+ *
+ * Returns 0 after winding, -1 when a validation fails.
+ */
+int32_t
+changelog_create_resume (call_frame_t *frame, xlator_t *this,
+                         loc_t *loc, int32_t flags, mode_t mode,
+                         mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, out);
+        GF_VALIDATE_OR_GOTO ("changelog", this->fops, out);
+        GF_VALIDATE_OR_GOTO ("changelog", frame, out);
+
+        priv = this->private;
+
+        gf_msg_debug (this->name, 0, "Dequeuing create");
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+
+        STACK_WIND (frame, changelog_create_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+        return 0;
+
+out:
+        return -1;
+}
+
+/**
+ * create fop: prepare a five-record changelog entry (fop number, mode, uid,
+ * gid, entry), then either queue the fop behind the changelog barrier or
+ * wind it to the first child.
+ *
+ * The gfid logged is the one found under "gfid-req" in xdata; when that key
+ * is missing the fop is wound without being logged.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_create_cbk().
+ */
+int32_t
+changelog_create (call_frame_t *frame, xlator_t *this,
+                  loc_t *loc, int32_t flags, mode_t mode,
+                  mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        int               ret             = -1;
+        uuid_t            gfid            = {0,};
+        void             *uuid_req        = NULL;
+        changelog_opt_t  *co              = NULL;
+        changelog_priv_t *priv            = NULL;
+        size_t            xtra_len        = 0;
+        call_stub_t      *stub            = NULL;
+        struct list_head  queue           = {0, };
+        gf_boolean_t      barrier_enabled = _gf_false;
+
+        /*
+         * queue is handed to __chlog_barrier_disable() and
+         * chlog_barrier_dequeue_all() when stub creation fails, so it has to
+         * be a valid empty list first - the rmdir/unlink/rename paths
+         * initialise theirs the same way.
+         */
+        INIT_LIST_HEAD (&queue);
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "failed to get gfid from dict");
+                goto wind;
+        }
+        gf_uuid_copy (gfid, uuid_req);
+
+        /* 5 == fop + mode + uid + gid + entry */
+        CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5);
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+        co++;
+
+        CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+                              entry_fn, entry_free_fn, xtra_len, wind);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 5);
+
+        LOCK (&priv->lock);
+        {
+                if ((barrier_enabled = priv->barrier_enabled)) {
+                        stub = fop_create_stub (frame, changelog_create_resume,
+                                                loc, flags, mode, umask, fd,
+                                                xdata);
+                        if (!stub)
+                                __chlog_barrier_disable (this, &queue);
+                        else
+                                __chlog_barrier_enqueue (this, stub);
+                } else {
+                        /* no barrier: color the fop and count it under lock */
+                        ((changelog_local_t *)frame->local)->color
+                                = priv->current_color;
+                        changelog_inc_fop_cnt (this, priv, frame->local);
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (barrier_enabled && stub) {
+                /* queued: changelog_create_resume() winds it later */
+                gf_msg_debug (this->name, 0, "Enqueued create");
+                goto out;
+        }
+
+        if (barrier_enabled && !stub) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Failed to barrier FOPs, disabling changelog barrier "
+                        "FOP: create");
+                chlog_barrier_dequeue_all (this, &queue);
+        }
+
+ wind:
+        STACK_WIND (frame, changelog_create_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+out:
+        return 0;
+}
+
+/* }}} */
+
+
+/* Metadata modification fops - TYPE II */
+
+/* {{{ */
+
+/* {f}setattr */
+
+/**
+ * fsetattr callback: log the metadata change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_fsetattr_cbk (call_frame_t *frame,
+                        void *cookie, xlator_t *this, int32_t op_ret,
+                        int32_t op_errno, struct iatt *preop_stbuf,
+                        struct iatt *postop_stbuf, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+                                preop_stbuf, postop_stbuf, xdata);
+        return 0;
+}
+
+/**
+ * fsetattr fop: prepare a single-record changelog entry (the fop number)
+ * for the fd's inode, then wind the fop to the first child.
+ * changelog_color_fop_and_inc_cnt() is called before winding on every path.
+ *
+ * Always returns 0; the result of the fop is delivered via
+ * changelog_fsetattr_cbk().
+ */
+int32_t
+changelog_fsetattr (call_frame_t *frame,
+                    xlator_t *this, fd_t *fd,
+                    struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        changelog_priv_t *priv     = this->private;
+        changelog_opt_t  *co       = NULL;
+        size_t            xtra_len = 0;
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        fd->inode, fd->inode->gfid, 1);
+        if (frame->local == NULL)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (co == NULL)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_fsetattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr,
+                    fd, stbuf, valid, xdata);
+        return 0;
+}
+
+/**
+ * setattr callback: log the metadata change when the fop succeeded and a
+ * changelog local is attached to the frame, then unwind to the caller.
+ * changelog_dec_fop_cnt() is called on every path.
+ */
+int32_t
+changelog_setattr_cbk (call_frame_t *frame,
+                       void *cookie, xlator_t *this, int32_t op_ret,
+                       int32_t op_errno, struct iatt *preop_stbuf,
+                       struct iatt *postop_stbuf, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = this->private;
+        changelog_local_t *local = frame->local;
+
+        /* failed fop or no local: skip straight to the unwind */
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+        changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+unwind:
+        changelog_dec_fop_cnt (this, priv, local);
+        CHANGELOG_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+                                preop_stbuf, postop_stbuf, xdata);
+        return 0;
+}
+
+/**
+ * setattr fop: prepares a changelog local for loc->inode with one extra
+ * record holding the fop number, then winds to the first child. No
+ * record is prepared when a guard macro jumps to the wind, or when
+ * loc->gfid equals SHARD_ROOT_GFID.
+ */
+int32_t
+changelog_setattr (call_frame_t *frame,
+                   xlator_t *this, loc_t *loc,
+                   struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        changelog_priv_t *conf      = this->private;
+        changelog_opt_t  *opt       = NULL;
+        size_t            rec_len   = 0;
+        uuid_t            shard_dir = {0,};
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, conf, wind);
+        CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+
+        /* Do not record META on .shard */
+        gf_uuid_parse (SHARD_ROOT_GFID, shard_dir);
+        if (!gf_uuid_compare (loc->gfid, shard_dir))
+                goto wind;
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        loc->inode, loc->inode->gfid, 1);
+        if (frame->local == NULL)
+                goto wind;
+
+        opt = changelog_get_usable_buffer (frame->local);
+        if (opt == NULL)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (opt, frame->root->op, fop_fn, rec_len);
+        changelog_set_usable_record_and_length (frame->local, rec_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, conf, frame->local);
+        STACK_WIND (frame, changelog_setattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr,
+                    loc, stbuf, valid, xdata);
+        return 0;
+}
+
+/* {f}removexattr */
+
+/**
+ * fremovexattr callback: records a METADATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_fremovexattr_cbk (call_frame_t *frame,
+                            void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/**
+ * fremovexattr fop: prepares a changelog local for fd->inode with one
+ * extra record holding the fop number, then winds to the first child.
+ * Any guard macro, or a failure to set up the local, winds the call
+ * without a prepared record.
+ */
+int32_t
+changelog_fremovexattr (call_frame_t *frame, xlator_t *this,
+                        fd_t *fd, const char *name, dict_t *xdata)
+{
+        changelog_priv_t *priv     = NULL;
+        changelog_opt_t  *co       = NULL;
+        size_t            xtra_len = 0;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        fd->inode, fd->inode->gfid, 1);
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); never hand a NULL local onwards */
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_fremovexattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fremovexattr,
+                    fd, name, xdata);
+        return 0;
+}
+
+/**
+ * removexattr callback: records a METADATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_removexattr_cbk (call_frame_t *frame,
+                           void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/**
+ * removexattr fop: prepares a changelog local for loc->inode with one
+ * extra record holding the fop number, then winds to the first child.
+ * Any guard macro, or a failure to set up the local, winds the call
+ * without a prepared record.
+ */
+int32_t
+changelog_removexattr (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, const char *name, dict_t *xdata)
+{
+        changelog_priv_t *priv     = NULL;
+        changelog_opt_t  *co       = NULL;
+        size_t            xtra_len = 0;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        loc->inode, loc->inode->gfid, 1);
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); never hand a NULL local onwards */
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_removexattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->removexattr,
+                    loc, name, xdata);
+        return 0;
+}
+
+/* {f}setxattr */
+
+/**
+ * setxattr callback: records a METADATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_setxattr_cbk (call_frame_t *frame,
+                        void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/* changelog_handle_virtual_xattr:
+ *   Handles the virtual setxattr GF_XATTR_TRIGGER_SYNC. The fop is always
+ *   answered here (unwound); it is never wound to the child.
+ *
+ *   Behaviour as implemented:
+ *     value == 1 on a regular file or directory:
+ *         tries to build an ENTRY record; if that fails it is logged at
+ *         INFO and skipped. A DATA record is then captured unless the
+ *         inode is a directory, and the fop is unwound with success.
+ *     anything else (key unreadable as int32, other value, other inode
+ *     type): unwound with -1/ENOTSUP.
+ *
+ *   NOTE(review): this header used to describe value 1 as "DATA only" and
+ *   value 2 as "ENTRY and DATA"; the code accepts only 1 and attempts
+ *   both. Confirm which contract is intended.
+ *
+ *   @frame: setxattr frame; frame->local is the local prepared by the
+ *           caller and is consumed by the unwind.
+ *   @loc:   target of the setxattr.
+ *   @dict:  xattr dictionary holding GF_XATTR_TRIGGER_SYNC.
+ */
+static void
+changelog_handle_virtual_xattr (call_frame_t *frame, xlator_t *this,
+                                loc_t *loc, dict_t *dict)
+{
+        changelog_priv_t  *priv     = NULL;
+        changelog_local_t *local    = NULL;
+        int32_t            value    = 0;
+        int                ret      = 0;
+        int                dict_ret = 0;
+        gf_boolean_t       valid    = _gf_false;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        dict_ret = dict_get_int32 (dict, GF_XATTR_TRIGGER_SYNC, &value);
+
+        if ((dict_ret == 0 && value == 1) && ((loc->inode->ia_type == IA_IFDIR)
+                                    || (loc->inode->ia_type == IA_IFREG)))
+                valid = _gf_true;
+
+        if (!valid) {
+                CHANGELOG_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL);
+                return;
+        }
+
+        /* frame->local is dereferenced below; the caller's CHANGELOG_INIT
+         * can leave it unset (presumably on allocation failure), so fail
+         * the fop instead of crashing. */
+        if (!frame->local) {
+                CHANGELOG_STACK_UNWIND (setxattr, frame, -1, ENOMEM, NULL);
+                return;
+        }
+
+        ret = changelog_fill_entry_buf (frame, this, loc, &local);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        CHANGELOG_MSG_ENTRY_BUF_INFO,
+                        "Entry cannot be"
+                        " captured for gfid: %s. Capturing DATA"
+                        " entry.", uuid_utoa (loc->inode->gfid));
+        } else {
+                changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+        }
+
+        /* Capture DATA only if it's a file. */
+        if (loc->inode->ia_type != IA_IFDIR)
+                changelog_update (this, priv, frame->local,
+                                  CHANGELOG_TYPE_DATA);
+
+        /* Assign local to prev_entry, so unwind will take
+         * care of cleanup. */
+        ((changelog_local_t *)(frame->local))->prev_entry = local;
+        CHANGELOG_STACK_UNWIND (setxattr, frame, 0, 0, NULL);
+}
+
+/**
+ * setxattr fop: prepares a changelog local for loc->inode with one extra
+ * record holding the fop number, then winds to the first child. A request
+ * carrying GF_XATTR_TRIGGER_SYNC is handed to
+ * changelog_handle_virtual_xattr(), which answers it itself.
+ */
+int32_t
+changelog_setxattr (call_frame_t *frame,
+                    xlator_t *this, loc_t *loc,
+                    dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        changelog_priv_t *priv     = NULL;
+        changelog_opt_t  *co       = NULL;
+        size_t            xtra_len = 0;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        loc->inode, loc->inode->gfid, 1);
+
+        /* On setting this virtual xattr on a file, an explicit data
+         * sync is triggered from geo-rep as CREATE|DATA entry is
+         * recorded in changelog based on xattr value.
+         */
+        if (dict_get (dict, GF_XATTR_TRIGGER_SYNC)) {
+                changelog_handle_virtual_xattr (frame, this, loc, dict);
+                return 0;
+        }
+
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); never hand a NULL local onwards */
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_setxattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->setxattr,
+                    loc, dict, flags, xdata);
+        return 0;
+}
+
+/**
+ * fsetxattr callback: records a METADATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_fsetxattr_cbk (call_frame_t *frame,
+                         void *cookie, xlator_t *this, int32_t op_ret,
+                         int32_t op_errno, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/**
+ * fsetxattr fop: prepares a changelog local for fd->inode with one extra
+ * record holding the fop number, then winds to the first child. Any guard
+ * macro, or a failure to set up the local, winds the call without a
+ * prepared record.
+ */
+int32_t
+changelog_fsetxattr (call_frame_t *frame,
+                     xlator_t *this, fd_t *fd, dict_t *dict,
+                     int32_t flags, dict_t *xdata)
+{
+        changelog_priv_t *priv     = NULL;
+        changelog_opt_t  *co       = NULL;
+        size_t            xtra_len = 0;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+        CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        fd->inode, fd->inode->gfid, 1);
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); never hand a NULL local onwards */
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_fsetxattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,
+                    fd, dict, flags, xdata);
+        return 0;
+}
+
+/**
+ * xattrop callback: records a METADATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_xattrop_cbk (call_frame_t *frame,
+                       void *cookie, xlator_t *this, int32_t op_ret,
+                       int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata);
+        return 0;
+}
+
+/**
+ * xattrop fop: a changelog record is prepared only when @xattr carries
+ * GF_XATTR_SHARD_FILE_SIZE; every other xattrop is wound untouched.
+ * When it is prepared, the local is set up for loc->inode with one extra
+ * record holding the fop number.
+ */
+int32_t
+changelog_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+        changelog_priv_t *priv      = NULL;
+        changelog_opt_t  *co        = NULL;
+        size_t            xtra_len  = 0;
+        int               ret       = 0;
+        void             *size_attr = NULL;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+        /* only the presence of the key matters; its value is unused */
+        ret = dict_get_ptr (xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+        if (ret)
+                goto wind;
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        loc->inode, loc->inode->gfid, 1);
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); never hand a NULL local onwards */
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_xattrop_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->xattrop,
+                    loc, optype, xattr, xdata);
+        return 0;
+}
+
+/**
+ * fxattrop callback: records a METADATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_fxattrop_cbk (call_frame_t *frame,
+                        void *cookie, xlator_t *this, int32_t op_ret,
+                        int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (fxattrop, frame,
+                                op_ret, op_errno, xattr, xdata);
+        return 0;
+}
+
+/**
+ * fxattrop fop: a changelog record is prepared only when @xattr carries
+ * GF_XATTR_SHARD_FILE_SIZE; every other fxattrop is wound untouched.
+ * When it is prepared, the local is set up for fd->inode with one extra
+ * record holding the fop number.
+ */
+int32_t
+changelog_fxattrop (call_frame_t *frame,
+                    xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype,
+                    dict_t *xattr, dict_t *xdata)
+{
+        changelog_priv_t *priv      = NULL;
+        changelog_opt_t  *co        = NULL;
+        size_t            xtra_len  = 0;
+        void             *size_attr = NULL;
+        int               ret       = 0;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+        /* only the presence of the key matters; its value is unused */
+        ret = dict_get_ptr (xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+        if (ret)
+                goto wind;
+
+        CHANGELOG_OP_BOUNDARY_CHECK (frame, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        fd->inode, fd->inode->gfid, 1);
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); never hand a NULL local onwards */
+        if (!frame->local)
+                goto wind;
+
+        co = changelog_get_usable_buffer (frame->local);
+        if (!co)
+                goto wind;
+
+        CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+        changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_fxattrop_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fxattrop,
+                    fd, optype, xattr, xdata);
+        return 0;
+}
+/* }}} */
+
+
+/* Data modification fops - TYPE I */
+
+/* {{{ */
+
+/* {f}truncate() */
+
+/**
+ * truncate callback: records a DATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_truncate_cbk (call_frame_t *frame,
+                        void *cookie, xlator_t *this, int32_t op_ret,
+                        int32_t op_errno, struct iatt *prebuf,
+                        struct iatt *postbuf, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_DATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (truncate, frame,
+                                op_ret, op_errno, prebuf, postbuf, xdata);
+        return 0;
+}
+
+/**
+ * truncate fop: prepares a changelog local for loc->inode (no extra
+ * records). While a snapshot changelog fd is open and the barrier is
+ * enabled, the change is also passed to
+ * changelog_snap_handle_ascii_change() under c_snap_lock. The call is
+ * then wound to the first child.
+ */
+int32_t
+changelog_truncate (call_frame_t *frame,
+                    xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = NULL;
+        changelog_local_t *local = NULL;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        loc->inode, loc->inode->gfid, 0);
+        local = frame->local;
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); &local->cld must not be formed from a
+         * NULL local */
+        if (!local)
+                goto wind;
+
+        LOCK (&priv->c_snap_lock);
+        {
+                if (priv->c_snap_fd != -1 &&
+                    priv->barrier_enabled == _gf_true) {
+                        changelog_snap_handle_ascii_change (this,
+                                                            &local->cld);
+                }
+        }
+        UNLOCK (&priv->c_snap_lock);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_truncate_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+                    loc, offset, xdata);
+        return 0;
+}
+
+/**
+ * ftruncate callback: records a DATA change unless the
+ * CHANGELOG_COND_GOTO check skips it (failed fop, or no local), then
+ * runs changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_ftruncate_cbk (call_frame_t *frame,
+                         void *cookie, xlator_t *this, int32_t op_ret,
+                         int32_t op_errno, struct iatt *prebuf,
+                         struct iatt *postbuf, dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret < 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_DATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (ftruncate, frame,
+                                op_ret, op_errno, prebuf, postbuf, xdata);
+        return 0;
+}
+
+/**
+ * ftruncate fop: prepares a changelog local for fd->inode (no extra
+ * records). While a snapshot changelog fd is open and the barrier is
+ * enabled, the change is also passed to
+ * changelog_snap_handle_ascii_change() under c_snap_lock. The call is
+ * then wound to the first child.
+ */
+int32_t
+changelog_ftruncate (call_frame_t *frame,
+                     xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = NULL;
+        changelog_local_t *local = NULL;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        fd->inode, fd->inode->gfid, 0);
+        local = frame->local;
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); &local->cld must not be formed from a
+         * NULL local */
+        if (!local)
+                goto wind;
+
+        LOCK (&priv->c_snap_lock);
+        {
+                if (priv->c_snap_fd != -1 &&
+                    priv->barrier_enabled == _gf_true) {
+                        changelog_snap_handle_ascii_change (this,
+                                                            &local->cld);
+                }
+        }
+        UNLOCK (&priv->c_snap_lock);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_ftruncate_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+                    fd, offset, xdata);
+        return 0;
+}
+
+/* writev() */
+
+/**
+ * writev callback: records a DATA change unless the CHANGELOG_COND_GOTO
+ * check skips it. Unlike the other callbacks, a write returning zero
+ * bytes (op_ret == 0) is skipped as well as a failed one. Then runs
+ * changelog_dec_fop_cnt and unwinds.
+ */
+int32_t
+changelog_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf,
+                      dict_t *xdata)
+{
+        changelog_priv_t  *conf   = this->private;
+        changelog_local_t *clocal = frame->local;
+
+        CHANGELOG_COND_GOTO (conf, (!clocal || (op_ret <= 0)), unwind);
+
+        changelog_update (this, conf, clocal, CHANGELOG_TYPE_DATA);
+
+ unwind:
+        changelog_dec_fop_cnt (this, conf, clocal);
+        CHANGELOG_STACK_UNWIND (writev, frame,
+                                op_ret, op_errno, prebuf, postbuf, xdata);
+        return 0;
+}
+
+/**
+ * writev fop: prepares a changelog local for fd->inode (no extra
+ * records). While a snapshot changelog fd is open and the barrier is
+ * enabled, the change is also passed to
+ * changelog_snap_handle_ascii_change() under c_snap_lock. The call is
+ * then wound to the first child.
+ */
+int32_t
+changelog_writev (call_frame_t *frame,
+                  xlator_t *this, fd_t *fd, struct iovec *vector,
+                  int32_t count, off_t offset, uint32_t flags,
+                  struct iobref *iobref, dict_t *xdata)
+{
+        changelog_priv_t  *priv  = NULL;
+        changelog_local_t *local = NULL;
+
+        priv = this->private;
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+        CHANGELOG_INIT (this, frame->local,
+                        fd->inode, fd->inode->gfid, 0);
+        local = frame->local;
+        /* CHANGELOG_INIT may leave frame->local unset (the {f}setattr
+         * fops check for this); &local->cld must not be formed from a
+         * NULL local */
+        if (!local)
+                goto wind;
+
+        LOCK (&priv->c_snap_lock);
+        {
+                if (priv->c_snap_fd != -1 &&
+                    priv->barrier_enabled == _gf_true) {
+                        changelog_snap_handle_ascii_change (this,
+                                                            &local->cld);
+                }
+        }
+        UNLOCK (&priv->c_snap_lock);
+
+ wind:
+        changelog_color_fop_and_inc_cnt (this, priv, frame->local);
+        STACK_WIND (frame, changelog_writev_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->writev, fd, vector,
+                    count, offset, flags, iobref, xdata);
+        return 0;
+}
+
+/* }}} */
+
+/* open, release and other beasts */
+
+/* {{{ */
+
+
+
+/**
+ * open callback: when the wind path marked the frame (frame->local was
+ * set to a non-NULL marker) and the open succeeded, dispatches a
+ * CHANGELOG_OP_TYPE_OPEN event with the inode gfid and fd flags. If
+ * RELEASE events are selected, the fd is tagged through its fd context.
+ * The marker is cleared before anything else because it is not a real
+ * local and must not be dereferenced.
+ */
+int
+changelog_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+        int                ret     = 0;
+        changelog_priv_t  *priv    = NULL;
+        changelog_event_t  ev      = {0,};
+        gf_boolean_t       logopen = _gf_false;
+
+        priv = this->private;
+        if (frame->local) {
+                frame->local = NULL;
+                logopen = _gf_true;
+        }
+
+        CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !logopen), unwind);
+
+        /* fill the event structure */
+        ev.ev_type = CHANGELOG_OP_TYPE_OPEN;
+        gf_uuid_copy (ev.u.open.gfid, fd->inode->gfid);
+        ev.u.open.flags = fd->flags;
+        changelog_dispatch_event (this, priv, &ev);
+
+        if (changelog_ev_selected
+                   (this, &priv->ev_selection, CHANGELOG_OP_TYPE_RELEASE)) {
+                /* failing to tag the fd is only logged, not fatal */
+                ret = fd_ctx_set (fd, this, (uint64_t)(long) 0x1);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                CHANGELOG_MSG_SET_FD_CONTEXT,
+                                "could not set fd context (for release cbk)");
+        }
+
+ unwind:
+        CHANGELOG_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+        return 0;
+}
+
+/**
+ * open fop: unless CHANGELOG_NOT_ACTIVE_THEN_GOTO jumps to the wind,
+ * marks the frame with a non-NULL placeholder in frame->local for the
+ * callback to see, then winds to the first child.
+ */
+int
+changelog_open (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+{
+        changelog_priv_t *conf = this->private;
+
+        CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, conf, wind);
+
+        /* placeholder only: the callback must never dereference it */
+        frame->local = (void *)0x1;
+
+ wind:
+        STACK_WIND (frame, changelog_open_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata);
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ */
+
+
+/* }}} */
+
+/**
+ * dict_foreach() callback used by changelog_ipc(): treats the payload of
+ * each dictionary value as a changelog_event_t and dispatches it.
+ * @data is the xlator. Always returns 0.
+ */
+int32_t
+_changelog_generic_dispatcher (dict_t *dict,
+                               char *key, data_t *value, void *data)
+{
+        xlator_t          *xl    = data;
+        changelog_priv_t  *conf  = xl->private;
+        changelog_event_t *event = (changelog_event_t *)value->data;
+
+        changelog_dispatch_event (xl, conf, event);
+        return 0;
+}
+
+/**
+ * ipc fop. Requests whose @op is GF_IPC_TARGET_CHANGELOG are consumed
+ * here: each value in @xdata (if any) is dispatched as an event, in
+ * whatever order dict_foreach() walks the dictionary, and the call is
+ * unwound with success. Every other op is wound to the first child.
+ */
+int32_t
+changelog_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        if (op == GF_IPC_TARGET_CHANGELOG) {
+                /* it's for us, do the job */
+                if (xdata != NULL)
+                        (void) dict_foreach (xdata,
+                                             _changelog_generic_dispatcher,
+                                             this);
+
+                STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->ipc, op, xdata);
+        return 0;
+}
+
+
+/* {{{ */
+
+/**
+ * release handler: dispatches a CHANGELOG_OP_TYPE_RELEASE event carrying
+ * the inode gfid, then removes this xlator's context from the fd.
+ * Always returns 0.
+ */
+int32_t
+changelog_release (xlator_t *this, fd_t *fd)
+{
+        changelog_priv_t  *conf  = this->private;
+        changelog_event_t  event = {0,};
+
+        event.ev_type = CHANGELOG_OP_TYPE_RELEASE;
+        gf_uuid_copy (event.u.release.gfid, fd->inode->gfid);
+        changelog_dispatch_event (this, conf, &event);
+
+        (void) fd_ctx_del (fd, this, NULL);
+        return 0;
+}
+
+
+/* }}} */
+
+/**
+ * The
+ * - @init ()
+ * - @fini ()
+ * - @reconfigure ()
+ * ... and helper routines
+ */
+
+/**
+ * Sets priv->op_mode from the option string. Only a string starting with
+ * "realtime" is recognised; anything else leaves op_mode untouched.
+ * Kept as a helper in case more operation modes are added later.
+ */
+static void
+changelog_assign_opmode (changelog_priv_t *priv, char *mode)
+{
+        if (strncmp (mode, "realtime", 8) != 0)
+                return;
+
+        priv->op_mode = CHANGELOG_MODE_RT;
+}
+
+/**
+ * Sets priv->encode_mode from the option string: a "binary" prefix
+ * selects CHANGELOG_ENCODE_BINARY, an "ascii" prefix selects
+ * CHANGELOG_ENCODE_ASCII, and anything else leaves the mode untouched.
+ */
+static void
+changelog_assign_encoding (changelog_priv_t *priv, char *enc)
+{
+        if (!strncmp (enc, "binary", 6)) {
+                priv->encode_mode = CHANGELOG_ENCODE_BINARY;
+                return;
+        }
+
+        if (!strncmp (enc, "ascii", 5))
+                priv->encode_mode = CHANGELOG_ENCODE_ASCII;
+}
+
+/**
+ * Stores @timeout in priv->timeout.tv_sec while holding priv->lock.
+ */
+static void
+changelog_assign_barrier_timeout(changelog_priv_t *priv, uint32_t timeout)
+{
+        LOCK (&priv->lock);
+        priv->timeout.tv_sec = timeout;
+        UNLOCK (&priv->lock);
+}
+
+/* Clean up any helper threads that are running: the rollover thread and
+ * the fsync thread. Each handle is reset to 0 afterwards so a repeated
+ * call is a no-op. The result of changelog_thread_cleanup() is
+ * deliberately ignored. */
+static void
+changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+        if (priv->cr.rollover_th) {
+                (void) changelog_thread_cleanup (this, priv->cr.rollover_th);
+                priv->cr.rollover_th = 0;
+        }
+
+        if (priv->cf.fsync_th) {
+                (void) changelog_thread_cleanup (this, priv->cf.fsync_th);
+                priv->cf.fsync_th = 0;
+        }
+}
+
+/* Spawn the helper threads: the rollover thread always, the fsync thread
+ * only when priv->fsync_interval is non-zero. If the fsync thread cannot
+ * be created, the already running rollover thread is cleaned up again.
+ * Returns 0 on success, else the gf_thread_create() error. */
+static int
+changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+        int ret = 0;
+
+        /* Geo-Rep snapshot dependency:
+         *
+         * To implement explicit rollover of changelog journal on barrier
+         * notification, a pipe is created to communicate between
+         * 'changelog_rollover' thread and changelog main thread. The select
+         * call used to wait till roll-over time in changelog_rollover thread
+         * is modified to wait on read end of the pipe. When barrier
+         * notification comes (i.e, in 'reconfigure'), select in
+         * changelog_rollover thread is woken up explicitly by writing into
+         * the write end of the pipe in 'reconfigure'.
+         */
+
+        priv->cr.notify = _gf_false;
+        priv->cr.this = this;
+        ret = gf_thread_create (&priv->cr.rollover_th,
+                                NULL, changelog_rollover, priv);
+        if (ret)
+                goto out;
+
+        if (priv->fsync_interval) {
+                priv->cf.this = this;
+                ret = gf_thread_create (&priv->cf.fsync_th,
+                                        NULL, changelog_fsync_thread, priv);
+                if (ret)
+                        changelog_cleanup_helper_threads (this, priv);
+        }
+
+ out:
+        return ret;
+}
+
+/**
+ * Translator notification handler.
+ *
+ * Only GF_EVENT_TRANSLATOR_OP is handled here; @data is then a dict whose
+ * "barrier" key selects the action:
+ *   - BARRIER_OFF: stops snapshot logging, disables the changelog barrier
+ *     and dequeues the fops that were held back.
+ *   - BARRIER_ON: starts snapshot logging, enables the changelog barrier,
+ *     requests an explicit rollover and waits on bnotify_cond until the
+ *     bnotify flag is cleared.
+ *   - key missing or unparsable: error.
+ * Every other event is passed to default_notify().
+ *
+ * Returns 0 on success and -1 (or a pthread/default_notify error) on
+ * failure. Returns 0 without doing anything when the xlator has no
+ * private data yet.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        changelog_priv_t *priv       = NULL;
+        dict_t           *dict       = NULL;
+        char              buf[1]     = {1};
+        int               barrier    = DICT_DEFAULT;
+        gf_boolean_t      bclean_req = _gf_false;
+        int               ret        = 0;
+        int               ret1       = 0;
+        struct list_head  queue      = {0, };
+
+        INIT_LIST_HEAD (&queue);
+
+        priv = this->private;
+        if (!priv)
+                goto out;
+
+        if (event == GF_EVENT_TRANSLATOR_OP) {
+
+                dict = data;
+
+                barrier = dict_get_str_boolean (dict, "barrier", DICT_DEFAULT);
+
+                switch (barrier) {
+                case DICT_ERROR:
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_DICT_GET_FAILED,
+                                "Barrier dict_get_str_boolean failed");
+                        ret = -1;
+                        goto out;
+
+                case BARRIER_OFF:
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                CHANGELOG_MSG_BARRIER_INFO,
+                                "Barrier off notification");
+
+                        CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
+                        LOCK(&priv->c_snap_lock);
+                        {
+                                changelog_snap_logging_stop (this, priv);
+                        }
+                        UNLOCK(&priv->c_snap_lock);
+
+                        /* an "off" is only valid after an external "on" */
+                        LOCK (&priv->bflags.lock);
+                        {
+                                if (priv->bflags.barrier_ext == _gf_false)
+                                        ret = -1;
+                        }
+                        UNLOCK (&priv->bflags.lock);
+
+                        if (ret == -1 ) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_MSG_BARRIER_ERROR,
+                                        "Received another barrier off"
+                                        " notification while already off");
+                                goto out;
+                        }
+
+                        /* Stop changelog barrier and dequeue all fops */
+                        LOCK (&priv->lock);
+                        {
+                                if (priv->barrier_enabled == _gf_true)
+                                        __chlog_barrier_disable (this, &queue);
+                                else
+                                        ret = -1;
+                        }
+                        UNLOCK (&priv->lock);
+                        /* If ret = -1, then changelog barrier is already
+                         * disabled because of error or timeout.
+                         */
+                        if (ret == 0) {
+                                chlog_barrier_dequeue_all(this, &queue);
+                                gf_msg(this->name, GF_LOG_INFO, 0,
+                                       CHANGELOG_MSG_BARRIER_INFO,
+                                       "Disabled changelog barrier");
+                        } else {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_MSG_BARRIER_ERROR,
+                                        "Changelog barrier already disabled");
+                        }
+
+                        LOCK (&priv->bflags.lock);
+                        {
+                                priv->bflags.barrier_ext = _gf_false;
+                        }
+                        UNLOCK (&priv->bflags.lock);
+
+                        goto out;
+
+                case BARRIER_ON:
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                CHANGELOG_MSG_BARRIER_INFO,
+                                "Barrier on notification");
+
+                        CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
+                        LOCK(&priv->c_snap_lock);
+                        {
+                                changelog_snap_logging_start (this, priv);
+                        }
+                        UNLOCK(&priv->c_snap_lock);
+
+                        /* reject a second "on" while one is outstanding */
+                        LOCK (&priv->bflags.lock);
+                        {
+                                if (priv->bflags.barrier_ext == _gf_true)
+                                        ret = -1;
+                                else
+                                        priv->bflags.barrier_ext = _gf_true;
+                        }
+                        UNLOCK (&priv->bflags.lock);
+
+                        if (ret == -1 ) {
+                                /* adjacent literals are concatenated as-is,
+                                 * so each continuation needs its own
+                                 * leading space */
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_MSG_BARRIER_ERROR,
+                                        "Received another barrier on"
+                                        " notification when last one is"
+                                        " not served yet");
+                                goto out;
+                        }
+
+                        ret = pthread_mutex_lock (&priv->bn.bnotify_mutex);
+                        CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, out,
+                                                          bclean_req);
+                        {
+                                priv->bn.bnotify = _gf_true;
+                        }
+                        ret = pthread_mutex_unlock (&priv->bn.bnotify_mutex);
+                        CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, out,
+                                                          bclean_req);
+
+                        /* Start changelog barrier */
+                        LOCK (&priv->lock);
+                        {
+                                ret = __chlog_barrier_enable (this, priv);
+                        }
+                        UNLOCK (&priv->lock);
+                        if (ret == -1) {
+                                changelog_barrier_cleanup (this, priv, &queue);
+                                goto out;
+                        }
+
+                        gf_msg(this->name, GF_LOG_INFO, 0,
+                               CHANGELOG_MSG_BARRIER_INFO,
+                               "Enabled changelog barrier");
+
+                        ret = changelog_barrier_notify(priv, buf);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CHANGELOG_MSG_WRITE_FAILED,
+                                        "Explicit roll over: write failed");
+                                changelog_barrier_cleanup (this, priv, &queue);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = pthread_mutex_lock (&priv->bn.bnotify_mutex);
+                        CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, out,
+                                                          bclean_req);
+                        {
+                                /* The while condition check is required here to
+                                 * handle spurious wakeup of cond wait that can
+                                 * happen with pthreads. See man page */
+                                while (priv->bn.bnotify == _gf_true) {
+                                        ret = pthread_cond_wait (
+                                                       &priv->bn.bnotify_cond,
+                                                       &priv->bn.bnotify_mutex);
+                                        CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret,
+                                                                          out,
+                                                                          bclean_req);
+                                }
+                                if (priv->bn.bnotify_error == _gf_true) {
+                                        ret = -1;
+                                        priv->bn.bnotify_error = _gf_false;
+                                }
+                        }
+                        /* separate status so the result above survives */
+                        ret1 = pthread_mutex_unlock (&priv->bn.bnotify_mutex);
+                        CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret1, out,
+                                                          bclean_req);
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                CHANGELOG_MSG_BNOTIFY_INFO,
+                                "Woke up: bnotify conditional wait");
+
+                        goto out;
+
+                case DICT_DEFAULT:
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CHANGELOG_MSG_DICT_GET_FAILED,
+                                "barrier key not found");
+                        ret = -1;
+                        goto out;
+
+                default:
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                CHANGELOG_MSG_DICT_GET_FAILED,
+                                "Something went bad in dict_get_str_boolean");
+                        ret = -1;
+                        goto out;
+                }
+        } else {
+                ret = default_notify (this, event, data);
+        }
+
+ out:
+        if (bclean_req)
+                changelog_barrier_cleanup (this, priv, &queue);
+
+        return ret;
+}
+
+/**
+ * Sets up memory accounting for this xlator with gf_changelog_mt_end + 1
+ * types. Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init(); a failure is logged as a warning.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (this == NULL)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_changelog_mt_end + 1);
+        if (ret)
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "Memory accounting init failed");
+
+        return ret;
+}
+
+/**
+ * One-time changelog state setup: records the slice start time, fills
+ * the type-to-prefix map, and resets every changelog version to 1.
+ * When the changelog is active it additionally opens the htime file,
+ * injects an initial rollover event and spawns the helper threads.
+ *
+ * Returns 0 on success (also when changelog is inactive), non-zero on
+ * the first failing step.
+ */
+static int
+changelog_init (xlator_t *this, changelog_priv_t *priv)
+{
+        int                  i   = 0;
+        int                  ret = -1;
+        struct timeval       tv  = {0,};
+        changelog_log_data_t cld = {0,};
+
+        ret = gettimeofday (&tv, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CHANGELOG_MSG_GET_TIME_OP_FAILED,
+                        "gettimeofday() failure");
+                goto out;
+        }
+
+        priv->slice.tv_start = tv;
+
+        priv->maps[CHANGELOG_TYPE_DATA]     = "D ";
+        priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
+        priv->maps[CHANGELOG_TYPE_ENTRY]    = "E ";
+
+        for (; i < CHANGELOG_MAX_TYPE; i++) {
+                /* start with version 1 */
+                priv->slice.changelog_version[i] = 1;
+        }
+
+        /* ret is 0 here: nothing more to do for an inactive changelog */
+        if (!priv->active)
+                return ret;
+
+        /**
+         * start with a fresh changelog file every time. this is done
+         * in case there was an encoding change. so... things are kept
+         * simple here.
+         */
+        ret = changelog_fill_rollover_data (&cld, _gf_false);
+        if (ret)
+                goto out;
+
+        /* call htime open with cld's rollover_time */
+        ret = htime_open (this, priv, cld.cld_roll_time);
+        if (ret)
+                goto out;
+
+        LOCK (&priv->lock);
+        {
+                ret = changelog_inject_single_event (this, priv, &cld);
+        }
+        UNLOCK (&priv->lock);
+        /* do not let the thread-spawn result mask an injection failure */
+        if (ret)
+                goto out;
+
+        /* ... and finally spawn the helpers threads */
+        ret = changelog_spawn_helper_threads (this, priv);
+
+ out:
+        return ret;
+}
+
+/**
+ * Init barrier related condition variables and locks: the bnotify
+ * mutex/cond, the black and white drain mutex/cond pairs, and the
+ * rollover (cr) mutex/cond.
+ *
+ * pthread init functions return the error number instead of setting
+ * errno, so the returned value is what gets logged.
+ *
+ * Returns 0 on success. On the first failure, everything initialised so
+ * far is destroyed again and -1 is returned.
+ */
+static int
+changelog_barrier_pthread_init (xlator_t *this, changelog_priv_t *priv)
+{
+        gf_boolean_t bn_mutex_init       = _gf_false;
+        gf_boolean_t bn_cond_init        = _gf_false;
+        gf_boolean_t dm_mutex_black_init = _gf_false;
+        gf_boolean_t dm_cond_black_init  = _gf_false;
+        gf_boolean_t dm_mutex_white_init = _gf_false;
+        gf_boolean_t dm_cond_white_init  = _gf_false;
+        gf_boolean_t cr_mutex_init       = _gf_false;
+        gf_boolean_t cr_cond_init        = _gf_false;
+        int          ret                 = 0;
+
+        if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
+                        "bnotify pthread_mutex_init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        bn_mutex_init = _gf_true;
+
+        if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
+                        "bnotify pthread_cond_init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        bn_cond_init = _gf_true;
+
+        if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0)
+        {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
+                        "drain_black pthread_mutex_init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        dm_mutex_black_init = _gf_true;
+
+        if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
+                        "drain_black pthread_cond_init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        dm_cond_black_init = _gf_true;
+
+        if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0)
+        {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
+                        "drain_white pthread_mutex_init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        dm_mutex_white_init = _gf_true;
+
+        if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
+                        "drain_white pthread_cond_init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        dm_cond_white_init = _gf_true;
+
+        /* capture the result here too, so the log shows the real code */
+        if ((ret = pthread_mutex_init(&priv->cr.lock, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
+                        "changelog_rollover lock init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        cr_mutex_init = _gf_true;
+
+        if ((ret = pthread_cond_init(&priv->cr.cond, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
+                        "changelog_rollover cond init failed (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+        cr_cond_init = _gf_true;
+ out:
+        if (ret) {
+                /* roll back whatever was initialised before the failure */
+                if (bn_mutex_init)
+                        pthread_mutex_destroy(&priv->bn.bnotify_mutex);
+                if (bn_cond_init)
+                        pthread_cond_destroy (&priv->bn.bnotify_cond);
+                if (dm_mutex_black_init)
+                        pthread_mutex_destroy(&priv->dm.drain_black_mutex);
+                if (dm_cond_black_init)
+                        pthread_cond_destroy (&priv->dm.drain_black_cond);
+                if (dm_mutex_white_init)
+                        pthread_mutex_destroy(&priv->dm.drain_white_mutex);
+                if (dm_cond_white_init)
+                        pthread_cond_destroy (&priv->dm.drain_white_cond);
+                if (cr_mutex_init)
+                        pthread_mutex_destroy(&priv->cr.lock);
+                if (cr_cond_init)
+                        pthread_cond_destroy (&priv->cr.cond);
+        }
+        return ret;
+}
+
+/* Tear down the barrier related synchronisation objects: the bnotify,
+ * drain (black/white) and rollover mutex/cond pairs, plus bflags.lock. */
+static void
+changelog_barrier_pthread_destroy (changelog_priv_t *priv)
+{
+        /* barrier notification */
+        pthread_mutex_destroy (&priv->bn.bnotify_mutex);
+        pthread_cond_destroy (&priv->bn.bnotify_cond);
+
+        /* drain management: black, then white */
+        pthread_mutex_destroy (&priv->dm.drain_black_mutex);
+        pthread_cond_destroy (&priv->dm.drain_black_cond);
+        pthread_mutex_destroy (&priv->dm.drain_white_mutex);
+        pthread_cond_destroy (&priv->dm.drain_white_cond);
+
+        /* rollover */
+        pthread_mutex_destroy (&priv->cr.lock);
+        pthread_cond_destroy (&priv->cr.cond);
+
+        LOCK_DESTROY (&priv->bflags.lock);
+}
+
+/*
+ * Apply a changed option set to the changelog translator.
+ *
+ * The helper threads are stopped first, then every option is re-read from
+ * @options.  When changelog is active now or was active before, a single
+ * rollover event is injected under priv->lock; when it is active now the
+ * helper threads are spawned again (after htime_create() if it was inactive
+ * before).
+ *
+ * Returns 0 on success (also when this->private is not set), non-zero on
+ * failure.  priv->active is switched to true only on the success path.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ changelog_priv_t *priv = NULL;
+ gf_boolean_t active_earlier = _gf_true;
+ gf_boolean_t active_now = _gf_true;
+ changelog_time_slice_t *slice = NULL;
+ changelog_log_data_t cld = {0,};
+ char htime_dir[PATH_MAX] = {0,};
+ char csnap_dir[PATH_MAX] = {0,};
+ struct timeval tv = {0,};
+ uint32_t timeout = 0;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ /* from here on every early exit reports failure unless ret is reset */
+ ret = -1;
+ active_earlier = priv->active;
+
+ /* first stop the rollover and the fsync thread */
+ changelog_cleanup_helper_threads (this, priv);
+
+ GF_OPTION_RECONF ("changelog-dir", tmp, options, str, out);
+ if (!tmp) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_DIR_OPTIONS_NOT_SET,
+ "\"changelog-dir\" option is not set");
+ goto out;
+ }
+
+ /* replace the stored directory with a private copy of the new value */
+ GF_FREE (priv->changelog_dir);
+ priv->changelog_dir = gf_strdup (tmp);
+ if (!priv->changelog_dir)
+ goto out;
+
+ ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+
+ if (ret)
+ goto out;
+ CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir);
+ ret = mkdir_p (htime_dir, 0600, _gf_true);
+
+ if (ret)
+ goto out;
+
+ CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir);
+ ret = mkdir_p (csnap_dir, 0600, _gf_true);
+
+ if (ret)
+ goto out;
+
+ GF_OPTION_RECONF ("changelog", active_now, options, bool, out);
+
+ /**
+ * changelog_handle_change() handles changes that may have been
+ * submitted before changelog deactivation, so deactivation can be
+ * recorded right away.
+ */
+ if (!active_now)
+ priv->active = _gf_false;
+
+ GF_OPTION_RECONF ("op-mode", tmp, options, str, out);
+ changelog_assign_opmode (priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_RECONF ("encoding", tmp, options, str, out);
+ changelog_assign_encoding (priv, tmp);
+
+ GF_OPTION_RECONF ("rollover-time",
+ priv->rollover_time, options, int32, out);
+ GF_OPTION_RECONF ("fsync-interval",
+ priv->fsync_interval, options, int32, out);
+ GF_OPTION_RECONF ("changelog-barrier-timeout",
+ timeout, options, time, out);
+ changelog_assign_barrier_timeout (priv, timeout);
+
+ GF_OPTION_RECONF ("capture-del-path", priv->capture_del_path, options,
+ bool, out);
+
+ if (active_now || active_earlier) {
+ /* second argument is true when changelog is being switched off */
+ ret = changelog_fill_rollover_data (&cld, !active_now);
+ if (ret)
+ goto out;
+
+ slice = &priv->slice;
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (!ret && active_now)
+ SLICE_VERSION_UPDATE (slice);
+ }
+ UNLOCK (&priv->lock);
+
+ if (ret)
+ goto out;
+
+ if (active_now) {
+ if (!active_earlier) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_HTIME_INFO,
+ "Reconfigure: Changelog Enable");
+ if (gettimeofday(&tv, NULL) ) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_HTIME_ERROR,
+ "unable to fetch htime");
+ ret = -1;
+ goto out;
+ }
+ /* NOTE(review): the result of htime_create() is not checked */
+ htime_create (this, priv, tv.tv_sec);
+ }
+ ret = changelog_spawn_helper_threads (this, priv);
+ }
+ }
+
+ out:
+ if (ret) {
+ /* TODO */
+ } else {
+ gf_msg_debug (this->name, 0,
+ "changelog reconfigured");
+ /* activation is published only once everything above succeeded */
+ if (active_now && priv)
+ priv->active = _gf_true;
+ }
+
+ return ret;
+}
+
+/*
+ * Undo the option setup: run the destructor of the selected bootstrapper
+ * (a failure is only logged) and free the duplicated brick and changelog
+ * directory strings.
+ */
+static void
+changelog_freeup_options (xlator_t *this, changelog_priv_t *priv)
+{
+        if (priv->cb->dtor (this, &priv->cd) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_FREEUP_FAILED,
+                        "could not cleanup bootstrapper");
+        }
+
+        GF_FREE (priv->changelog_brick);
+        GF_FREE (priv->changelog_dir);
+}
+
+/*
+ * Read the translator options into @priv at init time.
+ *
+ * Duplicates the "changelog-brick" and "changelog-dir" strings, creates the
+ * changelog directory together with its htime and csnap sub-directories,
+ * reads the remaining options and finally runs the constructor of the
+ * bootstrapper selected by priv->op_mode.
+ *
+ * Returns 0 on success.  On failure returns -1 after freeing whichever of
+ * the two duplicated strings had already been allocated (labels dealloc_2,
+ * dealloc_1).
+ */
+static int
+changelog_init_options (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ uint32_t timeout = 0;
+ char htime_dir[PATH_MAX] = {0,};
+ char csnap_dir[PATH_MAX] = {0,};
+
+ GF_OPTION_INIT ("changelog-brick", tmp, str, error_return);
+ priv->changelog_brick = gf_strdup (tmp);
+ if (!priv->changelog_brick)
+ goto error_return;
+
+ tmp = NULL;
+
+ GF_OPTION_INIT ("changelog-dir", tmp, str, dealloc_1);
+ priv->changelog_dir = gf_strdup (tmp);
+ if (!priv->changelog_dir)
+ goto dealloc_1;
+
+ tmp = NULL;
+
+ /**
+ * create the directory even if change-logging would be inactive
+ * so that consumers can _look_ into it (finding nothing...)
+ */
+ ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+
+ if (ret)
+ goto dealloc_2;
+
+ CHANGELOG_FILL_HTIME_DIR (priv->changelog_dir, htime_dir);
+ ret = mkdir_p (htime_dir, 0600, _gf_true);
+ if (ret)
+ goto dealloc_2;
+
+ CHANGELOG_FILL_CSNAP_DIR (priv->changelog_dir, csnap_dir);
+ ret = mkdir_p (csnap_dir, 0600, _gf_true);
+ if (ret)
+ goto dealloc_2;
+
+ GF_OPTION_INIT ("changelog", priv->active, bool, dealloc_2);
+ GF_OPTION_INIT ("capture-del-path", priv->capture_del_path,
+ bool, dealloc_2);
+
+ GF_OPTION_INIT ("op-mode", tmp, str, dealloc_2);
+ changelog_assign_opmode (priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_INIT ("encoding", tmp, str, dealloc_2);
+ changelog_assign_encoding (priv, tmp);
+ changelog_encode_change (priv);
+
+ GF_OPTION_INIT ("rollover-time",
+ priv->rollover_time, int32, dealloc_2);
+
+ GF_OPTION_INIT ("fsync-interval",
+ priv->fsync_interval, int32, dealloc_2);
+
+ GF_OPTION_INIT ("changelog-barrier-timeout",
+ timeout, time, dealloc_2);
+ changelog_assign_barrier_timeout (priv, timeout);
+
+ /* the bootstrap table is indexed by op-mode; check the slot matches */
+ GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode);
+ priv->cb = &cb_bootstrap[priv->op_mode];
+
+ /* ... now bootstrap the logger */
+ ret = priv->cb->ctor (this, &priv->cd);
+ if (ret)
+ goto dealloc_2;
+
+ /* no changelog file is open yet */
+ priv->changelog_fd = -1;
+
+ return 0;
+
+ /* cleanup labels fall through: dealloc_2 also runs dealloc_1 */
+ dealloc_2:
+ GF_FREE (priv->changelog_dir);
+ dealloc_1:
+ GF_FREE (priv->changelog_brick);
+ error_return:
+ return -1;
+}
+
+/*
+ * Tear down the RPC side of the translator: the listener, the buffers
+ * held in priv->rbuf and, if one exists, the poller thread.
+ */
+static void
+changelog_cleanup_rpc (xlator_t *this, changelog_priv_t *priv)
+{
+        /* stop the rpc server first */
+        changelog_destroy_rpc_listner (this, priv);
+
+        /* then release the rot buffs */
+        rbuf_dtor (priv->rbuf);
+
+        /* nothing more to do when no poller thread was recorded */
+        if (!priv->poller)
+                return;
+
+        (void) changelog_thread_cleanup (this, priv->poller);
+}
+
+/*
+ * Bring up the RPC side of the translator: initialise the event selection,
+ * allocate the buffers in priv->rbuf and start the RPC listener.
+ *
+ * Returns 0 on success with priv->rbuf and priv->rpc set.  Returns -1 on
+ * failure after releasing what was set up here (and cleaning up the poller
+ * thread if priv->poller is set).
+ */
+static int
+changelog_init_rpc (xlator_t *this, changelog_priv_t *priv)
+{
+        rpcsvc_t                *rpc       = NULL;
+        changelog_ev_selector_t *selection = NULL;
+
+        selection = &priv->ev_selection;
+
+        /* initialize event selection */
+        changelog_init_event_selection (this, selection);
+
+        priv->rbuf = rbuf_init (NR_ROTT_BUFFS);
+        if (!priv->rbuf)
+                goto cleanup_thread;
+
+        rpc = changelog_init_rpc_listner (this, priv,
+                                          priv->rbuf, NR_DISPATCHERS);
+        if (!rpc)
+                goto cleanup_rbuf;
+        priv->rpc = rpc;
+
+        return 0;
+
+        /* labels fall through: cleanup_rbuf also runs cleanup_thread */
+ cleanup_rbuf:
+        rbuf_dtor (priv->rbuf);
+ cleanup_thread:
+        if (priv->poller)
+                (void) changelog_thread_cleanup (this, priv->poller);
+
+        return -1;
+}
+
+/*
+ * Translator entry point.
+ *
+ * Checks that the translator has exactly one child and at least one parent,
+ * allocates the private structure and the local memory pool, then brings up
+ * options, barrier primitives, RPC and the changelog itself, in that order.
+ *
+ * Returns 0 on success with this->private set.  On failure everything that
+ * was set up is released in reverse order, this->private is cleared and -1
+ * is returned.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int               ret  = -1;
+        changelog_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("changelog", this, error_return);
+
+        if (!this->children || this->children->next) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_CHILD_MISCONFIGURED,
+                        "translator needs a single subvolume");
+                goto error_return;
+        }
+
+        if (!this->parents) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CHANGELOG_MSG_VOL_MISCONFIGURED,
+                        "dangling volume. please check volfile");
+                goto error_return;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t);
+        if (!priv)
+                goto error_return;
+
+        this->local_pool = mem_pool_new (changelog_local_t, 64);
+        if (!this->local_pool) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CHANGELOG_MSG_NO_MEMORY,
+                        "failed to create local memory pool");
+                goto cleanup_priv;
+        }
+
+        LOCK_INIT (&priv->lock);
+        LOCK_INIT (&priv->c_snap_lock);
+
+        ret = changelog_init_options (this, priv);
+        if (ret)
+                goto cleanup_mempool;
+
+        /* snap dependency changes */
+        priv->dm.black_fop_cnt = 0;
+        priv->dm.white_fop_cnt = 0;
+        priv->dm.drain_wait_black = _gf_false;
+        priv->dm.drain_wait_white = _gf_false;
+        priv->current_color = FOP_COLOR_BLACK;
+        priv->explicit_rollover = _gf_false;
+
+        priv->cr.notify = _gf_false;
+        /* Mutex is not needed as threads are not spawned yet */
+        priv->bn.bnotify = _gf_false;
+        priv->bn.bnotify_error = _gf_false;
+        ret = changelog_barrier_pthread_init (this, priv);
+        if (ret)
+                goto cleanup_options;
+        LOCK_INIT (&priv->bflags.lock);
+        priv->bflags.barrier_ext = _gf_false;
+
+        /* Changelog barrier init */
+        INIT_LIST_HEAD (&priv->queue);
+        priv->barrier_enabled = _gf_false;
+
+        /* RPC ball rolling.. */
+        ret = changelog_init_rpc (this, priv);
+        if (ret)
+                goto cleanup_barrier;
+
+        ret = changelog_init (this, priv);
+        if (ret)
+                goto cleanup_rpc;
+
+        gf_msg_debug (this->name, 0, "changelog translator loaded");
+
+        this->private = priv;
+        return 0;
+
+        /* cleanup labels fall through, undoing the setup in reverse order */
+ cleanup_rpc:
+        changelog_cleanup_rpc (this, priv);
+ cleanup_barrier:
+        changelog_barrier_pthread_destroy (priv);
+ cleanup_options:
+        changelog_freeup_options (this, priv);
+ cleanup_mempool:
+        /* both locks were initialised before any jump to this label */
+        LOCK_DESTROY (&priv->c_snap_lock);
+        LOCK_DESTROY (&priv->lock);
+        mem_pool_destroy (this->local_pool);
+ cleanup_priv:
+        GF_FREE (priv);
+ error_return:
+        /* this is NULL when the GF_VALIDATE_OR_GOTO check above failed */
+        if (this)
+                this->private = NULL;
+        return -1;
+}
+
+/*
+ * Translator exit point: when private state exists, release it in the
+ * order RPC, barrier primitives, options, local memory pool, private
+ * structure.  this->private is cleared in every case.
+ */
+void
+fini (xlator_t *this)
+{
+        changelog_priv_t *priv = this->private;
+
+        if (priv != NULL) {
+                /* RPC server and its threads go first */
+                changelog_cleanup_rpc (this, priv);
+
+                /* barrier mutexes and condition variables */
+                changelog_barrier_pthread_destroy (priv);
+
+                /* option strings and the bootstrapper */
+                changelog_freeup_options (this, priv);
+
+                /* the local memory pool */
+                mem_pool_destroy (this->local_pool);
+
+                /* and last the private structure itself */
+                GF_FREE (priv);
+        }
+
+        this->private = NULL;
+}
+
+/*
+ * File operations this translator provides a handler for; each entry maps
+ * the fop to its changelog_<fop> function.  Fops not listed here have no
+ * handler in this table.
+ */
+struct xlator_fops fops = {
+ .open = changelog_open,
+ .mknod = changelog_mknod,
+ .mkdir = changelog_mkdir,
+ .create = changelog_create,
+ .symlink = changelog_symlink,
+ .writev = changelog_writev,
+ .truncate = changelog_truncate,
+ .ftruncate = changelog_ftruncate,
+ .link = changelog_link,
+ .rename = changelog_rename,
+ .unlink = changelog_unlink,
+ .rmdir = changelog_rmdir,
+ .setattr = changelog_setattr,
+ .fsetattr = changelog_fsetattr,
+ .setxattr = changelog_setxattr,
+ .fsetxattr = changelog_fsetxattr,
+ .removexattr = changelog_removexattr,
+ .fremovexattr = changelog_fremovexattr,
+ .ipc = changelog_ipc,
+ .xattrop = changelog_xattrop,
+ .fxattrop = changelog_fxattrop,
+};
+
+/* Callback table: handlers registered for the forget and release callbacks. */
+struct xlator_cbks cbks = {
+ .forget = changelog_forget,
+ .release = changelog_release,
+};
+
+/*
+ * Options understood by this translator, terminated by an entry whose key
+ * is NULL.  The keys are the same strings read with GF_OPTION_INIT and
+ * GF_OPTION_RECONF in this file.
+ * NOTE(review): "rollover-time" and "fsync-interval" are declared with
+ * GF_OPTION_TYPE_TIME here but read as int32 by the option macros; confirm
+ * this is intended.
+ */
+struct volume_options options[] = {
+ {.key = {"changelog"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable change-logging"
+ },
+ {.key = {"changelog-brick"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path to generate unique socket file name."
+ " should be the export directory of the volume strictly."
+ },
+ {.key = {"changelog-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "directory for the changelog files"
+ },
+ {.key = {"op-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "realtime",
+ .value = {"realtime"},
+ .description = "operation mode - futuristic operation modes"
+ },
+ {.key = {"encoding"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "ascii",
+ .value = {"binary", "ascii"},
+ .description = "encoding type for changelogs"
+ },
+ {.key = {"rollover-time"},
+ .default_value = "15",
+ .type = GF_OPTION_TYPE_TIME,
+ .description = "time to switch to a new changelog file (in seconds)"
+ },
+ {.key = {"fsync-interval"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "5",
+ .description = "do not open CHANGELOG file with O_SYNC mode."
+ " instead perform fsync() at specified intervals"
+ },
+ { .key = {"changelog-barrier-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = BARRIER_TIMEOUT,
+ .description = "After 'timeout' seconds since the time 'barrier' "
+ "option was set to \"on\", unlink/rmdir/rename "
+ "operations are no longer blocked and previously "
+ "blocked fops are allowed to go through"
+ },
+ {.key = {"capture-del-path"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable capturing paths of deleted entries"
+ },
+ /* terminating entry */
+ {.key = {NULL}
+ },
+};
diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/changetimerecorder/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am
new file mode 100644
index 00000000000..44cebd6aedf
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/Makefile.am
@@ -0,0 +1,23 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# changetimerecorder can only be built when libgfdb is enabled
+if BUILD_GFDB
+ xlator_LTLIBRARIES = changetimerecorder.la
+endif
+
+changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+changetimerecorder_la_SOURCES = changetimerecorder.c ctr-helper.c ctr-xlator-ctx.c
+
+changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
+ $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la
+
+noinst_HEADERS = changetimerecorder.h ctr_mem_types.h ctr-helper.h ctr-xlator-ctx.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/libglusterfs/src/gfdb \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
new file mode 100644
index 00000000000..b6ff18934fe
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
@@ -0,0 +1,2308 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include "gfdb_sqlite3.h"
+#include "ctr-helper.h"
+#include "ctr-messages.h"
+#include "syscall.h"
+
+/*******************************inode forget***********************************/
+
+/* Forget callback: drop the CTR xlator context attached to @inode.
+ * Always returns 0. */
+int
+ctr_forget (xlator_t *this, inode_t *inode)
+{
+        fini_ctr_xlator_ctx (this, inode);
+
+        return 0;
+}
+
+/************************** Look up heal **************************************/
+/*
+Problem: The CTR xlator records file metadata (heat/hardlinks)
+into the database. This works fine for files which are created
+after the CTR xlator is switched ON. But for files which were
+created before the CTR xlator was switched ON, the CTR xlator is
+not able to record either kind of metadata, i.e. heat or hardlinks,
+thus making those files immune to promotions/demotions.
+
+Solution: The solution that is implemented in this patch is to
+do a ctr-db heal of all those pre-existing files, using named lookup.
+For this purpose we use the inode-xlator context variable option
+in gluster.
+The inode-xlator context variable for the ctr xlator will have the
+following:
+ a. A Lock for the context variable
+ b. A hardlink list: This list represents the successful looked
+ up hardlinks.
+These are the scenarios when the hardlink list is updated:
+1) Named-Lookup: Whenever a named lookup happens on a file, in the
+ wind path we copy all required hardlink and inode information to the
+ ctr_db_record structure, which resides in the frame->local variable.
+ We don't update the database in the wind path. During the unwind, we
+ read the information from the ctr_db_record and:
+ Check if the inode context variable is created; if not, we create it.
+ Check if the hard link is there in the hardlink list.
+ If it's not there, we add it to the list and send an update to the
+ database using libgfdb.
+ Please note: The database transaction can fail (and we ignore that), as
+ there might already be a record in the db. This update to the db is to
+ heal the record if it's not there.
+ If it's there in the list, we ignore it.
+2) Inode Forget: Whenever an inode forget hits, we clear the hardlink list in
+ the inode context variable and delete the inode context variable.
+ Please note: An inode forget may happen for two reasons:
+ a. when the inode is deleted.
+ b. when the in-memory inode is evicted from the inode table due to cache limits.
+3) create: whenever a create happens, we create the inode context variable and
+ add the hardlink. The database update is done as usual by ctr.
+4) link: whenever a hardlink is created for the inode, we create the inode
+ context variable, if not present, and add the hardlink to the list.
+5) unlink: whenever an unlink happens, we delete the hardlink from the list.
+6) mknod: same as create.
+7) rename: whenever a rename happens, we update the hardlink in the list. If
+ the hardlink was not present for the update, we add the hardlink to the list.
+
+What is pending:
+1) This solution will only work for named lookups.
+2) We don't track afr-self-heal/dht-rebalancer traffic for healing.
+
+*/
+
+
+/*
+ * Wind-path half of the lookup heal.
+ *
+ * Writes nothing to the db.  When wind recording is enabled and the inode
+ * context does not describe a directory, it creates frame->local and fills
+ * its ctr_db_record with the gfid, fop path/type and hard link (parent gfid
+ * and basename) taken from @ctr_inode_cx.
+ *
+ * Returns 0 on success (also when nothing was recorded) and -1 when the
+ * local could not be created; frame->local is NULL in that case.
+ */
+static int
+ctr_lookup_wind(call_frame_t *frame,
+                xlator_t *this,
+                gf_ctr_inode_context_t *ctr_inode_cx)
+{
+        int               ret       = -1;
+        gf_ctr_private_t *_priv     = NULL;
+        gf_ctr_local_t   *ctr_local = NULL;
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+        GF_ASSERT(this);
+        IS_CTR_INODE_CX_SANE(ctr_inode_cx);
+
+        _priv = this->private;
+        GF_ASSERT (_priv);
+
+        if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
+
+                frame->local = init_ctr_local_t (this);
+                if (!frame->local) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+                                "WIND: Error while creating ctr local");
+                        goto out;
+                }
+                ctr_local = frame->local;
+                /* Definitely no internal fops will reach here */
+                ctr_local->is_internal_fop = _gf_false;
+                /* Don't record counters */
+                CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
+                /* Don't record time at all */
+                CTR_DB_REC(ctr_local).do_record_times = _gf_false;
+
+                /* Copy gfid into db record */
+                gf_uuid_copy (CTR_DB_REC(ctr_local).gfid,
+                              *(ctr_inode_cx->gfid));
+
+                /* Set fop_path and fop_type, required by libgfdb to make
+                 * decision while inserting the record */
+                CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
+                CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
+
+                /* Copy hard link info */
+                gf_uuid_copy (CTR_DB_REC(ctr_local).pargfid,
+                              *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
+                /* strncpy() does not terminate the destination when the
+                 * source fills it, so copy one byte less and terminate
+                 * explicitly (file_name is taken to be an array, as the
+                 * sizeof bound already relied on) */
+                strncpy (CTR_DB_REC(ctr_local).file_name,
+                         NEW_LINK_CX(ctr_inode_cx)->basename,
+                         sizeof(CTR_DB_REC(ctr_local).file_name) - 1);
+                CTR_DB_REC(ctr_local).file_name
+                        [sizeof(CTR_DB_REC(ctr_local).file_name) - 1] = '\0';
+
+                /* Since we are in lookup we can ignore errors while
+                 * inserting in the DB, because there may be many
+                 * attempts to write to the DB for healing.
+                 * We don't want to log all failed attempts and
+                 * bloat the log */
+                ctr_local->gfdb_db_record.ignore_errors = _gf_true;
+        }
+
+        ret = 0;
+
+out:
+
+        if (ret) {
+                free_ctr_local (ctr_local);
+                frame->local = NULL;
+        }
+
+        return ret;
+}
+
+
+/*
+ * Unwind-path half of the lookup heal: push the ctr_db_record kept in
+ * frame->local to the db (skipped when there is no local or it describes a
+ * directory), then release the local in every case.
+ *
+ * Returns -1 only when insert_record() reported -1, otherwise 0.
+ */
+static int
+ctr_lookup_unwind (call_frame_t *frame,
+                   xlator_t *this)
+{
+        int               ret       = 0;
+        gf_ctr_private_t *_priv     = NULL;
+        gf_ctr_local_t   *ctr_local = NULL;
+
+        GF_ASSERT(frame);
+        GF_ASSERT(this);
+
+        _priv = this->private;
+        GF_ASSERT (_priv);
+
+        GF_ASSERT(_priv->_db_conn);
+
+        ctr_local = frame->local;
+
+        if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR) &&
+            (insert_record(_priv->_db_conn,
+                           &ctr_local->gfdb_db_record) == -1)) {
+                /* log level depends on the record's ignore_errors flag */
+                gf_msg (this->name,
+                        _gfdb_log_level (GF_LOG_ERROR,
+                                         ctr_local->
+                                         gfdb_db_record.ignore_errors),
+                        0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
+                        "UNWIND: Error filling ctr local");
+                ret = -1;
+        }
+
+        free_ctr_local (ctr_local);
+        frame->local = NULL;
+
+        return ret;
+}
+
+/******************************************************************************
+ *
+ * FOPS HANDLING BELOW
+ *
+ * ***************************************************************************/
+
+/****************************LOOKUP********************************************/
+
+
+/*
+ * Lookup callback: decide whether the looked-up file needs a db heal and,
+ * if so, insert the record prepared in the wind path.
+ *
+ * Nothing is recorded when CTR is disabled, the lookup failed, the inode is
+ * a directory, no frame->local was set up in the wind path, the entry is a
+ * dht link file, or the gfid/parent gfid is null.  Whatever happens,
+ * frame->local is released and the lookup result is unwound unchanged.
+ */
+int32_t
+ctr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ int ret = -1;
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
+ gf_ctr_local_t *ctr_local = NULL;
+ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
+ gf_boolean_t _is_heal_needed = _gf_false;
+
+ CTR_IS_DISABLED_THEN_GOTO(this, out);
+
+ /* if the lookup failed, don't do anything */
+ if (op_ret == -1) {
+ gf_msg_trace (this->name, 0, "lookup failed with %s",
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* Ignore directory lookups */
+ if (inode->ia_type == IA_IFDIR) {
+ goto out;
+ }
+
+ /* if frame local was not set by ctr_lookup(),
+ * don't do anything */
+ if (!frame->local) {
+ goto out;
+ }
+
+ /* if the lookup is for a dht link file, do not record */
+ if (dht_is_linkfile (buf, dict)) {
+ gf_msg_trace (this->name, 0, "Ignoring Lookup "
+ "for dht link file");
+ goto out;
+ }
+
+ ctr_local = frame->local;
+ /*Assign the proper inode type*/
+ ctr_local->ia_inode_type = inode->ia_type;
+
+ /* Copy gfid directly from inode */
+ gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, inode->gfid);
+
+ /* Checking if gfid and parent gfid is valid */
+ if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) ||
+ gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) {
+ gf_msg_trace (this->name, 0,
+ "Invalid GFID");
+ goto out;
+ }
+
+ /* if it is the first entry (no ctr context on the inode yet),
+ * then mark the ctr_record for create.
+ * A create will attempt a file and a hard link created in the db */
+ ctr_xlator_ctx = get_ctr_xlator_ctx (this, inode);
+ if (!ctr_xlator_ctx) {
+ /* This marks inode heal */
+ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
+ _is_heal_needed = _gf_true;
+ }
+
+ /* Copy the correct gfid from resolved inode
+ * (same copy as the one made above) */
+ gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, inode->gfid);
+
+ /* Add hard link to the list */
+ ret_val = add_hard_link_ctx (frame, this, inode);
+ if (ret_val == CTR_CTX_ERROR) {
+ gf_msg_trace (this->name, 0,
+ "Failed adding hardlink to list");
+ goto out;
+ }
+ /* If inode needs healing then heal the hardlink also */
+ else if (ret_val & CTR_TRY_INODE_HEAL) {
+ /* This marks inode heal */
+ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
+ _is_heal_needed = _gf_true;
+ }
+ /* If hardlink needs healing */
+ else if (ret_val & CTR_TRY_HARDLINK_HEAL) {
+ _is_heal_needed = _gf_true;
+ }
+
+ /* Nothing to insert when no lookup heal is needed */
+ if (!_is_heal_needed)
+ goto out;
+
+ /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind
+ * in to the db. It also destroys the frame->local
+ * created by ctr_lookup_wind */
+ ret = ctr_lookup_unwind(frame, this);
+ if (ret) {
+ gf_msg_trace (this->name, 0,
+ "Failed healing/inserting link");
+ }
+
+
+out:
+ /* release the local on every path; it is already NULL after
+ * ctr_lookup_unwind() */
+ free_ctr_local ((gf_ctr_local_t *)frame->local);
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ dict, postparent);
+
+ return 0;
+}
+
+
+
+/*
+ * Lookup fop: for named lookups, prepare the heal record in frame->local
+ * (nothing is written to the db here) and wind the lookup to the first
+ * child.  The lookup is wound in every case, even if the preparation is
+ * skipped or fails.
+ */
+int32_t
+ctr_lookup (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, dict_t *xdata)
+{
+        int                     ret = -1;
+        gf_ctr_inode_context_t  inode_cx_store;
+        gf_ctr_link_context_t   link_cx_store;
+        gf_ctr_inode_context_t *inode_cx = &inode_cx_store;
+        gf_ctr_link_context_t  *link_cx  = &link_cx_store;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+
+        /* nameless lookups carry no parent/name pair, so skip them */
+        if (loc->parent == NULL || loc->name == NULL)
+                goto wind;
+
+        /* describe the hard link: parent gfid plus basename */
+        FILL_CTR_LINK_CX(link_cx, loc->parent->gfid, loc->name, wind);
+
+        /* Describe the inode.  IA_IFREG is only an assumption on the wind
+         * path; the real type is known in the unwind.  loc->gfid is likewise
+         * a placeholder: the unwind fills in the gfid of a successful
+         * lookup. */
+        FILL_CTR_INODE_CONTEXT(inode_cx, IA_IFREG,
+                               loc->gfid, link_cx, NULL,
+                               GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
+
+        /* build frame->local and its ctr_db_record; no db write yet */
+        ret = ctr_lookup_wind(frame, this, inode_cx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_LINK_WIND_FAILED,
+                        "Failed to insert link wind");
+        }
+
+wind:
+        STACK_WIND (frame, ctr_lookup_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup, loc, xdata);
+        return 0;
+}
+
+
+
+
+/****************************WRITEV********************************************/
+/*
+ * writev callback: unless a CTR_* guard skips it, record the unwind of an
+ * inode write; then release the frame local and unwind to the caller.
+ */
+int32_t
+ctr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                struct iatt *postbuf,
+                dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, unwind);
+
+        ret = ctr_insert_unwind (frame, this,
+                                 GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
+        if (ret != 0) {
+                /* a failed insert is logged only; the fop result stands */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_WRITEV_UNWIND_FAILED,
+                        "Failed to insert writev unwind");
+        }
+
+unwind:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * writev fop: unless a CTR_* guard skips it, record the wind of an inode
+ * write for fd->inode; the write is wound to the first child in every case.
+ */
+int32_t
+ctr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+            struct iovec *vector, int32_t count, off_t off,
+            uint32_t flags,
+            struct iobref *iobref, dict_t *xdata)
+{
+        int                     ret = -1;
+        gf_ctr_inode_context_t  inode_cx_store;
+        gf_ctr_inode_context_t *inode_cx = &inode_cx_store;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+
+        /* describe the inode being written */
+        FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type,
+                               fd->inode->gfid, NULL, NULL,
+                               GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* hand the record to the database layer */
+        ret = ctr_insert_wind(frame, this, inode_cx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_WRITEV_WIND_FAILED,
+                        "Failed to insert writev wind");
+        }
+
+wind:
+        STACK_WIND (frame, ctr_writev_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev, fd, vector, count,
+                    off, flags, iobref, xdata);
+
+        return 0;
+}
+
+/******************************setattr*****************************************/
+
+/*
+ * setattr callback: unless a CTR_* guard skips it, record the unwind of an
+ * inode write; then release the frame local and unwind to the caller.
+ */
+int32_t
+ctr_setattr_cbk (call_frame_t *frame,
+                 void *cookie, xlator_t *this, int32_t op_ret,
+                 int32_t op_errno, struct iatt *preop_stbuf,
+                 struct iatt *postop_stbuf, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, unwind);
+
+        ret = ctr_insert_unwind(frame, this,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
+        if (ret != 0) {
+                /* a failed insert is logged only; the fop result stands */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
+                        "Failed to insert setattr unwind");
+        }
+
+unwind:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop_stbuf,
+                             postop_stbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * setattr fop: unless a CTR_* guard skips it (disabled, internal fop,
+ * metadata heat recording off - per the macro names), record the wind of an
+ * inode write for loc->inode; the setattr is wound in every case.
+ */
+int32_t
+ctr_setattr (call_frame_t *frame,
+             xlator_t *this, loc_t *loc,
+             struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int                     ret = -1;
+        gf_ctr_inode_context_t  inode_cx_store;
+        gf_ctr_inode_context_t *inode_cx = &inode_cx_store;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+        CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, wind);
+
+        /* describe the inode whose attributes change */
+        FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type,
+                               loc->inode->gfid, NULL, NULL,
+                               GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* hand the record to the database layer */
+        ret = ctr_insert_wind(frame, this, inode_cx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+                        "Failed to insert setattr wind");
+        }
+
+wind:
+        STACK_WIND (frame, ctr_setattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setattr, loc, stbuf,
+                    valid, xdata);
+
+        return 0;
+}
+
+/*************************** fsetattr ***************************************/
+/*
+ * fsetattr callback: unless a CTR_* guard skips it, record the unwind of an
+ * inode write; then release the frame local and unwind to the caller.
+ */
+int32_t
+ctr_fsetattr_cbk (call_frame_t *frame,
+                  void *cookie, xlator_t *this, int32_t op_ret,
+                  int32_t op_errno, struct iatt *preop_stbuf,
+                  struct iatt *postop_stbuf, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, unwind);
+
+        ret = ctr_insert_unwind(frame, this,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
+        if (ret != 0) {
+                /* the setattr message id is shared with fsetattr */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
+                        "Failed to insert fsetattr unwind");
+        }
+
+unwind:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
+                             preop_stbuf, postop_stbuf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * fsetattr fop: unless a CTR_* guard skips it, record the wind of an inode
+ * write for fd->inode; the fsetattr is wound to the first child in every
+ * case.
+ */
+int32_t
+ctr_fsetattr (call_frame_t *frame,
+              xlator_t *this, fd_t *fd,
+              struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int                     ret = -1;
+        gf_ctr_inode_context_t  inode_cx_store;
+        gf_ctr_inode_context_t *inode_cx = &inode_cx_store;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+        CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, wind);
+
+        /* describe the inode whose attributes change */
+        FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type,
+                               fd->inode->gfid, NULL, NULL,
+                               GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* hand the record to the database layer */
+        ret = ctr_insert_wind(frame, this, inode_cx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+                        "Failed to insert fsetattr wind");
+        }
+
+wind:
+        STACK_WIND (frame, ctr_fsetattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr,
+                    fd, stbuf, valid, xdata);
+
+        return 0;
+}
+/****************************fremovexattr************************************/
+
+/*
+ * fremovexattr callback: unless a CTR_* guard skips it, record the unwind
+ * of an inode write; then release the frame local and unwind to the caller.
+ */
+int32_t
+ctr_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, unwind);
+
+        ret = ctr_insert_unwind(frame, this,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
+        if (ret != 0) {
+                /* a failed insert is logged only; the fop result stands */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
+                        "Failed to insert fremovexattr unwind");
+        }
+
+unwind:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * fremovexattr fop: unless a CTR_* guard skips it, record the wind of an
+ * inode write for fd->inode; the fremovexattr is wound to the first child
+ * in every case.
+ */
+int32_t
+ctr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  const char *name, dict_t *xdata)
+{
+        int                     ret = -1;
+        gf_ctr_inode_context_t  inode_cx_store;
+        gf_ctr_inode_context_t *inode_cx = &inode_cx_store;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+        CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, wind);
+
+        /* describe the inode losing the xattr */
+        FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type,
+                               fd->inode->gfid, NULL, NULL,
+                               GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* hand the record to the database layer */
+        ret = ctr_insert_wind(frame, this, inode_cx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
+                        "Failed to insert fremovexattr wind");
+        }
+
+wind:
+        STACK_WIND (frame, ctr_fremovexattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fremovexattr,
+                    fd, name, xdata);
+        return 0;
+}
+
+/****************************removexattr*************************************/
+
+/*
+ * removexattr callback: unless a CTR_* guard skips it, record the unwind of
+ * an inode write; then release the frame local and unwind to the caller.
+ * Unlike the other callbacks here, this one also applies the internal-fop
+ * guard to the callback's xdata.
+ */
+int32_t
+ctr_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, unwind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, unwind);
+
+        ret = ctr_insert_unwind(frame, this,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
+        if (ret != 0) {
+                /* a failed insert is logged only; the fop result stands */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
+                        "Failed to insert removexattr unwind");
+        }
+
+unwind:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * removexattr fop: unless a CTR_* guard skips it, record the wind of an
+ * inode write for loc->inode; the removexattr is wound to the first child
+ * in every case.
+ */
+int32_t
+ctr_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *name, dict_t *xdata)
+{
+        int                     ret = -1;
+        gf_ctr_inode_context_t  inode_cx_store;
+        gf_ctr_inode_context_t *inode_cx = &inode_cx_store;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind);
+        CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, wind);
+
+        /* describe the inode losing the xattr */
+        FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type,
+                               loc->inode->gfid, NULL, NULL,
+                               GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* hand the record to the database layer */
+        ret = ctr_insert_wind(frame, this, inode_cx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
+                        "Failed to insert removexattr wind");
+        }
+
+wind:
+        STACK_WIND (frame, ctr_removexattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->removexattr,
+                    loc, name, xdata);
+        return 0;
+}
+
+/****************************truncate****************************************/
+
+/*
+ * truncate callback. Inserts an unwind record for the inode write
+ * unless a guard macro skips it; a failed insert is logged only. Always
+ * frees the frame-local and unwinds with the values received.
+ */
+int32_t
+ctr_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        int rc = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_INODE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
+                        "Failed to insert truncate unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+        return 0;
+}
+
+/*
+ * truncate entry point. Inserts a wind record (inode write) for loc's
+ * inode unless a guard macro skips it, then winds the fop to the first
+ * child. Insert failures are logged and do not stop the fop.
+ */
+int32_t
+ctr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              off_t offset, dict_t *xdata)
+{
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        int                     rc        = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+
+        /* Describe the operation being wound */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, loc->inode->ia_type,
+                                loc->inode->gfid, NULL, NULL,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_TRUNCATE_WIND_FAILED,
+                        "Failed to insert truncate wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_truncate_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->truncate,
+                    loc, offset, xdata);
+        return 0;
+}
+
+/****************************ftruncate***************************************/
+
+/*
+ * ftruncate callback. Inserts an unwind record for the inode write
+ * unless a guard macro skips it; a failed insert is logged only. Always
+ * frees the frame-local and unwinds with the values received.
+ */
+int32_t
+ctr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        int rc = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_INODE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
+                        "Failed to insert ftruncate unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+        return 0;
+}
+
+/*
+ * ftruncate entry point. Inserts a wind record (inode write) for the
+ * fd's inode unless a guard macro skips it, then winds the fop to the
+ * first child. Insert failures are logged and do not stop the fop.
+ */
+int32_t
+ctr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               off_t offset, dict_t *xdata)
+{
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        int                     rc        = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+
+        /* Describe the operation being wound */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, fd->inode->ia_type,
+                                fd->inode->gfid, NULL, NULL,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED,
+                        "Failed to insert ftruncate wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_ftruncate_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->ftruncate,
+                    fd, offset, xdata);
+        return 0;
+}
+
+/****************************rename******************************************/
+/*
+ * rename callback. Inserts the dentry-write unwind record; if that
+ * succeeds and xdata carries GF_RESPONSE_LINK_COUNT_XDATA, the hard
+ * link saved in the frame-local (old_gfid / pargfid / file_name) is
+ * deleted from the database. Every exit path frees the frame-local and
+ * unwinds with the values received from the child.
+ */
+int32_t
+ctr_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                struct iatt *preoldparent, struct iatt *postoldparent,
+                struct iatt *prenewparent, struct iatt *postnewparent,
+                dict_t *xdata)
+{
+        gf_ctr_local_t  *ctr_local       = NULL;
+        gfdb_fop_type_t  fop_type        = GFDB_FOP_INVALID_OP;
+        gfdb_fop_path_t  fop_path        = GFDB_FOP_INVALID;
+        uint32_t         remaining_links = -1;
+        int              rc              = -1;
+
+        GF_ASSERT(frame);
+        GF_ASSERT(this);
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_DENTRY_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
+                        "Failed to insert rename unwind");
+                goto do_unwind;
+        }
+
+        /* Without xdata there is no link count to act on */
+        if (xdata == NULL)
+                goto do_unwind;
+
+        /* Link count reported by POSIX; per the original note it is only
+         * set when the rename overwrote a hard link */
+        rc = dict_get_uint32 (xdata, GF_RESPONSE_LINK_COUNT_XDATA,
+                              &remaining_links);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+                        "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
+                goto do_unwind;
+        }
+
+        ctr_local = frame->local;
+
+        if (remaining_links == 1) {
+                /* Last link that was deleted */
+                fop_type = GFDB_FOP_DENTRY_WRITE;
+                fop_path = GFDB_FOP_UNDEL_ALL;
+        } else if (remaining_links > 1) {
+                /* Other links to the overwritten inode remain */
+                fop_type = GFDB_FOP_DENTRY_WRITE;
+                fop_path = GFDB_FOP_UNDEL;
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
+                        "Invalid link count from posix");
+                goto do_unwind;
+        }
+
+        rc = ctr_delete_hard_link_from_db (this,
+                                           CTR_DB_REC(ctr_local).old_gfid,
+                                           CTR_DB_REC(ctr_local).pargfid,
+                                           CTR_DB_REC(ctr_local).file_name,
+                                           fop_type, fop_path);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
+                        "Failed to delete records of %s",
+                        CTR_DB_REC(ctr_local).old_file_name);
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+                             preoldparent, postoldparent, prenewparent,
+                             postnewparent, xdata);
+        return 0;
+}
+
+/*
+ * rename entry point. Records the wind for the dentry write (old and
+ * new link), updates the hard link kept in the inode context of
+ * oldloc, and - when newloc already has an inode, i.e. the rename
+ * overwrites an existing name - removes that hard link from the
+ * overwritten inode's context and asks POSIX for its link count.
+ * Any failure along the way stops the bookkeeping but the fop is still
+ * wound to the first child.
+ */
+int32_t
+ctr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+            loc_t *newloc, dict_t *xdata)
+{
+        int                     rc              = -1;
+        int                     is_dict_created = 0;
+        ctr_xlator_ctx_t       *ctr_xlator_ctx  = NULL;
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx       = &ctr_inode_cx;
+        gf_ctr_link_context_t   new_link_cx;
+        gf_ctr_link_context_t   old_link_cx;
+        gf_ctr_link_context_t  *_nlink_cx       = &new_link_cx;
+        gf_ctr_link_context_t  *_olink_cx       = &old_link_cx;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+
+        /* Old and new link descriptions */
+        FILL_CTR_LINK_CX (_olink_cx, oldloc->pargfid, oldloc->name, do_wind);
+        FILL_CTR_LINK_CX (_nlink_cx, newloc->pargfid, newloc->name, do_wind);
+
+        /* Inode description for the renamed file */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, oldloc->inode->ia_type,
+                                oldloc->inode->gfid, _nlink_cx, _olink_cx,
+                                GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
+
+        /* When rename ("file1", "file2") overwrites file2, remember the
+         * gfid of file2 in old_gfid so the callback can delete its
+         * entries from the db once the fop has succeeded */
+        if (newloc->inode != NULL) {
+                _inode_cx->old_gfid = &newloc->inode->gfid;
+        }
+
+        /* Rename counts as a metadata fop */
+        _inode_cx->is_metadata_fop = _gf_true;
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_RENAME_WIND_FAILED,
+                        "Failed to insert rename wind");
+                goto do_wind;
+        }
+
+        /* The hard link in the inode context is updated here, in the
+         * wind path, because the rename callback does not receive the
+         * inode */
+        rc = update_hard_link_ctx (frame, this, oldloc->inode);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_UPDATE_HARDLINK_FAILED,
+                        "Failed updating hard link in ctr inode context");
+                goto do_wind;
+        }
+
+        /* Nothing is being overwritten: bookkeeping is complete */
+        if (newloc->inode == NULL)
+                goto do_wind;
+
+        /* Inode context of the file whose name is being taken over */
+        ctr_xlator_ctx = get_ctr_xlator_ctx (this, newloc->inode);
+        if (ctr_xlator_ctx == NULL) {
+                /* No ctr inode context, so nothing more to do */
+                goto do_wind;
+        }
+
+        /* Drop the overwritten hard link from that context */
+        rc = ctr_delete_hard_link (this, ctr_xlator_ctx,
+                                   newloc->pargfid, newloc->name);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_DELETE_HARDLINK_FAILED,
+                        "Failed to delete hard link");
+                goto do_wind;
+        }
+
+        /* Ask POSIX for the number of hard links on the newloc inode;
+         * the return value is 1 when a dict was created for this and -1
+         * on failure (in which case we simply wind) */
+        is_dict_created = set_posix_link_request (this, &xdata);
+
+do_wind:
+        STACK_WIND (frame, ctr_rename_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->rename,
+                    oldloc, newloc, xdata);
+
+        /* Release the dict only if it was created above */
+        if (is_dict_created == 1) {
+                dict_unref (xdata);
+        }
+
+        return 0;
+}
+
+/****************************unlink******************************************/
+/*
+ * unlink callback. Reads GF_RESPONSE_LINK_COUNT_XDATA from xdata and
+ * inserts a dentry-write unwind record: GFDB_FOP_UNDEL_ALL when the
+ * count is exactly 1 (the last link was deleted), GFDB_FOP_UNDEL for
+ * any other value, including the case where the count could not be
+ * read. Nothing is recorded when xdata is NULL. Always frees the
+ * frame-local and unwinds with the values received.
+ */
+int32_t
+ctr_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        uint32_t         remaining_links = -1;
+        gfdb_fop_path_t  del_path        = GFDB_FOP_UNDEL;
+        int              rc              = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        if (xdata == NULL)
+                goto do_unwind;
+
+        /* Link count supplied by the POSIX xlator */
+        rc = dict_get_uint32 (xdata, GF_RESPONSE_LINK_COUNT_XDATA,
+                              &remaining_links);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+                        "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
+                remaining_links = -1;
+        }
+
+        /* A count of 1 means the last link went away */
+        if (remaining_links == 1)
+                del_path = GFDB_FOP_UNDEL_ALL;
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_DENTRY_WRITE,
+                                del_path);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
+                        "Failed to insert unlink unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+        return 0;
+}
+
+/*
+ * unlink entry point.
+ *
+ * Records the wind of the dentry delete, removes the hard link from the
+ * ctr inode context of loc->inode, and sets GF_REQUEST_LINK_COUNT_XDATA
+ * in xdata so that POSIX reports the link count in the unwind path. A
+ * dict is created when the caller passed no xdata.
+ *
+ * Whatever happens during the bookkeeping, the fop is wound to the first
+ * child. A locally created xdata is released exactly once, after the
+ * wind; it must not be released earlier because STACK_WIND still
+ * receives it.
+ *
+ * Always returns 0.
+ */
+int32_t
+ctr_unlink (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int xflag, dict_t *xdata)
+{
+        int                     ret              = -1;
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx        = &ctr_inode_cx;
+        gf_ctr_link_context_t   ctr_link_cx;
+        gf_ctr_link_context_t  *_link_cx         = &ctr_link_cx;
+        gf_boolean_t            is_xdata_created = _gf_false;
+        struct iatt             dummy_stat       = {0};
+
+        GF_ASSERT (frame);
+
+        CTR_IS_DISABLED_THEN_GOTO(this, out);
+
+        /*Fill link context*/
+        FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
+
+        /*Fill ctr inode context*/
+        FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type,
+                        loc->inode->gfid, _link_cx, NULL,
+                        GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WDEL);
+
+        /*Internal FOP*/
+        _inode_cx->is_internal_fop = is_internal_fop (frame, xdata);
+
+        /* Is a metadata FOP */
+        _inode_cx->is_metadata_fop = _gf_true;
+
+        /* If its a internal FOP and dht link file donot record*/
+        if (_inode_cx->is_internal_fop &&
+                        dht_is_linkfile (&dummy_stat, xdata)) {
+                goto out;
+        }
+
+        /*record into the database*/
+        ret = ctr_insert_wind(frame, this, _inode_cx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
+                        "Failed to insert unlink wind");
+        } else {
+                /* The hard link is deleted from the inode context here,
+                 * in the wind path, as the callback does not receive
+                 * the inode */
+                ret = delete_hard_link_ctx (frame, this, loc->inode);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CTR_MSG_DELETE_HARDLINK_FAILED, "Failed "
+                                "deleting hard link from ctr inode context");
+                }
+        }
+
+        /*
+         * Sending GF_REQUEST_LINK_COUNT_XDATA
+         * to POSIX Xlator to send link count in unwind path
+         */
+        /*create xdata if NULL*/
+        if (!xdata) {
+                xdata = dict_new();
+                is_xdata_created = (xdata) ? _gf_true : _gf_false;
+        }
+        if (!xdata) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_XDATA_NULL, "xdata is NULL :Cannot send "
+                        "GF_REQUEST_LINK_COUNT_XDATA to posix");
+                goto out;
+        }
+
+        ret = dict_set_int32 (xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+                        "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
+                /* Do not unref xdata here: it is still handed to
+                 * STACK_WIND below and released once afterwards */
+                goto out;
+        }
+
+out:
+        STACK_WIND (frame, ctr_unlink_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->unlink,
+                    loc, xflag, xdata);
+
+        /* Single release point for a dict created in this function */
+        if (is_xdata_created)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/****************************fsync******************************************/
+/*
+ * fsync callback. Inserts an unwind record for the inode write unless
+ * a guard macro skips it; a failed insert is logged only. Always frees
+ * the frame-local and unwinds with the values received.
+ */
+int32_t
+ctr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+        int rc = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_INODE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
+                        "Failed to insert fsync unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * fsync entry point. Inserts a wind record (inode write) for the fd's
+ * inode unless a guard macro skips it, then winds the fop to the first
+ * child. Insert failures are logged and do not stop the fop.
+ */
+int32_t
+ctr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           int32_t flags, dict_t *xdata)
+{
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        int                     rc        = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+
+        /* Describe the operation being wound */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, fd->inode->ia_type,
+                                fd->inode->gfid, NULL, NULL,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FSYNC_WIND_FAILED,
+                        "Failed to insert fsync wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_fsync_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsync,
+                    fd, flags, xdata);
+        return 0;
+}
+
+/****************************setxattr****************************************/
+
+/*
+ * setxattr callback. Inserts an unwind record for the inode write, then
+ * frees the frame-local and unwinds with the values received.
+ *
+ * As in the fsetxattr callback and the other write callbacks in this
+ * file, nothing is recorded when the fop itself failed. A failed insert
+ * is only logged.
+ */
+int
+ctr_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, out);
+        /* Skip recording for a failed setxattr */
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, out);
+
+        ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (ret) {
+                /* NOTE(review): the message id is the fsync one; confirm
+                 * whether a setxattr-specific id exists */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
+                        "Failed to insert setxattr unwind");
+        }
+
+out:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * setxattr entry point. Inserts a wind record (inode write) for loc's
+ * inode unless a guard macro skips it, then winds the fop to the first
+ * child. Insert failures are logged and do not stop the fop.
+ */
+int
+ctr_setxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
+{
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        int                     rc        = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+        CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, do_wind);
+
+        /* Describe the operation being wound */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, loc->inode->ia_type,
+                                loc->inode->gfid, NULL, NULL,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+                        "Failed to insert setxattr wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_setxattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setxattr,
+                    loc, xattr, flags, xdata);
+        return 0;
+}
+/**************************** fsetxattr *************************************/
+/*
+ * fsetxattr callback. Inserts an unwind record for the inode write
+ * unless a guard macro skips it; a failed insert is logged only. Always
+ * frees the frame-local and unwinds with the values received.
+ */
+int32_t
+ctr_fsetxattr_cbk (call_frame_t *frame,
+                   void *cookie, xlator_t *this, int32_t op_ret,
+                   int32_t op_errno, dict_t *xdata)
+{
+        int rc = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_INODE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
+                        "Failed to insert fsetxattr unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * fsetxattr entry point. Inserts a wind record (inode write) for the
+ * fd's inode unless a guard macro skips it, then winds the fop to the
+ * first child. Insert failures are logged and do not stop the fop.
+ */
+int32_t
+ctr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        int                     rc        = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+        CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, do_wind);
+
+        /* Describe the operation being wound */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, fd->inode->ia_type,
+                                fd->inode->gfid, NULL, NULL,
+                                GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+                        "Failed to insert fsetxattr wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_fsetxattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,
+                    fd, dict, flags, xdata);
+        return 0;
+}
+/****************************mknod*******************************************/
+
+
+/*
+ * mknod callback. Unless a guard macro skips it, adds the hard link to
+ * the inode's ctr context (a CTR_CTX_ERROR result is traced only) and
+ * inserts the create-write unwind record (a failure is logged only).
+ * Always frees the frame-local and unwinds with the values received.
+ */
+int32_t
+ctr_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        ctr_heal_ret_val_t heal_rc = CTR_CTX_ERROR;
+        int                rc      = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        /* Track the new hard link in the inode context */
+        heal_rc = add_hard_link_ctx (frame, this, inode);
+        if (heal_rc == CTR_CTX_ERROR) {
+                gf_msg_trace (this->name, 0, "Failed adding hard link");
+        }
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_CREATE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
+                        "Failed to insert mknod unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * mknod entry point. Takes the gfid of the file being created from the
+ * "gfid-req" key of xdata and inserts a create-write wind record for
+ * it, together with the (parent gfid, name) link. If the gfid cannot be
+ * read, or any guard macro trips, recording is skipped. The fop is
+ * wound to the first child in every case.
+ */
+int
+ctr_mknod (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        int                     rc        = -1;
+        void                   *uuid_req  = NULL;
+        uuid_t                  gfid      = {0,};
+        uuid_t                 *ptr_gfid  = &gfid;
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        gf_ctr_link_context_t   ctr_link_cx;
+        gf_ctr_link_context_t  *_link_cx  = &ctr_link_cx;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+
+        /* The gfid for the new file travels in xdata */
+        rc = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (rc != 0) {
+                gf_msg_debug (this->name, 0, "failed to get gfid from dict");
+                goto do_wind;
+        }
+        gf_uuid_copy (gfid, uuid_req);
+
+        /* Link (parent gfid + name) of the new file */
+        FILL_CTR_LINK_CX (_link_cx, loc->pargfid, loc->name, do_wind);
+
+        /* Inode description, using the requested gfid */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, loc->inode->ia_type,
+                                *ptr_gfid, _link_cx, NULL,
+                                GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_MKNOD_WIND_FAILED,
+                        "Failed to insert mknod wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_mknod_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->mknod,
+                    loc, mode, rdev, umask, xdata);
+        return 0;
+}
+
+/****************************create******************************************/
+/*
+ * create callback. Unless a guard macro skips it, adds the hard link to
+ * the inode's ctr context and inserts the create-write unwind record;
+ * both failures are logged only. Always frees the frame-local and
+ * unwinds with the values received.
+ */
+int
+ctr_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno,
+                fd_t *fd, inode_t *inode, struct iatt *stbuf,
+                struct iatt *preparent, struct iatt *postparent,
+                dict_t *xdata)
+{
+        int rc = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_unwind);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, do_unwind);
+
+        /* NOTE(review): the mknod callback treats only CTR_CTX_ERROR from
+         * add_hard_link_ctx as a failure, while any non-zero value is
+         * logged as an error here - confirm which is intended */
+        rc = add_hard_link_ctx (frame, this, inode);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_ADD_HARDLINK_FAILED,
+                        "Failed adding hard link");
+        }
+
+        rc = ctr_insert_unwind (frame, this, GFDB_FOP_CREATE_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
+                        "Failed to insert create unwind");
+        }
+
+do_unwind:
+        ctr_free_frame_local (frame);
+        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode,
+                             stbuf, preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * create entry point. Takes the gfid of the file being created from the
+ * "gfid-req" key of xdata and inserts a create-write wind record for
+ * it, together with the (parent gfid, name) link. Recording is skipped
+ * when the gfid cannot be read or when the fop is internal and
+ * dht_is_linkfile() reports a dht link file. The fop is wound to the
+ * first child in every case.
+ */
+int
+ctr_create (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, mode_t mode,
+            mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        int                     rc         = -1;
+        void                   *uuid_req   = NULL;
+        uuid_t                  gfid       = {0,};
+        uuid_t                 *ptr_gfid   = &gfid;
+        struct iatt             dummy_stat = {0};
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx  = &ctr_inode_cx;
+        gf_ctr_link_context_t   ctr_link_cx;
+        gf_ctr_link_context_t  *_link_cx   = &ctr_link_cx;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+
+        /* The gfid for the new file travels in xdata */
+        rc = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_GET_GFID_FROM_DICT_FAILED,
+                        "failed to get gfid from dict");
+                goto do_wind;
+        }
+        gf_uuid_copy (gfid, uuid_req);
+
+        /* Link (parent gfid + name) of the new file */
+        FILL_CTR_LINK_CX (_link_cx, loc->pargfid, loc->name, do_wind);
+
+        /* Inode description, using the requested gfid */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, loc->inode->ia_type,
+                                *ptr_gfid, _link_cx, NULL,
+                                GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
+
+        _inode_cx->is_internal_fop = is_internal_fop (frame, xdata);
+
+        /* Internal creation of a dht link file is not recorded */
+        if (_inode_cx->is_internal_fop &&
+            dht_is_linkfile (&dummy_stat, xdata)) {
+                goto do_wind;
+        }
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, &ctr_inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_CREATE_WIND_FAILED,
+                        "Failed to insert create wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_create_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+        return 0;
+}
+
+/****************************link********************************************/
+
+/*
+ * link callback. Unless a guard macro skips it, adds the new hard link
+ * to the inode's ctr context (a failure is traced only) and inserts the
+ * dentry-write unwind record (a failure is logged only). Always frees
+ * the frame-local and unwinds with the values received.
+ */
+int
+ctr_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno,
+              inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, out);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, out);
+
+        /* Add hard link to the list */
+        ret = add_hard_link_ctx (frame, this, inode);
+        if (ret) {
+                gf_msg_trace (this->name, 0, "Failed adding hard link");
+        }
+
+        ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
+                                GFDB_FOP_UNWIND);
+        if (ret) {
+                /* The text names the fop that actually failed.
+                 * NOTE(review): the message id is still the create one;
+                 * switch it if a link-specific id exists */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
+                        "Failed to insert link unwind");
+        }
+
+out:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, stbuf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * link entry point. Inserts a dentry-write wind record for oldloc's
+ * inode with the new (parent gfid, name) link. Recording is skipped
+ * when the fop is internal and dht_is_linkfile() reports a dht link
+ * file. The fop is wound to the first child in every case.
+ */
+int
+ctr_link (call_frame_t *frame, xlator_t *this,
+          loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        int                     rc         = -1;
+        struct iatt             dummy_stat = {0};
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx  = &ctr_inode_cx;
+        gf_ctr_link_context_t   ctr_link_cx;
+        gf_ctr_link_context_t  *_link_cx   = &ctr_link_cx;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+
+        /* The link being added */
+        FILL_CTR_LINK_CX (_link_cx, newloc->pargfid, newloc->name, do_wind);
+
+        /* Inode that gains the link */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, oldloc->inode->ia_type,
+                                oldloc->inode->gfid, _link_cx, NULL,
+                                GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
+
+        _inode_cx->is_internal_fop = is_internal_fop (frame, xdata);
+        _inode_cx->is_metadata_fop = _gf_true;
+
+        /* Internal fops on a dht link file are not recorded */
+        if (_inode_cx->is_internal_fop &&
+            dht_is_linkfile (&dummy_stat, xdata)) {
+                goto do_wind;
+        }
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_LINK_WIND_FAILED,
+                        "Failed to insert link wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_link_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->link,
+                    oldloc, newloc, xdata);
+        return 0;
+}
+
+/******************************readv*****************************************/
+/*
+ * readv callback. Inserts an unwind record for the inode read unless a
+ * guard macro skips it; a failed insert is logged only. Always frees
+ * the frame-local and unwinds with the values received.
+ */
+int
+ctr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno,
+               struct iovec *vector, int count, struct iatt *stbuf,
+               struct iobref *iobref, dict_t *xdata)
+{
+        int ret = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO(this, out);
+        CTR_IF_FOP_FAILED_THEN_GOTO (this, op_ret, op_errno, out);
+
+        ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ,
+                                GFDB_FOP_UNWIND);
+        if (ret) {
+                /* The text names the fop that actually failed.
+                 * NOTE(review): the message id is still the create one;
+                 * switch it if a readv-specific id exists */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
+                        "Failed to insert readv unwind");
+        }
+
+out:
+        ctr_free_frame_local (frame);
+
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+                             stbuf, iobref, xdata);
+        return 0;
+}
+
+
+/*
+ * readv entry point. Inserts a wind record (inode read) for the fd's
+ * inode unless a guard macro skips it, then winds the fop to the first
+ * child. Insert failures are logged and do not stop the fop.
+ */
+int
+ctr_readv (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+{
+        gf_ctr_inode_context_t  ctr_inode_cx;
+        gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+        int                     rc        = -1;
+
+        CTR_IS_DISABLED_THEN_GOTO (this, do_wind);
+        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, do_wind);
+
+        /* Describe the operation being wound */
+        FILL_CTR_INODE_CONTEXT (_inode_cx, fd->inode->ia_type,
+                                fd->inode->gfid, NULL, NULL,
+                                GFDB_FOP_INODE_READ, GFDB_FOP_WIND);
+
+        /* Store the wind record */
+        rc = ctr_insert_wind (frame, this, _inode_cx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_READV_WIND_FAILED,
+                        "Failed to insert readv wind");
+        }
+
+do_wind:
+        STACK_WIND (frame, ctr_readv_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->readv,
+                    fd, size, off, flags, xdata);
+        return 0;
+}
+
+/*******************************ctr_ipc****************************************/
+
+/*
+ * Callback invoked once per record/file returned by a database query.
+ * Writes the record to the query file descriptor carried in args
+ * (a ctr_query_cbk_args_t) and increments its record counter.
+ * Returns 0 on success; -1 when args is NULL, otherwise the non-zero
+ * result of the failed write.
+ */
+static int
+ctr_db_query_callback (gfdb_query_record_t *gfdb_query_record,
+                       void *args)
+{
+        ctr_query_cbk_args_t *query_cbk_args = args;
+        int                   rc             = -1;
+
+        GF_VALIDATE_OR_GOTO ("ctr", query_cbk_args, done);
+
+        rc = gfdb_write_query_record (query_cbk_args->query_fd,
+                                      gfdb_query_record);
+        if (rc != 0) {
+                gf_msg ("ctr", GF_LOG_ERROR, 0,
+                        CTR_MSG_FATAL_ERROR,
+                        "Failed to write to query file");
+                goto done;
+        }
+
+        /* One more record made it into the query file */
+        query_cbk_args->count++;
+
+done:
+        return rc;
+}
+
+/* This function does all the db queries related to tiering and
+ * generates/populates new/existing query file
+ * inputs:
+ * xlator_t *this : CTR Translator
+ * void *conn_node : Database connection
+ * char *query_file: the query file that needs to be updated
+ *                   (opened for append, created if missing)
+ * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters
+ *
+ * The query is chosen from ipc_ctr_params: is_promote selects the
+ * "recently changed" queries, otherwise the "unchanged for time" ones;
+ * the *_freq variant is used when either frequency threshold is
+ * non-zero. After a successful query the heat of the files is cleared.
+ *
+ * Return:
+ * On success the number of records written to the query file
+ * On failure a non-zero error value (-1 for argument/open failures,
+ * otherwise the value returned by the failing query/clear call)
+ * */
+int
+ctr_db_query (xlator_t *this,
+              void *conn_node,
+              char *query_file,
+              gfdb_ipc_ctr_params_t *ipc_ctr_params)
+{
+        int                   ret            = -1;
+        ctr_query_cbk_args_t  query_cbk_args = {0};
+
+        /* Mark the descriptor as "not opened" before anything can jump
+         * to out; a zero-initialised fd would make the cleanup below
+         * close descriptor 0 */
+        query_cbk_args.query_fd = -1;
+
+        GF_VALIDATE_OR_GOTO ("ctr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, conn_node, out);
+        GF_VALIDATE_OR_GOTO (this->name, query_file, out);
+        GF_VALIDATE_OR_GOTO (this->name, ipc_ctr_params, out);
+
+        /*Query for eligible files from db*/
+        query_cbk_args.query_fd = open (query_file,
+                        O_WRONLY | O_CREAT | O_APPEND,
+                        S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+        if (query_cbk_args.query_fd < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CTR_MSG_FATAL_ERROR,
+                        "Failed to open query file %s", query_file);
+                goto out;
+        }
+        if (!ipc_ctr_params->is_promote) {
+                if (ipc_ctr_params->write_freq_threshold == 0 &&
+                        ipc_ctr_params->read_freq_threshold == 0) {
+                        ret = find_unchanged_for_time (
+                                        conn_node,
+                                        ctr_db_query_callback,
+                                        (void *)&query_cbk_args,
+                                        &ipc_ctr_params->time_stamp);
+                } else {
+                        ret = find_unchanged_for_time_freq (
+                                conn_node,
+                                ctr_db_query_callback,
+                                (void *)&query_cbk_args,
+                                &ipc_ctr_params->time_stamp,
+                                ipc_ctr_params->write_freq_threshold,
+                                ipc_ctr_params->read_freq_threshold,
+                                _gf_false);
+                }
+        } else {
+                if (ipc_ctr_params->write_freq_threshold == 0 &&
+                        ipc_ctr_params->read_freq_threshold == 0) {
+                        ret = find_recently_changed_files (
+                                conn_node,
+                                ctr_db_query_callback,
+                                (void *)&query_cbk_args,
+                                &ipc_ctr_params->time_stamp);
+                } else {
+                        ret = find_recently_changed_files_freq (
+                                conn_node,
+                                ctr_db_query_callback,
+                                (void *)&query_cbk_args,
+                                &ipc_ctr_params->time_stamp,
+                                ipc_ctr_params->write_freq_threshold,
+                                ipc_ctr_params->read_freq_threshold,
+                                _gf_false);
+                }
+        }
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_FATAL_ERROR,
+                        "FATAL: query from db failed");
+                        goto out;
+        }
+
+        ret = clear_files_heat (conn_node);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_FATAL_ERROR,
+                        "FATAL: Failed to clear db entries");
+                        goto out;
+        }
+
+        ret = 0;
+out:
+
+        /* On success report how many records the callback wrote */
+        if (!ret)
+                ret = query_cbk_args.count;
+
+        if (query_cbk_args.query_fd >= 0) {
+                sys_close (query_cbk_args.query_fd);
+                query_cbk_args.query_fd = -1;
+        }
+
+        return ret;
+}
+
+
+/*
+ * Executes one CTR IPC operation.
+ *
+ * The operation name is read from in_dict under GFDB_IPC_CTR_KEY and
+ * matched by prefix against:
+ *   GFDB_IPC_CTR_CLEAR_OPS          - clear the heat of files in the db
+ *   GFDB_IPC_CTR_QUERY_OPS          - run ctr_db_query() on the query file
+ *                                     and parameters found in in_dict and
+ *                                     store its return value in out_dict
+ *                                     under GFDB_IPC_CTR_RET_QUERY_COUNT
+ *   GFDB_IPC_CTR_GET_DB_VERSION_OPS - put the db version into out_dict
+ *   GFDB_IPC_CTR_GET_DB_PARAM_OPS   - put the db setting named by
+ *                                     GFDB_IPC_CTR_GET_DB_KEY into out_dict
+ * Any other operation name is a failure.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+ctr_ipc_helper (xlator_t *this, dict_t *in_dict,
+                dict_t *out_dict)
+{
+        int ret = -1;
+        char *ctr_ipc_ops = NULL;
+        gf_ctr_private_t *priv = NULL;
+        char *db_version = NULL;
+        char *db_param_key = NULL;
+        char *db_param = NULL;
+        char *query_file = NULL;
+        gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
+
+
+        GF_VALIDATE_OR_GOTO ("ctr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv->_db_conn, out);
+        GF_VALIDATE_OR_GOTO (this->name, in_dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, out_dict, out);
+
+        GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY,
+                                ctr_ipc_ops, out);
+
+        /*if its a db clear operation */
+        if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS,
+                        strlen (GFDB_IPC_CTR_CLEAR_OPS)) == 0) {
+
+                ret = clear_files_heat (priv->_db_conn);
+                if (ret)
+                        goto out;
+
+        } /* if its a query operation, in which case its query + clear db*/
+        else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS,
+                        strlen (GFDB_IPC_CTR_QUERY_OPS)) == 0) {
+
+                ret = dict_get_str (in_dict, GFDB_IPC_CTR_GET_QFILE_PATH,
+                                                                &query_file);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                                        "Failed extracting query file path");
+                        goto out;
+                }
+
+                ret = dict_get_bin (in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
+                                    (void *)&ipc_ctr_params);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                                        "Failed extracting query parameters");
+                        goto out;
+                }
+
+                /* The query result (record count or error value) is not
+                 * checked here; it is forwarded to the caller through
+                 * out_dict */
+                ret = ctr_db_query (this, priv->_db_conn, query_file,
+                                ipc_ctr_params);
+
+                ret = dict_set_int32 (out_dict,
+                                      GFDB_IPC_CTR_RET_QUERY_COUNT, ret);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                                        "Failed setting query reply");
+                        goto out;
+                }
+
+        } /* if its a query for db version */
+        else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS,
+                        strlen (GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) {
+
+                ret = get_db_version (priv->_db_conn, &db_version);
+                if (ret == -1 || !db_version) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                                        "Failed extracting db version ");
+                        goto out;
+                }
+
+                SET_DB_PARAM_TO_DICT(this->name, out_dict,
+                                        GFDB_IPC_CTR_RET_DB_VERSION,
+                                        db_version, ret, error);
+
+        } /* if its a query for a db setting */
+        else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS,
+                                strlen (GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) {
+
+                ret = dict_get_str (in_dict, GFDB_IPC_CTR_GET_DB_KEY,
+                                &db_param_key);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                                        "Failed extracting db param key");
+                        goto out;
+                }
+
+                ret = get_db_params (priv->_db_conn, db_param_key, &db_param);
+                if (ret == -1 || !db_param) {
+                        goto out;
+                }
+
+                SET_DB_PARAM_TO_DICT(this->name, out_dict,
+                                        db_param_key,
+                                        db_param, ret, error);
+        } /* default case */
+        else {
+                goto out;
+        }
+
+
+        ret = 0;
+        goto out;
+error:
+        /* Only values produced by the db layer are released here.
+         * db_param_key was obtained with dict_get_str() and still belongs
+         * to in_dict, so it must not be freed */
+        GF_FREE (db_param);
+        GF_FREE (db_version);
+out:
+        return ret;
+}
+
+
+/* IPC fop handler. Requests addressed to GF_IPC_TARGET_CTR are served
+ * locally through ctr_ipc_helper() and answered with a reply dict; any
+ * other request (or a request without an input dict) is wound to the
+ * first child unchanged. */
+int32_t
+ctr_ipc (call_frame_t *frame, xlator_t *this, int32_t op,
+         dict_t *in_dict)
+{
+        int               op_ret   = -1;
+        gf_ctr_private_t *ctr_priv = NULL;
+        dict_t           *reply    = NULL;
+
+        GF_ASSERT (this);
+        ctr_priv = this->private;
+        GF_ASSERT (ctr_priv);
+        GF_ASSERT (ctr_priv->_db_conn);
+        GF_VALIDATE_OR_GOTO (this->name, in_dict, wind);
+
+        /* Not meant for CTR: let the next xlator handle it */
+        if (op != GF_IPC_TARGET_CTR)
+                goto wind;
+
+        /* If the reply dict cannot be created the call is unwound with
+         * op_ret still at -1 and a NULL dict */
+        reply = dict_new ();
+        if (reply) {
+                op_ret = ctr_ipc_helper (this, in_dict, reply);
+                if (op_ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                                "Failed in ctr_ipc_helper");
+                }
+        }
+
+        STACK_UNWIND_STRICT (ipc, frame, op_ret, 0, reply);
+
+        if (reply)
+                dict_unref (reply);
+
+        return 0;
+
+wind:
+        STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->ipc, op, in_dict);
+
+        return 0;
+}
+
+
+/******************************************************************************/
+/* reconfigure: apply changed volume options to a running CTR xlator.
+ *
+ * Re-reads the boolean and timeout options into the private structure
+ * and, when the backing store is sqlite3, pushes the WAL auto-checkpoint
+ * and cache-size settings to the open database connection.
+ *
+ * Returns 0 on success and -1 when one of the GF_OPTION_RECONF
+ * validations jumps to "out". Failures while pushing sqlite parameters
+ * are logged but deliberately do not fail the reconfigure.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        char             *temp_str = NULL;
+        /* Start from failure: GF_OPTION_RECONF only jumps to the label, so
+         * an early exit must not be reported as success. This mirrors
+         * extract_ctr_options(). */
+        int               ret      = -1;
+        gf_ctr_private_t *priv     = NULL;
+
+        priv = this->private;
+        if (dict_get_str(options, "changetimerecorder.frequency",
+                         &temp_str)) {
+                gf_msg(this->name, GF_LOG_INFO, 0, CTR_MSG_SET, "set");
+        }
+
+        GF_OPTION_RECONF ("ctr-enabled", priv->enabled, options,
+                          bool, out);
+
+        GF_OPTION_RECONF ("record-counters", priv->ctr_record_counter, options,
+                          bool, out);
+
+        GF_OPTION_RECONF ("ctr-record-metadata-heat",
+                          priv->ctr_record_metadata_heat, options,
+                          bool, out);
+
+        GF_OPTION_RECONF ("ctr_link_consistency", priv->ctr_link_consistency,
+                          options, bool, out);
+
+        GF_OPTION_RECONF ("ctr_lookupheal_inode_timeout",
+                          priv->ctr_lookupheal_inode_timeout,
+                          options, uint64, out);
+
+        GF_OPTION_RECONF ("ctr_lookupheal_link_timeout",
+                          priv->ctr_lookupheal_link_timeout,
+                          options, uint64, out);
+
+        GF_OPTION_RECONF ("record-exit", priv->ctr_record_unwind, options,
+                          bool, out);
+
+        GF_OPTION_RECONF ("record-entry", priv->ctr_record_wind, options,
+                          bool, out);
+
+        /* If database is sqlite */
+        if (priv->gfdb_db_type == GFDB_SQLITE3) {
+
+                /* AUTOCHECKPOINT */
+                if (dict_get_str (options, GFDB_SQL_PARAM_WAL_AUTOCHECK,
+                                  &temp_str) == 0) {
+                        ret = set_db_params (priv->_db_conn,
+                                             "wal_autocheckpoint", temp_str);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
+                                        "Failed to set %s",
+                                        GFDB_SQL_PARAM_WAL_AUTOCHECK);
+                        }
+                }
+
+                /* CACHE_SIZE */
+                if (dict_get_str (options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str)
+                    == 0) {
+                        ret = set_db_params (priv->_db_conn, "cache_size",
+                                             temp_str);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
+                                        "Failed to set %s",
+                                        GFDB_SQL_PARAM_CACHE_SIZE);
+                        }
+                }
+        }
+
+        /* Best effort for the sqlite tunables: errors were logged above */
+        ret = 0;
+
+out:
+
+        return ret;
+}
+
+/****************************init********************************************/
+/* init: xlator entry point that builds the CTR private state.
+ *
+ * Requires exactly one child and logs a warning when the xlator has no
+ * parents. Allocates the private structure, applies defaults, reads the
+ * xlator options (extract_ctr_options) and the db parameters
+ * (extract_db_params), creates the gf_ctr_local_t memory pool and opens
+ * the database connection with init_db().
+ *
+ * Returns 0 with this->private set on success. On any failure the local
+ * pool is destroyed, priv->ctr_db_path and priv are freed, the params
+ * dict is unref'd and -1 is returned.
+ */
+int32_t
+init (xlator_t *this)
+{
+ gf_ctr_private_t *priv = NULL;
+ int ret_db = -1;
+ dict_t *params_dict = NULL;
+
+ GF_VALIDATE_OR_GOTO ("ctr", this, error);
+
+ if (!this->children || this->children->next) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_FATAL_ERROR,
+ "FATAL: ctr should have exactly one child");
+ goto error;
+ }
+
+ if (!this->parents) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ CTR_MSG_DANGLING_VOLUME,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_ctr_mt_private_t);
+ if (!priv) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ CTR_MSG_CALLOC_FAILED,
+ "Calloc did not work!!!");
+ goto error;
+ }
+
+ /*Default values for the translator*/
+ priv->ctr_record_wind = _gf_true;
+ priv->ctr_record_unwind = _gf_false;
+ priv->ctr_hot_brick = _gf_false;
+ priv->gfdb_db_type = GFDB_SQLITE3;
+ priv->gfdb_sync_type = GFDB_DB_SYNC;
+ priv->enabled = _gf_true; /* extract_ctr_options() resets this before reading "ctr-enabled" */
+ priv->_db_conn = NULL;
+ priv->ctr_lookupheal_link_timeout =
+ CTR_DEFAULT_HARDLINK_EXP_PERIOD;
+ priv->ctr_lookupheal_inode_timeout =
+ CTR_DEFAULT_INODE_EXP_PERIOD;
+
+ /*Extract ctr xlator options*/
+ ret_db = extract_ctr_options (this, priv);
+ if (ret_db) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
+ "Failed extracting ctr xlator options");
+ goto error;
+ }
+
+ params_dict = dict_new ();
+ if (!params_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_INIT_DB_PARAMS_FAILED,
+ "DB Params cannot initialized!");
+ goto error;
+ }
+
+ /*Extract db params options*/
+ ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type);
+ if (ret_db) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
+ "Failed extracting db params options");
+ goto error;
+ }
+
+ /*Create a memory pool for ctr xlator*/
+ this->local_pool = mem_pool_new (gf_ctr_local_t, 64);
+ if (!this->local_pool) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
+ "failed to create local memory pool");
+ goto error;
+ }
+
+ /*Initialize Database Connection*/
+ priv->_db_conn = init_db(params_dict, priv->gfdb_db_type);
+ if (!priv->_db_conn) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_FATAL_ERROR,
+ "FATAL: Failed initializing data base");
+ goto error;
+ }
+
+ ret_db = 0;
+ goto out;
+
+/*Error handling */
+error:
+
+ if (this) /* "this" may be NULL when the first validation failed */
+ mem_pool_destroy (this->local_pool);
+
+ if (priv) {
+ GF_FREE (priv->ctr_db_path);
+ }
+ GF_FREE (priv);
+
+ if (params_dict)
+ dict_unref (params_dict);
+
+ return -1;
+
+out:
+
+ if (params_dict) /* params are only needed while opening the db */
+ dict_unref (params_dict);
+
+ this->private = (void *)priv;
+ return 0;
+}
+
+/* mem_acct_init: register the CTR memory-accounting types with the
+ * xlator framework.
+ *
+ * Returns the result of xlator_mem_acct_init() (0 on success), or -1
+ * when "this" is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("ctr", this, out);
+
+        ret = xlator_mem_acct_init (this, gf_ctr_mt_end + 1);
+
+        if (ret != 0) {
+                /* The literals are concatenated by the compiler, so the
+                 * separating space has to be part of the first one */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_MEM_ACC_INIT_FAILED, "Memory accounting init "
+                        "failed");
+        }
+out:
+        return ret;
+}
+
+
+/* fini: release everything init() set up.
+ *
+ * Closes the database connection (logging a warning if that fails),
+ * frees the private structure and destroys the local memory pool. The
+ * xlator's pointers to the released objects are reset so that nothing
+ * can reach freed memory through "this" afterwards.
+ */
+void
+fini (xlator_t *this)
+{
+        gf_ctr_private_t *priv = NULL;
+
+        priv = this->private;
+
+        if (priv) {
+                if (fini_db (priv->_db_conn)) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                CTR_MSG_CLOSE_DB_CONN_FAILED, "Failed closing "
+                                "db connection");
+                }
+                GF_FREE (priv->ctr_db_path);
+        }
+        GF_FREE (priv);
+        this->private = NULL;
+
+        mem_pool_destroy (this->local_pool);
+        this->local_pool = NULL;
+
+        return;
+}
+/* File-operation table of the CTR xlator: the fops listed here are
+ * handled by the ctr_* functions; fops not listed are not overridden
+ * by this table. */
+struct xlator_fops fops = {
+ /*lookup*/
+ .lookup = ctr_lookup,
+ /*write fops */
+ .mknod = ctr_mknod,
+ .create = ctr_create,
+ .truncate = ctr_truncate,
+ .ftruncate = ctr_ftruncate,
+ .setxattr = ctr_setxattr,
+ .fsetxattr = ctr_fsetxattr,
+ .removexattr = ctr_removexattr,
+ .fremovexattr = ctr_fremovexattr,
+ .unlink = ctr_unlink,
+ .link = ctr_link,
+ .rename = ctr_rename,
+ .writev = ctr_writev,
+ .setattr = ctr_setattr,
+ .fsetattr = ctr_fsetattr,
+ /*read fops*/
+ .readv = ctr_readv,
+ /* IPC call*/
+ .ipc = ctr_ipc
+};
+/* Callback table of the CTR xlator: only the forget callback is set. */
+struct xlator_cbks cbks = {
+ .forget = ctr_forget
+};
+/* Volume options understood by the CTR xlator, terminated by an entry
+ * with a NULL key. The first group is read by extract_ctr_options();
+ * "db-path"/"db-name" are read by extract_sql_params(); the
+ * GFDB_SQL_PARAM_* keys are the sqlite tunables. */
+struct volume_options options[] = {
+ { .key = {"ctr-enabled",},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "off",
+ .description = "Enables the CTR"
+ },
+ { .key = {"record-entry"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "on"
+ },
+ { .key = {"record-exit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "off"
+ },
+ { .key = {"record-counters"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "off"
+ },
+ { .key = {"ctr-record-metadata-heat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "off"
+ },
+ { .key = {"ctr_link_consistency"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "off"
+ },
+ { .key = {"ctr_lookupheal_link_timeout"},
+ .type = GF_OPTION_TYPE_INT, /* read as uint64 by the option macros in this xlator */
+ .default_value = "300"
+ },
+ { .key = {"ctr_lookupheal_inode_timeout"},
+ .type = GF_OPTION_TYPE_INT, /* read as uint64 by the option macros in this xlator */
+ .default_value = "300"
+ },
+ { .key = {"hot-brick"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .value = {"on", "off"},
+ .default_value = "off"
+ },
+ { .key = {"db-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"hashfile", "rocksdb", "changelog", "sqlite3",
+ "hyperdex"},
+ .default_value = "sqlite3"
+ },
+ { .key = {"db-sync"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"sync", "async"},
+ .default_value = "sync"
+ },
+ { .key = {"db-path"},
+ .type = GF_OPTION_TYPE_PATH
+ },
+ { .key = {"db-name"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {GFDB_SQL_PARAM_SYNC},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"off", "normal", "full"},
+ .default_value = "normal"
+ },
+ { .key = {GFDB_SQL_PARAM_JOURNAL_MODE},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"delete", "truncate", "persist", "memory", "wal", "off"},
+ .default_value = "wal"
+ },
+ { .key = {GFDB_SQL_PARAM_AUTO_VACUUM},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"off", "full", "incr"},
+ .default_value = "off"
+ },
+ { .key = {GFDB_SQL_PARAM_WAL_AUTOCHECK},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "1000"
+ },
+ { .key = {GFDB_SQL_PARAM_CACHE_SIZE},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "1000"
+ },
+ { .key = {GFDB_SQL_PARAM_PAGE_SIZE},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "4096"
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h
new file mode 100644
index 00000000000..2a8bbd18c5b
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CTR_H
+#define __CTR_H
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "ctr_mem_types.h"
+#include "ctr-helper.h"
+
+#endif /* __CTR_H */
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c
new file mode 100644
index 00000000000..263eb58db6f
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/ctr-helper.c
@@ -0,0 +1,308 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gfdb_sqlite3.h"
+#include "ctr-helper.h"
+#include "ctr-messages.h"
+
+/*******************************************************************************
+ *
+ *                      Fill unwind into db record
+ *
+ * Stamps the db record held in ctr_local with the fop type, the fop path
+ * and the current time as the unwind change time. For create/mknod fops
+ * issued by the tier rebalancer on a cold-tier brick the unwind time is
+ * recorded as zero instead.
+ *
+ * Returns 0 on success, -1 when fop_path is not an unwind path or the
+ * current time cannot be read.
+ *
+ ******************************************************************************/
+int
+fill_db_record_for_unwind(xlator_t              *this,
+                          gf_ctr_local_t        *ctr_local,
+                          gfdb_fop_type_t       fop_type,
+                          gfdb_fop_path_t       fop_path)
+{
+        gf_ctr_private_t *ctr_priv    = NULL;
+        gfdb_time_t      *unwind_time = NULL;
+
+        GF_ASSERT (this);
+        ctr_priv = this->private;
+        GF_ASSERT (ctr_priv);
+
+        GF_ASSERT(ctr_local);
+
+        /* Only unwind paths are accepted here */
+        if (!isunwindpath(fop_path)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
+                        "Wrong fop_path. Should be unwind");
+                return -1;
+        }
+
+        unwind_time = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time;
+        CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path;
+        CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type;
+
+        if (gettimeofday (unwind_time, NULL) == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, "Error "
+                        "filling unwind time record %s",
+                        strerror(errno));
+                return -1;
+        }
+
+        /* Tier rebalance creating a dentry on a cold-tier brick:
+         * the unwind time is stored as zero */
+        if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG
+            && (!ctr_priv->ctr_hot_brick)
+            && isdentrycreatefop(fop_type)) {
+                memset(unwind_time, 0, sizeof(*unwind_time));
+        }
+
+        return 0;
+}
+
+
+/*******************************************************************************
+ *
+ *                      Fill wind into db record
+ *
+ * Copies the information carried by ctr_inode_cx into the db record held
+ * in ctr_local: fop type/path, link-consistency flag, wind change time,
+ * gfid (and old gfid, if any) and, for dentry fops, the parent gfid and
+ * name of the new and/or old hard link.
+ *
+ * Link names are copied with an explicit bound of GF_NAME_MAX characters
+ * (the record name fields are cleared over GF_NAME_MAX + 1 bytes by
+ * CLEAR_CTR_DB_RECORD) so an oversized basename cannot overrun the record.
+ *
+ * Returns 0 on success. On failure -1 is returned and the record is
+ * rolled back with CLEAR_CTR_DB_RECORD.
+ *
+ ******************************************************************************/
+int
+fill_db_record_for_wind (xlator_t               *this,
+                         gf_ctr_local_t         *ctr_local,
+                         gf_ctr_inode_context_t *ctr_inode_cx)
+{
+        int ret                                 = -1;
+        gfdb_time_t *ctr_wtime                  = NULL;
+        gf_ctr_private_t *_priv                 = NULL;
+
+        GF_ASSERT (this);
+        _priv = this->private;
+        GF_ASSERT (_priv);
+        GF_ASSERT (ctr_local);
+        IS_CTR_INODE_CX_SANE (ctr_inode_cx);
+
+        /*if not wind path error!*/
+        if (!iswindpath(ctr_inode_cx->fop_path)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_WRONG_FOP_PATH,
+                        "Wrong fop_path. Should be wind");
+                goto out;
+        }
+
+        ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time;
+        CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
+        CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
+        CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency;
+
+        ret = gettimeofday (ctr_wtime, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
+                        "Error filling wind time record %s",
+                        strerror(errno));
+                goto out;
+        }
+
+        /* Special case i.e if its a tier rebalance
+         * + cold tier brick
+         * + its a create/mknod FOP
+         * we record wind time as zero */
+        if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG
+            && (!_priv->ctr_hot_brick)
+            && isdentrycreatefop(ctr_inode_cx->fop_type)) {
+                memset(ctr_wtime, 0, sizeof(*ctr_wtime));
+        }
+
+        /* Copy gfid into db record */
+        gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
+
+        /* Copy older gfid if any */
+        if (ctr_inode_cx->old_gfid &&
+            (!gf_uuid_is_null (*(ctr_inode_cx->old_gfid)))) {
+                gf_uuid_copy (CTR_DB_REC(ctr_local).old_gfid,
+                              *(ctr_inode_cx->old_gfid));
+        }
+
+        /*Hard Links*/
+        if (isdentryfop(ctr_inode_cx->fop_type)) {
+                /*new link fop*/
+                if (NEW_LINK_CX(ctr_inode_cx)) {
+                        gf_uuid_copy (CTR_DB_REC(ctr_local).pargfid,
+                                *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
+                        /* Bounded copy, always NUL-terminated */
+                        strncpy (CTR_DB_REC(ctr_local).file_name,
+                                 NEW_LINK_CX(ctr_inode_cx)->basename,
+                                 GF_NAME_MAX);
+                        CTR_DB_REC(ctr_local).file_name[GF_NAME_MAX] = '\0';
+                }
+                /*rename fop*/
+                if (OLD_LINK_CX(ctr_inode_cx)) {
+                        gf_uuid_copy (CTR_DB_REC(ctr_local).old_pargfid,
+                                *((OLD_LINK_CX(ctr_inode_cx))->pargfid));
+                        /* Bounded copy, always NUL-terminated */
+                        strncpy (CTR_DB_REC(ctr_local).old_file_name,
+                                 OLD_LINK_CX(ctr_inode_cx)->basename,
+                                 GF_NAME_MAX);
+                        CTR_DB_REC(ctr_local).old_file_name[GF_NAME_MAX] = '\0';
+                }
+        }
+
+        ret = 0;
+out:
+        /*On error roll back and clean the record*/
+        if (ret == -1) {
+                CLEAR_CTR_DB_RECORD (ctr_local);
+        }
+        return ret;
+}
+
+
+/******************************************************************************
+ *
+ * CTR xlator init related functions
+ *
+ *
+ * ****************************************************************************
+ *
+ * extract_sql_params: build the sqlite parameter dict for init_db().
+ * Reads "db-path" and "db-name" from this->options (falling back to the
+ * defaults given below), joins them into the full database path, stores
+ * that path under GFDB_SQL_PARAM_DBPATH and then lets
+ * gfdb_set_sql_params() add the remaining sql parameters.
+ *
+ * Returns 0 on success and non-zero when building or storing the db path
+ * fails; in that case the path string is freed here.
+ */
+static int
+extract_sql_params(xlator_t *this, dict_t *params_dict)
+{
+ int ret = -1;
+ char *db_path = NULL;
+ char *db_name = NULL;
+ char *db_full_path = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (params_dict);
+
+ /*Extract the path of the db*/
+ db_path = NULL;
+ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path",
+ db_path, "/var/run/gluster/");
+
+ /*Extract the name of the db*/
+ db_name = NULL;
+ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name",
+ db_name, "gf_ctr_db.db");
+
+ /*Construct full path of the db*/
+ ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name);
+ if (ret < 0) {
+ gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+ CTR_MSG_CONSTRUCT_DB_PATH_FAILED,
+ "Construction of full db path failed!");
+ goto out;
+ }
+
+ /*Setting the SQL DB Path
+  * NOTE(review): whether the dict takes ownership of db_full_path
+  * depends on SET_DB_PARAM_TO_DICT, which is defined elsewhere -
+  * confirm before changing the cleanup at "out".*/
+ SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
+ db_full_path, ret, out);
+
+ /*Extact rest of the sql params
+  * NOTE(review): a failure here is only logged; ret is reset to 0
+  * right below, so the caller never sees it - confirm this is the
+  * intended best-effort behaviour.*/
+ ret = gfdb_set_sql_params(this->name, this->options, params_dict);
+ if (ret) {
+ gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
+ "Failed setting values to sql param dict!");
+ }
+
+ ret = 0;
+
+out:
+ if (ret)
+ GF_FREE (db_full_path);
+ return ret;
+}
+
+
+
+/* extract_db_params: fill params_dict with the connection parameters of
+ * the selected database type.
+ *
+ * Only GFDB_SQLITE3 is supported; its parameters are produced by
+ * extract_sql_params(). Every other db type is rejected.
+ *
+ * Returns 0 on success, the non-zero result of extract_sql_params() when
+ * that fails, and -1 for an unsupported db type.
+ */
+int extract_db_params(xlator_t *this, dict_t *params_dict,
+                      gfdb_db_type_t db_type) {
+
+        int ret = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (params_dict);
+
+        switch (db_type) {
+        case GFDB_SQLITE3:
+                ret = extract_sql_params(this, params_dict);
+                break;
+        case GFDB_ROCKS_DB:
+        case GFDB_HYPERDEX:
+        case GFDB_HASH_FILE_STORE:
+        case GFDB_INVALID_DB:
+        case GFDB_DB_END:
+        default:
+                /* Unsupported store: report the failure instead of letting
+                 * it be overwritten with success after the switch */
+                ret = -1;
+                break;
+        }
+
+        return ret;
+}
+/* extract_ctr_options: read the CTR xlator options into _priv.
+ * When "ctr-enabled" is off the function returns 0 right away and leaves
+ * every other field of _priv as the caller set it. Otherwise the db type,
+ * the record flags, the lookup-heal timeouts, the hot-brick flag and the
+ * db sync mode are read. Returns 0 on success and -1 when one of the
+ * GF_OPTION_INIT reads jumps to "out". */
+int extract_ctr_options (xlator_t *this, gf_ctr_private_t *_priv) {
+ int ret = -1;
+ char *_val_str = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (_priv);
+
+ /*Checking if the CTR Translator is enabled. By default its disabled*/
+ _priv->enabled = _gf_false;
+ GF_OPTION_INIT ("ctr-enabled", _priv->enabled, bool, out);
+ if (!_priv->enabled) {
+ gf_msg (GFDB_DATA_STORE, GF_LOG_INFO, 0,
+ CTR_MSG_XLATOR_DISABLED,
+ "CTR Xlator is disabled.");
+ ret = 0;
+ goto out;
+ }
+
+ /*Extract db type*/
+ GF_OPTION_INIT ("db-type", _val_str, str, out);
+ _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str);
+
+ /*Extract flag for record on wind*/
+ GF_OPTION_INIT ("record-entry", _priv->ctr_record_wind, bool, out);
+
+ /*Extract flag for record on unwind*/
+ GF_OPTION_INIT ("record-exit", _priv->ctr_record_unwind, bool, out);
+
+ /*Extract flag for record on counters*/
+ GF_OPTION_INIT ("record-counters", _priv->ctr_record_counter, bool,
+ out);
+
+ /* Extract flag for record metadata heat */
+ GF_OPTION_INIT ("ctr-record-metadata-heat",
+ _priv->ctr_record_metadata_heat, bool,
+ out);
+
+ /*Extract flag for link consistency*/
+ GF_OPTION_INIT ("ctr_link_consistency", _priv->ctr_link_consistency,
+ bool, out);
+
+ /*Extract ctr_lookupheal_inode_timeout */
+ GF_OPTION_INIT ("ctr_lookupheal_inode_timeout",
+ _priv->ctr_lookupheal_inode_timeout,
+ uint64, out);
+
+ /*Extract ctr_lookupheal_link_timeout*/
+ GF_OPTION_INIT ("ctr_lookupheal_link_timeout",
+ _priv->ctr_lookupheal_link_timeout,
+ uint64, out);
+
+ /*Extract flag for hot tier brick*/
+ GF_OPTION_INIT ("hot-brick", _priv->ctr_hot_brick, bool, out);
+
+ /*Extract flag for sync mode*/
+ GF_OPTION_INIT ("db-sync", _val_str, str, out);
+ _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str);
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
new file mode 100644
index 00000000000..d5615270184
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/ctr-helper.h
@@ -0,0 +1,923 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CTR_HELPER_H
+#define __CTR_HELPER_H
+
+
+#include "xlator.h"
+#include "ctr_mem_types.h"
+#include "iatt.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "common-utils.h"
+#include <time.h>
+#include <sys/time.h>
+
+#include "gfdb_data_store.h"
+#include "ctr-xlator-ctx.h"
+#include "ctr-messages.h"
+
+#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */
+#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */
+
+/* Arguments handed to the CTR query callback.
+ * NOTE(review): the callback is outside this header; the field meanings
+ * below are inferred from the names - confirm against ctr_db_query. */
+typedef struct ctr_query_cbk_args {
+ int query_fd; /* presumably the fd of the query output file */
+ int count; /* presumably the number of records handled so far */
+} ctr_query_cbk_args_t;
+
+
+/*CTR Xlator Private structure
+ * Most fields mirror a volume option read in extract_ctr_options(); the
+ * option name is given next to each of them.*/
+typedef struct gf_ctr_private {
+ gf_boolean_t enabled; /* "ctr-enabled" */
+ char *ctr_db_path; /* freed in init()'s error path and in fini() */
+ gf_boolean_t ctr_hot_brick; /* "hot-brick" */
+ gf_boolean_t ctr_record_wind; /* "record-entry" */
+ gf_boolean_t ctr_record_unwind; /* "record-exit" */
+ gf_boolean_t ctr_record_counter; /* "record-counters" */
+ gf_boolean_t ctr_record_metadata_heat; /* "ctr-record-metadata-heat" */
+ gf_boolean_t ctr_link_consistency; /* "ctr_link_consistency" */
+ gfdb_db_type_t gfdb_db_type; /* "db-type" */
+ gfdb_sync_type_t gfdb_sync_type; /* "db-sync" */
+ gfdb_conn_node_t *_db_conn; /* connection returned by init_db() */
+ uint64_t ctr_lookupheal_link_timeout; /* "ctr_lookupheal_link_timeout" */
+ uint64_t ctr_lookupheal_inode_timeout; /* "ctr_lookupheal_inode_timeout" */
+} gf_ctr_private_t;
+
+
+/*
+ * gf_ctr_local_t is the ctr xlator local data structure that is stored in
+ * the call_frame of each FOP.
+ *
+ * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
+ * used by the insert_record() api from the libgfdb. The gfdb_db_record object
+ * will contain all the inode and hardlink information (hardlinks only for
+ * dentry fops: create, mknod, link, unlink, rename). The ctr_local is kept
+ * alive till the unwind call and will be released during the unwind. The same
+ * gfdb_db_record will be used for the unwind insert_record() api, to record
+ * the unwind in the database.
+ *
+ * ia_inode_type in gf_ctr_local will tell the type of the inode. This is
+ * important during the unwind path, as we will not have the inode during
+ * the unwind path. We could have included this in the gfdb_db_record itself
+ * but currently we record only file inode information.
+ *
+ * is_internal_fop in gf_ctr_local will tell us if this is an internal fop so
+ * that we take special/no action. We don't record change/access times or
+ * increment the heat counter for internal fops from the rebalancer.
+ *
+ * client_pid is the pid of the client that issued the fop (copied from
+ * frame->root->pid in ctr_insert_wind).
+ * */
+typedef struct gf_ctr_local {
+ gfdb_db_record_t gfdb_db_record;
+ ia_type_t ia_inode_type;
+ gf_boolean_t is_internal_fop;
+ gf_special_pid_t client_pid;
+} gf_ctr_local_t;
+/*
+ * Easy access of gfdb_db_record of ctr_local.
+ * ctr_local must be a pointer to gf_ctr_local_t; the expansion is the
+ * record itself (an lvalue), not a pointer to it.
+ * */
+#define CTR_DB_REC(ctr_local)\
+ (ctr_local->gfdb_db_record)
+
+/*Clear db record
+ * Resets the fop path/type to their invalid values, zeroes both change
+ * times, the gfid, the parent gfid and both file names, and sets the
+ * inode type to IA_INVAL. Fields not listed here (e.g. old_gfid and
+ * old_pargfid) are left untouched.*/
+#define CLEAR_CTR_DB_RECORD(ctr_local)\
+do {\
+ ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID;\
+ memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time),\
+ 0, sizeof(gfdb_time_t));\
+ memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time),\
+ 0, sizeof(gfdb_time_t));\
+ gf_uuid_clear (ctr_local->gfdb_db_record.gfid);\
+ gf_uuid_clear (ctr_local->gfdb_db_record.pargfid);\
+ memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1);\
+ memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1);\
+ ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP;\
+ ctr_local->ia_inode_type = IA_INVAL;\
+} while (0)
+
+
+/* Take a zeroed gf_ctr_local_t from the xlator's local pool and reset its
+ * db record. Returns NULL (after logging) when the pool has nothing to
+ * give. */
+static gf_ctr_local_t *
+init_ctr_local_t (xlator_t *this) {
+
+        gf_ctr_local_t *local = NULL;
+
+        GF_ASSERT(this);
+
+        local = mem_get0 (this->local_pool);
+        if (local) {
+                CLEAR_CTR_DB_RECORD (local);
+                return local;
+        }
+
+        gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+                CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+                "Error while creating ctr local");
+        return NULL;
+}
+
+/* Give a ctr local back to its memory pool; a NULL pointer is ignored. */
+static void
+free_ctr_local (gf_ctr_local_t *ctr_local)
+{
+        if (!ctr_local)
+                return;
+
+        mem_put (ctr_local);
+}
+
+
+
+/******************************************************************************
+ *
+ *
+ * Context Carrier Structures
+ *
+ *
+ * ****************************************************************************/
+
+/*
+ * Context Carrier structures are used to carry relevant information about
+ * inodes and links from the fop calls to ctr_insert_wind.
+ * These structures just hold pointers to the original data and do not
+ * make a deep copy of any data. This info is deep copied to
+ * ctr_local->gfdb_db_record and passed to the insert_record() api of libgfdb.
+ * This info remains persistent for the unwind in ctr_local->gfdb_db_record
+ * and is destroyed once it has been used.
+ *
+ * gf_ctr_link_context_t : Context structure for hard links
+ * gf_ctr_inode_context_t : Context structure for inodes
+ *
+ * */
+
+ /*Context Carrier Structure for hard links.
+  * Both members only point at caller-owned data; fill_db_record_for_wind()
+  * copies them into the db record.*/
+typedef struct gf_ctr_link_context {
+ uuid_t *pargfid; /* gfid of the parent directory of the link */
+ const char *basename; /* name of the link */
+} gf_ctr_link_context_t;
+
+ /*Context Carrier Structure for inodes.
+  * Filled per fop (see FILL_CTR_INODE_CONTEXT) and consumed by
+  * ctr_insert_wind()/fill_db_record_for_wind().*/
+typedef struct gf_ctr_inode_context {
+ ia_type_t ia_type; /* inode type; directories are not recorded on wind */
+ uuid_t *gfid; /* gfid of the inode, must be set */
+ uuid_t *old_gfid; /* optional older gfid, copied when set and non-null */
+ gf_ctr_link_context_t *new_link_cx; /* link being created, if any */
+ gf_ctr_link_context_t *old_link_cx; /* previous link (rename), if any */
+ gfdb_fop_type_t fop_type;
+ gfdb_fop_path_t fop_path;
+ gf_boolean_t is_internal_fop;
+ /* Indicating metadata fops */
+ gf_boolean_t is_metadata_fop;
+} gf_ctr_inode_context_t;
+
+
+/*******************Util Macros for Context Carrier Structures*****************/
+
+/*Checks if ctr_link_cx is sane!
+ * A NULL ctr_link_cx is accepted and nothing is checked. Otherwise the
+ * basename must be set and, when pargfid is set, the dereferenced pargfid
+ * is asserted as well.*/
+#define IS_CTR_LINK_CX_SANE(ctr_link_cx)\
+do {\
+ if (ctr_link_cx) {\
+ if (ctr_link_cx->pargfid)\
+ GF_ASSERT (*(ctr_link_cx->pargfid));\
+ GF_ASSERT (ctr_link_cx->basename);\
+ };\
+} while (0)
+
+/*Clear and fill the ctr_link_context with values
+ * Jumps to label when ctr_link_cx, _pargfid or _basename is NULL/zero.
+ * The context stores the ADDRESS of _pargfid, so _pargfid must be an
+ * lvalue that outlives the context.*/
+#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label)\
+do {\
+ GF_VALIDATE_OR_GOTO ("ctr", ctr_link_cx, label);\
+ GF_VALIDATE_OR_GOTO ("ctr", _pargfid, label);\
+ GF_VALIDATE_OR_GOTO ("ctr", _basename, label);\
+ memset (ctr_link_cx, 0, sizeof (*ctr_link_cx));\
+ ctr_link_cx->pargfid = &_pargfid;\
+ ctr_link_cx->basename = _basename;\
+} while (0)
+/* Accessors for the new-link / old-link context of an inode context. Both definitions end in a line continuation, so the blank line that follows each of them is part of the macro and must stay. */
+#define NEW_LINK_CX(ctr_inode_cx)\
+ ctr_inode_cx->new_link_cx\
+
+#define OLD_LINK_CX(ctr_inode_cx)\
+ ctr_inode_cx->old_link_cx\
+
+/*Checks if ctr_inode_cx is sane!
+ * Asserts that the context and its gfid are set, that fop type and fop
+ * path are not the invalid values, and runs IS_CTR_LINK_CX_SANE on both
+ * link contexts.*/
+#define IS_CTR_INODE_CX_SANE(ctr_inode_cx)\
+do {\
+ GF_ASSERT (ctr_inode_cx);\
+ GF_ASSERT (ctr_inode_cx->gfid);\
+ GF_ASSERT (*(ctr_inode_cx->gfid));\
+ GF_ASSERT (ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP);\
+ GF_ASSERT (ctr_inode_cx->fop_path != GFDB_FOP_INVALID);\
+ IS_CTR_LINK_CX_SANE (NEW_LINK_CX(ctr_inode_cx));\
+ IS_CTR_LINK_CX_SANE (OLD_LINK_CX(ctr_inode_cx));\
+} while (0)
+
+/*Clear and fill the ctr_inode_context with values
+ * The context is zeroed first, so old_gfid, is_internal_fop and
+ * is_metadata_fop stay 0/NULL until the caller sets them. The context
+ * stores the ADDRESS of _gfid, so _gfid must be an lvalue that outlives
+ * the context.
+ * NOTE(review): the two IS_CTR_LINK_CX_SANE calls run on the freshly
+ * zeroed link fields, i.e. before _new_link_cx/_old_link_cx are
+ * assigned, so they do not inspect the values being passed in -
+ * confirm whether the checks were meant for the arguments.*/
+#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx,\
+ _ia_type,\
+ _gfid,\
+ _new_link_cx,\
+ _old_link_cx,\
+ _fop_type,\
+ _fop_path)\
+do {\
+ GF_ASSERT (ctr_inode_cx);\
+ GF_ASSERT (_gfid);\
+ GF_ASSERT (_fop_type != GFDB_FOP_INVALID_OP);\
+ GF_ASSERT (_fop_path != GFDB_FOP_INVALID);\
+ memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx));\
+ ctr_inode_cx->ia_type = _ia_type;\
+ ctr_inode_cx->gfid = &_gfid;\
+ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx));\
+ if (_new_link_cx)\
+ NEW_LINK_CX(ctr_inode_cx) = _new_link_cx;\
+ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx));\
+ if (_old_link_cx)\
+ OLD_LINK_CX(ctr_inode_cx) = _old_link_cx;\
+ ctr_inode_cx->fop_type = _fop_type;\
+ ctr_inode_cx->fop_path = _fop_path;\
+} while (0)
+
+
+/******************************************************************************
+ *
+ * Util functions or macros used by
+ * insert wind and insert unwind
+ *
+ * ****************************************************************************/
+/* Release the ctr local attached to a frame and detach it, so the frame
+ * no longer points at the returned object. A NULL frame is ignored. */
+static inline void
+ctr_free_frame_local (call_frame_t *frame) {
+        if (!frame)
+                return;
+
+        free_ctr_local ((gf_ctr_local_t *) frame->local);
+        frame->local = NULL;
+}
+
+/* Setting GF_REQUEST_LINK_COUNT_XDATA in dict
+ * that has to be sent to POSIX Xlator to send
+ * link count in unwind path.
+ *
+ * When *xdata is NULL a new dict is created for the request.
+ *
+ * return 0 for success without creation of dict
+ * return 1 for success with creation of dict (the caller owns the new
+ *          dict and has to unref it)
+ * return -1 for failure; a dict created here is released again and
+ *          *xdata is reset to NULL.
+ * */
+static inline int
+set_posix_link_request (xlator_t        *this,
+                        dict_t          **xdata)
+{
+        int ret                         = -1;
+        gf_boolean_t is_created         = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("ctr", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, xdata, out);
+
+        /*create xdata if NULL*/
+        if (!*xdata) {
+                *xdata = dict_new();
+                if (!*xdata) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CTR_MSG_XDATA_NULL,
+                                "xdata is NULL :Cannot send "
+                                "GF_REQUEST_LINK_COUNT_XDATA to posix");
+                        goto out;
+                }
+                is_created = _gf_true;
+        }
+
+        ret = dict_set_int32 (*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+                        "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
+                ret = -1;
+                goto out;
+        }
+
+        /* Tell the caller whether it now owns a dict created here */
+        ret = is_created ? 1 : 0;
+out:
+        /* is_created is only set once xdata and *xdata are known to be
+         * valid, so the dereference below is safe on every path */
+        if (ret == -1 && is_created) {
+                dict_unref (*xdata);
+                *xdata = NULL;
+        }
+        return ret;
+}
+
+
+/*
+ * If a bitrot fop: true when the frame's root pid is the bitrot daemon
+ * pid (GF_CLIENT_PID_BITD) or the scrubber pid (GF_CLIENT_PID_SCRUB).
+ * */
+#define BITROT_FOP(frame)\
+ (frame->root->pid == GF_CLIENT_PID_BITD ||\
+ frame->root->pid == GF_CLIENT_PID_SCRUB)
+
+
+/*
+ * If a rebalancer fop: true when the frame's root pid is
+ * GF_CLIENT_PID_DEFRAG.
+ * */
+#define REBALANCE_FOP(frame)\
+ (frame->root->pid == GF_CLIENT_PID_DEFRAG)
+
+/*
+ * If it is a tiering rebalancer fop: true when the frame's root pid is
+ * GF_CLIENT_PID_TIER_DEFRAG.
+ * */
+#define TIER_REBALANCE_FOP(frame)\
+ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
+
+/*
+ * If it is an AFR self-heal fop: true when the frame's root pid is
+ * GF_CLIENT_PID_SELF_HEALD.
+ * */
+ #define AFR_SELF_HEAL_FOP(frame)\
+ (frame->root->pid == GF_CLIENT_PID_SELF_HEALD)
+
+/*
+ * If the fop comes from the rebalancer (REBALANCE_FOP), jump to label.
+ * Tier-rebalance fops are not matched by this macro.
+ * */
+#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label)\
+do {\
+ if (REBALANCE_FOP (frame))\
+ goto label;\
+} while (0)
+
+/*
+ * Internal fop
+ *
+ * Decides whether a fop is internal, i.e. issued by a gluster daemon
+ * rather than by a regular client:
+ *  - self-heal and bitrot fops are internal;
+ *  - rebalance and tier-rebalance fops are internal unless xdata carries
+ *    CTR_ATTACH_TIER_LOOKUP;
+ *  - any fop whose xdata carries GLUSTERFS_INTERNAL_FOP_KEY is internal,
+ *    whatever the checks above said.
+ * */
+static inline gf_boolean_t
+is_internal_fop (call_frame_t *frame,
+                 dict_t       *xdata)
+{
+        gf_boolean_t internal = _gf_false;
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+
+        if (AFR_SELF_HEAL_FOP (frame) || BITROT_FOP (frame))
+                internal = _gf_true;
+
+        if (REBALANCE_FOP (frame) || TIER_REBALANCE_FOP (frame)) {
+                /* Attach-tier lookups are treated like client fops */
+                if (xdata && dict_get (xdata, CTR_ATTACH_TIER_LOOKUP))
+                        internal = _gf_false;
+                else
+                        internal = _gf_true;
+        }
+
+        /* The explicit marker always wins */
+        if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY))
+                internal = _gf_true;
+
+        return internal;
+}
+/* Jump to label when is_internal_fop() reports the fop as internal. */
+#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label)\
+do {\
+ if (is_internal_fop (frame, dict)) \
+ goto label; \
+} while (0)
+
+/* if fop has failed exit: when op_ret is -1, trace-log the error string
+ * for op_errno and jump to label */
+#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label)\
+do {\
+ if (op_ret == -1) {\
+ gf_msg_trace (this->name, 0, "Failed fop with %s",\
+ strerror (op_errno));\
+ goto label;\
+ };\
+} while (0)
+
+/*
+ * If the CTR xlator is disabled (priv->enabled is false), go to label.
+ * Declares a block-local _priv, so the expansion does not clash with a
+ * _priv in the calling function.
+ * */
+ #define CTR_IS_DISABLED_THEN_GOTO(this, label)\
+ do {\
+ gf_ctr_private_t *_priv = NULL;\
+ GF_ASSERT (this);\
+ GF_ASSERT (this->private);\
+ _priv = this->private;\
+ if (!_priv->enabled)\
+ goto label;\
+ } while (0)
+
+/*
+ * If recording of metadata heat is disabled
+ * (priv->ctr_record_metadata_heat is false), go to label.
+ * */
+ #define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label)\
+ do {\
+ gf_ctr_private_t *_priv = NULL;\
+ GF_ASSERT (this);\
+ GF_ASSERT (this->private);\
+ _priv = this->private;\
+ if (!_priv->ctr_record_metadata_heat)\
+ goto label;\
+ } while (0)
+/* Fill the db record of ctr_local for the unwind path (defined in ctr-helper.c); returns 0 on success, -1 on failure. */
+int
+fill_db_record_for_unwind (xlator_t *this,
+ gf_ctr_local_t *ctr_local,
+ gfdb_fop_type_t fop_type,
+ gfdb_fop_path_t fop_path);
+/* Fill the db record of ctr_local for the wind path from ctr_inode_cx (defined in ctr-helper.c); returns 0 on success, -1 on failure. */
+int
+fill_db_record_for_wind (xlator_t *this,
+ gf_ctr_local_t *ctr_local,
+ gf_ctr_inode_context_t *ctr_inode_cx);
+
+/*******************************************************************************
+ *                              CTR INSERT WIND
+ * *****************************************************************************
+ * Inserts/updates a record in the database while a fop is being wound.
+ *
+ * Work is done only when the record-wind option is on and the inode is not a
+ * directory. In that case a ctr_local is created and stored in frame->local,
+ * the "record counters" / "record times" flags of its db record are decided,
+ * the record is filled from ctr_inode_cx and finally inserted.
+ *
+ * Returns 0 on success (including the "nothing to record" case). On any
+ * failure the ctr_local is freed, frame->local is reset to NULL and a
+ * non-zero value is returned.
+ * ****************************************************************************/
+
+static inline int
+ctr_insert_wind (call_frame_t                    *frame,
+                 xlator_t                        *this,
+                 gf_ctr_inode_context_t          *ctr_inode_cx)
+{
+        int                 ret             = -1;
+        gf_ctr_private_t   *_priv           = NULL;
+        gf_ctr_local_t     *ctr_local       = NULL;
+        gf_boolean_t        heat_applies    = _gf_false;
+
+        GF_ASSERT(frame);
+        GF_ASSERT(frame->root);
+        GF_ASSERT(this);
+        IS_CTR_INODE_CX_SANE(ctr_inode_cx);
+
+        _priv = this->private;
+        GF_ASSERT (_priv);
+
+        GF_ASSERT(_priv->_db_conn);
+
+        /* Wind recording is for regular (non-directory) inodes only, and
+         * only when the record_wind option is on. */
+        if (!_priv->ctr_record_wind || ctr_inode_cx->ia_type == IA_IFDIR) {
+                ret = 0;
+                goto out;
+        }
+
+        ctr_local = init_ctr_local_t (this);
+        frame->local = ctr_local;
+        if (!ctr_local) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+                        "WIND: Error while creating ctr local");
+                goto out;
+        }
+
+        ctr_local->client_pid = frame->root->pid;
+        ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop;
+
+        /* Heat is recorded for every non-metadata fop, and for metadata
+         * fops only when metadata heat recording is enabled. */
+        if (!ctr_inode_cx->is_metadata_fop
+            || _priv->ctr_record_metadata_heat) {
+                heat_applies = _gf_true;
+        }
+
+        /* Counters: option enabled, fop not internal, heat applies. */
+        if (_priv->ctr_record_counter
+            && !(ctr_local->is_internal_fop)
+            && heat_applies) {
+                CTR_DB_REC(ctr_local).do_record_counters = _gf_true;
+        } else {
+                CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
+        }
+
+        /* Times: internal fops record times only when the dentry is being
+         * created; other fops follow the wind/unwind options whenever heat
+         * applies. */
+        if (ctr_local->is_internal_fop) {
+                CTR_DB_REC(ctr_local).do_record_times =
+                        (isdentrycreatefop(ctr_inode_cx->fop_type)) ?
+                                _gf_true : _gf_false;
+        } else if (heat_applies) {
+                CTR_DB_REC(ctr_local).do_record_times =
+                        (_priv->ctr_record_wind
+                         || _priv->ctr_record_unwind);
+        } else {
+                CTR_DB_REC(ctr_local).do_record_times = _gf_false;
+        }
+
+        /* Populate the db record ... */
+        ret = fill_db_record_for_wind (this, ctr_local, ctr_inode_cx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
+                        "WIND: Error filling ctr local");
+                goto out;
+        }
+
+        /* ... and push it to the database. */
+        ret = insert_record (_priv->_db_conn,
+                             &ctr_local->gfdb_db_record);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_RECORD_WIND_FAILED,
+                        "WIND: Inserting of record failed!");
+                goto out;
+        }
+
+        ret = 0;
+out:
+
+        if (ret) {
+                free_ctr_local (ctr_local);
+                frame->local = NULL;
+        }
+
+        return ret;
+}
+
+
+
+
+/*******************************************************************************
+ *                              CTR INSERT UNWIND
+ * *****************************************************************************
+ * Inserts/updates a record in the database while a fop is being unwound.
+ *
+ * The record is written only when frame->local holds a ctr_local, the inode
+ * is not a directory, and either the record-unwind option is on or the fop
+ * is a dentry fop.
+ *
+ * Note: this function does not release ctr_local; frame->local is left
+ * untouched.
+ *
+ * Returns 0 on success (or when there is nothing to record), -1 when filling
+ * or inserting the record fails.
+ * ****************************************************************************/
+static inline int
+ctr_insert_unwind (call_frame_t          *frame,
+                   xlator_t              *this,
+                   gfdb_fop_type_t       fop_type,
+                   gfdb_fop_path_t       fop_path)
+{
+        int ret = -1;
+        gf_ctr_private_t *_priv = NULL;
+        gf_ctr_local_t *ctr_local = NULL;
+
+        GF_ASSERT(frame);
+        GF_ASSERT(this);
+
+        _priv = this->private;
+        GF_ASSERT (_priv);
+
+        GF_ASSERT(_priv->_db_conn);
+
+        ctr_local = frame->local;
+
+        if (ctr_local
+            && (_priv->ctr_record_unwind || isdentryfop(fop_type))
+            && (ctr_local->ia_inode_type != IA_IFDIR)) {
+
+                CTR_DB_REC(ctr_local).do_record_uwind_time =
+                                                _priv->ctr_record_unwind;
+
+                ret = fill_db_record_for_unwind(this, ctr_local, fop_type,
+                                                fop_path);
+                if (ret == -1) {
+                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                               CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
+                               "UNWIND: Error filling ctr local");
+                        goto out;
+                }
+
+                ret = insert_record(_priv->_db_conn,
+                                    &ctr_local->gfdb_db_record);
+                if (ret == -1) {
+                        /* The message id is kept as in the original; only
+                         * the text is corrected so an insert failure is
+                         * distinguishable from a fill failure in the logs.
+                         * NOTE(review): switch to a dedicated unwind-insert
+                         * message id if ctr-messages.h provides one. */
+                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                               CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
+                               "UNWIND: Inserting of record failed!");
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/******************************************************************************
+ * Delete file/hard link record/s from the db.
+ *
+ * Builds a zeroed db record carrying gfid, pargfid and basename and sends it
+ * to the database through insert_record() with the given fop type/path.
+ *
+ * Validated preconditions (failure logs and returns -1):
+ *   - this->private is set and basename is non-NULL
+ *   - gfid and pargfid are non-null UUIDs
+ *   - fop_type is GFDB_FOP_DENTRY_WRITE
+ *   - fop_path is GFDB_FOP_UNDEL or GFDB_FOP_UNDEL_ALL
+ *
+ * Returns 0 on success, non-zero on failure.
+ * ****************************************************************************/
+static inline int
+ctr_delete_hard_link_from_db (xlator_t               *this,
+                              uuid_t                 gfid,
+                              uuid_t                 pargfid,
+                              char                   *basename,
+                              gfdb_fop_type_t        fop_type,
+                              gfdb_fop_path_t        fop_path)
+{
+        int                     ret = -1;
+        gfdb_db_record_t        gfdb_db_record;
+        gf_ctr_private_t        *_priv = NULL;
+
+        _priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, _priv, out);
+        /* basename is passed to strncpy() and "%s" below */
+        GF_VALIDATE_OR_GOTO (this->name, basename, out);
+        GF_VALIDATE_OR_GOTO (this->name, (!gf_uuid_is_null (gfid)), out);
+        GF_VALIDATE_OR_GOTO (this->name, (!gf_uuid_is_null (pargfid)), out);
+        GF_VALIDATE_OR_GOTO (this->name, (fop_type == GFDB_FOP_DENTRY_WRITE),
+                             out);
+        /* Both alternatives must be compared against fop_path; a bare
+         * enumerator on the right of || makes the check always true. */
+        GF_VALIDATE_OR_GOTO (this->name,
+                             (fop_path == GFDB_FOP_UNDEL ||
+                              fop_path == GFDB_FOP_UNDEL_ALL),
+                             out);
+
+        /* Set gfdb_db_record to 0 */
+        memset (&gfdb_db_record, 0, sizeof(gfdb_db_record));
+
+        /* Copy gfid into db record */
+        gf_uuid_copy (gfdb_db_record.gfid, gfid);
+
+        /* Copy pargfid into db record */
+        gf_uuid_copy (gfdb_db_record.pargfid, pargfid);
+
+        /* Copy basename. The record was zeroed above, so the copy stays
+         * NUL terminated as long as file_name holds at least GF_NAME_MAX
+         * bytes -- NOTE(review): confirm against gfdb_db_record_t. */
+        strncpy (gfdb_db_record.file_name, basename, GF_NAME_MAX - 1);
+
+        gfdb_db_record.gfdb_fop_path = fop_path;
+        gfdb_db_record.gfdb_fop_type = fop_type;
+
+        /* Send delete request to db */
+        ret = insert_record (_priv->_db_conn, &gfdb_db_record);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_INSERT_RECORD_WIND_FAILED,
+                        "Failed to delete record. %s", basename);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/******************************* Hard link function ***************************/
+
+/* Returns _gf_true when at least ctr_lookupheal_inode_timeout seconds lie
+ * between the inode's stored inode_heal_period stamp and current_time. */
+static inline gf_boolean_t
+__is_inode_expired (ctr_xlator_ctx_t *ctr_xlator_ctx,
+                    gf_ctr_private_t *_priv,
+                    gfdb_time_t *current_time)
+{
+        uint64_t elapsed = 0;
+
+        GF_ASSERT (ctr_xlator_ctx);
+        GF_ASSERT (_priv);
+        GF_ASSERT (current_time);
+
+        elapsed = current_time->tv_sec -
+                        ctr_xlator_ctx->inode_heal_period;
+
+        if (elapsed >= _priv->ctr_lookupheal_inode_timeout)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* Returns _gf_true when at least ctr_lookupheal_link_timeout seconds lie
+ * between the hard link's stored hardlink_heal_period stamp and
+ * current_time. */
+static inline gf_boolean_t
+__is_hardlink_expired (ctr_hard_link_t *ctr_hard_link,
+                       gf_ctr_private_t *_priv,
+                       gfdb_time_t *current_time)
+{
+        uint64_t elapsed = 0;
+
+        GF_ASSERT (ctr_hard_link);
+        GF_ASSERT (_priv);
+        GF_ASSERT (current_time);
+
+        elapsed = current_time->tv_sec -
+                        ctr_hard_link->hardlink_heal_period;
+
+        if (elapsed >= _priv->ctr_lookupheal_link_timeout)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+
+/* Return values of heal*/
+/*
+ * Result of add_hard_link_ctx(). The two "TRY" heal values are distinct bits
+ * and add_hard_link_ctx() ORs them together, so a result of
+ * (CTR_TRY_HARDLINK_HEAL | CTR_TRY_INODE_HEAL) is possible.
+ */
+typedef enum ctr_heal_ret_val {
+ /* The inode context could not be updated */
+ CTR_CTX_ERROR = -1,
+ /* No healing required */
+ CTR_TRY_NO_HEAL = 0,
+ /* Try healing hard link */
+ CTR_TRY_HARDLINK_HEAL = 1,
+ /* Try healing inode */
+ CTR_TRY_INODE_HEAL = 2,
+} ctr_heal_ret_val_t;
+
+
+
+/**
+ * @brief Adds the hard link described by the frame's ctr_local db record
+ *        (pargfid + file_name) to the in-memory hard link list kept in the
+ *        inode's ctr context. The list is used for smart healing of the
+ *        database.
+ *
+ *        If the link is already in the list nothing is added; instead the
+ *        link and the inode are checked for heal expiry, their stamps are
+ *        refreshed when expired, and the matching heal bits are returned.
+ *
+ * @param frame of the FOP (frame->local may be NULL, then nothing is done)
+ * @param this is the xlator instance
+ * @param inode whose ctr context is updated
+ * @return ctr_heal_ret_val_t: CTR_TRY_NO_HEAL, an OR of the CTR_TRY_*_HEAL
+ *         bits, or CTR_CTX_ERROR when the time cannot be read or the link
+ *         cannot be added
+ */
+
+static inline ctr_heal_ret_val_t
+add_hard_link_ctx (call_frame_t *frame,
+                   xlator_t     *this,
+                   inode_t      *inode)
+{
+        ctr_heal_ret_val_t   heal_flags     = CTR_TRY_NO_HEAL;
+        int                  rc             = -1;
+        gf_ctr_local_t      *ctr_local      = NULL;
+        ctr_xlator_ctx_t    *ctx            = NULL;
+        ctr_hard_link_t     *known_link     = NULL;
+        gf_ctr_private_t    *_priv          = NULL;
+        gfdb_time_t          now            = {0};
+
+        GF_ASSERT (frame);
+        GF_ASSERT (this);
+        GF_ASSERT (inode);
+        GF_ASSERT (this->private);
+
+        _priv = this->private;
+
+        ctr_local = frame->local;
+        if (!ctr_local)
+                return heal_flags;
+
+        ctx = init_ctr_xlator_ctx (this, inode);
+        if (!ctx) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
+                        "Failed accessing ctr inode context");
+                return heal_flags;
+        }
+
+        LOCK (&ctx->lock);
+
+        /* Is this hard link already tracked in the ctr inode context? */
+        known_link = ctr_search_hard_link_ctx (this, ctx,
+                                        CTR_DB_REC(ctr_local).pargfid,
+                                        CTR_DB_REC(ctr_local).file_name);
+        if (!known_link) {
+                /* Not tracked yet: append it to the list */
+                rc = ctr_add_hard_link (this, ctx,
+                                        CTR_DB_REC(ctr_local).pargfid,
+                                        CTR_DB_REC(ctr_local).file_name);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED,
+                                "Failed to add hardlink to the ctr inode context");
+                        heal_flags = CTR_CTX_ERROR;
+                }
+                goto unlock;
+        }
+
+        /* Already tracked: only decide whether a heal is due */
+        rc = gettimeofday (&now, NULL);
+        if (rc == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get current time");
+                heal_flags = CTR_CTX_ERROR;
+                goto unlock;
+        }
+
+        if (__is_hardlink_expired (known_link, _priv, &now)) {
+                known_link->hardlink_heal_period = now.tv_sec;
+                heal_flags |= CTR_TRY_HARDLINK_HEAL;
+        }
+
+        if (__is_inode_expired (ctx, _priv, &now)) {
+                ctx->inode_heal_period = now.tv_sec;
+                heal_flags |= CTR_TRY_INODE_HEAL;
+        }
+
+unlock:
+        UNLOCK (&ctx->lock);
+
+        return heal_flags;
+}
+
+/*
+ * Removes the hard link described by the frame's ctr_local db record
+ * (pargfid + file_name) from the inode's ctr context.
+ *
+ * Returns -1 when the frame carries no ctr_local, 0 when the inode has no
+ * ctr context (nothing to do) or the link was removed, and the non-zero
+ * result of ctr_delete_hard_link() otherwise.
+ */
+static inline int
+delete_hard_link_ctx (call_frame_t *frame,
+                      xlator_t     *this,
+                      inode_t      *inode)
+{
+        int               rc        = -1;
+        ctr_xlator_ctx_t *ctx       = NULL;
+        gf_ctr_local_t   *ctr_local = NULL;
+
+        GF_ASSERT (frame);
+        GF_ASSERT (this);
+        GF_ASSERT (inode);
+
+        ctr_local = frame->local;
+        if (!ctr_local)
+                return -1;
+
+        ctx = get_ctr_xlator_ctx (this, inode);
+        if (!ctx) {
+                /* No ctr inode context, hence no list to clean up */
+                return 0;
+        }
+
+        rc = ctr_delete_hard_link (this, ctx,
+                                   CTR_DB_REC(ctr_local).pargfid,
+                                   CTR_DB_REC(ctr_local).file_name);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_DELETE_HARDLINK_FAILED,
+                        "Failed to delete hard link");
+        }
+
+        return rc;
+}
+
+/*
+ * Updates, in the inode's ctr context, the hard link identified by the
+ * frame's ctr_local db record: the entry (old_pargfid, old_file_name) is
+ * replaced by (pargfid, file_name) through ctr_update_hard_link().
+ *
+ * Returns 0 on success; -1 when the frame has no ctr_local or the ctr inode
+ * context cannot be obtained; the non-zero result of ctr_update_hard_link()
+ * when the update fails.
+ */
+static inline int
+update_hard_link_ctx (call_frame_t *frame,
+                      xlator_t     *this,
+                      inode_t      *inode)
+{
+        int ret = -1;
+        ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
+        gf_ctr_local_t *ctr_local = NULL;
+
+        GF_ASSERT (frame);
+        GF_ASSERT (this);
+        GF_ASSERT (inode);
+
+        ctr_local = frame->local;
+        if (!ctr_local) {
+                goto out;
+        }
+
+        ctr_xlator_ctx = init_ctr_xlator_ctx (this, inode);
+        if (!ctr_xlator_ctx) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
+                        "Failed accessing ctr inode context");
+                goto out;
+        }
+
+        ret = ctr_update_hard_link (this, ctr_xlator_ctx,
+                                    CTR_DB_REC(ctr_local).pargfid,
+                                    CTR_DB_REC(ctr_local).file_name,
+                                    CTR_DB_REC(ctr_local).old_pargfid,
+                                    CTR_DB_REC(ctr_local).old_file_name);
+        if (ret) {
+                /* The message id is kept as in the original; the text now
+                 * names the operation that actually failed. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_DELETE_HARDLINK_FAILED,
+                        "Failed to update hard link");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+
+/******************************************************************************
+ *
+ * CTR xlator init related functions
+ *
+ *
+ * ****************************************************************************/
+/*
+ * Declared here, defined outside this header.
+ * NOTE(review): presumably collects the database parameters for db_type from
+ * the xlator options into params_dict -- confirm against the definition.
+ */
+int
+extract_db_params (xlator_t *this,
+ dict_t *params_dict,
+ gfdb_db_type_t db_type);
+
+/*
+ * Declared here, defined outside this header.
+ * NOTE(review): presumably reads the CTR xlator options into _priv --
+ * confirm against the definition.
+ */
+int
+extract_ctr_options (xlator_t *this,
+ gf_ctr_private_t *_priv);
+
+#endif
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
new file mode 100644
index 00000000000..7700ad40ba6
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
@@ -0,0 +1,409 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ctr-xlator-ctx.h"
+#include "ctr-messages.h"
+#include <time.h>
+#include <sys/time.h>
+
+/* True when the entry's list node has identical next and prev pointers.
+ * The argument is parenthesized so any pointer expression may be passed. */
+#define IS_THE_ONLY_HARDLINK(ctr_hard_link)\
+        ((ctr_hard_link)->list.next == (ctr_hard_link)->list.prev)
+
+
+/*
+ * Frees a hard link entry together with its base_name and resets the
+ * caller's pointer to NULL. A NULL *ctr_hard_link is a no-op.
+ *
+ * The entry must already be unlinked from any list, and base_name must be
+ * either NULL or a live allocation when this is called.
+ */
+static void
+fini_ctr_hard_link (ctr_hard_link_t **ctr_hard_link) {
+
+        GF_ASSERT (ctr_hard_link);
+
+        /* Nothing to release. (The check used to be inverted, which leaked
+         * every entry and would have dereferenced NULL.) */
+        if (!*ctr_hard_link)
+                return;
+
+        GF_FREE ((*ctr_hard_link)->base_name);
+        GF_FREE (*ctr_hard_link);
+        *ctr_hard_link = NULL;
+}
+
+
+/*
+ * Linear search of ctr_xlator_ctx->hardlink_list for the entry whose parent
+ * gfid equals pgfid and whose base_name equals base_name.
+ *
+ * Returns the matching entry, or NULL when there is none or when pgfid or
+ * base_name is NULL.
+ *
+ * Please lock the ctr_xlator_ctx before using this function.
+ */
+ctr_hard_link_t *
+ctr_search_hard_link_ctx (xlator_t                  *this,
+                          ctr_xlator_ctx_t          *ctr_xlator_ctx,
+                          uuid_t                    pgfid,
+                          const char                *base_name)
+{
+        ctr_hard_link_t *cursor = NULL;
+
+        GF_ASSERT (this);
+        GF_ASSERT (ctr_xlator_ctx);
+
+        if (pgfid == NULL || base_name == NULL)
+                return NULL;
+
+        list_for_each_entry (cursor, &ctr_xlator_ctx->hardlink_list, list) {
+                if (gf_uuid_compare (cursor->pgfid, pgfid) != 0)
+                        continue;
+                if (!cursor->base_name)
+                        continue;
+                if (strcmp(cursor->base_name, base_name) != 0)
+                        continue;
+
+                return cursor;
+        }
+
+        return NULL;
+}
+
+
+
+
+/*
+ * Allocates a new hard link entry for (pgfid, base_name), stamps it with the
+ * current time (seconds) and appends it to ctr_xlator_ctx->hardlink_list.
+ *
+ * Returns 0 on success and -1 on failure (NULL pgfid/base_name, allocation
+ * failure, or gettimeofday() failure). Nothing is added to the list on
+ * failure.
+ *
+ * Please lock the ctr_xlator_ctx before using this function.
+ */
+int
+ctr_add_hard_link (xlator_t             *this,
+                   ctr_xlator_ctx_t     *ctr_xlator_ctx,
+                   uuid_t               pgfid,
+                   const char           *base_name)
+{
+        int ret = -1;
+        ctr_hard_link_t *ctr_hard_link = NULL;
+        struct timeval current_time = {0};
+
+        GF_ASSERT (this);
+        GF_ASSERT (ctr_xlator_ctx);
+
+        if (pgfid == NULL || base_name == NULL)
+                goto out;
+
+        ctr_hard_link = GF_CALLOC (1, sizeof (*ctr_hard_link),
+                                   gf_ctr_mt_hard_link_t);
+        if (!ctr_hard_link) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        CTR_MSG_CALLOC_FAILED, "Failed allocating "
+                        "ctr_hard_link");
+                goto out;
+        }
+
+        /* Initialize the ctr_hard_link object and
+         * assign the values : parent GFID and basename */
+        INIT_LIST_HEAD (&ctr_hard_link->list);
+        gf_uuid_copy (ctr_hard_link->pgfid, pgfid);
+        ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_COPY_FAILED, "Failed copying basename "
+                        "to ctr_hard_link");
+                goto error;
+        }
+
+        ret = gettimeofday (&current_time, NULL);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get current time");
+                goto error;
+        }
+
+        /* Add the hard link to the list */
+        list_add_tail (&ctr_hard_link->list,
+                       &ctr_xlator_ctx->hardlink_list);
+
+        ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
+
+        ret = 0;
+        goto out;
+error:
+        /* base_name may already have been allocated (gettimeofday failure);
+         * it is still NULL from GF_CALLOC if the copy itself failed. */
+        GF_FREE (ctr_hard_link->base_name);
+        GF_FREE (ctr_hard_link);
+out:
+        return ret;
+}
+
+/* Unlinks *ctr_hard_link from the list it is on and hands it to
+ * fini_ctr_hard_link() for release. Neither pointer may be NULL. */
+static void
+__delete_hard_link_from_list (ctr_hard_link_t **ctr_hard_link)
+{
+        ctr_hard_link_t *victim = NULL;
+
+        GF_ASSERT (ctr_hard_link);
+        GF_ASSERT (*ctr_hard_link);
+
+        victim = *ctr_hard_link;
+
+        /* Detach first, release afterwards */
+        list_del(&victim->list);
+        fini_ctr_hard_link (ctr_hard_link);
+}
+
+
+/*
+ * Removes the hard link (pgfid, base_name) from the context's list.
+ * The context lock is taken and released here.
+ *
+ * Returns 0 when the link was found and removed, -1 when it is not in the
+ * list.
+ */
+int
+ctr_delete_hard_link (xlator_t                *this,
+                      ctr_xlator_ctx_t        *ctr_xlator_ctx,
+                      uuid_t                  pgfid,
+                      const char              *base_name)
+{
+        int              rc    = -1;
+        ctr_hard_link_t *found = NULL;
+
+        GF_ASSERT (this);
+        GF_ASSERT (ctr_xlator_ctx);
+
+        LOCK (&ctr_xlator_ctx->lock);
+        {
+                found = ctr_search_hard_link_ctx (this, ctr_xlator_ctx,
+                                                  pgfid, base_name);
+                if (found) {
+                        __delete_hard_link_from_list (&found);
+                        found = NULL;
+                        rc = 0;
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CTR_MSG_HARDLINK_MISSING_IN_LIST,
+                                "Hard link doesn't exist in the list");
+                }
+        }
+        UNLOCK (&ctr_xlator_ctx->lock);
+
+        return rc;
+}
+
+
+
+
+/*
+ * Replaces the list entry (old_pgfid, old_base_name) with (pgfid, base_name)
+ * and refreshes its heal stamp. When the old entry is not in the list a new
+ * entry for (pgfid, base_name) is added instead. The context lock is taken
+ * and released here.
+ *
+ * Returns 0 on success; non-zero when adding the new entry fails or when the
+ * new base name cannot be copied (in which case the entry is dropped from
+ * the list).
+ */
+int
+ctr_update_hard_link (xlator_t                *this,
+                      ctr_xlator_ctx_t        *ctr_xlator_ctx,
+                      uuid_t                  pgfid,
+                      const char              *base_name,
+                      uuid_t                  old_pgfid,
+                      const char              *old_base_name)
+{
+        int ret = -1;
+        ctr_hard_link_t *ctr_hard_link = NULL;
+        struct timeval current_time = {0};
+
+        GF_ASSERT (this);
+        GF_ASSERT (ctr_xlator_ctx);
+
+        LOCK (&ctr_xlator_ctx->lock);
+
+        /* Check if the hard link is present */
+        ctr_hard_link = ctr_search_hard_link_ctx (this, ctr_xlator_ctx,
+                                                  old_pgfid, old_base_name);
+        if (!ctr_hard_link) {
+                gf_msg_trace (this->name, 0, "Hard link doesn't exist"
+                              " in the list");
+                /* Since the hard link is not present in the list
+                 * we add it to the list */
+                ret = ctr_add_hard_link (this, ctr_xlator_ctx,
+                                         pgfid, base_name);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
+                                "Failed adding hard link to the list");
+                        goto out;
+                }
+                ret = 0;
+                goto out;
+        }
+
+        /* Update the hard link */
+        gf_uuid_copy (ctr_hard_link->pgfid, pgfid);
+        GF_FREE (ctr_hard_link->base_name);
+        /* Do not keep a dangling pointer: if the copy below fails the entry
+         * is destroyed, and the destructor frees base_name again. */
+        ctr_hard_link->base_name = NULL;
+        ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        CTR_MSG_COPY_FAILED, "Failed copying basename "
+                        "to ctr_hard_link");
+                /* Delete the corrupted entry */
+                __delete_hard_link_from_list (&ctr_hard_link);
+                ctr_hard_link = NULL;
+                goto out;
+        }
+
+        /* A failure to read the clock is not fatal: the stamp is reset to 0
+         * and the update still succeeds. */
+        ret = gettimeofday (&current_time, NULL);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to get current time");
+                ctr_hard_link->hardlink_heal_period = 0;
+        } else {
+                ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
+        }
+
+        ret = 0;
+
+out:
+        UNLOCK (&ctr_xlator_ctx->lock);
+
+        return ret;
+}
+
+
+
+
+/*
+ * Delete all hardlinks: every entry of ctr_xlator_ctx->hardlink_list is
+ * unlinked and released while holding the context lock.
+ * Always returns 0.
+ */
+static int
+ctr_delete_all_hard_link (xlator_t                *this,
+                          ctr_xlator_ctx_t        *ctr_xlator_ctx)
+{
+        ctr_hard_link_t *entry = NULL;
+        ctr_hard_link_t *next  = NULL;
+
+        GF_ASSERT (ctr_xlator_ctx);
+
+        LOCK (&ctr_xlator_ctx->lock);
+        {
+                list_for_each_entry_safe(entry, next,
+                                         &ctr_xlator_ctx->hardlink_list,
+                                         list) {
+                        /* Detach the entry and free it */
+                        __delete_hard_link_from_list (&entry);
+                        entry = NULL;
+                }
+        }
+        UNLOCK (&ctr_xlator_ctx->lock);
+
+        return 0;
+}
+
+
+/*
+ * Fetches this xlator's ctr context stored in the inode.
+ * Returns NULL when no context is stored or the lookup fails.
+ *
+ * Please lock the inode before using this function.
+ */
+static ctr_xlator_ctx_t *
+__get_ctr_xlator_ctx (xlator_t  *this,
+                      inode_t   *inode)
+{
+        uint64_t stored = 0;
+
+        GF_ASSERT (this);
+        GF_ASSERT (inode);
+
+        if (__inode_ctx_get (inode, this, &stored) < 0)
+                stored = 0;
+
+        if (stored == 0)
+                return NULL;
+
+        return (ctr_xlator_ctx_t *) (long)stored;
+}
+
+
+/*
+ * Returns the inode's ctr context, creating it when the inode has none yet.
+ * The whole operation runs under inode->lock.
+ *
+ * A new context is completely initialised (lock, empty hard link list, heal
+ * stamp set to the current time in seconds) BEFORE it is stored in the
+ * inode, so the inode never refers to a half-built or freed context.
+ *
+ * Returns the context, or NULL on failure (allocation, lock init,
+ * gettimeofday() or storing the context in the inode).
+ */
+ctr_xlator_ctx_t *
+init_ctr_xlator_ctx (xlator_t *this,
+                     inode_t *inode)
+{
+        int                 ret             = -1;
+        int                 lock_ready      = 0;
+        uint64_t            _addr           = 0;
+        ctr_xlator_ctx_t    *ctr_xlator_ctx = NULL;
+        struct timeval      current_time    = {0};
+
+        GF_ASSERT (this);
+        GF_ASSERT (inode);
+
+        LOCK (&inode->lock);
+        {
+                /* Reuse an existing context; it is never freed below
+                 * because ret is 0 on this path. */
+                ctr_xlator_ctx = __get_ctr_xlator_ctx (this, inode);
+                if (ctr_xlator_ctx) {
+                        ret = 0;
+                        goto out;
+                }
+
+                ctr_xlator_ctx = GF_CALLOC (1, sizeof (*ctr_xlator_ctx),
+                                            gf_ctr_mt_xlator_ctx);
+                if (!ctr_xlator_ctx)
+                        goto out;
+
+                ret = LOCK_INIT (&ctr_xlator_ctx->lock);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ret,
+                                CTR_MSG_INIT_LOCK_FAILED,
+                                "Failed init lock %s", strerror(ret));
+                        goto out;
+                }
+                lock_ready = 1;
+
+                INIT_LIST_HEAD (&ctr_xlator_ctx->hardlink_list);
+
+                ret = gettimeofday (&current_time, NULL);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to get current time");
+                        goto out;
+                }
+
+                ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
+
+                /* Publish last: nothing after this point can fail */
+                _addr = (uint64_t) ctr_xlator_ctx;
+
+                ret = __inode_ctx_set (inode, this, &_addr);
+                if (ret) {
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        if (ret) {
+                /* Only a context created in this call can reach here */
+                if (ctr_xlator_ctx && lock_ready)
+                        LOCK_DESTROY (&ctr_xlator_ctx->lock);
+                GF_FREE (ctr_xlator_ctx);
+                ctr_xlator_ctx = NULL;
+        }
+
+        UNLOCK (&inode->lock);
+
+        return ctr_xlator_ctx;
+}
+
+
+
+
+/*
+ * Detaches this xlator's ctr context from the inode and destroys it: all
+ * tracked hard links are released, the context lock is destroyed and the
+ * context memory is freed. Does nothing when the inode has no ctr context.
+ */
+void
+fini_ctr_xlator_ctx (xlator_t *this,
+                     inode_t *inode)
+{
+        uint64_t          stored = 0;
+        ctr_xlator_ctx_t *ctx    = NULL;
+
+        inode_ctx_del (inode, this, &stored);
+        if (stored == 0)
+                return;
+
+        ctx = (ctr_xlator_ctx_t *) (long) stored;
+
+        if (ctr_delete_all_hard_link (this, ctx) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING , 0,
+                        CTR_MSG_DELETE_HARDLINK_FAILED, "Failed deleting all "
+                        "hard links from inode context");
+        }
+
+        LOCK_DESTROY (&ctx->lock);
+
+        GF_FREE (ctx);
+}
+
+
+
+
+/*
+ * Locked wrapper around __get_ctr_xlator_ctx(): returns the inode's ctr
+ * context, or NULL when the inode has none. Never creates a context.
+ */
+ctr_xlator_ctx_t *
+get_ctr_xlator_ctx (xlator_t *this,
+                    inode_t *inode)
+{
+        ctr_xlator_ctx_t *ctx = NULL;
+
+        LOCK (&inode->lock);
+        {
+                ctx = __get_ctr_xlator_ctx (this, inode);
+        }
+        UNLOCK (&inode->lock);
+
+        return ctx;
+}
+
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
new file mode 100644
index 00000000000..7f1c6cb1712
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
@@ -0,0 +1,90 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CTR_XLATOR_CTX_H
+#define __CTR_XLATOR_CTX_H
+
+#include "xlator.h"
+#include "ctr_mem_types.h"
+#include "iatt.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "locking.h"
+#include "common-utils.h"
+#include <time.h>
+#include <sys/time.h>
+
+/* One looked-up hard link of an inode, kept on ctr_xlator_ctx.hardlink_list */
+typedef struct ctr_hard_link {
+ /* gfid of the parent directory of this link */
+ uuid_t pgfid;
+ /* Heap-allocated name of the link inside the parent; freed with the
+ * entry */
+ char *base_name;
+ /* Hardlink expiry : Defines the expiry period after which a
+ * database heal is attempted.
+ * The value stored is a timestamp in seconds (tv_sec of
+ * gettimeofday()) taken when the link was added, updated or last
+ * found expired. */
+ uint64_t hardlink_heal_period;
+ /* Linkage into ctr_xlator_ctx.hardlink_list */
+ struct list_head list;
+} ctr_hard_link_t;
+
+/* Per-inode context of the CTR xlator, stored in the inode's xlator ctx */
+typedef struct ctr_xlator_ctx {
+ /* This represents the looked up hardlinks
+ * NOTE: This doesn't represent all physical hardlinks of the inode*/
+ struct list_head hardlink_list;
+ /* Timestamp in seconds (tv_sec of gettimeofday()) taken when the
+ * context was created or the inode was last found expired */
+ uint64_t inode_heal_period;
+ /* Protects hardlink_list and the entries on it */
+ gf_lock_t lock;
+} ctr_xlator_ctx_t;
+
+
+/* Finds the list entry matching (pgfid, base_name); returns NULL when there
+ * is none or an argument is NULL. The caller must hold ctr_xlator_ctx->lock. */
+ctr_hard_link_t *
+ctr_search_hard_link_ctx (xlator_t *this,
+ ctr_xlator_ctx_t *ctr_xlator_ctx,
+ uuid_t pgfid,
+ const char *base_name);
+
+
+/* Appends a new entry for (pgfid, base_name) to the list. Returns 0 on
+ * success, -1 on failure. The caller must hold ctr_xlator_ctx->lock. */
+int
+ctr_add_hard_link (xlator_t *this,
+ ctr_xlator_ctx_t *ctr_xlator_ctx,
+ uuid_t pgfid,
+ const char *base_name);
+
+
+
+/* Removes the entry (pgfid, base_name) from the list. Takes
+ * ctr_xlator_ctx->lock itself. Returns 0 on success, -1 when the entry is
+ * not in the list. */
+int
+ctr_delete_hard_link (xlator_t *this,
+ ctr_xlator_ctx_t *ctr_xlator_ctx,
+ uuid_t pgfid,
+ const char *base_name);
+
+
+/* Replaces the entry (old_pgfid, old_base_name) by (pgfid, base_name), or
+ * adds (pgfid, base_name) when the old entry is not in the list. Takes
+ * ctr_xlator_ctx->lock itself. Returns 0 on success. */
+int
+ctr_update_hard_link (xlator_t *this,
+ ctr_xlator_ctx_t *ctr_xlator_ctx,
+ uuid_t pgfid,
+ const char *base_name,
+ uuid_t old_pgfid,
+ const char *old_base_name);
+
+
+/* Returns the inode's ctr context or NULL when it has none. Takes
+ * inode->lock itself; never creates a context. */
+ctr_xlator_ctx_t *
+get_ctr_xlator_ctx (xlator_t *this,
+ inode_t *inode);
+
+
+
+
+/* Returns the inode's ctr context, creating and storing one when the inode
+ * has none. Takes inode->lock itself. Returns NULL on failure. */
+ctr_xlator_ctx_t *
+init_ctr_xlator_ctx (xlator_t *this,
+ inode_t *inode);
+
+
+/* Removes the ctr context from the inode and frees it together with all
+ * hard link entries on its list. */
+void
+fini_ctr_xlator_ctx (xlator_t *this,
+ inode_t *inode);
+
+#endif
diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h
new file mode 100644
index 00000000000..f408c028e24
--- /dev/null
+++ b/xlators/features/changetimerecorder/src/ctr_mem_types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CTR_MEM_TYPES_H__
+#define __CTR_MEM_TYPES_H__
+
+#include "gfdb_mem-types.h"
+
+/* Memory accounting types of the CTR xlator. Numbering continues right
+ * after the last gfdb type (gfdb_mt_end) from gfdb_mem-types.h. */
+enum gf_ctr_mem_types_ {
+ gf_ctr_mt_private_t = gfdb_mt_end + 1,
+ /* ctr_xlator_ctx_t allocations (per-inode context) */
+ gf_ctr_mt_xlator_ctx,
+ /* ctr_hard_link_t allocations (hard link list entries) */
+ gf_ctr_mt_hard_link_t,
+ /* End marker; keep last */
+ gf_ctr_mt_end
+};
+#endif
+
diff --git a/xlators/features/compress/Makefile.am b/xlators/features/compress/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/compress/Makefile.am
@@ -0,0 +1,3 @@
+# The compress (cdc) xlator is built from the src/ subdirectory.
+SUBDIRS = src
+
+# No extra files to remove on "make clean".
+CLEANFILES =
diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am
new file mode 100644
index 00000000000..b7c75e91b92
--- /dev/null
+++ b/xlators/features/compress/src/Makefile.am
@@ -0,0 +1,17 @@
+# Build the compress xlator as the libtool module cdc.la.
+xlator_LTLIBRARIES = cdc.la
+
+# Install location of the module.
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# Headers used by the build but not installed.
+noinst_HEADERS = cdc.h cdc-mem-types.h
+
+cdc_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+cdc_la_SOURCES = cdc.c cdc-helper.c
+# Link against libglusterfs and zlib ($(ZLIB_LIBS)).
+cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(ZLIB_LIBS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC \
+ -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) $(LIBZ_CFLAGS)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+# No extra files to remove on "make clean".
+CLEANFILES =
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
new file mode 100644
index 00000000000..0a9a0e3d29c
--- /dev/null
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -0,0 +1,543 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "syscall.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#ifdef HAVE_LIB_Z
+/* gzip header looks something like this
+ * (RFC 1952)
+ *
+ * +---+---+---+---+---+---+---+---+---+---+
+ * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
+ * +---+---+---+---+---+---+---+---+---+---+
+ *
+ * Data is usually sent without this header i.e
+ * Data sent = <compressed-data> + trailer(8)
+ * The trailer contains the checksum.
+ *
+ * gzip_header is added only during debugging.
+ * Refer to the function cdc_dump_iovec_to_disk
+ */
+static const char gzip_header[10] =
+ {
+ /* ID1, ID2 (gzip magic), CM (compression method), FLG */
+ '\037', '\213', Z_DEFLATED, 0,
+ /* MTIME: four bytes, left as zero */
+ 0, 0, 0, 0,
+ /* XFL, OS */
+ 0, GF_CDC_OS_ID
+ };
+
+/*
+ * Advances ci->ncount to the next output iovec slot.
+ * Returns 0 on success, or -1 (after logging) when the incremented count
+ * equals MAX_IOVEC. The count stays incremented in both cases.
+ */
+static int32_t
+cdc_next_iovec (xlator_t *this, cdc_info_t *ci)
+{
+        ci->ncount++;
+
+        if (ci->ncount != MAX_IOVEC)
+                return 0;
+
+        /* iovec overflow -- should not happen */
+        gf_log (this->name, GF_LOG_ERROR,
+                "Zlib output buffer overflow"
+                " ->ncount (%d) | ->MAX_IOVEC (%d)",
+                ci->ncount, MAX_IOVEC);
+
+        return -1;
+}
+
+/* Stores the low 32 bits of x into string[0..3], least significant byte
+ * first (little-endian). */
+static void
+cdc_put_long (unsigned char *string, unsigned long x)
+{
+        int byte_idx = 0;
+
+        for (byte_idx = 0; byte_idx < 4; byte_idx++) {
+                string[byte_idx] =
+                        (unsigned char) ((x >> (8 * byte_idx)) & 0xff);
+        }
+}
+
+/* Reads a 32-bit value from buf[0..3], least significant byte first
+ * (little-endian); the inverse of cdc_put_long(). */
+static unsigned long
+cdc_get_long (unsigned char *buf)
+{
+        unsigned long value    = 0;
+        int           byte_idx = 0;
+
+        /* Fold from the most significant byte down to buf[0] */
+        for (byte_idx = 3; byte_idx >= 0; byte_idx--)
+                value = (value << 8) | ((unsigned long) buf[byte_idx]);
+
+        return value;
+}
+
+/*
+ * Appends the GF_CDC_VALIDATION_SIZE byte trailer as a new output iovec:
+ * bytes 0-3 hold ci->crc and bytes 4-7 hold ci->stream.total_in, both
+ * written least significant byte first.
+ *
+ * Returns 0 on success, -1 when no iovec slot is left or the trailer buffer
+ * cannot be allocated.
+ */
+static int32_t
+cdc_init_gzip_trailer (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+        int   ret = -1;
+        char *buf = NULL;
+
+        ret = cdc_next_iovec (this, ci);
+        if (ret)
+                goto out;
+
+        buf = CURR_VEC(ci).iov_base =
+                (char *) GF_CALLOC (1, GF_CDC_VALIDATION_SIZE,
+                                    gf_cdc_mt_gzip_trailer_t);
+
+        if (!CURR_VEC(ci).iov_base) {
+                /* ret is 0 here from cdc_next_iovec(); without resetting it
+                 * an allocation failure would be reported as success. */
+                ret = -1;
+                goto out;
+        }
+
+        CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE;
+
+        cdc_put_long ((unsigned char *)&buf[0], ci->crc);
+        cdc_put_long ((unsigned char *)&buf[4], ci->stream.total_in);
+
+        ret = 0;
+
+ out:
+        return ret;
+}
+
+/*
+ * Claims the next output iovec slot and backs it with a fresh iobuf of
+ * `size` bytes (ci->buffer_size when size is 0). The iobuf is added to
+ * ci->iobref and the slot's iov_base/iov_len are set to the buffer and its
+ * requested length.
+ *
+ * Returns 0 on success, non-zero when no slot is left, the iobuf cannot be
+ * obtained, or it cannot be added to the iobref.
+ *
+ * NOTE(review): the reference returned by iobuf_get2() is not dropped here;
+ * confirm whether an unref is needed after iobref_add().
+ */
+static int32_t
+cdc_alloc_iobuf_and_init_vec (xlator_t *this,
+                              cdc_priv_t *priv, cdc_info_t *ci,
+                              int size)
+{
+        int           ret       = -1;
+        int           alloc_len = 0;
+        struct iobuf *iobuf     = NULL;
+
+        ret = cdc_next_iovec (this, ci);
+        if (ret)
+                goto out;
+
+        alloc_len = size ? size : ci->buffer_size;
+
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, alloc_len);
+        if (!iobuf) {
+                /* ret is 0 here from cdc_next_iovec(); without resetting it
+                 * an allocation failure would be reported as success. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = iobref_add (ci->iobref, iobuf);
+        if (ret)
+                goto out;
+
+        /* Initialize this iovec */
+        CURR_VEC(ci).iov_base = iobuf->ptr;
+        CURR_VEC(ci).iov_len = alloc_len;
+
+        ret = 0;
+
+ out:
+        return ret;
+}
+
+/* Points the zlib output cursor at the current iovec's buffer and sets the
+ * available output space to `size`, or to ci->buffer_size when size is 0. */
+static void
+cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size)
+{
+        ci->stream.next_out = (unsigned char *) CURR_VEC(ci).iov_base;
+
+        if (size)
+                ci->stream.avail_out = size;
+        else
+                ci->stream.avail_out = ci->buffer_size;
+}
+
+/* This routine is for testing and debugging only.
+ * Data written = header(10) + <compressed-data> + trailer(8)
+ * So each gzip dump file is at least 18 bytes in size.
+ *
+ * Writes gzip_header followed by the first ci->ncount output iovecs to
+ * `file` (created or truncated). The dump stops at the first failed write;
+ * only bytes actually written are counted and reported.
+ */
+void
+cdc_dump_iovec_to_disk (xlator_t *this, cdc_info_t *ci, const char *file)
+{
+        int     i             = 0;
+        int     fd            = -1;
+        ssize_t written       = 0;
+        size_t  total_written = 0;
+
+        fd = open (file, O_WRONLY|O_CREAT|O_TRUNC, 0777 );
+        if (fd < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot open file: %s", file);
+                return;
+        }
+
+        written = sys_write (fd, (char *) gzip_header, sizeof (gzip_header));
+        if (written < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot write gzip header to file: %s", file);
+                goto out;
+        }
+        total_written += written;
+
+        for (i = 0; i < ci->ncount; i++) {
+                written = sys_write (fd, (char *) ci->vec[i].iov_base,
+                                     ci->vec[i].iov_len);
+                if (written < 0) {
+                        /* A negative result must not be folded into the
+                         * unsigned byte total. */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Cannot write to file: %s", file);
+                        goto out;
+                }
+                total_written += written;
+        }
+
+        /* Report the file that was actually written, not the default
+         * dump path. */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "dump'd %zu bytes to %s", total_written, file);
+
+ out:
+        sys_close (fd);
+}
+
+/*
+ * Drains zlib's pending output by calling libz_func (&ci->stream, flush)
+ * repeatedly, moving to a freshly allocated output iovec whenever the
+ * current one has received data.
+ *
+ * Returns Z_OK or Z_STREAM_END when the loop ends normally, Z_MEM_ERROR when
+ * a new output buffer cannot be set up, or the failing libz_func result.
+ */
+static int32_t
+cdc_flush_libz_buffer (cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci,
+ int (*libz_func)(z_streamp, int),
+ int flush)
+{
+ int32_t ret = Z_OK;
+ int done = 0;
+ unsigned int deflate_len = 0;
+
+ for (;;) {
+ /* Bytes produced into the current output buffer so far */
+ deflate_len = ci->buffer_size - ci->stream.avail_out;
+
+ if (deflate_len != 0) {
+ /* Seal the current iovec at the produced length and
+ * continue in a new buffer */
+ CURR_VEC(ci).iov_len = deflate_len;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret) {
+ ret = Z_MEM_ERROR;
+ break;
+ }
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ if (done) {
+ /* The iovec slot claimed last holds no data; give it
+ * back */
+ ci->ncount--;
+ break;
+ }
+
+ ret = libz_func (&ci->stream, flush);
+
+ if (ret == Z_BUF_ERROR) {
+ /* Treated as "nothing more to flush": reported as
+ * Z_OK and the unused slot is given back */
+ ret = Z_OK;
+ ci->ncount--;
+ break;
+ }
+
+ /* Finished once zlib left room in the output buffer or
+ * signalled the end of the stream */
+ done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END);
+
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Deflates one input vector into ci's output iovecs.
+ *
+ * Initialises the zlib stream, allocates the first output buffer, folds the
+ * input into ci->crc and runs deflate() with Z_NO_FLUSH until all input is
+ * consumed, switching to a new output buffer whenever the current one is
+ * full. Pending output is not flushed here.
+ *
+ * Returns Z_OK (0) on success, otherwise the failing zlib or allocation
+ * result.
+ *
+ * NOTE(review): deflateInit2() runs on every call, i.e. once per input
+ * vector when cdc_compress() loops over several vectors -- confirm this is
+ * intended.
+ */
+static int32_t
+do_cdc_compress (struct iovec *vec, xlator_t *this, cdc_priv_t *priv,
+ cdc_info_t *ci)
+{
+ int ret = -1;
+
+ /* Initialize deflate */
+ ret = deflateInit2 (&ci->stream, priv->cdc_level, Z_DEFLATED,
+ priv->window_size, priv->mem_level,
+ Z_DEFAULT_STRATEGY);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to init Zlib (retval: %d)", ret);
+ goto out;
+ }
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *) vec->iov_base;
+ ci->stream.avail_in = vec->iov_len;
+
+ /* running CRC of the uncompressed data; later written to the trailer */
+ ci->crc = crc32 (ci->crc, (const Bytef *) vec->iov_base, vec->iov_len);
+
+ gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d",
+ ci->crc, ci->stream.avail_in, ci->buffer_size);
+
+ /* compress !! */
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+
+ /* current output buffer is full: seal it at full size */
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ ret = deflate (&ci->stream, Z_NO_FLUSH);
+ if (ret != Z_OK)
+ break;
+ }
+
+ out:
+ return ret;
+}
+
+/*
+ * Compresses the ci->count input vectors in ci->vector into ci's output
+ * iovecs and appends the validation trailer.
+ *
+ * Steps: create ci->iobref, create *xdata when it is NULL, deflate every
+ * input vector, flush zlib with Z_FINISH, append the trailer, set
+ * ci->nbytes to the compressed size plus GF_CDC_VALIDATION_SIZE, and mark
+ * the reply as deflated by setting GF_CDC_DEFLATE_CANARY_VAL in *xdata.
+ * When priv->debug is set the result is also dumped to
+ * GF_CDC_DEBUG_DUMP_FILE.
+ *
+ * Returns 0 on success and non-zero on any failure. This includes a failure
+ * to set the canary key, because ret is not reset after it.
+ * deflateEnd() runs on every path that gets past the xdata setup.
+ */
+int32_t
+cdc_compress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ dict_t **xdata)
+{
+ int ret = -1;
+ int i = 0;
+
+ ci->iobref = iobref_new ();
+ if (!ci->iobref)
+ goto out;
+
+ if (!*xdata) {
+ *xdata = dict_new ();
+ if (!*xdata) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot allocate xdata"
+ " dict");
+ goto out;
+ }
+ }
+
+ /* data */
+ for (i = 0; i < ci->count; i++) {
+ ret = do_cdc_compress (&ci->vector[i], this, priv, ci);
+ if (ret != Z_OK)
+ goto deflate_cleanup_out;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer (priv, this, ci, deflate, Z_FINISH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Compression Error: ret (%d)", ret);
+ ret = -1;
+ goto deflate_cleanup_out;
+ }
+
+ /* trailer */
+ ret = cdc_init_gzip_trailer (this, priv, ci);
+ if (ret)
+ goto deflate_cleanup_out;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Compressed %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ /* total payload: compressed bytes plus the trailer */
+ ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE;
+
+ /* set deflated canary value for identification */
+ ret = dict_set_int32 (*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1);
+ if (ret) {
+ /* Send uncompressed data if we can't _tell_ the client
+ * that deflated data is on its way. So, we just log
+ * the failure and continue as usual.
+ * (ret stays non-zero and is what this function returns.)
+ */
+ gf_log (this->name, GF_LOG_ERROR,
+ "Data deflated, but could not set canary"
+ " value in dict for identification");
+ }
+
+ /* This is to be used in testing */
+ if ( priv->debug ) {
+ cdc_dump_iovec_to_disk (this, ci, GF_CDC_DEBUG_DUMP_FILE );
+ }
+
+ deflate_cleanup_out:
+ (void) deflateEnd(&ci->stream);
+
+ out:
+ return ret;
+}
+
+
+/* Deflated content is recognised by the canary key in the dict:
+ * returns -1 when the key is present and 0 when it is absent.
+ */
+static int32_t
+cdc_check_content_for_deflate (dict_t *xdata)
+{
+        if (dict_get (xdata, GF_CDC_DEFLATE_CANARY_VAL))
+                return -1;
+
+        return 0;
+}
+
+/* Decode the CRC field, which starts at byte 0 of the trailer. */
+static unsigned long
+cdc_extract_crc (char *trailer)
+{
+        unsigned char *crc_field = (unsigned char *) trailer;
+
+        return cdc_get_long (crc_field);
+}
+
+/* Decode the length field, which starts at byte 4 of the trailer. */
+static unsigned long
+cdc_extract_size (char *trailer)
+{
+        unsigned char *size_field = (unsigned char *) (trailer + 4);
+
+        return cdc_get_long (size_field);
+}
+
+/* Compare the CRC and length taken from the trailer with the values
+ * gathered while inflating. Returns 0 when both match, 1 otherwise.
+ */
+static int32_t
+cdc_validate_inflate (cdc_info_t *ci, unsigned long crc,
+                      unsigned long len)
+{
+        if (crc != ci->crc)
+                return 1;
+
+        /* the inflated length is kept inside the zlib stream struct */
+        if (len != ci->stream.total_out)
+                return 1;
+
+        return 0;
+}
+
+/*
+ * Inflate the first input iovec of @ci into newly allocated output vectors
+ * and verify the result against the trailer stored behind the payload.
+ *
+ * The last GF_CDC_VALIDATION_SIZE bytes of the input hold the CRC and the
+ * length of the original data; everything before them is handed to zlib.
+ *
+ * Returns 0 when the data inflated cleanly and matches the trailer, and a
+ * non-zero value otherwise. inflateEnd() is not called here; that is left
+ * to the caller.
+ */
+static int32_t
+do_cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+        int            ret          = -1;
+        int            i            = 0;
+        int            len          = 0;
+        char          *inflte       = NULL;
+        char          *trailer      = NULL;
+        struct iovec   vec          = {0,};
+        unsigned long  computed_crc = 0;
+        unsigned long  computed_len = 0;
+
+        ret = inflateInit2 (&ci->stream, priv->window_size);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Zlib: Unable to initialize inflate");
+                goto out;
+        }
+
+        vec = THIS_VEC(ci, 0);
+
+        /* The trailer has to fit inside the buffer, otherwise the trailer
+         * pointer would lie before the buffer and the payload length
+         * would be negative.
+         */
+        if (vec.iov_len < GF_CDC_VALIDATION_SIZE) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Deflated data is too short to hold the trailer");
+                ret = -1;
+                goto out;
+        }
+
+        trailer = (char *) (((char *) vec.iov_base) + vec.iov_len
+                            - GF_CDC_VALIDATION_SIZE);
+
+        /* CRC of uncompressed data */
+        computed_crc = cdc_extract_crc (trailer);
+
+        /* size of uncompressed data */
+        computed_len = cdc_extract_size (trailer);
+
+        gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d",
+                computed_crc, computed_len, ci->buffer_size);
+
+        inflte = vec.iov_base ;
+        len = vec.iov_len - GF_CDC_VALIDATION_SIZE;
+
+        /* allocate the first output buffer */
+        ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+        if (ret)
+                goto out;
+
+        /* setup output buffer */
+        cdc_init_zlib_output_stream (priv, ci, 0);
+
+        /* setup input buffer */
+        ci->stream.next_in = (unsigned char *) inflte;
+        ci->stream.avail_in = len;
+
+        while (ci->stream.avail_in != 0) {
+                if (ci->stream.avail_out == 0) {
+                        CURR_VEC(ci).iov_len = ci->buffer_size;
+
+                        /* without a new buffer there is nowhere to
+                         * inflate into, so give up right away */
+                        ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+                        if (ret)
+                                goto out;
+
+                        /* Re-position Zlib output buffer */
+                        cdc_init_zlib_output_stream (priv, ci, 0);
+                }
+
+                /* Any result other than Z_OK ends the loop: on errors and
+                 * on Z_STREAM_END zlib no longer consumes input, so
+                 * looping on avail_in would never terminate.
+                 */
+                ret = inflate (&ci->stream, Z_NO_FLUSH);
+                if (ret != Z_OK)
+                        break;
+        }
+
+        if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Decompression Error: ret (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+
+        /* flush zlib buffer */
+        ret = cdc_flush_libz_buffer (priv, this, ci, inflate, Z_SYNC_FLUSH);
+        if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Decompression Error: ret (%d)", ret);
+                ret = -1;
+                goto out;
+        }
+
+        /* compute the CRC of the uncompressed data to check for
+         * correctness */
+        for (i = 0; i < ci->ncount; i++) {
+                ci->crc = crc32 (ci->crc,
+                                 (const Bytef *) ci->vec[i].iov_base,
+                                 ci->vec[i].iov_len);
+        }
+
+        /* validate inflated data */
+        ret = cdc_validate_inflate (ci, computed_crc, computed_len);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Checksum or length mismatched in inflated data");
+        }
+
+ out:
+        return ret;
+}
+
+/*
+ * Inflate the data described by @ci when @xdata carries the deflate canary.
+ *
+ * Returns 0 after a successful inflate, in which case ci->nbytes holds the
+ * inflated size. Returns a non-zero value when the content is not marked
+ * as deflated, when ci->iobref cannot be allocated, when more than one
+ * input iovec is present, or when do_cdc_decompress() fails.
+ */
+int32_t
+cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+
+ /* check for deflate content */
+ if (!cdc_check_content_for_deflate (xdata)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Content not deflated, passing through ...");
+ goto passthrough_out;
+ }
+
+ ci->iobref = iobref_new ();
+ if (!ci->iobref)
+ goto passthrough_out;
+
+ /* do we need to do this? can we assume that one iovec
+ * will hold per request data every time?
+ *
+ * server/client protocol seems to deal with a single
+ * iovec even if op_ret > 1M. So, it looks ok to
+ * assume that a single iovec will contain all the
+ * data (This saves us a lot from finding the trailer
+ * and the data since it could have been split-up onto
+ * two adjacent iovec's.
+ *
+ * But, in case this translator is loaded above quick-read
+ * for some reason, then it's entirely possible that we get
+ * multiple iovec's...
+ *
+ * This case (handled below) is not tested. (by loading the
+ * xlator below quick-read)
+ */
+
+ /* @@ I_HOPE_THIS_IS_NEVER_HIT */
+ if (ci->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING, "unable to handle"
+ " multiple iovecs (%d in number)", ci->count);
+ /* ret is still -1 here.
+ * NOTE(review): this jumps to inflateEnd() although
+ * inflateInit2() (done in do_cdc_decompress) has not run yet.
+ */
+ goto inflate_cleanup_out;
+ /* TODO: collate all iovecs in one */
+ }
+
+ ret = do_cdc_decompress (this, priv, ci);
+ if (ret)
+ goto inflate_cleanup_out;
+
+ ci->nbytes = ci->stream.total_out;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Inflated %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ inflate_cleanup_out:
+ (void) inflateEnd (&ci->stream);
+
+ passthrough_out:
+ return ret;
+}
+
+#endif
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
new file mode 100644
index 00000000000..ead2c70ba6e
--- /dev/null
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_MEM_TYPES_H
+#define __CDC_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* Memory accounting types of the cdc translator; the values continue
+ * after gf_common_mt_end. gf_cdc_mt_end is the count handed to
+ * xlator_mem_acct_init().
+ */
+enum gf_cdc_mem_types {
+ gf_cdc_mt_priv_t = gf_common_mt_end + 1,
+ gf_cdc_mt_vec_t = gf_common_mt_end + 2,
+ gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3,
+ gf_cdc_mt_end = gf_common_mt_end + 4,
+};
+
+#endif
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
new file mode 100644
index 00000000000..e33d4efc1a1
--- /dev/null
+++ b/xlators/features/compress/src/cdc.c
@@ -0,0 +1,356 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/uio.h>
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+/* Hand the iobref of @ci to iobref_clear(); the iobref must be set. */
+static void
+cdc_cleanup_iobref (cdc_info_t *ci)
+{
+        struct iobref *ref = NULL;
+
+        assert(ci->iobref != NULL);
+
+        ref = ci->iobref;
+        iobref_clear (ref);
+}
+
+/*
+ * readv callback: compresses the data that was read when running in server
+ * mode and decompresses it when running in client mode.
+ *
+ * Failed reads, reads smaller than the configured minimum size and any
+ * failure of the (de)compression step are unwound with the original
+ * vector, count and op_ret. Always returns 0.
+ */
+int32_t
+cdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iovec *vector, int32_t count,
+               struct iatt *stbuf, struct iobref *iobref,
+               dict_t *xdata)
+{
+        int         ret  = -1;
+        cdc_priv_t *priv = NULL;
+        cdc_info_t  ci   = {0,};
+
+        GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+        priv = this->private;
+
+        if (op_ret <= 0)
+                goto default_out;
+
+        if ( (priv->min_file_size != 0)
+             && (op_ret < priv->min_file_size) )
+                goto default_out;
+
+        ci.count = count;
+        ci.ibytes = op_ret;
+        ci.vector = vector;
+        ci.buf = NULL;
+        ci.iobref = NULL;
+        ci.ncount = 0;
+        ci.crc = 0;
+        ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+/* A readv compresses on the server side and decompresses on the client side
+ */
+        if (priv->op_mode == GF_CDC_MODE_SERVER) {
+                ret = cdc_compress (this, priv, &ci, &xdata);
+        } else if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+                ret = cdc_decompress (this, priv, &ci, xdata);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Invalid operation mode (%d)", priv->op_mode);
+        }
+
+        if (ret)
+                goto default_out;
+
+        STACK_UNWIND_STRICT (readv, frame, ci.nbytes, op_errno,
+                             ci.vec, ci.ncount, stbuf, iobref,
+                             xdata);
+        cdc_cleanup_iobref (&ci);
+        return 0;
+
+ default_out:
+        /* A failed (de)compression may already have allocated ci.iobref;
+         * the original data is unwound below, so release it here instead
+         * of leaking it.
+         */
+        if (ci.iobref)
+                cdc_cleanup_iobref (&ci);
+
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+                             vector, count, stbuf, iobref, xdata);
+        return 0;
+}
+
+/* Wind readv to the first child. The reply goes through cdc_readv_cbk
+ * when built with zlib (HAVE_LIB_Z) and through default_readv_cbk
+ * otherwise. Always returns 0.
+ */
+int32_t
+cdc_readv (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, size_t size, off_t offset, uint32_t flags,
+           dict_t *xdata)
+{
+#ifdef HAVE_LIB_Z
+        fop_readv_cbk_t cbk = cdc_readv_cbk;
+#else
+        fop_readv_cbk_t cbk = default_readv_cbk;
+#endif
+
+        STACK_WIND (frame, cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset, flags, xdata);
+
+        return 0;
+}
+
+/* writev callback: passes the reply on unchanged. Always returns 0. */
+int32_t
+cdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iatt *prebuf, struct iatt *postbuf,
+                dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * writev: compresses the payload when running in client mode and
+ * decompresses it when running in server mode, then winds the result to
+ * the first child.
+ *
+ * Empty writes, writes smaller than the configured minimum size and any
+ * failure of the (de)compression step are wound with the original vector
+ * and count. Always returns 0.
+ */
+int32_t
+cdc_writev (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            struct iovec *vector,
+            int32_t count,
+            off_t offset,
+            uint32_t flags,
+            struct iobref *iobref, dict_t *xdata)
+{
+        int         ret   = -1;
+        cdc_priv_t *priv  = NULL;
+        cdc_info_t  ci    = {0,};
+        size_t      isize = 0;
+
+        GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+        priv = this->private;
+
+        isize = iov_length(vector, count);
+
+        if (isize <= 0)
+                goto default_out;
+
+        if ( (priv->min_file_size != 0) && (isize < priv->min_file_size) )
+                goto default_out;
+
+        ci.count = count;
+        ci.ibytes = isize;
+        ci.vector = vector;
+        ci.buf = NULL;
+        ci.iobref = NULL;
+        ci.ncount = 0;
+        ci.crc = 0;
+        ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+/* A writev compresses on the client side and decompresses on the server side
+ */
+        if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+                ret = cdc_compress (this, priv, &ci, &xdata);
+        } else if (priv->op_mode == GF_CDC_MODE_SERVER) {
+                ret = cdc_decompress (this, priv, &ci, xdata);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", priv->op_mode);
+        }
+
+        if (ret)
+                goto default_out;
+
+        /* NOTE(review): the buffers behind ci.vec are released right after
+         * the wind while the caller's iobref is passed down -- confirm the
+         * child never uses them after STACK_WIND returns.
+         */
+        STACK_WIND (frame,
+                    cdc_writev_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->writev,
+                    fd, ci.vec, ci.ncount, offset, flags,
+                    iobref, xdata);
+
+        cdc_cleanup_iobref (&ci);
+        return 0;
+
+ default_out:
+        /* A failed (de)compression may already have allocated ci.iobref;
+         * the original data is wound below, so release it here instead of
+         * leaking it.
+         */
+        if (ci.iobref)
+                cdc_cleanup_iobref (&ci);
+
+        STACK_WIND (frame,
+                    cdc_writev_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->writev,
+                    fd, vector, count, offset, flags,
+                    iobref, xdata);
+        return 0;
+}
+
+/* Set up memory accounting for this translator with gf_cdc_mt_end types.
+ * Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init().
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_cdc_mt_end);
+        if (ret != 0) {
+                /* the two literals used to be joined without a space
+                 * ("initfailed") */
+                gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+                       " failed");
+        }
+
+        return ret;
+}
+
+/*
+ * Translator init: requires exactly one subvolume, allocates the private
+ * structure and fills it from the volume options. Out-of-range window
+ * size, compression level and memory level values are replaced by their
+ * defaults with a warning; a missing or unknown "mode" is fatal.
+ *
+ * Returns 0 on success and -1 on failure (the private structure is freed).
+ */
+int32_t
+init (xlator_t *this)
+{
+ int ret = -1;
+ char *temp_str = NULL;
+ cdc_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Need subvolume == 1");
+ goto err;
+ }
+
+ /* a missing parent is only warned about, init continues */
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Dangling volume. Check volfile");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_cdc_mt_priv_t);
+ if (!priv) {
+ goto err;
+ }
+
+ /* Check if debug mode is turned on */
+ GF_OPTION_INIT ("debug", priv->debug, bool, err);
+ if( priv->debug ) {
+ gf_log (this->name, GF_LOG_DEBUG, "CDC debug option turned on");
+ }
+
+ /* Set Gzip Window Size */
+ GF_OPTION_INIT ("window-size", priv->window_size, int32, err);
+ /* accepted range: GF_CDC_DEF_WINDOWSIZE .. GF_CDC_MAX_WINDOWSIZE */
+ if ( (priv->window_size > GF_CDC_MAX_WINDOWSIZE)
+ || (priv->window_size < GF_CDC_DEF_WINDOWSIZE) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip window size (%d), using default",
+ priv->window_size);
+ priv->window_size = GF_CDC_DEF_WINDOWSIZE;
+ }
+
+ /* Set Gzip (De)Compression Level */
+ GF_OPTION_INIT ("compression-level", priv->cdc_level, int32, err);
+ /* accepted: 1..9 or GF_CDC_DEF_COMPRESSION */
+ if ( ((priv->cdc_level < 1) || (priv->cdc_level > 9))
+ && (priv->cdc_level != GF_CDC_DEF_COMPRESSION) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip (de)compression level (%d),"
+ " using default", priv->cdc_level);
+ priv->cdc_level = GF_CDC_DEF_COMPRESSION;
+ }
+
+ /* Set Gzip Memory Level */
+ GF_OPTION_INIT ("mem-level", priv->mem_level, int32, err);
+ if ( (priv->mem_level < 1) || (priv->mem_level > 9) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip memory level, using the default");
+ priv->mem_level = GF_CDC_DEF_MEMLEVEL;
+ }
+
+ /* Set min file size to enable compression */
+ GF_OPTION_INIT ("min-size", priv->min_file_size, int32, err);
+
+ /* Mode of operation - Server/Client */
+ ret = dict_get_str (this->options, "mode", &temp_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Operation mode not specified !!");
+ goto err;
+ }
+
+ if (GF_CDC_MODE_IS_CLIENT (temp_str)) {
+ priv->op_mode = GF_CDC_MODE_CLIENT;
+ } else if (GF_CDC_MODE_IS_SERVER (temp_str)) {
+ priv->op_mode = GF_CDC_MODE_SERVER;
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Bogus operation mode (%s) specified", temp_str);
+ goto err;
+ }
+
+ this->private = priv;
+ gf_log (this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",temp_str);
+ return 0;
+
+ err:
+ if (priv)
+ GF_FREE (priv);
+
+ return -1;
+}
+
+/* Translator teardown: detach and free the private structure. */
+void
+fini (xlator_t *this)
+{
+        cdc_priv_t *priv = this->private;
+
+        this->private = NULL;
+
+        if (priv != NULL)
+                GF_FREE (priv);
+}
+
+/* File operations implemented by this translator. */
+struct xlator_fops fops = {
+ .readv = cdc_readv,
+ .writev = cdc_writev,
+};
+
+/* No callbacks are registered. */
+struct xlator_cbks cbks = {
+};
+
+/* Volume options; the table is terminated by an entry with a NULL key. */
+struct volume_options options[] = {
+ { .key = {"window-size"},
+ .default_value = "-15",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Size of the zlib history buffer."
+ },
+ { .key = {"mem-level"},
+ .default_value = "8",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Memory allocated for internal compression state. "
+ "1 uses minimum memory but is slow and reduces "
+ "compression ratio; memLevel=9 uses maximum memory "
+ "for optimal speed. The default value is 8."
+ },
+ { .key = {"compression-level"},
+ .default_value = "-1",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Compression levels \n"
+ "0 : no compression, 1 : best speed, \n"
+ "9 : best compression, -1 : default compression "
+ },
+ { .key = {"min-size"},
+ .default_value = "0",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Data is compressed only when its size exceeds this."
+ },
+ { .key = {"mode"},
+ .value = {"server", "client"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Set on the basis of where the xlator is loaded. "
+ "This option should NOT be configured by user."
+ },
+ { .key = {"debug"},
+ .default_value = "false",
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "This is used in testing. Will dump compressed data "
+ "to disk as a gzip file."
+ },
+ { .key = {NULL}
+ },
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
new file mode 100644
index 00000000000..71f4d2317bb
--- /dev/null
+++ b/xlators/features/compress/src/cdc.h
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_H
+#define __CDC_H
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#include "xlator.h"
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+/* Per-translator configuration. */
+typedef struct cdc_priv {
+ int window_size; /* zlib window size (negative values, see GF_CDC_*_WINDOWSIZE) */
+ int mem_level; /* zlib memory level */
+ int cdc_level; /* zlib compression level */
+ int min_file_size; /* smaller payloads are passed through; 0 disables the check */
+ int op_mode; /* GF_CDC_MODE_CLIENT or GF_CDC_MODE_SERVER */
+ gf_boolean_t debug; /* dump compressed data to disk, for testing */
+ gf_lock_t lock;
+} cdc_priv_t;
+
+/* State of one compress/decompress operation. */
+typedef struct cdc_info {
+ /* input bits */
+ int count; /* number of input iovecs */
+ int32_t ibytes; /* number of input bytes */
+ struct iovec *vector; /* input iovecs */
+ struct iatt *buf;
+
+ /* output bits */
+ int ncount; /* number of entries used in vec[] */
+ int nbytes; /* number of output bytes */
+ int buffer_size; /* size of each output buffer */
+ struct iovec vec[MAX_IOVEC]; /* output iovecs */
+ struct iobref *iobref; /* holds the output buffers */
+
+ /* zlib bits */
+#ifdef HAVE_LIB_Z
+ z_stream stream;
+#endif
+ unsigned long crc; /* running CRC of the uncompressed data */
+} cdc_info_t;
+
+/* Accessors for the vectors of a cdc_info_t pointer. Arguments and
+ * expansions are parenthesized so that any pointer or index expression can
+ * be passed and the result can be combined with other operators.
+ */
+#define NVEC(ci) ((ci)->ncount - 1)
+#define CURR_VEC(ci) ((ci)->vec[(ci)->ncount - 1])
+#define THIS_VEC(ci, i) ((ci)->vector[(i)])
+
+/* Gzip defaults */
+#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */
+#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */
+
+#ifdef HAVE_LIB_Z
+#define GF_CDC_DEF_COMPRESSION Z_DEFAULT_COMPRESSION
+#else
+#define GF_CDC_DEF_COMPRESSION -1
+#endif
+
+#define GF_CDC_DEF_MEMLEVEL 8
+#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size
+
+/* Operation mode
+ * If xlator is loaded on client, readv decompresses and writev compresses
+ * If xlator is loaded on server, readv compresses and writev decompresses
+ */
+#define GF_CDC_MODE_CLIENT 0
+#define GF_CDC_MODE_SERVER 1
+
+/* Minimum size of data for which compression is done.
+ * 0 == compress even a single byte
+ */
+#define GF_CDC_MIN_CHUNK_SIZE 0
+
+#define GF_CDC_VALIDATION_SIZE 8
+
+#define GF_CDC_OS_ID 0xFF
+#define GF_CDC_DEFLATE_CANARY_VAL "deflate"
+#define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz"
+
+#define GF_CDC_MODE_IS_CLIENT(m) \
+ (strcmp (m, "client") == 0)
+
+#define GF_CDC_MODE_IS_SERVER(m) \
+ (strcmp (m, "server") == 0)
+
+/* Deflate the input vectors described by ci. Takes the address of the
+ * xdata dict pointer. */
+int32_t
+cdc_compress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t **xdata);
+/* Inflate the input described by ci; xdata is the dict received with the
+ * data. */
+int32_t
+cdc_decompress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t *xdata);
+
+#endif
diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am
index d473b9ea16d..5bdc711ae07 100644
--- a/xlators/features/filter/src/Makefile.am
+++ b/xlators/features/filter/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = filter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-filter_la_LDFLAGS = -module -avoidversion
+filter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
filter_la_SOURCES = filter.c
filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = filter-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/filter/src/filter-mem-types.h
index cca354438b5..47a17249b8d 100644
--- a/xlators/features/filter/src/filter-mem-types.h
+++ b/xlators/features/filter/src/filter-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __FILTER_MEM_TYPES_H__
#define __FILTER_MEM_TYPES_H__
diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c
index beede16e80c..3fd7dc8c8fb 100644
--- a/xlators/features/filter/src/filter.c
+++ b/xlators/features/filter/src/filter.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
diff --git a/xlators/features/ganesha/Makefile.am b/xlators/features/ganesha/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/ganesha/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/ganesha/src/Makefile.am b/xlators/features/ganesha/src/Makefile.am
new file mode 100644
index 00000000000..3bf291b92c6
--- /dev/null
+++ b/xlators/features/ganesha/src/Makefile.am
@@ -0,0 +1,18 @@
+xlator_LTLIBRARIES = ganesha.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = ganesha.h ganesha-mem-types.h
+
+ganesha_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+ganesha_la_SOURCES = ganesha.c
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+ -DGANESHA_DIR=\"$(sysconfdir)/ganesha\" \
+ -DGYSNCD_PREFIX=\"$(libexecdir)/glusterfs\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/ganesha/src/ganesha-mem-types.h b/xlators/features/ganesha/src/ganesha-mem-types.h
new file mode 100644
index 00000000000..c4976c01afc
--- /dev/null
+++ b/xlators/features/ganesha/src/ganesha-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GANESHA_MEM_TYPES_H__
+#define __GANESHA_MEM_TYPES_H__
+
+
+#include "mem-types.h"
+
+/* Memory accounting types of the ganesha translator; the values continue
+ * after gf_common_mt_end.
+ */
+enum gf_ganesha_mem_types_ {
+ gf_ganesha_mt_priv_t = gf_common_mt_end + 1,
+ gf_ganesha_mt_end
+};
+
+#endif
diff --git a/xlators/features/ganesha/src/ganesha.c b/xlators/features/ganesha/src/ganesha.c
new file mode 100644
index 00000000000..859915420ac
--- /dev/null
+++ b/xlators/features/ganesha/src/ganesha.c
@@ -0,0 +1,90 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "ganesha.h"
+#include "ganesha-mem-types.h"
+
+
+/* Set up memory accounting for this translator.
+ * Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init().
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_ganesha_mt_end + 1);
+
+        /* the two literals used to be joined without a space
+         * ("accountinginit") */
+        if (ret != 0)
+                gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                        " init failed");
+
+        return ret;
+}
+
+/* Translator init: requires exactly one subvolume and a parent, then
+ * allocates the private structure. Returns 0 on success, -1 otherwise.
+ */
+int32_t
+init (xlator_t *this)
+{
+        ganesha_priv_t *priv = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Need subvolume == 1");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Dangling volume. Check volfile");
+                return -1;
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_ganesha_mt_priv_t);
+        if (priv == NULL)
+                return -1;
+
+        this->private = priv;
+
+        return 0;
+}
+
+
+/* Translator teardown: detach and free the private structure. */
+void
+fini (xlator_t *this)
+{
+        ganesha_priv_t *priv = this->private;
+
+        this->private = NULL;
+
+        if (priv != NULL)
+                GF_FREE (priv);
+}
+
+/* This translator implements no file operations. */
+struct xlator_fops fops = {
+};
+
+/* No callbacks are registered. */
+struct xlator_cbks cbks = {
+};
+
+/* Volume options; the table is terminated by an entry with a NULL key. */
+struct volume_options options[] = {
+
+ { .key = {"ganesha.enable"},
+ .default_value = "off",
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "export volume via NFS-Ganesha"
+ },
+ { .key = {NULL}
+ },
+};
diff --git a/xlators/features/ganesha/src/ganesha.h b/xlators/features/ganesha/src/ganesha.h
new file mode 100644
index 00000000000..86320e9da28
--- /dev/null
+++ b/xlators/features/ganesha/src/ganesha.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "ganesha-mem-types.h"
+
+/* Private state of the ganesha translator. */
+typedef struct {
+ char *host_name;
+} ganesha_priv_t;
+
+
diff --git a/xlators/bindings/python/Makefile.am b/xlators/features/gfid-access/Makefile.am
index af437a64d6d..af437a64d6d 100644
--- a/xlators/bindings/python/Makefile.am
+++ b/xlators/features/gfid-access/Makefile.am
diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am
new file mode 100644
index 00000000000..3b25f099123
--- /dev/null
+++ b/xlators/features/gfid-access/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = gfid-access.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+gfid_access_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+gfid_access_la_SOURCES = gfid-access.c
+gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = gfid-access.h gfid-access-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
new file mode 100644
index 00000000000..168d67b431f
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GFID_ACCESS_MEM_TYPES_H
+#define _GFID_ACCESS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* Memory accounting types of the gfid-access translator; the values
+ * continue after gf_common_mt_end.
+ * NOTE(review): the enum tag says "changelog" although every enumerator is
+ * named gf_gfid_access_* -- looks copied from another translator, confirm
+ * before renaming.
+ */
+enum gf_changelog_mem_types {
+ gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
+ gf_gfid_access_mt_gfid_t,
+ gf_gfid_access_mt_end
+};
+
+#endif
+
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
new file mode 100644
index 00000000000..7d75b09bae0
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -0,0 +1,1428 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "gfid-access.h"
+#include "inode.h"
+#include "byte-order.h"
+#include "statedump.h"
+
+
+/*
+ * Copy @src into @dst, then replace any inode in the copy that carries a
+ * gfid-access inode context with the inode stored in that context, and
+ * refresh the matching pargfid/gfid fields from the substituted inode.
+ *
+ * Returns 0 on success (including when no substitution was needed) and the
+ * negative loc_copy() result when the copy itself fails.  On success the
+ * caller owns @dst and is expected to loc_wipe() it.
+ */
+int
+ga_valid_inode_loc_copy (loc_t *dst, loc_t *src, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t value = 0;
+
+ /* if its an entry operation, on the virtual */
+ /* directory inode as parent, we need to handle */
+ /* it properly */
+ ret = loc_copy (dst, src);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Change ALL virtual inodes with real-inodes in loc
+ */
+ if (dst->parent) {
+ ret = inode_ctx_get (dst->parent, this, &value);
+ if (ret < 0) {
+ /* No context on the parent: it is treated as a real
+ * inode and the copy is returned unchanged.
+ * NOTE(review): this jump also skips the dst->inode
+ * substitution below -- confirm that is intended. */
+ ret = 0; //real-inode
+ goto out;
+ }
+ /* swap the reference taken by loc_copy() for one on the inode
+ * kept in the context */
+ inode_unref (dst->parent);
+ dst->parent = inode_ref ((inode_t*)value);
+ gf_uuid_copy (dst->pargfid, dst->parent->gfid);
+ }
+
+ if (dst->inode) {
+ ret = inode_ctx_get (dst->inode, this, &value);
+ if (ret < 0) {
+ ret = 0; //real-inode
+ goto out;
+ }
+ inode_unref (dst->inode);
+ dst->inode = inode_ref ((inode_t*)value);
+ gf_uuid_copy (dst->gfid, dst->inode->gfid);
+ }
+out:
+
+ return ret;
+}
+
+/*
+ * Release a parsed "newfile" request: the basename, the symlink target (only
+ * when st_mode marks the request as a symlink) and finally the pooled args
+ * object itself.  A NULL @args is accepted and ignored.
+ */
+void
+ga_newfile_args_free (ga_newfile_args_t *args)
+{
+        if (args == NULL)
+                return;
+
+        GF_FREE (args->bname);
+
+        /* linkpath lives in a union; it is only meaningful for symlinks */
+        if (S_ISLNK (args->st_mode)) {
+                if (args->args.symlink.linkpath != NULL) {
+                        GF_FREE (args->args.symlink.linkpath);
+                        args->args.symlink.linkpath = NULL;
+                }
+        }
+
+        mem_put (args);
+}
+
+
+/*
+ * Release a parsed "heal" request: its basename and the pooled args object.
+ * A NULL @args is accepted and ignored.
+ */
+void
+ga_heal_args_free (ga_heal_args_t *args)
+{
+        if (args == NULL)
+                return;
+
+        GF_FREE (args->bname);
+        mem_put (args);
+}
+
+
+/*
+ * Decode the value of a "newfile" setxattr request into a freshly allocated
+ * ga_newfile_args_t taken from priv->newfile_args_pool.
+ *
+ * Layout consumed from @data, in order (32-bit fields go through ntoh32):
+ *   uid, gid, gfid (sizeof (args->gfid) bytes), st_mode,
+ *   NUL-terminated basename, then depending on st_mode:
+ *     directory : mode, umask
+ *     symlink   : NUL-terminated link target
+ *     otherwise : mode, rdev, umask
+ * Any bytes left over after that make the request invalid.
+ *
+ * Returns the parsed arguments (release with ga_newfile_args_free()) or NULL
+ * on malformed input or allocation failure.
+ */
+ga_newfile_args_t *
+ga_newfile_parse_args (xlator_t *this, data_t *data)
+{
+ ga_newfile_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ int len = 0;
+ int blob_len = 0;
+ int min_len = 0;
+ void *blob = NULL;
+
+ priv = this->private;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ /* fixed-size header plus 2 + 2 extra bytes; presumably the shortest
+ * possible basename and trailing argument -- TODO confirm */
+ min_len = sizeof (args->uid) + sizeof (args->gid) + sizeof (args->gfid)
+ + sizeof (args->st_mode) + 2 + 2;
+ if (blob_len < min_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid length: Total length is less "
+ "than minimum length.");
+ goto err;
+ }
+
+ args = mem_get0 (priv->newfile_args_pool);
+ if (args == NULL)
+ goto err;
+
+ /* 'blob' is advanced and 'blob_len' decremented after every field */
+ args->uid = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ args->gid = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ memcpy (args->gfid, blob, sizeof (args->gfid));
+ blob += sizeof (args->gfid);
+ blob_len -= sizeof (args->gfid);
+
+ args->st_mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ /* strnlen() == blob_len means no terminator inside the remaining
+ * bytes */
+ len = strnlen (blob, blob_len);
+ if (len == blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. No null byte present.",
+ args->gfid);
+ goto err;
+ }
+
+ args->bname = GF_CALLOC (1, (len + 1), gf_common_mt_char);
+ if (args->bname == NULL)
+ goto err;
+
+ /* len + 1 copies the terminator as well */
+ memcpy (args->bname, blob, (len + 1));
+ blob += (len + 1);
+ blob_len -= (len + 1);
+
+ if (S_ISDIR (args->st_mode)) {
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.umask = ntoh32 (*(uint32_t *)blob);
+ blob_len -= sizeof (uint32_t);
+ if (blob_len < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ } else if (S_ISLNK (args->st_mode)) {
+ len = strnlen (blob, blob_len);
+ if (len == blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.symlink.linkpath = GF_CALLOC (1, len + 1,
+ gf_common_mt_char);
+ if (args->args.symlink.linkpath == NULL)
+ goto err;
+
+ memcpy (args->args.symlink.linkpath, blob, (len + 1));
+ blob_len -= (len + 1);
+ } else {
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.rdev = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.umask = ntoh32 (*(uint32_t *)blob);
+ blob_len -= sizeof (uint32_t);
+ }
+
+ /* the whole blob must have been consumed */
+ if (blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+
+ return args;
+
+err:
+ if (args)
+ ga_newfile_args_free (args);
+
+ return NULL;
+}
+
+/*
+ * Decode the value of a "heal" setxattr request: sizeof (args->gfid) bytes of
+ * gfid followed by a NUL-terminated basename and nothing else.
+ *
+ * Returns a ga_heal_args_t from priv->heal_args_pool (release with
+ * ga_heal_args_free()) or NULL when the blob is malformed or memory is
+ * exhausted.
+ */
+ga_heal_args_t *
+ga_heal_parse_args (xlator_t *this, data_t *data)
+{
+        ga_private_t   *priv      = this->private;
+        ga_heal_args_t *args      = NULL;
+        void           *cursor    = data->data;
+        int             remaining = data->len;
+        int             name_len  = 0;
+
+        /* bname should at least contain a character */
+        if (remaining < (sizeof (args->gfid) + 2))
+                return NULL;
+
+        args = mem_get0 (priv->heal_args_pool);
+        if (!args)
+                return NULL;
+
+        memcpy (args->gfid, cursor, sizeof (args->gfid));
+        cursor    += sizeof (args->gfid);
+        remaining -= sizeof (args->gfid);
+
+        /* a length equal to what is left means the terminator is missing */
+        name_len = strnlen (cursor, remaining);
+        if (name_len == remaining)
+                goto err;
+
+        /* zeroed allocation supplies the terminating NUL */
+        args->bname = GF_CALLOC (1, name_len + 1, gf_common_mt_char);
+        if (!args->bname)
+                goto err;
+
+        memcpy (args->bname, cursor, name_len);
+        remaining -= (name_len + 1);
+
+        /* trailing bytes after the basename are not allowed */
+        if (remaining)
+                goto err;
+
+        return args;
+
+err:
+        ga_heal_args_free (args);
+
+        return NULL;
+}
+
+/*
+ * Build in @new_loc the location of entry @bname under the directory named by
+ * @loc, and record @gfid in @xdata under the "gfid-req" key.
+ *
+ * The parent is loc->inode, unless that inode carries a gfid-access context
+ * whose stored inode has a non-null gfid, in which case the stored inode is
+ * used.  The entry inode is taken from the inode table when already known,
+ * otherwise a new inode is created and stamped with @gfid.
+ *
+ * Returns 0 on success, non-zero on failure.  @new_loc may hold references
+ * even on failure; the caller must loc_wipe() it in every case.
+ */
+static int32_t
+ga_fill_tmp_loc (loc_t *loc, xlator_t *this, uuid_t gfid,
+                 char *bname, dict_t *xdata, loc_t *new_loc)
+{
+        int       ret      = -1;
+        uint64_t  value    = 0;
+        inode_t  *parent   = NULL;
+        uuid_t   *gfid_ptr = NULL;
+
+        parent = loc->inode;
+        ret = inode_ctx_get (loc->inode, this, &value);
+        if (!ret) {
+                parent = (void *)value;
+                if (gf_uuid_is_null (parent->gfid))
+                        parent = loc->inode;
+        }
+
+        /* parent itself should be looked up */
+        gf_uuid_copy (new_loc->pargfid, parent->gfid);
+        new_loc->parent = inode_ref (parent);
+
+        new_loc->inode = inode_grep (parent->table, parent, bname);
+        if (!new_loc->inode) {
+                new_loc->inode = inode_new (parent->table);
+                if (!new_loc->inode) {
+                        /* allocation failed: do not dereference NULL */
+                        ret = -1;
+                        goto out;
+                }
+                gf_uuid_copy (new_loc->inode->gfid, gfid);
+        }
+
+        loc_path (new_loc, bname);
+        if (new_loc->path) {
+                /* name points at the last path component */
+                new_loc->name = strrchr (new_loc->path, '/');
+                if (new_loc->name)
+                        new_loc->name++;
+        }
+
+        gfid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!gfid_ptr) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_copy (*gfid_ptr, gfid);
+        ret = dict_set_dynptr (xdata, "gfid-req", gfid_ptr, sizeof (uuid_t));
+        if (ret < 0)
+                goto out;
+
+        ret = 0;
+
+out:
+        if (ret && gfid_ptr)
+                GF_FREE (gfid_ptr);
+        return ret;
+}
+
+
+
+/*
+ * Tell whether @gfid is the gfid of the virtual ".gfid" directory, i.e. all
+ * zero bytes except the last one, which holds GF_AUX_GFID.
+ */
+static gf_boolean_t
+__is_gfid_access_dir (uuid_t gfid)
+{
+        uuid_t virtual_dir_gfid = {0,};
+
+        virtual_dir_gfid[15] = GF_AUX_GFID;
+
+        return (gf_uuid_compare (gfid, virtual_dir_gfid) == 0) ? _gf_true
+                                                               : _gf_false;
+}
+
+/*
+ * forget() callback: when @inode carries a gfid-access context, remove it and
+ * drop the reference held on the inode stored there.  Always returns 0.
+ */
+int32_t
+ga_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t  ctx_value  = 0;
+        inode_t  *real_inode = NULL;
+
+        if (inode_ctx_del (inode, this, &ctx_value) == 0) {
+                real_inode = (void *)ctx_value;
+                inode_unref (real_inode);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback of the lookup wound by ga_heal_entry().  frame->local holds the
+ * original setxattr frame: the copied frame's stack is destroyed and the
+ * lookup outcome (op_ret, op_errno, dict) is unwound as the setxattr reply.
+ */
+static int
+ga_heal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stat, dict_t *dict,
+ struct iatt *postparent)
+{
+ call_frame_t *orig_frame = NULL;
+
+ /* detach the original frame before the copied stack goes away */
+ orig_frame = frame->local;
+ frame->local = NULL;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, dict);
+
+ return 0;
+}
+
+/*
+ * Callback of the mkdir/symlink/mknod wound on behalf of a "newfile"
+ * setxattr.  Destroys the copied stack, unwinds the original setxattr frame
+ * (local->orig_frame) with the result, then releases everything owned by the
+ * ga_local_t: the optional xdata reference, the loc and the pooled object.
+ */
+static int
+ga_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ ga_local_t *local = NULL;
+
+ local = frame->local;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, local->orig_frame, op_ret,
+ op_errno, xdata);
+
+ /* local was detached from the frame above, so free it by hand */
+ if (local->xdata)
+ dict_unref (local->xdata);
+ loc_wipe (&local->loc);
+ mem_put (local);
+
+ return 0;
+}
+
+/*
+ * Callback of the named lookup that ga_new_entry() sends before creating a
+ * non-directory, non-symlink entry.  A successful lookup, or a failure with
+ * ENOENT/ESTALE, continues with mknod using the loc, mode, rdev, umask and
+ * xdata saved in the ga_local_t.  Any other failure is unwound to the
+ * original setxattr frame and the local state is released.
+ */
+static int
+ga_newentry_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stat, dict_t *xdata,
+ struct iatt *postparent)
+
+{
+ ga_local_t *local = NULL;
+
+ local = frame->local;
+
+ if ((op_ret < 0) && ((op_errno != ENOENT) && (op_errno != ESTALE)))
+ goto err;
+
+ /* same frame is reused; ga_newentry_cbk does the final cleanup */
+ STACK_WIND (frame, ga_newentry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, &local->loc, local->mode,
+ local->rdev, local->umask, local->xdata);
+ return 0;
+
+err:
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ STACK_UNWIND_STRICT (setxattr, local->orig_frame, op_ret, op_errno,
+ xdata);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ loc_wipe (&local->loc);
+ mem_put (local);
+
+ return 0;
+}
+
+/*
+ * Handle a "newfile" setxattr: parse the request in @data and create the
+ * described entry (directory, symlink or other node) under @loc with the
+ * requested gfid, using a copied frame that runs with the uid/gid from the
+ * request.
+ *
+ * Returns 0 when a fop has been wound; the original @frame is then unwound
+ * from the callback.  Returns non-zero when nothing was wound and the caller
+ * must unwind @frame itself.
+ */
+int32_t
+ga_new_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+              dict_t *xdata)
+{
+        int                ret       = -1;
+        ga_newfile_args_t *args      = NULL;
+        loc_t              tmp_loc   = {0,};
+        call_frame_t      *new_frame = NULL;
+        ga_local_t        *local     = NULL;
+        uuid_t             gfid      = {0,};
+
+        /* Take our own reference before anything can jump to 'out', so the
+         * unref there never drops a reference owned by the caller. */
+        if (!xdata) {
+                xdata = dict_new ();
+        } else {
+                xdata = dict_ref (xdata);
+        }
+
+        if (!xdata) {
+                ret = -1;
+                goto out;
+        }
+
+        args = ga_newfile_parse_args (this, data);
+        if (!args)
+                goto out;
+
+        ret = gf_uuid_parse (args->gfid, gfid);
+        if (ret)
+                goto out;
+
+        ret = ga_fill_tmp_loc (loc, this, gfid,
+                               args->bname, xdata, &tmp_loc);
+        if (ret)
+                goto out;
+
+        new_frame = copy_frame (frame);
+        if (!new_frame) {
+                /* ret is 0 here; report the failure so the caller unwinds */
+                ret = -1;
+                goto out;
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                STACK_DESTROY (new_frame->root);
+                ret = -1;
+                goto out;
+        }
+        local->orig_frame = frame;
+
+        loc_copy (&local->loc, &tmp_loc);
+
+        new_frame->local = local;
+        new_frame->root->uid = args->uid;
+        new_frame->root->gid = args->gid;
+
+        if (S_ISDIR (args->st_mode)) {
+                STACK_WIND (new_frame, ga_newentry_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                            &tmp_loc, args->args.mkdir.mode,
+                            args->args.mkdir.umask, xdata);
+        } else if (S_ISLNK (args->st_mode)) {
+                STACK_WIND (new_frame, ga_newentry_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                            args->args.symlink.linkpath,
+                            &tmp_loc, 0, xdata);
+        } else {
+                /* use 07777 (4 7s) for considering the Sticky bits etc) */
+                ((ga_local_t *)new_frame->local)->mode =
+                        (S_IFMT & args->st_mode) | (07777 & args->args.mknod.mode);
+
+                ((ga_local_t *)new_frame->local)->umask =
+                        args->args.mknod.umask;
+                ((ga_local_t *)new_frame->local)->rdev = args->args.mknod.rdev;
+                ((ga_local_t *)new_frame->local)->xdata = dict_ref (xdata);
+
+                /* send a named lookup, so that dht can cleanup up stale linkto
+                 * files etc.
+                 */
+                STACK_WIND (new_frame, ga_newentry_lookup_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup,
+                            &tmp_loc, NULL);
+        }
+
+        ret = 0;
+out:
+        ga_newfile_args_free (args);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        loc_wipe (&tmp_loc);
+
+        return ret;
+}
+
+/*
+ * Handle a "heal" setxattr: parse the gfid and basename from @data and send a
+ * named lookup for that entry under @loc, carrying the gfid in the lookup
+ * xdata.  The lookup runs on a copied frame whose callback (ga_heal_cbk)
+ * unwinds the original @frame.
+ *
+ * Returns 0 when the lookup has been wound, non-zero when nothing was wound
+ * and the caller must unwind @frame itself.
+ */
+int32_t
+ga_heal_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+               dict_t *xdata)
+{
+        int             ret       = -1;
+        ga_heal_args_t *args      = NULL;
+        loc_t           tmp_loc   = {0,};
+        call_frame_t   *new_frame = NULL;
+        uuid_t          gfid      = {0,};
+
+        /* Take our own reference before anything can jump to 'out', so the
+         * unref there never drops a reference owned by the caller. */
+        if (!xdata)
+                xdata = dict_new ();
+        else
+                xdata = dict_ref (xdata);
+
+        if (!xdata) {
+                ret = -1;
+                goto out;
+        }
+
+        args = ga_heal_parse_args (this, data);
+        if (!args)
+                goto out;
+
+        ret = gf_uuid_parse (args->gfid, gfid);
+        if (ret)
+                goto out;
+
+        ret = ga_fill_tmp_loc (loc, this, gfid, args->bname,
+                               xdata, &tmp_loc);
+        if (ret)
+                goto out;
+
+        new_frame = copy_frame (frame);
+        if (!new_frame) {
+                /* ret is 0 here; report the failure so the caller unwinds */
+                ret = -1;
+                goto out;
+        }
+
+        /* the callback needs the original frame to send the reply */
+        new_frame->local = (void *)frame;
+
+        STACK_WIND (new_frame, ga_heal_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD(this)->fops->lookup,
+                    &tmp_loc, xdata);
+
+        ret = 0;
+out:
+        if (args)
+                ga_heal_args_free (args);
+
+        loc_wipe (&tmp_loc);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return ret;
+}
+
+/*
+ * Callback for the plain setxattr wound by ga_setxattr(): passes the result
+ * straight back to the caller.
+ */
+int32_t
+ga_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/*
+ * setxattr fop.  Refused with ENOTSUP on the ".gfid" directory itself.
+ * Two keys are intercepted and turned into other operations:
+ *   GF_FUSE_AUX_GFID_NEWFILE -> ga_new_entry()  (create an entry)
+ *   GF_FUSE_AUX_GFID_HEAL    -> ga_heal_entry() (named lookup with gfid)
+ * Everything else is forwarded to the child after virtual inodes in @loc
+ * have been replaced with the real ones.
+ *
+ * Failures of the helpers are reported with op_errno ENOMEM, the initial
+ * value of op_errno.
+ */
+int32_t
+ga_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ data_t *data = NULL;
+ int op_errno = ENOMEM;
+ int ret = 0;
+ loc_t ga_loc = {0, };
+
+ GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err);
+
+ data = dict_get (dict, GF_FUSE_AUX_GFID_NEWFILE);
+ if (data) {
+ /* on success the reply is sent from the helper's callback */
+ ret = ga_new_entry (frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ data = dict_get (dict, GF_FUSE_AUX_GFID_HEAL);
+ if (data) {
+ ret = ga_heal_entry (frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ //If the inode is a virtual inode change the inode otherwise perform
+ //the operation on same inode
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, ga_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &ga_loc, dict, flags,
+ xdata);
+
+ loc_wipe (&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
+}
+
+
+/*
+ * Callback of a lookup done through the ".gfid/<gfid>" path.
+ *
+ * The inode handed back to the caller (cbk_inode) is frame->local when
+ * ga_lookup() stored one there, otherwise a new reference on @inode.  Failed
+ * lookups and non-directories are unwound as they are.  For a directory the
+ * reply must use an inode different from the real one: the real inode
+ * (found in the table, or linked here) is stored in cbk_inode's context and
+ * the gfid in @buf is replaced by cbk_inode's gfid, or by a newly generated
+ * one when cbk_inode has none yet.
+ *
+ * ESTALE is reported to the caller as ENOENT.
+ */
+int32_t
+ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, inode_t *inode,
+                       struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        int       ret          = 0;
+        inode_t  *cbk_inode    = NULL;
+        inode_t  *true_inode   = NULL;
+        uuid_t    random_gfid  = {0,};
+        inode_t  *linked_inode = NULL;
+
+        if (frame->local)
+                cbk_inode = frame->local;
+        else
+                cbk_inode = inode_ref (inode);
+
+        frame->local = NULL;
+        if (op_ret)
+                goto unwind;
+
+        if (!IA_ISDIR (buf->ia_type))
+                goto unwind;
+
+        /* need to send back a different inode for linking in itable */
+        if (cbk_inode == inode) {
+                /* check if the inode is in the 'itable' or
+                   if its just previously discover()'d inode */
+                true_inode = inode_find (inode->table, buf->ia_gfid);
+                if (!true_inode) {
+                        /* This unref is for 'inode_ref()' done in beginning.
+                           This is needed as cbk_inode is allocated new inode
+                           whose unref is taken at the end*/
+                        inode_unref (cbk_inode);
+                        cbk_inode = inode_new (inode->table);
+
+                        if (!cbk_inode) {
+                                op_ret = -1;
+                                op_errno = ENOMEM;
+                                goto unwind;
+                        }
+                        /* the inode is not present in itable, ie, the actual
+                           path is not yet looked up. Use the current inode
+                           itself for now */
+
+                        linked_inode = inode_link (inode, NULL, NULL, buf);
+                        inode = linked_inode;
+                } else {
+                        /* 'inode_ref()' has been done in inode_find() */
+                        inode = true_inode;
+                }
+
+                ret = inode_ctx_put (cbk_inode, this, (uint64_t)inode);
+                if (ret) {
+                        /* note the space between the two literals: they are
+                           concatenated into a single message */
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "failed to set the inode ctx with "
+                                "the actual inode");
+                        if (inode)
+                                inode_unref (inode);
+                }
+                inode = NULL;
+        }
+
+        if (!gf_uuid_is_null (cbk_inode->gfid)) {
+                /* if the previous linked inode is used, use the
+                   same gfid */
+                gf_uuid_copy (random_gfid, cbk_inode->gfid);
+        } else {
+                /* replace the buf->ia_gfid to a random gfid
+                   for directory, for files, what we received is fine */
+                gf_uuid_generate (random_gfid);
+        }
+
+        gf_uuid_copy (buf->ia_gfid, random_gfid);
+
+        buf->ia_ino = gfid_to_ino (buf->ia_gfid);
+
+unwind:
+        /* Lookup on non-existing gfid returns ESTALE.
+           Convert into ENOENT for virtual lookup*/
+        if (op_errno == ESTALE)
+                op_errno = ENOENT;
+
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf,
+                             xdata, postparent);
+
+        /* Also handles inode_unref of frame->local if done in ga_lookup */
+        if (cbk_inode)
+                inode_unref (cbk_inode);
+
+        return 0;
+}
+
+/*
+ * Callback for ordinary (non-virtual) lookups.  The reply is always passed
+ * through unchanged; in addition, a successful lookup of the root gfid
+ * refreshes the cached root stat and the fake stat of ".gfid" derived from
+ * it (same attributes, last gfid byte and inode number set to GF_AUX_GFID).
+ */
+int32_t
+ga_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ ga_private_t *priv = NULL;
+
+ /* if the entry in question is not 'root',
+ then follow the normal path */
+ if (op_ret || !__is_root_gfid(buf->ia_gfid))
+ goto unwind;
+
+ priv = this->private;
+
+ /* do we need to copy root stbuf everytime? */
+ /* mostly yes, as we want to have the 'stat' info show latest
+ in every _cbk() */
+
+ /* keep the reference for root stat buf */
+ priv->root_stbuf = *buf;
+ priv->gfiddir_stbuf = priv->root_stbuf;
+ priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID;
+ priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID;
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+/*
+ * lookup fop.  Decides, in this order, how a lookup is served:
+ *
+ *  1. nameless lookup whose gfid is the ".gfid" gfid: answered locally with
+ *     the cached fake stat;
+ *  2. no name (discover): wound unchanged;
+ *  3. revalidate of a non-directory: wound by gfid only when the parent is
+ *     ".gfid", otherwise wound unchanged;
+ *  4. name ".gfid" directly under root: answered locally with the fake stat;
+ *  5. parent is not ".gfid": when the parent carries a gfid-access context
+ *     the lookup is wound with the parent replaced by the stored inode,
+ *     otherwise it is wound unchanged;
+ *  6. parent is ".gfid": the name must parse as a uuid; the lookup is then
+ *     wound by that gfid with ga_virtual_lookup_cbk as callback.
+ *
+ * Errors are unwound with op_errno ENOENT unless set otherwise below.
+ */
+int32_t
+ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+ uuid_t tmp_gfid = {0,};
+ loc_t tmp_loc = {0,};
+ uint64_t value = 0;
+ inode_t *inode = NULL;
+ inode_t *true_inode = NULL;
+ int32_t op_errno = ENOENT;
+
+ priv = this->private;
+
+ /* Handle nameless lookup on ".gfid" */
+ if (!loc->parent && __is_gfid_access_dir(loc->gfid)) {
+ STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+ }
+
+ /* if its discover(), no need for any action here */
+ if (!loc->name)
+ goto wind;
+
+ /* if its revalidate, and inode is not of type directory,
+ proceed with 'wind' */
+ if (loc->inode && loc->inode->ia_type &&
+ !IA_ISDIR (loc->inode->ia_type)) {
+
+ /* a revalidate on ".gfid/<dentry>" is possible, check for it */
+ if (((loc->parent &&
+ __is_gfid_access_dir (loc->parent->gfid)) ||
+ __is_gfid_access_dir (loc->pargfid))) {
+
+ /* here, just send 'loc->gfid' and 'loc->inode' */
+ tmp_loc.inode = inode_ref (loc->inode);
+ gf_uuid_copy (tmp_loc.gfid, loc->inode->gfid);
+
+ STACK_WIND (frame, default_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ &tmp_loc, xdata);
+
+ inode_unref (tmp_loc.inode);
+
+ return 0;
+ }
+
+ /* not something to bother, continue the flow */
+ goto wind;
+ }
+
+ /* need to check if the lookup is on virtual dir */
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) &&
+ ((loc->parent && __is_root_gfid (loc->parent->gfid)) ||
+ __is_root_gfid (loc->pargfid))) {
+ /* this means, the query is on '/.gfid', return the fake stat,
+ and say success */
+
+ STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+ }
+
+ /* now, check if the lookup() is on an existing entry,
+ but on gfid-path */
+ if (!((loc->parent && __is_gfid_access_dir (loc->parent->gfid)) ||
+ __is_gfid_access_dir (loc->pargfid))) {
+ if (!loc->parent)
+ goto wind;
+
+ /* a context on the parent marks it as a virtual directory */
+ ret = inode_ctx_get (loc->parent, this, &value);
+ if (ret)
+ goto wind;
+
+ inode = (inode_t *) value;
+
+ ret = loc_copy_overload_parent (&tmp_loc, loc, inode);
+ if (ret)
+ goto err;
+
+ STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, &tmp_loc, xdata);
+
+ loc_wipe (&tmp_loc);
+ return 0;
+ }
+
+ /* make sure the 'basename' is actually a 'canonical-gfid',
+ otherwise, return error */
+ ret = gf_uuid_parse (loc->name, tmp_gfid);
+ if (ret)
+ goto err;
+
+ /* if its fresh lookup, go ahead and send it down, if not,
+ for directory, we need indirection to actual dir inode */
+ if (!(loc->inode && loc->inode->ia_type))
+ goto discover;
+
+ /* revalidate on directory */
+ ret = inode_ctx_get (loc->inode, this, &value);
+ if (ret)
+ goto err;
+
+ inode = (void *)value;
+
+ /* valid inode, already looked up, work on that */
+ if (inode->ia_type)
+ goto discover;
+
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find (loc->inode->table, tmp_gfid);
+ if (true_inode) {
+ /* time do another lookup and update the context
+ with proper inode */
+ op_errno = ESTALE;
+ /* 'inode_ref()' done in inode_find */
+ inode_unref (true_inode);
+ goto err;
+ }
+
+discover:
+ /* for the virtual entries, we don't need to send 'gfid-req' key, as
+ for these entries, we don't want to 'set' a new gfid */
+ if (xdata)
+ dict_del (xdata, "gfid-req");
+
+ gf_uuid_copy (tmp_loc.gfid, tmp_gfid);
+
+ /* if revalidate, then we need to have the proper reference */
+ if (inode) {
+ /* wind on the stored inode; the caller's inode travels in
+ frame->local so the callback can hand it back */
+ tmp_loc.inode = inode_ref (inode);
+ frame->local = inode_ref (loc->inode);
+ } else {
+ tmp_loc.inode = inode_ref (loc->inode);
+ }
+
+ STACK_WIND (frame, ga_virtual_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ inode_unref (tmp_loc.inode);
+
+ return 0;
+
+wind:
+ /* used for all the normal lookup path */
+ STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+}
+
+/*
+ * mkdir fop.  Rejected by GFID_ACCESS_ENTRY_OP_CHECK when the target is
+ * "/.gfid" itself (ENOTSUP) or lies directly under ".gfid" (EPERM);
+ * otherwise forwarded unchanged to the child.
+ */
+int
+ga_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ /* sets op_errno and jumps to 'err' when the entry is not allowed */
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, loc->inode,
+ NULL, NULL, NULL, xdata);
+ return 0;
+}
+
+
+/*
+ * create fop.  Rejected by GFID_ACCESS_ENTRY_OP_CHECK when the target is
+ * "/.gfid" itself (ENOTSUP) or lies directly under ".gfid" (EPERM);
+ * otherwise forwarded unchanged to the child.
+ */
+int
+ga_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ /* sets op_errno and jumps to 'err' when the entry is not allowed */
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+
+ return 0;
+
+}
+
+/*
+ * symlink fop.  Rejected by GFID_ACCESS_ENTRY_OP_CHECK when the target is
+ * "/.gfid" itself (ENOTSUP) or lies directly under ".gfid" (EPERM);
+ * otherwise forwarded unchanged to the child.
+ */
+int
+ga_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ /* sets op_errno and jumps to 'err' when the entry is not allowed */
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * mknod fop.  Rejected by GFID_ACCESS_ENTRY_OP_CHECK when the target is
+ * "/.gfid" itself (ENOTSUP) or lies directly under ".gfid" (EPERM);
+ * otherwise forwarded unchanged to the child.
+ */
+int
+ga_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ /* sets op_errno and jumps to 'err' when the entry is not allowed */
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev,
+ umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * rmdir fop.  Rejected by GFID_ACCESS_ENTRY_OP_CHECK for "/.gfid" itself
+ * (ENOTSUP) and for entries directly under ".gfid" (EPERM).  Otherwise the
+ * request is forwarded on a copy of @loc in which virtual inodes have been
+ * replaced by the real ones; a failure to build that copy is reported with
+ * ENOMEM, the initial value of op_errno.
+ */
+int
+ga_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {0, };
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, default_rmdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ &ga_loc, flag, xdata);
+
+ /* the copy is only needed for the duration of the wind */
+ loc_wipe (&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * unlink fop.  Rejected by GFID_ACCESS_ENTRY_OP_CHECK for "/.gfid" itself
+ * (ENOTSUP) and for entries directly under ".gfid" (EPERM).  Otherwise the
+ * request is forwarded on a copy of @loc in which virtual inodes have been
+ * replaced by the real ones; a failure to build that copy is reported with
+ * ENOMEM, the initial value of op_errno.
+ */
+int
+ga_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {0, };
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, default_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ &ga_loc, xflag, xdata);
+
+ /* the copy is only needed for the duration of the wind */
+ loc_wipe (&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * rename fop.  Both @oldloc and @newloc must pass
+ * GFID_ACCESS_ENTRY_OP_CHECK (ENOTSUP for "/.gfid" itself, EPERM for entries
+ * directly under ".gfid").  The request is then forwarded on copies of both
+ * locs in which virtual inodes have been replaced by the real ones; a
+ * failure to build either copy is reported with ENOMEM.
+ */
+int
+ga_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = 0;
+ loc_t ga_oldloc = {0, };
+ loc_t ga_newloc = {0, };
+
+ GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy (&ga_oldloc, oldloc, this);
+ if (ret < 0)
+ goto err;
+
+ ret = ga_valid_inode_loc_copy (&ga_newloc, newloc, this);
+ if (ret < 0) {
+ /* the first copy succeeded and must be released here */
+ loc_wipe (&ga_oldloc);
+ goto err;
+ }
+
+ STACK_WIND (frame, default_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ &ga_oldloc, &ga_newloc, xdata);
+
+ loc_wipe (&ga_newloc);
+ loc_wipe (&ga_oldloc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+
+/*
+ * link fop.  Both @oldloc and @newloc must pass GFID_ACCESS_ENTRY_OP_CHECK
+ * (ENOTSUP for "/.gfid" itself, EPERM for entries directly under ".gfid").
+ * The request is then forwarded on copies of both locs in which virtual
+ * inodes have been replaced by the real ones; a failure to build either
+ * copy is reported with ENOMEM.
+ */
+int
+ga_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = 0;
+ loc_t ga_oldloc = {0, };
+ loc_t ga_newloc = {0, };
+
+ GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy (&ga_oldloc, oldloc, this);
+ if (ret < 0)
+ goto err;
+
+ ret = ga_valid_inode_loc_copy (&ga_newloc, newloc, this);
+ if (ret < 0) {
+ /* the first copy succeeded and must be released here */
+ loc_wipe (&ga_oldloc);
+ goto err;
+ }
+
+ STACK_WIND (frame, default_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ &ga_oldloc, &ga_newloc, xdata);
+
+ loc_wipe (&ga_newloc);
+ loc_wipe (&ga_oldloc);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * opendir fop.  Fails with ENOTSUP both for the ".gfid" directory itself
+ * and for any inode that carries a gfid-access context (a virtual
+ * directory); every other directory is forwarded unchanged.
+ */
+int32_t
+ga_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err);
+
+ /* also check if the loc->inode itself is virtual
+ inode, if yes, return with failure, mainly because we
+ can't handle all the readdirp and other things on it. */
+ if (inode_ctx_get (loc->inode, this, NULL) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ STACK_WIND (frame, default_opendir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * getxattr fop.  Fails with ENOTSUP on the ".gfid" directory itself;
+ * otherwise forwarded on a copy of @loc in which virtual inodes have been
+ * replaced by the real ones (ENOMEM when that copy cannot be built).
+ */
+int32_t
+ga_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {0, };
+
+ GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err);
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, &ga_loc, name, xdata);
+
+ loc_wipe (&ga_loc);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * stat fop.  A stat on the ".gfid" directory is answered locally with the
+ * cached fake stat (priv->gfiddir_stbuf).  Anything else is forwarded on a
+ * copy of @loc in which virtual inodes have been replaced by the real ones
+ * (ENOMEM when that copy cannot be built).
+ */
+int32_t
+ga_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {0, };
+ ga_private_t *priv = NULL;
+
+ priv = this->private;
+ /* If stat is on ".gfid" itself, do not wind further,
+ * return fake stat and return success.
+ */
+ if (__is_gfid_access_dir(loc->gfid))
+ goto out;
+
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, &ga_loc, xdata);
+
+ loc_wipe (&ga_loc);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+
+out:
+ /* success reply for ".gfid" built from the cached stat */
+ STACK_UNWIND_STRICT (stat, frame, 0, 0, &priv->gfiddir_stbuf, xdata);
+ return 0;
+}
+
+/*
+ * setattr fop.  Fails with ENOTSUP on the ".gfid" directory itself;
+ * otherwise forwarded on a copy of @loc in which virtual inodes have been
+ * replaced by the real ones (ENOMEM when that copy cannot be built).
+ */
+int32_t
+ga_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {0, };
+
+ GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err);
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, &ga_loc, stbuf, valid,
+ xdata);
+
+ loc_wipe (&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+
+ return 0;
+}
+
+/*
+ * removexattr fop.  Fails with ENOTSUP on the ".gfid" directory itself;
+ * otherwise forwarded on a copy of @loc in which virtual inodes have been
+ * replaced by the real ones (ENOMEM when that copy cannot be built).
+ */
+int32_t
+ga_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {0, };
+
+ GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err);
+ ret = ga_valid_inode_loc_copy (&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &ga_loc, name,
+ xdata);
+
+ loc_wipe (&ga_loc);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+
+ return 0;
+}
+
+
+/*
+ * Register this translator's memory-accounting types.  Returns -1 when
+ * @this is NULL, otherwise the result of xlator_mem_acct_init() (a warning
+ * is logged when that is non-zero).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = -1;
+
+        if (this == NULL)
+                return status;
+
+        status = xlator_mem_acct_init (this, gf_gfid_access_mt_end + 1);
+        if (status != 0)
+                gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                        " init failed");
+
+        return status;
+}
+
+/*
+ * Translator entry point.  Requires exactly one child, allocates the private
+ * structure with its two argument pools and creates the per-call local pool.
+ *
+ * Returns 0 on success and -1 on failure; on failure everything allocated
+ * for the private structure here is released again.
+ */
+int32_t
+init (xlator_t *this)
+{
+        ga_private_t *priv = NULL;
+        int           ret  = -1;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "not configured with exactly one child. exiting");
+                goto out;
+        }
+
+        /* This can be the top of graph in certain cases */
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "dangling volume. check volfile ");
+        }
+
+        /* TODO: define a mem-type structure */
+        priv = GF_CALLOC (1, sizeof (*priv), gf_gfid_access_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        priv->newfile_args_pool = mem_pool_new (ga_newfile_args_t, 512);
+        if (!priv->newfile_args_pool)
+                goto out;
+
+        priv->heal_args_pool = mem_pool_new (ga_heal_args_t, 512);
+        if (!priv->heal_args_pool)
+                goto out;
+
+        this->local_pool = mem_pool_new (ga_local_t, 16);
+        if (!this->local_pool) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create local_t's memory pool");
+                goto out;
+        }
+
+        this->private = priv;
+
+        ret = 0;
+out:
+        if (ret && priv) {
+                /* release both pools; the heal pool used to be leaked when
+                 * a later step failed */
+                if (priv->newfile_args_pool)
+                        mem_pool_destroy (priv->newfile_args_pool);
+                if (priv->heal_args_pool)
+                        mem_pool_destroy (priv->heal_args_pool);
+                GF_FREE (priv);
+        }
+
+        return ret;
+}
+
+/*
+ * Translator teardown: detach the private structure from @this and release
+ * its two argument pools and the structure itself.
+ */
+void
+fini (xlator_t *this)
+{
+        ga_private_t *priv = this->private;
+
+        this->private = NULL;
+        if (priv == NULL)
+                return;
+
+        if (priv->newfile_args_pool)
+                mem_pool_destroy (priv->newfile_args_pool);
+        if (priv->heal_args_pool)
+                mem_pool_destroy (priv->heal_args_pool);
+        GF_FREE (priv);
+}
+
+/*
+ * Statedump hook: for an inode that carries a gfid-access context, write an
+ * "inode" section containing the gfid of the inode stored in that context
+ * under the key "real-gfid".  Always returns 0.
+ */
+int32_t
+ga_dump_inodectx (xlator_t *this, inode_t *inode)
+{
+        char      key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+        uint64_t  ctx_value  = 0;
+        inode_t  *real_inode = NULL;
+
+        /* nothing to report for inodes without a context */
+        if (inode_ctx_get (inode, this, &ctx_value) != 0)
+                return 0;
+
+        real_inode = (void*) ctx_value;
+        gf_proc_dump_build_key (key_prefix, this->name, "inode");
+        gf_proc_dump_add_section (key_prefix);
+        gf_proc_dump_write ("real-gfid", "%s",
+                            uuid_utoa (real_inode->gfid));
+
+        return 0;
+}
+
+/* File operations implemented by this translator; fops not listed here are
+ * left unset in this table. */
+struct xlator_fops fops = {
+ .lookup = ga_lookup,
+
+ /* entry fops */
+ .mkdir = ga_mkdir,
+ .mknod = ga_mknod,
+ .create = ga_create,
+ .symlink = ga_symlink,
+ .link = ga_link,
+ .unlink = ga_unlink,
+ .rmdir = ga_rmdir,
+ .rename = ga_rename,
+
+ /* handle any other directory operations here */
+ .opendir = ga_opendir,
+ .stat = ga_stat,
+ .setattr = ga_setattr,
+ .getxattr = ga_getxattr,
+ .removexattr = ga_removexattr,
+
+ /* special fop to handle more entry creations */
+ .setxattr = ga_setxattr,
+};
+
+/* Only forget is hooked: it releases the inode kept in the inode context. */
+struct xlator_cbks cbks = {
+ .forget = ga_forget,
+};
+
+/* Statedump support: dumps the real gfid behind a virtual inode. */
+struct xlator_dumpops dumpops = {
+ .inodectx = ga_dump_inodectx,
+};
+
+struct volume_options options[] = {
+ /* This translator doesn't take any options, or provide any options */
+ { .key = {NULL} },
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
new file mode 100644
index 00000000000..2b5e4fd4184
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GFID_ACCESS_H__
+#define __GFID_ACCESS_H__
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "gfid-access-mem-types.h"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile"
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+
+#define GF_GFID_KEY "GLUSTERFS_GFID"
+#define GF_GFID_DIR ".gfid"
+#define GF_AUX_GFID 0xd
+
+/*
+ * GFID_ACCESS_ENTRY_OP_CHECK - refuse entry operations that touch the
+ * virtual gfid directory.
+ *
+ *   loc: pointer expression yielding the loc being operated on
+ *        (evaluated more than once; pass an expression without side
+ *        effects)
+ *   err: lvalue that receives the error code when the check fails
+ *   lbl: label jumped to when the check fails
+ *
+ * err is set to ENOTSUP when loc names GF_GFID_DIR with the root as its
+ * parent, and to EPERM when loc's parent is the gfid-access directory.
+ * The loc and err arguments are parenthesized on every use so that any
+ * expression can be passed safely.
+ */
+#define GFID_ACCESS_ENTRY_OP_CHECK(loc,err,lbl) do {                    \
+                /* need to check if the lookup is on virtual dir */     \
+                if (((loc)->name &&                                     \
+                     !strcmp (GF_GFID_DIR, (loc)->name)) &&             \
+                    (((loc)->parent &&                                  \
+                      __is_root_gfid ((loc)->parent->gfid)) ||          \
+                     __is_root_gfid ((loc)->pargfid))) {                \
+                        (err) = ENOTSUP;                                \
+                        goto lbl;                                       \
+                }                                                       \
+                                                                        \
+                /* now, check if the lookup() is on an existing */      \
+                /* entry, but on gfid-path                      */      \
+                if (((loc)->parent &&                                   \
+                     __is_gfid_access_dir ((loc)->parent->gfid)) ||     \
+                    __is_gfid_access_dir ((loc)->pargfid)) {            \
+                        (err) = EPERM;                                  \
+                        goto lbl;                                       \
+                }                                                       \
+        } while (0)
+
+/*
+ * GFID_ACCESS_INODE_OP_CHECK - refuse inode operations on the virtual
+ * gfid directory itself.
+ *
+ *   loc: pointer expression yielding the loc being operated on
+ *   err: lvalue that receives ENOTSUP when the check fails
+ *   lbl: label jumped to when the check fails
+ *
+ * The loc and err arguments are parenthesized so that any expression can
+ * be passed safely.
+ */
+#define GFID_ACCESS_INODE_OP_CHECK(loc,err,lbl) do {                    \
+                /*Check if it is on .gfid*/                             \
+                if (__is_gfid_access_dir ((loc)->gfid)) {               \
+                        (err) = ENOTSUP;                                \
+                        goto lbl;                                       \
+                }                                                       \
+        } while (0)
+/*
+ * Arguments describing a new entry to be created: owner ids, the gfid in
+ * string form, the file mode, the basename, and a per-type union
+ * (symlink / mknod / mkdir).
+ * Presumably this is the decoded payload of the GF_FUSE_AUX_GFID_NEWFILE
+ * key -- confirm against the parser in gfid-access.c.
+ * NOTE(review): the struct is packed, so members following the
+ * (UUID_CANONICAL_FORM_LEN + 1)-byte gfid array are not naturally
+ * aligned; confirm this is acceptable on all target platforms.
+ */
+typedef struct {
+ unsigned int uid;
+ unsigned int gid;
+ /* sized for UUID_CANONICAL_FORM_LEN characters plus one extra byte */
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ unsigned int st_mode;
+ char *bname;
+
+ /* type-specific arguments; which member is valid is not recorded here */
+ union {
+ struct _symlink_in {
+ char *linkpath;
+ } __attribute__ ((__packed__)) symlink;
+
+ struct _mknod_in {
+ unsigned int mode;
+ unsigned int rdev;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mknod;
+
+ struct _mkdir_in {
+ unsigned int mode;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mkdir;
+ } __attribute__ ((__packed__)) args;
+} __attribute__((__packed__)) ga_newfile_args_t;
+
+/*
+ * Arguments for a heal request: a gfid in string form and a basename.
+ * Presumably the decoded payload of the GF_FUSE_AUX_GFID_HEAL key --
+ * confirm against gfid-access.c.
+ */
+typedef struct {
+ /* sized for UUID_CANONICAL_FORM_LEN characters plus one extra byte */
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ char *bname; /* a null terminated basename */
+} __attribute__((__packed__)) ga_heal_args_t;
+
+/*
+ * Per-translator private state: two cached iatt buffers and the memory
+ * pools for ga_newfile_args_t / ga_heal_args_t style arguments.
+ */
+struct ga_private {
+ /* root inode's stbuf */
+ struct iatt root_stbuf;
+ /* stbuf kept for the gfid directory (see GF_GFID_DIR) */
+ struct iatt gfiddir_stbuf;
+ /* pools are destroyed individually at teardown if non-NULL */
+ struct mem_pool *newfile_args_pool;
+ struct mem_pool *heal_args_pool;
+};
+typedef struct ga_private ga_private_t;
+
+/*
+ * Per-call local state: the originating frame, caller ids, the loc being
+ * operated on, creation parameters (mode, rdev, umask) and an xdata dict.
+ * NOTE(review): ownership of orig_frame and xdata (who unrefs them) is
+ * not visible from this header -- confirm in gfid-access.c.
+ */
+struct __ga_local {
+ call_frame_t *orig_frame;
+ unsigned int uid;
+ unsigned int gid;
+ loc_t loc;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+ dict_t *xdata;
+};
+typedef struct __ga_local ga_local_t;
+
+#endif /* __GFID_ACCESS_H__ */
diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am
new file mode 100644
index 00000000000..060429ecf0f
--- /dev/null
+++ b/xlators/features/glupy/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src examples
+
+CLEANFILES =
diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md
new file mode 100644
index 00000000000..4b8b863ef39
--- /dev/null
+++ b/xlators/features/glupy/doc/README.md
@@ -0,0 +1,44 @@
+This is just the very start of a GlusterFS[1] meta-translator that will
+allow translator code to be written in Python. It's based on the standard
+Python embedding (not extending) techniques, plus a dash of the ctypes module.
+The interface is a pretty minimal adaptation of the dispatches and callbacks
+from the C API[2] to Python, as follows:
+
+* Dispatch functions and callbacks must be defined on an "xlator" class
+ derived from gluster.Translator so that they'll be auto-registered with
+ the C translator during initialization.
+
+* For each dispatch or callback function you want to intercept, you define a
+ Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator.
+
+* The arguments for each operation are different, so you'll need to refer to
+ the C API. GlusterFS-specific types are used (though only loc\_t is fully
+ defined so far) and type correctness is enforced by ctypes.
+
+* If you do intercept a dispatch function, it is your responsibility to call
+ xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass
+ the request to the next translator. If you do not intercept a function, it
+ will default the same way as for C (pass through to the same operation with
+ the same arguments on the first child translator).
+
+* If you intercept a callback function, it is your responsibility to call
+ xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back
+ to the caller.
+
+So far only the lookup and create operations are handled this way, to support
+the "negative lookup" example. Now that the basic infrastructure is in place,
+adding more functions should be very quick, though with that much boilerplate I
+might pause to write a code generator. I also plan to add structure
+definitions and interfaces for some of the utility functions in libglusterfs
+(especially those having to do with inode and fd context) in the fairly near
+future. Note that you can also use ctypes to get at anything not explicitly
+exposed to Python already.
+
+_If you're coming here because of the Linux Journal article, please note that
+the code has evolved since that was written. The version that matches the
+article is here:_
+
+https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a
+
+[1] http://www.gluster.org
+[2] http://pl.atyp.us/hekafs.org/dist/xlator_api_2.html
diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING
new file mode 100644
index 00000000000..e05f17f498f
--- /dev/null
+++ b/xlators/features/glupy/doc/TESTING
@@ -0,0 +1,9 @@
+Loading a translator written in Python using the glupy meta translator
+-------------------------------------------------------------------------------
+'test.vol' is a simple volfile with the debug-trace Python translator on top
+of a brick. The volfile can be mounted using the following command.
+
+$ glusterfs --debug -f test.vol /path/to/mntpt
+
+If file operations are then performed on the newly mounted file system, log
+output will be printed by the Python translator on the standard output.
diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol
new file mode 100644
index 00000000000..0751a488c1f
--- /dev/null
+++ b/xlators/features/glupy/doc/test.vol
@@ -0,0 +1,10 @@
+volume vol-posix
+ type storage/posix
+ option directory /path/to/brick
+end-volume
+
+volume vol-glupy
+ type features/glupy
+ option module-name debug-trace
+ subvolumes vol-posix
+end-volume
diff --git a/xlators/features/glupy/examples/Makefile.am b/xlators/features/glupy/examples/Makefile.am
new file mode 100644
index 00000000000..c26abeaafb6
--- /dev/null
+++ b/xlators/features/glupy/examples/Makefile.am
@@ -0,0 +1,5 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+glupyexamplesdir = $(xlatordir)/glupy
+
+glupyexamples_PYTHON = negative.py helloworld.py debug-trace.py
diff --git a/xlators/features/glupy/examples/debug-trace.py b/xlators/features/glupy/examples/debug-trace.py
new file mode 100644
index 00000000000..6eef1b58b8f
--- /dev/null
+++ b/xlators/features/glupy/examples/debug-trace.py
@@ -0,0 +1,775 @@
+import sys
+import stat
+from uuid import UUID
+from time import strftime, localtime
+from gluster.glupy import *
+
+# This translator was written primarily to test the fop entry point definitions
+# and structure definitions in 'glupy.py'.
+
+# It is similar to the C language debug-trace translator, which logs the
+# arguments passed to the fops and their corresponding cbk functions.
+
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+dl.get_rootunique.restype = c_uint64
+dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ]
+
+# Render a gfid (an iterable of integer byte values) as a canonical UUID
+# string: each element is formatted as two hex digits, the digits are
+# joined, and the result is passed through uuid.UUID for hyphenation.
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+
+# Build a POSIX st_mode integer from GlusterFS iatt fields.
+#   prot     -- object exposing suid/sgid/sticky flags and owner/group/other
+#               sub-objects, each with read/write/execn flags
+#   filetype -- one of the IA_* file type constants
+# Returns the stat.S_IF* type bit OR-ed with the permission bits.  An
+# IA_INVAL (or unrecognised) filetype contributes no type bit.
+def st_mode_from_ia (prot, filetype):
+ st_mode = 0
+ type_bit = 0
+ prot_bit = 0
+
+ # map the IA_* file type onto the matching stat.S_IF* constant
+ if filetype == IA_IFREG:
+ type_bit = stat.S_IFREG
+ elif filetype == IA_IFDIR:
+ type_bit = stat.S_IFDIR
+ elif filetype == IA_IFLNK:
+ type_bit = stat.S_IFLNK
+ elif filetype == IA_IFBLK:
+ type_bit = stat.S_IFBLK
+ elif filetype == IA_IFCHR:
+ type_bit = stat.S_IFCHR
+ elif filetype == IA_IFIFO:
+ type_bit = stat.S_IFIFO
+ elif filetype == IA_IFSOCK:
+ type_bit = stat.S_IFSOCK
+ elif filetype == IA_INVAL:
+ pass
+
+
+ # special bits
+ if prot.suid:
+ prot_bit |= stat.S_ISUID
+ if prot.sgid:
+ prot_bit |= stat.S_ISGID
+ if prot.sticky:
+ prot_bit |= stat.S_ISVTX
+
+ # owner permissions
+ if prot.owner.read:
+ prot_bit |= stat.S_IRUSR
+ if prot.owner.write:
+ prot_bit |= stat.S_IWUSR
+ if prot.owner.execn:
+ prot_bit |= stat.S_IXUSR
+
+ # group permissions
+ if prot.group.read:
+ prot_bit |= stat.S_IRGRP
+ if prot.group.write:
+ prot_bit |= stat.S_IWGRP
+ if prot.group.execn:
+ prot_bit |= stat.S_IXGRP
+
+ # other permissions
+ if prot.other.read:
+ prot_bit |= stat.S_IROTH
+ if prot.other.write:
+ prot_bit |= stat.S_IWOTH
+ if prot.other.execn:
+ prot_bit |= stat.S_IXOTH
+
+ st_mode = (type_bit | prot_bit)
+
+ return st_mode
+
+
+# Format an iatt for trace output.
+#   buf -- pointer-like object whose .contents exposes the ia_* fields
+# Returns a single-line string with gfid, inode number, octal mode, link
+# count, uid, gid, size, blocks and the three timestamps rendered in
+# local time as "[Mon DD HH:MM:SS]".
+def trace_stat2str (buf):
+ gfid = uuid2str(buf.contents.ia_gfid)
+ mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type)
+ atime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_atime))
+ mtime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_mtime))
+ ctime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_ctime))
+ return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+
+ "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+
+ "ctime={10:s})").format(gfid, buf.contents.ia_no, mode,
+ buf.contents.ia_nlink,
+ buf.contents.ia_uid,
+ buf.contents.ia_gid,
+ buf.contents.ia_size,
+ buf.contents.ia_blocks,
+ atime_buf, mtime_buf,
+ ctime_buf)
+
+class xlator(Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+ self.gfids = {}
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " +
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent):
+ unique =dl.get_rootunique(frame)
+ key =dl.get_id(frame)
+ if op_ret == 0:
+ gfid = uuid2str(buf.contents.ia_gfid)
+ statstr = trace_stat2str(buf)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s}; " +
+ "*postparent={4:s}").format(unique, gfid,
+ op_ret, statstr,
+ postparentstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent)
+ return 0
+
+ def create_fop(self, frame, this, loc, flags, mode, umask, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " +
+ "umask=0{6:o}").format(unique, gfid, loc.contents.path,
+ fd, flags, mode, umask)
+ dl.wind_create(frame, POINTER(xlator_t)(), loc, flags,mode,
+ umask, fd, xdata)
+ return 0
+
+ def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret >= 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " +
+ "*preparent={5:s};" +
+ " *postparent={6:s}").format(unique, gfid, op_ret,
+ fd, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print ("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " +
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata)
+ return 0
+
+ def open_fop(self, frame, this, loc, flags, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}; fd={4:s}").format(unique, gfid,
+ loc.contents.path, flags,
+ fd)
+ self.gfids[key] = gfid
+ dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata)
+ return 0
+
+ def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; "
+ "op_errno={3:d}; *fd={4:s}").format(unique, gfid,
+ op_ret, op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd,
+ xdata)
+ return 0
+
+ def readv_fop(self, frame, this, fd, size, offset, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+
+ "fd={2:s}; size ={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, size, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset,
+ flags, xdata)
+ return 0
+
+ def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector,
+ count, buf, iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret >= 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; *buf={3:s};").format(unique, gfid,
+ op_ret,
+ statstr)
+
+ else:
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readv (frame, cookie, this, op_ret, op_errno,
+ vector, count, buf, iobref, xdata)
+ return 0
+
+ def writev_fop(self, frame, this, fd, vector, count, offset, flags,
+ iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}; count={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, count, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count,
+ offset, flags, iobref, xdata)
+ return 0
+
+ def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if op_ret >= 0:
+ preopstr = trace_stat2str(prebuf)
+ postopstr = trace_stat2str(postbuf)
+ print("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " +
+ "*prebuf={2:s}; " +
+ "*postbuf={3:s}").format(unique, op_ret, preopstr,
+ postopstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_writev (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata)
+ return 0
+
+ def opendir_fop(self, frame, this, loc, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "fd={3:s}").format(unique, gfid, loc.contents.path, fd)
+ self.gfids[key] = gfid
+ dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata)
+ return 0
+
+ def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret,
+ op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_opendir(frame, cookie, this, op_ret, op_errno,
+ fd, xdata)
+ return 0
+
+ def readdir_fop(self, frame, this, fd, size, offset, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " +
+ "size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset,
+ xdata)
+ return 0
+
+ def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}").format(unique, gfid, op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def readdirp_fop(self, frame, this, fd, size, offset, dictionary):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ " size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset,
+ dictionary)
+ return 0
+
+ def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def mkdir_fop(self, frame, this, loc, mode, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "mode={3:d}; umask=0{4:o}").format(unique, gfid,
+ loc.contents.path, mode,
+ umask)
+ dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask,
+ xdata)
+ return 0
+
+ def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid, op_ret,
+ statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def rmdir_fop(self, frame, this, loc, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata)
+ return 0
+
+ def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s}").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def stat_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " +
+ " path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s};").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_stat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def fstat_fop(self, frame, this, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}").format(unique, gfid, fd)
+ self.gfids[key] = gfid
+ dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata)
+ return 0
+
+ def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; *buf={3:s}").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique.
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_fstat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def statfs_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ if loc.contents.inode:
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ else:
+ gfid = "0"
+ print("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ #TBD: print buf (pointer to an iovec type object)
+ print("GLUPY TRACE STATFS CBK {0:d}: "+
+ "op_ret={1:d}").format(unique, op_ret)
+ else:
+ print("GLUPY TRACE STATFS CBK- {0:d}"+
+ "op_ret={1:d}; op_errno={2:d}").format(unique,
+ op_ret,
+ op_errno)
+ dl.unwind_statfs(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def getxattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " name={3:s}").format(unique, gfid, loc.contents.path,
+ name)
+ self.gfids[key]=gfid
+ dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata)
+ return 0
+
+ def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}; "+
+ " dictionary={4:s}").format(unique, gfid, op_ret, op_errno,
+ dictionary)
+ del self.gfids[key]
+ dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def fgetxattr_fop(self, frame, this, fd, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ "name={3:s}").format(unique, gfid, fd, name)
+ self.gfids[key] = gfid
+ dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata)
+ return 0
+
+ def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d};"+
+ " dictionary={4:s}").format(unique, gfid, op_ret,
+ op_errno, dictionary)
+ del self.gfids[key]
+ dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary,
+ flags, xdata)
+ return 0
+
+ def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+
+ "flags={3:d}").format(unique, gfid, fd, flags)
+ self.gfids[key] = gfid
+ dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary,
+ flags, xdata)
+ return 0
+
+ def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def removexattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}; name={3:s}").format(unique, gfid,
+ loc.contents.path,
+ name)
+ self.gfids[key] = gfid
+ dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name,
+ xdata)
+ return 0
+
+ def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def link_fop(self, frame, this, oldloc, newloc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if (newloc.contents.inode):
+ newgfid = uuid2str(newloc.contents.inode.contents.gfid)
+ else:
+ newgfid = "0"
+ oldgfid = uuid2str(oldloc.contents.inode.contents.gfid)
+ print("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+
+ "newgfid={3:s};"+
+ "newpath={4:s}").format(unique, oldgfid,
+ oldloc.contents.path,
+ newgfid,
+ newloc.contents.path)
+ self.gfids[key] = oldgfid
+ dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc,
+ xdata)
+ return 0
+
+ def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+
+ "*stbuf={2:s}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def unlink_fop(self, frame, this, loc, xflag, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+
+ "flag={3:d}").format(unique, gfid, loc.contents.path,
+ xflag)
+ self.gfids[key] = gfid
+ dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag,
+ xdata)
+ return 0
+
+ def unlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid, op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_unlink(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def readlink_fop(self, frame, this, loc, size, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " size={3:d}").format(unique, gfid, loc.contents.path,
+ size)
+ self.gfids[key] = gfid
+ dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size,
+ xdata)
+ return 0
+
+ def readlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ buf, stbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(stbuf)
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid,
+ op_ret, op_errno,
+ buf, statstr)
+ else:
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf,
+ stbuf, xdata)
+ return 0
+
+ def symlink_fop(self, frame, this, linkpath, loc, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+
+ "linkpath={2:s}; path={3:s};"+
+ "umask=0{4:o}").format(unique, gfid, linkpath,
+ loc.contents.path, umask)
+ self.gfids[key] = gfid
+ dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc,
+ umask, xdata)
+ return 0
+
+ def symlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+
+ "*postparent={5:s}").format(unique, gfid,
+ op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_symlink(frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata)
+ return 0
diff --git a/xlators/features/glupy/examples/helloworld.py b/xlators/features/glupy/examples/helloworld.py
new file mode 100644
index 00000000000..b565a4e5bc3
--- /dev/null
+++ b/xlators/features/glupy/examples/helloworld.py
@@ -0,0 +1,19 @@
+import sys
+from gluster.glupy import *
+
+class xlator (Translator):
+ # Minimal example translator: prints a greeting for each LOOKUP request and each LOOKUP reply.
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+ # LOOKUP request hook: print, then wind the call on with a NULL xlator_t pointer.
+ def lookup_fop(self, frame, this, loc, xdata):
+ print "Python xlator: Hello!"
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+ # LOOKUP reply hook: print, then unwind the unchanged result to the caller.
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "Python xlator: Hello again!"
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent)
+ return 0
diff --git a/xlators/features/glupy/examples/negative.py b/xlators/features/glupy/examples/negative.py
new file mode 100644
index 00000000000..e7a4fc07ced
--- /dev/null
+++ b/xlators/features/glupy/examples/negative.py
@@ -0,0 +1,91 @@
+import sys
+from uuid import UUID
+from gluster.glupy import *
+
+# Negative-lookup-caching example. If a file wasn't there the last time we
+# looked, it's probably still not there. This translator keeps track of
+# those failed lookups for us, and returns ENOENT without needing to pass the
+# call any further for repeated requests.
+
+# If we were doing this for real, we'd need separate caches for each xlator
+# instance. The easiest way to do this would be to have xlator.__init__
+# "register" each instance in a module-global dict, with the key as the C
+# translator address and the value as the xlator object itself. For testing
+# and teaching, it's sufficient just to have one cache. The keys are parent
+# GFIDs, and the entries are lists of names within that parent that we know
+# don't exist.
+cache = {}  # parent GFID string -> set of names known to be missing (filled in lookup_cbk)
+
+# TBD: we need a better way of handling per-request data (frame->local in C).
+dl.get_id.restype = c_long  # declare the return type of dl.get_id
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]  # ... and its single argument, a call_frame_t pointer
+# Format a GFID (a sequence of 16 byte values) as a canonical hyphenated UUID string.
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+class xlator (Translator):
+ # Negative-lookup cache example: names whose LOOKUP failed with ENOENT are kept in the module-level `cache`.
+ def __init__ (self, c_this):
+ self.requests = {}  # in-flight requests: frame id -> (parent gfid string, name)
+ Translator.__init__(self,c_this)
+ # LOOKUP hook: answer ENOENT from the cache when possible, otherwise record the request and wind it on.
+ def lookup_fop (self, frame, this, loc, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "lookup FOP: %s:%s" % (pargfid, loc.contents.name)
+ # Check the cache.
+ if cache.has_key(pargfid):
+ if loc.contents.name in cache[pargfid]:
+ print "short-circuiting for %s:%s" % (pargfid,
+ loc.contents.name)
+ dl.unwind_lookup(frame,0,this,-1,2,None,None,None,None)  # op_ret=-1, op_errno=2 (ENOENT)
+ return 0
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])  # consumed and deleted by lookup_cbk
+ # TBD: get real child xl from init, pass it here
+ dl.wind_lookup(frame,POINTER(xlator_t)(),loc,xdata)
+ return 0
+ # LOOKUP reply hook: drop the name from the cache on success, add it on ENOENT, then unwind.
+ def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "lookup CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "found %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ elif op_errno == 2: # ENOENT
+ print "failed to find %s, adding to cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].add(name)
+ else:
+ cache[pargfid] = set([name])
+ del self.requests[key]
+ dl.unwind_lookup(frame,cookie,this,op_ret,op_errno,
+ inode,buf,xdata,postparent)
+ return 0
+ # CREATE hook: record (parent gfid, name) for the reply hook and wind the call on.
+ def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "create FOP: %s:%s" % (pargfid, loc.contents.name)
+ key = dl.get_id(frame)  # NOTE(review): shares self.requests with lookup; confirm create ids cannot equal lookup ids
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_create(frame,POINTER(xlator_t)(),loc,flags,mode,umask,fd,xdata)
+ return 0
+ # CREATE reply hook: on success the name now exists, so drop it from the cache; then unwind.
+ def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata):
+ print "create CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "created %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ del self.requests[key]
+ dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf,
+ preparent,postparent,xdata)
+ return 0
diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am
new file mode 100644
index 00000000000..9b39b4687a3
--- /dev/null
+++ b/xlators/features/glupy/src/Makefile.am
@@ -0,0 +1,29 @@
+xlator_LTLIBRARIES = glupy.la
+
+# Ensure GLUSTER_PYTHON_PATH is passed to glupy.so
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+glupydir = $(xlatordir)/glupy
+AM_CPPFLAGS = $(PYTHONDEV_CPPFLAGS) $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC)
+AM_CFLAGS = $(PYTHONDEV_CPPFLAGS) -Wall -fno-strict-aliasing \
+ -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" \
+ -DPATH_GLUSTERFS_GLUPY_MODULE=\"${xlatordir}/glupy${shrext_cmds}\" \
+ $(GF_CFLAGS)
+
+# Flags to build glupy.so with
+glupy_la_LDFLAGS = $(PYTHONDEV_LDFLAGS) -module -avoid-version -nostartfiles -export-symbols $(top_srcdir)/xlators/features/glupy/src/glupy.sym
+glupy_la_SOURCES = glupy.c
+glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ -lpthread -l$(BUILD_PYTHON_LIB)
+
+noinst_HEADERS = glupy.h
+
+# Install __init__.py into the Python site-packages area
+pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster
+pyglupy_PYTHON = __init__.py
+
+# Install glupy/__init__.py into the Python site-packages area
+SUBDIRS = glupy
+
+CLEANFILES =
+
+EXTRA_DIST = glupy.sym
diff --git a/xlators/features/glupy/src/__init__.py.in b/xlators/features/glupy/src/__init__.py.in
new file mode 100644
index 00000000000..3ad9513f40e
--- /dev/null
+++ b/xlators/features/glupy/src/__init__.py.in
@@ -0,0 +1,2 @@
+from pkgutil import extend_path
+__path__ = extend_path(__path__, __name__)  # let this package span same-named directories on sys.path
diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c
new file mode 100644
index 00000000000..bca476427c8
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.c
@@ -0,0 +1,2496 @@
+/*
+ Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+#include <Python.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "glupy.h"
+
+/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */
+
+pthread_key_t gil_init_key;
+/*
+ * Acquires the Python GIL state for the calling thread and returns the token
+ * that must later be handed to glupy_leave(). The first time a thread gets
+ * here (no thread-specific value stored under gil_init_key yet) it calls
+ * PyEval_ReleaseLock() and marks the key before PyGILState_Ensure().
+ * NOTE(review): the reason for the first-entry release is not visible in
+ * this part of the file - confirm against the interpreter setup code.
+ */
+PyGILState_STATE
+glupy_enter (void)
+{
+ if (!pthread_getspecific(gil_init_key)) {
+ PyEval_ReleaseLock();
+ (void)pthread_setspecific(gil_init_key,(void *)1);
+ }
+
+ return PyGILState_Ensure();
+}
+/* Releases the GIL state token previously returned by glupy_enter(). */
+void
+glupy_leave (PyGILState_STATE gstate)
+{
+ PyGILState_Release(gstate);
+}
+
+/* FOP: LOOKUP */
+/*
+ * LOOKUP reply path. Without a handler in priv->cbks[GLUPY_LOOKUP] this
+ * clears frame->local and unwinds the reply unchanged. With one, the handler
+ * is called between glupy_enter()/glupy_leave() and its return value is
+ * returned; no unwind is issued here on that path.
+ */
+int32_t
+glupy_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_LOOKUP]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+/*
+ * LOOKUP entry point. Without a handler in priv->fops[GLUPY_LOOKUP] the call
+ * is wound straight to the first child. With one, frame->local is set to the
+ * next value of a static counter and the handler is called between
+ * glupy_enter()/glupy_leave(); no wind is issued here on that path.
+ * NOTE(review): next_id is local to this function, so the ids it hands out
+ * can repeat the ids produced by the other fops' own counters.
+ */
+int32_t
+glupy_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_LOOKUP]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+/*
+ * Winds LOOKUP to xl with glupy_lookup_cbk as the callback. A NULL xl, or
+ * an xl equal to THIS, is replaced by the first child of THIS.
+ */
+void
+wind_lookup (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_lookup_cbk,xl,xl->fops->lookup,loc,xdata);
+}
+/*
+ * Clears frame->local and unwinds LOOKUP with the given results. The cookie
+ * and this arguments are not referenced in the body.
+ */
+void
+unwind_lookup (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(lookup,frame,op_ret,op_errno,
+ inode,buf,xdata,postparent);
+}
+/* Stores fop in priv->fops[GLUPY_LOOKUP]; py_this is an xlator_t address. */
+void
+set_lookup_fop (long py_this, fop_lookup_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_LOOKUP] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_LOOKUP]; py_this is an xlator_t address. */
+void
+set_lookup_cbk (long py_this, fop_lookup_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_LOOKUP] = (long)cbk;
+}
+
+/* FOP: CREATE */
+/*
+ * CREATE reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_CREATE]; otherwise returns the result of
+ * calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_CREATE]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+/*
+ * CREATE entry point: winds to the first child when priv->fops[GLUPY_CREATE]
+ * is unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_CREATE]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))(
+ frame, this, loc, flags, mode, umask, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
+}
+/*
+ * Winds CREATE to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_create_cbk as the callback.
+ */
+void
+wind_create (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_create_cbk,xl, xl->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+}
+/*
+ * Clears frame->local and unwinds CREATE with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_create (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+}
+/* Stores fop in priv->fops[GLUPY_CREATE]; py_this is an xlator_t address. */
+void
+set_create_fop (long py_this, fop_create_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_CREATE] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_CREATE]; py_this is an xlator_t address. */
+void
+set_create_cbk (long py_this, fop_create_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_CREATE] = (long)cbk;
+}
+
+/* FOP: OPEN */
+/*
+ * OPEN reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_OPEN]; otherwise returns the result of
+ * calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_OPEN]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+/*
+ * OPEN entry point: winds to the first child when priv->fops[GLUPY_OPEN] is
+ * unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_OPEN]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))(
+ frame, this, loc, flags, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+/*
+ * Winds OPEN to xl (the first child of THIS when xl is NULL or THIS itself),
+ * with glupy_open_cbk as the callback.
+ */
+void
+wind_open (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_open_cbk, xl, xl->fops->open, loc, flags,
+ fd, xdata);
+}
+/*
+ * Clears frame->local and unwinds OPEN with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_open (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+}
+/* Stores fop in priv->fops[GLUPY_OPEN]; py_this is an xlator_t address. */
+void
+set_open_fop (long py_this, fop_open_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_OPEN] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_OPEN]; py_this is an xlator_t address. */
+void
+set_open_cbk (long py_this, fop_open_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_OPEN] = (long)cbk;
+}
+
+/* FOP: READV */
+/*
+ * READV reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_READV]; otherwise returns the result of
+ * calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READV]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))(
+ frame, cookie, this, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+ return 0;
+}
+/*
+ * READV entry point: winds to the first child when priv->fops[GLUPY_READV]
+ * is unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READV]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))(
+ frame, this, fd, size, offset, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
+}
+/*
+ * Winds READV to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_readv_cbk as the callback.
+ */
+void
+wind_readv (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size,
+ offset, flags, xdata);
+}
+/*
+ * Clears frame->local and unwinds READV with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_readv (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+}
+/* Stores fop in priv->fops[GLUPY_READV]; py_this is an xlator_t address. */
+void
+set_readv_fop (long py_this, fop_readv_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_READV] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_READV]; py_this is an xlator_t address. */
+void
+set_readv_cbk (long py_this, fop_readv_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_READV] = (long)cbk;
+}
+
+/* FOP: WRITEV */
+/*
+ * WRITEV reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_WRITEV]; otherwise returns the result
+ * of calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_WRITEV]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))(
+ frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+/*
+ * WRITEV entry point: winds to the first child when priv->fops[GLUPY_WRITEV]
+ * is unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_WRITEV]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))(
+ frame, this, fd, vector, count, offset, flags,
+ iobref, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ offset, flags, iobref, xdata);
+ return 0;
+}
+/*
+ * Winds WRITEV to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_writev_cbk as the callback.
+ */
+void
+wind_writev (call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+}
+/*
+ * Clears frame->local and unwinds WRITEV with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_writev (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
+/* Stores fop in priv->fops[GLUPY_WRITEV]; py_this is an xlator_t address. */
+void
+set_writev_fop (long py_this, fop_writev_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_WRITEV] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_WRITEV]; py_this is an xlator_t address. */
+void
+set_writev_cbk (long py_this, fop_writev_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_WRITEV] = (long)cbk;
+}
+
+
+/* FOP: OPENDIR */
+/*
+ * OPENDIR reply path: unwinds unchanged (after clearing frame->local) when
+ * no handler is set in priv->cbks[GLUPY_OPENDIR]; otherwise returns the
+ * result of calling that handler between glupy_enter()/glupy_leave(),
+ * without unwinding here.
+ */
+int32_t
+glupy_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_OPENDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+/*
+ * OPENDIR entry point: winds to the first child when
+ * priv->fops[GLUPY_OPENDIR] is unset; otherwise stores the next value of
+ * this function's own static counter in frame->local and returns the result
+ * of calling the handler between glupy_enter()/glupy_leave(), without
+ * winding here.
+ */
+int32_t
+glupy_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_OPENDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))(
+ frame, this, loc, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+/*
+ * Winds OPENDIR to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_opendir_cbk as the callback.
+ */
+void
+wind_opendir (call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_opendir_cbk,xl,xl->fops->opendir,loc,fd,xdata);
+}
+/*
+ * Clears frame->local and unwinds OPENDIR with the given results; cookie
+ * and this are not referenced in the body.
+ */
+void
+unwind_opendir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(opendir,frame,op_ret,op_errno,
+ fd,xdata);
+}
+/* Stores fop in priv->fops[GLUPY_OPENDIR]; py_this is an xlator_t address. */
+void
+set_opendir_fop (long py_this, fop_opendir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_OPENDIR] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_OPENDIR]; py_this is an xlator_t address. */
+void
+set_opendir_cbk (long py_this, fop_opendir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_OPENDIR] = (long)cbk;
+}
+
+/* FOP: READDIR */
+/*
+ * READDIR reply path: unwinds unchanged (after clearing frame->local) when
+ * no handler is set in priv->cbks[GLUPY_READDIR]; otherwise returns the
+ * result of calling that handler between glupy_enter()/glupy_leave(),
+ * without unwinding here.
+ */
+int32_t
+glupy_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ entries, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
+/*
+ * READDIR entry point: winds to the first child when
+ * priv->fops[GLUPY_READDIR] is unset; otherwise stores the next value of
+ * this function's own static counter in frame->local and returns the result
+ * of calling the handler between glupy_enter()/glupy_leave(), without
+ * winding here.
+ */
+int32_t
+glupy_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))(
+ frame, this, fd, size, offset, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,fd, size, offset, xdata);
+ return 0;
+}
+/*
+ * Winds READDIR to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_readdir_cbk as the callback.
+ */
+void
+wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_readdir_cbk,xl,xl->fops->readdir,fd,size,offset,xdata);
+}
+/*
+ * Clears frame->local and unwinds READDIR with the given results; cookie
+ * and this are not referenced in the body.
+ */
+void
+unwind_readdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdir,frame,op_ret,op_errno,
+ entries, xdata);
+}
+/* Stores fop in priv->fops[GLUPY_READDIR]; py_this is an xlator_t address. */
+void
+set_readdir_fop (long py_this, fop_readdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READDIR] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_READDIR]; py_this is an xlator_t address. */
+void
+set_readdir_cbk (long py_this, fop_readdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READDIR] = (long)cbk;
+}
+
+
+/* FOP: READDIRP */
+/*
+ * READDIRP reply path: unwinds unchanged (after clearing frame->local) when
+ * no handler is set in priv->cbks[GLUPY_READDIRP]; otherwise returns the
+ * result of calling that handler between glupy_enter()/glupy_leave(),
+ * without unwinding here.
+ */
+int32_t
+glupy_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READDIRP]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))(
+ frame, cookie, this, op_ret, op_errno,
+ entries, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
+/*
+ * READDIRP entry point: winds to the first child when
+ * priv->fops[GLUPY_READDIRP] is unset; otherwise stores the next value of
+ * this function's own static counter in frame->local and returns the result
+ * of calling the handler between glupy_enter()/glupy_leave(), without
+ * winding here.
+ */
+int32_t
+glupy_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READDIRP]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))(
+ frame, this, fd, size, offset, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,fd, size, offset, xdata);
+ return 0;
+}
+/*
+ * Winds READDIRP to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_readdirp_cbk as the callback.
+ */
+void
+wind_readdirp (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_readdirp_cbk,xl,xl->fops->readdirp,fd,size,offset,xdata);
+}
+/*
+ * Clears frame->local and unwinds READDIRP with the given results; cookie
+ * and this are not referenced in the body.
+ */
+void
+unwind_readdirp (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdirp,frame,op_ret,op_errno,
+ entries, xdata);
+}
+/* Stores fop in priv->fops[GLUPY_READDIRP]; py_this is an xlator_t address. */
+void
+set_readdirp_fop (long py_this, fop_readdirp_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READDIRP] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_READDIRP]; py_this is an xlator_t address. */
+void
+set_readdirp_cbk (long py_this, fop_readdirp_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READDIRP] = (long)cbk;
+}
+
+
+/* FOP:STAT */
+/*
+ * STAT reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_STAT]; otherwise returns the result of
+ * calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_STAT]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+/*
+ * STAT entry point: winds to the first child when priv->fops[GLUPY_STAT] is
+ * unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_STAT]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+/*
+ * Winds STAT to xl (the first child of THIS when xl is NULL or THIS itself),
+ * with glupy_stat_cbk as the callback.
+ */
+void
+wind_stat (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_stat_cbk,xl,xl->fops->stat,loc,xdata);
+}
+/*
+ * Clears frame->local and unwinds STAT with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_stat (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(stat,frame,op_ret,op_errno,
+ buf,xdata);
+}
+/* Stores fop in priv->fops[GLUPY_STAT]; py_this is an xlator_t address. */
+void
+set_stat_fop (long py_this, fop_stat_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_STAT] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_STAT]; py_this is an xlator_t address. */
+void
+set_stat_cbk (long py_this, fop_stat_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_STAT] = (long)cbk;
+}
+
+
+/* FOP: FSTAT */
+/*
+ * FSTAT reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_FSTAT]; otherwise returns the result of
+ * calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FSTAT]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+/*
+ * FSTAT entry point: winds to the first child when priv->fops[GLUPY_FSTAT]
+ * is unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FSTAT]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))(
+ frame, this, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+/*
+ * Winds FSTAT to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_fstat_cbk as the callback.
+ */
+void
+wind_fstat (call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_fstat_cbk,xl,xl->fops->fstat,fd,xdata);
+}
+/*
+ * Clears frame->local and unwinds FSTAT with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_fstat (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(fstat,frame,op_ret,op_errno,
+ buf,xdata);
+}
+/* Stores fop in priv->fops[GLUPY_FSTAT]; py_this is an xlator_t address. */
+void
+set_fstat_fop (long py_this, fop_fstat_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FSTAT] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_FSTAT]; py_this is an xlator_t address. */
+void
+set_fstat_cbk (long py_this, fop_fstat_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FSTAT] = (long)cbk;
+}
+
+/* FOP:STATFS */
+/*
+ * STATFS reply path: unwinds unchanged (after clearing frame->local) when no
+ * handler is set in priv->cbks[GLUPY_STATFS]; otherwise returns the result
+ * of calling that handler between glupy_enter()/glupy_leave(), without
+ * unwinding here.
+ */
+int32_t
+glupy_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_STATFS]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+/*
+ * STATFS entry point: winds to the first child when priv->fops[GLUPY_STATFS]
+ * is unset; otherwise stores the next value of this function's own static
+ * counter in frame->local and returns the result of calling the handler
+ * between glupy_enter()/glupy_leave(), without winding here.
+ */
+int32_t
+glupy_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_STATFS]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
+/*
+ * Winds STATFS to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_statfs_cbk as the callback.
+ */
+void
+wind_statfs (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_statfs_cbk,xl,xl->fops->statfs,loc,xdata);
+}
+/*
+ * Clears frame->local and unwinds STATFS with the given results; cookie and
+ * this are not referenced in the body.
+ */
+void
+unwind_statfs (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(statfs,frame,op_ret,op_errno,
+ buf,xdata);
+}
+/* Stores fop in priv->fops[GLUPY_STATFS]; py_this is an xlator_t address. */
+void
+set_statfs_fop (long py_this, fop_statfs_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_STATFS] = (long)fop;
+}
+/* Stores cbk in priv->cbks[GLUPY_STATFS]; py_this is an xlator_t address. */
+void
+set_statfs_cbk (long py_this, fop_statfs_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_STATFS] = (long)cbk;
+}
+
+
+/* FOP: SETXATTR */
+/*
+ * SETXATTR reply path: unwinds unchanged (after clearing frame->local) when
+ * no handler is set in priv->cbks[GLUPY_SETXATTR]; otherwise returns the
+ * result of calling that handler between glupy_enter()/glupy_leave(),
+ * without unwinding here.
+ */
+int32_t
+glupy_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_SETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))(
+ frame, cookie, this, op_ret, op_errno,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+/*
+ * SETXATTR entry point: winds to the first child when
+ * priv->fops[GLUPY_SETXATTR] is unset; otherwise stores the next value of
+ * this function's own static counter in frame->local and returns the result
+ * of calling the handler between glupy_enter()/glupy_leave(), without
+ * winding here.
+ */
+int32_t
+glupy_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_SETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))(
+ frame, this, loc, dict, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict,
+ flags, xdata);
+ return 0;
+}
+/*
+ * Winds SETXATTR to xl (the first child of THIS when xl is NULL or THIS
+ * itself), with glupy_setxattr_cbk as the callback.
+ */
+void
+wind_setxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_setxattr_cbk, xl, xl->fops->setxattr,
+ loc, dict, flags, xdata);
+}
+
+/*
+ * Clears frame->local and unwinds SETXATTR with the given results; cookie
+ * and this are not referenced in the body.
+ */
+void
+unwind_setxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+}
+
+/* Store fop as the SETXATTR handler of the translator addressed by py_this. */
+void
+set_setxattr_fop (long py_this, fop_setxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_SETXATTR] = (long)fop;
+}
+
+/* Store cbk as the SETXATTR callback of the translator addressed by py_this. */
+void
+set_setxattr_cbk (long py_this, fop_setxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_SETXATTR] = (long)cbk;
+}
+
+/* FOP: GETXATTR */
+
+/*
+ * GETXATTR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *dict,
+                    dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_GETXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))(
+                        frame, cookie, this, op_ret, op_errno, dict,
+                        xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+        return 0;
+}
+
+/*
+ * GETXATTR entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_GETXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))(
+                        frame, this, loc, name, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_getxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr, loc, name,
+                    xdata);
+        return 0;
+}
+
+/*
+ * Wind GETXATTR to xl, substituting the first child when xl is NULL or
+ * is the current translator; glupy_getxattr_cbk receives the reply.
+ */
+void
+wind_getxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+               const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_getxattr_cbk, xl, xl->fops->getxattr,
+                    loc, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind GETXATTR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_getxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *dict,
+                 dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+}
+
+
+/* Store fop as the GETXATTR handler of the translator addressed by py_this. */
+void
+set_getxattr_fop (long py_this, fop_getxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_GETXATTR] = (long)fop;
+}
+
+
+/* Store cbk as the GETXATTR callback of the translator addressed by py_this. */
+void
+set_getxattr_cbk (long py_this, fop_getxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_GETXATTR] = (long)cbk;
+}
+
+/* FOP: FSETXATTR */
+
+/*
+ * FSETXATTR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_FSETXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * FSETXATTR entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_FSETXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))(
+                        frame, this, fd, dict, flags, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_fsetxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsetxattr, fd, dict,
+                    flags, xdata);
+        return 0;
+}
+
+/*
+ * Wind FSETXATTR to xl, substituting the first child when xl is NULL or
+ * is the current translator; glupy_fsetxattr_cbk receives the reply.
+ */
+void
+wind_fsetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+                dict_t *dict, int32_t flags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr,
+                    fd, dict, flags, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind FSETXATTR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_fsetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+}
+
+/* Store fop as the FSETXATTR handler of the translator addressed by py_this. */
+void
+set_fsetxattr_fop (long py_this, fop_fsetxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_FSETXATTR] = (long)fop;
+}
+
+/* Store cbk as the FSETXATTR callback of the translator addressed by py_this. */
+void
+set_fsetxattr_cbk (long py_this, fop_fsetxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_FSETXATTR] = (long)cbk;
+}
+
+/* FOP: FGETXATTR */
+
+/*
+ * FGETXATTR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict,
+                     dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_FGETXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))(
+                        frame, cookie, this, op_ret, op_errno, dict,
+                        xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+        return 0;
+}
+
+/*
+ * FGETXATTR entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_FGETXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))(
+                        frame, this, fd, name, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_fgetxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fgetxattr, fd, name,
+                    xdata);
+        return 0;
+}
+
+/*
+ * Wind FGETXATTR to xl, substituting the first child when xl is NULL or
+ * is the current translator; glupy_fgetxattr_cbk receives the reply.
+ */
+void
+wind_fgetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+                const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr,
+                    fd, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind FGETXATTR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_fgetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *dict,
+                  dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+                             xdata);
+}
+
+
+/* Store fop as the FGETXATTR handler of the translator addressed by py_this. */
+void
+set_fgetxattr_fop (long py_this, fop_fgetxattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_FGETXATTR] = (long)fop;
+}
+
+
+/* Store cbk as the FGETXATTR callback of the translator addressed by py_this. */
+void
+set_fgetxattr_cbk (long py_this, fop_fgetxattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_FGETXATTR] = (long)cbk;
+}
+
+/* FOP: REMOVEXATTR */
+
+/*
+ * REMOVEXATTR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_REMOVEXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))(
+                        frame, cookie, this, op_ret, op_errno, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * REMOVEXATTR entry point: with a registered handler, store a fresh id
+ * in frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   const char *name, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_REMOVEXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))(
+                        frame, this, loc, name, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_removexattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr, loc, name,
+                    xdata);
+        return 0;
+}
+
+/*
+ * Wind REMOVEXATTR to xl, substituting the first child when xl is NULL
+ * or is the current translator; glupy_removexattr_cbk receives the reply.
+ */
+void
+wind_removexattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+                  const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_removexattr_cbk, xl, xl->fops->removexattr,
+                    loc, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind REMOVEXATTR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_removexattr (call_frame_t *frame, long cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+}
+
+/* Store fop as the REMOVEXATTR handler of the translator addressed by py_this. */
+void
+set_removexattr_fop (long py_this, fop_removexattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_REMOVEXATTR] = (long)fop;
+}
+
+/* Store cbk as the REMOVEXATTR callback of the translator addressed by py_this. */
+void
+set_removexattr_cbk (long py_this, fop_removexattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP: FREMOVEXATTR */
+
+/*
+ * FREMOVEXATTR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_FREMOVEXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))(
+                        frame, cookie, this, op_ret, op_errno, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * FREMOVEXATTR entry point: with a registered handler, store a fresh id
+ * in frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    const char *name, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_FREMOVEXATTR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))(
+                        frame, this, fd, name, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_fremovexattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+                    xdata);
+        return 0;
+}
+
+/*
+ * Wind FREMOVEXATTR to xl, substituting the first child when xl is NULL
+ * or is the current translator; glupy_fremovexattr_cbk receives the reply.
+ */
+void
+wind_fremovexattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+                   const char *name, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr,
+                    fd, name, xdata);
+}
+
+
+/*
+ * Clear frame->local and unwind FREMOVEXATTR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_fremovexattr (call_frame_t *frame, long cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+}
+
+/* Store fop as the FREMOVEXATTR handler of the translator addressed by py_this. */
+void
+set_fremovexattr_fop (long py_this, fop_fremovexattr_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_FREMOVEXATTR] = (long)fop;
+}
+
+/* Store cbk as the FREMOVEXATTR callback of the translator addressed by py_this. */
+void
+set_fremovexattr_cbk (long py_this, fop_fremovexattr_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP: LINK */
+/*
+ * LINK completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_LINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * LINK entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+            dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_LINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))(
+                        frame, this, oldloc, newloc, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_link_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->link, oldloc, newloc,
+                    xdata);
+        return 0;
+}
+
+/*
+ * Wind LINK to xl, substituting the first child when xl is NULL or is
+ * the current translator; glupy_link_cbk receives the reply.
+ */
+void
+wind_link (call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_link_cbk, xl, xl->fops->link,
+                    oldloc, newloc, xdata);
+}
+
+/*
+ * Clear frame->local and unwind LINK with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_link (call_frame_t *frame, long cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, inode_t *inode,
+             struct iatt *buf, struct iatt *preparent,
+             struct iatt *postparent, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/* Store fop as the LINK handler of the translator addressed by py_this. */
+void
+set_link_fop (long py_this, fop_link_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_LINK] = (long)fop;
+}
+
+/* Store cbk as the LINK callback of the translator addressed by py_this. */
+void
+set_link_cbk (long py_this, fop_link_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_LINK] = (long)cbk;
+}
+
+/* FOP: SYMLINK */
+/*
+ * SYMLINK completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct iatt *buf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_SYMLINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * SYMLINK entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+               loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_SYMLINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))(
+                        frame, this, linkname, loc, umask, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_symlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->symlink, linkname, loc,
+                    umask, xdata);
+        return 0;
+}
+
+/*
+ * Wind SYMLINK to xl, substituting the first child when xl is NULL or
+ * is the current translator; glupy_symlink_cbk receives the reply.
+ */
+void
+wind_symlink (call_frame_t *frame, xlator_t *xl, const char *linkname,
+              loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_symlink_cbk, xl, xl->fops->symlink,
+                    linkname, loc, umask, xdata);
+}
+
+/*
+ * Clear frame->local and unwind SYMLINK with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_symlink (call_frame_t *frame, long cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/* Store fop as the SYMLINK handler of the translator addressed by py_this. */
+void
+set_symlink_fop (long py_this, fop_symlink_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_SYMLINK] = (long)fop;
+}
+
+/* Store cbk as the SYMLINK callback of the translator addressed by py_this. */
+void
+set_symlink_cbk (long py_this, fop_symlink_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_SYMLINK] = (long)cbk;
+}
+
+
+/* FOP: READLINK */
+/*
+ * READLINK completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, const char *path,
+                    struct iatt *buf, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_READLINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        path, buf, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path,
+                             buf, xdata);
+        return 0;
+}
+
+/*
+ * READLINK entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                size_t size, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_READLINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))(
+                        frame, this, loc, size, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_readlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readlink, loc,
+                    size, xdata);
+        return 0;
+}
+
+/*
+ * Wind READLINK to xl, substituting the first child when xl is NULL or
+ * is the current translator; glupy_readlink_cbk receives the reply.
+ */
+void
+wind_readlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+               size_t size, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_readlink_cbk, xl, xl->fops->readlink,
+                    loc, size, xdata);
+}
+
+/*
+ * Clear frame->local and unwind READLINK with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_readlink (call_frame_t *frame, long cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, const char *path,
+                 struct iatt *buf, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
+                             xdata);
+}
+
+/* Store fop as the READLINK handler of the translator addressed by py_this. */
+void
+set_readlink_fop (long py_this, fop_readlink_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_READLINK] = (long)fop;
+}
+
+/* Store cbk as the READLINK callback of the translator addressed by py_this. */
+void
+set_readlink_cbk (long py_this, fop_readlink_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_READLINK] = (long)cbk;
+}
+
+
+/* FOP: UNLINK */
+
+/*
+ * UNLINK completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iatt *preparent, struct iatt *postparent,
+                  dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_UNLINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+        return 0;
+}
+
+/*
+ * UNLINK entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              int xflags, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_UNLINK]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))(
+                        frame, this, loc, xflags, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_unlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink, loc,
+                    xflags, xdata);
+        return 0;
+}
+
+/*
+ * Wind UNLINK to xl, substituting the first child when xl is NULL or is
+ * the current translator; glupy_unlink_cbk receives the reply.
+ */
+void
+wind_unlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+             int xflags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_unlink_cbk, xl, xl->fops->unlink,
+                    loc, xflags, xdata);
+}
+
+/*
+ * Clear frame->local and unwind UNLINK with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_unlink (call_frame_t *frame, long cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iatt *preparent, struct iatt *postparent,
+               dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+                             preparent, postparent, xdata);
+}
+
+/* Store fop as the UNLINK handler of the translator addressed by py_this. */
+void
+set_unlink_fop (long py_this, fop_unlink_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_UNLINK] = (long)fop;
+}
+
+/* Store cbk as the UNLINK callback of the translator addressed by py_this. */
+void
+set_unlink_cbk (long py_this, fop_unlink_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_UNLINK] = (long)cbk;
+}
+
+
+/* FOP: MKDIR */
+
+/*
+ * MKDIR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *buf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_MKDIR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        inode, buf, preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * MKDIR entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+             mode_t umask, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_MKDIR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))(
+                        frame, this, loc, mode, umask, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_mkdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+                    xdata);
+        return 0;
+}
+
+/*
+ * Wind MKDIR to xl, substituting the first child when xl is NULL or is
+ * the current translator; glupy_mkdir_cbk receives the reply.
+ */
+void
+wind_mkdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode,
+            mode_t umask, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_mkdir_cbk, xl, xl->fops->mkdir,
+                    loc, mode, umask, xdata);
+}
+
+/*
+ * Clear frame->local and unwind MKDIR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_mkdir (call_frame_t *frame, long cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, inode_t *inode,
+              struct iatt *buf, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+}
+
+/* Store fop as the MKDIR handler of the translator addressed by py_this. */
+void
+set_mkdir_fop (long py_this, fop_mkdir_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_MKDIR] = (long)fop;
+}
+
+/* Store cbk as the MKDIR callback of the translator addressed by py_this. */
+void
+set_mkdir_cbk (long py_this, fop_mkdir_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_MKDIR] = (long)cbk;
+}
+
+
+/* FOP: RMDIR */
+
+/*
+ * RMDIR completion callback: forward the result to the registered
+ * callback under glupy_enter()/glupy_leave(), or unwind it unchanged
+ * when none is registered.
+ */
+int32_t
+glupy_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct iatt *preparent, struct iatt *postparent,
+                 dict_t *xdata)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv->cbks[GLUPY_RMDIR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))(
+                        frame, cookie, this, op_ret, op_errno,
+                        preparent, postparent, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        /* Nothing registered: pass the result straight up. */
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+        return 0;
+}
+
+/*
+ * RMDIR entry point: with a registered handler, store a fresh id in
+ * frame->local and call the handler under glupy_enter()/glupy_leave();
+ * otherwise wind to the first child.
+ */
+int32_t
+glupy_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             int xflags, dict_t *xdata)
+{
+        static long      next_id = 0;
+        glupy_private_t *priv    = this->private;
+
+        if (priv->fops[GLUPY_RMDIR]) {
+                PyGILState_STATE gstate = glupy_enter();
+                int32_t          ret;
+
+                frame->local = (void *)++next_id;
+                ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))(
+                        frame, this, loc, xflags, xdata);
+                glupy_leave(gstate);
+                return ret;
+        }
+
+        STACK_WIND (frame, glupy_rmdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rmdir, loc,
+                    xflags, xdata);
+        return 0;
+}
+
+/*
+ * Wind RMDIR to xl, substituting the first child when xl is NULL or is
+ * the current translator; glupy_rmdir_cbk receives the reply.
+ */
+void
+wind_rmdir (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+            int xflags, dict_t *xdata)
+{
+        xlator_t *this = THIS;
+
+        if ((xl == NULL) || (xl == this)) {
+                xl = FIRST_CHILD(this);
+        }
+
+        STACK_WIND (frame, glupy_rmdir_cbk, xl, xl->fops->rmdir,
+                    loc, xflags, xdata);
+}
+
+/*
+ * Clear frame->local and unwind RMDIR with the supplied result;
+ * cookie and this are accepted but unused.
+ */
+void
+unwind_rmdir (call_frame_t *frame, long cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno,
+              struct iatt *preparent, struct iatt *postparent,
+              dict_t *xdata)
+{
+        (void)cookie;
+        (void)this;
+
+        frame->local = NULL;
+        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+                             preparent, postparent, xdata);
+}
+
+/* Store fop as the RMDIR handler of the translator addressed by py_this. */
+void
+set_rmdir_fop (long py_this, fop_rmdir_t fop)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->fops[GLUPY_RMDIR] = (long)fop;
+}
+
+/* Store cbk as the RMDIR callback of the translator addressed by py_this. */
+void
+set_rmdir_cbk (long py_this, fop_rmdir_cbk_t cbk)
+{
+        xlator_t        *xl   = (xlator_t *)py_this;
+        glupy_private_t *priv = xl->private;
+
+        priv->cbks[GLUPY_RMDIR] = (long)cbk;
+}
+
+
+/* NON-FOP-SPECIFIC CODE */
+
+
+/* Return the value held in frame->local, converted to long. */
+long
+get_id (call_frame_t *frame)
+{
+        void *tag = frame->local;
+
+        return (long)tag;
+}
+
+/* Return the 'unique' field of the call stack that frame belongs to. */
+uint64_t
+get_rootunique (call_frame_t *frame)
+{
+        uint64_t unique = frame->root->unique;
+
+        return unique;
+}
+
+/*
+ * Set up memory accounting for this translator.
+ *
+ * Returns -1 when this is NULL, otherwise whatever
+ * xlator_mem_acct_init() returns; a non-zero result is logged.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret;
+
+        if (this == NULL) {
+                return -1;
+        }
+
+        ret = xlator_mem_acct_init (this, gf_glupy_mt_end);
+        if (ret != 0) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "Memory accounting init failed");
+        }
+
+        return ret;
+}
+
+/*
+ * Log the pending Python exception, if any, at error level and clear it.
+ * The references handed out by PyErr_Fetch() are released here.
+ */
+static void
+glupy_log_py_error (xlator_t *this)
+{
+        PyObject   *error_type = NULL;
+        PyObject   *error_msg  = NULL;
+        PyObject   *error_bt   = NULL;
+        const char *text       = NULL;
+
+        if (!PyErr_Occurred()) {
+                return;
+        }
+
+        PyErr_Fetch (&error_type, &error_msg, &error_bt);
+        /* The exception value is not guaranteed to be a plain string. */
+        if (error_msg && PyString_Check(error_msg)) {
+                text = PyString_AsString(error_msg);
+        }
+        gf_log (this->name, GF_LOG_ERROR, "Python error: %s",
+                text ? text : "(no message available)");
+
+        Py_XDECREF(error_type);
+        Py_XDECREF(error_msg);
+        Py_XDECREF(error_bt);
+}
+
+/*
+ * Translator init: allocate the private state, bring up the embedded
+ * interpreter on first use, import the module named by the
+ * "module-name" option and call its "xlator" attribute with the
+ * address of this translator.
+ *
+ * Returns 0 on success and -1 on failure.  On failure everything
+ * acquired here is released and this->private is reset to NULL.
+ */
+int32_t
+init (xlator_t *this)
+{
+        glupy_private_t     *priv         = NULL;
+        char                *module_name  = NULL;
+        PyObject            *py_mod_name  = NULL;
+        PyObject            *py_init_func = NULL;
+        PyObject            *py_args      = NULL;
+        PyObject            *syspath      = NULL;
+        PyObject            *path         = NULL;
+        static gf_boolean_t  py_inited    = _gf_false;
+
+        if (dict_get_str(this->options,"module-name",&module_name) != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "missing module-name");
+                return -1;
+        }
+
+        priv = GF_CALLOC (1, sizeof (glupy_private_t), gf_glupy_mt_priv);
+        if (!priv) {
+                goto err_return;
+        }
+        this->private = priv;
+
+        if (!py_inited) {
+                /*
+                 * This must be done before Py_Initialize(),
+                 * because it will duplicate the environment,
+                 * and fail to see later environment updates.
+                 */
+                setenv("PATH_GLUSTERFS_GLUPY_MODULE",
+                       PATH_GLUSTERFS_GLUPY_MODULE, 1);
+
+                Py_Initialize();
+                PyEval_InitThreads();
+
+                (void)pthread_key_create(&gil_init_key,NULL);
+                (void)pthread_setspecific(gil_init_key,(void *)1);
+
+                /* PyEval_InitThreads takes this "for" us.  No thanks. */
+                PyEval_ReleaseLock();
+                py_inited = _gf_true;
+        }
+
+        /*
+         * NOTE(review): the Python calls below run after
+         * PyEval_ReleaseLock() and outside glupy_enter()/glupy_leave();
+         * confirm whether init needs to hold the GIL here.
+         */
+
+        /* Adjust python's path */
+        syspath = PySys_GetObject("path");
+        path = PyString_FromString(GLUSTER_PYTHON_PATH);
+        if (!syspath || !path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not extend python path");
+                glupy_log_py_error (this);
+                Py_XDECREF(path);
+                goto err_free_priv;
+        }
+        if (PyList_Append(syspath, path) != 0) {
+                /* Report and clear the error; the import below decides. */
+                glupy_log_py_error (this);
+        }
+        Py_DECREF(path);
+
+        py_mod_name = PyString_FromString(module_name);
+        if (!py_mod_name) {
+                gf_log (this->name, GF_LOG_ERROR, "could not create name");
+                glupy_log_py_error (this);
+                goto err_free_priv;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name);
+        priv->py_module = PyImport_Import(py_mod_name);
+        Py_DECREF(py_mod_name);
+        if (!priv->py_module) {
+                gf_log (this->name, GF_LOG_ERROR, "Python import of %s failed",
+                        module_name);
+                glupy_log_py_error (this);
+                goto err_free_priv;
+        }
+        gf_log (this->name, GF_LOG_INFO, "Import of %s succeeded", module_name);
+
+        py_init_func = PyObject_GetAttrString(priv->py_module, "xlator");
+        if (!py_init_func || !PyCallable_Check(py_init_func)) {
+                gf_log (this->name, GF_LOG_ERROR, "missing init func");
+                glupy_log_py_error (this);
+                /* Present but not callable: the reference is still ours. */
+                Py_XDECREF(py_init_func);
+                goto err_deref_module;
+        }
+
+        py_args = PyTuple_New(1);
+        if (!py_args) {
+                gf_log (this->name, GF_LOG_ERROR, "could not create args");
+                glupy_log_py_error (this);
+                goto err_deref_init;
+        }
+        PyTuple_SetItem(py_args,0,PyLong_FromLong((long)this));
+
+        /* TBD: pass in list of children */
+        priv->py_xlator = PyObject_CallObject(py_init_func, py_args);
+        Py_DECREF(py_args);
+        if (!priv->py_xlator) {
+                gf_log (this->name, GF_LOG_ERROR, "Python init failed");
+                glupy_log_py_error (this);
+                goto err_deref_init;
+        }
+        gf_log (this->name, GF_LOG_DEBUG, "init returned %p", priv->py_xlator);
+
+        /* The attribute lookup gave us our own reference; the call is done. */
+        Py_DECREF(py_init_func);
+        return 0;
+
+err_deref_init:
+        Py_DECREF(py_init_func);
+err_deref_module:
+        Py_DECREF(priv->py_module);
+err_free_priv:
+        /* Do not leave a dangling pointer behind for a later fini(). */
+        this->private = NULL;
+        GF_FREE(priv);
+err_return:
+        return -1;
+}
+
+/*
+ * Translator teardown: drop the references held on the xlator object and
+ * the module, detach the private state from the translator and free it.
+ * Does nothing when no private state is attached.
+ */
+void
+fini (xlator_t *this)
+{
+        glupy_private_t *priv = this->private;
+
+        if (priv != NULL) {
+                Py_DECREF(priv->py_xlator);
+                Py_DECREF(priv->py_module);
+                this->private = NULL;
+                GF_FREE (priv);
+        }
+}
+
+/*
+ * File-operation table: routes each listed operation to its
+ * glupy_<fop> entry point.
+ */
+struct xlator_fops fops = {
+ .lookup = glupy_lookup,
+ .create = glupy_create,
+ .open = glupy_open,
+ .readv = glupy_readv,
+ .writev = glupy_writev,
+ .opendir = glupy_opendir,
+ .readdir = glupy_readdir,
+ .stat = glupy_stat,
+ .fstat = glupy_fstat,
+ .setxattr = glupy_setxattr,
+ .getxattr = glupy_getxattr,
+ .fsetxattr = glupy_fsetxattr,
+ .fgetxattr = glupy_fgetxattr,
+ .removexattr = glupy_removexattr,
+ .fremovexattr = glupy_fremovexattr,
+ .link = glupy_link,
+ .unlink = glupy_unlink,
+ .readlink = glupy_readlink,
+ .symlink = glupy_symlink,
+ .mkdir = glupy_mkdir,
+ .rmdir = glupy_rmdir,
+ .statfs = glupy_statfs,
+ .readdirp = glupy_readdirp
+};
+
+/* No members are set here; the callback table is left empty. */
+struct xlator_cbks cbks = {
+};
+
+/*
+ * Option table holding only an entry with a NULL key.
+ * NOTE(review): init() reads a "module-name" option that is not declared
+ * here -- confirm whether it should be listed.
+ */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h
new file mode 100644
index 00000000000..1488c55c331
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.h
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GLUPY_H__
+#define __GLUPY_H__
+
+#include "mem-types.h"
+
+enum { /* slot indices into glupy_private_t.fops[] and .cbks[] */
+ GLUPY_LOOKUP = 0,
+ GLUPY_CREATE,
+ GLUPY_OPEN,
+ GLUPY_READV,
+ GLUPY_WRITEV,
+ GLUPY_OPENDIR,
+ GLUPY_READDIR,
+ GLUPY_READDIRP,
+ GLUPY_STAT,
+ GLUPY_FSTAT,
+ GLUPY_STATFS,
+ GLUPY_SETXATTR,
+ GLUPY_GETXATTR,
+ GLUPY_FSETXATTR,
+ GLUPY_FGETXATTR,
+ GLUPY_REMOVEXATTR,
+ GLUPY_FREMOVEXATTR,
+ GLUPY_LINK,
+ GLUPY_UNLINK,
+ GLUPY_READLINK,
+ GLUPY_SYMLINK,
+ GLUPY_MKNOD,
+ GLUPY_MKDIR,
+ GLUPY_RMDIR,
+ GLUPY_N_FUNCS /* number of slots; sizes the fops[] and cbks[] arrays */
+};
+
+typedef struct { /* per-translator state for glupy */
+ PyObject *py_module; /* imported Python module */
+ PyObject *py_xlator; /* Python-side xlator object */
+ long fops[GLUPY_N_FUNCS]; /* fop handlers stored as long, one slot per GLUPY_* index */
+ long cbks[GLUPY_N_FUNCS]; /* callback handlers stored as long, one slot per GLUPY_* index */
+} glupy_private_t;
+
+enum gf_glupy_mem_types_ { /* memory-accounting type ids for this translator */
+ gf_glupy_mt_priv = gf_common_mt_end + 1, /* first id after the common range */
+ gf_glupy_mt_end /* one past the last glupy id */
+};
+
+#endif /* __GLUPY_H__ */
diff --git a/xlators/features/glupy/src/glupy.sym b/xlators/features/glupy/src/glupy.sym
new file mode 100644
index 00000000000..55d9a300108
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.sym
@@ -0,0 +1,101 @@
+init
+fini
+fops
+cbks
+options
+notify
+mem_acct_init
+reconfigure
+dumpops
+set_lookup_fop
+set_lookup_cbk
+set_create_fop
+set_create_cbk
+set_open_fop
+set_open_cbk
+set_readv_fop
+set_readv_cbk
+set_writev_fop
+set_writev_cbk
+set_opendir_fop
+set_opendir_cbk
+set_readdir_fop
+set_readdir_cbk
+set_readdirp_fop
+set_readdirp_cbk
+set_stat_fop
+set_stat_cbk
+set_fstat_fop
+set_fstat_cbk
+set_statfs_fop
+set_statfs_cbk
+set_setxattr_fop
+set_setxattr_cbk
+set_getxattr_fop
+set_getxattr_cbk
+set_fsetxattr_fop
+set_fsetxattr_cbk
+set_fgetxattr_fop
+set_fgetxattr_cbk
+set_removexattr_fop
+set_removexattr_cbk
+set_fremovexattr_fop
+set_fremovexattr_cbk
+set_link_fop
+set_link_cbk
+set_symlink_fop
+set_symlink_cbk
+set_readlink_fop
+set_readlink_cbk
+set_unlink_fop
+set_unlink_cbk
+set_mkdir_fop
+set_mkdir_cbk
+set_rmdir_fop
+set_rmdir_cbk
+wind_lookup
+wind_create
+wind_open
+wind_readv
+wind_writev
+wind_opendir
+wind_readdir
+wind_readdirp
+wind_stat
+wind_fstat
+wind_statfs
+wind_setxattr
+wind_getxattr
+wind_fsetxattr
+wind_fgetxattr
+wind_removexattr
+wind_fremovexattr
+wind_link
+wind_symlink
+wind_readlink
+wind_unlink
+wind_mkdir
+wind_rmdir
+unwind_lookup
+unwind_create
+unwind_open
+unwind_readv
+unwind_writev
+unwind_opendir
+unwind_readdir
+unwind_readdirp
+unwind_stat
+unwind_fstat
+unwind_statfs
+unwind_setxattr
+unwind_getxattr
+unwind_fsetxattr
+unwind_fgetxattr
+unwind_removexattr
+unwind_fremovexattr
+unwind_link
+unwind_symlink
+unwind_readlink
+unwind_unlink
+unwind_mkdir
+unwind_rmdir
diff --git a/xlators/features/glupy/src/glupy/Makefile.am b/xlators/features/glupy/src/glupy/Makefile.am
new file mode 100644
index 00000000000..573d2da12e1
--- /dev/null
+++ b/xlators/features/glupy/src/glupy/Makefile.am
@@ -0,0 +1,5 @@
+# Install __init__.py into the Python site-packages area
+pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster/glupy
+pyglupy_PYTHON = __init__.py
+
+CLEANFILES =
diff --git a/xlators/features/glupy/src/glupy/__init__.py b/xlators/features/glupy/src/glupy/__init__.py
new file mode 100644
index 00000000000..b9fc3700fa6
--- /dev/null
+++ b/xlators/features/glupy/src/glupy/__init__.py
@@ -0,0 +1,852 @@
+##
+## Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+##
+
+import sys
+import os
+from ctypes import *
+
+# Handle on the shared object whose path is taken from the environment
+# variable PATH_GLUSTERFS_GLUPY_MODULE (empty string when unset), opened
+# with RTLD_GLOBAL.  The set_*, wind_* and unwind_* entry points used
+# below are looked up on this handle.
+dl = CDLL(os.getenv("PATH_GLUSTERFS_GLUPY_MODULE", ""),RTLD_GLOBAL)
+
+
+# Opaque placeholder types: none of them declares _fields_, so Python code
+# cannot read members through them.  Those that are used in this module
+# appear only behind POINTER(...).
+class call_frame_t (Structure):
+ pass
+
+class dev_t (Structure):
+ pass
+
+
+class dict_t (Structure):
+ pass
+
+
+class gf_dirent_t (Structure):
+ pass
+
+
+class iobref_t (Structure):
+ pass
+
+
+class iovec_t (Structure):
+ pass
+
+
+# Doubly linked list node (next/prev pointers).  _fields_ is assigned after
+# the class statement because both fields point back at list_head itself.
+class list_head (Structure):
+ pass
+
+list_head._fields_ = [
+ ("next", POINTER(list_head)),
+ ("prev", POINTER(list_head))
+ ]
+
+
+# Three 1-bit permission flags packed into a c_uint8.
+# NOTE(review): the third flag is spelled "execn", presumably because
+# "exec" is a reserved word in Python 2 -- confirm before renaming.
+class rwxperm_t (Structure):
+ _fields_ = [
+ ("read", c_uint8, 1),
+ ("write", c_uint8, 1),
+ ("execn", c_uint8, 1)
+ ]
+
+
+# Opaque placeholder types (no _fields_); used in this module only behind
+# POINTER(...).
+class statvfs_t (Structure):
+ pass
+
+
+class xlator_t (Structure):
+ pass
+
+
+# Protection bits: three 1-bit flags (suid, sgid, sticky) followed by one
+# rwxperm_t each for owner, group and other.
+# NOTE(review): bit-field packing must match the C definition, which is
+# not visible here -- confirm.
+class ia_prot_t (Structure):
+ _fields_ = [
+ ("suid", c_uint8, 1),
+ ("sgid", c_uint8, 1),
+ ("sticky", c_uint8, 1),
+ ("owner", rwxperm_t),
+ ("group", rwxperm_t),
+ ("other", rwxperm_t)
+ ]
+
+# For checking file type.  The names are numbered 0..7 in the order listed.
+# range() is used rather than xrange(): it unpacks identically on Python 2
+# and, unlike xrange(), also exists on Python 3.
+(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO,
+ IA_IFSOCK) = range(8)
+
+
+# Attribute record with inode number, gfid (16 bytes), device, type,
+# protection bits, link count, owner ids, size/block information and
+# atime/mtime/ctime with separate nanosecond parts.
+# NOTE(review): field order and widths must match the C definition, which
+# is not visible here -- confirm (in particular the 32-bit time fields).
+class iatt_t (Structure):
+ _fields_ = [
+ ("ia_no", c_uint64),
+ ("ia_gfid", c_ubyte * 16),
+ ("ia_dev", c_uint64),
+ ("ia_type", c_uint),
+ ("ia_prot", ia_prot_t),
+ ("ia_nlink", c_uint32),
+ ("ia_uid", c_uint32),
+ ("ia_gid", c_uint32),
+ ("ia_rdev", c_uint64),
+ ("ia_size", c_uint64),
+ ("ia_blksize", c_uint32),
+ ("ia_blocks", c_uint64),
+ ("ia_atime", c_uint32 ),
+ ("ia_atime_nsec", c_uint32),
+ ("ia_mtime", c_uint32),
+ ("ia_mtime_nsec", c_uint32),
+ ("ia_ctime", c_uint32),
+ ("ia_ctime_nsec", c_uint32)
+ ]
+
+
+# Memory pool descriptor; referenced by inode_table_t only through
+# POINTER(mem_pool).
+# NOTE(review): "lock" is declared pointer-sized (c_void_p).  If the C
+# lock type has a different size, every later field offset is wrong --
+# confirm against the C definition, which is not visible here.
+class mem_pool (Structure):
+ _fields_ = [
+ ("list", list_head),
+ ("hot_count", c_int),
+ ("cold_count", c_int),
+ ("lock", c_void_p),
+ ("padded_sizeof_type", c_ulong),
+ ("pool", c_void_p),
+ ("pool_end", c_void_p),
+ ("real_sizeof_type", c_int),
+ ("alloc_count", c_uint64),
+ ("pool_misses", c_uint64),
+ ("max_alloc", c_int),
+ ("curr_stdalloc", c_int),
+ ("max_stdalloc", c_int),
+ ("name", c_char_p),
+ ("global_list", list_head)
+ ]
+
+
+# Key of an inode context slot: the same storage viewed either as a 64-bit
+# integer or as a pointer to an xlator_t.
+class U_ctx_key_inode (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", POINTER(xlator_t))
+ ]
+
+
+# First context value: the same storage viewed as a 64-bit integer or as an
+# untyped pointer.  Shared by inode_ctx and fd_ctx.
+class U_ctx_value1 (Union):
+ _fields_ = [
+ ("value1", c_uint64),
+ ("ptr1", c_void_p)
+ ]
+
+
+# Second context value: the same storage viewed as a 64-bit integer or as
+# an untyped pointer.
+class U_ctx_value2 (Union):
+ _fields_ = [
+ ("value2", c_uint64),
+ ("ptr2", c_void_p)
+ ]
+
+# Inode context slot: a key union followed by two value unions.  All three
+# are listed in _anonymous_, so their members (key, xl_key, value1, ptr1,
+# value2, ptr2) are reachable directly on an inode_ctx instance.
+class inode_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1","u_value2",)
+ _fields_ = [
+ ("u_key", U_ctx_key_inode),
+ ("u_value1", U_ctx_value1),
+ ("u_value2", U_ctx_value2)
+ ]
+
+# inode_t and inode_table_t refer to each other, so inode_t is declared
+# empty first and receives its _fields_ only after inode_table_t exists.
+# NOTE(review): both declare "lock" as pointer-sized (c_void_p); confirm
+# that this matches the size of the C lock type, otherwise the offsets of
+# all following fields are wrong.
+class inode_t (Structure):
+ pass
+
+# Inode table: name, root inode, owning xlator, hash tables, the
+# active/lru/purge lists with their sizes, and three memory pools.
+class inode_table_t (Structure):
+ _fields_ = [
+ ("lock", c_void_p),
+ ("hashsize", c_size_t),
+ ("name", c_char_p),
+ ("root", POINTER(inode_t)),
+ ("xl", POINTER(xlator_t)),
+ ("lru_limit", c_uint32),
+ ("inode_hash", POINTER(list_head)),
+ ("name_hash", POINTER(list_head)),
+ ("active", list_head),
+ ("active_size", c_uint32),
+ ("lru", list_head),
+ ("lru_size", c_uint32),
+ ("purge", list_head),
+ ("purge_size", c_uint32),
+ ("inode_pool", POINTER(mem_pool)),
+ ("dentry_pool", POINTER(mem_pool)),
+ ("fd_mem_pool", POINTER(mem_pool))
+ ]
+
+# Deferred field list of inode_t (see the forward declaration above).
+inode_t._fields_ = [
+ ("table", POINTER(inode_table_t)),
+ ("gfid", c_ubyte * 16),
+ ("lock", c_void_p),
+ ("nlookup", c_uint64),
+ ("fd_count", c_uint32),
+ ("ref", c_uint32),
+ ("ia_type", c_uint),
+ ("fd_list", list_head),
+ ("dentry_list", list_head),
+ ("hashv", list_head),
+ ("listv", list_head),
+ ("ctx", POINTER(inode_ctx))
+ ]
+
+
+
+# Key of an fd context slot: the same storage viewed as a 64-bit integer or
+# as an untyped pointer.
+class U_ctx_key_fd (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", c_void_p)
+ ]
+
+# Lock context attached to an fd: a list head, a reference count and a
+# lock.
+# NOTE(review): "lock" is declared pointer-sized; confirm against the C
+# lock type.
+class fd_lk_ctx (Structure):
+ _fields_ = [
+ ("lk_list", list_head),
+ ("ref", c_int),
+ ("lock", c_void_p)
+ ]
+
+# Fd context slot: a key union and a single value union, both anonymous,
+# so key, xl_key, value1 and ptr1 are reachable directly on an instance.
+class fd_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1")
+ _fields_ = [
+ ("u_key", U_ctx_key_fd),
+ ("u_value1", U_ctx_value1)
+ ]
+
+# Open file descriptor object: owner pid, flags, refcount, link into the
+# inode's fd list, the inode itself, per-xlator context and lock context.
+# NOTE(review): "lock" is declared pointer-sized; confirm against the C
+# lock type, since a size mismatch shifts ctx, xl_count, lk_ctx and
+# anonymous.
+class fd_t (Structure):
+ _fields_ = [
+ ("pid", c_uint64),
+ ("flags", c_int32),
+ ("refcount", c_int32),
+ ("inode_list", list_head),
+ ("inode", POINTER(inode_t)),
+ ("lock", c_void_p),
+ ("ctx", POINTER(fd_ctx)),
+ ("xl_count", c_int),
+ ("lk_ctx", POINTER(fd_lk_ctx)),
+ ("anonymous", c_uint)
+ ]
+
+# Location of an object: path and name strings, pointers to its inode and
+# its parent's inode, and the 16-byte gfids of both.
+class loc_t (Structure):
+ _fields_ = [
+ ("path", c_char_p),
+ ("name", c_char_p),
+ ("inode", POINTER(inode_t)),
+ ("parent", POINTER(inode_t)),
+ ("gfid", c_ubyte * 16),
+ ("pargfid", c_ubyte * 16),
+ ]
+
+
+
+def _init_op (a_class, fop, cbk, wind, unwind):
+    """Attach ctypes prototypes for one operation.
+
+    a_class -- class carrying fop_sig and cbk_sig tuples; element 0 of each
+               is the return type, the remaining elements the argument
+               types.  It gains fop_type and cbk_type attributes.
+    fop     -- registration function for the dispatch stub.
+    cbk     -- registration function for the callback stub.
+    wind    -- wind function; takes the fop arguments.
+    unwind  -- unwind function; takes the cbk arguments.
+
+    All four functions get restype None.
+    """
+    # Decorators, used by translators.  We could pass the signatures as
+    # parameters, but it's actually kind of nice to keep them around for
+    # inspection.
+    # CFUNCTYPE(*sig) replaces apply(CFUNCTYPE, sig): apply() has been
+    # deprecated since Python 2.3 and does not exist in Python 3.
+    a_class.fop_type = CFUNCTYPE(*a_class.fop_sig)
+    a_class.cbk_type = CFUNCTYPE(*a_class.cbk_sig)
+    # Dispatch-function registration.
+    fop.restype = None
+    fop.argtypes = [ c_long, a_class.fop_type ]
+    # Callback-function registration.
+    cbk.restype = None
+    cbk.argtypes = [ c_long, a_class.cbk_type ]
+    # STACK_WIND function.
+    wind.restype = None
+    wind.argtypes = list(a_class.fop_sig[1:])
+    # STACK_UNWIND function.
+    unwind.restype = None
+    unwind.argtypes = list(a_class.cbk_sig[1:])
+
+# lookup: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpLookup:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(dict_t), POINTER(iatt_t))
+_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk,
+ dl.wind_lookup, dl.unwind_lookup)
+
+# create: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpCreate:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t),
+ POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(inode_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk,
+ dl.wind_create, dl.unwind_create)
+
+# open: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpOpen:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk,
+ dl.wind_open, dl.unwind_open)
+
+# readv: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+# NOTE(review): the offset argument is declared c_long; confirm that this
+# matches the width of the C offset type on 32-bit platforms.
+class OpReadv:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t),
+ POINTER(iobref_t), POINTER(dict_t))
+_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk,
+ dl.wind_readv, dl.unwind_readv)
+# writev: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpWritev:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32,
+ POINTER(iobref_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk,
+ dl.wind_writev, dl.unwind_writev)
+
+# opendir: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpOpendir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(fd_t) ,POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk,
+ dl.wind_opendir, dl.unwind_opendir)
+
+# readdir: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpReaddir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk,
+ dl.wind_readdir, dl.unwind_readdir)
+
+# readdirp: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpReaddirp:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk,
+ dl.wind_readdirp, dl.unwind_readdirp)
+
+# stat: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpStat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk,
+ dl.wind_stat, dl.unwind_stat)
+
+# fstat: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpFstat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk,
+ dl.wind_fstat, dl.unwind_fstat)
+
+# statfs: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpStatfs:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(statvfs_t), POINTER(dict_t))
+_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk,
+ dl.wind_statfs, dl.unwind_statfs)
+
+
+# setxattr: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpSetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk,
+ dl.wind_setxattr, dl.unwind_setxattr)
+
+# getxattr: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpGetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk,
+ dl.wind_getxattr, dl.unwind_getxattr)
+
+# fsetxattr: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpFsetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk,
+ dl.wind_fsetxattr, dl.unwind_fsetxattr)
+
+# fgetxattr: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpFgetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk,
+ dl.wind_fgetxattr, dl.unwind_fgetxattr)
+
+# removexattr: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpRemovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk,
+ dl.wind_removexattr, dl.unwind_removexattr)
+
+
+# fremovexattr: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpFremovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk,
+ dl.wind_fremovexattr, dl.unwind_fremovexattr)
+
+# link: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpLink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk,
+ dl.wind_link, dl.unwind_link)
+
+# symlink: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpSymlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk,
+ dl.wind_symlink, dl.unwind_symlink)
+
+# unlink: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpUnlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk,
+ dl.wind_unlink, dl.unwind_unlink)
+
+# readlink: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpReadlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_size_t, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk,
+ dl.wind_readlink, dl.unwind_readlink)
+
+# mkdir: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpMkdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_uint, c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk,
+ dl.wind_mkdir, dl.unwind_mkdir)
+
+# rmdir: ctypes prototypes (return type first) for the fop and its
+# callback, then registration of the matching set_/wind_/unwind_ functions.
+class OpRmdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk,
+ dl.wind_rmdir, dl.unwind_rmdir)
+
+
+# Operations a Translator subclass may implement, in registration order.
+# Each entry pairs the operation name with the Op* class that holds its
+# ctypes prototypes.  The comments give the arguments the subclass methods
+# are called with.
+_TRANSLATOR_OPS = (
+    # lookup_fop (frame, this, loc, xdata)
+    # lookup_cbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+    #             xdata, postparent)
+    ("lookup", OpLookup),
+    # create_fop (frame, this, loc, flags, mode, umask, fd, xdata)
+    # create_cbk (frame, cookie, this, op_ret, op_errno, fd, inode, buf,
+    #             preparent, postparent, xdata)
+    ("create", OpCreate),
+    # open_fop (frame, this, loc, flags, fd, xdata)
+    # open_cbk (frame, cookie, this, op_ret, op_errno, fd, xdata)
+    ("open", OpOpen),
+    # readv_fop (frame, this, fd, size, offset, flags, xdata)
+    # readv_cbk (frame, cookie, this, op_ret, op_errno, vector, count,
+    #            stbuf, iobref, xdata)
+    ("readv", OpReadv),
+    # writev_fop (frame, this, fd, vector, count, offset, flags, iobref,
+    #             xdata)
+    # writev_cbk (frame, cookie, this, op_ret, op_errno, prebuf, postbuf,
+    #             xdata)
+    ("writev", OpWritev),
+    # opendir_fop (frame, this, loc, fd, xdata)
+    # opendir_cbk (frame, cookie, this, op_ret, op_errno, fd, xdata)
+    ("opendir", OpOpendir),
+    # readdir_fop (frame, this, fd, size, offset, xdata)
+    # readdir_cbk (frame, cookie, this, op_ret, op_errno, entries, xdata)
+    ("readdir", OpReaddir),
+    # readdirp_fop (frame, this, fd, size, offset, xdata)
+    # readdirp_cbk (frame, cookie, this, op_ret, op_errno, entries, xdata)
+    ("readdirp", OpReaddirp),
+    # stat_fop (frame, this, loc, xdata)
+    # stat_cbk (frame, cookie, this, op_ret, op_errno, buf, xdata)
+    ("stat", OpStat),
+    # fstat_fop (frame, this, fd, xdata)
+    # fstat_cbk (frame, cookie, this, op_ret, op_errno, buf, xdata)
+    ("fstat", OpFstat),
+    # statfs_fop (frame, this, loc, xdata)
+    # statfs_cbk (frame, cookie, this, op_ret, op_errno, buf, xdata)
+    ("statfs", OpStatfs),
+    # setxattr_fop (frame, this, loc, dictionary, flags, xdata)
+    # setxattr_cbk (frame, cookie, this, op_ret, op_errno, xdata)
+    ("setxattr", OpSetxattr),
+    # getxattr_fop (frame, this, loc, name, xdata)
+    # getxattr_cbk (frame, cookie, this, op_ret, op_errno, dictionary,
+    #               xdata)
+    ("getxattr", OpGetxattr),
+    # fsetxattr_fop (frame, this, fd, dictionary, flags, xdata)
+    # fsetxattr_cbk (frame, cookie, this, op_ret, op_errno, xdata)
+    ("fsetxattr", OpFsetxattr),
+    # fgetxattr_fop (frame, this, fd, name, xdata)
+    # fgetxattr_cbk (frame, cookie, this, op_ret, op_errno, dictionary,
+    #                xdata)
+    ("fgetxattr", OpFgetxattr),
+    # removexattr_fop (frame, this, loc, name, xdata)
+    # removexattr_cbk (frame, cookie, this, op_ret, op_errno, xdata)
+    ("removexattr", OpRemovexattr),
+    # fremovexattr_fop (frame, this, fd, name, xdata)
+    # fremovexattr_cbk (frame, cookie, this, op_ret, op_errno, xdata)
+    ("fremovexattr", OpFremovexattr),
+    # link_fop (frame, this, oldloc, newloc, xdata)
+    # link_cbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+    #           preparent, postparent, xdata)
+    ("link", OpLink),
+    # symlink_fop (frame, this, linkname, loc, umask, xdata)
+    # symlink_cbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+    #              preparent, postparent, xdata)
+    ("symlink", OpSymlink),
+    # unlink_fop (frame, this, loc, xflags, xdata)
+    # unlink_cbk (frame, cookie, this, op_ret, op_errno, preparent,
+    #             postparent, xdata)
+    ("unlink", OpUnlink),
+    # readlink_fop (frame, this, loc, size, xdata)
+    # readlink_cbk (frame, cookie, this, op_ret, op_errno, path, buf,
+    #               xdata)
+    ("readlink", OpReadlink),
+    # mkdir_fop (frame, this, loc, mode, umask, xdata)
+    # mkdir_cbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+    #            preparent, postparent, xdata)
+    ("mkdir", OpMkdir),
+    # rmdir_fop (frame, this, loc, xflags, xdata)
+    # rmdir_cbk (frame, cookie, this, op_ret, op_errno, preparent,
+    #            postparent, xdata)
+    ("rmdir", OpRmdir),
+)
+
+
+def _make_stub (proto, obj, method_name):
+    # Wrap obj.<method_name> in the ctypes prototype `proto`.  The method
+    # is looked up at call time, not at registration time.
+    def stub (*args):
+        return getattr(obj, method_name)(*args)
+    return proto(stub)
+
+
+class Translator:
+    """Base class for Python translators.
+
+    A subclass implements any of the <op>_fop / <op>_cbk methods listed in
+    _TRANSLATOR_OPS.  On construction, every such method found on the class
+    is wrapped in a ctypes stub and registered for `c_this` through the
+    matching dl.set_<op>_fop / dl.set_<op>_cbk function.
+    """
+
+    def __init__ (self, c_this):
+        # This is only here to keep references to the stubs we create,
+        # because ctypes doesn't and glupy.so can't because it doesn't
+        # get a pointer to the actual Python object.  It's a dictionary
+        # instead of a list in case we ever allow changing fops/cbks
+        # after initialization and need to look them up.
+        self.stub_refs = {}
+        funcs = dir(self.__class__)
+        for op_name, op_class in _TRANSLATOR_OPS:
+            for suffix, proto in (("fop", op_class.fop_type),
+                                  ("cbk", op_class.cbk_type)):
+                method_name = "%s_%s" % (op_name, suffix)
+                if method_name not in funcs:
+                    continue
+                stub = _make_stub(proto, self, method_name)
+                self.stub_refs[method_name] = stub
+                getattr(dl, "set_" + method_name)(c_this, stub)
diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in
new file mode 100644
index 00000000000..611e9695f76
--- /dev/null
+++ b/xlators/features/glupy/src/setup.py.in
@@ -0,0 +1,24 @@
+from distutils.core import setup
+
+DESC = """GlusterFS is a distributed file-system capable of scaling to
+several petabytes. It aggregates various storage bricks over Infiniband
+RDMA or TCP/IP interconnect into one large parallel network file system.
+GlusterFS is one of the most sophisticated file systems in terms of
+features and extensibility. It borrows a powerful concept called
+Translators from GNU Hurd kernel. Much of the code in GlusterFS is in
+user space and easily manageable.
+
+This package contains Glupy, the Python translator interface for GlusterFS."""
+
+# Package metadata for Glupy.  '@PACKAGE_VERSION@' is a placeholder --
+# presumably substituted when setup.py is generated from this .in template
+# (confirm in the build system).  package_dir maps the 'gluster' package
+# to the directory containing this file.
+setup(
+ name='glusterfs-glupy',
+ version='@PACKAGE_VERSION@',
+ description='Glupy is the Python translator interface for GlusterFS',
+ long_description=DESC,
+ author='Gluster Community',
+ author_email='gluster-devel@gluster.org',
+ license='LGPLv3',
+ url='http://gluster.org/',
+ package_dir={'gluster':''},
+ packages=['gluster']
+)
diff --git a/xlators/features/index/Makefile.am b/xlators/features/index/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/index/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am
new file mode 100644
index 00000000000..1e88f119833
--- /dev/null
+++ b/xlators/features/index/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = index.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+index_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+index_la_SOURCES = index.c
+index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = index.h index-mem-types.h index-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
new file mode 100644
index 00000000000..ca291cfba7e
--- /dev/null
+++ b/xlators/features/index/src/index-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * The guard is named after this header.  It previously reused
+ * __QUIESCE_MEM_TYPES_H__, so including both the quiesce and the index
+ * mem-types headers in one translation unit would have silently dropped
+ * whichever came second.
+ */
+#ifndef __INDEX_MEM_TYPES_H__
+#define __INDEX_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/*
+ * Index-specific memory type IDs, numbered right after gf_common_mt_end.
+ * gf_index_mt_end marks the end of the index range.
+ */
+enum gf_index_mem_types_ {
+        gf_index_mt_priv_t = gf_common_mt_end + 1,
+        gf_index_inode_ctx_t = gf_common_mt_end + 2,
+        gf_index_fd_ctx_t = gf_common_mt_end + 3,
+        gf_index_mt_local_t = gf_common_mt_end + 4,
+        gf_index_mt_end
+};
+#endif /* __INDEX_MEM_TYPES_H__ */
diff --git a/xlators/features/index/src/index-messages.h b/xlators/features/index/src/index-messages.h
new file mode 100644
index 00000000000..91f17555d62
--- /dev/null
+++ b/xlators/features/index/src/index-messages.h
@@ -0,0 +1,121 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _INDEX_MESSAGES_H_
+#define _INDEX_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file index-messages.h
+ * \brief INDEX log-message IDs and their descriptions.
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE_INDEX GLFS_MSGID_COMP_INDEX
+#define GLFS_NUM_MESSAGES 10
+#define GLFS_MSGID_END (GLFS_COMP_BASE_INDEX + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_INDEX, "Invalid: Start of messages"
+
+/*!
+ * @messageid 138001
+ * @diagnosis Index directory creation failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_INDEX_DIR_CREATE_FAILED (GLFS_COMP_BASE_INDEX + 1)
+
+/*!
+ * @messageid 138002
+ * @diagnosis Index directory readdir failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_INDEX_READDIR_FAILED (GLFS_COMP_BASE_INDEX + 2)
+
+/*!
+ * @messageid 138003
+ * @diagnosis Index addition failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_INDEX_ADD_FAILED (GLFS_COMP_BASE_INDEX + 3)
+
+/*!
+ * @messageid 138004
+ * @diagnosis Index deletion failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_INDEX_DEL_FAILED (GLFS_COMP_BASE_INDEX + 4)
+
+/*!
+ * @messageid 138005
+ * @diagnosis Setting option in dictionary failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_DICT_SET_FAILED (GLFS_COMP_BASE_INDEX + 5)
+
+/*!
+ * @messageid 138006
+ * @diagnosis Setting/Getting inode data failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_INODE_CTX_GET_SET_FAILED (GLFS_COMP_BASE_INDEX + 6)
+
+/*!
+ * @messageid 138007
+ * @diagnosis Invalid arguments led to the failure.
+ * @recommendedaction Brick log should give more context where it failed.
+ */
+#define INDEX_MSG_INVALID_ARGS (GLFS_COMP_BASE_INDEX + 7)
+
+/*!
+ * @messageid 138008
+ * @diagnosis Operations on an opened file/directory failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_FD_OP_FAILED (GLFS_COMP_BASE_INDEX + 8)
+
+/*!
+ * @messageid 138009
+ * @diagnosis Worker thread creation for index xlator failed.
+ * @recommendedaction Brick log should give the reason why it failed.
+ */
+#define INDEX_MSG_WORKER_THREAD_CREATE_FAILED (GLFS_COMP_BASE_INDEX + 9)
+
+/*!
+ * @messageid 138010
+ * @diagnosis Index xlator needs to have single subvolume and at least one
+ * parent subvolume, otherwise this message will come.
+ * @recommendedaction Please check brick log file to find which of the above
+ * two conditions failed.
+ */
+#define INDEX_MSG_INVALID_GRAPH (GLFS_COMP_BASE_INDEX + 10)
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_INDEX_MESSAGES_H_ */
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
new file mode 100644
index 00000000000..75809e36e4c
--- /dev/null
+++ b/xlators/features/index/src/index.c
@@ -0,0 +1,2558 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "index.h"
+#include "options.h"
+#include "glusterfs3-xdr.h"
+#include "syscall.h"
+#include "syncop.h"
+#include "common-utils.h"
+#include "index-messages.h"
+#include <ftw.h>
+
+#define XATTROP_SUBDIR "xattrop"
+#define DIRTY_SUBDIR "dirty"
+#define ENTRY_CHANGES_SUBDIR "entry-changes"
+
+/* Argument bundle; not referenced elsewhere in this part of the file.
+ * NOTE(review): presumably handed to a syncop-based helper - confirm. */
+struct index_syncop_args {
+ inode_t *parent;
+ gf_dirent_t *entries;
+ char *path;
+};
+
+/* Virtual-gfid xattr name for each index type, indexed by the XATTROP /
+ * DIRTY / ENTRY_CHANGES enumerators.  Searched by
+ * index_get_type_from_vgfid_xattr(). */
+static char *index_vgfid_xattrs[XATTROP_TYPE_END] = {
+ [XATTROP] = GF_XATTROP_INDEX_GFID,
+ [DIRTY] = GF_XATTROP_DIRTY_GFID,
+ [ENTRY_CHANGES] = GF_XATTROP_ENTRY_CHANGES_GFID
+};
+
+/* Sub-directory name (below priv->index_basepath) for each index type,
+ * indexed by the same enumerators.  Returned by
+ * index_get_subdir_from_type(). */
+static char *index_subdirs[XATTROP_TYPE_END] = {
+ [XATTROP] = XATTROP_SUBDIR,
+ [DIRTY] = DIRTY_SUBDIR,
+ [ENTRY_CHANGES] = ENTRY_CHANGES_SUBDIR
+};
+
+/*
+ * Map a virtual gfid to its index type.
+ *
+ * Scans priv->internal_vgfid[] and returns the position of the first slot
+ * equal to @vgfid, or -1 when no slot matches.
+ */
+int
+index_get_type_from_vgfid (index_priv_t *priv, uuid_t vgfid)
+{
+        int type = -1;
+        int slot = 0;
+
+        while (slot < XATTROP_TYPE_END) {
+                if (!gf_uuid_compare (priv->internal_vgfid[slot], vgfid)) {
+                        type = slot;
+                        break;
+                }
+                slot++;
+        }
+
+        return type;
+}
+
+/* _gf_true when @vgfid is one of the translator's internal virtual gfids
+ * (i.e. index_get_type_from_vgfid() finds it), _gf_false otherwise. */
+gf_boolean_t
+index_is_virtual_gfid (index_priv_t *priv, uuid_t vgfid)
+{
+        return (index_get_type_from_vgfid (priv, vgfid) >= 0) ? _gf_true
+                                                              : _gf_false;
+}
+
+/*
+ * Fetch the index context stored on @inode, creating and attaching a
+ * zeroed one (with an empty callstub list) if none is stored yet.
+ *
+ * Uses the __inode_ctx_* variants; the visible caller wraps this in
+ * LOCK (&inode->lock).
+ *
+ * Returns 0 and stores the context in *ctx on success.  Returns -1 when
+ * the allocation fails, or the __inode_ctx_put() result when attaching
+ * fails; *ctx is left untouched in both failure cases.
+ */
+static int
+__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+        uint64_t           raw    = 0;
+        index_inode_ctx_t *found  = NULL;
+        index_inode_ctx_t *fresh  = NULL;
+        int                status = 0;
+
+        if (__inode_ctx_get (inode, this, &raw) == 0) {
+                found = (index_inode_ctx_t *) (long) raw;
+                if (found)
+                        *ctx = found;
+                return 0;
+        }
+
+        fresh = GF_CALLOC (1, sizeof (*fresh), gf_index_inode_ctx_t);
+        if (!fresh)
+                return -1;
+
+        INIT_LIST_HEAD (&fresh->callstubs);
+        status = __inode_ctx_put (inode, this, (uint64_t)fresh);
+        if (status) {
+                GF_FREE (fresh);
+                return status;
+        }
+
+        *ctx = fresh;
+        return status;
+}
+
+/* Locked wrapper: runs __index_inode_ctx_get() under inode->lock and
+ * returns its result unchanged. */
+static int
+index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+        int status;
+
+        LOCK (&inode->lock);
+        status = __index_inode_ctx_get (inode, this, ctx);
+        UNLOCK (&inode->lock);
+
+        return status;
+}
+
+/*
+ * Report whether @inode carries a non-null virtual_pargfid in its index
+ * context.
+ *
+ * Returns _gf_false for a NULL inode, when the context cannot be obtained,
+ * or when virtual_pargfid is the null uuid.  Note that obtaining the
+ * context creates one on the inode if none exists yet.
+ */
+static gf_boolean_t
+index_is_subdir_of_entry_changes (xlator_t *this, inode_t *inode)
+{
+        index_inode_ctx_t *ctx = NULL;
+
+        if (!inode)
+                return _gf_false;
+
+        /* A zero return with no context stored must not be dereferenced. */
+        if (index_inode_ctx_get (inode, this, &ctx) != 0 || !ctx)
+                return _gf_false;
+
+        return gf_uuid_is_null (ctx->virtual_pargfid) ? _gf_false : _gf_true;
+}
+
+/* Return the index type whose virtual-gfid xattr name equals @name
+ * (exact strcmp match against index_vgfid_xattrs[]), or -1 if none. */
+static int
+index_get_type_from_vgfid_xattr (const char *name)
+{
+        int type = -1;
+        int slot = 0;
+
+        while (slot < XATTROP_TYPE_END) {
+                if (!strcmp (name, index_vgfid_xattrs[slot])) {
+                        type = slot;
+                        break;
+                }
+                slot++;
+        }
+
+        return type;
+}
+
+/*
+ * Decide whether a fop targets one of the translator's internal inodes.
+ *
+ * The gfid tested is @gfid when it is non-NULL and non-null-uuid,
+ * otherwise inode->gfid.  The answer is _gf_true when that gfid is a
+ * virtual gfid, or when @inode has a non-null virtual_pargfid in its
+ * context.  A NULL @inode always yields _gf_false.
+ */
+gf_boolean_t
+index_is_fop_on_internal_inode (xlator_t *this, inode_t *inode, uuid_t gfid)
+{
+        index_priv_t *priv      = this->private;
+        uuid_t        candidate = {0};
+
+        if (!inode)
+                return _gf_false;
+
+        if (!gfid || gf_uuid_is_null (gfid))
+                gf_uuid_copy (candidate, inode->gfid);
+        else
+                gf_uuid_copy (candidate, gfid);
+
+        if (index_is_virtual_gfid (priv, candidate) ||
+            index_is_subdir_of_entry_changes (this, inode))
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* _gf_true when @name is one of the virtual-gfid xattr names. */
+static gf_boolean_t
+index_is_vgfid_xattr (const char *name)
+{
+        return (index_get_type_from_vgfid_xattr (name) >= 0) ? _gf_true
+                                                             : _gf_false;
+}
+
+/* Unlink and return the first stub on @callstubs, or NULL when the list
+ * is empty.  Takes no lock itself. */
+call_stub_t *
+__index_dequeue (struct list_head *callstubs)
+{
+        call_stub_t *head = NULL;
+
+        if (list_empty (callstubs))
+                return NULL;
+
+        head = list_entry (callstubs->next, call_stub_t, list);
+        list_del_init (&head->list);
+
+        return head;
+}
+
+/* Append @stub at the tail of @callstubs.  Takes no lock itself. */
+static void
+__index_enqueue (struct list_head *callstubs, call_stub_t *stub)
+{
+        struct list_head *node = &stub->list;
+
+        list_add_tail (node, callstubs);
+}
+
+/* Queue @stub on the translator-wide callstub list and signal priv->cond,
+ * all under priv->mutex (the same pair index_worker() waits on). */
+static void
+worker_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        index_priv_t *priv = this->private;
+
+        pthread_mutex_lock (&priv->mutex);
+        __index_enqueue (&priv->callstubs, stub);
+        pthread_cond_signal (&priv->cond);
+        pthread_mutex_unlock (&priv->mutex);
+}
+
+/*
+ * Worker thread body.
+ *
+ * @data is the xlator; it is installed as THIS for this thread.  The loop
+ * never exits: it sleeps on priv->cond until priv->callstubs is non-empty,
+ * dequeues one stub under priv->mutex, and resumes it outside the mutex.
+ */
+void *
+index_worker (void *data)
+{
+        xlator_t     *this = data;
+        index_priv_t *priv = NULL;
+        call_stub_t  *stub = NULL;
+
+        THIS = data;
+        priv = this->private;
+
+        for (;;) {
+                pthread_mutex_lock (&priv->mutex);
+                {
+                        /* Re-testing the predicate after every wakeup is
+                         * what guards against spurious wakeups. */
+                        while (list_empty (&priv->callstubs))
+                                (void) pthread_cond_wait (&priv->cond,
+                                                          &priv->mutex);
+
+                        stub = __index_dequeue (&priv->callstubs);
+                }
+                pthread_mutex_unlock (&priv->mutex);
+
+                /* Defensive only: the list was non-empty when dequeued. */
+                if (stub)
+                        call_resume (stub);
+        }
+
+        return NULL;
+}
+
+/* Write "<base>/<subdir>" into @index_dir, truncated to @len bytes
+ * (snprintf semantics). */
+static void
+make_index_dir_path (char *base, const char *subdir,
+                     char *index_dir, size_t len)
+{
+        (void) snprintf (index_dir, len, "%s/%s", base, subdir);
+}
+
+/*
+ * Make sure the directory "<priv->index_basepath>/<subdir>" exists,
+ * creating every missing path component.
+ *
+ * Returns 0 on success (including when the directory already exists),
+ * -2 when the path exists but is not a directory, and the failing
+ * sys_mkdir() result otherwise.  The -1 and -2 cases are logged.
+ */
+int
+index_dir_create (xlator_t *this, const char *subdir)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char fullpath[PATH_MAX] = {0};
+ char path[PATH_MAX] = {0};
+ char *dir = NULL;
+ index_priv_t *priv = NULL;
+ size_t len = 0;
+ size_t pathlen = 0;
+
+ priv = this->private;
+ make_index_dir_path (priv->index_basepath, subdir, fullpath,
+ sizeof (fullpath));
+ /* Fast path: something already exists at the target. */
+ ret = sys_stat (fullpath, &st);
+ if (!ret) {
+ if (!S_ISDIR (st.st_mode))
+ ret = -2;
+ goto out;
+ }
+
+ pathlen = strlen (fullpath);
+ /* Drop a single trailing '/' so the last component is not empty. */
+ if ((pathlen > 1) && fullpath[pathlen - 1] == '/')
+ fullpath[pathlen - 1] = '\0';
+ dir = strchr (fullpath, '/');
+ /* Each pass copies the prefix of fullpath that ends just before the
+ * next '/' (or the whole path on the last pass) and mkdirs it. */
+ while (dir) {
+ dir = strchr (dir + 1, '/');
+ if (dir)
+ len = pathlen - strlen (dir);
+ else
+ len = pathlen;
+ strncpy (path, fullpath, len);
+ path[len] = '\0';
+ ret = sys_mkdir (path, 0600);
+ /* A component that already exists is not an error. */
+ if (ret && (errno != EEXIST))
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_DIR_CREATE_FAILED, "%s/%s: Failed to "
+ "create", priv->index_basepath, subdir);
+ } else if (ret == -2) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOTDIR,
+ INDEX_MSG_INDEX_DIR_CREATE_FAILED, "%s/%s: Failed to "
+ "create, path exists, not a directory ",
+ priv->index_basepath, subdir);
+ }
+ return ret;
+}
+
+/* Copy the current base-index uuid (priv->index) into @index while
+ * holding priv->lock. */
+void
+index_get_index (index_priv_t *priv, uuid_t index)
+{
+        LOCK (&priv->lock);
+        gf_uuid_copy (index, priv->index);
+        UNLOCK (&priv->lock);
+}
+
+/*
+ * Rotate the base-index uuid and hand the current one back in @index.
+ *
+ * A new uuid is generated only when the caller's @index still equals
+ * priv->index, so callers racing with the same stale value do not each
+ * generate a fresh one.  As the original author noted, this scheme fails
+ * if the number of contending threads exceeds the filesystem's MAX_LINK
+ * count.
+ */
+void
+index_generate_index (index_priv_t *priv, uuid_t index)
+{
+        LOCK (&priv->lock);
+        if (gf_uuid_compare (priv->index, index) == 0)
+                gf_uuid_generate (priv->index);
+        gf_uuid_copy (index, priv->index);
+        UNLOCK (&priv->lock);
+}
+
+/* Build "<base>/<subdir>/<subdir>-<uuid of index>" in @index_path,
+ * bounded by @len. */
+static void
+make_index_path (char *base, const char *subdir, uuid_t index,
+                 char *index_path, size_t len)
+{
+        size_t used = 0;
+
+        make_index_dir_path (base, subdir, index_path, len);
+        used = strlen (index_path);
+        snprintf (index_path + used, len - used, "/%s-%s", subdir,
+                  uuid_utoa (index));
+}
+
+/* Build "<base>/<subdir>/<uuid of gfid>" in @gfid_path, bounded by @len. */
+static void
+make_gfid_path (char *base, const char *subdir, uuid_t gfid,
+                char *gfid_path, size_t len)
+{
+        size_t used = 0;
+
+        make_index_dir_path (base, subdir, gfid_path, len);
+        used = strlen (gfid_path);
+        snprintf (gfid_path + used, len - used, "/%s", uuid_utoa (gfid));
+}
+
+/* Build "<base>/<subdir>/<filename>" in @file_path, bounded by @len. */
+static void
+make_file_path (char *base, const char *subdir, const char *filename,
+                char *file_path, size_t len)
+{
+        size_t used = 0;
+
+        make_index_dir_path (base, subdir, file_path, len);
+        used = strlen (file_path);
+        snprintf (file_path + used, len - used, "/%s", filename);
+}
+
+/* Non-zero when @filename is exactly "<subdir>-<uuid of priv_index>",
+ * i.e. the name of the current base index file; zero otherwise. */
+static int
+is_index_file_current (char *filename, uuid_t priv_index, char *subdir)
+{
+        char expected[GF_UUID_BUF_SIZE + 16] = {0, };
+
+        snprintf (expected, sizeof (expected), "%s-%s", subdir,
+                  uuid_utoa (priv_index));
+
+        return (strcmp (filename, expected) == 0);
+}
+
+/*
+ * Remove a base index file that is no longer in use.
+ *
+ * Nothing happens when @filename names the current base index.  Otherwise
+ * the file under <index_basepath>/<subdir> is unlinked when stat succeeds
+ * and reports a link count of exactly 1.
+ */
+static void
+check_delete_stale_index_file (xlator_t *this, char *filename, char *subdir)
+{
+        index_priv_t *priv                 = this->private;
+        char          stale_path[PATH_MAX] = {0};
+        struct stat   sb                   = {0};
+
+        if (is_index_file_current (filename, priv->index, subdir))
+                return;
+
+        make_file_path (priv->index_basepath, subdir, filename,
+                        stale_path, sizeof (stale_path));
+
+        if (sys_stat (stale_path, &sb) != 0)
+                return;
+
+        if (sb.st_nlink == 1)
+                sys_unlink (stale_path);
+}
+
+/* Store @count in priv->pending_count under priv->lock.  Only the XATTROP
+ * type is tracked; other types are ignored. */
+static void
+index_set_link_count (index_priv_t *priv, int64_t count,
+                      index_xattrop_type_t type)
+{
+        if (type != XATTROP)
+                return;
+
+        LOCK (&priv->lock);
+        priv->pending_count = count;
+        UNLOCK (&priv->lock);
+}
+
+/* Read priv->pending_count into *count under priv->lock.  Only the
+ * XATTROP type is tracked; for other types *count is left untouched. */
+static void
+index_get_link_count (index_priv_t *priv, int64_t *count,
+                      index_xattrop_type_t type)
+{
+        if (type != XATTROP)
+                return;
+
+        LOCK (&priv->lock);
+        *count = priv->pending_count;
+        UNLOCK (&priv->lock);
+}
+
+/*
+ * Decrement priv->pending_count for the XATTROP type, under priv->lock.
+ *
+ * If the decrement lands on 0 the counter is pushed on to -1, so 0 is
+ * never left stored by this function.
+ * NOTE(review): -1 presumably means "unknown, recount" - confirm against
+ * the readers of pending_count.
+ */
+static void
+index_dec_link_count (index_priv_t *priv, index_xattrop_type_t type)
+{
+        if (type != XATTROP)
+                return;
+
+        LOCK (&priv->lock);
+        if (--priv->pending_count == 0)
+                priv->pending_count = -1;
+        UNLOCK (&priv->lock);
+}
+
+/* Return the sub-directory name for @type, or NULL when @type lies
+ * outside [XATTROP, XATTROP_TYPE_END). */
+char*
+index_get_subdir_from_type (index_xattrop_type_t type)
+{
+        if (type >= XATTROP && type < XATTROP_TYPE_END)
+                return index_subdirs[type];
+
+        return NULL;
+}
+
+/* Return the sub-directory name belonging to virtual gfid @vgfid, or NULL
+ * when @vgfid is not one of the internal virtual gfids. */
+char*
+index_get_subdir_from_vgfid (index_priv_t *priv, uuid_t vgfid)
+{
+        int type = index_get_type_from_vgfid (priv, vgfid);
+
+        return index_get_subdir_from_type (type);
+}
+
+/*
+ * Read entries from the already-open directory stream @dir into @entries.
+ *
+ * Reading starts at offset @off (the stream is rewound when @off is 0) and
+ * stops when adding the next entry would exceed the @size budget, or when
+ * readdir returns nothing.  Names starting with "xattrop-" or "dirty-" are
+ * never returned; they are passed to check_delete_stale_index_file()
+ * instead.
+ *
+ * Returns the number of entries appended.  On non-Linux hosts a failed
+ * seekdir validation returns -1 with errno set to EINVAL.  When the stream
+ * is exhausted, errno is set to ENOENT and the last offset is remembered in
+ * fctx->dir_eof.
+ *
+ * Note: @fd is not used in the body.
+ */
+static int
+index_fill_readdir (fd_t *fd, index_fd_ctx_t *fctx, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries)
+{
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (!off) {
+ rewinddir (dir);
+ } else {
+ seekdir (dir, off);
+#ifndef GF_LINUX_HOST_OS
+ /* Verify the seek landed where asked, unless @off is the EOF offset
+ * remembered from an earlier call. */
+ if ((u_long)telldir(dir) != off && off != fctx->dir_eof) {
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)", off, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ }
+
+ while (filled <= size) {
+ /* Remember where this entry starts so the stream can be moved back
+ * if the entry turns out not to fit. */
+ in_case = (u_long)telldir (dir);
+
+ if (in_case == -1) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "telldir failed on dir=%p", dir);
+ goto out;
+ }
+
+ errno = 0;
+ entry = sys_readdir (dir, scratch);
+ if (!entry || errno != 0) {
+ if (errno == EBADF) {
+ gf_msg (THIS->name, GF_LOG_WARNING, errno,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "readdir failed on dir=%p", dir);
+ goto out;
+ }
+ break;
+ }
+
+ /* Base index files are housekeeping objects: skip them, and let
+ * check_delete_stale_index_file() decide whether to unlink them. */
+ if (!strncmp (entry->d_name, XATTROP_SUBDIR"-",
+ strlen (XATTROP_SUBDIR"-"))) {
+ check_delete_stale_index_file (this, entry->d_name,
+ XATTROP_SUBDIR);
+ continue;
+ } else if (!strncmp (entry->d_name, DIRTY_SUBDIR"-",
+ strlen (DIRTY_SUBDIR"-"))) {
+ check_delete_stale_index_file (this, entry->d_name,
+ DIRTY_SUBDIR);
+ continue;
+ }
+
+ /* Size estimate for this entry: the larger of the two record
+ * structures plus the NUL-terminated name. */
+ this_size = max (sizeof (gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ /* Does not fit: move the stream back to the start of this entry. */
+ seekdir (dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case &&
+ in_case != fctx->dir_eof) {
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ in_case, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ break;
+ }
+
+ this_entry = gf_dirent_for_name (entry->d_name);
+
+ if (!this_entry) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "could not create gf_dirent for entry %s",
+ entry->d_name);
+ goto out;
+ }
+ /*
+ * we store the offset of next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail (&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count ++;
+ }
+
+ errno = 0;
+
+ /* One more read to find out whether the stream is exhausted. */
+ if ((!sys_readdir (dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ fctx->dir_eof = last_off;
+ }
+out:
+ return count;
+}
+
+/*
+ * Hard-link @fpath to the base index file @base.
+ *
+ * If the first link attempt fails with ENOENT, the sub-directory is
+ * created via index_dir_create(); if it fails with EMLINK, a new base
+ * index is generated and its path is written back into @base (hence
+ * @base_len).  In both cases the base file is then created and the link
+ * retried once.  Any other errno is returned as is.
+ *
+ * Returns 0 on success (EEXIST counts as success) or a negative errno.
+ */
+int
+index_link_to_base (xlator_t *this, char *base, size_t base_len,
+ char *fpath, const char *subdir)
+{
+ int ret = 0;
+ int fd = 0;
+ int op_errno = 0;
+ uuid_t index = {0};
+ index_priv_t *priv = this->private;
+
+ ret = sys_link (base, fpath);
+ if (!ret || (errno == EEXIST)) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Save errno before any further call can overwrite it. */
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ ret = index_dir_create (this, subdir);
+ if (ret) {
+ op_errno = errno;
+ goto out;
+ }
+ } else if (op_errno == EMLINK) {
+ /* Base file has hit the link limit: switch to a new base index. */
+ index_generate_index (priv, index);
+ make_index_path (priv->index_basepath, subdir,
+ index, base, base_len);
+ } else {
+ goto out;
+ }
+
+ op_errno = 0;
+ /* Create the base file; another thread having created it is fine. */
+ fd = sys_creat (base, 0);
+ if ((fd < 0) && (errno != EEXIST)) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ INDEX_MSG_INDEX_ADD_FAILED, "%s: Not able to "
+ "create index", fpath);
+ goto out;
+ }
+
+ if (fd >= 0)
+ sys_close (fd);
+
+ ret = sys_link (base, fpath);
+ if (ret && (errno != EEXIST)) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_ADD_FAILED, "%s: Not able to "
+ "add to index", fpath);
+ goto out;
+ }
+out:
+ return -op_errno;
+}
+
+/*
+ * Add @gfid to the index directory @subdir by hard-linking
+ * "<basepath>/<subdir>/<gfid>" to the current base index file.
+ *
+ * Returns 0 when the gfid entry already exists or was linked, and a
+ * negative errno otherwise.  A null @gfid returns -EINVAL (previously the
+ * EINVAL recorded by the assertion macro was dropped and -1 was returned).
+ * @type is accepted for interface symmetry and is not used here.
+ */
+int
+index_add (xlator_t *this, uuid_t gfid, const char *subdir,
+           index_xattrop_type_t type)
+{
+        int32_t       op_errno = 0;
+        char          gfid_path[PATH_MAX] = {0};
+        char          index_path[PATH_MAX] = {0};
+        int           ret = -1;
+        uuid_t        index = {0};
+        index_priv_t *priv = NULL;
+        struct stat   st = {0};
+
+        priv = this->private;
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid),
+                                       out, op_errno, EINVAL);
+
+        make_gfid_path (priv->index_basepath, subdir, gfid,
+                        gfid_path, sizeof (gfid_path));
+
+        /* Already indexed: nothing to do. */
+        ret = sys_stat (gfid_path, &st);
+        if (!ret)
+                goto out;
+
+        index_get_index (priv, index);
+        make_index_path (priv->index_basepath, subdir,
+                         index, index_path, sizeof (index_path));
+        ret = index_link_to_base (this, index_path, sizeof (index_path),
+                                  gfid_path, subdir);
+out:
+        /* Same convention as index_entry_create(): report the recorded
+         * errno as a negative value. */
+        if (op_errno)
+                ret = -op_errno;
+        return ret;
+}
+
+/*
+ * Remove @gfid from the index directory @subdir.
+ *
+ * The entry is removed with rmdir for the entry-changes sub-directory and
+ * with unlink otherwise; a missing entry (ENOENT) is not an error.  On
+ * success the link count for @type is decremented.
+ *
+ * Returns 0 on success or a negative errno.  A null @gfid returns -EINVAL
+ * (previously it returned 0, which callers took as a successful delete).
+ */
+int
+index_del (xlator_t *this, uuid_t gfid, const char *subdir, int type)
+{
+        int32_t       op_errno = 0;
+        index_priv_t *priv = NULL;
+        int           ret = 0;
+        char          gfid_path[PATH_MAX] = {0};
+
+        priv = this->private;
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid),
+                                       out, op_errno, EINVAL);
+        make_gfid_path (priv->index_basepath, subdir, gfid,
+                        gfid_path, sizeof (gfid_path));
+
+        if ((strcmp (subdir, ENTRY_CHANGES_SUBDIR)) == 0)
+                ret = sys_rmdir (gfid_path);
+        else
+                ret = sys_unlink (gfid_path);
+
+        if (ret && (errno != ENOENT)) {
+                /* Capture errno before logging, which may overwrite it. */
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        INDEX_MSG_INDEX_DEL_FAILED, "%s: failed to delete"
+                        " from index", gfid_path);
+                goto out;
+        }
+
+        index_dec_link_count (priv, type);
+        ret = 0;
+out:
+        if (op_errno)
+                ret = -op_errno;
+        return ret;
+}
+
+/*
+ * dict_foreach_match() predicate used by is_xattr_in_watchlist().
+ *
+ * @k is a key of the watchlist being walked and @tmp is the xattr name
+ * under test.  Returns _gf_true when the first strlen(@k) characters of
+ * the xattr name equal @k, i.e. the watchlist key is a prefix of it.
+ * @d and @v are unused.
+ */
+static gf_boolean_t
+_is_xattr_in_watchlist (dict_t *d, char *k, data_t *v, void *tmp)
+{
+        const char *data = tmp;
+
+        if (!strncmp (k, data, strlen (k)))
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Test whether @key matches any xattr pattern in the watchlist dict
+ * passed as @matchdata.
+ *
+ * dict_foreach_match() walks the watchlist with _is_xattr_in_watchlist as
+ * the predicate; its return value is treated as the number of matching
+ * patterns, so any value above zero means "watched".  @this and @value
+ * are unused.
+ */
+static gf_boolean_t
+is_xattr_in_watchlist (dict_t *this, char *key, data_t *value, void *matchdata)
+{
+        int matches = dict_foreach_match (matchdata, _is_xattr_in_watchlist,
+                                          key, dict_null_foreach_fn, NULL);
+
+        return (matches > 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * Classify xattr key @k: DIRTY when it matches the dirty watchlist,
+ * XATTROP when it matches the pending watchlist, -1 otherwise.  The dirty
+ * watchlist is consulted first; a NULL watchlist never matches.
+ */
+static int
+index_find_xattr_type (dict_t *d, char *k, data_t *v)
+{
+        index_priv_t *priv = THIS->private;
+
+        if (priv->dirty_watchlist &&
+            is_xattr_in_watchlist (d, k, v, priv->dirty_watchlist))
+                return DIRTY;
+
+        if (priv->pending_watchlist &&
+            is_xattr_in_watchlist (d, k, v, priv->pending_watchlist))
+                return XATTROP;
+
+        return -1;
+}
+
+/*
+ * dict_foreach() callback: @adata is an int array with one state slot per
+ * xattrop type.  For every key that belongs to a watched type, that
+ * type's slot is set to 0.  Unwatched keys are ignored.  Always returns 0.
+ */
+int
+index_fill_zero_array (dict_t *d, char *k, data_t *v, void *adata)
+{
+        int *zfilled = adata;
+        int  type    = index_find_xattr_type (d, k, v);
+
+        if (type != -1)
+                zfilled[type] = 0;
+
+        return 0;
+}
+
+/*
+ * dict_foreach_match() callback that folds one xattr value into the
+ * per-type @tmp array (int zfilled[]).
+ *
+ * For a key of watched type T:
+ *   - if mem_0filled() returns non-zero for the value (the original
+ *     comment describes this as "value not zero"), zfilled[T] becomes 0;
+ *   - otherwise zfilled[T] becomes 1 unless it is already 0, so one
+ *     non-zero xattr of a kind keeps the slot at 0.
+ * Keys of no watched type are ignored.  Always returns 0.
+ */
+static int
+_check_key_is_zero_filled (dict_t *d, char *k, data_t *v,
+                           void *tmp)
+{
+        int *zfilled = tmp;
+        int  type    = index_find_xattr_type (d, k, v);
+
+        if (type == -1)
+                return 0;
+
+        if (mem_0filled ((const char*)v->data, v->len)) {
+                zfilled[type] = 0;
+        } else if (zfilled[type]) {
+                zfilled[type] = 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Record @filename under the entry-changes index of directory @inode.
+ *
+ * Creates "<basepath>/entry-changes/<inode gfid>/" if the inode context
+ * does not already say it exists, then hard-links
+ * "<that dir>/<filename>" to the current entry-changes base index file.
+ *
+ * Returns 0 on success or a negative errno (-EINVAL for a null gfid, a
+ * NULL filename or a missing inode context).
+ */
+int
+index_entry_create (xlator_t *this, inode_t *inode, char *filename)
+{
+        int                ret = -1;
+        int                op_errno = 0;
+        char               pgfid_path[PATH_MAX] = {0};
+        char               entry_path[PATH_MAX] = {0};
+        char               entry_base_index_path[PATH_MAX] = {0};
+        uuid_t             index = {0};
+        index_priv_t      *priv = NULL;
+        index_inode_ctx_t *ctx = NULL;
+
+        priv = this->private;
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                       !gf_uuid_is_null (inode->gfid), out,
+                                       op_errno, EINVAL);
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, filename, out, op_errno,
+                                       EINVAL);
+
+        ret = index_inode_ctx_get (inode, this, &ctx);
+        if (ret) {
+                op_errno = EINVAL;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        INDEX_MSG_INODE_CTX_GET_SET_FAILED,
+                        "Not able to get inode ctx for %s",
+                        uuid_utoa (inode->gfid));
+                goto out;
+        }
+
+        make_gfid_path (priv->index_basepath, ENTRY_CHANGES_SUBDIR,
+                        inode->gfid, pgfid_path, sizeof (pgfid_path));
+
+        /* Create the per-directory index dir once; the state cached in
+         * the inode context avoids repeating the mkdir. */
+        if (ctx->state[ENTRY_CHANGES] != IN) {
+                ret = sys_mkdir (pgfid_path, 0600);
+                if (ret != 0 && errno != EEXIST) {
+                        op_errno = errno;
+                        goto out;
+                }
+                ctx->state[ENTRY_CHANGES] = IN;
+        }
+
+        op_errno = 0;
+
+        snprintf (entry_path, sizeof(entry_path), "%s/%s", pgfid_path,
+                  filename);
+        index_get_index (priv, index);
+        make_index_path (priv->index_basepath, ENTRY_CHANGES_SUBDIR, index,
+                         entry_base_index_path, sizeof(entry_base_index_path));
+        ret = index_link_to_base (this, entry_base_index_path,
+                                  sizeof (entry_base_index_path),
+                                  entry_path, ENTRY_CHANGES_SUBDIR);
+out:
+        if (op_errno)
+                ret = -op_errno;
+        return ret;
+}
+
+/*
+ * Remove "<basepath>/entry-changes/<pgfid>/<filename>".
+ *
+ * A missing file (ENOENT) is not an error.  Returns 0 on success or a
+ * negative errno (-EINVAL for a null @pgfid or a NULL @filename).
+ */
+int
+index_entry_delete (xlator_t *this, uuid_t pgfid, char *filename)
+{
+        int           ret = 0;
+        int           op_errno = 0;
+        char          pgfid_path[PATH_MAX] = {0};
+        char          entry_path[PATH_MAX] = {0};
+        index_priv_t *priv = NULL;
+
+        priv = this->private;
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (pgfid),
+                                       out, op_errno, EINVAL);
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, filename, out, op_errno,
+                                       EINVAL);
+
+        make_gfid_path (priv->index_basepath, ENTRY_CHANGES_SUBDIR, pgfid,
+                        pgfid_path, sizeof (pgfid_path));
+        snprintf (entry_path, sizeof(entry_path), "%s/%s", pgfid_path,
+                  filename);
+
+        ret = sys_unlink (entry_path);
+        if (ret && (errno != ENOENT)) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        INDEX_MSG_INDEX_DEL_FAILED,
+                        "%s: failed to delete from index/entry-changes",
+                        entry_path);
+        }
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Apply the entry-changes request stored under @key in @xdata.
+ *
+ * When @xdata has no string for @key nothing is done and 0 is returned.
+ * Otherwise the string is a file name: GF_XATTROP_ENTRY_IN_KEY creates
+ * the entry via index_entry_create(), GF_XATTROP_ENTRY_OUT_KEY deletes it
+ * via index_entry_delete().  Any other key is ignored.  Returns the
+ * result of the create/delete call.
+ */
+int
+index_entry_action (xlator_t *this, inode_t *inode, dict_t *xdata, char *key)
+{
+        int   ret = 0;
+        char *filename = NULL;
+
+        ret = dict_get_str (xdata, key, &filename);
+        if (ret != 0) {
+                ret = 0;
+                goto out;
+        }
+
+        if (strcmp (key, GF_XATTROP_ENTRY_IN_KEY) == 0)
+                ret = index_entry_create (this, inode, filename);
+        else if (strcmp (key, GF_XATTROP_ENTRY_OUT_KEY) == 0)
+                ret = index_entry_delete (this, inode->gfid, filename);
+
+out:
+        return ret;
+}
+
+/*
+ * Bring the on-disk index in line with @zfilled for every xattrop type.
+ *
+ * zfilled[t] == 1: remove the gfid from type t's index, unless the inode
+ *                  context already says NOTIN.
+ * zfilled[t] == 0: add the gfid to type t's index, unless the context
+ *                  already says IN.
+ * any other value: leave type t alone.
+ *
+ * The cached state is updated only when the add/delete call returns 0.
+ */
+void
+_index_action (xlator_t *this, inode_t *inode, int *zfilled)
+{
+        index_inode_ctx_t *ctx    = NULL;
+        char              *subdir = NULL;
+        int                type   = 0;
+
+        if (index_inode_ctx_get (inode, this, &ctx)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        INDEX_MSG_INODE_CTX_GET_SET_FAILED, "Not able to get"
+                        " inode context for %s.", uuid_utoa (inode->gfid));
+                return;
+        }
+
+        for (type = 0; type < XATTROP_TYPE_END; type++) {
+                subdir = index_get_subdir_from_type (type);
+
+                switch (zfilled[type]) {
+                case 1:
+                        if (ctx->state[type] != NOTIN &&
+                            index_del (this, inode->gfid, subdir, type) == 0)
+                                ctx->state[type] = NOTIN;
+                        break;
+                case 0:
+                        if (ctx->state[type] != IN &&
+                            index_add (this, inode->gfid, subdir, type) == 0)
+                                ctx->state[type] = IN;
+                        break;
+                default:
+                        break;
+                }
+        }
+}
+
+/*
+ * Initialise ctx->state[ENTRY_CHANGES] from the filesystem: IN when
+ * "<basepath>/<subdir>/<inode gfid>" can be stat'ed, NOTIN when stat
+ * fails with ENOENT.  Any other stat failure leaves the state unchanged.
+ */
+static void
+index_init_state (xlator_t *this, inode_t *inode, index_inode_ctx_t *ctx,
+                  char *subdir)
+{
+        index_priv_t *priv           = this->private;
+        char          path[PATH_MAX] = {0};
+        struct stat   sb             = {0};
+
+        make_gfid_path (priv->index_basepath, subdir, inode->gfid, path,
+                        sizeof (path));
+
+        if (sys_stat (path, &sb) == 0)
+                ctx->state[ENTRY_CHANGES] = IN;
+        else if (errno == ENOENT)
+                ctx->state[ENTRY_CHANGES] = NOTIN;
+}
+
+/*
+ * Post-xattrop index maintenance.
+ *
+ * 1. Classify the returned @xattr values per type (zero-filled or not)
+ *    and add/remove the inode's gfid in the matching index directories.
+ * 2. If the request xdata carries GF_XATTROP_ENTRY_OUT_KEY, delete that
+ *    entry from the entry-changes index.
+ * 3. When the XATTROP-type values are all zero-filled, the inode is a
+ *    directory, and the request either has no xdata or asks for
+ *    GF_XATTROP_PURGE_INDEX with a non-zero value, remove the inode's
+ *    entry-changes directory.
+ *
+ * A failure to obtain the inode context in step 3 is logged and the step
+ * is skipped; the context pointer is no longer dereferenced unchecked.
+ */
+void
+xattrop_index_action (xlator_t *this, index_local_t *local, dict_t *xattr,
+                      dict_match_t match, void *match_data)
+{
+        int                ret = 0;
+        int                zfilled[XATTROP_TYPE_END] = {0,};
+        int8_t             value = 0;
+        char              *subdir = NULL;
+        dict_t            *req_xdata = NULL;
+        inode_t           *inode = NULL;
+        index_inode_ctx_t *ctx = NULL;
+
+        inode = local->inode;
+        req_xdata = local->xdata;
+
+        /* -1 in a slot means "no xattr of this type seen". */
+        memset (zfilled, -1, sizeof (zfilled));
+        (void) dict_foreach_match (xattr, match, match_data,
+                                   _check_key_is_zero_filled, zfilled);
+        _index_action (this, inode, zfilled);
+
+        if (req_xdata) {
+                (void) index_entry_action (this, inode, req_xdata,
+                                           GF_XATTROP_ENTRY_OUT_KEY);
+
+                ret = dict_get_int8 (req_xdata, GF_XATTROP_PURGE_INDEX, &value);
+                if ((ret) || (value == 0))
+                        goto out;
+        }
+
+        if (zfilled[XATTROP] != 1)
+                goto out;
+
+        if (inode->ia_type != IA_IFDIR)
+                goto out;
+
+        subdir = index_get_subdir_from_type (ENTRY_CHANGES);
+        ret = index_inode_ctx_get (inode, this, &ctx);
+        if (ret || !ctx) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        INDEX_MSG_INODE_CTX_GET_SET_FAILED, "Not able to get"
+                        " inode context for %s.", uuid_utoa (inode->gfid));
+                goto out;
+        }
+
+        if (ctx->state[ENTRY_CHANGES] == UNKNOWN)
+                index_init_state (this, inode, ctx, subdir);
+        if (ctx->state[ENTRY_CHANGES] == IN) {
+                /* The state is reset regardless of the delete result, as
+                 * before. */
+                (void) index_del (this, inode->gfid, subdir,
+                                  ENTRY_CHANGES);
+                ctx->state[ENTRY_CHANGES] = NOTIN;
+        }
+
+out:
+        return;
+}
+
+/*
+ * Decide whether an xattrop must be tracked by the index.
+ *
+ * GF_XATTROP_ADD_ARRAY is always tracked.  GF_XATTROP_ADD_ARRAY64 is
+ * tracked only when a pending watchlist exists and at least one key of
+ * @dict matches it.  Every other flag is not tracked.
+ */
+static gf_boolean_t
+index_xattrop_track (xlator_t *this, gf_xattrop_flags_t flags, dict_t *dict)
+{
+        index_priv_t *priv = this->private;
+        int           hits = 0;
+
+        if (flags == GF_XATTROP_ADD_ARRAY)
+                return _gf_true;
+
+        if (flags != GF_XATTROP_ADD_ARRAY64 || !priv->pending_watchlist)
+                return _gf_false;
+
+        hits = dict_foreach_match (dict, is_xattr_in_watchlist,
+                                   priv->pending_watchlist,
+                                   dict_null_foreach_fn, NULL);
+
+        return (hits > 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * Write the on-disk directory path backing internal inode @inode into
+ * @dirpath (capacity @len).
+ *
+ * For a virtual-gfid inode the path is "<basepath>/<subdir>".  Otherwise
+ * the inode must carry a non-null virtual_pargfid and the path is
+ * "<basepath>/entry-changes/<virtual_pargfid>".
+ *
+ * Returns 0 on success, -EINVAL when the inode is not internal, has no
+ * virtual_pargfid, or the buffer is too small, and the
+ * index_inode_ctx_get() result when the context lookup fails.
+ */
+int
+index_inode_path (xlator_t *this, inode_t *inode, char *dirpath, size_t len)
+{
+        index_priv_t      *priv   = this->private;
+        index_inode_ctx_t *ictx   = NULL;
+        char              *subdir = NULL;
+        int                ret    = 0;
+
+        if (!index_is_fop_on_internal_inode (this, inode, NULL))
+                return -EINVAL;
+
+        subdir = index_get_subdir_from_vgfid (priv, inode->gfid);
+        if (subdir) {
+                if (len <= strlen (priv->index_basepath) + 1 /*'/'*/ +
+                           strlen (subdir))
+                        return -EINVAL;
+
+                make_index_dir_path (priv->index_basepath, subdir,
+                                     dirpath, len);
+                return 0;
+        }
+
+        ret = index_inode_ctx_get (inode, this, &ictx);
+        if (ret)
+                return ret;
+
+        if (gf_uuid_is_null (ictx->virtual_pargfid))
+                return -EINVAL;
+
+        make_index_dir_path (priv->index_basepath, ENTRY_CHANGES_SUBDIR,
+                             dirpath, len);
+
+        /* Room is checked before the two appends below. */
+        if (len <= strlen (dirpath) + 1 /*'/'*/ + strlen (UUID0_STR))
+                return -EINVAL;
+
+        strcat (dirpath, "/");
+        strcat (dirpath, uuid_utoa (ictx->virtual_pargfid));
+
+        return ret;
+}
+
+/*
+ * Fetch the index context stored on @fd, creating it on first use.
+ *
+ * Creation resolves the directory path of fd->inode, opens it with
+ * sys_opendir(), initialises dir_eof to -1 and attaches the context to
+ * the fd.  Uses the __fd_ctx_* variants; the visible caller wraps this in
+ * LOCK (&fd->lock).
+ *
+ * Returns 0 with *ctx set on success.  On failure returns the
+ * index_inode_path() error, -ENOMEM, -errno from opendir, or -EINVAL when
+ * attaching fails; everything allocated or opened here is released again.
+ */
+int
+__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+        int             ret = 0;
+        index_fd_ctx_t *fctx = NULL;
+        uint64_t        tmpctx = 0;
+        char            dirpath[PATH_MAX] = {0};
+
+        ret = __fd_ctx_get (fd, this, &tmpctx);
+        if (!ret) {
+                fctx = (index_fd_ctx_t*) (long) tmpctx;
+                *ctx = fctx;
+                goto out;
+        }
+
+        ret = index_inode_path (this, fd->inode, dirpath, sizeof (dirpath));
+        if (ret)
+                goto out;
+
+        fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t);
+        if (!fctx) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        fctx->dir = sys_opendir (dirpath);
+        if (!fctx->dir) {
+                ret = -errno;
+                GF_FREE (fctx);
+                fctx = NULL;
+                goto out;
+        }
+        /* No end-of-directory offset recorded yet. */
+        fctx->dir_eof = -1;
+
+        ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx);
+        if (ret) {
+                (void) sys_closedir (fctx->dir);
+                GF_FREE (fctx);
+                fctx = NULL;
+                ret = -EINVAL;
+                goto out;
+        }
+        *ctx = fctx;
+out:
+        return ret;
+}
+
+/* Locked wrapper: runs __index_fd_ctx_get() under fd->lock and returns
+ * its result unchanged. */
+int
+index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+        int status;
+
+        LOCK (&fd->lock);
+        status = __index_fd_ctx_get (fd, this, ctx);
+        UNLOCK (&fd->lock);
+
+        return status;
+}
+
+//new - Not NULL means start a fop
+//new - NULL means done processing the fop
+/*
+ * Serialise xattrop stubs per inode.
+ *
+ * A non-NULL @new is appended to the inode's stub queue; a NULL @new
+ * marks the fop in progress as finished.  In both cases, if no fop is in
+ * progress afterwards, the next queued stub (if any) is dequeued, marked
+ * as in progress and resumed once inode->lock has been dropped.
+ *
+ * If the inode context cannot be obtained while queueing @new, the stub's
+ * frame is unwound with ENOMEM (for xattrop and fxattrop stubs) and the
+ * stub is destroyed.
+ */
+void
+index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new)
+{
+        index_inode_ctx_t *ctx   = NULL;
+        call_stub_t       *next  = NULL;
+        call_frame_t      *frame = NULL;
+        int                ret   = 0;
+
+        LOCK (&inode->lock);
+        {
+                ret = __index_inode_ctx_get (inode, this, &ctx);
+                if (ret == 0) {
+                        if (new) {
+                                __index_enqueue (&ctx->callstubs, new);
+                                new = NULL;
+                        } else {
+                                ctx->processing = _gf_false;
+                        }
+
+                        if (!ctx->processing) {
+                                next = __index_dequeue (&ctx->callstubs);
+                                ctx->processing = next ? _gf_true
+                                                       : _gf_false;
+                        }
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        if (ret && new) {
+                frame = new->frame;
+                if (new->fop == GF_FOP_XATTROP) {
+                        INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM,
+                                            NULL, NULL);
+                } else if (new->fop == GF_FOP_FXATTROP) {
+                        INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM,
+                                            NULL, NULL);
+                }
+                call_stub_destroy (new);
+                return;
+        }
+
+        if (next)
+                call_resume (next);
+}
+
+/*
+ * Shared xattrop callback.
+ *
+ * On success (op_ret >= 0) the index is updated from the returned @xattr
+ * using @match / @matchdata.  The frame is then unwound, and the inode's
+ * stub queue is advanced.  An extra inode reference is held across the
+ * unwind and dropped at the end.  Always returns 0.
+ */
+static int
+xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, dict_t *xattr,
+             dict_t *xdata, dict_match_t match, dict_t *matchdata)
+{
+        index_local_t *local = frame->local;
+        inode_t       *inode = inode_ref (local->inode);
+
+        if (op_ret >= 0)
+                xattrop_index_action (this, local, xattr, match, matchdata);
+
+        INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata);
+        index_queue_process (this, inode, NULL);
+        inode_unref (inode);
+
+        return 0;
+}
+
+/* Callback for GF_XATTROP_ADD_ARRAY requests: matches returned xattrs
+ * against priv->complete_watchlist.  Always returns 0. */
+int32_t
+index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                   dict_t *xdata)
+{
+        index_priv_t *priv      = this->private;
+        dict_t       *watchlist = priv->complete_watchlist;
+
+        (void) xattrop_cbk (frame, cookie, this, op_ret, op_errno, xattr,
+                            xdata, is_xattr_in_watchlist, watchlist);
+
+        return 0;
+}
+
+/*
+ * Callback installed by index_xattrop_do() for every optype other than
+ * GF_XATTROP_ADD_ARRAY: forwards to xattrop_cbk() matching against the
+ * pending watchlist only.
+ */
+int32_t
+index_xattrop64_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                     dict_t *xdata)
+{
+        index_priv_t *conf = NULL;
+        int           rc = 0;
+
+        conf = this->private;
+        rc = xattrop_cbk (frame, cookie, this, op_ret, op_errno, xattr,
+                          xdata, is_xattr_in_watchlist,
+                          conf->pending_watchlist);
+
+        return rc;
+}
+
+/*
+ * Wind-phase worker shared by xattrop (loc != NULL) and fxattrop
+ * (loc == NULL, fd used instead).
+ *
+ * It records the inode in the index before winding, applies any
+ * entry-level action requested through xdata, and then winds the fop to
+ * the first child.  The callback is index_xattrop_cbk for
+ * GF_XATTROP_ADD_ARRAY and index_xattrop64_cbk otherwise.  On a negative
+ * result from the preparatory steps the callback is invoked directly with
+ * op_ret = -1 and no wind happens.
+ */
+void
+index_xattrop_do (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                  fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr,
+                  dict_t *xdata)
+{
+        int                ret = -1;
+        int                zfilled[XATTROP_TYPE_END] = {0,};
+        index_local_t     *local = NULL;
+        fop_xattrop_cbk_t  x_cbk = NULL;
+
+        local = frame->local;
+
+        if (optype == GF_XATTROP_ADD_ARRAY)
+                x_cbk = index_xattrop_cbk;
+        else
+                x_cbk = index_xattrop64_cbk;
+
+        //In wind phase bring the gfid into index. This way if the brick crashes
+        //just after posix performs xattrop before _cbk reaches index xlator
+        //we will still have the gfid in index.
+        memset (zfilled, -1, sizeof (zfilled));
+
+        /* Foreach xattr, set corresponding index of zfilled to 1
+         * zfilled[index] = 1 implies the xattr's value is zero filled
+         * and should be added in its corresponding subdir.
+         *
+         * zfilled should be set to 1 only for those index that
+         * exist in xattr variable. This is to distinguish
+         * between different types of volumes.
+         * For e.g., if the check is not made,
+         * zfilled[DIRTY] is set to 1 for EC volumes,
+         * index file will be tried to create in indices/dirty dir
+         * which doesn't exist for an EC volume.
+         */
+        ret = dict_foreach (xattr, index_fill_zero_array, zfilled);
+
+        _index_action (this, local->inode, zfilled);
+        if (xdata)
+                ret = index_entry_action (this, local->inode, xdata,
+                                          GF_XATTROP_ENTRY_IN_KEY);
+        if (ret < 0) {
+                /* ret carries a negative errno; hand it to the callback
+                 * so the frame is unwound and the queue advances. */
+                x_cbk (frame, NULL, this, -1, -ret, NULL, NULL);
+                return;
+        }
+
+        if (loc)
+                STACK_WIND (frame, x_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->xattrop,
+                            loc, optype, xattr, xdata);
+        else
+                STACK_WIND (frame, x_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->fxattrop, fd,
+                            optype, xattr, xdata);
+}
+
+/*
+ * Stub resume function for xattrop (registered by index_xattrop()):
+ * runs the shared worker with a loc and no fd.  Always returns 0.
+ */
+int
+index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+        index_xattrop_do (frame, this, loc, NULL, optype, xattr, xdata);
+        return 0;
+}
+
+/*
+ * Stub resume function for fxattrop (registered by index_fxattrop()):
+ * runs the shared worker with an fd and no loc.  Always returns 0.
+ */
+int
+index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+        index_xattrop_do (frame, this, NULL, fd, optype, xattr, xdata);
+        return 0;
+}
+
+/*
+ * xattrop entry point.  Operations that index_xattrop_track() rejects are
+ * wound straight to the child.  Tracked ones get an index_local_t (holding
+ * refs on the inode and, if present, xdata) plus a call stub, which is
+ * handed to the per-inode queue.  Allocation failure of either unwinds
+ * with ENOMEM.
+ */
+int32_t
+index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        index_local_t *ctx_local = NULL;
+        call_stub_t   *queued = NULL;
+
+        if (!index_xattrop_track (this, flags, dict)) {
+                STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->xattrop, loc, flags,
+                            dict, xdata);
+                return 0;
+        }
+
+        ctx_local = mem_get0 (this->local_pool);
+        if (ctx_local) {
+                frame->local = ctx_local;
+                ctx_local->inode = inode_ref (loc->inode);
+                if (xdata)
+                        ctx_local->xdata = dict_ref (xdata);
+                queued = fop_xattrop_stub (frame, index_xattrop_wrapper,
+                                           loc, flags, dict, xdata);
+        }
+
+        /* queued stays NULL when either allocation failed. */
+        if (!queued) {
+                INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        index_queue_process (this, loc->inode, queued);
+        return 0;
+}
+
+/*
+ * fxattrop entry point; fd-based twin of index_xattrop().  Operations that
+ * index_xattrop_track() rejects are wound straight to the child.  Tracked
+ * ones get an index_local_t (refs on fd->inode and, if present, xdata) and
+ * a call stub that is handed to the per-inode queue.  Allocation failure
+ * unwinds with ENOMEM and no response dictionaries, matching
+ * index_xattrop() and the failure path of index_queue_process().
+ */
+int32_t
+index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+        index_local_t *local = NULL;
+
+        if (!index_xattrop_track (this, flags, dict))
+                goto out;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+        local->inode = inode_ref (fd->inode);
+        if (xdata)
+                local->xdata = dict_ref (xdata);
+        stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper,
+                                  fd, flags, dict, xdata);
+
+err:
+        if ((!local) || (!stub)) {
+                /* Do not echo the request xdata back as response data. */
+                INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        index_queue_process (this, fd->inode, stub);
+        return 0;
+out:
+        STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+        return 0;
+}
+
+/*
+ * Counts the entries of <index_basepath>/<subdir>, ignoring ".", ".." and
+ * every name that starts with the subdir name itself.  Returns 0 when the
+ * directory cannot be opened; a read error simply ends the scan.
+ */
+uint64_t
+index_entry_count (xlator_t *this, char *subdir)
+{
+        index_priv_t  *conf = this->private;
+        DIR           *dh = NULL;
+        struct dirent *dent = NULL;
+        struct dirent  scratch[2] = {{0,},};
+        char           dirpath[PATH_MAX] = {0,};
+        uint64_t       total = 0;
+
+        make_index_dir_path (conf->index_basepath, subdir,
+                             dirpath, sizeof (dirpath));
+
+        dh = sys_opendir (dirpath);
+        if (dh == NULL)
+                return 0;
+
+        while (1) {
+                errno = 0;
+                dent = sys_readdir (dh, scratch);
+                if (dent == NULL || errno != 0)
+                        break;
+
+                if (!strcmp (dent->d_name, ".") ||
+                    !strcmp (dent->d_name, ".."))
+                        continue;
+
+                /* Names prefixed with the subdir name are not counted. */
+                if (strncmp (dent->d_name, subdir, strlen (subdir)) != 0)
+                        total++;
+        }
+
+        (void) sys_closedir (dh);
+
+        return total;
+}
+
+/*
+ * Worker-side getxattr for the index's virtual xattrs.  Builds a reply
+ * dict holding, depending on 'name':
+ *   - the internal virtual gfid for a vgfid xattr name,
+ *   - the entry count of the xattrop index for GF_XATTROP_INDEX_COUNT,
+ *   - the entry count of the dirty index for GF_XATTROP_DIRTY_COUNT.
+ * Unwinds with 0/0 on success or -1/<errno> on failure; the reply dict is
+ * passed to the unwind in both cases and released afterwards.
+ */
+int32_t
+index_getxattr_wrapper (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, const char *name, dict_t *xdata)
+{
+        index_priv_t *conf = this->private;
+        dict_t       *rsp = NULL;
+        int           err = 0;
+        int           vtype = 0;
+        uint64_t      nentries = 0;
+
+        rsp = dict_new ();
+        if (rsp == NULL) {
+                err = -ENOMEM;
+                goto unwind;
+        }
+
+        vtype = index_get_type_from_vgfid_xattr (name);
+        if (vtype >= 0) {
+                err = dict_set_static_bin (rsp, (char *)name,
+                                           conf->internal_vgfid[vtype],
+                                           sizeof (conf->internal_vgfid[vtype]));
+                if (err) {
+                        err = -EINVAL;
+                        gf_msg (this->name, GF_LOG_ERROR, -err,
+                                INDEX_MSG_DICT_SET_FAILED, "xattrop index "
+                                "gfid set failed");
+                        goto unwind;
+                }
+        }
+
+        /* TODO: Need to check what kind of link-counts are needed for
+         * ENTRY-CHANGES before refactor of this block with array*/
+        if (!strcmp (name, GF_XATTROP_INDEX_COUNT)) {
+                nentries = index_entry_count (this, XATTROP_SUBDIR);
+
+                err = dict_set_uint64 (rsp, (char *)name, nentries);
+                if (err) {
+                        err = -EINVAL;
+                        gf_msg (this->name, GF_LOG_ERROR, -err,
+                                INDEX_MSG_DICT_SET_FAILED, "xattrop index "
+                                "count set failed");
+                        goto unwind;
+                }
+        } else if (!strcmp (name, GF_XATTROP_DIRTY_COUNT)) {
+                nentries = index_entry_count (this, DIRTY_SUBDIR);
+
+                err = dict_set_uint64 (rsp, (char *)name, nentries);
+                if (err) {
+                        err = -EINVAL;
+                        gf_msg (this->name, GF_LOG_ERROR, -err,
+                                INDEX_MSG_DICT_SET_FAILED, "dirty index "
+                                "count set failed");
+                        goto unwind;
+                }
+        }
+
+unwind:
+        /* err is 0 or a negative errno at this point. */
+        STACK_UNWIND_STRICT (getxattr, frame, (err ? -1 : 0), -err, rsp,
+                             NULL);
+
+        if (rsp != NULL)
+                dict_unref (rsp);
+
+        return 0;
+}
+
+/*
+ * When loc's parent is the ENTRY_CHANGES virtual directory, parse
+ * loc->name as a uuid and remember it in the inode context as
+ * virtual_pargfid.  Any other parent is a no-op.
+ *
+ * Returns 0 on success or when nothing had to be done, -EINVAL if the
+ * inode context is unavailable or the name is not a valid uuid.
+ */
+static int
+index_save_pargfid_for_entry_changes (xlator_t *this, loc_t *loc, char *path)
+{
+        index_priv_t      *conf = this->private;
+        index_inode_ctx_t *ictx = NULL;
+
+        /* gf_uuid_compare() is non-zero when the gfids differ. */
+        if (gf_uuid_compare (loc->pargfid,
+                             conf->internal_vgfid[ENTRY_CHANGES]) != 0)
+                return 0;
+
+        if (index_inode_ctx_get (loc->inode, this, &ictx)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        INDEX_MSG_INODE_CTX_GET_SET_FAILED,
+                        "Unable to get inode context for %s", path);
+                return -EINVAL;
+        }
+
+        if (gf_uuid_parse (loc->name, ictx->virtual_pargfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        INDEX_MSG_INODE_CTX_GET_SET_FAILED, "Unable to store "
+                        "virtual gfid in inode context for %s", path);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/*
+ * Worker-side lookup for index-internal objects.  Resolves the on-disk
+ * path for one of three cases:
+ *   1. an entry below an internal directory (parent is internal),
+ *   2. one of the virtual index directories themselves (gfid is virtual),
+ *   3. anything else that index_lookup() routed here, resolved through
+ *      the inode found in the inode table.
+ * It then lstat()s the path, builds the iatt, optionally reports the
+ * directory's entry type via GF_INDEX_IA_TYPE_GET_RSP, and unwinds.
+ *
+ * The path built for case 1 is length-checked; an over-long name fails
+ * the lookup with ENAMETOOLONG instead of overflowing 'path'.
+ */
+int32_t
+index_lookup_wrapper (call_frame_t *frame, xlator_t *this,
+                      loc_t *loc, dict_t *xattr_req)
+{
+        index_priv_t *priv = NULL;
+        struct stat lstatbuf = {0};
+        int ret = 0;
+        int32_t op_errno = EINVAL;
+        int32_t op_ret = -1;
+        uint64_t val = IA_INVAL;
+        char path[PATH_MAX] = {0};
+        struct iatt stbuf = {0, };
+        struct iatt postparent = {0,};
+        dict_t *xattr = NULL;
+        gf_boolean_t is_dir = _gf_false;
+        char *subdir = NULL;
+        loc_t iloc = {0};
+
+        priv = this->private;
+        loc_copy (&iloc, loc);
+
+        /* NOTE(review): loc has already been passed to loc_copy() above and
+         * is dereferenced again at 'done', so this check cannot protect a
+         * NULL loc - confirm whether it is needed at all. */
+        VALIDATE_OR_GOTO (loc, done);
+        if (index_is_fop_on_internal_inode (this, loc->parent, loc->pargfid)) {
+                subdir = index_get_subdir_from_vgfid (priv, loc->pargfid);
+                ret = index_inode_path (this, loc->parent, path, sizeof (path));
+                if (ret < 0) {
+                        op_errno = -ret;
+                        goto done;
+                }
+                /* Room is needed for '/', the name and the trailing NUL. */
+                if (strlen (path) + 1 + strlen (loc->name) >= sizeof (path)) {
+                        op_errno = ENAMETOOLONG;
+                        goto done;
+                }
+                strcat (path, "/");
+                strcat (path, (char *)loc->name);
+        } else if (index_is_virtual_gfid (priv, loc->gfid)) {
+                subdir = index_get_subdir_from_vgfid (priv, loc->gfid);
+                make_index_dir_path (priv->index_basepath, subdir,
+                                     path, sizeof (path));
+                is_dir = _gf_true;
+
+                if ((xattr_req) &&
+                    (dict_get (xattr_req, GF_INDEX_IA_TYPE_GET_REQ))) {
+                        if (0 == strcmp (subdir,
+                                    index_get_subdir_from_type(ENTRY_CHANGES)))
+                                val = IA_IFDIR;
+                        else
+                                val = IA_IFREG;
+                }
+        } else {
+                if (!inode_is_linked (loc->inode)) {
+                        /* Swap the copied inode for the linked one that
+                         * the table knows under this gfid. */
+                        inode_unref (iloc.inode);
+                        iloc.inode = inode_find (loc->inode->table, loc->gfid);
+                }
+                ret = index_inode_path (this, iloc.inode, path,
+                                        sizeof (path));
+                if (ret < 0) {
+                        op_errno = -ret;
+                        goto done;
+                }
+        }
+        ret = sys_lstat (path, &lstatbuf);
+        if (ret) {
+                gf_msg_debug (this->name, errno, "Stat failed on %s dir ",
+                              path);
+                op_errno = errno;
+                goto done;
+        } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) {
+                op_errno = ENOTDIR;
+                gf_msg_debug (this->name, op_errno, "Stat failed on %s dir, "
+                              "not a directory", path);
+                goto done;
+        }
+        xattr = dict_new ();
+        if (!xattr) {
+                op_errno = ENOMEM;
+                goto done;
+        }
+
+        if (val != IA_INVAL) {
+                ret = dict_set_uint64 (xattr, GF_INDEX_IA_TYPE_GET_RSP, val);
+                if (ret) {
+                        op_ret = -1;
+                        op_errno = -ret;
+                        goto done;
+                }
+        }
+
+        iatt_from_stat (&stbuf, &lstatbuf);
+        if (is_dir || inode_is_linked (iloc.inode))
+                loc_gfid (&iloc, stbuf.ia_gfid);
+        else
+                gf_uuid_generate (stbuf.ia_gfid);
+
+        ret = index_save_pargfid_for_entry_changes (this, &iloc, path);
+        if (ret) {
+                op_ret = -1;
+                op_errno = -ret;
+                goto done;
+        }
+
+        stbuf.ia_ino = -1;
+        op_ret = 0;
+done:
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+                             loc->inode, &stbuf, xattr, &postparent);
+        if (xattr)
+                dict_unref (xattr);
+        loc_wipe (&iloc);
+        return 0;
+}
+
+/*
+ * Synctask body (opaque is a struct index_syncop_args *): fills in d_type
+ * for every readdir entry whose name parses as a gfid.  The type comes
+ * from the inode table when the inode is already known there, otherwise
+ * from a lookup on the first child.  Entries that cannot be resolved keep
+ * IA_INVAL.  Always returns 0.
+ */
+int
+index_get_gfid_type (void *opaque)
+{
+        gf_dirent_t *entry = NULL;
+        inode_t *inode = NULL;
+        xlator_t *this = THIS;
+        struct index_syncop_args *args = opaque;
+        loc_t loc = {0};
+        struct iatt iatt = {0};
+        int ret = 0;
+
+        list_for_each_entry (entry, &args->entries->list, list) {
+                if (strcmp (entry->d_name, ".") == 0 ||
+                    strcmp (entry->d_name, "..") == 0)
+                        continue;
+
+                /* Releases whatever the previous iteration left in loc. */
+                loc_wipe (&loc);
+
+                entry->d_type = IA_INVAL;
+                if (gf_uuid_parse (entry->d_name, loc.gfid))
+                        continue;
+
+                loc.inode = inode_find (args->parent->table, loc.gfid);
+                if (loc.inode) {
+                        entry->d_type = loc.inode->ia_type;
+                        continue;
+                }
+                loc.inode = inode_new (args->parent->table);
+                if (!loc.inode)
+                        continue;
+                ret = syncop_lookup (FIRST_CHILD (this), &loc, &iatt, 0, 0, 0);
+                if (ret == 0)
+                        entry->d_type = iatt.ia_type;
+        }
+        loc_wipe (&loc);
+
+        return 0;
+}
+
+/*
+ * Worker-side readdir on an index directory.  Fetches the fd context,
+ * fills 'entries' through index_fill_readdir() and unwinds with the number
+ * of entries as op_ret.  When the directory is one of the virtual-gfid
+ * directories and the caller put "get-gfid-type" in xdata, each entry's
+ * d_type is resolved via index_get_gfid_type() before unwinding.
+ */
+int32_t
+index_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+                       fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        index_fd_ctx_t *fctx = NULL;
+        index_priv_t *priv = NULL;
+        DIR *dir = NULL;
+        int ret = -1;
+        int32_t op_ret = -1;
+        int32_t op_errno = 0;
+        int count = 0;
+        gf_dirent_t entries;
+        struct index_syncop_args args = {0};
+
+        priv = this->private;
+        INIT_LIST_HEAD (&entries.list);
+
+        ret = index_fd_ctx_get (fd, this, &fctx);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        INDEX_MSG_FD_OP_FAILED, "pfd is NULL, fd=%p", fd);
+                goto done;
+        }
+
+        dir = fctx->dir;
+        if (!dir) {
+                op_errno = EINVAL;
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        INDEX_MSG_INDEX_READDIR_FAILED,
+                        "dir is NULL for fd=%p", fd);
+                goto done;
+        }
+
+        count = index_fill_readdir (fd, fctx, dir, off, size, &entries);
+
+        /* pick ENOENT to indicate EOF */
+        op_errno = errno;
+        op_ret = count;
+        if (index_is_virtual_gfid (priv, fd->inode->gfid) &&
+            xdata && dict_get (xdata, "get-gfid-type")) {
+                args.parent = fd->inode;
+                args.entries = &entries;
+                /* NOTE(review): the synctask result is stored but never
+                 * checked; entries presumably keep whatever d_type they
+                 * had if the task cannot run - confirm. */
+                ret = synctask_new (this->ctx->env, index_get_gfid_type,
+                                    NULL, NULL, &args);
+        }
+done:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
+        gf_dirent_free (&entries);
+        return 0;
+}
+
+/*
+ * nftw() visitor used by index_wipe_index_subdir(): unlinks regular files,
+ * removes directories and logs a warning for any other file type.  The
+ * results of the removals are not inspected and the walk is never aborted
+ * (always returns 0).
+ */
+int
+deletion_handler (const char *fpath, const struct stat *sb, int typeflag,
+                  struct FTW *ftwbuf)
+{
+        ia_type_t kind = IA_INVAL;
+
+        if ((sb->st_mode & S_IFMT) == S_IFREG) {
+                sys_unlink (fpath);
+        } else if ((sb->st_mode & S_IFMT) == S_IFDIR) {
+                sys_rmdir (fpath);
+        } else {
+                kind = ia_type_from_st_mode (sb->st_mode);
+                gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+                        INDEX_MSG_INVALID_ARGS,
+                        "%s neither a regular file nor a directory - type:%s",
+                        fpath, gf_inode_type_to_str (kind));
+        }
+
+        return 0;
+}
+
+/*
+ * Synctask body (opaque is a struct index_syncop_args *): walks args->path
+ * depth-first without following symlinks and removes what it finds through
+ * deletion_handler().  The nftw() result is ignored; always returns 0.
+ */
+static int
+index_wipe_index_subdir (void *opaque)
+{
+        struct index_syncop_args *args = opaque;
+
+        nftw (args->path, deletion_handler, 1, FTW_DEPTH | FTW_PHYS);
+        return 0;
+}
+
+/*
+ * Fills 'parent' from an lstat() of 'path', then overrides its gfid with
+ * loc->pargfid and its inode number with -1.  On lstat failure *op_ret is
+ * set to -1 and *op_errno to errno; on success neither is touched.
+ */
+static void
+index_get_parent_iatt (struct iatt *parent, char *path, loc_t *loc,
+                       int32_t *op_ret, int32_t *op_errno)
+{
+        struct stat st = {0,};
+
+        if (sys_lstat (path, &st) < 0) {
+                *op_ret = -1;
+                *op_errno = errno;
+                return;
+        }
+
+        iatt_from_stat (parent, &st);
+        gf_uuid_copy (parent->ia_gfid, loc->pargfid);
+        parent->ia_ino = -1;
+}
+
+/*
+ * Worker-side rmdir below an index directory.  loc->name is parsed as a
+ * gfid.  With flag == 0 the entry is removed through index_del(); with any
+ * other flag the whole per-gfid subdirectory is wiped recursively in a
+ * synctask.  Parent iatts are sampled before and after and returned in
+ * the unwind.
+ */
+int
+index_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+                     dict_t *xdata)
+{
+        int ret = 0;
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+        char *subdir = NULL;
+        char index_dir[PATH_MAX] = {0};
+        char index_subdir[PATH_MAX] = {0};
+        uuid_t gfid = {0};
+        struct iatt preparent = {0};
+        struct iatt postparent = {0};
+        index_priv_t *priv = NULL;
+        index_xattrop_type_t type = XATTROP_TYPE_UNSET;
+        struct index_syncop_args args = {0,};
+
+        priv = this->private;
+
+        type = index_get_type_from_vgfid (priv, loc->pargfid);
+        subdir = index_get_subdir_from_vgfid (priv, loc->pargfid);
+        make_index_dir_path (priv->index_basepath, subdir,
+                             index_dir, sizeof (index_dir));
+
+        index_get_parent_iatt (&preparent, index_dir, loc, &op_ret, &op_errno);
+        if (op_ret < 0)
+                goto done;
+
+        /* NOTE(review): the parse result is ignored; gfid stays all-zero
+         * if loc->name is not a uuid - confirm callers guarantee that. */
+        gf_uuid_parse (loc->name, gfid);
+        make_gfid_path (priv->index_basepath, subdir, gfid, index_subdir,
+                        sizeof (index_subdir));
+
+        if (flag == 0) {
+                ret = index_del (this, gfid, subdir, type);
+                if (ret < 0) {
+                        op_ret = -1;
+                        op_errno = -ret;
+                        goto done;
+                }
+        } else {
+                args.path = index_subdir;
+                /* NOTE(review): the synctask result is not checked, so a
+                 * failed wipe is still reported as success - confirm this
+                 * best-effort behavior is intended. */
+                ret = synctask_new (this->ctx->env, index_wipe_index_subdir,
+                                    NULL, NULL, &args);
+        }
+
+        index_get_parent_iatt (&postparent, index_dir, loc, &op_ret, &op_errno);
+        if (op_ret < 0)
+                goto done;
+
+done:
+        INDEX_STACK_UNWIND (rmdir, frame, op_ret, op_errno, &preparent,
+                            &postparent, xdata);
+        return 0;
+}
+
+/*
+ * Worker-side unlink below an index directory.  Depending on the index
+ * type derived from loc->pargfid:
+ *   - no known type: the parent is a per-gfid directory under
+ *     entry-changes; delete the named entry recorded for the
+ *     virtual_pargfid stored in the parent's inode context,
+ *   - ENTRY_CHANGES: unlink the file directly under the entry-changes
+ *     directory,
+ *   - otherwise: parse loc->name as a gfid and remove it with index_del().
+ * Parent iatts are sampled before and after and returned in the unwind.
+ * Failures are reported as op_ret = -1 with the positive errno.
+ */
+int
+index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+                      dict_t *xdata)
+{
+        index_priv_t *priv = NULL;
+        index_inode_ctx_t *ictx = NULL;
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+        int ret = 0;
+        index_xattrop_type_t type = XATTROP_TYPE_UNSET;
+        struct iatt preparent = {0};
+        struct iatt postparent = {0};
+        char index_dir[PATH_MAX] = {0};
+        char filepath[PATH_MAX] = {0};
+        uuid_t gfid = {0};
+        char *subdir = NULL;
+
+        priv = this->private;
+        type = index_get_type_from_vgfid (priv, loc->pargfid);
+        ret = index_inode_path (this, loc->parent, index_dir,
+                                sizeof (index_dir));
+        if (ret < 0) {
+                op_ret = -1;
+                op_errno = -ret;
+                goto done;
+        }
+
+        index_get_parent_iatt (&preparent, index_dir, loc, &op_ret, &op_errno);
+        if (op_ret < 0)
+                goto done;
+
+        if (type <= XATTROP_TYPE_UNSET) {
+                ret = index_inode_ctx_get (loc->parent, this, &ictx);
+                if ((ret == 0) && gf_uuid_is_null (ictx->virtual_pargfid)) {
+                        ret = -EINVAL;
+                }
+                if (ret == 0) {
+                        ret = index_entry_delete (this, ictx->virtual_pargfid,
+                                                  (char *)loc->name);
+                }
+        } else if (type == ENTRY_CHANGES) {
+                make_file_path (priv->index_basepath, ENTRY_CHANGES_SUBDIR,
+                                (char *)loc->name, filepath, sizeof (filepath));
+                ret = sys_unlink (filepath);
+                /* sys_unlink() reports failure as -1 with errno set; turn
+                 * that into the negative-errno convention used below so
+                 * the caller sees the real error rather than errno 1. */
+                if (ret < 0)
+                        ret = -errno;
+        } else {
+                subdir = index_get_subdir_from_type (type);
+                gf_uuid_parse (loc->name, gfid);
+                ret = index_del (this, gfid, subdir, type);
+        }
+        if (ret < 0) {
+                op_ret = -1;
+                op_errno = -ret;
+                goto done;
+        }
+
+        index_get_parent_iatt (&postparent, index_dir, loc, &op_ret, &op_errno);
+        if (op_ret < 0)
+                goto done;
+done:
+        INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent,
+                            &postparent, xdata);
+        return 0;
+}
+
+/*
+ * getxattr entry point.  Requests for a vgfid xattr name,
+ * GF_XATTROP_INDEX_COUNT or GF_XATTROP_DIRTY_COUNT are answered by this
+ * xlator through index_getxattr_wrapper() on the worker queue; everything
+ * else (including a NULL name) is wound to the child.
+ */
+int32_t
+index_getxattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name, dict_t *xdata)
+{
+        call_stub_t  *queued = NULL;
+        gf_boolean_t  handled = _gf_false;
+
+        if (name && (index_is_vgfid_xattr (name) ||
+                     !strcmp (GF_XATTROP_INDEX_COUNT, name) ||
+                     !strcmp (GF_XATTROP_DIRTY_COUNT, name)))
+                handled = _gf_true;
+
+        if (!handled) {
+                STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr, loc, name,
+                            xdata);
+                return 0;
+        }
+
+        queued = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name,
+                                    xdata);
+        if (queued == NULL) {
+                STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        worker_enqueue (this, queued);
+        return 0;
+}
+
+/*
+ * Scans the index directory of 'type' and reports whether any entry in it
+ * has extra hard links.
+ *
+ * Returns:
+ *   -1  the directory could not be opened,
+ *   >0  (st_nlink - 1) of the first entry found with more than one link;
+ *       the scan stops there,
+ *    0  the directory had no entries besides "." and "..", or the last
+ *       entry examined had a link count of exactly one,
+ *   -2  the last entry examined could not be lstat()ed.
+ * Note that 0 and -2 reflect only the most recently examined entry.
+ */
+int64_t
+index_fetch_link_count (xlator_t *this, index_xattrop_type_t type)
+{
+        index_priv_t *priv = this->private;
+        char *subdir = NULL;
+        struct stat lstatbuf = {0,};
+        int ret = -1;
+        int64_t count = -1;
+        DIR *dirp = NULL;
+        struct dirent *entry = NULL;
+        struct dirent scratch[2] = {{0,},};
+        char index_dir[PATH_MAX] = {0,};
+        char index_path[PATH_MAX] = {0,};
+
+        subdir = index_get_subdir_from_type (type);
+        make_index_dir_path (priv->index_basepath, subdir,
+                             index_dir, sizeof (index_dir));
+
+        dirp = sys_opendir (index_dir);
+        if (!dirp)
+                goto out;
+
+        for (;;) {
+                errno = 0;
+                entry = sys_readdir (dirp, scratch);
+                if (!entry || errno != 0) {
+                        /* End of directory (or read error): -1 here means
+                         * no entry was ever examined. */
+                        if (count == -1)
+                                count = 0;
+                        goto out;
+                }
+
+                if (strcmp (entry->d_name, ".") == 0 ||
+                    strcmp (entry->d_name, "..") == 0)
+                        continue;
+
+                make_file_path (priv->index_basepath, subdir,
+                                entry->d_name, index_path, sizeof(index_path));
+
+                ret = sys_lstat (index_path, &lstatbuf);
+                if (ret < 0) {
+                        count = -2;
+                        continue;
+                } else {
+                        count = lstatbuf.st_nlink - 1;
+                        if (count == 0)
+                                continue;
+                        else
+                                break;
+                }
+        }
+out:
+        if (dirp)
+                (void) sys_closedir (dirp);
+        return count;
+}
+
+/*
+ * Returns a referenced dict (the caller's xdata with an extra ref, or a
+ * new one) carrying "link-count": 0 when the XATTROP link count is zero,
+ * 1 otherwise.  The cached count is used when it is non-negative;
+ * otherwise it is recomputed from disk and stored back.  Returns NULL if
+ * no dict could be obtained.  The caller must unref the result.
+ */
+dict_t*
+index_fill_link_count (xlator_t *this, dict_t *xdata)
+{
+        index_priv_t *conf = this->private;
+        int64_t       links = -1;
+        int8_t        flag = 0;
+
+        xdata = (xdata) ? dict_ref (xdata) : dict_new ();
+        if (!xdata)
+                return NULL;
+
+        index_get_link_count (conf, &links, XATTROP);
+        if (links < 0) {
+                links = index_fetch_link_count (this, XATTROP);
+                index_set_link_count (conf, links, XATTROP);
+        }
+
+        flag = (links == 0) ? 0 : 1;
+        if (dict_set_int8 (xdata, "link-count", flag) < 0)
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        INDEX_MSG_DICT_SET_FAILED,
+                        "Unable to set link-count");
+
+        return xdata;
+}
+
+/*
+ * Lookup callback used when the request asked for "link-count": adds the
+ * link-count key to the response xdata (creating the dict if the child
+ * returned none) before unwinding, then drops the ref taken by
+ * index_fill_link_count().
+ */
+int32_t
+index_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+
+        xdata = index_fill_link_count (this, xdata);
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+                             xdata, postparent);
+        if (xdata)
+                dict_unref (xdata);
+        return 0;
+}
+
+/*
+ * lookup entry point.  A lookup is served by this xlator (queued to the
+ * worker as index_lookup_wrapper) when the parent or the inode is an index
+ * internal inode, or when the inode is not yet linked but the inode found
+ * in the table for loc->gfid is internal.  Every other lookup is wound to
+ * the child; if xattr_req carries "link-count" set to
+ * GF_XATTROP_INDEX_COUNT the reply goes through index_lookup_cbk so the
+ * link-count can be added.
+ */
+int32_t
+index_lookup (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xattr_req)
+{
+        inode_t *inode = NULL;
+        call_stub_t *stub = NULL;
+        index_priv_t *priv = NULL;
+        char *flag = NULL;
+        int ret = -1;
+
+        priv = this->private;
+
+        if (!index_is_fop_on_internal_inode (this, loc->parent, loc->pargfid) &&
+            !index_is_fop_on_internal_inode (this, loc->inode, loc->gfid)) {
+                if (!inode_is_linked (loc->inode)) {
+                        /* Fresh inode: check the linked inode registered
+                         * for this gfid instead. */
+                        inode = inode_find (loc->inode->table, loc->gfid);
+                        if (!index_is_fop_on_internal_inode (this, inode,
+                                                             loc->gfid)) {
+                                inode_unref (inode);
+                                goto normal;
+                        }
+                        inode_unref (inode);
+                } else {
+                        goto normal;
+                }
+        }
+
+        stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req);
+        if (!stub) {
+                STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+        worker_enqueue (this, stub);
+        return 0;
+normal:
+        ret = dict_get_str (xattr_req, "link-count", &flag);
+        if ((ret == 0) && (strcmp (flag, GF_XATTROP_INDEX_COUNT) == 0)) {
+                STACK_WIND (frame, index_lookup_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+        } else {
+                STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+        }
+
+        return 0;
+}
+
+/*
+ * fstat callback used when the request asked for "link-count": adds the
+ * link-count key to the response xdata before unwinding, then drops the
+ * ref taken by index_fill_link_count().
+ */
+int32_t
+index_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                 dict_t *xdata)
+{
+        xdata = index_fill_link_count (this, xdata);
+        STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+        if (xdata)
+                dict_unref (xdata);
+        return 0;
+}
+
+/*
+ * fstat entry point: always wound to the child.  If xdata carries
+ * "link-count" set to GF_XATTROP_INDEX_COUNT, the reply is routed through
+ * index_fstat_cbk so the link-count can be added; otherwise the default
+ * callback is used.
+ */
+int32_t
+index_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        int ret = -1;
+        char *flag = NULL;
+
+        ret = dict_get_str (xdata, "link-count", &flag);
+        if ((ret == 0) && (strcmp (flag, GF_XATTROP_INDEX_COUNT) == 0)) {
+                STACK_WIND (frame, index_fstat_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        } else {
+                STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        }
+
+        return 0;
+}
+
+/*
+ * opendir entry point.  For an index internal inode the fop is answered
+ * here with immediate success; anything else is wound to the child.
+ */
+int32_t
+index_opendir (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        if (index_is_fop_on_internal_inode (this, fd->inode, NULL)) {
+                frame->local = NULL;
+                STACK_UNWIND_STRICT (opendir, frame, 0, 0, fd, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+        return 0;
+}
+
+/*
+ * readdir entry point.  Reads on an index internal inode are queued to
+ * the worker as index_readdir_wrapper (ENOMEM if the stub cannot be
+ * allocated); anything else is wound to the child.
+ */
+int32_t
+index_readdir (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        call_stub_t *queued = NULL;
+
+        if (index_is_fop_on_internal_inode (this, fd->inode, NULL)) {
+                queued = fop_readdir_stub (frame, index_readdir_wrapper, fd,
+                                           size, off, xdata);
+                if (queued == NULL) {
+                        STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                        return 0;
+                }
+                worker_enqueue (this, queued);
+                return 0;
+        }
+
+        STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+        return 0;
+}
+
+/*
+ * unlink entry point.  Unlinks whose parent is an index internal inode
+ * are queued to the worker as index_unlink_wrapper (ENOMEM if the stub
+ * cannot be allocated); anything else is wound to the child.
+ */
+int
+index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+              dict_t *xdata)
+{
+        call_stub_t *queued = NULL;
+
+        if (index_is_fop_on_internal_inode (this, loc->parent, NULL)) {
+                queued = fop_unlink_stub (frame, index_unlink_wrapper, loc,
+                                          xflag, xdata);
+                if (queued == NULL) {
+                        STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL,
+                                             NULL, NULL);
+                        return 0;
+                }
+                worker_enqueue (this, queued);
+                return 0;
+        }
+
+        STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+        return 0;
+}
+
+/*
+ * rmdir entry point.  Removals whose parent is an index internal inode
+ * are queued to the worker as index_rmdir_wrapper (ENOMEM if the stub
+ * cannot be allocated); anything else is tail-wound to the child.
+ */
+int
+index_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+             dict_t *xdata)
+{
+        call_stub_t *queued = NULL;
+
+        if (index_is_fop_on_internal_inode (this, loc->parent, NULL)) {
+                queued = fop_rmdir_stub (frame, index_rmdir_wrapper, loc,
+                                         flags, xdata);
+                if (queued == NULL) {
+                        STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL,
+                                             NULL, NULL);
+                        return 0;
+                }
+                worker_enqueue (this, queued);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+        return 0;
+}
+
+/*
+ * Parses a comma separated list of xattr names into a dict (every key
+ * maps to a shared dummy value) and installs it as the watchlist for
+ * 'type': DIRTY -> priv->dirty_watchlist, XATTROP ->
+ * priv->pending_watchlist.
+ *
+ * A NULL watchlist is a no-op.  If the slot already holds a dict (init()
+ * may target XATTROP twice), the old dict is released before being
+ * replaced.  For any other type the parsed dict is not installed anywhere
+ * and is released before returning.
+ *
+ * Returns 0 on success, non-zero on allocation or dict failure.
+ */
+int
+index_make_xattrop_watchlist (xlator_t *this, index_priv_t *priv,
+                              char *watchlist, index_xattrop_type_t type)
+{
+        char *delim = NULL;
+        char *dup_watchlist = NULL;
+        char *key = NULL;
+        char *saveptr = NULL;
+        dict_t *xattrs = NULL;
+        data_t *dummy = NULL;
+        int ret = 0;
+
+        if (!watchlist)
+                return 0;
+
+        /* strtok_r() modifies its input, so work on a private copy. */
+        dup_watchlist = gf_strdup (watchlist);
+        if (!dup_watchlist)
+                return -1;
+
+        xattrs = dict_new ();
+        if (!xattrs) {
+                ret = -1;
+                goto out;
+        }
+
+        dummy = int_to_data (1);
+        if (!dummy) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Hold our own ref for the duration of the loop; dropped at out. */
+        data_ref (dummy);
+
+        delim = ",";
+        key = strtok_r (dup_watchlist, delim, &saveptr);
+        while (key) {
+                if (strlen (key) == 0) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set (xattrs, key, dummy);
+                if (ret)
+                        goto out;
+
+                key = strtok_r (NULL, delim, &saveptr);
+        }
+
+        switch (type) {
+        case DIRTY:
+                if (priv->dirty_watchlist)
+                        dict_unref (priv->dirty_watchlist);
+                priv->dirty_watchlist = xattrs;
+                xattrs = NULL;
+                break;
+        case XATTROP:
+                if (priv->pending_watchlist)
+                        dict_unref (priv->pending_watchlist);
+                priv->pending_watchlist = xattrs;
+                xattrs = NULL;
+                break;
+        default:
+                /* Nobody took ownership; xattrs is released below. */
+                break;
+        }
+
+        ret = 0;
+out:
+        if (xattrs)
+                dict_unref (xattrs);
+
+        GF_FREE (dup_watchlist);
+
+        if (dummy)
+                data_unref (dummy);
+
+        return ret;
+}
+
+/*
+ * Sets up memory accounting for this xlator with room for
+ * gf_index_mt_end + 1 memory types; returns xlator_mem_acct_init()'s
+ * result unchanged.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        return xlator_mem_acct_init (this, gf_index_mt_end + 1);
+}
+
+/*
+ * Translator init: validates the graph (exactly one child), allocates the
+ * private state, initializes its lock/cond/mutex, reads the options,
+ * builds the watchlists, generates the virtual gfids, creates the index
+ * directories, seeds the cached XATTROP link count and starts the worker
+ * thread.
+ *
+ * Returns 0 on success.  On failure everything set up so far is torn down
+ * (including priv->lock), this->private and this->local_pool are reset to
+ * NULL, and a non-zero value is returned.
+ */
+int
+init (xlator_t *this)
+{
+        int i = 0;
+        int ret = -1;
+        int64_t count = -1;
+        index_priv_t *priv = NULL;
+        pthread_t thread;
+        pthread_attr_t w_attr;
+        gf_boolean_t lock_inited = _gf_false;
+        gf_boolean_t mutex_inited = _gf_false;
+        gf_boolean_t cond_inited = _gf_false;
+        gf_boolean_t attr_inited = _gf_false;
+        char *watchlist = NULL;
+        char *dirtylist = NULL;
+        char *pendinglist = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        INDEX_MSG_INVALID_GRAPH,
+                        "'index' not configured with exactly one child");
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        INDEX_MSG_INVALID_GRAPH,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        LOCK_INIT (&priv->lock);
+        lock_inited = _gf_true;
+
+        if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        INDEX_MSG_INVALID_ARGS,
+                        "pthread_cond_init failed");
+                goto out;
+        }
+        cond_inited = _gf_true;
+
+        if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        INDEX_MSG_INVALID_ARGS,
+                        "pthread_mutex_init failed");
+                goto out;
+        }
+        mutex_inited = _gf_true;
+
+        if ((ret = pthread_attr_init (&w_attr)) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        INDEX_MSG_INVALID_ARGS,
+                        "pthread_attr_init failed");
+                goto out;
+        }
+        attr_inited = _gf_true;
+
+        ret = pthread_attr_setstacksize (&w_attr, INDEX_THREAD_STACK_SIZE);
+        if (ret == EINVAL) {
+                gf_msg (this->name, GF_LOG_WARNING, ret,
+                        INDEX_MSG_INVALID_ARGS,
+                        "Using default thread stack size");
+        }
+
+        /* GF_OPTION_INIT jumps to the label on failure.  ret is forced to
+         * a failure value before each use so such a jump can never reach
+         * 'out' with ret == 0 left over from an earlier successful step.
+         * NOTE(review): assumes the macro does not set ret itself. */
+        ret = -1;
+        GF_OPTION_INIT ("index-base", priv->index_basepath, path, out);
+
+        GF_OPTION_INIT ("xattrop64-watchlist", watchlist, str, out);
+        ret = index_make_xattrop_watchlist (this, priv, watchlist,
+                                            XATTROP);
+        if (ret)
+                goto out;
+
+        ret = -1;
+        GF_OPTION_INIT ("xattrop-dirty-watchlist", dirtylist, str, out);
+        ret = index_make_xattrop_watchlist (this, priv, dirtylist,
+                                            DIRTY);
+        if (ret)
+                goto out;
+
+        ret = -1;
+        GF_OPTION_INIT ("xattrop-pending-watchlist", pendinglist, str, out);
+        ret = index_make_xattrop_watchlist (this, priv, pendinglist,
+                                            XATTROP);
+        if (ret)
+                goto out;
+
+        /* complete_watchlist is the union of the dirty and pending lists. */
+        if (priv->dirty_watchlist)
+                priv->complete_watchlist = dict_copy_with_ref (priv->dirty_watchlist,
+                                                      priv->complete_watchlist);
+        if (priv->pending_watchlist)
+                priv->complete_watchlist = dict_copy_with_ref (priv->pending_watchlist,
+                                                      priv->complete_watchlist);
+
+        gf_uuid_generate (priv->index);
+        for (i = 0; i < XATTROP_TYPE_END; i++)
+                gf_uuid_generate (priv->internal_vgfid[i]);
+
+        INIT_LIST_HEAD (&priv->callstubs);
+
+        this->local_pool = mem_pool_new (index_local_t, 64);
+        if (!this->local_pool) {
+                ret = -1;
+                goto out;
+        }
+
+        this->private = priv;
+
+        ret = index_dir_create (this, XATTROP_SUBDIR);
+        if (ret < 0)
+                goto out;
+
+        /* The dirty index directory only exists when a dirty watchlist
+         * was configured. */
+        if (priv->dirty_watchlist) {
+                ret = index_dir_create (this, DIRTY_SUBDIR);
+                if (ret < 0)
+                        goto out;
+        }
+
+        ret = index_dir_create (this, ENTRY_CHANGES_SUBDIR);
+        if (ret < 0)
+                goto out;
+
+        /*init indices files counts*/
+        count = index_fetch_link_count (this, XATTROP);
+        index_set_link_count (priv, count, XATTROP);
+
+        ret = gf_thread_create (&thread, &w_attr, index_worker, this);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, ret,
+                        INDEX_MSG_WORKER_THREAD_CREATE_FAILED,
+                        "Failed to create worker thread, aborting");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret) {
+                if (cond_inited)
+                        pthread_cond_destroy (&priv->cond);
+                if (mutex_inited)
+                        pthread_mutex_destroy (&priv->mutex);
+                if (lock_inited)
+                        LOCK_DESTROY (&priv->lock);
+                if (priv && priv->dirty_watchlist)
+                        dict_unref (priv->dirty_watchlist);
+                if (priv && priv->pending_watchlist)
+                        dict_unref (priv->pending_watchlist);
+                if (priv && priv->complete_watchlist)
+                        dict_unref (priv->complete_watchlist);
+                if (priv)
+                        GF_FREE (priv);
+                this->private = NULL;
+                mem_pool_destroy (this->local_pool);
+                this->local_pool = NULL;
+        }
+
+        if (attr_inited)
+                pthread_attr_destroy (&w_attr);
+        return ret;
+}
+
+/*
+ * Translator teardown: detaches the private state from the xlator,
+ * destroys its lock, condition variable and mutex, releases the three
+ * watchlist dicts, frees the private structure and destroys the local
+ * pool.  Does nothing when there is no private state.
+ */
+void
+fini (xlator_t *this)
+{
+        index_priv_t *conf = this->private;
+
+        if (conf == NULL)
+                return;
+
+        this->private = NULL;
+
+        LOCK_DESTROY (&conf->lock);
+        pthread_cond_destroy (&conf->cond);
+        pthread_mutex_destroy (&conf->mutex);
+
+        if (conf->dirty_watchlist)
+                dict_unref (conf->dirty_watchlist);
+        if (conf->pending_watchlist)
+                dict_unref (conf->pending_watchlist);
+        if (conf->complete_watchlist)
+                dict_unref (conf->complete_watchlist);
+
+        GF_FREE (conf);
+
+        mem_pool_destroy (this->local_pool);
+        this->local_pool = NULL;
+}
+
+/*
+ * forget callback: removes this xlator's context from the inode and, if
+ * one was stored, frees it.  Always returns 0.
+ */
+int
+index_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t raw_ctx = 0;
+
+        if (inode_ctx_del (inode, this, &raw_ctx) == 0)
+                GF_FREE ((index_inode_ctx_t*) (long)raw_ctx);
+
+        return 0;
+}
+
+/*
+ * releasedir callback: removes this xlator's fd context, closes the
+ * directory stream it holds (logging a closedir failure) and frees the
+ * context.  Always returns 0.
+ */
+int32_t
+index_releasedir (xlator_t *this, fd_t *fd)
+{
+        index_fd_ctx_t *dctx = NULL;
+        uint64_t        raw_ctx = 0;
+
+        if (fd_ctx_del (fd, this, &raw_ctx) < 0)
+                return 0;
+
+        dctx = (index_fd_ctx_t*) (long) raw_ctx;
+        if (dctx->dir && sys_closedir (dctx->dir))
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        INDEX_MSG_FD_OP_FAILED,
+                        "closedir error");
+
+        GF_FREE (dctx);
+        return 0;
+}
+
+/*
+ * release callback: removes this xlator's fd context, if any, and frees
+ * it.  Always returns 0.
+ */
+int32_t
+index_release (xlator_t *this, fd_t *fd)
+{
+        uint64_t raw_ctx = 0;
+
+        if (fd_ctx_del (fd, this, &raw_ctx) >= 0)
+                GF_FREE ((index_fd_ctx_t*) (long) raw_ctx);
+
+        return 0;
+}
+
+/*
+ * Event notification hook: no index-specific handling, everything is
+ * delegated to default_notify() and its result returned.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        return default_notify (this, event, data);
+}
+
+/* File operations implemented by the index xlator; fops not listed here
+ * are not overridden by this table. */
+struct xlator_fops fops = {
+        /* tracked through the per-inode stub queue */
+        .xattrop = index_xattrop,
+        .fxattrop = index_fxattrop,
+
+        //interface functions follow
+        .getxattr = index_getxattr,
+        .lookup = index_lookup,
+        .opendir = index_opendir,
+        .readdir = index_readdir,
+        .unlink = index_unlink,
+        .rmdir = index_rmdir,
+        .fstat = index_fstat,
+};
+
+/* No state-dump hooks are provided; the table is left zero-initialized. */
+struct xlator_dumpops dumpops;
+
+/* Lifecycle callbacks that release the per-inode and per-fd contexts. */
+struct xlator_cbks cbks = {
+        .forget = index_forget,
+        .release = index_release,
+        .releasedir = index_releasedir
+};
+
+/* Options read by init() through GF_OPTION_INIT. */
+struct volume_options options[] = {
+        { .key = {"index-base" },
+          .type = GF_OPTION_TYPE_PATH,
+          .description = "path where the index files need to be stored",
+        },
+        /* init() installs this list as the XATTROP (pending) watchlist. */
+        { .key = {"xattrop64-watchlist" },
+          .type = GF_OPTION_TYPE_STR,
+          .description = "Comma separated list of xattrs that are watched",
+        },
+        /* init() installs this list as the DIRTY watchlist. */
+        { .key = {"xattrop-dirty-watchlist" },
+          .type = GF_OPTION_TYPE_STR,
+          .description = "Comma separated list of xattrs that are watched",
+        },
+        /* init() installs this list as the XATTROP (pending) watchlist,
+         * after "xattrop64-watchlist" has been processed. */
+        { .key = {"xattrop-pending-watchlist" },
+          .type = GF_OPTION_TYPE_STR,
+          .description = "Comma separated list of xattrs that are watched",
+        },
+        { .key = {NULL} },
+};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
new file mode 100644
index 00000000000..5fb5a65cd8e
--- /dev/null
+++ b/xlators/features/index/src/index.h
@@ -0,0 +1,86 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INDEX_H__
+#define __INDEX_H__
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "index-mem-types.h"
+
+#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024))
+
+typedef enum { /* three-valued state; NOTE(review): presumably the values kept in index_inode_ctx.state[] - confirm in index.c */
+ UNKNOWN, /* first enumerator, value 0 */
+ IN,
+ NOTIN
+} index_state_t;
+
+typedef enum { /* kinds of index; real kinds are numbered from 0 so they can index arrays */
+ XATTROP_TYPE_UNSET = -1, /* sentinel placed before the first real kind */
+ XATTROP,
+ DIRTY,
+ ENTRY_CHANGES,
+ XATTROP_TYPE_END /* count of real kinds; used as an array size in this header */
+} index_xattrop_type_t;
+
+typedef struct index_inode_ctx { /* NOTE(review): presumably the per-inode context of the index xlator - confirm in index.c */
+ gf_boolean_t processing;
+ struct list_head callstubs;
+ int state[XATTROP_TYPE_END]; /* one slot per index_xattrop_type_t kind */
+ uuid_t virtual_pargfid; /* virtual gfid of dir under
+ .glusterfs/indices/entry-changes. */
+} index_inode_ctx_t;
+
+typedef struct index_fd_ctx { /* context object stored on an fd via the fd ctx API and freed on release */
+ DIR *dir; /* directory stream; closed with sys_closedir() when the directory fd is released */
+ off_t dir_eof; /* NOTE(review): presumably the offset at which end-of-directory was seen - confirm */
+} index_fd_ctx_t;
+
+typedef struct index_priv { /* NOTE(review): presumably the xlator's this->private - confirm in index.c */
+ char *index_basepath;
+ char *dirty_basepath;
+ uuid_t index;
+ gf_lock_t lock;
+ uuid_t internal_vgfid[XATTROP_TYPE_END]; /* one gfid per index_xattrop_type_t kind */
+ struct list_head callstubs;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ dict_t *dirty_watchlist;
+ dict_t *pending_watchlist;
+ dict_t *complete_watchlist;
+ int64_t pending_count;
+} index_priv_t;
+
+typedef struct index_local { /* per-call state kept in frame->local; released by INDEX_STACK_UNWIND */
+ inode_t *inode; /* unref'ed by INDEX_STACK_UNWIND */
+ dict_t *xdata; /* unref'ed by INDEX_STACK_UNWIND when non-NULL */
+} index_local_t;
+
+#define INDEX_STACK_UNWIND(fop, frame, params ...) /* unwind fop, then release the frame's index_local_t */ \
+do { \
+ index_local_t *__local = NULL; \
+ if (frame) { /* detach local before unwinding */ \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { /* drop the refs held by local and return it to its mem pool */ \
+ inode_unref (__local->inode); \
+ if (__local->xdata) \
+ dict_unref (__local->xdata); \
+ mem_put (__local); \
+ } \
+} while (0)
+
+#endif
diff --git a/xlators/features/leases/Makefile.am b/xlators/features/leases/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/leases/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/leases/src/Makefile.am b/xlators/features/leases/src/Makefile.am
new file mode 100644
index 00000000000..343f5c82425
--- /dev/null
+++ b/xlators/features/leases/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = leases.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+leases_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+leases_la_SOURCES = leases.c leases-internal.c
+
+leases_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = leases.h leases-mem-types.h leases-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/timer-wheel
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
new file mode 100644
index 00000000000..6884b581273
--- /dev/null
+++ b/xlators/features/leases/src/leases-internal.c
@@ -0,0 +1,1351 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "leases.h"
+
+
+/* Mutex locks used in this xlator and their order of acquisition:
+ * Check lease conflict:
+ * lease_ctx lock
+ * add_timer => internal timer locks
+ * lease_ctx unlock
+ *
+ * Add/remove lease:
+ * lease_ctx lock
+ * add_timer => internal timer locks
+ * OR
+ * priv lock => Adding/removing to/from the cleanup client list
+ * priv unlock
+ * lease_ctx unlock
+ *
+ * Timer thread:
+ * Timer internal lock
+ * priv lock => By timer handler
+ * priv unlock
+ * Timer internal unlock
+ *
+ * Expired recall cleanup thread:
+ * priv lock
+ * priv condwait
+ * priv unlock
+ * lease_ctx lock
+ * priv lock
+ * priv unlock
+ * lease_ctx unlock
+ */
+
+/*
+ * Report whether the leases feature is switched on for this xlator.
+ * Return value:
+ * _gf_true  - private data exists and its leases_enabled flag is set
+ * _gf_false - flag not set, no private data, or 'this' is NULL
+ */
+gf_boolean_t
+is_leases_enabled (xlator_t *this)
+{
+        gf_boolean_t      enabled = _gf_false;
+        leases_private_t *conf    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+
+        conf = (leases_private_t *) this->private;
+        if (conf)
+                enabled = conf->leases_enabled;
+out:
+        return enabled;
+}
+
+
+/*
+ * Fetch the configured recall-lease timeout.
+ * Return value:
+ * the recall_lease_timeout stored in the xlator's private data, or
+ * -1 when 'this' is NULL or has no private data.
+ */
+int32_t
+get_recall_lease_timeout (xlator_t *this)
+{
+        int32_t           secs = -1;
+        leases_private_t *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+
+        conf = (leases_private_t *) this->private;
+        if (conf)
+                secs = conf->recall_lease_timeout;
+out:
+        return secs;
+}
+
+
+/*
+ * Debug-log the lease bookkeeping of an inode: first the inode-wide
+ * counters, then one line per lease-id entry on its list. Read-only.
+ */
+static void
+__dump_leases_info (xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+        lease_id_entry_t *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+
+        gf_msg_debug (this->name, 0, "Lease held on this inode, lease_type: %d,"
+                      " lease_cnt:%"PRIu64", RD lease:%d, RW lease:%d, "
+                      "openfd cnt:%"PRIu64, lease_ctx->lease_type,
+                      lease_ctx->lease_cnt,
+                      lease_ctx->lease_type_cnt[GF_RD_LEASE],
+                      lease_ctx->lease_type_cnt[GF_RW_LEASE],
+                      lease_ctx->openfd_cnt);
+
+        /* Nothing is unlinked while walking, so the plain iterator is enough. */
+        list_for_each_entry (entry, &lease_ctx->lease_id_list, lease_id_list) {
+                gf_msg_debug (this->name, 0, "Leases held by client: %s, lease "
+                              "ID:%s, RD lease:%d, RW lease:%d, lease_type: %d, "
+                              "lease_cnt:%"PRIu64, entry->client_uid,
+                              entry->lease_id,
+                              entry->lease_type_cnt[GF_RD_LEASE],
+                              entry->lease_type_cnt[GF_RW_LEASE],
+                              entry->lease_type, entry->lease_cnt);
+        }
+out:
+        return;
+}
+
+
+/*
+ * Allocate a fresh lease context and attach it to the inode.
+ *
+ * Returns 0 when a context is already attached (after logging) or when
+ * the new one was stored, and a negative value on failure.
+ *
+ * The double-underscore name follows the file's convention for helpers
+ * whose caller already holds the relevant lock (lease_ctx_get() takes
+ * inode->lock around this path).
+ */
+static int
+__lease_ctx_set (inode_t *inode, xlator_t *this)
+{
+        lease_inode_ctx_t *inode_ctx = NULL;
+        int                ret       = -1;
+        uint64_t           ctx       = 0;
+
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+
+        /* A context is already present: do not replace it. */
+        ret = __inode_ctx_get (inode, this, &ctx);
+        if (!ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, LEASE_MSG_INVAL_INODE_CTX,
+                        "inode_ctx_get failed");
+                goto out;
+        }
+
+        inode_ctx = GF_CALLOC (1, sizeof (*inode_ctx),
+                               gf_leases_mt_lease_inode_ctx_t);
+        GF_CHECK_ALLOC (inode_ctx, ret, out);
+
+        pthread_mutex_init (&inode_ctx->lock, NULL);
+        INIT_LIST_HEAD (&inode_ctx->lease_id_list);
+        INIT_LIST_HEAD (&inode_ctx->blocked_list);
+
+        inode_ctx->lease_cnt = 0;
+
+        /* The setter stores the 64-bit value that its argument points at,
+         * so hand it the address of a variable holding the pointer value,
+         * mirroring how __lease_ctx_get() converts the value back. */
+        ctx = (uint64_t)(long) inode_ctx;
+        ret = __inode_ctx_set (inode, this, &ctx);
+        if (ret) {
+                pthread_mutex_destroy (&inode_ctx->lock);
+                GF_FREE (inode_ctx);
+                gf_msg (this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_INODE_CTX,
+                        "failed to set inode ctx (%p)", inode);
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Return the lease context attached to the inode, creating and attaching
+ * one through __lease_ctx_set() when none exists yet. Returns NULL on
+ * invalid arguments or when the context can neither be created nor read
+ * back.
+ */
+static lease_inode_ctx_t *
+__lease_ctx_get (inode_t *inode, xlator_t *this)
+{
+        uint64_t           raw    = 0;
+        lease_inode_ctx_t *result = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+
+        if (__inode_ctx_get (inode, this, &raw) < 0) {
+                /* First use of this inode: create the context, then
+                 * fetch it again. */
+                if (__lease_ctx_set (inode, this) < 0)
+                        goto out;
+
+                if (__inode_ctx_get (inode, this, &raw) < 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0, LEASE_MSG_INVAL_INODE_CTX,
+                                "failed to get inode ctx (%p)", inode);
+                        goto out;
+                }
+        }
+
+        result = (lease_inode_ctx_t *)(long) raw;
+out:
+        return result;
+}
+
+
+/*
+ * Locked wrapper around __lease_ctx_get(): takes inode->lock for the
+ * duration of the lookup/creation. Returns NULL on invalid arguments or
+ * when no context could be obtained.
+ */
+lease_inode_ctx_t *
+lease_ctx_get (inode_t *inode, xlator_t *this)
+{
+        lease_inode_ctx_t *result = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+
+        LOCK (&inode->lock);
+        result = __lease_ctx_get (inode, this);
+        UNLOCK (&inode->lock);
+out:
+        return result;
+}
+
+
+/*
+ * Allocate a lease-id entry for the client that issued 'frame'.
+ * The entry starts with lease type NONE and a zero count, carries a copy
+ * of the client's uid and of LEASE_ID_SIZE bytes of 'lease_id'.
+ * Returns NULL on invalid arguments or allocation failure.
+ */
+static lease_id_entry_t *
+new_lease_id_entry (call_frame_t *frame, const char *lease_id)
+{
+        lease_id_entry_t *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", frame, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_id, out);
+
+        entry = GF_CALLOC (1, sizeof (*entry), gf_leases_mt_lease_id_entry_t);
+        if (!entry) {
+                gf_msg (frame->this->name, GF_LOG_ERROR, ENOMEM, LEASE_MSG_NO_MEM,
+                        "Memory allocation for lease_entry failed");
+                return NULL;
+        }
+
+        INIT_LIST_HEAD (&entry->lease_id_list);
+        entry->lease_type  = NONE;
+        entry->lease_cnt   = 0;
+        entry->recall_time = get_recall_lease_timeout (frame->this);
+
+        entry->client_uid = gf_strdup (frame->root->client->client_uid);
+        if (entry->client_uid) {
+                memcpy (entry->lease_id, lease_id, LEASE_ID_SIZE);
+        } else {
+                gf_msg (frame->this->name, GF_LOG_ERROR, ENOMEM, LEASE_MSG_NO_MEM,
+                        "Memory allocation for client_uid failed");
+                GF_FREE (entry);
+                entry = NULL;
+        }
+out:
+        return entry;
+}
+
+
+/*
+ * Unlink a lease-id entry from whatever list it is on and free it along
+ * with its client uid string. A NULL entry is logged and ignored.
+ */
+static void
+__destroy_lease_id_entry (lease_id_entry_t *lease_entry)
+{
+        GF_VALIDATE_OR_GOTO ("leases", lease_entry, done);
+
+        /* Unlink first so no list still references the freed memory. */
+        list_del_init (&lease_entry->lease_id_list);
+
+        GF_FREE (lease_entry->client_uid);
+        GF_FREE (lease_entry);
+done:
+        return;
+}
+
+
+/* Byte-wise comparison of two lease ids over LEASE_ID_SIZE bytes. */
+static inline gf_boolean_t
+__is_same_lease_id (const char *k1, const char *k2)
+{
+        return (memcmp (k1, k2, LEASE_ID_SIZE) == 0) ? _gf_true : _gf_false;
+}
+
+
+/* Tells whether any lease id other than 'lease_id' currently holds at
+ * least one lease on the inode described by 'lease_ctx'.
+ * Returns _gf_false on invalid arguments.
+ */
+static gf_boolean_t
+__another_lease_found (lease_inode_ctx_t *lease_ctx, const char *lease_id)
+{
+        lease_id_entry_t *entry = NULL;
+        gf_boolean_t      other = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("leases", lease_id, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+
+        list_for_each_entry (entry, &lease_ctx->lease_id_list, lease_id_list) {
+                /* Entries of the asking lease id never count. */
+                if (__is_same_lease_id (lease_id, entry->lease_id))
+                        continue;
+
+                if (entry->lease_cnt > 0) {
+                        other = _gf_true;
+                        break;
+                }
+        }
+out:
+        return other;
+}
+
+
+/* Look up the entry recorded for 'lease_id' on the given inode context.
+ * Return values:
+ * NULL - no entry for that lease id (or invalid arguments)
+ * lease_id_entry_t* - the first matching entry on the list
+ */
+static lease_id_entry_t *
+__get_lease_id_entry (lease_inode_ctx_t *lease_ctx, const char *lease_id)
+{
+        lease_id_entry_t *entry = NULL;
+        lease_id_entry_t *match = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", lease_id, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+
+        list_for_each_entry (entry, &lease_ctx->lease_id_list, lease_id_list) {
+                if (!__is_same_lease_id (lease_id, entry->lease_id))
+                        continue;
+
+                match = entry;
+                gf_msg_debug ("leases", 0, "lease ID entry found "
+                              "Client UID:%s, lease id:%s",
+                              entry->client_uid,
+                              leaseid_utoa (entry->lease_id));
+                break;
+        }
+out:
+        return match;
+}
+
+
+/* Find the entry for 'lease_id' on the inode context; when there is none,
+ * allocate one and append it to the context's lease-id list.
+ * Return values:
+ * lease_id_entry_t* - the existing or newly added entry
+ * NULL - invalid arguments or allocation failure
+ */
+static lease_id_entry_t *
+__get_or_new_lease_entry (call_frame_t *frame, const char *lease_id,
+                          lease_inode_ctx_t *lease_ctx)
+{
+        lease_id_entry_t *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", frame, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_id, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+
+        entry = __get_lease_id_entry (lease_ctx, lease_id);
+        if (entry)
+                goto out;
+
+        /* Not known yet: create the entry and put it on the list. */
+        entry = new_lease_id_entry (frame, lease_id);
+        if (!entry)
+                goto out;
+
+        list_add_tail (&entry->lease_id_list, &lease_ctx->lease_id_list);
+
+        gf_msg_debug (frame->this->name, 0, "lease ID entry added,"
+                      " Client UID:%s, lease id:%s",
+                      entry->client_uid,
+                      leaseid_utoa (entry->lease_id));
+out:
+        return entry;
+}
+
+
+/*
+ * Allocate a list node wrapping 'inode'. The node holds its own inode
+ * reference, which __destroy_lease_inode() drops. Returns NULL when the
+ * allocation fails.
+ */
+static lease_inode_t *
+new_lease_inode (inode_t *inode)
+{
+        lease_inode_t *node = GF_CALLOC (1, sizeof (*node),
+                                         gf_leases_mt_lease_inode_t);
+
+        if (node) {
+                INIT_LIST_HEAD (&node->list);
+                node->inode = inode_ref (inode);
+        }
+
+        return node;
+}
+
+
+/*
+ * Unlink a lease_inode_t from its list, free it and drop the inode
+ * reference it was holding.
+ */
+static void
+__destroy_lease_inode (lease_inode_t *l_inode)
+{
+        inode_t *held = l_inode->inode;
+
+        list_del_init (&l_inode->list);
+        GF_FREE (l_inode);
+        inode_unref (held);
+}
+
+
+/*
+ * Allocate a cleanup-list record for the client identified by
+ * 'client_uid'. The record owns a private copy of the uid string and
+ * starts with an empty inode list.
+ *
+ * Returns NULL when either allocation fails. A record without a uid is
+ * never returned, because lookups compare client_uid with strcmp().
+ */
+static lease_client_t *
+new_lease_client (const char *client_uid)
+{
+        lease_client_t *clnt = NULL;
+
+        clnt = GF_CALLOC (1, sizeof (*clnt), gf_leases_mt_lease_client_t);
+        if (!clnt)
+                goto out;
+
+        INIT_LIST_HEAD (&clnt->client_list);
+        INIT_LIST_HEAD (&clnt->inode_list);
+
+        clnt->client_uid = gf_strdup (client_uid);
+        if (!clnt->client_uid) {
+                /* Half-built record would crash later string compares. */
+                GF_FREE (clnt);
+                clnt = NULL;
+        }
+out:
+        return clnt;
+}
+
+
+/*
+ * Unlink a client record from the lists it is on and free it, including
+ * the uid string duplicated by new_lease_client().
+ * Inode entries still linked on clnt->inode_list are not freed here.
+ */
+static void
+__destroy_lease_client (lease_client_t *clnt)
+{
+        list_del_init (&clnt->inode_list);
+        list_del_init (&clnt->client_list);
+
+        /* client_uid was gf_strdup()'ed at creation; release it too. */
+        GF_FREE (clnt->client_uid);
+        GF_FREE (clnt);
+
+        return;
+}
+
+
+/*
+ * Search priv->client_list for the record whose uid equals 'client_uid'.
+ * Returns the record, or NULL when the client is not on the list.
+ */
+static lease_client_t *
+__get_lease_client (xlator_t *this, leases_private_t *priv,
+                    const char *client_uid)
+{
+        lease_client_t *cur = NULL;
+
+        list_for_each_entry (cur, &priv->client_list, client_list) {
+                if (strcmp (cur->client_uid, client_uid) != 0)
+                        continue;
+
+                gf_msg_debug (this->name, 0, "Client:%s already found "
+                              "in the cleanup list", client_uid);
+                return cur;
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Return the cleanup-list record for 'client_uid', creating and appending
+ * one to priv->client_list when the client is not yet known.
+ * Returns NULL only when a new record could not be allocated.
+ */
+static lease_client_t *
+__get_or_new_lease_client (xlator_t *this, leases_private_t *priv,
+                           const char *client_uid)
+{
+        lease_client_t *clnt = __get_lease_client (this, priv, client_uid);
+
+        if (clnt)
+                return clnt;
+
+        clnt = new_lease_client (client_uid);
+        if (clnt) {
+                list_add_tail (&clnt->client_list, &priv->client_list);
+                gf_msg_debug (this->name, 0, "Adding a new client:%s entry "
+                              "to the cleanup list", client_uid);
+        }
+
+        return clnt;
+}
+
+
+/*
+ * Record, under priv->mutex, that 'client_uid' holds a lease on 'inode'
+ * by appending a lease_inode_t to that client's cleanup list. The client
+ * record is created on first use.
+ *
+ * Returns 0 on success and -ENOMEM when the client record or the inode
+ * entry cannot be allocated. The mutex is released on every path.
+ */
+static int
+add_inode_to_client_list (xlator_t *this, inode_t *inode, const char *client_uid)
+{
+        int               ret         = 0;
+        leases_private_t *priv        = NULL;
+        lease_client_t   *clnt        = NULL;
+        lease_inode_t    *lease_inode = NULL;
+
+        priv = this->private;
+        pthread_mutex_lock (&priv->mutex);
+        {
+                clnt = __get_or_new_lease_client (this, priv, client_uid);
+                if (!clnt) {
+                        ret = -ENOMEM;
+                        goto unlock;
+                }
+
+                lease_inode = new_lease_inode (inode);
+                if (!lease_inode) {
+                        ret = -ENOMEM;
+                        goto unlock;
+                }
+
+                /* list_add_tail (new, head): the new inode entry goes onto
+                 * the client's list. The reverse order would re-link the
+                 * list head and drop previously added inodes. */
+                list_add_tail (&lease_inode->list, &clnt->inode_list);
+                gf_msg_debug (this->name, 0,
+                              "Added a new inode:%p to the client(%s) "
+                              "cleanup list, gfid(%s)", inode, client_uid,
+                              uuid_utoa (inode->gfid));
+        }
+unlock:
+        pthread_mutex_unlock (&priv->mutex);
+
+        return ret;
+}
+
+
+/* Account a newly granted lease on both the lease-id entry and the inode
+ * context.
+ * Return values:
+ * 0 Success
+ * -1 Failure (invalid argument, or no lease-id entry could be obtained;
+ *    errno is set to ENOMEM in the latter case)
+ */
+static int
+__add_lease (call_frame_t *frame, inode_t *inode, lease_inode_ctx_t *lease_ctx,
+             const char *client_uid, struct gf_lease *lease)
+{
+        lease_id_entry_t *entry = NULL;
+        int               ret   = -1;
+
+        GF_VALIDATE_OR_GOTO ("leases", frame, out);
+        GF_VALIDATE_OR_GOTO ("leases", client_uid, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease, out);
+
+        gf_msg_trace (frame->this->name, 0,
+                      "Granting lease lock to client %s with lease id %s"
+                      " on gfid(%s)", client_uid, leaseid_utoa (lease->lease_id),
+                      uuid_utoa (inode->gfid));
+
+        entry = __get_or_new_lease_entry (frame, lease->lease_id, lease_ctx);
+        if (!entry) {
+                errno = ENOMEM;
+                goto out;
+        }
+
+        /* Per lease-id accounting. */
+        entry->lease_type_cnt[lease->lease_type]++;
+        entry->lease_cnt++;
+        entry->lease_type |= lease->lease_type;
+
+        /* First lease of this lease id on the file: remember the inode on
+         * the client's disconnect cleanup list.
+         */
+        if (entry->lease_cnt == 1)
+                add_inode_to_client_list (frame->this, inode, client_uid);
+
+        /* Inode-wide accounting. */
+        lease_ctx->lease_cnt++;
+        lease_ctx->lease_type_cnt[lease->lease_type]++;
+        lease_ctx->lease_type |= lease->lease_type;
+
+        /* The first lease on the inode pins it with a reference so that
+         * lru cleanup cannot delete it while leased; the matching unref
+         * happens when all leases are unlocked or during DISCONNECT. */
+        if (lease_ctx->lease_cnt == 1)
+                lease_ctx->inode = inode_ref (inode);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Tells whether the client holds no lease on the inode: _gf_true unless
+ * some entry belonging to 'client_uid' has a non-zero lease count.
+ */
+static gf_boolean_t
+__is_clnt_lease_none (const char *client_uid, lease_inode_ctx_t *lease_ctx)
+{
+        lease_id_entry_t *entry = NULL;
+
+        list_for_each_entry (entry, &lease_ctx->lease_id_list, lease_id_list) {
+                if (strcmp (client_uid, entry->client_uid) != 0)
+                        continue;
+
+                if (entry->lease_cnt != 0)
+                        return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Destroy every entry on the client's inode list that refers to 'inode'.
+ * Returns 0 when at least one entry was removed, -1 when none matched.
+ */
+static int
+__remove_inode_from_clnt_list (xlator_t *this, lease_client_t *clnt,
+                               inode_t *inode)
+{
+        lease_inode_t *cur     = NULL;
+        lease_inode_t *next    = NULL;
+        int            removed = -1;
+
+        /* Entries are freed while walking, hence the _safe iterator. */
+        list_for_each_entry_safe (cur, next, &clnt->inode_list, list) {
+                if (cur->inode != inode)
+                        continue;
+
+                __destroy_lease_inode (cur);
+                gf_msg_debug (this->name, 0,
+                              "Removed the inode from the client cleanup list");
+                removed = 0;
+        }
+        /* TODO: Remove the client entry from the cleanup list */
+
+        return removed;
+}
+
+
+/*
+ * Under priv->mutex, drop 'inode' from the cleanup list of the client
+ * identified by 'client_uid'.
+ * Returns 0 on success; -1 when the xlator has no private data, the
+ * client is not on the cleanup list, or the inode is not on its list.
+ */
+static int
+remove_from_clnt_list (xlator_t *this, const char *client_uid, inode_t *inode)
+{
+        leases_private_t *priv = this->private;
+        lease_client_t   *clnt = NULL;
+        int               ret  = -1;
+
+        if (!priv)
+                return ret;
+
+        pthread_mutex_lock (&priv->mutex);
+
+        clnt = __get_lease_client (this, priv, client_uid);
+        if (clnt) {
+                ret = __remove_inode_from_clnt_list (this, clnt, inode);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, LEASE_MSG_INODE_NOTFOUND,
+                                "There is no inode entry found in the cleanup list");
+                }
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0, LEASE_MSG_CLNT_NOTFOUND,
+                        "There is no client entry found in the cleanup list");
+        }
+
+        pthread_mutex_unlock (&priv->mutex);
+
+        return ret;
+}
+
+
+/* Undo the accounting of one lease held under lease->lease_id.
+ *
+ * Decrements the per-entry and per-inode counters, clears the lease-type
+ * bit wherever its counter reaches zero, destroys the lease-id entry once
+ * it holds nothing, and deletes the recall timer when the inode has no
+ * leases left.
+ *
+ * Returns 0 on success, -EINVAL (errno = EINVAL) when the lease id has no
+ * entry, or the result of gf_tw_del_timer() when the timer is deleted.
+ */
+static int
+__remove_lease (xlator_t *this, inode_t *inode, lease_inode_ctx_t *lease_ctx,
+                const char *client_uid, struct gf_lease *lease)
+{
+        lease_id_entry_t *entry = NULL;
+        leases_private_t *priv  = NULL;
+        int32_t           type  = 0;
+        int               ret   = 0;
+
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease, out);
+
+        priv = this->private;
+
+        gf_msg_trace (this->name, 0, "Removing lease entry for client: %s, "
+                      "lease type:%d, lease id:%s", client_uid, lease->lease_type,
+                      leaseid_utoa (lease->lease_id));
+
+        entry = __get_lease_id_entry (lease_ctx, lease->lease_id);
+        if (!entry) {
+                gf_msg (this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_UNLK_LEASE,
+                        "Got unlock lease request from client:%s, but has no "
+                        "corresponding lock", client_uid);
+                ret = -EINVAL;
+                errno = EINVAL;
+                goto out;
+        }
+
+        type = lease->lease_type;
+
+        entry->lease_type_cnt[type]--;
+        entry->lease_cnt--;
+
+        lease_ctx->lease_type_cnt[type]--;
+        lease_ctx->lease_cnt--;
+
+        /* Drop the type bit once nobody holds that type any more. */
+        if (entry->lease_type_cnt[type] == 0)
+                entry->lease_type = entry->lease_type & (~type);
+
+        if (lease_ctx->lease_type_cnt[type] == 0)
+                lease_ctx->lease_type = lease_ctx->lease_type & (~type);
+
+        if (entry->lease_cnt == 0) {
+                if (__is_clnt_lease_none (client_uid, lease_ctx)) {
+                        gf_msg_debug (this->name, 0, "Client(%s) has no leases"
+                                      " on gfid (%s), hence removing the inode"
+                                      " from the client cleanup list",
+                                      client_uid, uuid_utoa (inode->gfid));
+                        remove_from_clnt_list (this, client_uid, lease_ctx->inode);
+                }
+                __destroy_lease_id_entry (entry);
+        }
+
+        /* Last lease on the inode gone: a pending recall is moot. */
+        if ((lease_ctx->lease_cnt == 0) && lease_ctx->timer) {
+                ret = gf_tw_del_timer (priv->timer_wheel, lease_ctx->timer);
+                lease_ctx->recall_in_progress = _gf_false;
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Decide whether the requested lease may be granted on 'inode'.
+ *
+ * Refused while a recall is in progress or when the context of any open
+ * fd cannot be read. Otherwise, fds opened under a different lease id
+ * are counted and their open flags OR-ed together, then:
+ *  - GF_RD_LEASE: refused when such an fd is open for writing; granted
+ *    when the inode has no lease, only read leases, or no other lease id
+ *    holds a lease.
+ *  - GF_RW_LEASE: refused when any such fd exists; granted when the
+ *    inode has no lease or no other lease id holds one.
+ *  - any other type: logged and refused.
+ */
+static gf_boolean_t
+__is_lease_grantable (xlator_t *this, lease_inode_ctx_t *lease_ctx,
+                      struct gf_lease *lease, inode_t *inode)
+{
+        uint32_t        foreign_fds = 0;
+        int32_t         open_flags  = 0;
+        fd_t           *cur_fd      = NULL;
+        lease_fd_ctx_t *cur_fd_ctx  = NULL;
+        uint64_t        raw_ctx     = 0;
+        gf_boolean_t    ctx_missing = _gf_false;
+        gf_boolean_t    grant       = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease, out);
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+
+        if (lease_ctx->recall_in_progress) {
+                gf_msg_debug (this->name, 0, "Recall in progress, hence "
+                              "failing the lease request");
+                goto out;
+        }
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (cur_fd, &inode->fd_list, inode_list) {
+                        if (fd_ctx_get (cur_fd, this, &raw_ctx) < 0) {
+                                ctx_missing = _gf_true;
+                                break;
+                        }
+                        cur_fd_ctx = (lease_fd_ctx_t *)(long) raw_ctx;
+
+                        /* Leases are lease-id based, so fds opened under
+                         * the requesting lease id never conflict and are
+                         * not counted.
+                         */
+                        if (__is_same_lease_id (cur_fd_ctx->lease_id,
+                                                lease->lease_id))
+                                continue;
+
+                        foreign_fds++;
+                        open_flags |= cur_fd->flags;
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        if (ctx_missing) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        LEASE_MSG_INVAL_FD_CTX,
+                        "Unable to get fd ctx");
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "open fd count:%d flags:%d",
+                      foreign_fds, open_flags);
+
+        __dump_leases_info (this, lease_ctx);
+
+        switch (lease->lease_type) {
+        case GF_RD_LEASE:
+                /* open fd conflict: someone else has it open for write */
+                if ((foreign_fds > 0) && (open_flags & (O_WRONLY | O_RDWR)))
+                        break;
+
+                /* conflict with existing leases */
+                if ((lease_ctx->lease_type == NONE) ||
+                    (lease_ctx->lease_type == GF_RD_LEASE) ||
+                    !__another_lease_found (lease_ctx, lease->lease_id))
+                        grant = _gf_true;
+                break;
+
+        case GF_RW_LEASE:
+                /* open fd conflict: any fd opened under another lease id */
+                if (foreign_fds > 0)
+                        break;
+
+                /* conflict with existing leases */
+                if ((lease_ctx->lease_type == NONE) ||
+                    !__another_lease_found (lease_ctx, lease->lease_id))
+                        grant = _gf_true;
+                break;
+
+        default:
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_INVAL_LEASE_TYPE,
+                        "Invalid lease type specified");
+                break;
+        }
+out:
+        return grant;
+}
+
+
+/*
+ * Resume every fop that was parked on the inode's blocked list.
+ *
+ * The stubs are moved to a private list under lease_ctx->lock and resumed
+ * after the lock is released. Afterwards the context is reset: lease type
+ * NONE, and the inode reference held in lease_ctx->inode is dropped.
+ */
+static void
+do_blocked_fops (xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+        struct list_head  ready;
+        fop_stub_t       *parked = NULL;
+        fop_stub_t       *next   = NULL;
+
+        INIT_LIST_HEAD (&ready);
+
+        /* Step 1: take the blocked stubs off the shared list. */
+        pthread_mutex_lock (&lease_ctx->lock);
+        list_for_each_entry_safe (parked, next, &lease_ctx->blocked_list, list) {
+                list_del_init (&parked->list);
+                list_add_tail (&parked->list, &ready);
+        }
+        pthread_mutex_unlock (&lease_ctx->lock);
+
+        gf_msg_trace (this->name, 0, "Executing the blocked stubs on gfid(%s)",
+                      uuid_utoa (lease_ctx->inode->gfid));
+
+        /* Step 2: resume them in order, outside the lock. */
+        list_for_each_entry_safe (parked, next, &ready, list) {
+                list_del_init (&parked->list);
+                gf_msg_trace (this->name, 0, "Executing fop:%d", parked->stub->fop);
+                call_resume (parked->stub);
+                GF_FREE (parked);
+        }
+
+        /* Step 3: the inode no longer carries any lease. */
+        pthread_mutex_lock (&lease_ctx->lock);
+        lease_ctx->lease_type = NONE;
+        inode_unref (lease_ctx->inode);
+        lease_ctx->inode = NULL;
+        pthread_mutex_unlock (&lease_ctx->lock);
+
+        return;
+}
+
+
+/*
+ * Timer-wheel callback fired when a recalled lease was not given up in
+ * time. Queues the inode on priv->recall_list and wakes any waiters on
+ * priv->cond.
+ *
+ * 'data' is the lease_timer_data_t allocated by __recall_lease(). This
+ * handler is its last user, so it drops the inode reference stored in it
+ * and frees it together with the timer. The queued lease_inode_t holds
+ * its own inode reference, taken by new_lease_inode().
+ */
+void
+recall_lease_timer_handler (struct gf_tw_timer_list *timer,
+                            void *data, unsigned long calltime)
+{
+        inode_t            *inode       = NULL;
+        lease_inode_t      *lease_inode = NULL;
+        leases_private_t   *priv        = NULL;
+        lease_timer_data_t *timer_data  = NULL;
+
+        timer_data = data;
+
+        priv = timer_data->this->private;
+        inode = timer_data->inode;
+        pthread_mutex_lock (&priv->mutex);
+        {
+                lease_inode = new_lease_inode (inode);
+                if (!lease_inode) {
+                        errno = ENOMEM;
+                        goto out;
+                }
+                list_add_tail (&lease_inode->list, &priv->recall_list);
+                pthread_cond_broadcast (&priv->cond);
+        }
+out:
+        pthread_mutex_unlock (&priv->mutex);
+
+        /* Release what __recall_lease() set up for this timer. */
+        inode_unref (timer_data->inode);
+        GF_FREE (timer_data);
+
+        GF_FREE (timer);
+}
+
+
+/*
+ * Ask every lease holder on the inode to give its lease back and arm a
+ * timer that fires after the configured recall timeout.
+ *
+ * Does nothing when a recall is already in progress. One upcall is sent
+ * per lease-id entry; a failed upcall is logged but does not stop the
+ * recall. If the timer or its data cannot be allocated, no timer is
+ * armed.
+ */
+static void
+__recall_lease (xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+        lease_id_entry_t              *holder     = NULL;
+        lease_id_entry_t              *next       = NULL;
+        struct gf_upcall               up_req     = {0,};
+        struct gf_upcall_recall_lease  recall_req = {0,};
+        struct gf_tw_timer_list       *timer      = NULL;
+        lease_timer_data_t            *timer_data = NULL;
+        leases_private_t              *priv       = NULL;
+
+        if (lease_ctx->recall_in_progress) {
+                gf_msg_debug (this->name, 0, "Lease recall is already in "
+                              "progress, hence not sending another recall");
+                return;
+        }
+
+        priv = this->private;
+
+        list_for_each_entry_safe (holder, next, &lease_ctx->lease_id_list,
+                                  lease_id_list) {
+                gf_uuid_copy (up_req.gfid, lease_ctx->inode->gfid);
+                up_req.client_uid = holder->client_uid;
+                up_req.event_type = GF_UPCALL_RECALL_LEASE;
+                up_req.data = &recall_req;
+
+                if (this->notify (this, GF_EVENT_UPCALL, &up_req) < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, LEASE_MSG_RECALL_FAIL,
+                                "Recall notification to client: %s failed",
+                                holder->client_uid);
+                        /* Do not return from here, continue registering
+                         * the timer; this is required mostly to keep
+                         * replicas in sync. */
+                } else {
+                        gf_msg_debug (this->name, 0, "Recall lease (all)"
+                                      "notification sent to client %s",
+                                      holder->client_uid);
+                }
+
+                lease_ctx->recall_in_progress = _gf_true;
+                holder->recall_time = time (NULL);
+        }
+
+        timer = GF_CALLOC (1, sizeof (*timer), gf_common_mt_tw_timer_list);
+        if (!timer)
+                return;
+
+        timer_data = GF_CALLOC (1, sizeof (*timer_data),
+                                gf_leases_mt_timer_data_t);
+        if (!timer_data) {
+                GF_FREE (timer);
+                return;
+        }
+
+        /* The timer data carries its own inode reference. */
+        timer_data->inode = inode_ref (lease_ctx->inode);
+        timer_data->this  = this;
+        timer->data       = timer_data;
+
+        INIT_LIST_HEAD (&timer->entry);
+        timer->expires  = get_recall_lease_timeout (this);
+        timer->function = recall_lease_timer_handler;
+        lease_ctx->timer = timer;
+        gf_tw_add_timer (priv->timer_wheel, timer);
+        gf_msg_trace (this->name, 0, "Registering timer " "%p, after "
+                      "sending recall", timer);
+}
+
+
+/* Handle one lease request (get / set / unlock) on an inode.
+ *
+ * ret = 0; STACK_UNWIND Success
+ * ret < 0; STACK_UNWIND failure (-1, -EINVAL or -ENOMEM)
+ *
+ * GF_GET_LEASE   reports the inode's current lease type in lease->lease_type.
+ * GF_SET_LEASE   grants the lease when it does not conflict; otherwise the
+ *                existing holders are recalled and -1 is returned. A
+ *                failure while recording a granted lease is returned to
+ *                the caller rather than reported as success.
+ * GF_UNLK_LEASE  removes the lease; when that was the last lease on the
+ *                inode, blocked fops are resumed after the context lock
+ *                has been released.
+ */
+int
+process_lease_req (call_frame_t *frame, xlator_t *this,
+                   inode_t *inode, struct gf_lease *lease)
+{
+        int                ret        = 0;
+        char              *client_uid = NULL;
+        lease_inode_ctx_t *lease_ctx  = NULL;
+        gf_boolean_t       unblock    = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("leases", frame, out);
+        GF_VALIDATE_OR_GOTO ("leases", this, out);
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease, out);
+
+        client_uid = frame->root->client->client_uid;
+
+        if (!is_valid_lease_id (lease->lease_id)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        LEASE_MSG_INVAL_LEASE_ID, "Invalid lease id, from"
+                        "client:%s", client_uid);
+                ret = -EINVAL;
+                errno = EINVAL;
+                goto out;
+        }
+
+        lease_ctx = lease_ctx_get (inode, this);
+        if (!lease_ctx) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        LEASE_MSG_NO_MEM, "Unable to create/get inode ctx, "
+                        "inode:%p", inode);
+                ret = -ENOMEM;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "Lease request from client: %s, "
+                      "lease type:%d, lease cmd:%d, lease ID:%s, gfid:%s",
+                      client_uid, lease->lease_type, lease->cmd,
+                      leaseid_utoa (lease->lease_id), uuid_utoa (inode->gfid));
+
+        pthread_mutex_lock (&lease_ctx->lock);
+        {
+                switch (lease->cmd) {
+                case GF_GET_LEASE:
+                        lease->lease_type = lease_ctx->lease_type;
+                        gf_msg_debug (this->name, 0, "Get lease, existing lease"
+                                      "type: %d", lease_ctx->lease_type);
+                        /*TODO:Should it consider lease id or client_uid?*/
+                        break;
+
+                case GF_SET_LEASE:
+                        if (__is_lease_grantable (this, lease_ctx, lease, inode)) {
+                                /* Propagate failures (e.g. ENOMEM) instead
+                                 * of claiming the lease was granted. */
+                                ret = __add_lease (frame, inode, lease_ctx,
+                                                   client_uid, lease);
+                        } else {
+                                /* second argument of gf_msg_debug is an
+                                 * errno value, not a log level */
+                                gf_msg_debug (this->name, 0,
+                                              "Not granting the conflicting lease"
+                                              " request from %s on gfid(%s)",
+                                              client_uid, uuid_utoa (inode->gfid));
+                                __recall_lease (this, lease_ctx);
+                                ret = -1;
+                        }
+                        break;
+
+                case GF_UNLK_LEASE:
+                        ret = __remove_lease (this, inode, lease_ctx,
+                                              client_uid, lease);
+                        /* Last lease gone: resume blocked fops, but only
+                         * after the context lock is dropped. */
+                        if ((ret == 0) && (lease_ctx->lease_cnt == 0))
+                                unblock = _gf_true;
+                        break;
+
+                default:
+                        ret = -EINVAL;
+                        break;
+                }
+        }
+        pthread_mutex_unlock (&lease_ctx->lock);
+
+        if (unblock)
+                do_blocked_fops (this, lease_ctx);
+out:
+        return ret;
+}
+
+
+/* Decide whether the fop carried by 'frame' conflicts with the leases on
+ * the inode, and start a lease recall when it does.
+ *
+ * _gf_true  - conflict
+ * _gf_false - no conflict (also returned on invalid arguments)
+ */
+gf_boolean_t
+__check_lease_conflict (call_frame_t *frame, lease_inode_ctx_t *lease_ctx,
+                        const char *lease_id, gf_boolean_t is_write)
+{
+        gf_lease_types_t  held      = 0;
+        gf_boolean_t      conflicts = _gf_false;
+        lease_id_entry_t *own       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("leases", frame, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_ctx, out);
+        GF_VALIDATE_OR_GOTO ("leases", lease_id, out);
+
+        held = lease_ctx->lease_type;
+
+        if ((frame->root->op == GF_FOP_RENAME) ||
+            (frame->root->op == GF_FOP_UNLINK)) {
+                /* Rename and unlink conflict with the lease even when they
+                 * come from the same client.
+                 */
+                conflicts = _gf_true;
+        } else {
+                /* TODO: If lease_id is not sent, fall back to client uid
+                 * conflict check? Or set conflicts = true if lease_id is 0
+                 * when there is an existing lease */
+                switch (held) {
+                case (GF_RW_LEASE | GF_RD_LEASE):
+                case GF_RW_LEASE:
+                        /* Only the lease id holding the RW lease is free
+                         * of conflict. */
+                        own = __get_lease_id_entry (lease_ctx, lease_id);
+                        if (!own || !(own->lease_type & GF_RW_LEASE))
+                                conflicts = _gf_true;
+                        break;
+                case GF_RD_LEASE:
+                        /* Writes conflict with read leases held under
+                         * other lease ids. */
+                        if (is_write &&
+                            __another_lease_found (lease_ctx, lease_id))
+                                conflicts = _gf_true;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        /* On conflict, recall the leases; __recall_lease() itself skips
+         * the recall when one is already in progress.
+         */
+        if (conflicts)
+                __recall_lease (frame->this, lease_ctx);
+out:
+        return conflicts;
+}
+
+
+/* Check a fop against the leases held on 'inode'.
+ *
+ * Return values:
+ * -1 : error, unwind the fop (errno is ENOMEM when the inode context is
+ *      unavailable, EAGAIN for a conflicting non-blocking fop)
+ * WIND_FOP: No lease or no conflict, wind the fop
+ * BLOCK_FOP: Found a conflicting lease and the fop is a blocking one,
+ *            block the fop
+ */
+int
+check_lease_conflict (call_frame_t *frame, inode_t *inode,
+                      const char *lease_id, uint32_t fop_flags)
+{
+        lease_inode_ctx_t *lease_ctx       = NULL;
+        gf_boolean_t       is_blocking_fop = _gf_false;
+        gf_boolean_t       is_write_fop    = _gf_false;
+        gf_boolean_t       conflicts       = _gf_false;
+        int                ret             = -1;
+
+        lease_ctx = lease_ctx_get (inode, frame->this);
+        if (!lease_ctx) {
+                gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
+                        LEASE_MSG_NO_MEM,
+                        "Unable to create/get inode ctx");
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        is_blocking_fop = ((fop_flags & BLOCKING_FOP) != 0);
+        is_write_fop = ((fop_flags & DATA_MODIFY_FOP) != 0);
+
+        pthread_mutex_lock (&lease_ctx->lock);
+        {
+                if (lease_ctx->lease_type == NONE) {
+                        gf_msg_debug (frame->this->name, 0,
+                                      "No leases found continuing with the"
+                                      " fop:%s", gf_fop_list[frame->root->op]);
+                        ret = WIND_FOP;
+                        goto unlock;
+                }
+                conflicts = __check_lease_conflict (frame, lease_ctx,
+                                                    lease_id, is_write_fop);
+                if (!conflicts) {
+                        /* A lease exists but this fop does not conflict
+                         * with it (e.g. it comes from the lease holder):
+                         * let it through instead of failing it. */
+                        ret = WIND_FOP;
+                } else if (is_blocking_fop) {
+                        gf_msg_debug (frame->this->name, 0, "Fop: %s "
+                                      "conflicting existing "
+                                      "lease: %d, blocking the"
+                                      "fop", gf_fop_list[frame->root->op],
+                                      lease_ctx->lease_type);
+                        ret = BLOCK_FOP;
+                } else {
+                        gf_msg_debug (frame->this->name, 0, "Fop: %s "
+                                      "conflicting existing "
+                                      "lease: %d, sending "
+                                      "EAGAIN",
+                                      gf_fop_list[frame->root->op],
+                                      lease_ctx->lease_type);
+                        errno = EAGAIN;
+                        ret = -1;
+                }
+        }
+unlock:
+        pthread_mutex_unlock (&lease_ctx->lock);
+out:
+        return ret;
+}
+
+
+/*
+ * Drop every lease that 'client_uid' holds on 'inode'.
+ *
+ * Each matching lease-id entry has its counters subtracted from the inode
+ * context and is destroyed. As soon as the inode's lease count reaches
+ * zero the walk stops, the context lock is released and the blocked fops
+ * are resumed.
+ *
+ * Returns 0, or -1 (errno = ENOMEM) when the inode context is unavailable.
+ */
+static int
+remove_clnt_leases (const char *client_uid, inode_t *inode, xlator_t *this)
+{
+        lease_inode_ctx_t *lease_ctx = NULL;
+        lease_id_entry_t  *entry     = NULL;
+        lease_id_entry_t  *next      = NULL;
+        gf_boolean_t       drained   = _gf_false;
+        int                type      = 0;
+
+        lease_ctx = lease_ctx_get (inode, this);
+        if (!lease_ctx) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        LEASE_MSG_INVAL_INODE_CTX,
+                        "Unable to create/get inode ctx");
+                errno = ENOMEM;
+                return -1;
+        }
+
+        pthread_mutex_lock (&lease_ctx->lock);
+        list_for_each_entry_safe (entry, next, &lease_ctx->lease_id_list,
+                                  lease_id_list) {
+                if (strcmp (client_uid, entry->client_uid) != 0)
+                        continue;
+
+                /* Give this entry's share of the counters back. */
+                for (type = 0; type < GF_LEASE_MAX_TYPE; type++)
+                        lease_ctx->lease_type_cnt[type] -= entry->lease_type_cnt[type];
+                lease_ctx->lease_cnt -= entry->lease_cnt;
+
+                __destroy_lease_id_entry (entry);
+
+                if (lease_ctx->lease_cnt == 0) {
+                        drained = _gf_true;
+                        break;
+                }
+        }
+        pthread_mutex_unlock (&lease_ctx->lock);
+
+        /* Resume blocked fops only after the lock is released. */
+        if (drained)
+                do_blocked_fops (this, lease_ctx);
+
+        return 0;
+}
+
+
+/*
+ * Release every lease held by the client identified by @client_uid.
+ *
+ * The client's inode list is detached under priv->mutex and the client
+ * record is destroyed; the per-inode leases are then removed outside the
+ * mutex through remove_clnt_leases().
+ *
+ * Returns 0 on success, -1 with errno = EINVAL when the translator has no
+ * private data or @client_uid is NULL.
+ */
+int
+cleanup_client_leases (xlator_t *this, const char *client_uid)
+{
+        lease_client_t   *clnt         = NULL;
+        lease_client_t   *tmp          = NULL;
+        struct list_head  cleanup_list = {0, };
+        lease_inode_t    *l_inode      = NULL;
+        lease_inode_t    *tmp1         = NULL;
+        leases_private_t *priv         = NULL;
+        int               ret          = 0;
+
+        priv = this->private;
+        if (!priv || !client_uid) {
+                ret = -1;
+                errno = EINVAL;
+                goto out;
+        }
+
+        INIT_LIST_HEAD (&cleanup_list);
+        pthread_mutex_lock (&priv->mutex);
+        {
+                list_for_each_entry_safe (clnt, tmp, &priv->client_list,
+                                          client_list) {
+                        if (strcmp (clnt->client_uid, client_uid) != 0)
+                                continue;
+
+                        list_for_each_entry_safe (l_inode, tmp1,
+                                                  &clnt->inode_list, list) {
+                                list_del_init (&l_inode->list);
+                                list_add_tail (&l_inode->list, &cleanup_list);
+                        }
+                        /* Only the departing client is destroyed; other
+                         * clients' records must stay intact. */
+                        __destroy_lease_client (clnt);
+                        break;
+                }
+        }
+        pthread_mutex_unlock (&priv->mutex);
+
+        /* NOTE(review): the lease_inode_t entries moved onto cleanup_list are
+         * not released here; confirm who owns them after this point. */
+        l_inode = tmp1 = NULL;
+        list_for_each_entry_safe (l_inode, tmp1, &cleanup_list, list) {
+                remove_clnt_leases (client_uid, l_inode->inode, this);
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Drop every lease recorded in @lease_ctx and reset its counters and state.
+ *
+ * remove_all_leases() calls this with lease_ctx->lock held.
+ */
+static void
+__remove_all_leases (xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+ int i = 0;
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+
+ __dump_leases_info (this, lease_ctx);
+
+ list_for_each_entry_safe (lease_entry, tmp,
+ &lease_ctx->lease_id_list,
+ lease_id_list) {
+ lease_entry->lease_cnt = 0;
+ remove_from_clnt_list (this, lease_entry->client_uid, lease_ctx->inode);
+ __destroy_lease_id_entry (lease_entry);
+ }
+ INIT_LIST_HEAD (&lease_ctx->lease_id_list);
+ /* NOTE(review): this loop includes index GF_LEASE_MAX_TYPE, whereas
+ * remove_clnt_leases() stops before it; confirm against the declared
+ * size of lease_type_cnt[]. */
+ for (i = 0; i <= GF_LEASE_MAX_TYPE; i++)
+ lease_ctx->lease_type_cnt[i] = 0;
+ lease_ctx->lease_type = 0;
+ lease_ctx->lease_cnt = 0;
+ lease_ctx->recall_in_progress = _gf_false;
+ /* presumably drops the reference taken when the lease was granted -
+ * TODO confirm against the code that takes it */
+ inode_unref (lease_ctx->inode);
+ lease_ctx->timer = NULL;
+
+ /* TODO:
+ * - Mark the corresponding fd bad. Could be done on client side
+ * as a result of recall
+ * - Free the lease_ctx
+ */
+ return;
+}
+
+
+/*
+ * Remove all leases held on @inode, then resume the fops blocked on it.
+ *
+ * Returns 0 on success (and also when @inode fails validation), -1 with
+ * errno = ENOMEM when the inode's lease context cannot be obtained.
+ */
+static int
+remove_all_leases (xlator_t *this, inode_t *inode)
+{
+        lease_inode_ctx_t *ctx    = NULL;
+        int                status = 0;
+
+        GF_VALIDATE_OR_GOTO ("leases", inode, out);
+
+        ctx = lease_ctx_get (inode, this);
+        if (ctx == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        LEASE_MSG_INVAL_INODE_CTX,
+                        "Unable to create/get inode ctx");
+                errno = ENOMEM;
+                status = -1;
+                goto out;
+        }
+
+        /* The lease bookkeeping is reset under the context lock ... */
+        pthread_mutex_lock (&ctx->lock);
+        __remove_all_leases (this, ctx);
+        pthread_mutex_unlock (&ctx->lock);
+
+        /* ... and the blocked fops are resumed once it is released. */
+        do_blocked_fops (this, ctx);
+out:
+        return status;
+}
+
+
+/*
+ * Thread body (started from leases_init_priv()) that cleans up leases on
+ * inodes queued on priv->recall_list.
+ *
+ * @data: the xlator_t of this translator.
+ *
+ * Each iteration, under priv->mutex: exit if priv->fini is set, wait on
+ * priv->cond (up to 600 seconds) when the recall list is empty, then move
+ * every queued entry to a local list. The entries are processed with
+ * remove_all_leases() after the mutex is released. Always returns NULL.
+ */
+void *
+expired_recall_cleanup (void *data)
+{
+ struct timespec sleep_till = {0, };
+ struct list_head recall_cleanup_list;
+ lease_inode_t *recall_entry = NULL;
+ lease_inode_t *tmp = NULL;
+ leases_private_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_VALIDATE_OR_GOTO ("leases", data, out);
+
+ this = data;
+ priv = this->private;
+
+ gf_msg_debug (this->name, 0, "Started the expired_recall_cleanup thread");
+
+ while (1) {
+ pthread_mutex_lock (&priv->mutex);
+ {
+ if (priv->fini) {
+ pthread_mutex_unlock (&priv->mutex);
+ goto out;
+ }
+ INIT_LIST_HEAD (&recall_cleanup_list);
+ if (list_empty (&priv->recall_list)) {
+ /* absolute deadline: now + 600 seconds */
+ sleep_till.tv_sec = time (NULL) + 600;
+ pthread_cond_timedwait (&priv->cond, &priv->mutex,
+ &sleep_till);
+ }
+ /* re-check after the wait: it may have timed out with nothing queued */
+ if (!list_empty (&priv->recall_list)) {
+ gf_msg_debug (this->name, 0, "Found expired recalls");
+ list_for_each_entry_safe (recall_entry, tmp,
+ &priv->recall_list, list) {
+ list_del_init (&recall_entry->list);
+ list_add_tail (&recall_entry->list, &recall_cleanup_list);
+ }
+ }
+ }
+ pthread_mutex_unlock (&priv->mutex);
+
+ recall_entry = tmp = NULL;
+ list_for_each_entry_safe (recall_entry, tmp, &recall_cleanup_list, list) {
+ gf_msg_debug (this->name, 0, "Recall lease was sent on"
+ " inode:%p, recall timer has expired"
+ " and clients haven't unlocked the lease"
+ " hence cleaning up leases on the inode",
+ recall_entry->inode);
+ remove_all_leases (this, recall_entry->inode);
+ /* NOTE(review): the entry is only unlinked here, not released;
+ * confirm who owns recall_entry after this point. */
+ list_del_init (&recall_entry->list);
+ }
+ }
+
+out:
+ return NULL;
+}
diff --git a/xlators/features/leases/src/leases-mem-types.h b/xlators/features/leases/src/leases-mem-types.h
new file mode 100644
index 00000000000..d1a59c1db2e
--- /dev/null
+++ b/xlators/features/leases/src/leases-mem-types.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LEASES_MEM_TYPES_H__
+#define __LEASES_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/*
+ * Memory-accounting type ids used by the leases translator. Numbering
+ * starts right after the common types (gf_common_mt_end + 1).
+ */
+enum gf_leases_mem_types_ {
+ gf_leases_mt_conf_t = gf_common_mt_end + 1,
+ gf_leases_mt_private_t,
+ gf_leases_mt_lease_client_t,
+ gf_leases_mt_lease_inode_t,
+ /* used for the per-fd context allocated in leases_open() */
+ gf_leases_mt_fd_ctx_t,
+ gf_leases_mt_lease_inode_ctx_t,
+ gf_leases_mt_lease_id_entry_t,
+ gf_leases_mt_fop_stub_t,
+ gf_leases_mt_timer_data_t,
+ /* end marker; mem_acct_init() registers gf_leases_mt_end + 1 types */
+ gf_leases_mt_end
+};
+#endif
diff --git a/xlators/features/leases/src/leases-messages.h b/xlators/features/leases/src/leases-messages.h
new file mode 100644
index 00000000000..62df4395a59
--- /dev/null
+++ b/xlators/features/leases/src/leases-messages.h
@@ -0,0 +1,129 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LEASES_MESSAGES_H_
+#define _LEASES_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define LEASES_COMP_BASE GLFS_MSGID_COMP_LEASES
+#define GLFS_NUM_MESSAGES 11
+#define GLFS_MSGID_END (LEASES_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x LEASES_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_NO_MEM (LEASES_COMP_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_RECALL_FAIL (LEASES_COMP_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_INVAL_LEASE_ID (LEASES_COMP_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_INVAL_UNLK_LEASE (LEASES_COMP_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_INVAL_INODE_CTX (LEASES_COMP_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_NOT_ENABLED (LEASES_COMP_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_NO_TIMER_WHEEL (LEASES_COMP_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_CLNT_NOTFOUND (LEASES_COMP_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_INODE_NOTFOUND (LEASES_COMP_BASE + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_INVAL_FD_CTX (LEASES_COMP_BASE + 10)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define LEASE_MSG_INVAL_LEASE_TYPE (LEASES_COMP_BASE + 11)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_LEASES_MESSAGES_H_ */
diff --git a/xlators/features/leases/src/leases.c b/xlators/features/leases/src/leases.c
new file mode 100644
index 00000000000..3e0460000d7
--- /dev/null
+++ b/xlators/features/leases/src/leases.c
@@ -0,0 +1,1168 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "leases.h"
+
+/* Callback for leases_open(): unwinds the child's open result unchanged. */
+int32_t
+leases_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
+}
+
+
+/*
+ * open fop entry point of the leases translator.
+ *
+ * Records the opener's client uid and lease id in a per-fd context, then
+ * consults check_lease_conflict() on fd->inode: a negative result unwinds
+ * the fop with an error, BLOCK_FOP hands the call to LEASE_BLOCK_FOP, and
+ * WIND_FOP winds it to the first child.
+ *
+ * Always returns 0; the outcome is delivered through wind/unwind.
+ */
+int32_t
+leases_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+             fd_t *fd, dict_t *xdata)
+{
+        uint32_t        fop_flags = 0;
+        /* -1 means "not set": the err path then falls back to errno, as in
+         * the other fops of this file. */
+        int32_t         op_errno  = -1;
+        int             ret       = 0;
+        lease_fd_ctx_t *fd_ctx    = NULL;
+        char           *lease_id  = NULL;
+
+        EXIT_IF_LEASES_OFF (this, out);
+
+        fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx), gf_leases_mt_fd_ctx_t);
+        if (!fd_ctx) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        fd_ctx->client_uid = gf_strdup (frame->root->client->client_uid);
+        if (!fd_ctx->client_uid) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        GET_FLAGS (frame->root->op, flags);
+        GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+        if (lease_id != NULL)
+                memcpy (fd_ctx->lease_id, lease_id, LEASE_ID_SIZE);
+        else
+                memset (fd_ctx->lease_id, 0, LEASE_ID_SIZE);
+
+        ret = fd_ctx_set (fd, this, (uint64_t)fd_ctx);
+        if (ret) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+        /* The fd now refers to fd_ctx, so later error paths must not free
+         * it. */
+        fd_ctx = NULL;
+
+        ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+        if (ret < 0)
+                goto err;
+        else if (ret == BLOCK_FOP)
+                goto block;
+        else if (ret == WIND_FOP)
+                goto out;
+
+block:
+        LEASE_BLOCK_FOP (fd->inode, open, frame, this,
+                         loc, flags, fd, xdata);
+        return 0;
+
+out:
+        STACK_WIND (frame, leases_open_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd, xdata);
+        return 0;
+
+err:
+        /* Release a context that was never attached to the fd. */
+        if (fd_ctx) {
+                GF_FREE (fd_ctx->client_uid);
+                GF_FREE (fd_ctx);
+        }
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/* Callback for leases_writev(): unwinds the child's writev result unchanged. */
+int32_t
+leases_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+
+/*
+ * writev fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, writev, frame, this, fd, vector, count,
+ off, flags, iobref, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_writev_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+/* Callback for leases_readv(): unwinds the child's readv result unchanged. */
+int32_t
+leases_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+
+ return 0;
+}
+
+/*
+ * readv fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset,
+ uint32_t flags, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, readv, frame, this,
+ fd, size, offset, flags, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_readv_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_lk(): unwinds the child's lk result unchanged. */
+int32_t
+leases_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+
+ return 0;
+}
+
+/*
+ * lk fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ *
+ * Always returns 0; the outcome is delivered through wind/unwind.
+ */
+int32_t
+leases_lk (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        /* -1 means "not set": with a 0 initializer the err path would
+         * report op_errno 0 instead of the errno set by
+         * check_lease_conflict(). */
+        int32_t   op_errno  = -1;
+        uint32_t  fop_flags = 0;
+        char     *lease_id  = NULL;
+        int       ret       = 0;
+
+        EXIT_IF_LEASES_OFF (this, out);
+
+        GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+        GET_FLAGS_LK (cmd, flock->l_type, fd->flags);
+
+        ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+        if (ret < 0)
+                goto err;
+        else if (ret == BLOCK_FOP)
+                goto block;
+        else if (ret == WIND_FOP)
+                goto out;
+
+block:
+        LEASE_BLOCK_FOP (fd->inode, lk, frame, this,
+                         fd, cmd, flock, xdata);
+        return 0;
+
+out:
+        STACK_WIND (frame, leases_lk_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lk,
+                    fd, cmd, flock, xdata);
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * lease fop: handled here through process_lease_req() rather than being
+ * wound to a child; the result is unwound to the caller directly.
+ *
+ * When the translator is disabled the fop is unwound with ENOSYS and a
+ * zeroed gf_lease. Always returns 0.
+ */
+int32_t
+leases_lease (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct gf_lease *lease, dict_t *xdata)
+{
+ int32_t op_errno = 0;
+ int ret = 0;
+ struct gf_lease nullease = {0, };
+ int32_t op_ret = 0;
+
+ EXIT_IF_LEASES_OFF (this, out);
+
+ ret = process_lease_req (frame, this, loc->inode, lease);
+ /* a negative return is treated as a negated errno value */
+ if (ret < 0) {
+ op_errno = -ret;
+ op_ret = -1;
+ }
+ goto unwind;
+
+out:
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_NOT_ENABLED,
+ "\"features/leases\" translator is not enabled. "
+ "You need to enable it for proper functioning of your "
+ "application");
+ op_errno = ENOSYS;
+ op_ret = -1;
+
+unwind:
+ STACK_UNWIND_STRICT (lease, frame, op_ret, op_errno,
+ (op_errno == ENOSYS) ? &nullease : lease, xdata);
+ return 0;
+}
+
+/* Callback for leases_truncate(): unwinds the child's truncate result unchanged. */
+int32_t
+leases_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+/*
+ * truncate fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for loc->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, 0);
+
+ ret = check_lease_conflict (frame, loc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (loc->inode, truncate, frame, this,
+ loc, offset, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_truncate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_setattr(): unwinds the child's setattr result unchanged. */
+int32_t
+leases_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+
+ return 0;
+}
+
+/*
+ * setattr fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for loc->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, 0);
+
+ ret = check_lease_conflict (frame, loc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (loc->inode, setattr, frame, this,
+ loc, stbuf, valid, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_setattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_rename(): unwinds the child's rename result unchanged. */
+int32_t
+leases_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno,
+ stbuf, preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+
+ return 0;
+}
+
+/*
+ * rename fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for oldloc->inode only: a negative
+ * result unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ /* should the lease be also checked for newloc */
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, 0);
+
+ ret = check_lease_conflict (frame, oldloc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (oldloc->inode, rename, frame, this,
+ oldloc, newloc, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_unlink(): unwinds the child's unlink result unchanged. */
+int32_t
+leases_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+
+ return 0;
+}
+
+/*
+ * unlink fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for loc->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, 0);
+
+ ret = check_lease_conflict (frame, loc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (loc->inode, unlink, frame, this,
+ loc, xflag, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_link(): unwinds the child's link result unchanged. */
+int32_t
+leases_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+
+ return 0;
+}
+
+/*
+ * link fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for oldloc->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, 0);
+
+ ret = check_lease_conflict (frame, oldloc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (oldloc->inode, link, frame, this,
+ oldloc, newloc, xdata);
+ return 0;
+out:
+ STACK_WIND (frame, leases_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_create(): unwinds the child's create result unchanged. */
+int32_t
+leases_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd,
+ inode, stbuf, preparent, postparent, xdata);
+
+ return 0;
+}
+
+/*
+ * create fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, create, frame, this,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_fsync(): unwinds the child's fsync result unchanged. */
+int32_t
+leases_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+/*
+ * fsync fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, fsync, frame, this,
+ fd, flags, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_ftruncate(): unwinds the child's ftruncate result unchanged. */
+int32_t
+leases_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+/*
+ * ftruncate fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, 0); /* TODO:fd->flags?*/
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, ftruncate, frame, this,
+ fd, offset, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_ftruncate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_fsetattr(): unwinds the child's fsetattr result unchanged. */
+int32_t
+leases_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+ return 0;
+}
+
+/*
+ * fsetattr fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, fsetattr, frame, this,
+ fd, stbuf, valid, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_fsetattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_fallocate(): unwinds the child's fallocate result unchanged. */
+int32_t
+leases_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, pre,
+ post, xdata);
+
+ return 0;
+}
+
+/*
+ * fallocate fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, fallocate, frame, this,
+ fd, mode, offset, len, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_fallocate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate,
+ fd, mode, offset, len, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (fallocate, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_discard(): unwinds the child's discard result unchanged. */
+int32_t
+leases_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, pre,
+ post, xdata);
+
+ return 0;
+}
+
+/*
+ * discard fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int32_t
+leases_discard (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, discard, frame, this,
+ fd, offset, len, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_discard_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+ fd, offset, len, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_zerofill(): unwinds the child's zerofill result unchanged. */
+int32_t
+leases_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, pre,
+ post, xdata);
+
+ return 0;
+}
+
+/*
+ * zerofill fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ * Always returns 0.
+ */
+int
+leases_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = -1;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ /* NOTE(review): macro defined outside this view; presumably jumps to
+ * `out` (plain wind) when leases are disabled - confirm. */
+ EXIT_IF_LEASES_OFF (this, out);
+
+ GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+ /* presumably fills fop_flags, which is not assigned anywhere else here */
+ GET_FLAGS (frame->root->op, fd->flags);
+
+ ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP (fd->inode, zerofill, frame, this,
+ fd, offset, len, xdata);
+ return 0;
+
+out:
+ STACK_WIND (frame, leases_zerofill_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+ fd, offset, len, xdata);
+ return 0;
+
+err:
+ /* fall back to errno while op_errno still holds its -1 initializer */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+/* Callback for leases_flush(): unwinds the child's flush result unchanged. */
+int
+leases_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+/*
+ * flush fop entry point of the leases translator.
+ *
+ * check_lease_conflict() is consulted for fd->inode: a negative result
+ * unwinds the fop with an error, BLOCK_FOP hands the call to
+ * LEASE_BLOCK_FOP, and WIND_FOP winds it to the first child.
+ *
+ * Always returns 0; the outcome is delivered through wind/unwind.
+ */
+int
+leases_flush (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, dict_t *xdata)
+{
+        int32_t   op_errno  = -1;
+        uint32_t  fop_flags = 0;
+        char     *lease_id  = NULL;
+        int       ret       = 0;
+
+        EXIT_IF_LEASES_OFF (this, out);
+
+        GET_LEASE_ID (xdata, lease_id, frame->root->client->client_uid);
+        GET_FLAGS (frame->root->op, fd->flags);
+
+        ret = check_lease_conflict (frame, fd->inode, lease_id, fop_flags);
+        if (ret < 0)
+                goto err;
+        else if (ret == BLOCK_FOP)
+                goto block;
+        else if (ret == WIND_FOP)
+                goto out;
+
+block:
+        LEASE_BLOCK_FOP (fd->inode, flush, frame, this,
+                         fd, xdata);
+        return 0;
+
+out:
+        STACK_WIND (frame, leases_flush_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->flush, fd, xdata);
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        /* Unwind with the flush signature (op_ret, op_errno, xdata), the
+         * same shape leases_flush_cbk() uses; unwinding as "create" would
+         * call the parent's flush callback with the wrong arguments. */
+        STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this xlator with gf_leases_mt_end + 1
+ * slots. Returns -1 when 'this' is NULL, otherwise the result of
+ * xlator_mem_acct_init() (a warning is logged when it is non-zero).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = -1;
+
+        if (this == NULL)
+                return status;
+
+        status = xlator_mem_acct_init (this, gf_leases_mt_end + 1);
+        if (status != 0)
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,
+                        "mem account init failed");
+
+        return status;
+}
+
+/*
+ * Bring up the runtime pieces needed once leases are enabled: attach the
+ * global timer wheel (initialising it first if nobody has yet) and start
+ * the expired-recall cleanup thread.
+ *
+ * Returns 0 on success and non-zero on failure. inited_recall_thr is set
+ * only when the thread was actually created, so that fini never joins a
+ * thread that does not exist.
+ */
+static int
+leases_init_priv (xlator_t *this)
+{
+        int               ret  = 0;
+        leases_private_t *priv = NULL;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!priv->timer_wheel) {
+                if (!glusterfs_global_timer_wheel (this)) {
+                        gf_msg_debug (this->name, 0, "Initing the global "
+                                      "timer wheel");
+                        ret = glusterfs_global_timer_wheel_init (this->ctx);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        LEASE_MSG_NO_TIMER_WHEEL,
+                                        "Initing the global timer "
+                                        "wheel failed");
+                                goto out;
+                        }
+                }
+                priv->timer_wheel = glusterfs_global_timer_wheel (this);
+        }
+
+        if (!priv->inited_recall_thr) {
+                /* pthread_create returns an error number, not -1/errno */
+                ret = pthread_create (&priv->recall_thr, NULL,
+                                      expired_recall_cleanup, this);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to create the recall cleanup "
+                                "thread (%s)", strerror (ret));
+                        ret = -1;
+                        goto out;
+                }
+                priv->inited_recall_thr = _gf_true;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Apply changed volume options at runtime. Only
+ * "lease-lock-recall-timeout" is picked up; toggling "leases" itself is
+ * deliberately not handled yet (see the TODO below).
+ *
+ * Returns 0 on success, -1 when the option could not be reconfigured.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        int               ret  = -1;
+        leases_private_t *priv = this->private;
+
+        GF_ASSERT (priv);
+
+        /* TODO: Enabling leases through reconfigure is not a problem, but
+         * disabling them involves more work: recalling all existing
+         * leases, waiting for all of them to be unlocked, etc. Hence
+         * reconfigure of "leases" is not supported for now.
+
+        GF_OPTION_RECONF ("leases", priv->leases_enabled,
+                          options, bool, out);
+
+        if (priv->leases_enabled) {
+                ret = leases_init_priv (this);
+                if (ret)
+                        goto out;
+        }
+        */
+
+        GF_OPTION_RECONF ("lease-lock-recall-timeout",
+                          priv->recall_lease_timeout,
+                          options, int32, out);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Xlator constructor: allocate the private state, read the "leases" and
+ * "lease-lock-recall-timeout" options, initialise the mutex, condition
+ * variable and lists, and, when leases are enabled, start the runtime
+ * pieces through leases_init_priv().
+ *
+ * Returns 0 on success. On failure everything set up here is torn down
+ * again and this->private is left NULL.
+ */
+int
+init (xlator_t *this)
+{
+        int               ret         = -1;
+        leases_private_t *priv        = NULL;
+        gf_boolean_t      sync_inited = _gf_false;
+
+        priv = GF_CALLOC (1, sizeof (*priv),
+                          gf_leases_mt_private_t);
+        if (!priv) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,
+                        "Leases init failed");
+                goto out;
+        }
+
+        GF_OPTION_INIT ("leases", priv->leases_enabled,
+                        bool, out);
+        GF_OPTION_INIT ("lease-lock-recall-timeout",
+                        priv->recall_lease_timeout, int32, out);
+
+        pthread_mutex_init (&priv->mutex, NULL);
+        /* fini broadcasts on this condition variable, so it must be a
+         * properly initialised object rather than zeroed memory. */
+        pthread_cond_init (&priv->cond, NULL);
+        sync_inited = _gf_true;
+
+        INIT_LIST_HEAD (&priv->client_list);
+        INIT_LIST_HEAD (&priv->recall_list);
+
+        this->private = priv;
+
+        if (priv->leases_enabled) {
+                ret = leases_init_priv (this);
+                if (ret)
+                        goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (ret) {
+                /* Destroy the primitives only if they were initialised;
+                 * the option parsing above can bail out before that. */
+                if (sync_inited) {
+                        pthread_cond_destroy (&priv->cond);
+                        pthread_mutex_destroy (&priv->mutex);
+                }
+                GF_FREE (priv);
+                this->private = NULL;
+        }
+
+        return ret;
+}
+
+/*
+ * Xlator destructor: flag shutdown, wake and join the recall cleanup
+ * thread if one was started, and release the private state.
+ * Always returns 0.
+ */
+int
+fini (xlator_t *this)
+{
+        leases_private_t *priv = NULL;
+
+        priv = this->private;
+        if (!priv) {
+                return 0;
+        }
+        this->private = NULL;
+
+        priv->fini = _gf_true;
+
+        /* The recall thread exists only if leases_init_priv() started it
+         * (leases enabled). Joining a never-created pthread_t is
+         * undefined behaviour, so wake and join only in that case. */
+        if (priv->inited_recall_thr) {
+                pthread_cond_broadcast (&priv->cond);
+                pthread_join (priv->recall_thr, NULL);
+                priv->inited_recall_thr = _gf_false;
+        }
+
+        GF_FREE (priv);
+
+        return 0;
+}
+
+/*
+ * forget callback (registered in cbks). Currently a stub that always
+ * returns 0; releasing the per-inode lease context is still a TODO.
+ */
+static int
+leases_forget (xlator_t *this, inode_t *inode)
+{
+ /* TODO:leases_cleanup_inode_ctx (this, inode); */
+ return 0;
+}
+
+/*
+ * release callback (registered in cbks). Currently a stub that always
+ * returns 0; cleaning up the fd context is still a TODO.
+ */
+static int
+leases_release (xlator_t *this, fd_t *fd)
+{
+ /* TODO:cleanup fd_ctx */
+ return 0;
+}
+
+/*
+ * client_disconnect callback: when leases are enabled, clean up the
+ * leases held by the disconnecting client (identified by its
+ * client_uid) and return that result; otherwise return 0.
+ */
+static int
+leases_clnt_disconnect_cbk (xlator_t *this, client_t *client)
+{
+        if (!is_leases_enabled (this))
+                return 0;
+
+        return cleanup_client_leases (this, client->client_uid);
+}
+
+/*
+ * File-operation table of the leases xlator: the fops listed here are
+ * routed through the leases_* handlers. The entries under NOT_SUPPORTED
+ * (internal lock fops and xattrop) are compiled in only when that macro
+ * is defined.
+ */
+struct xlator_fops fops = {
+ /* Metadata modifying fops */
+ .fsetattr = leases_fsetattr,
+ .setattr = leases_setattr,
+
+ /* File Data reading fops */
+ .open = leases_open,
+ .readv = leases_readv,
+
+ /* File Data modifying fops */
+ .truncate = leases_truncate,
+ .ftruncate = leases_ftruncate,
+ .writev = leases_writev,
+ .zerofill = leases_zerofill,
+ .fallocate = leases_fallocate,
+ .discard = leases_discard,
+ .lk = leases_lk,
+ .fsync = leases_fsync,
+ .flush = leases_flush,
+ .lease = leases_lease,
+
+ /* Directory Data modifying fops */
+ .create = leases_create,
+ .rename = leases_rename,
+ .unlink = leases_unlink,
+ .link = leases_link,
+
+#ifdef NOT_SUPPORTED
+ /* internal lk fops */
+ .inodelk = leases_inodelk,
+ .finodelk = leases_finodelk,
+ .entrylk = leases_entrylk,
+ .fentrylk = leases_fentrylk,
+
+ /* Internal special fops*/
+ .xattrop = leases_xattrop,
+ .fxattrop = leases_fxattrop,
+#endif
+};
+
+/*
+ * Callback table of the leases xlator: inode forget, fd release and
+ * client disconnect handlers.
+ */
+struct xlator_cbks cbks = {
+ .forget = leases_forget,
+ .release = leases_release,
+ .client_disconnect = leases_clnt_disconnect_cbk,
+};
+
+/*
+ * Volume options understood by the leases xlator:
+ *  - "leases": boolean switch, off by default.
+ *  - "lease-lock-recall-timeout": integer, default taken from
+ *    RECALL_LEASE_LK_TIMEOUT.
+ * The table ends with an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"leases"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When \"on\", enables leases support"
+ },
+ { .key = {"lease-lock-recall-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = RECALL_LEASE_LK_TIMEOUT,
+ .description = "After 'timeout' seconds since the recall_lease"
+ " request has been sent to the client, the lease lock"
+ " will be forcefully purged by the server."
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/leases/src/leases.h b/xlators/features/leases/src/leases.h
new file mode 100644
index 00000000000..df5e8beb85c
--- /dev/null
+++ b/xlators/features/leases/src/leases.h
@@ -0,0 +1,252 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LEASES_H
+#define _LEASES_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "logging.h"
+#include "client_t.h"
+#include "lkowner.h"
+#include "locking.h"
+#include "upcall-utils.h"
+#include "tw.h"
+#include "timer-wheel.h"
+#include "leases-mem-types.h"
+#include "leases-messages.h"
+
+/* The time period (in seconds) for which a client's lease lock is kept after
+ * it has been recalled for the first time. Kept as a string because it is
+ * used as the default_value of the "lease-lock-recall-timeout" option. */
+#define RECALL_LEASE_LK_TIMEOUT "60"
+
+/* Bits of the caller-scope 'fop_flags' word filled in by GET_FLAGS and
+ * GET_FLAGS_LK and passed to check_lease_conflict(). */
+#define DATA_MODIFY_FOP 0x0001
+#define BLOCKING_FOP 0x0002
+
+/* Non-negative verdicts that the fop handlers compare against the return
+ * value of check_lease_conflict(). */
+#define BLOCK_FOP 0x0001
+#define WIND_FOP 0x0002
+
+/* Jump to 'label' in the calling function when leases are not enabled on
+ * the xlator 'this' (as reported by is_leases_enabled()). */
+#define EXIT_IF_LEASES_OFF(this, label) do { \
+ if (!is_leases_enabled(this)) \
+ goto label; \
+} while (0)
+
+/* Look up the "lease-id" binary value in 'xdata' and store the pointer in
+ * 'lease_id'. When the lookup fails a debug message naming 'client_uid' is
+ * logged; the failure is otherwise ignored and 'lease_id' is not assigned
+ * by this macro. */
+#define GET_LEASE_ID(xdata, lease_id, client_uid) do { \
+ int ret_val = -1; \
+ ret_val = dict_get_bin (xdata, "lease-id", (void **)&lease_id); \
+ if (ret_val) { \
+ ret_val = 0; \
+ gf_msg_debug ("leases", 0, "Lease id is not set for client:%s", client_uid); \
+ } \
+} while (0)
+
+/* Classify a fop into the caller-scope variable 'fop_flags' (which is not
+ * a macro parameter and must be declared by the caller):
+ *  - DATA_MODIFY_FOP for an open with O_WRONLY/O_RDWR and for the
+ *    modifying fops listed below;
+ *  - BLOCKING_FOP is OR-ed in when the fd has neither O_NONBLOCK nor
+ *    O_NDELAY set.
+ * Note that the macro arguments are expanded without parentheses. */
+#define GET_FLAGS(fop, fd_flags) \
+do { \
+ if ((fd_flags & (O_WRONLY | O_RDWR)) && fop == GF_FOP_OPEN) \
+ fop_flags = DATA_MODIFY_FOP; \
+ \
+ if (fop == GF_FOP_UNLINK || fop == GF_FOP_RENAME || \
+ fop == GF_FOP_TRUNCATE || fop == GF_FOP_FTRUNCATE || \
+ fop == GF_FOP_FLUSH || fop == GF_FOP_FSYNC || \
+ fop == GF_FOP_WRITE || fop == GF_FOP_FALLOCATE || \
+ fop == GF_FOP_DISCARD || fop == GF_FOP_ZEROFILL || \
+ fop == GF_FOP_SETATTR || fop == GF_FOP_FSETATTR || \
+ fop == GF_FOP_LINK) \
+ fop_flags = DATA_MODIFY_FOP; \
+ \
+ if (!(fd_flags & (O_NONBLOCK | O_NDELAY))) \
+ fop_flags |= BLOCKING_FOP; \
+ \
+} while (0) \
+
+
+/* Lock-fop counterpart of GET_FLAGS; also writes the caller-scope variable
+ * 'fop_flags':
+ *  - DATA_MODIFY_FOP for a set-lock command (blocking or not) whose lock
+ *    type is F_WRLCK;
+ *  - BLOCKING_FOP is OR-ed in for F_SETLKW/F_SETLKW64 when the fd has
+ *    O_NONBLOCK or O_NDELAY set.
+ * NOTE(review): GET_FLAGS sets BLOCKING_FOP when those fd flags are
+ * absent, whereas this macro requires them to be present -- confirm that
+ * the opposite polarity is intended. */
+#define GET_FLAGS_LK(cmd, l_type, fd_flags) \
+do { \
+ /* TODO: handle F_RESLK_LCK and other glusterfs_lk_recovery_cmds_t */ \
+ if ((cmd == F_SETLKW || cmd == F_SETLKW64 || \
+ cmd == F_SETLK || cmd == F_SETLK64) && \
+ l_type == F_WRLCK) \
+ fop_flags = DATA_MODIFY_FOP; \
+ \
+ if (fd_flags & (O_NONBLOCK | O_NDELAY) && \
+ (cmd == F_SETLKW || cmd == F_SETLKW64)) \
+ fop_flags |= BLOCKING_FOP; \
+ \
+} while (0) \
+
+/* Park a fop until the conflicting lease goes away: build a call stub that
+ * resumes through default_<fop_name>_resume, wrap it in a fop_stub_t and
+ * append it to the blocked_list of the inode's lease context.
+ *
+ * Requirements on the calling function: it must declare 'int ret' and
+ * 'op_errno', and provide an 'err' label. On any failure both are set
+ * (ret = -ENOMEM, op_errno = ENOMEM), whatever was allocated is released
+ * and control jumps to 'err', so the fop is always either queued or
+ * unwound. */
+#define LEASE_BLOCK_FOP(inode, fop_name, frame, this, params ...)              \
+do {                                                                           \
+        call_stub_t         *__stub     = NULL;                                \
+        fop_stub_t          *blk_fop    = NULL;                                \
+        lease_inode_ctx_t   *lease_ctx  = NULL;                                \
+                                                                               \
+        __stub = fop_##fop_name##_stub (frame, default_##fop_name##_resume,    \
+                                        params);                               \
+        if (!__stub) {                                                         \
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,                    \
+                        LEASE_MSG_NO_MEM,                                      \
+                        "Unable to create stub");                              \
+                ret = -ENOMEM;                                                 \
+                op_errno = ENOMEM;                                             \
+                goto __out;                                                    \
+        }                                                                      \
+                                                                               \
+        blk_fop = GF_CALLOC (1, sizeof (*blk_fop),                             \
+                             gf_leases_mt_fop_stub_t);                         \
+        if (!blk_fop) {                                                        \
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,                    \
+                        LEASE_MSG_NO_MEM,                                      \
+                        "Unable to create lease fop stub");                    \
+                ret = -ENOMEM;                                                 \
+                op_errno = ENOMEM;                                             \
+                goto __out;                                                    \
+        }                                                                      \
+                                                                               \
+        lease_ctx = lease_ctx_get (inode, this);                               \
+        if (!lease_ctx) {                                                      \
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,                    \
+                        LEASE_MSG_NO_MEM,                                      \
+                        "Unable to create/get inode ctx");                     \
+                /* ret must go negative here as well, otherwise the         */ \
+                /* cleanup below is skipped and the fop is never unwound    */ \
+                ret = -ENOMEM;                                                 \
+                op_errno = ENOMEM;                                             \
+                goto __out;                                                    \
+        }                                                                      \
+                                                                               \
+        blk_fop->stub = __stub;                                                \
+        pthread_mutex_lock (&lease_ctx->lock);                                 \
+        {                                                                      \
+                /*TODO: If the lease is unlocked btw check lease conflict and  \
+                 * by now, then this fop shouldn't be add to the blocked fop   \
+                 * list, can use generation number for the same?*/             \
+                list_add_tail (&blk_fop->list, &lease_ctx->blocked_list);      \
+        }                                                                      \
+        pthread_mutex_unlock (&lease_ctx->lock);                               \
+                                                                               \
+__out:                                                                         \
+        if (ret < 0) {                                                         \
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,  \
+                        "Unable to create stub for blocking the fop:%s (%s)",  \
+                        gf_fop_list[frame->root->op], strerror(ENOMEM));       \
+                if (__stub != NULL) {                                          \
+                        call_stub_destroy (__stub);                            \
+                }                                                              \
+                GF_FREE (blk_fop);                                             \
+                goto err;                                                      \
+        }                                                                      \
+} while (0)
+
+/* Per-xlator private state of the leases translator (this->private). */
+struct _leases_private {
+ gf_boolean_t leases_enabled; /* value of the "leases" option */
+ int32_t recall_lease_timeout; /* value of "lease-lock-recall-timeout" */
+ struct list_head client_list;
+ struct list_head recall_list;
+ struct tvec_base *timer_wheel; /* timer wheel where the recall request
+ is queued and waits for unlock/expiry */
+ gf_boolean_t fini; /* set by fini() before the recall thread is joined */
+ pthread_t recall_thr; /* thread running expired_recall_cleanup() */
+ gf_boolean_t inited_recall_thr; /* set once recall_thr has been started */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond; /* broadcast by fini() */
+};
+typedef struct _leases_private leases_private_t;
+
+/* Per-client bookkeeping entry, keyed by the client's uid string.
+ * NOTE(review): presumably client_list links the entry into
+ * leases_private_t.client_list and inode_list heads this client's
+ * lease_inode_t entries -- confirm against leases-internal.c. */
+struct _lease_client {
+ char *client_uid;
+ struct list_head client_list;
+ struct list_head inode_list;
+};
+typedef struct _lease_client lease_client_t;
+
+/* List node that carries a reference to one inode. */
+struct _lease_inode {
+ inode_t *inode;
+ struct list_head list; /* This can be part of both inode_list and recall_list */
+};
+typedef struct _lease_inode lease_inode_t;
+
+/* Per-fd context: the client uid and the lease id associated with the fd.
+ * NOTE(review): the code that populates it is not in this header. */
+struct _lease_fd_ctx {
+ char *client_uid;
+ char lease_id[LEASE_ID_SIZE];
+};
+typedef struct _lease_fd_ctx lease_fd_ctx_t;
+
+/* Per-inode lease state, obtained through lease_ctx_get(). The 'lock'
+ * mutex is the one LEASE_BLOCK_FOP takes while appending to
+ * blocked_list. */
+struct _lease_inode_ctx {
+ struct list_head lease_id_list; /* clients that have taken leases */
+ int lease_type_cnt[GF_LEASE_MAX_TYPE+1];
+ int lease_type; /* Types of leases acquired */
+ uint64_t lease_cnt; /* Total number of leases on this inode */
+ uint64_t openfd_cnt; /* number of fds open */
+ gf_boolean_t recall_in_progress; /* if lease recall is sent on this inode */
+ struct list_head blocked_list; /* List of fops blocked until the
+ lease recall is complete */
+ inode_t *inode; /* this represents the inode on which the
+ lock was taken, required mainly during
+ disconnect cleanup */
+ struct gf_tw_timer_list *timer;
+ pthread_mutex_t lock;
+};
+typedef struct _lease_inode_ctx lease_inode_ctx_t;
+
+/* Leases held under one lease id; linked through lease_id_list. */
+struct _lease_id_entry {
+ struct list_head lease_id_list;
+ char lease_id[LEASE_ID_SIZE];
+ char *client_uid; /* uid of the client that has
+ taken the lease */
+ int lease_type_cnt[GF_LEASE_MAX_TYPE+1]; /* count of each lease type */
+ int lease_type; /* Union of all the leases taken
+ under the given lease id */
+ uint64_t lease_cnt; /* Number of leases taken under the
+ given lease id */
+ time_t recall_time; /* time @ which recall was sent */
+};
+typedef struct _lease_id_entry lease_id_entry_t;
+
+/* Node of lease_inode_ctx_t.blocked_list wrapping the call stub of one
+ * parked fop (filled in by LEASE_BLOCK_FOP). */
+/* Required? as stub itself will have list */
+struct __fop_stub {
+ struct list_head list;
+ call_stub_t *stub;
+};
+typedef struct __fop_stub fop_stub_t;
+
+/* Inode/xlator pair.
+ * NOTE(review): presumably the payload handed to the recall timer
+ * callback -- confirm against the timer set-up code. */
+struct __lease_timer_data {
+ inode_t *inode;
+ xlator_t *this;
+};
+typedef struct __lease_timer_data lease_timer_data_t;
+
+/* Whether leases are enabled on this xlator; used by EXIT_IF_LEASES_OFF. */
+gf_boolean_t
+is_leases_enabled (xlator_t *this);
+
+/* NOTE(review): presumably returns the configured
+ * "lease-lock-recall-timeout" value -- confirm in leases-internal.c. */
+int32_t
+get_recall_lease_timeout (xlator_t *this);
+
+/* Get the lease context of 'inode'; NULL on failure (LEASE_BLOCK_FOP
+ * logs "Unable to create/get inode ctx" in that case). */
+lease_inode_ctx_t *
+lease_ctx_get (inode_t *inode, xlator_t *this);
+
+int
+process_lease_req (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, struct gf_lease *lease);
+
+/* Verdict for a fop on 'inode'. The fop handlers treat a negative return
+ * as an error, BLOCK_FOP as "park the fop" and WIND_FOP as "pass it
+ * down". */
+int
+check_lease_conflict (call_frame_t *frame, inode_t *inode,
+ const char *lease_id, uint32_t fop_flags);
+
+/* Called from the client_disconnect callback with the client's uid. */
+int
+cleanup_client_leases (xlator_t *this, const char *client_uid);
+
+/* Start routine of leases_private_t.recall_thr; receives the xlator as
+ * 'data'. */
+void *
+expired_recall_cleanup (void *data);
+
+#endif /* _LEASES_H */
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index d10b874befb..a3b3855fb9b 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -1,15 +1,18 @@
xlator_LTLIBRARIES = locks.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-locks_la_LDFLAGS = -module -avoidversion
+locks_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c
-locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \
+ clear.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = locks.h common.h locks-mem-types.h
+noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h pl-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
@@ -17,4 +20,4 @@ uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
install-data-hook:
- ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so \ No newline at end of file
+ ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
new file mode 100644
index 00000000000..d7c210f24a5
--- /dev/null
+++ b/xlators/features/locks/src/clear.c
@@ -0,0 +1,422 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+#include "statedump.h"
+#include "clear.h"
+
+/*
+ * Translate a kind keyword ("blocked", "granted" or "all") into the
+ * matching clrlk_kind value. Returns CLRLK_KIND_MAX when the keyword is
+ * not recognised.
+ */
+int
+clrlk_get_kind (char *kind)
+{
+        /* slot 0 is a placeholder: the clrlk_kind values start at 1 */
+        char *names[CLRLK_KIND_MAX] = {
+                "dummy",
+                [CLRLK_BLOCKED] = "blocked",
+                [CLRLK_GRANTED] = "granted",
+                [CLRLK_ALL]     = "all",
+        };
+        int   idx = 0;
+
+        for (idx = CLRLK_BLOCKED; idx < CLRLK_KIND_MAX; idx++) {
+                if (strcmp (names[idx], kind) == 0)
+                        return idx;
+        }
+
+        return CLRLK_KIND_MAX;
+}
+
+/*
+ * Translate a type keyword ("inode", "entry" or "posix") into the
+ * matching clrlk_type value. Returns CLRLK_TYPE_MAX when the keyword is
+ * not recognised.
+ */
+int
+clrlk_get_type (char *type)
+{
+        char *names[CLRLK_TYPE_MAX] = {
+                [CLRLK_INODE] = "inode",
+                [CLRLK_ENTRY] = "entry",
+                [CLRLK_POSIX] = "posix",
+        };
+        int   idx = 0;
+
+        for (idx = CLRLK_INODE; idx < CLRLK_TYPE_MAX; idx++) {
+                if (strcmp (names[idx], type) == 0)
+                        return idx;
+        }
+
+        return CLRLK_TYPE_MAX;
+}
+
+/*
+ * Parse an optional "<whence>,<start>-<len>" range into 'ulock'.
+ *
+ * A NULL range_str is valid and means "no range filter": *chk_range is
+ * set to false. A well-formed range sets *chk_range to true.
+ * Returns 0 on success, -1 when chk_range is NULL or the string does not
+ * contain all three fields.
+ */
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+                      gf_boolean_t *chk_range)
+{
+        int nfields = 0;
+
+        if (!chk_range)
+                return -1;
+
+        if (!range_str) {
+                *chk_range = _gf_false;
+                return 0;
+        }
+
+        nfields = sscanf (range_str, "%hd,%"PRId64"-""%"PRId64,
+                          &ulock->l_whence, &ulock->l_start, &ulock->l_len);
+        if (nfields != 3)
+                return -1;
+
+        *chk_range = _gf_true;
+        return 0;
+}
+
+/*
+ * Parse a clear-locks command of the form
+ *     GF_XATTR_CLRLK_CMD.t<type>.k<kind>[.<args>]
+ * into 'args'. Both the type and the kind keyword are mandatory and must
+ * appear in that order; <args> is type specific (a lock range or a
+ * basename) and, when present, is duplicated into args->opts.
+ *
+ * Returns 0 on success and -1 on a malformed command, an unknown
+ * type/kind, or allocation failure.
+ */
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args)
+{
+        char *opts = NULL;
+        char *cur = NULL;
+        char *tok = NULL;
+        char *sptr = NULL;
+        char *free_ptr = NULL;
+        char kw[KW_MAX] = {[KW_TYPE] = 't',
+                           [KW_KIND] = 'k',
+                          };
+        int ret = -1;
+        int i = 0;
+
+        GF_ASSERT (cmd);
+        /* +1 so the scratch buffer always has room for the terminator */
+        free_ptr = opts = GF_CALLOC (1, strlen (cmd) + 1, gf_common_mt_char);
+        if (!opts)
+                goto out;
+
+        if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) {
+                ret = -1;
+                goto out;
+        }
+
+        /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
+        cur = opts;
+        for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr));
+             cur = NULL, i++) {
+                if (tok[0] != kw[i]) {
+                        ret = -1;
+                        goto out;
+                }
+                if (i == KW_TYPE)
+                        args->type = clrlk_get_type (tok+1);
+                if (i == KW_KIND)
+                        args->kind = clrlk_get_kind (tok+1);
+        }
+
+        /* The loop stopped early: a mandatory keyword is missing, so
+         * args->type and/or args->kind were never assigned. */
+        if (i < KW_MAX)
+                goto out;
+
+        if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
+                goto out;
+
+        /*optional args, neither range nor basename can 'legally' contain
+         * "/" in them*/
+        tok = strtok_r (NULL, "/", &sptr);
+        if (tok) {
+                args->opts = gf_strdup (tok);
+                /* Without the copy the caller would silently run with no
+                 * filter at all, so treat the failure as an error. */
+                if (!args->opts)
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        GF_FREE (free_ptr);
+        return ret;
+}
+
+/*
+ * Clear posix locks on pl_inode that match 'args'.
+ *
+ * args->kind selects blocked and/or granted locks; args->opts, when set,
+ * is a range that a lock must match exactly (whence, start and len).
+ * Every matching lock is unlinked and freed; a blocked one is first
+ * unwound to its waiter with EAGAIN. Afterwards the remaining blocked
+ * locks are given a chance to be granted.
+ *
+ * *blkd and *granted receive the number of blocked and granted locks
+ * cleared (both 0 on failure). Returns 0 on success; -1 with
+ * *op_errno = EINVAL when the range cannot be parsed.
+ */
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno)
+{
+ posix_lock_t *plock = NULL;
+ posix_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list,
+ list) {
+ /* skip locks whose state (blocked/granted) was not requested */
+ if ((plock->blocked &&
+ !(args->kind & CLRLK_BLOCKED)) ||
+ (!plock->blocked &&
+ !(args->kind & CLRLK_GRANTED)))
+ continue;
+
+ /* with a range given, only an exact match is cleared */
+ if (chk_range &&
+ (plock->user_flock.l_whence != ulock.l_whence
+ || plock->user_flock.l_start != ulock.l_start
+ || plock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ list_del_init (&plock->list);
+ if (plock->blocked) {
+ bcount++;
+ pl_trace_out (this, plock->frame, NULL, NULL,
+ F_SETLKW, &plock->user_flock,
+ -1, EAGAIN, NULL);
+
+ /* NOTE(review): the waiter is unwound while pl_inode->mutex is
+ * still held -- confirm this cannot re-enter the locks xlator. */
+ STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN,
+ &plock->user_flock, NULL);
+
+ } else {
+ gcount++;
+ }
+ GF_FREE (plock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ grant_blocked_locks (this, pl_inode);
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/*
+ * Clear inode locks of domain 'dom' on pl_inode that match 'args'.
+ *
+ * args->kind selects blocked and/or granted locks (a kind with neither
+ * bit set falls through to the blocked pass); args->opts, when set, is a
+ * range that a lock must match exactly. Matching locks are moved onto a
+ * private 'released' list under pl_inode->mutex and processed after the
+ * mutex is dropped: blocked ones are unwound with EAGAIN, and every lock
+ * is unref'd.
+ *
+ * *blkd and *granted receive the number of locks cleared in each pass.
+ * grant_blocked_inode_locks() runs on every exit path, including the
+ * EINVAL one.
+ */
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_inode_lock_t *ilock = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ bcount++;
+ list_del_init (&ilock->client_list);
+ list_del_init (&ilock->blocked_locks);
+ list_add (&ilock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* unwind the collected waiters outside the inode mutex */
+ list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) {
+ list_del_init (&ilock->blocked_locks);
+ pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN,
+ ilock->volume);
+ STACK_UNWIND_STRICT (inodelk, ilock->frame, -1,
+ EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list,
+ list) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ gcount++;
+ list_del_init (&ilock->client_list);
+ list_del_init (&ilock->list);
+ list_add (&ilock->list, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* 'released' is empty again here, so it is reused via the 'list' member */
+ list_for_each_entry_safe (ilock, tmp, &released, list) {
+ list_del_init (&ilock->list);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ ret = 0;
+out:
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/*
+ * Clear entry locks of domain 'dom' on pl_inode that match 'args'.
+ *
+ * args->kind selects blocked and/or granted locks (a kind with neither
+ * bit set falls through to the blocked pass); args->opts, when set, is a
+ * basename that a lock must match exactly. Blocked matches are collected
+ * under pl_inode->mutex and unwound with EAGAIN once the mutex is
+ * dropped; granted matches are unlinked and unref'd under the mutex,
+ * after which blocked entry locks get a chance to be granted.
+ *
+ * *blkd and *granted receive the number of locks cleared in each pass.
+ * op_errno is not written by this function.
+ */
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_entry_lock_t *elock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ int bcount = 0;
+ int gcount = 0;
+ int ret = -1;
+ struct list_head removed;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ bcount++;
+
+ list_del_init (&elock->client_list);
+ list_del_init (&elock->blocked_locks);
+ list_add_tail (&elock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* unwind the collected waiters outside the inode mutex */
+ list_for_each_entry_safe (elock, tmp, &released, blocked_locks) {
+ list_del_init (&elock->blocked_locks);
+ entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type,
+ -1, EAGAIN);
+ STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
+
+ __pl_entrylk_unref (elock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ INIT_LIST_HEAD (&removed);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ gcount++;
+ list_del_init (&elock->client_list);
+ list_del_init (&elock->domain_list);
+ /* NOTE(review): 'removed' is never walked afterwards; the lock is
+ * unref'd right away while still linked on this local list. */
+ list_add_tail (&elock->domain_list, &removed);
+
+ __pl_entrylk_unref (elock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_entry_locks (this, pl_inode, dom);
+
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/*
+ * Run the inode-lock or entry-lock clear (chosen by args->type) over
+ * every domain of pl_inode, adding each domain's counts to *blkd and
+ * *granted. Other lock types leave the counters untouched.
+ *
+ * Returns 0 when all domains were processed (or there are none), and the
+ * first non-zero result of a per-domain clear otherwise.
+ */
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+                                clrlk_args *args, int *blkd, int *granted,
+                                int *op_errno)
+{
+        pl_dom_list_t *dom      = NULL;
+        int            ret      = -1;
+        int            nblocked = 0;
+        int            ngranted = 0;
+
+        list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+                nblocked = 0;
+                ngranted = 0;
+
+                if (args->type == CLRLK_INODE) {
+                        ret = clrlk_clear_inodelk (this, pl_inode, dom, args,
+                                                   &nblocked, &ngranted,
+                                                   op_errno);
+                        if (ret)
+                                return ret;
+                } else if (args->type == CLRLK_ENTRY) {
+                        ret = clrlk_clear_entrylk (this, pl_inode, dom, args,
+                                                   &nblocked, &ngranted,
+                                                   op_errno);
+                        if (ret)
+                                return ret;
+                }
+
+                *blkd    += nblocked;
+                *granted += ngranted;
+        }
+
+        /* also reached when the domain list is empty */
+        return 0;
+}
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
new file mode 100644
index 00000000000..78fc5ae3398
--- /dev/null
+++ b/xlators/features/locks/src/clear.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLEAR_H__
+#define __CLEAR_H__
+
+#include "compat-errno.h"
+#include "stack.h"
+#include "call-stub.h"
+#include "locks.h"
+
+/* Lock families that a clear-locks command can target. The values index
+ * the keyword table in clrlk_get_type(); CLRLK_TYPE_MAX doubles as the
+ * "unknown type" result. */
+typedef enum {
+ CLRLK_INODE,
+ CLRLK_ENTRY,
+ CLRLK_POSIX,
+ CLRLK_TYPE_MAX
+} clrlk_type;
+
+/* Lock states that a clear-locks command can target. The values are
+ * tested as bits (kind & CLRLK_BLOCKED, kind & CLRLK_GRANTED), so
+ * CLRLK_ALL (3) selects both. CLRLK_KIND_MAX doubles as the
+ * "unknown kind" result of clrlk_get_kind(). */
+typedef enum {
+ CLRLK_BLOCKED = 1,
+ CLRLK_GRANTED,
+ CLRLK_ALL,
+ CLRLK_KIND_MAX
+} clrlk_kind;
+
+/* Positions of the mandatory keywords in a clear-locks command; they
+ * index the keyword-prefix table in clrlk_parse_args(). */
+typedef enum {
+ KW_TYPE,
+ KW_KIND,
+ /*add new keywords here*/
+ KW_MAX
+} clrlk_opts;
+
+/* Parsed form of a clear-locks command, filled in by clrlk_parse_args(). */
+struct _clrlk_args;
+typedef struct _clrlk_args clrlk_args;
+
+struct _clrlk_args {
+ int type; /* a clrlk_type value */
+ int kind; /* a clrlk_kind value */
+ char *opts; /* optional type-specific argument (lock range or
+ basename), allocated with gf_strdup(); may be NULL */
+};
+
+/* Map a kind keyword ("blocked", "granted", "all") to its clrlk_kind
+ * value; CLRLK_KIND_MAX when the keyword is unknown. */
+int
+clrlk_get_kind (char *kind);
+
+/* Map a type keyword ("inode", "entry", "posix") to its clrlk_type value;
+ * CLRLK_TYPE_MAX when the keyword is unknown. */
+int
+clrlk_get_type (char *type);
+
+/* Parse an optional "<whence>,<start>-<len>" range into ulock and report
+ * through chk_range whether a range was given. 0 on success, -1 on
+ * failure. */
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+                      gf_boolean_t *chk_range);
+
+/* Parse a clear-locks command string into args. 0 on success, -1 on
+ * failure. */
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args);
+
+/* Clear posix locks on pl_inode matching args; the number of blocked and
+ * granted locks cleared is stored in *blkd and *granted. */
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+                     int *blkd, int *granted, int *op_errno);
+
+/* Clear inode locks of one domain matching args. */
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+                     clrlk_args *args, int *blkd, int *granted, int *op_errno);
+
+/* Clear entry locks of one domain matching args. */
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+                     clrlk_args *args, int *blkd, int *granted, int *op_errno);
+
+/* Run the inode/entry clear over every domain of pl_inode, adding the
+ * per-domain counts to *blkd and *granted. */
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+                                clrlk_args *args, int *blkd, int *granted,
+                                int *op_errno);
+#endif /* __CLEAR_H__ */
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 8ab9feaaf05..796b538f6f2 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,32 +1,17 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "compat.h"
#include "xlator.h"
@@ -42,24 +27,23 @@ static int
__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock);
static void
__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock);
+static int
+pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock);
static pl_dom_list_t *
-allocate_domain (const char *volume)
+__allocate_domain (const char *volume)
{
pl_dom_list_t *dom = NULL;
dom = GF_CALLOC (1, sizeof (*dom),
gf_locks_mt_pl_dom_list_t);
if (!dom)
- return NULL;
-
+ goto out;
dom->domain = gf_strdup(volume);
- if (!dom->domain) {
- gf_log ("posix-locks", GF_LOG_TRACE,
- "Out of Memory");
- return NULL;
- }
+ if (!dom->domain)
+ goto out;
gf_log ("posix-locks", GF_LOG_TRACE,
"New domain allocated: %s", dom->domain);
@@ -70,6 +54,12 @@ allocate_domain (const char *volume)
INIT_LIST_HEAD (&dom->inodelk_list);
INIT_LIST_HEAD (&dom->blocked_inodelks);
+out:
+ if (dom && (NULL == dom->domain)) {
+ GF_FREE (dom);
+ dom = NULL;
+ }
+
return dom;
}
@@ -81,19 +71,28 @@ get_domain (pl_inode_t *pl_inode, const char *volume)
{
pl_dom_list_t *dom = NULL;
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (strcmp (dom->domain, volume) == 0)
- goto found;
+ GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", volume, out);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (strcmp (dom->domain, volume) == 0)
+ goto unlock;
+ }
+ dom = __allocate_domain (volume);
+ if (dom)
+ list_add (&dom->inode_list, &pl_inode->dom_list);
}
-
- dom = allocate_domain(volume);
-
- if (dom)
- list_add (&dom->inode_list, &pl_inode->dom_list);
-found:
-
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ if (dom) {
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume);
+ } else {
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume);
+ }
+out:
return dom;
}
@@ -103,33 +102,25 @@ fd_to_fdnum (fd_t *fd)
return ((unsigned long) fd);
}
+/* Recover the fd pointer stored in lock->fd_num by converting the number
+ * back to an fd_t pointer. */
+fd_t *
+fd_from_fdnum (posix_lock_t *lock)
+{
+        fd_t *fd = (fd_t *) lock->fd_num;
+
+        return fd;
+}
+
int
__pl_inode_is_empty (pl_inode_t *pl_inode)
{
- pl_dom_list_t *dom = NULL;
- int is_empty = 1;
-
- if (!list_empty (&pl_inode->ext_list))
- is_empty = 0;
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (!list_empty (&dom->entrylk_list))
- is_empty = 0;
-
- if (!list_empty (&dom->inodelk_list))
- is_empty = 0;
- }
-
- return is_empty;
+ return (list_empty (&pl_inode->ext_list));
}
void
pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame)
{
- snprintf (str, size, "Pid=%llu, lk-owner=%llu, Transport=%p, Frame=%llu",
+ snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu",
(unsigned long long) frame->root->pid,
- (unsigned long long) frame->root->lk_owner,
- (void *)frame->root->trans,
+ lkowner_utoa (&frame->root->lk_owner),
+ frame->root->client,
(unsigned long long) frame->root->unique);
}
@@ -159,18 +150,17 @@ pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc)
ipath = NULL;
}
- snprintf (str, size, "ino=%llu, fd=%p, path=%s",
- (unsigned long long) inode->ino, fd,
+ snprintf (str, size, "gfid=%s, fd=%p, path=%s",
+ uuid_utoa (inode->gfid), fd,
ipath ? ipath : "<nul>");
- if (ipath)
- GF_FREE (ipath);
+ GF_FREE (ipath);
}
void
pl_print_lock (char *str, int size, int cmd,
- struct flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -218,17 +208,17 @@ pl_print_lock (char *str, int size, int cmd,
}
snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
void
pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct flock *flock, const char *domain)
+ int cmd, struct gf_flock *flock, const char *domain)
{
posix_locks_private_t *priv = NULL;
char pl_locker[256];
@@ -245,9 +235,9 @@ pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
pl_locker, pl_lockee, pl_lock);
}
@@ -276,7 +266,7 @@ pl_print_verdict (char *str, int size, int op_ret, int op_errno)
void
pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct flock *flock, int op_ret, int op_errno, const char *domain)
+ int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain)
{
posix_locks_private_t *priv = NULL;
@@ -295,11 +285,11 @@ pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
pl_print_verdict (verdict, 32, op_ret, op_errno);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
verdict, pl_locker, pl_lockee, pl_lock);
}
@@ -307,7 +297,7 @@ pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
void
pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct flock *flock, const char *domain)
+ int cmd, struct gf_flock *flock, const char *domain)
{
posix_locks_private_t *priv = NULL;
@@ -325,9 +315,9 @@ pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
pl_locker, pl_lockee, pl_lock);
}
@@ -354,7 +344,7 @@ pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd)
pl_print_locker (pl_locker, 256, this, frame);
pl_print_lockee (pl_lockee, 256, fd, NULL);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[FLUSH] Locker = {%s} Lockee = {%s}",
pl_locker, pl_lockee);
}
@@ -372,7 +362,7 @@ pl_trace_release (xlator_t *this, fd_t *fd)
pl_print_lockee (pl_lockee, 256, fd, NULL);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[RELEASE] Lockee = {%s}", pl_lockee);
}
@@ -383,6 +373,7 @@ pl_update_refkeeper (xlator_t *this, inode_t *inode)
pl_inode_t *pl_inode = NULL;
int is_empty = 0;
int need_unref = 0;
+ int need_ref = 0;
pl_inode = pl_inode_get (this, inode);
@@ -396,13 +387,17 @@ pl_update_refkeeper (xlator_t *this, inode_t *inode)
}
if (!is_empty && !pl_inode->refkeeper) {
- pl_inode->refkeeper = inode_ref (inode);
+ need_ref = 1;
+ pl_inode->refkeeper = inode;
}
}
pthread_mutex_unlock (&pl_inode->mutex);
if (need_unref)
inode_unref (inode);
+
+ if (need_ref)
+ inode_ref (inode);
}
@@ -410,82 +405,104 @@ pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode)
{
uint64_t tmp_pl_inode = 0;
- pl_inode_t *pl_inode = NULL;
-// mode_t st_mode = 0;
- int ret = 0;
-
- ret = inode_ctx_get (inode, this,&tmp_pl_inode);
- if (ret == 0) {
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- goto out;
- }
- pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
- gf_locks_mt_pl_inode_t);
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "Allocating new pl inode");
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
-/*
- st_mode = inode->st_mode;
- if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP))
- pl_inode->mandatory = 1;
-*/
-
- pthread_mutex_init (&pl_inode->mutex, NULL);
-
- INIT_LIST_HEAD (&pl_inode->dom_list);
- INIT_LIST_HEAD (&pl_inode->ext_list);
- INIT_LIST_HEAD (&pl_inode->rw_list);
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret == 0) {
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ goto unlock;
+ }
+ pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
+ gf_locks_mt_pl_inode_t);
+ if (!pl_inode) {
+ goto unlock;
+ }
- ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ gf_log (this->name, GF_LOG_TRACE,
+ "Allocating new pl inode");
+
+ pthread_mutex_init (&pl_inode->mutex, NULL);
+
+ INIT_LIST_HEAD (&pl_inode->dom_list);
+ INIT_LIST_HEAD (&pl_inode->ext_list);
+ INIT_LIST_HEAD (&pl_inode->rw_list);
+ INIT_LIST_HEAD (&pl_inode->reservelk_list);
+ INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD (&pl_inode->blocked_calls);
+ INIT_LIST_HEAD (&pl_inode->metalk_list);
+ INIT_LIST_HEAD (&pl_inode->queued_locks);
+ gf_uuid_copy (pl_inode->gfid, inode->gfid);
+
+ ret = __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ if (ret) {
+ GF_FREE (pl_inode);
+ pl_inode = NULL;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
-out:
- return pl_inode;
+ return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
-new_posix_lock (struct flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, fd_t *fd)
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking)
{
- posix_lock_t *lock = NULL;
+ posix_lock_t *lock = NULL;
+
+ GF_VALIDATE_OR_GOTO ("posix-locks", flock, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", client, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fd, out);
- lock = GF_CALLOC (1, sizeof (posix_lock_t),
+ lock = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- if (!lock) {
- return NULL;
- }
+ if (!lock) {
+ goto out;
+ }
+
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
+
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ lock->client = client;
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ lock->client_uid = gf_strdup (client->client_uid);
+ if (lock->client_uid == NULL) {
+ GF_FREE (lock);
+ goto out;
+ }
- lock->transport = transport;
lock->fd_num = fd_to_fdnum (fd);
- lock->client_pid = client_pid;
- lock->owner = owner;
+ lock->fd = fd;
+ lock->client_pid = client_pid;
+ lock->owner = *owner;
+ lock->lk_flags = lk_flags;
+
+ lock->blocking = blocking;
- INIT_LIST_HEAD (&lock->list);
+ INIT_LIST_HEAD (&lock->list);
- return lock;
+out:
+ return lock;
}
/* Delete a lock from the inode's lock list */
void
-__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
+__delete_lock (posix_lock_t *lock)
{
- list_del_init (&lock->list);
+ list_del_init (&lock->list);
}
@@ -493,32 +510,37 @@ __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
void
__destroy_lock (posix_lock_t *lock)
{
- GF_FREE (lock);
+ GF_FREE (lock);
}
-/* Convert a posix_lock to a struct flock */
+/* Convert a posix_lock to a struct gf_flock */
void
-posix_lock_to_flock (posix_lock_t *lock, struct flock *flock)
+posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock)
{
- flock->l_pid = lock->client_pid;
- flock->l_type = lock->fl_type;
- flock->l_start = lock->fl_start;
+ flock->l_pid = lock->client_pid;
+ flock->l_type = lock->fl_type;
+ flock->l_start = lock->fl_start;
+ flock->l_owner = lock->owner;
- if (lock->fl_end == LLONG_MAX)
- flock->l_len = 0;
- else
- flock->l_len = lock->fl_end - lock->fl_start + 1;
+ if (lock->fl_end == LLONG_MAX)
+ flock->l_len = 0;
+ else
+ flock->l_len = lock->fl_end - lock->fl_start + 1;
}
-
/* Insert the lock into the inode's lock list */
static void
__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- list_add_tail (&lock->list, &pl_inode->ext_list);
+ if (lock->blocked)
+ gettimeofday (&lock->blkd_time, NULL);
+ else
+ gettimeofday (&lock->granted_time, NULL);
- return;
+ list_add_tail (&lock->list, &pl_inode->ext_list);
+
+ return;
}
@@ -526,14 +548,14 @@ __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
int
locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
{
- /*
- Note:
- FUSE always gives us absolute offsets, so no need to worry
- about SEEK_CUR or SEEK_END
- */
-
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ /*
+ Note:
+ FUSE always gives us absolute offsets, so no need to worry
+ about SEEK_CUR or SEEK_END
+ */
+
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
}
@@ -542,8 +564,8 @@ int
same_owner (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
@@ -552,15 +574,15 @@ same_owner (posix_lock_t *l1, posix_lock_t *l2)
void
__delete_unlck_locks (pl_inode_t *pl_inode)
{
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if (l->fl_type == F_UNLCK) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->fl_type == F_UNLCK) {
+ __delete_lock (l);
+ __destroy_lock (l);
+ }
+ }
}
@@ -568,95 +590,160 @@ __delete_unlck_locks (pl_inode_t *pl_inode)
static posix_lock_t *
add_locks (posix_lock_t *l1, posix_lock_t *l2)
{
- posix_lock_t *sum = NULL;
+ posix_lock_t *sum = NULL;
- sum = GF_CALLOC (1, sizeof (posix_lock_t),
+ sum = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- if (!sum)
- return NULL;
+ if (!sum)
+ return NULL;
- sum->fl_start = min (l1->fl_start, l2->fl_start);
- sum->fl_end = max (l1->fl_end, l2->fl_end);
+ sum->fl_start = min (l1->fl_start, l2->fl_start);
+ sum->fl_end = max (l1->fl_end, l2->fl_end);
- return sum;
+ return sum;
}
/* Subtract two locks */
struct _values {
- posix_lock_t *locks[3];
+ posix_lock_t *locks[3];
};
/* {big} must always be contained inside {small} */
static struct _values
subtract_locks (posix_lock_t *big, posix_lock_t *small)
{
- struct _values v = { .locks = {0, 0, 0} };
-
- if ((big->fl_start == small->fl_start) &&
- (big->fl_end == small->fl_end)) {
- /* both edges coincide with big */
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+
+ struct _values v = { .locks = {0, 0, 0} };
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[0]);
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_type = small->fl_type;
- }
- else if ((small->fl_start > big->fl_start) &&
- (small->fl_end < big->fl_end)) {
- /* both edges lie inside big */
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[0])
+ goto out;
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_type = small->fl_type;
+ goto done;
+ }
+
+ if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[0]);
- v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[1]);
- v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[1])
+ goto out;
+
+ v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[2]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- memcpy (v.locks[2], big, sizeof (posix_lock_t));
- v.locks[2]->fl_start = small->fl_end + 1;
- }
- /* one edge coincides with big */
- else if (small->fl_start == big->fl_start) {
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+
+ memcpy (v.locks[2], big, sizeof (posix_lock_t));
+ v.locks[2]->fl_start = small->fl_end + 1;
+ goto done;
+
+ }
+
+ /* one edge coincides with big */
+ if (small->fl_start == big->fl_start) {
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[0]);
- v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[1]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_start = small->fl_end + 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- }
- else if (small->fl_end == big->fl_end) {
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_start = small->fl_end + 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ goto done;
+ }
+
+ if (small->fl_end == big->fl_end) {
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[0]);
- v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
- ERR_ABORT (v.locks[1]);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ goto done;
+ }
+
+ GF_ASSERT (0);
+ gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks");
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- }
- else {
- gf_log ("posix-locks", GF_LOG_ERROR,
- "Unexpected case in subtract_locks. Please send "
- "a bug report to gluster-devel@nongnu.org");
+out:
+ if (v.locks[0]) {
+ GF_FREE (v.locks[0]);
+ v.locks[0] = NULL;
+ }
+ if (v.locks[1]) {
+ GF_FREE (v.locks[1]);
+ v.locks[1] = NULL;
+ }
+ if (v.locks[2]) {
+ GF_FREE (v.locks[2]);
+ v.locks[2] = NULL;
}
+done:
return v;
}
+static posix_lock_t *
+first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *conf = NULL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
+
+ if (locks_overlap (l, lock)) {
+ if (same_owner (l, lock))
+ continue;
+
+ if ((l->fl_type == F_WRLCK) ||
+ (lock->fl_type == F_WRLCK)) {
+ conf = l;
+ goto unlock;
+ }
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return conf;
+}
+
/*
Start searching from {begin}, and return the first lock that
conflicts, NULL if no conflict
@@ -713,25 +800,35 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
posix_lock_t *sum = NULL;
int i = 0;
struct _values v = { .locks = {0, 0, 0} };
+ client_t *client = NULL;
list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) {
+ if (conf->blocked)
+ continue;
if (!locks_overlap (conf, lock))
continue;
if (same_owner (conf, lock)) {
- if (conf->fl_type == lock->fl_type) {
+ if (conf->fl_type == lock->fl_type &&
+ conf->lk_flags == lock->lk_flags) {
sum = add_locks (lock, conf);
sum->fl_type = lock->fl_type;
- sum->transport = lock->transport;
+ sum->client = lock->client;
+ client = sum->client;
+ sum->client_uid =
+ gf_strdup (client->client_uid);
sum->fd_num = lock->fd_num;
sum->client_pid = lock->client_pid;
sum->owner = lock->owner;
+ sum->lk_flags = lock->lk_flags;
- __delete_lock (pl_inode, conf);
+ __delete_lock (conf);
__destroy_lock (conf);
__destroy_lock (lock);
+ INIT_LIST_HEAD (&sum->list);
+ posix_lock_to_flock (sum, &sum->user_flock);
__insert_and_merge (pl_inode, sum);
return;
@@ -739,17 +836,22 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum = add_locks (lock, conf);
sum->fl_type = conf->fl_type;
- sum->transport = conf->transport;
+ sum->client = conf->client;
+ client = sum->client;
+ sum->client_uid =
+ gf_strdup (client->client_uid);
+
sum->fd_num = conf->fd_num;
sum->client_pid = conf->client_pid;
sum->owner = conf->owner;
+ sum->lk_flags = conf->lk_flags;
v = subtract_locks (sum, lock);
- __delete_lock (pl_inode, conf);
+ __delete_lock (conf);
__destroy_lock (conf);
- __delete_lock (pl_inode, lock);
+ __delete_lock (lock);
__destroy_lock (lock);
__destroy_lock (sum);
@@ -759,6 +861,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
continue;
INIT_LIST_HEAD (&v.locks[i]->list);
+ posix_lock_to_flock (v.locks[i],
+ &v.locks[i]->user_flock);
__insert_and_merge (pl_inode,
v.locks[i]);
}
@@ -812,7 +916,7 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *g
list_del_init (&l->list);
if (__is_lock_grantable (pl_inode, l)) {
- conf = GF_CALLOC (1, sizeof (*conf),
+ conf = GF_CALLOC (1, sizeof (*conf),
gf_locks_mt_posix_lock_t);
if (!conf) {
@@ -827,10 +931,9 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *g
posix_lock_to_flock (l, &conf->user_flock);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted",
l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- l->owner,
+ l->client_pid, lkowner_utoa (&l->owner),
l->user_flock.l_start,
l->user_flock.l_len);
@@ -866,7 +969,8 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, NULL);
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
GF_FREE (lock);
}
@@ -874,6 +978,52 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
return;
}
+static int
+pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock)
+{
+ struct gf_flock flock = {0,};
+ posix_lock_t *unlock_lock = NULL;
+
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
+
+ int ret = -1;
+
+ INIT_LIST_HEAD (&granted_list);
+
+ flock.l_type = F_UNLCK;
+ flock.l_whence = old_lock->user_flock.l_whence;
+ flock.l_start = old_lock->user_flock.l_start;
+ flock.l_len = old_lock->user_flock.l_len;
+
+
+ unlock_lock = new_posix_lock (&flock, old_lock->client,
+ old_lock->client_pid, &old_lock->owner,
+ old_lock->fd, old_lock->lk_flags, 0);
+ GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
+ ret = 0;
+
+ __insert_and_merge (pl_inode, unlock_lock);
+
+ __grant_blocked_locks (this, pl_inode, &granted_list);
+
+ list_for_each_entry_safe (lock, tmp, &granted_list, list) {
+ list_del_init (&lock->list);
+
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, NULL);
+
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+
+ GF_FREE (lock);
+ }
+
+out:
+ return ret;
+}
int
pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
@@ -885,21 +1035,50 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
pthread_mutex_lock (&pl_inode->mutex);
{
+ /* Send unlock before the actual lock to
+ prevent lock upgrade / downgrade
+ problems only if:
+ - it is a blocking call
+ - it has other conflicting locks
+ */
+
+ if (can_block &&
+ !(__is_lock_grantable (pl_inode, lock))) {
+ ret = pl_send_prelock_unlock (this, pl_inode,
+ lock);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not send pre-lock "
+ "unlock");
+ }
+
if (__is_lock_grantable (pl_inode, lock)) {
+ if (pl_metalock_is_active (pl_inode)) {
+ __pl_queue_lock (pl_inode, lock, can_block);
+ pthread_mutex_unlock (&pl_inode->mutex);
+ ret = -2;
+ goto out;
+ }
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
__insert_and_merge (pl_inode, lock);
} else if (can_block) {
+ if (pl_metalock_is_active (pl_inode)) {
+ __pl_queue_lock (pl_inode, lock, can_block);
+ pthread_mutex_unlock (&pl_inode->mutex);
+ ret = -2;
+ goto out;
+ }
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
lock->blocked = 1;
@@ -907,10 +1086,10 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
ret = -1;
} else {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
errno = EAGAIN;
@@ -923,6 +1102,7 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
do_blocked_rw (pl_inode);
+out:
return ret;
}
@@ -932,7 +1112,7 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
{
posix_lock_t *conf = NULL;
- conf = first_overlap (pl_inode, lock);
+ conf = first_conflicting_overlap (pl_inode, lock);
if (conf == NULL) {
lock->fl_type = F_UNLCK;
@@ -941,3 +1121,16 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
return conf;
}
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock()
+{
+ long int monkey_unlock_rand = 0;
+ long int monkey_unlock_rand_rem = 0;
+
+ monkey_unlock_rand = random ();
+ monkey_unlock_rand_rem = monkey_unlock_rand % 100;
+ if (monkey_unlock_rand_rem == 0)
+ return _gf_true;
+ return _gf_false;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index d707294475f..3729ca24bed 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,28 +1,42 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __COMMON_H__
#define __COMMON_H__
+#include "lkowner.h"
+/*dump locks format strings */
+#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
+#define ENTRY_FMT "type=%s on basename=%s"
+#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p"
+#define GRNTD_AT "granted at %s"
+#define BLKD_AT "blocked at %s"
+#define CONN_ID "connection-id=%s"
+#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT
+#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT
+#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT
+
+#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT
+#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT
+#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT
+#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT
+#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+
+
posix_lock_t *
-new_posix_lock (struct flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, fd_t *fd);
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags,
+ int can_block);
pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode);
@@ -38,7 +52,7 @@ void
grant_blocked_locks (xlator_t *this, pl_inode_t *inode);
void
-posix_lock_to_flock (posix_lock_t *lock, struct flock *flock);
+posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock);
int
locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
@@ -46,7 +60,7 @@ locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
int
same_owner (posix_lock_t *l1, posix_lock_t *l2);
-void __delete_lock (pl_inode_t *, posix_lock_t *);
+void __delete_lock (posix_lock_t *);
void __destroy_lock (posix_lock_t *);
@@ -54,34 +68,39 @@ pl_dom_list_t *
get_domain (pl_inode_t *pl_inode, const char *volume);
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom);
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom);
void
__delete_inode_lock (pl_inode_lock_t *lock);
void
-__destroy_inode_lock (pl_inode_lock_t *lock);
+__pl_inodelk_unref (pl_inode_lock_t *lock);
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom);
+ pl_dom_list_t *dom);
void pl_update_refkeeper (xlator_t *this, inode_t *inode);
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode);
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname);
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname);
int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode);
+int32_t
get_entrylk_count (xlator_t *this, inode_t *inode);
void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct flock *flock, const char *domain);
+ int cmd, struct gf_flock *flock, const char *domain);
void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct flock *flock, int op_ret, int op_errno, const char *domain);
+ int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain);
void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct flock *flock, const char *domain);
+ int cmd, struct gf_flock *flock, const char *domain);
void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd);
@@ -108,7 +127,7 @@ void
pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame);
void
-pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char *domain);
+pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain);
void
pl_trace_release (xlator_t *this, fd_t *fd);
@@ -116,4 +135,33 @@ pl_trace_release (xlator_t *this, fd_t *fd);
unsigned long
fd_to_fdnum (fd_t *fd);
+fd_t *
+fd_from_fdnum (posix_lock_t *lock);
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block);
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2);
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block);
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+int32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
+
+void __pl_inodelk_unref (pl_inode_lock_t *lock);
+void __pl_entrylk_unref (pl_entry_lock_t *lock);
+
+int
+pl_metalock_is_active (pl_inode_t *pl_inode);
+
+int
+__pl_queue_lock (pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block);
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock();
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 603e73da046..4231d760cdc 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
#include "compat.h"
#include "xlator.h"
@@ -31,33 +16,60 @@
#include "list.h"
#include "locks.h"
+#include "clear.h"
#include "common.h"
+void
+__pl_entrylk_unref (pl_entry_lock_t *lock)
+{
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+}
+
+
+static void
+__pl_entrylk_ref (pl_entry_lock_t *lock)
+{
+ lock->ref++;
+}
+
+
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- void *trans, pid_t client_pid, uint64_t owner, const char *volume)
-
+ const char *domain, call_frame_t *frame, char *conn_id)
{
- pl_entry_lock_t *newlock = NULL;
+ pl_entry_lock_t *newlock = NULL;
- newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t),
+ newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t),
gf_locks_mt_pl_entry_lock_t);
- if (!newlock) {
- goto out;
- }
+ if (!newlock) {
+ goto out;
+ }
- newlock->basename = basename ? gf_strdup (basename) : NULL;
- newlock->type = type;
- newlock->trans = trans;
- newlock->volume = volume;
- newlock->client_pid = client_pid;
- newlock->owner = owner;
+ newlock->basename = basename ? gf_strdup (basename) : NULL;
+ newlock->type = type;
+ newlock->client = frame->root->client;
+ newlock->client_pid = frame->root->pid;
+ newlock->volume = domain;
+ newlock->owner = frame->root->lk_owner;
+ newlock->frame = frame;
+ newlock->this = frame->this;
+
+ if (conn_id) {
+ newlock->connection_id = gf_strdup (conn_id);
+ }
- INIT_LIST_HEAD (&newlock->domain_list);
- INIT_LIST_HEAD (&newlock->blocked_locks);
+ INIT_LIST_HEAD (&newlock->domain_list);
+ INIT_LIST_HEAD (&newlock->blocked_locks);
+ INIT_LIST_HEAD (&newlock->client_list);
+ __pl_entrylk_ref (newlock);
out:
- return newlock;
+ return newlock;
}
@@ -77,55 +89,155 @@ out:
static int
names_conflict (const char *n1, const char *n2)
{
- return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
+ return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
}
static int
__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
{
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
+}
+
+/* Just as in inodelk, allow conflicting name locks from same (lk_owner, conn)*/
+static int
+__conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
+{
+ if (names_conflict (l1->basename, l2->basename)
+ && !__same_entrylk_owner (l1, l2))
+ return 1;
- return ((l1->owner == l2->owner) &&
- (l1->trans == l2->trans));
+ return 0;
}
+/* See comments in inodelk.c for details */
+static inline gf_boolean_t
+__stale_entrylk (xlator_t *this, pl_entry_lock_t *candidate_lock,
+ pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+ struct timeval curr;
+ gettimeofday (&curr, NULL);
+
+ priv = this->private;
+
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (names_conflict (candidate_lock->basename,
+ requested_lock->basename)) {
+ *lock_age_sec = curr.tv_sec -
+ candidate_lock->granted_time.tv_sec;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* See comments in inodelk.c for details */
+static gf_boolean_t
+__entrylk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+ args.type = CLRLK_ENTRY;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+
+ if (list_empty (&dom->entrylk_list))
+ goto out;
+
+ pthread_mutex_lock (&pinode->mutex);
+ lock->pinode = pinode;
+ list_for_each_entry_safe (lk, tmp, &dom->entrylk_list, domain_list) {
+ if (__stale_entrylk (this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe (lk, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_entrylk (this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Entry lock revoked: %d granted & %d "
+ "blocked locks cleared", reason_str,
+ uuid_utoa (pinode->gfid), dom->domain, lk_age_sec,
+ gcount, bcount);
+ }
+
+ return revoke_lock;
+}
/**
- * lock_grantable - is this lock grantable?
+ * entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
* @basename: name we're trying to lock
* @type: type of lock
*/
static pl_entry_lock_t *
-__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
- if (list_empty (&dom->entrylk_list))
- return NULL;
+ if (list_empty (&dom->entrylk_list))
+ return NULL;
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (names_conflict (lock->basename, basename))
- return lock;
- }
+ list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
+ if (__conflicting_entrylks (tmp, lock))
+ return tmp;
+ }
- return NULL;
+ return NULL;
}
static pl_entry_lock_t *
-__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
+ if (list_empty (&dom->blocked_entrylks))
+ return NULL;
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (names_conflict (lock->basename, basename))
- return lock;
- }
+ list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
+ if (names_conflict (tmp->basename, lock->basename))
+ return lock;
+ }
- return NULL;
+ return NULL;
}
static int
@@ -133,23 +245,23 @@ __owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock)
{
pl_entry_lock_t *lock = NULL;
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (__same_entrylk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (__same_entrylk_owner (lock, newlock))
+ return 1;
+ }
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (__same_entrylk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ if (__same_entrylk_owner (lock, newlock))
+ return 1;
+ }
- return 0;
+ return 0;
}
static int
names_equal (const char *n1, const char *n2)
{
- return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
+ return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
}
void
@@ -213,7 +325,7 @@ entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain,
pl_print_lockee (pl_lockee, 256, fd, loc);
pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
pl_locker, pl_lockee, pl_entrylk);
}
@@ -240,7 +352,7 @@ entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain,
pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
pl_print_verdict (verdict, 32, op_ret, op_errno);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
verdict, pl_locker, pl_lockee, pl_entrylk);
}
@@ -266,7 +378,7 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
pl_print_lockee (pl_lockee, 256, fd, loc);
pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
pl_locker, pl_lockee, pl_entrylk);
}
@@ -284,25 +396,39 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
static pl_entry_lock_t *
__find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
{
- pl_entry_lock_t *lock;
- pl_entry_lock_t *all = NULL;
- pl_entry_lock_t *exact = NULL;
-
- if (list_empty (&dom->entrylk_list))
- return NULL;
-
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (all_names (lock->basename))
- all = lock;
- else if (names_equal (lock->basename, basename))
- exact = lock;
- }
+ pl_entry_lock_t *lock;
+ pl_entry_lock_t *all = NULL;
+ pl_entry_lock_t *exact = NULL;
+
+ if (list_empty (&dom->entrylk_list))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (all_names (lock->basename))
+ all = lock;
+ else if (names_equal (lock->basename, basename))
+ exact = lock;
+ }
- return (exact ? exact : all);
+ return (exact ? exact : all);
+}
+
+static pl_entry_lock_t*
+__find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
+{
+ pl_entry_lock_t *tmp = NULL;
+
+ list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
+ if (names_equal (lock->basename, tmp->basename)
+ && __same_entrylk_owner (lock, tmp)
+ && (lock->type == tmp->type))
+ return tmp;
+ }
+ return NULL;
}
/**
- * __lock_name - lock a name in a directory
+ * __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
* @basename: name of the entry to lock
* if null, lock the entire directory
@@ -313,397 +439,379 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
*/
int
-__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock)
+__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
+ int nonblock, pl_dom_list_t *dom)
{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *conf = NULL;
- void *trans = NULL;
- pid_t client_pid = 0;
- uint64_t owner = 0;
-
- int ret = -EINVAL;
-
- trans = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
-
- lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, owner, dom->domain);
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- conf = __lock_grantable (dom, basename, type);
- if (conf) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- lock->frame = frame;
- lock->this = this;
+ pl_entry_lock_t *conf = NULL;
+ int ret = -EAGAIN;
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
-
- goto out;
- }
-
- if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
+ conf = __entrylk_grantable (dom, lock);
+ if (conf) {
ret = -EAGAIN;
if (nonblock)
goto out;
- lock->frame = frame;
- lock->this = this;
+ gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
gf_log (this->name, GF_LOG_TRACE,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
+ "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, lock->basename);
- goto out;
+ goto out;
}
- switch (type) {
- case ENTRYLK_WRLCK:
- list_add (&lock->domain_list, &dom->entrylk_list);
- break;
+ /* To prevent blocked locks starvation, check if there are any blocked
+ * locks that may conflict with this lock. If there are, then don't grant
+ * the lock. BUT grant the lock if the owner already has a lock, to allow
+ * nested locks.
+ * Example: SHD from Machine1 takes (gfid, basename=257-length-name)
+ * and is granted.
+ * SHD from machine2 takes (gfid, basename=NULL) and is blocked.
+ * When SHD from Machine1 takes (gfid, basename=NULL) it needs to be
+ * granted, without which self-heal can't progress.
+ * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
+ */
+ if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
+ ret = -EAGAIN;
+ if (nonblock)
+ goto out;
- default:
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
gf_log (this->name, GF_LOG_DEBUG,
- "Invalid type for entrylk specified: %d", type);
- ret = -EINVAL;
+ "Lock is grantable, but blocking to prevent starvation");
+ gf_log (this->name, GF_LOG_TRACE,
+ "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, lock->basename);
+
goto out;
- }
+ }
+
+ __pl_entrylk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
+ list_add (&lock->domain_list, &dom->entrylk_list);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
/**
- * __unlock_name - unlock a name in a directory
+ * __unlock_entrylk - unlock a name in a directory
* @inode: inode for the directory to unlock in
* @basename: name of the entry to unlock
* if null, unlock the entire directory
*/
pl_entry_lock_t *
-__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__unlock_entrylk (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *ret_lock = NULL;
+ pl_entry_lock_t *ret_lock = NULL;
- lock = __find_most_matching_lock (dom, basename);
+ ret_lock = __find_matching_lock (dom, lock);
- if (!lock) {
- gf_log ("locks", GF_LOG_DEBUG,
- "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found",
- basename);
- goto out;
- }
+ if (ret_lock) {
+ list_del_init (&ret_lock->domain_list);
+ } else {
+ gf_log ("locks", GF_LOG_ERROR, "unlock on %s "
+ "(type=ENTRYLK_WRLCK) attempted but no matching lock "
+ "found", lock->basename);
+ }
+
+ return ret_lock;
+}
- if (names_equal (lock->basename, basename)
- && lock->type == type) {
+int32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
+{
+ int32_t entrylk = 0;
+ pl_inode_t *pinode = 0;
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *conf = NULL;
- if (type == ENTRYLK_WRLCK) {
- list_del (&lock->domain_list);
- ret_lock = lock;
- }
- } else {
- gf_log ("locks", GF_LOG_DEBUG,
- "Unlock for a non-existing lock!");
- goto out;
- }
+ pinode = pl_inode_get (this, parent);
+ if (!pinode)
+ goto out;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry (dom, &pinode->dom_list, inode_list) {
+ conf = __find_most_matching_lock (dom, basename);
+ if (conf && conf->basename) {
+ entrylk = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
out:
- return ret_lock;
+ return entrylk;
}
-
void
__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct list_head *granted)
+ pl_dom_list_t *dom, struct list_head *granted)
{
- int bl_ret = 0;
- pl_entry_lock_t *bl = NULL;
- pl_entry_lock_t *tmp = NULL;
+ int bl_ret = 0;
+ pl_entry_lock_t *bl = NULL;
+ pl_entry_lock_t *tmp = NULL;
struct list_head blocked_list;
INIT_LIST_HEAD (&blocked_list);
list_splice_init (&dom->blocked_entrylks, &blocked_list);
-
- list_for_each_entry_safe (bl, tmp, &blocked_list,
- blocked_locks) {
- list_del_init (&bl->blocked_locks);
+ list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
+ list_del_init (&bl->blocked_locks);
- gf_log ("locks", GF_LOG_TRACE,
- "Trying to unblock: {pinode=%p, basename=%s}",
- pl_inode, bl->basename);
+ bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
- bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, dom, bl->this, 0);
-
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
- } else {
- if (bl->basename)
- GF_FREE ((char *)bl->basename);
- GF_FREE (bl);
- }
- }
- return;
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ }
+ }
+ return;
}
/* Grants locks if possible which are blocked on a lock */
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom)
+ pl_dom_list_t *dom)
{
- struct list_head granted_list;
- pl_entry_lock_t *tmp = NULL;
- pl_entry_lock_t *lock = NULL;
+ struct list_head granted_list;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lock = NULL;
- INIT_LIST_HEAD (&granted_list);
+ INIT_LIST_HEAD (&granted_list);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
- list_del_init (&lock->blocked_locks);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_entry_locks (this, pl_inode, dom,
+ &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
entrylk_trace_out (this, lock->frame, NULL, NULL, NULL,
lock->basename, ENTRYLK_LOCK, lock->type,
0, 0);
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
}
- GF_FREE ((char *)unlocked->basename);
- GF_FREE (unlocked);
-
- return;
-}
-
-/**
- * release_entry_locks_for_transport: release all entry locks from this
- * transport for this loc_t
- */
-
-static int
-release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
- pl_dom_list_t *dom, void *trans)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *tmp = NULL;
- struct list_head granted;
- struct list_head released;
-
- INIT_LIST_HEAD (&granted);
- INIT_LIST_HEAD (&released);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
- blocked_locks) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->blocked_locks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{transport=%p}",trans);
-
- list_add (&lock->blocked_locks, &released);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
- domain_list) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->domain_list);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{transport=%p}",trans);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_entrylk_unref (lock);
}
-
- __grant_blocked_entry_locks (this, pinode, dom, &granted);
-
- }
-
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN);
-
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0);
-
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
}
+ pthread_mutex_unlock (&pl_inode->mutex);
- return 0;
+ return;
}
+
/* Common entrylk code called by pl_entrylk and pl_fentrylk */
int
pl_common_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, inode_t *inode, const char *basename,
- entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- void * transport = NULL;
- pid_t pid = -1;
- uint64_t owner = -1;
-
- pl_inode_t * pinode = NULL;
- int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
- char unwind = 1;
-
- pl_dom_list_t *dom = NULL;
-
- pinode = pl_inode_get (this, inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- dom = get_domain (pinode, volume);
- if (!dom){
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
+ entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
- entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
-
- pid = frame->root->pid;
- owner = (uint64_t)(long) frame->root;
- transport = frame->root->trans;
-
- if (pid == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *reqlock = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+ int nonblock = 0;
+ gf_boolean_t need_inode_unref = _gf_false;
+ posix_locks_private_t *priv = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
+ priv = this->private;
- release_entry_locks_for_transport (this, pinode, dom, transport);
- op_ret = 0;
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
- goto out;
- }
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 0);
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
}
- pthread_mutex_unlock (&pinode->mutex);
+ }
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- op_errno = -ret;
- goto out;
- }
+ dom = get_domain (pinode, volume);
+ if (!dom){
+ op_errno = ENOMEM;
+ goto out;
+ }
- break;
+ entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
- case ENTRYLK_LOCK_NB:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 1);
- }
- pthread_mutex_unlock (&pinode->mutex);
+ reqlock = new_entrylk_lock (pinode, basename, type, dom->domain, frame,
+ conn_id);
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pinode. Now when
+ * the io-thread executing this function tries to access pinode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pinode is still alive until after the
+ * current stack unwinds.
+ */
+ pinode->inode = inode_ref (inode);
+ if (priv->revocation_secs != 0) {
+ if (cmd != ENTRYLK_UNLOCK) {
+ __entrylk_prune_stale (this, pinode, dom, reqlock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock ()) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ op_ret = 0;
+ need_inode_unref = _gf_true;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (reqlock);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ goto out;
+ }
+ }
+ }
+ switch (cmd) {
+ case ENTRYLK_LOCK_NB:
+ nonblock = 1;
+ /* fall through */
+ case ENTRYLK_LOCK:
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ reqlock->pinode = pinode;
+
+ ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+ if (ret == 0) {
+ reqlock->frame = NULL;
+ op_ret = 0;
+ } else {
+ op_errno = -ret;
+ }
+
+ if (ctx && (!ret || !nonblock))
+ list_add (&reqlock->client_list,
+ &ctx->entrylk_lockers);
+
+ if (ret == -EAGAIN && !nonblock) {
+ /* blocked */
+ unwind = 0;
+ } else {
+ __pl_entrylk_unref (reqlock);
+ }
+
+ /* Only in the case where a non-blocking lock attempt
+ * fails must the extra ref taken before the switch block
+ * be negated; otherwise the ref is retained with the lock.
+ */
+ if ((ret == -EAGAIN) && (nonblock))
+ need_inode_unref = _gf_true;
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
break;
- case ENTRYLK_UNLOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- unlocked = __unlock_name (dom, basename, type);
- }
- pthread_mutex_unlock (&pinode->mutex);
+ case ENTRYLK_UNLOCK:
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done before the switch
+ * block must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
+ unlocked = __unlock_entrylk (dom, reqlock);
+ if (unlocked) {
+ list_del_init (&unlocked->client_list);
+ __pl_entrylk_unref (unlocked);
+ op_ret = 0;
+ } else {
+ op_errno = EINVAL;
+ }
+ __pl_entrylk_unref (reqlock);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked, dom);
+ grant_blocked_entry_locks (this, pinode, dom);
- break;
+ break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in entrylk (cmd=%d). Please file"
+ default:
+ inode_unref (pinode->inode);
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unexpected case in entrylk (cmd=%d). Please file"
"a bug report at http://bugs.gluster.com", cmd);
- goto out;
- }
+ goto out;
+ }
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0))
+ inode_unref (pinode->inode);
- op_ret = 0;
out:
- pl_update_refkeeper (this, inode);
- if (unwind) {
+
+ if (need_inode_unref)
+ inode_unref (pinode->inode);
+
+ if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
cmd, type, op_ret, op_errno);
-
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
- } else {
+unwind:
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
+ } else {
entrylk_trace_block (this, frame, volume, fd, loc, basename,
cmd, type);
}
-
- return 0;
+ return 0;
}
/**
@@ -714,11 +822,11 @@ out:
int
pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
-
- pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type, loc, NULL);
+ pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd,
+ type, loc, NULL, xdata);
return 0;
}
@@ -733,16 +841,133 @@ pl_entrylk (call_frame_t *frame, xlator_t *this,
int
pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd,
+ type, NULL, fd, xdata);
+
+ return 0;
+}
+
+
+static void
+pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
+{
+ pl_inode_t *pinode = NULL;
+
+ pinode = lock->pinode;
+
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ uuid_utoa (pinode->gfid), lock->client,
+ (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner));
+}
+
+
+/* Release all entrylks from this client */
+int
+pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pinode = NULL;
+
+ struct list_head released;
+ struct list_head unwind;
+
+ INIT_LIST_HEAD (&released);
+ INIT_LIST_HEAD (&unwind);
+
+ pthread_mutex_lock (&ctx->lock);
+ {
+ list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
+ client_list) {
+ pl_entrylk_log_cleanup (l);
+
+ pinode = l->pinode;
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ /* If the entrylk object is part of granted list but not
+ * blocked list, then perform the following actions:
+ * i. delete the object from granted list;
+ * ii. grant other locks (from other clients) that may
+ * have been blocked on this entrylk; and
+ * iii. unref the object.
+ *
+ * If the entrylk object (L1) is part of both granted
+ * and blocked lists, then this means that a parallel
+ * unlock on another entrylk (L2 say) may have 'granted'
+ * L1 and added it to 'granted' list in
+ * __grant_blocked_entry_locks() (although using the
+ * 'blocked_locks' member). In that case, the cleanup
+ * codepath must try and grant other overlapping
+ * blocked entrylks from other clients, now that L1 is
+ * out of their way and then unref L1 in the end, and
+ * leave it to the other thread (the one executing
+ * unlock codepath) to unwind L1's frame, delete it from
+ * blocked_locks list, and perform the last unref on L1.
+ *
+ * If the entrylk object (L1) is part of blocked list
+ * only, the cleanup code path must:
+ * i. delete it from the blocked_locks list inside
+ * this critical section,
+ * ii. unwind its frame with EAGAIN,
+ * iii. try and grant blocked entry locks from other
+ * clients that were otherwise grantable, but were
+ * blocked to avoid leaving L1 to starve forever.
+ * iv. unref the object.
+ */
+ list_del_init (&l->client_list);
+
+ if (!list_empty (&l->domain_list)) {
+ list_del_init (&l->domain_list);
+ list_add_tail (&l->client_list,
+ &released);
+ } else {
+ list_del_init (&l->blocked_locks);
+ list_add_tail (&l->client_list,
+ &unwind);
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ }
+ }
+ pthread_mutex_unlock (&ctx->lock);
- pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type, NULL, fd);
+ list_for_each_entry_safe (l, tmp, &unwind, client_list) {
+ list_del_init (&l->client_list);
+
+ if (l->frame)
+ STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+ NULL);
+ list_add_tail (&l->client_list, &released);
+ }
+
+ list_for_each_entry_safe (l, tmp, &released, client_list) {
+ list_del_init (&l->client_list);
+
+ pinode = l->pinode;
+
+ dom = get_domain (pinode, l->volume);
+
+ grant_blocked_entry_locks (this, pinode, dom);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (l);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ inode_unref (pinode->inode);
+ }
return 0;
}
-static int32_t
+int32_t
__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
{
int32_t count = 0;
@@ -751,24 +976,10 @@ __get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Active",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
count++;
}
list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Blocked",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
count++;
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 592d14e0876..e1702c78ba1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "glusterfs.h"
#include "compat.h"
#include "xlator.h"
@@ -31,32 +16,43 @@
#include "list.h"
#include "locks.h"
+#include "clear.h"
#include "common.h"
void
__delete_inode_lock (pl_inode_lock_t *lock)
{
- list_del (&lock->list);
+ list_del_init (&lock->list);
+}
+
+static void
+__pl_inodelk_ref (pl_inode_lock_t *lock)
+{
+ lock->ref++;
}
void
-__destroy_inode_lock (pl_inode_lock_t *lock)
+__pl_inodelk_unref (pl_inode_lock_t *lock)
{
- GF_FREE (lock);
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
}
/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
static int
inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
- return 1;
+ if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
+ return 1;
- return 0;
+ return 0;
}
void
-pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char *domain)
+pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -115,46 +111,142 @@ pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char
static int
inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
}
/* Returns true if the 2 inodelks have the same owner */
-static int same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+static int
+same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
/* Returns true if the 2 inodelks conflict with each other */
static int
inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if (same_inodelk_owner (l1, l2))
- return 0;
+ return (inodelk_overlap (l1, l2) &&
+ inodelk_type_conflict (l1, l2));
+}
+
+/*
+ * Check to see if the candidate lock overlaps/conflicts with the
+ * requested lock. If so, determine how old the lock is and return
+ * true if it exceeds the configured threshold, false otherwise.
+ */
+static inline gf_boolean_t
+__stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock,
+ pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+ struct timeval curr;
+
+ priv = this->private;
+ gettimeofday (&curr, NULL);
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (inodelk_conflict (candidate_lock, requested_lock)) {
+ *lock_age_sec = curr.tv_sec -
+ candidate_lock->granted_time.tv_sec;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* Examine any locks held on this inode and potentially revoke the lock
+ * if the age exceeds revocation_secs. We will clear _only_ those locks
+ * which are granted, and then grant those locks which are blocked.
+ *
+ * Depending on how this patch works in the wild, we may expand this and
+ * introduce a heuristic which clears blocked locks as well if they
+ * are beyond a threshold.
+ */
+static gf_boolean_t
+__inodelk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+
+ args.type = CLRLK_INODE;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+ if (list_empty (&dom->inodelk_list))
+ goto out;
+
+ pthread_mutex_lock (&pinode->mutex);
+ list_for_each_entry_safe (lk, tmp, &dom->inodelk_list, list) {
+ if (__stale_inodelk (this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
- if (!inodelk_overlap (l1, l2))
- return 0;
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe (lk, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
- return (inodelk_type_conflict(l1, l2));
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_inodelk (this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Inode lock revoked: %d granted & %d "
+ "blocked locks cleared",
+ reason_str, uuid_utoa (pinode->gfid), dom->domain,
+ lk_age_sec, gcount, bcount);
+ }
+ return revoke_lock;
}
/* Determine if lock is grantable or not */
static pl_inode_lock_t *
__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
{
- pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
- if (list_empty (&dom->inodelk_list))
- goto out;
- list_for_each_entry (l, &dom->inodelk_list, list){
- if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
- }
- }
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+ if (list_empty (&dom->inodelk_list))
+ goto out;
+ list_for_each_entry (l, &dom->inodelk_list, list){
+ if (inodelk_conflict (lock, l) &&
+ !same_inodelk_owner (lock, l)) {
+ ret = l;
+ goto out;
+ }
+ }
out:
- return ret;
+ return ret;
}
static pl_inode_lock_t *
@@ -163,18 +255,18 @@ __blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
pl_inode_lock_t *l = NULL;
pl_inode_lock_t *ret = NULL;
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
+ if (list_empty (&dom->blocked_inodelks))
+ return NULL;
- list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
- if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
+ list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
+ if (inodelk_conflict (lock, l)) {
+ ret = l;
+ goto out;
}
- }
+ }
out:
- return ret;
+ return ret;
}
static int
@@ -182,17 +274,17 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
{
pl_inode_lock_t *lock = NULL;
- list_for_each_entry (lock, &dom->entrylk_list, list) {
- if (same_inodelk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ if (same_inodelk_owner (lock, newlock))
+ return 1;
+ }
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (same_inodelk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ if (same_inodelk_owner (lock, newlock))
+ return 1;
+ }
- return 0;
+ return 0;
}
@@ -201,80 +293,97 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
*/
static int
__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+ int can_block, pl_dom_list_t *dom)
{
- pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ pl_inode_lock_t *conf = NULL;
+ int ret = -EINVAL;
- conf = __inodelk_grantable (dom, lock);
- if (conf){
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
+ conf = __inodelk_grantable (dom, lock);
+ if (conf) {
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
- goto out;
- }
+ goto out;
+ }
+ /* To prevent blocked locks starvation, check if there are any blocked
+ * locks that may conflict with this lock. If there are, then don't grant
+ * the lock. BUT grant the lock if the owner already has lock to allow
+ * nested locks.
+ * Example:
+ * SHD from Machine1 takes (gfid, 0-infinity) and is granted.
+ * SHD from machine2 takes (gfid, 0-infinity) and is blocked.
+ * When SHD from Machine1 takes (gfid, 0-128KB) it
+ * needs to be granted, without which the earlier lock on 0-infinity
+ * will not be unlocked by SHD from Machine1.
+ * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
+ */
if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
ret = -EAGAIN;
if (can_block == 0)
goto out;
+ gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Blocked",
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
- goto out;
+ goto out;
}
- list_add (&lock->list, &dom->inodelk_list);
+ __pl_inodelk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
+ list_add (&lock->list, &dom->inodelk_list);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
/* Return true if the two inodelks have exactly same lock boundaries */
static int
inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if ((l1->fl_start == l2->fl_start) &&
- (l1->fl_end == l2->fl_end))
- return 1;
+ if ((l1->fl_start == l2->fl_start) &&
+ (l1->fl_end == l2->fl_end))
+ return 1;
- return 0;
+ return 0;
}
static pl_inode_lock_t *
find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
- pl_inode_lock_t *l = NULL;
- list_for_each_entry (l, &dom->inodelk_list, list) {
- if (inodelks_equal (l, lock))
- return l;
- }
- return NULL;
+ pl_inode_lock_t *l = NULL;
+ list_for_each_entry (l, &dom->inodelk_list, list) {
+ if (inodelks_equal (l, lock) &&
+ same_inodelk_owner (l, lock))
+ return l;
+ }
+ return NULL;
}
/* Set F_UNLCK removes a lock which has the exact same lock boundaries
@@ -284,355 +393,517 @@ static pl_inode_lock_t *
__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
- pl_inode_lock_t *conf = NULL;
+ pl_inode_lock_t *conf = NULL;
- conf = find_matching_inodelk (lock, dom);
- if (!conf) {
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock not found for unlock");
- goto out;
+ conf = find_matching_inodelk (lock, dom);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p", (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end,
+ lkowner_utoa (&lock->owner), lock->client);
+ goto out;
}
- __delete_inode_lock (conf);
+ __delete_inode_lock (conf);
gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock");
- __destroy_inode_lock (lock);
-
+ " Matching lock found for unlock %llu-%llu, by %s on %p",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner),
+ lock->client);
out:
- return conf;
+ return conf;
+}
-}
static void
-__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted, pl_dom_list_t *dom)
+__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom)
{
- int bl_ret = 0;
- pl_inode_lock_t *bl = NULL;
- pl_inode_lock_t *tmp = NULL;
+ int bl_ret = 0;
+ pl_inode_lock_t *bl = NULL;
+ pl_inode_lock_t *tmp = NULL;
struct list_head blocked_list;
INIT_LIST_HEAD (&blocked_list);
list_splice_init (&dom->blocked_inodelks, &blocked_list);
- list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
+ list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
- list_del_init (&bl->blocked_locks);
+ list_del_init (&bl->blocked_locks);
- bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+ bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
}
}
- return;
+ return;
}
/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom)
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom)
{
- struct list_head granted;
- pl_inode_lock_t *lock;
- pl_inode_lock_t *tmp;
+ struct list_head granted;
+ pl_inode_lock_t *lock;
+ pl_inode_lock_t *tmp;
- INIT_LIST_HEAD (&granted);
+ INIT_LIST_HEAD (&granted);
- if (list_empty (&dom->blocked_inodelks)) {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
gf_log (this->name, GF_LOG_TRACE,
- "No blocked locks to be granted for domain: %s", dom->domain);
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, lock->volume);
+
+ STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
}
pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
- }
+ {
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_inodelk_unref (lock);
+ }
+ }
pthread_mutex_unlock (&pl_inode->mutex);
+}
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
- &lock->user_flock, 0, 0, lock->volume);
+static void
+pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
+{
+ pl_inode_t *pl_inode = NULL;
- STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0);
- }
+ pl_inode = lock->pl_inode;
+ gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ uuid_utoa (pl_inode->gfid), lock->client,
+ (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner));
}
-/* Release all inodelks from this transport */
-static int
-release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom,
- inode_t *inode, void *trans)
-{
- pl_inode_lock_t *tmp = NULL;
- pl_inode_lock_t *l = NULL;
- pl_inode_t * pinode = NULL;
+/* Release all inodelks from this client */
+int
+pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
+{
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pl_inode = NULL;
- struct list_head granted;
struct list_head released;
+ struct list_head unwind;
- char *path = NULL;
-
- INIT_LIST_HEAD (&granted);
INIT_LIST_HEAD (&released);
+ INIT_LIST_HEAD (&unwind);
- pinode = pl_inode_get (this, inode);
-
- pthread_mutex_lock (&pinode->mutex);
+ pthread_mutex_lock (&ctx->lock);
{
-
- list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
- if (l->transport != trans)
- continue;
-
- list_del_init (&l->blocked_locks);
-
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
+ list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
+ client_list) {
+ pl_inodelk_log_cleanup (l);
+
+ pl_inode = l->pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ /* If the inodelk object is part of granted list but not
+ * blocked list, then perform the following actions:
+ * i. delete the object from granted list;
+ * ii. grant other locks (from other clients) that may
+ * have been blocked on this inodelk; and
+ * iii. unref the object.
+ *
+ * If the inodelk object (L1) is part of both granted
+ * and blocked lists, then this means that a parallel
+ * unlock on another inodelk (L2 say) may have 'granted'
+ * L1 and added it to 'granted' list in
+ * __grant_blocked_inode_locks() (although using the
+ * 'blocked_locks' member). In that case, the cleanup
+ * codepath must try and grant other overlapping
+ * blocked inodelks from other clients, now that L1 is
+ * out of their way and then unref L1 in the end, and
+ * leave it to the other thread (the one executing
+ * unlock codepath) to unwind L1's frame, delete it from
+ * blocked_locks list, and perform the last unref on L1.
+ *
+ * If the inodelk object (L1) is part of blocked list
+ * only, the cleanup code path must:
+ * i. delete it from the blocked_locks list inside
+ * this critical section,
+ * ii. unwind its frame with EAGAIN,
+ * iii. try and grant blocked inode locks from other
+ * clients that were otherwise grantable, but just
+ * got blocked to avoid leaving L1 to starve
+ * forever.
+ * iv. unref the object.
+ */
+ list_del_init (&l->client_list);
+
+ if (!list_empty (&l->list)) {
+ __delete_inode_lock (l);
+ list_add_tail (&l->client_list,
+ &released);
+ } else {
+ list_del_init(&l->blocked_locks);
+ list_add_tail (&l->client_list,
+ &unwind);
+ }
}
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
-
- list_add (&l->blocked_locks, &released);
-
+ pthread_mutex_unlock (&pl_inode->mutex);
}
+ }
+ pthread_mutex_unlock (&ctx->lock);
- list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
- if (l->transport != trans)
- continue;
-
- __delete_inode_lock (l);
- __destroy_inode_lock (l);
-
+ list_for_each_entry_safe (l, tmp, &unwind, client_list) {
+ list_del_init (&l->client_list);
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
- }
+ if (l->frame)
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
+ NULL);
+ list_add_tail (&l->client_list, &released);
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
+ }
+ list_for_each_entry_safe (l, tmp, &released, client_list) {
+ list_del_init (&l->client_list);
- }
- }
-unlock:
- if (path)
- GF_FREE (path);
+ pl_inode = l->pl_inode;
- pthread_mutex_unlock (&pinode->mutex);
+ dom = get_domain (pl_inode, l->volume);
- list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
- list_del_init (&l->blocked_locks);
+ grant_blocked_inode_locks (this, pl_inode, dom);
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN);
- GF_FREE (l);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ inode_unref (pl_inode->inode);
}
- grant_blocked_inode_locks (this, pinode, dom);
- return 0;
+ return 0;
}
static int
-pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ inode_t *inode)
{
- int ret = -EINVAL;
- pl_inode_lock_t *retlock = NULL;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (lock->fl_type != F_UNLCK) {
- ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
- if (ret == 0)
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
+ posix_locks_private_t *priv = NULL;
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+ gf_boolean_t need_inode_unref = _gf_false;
+ short fl_type;
+
+ lock->pl_inode = pl_inode;
+ fl_type = lock->fl_type;
+
+ priv = this->private;
+
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pl_inode. Now when
+ * the io-thread executing this function tries to access pl_inode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pl_inode is still alive until after the
+ * current stack unwinds.
+ */
+ pl_inode->inode = inode_ref (inode);
+
+ if (priv->revocation_secs != 0) {
+ if (lock->fl_type != F_UNLCK) {
+ __inodelk_prune_stale (this, pl_inode, dom, lock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock ()) {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (lock);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ inode_unref (pl_inode->inode);
+ gf_log (this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ return 0;
+ }
+ }
+ }
+
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (lock->fl_type != F_UNLCK) {
+ ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
+ if (ret == 0) {
+ lock->frame = NULL;
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->fl_start,
lock->fl_end);
+ } else if (ret == -EAGAIN) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ if (can_block)
+ unref = _gf_false;
+ /* Only when a non-blocking lock attempt fails
+ * (the lock is not queued) must the extra ref taken
+ * at the start of this function be negated.
+ */
+ else
+ need_inode_unref = _gf_true;
+ }
- if (ret == -EAGAIN)
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- goto out;
- }
-
+ if (ctx && (!ret || can_block))
+ list_add_tail (&lock->client_list,
+ &ctx->inodelk_lockers);
+ } else {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done at the start of
+ * this function must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
+ retlock = __inode_unlock_lock (this, lock, dom);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ list_del_init (&retlock->client_list);
+ __pl_inodelk_unref (retlock);
- retlock = __inode_unlock_lock (this, lock, dom);
- if (!retlock) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Bad Unlock issued on Inode lock");
- ret = -EINVAL;
- goto out;
+ ret = 0;
}
- __destroy_inode_lock (retlock);
-
- ret = 0;
-
-
- }
out:
- pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ if (unref)
+ __pl_inodelk_unref (lock);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
+
+ if (need_inode_unref)
+ inode_unref (pl_inode->inode);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((fl_type == F_UNLCK) && (ret == 0)) {
+ inode_unref (pl_inode->inode);
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ }
+
return ret;
}
/* Create a new inode_lock_t */
pl_inode_lock_t *
-new_inode_lock (struct flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, const char *volume)
+new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id)
{
- pl_inode_lock_t *lock = NULL;
+ pl_inode_lock_t *lock = NULL;
- lock = GF_CALLOC (1, sizeof (*lock),
+ lock = GF_CALLOC (1, sizeof (*lock),
gf_locks_mt_pl_inode_lock_t);
- if (!lock) {
- return NULL;
- }
+ if (!lock) {
+ return NULL;
+ }
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
- lock->transport = transport;
- lock->client_pid = client_pid;
- lock->owner = owner;
- lock->volume = volume;
+ lock->client = client;
+ lock->client_pid = client_pid;
+ lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
- INIT_LIST_HEAD (&lock->list);
- INIT_LIST_HEAD (&lock->blocked_locks);
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
+
+ INIT_LIST_HEAD (&lock->list);
+ INIT_LIST_HEAD (&lock->blocked_locks);
+ INIT_LIST_HEAD (&lock->client_list);
+ __pl_inodelk_ref (lock);
- return lock;
+ return lock;
+}
+
+int32_t
+_pl_convert_volume (const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
+
+ mdata_vol = strrchr (volume, ':');
+ //if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0))
+ return 0;
+
+ ret = gf_asprintf (res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
+
+int32_t
+_pl_convert_volume_for_special_range (struct gf_flock *flock,
+ const char *volume, char **res)
+{
+ int32_t ret = 0;
+
+ if ((flock->l_start == LLONG_MAX -1) &&
+ (flock->l_len == 0)) {
+ ret = _pl_convert_volume (volume, res);
+ }
+
+ return ret;
}
/* Common inodelk code called from pl_inodelk and pl_finodelk */
int
pl_common_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, inode_t *inode, int32_t cmd,
- struct flock *flock, loc_t *loc, fd_t *fd)
+ struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- void * transport = NULL;
- pid_t client_pid = -1;
- uint64_t owner = -1;
- pl_inode_t * pinode = NULL;
- pl_inode_lock_t * reqlock = NULL;
- pl_dom_list_t * dom = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (inode, unwind);
- VALIDATE_OR_GOTO (flock, unwind);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ pl_inode_t * pinode = NULL;
+ pl_inode_lock_t * reqlock = NULL;
+ pl_dom_list_t * dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (inode, unwind);
+ VALIDATE_OR_GOTO (flock, unwind);
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
- pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
+ op_errno = _pl_convert_volume_for_special_range (flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
+ pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
- pinode = pl_inode_get (this, inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto unwind;
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
}
- dom = get_domain (pinode, volume);
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (client_pid == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
+ dom = get_domain (pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- release_inode_locks_of_transport (this, dom, inode, transport);
- goto unwind;
- }
+ reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
+ frame, this, dom->domain, conn_id);
- reqlock = new_inode_lock (flock, transport, client_pid, owner, volume);
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- switch (cmd) {
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
+ switch (cmd) {
+ case F_SETLKW:
+ can_block = 1;
- /* fall through */
+ /* fall through */
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_inode_setlk (this, pinode, reqlock,
- can_block, dom);
+ case F_SETLK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block,
+ dom, inode);
- if (ret < 0) {
- if (can_block) {
+ if (ret < 0) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
pl_trace_block (this, frame, fd, loc,
cmd, flock, volume);
- goto out;
+ goto out;
}
- gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
- op_errno = -ret;
- __destroy_inode_lock (reqlock);
- goto unwind;
- }
- break;
+ gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
+ op_errno = -ret;
+ goto unwind;
+ }
+ break;
- default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_DEBUG,
+ default:
+ op_errno = ENOTSUP;
+ gf_log (this->name, GF_LOG_DEBUG,
"Lock command F_GETLK not supported for [f]inodelk "
"(cmd=%d)",
cmd);
@@ -642,85 +913,84 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
op_ret = 0;
unwind:
- if ((inode != NULL) && (flock !=NULL)) {
- pl_update_refkeeper (this, inode);
- pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
- }
+ if (flock != NULL)
+ pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret,
+ op_errno, volume);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
out:
+ GF_FREE (res);
+ GF_FREE (res1);
return 0;
}
int
pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
-
- pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, loc, NULL);
+ pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock,
+ loc, NULL, xdata);
return 0;
}
int
pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
-
- pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, NULL, fd);
+ pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock,
+ NULL, fd, xdata);
return 0;
}
-
static int32_t
-__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode)
+__get_inodelk_dom_count (pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ count++;
+ }
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ count++;
+ }
+ return count;
+}
+
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname)
{
int32_t count = 0;
- pl_inode_lock_t *lock = NULL;
pl_dom_list_t *dom = NULL;
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- list_for_each_entry (lock, &dom->inodelk_list, list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Active",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- count++;
- }
-
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ if (domname) {
+ if (strcmp (domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count (dom);
+ goto out;
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Blocked",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count (dom);
- count++;
}
-
}
+out:
return count;
}
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode)
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname)
{
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
@@ -736,7 +1006,7 @@ get_inodelk_count (xlator_t *this, inode_t *inode)
pthread_mutex_lock (&pl_inode->mutex);
{
- count = __get_inodelk_count (this, pl_inode);
+ count = __get_inodelk_count (this, pl_inode, domname);
}
pthread_mutex_unlock (&pl_inode->mutex);
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index cf50240863b..a48b35c2044 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
@@ -32,7 +22,8 @@ enum gf_locks_mem_types_ {
gf_locks_mt_truncate_ops,
gf_locks_mt_pl_rw_req_t,
gf_locks_mt_posix_locks_private_t,
- gf_locks_mt_pl_local_t,
+ gf_locks_mt_pl_fdctx_t,
+ gf_locks_mt_pl_meta_lock_t,
gf_locks_mt_end
};
#endif
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 60474615e5f..8eb35da44be 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,34 +1,29 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "compat-errno.h"
#include "stack.h"
#include "call-stub.h"
#include "locks-mem-types.h"
+#include "client_t.h"
+
+#include "lkowner.h"
+
+/* Mandatory-locking mode of the locks translator; the current value is kept
+ * in posix_locks_private_t.mandatory_mode.
+ *
+ * As consumed by pl_is_mandatory_locking_enabled():
+ *   MLK_FILE_BASED          - enabled only for inodes whose pl_inode->mandatory
+ *                             is set
+ *   MLK_FORCED, MLK_OPTIMAL - enabled for every inode
+ *   MLK_NONE                - not enabled
+ */
+typedef enum {
+ MLK_NONE,
+ MLK_FILE_BASED,
+ MLK_FORCED,
+ MLK_OPTIMAL
+} mlk_mode_t; /* defines different mandatory locking modes*/
struct __pl_fd;
@@ -38,26 +33,44 @@ struct __posix_lock {
short fl_type;
off_t fl_start;
off_t fl_end;
+ uint32_t lk_flags;
short blocked; /* waiting to acquire */
- struct flock user_flock; /* the flock supplied by the user */
+ struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
unsigned long fd_num;
+ fd_t *fd;
call_frame_t *frame;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
/* These two together serve to uniquely identify each process
across nodes */
- void *transport; /* to identify client node */
+ void *client; /* to identify client node */
+
+ /* This field uniquely identifies the client the lock belongs to. As
+ * lock migration is handled by rebalance, the client_t object will be
+ * overwritten by rebalance and can't be deemed as the owner of the
+ * lock on destination. Hence, the below field is migrated from
+ * source to destination by lock_migration_info_t and updated on the
+ * destination. So that on client-server disconnection, server can
+ * cleanup the locks properly. */
+
+ char *client_uid;
+ gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
- uint64_t owner; /* lock owner from fuse */
+
+ int blocking;
};
typedef struct __posix_lock posix_lock_t;
struct __pl_inode_lock {
struct list_head list;
struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ int ref;
short fl_type;
off_t fl_start;
@@ -65,18 +78,25 @@ struct __pl_inode_lock {
const char *volume;
- struct flock user_flock; /* the flock supplied by the user */
+ struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
- fd_t *fd;
+ struct __pl_inode *pl_inode;
call_frame_t *frame;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
/* These two together serve to uniquely identify each process
across nodes */
- void *transport; /* to identify client node */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
- uint64_t owner;
+
+ char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -100,18 +120,27 @@ typedef struct __pl_dom_list_t pl_dom_list_t;
struct __entry_lock {
struct list_head domain_list; /* list_head back to pl_dom_list_t */
struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ int ref;
call_frame_t *frame;
xlator_t *this;
+ struct __pl_inode *pinode;
const char *volume;
const char *basename;
entrylk_type type;
- void *trans;
- pid_t client_pid; /* pid of client process */
- uint64_t owner;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
+ void *client;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -125,29 +154,91 @@ struct __pl_inode {
struct list_head dom_list; /* list of domains */
struct list_head ext_list; /* list of fcntl locks */
struct list_head rw_list; /* list of waiting r/w requests */
+ struct list_head reservelk_list; /* list of reservelks */
+ struct list_head blocked_reservelks; /* list of blocked reservelks */
+ struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head queued_locks;
int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
+ uuid_t gfid; /* placeholder for gfid of the inode */
+ inode_t *inode; /* pointer to be used for ref and unref
+ of inode_t as long as there are
+ locks on it */
+ gf_boolean_t migrated;
};
typedef struct __pl_inode pl_inode_t;
+struct __pl_metalk {
+ pthread_mutex_t mutex;
+ /* For pl_inode meta lock list */
+ struct list_head list;
+ /* For pl_ctx_t list */
+ struct list_head client_list;
+ char *client_uid;
-struct __pl_fd {
- gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
+ pl_inode_t *pl_inode;
+ int ref;
};
-typedef struct __pl_fd pl_fd_t;
-
+typedef struct __pl_metalk pl_meta_lock_t;
typedef struct {
- gf_boolean_t mandatory; /* if mandatory locking is enabled */
+ mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
+ char *brickname;
+ gf_boolean_t monkey_unlocking;
+ uint32_t revocation_secs;
+ gf_boolean_t revocation_clear_all;
+ uint32_t revocation_max_blocked;
} posix_locks_private_t;
+
typedef struct {
gf_boolean_t entrylk_count_req;
gf_boolean_t inodelk_count_req;
gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+ data_t *inodelk_dom_count_req;
+
+ dict_t *xdata;
+ loc_t loc[2];
+ fd_t *fd;
+ off_t offset;
+ glusterfs_fop_t op;
} pl_local_t;
+
+typedef struct {
+ struct list_head locks_list;
+} pl_fdctx_t;
+
+
+struct _locker {
+ struct list_head lockers;
+ char *volume;
+ inode_t *inode;
+ gf_lkowner_t owner;
+};
+
+typedef struct _locks_ctx {
+ pthread_mutex_t lock;
+ struct list_head inodelk_lockers;
+ struct list_head entrylk_lockers;
+ struct list_head metalk_list;
+} pl_ctx_t;
+
+
+pl_ctx_t *
+pl_ctx_get (client_t *client, xlator_t *xlator);
+
+int
+pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx);
+
+int
+pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
new file mode 100644
index 00000000000..45c8873ecb4
--- /dev/null
+++ b/xlators/features/locks/src/pl-messages.h
@@ -0,0 +1,64 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PL_MESSAGES_H_
+#define _PL_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file pl-messages.h
+ * \brief Locks log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_PL_COMP_BASE GLFS_MSGID_COMP_PL
+#define GLFS_NUM_MESSAGES 1
+#define GLFS_MSGID_END (GLFS_PL_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_PL_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define PL_MSG_LOCK_NUMBER (GLFS_PL_COMP_BASE + 1)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_PL_MESSAGES_H_ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 54cf89ca42d..8a142c9991a 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,32 +1,17 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "compat.h"
#include "xlator.h"
@@ -37,6 +22,10 @@
#include "locks.h"
#include "common.h"
#include "statedump.h"
+#include "clear.h"
+#include "defaults.h"
+#include "syncop.h"
+#include "pl-messages.h"
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -44,79 +33,684 @@
/* Forward declarations */
-
void do_blocked_rw (pl_inode_t *);
static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+static int format_brickname(char *);
+int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *);
+static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
+
+/* Unwind @frame for @fop, first filling lock-count answers into @xdata when
+ * the request asked for them, then releasing the pl_local_t in frame->local.
+ *
+ * - The xdata response is only built when op_ret >= 0 and
+ *   pl_needs_xdata_response() is true for frame->local (which also implies
+ *   frame->local is non-NULL inside the loop).
+ * - @xdata must be an assignable lvalue: when it is NULL the macro assigns a
+ *   fresh dict_new() to it; otherwise it takes an extra ref. Either way the
+ *   dict is unref'd again after the unwind.
+ * - For fd-based requests the response is filled once. For loc-based requests
+ *   loc[0] and then loc[1] are visited (while their inode is set); the second
+ *   pass is made with the "keep max" flag (__i > 0) set.
+ * - frame->local is cleared before STACK_UNWIND_STRICT; afterwards the saved
+ *   local's data ref, both locs and fd ref are dropped and the local is
+ *   returned with mem_put().
+ */
+#define PL_STACK_UNWIND(fop, xdata, frame, op_ret, params ...) \
+ do { \
+ pl_local_t *__local = NULL; \
+ inode_t *__parent = NULL; \
+ inode_t *__inode = NULL; \
+ char *__name = NULL; \
+ dict_t *__unref = NULL; \
+ int __i = 0 ; \
+ __local = frame->local; \
+ if (op_ret >= 0 && pl_needs_xdata_response (frame->local)) {\
+ if (xdata) \
+ dict_ref (xdata); \
+ else \
+ xdata = dict_new(); \
+ if (xdata) { \
+ __unref = xdata; \
+ while (__local->fd || __local->loc[__i].inode) { \
+ pl_get_xdata_rsp_args (__local, \
+ #fop, &__parent, &__inode, \
+ &__name, __i); \
+ pl_set_xdata_response (frame->this, \
+ __local, __parent, __inode, __name, \
+ xdata, __i > 0); \
+ if (__local->fd || __i == 1) \
+ break; \
+ __i++; \
+ } \
+ } \
+ } \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (fop, frame, op_ret, params); \
+ if (__local) { \
+ if (__local->inodelk_dom_count_req) \
+ data_unref (__local->inodelk_dom_count_req);\
+ loc_wipe (&__local->loc[0]); \
+ loc_wipe (&__local->loc[1]); \
+ if (__local->fd) \
+ fd_unref (__local->fd); \
+ mem_put (__local); \
+ } \
+ if (__unref) \
+ dict_unref (__unref); \
+ } while (0)
+
+/* If @xdata carries any lock-count request key (pl_has_xdata_requests),
+ * allocate a zeroed pl_local_t from this->local_pool into frame->local and
+ * record what the request refers to: a ref on @__fd when one is given,
+ * otherwise copies of @__loc / @__newloc into loc[0] / loc[1]. The request
+ * flags are then captured with pl_get_xdata_requests().
+ *
+ * When the allocation fails frame->local is left NULL and nothing else is
+ * done. When @xdata has no request keys frame->local is not touched.
+ */
+#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc)\
+ do { \
+ if (pl_has_xdata_requests (xdata)) { \
+ frame->local = mem_get0 (this->local_pool); \
+ pl_local_t *__local = frame->local; \
+ if (__local) { \
+ if (__fd) { \
+ __local->fd = fd_ref (__fd); \
+ } else { \
+ if (__loc) \
+ loc_copy (&__local->loc[0],\
+ __loc); \
+ if (__newloc) \
+ loc_copy (&__local->loc[1],\
+ __newloc); \
+ } \
+ pl_get_xdata_requests (__local, xdata); \
+ } \
+ } \
+ } while (0)
+
+gf_boolean_t
+pl_has_xdata_requests (dict_t *xdata)
+{
+ char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK, NULL};
+ int i = 0;
-struct _truncate_ops {
- loc_t loc;
- fd_t *fd;
- off_t offset;
- enum {TRUNCATE, FTRUNCATE} op;
-};
+ if (!xdata)
+ return _gf_false;
+ for (i = 0; reqs[i]; i++)
+ if (dict_get (xdata, reqs[i]))
+ return _gf_true;
-int
-pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ return _gf_false;
+}
+
+void
+pl_get_xdata_requests (pl_local_t *local, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ if (!local || !xdata)
+ return;
- local = frame->local;
+ if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT)) {
+ local->entrylk_count_req = 1;
+ dict_del (xdata, GLUSTERFS_ENTRYLK_COUNT);
+ }
+ if (dict_get (xdata, GLUSTERFS_INODELK_COUNT)) {
+ local->inodelk_count_req = 1;
+ dict_del (xdata, GLUSTERFS_INODELK_COUNT);
+ }
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ local->inodelk_dom_count_req = dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ if (local->inodelk_dom_count_req) {
+ data_ref (local->inodelk_dom_count_req);
+ dict_del (xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ }
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT)) {
+ local->posixlk_count_req = 1;
+ dict_del (xdata, GLUSTERFS_POSIXLK_COUNT);
+ }
+
+ if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK)) {
+ local->parent_entrylk_req = 1;
+ dict_del (xdata, GLUSTERFS_PARENT_ENTRYLK);
+ }
}
+gf_boolean_t
+pl_needs_xdata_response (pl_local_t *local)
+{
+ if (!local)
+ return _gf_false;
-static int
-truncate_allowed (pl_inode_t *pl_inode,
- void *transport, pid_t client_pid,
- uint64_t owner, off_t offset)
+ if (local->parent_entrylk_req)
+ return _gf_true;
+
+ if (local->entrylk_count_req)
+ return _gf_true;
+
+ if (local->inodelk_dom_count_req)
+ return _gf_true;
+
+ if (local->inodelk_count_req)
+ return _gf_true;
+
+ if (local->posixlk_count_req)
+ return _gf_true;
+ return _gf_false;
+}
+
+void
+pl_get_xdata_rsp_args (pl_local_t *local, char *fop, inode_t **parent,
+ inode_t **inode, char **name, int i)
{
- posix_lock_t *l = NULL;
- posix_lock_t region = {.list = {0, }, };
- int ret = 1;
+ if (strcmp (fop, "lookup") == 0) {
+ *parent = local->loc[0].parent;
+ *inode = local->loc[0].inode;
+ *name = (char *)local->loc[0].name;
+ } else {
+ if (local->fd) {
+ *inode = local->fd->inode;
+ } else {
+ *inode = local->loc[i].parent;
+ }
+ }
+}
+
+int32_t
+__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
+{
+ posix_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &pl_inode->ext_list, list) {
+
+ count++;
+ }
- region.fl_start = offset;
- region.fl_end = LLONG_MAX;
- region.transport = transport;
- region.client_pid = client_pid;
- region.owner = owner;
+ return count;
+}
+
+int32_t
+get_posixlk_count (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
pthread_mutex_lock (&pl_inode->mutex);
{
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (!l->blocked
- && locks_overlap (&region, l)
- && !same_owner (&region, l)) {
- ret = 0;
- break;
- }
- }
+ count = __get_posixlk_count (this, pl_inode);
}
pthread_mutex_unlock (&pl_inode->mutex);
+out:
+ return count;
+}
+
+/* Store under GLUSTERFS_PARENT_ENTRYLK in @dict the value returned by
+ * check_entrylk_on_basename() for @basename in @parent.
+ *
+ * With @keep_max set, a value already present in @dict is kept when it is
+ * greater than or equal to the new one. When @parent is NULL or @basename is
+ * NULL/empty no lookup is done and 0 is stored unconditionally.
+ */
+void
+pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
+                              char *basename, dict_t *dict, gf_boolean_t keep_max)
+{
+        int32_t locked   = 0;
+        int32_t recorded = -1;
+        int     usable   = (parent && basename && basename[0] != '\0');
+
+        if (usable) {
+                if (keep_max)
+                        (void) dict_get_int32 (dict, GLUSTERFS_PARENT_ENTRYLK,
+                                               &recorded);
+
+                locked = check_entrylk_on_basename (this, parent, basename);
+                if (recorded >= locked)
+                        return;
+        }
+
+        if (dict_set_int32 (dict, GLUSTERFS_PARENT_ENTRYLK, locked) < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
+        }
+}
+
+/* Store the result of get_entrylk_count() for @inode in @dict under
+ * GLUSTERFS_ENTRYLK_COUNT. With @keep_max set, a value already present in
+ * @dict wins when it is greater than or equal to the new count.
+ */
+void
+pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
+                       dict_t *dict, gf_boolean_t keep_max)
+{
+        int32_t recorded = -1;
+        int32_t current  = 0;
+
+        if (keep_max)
+                (void) dict_get_int32 (dict, GLUSTERFS_ENTRYLK_COUNT,
+                                       &recorded);
+
+        current = get_entrylk_count (this, inode);
+        if (recorded >= current)
+                return;
+
+        if (dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, current) < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT);
+        }
+}
+
+/* Store the result of get_inodelk_count() for @inode (restricted to domain
+ * @domname when it is non-NULL) in @dict under GLUSTERFS_INODELK_COUNT. With
+ * @keep_max set, a value already present in @dict wins when it is greater
+ * than or equal to the new count.
+ */
+void
+pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
+                       char *domname, gf_boolean_t keep_max)
+{
+        int32_t recorded = -1;
+        int32_t current  = 0;
+
+        if (keep_max)
+                (void) dict_get_int32 (dict, GLUSTERFS_INODELK_COUNT,
+                                       &recorded);
+
+        current = get_inodelk_count (this, inode, domname);
+        if (recorded >= current)
+                return;
+
+        if (dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, current) < 0) {
+                gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for "
+                        "key %s", GLUSTERFS_INODELK_COUNT);
+        }
+}
+
+/* Store the result of get_posixlk_count() for @inode in @dict under
+ * GLUSTERFS_POSIXLK_COUNT. With @keep_max set, a value already present in
+ * @dict wins when it is greater than or equal to the new count.
+ */
+void
+pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
+                       dict_t *dict, gf_boolean_t keep_max)
+{
+        int32_t recorded = -1;
+        int32_t current  = 0;
+
+        if (keep_max)
+                (void) dict_get_int32 (dict, GLUSTERFS_POSIXLK_COUNT,
+                                       &recorded);
+
+        current = get_posixlk_count (this, inode);
+        if (recorded >= current)
+                return;
+
+        if (dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, current) < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT);
+        }
+}
+
+/* Fill @xdata with every lock-count answer that @local recorded as requested.
+ *
+ * The parent-entrylk answer needs @parent and a non-empty @name; all other
+ * answers need @inode. @max_lock is forwarded as the fillers' keep_max flag.
+ * Nothing is done when @xdata or @local is NULL.
+ */
+void
+pl_set_xdata_response (xlator_t *this, pl_local_t *local, inode_t *parent,
+                       inode_t *inode, char *name, dict_t *xdata, gf_boolean_t max_lock)
+{
+        if (!xdata || !local)
+                return;
+
+        if (local->parent_entrylk_req && parent && name && strlen (name)) {
+                pl_parent_entrylk_xattr_fill (this, parent, name, xdata,
+                                              max_lock);
+        }
+
+        /* Every remaining answer is per-inode. */
+        if (!inode)
+                return;
+
+        if (local->entrylk_count_req) {
+                pl_entrylk_xattr_fill (this, inode, xdata, max_lock);
+        }
+
+        if (local->inodelk_dom_count_req) {
+                pl_inodelk_xattr_fill (this, inode, xdata,
+                                       data_to_str (local->inodelk_dom_count_req),
+                                       max_lock);
+        }
+
+        if (local->inodelk_count_req) {
+                pl_inodelk_xattr_fill (this, inode, xdata, NULL, max_lock);
+        }
+
+        if (local->posixlk_count_req) {
+                pl_posixlk_xattr_fill (this, inode, xdata, max_lock);
+        }
+}
+
+/* Return true in case we need to ensure mandatory-locking
+ * semantics under the currently configured mode: file-based mode
+ * honours the per-inode mandatory flag, forced and optimal modes
+ * apply to every inode, any other mode disables it.
+ */
+gf_boolean_t
+pl_is_mandatory_locking_enabled (pl_inode_t *pl_inode)
+{
+        posix_locks_private_t *conf = THIS->private;
+
+        switch (conf->mandatory_mode) {
+        case MLK_FILE_BASED:
+                return pl_inode->mandatory ? _gf_true : _gf_false;
+        case MLK_FORCED:
+        case MLK_OPTIMAL:
+                return _gf_true;
+        default:
+                return _gf_false;
+        }
+}
+
+/* Checks whether the region where fop is acting upon conflicts
+ * with existing locks, as decided by __rw_allowable().
+ *
+ * Returns 1 when the fop may proceed; *can_block is left untouched
+ * in that case. Returns 0 otherwise, with *can_block telling the
+ * caller whether to queue the fop (_gf_true) or fail it (_gf_false).
+ * The fop is failed when fd is NULL or fd->flags has O_NONBLOCK set;
+ * note that the trace message below is logged for both of these cases.
+ */
+int
+pl_is_fop_allowed (pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
+ glusterfs_fop_t op, gf_boolean_t *can_block)
+{
+ int ret = 0;
+
+ if (!__rw_allowable (pl_inode, region, op)) {
+ if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
+ gf_log ("locks", GF_LOG_TRACE, "returning EAGAIN"
+ " because fd is O_NONBLOCK");
+ *can_block = _gf_false;
+ } else
+ *can_block = _gf_true;
+ } else
+ ret = 1;
+
return ret;
}
+/* Allocate a zeroed pl_fdctx_t with an empty locks_list.
+ * Returns NULL when the allocation fails.
+ */
+static pl_fdctx_t *
+pl_new_fdctx ()
+{
+        pl_fdctx_t *ctx = GF_CALLOC (1, sizeof (*ctx),
+                                     gf_locks_mt_pl_fdctx_t);
+
+        GF_VALIDATE_OR_GOTO ("posix-locks", ctx, done);
+
+        INIT_LIST_HEAD (&ctx->locks_list);
+
+done:
+        return ctx;
+}
+
+/* Return the pl_fdctx_t stored in @fd's context for @this, creating and
+ * storing a fresh one when none is present yet.
+ *
+ * Returns NULL when @this or @fd is NULL, when allocation fails, or when the
+ * new context cannot be stored (it is freed again in that case).
+ *
+ * The lookup and the store both happen under fd->lock.
+ */
+static pl_fdctx_t *
+pl_check_n_create_fdctx (xlator_t *this, fd_t *fd)
+{
+        int         ret   = 0;
+        uint64_t    tmp   = 0;
+        pl_fdctx_t *fdctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("posix-locks", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        {
+                ret = __fd_ctx_get (fd, this, &tmp);
+                if ((ret == 0) && (tmp != 0)) {
+                        /* A context already exists: hand it back as-is.
+                         * Falling through to __fd_ctx_set() here would
+                         * overwrite it with 0 and lose the stored context.
+                         */
+                        fdctx = (pl_fdctx_t *)(long) tmp;
+                        goto unlock;
+                }
+
+                fdctx = pl_new_fdctx ();
+                if (fdctx == NULL) {
+                        goto unlock;
+                }
+
+                ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx);
+                if (ret != 0) {
+                        GF_FREE (fdctx);
+                        fdctx = NULL;
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "failed to set fd ctx");
+                }
+        }
+unlock:
+        UNLOCK (&fd->lock);
+
+out:
+        return fdctx;
+}
+
+/* Callback for the discard wind: passes the child's result straight up. */
+int32_t
+pl_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+/* Continuation used as the target of the discard stub that pl_discard()
+ * queues on pl_inode->rw_list: winds the discard to the first child. */
+int
+pl_discard_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
+
+/* discard fop: wind to the child unless mandatory locking forbids it.
+ *
+ * When mandatory locking applies to the inode (and the caller's pid is not
+ * negative), the byte range [offset, offset + len - 1] is checked with
+ * pl_is_fop_allowed() under pl_inode->mutex:
+ *   - allowed:               the fop is wound to the first child;
+ *   - refused, cannot block: the frame is unwound with EAGAIN;
+ *   - refused, can block:    a stub resuming in pl_discard_cont() is queued
+ *                            on pl_inode->rw_list and nothing is unwound here.
+ *
+ * Failures (invalid @this, no pl_inode, allocation errors) unwind the frame
+ * with op_ret -1. Always returns 0.
+ */
+int32_t
+pl_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+        pl_inode_t   *pl_inode  = NULL;
+        pl_rw_req_t  *rw        = NULL;
+        posix_lock_t  region    = {.list = {0, }, };
+        gf_boolean_t  enabled   = _gf_false;
+        gf_boolean_t  can_block = _gf_true;
+        /* Start in the failed state so that a validation failure below
+         * unwinds the frame instead of silently dropping it. */
+        int           op_ret    = -1;
+        int           op_errno  = EINVAL;
+        int           allowed   = 1;
+
+        GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+        op_ret = 0;
+        op_errno = 0;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+        /* Requests carrying a negative pid bypass the mandatory-lock check. */
+        if (frame->root->pid < 0)
+                enabled = _gf_false;
+
+        if (enabled) {
+                region.fl_start = offset;
+                region.fl_end = offset + len - 1;
+                region.client = frame->root->client;
+                region.fd_num = fd_to_fdnum(fd);
+                region.client_pid = frame->root->pid;
+                region.owner = frame->root->lk_owner;
+
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+                                                     GF_FOP_DISCARD,
+                                                     &can_block);
+                        if (allowed == 1)
+                                goto unlock;
+                        else if (!can_block) {
+                                op_errno = EAGAIN;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        /* Refused but allowed to block: park the request. */
+                        rw = GF_CALLOC (1, sizeof (*rw),
+                                        gf_locks_mt_pl_rw_req_t);
+                        if (!rw) {
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        rw->stub = fop_discard_stub (frame, pl_discard_cont,
+                                                     fd, offset, len, xdata);
+                        if (!rw->stub) {
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                GF_FREE (rw);
+                                goto unlock;
+                        }
+
+                        rw->region = region;
+
+                        list_add_tail (&rw->list, &pl_inode->rw_list);
+                }
+        unlock:
+                pthread_mutex_unlock (&pl_inode->mutex);
+        }
+
+        if (allowed == 1)
+                STACK_WIND (frame, pl_discard_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->discard, fd, offset,
+                            len, xdata);
+unwind:
+        if (op_ret == -1)
+                STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno,
+                                     NULL, NULL, NULL);
+
+        return 0;
+}
+
+/* Callback for the zerofill wind: passes the child's result straight up. */
+int32_t
+pl_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+/* Continuation used as the target of the zerofill stub that pl_zerofill()
+ * queues on pl_inode->rw_list: winds the zerofill to the first child. */
+int
+pl_zerofill_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
+
+/* zerofill fop: wind to the child unless mandatory locking forbids it.
+ *
+ * When mandatory locking applies to the inode (and the caller's pid is not
+ * negative), the byte range [offset, offset + len - 1] is checked with
+ * pl_is_fop_allowed() under pl_inode->mutex:
+ *   - allowed:               the fop is wound to the first child;
+ *   - refused, cannot block: the frame is unwound with EAGAIN;
+ *   - refused, can block:    a stub resuming in pl_zerofill_cont() is queued
+ *                            on pl_inode->rw_list and nothing is unwound here.
+ *
+ * Failures (invalid @this, no pl_inode, allocation errors) unwind the frame
+ * with op_ret -1. Always returns 0.
+ */
+int32_t
+pl_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             off_t len, dict_t *xdata)
+{
+        pl_inode_t   *pl_inode  = NULL;
+        pl_rw_req_t  *rw        = NULL;
+        posix_lock_t  region    = {.list = {0, }, };
+        gf_boolean_t  enabled   = _gf_false;
+        gf_boolean_t  can_block = _gf_true;
+        /* Start in the failed state so that a validation failure below
+         * unwinds the frame instead of silently dropping it. */
+        int           op_ret    = -1;
+        int           op_errno  = EINVAL;
+        int           allowed   = 1;
+
+        GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+        op_ret = 0;
+        op_errno = 0;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+        /* Requests carrying a negative pid bypass the mandatory-lock check. */
+        if (frame->root->pid < 0)
+                enabled = _gf_false;
+
+        if (enabled) {
+                region.fl_start = offset;
+                region.fl_end = offset + len - 1;
+                region.client = frame->root->client;
+                region.fd_num = fd_to_fdnum(fd);
+                region.client_pid = frame->root->pid;
+                region.owner = frame->root->lk_owner;
+
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+                                                     GF_FOP_ZEROFILL,
+                                                     &can_block);
+                        if (allowed == 1)
+                                goto unlock;
+                        else if (!can_block) {
+                                op_errno = EAGAIN;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        /* Refused but allowed to block: park the request. */
+                        rw = GF_CALLOC (1, sizeof (*rw),
+                                        gf_locks_mt_pl_rw_req_t);
+                        if (!rw) {
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        rw->stub = fop_zerofill_stub (frame, pl_zerofill_cont,
+                                                      fd, offset, len, xdata);
+                        if (!rw->stub) {
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                GF_FREE (rw);
+                                goto unlock;
+                        }
+
+                        rw->region = region;
+
+                        list_add_tail (&rw->list, &pl_inode->rw_list);
+                }
+        unlock:
+                pthread_mutex_unlock (&pl_inode->mutex);
+        }
+
+        if (allowed == 1)
+                STACK_WIND (frame, pl_zerofill_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->zerofill, fd, offset,
+                            len, xdata);
+unwind:
+        if (op_ret == -1)
+                STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno,
+                                     NULL, NULL, NULL);
+
+        return 0;
+}
+
+/* Callback shared by the truncate and ftruncate winds.
+ *
+ * Drops what frame->local holds (loc[0] for truncate, the xdata ref and the
+ * fd ref when set) and then unwinds as truncate or ftruncate according to
+ * local->op.
+ *
+ * NOTE(review): frame->local is dereferenced without a NULL check and the
+ * pl_local_t itself is not released here - confirm both against the callers
+ * and the frame teardown path.
+ */
+int
+pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op == GF_FOP_TRUNCATE)
+ loc_wipe (&local->loc[0]);
+
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->op == GF_FOP_TRUNCATE)
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ else
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+/* Continuation that winds an ftruncate to the first child, with
+ * pl_truncate_cbk as its callback. */
+int
+pl_ftruncate_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+/* Continuation that winds a truncate to the first child, with
+ * pl_truncate_cbk as its callback. */
+int
+pl_truncate_cont (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
static int
truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int allowed = 1;
-
- priv = this->private;
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
local = frame->local;
if (op_ret != 0) {
@@ -126,114 +720,203 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- if (local->op == TRUNCATE)
- inode = local->loc.inode;
+ if (local->op == GF_FOP_TRUNCATE)
+ inode = local->loc[0].inode;
else
inode = local->fd->inode;
pl_inode = pl_inode_get (this, inode);
if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- if (priv->mandatory
- && pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->trans,
- frame->root->pid, frame->root->lk_owner,
- local->offset)) {
- op_ret = -1;
- op_errno = EAGAIN;
- goto unwind;
- }
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
- switch (local->op) {
- case TRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
- break;
- case FTRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
- break;
- }
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
- return 0;
+ if (enabled) {
+ region.fl_start = local->offset;
+ region.fl_end = LLONG_MAX;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(local->fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed (pl_inode, &region,
+ local->fd, local->op,
+ &can_block);
-unwind:
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ if (allowed == 1)
+ goto unlock;
+ else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL);
+ if (local->op == GF_FOP_TRUNCATE)
+ rw->stub = fop_truncate_stub (frame,
+ pl_truncate_cont, &local->loc[0],
+ local->offset, local->xdata);
+ else
+ rw->stub = fop_ftruncate_stub (frame,
+ pl_ftruncate_cont, local->fd,
+ local->offset, local->xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
+
+ if (allowed == 1) {
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ &local->loc[0], local->offset, local->xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate,
+ local->fd, local->offset, local->xdata);
+ break;
+ default:
+ break;
+ }
+ }
+unwind:
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "truncate failed with "
+ "ret: %d, error: %s", op_ret, strerror (op_errno));
+ if (local->op == GF_FOP_TRUNCATE)
+ loc_wipe (&local->loc[0]);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ STACK_UNWIND_STRICT (truncate, frame, op_ret,
+ op_errno, buf, NULL, xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret,
+ op_errno, buf, NULL, xdata);
+ break;
+ default:
+ break;
+ }
+ }
return 0;
}
-
int
pl_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
+ int ret = -1;
- local = GF_CALLOC (1, sizeof (struct _truncate_ops),
- gf_locks_mt_truncate_ops);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto unwind;
- }
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = TRUNCATE;
+ local->op = GF_FOP_TRUNCATE;
local->offset = offset;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc[0], loc);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
frame->local = local;
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc);
-
- return 0;
-
+ FIRST_CHILD (this)->fops->stat, loc, NULL);
+ ret = 0;
unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
-
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "truncate on %s failed with"
+ " ret: %d, error: %s", loc->path, -1,
+ strerror (ENOMEM));
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ }
return 0;
}
-
int
pl_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
+ int ret = -1;
- local = GF_CALLOC (1, sizeof (struct _truncate_ops),
- gf_locks_mt_truncate_ops);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto unwind;
- }
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = FTRUNCATE;
+ local->op = GF_FOP_FTRUNCATE;
local->offset = offset;
- local->fd = fd;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
frame->local = local;
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ ret = 0;
+unwind:
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with"
+ " ret: %d, error: %s", -1, strerror (ENOMEM));
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ }
return 0;
+}
-unwind:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+int
+pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *l = NULL;
+ int found = 0;
- return 0;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->fd_num == fd_to_fdnum(fd)) {
+ found = 1;
+ break;
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return found;
}
static void
@@ -250,12 +933,12 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
{
list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->fd_num == fd_to_fdnum(fd))) {
+ if (l->fd_num == fd_to_fdnum(fd)) {
if (l->blocked) {
list_move_tail (&l->list, &blocked_list);
continue;
}
- __delete_lock (pl_inode, l);
+ __delete_lock (l);
__destroy_lock (l);
}
}
@@ -265,7 +948,8 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
list_for_each_entry_safe (l, tmp, &blocked_list, list) {
list_del_init(&l->list);
- STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock);
+ STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock,
+ NULL);
__destroy_lock (l);
}
@@ -277,7 +961,7 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
static void
__delete_locks_of_owner (pl_inode_t *pl_inode,
- void *transport, uint64_t owner)
+ client_t *client, gf_lkowner_t *owner)
{
posix_lock_t *tmp = NULL;
posix_lock_t *l = NULL;
@@ -285,19 +969,21 @@ __delete_locks_of_owner (pl_inode_t *pl_inode,
/* TODO: what if it is a blocked lock with pending l->frame */
list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->transport == transport)
- && (l->owner == owner)) {
- gf_log ("posix-locks", GF_LOG_TRACE,
+ if (l->blocked)
+ continue;
+ if ((l->client == client) &&
+ is_same_lkowner (&l->owner, owner)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
" Flushing lock"
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s",
- l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- l->owner,
- l->user_flock.l_start,
- l->user_flock.l_len,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid,
+ lkowner_utoa (&l->owner),
+ l->user_flock.l_start,
+ l->user_flock.l_len,
l->blocked == 1 ? "Blocked" : "Active");
- __delete_lock (pl_inode, l);
+ __delete_lock (l);
__destroy_lock (l);
}
}
@@ -305,52 +991,570 @@ __delete_locks_of_owner (pl_inode_t *pl_inode,
return;
}
+
+/*
+ * Callback for the pass-through getxattr wound by pl_getxattr: forwards the
+ * child's result (op_ret, op_errno, dict, xdata) to the caller unchanged.
+ */
+int32_t
+pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+
+}
+
+/*
+ * getxattr entry point of the locks translator.
+ *
+ * A request whose name starts with GF_XATTR_CLRLK_CMD is a "clear locks"
+ * command: its arguments are parsed, the matching locks on the inode are
+ * cleared, and a one-line summary is returned in a freshly created dict
+ * under the requested name. Any other name (or a NULL name) is wound to the
+ * first child untouched.
+ *
+ * Failure to obtain or format the brick name is not fatal; the summary then
+ * uses this->name instead.
+ *
+ * Always returns 0; the outcome is delivered through the unwind/wind.
+ */
+int32_t
+pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             const char *name, dict_t *xdata)
+{
+        int32_t      op_errno      = EINVAL;
+        int          op_ret        = -1;
+        int32_t      bcount        = 0;
+        int32_t      gcount        = 0;
+        char         key[PATH_MAX] = {0, };
+        char        *lk_summary    = NULL;
+        pl_inode_t  *pl_inode      = NULL;
+        dict_t      *dict          = NULL;
+        clrlk_args   args          = {0,};
+        char        *brickname     = NULL;
+
+        if (!name)
+                goto usual;
+
+        if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD)))
+                goto usual;
+
+        if (clrlk_parse_args (name, &args)) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        pl_inode = pl_inode_get (this, loc->inode);
+        if (!pl_inode) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        switch (args.type) {
+        case CLRLK_INODE:
+        case CLRLK_ENTRY:
+                op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode,
+                                                         &args, &bcount,
+                                                         &gcount,
+                                                         &op_errno);
+                if (op_ret)
+                        goto out;
+                break;
+        case CLRLK_POSIX:
+                op_ret = clrlk_clear_posixlk (this, pl_inode, &args,
+                                              &bcount, &gcount,
+                                              &op_errno);
+                if (op_ret)
+                        goto out;
+                break;
+        case CLRLK_TYPE_MAX:
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* Best effort: the brick name only decorates the summary string. */
+        op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname);
+        if (op_ret) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Couldn't get brickname");
+        } else {
+                op_ret = format_brickname(brickname);
+                if (op_ret) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "Couldn't format brickname");
+                        GF_FREE(brickname);
+                        brickname = NULL;
+                }
+        }
+
+        if (!gcount && !bcount) {
+                if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+        } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d "
+                                "granted locks=%d",
+                                (brickname == NULL)? this->name : brickname,
+                                (args.type == CLRLK_INODE)? "inode":
+                                (args.type == CLRLK_ENTRY)? "entry":
+                                (args.type == CLRLK_POSIX)? "posix": " ",
+                                bcount, gcount) == -1) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        /*
+         * The name comes from the caller and has no length guarantee, so it
+         * must be checked against the destination before copying. key is
+         * zero-initialised, so copying at most sizeof (key) - 1 bytes leaves
+         * it NUL-terminated.
+         */
+        if (strlen (name) >= sizeof (key)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+        strncpy (key, name, sizeof (key) - 1);
+
+        if (dict_set_dynstr (dict, key, lk_summary)) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        op_ret = 0;
+out:
+        GF_FREE(brickname);
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+        GF_FREE (args.opts);
+        /*
+         * On success the summary belongs to dict; on failure it is released
+         * here.
+         * NOTE(review): confirm dict_set_dynstr() does not already release
+         * the string when it fails, otherwise this is a double free.
+         */
+        if (op_ret && lk_summary)
+                GF_FREE (lk_summary);
+        if (dict)
+                dict_unref (dict);
+        return 0;
+
+usual:
+        STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+        return 0;
+}
+
+/*
+ * Rewrite a pathinfo string in place as "<second field>:<third field>".
+ *
+ * The input is split on ':' for the first two fields and on '.' for the
+ * third; the first field is discarded. The variable names suggest the kept
+ * fields are a host name and a volume name, but the exact pathinfo layout is
+ * not visible here -- TODO confirm against the pathinfo producer.
+ *
+ * Returns 0 on success. Returns -1 when brickname is NULL, when a field is
+ * missing, or when a copy cannot be allocated; in the failure cases past the
+ * NULL check the buffer has already been cut up by strtok_r() and must not
+ * be used as a brick name by the caller.
+ */
+static int
+format_brickname(char *brickname)
+{
+        int   ret      = -1;
+        char *hostname = NULL;
+        char *volume   = NULL;
+        char *saveptr  = NULL;
+        char *field    = NULL;
+
+        if (!brickname)
+                goto out;
+
+        /* First field is skipped; a string with no fields at all is invalid. */
+        if (strtok_r (brickname, ":", &saveptr) == NULL)
+                goto out;
+
+        /*
+         * strtok_r() returns NULL when a field is absent. Check each token
+         * before duplicating it instead of relying on gf_strdup() accepting
+         * a NULL argument.
+         */
+        field = strtok_r (NULL, ":", &saveptr);
+        if (field == NULL)
+                goto out;
+        hostname = gf_strdup (field);
+        if (hostname == NULL)
+                goto out;
+
+        field = strtok_r (NULL, ".", &saveptr);
+        if (field == NULL)
+                goto out;
+        volume = gf_strdup (field);
+        if (volume == NULL)
+                goto out;
+
+        /*
+         * The copies are needed because both tokens live inside brickname.
+         * The result is made of two disjoint pieces of the original string
+         * plus one ':' and drops the first field and its delimiter, so it is
+         * never longer than what the buffer already held.
+         */
+        sprintf(brickname, "%s:%s", hostname, volume);
+
+        ret = 0;
+out:
+        GF_FREE(hostname);
+        GF_FREE(volume);
+        return ret;
+}
+
+/*
+ * Fetch the GF_XATTR_PATHINFO_KEY xattr of an inode from the first child and
+ * return a private copy of its string value.
+ *
+ * @this       locks translator instance
+ * @inode      inode to query (looked up by gfid)
+ * @op_errno   out: errno describing the failure; must not be NULL
+ * @brickname  out: newly allocated copy of the pathinfo value, to be
+ *             released by the caller with GF_FREE(); must not be NULL
+ *
+ * Returns 0 on success and -1 on failure. Except for the argument checks,
+ * every failure sets *op_errno, so callers that unwind with it never report
+ * an error with errno 0.
+ */
+static int
+fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno,
+                char **brickname)
+{
+        int      ret      = -1;
+        loc_t    loc      = {0, };
+        dict_t  *dict     = NULL;
+        char    *pathinfo = NULL;
+
+        if (!brickname)
+                goto out;
+
+        if (!op_errno)
+                goto out;
+
+        gf_uuid_copy (loc.gfid, inode->gfid);
+        loc.inode = inode_ref (inode);
+
+        ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict,
+                               GF_XATTR_PATHINFO_KEY, NULL, NULL);
+        if (ret < 0) {
+                *op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        /*
+         * Read into a local first so *brickname never holds a pointer that
+         * is owned by dict and dies with the dict_unref() below.
+         */
+        ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+        if (ret) {
+                /* dict helpers in this file are treated as returning a
+                 * negated errno; fall back to EINVAL otherwise. */
+                *op_errno = (ret < 0) ? -ret : EINVAL;
+                ret = -1;
+                goto out;
+        }
+
+        *brickname = gf_strdup (pathinfo);
+        if (*brickname == NULL) {
+                *op_errno = ENOMEM;
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (dict != NULL) {
+                dict_unref (dict);
+        }
+        loc_wipe(&loc);
+
+        return ret;
+}
+
+
+/*
+ * Derive the brick name from the inode's pathinfo xattr and cache it in
+ * priv->brickname.
+ *
+ * The brick name is the pathinfo string truncated at its last ':'.
+ *
+ * Returns 0 on success. Returns -1 on failure with *op_errno set: the value
+ * reported by fetch_pathinfo(), EINVAL when the pathinfo string contains no
+ * ':', or ENOMEM when the copy cannot be allocated.
+ */
+int
+pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+        int                     ret       = -1;
+        posix_locks_private_t  *priv      = NULL;
+        char                   *pathinfo  = NULL;
+        char                   *brickname = NULL;
+        char                   *end       = NULL;
+
+        priv = this->private;
+
+        /* fetch_pathinfo() rejects a NULL op_errno, so past this point it
+         * is safe to write through it. */
+        ret = fetch_pathinfo (this, inode, op_errno, &pathinfo);
+        if (ret)
+                goto out;
+
+        end = strrchr (pathinfo, ':');
+        if (!end) {
+                *op_errno = EINVAL;
+                ret = -1;
+                goto out;
+        }
+
+        brickname = gf_strndup (pathinfo, (end - pathinfo));
+        if (brickname == NULL) {
+                *op_errno = ENOMEM;
+                ret = -1;
+                goto out;
+        }
+
+        /* Ownership of the truncated copy moves to the private structure. */
+        priv->brickname = brickname;
+        ret = 0;
+out:
+        /* The full pathinfo copy is only a scratch value on every path. */
+        GF_FREE(pathinfo);
+        return ret;
+}
+
+/*
+ * Return the dict key under which this brick stores lockinfo values: the
+ * brick name cached in the translator's private data. The name is resolved
+ * through pl_lockinfo_get_brickname() the first time it is needed.
+ *
+ * Returns NULL (after logging a warning) when the brick name cannot be
+ * resolved. The returned pointer is the cached string itself, not a copy.
+ */
+char *
+pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+        posix_locks_private_t *conf = this->private;
+
+        if (conf->brickname != NULL)
+                return conf->brickname;
+
+        if (pl_lockinfo_get_brickname (this, inode, op_errno) < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "cannot get brickname");
+                return NULL;
+        }
+
+        return conf->brickname;
+}
+
+/*
+ * Build the GF_XATTR_LOCKINFO_KEY reply for an fd.
+ *
+ * When the fd holds at least one lock on its inode, a one-entry dict
+ * { <lockinfo key of this brick> : <fd number> } is serialized and stored in
+ * @dict under GF_XATTR_LOCKINFO_KEY. When the fd holds no locks, @dict is
+ * left untouched and 0 is returned.
+ *
+ * @this      locks translator instance
+ * @fd        fd being queried
+ * @dict      reply dict to fill
+ * @op_errno  out: errno on failure
+ *
+ * Returns 0 on success, -1 on failure with *op_errno set.
+ */
+int32_t
+pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd,
+                              dict_t *dict, int32_t *op_errno)
+{
+        pl_inode_t    *pl_inode = NULL;
+        char          *key      = NULL, *buf = NULL;
+        int32_t        op_ret   = 0;
+        unsigned long  fdnum    = 0;
+        int32_t        len      = 0;
+        dict_t        *tmp      = NULL;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+                *op_errno = EBADFD;
+                op_ret = -1;
+                goto out;
+        }
+
+        /* Nothing to report for an fd that holds no locks. */
+        if (!pl_locks_by_fd (pl_inode, fd)) {
+                op_ret = 0;
+                goto out;
+        }
+
+        fdnum = fd_to_fdnum (fd);
+
+        key = pl_lockinfo_key (this, fd->inode, op_errno);
+        if (key == NULL) {
+                op_ret = -1;
+                goto out;
+        }
+
+        tmp = dict_new ();
+        if (tmp == NULL) {
+                op_ret = -1;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+
+        op_ret = dict_set_uint64 (tmp, key, fdnum);
+        if (op_ret < 0) {
+                *op_errno = -op_ret;
+                op_ret = -1;
+                gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+                        "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+                        fdnum, fd, uuid_utoa (fd->inode->gfid),
+                        strerror (*op_errno));
+                goto out;
+        }
+
+        len = dict_serialized_length (tmp);
+        if (len < 0) {
+                /*
+                 * The failure code is carried by len; op_ret still holds the
+                 * 0 from the successful dict_set_uint64() above and would
+                 * produce an errno of 0.
+                 */
+                *op_errno = -len;
+                op_ret = -1;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dict_serialized_length failed (%s) while handling "
+                        "lockinfo for fd (ptr:%p inode-gfid:%s)",
+                        strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        buf = GF_CALLOC (1, len, gf_common_mt_char);
+        if (buf == NULL) {
+                op_ret = -1;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+
+        op_ret = dict_serialize (tmp, buf);
+        if (op_ret < 0) {
+                *op_errno = -op_ret;
+                op_ret = -1;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dict_serialize failed (%s) while handling lockinfo "
+                        "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno),
+                        fd, uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+        if (op_ret < 0) {
+                *op_errno = -op_ret;
+                op_ret = -1;
+                gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+                        "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+                        fdnum, fd, uuid_utoa (fd->inode->gfid),
+                        strerror (*op_errno));
+                goto out;
+        }
+
+        /* The reply dict now owns the buffer; keep the cleanup below from
+         * freeing it. */
+        buf = NULL;
+out:
+        if (tmp != NULL) {
+                dict_unref (tmp);
+        }
+
+        if (buf != NULL) {
+                GF_FREE (buf);
+        }
+
+        return op_ret;
+}
+
+
int32_t
-pl_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int dummy = 1;
- int ret = -1;
+ int32_t op_ret = 0, op_errno = 0;
+ dict_t *dict = NULL;
+
+ if (!name) {
+ goto usual;
+ }
+
+ if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) {
+ dict = dict_new ();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict,
+ &op_errno);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo on fd (ptr:%p inode-gfid:%s) "
+ "failed (%s)", fd, uuid_utoa (fd->inode->gfid),
+ strerror (op_errno));
+ }
+
+ goto unwind;
+ } else {
+ goto usual;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+
+ return 0;
+
+usual:
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+/*
+ * Re-home every lock on newfd's inode that is recorded against oldfd_num:
+ * each matching entry of the inode's ext_list gets newfd's fd number and the
+ * client of the calling frame. The list is walked under pl_inode->mutex.
+ *
+ * Returns 0 on success; -1 with *op_errno = EBADFD when the inode's lock
+ * context cannot be obtained.
+ */
+int32_t
+pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
+                  int32_t *op_errno)
+{
+        uint64_t      target   = fd_to_fdnum (newfd);
+        pl_inode_t   *pl_inode = pl_inode_get (frame->this, newfd->inode);
+        posix_lock_t *lock     = NULL;
+
+        if (pl_inode == NULL) {
+                *op_errno = EBADFD;
+                return -1;
+        }
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                list_for_each_entry (lock, &pl_inode->ext_list, list) {
+                        if (lock->fd_num != oldfd_num)
+                                continue;
+
+                        lock->fd_num = target;
+                        lock->client = frame->root->client;
+                }
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return 0;
+}
+
+/*
+ * Handle a GF_XATTR_LOCKINFO_KEY value received through fsetxattr.
+ *
+ * The buffer is unserialized into a dict, the fd number stored under this
+ * brick's lockinfo key is looked up, and the locks recorded against that old
+ * fd number are migrated to @fd. A missing or zero entry means there is
+ * nothing to migrate and counts as success.
+ *
+ * @frame         calling frame (supplies the translator and the client)
+ * @fd            fd that takes over the locks
+ * @lockinfo_buf  serialized lockinfo dict
+ * @len           length of @lockinfo_buf in bytes
+ * @op_errno      out: errno on failure
+ *
+ * Returns 0 on success, -1 on failure with *op_errno set.
+ */
+int32_t
+pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
+                              int len, int32_t *op_errno)
+{
+        int32_t   op_ret    = -1;
+        dict_t   *lockinfo  = NULL;
+        uint64_t  oldfd_num = 0;
+        char     *key       = NULL;
+
+        lockinfo = dict_new ();
+        if (lockinfo == NULL) {
+                op_ret = -1;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+
+        op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo);
+        if (op_ret < 0) {
+                *op_errno = -op_ret;
+                op_ret = -1;
+                goto out;
+        }
+
+        key = pl_lockinfo_key (frame->this, fd->inode, op_errno);
+        if (key == NULL) {
+                op_ret = -1;
+                goto out;
+        }
+
+        /*
+         * The return value is deliberately not treated as an error: when the
+         * key is absent oldfd_num stays 0 and the check below reports
+         * success.
+         */
+        op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num);
+
+        if (oldfd_num == 0) {
+                op_ret = 0;
+                goto out;
+        }
+
+        op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno);
+        if (op_ret < 0) {
+                /* Go through uintptr_t so the integer-to-pointer conversion
+                 * for %p is well defined on every word size. */
+                gf_log (frame->this->name, GF_LOG_WARNING,
+                        "migration of locks from oldfd (ptr:%p) to newfd "
+                        "(ptr:%p) (inode-gfid:%s) failed",
+                        (void *)(uintptr_t)oldfd_num, fd,
+                        uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+out:
+        /* lockinfo is NULL when dict_new() itself failed. */
+        if (lockinfo != NULL)
+                dict_unref (lockinfo);
+
+        return op_ret;
+}
+
+/*
+ * fsetxattr entry point of the locks translator.
+ *
+ * When the request carries a GF_XATTR_LOCKINFO_KEY value, the lock
+ * migration it describes is performed first; if that fails the call is
+ * unwound with the error and never reaches the child. In every other case
+ * the request is wound to the first child unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+              int32_t flags, dict_t *xdata)
+{
+        int32_t  op_ret   = 0;
+        int32_t  op_errno = 0;
+        void    *payload  = NULL;
+        int      size     = 0;
+
+        /* Presence of the key is judged by the pointer, not the return. */
+        op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+                                       &payload, &size);
+        if (payload != NULL) {
+                op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, payload,
+                                                       size, &op_errno);
+                if (op_ret < 0) {
+                        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret,
+                                             op_errno, NULL);
+                        return 0;
+                }
+        }
+
+        STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+        return 0;
+}
+
+int32_t
+pl_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ pl_fdctx_t *fdctx = NULL;
if (op_ret < 0)
goto unwind;
- ret = fd_ctx_set (fd, this, dummy);
- if (ret != 0)
- gf_log (this->name, GF_LOG_ERROR,
- "setting context for fd=%p in locks failed.", fd);
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
unwind:
- STACK_UNWIND_STRICT (opendir,
- frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+ PL_STACK_UNWIND (opendir, xdata, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
}
-int32_t
+int32_t
pl_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame,
- pl_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
-
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND (frame, pl_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
}
int
pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -358,40 +1562,46 @@ pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
pl_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- uint64_t owner = -1;
-
- priv = this->private;
- owner = frame->root->lk_owner;
+ pl_inode_t *pl_inode = NULL;
pl_inode = pl_inode_get (this, fd->inode);
if (!pl_inode) {
gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
- STACK_UNWIND_STRICT (flush, frame, -1, EBADFD);
+ STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL);
return 0;
}
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (pl_inode->migrated) {
+ pthread_mutex_unlock (&pl_inode->mutex);
+ STACK_UNWIND_STRICT (flush, frame, -1, EREMOTE,
+ NULL);
+ return 0;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
pl_trace_flush (this, frame, fd);
- if (owner == 0) {
+ if (frame->root->lk_owner.len == 0) {
/* Handle special case when protocol/server sets lk-owner to zero.
* This usually happens due to a client disconnection. Hence, free
* all locks opened with this fd.
*/
gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks with fd %p", fd);
+ "Releasing all locks with fd %p", fd);
delete_locks_of_fd (this, pl_inode, fd);
goto wind;
}
pthread_mutex_lock (&pl_inode->mutex);
{
- __delete_locks_of_owner (pl_inode, frame->root->trans,
- owner);
+ __delete_locks_of_owner (pl_inode, frame->root->client,
+ &frame->root->lk_owner);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -401,42 +1611,93 @@ pl_flush (call_frame_t *frame, xlator_t *this,
wind:
STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd);
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
return 0;
}
int
pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- int dummy = 1;
- int ret = -1;
+ pl_fdctx_t *fdctx = NULL;
if (op_ret < 0)
goto unwind;
- ret = fd_ctx_set (fd, this, dummy);
- if (ret != 0)
- gf_log (this->name, GF_LOG_ERROR,
- "setting context for fd=%p in locks failed.", fd);
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
unwind:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
+/*
+ * open fop of the locks translator.
+ *
+ * An open with O_TRUNC set on the fd is refused with EAGAIN when mandatory
+ * locking is in effect and the inode holds conflicting locks: any lock at
+ * all in forced mode or in file-based mode on a mandatory inode, and only
+ * locks flagged GF_LK_MANDATORY in optimal mode. Otherwise the open is wound
+ * to the first child.
+ *
+ * Fails with EINVAL when `this` is NULL and with ENOMEM when the inode's
+ * lock context cannot be obtained. Always returns 0.
+ */
 int
 pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+         fd_t *fd, dict_t *xdata)
 {
- /* why isn't O_TRUNC being handled ? */
- STACK_WIND (frame, pl_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags & ~O_TRUNC, fd, wbflags);
+        int                     op_ret   = -1;
+        int                     op_errno = EINVAL;
+        pl_inode_t             *pl_inode = NULL;
+        posix_lock_t           *l        = NULL;
+        posix_locks_private_t  *priv     = NULL;
+
+        /* Validate `this` before it is dereferenced for the private data. */
+        GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+        priv = this->private;
+
+        /* The lock context is dereferenced below, so a failed lookup must
+         * end the call here, as the read/write paths do. */
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        op_ret = 0, op_errno = 0;
+
+        /* As per design, under forced and file-based mandatory locking modes
+         * it doesn't matter whether inodes's lock list contain advisory or
+         * mandatory type locks. So we just check whether inode's lock list is
+         * empty or not to make sure that no locks are being held for the file.
+         * Whereas under optimal mandatory locking mode, we strictly fail open
+         * if and only if lock list contain mandatory locks.
+         */
+        if (((priv->mandatory_mode == MLK_FILE_BASED) && pl_inode->mandatory) ||
+            priv->mandatory_mode == MLK_FORCED) {
+                if (fd->flags & O_TRUNC) {
+                        pthread_mutex_lock (&pl_inode->mutex);
+                        {
+                                if (!list_empty (&pl_inode->ext_list)) {
+                                        op_ret = -1;
+                                        op_errno = EAGAIN;
+                                }
+                        }
+                        pthread_mutex_unlock (&pl_inode->mutex);
+                }
+        } else if (priv->mandatory_mode == MLK_OPTIMAL) {
+                if (fd->flags & O_TRUNC) {
+                        pthread_mutex_lock (&pl_inode->mutex);
+                        {
+                                list_for_each_entry (l, &pl_inode->ext_list, list) {
+                                        if ((l->lk_flags & GF_LK_MANDATORY)) {
+                                                op_ret = -1;
+                                                op_errno = EAGAIN;
+                                                break;
+                                        }
+                                }
+                        }
+                        pthread_mutex_unlock (&pl_inode->mutex);
+                }
+        }
+unwind:
+        if (op_ret == -1)
+                STACK_UNWIND_STRICT (open, frame, op_ret, op_errno,
+                                     NULL, NULL);
+        else
+                STACK_WIND (frame, pl_open_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->open,
+                            loc, flags, fd, xdata);
 return 0;
 }
@@ -445,22 +1706,23 @@ int
pl_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- int dummy = 1;
- int ret = -1;
+ pl_fdctx_t *fdctx = NULL;
if (op_ret < 0)
goto unwind;
- ret = fd_ctx_set (fd, this, dummy);
- if (ret != 0)
- gf_log (this->name, GF_LOG_ERROR,
- "setting context for fd=%p in locks failed.", fd);
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
unwind:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ PL_STACK_UNWIND (create, xdata, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
@@ -468,23 +1730,44 @@ unwind:
int
pl_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);
STACK_WIND (frame, pl_create_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
- loc, flags, mode, fd);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
+/*
+ * Callback for the unlink wound by pl_unlink: hands the child's result
+ * (op_ret, op_errno, preparent, postparent, xdata) to PL_STACK_UNWIND.
+ * NOTE(review): PL_STACK_UNWIND is defined outside this view; presumably it
+ * post-processes xdata before unwinding -- confirm against the macro.
+ */
+int32_t
+pl_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ PL_STACK_UNWIND (unlink, xdata, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+/*
+ * unlink fop of the locks translator: runs PL_LOCAL_GET_REQUESTS on the
+ * request (passing loc, with no fd) and winds unlink to the first child with
+ * pl_unlink_cbk as the callback. Always returns 0.
+ * NOTE(review): PL_LOCAL_GET_REQUESTS is defined outside this view;
+ * presumably it records what the caller asked for in xdata -- confirm
+ * against the macro.
+ */
+int32_t
+pl_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);
+ STACK_WIND (frame, pl_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
int
pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
+ PL_STACK_UNWIND (readv, xdata, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
return 0;
}
@@ -492,14 +1775,14 @@ pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ PL_STACK_UNWIND (writev, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-
void
do_blocked_rw (pl_inode_t *pl_inode)
{
@@ -530,18 +1813,26 @@ do_blocked_rw (pl_inode_t *pl_inode)
return;
}
-
static int
__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
glusterfs_fop_t op)
{
posix_lock_t *l = NULL;
+ posix_locks_private_t *priv = NULL;
int ret = 1;
+ priv = THIS->private;
+
list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (locks_overlap (l, region) && !same_owner (l, region)) {
+ if (!l->blocked && locks_overlap (l, region)
+ && !same_owner (l, region)) {
if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
continue;
+ /* Check for mandatory lock under optimal
+ * mandatory-locking mode */
+ if (priv->mandatory_mode == MLK_OPTIMAL
+ && !(l->lk_flags & GF_LK_MANDATORY))
+ continue;
ret = 0;
break;
}
@@ -550,74 +1841,77 @@ __rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
return ret;
}
-
int
-pl_readv_cont (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
-
int
pl_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
int op_ret = 0;
int op_errno = 0;
- char wind_needed = 1;
+ int allowed = 1;
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
- priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
- if (priv->mandatory && pl_inode->mandatory) {
+ if (enabled) {
region.fl_start = offset;
region.fl_end = offset + size - 1;
- region.transport = frame->root->trans;
+ region.client = frame->root->client;
region.fd_num = fd_to_fdnum(fd);
region.client_pid = frame->root->pid;
region.owner = frame->root->lk_owner;
pthread_mutex_lock (&pl_inode->mutex);
{
- wind_needed = __rw_allowable (pl_inode, &region,
- GF_FOP_READ);
- if (wind_needed) {
+ allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+ GF_FOP_READ, &can_block);
+ if (allowed == 1)
goto unlock;
- }
-
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN as fd is O_NONBLOCK");
+ else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC (1, sizeof (*rw),
+ rw = GF_CALLOC (1, sizeof (*rw),
gf_locks_mt_pl_rw_req_t);
if (!rw) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_errno = ENOMEM;
op_ret = -1;
goto unlock;
}
rw->stub = fop_readv_stub (frame, pl_readv_cont,
- fd, size, offset);
+ fd, size, offset, flags,
+ xdata);
if (!rw->stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_errno = ENOMEM;
op_ret = -1;
GF_FREE (rw);
@@ -632,80 +1926,83 @@ pl_readv (call_frame_t *frame, xlator_t *this,
pthread_mutex_unlock (&pl_inode->mutex);
}
-
- if (wind_needed) {
+ if (allowed == 1) {
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
}
-
+unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
- NULL, 0, NULL, NULL);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
return 0;
}
-
int
pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
-
int
pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
int op_ret = 0;
int op_errno = 0;
- char wind_needed = 1;
+ int allowed = 1;
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
- priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (priv->mandatory && pl_inode->mandatory) {
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+
+ if (enabled) {
region.fl_start = offset;
region.fl_end = offset + iov_length (vector, count) - 1;
- region.transport = frame->root->trans;
+ region.client = frame->root->client;
region.fd_num = fd_to_fdnum(fd);
region.client_pid = frame->root->pid;
region.owner = frame->root->lk_owner;
pthread_mutex_lock (&pl_inode->mutex);
{
- wind_needed = __rw_allowable (pl_inode, &region,
- GF_FOP_WRITE);
- if (wind_needed)
+ allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+ GF_FOP_WRITE, &can_block);
+ if (allowed == 1)
goto unlock;
-
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN because fd is "
- "O_NONBLOCK");
+ else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC (1, sizeof (*rw),
+ rw = GF_CALLOC (1, sizeof (*rw),
gf_locks_mt_pl_rw_req_t);
if (!rw) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_errno = ENOMEM;
op_ret = -1;
goto unlock;
@@ -713,10 +2010,8 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
rw->stub = fop_writev_stub (frame, pl_writev_cont,
fd, vector, count, offset,
- iobref);
+ flags, iobref, xdata);
if (!rw->stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_errno = ENOMEM;
op_ret = -1;
GF_FREE (rw);
@@ -731,55 +2026,270 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
pthread_mutex_unlock (&pl_inode->mutex);
}
-
- if (wind_needed)
+ if (allowed == 1) {
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
-
+ fd, vector, count, offset, flags, iobref, xdata);
+ }
+unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
return 0;
}
+/*
+ * Check whether any lock on pl_inode->ext_list was acquired through fd.
+ *
+ * Locks are matched by comparing their fd_num with fd_to_fdnum (fd).
+ * pl_getlk_fd calls this while holding pl_inode->mutex.
+ *
+ * Returns 1 when a matching lock exists, 0 otherwise.
+ */
+static int
+__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd)
+{
+        posix_lock_t *cur = NULL;
+
+        list_for_each_entry (cur, &pl_inode->ext_list, list) {
+                if (cur->fd_num == fd_to_fdnum(fd))
+                        return 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Create a new posix lock from the fields of an existing one by passing
+ * them to new_posix_lock.
+ *
+ * Returns whatever new_posix_lock returns; __dup_locks_to_fdctx treats a
+ * NULL result as failure.
+ *
+ * NOTE(review): fd_num is cast back to fd_t * here - presumably the
+ * inverse of fd_to_fdnum; confirm against its definition.
+ */
+static posix_lock_t *
+lock_dup (posix_lock_t *lock)
+{
+        return new_posix_lock (&lock->user_flock, lock->client,
+                               lock->client_pid, &lock->owner,
+                               (fd_t *)lock->fd_num, lock->lk_flags,
+                               lock->blocking);
+}
+
+/*
+ * Append a duplicate of every lock on pl_inode->ext_list that belongs to
+ * fd (matched via fd_to_fdnum) to fdctx->locks_list.
+ *
+ * Returns 0 on success, or -1 as soon as lock_dup fails; duplicates added
+ * before the failure stay on fdctx->locks_list.
+ */
+static int
+__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+                      pl_fdctx_t *fdctx)
+{
+        posix_lock_t *src  = NULL;
+        posix_lock_t *copy = NULL;
+
+        list_for_each_entry (src, &pl_inode->ext_list, list) {
+                if (l_fd_mismatch_guard: 0, src->fd_num != fd_to_fdnum(fd))
+                        continue;
+
+                copy = lock_dup (src);
+                if (!copy)
+                        return -1;
+
+                list_add_tail (&copy->list, &fdctx->locks_list);
+        }
+
+        return 0;
+}
+
+/*
+ * Copy the locks held through fd into fdctx->locks_list.
+ *
+ * Thin wrapper: the result of __dup_locks_to_fdctx is returned unchanged
+ * (0 on success, non-zero on failure).
+ */
+static int
+__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+                       pl_fdctx_t *fdctx)
+{
+        return __dup_locks_to_fdctx (pl_inode, fd, fdctx);
+}
+
+/*
+ * Flag lock as the end-of-list marker by setting the type of its
+ * user_flock to GF_LK_EOL.  __set_next_lock_fd uses this when the fd
+ * context has no further locks to hand out.
+ */
+static void
+pl_mark_eol_lock (posix_lock_t *lock)
+{
+        lock->user_flock.l_type = GF_LK_EOL;
+}
+
+/*
+ * Detach and return the first lock on fdctx->locks_list.
+ *
+ * Returns NULL (after a debug log) when the list is empty.  The returned
+ * lock has been unlinked with list_del_init; __set_next_lock_fd destroys
+ * it once its fields have been copied.
+ */
+static posix_lock_t *
+__get_next_fdctx_lock (pl_fdctx_t *fdctx)
+{
+        posix_lock_t *head = NULL;
+
+        GF_ASSERT (fdctx);
+
+        if (list_empty (&fdctx->locks_list)) {
+                gf_log (THIS->name, GF_LOG_DEBUG,
+                        "fdctx lock list empty");
+                return NULL;
+        }
+
+        head = list_entry (fdctx->locks_list.next, posix_lock_t, list);
+
+        GF_ASSERT (head);
+
+        list_del_init (&head->list);
+
+        return head;
+}
+
+/*
+ * Fill reqlock with the next lock queued on fdctx.
+ *
+ * The next lock is taken off fdctx->locks_list; its user_flock, range
+ * (fl_start/fl_end), fl_type and owner are copied into reqlock and the
+ * queued lock is then destroyed.  When nothing is queued, reqlock is
+ * marked as end-of-list instead.
+ *
+ * Always returns 0.
+ */
+static int
+__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock)
+{
+        posix_lock_t *next = NULL;
+
+        GF_ASSERT (fdctx);
+
+        next = __get_next_fdctx_lock (fdctx);
+        if (!next) {
+                gf_log (THIS->name, GF_LOG_DEBUG,
+                        "marking EOL in reqlock");
+                pl_mark_eol_lock (reqlock);
+                return 0;
+        }
+
+        reqlock->user_flock = next->user_flock;
+        reqlock->fl_type    = next->fl_type;
+        reqlock->fl_start   = next->fl_start;
+        reqlock->fl_end     = next->fl_end;
+        reqlock->owner      = next->owner;
+
+        /* The queued copy has served its purpose once its fields are out. */
+        __destroy_lock (next);
+
+        return 0;
+}
+
+/*
+ * Hand out, one call at a time, the locks held through fd.
+ *
+ * Under pl_inode->mutex:
+ *  - if fd holds no lock on the inode, return 0 and leave reqlock as is;
+ *  - otherwise, if the fd context's locks_list is empty, first fill it
+ *    with duplicates of the fd's locks, then pop the next queued lock
+ *    into reqlock (reqlock is marked end-of-list once the queue runs dry).
+ *
+ * Returns 0 on success and a non-zero value on failure.  A missing fd
+ * context is reported as -1 instead of being dereferenced; pl_lk maps a
+ * negative return to ENOLCK.
+ */
+static int
+pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode,
+             fd_t *fd, posix_lock_t *reqlock)
+{
+        uint64_t    tmp   = 0;
+        pl_fdctx_t *fdctx = NULL;
+        int         ret   = 0;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                if (!__fd_has_locks (pl_inode, fd)) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "fd=%p has no active locks", fd);
+                        ret = 0;
+                        goto unlock;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "There are active locks on fd");
+
+                ret = fd_ctx_get (fd, this, &tmp);
+                fdctx = (pl_fdctx_t *)(long) tmp;
+
+                /* Without a context there is no list to inspect; bail out
+                 * rather than dereferencing a NULL fdctx below. */
+                if (ret || !fdctx) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "no fdctx found for fd=%p", fd);
+                        ret = -1;
+                        goto unlock;
+                }
+
+                if (list_empty (&fdctx->locks_list)) {
+                        gf_log (this->name, GF_LOG_TRACE,
+                                "no fdctx -> copying all locks on fd");
+
+                        ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx);
+                        if (ret) {
+                                goto unlock;
+                        }
+
+                        ret = __set_next_lock_fd (fdctx, reqlock);
+
+                } else {
+                        gf_log (this->name, GF_LOG_TRACE,
+                                "fdctx present -> returning the next lock");
+                        ret = __set_next_lock_fd (fdctx, reqlock);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "could not get next lock of fd");
+                                goto unlock;
+                        }
+                }
+        }
+
+unlock:
+        pthread_mutex_unlock (&pl_inode->mutex);
+        return ret;
+
+}
+
+/*
+ * Report whether a meta lock is present on the inode.
+ *
+ * Returns 1 when pl_inode->metalk_list has at least one entry and 0 when
+ * it is empty.  The list is read without taking pl_inode->mutex here.
+ */
+int
+pl_metalock_is_active (pl_inode_t *pl_inode)
+{
+        return list_empty (&pl_inode->metalk_list) ? 0 : 1;
+}
+
+/*
+ * Append reqlock to the tail of pl_inode->queued_locks.
+ *
+ * can_block is accepted but not used by this function.  Always returns 0.
+ * NOTE(review): the double-underscore prefix suggests the caller is
+ * expected to hold pl_inode->mutex - confirm at the call sites.
+ */
+int
+__pl_queue_lock (pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block)
+{
+ list_add_tail (&reqlock->list, &pl_inode->queued_locks);
+
+ return 0;
+}
int
pl_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- void *transport = NULL;
- pid_t client_pid = 0;
- uint64_t owner = 0;
+ pl_inode_t *pl_inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int can_block = 0;
+ posix_lock_t *reqlock = NULL;
+ posix_lock_t *conf = NULL;
+ int ret = 0;
+ uint32_t lk_flags = 0;
posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- int op_ret = 0;
- int op_errno = 0;
- int can_block = 0;
- posix_lock_t *reqlock = NULL;
- posix_lock_t *conf = NULL;
- int ret = 0;
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
- priv = this->private;
+ priv = this->private;
+
+ ret = dict_get_uint32 (xdata, "lkmode", &lk_flags);
+ if (ret == 0) {
+ if (priv->mandatory_mode == MLK_NONE)
+ gf_log (this->name, GF_LOG_DEBUG, "Lock flags received "
+ "in a non-mandatory locking environment, "
+ "continuing");
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "Lock flags received, "
+ "continuing");
+ }
+
+ if ((flock->l_start < 0) ||
+ ((flock->l_start + flock->l_len) < 0)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ /* As per 'man 3 fcntl', the value of l_len may be
+ * negative. In such cases, lock request should be
+ * considered for the range starting at 'l_start+l_len'
+ * and ending at 'l_start-1'. Update the fields accordingly.
+ */
+ if (flock->l_len < 0) {
+ flock->l_start += flock->l_len;
+ flock->l_len = labs (flock->l_len);
+ }
pl_inode = pl_inode_get (this, fd->inode);
if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- reqlock = new_posix_lock (flock, transport, client_pid,
- owner, fd);
+ reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, fd, lk_flags,
+ can_block);
if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
@@ -789,6 +2299,68 @@ pl_lk (call_frame_t *frame, xlator_t *this,
switch (cmd) {
+ case F_RESLK_LCKW:
+ can_block = 1;
+
+ /* fall through */
+ case F_RESLK_LCK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ reqlock->frame = frame;
+ reqlock->this = this;
+
+ ret = pl_reserve_setlk (this, pl_inode, reqlock,
+ can_block);
+ if (ret < 0) {
+ if (can_block)
+ goto out;
+
+ op_ret = -1;
+ op_errno = -ret;
+ __destroy_lock (reqlock);
+ goto unwind;
+ }
+ /* Finally a getlk and return the call */
+ conf = pl_getlk (pl_inode, reqlock);
+ if (conf)
+ posix_lock_to_flock (conf, flock);
+ break;
+
+ case F_RESLK_UNLCK:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_reserve_unlock (this, pl_inode, reqlock);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ __destroy_lock (reqlock);
+ goto unwind;
+
+ break;
+
+ case F_GETLK_FD:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ GF_ASSERT (ret >= 0);
+
+ ret = pl_getlk_fd (this, pl_inode, fd, reqlock);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getting locks on fd failed");
+ op_ret = -1;
+ op_errno = ENOLCK;
+ goto unwind;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Replying with a lock on fd for healing");
+
+ posix_lock_to_flock (reqlock, flock);
+ __destroy_lock (reqlock);
+
+ break;
+
#if F_GETLK != F_GETLK64
case F_GETLK64:
#endif
@@ -806,19 +2378,42 @@ pl_lk (call_frame_t *frame, xlator_t *this,
can_block = 1;
reqlock->frame = frame;
reqlock->this = this;
-
+ reqlock->blocking = can_block;
/* fall through */
#if F_SETLK != F_SETLK64
case F_SETLK64:
#endif
case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pl_inode, reqlock,
- can_block);
+ reqlock->frame = frame;
+ reqlock->this = this;
+
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (pl_inode->migrated) {
+ op_errno = EREMOTE;
+ pthread_mutex_unlock (&pl_inode->mutex);
+ STACK_UNWIND_STRICT (lk, frame, -1,
+ op_errno, flock, xdata);
+
+ __destroy_lock (reqlock);
+ goto out;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock blocked due to conflicting reserve lock");
+ goto out;
+ }
+ ret = pl_setlk (this, pl_inode, reqlock, can_block);
if (ret == -1) {
- if (can_block) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL);
goto out;
}
@@ -826,13 +2421,23 @@ pl_lk (call_frame_t *frame, xlator_t *this,
op_ret = -1;
op_errno = EAGAIN;
__destroy_lock (reqlock);
+ } else if (ret == -2) {
+ goto out;
+ } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
+ /* For NLM's last "unlock on fd" detection */
+ if (pl_locks_by_fd (pl_inode, fd))
+ flock->l_type = F_RDLCK;
+ else
+ flock->l_type = F_UNLCK;
}
}
unwind:
pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
pl_update_refkeeper (this, fd->inode);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock);
+
+
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
out:
return 0;
}
@@ -847,120 +2452,121 @@ pl_forget (xlator_t *this,
posix_lock_t *ext_tmp = NULL;
posix_lock_t *ext_l = NULL;
- struct list_head posixlks_released;
+ struct list_head posixlks_released;
pl_inode_lock_t *ino_tmp = NULL;
pl_inode_lock_t *ino_l = NULL;
- struct list_head inodelks_released;
+ struct list_head inodelks_released;
pl_rw_req_t *rw_tmp = NULL;
pl_rw_req_t *rw_req = NULL;
pl_entry_lock_t *entry_tmp = NULL;
pl_entry_lock_t *entry_l = NULL;
- struct list_head entrylks_released;
+ struct list_head entrylks_released;
pl_dom_list_t *dom = NULL;
pl_dom_list_t *dom_tmp = NULL;
- INIT_LIST_HEAD (&posixlks_released);
- INIT_LIST_HEAD (&inodelks_released);
- INIT_LIST_HEAD (&entrylks_released);
+ INIT_LIST_HEAD (&posixlks_released);
+ INIT_LIST_HEAD (&inodelks_released);
+ INIT_LIST_HEAD (&entrylks_released);
pl_inode = pl_inode_get (this, inode);
- pthread_mutex_lock (&pl_inode->mutex);
- {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ if (!list_empty (&pl_inode->rw_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending R/W requests found, releasing.");
- if (!list_empty (&pl_inode->rw_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending R/W requests found, releasing.");
+ list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
+ list) {
- list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
- list) {
+ list_del (&rw_req->list);
+ GF_FREE (rw_req);
+ }
+ }
- list_del (&rw_req->list);
- GF_FREE (rw_req);
- }
- }
+ if (!list_empty (&pl_inode->ext_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending fcntl locks found, releasing.");
- if (!list_empty (&pl_inode->ext_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending fcntl locks found, releasing.");
+ list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
+ list) {
- list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
- list) {
+ __delete_lock (ext_l);
+ if (ext_l->blocked) {
+ list_add_tail (&ext_l->list, &posixlks_released);
+ continue;
+ }
+ __destroy_lock (ext_l);
+ }
+ }
- __delete_lock (pl_inode, ext_l);
- if (ext_l->blocked) {
- list_add_tail (&ext_l->list, &posixlks_released);
- continue;
- }
- __destroy_lock (ext_l);
- }
- }
+ list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) {
- list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) {
+ if (!list_empty (&dom->inodelk_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending inode locks found, releasing.");
- if (!list_empty (&dom->inodelk_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending inode locks found, releasing.");
+ list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
+ __delete_inode_lock (ino_l);
+ __pl_inodelk_unref (ino_l);
+ }
- list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
- __delete_inode_lock (ino_l);
- __destroy_inode_lock (ino_l);
- }
+ list_splice_init (&dom->blocked_inodelks, &inodelks_released);
- list_splice_init (&dom->blocked_inodelks, &inodelks_released);
-
- }
- if (!list_empty (&dom->entrylk_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending entry locks found, releasing.");
+ }
+ if (!list_empty (&dom->entrylk_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending entry locks found, releasing.");
- list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
- list_del_init (&entry_l->domain_list);
+ list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
+ list_del_init (&entry_l->domain_list);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
- GF_FREE (entry_l);
- }
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
+ GF_FREE (entry_l);
+ }
- list_splice_init (&dom->blocked_entrylks, &entrylks_released);
- }
+ list_splice_init (&dom->blocked_entrylks, &entrylks_released);
+ }
- list_del (&dom->inode_list);
- gf_log ("posix-locks", GF_LOG_TRACE,
- " Cleaning up domain: %s", dom->domain);
- GF_FREE ((char *)(dom->domain));
- GF_FREE (dom);
- }
+ list_del (&dom->inode_list);
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ " Cleaning up domain: %s", dom->domain);
+ GF_FREE ((char *)(dom->domain));
+ GF_FREE (dom);
+ }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
+ list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
- STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, &ext_l->user_flock);
- __destroy_lock (ext_l);
- }
+ STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0,
+ &ext_l->user_flock, NULL);
+ __destroy_lock (ext_l);
+ }
- list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
+ list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
- STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0);
- __destroy_inode_lock (ino_l);
- }
+ STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref (ino_l);
+ }
- list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
+ list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
- STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
- GF_FREE (entry_l);
+ STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
+ GF_FREE (entry_l);
- }
+ }
GF_FREE (pl_inode);
@@ -973,8 +2579,14 @@ pl_release (xlator_t *this, fd_t *fd)
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
- ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode);
+ if (fd == NULL) {
+ goto out;
+ }
+
+ ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode);
if (ret != 0)
goto out;
@@ -986,203 +2598,535 @@ pl_release (xlator_t *this, fd_t *fd)
"Releasing all locks with fd %p", fd);
delete_locks_of_fd (this, pl_inode, fd);
+ pl_update_refkeeper (this, fd->inode);
+ ret = fd_ctx_del (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE (fdctx);
out:
return ret;
}
-static int32_t
-__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
-{
- posix_lock_t *lock = NULL;
- int32_t count = 0;
- list_for_each_entry (lock, &pl_inode->ext_list, list) {
+int
+pl_releasedir (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len,
- lock->blocked == 1 ? "Blocked" : "Active");
+ if (fd == NULL) {
+ goto out;
+ }
- count++;
+ ret = fd_ctx_del (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not get fdctx");
+ goto out;
}
- return count;
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE (fdctx);
+out:
+ return ret;
}
int32_t
-get_posixlk_count (xlator_t *this, inode_t *inode)
+pl_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t tmp_pl_inode = 0;
- int ret = 0;
- int32_t count = 0;
+ PL_STACK_UNWIND (lookup, xdata, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
- if (ret != 0) {
+/*
+ * lookup fop: run PL_LOCAL_GET_REQUESTS on the request (with loc, no fd),
+ * then wind the call to the first child with pl_lookup_cbk as callback.
+ *
+ * NOTE(review): PL_LOCAL_GET_REQUESTS presumably records in frame->local
+ * which lock counts the caller asked for via xdata - confirm against the
+ * macro definition.
+ */
+int32_t
+pl_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);
+ STACK_WIND (frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+/*
+ * Callback for fstat: passes the child's result (op_ret, op_errno, buf,
+ * xdata) back up through PL_STACK_UNWIND.
+ *
+ * NOTE(review): PL_STACK_UNWIND presumably also fills lock-count replies
+ * into xdata and releases frame->local - confirm against the macro.
+ */
+int32_t
+pl_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ PL_STACK_UNWIND (fstat, xdata, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+/*
+ * fstat fop: run PL_LOCAL_GET_REQUESTS on the request (with fd, no loc),
+ * then wind the call to the first child with pl_fstat_cbk as callback.
+ */
+int32_t
+pl_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND (frame, pl_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+
+/*
+ * Callback for readdirp.
+ *
+ * When the reply carries entries (op_ret > 0) and frame->local is set,
+ * pl_set_xdata_response is called for every returned entry with the
+ * directory's inode (local->fd->inode), the entry's inode, its name and
+ * its dict.  The reply is then unwound unchanged in every case.
+ */
+int
+pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+        pl_local_t  *local = NULL;
+        gf_dirent_t *entry = NULL;
+
+        /* Failed or empty replies have no entries to annotate. */
+        if (op_ret > 0)
+                local = frame->local;
+
+        if (local) {
+                list_for_each_entry (entry, &entries->list, list) {
+                        pl_set_xdata_response (this, local, local->fd->inode,
+                                               entry->inode, entry->d_name,
+                                               entry->dict, 0);
+                }
+        }
+
+        PL_STACK_UNWIND (readdirp, xdata, frame, op_ret, op_errno, entries,
+                         xdata);
+
+        return 0;
+}
+
+/*
+ * readdirp fop: run PL_LOCAL_GET_REQUESTS on the request (with fd, no
+ * loc), then wind the call to the first child with pl_readdirp_cbk as
+ * callback.  pl_readdirp_cbk reads local->fd from frame->local.
+ */
+int
+pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND (frame, pl_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, xdata);
+
+ return 0;
+}
+
+lock_migration_info_t *
+gf_mig_info_for_lock (posix_lock_t *lock)
+{
+ lock_migration_info_t *new = NULL;
+
+ new = GF_CALLOC (1, sizeof (lock_migration_info_t),
+ gf_common_mt_lock_mig);
+ if (new == NULL) {
goto out;
}
- pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+ INIT_LIST_HEAD (&new->list);
+
+ posix_lock_to_flock (lock, &new->flock);
+
+ new->lk_flags = lock->lk_flags;
+
+ new->client_uid = gf_strdup (lock->client_uid);
+
+out:
+ return new;
+}
+
+int
+pl_fill_active_locks (pl_inode_t *pl_inode, lock_migration_info_t *lmi)
+{
+ posix_lock_t *temp = NULL;
+ lock_migration_info_t *newlock = NULL;
+ int count = 0;
pthread_mutex_lock (&pl_inode->mutex);
{
- count =__get_posixlk_count (this, pl_inode);
+ if (list_empty (&pl_inode->ext_list)) {
+ count = 0;
+ goto out;
+ }
+
+ list_for_each_entry (temp, &pl_inode->ext_list, list) {
+
+ if (temp->blocked)
+ continue;
+
+ newlock = gf_mig_info_for_lock (temp);
+ if (!newlock) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+ "lock_dup failed");
+ count = -1;
+ goto out;
+ }
+
+ list_add_tail (&newlock->list, &lmi->list);
+ count++;
+ }
+
}
- pthread_mutex_unlock (&pl_inode->mutex);
out:
+ pthread_mutex_unlock (&pl_inode->mutex);
return count;
}
-void
-pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+/* This function reads only active locks */
+static int
+pl_getactivelk (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t count = 0;
- int ret = -1;
+ pl_inode_t *pl_inode = NULL;
+ lock_migration_info_t locks;
+ int op_ret = 0;
+ int op_errno = 0;
+ int count = 0;
- count = get_entrylk_count (this, inode);
- ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT);
+ INIT_LIST_HEAD (&locks.list);
+
+ pl_inode = pl_inode_get (this, loc->inode);
+ if (!pl_inode) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "pl_inode_get failed");
+
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
}
+ count = pl_fill_active_locks (pl_inode, &locks);
+
+ op_ret = count;
+
+out:
+ STACK_UNWIND_STRICT (getactivelk, frame, op_ret, op_errno, &locks,
+ NULL);
+
+ gf_free_mig_locks (&locks);
+
+ return 0;
}
void
-pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+pl_metalk_unref (pl_meta_lock_t *lock)
{
- int32_t count = 0;
- int ret = -1;
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE (lock->client_uid);
+ GF_FREE (lock);
+ }
+}
- count = get_inodelk_count (this, inode);
- ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_INODELK_COUNT);
+
+/*
+ * Take one reference on a meta lock by incrementing lock->ref.
+ *
+ * NOTE(review): the increment is a plain ++ with no locking in this
+ * function; the double-underscore prefix suggests callers serialise
+ * access - confirm at the call sites.
+ */
+void
+__pl_metalk_ref (pl_meta_lock_t *lock)
+{
+ lock->ref++;
+}
+
+/*
+ * Allocate a meta lock for the client that issued frame.
+ *
+ * The lock is zero-allocated, its two list heads are initialised,
+ * client_uid is set to a copy of frame->root->client->client_uid and one
+ * reference is taken via __pl_metalk_ref.
+ *
+ * Returns the new lock, or NULL when either allocation fails.  The
+ * caller must make sure frame->root->client is non-NULL (pl_metalk
+ * checks this before calling).
+ */
+pl_meta_lock_t *
+new_meta_lock (call_frame_t *frame, xlator_t *this)
+{
+        pl_meta_lock_t *lock = NULL;
+
+        lock = GF_CALLOC (1, sizeof (*lock),
+                          gf_locks_mt_pl_meta_lock_t);
+
+        if (!lock) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, ENOMEM, "mem allocation"
+                        " failed for meta lock");
+                goto out;
+        }
+
+        INIT_LIST_HEAD (&lock->list);
+        INIT_LIST_HEAD (&lock->client_list);
+
+        lock->client_uid = gf_strdup (frame->root->client->client_uid);
+        if (!lock->client_uid) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, ENOMEM, "mem allocation"
+                        " failed for client_uid");
+                GF_FREE (lock);
+                /* Never return the freed pointer: callers test for NULL. */
+                lock = NULL;
+                goto out;
         }
+        __pl_metalk_ref (lock);
+out:
+        return lock;
 }
-void
-pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+int
+pl_insert_metalk (pl_inode_t *pl_inode, pl_ctx_t *ctx, pl_meta_lock_t *lock)
{
- int32_t count = 0;
- int ret = -1;
+ int ret = 0;
- count = get_posixlk_count (this, inode);
- ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT);
+ if (!pl_inode || !ctx || !lock) {
+ gf_msg (THIS->name, GF_LOG_INFO, 0, 0, "NULL parameter");
+ ret = -1;
+ goto out;
+ }
+
+ lock->pl_inode = pl_inode;
+
+ /* refer function pl_inode_setlk for more info for this ref.
+ * This should be unrefed on meta-unlock triggered by rebalance or
+ * in cleanup with client disconnect*/
+ /*TODO: unref this in cleanup code for disconnect and meta-unlock*/
+ pl_inode->inode = inode_ref (pl_inode->inode);
+
+ /* NOTE:In case of a client-server disconnect we need to cleanup metalk.
+ * Hence, adding the metalk to pl_ctx_t as well. The mutex lock order
+ * should always be on ctx and then on pl_inode*/
+
+ pthread_mutex_lock (&ctx->lock);
+ {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_add_tail (&lock->list, &pl_inode->metalk_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_add_tail (&lock->client_list, &ctx->metalk_list);
}
+ pthread_mutex_unlock (&ctx->lock);
+out:
+ return ret;
}
int32_t
-pl_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
+pl_metalk (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+ pl_meta_lock_t *reqlk = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ pl_inode = pl_inode_get (this, inode);
+ if (!pl_inode) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "pl_inode mem allocation failedd");
- if (!frame->local) {
+ ret = -1;
goto out;
}
- if (op_ret) {
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "pl_ctx_get failed");
+
+ ret = -1;
+ goto out;
+
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0, 0, "frame-root-client "
+ "is NULL");
+
+ ret = -1;
goto out;
}
- local = frame->local;
+ reqlk = new_meta_lock (frame, this);
+ if (!reqlk) {
+ ret = -1;
+ goto out;
+ }
- if (local->entrylk_count_req)
- pl_entrylk_xattr_fill (this, inode, dict);
- if (local->inodelk_count_req)
- pl_inodelk_xattr_fill (this, inode, dict);
- if (local->posixlk_count_req)
- pl_posixlk_xattr_fill (this, inode, dict);
+ ret = pl_insert_metalk (pl_inode, ctx, reqlk);
+ if (ret < 0) {
+ pl_metalk_unref (reqlk);
+ }
+out:
+ return ret;
+}
- frame->local = NULL;
+void
+__unwind_queued_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *tmp_list)
+{
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
- if (local != NULL)
- GF_FREE (local);
+ if (list_empty (&pl_inode->queued_locks))
+ return;
-out:
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict,
- postparent);
- return 0;
+ list_splice_init (&pl_inode->queued_locks, tmp_list);
}
-int32_t
-pl_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+void
+__unwind_blocked_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *tmp_list)
{
- pl_local_t *local = NULL;
- int ret = -1;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+
+ if (list_empty (&pl_inode->ext_list))
+ return;
+
+ list_for_each_entry_safe (lock, tmp, &pl_inode->ext_list, list) {
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
+ if (!lock->blocking)
+ continue;
- local = GF_CALLOC (1, sizeof (*local), gf_locks_mt_pl_local_t);
- if (!local) {
+ list_del_init (&lock->list);
+ list_add_tail (&lock->list, tmp_list);
+ }
+}
+
+int
+pl_metaunlock (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ dict_t *dict)
+{
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+ pl_meta_lock_t *meta_lock = NULL;
+ pl_meta_lock_t *tmp_metalk = NULL;
+ pl_ctx_t *ctx = NULL;
+ posix_lock_t *posix_lock = NULL;
+ posix_lock_t *tmp_posixlk = NULL;
+ struct list_head tmp_posixlk_list;
+
+ INIT_LIST_HEAD (&tmp_posixlk_list);
+
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "pl_ctx_get failed");
+
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "frame-root-client is "
+ "NULL");
ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- " Out of memory");
goto out;
}
- if (dict_get (xattr_req, GLUSTERFS_ENTRYLK_COUNT))
- local->entrylk_count_req = 1;
- if (dict_get (xattr_req, GLUSTERFS_INODELK_COUNT))
- local->inodelk_count_req = 1;
- if (dict_get (xattr_req, GLUSTERFS_POSIXLK_COUNT))
- local->posixlk_count_req = 1;
+ pl_inode = pl_inode_get (this, inode);
+ if (!pl_inode) {
+ ret = -1;
+ goto out;
+ }
- frame->local = local;
+ pthread_mutex_lock (&ctx->lock);
+ {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ /* Unwind queued locks regardless of migration status */
+ __unwind_queued_locks (this, pl_inode,
+ &tmp_posixlk_list);
+
+ /* Unwind blocked locks only for successful migration */
+ if (dict_get (dict, "status")) {
+
+ /* unwind all blocked locks */
+ __unwind_blocked_locks (this, pl_inode,
+ &tmp_posixlk_list);
+ }
+
+ /* unlock metalk */
+ /* if this list is empty then pl_inode->metalk_list
+ * should be empty too. meta lock should in all cases
+ * be added/removed from both pl_ctx_t and pl_inode */
+
+ if (list_empty (&ctx->metalk_list))
+ goto unlock;
+
+ list_for_each_entry_safe (meta_lock, tmp_metalk,
+ &ctx->metalk_list,
+ client_list) {
+ list_del_init (&meta_lock->client_list);
+
+ pl_inode = meta_lock->pl_inode;
+
+ list_del_init (&meta_lock->list);
+
+ pl_metalk_unref (meta_lock);
+
+ /* The corresponding ref is taken in
+ * pl_insert_metalk*/
+ inode_unref (pl_inode->inode);
+ }
+
+ if (dict_get (dict, "status"))
+ pl_inode->migrated = _gf_true;
+ else
+ pl_inode->migrated = _gf_false;
+ }
+unlock:
+
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ }
+ pthread_mutex_unlock (&ctx->lock);
- STACK_WIND (frame,
- pl_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- ret = 0;
out:
- if (ret == -1)
- STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL);
+ list_for_each_entry_safe (posix_lock, tmp_posixlk, &tmp_posixlk_list,
+ list) {
+ list_del_init (&posix_lock->list);
+
+ STACK_UNWIND_STRICT (lk, posix_lock->frame, -1, EREMOTE,
+ &posix_lock->user_flock, NULL);
+
+ GF_FREE (posix_lock->client_uid);
+ GF_FREE (posix_lock);
+ }
- return 0;
+ return ret;
}
-void
-pl_dump_lock (char *str, int size, struct flock *flock, uint64_t owner)
+int32_t
+pl_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
{
- char *type_str = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ if (dict_get (dict, GF_META_LOCK_KEY)) {
+
+ op_ret = pl_metalk (frame, this, loc->inode);
+
+ } else if (dict_get (dict, GF_META_UNLOCK_KEY)) {
+
+ op_ret = pl_metaunlock (frame, this, loc->inode, dict);
+
+ } else {
+ goto usual;
+ }
+
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+usual:
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ return 0;
+}
+
+void
+pl_dump_lock (char *str, int size, struct gf_flock *flock,
+ gf_lkowner_t *owner, void *trans, char *conn_id,
+ time_t *granted_time, time_t *blkd_time, gf_boolean_t active)
+{
+ char *type_str = NULL;
+ char granted[256] = {0,};
+ char blocked[256] = {0,};
+
+ if (granted_time)
+ gf_time_fmt (granted, sizeof (granted), *granted_time,
+ gf_timefmt_FT);
+ if (blkd_time)
+ gf_time_fmt (blocked, sizeof (blocked), *blkd_time,
+ gf_timefmt_FT);
switch (flock->l_type) {
case F_RDLCK:
type_str = "READ";
@@ -1198,12 +3142,32 @@ pl_dump_lock (char *str, int size, struct flock *flock, uint64_t owner)
break;
}
- snprintf (str, size, "type=%s, start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
- type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- (unsigned long long) owner);
-
+ if (active) {
+ if (blkd_time && *blkd_time == 0) {
+ snprintf (str, size, RANGE_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ granted);
+ } else {
+ snprintf (str, size, RANGE_BLKD_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ blocked, granted);
+ }
+ } else {
+ snprintf (str, size, RANGE_BLKD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id, blocked);
+ }
}
@@ -1212,42 +3176,74 @@ __dump_entrylks (pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- int count = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char blocked[256] = {0,};
+ char granted[256] = {0,};
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ char *k = "xlator.feature.locks.lock-dump.domain.entrylk";
- char tmp[256];
+ char tmp[4098];
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
count = 0;
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain",
+ "lock-dump.domain",
"domain");
gf_proc_dump_write(key, "%s", dom->domain);
list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.entrylk",
- "entrylk[%d](ACTIVE)",count );
- snprintf (tmp, 256," %s on %s",
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
+ gf_time_fmt (granted, sizeof (granted),
+ lock->granted_time.tv_sec, gf_timefmt_FT);
+ gf_proc_dump_build_key(key, k,
+ "entrylk[%d](ACTIVE)", count );
+ if (lock->blkd_time.tv_sec == 0) {
+ snprintf (tmp, sizeof (tmp), ENTRY_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ?
+ "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK",
+ lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->client,
+ lock->connection_id, granted);
+ } else {
+ gf_time_fmt (blocked, sizeof (blocked),
+ lock->blkd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (tmp, sizeof (tmp),
+ ENTRY_BLKD_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ?
+ "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK",
+ lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->client,
+ lock->connection_id,
+ blocked, granted);
+ }
gf_proc_dump_write(key, tmp);
count++;
}
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ list_for_each_entry (lock, &dom->blocked_entrylks,
+ blocked_locks) {
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.entrylk",
- "entrylk[%d](BLOCKED)",count );
- snprintf (tmp, 256," %s on %s state = Blocked",
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
+ gf_time_fmt (blocked, sizeof (blocked),
+ lock->blkd_time.tv_sec, gf_timefmt_FT);
+
+ gf_proc_dump_build_key(key, k,
+ "entrylk[%d](BLOCKED)", count );
+ snprintf (tmp, sizeof (tmp), ENTRY_BLKD_FMT,
+ lock->type == ENTRYLK_RDLCK ?
+ "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK",
+ lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->client,
+ lock->connection_id, blocked);
gf_proc_dump_write(key, tmp);
@@ -1255,7 +3251,6 @@ __dump_entrylks (pl_inode_t *pl_inode)
}
}
-
}
void
@@ -1277,24 +3272,30 @@ __dump_inodelks (pl_inode_t *pl_inode)
int count = 0;
char key[GF_DUMP_MAX_BUF_LEN];
- char tmp[256];
+ char tmp[4098];
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
count = 0;
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain",
+ "lock-dump.domain",
"domain");
gf_proc_dump_write(key, "%s", dom->domain);
list_for_each_entry (lock, &dom->inodelk_list, list) {
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.inodelk",
+ "inodelk",
"inodelk[%d](ACTIVE)",count );
- pl_dump_lock (tmp, 256, &lock->user_flock, lock->owner);
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ pl_dump_lock (tmp, sizeof (tmp), &lock->user_flock,
+ &lock->owner,
+ lock->client, lock->connection_id,
+ &lock->granted_time.tv_sec,
+ &lock->blkd_time.tv_sec,
+ _gf_true);
gf_proc_dump_write(key, tmp);
count++;
@@ -1303,9 +3304,14 @@ __dump_inodelks (pl_inode_t *pl_inode)
list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.inodelk",
+ "inodelk",
"inodelk[%d](BLOCKED)",count );
- pl_dump_lock (tmp, 256, &lock->user_flock, lock->owner);
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ pl_dump_lock (tmp, sizeof (tmp), &lock->user_flock,
+ &lock->owner,
+ lock->client, lock->connection_id,
+ 0, &lock->blkd_time.tv_sec,
+ _gf_false);
gf_proc_dump_write(key, tmp);
count++;
@@ -1333,23 +3339,24 @@ __dump_posixlks (pl_inode_t *pl_inode)
int count = 0;
char key[GF_DUMP_MAX_BUF_LEN];
- char tmp[256];
+ char tmp[4098];
list_for_each_entry (lock, &pl_inode->ext_list, list) {
+ SET_FLOCK_PID (&lock->user_flock, lock);
gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.posixlk",
+ "posixlk",
"posixlk[%d](%s)",
count,
lock->blocked ? "BLOCKED" : "ACTIVE");
- pl_dump_lock (tmp, 256, &lock->user_flock, lock->owner);
+ pl_dump_lock (tmp, sizeof (tmp), &lock->user_flock,
+ &lock->owner, lock->client, NULL,
+ &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
+ (lock->blocked)? _gf_false: _gf_true);
gf_proc_dump_write(key, tmp);
count++;
}
-
-
-
}
void
@@ -1370,93 +3377,273 @@ pl_dump_inode_priv (xlator_t *this, inode_t *inode)
int ret = -1;
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char *pathname = NULL;
+ gf_boolean_t section_added = _gf_false;
int count = 0;
- if (!inode)
- return -1;
-
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (!inode) {
+ errno = EINVAL;
+ goto out;
+ }
- if (ret != 0)
- return ret;
+ ret = TRY_LOCK (&inode->lock);
+ if (ret)
+ goto out;
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (ret)
+ goto out;
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ if (!pl_inode) {
+ ret = -1;
+ goto out;
+ }
- if (!pl_inode)
- return -1;
+ gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ section_added = _gf_true;
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.inode",
- "%ld.mandatory",inode->ino);
- gf_proc_dump_write(key, "%d", pl_inode->mandatory);
+ /*We are safe to call __inode_path since we have the
+ * inode->table->lock */
+ __inode_path (inode, NULL, &pathname);
+ if (pathname)
+ gf_proc_dump_write ("path", "%s", pathname);
+ gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
- count = get_entrylk_count (this, inode);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.entrylk-count",
- "%ld.entrylk-count", inode->ino);
- gf_proc_dump_write(key, "%d", count);
+ ret = pthread_mutex_trylock (&pl_inode->mutex);
+ if (ret)
+ goto out;
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("entrylk-count", "%d", count);
+ __dump_entrylks (pl_inode);
+ }
- dump_entrylks(pl_inode);
+ count = __get_inodelk_count (this, pl_inode, NULL);
+ if (count) {
+ gf_proc_dump_write("inodelk-count", "%d", count);
+ __dump_inodelks (pl_inode);
+ }
- count = get_inodelk_count (this, inode);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.inodelk-count",
- "%ld.inodelk-count", inode->ino);
- gf_proc_dump_write(key, "%d", count);
+ count = __get_posixlk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks (pl_inode);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- dump_inodelks(pl_inode);
+out:
+ GF_FREE (pathname);
+
+ if (ret && inode) {
+ if (!section_added)
+ gf_proc_dump_add_section ("xlator.features.locks.%s."
+ "inode", this->name);
+ gf_proc_dump_write ("Unable to print lock state", "(Lock "
+ "acquisition failure) %s",
+ uuid_utoa (inode->gfid));
+ }
+ return ret;
+}
- count = get_posixlk_count (this, inode);
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.posixlk-count",
- "%ld.posixlk-count", inode->ino);
- gf_proc_dump_write(key, "%d", count);
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
- dump_posixlks(pl_inode);
+ if (!this)
+ return ret;
+ ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1);
- return 0;
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
}
+/* Return the locks-translator context stored on `client` for `xlator`.
+ * When none is stored yet, allocate one, initialise its mutex and list
+ * heads, and attach it to the client. Returns NULL when the allocation
+ * or the attach step fails. */
+pl_ctx_t*
+pl_ctx_get (client_t *client, xlator_t *xlator)
+{
+        void     *stored  = NULL;
+        pl_ctx_t *new_ctx = NULL;
+
+        client_ctx_get (client, xlator, &stored);
+        if (stored != NULL)
+                return stored;
+
+        new_ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t);
+        if (new_ctx == NULL)
+                return NULL;
+
+        pthread_mutex_init (&new_ctx->lock, NULL);
+        INIT_LIST_HEAD (&new_ctx->inodelk_lockers);
+        INIT_LIST_HEAD (&new_ctx->entrylk_lockers);
+        INIT_LIST_HEAD (&new_ctx->metalk_list);
+
+        if (client_ctx_set (client, xlator, new_ctx) == 0)
+                return new_ctx;
+
+        /* Attaching failed: tear the half-built context down again. */
+        pthread_mutex_destroy (&new_ctx->lock);
+        GF_FREE (new_ctx);
+
+        return NULL;
+}
-/*
- * pl_dump_inode - inode dump function for posix locks
- *
- */
int
-pl_dump_inode (xlator_t *this)
+pl_metalk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ pl_meta_lock_t *meta_lock = NULL;
+ pl_meta_lock_t *tmp_metalk = NULL;
+ pl_inode_t *pl_inode = NULL;
+ posix_lock_t *posix_lock = NULL;
+ posix_lock_t *tmp_posixlk = NULL;
+ struct list_head tmp_posixlk_list;
- assert(this);
+ INIT_LIST_HEAD (&tmp_posixlk_list);
- if (this->itable) {
- inode_table_dump(this->itable,
- "xlator.features.locks.inode_table");
+ pthread_mutex_lock (&ctx->lock);
+ {
+
+ /* if this list is empty then pl_inode->metalk_list should be
+ * empty too. meta lock should in all cases be added/removed
+ * from both pl_ctx_t and pl_inode */
+ if (list_empty (&ctx->metalk_list))
+ goto unlock;
+
+ list_for_each_entry_safe (meta_lock, tmp_metalk,
+ &ctx->metalk_list, client_list) {
+ list_del_init (&meta_lock->client_list);
+
+ pl_inode = meta_lock->pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+
+ {
+
+ /* Since the migration status is unknown here
+ * unwind all queued and blocked locks to check
+ * migration status and find the correct
+ * destination */
+ __unwind_queued_locks (this, pl_inode,
+ &tmp_posixlk_list);
+
+ __unwind_blocked_locks (this, pl_inode,
+ &tmp_posixlk_list);
+
+ list_del_init (&meta_lock->list);
+
+ pl_metalk_unref (meta_lock);
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* The corresponding ref is taken in
+ * pl_insert_metalk*/
+ inode_unref (pl_inode->inode);
+ }
}
+unlock:
+ pthread_mutex_unlock (&ctx->lock);
+
+ list_for_each_entry_safe (posix_lock, tmp_posixlk, &tmp_posixlk_list,
+ list) {
+ list_del_init (&posix_lock->list);
+
+ STACK_UNWIND_STRICT (lk, posix_lock->frame, -1, EREMOTE,
+ &posix_lock->user_flock, NULL);
+
+ GF_FREE (posix_lock->client_uid);
+ GF_FREE (posix_lock);
+ }
return 0;
}
-int32_t
-mem_acct_init (xlator_t *this)
+static int
+pl_client_disconnect_cbk (xlator_t *this, client_t *client)
{
- int ret = -1;
+ pl_ctx_t *pl_ctx = NULL;
- if (!this)
- return ret;
+ pl_ctx = pl_ctx_get (client, this);
- ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ pl_inodelk_client_cleanup (this, pl_ctx);
+
+ pl_entrylk_client_cleanup (this, pl_ctx);
+
+ pl_metalk_client_cleanup (this, pl_ctx);
+
+ return 0;
+}
+
+
+/* Client-destroy callback: run the disconnect cleanup, then detach the
+ * per-client context from `client` and release it. Always returns 0. */
+static int
+pl_client_destroy_cbk (xlator_t *this, client_t *client)
+{
+        void     *detached = NULL;
+        pl_ctx_t *pl_ctx   = NULL;
+
+        pl_client_disconnect_cbk (this, client);
+
+        client_ctx_del (client, this, &detached);
+        pl_ctx = detached;
+
+        if (pl_ctx != NULL) {
+                /* The disconnect cleanup above is expected to have
+                 * emptied both locker lists. */
+                GF_ASSERT (list_empty(&pl_ctx->inodelk_lockers));
+                GF_ASSERT (list_empty(&pl_ctx->entrylk_lockers));
+
+                pthread_mutex_destroy (&pl_ctx->lock);
+                GF_FREE (pl_ctx);
+        }
+
+        return 0;
+}
+
+/* Re-read the runtime-tunable options of the locks translator from
+ * `options` into this->private: trace, monkey-unlocking,
+ * revocation-secs, revocation-clear-all and revocation-max-blocked.
+ * Returns 0 once every option has been processed. `ret` starts at -1 and
+ * each GF_OPTION_RECONF is handed the `out` label, so presumably a failed
+ * option jumps there and -1 is returned -- confirm against the macro. */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ posix_locks_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF ("trace", priv->trace, options, bool, out);
+
+ GF_OPTION_RECONF ("monkey-unlocking", priv->monkey_unlocking, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("revocation-secs",
+ priv->revocation_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("revocation-clear-all", priv->revocation_clear_all,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("revocation-max-blocked",
+ priv->revocation_max_blocked, options,
+ uint32, out);
+ /* Reached only when none of the macros above left via `out`. */
+ ret = 0;
+
+out:
return ret;
}
@@ -1465,13 +3652,13 @@ init (xlator_t *this)
{
posix_locks_private_t *priv = NULL;
xlator_list_t *trav = NULL;
- data_t *mandatory = NULL;
- data_t *trace = NULL;
+ char *tmp_str = NULL;
+ int ret = -1;
if (!this->children || this->children->next) {
gf_log (this->name, GF_LOG_CRITICAL,
"FATAL: posix-locks should have exactly one child");
- return -1;
+ goto out;
}
if (!this->parents) {
@@ -1487,29 +3674,53 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_CRITICAL,
"'locks' translator is not loaded over a storage "
"translator");
- return -1;
+ goto out;
}
- priv = GF_CALLOC (1, sizeof (*priv),
+ priv = GF_CALLOC (1, sizeof (*priv),
gf_locks_mt_posix_locks_private_t);
- mandatory = dict_get (this->options, "mandatory-locks");
- if (mandatory)
- gf_log (this->name, GF_LOG_WARNING,
- "mandatory locks not supported in this minor release.");
-
- trace = dict_get (this->options, "trace");
- if (trace) {
- if (gf_string2boolean (trace->data,
- &priv->trace) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'trace' takes on only boolean values.");
- return -1;
- }
- }
+ GF_OPTION_INIT ("mandatory-locking", tmp_str, str, out);
+ if (!strcmp (tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp (tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp (tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+ tmp_str = NULL;
+
+ GF_OPTION_INIT ("trace", priv->trace, bool, out);
+
+ GF_OPTION_INIT ("monkey-unlocking", priv->monkey_unlocking,
+ bool, out);
+
+ GF_OPTION_INIT ("revocation-secs", priv->revocation_secs,
+ uint32, out);
+
+ GF_OPTION_INIT ("revocation-clear-all", priv->revocation_clear_all,
+ bool, out);
+
+ GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked,
+ uint32, out);
+
+ this->local_pool = mem_pool_new (pl_local_t, 32);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
this->private = priv;
- return 0;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE (priv);
+ }
+ return ret;
}
@@ -1519,6 +3730,10 @@ fini (xlator_t *this)
posix_locks_private_t *priv = NULL;
priv = this->private;
+ if (!priv)
+ return 0;
+ this->private = NULL;
+ GF_FREE (priv->brickname);
GF_FREE (priv);
return 0;
@@ -1527,27 +3742,177 @@ fini (xlator_t *this)
int
pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock);
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock);
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
pl_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int
pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+/* Callback for the rename wound by pl_rename: passes the child's result
+ * (op_ret/op_errno, the five iatt buffers and xdata) on to the caller
+ * through PL_STACK_UNWIND. Always returns 0. */
+int32_t
+pl_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ PL_STACK_UNWIND (rename, xdata, frame, op_ret, op_errno,
+ buf, preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+ return 0;
+}
+
+/* rename fop: winds the request unchanged to the first child, with
+ * pl_rename_cbk as the callback. Always returns 0.
+ * NOTE(review): PL_LOCAL_GET_REQUESTS is given both locs and xdata;
+ * presumably it records per-frame state that PL_STACK_UNWIND uses in the
+ * callback -- confirm against the macro definition. */
+int32_t
+pl_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, oldloc, newloc);
+
+ STACK_WIND (frame, pl_rename_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->rename, oldloc,
+ newloc, xdata);
+ return 0;
+}
+
+/* Build a newly allocated posix_lock_t from one migrated-lock record.
+ *
+ * The byte range and lock type are copied from lmi->flock; a zero l_len
+ * is mapped to an open-ended range (fl_end = LLONG_MAX). The lock is
+ * attributed to the client of `frame`, and client_uid is duplicated so
+ * the result owns its own copy.
+ *
+ * Returns the new lock, or NULL when either allocation fails. The caller
+ * owns the result (both lock->client_uid and the lock itself). */
+posix_lock_t *
+gf_lkmig_info_to_posix_lock (call_frame_t *frame,
+                             lock_migration_info_t *lmi)
+{
+        posix_lock_t *lock = NULL;
+
+        lock = GF_CALLOC (1, sizeof (posix_lock_t), gf_locks_mt_posix_lock_t);
+        if (!lock)
+                goto out;
+
+        lock->fl_start = lmi->flock.l_start;
+        lock->fl_type  = lmi->flock.l_type;
+
+        if (lmi->flock.l_len == 0)
+                lock->fl_end = LLONG_MAX;
+        else
+                lock->fl_end = lmi->flock.l_start + lmi->flock.l_len - 1;
+
+        lock->client = frame->root->client;
+
+        lock->lk_flags = lmi->lk_flags;
+
+        lock->client_uid = gf_strdup (lmi->client_uid);
+        if (lock->client_uid == NULL) {
+                GF_FREE (lock);
+                /* Do not hand the freed pointer back to the caller: it
+                 * only checks for NULL and would otherwise link freed
+                 * memory into the inode's lock list. */
+                lock = NULL;
+                goto out;
+        }
+
+        lock->client_pid = lmi->flock.l_pid;
+        lock->owner      = lmi->flock.l_owner;
+
+        INIT_LIST_HEAD (&lock->list);
+
+out:
+        return lock;
+}
+
+/* Install the active locks received from the source brick (rebalance
+ * context) on this inode by appending one posix_lock_t per entry of
+ * `locklist` directly to pl_inode->ext_list.
+ *
+ * Returns 0 on success. Returns -1 when ext_list is not empty to begin
+ * with, when `locklist` is empty, or when a lock cannot be allocated. In
+ * the last case every lock added by this call is removed and freed
+ * again, so ext_list is left empty as it was found. */
+int
+pl_write_active_locks (call_frame_t *frame, pl_inode_t *pl_inode,
+                       lock_migration_info_t *locklist)
+{
+        posix_lock_t          *newlock = NULL;
+        posix_lock_t          *added   = NULL;
+        posix_lock_t          *next    = NULL;
+        lock_migration_info_t *temp    = NULL;
+        int                    ret     = 0;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                /* Just making sure the activelk list is empty. Should not
+                 * happen though */
+                if (!list_empty (&pl_inode->ext_list)) {
+
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+                                "invalid locks found");
+
+                        ret = -1;
+                        goto out;
+                }
+
+                /* This list also should not be empty */
+                if (list_empty (&locklist->list)) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+                                "empty lock list");
+
+                        ret = -1;
+                        goto out;
+                }
+
+                list_for_each_entry (temp, &locklist->list, list) {
+
+                        newlock = gf_lkmig_info_to_posix_lock (frame, temp);
+                        if (!newlock) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+                                        "mem allocation failed for newlock");
+
+                                ret = -1;
+                                goto rollback;
+                        }
+                        list_add_tail (&newlock->list, &pl_inode->ext_list);
+                }
+        }
+        goto out;
+
+rollback:
+        /* ext_list was verified empty above and the mutex has been held
+         * ever since, so everything on it now was added by this call.
+         * Drop it all rather than leave the inode with only part of the
+         * migrated lock set. */
+        list_for_each_entry_safe (added, next, &pl_inode->ext_list, list) {
+                list_del_init (&added->list);
+                GF_FREE (added->client_uid);
+                GF_FREE (added);
+        }
+
+out:
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return ret;
+}
+
+/* setactivelk fop: write the migrated lock list `locklist` onto the inode
+ * named by `loc` and unwind with the outcome.
+ *
+ * Unwinds with op_ret 0 on success. On failure op_ret is -1 and op_errno
+ * is ENOMEM when the pl_inode cannot be obtained, or EINVAL when
+ * pl_write_active_locks rejects or fails to install the list.
+ * Always returns 0. */
+static int
+pl_setactivelk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                lock_migration_info_t *locklist, dict_t *xdata)
+{
+        pl_inode_t *pl_inode = NULL;
+        int         op_ret   = 0;
+        int         op_errno = 0;
+
+        pl_inode = pl_inode_get (this, loc->inode);
+        if (!pl_inode) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+                        "pl_inode_get failed");
+
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        op_ret = pl_write_active_locks (frame, pl_inode, locklist);
+        if (op_ret < 0) {
+                /* pl_write_active_locks only reports -1; never unwind a
+                 * failure with op_errno left at 0. */
+                op_errno = EINVAL;
+        }
+
+out:
+        STACK_UNWIND_STRICT (setactivelk, frame, op_ret, op_errno, NULL);
+
+        return 0;
+}
struct xlator_fops fops = {
.lookup = pl_lookup,
.create = pl_create,
+ .fstat = pl_fstat,
.truncate = pl_truncate,
.ftruncate = pl_ftruncate,
+ .discard = pl_discard,
+ .zerofill = pl_zerofill,
.open = pl_open,
.readv = pl_readv,
.writev = pl_writev,
@@ -1558,6 +3923,14 @@ struct xlator_fops fops = {
.fentrylk = pl_fentrylk,
.flush = pl_flush,
.opendir = pl_opendir,
+ .readdirp = pl_readdirp,
+ .getxattr = pl_getxattr,
+ .fgetxattr = pl_fgetxattr,
+ .fsetxattr = pl_fsetxattr,
+ .setxattr = pl_setxattr,
+ .rename = pl_rename,
+ .getactivelk = pl_getactivelk,
+ .setactivelk = pl_setactivelk,
};
struct xlator_dumpops dumpops = {
@@ -1565,17 +3938,58 @@ struct xlator_dumpops dumpops = {
};
struct xlator_cbks cbks = {
- .forget = pl_forget,
- .release = pl_release,
+ .forget = pl_forget,
+ .release = pl_release,
+ .releasedir = pl_releasedir,
+ .client_destroy = pl_client_destroy_cbk,
+ .client_disconnect = pl_client_disconnect_cbk,
};
-
struct volume_options options[] = {
- { .key = { "mandatory-locks", "mandatory" },
- .type = GF_OPTION_TYPE_BOOL
+ { .key = { "mandatory-locking" },
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "off",
+ .description = "Specifies the mandatory-locking mode. Valid options "
+ "are 'file' to use linux style mandatory locks, "
+ "'forced' to use volume striclty under mandatory lock "
+ "semantics only and 'optimal' to treat advisory and "
+ "mandatory locks separately on their own."
+ },
+ { .key = { "trace" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Trace the different lock requests "
+ "to logs."
+ },
+ { .key = { "monkey-unlocking" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "Ignore a random number of unlock requests. Useful "
+ "for testing/creating robust lock recovery mechanisms."
+ },
+ { .key = {"revocation-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "Maximum time a lock can be taken out, before"
+ "being revoked.",
+ },
+ { .key = {"revocation-clear-all"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "If set to true, will revoke BOTH granted and blocked "
+ "(pending) lock requests if a revocation threshold is "
+ "hit.",
+ },
+ { .key = {"revocation-max-blocked"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "A number of blocked lock requests after which a lock "
+ "will be revoked to allow the others to proceed. Can "
+ "be used in conjunction w/ revocation-clear-all."
},
- { .key = { "trace" },
- .type = GF_OPTION_TYPE_BOOL
- },
{ .key = {NULL} },
};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
new file mode 100644
index 00000000000..8eb08d0ef79
--- /dev/null
+++ b/xlators/features/locks/src/reservelk.c
@@ -0,0 +1,438 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+/* Unlink `lock` from the list its `list` node is currently on.
+ * The lock itself is not freed. */
+void
+__delete_reserve_lock (posix_lock_t *lock)
+{
+ list_del (&lock->list);
+}
+
+/* Release the memory of a reservelk with GF_FREE. Only the lock structure
+ * itself is freed here. */
+void
+__destroy_reserve_lock (posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+/* Two reservelks compare equal when they cover exactly the same byte
+ * range (same fl_start and same fl_end). Returns 1 if so, 0 otherwise. */
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2)
+{
+        int same_start = (l1->fl_start == l2->fl_start);
+        int same_end   = (l1->fl_end == l2->fl_end);
+
+        return same_start && same_end;
+}
+
+/* Determine whether `lock` can be granted: scan the inode's reservelk
+ * list for a lock with identical boundaries. Returns that existing lock
+ * (meaning the request cannot be granted right now), or NULL when the
+ * list is empty or holds no such lock. */
+static posix_lock_t *
+__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+        xlator_t     *this = THIS;
+        posix_lock_t *held = NULL;
+
+        if (list_empty (&pl_inode->reservelk_list)) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "No reservelks in list");
+                return NULL;
+        }
+
+        list_for_each_entry (held, &pl_inode->reservelk_list, list) {
+                if (reservelks_equal (lock, held))
+                        return held;
+        }
+
+        return NULL;
+}
+
+/* Compare the lk-owners of two locks; returns whatever is_same_lkowner
+ * reports for them. */
+static int
+__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return (is_same_lkowner (&l1->owner, &l2->owner));
+
+}
+
+/* Find the reservelk on pl_inode->reservelk_list whose boundaries equal
+ * those of `lock`. Returns the matching lock, or NULL when the list is
+ * empty or contains no match. */
+static posix_lock_t *
+__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+        posix_lock_t *l = NULL;
+
+        if (list_empty (&pl_inode->reservelk_list)) {
+                gf_log ("posix-locks", GF_LOG_TRACE,
+                        "reservelk list empty");
+                return NULL;
+        }
+
+        list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+                if (reservelks_equal (l, lock)) {
+                        gf_log ("posix-locks", GF_LOG_TRACE,
+                                "equal reservelk found");
+                        return l;
+                }
+        }
+
+        /* When the loop runs to completion the cursor is not NULL: it is
+         * derived from the list head itself and is not a real lock.
+         * Report "no match" explicitly so callers never touch it. */
+        return NULL;
+}
+
+/* Check `lock` against the inode's reservelks.
+ * Returns 0 when no reservelk matches, or when the matching reservelk has
+ * the same owner (in which case that reservelk is unlinked and freed so
+ * the setlk can proceed). Returns 1 when the matching reservelk belongs to
+ * a different owner. */
+static int
+__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode,
+                      posix_lock_t *lock)
+{
+        posix_lock_t *match = __matching_reservelk (pl_inode, lock);
+
+        if (!match)
+                return 0;
+
+        gf_log (this->name, GF_LOG_TRACE,
+                "Matching reservelk found");
+
+        if (!__same_owner_reservelk (lock, match)) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "Conflicting reservelk found");
+                return 1;
+        }
+
+        list_del_init (&match->list);
+        gf_log (this->name, GF_LOG_TRACE,
+                "Removing the matching reservelk for setlk to progress");
+        GF_FREE (match);
+
+        return 0;
+}
+
+/* Under pl_inode->mutex, check whether `lock` conflicts with a reservelk.
+ * On conflict the lock is marked with `can_block`, queued on
+ * pl_inode->blocked_calls and -1 is returned; otherwise 0 is returned and
+ * the call may continue. */
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+                     posix_lock_t *lock, int can_block)
+{
+        int rc = 0;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+
+        if (__reservelk_conflict (this, pl_inode, lock)) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "Found conflicting reservelk. Blocking until reservelk is unlocked.");
+                lock->blocked = can_block;
+                list_add_tail (&lock->list, &pl_inode->blocked_calls);
+                rc = -1;
+        } else {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "no conflicting reservelk found. Call continuing");
+        }
+
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return rc;
+}
+
+
+/* Try to take a reservelk.
+ * If no reservelk with the same boundaries exists, `lock` is added to
+ * pl_inode->reservelk_list and 0 is returned. Otherwise -EAGAIN is
+ * returned; when `can_block` is non-zero the lock is additionally queued
+ * on pl_inode->blocked_reservelks.
+ */
+static int
+__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+                  int can_block)
+{
+        posix_lock_t *holder = __reservelk_grantable (pl_inode, lock);
+
+        if (!holder) {
+                list_add (&lock->list, &pl_inode->reservelk_list);
+                return 0;
+        }
+
+        if (can_block != 0) {
+                list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
+
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                        lock->client_pid,
+                        lkowner_utoa (&lock->owner),
+                        lock->user_flock.l_start,
+                        lock->user_flock.l_len);
+        }
+
+        return -EAGAIN;
+}
+
+/* Return the first reservelk on the inode with the same boundaries as
+ * `lock`, or NULL when there is none. */
+static posix_lock_t *
+find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+        posix_lock_t *cur   = NULL;
+        posix_lock_t *found = NULL;
+
+        list_for_each_entry (cur, &pl_inode->reservelk_list, list) {
+                if (reservelks_equal (cur, lock)) {
+                        found = cur;
+                        break;
+                }
+        }
+
+        return found;
+}
+
+/* F_UNLCK handling: unlink the reservelk that has exactly the boundaries
+ * given by `lock`. Returns the unlinked lock (not freed here), or NULL
+ * when no such lock exists.
+ */
+static posix_lock_t *
+__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+        posix_lock_t *victim = find_matching_reservelk (lock, pl_inode);
+
+        if (victim) {
+                __delete_reserve_lock (victim);
+                gf_log (this->name, GF_LOG_DEBUG,
+                        " Matching lock found for unlock");
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        " Matching lock not found for unlock");
+        }
+
+        return victim;
+}
+
+/* Retry every reservelk currently queued on pl_inode->blocked_reservelks.
+ * The queue is first moved to a private list, then each entry is retried
+ * through __lock_reservelk with can_block = 1 (which re-queues it on
+ * blocked_reservelks if it still cannot be taken). Entries that succeed
+ * are added to `granted`. */
+static void
+__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ /* Take the whole blocked queue, leaving the inode's queue empty. */
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = __lock_reservelk (this, pl_inode, bl, 1);
+
+ if (bl_ret == 0) {
+ /* NOTE(review): on success __lock_reservelk has already linked
+ * bl->list into pl_inode->reservelk_list; adding the same node
+ * to `granted` here relinks it without unlinking -- confirm
+ * this is intended. */
+ list_add (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+/* Retry all blocked reservelks of the inode and unwind the lk call of
+ * each one that could now be granted (op_ret 0, op_errno 0). */
+void
+grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
+{
+        struct list_head  granted;
+        posix_lock_t     *cur  = NULL;
+        posix_lock_t     *next = NULL;
+
+        INIT_LIST_HEAD (&granted);
+
+        if (list_empty (&pl_inode->blocked_reservelks)) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "No blocked locks to be granted");
+                return;
+        }
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        __grant_blocked_reserve_locks (this, pl_inode, &granted);
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        /* Reply to the callers outside the mutex. */
+        list_for_each_entry_safe (cur, next, &granted, list) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
+                        cur->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                        cur->client_pid,
+                        lkowner_utoa (&cur->owner),
+                        cur->user_flock.l_start,
+                        cur->user_flock.l_len);
+
+                STACK_UNWIND_STRICT (lk, cur->frame, 0, 0, &cur->user_flock,
+                                     NULL);
+        }
+}
+
+/* Re-evaluate the lock calls parked on pl_inode->blocked_calls (the queue
+ * pl_verify_reservelk fills when a call conflicts with a reservelk).
+ * Calls that no longer conflict are moved to `granted`; calls that still
+ * conflict are put back on blocked_calls.
+ *
+ * Must be called with pl_inode->mutex held, as grant_blocked_lock_calls
+ * does. For that reason the conflict check uses __reservelk_conflict
+ * directly: pl_verify_reservelk takes pl_inode->mutex itself and would
+ * relock a mutex this thread already owns. */
+static void
+__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode,
+                            struct list_head *granted)
+{
+        posix_lock_t *bl  = NULL;
+        posix_lock_t *tmp = NULL;
+
+        struct list_head blocked_list;
+
+        INIT_LIST_HEAD (&blocked_list);
+        /* Drain the blocked *calls* queue, not the blocked reservelks:
+         * those are handled by __grant_blocked_reserve_locks. */
+        list_splice_init (&pl_inode->blocked_calls, &blocked_list);
+
+        list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+                list_del_init (&bl->list);
+
+                if (__reservelk_conflict (this, pl_inode, bl)) {
+                        /* Still blocked by a reservelk: keep waiting,
+                         * with bl->blocked left as it was. */
+                        list_add_tail (&bl->list, &pl_inode->blocked_calls);
+                        continue;
+                }
+
+                list_add_tail (&bl->list, granted);
+        }
+        return;
+}
+
+/* Resume the lock calls that were parked on pl_inode->blocked_calls.
+ * Calls that no longer conflict with a reservelk are collected under the
+ * inode mutex and then retried through pl_setlk outside it. A retry that
+ * fails is either traced as blocked again (blocking request) or unwound
+ * with EAGAIN and destroyed (non-blocking request). */
+void
+grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
+{
+        struct list_head  granted;
+        posix_lock_t     *lock = NULL;
+        posix_lock_t     *tmp  = NULL;
+        fd_t             *fd   = NULL;
+
+        int               can_block = 0;
+        int32_t           cmd       = 0;
+        int               ret       = 0;
+
+        /* The helper below appends to this list and the loop further
+         * down walks it, so it must be a valid empty list first. */
+        INIT_LIST_HEAD (&granted);
+
+        if (list_empty (&pl_inode->blocked_calls)) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "No blocked lock calls to be granted");
+                return;
+        }
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                __grant_blocked_lock_calls (this, pl_inode, &granted);
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        list_for_each_entry_safe (lock, tmp, &granted, list) {
+                /* `granted` lives on this stack frame: detach the lock
+                 * before it is handed on or destroyed so it never keeps
+                 * pointers into the temporary list. */
+                list_del_init (&lock->list);
+
+                fd = fd_from_fdnum (lock);
+
+                /* Decide per lock; a blocking request earlier in the list
+                 * must not make later non-blocking requests blocking. */
+                if (lock->blocked) {
+                        can_block = 1;
+                        cmd = F_SETLKW;
+                } else {
+                        can_block = 0;
+                        cmd = F_SETLK;
+                }
+
+                lock->blocked = 0;
+                ret = pl_setlk (this, pl_inode, lock, can_block);
+                if (ret == -1) {
+                        if (can_block) {
+                                pl_trace_block (this, lock->frame, fd, NULL,
+                                                cmd, &lock->user_flock, NULL);
+                                continue;
+                        } else {
+                                gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+                                pl_trace_out (this, lock->frame, fd, NULL, cmd,
+                                              &lock->user_flock, -1, EAGAIN, NULL);
+                                pl_update_refkeeper (this, fd->inode);
+                                STACK_UNWIND_STRICT (lk, lock->frame, -1,
+                                                     EAGAIN, &lock->user_flock,
+                                                     NULL);
+                                __destroy_lock (lock);
+                        }
+                }
+
+        }
+
+}
+
+
+/* Release the reservelk whose boundaries match `lock`.
+ * Returns 0 when a matching reservelk was found and destroyed, -EINVAL
+ * when none exists. In both cases the blocked reservelks and the blocked
+ * lock calls of the inode are re-examined afterwards. */
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+        posix_lock_t *released = NULL;
+        int           rc       = -1;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+
+        released = __reserve_unlock_lock (this, lock, pl_inode);
+        if (released) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "Reservelk Unlock successful");
+                __destroy_reserve_lock (released);
+                rc = 0;
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Bad Unlock issued on Inode lock");
+                rc = -EINVAL;
+        }
+
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        grant_blocked_reserve_locks (this, pl_inode);
+        grant_blocked_lock_calls (this, pl_inode);
+
+        return rc;
+}
+
+/* Take a reservelk on the inode under pl_inode->mutex and trace the
+ * outcome. Returns the result of __lock_reservelk: 0 when the lock was
+ * taken, a negative value otherwise. */
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+                  int can_block)
+{
+        int rc = -EINVAL;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+
+        rc = __lock_reservelk (this, pl_inode, lock, can_block);
+        if (rc >= 0) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
+                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                        lock->client_pid,
+                        lkowner_utoa (&lock->owner),
+                        lock->fl_start,
+                        lock->fl_end);
+        } else {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
+                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                        lock->client_pid,
+                        lkowner_utoa (&lock->owner),
+                        lock->user_flock.l_start,
+                        lock->user_flock.l_len);
+        }
+
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return rc;
+}
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index fc69ce8a9bd..dec2ba85909 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "compat.h"
#include "xlator.h"
diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am
index 915c13e308f..1a312991f6b 100644
--- a/xlators/features/mac-compat/src/Makefile.am
+++ b/xlators/features/mac-compat/src/Makefile.am
@@ -1,13 +1,15 @@
xlator_LTLIBRARIES = mac-compat.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-mac_compat_la_LDFLAGS = -module -avoidversion
+mac_compat_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
mac_compat_la_SOURCES = mac-compat.c
mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = mac-compat.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c
index 649d2ad5db4..795a387d484 100644
--- a/xlators/features/mac-compat/src/mac-compat.c
+++ b/xlators/features/mac-compat/src/mac-compat.c
@@ -1,165 +1,212 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "defaults.h"
#include "compat-errno.h"
+#include "syscall.h"
+#include "mem-pool.h"
+#include "mac-compat.h"
-
-enum apple_xattr {
- GF_FINDER_INFO_XATTR,
- GF_RESOURCE_FORK_XATTR,
- GF_XATTR_ALL,
- GF_XATTR_NONE
-};
-
-static char *apple_xattr_name[] = {
- [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo",
- [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork"
-};
-
-static const char *apple_xattr_value[] = {
- [GF_FINDER_INFO_XATTR] =
- /* 1 2 3 4 5 6 7 8 */
- "\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0",
- [GF_RESOURCE_FORK_XATTR] = ""
-};
-
-static int32_t apple_xattr_len[] = {
- [GF_FINDER_INFO_XATTR] = 32,
- [GF_RESOURCE_FORK_XATTR] = 1
-};
-
+/*
+ * dict_foreach() callback used by maccomp_getxattr_cbk().
+ *
+ * For a key that starts with "user.", the value is stored again under the
+ * key with that prefix stripped and the prefixed entry is then deleted.
+ * Other keys are left alone.  data is the xlator, used only for logging.
+ * Always returns 0.
+ */
+static int
+dict_key_remove_namespace(dict_t *dict, char *key, data_t *value, void *data)
+{
+        xlator_t *this     = (xlator_t *) data;
+        char     *bare_key = NULL;
+
+        if (strncmp(key, "user.", 5) != 0)
+                return 0;
+
+        bare_key = key + 5;
+        dict_set (dict, bare_key, value);
+        /* log before dict_del(): key belongs to the entry being removed */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "remove_namespace_dict: %s -> %s ", key, bare_key);
+        dict_del (dict, key);
+
+        return 0;
+}
int32_t
maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
intptr_t ax = (intptr_t)this->private;
int i = 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr_cbk: dict %p private: %p xdata %p ", dict,
+ this->private, xdata);
+
+ if (dict) {
+ dict_foreach(dict, dict_key_remove_namespace, this);
+ }
+ else {
+ // TODO: we expect dict to exist here, don't know why this
+ // this is needed
+ dict = dict_new();
+ }
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr_cbk: dict %p ax: %ld op_ret %d op_err %d ", dict, ax,
+ op_ret, op_errno);
if ((ax == GF_XATTR_ALL && op_ret >= 0) || ax != GF_XATTR_NONE) {
op_ret = op_errno = 0;
-
for (i = 0; i < GF_XATTR_ALL; i++) {
if (dict_get (dict, apple_xattr_name[i]))
continue;
-
+ /* set dummy data */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr_cbk: setting dummy data %p, %s", dict,
+ apple_xattr_name[i]);
if (dict_set (dict, apple_xattr_name[i],
bin_to_data ((void *)apple_xattr_value[i],
apple_xattr_len[i])) == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ op_errno = ENOATTR;
break;
}
}
}
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+static
+int prepend_xattr_user_namespace(dict_t *dict, char *key, data_t *value, void *obj)
+{
+ xlator_t *this = (xlator_t *) obj;
+ dict_t *newdict = (dict_t *) this->private;
+ char *newkey = NULL;
+ gf_add_prefix(XATTR_USER_PREFIX, key, &newkey);
+ key = newkey;
+ dict_set(newdict, (char *)key, value);
+ if (newkey)
+ GF_FREE(newkey);
return 0;
}
-
-int32_t
-maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+intptr_t
+check_name(const char *name, char **newkey)
{
intptr_t ax = GF_XATTR_NONE;
- int i = 0;
-
if (name) {
+ int i = 0;
for (i = 0; i < GF_XATTR_ALL; i++) {
if (strcmp (apple_xattr_name[i], name) == 0) {
ax = i;
-
break;
}
}
+ gf_add_prefix("user.", name, newkey);
} else
ax = GF_XATTR_ALL;
+ return ax;
+}
- this->private = (void *)ax;
+/*
+ * getxattr fop: classify the requested name with check_name(), remember
+ * the classification in this->private for maccomp_getxattr_cbk(), and
+ * wind the request to the first child using the "user."-prefixed key
+ * that check_name() produced (NULL when no name was given).
+ */
+int32_t
+maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                  const char *name, dict_t *xdata)
+{
+        char *newkey = NULL;
+        this->private = (void *) check_name(name, &newkey);
+        gf_log (this->name, GF_LOG_DEBUG,
+                "getxattr: name %s private: %p xdata %p ", name,
+                this->private, xdata);
         STACK_WIND (frame, maccomp_getxattr_cbk,
                     FIRST_CHILD(this),
                     FIRST_CHILD(this)->fops->getxattr,
-                    loc, name);
+                    loc, newkey, xdata);
+        /* release the prefixed copy after winding, as maccomp_fgetxattr()
+         * does; without this every named getxattr leaked the key */
+        GF_FREE(newkey);
         return 0;
 }
int32_t
maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
- intptr_t ax = GF_XATTR_NONE;
- int i = 0;
-
- if (name) {
- for (i = 0; i < GF_XATTR_ALL; i++) {
- if (strcmp (apple_xattr_name[i], name) == 0) {
- ax = i;
-
- break;
- }
- }
- } else
- ax = GF_XATTR_ALL;
-
- this->private = (void *)ax;
+ char *newkey = NULL;
+ this->private = (void *) check_name(name, &newkey);
STACK_WIND (frame, maccomp_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
- fd, name);
+ fd, newkey, xdata);
+ GF_FREE(newkey);
return 0;
}
-
int32_t
maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
intptr_t ax = (intptr_t)this->private;
if (op_ret == -1 && ax != GF_XATTR_NONE)
op_ret = op_errno = 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr_cbk op_ret %d op_errno %d private: %p xdata %p ",
+ op_ret, op_errno, this->private, xdata);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
-
+int32_t
+maccomp_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *iatt1,
+ struct iatt *iattr2, dict_t *xdata)
+{
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setattr_cbk op_ret %d op_errno %d private: %p xdata %p ",
+ op_ret, op_errno, this->private, xdata);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
+ iatt1, iattr2, xdata);
return 0;
}
+/*
+ * Translate setxattr flags from the host's XATTR_* encoding to the
+ * GF_XATTR_* encoding that is wound down the stack.
+ *
+ * Darwin has different defines for the XATTR_ flags and there does not
+ * seem to be a POSIX standard.  Every create/replace bit, in either
+ * encoding, is stripped first; all other flags are passed through, and
+ * the create/replace request is then re-expressed with the GF_ values.
+ * NOFOLLOW is always true on Linux and Darwin.
+ */
+int map_flags(int flags)
+{
+        /* XATTR_CREATE is part of the mask too, so a host CREATE bit can
+         * never reach the child untranslated */
+        int linux_flags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE |
+                                    XATTR_CREATE | XATTR_REPLACE);
+
+        if (XATTR_CREATE & flags)
+                linux_flags |= GF_XATTR_CREATE;
+        if (XATTR_REPLACE & flags)
+                linux_flags |= GF_XATTR_REPLACE;
+
+        return linux_flags;
+}
+
+/*
+ * fremovexattr fop: record check_name()'s classification of name in
+ * this->private and wind the request to the first child with the
+ * "user."-prefixed key, which is freed once the wind returns.
+ */
+int32_t
+maccomp_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                      const char *name, dict_t *xdata)
+{
+        char     *newkey = NULL;
+        intptr_t  ax     = check_name(name, &newkey);
+
+        this->private = (void *) ax;
+
+        STACK_WIND (frame, default_fremovexattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fremovexattr,
+                    fd, newkey, xdata);
+
+        GF_FREE(newkey);
+
+        return 0;
+}
int32_t
maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -171,20 +218,60 @@ maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
break;
}
}
+ dict_t *newdict = dict_new();
+ this->private = (void *) newdict;
+ dict_foreach(dict, prepend_xattr_user_namespace, this);
this->private = (void *)ax;
-
+ int linux_flags = map_flags(flags);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr flags: %d -> %d dict %p private: %p xdata %p ",
+ flags, linux_flags, dict, this->private, xdata);
STACK_WIND (frame, maccomp_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, newdict, linux_flags, xdata);
+ dict_unref(newdict);
return 0;
}
+/*
+ * setattr fop: a pass-through that only adds a debug trace before
+ * winding the unchanged arguments to the first child; the reply comes
+ * back through maccomp_setattr_cbk().
+ */
+int32_t
+maccomp_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *iattr,
+                 int32_t flags, dict_t *xdata)
+{
+        gf_log (this->name, GF_LOG_DEBUG,
+                "setattr iattr %p private: %p xdata %p ",
+                iattr, this->private, xdata);
+
+        STACK_WIND (frame, maccomp_setattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setattr,
+                    loc, iattr, flags, xdata);
+
+        return 0;
+}
+
+/*
+ * removexattr fop: record check_name()'s classification of name in
+ * this->private, wind the request to the first child with the
+ * "user."-prefixed key, trace the call, then free the key.
+ */
+int32_t
+maccomp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     const char *name, dict_t *xdata)
+{
+        char     *newkey = NULL;
+        intptr_t  ax     = check_name(name, &newkey);
+
+        this->private = (void *) ax;
+
+        STACK_WIND (frame, default_removexattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr,
+                    loc, newkey, xdata);
+
+        /* the trace is emitted after the wind, as before */
+        gf_log (this->name, GF_LOG_TRACE,
+                "removeattr name %p private: %p xdata %p ",
+                name, this->private, xdata);
+
+        GF_FREE(newkey);
+
+        return 0;
+}
int32_t
maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -197,12 +284,20 @@ maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
}
}
- this->private = (void *)ax;
+ dict_t *newdict = dict_new();
+ this->private = (void *) newdict;
+ dict_foreach(dict, prepend_xattr_user_namespace, this);
+ this->private = (void *)ax;
+ int linux_flags = map_flags(flags);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fsetxattr flags: %d -> %d dict %p private: %p xdata %p ",
+ flags, linux_flags, dict, this->private, xdata);
STACK_WIND (frame, maccomp_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags);
+ fd, newdict, linux_flags, xdata);
+ dict_unref(newdict);
return 0;
}
@@ -233,14 +328,16 @@ fini (xlator_t *this)
struct xlator_fops fops = {
- .getxattr = maccomp_getxattr,
- .fgetxattr = maccomp_fgetxattr,
- .setxattr = maccomp_setxattr,
- .fsetxattr = maccomp_fsetxattr,
+ .getxattr = maccomp_getxattr,
+ .fgetxattr = maccomp_fgetxattr,
+ .setxattr = maccomp_setxattr,
+ .setattr = maccomp_setattr,
+ .fsetxattr = maccomp_fsetxattr,
+ .removexattr = maccomp_removexattr,
+ .fremovexattr = maccomp_fremovexattr,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {NULL} },
diff --git a/xlators/features/mac-compat/src/mac-compat.h b/xlators/features/mac-compat/src/mac-compat.h
new file mode 100644
index 00000000000..b033ca0e4d8
--- /dev/null
+++ b/xlators/features/mac-compat/src/mac-compat.h
@@ -0,0 +1,41 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MAC_COMPAT_H__
+#define __MAC_COMPAT_H__
+
+enum apple_xattr { /* indexes into the three apple_xattr_* tables below */
+ GF_FINDER_INFO_XATTR,
+ GF_RESOURCE_FORK_XATTR,
+ GF_XATTR_ALL, /* loop bound over the table entries; also what check_name() returns when no name is given */
+ GF_XATTR_NONE /* what check_name() returns when the name matches no table entry */
+};
+
+static char *apple_xattr_name[] = { /* xattr names, indexed by enum apple_xattr */
+ [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo",
+ [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork"
+};
+
+static const char *apple_xattr_value[] = { /* dummy values maccomp_getxattr_cbk() sets when the name is absent from the reply */
+ [GF_FINDER_INFO_XATTR] =
+ /* 1 2 3 4 5 6 7 8 */
+ "\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0", /* four rows of eight NUL bytes */
+ [GF_RESOURCE_FORK_XATTR] = ""
+};
+
+static int32_t apple_xattr_len[] = { /* byte counts passed to bin_to_data() together with the values above */
+ [GF_FINDER_INFO_XATTR] = 32,
+ [GF_RESOURCE_FORK_XATTR] = 1 /* the single NUL byte of the empty string */
+};
+
+#endif /* __MAC_COMPAT_H__ */
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/marker/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am
new file mode 100644
index 00000000000..0465b02012e
--- /dev/null
+++ b/xlators/features/marker/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = marker.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+marker_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c
+marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
new file mode 100644
index 00000000000..6ec5e3cc8bc
--- /dev/null
+++ b/xlators/features/marker/src/marker-common.c
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <fnmatch.h>
+#include "marker-common.h"
+
+/*
+ * Allocate a zeroed marker inode context with no quota context attached.
+ * Returns NULL when the allocation fails.
+ */
+marker_inode_ctx_t *
+marker_inode_ctx_new ()
+{
+        marker_inode_ctx_t *fresh = NULL;
+
+        fresh = GF_CALLOC (1, sizeof (*fresh),
+                           gf_marker_mt_marker_inode_ctx_t);
+        if (fresh != NULL)
+                fresh->quota_ctx = NULL;
+
+        return fresh;
+}
+
+/*
+ * Fetch the marker context stored on inode for this xlator, creating and
+ * storing a new one when none is present.  The lookup and the insertion
+ * happen under inode->lock.
+ *
+ * Returns 0 with *ctx set on success.  On failure a negative value is
+ * returned and *ctx is NULL; it never points at freed memory.
+ */
+int32_t
+marker_force_inode_ctx_get (inode_t *inode, xlator_t *this,
+                            marker_inode_ctx_t **ctx)
+{
+        int32_t  ret     = -1;
+        uint64_t ctx_int = 0;
+
+        LOCK (&inode->lock);
+        {
+                ret = __inode_ctx_get (inode, this, &ctx_int);
+                if (ret == 0) {
+                        *ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int;
+                        goto unlock;
+                }
+
+                *ctx = marker_inode_ctx_new ();
+                if (*ctx == NULL) {
+                        /* report allocation failure as -1 whatever the
+                         * lookup returned */
+                        ret = -1;
+                        goto unlock;
+                }
+
+                ret = __inode_ctx_put (inode, this,
+                                       (uint64_t )(unsigned long) *ctx);
+                if (ret == -1) {
+                        GF_FREE (*ctx);
+                        /* do not hand the freed context back to the caller */
+                        *ctx = NULL;
+                        goto unlock;
+                }
+                ret = 0;
+        }
+unlock:
+        UNLOCK (&inode->lock);
+
+        return ret;
+}
+
+/*
+ * Remove key from dict unconditionally and return 0.  value and data are
+ * not used; the signature matches the callbacks passed to dict_foreach().
+ */
+int
+marker_filter_quota_xattr (dict_t *dict, char *key,
+                           data_t *value, void *data)
+{
+        (void) value;
+        (void) data;
+
+        dict_del (dict, key);
+
+        return 0;
+}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
new file mode 100644
index 00000000000..c6ca422dd6a
--- /dev/null
+++ b/xlators/features/marker/src/marker-common.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _MARKER_COMMON_H
+#define _MARKER_COMMON_H
+
+#include "inode.h"
+#include "xlator.h"
+#include "marker.h"
+
+int32_t
+marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **);
+
+int
+marker_filter_quota_xattr (dict_t *, char *, data_t *, void *);
+#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
new file mode 100644
index 00000000000..dc5ad16ed76
--- /dev/null
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __MARKER_MEM_TYPES_H__
+#define __MARKER_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_marker_mem_types_ { /* allocation type ids used by the marker translator */
+ gf_marker_mt_marker_conf_t = gf_common_mt_end + 1, /* first id starts right after gf_common_mt_end */
+ gf_marker_mt_loc_t,
+ gf_marker_mt_volume_mark,
+ gf_marker_mt_int64_t,
+ gf_marker_mt_quota_inode_ctx_t,
+ gf_marker_mt_marker_inode_ctx_t, /* passed to GF_CALLOC in marker_inode_ctx_new() */
+ gf_marker_mt_inode_contribution_t,
+ gf_marker_mt_quota_meta_t,
+ gf_marker_mt_quota_synctask_t,
+ gf_marker_mt_end /* NOTE(review): presumably an end-of-range sentinel, confirm against the mem-accounting setup */
+};
+#endif
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
new file mode 100644
index 00000000000..1fed9df6d6a
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -0,0 +1,481 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "locking.h"
+#include "marker-quota.h"
+#include "marker-common.h"
+#include "marker-quota-helper.h"
+#include "marker-mem-types.h"
+
+/*
+ * Populate loc from inode, parent and path.
+ *
+ * loc takes a reference on inode (and on parent when one is given),
+ * copies the inode's gfid when it is set, duplicates path and points
+ * loc->name just past the last '/'.  parent may be NULL because the root
+ * has none.
+ *
+ * Returns 0 on success.  On failure -1 is returned and loc is wiped;
+ * when loc itself is NULL there is nothing to wipe.
+ */
+int
+mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", path, out);
+        /* Not checking for parent because while filling
+         * loc of root, parent will be NULL
+         */
+
+        /* inode was validated as non-NULL above */
+        loc->inode = inode_ref (inode);
+
+        if (parent)
+                loc->parent = inode_ref (parent);
+
+        if (!gf_uuid_is_null (inode->gfid))
+                gf_uuid_copy (loc->gfid, inode->gfid);
+
+        loc->path = gf_strdup (path);
+        if (!loc->path) {
+                gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
+                goto out;
+        }
+
+        loc->name = strrchr (loc->path, '/');
+        if (!loc->name)
+                goto out;
+        loc->name++;
+
+        ret = 0;
+
+out:
+        /* the loc validation above jumps here with loc == NULL */
+        if (ret < 0 && loc)
+                loc_wipe (loc);
+
+        return ret;
+}
+
+
+/*
+ * Build loc for inode and make sure the inode has a quota context.
+ *
+ * For a non-root inode the parent is looked up by parent_gfid when one
+ * is given, otherwise through inode_parent().  The path is resolved with
+ * inode_path() and loc is filled via mq_loc_fill().  The parent
+ * reference taken here and the resolved path are released before
+ * returning.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int32_t
+mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc)
+{
+        char              *resolvedpath = NULL;
+        inode_t           *parent       = NULL;
+        quota_inode_ctx_t *ctx          = NULL;
+        xlator_t          *this         = THIS;
+        int                ret          = -1;
+
+        if (inode == NULL) {
+                gf_log_callingfn ("marker", GF_LOG_ERROR, "loc fill failed, "
+                                  "inode is NULL");
+                return ret;
+        }
+
+        if (loc == NULL)
+                return ret;
+
+        if (__is_root_gfid (inode->gfid)) {
+                /* the root has no parent to resolve */
+                loc->parent = NULL;
+        } else {
+                if (parent_gfid != NULL)
+                        parent = inode_find (inode->table,
+                                             (unsigned char *) parent_gfid);
+                else
+                        parent = inode_parent (inode, 0, NULL);
+
+                if (parent == NULL) {
+                        gf_log ("marker", GF_LOG_ERROR, "parent is NULL for %s",
+                                uuid_utoa(inode->gfid));
+                        goto err;
+                }
+        }
+
+        ret = inode_path (inode, NULL, &resolvedpath);
+        if (ret < 0) {
+                gf_log ("marker", GF_LOG_ERROR, "failed to resolve path for %s",
+                        uuid_utoa(inode->gfid));
+                goto err;
+        }
+
+        ret = mq_loc_fill (loc, inode, parent, resolvedpath);
+        if (ret < 0)
+                goto err;
+
+        /* create the quota context when the lookup finds none */
+        ret = mq_inode_ctx_get (inode, this, &ctx);
+        if (ret < 0 || ctx == NULL)
+                ctx = mq_inode_ctx_new (inode, this);
+
+        if (ctx == NULL) {
+                gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+                        "failed for %s", uuid_utoa (inode->gfid));
+                ret = -1;
+                goto err;
+        }
+
+        ret = 0;
+
+err:
+        if (parent)
+                inode_unref (parent);
+
+        GF_FREE (resolvedpath);
+
+        return ret;
+}
+
+
+/*
+ * Allocate a quota inode context and initialise it: size and dirty are
+ * zero, no update is in progress, the lock is initialised and the
+ * contribution list is empty.  Returns NULL when allocation fails.
+ */
+quota_inode_ctx_t *
+mq_alloc_inode_ctx ()
+{
+        int32_t            ret = -1;
+        quota_inode_ctx_t *ctx = NULL;
+
+        QUOTA_ALLOC (ctx, quota_inode_ctx_t, ret);
+
+        if (ret != -1) {
+                ctx->size = 0;
+                ctx->dirty = 0;
+                ctx->updation_status = _gf_false;
+                LOCK_INIT (&ctx->lock);
+                INIT_LIST_HEAD (&ctx->contribution_head);
+        }
+
+        return ctx;
+}
+
+/*
+ * Release callback registered with GF_REF_INIT() in mq_contri_init():
+ * destroys the contribution's lock and frees the node.
+ */
+void
+mq_contri_fini (void *data)
+{
+        inode_contribution_t *node = (inode_contribution_t *) data;
+
+        LOCK_DESTROY (&node->lock);
+
+        GF_FREE (node);
+}
+
+/*
+ * Allocate a contribution node for inode.
+ *
+ * The node is reference counted with mq_contri_fini() as its release
+ * callback, starts with zero contribution and counts, and carries a copy
+ * of inode's gfid.  Returns NULL when allocation fails.
+ */
+inode_contribution_t*
+mq_contri_init (inode_t *inode)
+{
+        inode_contribution_t *contri = NULL;
+        int32_t               ret    = 0;
+
+        QUOTA_ALLOC (contri, inode_contribution_t, ret);
+
+        if (ret != -1) {
+                GF_REF_INIT (contri, mq_contri_fini);
+
+                contri->contribution = 0;
+                contri->file_count = 0;
+                contri->dir_count = 0;
+                gf_uuid_copy (contri->gfid, inode->gfid);
+
+                LOCK_INIT (&contri->lock);
+                INIT_LIST_HEAD (&contri->contri_list);
+        }
+
+        return contri;
+}
+
+/*
+ * Find, in ctx's contribution list, the node whose gfid equals
+ * inode->gfid.  The search runs under ctx->lock and a reference is taken
+ * on the node that is returned.  Returns NULL when either argument is
+ * NULL or no node matches.
+ */
+inode_contribution_t *
+mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx)
+{
+        inode_contribution_t *contri = NULL;
+        inode_contribution_t *cursor = NULL;
+
+        if (inode == NULL || ctx == NULL)
+                return NULL;
+
+        LOCK (&ctx->lock);
+        {
+                if (!list_empty (&ctx->contribution_head)) {
+                        list_for_each_entry (cursor, &ctx->contribution_head,
+                                             contri_list) {
+                                if (gf_uuid_compare (cursor->gfid,
+                                                     inode->gfid) != 0)
+                                        continue;
+
+                                contri = cursor;
+                                GF_REF_GET (contri);
+                                break;
+                        }
+                }
+        }
+        UNLOCK (&ctx->lock);
+
+        return contri;
+}
+
+/*
+ * Return the contribution node for loc->parent, creating one and
+ * appending it to ctx's list when none exists.  The caller holds
+ * ctx->lock (see mq_add_new_contribution_node()).
+ *
+ * When loc->parent is unset it is resolved first, by pargfid lookup and
+ * then through inode_parent(); the resolved inode stays in loc->parent.
+ * Returns NULL when the parent cannot be resolved or allocation fails.
+ * No reference is taken here.
+ */
+inode_contribution_t *
+__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx,
+                                loc_t *loc)
+{
+        inode_contribution_t *cursor = NULL;
+        inode_contribution_t *fresh  = NULL;
+
+        if (loc->parent == NULL) {
+                if (!gf_uuid_is_null (loc->pargfid))
+                        loc->parent = inode_find (loc->inode->table,
+                                                  loc->pargfid);
+
+                if (loc->parent == NULL)
+                        loc->parent = inode_parent (loc->inode, loc->pargfid,
+                                                    loc->name);
+
+                if (loc->parent == NULL)
+                        return NULL;
+        }
+
+        /* reuse the node already recorded for this parent, if any */
+        list_for_each_entry (cursor, &ctx->contribution_head, contri_list) {
+                if (gf_uuid_compare (cursor->gfid, loc->parent->gfid) == 0)
+                        return cursor;
+        }
+
+        fresh = mq_contri_init (loc->parent);
+        if (fresh != NULL)
+                list_add_tail (&fresh->contri_list, &ctx->contribution_head);
+
+        return fresh;
+}
+
+
+/*
+ * Locked wrapper around __mq_add_new_contribution_node(): returns the
+ * contribution node for loc's parent with one reference taken for the
+ * caller.
+ *
+ * Returns NULL when ctx or loc is NULL, when loc->path is "/", when
+ * there is neither a path nor a parent gfid, or when the node cannot be
+ * found or created.
+ */
+inode_contribution_t *
+mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx,
+                              loc_t *loc)
+{
+        inode_contribution_t *node = NULL;
+
+        if (ctx == NULL || loc == NULL)
+                return NULL;
+
+        if (loc->path != NULL) {
+                if (strcmp (loc->path, "/") == 0)
+                        return NULL;
+        } else if (gf_uuid_is_null (loc->pargfid)) {
+                return NULL;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                node = __mq_add_new_contribution_node (this, ctx, loc);
+                if (node != NULL)
+                        GF_REF_GET (node);
+        }
+        UNLOCK (&ctx->lock);
+
+        return node;
+}
+
+
+/*
+ * Add a contribution key with value 0 to dict.
+ *
+ * The key is built for gfid when one is given, otherwise for
+ * loc->parent's gfid, otherwise with no gfid (nameless lookup: fetch
+ * contributions to all parents).  When contri_key is non-NULL the key is
+ * also copied into it; that buffer must hold QUOTA_KEY_MAX bytes and is
+ * always NUL-terminated.
+ *
+ * Returns a negative value on failure, including a NULL this, dict or
+ * loc.
+ */
+int32_t
+mq_dict_set_contribution (xlator_t *this, dict_t *dict, loc_t *loc,
+                          uuid_t gfid, char *contri_key)
+{
+        int32_t ret                = -1;
+        char    key[QUOTA_KEY_MAX] = {0, };
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+        GF_VALIDATE_OR_GOTO ("marker", dict, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+
+        if (gfid && !gf_uuid_is_null(gfid)) {
+                GET_CONTRI_KEY (this, key, gfid, ret);
+        } else if (loc->parent) {
+                GET_CONTRI_KEY (this, key, loc->parent->gfid, ret);
+        } else {
+                /* nameless lookup, fetch contributions to all parents */
+                GET_CONTRI_KEY (this, key, NULL, ret);
+        }
+
+        if (ret < 0)
+                goto out;
+
+        ret = dict_set_int64 (dict, key, 0);
+        if (ret < 0)
+                goto out;
+
+        if (contri_key) {
+                strncpy (contri_key, key, QUOTA_KEY_MAX);
+                /* strncpy does not terminate when the source fills the
+                 * whole buffer */
+                contri_key[QUOTA_KEY_MAX - 1] = '\0';
+        }
+
+out:
+        /* the validation of `this` jumps here with this == NULL, so its
+         * name cannot be used unconditionally */
+        if (ret < 0)
+                gf_log_callingfn (this ? this->name : "marker",
+                                  GF_LOG_ERROR, "dict set failed");
+
+        return ret;
+}
+
+
+/*
+ * Look up the quota context attached to inode's marker context.
+ *
+ * Returns 0 and stores the quota context in *ctx on success.  Returns -1
+ * when an argument is NULL, when the inode has no marker context, or
+ * when that context has no quota context yet.  Whenever ctx itself is
+ * valid, every failure leaves *ctx set to NULL.
+ */
+int32_t
+mq_inode_ctx_get (inode_t *inode, xlator_t *this,
+                  quota_inode_ctx_t **ctx)
+{
+        int32_t             ret      = -1;
+        uint64_t            ctx_int  = 0;
+        marker_inode_ctx_t *mark_ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+
+        /* reset up front so every failure path reports the same way */
+        *ctx = NULL;
+
+        if (inode_ctx_get (inode, this, &ctx_int) < 0)
+                goto out;
+
+        mark_ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int;
+        /* a zero stored value is treated as "no context", not dereferenced */
+        if (mark_ctx == NULL || mark_ctx->quota_ctx == NULL)
+                goto out;
+
+        *ctx = mark_ctx->quota_ctx;
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+
+/*
+ * Return inode's quota context, allocating and attaching one to the
+ * marker context when it has none.  The marker context is obtained (or
+ * created) with marker_force_inode_ctx_get(); the check-and-attach step
+ * runs under inode->lock.  Returns NULL on failure.
+ */
+quota_inode_ctx_t *
+__mq_inode_ctx_new (inode_t *inode, xlator_t *this)
+{
+        quota_inode_ctx_t  *quota_ctx = NULL;
+        marker_inode_ctx_t *mark_ctx  = NULL;
+
+        if (marker_force_inode_ctx_get (inode, this, &mark_ctx) < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "marker_force_inode_ctx_get() failed");
+                return NULL;
+        }
+
+        LOCK (&inode->lock);
+        {
+                /* a failed allocation leaves the field NULL, as before */
+                if (mark_ctx->quota_ctx == NULL)
+                        mark_ctx->quota_ctx = mq_alloc_inode_ctx ();
+
+                quota_ctx = mark_ctx->quota_ctx;
+        }
+        UNLOCK (&inode->lock);
+
+        return quota_ctx;
+}
+
+
+/*
+ * Public entry point for obtaining (and creating if needed) the quota
+ * context of inode; forwards to __mq_inode_ctx_new().
+ */
+quota_inode_ctx_t *
+mq_inode_ctx_new (inode_t * inode, xlator_t *this)
+{
+        quota_inode_ctx_t *quota_ctx = __mq_inode_ctx_new (inode, this);
+
+        return quota_ctx;
+}
+
+/*
+ * Take a zeroed quota_local_t from the current xlator's local pool and
+ * initialise it with one reference, an initialised lock and no ctx or
+ * contribution.  Returns NULL when the pool has nothing to give.
+ */
+quota_local_t *
+mq_local_new ()
+{
+        quota_local_t *fresh = mem_get0 (THIS->local_pool);
+
+        if (fresh == NULL)
+                return NULL;
+
+        fresh->ref = 1;
+        LOCK_INIT (&fresh->lock);
+
+        fresh->ctx = NULL;
+        fresh->contri = NULL;
+
+        return fresh;
+}
+
+/*
+ * Take one more reference on local, under local->lock, and return it.
+ */
+quota_local_t *
+mq_local_ref (quota_local_t *local)
+{
+        QUOTA_SAFE_INCREMENT (&local->lock, local->ref);
+
+        return local;
+}
+
+
+/*
+ * Drop one reference on local.  When the count reaches zero, the fd,
+ * contribution and xdata references are released, both locs are wiped,
+ * the lock is destroyed and local goes back to its memory pool.
+ * A NULL local is ignored.  Always returns 0.
+ */
+int32_t
+mq_local_unref (xlator_t *this, quota_local_t *local)
+{
+        int32_t remaining = 0;
+
+        if (local == NULL)
+                return 0;
+
+        QUOTA_SAFE_DECREMENT (&local->lock, local->ref, remaining);
+        if (remaining != 0)
+                return 0;
+
+        /* last reference gone: tear everything down */
+        if (local->fd != NULL)
+                fd_unref (local->fd);
+
+        if (local->contri)
+                GF_REF_PUT (local->contri);
+
+        if (local->xdata)
+                dict_unref (local->xdata);
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->parent_loc);
+
+        LOCK_DESTROY (&local->lock);
+
+        mem_put (local);
+
+        return 0;
+}
+
+
+/*
+ * Return the contribution node that loc->inode keeps for loc->parent,
+ * with a reference taken by mq_get_contribution_node().  Returns NULL,
+ * after logging a warning, when the inode has no quota context or no
+ * contribution towards that parent.
+ *
+ * loc->inode and loc->parent may be NULL; the warnings then print
+ * "(null)" for the missing gfid instead of dereferencing the pointer.
+ */
+inode_contribution_t *
+mq_get_contribution_from_loc (xlator_t *this, loc_t *loc)
+{
+        int32_t               ret          = 0;
+        quota_inode_ctx_t    *ctx          = NULL;
+        inode_contribution_t *contribution = NULL;
+
+        ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+        if (ret < 0) {
+                /* a NULL loc->inode is one of the ways to get here */
+                gf_log_callingfn (this->name, GF_LOG_WARNING,
+                                  "cannot get marker-quota context from inode "
+                                  "(gfid:%s, path:%s)",
+                                  loc->inode ? uuid_utoa (loc->inode->gfid)
+                                             : "(null)",
+                                  loc->path);
+                goto err;
+        }
+
+        contribution = mq_get_contribution_node (loc->parent, ctx);
+        if (contribution == NULL) {
+                /* a NULL loc->parent is one of the ways to get here */
+                gf_log_callingfn (this->name, GF_LOG_WARNING,
+                                  "inode (gfid:%s, path:%s) has "
+                                  "no contribution towards parent (gfid:%s)",
+                                  loc->inode ? uuid_utoa (loc->inode->gfid)
+                                             : "(null)",
+                                  loc->path,
+                                  loc->parent ? uuid_utoa (loc->parent->gfid)
+                                              : "(null)");
+                goto err;
+        }
+
+err:
+        return contribution;
+}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
new file mode 100644
index 00000000000..bf417aa8241
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MARKER_QUOTA_HELPER_H
+#define _MARKER_QUOTA_HELPER_H
+
+#include "marker.h"
+
+/*
+ * Unlink _contribution from its list and drop one reference on it, all
+ * under ctx->lock.  Both arguments are pointer expressions; they are
+ * parenthesized so that any expression can be passed safely.
+ */
+#define QUOTA_FREE_CONTRIBUTION_NODE(ctx, _contribution)                \
+        do {                                                            \
+                LOCK (&(ctx)->lock);                                    \
+                {                                                       \
+                        list_del_init (&(_contribution)->contri_list);  \
+                        GF_REF_PUT ((_contribution));                   \
+                }                                                       \
+                UNLOCK (&(ctx)->lock);                                  \
+        } while (0)
+
+/*
+ * Increment var while holding lock.  lock is a pointer to the lock and
+ * var is an lvalue; both are parenthesized in the expansion.
+ */
+#define QUOTA_SAFE_INCREMENT(lock, var)         \
+        do {                                    \
+                LOCK ((lock));                  \
+                (var)++;                        \
+                UNLOCK ((lock));                \
+        } while (0)
+
+/*
+ * Decrement var while holding lock and store the new value in value.
+ * lock is a pointer to the lock; var and value are lvalues.  All three
+ * are parenthesized in the expansion.
+ */
+#define QUOTA_SAFE_DECREMENT(lock, var, value)  \
+        do {                                    \
+                LOCK ((lock));                  \
+                {                               \
+                        (value) = --(var);      \
+                }                               \
+                UNLOCK ((lock));                \
+        } while (0)
+
+inode_contribution_t *
+mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *);
+
+int32_t
+mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *, uuid_t, char *);
+
+quota_inode_ctx_t *
+mq_inode_ctx_new (inode_t *, xlator_t *);
+
+int32_t
+mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **);
+
+int32_t
+mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *);
+
+int32_t
+mq_inode_loc_fill (const char *, inode_t *, loc_t *);
+
+quota_local_t *
+mq_local_new ();
+
+quota_local_t *
+mq_local_ref (quota_local_t *);
+
+int32_t
+mq_local_unref (xlator_t *, quota_local_t *);
+
+void
+mq_contri_fini (void *data);
+
+inode_contribution_t*
+mq_contri_init (inode_t *inode);
+
+inode_contribution_t *
+mq_get_contribution_node (inode_t *, quota_inode_ctx_t *);
+
+inode_contribution_t *
+mq_get_contribution_from_loc (xlator_t *this, loc_t *loc);
+
+#endif
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
new file mode 100644
index 00000000000..902b8e5c272
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota.c
@@ -0,0 +1,2189 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "libxlator.h"
+#include "common-utils.h"
+#include "byte-order.h"
+#include "marker-quota.h"
+#include "marker-quota-helper.h"
+#include "syncop.h"
+#include "quota-common-utils.h"
+
+/* Copy src into dst after checking that src is usable: it must carry an
+ * inode and, unless that inode is the root, either a parent inode or a
+ * non-null parent gfid.
+ * Returns the result of loc_copy(), or -1 when an argument is NULL or src
+ * fails the check. */
+int
+mq_loc_copy (loc_t *dst, loc_t *src)
+{
+        int          ret    = -1;
+        gf_boolean_t usable = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("marker", dst, out);
+        GF_VALIDATE_OR_GOTO ("marker", src, out);
+
+        if (src->inode != NULL) {
+                if (src->parent != NULL ||
+                    !gf_uuid_is_null (src->pargfid) ||
+                    __is_root_gfid (src->inode->gfid))
+                        usable = _gf_true;
+        }
+
+        if (!usable) {
+                gf_log ("marker", GF_LOG_WARNING,
+                        "src loc is not valid");
+                goto out;
+        }
+
+        ret = loc_copy (dst, src);
+out:
+        return ret;
+}
+
+/* Store status into *flag while holding ctx->lock. */
+static void
+mq_set_ctx_status (quota_inode_ctx_t *ctx, gf_boolean_t *flag,
+                   gf_boolean_t status)
+{
+        LOCK (&ctx->lock);
+        *flag = status;
+        UNLOCK (&ctx->lock);
+}
+
+/* Exchange *flag and *status while holding ctx->lock: on return *status
+ * holds the previous flag value and *flag holds the value the caller
+ * passed in through *status. */
+static void
+mq_test_and_set_ctx_status (quota_inode_ctx_t *ctx, gf_boolean_t *flag,
+                            gf_boolean_t *status)
+{
+        gf_boolean_t requested = _gf_false;
+
+        LOCK (&ctx->lock);
+        requested = *status;
+        *status   = *flag;
+        *flag     = requested;
+        UNLOCK (&ctx->lock);
+}
+
+/* Read *flag into *status while holding ctx->lock. */
+static void
+mq_get_ctx_status (quota_inode_ctx_t *ctx, gf_boolean_t *flag,
+                   gf_boolean_t *status)
+{
+        LOCK (&ctx->lock);
+        *status = *flag;
+        UNLOCK (&ctx->lock);
+}
+
+/* Read ctx->updation_status into *status under the ctx lock.
+ * Returns 0 on success, -1 if ctx or status is NULL. */
+int32_t
+mq_get_ctx_updation_status (quota_inode_ctx_t *ctx,
+                            gf_boolean_t *status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+        GF_VALIDATE_OR_GOTO ("marker", status, out);
+
+        mq_get_ctx_status (ctx, &ctx->updation_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Set ctx->updation_status to status under the ctx lock.
+ * Returns 0 on success, -1 if ctx is NULL. */
+int32_t
+mq_set_ctx_updation_status (quota_inode_ctx_t *ctx,
+                            gf_boolean_t status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+
+        mq_set_ctx_status (ctx, &ctx->updation_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Exchange ctx->updation_status with *status under the ctx lock.
+ * Returns 0 on success, -1 if ctx or status is NULL. */
+int32_t
+mq_test_and_set_ctx_updation_status (quota_inode_ctx_t *ctx,
+                                     gf_boolean_t *status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+        GF_VALIDATE_OR_GOTO ("marker", status, out);
+
+        mq_test_and_set_ctx_status (ctx, &ctx->updation_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Set ctx->create_status to status under the ctx lock.
+ * Returns 0 on success, -1 if ctx is NULL. */
+int32_t
+mq_set_ctx_create_status (quota_inode_ctx_t *ctx,
+                          gf_boolean_t status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+
+        mq_set_ctx_status (ctx, &ctx->create_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Exchange ctx->create_status with *status under the ctx lock.
+ * Returns 0 on success, -1 if ctx or status is NULL. */
+int32_t
+mq_test_and_set_ctx_create_status (quota_inode_ctx_t *ctx,
+                                   gf_boolean_t *status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+        GF_VALIDATE_OR_GOTO ("marker", status, out);
+
+        mq_test_and_set_ctx_status (ctx, &ctx->create_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Set ctx->dirty_status to status under the ctx lock.
+ * Returns 0 on success, -1 if ctx is NULL. */
+int32_t
+mq_set_ctx_dirty_status (quota_inode_ctx_t *ctx,
+                         gf_boolean_t status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+
+        mq_set_ctx_status (ctx, &ctx->dirty_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Exchange ctx->dirty_status with *status under the ctx lock.
+ * Returns 0 on success, -1 if ctx or status is NULL. */
+int32_t
+mq_test_and_set_ctx_dirty_status (quota_inode_ctx_t *ctx,
+                                  gf_boolean_t *status)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+        GF_VALIDATE_OR_GOTO ("marker", status, out);
+
+        mq_test_and_set_ctx_status (ctx, &ctx->dirty_status, status);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Rebuild the chain of ancestors of loc->inode in the inode table.
+ *
+ * Issues a readdirp on an anonymous fd of loc->inode with
+ * GET_ANCESTRY_DENTRY_KEY set in xdata, links every returned entry under
+ * the entry that preceded it, creates a quota inode ctx for each entry and
+ * finally refreshes loc->parent from the inode table.
+ *
+ * Returns 0 on success; a negative value (a negated errno or -1) on
+ * failure, including when no entries are returned or no parent can be
+ * resolved afterwards.
+ */
+int
+mq_build_ancestry (xlator_t *this, loc_t *loc)
+{
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ dict_t *xdata = NULL;
+ inode_t *tmp_parent = NULL;
+ inode_t *tmp_inode = NULL;
+ inode_t *linked_inode = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ /* initialised before any goto out so gf_dirent_free() below is safe */
+ INIT_LIST_HEAD (&entries.list);
+
+ xdata = dict_new ();
+ if (xdata == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* presumably asks the lower layers to return ancestry dentries rather
+ * than directory contents - confirm against the readdirp handler */
+ ret = dict_set_int8 (xdata, GET_ANCESTRY_DENTRY_KEY, 1);
+ if (ret < 0)
+ goto out;
+
+ fd = fd_anonymous (loc->inode);
+ if (fd == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "fd creation failed");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_bind (fd);
+
+ ret = syncop_readdirp (this, fd, 131072, 0, &entries, xdata, NULL);
+ if (ret < 0) {
+ /* ENOENT/ESTALE are only logged at debug level */
+ gf_log (this->name, (-ret == ENOENT || -ret == ESTALE)
+ ? GF_LOG_DEBUG:GF_LOG_ERROR, "readdirp failed "
+ "for %s: %s", loc->path, strerror (-ret));
+ goto out;
+ }
+
+ if (list_empty (&entries.list)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Each entry is linked under the previous one (tmp_parent); a root
+ * entry is not linked and resets the chain. */
+ list_for_each_entry (entry, &entries.list, list) {
+ if (__is_root_gfid (entry->inode->gfid)) {
+ tmp_parent = NULL;
+ } else {
+ linked_inode = inode_link (entry->inode, tmp_parent,
+ entry->d_name,
+ &entry->d_stat);
+ if (linked_inode) {
+ /* keep the linked inode in the entry and drop the
+ * reference on the one it replaces */
+ tmp_inode = entry->inode;
+ entry->inode = linked_inode;
+ inode_unref (tmp_inode);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "inode link failed");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ ctx = mq_inode_ctx_new (entry->inode, this);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa (entry->inode->gfid));
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ tmp_parent = entry->inode;
+ }
+
+ /* replace any stale parent reference with the freshly linked one */
+ if (loc->parent)
+ inode_unref (loc->parent);
+
+ loc->parent = inode_parent (loc->inode, 0, NULL);
+ if (loc->parent == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ gf_dirent_free (&entries);
+
+ if (fd)
+ fd_unref (fd);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return ret;
+}
+
+
+/* This function should be used only in the inspect_directory and
+ * inspect_file functions to heal quota xattrs.
+ * The inode quota feature was introduced in 3.7.
+ * If a gluster setup is upgraded from 3.6 to 3.7, there can be getxattr
+ * and setxattr spikes from quota heal because the inode quota xattrs are
+ * missing. This wrapper function avoids those xattr spikes during upgrade.
+ * It returns success even if the inode-quota xattrs are missing, in which
+ * case no healing is performed.
+ */
+int32_t
+_quota_dict_get_meta (xlator_t *this, dict_t *dict, char *key,
+                      quota_meta_t *meta, ia_type_t ia_type,
+                      gf_boolean_t add_delta)
+{
+        marker_conf_t *conf = this->private;
+        int32_t        ret  = 0;
+
+        ret = quota_dict_get_inode_meta (dict, key, meta);
+
+        /* Anything other than "inode quota xattrs missing" (-2) is handed
+         * back unchanged, and so is -2 when inode quota is enabled. */
+        if (ret != -2)
+                return ret;
+        if ((conf->feature_enabled & GF_INODE_QUOTA) != 0)
+                return ret;
+
+        /* Inode quota self heal is turned off, so the missing xattrs are
+         * not healed and success is reported. */
+        gf_log (this->name, GF_LOG_DEBUG, "inode quota disabled. "
+                "inode quota self heal will not be performed");
+
+        if (add_delta) {
+                /* account for the inode itself */
+                if (ia_type == IA_IFDIR)
+                        meta->dir_count = 1;
+                else
+                        meta->file_count = 1;
+        }
+
+        return 0;
+}
+
+/* Store a two-element quota_meta_t array under the size key in dict.
+ *
+ * Element 0 carries meta converted to network byte order; element 1
+ * carries size 0, file_count 0 and dir_count 1 (also in network order).
+ * NOTE(review): element 1 looks like the default used by
+ * GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT - confirm against the xattrop code.
+ *
+ * On success the buffer is handed to dict through dict_set_bin(); on every
+ * failure path it is freed here.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * negative value produced by GET_SIZE_KEY / dict_set_bin.
+ */
+int32_t
+quota_dict_set_size_meta (xlator_t *this, dict_t *dict,
+                          const quota_meta_t *meta)
+{
+        int32_t         ret                        = -ENOMEM;
+        quota_meta_t   *value                      = NULL;
+        char            size_key[QUOTA_KEY_MAX]    = {0, };
+
+        value = GF_CALLOC (2, sizeof (quota_meta_t), gf_common_quota_meta_t);
+        if (value == NULL) {
+                goto out;
+        }
+        value[0].size = hton64 (meta->size);
+        value[0].file_count = hton64 (meta->file_count);
+        value[0].dir_count = hton64 (meta->dir_count);
+
+        value[1].size = 0;
+        value[1].file_count = 0;
+        value[1].dir_count = hton64 (1);
+
+        GET_SIZE_KEY (this, size_key, ret);
+        if (ret < 0) {
+                /* the buffer was never handed to dict, so release it here
+                 * instead of leaking it */
+                GF_FREE (value);
+                goto out;
+        }
+
+        ret = dict_set_bin (dict, size_key, value,
+                            (sizeof (quota_meta_t) * 2));
+        if (ret < 0) {
+                gf_log_callingfn ("quota", GF_LOG_ERROR, "dict set failed");
+                GF_FREE (value);
+        }
+out:
+        return ret;
+}
+
+/* delta = op1 - op2, computed field by field in the order size,
+ * file_count, dir_count. */
+void
+mq_compute_delta (quota_meta_t *delta, const quota_meta_t *op1,
+                  const quota_meta_t *op2)
+{
+        delta->size       = op1->size - op2->size;
+        delta->file_count = op1->file_count - op2->file_count;
+        delta->dir_count  = op1->dir_count - op2->dir_count;
+}
+
+/* Accumulate src into dst field by field (size, file_count, dir_count). */
+void
+mq_add_meta (quota_meta_t *dst, const quota_meta_t *src)
+{
+        dst->size       = dst->size + src->size;
+        dst->file_count = dst->file_count + src->file_count;
+        dst->dir_count  = dst->dir_count + src->dir_count;
+}
+
+/* Replace dst with (src - dst), field by field. A NULL src is treated as
+ * all zeroes, i.e. dst is negated in place. */
+void
+mq_sub_meta (quota_meta_t *dst, const quota_meta_t *src)
+{
+        if (src != NULL) {
+                dst->size       = src->size - dst->size;
+                dst->file_count = src->file_count - dst->file_count;
+                dst->dir_count  = src->dir_count - dst->dir_count;
+                return;
+        }
+
+        dst->size       = -dst->size;
+        dst->file_count = -dst->file_count;
+        dst->dir_count  = -dst->dir_count;
+}
+
+/* Look up loc on the child xlator and report which quota xattrs exist.
+ *
+ * *size_set is cleared for a directory whose size xattr cannot be read or
+ * has dir_count 0; *contri_set is cleared for a non-root loc whose
+ * contribution xattr cannot be read. Both are set to true otherwise.
+ * When the lookup returns no response dict the outputs are left untouched
+ * and the lookup's return value is handed back.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int32_t
+mq_are_xattrs_set (xlator_t *this, loc_t *loc, gf_boolean_t *contri_set,
+                   gf_boolean_t *size_set)
+{
+        int32_t       ret                        = -1;
+        char          contri_key[QUOTA_KEY_MAX]  = {0, };
+        char          size_key[QUOTA_KEY_MAX]    = {0, };
+        quota_meta_t  meta                       = {0, };
+        struct iatt   stbuf                      = {0,};
+        dict_t       *req                        = NULL;
+        dict_t       *rsp                        = NULL;
+
+        req = dict_new ();
+        if (!req) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                goto out;
+        }
+
+        ret = mq_req_xattr (this, loc, req, contri_key, size_key);
+        if (ret < 0)
+                goto out;
+
+        ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL,
+                             req, &rsp);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "lookup failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+                goto out;
+        }
+
+        if (!rsp)
+                goto out;
+
+        *contri_set = _gf_true;
+        *size_set = _gf_true;
+
+        /* only directories carry a size xattr */
+        if (loc->inode->ia_type == IA_IFDIR) {
+                ret = quota_dict_get_inode_meta (rsp, size_key, &meta);
+                if (ret < 0 || meta.dir_count == 0)
+                        *size_set = _gf_false;
+        }
+
+        /* the root has no parent to contribute to */
+        if (!loc_is_root(loc)) {
+                ret = quota_dict_get_inode_meta (rsp, contri_key, &meta);
+                if (ret < 0)
+                        *contri_set = _gf_false;
+        }
+
+        ret = 0;
+out:
+        if (req)
+                dict_unref (req);
+
+        if (rsp)
+                dict_unref (rsp);
+
+        return ret;
+}
+
+/* Create the size xattr on a directory by issuing an
+ * ADD_ARRAY64_WITH_DEFAULT xattrop with an all-zero delta. Non-directories
+ * are left alone and reported as success.
+ * Returns 0 on success, a negative value on failure. */
+int32_t
+mq_create_size_xattrs (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+{
+        int32_t       ret       = -1;
+        quota_meta_t  zero_meta = {0, };
+        dict_t       *xattrs    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+        if (loc->inode->ia_type != IA_IFDIR)
+                return 0;
+
+        xattrs = dict_new ();
+        if (xattrs == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                ret = -1;
+                goto out;
+        }
+
+        ret = quota_dict_set_size_meta (this, xattrs, &zero_meta);
+        if (ret < 0)
+                goto out;
+
+        ret = syncop_xattrop (FIRST_CHILD(this), loc,
+                              GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, xattrs,
+                              NULL, NULL);
+        if (ret < 0)
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+
+out:
+        if (xattrs != NULL)
+                dict_unref (xattrs);
+
+        return ret;
+}
+
+/* Take or release (depending on l_type) a blocking inodelk covering the
+ * whole of loc in this xlator's lock domain on the child xlator.
+ * Returns the syncop_inodelk() result, or -1 if loc or loc->inode is NULL. */
+int32_t
+mq_lock (xlator_t *this, loc_t *loc, short l_type)
+{
+        int32_t         ret   = -1;
+        struct gf_flock flock = {0, };
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+        gf_log (this->name, GF_LOG_DEBUG, "set lock type %d on %s",
+                l_type, loc->path);
+
+        /* start 0, length 0, from the beginning of the file */
+        flock.l_type   = l_type;
+        flock.l_whence = SEEK_SET;
+        flock.l_start  = 0;
+        flock.l_len    = 0;
+
+        ret = syncop_inodelk (FIRST_CHILD(this), this->name, loc, F_SETLKW,
+                              &flock, NULL, NULL);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "inodelk failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+        }
+
+out:
+        return ret;
+}
+
+/* Fetch the QUOTA_DIRTY_KEY xattr of loc through a lookup on the child
+ * xlator and store it in *dirty.
+ * Returns a non-negative value on success; on failure returns a negative
+ * value and leaves *dirty untouched. */
+int32_t
+mq_get_dirty (xlator_t *this, loc_t *loc, int32_t *dirty)
+{
+        int32_t      ret   = -1;
+        int8_t       flag  = 0;
+        dict_t      *req   = NULL;
+        dict_t      *rsp   = NULL;
+        struct iatt  stbuf = {0,};
+
+        req = dict_new ();
+        if (!req) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                goto out;
+        }
+
+        ret = dict_set_int64 (req, QUOTA_DIRTY_KEY, 0);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "dict set failed");
+                goto out;
+        }
+
+        ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL,
+                             req, &rsp);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "lookup failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+                goto out;
+        }
+
+        ret = dict_get_int8 (rsp, QUOTA_DIRTY_KEY, &flag);
+        if (ret >= 0)
+                *dirty = flag;
+
+out:
+        if (req)
+                dict_unref (req);
+
+        if (rsp)
+                dict_unref (rsp);
+
+        return ret;
+}
+
+/* Set the dirty xattr of loc to dirty with a GET_AND_SET xattrop and
+ * report the value it had before in *prev_dirty (0 when the response
+ * carries none). On success the cached ctx->dirty is updated as well.
+ * Returns 0 on success, a negative value on failure. */
+int32_t
+mq_get_set_dirty (xlator_t *this, loc_t *loc, int32_t dirty,
+                  int32_t *prev_dirty)
+{
+        int32_t            ret      = -1;
+        int8_t             old_flag = 0;
+        quota_inode_ctx_t *ctx      = NULL;
+        dict_t            *req      = NULL;
+        dict_t            *rsp      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", prev_dirty, out);
+
+        ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get inode ctx for "
+                        "%s", loc->path);
+                goto out;
+        }
+
+        req = dict_new ();
+        if (req == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int8 (req, QUOTA_DIRTY_KEY, dirty);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_set failed");
+                goto out;
+        }
+
+        ret = syncop_xattrop (FIRST_CHILD(this), loc, GF_XATTROP_GET_AND_SET,
+                              req, NULL, &rsp);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+                goto out;
+        }
+
+        /* a missing previous value counts as "not dirty" */
+        *prev_dirty = 0;
+        if (rsp != NULL &&
+            dict_get_int8 (rsp, QUOTA_DIRTY_KEY, &old_flag) == 0)
+                *prev_dirty = old_flag;
+
+        LOCK (&ctx->lock);
+        ctx->dirty = dirty;
+        UNLOCK (&ctx->lock);
+
+        ret = 0;
+out:
+        if (req != NULL)
+                dict_unref (req);
+
+        if (rsp != NULL)
+                dict_unref (rsp);
+
+        return ret;
+}
+
+/* Write dirty into the QUOTA_DIRTY_KEY xattr of loc with a setxattr and,
+ * on success, mirror it into the cached ctx->dirty.
+ * A missing inode ctx is logged but reported as success (0); other
+ * failures return a negative value. */
+int32_t
+mq_mark_dirty (xlator_t *this, loc_t *loc, int32_t dirty)
+{
+        int32_t            ret    = -1;
+        dict_t            *xattrs = NULL;
+        quota_inode_ctx_t *ctx    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+        ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get inode ctx for "
+                        "%s", loc->path);
+                return 0;
+        }
+
+        xattrs = dict_new ();
+        if (xattrs == NULL) {
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                goto out;
+        }
+
+        ret = dict_set_int8 (xattrs, QUOTA_DIRTY_KEY, dirty);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_set failed");
+                goto out;
+        }
+
+        ret = syncop_setxattr (FIRST_CHILD(this), loc, xattrs, 0, NULL, NULL);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "setxattr dirty = %d "
+                                  "failed for %s: %s", dirty, loc->path,
+                                  strerror (-ret));
+                goto out;
+        }
+
+        LOCK (&ctx->lock);
+        ctx->dirty = dirty;
+        UNLOCK (&ctx->lock);
+
+out:
+        if (xattrs != NULL)
+                dict_unref (xattrs);
+
+        return ret;
+}
+
+/* Read the on-disk quota size and/or contribution of loc with one lookup.
+ *
+ * size and contri are optional outputs, but at least one must be non-NULL
+ * (otherwise -1 is returned). contri_gfid identifies the contribution key
+ * and is only used when contri is requested for a non-root loc.
+ *
+ * For a directory the size comes from the size xattr; for any other inode
+ * it is derived from the lookup's stat (ia_blocks * 512, file_count 1).
+ * A contribution xattr that cannot be read is reported as all zeroes.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int32_t
+_mq_get_metadata (xlator_t *this, loc_t *loc, quota_meta_t *contri,
+ quota_meta_t *size, uuid_t contri_gfid)
+{
+ int32_t ret = -1;
+ quota_meta_t meta = {0, };
+ char contri_key[QUOTA_KEY_MAX] = {0, };
+ char size_key[QUOTA_KEY_MAX] = {0, };
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ struct iatt stbuf = {0,};
+
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+ /* nothing requested: treated as a failure (ret is still -1) */
+ if (size == NULL && contri == NULL)
+ goto out;
+
+ dict = dict_new ();
+ if (dict == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ /* request the size xattr; only directories have one */
+ if (size && loc->inode->ia_type == IA_IFDIR) {
+ GET_SIZE_KEY (this, size_key, ret);
+ if (ret < 0)
+ goto out;
+ ret = dict_set_int64 (dict, size_key, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_set failed.");
+ goto out;
+ }
+ }
+
+ /* request the contribution xattr; skipped for the root */
+ if (contri && !loc_is_root(loc)) {
+ ret = mq_dict_set_contribution (this, dict, loc, contri_gfid,
+ contri_key);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL,
+ dict, &rsp_dict);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+ ? GF_LOG_DEBUG:GF_LOG_ERROR, "lookup failed "
+ "for %s: %s", loc->path, strerror (-ret));
+ goto out;
+ }
+
+ if (size) {
+ if (loc->inode->ia_type == IA_IFDIR) {
+ ret = quota_dict_get_meta (rsp_dict, size_key,
+ &meta);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict_get failed.");
+ goto out;
+ }
+
+ size->size = meta.size;
+ size->file_count = meta.file_count;
+ size->dir_count = meta.dir_count;
+ } else {
+ /* non-directories: size taken from the stat block count */
+ size->size = stbuf.ia_blocks * 512;
+ size->file_count = 1;
+ size->dir_count = 0;
+ }
+ }
+
+ if (contri && !loc_is_root(loc)) {
+ ret = quota_dict_get_meta (rsp_dict, contri_key, &meta);
+ if (ret < 0) {
+ /* an unreadable contribution is reported as zero, not as an error */
+ contri->size = 0;
+ contri->file_count = 0;
+ contri->dir_count = 0;
+ } else {
+ contri->size = meta.size;
+ contri->file_count = meta.file_count;
+ contri->dir_count = meta.dir_count;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return ret;
+}
+
+/* Fetch size and/or contribution of loc via _mq_get_metadata() and cache
+ * the fetched values: size into ctx (under ctx->lock) and contri into
+ * contribution (under contribution->lock).
+ * Returns 0 on success (including when neither output is requested), a
+ * negative value on failure. */
+int32_t
+mq_get_metadata (xlator_t *this, loc_t *loc, quota_meta_t *contri,
+                 quota_meta_t *size, quota_inode_ctx_t *ctx,
+                 inode_contribution_t *contribution)
+{
+        int32_t ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+        GF_VALIDATE_OR_GOTO ("marker", contribution, out);
+
+        if (!size && !contri)
+                return 0;
+
+        ret = _mq_get_metadata (this, loc, contri, size, contribution->gfid);
+        if (ret < 0)
+                return ret;
+
+        if (size != NULL) {
+                LOCK (&ctx->lock);
+                ctx->size       = size->size;
+                ctx->file_count = size->file_count;
+                ctx->dir_count  = size->dir_count;
+                UNLOCK (&ctx->lock);
+        }
+
+        if (contri != NULL) {
+                LOCK (&contribution->lock);
+                contribution->contribution = contri->size;
+                contribution->file_count   = contri->file_count;
+                contribution->dir_count    = contri->dir_count;
+                UNLOCK (&contribution->lock);
+        }
+
+out:
+        return ret;
+}
+
+/* Fetch only the size of loc; no contribution is requested, so no
+ * contribution gfid is passed. */
+int32_t
+mq_get_size (xlator_t *this, loc_t *loc, quota_meta_t *size)
+{
+        int32_t ret = _mq_get_metadata (this, loc, NULL, size, NULL);
+
+        return ret;
+}
+
+/* Fetch only the contribution of loc towards the parent identified by
+ * contri_gfid. */
+int32_t
+mq_get_contri (xlator_t *this, loc_t *loc, quota_meta_t *contri,
+               uuid_t contri_gfid)
+{
+        int32_t ret = _mq_get_metadata (this, loc, contri, NULL, contri_gfid);
+
+        return ret;
+}
+
+/* Compute *delta = size - contribution for loc, refreshing the cached
+ * values in ctx and contribution along the way.
+ * Returns 0 on success; a negative value on failure, in which case
+ * *delta is left untouched. */
+int32_t
+mq_get_delta (xlator_t *this, loc_t *loc, quota_meta_t *delta,
+              quota_inode_ctx_t *ctx, inode_contribution_t *contribution)
+{
+        int32_t      ret      = -1;
+        quota_meta_t cur_size = {0, };
+        quota_meta_t cur_cont = {0, };
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+        GF_VALIDATE_OR_GOTO ("marker", contribution, out);
+
+        ret = mq_get_metadata (this, loc, &cur_cont, &cur_size, ctx,
+                               contribution);
+        if (ret >= 0)
+                mq_compute_delta (delta, &cur_size, &cur_cont);
+
+out:
+        return ret;
+}
+
+/* Remove the contribution xattr that loc holds for the parent identified
+ * by contri->gfid, then apply delta to the cached contribution.
+ *
+ * When nlink is 1 the removexattr is skipped (see the comment below).
+ * ENOENT, ESTALE, ENODATA and ENOATTR from removexattr are treated as
+ * success.
+ *
+ * The contribution node is unlinked from ctx and one reference on it is
+ * dropped on every path, including failures.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int32_t
+mq_remove_contri (xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contri, quota_meta_t *delta,
+ uint32_t nlink)
+{
+ int32_t ret = -1;
+ char contri_key[QUOTA_KEY_MAX] = {0, };
+
+ if (nlink == 1) {
+ /* The file was the last link and has been deleted */
+ ret = 0;
+ goto done;
+ }
+
+ GET_CONTRI_KEY (this, contri_key, contri->gfid, ret);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "get contri_key "
+ "failed for %s", uuid_utoa(contri->gfid));
+ goto out;
+ }
+
+ ret = syncop_removexattr (FIRST_CHILD(this), loc, contri_key, 0, NULL);
+ if (ret < 0) {
+ if (-ret == ENOENT || -ret == ESTALE || -ret == ENODATA ||
+ -ret == ENOATTR) {
+ /* The contribution is removed at "done" when an unlink
+ * operation is performed, so return success on
+ * ENOENT/ESTALE. A rename operation removes the xattr
+ * earlier, so return success on ENODATA.
+ */
+ ret = 0;
+ } else {
+ gf_log_callingfn (this->name, GF_LOG_ERROR,
+ "removexattr %s failed for %s: %s",
+ contri_key, loc->path,
+ strerror (-ret));
+ goto out;
+ }
+ }
+
+done:
+ /* fold the delta into the cached contribution */
+ LOCK (&contri->lock);
+ {
+ contri->contribution += delta->size;
+ contri->file_count += delta->file_count;
+ contri->dir_count += delta->dir_count;
+ }
+ UNLOCK (&contri->lock);
+
+ ret = 0;
+
+out:
+ /* runs on success and on failure alike */
+ QUOTA_FREE_CONTRIBUTION_NODE (ctx, contri);
+
+ return ret;
+}
+
+/* Add delta to the on-disk contribution xattr of loc (keyed by
+ * contri->gfid) with an ADD_ARRAY64 xattrop and, on success, to the cached
+ * values in contri. A null delta is a no-op.
+ * Returns 0 on success, a negative value on failure. */
+int32_t
+mq_update_contri (xlator_t *this, loc_t *loc, inode_contribution_t *contri,
+                  quota_meta_t *delta)
+{
+        int32_t  ret                        = -1;
+        char     contri_key[QUOTA_KEY_MAX]  = {0, };
+        dict_t  *xattrs                     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", delta, out);
+        GF_VALIDATE_OR_GOTO ("marker", contri, out);
+
+        if (quota_meta_is_null (delta))
+                return 0;
+
+        xattrs = dict_new ();
+        if (xattrs == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                ret = -1;
+                goto out;
+        }
+
+        GET_CONTRI_KEY (this, contri_key, contri->gfid, ret);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "get contri_key "
+                        "failed for %s", uuid_utoa(contri->gfid));
+                goto out;
+        }
+
+        ret = quota_dict_set_meta (xattrs, contri_key, delta,
+                                   loc->inode->ia_type);
+        if (ret < 0)
+                goto out;
+
+        ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_ADD_ARRAY64,
+                             xattrs, NULL, NULL);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+                goto out;
+        }
+
+        /* keep the in-memory copy in step with what was written */
+        LOCK (&contri->lock);
+        contri->contribution += delta->size;
+        contri->file_count += delta->file_count;
+        contri->dir_count += delta->dir_count;
+        UNLOCK (&contri->lock);
+
+out:
+        if (xattrs != NULL)
+                dict_unref (xattrs);
+
+        return ret;
+}
+
+/* Add delta to the on-disk size xattr of loc with an
+ * ADD_ARRAY64_WITH_DEFAULT xattrop and, on success, to the cached values
+ * in the inode ctx. A null delta is a no-op.
+ * Returns 0 on success, a negative value on failure. */
+int32_t
+mq_update_size (xlator_t *this, loc_t *loc, quota_meta_t *delta)
+{
+        int32_t            ret    = -1;
+        quota_inode_ctx_t *ctx    = NULL;
+        dict_t            *xattrs = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+        GF_VALIDATE_OR_GOTO ("marker", delta, out);
+
+        if (quota_meta_is_null (delta))
+                return 0;
+
+        ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get inode ctx for "
+                        "%s", loc->path);
+                goto out;
+        }
+
+        xattrs = dict_new ();
+        if (xattrs == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+                ret = -1;
+                goto out;
+        }
+
+        ret = quota_dict_set_size_meta (this, xattrs, delta);
+        if (ret < 0)
+                goto out;
+
+        ret = syncop_xattrop(FIRST_CHILD(this), loc,
+                             GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, xattrs,
+                             NULL, NULL);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE)
+                                  ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed "
+                                  "for %s: %s", loc->path, strerror (-ret));
+                goto out;
+        }
+
+        LOCK (&ctx->lock);
+        ctx->size += delta->size;
+        ctx->file_count += delta->file_count;
+        /* a cached dir_count of 0 gets one extra count on top of the delta */
+        ctx->dir_count += delta->dir_count + ((ctx->dir_count == 0) ? 1 : 0);
+        UNLOCK (&ctx->lock);
+
+out:
+        if (xattrs != NULL)
+                dict_unref (xattrs);
+
+        return ret;
+}
+
+/* Tear down a quota synctask argument block: wipe its loc, resume the
+ * attached call stub (if any) and free the block unless it is marked
+ * static. ret and frame are not used. Always returns 0. */
+int
+mq_synctask_cleanup (int ret, call_frame_t *frame, void *opaque)
+{
+        quota_synctask_t *task_args = NULL;
+
+        GF_ASSERT (opaque);
+
+        task_args = opaque;
+
+        loc_wipe (&task_args->loc);
+
+        if (task_args->stub != NULL)
+                call_resume (task_args->stub);
+
+        if (task_args->is_static == _gf_false)
+                GF_FREE (task_args);
+
+        return 0;
+}
+
+/* Run task on a copy of loc, either in a newly spawned synctask (spawn is
+ * true) or inline in the calling context (spawn is false).
+ *
+ * contri is optional; when NULL the task sees a contribution of -1 in
+ * every field. nlink is passed through as ia_nlink. stub is optional and
+ * is resumed exactly once on every path: by mq_synctask_cleanup() once it
+ * has been attached to the argument block, or here if the argument block
+ * could not be allocated.
+ *
+ * Returns the task's / synctask_new1()'s result, or a negative value if
+ * setting up the arguments failed.
+ */
+int
+mq_synctask1 (xlator_t *this, synctask_fn_t task, gf_boolean_t spawn,
+              loc_t *loc, quota_meta_t *contri, uint32_t nlink,
+              call_stub_t *stub)
+{
+        int32_t              ret         = -1;
+        quota_synctask_t    *args        = NULL;
+        quota_synctask_t     static_args = {0, };
+
+        if (spawn) {
+                /* jumps to out on allocation failure; stub is still owned
+                 * by this function at that point and is resumed there */
+                QUOTA_ALLOC_OR_GOTO (args, quota_synctask_t, ret, out);
+                args->is_static = _gf_false;
+        } else {
+                args = &static_args;
+                args->is_static = _gf_true;
+        }
+
+        args->this = this;
+        args->stub = stub;
+        /* from here on mq_synctask_cleanup() resumes the stub */
+        stub = NULL;
+        args->ia_nlink = nlink;
+
+        ret = loc_copy (&args->loc, loc);
+        if (ret < 0) {
+                /* do not run the task on a loc that failed to copy */
+                gf_log (this->name, GF_LOG_ERROR, "loc copy failed");
+                mq_synctask_cleanup (ret, NULL, args);
+                goto out;
+        }
+
+        if (contri) {
+                args->contri = *contri;
+        } else {
+                args->contri.size = -1;
+                args->contri.file_count = -1;
+                args->contri.dir_count = -1;
+        }
+
+        if (spawn) {
+                ret = synctask_new1 (this->ctx->env, 1024 * 16, task,
+                                     mq_synctask_cleanup, NULL, args);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to spawn "
+                                "new synctask");
+                        mq_synctask_cleanup (ret, NULL, args);
+                }
+        } else {
+                ret = task (args);
+                mq_synctask_cleanup (ret, NULL, args);
+        }
+
+out:
+        /* only still set when the argument block could not be allocated */
+        if (stub)
+                call_resume (stub);
+
+        return ret;
+}
+
+/* Convenience wrapper around mq_synctask1() for tasks that need neither a
+ * contribution, nor a link count (passed as -1), nor a call stub. */
+int
+mq_synctask (xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc)
+{
+        int ret = mq_synctask1 (this, task, spawn, loc, NULL, -1, NULL);
+
+        return ret;
+}
+
+/* Common precondition check for quota transactions.
+ *
+ * Rejects DHT link files and inode types other than regular file, symlink
+ * and directory (when buf is given), and origin locs without an inode or
+ * gfid. On acceptance origin_loc is copied into loc, loc->gfid and
+ * loc->parent are filled in when missing, and the quota inode ctx is
+ * returned through *ctx (ctx may be NULL if the caller does not need it).
+ *
+ * The caller owns loc and must loc_wipe() it on every return path.
+ *
+ * Returns 0 when the transaction may proceed, a negative value otherwise.
+ */
+int32_t
+mq_prevalidate_txn (xlator_t *this, loc_t *origin_loc, loc_t *loc,
+                    quota_inode_ctx_t **ctx, struct iatt *buf)
+{
+        int32_t               ret     = -1;
+        quota_inode_ctx_t    *ctxtmp  = NULL;
+
+        if (buf) {
+                if (buf->ia_type == IA_IFREG && IS_DHT_LINKFILE_MODE(buf))
+                        goto out;
+
+                if (buf->ia_type != IA_IFREG && buf->ia_type != IA_IFLNK &&
+                    buf->ia_type != IA_IFDIR)
+                        goto out;
+        }
+
+        if (origin_loc == NULL || origin_loc->inode == NULL ||
+            gf_uuid_is_null(origin_loc->inode->gfid))
+                goto out;
+
+        /* A failed copy must not be used: loc->inode is dereferenced right
+         * below and would not be valid. */
+        if (loc_copy (loc, origin_loc) < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "loc copy failed");
+                goto out;
+        }
+
+        if (gf_uuid_is_null (loc->gfid))
+                gf_uuid_copy (loc->gfid, loc->inode->gfid);
+
+        if (!loc_is_root(loc) && loc->parent == NULL)
+                loc->parent = inode_parent (loc->inode, 0, NULL);
+
+        ret = mq_inode_ctx_get (loc->inode, this, &ctxtmp);
+        if (ret < 0) {
+                gf_log_callingfn (this->name, GF_LOG_WARNING, "inode ctx "
+                                  "is NULL for %s", loc->path);
+                goto out;
+        }
+        if (ctx)
+                *ctx = ctxtmp;
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Synctask body that creates the missing quota xattrs of args->loc.
+ *
+ * Directories are write-locked for the duration of the check. If the
+ * contribution or size xattr is missing, the create status of the inode
+ * ctx is cleared, the size xattr is created where needed and, after
+ * unlocking, a blocking quota update transaction is started.
+ *
+ * NOTE(review): the return value is overwritten by the unlock and by the
+ * follow-up transaction, so an earlier failure is not always reflected in
+ * the result; kept as is because callers may depend on it.
+ */
+int
+mq_create_xattrs_task (void *opaque)
+{
+        int32_t                  ret        = -1;
+        gf_boolean_t             locked     = _gf_false;
+        gf_boolean_t             contri_set = _gf_false;
+        gf_boolean_t             size_set   = _gf_false;
+        gf_boolean_t             need_txn   = _gf_false;
+        quota_synctask_t        *args       = NULL;
+        quota_inode_ctx_t       *ctx        = NULL;
+        xlator_t                *this       = NULL;
+        loc_t                   *loc        = NULL;
+        gf_boolean_t             status     = _gf_false;
+
+        GF_ASSERT (opaque);
+
+        args = (quota_synctask_t *) opaque;
+        loc = &args->loc;
+        this = args->this;
+        THIS = this;
+
+        ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "Failed to "
+                        "get inode ctx, aborting quota create txn");
+                goto out;
+        }
+
+        if (loc->inode->ia_type == IA_IFDIR) {
+                /* lock not required for files */
+                ret = mq_lock (this, loc, F_WRLCK);
+                if (ret < 0)
+                        goto out;
+                locked = _gf_true;
+        }
+
+        ret = mq_are_xattrs_set (this, loc, &contri_set, &size_set);
+        if (ret < 0 || (contri_set && size_set))
+                goto out;
+
+        mq_set_ctx_create_status (ctx, _gf_false);
+        status = _gf_true;
+
+        if (loc->inode->ia_type == IA_IFDIR && size_set == _gf_false) {
+                ret = mq_create_size_xattrs (this, ctx, loc);
+                if (ret < 0)
+                        goto out;
+        }
+
+        need_txn = _gf_true;
+out:
+        if (locked)
+                ret = mq_lock (this, loc, F_UNLCK);
+
+        /* ctx is NULL when the ctx lookup above failed; there is no status
+         * to reset in that case */
+        if (ctx != NULL && status == _gf_false)
+                mq_set_ctx_create_status (ctx, _gf_false);
+
+        if (need_txn)
+                ret = mq_initiate_quota_blocking_txn (this, loc, NULL);
+
+        return ret;
+}
+
+/* Start the xattr-creation transaction for origin_loc, spawned as a
+ * synctask or run inline depending on spawn.
+ *
+ * The create status of the inode ctx is claimed with a test-and-set; if a
+ * creation is already in progress nothing is done. For a non-root loc with
+ * a parent, a contribution node is added first. On failure the claimed
+ * create status is released again.
+ */
+static int
+_mq_create_xattrs_txn (xlator_t *this, loc_t *origin_loc, struct iatt *buf,
+                       gf_boolean_t spawn)
+{
+        int32_t               ret      = -1;
+        quota_inode_ctx_t    *ctx      = NULL;
+        gf_boolean_t          creating = _gf_true;
+        loc_t                 loc      = {0, };
+        inode_contribution_t *node     = NULL;
+
+        ret = mq_prevalidate_txn (this, origin_loc, &loc, &ctx, buf);
+        if (ret < 0)
+                goto out;
+
+        /* after this call "creating" holds the previous create status */
+        ret = mq_test_and_set_ctx_create_status (ctx, &creating);
+        if (ret < 0 || creating == _gf_true)
+                goto out;
+
+        if (!loc_is_root(&loc) && loc.parent) {
+                node = mq_add_new_contribution_node (this, ctx, &loc);
+                if (node == NULL) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "cannot add a new contribution node "
+                                "(%s)", uuid_utoa (loc.gfid));
+                        ret = -1;
+                        goto out;
+                }
+
+                /* only the list membership is needed, not our reference */
+                GF_REF_PUT (node);
+        }
+
+        ret = mq_synctask (this, mq_create_xattrs_task, spawn, &loc);
+out:
+        if (ret < 0 && creating == _gf_false)
+                mq_set_ctx_create_status (ctx, _gf_false);
+
+        loc_wipe (&loc);
+        return ret;
+}
+
+/* Start the xattr-creation transaction for loc in a spawned synctask.
+ * Returns -1 if loc or loc->inode is NULL, otherwise the result of
+ * _mq_create_xattrs_txn(). */
+int
+mq_create_xattrs_txn (xlator_t *this, loc_t *loc, struct iatt *buf)
+{
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+        return _mq_create_xattrs_txn (this, loc, buf, _gf_true);
+out:
+        return -1;
+}
+
+/* Run the xattr-creation transaction for loc inline, in the calling
+ * context. Returns -1 if loc or loc->inode is NULL, otherwise the result
+ * of _mq_create_xattrs_txn(). */
+int
+mq_create_xattrs_blocking_txn (xlator_t *this, loc_t *loc, struct iatt *buf)
+{
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+        return _mq_create_xattrs_txn (this, loc, buf, _gf_false);
+out:
+        return -1;
+}
+
+/* Synctask body that subtracts a child's contribution from its parent.
+ *
+ * With the parent write-locked and marked dirty, the contribution (taken
+ * from args->contri when supplied, otherwise from the cached contribution
+ * node) is negated, the child's contribution xattr is removed unless the
+ * contribution was supplied by the caller, and the negated delta is
+ * applied to the parent's size. Afterwards the dirty mark is cleared, the
+ * lock released and a blocking update transaction started on the parent.
+ *
+ * NOTE(review): in the cleanup section ret is overwritten by the dirty
+ * handling and by the unlock, so the follow-up transaction on the parent
+ * can run even when an earlier step failed - confirm this is intended.
+ */
+int32_t
+mq_reduce_parent_size_task (void *opaque)
+{
+ int32_t ret = -1;
+ int32_t prev_dirty = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_inode_ctx_t *parent_ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+ quota_meta_t delta = {0, };
+ quota_meta_t contri = {0, };
+ loc_t parent_loc = {0,};
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t dirty = _gf_false;
+ quota_synctask_t *args = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ gf_boolean_t remove_xattr = _gf_true;
+ uint32_t nlink = 0;
+
+ GF_ASSERT (opaque);
+
+ args = (quota_synctask_t *) opaque;
+ loc = &args->loc;
+ contri = args->contri;
+ nlink = args->ia_nlink;
+ this = args->this;
+ THIS = this;
+
+ ret = mq_inode_loc_fill (NULL, loc->parent, &parent_loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "parent_loc fill failed for "
+ "child inode %s: ", uuid_utoa (loc->inode->gfid));
+ goto out;
+ }
+
+ ret = mq_lock (this, &parent_loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
+
+ if (contri.size >= 0) {
+ /* The contri parameter is supplied only for the rename operation.
+ * The xattr has already been removed, so removexattr must be
+ * skipped for the rename operation.
+ */
+ remove_xattr = _gf_false;
+ delta.size = contri.size;
+ delta.file_count = contri.file_count;
+ delta.dir_count = contri.dir_count;
+ } else {
+ remove_xattr = _gf_true;
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING, "ctx for"
+ " the node %s is NULL", loc->path);
+ goto out;
+ }
+
+ contribution = mq_get_contribution_node (loc->parent, ctx);
+ if (contribution == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "contribution for the node %s is NULL",
+ loc->path);
+ goto out;
+ }
+
+ /* snapshot the cached contribution under its lock */
+ LOCK (&contribution->lock);
+ {
+ delta.size = contribution->contribution;
+ delta.file_count = contribution->file_count;
+ delta.dir_count = contribution->dir_count;
+ }
+ UNLOCK (&contribution->lock);
+ }
+
+ ret = mq_get_set_dirty (this, &parent_loc, 1, &prev_dirty);
+ if (ret < 0)
+ goto out;
+ dirty = _gf_true;
+
+ /* negate delta: the contribution is being taken away from the parent */
+ mq_sub_meta (&delta, NULL);
+
+ if (remove_xattr) {
+ ret = mq_remove_contri (this, loc, ctx, contribution, &delta,
+ nlink);
+ if (ret < 0)
+ goto out;
+ }
+
+ if (quota_meta_is_null (&delta))
+ goto out;
+
+ ret = mq_update_size (this, &parent_loc, &delta);
+ if (ret < 0)
+ goto out;
+
+out:
+ if (dirty) {
+ if (ret < 0 || prev_dirty) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory.
+ * Do the same if dir was dirty before
+ * the txn
+ */
+ ret = mq_inode_ctx_get (parent_loc.inode, this,
+ &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status (parent_ctx, _gf_false);
+ } else {
+ ret = mq_mark_dirty (this, &parent_loc, 0);
+ }
+ }
+
+ if (locked)
+ ret = mq_lock (this, &parent_loc, F_UNLCK);
+
+ /* propagate the change further up from the parent */
+ if (ret >= 0)
+ ret = mq_initiate_quota_blocking_txn (this, &parent_loc, NULL);
+
+ loc_wipe (&parent_loc);
+
+ /* drop the reference obtained from mq_get_contribution_node() */
+ if (contribution)
+ GF_REF_PUT (contribution);
+
+ return ret;
+}
+
+/* Spawn a synctask that subtracts origin_loc's contribution from its
+ * parent (see mq_reduce_parent_size_task).
+ *
+ * contri is optional and forwarded to the task; nlink is the link count
+ * forwarded as ia_nlink. stub is optional: when the task is not handed
+ * over to mq_synctask1() (validation failure or root inode) it is resumed
+ * here, otherwise mq_synctask1() is responsible for it.
+ *
+ * Returns 0 on success (including the root no-op), non-zero on failure.
+ */
+int32_t
+mq_reduce_parent_size_txn (xlator_t *this, loc_t *origin_loc,
+                           quota_meta_t *contri, uint32_t nlink,
+                           call_stub_t *stub)
+{
+        int32_t                  ret           = -1;
+        loc_t                    loc           = {0, };
+        gf_boolean_t             resume_stub   = _gf_true;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+        GF_VALIDATE_OR_GOTO ("marker", origin_loc, out);
+
+        ret = mq_prevalidate_txn (this, origin_loc, &loc, NULL, NULL);
+        if (ret < 0)
+                goto out;
+
+        /* the root has no parent whose size could be reduced */
+        if (loc_is_root(&loc)) {
+                ret = 0;
+                goto out;
+        }
+
+        resume_stub = _gf_false;
+        ret = mq_synctask1 (this, mq_reduce_parent_size_task, _gf_true, &loc,
+                            contri, nlink, stub);
+out:
+        loc_wipe (&loc);
+
+        if (resume_stub && stub)
+                call_resume (stub);
+
+        /* this may be NULL here when its validation above failed, so do
+         * not dereference it for the log domain */
+        if (ret)
+                gf_log_callingfn (this ? this->name : "marker", GF_LOG_ERROR,
+                                  "mq_reduce_parent_size_txn failed");
+
+        return ret;
+}
+
+/*
+ * Synctask body that propagates a size/count delta from args->loc upwards,
+ * one parent at a time, until the root is reached or an iteration fails.
+ *
+ * Per iteration: take a write lock on the parent, find (or create) the
+ * child's contribution node for that parent, compute the delta, mark the
+ * parent dirty, update the child's contribution and the parent's size,
+ * clear the dirty mark, unlock, then continue with the parent as the
+ * new child.
+ *
+ * opaque is a quota_synctask_t *. The function always returns 0; failures
+ * only end the walk early.
+ */
+int
+mq_initiate_quota_task (void *opaque)
+{
+ int32_t ret = -1;
+ int32_t prev_dirty = 0;
+ loc_t child_loc = {0,};
+ loc_t parent_loc = {0,};
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t dirty = _gf_false;
+ gf_boolean_t status = _gf_false;
+ quota_meta_t delta = {0, };
+ quota_synctask_t *args = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ inode_contribution_t *contri = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_inode_ctx_t *parent_ctx = NULL;
+ inode_t *tmp_parent = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", opaque, out);
+
+ args = (quota_synctask_t *) opaque;
+ loc = &args->loc;
+ this = args->this;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ THIS = this;
+
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ ret = mq_loc_copy (&child_loc, loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "loc copy failed");
+ goto out;
+ }
+
+ while (!__is_root_gfid (child_loc.gfid)) {
+
+ ret = mq_inode_ctx_get (child_loc.inode, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "inode ctx get failed for %s, "
+ "aborting update txn", child_loc.path);
+ goto out;
+ }
+
+ /* To improve performance, abort current transaction
+ * if one is already in progress for same inode
+ */
+ if (status == _gf_true) {
+ /* status will already set before txn start,
+ * so it should not be set in first
+ * loop iteration
+ */
+ ret = mq_test_and_set_ctx_updation_status (ctx,
+ &status);
+ if (ret < 0 || status == _gf_true)
+ goto out;
+ }
+
+ if (child_loc.parent == NULL) {
+ ret = mq_build_ancestry (this, &child_loc);
+ if (ret < 0 || child_loc.parent == NULL) {
+ /* If application performs parallel remove
+ * operations on same set of files/directories
+ * then we may get ENOENT/ESTALE
+ */
+ gf_log (this->name,
+ (-ret == ENOENT || -ret == ESTALE)
+ ? GF_LOG_DEBUG:GF_LOG_ERROR,
+ "build ancestry failed for inode %s",
+ uuid_utoa (child_loc.inode->gfid));
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = mq_inode_loc_fill (NULL, child_loc.parent, &parent_loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "parent_loc fill "
+ "failed for child inode %s: ",
+ uuid_utoa (child_loc.inode->gfid));
+ goto out;
+ }
+
+ ret = mq_lock (this, &parent_loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
+
+ /* The child's updation flag is cleared once the parent lock is
+ * held; 'status' is set so that later iterations (and the
+ * cleanup at 'out') know the flag is no longer owned here.
+ */
+ mq_set_ctx_updation_status (ctx, _gf_false);
+ status = _gf_true;
+
+ /* Contribution node can be NULL in below scenarios and
+ create if needed:
+
+ Scenario 1)
+ In this case create a new contribution node
+ Suppose hard link for a file f1 present in a directory d1 is
+ created in the directory d2 (as f2). Now, since d2's
+ contribution is not there in f1's inode ctx, d2's
+ contribution xattr wont be created and will create problems
+ for quota operations.
+
+ Don't create contribution if parent has been changed after
+ taking a lock, this can happen when rename is performed
+ and writes is still in-progress for the same file
+
+ Scenario 2)
+ When a rename operation is performed, contribution node
+ for old path will be removed.
+
+ Create contribution node only if oldparent is same as
+ newparent.
+ Consider below example
+ 1) rename FOP invoked on file 'x'
+ 2) write is still in progress for file 'x'
+ 3) rename takes a lock on old-parent
+ 4) write-update txn blocked on old-parent to acquire lock
+ 5) in rename_cbk, contri xattrs are removed and contribution
+ is deleted and lock is released
+ 6) now write-update txn gets the lock and updates the
+ wrong parent as it was holding lock on old parent
+ so validate parent once the lock is acquired
+
+ For more information on this problem, please see
+ doc for marker_rename in file marker.c
+ */
+ contri = mq_get_contribution_node (child_loc.parent, ctx);
+ if (contri == NULL) {
+ tmp_parent = inode_parent (child_loc.inode, 0, NULL);
+ if (tmp_parent == NULL) {
+ /* This can happen if application performs
+ * parallel remove operations on same set
+ * of files/directories
+ */
+ gf_log (this->name, GF_LOG_WARNING, "parent is "
+ "NULL for inode %s",
+ uuid_utoa (child_loc.inode->gfid));
+ ret = -1;
+ goto out;
+ }
+ if (gf_uuid_compare(tmp_parent->gfid,
+ parent_loc.gfid)) {
+ /* abort txn if parent has changed */
+ ret = 0;
+ goto out;
+ }
+
+ inode_unref (tmp_parent);
+ tmp_parent = NULL;
+
+ contri = mq_add_new_contribution_node (this, ctx,
+ &child_loc);
+ if (contri == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "create contribution node for %s, "
+ "abort update txn", child_loc.path);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = mq_get_delta (this, &child_loc, &delta, ctx, contri);
+ if (ret < 0)
+ goto out;
+
+ /* Nothing to propagate: stop the walk here */
+ if (quota_meta_is_null (&delta))
+ goto out;
+
+ ret = mq_get_set_dirty (this, &parent_loc, 1, &prev_dirty);
+ if (ret < 0)
+ goto out;
+ dirty = _gf_true;
+
+ ret = mq_update_contri (this, &child_loc, contri, &delta);
+ if (ret < 0)
+ goto out;
+
+ ret = mq_update_size (this, &parent_loc, &delta);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "rollback "
+ "contri updation");
+ mq_sub_meta (&delta, NULL);
+ mq_update_contri (this, &child_loc, contri, &delta);
+ goto out;
+ }
+
+ /* Only clear the on-disk dirty mark if this txn set it; if the
+ * directory was already dirty just reset the in-memory
+ * dirty status of the parent ctx.
+ */
+ if (prev_dirty == 0) {
+ ret = mq_mark_dirty (this, &parent_loc, 0);
+ } else {
+ ret = mq_inode_ctx_get (parent_loc.inode, this,
+ &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status (parent_ctx, _gf_false);
+ }
+ dirty = _gf_false;
+ prev_dirty = 0;
+
+ ret = mq_lock (this, &parent_loc, F_UNLCK);
+ locked = _gf_false;
+
+ if (__is_root_gfid (parent_loc.gfid))
+ break;
+
+ /* Repeat above steps upwards till the root */
+ loc_wipe (&child_loc);
+ ret = mq_loc_copy (&child_loc, &parent_loc);
+ if (ret < 0)
+ goto out;
+
+ loc_wipe (&parent_loc);
+ GF_REF_PUT (contri);
+ contri = NULL;
+ }
+
+out:
+ if (dirty) {
+ if (ret < 0 || prev_dirty) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory.
+ * Do the same if the dir was dirty before
+ * txn
+ */
+ ret = mq_inode_ctx_get (parent_loc.inode, this,
+ &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status (parent_ctx, _gf_false);
+ } else {
+ ret = mq_mark_dirty (this, &parent_loc, 0);
+ }
+ }
+
+ if (locked)
+ ret = mq_lock (this, &parent_loc, F_UNLCK);
+
+ /* status still _gf_false means the updation flag was never cleared
+ * inside the loop (or was claimed by the test-and-set above), so
+ * clear it here.
+ */
+ if (ctx && status == _gf_false)
+ mq_set_ctx_updation_status (ctx, _gf_false);
+
+ loc_wipe (&child_loc);
+ loc_wipe (&parent_loc);
+
+ if (tmp_parent)
+ inode_unref (tmp_parent);
+
+ if (contri)
+ GF_REF_PUT (contri);
+
+ return 0;
+}
+
+/*
+ * Common entry for the quota update transaction.
+ *
+ * Validates origin_loc via mq_prevalidate_txn, does nothing for the root,
+ * and otherwise claims the ctx "updation" flag before scheduling
+ * mq_initiate_quota_task through mq_synctask ('spawn' is forwarded as is).
+ * If the flag was claimed here and scheduling failed, the flag is released
+ * again.
+ *
+ * Returns a negative value on failure, otherwise the value of the last
+ * step executed.
+ */
+int
+_mq_initiate_quota_txn (xlator_t *this, loc_t *origin_loc, struct iatt *buf,
+                        gf_boolean_t spawn)
+{
+        loc_t               txn_loc     = {0,};
+        quota_inode_ctx_t  *inode_ctx   = NULL;
+        gf_boolean_t        in_progress = _gf_true;
+        int32_t             rc          = -1;
+
+        rc = mq_prevalidate_txn (this, origin_loc, &txn_loc, &inode_ctx, buf);
+        if (rc >= 0) {
+                if (loc_is_root(&txn_loc)) {
+                        rc = 0;
+                } else {
+                        rc = mq_test_and_set_ctx_updation_status (inode_ctx,
+                                                                  &in_progress);
+                        if (rc >= 0 && in_progress != _gf_true)
+                                rc = mq_synctask (this, mq_initiate_quota_task,
+                                                  spawn, &txn_loc);
+                }
+        }
+
+        /* in_progress == _gf_false only when the flag was claimed above */
+        if (rc < 0 && in_progress == _gf_false)
+                mq_set_ctx_updation_status (inode_ctx, _gf_false);
+
+        loc_wipe (&txn_loc);
+        return rc;
+}
+
+/*
+ * Validate the arguments and start the quota update transaction with
+ * spawn = _gf_true. Returns -1 when an argument is NULL, otherwise the
+ * result of _mq_initiate_quota_txn.
+ */
+int
+mq_initiate_quota_txn (xlator_t *this, loc_t *loc, struct iatt *buf)
+{
+        int32_t rc = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, done);
+        GF_VALIDATE_OR_GOTO ("marker", loc, done);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, done);
+
+        rc = _mq_initiate_quota_txn (this, loc, buf, _gf_true);
+
+done:
+        return rc;
+}
+
+/*
+ * Same as mq_initiate_quota_txn, but passes spawn = _gf_false to
+ * _mq_initiate_quota_txn. Returns -1 when an argument is NULL.
+ */
+int
+mq_initiate_quota_blocking_txn (xlator_t *this, loc_t *loc, struct iatt *buf)
+{
+        int32_t rc = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, done);
+        GF_VALIDATE_OR_GOTO ("marker", loc, done);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, done);
+
+        rc = _mq_initiate_quota_txn (this, loc, buf, _gf_false);
+
+done:
+        return rc;
+}
+
+/*
+ * Synctask body that repairs the size xattr of a dirty directory.
+ *
+ * Under a write lock on args->loc it re-reads the dirty flag and, if the
+ * directory is still dirty, walks the directory with readdirp, sums the
+ * contribution xattr of every entry (plus one dir_count for the directory
+ * itself), compares the sum with the stored size and applies the
+ * difference with mq_update_size. When the size was updated, a blocking
+ * quota txn is started afterwards to propagate the change.
+ *
+ * opaque is a quota_synctask_t *. Returns 0 on success or when there is
+ * nothing to do, a negative value on failure.
+ */
+int
+mq_update_dirty_inode_task (void *opaque)
+{
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t updated = _gf_false;
+ int32_t dirty = 0;
+ quota_meta_t contri = {0, };
+ quota_meta_t size = {0, };
+ quota_meta_t contri_sum = {0, };
+ quota_meta_t delta = {0, };
+ quota_synctask_t *args = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ dict_t *xdata = NULL;
+ char contri_key[QUOTA_KEY_MAX] = {0, };
+
+ GF_ASSERT (opaque);
+
+ args = (quota_synctask_t *) opaque;
+ loc = &args->loc;
+ this = args->this;
+ THIS = this;
+ /* entries is not zero-initialised; the list head must be set up
+ * before any 'goto out', because 'out' calls gf_dirent_free on it.
+ */
+ INIT_LIST_HEAD (&entries.list);
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ GET_CONTRI_KEY (this, contri_key, loc->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ xdata = dict_new ();
+ if (xdata == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* Ask readdirp to return each entry's contribution xattr */
+ ret = dict_set_int64 (xdata, contri_key, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_set failed");
+ goto out;
+ }
+
+ ret = mq_lock (this, loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
+
+ /* A failure to read the dirty flag is treated like "not dirty" */
+ ret = mq_get_dirty (this, loc, &dirty);
+ if (ret < 0 || dirty == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ fd = fd_create (loc->inode, 0);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_log (this->name, (-ret == ENOENT || -ret == ESTALE)
+ ? GF_LOG_DEBUG:GF_LOG_ERROR, "opendir failed "
+ "for %s: %s", loc->path, strerror (-ret));
+ goto out;
+ }
+
+ fd_bind (fd);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries,
+ xdata, NULL)) != 0) {
+ if (ret < 0) {
+ gf_log (this->name, (-ret == ENOENT || -ret == ESTALE)
+ ? GF_LOG_DEBUG:GF_LOG_ERROR, "readdirp failed "
+ "for %s: %s", loc->path, strerror (-ret));
+ goto out;
+ }
+
+ if (list_empty (&entries.list))
+ break;
+
+ list_for_each_entry (entry, &entries.list, list) {
+ /* Remember where to resume the next readdirp call */
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ memset (&contri, 0, sizeof (contri));
+ quota_dict_get_meta (entry->dict, contri_key, &contri);
+ if (quota_meta_is_null (&contri))
+ continue;
+
+ mq_add_meta (&contri_sum, &contri);
+ }
+
+ gf_dirent_free (&entries);
+ }
+ /* Include for self */
+ contri_sum.dir_count++;
+
+ ret = mq_get_size (this, loc, &size);
+ if (ret < 0)
+ goto out;
+
+ mq_compute_delta (&delta, &contri_sum, &size);
+
+ if (quota_meta_is_null (&delta))
+ goto out;
+
+ gf_log (this->name, GF_LOG_INFO, "calculated size = %"PRId64
+ ", original size = %"PRIu64 ", diff = %"PRIu64
+ ", path = %s ", contri_sum.size, size.size, delta.size,
+ loc->path);
+
+ gf_log (this->name, GF_LOG_INFO, "calculated f_count = %"PRId64
+ ", original f_count = %"PRIu64 ", diff = %"PRIu64
+ ", path = %s ", contri_sum.file_count, size.file_count,
+ delta.file_count, loc->path);
+
+ gf_log (this->name, GF_LOG_INFO, "calculated d_count = %"PRId64
+ ", original d_count = %"PRIu64 ", diff = %"PRIu64
+ ", path = %s ", contri_sum.dir_count, size.dir_count,
+ delta.dir_count, loc->path);
+
+
+ ret = mq_update_size (this, loc, &delta);
+ if (ret < 0)
+ goto out;
+
+ updated = _gf_true;
+
+out:
+ gf_dirent_free (&entries);
+
+ if (fd)
+ fd_unref (fd);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ if (ret < 0) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory
+ */
+ if (ctx)
+ mq_set_ctx_dirty_status (ctx, _gf_false);
+ } else if (dirty) {
+ mq_mark_dirty (this, loc, 0);
+ }
+
+ if (locked)
+ mq_lock (this, loc, F_UNLCK);
+
+ /* Propagate the corrected size only after the lock is released */
+ if (updated)
+ mq_initiate_quota_blocking_txn (this, loc, NULL);
+
+ return ret;
+}
+
+/*
+ * Schedule mq_update_dirty_inode_task for loc unless the ctx dirty-status
+ * flag shows that one is already running.
+ *
+ * The flag is claimed with mq_test_and_set_ctx_dirty_status; if it was
+ * claimed here and scheduling fails, it is released again.
+ *
+ * Returns -1 on invalid loc, otherwise the result of the last step run.
+ */
+int32_t
+mq_update_dirty_inode_txn (xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx)
+{
+        gf_boolean_t  in_progress = _gf_true;
+        int32_t       rc          = -1;
+
+        GF_VALIDATE_OR_GOTO ("marker", loc, done);
+        GF_VALIDATE_OR_GOTO ("marker", loc->inode, done);
+
+        rc = mq_test_and_set_ctx_dirty_status (ctx, &in_progress);
+        if (rc >= 0 && in_progress != _gf_true)
+                rc = mq_synctask (this, mq_update_dirty_inode_task, _gf_true,
+                                  loc);
+
+done:
+        /* in_progress == _gf_false only when the flag was claimed above */
+        if (rc < 0 && in_progress == _gf_false)
+                mq_set_ctx_dirty_status (ctx, _gf_false);
+
+        return rc;
+}
+
+/*
+ * Inspect the quota xattrs returned for a directory and bring the in-memory
+ * state in line with them.
+ *
+ * Reads the dirty flag, the size xattr and (for non-root) the contribution
+ * xattr from dict and copies them into ctx / contribution. If either the
+ * size or contribution xattr is missing, mq_create_xattrs_txn is started
+ * instead. Otherwise, unless an update txn is already in progress, it
+ * starts a dirty-inode repair when the dirty flag is set, or a quota update
+ * txn when size and contribution differ.
+ *
+ * contribution is dereferenced whenever loc is not the root.
+ * Returns 0 or the result of the txn that was started; negative on failure.
+ */
+int32_t
+mq_inspect_directory_xattr (xlator_t *this, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution, loc_t *loc,
+ dict_t *dict, struct iatt buf)
+{
+ int32_t ret = -1;
+ int8_t dirty = -1;
+ quota_meta_t size = {0, };
+ quota_meta_t contri = {0, };
+ quota_meta_t delta = {0, };
+ char contri_key[QUOTA_KEY_MAX] = {0, };
+ char size_key[QUOTA_KEY_MAX] = {0, };
+ gf_boolean_t status = _gf_false;
+
+ ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
+ if (ret < 0) {
+ /* dirty is set only on the first file write operation
+ * so ignore this error
+ */
+ ret = 0;
+ dirty = 0;
+ }
+
+ GET_SIZE_KEY (this, size_key, ret);
+ if (ret < 0)
+ goto out;
+ ret = _quota_dict_get_meta (this, dict, size_key, &size,
+ IA_IFDIR, _gf_false);
+ if (ret < 0)
+ goto create_xattr;
+
+ if (!loc_is_root(loc)) {
+ GET_CONTRI_KEY (this, contri_key, contribution->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ ret = _quota_dict_get_meta (this, dict, contri_key, &contri,
+ IA_IFDIR, _gf_false);
+ if (ret < 0)
+ goto create_xattr;
+
+ LOCK (&contribution->lock);
+ {
+ contribution->contribution = contri.size;
+ contribution->file_count = contri.file_count;
+ contribution->dir_count = contri.dir_count;
+ }
+ UNLOCK (&contribution->lock);
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->size = size.size;
+ ctx->file_count = size.file_count;
+ ctx->dir_count = size.dir_count;
+ ctx->dirty = dirty;
+ }
+ UNLOCK (&ctx->lock);
+
+ ret = mq_get_ctx_updation_status (ctx, &status);
+ if (ret < 0 || status == _gf_true) {
+ /* If the update txn is in progress abort inspection */
+ ret = 0;
+ goto out;
+ }
+
+ mq_compute_delta (&delta, &size, &contri);
+
+ /* A dirty directory is repaired first; the repair task itself
+ * starts the update txn when it changes the size.
+ */
+ if (dirty) {
+ ret = mq_update_dirty_inode_txn (this, loc, ctx);
+ goto out;
+ }
+
+ if (!loc_is_root(loc) &&
+ !quota_meta_is_null (&delta))
+ mq_initiate_quota_txn (this, loc, NULL);
+
+ ret = 0;
+ goto out;
+
+create_xattr:
+ /* Reached only via goto with ret < 0 (xattr missing in dict) */
+ if (ret < 0)
+ ret = mq_create_xattrs_txn (this, loc, NULL);
+
+out:
+ return ret;
+}
+
+/*
+ * Inspect the quota xattrs returned for a non-directory inode.
+ *
+ * The file's own size is taken from buf (512 * ia_blocks, one file, zero
+ * dirs) and stored in ctx. The contribution xattr for the parent is then
+ * read from dict: if it is missing, mq_create_xattrs_txn is started;
+ * otherwise it is cached in contribution and, unless an update txn is
+ * already in progress, a quota update txn is started when size and
+ * contribution differ.
+ *
+ * contribution is dereferenced unconditionally.
+ * Returns 0 or the result of the last step run; negative on failure.
+ */
+int32_t
+mq_inspect_file_xattr (xlator_t *this, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution, loc_t *loc,
+ dict_t *dict, struct iatt buf)
+{
+ int32_t ret = -1;
+ quota_meta_t size = {0, };
+ quota_meta_t contri = {0, };
+ quota_meta_t delta = {0, };
+ char contri_key[QUOTA_KEY_MAX] = {0, };
+ gf_boolean_t status = _gf_false;
+
+ LOCK (&ctx->lock);
+ {
+ ctx->size = 512 * buf.ia_blocks;
+ ctx->file_count = 1;
+ ctx->dir_count = 0;
+
+ size.size = ctx->size;
+ size.file_count = ctx->file_count;
+ size.dir_count = ctx->dir_count;
+ }
+ UNLOCK (&ctx->lock);
+
+ GET_CONTRI_KEY (this, contri_key, contribution->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ ret = _quota_dict_get_meta (this, dict, contri_key, &contri,
+ IA_IFREG, _gf_true);
+ if (ret < 0) {
+ ret = mq_create_xattrs_txn (this, loc, NULL);
+ } else {
+ LOCK (&contribution->lock);
+ {
+ contribution->contribution = contri.size;
+ contribution->file_count = contri.file_count;
+ contribution->dir_count = contri.dir_count;
+ }
+ UNLOCK (&contribution->lock);
+
+ ret = mq_get_ctx_updation_status (ctx, &status);
+ if (ret < 0 || status == _gf_true) {
+ /* If the update txn is in progress abort inspection */
+ ret = 0;
+ goto out;
+ }
+
+ mq_compute_delta (&delta, &size, &contri);
+ if (!quota_meta_is_null (&delta))
+ mq_initiate_quota_txn (this, loc, NULL);
+ }
+ /* TODO: revisit this code when fixing hardlinks */
+
+out:
+ return ret;
+}
+
+/*
+ * Entry point for inspecting the quota xattrs (dict) of origin_loc.
+ *
+ * After mq_prevalidate_txn succeeds and the loc has a parent, a
+ * contribution node is obtained for non-root locs and the work is handed
+ * to the directory or file inspector depending on buf.ia_type. The
+ * inspector's return value is not propagated.
+ *
+ * Returns -1 if the contribution node cannot be added, otherwise the
+ * result of mq_prevalidate_txn.
+ */
+int32_t
+mq_xattr_state (xlator_t *this, loc_t *origin_loc, dict_t *dict,
+                struct iatt buf)
+{
+        loc_t                  txn_loc   = {0, };
+        quota_inode_ctx_t     *inode_ctx = NULL;
+        inode_contribution_t  *node      = NULL;
+        int32_t                rc        = -1;
+
+        rc = mq_prevalidate_txn (this, origin_loc, &txn_loc, &inode_ctx, &buf);
+        if (rc < 0 || txn_loc.parent == NULL)
+                goto cleanup;
+
+        if (!loc_is_root(&txn_loc)) {
+                node = mq_add_new_contribution_node (this, inode_ctx,
+                                                     &txn_loc);
+                if (node == NULL) {
+                        /* stay quiet when the inode has no gfid yet */
+                        if (!gf_uuid_is_null (txn_loc.inode->gfid))
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "cannot add a new contribution node "
+                                        "(%s)", uuid_utoa (txn_loc.gfid));
+                        rc = -1;
+                        goto cleanup;
+                }
+        }
+
+        if (buf.ia_type != IA_IFDIR)
+                mq_inspect_file_xattr (this, inode_ctx, node, &txn_loc, dict,
+                                       buf);
+        else
+                mq_inspect_directory_xattr (this, inode_ctx, node, &txn_loc,
+                                            dict, buf);
+
+cleanup:
+        loc_wipe (&txn_loc);
+
+        if (node)
+                GF_REF_PUT (node);
+
+        return rc;
+}
+
+/*
+ * Add the quota xattr keys that should be fetched for loc to dict.
+ *
+ * @this:       marker xlator
+ * @loc:        loc being looked up
+ * @dict:       request dict that receives the keys
+ * @contri_key: forwarded to mq_dict_set_contribution for non-root locs
+ * @size_key:   optional; when non-NULL it receives the (versioned) size key
+ *              and must be at least QUOTA_KEY_MAX bytes
+ *
+ * For non-root locs the contribution key is set first; then the size key
+ * and QUOTA_DIRTY_KEY are set with zero values.
+ *
+ * Returns the result of the last dict operation; negative on failure
+ * (-1 when an argument is NULL).
+ */
+int32_t
+mq_req_xattr (xlator_t *this, loc_t *loc, dict_t *dict,
+              char *contri_key, char *size_key)
+{
+        int32_t    ret                  = -1;
+        char       key[QUOTA_KEY_MAX]   = {0, };
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+        GF_VALIDATE_OR_GOTO ("marker", loc, out);
+        GF_VALIDATE_OR_GOTO ("marker", dict, out);
+
+        if (!loc_is_root(loc)) {
+                ret = mq_dict_set_contribution (this, dict, loc, NULL,
+                                                contri_key);
+                if (ret < 0)
+                        goto out;
+        }
+
+        GET_SIZE_KEY (this, key, ret);
+        if (ret < 0)
+                goto out;
+        if (size_key)
+                strncpy (size_key, key, QUOTA_KEY_MAX);
+
+        ret = dict_set_uint64 (dict, key, 0);
+        if (ret < 0)
+                goto out;
+
+        ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 0);
+
+out:
+        /* 'this' may be NULL here: the first validation above jumps to
+         * 'out' in that case, so it must not be dereferenced blindly.
+         */
+        if (ret < 0)
+                gf_log_callingfn (this ? this->name : "marker", GF_LOG_ERROR,
+                                  "dict set failed");
+
+        return ret;
+}
+
+
+/*
+ * Tear down a quota inode ctx: unlink every contribution node from the
+ * ctx list and drop a reference on it, then destroy the ctx lock and free
+ * the ctx. Does nothing when this or ctx is NULL. Always returns 0.
+ */
+int32_t
+mq_forget (xlator_t *this, quota_inode_ctx_t *ctx)
+{
+        inode_contribution_t *node = NULL;
+        inode_contribution_t *tmp  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, done);
+        GF_VALIDATE_OR_GOTO ("marker", ctx, done);
+
+        /* _safe variant: nodes are unlinked while walking the list */
+        list_for_each_entry_safe (node, tmp, &ctx->contribution_head,
+                                  contri_list) {
+                list_del_init (&node->contri_list);
+                GF_REF_PUT (node);
+        }
+
+        LOCK_DESTROY (&ctx->lock);
+        GF_FREE (ctx);
+
+done:
+        return 0;
+}
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
new file mode 100644
index 00000000000..51e062537b8
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota.h
@@ -0,0 +1,156 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _MARKER_QUOTA_H
+#define _MARKER_QUOTA_H
+
+#include "xlator.h"
+#include "marker-mem-types.h"
+#include "refcount.h"
+#include "quota-common-utils.h"
+#include "call-stub.h"
+
+/* Common prefix of the quota xattr names built by GET_CONTRI_KEY */
+#define QUOTA_XATTR_PREFIX "trusted.glusterfs"
+/* xattr holding the dirty flag of a directory */
+#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty"
+
+/* Last component of a contribution xattr name */
+#define CONTRIBUTION "contri"
+/* Size of every buffer used to build a quota xattr key */
+#define QUOTA_KEY_MAX 512
+#define READDIR_BUF 4096
+
+
+/* Destroy the call stack of _frame and drop a reference on its local.
+ * The local is detached from the frame first, before the stack is
+ * destroyed.
+ */
+#define QUOTA_STACK_DESTROY(_frame, _this) \
+ do { \
+ quota_local_t *_local = NULL; \
+ _local = _frame->local; \
+ _frame->local = NULL; \
+ STACK_DESTROY (_frame->root); \
+ mq_local_unref (_this, _local); \
+ } while (0)
+
+
+/* Allocate one zeroed object of 'type' (accounted as gf_marker_mt_<type>)
+ * into 'var'. Sets 'ret' to 0 on success and -1 on allocation failure.
+ * The macro expands to a single statement; the caller supplies the ';'.
+ */
+#define QUOTA_ALLOC(var, type, ret)                             \
+        do {                                                    \
+                ret = 0;                                        \
+                var = GF_CALLOC (sizeof (type), 1,              \
+                                 gf_marker_mt_##type);          \
+                if (!var) {                                     \
+                        ret = -1;                               \
+                }                                               \
+        } while (0)
+
+/* Same as QUOTA_ALLOC, but on allocation failure it also logs
+ * "out of memory" and jumps to 'label' with 'ret' set to -1.
+ * The macro expands to a single statement; the caller supplies the ';'.
+ */
+#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label)              \
+        do {                                                    \
+                var = GF_CALLOC (sizeof (type), 1,              \
+                                 gf_marker_mt_##type);          \
+                if (!var) {                                     \
+                        gf_log ("", GF_LOG_ERROR,               \
+                                "out of memory");               \
+                        ret = -1;                               \
+                        goto label;                             \
+                }                                               \
+                ret = 0;                                        \
+        } while (0)
+
+/* Write the xattr name for 'key' into 'var' (a buffer of QUOTA_KEY_MAX
+ * bytes): "<key>.<version>" when the xlator's configured version is > 0,
+ * otherwise 'key' unchanged. '_ret' receives the snprintf return value.
+ * _this->private is dereferenced without a NULL check.
+ */
+#define GET_QUOTA_KEY(_this, var, key, _ret) \
+ do { \
+ marker_conf_t *_priv = _this->private; \
+ if (_priv->version > 0) \
+ _ret = snprintf (var, QUOTA_KEY_MAX, "%s.%d", \
+ key, _priv->version); \
+ else \
+ _ret = snprintf (var, QUOTA_KEY_MAX, "%s", key); \
+ } while (0)
+
+/* Build the contribution xattr name for '_gfid' into 'var':
+ * QUOTA_XATTR_PREFIX ".quota.<gfid>." CONTRIBUTION, or with an empty gfid
+ * component when '_gfid' is NULL; the version suffix is then added by
+ * GET_QUOTA_KEY. '_ret' receives the final snprintf return value.
+ */
+#define GET_CONTRI_KEY(_this, var, _gfid, _ret) \
+ do { \
+ char _tmp_var[QUOTA_KEY_MAX] = {0, }; \
+ if (_gfid != NULL) { \
+ char _gfid_unparsed[40]; \
+ gf_uuid_unparse (_gfid, _gfid_unparsed); \
+ _ret = snprintf (_tmp_var, QUOTA_KEY_MAX, \
+ QUOTA_XATTR_PREFIX \
+ ".%s.%s." CONTRIBUTION, \
+ "quota", _gfid_unparsed); \
+ } else { \
+ _ret = snprintf (_tmp_var, QUOTA_KEY_MAX, \
+ QUOTA_XATTR_PREFIX \
+ ".%s.." CONTRIBUTION, \
+ "quota"); \
+ } \
+ GET_QUOTA_KEY (_this, var, _tmp_var, _ret); \
+ } while (0)
+
+/* Build the (versioned) size xattr name from QUOTA_SIZE_KEY into 'var'.
+ * NOTE(review): expands to a bare { } block rather than do/while (0), so
+ * "GET_SIZE_KEY (...);" cannot be the body of an unbraced if followed by
+ * an else.
+ */
+#define GET_SIZE_KEY(_this, var, _ret) \
+ { \
+ GET_QUOTA_KEY (_this, var, QUOTA_SIZE_KEY, _ret); \
+ }
+
+/* Increment 'var' while holding 'lock' */
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var ++; \
+ UNLOCK (lock); \
+ } while (0)
+
+/* Per-inode quota state kept by marker.
+ * size/file_count/dir_count/dirty are updated under 'lock' by the xattr
+ * inspection code.
+ */
+struct quota_inode_ctx {
+ int64_t size;
+ int64_t file_count;
+ int64_t dir_count;
+ int8_t dirty; /* value of the QUOTA_DIRTY_KEY xattr */
+ /* NOTE(review): the three *_status flags look like "txn in progress"
+ * markers accessed through the mq_*_ctx_*_status helpers - confirm
+ * against marker-quota-helper.
+ */
+ gf_boolean_t create_status;
+ gf_boolean_t updation_status;
+ gf_boolean_t dirty_status;
+ gf_lock_t lock;
+ /* list of inode_contribution, linked through contri_list */
+ struct list_head contribution_head;
+};
+typedef struct quota_inode_ctx quota_inode_ctx_t;
+
+/* Argument block handed to the quota synctask functions (the 'opaque'
+ * pointer); the tasks read 'this' and 'loc' from it.
+ */
+struct quota_synctask {
+ xlator_t *this;
+ loc_t loc;
+ quota_meta_t contri;
+ gf_boolean_t is_static;
+ uint32_t ia_nlink;
+ call_stub_t *stub;
+};
+typedef struct quota_synctask quota_synctask_t;
+
+/* One contribution of an inode, identified by 'gfid' (the gfid used to
+ * build the contribution xattr name - presumably the parent's; verify).
+ * contribution/file_count/dir_count are updated under 'lock'. The node is
+ * reference counted and released with GF_REF_PUT.
+ */
+struct inode_contribution {
+ struct list_head contri_list;
+ int64_t contribution;
+ int64_t file_count;
+ int64_t dir_count;
+ uuid_t gfid;
+ gf_lock_t lock;
+ GF_REF_DECL;
+};
+typedef struct inode_contribution inode_contribution_t;
+
+/* Add the quota xattr keys to request for a loc to the given dict;
+ * the last argument optionally receives the size key.
+ */
+int32_t
+mq_req_xattr (xlator_t *, loc_t *, dict_t *, char *, char *);
+
+/* Inspect the quota xattrs returned in the dict for a loc and start
+ * whatever txn is needed to bring them up to date.
+ */
+int32_t
+mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt);
+
+/* Start the quota update txn with spawn = _gf_true */
+int
+mq_initiate_quota_txn (xlator_t *, loc_t *, struct iatt *);
+
+/* Start the quota update txn with spawn = _gf_false */
+int
+mq_initiate_quota_blocking_txn (xlator_t *, loc_t *, struct iatt *);
+
+int
+mq_create_xattrs_txn (xlator_t *this, loc_t *loc, struct iatt *buf);
+
+/* Start the txn that removes a contribution from the parent; the stub,
+ * if given, is resumed by the callee when the task is not scheduled.
+ */
+int32_t
+mq_reduce_parent_size_txn (xlator_t *, loc_t *, quota_meta_t *,
+ uint32_t nlink, call_stub_t *stub);
+
+/* Release all contribution nodes of a ctx and free the ctx */
+int32_t
+mq_forget (xlator_t *, quota_inode_ctx_t *);
+#endif
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
new file mode 100644
index 00000000000..e0e7c9857e6
--- /dev/null
+++ b/xlators/features/marker/src/marker.c
@@ -0,0 +1,3520 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "defaults.h"
+#include "libxlator.h"
+#include "marker.h"
+#include "marker-mem-types.h"
+#include "marker-quota.h"
+#include "marker-quota-helper.h"
+#include "marker-common.h"
+#include "byte-order.h"
+#include "syncop.h"
+#include "syscall.h"
+
+#include <fnmatch.h>
+
+#define _GF_UID_GID_CHANGED 1
+
+/* NULL-terminated list of quota xattr names that are exposed to callers:
+ * these are the keys that get a version suffix added/removed and that are
+ * exempt from the internal-xattr filter when quota is enabled.
+ */
+static char *mq_ext_xattrs[] = {
+ QUOTA_SIZE_KEY,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ NULL,
+};
+
+void
+fini (xlator_t *this);
+
+int32_t
+marker_start_setxattr (call_frame_t *, xlator_t *);
+
+/* When a client or quotad requests quota xattrs, the key name is
+ * replaced by one that has the version number appended at the end.
+ * In the cbk, the resulting xattr values are set back under the
+ * original key name.
+ * The functions marker_key_replace_with_ver and marker_key_set_ver
+ * below are used for adding/removing the version in the key name.
+ */
+/*
+ * For every externally visible quota xattr present in dict, store its
+ * value under the versioned key name and delete the unversioned key.
+ *
+ * No-op (returns 0) when dict is NULL or the configured version is <= 0.
+ * Returns 0 on success, a negative value if building or setting a key
+ * fails.
+ */
+int
+marker_key_replace_with_ver (xlator_t *this, dict_t *dict)
+{
+        marker_conf_t  *conf                    = NULL;
+        char            ver_key[QUOTA_KEY_MAX]  = {0, };
+        char          **xattr                   = NULL;
+        int             rc                      = -1;
+
+        conf = this->private;
+
+        if (dict == NULL || conf->version <= 0)
+                return 0;
+
+        for (xattr = mq_ext_xattrs; *xattr; xattr++) {
+                if (!dict_get (dict, *xattr))
+                        continue;
+
+                GET_QUOTA_KEY (this, ver_key, *xattr, rc);
+                if (rc < 0)
+                        return rc;
+
+                rc = dict_set (dict, ver_key, dict_get (dict, *xattr));
+                if (rc < 0)
+                        return rc;
+
+                dict_del (dict, *xattr);
+        }
+
+        return 0;
+}
+
+/*
+ * For every externally visible quota xattr whose versioned key is present
+ * in dict, also store the value under the unversioned key name. The
+ * result of that dict_set is not checked.
+ *
+ * No-op (returns 0) when dict is NULL or the configured version is <= 0.
+ * Returns 0 on success, a negative value if building a key fails.
+ */
+int
+marker_key_set_ver (xlator_t *this, dict_t *dict)
+{
+        marker_conf_t  *conf                    = NULL;
+        char            ver_key[QUOTA_KEY_MAX]  = {0, };
+        char          **xattr                   = NULL;
+        int             rc                      = -1;
+
+        conf = this->private;
+
+        if (dict == NULL || conf->version <= 0)
+                return 0;
+
+        for (xattr = mq_ext_xattrs; *xattr; xattr++) {
+                GET_QUOTA_KEY (this, ver_key, *xattr, rc);
+                if (rc < 0)
+                        return rc;
+
+                if (!dict_get (dict, ver_key))
+                        continue;
+
+                dict_set (dict, *xattr, dict_get (dict, ver_key));
+        }
+
+        return 0;
+}
+
+/*
+ * Take a reference on local (under its lock).
+ * Returns local, or NULL when local is NULL.
+ */
+marker_local_t *
+marker_local_ref (marker_local_t *local)
+{
+        GF_VALIDATE_OR_GOTO ("marker", local, invalid);
+
+        LOCK (&local->lock);
+        {
+                local->ref += 1;
+        }
+        UNLOCK (&local->lock);
+
+        return local;
+
+invalid:
+        return NULL;
+}
+
+/*
+ * Populate loc from the given pieces; each of inode, parent and path is
+ * optional.
+ *
+ * inode/parent are referenced; the inode's gfid is copied only when loc
+ * has no gfid yet. path is duplicated and loc->name is pointed just past
+ * its last '/' (NULL if there is none).
+ *
+ * Returns 0 on success; -1 when loc is NULL or the path cannot be
+ * duplicated (loc is wiped in the latter case).
+ */
+int
+marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+        char *slash = NULL;
+
+        if (loc == NULL)
+                return -1;
+
+        if (inode != NULL) {
+                loc->inode = inode_ref (inode);
+                if (gf_uuid_is_null (loc->gfid))
+                        gf_uuid_copy (loc->gfid, loc->inode->gfid);
+        }
+
+        if (parent != NULL)
+                loc->parent = inode_ref (parent);
+
+        if (path == NULL)
+                return 0;
+
+        loc->path = gf_strdup (path);
+        if (loc->path == NULL) {
+                gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
+                loc_wipe (loc);
+                return -1;
+        }
+
+        slash = strrchr (loc->path, '/');
+        loc->name = slash ? slash + 1 : NULL;
+
+        return 0;
+}
+
+/*
+ * Build loc for inode, resolving its path through the inode table.
+ *
+ * When both parent and name are given the path is resolved as
+ * <parent>/<name>, otherwise from inode itself. If no parent was passed,
+ * it is looked up with inode_parent and the reference obtained here is
+ * dropped again before returning.
+ *
+ * Returns -1 when inode or loc is NULL, a negative value when path
+ * resolution or marker_loc_fill fails, otherwise the result of
+ * marker_loc_fill.
+ */
+int
+_marker_inode_loc_fill (inode_t *inode, inode_t *parent, char *name, loc_t *loc)
+{
+ char *resolvedpath = NULL;
+ int ret = -1;
+ gf_boolean_t free_parent = _gf_false;
+
+ if ((!inode) || (!loc))
+ return ret;
+
+ if (parent && name)
+ ret = inode_path (parent, name, &resolvedpath);
+ else
+ ret = inode_path (inode, NULL, &resolvedpath);
+ if (ret < 0)
+ goto err;
+
+ if (parent == NULL) {
+ /* NOTE(review): inode_parent may return NULL; free_parent is
+ * set regardless, so inode_unref below is then called with
+ * NULL - confirm inode_unref tolerates that.
+ */
+ parent = inode_parent (inode, NULL, NULL);
+ free_parent = _gf_true;
+ }
+
+ ret = marker_loc_fill (loc, inode, parent, resolvedpath);
+ if (ret < 0)
+ goto err;
+
+err:
+ /* Reached on success as well as on failure */
+ if (free_parent)
+ inode_unref (parent);
+
+ GF_FREE (resolvedpath);
+
+ return ret;
+}
+
+/*
+ * Build loc for inode without a caller-supplied parent or name; see
+ * _marker_inode_loc_fill for the return values.
+ */
+int
+marker_inode_loc_fill (inode_t *inode, loc_t *loc)
+{
+        int rc = _marker_inode_loc_fill (inode, NULL, NULL, loc);
+
+        return rc;
+}
+
+/*
+ * Replace local->loc by the loc of its parent directory.
+ *
+ * The parent is taken from local->loc.parent when set, otherwise it is
+ * looked up with inode_parent (that extra reference is dropped before
+ * returning). On failure local->loc is left untouched.
+ *
+ * Returns -1 on failure, otherwise the result of marker_inode_loc_fill.
+ */
+int32_t
+marker_trav_parent (marker_local_t *local)
+{
+        loc_t     parent_loc = {0, };
+        inode_t  *parent     = NULL;
+        inode_t  *looked_up  = NULL;   /* reference owned by this function */
+        int32_t   rc         = 0;
+
+        if (local->loc.parent) {
+                parent = local->loc.parent;
+        } else {
+                looked_up = inode_parent (local->loc.inode, NULL, NULL);
+                parent = looked_up;
+        }
+
+        rc = marker_inode_loc_fill (parent, &parent_loc);
+        if (rc < 0) {
+                rc = -1;
+        } else {
+                loc_wipe (&local->loc);
+                local->loc = parent_loc;
+        }
+
+        if (looked_up)
+                inode_unref (looked_up);
+
+        return rc;
+}
+
+/*
+ * Report a marker indexing failure: log a CRITICAL message naming the
+ * affected path (or gfid when no path is known, or "<nul>" when local is
+ * NULL) and remove the timestamp file. Always returns 0.
+ */
+int32_t
+marker_error_handler (xlator_t *this, marker_local_t *local, int32_t op_errno)
+{
+        marker_conf_t  *conf  = (marker_conf_t *) this->private;
+        const char     *where = "<nul>";
+
+        if (local) {
+                if (local->loc.path)
+                        where = local->loc.path;
+                else
+                        where = uuid_utoa(local->loc.gfid);
+        }
+
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "Indexing gone corrupt at %s (reason: %s)."
+                " Geo-replication slave content needs to be revalidated",
+                where, strerror (op_errno));
+        sys_unlink (conf->timestamp_file);
+
+        return 0;
+}
+
+/*
+ * Drop a reference on local. When the count reaches zero the locs are
+ * wiped, xdata is unreferenced, the lock frame's stack is destroyed, the
+ * chained oplocal is unreferenced and local goes back to its mem pool.
+ *
+ * Returns -1 when local is NULL, 0 otherwise.
+ */
+int32_t
+marker_local_unref (marker_local_t *local)
+{
+        int32_t remaining = 0;
+
+        if (!local)
+                return -1;
+
+        LOCK (&local->lock);
+        {
+                remaining = --local->ref;
+        }
+        UNLOCK (&local->lock);
+
+        if (remaining != 0)
+                return 0;
+
+        /* last reference gone: release everything local owns */
+        loc_wipe (&local->loc);
+        loc_wipe (&local->parent_loc);
+
+        if (local->xdata)
+                dict_unref (local->xdata);
+
+        if (local->lk_frame) {
+                STACK_DESTROY (local->lk_frame->root);
+                local->lk_frame = NULL;
+        }
+
+        if (local->oplocal) {
+                marker_local_unref (local->oplocal);
+                local->oplocal = NULL;
+        }
+
+        mem_put (local);
+
+        return 0;
+}
+
+/*
+ * Build a volume_mark describing the state of the timestamp file.
+ *
+ * @this:   marker xlator (unused here)
+ * @priv:   marker configuration; supplies the volume uuid and the
+ *          timestamp file path
+ * @status: receives the newly allocated volume_mark, which the caller
+ *          owns; set to NULL when the allocation fails
+ *
+ * The mark carries version 1.0 and the volume uuid. If the timestamp file
+ * can be stat'ed, retval is 0 and sec/usec hold its mtime in network byte
+ * order; otherwise retval is 1.
+ *
+ * Returns 0 on success, -1 when the mark cannot be allocated.
+ */
+int32_t
+stat_stampfile (xlator_t *this, marker_conf_t *priv,
+                struct volume_mark **status)
+{
+        struct stat          buf      = {0, };
+        struct volume_mark  *vol_mark = NULL;
+
+        vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1,
+                              gf_marker_mt_volume_mark);
+        if (vol_mark == NULL) {
+                /* do not dereference a failed allocation; hand back NULL
+                 * so the caller can report ENOMEM
+                 */
+                *status = NULL;
+                return -1;
+        }
+
+        vol_mark->major = 1;
+        vol_mark->minor = 0;
+
+        GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16);
+        memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16);
+
+        if (sys_stat (priv->timestamp_file, &buf) != -1) {
+                vol_mark->retval = 0;
+                vol_mark->sec = htonl (buf.st_mtime);
+                vol_mark->usec = htonl (ST_MTIM_NSEC (&buf)/1000);
+        } else
+                vol_mark->retval = 1;
+
+        *status = vol_mark;
+
+        return 0;
+}
+
+/*
+ * Answer a getxattr request for the volume mark.
+ *
+ * @frame:    frame to unwind
+ * @this:     marker xlator
+ * @name:     xattr name under which the mark is returned
+ * @vol_mark: mark to return; ownership passes to this function. NULL
+ *            makes the request fail with ENOMEM.
+ * @xdata:    passed through to the unwind
+ *
+ * The mark is stored as a binary value in a fresh dict which is unwound
+ * to the caller. If the dict cannot be allocated the mark is freed and the
+ * request fails with ENOMEM. If storing the mark fails, the mark is freed,
+ * a warning is logged and the (empty) dict is still unwound as success,
+ * as before.
+ *
+ * Always returns 0.
+ */
+int32_t
+marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
+                               const char *name, struct volume_mark *vol_mark,
+                               dict_t *xdata)
+{
+        int32_t   ret  = -1;
+        dict_t   *dict = NULL;
+
+        if (vol_mark == NULL){
+                STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (dict == NULL) {
+                /* never report success with a NULL dict */
+                GF_FREE (vol_mark);
+                STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+
+                goto out;
+        }
+
+        ret = dict_set_bin (dict, (char *)name, vol_mark,
+                            sizeof (struct volume_mark));
+        if (ret) {
+                GF_FREE (vol_mark);
+                gf_log (this->name, GF_LOG_WARNING, "failed to set key %s",
+                        name);
+        }
+
+        STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata);
+
+        dict_unref (dict);
+out:
+        return 0;
+}
+
+/*
+ * Handle a getxattr for the volume mark coming from gsyncd.
+ *
+ * The request is "special" only when the caller's pid is
+ * GF_CLIENT_PID_GSYNCD and name is exactly
+ * MARKER_XATTR_PREFIX "." VOLUME_MARK. In that case the timestamp file is
+ * stat'ed and the frame is unwound here with the resulting mark.
+ *
+ * Returns _gf_true when the request was handled (frame already unwound),
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name)
+{
+        marker_conf_t       *conf = (marker_conf_t *)this->private;
+        struct volume_mark  *mark = NULL;
+
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD)
+                return _gf_false;
+
+        if (name == NULL)
+                return _gf_false;
+
+        if (strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0)
+                return _gf_false;
+
+        stat_stampfile (this, conf, &mark);
+
+        marker_getxattr_stampfile_cbk (frame, this, name, mark, NULL);
+
+        return _gf_true;
+}
+
+/*
+ * dict match callback: decide whether key k is an internal xattr that
+ * must be filtered out.
+ *
+ * data is an optional NULL-terminated array of key names that are never
+ * treated as internal. Any other key matching "trusted.glusterfs.quota*"
+ * or the pgfid prefix is internal.
+ */
+static gf_boolean_t
+_is_quota_internal_xattr (dict_t *d, char *k, data_t *v, void *data)
+{
+        char **allowed = data;
+
+        if (allowed) {
+                for (; *allowed; allowed++) {
+                        if (strcmp (k, *allowed) == 0)
+                                return _gf_false;
+                }
+        }
+
+        /* It would be nice if posix filters pgfid xattrs. But since marker
+         * also takes up responsibility to clean these up, adding the filtering
+         * here (Check 'quota_xattr_cleaner')
+         */
+        if (fnmatch ("trusted.glusterfs.quota*", k, 0) == 0 ||
+            fnmatch (PGFID_XATTR_KEY_PREFIX"*", k, 0) == 0)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Remove internal quota/pgfid xattrs from xattrs. When the quota feature
+ * is enabled the keys in mq_ext_xattrs are kept; otherwise no key is
+ * exempt.
+ */
+static void
+marker_filter_internal_xattrs (xlator_t *this, dict_t *xattrs)
+{
+        marker_conf_t  *conf = this->private;
+        char          **keep = NULL;
+
+        keep = (conf->feature_enabled & GF_QUOTA) ? mq_ext_xattrs : NULL;
+
+        dict_foreach_match (xattrs, _is_quota_internal_xattr, keep,
+                            dict_remove_foreach_fn, NULL);
+}
+
+/*
+ * Strip the xtime xattrs (GF_XATTR_XTIME_PATTERN) from xattrs unless the
+ * request comes from gsyncd. A NULL xattrs is left alone.
+ */
+static void
+marker_filter_gsyncd_xattrs (call_frame_t *frame,
+                             xlator_t *this, dict_t *xattrs)
+{
+        marker_conf_t *conf = this->private;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (frame);
+
+        if (!xattrs || frame->root->pid == GF_CLIENT_PID_GSYNCD)
+                return;
+
+        {
+                GF_REMOVE_INTERNAL_XATTR (GF_XATTR_XTIME_PATTERN, xattrs);
+        }
+}
+
+/*
+ * getxattr callback.
+ *
+ * On success the versioned quota keys are mirrored back under their
+ * original names (marker_key_set_ver). A non-NULL cookie means the request
+ * listed all xattrs, in which case the internal quota xattrs are filtered
+ * out. The gsyncd xtime xattrs are filtered for every non-gsyncd caller.
+ * The (possibly modified) dict is then unwound.
+ *
+ * Always returns 0.
+ */
+int32_t
+marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ if (op_ret < 0)
+ goto unwind;
+
+ ret = marker_key_set_ver (this, dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (cookie) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Filtering the quota extended attributes");
+
+ /* If the getxattr is from a non special client, then do not
+ copy the quota related xattrs (except the quota limit key
+ i.e trusted.glusterfs.quota.limit-set which has been set by
+ glusterd on the directory on which quota limit is set.) for
+ directories. Let the healing of xattrs happen upon lookup.
+ NOTE: setting of trusted.glusterfs.quota.limit-set as of now
+ happens from glusterd. It should be moved to quotad. Also
+ trusted.glusterfs.quota.limit-set is set on directory which
+ is permanent till quota is removed on that directory or limit
+ is changed. So let that xattr be healed by other xlators
+ properly whenever directory healing is done.
+ */
+ /*
+ * Except limit-set xattr, rest of the xattrs are maintained
+ * by quota xlator. Don't expose them to other xlators.
+ * This filter makes sure quota xattrs are not healed as part of
+ * metadata self-heal
+ */
+ marker_filter_internal_xattrs (frame->this, dict);
+ }
+
+ /* Filter gsyncd xtime xattr for non gsyncd clients */
+ marker_filter_gsyncd_xattrs (frame, frame->this, dict);
+
+unwind:
+ MARKER_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+/*
+ * getxattr fop.
+ *
+ * A @name matching one of mq_ext_xattrs is rewritten through GET_QUOTA_KEY
+ * before being sent down. When xtime is enabled the request is first
+ * offered to call_from_special_client; if that reports it handled the
+ * call, nothing more is wound here. Otherwise the request goes to the
+ * first child, with cookie 1 when no specific key was requested so that
+ * the callback filters quota xattrs. Setup failures unwind with ENOMEM.
+ */
+int32_t
+marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *name, dict_t *xdata)
+{
+        marker_conf_t  *conf               = this->private;
+        marker_local_t *local              = NULL;
+        gf_boolean_t    handled            = _gf_false;
+        unsigned long   filter_cookie      = 0;
+        char            key[QUOTA_KEY_MAX] = {0, };
+        int32_t         ret                = -1;
+        int32_t         idx                = 0;
+
+        for (idx = 0; name && mq_ext_xattrs[idx]; idx++) {
+                if (strcmp (name, mq_ext_xattrs[idx]) != 0)
+                        continue;
+
+                GET_QUOTA_KEY (this, key, mq_ext_xattrs[idx], ret);
+                if (ret < 0)
+                        goto out;
+                name = key;
+                break;
+        }
+
+        local = mem_get0 (this->local_pool);
+        frame->local = local;
+        if (!local)
+                goto out;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        if (loc_copy (&local->loc, loc) < 0)
+                goto out;
+
+        gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid);
+
+        if (conf && (conf->feature_enabled & GF_XTIME))
+                handled = call_from_special_client (frame, this, name);
+
+        if (handled != _gf_false)
+                return 0;
+
+        if (!name) {
+                /* No specific key asked for: tell the callback to strip
+                 * the marker-quota xattrs so that afr does not try to
+                 * self-heal them.
+                 */
+                filter_cookie = 1;
+        }
+
+        STACK_WIND_COOKIE (frame, marker_getxattr_cbk,
+                           (void *)filter_cookie,
+                           FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->getxattr,
+                           loc, name, xdata);
+        return 0;
+
+out:
+        MARKER_STACK_UNWIND (getxattr, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Finish an xtime marking chain: detach the local from @frame, destroy the
+ * frame's stack and drop the reference on the local. Always returns 0.
+ */
+int32_t
+marker_setxattr_done (call_frame_t *frame)
+{
+        marker_local_t *marker_local = (marker_local_t *) frame->local;
+
+        /* detach before the stack goes away */
+        frame->local = NULL;
+        STACK_DESTROY (frame->root);
+
+        marker_local_unref (marker_local);
+        return 0;
+}
+
+/*
+ * Callback of the marker setxattr wound by marker_start_setxattr.
+ *
+ * The chain stops (frame destroyed, local unreferenced through
+ * marker_setxattr_done) when:
+ *   - the setxattr failed with ENOSPC (after marker_error_handler),
+ *   - the root has been reached (path "/" or root gfid),
+ *   - moving to the parent via marker_trav_parent fails, or
+ *   - the next setxattr could not be started.
+ * Otherwise the same frame is reused to mark the parent.
+ *
+ * Always returns 0.
+ */
+int
+marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        int32_t         done  = 0;
+        marker_local_t *local = NULL;
+
+        local = (marker_local_t*) frame->local;
+
+        if (op_ret == -1 && op_errno == ENOSPC) {
+                marker_error_handler (this, local, op_errno);
+                done = 1;
+                goto out;
+        }
+
+        if (local) {
+                if (local->loc.path && strcmp (local->loc.path, "/") == 0) {
+                        done = 1;
+                        goto out;
+                }
+                if (__is_root_gfid (local->loc.gfid)) {
+                        done = 1;
+                        goto out;
+                }
+        }
+
+        ret = (local) ? marker_trav_parent (local) : -1;
+
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG, "Error occurred "
+                        "while traversing to the parent, stopping marker");
+
+                done = 1;
+
+                goto out;
+        }
+
+        /* marker_start_setxattr only fails before winding, so on failure
+         * the frame is still owned here and must be released; ignoring
+         * the result would leak the frame and the local reference.
+         */
+        ret = marker_start_setxattr (frame, this);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_DEBUG, "failed to start "
+                        "setxattr on the parent, stopping marker");
+                done = 1;
+        }
+
+out:
+        if (done) {
+                marker_setxattr_done (frame);
+        }
+
+        return 0;
+}
+
+/*
+ * Wind a setxattr that stores local->timebuf under priv->marker_xattr on
+ * local->loc, with marker_specific_setxattr_cbk as the callback.
+ *
+ * Returns 0 once the call has been wound, -1 if the frame has no local or
+ * the xattr dict could not be built (nothing is wound in that case and the
+ * frame is left untouched for the caller).
+ */
+int32_t
+marker_start_setxattr (call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ local = (marker_local_t*) frame->local;
+
+ if (!local)
+ goto out;
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ /* fall back to the inode's gfid when the loc does not carry one */
+ if (local->loc.inode && gf_uuid_is_null (local->loc.gfid))
+ gf_uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ /* 8 bytes: presumably the two 32-bit words written by
+ * marker_gettimeofday - TODO confirm against the timebuf declaration */
+ ret = dict_set_static_bin (dict, priv->marker_xattr,
+ (void *)local->timebuf, 8);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set marker xattr (%s)", local->loc.path);
+ goto out;
+ }
+
+ STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0,
+ NULL);
+
+ ret = 0;
+out:
+ /* drop this function's reference on the dict on every path */
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
+/*
+ * Record the current wall-clock time in local->timebuf: seconds in slot 0,
+ * microseconds in slot 1, each converted with htonl.
+ */
+void
+marker_gettimeofday (marker_local_t *local)
+{
+        struct timeval now = {0, };
+
+        gettimeofday (&now, NULL);
+
+        local->timebuf[0] = htonl (now.tv_sec);
+        local->timebuf[1] = htonl (now.tv_usec);
+}
+
+/*
+ * Create a fresh frame carrying @local and start the xtime setxattr chain
+ * on it.
+ *
+ * Returns -1 only when the frame could not be created; @local is then
+ * untouched and still owned by the caller. Returns 0 once the frame has
+ * taken over @local, including the case where the first setxattr could not
+ * be started and the frame (with its local reference) was released here.
+ */
+int32_t
+marker_create_frame (xlator_t *this, marker_local_t *local)
+{
+        call_frame_t *frame = NULL;
+
+        frame = create_frame (this, this->ctx->pool);
+
+        if (!frame)
+                return -1;
+
+        frame->local = (void *) local;
+
+        /* marker_start_setxattr fails only before winding. Nobody else
+         * will ever see this frame in that case, so release it (and the
+         * local reference it carries) instead of leaking both.
+         */
+        if (marker_start_setxattr (frame, this) != 0)
+                marker_setxattr_done (frame);
+
+        return 0;
+}
+
+/*
+ * Kick off an xtime update for the object described by @local.
+ *
+ * Nothing is done for rebalance (GF_CLIENT_PID_DEFRAG) clients, nor for
+ * gsyncd clients unless GF_XTIME_GSYNC_FORCE is enabled. Otherwise the
+ * current time is stamped into @local and a new frame, holding its own
+ * reference on @local, is created to carry the setxattr chain.
+ *
+ * Always returns 0.
+ */
+int32_t
+marker_xtime_update_marks (xlator_t *this, marker_local_t *local)
+{
+        marker_conf_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        priv = this->private;
+
+        if ((local->pid == GF_CLIENT_PID_GSYNCD
+             && !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE))
+            || (local->pid == GF_CLIENT_PID_DEFRAG))
+                goto out;
+
+        marker_gettimeofday (local);
+
+        /* reference owned by the frame created below */
+        marker_local_ref (local);
+
+        if (marker_create_frame (this, local) != 0) {
+                /* no frame was created to own the reference: give it back
+                 * so the local is not leaked
+                 */
+                marker_local_unref (local);
+        }
+out:
+        return 0;
+}
+
+
+/*
+ * mkdir callback.
+ *
+ * With quota enabled a quota inode ctx is created for the new inode before
+ * unwinding; failing to do so turns the reply into -1/ENOMEM. After the
+ * unwind, on success, the quota xattr creation transaction and/or the
+ * xtime update are started according to the enabled features.
+ */
+int32_t
+marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        marker_conf_t     *conf  = NULL;
+        marker_local_t    *local = NULL;
+        quota_inode_ctx_t *qctx  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "error occurred "
+                        "while creating directory %s", strerror (op_errno));
+        }
+
+        local        = (marker_local_t *) frame->local;
+        frame->local = NULL;
+        conf         = this->private;
+
+        if (op_ret >= 0 && inode && (conf->feature_enabled & GF_QUOTA)) {
+                qctx = mq_inode_ctx_new (inode, this);
+                if (!qctx) {
+                        gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+                                "failed for %s", uuid_utoa (inode->gfid));
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+        STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
+                             buf, preparent, postparent, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                if (gf_uuid_is_null (local->loc.gfid))
+                        gf_uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+                if (conf->feature_enabled & GF_QUOTA)
+                        mq_create_xattrs_txn (this, &local->loc, NULL);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * mkdir fop. When any marker feature is enabled a local holding a copy of
+ * @loc is attached to the frame for use in marker_mkdir_cbk; otherwise the
+ * call is passed straight through. Allocation or loc_copy failures unwind
+ * with ENOMEM.
+ */
+int
+marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+              mode_t umask, dict_t *xdata)
+{
+        int32_t          ret   = 0;
+        marker_local_t  *local = NULL;
+        marker_conf_t   *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; bail out before the local is used */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+
+        return 0;
+err:
+        MARKER_STACK_UNWIND (mkdir, frame, -1, ENOMEM, NULL,
+                             NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * create callback.
+ *
+ * With quota enabled a quota inode ctx is created for the new inode before
+ * unwinding; failing to do so turns the reply into -1/ENOMEM. After the
+ * unwind, on success, the quota xattr creation transaction (with @buf)
+ * and/or the xtime update are started according to the enabled features.
+ */
+int32_t
+marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                   struct iatt *buf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        marker_local_t    *local = NULL;
+        marker_conf_t     *conf  = NULL;
+        quota_inode_ctx_t *qctx  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "error occurred "
+                        "while creating file %s", strerror (op_errno));
+        }
+
+        local        = (marker_local_t *) frame->local;
+        frame->local = NULL;
+        conf         = this->private;
+
+        if (op_ret >= 0 && inode && (conf->feature_enabled & GF_QUOTA)) {
+                qctx = mq_inode_ctx_new (inode, this);
+                if (!qctx) {
+                        gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+                                "failed for %s", uuid_utoa (inode->gfid));
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+                             preparent, postparent, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                if (gf_uuid_is_null (local->loc.gfid))
+                        gf_uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+                if (conf->feature_enabled & GF_QUOTA)
+                        mq_create_xattrs_txn (this, &local->loc, buf);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * create fop. When any marker feature is enabled a local holding a copy of
+ * @loc is attached to the frame for use in marker_create_cbk; otherwise
+ * the call is passed straight through. Allocation or loc_copy failures
+ * unwind with ENOMEM.
+ */
+int32_t
+marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+               mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        int32_t          ret   = 0;
+        marker_local_t  *local = NULL;
+        marker_conf_t   *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; bail out before the local is used */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+                    fd, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+                             NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * writev callback: unwinds first, then on success starts the quota
+ * transaction (with @postbuf) and/or the xtime update according to the
+ * enabled features.
+ */
+int32_t
+marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        marker_conf_t  *conf  = NULL;
+        marker_local_t *local = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "error occurred "
+                        "while write, %s", strerror (op_errno));
+        }
+
+        local        = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+                             xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_QUOTA)
+                        mq_initiate_quota_txn (this, &local->loc, postbuf);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * writev fop. When any marker feature is enabled a local whose loc is
+ * filled from fd->inode is attached to the frame for use in
+ * marker_writev_cbk; otherwise the call is passed straight through.
+ * Allocation or loc-fill failures unwind with ENOMEM.
+ */
+int32_t
+marker_writev (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               struct iovec *vector,
+               int32_t count,
+               off_t offset, uint32_t flags,
+               struct iobref *iobref, dict_t *xdata)
+{
+        int32_t          ret   = 0;
+        marker_local_t  *local = NULL;
+        marker_conf_t   *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; bail out before the local is used */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+                    flags, iobref, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * rmdir callback.
+ *
+ * On success the xtime update is started (if enabled). With quota enabled
+ * the reply is packed into a stub and handed to mq_reduce_parent_size_txn
+ * so that the unwind happens only after the parent size has been reduced;
+ * if no stub could be created the reply is unwound directly.
+ */
+int32_t
+marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        marker_conf_t  *conf        = NULL;
+        marker_local_t *local       = NULL;
+        call_stub_t    *unwind_stub = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "error occurred "
+                        "rmdir %s", strerror (op_errno));
+        }
+
+        local        = (marker_local_t *) frame->local;
+        frame->local = NULL;
+        conf         = this->private;
+
+        if (op_ret != -1 && local != NULL) {
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+
+                if (conf->feature_enabled & GF_QUOTA) {
+                        /* During an 'rm -rf' the rmdir reply can overtake a
+                         * background mq_reduce_parent_size_txn. The server
+                         * protocol drops the parent/child association as
+                         * part of rmdir, which would make that txn fail.
+                         * Hence the txn runs in the foreground and the
+                         * reply is unwound once it completes.
+                         */
+                        unwind_stub = fop_rmdir_cbk_stub (frame,
+                                                          default_rmdir_cbk,
+                                                          op_ret, op_errno,
+                                                          preparent,
+                                                          postparent, xdata);
+                        mq_reduce_parent_size_txn (this, &local->loc, NULL, 1,
+                                                   unwind_stub);
+
+                        if (unwind_stub) {
+                                marker_local_unref (local);
+                                return 0;
+                        }
+                }
+        }
+
+        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * rmdir fop. When any marker feature is enabled a local holding a copy of
+ * @loc is attached to the frame for use in marker_rmdir_cbk; otherwise the
+ * call is passed straight through. Allocation or loc_copy failures unwind
+ * with ENOMEM.
+ */
+int32_t
+marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+              dict_t *xdata)
+{
+        int32_t          ret   = 0;
+        marker_local_t  *local = NULL;
+        marker_conf_t   *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; bail out before the local is used */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * unlink callback.
+ *
+ * On success the xtime update is started (if enabled). With quota enabled
+ * and accounting not skipped, the link count is read from @xdata
+ * (GF_RESPONSE_LINK_COUNT_XDATA) and the reply is packed into a stub that
+ * mq_reduce_parent_size_txn receives, so the unwind happens only after the
+ * parent size has been reduced. Without a stub the reply is unwound
+ * directly.
+ */
+int32_t
+marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        marker_conf_t      *conf        = NULL;
+        marker_local_t     *local       = NULL;
+        uint32_t            nlink       = -1;
+        GF_UNUSED int32_t   ret         = 0;
+        call_stub_t        *unwind_stub = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s occurred in unlink", strerror (op_errno));
+        }
+
+        local        = (marker_local_t *) frame->local;
+        frame->local = NULL;
+        conf         = this->private;
+
+        if (op_ret == -1 || local == NULL)
+                goto unwind;
+
+        if (conf->feature_enabled & GF_XTIME)
+                marker_xtime_update_marks (this, local);
+
+        if (!(conf->feature_enabled & GF_QUOTA) || local->skip_txn)
+                goto unwind;
+
+        if (xdata) {
+                ret = dict_get_uint32 (xdata, GF_RESPONSE_LINK_COUNT_XDATA,
+                                       &nlink);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_TRACE,
+                                "dict get failed %s ",
+                                strerror (-ret));
+                }
+        }
+
+        /* During an 'rm -rf' the unlink reply can overtake a background
+         * mq_reduce_parent_size_txn. The server protocol drops the
+         * parent/child association as part of unlink, which would make
+         * that txn fail. Hence the txn runs in the foreground and the
+         * reply is unwound once it completes.
+         */
+        unwind_stub = fop_unlink_cbk_stub (frame, default_unlink_cbk, op_ret,
+                                           op_errno, preparent, postparent,
+                                           xdata);
+        mq_reduce_parent_size_txn (this, &local->loc, NULL, nlink,
+                                   unwind_stub);
+
+        if (unwind_stub) {
+                marker_local_unref (local);
+                return 0;
+        }
+
+unwind:
+        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+                             postparent, xdata);
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+
+/*
+ * unlink fop.
+ *
+ * When any marker feature is enabled a local (copy of @loc, @xflag and a
+ * reference on @xdata) is attached to the frame. If @xdata carries
+ * GLUSTERFS_MARKER_DONT_ACCOUNT_KEY the quota transaction is skipped in
+ * the callback; otherwise GF_REQUEST_LINK_COUNT_XDATA is added to the
+ * request (in a temporary dict when the caller passed none) so that the
+ * callback learns the link count. Failures unwind with ENOMEM.
+ */
+int32_t
+marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+               dict_t *xdata)
+{
+        int32_t          ret       = 0;
+        marker_local_t  *local     = NULL;
+        marker_conf_t   *priv      = NULL;
+        gf_boolean_t     dict_free = _gf_false;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto unlink_wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; the local is dereferenced right below */
+        if (local == NULL)
+                goto err;
+
+        local->xflag = xflag;
+        if (xdata)
+                local->xdata = dict_ref (xdata);
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+
+        if (xdata && dict_get (xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) {
+                local->skip_txn = 1;
+                goto unlink_wind;
+        }
+
+        if (xdata == NULL) {
+                xdata = dict_new ();
+                if (xdata == NULL)
+                        goto err;
+                /* temporary dict owned here; released at 'out' */
+                dict_free = _gf_true;
+        }
+
+        ret = dict_set_int32 (xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+        if (ret < 0)
+                goto err;
+
+unlink_wind:
+        STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+        goto out;
+
+err:
+        MARKER_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+out:
+        if (dict_free)
+                dict_unref (xdata);
+        return 0;
+}
+
+
+/*
+ * link callback: unwinds first, then on success starts the quota xattr
+ * creation transaction (unless accounting is skipped) and/or the xtime
+ * update according to the enabled features.
+ */
+int32_t
+marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *buf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+                        "linking a file ", strerror (op_errno));
+        }
+
+        local        = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if ((conf->feature_enabled & GF_QUOTA) && !local->skip_txn)
+                        mq_create_xattrs_txn (this, &local->loc, buf);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * link fop. When any marker feature is enabled a local holding a copy of
+ * @newloc is attached to the frame; GLUSTERFS_MARKER_DONT_ACCOUNT_KEY in
+ * @xdata makes the callback skip the quota transaction. Allocation or
+ * loc_copy failures unwind with ENOMEM.
+ */
+int32_t
+marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+             dict_t *xdata)
+{
+        int32_t          ret   = 0;
+        marker_local_t  *local = NULL;
+        marker_conf_t   *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; bail out before the local is used */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, newloc);
+
+        if (ret == -1)
+                goto err;
+
+        if (xdata && dict_get (xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY))
+                local->skip_txn = 1;
+wind:
+        STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+                             NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback of the inodelk UNLOCK on the source parent; last step of the
+ * quota-aware rename.
+ *
+ * Unless an error was recorded in local->err, it reduces the source
+ * parent's size by the saved contribution, reduces the destination
+ * parent's size if the destination existed, creates the contribution
+ * xattrs for the renamed inode under its new parent and, if xtime is
+ * enabled, updates the marks on both paths. Both locals are unreferenced
+ * on every path. Always returns 0.
+ */
+int32_t
+marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        marker_local_t *local   = NULL, *oplocal = NULL;
+        loc_t           newloc  = {0, };
+        marker_conf_t  *priv    = NULL;
+
+        local = frame->local;
+        oplocal = local->oplocal;
+
+        priv = this->private;
+
+        frame->local = NULL;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)",
+                        oplocal->parent_loc.path,
+                        uuid_utoa (oplocal->parent_loc.inode->gfid),
+                        strerror (op_errno));
+        }
+
+        if (local->err != 0)
+                goto err;
+
+        mq_reduce_parent_size_txn (this, &oplocal->loc, &oplocal->contribution,
+                                   -1, NULL);
+
+        if (local->loc.inode != NULL) {
+                /* If destination file exists before rename, it would have
+                 * been unlinked while renaming a file
+                 */
+                mq_reduce_parent_size_txn (this, &local->loc, NULL,
+                                           local->ia_nlink, NULL);
+        }
+
+        newloc.inode = inode_ref (oplocal->loc.inode);
+        newloc.path = gf_strdup (local->loc.path);
+        /* gf_strdup can fail; strrchr on NULL would crash */
+        if (newloc.path)
+                newloc.name = strrchr (newloc.path, '/');
+        if (newloc.name)
+                newloc.name++;
+        newloc.parent = inode_ref (local->loc.parent);
+
+        mq_create_xattrs_txn (this, &newloc, &local->buf);
+
+        loc_wipe (&newloc);
+
+        if (priv->feature_enabled & GF_XTIME) {
+                //update marks on oldpath
+                gf_uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
+                marker_xtime_update_marks (this, oplocal);
+                marker_xtime_update_marks (this, local);
+        }
+
+err:
+        marker_local_unref (local);
+        marker_local_unref (oplocal);
+
+        return 0;
+}
+
+
+/*
+ * Release the inodelk held on the source parent by winding an F_UNLCK on
+ * the lock frame; marker_rename_done continues from there. Without a lock
+ * frame there is nothing to unlock and both locals are unreferenced here.
+ */
+void
+marker_rename_release_oldp_lock (marker_local_t *local, xlator_t *this)
+{
+        marker_local_t  *oplocal      = local->oplocal;
+        call_frame_t    *unlock_frame = local->lk_frame;
+        struct gf_flock  unlock       = {0, };
+
+        if (unlock_frame == NULL) {
+                marker_local_unref (local);
+                marker_local_unref (oplocal);
+                return;
+        }
+
+        unlock.l_type   = F_UNLCK;
+        unlock.l_whence = SEEK_SET;
+        unlock.l_start  = 0;
+        unlock.l_len    = 0;
+        unlock.l_pid    = 0;
+
+        STACK_WIND (unlock_frame,
+                    marker_rename_done,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->inodelk,
+                    this->name, &oplocal->parent_loc, F_SETLKW, &unlock, NULL);
+}
+
+
+/*
+ * Unwind the rename fop to the caller and then release the lock on the
+ * source parent.
+ *
+ * Used as the removexattr callback and called directly from the error
+ * paths of the rename chain. If a continuation stub was saved
+ * (local->stub) the in-memory contribution node for the old parent is
+ * dropped and the stub is resumed; otherwise the recorded error
+ * (local->err) is unwound. With neither a stub nor an error the call
+ * cannot be unwound and a critical message is logged.
+ */
+int32_t
+marker_rename_unwind (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contri = NULL;
+
+ local = frame->local;
+ oplocal = local->oplocal;
+ frame->local = NULL;
+
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
+ /* never record a failure with a zero errno */
+ if (op_ret < 0)
+ local->err = op_errno ? op_errno : EINVAL;
+
+ if (local->stub != NULL) {
+ /* Remove contribution node from in-memory even if
+ * remove-xattr has failed as the rename is already performed
+ * if local->stub is set, which means rename was successful
+ */
+ mq_inode_ctx_get (oplocal->loc.inode, this, &ctx);
+ if (ctx) {
+ contri = mq_get_contribution_node (oplocal->loc.parent,
+ ctx);
+ if (contri) {
+ QUOTA_FREE_CONTRIBUTION_NODE (ctx, contri);
+ GF_REF_PUT (contri);
+ }
+ }
+
+ call_resume (local->stub);
+ local->stub = NULL;
+ /* the rename itself succeeded; clear any removexattr error */
+ local->err = 0;
+ } else if (local->err != 0) {
+ STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "continuation stub to unwind the call is absent, hence "
+ "call will be hung (call-stack id = %"PRIu64")",
+ frame->root->unique);
+ }
+
+ /* If there are in-progress writes on old-path during the rename
+ * operation, update txn will update the wrong path if lock
+ * is released before rename unwind.
+ * So release lock only after rename unwind
+ */
+ marker_rename_release_oldp_lock (local, this);
+
+ return 0;
+}
+
+
+/*
+ * rename callback.
+ *
+ * Quota enabled: the reply is saved in a continuation stub and a
+ * removexattr of the contribution key for the old parent is wound on the
+ * renamed inode (at its new path); marker_rename_unwind resumes the stub.
+ * Failures go straight to marker_rename_unwind with local->err set. If no
+ * marker local is attached to the frame the reply is simply passed up.
+ *
+ * Quota disabled: the reply is unwound immediately and, on success with
+ * xtime enabled, the marks on both paths are updated; both locals are
+ * unreferenced here.
+ */
+int32_t
+marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                   struct iatt *preoldparent, struct iatt *postoldparent,
+                   struct iatt *prenewparent, struct iatt *postnewparent,
+                   dict_t *xdata)
+{
+        marker_conf_t  *priv                       = NULL;
+        marker_local_t *local                      = NULL;
+        marker_local_t *oplocal                    = NULL;
+        call_stub_t    *stub                       = NULL;
+        int32_t         ret                        = 0;
+        char            contri_key[QUOTA_KEY_MAX]  = {0, };
+        loc_t           newloc                     = {0, };
+
+        local = (marker_local_t *) frame->local;
+
+        if (local != NULL) {
+                oplocal = local->oplocal;
+        }
+
+        priv = this->private;
+
+        if (op_ret < 0) {
+                if (local != NULL) {
+                        /* a zero err would make marker_rename_unwind treat
+                         * the failure as "nothing to unwind"
+                         */
+                        local->err = op_errno ? op_errno : EINVAL;
+                }
+
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+                        "renaming a file ", strerror (op_errno));
+        }
+
+        if (priv->feature_enabled & GF_QUOTA) {
+                if (local == NULL) {
+                        /* No marker state on this frame, so there is
+                         * nothing for marker_rename_unwind to work on (it
+                         * dereferences the local). Pass the reply through.
+                         */
+                        STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno,
+                                             buf, preoldparent, postoldparent,
+                                             prenewparent, postnewparent,
+                                             xdata);
+                        return 0;
+                }
+
+                if (op_ret < 0) {
+                        goto quota_err;
+                }
+
+                local->ia_nlink = 0;
+                if (xdata)
+                        ret = dict_get_uint32 (xdata,
+                                               GF_RESPONSE_LINK_COUNT_XDATA,
+                                               &local->ia_nlink);
+
+                local->buf = *buf;
+                stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret,
+                                            op_errno, buf, preoldparent,
+                                            postoldparent, prenewparent,
+                                            postnewparent, xdata);
+                if (stub == NULL) {
+                        local->err = ENOMEM;
+                        goto quota_err;
+                }
+
+                local->stub = stub;
+
+                GET_CONTRI_KEY (this, contri_key, oplocal->loc.parent->gfid,
+                                ret);
+                if (ret < 0) {
+                        local->err = ENOMEM;
+                        goto quota_err;
+                }
+
+                /* Removexattr requires uid and gid to be 0,
+                 * reset them in the callback.
+                 */
+                MARKER_SET_UID_GID (frame, local, frame->root);
+
+                newloc.inode = inode_ref (oplocal->loc.inode);
+                newloc.path = gf_strdup (local->loc.path);
+                /* gf_strdup can fail; strrchr on NULL would crash */
+                if (newloc.path)
+                        newloc.name = strrchr (newloc.path, '/');
+                if (newloc.name)
+                        newloc.name++;
+                newloc.parent = inode_ref (local->loc.parent);
+                gf_uuid_copy (newloc.gfid, oplocal->loc.inode->gfid);
+
+                STACK_WIND_COOKIE (frame, marker_rename_unwind,
+                                   frame->cookie, FIRST_CHILD(this),
+                                   FIRST_CHILD(this)->fops->removexattr,
+                                   &newloc, contri_key, NULL);
+
+                loc_wipe (&newloc);
+        } else {
+                frame->local = NULL;
+
+                STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+                                     preoldparent, postoldparent, prenewparent,
+                                     postnewparent, xdata);
+
+                if ((op_ret < 0) || (local == NULL)) {
+                        goto out;
+                }
+
+                if (priv->feature_enabled & GF_XTIME) {
+                        //update marks on oldpath
+                        gf_uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
+                        marker_xtime_update_marks (this, oplocal);
+                        marker_xtime_update_marks (this, local);
+                }
+        }
+
+out:
+        if (!(priv->feature_enabled & GF_QUOTA)) {
+                marker_local_unref (local);
+                marker_local_unref (oplocal);
+        }
+
+        return 0;
+
+quota_err:
+        marker_rename_unwind (frame, NULL, this, 0, 0, NULL);
+        return 0;
+}
+
+
+/*
+ * getxattr callback of the rename chain: saves the source inode's
+ * contribution towards its old parent (read from @dict under the
+ * contribution key) in oplocal->contribution and then winds the actual
+ * rename with marker_rename_cbk as callback.
+ *
+ * A failed getxattr is tolerated only for ENOATTR/ENODATA; any other
+ * failure, or a failure to build the contribution key, records the error
+ * in local->err and goes to marker_rename_unwind.
+ */
+int32_t
+marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ char contri_key[QUOTA_KEY_MAX] = {0, };
+ int32_t ret = 0;
+ quota_meta_t contribution = {0, };
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
+ if ((op_ret < 0) && (op_errno != ENOATTR) && (op_errno != ENODATA)) {
+ local->err = op_errno ? op_errno : EINVAL;
+ gf_log (this->name, GF_LOG_WARNING,
+ "fetching contribution values from %s (gfid:%s) "
+ "failed (%s)", oplocal->loc.path,
+ uuid_utoa (oplocal->loc.inode->gfid),
+ strerror (op_errno));
+ goto err;
+ }
+
+ GET_CONTRI_KEY (this, contri_key, oplocal->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = errno ? errno : ENOMEM;
+ goto err;
+ }
+ /* NOTE(review): dict may be NULL when getxattr failed with
+ * ENOATTR/ENODATA - confirm quota_dict_get_meta tolerates that */
+ quota_dict_get_meta (dict, contri_key, &contribution);
+ oplocal->contribution = contribution;
+
+ STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &oplocal->loc,
+ &local->loc, local->xdata);
+
+ return 0;
+
+err:
+ /* op_ret 0: the failure is already recorded in local->err */
+ marker_rename_unwind (frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+/*
+ * inodelk callback of the rename chain (runs on the lock frame).
+ *
+ * Once the lock on the source parent is held, a getxattr for the
+ * contribution key is wound on the source loc using the original fop
+ * frame; marker_do_rename continues from there. If the lock could not be
+ * taken, or the key could not be built, the error is recorded in
+ * local->err and the fop is unwound through marker_rename_unwind.
+ */
+int32_t
+marker_get_oldpath_contribution (call_frame_t *lk_frame, void *cookie,
+                                 xlator_t *this, int32_t op_ret,
+                                 int32_t op_errno, dict_t *xdata)
+{
+        call_frame_t    *frame                      = NULL;
+        marker_local_t  *local                      = NULL;
+        marker_local_t  *oplocal                    = NULL;
+        char             contri_key[QUOTA_KEY_MAX]  = {0, };
+        int32_t          ret                        = 0;
+
+        local = lk_frame->local;
+        oplocal = local->oplocal;
+        frame = local->frame;
+
+        if (op_ret < 0) {
+                local->err = op_errno ? op_errno : EINVAL;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "cannot hold inodelk on %s (gfid:%s) (%s)",
+                        oplocal->loc.path, uuid_utoa (oplocal->loc.inode->gfid),
+                        strerror (op_errno));
+
+                /* The lock was never acquired, so drop the lock frame now:
+                 * with local->lk_frame cleared,
+                 * marker_rename_release_oldp_lock will not wind an unlock
+                 * for it. The local is detached first so that destroying
+                 * the frame cannot release it. (This teardown used to sit
+                 * after the goto and was never executed.)
+                 */
+                if (local->lk_frame) {
+                        local->lk_frame->local = NULL;
+                        STACK_DESTROY (local->lk_frame->root);
+                        local->lk_frame = NULL;
+                }
+                goto err;
+        }
+
+        GET_CONTRI_KEY (this, contri_key, oplocal->loc.parent->gfid, ret);
+        if (ret < 0) {
+                local->err = errno ? errno : ENOMEM;
+                goto err;
+        }
+
+        /* getxattr requires uid and gid to be 0,
+         * reset them in the callback.
+         */
+        MARKER_SET_UID_GID (frame, local, frame->root);
+
+        if (gf_uuid_is_null (oplocal->loc.gfid))
+                gf_uuid_copy (oplocal->loc.gfid,
+                              oplocal->loc.inode->gfid);
+
+        GF_UUID_ASSERT (oplocal->loc.gfid);
+
+        STACK_WIND_COOKIE (frame, marker_do_rename,
+                           frame->cookie, FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->getxattr,
+                           &oplocal->loc, contri_key, NULL);
+
+        return 0;
+err:
+        marker_rename_unwind (frame, NULL, this, 0, 0, NULL);
+        return 0;
+}
+
+
+/* For a marker_rename FOP, following is the algorithm used for Quota
+ * accounting. The use-case considered is:
+ * 1. rename (src, dst)
+ * 2. both src and dst exist
+ * 3. there are parallel operations on src and dst (lets say through fds
+ * opened on them before rename was initiated).
+ *
+ * PS: We've not thought through whether this algo works in the presence of
+ * hardlinks to src and/or dst.
+ *
+ * Algorithm:
+ * ==========
+ *
+ * 1) set inodelk on src-parent
+ * As part of rename operation, parent can change for the file.
+ * We need to remove contribution (both on disk xattr and in-memory one)
+ * to src-parent (and its ancestors) and add the contribution to dst-parent
+ * (and its ancestors). While we are doing these operations, contribution of
+ * the file/directory shouldn't be changing as we want to be sure that
+ * a) what we subtract from src-parent is exactly what we add to dst-parent
+ * b) we should subtract from src-parent exactly what we contributed to
+ * src-parent
+ * So, we hold a lock on src-parent to block any parallel transactions on
+ * src-inode (since that's the one which survives rename).
+ *
+ * If there are any parallel transactions on dst-inode they keep succeeding
+ * till the association of dst-inode with dst-parent is broken because of an
+ * inode_rename after unwind of rename fop from marker. Only after unwind
+ * (and hence inode_rename), we delete and subtract the contribution of
+ * dst-inode to dst-parent. That way we are making sure we subtract exactly
+ * what dst-inode contributed to dst-parent.
+ *
+ * 2) lookup contribution to src-parent on src-inode.
+ * We need to save the contribution info for use at step-8.
+ *
+ * 3) wind rename
+ * Perform rename on disk
+ *
+ * 4) remove xattr on src-loc
+ * After rename, parent can change, so
+ * need to remove xattrs storing contribution to src-parent.
+ *
+ * 5) remove contribution node corresponding to src-parent from the in-memory
+ * list.
+ * After rename, contri gfid can change and we have
+ * also removed xattr from file.
+ * We need to remove in-memory contribution node to prevent updates to
+ * src-parent even after a successful rename
+ *
+ * 6) unwind rename
+ * This will ensure that rename is done in the server
+ * inode table. An inode_rename disassociates src-inode from src-parent and
+ * associates it with dst-parent. It also disassociates dst-inode from
+ * dst-parent. After inode_rename, inode_parent on src-inode will give
+ * dst-parent and inode_parent on dst-inode will return NULL (assuming
+ * dst-inode doesn't have any hardlinks).
+ *
+ * 7) release inodelk on src-parent
+ * Lock on src-parent should be released only after
+ * rename on disk, remove xattr and rename_unwind (and hence inode_rename)
+ * operations. If lock is released before inode_rename, a parallel
+ * transaction on src-inode can still update src-parent (as inode_parent on
+ * src-inode can still return src-parent). This would make the
+ * contribution from src-inode to src-parent stored in step-2 stale.
+ *
+ * 8) Initiate mq_reduce_parent_size_txn on src-parent to remove contribution
+ * of src-inode to src-parent. We use the contribution stored in step-2.
+ * Since, we had acquired the lock on src-parent all along step-2 through
+ * inode_rename, we can be sure that a parallel transaction wouldn't have
+ * added a delta to src-parent.
+ *
+ * 9) Initiate mq_reduce_parent_size_txn on dst-parent if dst-inode exists.
+ * The size reduced from dst-parent and its ancestors is the
+ * size stored as contribution to dst-parent in dst-inode.
+ * If the destination file had existed, rename will unlink the
+ * destination file as part of its operation.
+ * We need to reduce the size on the dest parent similarly to
+ * unlink. Since, we are initiating reduce-parent-size transaction after
+ * inode_rename, we can be sure that a parallel transaction wouldn't add
+ * delta to dst-parent while we are reducing the contribution of dst-inode
+ * from its ancestors before rename.
+ *
+ * 10) create contribution xattr to dst-parent on src-inode.
+ */
+/*
+ * rename fop.
+ *
+ * With no marker feature enabled the call is passed straight through.
+ * Otherwise two locals are set up: `local` for the destination
+ * (@newloc) and `oplocal` for the source (@oldloc). Without quota the
+ * rename is wound directly. With quota, a write inodelk on the source
+ * parent is requested on a separate lock frame and the chain continues in
+ * marker_get_oldpath_contribution; GF_REQUEST_LINK_COUNT_XDATA is added
+ * to the request xdata so the callback learns the link count.
+ *
+ * Any setup failure unwinds with ENOMEM.
+ */
+int32_t
+marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+               loc_t *newloc, dict_t *xdata)
+{
+        int32_t          ret     = 0;
+        marker_local_t  *local   = NULL;
+        marker_local_t  *oplocal = NULL;
+        marker_conf_t   *priv    = NULL;
+        struct gf_flock  lock    = {0, };
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto rename_wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can fail; bail out before the local is used */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        oplocal = mem_get0 (this->local_pool);
+        if (oplocal == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, oplocal);
+
+        /* the fop frame carries the destination local */
+        frame->local = local;
+
+        local->oplocal = marker_local_ref (oplocal);
+
+        ret = loc_copy (&local->loc, newloc);
+        if (ret < 0)
+                goto err;
+
+        ret = loc_copy (&oplocal->loc, oldloc);
+        if (ret < 0)
+                goto err;
+
+        if (!(priv->feature_enabled & GF_QUOTA)) {
+                goto rename_wind;
+        }
+
+        ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc);
+        if (ret < 0)
+                goto err;
+
+        ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc);
+        if (ret < 0)
+                goto err;
+
+        lock.l_len = 0;
+        lock.l_start = 0;
+        lock.l_type = F_WRLCK;
+        lock.l_whence = SEEK_SET;
+
+        local->xdata = xdata ? dict_ref (xdata) : dict_new ();
+        if (local->xdata == NULL)
+                goto err;
+
+        ret = dict_set_int32 (local->xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+        if (ret < 0)
+                goto err;
+
+        local->frame = frame;
+        local->lk_frame = create_frame (this, this->ctx->pool);
+        if (local->lk_frame == NULL)
+                goto err;
+
+        local->lk_frame->root->uid = 0;
+        local->lk_frame->root->gid = 0;
+        local->lk_frame->local = local;
+        set_lk_owner_from_ptr (&local->lk_frame->root->lk_owner,
+                               local->lk_frame->root);
+
+        STACK_WIND (local->lk_frame,
+                    marker_get_oldpath_contribution,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->inodelk,
+                    this->name, &oplocal->parent_loc,
+                    F_SETLKW, &lock, NULL);
+
+        return 0;
+
+rename_wind:
+        STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+        return 0;
+err:
+        MARKER_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL,
+                             NULL, NULL, NULL, NULL, NULL);
+        marker_local_unref (oplocal);
+
+        return 0;
+}
+
+
+/*
+ * Callback for truncate.  The reply is unwound to the caller first; on
+ * success with a marker local attached, the quota transaction and the xtime
+ * marks are then updated for the truncated loc.
+ */
+int32_t
+marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        marker_local_t *local     = NULL;
+        marker_conf_t  *conf      = NULL;
+        struct iatt    *quota_buf = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+                        "truncating a file ", strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_QUOTA) {
+                        /* At the end of a migration, DHT rebalance turns
+                         * the source into a linkto file and then truncates
+                         * it, leaving the already-accounted contri behind.
+                         * The linkto file therefore has to be accounted on
+                         * truncate: passing NULL instead of postbuf makes
+                         * mq_prevalidate skip its linkto-file check.
+                         * ftruncate behaves the same way.
+                         */
+                        quota_buf = postbuf;
+                        if (postbuf && IS_DHT_LINKFILE_MODE (postbuf))
+                                quota_buf = NULL;
+
+                        mq_initiate_quota_txn (this, &local->loc, quota_buf);
+                }
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * truncate fop.  With any marker feature enabled, a frame-local
+ * marker_local_t holding a copy of loc is set up for marker_truncate_cbk;
+ * otherwise the call is simply forwarded.  Allocation or loc_copy failure
+ * unwinds with ENOMEM.
+ */
+int32_t
+marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+                 dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for ftruncate.  Unwinds the reply, then on success with a marker
+ * local attached updates the quota transaction and the xtime marks.
+ */
+int32_t
+marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        marker_local_t *local     = NULL;
+        marker_conf_t  *conf      = NULL;
+        struct iatt    *quota_buf = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+                        "truncating a file ", strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_QUOTA) {
+                        /* for a DHT linkto file pass NULL, not postbuf */
+                        quota_buf = postbuf;
+                        if (postbuf && IS_DHT_LINKFILE_MODE (postbuf))
+                                quota_buf = NULL;
+
+                        mq_initiate_quota_txn (this, &local->loc, quota_buf);
+                }
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * ftruncate fop.  With any marker feature enabled, a frame-local
+ * marker_local_t is set up with a loc filled from fd->inode for
+ * marker_ftruncate_cbk; otherwise the call is simply forwarded.
+ * Allocation or loc-fill failure unwinds with ENOMEM.
+ */
+int32_t
+marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                  dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for symlink.  With quota enabled, a quota inode context is
+ * created for the new inode before unwinding; failure to create it turns
+ * the reply into -1/ENOMEM.  After the unwind, on success, the quota xattrs
+ * transaction and the xtime marks are started for the new entry.
+ */
+int32_t
+marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+        marker_conf_t     *conf      = NULL;
+        marker_local_t    *local     = NULL;
+        quota_inode_ctx_t *quota_ctx = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+                        "creating symlinks ", strerror (op_errno));
+        }
+
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        conf = this->private;
+
+        if (op_ret >= 0 && inode && (conf->feature_enabled & GF_QUOTA)) {
+                quota_ctx = mq_inode_ctx_new (inode, this);
+                if (quota_ctx == NULL) {
+                        gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+                                "failed for %s", uuid_utoa (inode->gfid));
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+        STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                /* take the gfid from the reply if the loc has none yet */
+                if (gf_uuid_is_null (local->loc.gfid))
+                        gf_uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+                if (conf->feature_enabled & GF_QUOTA) {
+                        mq_create_xattrs_txn (this, &local->loc, buf);
+                }
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * symlink fop.  With any marker feature enabled, a frame-local
+ * marker_local_t holding a copy of loc is set up for marker_symlink_cbk;
+ * otherwise the call is simply forwarded.  Allocation or loc_copy failure
+ * unwinds with ENOMEM.
+ */
+int
+marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+                loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+                    xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (symlink, frame, -1, ENOMEM, NULL,
+                             NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for mknod.  With quota enabled, a quota inode context is created
+ * for the new inode before unwinding; failure to create it turns the reply
+ * into -1/ENOMEM.  After the unwind, on success, the quota xattrs
+ * transaction is started for regular files only and the xtime marks are
+ * updated.
+ */
+int32_t
+marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        marker_local_t    *local     = NULL;
+        marker_conf_t     *conf      = NULL;
+        quota_inode_ctx_t *quota_ctx = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred with "
+                        "mknod ", strerror (op_errno));
+        }
+
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        conf = this->private;
+
+        if (op_ret >= 0 && inode && (conf->feature_enabled & GF_QUOTA)) {
+                quota_ctx = mq_inode_ctx_new (inode, this);
+                if (quota_ctx == NULL) {
+                        gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+                                "failed for %s", uuid_utoa (inode->gfid));
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+        STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
+                             buf, preparent, postparent, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                /* take the gfid from the reply if the loc has none yet */
+                if (gf_uuid_is_null (local->loc.gfid))
+                        gf_uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+                /* only regular files get quota xattrs here */
+                if ((conf->feature_enabled & GF_QUOTA) &&
+                    (S_ISREG (local->mode))) {
+                        mq_create_xattrs_txn (this, &local->loc, buf);
+                }
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * mknod fop.  With any marker feature enabled, a frame-local marker_local_t
+ * holding a copy of loc and the requested mode is set up for
+ * marker_mknod_cbk; otherwise the call is simply forwarded.  Allocation or
+ * loc_copy failure unwinds with ENOMEM.
+ */
+int
+marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+              dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        /* the callback uses the mode to single out regular files */
+        local->mode = mode;
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+                    xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL,
+                             NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for fallocate.  Unwinds the reply, then on success with a marker
+ * local attached starts the quota transaction and updates the xtime marks.
+ */
+int32_t
+marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+                        "fallocating a file ", strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_QUOTA)
+                        mq_initiate_quota_txn (this, &local->loc, postbuf);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * fallocate fop.  With any marker feature enabled, a frame-local
+ * marker_local_t is set up with a loc filled from fd->inode for
+ * marker_fallocate_cbk; otherwise the call is simply forwarded.
+ * Allocation or loc-fill failure unwinds with ENOMEM.
+ */
+int32_t
+marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+                    xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for discard.  Unwinds the reply, then on success with a marker
+ * local attached starts the quota transaction and updates the xtime marks.
+ */
+int32_t
+marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred during discard",
+                        strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_QUOTA)
+                        mq_initiate_quota_txn (this, &local->loc, postbuf);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * discard fop.  With any marker feature enabled, a frame-local
+ * marker_local_t is set up with a loc filled from fd->inode for
+ * marker_discard_cbk; otherwise the call is simply forwarded.
+ * Allocation or loc-fill failure unwinds with ENOMEM.
+ */
+int32_t
+marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               size_t len, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_discard_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback for zerofill.  Unwinds the reply, then on success with a marker
+ * local attached starts the quota transaction and updates the xtime marks.
+ */
+int32_t
+marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                    struct iatt *postbuf, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill",
+                        strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_QUOTA)
+                        mq_initiate_quota_txn (this, &local->loc, postbuf);
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * zerofill fop.  With any marker feature enabled, a frame-local
+ * marker_local_t is set up with a loc filled from fd->inode for
+ * marker_zerofill_cbk; otherwise the call is simply forwarded.
+ * Allocation or loc-fill failure unwinds with ENOMEM.
+ */
+int32_t
+marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                off_t len, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Handle a request carrying the key trusted.glusterfs.volume-mark.
+ *
+ * When the key is present the call is answered here (the frame is unwound
+ * as a setxattr reply) and 0 is returned; -1 is returned, without unwinding,
+ * when an argument is NULL or the key is absent.
+ *
+ * Only the special client (pid GF_CLIENT_PID_GSYNCD) may issue it; anyone
+ * else gets EPERM.  A zero-length value or the value "RESET" touches the
+ * timestamp file by opening it with O_TRUNC, which is meant to update its
+ * access and modification times; any other value yields EINVAL.
+ */
+int32_t
+call_from_sp_client_to_reset_tmfile (call_frame_t *frame,
+                                     xlator_t     *this,
+                                     dict_t       *dict)
+{
+        int32_t        tmfd         = 0;
+        int32_t        op_ret       = 0;
+        int32_t        op_errno     = 0;
+        int            is_reset_req = 0;
+        data_t        *mark         = NULL;
+        marker_conf_t *conf         = NULL;
+
+        if (frame == NULL || this == NULL || dict == NULL)
+                return -1;
+
+        conf = this->private;
+
+        mark = dict_get (dict, "trusted.glusterfs.volume-mark");
+        if (mark == NULL)
+                return -1;
+
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+                op_ret   = -1;
+                op_errno = EPERM;
+        } else {
+                is_reset_req = (mark->len == 0 ||
+                                (mark->len == 5 &&
+                                 memcmp (mark->data, "RESET", 5) == 0));
+
+                if (!is_reset_req) {
+                        op_ret   = -1;
+                        op_errno = EINVAL;
+                } else {
+                        tmfd = open (conf->timestamp_file, O_WRONLY|O_TRUNC);
+                        if (tmfd != -1) {
+                                /* TODO check whether O_TRUNC updates the
+                                 * timestamps of a zero length file on all
+                                 * machines.
+                                 */
+                                sys_close (tmfd);
+                                op_ret   = 0;
+                                op_errno = 0;
+                        } else if (errno == ENOENT) {
+                                /* a missing timestamp file is not an error */
+                                op_ret   = 0;
+                                op_errno = 0;
+                        } else {
+                                op_ret   = -1;
+                                op_errno = errno;
+                        }
+                }
+        }
+
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for setxattr.  Unwinds the reply, then on success with a marker
+ * local attached updates the xtime marks.
+ */
+int32_t
+marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
+                        "setxattr ", strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * Per-key callback used while cleaning up quota xattrs: removes xattr 'k'
+ * from the loc stored in the frame's local ('data' is the call frame).
+ * Returns 0 when the key was removed or deliberately kept, -1 when the
+ * removal failed.
+ */
+int
+remove_quota_keys (dict_t *dict, char *k, data_t *v, void *data)
+{
+        call_frame_t   *frame                = data;
+        marker_local_t *local                = frame->local;
+        xlator_t       *this                 = frame->this;
+        marker_conf_t  *conf                 = this->private;
+        char            cur_suffix[NAME_MAX] = {0,};
+        char           *key_suffix           = NULL;
+
+        /* If quota is re-enabled right after being disabled, quota healing
+         * may already be creating new xattrs while this cleanup is still
+         * running.  Keys whose version suffix equals the current version
+         * are such new xattrs and must be left alone.
+         */
+        if ((conf->feature_enabled & GF_QUOTA) && conf->version > 0) {
+                snprintf (cur_suffix, sizeof (cur_suffix), ".%d",
+                          conf->version);
+                key_suffix = strrchr (k, '.');
+                if (key_suffix && strcmp (key_suffix, cur_suffix) == 0)
+                        return 0;
+        }
+
+        if (syncop_removexattr (FIRST_CHILD (this), &local->loc, k, 0,
+                                NULL)) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove "
+                        "extended attribute: %s", local->loc.path, k);
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Completion callback of the quota xattr cleanup synctask: converts the
+ * task's return value into an op_ret/op_errno pair (op_errno is the negated
+ * return value) and unwinds the pending setxattr with 'args' as xdata.
+ */
+int
+quota_xattr_cleaner_cbk (int ret, call_frame_t *frame, void *args)
+{
+        dict_t *xdata    = args;
+        int     op_ret   = 0;
+        int     op_errno = -ret;
+
+        if (ret < 0)
+                op_ret = -1;
+
+        MARKER_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+        return ret;
+}
+
+/*
+ * Synctask body for the quota xattr cleanup: lists the xattrs of the loc
+ * stored in the task frame's local and runs remove_quota_keys() on every key
+ * matching the quota and parent-gfid patterns.  Returns 0 on success, -1
+ * when no synctask is running, otherwise -errno.
+ *
+ * NOTE(review): only a return of exactly -1 from the syncop/foreach calls is
+ * treated as failure, and errno is then assumed to be meaningful - confirm
+ * against the syncop return convention in use.
+ */
+int
+quota_xattr_cleaner (void *args)
+{
+        struct synctask *task       = NULL;
+        call_frame_t    *frame      = NULL;
+        xlator_t        *this       = NULL;
+        marker_local_t  *local      = NULL;
+        dict_t          *xattrs     = NULL;
+        int              ret        = -1;
+        int              idx        = 0;
+        char            *patterns[] = {
+                "trusted.glusterfs.quota.*",
+                PGFID_XATTR_KEY_PREFIX"*",
+        };
+
+        task = synctask_get ();
+        if (!task)
+                goto out;
+
+        frame = task->frame;
+        this  = frame->this;
+        local = frame->local;
+
+        ret = syncop_listxattr (FIRST_CHILD(this), &local->loc, &xattrs, NULL,
+                                NULL);
+        if (ret == -1) {
+                ret = -errno;
+                goto out;
+        }
+
+        /* same removal pass for each pattern, in the original order */
+        for (idx = 0; idx < 2; idx++) {
+                ret = dict_foreach_fnmatch (xattrs, patterns[idx],
+                                            remove_quota_keys, frame);
+                if (ret == -1) {
+                        ret = -errno;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        if (xattrs)
+                dict_unref (xattrs);
+
+        return ret;
+}
+
+/*
+ * Start the asynchronous quota xattr cleanup for 'loc'.
+ *
+ * A marker local holding a copy of loc is attached to the frame and a
+ * synctask running quota_xattr_cleaner() is created; its completion callback
+ * unwinds the setxattr.  If anything fails before the task is created, the
+ * setxattr is unwound here with ENOMEM.
+ *
+ * Returns 0 when the task was created, non-zero otherwise.
+ */
+int
+marker_do_xattr_cleanup (call_frame_t *frame, xlator_t *this, dict_t *xdata,
+                         loc_t *loc)
+{
+        int             ret   = -1;
+        marker_local_t *local = NULL;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto out;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        /* The cleaner works on local->loc, so a failed copy must abort the
+         * request instead of being ignored (the other fops in this file
+         * check loc_copy the same way). */
+        ret = loc_copy (&local->loc, loc);
+        if (ret < 0)
+                goto out;
+
+        ret = synctask_new (this->ctx->env, quota_xattr_cleaner,
+                            quota_xattr_cleaner_cbk, frame, xdata);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to create synctask "
+                        "for cleaning up quota extended attributes");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret)
+                MARKER_STACK_UNWIND (setxattr, frame, -1, ENOMEM, xdata);
+
+        return ret;
+}
+
+/* True when 'dict' carries the virtual quota-xattr-cleanup key. */
+static gf_boolean_t
+marker_xattr_cleanup_cmd (dict_t *dict)
+{
+        data_t *cleanup_req = dict_get (dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY);
+
+        return (cleanup_req != NULL);
+}
+
+/*
+ * setxattr fop.
+ *
+ *  - A quota-xattr-cleanup request is allowed for uid 0 / gid 0 only (EPERM
+ *    otherwise) and is handed to marker_do_xattr_cleanup(), which unwinds
+ *    the call itself.
+ *  - Otherwise marker_key_replace_with_ver() is applied to dict, a
+ *    volume-mark request is answered by
+ *    call_from_sp_client_to_reset_tmfile(), and any other request is wound
+ *    down with a marker local attached when a marker feature is enabled.
+ */
+int32_t
+marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+                 int32_t flags, dict_t *xdata)
+{
+        int32_t         ret      = 0;
+        marker_local_t *local    = NULL;
+        marker_conf_t  *priv     = NULL;
+        int             op_errno = ENOMEM;
+
+        priv = this->private;
+
+        if (marker_xattr_cleanup_cmd (dict)) {
+                if (frame->root->uid != 0 || frame->root->gid != 0) {
+                        op_errno = EPERM;
+                        ret = -1;
+                        goto err;
+                }
+
+                /* The following function does the cleanup and then unwinds
+                 * the corresponding call */
+                loc_path (loc, NULL);
+                marker_do_xattr_cleanup (frame, this, xdata, loc);
+                return 0;
+        }
+
+        ret = marker_key_replace_with_ver (this, dict);
+        if (ret < 0)
+                goto err;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        /* returns 0 when it has already answered (unwound) the request */
+        ret = call_from_sp_client_to_reset_tmfile (frame, this, dict);
+        if (ret == 0)
+                return 0;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for fsetxattr.  Unwinds the reply, then on success with a marker
+ * local attached updates the xtime marks.
+ */
+int32_t
+marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
+                        "fsetxattr", strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * fsetxattr fop.  A volume-mark request is answered by
+ * call_from_sp_client_to_reset_tmfile(); any other request is wound down,
+ * with a marker local (loc filled from fd->inode) attached when a marker
+ * feature is enabled.  Allocation or loc-fill failure unwinds with ENOMEM.
+ */
+int32_t
+marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+                  int32_t flags, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        /* returns 0 when it has already answered (unwound) the request */
+        ret = call_from_sp_client_to_reset_tmfile (frame, this, dict);
+        if (ret == 0)
+                return 0;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for fsetattr.  Unwinds the reply, then on success with a marker
+ * local attached updates the xtime marks.
+ */
+int32_t
+marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                     struct iatt *statpost, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
+                        "fsetattr ", strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
+                             statpost, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+
+/*
+ * fsetattr fop.  With any marker feature enabled, a frame-local
+ * marker_local_t is set up with a loc filled from fd->inode for
+ * marker_fsetattr_cbk; otherwise the call is simply forwarded.
+ * Allocation or loc-fill failure unwinds with ENOMEM.
+ */
+int32_t
+marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for setattr.  Unwinds the reply, then on success with a marker
+ * local attached updates the xtime marks.  The local is detached before
+ * logging because the failure message prints its path.
+ */
+int32_t
+marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                    struct iatt *statpost, dict_t *xdata)
+{
+        marker_local_t *local = (marker_local_t *) frame->local;
+        marker_conf_t  *conf  = NULL;
+
+        frame->local = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s occurred during setattr of %s",
+                        strerror (op_errno),
+                        (local ? local->loc.path : "<nul>"));
+        }
+
+        STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
+                             statpost, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * setattr fop.  With any marker feature enabled, a frame-local
+ * marker_local_t holding a copy of loc is set up for marker_setattr_cbk;
+ * otherwise the call is simply forwarded.  Allocation or loc_copy failure
+ * unwinds with ENOMEM.
+ */
+int32_t
+marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int32_t         ret   = 0;
+        marker_local_t *local = NULL;
+        marker_conf_t  *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback for removexattr.  Unwinds the reply, then on success with a
+ * marker local attached updates the xtime marks.
+ */
+int32_t
+marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        marker_local_t *local = NULL;
+        marker_conf_t  *conf  = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE,
+                        "%s occurred while "
+                        "removing extended attribute",
+                        strerror (op_errno));
+        }
+
+        /* detach the local so it can still be used after the unwind */
+        local = (marker_local_t *) frame->local;
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+
+        if (op_ret != -1 && local != NULL) {
+                conf = this->private;
+
+                if (conf->feature_enabled & GF_XTIME)
+                        marker_xtime_update_marks (this, local);
+        }
+
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * removexattr fop.  If 'name' is one of the entries of mq_ext_xattrs it is
+ * replaced by the key built with GET_QUOTA_KEY before the call is wound.
+ * With any marker feature enabled, a frame-local marker_local_t holding a
+ * copy of loc is set up for marker_removexattr_cbk.  Key construction,
+ * allocation or loc_copy failure unwinds with ENOMEM.
+ */
+int32_t
+marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    const char *name, dict_t *xdata)
+{
+        int32_t         ret                 = -1;
+        int32_t         i                   = 0;
+        marker_local_t *local               = NULL;
+        marker_conf_t  *priv                = NULL;
+        char            key[QUOTA_KEY_MAX]  = {0, };
+
+        priv = this->private;
+
+        if (name) {
+                for (i = 0; mq_ext_xattrs[i]; i++) {
+                        if (strcmp (name, mq_ext_xattrs[i]))
+                                continue;
+
+                        GET_QUOTA_KEY (this, key, mq_ext_xattrs[i], ret);
+                        if (ret < 0)
+                                goto err;
+                        /* wind with the rebuilt key from here on */
+                        name = key;
+                        break;
+                }
+        }
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        /* mem_get0 can return NULL (marker_lookup checks the same
+         * allocation); never hand a NULL local to MARKER_INIT_LOCAL. */
+        if (local == NULL)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = loc_copy (&local->loc, loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+        return 0;
+err:
+        MARKER_STACK_UNWIND (removexattr, frame, -1, ENOMEM, NULL);
+
+        return 0;
+}
+
+/* True when at least one key of 'xattrs' is matched by
+ * _is_quota_internal_xattr. */
+static gf_boolean_t
+__has_quota_xattrs (dict_t *xattrs)
+{
+        int matched = dict_foreach_match (xattrs, _is_quota_internal_xattr,
+                                          NULL, dict_null_foreach_fn, NULL);
+
+        return (matched > 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * Callback for lookup.
+ *
+ * On a successful lookup it runs marker_key_set_ver() on the reply dict,
+ * picks the dict to return (a filtered copy when the reply carries quota
+ * internal xattrs, otherwise the reply dict itself) and, with quota enabled,
+ * creates the quota inode context.  Any failure in these steps turns the
+ * reply into -1/ENOMEM.  After the unwind, on success with a marker local
+ * attached and quota enabled, mq_xattr_state() is run with the unfiltered
+ * reply dict.
+ */
+int32_t
+marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+
+ priv = this->private;
+ /* detach the local so it can still be used after the unwind */
+ local = (marker_local_t *) frame->local;
+ frame->local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "lookup failed with %s",
+ strerror (op_errno));
+ goto unwind;
+ }
+
+ ret = marker_key_set_ver (this, dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* xattrs is what the caller gets: a filtered copy when quota internal
+ * xattrs are present, otherwise an extra reference on dict itself */
+ if (dict && __has_quota_xattrs (dict)) {
+ xattrs = dict_copy_with_ref (dict, NULL);
+ if (!xattrs) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ } else {
+ marker_filter_internal_xattrs (this, xattrs);
+ }
+ } else if (dict) {
+ xattrs = dict_ref (dict);
+ }
+
+ if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) {
+ ctx = mq_inode_ctx_new (inode, this);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+ "failed for %s", uuid_utoa (inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xattrs, postparent);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ /* copy the gfid from the stat structure instead of inode,
+ * since if the lookup is fresh lookup, then the inode
+ * would have not yet linked to the inode table which happens
+ * in protocol/server.
+ */
+ if (gf_uuid_is_null (local->loc.gfid))
+ gf_uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+
+ if (priv->feature_enabled & GF_QUOTA) {
+ mq_xattr_state (this, &local->loc, dict, *buf);
+ }
+
+out:
+ /* release the local and the reference/copy taken for the reply dict */
+ marker_local_unref (local);
+ if (xattrs)
+ dict_unref (xattrs);
+
+ return 0;
+}
+
+/*
+ * lookup fop.  Works on its own reference to the request dict (a new dict
+ * when the caller passed none), applies marker_key_replace_with_ver() to it
+ * and, with a marker feature enabled, attaches a marker local holding a
+ * copy of loc; with quota enabled the quota xattrs are added to the request
+ * via mq_req_xattr().  Any failure unwinds with ENOMEM.
+ */
+int32_t
+marker_lookup (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, dict_t *xattr_req)
+{
+        marker_conf_t  *conf  = this->private;
+        marker_local_t *local = NULL;
+        int32_t         ret   = 0;
+
+        if (xattr_req)
+                xattr_req = dict_ref (xattr_req);
+        else
+                xattr_req = dict_new ();
+
+        if (!xattr_req)
+                goto err;
+
+        ret = marker_key_replace_with_ver (this, xattr_req);
+        if (ret < 0)
+                goto err;
+
+        if (conf->feature_enabled != 0) {
+                local = mem_get0 (this->local_pool);
+                if (local == NULL)
+                        goto err;
+
+                MARKER_INIT_LOCAL (frame, local);
+
+                ret = loc_copy (&local->loc, loc);
+                if (ret == -1)
+                        goto err;
+
+                if ((conf->feature_enabled & GF_QUOTA))
+                        mq_req_xattr (this, loc, xattr_req, NULL, NULL);
+        }
+
+        STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+        /* drop the reference taken at the top */
+        dict_unref (xattr_req);
+
+        return 0;
+err:
+        MARKER_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+
+        if (xattr_req)
+                dict_unref (xattr_req);
+
+        return 0;
+}
+
+
+/*
+ * readdirp callback used for ancestry-building requests.  For every
+ * returned entry that has an inode, marker_key_set_ver() is run on its dict
+ * and a quota inode context is created.  A marker_key_set_ver() failure
+ * stops the walk and turns the reply into -1/ENOMEM; a failed context
+ * creation is only logged.
+ */
+int
+marker_build_ancestry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int op_ret, int op_errno, gf_dirent_t *entries,
+                           dict_t *xdata)
+{
+        gf_dirent_t       *dentry    = NULL;
+        quota_inode_ctx_t *quota_ctx = NULL;
+
+        if ((op_ret > 0) && (entries != NULL)) {
+                list_for_each_entry (dentry, &entries->list, list) {
+                        if (dentry->inode == NULL)
+                                continue;
+
+                        if (marker_key_set_ver (this, dentry->dict) < 0) {
+                                op_ret   = -1;
+                                op_errno = ENOMEM;
+                                break;
+                        }
+
+                        quota_ctx = mq_inode_ctx_new (dentry->inode, this);
+                        if (quota_ctx == NULL)
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "mq_inode_ctx_new "
+                                        "failed for %s",
+                                        uuid_utoa (dentry->inode->gfid));
+                }
+        }
+
+        STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+        return 0;
+}
+
+/*
+ * Callback for regular (non-ancestry) readdirp.
+ *
+ * With quota enabled and a marker local attached, every returned entry
+ * other than "." and ".." that has an inode gets a temporary loc (parent =
+ * the directory inode kept in the local, path resolved from the parent and
+ * the entry name), a quota inode context and an mq_xattr_state() pass, then
+ * marker_key_set_ver() is run on its dict.  A marker_key_set_ver() failure
+ * aborts the walk and turns the reply into -1/ENOMEM.
+ */
+int
+marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ loc_t loc = {0, };
+ int ret = -1;
+ char *resolvedpath = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ /* error or empty listing: nothing to post-process */
+ if (op_ret <= 0)
+ goto unwind;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) {
+ goto unwind;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0) ||
+ entry->inode == NULL)
+ continue;
+
+ /* build a temporary loc for this entry; the references taken here
+ * are dropped by loc_wipe() below */
+ loc.parent = inode_ref (local->loc.inode);
+ loc.inode = inode_ref (entry->inode);
+ ret = inode_path (loc.parent, entry->d_name, &resolvedpath);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get the "
+ "path for the entry %s", entry->d_name);
+ loc_wipe (&loc);
+ continue;
+ }
+
+ /* the path string now belongs to loc */
+ loc.path = resolvedpath;
+ resolvedpath = NULL;
+
+ ctx = mq_inode_ctx_new (loc.inode, this);
+ if (ctx == NULL)
+ gf_log (this->name, GF_LOG_WARNING, "mq_inode_ctx_new "
+ "failed for %s", uuid_utoa (loc.inode->gfid));
+
+ mq_xattr_state (this, &loc, entry->dict, entry->d_stat);
+ loc_wipe (&loc);
+
+ ret = marker_key_set_ver (this, entry->dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ }
+
+unwind:
+ MARKER_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+/*
+ * readdirp fop.
+ *
+ * Works on its own reference to the request dict (a new dict when the
+ * caller passed none) and applies marker_key_replace_with_ver() to it.
+ * Requests carrying GET_ANCESTRY_DENTRY_KEY are wound with
+ * marker_build_ancestry_cbk; all others with marker_readdirp_cbk, after
+ * attaching a marker local for the directory inode and requesting the
+ * quota xattrs when quota is enabled.  Any failure unwinds with ENOMEM.
+ */
+int
+marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t offset, dict_t *dict)
+{
+        marker_conf_t  *priv  = NULL;
+        loc_t           loc   = {0, };
+        marker_local_t *local = NULL;
+        int             ret   = -1;
+
+        priv = this->private;
+
+        dict = dict ? dict_ref(dict) : dict_new();
+        if (!dict)
+                goto unwind;
+
+        ret = marker_key_replace_with_ver (this, dict);
+        if (ret < 0)
+                goto unwind;
+
+        if (dict_get (dict, GET_ANCESTRY_DENTRY_KEY)) {
+                STACK_WIND (frame, marker_build_ancestry_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdirp,
+                            fd, size, offset, dict);
+        } else {
+                if (priv->feature_enabled & GF_QUOTA) {
+                        local = mem_get0 (this->local_pool);
+                        /* mem_get0 can return NULL (marker_lookup checks
+                         * the same allocation); never hand a NULL local to
+                         * MARKER_INIT_LOCAL. */
+                        if (local == NULL)
+                                goto unwind;
+
+                        MARKER_INIT_LOCAL (frame, local);
+
+                        /* loc.parent borrows the reference that is stored
+                         * in local->loc.inode; loc itself is never wiped */
+                        loc.parent = local->loc.inode = inode_ref (fd->inode);
+
+                        mq_req_xattr (this, &loc, dict, NULL, NULL);
+                }
+
+                STACK_WIND (frame, marker_readdirp_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdirp,
+                            fd, size, offset, dict);
+        }
+
+        dict_unref (dict);
+        return 0;
+unwind:
+        MARKER_STACK_UNWIND (readdirp, frame, -1, ENOMEM, NULL, NULL);
+
+        /* drop the reference taken at the top; it used to leak on this
+         * path (marker_lookup releases its request dict the same way) */
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this translator with gf_marker_mt_end + 1
+ * types.  Returns -1 when 'this' is NULL, otherwise the result of
+ * xlator_mem_acct_init() (failure is logged).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (this != NULL) {
+                ret = xlator_mem_acct_init (this, gf_marker_mt_end + 1);
+
+                if (ret != 0)
+                        gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+                               " failed");
+        }
+
+        return ret;
+}
+
+
+/*
+ * Initialise the xtime part of the translator's private data from
+ * 'options': the volume uuid (string and parsed binary form), the marker
+ * xattr name built from it, and the timestamp file path.
+ *
+ * Returns 0 on success, -1 when an argument is missing, the uuid is
+ * invalid, the xattr name cannot be allocated, or a required option is
+ * absent.
+ *
+ * NOTE(review): volume_uuid and timestamp_file are set to point at the
+ * option dict's own data (data->data), not at copies - confirm that this
+ * matches how the cleanup code releases them.
+ */
+int32_t
+init_xtime_priv (xlator_t *this, dict_t *options)
+{
+        data_t        *data = NULL;
+        int32_t        ret  = -1;
+        marker_conf_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, options, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        priv = this->private;
+
+        if((data = dict_get (options, VOLUME_UUID)) != NULL) {
+                priv->volume_uuid = data->data;
+
+                ret = gf_uuid_parse (priv->volume_uuid, priv->volume_uuid_bin);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid volume uuid %s", priv->volume_uuid);
+                        goto out;
+                }
+
+                ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s",
+                                   MARKER_XATTR_PREFIX, priv->volume_uuid,
+                                   XTIME);
+
+                if (ret == -1){
+                        priv->marker_xattr = NULL;
+                        goto out;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "volume-uuid = %s", priv->volume_uuid);
+        } else {
+                priv->volume_uuid = NULL;
+
+                /* the two literals are concatenated: keep the separating
+                 * space so the message does not read "volume-uuidin" */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "please specify the volume-uuid "
+                        "in the translator options");
+
+                return -1;
+        }
+
+        if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) {
+                priv->timestamp_file = data->data;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "the timestamp-file is = %s",
+                        priv->timestamp_file);
+
+        } else {
+                priv->timestamp_file = NULL;
+
+                gf_log (this->name, GF_LOG_ERROR,
+                        "please specify the timestamp-file "
+                        "in the translator options");
+
+                /* ret still holds gf_asprintf's non-negative result here;
+                 * report the missing option as a failure */
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Release the xtime-related strings held in the marker private
+ * structure (volume_uuid, timestamp_file, marker_xattr).
+ *
+ * Each pointer is reset to NULL after being freed: reconfigure() calls
+ * this function and then init_xtime_priv(), which may fail before
+ * reassigning every field, and a later cleanup must not free the same
+ * memory twice.
+ */
+void
+marker_xtime_priv_cleanup (xlator_t *this)
+{
+        marker_conf_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+
+        priv = (marker_conf_t *) this->private;
+
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        GF_FREE (priv->volume_uuid);
+        priv->volume_uuid = NULL;
+
+        GF_FREE (priv->timestamp_file);
+        priv->timestamp_file = NULL;
+
+        GF_FREE (priv->marker_xattr);
+        priv->marker_xattr = NULL;
+out:
+        return;
+}
+
+/*
+ * Tear down everything init() set up for this translator: the xtime
+ * strings, the private lock, the private structure itself and the
+ * marker_local_t memory pool.
+ *
+ * this->private and this->local_pool are reset to NULL so that a
+ * repeated call (for example fini() after a failed init()) is a no-op
+ * instead of a double free.
+ */
+void
+marker_priv_cleanup (xlator_t *this)
+{
+        marker_conf_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("marker", this, out);
+
+        priv = (marker_conf_t *) this->private;
+
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        marker_xtime_priv_cleanup (this);
+
+        LOCK_DESTROY (&priv->lock);
+
+        GF_FREE (priv);
+        this->private = NULL;
+
+        /* The pool is created last in init(), so it may still be NULL
+         * when init() bailed out early. */
+        if (this->local_pool) {
+                mem_pool_destroy (this->local_pool);
+                this->local_pool = NULL;
+        }
+out:
+        return;
+}
+
+/*
+ * Re-read the translator options at runtime.
+ *
+ * feature_enabled is rebuilt from scratch from the "quota",
+ * "inode-quota", "xtime" and "gsync-force-xtime" options.  The
+ * "quota-version" value is applied only when a quota feature is enabled
+ * and the value is non-negative.  When xtime is enabled the xtime
+ * private data is dropped and re-initialised from the new options.
+ *
+ * Returns the status of the last option-parsing step that ran
+ * (0 when no option needed parsing).
+ */
+int32_t
+reconfigure (xlator_t *this, dict_t *options)
+{
+        int32_t         ret     = 0;
+        data_t         *data    = NULL;
+        gf_boolean_t    flag    = _gf_false;
+        marker_conf_t  *priv    = NULL;
+        int32_t         version = 0;
+
+        GF_ASSERT (this);
+        GF_ASSERT (this->private);
+
+        priv = this->private;
+
+        priv->feature_enabled = 0;
+
+        GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+        data = dict_get (options, "quota");
+        if (data) {
+                ret = gf_string2boolean (data->data, &flag);
+                if (ret == 0 && flag == _gf_true)
+                        priv->feature_enabled |= GF_QUOTA;
+        }
+
+        data = dict_get (options, "inode-quota");
+        if (data) {
+                ret = gf_string2boolean (data->data, &flag);
+                if (ret == 0 && flag == _gf_true)
+                        priv->feature_enabled |= GF_INODE_QUOTA;
+        }
+
+        data = dict_get (options, "quota-version");
+        if (data)
+                ret = gf_string2int32 (data->data, &version);
+
+        if (priv->feature_enabled) {
+                if (version >= 0)
+                        priv->version = version;
+                else
+                        /* Report the rejected value, not the version
+                         * that stays in effect. */
+                        gf_log (this->name, GF_LOG_ERROR, "Invalid quota "
+                                "version %d", version);
+        }
+
+        data = dict_get (options, "xtime");
+        if (data) {
+                ret = gf_string2boolean (data->data, &flag);
+                if (ret == 0 && flag == _gf_true) {
+                        marker_xtime_priv_cleanup (this);
+
+                        ret = init_xtime_priv (this, options);
+                        if (ret < 0) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "failed to initialize xtime private, "
+                                        "xtime updation will fail");
+                        } else {
+                                priv->feature_enabled |= GF_XTIME;
+                                data = dict_get (options, "gsync-force-xtime");
+                                if (!data)
+                                        goto out;
+                                ret = gf_string2boolean (data->data, &flag);
+                                if (ret == 0 && flag)
+                                        priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
+                        }
+                }
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Translator constructor.
+ *
+ * Requires both a child and a parent; allocates the private structure,
+ * reads the "quota", "inode-quota", "quota-version", "xtime" and
+ * "gsync-force-xtime" options into it and creates the marker_local_t
+ * memory pool.
+ *
+ * Returns 0 on success.  Returns -1 on failure; failures after the
+ * topology checks go through marker_priv_cleanup().
+ */
+int32_t
+init (xlator_t *this)
+{
+        marker_conf_t *conf    = NULL;
+        dict_t        *opts    = NULL;
+        data_t        *opt     = NULL;
+        gf_boolean_t   enabled = _gf_false;
+        int32_t        rc      = 0;
+
+        if (this->children == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "marker translator needs subvolume defined.");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Volume is dangling.");
+                return -1;
+        }
+
+        opts = this->options;
+
+        ALLOCATE_OR_GOTO (this->private, marker_conf_t, err);
+
+        conf = this->private;
+        conf->feature_enabled = 0;
+        conf->version = 0;
+
+        LOCK_INIT (&conf->lock);
+
+        opt = dict_get (opts, "quota");
+        if (opt != NULL) {
+                rc = gf_string2boolean (opt->data, &enabled);
+                if (rc == 0 && enabled == _gf_true)
+                        conf->feature_enabled |= GF_QUOTA;
+        }
+
+        opt = dict_get (opts, "inode-quota");
+        if (opt != NULL) {
+                rc = gf_string2boolean (opt->data, &enabled);
+                if (rc == 0 && enabled == _gf_true)
+                        conf->feature_enabled |= GF_INODE_QUOTA;
+        }
+
+        opt = dict_get (opts, "quota-version");
+        if (opt != NULL)
+                rc = gf_string2int32 (opt->data, &conf->version);
+
+        if (conf->feature_enabled && conf->version < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Invalid quota version %d",
+                        conf->version);
+                goto err;
+        }
+
+        opt = dict_get (opts, "xtime");
+        if (opt != NULL) {
+                rc = gf_string2boolean (opt->data, &enabled);
+                if (rc == 0 && enabled == _gf_true) {
+                        rc = init_xtime_priv (this, opts);
+                        if (rc < 0)
+                                goto err;
+
+                        conf->feature_enabled |= GF_XTIME;
+
+                        /* The force flag is only looked at when xtime
+                         * itself was enabled. */
+                        opt = dict_get (opts, "gsync-force-xtime");
+                        if (opt != NULL) {
+                                rc = gf_string2boolean (opt->data, &enabled);
+                                if (rc == 0 && enabled)
+                                        conf->feature_enabled |= GF_XTIME_GSYNC_FORCE;
+                        }
+                }
+        }
+
+        this->local_pool = mem_pool_new (marker_local_t, 128);
+        if (this->local_pool == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create local_t's memory pool");
+                goto err;
+        }
+
+        return 0;
+err:
+        marker_priv_cleanup (this);
+
+        return -1;
+}
+
+/*
+ * forget callback: detach the marker context stored on the inode, hand
+ * its quota context to mq_forget() and free the marker context.
+ * Always returns 0.
+ */
+int32_t
+marker_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t            raw  = 0;
+        marker_inode_ctx_t *mctx = NULL;
+
+        if (inode_ctx_del (inode, this, &raw) == 0) {
+                mctx = (marker_inode_ctx_t *)(unsigned long)raw;
+                if (mctx != NULL) {
+                        mq_forget (this, mctx->quota_ctx);
+
+                        GF_FREE (mctx);
+                }
+        }
+
+        return 0;
+}
+
+/* Translator destructor: releases the private state via
+ * marker_priv_cleanup(). */
+void
+fini (xlator_t *this)
+{
+ marker_priv_cleanup (this);
+}
+
+/* File-operation table: each listed fop is routed to its marker_*
+ * handler. */
+struct xlator_fops fops = {
+ .lookup = marker_lookup,
+ .create = marker_create,
+ .mkdir = marker_mkdir,
+ .writev = marker_writev,
+ .truncate = marker_truncate,
+ .ftruncate = marker_ftruncate,
+ .symlink = marker_symlink,
+ .link = marker_link,
+ .unlink = marker_unlink,
+ .rmdir = marker_rmdir,
+ .rename = marker_rename,
+ .mknod = marker_mknod,
+ .setxattr = marker_setxattr,
+ .fsetxattr = marker_fsetxattr,
+ .setattr = marker_setattr,
+ .fsetattr = marker_fsetattr,
+ .removexattr = marker_removexattr,
+ .getxattr = marker_getxattr,
+ .readdirp = marker_readdirp,
+ .fallocate = marker_fallocate,
+ .discard = marker_discard,
+ .zerofill = marker_zerofill,
+};
+
+/* Callback table: only forget is hooked, to release the per-inode
+ * marker context. */
+struct xlator_cbks cbks = {
+ .forget = marker_forget
+};
+
+/* Option keys accepted by this translator; the list is terminated by
+ * an entry whose key is NULL.  The values are read in init(),
+ * reconfigure() and init_xtime_priv(). */
+struct volume_options options[] = {
+ {.key = {"volume-uuid"}},
+ {.key = {"timestamp-file"}},
+ {.key = {"quota"}},
+ {.key = {"inode-quota"} },
+ {.key = {"xtime"}},
+ {.key = {"gsync-force-xtime"}},
+ {.key = {"quota-version"} },
+ {.key = {NULL}}
+};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
new file mode 100644
index 00000000000..4726880b82f
--- /dev/null
+++ b/xlators/features/marker/src/marker.h
@@ -0,0 +1,149 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _MARKER_H
+#define _MARKER_H
+
+#include "marker-quota.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "compat-uuid.h"
+#include "call-stub.h"
+
+#define MARKER_XATTR_PREFIX "trusted.glusterfs"
+#define XTIME "xtime"
+#define VOLUME_MARK "volume-mark"
+#define VOLUME_UUID "volume-uuid"
+#define TIMESTAMP_FILE "timestamp-file"
+
+/* Feature bits; they are OR-ed together into
+ * marker_conf.feature_enabled. */
+enum {
+ GF_QUOTA = 1,
+ GF_XTIME = 2,
+ GF_XTIME_GSYNC_FORCE = 4,
+ GF_INODE_QUOTA = 8,
+};
+
+/*
+ * Attach _local to _frame and put it into its initial state: pid copied
+ * from the frame's root, loc zeroed, ref set to 1, uid/gid set to -1,
+ * lock initialised and oplocal cleared.  _local is dereferenced, so it
+ * must not be NULL.
+ */
+#define MARKER_INIT_LOCAL(_frame,_local) do { \
+ _frame->local = _local; \
+ _local->pid = _frame->root->pid; \
+ memset (&_local->loc, 0, sizeof (loc_t)); \
+ _local->ref = 1; \
+ _local->uid = -1; \
+ _local->gid = -1; \
+ LOCK_INIT (&_local->lock); \
+ _local->oplocal = NULL; \
+ } while (0)
+
+/*
+ * Zero-allocate one object of 'type' into 'var', accounted under the
+ * gf_marker_mt_<type> memory type.  On failure, log an error and jump
+ * to 'label'.  Expects a variable named 'this' (used for this->name)
+ * to be in scope at the point of use.
+ */
+#define ALLOCATE_OR_GOTO(var, type, label) do { \
+ var = GF_CALLOC (sizeof (type), 1, \
+ gf_marker_mt_##type); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ goto label; \
+ } \
+ } while (0)
+
+/* Copy uid and gid from src to dest, but only when both values in src
+ * differ from -1. */
+#define _MARKER_SET_UID_GID(dest, src) \
+ do { \
+ if (src->uid != -1 && \
+ src->gid != -1) { \
+ dest->uid = src->uid; \
+ dest->gid = src->gid; \
+ } \
+ } while (0)
+
+/* Copy uid/gid from src to dest (see _MARKER_SET_UID_GID), then set the
+ * frame root's uid and gid to 0 and tag the frame cookie with
+ * _GF_UID_GID_CHANGED. */
+#define MARKER_SET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID (dest, src); \
+ frame->root->uid = 0; \
+ frame->root->gid = 0; \
+ frame->cookie = (void *) _GF_UID_GID_CHANGED; \
+ } while (0)
+
+/* Copy uid/gid from src to dest (see _MARKER_SET_UID_GID) and clear the
+ * frame cookie. */
+#define MARKER_RESET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID (dest, src); \
+ frame->cookie = NULL; \
+ } while (0)
+
+/*
+ * Unwind 'fop' on 'frame' with 'params'.  The frame's local is detached
+ * before the unwind and, if there was one, released with
+ * marker_local_unref() afterwards.  A NULL frame skips the detach step.
+ */
+#define MARKER_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ if (frame) { \
+ _local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (_local) \
+ marker_local_unref (_local); \
+ } while (0)
+
+/*
+ * Per-call state stored in frame->local.  MARKER_INIT_LOCAL sets the
+ * initial values of pid, loc, ref, uid, gid, lock and oplocal.
+ */
+struct marker_local{
+ uint32_t timebuf[2];
+ pid_t pid;
+ loc_t loc;
+ loc_t parent_loc;
+ uid_t uid;
+ gid_t gid;
+ int32_t ref;
+ uint32_t ia_nlink;
+ struct iatt buf;
+ gf_lock_t lock;
+ mode_t mode;
+ int32_t err;
+ call_stub_t *stub;
+ call_frame_t *lk_frame;
+ quota_meta_t contribution;
+ struct marker_local *oplocal;
+
+ /* marker quota specific */
+ int64_t delta;
+ int64_t d_off;
+ int64_t sum;
+ int64_t size;
+ int32_t hl_count;
+ int32_t dentry_child_count;
+
+ fd_t *fd;
+ call_frame_t *frame;
+
+ quota_inode_ctx_t *ctx;
+ inode_contribution_t *contri;
+
+ int xflag;
+ dict_t *xdata;
+ gf_boolean_t skip_txn;
+};
+typedef struct marker_local marker_local_t;
+
+/* quota_local_t is an alias for marker_local_t. */
+#define quota_local_t marker_local_t
+
+/* Per-inode marker context; it holds only a pointer to the inode's
+ * quota context. */
+struct marker_inode_ctx {
+ struct quota_inode_ctx *quota_ctx;
+};
+typedef struct marker_inode_ctx marker_inode_ctx_t;
+
+/* Translator-wide private configuration (this->private). */
+struct marker_conf{
+ /* bitmask of GF_QUOTA / GF_XTIME / GF_XTIME_GSYNC_FORCE /
+  * GF_INODE_QUOTA */
+ char feature_enabled;
+ char *size_key;
+ char *dirty_key;
+ /* textual and binary forms of the "volume-uuid" option */
+ char *volume_uuid;
+ uuid_t volume_uuid_bin;
+ /* value of the "timestamp-file" option */
+ char *timestamp_file;
+ /* xattr name built as MARKER_XATTR_PREFIX.<volume_uuid>.XTIME */
+ char *marker_xattr;
+ uint64_t quota_lk_owner;
+ gf_lock_t lock;
+ /* value of the "quota-version" option */
+ int32_t version;
+};
+typedef struct marker_conf marker_conf_t;
+
+#endif
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am
index 58cfed0f983..7090698687b 100644
--- a/xlators/features/path-convertor/src/Makefile.am
+++ b/xlators/features/path-convertor/src/Makefile.am
@@ -2,13 +2,14 @@
xlator_LTLIBRARIES = path-converter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-path_converter_la_LDFLAGS = -module -avoidversion
+path_converter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
path_converter_la_SOURCES = path.c
path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h
index 99f794679e7..77ada8d537a 100644
--- a/xlators/features/path-convertor/src/path-mem-types.h
+++ b/xlators/features/path-convertor/src/path-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __PATH_MEM_TYPES_H__
#define __PATH_MEM_TYPES_H__
diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c
index 0a86baa8785..b0e5d6cc625 100644
--- a/xlators/features/path-convertor/src/path.c
+++ b/xlators/features/path-convertor/src/path.c
@@ -1,29 +1,14 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* TODO: add gf_log to all the cases returning errors */
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/**
* xlators/features/path-translator:
* This translator converts the path it gets into user specified targets.
@@ -52,7 +37,7 @@ static char *
name_this_to_that (xlator_t *xl, const char *path, const char *name)
{
path_private_t *priv = xl->private;
- char priv_path[ZR_PATH_MAX] = {0,};
+ char priv_path[PATH_MAX] = {0,};
char *tmp_name = NULL;
int32_t path_len = strlen (path);
int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN;
@@ -848,8 +833,7 @@ path_setxattr (call_frame_t *frame,
if (tmp_path != loc_path)
GF_FREE (tmp_path);
- if (tmp_name)
- GF_FREE (tmp_name);
+ GF_FREE (tmp_name);
return 0;
}
@@ -1057,7 +1041,7 @@ path_entrylk (call_frame_t *frame, xlator_t *this,
int32_t
path_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
{
char *loc_path = (char *)loc->path;
char *tmp_path = NULL;
diff --git a/xlators/protocol/legacy/lib/Makefile.am b/xlators/features/protect/Makefile.am
index d471a3f9243..d471a3f9243 100644
--- a/xlators/protocol/legacy/lib/Makefile.am
+++ b/xlators/features/protect/Makefile.am
diff --git a/xlators/features/protect/src/Makefile.am b/xlators/features/protect/src/Makefile.am
new file mode 100644
index 00000000000..98499712fab
--- /dev/null
+++ b/xlators/features/protect/src/Makefile.am
@@ -0,0 +1,21 @@
+# Builds the three "protect" translators as loadable modules; each one
+# is compiled from a single source file and linked against libglusterfs.
+xlator_LTLIBRARIES = prot_dht.la prot_client.la prot_server.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+prot_dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+prot_dht_la_SOURCES = prot_dht.c
+prot_dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_client_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+prot_client_la_SOURCES = prot_client.c
+prot_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+prot_server_la_SOURCES = prot_server.c
+prot_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+# The libexecinfo include path is added for the execinfo headers that
+# prot_client.c includes.
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/libexecinfo
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/protect/src/prot_client.c b/xlators/features/protect/src/prot_client.c
new file mode 100644
index 00000000000..79636410b94
--- /dev/null
+++ b/xlators/features/protect/src/prot_client.c
@@ -0,0 +1,213 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "defaults.h"
+
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#else
+#include "execinfo_compat.h"
+#endif
+
+/* Maximum number of native stack frames captured by
+ * pcli_print_trace(). */
+#define NUM_FRAMES 20
+
+/* Name of the xattr used to set a protection action. */
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+/* Protection actions stored in the inode context by pcli_setxattr(). */
+enum {
+ PROT_ACT_NONE = 0,
+ PROT_ACT_LOG,
+ PROT_ACT_REJECT,
+};
+
+/*
+ * Log, at INFO level under 'name', the translator call stack that
+ * 'frame' belongs to, followed by the native backtrace of the calling
+ * thread (at most NUM_FRAMES entries).
+ */
+void
+pcli_print_trace (char *name, call_frame_t *frame)
+{
+        void             *bt_frames[NUM_FRAMES];
+        char            **symbols = NULL;
+        int               size    = 0;
+        int               i       = 0;
+        call_frame_t     *iter    = NULL;
+        struct list_head *head    = NULL;
+
+        gf_log (name, GF_LOG_INFO, "Translator stack:");
+        /*
+         * Use a separate cursor and a cached list head.
+         * list_for_each_entry() re-evaluates its head expression on
+         * every iteration, so the head must not be spelled in terms of
+         * the variable the loop is advancing.
+         */
+        head = &frame->root->myframes;
+        list_for_each_entry (iter, head, frames) {
+                gf_log (name, GF_LOG_INFO, "%s (%s)",
+                        iter->wind_from, iter->this->name);
+        }
+
+        size = backtrace (bt_frames, NUM_FRAMES);
+        if (size <= 0) {
+                return;
+        }
+        symbols = backtrace_symbols (bt_frames, size);
+        if (!symbols) {
+                return;
+        }
+
+        gf_log (name, GF_LOG_INFO, "Processor stack:");
+        for (i = 0; i < size; ++i) {
+                gf_log (name, GF_LOG_INFO, "%s", symbols[i]);
+        }
+        /* backtrace_symbols() returns a single malloc'ed block. */
+        free (symbols);
+}
+
+/*
+ * rename fop: when the source's parent directory carries a protection
+ * action and the rename moves the entry to a different directory, log a
+ * warning with a trace and, for PROT_ACT_REJECT, fail the call with
+ * EPERM.  In every other case the rename is passed to the child.
+ */
+int32_t
+pcli_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+             loc_t *newloc, dict_t *xdata)
+{
+        uint64_t action;
+
+        if (newloc->parent == oldloc->parent) {
+                gf_log (this->name, GF_LOG_DEBUG, "rename in same directory");
+                goto pass_down;
+        }
+
+        if (!oldloc->parent ||
+            inode_ctx_get (oldloc->parent, this, &action) != 0) {
+                goto pass_down;
+        }
+
+        if (action == PROT_ACT_NONE) {
+                goto pass_down;
+        }
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "got rename for protected %s", oldloc->path);
+        pcli_print_trace (this->name, frame);
+
+        if (action == PROT_ACT_REJECT) {
+                STACK_UNWIND_STRICT (rename, frame, -1, EPERM,
+                                     NULL, NULL, NULL, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+pass_down:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+                         xdata);
+        return 0;
+}
+
+/*
+ * Return true when the raw xattr value in 'data' is exactly 'word',
+ * optionally followed by a single terminating NUL.  The value comes
+ * straight from the user and may not be NUL-terminated, so it is
+ * compared by length rather than with strcmp().
+ */
+static gf_boolean_t
+pcli_value_equals (data_t *data, const char *word)
+{
+        size_t wlen = strlen (word);
+        size_t dlen = 0;
+
+        if (!data->data || data->len < 0)
+                return _gf_false;
+
+        dlen = (size_t) data->len;
+        if (dlen < wlen || memcmp (data->data, word, wlen) != 0)
+                return _gf_false;
+
+        if (dlen == wlen)
+                return _gf_true;
+
+        return (dlen == wlen + 1 && data->data[wlen] == '\0')
+                ? _gf_true : _gf_false;
+}
+
+/*
+ * setxattr fop: a request that carries only PROTECT_KEY is consumed
+ * here.  Its value selects the action stored in the inode context
+ * ("log" -> PROT_ACT_LOG, "reject" -> PROT_ACT_REJECT, anything else
+ * -> PROT_ACT_NONE) and the call is answered with success without
+ * reaching the child.  Every other request, including one that mixes
+ * PROTECT_KEY with other keys, is passed to the child unchanged.
+ */
+int32_t
+pcli_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        data_t   *data;
+        uint64_t  value;
+
+        /*
+         * We can't use dict_get_str and strcmp here, because the value
+         * comes directly from the user and might not be NUL-terminated
+         * (it would be if we had set it ourselves).
+         */
+
+        data = dict_get(dict,PROTECT_KEY);
+        if (!data) {
+                goto simple_wind;
+        }
+
+        if (dict->count > 1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "attempted to mix %s with other keys", PROTECT_KEY);
+                goto simple_wind;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "got %s request", PROTECT_KEY);
+        /*
+         * Match the whole value: a bare strncmp() bounded by data->len
+         * would accept an empty value or any prefix of the keyword.
+         */
+        if (pcli_value_equals (data, "log")) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "logging removals on %s", loc->path);
+                value = PROT_ACT_LOG;
+        }
+        else if (pcli_value_equals (data, "reject")) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "rejecting removals on %s", loc->path);
+                value = PROT_ACT_REJECT;
+        }
+        else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "removing protection on %s", loc->path);
+                value = PROT_ACT_NONE;
+        }
+        /* The stored action is read back by pcli_rename/pcli_unlink. */
+        if (inode_ctx_set(loc->inode,this,&value) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set protection status for %s", loc->path);
+        }
+        STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL);
+        return 0;
+
+simple_wind:
+        STACK_WIND_TAIL (frame,
+                         FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                         loc, dict, flags, xdata);
+        return 0;
+}
+
+/*
+ * unlink fop: when the parent directory carries a protection action,
+ * log a warning with a trace and, for PROT_ACT_REJECT, fail the call
+ * with EPERM.  Otherwise the unlink is passed to the child.
+ */
+int32_t
+pcli_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+             dict_t *xdata)
+{
+        uint64_t action;
+
+        if (!loc->parent) {
+                goto pass_down;
+        }
+        if (inode_ctx_get(loc->parent,this,&action) != 0) {
+                goto pass_down;
+        }
+        if (action == PROT_ACT_NONE) {
+                goto pass_down;
+        }
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "got unlink for protected %s", loc->path);
+        pcli_print_trace(this->name, frame);
+
+        if (action == PROT_ACT_REJECT) {
+                STACK_UNWIND_STRICT (unlink, frame, -1, EPERM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+pass_down:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+        return 0;
+}
+
+/*
+ * Translator constructor: fails with -1 unless exactly one child is
+ * configured; a missing parent only produces a warning.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int has_single_child = (this->children && !this->children->next);
+
+        if (!has_single_child) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Translator destructor: nothing to release, init() allocates no
+ * state. */
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+/* Only rename, setxattr and unlink are intercepted by this
+ * translator. */
+struct xlator_fops fops = {
+ .rename = pcli_rename,
+ .setxattr = pcli_setxattr,
+ .unlink = pcli_unlink,
+};
+
+/* No callbacks are registered. */
+struct xlator_cbks cbks = {
+};
+
+/* No options; the table holds only the NULL-key terminator entry. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_dht.c b/xlators/features/protect/src/prot_dht.c
new file mode 100644
index 00000000000..1fc8cc1ffde
--- /dev/null
+++ b/xlators/features/protect/src/prot_dht.c
@@ -0,0 +1,163 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "defaults.h"
+
+/* Memory-accounting types for this translator, numbered after the
+ * common types. */
+enum gf_pdht_mem_types_ {
+ gf_pdht_mt_coord_t = gf_common_mt_end + 1,
+ gf_pdht_mt_end
+};
+
+/*
+ * Coordinator shared by all setxattr calls fanned out for one protect
+ * request.  The lock guards the fields.  refs counts the outstanding
+ * holders; the request is unwound when it drops to zero.  op_ret and
+ * op_errno record a failing reply, and xdata holds a reference to the
+ * most recent reply xdata.
+ */
+typedef struct {
+ pthread_mutex_t lock;
+ uint16_t refs;
+ int32_t op_ret;
+ int32_t op_errno;
+ dict_t *xdata;
+} pdht_coord_t;
+
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+/*
+ * Drop one reference on 'coord' and release its lock, which the caller
+ * must hold.  When the last reference goes away, the original setxattr
+ * is unwound with the collected result and the coordinator (its xdata
+ * reference, its mutex and its memory) is destroyed.
+ */
+void
+pdht_unref_and_unlock (call_frame_t *frame, xlator_t *this,
+                       pdht_coord_t *coord)
+{
+        gf_boolean_t should_unwind;
+
+        should_unwind = (--(coord->refs) == 0);
+        pthread_mutex_unlock(&coord->lock);
+
+        if (should_unwind) {
+                STACK_UNWIND_STRICT (setxattr, frame,
+                                     coord->op_ret, coord->op_errno,
+                                     coord->xdata);
+                if (coord->xdata) {
+                        dict_unref(coord->xdata);
+                }
+                /* Pair the pthread_mutex_init() done in pdht_setxattr();
+                 * nobody else can hold the lock once refs reached 0. */
+                pthread_mutex_destroy(&coord->lock);
+                GF_FREE(coord);
+        }
+}
+
+/*
+ * Reply handler for one fanned-out setxattr.  Under the coordinator
+ * lock: record a non-zero result, keep a reference to the newest reply
+ * xdata (releasing the one held before), then drop this call's
+ * reference on the coordinator.
+ */
+int32_t
+pdht_recurse_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        pdht_coord_t *tracker = cookie;
+
+        pthread_mutex_lock(&tracker->lock);
+
+        if (op_ret != 0) {
+                tracker->op_ret = op_ret;
+                tracker->op_errno = op_errno;
+        }
+
+        if (xdata != NULL) {
+                if (tracker->xdata != NULL) {
+                        dict_unref(tracker->xdata);
+                }
+                tracker->xdata = dict_ref(xdata);
+        }
+
+        /* Releases the lock taken above. */
+        pdht_unref_and_unlock(frame,this,tracker);
+
+        return 0;
+}
+
+/*
+ * Walk the translator graph below 'xl'.  Each "features/prot_client"
+ * translator found gets the setxattr wound to it, after one reference
+ * is taken on 'coord' for that call; any other translator is descended
+ * into through its children.
+ */
+void
+pdht_recurse (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+              int32_t flags, dict_t *xdata, xlator_t *xl, pdht_coord_t *coord)
+{
+        xlator_list_t *child = NULL;
+
+        if (strcmp(xl->type,"features/prot_client") != 0) {
+                for (child = xl->children; child != NULL;
+                     child = child->next) {
+                        pdht_recurse (frame, this, loc, dict, flags, xdata,
+                                      child->xlator, coord);
+                }
+                return;
+        }
+
+        pthread_mutex_lock(&coord->lock);
+        coord->refs++;
+        pthread_mutex_unlock(&coord->lock);
+
+        STACK_WIND_COOKIE (frame, pdht_recurse_cbk, coord, xl,
+                           xl->fops->setxattr, loc, dict, flags, xdata);
+}
+
+/*
+ * setxattr fop: a request that carries only PROTECT_KEY is fanned out
+ * to every prot_client translator below this one, with a coordinator
+ * collecting the replies.  Any other request (or an allocation
+ * failure) falls back to a plain wind to the first child.
+ */
+int32_t
+pdht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int32_t flags, dict_t *xdata)
+{
+        pdht_coord_t *tracker = NULL;
+
+        if (dict_get(dict,PROTECT_KEY) == NULL) {
+                goto passthrough;
+        }
+
+        if (dict->count > 1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "attempted to mix %s with other keys", PROTECT_KEY);
+                goto passthrough;
+        }
+
+        tracker = GF_CALLOC(1,sizeof(*tracker),gf_pdht_mt_coord_t);
+        if (tracker == NULL) {
+                gf_log (this->name, GF_LOG_WARNING, "allocation failed");
+                goto passthrough;
+        }
+
+        pthread_mutex_init(&tracker->lock,NULL);
+        /* This function holds the initial reference until the fan-out
+         * below has been issued. */
+        tracker->refs = 1;
+        tracker->op_ret = 0;
+        tracker->xdata = NULL;
+
+        pdht_recurse(frame,this,loc,dict,flags,xdata,this,tracker);
+
+        pthread_mutex_lock(&tracker->lock);
+        pdht_unref_and_unlock(frame,this,tracker);
+
+        return 0;
+
+passthrough:
+        STACK_WIND_TAIL (frame,
+                         FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                         loc, dict, flags, xdata);
+        return 0;
+}
+
+/*
+ * Translator constructor: fails with -1 unless exactly one child is
+ * configured; a missing parent only produces a warning.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int has_single_child = (this->children && !this->children->next);
+
+        if (!has_single_child) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Translator destructor: nothing to release, init() allocates no
+ * state. */
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+/* Only setxattr is intercepted by this translator. */
+struct xlator_fops fops = {
+ .setxattr = pdht_setxattr,
+};
+
+/* No callbacks are registered. */
+struct xlator_cbks cbks = {
+};
+
+/* No options; the table holds only the NULL-key terminator entry. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_server.c b/xlators/features/protect/src/prot_server.c
new file mode 100644
index 00000000000..8ebace240f3
--- /dev/null
+++ b/xlators/features/protect/src/prot_server.c
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "defaults.h"
+
+/*
+ * Translator constructor: fails with -1 unless exactly one child is
+ * configured; a missing parent only produces a warning.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int has_single_child = (this->children && !this->children->next);
+
+        if (!has_single_child) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Translator destructor: nothing to release, init() allocates no
+ * state. */
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+/* No fops are intercepted by this translator. */
+struct xlator_fops fops = {
+};
+
+/* No callbacks are registered. */
+struct xlator_cbks cbks = {
+};
+
+/* No options; the table holds only the NULL-key terminator entry. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/quiesce/Makefile.am b/xlators/features/quiesce/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/quiesce/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am
new file mode 100644
index 00000000000..6468669af2a
--- /dev/null
+++ b/xlators/features/quiesce/src/Makefile.am
@@ -0,0 +1,15 @@
+# Builds the quiesce translator as a loadable module from quiesce.c,
+# linked against libglusterfs.
+xlator_LTLIBRARIES = quiesce.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+quiesce_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+quiesce_la_SOURCES = quiesce.c
+quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+# Headers that are shipped with the sources but not installed.
+noinst_HEADERS = quiesce.h quiesce-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
new file mode 100644
index 00000000000..6e582f424ea
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QUIESCE_MEM_TYPES_H__
+#define __QUIESCE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory-accounting types for the quiesce translator, numbered after
+ * the common types. */
+enum gf_quiesce_mem_types_ {
+ gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
+ gf_quiesce_mt_end
+};
+#endif
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
new file mode 100644
index 00000000000..3a4100f796e
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -0,0 +1,2605 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "quiesce.h"
+#include "defaults.h"
+#include "call-stub.h"
+
+/* TODO: */
+/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmission */
+
+
+/* Quiesce Specific Functions */
+/*
+ * Release every resource held by a quiesce_local_t and hand the object
+ * back to its memory pool.
+ *
+ * Does nothing when local, this or this->private is NULL.
+ */
+void
+gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local)
+{
+        if (!local || !this || !this->private)
+                return;
+
+        /* loc_wipe() tolerates unset members, so call it unconditionally;
+         * gating it on loc.inode would leak path/parent whenever only
+         * those were populated. */
+        loc_wipe (&local->loc);
+
+        if (local->fd)
+                fd_unref (local->fd);
+
+        GF_FREE (local->name);
+        GF_FREE (local->volname);
+
+        if (local->dict)
+                dict_unref (local->dict);
+        if (local->iobref)
+                iobref_unref (local->iobref);
+
+        GF_FREE (local->vector);
+
+        mem_put (local);
+}
+
+/*
+ * Detach and return the oldest queued stub, or NULL when the queue is
+ * empty or the translator has no private data.
+ *
+ * The emptiness test is made while holding priv->lock: checking it
+ * before taking the lock would let two concurrent callers both see a
+ * non-empty list and the second one pop from an empty list head.
+ */
+call_stub_t *
+gf_quiesce_dequeue (xlator_t *this)
+{
+        call_stub_t    *stub = NULL;
+        quiesce_priv_t *priv = NULL;
+
+        priv = this->private;
+        if (!priv)
+                return NULL;
+
+        LOCK (&priv->lock);
+        {
+                if (!list_empty (&priv->req)) {
+                        stub = list_entry (priv->req.next, call_stub_t, list);
+                        list_del_init (&stub->list);
+                        priv->queue_size--;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        return stub;
+}
+
+/*
+ * Drain the request queue: keep pulling stubs with gf_quiesce_dequeue()
+ * and resuming them until the list is observed empty.  The void-pointer
+ * signature lets it be used as a thread entry point; data is the xlator.
+ */
+void *
+gf_quiesce_dequeue_start (void *data)
+{
+        xlator_t       *this = data;
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        THIS = this;
+
+        for (;;) {
+                if (list_empty (&priv->req))
+                        break;
+
+                stub = gf_quiesce_dequeue (this);
+                if (stub != NULL)
+                        call_resume (stub);
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Timer callback (data is the xlator): flip the translator into
+ * pass-through mode under the lock, then resume everything queued.
+ */
+void
+gf_quiesce_timeout (void *data)
+{
+        xlator_t       *this = data;
+        quiesce_priv_t *priv = this->private;
+
+        THIS = this;
+
+        LOCK (&priv->lock);
+        priv->pass_through = _gf_true;
+        UNLOCK (&priv->lock);
+
+        gf_quiesce_dequeue_start (this);
+}
+
+/*
+ * Append a stub to the tail of the pending-request list.  If no timer
+ * has been recorded in priv->timer yet, arm one that calls
+ * gf_quiesce_timeout() after 20 seconds.
+ *
+ * When this->private is NULL the problem is logged and the stub is not
+ * queued.
+ */
+void
+gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        quiesce_priv_t  *priv    = this->private;
+        struct timespec  timeout = {0,};
+
+        if (priv == NULL) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR,
+                                  "this->private == NULL");
+                return;
+        }
+
+        LOCK (&priv->lock);
+        {
+                list_add_tail (&stub->list, &priv->req);
+                priv->queue_size++;
+        }
+        UNLOCK (&priv->lock);
+
+        if (priv->timer)
+                return;
+
+        timeout.tv_sec  = 20;
+        timeout.tv_nsec = 0;
+
+        priv->timer = gf_timer_call_after (this->ctx, timeout,
+                                           gf_quiesce_timeout,
+                                           (void *) this);
+}
+
+
+
+/* _CBK function section */
+
+/*
+ * Reply handler for lookup.  A failure with ENOTCONN is turned back
+ * into a stub (built from the saved loc/dict) and queued for
+ * re-transmission; every other reply is unwound to the caller as is.
+ * The saved local is always released before returning.
+ */
+int32_t
+quiesce_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
+                                     buf, dict, postparent);
+        } else {
+                stub = fop_lookup_stub (frame, default_lookup_resume,
+                                        &local->loc, local->dict);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for stat.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved loc and queued again; any other reply goes
+ * straight back to the caller.  The saved local is always released.
+ */
+int32_t
+quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                  dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+        } else {
+                stub = fop_stat_stub (frame, default_stat_resume,
+                                      &local->loc, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for access.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved loc and mask and queued again; any other
+ * reply goes straight back to the caller.  The saved local is always
+ * released.
+ */
+int32_t
+quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+        } else {
+                stub = fop_access_stub (frame, default_access_resume,
+                                        &local->loc, local->flag, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for readlink.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved loc and size and queued again; any
+ * other reply goes straight back to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, const char *path,
+                      struct iatt *buf, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno,
+                                     path, buf, xdata);
+        } else {
+                stub = fop_readlink_stub (frame, default_readlink_resume,
+                                          &local->loc, local->size, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for open.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved loc, flags and fd and queued again; any
+ * other reply goes straight back to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+        } else {
+                stub = fop_open_stub (frame, default_open_resume,
+                                      &local->loc, local->flag, local->fd,
+                                      xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (open, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for readv.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved fd, size, offset and io flags and queued
+ * again; any other reply goes straight back to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iovec *vector,
+                   int32_t count, struct iatt *stbuf, struct iobref *iobref,
+                   dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+                                     count, stbuf, iobref, xdata);
+        } else {
+                stub = fop_readv_stub (frame, default_readv_resume,
+                                       local->fd, local->size, local->offset,
+                                       local->io_flag, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
+                                             NULL, 0, NULL, NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for flush.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved fd and queued again; any other reply goes
+ * straight back to the caller.  The saved local is always released.
+ */
+int32_t
+quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+        } else {
+                stub = fop_flush_stub (frame, default_flush_resume,
+                                       local->fd, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+
+/*
+ * Reply handler for fsync.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved fd and flag and queued again; any other
+ * reply goes straight back to the caller.  The saved local is always
+ * released.
+ */
+int32_t
+quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno,
+                                     prebuf, postbuf, xdata);
+        } else {
+                stub = fop_fsync_stub (frame, default_fsync_resume,
+                                       local->fd, local->flag, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for fstat.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved fd and queued again; any other reply goes
+ * straight back to the caller.  The saved local is always released.
+ */
+int32_t
+quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                   dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+        } else {
+                stub = fop_fstat_stub (frame, default_fstat_resume,
+                                       local->fd, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for opendir.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved loc and fd and queued again; any
+ * other reply goes straight back to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+        } else {
+                stub = fop_opendir_stub (frame, default_opendir_resume,
+                                         &local->loc, local->fd, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for fsyncdir.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd and flag and queued again; any
+ * other reply goes straight back to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+        } else {
+                stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume,
+                                          local->fd, local->flag, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for statfs.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the saved loc and queued again; any other reply goes
+ * straight back to the caller.  The saved local is always released.
+ */
+int32_t
+quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+                    dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+        } else {
+                stub = fop_statfs_stub (frame, default_statfs_resume,
+                                        &local->loc, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for fgetxattr.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd and name and queued again; any
+ * other reply goes straight back to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *dict,
+                       dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno,
+                                     dict, xdata);
+        } else {
+                stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume,
+                                           local->fd, local->name, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for getxattr.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved loc and name and queued again; any
+ * other reply goes straight back to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *dict,
+                      dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+                                     dict, xdata);
+        } else {
+                stub = fop_getxattr_stub (frame, default_getxattr_resume,
+                                          &local->loc, local->name, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for rchecksum.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd, offset and flag and queued
+ * again; any other reply goes straight back to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+                       uint8_t *strong_checksum, dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
+                                     weak_checksum, strong_checksum, xdata);
+        } else {
+                stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
+                                           local->fd, local->offset,
+                                           local->flag, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM,
+                                             0, NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for readdir.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd, size and offset and queued
+ * again; any other reply goes straight back to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                     dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno,
+                                     entries, xdata);
+        } else {
+                stub = fop_readdir_stub (frame, default_readdir_resume,
+                                         local->fd, local->size,
+                                         local->offset, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for readdirp.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd, size, offset and dict and queued
+ * again; any other reply goes straight back to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                      dict_t *xdata)
+{
+        quiesce_local_t *local = frame->local;
+        call_stub_t     *stub  = NULL;
+
+        frame->local = NULL;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno,
+                                     entries, xdata);
+        } else {
+                stub = fop_readdirp_stub (frame, default_readdirp_resume,
+                                          local->fd, local->size,
+                                          local->offset, local->dict);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+        }
+
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+#if 0
+
+/*
+ * Reply handler for writev.  On an ENOTCONN failure the call is rebuilt
+ * as a stub from the values saved in local and queued again; any other
+ * reply is unwound to the caller.  The saved local is always released.
+ *
+ * NOTE(review): this reads local->io_flags while quiesce_readv_cbk reads
+ * local->io_flag -- confirm the field name in quiesce.h before enabling.
+ */
+int32_t
+quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                    struct iatt *postbuf, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_writev_stub (frame, default_writev_resume,
+                                        local->fd, local->vector, local->flag,
+                                        local->offset, local->io_flags,
+                                        local->iobref, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for xattrop.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved loc, xattrop flags and dict and
+ * queued again; any other reply is unwound to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict,
+                     dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_xattrop_stub (frame, default_xattrop_resume,
+                                         &local->loc, local->xattrop_flags,
+                                         local->dict, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for fxattrop.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd, xattrop flags and dict and
+ * queued again; any other reply is unwound to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *dict,
+                      dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
+                                          local->fd, local->xattrop_flags,
+                                          local->dict, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for lk.  On an ENOTCONN failure the call is rebuilt as
+ * a stub from the saved fd, flag and flock and queued again; any other
+ * reply is unwound to the caller.  The saved local is always released.
+ */
+int32_t
+quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+                dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_lk_stub (frame, default_lk_resume,
+                                    local->fd, local->flag, &local->flock,
+                                    xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for inodelk.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved volname, loc, flag and flock and
+ * queued again; any other reply is unwound to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_inodelk_stub (frame, default_inodelk_resume,
+                                         local->volname, &local->loc,
+                                         local->flag, &local->flock, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for finodelk.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved volname, fd, flag and flock and
+ * queued again; any other reply is unwound to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_finodelk_stub (frame, default_finodelk_resume,
+                                          local->volname, local->fd,
+                                          local->flag, &local->flock, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for entrylk.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved volname, loc, name, cmd and type and
+ * queued again; any other reply is unwound to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_entrylk_stub (frame, default_entrylk_resume,
+                                         local->volname, &local->loc,
+                                         local->name, local->cmd, local->type,
+                                         xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for fentrylk.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved volname, fd, name, cmd and type and
+ * queued again; any other reply is unwound to the caller.  The saved
+ * local is always released.
+ */
+int32_t
+quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
+                                          local->volname, local->fd,
+                                          local->name, local->cmd, local->type,
+                                          xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for setattr.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved loc, stbuf and flag and queued
+ * again; any other reply is unwound to the caller.  The saved local is
+ * always released.
+ */
+int32_t
+quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                     struct iatt *statpost, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_setattr_stub (frame, default_setattr_resume,
+                                         &local->loc, &local->stbuf,
+                                         local->flag, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
+                             statpost, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+/*
+ * Reply handler for fsetattr.  On an ENOTCONN failure the call is
+ * rebuilt as a stub from the saved fd, stbuf and flag and queued again;
+ * any other reply is unwound to the caller.  The saved local is always
+ * released.
+ */
+int32_t
+quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                      struct iatt *statpost, dict_t *xdata)
+{
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+
+        if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+                /* Re-transmit (by putting in the queue) */
+                stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
+                                          local->fd, &local->stbuf,
+                                          local->flag, xdata);
+                if (!stub) {
+                        STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                        goto out;
+                }
+
+                gf_quiesce_enqueue (this, stub);
+                goto out;
+        }
+
+        STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
+                             statpost, xdata);
+out:
+        gf_quiesce_local_wipe (this, local);
+
+        return 0;
+}
+
+#endif /* if 0 */
+
+
+/* FOP */
+
+/* No retransmission */
+
+/*
+ * removexattr fop.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_removexattr (call_frame_t *frame,
+                     xlator_t *this,
+                     loc_t *loc,
+                     const char *name, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_removexattr_stub (frame, default_removexattr_resume,
+                                             loc, name, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM,
+                                             NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_removexattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr,
+                    loc, name, xdata);
+
+        return 0;
+}
+
+/*
+ * truncate fop.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_truncate (call_frame_t *frame,
+                  xlator_t *this,
+                  loc_t *loc,
+                  off_t offset, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_truncate_stub (frame, default_truncate_resume,
+                                          loc, offset, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate,
+                    loc, offset, xdata);
+
+        return 0;
+}
+
+/*
+ * fsetxattr fop.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_fsetxattr (call_frame_t *frame,
+                   xlator_t *this,
+                   fd_t *fd,
+                   dict_t *dict,
+                   int32_t flags, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
+                                           fd, dict, flags, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM,
+                                             NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_fsetxattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsetxattr,
+                    fd, dict, flags, xdata);
+
+        return 0;
+}
+
+/*
+ * setxattr fop.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_setxattr (call_frame_t *frame,
+                  xlator_t *this,
+                  loc_t *loc,
+                  dict_t *dict,
+                  int32_t flags, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_setxattr_stub (frame, default_setxattr_resume,
+                                          loc, dict, flags, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM,
+                                             NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_setxattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setxattr,
+                    loc, dict, flags, xdata);
+
+        return 0;
+}
+
+/*
+ * create fop.  O_APPEND is always stripped from the flags before the
+ * call is wound or queued, because write() re-transmissions can fail
+ * with O_APPEND.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_create (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, int32_t flags, mode_t mode,
+                mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_create_stub (frame, default_create_resume,
+                                        loc, (flags & ~O_APPEND), mode, umask,
+                                        fd, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (create, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL, NULL,
+                                             NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_create_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->create,
+                    loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
+
+        return 0;
+}
+
+/*
+ * link fop.  While pass-through is off the call is captured in a stub
+ * and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_link (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *oldloc,
+              loc_t *newloc, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_link_stub (frame, default_link_resume,
+                                      oldloc, newloc, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (link, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_link_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->link,
+                    oldloc, newloc, xdata);
+
+        return 0;
+}
+
+/*
+ * rename fop.  While pass-through is off the call is captured in a stub
+ * and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_rename (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *oldloc,
+                loc_t *newloc, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_rename_stub (frame, default_rename_resume,
+                                        oldloc, newloc, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL, NULL,
+                                             NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_rename_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rename,
+                    oldloc, newloc, xdata);
+
+        return 0;
+}
+
+
+/*
+ * symlink fop.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int
+quiesce_symlink (call_frame_t *frame, xlator_t *this,
+                 const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_symlink_stub (frame, default_symlink_resume,
+                                         linkpath, loc, umask, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_symlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->symlink,
+                    linkpath, loc, umask, xdata);
+
+        return 0;
+}
+
+
+/*
+ * rmdir fop.  While pass-through is off the call is captured in a stub
+ * and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int
+quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+               dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_rmdir_stub (frame, default_rmdir_resume,
+                                       loc, flags, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_rmdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rmdir,
+                    loc, flags, xdata);
+
+        return 0;
+}
+
+/*
+ * unlink fop.  While pass-through is off the call is captured in a stub
+ * and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_unlink (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc, int xflag, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_unlink_stub (frame, default_unlink_resume,
+                                        loc, xflag, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_unlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink,
+                    loc, xflag, xdata);
+
+        return 0;
+}
+
+/*
+ * mkdir fop.  While pass-through is off the call is captured in a stub
+ * and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int
+quiesce_mkdir (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_mkdir_stub (frame, default_mkdir_resume,
+                                       loc, mode, umask, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_mkdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mkdir,
+                    loc, mode, umask, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mknod fop.  While pass-through is off the call is captured in a stub
+ * and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int
+quiesce_mknod (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_mknod_stub (frame, default_mknod_resume,
+                                       loc, mode, rdev, umask, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_mknod_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mknod,
+                    loc, mode, rdev, umask, xdata);
+
+        return 0;
+}
+
+/*
+ * ftruncate fop.  While pass-through is off the call is captured in a
+ * stub and queued (ENOMEM is unwound if the stub cannot be created);
+ * otherwise it is wound to the first child with the default callback.
+ */
+int32_t
+quiesce_ftruncate (call_frame_t *frame,
+                   xlator_t *this,
+                   fd_t *fd,
+                   off_t offset, dict_t *xdata)
+{
+        quiesce_priv_t *priv = this->private;
+        call_stub_t    *stub = NULL;
+
+        if (!priv->pass_through) {
+                stub = fop_ftruncate_stub (frame, default_ftruncate_resume,
+                                           fd, offset, xdata);
+                if (stub) {
+                        gf_quiesce_enqueue (this, stub);
+                } else {
+                        STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_ftruncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->ftruncate,
+                    fd, offset, xdata);
+
+        return 0;
+}
+
+/* Re-transmission */
+
+/*
+ * readlink fop.
+ *
+ * Pass-through on: stores a copy of @loc and @size in frame->local (for
+ * quiesce_readlink_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_readlink_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_readlink (call_frame_t *frame,
+                  xlator_t *this,
+                  loc_t *loc,
+                  size_t size, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                local->size = size;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_readlink_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readlink,
+                            loc, size, xdata);
+                return 0;
+        }
+
+        stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * access fop.
+ *
+ * Pass-through on: stores a copy of @loc and @mask in frame->local (for
+ * quiesce_access_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_access_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_access (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                int32_t mask, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                local->flag = mask;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_access_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->access,
+                            loc, mask, xdata);
+                return 0;
+        }
+
+        stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
+        return 0;
+}
+
+/*
+ * fgetxattr fop.
+ *
+ * Pass-through on: stores a reference to @fd and, when @name is non-NULL,
+ * a gf_strdup() copy of it in frame->local (for quiesce_fgetxattr_cbk) and
+ * winds to the first child.
+ * Pass-through off: queues a stub resumed by default_fgetxattr_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fgetxattr (call_frame_t *frame,
+                   xlator_t *this,
+                   fd_t *fd,
+                   const char *name, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                if (name)
+                        local->name = gf_strdup (name);
+
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_fgetxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fgetxattr,
+                            fd, name, xdata);
+                return 0;
+        }
+
+        stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * statfs fop.
+ *
+ * Pass-through on: stores a copy of @loc in frame->local (for
+ * quiesce_statfs_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_statfs_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_statfs (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_statfs_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->statfs,
+                            loc, xdata);
+                return 0;
+        }
+
+        stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fsyncdir fop.
+ *
+ * Pass-through on: stores a reference to @fd and @flags in frame->local
+ * (for quiesce_fsyncdir_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_fsyncdir_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fsyncdir (call_frame_t *frame,
+                  xlator_t *this,
+                  fd_t *fd,
+                  int32_t flags, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                local->flag = flags;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_fsyncdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsyncdir,
+                            fd, flags, xdata);
+                return 0;
+        }
+
+        stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
+        return 0;
+}
+
+/*
+ * opendir fop.
+ *
+ * Pass-through on: stores a copy of @loc and a reference to @fd in
+ * frame->local (for quiesce_opendir_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_opendir_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_opendir (call_frame_t *frame,
+                 xlator_t *this,
+                 loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                local->fd = fd_ref (fd);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_opendir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->opendir,
+                            loc, fd, xdata);
+                return 0;
+        }
+
+        stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fstat fop.
+ *
+ * Pass-through on: stores a reference to @fd in frame->local (for
+ * quiesce_fstat_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_fstat_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fstat (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_fstat_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fstat,
+                            fd, xdata);
+                return 0;
+        }
+
+        stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fsync fop.
+ *
+ * Pass-through on: stores a reference to @fd and @flags in frame->local
+ * (for quiesce_fsync_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_fsync_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fsync (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               int32_t flags, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                local->flag = flags;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_fsync_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsync,
+                            fd, flags, xdata);
+                return 0;
+        }
+
+        stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * flush fop.
+ *
+ * Pass-through on: stores a reference to @fd in frame->local (for
+ * quiesce_flush_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_flush_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_flush (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_flush_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->flush,
+                            fd, xdata);
+                return 0;
+        }
+
+        stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
+        return 0;
+}
+
+/*
+ * writev fop: wind to the first child (default_writev_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_writev_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iovec *vector, int32_t count,
+                off_t off, uint32_t flags,
+                struct iobref *iobref, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_writev_stub (frame, default_writev_resume,
+                                          fd, vector, count, off, flags,
+                                          iobref, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_writev_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, off, flags, iobref, xdata);
+        return 0;
+}
+
+/*
+ * readv fop.
+ *
+ * Pass-through on: stores a reference to @fd plus @size, @offset and
+ * @flags in frame->local (for quiesce_readv_cbk) and winds to the first
+ * child.
+ * Pass-through off: queues a stub resumed by default_readv_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_readv (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               size_t size,
+               off_t offset, uint32_t flags, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                local->size = size;
+                local->offset = offset;
+                local->io_flag = flags;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_readv_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readv,
+                            fd, size, offset, flags, xdata);
+                return 0;
+        }
+
+        stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset,
+                               flags, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
+                             NULL, 0, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * open fop.
+ *
+ * O_APPEND is stripped from @flags on both paths before the call is sent
+ * down or queued.
+ *
+ * Pass-through on: stores a copy of @loc, a reference to @fd and the
+ * stripped flags in frame->local (for quiesce_open_cbk) and winds to the
+ * first child.
+ * Pass-through off: queues a stub resumed by default_open_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_open (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              int32_t flags, fd_t *fd,
+              dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                local->fd = fd_ref (fd);
+
+                /* Don't send O_APPEND below, as write() re-transmissions
+                   can fail with O_APPEND */
+                local->flag = (flags & ~O_APPEND);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_open_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->open,
+                            loc, (flags & ~O_APPEND), fd, xdata);
+                return 0;
+        }
+
+        stub = fop_open_stub (frame, default_open_resume, loc,
+                              (flags & ~O_APPEND), fd, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * getxattr fop.
+ *
+ * Pass-through on: stores a copy of @loc and, when @name is non-NULL, a
+ * gf_strdup() copy of it in frame->local (for quiesce_getxattr_cbk) and
+ * winds to the first child.
+ * Pass-through off: queues a stub resumed by default_getxattr_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_getxattr (call_frame_t *frame,
+                  xlator_t *this,
+                  loc_t *loc,
+                  const char *name, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                if (name)
+                        local->name = gf_strdup (name);
+
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_getxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr,
+                            loc, name, xdata);
+                return 0;
+        }
+
+        stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * xattrop fop: wind to the first child (default_xattrop_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_xattrop_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 gf_xattrop_flags_t flags,
+                 dict_t *dict, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_xattrop_stub (frame, default_xattrop_resume,
+                                           loc, flags, dict, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_xattrop_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->xattrop,
+                    loc, flags, dict, xdata);
+        return 0;
+}
+
+/*
+ * fxattrop fop: wind to the first child (default_fxattrop_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_fxattrop_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  gf_xattrop_flags_t flags,
+                  dict_t *dict, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_fxattrop_stub (frame, default_fxattrop_resume,
+                                            fd, flags, dict, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_fxattrop_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fxattrop,
+                    fd, flags, dict, xdata);
+        return 0;
+}
+
+/*
+ * lk fop: wind to the first child (default_lk_cbk) while pass-through is
+ * on; otherwise queue a stub resumed by default_lk_resume.  A failed stub
+ * allocation unwinds the call with -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+            int32_t cmd,
+            struct gf_flock *lock, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_lk_stub (frame, default_lk_resume,
+                                      fd, cmd, lock, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM,
+                                             NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_lk_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lk,
+                    fd, cmd, lock, xdata);
+        return 0;
+}
+
+
+/*
+ * inodelk fop: wind to the first child (default_inodelk_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_inodelk_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_inodelk (call_frame_t *frame, xlator_t *this,
+                 const char *volume, loc_t *loc, int32_t cmd,
+                 struct gf_flock *lock, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_inodelk_stub (frame, default_inodelk_resume,
+                                           volume, loc, cmd, lock, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_inodelk_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->inodelk,
+                    volume, loc, cmd, lock, xdata);
+        return 0;
+}
+
+/*
+ * finodelk fop: wind to the first child (default_finodelk_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_finodelk_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_finodelk (call_frame_t *frame, xlator_t *this,
+                  const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_finodelk_stub (frame, default_finodelk_resume,
+                                            volume, fd, cmd, lock, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_finodelk_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->finodelk,
+                    volume, fd, cmd, lock, xdata);
+        return 0;
+}
+
+/*
+ * entrylk fop: wind to the first child (default_entrylk_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_entrylk_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_entrylk (call_frame_t *frame, xlator_t *this,
+                 const char *volume, loc_t *loc, const char *basename,
+                 entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_entrylk_stub (frame, default_entrylk_resume,
+                                           volume, loc, basename, cmd, type,
+                                           xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_entrylk_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->entrylk,
+                    volume, loc, basename, cmd, type, xdata);
+        return 0;
+}
+
+/*
+ * fentrylk fop: wind to the first child (default_fentrylk_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_fentrylk_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
+                  const char *volume, fd_t *fd, const char *basename,
+                  entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_fentrylk_stub (frame, default_fentrylk_resume,
+                                            volume, fd, basename, cmd, type,
+                                            xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_fentrylk_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fentrylk,
+                    volume, fd, basename, cmd, type, xdata);
+        return 0;
+}
+
+/*
+ * rchecksum fop.
+ *
+ * Pass-through on: stores a reference to @fd, @offset and @len (kept in
+ * local->flag) in frame->local (for quiesce_rchecksum_cbk) and winds to
+ * the first child.
+ * Pass-through off: queues a stub resumed by default_rchecksum_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_rchecksum (call_frame_t *frame,
+                   xlator_t *this,
+                   fd_t *fd, off_t offset,
+                   int32_t len, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                local->offset = offset;
+                local->flag = len;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_rchecksum_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rchecksum,
+                            fd, offset, len, xdata);
+                return 0;
+        }
+
+        stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
+                                   fd, offset, len, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * readdir fop.
+ *
+ * Pass-through on: stores a reference to @fd plus @size and @off in
+ * frame->local (for quiesce_readdir_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_readdir_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_readdir (call_frame_t *frame,
+                 xlator_t *this,
+                 fd_t *fd,
+                 size_t size,
+                 off_t off, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                local->size = size;
+                local->offset = off;
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_readdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdir,
+                            fd, size, off, xdata);
+                return 0;
+        }
+
+        stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * readdirp fop.
+ *
+ * Pass-through on: stores a reference to @fd, @size, @off and a reference
+ * to @dict in frame->local (for quiesce_readdirp_cbk) and winds to the
+ * first child.
+ * Pass-through off: queues a stub resumed by default_readdirp_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_readdirp (call_frame_t *frame,
+                  xlator_t *this,
+                  fd_t *fd,
+                  size_t size,
+                  off_t off, dict_t *dict)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                local->fd = fd_ref (fd);
+                local->size = size;
+                local->offset = off;
+                local->dict = dict_ref (dict);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_readdirp_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdirp,
+                            fd, size, off, dict);
+                return 0;
+        }
+
+        stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size,
+                                  off, dict);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * setattr fop: wind to the first child (default_setattr_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_setattr_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 struct iatt *stbuf,
+                 int32_t valid, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_setattr_stub (frame, default_setattr_resume,
+                                           loc, stbuf, valid, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_setattr_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setattr,
+                    loc, stbuf, valid, xdata);
+        return 0;
+}
+
+
+/*
+ * stat fop.
+ *
+ * Pass-through on: stores a copy of @loc in frame->local (for
+ * quiesce_stat_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_stat_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_stat (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc, dict_t *xdata)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_stat_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->stat,
+                            loc, xdata);
+                return 0;
+        }
+
+        stub = fop_stat_stub (frame, default_stat_resume, loc, xdata);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * lookup fop.
+ *
+ * Pass-through on: stores a copy of @loc and a reference to @xattr_req in
+ * frame->local (for quiesce_lookup_cbk) and winds to the first child.
+ * Pass-through off: queues a stub resumed by default_lookup_resume.
+ *
+ * If the local or the stub cannot be allocated the call is unwound with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_lookup (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                dict_t *xattr_req)
+{
+        quiesce_priv_t  *priv  = NULL;
+        call_stub_t     *stub  = NULL;
+        quiesce_local_t *local = NULL;
+
+        priv = this->private;
+
+        if (priv && priv->pass_through) {
+                local = mem_get0 (priv->local_pool);
+                /* mem_get0() may return NULL; never dereference it */
+                if (!local)
+                        goto enomem;
+
+                loc_dup (loc, &local->loc);
+                local->dict = dict_ref (xattr_req);
+                frame->local = local;
+
+                STACK_WIND (frame,
+                            quiesce_lookup_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup,
+                            loc, xattr_req);
+                return 0;
+        }
+
+        stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req);
+        if (!stub)
+                goto enomem;
+
+        gf_quiesce_enqueue (this, stub);
+
+        return 0;
+
+enomem:
+        STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM,
+                             NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fsetattr fop: wind to the first child (default_fsetattr_cbk) while
+ * pass-through is on; otherwise queue a stub resumed by
+ * default_fsetattr_resume.  A failed stub allocation unwinds the call with
+ * -1/ENOMEM.  Always returns 0.
+ */
+int32_t
+quiesce_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  struct iatt *stbuf,
+                  int32_t valid, dict_t *xdata)
+{
+        quiesce_priv_t *conf   = this->private;
+        call_stub_t    *queued = NULL;
+
+        if (!conf || !conf->pass_through) {
+                queued = fop_fsetattr_stub (frame, default_fsetattr_resume,
+                                            fd, stbuf, valid, xdata);
+                if (queued) {
+                        gf_quiesce_enqueue (this, queued);
+                } else {
+                        STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM,
+                                             NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        STACK_WIND (frame, default_fsetattr_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsetattr,
+                    fd, stbuf, valid, xdata);
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this translator with
+ * gf_quiesce_mt_end + 1 slots; returns whatever xlator_mem_acct_init()
+ * returns.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        return xlator_mem_acct_init (this, gf_quiesce_mt_end + 1);
+}
+
+/*
+ * Translator constructor.
+ *
+ * Requires exactly one child (error otherwise) and warns when the volume
+ * has no parents.  Allocates the private structure and the pool of
+ * quiesce_local_t objects, initialises the lock and the request list, and
+ * starts with pass-through disabled.
+ *
+ * Returns 0 on success and -1 on misconfiguration or allocation failure;
+ * on failure this->private is left untouched.
+ */
+int
+init (xlator_t *this)
+{
+        int             ret  = -1;
+        quiesce_priv_t *priv = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'quiesce' not configured with exactly one child");
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_quiesce_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        priv->local_pool = mem_pool_new (quiesce_local_t,
+                                         GF_FOPS_EXPECTED_IN_PARALLEL);
+        if (!priv->local_pool) {
+                /* Without the pool every pass-through fop that needs a
+                   local would fail, so refuse to come up at all */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create the local memory pool");
+                GF_FREE (priv);
+                goto out;
+        }
+
+        LOCK_INIT (&priv->lock);
+        priv->pass_through = _gf_false;
+
+        INIT_LIST_HEAD (&priv->req);
+
+        this->private = priv;
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Translator destructor: detaches the private structure from the xlator,
+ * then destroys its local pool and lock and frees it.  Does nothing when
+ * there is no private structure.
+ */
+void
+fini (xlator_t *this)
+{
+        quiesce_priv_t *conf = this->private;
+
+        if (conf) {
+                this->private = NULL;
+
+                mem_pool_destroy (conf->local_pool);
+                LOCK_DESTROY (&conf->lock);
+                GF_FREE (conf);
+        }
+
+        return;
+}
+
+/*
+ * Event handler.
+ *
+ * GF_EVENT_CHILD_UP: starts a thread running gf_quiesce_dequeue_start()
+ * and switches the translator to pass-through mode.
+ * GF_EVENT_CHILD_DOWN: switches pass-through off and, unless a timer is
+ * already recorded in priv->timer, arms a 20 second timer that calls
+ * gf_quiesce_timeout().
+ * Every event is then forwarded through default_notify(), whose result is
+ * the return value.  Without a private structure nothing is forwarded and
+ * 0 is returned.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ quiesce_priv_t *priv = NULL;
+ struct timespec timeout = {0,};
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ {
+ /* NOTE(review): the thread handle is kept in priv->thr, but
+ nothing visible in this function joins or detaches it -
+ confirm where that happens */
+ ret = pthread_create (&priv->thr, NULL, gf_quiesce_dequeue_start,
+ this);
+ if (ret) {
+ /* Failure is only logged; pass-through is still enabled
+ below and ret is overwritten by default_notify() */
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create the quiesce-dequeue thread");
+ }
+
+ LOCK (&priv->lock);
+ {
+ priv->pass_through = _gf_true;
+ }
+ UNLOCK (&priv->lock);
+ break;
+ }
+ case GF_EVENT_CHILD_DOWN:
+ LOCK (&priv->lock);
+ {
+ priv->pass_through = _gf_false;
+ }
+ UNLOCK (&priv->lock);
+
+ /* A timer is already recorded: do not arm a second one.
+ NOTE(review): priv->timer is read and written here without
+ holding priv->lock - confirm that is intended */
+ if (priv->timer)
+ break;
+ timeout.tv_sec = 20;
+ timeout.tv_nsec = 0;
+
+ priv->timer = gf_timer_call_after (this->ctx,
+ timeout,
+ gf_quiesce_timeout,
+ (void *) this);
+
+ if (priv->timer == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot create timer");
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ /* Forward the event regardless of what happened above */
+ ret = default_notify (this, event, data);
+out:
+ return ret;
+}
+
+
+/*
+ * File-operation table of the quiesce translator.  Every fop listed here
+ * is either wound to the child (pass-through on) or queued as a stub
+ * (pass-through off) by its quiesce_* handler.
+ */
+struct xlator_fops fops = {
+        /* write/modifying fops */
+        .mknod       = quiesce_mknod,
+        .create      = quiesce_create,
+        .truncate    = quiesce_truncate,
+        .ftruncate   = quiesce_ftruncate,
+        .setxattr    = quiesce_setxattr,
+        .removexattr = quiesce_removexattr,
+        .symlink     = quiesce_symlink,
+        .unlink      = quiesce_unlink,
+        .link        = quiesce_link,
+        .mkdir       = quiesce_mkdir,
+        .rmdir       = quiesce_rmdir,
+        .rename      = quiesce_rename,
+
+        /* The below calls are known to change state, hence
+           re-transmission is not advised */
+        .lk          = quiesce_lk,
+        .inodelk     = quiesce_inodelk,
+        .finodelk    = quiesce_finodelk,
+        .entrylk     = quiesce_entrylk,
+        .fentrylk    = quiesce_fentrylk,
+        .xattrop     = quiesce_xattrop,
+        .fxattrop    = quiesce_fxattrop,
+        .setattr     = quiesce_setattr,
+        .fsetattr    = quiesce_fsetattr,
+
+        /* Special case, re-transmission is not harmful *
+         * as offset is properly sent from above layers */
+        /* TODO: not re-transmitted as of now */
+        .writev      = quiesce_writev,
+
+        /* re-transmittable fops */
+        .lookup      = quiesce_lookup,
+        .stat        = quiesce_stat,
+        .fstat       = quiesce_fstat,
+        .access      = quiesce_access,
+        .readlink    = quiesce_readlink,
+        .getxattr    = quiesce_getxattr,
+        /* fgetxattr and rchecksum have quiesce_* handlers in this file;
+           register them so these fops are quiesced like the others */
+        .fgetxattr   = quiesce_fgetxattr,
+        .open        = quiesce_open,
+        .readv       = quiesce_readv,
+        .flush       = quiesce_flush,
+        .fsync       = quiesce_fsync,
+        .statfs      = quiesce_statfs,
+        .opendir     = quiesce_opendir,
+        .readdir     = quiesce_readdir,
+        .readdirp    = quiesce_readdirp,
+        .fsyncdir    = quiesce_fsyncdir,
+        .rchecksum   = quiesce_rchecksum,
+
+};
+
+/* Declared without an initializer, so every member starts out zero/NULL. */
+struct xlator_dumpops dumpops;
+
+
+/* Declared without an initializer, so every member starts out zero/NULL. */
+struct xlator_cbks cbks;
+
+
+/* Option table: holds a single entry whose key is NULL and nothing else
+   (presumably the list terminator - i.e. no options are defined). */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
new file mode 100644
index 00000000000..878ed77e928
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QUIESCE_H__
+#define __QUIESCE_H__
+
+#include "quiesce-mem-types.h"
+#include "xlator.h"
+#include "timer.h"
+
+#define GF_FOPS_EXPECTED_IN_PARALLEL 512
+
+/* Per-translator private state, allocated in init() and freed in fini(). */
+typedef struct {
+ /* Timer armed by notify() on GF_EVENT_CHILD_DOWN (calls
+ gf_quiesce_timeout); non-NULL suppresses arming another one */
+ gf_timer_t *timer;
+ /* True after GF_EVENT_CHILD_UP, false after GF_EVENT_CHILD_DOWN and at
+ init; when true the fops wind to the child, otherwise they queue */
+ gf_boolean_t pass_through;
+ /* Held by notify() while it changes pass_through */
+ gf_lock_t lock;
+ /* List head initialised in init(); presumably the queue of pending
+ stubs filled by gf_quiesce_enqueue() - confirm there */
+ struct list_head req;
+ /* Presumably the number of entries on req - not touched in the code
+ next to this definition, confirm against the enqueue/dequeue code */
+ int queue_size;
+ /* Thread created by notify() on GF_EVENT_CHILD_UP, running
+ gf_quiesce_dequeue_start() */
+ pthread_t thr;
+ /* Pool of quiesce_local_t objects, GF_FOPS_EXPECTED_IN_PARALLEL in size */
+ struct mem_pool *local_pool;
+} quiesce_priv_t;
+
+/*
+ * Per-call copy of a fop's arguments, stored in frame->local by the
+ * pass-through branch of the re-transmittable fops before they wind.
+ * Each fop fills only the members it needs; the rest stay zeroed by
+ * mem_get0().
+ */
+typedef struct {
+ /* Reference taken with fd_ref() */
+ fd_t *fd;
+ /* gf_strdup() copy of the xattr name (getxattr/fgetxattr) */
+ char *name;
+ char *volname;
+ /* Copy made with loc_dup() */
+ loc_t loc;
+ /* Request size (readlink, readv, readdir, readdirp) */
+ off_t size;
+ /* Request offset (readv, rchecksum, readdir, readdirp) */
+ off_t offset;
+ mode_t mode;
+ /* Fop-specific integer: access mask, fsync/fsyncdir flags, open flags
+ with O_APPEND removed, or the rchecksum length */
+ int32_t flag;
+ struct iatt stbuf;
+ struct iovec *vector;
+ struct iobref *iobref;
+ /* Reference taken with dict_ref() (lookup xattr_req, readdirp dict) */
+ dict_t *dict;
+ struct gf_flock flock;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ gf_xattrop_flags_t xattrop_flags;
+ int32_t wbflags;
+ /* readv flags */
+ uint32_t io_flag;
+} quiesce_local_t;
+
+#endif
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index fe373c8b515..a15135347ac 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -1,15 +1,25 @@
-xlator_LTLIBRARIES = quota.la
+xlator_LTLIBRARIES = quota.la quotad.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quota_la_LDFLAGS = -module -avoidversion
+quota_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+quotad_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/quota/src/quotad.sym
-quota_la_SOURCES = quota.c
+quota_la_SOURCES = quota.c quota-enforcer-client.c
quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quota-mem-types.h
+quotad_la_SOURCES = quotad.c quotad-helpers.c quotad-aggregator.c
+quotad_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = quota-mem-types.h quota.h quotad-aggregator.h \
+ quotad-helpers.h quota-messages.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/cluster/dht/src -I$(top_srcdir)/rpc/xdr/src/ \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+EXTRA_DIST = quotad.sym
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
new file mode 100644
index 00000000000..6f36c081dbc
--- /dev/null
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -0,0 +1,491 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/file.h>
+#include <netdb.h>
+#include <signal.h>
+#include <libgen.h>
+
+#include <sys/utsname.h>
+
+#include <stdint.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <semaphore.h>
+#include <errno.h>
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#ifdef HAVE_MALLOC_STATS
+#ifdef DEBUG
+#include <mcheck.h>
+#endif
+#endif
+
+#include "quota.h"
+#include "quota-messages.h"
+
+extern struct rpc_clnt_program quota_enforcer_clnt;
+
+int32_t
+quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
+
+/*
+ * Serialize `req` with `xdrproc` into an iobuf and submit it on
+ * priv->rpc_clnt as procedure `procnum` of `prog`, passing `cbkfn` to
+ * rpc_clnt_submit () as the reply callback.
+ *
+ * When `iobref` is NULL and there is a request to send, a temporary iobref
+ * is created here and released before returning. When `req` is NULL nothing
+ * is serialized and the call is submitted with count == 0.
+ *
+ * Returns -1 if the iobuf or iobref cannot be obtained or if serialization
+ * fails (rpc_clnt_submit () is not reached in those cases), 0 otherwise.
+ */
+int
+quota_enforcer_submit_request (void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+ quota_priv_t *priv = NULL;
+
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ if (req) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ /* remember that the iobref is ours so it is released at out: */
+ new_iobref = 1;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ /* a non-error return is used as the payload length */
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (priv->rpc_clnt, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ /* NOTE(review): the result of rpc_clnt_submit () is overwritten, so a
+ * submission failure is not reported to the caller - presumably the
+ * RPC layer reports it through cbkfn instead; confirm. */
+ ret = 0;
+
+out:
+ if (new_iobref)
+ iobref_unref (iobref);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return ret;
+}
+
+/*
+ * Reply callback for the aggregator LOOKUP sent by _quota_enforcer_lookup ().
+ *
+ * Decodes the gfs3_lookup_rsp, converts the stat data to iatts, unserializes
+ * the xdata dict, checks that the returned gfid still matches the inode
+ * being validated, and hands the outcome to local->validate_cbk.
+ *
+ * When the failure is ENOTCONN and priv->quotad_conn_status is still 0, the
+ * lookup is re-armed through a 5 second timer instead of being unwound; if
+ * the timer is created, validate_cbk is NOT called from this invocation.
+ *
+ * Always returns 0.
+ */
+int
+quota_enforcer_lookup_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ quota_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ gfs3_lookup_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt postparent = {0,};
+ int op_errno = EINVAL;
+ dict_t *xdata = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ quota_priv_t *priv = NULL;
+ struct timespec retry_delay = {0,};
+ gf_timer_t *timer = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+ inode = local->validate_loc.inode;
+ priv = this->private;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ Q_MSG_XDR_DECODING_FAILED,
+ "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+
+ if (rsp.op_ret == -1)
+ goto out;
+
+ /* treat the reply as failed until the xdata and gfid checks below pass */
+ rsp.op_ret = -1;
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), rsp.op_ret,
+ op_errno, out);
+
+ if ((!gf_uuid_is_null (inode->gfid))
+ && (gf_uuid_compare (stbuf.ia_gfid, inode->gfid) != 0)) {
+ gf_msg_debug (frame->this->name, ESTALE,
+ "gfid changed for %s", local->validate_loc.path);
+ rsp.op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ rsp.op_ret = 0;
+
+out:
+ rsp.op_errno = op_errno;
+
+ /* We need to retry connecting to quotad on ENOTCONN error.
+ * Suppose if there are two volumes vol1 and vol2,
+ * and quota is enabled and limit is set on vol1.
+ * Now if IO is happening on vol1 and quota is enabled/disabled
+ * on vol2, quotad gets restarted and client will receive
+ * ENOTCONN in the IO path of vol1
+ */
+ if (rsp.op_ret == -1 && rsp.op_errno == ENOTCONN) {
+ if (local->quotad_conn_retry >= 12) {
+ /* retry budget exhausted: stop retrying until a non-ENOTCONN reply
+ * resets quotad_conn_status in the else branch below */
+ priv->quotad_conn_status = 1;
+ gf_log (this->name, GF_LOG_WARNING, "failed to connect "
+ "to quotad after retry count %d)",
+ local->quotad_conn_retry);
+ } else {
+ local->quotad_conn_retry++;
+ }
+
+ if (priv->quotad_conn_status == 0) {
+ /* retry connecting after 5secs for 12 retries
+ * (upto 60sec).
+ */
+ gf_log (this->name, GF_LOG_DEBUG, "retry connecting to "
+ "quotad (retry count %d)",
+ local->quotad_conn_retry);
+
+ retry_delay.tv_sec = 5;
+ retry_delay.tv_nsec = 0;
+ timer = gf_timer_call_after (this->ctx, retry_delay,
+ _quota_enforcer_lookup,
+ (void *) frame);
+ if (timer == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to "
+ "set quota_enforcer_lookup with timer");
+ } else {
+ /* the timer now owns the retry; skip validate_cbk */
+ goto clean;
+ }
+ }
+ } else {
+ priv->quotad_conn_status = 0;
+ }
+
+ if (rsp.op_ret == -1) {
+ /* any error other than ENOENT */
+ if (rsp.op_errno != ENOENT)
+ gf_msg (this->name, GF_LOG_WARNING, rsp.op_errno,
+ Q_MSG_LOOKUP_FAILED,
+ "Getting cluster-wide size of directory failed "
+ "(path: %s gfid:%s)", local->validate_loc.path,
+ loc_gfid_utoa (&local->validate_loc));
+ else
+ gf_msg_trace (this->name, ENOENT,
+ "not found on remote node");
+
+ } else if (local->quotad_conn_retry) {
+ gf_log (this->name, GF_LOG_DEBUG, "connected to quotad after "
+ "retry count %d", local->quotad_conn_retry);
+ }
+
+ local->validate_cbk (frame, NULL, this, rsp.op_ret, rsp.op_errno, inode,
+ &stbuf, xdata, &postparent);
+
+clean:
+ if (xdata)
+ dict_unref (xdata);
+
+ /* plain free (): presumably the buffer was allocated by the XDR decoder
+ * rather than by GF_MALLOC - confirm */
+ free (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+/*
+ * Build a gfs3_lookup_req for local->validate_loc and submit it to quotad as
+ * GF_AGGREGATOR_LOOKUP; the reply is handled by quota_enforcer_lookup_cbk ().
+ *
+ * @data: the call frame (call_frame_t *). frame->local must be a
+ *        quota_local_t whose this, validate_loc and validate_cbk members are
+ *        set; validate_xdata is serialized into the request when present.
+ *        The void * signature allows the function to be used directly as the
+ *        timer callback for the quotad reconnect retry.
+ *
+ * If the request cannot be built or handed to the RPC layer,
+ * local->validate_cbk is invoked with op_ret -1 so that the frame is never
+ * left pending.
+ */
+void
+_quota_enforcer_lookup (void *data)
+{
+        quota_local_t   *local    = NULL;
+        gfs3_lookup_req  req      = {{0,},};
+        int              ret      = 0;
+        int              op_errno = ESTALE;
+        quota_priv_t    *priv     = NULL;
+        call_frame_t    *frame    = NULL;
+        loc_t           *loc      = NULL;
+        xlator_t        *this     = NULL;
+        char            *dir_path = NULL;
+
+        frame = data;
+        local = frame->local;
+        this = local->this;
+        loc = &local->validate_loc;
+
+        priv = this->private;
+
+        if (!(loc && loc->inode))
+                goto unwind;
+
+        /* prefer the gfid of the inode; fall back to the one stored in loc */
+        if (!gf_uuid_is_null (loc->inode->gfid))
+                memcpy (req.gfid, loc->inode->gfid, 16);
+        else
+                memcpy (req.gfid, loc->gfid, 16);
+
+        if (local->validate_xdata) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, local->validate_xdata,
+                                            (&req.xdata.xdata_val),
+                                            req.xdata.xdata_len,
+                                            op_errno, unwind);
+        }
+
+        if (loc->name)
+                req.bname = (char *)loc->name;
+        else
+                req.bname = "";
+
+        if (loc->path)
+                dir_path = (char *)loc->path;
+        else
+                dir_path = "";
+
+        ret = quota_enforcer_submit_request (&req, frame,
+                                             priv->quota_enforcer,
+                                             GF_AGGREGATOR_LOOKUP,
+                                             NULL, this,
+                                             quota_enforcer_lookup_cbk,
+                                             (xdrproc_t)xdr_gfs3_lookup_req);
+
+        if (ret) {
+                /* req.gfid holds 16 raw uuid bytes, not a NUL-terminated
+                 * string, so it must be formatted before being logged
+                 * with %s. */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_RPC_SUBMIT_FAILED, "Couldn't send the request to "
+                        "fetch cluster wide size of directory (path:%s gfid:%s)"
+                        , dir_path, loc_gfid_utoa (loc));
+
+                /* A non-zero return means the request failed before it was
+                 * passed to rpc_clnt_submit () (buffer allocation or
+                 * serialization), so no reply callback will ever fire:
+                 * unwind here instead of leaving the frame hanging. */
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return;
+
+unwind:
+        local->validate_cbk (frame, NULL, this, -1, op_errno, NULL, NULL, NULL,
+                             NULL);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return;
+}
+
+/*
+ * Start a cluster-wide size lookup for the loc already stored in
+ * frame->local. Records `this`, `validate_cbk` and a new reference to
+ * `xdata` in the local, then issues the request through
+ * _quota_enforcer_lookup ().
+ *
+ * If `frame` or `this` is NULL, validate_cbk is called directly with
+ * op_ret -1 and op_errno ESTALE. Always returns 0.
+ */
+int
+quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, dict_t *xdata,
+                       fop_lookup_cbk_t validate_cbk)
+{
+        quota_local_t *state = NULL;
+
+        if ((frame == NULL) || (this == NULL)) {
+                validate_cbk (frame, NULL, this, -1, ESTALE, NULL, NULL, NULL,
+                              NULL);
+                return 0;
+        }
+
+        state = frame->local;
+        state->this = this;
+        state->validate_cbk = validate_cbk;
+        state->validate_xdata = dict_ref (xdata);
+
+        _quota_enforcer_lookup (frame);
+
+        return 0;
+}
+
+/*
+ * RPC client notification handler registered by quota_enforcer_init ().
+ * It only emits a trace message naming the event; `mydata` is the xlator.
+ * Always returns 0.
+ */
+int
+quota_enforcer_notify (struct rpc_clnt *rpc, void *mydata,
+                       rpc_clnt_event_t event, void *data)
+{
+        xlator_t *this = mydata;
+
+        if (event == RPC_CLNT_CONNECT) {
+                gf_msg_trace (this->name, 0, "got RPC_CLNT_CONNECT");
+        } else if (event == RPC_CLNT_DISCONNECT) {
+                gf_msg_trace (this->name, 0, "got RPC_CLNT_DISCONNECT");
+        } else {
+                gf_msg_trace (this->name, 0,
+                              "got some other RPC event %d", event);
+        }
+
+        return 0;
+}
+
+/*
+ * Start `rpc` with the transport reconfigured to "non-blocking-io" = "no",
+ * then reconfigure it back to "yes" once rpc_clnt_start () has been called.
+ *
+ * Returns 0 on success, -1 if the options dict cannot be created, or the
+ * non-zero result of the failing dict_set_str ().
+ */
+int
+quota_enforcer_blocking_connect (rpc_clnt_t *rpc)
+{
+        dict_t *opts = dict_new ();
+        int     ret  = -1;
+
+        if (opts == NULL)
+                return ret;
+
+        ret = dict_set_str (opts, "non-blocking-io", "no");
+        if (ret == 0) {
+                rpc->conn.trans->reconfigure (rpc->conn.trans, opts);
+
+                rpc_clnt_start (rpc);
+
+                ret = dict_set_str (opts, "non-blocking-io", "yes");
+        }
+
+        /* only switch back when both option updates succeeded */
+        if (ret == 0)
+                rpc->conn.trans->reconfigure (rpc->conn.trans, opts);
+
+        dict_unref (opts);
+
+        return ret;
+}
+
+/*
+ * Returns a started rpc_clnt. If priv->rpc_clnt is already set it is
+ * returned as is; otherwise a new client is created over the unix socket
+ * /var/run/gluster/quotad.socket, quota_enforcer_notify () is registered on
+ * it and it is started via quota_enforcer_blocking_connect ().
+ *
+ * `options` is modified: the transport address family, type and connect
+ * path keys are set on it before it is passed to rpc_clnt_new ().
+ *
+ * Returns NULL on any failure; a client created here is unref'd in that case.
+ *
+ * NOTE(review): the new client is not stored in priv->rpc_clnt here -
+ * presumably the caller does that; confirm.
+ */
+struct rpc_clnt *
+quota_enforcer_init (xlator_t *this, dict_t *options)
+{
+ struct rpc_clnt *rpc = NULL;
+ quota_priv_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ LOCK (&priv->lock);
+ {
+ if (priv->rpc_clnt) {
+ ret = 0;
+ rpc = priv->rpc_clnt;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ /* reuse the existing client; ret is already 0 so it is not unref'd below */
+ if (rpc)
+ goto out;
+
+ priv->quota_enforcer = &quota_enforcer_clnt;
+
+ ret = dict_set_str (options, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "transport.socket.connect-path",
+ "/var/run/gluster/quotad.socket");
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new (options, this, this->name, 16);
+ if (!rpc) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify (rpc, quota_enforcer_notify, this);
+ if (ret) {
+ gf_msg ("quota", GF_LOG_ERROR, 0,
+ Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED,
+ "failed to register notify");
+ goto out;
+ }
+
+ ret = quota_enforcer_blocking_connect (rpc);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref (rpc);
+ rpc = NULL;
+ }
+
+ return rpc;
+ }
+
+/*
+ * Procedure table of the aggregator program, indexed by procedure number.
+ * Only the procedure names are filled in; the second member of every entry
+ * is NULL.
+ */
+struct rpc_clnt_procedure quota_enforcer_actors[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL},
+};
+
+/*
+ * RPC client program descriptor for the aggregator program; it is installed
+ * as priv->quota_enforcer by quota_enforcer_init () and uses
+ * quota_enforcer_actors as its procedure table.
+ */
+struct rpc_clnt_program quota_enforcer_clnt = {
+ .progname = "Quota enforcer",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numproc = GF_AGGREGATOR_MAXVALUE,
+ .proctable = quota_enforcer_actors,
+};
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
index b71314ed8e6..97d9165681f 100644
--- a/xlators/features/quota/src/quota-mem-types.h
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -1,30 +1,29 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUOTA_MEM_TYPES_H__
#define __QUOTA_MEM_TYPES_H__
#include "mem-types.h"
enum gf_quota_mem_types_ {
- gf_quota_mt_quota_local = gf_common_mt_end + 1,
- gf_quota_mt_quota_priv,
+ gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
+ gf_quota_mt_quota_inode_ctx_t,
+ gf_quota_mt_loc_t,
+ gf_quota_mt_char,
+ gf_quota_mt_int64_t,
+ gf_quota_mt_int32_t,
+ gf_quota_mt_limits_t,
+ gf_quota_mt_quota_dentry_t,
+ gf_quota_mt_quota_limits_level_t,
+ gf_quota_mt_qd_vols_conf_t,
+ gf_quota_mt_aggregator_state_t,
gf_quota_mt_end
};
#endif
diff --git a/xlators/features/quota/src/quota-messages.h b/xlators/features/quota/src/quota-messages.h
new file mode 100644
index 00000000000..b01fe98e908
--- /dev/null
+++ b/xlators/features/quota/src/quota-messages.h
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTA_MESSAGES_H_
+#define _QUOTA_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file quota-messages.h
+ * \brief Quota log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_QUOTA_BASE GLFS_MSGID_COMP_QUOTA
+#define GLFS_NUM_MESSAGES 23
+#define GLFS_MSGID_END (GLFS_QUOTA_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_QUOTA_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid 120001
+ * @diagnosis Quota enforcement has failed.
+ * @recommendedaction None
+ */
+#define Q_MSG_ENFORCEMENT_FAILED (GLFS_QUOTA_BASE + 1)
+
+
+/*!
+* @messageid 120002
+* @diagnosis system is out of memory
+* @recommendedaction None
+*/
+#define Q_MSG_ENOMEM (GLFS_QUOTA_BASE + 2)
+
+/*!
+ * @messageid 120003
+ * @diagnosis Parent inode is not present in the inode table due to the
+ * inode table limits or the brick was restarted recently.
+ * @recommendedaction If it is a brick restart then perform a crawl on the
+ * file system or the specific directory in which the problem is observed.
+ * If inode table limit has been reached, please increase the limit of
+ * network.inode-lru-limit to a higher value(can be set through CLI).
+ */
+#define Q_MSG_PARENT_NULL (GLFS_QUOTA_BASE + 3)
+
+/*!
+ * @messageid 120004
+ * @diagnosis This is to inform the admin that the user has crossed the soft limit
+ * of the quota configured on the directory and expected to cross the hard limit soon.
+ * @recommendedaction You may reconfigure your quota limits.
+ */
+#define Q_MSG_CROSSED_SOFT_LIMIT (GLFS_QUOTA_BASE + 4)
+
+/*!
+ * @messageid 120005
+ * @diagnosis Quota translator failed to connect to quotad. This could be
+ * due to one or more of the following reasons, (1) Quotad is not running.
+ * (2) Brick process has run out of memory.
+ * @recommendedaction If quotad is not running, consider starting quotad.
+ * else check system memory consumption.
+ */
+#define Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED (GLFS_QUOTA_BASE + 5)
+
+/*!
+ * @messageid 120006
+ * @diagnosis Getting cluster-wide size failed
+ * @recommendedaction Restart quotad. Kill quotad by searching
+ * "ps ax | grep quotad" and use volume start force to restart it.
+ */
+
+#define Q_MSG_REMOTE_OPERATION_FAILED (GLFS_QUOTA_BASE + 6)
+
+/*!
+ * @messageid 120007
+ * @diagnosis Updating the global quota size failed. This may be because quotad
+ * is down or the connection to quotad was lost.
+ * @recommendedaction Please restart quotad.
+ */
+
+#define Q_MSG_FAILED_TO_SEND_FOP (GLFS_QUOTA_BASE + 7)
+
+/*!
+ * @messageid 120008
+ * @diagnosis
+ * @recommendedaction Check volfile for correctness
+ */
+
+#define Q_MSG_INVALID_VOLFILE (GLFS_QUOTA_BASE + 8)
+
+/*!
+ * @messageid 120009
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_INODE_PARENT_NOT_FOUND (GLFS_QUOTA_BASE + 9)
+
+/*!
+ * @messageid 120010
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_XDR_DECODE_ERROR (GLFS_QUOTA_BASE + 10)
+
+/*!
+ * @messageid 120011
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_DICT_UNSERIALIZE_FAIL (GLFS_QUOTA_BASE + 11)
+
+/*!
+ * @messageid 120012
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_DICT_SERIALIZE_FAIL (GLFS_QUOTA_BASE + 12)
+
+/*!
+ * @messageid 120013
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_RPCSVC_INIT_FAILED (GLFS_QUOTA_BASE + 13)
+
+/*!
+ * @messageid 120014
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_RPCSVC_LISTENER_CREATION_FAILED (GLFS_QUOTA_BASE + 14)
+
+/*!
+ * @messageid 120015
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_RPCSVC_REGISTER_FAILED (GLFS_QUOTA_BASE + 15)
+
+/*!
+ * @messageid 120016
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_XDR_DECODING_FAILED (GLFS_QUOTA_BASE + 16)
+/*!
+ * @messageid 120017
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED (GLFS_QUOTA_BASE + 17)
+/*!
+ * @messageid 120018
+ * @diagnosis
+ * @recommendedaction Unmount and remount the corresponding volume
+ */
+
+#define Q_MSG_ANCESTRY_BUILD_FAILED (GLFS_QUOTA_BASE + 18)
+
+/*!
+ * @messageid 120019
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_SIZE_KEY_MISSING (GLFS_QUOTA_BASE + 19)
+
+/*!
+ * @messageid 120020
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_INODE_CTX_GET_FAILED (GLFS_QUOTA_BASE + 20)
+
+/*!
+ * @messageid 120021
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_INODE_CTX_SET_FAILED (GLFS_QUOTA_BASE + 21)
+
+/*!
+ * @messageid 120022
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_LOOKUP_FAILED (GLFS_QUOTA_BASE + 22)
+
+/*!
+ * @messageid 120023
+ * @diagnosis
+ * @recommendedaction
+ */
+
+#define Q_MSG_RPC_SUBMIT_FAILED (GLFS_QUOTA_BASE + 23)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_QUOTA_MESSAGES_H_ */
+
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index a4266a32a85..7091a3c9156 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -1,1029 +1,5030 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <fnmatch.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "quota.h"
+#include "common-utils.h"
+#include "defaults.h"
+#include "statedump.h"
+#include "quota-common-utils.h"
+#include "quota-messages.h"
-#include <sys/time.h>
+struct volume_options options[];
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "quota-mem-types.h"
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif
-
-struct quota_local {
- struct iatt stbuf;
- inode_t *inode;
- char *path;
- fd_t *fd;
- off_t offset;
- int32_t count;
- struct iovec vector[MAX_IOVEC];
- struct iobref *iobref;
- loc_t loc;
-};
-struct quota_priv {
- char only_first_time; /* Used to make sure a call is done only one time */
- gf_lock_t lock; /* Used while updating variables */
- uint64_t disk_usage_limit; /* Used for Disk usage quota */
- uint64_t current_disk_usage; /* Keep the current usage value */
- uint32_t min_free_disk_limit; /* user specified limit, in %*/
- uint32_t current_free_disk; /* current free disk space available, in % */
- uint32_t refresh_interval; /* interval in seconds */
- uint32_t min_disk_last_updated_time; /* used for interval calculation */
- loc_t root_loc; /* Store '/' loc_t to make xattr calls */
-};
+/*
+ * Allocate a quota_inode_ctx_t, initialise its lock and parents list and
+ * store it in the inode context of `inode` for xlator `this`.
+ *
+ * The double-underscore prefix follows the file's convention for helpers
+ * that are called with the relevant lock already held (here it is called
+ * from quota_inode_ctx_get () under inode->lock).
+ *
+ * @context: optional out parameter; set to the new context only when the
+ *           context was stored successfully, and left untouched otherwise so
+ *           that the caller never sees a pointer to freed memory.
+ *
+ * Returns 0 on success, -1 when `inode` is NULL, otherwise the non-zero
+ * result of __inode_ctx_put ().
+ */
+static int32_t
+__quota_init_inode_ctx (inode_t *inode, xlator_t *this,
+                        quota_inode_ctx_t **context)
+{
+        int32_t            ret = -1;
+        quota_inode_ctx_t *ctx = NULL;
+
+        if (inode == NULL) {
+                goto out;
+        }
+
+        QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out);
+
+        LOCK_INIT(&ctx->lock);
+        INIT_LIST_HEAD (&ctx->parents);
+
+        ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_INODE_CTX_SET_FAILED, "cannot set quota context "
+                        "in inode (gfid:%s)", uuid_utoa (inode->gfid));
+                /* undo LOCK_INIT before releasing the memory */
+                LOCK_DESTROY (&ctx->lock);
+                GF_FREE (ctx);
+                goto out;
+        }
+
+        /* publish the context only once it is owned by the inode */
+        if (context != NULL) {
+                *context = ctx;
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Fetch the quota context of `inode` under inode->lock.
+ *
+ * When the context exists and `ctx` is non-NULL, *ctx is set to it. In every
+ * other case a new context is created through __quota_init_inode_ctx (), but
+ * only if `create_if_absent` is non-zero.
+ *
+ * Returns the result of __inode_ctx_get (), or of __quota_init_inode_ctx ()
+ * when a context was created.
+ */
+static int32_t
+quota_inode_ctx_get (inode_t *inode, xlator_t *this,
+                     quota_inode_ctx_t **ctx, char create_if_absent)
+{
+        uint64_t raw_ctx;
+        int32_t  status = 0;
+
+        LOCK (&inode->lock);
+        {
+                status = __inode_ctx_get (inode, this, &raw_ctx);
+
+                if ((status != 0) || (ctx == NULL)) {
+                        if (create_if_absent) {
+                                status = __quota_init_inode_ctx (inode, this,
+                                                                 ctx);
+                        }
+                } else {
+                        *ctx = (quota_inode_ctx_t *) (unsigned long)raw_ctx;
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        return status;
+}
+/*
+ * Fill `loc` from `inode`, an optional `parent` and an optional `path`.
+ *
+ * Takes a new reference on `inode` (and on `parent` when given), copies the
+ * inode's gfid into loc->gfid, stores a private copy of `path` in loc->path
+ * and points loc->name just past the last '/' in that copy.
+ *
+ * Returns 0 on success. Returns -1 when `loc` or `inode` is NULL, or when
+ * the path cannot be duplicated; in both cases `loc` is left untouched.
+ */
 int
-quota_statvfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
-{
- struct quota_priv *priv = this->private;
-
- if (op_ret >= 0) {
- priv->current_free_disk =
- (stbuf->f_bavail * 100) / stbuf->f_blocks;
- }
- STACK_DESTROY (frame->root);
- return 0;
+quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+        char *path_copy = NULL;
+
+        if (!loc || (inode == NULL))
+                return -1;
+
+        if (path != NULL) {
+                /* Duplicate before touching loc: an allocation failure then
+                 * leaves no references to undo, and strrchr () below is never
+                 * handed a NULL pointer. */
+                path_copy = gf_strdup (path);
+                if (path_copy == NULL)
+                        return -1;
+        }
+
+        loc->inode = inode_ref (inode);
+        gf_uuid_copy (loc->gfid, inode->gfid);
+
+        if (parent) {
+                loc->parent = inode_ref (parent);
+        }
+
+        if (path_copy != NULL) {
+                loc->path = path_copy;
+
+                loc->name = strrchr (loc->path, '/');
+                if (loc->name) {
+                        loc->name++;
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Populate `loc` from `inode`: look up the parent (skipped for the root
+ * gfid), resolve the path and pass everything to quota_loc_fill ().
+ *
+ * Failing to find the parent or to build the path is only logged at debug
+ * level. Returns -1 when `inode` or `loc` is NULL, otherwise the result of
+ * quota_loc_fill ().
+ */
+int
+quota_inode_loc_fill (inode_t *inode, loc_t *loc)
+{
+        xlator_t *this     = NULL;
+        inode_t  *par      = NULL;
+        char     *fullpath = NULL;
+        int       status   = -1;
+
+        if ((inode == NULL) || (loc == NULL))
+                return status;
+
+        this = THIS;
+
+        if (__is_root_gfid (inode->gfid)) {
+                /* the root has no parent to look up */
+                loc->parent = NULL;
+        } else {
+                par = inode_parent (inode, 0, NULL);
+                if (par == NULL) {
+                        gf_msg_debug (this->name, 0, "cannot find parent for "
+                                      "inode (gfid:%s)", uuid_utoa (inode->gfid));
+                }
+        }
+
+        status = inode_path (inode, NULL, &fullpath);
+        if (status < 0) {
+                gf_msg_debug (this->name, 0, "cannot construct path for "
+                              "inode (gfid:%s)", uuid_utoa (inode->gfid));
+        }
+
+        status = quota_loc_fill (loc, inode, par, fullpath);
+        if (status < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+                        "cannot fill loc");
+        }
+
+        /* quota_loc_fill () took its own reference and path copy */
+        if (par != NULL)
+                inode_unref (par);
+
+        GF_FREE (fullpath);
+
+        return status;
+}
+
+
+/*
+ * Release everything held by a quota_local_t (locs, inode reference, dicts,
+ * call stub, lock) and return the structure to its memory pool.
+ * A NULL `local` is ignored. Always returns 0.
+ */
+int32_t
+quota_local_cleanup (quota_local_t *local)
+{
+        if (local == NULL)
+                return 0;
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->newloc);
+        loc_wipe (&local->oldloc);
+        loc_wipe (&local->validate_loc);
+
+        inode_unref (local->inode);
+
+        if (local->xdata != NULL)
+                dict_unref (local->xdata);
+
+        if (local->validate_xdata != NULL)
+                dict_unref (local->validate_xdata);
+
+        if (local->stub != NULL)
+                call_stub_destroy (local->stub);
+
+        LOCK_DESTROY (&local->lock);
+
+        mem_put (local);
+
+        return 0;
+}
+
+
+/*
+ * Obtain a zeroed quota_local_t from THIS->local_pool, initialise its lock
+ * and set space_available to -1. Returns NULL when the pool is exhausted.
+ */
+static quota_local_t *
+quota_local_new ()
+{
+        quota_local_t *fresh = mem_get0 (THIS->local_pool);
+
+        if (fresh != NULL) {
+                LOCK_INIT (&fresh->lock);
+                fresh->space_available = -1;
+        }
+
+        return fresh;
+}
+
+
+/*
+ * Allocate a quota_dentry_t holding a private copy of `name` and of the
+ * parent gfid `par`. When `ctx` is non-NULL the new dentry is appended to
+ * ctx->parents.
+ *
+ * Returns the new dentry, or NULL when `name` cannot be duplicated.
+ * NOTE(review): QUOTA_ALLOC_OR_GOTO presumably jumps to err with dentry
+ * still NULL on allocation failure and is the user of `ret` - confirm
+ * against the macro definition.
+ */
+quota_dentry_t *
+__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par)
+{
+ quota_dentry_t *dentry = NULL;
+ GF_UNUSED int32_t ret = 0;
+
+ QUOTA_ALLOC_OR_GOTO (dentry, quota_dentry_t, err);
+
+ INIT_LIST_HEAD (&dentry->next);
+
+ dentry->name = gf_strdup (name);
+ if (dentry->name == NULL) {
+ GF_FREE (dentry);
+ dentry = NULL;
+ goto err;
+ }
+
+ gf_uuid_copy (dentry->par, par);
+
+ if (ctx != NULL)
+ list_add_tail (&dentry->next, &ctx->parents);
+
+err:
+ return dentry;
}
void
-gf_quota_usage_subtract (xlator_t *this, size_t size)
+__quota_dentry_free (quota_dentry_t *dentry)
{
- struct quota_priv *priv = NULL;
+ if (dentry == NULL) {
+ goto out;
+ }
- priv = this->private;
+ list_del_init (&dentry->next);
- LOCK (&priv->lock);
- {
- if (priv->current_disk_usage < size)
- priv->current_disk_usage = 0;
- else
- priv->current_disk_usage -= size;
- }
- UNLOCK (&priv->lock);
+ GF_FREE (dentry->name);
+ GF_FREE (dentry);
+out:
+ return;
}
+/*
+ * Remove from ctx->parents the first dentry whose name equals `name` and
+ * whose parent gfid equals `par`, and free it. Does nothing when no dentry
+ * matches. Does no locking itself; quota_dentry_del () calls it with
+ * ctx->lock held.
+ */
+void
+__quota_dentry_del (quota_inode_ctx_t *ctx, const char *name, uuid_t par)
+{
+        quota_dentry_t *cur = NULL;
+        quota_dentry_t *nxt = NULL;
+
+        list_for_each_entry_safe (cur, nxt, &ctx->parents, next) {
+                if (strcmp (cur->name, name) != 0)
+                        continue;
+
+                if (gf_uuid_compare (cur->par, par) != 0)
+                        continue;
+
+                __quota_dentry_free (cur);
+                break;
+        }
+}
void
-gf_quota_usage_add (xlator_t *this, size_t size)
+quota_dentry_del (quota_inode_ctx_t *ctx, const char *name, uuid_t par)
{
- struct quota_priv *priv = this->private;
+ LOCK (&ctx->lock);
+ {
+ __quota_dentry_del (ctx, name, par);
+ }
+ UNLOCK (&ctx->lock);
+}
- LOCK (&priv->lock);
- {
- priv->current_disk_usage += size;
- }
- UNLOCK (&priv->lock);
+/*
+ * Return inode_parent (inode, pargfid, name) and drop one reference on
+ * `inode`. The caller's reference on `inode` is therefore consumed whether
+ * or not a parent is found.
+ */
+static inode_t*
+__quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
+{
+ inode_t *parent = NULL;
+
+ parent = inode_parent (inode, pargfid, name);
+ inode_unref (inode);
+ return parent;
}
+/*
+ * Look up the parent of `inode` through __quota_inode_parent () and log an
+ * error naming the inode's gfid when no parent is found.
+ *
+ * The reference the caller holds on `inode` is released by the helper, so
+ * `inode` must not be used by the caller afterwards. Returns the parent
+ * inode, or NULL when it cannot be found.
+ */
+static inode_t*
+quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
+{
+        inode_t *parent = NULL;
+        uuid_t   gfid   = {0};
+
+        /* The helper unrefs inode, so remember its gfid now rather than
+         * reading it from an inode we no longer hold a reference on. */
+        gf_uuid_copy (gfid, inode->gfid);
+
+        parent = __quota_inode_parent (inode, pargfid, name);
+        if (!parent)
+                gf_msg_callingfn (THIS->name, GF_LOG_ERROR, 0,
+                                  Q_MSG_PARENT_NULL,
+                                  "Failed to find "
+                                  "ancestor for inode (%s)",
+                                  uuid_utoa(gfid));
+
+        return parent;
+}
+
+/*
+ * Count how many parent hops separate `inode` from the root gfid.
+ *
+ * Returns that count (0 for the root itself, and also 0 when inode_ref ()
+ * yields NULL), or -1 when an ancestor cannot be found on the way up.
+ */
+int32_t
+quota_inode_depth (inode_t *inode)
+{
+        int      levels = 0;
+        inode_t *node   = NULL;
+
+        node = inode_ref (inode);
+        while (node != NULL) {
+                if (__is_root_gfid (node->gfid)) {
+                        /* reached the root: drop the reference we still hold */
+                        inode_unref (node);
+                        return levels;
+                }
+
+                levels++;
+
+                /* quota_inode_parent () releases our reference on node */
+                node = quota_inode_parent (node, 0 , NULL);
+                if (node == NULL)
+                        return -1;
+        }
+
+        return levels;
+}
+
+/*
+ * Find the closest common ancestor of `inode1` and `inode2` and copy its
+ * gfid into *common_ancestor.
+ *
+ * The depth of both inodes is computed first; the deeper one is walked up
+ * until both are at the same depth, then both are walked up in lockstep
+ * until they are the same inode or depth1 reaches 0.
+ *
+ * Returns 0 on success, -1 when a depth cannot be computed or a parent is
+ * missing during the walk (in which case *common_ancestor is not written).
+ */
+int32_t quota_find_common_ancestor (inode_t *inode1, inode_t *inode2,
+ uuid_t *common_ancestor)
+{
+ int32_t depth1 = 0;
+ int32_t depth2 = 0;
+ int32_t ret = -1;
+ inode_t *cur_inode1 = NULL;
+ inode_t *cur_inode2 = NULL;
+
+ depth1 = quota_inode_depth (inode1);
+ if (depth1 < 0)
+ goto out;
+
+ depth2 = quota_inode_depth (inode2);
+ if (depth2 < 0)
+ goto out;
+
+ /* take our own references: quota_inode_parent () releases the inode it is
+ * given on every step of the walk */
+ cur_inode1 = inode_ref (inode1);
+ cur_inode2 = inode_ref (inode2);
+
+ while (cur_inode1 && depth1 > depth2) {
+ cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL);
+ depth1--;
+ }
+
+ while (cur_inode2 && depth2 > depth1) {
+ cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL);
+ depth2--;
+ }
+
+ /* both sides are now at the same depth, so depth1 alone tracks the walk */
+ while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) {
+ cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL);
+ cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL);
+ depth1--;
+ }
+
+ if (cur_inode1 && cur_inode2) {
+ gf_uuid_copy (*common_ancestor, cur_inode1->gfid);
+ ret = 0;
+ }
+out:
+ if (cur_inode1)
+ inode_unref (cur_inode1);
+
+ if (cur_inode2)
+ inode_unref (cur_inode2);
+
+ return ret;
+ }
-void
-gf_quota_update_current_free_disk (xlator_t *this)
+void
+check_ancestory_continue (struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
{
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
+ call_frame_t *frame = NULL;
+ quota_local_t *local = NULL;
+ uint32_t link_count = 0;
+
+ frame = data;
+ local = frame->local;
+
+ if (parents && list_empty (parents)) {
+ gf_msg (THIS->name, GF_LOG_WARNING, EIO,
+ Q_MSG_ANCESTRY_BUILD_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota "
+ "cannot be enforced. "
+ "Hence, failing fop with EIO",
+ uuid_utoa (inode->gfid));
+ op_errno = EIO;
+ op_ret = -1;
+ }
- struct quota_priv *priv = NULL;
+ LOCK (&local->lock);
+ {
+ link_count = --local->link_count;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&local->lock);
- pool = this->ctx->pool;
- frame = create_frame (this, pool);
-
- priv = this->private;
+ if (link_count == 0)
+ local->fop_continue_cbk (frame);
+}
- STACK_WIND (frame, quota_statvfs_cbk,
- this->children->xlator,
- this->children->xlator->fops->statfs, &(priv->root_loc));
+void
+check_ancestory (call_frame_t *frame, inode_t *inode)
+{
+ inode_t *cur_inode = NULL;
+ inode_t *parent = NULL;
+
+ cur_inode = inode_ref (inode);
+ while (cur_inode && !__is_root_gfid (cur_inode->gfid)) {
+ parent = inode_parent (cur_inode, 0, NULL);
+ if (!parent) {
+ quota_build_ancestry (cur_inode,
+ check_ancestory_continue, frame);
+ inode_unref (cur_inode);
+ return;
+ }
+ inode_unref (cur_inode);
+ cur_inode = parent;
+ }
- return ;
+ if (cur_inode) {
+ inode_unref (cur_inode);
+ check_ancestory_continue (NULL, NULL, 0, 0, frame);
+ } else {
+ check_ancestory_continue (NULL, NULL, -1, ESTALE, frame);
+ }
}
+void
+check_ancestory_2_cbk (struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
+{
+ inode_t *this_inode = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ this_inode = data;
+
+ if (op_ret < 0)
+ goto out;
+
+ if (parents == NULL || list_empty (parents)) {
+ gf_msg (THIS->name, GF_LOG_WARNING, 0,
+ Q_MSG_ENFORCEMENT_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota "
+ "cannot be enforced.",
+ uuid_utoa (this_inode->gfid));
+ goto out;
+ }
-int
-gf_quota_check_free_disk (xlator_t *this)
-{
- struct quota_priv * priv = NULL;
- struct timeval tv = {0, 0};
-
- priv = this->private;
- if (priv->min_free_disk_limit) {
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (priv->refresh_interval +
- priv->min_disk_last_updated_time)) {
- priv->min_disk_last_updated_time = tv.tv_sec;
- gf_quota_update_current_free_disk (this);
- }
- if (priv->current_free_disk <= priv->min_free_disk_limit)
- return -1;
- }
+ quota_inode_ctx_get (this_inode, THIS, &ctx, 0);
+ if (ctx)
+ ctx->ancestry_built = _gf_true;
- return 0;
+out:
+ inode_unref (this_inode);
}
+void
+check_ancestory_2 (xlator_t *this, quota_local_t *local, inode_t *inode)
+{
+ inode_t *cur_inode = NULL;
+ inode_t *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ char *name = NULL;
+ uuid_t pgfid = {0};
+
+ name = (char *) local->loc.name;
+ if (local->loc.parent) {
+ gf_uuid_copy (pgfid, local->loc.parent->gfid);
+ parent = local->loc.parent;
+ }
+
+ cur_inode = inode_ref (inode);
+ while (cur_inode && !__is_root_gfid (cur_inode->gfid)) {
+ quota_inode_ctx_get (cur_inode, this, &ctx, 0);
+ /* build ancestry is required only on the first lookup,
+ * so stop crawling when the inode_ctx is set for an inode
+ */
+ if (ctx && ctx->ancestry_built)
+ goto setctx;
+
+ parent = inode_parent (cur_inode, pgfid, name);
+ if (!parent) {
+ quota_build_ancestry (cur_inode, check_ancestory_2_cbk,
+ inode_ref (inode));
+ goto out;
+ }
-int
-quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ if (name != NULL) {
+ name = NULL;
+ gf_uuid_clear (pgfid);
+ }
+
+ inode_unref (cur_inode);
+ cur_inode = parent;
+ }
+
+setctx:
+ if (cur_inode && cur_inode != inode) {
+ quota_inode_ctx_get (inode, this, &ctx, 0);
+ if (ctx)
+ ctx->ancestry_built = _gf_true;
+ }
+out:
+ if (cur_inode)
+ inode_unref (cur_inode);
+}
+
+static void
+quota_link_count_decrement (call_frame_t *frame)
{
- struct quota_priv *priv = this->private;
- struct quota_local *local = NULL;
+ call_frame_t *tmpframe = NULL;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int link_count = -1;
+
+ local = frame->local;
+ if (local && local->par_frame) {
+ local = local->par_frame->local;
+ tmpframe = frame;
+ }
- local = frame->local;
+ if (local == NULL)
+ goto out;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_subtract (this, (local->stbuf.ia_blocks -
- postbuf->ia_blocks) * 512);
- loc_wipe (&local->loc);
- }
+ LOCK (&local->lock);
+ {
+ link_count = --local->link_count;
+ if (link_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+out:
+ if (tmpframe) {
+ local = tmpframe->local;
+ tmpframe->local = NULL;
+
+ STACK_DESTROY (frame->root);
+ if (local)
+ quota_local_cleanup (local);
+ }
+
+ return;
}
+static void
+quota_handle_validate_error (call_frame_t *frame, int32_t op_ret,
+ int32_t op_errno)
+{
+ quota_local_t *local;
-int
-quota_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ local = frame->local;
+ if (local && local->par_frame)
+ local = local->par_frame->local;
+
+ if (local == NULL)
+ goto out;
+
+ LOCK (&local->lock);
+ {
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ /* we abort checking limits on this path to root */
+ quota_link_count_decrement (frame);
+out:
+ return;
+}
+
+int32_t
+quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ int64_t *object_size = 0;
+ uint64_t value = 0;
+ data_t *data = NULL;
+ quota_meta_t size = {0,};
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ goto unwind;
+ }
- priv = this->private;
- local = frame->local;
+ GF_ASSERT (local);
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno,
+ EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, unwind, op_errno,
+ EINVAL);
+
+ ret = inode_ctx_get (local->validate_loc.inode, this, &value);
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_INODE_CTX_GET_FAILED, "quota context is"
+ " not present in inode (gfid:%s)",
+ uuid_utoa (local->validate_loc.inode->gfid));
+ op_errno = EINVAL;
+ goto unwind;
+ }
- if (op_ret >= 0) {
- local->stbuf = *buf;
- }
+ ret = quota_dict_get_meta (xdata, QUOTA_SIZE_KEY, &size);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_SIZE_KEY_MISSING, "quota size key not present "
+ "in dict");
+ op_errno = EINVAL;
+ }
- STACK_WIND (frame, quota_truncate_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
- return 0;
+ local->just_validated = 1; /* so that we don't go into infinite
+ * loop of validation and checking
+ * limit when timeout is zero.
+ */
+ LOCK (&ctx->lock);
+ {
+ ctx->size = size.size;
+ ctx->file_count = size.file_count;
+ ctx->dir_count = size.dir_count;
+ gettimeofday (&ctx->tv, NULL);
+ }
+ UNLOCK (&ctx->lock);
+
+ quota_check_limit (frame, local->validate_loc.inode, this);
+ return 0;
+
+unwind:
+ quota_handle_validate_error (frame, op_ret, op_errno);
+ return 0;
}
-int
-quota_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+static uint64_t
+quota_time_elapsed (struct timeval *now, struct timeval *then)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ return (now->tv_sec - then->tv_sec);
+}
- priv = this->private;
- if (priv->disk_usage_limit) {
- local = GF_CALLOC (1, sizeof (struct quota_local),
- gf_quota_mt_quota_local);
- frame->local = local;
+int32_t
+quota_timeout (struct timeval *tv, int32_t timeout)
+{
+ struct timeval now = {0,};
+ int32_t timed_out = 0;
- loc_copy (&local->loc, loc);
- local->offset = offset;
+ gettimeofday (&now, NULL);
- STACK_WIND (frame, quota_truncate_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- return 0;
- }
+ if (quota_time_elapsed (&now, tv) >= timeout) {
+ timed_out = 1;
+ }
- STACK_WIND (frame, quota_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
+ return timed_out;
}
+/* Return: 1 if new entry added
+ * 0 no entry added
+ */
+static int32_t
+quota_add_parent (struct list_head *list, char *name, uuid_t pgfid)
+{
+ quota_dentry_t *entry = NULL;
+ gf_boolean_t found = _gf_false;
-int
-quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ if (list == NULL) {
+ goto out;
+ }
+
+ list_for_each_entry (entry, list, next) {
+ if (gf_uuid_compare (pgfid, entry->par) == 0) {
+ found = _gf_true;
+ goto out;
+ }
+ }
+
+ entry = __quota_dentry_new (NULL, name, pgfid);
+ if (entry)
+ list_add_tail (&entry->next, list);
+
+out:
+ if (found)
+ return 0;
+ else
+ return 1;
+
+}
+
+/* This function iterates the parent list in inode
+ * context and add unique parent to the list
+ * Returns number of dentry added to the list
+ */
+static int32_t
+quota_add_parents_from_ctx (quota_inode_ctx_t *ctx, struct list_head *list)
{
- struct quota_priv *priv = NULL;
- struct quota_local *local = NULL;
+ int ret = 0;
+ quota_dentry_t *dentry = NULL;
+ int32_t count = 0;
- local = frame->local;
- priv = this->private;
+ if (ctx == NULL || list == NULL)
+ goto out;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_subtract (this, (local->stbuf.ia_blocks -
- postbuf->ia_blocks) * 512);
- fd_unref (local->fd);
- }
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ ret = quota_add_parent (list, dentry->name,
+ dentry->par);
+
+ if (ret == 1)
+ count++;
+ }
+ }
+ UNLOCK (&ctx->lock);
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+out:
+ return count;
}
+int32_t
+quota_build_ancestry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ inode_t *parent = NULL;
+ inode_t *tmp_parent = NULL;
+ inode_t *linked_inode = NULL;
+ inode_t *tmp_inode = NULL;
+ gf_dirent_t *entry = NULL;
+ loc_t loc = {0, };
+ quota_dentry_t *dentry = NULL;
+ quota_dentry_t *tmp = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ struct list_head parents = {0, };
+ quota_local_t *local = NULL;
+
+ INIT_LIST_HEAD (&parents);
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto err;
+
+ if ((op_ret > 0) && (entries != NULL)) {
+ list_for_each_entry (entry, &entries->list, list) {
+ if (__is_root_gfid (entry->inode->gfid)) {
+ /* The list contains a sub-list for each
+ * possible path to the target inode. Each
+ * sub-list starts with the root entry of the
+ * tree and is followed by the child entries
+ * for a particular path to the target entry.
+ * The root entry is an implied sub-list
+ * delimiter, as it denotes we have started
+ * processing a new path. Reset the parent
+ * pointer and continue
+ */
+
+ tmp_parent = NULL;
+ } else {
+ /* For a non-root entry, link this inode */
+ linked_inode = inode_link (entry->inode,
+ tmp_parent,
+ entry->d_name,
+ &entry->d_stat);
+ if (linked_inode) {
+ tmp_inode = entry->inode;
+ entry->inode = linked_inode;
+ inode_unref (tmp_inode);
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING,
+ EINVAL, Q_MSG_PARENT_NULL,
+ "inode link failed");
+ op_errno = EINVAL;
+ goto err;
+ }
+ }
+
+ gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+
+ loc.inode = inode_ref (entry->inode);
+ loc.parent = inode_ref (tmp_parent);
+ loc.name = entry->d_name;
+
+ quota_fill_inodectx (this, entry->inode, entry->dict,
+ &loc, &entry->d_stat, &op_errno);
+
+ tmp_parent = entry->inode;
+
+ loc_wipe (&loc);
+ }
+ }
+
+ parent = inode_parent (local->loc.inode, 0, NULL);
+ if (parent == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_PARENT_NULL, "parent is NULL");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
+
+ quota_add_parents_from_ctx (ctx, &parents);
+
+ if (list_empty (&parents)) {
+ /* we built ancestry for a directory */
+ list_for_each_entry (entry, &entries->list, list) {
+ if (entry->inode == local->loc.inode)
+ break;
+ }
+
+ /* Getting assertion here, need to investigate
+ comment for now
+ GF_ASSERT (&entry->list != &entries->list);
+ */
+
+ quota_add_parent (&parents, entry->d_name, parent->gfid);
+ }
+
+ local->ancestry_cbk (&parents, local->loc.inode, 0, 0,
+ local->ancestry_data);
+ goto cleanup;
+
+err:
+ local->ancestry_cbk (NULL, NULL, -1, op_errno, local->ancestry_data);
+
+cleanup:
+ STACK_DESTROY (frame->root);
+ quota_local_cleanup (local);
+
+ if (parent != NULL) {
+ inode_unref (parent);
+ parent = NULL;
+ }
+
+ list_for_each_entry_safe (dentry, tmp, &parents, next) {
+ __quota_dentry_free (dentry);
+ }
+
+ return 0;
+}
int
-quota_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk,
+ void *data)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ fd_t *fd = NULL;
+ quota_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ int op_errno = ENOMEM;
+ int op_ret = -1;
+ xlator_t *this = NULL;
+ dict_t *xdata_req = NULL;
+
+ this = THIS;
+
+ xdata_req = dict_new ();
+ if (xdata_req == NULL)
+ goto err;
+
+ fd = fd_anonymous (inode);
+ if (fd == NULL)
+ goto err;
+
+ new_frame = create_frame (this, this->ctx->pool);
+ if (new_frame == NULL)
+ goto err;
+
+ local = quota_local_new ();
+ if (local == NULL)
+ goto err;
+
+ new_frame->root->uid = new_frame->root->gid = 0;
+ new_frame->local = local;
+ local->ancestry_cbk = ancestry_cbk;
+ local->ancestry_data = data;
+ local->loc.inode = inode_ref (inode);
+
+ op_ret = dict_set_int8 (xdata_req, QUOTA_LIMIT_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
+
+ op_ret = dict_set_int8 (xdata_req, QUOTA_LIMIT_OBJECTS_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
- priv = this->private;
- local = frame->local;
+ op_ret = dict_set_int8 (xdata_req, GET_ANCESTRY_DENTRY_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
- if (op_ret >= 0) {
- local->stbuf = *buf;
- }
+ /* This would ask posix layer to construct dentry chain till root
+ * We don't need to do a opendir, we can use the anonymous fd
+ * here for the readidrp.
+ * avoiding opendir also reduces the window size where another FOP
+ * can be executed before completion of build ancestry
+ */
+ STACK_WIND (new_frame, quota_build_ancestry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, 0, 0, xdata_req);
- STACK_WIND (frame, quota_ftruncate_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
- return 0;
-}
+ op_ret = 0;
+
+err:
+ if (fd)
+ fd_unref (fd);
+ if (xdata_req)
+ dict_unref (xdata_req);
+
+ if (op_ret < 0) {
+ ancestry_cbk (NULL, NULL, -1, op_errno, data);
+
+ if (new_frame) {
+ local = new_frame->local;
+ new_frame->local = NULL;
+ STACK_DESTROY (new_frame->root);
+ }
+
+ if (local)
+ quota_local_cleanup (local);
+ }
+
+ return 0;
+}
int
-quota_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+quota_validate (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ fop_lookup_cbk_t cbk_fn)
+{
+ quota_local_t *local = NULL;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ quota_priv_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&local->lock);
+ {
+ loc_wipe (&local->validate_loc);
+
+ ret = quota_inode_loc_fill (inode, &local->validate_loc);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENFORCEMENT_FAILED,
+ "cannot fill loc for inode (gfid:%s), hence "
+ "aborting quota-checks and continuing with fop",
+ uuid_utoa (inode->gfid));
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ xdata = dict_new ();
+ if (xdata == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int8 (xdata, QUOTA_SIZE_KEY, 1);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "dict set failed");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_str (xdata, "volume-uuid", priv->volume_uuid);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "dict set failed");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = quota_enforcer_lookup (frame, this, xdata, cbk_fn);
+ if (ret < 0) {
+ ret = -ENOTCONN;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (xdata)
+ dict_unref (xdata);
+
+ return ret;
+}
+
+void
+quota_check_limit_continuation (struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ quota_local_t *local = NULL;
+ quota_local_t *par_local = NULL;
+ quota_dentry_t *entry = NULL;
+ inode_t *parent = NULL;
+ int parent_count = 0;
+
+ frame = data;
+ local = frame->local;
+ this = THIS;
+
+ if (local->par_frame)
+ par_local = local->par_frame->local;
+ else
+ par_local = local;
+
+
+ if ((op_ret < 0) || list_empty (parents)) {
+ if (op_ret >= 0) {
+ gf_msg (this->name, GF_LOG_WARNING, EIO,
+ Q_MSG_ANCESTRY_BUILD_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota"
+ "cannot be enforced. "
+ "Hence, failing fop with EIO",
+ uuid_utoa (inode->gfid));
+ op_errno = EIO;
+ }
+
+ quota_handle_validate_error (frame, -1, op_errno);
+ goto out;
+ }
+
+ list_for_each_entry (entry, parents, next) {
+ parent_count++;
+ }
+
+ LOCK (&par_local->lock);
+ {
+ par_local->link_count += (parent_count - 1);
+ }
+ UNLOCK (&par_local->lock);
+
+ if (local->par_frame) {
+ list_for_each_entry (entry, parents, next) {
+ parent = inode_find (inode->table, entry->par);
+ quota_check_limit (frame, parent, this);
+ inode_unref (parent);
+ }
+ } else {
+ list_for_each_entry (entry, parents, next) {
+ parent = do_quota_check_limit (frame, inode, this,
+ entry, _gf_true);
+ if (parent)
+ inode_unref (parent);
+ else
+ quota_link_count_decrement (frame);
+ }
+ }
+out:
+ return;
+}
- priv = this->private;
+int32_t
+quota_check_object_limit (call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated,
+ quota_local_t *local, gf_boolean_t *skip_check)
+{
+ int32_t ret = -1;
+ uint32_t timeout = 0;
+ char need_validate = 0;
+ gf_boolean_t hard_limit_exceeded = 0;
+ int64_t object_aggr_count = 0;
+
+ GF_ASSERT (frame);
+ GF_ASSERT (priv);
+ GF_ASSERT (_inode);
+ GF_ASSERT (this);
+ GF_ASSERT (local);
+
+ if (ctx != NULL && (ctx->object_hard_lim > 0 ||
+ ctx->object_soft_lim)) {
+ LOCK (&ctx->lock);
+ {
+ timeout = priv->soft_timeout;
+
+ object_aggr_count = ctx->file_count +
+ ctx->dir_count + 1;
+ if (((ctx->object_soft_lim >= 0)
+ && (object_aggr_count) >
+ ctx->object_soft_lim)) {
+ timeout = priv->hard_timeout;
+ }
+
+ if (!just_validated
+ && quota_timeout (&ctx->tv, timeout)) {
+ need_validate = 1;
+ } else if ((object_aggr_count) >
+ ctx->object_hard_lim) {
+ hard_limit_exceeded = 1;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ if (need_validate && *skip_check != _gf_true) {
+ *skip_check = _gf_true;
+ ret = quota_validate (frame, _inode, this,
+ quota_validate_cbk);
+ if (ret < 0) {
+ *op_errno = -ret;
+ *skip_check = _gf_false;
+ }
+ goto out;
+ }
- if (priv->disk_usage_limit) {
- local = GF_CALLOC (1, sizeof (struct quota_local),
- gf_quota_mt_quota_local);
- frame->local = local;
+ if (hard_limit_exceeded) {
+ local->op_ret = -1;
+ local->op_errno = EDQUOT;
+ *op_errno = EDQUOT;
+ goto out;
+ }
- local->fd = fd_ref (fd);
- local->offset = offset;
+ /*We log usage only if quota limit is configured on
+ that inode
+ */
+ quota_log_usage (this, ctx, _inode, 0);
+ }
- STACK_WIND (frame, quota_ftruncate_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
- return 0;
- }
+ ret = 0;
- STACK_WIND (frame, quota_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
+out:
+ return ret;
}
-int
-quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+int32_t
+quota_check_size_limit (call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated, int64_t delta,
+ quota_local_t *local, gf_boolean_t *skip_check)
{
- struct quota_priv *priv = NULL;
+ int32_t ret = -1;
+ uint32_t timeout = 0;
+ char need_validate = 0;
+ gf_boolean_t hard_limit_exceeded = 0;
+ int64_t space_available = 0;
+ int64_t wouldbe_size = 0;
+
+ GF_ASSERT (frame);
+ GF_ASSERT (priv);
+ GF_ASSERT (_inode);
+ GF_ASSERT (this);
+ GF_ASSERT (local);
+
+ if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) {
+ wouldbe_size = ctx->size + delta;
+
+ LOCK (&ctx->lock);
+ {
+ timeout = priv->soft_timeout;
+
+ if ((ctx->soft_lim >= 0)
+ && (wouldbe_size > ctx->soft_lim)) {
+ timeout = priv->hard_timeout;
+ }
+
+ if (!just_validated
+ && quota_timeout (&ctx->tv, timeout)) {
+ need_validate = 1;
+ } else if (wouldbe_size >= ctx->hard_lim) {
+ hard_limit_exceeded = 1;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ if (need_validate && *skip_check != _gf_true) {
+ *skip_check = _gf_true;
+ ret = quota_validate (frame, _inode, this,
+ quota_validate_cbk);
+ if (ret < 0) {
+ *op_errno = -ret;
+ *skip_check = _gf_false;
+ }
+ goto out;
+ }
- priv = this->private;
+ if (hard_limit_exceeded) {
+ local->op_ret = -1;
+ local->op_errno = EDQUOT;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_add (this, buf->ia_blocks * 512);
- }
+ space_available = ctx->hard_lim - ctx->size;
+
+ if (space_available < 0)
+ space_available = 0;
+
+ if ((local->space_available < 0)
+ || (local->space_available
+ > space_available)){
+ local->space_available
+ = space_available;
+
+ }
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ if (space_available == 0) {
+ *op_errno = EDQUOT;
+ goto out;
+ }
+ }
+
+ /* We log usage only if quota limit is configured on
+ that inode. */
+ quota_log_usage (this, ctx, _inode, delta);
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-int
-quota_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+int32_t
+quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this)
{
- struct quota_priv *priv = NULL;
+ int32_t ret = -1, op_errno = EINVAL;
+ inode_t *_inode = NULL, *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ quota_local_t *par_local = NULL;
+ char need_validate = 0;
+ char just_validated = 0;
+ gf_boolean_t hard_limit_exceeded = 0;
+ int64_t delta = 0;
+ int8_t object_delta = 0;
+ uint64_t value = 0;
+ gf_boolean_t skip_check = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("quota", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ GF_VALIDATE_OR_GOTO (this->name, inode, err);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+
+ if (local->par_frame) {
+ par_local = local->par_frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, par_local, err);
+ } else {
+ par_local = local;
+ }
- priv = this->private;
+ delta = par_local->delta;
+ object_delta = par_local->object_delta;
+
+ GF_VALIDATE_OR_GOTO (this->name, par_local->stub, err);
+ /* Allow all the trusted clients
+ * Don't block the gluster internal processes like rebalance, gsyncd,
+ * self heal etc from the disk quotas.
+ *
+ * Method: Allow all the clients with PID negative. This is by the
+ * assumption that any kernel assigned pid doesn't have the negative
+ * number.
+ */
+ if (0 > frame->root->pid) {
+ ret = 0;
+ quota_link_count_decrement (frame);
+ goto done;
+ }
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND_STRICT (mknod, frame, -1, ENOSPC, NULL, NULL,
- NULL, NULL);
- return 0;
- }
+ priv = this->private;
+
+ inode_ctx_get (inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ _inode = inode_ref (inode);
+
+ LOCK (&local->lock);
+ {
+ just_validated = local->just_validated;
+ local->just_validated = 0;
+ }
+ UNLOCK (&local->lock);
+
+ do {
+ /* In a rename operation, enforce should be stopped at common
+ ancestor */
+ if (!gf_uuid_is_null (par_local->common_ancestor) &&
+ !gf_uuid_compare (_inode->gfid, par_local->common_ancestor)
+ ) {
+ quota_link_count_decrement (frame);
+ break;
+ }
+
+ if (object_delta <= 0)
+ goto skip_check_object_limit;
+
+ ret = quota_check_object_limit (frame, ctx, priv, _inode, this,
+ &op_errno, just_validated,
+ par_local, &skip_check);
+ if (skip_check == _gf_true)
+ goto done;
+
+ if (ret) {
+ if (op_errno != EDQUOT)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ Q_MSG_ENFORCEMENT_FAILED, "Failed to "
+ "check quota object limit");
+ goto err;
+ }
+
+skip_check_object_limit:
+ ret = quota_check_size_limit (frame, ctx, priv, _inode, this,
+ &op_errno, just_validated, delta,
+ par_local, &skip_check);
+ if (skip_check == _gf_true)
+ goto done;
+
+ if (ret) {
+ if (op_errno != EDQUOT)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ Q_MSG_ENFORCEMENT_FAILED, "Failed to "
+ "check quota size limit");
+ goto err;
+ }
+
+ if (__is_root_gfid (_inode->gfid)) {
+ quota_link_count_decrement (frame);
+ break;
+ }
+
+ parent = inode_parent (_inode, 0, NULL);
+ if (parent == NULL) {
+ ret = quota_build_ancestry (_inode,
+ quota_check_limit_continuation,
+ frame);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ break;
+ }
+
+ inode_unref (_inode);
+ _inode = parent;
+ just_validated = 0;
+
+ value = 0;
+ inode_ctx_get (_inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ } while (1);
+
+done:
+ if (_inode != NULL) {
+ inode_unref (_inode);
+ _inode = NULL;
+ }
+ return 0;
+
+err:
+ quota_handle_validate_error (frame, -1, op_errno);
+
+ inode_unref (_inode);
+ return 0;
+}
+
+inode_t *
+do_quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ quota_dentry_t *dentry, gf_boolean_t force)
+{
+ int32_t ret = -1;
+ inode_t *parent = NULL;
+ call_frame_t *new_frame = NULL;
+ quota_local_t *local = NULL;
+ quota_local_t *new_local = NULL;
+
+ local = frame->local;
+
+ parent = inode_parent (inode, dentry->par, dentry->name);
+ if (parent == NULL) {
+ if (force)
+ parent = inode_find (inode->table, dentry->par);
+ else
+ goto out;
+ }
+ if (parent == NULL)
+ goto out;
+
+ new_frame = copy_frame (frame);
+ if (new_frame == NULL)
+ goto out;
+
+ new_local = quota_local_new ();
+ if (new_local == NULL)
+ goto out;
+
+ new_frame->local = new_local;
+ new_local->par_frame = frame;
+
+ quota_check_limit (new_frame, parent, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (parent) {
+ /* Caller should decrement link_count, in case parent is
+ * NULL
+ */
+ quota_handle_validate_error (frame, -1, ENOMEM);
+ }
+
+ if (new_frame) {
+ new_frame->local = NULL;
+ STACK_DESTROY (new_frame->root);
+ }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND_STRICT (mknod, frame, -1, ENOSPC, NULL, NULL,
- NULL, NULL);
- return 0;
+ if (new_local)
+ quota_local_cleanup (new_local);
}
- STACK_WIND (frame, quota_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
+ return parent;
}
+static int
+quota_get_limits (xlator_t *this, dict_t *dict, int64_t *hard_lim,
+ int64_t *soft_lim, int64_t *object_hard_limit,
+ int64_t *object_soft_limit)
+{
+ quota_limits_t *limit = NULL;
+ quota_limits_t *object_limit = NULL;
+ quota_priv_t *priv = NULL;
+ int64_t soft_lim_percent = 0;
+ int64_t *ptr = NULL;
+ int ret = 0;
+
+ if ((this == NULL) || (dict == NULL) || (hard_lim == NULL)
+ || (soft_lim == NULL))
+ goto out;
+
+ priv = this->private;
+
+ ret = dict_get_bin (dict, QUOTA_LIMIT_KEY, (void **) &ptr);
+ limit = (quota_limits_t *)ptr;
+
+ if (limit) {
+ *hard_lim = ntoh64 (limit->hl);
+ soft_lim_percent = ntoh64 (limit->sl);
+ }
+
+ if (soft_lim_percent < 0) {
+ soft_lim_percent = priv->default_soft_lim;
+ }
+
+ if ((*hard_lim > 0) && (soft_lim_percent > 0)) {
+ *soft_lim = (soft_lim_percent * (*hard_lim))/100;
+ }
+
+ ret = dict_get_bin (dict, QUOTA_LIMIT_OBJECTS_KEY, (void **) &ptr);
+ if (ret)
+ return 0;
+ object_limit = (quota_limits_t *)ptr;
+
+ if (object_limit) {
+ *object_hard_limit = ntoh64 (object_limit->hl);
+ soft_lim_percent = ntoh64 (object_limit->sl);
+ }
+
+ if (soft_lim_percent < 0) {
+ soft_lim_percent = priv->default_soft_lim;
+ }
+
+ if ((*object_hard_limit > 0) && (soft_lim_percent > 0)) {
+ *object_soft_limit = (soft_lim_percent *
+ (*object_hard_limit))/100;
+ }
+
+out:
+ return 0;
+}
int
+quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict,
+ loc_t *loc, struct iatt *buf, int32_t *op_errno)
+{
+ int32_t ret = -1;
+ char found = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ uint64_t value = 0;
+ int64_t hard_lim = 0;
+ int64_t soft_lim = 0;
+ int64_t object_hard_limit = 0;
+ int64_t object_soft_limit = 0;
+
+ quota_get_limits (this, dict, &hard_lim, &soft_lim, &object_hard_limit,
+ &object_soft_limit);
+
+ inode_ctx_get (inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ if ((((ctx == NULL) || (ctx->hard_lim == hard_lim))
+ && (hard_lim < 0) && !QUOTA_REG_OR_LNK_FILE (buf->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = quota_inode_ctx_get (inode, this, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_INODE_CTX_GET_FAILED, "cannot create quota "
+ "context in inode(gfid:%s)", uuid_utoa (inode->gfid));
+ ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->hard_lim = hard_lim;
+ ctx->soft_lim = soft_lim;
+ ctx->object_hard_lim = object_hard_limit;
+ ctx->object_soft_lim = object_soft_limit;
+
+ ctx->buf = *buf;
+
+ if (!QUOTA_REG_OR_LNK_FILE (buf->ia_type)) {
+ goto unlock;
+ }
+
+ /* do nothing if it is a nameless lookup */
+ if (loc->name == NULL || !loc->parent)
+ goto unlock;
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, loc->name) == 0) &&
+ (gf_uuid_compare (loc->parent->gfid,
+ dentry->par) == 0)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ dentry = __quota_dentry_new (ctx,
+ (char *)loc->name,
+ loc->parent->gfid);
+ if (dentry == NULL) {
+ /*
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM,
+ "cannot create a new dentry (par:%"
+- PRId64", name:%s) for inode(ino:%"
+- PRId64", gfid:%s)",
+- uuid_utoa (local->loc.inode->gfid));
+ */
+ ret = -1;
+ *op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&ctx->lock);
+
+out:
+ return ret;
+}
+
+int32_t
+quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ inode_t *this_inode = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret >= 0 && inode) {
+ this_inode = inode_ref (inode);
+
+ op_ret = quota_fill_inodectx (this, inode, dict, &local->loc,
+ buf, &op_errno);
+ if (op_ret < 0)
+ op_errno = ENOMEM;
+ }
+
+ QUOTA_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ dict, postparent);
+
+ if (op_ret < 0 || this_inode == NULL || gf_uuid_is_null(this_inode->gfid))
+ goto out;
+
+ check_ancestory_2 (this, local, this_inode);
+
+out:
+ if (this_inode)
+ inode_unref (this_inode);
+
+ quota_local_cleanup (local);
+
+ return 0;
+}
+
+int32_t
+quota_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+ if (!xattr_req)
+ goto err;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ ret = dict_set_int8 (xattr_req, QUOTA_LIMIT_KEY, 1);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "dict set of key for "
+ "hard-limit failed");
+ goto err;
+ }
+
+ ret = dict_set_int8 (xattr_req, QUOTA_LIMIT_OBJECTS_KEY, 1);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set of key for quota object limit failed");
+ goto err;
+ }
+
+ STACK_WIND (frame, quota_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ ret = 0;
+
+err:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ if (ret < 0) {
+ QUOTA_STACK_UNWIND (lookup, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL);
+ }
+
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ return 0;
+}
+
+/*
+ * writev callback.
+ *
+ * When the write succeeded and both the frame-local state and the post-write
+ * iatt are available, the post-write attributes are saved into the inode's
+ * quota context (under ctx->lock). A missing context is only logged. The
+ * result received from the child is always unwound unchanged.
+ */
+int32_t
+quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        quota_local_t     *local   = frame->local;
+        quota_inode_ctx_t *qctx    = NULL;
+        uint64_t           raw_ctx = 0;
+        int32_t            rc      = 0;
+
+        /* Nothing to record for a failed write or without local/postbuf. */
+        if ((op_ret < 0) || (local == NULL) || (postbuf == NULL))
+                goto unwind;
+
+        rc = inode_ctx_get (local->loc.inode, this, &raw_ctx);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_INODE_CTX_GET_FAILED, "%s: failed to get the "
+                        "context", local->loc.path);
+                goto unwind;
+        }
+
+        /* The inode context stores the quota ctx pointer as an integer. */
+        qctx = (quota_inode_ctx_t *)(unsigned long) raw_ctx;
+
+        if (qctx != NULL) {
+                LOCK (&qctx->lock);
+                {
+                        qctx->buf = *postbuf;
+                }
+                UNLOCK (&qctx->lock);
+        } else {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_INODE_CTX_GET_FAILED,
+                        "quota context not set in %s (gfid:%s)",
+                        local->loc.path, uuid_utoa (local->loc.inode->gfid));
+        }
+
+unwind:
+        QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+                            xdata);
+
+        return 0;
+}
+
+
+/*
+ * Continuation of quota_writev(), run through the call stub once the quota
+ * limit check has filled in local->op_ret / local->op_errno.
+ *
+ *  - check passed: the write is wound to the child unchanged.
+ *  - EDQUOT with local->space_available > 0: the iovec is cut down with
+ *    iov_subset() to the space still available and that shorter write is
+ *    wound instead.
+ *  - ENOENT / ESTALE: the write is allowed through (see comment below).
+ *  - any other error: unwound to the caller with that errno.
+ *
+ * If the shortened iovec cannot be allocated the write is failed with ENOMEM.
+ */
+int32_t
+quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     struct iovec *vector, int32_t count, off_t off,
+                     uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+        quota_local_t *local      = NULL;
+        int32_t        op_errno   = EINVAL;
+        struct iovec  *new_vector = NULL;
+        int32_t        new_count  = 0;
+
+        local = frame->local;
+
+        GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+        if (local->op_ret == -1) {
+                op_errno = local->op_errno;
+
+                if ((op_errno == EDQUOT) && (local->space_available > 0)) {
+                        /* First call passes no destination; presumably it
+                         * only reports how many iovecs the subset needs --
+                         * confirm against iov_subset(). */
+                        new_count = iov_subset (vector, count, 0,
+                                                local->space_available, NULL);
+
+                        new_vector = GF_CALLOC (new_count,
+                                                sizeof (struct iovec),
+                                                gf_common_mt_iovec);
+                        if (new_vector == NULL) {
+                                local->op_ret = -1;
+                                local->op_errno = ENOMEM;
+                                /* report the allocation failure, not the
+                                 * stale EDQUOT picked up above */
+                                op_errno = ENOMEM;
+                                goto unwind;
+                        }
+
+                        new_count = iov_subset (vector, count, 0,
+                                                local->space_available,
+                                                new_vector);
+
+                        vector = new_vector;
+                        count = new_count;
+                } else if (op_errno == ENOENT || op_errno == ESTALE) {
+                        /* We may get ENOENT/ESTALE in case of below scenario
+                         *     fd = open file.txt
+                         *     unlink file.txt
+                         *     write on fd
+                         * Here build_ancestry can fail as the file is removed.
+                         * For now ignore ENOENT/ESTALE with writes on active fd
+                         * We need to re-visit this code once we understand
+                         * how other file-system behave in this scenario
+                         */
+                        gf_msg_debug (this->name, 0, "quota enforcer failed "
+                                      "with ENOENT/ESTALE on %s, cannot check "
+                                      "quota limits and allowing writes",
+                                      uuid_utoa (fd->inode->gfid));
+                } else {
+                        goto unwind;
+                }
+        }
+
+        STACK_WIND (frame, quota_writev_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd,
+                    vector, count, off, flags, iobref, xdata);
+
+        /* the temporary iovec array is released once the wind has returned */
+        if (new_vector != NULL)
+                GF_FREE (new_vector);
+
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * writev entry point of the quota translator.
+ *
+ * With quota switched off the call is passed straight to the child.
+ * Otherwise a frame-local is set up, the real write is parked in a call stub
+ * (quota_writev_helper) and a limit check is started for every parent dentry
+ * recorded in the inode's quota context. When no parent is known a single
+ * check is started on the inode itself.
+ *
+ * Allocation failures are unwound as ENOMEM, an invalid "this"/"fd" as
+ * EINVAL.
+ */
+int32_t
+quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iovec *vector, int32_t count, off_t off,
+              uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+        quota_priv_t      *priv       = NULL;
+        int32_t            op_errno   = EINVAL;
+        int32_t            parents    = 0;
+        int32_t            fail_count = 0;
+        uint64_t           size       = 0;
+        quota_local_t     *local      = NULL;
+        quota_inode_ctx_t *ctx        = NULL;
+        quota_dentry_t    *dentry     = NULL, *tmp = NULL;
+        call_stub_t       *stub       = NULL;
+        struct list_head   head       = {0, };
+        inode_t           *par_inode  = NULL;
+
+        priv = this->private;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        INIT_LIST_HEAD (&head);
+
+        GF_ASSERT (frame);
+        GF_VALIDATE_OR_GOTO ("quota", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+        local = quota_local_new ();
+        if (local == NULL) {
+                /* allocation failure, not an invalid argument */
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        frame->local = local;
+        local->loc.inode = inode_ref (fd->inode);
+
+        /* A missing context is tolerated here; it is only logged. */
+        quota_inode_ctx_get (fd->inode, this, &ctx, 0);
+        if (ctx == NULL) {
+                gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                              " (%s). If quota is not enabled recently and "
+                              "crawler has finished crawling, its an error",
+                              uuid_utoa (fd->inode->gfid));
+        }
+
+        stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count,
+                                off, flags, iobref, xdata);
+        if (stub == NULL) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        size = iov_length (vector, count);
+
+        parents = quota_add_parents_from_ctx (ctx, &head);
+
+        LOCK (&local->lock);
+        {
+                local->delta = size;
+                local->object_delta = 0;
+                /* presumably the number of limit checks that must finish
+                 * before the stub is resumed -- at least one */
+                local->link_count = (parents != 0) ? parents : 1;
+                local->stub = stub;
+        }
+        UNLOCK (&local->lock);
+
+        if (parents == 0) {
+                /* nameless lookup on this inode, allow quota to reconstruct
+                 * ancestry as part of check_limit.
+                 */
+                quota_check_limit (frame, fd->inode, this);
+        } else {
+                list_for_each_entry_safe (dentry, tmp, &head, next) {
+                        par_inode = do_quota_check_limit (frame, fd->inode,
+                                                          this, dentry,
+                                                          _gf_false);
+                        if (par_inode == NULL) {
+                                /* remove stale entry from inode ctx */
+                                quota_dentry_del (ctx, dentry->name,
+                                                  dentry->par);
+                                parents--;
+                                fail_count++;
+                        } else {
+                                inode_unref (par_inode);
+                        }
+                        __quota_dentry_free (dentry);
+                }
+
+                /* every recorded parent was stale: account for one more
+                 * check and run it on the inode itself */
+                if (parents == 0) {
+                        LOCK (&local->lock);
+                        {
+                                local->link_count++;
+                        }
+                        UNLOCK (&local->lock);
+                        quota_check_limit (frame, fd->inode, this);
+                }
+
+                /* give back the counts reserved for the stale parents, only
+                 * after the replacement check above has been issued */
+                while (fail_count != 0) {
+                        quota_link_count_decrement (frame);
+                        fail_count--;
+                }
+        }
+
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->writev, fd,
+                         vector, count, off, flags, iobref, xdata);
+        return 0;
+}
+
+
+/*
+ * mkdir callback: hands the child's mkdir result (including xdata) straight
+ * back to the caller through QUOTA_STACK_UNWIND; no quota state is touched.
+ */
+int32_t
quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
- priv = this->private;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_subtract (this, buf->ia_blocks * 512);
- }
+/*
+ * Continuation of quota_mkdir(), registered there as the call-stub function.
+ * Winds the mkdir to the child unless the frame-local carries a failure
+ * (local->op_ret == -1), in which case mkdir is unwound with local->op_errno.
+ * A missing frame-local is unwound with EINVAL.
+ */
+int32_t
+quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+ /* picked up before the op_ret test so the unwind path can use it */
+ op_errno = local->op_errno;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_mkdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+ mode, umask, xdata);
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
-int
-quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+/*
+ * mkdir entry point of the quota translator.
+ *
+ * With quota off the call is tail-wound to the child. Otherwise the loc is
+ * copied into a new frame-local, the real mkdir is parked in a call stub
+ * (quota_mkdir_helper) and a limit check is started on the parent directory
+ * with delta 0 and object_delta 1. Allocation / copy failures are unwound
+ * with ENOMEM.
+ */
+int32_t
+quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = 0, op_errno = 0;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND_STRICT (mkdir, frame, -1, ENOSPC, NULL, NULL,
- NULL, NULL);
- return 0;
-
- }
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ op_errno = ENOMEM;
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask,
+ xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND_STRICT (mkdir, frame, -1, ENOSPC, NULL, NULL,
- NULL, NULL);
- return 0;
+ /* a new directory consumes no data blocks here, only one object */
+ LOCK (&local->lock);
+ {
+ local->stub = stub;
+ local->delta = 0;
+ local->object_delta = 1;
+ local->link_count = 1;
}
+ UNLOCK (&local->lock);
- STACK_WIND (frame, quota_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
+ quota_check_limit (frame, loc->parent, this);
+ return 0;
- return 0;
+err:
+ QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+
+ return 0;
}
-int
+/*
+ * create callback.
+ *
+ * After a successful create, a quota context is fetched for the new inode
+ * (the final argument 1 presumably requests creation -- the failure message
+ * says "cannot create"), the returned iatt is cached in it and a dentry for
+ * (loc.name, parent gfid) is added. If either step fails the create is
+ * reported to the caller as failed with ENOMEM.
+ */
+int32_t
+quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        quota_local_t     *local      = frame->local;
+        quota_inode_ctx_t *ctx        = NULL;
+        quota_dentry_t    *new_dentry = NULL;
+        int32_t            rc         = -1;
+
+        if (op_ret < 0)
+                goto unwind;
+
+        rc = quota_inode_ctx_get (inode, this, &ctx, 1);
+        if ((rc == -1) || (ctx == NULL)) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        Q_MSG_INODE_CTX_GET_FAILED, "cannot create quota "
+                        "context in inode(gfid:%s)", uuid_utoa (inode->gfid));
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                ctx->buf = *buf;
+
+                new_dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
+                                                 local->loc.parent->gfid);
+                if (new_dentry == NULL) {
+                        gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                                Q_MSG_ENOMEM, "cannot create a new dentry "
+                                "(name:%s) for inode(gfid:%s)", local->loc.name,
+                                uuid_utoa (local->loc.inode->gfid));
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+        UNLOCK (&ctx->lock);
+
+unwind:
+        QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+                            preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * Continuation of quota_create(), registered there as the call-stub
+ * function. Winds the create to the child when the frame-local reports no
+ * failure; otherwise unwinds with local->op_errno (EINVAL when the
+ * frame-local itself is missing).
+ */
+int32_t
+quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+                     dict_t *xdata)
+{
+        quota_local_t *local    = frame->local;
+        int32_t        op_errno = EINVAL;
+
+        GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+        if (local->op_ret != -1) {
+                STACK_WIND (frame, quota_create_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->create, loc,
+                            flags, mode, umask, fd, xdata);
+                return 0;
+        }
+
+        op_errno = local->op_errno;
+
+unwind:
+        QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL,
+                            NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * create entry point of the quota translator.
+ *
+ * The call is tail-wound to the child when quota is off or when
+ * QUOTA_WIND_FOR_INTERNAL_FOP decides so from xdata. Otherwise the loc is
+ * copied into a new frame-local, the real create is parked in a call stub
+ * (quota_create_helper) and a limit check is started on the parent directory
+ * with delta 0 and object_delta 1.
+ *
+ * Every failure on this path is an allocation failure and is unwound with
+ * ENOMEM.
+ */
+int32_t
+quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+              mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        quota_priv_t  *priv     = NULL;
+        int32_t        ret      = -1;
+        quota_local_t *local    = NULL;
+        int32_t        op_errno = 0;
+        call_stub_t   *stub     = NULL;
+
+        priv = this->private;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+        QUOTA_WIND_FOR_INTERNAL_FOP (xdata, off);
+
+        local = quota_local_new ();
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        frame->local = local;
+
+        ret = loc_copy (&local->loc, loc);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        Q_MSG_ENOMEM, "loc_copy failed");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode,
+                                umask, fd, xdata);
+        if (stub == NULL) {
+                /* without this the caller would see -1 with errno 0 */
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        /* a new file consumes no data blocks yet, only one object */
+        LOCK (&local->lock);
+        {
+                local->link_count = 1;
+                local->stub = stub;
+                local->delta = 0;
+                local->object_delta = 1;
+        }
+        UNLOCK (&local->lock);
+
+        quota_check_limit (frame, loc->parent, this);
+        return 0;
+err:
+        QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+                            NULL, NULL, NULL);
+
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->create, loc,
+                         flags, mode, umask, fd, xdata);
+        return 0;
+}
+
+
+/*
+ * unlink callback: after a successful unlink, removes the dentry
+ * (loc.name, parent gfid) from the inode's quota context. A missing context
+ * is only logged. The child's result is always unwound unchanged.
+ */
+int32_t
quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_local *local = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
- local = frame->local;
+ if (op_ret < 0) {
+ goto out;
+ }
- if (local) {
- if (op_ret >= 0) {
- gf_quota_usage_subtract (this,
- local->stbuf.ia_blocks * 512);
- }
- loc_wipe (&local->loc);
- }
+ local = (quota_local_t *) frame->local;
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, postparent);
- return 0;
+ /* return value is not checked; value stays 0 (ctx NULL) if nothing is set */
+ inode_ctx_get (local->loc.inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ if (ctx == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context not set inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ quota_dentry_del (ctx, local->loc.name, local->loc.parent->gfid);
+
+out:
+ QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
}
-int
-quota_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+/*
+ * unlink entry point of the quota translator.
+ *
+ * With quota off the call is tail-wound to the child. Otherwise the loc is
+ * copied into a new frame-local (quota_unlink_cbk needs it to drop the
+ * dentry) and the unlink is wound to the child. If the frame-local cannot be
+ * set up the unlink is unwound with ENOMEM.
+ */
+int32_t
+quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- struct quota_local *local = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
- local = frame->local;
+ priv = this->private;
- if (op_ret >= 0) {
- if (buf->ia_nlink == 1) {
- local->stbuf = *buf;
- }
- }
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ goto err;
+ }
+
+ STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+
+ ret = 0;
+
+err:
+ if (ret == -1) {
+ /* both ways to get here are allocation failures; errno 0 would
+ * give the caller a failure without a reason */
+ QUOTA_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ }
- STACK_WIND (frame, quota_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- &local->loc);
+ return 0;
- return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
-int
-quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+/*
+ * link callback: after a successful link, makes sure the inode's quota
+ * context has a dentry for (loc.name, parent gfid) and caches the returned
+ * iatt in the context. A missing context is only logged at debug level.
+ * Failure to allocate the dentry turns the result into -1/ENOMEM.
+ */
+int32_t
+quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ char found = 0;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = (quota_local_t *) frame->local;
- priv = this->private;
+ ret = quota_inode_ctx_get (inode, this, &ctx, 0);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
- if (priv->disk_usage_limit) {
- local = GF_CALLOC (1, sizeof (struct quota_local),
- gf_quota_mt_quota_local);
- frame->local = local;
+ LOCK (&ctx->lock);
+ {
+ /* look for an existing dentry with the same name and parent */
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, local->loc.name) == 0) &&
+ (gf_uuid_compare (local->loc.parent->gfid,
+ dentry->par) == 0)) {
+ found = 1;
+
+ gf_msg_debug (this->name, 0, "new entry being"
+ " linked (name:%s) for inode "
+ "(gfid:%s) is already present "
+ "in inode-dentry-list",
+ dentry->name,
+ uuid_utoa (local->loc.inode->gfid));
+ break;
+ }
+ }
- loc_copy (&local->loc, loc);
+ if (!found) {
+ dentry = __quota_dentry_new (ctx,
+ (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM,
+ "cannot create a new dentry (name:%s)"
+ "for inode(gfid:%s)", local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ /* skips the ctx->buf update below */
+ goto unlock;
+ }
+ }
- STACK_WIND (frame,
- quota_unlink_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
- }
+ ctx->buf = *buf;
+ }
+unlock:
+ UNLOCK (&ctx->lock);
- STACK_WIND (frame, quota_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+out:
+ QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
}
-int
-quota_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+/*
+ * Continuation of quota_link(), registered there as the call-stub function.
+ * Winds the link to the child unless the frame-local carries a failure
+ * (local->op_ret == -1), in which case link is unwound with local->op_errno.
+ * A missing frame-local is unwound with EINVAL.
+ */
+int32_t
+quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- struct quota_local *local = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
- local = frame->local;
+ /* NOTE(review): priv is assigned but not used in this function */
+ priv = this->private;
- if (local) {
- if (op_ret >= 0) {
- gf_quota_usage_subtract (this, local->stbuf.ia_blocks * 512);
- }
- loc_wipe (&local->loc);
- }
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+ op_errno = local->op_errno;
+
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, oldloc,
+ newloc, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * Continuation installed by quota_link() as local->fop_continue_cbk.
+ *
+ * - A failure recorded in the frame-local is unwound as is.
+ * - If xdata carries GLUSTERFS_INTERNAL_FOP_KEY the link is treated like a
+ * rename and the common ancestor of source inode and destination parent is
+ * looked up; failure to find one is unwound with ESTALE.
+ * - Otherwise the link is treated as a new file. When source and destination
+ * parent are the same directory the link is wound directly, with no limit
+ * check.
+ * In the remaining cases a limit check is started on the destination parent
+ * with the cached size of the source inode (0 when it has no quota context)
+ * and object_delta 1.
+ */
+void
+quota_link_continue (call_frame_t *frame)
+{
+ int32_t ret = -1;
+ int32_t op_errno = EIO;
+ quota_local_t *local = NULL;
+ uuid_t common_ancestor = {0};
+ xlator_t *this = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_t *src_parent = NULL;
+ inode_t *dst_parent = NULL;
+
+ local = frame->local;
+ this = THIS;
+
+ if (local->op_ret < 0) {
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ if (local->xdata &&
+ dict_get (local->xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ /* Treat link as rename, crawl upwards only till common ancestor
+ */
+ ret = quota_find_common_ancestor (local->oldloc.inode,
+ local->newloc.parent,
+ &common_ancestor);
+ if (ret < 0 || gf_uuid_is_null(common_ancestor)) {
+ gf_msg (this->name, GF_LOG_ERROR, ESTALE,
+ Q_MSG_ANCESTRY_BUILD_FAILED, "failed to get "
+ "common_ancestor for %s and %s",
+ local->oldloc.path, local->newloc.path);
+ op_errno = ESTALE;
+ goto err;
+ }
+ } else {
+ /* Treat link as a new file.
+ * TODO: Currently marker accounts twice for the links created
+ * across directories.
+ * This needs re-visit if marker accounts only once
+ * for the links created across directories
+ */
+ if (local->oldloc.parent)
+ src_parent = inode_ref (local->oldloc.parent);
+ else
+ src_parent = inode_parent (local->oldloc.inode, 0,
+ NULL);
+ dst_parent = local->newloc.parent;
+
+ /* No need to check quota limit if src and dst parents are same.
+ * inode_parent() may not find a parent, so the gfids are only
+ * compared when both inodes are actually there.
+ */
+ if (src_parent == dst_parent ||
+ (src_parent != NULL && dst_parent != NULL &&
+ gf_uuid_compare (src_parent->gfid, dst_parent->gfid) == 0)) {
+ if (src_parent != NULL)
+ inode_unref (src_parent);
+ goto wind;
+ }
+
+ if (src_parent != NULL)
+ inode_unref (src_parent);
+ }
+
+ quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa (local->oldloc.inode->gfid));
+ }
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 1;
+ local->delta = (ctx != NULL) ? ctx->buf.ia_blocks * 512 : 0;
+ local->object_delta = 1;
+ gf_uuid_copy (local->common_ancestor, common_ancestor);
+ }
+ UNLOCK (&local->lock);
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, postparent);
- return 0;
+ quota_check_limit (frame, local->newloc.parent, this);
+ return;
+
+err:
+ QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return;
+
+wind:
+ STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, &(local->oldloc),
+ &(local->newloc), local->xdata);
+ return;
}
+/*
+ * link entry point of the quota translator.
+ *
+ * With quota off the call is tail-wound to the child. Otherwise xdata and
+ * the locs are stored in a new frame-local. A link inside one directory
+ * (same parent gfid) is wound right away without a limit check. For all
+ * other links the real fop is parked in a call stub (quota_link_helper) and
+ * two ancestry checks are started: one for the destination parent and one
+ * for the source parent, or for the source inode when it has no parent.
+ * quota_link_continue is installed as the continuation callback.
+ *
+ * Allocation / copy failures are unwound with ENOMEM.
+ */
+int32_t
+quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
-int
-quota_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = (void *) local;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
+ /* local->loc mirrors newloc; quota_link_cbk reads name/parent from it */
+ ret = loc_copy (&local->loc, newloc);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy (&local->oldloc, oldloc);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy (&local->newloc, newloc);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ /* No need to check quota limit if src and dst parents are same */
+ if (oldloc->parent && newloc->parent &&
+ !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) {
+ /* second argument of gf_msg_debug is an errno value, not a log
+ * level; nothing failed here, so pass 0 */
+ gf_msg_debug (this->name, 0, "link %s -> %s are "
+ "in the same directory, so skip check limit",
+ oldloc->path, newloc->path);
+ goto wind;
+ }
+
+ stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK (&local->lock);
+ {
+ /* two check_ancestory calls are issued below */
+ local->link_count = 2;
+ local->fop_continue_cbk = quota_link_continue;
+ local->stub = stub;
+ }
+ UNLOCK (&local->lock);
+
+ check_ancestory (frame, newloc->parent);
+
+ /* source parent can be NULL, so do check_ancestry on a file */
+ if (oldloc->parent)
+ check_ancestory (frame, oldloc->parent);
+ else
+ check_ancestory (frame, oldloc->inode);
+
+ return 0;
+
+err:
+ QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc,
+ newloc, xdata);
+ return 0;
+
+wind:
+ STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc,
+ newloc, xdata);
+ return 0;
+}
+
+
+/*
+ * rename callback.
+ *
+ * Only regular files and symlinks are handled here; for anything else, and
+ * for a failed rename, the child's result is unwound unchanged. For a
+ * tracked file the dentry list in the inode's quota context is updated: the
+ * dentry for the old (name, parent) is freed, a dentry for the new
+ * (name, parent) is added unless already present, and the returned iatt is
+ * cached. Failure to allocate the new dentry turns the result into
+ * -1/ENOMEM.
+ */
+int32_t
+quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- struct quota_local *local = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *old_dentry = NULL, *dentry = NULL;
+ char new_dentry_found = 0;
+
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
+ local = frame->local;
- if (op_ret >= 0) {
- local->stbuf = *buf;
- }
+ GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+ /* dentries are only maintained for regular files and symlinks */
+ if (!(QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)))
+ goto out;
+
+ ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa (local->oldloc.inode->gfid));
+
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ /* one pass finds both the dentry to drop and whether the new
+ * one already exists */
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, local->oldloc.name) == 0) &&
+ (gf_uuid_compare (local->oldloc.parent->gfid,
+ dentry->par) == 0)) {
+ old_dentry = dentry;
+ } else if ((strcmp (dentry->name,
+ local->newloc.name) == 0) &&
+ (gf_uuid_compare (local->newloc.parent->gfid,
+ dentry->par) == 0)) {
+ new_dentry_found = 1;
+ gf_msg_debug (this->name, 0, "new entry being "
+ "linked (name:%s) for inode (gfid:%s) "
+ "is in inode-dentry-list", dentry->name,
+ uuid_utoa (local->oldloc.inode->gfid));
+ }
+
+ if (old_dentry && new_dentry_found)
+ break;
+ }
+
+ if (old_dentry != NULL) {
+ __quota_dentry_free (old_dentry);
+ } else {
+ gf_msg_debug (this->name, 0, "dentry corresponding"
+ "the path just renamed (name:%s) is not"
+ " present", local->oldloc.name);
+ }
+
+ if (!new_dentry_found) {
+ dentry = __quota_dentry_new (ctx,
+ (char *)local->newloc.name,
+ local->newloc.parent->gfid);
+ if (dentry == NULL) {
+ /* log the gfid of the renamed inode (oldloc);
+ * newloc.inode may be NULL when the target did
+ * not exist before the rename */
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM,
+ "cannot create a new dentry (name:%s) "
+ "for inode(gfid:%s)",
+ local->newloc.name,
+ uuid_utoa (local->oldloc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+
+ ctx->buf = *buf;
+ }
+unlock:
+ UNLOCK (&ctx->lock);
- STACK_WIND (frame, quota_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- &local->loc);
+out:
+ QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
- return 0;
+ return 0;
}
-int
-quota_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+/*
+ * Continuation of quota_rename(), registered there as the call-stub
+ * function. Winds the rename to the child unless the frame-local carries a
+ * failure (local->op_ret == -1), in which case rename is unwound with
+ * local->op_errno. A missing frame-local is unwound with EINVAL.
+ */
+int32_t
+quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
- priv = this->private;
+ /* NOTE(review): priv is assigned but not used in this function */
+ priv = this->private;
- if (priv->disk_usage_limit) {
- local = GF_CALLOC (1, sizeof (struct quota_local),
- gf_quota_mt_quota_local);
- frame->local = local;
+ local = frame->local;
- loc_copy (&local->loc, loc);
+ GF_VALIDATE_OR_GOTO ("quota", local, unwind);
- STACK_WIND (frame, quota_rmdir_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- return 0;
- }
+ op_errno = local->op_errno;
+
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
+ STACK_WIND (frame, quota_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc,
+ newloc, xdata);
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
}
-int
+/*
+ * Callback handed to quota_validate() by quota_rename_continue() when a
+ * directory is renamed.
+ *
+ * Reads QUOTA_SIZE_KEY from xdata, stores its value (converted with ntoh64)
+ * as local->delta, sets object_delta to 1 and starts the limit check on the
+ * destination parent. A NULL this/xdata, a failed operation or a missing
+ * size key is passed to quota_handle_validate_error().
+ */
+static int32_t
+quota_rename_get_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, inode_t *inode,
+                           struct iatt *buf, dict_t *xdata,
+                           struct iatt *postparent)
+{
+        quota_local_t *local    = NULL;
+        int64_t       *size_ptr = NULL;
+        int32_t        rc       = 0;
+
+        GF_ASSERT (frame);
+        GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, out, op_errno,
+                                        EINVAL);
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, out, op_errno,
+                                        EINVAL);
+
+        local = frame->local;
+        GF_ASSERT (local);
+
+        /* a single limit check follows this callback */
+        local->link_count = 1;
+
+        if (op_ret < 0)
+                goto out;
+
+        rc = dict_get_bin (xdata, QUOTA_SIZE_KEY, (void **) &size_ptr);
+        if (rc < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        Q_MSG_SIZE_KEY_MISSING, "size key not present in dict");
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        local->delta = ntoh64 (*size_ptr);
+        local->object_delta = 1;
+
+        quota_check_limit (frame, local->newloc.parent, this);
+        return 0;
+
+out:
+        quota_handle_validate_error (frame, -1, op_errno);
+        return 0;
+}
+
+/*
+ * Continuation installed by quota_rename() as local->fop_continue_cbk.
+ *
+ * Unwinds a failure recorded in the frame-local. Otherwise it finds the
+ * common ancestor of the old and new parent (ESTALE if there is none) and
+ * stores it in the frame-local. Then:
+ *  - regular file / symlink: the size cached in the inode's quota context
+ *    (0 if there is no context) becomes the delta and the limit check is
+ *    started on the new parent;
+ *  - directory: quota_validate() is asked to fetch the size, and
+ *    quota_rename_get_size_cbk continues from there;
+ *  - anything else: the limit check is started with the delta values
+ *    already present in the frame-local.
+ */
+void
+quota_rename_continue (call_frame_t *frame)
+{
+        quota_local_t     *local    = frame->local;
+        xlator_t          *this     = THIS;
+        quota_inode_ctx_t *ctx      = NULL;
+        uuid_t             ancestor = {0};
+        int32_t            op_errno = EIO;
+        int32_t            rc       = -1;
+
+        if (local->op_ret < 0) {
+                op_errno = local->op_errno;
+                goto err;
+        }
+
+        rc = quota_find_common_ancestor (local->oldloc.parent,
+                                         local->newloc.parent,
+                                         &ancestor);
+        if (rc < 0 || gf_uuid_is_null(ancestor)) {
+                gf_msg (this->name, GF_LOG_ERROR, ESTALE,
+                        Q_MSG_ANCESTRY_BUILD_FAILED, "failed to get "
+                        "common_ancestor for %s and %s",
+                        local->oldloc.path, local->newloc.path);
+                op_errno = ESTALE;
+                goto err;
+        }
+
+        LOCK (&local->lock);
+        {
+                local->link_count = 1;
+                gf_uuid_copy (local->common_ancestor, ancestor);
+        }
+        UNLOCK (&local->lock);
+
+        if (IA_ISDIR (local->oldloc.inode->ia_type) &&
+            !(QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type))) {
+                /* directory size is not cached here; ask for it and let
+                 * quota_rename_get_size_cbk start the limit check */
+                rc = quota_validate (frame, local->oldloc.inode, this,
+                                     quota_rename_get_size_cbk);
+                if (rc){
+                        op_errno = -rc;
+                        goto err;
+                }
+
+                return;
+        }
+
+        if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) {
+                quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0);
+                if (ctx != NULL) {
+                        /* FIXME: We need to account for the size occupied by
+                         * this inode on the target directory. To avoid double
+                         * accounting, we need to modify enforcer to perform
+                         * quota_check_limit only up till the least common
+                         * ancestor directory inode*/
+
+                        /* FIXME: The following code assumes that regular files
+                         * and link files are present, in their entirety, in a
+                         * single brick. This *assumption is invalid in the
+                         * case of stripe.*/
+
+                        local->delta = ctx->buf.ia_blocks * 512;
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                Q_MSG_INODE_CTX_GET_FAILED,
+                                "quota context not set in inode (gfid:%s), "
+                                "considering file size as zero while enforcing "
+                                "quota on new ancestry",
+                                uuid_utoa (local->oldloc.inode->gfid));
+
+                        local->delta = 0;
+                }
+
+                local->object_delta = 1;
+        }
+
+        quota_check_limit (frame, local->newloc.parent, this);
+        return;
+
+err:
+        QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
+                            NULL, NULL, NULL, NULL, NULL);
+        return;
+
+}
+
+/*
+ * rename entry point of the quota translator.
+ *
+ * With quota off the call is tail-wound to the child. Otherwise both locs
+ * are copied into a new frame-local. A rename inside one directory (same
+ * parent gfid) is wound right away without a limit check. For all other
+ * renames the real fop is parked in a call stub (quota_rename_helper),
+ * quota_rename_continue is installed as continuation, and ancestry checks
+ * are started for the new and the old parent.
+ *
+ * Allocation / copy failures are unwound with ENOMEM.
+ */
+int32_t
+quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+              loc_t *newloc, dict_t *xdata)
+{
+        quota_priv_t  *priv     = this->private;
+        quota_local_t *local    = NULL;
+        call_stub_t   *stub     = NULL;
+        int32_t        op_errno = ENOMEM;
+        int32_t        rc       = -1;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        local = quota_local_new ();
+        if (local == NULL)
+                goto err;
+
+        frame->local = local;
+
+        rc = loc_copy (&local->oldloc, oldloc);
+        if (rc < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+                        "loc_copy failed");
+                goto err;
+        }
+
+        rc = loc_copy (&local->newloc, newloc);
+        if (rc < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+                        "loc_copy failed");
+                goto err;
+        }
+
+        /* No need to check quota limit if src and dst parents are same */
+        if (oldloc->parent && newloc->parent &&
+            !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) {
+                gf_msg_debug (this->name, 0, "rename %s -> %s are "
+                              "in the same directory, so skip check limit",
+                              oldloc->path, newloc->path);
+                goto wind;
+        }
+
+        stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
+                                xdata);
+        if (stub == NULL)
+                goto err;
+
+        LOCK (&local->lock);
+        {
+                /* link_count here tell how many check_ancestry should be done
+                 * before continuing the FOP
+                 */
+                local->link_count = 2;
+                local->stub = stub;
+                local->fop_continue_cbk = quota_rename_continue;
+        }
+        UNLOCK (&local->lock);
+
+        check_ancestory (frame, newloc->parent);
+        check_ancestory (frame, oldloc->parent);
+        return 0;
+
+err:
+        QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
+                            NULL, NULL, NULL, NULL, NULL);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->rename, oldloc,
+                         newloc, xdata);
+        return 0;
+
+wind:
+        STACK_WIND (frame, quota_rename_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc,
+                    newloc, xdata);
+        return 0;
+}
+
+
+/*
+ * symlink callback: after a successful symlink, fetches the quota context of
+ * local->loc.inode, caches the returned iatt in it and adds a dentry for
+ * (loc.name, parent gfid). A missing context is only logged at debug level;
+ * failure to allocate the dentry turns the result into -1/ENOMEM.
+ */
+int32_t
quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
- priv = this->private;
+ local = frame->local;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_add (this, buf->ia_blocks * 512);
- }
+ /* return value is not checked; only the resulting ctx pointer is */
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 1);
+ if (ctx == NULL) {
+ gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "cannot create "
+ "a new dentry (name:%s) for inode(gfid:%s)",
+ local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ return 0;
}
+/*
+ * Continuation of quota_symlink: this is the function handed to
+ * fop_symlink_stub, so it is presumably invoked when the stub stored in
+ * local->stub is resumed after the limit check (confirm against
+ * quota_check_limit).
+ *
+ * If the check recorded a failure (local->op_ret == -1) the fop is unwound
+ * with local->op_errno; a missing local unwinds with EINVAL. Otherwise the
+ * symlink is wound to the first child with quota_symlink_cbk as callback.
+ * Always returns 0.
+ */
int
-quota_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc)
+quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- priv = this->private;
+ local = frame->local;
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND_STRICT (symlink, frame, -1, ENOSPC, NULL, NULL,
- NULL, NULL);
- return 0;
-
- }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND_STRICT (symlink, frame, -1, ENOSPC, NULL, NULL,
- NULL, NULL);
- return 0;
+ GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
}
- STACK_WIND (frame, quota_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
+ STACK_WIND (frame, quota_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
+/*
+ * symlink fop entry point.
+ *
+ * With quota off the call is tail-wound to the first child untouched.
+ * Otherwise a quota_local_t is attached to the frame, loc is copied into it,
+ * and the fop is parked in a call stub (quota_symlink_helper). The pending
+ * usage is recorded as delta = strlen (linkpath) and object_delta = 1 before
+ * quota_check_limit is started on the parent directory.
+ *
+ * Any allocation/copy/stub failure unwinds the fop with ENOMEM. Always
+ * returns 0.
+ */
int
-quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = this->private;
- int ret = 0;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_add (this, buf->ia_blocks * 512);
+ priv = this->private;
- ret = fd_ctx_set (fd, this, 1);
- }
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc,
+ umask, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK (&local->lock);
+ {
+ local->stub = stub;
+ local->delta = strlen (linkpath);
+ local->object_delta = 1;
+ local->link_count = 1;
+ }
+ UNLOCK (&local->lock);
+
+ quota_check_limit (frame, loc->parent, this);
+ return 0;
+
+err:
+ QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
}
-int
-quota_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+/*
+ * Callback for truncate. On success, looks up the quota context of the inode
+ * saved in frame->local (a missing context is only logged at debug level) and
+ * stores the post-operation iatt in it under ctx->lock. The original result
+ * is then unwound unchanged via QUOTA_STACK_UNWIND. Always returns 0.
+ */
+int32_t
+quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- priv = this->private;
+ if (op_ret < 0) {
+ goto out;
+ }
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND_STRICT (create, frame, -1, ENOSPC, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-
- }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND_STRICT (create, frame, -1, ENOSPC, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
}
+ UNLOCK (&ctx->lock);
- STACK_WIND (frame, quota_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+out:
+ QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
-int
-quota_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+/*
+ * truncate fop entry point.
+ *
+ * With quota off the call is tail-wound to the first child. Otherwise a
+ * quota_local_t holding a copy of loc is attached to the frame (so that
+ * quota_truncate_cbk can find the inode) and the fop is wound to the first
+ * child. Allocation or loc_copy failure unwinds with ENOMEM. Always
+ * returns 0.
+ */
+int32_t
+quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- int ret = 0;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ goto err;
+ }
- if (op_ret >= 0)
- ret = fd_ctx_set (fd, this, 1);
+ STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
+ return 0;
+
+err:
+ QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+
+/*
+ * Callback for ftruncate: when the fop succeeded, refresh the iatt cached in
+ * the quota context of the inode saved in frame->local, then unwind the
+ * unmodified result. A missing context is only logged at debug level.
+ */
+int32_t
+quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        quota_inode_ctx_t *qctx  = NULL;
+        quota_local_t     *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+
+                GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+                quota_inode_ctx_get (local->loc.inode, this, &qctx, 0);
+                if (qctx != NULL) {
+                        /* Update the cached attributes under the ctx lock. */
+                        LOCK (&qctx->lock);
+                        {
+                                qctx->buf = *postbuf;
+                        }
+                        UNLOCK (&qctx->lock);
+                } else {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+        }
+
+out:
+        QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
+                            postbuf, xdata);
+        return 0;
+}
+
+
+/*
+ * ftruncate fop entry point. With quota off the call is tail-wound to the
+ * first child; otherwise a local holding a reference on fd->inode is attached
+ * to the frame and the fop is wound with quota_ftruncate_cbk as callback.
+ * Failure to allocate the local unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                 dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto err;
+
+        /* The callback locates the quota ctx through this inode reference. */
+        qlocal->loc.inode = inode_ref (fd->inode);
+        frame->local = qlocal;
+
+        STACK_WIND (frame, quota_ftruncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd,
+                    offset, xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->ftruncate, fd,
+                         offset, xdata);
+        return 0;
+
+err:
+        QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Answer a quota limit listing request directly from this translator.
+ *
+ * Builds the string "<size>,<hard_lim>" from the quota context stored on
+ * @inode (or a fixed notice when quota is disabled), places it in a new dict
+ * under the key @name and unwinds the frame as a successful getxattr.
+ *
+ * Returns 0 when the frame has been unwound. Returns a negative value when
+ * nothing was unwound (no usable inode context, dict allocation or insertion
+ * failure); the callers in this file then wind the request to the child.
+ *
+ * The value handed to dict_set_str is the on-stack buffer; the dict is
+ * unref'd before this function returns.
+ */
+int32_t
+quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this,
+                             inode_t *inode, const char *name)
+{
+        int32_t            ret              = 0;
+        char               dir_limit [1024] = {0, };
+        dict_t            *dict             = NULL;
+        quota_inode_ctx_t *ctx              = NULL;
+        uint64_t           value            = 0;
+        quota_priv_t      *priv             = NULL;
+
+        priv = this->private;
+        if (!priv->is_quota_on) {
+                snprintf (dir_limit, sizeof (dir_limit),
+                          "Quota is disabled please turn on");
+                goto dict_set;
+        }
+
+        ret = inode_ctx_get (inode, this, &value);
+        if (ret < 0)
+                goto out;
+
+        ctx = (quota_inode_ctx_t *)(unsigned long)value;
+        if (ctx == NULL) {
+                /* A stored value of 0 must not be dereferenced; report
+                 * "not handled" so the caller falls back to the child. */
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (dir_limit, sizeof (dir_limit), "%"PRId64",%"PRId64,
+                  ctx->size, ctx->hard_lim);
+
+dict_set:
+        dict = dict_new ();
+        if (dict == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (dict, (char *) name, dir_limit);
+        if (ret < 0)
+                goto out;
+
+        gf_msg_debug (this->name, 0, "str = %s", dir_limit);
+
+        QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL);
+
+        ret = 0;
+
+out:
+        if (dict)
+                dict_unref (dict);
+        return ret;
+}
+
+
+/*
+ * fgetxattr fop entry point. A request for "trusted.limit.list" (compared
+ * case-insensitively) is answered by quota_send_dir_limit_to_cli; if that
+ * helper does not return 0, or for any other name, the fop is wound to the
+ * first child with the default callback. Always returns 0.
+ */
+int32_t
+quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+        if ((name != NULL) &&
+            (strcasecmp (name, "trusted.limit.list") == 0) &&
+            (quota_send_dir_limit_to_cli (frame, this, fd->inode,
+                                          name) == 0)) {
+                /* The helper already unwound the frame. */
+                return 0;
+        }
+
+        STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+        return 0;
+}
+
+
+/*
+ * getxattr fop entry point. "trusted.limit.list" (case-insensitive) is served
+ * locally through quota_send_dir_limit_to_cli; when that helper reports a
+ * non-zero result, or for any other key, the request is wound to the first
+ * child with the default callback. Always returns 0.
+ */
+int32_t
+quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        int32_t handled = -1;
+
+        if (name && (strcasecmp (name, "trusted.limit.list") == 0)) {
+                handled = quota_send_dir_limit_to_cli (frame, this,
+                                                       loc->inode, name);
+        }
+
+        if (handled != 0) {
+                STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for stat.
+ *
+ * On success the returned iatt is cached in the quota context of the inode
+ * saved in frame->local. When no context exists nothing is cached; the
+ * situation is logged at debug level unless the iatt describes a directory.
+ * The original result is always unwound unchanged. Always returns 0.
+ */
+int32_t
+quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                dict_t *xdata)
+{
+        quota_local_t     *local = NULL;
+        quota_inode_ctx_t *ctx   = NULL;
+
+        if (op_ret < 0) {
+                goto out;
+        }
+
+        local = frame->local;
+
+        GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+        quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
+        if (ctx == NULL) {
+                /* buf is treated as optional further down, so it must not
+                 * be dereferenced unguarded here either. */
+                if (buf && !IA_ISDIR (buf->ia_type)) {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+
+                goto out;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                if (buf)
+                        ctx->buf = *buf;
+        }
+        UNLOCK (&ctx->lock);
+
+out:
+        QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+
+/*
+ * stat fop entry point. With quota off the call is tail-wound to the first
+ * child. Otherwise a local carrying a copy of loc is attached to the frame
+ * and the fop is wound with quota_stat_cbk as callback. Allocation or
+ * loc_copy failure unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        frame->local = qlocal;
+
+        if (loc_copy (&qlocal->loc, loc) < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        Q_MSG_ENOMEM, "loc_copy failed");
+                goto unwind;
+        }
+
+        STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat, loc,
+                    xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->stat, loc,
+                         xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for fstat.
+ *
+ * On success the returned iatt is cached in the quota context of the inode
+ * saved in frame->local. When no context exists nothing is cached; the
+ * situation is logged at debug level unless the iatt describes a directory.
+ * The original result is always unwound unchanged. Always returns 0.
+ */
+int32_t
+quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                 dict_t *xdata)
+{
+        quota_local_t     *local = NULL;
+        quota_inode_ctx_t *ctx   = NULL;
+
+        if (op_ret < 0) {
+                goto out;
+        }
+
+        local = frame->local;
+
+        GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+        quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
+        if (ctx == NULL) {
+                /* buf is treated as optional further down, so it must not
+                 * be dereferenced unguarded here either. */
+                if (buf && !IA_ISDIR (buf->ia_type)) {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+
+                goto out;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                if (buf)
+                        ctx->buf = *buf;
+        }
+        UNLOCK (&ctx->lock);
+
+out:
+        QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+
+/*
+ * fstat fop entry point. With quota off the call is tail-wound to the first
+ * child; otherwise a local holding a reference on fd->inode is attached to
+ * the frame and the fop is wound with quota_fstat_cbk as callback. Failure to
+ * allocate the local unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        qlocal->loc.inode = inode_ref (fd->inode);
+        frame->local = qlocal;
+
+        STACK_WIND (frame, quota_fstat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd,
+                    xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->fstat, fd,
+                         xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for readlink: on success, store the returned iatt in the quota
+ * context of the inode saved in frame->local (a missing context is only
+ * logged at debug level), then unwind the unmodified result.
+ */
+int32_t
+quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, const char *path,
+                    struct iatt *buf, dict_t *xdata)
+{
+        quota_inode_ctx_t *qctx  = NULL;
+        quota_local_t     *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+
+                GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+                quota_inode_ctx_get (local->loc.inode, this, &qctx, 0);
+                if (qctx != NULL) {
+                        LOCK (&qctx->lock);
+                        {
+                                qctx->buf = *buf;
+                        }
+                        UNLOCK (&qctx->lock);
+                } else {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+        }
+
+out:
+        QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf,
+                            xdata);
+        return 0;
+}
+
+
+/*
+ * readlink fop entry point. With quota off the call is tail-wound to the
+ * first child. Otherwise a local carrying a copy of loc is attached to the
+ * frame and the fop is wound with quota_readlink_cbk as callback. Allocation
+ * or loc_copy failure unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+                dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        frame->local = qlocal;
+
+        if (loc_copy (&qlocal->loc, loc) < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        Q_MSG_ENOMEM, "loc_copy failed");
+                goto unwind;
+        }
+
+        STACK_WIND (frame, quota_readlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, loc,
+                    size, xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->readlink, loc,
+                         size, xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for readv: on success, store the returned iatt in the quota
+ * context of the inode saved in frame->local (a missing context is only
+ * logged at debug level), then unwind the unmodified result.
+ */
+int32_t
+quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iovec *vector,
+                 int32_t count, struct iatt *buf, struct iobref *iobref,
+                 dict_t *xdata)
+{
+        quota_inode_ctx_t *qctx  = NULL;
+        quota_local_t     *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+
+                GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+                quota_inode_ctx_get (local->loc.inode, this, &qctx, 0);
+                if (qctx != NULL) {
+                        LOCK (&qctx->lock);
+                        {
+                                qctx->buf = *buf;
+                        }
+                        UNLOCK (&qctx->lock);
+                } else {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+        }
+
+out:
+        QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+                            buf, iobref, xdata);
+        return 0;
+}
+
+
+/*
+ * readv fop entry point. With quota off the call is tail-wound to the first
+ * child; otherwise a local holding a reference on fd->inode is attached to
+ * the frame and the fop is wound with quota_readv_cbk as callback. Failure to
+ * allocate the local unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t offset, uint32_t flags, dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        qlocal->loc.inode = inode_ref (fd->inode);
+        frame->local = qlocal;
+
+        STACK_WIND (frame, quota_readv_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd,
+                    size, offset, flags, xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->readv, fd,
+                         size, offset, flags, xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL,
+                            NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for fsync: on success, store the post-operation iatt in the quota
+ * context of the inode saved in frame->local (a missing context is only
+ * logged at debug level), then unwind the unmodified result.
+ */
+int32_t
+quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+        quota_inode_ctx_t *qctx  = NULL;
+        quota_local_t     *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+
+                GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+                quota_inode_ctx_get (local->loc.inode, this, &qctx, 0);
+                if (qctx != NULL) {
+                        LOCK (&qctx->lock);
+                        {
+                                qctx->buf = *postbuf;
+                        }
+                        UNLOCK (&qctx->lock);
+                } else {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+        }
+
+out:
+        QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+                            xdata);
+        return 0;
+}
+
+
+/*
+ * fsync fop entry point. With quota off the call is tail-wound to the first
+ * child; otherwise a local holding a reference on fd->inode is attached to
+ * the frame and the fop is wound with quota_fsync_cbk as callback. Failure to
+ * allocate the local unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+             dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        qlocal->loc.inode = inode_ref (fd->inode);
+        frame->local = qlocal;
+
+        STACK_WIND (frame, quota_fsync_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsync, fd,
+                    flags, xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->fsync, fd,
+                         flags, xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for setattr.
+ *
+ * On success the post-operation iatt is cached in the quota context of the
+ * inode saved in frame->local. When no context exists nothing is cached; the
+ * situation is logged at debug level unless statpost describes a directory.
+ * The original result is always unwound unchanged. Always returns 0.
+ */
+int32_t
+quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                   struct iatt *statpost, dict_t *xdata)
+{
+        quota_local_t     *local = NULL;
+        quota_inode_ctx_t *ctx   = NULL;
+
+        if (op_ret < 0) {
+                goto out;
+        }
+
+        local = frame->local;
+
+        GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+        quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
+        if (ctx == NULL) {
+                /* statpost is treated as optional further down, so it must
+                 * not be dereferenced unguarded here either. */
+                if (statpost && !IA_ISDIR (statpost->ia_type)) {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+
+                goto out;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                if (statpost)
+                        ctx->buf = *statpost;
+        }
+        UNLOCK (&ctx->lock);
+
+out:
+        QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+                            statpost, xdata);
+        return 0;
+}
+
+
+/*
+ * setattr fop entry point. With quota off the call is tail-wound to the first
+ * child. Otherwise a local carrying a copy of loc is attached to the frame
+ * and the fop is wound with quota_setattr_cbk as callback. Allocation or
+ * loc_copy failure unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        frame->local = qlocal;
+
+        if (loc_copy (&qlocal->loc, loc) < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        Q_MSG_ENOMEM, "loc_copy failed");
+                goto unwind;
+        }
+
+        STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setattr, loc,
+                    stbuf, valid, xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->setattr, loc,
+                         stbuf, valid, xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for fsetattr: on success, store the post-operation iatt in the
+ * quota context of the inode saved in frame->local. When no context exists
+ * this is logged at debug level unless statpost describes a directory. The
+ * original result is always unwound unchanged.
+ */
+int32_t
+quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                    struct iatt *statpost, dict_t *xdata)
+{
+        quota_inode_ctx_t *qctx  = NULL;
+        quota_local_t     *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+
+                GF_VALIDATE_OR_GOTO ("quota", local, out);
+
+                quota_inode_ctx_get (local->loc.inode, this, &qctx, 0);
+                if (qctx != NULL) {
+                        LOCK (&qctx->lock);
+                        {
+                                qctx->buf = *statpost;
+                        }
+                        UNLOCK (&qctx->lock);
+                } else if (!IA_ISDIR (statpost->ia_type)) {
+                        gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+                                      " (%s). If quota is not enabled recently and "
+                                      "crawler has finished crawling, its an error",
+                                      uuid_utoa (local->loc.inode->gfid));
+                }
+        }
+
+out:
+        QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre,
+                            statpost, xdata);
+        return 0;
+}
+
+
+/*
+ * fsetattr fop entry point. With quota off the call is tail-wound to the
+ * first child; otherwise a local holding a reference on fd->inode is attached
+ * to the frame and the fop is wound with quota_fsetattr_cbk as callback.
+ * Failure to allocate the local unwinds with ENOMEM. Always returns 0.
+ */
+int32_t
+quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        quota_priv_t  *priv   = this->private;
+        quota_local_t *qlocal = NULL;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        qlocal = quota_local_new ();
+        if (!qlocal)
+                goto unwind;
+
+        qlocal->loc.inode = inode_ref (fd->inode);
+        frame->local = qlocal;
+
+        STACK_WIND (frame, quota_fsetattr_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr, fd,
+                    stbuf, valid, xdata);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->fsetattr, fd,
+                         stbuf, valid, xdata);
+        return 0;
+
+unwind:
+        QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Callback for mknod. On success, obtains the quota context of the new inode
+ * (failing the fop with ENOMEM if that is not possible), caches the returned
+ * iatt in it and records a dentry (name + parent gfid taken from
+ * frame->local) under the ctx lock. A dentry allocation failure also turns
+ * the result into -1/ENOMEM. The result is unwound via QUOTA_STACK_UNWIND.
+ */
+int32_t
+quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *buf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        quota_local_t     *qlocal = frame->local;
+        quota_inode_ctx_t *qctx   = NULL;
+        quota_dentry_t    *entry  = NULL;
+        int32_t            rc     = -1;
+
+        if (op_ret < 0)
+                goto unwind;
+
+        rc = quota_inode_ctx_get (inode, this, &qctx, 1);
+        if ((rc == -1) || (qctx == NULL)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_INODE_CTX_GET_FAILED,
+                        "cannot create quota context in "
+                        "inode(gfid:%s)", uuid_utoa (inode->gfid));
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        LOCK (&qctx->lock);
+        {
+                qctx->buf = *buf;
+
+                entry = __quota_dentry_new (qctx, (char *)qlocal->loc.name,
+                                            qlocal->loc.parent->gfid);
+                if (entry == NULL) {
+                        gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                                Q_MSG_ENOMEM, "cannot create a new dentry "
+                                "(name:%s) for inode(gfid:%s)", qlocal->loc.name,
+                                uuid_utoa (qlocal->loc.inode->gfid));
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+        UNLOCK (&qctx->lock);
+
+unwind:
+        QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
+                            buf, preparent, postparent, xdata);
+        return 0;
}
+/*
+ * Continuation of quota_mknod: this is the function handed to
+ * fop_mknod_stub, so it is presumably invoked when the stub stored in
+ * local->stub is resumed after the limit check (confirm against
+ * quota_check_limit).
+ *
+ * If the check recorded a failure (local->op_ret == -1) the fop is unwound
+ * with local->op_errno; a missing local unwinds with EINVAL. Otherwise the
+ * mknod is wound to the first child with quota_mknod_cbk as callback.
+ * Always returns 0.
+ */
int
-quota_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, quota_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_mknod_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc,
+ mode, rdev, umask, xdata);
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
+/*
+ * mknod fop entry point.
+ *
+ * The call is tail-wound to the first child when quota is off or when
+ * QUOTA_WIND_FOR_INTERNAL_FOP sends it to the "off" label (presumably for
+ * internal fops flagged in xdata - confirm against the macro). Otherwise a
+ * local carrying a copy of loc is attached to the frame, the fop is parked in
+ * a call stub (quota_mknod_helper) and quota_check_limit is started on the
+ * parent with delta = 0 and object_delta = 1.
+ *
+ * Any allocation/copy/stub failure unwinds the fop with ENOMEM. Always
+ * returns 0.
+ */
int
-quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
- struct quota_local *local = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ priv = this->private;
- priv = this->private;
- local = frame->local;
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+ QUOTA_WIND_FOR_INTERNAL_FOP (xdata, off);
- if (priv->disk_usage_limit) {
- if (op_ret >= 0) {
- gf_quota_usage_add (this, (postbuf->ia_blocks -
- prebuf->ia_blocks) * 512);
- }
- fd_unref (local->fd);
- iobref_unref (local->iobref);
- }
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev,
+ umask, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = 0;
+ local->object_delta = 1;
+ }
+ UNLOCK (&local->lock);
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ quota_check_limit (frame, loc->parent, this);
+ return 0;
+
+err:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc,
+ mode, rdev, umask, xdata);
+ return 0;
}
+/*
+ * Callback for setxattr.
+ *
+ * When the fop succeeded and quota_setxattr attached a local (it does so only
+ * when a hard limit is being set), the limits stored in that local are copied
+ * into the inode's quota context under ctx->lock.
+ *
+ * If the quota context cannot be obtained the fop is reported as failed with
+ * ENOMEM, matching quota_fsetxattr_cbk. The result is unwound via
+ * QUOTA_STACK_UNWIND. Always returns 0.
+ */
+int
+quota_setxattr_cbk (call_frame_t *frame, void *cookie,
+                    xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+        quota_local_t     *local = NULL;
+        quota_inode_ctx_t *ctx   = NULL;
+        int                ret   = 0;
+
+        if (op_ret < 0) {
+                goto out;
+        }
+
+        local = frame->local;
+        if (!local)
+                goto out;
+
+        ret = quota_inode_ctx_get (local->loc.inode, this, &ctx, 1);
+        if ((ret < 0) || (ctx == NULL)) {
+                /* Report a real failure: previously op_errno was set to -1
+                 * while op_ret still signalled success. */
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                ctx->hard_lim = local->limit.hl;
+                ctx->soft_lim = local->limit.sl;
+                ctx->object_hard_lim = local->object_limit.hl;
+                ctx->object_soft_lim = local->object_limit.sl;
+        }
+        UNLOCK (&ctx->lock);
+
+out:
+        QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+/*
+ * setxattr fop entry point.
+ *
+ * With quota off the call is tail-wound to the first child. For callers with
+ * a non-negative pid that did not mark the call with
+ * GLUSTERFS_INTERNAL_FOP_KEY, the dict is checked against the
+ * "trusted.glusterfs.quota*" and "trusted.pgfid*" patterns through
+ * GF_IF_INTERNAL_XATTR_GOTO, which jumps to the error path on a match.
+ *
+ * The limits found in dict are extracted with quota_get_limits; when a hard
+ * limit (size or object count) is present they are stashed in a local,
+ * together with a copy of loc, for quota_setxattr_cbk to apply. A failed
+ * loc_copy now unwinds with ENOMEM instead of being ignored. Always
+ * returns 0.
+ */
int
-quota_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
- int iovlen = 0;
-
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret >= 0) {
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- iovlen = iov_length (local->vector, local->count);
-
- if (iovlen > (buf->ia_blksize - (buf->ia_size % buf->ia_blksize))) {
- fd_unref (local->fd);
- iobref_unref (local->iobref);
- STACK_UNWIND_STRICT (writev, frame, -1, ENOSPC,
- NULL, NULL);
- return 0;
- }
- }
- local->stbuf = *buf;
- }
-
- STACK_WIND (frame, quota_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- local->fd, local->vector, local->count, local->offset,
- local->iobref);
+quota_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int op_errno = EINVAL;
+ int op_ret = -1;
+ int64_t hard_lim = -1;
+ int64_t soft_lim = -1;
+ int64_t object_hard_limit = -1;
+ int64_t object_soft_limit = -1;
+ quota_local_t *local = NULL;
+ gf_boolean_t internal_fop = _gf_false;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY))
+ internal_fop = _gf_true;
+
+ if (frame->root->pid >= 0 && internal_fop == _gf_false) {
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
+ op_errno, err);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.pgfid*", dict, op_errno,
+ err);
+ }
+
+ quota_get_limits (this, dict, &hard_lim, &soft_lim, &object_hard_limit,
+ &object_soft_limit);
+
+ if (hard_lim > 0 || object_hard_limit > 0) {
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ if (loc_copy (&local->loc, loc) < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM, "loc_copy failed");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
- return 0;
+ if (hard_lim > 0) {
+ local->limit.hl = hard_lim;
+ local->limit.sl = soft_lim;
+ }
+
+ if (object_hard_limit > 0) {
+ local->object_limit.hl = object_hard_limit;
+ local->object_limit.sl = object_soft_limit;
+ }
+
+ STACK_WIND (frame, quota_setxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc,
+ dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc,
+ dict, flags, xdata);
+ return 0;
}
+/*
+ * Callback for fsetxattr. When the fop succeeded and a local was attached by
+ * quota_fsetxattr, the limits stored in it are copied into the inode's quota
+ * context under the ctx lock. op_ret is overwritten with the result of
+ * quota_inode_ctx_get; if that call fails or yields no context, op_errno is
+ * set to ENOMEM. The result is unwound via QUOTA_STACK_UNWIND.
+ */
+int
+quota_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+                     xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+        quota_local_t     *qlocal = NULL;
+        quota_inode_ctx_t *qctx   = NULL;
+
+        if ((op_ret >= 0) && (frame->local != NULL)) {
+                qlocal = frame->local;
+
+                op_ret = quota_inode_ctx_get (qlocal->loc.inode, this,
+                                              &qctx, 1);
+                if ((op_ret < 0) || (qctx == NULL)) {
+                        op_errno = ENOMEM;
+                } else {
+                        LOCK (&qctx->lock);
+                        {
+                                qctx->hard_lim = qlocal->limit.hl;
+                                qctx->soft_lim = qlocal->limit.sl;
+                                qctx->object_hard_lim = qlocal->object_limit.hl;
+                                qctx->object_soft_lim = qlocal->object_limit.sl;
+                        }
+                        UNLOCK (&qctx->lock);
+                }
+        }
+
+        QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+/*
+ * fsetxattr fop entry point.
+ *
+ * With quota off the call is tail-wound to the first child. For callers with
+ * a non-negative pid the dict is checked against the
+ * "trusted.glusterfs.quota*" and "trusted.pgfid*" patterns through
+ * GF_IF_INTERNAL_XATTR_GOTO, which jumps to the error path on a match.
+ *
+ * The limits found in dict are extracted with quota_get_limits; when a hard
+ * limit (size or object count) is present they are stashed in a local,
+ * together with a reference on fd->inode, for quota_fsetxattr_cbk to apply.
+ * Always returns 0.
+ *
+ * NOTE(review): unlike quota_setxattr, this path has no
+ * GLUSTERFS_INTERNAL_FOP_KEY exemption - confirm that is intentional.
+ */
int
-quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
-{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
- int i = 0;
-
- priv = this->private;
-
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND_STRICT (writev, frame, -1, ENOSPC,
- NULL, NULL);
- return 0;
- }
+quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int64_t hard_lim = -1;
+ int64_t soft_lim = -1;
+ int64_t object_hard_limit = -1;
+ int64_t object_soft_limit = -1;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ if (0 <= frame->root->pid) {
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*",
+ dict, op_errno, err);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.pgfid*", dict,
+ op_errno, err);
+ }
- if (priv->disk_usage_limit) {
- local = GF_CALLOC (1, sizeof (struct quota_local),
- gf_quota_mt_quota_local);
- local->fd = fd_ref (fd);
- local->iobref = iobref_ref (iobref);
- for (i = 0; i < count; i++) {
- local->vector[i].iov_base = vector[i].iov_base;
- local->vector[i].iov_len = vector[i].iov_len;
+ quota_get_limits (this, dict, &hard_lim, &soft_lim, &object_hard_limit,
+ &object_soft_limit);
+
+ if (hard_lim > 0 || object_hard_limit > 0) {
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
}
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+ }
- local->count = count;
- local->offset = off;
- frame->local = local;
+ if (hard_lim > 0) {
+ local->limit.hl = hard_lim;
+ local->limit.sl = soft_lim;
+ }
- STACK_WIND (frame, quota_writev_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
- return 0;
- }
+ if (object_hard_limit > 0) {
+ local->object_limit.hl = object_hard_limit;
+ local->object_limit.sl = object_soft_limit;
+ }
- STACK_WIND (frame, quota_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
+ STACK_WIND (frame, quota_fsetxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd,
+ dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd,
+ dict, flags, xdata);
+ return 0;
}
int
quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "failed to remove the disk-usage value: %s",
- strerror (op_errno));
- }
-
- STACK_DESTROY (frame->root);
- return 0;
+ QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
+/*
+ * quota_removexattr - removexattr fop entry point.
+ *
+ * With quota switched off the call is tail-wound to the child unchanged.
+ * Otherwise, for callers with a non-negative pid, a request naming a
+ * quota-internal key ("trusted.glusterfs.quota*" or "trusted.pgfid*") is
+ * sent to the err label and unwound with -1/op_errno; every other request is
+ * wound to the child with quota_removexattr_cbk as callback.
+ *
+ * Always returns 0; the fop result travels through the unwind.
+ */
 int
-quota_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+quota_removexattr (call_frame_t *frame, xlator_t *this,
+                   loc_t *loc, const char *name, dict_t *xdata)
 {
- dict_t *dict = NULL;
+        quota_priv_t *priv     = NULL;
+        int32_t       op_errno = EINVAL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "failed to set the disk-usage value: %s",
- strerror (op_errno));
- }
+        priv = this->private;
- if (cookie) {
- dict = (dict_t *) cookie;
- dict_unref (dict);
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        VALIDATE_OR_GOTO (this, err);
+        /* frame has to be validated before frame->root is read below. */
+        VALIDATE_OR_GOTO (frame, err);
+
+        /* all quota xattrs can be cleaned up by doing setxattr on special key.
+         * Hence its ok that we don't allow removexattr on quota keys here.
+         */
+        if (frame->root->pid >= 0) {
+                GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.quota*",
+                                         name, op_errno, err);
+                GF_IF_NATIVE_XATTR_GOTO ("trusted.pgfid*", name,
+                                         op_errno, err);
 }
- STACK_DESTROY (frame->root);
- return 0;
+        VALIDATE_OR_GOTO (loc, err);
+
+        STACK_WIND (frame, quota_removexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+                    loc, name, xdata);
+        return 0;
+
+err:
+        QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->removexattr,
+                         loc, name, xdata);
+        return 0;
 }
int
-quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *statvfs)
+quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
- uint64_t f_blocks = 0;
- int64_t f_bfree = 0;
- uint64_t f_bused = 0;
+ quota_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ priv = this->private;
- priv = this->private;
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
- if (op_ret != 0)
- goto unwind;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ if (frame->root->pid >= 0) {
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.quota*",
+ name, op_errno, err);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.pgfid*", name,
+ op_errno, err);
+ }
+ STACK_WIND (frame, quota_fremovexattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
- f_blocks = priv->disk_usage_limit / statvfs->f_frsize;
- f_bused = priv->current_disk_usage / statvfs->f_frsize;
- if (f_blocks && (f_blocks < statvfs->f_blocks))
- statvfs->f_blocks = f_blocks;
+/*
+ * quota_statfs_cbk - statfs callback that rewrites the reply so that it
+ * reflects the quota limit of the inode passed as @cookie.
+ *
+ * When the child succeeded and the inode's quota context has a positive
+ * hard limit, f_blocks is set to hard_lim / f_bsize and f_bfree / f_bavail
+ * to the limit minus ctx->size (in blocks, clamped at 0). In that case
+ * "quota-deem-statfs" is also set in the xdata that is unwound.
+ *
+ * Always returns 0; the fop result travels through the unwind.
+ */
+int32_t
+quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+                  dict_t *xdata)
+{
+        inode_t           *inode      = NULL;
+        uint64_t           value      = 0;
+        int64_t            usage      = -1;
+        int64_t            avail      = -1;
+        int64_t            blocks     = 0;
+        quota_inode_ctx_t *ctx        = NULL;
+        int                ret        = 0;
+        /* Set only once this function holds its own reference on xdata. */
+        gf_boolean_t       xdata_held = _gf_false;
+
+        inode = cookie;
+
+        /* This fop will fail mostly in case of client disconnect,
+         * which is already logged. Hence, not logging here */
+        if (op_ret == -1)
+                goto unwind;
+        /*
+         * We should never get here unless quota_statfs (below) sent us a
+         * cookie, and it would only do so if the value was non-NULL. This
+         * check is therefore just routine defensive coding.
+         */
+
+        GF_VALIDATE_OR_GOTO ("quota", inode, unwind);
+
+        inode_ctx_get (inode, this, &value);
+        ctx = (quota_inode_ctx_t *)(unsigned long)value;
+        if (!ctx || ctx->hard_lim <= 0)
+                goto unwind;
+
+        { /* statfs is adjusted in this code block */
+                usage = (ctx->size) / buf->f_bsize;
+
+                blocks = ctx->hard_lim / buf->f_bsize;
+                buf->f_blocks = blocks;
+
+                avail = buf->f_blocks - usage;
+                avail = max (avail, 0);
+
+                buf->f_bfree = avail;
+                /*
+                 * We have to assume that the total assigned quota
+                 * won't cause us to dip into the reserved space,
+                 * because dealing with the overcommitted cases is
+                 * just too hairy (especially when different bricks
+                 * might be using different reserved percentages and
+                 * such).
+                 */
+                buf->f_bavail = buf->f_bfree;
+        }
- f_bfree = (statvfs->f_blocks - f_bused);
+        xdata = xdata ? dict_ref(xdata) : dict_new();
+        if (!xdata)
+                goto unwind;
+        xdata_held = _gf_true;
- if (f_bfree >= 0)
- statvfs->f_bfree = statvfs->f_bavail = f_bfree;
- else
- statvfs->f_bfree = statvfs->f_bavail = 0;
+        ret = dict_set_int8 (xdata, "quota-deem-statfs", 1);
+        if (-1 == ret)
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        Q_MSG_ENOMEM, "Dict set failed, deem-statfs option may "
+                        "have no effect");
 unwind:
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, statvfs);
- return 0;
+        QUOTA_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
+
+        /*
+         * The early exits above arrive here with the caller's xdata, on
+         * which no reference was taken; dropping one there would unbalance
+         * the caller's count. Release only what was acquired above.
+         */
+        if (xdata_held)
+                dict_unref (xdata);
+
+        return 0;
 }
-int
-quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+int32_t
+quota_statfs_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- STACK_WIND (frame, quota_statfs_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->statfs, loc);
+ quota_local_t *local = frame->local;
+ int op_errno = EINVAL;
- return 0;
-}
+ GF_VALIDATE_OR_GOTO ("quota", local, err);
+ if (-1 == local->op_ret) {
+ op_errno = local->op_errno;
+ goto err;
+ }
-int
-quota_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *value)
+ STACK_WIND_COOKIE (frame, quota_statfs_cbk, local->inode,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+quota_statfs_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- data_t *data = NULL;
- struct quota_priv *priv = this->private;
-
- if (op_ret >= 0) {
- data = dict_get (value, "trusted.glusterfs-quota-du");
- if (data) {
- LOCK (&priv->lock);
- {
- priv->current_disk_usage = data_to_uint64 (data);
- }
- UNLOCK (&priv->lock);
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
+ data_t *data = NULL;
+ quota_meta_t size = {0,};
+
+ local = frame->local;
+
+ if (op_ret < 0)
+ goto resume;
+
+ GF_ASSERT (local);
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, resume, op_errno,
+ EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, resume, op_errno,
+ EINVAL);
+
+ ret = inode_ctx_get (local->validate_loc.inode, this, &value);
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context is not present in inode (gfid:%s)",
+ uuid_utoa (local->validate_loc.inode->gfid));
+ op_errno = EINVAL;
+ goto resume;
+ }
- return 0;
- }
- }
+ ret = quota_dict_get_meta (xdata, QUOTA_SIZE_KEY, &size);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_SIZE_KEY_MISSING, "size key not present in "
+ "dict");
+ op_errno = EINVAL;
+ }
- STACK_DESTROY (frame->root);
+ LOCK (&ctx->lock);
+ {
+ ctx->size = size.size;
+ ctx->file_count = size.file_count;
+ ctx->dir_count = size.dir_count;
+ gettimeofday (&ctx->tv, NULL);
+ }
+ UNLOCK (&ctx->lock);
- return 0;
+resume:
+ quota_link_count_decrement (frame);
+ return 0;
}
-
+/*
+ * quota_get_limit_dir_continuation - continuation handed to
+ * quota_build_ancestry by quota_get_limit_dir.
+ *
+ * @parents is a list head of quota_dentry_t entries linked through their
+ * 'next' member, @data is the call frame. On failure (op_ret < 0) or an
+ * empty list the error is handed to quota_handle_validate_error; an empty
+ * list with op_ret >= 0 is reported as EIO. Otherwise the search for a
+ * limit-bearing directory resumes from the parent named by the first entry.
+ */
 void
-gf_quota_get_disk_usage (xlator_t *this)
+quota_get_limit_dir_continuation (struct list_head *parents, inode_t *inode,
+                                  int32_t op_ret, int32_t op_errno, void *data)
 {
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
+        call_frame_t   *frame  = NULL;
+        xlator_t       *this   = NULL;
+        quota_dentry_t *entry  = NULL;
+        inode_t        *parent = NULL;
+
+        frame = data;
+        this  = THIS;
+
+        if ((op_ret < 0) || list_empty (parents)) {
+                if (op_ret >= 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, EIO,
+                                Q_MSG_ANCESTRY_BUILD_FAILED,
+                                "Couldn't build ancestry for inode (gfid:%s). "
+                                "Without knowing ancestors till root, quota "
+                                "cannot be enforced. "
+                                "Hence, failing fop with EIO",
+                                uuid_utoa (inode->gfid));
+                        op_errno = EIO;
+                }
+
+                quota_handle_validate_error (frame, -1, op_errno);
+                goto out;
+        }
- struct quota_priv *priv = NULL;
+        /*
+         * 'parents' is the list head, not an element: the first real
+         * quota_dentry_t hangs off parents->next. Applying list_entry to the
+         * head itself would read memory that is not a quota_dentry_t.
+         */
+        entry = list_entry (parents->next, quota_dentry_t, next);
+        parent = inode_find (inode->table, entry->par);
- pool = this->ctx->pool;
- frame = create_frame (this, pool);
- priv = this->private;
+        quota_get_limit_dir (frame, parent, this);
- STACK_WIND (frame, quota_getxattr_cbk,
- this->children->xlator,
- this->children->xlator->fops->getxattr,
- &(priv->root_loc),
- "trusted.glusterfs-quota-du");
- return ;
+        /* inode_find may not have found the parent; unref only a real ref. */
+        if (parent)
+                inode_unref (parent);
+out:
+        return;
 }
+/*
+ * quota_statfs_continue - record @inode as the directory whose limit will be
+ * reported by statfs and start validating its quota context.
+ *
+ * A reference on @inode is stored in local->inode under local->lock, then
+ * quota_validate is called with quota_statfs_validate_cbk. A negative return
+ * from quota_validate is forwarded, negated, to
+ * quota_handle_validate_error as the errno.
+ */
+void
+quota_statfs_continue (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+        quota_local_t *local = frame->local;
+        int            ret   = -1;
+
+        LOCK (&local->lock);
+        {
+                local->inode = inode_ref (inode);
+        }
+        UNLOCK (&local->lock);
+
+        ret = quota_validate (frame, local->inode, this,
+                              quota_statfs_validate_cbk);
+        if (0 > ret)
+                quota_handle_validate_error (frame, -1, -ret);
+}
+/*
+ * quota_get_limit_dir - walk from @cur_inode towards the root looking for
+ * the nearest inode whose quota context has a positive hard limit.
+ *
+ *  - Found: quota_statfs_continue is called with that inode.
+ *  - Root reached without a limit, or @cur_inode is NULL: statfs is wound
+ *    to the child unadjusted, using the loc and xdata saved in frame->local.
+ *  - A parent is not linked in the inode table: quota_build_ancestry is
+ *    asked to build it, with quota_get_limit_dir_continuation as the
+ *    continuation.
+ */
 void
-gf_quota_cache_sync (xlator_t *this)
+quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this)
 {
- struct quota_priv *priv = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = get_new_dict ();
+        inode_t           *inode  = NULL;
+        inode_t           *parent = NULL;
+        uint64_t           value  = 0;
+        quota_inode_ctx_t *ctx    = NULL;
+        quota_local_t     *local  = frame->local;
+
+        /*
+         * Without a starting inode there is nothing to walk. Wind statfs
+         * unadjusted rather than returning with the frame neither wound nor
+         * unwound.
+         */
+        if (!cur_inode)
+                goto off;
+
+        inode = inode_ref (cur_inode);
+        while (inode) {
+                value = 0;
+                inode_ctx_get (inode, this, &value);
+
+                if (value) {
+                        ctx = (quota_inode_ctx_t *)(unsigned long)value;
+                        if (ctx->hard_lim > 0)
+                                break;
+                }
+                if (__is_root_gfid (inode->gfid))
+                        goto off;
+                parent = inode_parent (inode, 0, NULL);
+                if (!parent) {
+                        /*
+                         * NOTE(review): the return value is not inspected;
+                         * presumably failures are reported through the
+                         * continuation -- confirm against
+                         * quota_build_ancestry.
+                         */
+                        (void) quota_build_ancestry
+                                (inode, quota_get_limit_dir_continuation,
+                                 (void *)frame);
+                        goto out;
+                }
+
+                inode_unref (inode);
+                inode = parent;
+        }
- priv = this->private;
+        quota_statfs_continue (frame, this, inode);
+        inode_unref (inode);
+        return;
- frame = create_frame (this, this->ctx->pool);
- dict_set (dict, "trusted.glusterfs-quota-du",
- data_from_uint64 (priv->current_disk_usage));
+off:
+        gf_msg_debug (this->name, 0,
+                      "No limit set on the inode or it's parents.");
- dict_ref (dict);
+        QUOTA_STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                               FIRST_CHILD(this)->fops->statfs,
+                               &local->loc, local->xdata);
+out:
+        /* inode is still NULL when we arrived here via the NULL-input path. */
+        if (inode)
+                inode_unref (inode);
- STACK_WIND_COOKIE (frame, quota_setxattr_cbk,
- (void *) (dict_t *) dict,
- this->children->xlator,
- this->children->xlator->fops->setxattr,
- &(priv->root_loc), dict, 0);
+        return;
 }
+/*
+ * quota_statfs - statfs fop entry point.
+ *
+ * The call is tail-wound to the child unchanged when quota is off, when
+ * xdata carries a non-zero GF_INTERNAL_IGNORE_DEEM_STATFS, when deem-statfs
+ * (priv->consider_statfs) is off, or when loc has no inode. Otherwise a
+ * quota_local_t is set up with a copy of loc, a reference on xdata and a
+ * statfs stub resuming in quota_statfs_helper, and quota_get_limit_dir is
+ * started from loc->inode.
+ *
+ * Always returns 0; failures are unwound with -1 and ENOMEM.
+ */
+int32_t
+quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        int             op_errno           = 0;
+        int             ret                = -1;
+        int8_t          ignore_deem_statfs = 0;
+        quota_priv_t   *priv               = NULL;
+        quota_local_t  *local              = NULL;
+        call_stub_t    *stub               = NULL;
+
+        priv = this->private;
+        GF_ASSERT (loc);
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        /*
+         * The lookup result is deliberately ignored: when the key is absent
+         * ignore_deem_statfs simply keeps its initial value of 0.
+         */
+        (void) dict_get_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS,
+                              &ignore_deem_statfs);
+
+        if (ignore_deem_statfs)
+                goto off;
+
+        if (priv->consider_statfs && loc->inode) {
+                local = quota_local_new ();
+                if (!local) {
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+                frame->local = local;
+
+                ret = loc_copy (&local->loc, loc);
+                if (-1 == ret) {
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+
+                if (xdata)
+                        local->xdata = dict_ref (xdata);
+
+                stub = fop_statfs_stub (frame, quota_statfs_helper,
+                                        &local->loc, local->xdata);
+                if (!stub) {
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+
+                LOCK (&local->lock);
+                {
+                        local->link_count = 1;
+                        local->stub = stub;
+                }
+                UNLOCK (&local->lock);
+
+                quota_get_limit_dir (frame, loc->inode, this);
+
+                return 0;
+        }
+
+        /*
+         * We have to make sure that we never get to quota_statfs_cbk
+         * with a cookie that points to something other than an inode,
+         * which is exactly what would happen with STACK_UNWIND using
+         * that as a callback. Therefore, use default_statfs_cbk in
+         * this case instead.
+         *
+         * Also if the option deem-statfs is not set to "on" don't
+         * bother calculating quota limit on / in statfs_cbk.
+         */
+        if (priv->consider_statfs)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Missing inode, can't adjust for quota");
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->statfs, loc, xdata);
+        return 0;
+
+err:
+        QUOTA_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
int
-quota_release (xlator_t *this, fd_t *fd)
+quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- gf_quota_cache_sync (this);
+ gf_dirent_t *entry = NULL;
+ quota_local_t *local = NULL;
+ loc_t loc = {0, };
- return 0;
+ if (op_ret <= 0)
+ goto unwind;
+
+ local = frame->local;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0) ||
+ entry->inode == NULL)
+ continue;
+
+ gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+ loc.inode = inode_ref (entry->inode);
+ loc.parent = inode_ref (local->loc.inode);
+ gf_uuid_copy (loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
+
+ quota_fill_inodectx (this, entry->inode, entry->dict,
+ &loc, &entry->d_stat, &op_errno);
+
+ loc_wipe (&loc);
+ }
+
+unwind:
+ QUOTA_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
}
+/*
+ * quota_readdirp - readdirp fop entry point.
+ *
+ * With quota off the call is tail-wound unchanged. Otherwise a
+ * quota_local_t holding a reference on fd->inode is attached to the frame,
+ * QUOTA_LIMIT_KEY and QUOTA_LIMIT_OBJECTS_KEY are set in the request dict
+ * (a dict is created when the caller passed none) and the call is wound
+ * with quota_readdirp_cbk as callback.
+ *
+ * Always returns 0; failures are unwound with -1 and an errno.
+ */
+int
+quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                off_t offset, dict_t *dict)
+{
+        quota_priv_t  *priv     = NULL;
+        int            ret      = 0;
+        int32_t        op_errno = EINVAL;
+        gf_boolean_t   new_dict = _gf_false;
+        quota_local_t *local    = NULL;
+
+        priv = this->private;
+
+        WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+        local = quota_local_new ();
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        frame->local = local;
+
+        local->loc.inode = inode_ref (fd->inode);
+
+        if (dict == NULL) {
+                dict = dict_new ();
+                /* Own (and later unref) the dict only if it really exists. */
+                if (dict != NULL)
+                        new_dict = _gf_true;
+        }
+
+        if (dict) {
+                ret = dict_set_int8 (dict, QUOTA_LIMIT_KEY, 1);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                                Q_MSG_ENOMEM,
+                                "dict set of key for hard-limit failed");
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+
+                ret = dict_set_int8 (dict, QUOTA_LIMIT_OBJECTS_KEY, 1);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                                Q_MSG_ENOMEM, "dict set of key for hard-limit "
+                                "failed");
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+        }
+
+        STACK_WIND (frame, quota_readdirp_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd,
+                    size, offset, dict);
+
+        if (new_dict) {
+                dict_unref (dict);
+        }
+
+        return 0;
+err:
+        /*
+         * frame->local may already hold a reference on fd->inode, so unwind
+         * with QUOTA_STACK_UNWIND, as the other fops in this file do on
+         * their error paths.
+         */
+        QUOTA_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, NULL);
+
+        if (new_dict) {
+                dict_unref (dict);
+        }
+
+        return 0;
+
+off:
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->readdirp, fd,
+                         size, offset, dict);
+        return 0;
+}
-/* notify */
int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
+quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- default_notify (this, event, data);
- return 0;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
+
+ ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ Q_MSG_INODE_CTX_GET_FAILED,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
+
+ if (ctx == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context not set in %s (gfid:%s)",
+ local->loc.path, uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
}
+
int32_t
-quota_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict,
- postparent);
- return 0;
+quota_fallocate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("quota", local, unwind);
+
+ priv = this->private;
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ if (op_errno == ENOENT || op_errno == ESTALE) {
+ /* We may get ENOENT/ESTALE in case of below scenario
+ * fd = open file.txt
+ * unlink file.txt
+ * fallocate on fd
+ * Here build_ancestry can fail as the file is removed.
+ * For now ignore ENOENT/ESTALE on active fd
+ * We need to re-visit this code once we understand
+ * how other file-system behave in this scenario
+ */
+ gf_msg_debug (this->name, 0, "quota enforcer failed "
+ "with ENOENT/ESTALE on %s, cannot check "
+ "quota limits and allowing fallocate",
+ uuid_utoa (fd->inode->gfid));
+ } else {
+ goto unwind;
+ }
+ }
+
+ STACK_WIND (frame, quota_fallocate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+
int32_t
-quota_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- struct quota_priv *priv = NULL;
-
- priv = this->private;
-
- if (priv->only_first_time) {
- if (strcmp (loc->path, "/") == 0) {
- loc_copy(&(priv->root_loc), loc);
- priv->only_first_time = 0;
- if (priv->disk_usage_limit)
- gf_quota_get_disk_usage (this);
- }
- }
+quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = -1, op_errno = EINVAL;
+ int32_t parents = 0;
+ int32_t fail_count = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_dentry_t *dentry = NULL;
+ quota_dentry_t *tmp = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head head = {0, };
+ inode_t *par_inode = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ INIT_LIST_HEAD (&head);
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO ("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+
+ ret = quota_inode_ctx_get (fd->inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug (this->name, 0, "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ }
+
+ stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode,
+ offset, len, xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ parents = quota_add_parents_from_ctx (ctx, &head);
+
+ /*
+ * Note that by using len as the delta we're assuming the range from
+ * offset to offset+len has not already been allocated. This can result
+ * in ENOSPC errors attempting to allocate an already allocated range.
+ */
+ local->delta = len;
+ local->object_delta = 0;
+ local->stub = stub;
+ local->link_count = parents;
+
+ if (parents == 0) {
+ local->link_count = 1;
+ quota_check_limit (frame, fd->inode, this);
+ } else {
+ list_for_each_entry_safe (dentry, tmp, &head, next) {
+ par_inode = do_quota_check_limit (frame, fd->inode,
+ this, dentry,
+ _gf_false);
+ if (par_inode == NULL) {
+ /* remove stale entry from inode_ctx */
+ quota_dentry_del (ctx, dentry->name,
+ dentry->par);
+ parents--;
+ fail_count++;
+ } else {
+ inode_unref (par_inode);
+ }
+ __quota_dentry_free (dentry);
+ }
+
+ if (parents == 0) {
+ LOCK (&local->lock);
+ {
+ local->link_count++;
+ }
+ UNLOCK (&local->lock);
+ quota_check_limit (frame, fd->inode, this);
+ }
+
+ while (fail_count != 0) {
+ quota_link_count_decrement (frame);
+ fail_count--;
+ }
+ }
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset,
+ len, xdata);
+ return 0;
+}
+
+/*
+ * quota_log_helper - gather the values quota_log_usage needs for a message.
+ *
+ * On return *usage_str is the human-readable form of @cur_size (NULL if the
+ * conversion failed), *path is the path of @inode or, when no path could be
+ * built, a copy of its gfid string, and *cur_time is the current time.
+ * *usage_str and *path are heap allocated and must be released by the
+ * caller with GF_FREE; *path is NULL only if the gfid copy could not be
+ * allocated.
+ */
+void
+quota_log_helper (char **usage_str, int64_t cur_size, inode_t *inode,
+                  char **path, struct timeval *cur_time)
+{
+        xlator_t *this = THIS;
+
+        if (!usage_str || !inode || !path || !cur_time) {
+                gf_log (this->name, GF_LOG_ERROR, "Received null argument");
+                return;
+        }
+
+        *usage_str = gf_uint64_2human_readable (cur_size);
+        if (!(*usage_str))
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM,
+                        "integer to string conversion failed Reason"
+                        ":\"Cannot allocate memory\"");
+
+        inode_path (inode, NULL, path);
+        /*
+         * Duplicate the gfid string so that *path is always heap memory.
+         * Handing back uuid_utoa's own buffer would leave the caller unable
+         * to tell whether the pointer may be freed.
+         */
+        if (!(*path))
+                *path = gf_strdup (uuid_utoa (inode->gfid));
+
+        gettimeofday (cur_time, NULL);
+}
+
+/* Logs if
+*  i.  Usage crossed soft limit
+*  ii. Usage above soft limit and alert-time elapsed
+*
+* Nothing is logged when no soft limit is set or when ctx->size + delta is
+* still below it. ctx->prev_log is updated whenever a message is emitted.
+*/
+void
+quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
+                 int64_t delta)
+{
+        struct timeval  cur_time  = {0,};
+        char           *usage_str = NULL;
+        char           *path      = NULL;
+        int64_t         cur_size  = 0;
+        quota_priv_t   *priv      = NULL;
+
+        priv = this->private;
+        cur_size = ctx->size + delta;
- STACK_WIND (frame,
- quota_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
+        if ((ctx->soft_lim <= 0) || cur_size < ctx->soft_lim)
+                return;
+
+        /* Usage crossed/reached soft limit */
+        if (DID_REACH_LIMIT (ctx->soft_lim, ctx->size, cur_size)) {
+
+                quota_log_helper (&usage_str, cur_size, inode,
+                                  &path, &cur_time);
+
+                /* Never hand a NULL pointer to a %s conversion. */
+                gf_msg (this->name, GF_LOG_ALERT, 0,
+                        Q_MSG_CROSSED_SOFT_LIMIT, "Usage crossed soft limit: "
+                        "%s used by %s",
+                        usage_str ? usage_str : "(null)",
+                        path ? path : uuid_utoa (inode->gfid));
+                ctx->prev_log = cur_time;
+        }
+        /* Usage is above soft limit */
+        else if (cur_size > ctx->soft_lim &&
+                 quota_timeout (&ctx->prev_log, priv->log_timeout)) {
+
+                quota_log_helper (&usage_str, cur_size, inode,
+                                  &path, &cur_time);
+
+                gf_msg (this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
+                        "Usage is above soft limit: %s used by %s",
+                        usage_str ? usage_str : "(null)",
+                        path ? path : uuid_utoa (inode->gfid));
+                ctx->prev_log = cur_time;
+        }
+
+        if (usage_str)
+                GF_FREE (usage_str);
+
+        /* path is heap memory owned here (see quota_log_helper). */
+        if (path)
+                GF_FREE (path);
 }
int32_t
@@ -1035,121 +5036,313 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1);
-
+
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "Memory accounting init failed");
return ret;
}
return ret;
}
-int32_t
+
+/*
+ * quota_forget - forget callback: detach and free the quota context of
+ * @inode.
+ *
+ * The context is removed from the inode with inode_ctx_del; every dentry on
+ * its 'parents' list is freed, the context lock is destroyed and the
+ * context itself is released. Always returns 0.
+ */
+int32_t
+quota_forget (xlator_t *this, inode_t *inode)
+{
+        int32_t            ret     = 0;
+        uint64_t           ctx_int = 0;
+        quota_inode_ctx_t *ctx     = NULL;
+        quota_dentry_t    *dentry  = NULL;
+        quota_dentry_t    *tmp     = NULL;
+
+        ret = inode_ctx_del (inode, this, &ctx_int);
+
+        if (ret < 0) {
+                return 0;
+        }
+
+        ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int;
+
+        /*
+         * A stored value of 0 yields a NULL context; other readers in this
+         * file check for that after a successful lookup, so do it here too
+         * before touching ctx->lock.
+         */
+        if (ctx == NULL) {
+                return 0;
+        }
+
+        LOCK (&ctx->lock);
+        {
+                list_for_each_entry_safe (dentry, tmp, &ctx->parents, next) {
+                        __quota_dentry_free (dentry);
+                }
+        }
+        UNLOCK (&ctx->lock);
+
+        LOCK_DESTROY (&ctx->lock);
+
+        GF_FREE (ctx);
+
+        return 0;
+}
+
+int32_t
init (xlator_t *this)
{
- int ret = 0;
- data_t *data = NULL;
- struct quota_priv *_private = NULL;
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ rpc_clnt_t *rpc = NULL;
+
+ if ((this->children == NULL)
+ || this->children->next) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ Q_MSG_INVALID_VOLFILE,
+ "FATAL: quota (%s) not configured with "
+ "exactly one child", this->name);
+ return -1;
+ }
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: quota should have exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (this->parents == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ Q_MSG_INVALID_VOLFILE,
+ "dangling volume. check volfile");
+ }
+
+ QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err);
- _private = GF_CALLOC (1, sizeof (struct quota_priv),
- gf_quota_mt_quota_priv);
- _private->disk_usage_limit = 0;
- data = dict_get (this->options, "disk-usage-limit");
- if (data) {
- if (gf_string2bytesize (data->data, &_private->disk_usage_limit) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number '%s' for disk-usage limit", data->data);
- ret = -1;
- goto out;
+ LOCK_INIT (&priv->lock);
+
+ this->private = priv;
+
+ GF_OPTION_INIT ("deem-statfs", priv->consider_statfs, bool, err);
+ GF_OPTION_INIT ("server-quota", priv->is_quota_on, bool, err);
+ GF_OPTION_INIT ("default-soft-limit", priv->default_soft_lim, percent,
+ err);
+ GF_OPTION_INIT ("soft-timeout", priv->soft_timeout, time, err);
+ GF_OPTION_INIT ("hard-timeout", priv->hard_timeout, time, err);
+ GF_OPTION_INIT ("alert-time", priv->log_timeout, time, err);
+ GF_OPTION_INIT ("volume-uuid", priv->volume_uuid, str, err);
+
+ this->local_pool = mem_pool_new (quota_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ Q_MSG_ENOMEM, "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ if (priv->is_quota_on) {
+ rpc = quota_enforcer_init (this, this->options);
+ if (rpc == NULL) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED,
+ "quota enforcer rpc init failed");
+ goto err;
}
- LOCK_INIT (&_private->lock);
- _private->current_disk_usage = 0;
- }
-
- _private->min_free_disk_limit = 0;
- data = dict_get (this->options, "min-free-disk-limit");
- if (data) {
- if (gf_string2percent (data->data, &_private->min_free_disk_limit) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid percent '%s' for min-free-disk limit", data->data);
- ret = -1;
- goto out;
- }
- _private->refresh_interval = 20; /* 20seconds is default */
- data = dict_get (this->options, "refresh-interval");
- if (data) {
- if (gf_string2time (data->data,
- &_private->refresh_interval)!= 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid time '%s' for refresh "
- "interval", data->data);
- ret = -1;
- goto out;
- }
- }
- }
-
- _private->only_first_time = 1;
- this->private = (void *)_private;
- ret = 0;
- out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
+ LOCK (&priv->lock);
+ {
+ priv->rpc_clnt = rpc;
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
{
- struct quota_priv *_private = this->private;
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ gf_boolean_t quota_on = _gf_false;
+ rpc_clnt_t *rpc = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool,
+ out);
+ GF_OPTION_RECONF ("server-quota", quota_on, options, bool,
+ out);
+ GF_OPTION_RECONF ("default-soft-limit", priv->default_soft_lim,
+ options, percent, out);
+ GF_OPTION_RECONF ("alert-time", priv->log_timeout, options,
+ time, out);
+ GF_OPTION_RECONF ("soft-timeout", priv->soft_timeout, options,
+ time, out);
+ GF_OPTION_RECONF ("hard-timeout", priv->hard_timeout, options,
+ time, out);
+
+ if (quota_on) {
+ priv->rpc_clnt = quota_enforcer_init (this,
+ this->options);
+ if (priv->rpc_clnt == NULL) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED,
+ "quota enforcer rpc init failed");
+ goto out;
+ }
- if (_private) {
- gf_quota_cache_sync (this);
- this->private = NULL;
- }
-
- return ;
+ } else {
+ LOCK (&priv->lock);
+ {
+ rpc = priv->rpc_clnt;
+ priv->rpc_clnt = NULL;
+ }
+ UNLOCK (&priv->lock);
+
+ if (rpc != NULL) {
+ // Quotad is shutdown when there is no started volume
+ // which has quota enabled. So, we should disable the
+ // enforcer client when quota is disabled on a volume,
+ // to avoid spurious reconnect attempts to a service
+ // (quotad), that is known to be down.
+ rpc_clnt_unref (rpc);
+ }
+ }
+
+ priv->is_quota_on = quota_on;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * quota_priv_dump - statedump hook for the translator's private data.
+ *
+ * Opens the "xlators.features.quota.priv" section and, if priv->lock can be
+ * taken without blocking, writes the timeouts, the quota/statfs switches,
+ * the volume uuid and the validation counter. When the lock is busy only
+ * the section header is emitted. Always returns 0.
+ */
+int32_t
+quota_priv_dump (xlator_t *this)
+{
+        quota_priv_t *conf   = NULL;
+        int32_t       locked = -1;
+
+        GF_ASSERT (this);
+
+        conf = this->private;
+
+        gf_proc_dump_add_section ("xlators.features.quota.priv", this->name);
+
+        locked = TRY_LOCK (&conf->lock);
+        if (locked != 0) {
+                /* Lock is contended: skip the values rather than block. */
+                return 0;
+        }
+
+        gf_proc_dump_write("soft-timeout", "%d", conf->soft_timeout);
+        gf_proc_dump_write("hard-timeout", "%d", conf->hard_timeout);
+        gf_proc_dump_write("alert-time", "%d", conf->log_timeout);
+        gf_proc_dump_write("quota-on", "%d", conf->is_quota_on);
+        gf_proc_dump_write("statfs", "%d", conf->consider_statfs);
+        gf_proc_dump_write("volume-uuid", "%s", conf->volume_uuid);
+        gf_proc_dump_write("validation-count", "%ld",
+                           conf->validation_count);
+
+        UNLOCK (&conf->lock);
+
+        return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ return;
}
+
struct xlator_fops fops = {
- .create = quota_create,
- .open = quota_open,
- .lookup = quota_lookup,
- .truncate = quota_truncate,
- .ftruncate = quota_ftruncate,
- .writev = quota_writev,
- .unlink = quota_unlink,
- .rmdir = quota_rmdir,
- .mknod = quota_mknod,
- .mkdir = quota_mkdir,
- .symlink = quota_symlink,
- .statfs = quota_statfs,
+ .statfs = quota_statfs,
+ .lookup = quota_lookup,
+ .writev = quota_writev,
+ .create = quota_create,
+ .mkdir = quota_mkdir,
+ .truncate = quota_truncate,
+ .ftruncate = quota_ftruncate,
+ .unlink = quota_unlink,
+ .symlink = quota_symlink,
+ .link = quota_link,
+ .rename = quota_rename,
+ .getxattr = quota_getxattr,
+ .fgetxattr = quota_fgetxattr,
+ .stat = quota_stat,
+ .fstat = quota_fstat,
+ .readlink = quota_readlink,
+ .readv = quota_readv,
+ .fsync = quota_fsync,
+ .setattr = quota_setattr,
+ .fsetattr = quota_fsetattr,
+ .mknod = quota_mknod,
+ .setxattr = quota_setxattr,
+ .fsetxattr = quota_fsetxattr,
+ .removexattr = quota_removexattr,
+ .fremovexattr = quota_fremovexattr,
+ .readdirp = quota_readdirp,
+ .fallocate = quota_fallocate,
};
+/* Callback table: only the forget callback is registered (quota_forget). */
struct xlator_cbks cbks = {
- .release = quota_release
+ .forget = quota_forget
};
+/* Statedump table: only the private-data dump (quota_priv_dump) is
+ * provided. */
+struct xlator_dumpops dumpops = {
+ .priv = quota_priv_dump,
+};
+/* Options accepted by this translator; each entry gives the key and, where
+ * present, its type, bounds, default and description.  The table ends with
+ * the {NULL} key entry. */
struct volume_options options[] = {
- { .key = {"min-free-disk-limit"},
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = {"refresh-interval"},
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = {"disk-usage-limit"},
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = {NULL} },
+ {.key = {"limit-set"}},
+ {.key = {"deem-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "If set to on, it takes quota limits into"
+ " consideration while estimating fs size. (df command)"
+ " (Default is on)."
+ },
+ {.key = {"server-quota"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Skip the quota enforcement if the feature is"
+ " not turned on. This is not a user exposed option."
+ },
+ {.key = {"default-soft-limit"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "80%",
+ },
+ {.key = {"soft-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 1800,
+ .default_value = "60",
+ .description = "quota caches the directory sizes on client. "
+ "soft-timeout indicates the timeout for the validity of"
+ " cache before soft-limit has been crossed."
+ },
+ {.key = {"hard-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 60,
+ .default_value = "5",
+ .description = "quota caches the directory sizes on client. "
+ "hard-timeout indicates the timeout for the validity of"
+ " cache after soft-limit has been crossed."
+ },
+ { .key = {"username"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"password"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"transport-type"},
+ .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
+ "tcp/client", "ib-verbs/client", "rdma"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"remote-host"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS,
+ },
+ { .key = {"remote-port"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"volume-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "uuid of the volume this brick is part of."
+ },
+ { .key = {"alert-time"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 7*86400,
+ .default_value = "86400",
+ },
+ {.key = {NULL}}
};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
new file mode 100644
index 00000000000..6f74da789b6
--- /dev/null
+++ b/xlators/features/quota/src/quota.h
@@ -0,0 +1,282 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _QUOTA_H
+#define _QUOTA_H
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "quota-mem-types.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "logging.h"
+#include "dict.h"
+#include "stack.h"
+#include "event.h"
+#include "globals.h"
+#include "rpcsvc.h"
+#include "rpc-clnt.h"
+#include "byte-order.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "xdr-generic.h"
+#include "compat-errno.h"
+#include "protocol-common.h"
+#include "quota-common-utils.h"
+#include "quota-messages.h"
+
+/* Name fragments for quota xattr keys: DIRTY and CONTRIBUTION are appended
+ * by GET_DIRTY_KEY_OR_GOTO and GET_CONTRI_KEY in this header. */
+#define DIRTY "dirty"
+#define SIZE "size"
+#define CONTRIBUTION "contri"
+/* NOTE(review): VAL_LENGTH and READDIR_BUF are not used in this header;
+ * presumably a value width and a buffer size in bytes -- confirm at the
+ * call sites. */
+#define VAL_LENGTH 8
+#define READDIR_BUF 4096
+
+/* Fallback definition, used only when no earlier header has defined it. */
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
+#endif
+
+/*
+ * Jump to `label` when is_quota_on evaluates false.
+ *
+ * The flag is parenthesized so that a compound expression passed as
+ * is_quota_on is negated as a whole.  The expansion keeps its trailing
+ * semicolon so that existing call sites, written with or without one,
+ * keep compiling.
+ */
+#define WIND_IF_QUOTAOFF(is_quota_on, label)     \
+        if (!(is_quota_on))                      \
+                goto label;
+
+/* Jump to `label` when xdata is non-NULL and carries
+ * GLUSTERFS_INTERNAL_FOP_KEY. */
+#define QUOTA_WIND_FOR_INTERNAL_FOP(xdata, label) \
+ do { \
+ if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+/* True when the size moved from below `lim` (prev_size) to at or above it
+ * (cur_size), i.e. this update is the one that crossed the limit. */
+#define DID_REACH_LIMIT(lim, prev_size, cur_size) \
+ ((cur_size) >= (lim) && (prev_size) < (lim))
+
+/* Increment `var` while holding `lock` (LOCK/UNLOCK around the update). */
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var ++; \
+ UNLOCK (lock); \
+ } while (0)
+
+/* Decrement `var` while holding `lock` (LOCK/UNLOCK around the update). */
+#define QUOTA_SAFE_DECREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var --; \
+ UNLOCK (lock); \
+ } while (0)
+
+/*
+ * Allocate one zeroed `type` into `var` with GF_CALLOC, accounted under
+ * gf_quota_mt_<type>.  On failure it logs, sets a variable named `ret` in
+ * the calling scope to -1 and jumps to `label`; the caller must therefore
+ * declare `ret`.
+ * NOTE(review): the definition ends in `while (0);`, so a call site's own
+ * semicolon adds an empty statement.
+ */
+#define QUOTA_ALLOC_OR_GOTO(var, type, label) \
+ do { \
+ var = GF_CALLOC (sizeof (type), 1, \
+ gf_quota_mt_##type); \
+ if (!var) { \
+ gf_msg ("", GF_LOG_ERROR, \
+ ENOMEM, Q_MSG_ENOMEM, \
+ "out of memory"); \
+ ret = -1; \
+ goto label; \
+ } \
+ } while (0);
+
+/*
+ * STACK_WIND_TAIL wrapper: detaches frame->local first, winds, then passes
+ * the detached local (if any) to quota_local_cleanup ().
+ * _this is assigned but not otherwise used in this expansion.
+ */
+#define QUOTA_STACK_WIND_TAIL(frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ xlator_t *_this = NULL; \
+ \
+ if (frame) { \
+ _local = frame->local; \
+ _this = frame->this; \
+ frame->local = NULL; \
+ } \
+ \
+ STACK_WIND_TAIL (frame, params); \
+ \
+ if (_local) \
+ quota_local_cleanup (_local); \
+ } while (0)
+
+/*
+ * STACK_UNWIND_STRICT wrapper: detaches frame->local first, unwinds, then
+ * calls quota_local_cleanup () on the detached local.  Unlike
+ * QUOTA_STACK_WIND_TAIL the cleanup call is unconditional, so it is also
+ * made with NULL.  _this is assigned but not otherwise used here.
+ */
+#define QUOTA_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ xlator_t *_this = NULL; \
+ if (frame) { \
+ _local = frame->local; \
+ _this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ quota_local_cleanup (_local); \
+ } while (0)
+
+/* Unlink _contribution from the list it is on (via its contri_list member)
+ * and free it.  The argument is expanded unparenthesized, so pass a plain
+ * pointer variable. */
+#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
+ do { \
+ list_del (&_contribution->contri_list); \
+ GF_FREE (_contribution); \
+ } while (0)
+
+/*
+ * Build the contribution xattr key into *var with gf_asprintf:
+ *   QUOTA_XATTR_PREFIX "<vol>.<gfid>." CONTRIBUTION   when _gfid != NULL
+ *   QUOTA_XATTR_PREFIX "<vol>.." CONTRIBUTION         otherwise
+ * _ret receives the gf_asprintf result.
+ */
+#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \
+ do { \
+ char _gfid_unparsed[40]; \
+ if (_gfid != NULL) { \
+ gf_uuid_unparse (_gfid, _gfid_unparsed);\
+ _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s.%s." CONTRIBUTION, \
+ _vol_name, _gfid_unparsed); \
+ } else { \
+ _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s.." CONTRIBUTION, \
+ _vol_name); \
+ } \
+ } while (0)
+
+
+/* GET_CONTRI_KEY storing the result in the caller's `ret`; jumps to `label`
+ * when that result is -1.  The caller must declare `ret`. */
+#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \
+ do { \
+ GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \
+ if (ret == -1) \
+ goto label; \
+ } while (0)
+
+/* Build QUOTA_XATTR_PREFIX "<vol>." DIRTY into *var with gf_asprintf; the
+ * result goes to the caller's `ret`, and -1 jumps to `label`. */
+#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \
+ do { \
+ ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s." DIRTY, _vol_name); \
+ if (ret == -1) \
+ goto label; \
+ } while (0)
+
+/* True when ia_type satisfies IA_ISREG or IA_ISLNK. */
+#define QUOTA_REG_OR_LNK_FILE(ia_type) \
+ (IA_ISREG (ia_type) || IA_ISLNK (ia_type))
+
+
+
+/* One list-linked (name, par) pair.
+ * NOTE(review): `par` is presumably the parent directory's gfid and `next`
+ * the linkage into quota_inode_ctx.parents -- confirm in quota.c. */
+struct quota_dentry {
+ char *name;
+ uuid_t par;
+ struct list_head next;
+};
+typedef struct quota_dentry quota_dentry_t;
+
+/* Quota bookkeeping kept per inode: current size and counts, the hard/soft
+ * limits for size and for objects, cached iatt, timestamps and a lock.
+ * NOTE(review): field semantics (units, which timestamp is refreshed when)
+ * are defined by the users in quota.c -- confirm there. */
+struct quota_inode_ctx {
+ int64_t size;
+ int64_t hard_lim;
+ int64_t soft_lim;
+ int64_t file_count;
+ int64_t dir_count;
+ int64_t object_hard_lim;
+ int64_t object_soft_lim;
+ struct iatt buf;
+ struct list_head parents;
+ struct timeval tv;
+ struct timeval prev_log;
+ gf_boolean_t ancestry_built;
+ gf_lock_t lock;
+};
+typedef struct quota_inode_ctx quota_inode_ctx_t;
+
+/* Callback type taken by quota_build_ancestry (); `data` is the opaque
+ * pointer supplied with the request. */
+typedef void
+(*quota_ancestry_built_t) (struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data);
+
+/* Callback type stored in quota_local.fop_continue_cbk. */
+typedef void
+(*quota_fop_continue_t) (call_frame_t *frame);
+
+/* Per-call state carried in frame->local; QUOTA_STACK_WIND_TAIL and
+ * QUOTA_STACK_UNWIND detach it from the frame and hand it to
+ * quota_local_cleanup (). */
+struct quota_local {
+ gf_lock_t lock;
+ uint32_t link_count;
+ loc_t loc;
+ loc_t oldloc;
+ loc_t newloc;
+ loc_t validate_loc;
+ int64_t delta;
+ int8_t object_delta;
+ int32_t op_ret;
+ int32_t op_errno;
+ int64_t size;
+ char just_validated;
+ fop_lookup_cbk_t validate_cbk;
+ quota_fop_continue_t fop_continue_cbk;
+ inode_t *inode;
+ uuid_t common_ancestor; /* Used by quota_rename */
+ call_stub_t *stub;
+ struct iobref *iobref;
+ quota_limits_t limit;
+ quota_limits_t object_limit;
+ int64_t space_available;
+ quota_ancestry_built_t ancestry_cbk;
+ void *ancestry_data;
+ dict_t *xdata;
+ dict_t *validate_xdata;
+ int32_t quotad_conn_retry;
+ xlator_t *this;
+ call_frame_t *par_frame;
+};
+typedef struct quota_local quota_local_t;
+
+/* Translator-private state (this->private), shared by the quota translator
+ * and quotad.  quota_priv_dump () reports soft_timeout, hard_timeout,
+ * log_timeout (as "alert-time"), is_quota_on, consider_statfs (as
+ * "statfs"), volume_uuid and validation_count while holding `lock`;
+ * quotad stores its RPC listener in `rpcsvc`. */
+struct quota_priv {
+ uint32_t soft_timeout;
+ uint32_t hard_timeout;
+ uint32_t log_timeout;
+ double default_soft_lim;
+ gf_boolean_t is_quota_on;
+ gf_boolean_t consider_statfs;
+ gf_lock_t lock;
+ rpc_clnt_prog_t *quota_enforcer;
+ struct rpcsvc_program *quotad_aggregator;
+ struct rpc_clnt *rpc_clnt;
+ rpcsvc_t *rpcsvc;
+ inode_table_t *itable;
+ char *volume_uuid;
+ uint64_t validation_count;
+ int32_t quotad_conn_status;
+};
+typedef struct quota_priv quota_priv_t;
+
+int
+quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, dict_t *xdata,
+ fop_lookup_cbk_t cbk);
+
+void
+_quota_enforcer_lookup (void *data);
+
+struct rpc_clnt *
+quota_enforcer_init (xlator_t *this, dict_t *options);
+
+void
+quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
+ int64_t delta);
+
+int
+quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk,
+ void *data);
+
+void
+quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this);
+
+int32_t
+quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this);
+
+inode_t *
+do_quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ quota_dentry_t *dentry, gf_boolean_t force);
+int
+quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict,
+ loc_t *loc, struct iatt *buf, int32_t *op_errno);
+
+int32_t
+quota_check_size_limit (call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated, int64_t delta,
+ quota_local_t *local, gf_boolean_t *skip_check);
+
+int32_t
+quota_check_object_limit (call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated,
+ quota_local_t *local, gf_boolean_t *skip_check);
+#endif
diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c
new file mode 100644
index 00000000000..82d18ea15e0
--- /dev/null
+++ b/xlators/features/quota/src/quotad-aggregator.c
@@ -0,0 +1,456 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "cli1-xdr.h"
+#include "quota.h"
+#include "quotad-helpers.h"
+#include "quotad-aggregator.h"
+
+struct rpcsvc_program quotad_aggregator_prog;
+
+/*
+ * Encode the reply structure `arg` with `xdrproc` into a freshly obtained
+ * iobuf and describe the encoded bytes in *outmsg.
+ *
+ * Returns the iobuf, or NULL when req is NULL, when arg/xdrproc is missing
+ * (outmsg->iov_len is then set to 0) or when no iobuf could be obtained
+ * (outmsg is then left untouched).  An encoding failure still returns the
+ * iobuf, with a zero-length outmsg and req->rpc_err set to GARBAGE_ARGS.
+ */
+struct iobuf *
+quotad_serialize_reply (rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
+                        xdrproc_t xdrproc)
+{
+        struct iobuf *reply_buf    = NULL;
+        ssize_t       encoded      = 0;
+        ssize_t       payload_size = 0;
+
+        GF_VALIDATE_OR_GOTO ("server", req, done);
+
+        if (arg != NULL && xdrproc != NULL) {
+                /* Size the buffer from the XDR encoding of arg. */
+                payload_size = xdr_sizeof (xdrproc, arg);
+                reply_buf = iobuf_get2 (req->svc->ctx->iobuf_pool,
+                                        payload_size);
+                if (reply_buf == NULL) {
+                        gf_log_callingfn (THIS->name, GF_LOG_ERROR,
+                                          "Failed to get iobuf");
+                        goto done;
+                }
+
+                iobuf_to_iovec (reply_buf, outmsg);
+
+                /* The length is kept signed because -1 signals an encoding
+                 * error. */
+                encoded = xdr_serialize_generic (*outmsg, arg, xdrproc);
+                if (encoded == -1) {
+                        /* The client must still hear about this, so the
+                         * request is flagged instead of being dropped. */
+                        gf_log_callingfn ("", GF_LOG_ERROR, "Failed to encode message");
+                        req->rpc_err = GARBAGE_ARGS;
+                        encoded = 0;
+                }
+        }
+
+        outmsg->iov_len = encoded;
+done:
+        return reply_buf;
+}
+
+/*
+ * Serialize `arg` with `xdrproc`, submit it as the reply to `req`, then
+ * release the per-request resources: the aggregator state hanging off the
+ * frame, the frame's stack, and the iobref if one was created here.
+ *
+ * frame may be NULL (nothing to destroy).  iobref may be NULL (a temporary
+ * one is created).
+ *
+ * Returns 0 when the reply was handed to rpcsvc, -1 otherwise.  The frame
+ * and its state are released in both cases.
+ */
+int
+quotad_aggregator_submit_reply (call_frame_t *frame, rpcsvc_request_t *req,
+                                void *arg, struct iovec *payload,
+                                int payloadcount, struct iobref *iobref,
+                                xdrproc_t xdrproc)
+{
+        struct iobuf              *iob        = NULL;
+        int                        ret        = -1;
+        struct iovec               rsp        = {0,};
+        quotad_aggregator_state_t *state      = NULL;
+        char                       new_iobref = 0;
+
+        /* Pick up the state before validating req, so that it is released
+         * together with the frame on the early-exit path as well. */
+        if (frame) {
+                state = frame->root->state;
+                frame->local = NULL;
+        }
+
+        GF_VALIDATE_OR_GOTO ("server", req, ret);
+
+        if (!iobref) {
+                iobref = iobref_new ();
+                if (!iobref) {
+                        goto ret;
+                }
+
+                new_iobref = 1;
+        }
+
+        iob = quotad_serialize_reply (req, arg, &rsp, xdrproc);
+        if (!iob) {
+                gf_msg ("", GF_LOG_ERROR, 0, Q_MSG_DICT_SERIALIZE_FAIL,
+                        "Failed to serialize reply");
+                goto ret;
+        }
+
+        iobref_add (iobref, iob);
+
+        ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
+                                     iobref);
+
+        /* Drop the local reference; the buffer was added to iobref above. */
+        iobuf_unref (iob);
+
+        /* Report a failed submission instead of masking it with success. */
+        if (ret == -1) {
+                gf_log_callingfn ("", GF_LOG_ERROR,
+                                  "Reply submission failed");
+                goto ret;
+        }
+
+        ret = 0;
+ret:
+        if (state) {
+                quotad_aggregator_free_state (state);
+        }
+
+        if (frame)
+                STACK_DESTROY (frame->root);
+
+        if (new_iobref) {
+                iobref_unref (iobref);
+        }
+
+        return ret;
+}
+
+/*
+ * Completion callback for the GETLIMIT lookup: converts the
+ * gfs3_lookup_rsp in `lookup_rsp` into a gf_cli_rsp and replies to the
+ * request stored in frame->local.
+ *
+ * The xdata returned by the lookup is unserialized, tagged with the "type"
+ * value taken from the request dictionary (state->xdata) and serialized
+ * into the CLI response.  On any failure the reply carries op_ret < 0 and a
+ * non-zero op_errno.
+ *
+ * Always returns 0.  The frame is destroyed by the reply.
+ */
+int
+quotad_aggregator_getlimit_cbk (xlator_t *this, call_frame_t *frame,
+                                void *lookup_rsp)
+{
+        gfs3_lookup_rsp           *rsp     = lookup_rsp;
+        gf_cli_rsp                 cli_rsp = {0,};
+        dict_t                    *xdata   = NULL;
+        quotad_aggregator_state_t *state   = NULL;
+        int                        ret     = -1;
+        int                        type    = 0;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata,
+                                      (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), rsp->op_ret,
+                                      rsp->op_errno, out);
+
+        if (xdata) {
+                state = frame->root->state;
+                ret = dict_get_int32 (state->xdata, "type", &type);
+                if (ret < 0)
+                        goto out;
+
+                ret = dict_set_int32 (xdata, "type", type);
+                if (ret < 0)
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        rsp->op_ret = ret;
+
+        /* Fill the response before branching: a gf_cli_rsp left zeroed
+         * would report success and carry a NULL op_errstr, which cannot be
+         * XDR-encoded as a string. */
+        cli_rsp.op_ret    = rsp->op_ret;
+        cli_rsp.op_errno  = rsp->op_errno;
+        cli_rsp.op_errstr = "";
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        Q_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize "
+                        "nameless lookup rsp");
+                if (!cli_rsp.op_errno)
+                        cli_rsp.op_errno = EINVAL;
+                goto reply;
+        }
+
+        if (xdata) {
+                GF_PROTOCOL_DICT_SERIALIZE (frame->this, xdata,
+                                            (&cli_rsp.dict.dict_val),
+                                            (cli_rsp.dict.dict_len),
+                                            cli_rsp.op_errno, reply);
+        }
+
+reply:
+        quotad_aggregator_submit_reply (frame, frame->local, (void*)&cli_rsp, NULL, 0,
+                                        NULL, (xdrproc_t)xdr_gf_cli_rsp);
+
+        /* xdata stays NULL when the lookup returned no dictionary. */
+        if (xdata)
+                dict_unref (xdata);
+        GF_FREE (cli_rsp.dict.dict_val);
+        return 0;
+}
+
+/*
+ * GETLIMIT actor: decodes a gf_cli_req, reads the "gfid" string from its
+ * dictionary, adds the quota keys to be fetched and starts a nameless
+ * lookup whose result is answered by quotad_aggregator_getlimit_cbk ().
+ *
+ * On failure a gf_cli_rsp with op_ret = -1 and the collected op_errno is
+ * submitted directly.  The error reply is not routed through the lookup
+ * callback, because that callback expects a gfs3_lookup_rsp and a valid
+ * frame.
+ *
+ * Returns the result of the last step, as before.
+ * NOTE(review): the error path returns non-zero after a reply has been
+ * submitted -- confirm how rpcsvc treats a failing actor here.
+ */
+int
+quotad_aggregator_getlimit (rpcsvc_request_t *req)
+{
+        call_frame_t              *frame    = NULL;
+        gf_cli_req                 cli_req  = {{0}, };
+        gf_cli_rsp                 cli_rsp  = {0};
+        gfs3_lookup_req            args     = {{0,},};
+        quotad_aggregator_state_t *state    = NULL;
+        xlator_t                  *this     = NULL;
+        dict_t                    *dict     = NULL;
+        int                        ret      = -1, op_errno = 0;
+        char                      *gfid_str = NULL;
+        uuid_t                     gfid     = {0};
+
+        GF_VALIDATE_OR_GOTO ("quotad-aggregator", req, err);
+
+        this = THIS;
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                gf_msg (this->name, GF_LOG_ERROR, 0, Q_MSG_XDR_DECODE_ERROR,
+                        "xdr decoding error");
+                req->rpc_err = GARBAGE_ARGS;
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len, &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                Q_MSG_DICT_UNSERIALIZE_FAIL,
+                                "Failed to unserialize req-buffer to "
+                                "dictionary");
+                        op_errno = EINVAL;
+                        goto err;
+                }
+        }
+
+        ret = dict_get_str (dict, "gfid", &gfid_str);
+        if (ret) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        gf_uuid_parse ((const char*)gfid_str, gfid);
+
+        frame = quotad_aggregator_get_frame_from_req (req);
+        if (frame == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        state = frame->root->state;
+        if (state == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        /* Ownership of the dictionary moves to the state, which is
+         * released (and the dictionary unreferenced) when the reply is
+         * submitted.  Forget the local pointer so the error path does not
+         * unreference it a second time. */
+        state->xdata = dict;
+        dict = NULL;
+
+        ret = dict_set_int32 (state->xdata, QUOTA_LIMIT_KEY, 42);
+        if (ret) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set_int32 (state->xdata, QUOTA_LIMIT_OBJECTS_KEY, 42);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM,
+                        "Failed to set QUOTA_LIMIT_OBJECTS_KEY");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set_int32 (state->xdata, QUOTA_SIZE_KEY, 42);
+        if (ret) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set_int32 (state->xdata, GET_ANCESTRY_PATH_KEY, 42);
+        if (ret) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        /* Only the gfid of the lookup request is consumed by
+         * qd_nameless_lookup (); no bname/xdata buffers are needed. */
+        memcpy (&args.gfid, &gfid, 16);
+
+        ret = qd_nameless_lookup (this, frame, &args, state->xdata,
+                                  quotad_aggregator_getlimit_cbk);
+        if (ret) {
+                op_errno = ret;
+                goto err;
+        }
+
+        return ret;
+
+err:
+        cli_rsp.op_ret = -1;
+        cli_rsp.op_errno = op_errno;
+        cli_rsp.op_errstr = "";
+
+        /* Handles a NULL frame and releases frame/state when present. */
+        quotad_aggregator_submit_reply (frame, req, (void *)&cli_rsp, NULL, 0,
+                                        NULL, (xdrproc_t)xdr_gf_cli_rsp);
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+/*
+ * Completion callback for the LOOKUP actor: sends `rsp` (a
+ * gfs3_lookup_rsp) back on the request stored in frame->local.
+ * Always returns 0.
+ */
+int
+quotad_aggregator_lookup_cbk (xlator_t *this, call_frame_t *frame,
+                              void *rsp)
+{
+        rpcsvc_request_t *pending_req = frame->local;
+
+        quotad_aggregator_submit_reply (frame, pending_req, rsp, NULL, 0,
+                                        NULL, (xdrproc_t)xdr_gfs3_lookup_rsp);
+        return 0;
+}
+
+
+/*
+ * LOOKUP actor: decodes a gfs3_lookup_req, unserializes its xdata into the
+ * per-request state and starts a nameless lookup answered by
+ * quotad_aggregator_lookup_cbk ().
+ *
+ * On failure a gfs3_lookup_rsp with op_ret = -1 and the collected op_errno
+ * is submitted directly, which also works when no frame was created.
+ *
+ * Returns the result of the last step, as before.
+ * NOTE(review): the error path returns non-zero after a reply has been
+ * submitted -- confirm how rpcsvc treats a failing actor here.
+ */
+int
+quotad_aggregator_lookup (rpcsvc_request_t *req)
+{
+        call_frame_t              *frame = NULL;
+        gfs3_lookup_req            args  = {{0,},};
+        int                        ret   = -1, op_errno = 0;
+        gfs3_lookup_rsp            rsp   = {0,};
+        quotad_aggregator_state_t *state = NULL;
+        xlator_t                  *this  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("quotad-aggregator", req, err);
+
+        this = THIS;
+
+        /* Buffers the XDR decoder fills for the variable-length members;
+         * they are sized from the incoming message. */
+        args.bname = alloca (req->msg[0].iov_len);
+        args.xdata.xdata_val = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_lookup_req);
+        if (ret < 0) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        frame = quotad_aggregator_get_frame_from_req (req);
+        if (frame == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        state = frame->root->state;
+        if (state == NULL) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, state->xdata,
+                                      (args.xdata.xdata_val),
+                                      (args.xdata.xdata_len), ret,
+                                      op_errno, err);
+
+        ret = qd_nameless_lookup (this, frame, &args, state->xdata,
+                                  quotad_aggregator_lookup_cbk);
+        if (ret) {
+                op_errno = ret;
+                goto err;
+        }
+
+        return ret;
+
+err:
+        rsp.op_ret = -1;
+        rsp.op_errno = op_errno;
+
+        /* Handles a NULL frame and releases frame/state when present. */
+        quotad_aggregator_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                                        (xdrproc_t)xdr_gfs3_lookup_rsp);
+        return ret;
+}
+
+/*
+ * rpcsvc notification handler.  No event needs any action at present; the
+ * function only warns when invoked without a translator or event data.
+ * Always returns 0.
+ */
+int
+quotad_aggregator_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+                              void *data)
+{
+        if (xl == NULL || data == NULL) {
+                gf_log_callingfn ("server", GF_LOG_WARNING,
+                                  "Calling rpc_notify without initializing");
+                return 0;
+        }
+
+        /* Accept and disconnect are recognised but deliberately ignored. */
+        switch (event) {
+        case RPCSVC_EVENT_ACCEPT:
+        case RPCSVC_EVENT_DISCONNECT:
+        default:
+                break;
+        }
+
+        return 0;
+}
+
+/*
+ * Create the aggregator's RPC service once: force the transport options to
+ * a unix socket at /var/run/gluster/quotad.socket, initialise rpcsvc,
+ * create the listeners and register quotad_aggregator_prog.
+ *
+ * Returns 0 on success or when the service already exists.  On failure it
+ * returns non-zero, and priv->rpcsvc is freed and reset.
+ */
+int
+quotad_aggregator_init (xlator_t *this)
+{
+        static char  *transport_opts[][2] = {
+                {"transport.address-family", "unix"},
+                {"transport-type", "socket"},
+                {"transport.socket.listen-path",
+                 "/var/run/gluster/quotad.socket"},
+        };
+        quota_priv_t *conf = this->private;
+        int           rc   = -1;
+        int           idx  = 0;
+
+        /* Listener already created */
+        if (conf->rpcsvc)
+                return 0;
+
+        for (idx = 0; idx < 3; idx++) {
+                rc = dict_set_str (this->options, transport_opts[idx][0],
+                                   transport_opts[idx][1]);
+                if (rc)
+                        goto cleanup;
+        }
+
+        /* RPC related */
+        conf->rpcsvc = rpcsvc_init (this, this->ctx, this->options, 0);
+        if (conf->rpcsvc == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_RPCSVC_INIT_FAILED,
+                        "creation of rpcsvc failed");
+                rc = -1;
+                goto cleanup;
+        }
+
+        rc = rpcsvc_create_listeners (conf->rpcsvc, this->options,
+                                      this->name);
+        if (rc < 1) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_RPCSVC_LISTENER_CREATION_FAILED,
+                        "creation of listener failed");
+                rc = -1;
+                goto cleanup;
+        }
+
+        conf->quotad_aggregator = &quotad_aggregator_prog;
+        quotad_aggregator_prog.options = this->options;
+
+        rc = rpcsvc_program_register (conf->rpcsvc, &quotad_aggregator_prog);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        Q_MSG_RPCSVC_REGISTER_FAILED,
+                        "registration of program (name:%s, prognum:%d, "
+                        "progver:%d) failed", quotad_aggregator_prog.progname,
+                        quotad_aggregator_prog.prognum,
+                        quotad_aggregator_prog.progver);
+                goto cleanup;
+        }
+
+        rc = 0;
+cleanup:
+        if (rc && conf->rpcsvc) {
+                GF_FREE (conf->rpcsvc);
+                conf->rpcsvc = NULL;
+        }
+
+        return rc;
+}
+
+/*
+ * Actor table of the aggregator RPC program, indexed by procedure number.
+ * The second initializer of every entry repeats that entry's own procedure
+ * number (the procnum member of rpcsvc_actor_t -- confirm against
+ * rpcsvc.h).  The LOOKUP entry previously carried GF_AGGREGATOR_NULL there.
+ */
+rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = {
+        [GF_AGGREGATOR_NULL]     = {"NULL", GF_AGGREGATOR_NULL, NULL, NULL, 0,
+                                    DRC_NA},
+        [GF_AGGREGATOR_LOOKUP]   = {"LOOKUP", GF_AGGREGATOR_LOOKUP,
+                                    quotad_aggregator_lookup, NULL, 0, DRC_NA},
+        [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", GF_AGGREGATOR_GETLIMIT,
+                                    quotad_aggregator_getlimit, NULL, 0, DRC_NA},
+};
+
+
+/* RPC program descriptor registered by quotad_aggregator_init (); its
+ * `options` member is filled in there before registration. */
+struct rpcsvc_program quotad_aggregator_prog = {
+ .progname = "GlusterFS 3.3",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numactors = GF_AGGREGATOR_MAXVALUE,
+ .actors = quotad_aggregator_actors
+};
diff --git a/xlators/features/quota/src/quotad-aggregator.h b/xlators/features/quota/src/quotad-aggregator.h
new file mode 100644
index 00000000000..5ddea5b3c46
--- /dev/null
+++ b/xlators/features/quota/src/quotad-aggregator.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTAD_AGGREGATOR_H
+#define _QUOTAD_AGGREGATOR_H
+
+#include "quota.h"
+#include "stack.h"
+#include "glusterfs3-xdr.h"
+#include "inode.h"
+
+/* Per-request aggregator state, stored in frame->root->state.
+ * get_quotad_aggregator_state () fills pool, this, active_subvol and
+ * itable; xdata holds the request dictionary and is unreferenced by
+ * quotad_aggregator_free_state (). */
+typedef struct {
+ void *pool;
+ xlator_t *this;
+ xlator_t *active_subvol;
+ inode_table_t *itable;
+ loc_t loc;
+ dict_t *xdata;
+} quotad_aggregator_state_t;
+
+/* Completion callback passed to qd_nameless_lookup (); `rsp` points to the
+ * gfs3_lookup_rsp built for the finished lookup. */
+typedef int (*quotad_aggregator_lookup_cbk_t) (xlator_t *this,
+ call_frame_t *frame,
+ void *rsp);
+int
+qd_nameless_lookup (xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
+ dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk);
+int
+quotad_aggregator_init (xlator_t *this);
+
+#endif
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
new file mode 100644
index 00000000000..70298fc87f5
--- /dev/null
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "quotad-helpers.h"
+
+/*
+ * Allocate and fill the per-request aggregator state: the current
+ * translator (THIS), the first child of `this` as active subvolume, that
+ * subvolume's inode table (created with 4096 entries if it has none) and
+ * the context's pool.
+ *
+ * Returns the new state, or NULL when the allocation fails.
+ */
+quotad_aggregator_state_t *
+get_quotad_aggregator_state (xlator_t *this, rpcsvc_request_t *req)
+{
+        quotad_aggregator_state_t *new_state = NULL;
+        xlator_t                  *child     = NULL;
+        quota_priv_t              *conf      = NULL;
+
+        new_state = (void *)GF_CALLOC (1, sizeof (*new_state),
+                                       gf_quota_mt_aggregator_state_t);
+        if (new_state == NULL)
+                return NULL;
+
+        new_state->this = THIS;
+        conf = this->private;
+
+        /* The child is picked while holding the private lock. */
+        LOCK (&conf->lock);
+        child = FIRST_CHILD (this);
+        new_state->active_subvol = child;
+        UNLOCK (&conf->lock);
+
+        if (!child->itable)
+                child->itable = inode_table_new (4096, child);
+
+        new_state->itable = child->itable;
+        new_state->pool = this->ctx->pool;
+
+        return new_state;
+}
+
+/* Release a per-request state: drop its reference on the request
+ * dictionary, if any, then free the state itself. */
+void
+quotad_aggregator_free_state (quotad_aggregator_state_t *state)
+{
+        dict_t *held = state->xdata;
+
+        if (held != NULL)
+                dict_unref (held);
+
+        GF_FREE (state);
+}
+
+/*
+ * Create a call frame for `req` on the service's translator and attach a
+ * freshly allocated aggregator state to it.
+ *
+ * Returns the frame, or NULL when the request is incomplete or when either
+ * the frame or the state cannot be allocated.  A frame is never returned
+ * without a state, since callers dereference frame->root->state.
+ */
+call_frame_t *
+quotad_aggregator_alloc_frame (rpcsvc_request_t *req)
+{
+        call_frame_t              *frame = NULL;
+        quotad_aggregator_state_t *state = NULL;
+        xlator_t                  *this  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("server", req, out);
+        GF_VALIDATE_OR_GOTO ("server", req->trans, out);
+        GF_VALIDATE_OR_GOTO ("server", req->svc, out);
+        GF_VALIDATE_OR_GOTO ("server", req->svc->ctx, out);
+
+        this = req->svc->xl;
+
+        frame = create_frame (this, req->svc->ctx->pool);
+        if (!frame)
+                goto out;
+
+        state = get_quotad_aggregator_state (this, req);
+        if (!state) {
+                /* Do not hand out a half-initialised frame. */
+                STACK_DESTROY (frame->root);
+                frame = NULL;
+                goto out;
+        }
+
+        frame->root->state = state;
+        frame->root->unique = 0;
+
+        frame->this = this;
+out:
+        return frame;
+}
+
+/*
+ * Allocate a frame for `req` and stamp it with the request's identity:
+ * procedure number, xid, uid/gid/pid and lock owner.  The request itself is
+ * stored in frame->local.
+ *
+ * Returns the frame, or NULL when req is NULL or the allocation fails.
+ */
+call_frame_t *
+quotad_aggregator_get_frame_from_req (rpcsvc_request_t *req)
+{
+        call_frame_t *new_frame = NULL;
+
+        GF_VALIDATE_OR_GOTO ("server", req, done);
+
+        new_frame = quotad_aggregator_alloc_frame (req);
+        if (new_frame != NULL) {
+                new_frame->root->op       = req->procnum;
+                new_frame->root->unique   = req->xid;
+                new_frame->root->uid      = req->uid;
+                new_frame->root->gid      = req->gid;
+                new_frame->root->pid      = req->pid;
+                new_frame->root->lk_owner = req->lk_owner;
+
+                new_frame->local = req;
+        }
+done:
+        return new_frame;
+}
diff --git a/xlators/features/quota/src/quotad-helpers.h b/xlators/features/quota/src/quotad-helpers.h
new file mode 100644
index 00000000000..a10fb7fa82a
--- /dev/null
+++ b/xlators/features/quota/src/quotad-helpers.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef QUOTAD_HELPERS_H
+#define QUOTAD_HELPERS_H
+
+#include "rpcsvc.h"
+#include "quota.h"
+#include "quotad-aggregator.h"
+
+void
+quotad_aggregator_free_state (quotad_aggregator_state_t *state);
+
+call_frame_t *
+quotad_aggregator_get_frame_from_req (rpcsvc_request_t *req);
+
+#endif
diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c
new file mode 100644
index 00000000000..dc2665e9622
--- /dev/null
+++ b/xlators/features/quota/src/quotad.c
@@ -0,0 +1,242 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "quota.h"
+#include "quotad-aggregator.h"
+#include "common-utils.h"
+
+/*
+ * Notification entry point: on GF_EVENT_PARENT_UP the aggregator RPC
+ * service is set up (its result is not checked), and every event is then
+ * passed on to default_notify ().  Always returns 0.
+ */
+int
+qd_notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        if (event == GF_EVENT_PARENT_UP)
+                quotad_aggregator_init (this);
+
+        default_notify (this, event, data);
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this translator with gf_quota_mt_end + 1
+ * types.  Returns -1 when `this` is NULL, otherwise the result of
+ * xlator_mem_acct_init () (a warning is logged when that is non-zero).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = -1;
+
+        if (this == NULL)
+                return status;
+
+        status = xlator_mem_acct_init (this, gf_quota_mt_end + 1);
+        if (status != 0)
+                gf_log (this->name, GF_LOG_WARNING, "Memory accounting "
+                        "init failed");
+
+        return status;
+}
+
+/*
+ * Lookup callback wound by qd_nameless_lookup (): packs the result into a
+ * gfs3_lookup_rsp and hands it to the aggregator callback carried in
+ * `cookie`, then frees the serialized xdata and drops a reference on
+ * `inode`.  Always returns 0.
+ */
+int32_t
+qd_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ quotad_aggregator_lookup_cbk_t lookup_cbk = NULL;
+ gfs3_lookup_rsp rsp = {0, };
+
+ lookup_cbk = cookie;
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ /* The macro is given rsp.op_errno and the `out` label; presumably it
+ * jumps there on failure, which skips filling rsp.stat -- confirm. */
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val),
+ rsp.xdata.xdata_len, rsp.op_errno, out);
+
+ gf_stat_from_iatt (&rsp.stat, buf);
+
+out:
+ lookup_cbk (this, frame, &rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ inode_unref (inode);
+
+ return 0;
+}
+
+/*
+ * Find the child of `this` whose "<child-name>.volume-id" option equals
+ * volume_uuid.  Children without that option are skipped.
+ *
+ * Returns the matching child translator, or NULL when there is none or
+ * when either argument is NULL.
+ */
+xlator_t *
+qd_find_subvol (xlator_t *this, char *volume_uuid)
+{
+        xlator_list_t *node = NULL;
+        char           optkey[1024];
+        char          *configured = NULL;
+
+        if (this == NULL || volume_uuid == NULL)
+                return NULL;
+
+        for (node = this->children; node != NULL; node = node->next) {
+                snprintf (optkey, sizeof (optkey), "%s.volume-id",
+                          node->xlator->name);
+
+                if (dict_get_str (this->options, optkey, &configured) < 0)
+                        continue;
+
+                if (strcmp (configured, volume_uuid) == 0)
+                        return node->xlator;
+        }
+
+        return NULL;
+}
+
+/*
+ * Wind a gfid-only ("nameless") lookup for req->gfid to the child volume
+ * named by the "volume-uuid" entry of xdata.  The lookup is flagged with
+ * QUOTA_READ_ONLY_KEY, and its result is delivered to lookup_cbk through
+ * qd_lookup_cbk ().
+ *
+ * On any local failure lookup_cbk is invoked immediately with op_ret = -1
+ * and a non-zero op_errno.  Of the request, only `gfid` is read.
+ *
+ * Always returns 0: the outcome is reported through lookup_cbk.
+ */
+int
+qd_nameless_lookup (xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
+                    dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk)
+{
+        gfs3_lookup_rsp            rsp         = {0, };
+        int                        op_errno    = 0, ret = -1;
+        loc_t                      loc         = {0, };
+        quotad_aggregator_state_t *state       = NULL;
+        xlator_t                  *subvol      = NULL;
+        char                      *volume_uuid = NULL;
+
+        state = frame->root->state;
+
+        frame->root->op = GF_FOP_LOOKUP;
+
+        loc.inode = inode_new (state->itable);
+        if (loc.inode == NULL) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        memcpy (loc.gfid, req->gfid, 16);
+
+        ret = dict_get_str (xdata, "volume-uuid", &volume_uuid);
+        if (ret < 0) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        ret = dict_set_int8 (xdata, QUOTA_READ_ONLY_KEY, 1);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        Q_MSG_ENOMEM, "dict set failed");
+                /* The reply is built from op_errno, so record the error
+                 * there; otherwise the failure is reported with errno 0. */
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        subvol = qd_find_subvol (this, volume_uuid);
+        if (subvol == NULL) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        STACK_WIND_COOKIE (frame, qd_lookup_cbk, lookup_cbk, subvol,
+                           subvol->fops->lookup, &loc, xdata);
+        return 0;
+
+out:
+        rsp.op_ret = -1;
+        rsp.op_errno = op_errno;
+
+        lookup_cbk (this, frame, &rsp);
+
+        /* loc.inode is still NULL when inode_new () itself failed. */
+        if (loc.inode)
+                inode_unref (loc.inode);
+        return 0;
+}
+
+/* Reconfigure entry point: nothing is applied here, `options` is ignored
+ * and 0 is returned. */
+int
+qd_reconfigure (xlator_t *this, dict_t *options)
+{
+ /* As of now quotad is restarted upon alteration of volfile */
+ return 0;
+}
+
+/*
+ * Teardown: free the RPC service handle and the private structure
+ * allocated by qd_init ().  Does nothing when `this` or this->private is
+ * NULL.
+ */
+void
+qd_fini (xlator_t *this)
+{
+        quota_priv_t *priv = NULL;
+
+        if (this == NULL || this->private == NULL)
+                goto out;
+
+        priv = this->private;
+
+        if (priv->rpcsvc) {
+                GF_FREE (priv->rpcsvc);
+                priv->rpcsvc = NULL;
+        }
+
+        GF_FREE (priv);
+
+        /* Do not leave a dangling pointer behind; it also makes a second
+         * call to qd_fini () harmless. */
+        this->private = NULL;
+
+out:
+        return;
+}
+
+/*
+ * Initialise quotad: requires at least one child, then allocates the
+ * private structure, initialises its lock and stores it in this->private.
+ *
+ * Returns 0 on success and -1 when there is no child or the allocation
+ * fails.
+ */
+int32_t
+qd_init (xlator_t *this)
+{
+        int32_t       ret  = -1;
+        quota_priv_t *conf = NULL;
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: quota (%s) not configured for min of 1 child",
+                        this->name);
+                goto fail;
+        }
+
+        /* Sets ret to -1 and jumps to fail when the allocation fails. */
+        QUOTA_ALLOC_OR_GOTO (conf, quota_priv_t, fail);
+        LOCK_INIT (&conf->lock);
+
+        this->private = conf;
+
+        return 0;
+
+fail:
+        GF_FREE (conf);
+        return ret;
+}
+
+/* Translator entry points of quotad, mapped to the qd_* functions above. */
+class_methods_t class_methods = {
+ .init = qd_init,
+ .fini = qd_fini,
+ .reconfigure = qd_reconfigure,
+ .notify = qd_notify
+};
+
+/* quotad registers no file operations of its own. */
+struct xlator_fops fops = {
+};
+
+/* quotad registers no callbacks. */
+struct xlator_cbks cbks = {
+};
+
+/* Options accepted by quotad: the transport type (restricted to the listed
+ * values/patterns) and any "transport.*" key.  The table ends with the
+ * {NULL} key entry. */
+struct volume_options options[] = {
+ { .key = {"transport-type"},
+ .value = {"rpc", "rpc-over-rdma", "tcp", "socket", "ib-verbs",
+ "unix", "ib-sdp", "tcp/server", "ib-verbs/server", "rdma",
+ "rdma*([ \t]),*([ \t])socket",
+ "rdma*([ \t]),*([ \t])tcp",
+ "tcp*([ \t]),*([ \t])rdma",
+ "socket*([ \t]),*([ \t])rdma"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"transport.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ {.key = {NULL}}
+};
diff --git a/xlators/features/quota/src/quotad.sym b/xlators/features/quota/src/quotad.sym
new file mode 100644
index 00000000000..0829ffe1584
--- /dev/null
+++ b/xlators/features/quota/src/quotad.sym
@@ -0,0 +1,7 @@
+fops
+cbks
+class_methods
+options
+mem_acct_init
+reconfigure
+dumpops
diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am
index 15f49966ff7..3edac3f8a1d 100644
--- a/xlators/features/read-only/src/Makefile.am
+++ b/xlators/features/read-only/src/Makefile.am
@@ -1,13 +1,22 @@
-xlator_LTLIBRARIES = read-only.la
+xlator_LTLIBRARIES = read-only.la worm.la
+
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-read_only_la_LDFLAGS = -module -avoidversion
+noinst_HEADERS = read-only.h read-only-mem-types.h read-only-common.h worm-helper.h
+
+read_only_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-read_only_la_SOURCES = read-only.c
+read_only_la_SOURCES = read-only.c read-only-common.c
read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+worm_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+worm_la_SOURCES = read-only-common.c worm-helper.c worm.c
+worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
new file mode 100644
index 00000000000..ad2eaaa5e26
--- /dev/null
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -0,0 +1,417 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "read-only.h"
+#include "read-only-mem-types.h"
+#include "defaults.h"
+
+/*
+ * Report whether the read-only/WORM restriction is currently switched on for
+ * this translator instance, as recorded in its private data.
+ */
+gf_boolean_t
+is_readonly_or_worm_enabled (xlator_t *this)
+{
+        read_only_priv_t *priv = this->private;
+
+        GF_ASSERT (priv);
+
+        return priv->readonly_or_worm_enabled;
+}
+
+/*
+ * dict_foreach() callback used by the xattrop handlers. It returns -1, which
+ * ends the iteration, as soon as mem_0filled() gives a non-zero result for a
+ * value; otherwise it returns 0 so that the walk continues.
+ */
+static int
+_check_key_is_zero_filled (dict_t *d, char *k, data_t *v,
+                           void *tmp)
+{
+        /* -1 means, no more iterations, treat as 'break' */
+        return mem_0filled ((const char *)v->data, v->len) ? -1 : 0;
+}
+
+/*
+ * xattrop handler. The request is refused with EROFS only when
+ * read-only/WORM is enabled and the walk over dict with
+ * _check_key_is_zero_filled() was cut short (dict_foreach() returned
+ * non-zero). In every other case it is wound to the first child.
+ */
+int32_t
+ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        int          walk_ret = 0;
+        gf_boolean_t allzero  = _gf_false;
+
+        walk_ret = dict_foreach (dict, _check_key_is_zero_filled, NULL);
+        allzero = (walk_ret == 0) ? _gf_true : _gf_false;
+
+        if (is_readonly_or_worm_enabled (this) && !allzero) {
+                STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->xattrop,
+                         loc, flags, dict, xdata);
+        return 0;
+}
+
+/*
+ * fd-based counterpart of ro_xattrop(): refuse with EROFS only when
+ * read-only/WORM is enabled and the dict walk with
+ * _check_key_is_zero_filled() was cut short; otherwise wind to the first
+ * child.
+ */
+int32_t
+ro_fxattrop (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+        int          walk_ret = 0;
+        gf_boolean_t allzero  = _gf_false;
+
+        walk_ret = dict_foreach (dict, _check_key_is_zero_filled, NULL);
+        allzero = (walk_ret == 0) ? _gf_true : _gf_false;
+
+        if (is_readonly_or_worm_enabled (this) && !allzero) {
+                STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->fxattrop,
+                         fd, flags, dict, xdata);
+        return 0;
+}
+
+/*
+ * entrylk is never blocked here: the request is handed unchanged to the
+ * first child.
+ */
+int32_t
+ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+            loc_t *loc, const char *basename, entrylk_cmd cmd,
+            entrylk_type type, dict_t *xdata)
+{
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->entrylk,
+                         volume, loc, basename, cmd, type, xdata);
+        return 0;
+}
+
+/*
+ * fentrylk is never blocked here: the request is handed unchanged to the
+ * first child.
+ */
+int32_t
+ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+             fd_t *fd, const char *basename, entrylk_cmd cmd,
+             entrylk_type type, dict_t *xdata)
+{
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->fentrylk,
+                         volume, fd, basename, cmd, type, xdata);
+        return 0;
+}
+
+/*
+ * inodelk is never blocked here: the request is handed unchanged to the
+ * first child.
+ */
+int32_t
+ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+            loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->inodelk,
+                         volume, loc, cmd, lock, xdata);
+        return 0;
+}
+
+/*
+ * finodelk is never blocked here: the request is handed unchanged to the
+ * first child.
+ */
+int32_t
+ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+             fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->finodelk,
+                         volume, fd, cmd, lock, xdata);
+        return 0;
+}
+
+/*
+ * lk is never blocked here: the request is handed unchanged to the first
+ * child.
+ */
+int32_t
+ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+       struct gf_flock *flock, dict_t *xdata)
+{
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->lk, fd, cmd, flock,
+                         xdata);
+        return 0;
+}
+
+/*
+ * setattr handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->setattr, loc, stbuf,
+                         valid, xdata);
+        return 0;
+}
+
+/*
+ * fsetattr handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->fsetattr, fd, stbuf,
+                         valid, xdata);
+        return 0;
+}
+
+
+/*
+ * truncate handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+             dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->truncate, loc, offset,
+                         xdata);
+        return 0;
+}
+
+/*
+ * ftruncate handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->ftruncate, fd, offset,
+                         xdata);
+        return 0;
+}
+
+/*
+ * mknod handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int
+ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL,
+                                     NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->mknod, loc, mode,
+                         rdev, umask, xdata);
+        return 0;
+}
+
+
+/*
+ * mkdir handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int
+ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL,
+                                     NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->mkdir, loc, mode,
+                         umask, xdata);
+        return 0;
+}
+
+/*
+ * unlink handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+           dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->unlink, loc, xflag,
+                         xdata);
+        return 0;
+}
+
+
+/*
+ * rmdir handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+          dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->rmdir, loc, flags,
+                         xdata);
+        return 0;
+}
+
+
+/*
+ * symlink handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int
+ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+            loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL,
+                                     NULL, NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->symlink, linkpath,
+                         loc, umask, xdata);
+        return 0;
+}
+
+
+
+/*
+ * rename handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL,
+                                     NULL, NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->rename, oldloc,
+                         newloc, xdata);
+        return 0;
+}
+
+
+/*
+ * link handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+         dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL,
+                                     NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->link, oldloc, newloc,
+                         xdata);
+        return 0;
+}
+
+/*
+ * create handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL,
+                                     NULL, NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->create, loc, flags,
+                         mode, umask, fd, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for the open wound by ro_open(): relays the child's result
+ * (op_ret, op_errno, fd, xdata) to the caller without modification.
+ */
+static int32_t
+ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+             int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+
+        return 0;
+}
+
+/*
+ * open handler. While read-only/WORM is enabled, an open whose access mode is
+ * O_WRONLY or O_RDWR is unwound with -1/EROFS. Every other open is wound to
+ * the first child with ro_open_cbk() as the callback.
+ */
+int32_t
+ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         fd_t *fd, dict_t *xdata)
+{
+        gf_boolean_t restricted = is_readonly_or_worm_enabled (this);
+        int32_t      accmode    = flags & O_ACCMODE;
+
+        if (restricted && (accmode == O_WRONLY || accmode == O_RDWR)) {
+                STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata);
+                return 0;
+        }
+
+        STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+        return 0;
+}
+
+/*
+ * fsetxattr handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+              int32_t flags, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->fsetxattr, fd, dict,
+                         flags, xdata);
+        return 0;
+}
+
+/*
+ * fsyncdir handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+             dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->fsyncdir, fd, flags,
+                         xdata);
+        return 0;
+}
+
+/*
+ * writev handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+           int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+           dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL,
+                                     xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->writev, fd, vector,
+                         count, off, flags, iobref, xdata);
+        return 0;
+}
+
+
+/*
+ * setxattr handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+             int32_t flags, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->setxattr, loc, dict,
+                         flags, xdata);
+        return 0;
+}
+
+/*
+ * removexattr handler: unwinds with -1/EROFS while read-only/WORM is enabled,
+ * otherwise winds the request to the first child.
+ */
+int32_t
+ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        if (is_readonly_or_worm_enabled (this)) {
+                STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD(this)->fops->removexattr, loc,
+                         name, xdata);
+        return 0;
+}
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
new file mode 100644
index 00000000000..248ca47b660
--- /dev/null
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -0,0 +1,113 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __READONLY_COMMON_H__
+#define __READONLY_COMMON_H__
+
+#include "xlator.h"
+#include "defaults.h"
+
+/*
+ * Prototypes of the fop handlers shared by the read-only and worm
+ * translators. The include guard makes the header safe to include more than
+ * once, like the other headers in this directory.
+ */
+
+gf_boolean_t
+is_readonly_or_worm_enabled (xlator_t *this);
+
+int32_t
+ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+ro_fxattrop (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+            loc_t *loc, const char *basename, entrylk_cmd cmd,
+            entrylk_type type, dict_t *xdata);
+
+int32_t
+ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+             fd_t *fd, const char *basename, entrylk_cmd cmd,
+             entrylk_type type, dict_t *xdata);
+
+int32_t
+ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+            loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+             fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+       struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+
+int32_t
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata);
+
+int32_t
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata);
+
+int
+ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t rdev, mode_t umask, dict_t *xdata);
+
+int
+ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, dict_t *xdata);
+
+int32_t
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+           dict_t *xdata);
+
+int
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+          dict_t *xdata);
+
+
+int
+ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+            loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+int32_t
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+int32_t
+ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int32_t
+ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         fd_t *fd, dict_t *xdata);
+
+int32_t
+ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+              int32_t flags, dict_t *xdata);
+
+int32_t
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata);
+
+int32_t
+ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+           int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+int32_t
+ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+             int32_t flags, dict_t *xdata);
+
+int32_t
+ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata);
+
+#endif /* __READONLY_COMMON_H__ */
diff --git a/xlators/features/read-only/src/read-only-mem-types.h b/xlators/features/read-only/src/read-only-mem-types.h
new file mode 100644
index 00000000000..940700a017d
--- /dev/null
+++ b/xlators/features/read-only/src/read-only-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READONLY_MEM_TYPES_H__
+#define __READONLY_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_read_only_mem_types_ { /* memory-accounting type ids of the read-only translator */
+ gf_read_only_mt_priv_t = gf_common_mt_end + 1, /* first id after the common types; used when allocating read_only_priv_t */
+ gf_read_only_mt_end /* end marker; passed (+ 1) to xlator_mem_acct_init() */
+};
+#endif
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
index b8ba9218415..8733a40abce 100644
--- a/xlators/features/read-only/src/read-only.c
+++ b/xlators/features/read-only/src/read-only.c
@@ -1,251 +1,37 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
#include "defaults.h"
+#include "read-only-common.h"
+#include "read-only-mem-types.h"
+#include "read-only.h"
int32_t
-ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL);
- return 0;
-}
-
-int32_t
-ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
-{
- STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL);
- return 0;
-}
-
-int32_t
-ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
-{
- STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS);
- return 0;
-}
-
-int32_t
-ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS);
- return 0;
-}
-
-int32_t
-ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *lock)
-{
- STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS);
- return 0;
-}
-
-int32_t
-ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *lock)
-{
- STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS);
- return 0;
-}
-
-int32_t
-ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct flock *flock)
-{
- STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL);
- return 0;
-}
-
-int32_t
-ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
-{
- STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-
-int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-
-int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-
-static int32_t
-ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
-{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
-{
- if (((flags & O_ACCMODE) == O_WRONLY) ||
- ((flags & O_ACCMODE) == O_RDWR)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL);
- return 0;
- }
-
- STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
- return 0;
-}
-
-int32_t
-ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+mem_acct_init (xlator_t *this)
{
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS);
- return 0;
-}
-
-int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS);
- return 0;
-}
-
-int32_t
-ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
-{
- STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL);
- return 0;
-}
-
+ int ret = -1;
-int32_t
-ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
-{
- STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS);
- return 0;
-}
+ ret = xlator_mem_acct_init (this, gf_read_only_mt_end + 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting "
+ "initialization failed.");
-int32_t
-ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS);
- return 0;
+ return ret;
}
int32_t
init (xlator_t *this)
{
- if (!this->children || this->children->next) {
+ int ret = -1;
+ read_only_priv_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
gf_log (this->name, GF_LOG_ERROR,
"translator not configured with exactly one child");
return -1;
@@ -256,14 +42,50 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- return 0;
+ priv = GF_CALLOC (1, sizeof (*priv), gf_read_only_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ GF_OPTION_INIT ("read-only", priv->readonly_or_worm_enabled, bool, out);
+
+ this->private = priv;
+ ret = 0;
+out:
+ return ret;
}
+/*
+ * Re-read the "read-only" option from the supplied options dict and store it
+ * in the translator's private data.
+ * Returns 0 on success and -1 when the option could not be reconfigured; the
+ * result is also logged at DEBUG level.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        int               ret     = -1;
+        gf_boolean_t      enabled = _gf_false;
+        read_only_priv_t *priv    = this->private;
+
+        GF_ASSERT (priv);
+
+        /* jumps to 'out' (leaving ret at -1) if the option cannot be read */
+        GF_OPTION_RECONF ("read-only", enabled, options, bool, out);
+
+        priv->readonly_or_worm_enabled = enabled;
+        ret = 0;
+out:
+        gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+        return ret;
+}
void
fini (xlator_t *this)
{
- return;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ this->private = NULL;
+ GF_FREE (priv);
+
+ return;
}
@@ -299,5 +121,10 @@ struct xlator_cbks cbks = {
};
struct volume_options options[] = {
- { .key = {NULL} },
+ { .key = {"read-only"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When \"on\", makes a volume read-only. It is turned "
+ "\"off\" by default."
+ },
};
diff --git a/xlators/features/read-only/src/read-only.h b/xlators/features/read-only/src/read-only.h
new file mode 100644
index 00000000000..d0263e74179
--- /dev/null
+++ b/xlators/features/read-only/src/read-only.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READONLY_H__
+#define __READONLY_H__
+
+#include "read-only-mem-types.h"
+#include "xlator.h"
+
+
+typedef struct { /* per-file WORM/retention state, stored in the "trusted.reten_state" xattr */
+ uint8_t worm : 1; /* serialized as bit 0 of the state word */
+ uint8_t retain : 1; /* serialized as bit 1; cleared when the retention state is reset */
+ uint8_t legal_hold :1; /* serialized as bit 2 */
+ uint8_t ret_mode : 1; /* serialized as bit 3; 0 when the configured mode is "relax", 1 otherwise */
+ uint64_t ret_period; /* retention period; added to the current time to form the file's atime */
+ uint64_t auto_commit_period; /* copied from the translator's com_period */
+} worm_reten_state_t;
+
+
+typedef struct { /* private data shared by the read-only and worm translators */
+ gf_boolean_t readonly_or_worm_enabled; /* when set, the mutating ro_* fops fail with EROFS */
+ gf_boolean_t worm_file; /* NOTE(review): not used in the code visible here - presumably enables file-level WORM; confirm in worm.c */
+ uint64_t reten_period; /* copied into worm_reten_state_t.ret_period by worm_set_state() */
+ uint64_t com_period; /* copied into worm_reten_state_t.auto_commit_period by worm_set_state() */
+ char *reten_mode; /* compared against "relax" by worm_set_state() */
+ time_t start_time; /* NOTE(review): not used in the code visible here - confirm purpose in worm.c */
+} read_only_priv_t;
+
+#endif
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
new file mode 100644
index 00000000000..61aa4f02651
--- /dev/null
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -0,0 +1,413 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "read-only-mem-types.h"
+#include "read-only.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "worm-helper.h"
+
+/* Tell whether a file is write-protected for everyone.
+ * stbuf holds the attributes of the file; its owner, group and other write
+ * permission bits are inspected.
+ * Returns _gf_true when all three write bits are clear and _gf_false
+ * otherwise (also when the stbuf validation jumps to 'out'). */
+gf_boolean_t
+gf_worm_write_disabled (struct iatt *stbuf)
+{
+        gf_boolean_t write_disabled = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("worm", stbuf, out);
+
+        if (!(stbuf->ia_prot.owner.write ||
+              stbuf->ia_prot.group.write ||
+              stbuf->ia_prot.other.write))
+                write_disabled = _gf_true;
+out:
+        return write_disabled;
+}
+
+
+/* Record the current time in the "trusted.start_time" xattr of a file.
+ * file_ptr is treated as an fd_t * when fop_with_fd is true and as a loc_t *
+ * otherwise.
+ * Returns the result of the (f)setxattr syncop; -1 if argument validation or
+ * dict creation fails; the dict_set_uint64() result if that call fails. */
+int32_t
+worm_init_state (xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
+{
+        int       ret        = -1;
+        uint64_t  start_time = 0;
+        dict_t   *dict       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("worm", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, file_ptr, out);
+
+        start_time = time (NULL);
+        dict = dict_new ();
+        if (!dict) {
+                gf_log (this->name, GF_LOG_ERROR, "Error creating the dict");
+                goto out;
+        }
+        ret = dict_set_uint64 (dict, "trusted.start_time", start_time);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Error in setting the dict");
+                goto out;
+        }
+        if (fop_with_fd)
+                ret = syncop_fsetxattr (this, (fd_t *)file_ptr, dict, 0,
+                                        NULL, NULL);
+        else
+                ret = syncop_setxattr (this, (loc_t *)file_ptr, dict, 0, NULL,
+                                       NULL);
+out:
+        /* Drop our reference instead of destroying the dict outright, so
+         * that the dict's reference counting is respected. */
+        if (dict)
+                dict_unref (dict);
+        return ret;
+}
+
+
+/* Put a file into the WORM-Retained state.
+ * Fills *retention_state from the translator's private configuration
+ * (retention mode, retention period, auto-commit period), sets the file's
+ * atime to "now + retention period" and stores the state through
+ * gf_worm_set_xattr().
+ * file_ptr is treated as an fd_t * when fop_with_fd is true and as a loc_t *
+ * otherwise.
+ * Returns 0 on success, -1 if argument validation fails, otherwise the
+ * non-zero result of the call that failed. */
+int32_t
+worm_set_state (xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+                worm_reten_state_t *retention_state, struct iatt *stbuf)
+{
+        read_only_priv_t *priv     = NULL;
+        struct iatt       cur_stat = {0,};
+        int               ret      = -1;
+
+        GF_VALIDATE_OR_GOTO ("worm", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, file_ptr, out);
+        GF_VALIDATE_OR_GOTO (this->name, retention_state, out);
+        GF_VALIDATE_OR_GOTO (this->name, stbuf, out);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        retention_state->worm = 1;
+        retention_state->retain = 1;
+        retention_state->legal_hold = 0;
+        /* "relax" maps to mode 0, anything else to mode 1 */
+        retention_state->ret_mode =
+                (strcmp (priv->reten_mode, "relax") == 0) ? 0 : 1;
+        retention_state->ret_period = priv->reten_period;
+        retention_state->auto_commit_period = priv->com_period;
+
+        ret = fop_with_fd
+                ? syncop_fstat (this, (fd_t *)file_ptr, &cur_stat, NULL, NULL)
+                : syncop_stat (this, (loc_t *)file_ptr, &cur_stat, NULL, NULL);
+        if (ret)
+                goto out;
+
+        stbuf->ia_mtime = cur_stat.ia_mtime;
+        stbuf->ia_atime = time (NULL) + retention_state->ret_period;
+
+        ret = fop_with_fd
+                ? syncop_fsetattr (this, (fd_t *)file_ptr, stbuf,
+                                   GF_SET_ATTR_ATIME, NULL, NULL,
+                                   NULL, NULL)
+                : syncop_setattr (this, (loc_t *)file_ptr, stbuf,
+                                  GF_SET_ATTR_ATIME, NULL, NULL,
+                                  NULL, NULL);
+        if (ret)
+                goto out;
+
+        ret = gf_worm_set_xattr (this, retention_state, fop_with_fd, file_ptr);
+        if (ret)
+                gf_log (this->name, GF_LOG_ERROR, "Error setting xattr");
+out:
+        return ret;
+}
+
+
+/* Read the "trusted.reten_state" xattr of a file and load the decoded
+ * WORM/Retention state into *reten_state.
+ * file_ptr is treated as an fd_t * when fop_with_fd is true and as a loc_t *
+ * otherwise.
+ * Returns 0 on success, -1 if argument validation fails or the xattr could
+ * not be fetched, and -2 if the fetched dict holds no value for the key (in
+ * which case *reten_state is left untouched). */
+int32_t
+worm_get_state (xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+                worm_reten_state_t *reten_state)
+{
+        dict_t *dict = NULL;
+        char   *val  = NULL;
+        int     ret  = -1;
+
+        GF_VALIDATE_OR_GOTO ("worm", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, file_ptr, out);
+        GF_VALIDATE_OR_GOTO (this->name, reten_state, out);
+
+        if (fop_with_fd)
+                ret = syncop_fgetxattr (this, (fd_t *)file_ptr, &dict,
+                                        "trusted.reten_state", NULL, NULL);
+        else
+                ret = syncop_getxattr (this, (loc_t *)file_ptr, &dict,
+                                       "trusted.reten_state", NULL, NULL);
+        if (ret < 0 || !dict) {
+                ret = -1;
+                goto out;
+        }
+        ret = dict_get_str (dict, "trusted.reten_state", &val);
+        if (ret) {
+                ret = -2;
+                gf_log (this->name, GF_LOG_ERROR, "Empty val");
+                /* nothing to decode: do not hand a NULL string to the
+                 * deserializer */
+                goto out;
+        }
+        gf_worm_deserialize_state (val, reten_state);
+out:
+        if (dict)
+                dict_unref (dict);
+        return ret;
+}
+
+
+/* Reset the retention part of a file's WORM/Retention state.
+ * The retention period is subtracted from stbuf->ia_atime, the retain flag
+ * and both periods in *reten_state are cleared, the state is written back
+ * with gf_worm_set_xattr() and finally the adjusted atime is applied to the
+ * file. Failures are not reported to the caller; a failed xattr update is
+ * logged and skips the atime update.
+ * file_ptr is treated as an fd_t * when fop_with_fd is true and as a loc_t *
+ * otherwise. */
+void
+gf_worm_state_lookup (xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+                      worm_reten_state_t *reten_state, struct iatt *stbuf)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("worm", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, file_ptr, out);
+        GF_VALIDATE_OR_GOTO (this->name, reten_state, out);
+        GF_VALIDATE_OR_GOTO (this->name, stbuf, out);
+
+        /* must use ret_period before it is cleared below */
+        stbuf->ia_atime -= reten_state->ret_period;
+        reten_state->retain = 0;
+        reten_state->ret_period = 0;
+        reten_state->auto_commit_period = 0;
+
+        ret = gf_worm_set_xattr (this, reten_state, fop_with_fd, file_ptr);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Error setting xattr");
+                goto out;
+        }
+
+        ret = fop_with_fd
+                ? syncop_fsetattr (this, (fd_t *)file_ptr, stbuf,
+                                   GF_SET_ATTR_ATIME, NULL, NULL,
+                                   NULL, NULL)
+                : syncop_setattr (this, (loc_t *)file_ptr, stbuf,
+                                  GF_SET_ATTR_ATIME, NULL, NULL,
+                                  NULL, NULL);
+        if (ret == 0)
+                gf_log (this->name, GF_LOG_INFO, "Retention state reset");
+out:
+        return;
+}
+
+
+/* Serialize the WORM/Retention state of a file into the string val.
+ * The four flags are packed into one integer (worm = bit 0, retain = bit 1,
+ * legal_hold = bit 2, ret_mode = bit 3) and written as
+ * "<flags>/<ret_period>/<auto_commit_period>" in decimal.
+ * val must be large enough for the result; the size is not checked here.
+ * Nothing is written when either argument fails validation. */
+void
+gf_worm_serialize_state (worm_reten_state_t *reten_state, char *val)
+{
+        uint32_t state = 0;
+
+        GF_VALIDATE_OR_GOTO ("worm", reten_state, out);
+        GF_VALIDATE_OR_GOTO ("worm", val, out);
+
+        state |= reten_state->worm << 0;
+        state |= reten_state->retain << 1;
+        state |= reten_state->legal_hold << 2;
+        state |= reten_state->ret_mode << 3;
+        /* state is unsigned: use the matching conversion specifier */
+        sprintf (val, "%"PRIu32"/%"PRIu64"/%"PRIu64, state,
+                 reten_state->ret_period, reten_state->auto_commit_period);
+
+out:
+        return;
+}
+
+
+/* Decode the "<flags>/<ret_period>/<auto_commit_period>" string stored in a
+ * file's xattr and load the values into *reten_state.
+ * val is modified in place while it is split at the '/' separators.
+ * Fields whose token is missing from val are left unchanged, so a truncated
+ * or empty string no longer leads to a NULL token being parsed. */
+void
+gf_worm_deserialize_state (char *val, worm_reten_state_t *reten_state)
+{
+        char     *token   = NULL;
+        char     *saveptr = NULL;
+        uint32_t  state   = 0;
+
+        GF_VALIDATE_OR_GOTO ("worm", val, out);
+        GF_VALIDATE_OR_GOTO ("worm", reten_state, out);
+
+        /* strtok_r keeps its position in saveptr, so concurrent callers do
+         * not disturb each other the way they would with strtok() */
+        token = strtok_r (val, "/", &saveptr);
+        if (!token)
+                goto out;
+        state = (uint32_t) strtoul (token, NULL, 10);
+        reten_state->worm = (state >> 0) & 1;
+        reten_state->retain = (state >> 1) & 1;
+        reten_state->legal_hold = (state >> 2) & 1;
+        reten_state->ret_mode = (state >> 3) & 1;
+
+        /* the periods are 64-bit: parse them at full width */
+        token = strtok_r (NULL, "/", &saveptr);
+        if (!token)
+                goto out;
+        reten_state->ret_period = strtoull (token, NULL, 10);
+
+        token = strtok_r (NULL, "/", &saveptr);
+        if (!token)
+                goto out;
+        reten_state->auto_commit_period = strtoull (token, NULL, 10);
+
+out:
+        return;
+}
+
+
+/* Store the serialized WORM/Retention state in the "trusted.reten_state"
+ * xattr of a file.
+ * file_ptr is treated as an fd_t * when fop_with_fd is true and as a loc_t *
+ * otherwise.
+ * Returns the result of the (f)setxattr syncop; -1 if argument validation or
+ * dict creation fails; the dict_set_str() result if that call fails. */
+int32_t
+gf_worm_set_xattr (xlator_t *this, worm_reten_state_t *reten_state,
+                   gf_boolean_t fop_with_fd, void *file_ptr)
+{
+        char    val[100] = "";
+        int     ret      = -1;
+        dict_t *dict     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("worm", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, reten_state, out);
+        GF_VALIDATE_OR_GOTO (this->name, file_ptr, out);
+
+        gf_worm_serialize_state (reten_state, val);
+        dict = dict_new ();
+        if (!dict) {
+                gf_log (this->name, GF_LOG_ERROR, "Error creating the dict");
+                goto out;
+        }
+        ret = dict_set_str (dict, "trusted.reten_state", val);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Error in setting the dict");
+                goto out;
+        }
+        if (fop_with_fd)
+                ret = syncop_fsetxattr (this, (fd_t *)file_ptr, dict, 0,
+                                        NULL, NULL);
+        else
+                ret = syncop_setxattr (this, (loc_t *)file_ptr, dict, 0, NULL,
+                                       NULL);
+out:
+        /* Drop our reference instead of destroying the dict outright, so
+         * that the dict's reference counting is respected. */
+        if (dict)
+                dict_unref (dict);
+        return ret;
+}
+
+
+/* Decide whether a FOP on a file may proceed, performing the auto-commit
+ * state transition first when it is due.
+ *
+ * this        - the worm xlator instance; this->private supplies com_period.
+ * fop_with_fd - when true file_ptr is used as an fd_t *, otherwise as a
+ *               loc_t *.
+ * file_ptr    - the fd or loc identifying the file.
+ * op          - the FOP being checked; only GF_FOP_UNLINK is treated
+ *               specially.
+ *
+ * Return (callers use the value as op_errno):
+ *   0     : the FOP should continue, i.e. no retention state is stored yet
+ *           and the auto-commit period has not elapsed (measured from
+ *           "trusted.start_time" and from the file's mtime), or the FOP is an
+ *           unlink and the file is in the WORM state but not Retained.
+ *   EROFS : the FOP should be blocked. This includes the call during which
+ *           the file has just been moved into the WORM state.
+ *   other : the return code of the helper that failed (xattr lookup, stat,
+ *           reading or setting the state).
+ *           NOTE(review): the syncop calls presumably return a negative
+ *           errno, so this value would be negative - confirm that callers
+ *           handle it. */
+int
+gf_worm_state_transition (xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, glusterfs_fop_t op)
+{
+ int op_errno = EROFS;
+ int ret = -1;
+ uint64_t com_period = 0;
+ uint64_t start_time = 0;
+ dict_t *dict = NULL;
+ worm_reten_state_t reten_state = {0,};
+ read_only_priv_t *priv = NULL;
+ struct iatt stbuf = {0,};
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Fetch the start time recorded in the "trusted.start_time" xattr. */
+ if (fop_with_fd)
+ ret = syncop_fgetxattr (this, (fd_t *)file_ptr, &dict,
+ "trusted.start_time", NULL, NULL);
+ else
+ ret = syncop_getxattr (this, (loc_t *)file_ptr, &dict,
+ "trusted.start_time", NULL, NULL);
+ if (ret < 0 || !dict) {
+ op_errno = ret;
+ gf_msg (this->name, GF_LOG_ERROR, -ret, 0,
+ "Error getting xattr");
+ goto out;
+ }
+ ret = dict_get_uint64 (dict, "trusted.start_time", &start_time);
+ if (ret) {
+ op_errno = ret;
+ gf_msg (this->name, GF_LOG_ERROR, -ret, 0,
+ "Error getting start time");
+ goto out;
+ }
+
+ com_period = priv->com_period;
+ /* stbuf provides the mtime/atime compared against the current time below. */
+ if (fop_with_fd)
+ ret = syncop_fstat (this, (fd_t *)file_ptr, &stbuf, NULL, NULL);
+ else
+ ret = syncop_stat (this, (loc_t *)file_ptr, &stbuf, NULL, NULL);
+ if (ret) {
+ op_errno = ret;
+ gf_msg (this->name, GF_LOG_ERROR, -ret, 0,
+ "Error getting file stat");
+ goto out;
+ }
+
+ /* Only -2 is treated as a failure here; -1 is handled by the branches
+ * below (presumably "no retention state stored yet" - confirm against
+ * worm_get_state). */
+ ret = worm_get_state (this, fop_with_fd, file_ptr, &reten_state);
+ if (ret == -2) {
+ op_errno = ret;
+ gf_msg (this->name, GF_LOG_ERROR, -ret, 0,
+ "Error getting worm/retention state");
+ goto out;
+ }
+
+ if (ret == -1 && (time (NULL) - start_time) >= com_period) {
+ /* The auto-commit window since start_time has passed; commit only if
+ * the file has also been unmodified for a full window. */
+ if ((time (NULL) - stbuf.ia_mtime) >= com_period) {
+ ret = worm_set_state(this, fop_with_fd, file_ptr,
+ &reten_state, &stbuf);
+ if (ret) {
+ op_errno = ret;
+ gf_msg (this->name, GF_LOG_ERROR, -ret, 0,
+ "Error setting worm/retention state");
+ goto out;
+ }
+ /* State was set: op_errno is still EROFS, so this FOP is blocked. */
+ goto out;
+ } else {
+ op_errno = 0;
+ goto out;
+ }
+ } else if (ret == -1 && (time (NULL) - start_time)
+ < com_period) {
+ /* Still inside the auto-commit window: let the FOP through. */
+ op_errno = 0;
+ goto out;
+ } else if (reten_state.retain &&
+ ((time (NULL) >= stbuf.ia_atime))) {
+ /* The current time has reached the file's atime while it is Retained;
+ * gf_worm_state_lookup is handed reten_state and stbuf (presumably to
+ * end the retention - confirm against its definition). */
+ gf_worm_state_lookup (this, fop_with_fd, file_ptr,
+ &reten_state, &stbuf);
+ }
+ /* A WORM file that is not Retained may still be unlinked. */
+ if (reten_state.worm && !reten_state.retain &&
+ op == GF_FOP_UNLINK) {
+ op_errno = 0;
+ goto out;
+ }
+
+out:
+ if (dict)
+ dict_unref (dict);
+ return op_errno;
+}
+
+
+/* Probe a file for the "trusted.worm_file" xattr, which marks files that are
+ * WORMed independently (file-level WORM).
+ *
+ * fop_with_fd selects whether file_ptr is used as an fd_t * or a loc_t *.
+ *
+ * Returns 0 when the getxattr call handed back a dict; otherwise the return
+ * value of the syncop call itself. */
+int32_t
+is_wormfile (xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
+{
+        dict_t *xattr = NULL;
+        int     rc    = -1;
+
+        if (!fop_with_fd) {
+                rc = syncop_getxattr (this, (loc_t *)file_ptr, &xattr,
+                                      "trusted.worm_file", NULL, NULL);
+        } else {
+                rc = syncop_fgetxattr (this, (fd_t *)file_ptr, &xattr,
+                                       "trusted.worm_file", NULL, NULL);
+        }
+
+        if (!xattr)
+                return rc;
+
+        dict_unref (xattr);
+        return 0;
+} \ No newline at end of file
diff --git a/xlators/features/read-only/src/worm-helper.h b/xlators/features/read-only/src/worm-helper.h
new file mode 100644
index 00000000000..745df8294c3
--- /dev/null
+++ b/xlators/features/read-only/src/worm-helper.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+gf_boolean_t gf_worm_write_disabled (struct iatt *stbuf);
+
+int32_t worm_init_state (xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr);
+
+int32_t worm_set_state (xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, worm_reten_state_t *retention_state,
+ struct iatt *stbuf);
+
+int32_t worm_get_state (xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, worm_reten_state_t *reten_state);
+
+void gf_worm_state_lookup (xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, worm_reten_state_t *reten_state,
+ struct iatt *stbuf);
+
+void gf_worm_serialize_state (worm_reten_state_t *reten_state, char *val);
+
+void gf_worm_deserialize_state (char *val, worm_reten_state_t *reten_state);
+
+int32_t gf_worm_set_xattr (xlator_t *this, worm_reten_state_t *reten_state,
+ gf_boolean_t fop_with_fd, void *file_ptr);
+
+int gf_worm_state_transition (xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, glusterfs_fop_t op);
+
+int32_t is_wormfile (xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr);
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
new file mode 100644
index 00000000000..3e32d65dbac
--- /dev/null
+++ b/xlators/features/read-only/src/worm.c
@@ -0,0 +1,606 @@
+/*
+ Copyright (c) 2008-2012, 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "defaults.h"
+#include "read-only-common.h"
+#include "read-only-mem-types.h"
+#include "read-only.h"
+#include "syncop.h"
+#include "worm-helper.h"
+
+
+/* Set up memory accounting for this translator with room for every
+ * read-only memory type. Returns the result of xlator_mem_acct_init and logs
+ * an error when it is non-zero. */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = xlator_mem_acct_init (this, gf_read_only_mt_end + 1);
+
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Memory accounting initialization failed.");
+        }
+
+        return ret;
+}
+
+
+/* open FOP: when volume-level read-only/WORM is enabled, refuse any open
+ * that asks for write access (O_WRONLY, O_RDWR or O_APPEND) with EROFS;
+ * every other open is passed straight to the child. */
+static int32_t
+worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+           fd_t *fd, dict_t *xdata)
+{
+        const int32_t write_flags = O_WRONLY | O_RDWR | O_APPEND;
+
+        if (!is_readonly_or_worm_enabled (this) || !(flags & write_flags)) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->open,
+                                 loc, flags, fd, xdata);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL);
+        return 0;
+}
+
+
+/* link FOP.
+ *
+ * - Volume-level read-only/WORM enabled: fail with EROFS.
+ * - File-level WORM disabled: pass through.
+ * - Otherwise pass through when is_wormfile() returns non-zero for oldloc,
+ *   else let gf_worm_state_transition() decide (0 winds, anything else is
+ *   returned to the caller as op_errno). */
+static int32_t
+worm_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+        read_only_priv_t *priv     = this->private;
+        int               op_errno = EROFS;
+
+        GF_ASSERT (priv);
+
+        if (!is_readonly_or_worm_enabled (this)) {
+                if (!priv->worm_file) {
+                        op_errno = 0;
+                } else {
+                        gf_uuid_copy (oldloc->gfid, oldloc->inode->gfid);
+                        if (is_wormfile (this, _gf_false, oldloc))
+                                op_errno = 0;
+                        else
+                                op_errno = gf_worm_state_transition (this,
+                                                                _gf_false,
+                                                                oldloc,
+                                                                GF_FOP_LINK);
+                }
+        }
+
+        if (!op_errno) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->link,
+                                 oldloc, newloc, xdata);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
+                             NULL, NULL, NULL);
+        return 0;
+}
+
+
+/* unlink FOP.
+ *
+ * - Volume-level read-only/WORM enabled: fail with EROFS.
+ * - File-level WORM disabled: pass through.
+ * - Otherwise pass through when is_wormfile() returns non-zero for loc,
+ *   else let gf_worm_state_transition() decide (0 winds, anything else is
+ *   returned to the caller as op_errno). */
+static int32_t
+worm_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+             dict_t *xdata)
+{
+        read_only_priv_t *priv     = this->private;
+        int               op_errno = EROFS;
+
+        GF_ASSERT (priv);
+
+        if (!is_readonly_or_worm_enabled (this)) {
+                if (!priv->worm_file) {
+                        op_errno = 0;
+                } else {
+                        gf_uuid_copy (loc->gfid, loc->inode->gfid);
+                        if (is_wormfile (this, _gf_false, loc))
+                                op_errno = 0;
+                        else
+                                op_errno = gf_worm_state_transition (this,
+                                                                _gf_false,
+                                                                loc,
+                                                                GF_FOP_UNLINK);
+                }
+        }
+
+        if (!op_errno) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->unlink,
+                                 loc, flags, xdata);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+                             NULL);
+        return 0;
+}
+
+
+/* rename FOP.
+ *
+ * - Volume-level read-only/WORM enabled: fail with EROFS.
+ * - File-level WORM disabled: pass through.
+ * - Otherwise pass through when is_wormfile() returns non-zero for oldloc,
+ *   else let gf_worm_state_transition() decide (0 winds, anything else is
+ *   returned to the caller as op_errno). */
+static int32_t
+worm_rename (call_frame_t *frame, xlator_t *this,
+             loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        read_only_priv_t *priv     = this->private;
+        int               op_errno = EROFS;
+
+        GF_ASSERT (priv);
+
+        if (!is_readonly_or_worm_enabled (this)) {
+                if (!priv->worm_file) {
+                        op_errno = 0;
+                } else {
+                        gf_uuid_copy (oldloc->gfid, oldloc->inode->gfid);
+                        if (is_wormfile (this, _gf_false, oldloc))
+                                op_errno = 0;
+                        else
+                                op_errno = gf_worm_state_transition (this,
+                                                                _gf_false,
+                                                                oldloc,
+                                                                GF_FOP_RENAME);
+                }
+        }
+
+        if (!op_errno) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                                 FIRST_CHILD (this)->fops->rename,
+                                 oldloc, newloc, xdata);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+                             NULL, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/* truncate FOP.
+ *
+ * - Volume-level read-only/WORM enabled: fail with EROFS.
+ * - File-level WORM disabled: pass through.
+ * - Otherwise pass through when is_wormfile() returns non-zero for loc,
+ *   else let gf_worm_state_transition() decide (0 winds, anything else is
+ *   returned to the caller as op_errno). */
+static int32_t
+worm_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+               dict_t *xdata)
+{
+        read_only_priv_t *priv     = this->private;
+        int               op_errno = EROFS;
+
+        GF_ASSERT (priv);
+
+        if (!is_readonly_or_worm_enabled (this)) {
+                if (!priv->worm_file || is_wormfile (this, _gf_false, loc))
+                        op_errno = 0;
+                else
+                        op_errno = gf_worm_state_transition (this, _gf_false,
+                                                             loc,
+                                                             GF_FOP_TRUNCATE);
+        }
+
+        if (!op_errno) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                                 FIRST_CHILD (this)->fops->truncate,
+                                 loc, offset, xdata);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL,
+                             NULL);
+        return 0;
+}
+
+
+/* setattr FOP (path based).
+ *
+ * Passes through when file-level WORM is disabled or is_wormfile() returns
+ * non-zero for loc. Otherwise:
+ *
+ * - GF_SET_ATTR_MODE: if the new mode in stbuf disables writing
+ *   (gf_worm_write_disabled), the file's worm state is set through
+ *   worm_set_state before the setattr is wound; a failure there blocks the
+ *   FOP.
+ * - GF_SET_ATTR_ATIME (only when MODE is not requested): for a Retained file
+ *   the new atime may not be earlier than the current mtime when ret_mode is
+ *   0, nor earlier than the current atime otherwise. When the change is
+ *   accepted, stbuf->ia_mtime is overwritten with the file's current mtime.
+ *
+ * A blocked FOP is unwound with op_errno (EROFS), matching worm_fsetattr. */
+static int32_t
+worm_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        gf_boolean_t        rd_only     = _gf_false;
+        worm_reten_state_t  reten_state = {0,};
+        struct iatt         stpre       = {0,};
+        read_only_priv_t   *priv        = NULL;
+        int                 op_errno    = EROFS;
+        int                 ret         = -1;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+        if (!priv->worm_file) {
+                op_errno = 0;
+                goto out;
+        }
+
+        if (is_wormfile (this, _gf_false, loc)) {
+                op_errno = 0;
+                goto out;
+        }
+        if (valid & GF_SET_ATTR_MODE) {
+                rd_only = gf_worm_write_disabled (stbuf);
+                if (!rd_only) {
+                        op_errno = 0;
+                        goto out;
+                }
+
+                ret = worm_set_state (this, _gf_false, loc,
+                                      &reten_state, stbuf);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error setting worm state");
+                        goto out;
+                }
+        } else if (valid & GF_SET_ATTR_ATIME) {
+                /* Any non-zero result from worm_get_state lets the FOP
+                 * through unchanged. */
+                ret = worm_get_state (this, _gf_false, loc, &reten_state);
+                if (ret) {
+                        op_errno = 0;
+                        goto out;
+                }
+                if (reten_state.retain) {
+                        ret = syncop_stat (this, loc, &stpre, NULL, NULL);
+                        if (ret)
+                                goto out;
+                        if (reten_state.ret_mode == 0) {
+                                if (stbuf->ia_atime < stpre.ia_mtime) {
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Cannot set atime less than "
+                                                "the mtime for a WORM-Retained "
+                                                "file");
+                                        goto out;
+                                }
+                        } else {
+                                if (stbuf->ia_atime < stpre.ia_atime) {
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Cannot decrease the atime of a"
+                                                " WORM-Retained file in "
+                                                "Enterprise mode");
+                                        goto out;
+                                }
+                        }
+                        stbuf->ia_mtime = stpre.ia_mtime;
+                }
+        }
+        op_errno = 0;
+
+out:
+        if (op_errno)
+                STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL,
+                                     NULL);
+        else
+                STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                                 FIRST_CHILD (this)->fops->setattr,
+                                 loc, stbuf, valid, xdata);
+        return 0;
+}
+
+
+/* fsetattr FOP (fd based).
+ *
+ * Passes through when file-level WORM is disabled or is_wormfile() returns
+ * non-zero for fd. Otherwise:
+ *
+ * - GF_SET_ATTR_MODE: if the new mode in stbuf disables writing, the worm
+ *   state is set through worm_set_state first; failure blocks the FOP.
+ * - GF_SET_ATTR_ATIME (only when MODE is not requested): for a Retained file
+ *   the new atime may not be earlier than the current mtime when ret_mode is
+ *   0, nor earlier than the current atime otherwise. When accepted,
+ *   stbuf->ia_mtime is overwritten with the file's current mtime.
+ *
+ * A blocked FOP is unwound with EROFS. */
+static int32_t
+worm_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        worm_reten_state_t  reten_state = {0,};
+        struct iatt         stpre       = {0,};
+        read_only_priv_t   *priv        = this->private;
+        int                 op_errno    = EROFS;
+
+        GF_ASSERT (priv);
+
+        if (!priv->worm_file || is_wormfile (this, _gf_true, fd))
+                goto allow;
+
+        if (valid & GF_SET_ATTR_MODE) {
+                if (!gf_worm_write_disabled (stbuf))
+                        goto allow;
+
+                if (worm_set_state (this, _gf_true, fd,
+                                    &reten_state, stbuf)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error setting worm state");
+                        goto deny;
+                }
+                goto allow;
+        }
+
+        if (!(valid & GF_SET_ATTR_ATIME))
+                goto allow;
+
+        /* Any non-zero result from worm_get_state lets the FOP through. */
+        if (worm_get_state (this, _gf_true, fd, &reten_state))
+                goto allow;
+
+        if (!reten_state.retain)
+                goto allow;
+
+        if (syncop_fstat (this, fd, &stpre, NULL, NULL))
+                goto deny;
+
+        if (reten_state.ret_mode == 0) {
+                if (stbuf->ia_atime < stpre.ia_mtime) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Cannot set atime less than "
+                                "the mtime for a WORM-Retained "
+                                "file");
+                        goto deny;
+                }
+        } else if (stbuf->ia_atime < stpre.ia_atime) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot decrease the atime of a"
+                        " WORM-Retained file in "
+                        "Enterprise mode");
+                goto deny;
+        }
+        stbuf->ia_mtime = stpre.ia_mtime;
+
+allow:
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->fsetattr,
+                         fd, stbuf, valid, xdata);
+        return 0;
+
+deny:
+        STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL,
+                             NULL);
+        return 0;
+}
+
+
+/* writev FOP.
+ *
+ * The write is wound to the child when file-level WORM is disabled, when
+ * is_wormfile() returns non-zero for fd, when worm_get_state() returns -1,
+ * or when the state was read successfully and its worm bit is clear.
+ * In every other case the write is unwound with EROFS. */
+static int32_t
+worm_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+             struct iobref *iobref, dict_t *xdata)
+{
+        worm_reten_state_t  reten_state = {0,};
+        read_only_priv_t   *priv        = this->private;
+        int                 op_errno    = EROFS;
+        int                 ret         = -1;
+
+        GF_ASSERT (priv);
+
+        if (!priv->worm_file || is_wormfile (this, _gf_true, fd)) {
+                op_errno = 0;
+        } else {
+                ret = worm_get_state (this, _gf_true, fd, &reten_state);
+                if (ret == -1 || (ret == 0 && !reten_state.worm))
+                        op_errno = 0;
+        }
+
+        if (!op_errno) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                                 FIRST_CHILD (this)->fops->writev,
+                                 fd, vector, count, offset, flags, iobref,
+                                 xdata);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
+                             NULL);
+        return 0;
+}
+
+/* Callback of the create FOP.
+ *
+ * When file-level WORM is enabled and the create succeeded, mark the new
+ * file with the "trusted.worm_file" xattr and initialize its state through
+ * worm_init_state. Failures of this bookkeeping are logged only; the create
+ * result received from the child is always unwound to the parent unchanged.
+ *
+ * Returns 0, or the error of the bookkeeping step that failed. */
+static int32_t
+worm_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, fd_t *fd,
+                 inode_t *inode, struct iatt *buf,
+                 struct iatt *preparent, struct iatt *postparent,
+                 dict_t *xdata)
+{
+        int               ret  = 0;
+        read_only_priv_t *priv = NULL;
+        dict_t           *dict = NULL;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+        /* Nothing to mark when the create itself failed: there is no usable
+         * file behind fd in that case. */
+        if (priv->worm_file && op_ret >= 0) {
+                dict = dict_new ();
+                if (!dict) {
+                        gf_log (this->name, GF_LOG_ERROR, "Error creating the "
+                                "dict");
+                        goto out;
+                }
+                ret = dict_set_int8 (dict, "trusted.worm_file", 1);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Error in setting "
+                                "the dict");
+                        goto out;
+                }
+                ret = syncop_fsetxattr (this, fd, dict, 0, NULL, NULL);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error setting xattr");
+                        goto out;
+                }
+                ret = worm_init_state (this, _gf_true, fd);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error initializing state");
+                }
+        }
+
+out:
+        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+                             preparent, postparent, xdata);
+        /* Drop our reference rather than destroying the ref-counted dict. */
+        if (dict)
+                dict_unref (dict);
+        return ret;
+}
+
+
+/* create FOP: unconditionally wind the create to the first child, with
+ * worm_create_cbk registered as the callback that receives the result. */
+static int32_t
+worm_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND (frame, worm_create_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->create, loc, flags,
+ mode, umask, fd, xdata);
+ return 0;
+}
+
+
+/* Translator init.
+ *
+ * Requires exactly one child, creates the local memory pool, allocates the
+ * private structure from it and fills it from the volume options ("worm",
+ * "worm-file-level", "default-retention-period", "auto-commit-period",
+ * "retention-mode").
+ *
+ * priv->reten_mode is set by the string option itself, so no storage is
+ * allocated for it here.
+ *
+ * Returns 0 on success, -1 on failure. On failure everything allocated here
+ * is released again and this->private is left unset. */
+int32_t
+init (xlator_t *this)
+{
+        int               ret  = -1;
+        read_only_priv_t *priv = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        this->local_pool = mem_pool_new (read_only_priv_t, 64);
+        if (!this->local_pool) {
+                ret = -1;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create read_only_priv_t's memory pool");
+                goto out;
+        }
+
+        priv = mem_get0 (this->local_pool);
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR, "Error allocating priv");
+                goto out;
+        }
+
+        GF_OPTION_INIT ("worm", priv->readonly_or_worm_enabled,
+                        bool, out);
+        GF_OPTION_INIT ("worm-file-level", priv->worm_file, bool, out);
+        GF_OPTION_INIT ("default-retention-period", priv->reten_period,
+                        uint64, out);
+        GF_OPTION_INIT ("auto-commit-period", priv->com_period, uint64, out);
+        GF_OPTION_INIT ("retention-mode", priv->reten_mode, str, out);
+
+        this->private = priv;
+        priv = NULL;    /* ownership moved to this->private */
+        ret = 0;
+out:
+        if (ret) {
+                /* Undo the partial setup so that nothing leaks and a later
+                 * fini() finds nothing left to release. */
+                if (priv)
+                        mem_put (priv);
+                if (this->local_pool) {
+                        mem_pool_destroy (this->local_pool);
+                        this->local_pool = NULL;
+                }
+        }
+        return ret;
+}
+
+
+/* Re-read the translator options from the given options dict into the
+ * private structure. Each GF_OPTION_RECONF is given the "out" label, so a
+ * failing option leaves ret at -1. The return value is logged at debug level
+ * before being returned (0 once every option has been processed). */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ read_only_priv_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ GF_OPTION_RECONF ("worm", priv->readonly_or_worm_enabled,
+ options, bool, out);
+ GF_OPTION_RECONF ("worm-file-level", priv->worm_file, options, bool,
+ out);
+ GF_OPTION_RECONF ("default-retention-period", priv->reten_period,
+ options, uint64, out);
+ GF_OPTION_RECONF ("retention-mode", priv->reten_mode, options, str,
+ out);
+ GF_OPTION_RECONF ("auto-commit-period", priv->com_period, options,
+ uint64, out);
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+
+/* Translator teardown: return the private structure to the local pool and
+ * destroy the pool. Does nothing when init never installed this->private.
+ *
+ * priv->reten_mode is deliberately not released here: it is assigned by the
+ * "retention-mode" string option, so it does not point at an object obtained
+ * from the memory pool and must not be handed to mem_put. */
+void
+fini (xlator_t *this)
+{
+        read_only_priv_t *priv = NULL;
+
+        priv = this->private;
+        if (!priv)
+                goto out;
+        priv->reten_mode = NULL;
+        mem_put (priv);
+        this->private = NULL;
+        mem_pool_destroy (this->local_pool);
+out:
+        return;
+}
+
+
+/* FOP dispatch table of the worm translator. */
+struct xlator_fops fops = {
+ /* Handlers implemented in this file. */
+ .open = worm_open,
+ .writev = worm_writev,
+ .setattr = worm_setattr,
+ .fsetattr = worm_fsetattr,
+ .rename = worm_rename,
+ .link = worm_link,
+ .unlink = worm_unlink,
+ .truncate = worm_truncate,
+ .create = worm_create,
+
+ /* ro_* handlers are not defined in this file (presumably shared with the
+ * read-only translator via read-only-common.h - confirm). */
+ .rmdir = ro_rmdir,
+ .removexattr = ro_removexattr,
+ .fsyncdir = ro_fsyncdir,
+ .xattrop = ro_xattrop,
+ .inodelk = ro_inodelk,
+ .finodelk = ro_finodelk,
+ .entrylk = ro_entrylk,
+ .fentrylk = ro_fentrylk,
+ .lk = ro_lk,
+};
+
+
+struct xlator_cbks cbks;
+
+
+/* Options understood by the worm translator. The table ends with an entry
+ * whose key is NULL so that code walking the array can find its end. */
+struct volume_options options[] = {
+        { .key = {"worm"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "When \"on\", makes a volume get write once read many "
+                         " feature. It is turned \"off\" by default."
+        },
+        { .key = {"worm-file-level"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "When \"on\", activates the file level worm. "
+                         "It is turned \"off\" by default."
+        },
+        { .key = {"default-retention-period"},
+          .type = GF_OPTION_TYPE_TIME,
+          .default_value = "120",
+          .description = "The default retention period for the files."
+        },
+        { .key = {"retention-mode"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "relax",
+          .description = "The mode of retention (relax/enterprise). "
+                         "It is relax by default."
+        },
+        { .key = {"auto-commit-period"},
+          .type = GF_OPTION_TYPE_TIME,
+          .default_value = "180",
+          .description = "Auto commit period for the files."
+        },
+        /* Terminator entry. */
+        { .key = {NULL}
+        },
+};
diff --git a/xlators/features/shard/Makefile.am b/xlators/features/shard/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/shard/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/shard/src/Makefile.am b/xlators/features/shard/src/Makefile.am
new file mode 100644
index 00000000000..89173f1203e
--- /dev/null
+++ b/xlators/features/shard/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = shard.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+shard_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+shard_la_SOURCES = shard.c
+
+shard_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = shard.h shard-mem-types.h shard-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h
new file mode 100644
index 00000000000..77f0cee7f58
--- /dev/null
+++ b/xlators/features/shard/src/shard-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SHARD_MEM_TYPES_H__
+#define __SHARD_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory-accounting type ids of the shard translator. Numbering continues
+ * right after the common types (gf_common_mt_end + 1); gf_shard_mt_end marks
+ * the end of the list and must stay last. */
+enum gf_shard_mem_types_ {
+ gf_shard_mt_priv_t = gf_common_mt_end + 1,
+ gf_shard_mt_inode_list,
+ gf_shard_mt_inode_ctx_t,
+ gf_shard_mt_iovec,
+ gf_shard_mt_int64_t,
+ gf_shard_mt_end
+};
+#endif
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
new file mode 100644
index 00000000000..588cb687d5d
--- /dev/null
+++ b/xlators/features/shard/src/shard-messages.h
@@ -0,0 +1,184 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _SHARD_MESSAGES_H_
+#define _SHARD_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file shard-messages.h
+ * \brief shard log-message IDs and their descriptions.
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD
+#define GLFS_NUM_MESSAGES 18
+#define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages"
+
+/*!
+ * @messageid 133001
+ * @diagnosis
+ * @recommendedaction
+ */
+#define SHARD_MSG_BASE_FILE_LOOKUP_FAILED (GLFS_COMP_BASE_SHARD + 1)
+
+
+/*!
+ * @messageid 133002
+ * @diagnosis
+ * @recommendedaction
+ */
+#define SHARD_MSG_DICT_SET_FAILED (GLFS_COMP_BASE_SHARD + 2)
+
+
+/*!
+ * @messageid 133003
+ * @diagnosis /.shard already exists and is not a directory.
+ * @recommendedaction Delete the /.shard file from the backend and try again.
+ */
+#define SHARD_MSG_DOT_SHARD_NODIR (GLFS_COMP_BASE_SHARD + 3)
+
+
+/*!
+ * @messageid 133004
+ * @diagnosis
+ * @recommendedaction
+ */
+#define SHARD_MSG_FD_CTX_SET_FAILED (GLFS_COMP_BASE_SHARD + 4)
+
+
+/*!
+ * @messageid 133005
+ * @diagnosis
+ * @recommendedaction
+ */
+#define SHARD_MSG_INODE_CTX_GET_FAILED (GLFS_COMP_BASE_SHARD + 5)
+
+
+/*!
+ * @messageid 133006
+ * @diagnosis
+ * @recommendedaction
+ */
+#define SHARD_MSG_INODE_CTX_SET_FAILED (GLFS_COMP_BASE_SHARD + 6)
+
+
+/*!
+ * @messageid 133007
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_INODE_PATH_FAILED (GLFS_COMP_BASE_SHARD + 7)
+
+
+/*!
+ * @messageid 133008
+ * @diagnosis
+ * @recommendedaction
+ */
+#define SHARD_MSG_INTERNAL_XATTR_MISSING (GLFS_COMP_BASE_SHARD + 8)
+
+
+/*!
+ * @messageid 133009
+ * @diagnosis The client process did not get launched due to incorrect volfile.
+ * @recommendedaction Possibly check to see if the volfile is correct.
+ */
+#define SHARD_MSG_INVALID_VOLFILE (GLFS_COMP_BASE_SHARD + 9)
+
+
+/*!
+ * @messageid 133010
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_LOOKUP_SHARD_FAILED (GLFS_COMP_BASE_SHARD + 10)
+
+/*!
+ * @messageid 133011
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_MEM_ACCT_INIT_FAILED (GLFS_COMP_BASE_SHARD + 11)
+
+/*!
+ * @messageid 133012
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_NULL_THIS (GLFS_COMP_BASE_SHARD + 12)
+
+/*!
+ * @messageid 133013
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_SIZE_SET_FAILED (GLFS_COMP_BASE_SHARD + 13)
+
+/*!
+ * @messageid 133014
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_STAT_FAILED (GLFS_COMP_BASE_SHARD + 14)
+
+/*!
+ * @messageid 133015
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED (GLFS_COMP_BASE_SHARD + 15)
+
+/*!
+ * @messageid 133016
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_UPDATE_FILE_SIZE_FAILED (GLFS_COMP_BASE_SHARD + 16)
+
+/*!
+ * @messageid 133017
+ * @diagnosis The operation invoked is not supported.
+ * @recommendedaction Use other syscalls to write to the file.
+*/
+#define SHARD_MSG_FOP_NOT_SUPPORTED (GLFS_COMP_BASE_SHARD + 17)
+
+/*!
+ * @messageid 133018
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_INVALID_FOP (GLFS_COMP_BASE_SHARD + 18)
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_SHARD_MESSAGES_H_ */
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
new file mode 100644
index 00000000000..abac0ccf64d
--- /dev/null
+++ b/xlators/features/shard/src/shard.c
@@ -0,0 +1,4925 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+
+#include "shard.h"
+#include "shard-mem-types.h"
+#include "byte-order.h"
+#include "defaults.h"
+#include "statedump.h"
+
+/*
+ * Tell whether @gfid equals the dot_shard_gfid stored in the private data
+ * of the current translator (THIS).
+ */
+static gf_boolean_t
+__is_shard_dir (uuid_t gfid)
+{
+        shard_priv_t *priv = THIS->private;
+
+        return (gf_uuid_compare (gfid, priv->dot_shard_gfid) == 0) ?
+                _gf_true : _gf_false;
+}
+
+/*
+ * Return _gf_true only when the request originates from gsyncd
+ * (frame->root->pid == GF_CLIENT_PID_GSYNCD) and the parent of @loc, taken
+ * either from loc->pargfid or from loc->parent->gfid, is the shard
+ * directory.
+ */
+static gf_boolean_t
+__is_gsyncd_on_shard_dir (call_frame_t *frame, loc_t *loc)
+{
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD)
+                return _gf_false;
+
+        if (__is_shard_dir (loc->pargfid))
+                return _gf_true;
+
+        if (loc->parent && __is_shard_dir(loc->parent->gfid))
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Write the basename of shard @block_num of the file identified by @gfid
+ * into @buf (at most @len bytes), in the form "<gfid-string>.<block_num>".
+ */
+void
+shard_make_block_bname (int block_num, uuid_t gfid, char *buf, size_t len)
+{
+        char base_gfid_str[GF_UUID_BUF_SIZE] = {0,};
+
+        gf_uuid_unparse (gfid, base_gfid_str);
+
+        snprintf (buf, len, "%s.%d", base_gfid_str, block_num);
+}
+
+/*
+ * Write the absolute path of shard @block_num of the file identified by
+ * @gfid into @filepath (at most @len bytes), in the form
+ * "/<GF_SHARD_DIR>/<gfid-string>.<block_num>".
+ */
+void
+shard_make_block_abspath (int block_num, uuid_t gfid, char *filepath,
+                          size_t len)
+{
+        char base_gfid_str[GF_UUID_BUF_SIZE] = {0,};
+
+        gf_uuid_unparse (gfid, base_gfid_str);
+
+        snprintf (filepath, len, "/%s/%s.%d", GF_SHARD_DIR, base_gfid_str,
+                  block_num);
+}
+
+/*
+ * Fetch the shard inode ctx of @inode, creating it on first use.
+ *
+ * If a ctx is already stored for (@inode, @this) it is returned through
+ * @ctx. Otherwise a zeroed ctx is allocated, its ilist head initialised,
+ * and it is stored in the inode before being returned through @ctx.
+ *
+ * The double-underscore prefix and the locked wrapper shard_inode_ctx_get()
+ * indicate that the caller is expected to hold inode->lock.
+ *
+ * Returns 0 on success. On allocation failure the non-zero result of the
+ * failed __inode_ctx_get() is returned; if storing the new ctx fails, the
+ * ctx is freed and the result of __inode_ctx_set() is returned. @ctx is
+ * left untouched on failure.
+ */
+int
+__shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+{
+        int                 ret      = -1;
+        uint64_t            ctx_uint = 0;
+        shard_inode_ctx_t  *ctx_p    = NULL;
+
+        ret = __inode_ctx_get (inode, this, &ctx_uint);
+        if (ret == 0) {
+                *ctx = (shard_inode_ctx_t *) ctx_uint;
+                return ret;
+        }
+
+        ctx_p = GF_CALLOC (1, sizeof (*ctx_p), gf_shard_mt_inode_ctx_t);
+        if (!ctx_p)
+                return ret;
+
+        INIT_LIST_HEAD (&ctx_p->ilist);
+
+        /* Hand the pointer over as a real 64-bit value. Passing
+         * (uint64_t *)&ctx_p would make the callee read 8 bytes from a
+         * pointer-sized object, which over-reads where pointers are
+         * narrower than 64 bits and violates strict aliasing.
+         */
+        ctx_uint = (uint64_t) (uintptr_t) ctx_p;
+
+        ret = __inode_ctx_set (inode, this, &ctx_uint);
+        if (ret < 0) {
+                GF_FREE (ctx_p);
+                return ret;
+        }
+
+        *ctx = ctx_p;
+
+        return ret;
+}
+
+/*
+ * Locked wrapper: calls __shard_inode_ctx_get() while holding inode->lock
+ * and returns its result.
+ */
+int
+shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+{
+        int rc = 0;
+
+        LOCK(&inode->lock);
+        rc = __shard_inode_ctx_get (inode, this, ctx);
+        UNLOCK(&inode->lock);
+
+        return rc;
+}
+
+/*
+ * Update selected fields of the shard inode ctx of @inode.
+ *
+ * @valid is a bitmask of SHARD_MASK_* flags choosing what to update:
+ * the block size (from @block_size) and, when @stbuf is non-NULL, the
+ * cached iatt fields copied from @stbuf. SHARD_MASK_REFRESH_RESET clears
+ * the ctx's refresh flag. When @stbuf is NULL the function returns right
+ * after the block-size step, so the refresh flag is not touched either.
+ *
+ * The ctx is created if it does not exist yet. Returns 0 on success or
+ * the non-zero result of __shard_inode_ctx_get() on failure.
+ */
+int
+__shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
+                       uint64_t block_size, int32_t valid)
+{
+        int                 rc     = -1;
+        shard_inode_ctx_t  *ictx   = NULL;
+        struct iatt        *cached = NULL;
+
+        rc = __shard_inode_ctx_get (inode, this, &ictx);
+        if (rc)
+                return rc;
+
+        if (valid & SHARD_MASK_BLOCK_SIZE)
+                ictx->block_size = block_size;
+
+        /* Nothing else can be applied without a source iatt. */
+        if (!stbuf)
+                return 0;
+
+        cached = &ictx->stat;
+
+        if (valid & SHARD_MASK_PROT)
+                cached->ia_prot = stbuf->ia_prot;
+
+        if (valid & SHARD_MASK_NLINK)
+                cached->ia_nlink = stbuf->ia_nlink;
+
+        if (valid & SHARD_MASK_UID)
+                cached->ia_uid = stbuf->ia_uid;
+
+        if (valid & SHARD_MASK_GID)
+                cached->ia_gid = stbuf->ia_gid;
+
+        if (valid & SHARD_MASK_SIZE)
+                cached->ia_size = stbuf->ia_size;
+
+        if (valid & SHARD_MASK_BLOCKS)
+                cached->ia_blocks = stbuf->ia_blocks;
+
+        if (valid & SHARD_MASK_TIMES) {
+                SHARD_TIME_UPDATE (cached->ia_mtime, cached->ia_mtime_nsec,
+                                   stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+                SHARD_TIME_UPDATE (cached->ia_ctime, cached->ia_ctime_nsec,
+                                   stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+                SHARD_TIME_UPDATE (cached->ia_atime, cached->ia_atime_nsec,
+                                   stbuf->ia_atime, stbuf->ia_atime_nsec);
+        }
+
+        if (valid & SHARD_MASK_OTHERS) {
+                cached->ia_ino = stbuf->ia_ino;
+                gf_uuid_copy (cached->ia_gfid, stbuf->ia_gfid);
+                cached->ia_dev = stbuf->ia_dev;
+                cached->ia_type = stbuf->ia_type;
+                cached->ia_rdev = stbuf->ia_rdev;
+                cached->ia_blksize = stbuf->ia_blksize;
+        }
+
+        if (valid & SHARD_MASK_REFRESH_RESET)
+                ictx->refresh = _gf_false;
+
+        return 0;
+}
+
+/*
+ * Locked wrapper: calls __shard_inode_ctx_set() with the same arguments
+ * while holding inode->lock and returns its result.
+ */
+int
+shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
+                     uint64_t block_size, int32_t valid)
+{
+        int rc = -1;
+
+        LOCK (&inode->lock);
+        rc = __shard_inode_ctx_set (inode, this, stbuf, block_size, valid);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+/*
+ * Mark the cached iatt in the shard inode ctx of @inode as needing a
+ * refresh when the size or block count in @stbuf differs from the cached
+ * values. The ctx is created if absent.
+ *
+ * Returns 0 on success or the non-zero result of __shard_inode_ctx_get().
+ */
+int
+__shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf)
+{
+        int                 rc   = -1;
+        shard_inode_ctx_t  *ictx = NULL;
+
+        rc = __shard_inode_ctx_get (inode, this, &ictx);
+        if (rc)
+                return rc;
+
+        if ((stbuf->ia_size == ictx->stat.ia_size) &&
+            (stbuf->ia_blocks == ictx->stat.ia_blocks))
+                return 0;
+
+        ictx->refresh = _gf_true;
+
+        return 0;
+}
+
+/*
+ * Locked wrapper: calls __shard_inode_ctx_invalidate() while holding
+ * inode->lock and returns its result.
+ */
+int
+shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf)
+{
+        int rc = -1;
+
+        LOCK (&inode->lock);
+        rc = __shard_inode_ctx_invalidate (inode, this, stbuf);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+/*
+ * Read the block size stored in the existing shard inode ctx of @inode
+ * into @block_size. Unlike __shard_inode_ctx_get(), no ctx is created.
+ *
+ * Returns 0 on success, or the negative result of __inode_ctx_get() when
+ * no ctx is available (@block_size is then left untouched).
+ */
+int
+__shard_inode_ctx_get_block_size (inode_t *inode, xlator_t *this,
+                                  uint64_t *block_size)
+{
+        int       rc      = -1;
+        uint64_t  raw_ctx = 0;
+
+        rc = __inode_ctx_get (inode, this, &raw_ctx);
+        if (rc < 0)
+                return rc;
+
+        *block_size = ((shard_inode_ctx_t *) raw_ctx)->block_size;
+
+        return 0;
+}
+
+/*
+ * Locked wrapper: calls __shard_inode_ctx_get_block_size() while holding
+ * inode->lock and returns its result.
+ */
+int
+shard_inode_ctx_get_block_size (inode_t *inode, xlator_t *this,
+                                uint64_t *block_size)
+{
+        int rc = -1;
+
+        LOCK (&inode->lock);
+        rc = __shard_inode_ctx_get_block_size (inode, this, block_size);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+/*
+ * Copy the whole existing shard inode ctx of @inode into @ctx_out. No ctx
+ * is created when none exists.
+ *
+ * Returns 0 on success, or the negative result of __inode_ctx_get().
+ */
+int
+__shard_inode_ctx_get_all (inode_t *inode, xlator_t *this,
+                           shard_inode_ctx_t *ctx_out)
+{
+        int                 rc      = -1;
+        uint64_t            raw_ctx = 0;
+        shard_inode_ctx_t  *stored  = NULL;
+
+        rc = __inode_ctx_get (inode, this, &raw_ctx);
+        if (rc < 0)
+                return rc;
+
+        stored = (shard_inode_ctx_t *) raw_ctx;
+        memcpy (ctx_out, stored, sizeof (shard_inode_ctx_t));
+
+        return 0;
+}
+
+/*
+ * Locked wrapper: calls __shard_inode_ctx_get_all() while holding
+ * inode->lock and returns its result.
+ */
+int
+shard_inode_ctx_get_all (inode_t *inode, xlator_t *this,
+                         shard_inode_ctx_t *ctx_out)
+{
+        int rc = -1;
+
+        LOCK (&inode->lock);
+        rc = __shard_inode_ctx_get_all (inode, this, ctx_out);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+/*
+ * Serve the cached iatt from the existing shard inode ctx of @inode.
+ *
+ * If the ctx is flagged for refresh, *@need_refresh is set to _gf_true and
+ * @buf is left untouched; otherwise the cached stat is copied into @buf
+ * and *@need_refresh is left untouched.
+ *
+ * Returns 0 on success, or the negative result of __inode_ctx_get() when
+ * no ctx exists.
+ */
+int
+__shard_inode_ctx_fill_iatt_from_cache (inode_t *inode, xlator_t *this,
+                                        struct iatt *buf,
+                                        gf_boolean_t *need_refresh)
+{
+        int                 rc      = -1;
+        uint64_t            raw_ctx = 0;
+        shard_inode_ctx_t  *ictx    = NULL;
+
+        rc = __inode_ctx_get (inode, this, &raw_ctx);
+        if (rc < 0)
+                return rc;
+
+        ictx = (shard_inode_ctx_t *) raw_ctx;
+
+        if (ictx->refresh != _gf_false)
+                *need_refresh = _gf_true;
+        else
+                *buf = ictx->stat;
+
+        return 0;
+}
+
+/*
+ * Locked wrapper: calls __shard_inode_ctx_fill_iatt_from_cache() while
+ * holding inode->lock and returns its result.
+ */
+int
+shard_inode_ctx_fill_iatt_from_cache (inode_t *inode, xlator_t *this,
+                                      struct iatt *buf,
+                                      gf_boolean_t *need_refresh)
+{
+        int rc = -1;
+
+        LOCK (&inode->lock);
+        rc = __shard_inode_ctx_fill_iatt_from_cache (inode, this, buf,
+                                                     need_refresh);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+/*
+ * Release everything held by @local: its four locs, the fd, the request
+ * and response dicts, every inode referenced from inode_list (and the
+ * list itself), the vector, the iobref and, when list_inited is set, the
+ * dirent list. @local itself is not freed here.
+ */
+void
+shard_local_wipe (shard_local_t *local)
+{
+        int idx       = 0;
+        int nr_blocks = local->num_blocks;
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->dot_shard_loc);
+        loc_wipe (&local->loc2);
+        loc_wipe (&local->tmp_loc);
+
+        if (local->fd)
+                fd_unref (local->fd);
+
+        if (local->xattr_req)
+                dict_unref (local->xattr_req);
+        if (local->xattr_rsp)
+                dict_unref (local->xattr_rsp);
+
+        /* Drop the per-shard inode refs, if a list was ever allocated. */
+        if (local->inode_list) {
+                for (idx = 0; idx < nr_blocks; idx++) {
+                        if (local->inode_list[idx])
+                                inode_unref (local->inode_list[idx]);
+                }
+        }
+
+        GF_FREE (local->inode_list);
+
+        GF_FREE (local->vector);
+        if (local->iobref)
+                iobref_unref (local->iobref);
+        if (local->list_inited)
+                gf_dirent_free (&local->entries_head);
+}
+
+/*
+ * Overwrite ia_size and ia_blocks of @stbuf with the values carried in
+ * the GF_XATTR_SHARD_FILE_SIZE entry of @dict.
+ *
+ * The entry is read as an array of four 64-bit words in network byte
+ * order; word 0 is the size and word 2 the block count.
+ *
+ * Returns 0 on success. If the key is missing, an error is logged and the
+ * non-zero result of dict_get_ptr() is returned with @stbuf unchanged.
+ */
+int
+shard_modify_size_and_block_count (struct iatt *stbuf, dict_t *dict)
+{
+        uint64_t  size_array[4];
+        void     *size_attr = NULL;
+        int       rc        = -1;
+
+        rc = dict_get_ptr (dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+        if (rc != 0) {
+                gf_msg_callingfn (THIS->name, GF_LOG_ERROR, 0,
+                                  SHARD_MSG_INTERNAL_XATTR_MISSING, "Failed to "
+                                  "get "GF_XATTR_SHARD_FILE_SIZE" for %s",
+                                  uuid_utoa (stbuf->ia_gfid));
+                return rc;
+        }
+
+        memcpy (size_array, size_attr, sizeof (size_array));
+
+        stbuf->ia_size   = ntoh64 (size_array[0]);
+        stbuf->ia_blocks = ntoh64 (size_array[2]);
+
+        return 0;
+}
+
+/*
+ * Decrement the call_count of the shard local attached to @frame while
+ * holding frame->lock, and return the value after the decrement.
+ */
+int
+shard_call_count_return (call_frame_t *frame)
+{
+        shard_local_t *local     = frame->local;
+        int            remaining = 0;
+
+        LOCK (&frame->lock);
+        remaining = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        return remaining;
+}
+
+/*
+ * Populate local->dot_shard_loc for GF_SHARD_DIR under the root of
+ * this->itable: a new inode, a ref on the root inode as parent, the path
+ * built by inode_path(), and the name pointing just past the last '/' of
+ * that path.
+ *
+ * Returns -1 when @local is NULL and 0 on success. When inode_path() fails
+ * or the new inode could not be created, an error is logged and the value
+ * returned by inode_path() is passed back as is.
+ */
+static int
+shard_init_dot_shard_loc (xlator_t *this, shard_local_t *local)
+{
+        int    rc    = -1;
+        loc_t *dsloc = NULL;
+
+        if (!local)
+                return -1;
+
+        dsloc = &local->dot_shard_loc;
+
+        dsloc->inode  = inode_new (this->itable);
+        dsloc->parent = inode_ref (this->itable->root);
+
+        rc = inode_path (dsloc->parent, GF_SHARD_DIR, (char **)&dsloc->path);
+        if (rc < 0 || !(dsloc->inode)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_PATH_FAILED,
+                        "Inode path failed on %s", GF_SHARD_DIR);
+                return rc;
+        }
+
+        dsloc->name = strrchr (dsloc->path, '/');
+        if (dsloc->name)
+                dsloc->name++;
+
+        return 0;
+}
+
+/*
+ * Record @linked_inode (shard number @block_num of @base_inode) in the
+ * translator-wide list of shard inodes, priv->ilist_head, whose tail is
+ * the most recently used end.
+ *
+ *  - If the inode's ctx is already on the list, it is moved to the tail.
+ *  - Otherwise, if the list holds fewer than SHARD_MAX_INODES entries, the
+ *    ctx is appended and priv->inode_count is incremented.
+ *  - Otherwise the entry at the head of the list is evicted: its inode is
+ *    looked up by gfid, unlinked from priv->dot_shard_inode under its shard
+ *    basename and forgotten, and the new ctx is appended in its place (the
+ *    count stays the same).
+ *
+ * The caller in this file invokes this with priv->lock held.
+ *
+ * If the inode ctx cannot be obtained, the function returns without
+ * touching the list instead of dereferencing a NULL ctx.
+ */
+void
+__shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
+                                  inode_t *base_inode, int block_num)
+{
+        char                block_bname[256] = {0,};
+        inode_t            *lru_inode        = NULL;
+        shard_priv_t       *priv             = NULL;
+        shard_inode_ctx_t  *ctx              = NULL;
+        shard_inode_ctx_t  *lru_inode_ctx    = NULL;
+
+        priv = this->private;
+
+        /* shard_inode_ctx_get() fails when the ctx can neither be found
+         * nor allocated and stored; ctx is then still NULL.
+         */
+        if (shard_inode_ctx_get (linked_inode, this, &ctx) || !ctx)
+                return;
+
+        if (!list_empty (&ctx->ilist)) {
+                /* Already tracked: just mark it most recently used. */
+                list_move_tail (&ctx->ilist, &priv->ilist_head);
+                return;
+        }
+
+        if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
+                /* First time on the list and there is still room. */
+                priv->inode_count++;
+        } else {
+                /* No free slot: evict the entry at the head of the list
+                 * and unlink its inode from under the shard directory.
+                 */
+                lru_inode_ctx = list_first_entry (&priv->ilist_head,
+                                                  shard_inode_ctx_t,
+                                                  ilist);
+                GF_ASSERT (lru_inode_ctx->block_num > 0);
+                list_del_init (&lru_inode_ctx->ilist);
+                lru_inode = inode_find (linked_inode->table,
+                                        lru_inode_ctx->stat.ia_gfid);
+                shard_make_block_bname (lru_inode_ctx->block_num,
+                                        lru_inode_ctx->base_gfid,
+                                        block_bname,
+                                        sizeof (block_bname));
+                inode_unlink (lru_inode, priv->dot_shard_inode,
+                              block_bname);
+                inode_forget (lru_inode, 0);
+                /* The following unref corresponds to the ref held by
+                 * inode_find() above.
+                 */
+                inode_unref (lru_inode);
+        }
+
+        gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+        ctx->block_num = block_num;
+        list_add_tail (&ctx->ilist, &priv->ilist_head);
+}
+
+/*
+ * Unwind a failed inode-write fop with @op_ret and @op_errno and NULL
+ * payload arguments, using the unwind that matches @fop (writev,
+ * fallocate, zerofill or discard). For any other fop id only a warning is
+ * logged. Always returns 0.
+ */
+int
+shard_common_inode_write_failure_unwind (glusterfs_fop_t fop,
+                                         call_frame_t *frame, int32_t op_ret,
+                                         int32_t op_errno)
+{
+        if (fop == GF_FOP_WRITE) {
+                SHARD_STACK_UNWIND (writev, frame, op_ret, op_errno,
+                                    NULL, NULL, NULL);
+        } else if (fop == GF_FOP_FALLOCATE) {
+                SHARD_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+                                    NULL, NULL, NULL);
+        } else if (fop == GF_FOP_ZEROFILL) {
+                SHARD_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+                                    NULL, NULL, NULL);
+        } else if (fop == GF_FOP_DISCARD) {
+                SHARD_STACK_UNWIND (discard, frame, op_ret, op_errno,
+                                    NULL, NULL, NULL);
+        } else {
+                gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+                        "Invalid fop id = %d", fop);
+        }
+
+        return 0;
+}
+
+/*
+ * Unwind a successful inode-write fop with @op_ret and errno 0, passing
+ * the prebuf, postbuf and xattr_rsp held in the frame's shard local, using
+ * the unwind that matches @fop (writev, fallocate, zerofill or discard).
+ * For any other fop id only a warning is logged. Always returns 0.
+ */
+int
+shard_common_inode_write_success_unwind (glusterfs_fop_t fop,
+                                         call_frame_t *frame, int32_t op_ret)
+{
+        shard_local_t *local = frame->local;
+
+        if (fop == GF_FOP_WRITE) {
+                SHARD_STACK_UNWIND (writev, frame, op_ret, 0, &local->prebuf,
+                                    &local->postbuf, local->xattr_rsp);
+        } else if (fop == GF_FOP_FALLOCATE) {
+                SHARD_STACK_UNWIND (fallocate, frame, op_ret, 0, &local->prebuf,
+                                    &local->postbuf, local->xattr_rsp);
+        } else if (fop == GF_FOP_ZEROFILL) {
+                SHARD_STACK_UNWIND (zerofill, frame, op_ret, 0, &local->prebuf,
+                                    &local->postbuf, local->xattr_rsp);
+        } else if (fop == GF_FOP_DISCARD) {
+                SHARD_STACK_UNWIND (discard, frame, op_ret, 0, &local->prebuf,
+                                    &local->postbuf, local->xattr_rsp);
+        } else {
+                gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+                        "Invalid fop id = %d", fop);
+        }
+
+        return 0;
+}
+
+/*
+ * Resolve, from the inode table, the inodes of all blocks in the range
+ * [local->first_block, local->last_block] of the file @res_inode and store
+ * them in local->inode_list, then invoke @post_res_handler.
+ *
+ *  - Block 0 is the base file itself: a ref on @res_inode is stored.
+ *  - A shard found via inode_resolve() is stored (keeping the ref obtained
+ *    from the resolve) and registered in the translator's shard inode list
+ *    under priv->lock.
+ *  - A shard that is not found leaves its slot untouched and bumps
+ *    local->call_count.
+ *
+ * Nothing is resolved when local->op_ret is already negative; the handler
+ * is still called. Always returns 0.
+ */
+int
+shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
+                             inode_t *res_inode,
+                             shard_post_resolve_fop_handler_t post_res_handler)
+{
+        int             i              = -1;
+        uint32_t        shard_idx_iter = 0;
+        char            path[PATH_MAX] = {0,};
+        inode_t        *inode          = NULL;
+        shard_priv_t   *priv           = NULL;
+        shard_local_t  *local          = NULL;
+
+        priv = this->private;
+        local = frame->local;
+        shard_idx_iter = local->first_block;
+
+        if (local->op_ret < 0)
+                goto out;
+
+        while (shard_idx_iter <= local->last_block) {
+                i++;
+                if (shard_idx_iter == 0) {
+                        local->inode_list[i] = inode_ref (res_inode);
+                        shard_idx_iter++;
+                        continue;
+                }
+
+                shard_make_block_abspath (shard_idx_iter, res_inode->gfid, path,
+                                          sizeof(path));
+
+                inode = inode_resolve (this->itable, path);
+                if (!inode) {
+                        /* Not in the inode table: count it as pending. */
+                        local->call_count++;
+                        shard_idx_iter++;
+                        continue;
+                }
+
+                gf_msg_debug (this->name, 0, "Shard %d already "
+                              "present. gfid=%s. Saving inode for future.",
+                              shard_idx_iter, uuid_utoa(inode->gfid));
+                local->inode_list[i] = inode;
+                /* Let the ref on the inodes that are already present
+                 * in inode table still be held so that they don't get
+                 * forgotten by the time the fop reaches the actual
+                 * write stage.
+                 */
+                LOCK(&priv->lock);
+                {
+                        /* Pass the number of the shard just resolved. The
+                         * iterator must only advance afterwards, otherwise
+                         * the list entry records block N+1 for shard N and
+                         * a later eviction builds the wrong basename.
+                         */
+                        __shard_update_shards_inode_list (inode, this,
+                                                          res_inode,
+                                                          shard_idx_iter);
+                }
+                UNLOCK(&priv->lock);
+
+                shard_idx_iter++;
+        }
+
+out:
+        post_res_handler (frame, this);
+        return 0;
+}
+
+/*
+ * Callback of the (f)xattrop wound by shard_update_file_size().
+ *
+ * On failure the error is logged and recorded in local->op_ret/op_errno.
+ * On success local->postbuf gets its size and block count from the
+ * returned @dict; if that fails, op_ret/op_errno are set to -1/ENOMEM.
+ * For truncate and ftruncate the inode ctx is additionally updated from
+ * local->postbuf. In every case local->post_update_size_handler is called
+ * last. Always returns 0.
+ */
+int
+shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, dict_t *dict,
+                            dict_t *xdata)
+{
+        shard_local_t *local = frame->local;
+        inode_t       *inode = NULL;
+
+        /* Prefer the inode of the fd; fall back to the one in the loc. */
+        if ((local->fd) && (local->fd->inode))
+                inode = local->fd->inode;
+        else if (local->loc.inode)
+                inode = local->loc.inode;
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size"
+                        " xattr failed on %s", uuid_utoa (inode->gfid));
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+        } else if (shard_modify_size_and_block_count (&local->postbuf, dict)) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+        } else if (local->fop == GF_FOP_FTRUNCATE ||
+                   local->fop == GF_FOP_TRUNCATE) {
+                shard_inode_ctx_set (inode, this, &local->postbuf, 0,
+                                     SHARD_INODE_WRITE_MASK);
+        }
+
+        local->post_update_size_handler (frame, this);
+        return 0;
+}
+
+/*
+ * Allocate the four-word 64-bit array used as the value of the file-size
+ * xattr and hand it back through @size_attr_p.
+ *
+ * Word 0 holds @size and word 2 holds @block_count, both converted with
+ * hton64(); words 1 and 3 stay zero, reserved for information that may be
+ * embedded in the same xattr later.
+ *
+ * Returns 0 on success, -1 when @size_attr_p is NULL or the allocation
+ * fails.
+ */
+int
+shard_set_size_attrs (int64_t size, int64_t block_count, int64_t **size_attr_p)
+{
+        int64_t *attrs = NULL;
+
+        if (!size_attr_p)
+                return -1;
+
+        attrs = GF_CALLOC (4, sizeof (int64_t), gf_shard_mt_int64_t);
+        if (!attrs)
+                return -1;
+
+        attrs[0] = hton64 (size);
+        attrs[2] = hton64 (block_count);
+
+        *size_attr_p = attrs;
+
+        return 0;
+}
+
+/*
+ * Apply the size and block-count deltas accumulated in the frame's shard
+ * local to the file-size xattr of the file.
+ *
+ * The deltas (local->delta_size + local->hole_size, local->delta_blocks)
+ * are sent with a GF_XATTROP_ADD_ARRAY64 operation: fxattrop on @fd when
+ * it is given, xattrop on @loc otherwise. @handler is remembered in
+ * local->post_update_size_handler for the callback to invoke.
+ *
+ * When both deltas are zero, or when preparing the request fails (in which
+ * case local->op_ret/op_errno are set to -1/ENOMEM), nothing is wound and
+ * @handler is called directly. Always returns 0.
+ */
+int
+shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        loc_t *loc,
+                        shard_post_update_size_fop_handler_t handler)
+{
+        int             ret       = -1;
+        int64_t        *size_attr = NULL;
+        inode_t        *inode     = NULL;
+        shard_local_t  *local     = NULL;
+        dict_t         *xattr_req = NULL;
+
+        local = frame->local;
+        local->post_update_size_handler = handler;
+
+        xattr_req = dict_new ();
+        if (!xattr_req) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto out;
+        }
+
+        inode = fd ? fd->inode : loc->inode;
+
+        /* Neither the size nor the block count changed: skip the xattrop. */
+        if ((local->delta_size + local->hole_size == 0) &&
+            (local->delta_blocks == 0))
+                goto out;
+
+        ret = shard_set_size_attrs (local->delta_size + local->hole_size,
+                                    local->delta_blocks, &size_attr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
+                        "Failed to set size attrs for %s",
+                        uuid_utoa (inode->gfid));
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto out;
+        }
+
+        ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr,
+                            8 * 4);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                        "Failed to set key %s into dict. gfid=%s",
+                        GF_XATTR_SHARD_FILE_SIZE, uuid_utoa (inode->gfid));
+                /* The dict did not take the buffer, so free it here. */
+                GF_FREE (size_attr);
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto out;
+        }
+
+        if (fd) {
+                STACK_WIND (frame, shard_update_file_size_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fxattrop, fd,
+                            GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+        } else {
+                STACK_WIND (frame, shard_update_file_size_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->xattrop, loc,
+                            GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+        }
+
+        dict_unref (xattr_req);
+        return 0;
+
+out:
+        if (xattr_req)
+                dict_unref (xattr_req);
+        handler (frame, this);
+        return 0;
+
+}
+
+/*
+ * Link @inode into the inode table under the parent and name held in
+ * local->dot_shard_loc, using @buf as its attributes, call inode_lookup()
+ * on the linked inode and remember it in priv->dot_shard_inode of the
+ * current translator (THIS).
+ */
+static void
+shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
+                            struct iatt *buf)
+{
+        shard_priv_t *priv   = THIS->private;
+        inode_t      *linked = NULL;
+
+        linked = inode_link (inode, local->dot_shard_loc.parent,
+                             local->dot_shard_loc.name, buf);
+        inode_lookup (linked);
+
+        priv->dot_shard_inode = linked;
+}
+
+/*
+ * Callback of the lookup on /.shard wound by shard_lookup_dot_shard().
+ *
+ * A failed lookup, or a /.shard that is not a directory (reported as EIO
+ * after a critical log), is recorded in local->op_ret/op_errno and
+ * local->post_res_handler is called directly. Otherwise the inode is
+ * linked as the shard directory and shard resolution continues on the base
+ * inode: local->loc2.inode for rename, local->loc.inode for every other
+ * fop. Always returns 0.
+ */
+int
+shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, inode_t *inode,
+                            struct iatt *buf, dict_t *xdata,
+                            struct iatt *postparent)
+{
+        shard_local_t *local      = frame->local;
+        inode_t       *base_inode = NULL;
+
+        if (op_ret) {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+                goto unwind;
+        }
+
+        if (!IA_ISDIR (buf->ia_type)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        SHARD_MSG_DOT_SHARD_NODIR, "/.shard already exists and "
+                        "is not a directory. Please remove /.shard from all "
+                        "bricks and try again");
+                local->op_ret = -1;
+                local->op_errno = EIO;
+                goto unwind;
+        }
+
+        shard_link_dot_shard_inode (local, inode, buf);
+
+        if (local->fop == GF_FOP_RENAME)
+                base_inode = local->loc2.inode;
+        else
+                base_inode = local->loc.inode;
+
+        shard_common_resolve_shards (frame, this, base_inode,
+                                     local->post_res_handler);
+        return 0;
+
+unwind:
+        local->post_res_handler (frame, this);
+        return 0;
+}
+
+/*
+ * Wind a lookup on local->dot_shard_loc, passing priv->dot_shard_gfid in
+ * the request dict under the "gfid-req" key. @post_res_handler is stored
+ * in local->post_res_handler for the callback.
+ *
+ * If the request dict cannot be created or filled, local->op_ret/op_errno
+ * are set to -1/ENOMEM and @post_res_handler is called directly. Always
+ * returns 0.
+ */
+int
+shard_lookup_dot_shard (call_frame_t *frame, xlator_t *this,
+                        shard_post_resolve_fop_handler_t post_res_handler)
+{
+        int             ret       = -1;
+        dict_t         *xattr_req = NULL;
+        shard_priv_t   *priv      = this->private;
+        shard_local_t  *local     = frame->local;
+
+        local->post_res_handler = post_res_handler;
+
+        xattr_req = dict_new ();
+        if (!xattr_req) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set_static_bin (xattr_req, "gfid-req", priv->dot_shard_gfid,
+                                   16);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                        "Failed to set gfid of /.shard into dict");
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto err;
+        }
+
+        STACK_WIND (frame, shard_lookup_dot_shard_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup, &local->dot_shard_loc,
+                    xattr_req);
+
+        dict_unref (xattr_req);
+        return 0;
+
+err:
+        if (xattr_req)
+                dict_unref (xattr_req);
+        post_res_handler (frame, this);
+        return 0;
+}
+
+/*
+ * Bring the shard inode ctx of @inode up to date after a lookup.
+ *
+ * When no block size can be read from an existing ctx (fresh lookup), the
+ * block size is taken from GF_XATTR_SHARD_BLOCK_SIZE in @xdata, or left at
+ * 0 if the key is absent, and stored in the ctx. For a non-zero block size
+ * the attributes selected by SHARD_LOOKUP_MASK are then copied from @buf
+ * and the cached size/block count are checked for invalidation.
+ */
+static void
+shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata,
+                        struct iatt *buf)
+{
+        uint64_t  size  = 0;
+        void     *bsize = NULL;
+
+        if (shard_inode_ctx_get_block_size (inode, this, &size)) {
+                /* Fresh lookup */
+                if (dict_get_ptr (xdata, GF_XATTR_SHARD_BLOCK_SIZE,
+                                  &bsize) == 0)
+                        size = ntoh64 (*((uint64_t *)bsize));
+
+                /* size stays 0 when the block-size xattr is absent. */
+                shard_inode_ctx_set (inode, this, buf, size,
+                                     SHARD_MASK_BLOCK_SIZE);
+        }
+
+        if (size == 0)
+                return;
+
+        /* Non-zero block size: refresh what SHARD_LOOKUP_MASK selects. */
+        shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
+        (void) shard_inode_ctx_invalidate (inode, this, buf);
+}
+
+/*
+ * Callback of the lookup wound by shard_lookup().
+ *
+ * Failed lookups and directories are unwound untouched. Otherwise, if the
+ * reply carries GF_XATTR_SHARD_FILE_SIZE and the caller is not gsyncd, the
+ * size and block count in @buf are replaced by the values from that xattr;
+ * the shard inode ctx is then updated from @xdata and @buf before the
+ * unwind. Always returns 0.
+ */
+int
+shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        if ((op_ret < 0) || IA_ISDIR (buf->ia_type))
+                goto unwind;
+
+        /* A file-size xattr in the reply carries the size and block count
+         * to present for this file; gsyncd gets the iatt as returned by
+         * the child.
+         */
+        if (dict_get (xdata, GF_XATTR_SHARD_FILE_SIZE) &&
+            frame->root->pid != GF_CLIENT_PID_GSYNCD)
+                shard_modify_size_and_block_count (buf, xdata);
+
+        /* On a fresh lookup this records the block size (0 when the
+         * block-size xattr is absent) in the inode ctx; for a non-zero
+         * block size the remaining attributes are cached as well.
+         */
+        shard_inode_ctx_update (inode, this, xdata, buf);
+
+unwind:
+        SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+                            xdata, postparent);
+        return 0;
+}
+
+/*
+ * lookup fop of the shard translator.
+ *
+ * For callers other than gsyncd the entry check SHARD_ENTRY_FOP_CHECK is
+ * applied to @loc first. A shard local is allocated and attached to the
+ * frame, and the request dict (a ref on @xattr_req, or a new dict when it
+ * is NULL) is extended with:
+ *  - GF_XATTR_SHARD_BLOCK_SIZE, when no block size is available from the
+ *    inode ctx of loc->inode;
+ *  - GF_XATTR_SHARD_FILE_SIZE, for callers other than gsyncd.
+ * GF_CONTENT_KEY is removed from @xattr_req if present, and the lookup is
+ * wound to the first child with shard_lookup_cbk() as callback.
+ *
+ * Any failure unwinds the lookup with -1 and op_errno (ENOMEM unless the
+ * entry check set another value). Always returns 0.
+ */
+int
+shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              dict_t *xattr_req)
+{
+        int             ret         = -1;
+        int32_t         op_errno    = ENOMEM;
+        uint64_t        block_size  = 0;
+        shard_local_t  *local       = NULL;
+        gf_boolean_t    from_gsyncd = _gf_false;
+
+        if (frame->root->pid == GF_CLIENT_PID_GSYNCD)
+                from_gsyncd = _gf_true;
+
+        if (!from_gsyncd) {
+                SHARD_ENTRY_FOP_CHECK (loc, op_errno, err);
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        if (xattr_req)
+                local->xattr_req = dict_ref (xattr_req);
+        else
+                local->xattr_req = dict_new ();
+        if (!local->xattr_req)
+                goto err;
+
+        if (shard_inode_ctx_get_block_size (loc->inode, this, &block_size)) {
+                ret = dict_set_uint64 (local->xattr_req,
+                                       GF_XATTR_SHARD_BLOCK_SIZE, 0);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                SHARD_MSG_DICT_SET_FAILED, "Failed to set dict"
+                                " value: key:%s for path %s",
+                                GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
+                        goto err;
+                }
+        }
+
+        if (!from_gsyncd) {
+                ret = dict_set_uint64 (local->xattr_req,
+                                       GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                SHARD_MSG_DICT_SET_FAILED,
+                                "Failed to set dict value: key:%s for path %s.",
+                                GF_XATTR_SHARD_FILE_SIZE, loc->path);
+                        goto err;
+                }
+        }
+
+        if ((xattr_req) && (dict_get (xattr_req, GF_CONTENT_KEY)))
+                dict_del (xattr_req, GF_CONTENT_KEY);
+
+        STACK_WIND (frame, shard_lookup_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->lookup, loc, local->xattr_req);
+
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL,
+                            NULL, NULL);
+        return 0;
+
+}
+
+/*
+ * Callback of the lookup wound by shard_lookup_base_file().
+ *
+ * On success the returned iatt is copied to local->prebuf, its size and
+ * block count are replaced with the values from the file-size xattr in
+ * @xdata, and the result is stored in the inode ctx. The fields selected
+ * by SHARD_INODE_WRITE_MASK are stored when a ctx already exists, those of
+ * SHARD_ALL_MASK otherwise; the ctx's refresh flag is reset in either
+ * case.
+ *
+ * Failures are recorded in local->op_ret/op_errno: the lookup's own error,
+ * EINVAL when the file-size xattr is missing, ENOMEM when the ctx cannot
+ * be updated. local->handler is called in every case. Always returns 0.
+ */
+int
+shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, inode_t *inode,
+                            struct iatt *buf, dict_t *xdata,
+                            struct iatt *postparent)
+{
+        int                 ret   = -1;
+        int32_t             mask  = SHARD_INODE_WRITE_MASK;
+        shard_local_t      *local = NULL;
+        shard_inode_ctx_t   ctx   = {0,};
+
+        local = frame->local;
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file"
+                        " failed : %s", loc_gfid_utoa (&(local->loc)));
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+                goto unwind;
+        }
+
+        local->prebuf = *buf;
+        if (shard_modify_size_and_block_count (&local->prebuf, xdata)) {
+                local->op_ret = -1;
+                local->op_errno = EINVAL;
+                goto unwind;
+        }
+
+        /* No ctx yet: everything has to be stored, not just the fields
+         * that inode writes change.
+         */
+        if (shard_inode_ctx_get_all (inode, this, &ctx))
+                mask = SHARD_ALL_MASK;
+
+        ret = shard_inode_ctx_set (inode, this, &local->prebuf, 0,
+                                   (mask | SHARD_MASK_REFRESH_RESET));
+        if (ret) {
+                /* Error number first, message id second, as in every other
+                 * gf_msg() call in this file.
+                 */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_SET_FAILED, "Failed to set inode"
+                        " write params into inode ctx for %s",
+                        uuid_utoa (buf->ia_gfid));
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto unwind;
+        }
+
+unwind:
+        local->handler (frame, this);
+        return 0;
+}
+
+/*
+ * Make the attributes of the base file at @loc available in
+ * local->prebuf, then call @handler.
+ *
+ * If the inode ctx holds a cached iatt that is not flagged for refresh,
+ * it is copied to local->prebuf and @handler is called right away.
+ * Otherwise a lookup is wound to the first child with a request dict
+ * prepared by SHARD_MD_READ_FOP_INIT_REQ_DICT, and @handler (stored in
+ * local->handler) is invoked from shard_lookup_base_file_cbk().
+ *
+ * If the request dict cannot be created, local->op_ret/op_errno are set
+ * to -1/ENOMEM and @handler is called directly. Always returns 0.
+ */
+int
+shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        shard_post_fop_handler_t handler)
+{
+        int             ret          = -1;
+        shard_local_t  *local        = NULL;
+        dict_t         *xattr_req    = NULL;
+        gf_boolean_t    need_refresh = _gf_false;
+
+        local = frame->local;
+        local->handler = handler;
+
+        ret = shard_inode_ctx_fill_iatt_from_cache (loc->inode, this,
+                                                    &local->prebuf,
+                                                    &need_refresh);
+        /* By this time, inode ctx should have been created either in create,
+         * mknod, readdirp or lookup. If not it is a bug!
+         */
+        if ((ret == 0) && (need_refresh == _gf_false)) {
+                /* A separator follows the gfid so that it does not run
+                 * into the second sentence of the message.
+                 */
+                gf_msg_debug (this->name, 0, "Skipping lookup on base file: %s. "
+                              "Serving prebuf off the inode ctx cache",
+                              uuid_utoa (loc->gfid));
+                goto out;
+        }
+
+        xattr_req = dict_new ();
+        if (!xattr_req) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto out;
+        }
+
+        SHARD_MD_READ_FOP_INIT_REQ_DICT (this, xattr_req, loc->gfid,
+                                         local, out);
+
+        STACK_WIND (frame, shard_lookup_base_file_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+        dict_unref (xattr_req);
+        return 0;
+
+out:
+        if (xattr_req)
+                dict_unref (xattr_req);
+        handler (frame, this);
+        return 0;
+
+}
+
+/*
+ * Final step of a sharded fstat: on success, cache the attributes selected
+ * by SHARD_LOOKUP_MASK from local->prebuf in the inode ctx of the fd's
+ * inode; then unwind fstat with local->op_ret/op_errno, local->prebuf and
+ * local->xattr_rsp. Always returns 0.
+ */
+int
+shard_post_fstat_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (!(local->op_ret < 0))
+                shard_inode_ctx_set (local->fd->inode, this, &local->prebuf, 0,
+                                     SHARD_LOOKUP_MASK);
+
+        SHARD_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno,
+                            &local->prebuf, local->xattr_rsp);
+        return 0;
+}
+
+/*
+ * Final step of a sharded stat: on success, cache the attributes selected
+ * by SHARD_LOOKUP_MASK from local->prebuf in the inode ctx of
+ * local->loc.inode; then unwind stat with local->op_ret/op_errno,
+ * local->prebuf and local->xattr_rsp. Always returns 0.
+ */
+int
+shard_post_stat_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (!(local->op_ret < 0))
+                shard_inode_ctx_set (local->loc.inode, this, &local->prebuf, 0,
+                                     SHARD_LOOKUP_MASK);
+
+        SHARD_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+                            &local->prebuf, local->xattr_rsp);
+        return 0;
+}
+
+/*
+ * Common callback of the stat/fstat wound by shard_stat()/shard_fstat().
+ *
+ * On success the returned iatt is copied to local->prebuf, its size and
+ * block count are replaced with the values from the file-size xattr in
+ * @xdata, a ref on @xdata is kept in local->xattr_rsp, and the cached
+ * attributes in the inode ctx are flagged for refresh if their size or
+ * block count differ from local->prebuf.
+ *
+ * Failures are recorded in local->op_ret/op_errno (EINVAL when the
+ * file-size xattr is missing). local->handler is called in every case.
+ * Always returns 0.
+ */
+int
+shard_common_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                       dict_t *xdata)
+{
+        inode_t        *inode = NULL;
+        shard_local_t  *local = NULL;
+
+        local = frame->local;
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        SHARD_MSG_STAT_FAILED, "stat failed: %s",
+                        local->fd ? uuid_utoa (local->fd->inode->gfid)
+                        : uuid_utoa ((local->loc.inode)->gfid));
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+                goto unwind;
+        }
+
+        local->prebuf = *buf;
+        if (shard_modify_size_and_block_count (&local->prebuf, xdata)) {
+                local->op_ret = -1;
+                local->op_errno = EINVAL;
+                goto unwind;
+        }
+        local->xattr_rsp = dict_ref (xdata);
+
+        if (local->loc.inode)
+                inode = local->loc.inode;
+        else
+                inode = local->fd->inode;
+
+        /* Compare the cache against the adjusted iatt. The raw @buf still
+         * carries the size and block count of the base file alone, whereas
+         * the ctx is populated from iatts that went through
+         * shard_modify_size_and_block_count(), so using @buf would flag a
+         * refresh even when nothing changed.
+         */
+        shard_inode_ctx_invalidate (inode, this, &local->prebuf);
+
+unwind:
+        local->handler (frame, this);
+        return 0;
+}
+
+/*
+ * stat fop of the shard translator.
+ *
+ * The call is passed straight to the first child (default_stat_cbk) for
+ * directories and symlinks, for files whose inode ctx holds a zero block
+ * size, and for requests from gsyncd. Otherwise a shard local is set up,
+ * the request dict (a ref on @xdata or a new dict) is prepared with
+ * SHARD_MD_READ_FOP_INIT_REQ_DICT, and stat is wound with
+ * shard_common_stat_cbk() as callback and shard_post_stat_handler() as the
+ * final handler.
+ *
+ * If the block size cannot be read from the inode ctx, or a later setup
+ * step fails, stat is unwound with -1/ENOMEM. Always returns 0.
+ */
+int
+shard_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        int             ret        = -1;
+        uint64_t        block_size = 0;
+        shard_local_t  *local      = NULL;
+
+        if ((IA_ISDIR (loc->inode->ia_type)) ||
+            (IA_ISLNK (loc->inode->ia_type)))
+                goto passthrough;
+
+        ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+                        "size from inode ctx of %s",
+                        uuid_utoa (loc->inode->gfid));
+                goto err;
+        }
+
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD)
+                goto passthrough;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        local->handler = shard_post_stat_handler;
+        loc_copy (&local->loc, loc);
+
+        if (xdata)
+                local->xattr_req = dict_ref (xdata);
+        else
+                local->xattr_req = dict_new ();
+        if (!local->xattr_req)
+                goto err;
+
+        SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req,
+                                         local->loc.gfid, local, err);
+
+        STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
+
+        return 0;
+
+passthrough:
+        STACK_WIND (frame, default_stat_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->stat, loc, xdata);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fstat fop of the shard translator.
+ *
+ * The call is passed straight to the first child (default_fstat_cbk) for
+ * directories and symlinks, for files whose inode ctx holds a zero block
+ * size, and for requests from gsyncd. Otherwise this->itable is set from
+ * the fd's inode if still unset, a shard local holding a ref on @fd is set
+ * up, the request dict (a ref on @xdata or a new dict) is prepared with
+ * SHARD_MD_READ_FOP_INIT_REQ_DICT, and fstat is wound with
+ * shard_common_stat_cbk() as callback and shard_post_fstat_handler() as
+ * the final handler.
+ *
+ * If the block size cannot be read from the inode ctx, or a later setup
+ * step fails, fstat is unwound with -1/ENOMEM. Always returns 0.
+ */
+int
+shard_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        int             ret        = -1;
+        uint64_t        block_size = 0;
+        shard_local_t  *local      = NULL;
+
+        if ((IA_ISDIR (fd->inode->ia_type)) ||
+            (IA_ISLNK (fd->inode->ia_type)))
+                goto passthrough;
+
+        ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+                        "size from inode ctx of %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto err;
+        }
+
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD)
+                goto passthrough;
+
+        if (!this->itable)
+                this->itable = fd->inode->table;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        local->handler = shard_post_fstat_handler;
+        local->fd = fd_ref (fd);
+
+        if (xdata)
+                local->xattr_req = dict_ref (xdata);
+        else
+                local->xattr_req = dict_new ();
+        if (!local->xattr_req)
+                goto err;
+
+        SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req,
+                                         fd->inode->gfid, local, err);
+
+        STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
+        return 0;
+
+passthrough:
+        STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fstat, fd, xdata);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Handler run once the file-size update of a truncate/ftruncate is done:
+ * unwinds ftruncate, or truncate when local->fop is GF_FOP_TRUNCATE, with
+ * local->op_ret/op_errno, local->prebuf and local->postbuf. Always
+ * returns 0.
+ */
+int
+shard_post_update_size_truncate_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->fop != GF_FOP_TRUNCATE) {
+                SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
+                                    local->op_errno, &local->prebuf,
+                                    &local->postbuf, NULL);
+        } else {
+                SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
+                                    local->op_errno, &local->prebuf,
+                                    &local->postbuf, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * Callback of the truncate wound on the last shard by
+ * shard_truncate_last_shard().
+ *
+ * On failure the error is logged, recorded in local->op_ret/op_errno and
+ * the original fop (truncate or ftruncate, per local->fop) is unwound with
+ * NULL buffers. On success local->postbuf and the size/block deltas are
+ * computed from local->offset and from the shard's @prebuf/@postbuf, the
+ * times in the base inode's ctx are updated from @postbuf, and the
+ * file-size xattr update is started with
+ * shard_post_update_size_truncate_handler() as its completion handler.
+ * Always returns 0.
+ */
+int
+shard_truncate_last_shard_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+                               struct iatt *prebuf, struct iatt *postbuf,
+                               dict_t *xdata)
+{
+        inode_t        *inode = NULL;
+        shard_local_t  *local = NULL;
+
+        local = frame->local;
+
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
+
+        if (local->fop == GF_FOP_TRUNCATE)
+                inode = local->loc.inode;
+        else
+                inode = local->fd->inode;
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last"
+                        " shard failed : %s", uuid_utoa (inode->gfid));
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+                goto err;
+        }
+
+        local->postbuf.ia_size = local->offset;
+        local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
+
+        /* The deltas are left negative on purpose: the xattrop adds them,
+         * which performs the subtraction.
+         */
+        local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+        local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
+        local->hole_size = 0;
+
+        shard_inode_ctx_set (inode, this, postbuf, 0, SHARD_MASK_TIMES);
+
+        shard_update_file_size (frame, this, NULL, &local->loc,
+                                shard_post_update_size_truncate_handler);
+        return 0;
+
+err:
+        if (local->fop == GF_FOP_TRUNCATE) {
+                SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL);
+        } else {
+                SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL);
+        }
+        return 0;
+}
+
+/*
+ * Truncate the last shard that remains after a truncate/ftruncate.
+ *
+ * @inode is the inode of that shard. When it is NULL the shard does not
+ * exist in the backend (it lies in a hole), so only the file-size xattr
+ * update is started. Otherwise a truncate to
+ * (local->offset % local->block_size) bytes is wound on the shard, with
+ * the root fs id set on the frame via SHARD_SET_ROOT_FS_ID, and
+ * shard_truncate_last_shard_cbk() continues from there. Always returns 0.
+ */
+int
+shard_truncate_last_shard (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+        size_t          last_shard_size_after = 0;
+        loc_t           loc                   = {0,};
+        inode_t        *base_inode            = NULL;
+        shard_local_t  *local                 = NULL;
+
+        local = frame->local;
+
+        /* A NULL inode could be due to the fact that the last shard which
+         * needs to be truncated does not exist due to it lying in a hole
+         * region. So the only thing left to do in that case would be an
+         * update to file size xattr.
+         */
+        if (!inode) {
+                /* @inode is NULL here, so the gfid to log must come from
+                 * the base file; it is picked the same way as in
+                 * shard_truncate_last_shard_cbk().
+                 */
+                base_inode = (local->fop == GF_FOP_TRUNCATE) ?
+                             local->loc.inode : local->fd->inode;
+                gf_msg_debug (this->name, 0, "Last shard to be truncated absent"
+                              " in backend: %s. Directly proceeding to update "
+                              "file size", uuid_utoa (base_inode->gfid));
+                shard_update_file_size (frame, this, NULL, &local->loc,
+                                        shard_post_update_size_truncate_handler);
+                return 0;
+        }
+
+        SHARD_SET_ROOT_FS_ID (frame, local);
+
+        loc.inode = inode_ref (inode);
+        gf_uuid_copy (loc.gfid, inode->gfid);
+
+        last_shard_size_after = (local->offset % local->block_size);
+
+        STACK_WIND (frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate, &loc,
+                    last_shard_size_after, NULL);
+        loc_wipe (&loc);
+        return 0;
+}
+
+int
+shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int /*
+ * shard_truncate_htol: unlink the shards that lie beyond the new end of file.
+ *
+ * Counts the non-NULL entries of local->inode_list[1 .. num_blocks - 1] and
+ * winds one unlink per such entry to the first child, parented on
+ * priv->dot_shard_inode and tagged with the block number as cookie; replies
+ * arrive in shard_unlink_shards_do_cbk. If no entry is present, no unlink is
+ * wound and the file size update is started directly.
+ *
+ * frame - call frame; frame->local holds the truncate state.
+ * this  - this translator instance.
+ * inode - base file inode; its gfid is used to build shard paths and logs.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): the comment in the zero-call-count branch speaks of a "final
+ * truncate + size updates", but that branch only calls
+ * shard_update_file_size() and never shard_truncate_last_shard() -- confirm
+ * that skipping the truncate of inode_list[0] is intended.
+ */
+shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ int i = 1;
+ int ret = -1;
+ int call_count = 0;
+ uint32_t cur_block = 0;
+ uint32_t last_block = 0;
+ char path[PATH_MAX] = {0,};
+ char *bname = NULL;
+ loc_t loc = {0,};
+ gf_boolean_t wind_failed = _gf_false;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ cur_block = local->first_block + 1; /* the first participant block is not unlinked here */
+ last_block = local->last_block;
+
+ /* Determine call count */
+ for (i = 1; i < local->num_blocks; i++) {
+ if (!local->inode_list[i])
+ continue;
+ call_count++;
+ }
+
+ if (!call_count) {
+ /* Call count = 0 implies that all of the shards that need to be
+ * unlinked do not exist. So shard xlator would now proceed to
+ * do the final truncate + size updates.
+ */
+ gf_msg_debug (this->name, 0, "Shards to be unlinked as part of "
+ "truncate absent in backend: %s. Directly "
+ "proceeding to update file size",
+ uuid_utoa (inode->gfid));
+ local->postbuf.ia_size = local->offset;
+ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+ local->delta_size = local->postbuf.ia_size -
+ local->prebuf.ia_size;
+ local->delta_blocks = 0;
+ local->hole_size = 0;
+ shard_update_file_size (frame, this, local->fd, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ i = 1; /* inode_list index that pairs with cur_block */
+
+ SHARD_SET_ROOT_FS_ID (frame, local);
+ while (cur_block <= last_block) {
+ if (!local->inode_list[i]) { /* absent shard: nothing to unlink, not counted */
+ cur_block++;
+ i++;
+ continue;
+ }
+ if (wind_failed) { /* an earlier inode_path() failed: fail this shard as well */
+ shard_unlink_shards_do_cbk (frame,
+ (void *)(long) cur_block,
+ this, -1, ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath (cur_block, inode->gfid, path,
+ sizeof (path));
+ bname = strrchr (path, '/') + 1;
+ loc.parent = inode_ref (priv->dot_shard_inode);
+ ret = inode_path (loc.parent, bname, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
+ " on %s. Base file gfid = %s", bname,
+ uuid_utoa (inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe (&loc);
+ wind_failed = _gf_true;
+ shard_unlink_shards_do_cbk (frame,
+ (void *)(long) cur_block,
+ this, -1, ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+ loc.name = strrchr (loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ loc.inode = inode_ref (local->inode_list[i]);
+
+ STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk,
+ (void *) (long) cur_block, FIRST_CHILD(this),
+ FIRST_CHILD (this)->fops->unlink, &loc,
+ 0, NULL);
+ loc_wipe (&loc);
+next:
+ i++;
+ cur_block++;
+ if (!--call_count) /* every present shard has been wound or failed */
+ break;
+ }
+ return 0;
+
+}
+
+/* Dispatch the shrinking truncate once the participant shards are resolved.
+ *
+ * With a single participant block nothing has to be unlinked, so only the
+ * last shard is truncated; otherwise the shards past the new end of file
+ * are unlinked first. Always returns 0.
+ */
+int
+shard_truncate_do (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->num_blocks != 1)
+                shard_truncate_htol (frame, this, local->loc.inode);
+        else
+                shard_truncate_last_shard (frame, this,
+                                           local->inode_list[0]);
+
+        return 0;
+}
+
+/* Continuation run after the participant shards have been looked up for a
+ * truncate/ftruncate: proceed with the truncate when the lookups succeeded,
+ * otherwise unwind the fop with the recorded error. Always returns 0.
+ */
+int
+shard_post_lookup_shards_truncate_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                shard_truncate_do (frame, this);
+                return 0;
+        }
+
+        if (local->fop == GF_FOP_TRUNCATE) {
+                SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL);
+        } else {
+                SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL);
+        }
+        return 0;
+}
+
+/* Link a freshly looked-up shard inode under the .shard directory and record
+ * it in local->inode_list at the slot that corresponds to block_num.
+ *
+ * The linked inode is stored without being unref'd here; the existing code
+ * comment states that the unref happens later in shard_local_wipe(). The
+ * translator-wide inode list is updated under priv->lock.
+ */
+void
+shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
+                        struct iatt *buf)
+{
+        xlator_t     *this = THIS;
+        shard_priv_t *priv = this->private;
+        inode_t      *linked_inode = NULL;
+        char          block_bname[256] = {0,};
+
+        shard_make_block_bname (block_num, (local->loc.inode)->gfid,
+                                block_bname, sizeof (block_bname));
+        shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
+
+        linked_inode = inode_link (inode, priv->dot_shard_inode, block_bname,
+                                   buf);
+        inode_lookup (linked_inode);
+
+        /* Slot index is the block number relative to the first participant
+         * block.
+         */
+        local->inode_list[block_num - local->first_block] = linked_inode;
+
+        LOCK(&priv->lock);
+        __shard_update_shards_inode_list (linked_inode, this,
+                                          local->loc.inode, block_num);
+        UNLOCK(&priv->lock);
+}
+
+/* Callback of the per-shard lookups wound by shard_common_lookup_shards.
+ *
+ * cookie carries the shard block number. A successful lookup links the
+ * inode into local->inode_list. ENOENT is silently tolerated for truncate,
+ * ftruncate, rename and unlink; any other failure is logged and stored in
+ * local. When the last outstanding reply arrives, the fop-specific handler
+ * saved in local->pls_fop_handler is invoked. Always returns 0.
+ */
+int
+shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
+                                xlator_t *this, int32_t op_ret,
+                                int32_t op_errno, inode_t *inode,
+                                struct iatt *buf, dict_t *xdata,
+                                struct iatt *postparent)
+{
+        shard_local_t *local = frame->local;
+        int            shard_block_num = (long) cookie;
+        int            enoent_is_ok = 0;
+
+        if (op_ret >= 0) {
+                shard_link_block_inode (local, shard_block_num, inode, buf);
+        } else {
+                /* A missing shard is not an error for these fops. */
+                enoent_is_ok = ((local->fop == GF_FOP_TRUNCATE) ||
+                                (local->fop == GF_FOP_FTRUNCATE) ||
+                                (local->fop == GF_FOP_RENAME) ||
+                                (local->fop == GF_FOP_UNLINK));
+
+                if (!(enoent_is_ok && (op_errno == ENOENT))) {
+                        gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                                SHARD_MSG_LOOKUP_SHARD_FAILED,
+                                "Lookup on shard %d failed. Base file gfid = %s",
+                                shard_block_num,
+                                (local->fop == GF_FOP_RENAME) ?
+                                uuid_utoa (local->loc2.inode->gfid)
+                                : uuid_utoa (local->loc.inode->gfid));
+                        local->op_ret = op_ret;
+                        local->op_errno = op_errno;
+                }
+        }
+
+        /* The handler runs exactly once, after the final reply, whether or
+         * not an error was recorded.
+         */
+        if (shard_call_count_return (frame) == 0)
+                local->pls_fop_handler (frame, this);
+
+        return 0;
+}
+
+/* Build a request dict for a shard lookup: a copy of dict (made with
+ * dict_copy_with_ref) that additionally carries a newly generated uuid under
+ * the key "gfid-req".
+ *
+ * Returns the new dict, or NULL when the copy, the uuid allocation or the
+ * dict insertion fails.
+ */
+dict_t*
+shard_create_gfid_dict (dict_t *dict)
+{
+        uuid_t *gfid = NULL;
+        dict_t *new = dict_copy_with_ref (dict, NULL);
+
+        if (!new)
+                return NULL;
+
+        gfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+        if (!gfid)
+                goto fail;
+
+        gf_uuid_generate (*gfid);
+
+        if (dict_set_dynptr (new, "gfid-req", gfid, sizeof (uuid_t)))
+                goto fail;
+
+        return new;
+
+fail:
+        dict_unref (new);
+        GF_FREE (gfid);
+        return NULL;
+}
+
+int /*
+ * shard_common_lookup_shards: look up the participant shards that are not
+ * resolved yet.
+ *
+ * Walks the blocks first_block .. last_block and, for every block whose
+ * local->inode_list slot is still NULL, winds a lookup to the first child
+ * with the block number as cookie. Replies are handled by
+ * shard_common_lookup_shards_cbk, which eventually calls `handler` (saved in
+ * local->pls_fop_handler). The walk ends once local->call_count (as read on
+ * entry) unresolved blocks have been processed.
+ *
+ * frame   - call frame; frame->local holds the block range and inode list.
+ * this    - this translator instance.
+ * inode   - base file inode; its gfid is used for shard paths and logs.
+ * handler - continuation to run after all lookups have replied.
+ *
+ * Always returns 0; failures are reported through the callback.
+ */
+shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ shard_post_lookup_shards_fop_handler_t handler)
+{
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ int32_t shard_idx_iter = 0;
+ int last_block = 0;
+ char path[PATH_MAX] = {0,};
+ char *bname = NULL;
+ loc_t loc = {0,};
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+ dict_t *xattr_req = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+ shard_idx_iter = local->first_block;
+ last_block = local->last_block;
+ local->pls_fop_handler = handler;
+
+ while (shard_idx_iter <= last_block) {
+ if (local->inode_list[i]) { /* already resolved: skipped and not counted */
+ i++;
+ shard_idx_iter++;
+ continue;
+ }
+
+ if (wind_failed) { /* an earlier setup step failed: fail this block as well */
+ shard_common_lookup_shards_cbk (frame,
+ (void *) (long) shard_idx_iter,
+ this, -1, ENOMEM, NULL, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath (shard_idx_iter, inode->gfid, path,
+ sizeof(path));
+
+ bname = strrchr (path, '/') + 1;
+ loc.inode = inode_new (this->itable);
+ loc.parent = inode_ref (priv->dot_shard_inode);
+ ret = inode_path (loc.parent, bname, (char **) &(loc.path));
+ if (ret < 0 || !(loc.inode)) { /* also taken when inode_new() returned NULL */
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
+ " on %s, base file gfid = %s", bname,
+ uuid_utoa (inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe (&loc);
+ wind_failed = _gf_true;
+ shard_common_lookup_shards_cbk (frame,
+ (void *) (long) shard_idx_iter,
+ this, -1, ENOMEM, NULL, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ loc.name = strrchr (loc.path, '/');
+ if (loc.name)
+ loc.name++;
+
+ xattr_req = shard_create_gfid_dict (local->xattr_req); /* per-lookup dict carrying a fresh "gfid-req" */
+ if (!xattr_req) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ loc_wipe (&loc);
+ shard_common_lookup_shards_cbk (frame,
+ (void *) (long) shard_idx_iter,
+ this, -1, ENOMEM, NULL, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ STACK_WIND_COOKIE (frame, shard_common_lookup_shards_cbk,
+ (void *) (long) shard_idx_iter,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &loc,
+ xattr_req);
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
+next:
+ shard_idx_iter++;
+ i++;
+
+ if (!--call_count) /* all unresolved blocks have been wound or failed */
+ break;
+ }
+
+ return 0;
+}
+
+/* Continuation run after the participant shards of a truncate/ftruncate have
+ * been resolved.
+ *
+ * On success it either proceeds with the truncate (nothing left to look up)
+ * or looks up the unresolved shards first. An ENOENT failure is treated as
+ * "only the size xattr needs updating"; any other failure unwinds the fop.
+ * Always returns 0.
+ */
+int
+shard_post_resolve_truncate_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                if (local->call_count)
+                        shard_common_lookup_shards (frame, this,
+                                                    local->loc.inode,
+                                      shard_post_lookup_shards_truncate_handler);
+                else
+                        shard_truncate_do (frame, this);
+                return 0;
+        }
+
+        if (local->op_errno != ENOENT) {
+                if (local->fop == GF_FOP_TRUNCATE) {
+                        SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL);
+                } else {
+                        SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        /* If lookup on /.shard fails with ENOENT, it means that the file was
+         * 0-byte in size but truncated sometime in the past to a higher size
+         * which is reflected in the size xattr, and is now being truncated
+         * to a lower size. Only the size xattr has to be updated before
+         * unwinding.
+         */
+        local->first_block = local->last_block = 0;
+        local->num_blocks = 1;
+        local->call_count = 0;
+        local->op_ret = 0;
+        local->postbuf.ia_size = local->offset;
+        shard_update_file_size (frame, this, local->fd, &local->loc,
+                                shard_post_update_size_truncate_handler);
+        return 0;
+}
+
+int /*
+ * shard_truncate_begin: start a shrinking truncate/ftruncate.
+ *
+ * Computes the participant block range into local->first_block,
+ * local->last_block and local->num_blocks. When only block 0 participates,
+ * the truncate/ftruncate is wound directly on the base file. Otherwise the
+ * inode list is allocated and shard resolution is started (preceded by a
+ * lookup of the .shard directory when its inode is not found in the inode
+ * table); shard_post_resolve_truncate_handler continues from there.
+ *
+ * Always returns 0. Allocation or .shard loc initialisation failures unwind
+ * the fop with -1/ENOMEM.
+ */
+shard_truncate_begin (call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ /* First participant block here is the lowest numbered block that would
+ * hold the last byte of the file post successful truncation.
+ * Last participant block is the block that contains the last byte in
+ * the current state of the file.
+ * If (first block == last_block):
+ * then that means that the file only needs truncation of the
+ * first (or last since both are same) block.
+ * Else
+ * if (new_size % block_size == 0)
+ * then that means there is no truncate to be done with
+ * only shards from first_block + 1 through the last
+ * block needing to be unlinked.
+ * else
+ * both truncate of the first block and unlink of the
+ * remaining shards until end of file is required.
+ */
+ local->first_block = (local->offset == 0) ? 0
+ : get_lowest_block (local->offset - 1,
+ local->block_size);
+ local->last_block = get_highest_block (0, local->prebuf.ia_size,
+ local->block_size);
+
+ local->num_blocks = local->last_block - local->first_block + 1;
+
+ if ((local->first_block == 0) && (local->num_blocks == 1)) { /* only the base file is involved */
+ if (local->fop == GF_FOP_TRUNCATE)
+ STACK_WIND (frame, shard_truncate_last_shard_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ &local->loc, local->offset,
+ local->xattr_req);
+ else
+ STACK_WIND (frame, shard_truncate_last_shard_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd, local->offset, local->xattr_req);
+ return 0;
+ }
+
+ local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
+ gf_shard_mt_inode_list); /* one zero-initialised slot per participant block */
+ if (!local->inode_list)
+ goto err;
+
+ local->dot_shard_loc.inode = inode_find (this->itable,
+ priv->dot_shard_gfid);
+ if (!local->dot_shard_loc.inode) { /* .shard inode not in the inode table: look it up first */
+ ret = shard_init_dot_shard_loc (this, local);
+ if (ret)
+ goto err;
+ shard_lookup_dot_shard (frame, this,
+ shard_post_resolve_truncate_handler);
+ } else {
+ shard_common_resolve_shards (frame, this,
+ (local->fop == GF_FOP_TRUNCATE) ?
+ local->loc.inode :
+ local->fd->inode,
+ shard_post_resolve_truncate_handler);
+ }
+ return 0;
+
+err:
+ if (local->fop == GF_FOP_TRUNCATE)
+ SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL,
+ NULL);
+ else
+ SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+/* Continuation run after the base file lookup of a truncate/ftruncate.
+ *
+ * Compares the requested offset with the current file size and:
+ *   - grows the file by updating only the size (recording the hole size),
+ *   - unwinds immediately when the size is already the requested one,
+ *   - otherwise starts the shrinking truncate.
+ * A failed lookup unwinds the fop with the recorded error. Always returns 0.
+ */
+int
+shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret < 0) {
+                if (local->fop == GF_FOP_TRUNCATE) {
+                        SHARD_STACK_UNWIND (truncate, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL);
+                } else {
+                        SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        local->postbuf = local->prebuf;
+
+        if (local->offset > local->prebuf.ia_size) {
+                /* Truncate from a lower to a higher size: set the new size
+                 * xattr and unwind.
+                 */
+                local->hole_size = local->offset - local->prebuf.ia_size;
+                local->delta_size = 0;
+                local->delta_blocks = 0;
+                local->postbuf.ia_size = local->offset;
+                shard_update_file_size (frame, this, NULL, &local->loc,
+                                        shard_post_update_size_truncate_handler);
+                return 0;
+        }
+
+        if (local->prebuf.ia_size == local->offset) {
+                /* Size already matches the request: nothing to do. */
+                if (local->fop == GF_FOP_TRUNCATE) {
+                        SHARD_STACK_UNWIND (truncate, frame, 0, 0,
+                                            &local->prebuf, &local->postbuf,
+                                            NULL);
+                } else {
+                        SHARD_STACK_UNWIND (ftruncate, frame, 0, 0,
+                                            &local->prebuf, &local->postbuf,
+                                            NULL);
+                }
+                return 0;
+        }
+
+        /* Shrinking truncate:
+         *   i.   unlink all shards that need to be unlinked,
+         *   ii.  truncate the last of the shards,
+         *   iii. update the new size using setxattr,
+         * and unwind the fop.
+         */
+        local->hole_size = 0;
+        local->delta_size = (local->offset - local->prebuf.ia_size);
+        local->delta_blocks = 0;
+        shard_truncate_begin (frame, this);
+        return 0;
+}
+
+/* TO-DO:
+ * Fix updates to size and block count with racing write(s) and truncate(s).
+ */
+
+/* truncate fop entry point.
+ *
+ * Files without a shard block size, and requests whose pid is
+ * GF_CLIENT_PID_GSYNCD, are passed straight to the first child. Otherwise
+ * the request is recorded in a freshly allocated local and a lookup of the
+ * base file is started; shard_post_lookup_truncate_handler continues.
+ *
+ * Always returns 0; every failure in this function unwinds with -1/ENOMEM.
+ */
+int
+shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+                dict_t *xdata)
+{
+        uint64_t       block_size = 0;
+        shard_local_t *local = NULL;
+
+        if (shard_inode_ctx_get_block_size (loc->inode, this, &block_size)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED,
+                        "Failed to get block size from inode ctx of %s",
+                        uuid_utoa (loc->inode->gfid));
+                goto err;
+        }
+
+        if ((frame->root->pid == GF_CLIENT_PID_GSYNCD) || !block_size) {
+                STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->truncate, loc, offset,
+                            xdata);
+                return 0;
+        }
+
+        if (!this->itable)
+                this->itable = loc->inode->table;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        local->fop = GF_FOP_TRUNCATE;
+        local->block_size = block_size;
+        local->offset = offset;
+        loc_copy (&local->loc, loc);
+
+        /* Reuse the caller's dict when there is one, else start empty. */
+        if (xdata)
+                local->xattr_req = dict_ref (xdata);
+        else
+                local->xattr_req = dict_new ();
+        if (!local->xattr_req)
+                goto err;
+
+        shard_lookup_base_file (frame, this, &local->loc,
+                                shard_post_lookup_truncate_handler);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+/* ftruncate fop entry point.
+ *
+ * Files without a shard block size, and requests whose pid is
+ * GF_CLIENT_PID_GSYNCD, are passed straight to the first child. Otherwise
+ * the request is recorded in a freshly allocated local (with a loc built
+ * from the fd's inode) and a lookup of the base file is started;
+ * shard_post_lookup_truncate_handler continues.
+ *
+ * Always returns 0; every failure in this function unwinds with -1/ENOMEM.
+ */
+int
+shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                 dict_t *xdata)
+{
+        uint64_t       block_size = 0;
+        shard_local_t *local = NULL;
+
+        if (shard_inode_ctx_get_block_size (fd->inode, this, &block_size)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED,
+                        "Failed to get block size from inode ctx of %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto err;
+        }
+
+        if ((frame->root->pid == GF_CLIENT_PID_GSYNCD) || !block_size) {
+                STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->ftruncate, fd, offset,
+                            xdata);
+                return 0;
+        }
+
+        if (!this->itable)
+                this->itable = fd->inode->table;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        local->block_size = block_size;
+        local->offset = offset;
+        local->fd = fd_ref (fd);
+
+        /* Reuse the caller's dict when there is one, else start empty. */
+        if (xdata)
+                local->xattr_req = dict_ref (xdata);
+        else
+                local->xattr_req = dict_new ();
+        if (!local->xattr_req)
+                goto err;
+
+        local->fop = GF_FOP_FTRUNCATE;
+
+        /* The base file is addressed through the fd's inode and gfid. */
+        local->loc.inode = inode_ref (fd->inode);
+        gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
+
+        shard_lookup_base_file (frame, this, &local->loc,
+                                shard_post_lookup_truncate_handler);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+/* mknod callback: on success store the block size (converted with ntoh64)
+ * and the returned iatt in the new inode's context, logging a warning when
+ * that fails, then pass the reply on to the parent unchanged.
+ * Always returns 0.
+ */
+int
+shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *buf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        shard_local_t *local = frame->local;
+        int            ret = -1;
+
+        if (op_ret != -1) {
+                ret = shard_inode_ctx_set (inode, this, buf,
+                                           ntoh64 (local->block_size),
+                                           SHARD_ALL_MASK);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                SHARD_MSG_INODE_CTX_SET_FAILED,
+                                "Failed to set inode ctx for %s",
+                                uuid_utoa (inode->gfid));
+                }
+        }
+
+        SHARD_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+                            preparent, postparent, xdata);
+
+        return 0;
+}
+
+int /*
+ * shard_mknod: mknod fop entry point.
+ *
+ * Allocates the frame's local from this->local_pool and, unless
+ * __is_gsyncd_on_shard_dir() reports true for this request, runs
+ * SHARD_INODE_CREATE_INIT on it before winding mknod to the first child with
+ * shard_mknod_cbk as callback.
+ *
+ * Always returns 0; an allocation failure unwinds with -1/ENOMEM.
+ *
+ * NOTE(review): SHARD_INODE_CREATE_INIT is defined elsewhere and receives the
+ * label `err`; presumably it jumps there on failure -- confirm against the
+ * macro definition.
+ */
+shard_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ shard_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ if (!__is_gsyncd_on_shard_dir (frame, loc)) {
+ SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err);
+ }
+
+ STACK_WIND (frame, shard_mknod_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+ return 0;
+
+err:
+ SHARD_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+
+}
+
+/* link callback: on success refresh the link count and times cached in the
+ * inode context from the returned iatt and forward the full reply; on
+ * failure forward only op_ret, op_errno and the inode. Always returns 0.
+ */
+int32_t
+shard_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent,
+                dict_t *xdata)
+{
+        if (op_ret >= 0) {
+                shard_inode_ctx_set (inode, this, buf, 0,
+                                     SHARD_MASK_NLINK | SHARD_MASK_TIMES);
+
+                SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+                                    preparent, postparent, xdata);
+                return 0;
+        }
+
+        SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, NULL, NULL,
+                            NULL, NULL);
+        return 0;
+}
+
+/* link fop entry point.
+ *
+ * A file without a shard block size is tail-wound to the first child; for a
+ * sharded file the link is wound with shard_link_cbk so the cached inode
+ * context can be refreshed. Failure to read the block size unwinds with
+ * -1/ENOMEM. Always returns 0.
+ */
+int32_t
+shard_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+            dict_t *xdata)
+{
+        uint64_t block_size = 0;
+
+        if (shard_inode_ctx_get_block_size (oldloc->inode, this,
+                                            &block_size)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED,
+                        "Failed to get block size from inode ctx of %s",
+                        uuid_utoa (oldloc->inode->gfid));
+                SHARD_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL,
+                                    NULL, NULL);
+                return 0;
+        }
+
+        if (block_size) {
+                STACK_WIND (frame, shard_link_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->link, oldloc, newloc,
+                            xdata);
+                return 0;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->link, oldloc, newloc,
+                         xdata);
+        return 0;
+}
+
+int
+shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+/* Continuation run after the shards of an unlink/rename target have been
+ * looked up. Any recorded error other than ENOENT unwinds the fop;
+ * otherwise the error state is cleared and the shards are unlinked.
+ * Always returns 0.
+ */
+int
+shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+        inode_t       *base = NULL;
+
+        if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+                if (local->fop == GF_FOP_UNLINK) {
+                        SHARD_STACK_UNWIND (unlink, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL);
+                } else {
+                        SHARD_STACK_UNWIND (rename, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL,
+                                            NULL, NULL, NULL);
+                }
+                return 0;
+        }
+
+        local->op_ret = 0;
+        local->op_errno = 0;
+
+        /* For rename the shards belong to the destination (loc2). */
+        if (local->fop == GF_FOP_RENAME)
+                base = local->loc2.inode;
+        else
+                base = local->loc.inode;
+
+        shard_unlink_shards_do (frame, this, base);
+        return 0;
+}
+
+int
+shard_rename_cbk (call_frame_t *frame, xlator_t *this);
+
+int32_t
+shard_unlink_cbk (call_frame_t *frame, xlator_t *this);
+
+/* Continuation run after the shards of an unlink/rename target have been
+ * resolved.
+ *
+ * On success the shards are unlinked right away when none is left to look
+ * up, otherwise the unresolved ones are looked up first. An ENOENT failure
+ * is turned into success with a single-block range and the fop is
+ * completed. Any other failure unwinds unlink directly, or completes rename
+ * through shard_rename_cbk. Always returns 0.
+ */
+int
+shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+        inode_t       *base = NULL;
+
+        if (local->op_ret >= 0) {
+                /* For rename the shards belong to the destination (loc2). */
+                if (local->fop == GF_FOP_RENAME)
+                        base = local->loc2.inode;
+                else
+                        base = local->loc.inode;
+
+                if (local->call_count)
+                        shard_common_lookup_shards (frame, this, base,
+                                        shard_post_lookup_shards_unlink_handler);
+                else
+                        shard_unlink_shards_do (frame, this, base);
+                return 0;
+        }
+
+        if (local->op_errno != ENOENT) {
+                if (local->fop == GF_FOP_UNLINK) {
+                        SHARD_STACK_UNWIND (unlink, frame, local->op_ret,
+                                            local->op_errno, NULL, NULL, NULL);
+                } else {
+                        shard_rename_cbk (frame, this);
+                }
+                return 0;
+        }
+
+        /* If lookup on /.shard fails with ENOENT, it probably means that the
+         * file is being unlinked before it could grow beyond its first
+         * block. In this case, unlink boils down to unlinking the base file
+         * and unwinding the call.
+         */
+        local->op_ret = 0;
+        local->first_block = local->last_block = 0;
+        local->num_blocks = 1;
+
+        if (local->fop == GF_FOP_UNLINK)
+                shard_unlink_cbk (frame, this);
+        else
+                shard_rename_cbk (frame, this);
+        return 0;
+}
+
+int /*
+ * shard_unlink_base_file_cbk: callback of the unlink wound on the base file.
+ *
+ * A failed unlink is unwound immediately with NULL iatts. On success the
+ * reply is unwound as-is when the returned link count is above 1, when the
+ * file occupies a single block, or when the inode list / .shard loc cannot
+ * be set up. Otherwise the parent iatts and xdata are saved in local and
+ * shard resolution is started so that the shards under .shard can be
+ * unlinked; shard_post_resolve_unlink_handler continues from there.
+ *
+ * Always returns 0.
+ */
+shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int ret = 0;
+ uint32_t link_count = 0;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ /* Because link() does not create links for all but the
+ * base shard, unlink() must delete these shards only when the
+ * link count is 1. We can return safely now.
+ */
+ if ((xdata) && (!dict_get_uint32 (xdata, GET_LINK_COUNT, &link_count))
+ && (link_count > 1))
+ goto unwind;
+
+ local->first_block = get_lowest_block (0, local->block_size);
+ local->last_block = get_highest_block (0, local->prebuf.ia_size,
+ local->block_size);
+ local->num_blocks = local->last_block - local->first_block + 1;
+
+ /* num_blocks = 1 implies that the file has not crossed its
+ * shard block size. So unlink boils down to unlinking just the
+ * base file. We can safely return now.
+ */
+ if (local->num_blocks == 1)
+ goto unwind;
+
+ local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list) /* the base file is already gone: the successful reply is still returned */
+ goto unwind;
+
+ /* Save the xdata and preparent and postparent iatts now. This will be
+ * used at the time of unwinding the call to the parent xl.
+ */
+ local->preoldparent = *preparent;
+ local->postoldparent = *postparent;
+ if (xdata)
+ local->xattr_rsp = dict_ref (xdata);
+
+ local->dot_shard_loc.inode = inode_find (this->itable,
+ priv->dot_shard_gfid);
+ if (!local->dot_shard_loc.inode) { /* .shard inode not in the inode table: look it up first */
+ ret = shard_init_dot_shard_loc (this, local);
+ if (ret)
+ goto unwind;
+ shard_lookup_dot_shard (frame, this,
+ shard_post_resolve_unlink_handler);
+ } else {
+ shard_common_resolve_shards (frame, this, local->loc.inode,
+ shard_post_resolve_unlink_handler);
+ }
+
+ return 0;
+
+unwind:
+ SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+/* Wind the unlink of the base file to the first child, asking for the link
+ * count through GET_LINK_COUNT in the request dict. Failure to add the key
+ * only produces a warning; the unlink is wound regardless.
+ * Always returns 0.
+ */
+int
+shard_unlink_base_file (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+        int            ret = 0;
+
+        ret = dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        SHARD_MSG_DICT_SET_FAILED, "Failed to set "
+                        GET_LINK_COUNT" in dict");
+        }
+
+        /* To-Do: Request open-fd count on base file */
+        STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+                    local->xattr_req);
+        return 0;
+}
+
+void /*
+ * shard_unlink_block_inode: drop the in-memory state of one unlinked shard.
+ *
+ * Picks the shard's inode from local->inode_list (indexed by block number
+ * relative to local->first_block) and, under priv->lock, removes its context
+ * from the list it is linked on (decrementing priv->inode_count when it was
+ * linked), then unlinks the inode's name under .shard and forgets it.
+ *
+ * NOTE(review): the return value of shard_inode_ctx_get() is not checked and
+ * ctx is dereferenced unconditionally -- confirm every inode reaching this
+ * point has a shard inode context.
+ */
+shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
+{
+ char block_bname[256] = {0,};
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ shard_priv_t *priv = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+
+ this = THIS;
+ priv = this->private;
+
+ inode = local->inode_list[shard_block_num - local->first_block];
+
+ shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid,
+ block_bname, sizeof (block_bname));
+
+ LOCK(&priv->lock);
+ {
+ shard_inode_ctx_get (inode, this, &ctx);
+ if (!list_empty (&ctx->ilist)) { /* only a ctx that is on the list counts towards inode_count */
+ list_del_init (&ctx->ilist);
+ priv->inode_count--;
+ }
+ GF_ASSERT (priv->inode_count >= 0);
+ inode_unlink (inode, priv->dot_shard_inode, block_bname);
+ inode_forget (inode, 0);
+ }
+ UNLOCK(&priv->lock);
+
+}
+
+int
+shard_rename_cbk (call_frame_t *frame, xlator_t *this);
+
+/* Complete an unlink: unwind to the parent with the result, parent iatts
+ * and response dict saved in the frame's local. Always returns 0.
+ */
+int32_t
+shard_unlink_cbk (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = NULL;
+
+        local = frame->local;
+
+        SHARD_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+                            &local->preoldparent, &local->postoldparent,
+                            local->xattr_rsp);
+
+        return 0;
+}
+
+/* Callback of the per-shard unlinks; cookie carries the shard block number.
+ *
+ * A failure is stored in local; a success drops the shard's in-memory inode
+ * state. When the last outstanding reply arrives, the fop is completed:
+ * unlink and rename are finished through their *_cbk helpers, anything else
+ * (the truncate path) goes on to truncate the last shard.
+ * Always returns 0.
+ */
+int
+shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno,
+                            struct iatt *preparent, struct iatt *postparent,
+                            dict_t *xdata)
+{
+        shard_local_t *local = frame->local;
+        int            shard_block_num = (long) cookie;
+
+        if (op_ret >= 0) {
+                shard_unlink_block_inode (local, shard_block_num);
+        } else {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+        }
+
+        if (shard_call_count_return (frame) != 0)
+                return 0;
+
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
+
+        switch (local->fop) {
+        case GF_FOP_UNLINK:
+                shard_unlink_cbk (frame, this);
+                break;
+        case GF_FOP_RENAME:
+                shard_rename_cbk (frame, this);
+                break;
+        default:
+                shard_truncate_last_shard (frame, this,
+                                           local->inode_list[0]);
+                break;
+        }
+
+        return 0;
+}
+
+int /*
+ * shard_unlink_shards_do: unlink every existing shard of a file under .shard.
+ *
+ * Counts the non-NULL entries of local->inode_list[1 .. num_blocks - 1]. If
+ * there are none, the fop is completed at once (unlink or rename). Otherwise
+ * one unlink per existing shard is wound to the first child, parented on
+ * priv->dot_shard_inode, with the block number as cookie; replies arrive in
+ * shard_unlink_shards_do_cbk.
+ *
+ * frame - call frame; frame->local holds the block range and inode list.
+ * this  - this translator instance.
+ * inode - base file inode; its gfid is used for shard paths and logs.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): inode_list is indexed directly by the block number here,
+ * which is only correct while local->first_block is 0 -- confirm that holds
+ * for every caller.
+ */
+shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ int i = 0;
+ int ret = -1;
+ int count = 0;
+ int call_count = 0;
+ uint32_t last_block = 0;
+ uint32_t cur_block = 0;
+ char *bname = NULL;
+ char path[PATH_MAX] = {0,};
+ loc_t loc = {0,};
+ gf_boolean_t wind_failed = _gf_false;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ /* local->num_blocks includes the base file block. This function only
+ * deletes the shards under /.shard. So subtract num_blocks by 1.
+ */
+ local->call_count = call_count = local->num_blocks - 1; /* provisional: replaced by `count` below when shards exist */
+ last_block = local->last_block;
+
+ /* Ignore the inode associated with the base file and start counting
+ * from 1.
+ */
+ for (i = 1; i < local->num_blocks; i++) {
+ if (!local->inode_list[i])
+ continue;
+ count++;
+ }
+
+ if (!count) {
+ /* callcount = 0 implies that all of the shards that need to be
+ * unlinked are non-existent (in other words the file is full of
+ * holes). So shard xlator can simply return the fop to its
+ * parent now.
+ */
+ gf_msg_debug (this->name, 0, "All shards that need to be "
+ "unlinked are non-existent: %s",
+ uuid_utoa (inode->gfid));
+ local->num_blocks = 1;
+ if (local->fop == GF_FOP_UNLINK) {
+ shard_unlink_cbk (frame, this);
+ } else if (local->fop == GF_FOP_RENAME) {
+ gf_msg_debug (this->name, 0, "Resuming rename()");
+ shard_rename_cbk (frame, this);
+ }
+ return 0;
+ }
+
+ local->call_count = call_count = count;
+ cur_block = 1;
+ SHARD_SET_ROOT_FS_ID (frame, local);
+
+ /* Ignore the base file and start iterating from the first block shard.
+ */
+ while (cur_block <= last_block) {
+ if (!local->inode_list[cur_block]) { /* absent shard: nothing to unlink, not counted */
+ cur_block++;
+ continue;
+ }
+
+ if (wind_failed) { /* an earlier inode_path() failed: fail this shard as well */
+ shard_unlink_shards_do_cbk (frame,
+ (void *) (long) cur_block,
+ this, -1, ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath (cur_block, inode->gfid, path,
+ sizeof (path));
+ bname = strrchr (path, '/') + 1;
+ loc.parent = inode_ref (priv->dot_shard_inode);
+ ret = inode_path (loc.parent, bname, (char **) &(loc.path));
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
+ " on %s, base file gfid = %s", bname,
+ uuid_utoa (inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe (&loc);
+ wind_failed = _gf_true;
+ shard_unlink_shards_do_cbk (frame,
+ (void *) (long) cur_block,
+ this, -1, ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ loc.name = strrchr (loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ loc.inode = inode_ref (local->inode_list[cur_block]);
+
+ STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk,
+ (void *) (long) cur_block, FIRST_CHILD(this),
+ FIRST_CHILD (this)->fops->unlink, &loc,
+ local->xflag, local->xattr_req);
+ loc_wipe (&loc);
+
+next:
+ cur_block++;
+ if (!--call_count) /* every existing shard has been wound or failed */
+ break;
+ }
+
+ return 0;
+}
+
+/* Continuation run after the base file lookup of an unlink.
+ *
+ * frame - call frame; frame->local holds the lookup result.
+ * this  - this translator instance.
+ *
+ * A failed lookup unwinds the unlink with the recorded error and NULL
+ * iatts; otherwise the unlink of the base file is wound.
+ * Always returns 0.
+ */
+int
+shard_post_lookup_unlink_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = NULL;
+
+        local = frame->local;
+
+        if (local->op_ret < 0) {
+                SHARD_STACK_UNWIND (unlink, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        shard_unlink_base_file (frame, this);
+        return 0;
+}
+
+/* unlink fop entry point.
+ *
+ * frame - call frame of the request.
+ * this  - this translator instance.
+ * loc   - location of the file to unlink.
+ * xflag - unlink flags, passed on unchanged.
+ * xdata - optional request dict; an empty one is created when NULL.
+ *
+ * Files without a shard block size, and requests whose pid is
+ * GF_CLIENT_PID_GSYNCD, are passed straight to the first child. Otherwise
+ * the request is recorded in a freshly allocated local and a lookup of the
+ * base file is started; shard_post_lookup_unlink_handler continues.
+ *
+ * Always returns 0; every failure in this function unwinds with -1/ENOMEM.
+ */
+int
+shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+              dict_t *xdata)
+{
+        int            ret = -1;
+        uint64_t       block_size = 0;
+        shard_local_t *local = NULL;
+
+        ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size);
+        /* A missing block size is tolerated for symlinks; block_size then
+         * stays 0 and the request is passed through below.
+         */
+        if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+                        "size from inode ctx of %s",
+                        uuid_utoa (loc->inode->gfid));
+                goto err;
+        }
+
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+                STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+                return 0;
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        loc_copy (&local->loc, loc);
+        local->xflag = xflag;
+        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+        local->block_size = block_size;
+        local->fop = GF_FOP_UNLINK;
+        /* dict_new() can fail; bail out here, as shard_truncate and
+         * shard_ftruncate do, instead of carrying a NULL request dict into
+         * the rest of the unlink path.
+         */
+        if (!local->xattr_req)
+                goto err;
+
+        if (!this->itable)
+                this->itable = (local->loc.inode)->table;
+
+        shard_lookup_base_file (frame, this, &local->loc,
+                                shard_post_lookup_unlink_handler);
+        return 0;
+err:
+        SHARD_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+
+}
+
+/* Complete a rename: unwind to the parent with the result, the iatts of the
+ * file and of both parents, and the response dict saved in the frame's
+ * local. Always returns 0.
+ */
+int
+shard_rename_cbk (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+                            &local->prebuf,
+                            &local->preoldparent, &local->postoldparent,
+                            &local->prenewparent, &local->postnewparent,
+                            local->xattr_rsp);
+
+        return 0;
+}
+
+/* After a rename onto an existing sharded destination, start removing the
+ * destination's shards.
+ *
+ * The block range is derived from the destination block size and
+ * local->postbuf.ia_size. The rename is completed right away when the
+ * reported link count is above 1 or when the destination occupied a single
+ * block. Otherwise the inode list is allocated and shard resolution is
+ * started; shard_post_resolve_unlink_handler continues from there.
+ *
+ * Always returns 0; setup failures unwind the rename with -1/ENOMEM.
+ */
+int
+shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+        shard_priv_t  *priv = this->private;
+        uint32_t       link_count = 0;
+        int            has_other_links = 0;
+
+        local->first_block = get_lowest_block (0, local->dst_block_size);
+        local->last_block = get_highest_block (0, local->postbuf.ia_size,
+                                               local->dst_block_size);
+        local->num_blocks = local->last_block - local->first_block + 1;
+
+        has_other_links = ((local->xattr_rsp) &&
+                           (!dict_get_uint32 (local->xattr_rsp,
+                                              GET_LINK_COUNT, &link_count)) &&
+                           (link_count > 1));
+
+        /* Nothing to delete under .shard in either of these cases. */
+        if (has_other_links || (local->num_blocks == 1)) {
+                shard_rename_cbk (frame, this);
+                return 0;
+        }
+
+        local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
+                                       gf_shard_mt_inode_list);
+        if (!local->inode_list)
+                goto out;
+
+        local->dot_shard_loc.inode = inode_find (this->itable,
+                                                 priv->dot_shard_gfid);
+        if (local->dot_shard_loc.inode) {
+                shard_common_resolve_shards (frame, this, local->loc2.inode,
+                                             shard_post_resolve_unlink_handler);
+                return 0;
+        }
+
+        if (shard_init_dot_shard_loc (this, local))
+                goto out;
+
+        shard_lookup_dot_shard (frame, this,
+                                shard_post_resolve_unlink_handler);
+        return 0;
+
+out:
+        SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+                            NULL, NULL);
+        return 0;
+}
+
+/*
+ * Continuation invoked after the base-file lookup that shard_rename_src_cbk
+ * issues following the rename.
+ *
+ * A failed lookup unwinds the rename with the recorded error. Otherwise the
+ * rename is either completed directly (no dst block size) or continued in
+ * shard_rename_unlink_dst_shards_do. Always returns 0.
+ */
+int
+shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret < 0) {
+                SHARD_STACK_UNWIND (rename, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL, NULL,
+                                    NULL, NULL);
+                return 0;
+        }
+
+        if (!local->dst_block_size) {
+                shard_rename_cbk (frame, this);
+                return 0;
+        }
+
+        shard_rename_unlink_dst_shards_do (frame, this);
+        return 0;
+}
+
+/*
+ * Callback for the rename wound by shard_rename_src_base_file.
+ *
+ * On failure the error is stored in local and the rename is unwound. On
+ * success the returned iatts (and xdata, if any) are saved in local. Then,
+ * if the source has a block size, a fresh inode carrying the source gfid is
+ * looked up (continuing in shard_post_rename_lookup_handler); otherwise
+ * processing continues in shard_rename_unlink_dst_shards_do.
+ *
+ * Always returns 0.
+ */
+int
+shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                      struct iatt *preoldparent, struct iatt *postoldparent,
+                      struct iatt *prenewparent, struct iatt *postnewparent,
+                      dict_t *xdata)
+{
+        shard_local_t *local = frame->local;
+
+        if (op_ret < 0) {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+                SHARD_STACK_UNWIND (rename, frame, local->op_ret,
+                                    local->op_errno, NULL, NULL, NULL, NULL,
+                                    NULL, NULL);
+                return 0;
+        }
+
+        local->prebuf = *buf;
+        local->preoldparent = *preoldparent;
+        local->postoldparent = *postoldparent;
+        local->prenewparent = *prenewparent;
+        local->postnewparent = *postnewparent;
+        if (xdata)
+                local->xattr_rsp = dict_ref (xdata);
+
+        if (!local->block_size) {
+                shard_rename_unlink_dst_shards_do (frame, this);
+                return 0;
+        }
+
+        /* Look up the base file now to gather its ia_size and ia_blocks. */
+        local->tmp_loc.inode = inode_new (this->itable);
+        gf_uuid_copy (local->tmp_loc.gfid, (local->loc.inode)->gfid);
+        shard_lookup_base_file (frame, this, &local->tmp_loc,
+                                shard_post_rename_lookup_handler);
+        return 0;
+}
+
+/*
+ * Wind the actual rename of local->loc to local->loc2 to the first child,
+ * after adding GET_LINK_COUNT to the request dict. A failure to set the
+ * key is only logged; the rename is wound regardless. Always returns 0.
+ */
+int
+shard_rename_src_base_file (call_frame_t *frame, xlator_t *this)
+{
+        int            ret   = 0;
+        shard_local_t *local = frame->local;
+
+        ret = dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        SHARD_MSG_DICT_SET_FAILED, "Failed to set "
+                        GET_LINK_COUNT" in dict");
+        }
+
+        /* To-Do: Request open-fd count on dst base file */
+        STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rename, &local->loc, &local->loc2,
+                    local->xattr_req);
+        return 0;
+}
+
+/*
+ * Continuation invoked after the lookup of the rename destination's base
+ * file (started by shard_rename). On lookup failure the rename is unwound
+ * with the recorded error; otherwise the looked-up attributes are preserved
+ * and the source is renamed. Always returns 0.
+ */
+int
+shard_post_lookup_dst_base_file_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                /* prebuf will be overwritten by the later lookup on the src
+                 * base file in shard_lookup_base_file_cbk(), so stash the dst
+                 * attributes in postbuf first.
+                 */
+                local->postbuf = local->prebuf;
+                shard_rename_src_base_file (frame, this);
+                return 0;
+        }
+
+        SHARD_STACK_UNWIND (rename, frame, local->op_ret,
+                            local->op_errno, NULL, NULL, NULL, NULL,
+                            NULL, NULL);
+        return 0;
+}
+
+/*
+ * shard_rename - rename fop entry point of the shard translator.
+ *
+ * The rename is wound unchanged to the first child when the source is a
+ * directory, when neither source nor destination has a shard block size, or
+ * when the request is issued by gsyncd. Otherwise the arguments are captured
+ * in a shard_local_t and:
+ *   - if the destination has a block size, its base file is looked up first
+ *     (shard_post_lookup_dst_base_file_handler continues);
+ *   - otherwise the source base file is renamed right away.
+ *
+ * Always returns 0; failures detected here unwind with -1/ENOMEM.
+ */
+int
+shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+              dict_t *xdata)
+{
+        int             ret            = -1;
+        uint64_t        block_size     = 0;
+        uint64_t        dst_block_size = 0;
+        shard_local_t  *local          = NULL;
+
+        if (IA_ISDIR (oldloc->inode->ia_type)) {
+                STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+                            xdata);
+                return 0;
+        }
+
+        ret = shard_inode_ctx_get_block_size (oldloc->inode, this, &block_size);
+        if (ret && !IA_ISLNK (oldloc->inode->ia_type)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+                        "size from inode ctx of %s",
+                        uuid_utoa (oldloc->inode->gfid));
+                goto err;
+        }
+
+        /* The result of this call is not inspected; on failure
+         * dst_block_size simply keeps whatever the callee left in it.
+         */
+        if (newloc->inode)
+                ret = shard_inode_ctx_get_block_size (newloc->inode, this,
+                                                      &dst_block_size);
+
+        /* Pass-through cases:
+         * a. the src file is not sharded and dst doesn't exist, OR
+         * b. the src and dst both exist but are not sharded, OR
+         * c. the request originates from gsyncd.
+         */
+        if (!(block_size || dst_block_size) ||
+            frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+                STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+                            xdata);
+                return 0;
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+        loc_copy (&local->loc, oldloc);
+        loc_copy (&local->loc2, newloc);
+        local->fop = GF_FOP_RENAME;
+        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new();
+        if (!local->xattr_req)
+                goto err;
+
+        local->block_size = block_size;
+        local->dst_block_size = dst_block_size;
+        if (!this->itable)
+                this->itable = (local->loc.inode)->table;
+
+        if (!local->dst_block_size) {
+                /* dst either doesn't exist or is NOT sharded: go ahead and
+                 * rename src to dst.
+                 */
+                shard_rename_src_base_file (frame, this);
+                return 0;
+        }
+
+        /* dst exists and is sharded: look it up and record its size BEFORE
+         * renaming src over it, so that this information is not lost.
+         */
+        shard_lookup_base_file (frame, this, &local->loc2,
+                                shard_post_lookup_dst_base_file_handler);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL,
+                            NULL, NULL, NULL);
+        return 0;
+
+}
+
+
+/*
+ * Callback for the create wound by shard_create.
+ *
+ * Unless the child reported op_ret == -1, the new inode's shard ctx is set
+ * from stbuf and local->block_size (converted with ntoh64); a failure to set
+ * the ctx is logged as a warning only. The child's result is then unwound
+ * unchanged. Always returns 0.
+ */
+int
+shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                  struct iatt *stbuf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        int            ret   = -1;
+        shard_local_t *local = frame->local;
+
+        if (op_ret != -1) {
+                ret = shard_inode_ctx_set (inode, this, stbuf,
+                                           ntoh64 (local->block_size),
+                                           SHARD_ALL_MASK);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                SHARD_MSG_INODE_CTX_SET_FAILED,
+                                "Failed to set inode "
+                                "ctx for %s", uuid_utoa (inode->gfid));
+                }
+        }
+
+        SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf,
+                            preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * shard_create - create fop entry point of the shard translator.
+ *
+ * Allocates the per-fop shard_local_t, runs SHARD_INODE_CREATE_INIT unless
+ * __is_gsyncd_on_shard_dir() is true for this request, and winds the create
+ * to the first child with shard_create_cbk as the callback.
+ *
+ * Always returns 0; failures reaching the err label unwind with -1/ENOMEM.
+ */
+int
+shard_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ shard_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ if (!__is_gsyncd_on_shard_dir (frame, loc)) {
+ /* The macro is handed the 'err' label; presumably it jumps there
+ * on failure - confirm against its definition.
+ */
+ SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err);
+ }
+
+ STACK_WIND (frame, shard_create_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
+
+err:
+ SHARD_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+
+}
+
+/*
+ * Callback for the open wound by shard_open: hands the child's result
+ * (op_ret, op_errno, fd, xdata) back to the caller unchanged.
+ * Always returns 0.
+ */
+int
+shard_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ /* To-Do: Handle open with O_TRUNC under locks */
+ SHARD_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+/*
+ * shard_open - open fop entry point: winds the open unchanged to the first
+ * child, with shard_open_cbk as the callback. Always returns 0.
+ */
+int
+shard_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND (frame, shard_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+/*
+ * shard_readv_do_cbk - callback for each per-shard readv wound by
+ * shard_readv_do.
+ *
+ * @cookie is the fd the read was issued on, or NULL when shard_readv_do
+ * calls this function directly after a wind failure; a non-NULL fd is
+ * unref'd here. Data from a successful read is copied into local->iobuf at
+ * the position derived from the shard's block number, which is read back
+ * from the fd ctx. The first failure is recorded in local->op_ret/op_errno
+ * and replies arriving after it are not aggregated. When
+ * shard_call_count_return() reports 0, the readv fop is unwound.
+ *
+ * Always returns 0.
+ */
+int
+shard_readv_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iovec *vector,
+                    int32_t count, struct iatt *stbuf, struct iobref *iobref,
+                    dict_t *xdata)
+{
+        int             i          = 0;
+        int             call_count = 0;
+        void           *address    = NULL;
+        uint64_t        block_num  = 0;
+        off_t           off        = 0;
+        struct iovec    vec        = {0,};
+        shard_local_t  *local      = NULL;
+        fd_t           *anon_fd    = cookie;
+
+        local = frame->local;
+
+        /* If shard has already seen a failure here before, there is no point
+         * in aggregating subsequent reads, so just go to out.
+         */
+        if (local->op_ret < 0)
+                goto out;
+
+        if (op_ret < 0) {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+                goto out;
+        }
+
+        if (local->op_ret >= 0)
+                local->op_ret += op_ret;
+
+        fd_ctx_get (anon_fd, this, &block_num);
+
+        if (block_num == local->first_block) {
+                address = local->iobuf->ptr;
+        } else {
+                /* else
+                 * address to start writing to = beginning of buffer +
+                 *                    number of bytes until end of first block +
+                 *                    + block_size times number of blocks
+                 *                    between the current block and the first
+                 */
+                address = (char *) local->iobuf->ptr + (local->block_size -
+                          (local->offset % local->block_size)) +
+                          ((block_num - local->first_block - 1) *
+                          local->block_size);
+        }
+
+        /* 'address' stays fixed at the start of this shard's region and
+         * 'off' is the running total of bytes already copied. Advancing
+         * 'address' itself by the cumulative 'off' on each iteration would
+         * place the third and later vectors too far into the buffer.
+         */
+        for (i = 0; i < count; i++) {
+                memcpy ((char *) address + off, vector[i].iov_base,
+                        vector[i].iov_len);
+                off += vector[i].iov_len;
+        }
+
+out:
+        if (anon_fd)
+                fd_unref (anon_fd);
+        call_count = shard_call_count_return (frame);
+        if (call_count == 0) {
+                SHARD_UNSET_ROOT_FS_ID (frame, local);
+                if (local->op_ret < 0) {
+                        SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+                                            local->op_errno, NULL, 0, NULL,
+                                            NULL, NULL);
+                } else {
+                        /* Only the xdata of the reply that completes the
+                         * fop is passed up.
+                         */
+                        if (xdata)
+                                local->xattr_rsp = dict_ref (xdata);
+                        vec.iov_base = local->iobuf->ptr;
+                        vec.iov_len = local->total_size;
+                        SHARD_STACK_UNWIND (readv, frame, local->total_size,
+                                            local->op_errno, &vec, 1,
+                                            &local->prebuf, local->iobref,
+                                            local->xattr_rsp);
+                        return 0;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * shard_readv_do - fan out one readv per block in
+ * [local->first_block, local->last_block].
+ *
+ * Block 0 is read through the caller's fd; every other block is read through
+ * an anonymous fd created on the matching entry of local->inode_list. The
+ * block number is stored in the fd ctx so that shard_readv_do_cbk can work
+ * out where in the aggregate buffer the data belongs. The fd is handed to
+ * the callback as the cookie.
+ *
+ * Once any wind fails, the callback is invoked directly with -1/ENOMEM for
+ * that block and for all remaining blocks, so that the call count set up in
+ * local->call_count still reaches zero.
+ *
+ * Always returns 0.
+ */
+int
+shard_readv_do (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ int last_block = 0;
+ int cur_block = 0;
+ off_t orig_offset = 0;
+ off_t shard_offset = 0;
+ size_t read_size = 0;
+ size_t remaining_size = 0;
+ fd_t *fd = NULL;
+ fd_t *anon_fd = NULL;
+ shard_local_t *local = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+
+ local = frame->local;
+ fd = local->fd;
+
+ orig_offset = local->offset;
+ cur_block = local->first_block;
+ last_block = local->last_block;
+ remaining_size = local->total_size;
+ /* One callback is expected per block. */
+ local->call_count = call_count = local->num_blocks;
+
+ SHARD_SET_ROOT_FS_ID (frame, local);
+
+ if (fd->flags & O_DIRECT)
+ local->flags = O_DIRECT;
+
+ /* 'i' indexes local->inode_list and advances in step with cur_block. */
+ while (cur_block <= last_block) {
+ if (wind_failed) {
+ shard_readv_do_cbk (frame, (void *) (long) 0, this, -1,
+ ENOMEM, NULL, 0, NULL, NULL, NULL);
+ goto next;
+ }
+
+ /* Offset within this shard, and how much of the request falls
+ * into it.
+ */
+ shard_offset = orig_offset % local->block_size;
+ read_size = local->block_size - shard_offset;
+ if (read_size > remaining_size)
+ read_size = remaining_size;
+
+ remaining_size -= read_size;
+
+ if (cur_block == 0) {
+ anon_fd = fd_ref (fd);
+ } else {
+ anon_fd = fd_anonymous (local->inode_list[i]);
+ if (!anon_fd) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ shard_readv_do_cbk (frame,
+ (void *) (long) anon_fd,
+ this, -1, ENOMEM, NULL, 0,
+ NULL, NULL, NULL);
+ goto next;
+ }
+ }
+
+ ret = fd_ctx_set (anon_fd, this, cur_block);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_FD_CTX_SET_FAILED,
+ "Failed to set fd ctx for block %d, gfid=%s",
+ cur_block,
+ uuid_utoa (local->inode_list[i]->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ /* The callback unrefs the fd passed as cookie. */
+ shard_readv_do_cbk (frame, (void *) (long) anon_fd,
+ this, -1, ENOMEM, NULL, 0, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ STACK_WIND_COOKIE (frame, shard_readv_do_cbk, anon_fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, anon_fd,
+ read_size, shard_offset, local->flags,
+ local->xattr_req);
+
+ orig_offset += read_size;
+next:
+ cur_block++;
+ i++;
+ call_count--;
+ }
+ return 0;
+}
+
+/*
+ * Continuation invoked once shard_common_lookup_shards has finished for a
+ * readv: proceed with the reads on success, unwind the readv with the
+ * recorded error otherwise. Always returns 0.
+ */
+int
+shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                shard_readv_do (frame, this);
+                return 0;
+        }
+
+        SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+                            local->op_errno, NULL, 0, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Continuation invoked after shard_common_resume_mknod has completed for a
+ * readv.
+ *
+ * On error the readv is unwound. If some mknods failed with EEXIST
+ * (local->eexist_count != 0), those shards are looked up before reading;
+ * otherwise the reads are issued directly. Always returns 0.
+ */
+int
+shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret < 0) {
+                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+                                    local->op_errno, NULL, 0, NULL, NULL, NULL);
+                return 0;
+        }
+
+        if (local->eexist_count) {
+                local->call_count = local->eexist_count;
+                shard_common_lookup_shards (frame, this, local->loc.inode,
+                                        shard_post_lookup_shards_readv_handler);
+                return 0;
+        }
+
+        shard_readv_do (frame, this);
+        return 0;
+}
+
+/*
+ * Callback for each mknod wound by shard_common_resume_mknod; @cookie
+ * carries the shard's block number.
+ *
+ * A successful mknod links the new inode via shard_link_block_inode. An
+ * EEXIST failure only bumps local->eexist_count; any other failure is
+ * recorded in local->op_ret/op_errno. When the last outstanding reply is
+ * in, the root fs id is unset and local->post_mknod_handler is invoked.
+ *
+ * Always returns 0.
+ */
+int
+shard_common_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, inode_t *inode,
+                        struct iatt *buf, struct iatt *preparent,
+                        struct iatt *postparent, dict_t *xdata)
+{
+        int            shard_block_num = (long) cookie;
+        shard_local_t *local           = frame->local;
+
+        if (op_ret >= 0) {
+                shard_link_block_inode (local, shard_block_num, inode, buf);
+        } else {
+                if (op_errno == EEXIST) {
+                        local->eexist_count++;
+                } else {
+                        local->op_ret = op_ret;
+                        local->op_errno = op_errno;
+                }
+                gf_msg_debug (this->name, 0, "mknod of shard %d "
+                              "failed: %s", shard_block_num,
+                              strerror (op_errno));
+        }
+
+        if (shard_call_count_return (frame) == 0) {
+                SHARD_UNSET_ROOT_FS_ID (frame, local);
+                local->post_mknod_handler (frame, this);
+        }
+
+        return 0;
+}
+
+/*
+ * shard_common_resume_mknod - create the shards that are still missing from
+ * local->inode_list.
+ *
+ * Walks the blocks from local->first_block to local->last_block and, for
+ * every slot of local->inode_list that is still NULL, winds a mknod under
+ * priv->dot_shard_inode using the mode and rdev taken from the base file's
+ * inode ctx. local->create_count is the number of mknods expected; the loop
+ * stops once that many have been issued (or failed locally).
+ *
+ * After the first local failure, shard_common_mknod_cbk is called directly
+ * with -1/ENOMEM for that block and for every remaining block to be created,
+ * so the call count still drains. @post_mknod_handler is stored in local and
+ * invoked when all replies are in, or directly if the inode ctx cannot be
+ * read.
+ *
+ * Always returns 0.
+ */
+int
+shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
+                           shard_post_mknod_fop_handler_t post_mknod_handler)
+{
+        int                 i              = 0;
+        int                 shard_idx_iter = 0;
+        int                 last_block     = 0;
+        int                 ret            = 0;
+        int                 call_count     = 0;
+        char                path[PATH_MAX] = {0,};
+        mode_t              mode           = 0;
+        char               *bname          = NULL;
+        shard_priv_t       *priv           = NULL;
+        shard_inode_ctx_t   ctx_tmp        = {0,};
+        shard_local_t      *local          = NULL;
+        gf_boolean_t        wind_failed    = _gf_false;
+        fd_t               *fd             = NULL;
+        loc_t               loc            = {0,};
+        dict_t             *xattr_req      = NULL;
+
+        local = frame->local;
+        priv = this->private;
+        fd = local->fd;
+        shard_idx_iter = local->first_block;
+        last_block = local->last_block;
+        call_count = local->call_count = local->create_count;
+        local->post_mknod_handler = post_mknod_handler;
+
+        SHARD_SET_ROOT_FS_ID (frame, local);
+
+        ret = shard_inode_ctx_get_all (fd->inode, this, &ctx_tmp);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get inode "
+                        "ctx for %s", uuid_utoa (fd->inode->gfid));
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                goto err;
+        }
+        mode = st_mode_from_ia (ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
+
+        while (shard_idx_iter <= last_block) {
+                /* Slot already populated: nothing to create for this block. */
+                if (local->inode_list[i]) {
+                        shard_idx_iter++;
+                        i++;
+                        continue;
+                }
+
+                if (wind_failed) {
+                        shard_common_mknod_cbk (frame,
+                                                (void *) (long) shard_idx_iter,
+                                                this, -1, ENOMEM, NULL, NULL,
+                                                NULL, NULL, NULL);
+                        goto next;
+                }
+
+                shard_make_block_abspath (shard_idx_iter, fd->inode->gfid,
+                                          path, sizeof(path));
+
+                xattr_req = shard_create_gfid_dict (local->xattr_req);
+                if (!xattr_req) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        wind_failed = _gf_true;
+                        shard_common_mknod_cbk (frame,
+                                                (void *) (long) shard_idx_iter,
+                                                this, -1, ENOMEM, NULL, NULL,
+                                                NULL, NULL, NULL);
+                        goto next;
+                }
+
+                bname = strrchr (path, '/') + 1;
+                loc.inode = inode_new (this->itable);
+                loc.parent = inode_ref (priv->dot_shard_inode);
+                ret = inode_path (loc.parent, bname,
+                                  (char **) &(loc.path));
+                if (ret < 0 || !(loc.inode)) {
+                        /* The message previously read "failedon" because the
+                         * separating space between the two literals was
+                         * missing.
+                         */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                SHARD_MSG_INODE_PATH_FAILED, "Inode path failed "
+                                "on %s, base file gfid = %s", bname,
+                                uuid_utoa (fd->inode->gfid));
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        wind_failed = _gf_true;
+                        loc_wipe (&loc);
+                        dict_unref (xattr_req);
+                        shard_common_mknod_cbk (frame,
+                                                (void *) (long) shard_idx_iter,
+                                                this, -1, ENOMEM, NULL, NULL,
+                                                NULL, NULL, NULL);
+                        goto next;
+                }
+
+                loc.name = strrchr (loc.path, '/');
+                if (loc.name)
+                        loc.name++;
+
+                STACK_WIND_COOKIE (frame, shard_common_mknod_cbk,
+                                   (void *) (long) shard_idx_iter,
+                                   FIRST_CHILD(this),
+                                   FIRST_CHILD(this)->fops->mknod, &loc,
+                                   mode, ctx_tmp.stat.ia_rdev, 0, xattr_req);
+                loc_wipe (&loc);
+                dict_unref (xattr_req);
+
+next:
+                shard_idx_iter++;
+                i++;
+                /* Stop as soon as every expected mknod has been accounted
+                 * for.
+                 */
+                if (!--call_count)
+                        break;
+        }
+
+        return 0;
+err:
+        /*
+         * This block is for handling failure in shard_inode_ctx_get_all().
+         * Failures in the while-loop are handled within the loop.
+         */
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
+        post_mknod_handler (frame, this);
+        return 0;
+}
+
+/*
+ * Continuation invoked after shard resolution for a readv.
+ *
+ * - Error other than ENOENT: unwind the readv with that error.
+ * - ENOENT: unwind successfully with local->iobuf as it stands and
+ *   local->total_size as the byte count.
+ * - Success: if local->call_count is non-zero, that many shards are created
+ *   through shard_common_resume_mknod; otherwise the reads are issued.
+ *
+ * Always returns 0.
+ */
+int
+shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
+{
+        struct iovec   vec   = {0,};
+        shard_local_t *local = frame->local;
+
+        if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+                                    local->op_errno, NULL, 0, NULL,
+                                    NULL, NULL);
+                return 0;
+        }
+
+        if (local->op_ret < 0) {
+                /* op_errno == ENOENT */
+                vec.iov_base = local->iobuf->ptr;
+                vec.iov_len = local->total_size;
+                SHARD_STACK_UNWIND (readv, frame, local->total_size,
+                                    0, &vec, 1, &local->prebuf,
+                                    local->iobref, NULL);
+                return 0;
+        }
+
+        if (!local->call_count) {
+                shard_readv_do (frame, this);
+                return 0;
+        }
+
+        local->create_count = local->call_count;
+        shard_common_resume_mknod (frame, this,
+                                   shard_post_mknod_readv_handler);
+        return 0;
+}
+
+/*
+ * shard_post_lookup_readv_handler - continuation invoked after the base-file
+ * lookup started by shard_readv.
+ *
+ * A read starting at or beyond the looked-up file size is answered
+ * immediately with 0 bytes. Otherwise the block range covered by the
+ * request is computed, an inode list and a zero-filled aggregate buffer of
+ * local->total_size bytes are allocated, and shard resolution is started
+ * (through shard_lookup_dot_shard first when the /.shard inode is not found
+ * in the inode table). shard_post_resolve_readv_handler is the continuation.
+ *
+ * Always returns 0; allocation failures unwind the readv with -1/ENOMEM.
+ */
+int
+shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this)
+{
+        int                ret   = 0;
+        struct iobuf      *iobuf = NULL;
+        shard_local_t     *local = NULL;
+        shard_priv_t      *priv  = NULL;
+
+        priv = this->private;
+        local = frame->local;
+
+        if (local->op_ret < 0) {
+                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+                                    local->op_errno, NULL, 0, NULL, NULL, NULL);
+                return 0;
+        }
+
+        if (local->offset >= local->prebuf.ia_size) {
+                /* If the read is being performed past the end of the file,
+                 * unwind the FOP with 0 bytes read as status.
+                 */
+                struct iovec vec = {0,};
+
+                iobuf = iobuf_get2 (this->ctx->iobuf_pool, local->req_size);
+                if (!iobuf)
+                        goto err;
+
+                vec.iov_base = iobuf->ptr;
+                vec.iov_len = 0;
+
+                /* Same failure handling as for the aggregate buffer set up
+                 * further down in this function.
+                 */
+                local->iobref = iobref_new ();
+                if (!local->iobref) {
+                        iobuf_unref (iobuf);
+                        goto err;
+                }
+
+                if (iobref_add (local->iobref, iobuf) != 0) {
+                        iobuf_unref (iobuf);
+                        goto err;
+                }
+
+                /* Drop our reference now that the iobuf is in the iobref. */
+                iobuf_unref (iobuf);
+
+                SHARD_STACK_UNWIND (readv, frame, 0, 0, &vec, 1, &local->prebuf,
+                                    local->iobref, NULL);
+                return 0;
+        }
+
+        local->first_block = get_lowest_block (local->offset,
+                                               local->block_size);
+
+        local->total_size = local->req_size;
+
+        local->last_block = get_highest_block (local->offset, local->total_size,
+                                               local->block_size);
+
+        local->num_blocks = local->last_block - local->first_block + 1;
+
+        local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
+                                       gf_shard_mt_inode_list);
+        if (!local->inode_list)
+                goto err;
+
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, local->total_size);
+        if (!iobuf)
+                goto err;
+
+        local->iobref = iobref_new ();
+        if (!local->iobref) {
+                iobuf_unref (iobuf);
+                goto err;
+        }
+
+        if (iobref_add (local->iobref, iobuf) != 0) {
+                iobuf_unref (iobuf);
+                goto err;
+        }
+
+        /* Our own reference is dropped here, after iobref_add succeeded;
+         * local->iobuf is kept as a plain pointer to the buffer.
+         */
+        iobuf_unref (iobuf);
+        local->iobuf = iobuf;
+        memset (iobuf->ptr, 0, local->total_size);
+
+        local->dot_shard_loc.inode = inode_find (this->itable,
+                                                 priv->dot_shard_gfid);
+        if (!local->dot_shard_loc.inode) {
+                ret = shard_init_dot_shard_loc (this, local);
+                if (ret)
+                        goto err;
+                shard_lookup_dot_shard (frame, this,
+                                        shard_post_resolve_readv_handler);
+        } else {
+                shard_common_resolve_shards (frame, this, local->loc.inode,
+                                             shard_post_resolve_readv_handler);
+        }
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+                            NULL);
+        return 0;
+}
+
+/*
+ * shard_readv - readv fop entry point of the shard translator.
+ *
+ * When the inode ctx yields a block size of 0, or the request comes from
+ * gsyncd, the read is wound unchanged to the first child. Otherwise the
+ * request parameters are captured in a shard_local_t and the base file is
+ * looked up, with shard_post_lookup_readv_handler as the continuation.
+ *
+ * Always returns 0; failures detected here unwind with -1/ENOMEM.
+ */
+int
+shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t offset, uint32_t flags, dict_t *xdata)
+{
+        uint64_t        block_size = 0;
+        shard_local_t  *local      = NULL;
+
+        if (shard_inode_ctx_get_block_size (fd->inode, this, &block_size)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+                        "size for %s from its inode ctx",
+                        uuid_utoa (fd->inode->gfid));
+                goto err;
+        }
+
+        /* block_size = 0 means that the file was created before sharding was
+         * enabled on the volume.
+         */
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+                STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readv, fd, size, offset,
+                            flags, xdata);
+                return 0;
+        }
+
+        if (!this->itable)
+                this->itable = fd->inode->table;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        /* Capture the request. */
+        local->fd = fd_ref (fd);
+        local->block_size = block_size;
+        local->offset = offset;
+        local->req_size = size;
+        local->flags = flags;
+        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+        if (!local->xattr_req)
+                goto err;
+
+        local->loc.inode = inode_ref (fd->inode);
+        gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
+
+        shard_lookup_base_file (frame, this, &local->loc,
+                                shard_post_lookup_readv_handler);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+                            NULL);
+        return 0;
+
+}
+
+/*
+ * Continuation invoked after shard_update_file_size for the write-class
+ * fops: unwinds the fop with local->written_size on success, or with the
+ * recorded error on failure. Always returns 0.
+ */
+int
+shard_common_inode_write_post_update_size_handler (call_frame_t *frame,
+                                                   xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                shard_common_inode_write_success_unwind (local->fop, frame,
+                                                         local->written_size);
+                return 0;
+        }
+
+        shard_common_inode_write_failure_unwind (local->fop, frame,
+                                                 local->op_ret,
+                                                 local->op_errno);
+        return 0;
+}
+
+/*
+ * Compute how far the write described by local (offset + total_size) extends
+ * beyond the size cached in the inode ctx, store that in local->delta_size
+ * (0 if it does not extend the file), grow the cached size accordingly, and
+ * copy the cached stat into local->postbuf.
+ *
+ * Uses __inode_ctx_get; shard_get_delta_size_from_inode_ctx calls this with
+ * inode->lock held.
+ *
+ * Returns 0 on success, or the negative value from __inode_ctx_get.
+ */
+int
+__shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
+                                       xlator_t *this)
+{
+        int                 ret      = -1;
+        uint64_t            ctx_uint = 0;
+        shard_inode_ctx_t  *ctx      = NULL;
+
+        ret = __inode_ctx_get (inode, this, &ctx_uint);
+        if (ret < 0)
+                return ret;
+
+        ctx = (shard_inode_ctx_t *) ctx_uint;
+
+        local->delta_size = 0;
+        if (local->offset + local->total_size > ctx->stat.ia_size) {
+                local->delta_size = (local->offset + local->total_size) -
+                                    ctx->stat.ia_size;
+                ctx->stat.ia_size += (local->delta_size);
+        }
+
+        local->postbuf = ctx->stat;
+
+        return 0;
+}
+
+/*
+ * Locked wrapper around __shard_get_delta_size_from_inode_ctx: takes
+ * inode->lock for the duration of the call and returns its result.
+ */
+int
+shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __shard_get_delta_size_from_inode_ctx (local, inode,
+ this);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+/*
+ * Callback for each per-shard operation wound by
+ * shard_common_inode_write_wind (and called directly by
+ * shard_common_inode_write_do on local failures).
+ *
+ * Under frame->lock, a failure is recorded in local->op_ret/op_errno, while
+ * a success adds to local->written_size, local->delta_blocks and
+ * local->delta_size and refreshes the times in the base inode's ctx.
+ * @cookie is the fd the operation was issued on (may be NULL) and is
+ * unref'd here.
+ *
+ * When the last outstanding reply is in, the fop is either unwound with the
+ * recorded error, or the delta size is recomputed from the inode ctx and
+ * shard_update_file_size is started, with
+ * shard_common_inode_write_post_update_size_handler as the continuation.
+ *
+ * Always returns 0.
+ */
+int
+shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this, int32_t op_ret,
+                                 int32_t op_errno, struct iatt *pre,
+                                 struct iatt *post, dict_t *xdata)
+{
+        fd_t            *anon_fd = cookie;
+        shard_local_t   *local   = frame->local;
+        glusterfs_fop_t  fop     = local->fop;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret < 0) {
+                        local->op_ret = op_ret;
+                        local->op_errno = op_errno;
+                } else {
+                        local->written_size += op_ret;
+                        local->delta_blocks += (post->ia_blocks -
+                                                pre->ia_blocks);
+                        local->delta_size += (post->ia_size - pre->ia_size);
+                        shard_inode_ctx_set (local->fd->inode, this, post, 0,
+                                             SHARD_MASK_TIMES);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (anon_fd)
+                fd_unref (anon_fd);
+
+        /* Other replies are still outstanding: nothing more to do yet. */
+        if (shard_call_count_return (frame) != 0)
+                return 0;
+
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
+
+        if (local->op_ret < 0) {
+                shard_common_inode_write_failure_unwind (fop, frame,
+                                                         local->op_ret,
+                                                         local->op_errno);
+                return 0;
+        }
+
+        shard_get_delta_size_from_inode_ctx (local, local->fd->inode, this);
+        local->hole_size = 0;
+        if (xdata)
+                local->xattr_rsp = dict_ref (xdata);
+        shard_update_file_size (frame, this, local->fd, NULL,
+                             shard_common_inode_write_post_update_size_handler);
+
+        return 0;
+}
+
+/*
+ * Wind one per-shard operation to the first child, choosing the child fop
+ * from local->fop (writev, fallocate, zerofill or discard). @fd is both the
+ * fd the operation is issued on and the cookie passed to
+ * shard_common_inode_write_do_cbk. @vec and @count are used for writes
+ * only; @size is used by the other three fops.
+ *
+ * Always returns 0.
+ */
+int
+shard_common_inode_write_wind (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iovec *vec, int count,
+ off_t shard_offset, size_t size)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ switch (local->fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_COOKIE (frame, shard_common_inode_write_do_cbk, fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vec,
+ count, shard_offset, local->flags,
+ local->iobref, local->xattr_req);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_COOKIE (frame, shard_common_inode_write_do_cbk, fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd,
+ local->flags, shard_offset, size,
+ local->xattr_req);
+ break;
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_COOKIE (frame, shard_common_inode_write_do_cbk, fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd,
+ shard_offset, size, local->xattr_req);
+ break;
+ case GF_FOP_DISCARD:
+ STACK_WIND_COOKIE (frame, shard_common_inode_write_do_cbk, fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd,
+ shard_offset, size, local->xattr_req);
+ break;
+ default:
+ /* NOTE(review): nothing is wound and the callback is not invoked
+ * on this path, so the caller's call count is not decremented
+ * for this block.
+ */
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", local->fop);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * shard_common_inode_write_do - fan out one operation per block in
+ * [local->first_block, local->last_block] for the write-class fops.
+ *
+ * For every block the offset within the shard and the number of bytes that
+ * fall into it are computed. For GF_FOP_WRITE the matching slice of
+ * local->vector is extracted with iov_subset into a temporary iovec array.
+ * Block 0 goes through the caller's fd; every other block goes through an
+ * anonymous fd on the matching entry of local->inode_list. The actual wind
+ * is done by shard_common_inode_write_wind.
+ *
+ * After the first local failure, shard_common_inode_write_do_cbk is called
+ * directly with -1/ENOMEM for that block and for all remaining blocks so
+ * that the call count set up in local->call_count still reaches zero.
+ *
+ * Always returns 0.
+ */
+int
+shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ int count = 0;
+ int call_count = 0;
+ int last_block = 0;
+ uint32_t cur_block = 0;
+ fd_t *fd = NULL;
+ fd_t *anon_fd = NULL;
+ shard_local_t *local = NULL;
+ struct iovec *vec = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+ gf_boolean_t odirect = _gf_false;
+ off_t orig_offset = 0;
+ off_t shard_offset = 0;
+ off_t vec_offset = 0;
+ size_t remaining_size = 0;
+ size_t shard_write_size = 0;
+
+ local = frame->local;
+ fd = local->fd;
+
+ orig_offset = local->offset;
+ remaining_size = local->total_size;
+ cur_block = local->first_block;
+ /* One callback is expected per block. */
+ local->call_count = call_count = local->num_blocks;
+ last_block = local->last_block;
+
+ SHARD_SET_ROOT_FS_ID (frame, local);
+
+ if (dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+ "Failed to set "GLUSTERFS_WRITE_UPDATE_ATOMIC" into "
+ "dict: %s", uuid_utoa (fd->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ /* Nothing has been wound yet: a single direct callback finishes
+ * the fop.
+ */
+ local->call_count = 1;
+ shard_common_inode_write_do_cbk (frame, (void *)(long)0, this,
+ -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+ odirect = _gf_true;
+
+ /* 'i' indexes local->inode_list and advances in step with cur_block. */
+ while (cur_block <= last_block) {
+ if (wind_failed) {
+ shard_common_inode_write_do_cbk (frame,
+ (void *) (long) 0,
+ this, -1, ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ /* Offset within this shard, and how much of the request falls
+ * into it.
+ */
+ shard_offset = orig_offset % local->block_size;
+ shard_write_size = local->block_size - shard_offset;
+ if (shard_write_size > remaining_size)
+ shard_write_size = remaining_size;
+
+ remaining_size -= shard_write_size;
+
+ if (local->fop == GF_FOP_WRITE) {
+ /* First call with a NULL destination only yields the number
+ * of iovecs needed; the second call fills 'vec'.
+ */
+ count = iov_subset (local->vector, local->count,
+ vec_offset,
+ vec_offset + shard_write_size,
+ NULL);
+
+ vec = GF_CALLOC (count, sizeof (struct iovec),
+ gf_shard_mt_iovec);
+ if (!vec) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ GF_FREE (vec);
+ shard_common_inode_write_do_cbk (frame,
+ (void *) (long) 0,
+ this, -1,
+ ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+ count = iov_subset (local->vector, local->count,
+ vec_offset,
+ vec_offset + shard_write_size, vec);
+ }
+
+ if (cur_block == 0) {
+ anon_fd = fd_ref (fd);
+ } else {
+ anon_fd = fd_anonymous (local->inode_list[i]);
+ if (!anon_fd) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ GF_FREE (vec);
+ shard_common_inode_write_do_cbk (frame,
+ (void *) (long) anon_fd,
+ this, -1,
+ ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+
+ if (local->fop == GF_FOP_WRITE) {
+ if (odirect)
+ local->flags = O_DIRECT;
+ else
+ local->flags = GF_ANON_FD_FLAGS;
+ }
+ }
+
+ shard_common_inode_write_wind (frame, this, anon_fd,
+ vec, count, shard_offset,
+ shard_write_size);
+ /* 'vec' is non-NULL only for GF_FOP_WRITE, so vec_offset tracks
+ * progress through local->vector for writes alone.
+ */
+ if (vec)
+ vec_offset += shard_write_size;
+ orig_offset += shard_write_size;
+ GF_FREE (vec);
+ vec = NULL;
+next:
+ cur_block++;
+ i++;
+ call_count--;
+ }
+ return 0;
+}
+
+/*
+ * Continuation invoked once shard_common_lookup_shards has finished for a
+ * write-class fop: issue the per-shard operations on success, unwind with
+ * the recorded error otherwise. Always returns 0.
+ */
+int
+shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
+                                                     xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                shard_common_inode_write_do (frame, this);
+                return 0;
+        }
+
+        shard_common_inode_write_failure_unwind (local->fop, frame,
+                                                 local->op_ret,
+                                                 local->op_errno);
+        return 0;
+}
+
+/*
+ * Continuation invoked after shard_common_resume_mknod has completed for a
+ * write-class fop.
+ *
+ * On error the fop is unwound. If some mknods failed with EEXIST
+ * (local->eexist_count != 0), those shards are looked up first; otherwise
+ * the per-shard operations are issued directly. Always returns 0.
+ */
+int
+shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
+                                             xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret < 0) {
+                shard_common_inode_write_failure_unwind (local->fop, frame,
+                                                         local->op_ret,
+                                                         local->op_errno);
+                return 0;
+        }
+
+        if (local->eexist_count) {
+                local->call_count = local->eexist_count;
+                shard_common_lookup_shards (frame, this, local->loc.inode,
+                           shard_common_inode_write_post_lookup_shards_handler);
+                return 0;
+        }
+
+        shard_common_inode_write_do (frame, this);
+        return 0;
+}
+
+/*
+ * Continuation invoked after the base-file lookup for a write-class fop.
+ *
+ * On error the fop is unwound. Otherwise local->postbuf is seeded from
+ * local->prebuf, and either the missing shards are created
+ * (local->create_count != 0) or the per-shard operations are issued
+ * directly. Always returns 0.
+ */
+int
+shard_common_inode_write_post_lookup_handler (call_frame_t *frame,
+                                              xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret < 0) {
+                shard_common_inode_write_failure_unwind (local->fop, frame,
+                                                         local->op_ret,
+                                                         local->op_errno);
+                return 0;
+        }
+
+        local->postbuf = local->prebuf;
+
+        if (!local->create_count) {
+                shard_common_inode_write_do (frame, this);
+                return 0;
+        }
+
+        shard_common_resume_mknod (frame, this,
+                                   shard_common_inode_write_post_mknod_handler);
+        return 0;
+}
+
+/*
+ * Continuation invoked after shard resolution for a write-class fop.
+ *
+ * On success the current local->call_count is saved as local->create_count
+ * and the base file is looked up
+ * (shard_common_inode_write_post_lookup_handler continues); on error the
+ * fop is unwound. Always returns 0.
+ */
+int
+shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
+                                               xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        if (local->op_ret >= 0) {
+                local->create_count = local->call_count;
+                shard_lookup_base_file (frame, this, &local->loc,
+                                  shard_common_inode_write_post_lookup_handler);
+                return 0;
+        }
+
+        shard_common_inode_write_failure_unwind (local->fop, frame,
+                                                 local->op_ret,
+                                                 local->op_errno);
+        return 0;
+}
+
+/*
+ * Callback for the mkdir of /.shard wound by shard_mkdir_dot_shard.
+ *
+ * - EEXIST: the directory is looked up instead, keeping the same
+ *   post-resolve handler.
+ * - Any other failure: the error is recorded in local and
+ *   shard_common_resolve_shards is still called with the handler.
+ * - Success: the new inode is linked via shard_link_dot_shard_inode, then
+ *   shard resolution proceeds.
+ *
+ * Always returns 0.
+ */
+int
+shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
+                           xlator_t *this, int32_t op_ret,
+                           int32_t op_errno, inode_t *inode,
+                           struct iatt *buf, struct iatt *preparent,
+                           struct iatt *postparent, dict_t *xdata)
+{
+        shard_local_t *local = frame->local;
+
+        SHARD_UNSET_ROOT_FS_ID (frame, local);
+
+        if ((op_ret == -1) && (op_errno == EEXIST)) {
+                gf_msg_debug (this->name, 0, "mkdir on /.shard failed "
+                              "with EEXIST. Attempting lookup now");
+                shard_lookup_dot_shard (frame, this,
+                                        local->post_res_handler);
+                return 0;
+        }
+
+        if (op_ret == -1) {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+        } else {
+                shard_link_dot_shard_inode (local, inode, buf);
+        }
+
+        shard_common_resolve_shards (frame, this, local->loc.inode,
+                                     local->post_res_handler);
+        return 0;
+}
+
+/*
+ * shard_mkdir_dot_shard - create the /.shard directory.
+ *
+ * Stores @handler as local->post_res_handler, initialises
+ * local->dot_shard_loc, and winds a mkdir (mode 0755) carrying
+ * priv->dot_shard_gfid as "gfid-req", with the root fs id set on the frame.
+ * shard_mkdir_dot_shard_cbk is the callback.
+ *
+ * If any preparation step fails, local->op_ret/op_errno are set to
+ * -1/ENOMEM and @handler is invoked directly. Always returns 0.
+ */
+int
+shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this,
+                       shard_post_resolve_fop_handler_t handler)
+{
+        shard_local_t  *local     = frame->local;
+        shard_priv_t   *priv      = this->private;
+        dict_t         *xattr_req = NULL;
+
+        local->post_res_handler = handler;
+
+        xattr_req = dict_new ();
+        if (!xattr_req)
+                goto err;
+
+        if (shard_init_dot_shard_loc (this, local))
+                goto err;
+
+        if (dict_set_static_bin (xattr_req, "gfid-req", priv->dot_shard_gfid,
+                                 16)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+                        "Failed to set gfid-req for /.shard");
+                goto err;
+        }
+
+        SHARD_SET_ROOT_FS_ID (frame, local);
+
+        STACK_WIND (frame, shard_mkdir_dot_shard_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                    &local->dot_shard_loc, 0755, 0, xattr_req);
+        /* Release the local reference to the request dict. */
+        dict_unref (xattr_req);
+        return 0;
+
+err:
+        if (xattr_req)
+                dict_unref (xattr_req);
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        handler (frame, this);
+        return 0;
+}
+
+/* flush callback: hands the child's result back to the caller unchanged. */
+int
+shard_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ /* To-Do: Wind flush on all shards of the file */
+ SHARD_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/* flush fop: winds the flush on fd to the first child. Always returns 0. */
+int
+shard_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND (frame, shard_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
+
+/*
+ * fsync callback: on success the ia_ctime of the returned postbuf is zeroed
+ * before the result is unwound; a failure is unwound untouched.
+ */
+int
+shard_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+        /* To-Do: Wind fsync on all shards of the file */
+        if (op_ret >= 0)
+                postbuf->ia_ctime = 0;
+
+        SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+                            xdata);
+        return 0;
+}
+
+/* fsync fop: winds the fsync on fd to the first child. Always returns 0. */
+int
+shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ STACK_WIND (frame, shard_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
+}
+
+/*
+ * Callback for the follow-up readdir/readdirp that shard_readdir_cbk() winds
+ * when the .shard entry was the last entry of the previous batch.
+ *
+ * Every returned entry is moved onto local->entries_head. For non-directory
+ * entries that carry a dict, the size/block count is adjusted when the
+ * file-size xattr is present, and the inode ctx is refreshed when the entry
+ * has an inode. The combined list is then unwound to the caller.
+ */
+int
+shard_readdir_past_dot_shard_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry_safe (entry, tmp, (&orig_entries->list), list) {
+
+ /* Splice the entry out of the child's list into ours. */
+ list_del_init (&entry->list);
+ list_add_tail (&entry->list, &local->entries_head.list);
+
+ if (!entry->dict)
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ if (dict_get (entry->dict, GF_XATTR_SHARD_FILE_SIZE))
+ shard_modify_size_and_block_count (&entry->d_stat,
+ entry->dict);
+ if (!entry->inode)
+ continue;
+
+ shard_inode_ctx_update (entry->inode, this, entry->dict,
+ &entry->d_stat);
+ }
+ /* Add this batch's count to the count recorded by shard_readdir_cbk. */
+ local->op_ret += op_ret;
+
+unwind:
+ /* NOTE(review): the readdir branch reports the accumulated
+ * local->op_ret, while the readdirp branch reports only this call's
+ * op_ret/op_errno even though entries_head holds both batches.
+ * Confirm whether this asymmetry is intended. */
+ if (local->fop == GF_FOP_READDIR)
+ SHARD_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno,
+ &local->entries_head, xdata);
+ else
+ SHARD_STACK_UNWIND (readdirp, frame, op_ret, op_errno,
+ &local->entries_head, xdata);
+ return 0;
+}
+
+/*
+ * Callback for the readdir/readdirp wound by shard_readdir_do().
+ *
+ * Entries are moved onto local->entries_head, except for the GF_SHARD_DIR
+ * entry of the root directory, which is left behind and subtracted from
+ * op_ret. If that hidden entry was the last one in this batch, a second
+ * readdir/readdirp is wound starting at its d_off, so the caller still gets
+ * the entries that follow it; otherwise the result is unwound directly.
+ */
+int32_t
+shard_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ shard_local_t *local = NULL;
+ gf_boolean_t last_entry = _gf_false;
+
+ local = frame->local;
+ fd = local->fd;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry_safe (entry, tmp, (&orig_entries->list), list) {
+ /* An entry follows .shard, so .shard was not the last one. */
+ if (last_entry)
+ last_entry = _gf_false;
+
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !(strcmp (entry->d_name, GF_SHARD_DIR))) {
+ /* Hide .shard: remember its offset, drop it from the count. */
+ local->offset = entry->d_off;
+ op_ret--;
+ last_entry = _gf_true;
+ continue;
+ }
+
+ list_del_init (&entry->list);
+ list_add_tail (&entry->list, &local->entries_head.list);
+
+ if (!entry->dict)
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ /* Requests with the gsyncd pid keep the size as returned. */
+ if (dict_get (entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
+ frame->root->pid != GF_CLIENT_PID_GSYNCD)
+ shard_modify_size_and_block_count (&entry->d_stat,
+ entry->dict);
+
+ if (!entry->inode)
+ continue;
+
+ shard_inode_ctx_update (entry->inode, this, entry->dict,
+ &entry->d_stat);
+ }
+
+ local->op_ret = op_ret;
+
+ if (last_entry) {
+ /* .shard ended the batch: continue reading past it. */
+ if (local->fop == GF_FOP_READDIR)
+ STACK_WIND (frame, shard_readdir_past_dot_shard_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, local->fd,
+ local->readdir_size, local->offset,
+ local->xattr_req);
+ else
+ STACK_WIND (frame, shard_readdir_past_dot_shard_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ local->fd, local->readdir_size,
+ local->offset, local->xattr_req);
+ return 0;
+ }
+
+unwind:
+ if (local->fop == GF_FOP_READDIR)
+ SHARD_STACK_UNWIND (readdir, frame, op_ret, op_errno,
+ &local->entries_head, xdata);
+ else
+ SHARD_STACK_UNWIND (readdirp, frame, op_ret, op_errno,
+ &local->entries_head, xdata);
+ return 0;
+}
+
+
+/*
+ * Common implementation of readdir and readdirp.
+ *
+ * @whichop selects the fop: GF_FOP_READDIR winds readdir with the caller's
+ * xdata; anything else winds readdirp with a request dict that additionally
+ * asks for the shard file-size and block-size xattrs.  Results arrive in
+ * shard_readdir_cbk().
+ *
+ * On any setup failure the frame is unwound with -1/ENOMEM for the fop that
+ * was requested.  The unwind goes through SHARD_STACK_UNWIND so that a
+ * partially initialised local (fd ref, request dict) is wiped and returned
+ * to the pool.  Always returns 0.
+ */
+int
+shard_readdir_do (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                  off_t offset, int whichop, dict_t *xdata)
+{
+        int             ret    = 0;
+        shard_local_t  *local  = NULL;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                /* Nothing to record in: local must not be touched here. */
+                goto err;
+
+        frame->local = local;
+
+        local->fd = fd_ref (fd);
+        local->fop = whichop;
+        local->readdir_size = size;
+        INIT_LIST_HEAD (&local->entries_head.list);
+        local->list_inited = _gf_true;
+
+        if (whichop == GF_FOP_READDIR) {
+                STACK_WIND (frame, shard_readdir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdir, fd, size, offset,
+                            xdata);
+        } else {
+                local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+                if (!local->xattr_req)
+                        goto err;
+
+                SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req,
+                                                 fd->inode->gfid, local, err);
+                ret = dict_set_uint64 (local->xattr_req,
+                                       GF_XATTR_SHARD_BLOCK_SIZE, 0);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_WARNING, "Failed to set "
+                                "dict value: key:%s, directory gfid=%s",
+                                GF_XATTR_SHARD_BLOCK_SIZE,
+                                uuid_utoa (fd->inode->gfid));
+                        goto err;
+                }
+
+                STACK_WIND (frame, shard_readdir_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+                            local->xattr_req);
+        }
+
+        return 0;
+
+err:
+        /* Every failure above is a resource failure, hence ENOMEM. */
+        if (whichop == GF_FOP_READDIR)
+                SHARD_STACK_UNWIND (readdir, frame, -1, ENOMEM, NULL, NULL);
+        else
+                SHARD_STACK_UNWIND (readdirp, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+
+}
+
+
+/* readdir fop: delegates to shard_readdir_do() with GF_FOP_READDIR. */
+int32_t
+shard_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ shard_readdir_do (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+ return 0;
+}
+
+
+/* readdirp fop: delegates to shard_readdir_do() with GF_FOP_READDIRP. */
+int32_t
+shard_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ shard_readdir_do (frame, this, fd, size, offset, GF_FOP_READDIRP,
+ xdata);
+ return 0;
+}
+
+/*
+ * removexattr fop.  Unless the request carries the gsyncd pid, @name is
+ * checked against the shard xattr prefix via GF_IF_NATIVE_XATTR_GOTO (which
+ * may jump to the failure path) and the shard block-size/file-size keys are
+ * removed from @xdata.  The fop is then passed to the first child.
+ */
+int32_t
+shard_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   const char *name, dict_t *xdata)
+{
+        int errcode = EINVAL;
+
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+                GF_IF_NATIVE_XATTR_GOTO (SHARD_XATTR_PREFIX"*",
+                                         name, errcode, deny);
+                if (xdata) {
+                        dict_del (xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+                        dict_del (xdata, GF_XATTR_SHARD_FILE_SIZE);
+                }
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->removexattr, loc, name,
+                         xdata);
+        return 0;
+
+deny:
+        SHARD_STACK_UNWIND (removexattr, frame, -1, errcode, NULL);
+        return 0;
+}
+
+/*
+ * fremovexattr fop.  Unless the request carries the gsyncd pid, @name is
+ * checked against the shard xattr prefix via GF_IF_NATIVE_XATTR_GOTO (which
+ * may jump to the failure path) and the shard block-size/file-size keys are
+ * removed from @xdata.  The fop is then passed to the first child.
+ */
+int32_t
+shard_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    const char *name, dict_t *xdata)
+{
+        int errcode = EINVAL;
+
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+                GF_IF_NATIVE_XATTR_GOTO (SHARD_XATTR_PREFIX"*",
+                                         name, errcode, deny);
+                if (xdata) {
+                        dict_del (xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+                        dict_del (xdata, GF_XATTR_SHARD_FILE_SIZE);
+                }
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+                         xdata);
+        return 0;
+
+deny:
+        SHARD_STACK_UNWIND (fremovexattr, frame, -1, errcode, NULL);
+        return 0;
+}
+
+/*
+ * fgetxattr callback: on success, and unless the request carries the gsyncd
+ * pid, the shard block-size and file-size keys are deleted from the returned
+ * dict before it is unwound.
+ */
+int32_t
+shard_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict,
+                     dict_t *xdata)
+{
+        if ((op_ret >= 0) && dict &&
+            (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+                dict_del (dict, GF_XATTR_SHARD_BLOCK_SIZE);
+                dict_del (dict, GF_XATTR_SHARD_FILE_SIZE);
+        }
+
+        SHARD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+        return 0;
+}
+
+/*
+ * fgetxattr fop.  A request for a key that starts with SHARD_XATTR_PREFIX is
+ * answered with -1/ENODATA unless it carries the gsyncd pid; everything else
+ * is wound to the first child.
+ */
+int32_t
+shard_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+        if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && name &&
+            !strncmp (name, SHARD_XATTR_PREFIX,
+                      strlen (SHARD_XATTR_PREFIX))) {
+                SHARD_STACK_UNWIND (fgetxattr, frame, -1, ENODATA, NULL,
+                                    NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+        return 0;
+}
+
+
+/*
+ * getxattr callback: on success, and unless the request carries the gsyncd
+ * pid, the shard block-size and file-size keys are deleted from the returned
+ * dict before it is unwound.
+ */
+int32_t
+shard_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *dict,
+                    dict_t *xdata)
+{
+        if ((op_ret >= 0) && dict &&
+            (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+                dict_del (dict, GF_XATTR_SHARD_BLOCK_SIZE);
+                dict_del (dict, GF_XATTR_SHARD_FILE_SIZE);
+        }
+
+        SHARD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+        return 0;
+}
+
+/*
+ * getxattr fop.  A request for a key that starts with SHARD_XATTR_PREFIX is
+ * answered with -1/ENODATA unless it carries the gsyncd pid; everything else
+ * is wound to the first child.
+ */
+int32_t
+shard_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && name &&
+            !strncmp (name, SHARD_XATTR_PREFIX,
+                      strlen (SHARD_XATTR_PREFIX))) {
+                SHARD_STACK_UNWIND (getxattr, frame, -1, ENODATA, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, shard_getxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+        return 0;
+}
+
+/*
+ * fsetxattr fop.  Unless the request carries the gsyncd pid, @dict is
+ * screened with GF_IF_INTERNAL_XATTR_GOTO for keys matching the shard xattr
+ * prefix (which may jump to the failure path).  Otherwise the fop is passed
+ * to the first child.
+ */
+int32_t
+shard_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+                 int32_t flags, dict_t *xdata)
+{
+        int errcode = EINVAL;
+
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+                GF_IF_INTERNAL_XATTR_GOTO (SHARD_XATTR_PREFIX"*", dict,
+                                           errcode, deny);
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+                         xdata);
+        return 0;
+
+deny:
+        SHARD_STACK_UNWIND (fsetxattr, frame, -1, errcode, NULL);
+        return 0;
+}
+
+/*
+ * setxattr fop.  Unless the request carries the gsyncd pid, @dict is
+ * screened with GF_IF_INTERNAL_XATTR_GOTO for keys matching the shard xattr
+ * prefix (which may jump to the failure path).  Otherwise the fop is passed
+ * to the first child.
+ */
+int32_t
+shard_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+                int32_t flags, dict_t *xdata)
+{
+        int errcode = EINVAL;
+
+        if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+                GF_IF_INTERNAL_XATTR_GOTO (SHARD_XATTR_PREFIX"*", dict,
+                                           errcode, deny);
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+                         FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+                         xdata);
+        return 0;
+
+deny:
+        SHARD_STACK_UNWIND (setxattr, frame, -1, errcode, NULL);
+        return 0;
+}
+
+/*
+ * Final step of setattr/fsetattr, run from shard_common_setattr_cbk().
+ * On success the inode ctx is updated from local->postbuf using
+ * SHARD_LOOKUP_MASK; the result stored in local is then unwound for the fop
+ * recorded in local->fop.  Other fop values are ignored.
+ */
+int
+shard_post_setattr_handler (call_frame_t *frame, xlator_t *this)
+{
+        shard_local_t *local = frame->local;
+
+        switch (local->fop) {
+        case GF_FOP_SETATTR:
+                if (local->op_ret >= 0)
+                        shard_inode_ctx_set (local->loc.inode, this,
+                                             &local->postbuf, 0,
+                                             SHARD_LOOKUP_MASK);
+                SHARD_STACK_UNWIND (setattr, frame, local->op_ret,
+                                    local->op_errno, &local->prebuf,
+                                    &local->postbuf, local->xattr_rsp);
+                break;
+        case GF_FOP_FSETATTR:
+                if (local->op_ret >= 0)
+                        shard_inode_ctx_set (local->fd->inode, this,
+                                             &local->postbuf, 0,
+                                             SHARD_LOOKUP_MASK);
+                SHARD_STACK_UNWIND (fsetattr, frame, local->op_ret,
+                                    local->op_errno, &local->prebuf,
+                                    &local->postbuf, local->xattr_rsp);
+                break;
+        default:
+                break;
+        }
+
+        return 0;
+}
+
+/*
+ * Shared callback for setattr and fsetattr.
+ *
+ * A failure from the child is copied into local.  On success prebuf is
+ * stored and passed through shard_modify_size_and_block_count() together
+ * with xdata; if that fails, -1/EINVAL is recorded.  Otherwise xdata is
+ * referenced as the response dict and postbuf is stored with its size and
+ * block count overwritten by the values from the adjusted prebuf.
+ * local->handler is invoked in every case.
+ */
+int
+shard_common_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                          struct iatt *postbuf, dict_t *xdata)
+{
+        shard_local_t *local = frame->local;
+
+        if (op_ret < 0) {
+                local->op_ret = op_ret;
+                local->op_errno = op_errno;
+        } else {
+                local->prebuf = *prebuf;
+                if (shard_modify_size_and_block_count (&local->prebuf,
+                                                       xdata)) {
+                        local->op_ret = -1;
+                        local->op_errno = EINVAL;
+                } else {
+                        if (xdata)
+                                local->xattr_rsp = dict_ref (xdata);
+                        local->postbuf = *postbuf;
+                        local->postbuf.ia_size = local->prebuf.ia_size;
+                        local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+                }
+        }
+
+        local->handler (frame, this);
+        return 0;
+}
+
+/*
+ * setattr fop.
+ *
+ * Directories and symlinks, inodes whose ctx block size is 0, and requests
+ * carrying the gsyncd pid are passed straight to the first child with the
+ * default callback.  For everything else a local is set up, the file-size
+ * xattr is requested, and the fop is wound with shard_common_setattr_cbk().
+ * Any setup failure is unwound with -1/ENOMEM.
+ */
+int
+shard_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int             ret        = -1;
+        uint64_t        block_size = 0;
+        shard_local_t  *local      = NULL;
+
+        if ((IA_ISDIR (loc->inode->ia_type)) ||
+            (IA_ISLNK (loc->inode->ia_type)))
+                goto passthrough;
+
+        ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED,
+                        "Failed to get block size from inode ctx of %s",
+                        uuid_utoa (loc->inode->gfid));
+                goto err;
+        }
+
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD)
+                goto passthrough;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        local->handler = shard_post_setattr_handler;
+        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+        if (!local->xattr_req)
+                goto err;
+        local->fop = GF_FOP_SETATTR;
+        loc_copy (&local->loc, loc);
+
+        SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req,
+                                         local->loc.gfid, local, err);
+
+        STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+                    local->xattr_req);
+        return 0;
+
+passthrough:
+        STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setattr, loc, stbuf,
+                    valid, xdata);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * fsetattr fop.
+ *
+ * Directories and symlinks, inodes whose ctx block size is 0, and requests
+ * carrying the gsyncd pid are passed straight to the first child with the
+ * default callback.  For everything else this->itable is initialised from
+ * the fd's inode table if unset, a local is set up, the file-size xattr is
+ * requested, and the fop is wound with shard_common_setattr_cbk().
+ * Any setup failure is unwound with -1/ENOMEM.
+ */
+int
+shard_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int             ret        = -1;
+        uint64_t        block_size = 0;
+        shard_local_t  *local      = NULL;
+
+        if ((IA_ISDIR (fd->inode->ia_type)) ||
+            (IA_ISLNK (fd->inode->ia_type)))
+                goto passthrough;
+
+        ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_INODE_CTX_GET_FAILED,
+                        "Failed to get block size from inode ctx of %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto err;
+        }
+
+        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD)
+                goto passthrough;
+
+        if (!this->itable)
+                this->itable = fd->inode->table;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        frame->local = local;
+
+        local->handler = shard_post_setattr_handler;
+        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+        if (!local->xattr_req)
+                goto err;
+        local->fop = GF_FOP_FSETATTR;
+        local->fd = fd_ref (fd);
+
+        SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req,
+                                         fd->inode->gfid, local, err);
+
+        STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+                    local->xattr_req);
+        return 0;
+
+passthrough:
+        STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsetattr, fd, stbuf,
+                    valid, xdata);
+        return 0;
+
+err:
+        SHARD_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Common entry point for writev, fallocate, zerofill and discard.
+ *
+ * If the inode's ctx block size is 0, or the request carries the gsyncd pid,
+ * the fop is passed straight to the first child. Otherwise a local is
+ * populated with the request (vector copy or length, offset, flags, fd,
+ * block range covered) and processing continues either by creating /.shard
+ * or by resolving the shards, with
+ * shard_common_inode_write_post_resolve_handler as the continuation.
+ *
+ * vector/count/iobref are used by writev; len is used by the other fops
+ * (total_size is taken from len when vector is NULL).
+ * Setup failures are reported through
+ * shard_common_inode_write_failure_unwind() with -1/ENOMEM.
+ */
+int
+shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags, size_t len,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int ret = 0;
+ int i = 0;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+ "size for %s from its inode ctx",
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ /* block_size = 0 means that the file was created before
+ * sharding was enabled on the volume.
+ */
+ switch (fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd,
+ vector, count, offset, flags, iobref,
+ xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd,
+ flags, offset, len, xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill,
+ fd, offset, len, xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard,
+ fd, offset, len, xdata);
+ break;
+ default:
+ /* NOTE(review): this path only logs; the frame is neither
+ * wound nor unwound here. Confirm no caller can reach it. */
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0 (this->local_pool);
+ if (!local)
+ goto out;
+
+ frame->local = local;
+
+ local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+ if (!local->xattr_req)
+ goto out;
+
+ if (vector) {
+ /* writev: keep a private copy of the vector and sum its length. */
+ local->vector = iov_dup (vector, count);
+ if (!local->vector)
+ goto out;
+ for (i = 0; i < count; i++)
+ local->total_size += vector[i].iov_len;
+ local->count = count;
+ } else {
+ local->total_size = len;
+ }
+
+ local->fop = fop;
+ local->offset = offset;
+ local->flags = flags;
+ if (iobref)
+ local->iobref = iobref_ref (iobref);
+ local->fd = fd_ref (fd);
+ local->block_size = block_size;
+ /* Range of shard block numbers touched by [offset, offset+total_size). */
+ local->first_block = get_lowest_block (offset, local->block_size);
+ local->last_block = get_highest_block (offset, local->total_size,
+ local->block_size);
+ local->num_blocks = local->last_block - local->first_block + 1;
+ /* One inode slot per block in the range. */
+ local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
+
+ gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" "
+ "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64""
+ " total_size=%zu flags=%"PRId32"", gf_fop_list[fop],
+ uuid_utoa (fd->inode->gfid), local->first_block,
+ local->last_block, local->num_blocks, offset,
+ local->total_size, local->flags);
+
+ local->dot_shard_loc.inode = inode_find (this->itable,
+ priv->dot_shard_gfid);
+
+ /* No inode for /.shard in the table yet: create it before resolving. */
+ if (!local->dot_shard_loc.inode)
+ shard_mkdir_dot_shard (frame, this,
+ shard_common_inode_write_post_resolve_handler);
+ else
+ shard_common_resolve_shards (frame, this, local->loc.inode,
+ shard_common_inode_write_post_resolve_handler);
+
+ return 0;
+out:
+ shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM);
+ return 0;
+}
+
+/* writev fop: delegates to shard_common_inode_write_begin() as GF_FOP_WRITE
+ * with len = 0 (the length is taken from the vector). */
+int
+shard_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ shard_common_inode_write_begin (frame, this, GF_FOP_WRITE, fd, vector,
+ count, offset, flags, 0, iobref, xdata);
+ return 0;
+}
+
+/*
+ * fallocate fop.  Only three values of @keep_size are accepted: 0,
+ * FALLOC_FL_ZERO_RANGE, and FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.
+ * Anything else is unwound with -1/ENOTSUP; accepted requests go through
+ * shard_common_inode_write_begin() with @keep_size passed as the flags.
+ */
+int
+shard_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+        gf_boolean_t supported = _gf_false;
+
+        if ((keep_size == 0) || (keep_size == FALLOC_FL_ZERO_RANGE) ||
+            (keep_size == (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
+                supported = _gf_true;
+
+        if (!supported) {
+                SHARD_STACK_UNWIND (fallocate, frame, -1, ENOTSUP, NULL, NULL,
+                                    NULL);
+                return 0;
+        }
+
+        shard_common_inode_write_begin (frame, this, GF_FOP_FALLOCATE, fd, NULL,
+                                        0, offset, keep_size, len, NULL, xdata);
+        return 0;
+}
+
+/* zerofill fop: delegates to shard_common_inode_write_begin() as
+ * GF_FOP_ZEROFILL with no vector, flags = 0 and the given offset/len. */
+int
+shard_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ shard_common_inode_write_begin (frame, this, GF_FOP_ZEROFILL, fd, NULL,
+ 0, offset, 0, len, NULL, xdata);
+ return 0;
+}
+
+/* discard fop: delegates to shard_common_inode_write_begin() as
+ * GF_FOP_DISCARD with no vector, flags = 0 and the given offset/len. */
+int
+shard_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ shard_common_inode_write_begin (frame, this, GF_FOP_DISCARD, fd, NULL,
+ 0, offset, 0, len, NULL, xdata);
+ return 0;
+}
+
+/* seek fop: not implemented; logs the gfid and unwinds with -1/ENOTSUP. */
+int32_t
+shard_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ /* TBD */
+ gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
+ "seek called on %s.", uuid_utoa (fd->inode->gfid));
+ SHARD_STACK_UNWIND (seek, frame, -1, ENOTSUP, 0, NULL);
+ return 0;
+}
+
+/*
+ * Initialise memory accounting for this translator with room for
+ * gf_shard_mt_end + 1 types.
+ *
+ * Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init(); a non-zero result is logged.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_shard_mt_end + 1);
+
+        if (ret != 0) {
+                /* The two literals are concatenated: keep the leading space
+                 * so the message does not read "initfailed". */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        SHARD_MSG_MEM_ACCT_INIT_FAILED, "Memory accounting init"
+                        " failed");
+        }
+
+        return ret;
+}
+
+/*
+ * Translator init.
+ *
+ * Validates that the xlator has a parent and exactly one child, allocates
+ * the private structure, reads "shard-block-size", creates the local pool,
+ * parses SHARD_ROOT_GFID into priv->dot_shard_gfid and initialises the lock
+ * and the inode list head.
+ *
+ * Returns 0 on success and -1 on failure.  On failure the private structure
+ * is freed and the local pool is destroyed and reset to NULL.
+ */
+int
+init (xlator_t *this)
+{
+        int            ret  = -1;
+        shard_priv_t  *priv = NULL;
+
+        if (!this) {
+                gf_msg ("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
+                        "this is NULL. init() failed");
+                /* Nothing was allocated and the cleanup path dereferences
+                 * this, so return directly. */
+                return ret;
+        }
+
+        if (!this->parents) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+                        "Dangling volume. Check volfile");
+                goto out;
+        }
+
+        if (!this->children || this->children->next) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+                        "shard not configured with exactly one sub-volume. "
+                        "Check volfile");
+                goto out;
+        }
+
+        priv = GF_CALLOC (1, sizeof (shard_priv_t), gf_shard_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        GF_OPTION_INIT ("shard-block-size", priv->block_size, size_uint64, out);
+
+        this->local_pool = mem_pool_new (shard_local_t, 128);
+        if (!this->local_pool) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_parse (SHARD_ROOT_GFID, priv->dot_shard_gfid);
+
+        this->private = priv;
+        LOCK_INIT (&priv->lock);
+        INIT_LIST_HEAD (&priv->ilist_head);
+        ret = 0;
+out:
+        if (ret) {
+                GF_FREE (priv);
+                mem_pool_destroy (this->local_pool);
+                /* Do not leave a dangling pool pointer behind (fini() resets
+                 * it the same way). */
+                this->local_pool = NULL;
+        }
+
+        return ret;
+
+}
+
+/*
+ * Translator teardown: destroys the local pool and, when a private structure
+ * is attached, detaches it, destroys its lock and frees it.
+ */
+void
+fini (xlator_t *this)
+{
+        shard_priv_t *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO ("shard", this, out);
+
+        mem_pool_destroy (this->local_pool);
+        this->local_pool = NULL;
+
+        priv = this->private;
+        if (priv) {
+                this->private = NULL;
+                LOCK_DESTROY (&priv->lock);
+                GF_FREE (priv);
+        }
+
+out:
+        return;
+}
+
+/*
+ * Re-read "shard-block-size" from @options into the private structure.
+ * Returns 0 on success; -1 if GF_OPTION_RECONF jumps to the failure label.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        int            result = -1;
+        shard_priv_t  *priv   = this->private;
+
+        GF_OPTION_RECONF ("shard-block-size", priv->block_size, options, size,
+                          done);
+
+        result = 0;
+
+done:
+        return result;
+}
+
+/*
+ * forget callback: removes this translator's ctx from @inode and frees it if
+ * one was stored.  Always returns 0.
+ */
+int
+shard_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_value = 0;
+
+        inode_ctx_del (inode, this, &ctx_value);
+
+        if (ctx_value)
+                GF_FREE ((shard_inode_ctx_t *)ctx_value);
+
+        return 0;
+}
+
+/* release callback: currently a no-op that returns 0. */
+int
+shard_release (xlator_t *this, fd_t *fd)
+{
+ /* TBD */
+ return 0;
+}
+
+/*
+ * Statedump hook for the private structure: writes a section named
+ * "<type>.<name>" containing the block size, inode count, inode list head
+ * address and SHARD_MAX_INODES.  Always returns 0.
+ */
+int
+shard_priv_dump (xlator_t *this)
+{
+        shard_priv_t *priv                            = NULL;
+        char          key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+        char         *size_str                        = NULL;
+
+        priv = this->private;
+
+        snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
+                  this->name);
+        gf_proc_dump_add_section (key_prefix);
+
+        /* gf_uint64_2human_readable() hands back an allocated string that
+         * the caller owns; keep the pointer so it can be released below. */
+        size_str = gf_uint64_2human_readable (priv->block_size);
+        gf_proc_dump_write ("shard-block-size", "%s", size_str);
+        gf_proc_dump_write ("inode-count", "%d", priv->inode_count);
+        gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head);
+        gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES);
+
+        GF_FREE (size_str);
+
+        return 0;
+}
+
+/* releasedir callback: a no-op that returns 0. */
+int
+shard_releasedir (xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+/* File-operation entry points implemented by the shard translator. */
+struct xlator_fops fops = {
+ .lookup = shard_lookup,
+ .open = shard_open,
+ .flush = shard_flush,
+ .fsync = shard_fsync,
+ .stat = shard_stat,
+ .fstat = shard_fstat,
+ .getxattr = shard_getxattr,
+ .fgetxattr = shard_fgetxattr,
+ .readv = shard_readv,
+ .writev = shard_writev,
+ .truncate = shard_truncate,
+ .ftruncate = shard_ftruncate,
+ .setxattr = shard_setxattr,
+ .fsetxattr = shard_fsetxattr,
+ .setattr = shard_setattr,
+ .fsetattr = shard_fsetattr,
+ .removexattr = shard_removexattr,
+ .fremovexattr = shard_fremovexattr,
+ .fallocate = shard_fallocate,
+ .discard = shard_discard,
+ .zerofill = shard_zerofill,
+ .readdir = shard_readdir,
+ .readdirp = shard_readdirp,
+ .create = shard_create,
+ .mknod = shard_mknod,
+ .link = shard_link,
+ .unlink = shard_unlink,
+ .rename = shard_rename,
+ .seek = shard_seek,
+};
+
+/* Inode/fd lifecycle callbacks implemented by the shard translator. */
+struct xlator_cbks cbks = {
+ .forget = shard_forget,
+ .release = shard_release,
+ .releasedir = shard_releasedir,
+};
+
+/* Statedump hooks: only the private structure is dumped. */
+struct xlator_dumpops dumpops = {
+ .priv = shard_priv_dump,
+};
+
+/* Volume options understood by the shard translator. "shard-block-size" is
+ * read in init() and reconfigure(); its range is bounded by
+ * SHARD_MIN_BLOCK_SIZE and SHARD_MAX_BLOCK_SIZE. */
+struct volume_options options[] = {
+ { .key = {"shard-block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "4MB",
+ .min = SHARD_MIN_BLOCK_SIZE,
+ .max = SHARD_MAX_BLOCK_SIZE,
+ .description = "The size unit used to break a file into multiple "
+ "chunks",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
new file mode 100644
index 00000000000..8303a2ca030
--- /dev/null
+++ b/xlators/features/shard/src/shard.h
@@ -0,0 +1,272 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __SHARD_H__
+#define __SHARD_H__
+
+#include "xlator.h"
+#include "compat-errno.h"
+#include "shard-messages.h"
+
+/* Name of the hidden directory compared against directory entries and
+ * entry-fop targets. */
+#define GF_SHARD_DIR ".shard"
+/* Bounds of the "shard-block-size" option. */
+#define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB)
+#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB)
+/* Prefix of the xattr keys reserved for this translator. */
+#define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
+#define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
+#define SHARD_INODE_LRU_LIMIT 4096
+/* Reported as "lru-max-limit" in the private statedump. */
+#define SHARD_MAX_INODES 16384
+/**
+ * Bit masks for the valid flag, which is used while updating ctx
+**/
+#define SHARD_MASK_BLOCK_SIZE (1 << 0)
+#define SHARD_MASK_PROT (1 << 1)
+#define SHARD_MASK_NLINK (1 << 2)
+#define SHARD_MASK_UID (1 << 3)
+#define SHARD_MASK_GID (1 << 4)
+#define SHARD_MASK_SIZE (1 << 6)
+#define SHARD_MASK_BLOCKS (1 << 7)
+#define SHARD_MASK_TIMES (1 << 8)
+#define SHARD_MASK_OTHERS (1 << 9)
+#define SHARD_MASK_REFRESH_RESET (1 << 10)
+
+/* Combination of the size, blocks and times masks. */
+#define SHARD_INODE_WRITE_MASK (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \
+ | SHARD_MASK_TIMES)
+
+/* Everything except block size, size and blocks. */
+#define SHARD_LOOKUP_MASK (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID \
+ | SHARD_MASK_GID | SHARD_MASK_TIMES \
+ | SHARD_MASK_OTHERS)
+
+/* All field masks; SHARD_MASK_REFRESH_RESET is not included. */
+#define SHARD_ALL_MASK (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT \
+ | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID \
+ | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \
+ | SHARD_MASK_TIMES | SHARD_MASK_OTHERS)
+
+
+/* Index of the block containing byte offset off, for blocks of shard_size. */
+#define get_lowest_block(off, shard_size) ((off) / (shard_size))
+/* Index of the block containing the last byte of the range [off, off+len);
+ * evaluates to 0 when off + len is 0. */
+#define get_highest_block(off, len, shard_size) \
+ (((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size))
+
+/*
+ * Guard for entry fops.  Sets op_errno to EPERM and jumps to label when
+ *  - loc names GF_SHARD_DIR and its parent (by inode or by pargfid) is the
+ *    root, or
+ *  - __is_shard_dir() matches loc's parent gfid or pargfid.
+ *
+ * loc is parenthesized at every use so that any pointer expression (for
+ * example &local->loc) can be passed safely.
+ */
+#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do {               \
+        if (((loc)->name && !strcmp (GF_SHARD_DIR, (loc)->name)) &&   \
+            ((((loc)->parent) &&                                       \
+              __is_root_gfid ((loc)->parent->gfid)) ||                 \
+             __is_root_gfid ((loc)->pargfid))) {                       \
+                (op_errno) = EPERM;                                    \
+                goto label;                                            \
+        }                                                              \
+                                                                       \
+        if (((loc)->parent &&                                          \
+             __is_shard_dir ((loc)->parent->gfid)) ||                  \
+            __is_shard_dir ((loc)->pargfid)) {                         \
+                (op_errno) = EPERM;                                    \
+                goto label;                                            \
+        }                                                              \
+} while (0)
+
+/* Sets err to EPERM and jumps to label when __is_shard_dir() matches gfid. */
+#define SHARD_INODE_OP_CHECK(gfid, err, label) do { \
+ if (__is_shard_dir(gfid)) { \
+ err = EPERM; \
+ goto label; \
+ } \
+} while (0)
+
+/*
+ * Unwind fop on frame and release the frame's shard_local_t.
+ *
+ * The local is detached from the frame before STACK_UNWIND_STRICT runs and
+ * is wiped and returned to its pool afterwards.  A frame without a local is
+ * simply unwound.  frame is parenthesized wherever it is dereferenced so any
+ * pointer expression can be passed.
+ */
+#define SHARD_STACK_UNWIND(fop, frame, params ...) do {        \
+        shard_local_t *__local = NULL;                          \
+        if (frame) {                                            \
+                __local = (frame)->local;                       \
+                (frame)->local = NULL;                          \
+        }                                                       \
+        STACK_UNWIND_STRICT (fop, frame, params);               \
+        if (__local) {                                          \
+                shard_local_wipe (__local);                     \
+                mem_put (__local);                              \
+        }                                                       \
+} while (0)
+
+
+/*
+ * Prepare xattr_req for creating a new base file: stores the configured
+ * block size (converted with hton64) in local->block_size and sets it under
+ * GF_XATTR_SHARD_BLOCK_SIZE, then sets an initial size attribute built by
+ * shard_set_size_attrs (0, 0, ...) under GF_XATTR_SHARD_FILE_SIZE.
+ * Jumps to label on any failure; the size attribute buffer is freed here if
+ * it could not be handed to the dict.
+ */
+#define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do { \
+ int __ret = -1; \
+ int64_t *__size_attr = NULL; \
+ shard_priv_t *__priv = NULL; \
+ \
+ __priv = this->private; \
+ \
+ local->block_size = hton64 (__priv->block_size); \
+ __ret = dict_set_static_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, \
+ &local->block_size, \
+ sizeof (local->block_size)); \
+ if (__ret) { \
+ gf_msg (this->name, GF_LOG_WARNING, 0, \
+ SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \
+ "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, loc->path); \
+ goto label; \
+ } \
+ \
+ __ret = shard_set_size_attrs (0, 0, &__size_attr); \
+ if (__ret) \
+ goto label; \
+ \
+ __ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, \
+ __size_attr, 8 * 4); \
+ if (__ret) { \
+ gf_msg (this->name, GF_LOG_WARNING, 0, \
+ SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \
+ "on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path); \
+ GF_FREE (__size_attr); \
+ goto label; \
+ } \
+} while (0)
+
+
+/*
+ * Add the GF_XATTR_SHARD_FILE_SIZE key (value 8 * 4) to dict. On failure
+ * records -1/ENOMEM in local, logs a warning naming gfid, and jumps to label.
+ */
+#define SHARD_MD_READ_FOP_INIT_REQ_DICT(this, dict, gfid, local, label) do { \
+ int __ret = -1; \
+ \
+ __ret = dict_set_uint64 (dict, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); \
+ if (__ret) { \
+ local->op_ret = -1; \
+ local->op_errno = ENOMEM; \
+ gf_msg (this->name, GF_LOG_WARNING, 0, \
+ SHARD_MSG_DICT_SET_FAILED, "Failed to set dict value:"\
+ " key:%s for %s.", GF_XATTR_SHARD_FILE_SIZE, \
+ uuid_utoa (gfid)); \
+ goto label; \
+ } \
+} while (0)
+
+/*
+ * Save the frame's uid/gid in local and switch the frame to uid/gid 0.
+ * Does nothing if local->is_set_fsid is already set.  Undone by
+ * SHARD_UNSET_ROOT_FS_ID.  Arguments are parenthesized so pointer
+ * expressions can be passed.
+ */
+#define SHARD_SET_ROOT_FS_ID(frame, local) do {                \
+        if (!(local)->is_set_fsid) {                           \
+                (local)->uid = (frame)->root->uid;             \
+                (local)->gid = (frame)->root->gid;             \
+                (frame)->root->uid = 0;                        \
+                (frame)->root->gid = 0;                        \
+                (local)->is_set_fsid = _gf_true;               \
+        }                                                      \
+} while (0)
+
+/*
+ * Restore the frame's uid/gid from the values saved in local by
+ * SHARD_SET_ROOT_FS_ID and clear local->is_set_fsid.  Does nothing if the
+ * flag is not set.  Arguments are parenthesized so pointer expressions can
+ * be passed.
+ */
+#define SHARD_UNSET_ROOT_FS_ID(frame, local) do {              \
+        if ((local)->is_set_fsid) {                            \
+                (frame)->root->uid = (local)->uid;             \
+                (frame)->root->gid = (local)->gid;             \
+                (local)->is_set_fsid = _gf_false;              \
+        }                                                      \
+} while (0)
+
+/*
+ * Bring a ctx timestamp and a new timestamp (sec/nsec pairs) to the same,
+ * later value: with equal seconds both nsec fields take the larger nsec;
+ * otherwise the pair with the larger seconds is copied over the other.
+ * All four arguments are assigned to and must be lvalues.
+ */
+#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) do { \
+ if (ctx_sec == new_sec) \
+ ctx_nsec = new_nsec = max (new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } else { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+
+/* Per-translator private state, allocated in init(). */
+typedef struct shard_priv {
+ /* Value of the "shard-block-size" option. */
+ uint64_t block_size;
+ /* Parsed from SHARD_ROOT_GFID in init(). */
+ uuid_t dot_shard_gfid;
+ /* presumably the cached inode of /.shard -- TODO confirm against users */
+ inode_t *dot_shard_inode;
+ gf_lock_t lock;
+ /* Reported as "inode-count" in the statedump. */
+ int inode_count;
+ struct list_head ilist_head;
+} shard_priv_t;
+
+/* Lock descriptor referenced from shard_local_t's lock member
+ * (presumably one per inodelk request -- TODO confirm against users). */
+typedef struct {
+ loc_t *loc;
+ short type;
+ char *domain;
+} shard_lock_t;
+
+/* Continuation handler types stored in shard_local_t. All share the same
+ * (frame, this) signature. */
+typedef int32_t (*shard_post_fop_handler_t) (call_frame_t *frame,
+ xlator_t *this);
+typedef int32_t (*shard_post_resolve_fop_handler_t) (call_frame_t *frame,
+ xlator_t *this);
+typedef int32_t (*shard_post_lookup_shards_fop_handler_t) (call_frame_t *frame,
+ xlator_t *this);
+
+typedef int32_t (*shard_post_mknod_fop_handler_t) (call_frame_t *frame,
+ xlator_t *this);
+
+typedef int32_t (*shard_post_update_size_fop_handler_t) (call_frame_t *frame,
+ xlator_t *this);
+/*
+ * Per-fop state attached to frame->local. Allocated zeroed from
+ * this->local_pool and released through SHARD_STACK_UNWIND.
+ */
+typedef struct shard_local {
+ /* Result reported to the caller by the post-fop handlers. */
+ int op_ret;
+ int op_errno;
+ /* Block range covered by the request, see get_lowest_block() and
+ * get_highest_block(). */
+ int first_block;
+ int last_block;
+ int num_blocks;
+ int call_count;
+ int eexist_count;
+ int create_count;
+ int xflag;
+ int count;
+ uint32_t flags;
+ /* Caller's uid/gid saved by SHARD_SET_ROOT_FS_ID. */
+ uint32_t uid;
+ uint32_t gid;
+ uint64_t block_size;
+ uint64_t dst_block_size;
+ off_t offset;
+ size_t total_size;
+ size_t written_size;
+ size_t hole_size;
+ size_t req_size;
+ size_t readdir_size;
+ int64_t delta_size;
+ int delta_blocks;
+ loc_t loc;
+ loc_t dot_shard_loc;
+ loc_t loc2;
+ loc_t tmp_loc;
+ fd_t *fd;
+ dict_t *xattr_req;
+ dict_t *xattr_rsp;
+ /* Array of num_blocks inode pointers. */
+ inode_t **inode_list;
+ glusterfs_fop_t fop;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ struct iovec *vector;
+ struct iobref *iobref;
+ struct iobuf *iobuf;
+ /* Directory entries accumulated by the readdir callbacks. */
+ gf_dirent_t entries_head;
+ gf_boolean_t is_set_fsid;
+ gf_boolean_t list_inited;
+ shard_post_fop_handler_t handler;
+ shard_post_lookup_shards_fop_handler_t pls_fop_handler;
+ shard_post_resolve_fop_handler_t post_res_handler;
+ shard_post_mknod_fop_handler_t post_mknod_handler;
+ shard_post_update_size_fop_handler_t post_update_size_handler;
+ struct {
+ int lock_count;
+ fop_inodelk_cbk_t inodelk_cbk;
+ shard_lock_t *shard_lock;
+ } lock;
+} shard_local_t;
+
+/* Per-inode context stored by this translator; freed in shard_forget(). */
+typedef struct shard_inode_ctx {
+ uint64_t block_size; /* The block size with which this inode is
+ sharded */
+ struct iatt stat;
+ gf_boolean_t refresh;
+ /* The following members of inode ctx will be applicable only to the
+ * individual shards' ctx and never the base file ctx.
+ */
+ struct list_head ilist;
+ uuid_t base_gfid;
+ int block_num;
+} shard_inode_ctx_t;
+
+#endif /* __SHARD_H__ */
diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/features/snapview-client/Makefile.am
index af437a64d6d..af437a64d6d 100644
--- a/xlators/performance/stat-prefetch/Makefile.am
+++ b/xlators/features/snapview-client/Makefile.am
diff --git a/xlators/features/snapview-client/src/Makefile.am b/xlators/features/snapview-client/src/Makefile.am
new file mode 100644
index 00000000000..72d8a2a1973
--- /dev/null
+++ b/xlators/features/snapview-client/src/Makefile.am
@@ -0,0 +1,15 @@
+# Build the snapview-client translator as a libtool module installed under
+# the "features" xlator directory.
+xlator_LTLIBRARIES = snapview-client.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+snapview_client_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+# Single source file, linked against libglusterfs.
+snapview_client_la_SOURCES = snapview-client.c
+snapview_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+# Headers that are part of the sources but are not installed.
+noinst_HEADERS = snapview-client.h snapview-client-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/snapview-client/src/snapview-client-mem-types.h b/xlators/features/snapview-client/src/snapview-client-mem-types.h
new file mode 100644
index 00000000000..1a0158d950e
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SVC_MEM_TYPES_H
+#define _SVC_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/*
+ * Memory-accounting type ids for allocations made by snapview-client.
+ * Numbering starts right after the common types (gf_common_mt_end) and
+ * gf_svc_mt_end terminates the list.
+ */
+enum svc_mem_types {
+ gf_svc_mt_svc_private_t = gf_common_mt_end + 1,
+ gf_svc_mt_svc_local_t,
+ gf_svc_mt_svc_inode_t,
+ gf_svc_mt_svc_fd_t,
+ gf_svc_mt_end
+};
+
+#endif
diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
new file mode 100644
index 00000000000..6eb7cc071c2
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client.c
@@ -0,0 +1,2454 @@
+ /*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "snapview-client.h"
+#include "inode.h"
+#include "byte-order.h"
+
+
+/*
+ * Release everything a per-call svc_local_t holds (its loc, and the fd and
+ * xdata references when present) and hand the structure to mem_put().
+ * A NULL @local is ignored.
+ */
+static void
+svc_local_free (svc_local_t *local)
+{
+        if (!local)
+                return;
+
+        loc_wipe (&local->loc);
+
+        if (local->fd)
+                fd_unref (local->fd);
+
+        if (local->xdata)
+                dict_unref (local->xdata);
+
+        mem_put (local);
+}
+
+/*
+ * Map an inode type to the child translator that serves it:
+ * VIRTUAL_INODE -> second child, anything else -> first child.
+ * Returns NULL when @this is NULL.
+ */
+static xlator_t *
+svc_get_subvolume (xlator_t *this, int inode_type)
+{
+        xlator_t *subvolume = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+
+        subvolume = (inode_type == VIRTUAL_INODE) ? SECOND_CHILD (this)
+                                                  : FIRST_CHILD (this);
+
+out:
+        return subvolume;
+}
+
+/*
+ * Store @inode_type in this translator's slot of the inode context.
+ * Lock-free variant: the public wrapper svc_inode_ctx_set() takes
+ * inode->lock around this call.
+ * Returns the result of __inode_ctx_set(), or -1 on a NULL argument.
+ */
+static int32_t
+__svc_inode_ctx_set (xlator_t *this, inode_t *inode, int inode_type)
+{
+        int32_t  status    = -1;
+        uint64_t ctx_value = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ctx_value = inode_type;
+        status = __inode_ctx_set (inode, this, &ctx_value);
+
+out:
+        return status;
+}
+
+/*
+ * Read the inode type stored in this translator's slot of the inode
+ * context. Lock-free variant: svc_inode_ctx_get() wraps it in inode->lock.
+ * *@inode_type is written only when the lookup succeeds (return >= 0).
+ * Returns the result of __inode_ctx_get(), or -1 on a NULL argument.
+ */
+static int
+__svc_inode_ctx_get (xlator_t *this, inode_t *inode, int *inode_type)
+{
+        int      status    = -1;
+        uint64_t ctx_value = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        status = __inode_ctx_get (inode, this, &ctx_value);
+        if (status >= 0)
+                *inode_type = (int)(ctx_value);
+
+out:
+        return status;
+}
+
+/*
+ * Locked wrapper around __svc_inode_ctx_get(): fetches the inode type
+ * while holding inode->lock. Returns -1 on a NULL @this or @inode.
+ */
+static int
+svc_inode_ctx_get (xlator_t *this, inode_t *inode, int *inode_type)
+{
+        int status = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        LOCK (&inode->lock);
+        status = __svc_inode_ctx_get (this, inode, inode_type);
+        UNLOCK (&inode->lock);
+
+out:
+        return status;
+}
+
+/*
+ * Locked wrapper around __svc_inode_ctx_set(): records the inode type
+ * while holding inode->lock. Returns -1 on a NULL @this or @inode.
+ */
+static int32_t
+svc_inode_ctx_set (xlator_t *this, inode_t *inode, int inode_type)
+{
+        int32_t status = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        LOCK (&inode->lock);
+        status = __svc_inode_ctx_set (this, inode, inode_type);
+        UNLOCK (&inode->lock);
+
+out:
+        return status;
+}
+
+/*
+ * Allocate a zero-filled svc_fd_t. Returns NULL if the allocation fails.
+ */
+static svc_fd_t *
+svc_fd_new (void)
+{
+        return GF_CALLOC (1, sizeof (svc_fd_t), gf_svc_mt_svc_fd_t);
+}
+
+/*
+ * Fetch the svc_fd_t stored in this translator's slot of the fd context.
+ * Lock-free variant: svc_fd_ctx_get() wraps it in fd->lock.
+ * Returns NULL when nothing is stored or an argument is NULL.
+ */
+static svc_fd_t *
+__svc_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        uint64_t  ctx_value = 0;
+        svc_fd_t *svc_fd    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        if (__fd_ctx_get (fd, this, &ctx_value) == 0)
+                svc_fd = (svc_fd_t *) ((long) ctx_value);
+
+out:
+        return svc_fd;
+}
+
+/*
+ * Locked wrapper around __svc_fd_ctx_get(): looks the fd context up while
+ * holding fd->lock. Returns NULL when absent or on a NULL argument.
+ */
+static svc_fd_t *
+svc_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        svc_fd_t *found = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        found = __svc_fd_ctx_get (this, fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return found;
+}
+
+/*
+ * Store the @svc_fd pointer in this translator's slot of the fd context.
+ * Lock-free variant; the caller is expected to hold fd->lock.
+ * Returns the result of __fd_ctx_set(), or -1 on a NULL argument.
+ */
+static int
+__svc_fd_ctx_set (xlator_t *this, fd_t *fd, svc_fd_t *svc_fd)
+{
+        int      status    = -1;
+        uint64_t ctx_value = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, svc_fd, out);
+
+        ctx_value = (uint64_t)(long) svc_fd;
+        status = __fd_ctx_set (fd, this, ctx_value);
+
+out:
+        return status;
+}
+
+/*
+ * Return the svc_fd_t attached to @fd, creating and attaching a new one
+ * when none exists yet. Lock-free variant: svc_fd_ctx_get_or_new() wraps
+ * it in fd->lock.
+ * Returns NULL on a NULL argument, on allocation failure, or when the new
+ * context cannot be stored (the fresh allocation is freed in that case).
+ */
+static svc_fd_t *
+__svc_fd_ctx_get_or_new (xlator_t *this, fd_t *fd)
+{
+        svc_fd_t *svc_fd = NULL;
+        int       status = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        svc_fd = __svc_fd_ctx_get (this, fd);
+        if (svc_fd) {
+                /* already present: nothing to create */
+                status = 0;
+                goto out;
+        }
+
+        svc_fd = svc_fd_new ();
+        if (!svc_fd) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate new fd "
+                        "context for gfid %s", uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        status = __svc_fd_ctx_set (this, fd, svc_fd);
+        if (status)
+                gf_log (this->name, GF_LOG_ERROR, "failed to set fd context "
+                        "for gfid %s", uuid_utoa (fd->inode->gfid));
+
+out:
+        if (status) {
+                /* any failure: drop whatever was allocated */
+                GF_FREE (svc_fd);
+                svc_fd = NULL;
+        }
+
+        return svc_fd;
+}
+
+/*
+ * Locked wrapper around __svc_fd_ctx_get_or_new(): gets or creates the
+ * fd context while holding fd->lock. Returns NULL on failure.
+ */
+static svc_fd_t *
+svc_fd_ctx_get_or_new (xlator_t *this, fd_t *fd)
+{
+        svc_fd_t *result = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        result = __svc_fd_ctx_get_or_new (this, fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return result;
+}
+
+
+/*
+ * Lookup callback.
+ *
+ * On success the inode is tagged NORMAL_INODE or VIRTUAL_INODE depending on
+ * which child answered (local->subvolume), and the reply is unwound.
+ *
+ * On failure from the first child with ENOENT/ESTALE, when the request
+ * carried a gfid and the inode has no context (or no inode was returned),
+ * the lookup is re-sent to the second child instead of being unwound.
+ * Failures from the second child are always unwound.
+ */
+static int32_t
+gf_svc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        svc_local_t  *local      = NULL;
+        xlator_t     *subvolume  = NULL;
+        gf_boolean_t  do_unwind  = _gf_true;
+        int           inode_type = -1;
+        int           ret        = -1;
+
+        local = frame->local;
+        subvolume = local->subvolume;
+        if (!subvolume) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "path: %s, "
+                                  "gfid: %s ", local->loc.path,
+                                  inode?uuid_utoa (inode->gfid):"");
+                GF_ASSERT (0);
+        }
+
+        /* There is a possibility that the client process just came online
+           and does not have the inode on which the lookup came. In that
+           case, the fresh inode created from fuse for the lookup fop won't
+           have the inode context set, without which svc cannot decide where
+           to STACK_WIND to. So by default it decides to send the fop to the
+           regular subvolume (i.e. first child of the xlator). If lookup
+           fails on the regular volume, then there is a possibility that the
+           lookup is happening on a virtual inode (i.e. history data residing
+           in snaps). So if lookup fails with ENOENT and the inode context is
+           not there, then send the lookup to the 2nd child of svc.
+
+           If there are any changes in volfile/client-restarted then
+           inode-ctx is lost. In this case if nameless lookup fails with
+           ESTALE, then send the lookup to the 2nd child of svc.
+        */
+        if (op_ret) {
+                if (subvolume == FIRST_CHILD (this)) {
+                        gf_log (this->name,
+                                (op_errno == ENOENT || op_errno == ESTALE)
+                                ? GF_LOG_DEBUG:GF_LOG_ERROR,
+                                "Lookup failed on normal graph with error %s",
+                                strerror (op_errno));
+                } else {
+                        gf_log (this->name,
+                                (op_errno == ENOENT || op_errno == ESTALE)
+                                ? GF_LOG_DEBUG:GF_LOG_ERROR,
+                                "Lookup failed on snapview graph with error %s",
+                                strerror (op_errno));
+                        goto out;
+                }
+
+                if ((op_errno == ENOENT || op_errno == ESTALE) &&
+                    !gf_uuid_is_null (local->loc.gfid)) {
+                        if (inode != NULL)
+                                ret = svc_inode_ctx_get (this, inode,
+                                                         &inode_type);
+
+                        if (ret < 0 || inode == NULL) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "Lookup on normal graph failed. "
+                                        "Sending lookup to snapview-server");
+
+                                subvolume = SECOND_CHILD (this);
+                                local->subvolume = subvolume;
+                                STACK_WIND (frame, gf_svc_lookup_cbk,
+                                            subvolume, subvolume->fops->lookup,
+                                            &local->loc, xdata);
+                                do_unwind = _gf_false;
+                        }
+                }
+
+                goto out;
+        }
+
+        if (subvolume == FIRST_CHILD (this))
+                inode_type = NORMAL_INODE;
+        else
+                inode_type = VIRTUAL_INODE;
+
+        ret = svc_inode_ctx_set (this, inode, inode_type);
+        if (ret)
+                /* the two literals are concatenated: keep the separating
+                   space so the message reads "type into" */
+                gf_log (this->name, GF_LOG_ERROR, "failed to set inode type "
+                        "into the context");
+
+out:
+        if (do_unwind) {
+                SVC_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+                                  xdata, postparent);
+        }
+
+        return 0;
+}
+
+/*
+ * Lookup fop: choose the child to send the lookup to and wind it with
+ * gf_svc_lookup_cbk as the callback.
+ *
+ * Routing, in order:
+ *  - root gfid                      -> first child
+ *  - nameless lookup (no loc->name) -> child picked from the inode context
+ *                                      when one exists, else first child
+ *  - name differs from priv->path   -> first child if the parent is a
+ *                                      NORMAL_INODE, else second child
+ *  - name equals priv->path         -> second child; when the parent is a
+ *                                      NORMAL_INODE, SVC_ENTRY_POINT_SET is
+ *                                      invoked on the request xdata first
+ *
+ * Unwinds with op_ret -1 on invalid arguments (EINVAL) or when the local
+ * cannot be allocated (ENOMEM). Always returns 0.
+ */
+static int32_t
+gf_svc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t ret = -1;
+ svc_local_t *local = NULL;
+ xlator_t *subvolume = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ inode_t *parent = NULL;
+ svc_private_t *priv = NULL;
+ dict_t *new_xdata = NULL;
+ int inode_type = -1;
+ int parent_type = -1;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("svc", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ priv = this->private;
+
+ /* Failures of the context lookups below are tolerated: inode_type and
+ parent_type simply keep their initial value of -1. */
+ ret = svc_inode_ctx_get (this, loc->inode, &inode_type);
+ if (!__is_root_gfid (loc->gfid)) {
+ if (loc->parent) {
+ parent = inode_ref (loc->parent);
+ ret = svc_inode_ctx_get (this, loc->parent,
+ &parent_type);
+ } else {
+ parent = inode_parent (loc->inode, loc->pargfid, NULL);
+ if (parent)
+ ret = svc_inode_ctx_get (this, parent,
+ &parent_type);
+ }
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to allocate local");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ /* The callback reads local->loc and local->subvolume. */
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ if (__is_root_gfid (loc->inode->gfid)) {
+ subvolume = FIRST_CHILD (this);
+ GF_ASSERT (subvolume);
+ local->subvolume = subvolume;
+ wind = _gf_true;
+ goto out;
+ }
+
+ /* nfs sends nameless lookups directly using the gfid. In that case
+ loc->name will be NULL. So check if loc->name is NULL. If so, then
+ try to get the subvolume using inode context. But if the inode has
+ not been looked up yet, then send the lookup call to the first
+ subvolume.
+ */
+
+ if (!loc->name) {
+ if (gf_uuid_is_null (loc->inode->gfid)) {
+ subvolume = FIRST_CHILD (this);
+ local->subvolume = subvolume;
+ wind = _gf_true;
+ goto out;
+ } else {
+ if (inode_type >= 0)
+ subvolume = svc_get_subvolume (this,
+ inode_type);
+ else
+ subvolume = FIRST_CHILD (this);
+ local->subvolume = subvolume;
+ wind = _gf_true;
+ goto out;
+ }
+ }
+
+ /* strcmp() != 0: the name is not the snapshot entry point. */
+ if (strcmp (loc->name, priv->path)) {
+ if (parent_type == NORMAL_INODE) {
+ subvolume = FIRST_CHILD (this);
+ local->subvolume = subvolume;
+ } else {
+ subvolume = SECOND_CHILD (this);
+ local->subvolume = subvolume;
+ }
+ } else {
+ subvolume = SECOND_CHILD (this);
+ local->subvolume = subvolume;
+ if (parent_type == NORMAL_INODE) {
+ /* Indication of whether the lookup is happening on the
+ entry point or not, to the snapview-server.
+ */
+ SVC_ENTRY_POINT_SET (this, xdata, op_ret, op_errno,
+ new_xdata, priv, ret, out);
+ }
+ }
+
+ wind = _gf_true;
+
+out:
+ if (wind)
+ STACK_WIND (frame, gf_svc_lookup_cbk, subvolume,
+ subvolume->fops->lookup, loc, xdata);
+ else
+ SVC_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL,
+ NULL, NULL, NULL);
+ /* Drop the references taken above on every path. */
+ if (new_xdata)
+ dict_unref (new_xdata);
+
+ if (parent)
+ inode_unref (parent);
+
+ return 0;
+}
+
+/*
+ * statfs fop.
+ *
+ * The call normally goes to the child selected by the inode context.
+ * When the inode is virtual and loc->path ends with the snapshot entry
+ * point name (priv->path), the request is redirected to the first child
+ * using a temporary loc that describes the root ("/", root gfid, root
+ * inode of the table).
+ *
+ * Unwinds with EINVAL on invalid arguments or a missing inode context and
+ * with ENOMEM if the temporary root path cannot be duplicated.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        xlator_t      *subvolume  = NULL;
+        int32_t        ret        = -1;
+        int            inode_type = -1;
+        int32_t        op_ret     = -1;
+        int32_t        op_errno   = EINVAL;
+        gf_boolean_t   wind       = _gf_false;
+        svc_private_t *priv       = NULL;
+        const char    *path       = NULL;
+        size_t         path_len   = 0;
+        size_t         snap_len   = 0;
+        loc_t          root_loc   = {0,};
+        loc_t         *temp_loc   = NULL;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        priv = this->private;
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+        temp_loc = loc;
+
+        /* A loc without a path cannot name the entry point, so the suffix
+           check is skipped instead of calling strlen() on NULL. */
+        if (inode_type == VIRTUAL_INODE && loc->path) {
+                path_len = strlen (loc->path);
+                snap_len = strlen (priv->path);
+        }
+
+        if (inode_type == VIRTUAL_INODE && loc->path &&
+            path_len >= snap_len) {
+                path = &loc->path[path_len - snap_len];
+                if (!strcmp (path, priv->path)) {
+                        /*
+                         * statfs call for virtual snap directory.
+                         * Send the fop to the parent volume by removing
+                         * the virtual directory from the path.
+                         */
+                        root_loc.path = gf_strdup("/");
+                        if (!root_loc.path) {
+                                op_ret = -1;
+                                op_errno = ENOMEM;
+                                goto out;
+                        }
+                        subvolume = FIRST_CHILD (this);
+                        gf_uuid_clear(root_loc.gfid);
+                        root_loc.gfid[15] = 1;
+                        root_loc.inode = inode_ref (loc->inode->table->root);
+                        temp_loc = &root_loc;
+                }
+        }
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->statfs,
+                         temp_loc, xdata);
+        /* root_loc lives on this stack frame: release what it holds */
+        if (temp_loc == &root_loc)
+                loc_wipe (temp_loc);
+
+        wind = _gf_true;
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (statfs, frame, op_ret, op_errno,
+                                  NULL, NULL);
+        return 0;
+}
+
+/*
+ * stat callback: for a successful stat of a directory, bump the
+ * nanosecond part of ctime by one before unwinding.
+ */
+static int32_t
+gf_svc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                 dict_t *xdata)
+{
+        /* Why the ctime tweak - consider this sequence:
+         *   # mount -t nfs host1:/vol1 /mnt
+         *   # ls /mnt
+         *   # ls /mnt/.snaps        (fails, as expected)
+         *   # gluster volume set vol1 features.uss enable
+         * `ls /mnt/.snaps` should now work, yet it fails with "No such
+         * file or directory": the NFS client caches the directory listing
+         * and refreshes it only when the directory attributes change.
+         * Changing the 'ctime' attribute while USS is enabled works
+         * around that.
+         */
+        if (op_ret == 0) {
+                if (IA_ISDIR(buf->ia_type))
+                        buf->ia_ctime_nsec++;
+        }
+
+        SVC_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+/* should all the fops be handled like lookup is supposed to be
+ handled? i.e just based on inode type decide where the call should
+ be sent and in the call back update the contexts.
+*/
+/*
+ * stat fop: wind to the child selected by the inode context, with
+ * gf_svc_stat_cbk as the callback. Invalid arguments or a failed
+ * SVC_GET_SUBVOL_FROM_CTX unwind the call with an error instead.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             dict_t *xdata)
+{
+        xlator_t *subvolume  = NULL;
+        int       inode_type = -1;
+        int32_t   ret        = -1;
+        int32_t   op_ret     = -1;
+        int32_t   op_errno   = EINVAL;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+
+        STACK_WIND (frame, gf_svc_stat_cbk, subvolume,
+                    subvolume->fops->stat, loc, xdata);
+        return 0;
+
+out:
+        /* reached only through a failed check above: nothing was wound */
+        SVC_STACK_UNWIND (stat, frame, op_ret, op_errno,
+                          NULL, NULL);
+        return 0;
+}
+
+/*
+ * fstat fop: tail-wind to the child selected by the context of fd->inode.
+ * Invalid arguments or a failed SVC_GET_SUBVOL_FROM_CTX unwind the call
+ * with an error instead.
+ *
+ * Always returns 0, like the other fops in this file: the outcome is
+ * reported through the wind/unwind, so returning the internal status
+ * (-1 after an error that has already been unwound) would be misleading.
+ */
+static int32_t
+gf_svc_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        int32_t       ret        = -1;
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+        int32_t       op_ret     = -1;
+        int32_t       op_errno   = EINVAL;
+        gf_boolean_t  wind       = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 fd->inode, subvolume, out);
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->fstat, fd, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * opendir callback.
+ *
+ * When the opendir was served by the first child and priv->special_dir is
+ * a non-empty string, the opened path (local->loc.path) is compared with
+ * the special directory (and with "<special_dir>/." or "/."). On a match
+ * an fd context is created with last_offset = -1 and special_dir set.
+ *
+ * The reply is unwound through SVC_STACK_UNWIND, the wrapper every other
+ * fop in this file uses, so that the local attached by gf_svc_opendir()
+ * (which holds a copied loc) goes through the same cleanup path instead of
+ * being left attached to the frame.
+ * NOTE(review): SVC_STACK_UNWIND is defined in snapview-client.h; confirm
+ * it releases frame->local via svc_local_free().
+ */
+static int32_t
+gf_svc_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        svc_fd_t      *svc_fd      = NULL;
+        svc_local_t   *local       = NULL;
+        svc_private_t *priv        = NULL;
+        gf_boolean_t   special_dir = _gf_false;
+        char           path[PATH_MAX] = {0, };
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        if (op_ret)
+                goto out;
+
+        priv = this->private;
+        local = frame->local;
+
+        if (local->subvolume == FIRST_CHILD (this) && priv->special_dir
+            && strcmp (priv->special_dir, "")) {
+                if (!__is_root_gfid (fd->inode->gfid))
+                        snprintf (path, sizeof (path), "%s/.",
+                                  priv->special_dir);
+                else
+                        snprintf (path, sizeof (path), "/.");
+
+                if (!strcmp (local->loc.path, priv->special_dir) ||
+                    !strcmp (local->loc.path, path)) {
+                        gf_log_callingfn (this->name, GF_LOG_DEBUG,
+                                          "got opendir on special "
+                                          "directory %s (%s)", path,
+                                          uuid_utoa (fd->inode->gfid));
+                        special_dir = _gf_true;
+                }
+        }
+
+        if (special_dir) {
+                svc_fd = svc_fd_ctx_get_or_new (this, fd);
+                if (!svc_fd) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fd context not found for %s",
+                                uuid_utoa (fd->inode->gfid));
+                        goto out;
+                }
+
+                svc_fd->last_offset = -1;
+                svc_fd->special_dir = special_dir;
+        }
+
+out:
+        SVC_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
+
+        return 0;
+}
+
+
+/* If the inode represents a directory which is actually
+ present in a snapshot, then opendir on that directory
+ should be sent to the snap-view-server which opens
+ the directory in the corresponding graph.
+ In fact any opendir call on a virtual directory
+ should be sent to svs. Because if it fakes success
+ here, then later when readdir on that fd comes, there
+ will not be any corresponding fd opened on svs and
+ svc has to do things that open-behind is doing.
+*/
+/*
+ * opendir fop: wind to the child selected by the inode context, with
+ * gf_svc_opendir_cbk as the callback. A local carrying a copy of @loc and
+ * the chosen subvolume is attached to the frame for the callback.
+ *
+ * The subvolume is resolved before the local is allocated: a failure of
+ * SVC_GET_SUBVOL_FROM_CTX jumps to 'out', and a local allocated earlier
+ * but not yet attached to the frame would be leaked on that path.
+ *
+ * Unwinds with EINVAL on invalid arguments or a missing inode context and
+ * with ENOMEM when the local cannot be allocated. Always returns 0.
+ */
+static int32_t
+gf_svc_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+                dict_t *xdata)
+{
+        int32_t       ret        = -1;
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+        int           op_ret     = -1;
+        int           op_errno   = EINVAL;
+        gf_boolean_t  wind       = _gf_false;
+        svc_local_t  *local      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate memory "
+                        "for local (path: %s, gfid: %s)", loc->path,
+                        uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        loc_copy (&local->loc, loc);
+        local->subvolume = subvolume;
+        frame->local = local;
+
+        STACK_WIND (frame, gf_svc_opendir_cbk, subvolume,
+                    subvolume->fops->opendir, loc, fd, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (opendir, frame, op_ret, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * setattr fop: allowed only on NORMAL_INODE inodes, for which the call is
+ * tail-wound to the first child. Any other inode type is refused with
+ * EROFS; a missing inode context or invalid argument yields EINVAL.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        int     inode_type = -1;
+        int     op_ret     = -1;
+        int     op_errno   = EINVAL;
+        int32_t ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        ret = svc_inode_ctx_get (this, loc->inode, &inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s (gfid: %s)", loc->path,
+                        uuid_utoa (loc->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (inode_type != NORMAL_INODE) {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->setattr, loc, stbuf,
+                         valid, xdata);
+        return 0;
+
+out:
+        /* reached only through a failed check above: nothing was wound */
+        SVC_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+                          NULL, NULL, NULL);
+        return 0;
+}
+
+/* XXX: This function is currently not used. Remove "#if 0" when required */
+#if 0
+/*
+ * fsetattr fop (compiled out): fd-based counterpart of gf_svc_setattr().
+ * Tail-winds to the first child for NORMAL_INODE inodes, unwinds with
+ * EROFS for other inode types and with EINVAL when the inode context
+ * cannot be read.
+ */
+static int32_t
+gf_svc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("svc", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+ ret = svc_inode_ctx_get (this, fd->inode, &inode_type);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+ "context for %s", uuid_utoa (fd->inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf,
+ valid, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
+ return 0;
+}
+#endif /* gf_svc_fsetattr() is not used */
+
+/*
+ * getxattr fop.
+ *
+ * A request whose name has the form
+ * GF_XATTR_GET_REAL_FILENAME_KEY "<value>" (e.g.
+ * "glusterfs.get_real_filename:.snaps") and whose <value> matches
+ * priv->path case-insensitively is answered locally: the reply dict maps
+ * @name to priv->path and op_ret is strlen (priv->path) + 1.
+ * Every other request is tail-wound to the child selected by the inode
+ * context.
+ *
+ * @name comes from the caller and is split into fixed-size buffers, so
+ * both pieces are length-checked first; a name that does not fit cannot be
+ * the entry-point query and is forwarded untouched.
+ *
+ * Unwinds with EINVAL on invalid arguments or a missing inode context and
+ * with ENOMEM when the reply dict cannot be built. Always returns 0.
+ */
+static int32_t
+gf_svc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *name, dict_t *xdata)
+{
+        int32_t        ret                = -1;
+        int            inode_type         = -1;
+        xlator_t      *subvolume          = NULL;
+        int            op_ret             = -1;
+        int            op_errno           = EINVAL;
+        gf_boolean_t   wind               = _gf_false;
+        svc_private_t *priv               = NULL;
+        char           attrname[PATH_MAX] = "";
+        char           attrval[64]        = "";
+        dict_t        *dict               = NULL;
+        size_t         key_len            = 0;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        /*
+         * Samba sends this special key for case insensitive
+         * filename check. This request comes with a parent
+         * path and with a special key GF_XATTR_GET_REAL_FILENAME_KEY.
+         * e.g. "glusterfs.get_real_filename:.snaps".
+         * If the name variable matches this key then we have
+         * to send back .snaps as the real filename.
+         */
+        if (!name)
+                goto stack_wind;
+
+        /* attrname must hold the part before ':' plus the ':' appended
+           below plus the terminator; attrval must hold the part up to '@'
+           plus the terminator. */
+        key_len = strcspn (name, ":");
+        if (key_len >= sizeof (attrname) - 1)
+                goto stack_wind;
+        if (name[key_len] == ':' &&
+            strcspn (name + key_len + 1, "@") >= sizeof (attrval))
+                goto stack_wind;
+
+        sscanf (name, "%[^:]:%[^@]", attrname, attrval);
+        strcat (attrname, ":");
+
+        if (!strcmp (attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) {
+                if (!strcasecmp (attrval, priv->path)) {
+                        dict = dict_new ();
+                        if (NULL == dict) {
+                                op_errno = ENOMEM;
+                                goto out;
+                        }
+
+                        ret = dict_set_dynstr_with_alloc (dict,
+                                                          (char *)name,
+                                                          priv->path);
+
+                        if (ret) {
+                                op_errno = ENOMEM;
+                                /* release here and clear the pointer so
+                                   'out' neither unwinds with nor unrefs a
+                                   released dict */
+                                dict_unref (dict);
+                                dict = NULL;
+                                goto out;
+                        }
+
+                        op_errno = 0;
+                        op_ret = strlen (priv->path) + 1;
+                        /* We should return from here */
+                        goto out;
+                }
+        }
+stack_wind:
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->getxattr, loc, name,
+                         xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+                                  dict, NULL);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/* XXX: This function is currently not used. Remove "#if 0" when required */
+#if 0
+/*
+ * fgetxattr fop (compiled out): tail-winds to the child selected by the
+ * context of fd->inode; unwinds with an error when an argument is invalid
+ * or SVC_GET_SUBVOL_FROM_CTX fails.
+ */
+static int32_t
+gf_svc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ gf_boolean_t wind = _gf_false;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO ("svc", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+ fd->inode, subvolume, out);
+
+ STACK_WIND_TAIL (frame, subvolume,
+ subvolume->fops->fgetxattr, fd, name, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno,
+ NULL, NULL);
+ return 0;
+}
+#endif /* gf_svc_fgetxattr() is not used */
+
+/*
+ * setxattr fop: allowed only on NORMAL_INODE inodes, for which the call
+ * is tail-wound to the first child. Any other inode type is refused with
+ * EROFS; a missing inode context or invalid argument yields EINVAL.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+                 int32_t flags, dict_t *xdata)
+{
+        int     inode_type = -1;
+        int     op_ret     = -1;
+        int     op_errno   = EINVAL;
+        int32_t ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        ret = svc_inode_ctx_get (this, loc->inode, &inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get inode context "
+                        "for %s (gfid: %s)", loc->name,
+                        uuid_utoa (loc->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (inode_type != NORMAL_INODE) {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->setxattr, loc, dict,
+                         flags, xdata);
+        return 0;
+
+out:
+        /* reached only through a failed check above: nothing was wound */
+        SVC_STACK_UNWIND (setxattr, frame, op_ret, op_errno,
+                          NULL);
+
+        return 0;
+}
+
+/*
+ * fsetxattr fop: fd-based counterpart of gf_svc_setxattr(). Tail-winds
+ * to the first child for NORMAL_INODE inodes, unwinds with EROFS for other
+ * inode types and with EINVAL when the inode context cannot be read or an
+ * argument is invalid. Always returns 0.
+ */
+static int32_t
+gf_svc_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+                  int32_t flags, dict_t *xdata)
+{
+        int     inode_type = -1;
+        int     op_ret     = -1;
+        int     op_errno   = EINVAL;
+        int32_t ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        ret = svc_inode_ctx_get (this, fd->inode, &inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get inode context "
+                        "for %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (inode_type != NORMAL_INODE) {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->fsetxattr, fd, dict,
+                         flags, xdata);
+        return 0;
+
+out:
+        /* reached only through a failed check above: nothing was wound */
+        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno,
+                             NULL);
+
+        return 0;
+}
+
+/*
+ * rmdir fop: allowed only on NORMAL_INODE inodes, for which the call is
+ * tail-wound to the first child. Any other inode type is refused with
+ * EROFS; a missing inode context or invalid argument yields EINVAL.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+              dict_t *xdata)
+{
+        int inode_type = -1;
+        int op_ret     = -1;
+        int op_errno   = EINVAL;
+        int ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        ret = svc_inode_ctx_get (this, loc->inode, &inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s (gfid: %s)", loc->name,
+                        uuid_utoa (loc->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (inode_type != NORMAL_INODE) {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->rmdir, loc, flags,
+                         xdata);
+        return 0;
+
+out:
+        /* reached only through a failed check above: nothing was wound */
+        SVC_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+                          NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * mkdir callback: tag the newly created directory as a NORMAL_INODE on
+ * success (a failure to do so is only logged), then unwind the reply
+ * unchanged.
+ */
+static int32_t
+gf_svc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        if (op_ret >= 0) {
+                if (svc_inode_ctx_set (this, inode, NORMAL_INODE))
+                        gf_log (this->name, GF_LOG_ERROR, "failed to set inode "
+                                "context");
+        }
+
+        SVC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode,
+                          buf, preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * mkdir fop: creation is allowed only when the parent is a NORMAL_INODE
+ * and the new name differs from the snapshot entry point (priv->path); the
+ * call is then wound to the first child with gf_svc_mkdir_cbk. Otherwise
+ * it is refused with EROFS.
+ *
+ * loc->parent and loc->name are validated up front: the error log below
+ * prints the parent's gfid and strcmp() reads the name, so either being
+ * NULL would otherwise be dereferenced.
+ *
+ * Unwinds with EINVAL on invalid arguments or a missing parent context.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+              mode_t umask, dict_t *xdata)
+{
+        int            parent_type = -1;
+        int            ret         = -1;
+        int            op_ret      = -1;
+        int            op_errno    = EINVAL;
+        svc_private_t *priv        = NULL;
+        gf_boolean_t   wind        = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
+        priv = this->private;
+
+        ret = svc_inode_ctx_get (this, loc->parent, &parent_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s", uuid_utoa (loc->parent->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) {
+                STACK_WIND (frame, gf_svc_mkdir_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->mkdir, loc, mode,
+                            umask, xdata);
+        } else {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, NULL, NULL,
+                                  NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * mknod callback: tag the newly created inode as a NORMAL_INODE on
+ * success (a failure to do so is only logged), then unwind the reply
+ * unchanged.
+ */
+static int32_t
+gf_svc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        if (op_ret >= 0) {
+                if (svc_inode_ctx_set (this, inode, NORMAL_INODE))
+                        gf_log (this->name, GF_LOG_ERROR, "failed to set inode "
+                                "context");
+        }
+
+        SVC_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
+                          buf, preparent, postparent, xdata);
+        return 0;
+}
+
+/*
+ * mknod fop: creation is allowed only when the parent is a NORMAL_INODE
+ * and the new name differs from the snapshot entry point (priv->path); the
+ * call is then wound to the first child with gf_svc_mknod_cbk. Otherwise
+ * it is refused with EROFS.
+ *
+ * loc->parent and loc->name are validated up front: the error log below
+ * prints the parent's gfid and strcmp() reads the name, so either being
+ * NULL would otherwise be dereferenced.
+ *
+ * Unwinds with EINVAL on invalid arguments or a missing parent context.
+ * Always returns 0.
+ */
+static int32_t
+gf_svc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+              dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        int            parent_type = -1;
+        int            ret         = -1;
+        int            op_ret      = -1;
+        int            op_errno    = EINVAL;
+        svc_private_t *priv        = NULL;
+        gf_boolean_t   wind        = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
+        priv = this->private;
+
+        ret = svc_inode_ctx_get (this, loc->parent, &parent_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s", uuid_utoa (loc->parent->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) {
+                STACK_WIND (frame, gf_svc_mknod_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->mknod, loc, mode,
+                            rdev, umask, xdata);
+        } else {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL,
+                                  NULL, NULL, NULL);
+        return 0;
+}
+
+/* If the flags of the open call contain O_WRONLY or O_RDWR and the inode is
+   not served by the first child (i.e. it is a virtual inode), then unwind
+   the call with EROFS, the same error the other modifying fops in this
+   file return for such inodes. Otherwise tail-wind the call to the
+   subvolume selected by the inode context.
+
+   Unwinds with EINVAL on invalid arguments or a missing inode context.
+   Always returns 0.
+*/
+static int32_t
+gf_svc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+             fd_t *fd, dict_t *xdata)
+{
+        xlator_t     *subvolume  = NULL;
+        int           inode_type = -1;
+        int           op_ret     = -1;
+        int           op_errno   = EINVAL;
+        int           ret        = -1;
+        gf_boolean_t  wind       = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        /* Another way is to STACK_WIND to normal subvolume, if inode
+           type is not there in the context. If the file actually resides
+           in snapshots, then ENOENT would be returned. Needs more analysis.
+        */
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+
+        if (((flags & O_ACCMODE) == O_WRONLY) ||
+            ((flags & O_ACCMODE) == O_RDWR)) {
+                if (subvolume != FIRST_CHILD (this)) {
+                        /* snapshot data is read-only */
+                        op_ret = -1;
+                        op_errno = EROFS;
+                        goto out;
+                }
+        }
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->open, loc,
+                         flags, fd, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (open, frame, op_ret, op_errno, NULL,
+                                  NULL);
+        return 0;
+}
+
+/*
+ * Callback of the create wound by gf_svc_create. On success the new inode
+ * is tagged as NORMAL_INODE in the inode context; a failure to tag is only
+ * logged. The reply is then unwound unchanged.
+ */
+static int32_t
+gf_svc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                   struct iatt *stbuf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        int     ctx_ret = -1;
+        int     type    = -1;
+
+        /* nothing to tag when the create itself failed */
+        if (op_ret >= 0) {
+                type = NORMAL_INODE;
+                ctx_ret = svc_inode_ctx_set (this, inode, type);
+                if (ctx_ret)
+                        gf_log (this->name, GF_LOG_ERROR, "failed to set inode "
+                                "context");
+        }
+
+        SVC_STACK_UNWIND (create, frame, op_ret, op_errno, fd,
+                          inode, stbuf, preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * gf_svc_create: create fop of snapview-client.
+ *
+ * A create is wound to the first (regular) child only when the parent is a
+ * NORMAL_INODE and the new name differs from the entry-point name
+ * (priv->path). Otherwise it is unwound with EROFS. A missing parent or a
+ * parent without inode context is unwound with EINVAL.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+               mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        int            parent_type = -1;
+        int            ret         = -1;
+        int            op_ret      = -1;
+        int            op_errno    = EINVAL;
+        svc_private_t *priv        = NULL;
+        gf_boolean_t   wind        = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        /* loc->parent is dereferenced below, so it must be present */
+        GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        priv = this->private;
+
+        ret = svc_inode_ctx_get (this, loc->parent, &parent_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s", uuid_utoa (loc->parent->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) {
+                STACK_WIND (frame, gf_svc_create_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->create, loc, flags,
+                            mode, umask, fd, xdata);
+        } else {
+                /* inside a snapshot, or clashing with the entry point */
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (create, frame, op_ret, op_errno,
+                                  NULL, NULL, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Callback of the symlink wound by gf_svc_symlink. On success the new
+ * inode is tagged as NORMAL_INODE in the inode context; a failure to tag
+ * is only logged. The reply is then unwound unchanged.
+ */
+static int32_t
+gf_svc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+        int     ctx_ret = -1;
+        int     type    = -1;
+
+        /* nothing to tag when the symlink itself failed */
+        if (op_ret >= 0) {
+                type = NORMAL_INODE;
+                ctx_ret = svc_inode_ctx_set (this, inode, type);
+                if (ctx_ret)
+                        gf_log (this->name, GF_LOG_ERROR, "failed to set inode "
+                                "context");
+        }
+
+        SVC_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode,
+                          buf, preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * gf_svc_symlink: symlink fop of snapview-client.
+ *
+ * The symlink is wound to the first (regular) child only when the parent
+ * is a NORMAL_INODE and the new name differs from the entry-point name
+ * (priv->path). Otherwise it is unwound with EROFS. A missing parent or a
+ * parent without inode context is unwound with EINVAL.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+                loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        int            parent_type = -1;
+        int            op_ret      = -1;
+        int            op_errno    = EINVAL;
+        int            ret         = -1;
+        svc_private_t *priv        = NULL;
+        gf_boolean_t   wind        = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        /* loc->parent is dereferenced below, so it must be present */
+        GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+
+        priv = this->private;
+
+        ret = svc_inode_ctx_get (this, loc->parent, &parent_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s", uuid_utoa (loc->parent->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) {
+                STACK_WIND (frame, gf_svc_symlink_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->symlink, linkpath, loc,
+                            umask, xdata);
+        } else {
+                /* inside a snapshot, or clashing with the entry point */
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (symlink, frame, op_ret, op_errno,
+                                  NULL, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * gf_svc_unlink: unlink fop of snapview-client.
+ *
+ * Only entries whose inode is a NORMAL_INODE may be removed; those are
+ * wound to the first (regular) child. Anything else is unwound with EROFS.
+ * A missing inode context is unwound with EINVAL.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+               dict_t *xdata)
+{
+        int          inode_type = -1;
+        int          op_ret     = -1;
+        int          op_errno   = EINVAL;
+        int          ret        = -1;
+        gf_boolean_t wind       = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        ret = svc_inode_ctx_get (this, loc->inode, &inode_type);
+        if (ret < 0) {
+                /* report the inode that was looked up; loc->parent is not
+                 * validated here and may be NULL */
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s", uuid_utoa (loc->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (inode_type == NORMAL_INODE) {
+                STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                                 FIRST_CHILD (this)->fops->unlink, loc, flags,
+                                 xdata);
+        } else {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL,
+                                  NULL);
+        return 0;
+}
+
+/*
+ * readv fop: the read is forwarded to whichever subvolume the inode
+ * context of fd->inode maps to. If any argument check or the context
+ * lookup fails, the call is unwound with -1/EINVAL.
+ */
+static int32_t
+gf_svc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t offset, uint32_t flags, dict_t *xdata)
+{
+        gf_boolean_t  wind       = _gf_false;
+        int           op_errno   = EINVAL;
+        int           op_ret     = -1;
+        int           ret        = -1;
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        /* jumps to out when fd->inode has no inode context */
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 fd->inode, subvolume, out);
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->readv,
+                         fd, size, offset, flags, xdata);
+        wind = _gf_true;
+
+out:
+        if (wind)
+                return 0;
+
+        SVC_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL,
+                          NULL, NULL);
+        return 0;
+}
+
+/*
+ * readlink fop: forwarded to whichever subvolume the inode context of
+ * loc->inode maps to. If any argument check or the context lookup fails,
+ * the call is unwound with -1/EINVAL.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+                 dict_t *xdata)
+{
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+        int           ret        = -1;
+        int           op_ret     = -1;
+        int           op_errno   = EINVAL;
+        gf_boolean_t  wind       = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->readlink, loc, size,
+                         xdata);
+
+        wind = _gf_true;
+
+out:
+        /* use the translator's own unwind wrapper like every other fop, so
+         * that frame->local handling stays uniform */
+        if (!wind)
+                SVC_STACK_UNWIND (readlink, frame, op_ret, op_errno, NULL, NULL,
+                                  NULL);
+        return 0;
+}
+
+/*
+ * access fop: forwarded to whichever subvolume the inode context of
+ * loc->inode maps to. If any argument check or the context lookup fails,
+ * the call is unwound with -1/EINVAL.
+ */
+static int32_t
+gf_svc_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+               dict_t *xdata)
+{
+        gf_boolean_t  wind       = _gf_false;
+        xlator_t     *subvolume  = NULL;
+        int           inode_type = -1;
+        int           op_errno   = EINVAL;
+        int           op_ret     = -1;
+        int           ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        /* jumps to out when loc->inode has no inode context */
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 loc->inode, subvolume, out);
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->access, loc, mask,
+                         xdata);
+        wind = _gf_true;
+
+out:
+        if (wind)
+                return 0;
+
+        SVC_STACK_UNWIND (access, frame, op_ret, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback of readdir. When the listing came from the first (regular)
+ * child, any entry whose name equals the entry-point name (priv->path) is
+ * dropped from the list before the reply is unwound.
+ */
+int32_t
+gf_svc_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                    dict_t *xdata)
+{
+        gf_dirent_t   *cur   = NULL;
+        gf_dirent_t   *next  = NULL;
+        svc_local_t   *local = NULL;
+        svc_private_t *priv  = NULL;
+
+        if (op_ret < 0)
+                goto out;
+
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        local = frame->local;
+        priv = this->private;
+
+        /* If .snaps pre-exists, then it should not be listed
+         * in the NORMAL INODE directory when USS is enabled,
+         * so filter the .snaps entry if exists.
+         * However it is OK to list .snaps in VIRTUAL world
+         */
+        if (local->subvolume == FIRST_CHILD (this)) {
+                list_for_each_entry_safe (cur, next, &entries->list, list) {
+                        if (!strcmp (priv->path, cur->d_name))
+                                gf_dirent_entry_free (cur);
+                }
+        }
+
+out:
+        SVC_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, xdata);
+        return 0;
+}
+
+/*
+ * gf_svc_readdir: readdir fop of snapview-client.
+ *
+ * If the fd context records that the entry point was already handed out
+ * and @off equals the remembered offset, the call is unwound as
+ * end-of-directory (op_ret 0, ENOENT). Otherwise the readdir is wound to
+ * the subvolume that owns fd->inode, with that subvolume remembered in
+ * frame->local for gf_svc_readdir_cbk.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                off_t off, dict_t *xdata)
+{
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+        svc_local_t  *local      = NULL;
+        int           ret        = -1;
+        int           op_ret     = -1;
+        int           op_errno   = EINVAL;
+        gf_boolean_t  wind       = _gf_false;
+        svc_fd_t     *svc_fd     = NULL;
+        gf_dirent_t   entries;
+
+        /* initialise the embedded list head, as gf_svc_readdirp does */
+        INIT_LIST_HEAD (&entries.list);
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        svc_fd = svc_fd_ctx_get_or_new (this, fd);
+        if (!svc_fd)
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the fd "
+                        "context for the inode %s",
+                        uuid_utoa (fd->inode->gfid));
+        else {
+                if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
+                        op_ret = 0;
+                        op_errno = ENOENT;
+                        goto out;
+                }
+        }
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 fd->inode, subvolume, out);
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate local");
+                /* report the allocation failure as such, not as EINVAL */
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+        local->subvolume = subvolume;
+        frame->local = local;
+
+        STACK_WIND (frame, gf_svc_readdir_cbk, subvolume,
+                    subvolume->fops->readdir, fd, size, off, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries,
+                                  NULL);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * This lookup is mainly for supporting USS for windows.
+ * Since the dentry for the entry-point directory is not sent in
+ * the readdir response, from windows explorer, there is no way
+ * to access the snapshots. If the explicit path of the entry-point
+ * directory is mentioned in the address bar, then windows sends
+ * readdir on the parent directory and compares if the entry point
+ * directory's name is there in readdir response. If it is not there
+ * then access to snapshot world is denied. And windows users cannot
+ * access snapshots via samba.
+ * So, to handle this a new option called special-directory is created,
+ * which if set, snapview-client will send the entry-point's dentry
+ * in readdirp o/p for the special directory, so that it will be
+ * visible from windows explorer.
+ * But to send that virtual entry, the following mechanism is used.
+ * 1) Check if readdir from posix is over.
+ * 2) If so, then send a lookup on entry point directory to snap daemon
+ * (this is needed because in readdirp inodes are linked, so we need to
+ * maintain 1:1 mapping between inodes (gfids) from snapview server to
+ * snapview client).
+ * 3) Once successful lookup response received, send a new entry to
+ * windows.
+ */
+
+/*
+ * Callback of the entry-point lookup wound by
+ * gf_svc_readdir_on_special_dir. On a successful lookup a single dirent
+ * for the entry point (private->path) is built, tagged VIRTUAL_INODE and
+ * unwound as the readdirp reply with op_ret 1. Every failure is unwound as
+ * an empty reply (op_ret 0). The xdata unwound is the one saved in
+ * frame->local, not the lookup's own xdata.
+ */
+static int32_t
+gf_svc_readdirp_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, inode_t *inode,
+                            struct iatt *buf, dict_t *xdata,
+                            struct iatt *postparent)
+{
+        gf_dirent_t    entries;
+        gf_dirent_t   *entry        = NULL;
+        svc_private_t *private      = NULL;
+        svc_fd_t      *svc_fd       = NULL;
+        svc_local_t   *local        = NULL;
+        dict_t        *unwind_xdata = NULL;
+        int            inode_type   = -1;
+        int            ret          = -1;
+
+        /* Everything the out: label touches must be valid before the first
+         * goto, otherwise a failed validation would unwind with an
+         * uninitialised list and dereference a NULL local. */
+        INIT_LIST_HEAD (&entries.list);
+
+        if (frame)
+                local = frame->local;
+        if (local)
+                unwind_xdata = local->xdata;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        private = this->private;
+
+        /* The lookup reply's xdata is borrowed from the callee and must not
+         * be unref'd here; only local->xdata belongs to this frame. */
+
+        if (op_ret) {
+                op_ret = 0;
+                op_errno = ENOENT;
+                goto out;
+        }
+
+        svc_fd = svc_fd_ctx_get (this, local->fd);
+        if (!svc_fd) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the fd "
+                        "context for the inode %s",
+                        uuid_utoa (local->fd->inode->gfid));
+                op_ret = 0;
+                op_errno = ENOENT;
+                goto out;
+        }
+
+        entry = gf_dirent_for_name (private->path);
+        if (!entry) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate memory "
+                        "for the entry %s", private->path);
+                op_ret = 0;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        entry->inode = inode_ref (inode);
+        entry->d_off = svc_fd->last_offset + 22;
+        entry->d_ino = buf->ia_ino;
+        entry->d_type = DT_DIR;
+        entry->d_stat = *buf;
+        inode_type = VIRTUAL_INODE;
+        ret = svc_inode_ctx_set (this, entry->inode, inode_type);
+        if (ret)
+                gf_log (this->name, GF_LOG_ERROR, "failed to set the inode "
+                        "context");
+
+        list_add_tail (&entry->list, &entries.list);
+        op_ret = 1;
+        /* remembered so the next readdirp at this offset ends the listing */
+        svc_fd->last_offset = entry->d_off;
+        svc_fd->entry_point_handled = _gf_true;
+
+out:
+        SVC_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries,
+                          unwind_xdata);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * Decide whether the readdirp reply must be followed by a lookup of the
+ * entry-point directory on the second child.
+ *
+ * The lookup is wound only when show_entry_point is set, the reply marks
+ * end-of-directory (op_ret 0, ENOENT), a non-empty special_dir is
+ * configured, the fd context is flagged special_dir, and the readdirp was
+ * served by the first child.
+ *
+ * Returns _gf_false when the lookup was wound (its callback unwinds the
+ * frame), and _gf_true when the caller must unwind the reply itself.
+ */
+static gf_boolean_t
+gf_svc_readdir_on_special_dir (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret,
+                               int32_t op_errno, gf_dirent_t *entries,
+                               dict_t *xdata)
+{
+        svc_local_t   *local     = NULL;
+        svc_private_t *private   = NULL;
+        inode_t       *inode     = NULL;
+        fd_t          *fd        = NULL;
+        char          *path      = NULL;
+        loc_t         *loc       = NULL;
+        dict_t        *tmp_xdata = NULL;
+        int            ret       = -1;
+        gf_boolean_t   unwind    = _gf_true;
+        svc_fd_t      *svc_fd    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        private = this->private;
+        local = frame->local;
+
+        loc = &local->loc;
+        fd = local->fd;
+        svc_fd = svc_fd_ctx_get (this, fd);
+        if (!svc_fd) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the fd "
+                        "context for the inode %s",
+                        uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        /*
+         * check if its end of readdir operation from posix, if special_dir
+         * option is set, if readdir is done on special directory and if
+         * readdirp is from normal regular graph.
+         */
+
+        if (!private->show_entry_point)
+                goto out;
+
+        if (op_ret == 0 && op_errno == ENOENT && private->special_dir &&
+            strcmp (private->special_dir, "") && svc_fd->special_dir &&
+            local->subvolume == FIRST_CHILD (this)) {
+                inode = inode_grep (fd->inode->table, fd->inode,
+                                    private->path);
+                if (!inode) {
+                        inode = inode_new (fd->inode->table);
+                        if (!inode) {
+                                gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                        "allocate new inode");
+                                goto out;
+                        }
+                }
+
+                gf_uuid_copy (local->loc.pargfid, fd->inode->gfid);
+                gf_uuid_copy (local->loc.gfid, inode->gfid);
+                if (gf_uuid_is_null (inode->gfid))
+                        ret = inode_path (fd->inode, private->path, &path);
+                else
+                        ret = inode_path (inode, NULL, &path);
+
+                if (ret < 0) {
+                        /* loc does not hold the inode yet: drop our ref */
+                        inode_unref (inode);
+                        goto out;
+                }
+
+                loc->path = gf_strdup (path);
+                if (!loc->path) {
+                        /* without a path the lookup loc would be unusable */
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "allocate memory for the path");
+                        inode_unref (inode);
+                        goto out;
+                }
+
+                if (!loc->name || !strcmp (loc->name, "")) {
+                        loc->name = strrchr (loc->path, '/');
+                        if (loc->name)
+                                loc->name++;
+                }
+
+                /* from here on the references are held through local->loc */
+                loc->inode = inode;
+                loc->parent = inode_ref (fd->inode);
+                tmp_xdata = dict_new ();
+                if (!tmp_xdata)
+                        goto out;
+                ret = dict_set_str (tmp_xdata, "entry-point", "true");
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to set dict");
+                        goto out;
+                }
+
+                local->cookie = cookie;
+                if (xdata == NULL)
+                        local->xdata = NULL;
+                else
+                        local->xdata = dict_ref (xdata);
+                STACK_WIND (frame, gf_svc_readdirp_lookup_cbk,
+                            SECOND_CHILD (this),
+                            SECOND_CHILD (this)->fops->lookup, loc, tmp_xdata);
+                unwind = _gf_false;
+        }
+
+out:
+        if (tmp_xdata)
+                dict_unref (tmp_xdata);
+
+        GF_FREE (path);
+        return unwind;
+}
+
+/*
+ * Callback of readdirp. Every returned entry that carries an inode is
+ * tagged NORMAL_INODE or VIRTUAL_INODE according to the subvolume that
+ * served the listing, and its offset is remembered in the fd context.
+ * For listings from the first child, an entry named like the entry point
+ * (priv->path) is dropped. The reply is unwound here unless
+ * gf_svc_readdir_on_special_dir took over the frame.
+ */
+static int32_t
+gf_svc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     gf_dirent_t *entries, dict_t *xdata)
+{
+        gf_boolean_t   unwind     = _gf_true;
+        svc_private_t *priv       = NULL;
+        svc_local_t   *local      = NULL;
+        svc_fd_t      *svc_fd     = NULL;
+        gf_dirent_t   *cur        = NULL;
+        gf_dirent_t   *next       = NULL;
+        int            inode_type = -1;
+        int            ret        = -1;
+
+        if (op_ret < 0)
+                goto out;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        local = frame->local;
+        priv = this->private;
+
+        /* a missing fd context is tolerated; offsets are just not tracked */
+        svc_fd = svc_fd_ctx_get (this, local->fd);
+        if (!svc_fd) {
+                gf_log (this->name, GF_LOG_WARNING, "failed to get the fd "
+                        "context for the gfid %s",
+                        uuid_utoa (local->fd->inode->gfid));
+        }
+
+        inode_type = (local->subvolume == FIRST_CHILD (this)) ?
+                     NORMAL_INODE : VIRTUAL_INODE;
+
+        list_for_each_entry_safe (cur, next, &entries->list, list) {
+                /* If .snaps pre-exists, then it should not be listed
+                 * in the NORMAL INODE directory when USS is enabled,
+                 * so filter the .snaps entry if exists.
+                 * However it is OK to list .snaps in VIRTUAL world
+                 */
+                if (inode_type == NORMAL_INODE &&
+                    strcmp (priv->path, cur->d_name) == 0) {
+                        gf_dirent_entry_free (cur);
+                        continue;
+                }
+
+                if (cur->inode) {
+                        ret = svc_inode_ctx_set (this, cur->inode, inode_type);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_ERROR, "failed to set inode "
+                                        "context");
+                        if (svc_fd)
+                                svc_fd->last_offset = cur->d_off;
+                }
+        }
+
+        unwind = gf_svc_readdir_on_special_dir (frame, cookie, this, op_ret,
+                                                op_errno, entries, xdata);
+
+out:
+        if (unwind)
+                SVC_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries,
+                                  xdata);
+
+        return 0;
+}
+
+/*
+ * gf_svc_readdirp: readdirp fop of snapview-client.
+ *
+ * If the fd context records that the entry point was already handed out
+ * and @off equals the remembered offset, the call is unwound as
+ * end-of-directory (op_ret 0, ENOENT). Otherwise the readdirp is wound to
+ * the subvolume that owns fd->inode; the subvolume and a reference on @fd
+ * are kept in frame->local for gf_svc_readdirp_cbk.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t off, dict_t *xdata)
+{
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+        svc_local_t  *local      = NULL;
+        int           ret        = -1;
+        int           op_ret     = -1;
+        int           op_errno   = EINVAL;
+        gf_boolean_t  wind       = _gf_false;
+        svc_fd_t     *svc_fd     = NULL;
+        gf_dirent_t   entries;
+
+        INIT_LIST_HEAD (&entries.list);
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        /*
+         * This is mainly for samba shares (or windows clients). As part of
+         * readdirp on the directory used as samba share, the entry point
+         * directory would have been added at the end. So when a new readdirp
+         * request comes, we have to check if the entry point has been handled
+         * or not in readdirp. That information and the offset used for it
+         * is remembered in fd context. If it has been handled, then simply
+         * unwind indicating end of readdir operation.
+         */
+        svc_fd = svc_fd_ctx_get_or_new (this, fd);
+        if (!svc_fd)
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the fd "
+                        "context for the inode %s",
+                        uuid_utoa (fd->inode->gfid));
+        else {
+                if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
+                        op_ret = 0;
+                        op_errno = ENOENT;
+                        goto out;
+                }
+        }
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 fd->inode, subvolume, out);
+
+        /* Allocate local only once the call is certain to be wound: the
+         * early exits above unwind before frame->local is set and would
+         * otherwise leak it. */
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate local");
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        local->subvolume = subvolume;
+        local->fd = fd_ref (fd);
+        frame->local = local;
+
+        STACK_WIND (frame, gf_svc_readdirp_cbk, subvolume,
+                    subvolume->fops->readdirp, fd, size, off, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries,
+                                  NULL);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/* Renaming the entries from or to snapshots is not allowed as the snapshots
+   are read-only.
+
+   The rename is unwound with EROFS when the source inode, the existing
+   destination inode, or (when no destination inode type is known) the
+   destination parent is a VIRTUAL_INODE. A source without inode context or
+   a failed argument check is unwound with EINVAL. Everything else is wound
+   to the first (regular) child. Always returns 0.
+*/
+static int32_t
+gf_svc_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+               loc_t *newloc, dict_t *xdata)
+{
+        int          src_inode_type  = -1;
+        int          dst_inode_type  = -1;
+        int          dst_parent_type = -1;
+        int32_t      op_ret          = -1;
+        /* EINVAL, not 0: a failed argument check must not unwind -1/0 */
+        int32_t      op_errno        = EINVAL;
+        int32_t      ret             = -1;
+        gf_boolean_t wind            = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, oldloc, out);
+        GF_VALIDATE_OR_GOTO (this->name, oldloc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, newloc, out);
+
+        ret = svc_inode_ctx_get (this, oldloc->inode, &src_inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for the inode %s",
+                        uuid_utoa (oldloc->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (src_inode_type == VIRTUAL_INODE) {
+                gf_log (this->name, GF_LOG_ERROR, "rename happening on a entry"
+                        " %s residing in snapshot", oldloc->name);
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        if (newloc->inode) {
+                ret = svc_inode_ctx_get (this, newloc->inode, &dst_inode_type);
+                if (!ret && dst_inode_type == VIRTUAL_INODE) {
+                        gf_log (this->name, GF_LOG_ERROR, "rename of %s "
+                                "happening to a entry %s residing in snapshot",
+                                oldloc->name, newloc->name);
+                        op_ret = -1;
+                        op_errno = EROFS;
+                        goto out;
+                }
+        }
+
+        /* destination type unknown: fall back to the destination parent */
+        if (dst_inode_type < 0) {
+                ret = svc_inode_ctx_get (this, newloc->parent,
+                                         &dst_parent_type);
+                if (!ret && dst_parent_type == VIRTUAL_INODE) {
+                        gf_log (this->name, GF_LOG_ERROR, "rename of %s "
+                                "happening to a entry %s residing in snapshot",
+                                oldloc->name, newloc->name);
+                        op_ret = -1;
+                        op_errno = EROFS;
+                        goto out;
+                }
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->rename, oldloc, newloc,
+                         xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (rename, frame, op_ret, op_errno, NULL,
+                                  NULL, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+/* Creating hardlinks for the files from the snapshot is not allowed as it
+   will be equivalent of creating hardlinks across different filesystems.
+   And so is vice versa.
+
+   The link is unwound with EROFS when the source inode or the destination
+   parent is a VIRTUAL_INODE; a failed argument check is unwound with
+   EINVAL. Everything else is wound to the first (regular) child.
+   Always returns 0.
+*/
+static int32_t
+gf_svc_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+             dict_t *xdata)
+{
+        int          src_inode_type  = -1;
+        int          dst_parent_type = -1;
+        int32_t      op_ret          = -1;
+        /* EINVAL, not 0: a failed argument check must not unwind -1/0 */
+        int32_t      op_errno        = EINVAL;
+        int32_t      ret             = -1;
+        gf_boolean_t wind            = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, oldloc, out);
+        GF_VALIDATE_OR_GOTO (this->name, oldloc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, newloc, out);
+
+        ret = svc_inode_ctx_get (this, oldloc->inode, &src_inode_type);
+        if (!ret && src_inode_type == VIRTUAL_INODE) {
+                gf_log (this->name, GF_LOG_ERROR, "link happening on a entry"
+                        " %s residing in snapshot", oldloc->name);
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        ret = svc_inode_ctx_get (this, newloc->parent, &dst_parent_type);
+        if (!ret && dst_parent_type == VIRTUAL_INODE) {
+                gf_log (this->name, GF_LOG_ERROR, "link of %s "
+                        "happening to a entry %s residing in snapshot",
+                        oldloc->name, newloc->name);
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->link, oldloc, newloc, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (link, frame, op_ret, op_errno,
+                                  NULL, NULL, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * removexattr fop: permitted only on NORMAL_INODE inodes, which are wound
+ * to the first (regular) child. Other inode types are unwound with EROFS;
+ * a missing inode context or failed argument check with EINVAL.
+ */
+static int32_t
+gf_svc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    const char *name, dict_t *xdata)
+{
+        gf_boolean_t wind       = _gf_false;
+        int          op_errno   = EINVAL;
+        int          op_ret     = -1;
+        int          inode_type = -1;
+        int          ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        ret = svc_inode_ctx_get (this, loc->inode, &inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get te inode "
+                        "context for %s (gfid: %s)", loc->path,
+                        uuid_utoa (loc->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* anything that is not a regular inode is read-only */
+        if (inode_type != NORMAL_INODE) {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->removexattr, loc,
+                         name, xdata);
+        wind = _gf_true;
+
+out:
+        if (wind)
+                return 0;
+
+        SVC_STACK_UNWIND (removexattr, frame, op_ret, op_errno,
+                          NULL);
+
+        return 0;
+}
+
+/*
+ * fsync fop: permitted only on NORMAL_INODE inodes, which are wound to
+ * the first (regular) child. Other inode types are unwound with EROFS;
+ * a missing inode context or failed argument check with EINVAL.
+ */
+static int
+gf_svc_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+              dict_t *xdata)
+{
+        gf_boolean_t wind       = _gf_false;
+        int          op_errno   = EINVAL;
+        int          op_ret     = -1;
+        int          inode_type = -1;
+        int          ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        ret = svc_inode_ctx_get (this, fd->inode, &inode_type);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get inode context "
+                        "for %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* anything that is not a regular inode is read-only */
+        if (inode_type != NORMAL_INODE) {
+                op_ret = -1;
+                op_errno = EROFS;
+                goto out;
+        }
+
+        STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+                         FIRST_CHILD (this)->fops->fsync, fd, datasync,
+                         xdata);
+        wind = _gf_true;
+
+out:
+        if (wind)
+                return 0;
+
+        SVC_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL,
+                          NULL);
+
+        return 0;
+}
+
+/*
+ * flush fop: forwarded to whichever subvolume the inode context of
+ * fd->inode maps to. If any argument check or the context lookup fails,
+ * the call is unwound with -1/EINVAL.
+ *
+ * Always returns 0; the fop result is delivered through the wind/unwind.
+ */
+static int32_t
+gf_svc_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        int32_t       op_ret     = -1;
+        /* EINVAL, not 0: a failed argument check must not unwind -1/0 */
+        int32_t       op_errno   = EINVAL;
+        int           ret        = -1;
+        int           inode_type = -1;
+        xlator_t     *subvolume  = NULL;
+        gf_boolean_t  wind       = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+                                 fd->inode, subvolume, out);
+
+        STACK_WIND_TAIL (frame, subvolume, subvolume->fops->flush, fd, xdata);
+
+        wind = _gf_true;
+
+out:
+        if (!wind)
+                SVC_STACK_UNWIND (flush, frame, op_ret, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * releasedir callback: removes this translator's context from @fd and
+ * frees the svc_fd_t that was stored in it. Always returns 0.
+ */
+static int32_t
+gf_svc_releasedir (xlator_t *this, fd_t *fd)
+{
+        svc_fd_t *sfd     = NULL;
+        uint64_t  tmp_pfd = 0;
+        int       ret     = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        ret = fd_ctx_del (fd, this, &tmp_pfd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "pfd from fd=%p is NULL", fd);
+                goto out;
+        }
+
+        /* the context value is the svc_fd_t pointer; recover it so that it
+         * is actually released instead of freeing a NULL pointer */
+        sfd = (svc_fd_t *)(long)tmp_pfd;
+
+        GF_FREE (sfd);
+
+out:
+        return 0;
+}
+
+/*
+ * forget callback: deletes this translator's context from @inode. A
+ * failure to delete is only logged. Always returns 0.
+ */
+static int32_t
+gf_svc_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_value = 0;
+        int      rc        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svc", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        rc = inode_ctx_del (inode, this, &ctx_value);
+        if (rc)
+                gf_log (this->name, GF_LOG_ERROR, "failed to delete inode "
+                        "context for %s", uuid_utoa (inode->gfid));
+
+out:
+        return 0;
+}
+
+/*
+ * Re-reads the "snapshot-directory" and "show-snapshot-directory" options
+ * into the private structure. Returns 0 in every case, including when one
+ * of the option macros jumps to out.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        svc_private_t *priv = this->private;
+
+        GF_OPTION_RECONF ("snapshot-directory", priv->path, options, str, out);
+        GF_OPTION_RECONF ("show-snapshot-directory", priv->show_entry_point,
+                          options, bool, out);
+
+out:
+        return 0;
+}
+
+/*
+ * Sets up memory accounting for this translator with gf_svc_mt_end + 1
+ * types. Returns -1 when @this is NULL, otherwise the result of
+ * xlator_mem_acct_init (a non-zero result is also logged).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int32_t rc = -1;
+
+        if (this) {
+                rc = xlator_mem_acct_init (this, gf_svc_mt_end + 1);
+                if (rc != 0)
+                        gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                                " init failed");
+        }
+
+        return rc;
+}
+
+/*
+ * Translator init: requires exactly two children, allocates the private
+ * structure, reads the three options into it and creates the pool used
+ * for frame->local.
+ *
+ * Returns 0 on success and -1 on failure. On failure the private
+ * structure is freed and this->private is reset to NULL.
+ */
+int32_t
+init (xlator_t *this)
+{
+        svc_private_t *private  = NULL;
+        int            ret      = -1;
+        int            children = 0;
+        xlator_list_t *xl       = NULL;
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "configured without any child");
+                goto out;
+        }
+
+        xl = this->children;
+        while (xl) {
+                children++;
+                xl = xl->next;
+        }
+
+        if (children != 2) {
+                gf_log (this->name, GF_LOG_ERROR, "snap-view-client has got "
+                        "%d subvolumes. It can have only 2 subvolumes.",
+                        children);
+                goto out;
+        }
+
+        /* This can be the top of graph in certain cases */
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "dangling volume. check volfile ");
+        }
+
+        private = GF_CALLOC (1, sizeof (*private), gf_svc_mt_svc_private_t);
+        if (!private)
+                goto out;
+
+        GF_OPTION_INIT ("snapshot-directory", private->path, str, out);
+        GF_OPTION_INIT ("snapdir-entry-path", private->special_dir, str,
+                        out);
+        GF_OPTION_INIT ("show-snapshot-directory", private->show_entry_point,
+                        bool, out);
+
+        if (strstr (private->special_dir, private->path)) {
+                gf_log (this->name, GF_LOG_ERROR, "entry point directory "
+                        "cannot be part of the special directory");
+                /* NOTE(review): this frees a string obtained through
+                 * GF_OPTION_INIT; confirm that the option machinery hands
+                 * over ownership of it. */
+                GF_FREE (private->special_dir);
+                private->special_dir = NULL;
+                goto out;
+        }
+
+        this->private = private;
+        this->local_pool = mem_pool_new (svc_local_t, 128);
+        if (!this->local_pool) {
+                gf_log (this->name, GF_LOG_ERROR, "could not get mem pool for "
+                        "frame->local");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (ret) {
+                GF_FREE (private);
+                /* do not leave a pointer to the freed structure behind */
+                this->private = NULL;
+        }
+
+        return ret;
+}
+
+/*
+ * Translator teardown: frees the private structure and destroys the pool
+ * created in init for frame->local. Does nothing when @this or its
+ * private structure is NULL.
+ */
+void
+fini (xlator_t *this)
+{
+        svc_private_t *priv = NULL;
+
+        if (!this)
+                return;
+
+        priv = this->private;
+        if (!priv)
+                return;
+
+        this->private = NULL;
+
+        GF_FREE (priv);
+
+        /* release the frame->local pool allocated by init() */
+        if (this->local_pool) {
+                mem_pool_destroy (this->local_pool);
+                this->local_pool = NULL;
+        }
+
+        return;
+}
+
+/*
+ * Event notification: events whose data is the second child are dropped
+ * (0 is returned); every other event is passed to default_notify.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        xlator_t *subvol = data;
+
+        /* As there are two subvolumes in snapview-client, there is
+         * a possibility that the regular subvolume is still down and
+         * snapd subvolume come up first. So if we don't handle this situation
+         * CHILD_UP event will be propagated upwards to fuse when
+         * regular subvolume is still down.
+         * This can cause data unavailable for the application.
+         * So for now send notifications up only for regular subvolume.
+         *
+         * TODO: In future if required we may need to handle
+         * notifications from virtual subvolume
+         */
+        if (subvol == SECOND_CHILD (this))
+                return 0;
+
+        return default_notify (this, event, data);
+}
+
+/* File-operation table of this translator: maps each listed fop to its
+ * gf_svc_* handler. */
+struct xlator_fops fops = {
+ .lookup = gf_svc_lookup,
+ .opendir = gf_svc_opendir,
+ .stat = gf_svc_stat,
+ .fstat = gf_svc_fstat,
+ .statfs = gf_svc_statfs,
+ .rmdir = gf_svc_rmdir,
+ .rename = gf_svc_rename,
+ .mkdir = gf_svc_mkdir,
+ .open = gf_svc_open,
+ .unlink = gf_svc_unlink,
+ .setattr = gf_svc_setattr,
+ .getxattr = gf_svc_getxattr,
+ .setxattr = gf_svc_setxattr,
+ .fsetxattr = gf_svc_fsetxattr,
+ .readv = gf_svc_readv,
+ .readdir = gf_svc_readdir,
+ .readdirp = gf_svc_readdirp,
+ .create = gf_svc_create,
+ .readlink = gf_svc_readlink,
+ .mknod = gf_svc_mknod,
+ .symlink = gf_svc_symlink,
+ .flush = gf_svc_flush,
+ .link = gf_svc_link,
+ .access = gf_svc_access,
+ .removexattr = gf_svc_removexattr,
+ .fsync = gf_svc_fsync,
+};
+
+/* Callback table: gf_svc_forget deletes the inode context,
+ * gf_svc_releasedir deletes the fd context. */
+struct xlator_cbks cbks = {
+ .forget = gf_svc_forget,
+ .releasedir = gf_svc_releasedir,
+};
+
+/* Options understood by this translator. init() reads all three;
+ * reconfigure() re-reads "snapshot-directory" and
+ * "show-snapshot-directory". The list ends with a NULL key. */
+struct volume_options options[] = {
+ { .key = {"snapshot-directory"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = ".snaps",
+ },
+ { .key = {"snapdir-entry-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "An option to set the path of a directory on which "
+ "when readdir comes, dentry for the snapshot-directory"
+ " should be created and added in the readdir response",
+ .default_value = "",
+ },
+ { .key = {"show-snapshot-directory"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "If this option is set, and the option "
+ "\"snapdir-entry-path\" is set (which is set by samba "
+ "vfs plugin for glusterfs, then send the entry point "
+ "when readdir comes on the snapdir-entry-path",
+ .default_value = "off",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/snapview-client/src/snapview-client.h b/xlators/features/snapview-client/src/snapview-client.h
new file mode 100644
index 00000000000..5b7a862cf3f
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client.h
@@ -0,0 +1,97 @@
+ /*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SNAP_VIEW_CLIENT_H__
+#define __SNAP_VIEW_CLIENT_H__
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "snapview-client-mem-types.h"
+
+/*
+ * Per-call state.  SVC_STACK_UNWIND reads a pointer of this type out of
+ * frame->local, detaches it and hands it to svc_local_free.
+ */
+struct __svc_local {
+ loc_t loc;
+ xlator_t *subvolume;
+ fd_t *fd;
+ void *cookie;
+ dict_t *xdata;
+};
+typedef struct __svc_local svc_local_t;
+
+/*
+ * Unwind the given fop and release the frame's svc_local_t.
+ * frame->local is detached (set to NULL) before STACK_UNWIND_STRICT is
+ * invoked and the saved pointer is passed to svc_local_free afterwards,
+ * so the free happens after the unwind.  A NULL frame is tolerated for
+ * the detach step only.
+ */
+#define SVC_STACK_UNWIND(fop, frame, params ...) do { \
+ svc_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ svc_local_free (__local); \
+ } while (0)
+
+/*
+ * Set "entry-point" = "true" in xdata.  When xdata is NULL a new dict
+ * is created and assigned to both xdata and new_xdata.  If either the
+ * allocation or the dict_set_str call fails, an error is logged,
+ * op_ret is set to -1, op_errno to ENOMEM, and control jumps to label.
+ * The priv argument is accepted but not used by the expansion.
+ *
+ * The expansion has no trailing semicolon: the invocation supplies it,
+ * which keeps "if (x) SVC_ENTRY_POINT_SET (...); else ..." well formed.
+ */
+#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, \
+                            priv, ret, label) \
+        do { \
+                if (!xdata) { \
+                        xdata = new_xdata = dict_new (); \
+                        if (!new_xdata) { \
+                                gf_log (this->name, GF_LOG_ERROR, \
+                                        "failed to allocate new dict"); \
+                                op_ret = -1; \
+                                op_errno = ENOMEM; \
+                                goto label; \
+                        } \
+                } \
+                ret = dict_set_str (xdata, "entry-point", "true"); \
+                if (ret) { \
+                        gf_log (this->name, GF_LOG_ERROR, \
+                                "failed to set dict"); \
+                        op_ret = -1; \
+                        op_errno = ENOMEM; \
+                        goto label; \
+                } \
+        } while (0)
+
+/*
+ * Look up the inode type stored for inode (svc_inode_ctx_get) and
+ * resolve it to a subvolume with svc_get_subvolume.  If the lookup
+ * returns a negative value, an error is logged, op_ret is set to -1,
+ * op_errno to EINVAL, and control jumps to label.
+ *
+ * The expansion has no trailing semicolon: the invocation supplies it,
+ * so the macro behaves as a single statement in an unbraced if/else.
+ */
+#define SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, \
+                                inode, subvolume, label) \
+        do { \
+                ret = svc_inode_ctx_get (this, inode, &inode_type); \
+                if (ret < 0) { \
+                        gf_log (this->name, GF_LOG_ERROR, \
+                                "inode context not found for gfid %s", \
+                                uuid_utoa (inode->gfid)); \
+                        op_ret = -1; \
+                        op_errno = EINVAL; \
+                        goto label; \
+                } \
+ \
+                subvolume = svc_get_subvolume (this, inode_type); \
+        } while (0)
+
+/*
+ * Translator-private settings.
+ * NOTE(review): the three fields look like they hold the values of the
+ * "snapshot-directory", "snapdir-entry-path" and
+ * "show-snapshot-directory" options -- confirm against init().
+ */
+struct svc_private {
+ char *path;
+ char *special_dir; /* needed for samba */
+ gf_boolean_t show_entry_point;
+};
+typedef struct svc_private svc_private_t;
+
+/*
+ * Per-fd state.
+ * NOTE(review): the field names suggest readdir bookkeeping (last
+ * offset served, whether the entry point was already emitted, whether
+ * the fd is on the special directory) -- the users are not visible
+ * here, confirm before relying on this.
+ */
+struct svc_fd {
+ off_t last_offset;
+ gf_boolean_t entry_point_handled;
+ gf_boolean_t special_dir;
+};
+typedef struct svc_fd svc_fd_t;
+
+/*
+ * Inode classification.  SVC_GET_SUBVOL_FROM_CTX fetches a value of
+ * this kind with svc_inode_ctx_get and passes it to svc_get_subvolume.
+ * Numbering starts at 1, so 0 is not a member of the enum.
+ */
+typedef enum {
+ NORMAL_INODE = 1,
+ VIRTUAL_INODE
+} inode_type_t;
+
+#endif /* __SNAP_VIEW_CLIENT_H__ */
diff --git a/xlators/protocol/lib/Makefile.am b/xlators/features/snapview-server/Makefile.am
index af437a64d6d..af437a64d6d 100644
--- a/xlators/protocol/lib/Makefile.am
+++ b/xlators/features/snapview-server/Makefile.am
diff --git a/xlators/features/snapview-server/src/Makefile.am b/xlators/features/snapview-server/src/Makefile.am
new file mode 100644
index 00000000000..6b588e5d235
--- /dev/null
+++ b/xlators/features/snapview-server/src/Makefile.am
@@ -0,0 +1,22 @@
+xlator_LTLIBRARIES = snapview-server.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+snapview_server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+snapview_server_la_SOURCES = snapview-server.c snapview-server-mgmt.c snapview-server-helpers.c
+snapview_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
+ $(top_builddir)/api/src/libgfapi.la\
+ $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+noinst_HEADERS = snapview-server.h snapview-server-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/api/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c
new file mode 100644
index 00000000000..5bf41c2317b
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-helpers.c
@@ -0,0 +1,598 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "snapview-server.h"
+#include "snapview-server-mem-types.h"
+
+#include "xlator.h"
+#include "rpc-clnt.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include <pthread.h>
+
+
+/*
+ * Record svs_inode as this translator's context on inode.  No lock is
+ * taken here; svs_inode_ctx_set wraps this call in inode->lock.
+ * Returns -1 when an argument is NULL, otherwise the result of
+ * __inode_ctx_set.
+ */
+int
+__svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode)
+{
+        int      status  = -1;
+        uint64_t ctx_val = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, svs_inode, out);
+
+        /* the context slot carries the pointer as a 64-bit integer */
+        ctx_val = (uint64_t)(long) svs_inode;
+        status = __inode_ctx_set (inode, this, &ctx_val);
+
+out:
+        return status;
+}
+
+/*
+ * Fetch the svs_inode_t stored as this translator's context on inode.
+ * No lock is taken here; svs_inode_ctx_get wraps this call in
+ * inode->lock.  Returns NULL when an argument is NULL or when
+ * __inode_ctx_get reports failure.
+ */
+svs_inode_t *
+__svs_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+        uint64_t     ctx_val = 0;
+        svs_inode_t *found   = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        if (__inode_ctx_get (inode, this, &ctx_val) == 0)
+                found = (svs_inode_t *) ((long) ctx_val);
+
+out:
+        return found;
+}
+
+/*
+ * Locked variant of __svs_inode_ctx_get: performs the lookup while
+ * holding inode->lock.  Returns NULL when an argument is NULL or no
+ * context is found.
+ */
+svs_inode_t *
+svs_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+        svs_inode_t *found = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        LOCK (&inode->lock);
+        found = __svs_inode_ctx_get (this, inode);
+        UNLOCK (&inode->lock);
+
+out:
+        return found;
+}
+
+/*
+ * Locked variant of __svs_inode_ctx_set: stores svs_inode on inode
+ * while holding inode->lock.  Returns -1 when an argument is NULL,
+ * otherwise the result of the unlocked helper.
+ */
+int32_t
+svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode)
+{
+        int32_t status = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, svs_inode, out);
+
+        LOCK (&inode->lock);
+        status = __svs_inode_ctx_set (this, inode, svs_inode);
+        UNLOCK (&inode->lock);
+
+out:
+        return status;
+}
+
+/*
+ * Allocate one zero-filled svs_inode_t, accounted under
+ * gf_svs_mt_svs_inode_t.  Returns whatever GF_CALLOC returns.
+ */
+svs_inode_t *
+svs_inode_new (void)
+{
+        return GF_CALLOC (1, sizeof (svs_inode_t), gf_svs_mt_svs_inode_t);
+}
+
+/*
+ * Return the svs_inode_t attached to inode, creating and attaching a
+ * fresh one if none exists.  Lookup and creation both happen under
+ * inode->lock.  Returns NULL when an argument is NULL, when allocation
+ * fails, or when the new context cannot be stored (it is freed then).
+ */
+svs_inode_t *
+svs_inode_ctx_get_or_new (xlator_t *this, inode_t *inode)
+{
+        svs_inode_t *ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        LOCK (&inode->lock);
+        {
+                ctx = __svs_inode_ctx_get (this, inode);
+                if (ctx)
+                        goto unlock;
+
+                ctx = svs_inode_new ();
+                if (!ctx)
+                        goto unlock;
+
+                if (__svs_inode_ctx_set (this, inode, ctx)) {
+                        GF_FREE (ctx);
+                        ctx = NULL;
+                }
+        }
+unlock:
+        UNLOCK (&inode->lock);
+
+out:
+        return ctx;
+}
+
+/*
+ * Allocate one zero-filled svs_fd_t, accounted under
+ * gf_svs_mt_svs_fd_t.  Returns whatever GF_CALLOC returns.
+ */
+svs_fd_t *
+svs_fd_new (void)
+{
+        return GF_CALLOC (1, sizeof (svs_fd_t), gf_svs_mt_svs_fd_t);
+}
+
+/*
+ * Record svs_fd as this translator's context on fd.  No lock is taken
+ * here; svs_fd_ctx_set wraps this call in fd->lock.  Returns -1 when
+ * an argument is NULL, otherwise the result of __fd_ctx_set.
+ */
+int
+__svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd)
+{
+        int      status  = -1;
+        uint64_t ctx_val = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, svs_fd, out);
+
+        /* the context slot carries the pointer as a 64-bit integer */
+        ctx_val = (uint64_t)(long) svs_fd;
+        status = __fd_ctx_set (fd, this, ctx_val);
+
+out:
+        return status;
+}
+
+/*
+ * Fetch the svs_fd_t stored as this translator's context on fd.  No
+ * lock is taken here; svs_fd_ctx_get wraps this call in fd->lock.
+ * Returns NULL when an argument is NULL or when __fd_ctx_get reports
+ * failure.
+ */
+svs_fd_t *
+__svs_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        uint64_t  ctx_val = 0;
+        svs_fd_t *found   = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        if (__fd_ctx_get (fd, this, &ctx_val) == 0)
+                found = (svs_fd_t *) ((long) ctx_val);
+
+out:
+        return found;
+}
+
+/*
+ * Locked variant of __svs_fd_ctx_get: performs the lookup while
+ * holding fd->lock.  Returns NULL when an argument is NULL or no
+ * context is found.
+ */
+svs_fd_t *
+svs_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        svs_fd_t *found = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        found = __svs_fd_ctx_get (this, fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return found;
+}
+
+/*
+ * Locked variant of __svs_fd_ctx_set: stores svs_fd on fd while
+ * holding fd->lock.  Returns -1 when an argument is NULL, otherwise
+ * the result of the unlocked helper.
+ */
+int32_t
+svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd)
+{
+        int32_t status = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, svs_fd, out);
+
+        LOCK (&fd->lock);
+        status = __svs_fd_ctx_set (this, fd, svs_fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return status;
+}
+
+/*
+ * Return the svs_fd_t attached to fd, creating one if needed.  No lock
+ * is taken on fd here; svs_fd_ctx_get_or_new calls this under fd->lock.
+ *
+ * For an anonymous fd the backing glfs handle is opened right away from
+ * the fs/object kept in the inode context: glfs_h_opendir for
+ * directories, glfs_h_open (read-only) for regular files.  For any
+ * other fd the new context is stored with its fd member left NULL.
+ *
+ * Returns NULL on any failure; a handle opened here is closed again if
+ * the context cannot be stored.
+ */
+svs_fd_t *
+__svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd)
+{
+        svs_fd_t    *svs_fd    = NULL;
+        svs_inode_t *inode_ctx = NULL;
+        inode_t     *inode     = NULL;
+        glfs_fd_t   *handle    = NULL;
+        int          ret       = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        inode = fd->inode;
+
+        /* fast path: a context is already attached */
+        svs_fd = __svs_fd_ctx_get (this, fd);
+        if (svs_fd)
+                return svs_fd;
+
+        svs_fd = svs_fd_new ();
+        if (!svs_fd) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to allocate new fd context for gfid %s",
+                        uuid_utoa (inode->gfid));
+                goto out;
+        }
+
+        if (fd_is_anonymous (fd)) {
+                inode_ctx = svs_inode_ctx_get (this, inode);
+                if (!inode_ctx) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to get inode context for %s",
+                                uuid_utoa (inode->gfid));
+                        goto out;
+                }
+
+                if (inode->ia_type == IA_IFDIR) {
+                        handle = glfs_h_opendir (inode_ctx->fs,
+                                                 inode_ctx->object);
+                        if (!handle) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to open the directory %s",
+                                        uuid_utoa (inode->gfid));
+                                goto out;
+                        }
+                } else if (inode->ia_type == IA_IFREG) {
+                        handle = glfs_h_open (inode_ctx->fs,
+                                              inode_ctx->object,
+                                              O_RDONLY|O_LARGEFILE);
+                        if (!handle) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to open the file %s",
+                                        uuid_utoa (inode->gfid));
+                                goto out;
+                        }
+                }
+
+                svs_fd->fd = handle;
+        }
+
+        ret = __svs_fd_ctx_set (this, fd, svs_fd);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set fd context for gfid %s",
+                        uuid_utoa (inode->gfid));
+
+                /* undo the open done above; the close call matches the
+                   inode type used to open the handle */
+                if (svs_fd->fd) {
+                        if (inode->ia_type == IA_IFDIR) {
+                                if (glfs_closedir (svs_fd->fd))
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "failed to close the fd for %s",
+                                                uuid_utoa (inode->gfid));
+                        } else if (inode->ia_type == IA_IFREG) {
+                                if (glfs_close (svs_fd->fd))
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "failed to close the fd for %s",
+                                                uuid_utoa (inode->gfid));
+                        }
+                }
+                ret = -1;
+        }
+
+out:
+        if (ret) {
+                GF_FREE (svs_fd);
+                svs_fd = NULL;
+        }
+
+        return svs_fd;
+}
+
+/*
+ * Locked variant of __svs_fd_ctx_get_or_new: runs the get-or-create
+ * while holding fd->lock.  Returns NULL when an argument is NULL or
+ * the helper fails.
+ */
+svs_fd_t *
+svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd)
+{
+        svs_fd_t *ctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        LOCK (&fd->lock);
+        ctx = __svs_fd_ctx_get_or_new (this, fd);
+        UNLOCK (&fd->lock);
+
+out:
+        return ctx;
+}
+
+/*
+ * Derive gfid from a snapshot name and an origin gfid: the name and
+ * the textual form of origin_gfid are concatenated, hashed with MD5,
+ * and the digest is copied into gfid.  The same inputs therefore
+ * always give the same gfid.  The concatenation is bounded by
+ * ino_string, so longer input is truncated before hashing.
+ */
+void
+svs_uuid_generate (uuid_t gfid, char *snapname, uuid_t origin_gfid)
+{
+        unsigned char md5_sum[MD5_DIGEST_LENGTH] = {0};
+        char          ino_string[NAME_MAX + 32]  = "";
+
+        GF_ASSERT (snapname);
+
+        (void) snprintf (ino_string, sizeof (ino_string), "%s%s",
+                         snapname, uuid_utoa (origin_gfid));
+        MD5 ((unsigned char *)ino_string, strlen (ino_string), md5_sum);
+        gf_uuid_copy (gfid, md5_sum);
+}
+
+/*
+ * Fill buf->ia_ino from buf->ia_gfid: -1 when the gfid is null,
+ * otherwise the value computed by gfid_to_ino.  Does nothing when buf
+ * or THIS is NULL.
+ */
+void
+svs_fill_ino_from_gfid (struct iatt *buf)
+{
+        xlator_t *this = THIS;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, buf, out);
+
+        if (gf_uuid_is_null (buf->ia_gfid))
+                buf->ia_ino = -1;
+        else
+                /* consider least significant 8 bytes of value out of gfid */
+                buf->ia_ino = gfid_to_ino (buf->ia_gfid);
+
+out:
+        return;
+}
+
+/*
+ * Fill buf with synthetic attributes for a directory identified by
+ * gfid: type IA_IFDIR, owner 0:0, mode 0755, nlink 2, size 4096,
+ * 8 blocks, and all three timestamps set to the current time.  The
+ * inode number is derived from the gfid via svs_fill_ino_from_gfid.
+ * Does nothing when buf or THIS is NULL.
+ */
+void
+svs_iatt_fill (uuid_t gfid, struct iatt *buf)
+{
+        struct timeval  tv   = {0, };
+        xlator_t       *this = NULL;
+
+        this = THIS;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, buf, out);
+
+        buf->ia_type   = IA_IFDIR;
+        buf->ia_uid    = 0;
+        buf->ia_gid    = 0;
+        buf->ia_nlink  = 2;
+        buf->ia_blocks = 8;
+        buf->ia_size   = 4096;
+
+        gf_uuid_copy (buf->ia_gfid, gfid);
+        svs_fill_ino_from_gfid (buf);
+
+        buf->ia_prot = ia_prot_from_st_mode (0755);
+
+        gettimeofday (&tv, 0);
+
+        buf->ia_mtime = buf->ia_atime = buf->ia_ctime = tv.tv_sec;
+        buf->ia_mtime_nsec = buf->ia_atime_nsec = buf->ia_ctime_nsec =
+                (tv.tv_usec * 1000);
+
+out:
+        return;
+}
+
+/*
+ * priv->snaplist_lock should be held before calling this function.
+ *
+ * Linear search of the private dirents array (num_snaps entries) for
+ * an entry whose name equals name.  Returns a pointer into that array,
+ * or NULL when the array is absent or no entry matches.
+ */
+snap_dirent_t *
+__svs_get_snap_dirent (xlator_t *this, const char *name)
+{
+        svs_private_t *priv    = this->private;
+        snap_dirent_t *entries = priv->dirents;
+        int            idx     = 0;
+
+        if (!entries)
+                return NULL;
+
+        for (idx = 0; idx < priv->num_snaps; idx++) {
+                if (strcmp (entries[idx].name, name) == 0)
+                        return &entries[idx];
+        }
+
+        return NULL;
+}
+
+/*
+ * Return the glfs instance for the snapshot called name, creating and
+ * initialising it on first use.  It calls __svs_get_snap_dirent, which
+ * expects priv->snaplist_lock to be held; svs_initialise_snapshot_volume
+ * is the wrapper that takes that lock.
+ *
+ * A dirent that already carries an fs is returned as is.  Otherwise a
+ * new instance is created for "/snaps/<snapname>/<snap_volname>",
+ * pointed at the volfile server localhost:24007 over tcp, given its
+ * own log file, initialised, and cached in the dirent.
+ *
+ * On failure NULL is returned, a partially built instance is torn
+ * down with glfs_fini, and *op_errno (when op_errno is non-NULL) is
+ * set to ENOENT (no such snapshot), ENOMEM (glfs_new failed) or
+ * ESTALE (everything else).
+ */
+glfs_t *
+__svs_initialise_snapshot_volume (xlator_t *this, const char *name,
+                                  int32_t *op_errno)
+{
+        svs_private_t *priv              = NULL;
+        snap_dirent_t *dirent            = NULL;
+        glfs_t        *fs                = NULL;
+        int32_t        ret               = -1;
+        int32_t        local_errno       = ESTALE;
+        char           volname[PATH_MAX] = {0, };
+        char           logfile[PATH_MAX] = {0, };
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, name, out);
+
+        priv = this->private;
+
+        dirent = __svs_get_snap_dirent (this, name);
+        if (!dirent) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "snap entry for name %s not found", name);
+                local_errno = ENOENT;
+                goto out;
+        }
+
+        /* already initialised earlier: reuse the cached instance */
+        if (dirent->fs) {
+                fs = dirent->fs;
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (volname, sizeof (volname), "/snaps/%s/%s",
+                  dirent->name, dirent->snap_volname);
+
+        fs = glfs_new (volname);
+        if (!fs) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "glfs instance for snap volume %s failed",
+                        dirent->name);
+                local_errno = ENOMEM;
+                goto out;
+        }
+
+        ret = glfs_set_volfile_server (fs, "tcp", "localhost", 24007);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "setting the volfile server for snap volume %s failed",
+                        dirent->name);
+                goto out;
+        }
+
+        snprintf (logfile, sizeof (logfile),
+                  DEFAULT_SVD_LOG_FILE_DIRECTORY "/snaps/%s/%s-%s.log",
+                  priv->volname, name, dirent->uuid);
+
+        ret = glfs_set_logging (fs, logfile, GF_LOG_INFO);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set the log file path");
+                goto out;
+        }
+
+        ret = glfs_init (fs);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "initing the fs for %s failed", dirent->name);
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (ret) {
+                if (op_errno)
+                        *op_errno = local_errno;
+
+                if (fs)
+                        glfs_fini (fs);
+                fs = NULL;
+        } else if (fs) {
+                /* remember the instance so later calls reuse it */
+                dirent->fs = fs;
+        }
+
+        return fs;
+}
+
+/*
+ * Locked variant of __svs_initialise_snapshot_volume: runs it while
+ * holding priv->snaplist_lock.  Returns NULL when an argument (or
+ * this->private) is NULL or when the helper fails; op_errno is only
+ * written by the helper.
+ */
+glfs_t *
+svs_initialise_snapshot_volume (xlator_t *this, const char *name,
+                                int32_t *op_errno)
+{
+        svs_private_t *priv = NULL;
+        glfs_t        *fs   = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, name, out);
+
+        priv = this->private;
+
+        LOCK (&priv->snaplist_lock);
+        fs = __svs_initialise_snapshot_volume (this, name, op_errno);
+        UNLOCK (&priv->snaplist_lock);
+
+out:
+        return fs;
+}
+
+/*
+ * Return a pointer to the last element of the private dirents array,
+ * or NULL when this is NULL, the array is absent, or num_snaps is 0.
+ * The array is read under priv->snaplist_lock; the returned pointer is
+ * used by the caller after the lock has been dropped.
+ */
+snap_dirent_t *
+svs_get_latest_snap_entry (xlator_t *this)
+{
+        svs_private_t *priv   = NULL;
+        snap_dirent_t *latest = NULL;
+
+        GF_VALIDATE_OR_GOTO ("svs", this, out);
+
+        priv = this->private;
+
+        LOCK (&priv->snaplist_lock);
+        {
+                if (priv->dirents && priv->num_snaps)
+                        latest = &priv->dirents[priv->num_snaps - 1];
+        }
+        UNLOCK (&priv->snaplist_lock);
+
+out:
+        return latest;
+}
+
+/*
+ * Return the fs member of the entry reported by
+ * svs_get_latest_snap_entry, read under priv->snaplist_lock.  Returns
+ * NULL when this is NULL or there is no such entry; the fs member
+ * itself may also be NULL.
+ */
+glfs_t *
+svs_get_latest_snapshot (xlator_t *this)
+{
+        svs_private_t *priv   = NULL;
+        snap_dirent_t *latest = NULL;
+        glfs_t        *fs     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("svs", this, out);
+        priv = this->private;
+
+        latest = svs_get_latest_snap_entry (this);
+        if (!latest)
+                goto out;
+
+        LOCK (&priv->snaplist_lock);
+        fs = latest->fs;
+        UNLOCK (&priv->snaplist_lock);
+
+out:
+        return fs;
+}
diff --git a/xlators/features/snapview-server/src/snapview-server-mem-types.h b/xlators/features/snapview-server/src/snapview-server-mem-types.h
new file mode 100644
index 00000000000..a8035165000
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-mem-types.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SNAP_VIEW_MEM_TYPES_H
+#define __SNAP_VIEW_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/*
+ * Memory-accounting type ids for this translator, numbered directly
+ * after gf_common_mt_end.  gf_svs_mt_end is the last enumerator.
+ */
+enum snapview_mem_types {
+ gf_svs_mt_priv_t = gf_common_mt_end + 1,
+ gf_svs_mt_svs_inode_t,
+ gf_svs_mt_dirents_t,
+ gf_svs_mt_svs_fd_t,
+ gf_svs_mt_snaplist_t,
+ gf_svs_mt_end
+};
+
+#endif
+
diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
new file mode 100644
index 00000000000..fc2ff2ab10d
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
@@ -0,0 +1,476 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "snapview-server.h"
+#include "snapview-server-mem-types.h"
+#include <pthread.h>
+
+/*
+ * Callback registered for GF_CBK_GET_SNAPS: logs that the snapshot
+ * list changed and asks for a fresh list via svs_get_snapshot_list.
+ * mydata is the xlator.  Always returns 0; the result of the refresh
+ * request is not propagated.
+ */
+int
+mgmt_cbk_snap (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t *this = mydata;
+
+        GF_ASSERT (this);
+
+        gf_log ("mgmt", GF_LOG_INFO, "list of snapshots changed");
+        svs_get_snapshot_list (this);
+
+        return 0;
+}
+
+/*
+ * Callback actors indexed by procedure number; only GF_CBK_GET_SNAPS
+ * is populated, the remaining slots are zero-initialised.
+ */
+rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_GET_SNAPS] = {"GETSNAPS", GF_CBK_GET_SNAPS, mgmt_cbk_snap},
+};
+
+/*
+ * Callback program descriptor wrapping svs_cbk_actors; svs_mgmt_init
+ * registers it with rpcclnt_cbk_program_register.
+ */
+struct rpcclnt_cb_program svs_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = svs_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+/*
+ * Procedure names for the handshake program, indexed by procedure
+ * number.  Only two slots are named; every other slot, including
+ * GF_HNDSK_GET_SNAPSHOT_INFO used by svs_get_snapshot_list, is NULL.
+ */
+char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+};
+
+/*
+ * Handshake program descriptor; svs_get_snapshot_list passes it to
+ * svs_mgmt_submit_request as the program of the outgoing request.
+ */
+rpc_clnt_prog_t svs_clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+/*
+ * Create and start the RPC client stored in priv->rpc.
+ *
+ * The client connects to cmd_args->volfile_server when that is set,
+ * otherwise to "localhost", on GF_DEFAULT_BASE_PORT, and has
+ * svs_cbk_prog registered as its callback program.
+ *
+ * Returns 0 on success.  On failure a non-zero value is returned and
+ * any client that was created is cleaned up, unreferenced and cleared
+ * from priv->rpc.
+ */
+int
+svs_mgmt_init (xlator_t *this)
+{
+        int              ret      = -1;
+        svs_private_t   *priv     = NULL;
+        dict_t          *options  = NULL;
+        int              port     = GF_DEFAULT_BASE_PORT;
+        char            *host     = NULL;
+        cmd_args_t      *cmd_args = NULL;
+        glusterfs_ctx_t *ctx      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->ctx, out);
+
+        priv = this->private;
+
+        ctx = this->ctx;
+        cmd_args = &ctx->cmd_args;
+
+        host = "localhost";
+        if (cmd_args->volfile_server)
+                host = cmd_args->volfile_server;
+
+        ret = rpc_transport_inet_options_build (&options, host, port);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to build the "
+                        "transport options");
+                goto out;
+        }
+
+        priv->rpc = rpc_clnt_new (options, this, this->name, 8);
+        if (!priv->rpc) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to initialize RPC");
+                goto out;
+        }
+
+        ret = rpcclnt_cbk_program_register (priv->rpc, &svs_cbk_prog,
+                                            this);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to register callback program");
+                goto out;
+        }
+
+        ret = rpc_clnt_start (priv->rpc);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to start the rpc "
+                        "client");
+                goto out;
+        }
+
+        ret = 0;
+
+        gf_log (this->name, GF_LOG_DEBUG, "svs mgmt init successful");
+
+out:
+        /*
+         * priv->rpc is NULL when the failure happened before a client
+         * existed (option build or rpc_clnt_new itself failed); there is
+         * nothing to tear down then and &priv->rpc->conn must not be
+         * formed from a NULL client.
+         */
+        if (ret && priv && priv->rpc) {
+                rpc_clnt_connection_cleanup (&priv->rpc->conn);
+                rpc_clnt_unref (priv->rpc);
+                priv->rpc = NULL;
+        }
+
+        return ret;
+}
+
+/*
+ * XDR-encode req with xdrproc into a freshly obtained iobuf and submit
+ * it on ctx->mgmt as procedure procnum of prog; cbkfn is handed to
+ * rpc_clnt_submit as the reply callback together with frame.
+ *
+ * Returns the result of rpc_clnt_submit, or -1 when an argument is
+ * NULL, a buffer cannot be obtained, or serialisation fails.  The
+ * local references on the iobuf and iobref are dropped before
+ * returning in every case.
+ */
+int
+svs_mgmt_submit_request (void *req, call_frame_t *frame,
+                         glusterfs_ctx_t *ctx,
+                         rpc_clnt_prog_t *prog, int procnum,
+                         fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+        struct iovec   payload   = {0, };
+        struct iobref *iobref    = NULL;
+        struct iobuf  *iobuf     = NULL;
+        ssize_t        xdr_size  = 0;
+        int            iov_count = 0;
+        int            ret       = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", frame, out);
+        GF_VALIDATE_OR_GOTO ("snapview-server", req, out);
+        GF_VALIDATE_OR_GOTO ("snapview-server", ctx, out);
+        GF_VALIDATE_OR_GOTO ("snapview-server", prog, out);
+
+        GF_ASSERT (frame->this);
+
+        iobref = iobref_new ();
+        if (!iobref)
+                goto out;
+
+        /* req was validated above, so the payload is always built */
+        xdr_size = xdr_sizeof (xdrproc, req);
+
+        iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size);
+        if (!iobuf)
+                goto out;
+
+        iobref_add (iobref, iobuf);
+
+        payload.iov_base = iobuf->ptr;
+        payload.iov_len  = iobuf_pagesize (iobuf);
+
+        /* Create the xdr payload */
+        ret = xdr_serialize_generic (payload, req, xdrproc);
+        if (ret == -1) {
+                gf_log (frame->this->name, GF_LOG_WARNING,
+                        "Failed to create XDR payload");
+                goto out;
+        }
+        payload.iov_len = ret;
+        iov_count = 1;
+
+        ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
+                               &payload, iov_count,
+                               NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+        if (iobref)
+                iobref_unref (iobref);
+
+        if (iobuf)
+                iobuf_unref (iobuf);
+
+        return ret;
+}
+
+
+/*
+ * Reply callback for the GF_HNDSK_GET_SNAPSHOT_INFO request sent by
+ * svs_get_snapshot_list.
+ *
+ * Decodes the response, reads "snap-count" and, for each snapshot i
+ * (1-based), the "snap-volname.i", "snap-id.i" and "snapname.i" keys,
+ * and builds a new dirents array from them.  Under priv->snaplist_lock
+ * the fs handle of every old entry that matches a new entry by name
+ * and uuid is moved to the new entry, then the new array replaces
+ * priv->dirents.  Old entries still holding an fs afterwards are
+ * finalised with glfs_fini and the old array is freed.
+ *
+ * Returns 0 on success and a non-zero value otherwise; on failure the
+ * current snapshot list is left untouched.  The frame is destroyed on
+ * every path that passes argument validation.
+ *
+ * NOTE(review): when one of the three validations below fails, control
+ * jumps to error_out and the frame is not destroyed here -- confirm who
+ * owns the frame in that case.
+ */
+int
+mgmt_get_snapinfo_cbk (struct rpc_req *req, struct iovec *iov,
+                       int count, void *myframe)
+{
+        gf_getsnap_name_uuid_rsp rsp         = {0,};
+        call_frame_t            *frame       = NULL;
+        glusterfs_ctx_t         *ctx         = NULL;
+        int                      ret         = -1;
+        dict_t                  *dict        = NULL;
+        char                     key[1024]   = {0};
+        int                      snapcount   = 0;
+        svs_private_t           *priv        = NULL;
+        xlator_t                *this        = NULL;
+        int                      i           = 0;
+        int                      j           = 0;
+        char                    *value       = NULL;
+        snap_dirent_t           *dirents     = NULL;
+        snap_dirent_t           *old_dirents = NULL;
+        int                      oldcount    = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", req, error_out);
+        GF_VALIDATE_OR_GOTO ("snapview-server", myframe, error_out);
+        GF_VALIDATE_OR_GOTO ("snapview-server", iov, error_out);
+
+        frame = myframe;
+        this  = frame->this;
+        ctx   = frame->this->ctx;
+        priv  = this->private;
+
+        if (!ctx) {
+                gf_log (frame->this->name, GF_LOG_ERROR, "NULL context");
+                errno = EINVAL;
+                goto out;
+        }
+
+        if (-1 == req->rpc_status) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "RPC call is not successful");
+                errno = EINVAL;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp);
+        if (ret < 0) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "Failed to decode xdr response, rsp.op_ret = %d",
+                        rsp.op_ret);
+                goto out;
+        }
+
+        if (rsp.op_ret == -1) {
+                errno = rsp.op_errno;
+                ret = -1;
+                goto out;
+        }
+
+        if (!rsp.dict.dict_len) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "Response dict is not populated");
+                ret = -1;
+                errno = EINVAL;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+        if (ret) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "Failed to unserialize dictionary");
+                errno = EINVAL;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "snap-count", (int32_t*)&snapcount);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Error retrieving snapcount");
+                errno = EINVAL;
+                ret = -1;
+                goto out;
+        }
+
+        if (snapcount > 0) {
+                /* first time we are fetching snap list */
+                dirents = GF_CALLOC (snapcount, sizeof (snap_dirent_t),
+                                     gf_svs_mt_dirents_t);
+                if (!dirents) {
+                        gf_log (frame->this->name, GF_LOG_ERROR,
+                                "Unable to allocate memory");
+                        errno = ENOMEM;
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /*
+         * The three strings copied below are later used with strcmp and
+         * "%s", so they must be NUL-terminated.  snprintf guarantees
+         * that even when the source is as long as the destination or
+         * longer (the value is truncated in that case).
+         */
+        for (i = 0; i < snapcount; i++) {
+                snprintf (key, sizeof (key), "snap-volname.%d", i+1);
+                ret = dict_get_str (dict, key, &value);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error retrieving snap volname %d",
+                                i+1);
+                        errno = EINVAL;
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (dirents[i].snap_volname,
+                          sizeof (dirents[i].snap_volname), "%s", value);
+
+                snprintf (key, sizeof (key), "snap-id.%d", i+1);
+                ret = dict_get_str (dict, key, &value);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error retrieving snap uuid %d", i+1);
+                        errno = EINVAL;
+                        ret = -1;
+                        goto out;
+                }
+                snprintf (dirents[i].uuid,
+                          sizeof (dirents[i].uuid), "%s", value);
+
+                snprintf (key, sizeof (key), "snapname.%d", i+1);
+                ret = dict_get_str (dict, key, &value);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Error retrieving snap name %d", i+1);
+                        errno = EINVAL;
+                        ret = -1;
+                        goto out;
+                }
+                snprintf (dirents[i].name,
+                          sizeof (dirents[i].name), "%s", value);
+        }
+
+        /*
+         * Got the new snap list populated in dirents
+         * The new snap list is either a subset or a superset of
+         * the existing snaplist old_dirents which has priv->num_snaps
+         * number of entries.
+         *
+         * If subset, then clean up the fs for entries which are
+         * no longer relevant.
+         *
+         * For other overlapping entries set the fs for new dirents
+         * entries which have a fs assigned already in old_dirents
+         *
+         * We do this as we don't want to do new glfs_init()s repeatedly
+         * as the dirents entries for snapshot volumes get repeatedly
+         * cleaned up and allocated. And if we don't then that will lead
+         * to memleaks
+         */
+
+        LOCK (&priv->snaplist_lock);
+        {
+                /*
+                 * Sample the current list only while holding the lock,
+                 * so the array that is searched, swapped out and later
+                 * freed is the one that is installed right now.
+                 */
+                old_dirents = priv->dirents;
+                oldcount    = priv->num_snaps;
+
+                for (i = 0; i < oldcount; i++) {
+                        for (j = 0; j < snapcount; j++) {
+                                if ((!strcmp (old_dirents[i].name,
+                                              dirents[j].name)) &&
+                                    (!strcmp (old_dirents[i].uuid,
+                                              dirents[j].uuid)))    {
+                                        dirents[j].fs = old_dirents[i].fs;
+                                        old_dirents[i].fs = NULL;
+                                        break;
+                                }
+                        }
+                }
+
+                priv->dirents = dirents;
+                priv->num_snaps = snapcount;
+        }
+        UNLOCK (&priv->snaplist_lock);
+
+        /* whatever fs is still attached to an old entry has no successor */
+        if (old_dirents) {
+                for (i = 0; i < oldcount; i++) {
+                        if (old_dirents[i].fs)
+                                glfs_fini (old_dirents[i].fs);
+                }
+        }
+
+        GF_FREE (old_dirents);
+
+        ret = 0;
+
+out:
+        if (dict) {
+                dict_unref (dict);
+        }
+        free (rsp.dict.dict_val);
+        free (rsp.op_errstr);
+
+        if (ret && dirents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not update dirents with refreshed snap list");
+                GF_FREE (dirents);
+        }
+
+        if (myframe)
+                SVS_STACK_DESTROY (myframe);
+
+error_out:
+        return ret;
+}
+
+/*
+ * Ask for the snapshot list of priv->volname.
+ *
+ * Builds a dict holding "volname", serialises it into a
+ * gf_getsnap_name_uuid_req and submits it as
+ * GF_HNDSK_GET_SNAPSHOT_INFO through svs_mgmt_submit_request, with
+ * mgmt_get_snapinfo_cbk as the reply callback.
+ *
+ * Returns the result of the submit call, or a non-zero value when the
+ * request could not be built.  The dict and the serialised buffer are
+ * released before returning in every case.
+ */
+int
+svs_get_snapshot_list (xlator_t *this)
+{
+ gf_getsnap_name_uuid_req req = {{0,}};
+ int ret = -1;
+ dict_t *dict = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *frame = NULL;
+ svs_private_t *priv = NULL;
+ gf_boolean_t frame_cleanup = _gf_true;
+
+ GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+
+ ctx = this->ctx;
+ if (!ctx) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "ctx is NULL");
+ goto out;
+ }
+
+ frame = create_frame (this, ctx->pool);
+ if (!frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error allocating frame");
+ goto out;
+ }
+
+ priv = this->private;
+
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error allocating dictionary");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", priv->volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting volname in dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ ret = -1;
+ goto out;
+ }
+
+ ret = svs_mgmt_submit_request (&req, frame, ctx,
+ &svs_clnt_handshake_prog,
+ GF_HNDSK_GET_SNAPSHOT_INFO,
+ mgmt_get_snapinfo_cbk,
+ (xdrproc_t)xdr_gf_getsnap_name_uuid_req);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error sending snapshot names RPC request");
+ }
+
+ /*
+ * From here on the frame is treated as handed over to the callback.
+ * NOTE(review): the flag is cleared even when the submit call
+ * returned an error -- confirm the callback is always invoked (and
+ * destroys the frame) in that case, otherwise the frame is leaked.
+ */
+ frame_cleanup = _gf_false;
+
+out:
+ if (dict) {
+ dict_unref (dict);
+ }
+ GF_FREE (req.dict.dict_val);
+
+ if (frame_cleanup && frame) {
+ /*
+ * Destroy the frame if we encountered an error
+ * Else we need to clean it up in
+ * mgmt_get_snapinfo_cbk
+ */
+ SVS_STACK_DESTROY (frame);
+ }
+
+ return ret;
+}
diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c
new file mode 100644
index 00000000000..72cfc908bba
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server.c
@@ -0,0 +1,2350 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "snapview-server.h"
+#include "snapview-server-mem-types.h"
+#include "compat-errno.h"
+
+#include "xlator.h"
+#include "rpc-clnt.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "syscall.h"
+#include <pthread.h>
+
+
+/*
+ * svs_lookup_entry_point: answer a lookup on the entry point directory of
+ * the snapshot world.
+ *
+ * @this:       snapview-server xlator
+ * @loc:        location being looked up; loc->inode must be set
+ * @parent:     parent inode of the entry point, may be NULL
+ * @buf:        filled with the attributes of the entry point
+ * @postparent: filled with the attributes of the parent
+ * @op_errno:   set to ENOMEM when the inode context cannot be allocated
+ *
+ * For an inode without a gfid a new gfid is generated and an inode context
+ * of type SNAP_VIEW_ENTRY_POINT_INODE is created. For an inode that already
+ * has a gfid the attributes saved in its inode context are returned when a
+ * context exists; otherwise they are rebuilt from the gfid.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int32_t
+svs_lookup_entry_point (xlator_t *this, loc_t *loc, inode_t *parent,
+                        struct iatt *buf, struct iatt *postparent,
+                        int32_t *op_errno)
+{
+        uuid_t       gfid;
+        svs_inode_t *inode_ctx = NULL;
+        int          op_ret    = -1;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, buf, out);
+        GF_VALIDATE_OR_GOTO (this->name, postparent, out);
+        /* op_errno is written through on the failure path below */
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (gf_uuid_is_null (loc->inode->gfid)) {
+                gf_uuid_generate (gfid);
+                svs_iatt_fill (gfid, buf);
+
+                /* Here the inode context of the entry point directory
+                   is filled with just the type of the inode and the gfid
+                   of the parent from where the entry point was entered.
+                   The glfs object and the fs instance will be NULL.
+                */
+                if (parent)
+                        svs_iatt_fill (parent->gfid, postparent);
+                else
+                        svs_iatt_fill (buf->ia_gfid, postparent);
+
+                inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode);
+                if (!inode_ctx) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "allocate inode context for entry point "
+                                "directory");
+                        op_ret = -1;
+                        *op_errno = ENOMEM;
+                        goto out;
+                }
+                gf_uuid_copy (inode_ctx->pargfid, loc->pargfid);
+                memcpy (&inode_ctx->buf, buf, sizeof (*buf));
+                inode_ctx->type = SNAP_VIEW_ENTRY_POINT_INODE;
+        } else {
+                /* Fetch the existing context; without this the cached
+                   attributes below could never be used because inode_ctx
+                   would still be NULL.
+                */
+                inode_ctx = svs_inode_ctx_get (this, loc->inode);
+                if (inode_ctx) {
+                        memcpy (buf, &inode_ctx->buf, sizeof (*buf));
+                        svs_iatt_fill (inode_ctx->pargfid, postparent);
+                } else {
+                        svs_iatt_fill (loc->inode->gfid, buf);
+                        if (parent)
+                                svs_iatt_fill (parent->gfid, postparent);
+                        else
+                                svs_iatt_fill (loc->inode->gfid, postparent);
+                }
+        }
+
+        op_ret = 0;
+
+out:
+        return op_ret;
+}
+
+/* When a lookup comes from the client and protocol/server tries to resolve
+   the pargfid by sending just the gfid as part of the lookup, the inode
+   for the parent gfid may not be found. Since that gfid has not been
+   looked up yet, the inode will not have an inode context and the parent is
+   not there (as it is the parent of the entry that is being resolved). So
+   without parent and inode context, svs cannot know which snapshot
+   to look into. In such cases, the ambiguity is handled by looking
+   into the latest snapshot. If the directory is there in the latest
+   snapshot, lookup is successful, otherwise it is a failure. So for
+   any directory created after taking the latest snapshot, entry into
+   snapshot world is denied. i.e you have to be part of snapshot world
+   to enter it. If the gfid is not found there, then unwind with
+   ESTALE.
+   This gets executed mainly in the situation where the snapshot entry
+   point is entered from a non-root directory and that non-root directory's
+   inode (or gfid) is not yet looked up. And in each case when a gfid has to
+   be looked up (without any inode context and parent context present), the
+   last snapshot is referred and a random gfid is not generated.
+
+   @this:       snapview-server xlator
+   @loc:        location; the gfid is taken from loc->inode->gfid when set,
+                otherwise from loc->gfid
+   @buf:        filled with the attributes obtained from the snapshot
+   @postparent: filled from the same gfid as @buf
+   @op_errno:   EINVAL (no latest snapshot), ESTALE (handle not found) or
+                ENOMEM (inode context allocation failed)
+
+   Returns 0 on success, -1 on failure.
+*/
+int32_t
+svs_lookup_gfid (xlator_t *this, loc_t *loc, struct iatt *buf,
+                 struct iatt *postparent, int32_t *op_errno)
+{
+        int32_t        op_ret                          = -1;
+        unsigned char  handle_obj[GFAPI_HANDLE_LENGTH] = {0, };
+        glfs_t        *fs                              = NULL;
+        glfs_object_t *object                          = NULL;
+        struct stat    statbuf                         = {0, };
+        svs_inode_t   *inode_ctx                       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, buf, out);
+        GF_VALIDATE_OR_GOTO (this->name, postparent, out);
+        /* op_errno is written through on the failure paths below */
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (gf_uuid_is_null (loc->gfid) && gf_uuid_is_null (loc->inode->gfid)) {
+                gf_log (this->name, GF_LOG_ERROR, "gfid is NULL");
+                goto out;
+        }
+
+        /* the gfid already attached to the inode takes precedence */
+        if (!gf_uuid_is_null (loc->inode->gfid))
+                memcpy (handle_obj, loc->inode->gfid,
+                        GFAPI_HANDLE_LENGTH);
+        else
+                memcpy (handle_obj, loc->gfid,
+                        GFAPI_HANDLE_LENGTH);
+
+        fs = svs_get_latest_snapshot (this);
+        if (!fs) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the latest "
+                        "snapshot");
+                op_ret = -1;
+                *op_errno = EINVAL;
+                goto out;
+        }
+
+        object = glfs_h_create_from_handle (fs, handle_obj, GFAPI_HANDLE_LENGTH,
+                                            &statbuf);
+        if (!object) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to do lookup and get "
+                        "the handle on the snapshot %s (path: %s, gfid: %s)",
+                        loc->name, loc->path, uuid_utoa (loc->gfid));
+                op_ret = -1;
+                *op_errno = ESTALE;
+                goto out;
+        }
+
+        inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode);
+        if (!inode_ctx) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate inode "
+                        "context");
+                op_ret = -1;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+
+        iatt_from_stat (buf, &statbuf);
+        if (!gf_uuid_is_null (loc->gfid))
+                gf_uuid_copy (buf->ia_gfid, loc->gfid);
+        else
+                gf_uuid_copy (buf->ia_gfid, loc->inode->gfid);
+
+        inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE;
+        inode_ctx->fs = fs;
+        inode_ctx->object = object;
+        memcpy (&inode_ctx->buf, buf, sizeof (*buf));
+        svs_iatt_fill (buf->ia_gfid, postparent);
+
+        op_ret = 0;
+
+out:
+        /* On failure the handle has not been stored in any inode context,
+           so it has to be closed here or it would leak (same clean-up as
+           svs_lookup_snapshot and svs_lookup_entry).
+        */
+        if (op_ret && object)
+                glfs_h_close (object);
+
+        return op_ret;
+}
+
+/* If the parent is an entry point inode, then create the handle for the
+ snapshot on which lookup came. i.e in reality lookup came on
+ the directory from which the entry point directory was entered, but
+ lookup is into the past. So create the handle for it by doing
+ the name-less lookup on the gfid (which can be obtained from
+ parent's context).
+
+ loc->name is used as the snapshot name. buf and postparent are filled
+ on success. Returns 0 on success and -1 on failure with *op_errno set
+ (ENOENT when the fs instance cannot be created, errno of the handle
+ creation, or ENOMEM). On failure the handle obtained here is closed.
+*/
+int32_t
+svs_lookup_snapshot (xlator_t *this, loc_t *loc, struct iatt *buf,
+ struct iatt *postparent, inode_t *parent,
+ svs_inode_t *parent_ctx, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = {0, };
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat statbuf = {0, };
+ svs_inode_t *inode_ctx = NULL;
+ uuid_t gfid;
+
+ GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, buf, out);
+ GF_VALIDATE_OR_GOTO (this->name, postparent, out);
+ GF_VALIDATE_OR_GOTO (this->name, parent_ctx, out);
+ GF_VALIDATE_OR_GOTO (this->name, parent, out);
+ /* whatever the call below stores in *op_errno is replaced by ENOENT on failure */
+ fs = svs_initialise_snapshot_volume (this, loc->name, op_errno);
+ if (!fs) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to "
+ "create the fs instance for snap %s",
+ loc->name);
+ *op_errno = ENOENT;
+ op_ret = -1;
+ goto out;
+ }
+ /* the handle is built from the gfid saved in the entry point's context */
+ memcpy (handle_obj, parent_ctx->pargfid,
+ GFAPI_HANDLE_LENGTH);
+ object = glfs_h_create_from_handle (fs, handle_obj, GFAPI_HANDLE_LENGTH,
+ &statbuf);
+ if (!object) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to do lookup and "
+ "get the handle on the snapshot %s", loc->name);
+ op_ret = -1;
+ *op_errno = errno;
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "allocate inode context");
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ /* reuse a gfid already known for this inode (inode gfid first, then loc gfid); generate one only when both are null */
+ if (gf_uuid_is_null (loc->gfid) &&
+ gf_uuid_is_null (loc->inode->gfid))
+ gf_uuid_generate (gfid);
+ else {
+ if (!gf_uuid_is_null (loc->inode->gfid))
+ gf_uuid_copy (gfid, loc->inode->gfid);
+ else
+ gf_uuid_copy (gfid, loc->gfid);
+ }
+ iatt_from_stat (buf, &statbuf);
+ gf_uuid_copy (buf->ia_gfid, gfid);
+ svs_fill_ino_from_gfid (buf);
+ inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE;
+ inode_ctx->fs = fs;
+ inode_ctx->object = object;
+ memcpy (&inode_ctx->buf, buf, sizeof (*buf));
+ svs_iatt_fill (parent->gfid, postparent);
+ /* NOTE(review): SVS_STRDUP is defined elsewhere; the check below treats a NULL snapname as an allocation failure */
+ SVS_STRDUP (inode_ctx->snapname, loc->name);
+ if (!inode_ctx->snapname) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ op_ret = 0;
+
+out:
+ if (op_ret) {
+ if (object)
+ glfs_h_close (object);
+ /* the context must not keep pointing at the handle closed above */
+ if (inode_ctx)
+ inode_ctx->object = NULL;
+ }
+
+ return op_ret;
+}
+
+/* Both parent and entry are from snapshot world.
+ * The entry loc->name is looked up under the parent's handle
+ * (parent_ctx->object) in the parent's fs instance (parent_ctx->fs).
+ * buf and postparent are filled on success.
+ * Returns 0 on success and -1 on failure with *op_errno set. On failure
+ * the handle obtained here is closed.
+ */
+int32_t
+svs_lookup_entry (xlator_t *this, loc_t *loc, struct iatt *buf,
+ struct iatt *postparent, inode_t *parent,
+ svs_inode_t *parent_ctx, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat statbuf = {0, };
+ svs_inode_t *inode_ctx = NULL;
+ glfs_object_t *parent_object = NULL;
+ uuid_t gfid = {0, };
+
+ GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, buf, out);
+ GF_VALIDATE_OR_GOTO (this->name, postparent, out);
+ GF_VALIDATE_OR_GOTO (this->name, parent_ctx, out);
+ GF_VALIDATE_OR_GOTO (this->name, parent, out);
+ /* NOTE(review): fs and parent_object are used as found in the parent context; they are not checked for NULL here */
+ parent_object = parent_ctx->object;
+ fs = parent_ctx->fs;
+
+ object = glfs_h_lookupat (fs, parent_object, loc->name,
+ &statbuf, 0);
+ if (!object) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to do lookup and "
+ "get the handle for entry %s (path: %s)", loc->name,
+ loc->path);
+ op_ret = -1;
+ *op_errno = errno;
+ goto out;
+ }
+
+ if (gf_uuid_is_null(object->gfid)) {
+ gf_log (this->name, GF_LOG_DEBUG, "gfid from glfs handle is "
+ "NULL for entry %s (path: %s)", loc->name, loc->path);
+ op_ret = -1;
+ *op_errno = errno; /* NOTE(review): no call has failed at this point, so errno may not describe this condition */
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "allocate inode context");
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ /* reuse a gfid already known for this inode; otherwise build one from the snapshot name and the gfid of the handle */
+ if (gf_uuid_is_null (loc->gfid) &&
+ gf_uuid_is_null (loc->inode->gfid))
+ svs_uuid_generate (gfid, parent_ctx->snapname, object->gfid);
+ else {
+ if (!gf_uuid_is_null (loc->inode->gfid))
+ gf_uuid_copy (gfid, loc->inode->gfid);
+ else
+ gf_uuid_copy (gfid, loc->gfid);
+ }
+
+ iatt_from_stat (buf, &statbuf);
+ gf_uuid_copy (buf->ia_gfid, gfid);
+ svs_fill_ino_from_gfid (buf);
+ inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE;
+ inode_ctx->fs = fs;
+ inode_ctx->object = object;
+ memcpy (&inode_ctx->buf, buf, sizeof (*buf));
+ svs_iatt_fill (parent->gfid, postparent);
+ /* only directories get a copy of the parent's snapshot name */
+ if (IA_ISDIR (buf->ia_type)) {
+ SVS_STRDUP (inode_ctx->snapname, parent_ctx->snapname);
+ if (!inode_ctx->snapname) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ op_ret = 0;
+
+out:
+ if (op_ret) {
+ if (object)
+ glfs_h_close (object);
+ /* the context must not keep pointing at the handle closed above */
+ if (inode_ctx)
+ inode_ctx->object = NULL;
+ }
+
+ return op_ret;
+}
+
+/* inode context is there means lookup has come on an object which was
+ built either as part of lookup or as part of readdirp. But in readdirp
+ we would not have got the handle to access the object in the gfapi
+ world.
+ So if inode context contains glfs_t instance for the right
+ gfapi world and glfs_object_t handle for accessing it in the gfapi
+ world, then unwind with success as the snapshots as of now are
+ read-only.
+ If the above condition is not met, then send lookup call again to
+ the gfapi world. It can happen only if both parent context and
+ the name of the entry are present.
+
+ If parent is an entry point to snapshot world:
+ * parent is needed for getting the gfid on which lookup has to be done
+ (the gfid present in the inode is a virtual gfid) in the snapshot
+ world.
+ * name is required to get the right glfs_t instance on which lookup
+ has to be done
+
+ If parent is a directory from snapshot world:
+ * parent context is needed to get the glfs_t instance and to get the
+ handle to parent directory in the snapshot world.
+ * name is needed to do the lookup on the right entry in the snapshot
+ world
+
+ Returns 0 on success (buf and postparent filled) and -1 on failure;
+ *op_errno is set to ESTALE when the name or the parent context needed
+ for a fresh lookup is missing.
+*/
+int32_t
+svs_revalidate (xlator_t *this, loc_t *loc, inode_t *parent,
+ svs_inode_t *inode_ctx, svs_inode_t *parent_ctx,
+ struct iatt *buf, struct iatt *postparent, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ int ret = -1;
+ char tmp_uuid[64] = {0, };
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+
+ GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, buf, out);
+ GF_VALIDATE_OR_GOTO (this->name, postparent, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode_ctx, out);
+ /* NOTE(review): loc and loc->inode are dereferenced below but are not validated in this function */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ svs_iatt_fill (loc->inode->gfid, buf);
+ if (parent)
+ svs_iatt_fill (parent->gfid,
+ postparent);
+ else
+ svs_iatt_fill (loc->inode->gfid, postparent);
+ op_ret = 0;
+ goto out;
+ } else {
+ /* Though fs and object are present in the inode context, it's
+ * better to check if fs is valid or not before doing anything.
+ * It's for the protection from the following operations.
+ * 1) Create a file on the glusterfs mount point
+ * 2) Create a snapshot (say "snap1")
+ * 3) Access the contents of the snapshot
+ * 4) Delete the file from the mount point
+ * 5) Delete the snapshot "snap1"
+ * 6) Create a new snapshot "snap1"
+ *
+ * Now accessing the new snapshot "snap1" gives problems.
+ * Because the inode and dentry created for snap1 would not be
+ * deleted upon the deletion of the snapshot (as deletion of
+ * snapshot is a gluster cli operation, not a fop). So next time
+ * upon creation of a new snap with same name, the previous
+ * inode and dentry itself will be used. But the inode context
+ * contains old information about the glfs_t instance and the
+ * handle in the gfapi world. Thus the glfs_t instance should
+ * be checked before accessing. If it's wrong, then the right
+ * instance should be obtained by doing the lookup.
+ */
+ if (inode_ctx->fs && inode_ctx->object) {
+ fs = inode_ctx->fs;
+ object = inode_ctx->object;
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); /* NOTE(review): macro defined elsewhere; the test below implies it clears fs when the instance is no longer valid - confirm */
+ if (fs) {
+ memcpy (buf, &inode_ctx->buf, sizeof (*buf));
+ if (parent)
+ svs_iatt_fill (parent->gfid,
+ postparent);
+ else
+ svs_iatt_fill (buf->ia_gfid,
+ postparent);
+ op_ret = 0;
+ goto out;
+ } else {
+ inode_ctx->fs = NULL;
+ inode_ctx->object = NULL;
+ ret = svs_get_handle (this, loc, inode_ctx,
+ op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get the handle for "
+ "%s (gfid %s)", loc->path,
+ uuid_utoa_r (loc->inode->gfid,
+ tmp_uuid));
+ op_ret = -1;
+ goto out;
+ }
+ }
+ }
+ /* Reached when the context had no fs/object, or after svs_get_handle() succeeded above. */
+ /* To send the lookup to gfapi world, both the name of the
+ entry as well as the parent context is needed.
+ */
+ if (!loc->name || !parent_ctx) {
+ *op_errno = ESTALE;
+ gf_log (this->name, GF_LOG_ERROR, "%s is NULL",
+ loc->name?"parent context":"loc->name");
+ goto out;
+ }
+
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ op_ret = svs_lookup_snapshot (this, loc, buf,
+ postparent, parent,
+ parent_ctx, op_errno);
+ else
+ op_ret = svs_lookup_entry (this, loc, buf, postparent,
+ parent, parent_ctx,
+ op_errno);
+
+ goto out;
+ }
+
+out:
+ return op_ret;
+}
+/*
+ * svs_lookup: lookup fop of snapview-server.
+ *
+ * Dispatches to one of the helpers depending on what is known about the
+ * inode and its parent:
+ *  - entry point (flagged through the "entry-point" key in xdata on a
+ *    named lookup, or through the inode context type):
+ *    svs_lookup_entry_point()
+ *  - inode context present: svs_revalidate()
+ *  - neither inode nor parent context: fails with ESTALE unless xdata
+ *    carried "entry-point", in which case attributes are built from the
+ *    gfid and success is returned
+ *  - parent context present: svs_lookup_snapshot() or svs_lookup_entry()
+ *
+ * The result is always delivered through STACK_UNWIND_STRICT; the
+ * function itself returns 0.
+ */
+int32_t
+svs_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct iatt buf = {0, };
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ struct iatt postparent = {0,};
+ svs_inode_t *inode_ctx = NULL;
+ svs_inode_t *parent_ctx = NULL;
+ int32_t ret = -1;
+ svs_private_t *private = NULL;
+ inode_t *parent = NULL;
+ snap_dirent_t *dirent = NULL;
+ gf_boolean_t entry_point_key = _gf_false;
+ gf_boolean_t entry_point = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("svs", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ private = this->private;
+
+ /* For lookups sent on inodes (i.e not parent inode + basename, but
+ direct inode itself which usually is a nameless lookup or revalidate
+ on the inode), loc->name will not be there. Get it from path if
+ it is there.
+ This is the difference between nameless lookup and revalidate lookup
+ on an inode:
+ nameless lookup: loc->path contains gfid and strrchr on it fails
+ revalidate lookup: loc->path contains the entry name of the inode
+ and strrchr gives the name of the entry from path
+ */
+ if (loc->path) {
+ if (!loc->name || (loc->name && !strcmp (loc->name, ""))) {
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+ }
+ /* find the parent inode: loc->parent, else by pargfid, else through inode_parent(); it is unref'd after the unwind */
+ if (loc->parent)
+ parent = inode_ref (loc->parent);
+ else {
+ parent = inode_find (loc->inode->table, loc->pargfid);
+ if (!parent)
+ parent = inode_parent (loc->inode, NULL, NULL);
+ }
+ if (parent)
+ parent_ctx = svs_inode_ctx_get (this, parent);
+
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+
+ /* Initialize latest snapshot, which is used for nameless lookups */
+ dirent = svs_get_latest_snap_entry (this);
+ /* the result of the initialisation is not checked here */
+ if (dirent && !dirent->fs) {
+ svs_initialise_snapshot_volume (this, dirent->name, NULL);
+ }
+
+ if (xdata && !inode_ctx) {
+ ret = dict_get_str_boolean (xdata, "entry-point", _gf_false);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to get the "
+ "entry point info");
+ entry_point_key = _gf_false;
+ } else {
+ entry_point_key = ret;
+ }
+
+ if (loc->name && strlen (loc->name)) {
+ /* lookup can come with the entry-point set in the dict
+ * for the parent directory of the entry-point as well.
+ * So consider entry_point only for named lookup
+ */
+ entry_point = entry_point_key;
+ }
+ }
+
+ if (inode_ctx && inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ /* entry-point may not be set in the dictionary.
+ * This can happen if snap-view client is restarted where
+ * inode-ctx not available and a nameless lookup has come
+ */
+ entry_point = _gf_true;
+ }
+
+ /* lookup is on the entry point to the snapshot world */
+ if (entry_point) {
+ op_ret = svs_lookup_entry_point (this, loc, parent, &buf,
+ &postparent, &op_errno);
+ goto out;
+ }
+
+ /* revalidate */
+ if (inode_ctx) {
+ op_ret = svs_revalidate (this, loc, parent, inode_ctx,
+ parent_ctx, &buf, &postparent,
+ &op_errno);
+ goto out;
+ }
+
+ /* This can happen when entry point directory is entered from non-root
+ directory. (ex: if /mnt/glusterfs is the mount point, then entry
+ point (say .snaps) is entered from /mnt/glusterfs/dir/.snaps). Also
+ it can happen when client sends a nameless lookup on just a gfid and
+ the server does not have the inode in the inode table.
+ */
+ if (!inode_ctx && !parent_ctx) {
+ if (gf_uuid_is_null (loc->gfid) &&
+ gf_uuid_is_null (loc->inode->gfid)) {
+ gf_log (this->name, GF_LOG_DEBUG, "gfid is NULL, "
+ "either the lookup came on missing entry or "
+ "the entry is stale");
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ if (!entry_point_key) {
+ /* This can happen when there is no inode_ctx available.
+ * snapview-server might have restarted or
+ * graph change might have happened
+ */
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ /* lookup is on the parent directory of entry-point.
+ * this would have already looked up by snap-view client
+ * so return success
+ */
+ if (!gf_uuid_is_null (loc->gfid))
+ gf_uuid_copy (buf.ia_gfid, loc->gfid);
+ else
+ gf_uuid_copy (buf.ia_gfid, loc->inode->gfid);
+
+ svs_iatt_fill (buf.ia_gfid, &buf);
+ svs_iatt_fill (buf.ia_gfid, &postparent);
+
+ op_ret = 0;
+ goto out;
+ }
+ /* no inode context yet but the parent is known: do a fresh lookup, chosen by the type of the parent */
+ if (parent_ctx) {
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ op_ret = svs_lookup_snapshot (this, loc, &buf,
+ &postparent, parent,
+ parent_ctx, &op_errno);
+ else
+ op_ret = svs_lookup_entry (this, loc, &buf,
+ &postparent, parent,
+ parent_ctx, &op_errno);
+ goto out;
+ }
+
+out:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+ loc?loc->inode:NULL, &buf, xdata, &postparent);
+
+ if (parent)
+ inode_unref (parent);
+
+ return 0;
+}
+
+/*
+ * svs_opendir: opendir fop of snapview-server.
+ *
+ * Success is faked for the entry point directory. For every other inode
+ * the directory is opened through its glfs handle and the resulting glfd
+ * is stored in the fd context. The result is delivered through
+ * STACK_UNWIND_STRICT; the function itself always returns 0.
+ */
+int32_t
+svs_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+             dict_t *xdata)
+{
+        int32_t        op_ret    = -1;
+        int32_t        op_errno  = EINVAL;
+        svs_inode_t   *inode_ctx = NULL;
+        glfs_t        *fs        = NULL;
+        glfs_object_t *object    = NULL;
+        glfs_fd_t     *dir_glfd  = NULL;
+        svs_fd_t      *fd_ctx    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        inode_ctx = svs_inode_ctx_get (this, loc->inode);
+        if (inode_ctx == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode context not found "
+                        "for the inode %s", uuid_utoa (loc->inode->gfid));
+                op_ret = -1;
+                op_errno = ESTALE;
+                goto out;
+        }
+
+        /* Fake success is sent if the opendir is on the entry point
+           directory, i.e. the inode is SNAP_VIEW_ENTRY_POINT_INODE.
+        */
+        if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+                op_ret = 0;
+                op_errno = 0;
+                goto out;
+        }
+
+        SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+                               op_errno, out);
+
+        dir_glfd = glfs_h_opendir (fs, object);
+        if (dir_glfd == NULL) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "opendir on %s "
+                        "failed (gfid: %s)", loc->name,
+                        uuid_utoa (loc->inode->gfid));
+                goto out;
+        }
+
+        fd_ctx = svs_fd_ctx_get_or_new (this, fd);
+        if (fd_ctx == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to allocate "
+                        "fd context %s (gfid: %s)", loc->name,
+                        uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = ENOMEM;
+                /* nothing will own the glfd, so close it right away */
+                glfs_closedir (dir_glfd);
+                goto out;
+        }
+
+        fd_ctx->fd = dir_glfd;
+        op_ret = 0;
+        op_errno = 0;
+
+out:
+        STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
+
+        return 0;
+}
+
+/*
+ * This function adds the xattr keys present in the list (@list) to the dict.
+ * But the list contains only the names of the xattrs (and no value, as
+ * the gfapi functions for the listxattr operations would return only the
+ * names of the xattrs in the buffer provided by the caller, though they had
+ * got the values of those xattrs from posix) as described in the man page of
+ * listxattr. But before unwinding snapview-server has to put those names
+ * back into the dict. But to get the values for those xattrs it has to do the
+ * getxattr operation on each xattr which might turn out to be a costly
+ * operation. So for each of the xattrs present in the list, a 0 byte value
+ * ("") is set into the dict before unwinding. This can be treated as an
+ * indicator to other xlators which want to cache the xattrs (as of now,
+ * md-cache which caches acl and selinux related xattrs) to not cache the
+ * values of the xattrs present in the dict.
+ *
+ * @this: snapview-server xlator (used for logging)
+ * @dict: dict that receives one key per xattr name
+ * @list: buffer of NUL-terminated xattr names placed back to back
+ * @size: number of bytes of @list that are in use
+ *
+ * Names longer than the key buffer are truncated, but the walk always
+ * advances by the length the name occupies in @list, so the remainder of a
+ * long name is never mistaken for a new key and the walk never leaves the
+ * first @size bytes.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int32_t
+svs_add_xattrs_to_dict (xlator_t *this, dict_t *dict, char *list, ssize_t size)
+{
+        char     keybuffer[4096] = {0,};
+        size_t   remaining_size  = 0;
+        size_t   list_offset     = 0;
+        size_t   name_len        = 0;
+        size_t   copy_len        = 0;
+        size_t   consumed        = 0;
+        char    *entry           = NULL;
+        char    *terminator      = NULL;
+        int32_t  ret             = -1;
+#ifdef GF_DARWIN_HOST_OS
+        char    *newkey          = NULL;
+#endif
+
+        GF_VALIDATE_OR_GOTO ("snapview-daemon", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, list, out);
+
+        /* a negative size must not be turned into a huge unsigned count */
+        if (size < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "invalid size for the "
+                        "xattr list");
+                goto out;
+        }
+
+        remaining_size = (size_t) size;
+        while (remaining_size > 0) {
+                entry = list + list_offset;
+
+                /* length of this name as stored in the list, bounded by
+                   what is left of the list */
+                terminator = memchr (entry, '\0', remaining_size);
+                if (terminator) {
+                        name_len = (size_t) (terminator - entry);
+                        consumed = name_len + 1;
+                } else {
+                        name_len = remaining_size;
+                        consumed = remaining_size;
+                }
+
+                copy_len = name_len;
+                if (copy_len > sizeof (keybuffer) - 1)
+                        copy_len = sizeof (keybuffer) - 1;
+                memcpy (keybuffer, entry, copy_len);
+                keybuffer[copy_len] = '\0';
+#ifdef GF_DARWIN_HOST_OS
+                /* The protocol expect namespace for now */
+                newkey = NULL;
+                gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey);
+                if (newkey) {
+                        /* bounded copy: the prefixed key may not fit */
+                        strncpy (keybuffer, newkey, sizeof (keybuffer) - 1);
+                        keybuffer[sizeof (keybuffer) - 1] = '\0';
+                        GF_FREE (newkey);
+                }
+#endif
+                ret = dict_set_str (dict, keybuffer, "");
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+                                "for the key %s failed.", keybuffer);
+                        goto out;
+                }
+
+                /* advance by the bytes used in the list, not by the length
+                   of the (possibly truncated or prefixed) key */
+                remaining_size -= consumed;
+                list_offset += consumed;
+        } /* while (remaining_size > 0) */
+
+        ret = 0;
+
+out:
+        return ret;
+}
+/*
+ * svs_getxattr: getxattr fop of snapview-server.
+ *
+ * With a key (@name) the value of that xattr is fetched through the glfs
+ * handle of the inode and stored in the reply dict. Without a key the
+ * buffer returned by glfs_h_getxattrs() is treated as a list of names and
+ * each name is added to the dict through svs_add_xattrs_to_dict().
+ * The entry point directory always gets ENODATA.
+ *
+ * The result is delivered through STACK_UNWIND_STRICT; the function
+ * itself always returns 0.
+ */
+int32_t
+svs_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ char *value = 0;
+ ssize_t size = 0;
+ dict_t *dict = NULL;
+
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", frame, out);
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", loc, out);
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", loc->inode, out);
+
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context not found "
+ "for the inode %s", uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ /* ENODATA is sent if the getxattr is on entry point directory
+ or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is
+ a virtual directory on which setxattr operations are not
+ allowed. If getxattr has to be faked as success, then a value
+ for the name of the xattr has to be sent which we don't have.
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto out;
+ }
+ else {
+ /* NOTE(review): the macro is defined elsewhere; it is given op_ret, op_errno and the out label, so it presumably jumps to out when fs/object cannot be obtained - confirm */
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "allocate dict");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ /* first call passes no buffer; its result is used to size the buffer allocated below */
+ size = glfs_h_getxattrs (fs, object, name, NULL, 0);
+ if (size == -1) {
+ gf_log (this->name,
+ errno == ENODATA?GF_LOG_DEBUG:GF_LOG_ERROR,
+ "getxattr on %s failed (key: %s) with %s",
+ loc->path, name, strerror(errno));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ value = GF_CALLOC (size + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!value) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "allocate memory for getxattr on %s "
+ "(key: %s)", loc->name, name);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ size = glfs_h_getxattrs (fs, object, name, value, size);
+ if (size == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "get the xattr %s for entry %s", name,
+ loc->name);
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ value[size] = '\0';
+ /* with a key, value is handed to the dict and is not freed here on success (presumably the dict owns it from then on); without a key, value is only a list of names and is freed once they are copied */
+ if (name) {
+ op_ret = dict_set_dynptr (dict, (char *)name, value,
+ size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_log (this->name, GF_LOG_ERROR, "dict set "
+ "operation for %s for the key %s "
+ "failed.", loc->path, name);
+ GF_FREE (value);
+ value = NULL;
+ goto out;
+ }
+ } else {
+ op_ret = svs_add_xattrs_to_dict (this, dict, value,
+ size);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "add the xattrs from the list to dict");
+ op_errno = ENOMEM;
+ goto out;
+ }
+ GF_FREE (value);
+ }
+ }
+
+out:
+ if (op_ret)
+ GF_FREE (value);
+
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL);
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+/*
+ * svs_fgetxattr: fgetxattr fop of snapview-server.
+ *
+ * With a key (@name) the value of that xattr is read through the glfd kept
+ * in the fd context and stored in the reply dict. Without a key the xattr
+ * names are listed through the glfd and each name is added to the dict by
+ * svs_add_xattrs_to_dict(). The entry point directory always gets EINVAL.
+ *
+ * The result is delivered through STACK_UNWIND_STRICT; the function itself
+ * always returns 0.
+ */
+int32_t
+svs_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+               dict_t *xdata)
+{
+        svs_inode_t *inode_ctx = NULL;
+        int32_t      op_ret    = -1;
+        int32_t      op_errno  = EINVAL;
+        char        *value     = NULL;
+        ssize_t      size      = 0;
+        dict_t      *dict      = NULL;
+        svs_fd_t    *sfd       = NULL;
+        glfs_fd_t   *glfd      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", frame, out);
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", fd, out);
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", fd->inode, out);
+
+        inode_ctx = svs_inode_ctx_get (this, fd->inode);
+        if (!inode_ctx) {
+                gf_log (this->name, GF_LOG_ERROR, "inode context not found "
+                        "for the inode %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = ESTALE;
+                goto out;
+        }
+
+        sfd = svs_fd_ctx_get_or_new (this, fd);
+        if (!sfd) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the fd "
+                        "context for %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EBADFD;
+                goto out;
+        }
+
+        glfd = sfd->fd;
+        /* EINVAL is sent if the getxattr is on entry point directory
+           or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is
+           a virtual directory on which setxattr operations are not
+           allowed. If getxattr has to be faked as success, then a value
+           for the name of the xattr has to be sent which we don't have.
+        */
+        if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to "
+                        "allocate dict");
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        if (name) {
+                /* first call passes no buffer; its result sizes the
+                   buffer allocated below */
+                size = glfs_fgetxattr (glfd, name, NULL, 0);
+                if (size == -1) {
+                        gf_log (this->name, GF_LOG_ERROR, "getxattr on "
+                                "%s failed (key: %s)",
+                                uuid_utoa (fd->inode->gfid), name);
+                        op_ret = -1;
+                        op_errno = errno;
+                        goto out;
+                }
+                value = GF_CALLOC (size + 1, sizeof (char),
+                                   gf_common_mt_char);
+                if (!value) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "allocate memory for getxattr on %s "
+                                "(key: %s)",
+                                uuid_utoa (fd->inode->gfid), name);
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+
+                size = glfs_fgetxattr (glfd, name, value, size);
+                if (size == -1) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "get the xattr %s for inode %s", name,
+                                uuid_utoa (fd->inode->gfid));
+                        op_ret = -1;
+                        op_errno = errno;
+                        goto out;
+                }
+                value[size] = '\0';
+
+                op_ret = dict_set_dynptr (dict, (char *)name, value,
+                                          size);
+                if (op_ret < 0) {
+                        op_errno = -op_ret;
+                        gf_log (this->name, GF_LOG_ERROR, "dict set "
+                                "operation for gfid %s for the key %s "
+                                "failed.",
+                                uuid_utoa (fd->inode->gfid), name);
+                        GF_FREE (value);
+                        /* cleared so that the clean-up at out: does not
+                           free the same buffer a second time */
+                        value = NULL;
+                        goto out;
+                }
+        } else {
+                size = glfs_flistxattr (glfd, NULL, 0);
+                if (size == -1) {
+                        /* report the real error, as the other failure
+                           paths of this function do */
+                        op_ret = -1;
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR, "listxattr "
+                                "on %s failed",
+                                uuid_utoa (fd->inode->gfid));
+                        goto out;
+                }
+
+                value = GF_CALLOC (size + 1, sizeof (char),
+                                   gf_common_mt_char);
+                if (!value) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "allocate buffer for xattr list (%s)",
+                                uuid_utoa (fd->inode->gfid));
+                        goto out;
+                }
+
+                size = glfs_flistxattr (glfd, value, size);
+                if (size == -1) {
+                        op_ret = -1;
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR, "listxattr "
+                                "on %s failed",
+                                uuid_utoa (fd->inode->gfid));
+                        goto out;
+                }
+
+                op_ret = svs_add_xattrs_to_dict (this, dict, value,
+                                                 size);
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
+                                "add the xattrs from the list to dict");
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+                /* the names have been copied into the dict */
+                GF_FREE (value);
+                value = NULL;
+        }
+
+        op_ret = 0;
+        op_errno = 0;
+
+out:
+        if (op_ret)
+                GF_FREE (value);
+
+        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * svs_releasedir: drop the fd context of a directory fd.
+ *
+ * The context is removed from the fd, its glfd (if any) is closed with
+ * glfs_closedir() and the context is freed. Always returns 0.
+ */
+int32_t
+svs_releasedir (xlator_t *this, fd_t *fd)
+{
+        uint64_t  ctx_value = 0;
+        svs_fd_t *dir_ctx   = NULL;
+        int       rc        = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        rc = fd_ctx_del (fd, this, &ctx_value);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "pfd from fd=%p is NULL", fd);
+                goto out;
+        }
+
+        dir_ctx = (svs_fd_t *)(long)ctx_value;
+        if (dir_ctx->fd) {
+                rc = glfs_closedir (dir_ctx->fd);
+                if (rc != 0) {
+                        /* a failed close is only logged; the context is
+                           freed regardless */
+                        gf_log (this->name, GF_LOG_WARNING, "failed to close "
+                                "the glfd for directory %s",
+                                uuid_utoa (fd->inode->gfid));
+                }
+        }
+
+        GF_FREE (dir_ctx);
+
+out:
+        return 0;
+}
+
+/*
+ * svs_flush: flush fop of snapview-server.
+ *
+ * Nothing is flushed; the fop succeeds when the inode has a context and,
+ * unless the inode is the entry point, the fd has a context as well.
+ * The result is delivered through STACK_UNWIND_STRICT; the function itself
+ * always returns 0.
+ */
+int32_t
+svs_flush (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, dict_t *xdata)
+{
+        svs_inode_t *inode_ctx = NULL;
+        uint64_t     ctx_value = 0;
+        int          rc        = -1;
+        int32_t      op_ret    = -1;
+        int32_t      op_errno  = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        inode_ctx = svs_inode_ctx_get (this, fd->inode);
+        if (inode_ctx == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode context not found for"
+                        " the inode %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        rc = fd_ctx_get (fd, this, &ctx_value);
+        /* a missing fd context is tolerated only for the entry point */
+        if ((rc < 0) && (inode_ctx->type != SNAP_VIEW_ENTRY_POINT_INODE)) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "pfd is NULL on fd=%p", fd);
+                goto out;
+        }
+
+        op_ret = 0;
+
+out:
+        STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * svs_release: drop the fd context of a file fd.
+ *
+ * The context is removed from the fd, its glfd (if any) is closed with
+ * glfs_close() and the context is freed. Always returns 0.
+ */
+int32_t
+svs_release (xlator_t *this, fd_t *fd)
+{
+        uint64_t  ctx_value = 0;
+        svs_fd_t *file_ctx  = NULL;
+        int       rc        = 0;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        rc = fd_ctx_del (fd, this, &ctx_value);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "pfd from fd=%p is NULL", fd);
+                goto out;
+        }
+
+        file_ctx = (svs_fd_t *)(long)ctx_value;
+        if (file_ctx->fd) {
+                rc = glfs_close (file_ctx->fd);
+                if (rc != 0) {
+                        /* a failed close is only logged; the context is
+                           freed regardless */
+                        gf_log (this->name, GF_LOG_ERROR, "failed to close "
+                                "the glfd for %s",
+                                uuid_utoa (fd->inode->gfid));
+                }
+        }
+
+        GF_FREE (file_ctx);
+
+out:
+        return 0;
+}
+
+/*
+ * forget callback: remove this translator's context from the inode and free
+ * it together with the snapshot name it holds. Always returns 0.
+ */
+int32_t
+svs_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t     ctx_value = 0;
+        svs_inode_t *ictx      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        if (inode_ctx_del (inode, this, &ctx_value)) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to delte the inode "
+                        "context of %s", uuid_utoa (inode->gfid));
+                goto out;
+        }
+
+        ictx = (svs_inode_t *)ctx_value;
+        if (ictx) {
+                if (ictx->snapname)
+                        GF_FREE (ictx->snapname);
+
+                GF_FREE (ictx);
+        }
+
+out:
+        return 0;
+}
+
+/*
+ * Build the directory listing of the entry-point directory from the
+ * snapshot table in the private structure.
+ *
+ * @this:    the snapview-server translator
+ * @entries: list head to which the generated dirents are appended
+ * @size:    byte budget for the reply; filling stops before exceeding it
+ * @off:     index into the snapshot table at which to start
+ *
+ * Returns the number of dirents appended (0 on invalid arguments or when
+ * @off is outside the table).
+ */
+int
+svs_fill_readdir (xlator_t *this, gf_dirent_t *entries, size_t size, off_t off)
+{
+        gf_dirent_t     *entry          = NULL;
+        svs_private_t   *priv           = NULL;
+        off_t           idx             = 0;
+        snap_dirent_t   *dirents        = NULL;
+        size_t          this_size       = 0;
+        size_t          filled_size     = 0;
+        int             count           = 0;
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", entries, out);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /*
+         * The offset comes from the client. A negative value would index
+         * before the start of the snapshot table, so refuse it up front.
+         * The index is kept as off_t so large offsets are not truncated
+         * into a seemingly valid int.
+         */
+        if (off < 0)
+                goto out;
+
+        /* create the dir entries */
+        LOCK (&priv->snaplist_lock);
+        {
+                dirents = priv->dirents;
+
+                for (idx = off; idx < priv->num_snaps; idx++) {
+                        this_size = sizeof (gf_dirent_t) +
+                                strlen (dirents[idx].name) + 1;
+                        if (this_size + filled_size > size)
+                                goto unlock;
+
+                        entry = gf_dirent_for_name (dirents[idx].name);
+                        if (!entry) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to allocate dentry for %s",
+                                        dirents[idx].name);
+                                goto unlock;
+                        }
+
+                        /* the next readdir resumes at the following slot */
+                        entry->d_off = idx + 1;
+                        /*
+                         * readdir on the entry-point directory to the snapshot
+                         * world, will return elements in the list of the
+                         * snapshots as the directory entries. Since the entries
+                         * returned are virtual entries which does not exist
+                         * physically on the disk, pseudo inode numbers are
+                         * generated.
+                         */
+                        entry->d_ino = idx + 2*42;
+                        entry->d_type = DT_DIR;
+                        list_add_tail (&entry->list, &entries->list);
+                        count++;
+                        filled_size += this_size;
+                }
+        }
+unlock:
+        UNLOCK (&priv->snaplist_lock);
+
+out:
+        return count;
+}
+
+/*
+ * Read entries from the gfapi directory handle @glfd and append them to
+ * @entries until the next entry would exceed @size bytes, the directory is
+ * exhausted, or an error occurs.
+ *
+ * @this:        the snapview-server translator
+ * @glfd:        open gfapi directory handle to read from
+ * @entries:     list head to which the dirents are appended
+ * @op_errno:    set to ENOENT at end of directory, or to errno when the
+ *               gfapi read call fails; untouched otherwise
+ * @buf:         scratch iatt used only when @readdirplus is true
+ * @readdirplus: when true, stat information is fetched with each entry and
+ *               copied into entry->d_stat
+ * @size:        byte budget for the reply
+ *
+ * Returns the number of dirents appended.
+ */
+int32_t
+svs_glfs_readdir (xlator_t *this, glfs_fd_t *glfd, gf_dirent_t *entries,
+                  int32_t *op_errno, struct iatt *buf, gf_boolean_t readdirplus,
+                  size_t size)
+{
+        int              filled_size    = 0;
+        int              this_size      = 0;
+        int32_t          ret            = -1;
+        int32_t          count          = 0;
+        gf_dirent_t     *entry          = NULL;
+        struct dirent   *dirents        = NULL;
+        struct dirent    de             = {0, };
+        struct stat      statbuf        = {0, };
+        off_t            in_case        = -1;
+
+        GF_VALIDATE_OR_GOTO ("svs", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, glfd, out);
+        GF_VALIDATE_OR_GOTO (this->name, entries, out);
+
+        while (filled_size < size) {
+                /* remember the position so an oversized entry can be
+                 * pushed back with glfs_seekdir() below */
+                in_case = glfs_telldir (glfd);
+                if (in_case == -1) {
+                        gf_log (this->name, GF_LOG_ERROR, "telldir failed");
+                        break;
+                }
+
+                if (readdirplus)
+                        ret = glfs_readdirplus_r (glfd, &statbuf, &de,
+                                                  &dirents);
+                else
+                        ret = glfs_readdir_r (glfd, &de, &dirents);
+
+                if (ret == 0 && dirents != NULL) {
+                        if (readdirplus)
+                                this_size = max (sizeof (gf_dirent_t),
+                                                 sizeof (gfs3_dirplist))
+                                        + strlen (de.d_name) + 1;
+                        else
+                                this_size = sizeof (gf_dirent_t)
+                                        + strlen (de.d_name) + 1;
+
+                        if (this_size + filled_size > size) {
+                                glfs_seekdir (glfd, in_case);
+                                break;
+                        }
+
+                        entry = gf_dirent_for_name (de.d_name);
+                        if (!entry) {
+                                /* entry is NULL here, so the name must be
+                                 * taken from the dirent that was just read */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "could not create gf_dirent "
+                                        "for entry %s: (%s)",
+                                        de.d_name,
+                                        strerror (errno));
+                                break;
+                        }
+                        entry->d_off = glfs_telldir (glfd);
+                        entry->d_ino = de.d_ino;
+                        entry->d_type = de.d_type;
+                        if (readdirplus) {
+                                iatt_from_stat (buf, &statbuf);
+                                entry->d_stat = *buf;
+                        }
+                        list_add_tail (&entry->list, &entries->list);
+
+                        filled_size += this_size;
+                        count++;
+                } else if (ret == 0 && dirents == NULL) {
+                        /* end of directory */
+                        *op_errno = ENOENT;
+                        break;
+                } else if (ret != 0) {
+                        *op_errno = errno;
+                        break;
+                }
+                dirents = NULL;
+                ret = -1;
+        }
+
+out:
+        return count;
+}
+
+/* readdirp can be of 2 types.
+ 1) It can come on entry point directory where the list of snapshots
+ is sent as dirents. In this case, the iatt structure is filled
+ on the fly if the inode is not found for the entry or the inode
+ context is NULL. Otherwise, if inode is found and inode context
+ is there the iatt structure saved in the context is used.
+ 2) It can be on a directory in one of the snapshots. In this case,
+ the readdirp call would have sent us a iatt structure. So the same
+ structure is used with the exception that the gfid and the inode
+ numbers will be newly generated and filled in.
+*/
+/*
+ * Fill in the inode and stat information of one readdirp entry.
+ *
+ * @this:       the snapview-server translator
+ * @parent:     inode of the directory being listed
+ * @parent_ctx: inode context of @parent; its type selects the behaviour
+ * @entry:      the dirent to update in place
+ *
+ * "." and ".." are left untouched. Nothing is returned; on any validation
+ * failure the entry is simply left as it was.
+ */
+void
+svs_readdirp_fill (xlator_t *this, inode_t *parent, svs_inode_t *parent_ctx,
+ gf_dirent_t *entry)
+{
+ inode_t *inode = NULL;
+ uuid_t random_gfid = {0,};
+ struct iatt buf = {0, };
+ svs_inode_t *inode_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, parent_ctx, out);
+ GF_VALIDATE_OR_GOTO (this->name, entry, out);
+
+ if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
+ goto out;
+
+ /* Case 1: an inode for this name is already known under the parent. */
+ inode = inode_grep (parent->table, parent, entry->d_name);
+ if (inode) {
+ entry->inode = inode;
+ inode_ctx = svs_inode_ctx_get (this, inode);
+ if (!inode_ctx) {
+ /* no saved iatt: build one from the inode's gfid and type */
+ gf_uuid_copy (buf.ia_gfid, inode->gfid);
+ svs_iatt_fill (inode->gfid, &buf);
+ buf.ia_type = inode->ia_type;
+ } else {
+ buf = inode_ctx->buf;
+ }
+
+ entry->d_ino = buf.ia_ino;
+
+ /* On the entry point the whole iatt is replaced; inside a snapshot
+ * only the inode number and gfid of the received stat are overridden.
+ */
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ entry->d_stat = buf;
+ else {
+ entry->d_stat.ia_ino = buf.ia_ino;
+ gf_uuid_copy (entry->d_stat.ia_gfid, buf.ia_gfid);
+ }
+ } else {
+
+ /* Case 2: no inode known yet for this name. */
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ /* NOTE(review): the result of inode_new() is not checked here;
+ * confirm svs_inode_ctx_get_or_new() rejects a NULL inode.
+ */
+ inode = inode_new (parent->table);
+ entry->inode = inode;
+
+ /* If inode context allocation fails, then do not send
+ * the inode for that particular entry as part of
+ * readdirp response. Fuse and protocol/server will link
+ * the inodes in readdirp only if the entry contains
+ * inode in it.
+ */
+ inode_ctx = svs_inode_ctx_get_or_new (this, inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "allocate inode context for %s",
+ entry->d_name);
+ inode_unref (entry->inode);
+ entry->inode = NULL;
+ goto out;
+ }
+
+ /* Generate virtual gfid for SNAPSHOT dir and
+ * update the statbuf
+ */
+ gf_uuid_generate (random_gfid);
+ gf_uuid_copy (buf.ia_gfid, random_gfid);
+ svs_fill_ino_from_gfid (&buf);
+ buf.ia_type = IA_IFDIR;
+ entry->d_ino = buf.ia_ino;
+ entry->d_stat = buf;
+ /* remember the generated iatt so later calls reuse it */
+ inode_ctx->buf = buf;
+ inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE;
+ } else {
+ /* For files under snapshot world do not set
+ * entry->inode and reset statbuf (except ia_ino),
+ * so that FUSE/Kernel will send an explicit lookup.
+ * entry->d_stat contains the statbuf information
+ * of original file, so for NFS not to cache this
+ * information and to send explicit lookup, it is
+ * required to reset the statbuf.
+ * Virtual gfid for these files will be generated in the
+ * first lookup.
+ */
+ buf.ia_ino = entry->d_ino;
+ entry->d_stat = buf;
+ }
+ }
+
+out:
+ return;
+}
+
+/* In readdirp, though new inode is created along with the generation of
+ new gfid, the inode context created will not contain the glfs_t instance
+ for the filesystem it belongs to and the handle for it in the gfapi
+ world. (handle is obtained only by doing the lookup call on the entry
+ and doing lookup on each entry received as part of readdir call is a
+ costly operation. So the fs and handle is NULL in the inode context
+ and is filled in when lookup comes on that object.
+*/
+/*
+ * readdirp fop.
+ *
+ * On the entry-point directory the listing is generated from the snapshot
+ * table (svs_fill_readdir); on any other directory it is read through the
+ * gfapi directory handle stored in the fd context (svs_glfs_readdir) after
+ * seeking to @off. Every returned entry is then passed through
+ * svs_readdirp_fill() to set up its inode and stat information.
+ *
+ * Unwinds with op_ret set to the number of entries, or -1 with op_errno
+ * EINVAL (bad arguments / missing inode context) or EBADFD (no fd context).
+ * Always returns 0.
+ */
+int32_t
+svs_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t off, dict_t *dict)
+{
+        gf_dirent_t     entries         = {{{0, }, }, };
+        gf_dirent_t     *entry          = NULL;
+        struct iatt     buf             = {0, };
+        int             count           = 0;
+        int             op_ret          = -1;
+        int             op_errno        = EINVAL;
+        svs_inode_t     *parent_ctx     = NULL;
+        svs_fd_t        *svs_fd         = NULL;
+
+        /*
+         * The list head must be valid before anything can jump to unwind:
+         * the unwind path hands &entries to the caller and then frees it.
+         */
+        INIT_LIST_HEAD (&entries.list);
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
+
+        parent_ctx = svs_inode_ctx_get (this, fd->inode);
+        if (!parent_ctx) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
+                        "context for %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto unwind;
+        }
+
+        if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+                LOCK (&fd->lock);
+                {
+                        count = svs_fill_readdir (this, &entries, size, off);
+                }
+                UNLOCK (&fd->lock);
+        } else {
+                svs_fd = svs_fd_ctx_get_or_new (this, fd);
+                if (!svs_fd) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to get the "
+                                "fd context %s", uuid_utoa (fd->inode->gfid));
+                        op_ret = -1;
+                        op_errno = EBADFD;
+                        goto unwind;
+                }
+
+                glfs_seekdir (svs_fd->fd, off);
+
+                LOCK (&fd->lock);
+                {
+                        count = svs_glfs_readdir (this, svs_fd->fd, &entries,
+                                                  &op_errno, &buf, _gf_true,
+                                                  size);
+                }
+                UNLOCK (&fd->lock);
+        }
+
+        op_ret = count;
+
+        /* common to both kinds of directory */
+        list_for_each_entry (entry, &entries.list, list) {
+                svs_readdirp_fill (this, fd->inode, parent_ctx, entry);
+        }
+
+unwind:
+        STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, &entries, dict);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * readdir fop: lists the snapshot table when called on the entry-point
+ * directory, otherwise reads plain dirents through the gfapi directory
+ * handle kept in the fd context. Unwinds with the number of entries as
+ * op_ret, or -1 with EINVAL / EBADFD. Always returns 0.
+ */
+int32_t
+svs_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t off, dict_t *xdata)
+{
+        gf_dirent_t  entries    = {{{0, }, }, };
+        svs_inode_t *ictx       = NULL;
+        svs_fd_t    *dir_ctx    = NULL;
+        glfs_fd_t   *dir_handle = NULL;
+        int          nr_entries = 0;
+        int          op_ret     = -1;
+        int          op_errno   = EINVAL;
+
+        INIT_LIST_HEAD (&entries.list);
+
+        GF_VALIDATE_OR_GOTO ("snap-view-server", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
+
+        ictx = svs_inode_ctx_get (this, fd->inode);
+        if (!ictx) {
+                gf_log (this->name, GF_LOG_ERROR, "inode context not found in "
+                        "the inode %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto unwind;
+        }
+
+        if (ictx->type != SNAP_VIEW_ENTRY_POINT_INODE) {
+                /* directory inside a snapshot: go through gfapi */
+                dir_ctx = svs_fd_ctx_get_or_new (this, fd);
+                if (!dir_ctx) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to get the "
+                                "fd context %s", uuid_utoa (fd->inode->gfid));
+                        op_ret = -1;
+                        op_errno = EBADFD;
+                        goto unwind;
+                }
+
+                dir_handle = dir_ctx->fd;
+
+                LOCK (&fd->lock);
+                {
+                        nr_entries = svs_glfs_readdir (this, dir_handle,
+                                                       &entries, &op_errno,
+                                                       NULL, _gf_false, size);
+                }
+                UNLOCK (&fd->lock);
+        } else {
+                /* entry point: the snapshots themselves are the entries */
+                LOCK (&fd->lock);
+                {
+                        nr_entries = svs_fill_readdir (this, &entries, size,
+                                                       off);
+                }
+                UNLOCK (&fd->lock);
+        }
+
+        op_ret = nr_entries;
+
+unwind:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * This function is mainly helpful for NFS. Till now NFS server was not linking
+ * the inodes in readdirp, which caused problems when below operations were
+ * performed.
+ *
+ * 1) ls -l in one of the snapshots (snapview-server would generate gfids for
+ * each entry on the fly and link the inodes associated with those entries)
+ * 2) NFS server upon getting readdirp reply would not link the inodes of the
+ * entries. But it used to generate filehandles for each entry and associate
+ * the gfid of that entry with the filehandle and send it as part of the
+ * reply to nfs client.
+ * 3) NFS client would send the filehandle of one of those entries when some
+ * activity is done on it.
+ * 4) NFS server would not be able to find the inode for the gfid present in the
+ * filehandle (as the inode was not linked) and would go for hard resolution
+ * by sending a lookup on the gfid by creating a new inode.
+ * 5) snapview-client will not be able to identify whether the inode is a real
+ * inode existing in the main volume or a virtual inode existing in the
+ * snapshots as there would not be any inode context.
+ * 6) Since the gfid upon which lookup is sent is a virtual gfid which is not
+ * present in the disk, lookup would fail and the application would get an
+ * error.
+ *
+ * The above problem is fixed by the below commit which makes snapview server
+ * more compatible with nfs server (1dea949cb60c3814c9206df6ba8dddec8d471a94).
+ * But now because NFS server does inode linking in readdirp has introduced
+ * the below issue.
+ * In readdirp though snapview-server allocates inode contexts it does not
+ * actually perform lookup on each entry it obtained in readdirp (as doing
+ * a lookup via gfapi over the network for each entry would be costly).
+ *
+ * Till now it was not a problem with NFS server, as NFS was sending a lookup on
+ * the gfid it got from NFS client, for which it was not able to find the right
+ * inode. So snapview-server was able to get the fs instance (glfs_t) of the
+ * snapshot volume to which the entry belongs to, and the handle for the entry
+ * from the corresponding snapshot volume and fill those informations in the
+ * inode context.
+ *
+ * But now, since NFS server is able to find the inode from the inode table for
+ * the gfid it got from the NFS client, it wont send lookup. Rather it directly
+ * sends the fop it received from the client. Now this causes problems for
+ * snapview-server. Because for each fop snapview-server assumes that lookup has
+ * been performed on that entry and the entry's inode context contains the
+ * pointers for the fs instance and the handle to the entry in that fs. When NFS
+ * server sends the fop and snapview-server finds that the fs instance and the
+ * handle within the inode context are NULL it unwinds with EINVAL.
+ *
+ * So to handle this, if fs instance or handle within the inode context are
+ * NULL, then do a lookup based on parent inode context's fs instance. And
+ * unwind the results obtained as part of lookup
+ */
+
+/*
+ * Perform a lookup of @loc through its parent's inode context.
+ *
+ * The parent is taken from loc->parent, or found via loc->pargfid, or via
+ * inode_parent(). Depending on whether the parent is the entry point, the
+ * lookup goes to svs_lookup_snapshot() or svs_lookup_entry(), whose return
+ * value is passed back. Returns -1 when arguments are invalid or the parent
+ * context cannot be found (in the latter case *op_errno is set to EINVAL).
+ */
+int32_t
+svs_get_handle (xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx,
+                int32_t *op_errno)
+{
+        inode_t     *parent     = NULL;
+        svs_inode_t *parent_ctx = NULL;
+        struct iatt  stbuf      = {0, };
+        struct iatt  parent_buf = {0, };
+        char         gfid_str[64];
+        int          ret        = -1;
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+        /* derive the basename from the path when the name is missing/empty */
+        if (loc->path && (!loc->name || !strcmp (loc->name, ""))) {
+                loc->name = strrchr (loc->path, '/');
+                if (loc->name)
+                        loc->name++;
+        }
+
+        if (loc->parent) {
+                parent = inode_ref (loc->parent);
+        } else {
+                parent = inode_find (loc->inode->table, loc->pargfid);
+                if (!parent)
+                        parent = inode_parent (loc->inode, NULL, NULL);
+        }
+
+        if (parent)
+                parent_ctx = svs_inode_ctx_get (this, parent);
+
+        if (!parent_ctx) {
+                gf_log (this->name, GF_LOG_WARNING, "failed to get the parent "
+                        "context for %s (%s)", loc->path,
+                        uuid_utoa_r (loc->inode->gfid, gfid_str));
+                *op_errno = EINVAL;
+                goto out;
+        }
+
+        if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+                ret = svs_lookup_snapshot (this, loc, &stbuf, &parent_buf,
+                                           parent, parent_ctx, op_errno);
+        } else {
+                ret = svs_lookup_entry (this, loc, &stbuf, &parent_buf,
+                                        parent, parent_ctx, op_errno);
+        }
+
+out:
+        if (parent)
+                inode_unref (parent);
+
+        return ret;
+}
+
+/*
+ * stat fop.
+ *
+ * For the entry-point inode the iatt is produced by svs_iatt_fill() from the
+ * inode's gfid. For any other inode the snapshot is queried with
+ * glfs_h_stat() and the result is converted to an iatt whose gfid is
+ * replaced by the inode's gfid and whose inode number is then set by
+ * svs_fill_ino_from_gfid().
+ *
+ * Unwinds with op_ret 0 on success, or -1 with op_errno EINVAL (bad
+ * arguments / no inode context) or the errno left by glfs_h_stat().
+ * Always returns 0.
+ */
+int32_t
+svs_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ svs_private_t *priv = NULL;
+ struct iatt buf = {0, };
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat stat = {0, };
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ /* NOTE(review): priv is assigned but not used in this function. */
+ priv = this->private;
+
+ /* Instead of doing the check of whether it is a entry point directory
+ or not by checking the name of the entry and then deciding what
+ to do, just check the inode context and decide what to be done.
+ */
+
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context not found for"
+ " %s", uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ svs_iatt_fill (loc->inode->gfid, &buf);
+ op_ret = 0;
+ }
+ else {
+
+ /* Loads fs and object from the inode context; the macro receives
+ * op_ret, op_errno and the label, so it can presumably bail out to
+ * "out" on failure -- confirm against the macro definition.
+ */
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ ret = glfs_h_stat (fs, object, &stat);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "glfs_h_stat on %s "
+ "(gfid: %s) failed", loc->name,
+ uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+
+ /* report the virtual gfid of this inode, not the snapshot's own */
+ iatt_from_stat (&buf, &stat);
+ gf_uuid_copy (buf.ia_gfid, loc->inode->gfid);
+ svs_fill_ino_from_gfid (&buf);
+ op_ret = ret;
+ }
+
+out:
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, xdata);
+ return 0;
+}
+
+/*
+ * fstat fop: same result as svs_stat but driven by an open fd. The entry
+ * point gets a generated iatt; everything else is answered by glfs_fstat()
+ * on the gfapi handle held in the fd context, with the gfid and inode number
+ * rewritten to the virtual ones of this inode. Always returns 0.
+ */
+int32_t
+svs_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        struct iatt  stbuf    = {0, };
+        struct stat  posix_st = {0, };
+        svs_inode_t *ictx     = NULL;
+        svs_fd_t    *fd_ctx   = NULL;
+        int32_t      op_ret   = -1;
+        int32_t      op_errno = EINVAL;
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        /* The inode context, not the entry name, tells us whether this is
+         * the entry-point directory.
+         */
+        ictx = svs_inode_ctx_get (this, fd->inode);
+        if (!ictx) {
+                gf_log (this->name, GF_LOG_ERROR, "inode context not found for"
+                        " the inode %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (ictx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+                svs_iatt_fill (fd->inode->gfid, &stbuf);
+                op_ret = 0;
+                goto out;
+        }
+
+        fd_ctx = svs_fd_ctx_get_or_new (this, fd);
+        if (!fd_ctx) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the "
+                        "fd context for %s",
+                        uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EBADFD;
+                goto out;
+        }
+
+        if (glfs_fstat (fd_ctx->fd, &posix_st)) {
+                gf_log (this->name, GF_LOG_ERROR, "glfs_fstat on "
+                        "gfid: %s failed", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = errno;
+                goto out;
+        }
+
+        iatt_from_stat (&stbuf, &posix_st);
+        gf_uuid_copy (stbuf.ia_gfid, fd->inode->gfid);
+        svs_fill_ino_from_gfid (&stbuf);
+        op_ret = 0;
+
+out:
+        STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &stbuf, xdata);
+        return 0;
+}
+
+/*
+ * statfs fop: answers with the result of glfs_h_statfs() on the fs/object
+ * pair taken from the inode context.
+ *
+ * Unwinds with op_ret 0 on success, or -1 with op_errno EINVAL (bad
+ * arguments / no inode context) or the errno left by glfs_h_statfs().
+ * Always returns 0.
+ */
+int32_t
+svs_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ svs_private_t *priv = NULL;
+ struct statvfs buf = {0, };
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ /* NOTE(review): priv is assigned but not used in this function. */
+ priv = this->private;
+
+ /* Instead of doing the check of whether it is a entry point directory
+ or not by checking the name of the entry and then deciding what
+ to do, just check the inode context and decide what to be done.
+ */
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context not found for"
+ " %s", uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* NOTE(review): unlike svs_stat there is no separate branch for the
+ * entry-point inode here; confirm the macro copes with that case.
+ */
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ ret = glfs_h_statfs (fs, object, &buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "glfs_h_statvfs on %s "
+ "(gfid: %s) failed", loc->name,
+ uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ op_ret = ret;
+
+out:
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, xdata);
+ return 0;
+}
+
+
+/*
+ * open fop: opens the object behind @loc with glfs_h_open() and stores the
+ * resulting gfapi handle in the fd context of @fd.
+ *
+ * Unwinds with op_ret 0 on success, or -1 with op_errno EINVAL (bad
+ * arguments / no inode context), the errno left by glfs_h_open(), or ENOMEM
+ * when the fd context cannot be obtained (the freshly opened handle is
+ * closed again in that case). Always returns 0.
+ */
+int32_t
+svs_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ svs_fd_t *sfd = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ glfs_fd_t *glfd = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+
+
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context for %s "
+ "(gfid: %s) not found", loc->name,
+ uuid_utoa (loc->inode->gfid));
+ goto out;
+ }
+
+ /* NOTE(review): execution continues past this assertion, so when
+ * GF_ASSERT does not abort the entry point is still processed below.
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ GF_ASSERT (0); // on entry point it should always be opendir
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ glfd = glfs_h_open (fs, object, flags);
+ if (!glfd) {
+ gf_log (this->name, GF_LOG_ERROR, "glfs_h_open on %s failed "
+ "(gfid: %s)", loc->name, uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+
+ sfd = svs_fd_ctx_get_or_new (this, fd);
+ if (!sfd) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to allocate fd "
+ "context for %s (gfid: %s)", loc->name,
+ uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ /* do not leak the handle that was just opened */
+ glfs_close (glfd);
+ goto out;
+ }
+ /* NOTE(review): any handle already present in sfd->fd is overwritten
+ * here; confirm svs_fd_ctx_get_or_new() does not open one itself.
+ */
+ sfd->fd = glfd;
+
+ op_ret = 0;
+
+out:
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL);
+ return 0;
+}
+
+/*
+ * readv fop: reads up to @size bytes at @offset through the gfapi handle
+ * stored in the fd context and returns them in a single iovec backed by a
+ * freshly allocated iobuf. The post-read stat is fetched with glfs_fstat()
+ * and carries the virtual gfid / inode number of this inode.
+ *
+ * Unwinds with op_ret set to the number of bytes read (op_errno is ENOENT
+ * when the read reached end of file), or -1 with op_errno EBADFD (no fd
+ * context), ENOMEM (iobuf/iobref allocation) or the errno left by the
+ * failing gfapi call. Always returns 0.
+ */
+int32_t
+svs_readv (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+        int32_t                op_ret     = -1;
+        int32_t                op_errno   = 0;
+        svs_private_t         *priv       = NULL;
+        struct iobuf          *iobuf      = NULL;
+        struct iobref         *iobref     = NULL;
+        struct iovec           vec        = {0,};
+        svs_fd_t              *sfd        = NULL;
+        int                    ret        = -1;
+        struct stat            fstatbuf   = {0, };
+        glfs_fd_t             *glfd       = NULL;
+        struct iatt            stbuf      = {0, };
+
+        GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, frame, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        sfd = svs_fd_ctx_get_or_new (this, fd);
+        if (!sfd) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to get the fd "
+                        "context for %s", uuid_utoa (fd->inode->gfid));
+                op_ret = -1;
+                op_errno = EBADFD;
+                goto out;
+        }
+
+        glfd = sfd->fd;
+
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+        if (!iobuf) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ret = glfs_pread (glfd, iobuf->ptr, size, offset, 0);
+        if (ret < 0) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "glfs_read failed (%s)",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        vec.iov_base = iobuf->ptr;
+        vec.iov_len  = ret;
+
+        /* iobref_new() can fail; without this check iobref_add() would be
+         * handed a NULL reference container.
+         */
+        iobref = iobref_new ();
+        if (!iobref) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        iobref_add (iobref, iobuf);
+
+        ret = glfs_fstat (glfd, &fstatbuf);
+        if (ret) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "glfs_fstat failed after "
+                        "readv on %s", uuid_utoa (fd->inode->gfid));
+                goto out;
+        }
+
+        iatt_from_stat (&stbuf, &fstatbuf);
+        gf_uuid_copy (stbuf.ia_gfid, fd->inode->gfid);
+        svs_fill_ino_from_gfid (&stbuf);
+
+        /* Hack to notify higher layers of EOF. */
+        if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
+                op_errno = ENOENT;
+
+        op_ret = vec.iov_len;
+
+out:
+
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+                             &vec, 1, &stbuf, iobref, NULL);
+
+        /* the unwind path has taken its own references by now */
+        if (iobref)
+                iobref_unref (iobref);
+        if (iobuf)
+                iobuf_unref (iobuf);
+
+        return 0;
+}
+
+/*
+ * readlink fop: stats the object with glfs_h_stat() (the iatt gets the
+ * virtual gfid / inode number of this inode) and then reads the link target
+ * with glfs_h_readlink() into a NUL-terminated stack buffer.
+ *
+ * Unwinds with op_ret set to the value returned by glfs_h_readlink() on
+ * success, or -1 with op_errno EINVAL (bad arguments / no inode context) or
+ * the errno left by the failing gfapi call. On the error paths taken before
+ * the buffer is allocated, buf is passed to the unwind as NULL.
+ * Always returns 0.
+ */
+int32_t
+svs_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size, dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ char *buf = NULL;
+ struct iatt stbuf = {0, };
+ int ret = -1;
+ struct stat stat = {0, };
+
+ GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get inode context "
+ "for %s (gfid: %s)", loc->name,
+ uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ ret = glfs_h_stat (fs, object, &stat);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "glfs_h_stat on %s "
+ "(gfid: %s) failed", loc->name,
+ uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+
+ iatt_from_stat (&stbuf, &stat);
+ gf_uuid_copy (stbuf.ia_gfid, loc->inode->gfid);
+ svs_fill_ino_from_gfid (&stbuf);
+
+ /* NOTE(review): size comes from the caller and the buffer lives on the
+ * stack; confirm that callers bound it to a sane value.
+ */
+ buf = alloca (size + 1);
+ op_ret = glfs_h_readlink (fs, object, buf, size);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "readlink on %s failed "
+ "(gfid: %s)", loc->name, uuid_utoa (loc->inode->gfid));
+ op_errno = errno;
+ goto out;
+ }
+
+ /* the extra byte allocated above leaves room for this terminator */
+ buf[op_ret] = 0;
+
+out:
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, &stbuf,
+ NULL);
+
+ return 0;
+}
+
+/*
+ * access fop.
+ *
+ * Entry-point inode: always succeeds. For a fuse caller op_errno is 0; for
+ * any other caller op_errno carries POSIX_ACL_READ | POSIX_ACL_EXECUTE.
+ * Other inodes: the check is delegated to glfs_h_access() on the fs/object
+ * pair from the inode context; for non-fuse callers the frame's pid, uid,
+ * gid and groups are first installed through the syncopctx_setfs*() calls.
+ *
+ * Unwinds with op_ret 0 on success, or -1 with op_errno EINVAL (bad
+ * arguments / no inode context) or the errno left by glfs_h_access().
+ * Always returns 0.
+ */
+int32_t
+svs_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int mask,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ svs_private_t *priv = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ svs_inode_t *inode_ctx = NULL;
+ gf_boolean_t is_fuse_call = 0;
+ int mode = 0;
+
+ /* NOTE(review): frame is not validated here although it is used below. */
+ GF_VALIDATE_OR_GOTO ("svs", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, out);
+
+ priv = this->private;
+
+ inode_ctx = svs_inode_ctx_get (this, loc->inode);
+ if (!inode_ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context not found for"
+ " %s", uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ is_fuse_call = __is_fuse_call (frame);
+
+ /*
+ * For entry-point directory, set read and execute bits. But not write
+ * permissions.
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ if (is_fuse_call) {
+ op_ret = 0;
+ op_errno = 0;
+ } else {
+ /* non-fuse callers receive the granted bits via op_errno */
+ op_ret = 0;
+ mode |= POSIX_ACL_READ;
+ mode |= POSIX_ACL_EXECUTE;
+ op_errno = mode;
+ }
+ goto out;
+ }
+
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ /* The actual posix_acl xlator does acl checks differently for
+ fuse and nfs. So set frame->root->pid as fspid of the syncop
+ if the call came from nfs
+ */
+ if (!is_fuse_call) {
+ syncopctx_setfspid (&frame->root->pid);
+ syncopctx_setfsuid (&frame->root->uid);
+ syncopctx_setfsgid (&frame->root->gid);
+ syncopctx_setfsgroups (frame->root->ngrps,
+ frame->root->groups);
+ }
+
+ ret = glfs_h_access (fs, object, mask);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to access %s "
+ "(gfid: %s)", loc->path, uuid_utoa (loc->inode->gfid));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+
+ /* ret is non-negative here and is handed back as op_errno */
+ op_ret = 0;
+ op_errno = ret;
+
+out:
+
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+
+/*
+ * Set up memory accounting for this translator with room for
+ * gf_svs_mt_end + 1 memory types. Returns -1 when @this is NULL, otherwise
+ * the result of xlator_mem_acct_init() (a warning is logged when non-zero).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int status = -1;
+
+        if (this) {
+                status = xlator_mem_acct_init (this, gf_svs_mt_end + 1);
+                if (status != 0)
+                        gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+                                " init failed");
+        }
+
+        return status;
+}
+
+/*
+ * Translator initialisation: allocates the private structure, reads the
+ * "volname" option, sets up the management RPC callback (svs_mgmt_init) and
+ * fetches the initial snapshot list (svs_get_snapshot_list).
+ *
+ * Returns 0 on success. On any failure the private structure is torn down
+ * again, this->private is reset to NULL and a non-zero value is returned.
+ */
+int32_t
+init (xlator_t *this)
+{
+        svs_private_t   *priv           = NULL;
+        int             ret             = -1;
+
+        /* This can be the top of graph in certain cases */
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_svs_mt_priv_t);
+        if (!priv)
+                goto out;
+
+        /*
+         * Initialise the lock before anything that can jump to "out", so
+         * the cleanup path never destroys a lock that was not set up.
+         */
+        LOCK_INIT (&priv->snaplist_lock);
+
+        this->private = priv;
+
+        GF_OPTION_INIT ("volname", priv->volname, str, out);
+
+        LOCK (&priv->snaplist_lock);
+        {
+                priv->num_snaps = 0;
+        }
+        UNLOCK (&priv->snaplist_lock);
+
+        /* What to do here upon failure? should init be failed or succeed? */
+        /* If succeeded, then dynamic management of snapshots will not */
+        /* happen.*/
+        ret = svs_mgmt_init (this);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING, "failed to initiate the "
+                        "mgmt rpc callback for svs. Dymamic management of the "
+                        "snapshots will not happen");
+                goto out;
+        }
+
+        /* get the list of snaps first to return to client xlator */
+        ret = svs_get_snapshot_list (this);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Error initializing snaplist infrastructure");
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (ret && priv) {
+                /*
+                 * NOTE(review): if svs_mgmt_init() succeeded before a later
+                 * failure, any rpc object it stored in priv is not released
+                 * here -- confirm whether it needs the same teardown as in
+                 * fini().
+                 */
+                LOCK_DESTROY (&priv->snaplist_lock);
+                GF_FREE (priv->dirents);
+                GF_FREE (priv);
+                /* do not leave a dangling pointer behind for fini() */
+                this->private = NULL;
+        }
+
+        return ret;
+}
+
+/*
+ * Translator teardown: detaches the private structure from the xlator and
+ * releases everything it owns (lock, snapshot table, management rpc client).
+ */
+void
+fini (xlator_t *this)
+{
+        svs_private_t *priv = NULL;
+
+        GF_ASSERT (this);
+
+        priv = this->private;
+        this->private = NULL;
+
+        if (!this->ctx)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Invalid ctx found");
+
+        if (!priv)
+                return;
+
+        if (LOCK_DESTROY (&priv->snaplist_lock) != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not destroy mutex snaplist_lock");
+        }
+
+        if (priv->dirents) {
+                GF_FREE (priv->dirents);
+        }
+
+        if (priv->rpc) {
+                /* cleanup the saved-frames before last unref */
+                rpc_clnt_connection_cleanup (&priv->rpc->conn);
+                rpc_clnt_unref (priv->rpc);
+        }
+
+        GF_FREE (priv);
+}
+
+/* File operations implemented by snapview-server; fops that are not listed
+ * here are left unset in this table.
+ */
+struct xlator_fops fops = {
+ .lookup = svs_lookup,
+ .stat = svs_stat,
+ .statfs = svs_statfs,
+ .opendir = svs_opendir,
+ .readdirp = svs_readdirp,
+ .readdir = svs_readdir,
+ .open = svs_open,
+ .readv = svs_readv,
+ .flush = svs_flush,
+ .fstat = svs_fstat,
+ .getxattr = svs_getxattr,
+ .access = svs_access,
+ .readlink = svs_readlink,
+ /* entry fops */
+};
+
+/* Callbacks that free the per-fd and per-inode contexts of this translator. */
+struct xlator_cbks cbks = {
+ .release = svs_release,
+ .releasedir = svs_releasedir,
+ .forget = svs_forget,
+};
+
+/* Options accepted by this translator; the table ends with a NULL key. */
+struct volume_options options[] = {
+ /* string option read into priv->volname by init() */
+ { .key = {"volname"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h
new file mode 100644
index 00000000000..a12319fa9b2
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server.h
@@ -0,0 +1,240 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SNAP_VIEW_H__
+#define __SNAP_VIEW_H__
+
+#include "dict.h"
+#include "defaults.h"
+#include "mem-types.h"
+#include "call-stub.h"
+#include "inode.h"
+#include "byte-order.h"
+#include "iatt.h"
+#include <ctype.h>
+#include <sys/uio.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "glfs.h"
+#include "common-utils.h"
+#include "glfs-handles.h"
+#include "glfs-internal.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs-acl.h"
+#include "syncop.h"
+#include "list.h"
+#include "timer.h"
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+
+
+/* Log directory, built by appending "/log/glusterfs" to DATADIR. */
+#define DEFAULT_SVD_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+
+/*
+ * Numeric limits. NOTE(review): none of these is referenced elsewhere in
+ * this header; judging by the names they presumably cap the number of
+ * glfs_t instances, glfs fds and glfs object handles - confirm against
+ * the users in the .c files.
+ */
+#define SNAP_VIEW_MAX_GLFS_T 256
+#define SNAP_VIEW_MAX_GLFS_FDS 1024
+#define SNAP_VIEW_MAX_GLFS_OBJ_HANDLES 1024
+
+/*
+ * Clear the frame's ->local pointer and then destroy the call stack the
+ * frame belongs to (through frame->root). _frame is cast to
+ * call_frame_t *, so any pointer type is accepted.
+ */
+#define SVS_STACK_DESTROY(_frame) \
+ do { \
+ ((call_frame_t *)_frame)->local = NULL; \
+ STACK_DESTROY (((call_frame_t *)_frame)->root); \
+ } while (0)
+
+/*
+ * SVS_CHECK_VALID_SNAPSHOT_HANDLE (fs, this)
+ *
+ * Check that the glfs handle held in the lvalue 'fs' still belongs to one
+ * of the num_snaps entries of this->private->dirents. The scan runs with
+ * snaplist_lock held. If no entry carries the same handle (or 'fs' is
+ * already NULL), 'fs' is set to NULL so that the caller fetches a fresh
+ * handle.
+ *
+ * Every entry is compared (dirents[idx].fs); the previous version
+ * advanced the counter but only ever looked at the first element.
+ * Macro locals carry a leading underscore so they cannot capture a
+ * caller variable named 'i' or 'found'.
+ */
+#define SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this)                       \
+        do {                                                            \
+                svs_private_t *_private = (this)->private;              \
+                int            _idx     = 0;                            \
+                gf_boolean_t   _found   = _gf_false;                    \
+                                                                        \
+                LOCK (&_private->snaplist_lock);                        \
+                {                                                       \
+                        for (_idx = 0; _idx < _private->num_snaps;      \
+                             _idx++) {                                  \
+                                if (_private->dirents[_idx].fs && fs && \
+                                    _private->dirents[_idx].fs == fs) { \
+                                        _found = _gf_true;              \
+                                        break;                          \
+                                }                                       \
+                        }                                               \
+                }                                                       \
+                UNLOCK (&_private->snaplist_lock);                      \
+                                                                        \
+                if (!_found)                                            \
+                        fs = NULL;                                      \
+        } while (0)
+
+/*
+ * SVS_GET_INODE_CTX_INFO (inode_ctx, fs, object, this, loc, ret,
+ *                         op_errno, label)
+ *
+ * Load 'fs' and 'object' from inode_ctx and validate 'fs' with
+ * SVS_CHECK_VALID_SNAPSHOT_HANDLE. If the handle is no longer valid, or
+ * either value is NULL, svs_get_handle () is called to refresh inode_ctx
+ * and both values are re-read. When svs_get_handle () fails, an error is
+ * logged, 'ret' is set to -1 and control jumps to 'label'.
+ *
+ * NOTE(review): the expansion ends in "while(0);" - the trailing
+ * semicolon means an invocation followed by ';' yields an extra empty
+ * statement, which breaks use as the body of an un-braced if/else.
+ * Dropping it requires checking that every caller writes the ';'.
+ */
+#define SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, ret, \
+ op_errno, label) \
+ do { \
+ fs = inode_ctx->fs; \
+ object = inode_ctx->object; \
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE (fs, this); \
+ if (!fs) \
+ object = NULL; \
+ \
+ if (!fs || !object) { \
+ int32_t tmp = -1; \
+ char tmp_uuid[64]; \
+ \
+ tmp = svs_get_handle (this, loc, inode_ctx, \
+ &op_errno); \
+ if (tmp) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "failed to get the handle for %s " \
+ "(gfid: %s)", loc->path, \
+ uuid_utoa_r (loc->inode->gfid, \
+ tmp_uuid)); \
+ ret = -1; \
+ goto label; \
+ } \
+ \
+ fs = inode_ctx->fs; \
+ object = inode_ctx->object; \
+ } \
+ } while(0);
+
+/*
+ * SVS_STRDUP (dst, src)
+ *
+ * Make 'dst' hold a copy of 'src'. An existing 'dst' that already
+ * compares equal to 'src' is kept as is; a differing one is freed with
+ * GF_FREE first. 'dst' is left NULL if gf_strdup () fails, so callers
+ * have to check it. Both arguments are evaluated more than once.
+ */
+#define SVS_STRDUP(dst, src) \
+ do { \
+ if (dst && strcmp (src, dst)) { \
+ GF_FREE (dst); \
+ dst = NULL; \
+ } \
+ \
+ if (!dst) \
+ dst = gf_strdup (src); \
+ } while (0)
+
+/*
+ * Management-RPC entry points.
+ * NOTE(review): the definitions are not in this header; the summaries
+ * below are inferred from the signatures and should be confirmed.
+ */
+
+/* Submit request 'req' for procedure 'procnum' of 'prog'; 'cbkfn' is the
+ * reply callback and 'xdrproc' the serializer for 'req'. */
+int
+svs_mgmt_submit_request (void *req, call_frame_t *frame,
+ glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+
+/* Fetch the snapshot list for translator 'this'; init () treats a
+ * non-zero return as failure. */
+int
+svs_get_snapshot_list (xlator_t *this);
+
+/* RPC reply callback (presumably for the snapshot-info request). */
+int
+mgmt_get_snapinfo_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
+
+/*
+ * Kind of inode tracked by an svs_inode (stored in its 'type' field).
+ * NOTE(review): going by the names - the entry-point directory, a
+ * snapshot's top-level directory, and anything below a snapshot;
+ * confirm against the lookup code.
+ */
+typedef enum {
+ SNAP_VIEW_ENTRY_POINT_INODE = 0,
+ SNAP_VIEW_SNAPSHOT_INODE,
+ SNAP_VIEW_VIRTUAL_INODE
+} inode_type_t;
+
+/*
+ * Per-inode context of this translator.
+ */
+struct svs_inode {
+ /* glfs handle and object for this inode; SVS_GET_INODE_CTX_INFO
+ re-fetches both through svs_get_handle () when either is NULL
+ or the handle is no longer in the snapshot list.
+ */
+ glfs_t *fs;
+ glfs_object_t *object;
+ inode_type_t type;
+
+ /* used only for entry point directory where gfid of the directory
+ from where the entry point was entered is saved.
+ */
+ uuid_t pargfid;
+
+ /* This is used to generate gfid for all sub files/dirs under this
+ * snapshot
+ */
+ char *snapname;
+ /* attributes kept for this inode */
+ struct iatt buf;
+};
+typedef struct svs_inode svs_inode_t;
+
+/* Per-fd context: wraps the glfs file descriptor for an open fd. */
+struct svs_fd {
+ glfs_fd_t *fd;
+};
+typedef struct svs_fd svs_fd_t;
+
+/*
+ * One snapshot entry of svs_private.dirents: snapshot name, uuid string
+ * (canonical form plus terminator), name of the snapshot volume, and the
+ * glfs handle that SVS_CHECK_VALID_SNAPSHOT_HANDLE compares against.
+ */
+struct snap_dirent {
+ char name[NAME_MAX];
+ char uuid[UUID_CANONICAL_FORM_LEN + 1];
+ char snap_volname[NAME_MAX];
+ glfs_t *fs;
+};
+typedef struct snap_dirent snap_dirent_t;
+
+/*
+ * Translator-private state (this->private).
+ */
+struct svs_private {
+ /* snapshot entries; num_snaps is the loop bound used when they are
+ scanned in SVS_CHECK_VALID_SNAPSHOT_HANDLE */
+ snap_dirent_t *dirents;
+ int num_snaps;
+ /* presumably the value of the "volname" option - confirm in init () */
+ char *volname;
+ struct list_head snaplist;
+ /* held while dirents/num_snaps are read by the macro above */
+ gf_lock_t snaplist_lock;
+ /* rpc client; cleaned up and unref'd in fini () when non-NULL */
+ struct rpc_clnt *rpc;
+};
+typedef struct svs_private svs_private_t;
+
+/*
+ * Helper prototypes. NOTE(review): the definitions live outside this
+ * header, so the groupings below are based on names and signatures only.
+ * The "__" variants are presumably the ones to call with the relevant
+ * lock already held - confirm against the definitions.
+ */
+
+/* --- inode context accessors --- */
+int
+__svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode);
+
+svs_inode_t *
+__svs_inode_ctx_get (xlator_t *this, inode_t *inode);
+
+svs_inode_t *
+svs_inode_ctx_get (xlator_t *this, inode_t *inode);
+
+int32_t
+svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode);
+
+svs_inode_t *
+svs_inode_ctx_get_or_new (xlator_t *this, inode_t *inode);
+
+/* --- fd context accessors --- */
+int
+__svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd);
+
+svs_fd_t *
+__svs_fd_ctx_get (xlator_t *this, fd_t *fd);
+
+svs_fd_t *
+svs_fd_ctx_get (xlator_t *this, fd_t *fd);
+
+int32_t
+svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd);
+
+svs_fd_t *
+__svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd);
+
+svs_fd_t *
+svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd);
+
+/* --- gfid / iatt helpers --- */
+void
+svs_uuid_generate (uuid_t gfid, char *snapname, uuid_t origin_gfid);
+
+void
+svs_fill_ino_from_gfid (struct iatt *buf);
+
+void
+svs_iatt_fill (uuid_t gfid, struct iatt *buf);
+
+/* --- snapshot list / glfs handle helpers --- */
+snap_dirent_t *
+svs_get_latest_snap_entry (xlator_t *this);
+
+glfs_t *
+svs_get_latest_snapshot (xlator_t *this);
+
+glfs_t *
+svs_initialise_snapshot_volume (xlator_t *this, const char *name,
+ int32_t *op_errno);
+
+glfs_t *
+__svs_initialise_snapshot_volume (xlator_t *this, const char *name,
+ int32_t *op_errno);
+
+snap_dirent_t *
+__svs_get_snap_dirent (xlator_t *this, const char *name);
+
+/* --- management connection --- */
+int
+svs_mgmt_init (xlator_t *this);
+
+/* Used by SVS_GET_INODE_CTX_INFO to (re)populate inode_ctx->fs and
+ * inode_ctx->object; a non-zero return is treated as failure there. */
+int32_t
+svs_get_handle (xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx,
+ int32_t *op_errno);
+
+#endif /* __SNAP_VIEW_H__ */
diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am
index 4671d06d309..1304618cc68 100644
--- a/xlators/features/trash/src/Makefile.am
+++ b/xlators/features/trash/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = trash.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-trash_la_LDFLAGS = -module -avoidversion
+trash_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
trash_la_SOURCES = trash.c
trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = trash.h trash-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
index 48613d1e80b..b7cad3ce3a9 100644
--- a/xlators/features/trash/src/trash-mem-types.h
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -1,32 +1,22 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_MEM_TYPES_H__
#define __TRASH_MEM_TYPES_H__
#include "mem-types.h"
enum gf_trash_mem_types_ {
- gf_trash_mt_trash_local_t = gf_common_mt_end + 1,
- gf_trash_mt_trash_private_t,
+ gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
gf_trash_mt_char,
- gf_trash_mt_trash_elim_pattern_t,
+ gf_trash_mt_uuid,
+ gf_trash_mt_trash_elim_path,
gf_trash_mt_end
};
#endif
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index d60bf4b8310..fd5507ff694 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -1,53 +1,244 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "trash.h"
#include "trash-mem-types.h"
+#include "syscall.h"
-int32_t
-trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobuf);
+/*
+ * Fixed gfids, written as uuid_t compound literals: fifteen zero bytes
+ * followed by 1 (root), 5 (trash directory) and 6 (internal_op directory
+ * inside trash).
+ */
+#define root_gfid (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
+#define trash_gfid (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}
+#define internal_op_gfid (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6}
int32_t
trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf);
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
int32_t
trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, dict_t *xdata);
int32_t
trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent);
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+/* Common routines used in this translator */
+
+/**
+ * Directories/files recreated under the trash directory should carry the
+ * permission bits of the original. Stat @path and return its mode; when
+ * the stat fails, log at debug level and fall back to 0755.
+ */
+mode_t
+get_permission (char *path)
+{
+        struct stat     st_buf  = {0,};
+        struct iatt     ia_buf  = {0,};
+
+        if (sys_stat (path, &st_buf) != 0) {
+                gf_log ("trash", GF_LOG_DEBUG, "stat on %s failed"
+                        " using default", path);
+                return 0755;
+        }
+
+        iatt_from_stat (&ia_buf, &st_buf);
+        return st_mode_from_ia (ia_buf.ia_prot, ia_buf.ia_type);
+}
+
+/**
+ * For normalization, the trash directory name is stored inside the priv
+ * structure as '/trash_directory/'. This helper strips the leading
+ * character and one trailing slash and hands back the bare name.
+ *
+ * @priv_value:      normalized value, expected to start with '/'
+ * @trash_directory: receives a newly allocated string; the caller
+ *                   releases it with GF_FREE
+ *
+ * Returns 0 on success, EINVAL when an argument is NULL and ENOMEM when
+ * the copy cannot be allocated. *trash_directory is written only on
+ * success.
+ */
+int
+extract_trash_directory (char *priv_value, const char **trash_directory)
+{
+        char    *tmp    = NULL;
+        size_t   len    = 0;
+        int      ret    = EINVAL;
+
+        GF_VALIDATE_OR_GOTO("trash", priv_value, out);
+        GF_VALIDATE_OR_GOTO("trash", trash_directory, out);
+
+        /* Skip the leading '/', but never step past the terminator of an
+         * empty string.
+         */
+        tmp = gf_strdup ((priv_value[0] != '\0') ? priv_value + 1
+                                                 : priv_value);
+        if (!tmp) {
+                ret = ENOMEM;
+                goto out;
+        }
+
+        /* "/" leaves an empty remainder: check the length before indexing
+         * the last character.
+         */
+        len = strlen (tmp);
+        if (len > 0 && tmp[len - 1] == '/')
+                tmp[len - 1] = '\0';
+
+        /* The trimmed copy itself is handed to the caller; a second
+         * duplicate is not needed.
+         */
+        *trash_directory = tmp;
+        ret = 0;
+out:
+        return ret;
+}
+
+/**
+ * The trash directory path has to be prepended to the file path for
+ * delete or truncate operations. Normal trashing moves the contents to
+ * the trash directory; trashing done by internal operations goes to the
+ * internal_op directory inside trash.
+ *
+ * @priv_value: trash directory path (with trailing '/')
+ * @internal:   when true, "internal_op/" is appended
+ * @path:       output buffer; at most PATH_MAX bytes (terminator
+ *              included) are written to it, so it must be at least
+ *              PATH_MAX bytes long
+ *
+ * The result is built with a bounded snprintf, so an over-long
+ * @priv_value is truncated instead of overrunning the local buffer.
+ */
+void
+copy_trash_path (const char *priv_value, gf_boolean_t internal, char *path)
+{
+        char    trash_path[PATH_MAX]    = {0,};
+
+        snprintf (trash_path, sizeof (trash_path), "%s%s", priv_value,
+                  internal ? "internal_op/" : "");
+
+        strcpy (path, trash_path);
+}
+
+/**
+ * This function performs the reverse operation of copy_trash_path(). It
+ * gives out a pointer into @path at which the part after the trash
+ * directory starts, i.e. something similar to the original path.
+ *
+ * @path:     path that starts with the trash directory
+ * @internal: when true, one more leading component (internal_op) is
+ *            skipped as well
+ * @rem_path: receives the pointer; set to NULL when @path is NULL/empty
+ *            or does not contain enough '/' separators
+ */
+void
+remove_trash_path (const char *path, gf_boolean_t internal, char **rem_path)
+{
+        if (rem_path == NULL) {
+                return;
+        }
+
+        *rem_path = NULL;
+        if (path == NULL || path[0] == '\0') {
+                return;
+        }
+
+        *rem_path = strchr (path + 1, '/');
+        /* Only look for the second separator if the first one was found;
+         * strchr () must not be handed NULL + 1.
+         */
+        if (internal && *rem_path != NULL)
+                *rem_path = strchr (*rem_path + 1, '/');
+}
+
+/**
+ * Check whether the path is the trash directory or the internal op
+ * directory inside trash. This check is used to make sure that we avoid
+ * deletion, rename and creation operations on these directories.
+ *
+ * @path:                 path to test, with or without trailing '/'
+ * @trash_directory_path: trash directory path (with trailing '/')
+ *
+ * Returns 0 when @path names one of the two directories and non-zero
+ * otherwise. An empty @path is compared as "/". The normalized copy is
+ * built with a bounded snprintf.
+ */
+int
+check_whether_trash_directory (const char *path,
+                               const char *trash_directory_path)
+{
+        char    tmp_path[PATH_MAX]              = {0,};
+        char    internal_op_path[PATH_MAX]      = {0,};
+        size_t  len                             = 0;
+        int     ret                             = 0;
+
+        /* Normalize to a trailing '/'; check the length first so an empty
+         * path is never indexed at [-1].
+         */
+        len = strlen (path);
+        if (len > 0 && path[len - 1] == '/')
+                snprintf (tmp_path, sizeof (tmp_path), "%s", path);
+        else
+                snprintf (tmp_path, sizeof (tmp_path), "%s/", path);
+
+        copy_trash_path (trash_directory_path, _gf_true, internal_op_path);
+        ret = strcmp (tmp_path, trash_directory_path) &&
+              strcmp (tmp_path, internal_op_path);
+
+        return ret;
+}
+
+/**
+ * Tell whether @path lies under one of the eliminate paths in the list
+ * starting at @trav (prefix comparison against each entry's path).
+ * Returns 1 on the first match and 0 when no entry matches.
+ */
+int
+check_whether_eliminate_path (trash_elim_path *trav, const char *path)
+{
+        trash_elim_path *entry = NULL;
+
+        for (entry = trav; entry != NULL; entry = entry->next) {
+                if (strncmp (path, entry->path, strlen (entry->path)) == 0)
+                        return 1;
+        }
+
+        return 0;
+}
+/**
+ * Stores the eliminate paths into the internal eliminate path list.
+ *
+ * @str:       comma separated list of paths; it is modified in place by
+ *             strtok_r ()
+ * @eliminate: head of the list; every component is normalized to
+ *             "/component/" and pushed at the front
+ *
+ * Returns 0 on success, EINVAL for NULL arguments, ENAMETOOLONG when a
+ * normalized component does not fit in PATH_MAX, and ENOMEM on
+ * allocation failure. Entries added before a failure stay on the list.
+ */
+int
+store_eliminate_path (char *str, trash_elim_path **eliminate)
+{
+        trash_elim_path         *trav                   = NULL;
+        char                    *component              = NULL;
+        char                    elm_path[PATH_MAX]      = {0,};
+        int                     ret                     = 0;
+        int                     written                 = 0;
+        size_t                  len                     = 0;
+        char                    *strtokptr              = NULL;
+
+        if (eliminate == NULL || str == NULL) {
+                ret = EINVAL;
+                goto out;
+        }
+
+        component = strtok_r (str, ",", &strtokptr);
+        while (component) {
+                /* strtok_r () never returns an empty token, so len >= 1 */
+                len = strlen (component);
+                written = snprintf (elm_path, sizeof (elm_path), "%s%s%s",
+                                    (component[0] == '/') ? "" : "/",
+                                    component,
+                                    (component[len - 1] == '/') ? "" : "/");
+                if (written < 0 || (size_t) written >= sizeof (elm_path)) {
+                        ret = ENAMETOOLONG;
+                        goto out;
+                }
+
+                trav = GF_CALLOC (1, sizeof (*trav),
+                                  gf_trash_mt_trash_elim_path);
+                if (!trav) {
+                        ret = ENOMEM;
+                        goto out;
+                }
+
+                trav->path = gf_strdup(elm_path);
+                if (!trav->path) {
+                        ret = ENOMEM;
+                        gf_log ("trash", GF_LOG_DEBUG, "out of memory");
+                        /* not linked into the list yet: release it here */
+                        GF_FREE (trav);
+                        goto out;
+                }
+                trav->next = *eliminate;
+                *eliminate = trav;
+                component = strtok_r (NULL, ",", &strtokptr);
+        }
+out:
+        return ret;
+}
+
+/**
+ * Append "_<timestamp>" to @name. The timestamp is the current time in
+ * gf_timefmt_F_HMS format with every blank replaced by '_'. @name must
+ * have room for the suffix.
+ */
+void
+append_time_stamp (char *name)
+{
+        char    stamp[64]       = {0,};
+        char   *cursor          = NULL;
+
+        gf_time_fmt (stamp, sizeof(stamp), time (NULL),
+                     gf_timefmt_F_HMS);
+
+        /* removing white spaces in timestamp */
+        for (cursor = stamp; *cursor != '\0'; cursor++) {
+                if (*cursor == ' ')
+                        *cursor = '_';
+        }
+
+        strcat (name, "_");
+        strcat (name, stamp);
+}
+
+/**
+ * Wipe the memory used by trash location variable
+ */
void
trash_local_wipe (trash_local_t *local)
{
@@ -59,48 +250,370 @@ trash_local_wipe (trash_local_t *local)
if (local->fd)
fd_unref (local->fd);
-
if (local->newfd)
fd_unref (local->newfd);
- GF_FREE (local);
+ mem_put (local);
out:
return;
}
+/**
+ * Release every node of the eliminate path list (the path string and the
+ * node itself) and reset the head pointer to NULL. A NULL @trav or an
+ * empty list is a no-op.
+ */
+void
+wipe_eliminate_path (trash_elim_path **trav)
+{
+        trash_elim_path *node = NULL;
+        trash_elim_path *following = NULL;
+
+        if (trav == NULL || *trav == NULL) {
+                return;
+        }
+
+        node = *trav;
+        while (node != NULL) {
+                following = node->next;
+                GF_FREE (node->path);
+                GF_FREE (node);
+                node = following;
+        }
+
+        *trav = NULL;
+}
+
+/**
+ * Callback of the getxattr call that asks for the ancestry path of the
+ * existing trash directory. The path found under GET_ANCESTRY_PATH_KEY
+ * is stored, with a trailing '/', in priv->oldtrash_dir. When the key is
+ * absent, priv->oldtrash_dir becomes a copy of priv->newtrash_dir.
+ *
+ * Returns 0 on success and ENOMEM when the copy cannot be allocated.
+ * The stored path is written with a snprintf bounded by the PATH_MAX
+ * allocation, so adding the separator can no longer overrun it.
+ */
+int32_t
+trash_notify_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, dict_t *dict,
+                           dict_t *xdata)
+{
+        data_t                *data     = NULL;
+        trash_private_t       *priv     = NULL;
+        size_t                 len      = 0;
+        int                    ret      = 0;
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
+        data = dict_get (dict, GET_ANCESTRY_PATH_KEY);
+        if (!data) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "oldtrash-directory doesnot exists");
+                priv->oldtrash_dir = gf_strdup (priv->newtrash_dir);
+                if (!priv->oldtrash_dir) {
+                        gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                        ret = ENOMEM;
+                        goto out;
+                }
+        } else {
+                priv->oldtrash_dir = GF_CALLOC (1, PATH_MAX,
+                                                gf_common_mt_char);
+                if (!priv->oldtrash_dir) {
+                        gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                        ret = ENOMEM;
+                        goto out;
+                }
+                /* appending '/' if it is not present; the length is
+                 * checked first so an empty value is never indexed
+                 * at [-1]
+                 */
+                len = strlen (data->data);
+                snprintf (priv->oldtrash_dir, PATH_MAX, "%s%s", data->data,
+                          (len > 0 && data->data[len - 1] == '/') ? ""
+                                                                  : "/");
+                gf_log (this->name, GF_LOG_DEBUG, "old trash directory path "
+                        "is %s", priv->oldtrash_dir);
+        }
+
+out:
+        return ret;
+}
+
+/**
+ * Callback of the nameless lookup for the old trash directory. The
+ * lookup is based on gfid, because the trash directory has a fixed gfid.
+ *
+ * On success the inode is linked and remembered in priv->trash_inode,
+ * and a getxattr for GET_ANCESTRY_PATH_KEY is wound to the first child
+ * to learn the directory's path. On failure priv->oldtrash_dir becomes
+ * a copy of priv->newtrash_dir.
+ *
+ * Returns 0, or ENOMEM when that copy cannot be allocated (matching
+ * trash_notify_getxattr_cbk).
+ */
+int32_t
+trash_notify_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, inode_t *inode,
+                         struct iatt *buf, dict_t *xdata,
+                         struct iatt *postparent)
+{
+        trash_private_t       *priv     = NULL;
+        loc_t                  loc      = {0,};
+        int                    ret      = 0;
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
+        if (op_ret == 0) {
+
+                gf_log (this->name, GF_LOG_DEBUG, "inode found with gfid %s",
+                        uuid_utoa(buf->ia_gfid));
+
+                gf_uuid_copy (loc.gfid, trash_gfid);
+
+                /* Find trash inode using available information */
+                priv->trash_inode = inode_link (inode, NULL, NULL, buf);
+
+                loc.inode = inode_ref (priv->trash_inode);
+
+                /*Used to find path of old trash directory*/
+                STACK_WIND (frame, trash_notify_getxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr, &loc,
+                            GET_ANCESTRY_PATH_KEY, xdata);
+        }
+
+        /* If there is no old trash directory we set its value to new one,
+         * which is the valid condition for trash directory creation
+         */
+        else {
+                priv->oldtrash_dir = gf_strdup (priv->newtrash_dir);
+                if (!priv->oldtrash_dir) {
+                        gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                        /* report the failure instead of returning 0 */
+                        ret = ENOMEM;
+                        goto out;
+                }
+        }
+
+out:
+        loc_wipe (&loc);
+        return ret;
+}
+
+/**
+ * Callback of the mkdir that creates the internal_op directory inside
+ * trash. A failure other than EEXIST is logged; op_ret is handed back
+ * unchanged in every case.
+ */
+int32_t
+trash_internal_op_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, inode_t *inode,
+                             struct iatt *buf, struct iatt *preparent,
+                             struct iatt *postparent, dict_t *xdata)
+{
+        /* success, or the directory was already there: nothing to report */
+        if (op_ret == 0 || op_errno == EEXIST)
+                return op_ret;
+
+        gf_log (this->name, GF_LOG_ERROR, "mkdir failed for "
+                "internal op directory : %s", strerror (op_errno));
+        return op_ret;
+}
+
+/**
+ * This is the callback of the mkdir fop initiated using STACK_WIND in
+ * the notify function, which is used to create the trash directory in
+ * the brick when a volume starts. The frame of the mkdir must be
+ * destroyed from this function itself since it was created by the trash
+ * xlator.
+ *
+ * When the trash directory was created (or already existed), a second
+ * mkdir is wound for the "internal_op" directory below it, using the
+ * fixed internal_op_gfid passed through the "gfid-req" dict key, and
+ * GF_EVENT_CHILD_UP is then passed on through default_notify ().
+ *
+ * Always returns 0; 'ret' only steers the cleanup at 'out'.
+ *
+ * NOTE(review): the 'out' label sits after STACK_DESTROY, so every
+ * "goto out" error path leaves the frame undestroyed - confirm whether
+ * that is intended.
+ * NOTE(review): loc.name, loc.path and loc.inode are filled in but no
+ * loc_wipe () or free is visible in this function - possible leak.
+ */
+int32_t
+trash_notify_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ uuid_t *gfid_ptr = NULL;
+ loc_t loc = {0, };
+ int ret = 0;
+ dict_t *dict = NULL;
+ char internal_op_path[PATH_MAX] = {0,};
+ trash_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ /* trash directory is in place: either just created or pre-existing */
+ if ((op_ret == 0) || (op_ret == -1 && op_errno == EEXIST)) {
+ gfid_ptr = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!gfid_ptr) {
+ ret = ENOMEM;
+ goto out;
+ }
+ gf_uuid_copy (*gfid_ptr, internal_op_gfid);
+
+ gf_uuid_copy (loc.gfid, internal_op_gfid);
+ gf_uuid_copy (loc.pargfid, trash_gfid);
+ loc.name = gf_strdup ("internal_op");
+
+ if (!loc.name) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ /* NOTE(review): unbounded sprintf into a PATH_MAX buffer; relies on
+ * newtrash_dir being short enough */
+ sprintf (internal_op_path, "%s%s",
+ priv->newtrash_dir, loc.name);
+
+ loc.path = gf_strdup (internal_op_path);
+
+ if (!loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* NOTE(review): the result of inode_new () is dereferenced without
+ * a NULL check */
+ loc.inode = inode_new (priv->trash_itable);
+ loc.inode->ia_type = IA_IFDIR;
+ /* Fixed gfid is set for trash directory with
+ * this function
+ */
+ /* presumably the dict owns gfid_ptr once this call succeeds; the
+ * cleanup at 'out' frees gfid_ptr only when ret is non-zero -
+ * confirm against dict_set_dynptr () */
+ ret = dict_set_dynptr (dict, "gfid-req", gfid_ptr,
+ sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting key gfid-req failed");
+ goto out;
+ }
+
+ /* The mkdir call for creating trash directory */
+ STACK_WIND (frame, trash_internal_op_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &loc, 0755,
+ 0022, dict);
+ /* After creating we must call other notify functions */
+ default_notify (this, GF_EVENT_CHILD_UP, NULL);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "mkdir failed for trash"
+ " directory : %s", strerror (op_errno));
+ }
+
+ /* reached only when no error path jumped to 'out' */
+ STACK_DESTROY (frame->root);
+out:
+ if (ret && gfid_ptr)
+ GF_FREE (gfid_ptr);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+}
+
+/**
+ * Callback of a rename wound by the trash xlator itself. When the
+ * rename succeeded, or failed with EEXIST, GF_EVENT_CHILD_UP is passed
+ * on through default_notify (); any other failure is logged. The frame
+ * is destroyed here and op_ret is returned unchanged.
+ */
+int32_t
+trash_notify_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                         struct iatt *preoldparent, struct iatt *postoldparent,
+                         struct iatt *prenewparent, struct iatt *postnewparent,
+                         dict_t *xdata)
+{
+        int     usable = (op_ret == 0) ||
+                         (op_ret == -1 && op_errno == EEXIST);
+
+        if (!usable) {
+                gf_log (this->name, GF_LOG_ERROR, "rename failed: %s",
+                        strerror (op_errno));
+        } else {
+                /* After creating we must call other notify functions */
+                default_notify (this, GF_EVENT_CHILD_UP, NULL);
+        }
+
+        STACK_DESTROY (frame->root);
+        return op_ret;
+}
+
+/**
+ * This is the callback of the rename fop initiated using STACK_WIND in
+ * the reconfigure function, which is used to rename the trash directory
+ * in the brick when we perform volume set. The frame must be destroyed
+ * from this function itself since it was created by the trash xlator.
+ *
+ * Returns op_ret unchanged.
+ *
+ * NOTE(review): only a failure with EEXIST is logged; other rename
+ * failures pass silently - confirm that this is intended.
+ */
+int32_t
+trash_reconf_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                         struct iatt *preoldparent, struct iatt *postoldparent,
+                         struct iatt *prenewparent, struct iatt *postnewparent,
+                         dict_t *xdata)
+{
+        int     target_exists = (op_ret == -1) && (op_errno == EEXIST);
+
+        if (target_exists) {
+                gf_log (this->name, GF_LOG_ERROR, "rename failed: %s",
+                        strerror (op_errno));
+        }
+
+        STACK_DESTROY (frame->root);
+
+        return op_ret;
+}
+
+/**
+ * Pass-through mkdir callback: unwinds the frame with exactly the
+ * values it received. Always returns 0.
+ */
+int32_t
+trash_common_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+/**
+ * Pass-through rename callback: unwinds the frame with exactly the
+ * values it received. Always returns 0.
+ */
+int32_t
+trash_common_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+/**
+ * Pass-through rmdir callback: unwinds the frame with exactly the
+ * values it received. Always returns 0.
+ */
+int32_t
+trash_common_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+/**
+ * move backs from trash translator to unlink call
+ */
int32_t
trash_common_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- TRASH_STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
+ TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
return 0;
}
+/**
+ * If the target path is not yet present in the trash directory, this
+ * callback winds mkdir with itself as the callback again, so the missing
+ * directories are created one by one, starting from the top of the path.
+ */
int32_t
trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- int32_t loop_count = 0;
- int i = 0;
- loc_t tmp_loc = {0,};
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {0,};
+ char *dir_name = NULL;
+ size_t count = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {0,};
+ trash_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
+
+ TRASH_UNSET_PID (frame, local);
+
tmp_str = gf_strdup (local->newpath);
if (!tmp_str) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
goto out;
}
loop_count = local->loop_count;
+ /* The directory is not present , need to create it */
if ((op_ret == -1) && (op_errno == ENOENT)) {
tmp_dirname = strchr (tmp_str, '/');
while (tmp_dirname) {
@@ -112,548 +625,524 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
break;
tmp_dirname = strchr (tmp_str + count + 1, '/');
}
- tmp_path = memdup (local->newpath, count);
+ tmp_path = gf_memdup (local->newpath, count + 1);
if (!tmp_path) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
goto out;
}
+ tmp_path[count] = '\0';
- tmp_loc.path = tmp_path;
+ loc_copy (&tmp_loc, &local->loc);
+ tmp_loc.path = gf_strdup (tmp_path);
+ if (!tmp_loc.path) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
- /* TODO:create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
+ /* Stores the the name of directory to be created */
+ tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ strcpy (real_path, priv->brick_path);
+ remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strcat (real_path, tmp_stat);
+
+ TRASH_SET_PID (frame, local);
+ STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path),
+ 0022, xdata);
+ loc_wipe (&tmp_loc);
goto out;
}
+ /* Given path is created , comparing to the required path */
if (op_ret == 0) {
dir_name = dirname (tmp_str);
- if (strcmp((char*)cookie, dir_name) == 0) {
+ if (strcmp((char *)cookie, dir_name) == 0) {
+ /* File path exists we can rename it*/
+ loc_copy (&tmp_loc, &local->loc);
tmp_loc.path = local->newpath;
STACK_WIND (frame, trash_unlink_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &tmp_loc);
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ &local->loc, &tmp_loc, xdata);
goto out;
}
}
+ if ((op_ret == -1) && (op_errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_ERROR, "Directory creation failed [%s]. "
+ "Therefore unlinking %s without moving to trash "
+ "directory", strerror(op_errno), local->loc.name);
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, 0,
+ xdata);
+ goto out;
+ }
+
LOCK (&frame->lock);
{
loop_count = ++local->loop_count;
}
UNLOCK (&frame->lock);
+
tmp_dirname = strchr (tmp_str, '/');
+
+ /* Path is not completed , need to create remaining path */
while (tmp_dirname) {
count = tmp_dirname - tmp_str;
if (count == 0)
count = 1;
i++;
- if ((i > loop_count) || (count > PATH_MAX))
+ if (i > loop_count)
break;
tmp_dirname = strchr (tmp_str + count + 1, '/');
}
- tmp_path = memdup (local->newpath, count);
+ tmp_path = gf_memdup (local->newpath, count + 1);
if (!tmp_path) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+ tmp_path[count] = '\0';
+
+ loc_copy (&tmp_loc, &local->loc);
+ tmp_loc.path = gf_strdup (tmp_path);
+ if (!tmp_loc.path) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ /* Stores the the name of directory to be created */
+ tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
goto out;
}
- tmp_loc.path = tmp_path;
+
+ strcpy (real_path, priv->brick_path);
+ remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strcat (real_path, tmp_stat);
+
+ TRASH_SET_PID (frame, local);
STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &tmp_loc,
+ get_permission(real_path), 0022, xdata);
out:
- GF_FREE (cookie);
+ if (tmp_path)
+ GF_FREE (tmp_path);
if (tmp_str)
GF_FREE (tmp_str);
-
- return 0;
+ return ret;
}
-int32_t
-trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent);
-
+/**
+ * The file being unlinked is renamed to a path that starts at the trash
+ * directory and otherwise mirrors its path as seen from the mount point.
+ */
int32_t
trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
- trash_private_t *priv = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_cookie = NULL;
- loc_t tmp_loc = {0,};
+ trash_local_t *local = NULL;
+ trash_private_t *priv = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_cookie = NULL;
+ loc_t tmp_loc = {0,};
+ dict_t *new_xdata = NULL;
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {0,};
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
- priv = this->private;
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* the file path does not exist; we want to create the path
+ * for the file
+ */
tmp_str = gf_strdup (local->newpath);
if (!tmp_str) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
}
- dir_name = dirname (tmp_str);
+ dir_name = dirname (tmp_str); /* stores directory name */
- tmp_loc.path = dir_name;
+ loc_copy (&tmp_loc, &local->loc);
+ tmp_loc.path = gf_strdup (dir_name);
+ if (!tmp_loc.path) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
tmp_cookie = gf_strdup (dir_name);
if (!tmp_cookie) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
}
- /* TODO: create the directory with proper permissions */
+ strcpy (real_path, priv->brick_path);
+ remove_trash_path (tmp_str, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strcat (real_path, tmp_stat);
+
+ TRASH_SET_PID (frame, local);
+
+ /* create the directory with proper permissions */
STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_cookie,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- &tmp_loc, 0755);
-
- GF_FREE (tmp_str);
-
- return 0;
+ &tmp_loc, get_permission(real_path),
+ 0022, xdata);
+ loc_wipe (&tmp_loc);
+ goto out;
}
if ((op_ret == -1) && (op_errno == ENOTDIR)) {
-
+ /* if entry is already present in trash directory,
+ * new one is not copied*/
gf_log (this->name, GF_LOG_DEBUG,
"target(%s) exists, cannot keep the copy, deleting",
local->newpath);
STACK_WIND (frame, trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink, &local->loc);
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ &local->loc, 0, xdata);
- return 0;
+ goto out;
}
if ((op_ret == -1) && (op_errno == EISDIR)) {
+
+ /* if entry is a directory, we remove it directly */
gf_log (this->name, GF_LOG_DEBUG,
"target(%s) exists as directory, cannot keep copy, "
"deleting", local->newpath);
STACK_WIND (frame, trash_common_unwind_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc);
- return 0;
+ FIRST_CHILD(this)->fops->unlink,
+ &local->loc, 0, xdata);
+ goto out;
}
+ /**********************************************************************
+ *
+ * CTR Xlator message handling done here!
+ *
+ **********************************************************************/
+ /**
+ * If unlink is handled by trash translator, it should inform the
+ * CTR Xlator. And trash translator only handles the unlink for
+ * the last hardlink.
+ *
+ * Check if there is a GF_REQUEST_LINK_COUNT_XDATA from CTR Xlator
+ *
+ */
+
+ if (local->ctr_link_count_req) {
+
+ /* Sending back inode link count to ctr_unlink
+ * (changetimerecorder xlator) via
+ * "GF_RESPONSE_LINK_COUNT_XDATA" key using xdata.
+ * */
+ if (xdata) {
+ ret = dict_set_uint32 (xdata,
+ GF_RESPONSE_LINK_COUNT_XDATA,
+ 1);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to set"
+ " GF_RESPONSE_LINK_COUNT_XDATA");
+ }
+ } else {
+ new_xdata = dict_new ();
+ if (!new_xdata) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Memory allocation failure while "
+ "creating new_xdata");
+ goto ctr_out;
+ }
+ ret = dict_set_uint32 (new_xdata,
+ GF_RESPONSE_LINK_COUNT_XDATA,
+ 1);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to set"
+ " GF_RESPONSE_LINK_COUNT_XDATA");
+ }
+ctr_out:
+ TRASH_STACK_UNWIND (unlink, frame, 0, op_errno,
+ preoldparent, postoldparent,
+ new_xdata);
+ goto out;
+ }
+ }
/* All other cases, unlink should return success */
- TRASH_STACK_UNWIND (frame, 0, op_errno, &local->preparent,
- &local->postparent);
-
- return 0;
-}
+ TRASH_STACK_UNWIND (unlink, frame, 0, op_errno, preoldparent,
+ postoldparent, xdata);
+out:
+ if (tmp_str)
+ GF_FREE (tmp_str);
+ if (tmp_cookie)
+ GF_FREE (tmp_cookie);
+ if (new_xdata)
+ dict_unref (new_xdata);
+ return ret;
+}
+/**
+ * Unwinds the truncate call back from the trash translator
+ */
int32_t
trash_common_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- TRASH_STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
+ TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-int
-trash_common_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- TRASH_STACK_UNWIND (frame, op_ret, op_errno, stbuf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- return 0;
-}
int32_t
trash_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- loc_t new_loc = {0,};
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ loc_t new_loc = {0,};
+ int ret = 0;
priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
- if (-1 == op_ret) {
+ if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG, "%s: %s",
local->loc.path, strerror (op_errno));
- goto fail;
+ TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, buf,
+ NULL, xdata);
+ ret = -1;
+ goto out;
}
- if ((buf->ia_size == 0) ||
- (buf->ia_size > priv->max_trash_file_size)) {
- /* if the file is too big or zero, just unlink it */
+ /* Only last hardlink will be moved to trash directory */
+ if (buf->ia_nlink > 1) {
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc,
+ 0, xdata);
+ goto out;
+ }
- if (buf->ia_size > priv->max_trash_file_size) {
- gf_log (this->name, GF_LOG_DEBUG,
+ /* if the file is too big just unlink it */
+ if (buf->ia_size > (priv->max_trash_file_size)) {
+ gf_log (this->name, GF_LOG_DEBUG,
"%s: file size too big (%"PRId64") to "
"move into trash directory",
local->loc.path, buf->ia_size);
- }
STACK_WIND (frame, trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink, &local->loc);
- return 0;
- }
-
- new_loc.path = local->newpath;
-
- STACK_WIND (frame, trash_unlink_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &new_loc);
-
- return 0;
-
-fail:
- TRASH_STACK_UNWIND (frame, op_ret, op_errno, buf,
- NULL, NULL, NULL, NULL);
-
- return 0;
-
-}
-
-int32_t
-trash_rename_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_path = NULL;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- dir_name = dirname (tmp_str);
-
- /* check for the errno, if its ENOENT create directory and call
- * rename later
- */
- tmp_path = gf_strdup (dir_name);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
-
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
-
- GF_FREE (tmp_str);
- return 0;
- }
-
- if ((op_ret == -1) && (op_errno == ENOTDIR)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "target(%s) exists, cannot keep the dest entry(%s): "
- "renaming", local->newpath, local->origpath);
- } else if ((op_ret == -1) && (op_errno == EISDIR)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "target(%s) exists as a directory, cannot keep the "
- "copy (%s), renaming", local->newpath, local->origpath);
- }
-
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename, &local->loc,
- &local->newloc);
-
- return 0;
-}
-
-
-int32_t
-trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc,
+ 0, xdata);
goto out;
}
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
-
- tmp_dirname = strchr (tmp_str + count + 1, '/');
-
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
-
- tmp_loc.path = tmp_path;
-
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk,
- tmp_path, this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
- }
-
+ /* Copies new path for renaming */
+ loc_copy (&new_loc, &local->loc);
+ new_loc.path = gf_strdup (local->newpath);
+ if (!new_loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
goto out;
}
- dir_name = dirname (tmp_str);
- if (strcmp ((char*)cookie, dir_name) == 0) {
- tmp_loc.path = local->newpath;
- STACK_WIND (frame, trash_rename_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->newloc, &tmp_loc);
- }
+ STACK_WIND (frame, trash_unlink_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ &local->loc, &new_loc, xdata);
out:
- GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
-
- return 0;
-}
+ loc_wipe (&new_loc);
-int32_t
-trash_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- loc_t tmp_loc = {0,};
+ return ret;
- local = frame->local;
- priv = this->private;
-
- if (op_ret == -1) {
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &local->newloc);
- return 0;
- }
- if ((buf->ia_size == 0) ||
- (buf->ia_size > priv->max_trash_file_size)) {
- /* if the file is too big or zero, just unlink it */
-
- if (buf->ia_size > priv->max_trash_file_size) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: file size too big (%"PRId64") to "
- "move into trash directory",
- local->newloc.path, buf->ia_size);
- }
-
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &local->newloc);
- return 0;
- }
-
- tmp_loc.path = local->newpath;
-
- STACK_WIND (frame, trash_rename_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->newloc, &tmp_loc);
-
- return 0;
}
-
+/**
+ * Unlink is called internally by rm system call and also
+ * by internal operations of gluster such as self-heal
+ */
int32_t
-trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- time_t utime = 0;
- int32_t match = 0;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;/* files inside trash */
+ int32_t match = 0;
+ int32_t ctr_link_req = 0;
+ char *pathbuf = NULL;
+ int ret = 0;
priv = this->private;
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, newloc->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
- }
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
- if ((strncmp (oldloc->path, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) || match) {
- /* Trying to rename from the trash dir,
- do the actual rename */
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- oldloc, newloc);
-
- return 0;
- }
-
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- TRASH_STACK_UNWIND (frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
+ /* If trash is not active or not enabled through cli, then
+ * we bypass and wind back
+ */
+ if (!priv->state) {
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0,
+ xdata);
+ goto out;
}
- frame->local = local;
- loc_copy (&local->loc, oldloc);
-
- loc_copy (&local->newloc, newloc);
-
- strcpy (local->origpath, newloc->path);
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, newloc->path);
-
- {
- /* append timestamp to file name */
- /* TODO: can we make it optional? */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
- strcat (local->newpath, timestr);
+ /* Files removed by gluster internal operations such as self-heal
+ * (negative pid) are moved to the trash directory only when
+ * priv->internal is set; otherwise they are unlinked directly
+ */
+ if ((frame->root->pid < 0) && !priv->internal) {
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0,
+ xdata);
+ goto out;
}
+ /* loc need some gfid which will be present in inode */
+ gf_uuid_copy (loc->gfid, loc->inode->gfid);
- /* Send a lookup call on newloc, to ensure we are not
- overwriting */
- STACK_WIND (frame, trash_rename_lookup_cbk,
- this->children->xlator,
- this->children->xlator->fops->lookup, newloc, 0);
-
- return 0;
-}
-
-int32_t
-trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- time_t utime = 0;
- int32_t match = 0;
-
- priv = this->private;
-
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, loc->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
+ /* Checking for valid location */
+ if (gf_uuid_is_null (loc->gfid) && gf_uuid_is_null (loc->inode->gfid)) {
+ gf_log (this->name, GF_LOG_DEBUG, "Bad address");
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0,
+ xdata);
+ ret = EFAULT;
+ goto out;
}
- if ((strncmp (loc->path, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) || (match)) {
+ /* This will be more accurate */
+ inode_path (loc->inode, NULL, &pathbuf);
+ /* Check whether the file is present under eliminate paths or
+ * inside trash directory. In both cases we don't need to move the
+ * file to trash directory. Instead delete it permanently
+ */
+ match = check_whether_eliminate_path (priv->eliminate, pathbuf);
+ if ((strncmp (pathbuf, priv->newtrash_dir,
+ strlen (priv->newtrash_dir)) == 0) || (match)) {
if (match) {
gf_log (this->name, GF_LOG_DEBUG,
- "%s: file matches eliminate pattern, "
- "not moved to trash", loc->name);
- } else {
- /* unlink from the trash-dir, not keeping any copy */
- ;
+ "%s is a file comes under an eliminate path, "
+ "so it is not moved to trash", loc->name);
}
+ /* Trying to unlink from the trash-dir. So do the
+ * actual unlink without moving to trash-dir.
+ */
STACK_WIND (frame, trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink, loc);
- return 0;
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0,
+ xdata);
+ goto out;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- TRASH_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
+ TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL,
+ xdata);
+ ret = ENOMEM;
+ goto out;
}
frame->local = local;
loc_copy (&local->loc, loc);
- strcpy (local->origpath, loc->path);
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, loc->path);
+ /* rename new location of file as starting from trash directory */
+ copy_trash_path (priv->newtrash_dir, (frame->root->pid < 0),
+ local->newpath);
+ strcat (local->newpath, pathbuf);
- {
- /* append timestamp to file name */
- /* TODO: can we make it optional? */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
- strcat (local->newpath, timestr);
+ /* append timestamp to file name so that we can avoid
+ * name collisions inside trash
+ */
+ append_time_stamp (local->newpath);
+ if (strlen (local->newpath) > PATH_MAX) {
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0,
+ xdata);
+ goto out;
}
+ /* To know whether CTR xlator requested for the link count */
+ ret = dict_get_int32 (xdata, GF_REQUEST_LINK_COUNT_XDATA,
+ &ctr_link_req);
+ if (ret) {
+ local->ctr_link_count_req = _gf_false;
+ ret = 0;
+ } else
+ local->ctr_link_count_req = _gf_true;
+
LOCK_INIT (&frame->lock);
STACK_WIND (frame, trash_unlink_stat_cbk,
- this->children->xlator,
- this->children->xlator->fops->stat, loc);
-
- return 0;
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+out:
+ return ret;
}
+/**
+ * Use this when a failure occurs, and delete the newly created file
+ */
int32_t
trash_truncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- /* use this Function when a failure occurs, and
- delete the newly created file. */
- trash_local_t *local = NULL;
+ trash_local_t *local = NULL;
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -663,20 +1152,26 @@ trash_truncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_common_unwind_buf_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
- &local->loc, local->fop_offset);
-
+ &local->loc, local->fop_offset, xdata);
+out:
return 0;
}
+/**
+ * Read from source file
+ */
int32_t
trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobuf)
+ struct iatt *stbuf, struct iobref *iobuf,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
+
+ trash_local_t *local = NULL;
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -685,28 +1180,34 @@ trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_truncate_unlink_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
+ &local->newloc, 0, xdata);
goto out;
}
local->fsize = stbuf->ia_size;
STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, iobuf);
+ local->newfd, vector, count, local->cur_offset, 0, iobuf,
+ xdata);
out:
return 0;
}
+/**
+ * Write to file created in trash directory
+ */
int32_t
trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
+ trash_local_t *local = NULL;
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
if (op_ret == -1) {
/* Let truncate work, but previous copy is not preserved. */
@@ -715,7 +1216,8 @@ trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
strerror (op_errno));
STACK_WIND (frame, trash_truncate_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->newloc);
+ FIRST_CHILD(this)->fops->unlink, &local->newloc, 0,
+ xdata);
goto out;
}
@@ -725,7 +1227,7 @@ trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_truncate_readv_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset);
+ local->cur_offset, 0, xdata);
goto out;
}
@@ -733,86 +1235,120 @@ trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* OOFH.....Finally calling Truncate. */
STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate, &local->loc,
- local->fop_offset);
+ local->fop_offset, xdata);
out:
return 0;
}
-
-
+/**
+ * The source file is opened for reading and writing
+ */
int32_t
trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
+ trash_local_t *local = NULL;
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
if (op_ret == -1) {
- //Let truncate work, but previous copy is not preserved.
+ /* Let truncate work, but previous copy is not preserved. */
gf_log (this->name, GF_LOG_DEBUG,
"open on the existing file failed: %s",
strerror (op_errno));
STACK_WIND (frame, trash_truncate_unlink_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
+ &local->newloc, 0, xdata);
goto out;
}
- local->cur_offset = local->fop_offset;
+ fd_bind (fd);
+
+ local->cur_offset = 0;
STACK_WIND (frame, trash_truncate_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset);
+ local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset,
+ 0, xdata);
out:
return 0;
}
-
+/**
+ * Creates new file descriptor for read and write operations,
+ * if the path is present in trash directory
+ */
int32_t
trash_truncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_path = NULL;
- int32_t flags = 0;
- loc_t tmp_loc = {0,};
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_path = NULL;
+ int32_t flags = 0;
+ loc_t tmp_loc = {0,};
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {0,};
+ trash_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
+
+ /* Checks whether path is present in trash directory or not */
if ((op_ret == -1) && (op_errno == ENOENT)) {
- //Creating the directory structure here.
+ /* Creating the directory structure here. */
tmp_str = gf_strdup (local->newpath);
if (!tmp_str) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
}
dir_name = dirname (tmp_str);
tmp_path = gf_strdup (dir_name);
if (!tmp_path) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ loc_copy (&tmp_loc, &local->newloc);
+ tmp_loc.path = gf_strdup (tmp_path);
+ if (!tmp_loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
}
- tmp_loc.path = tmp_path;
+ strcpy (real_path, priv->brick_path);
+ remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strcat (real_path, tmp_stat);
- /* TODO: create the directory with proper permissions */
+ TRASH_SET_PID (frame, local);
+
+ /* create the directory with proper permissions */
STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
tmp_path, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- &tmp_loc, 0755);
- GF_FREE (tmp_str);
+ &tmp_loc, get_permission(real_path),
+ 0022, xdata);
+ loc_wipe (&tmp_loc);
goto out;
}
if (op_ret == -1) {
- //Let truncate work, but previous copy is not preserved.
- //Deleting the newly created copy.
+ /* Let truncate work, but previous copy is not preserved.
+ * Deleting the newly created copy.
+ */
gf_log (this->name, GF_LOG_DEBUG,
"creation of new file in trash-dir failed, "
"when truncate was called: %s", strerror (op_errno));
@@ -820,47 +1356,69 @@ trash_truncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_common_unwind_buf_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate, &local->loc,
- local->fop_offset);
+ local->fop_offset, xdata);
goto out;
}
+ fd_bind (fd);
flags = O_RDONLY;
+ /* fd which represents source file for reading and writing from it */
+
local->fd = fd_create (local->loc.inode, frame->root->pid);
STACK_WIND (frame, trash_truncate_open_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open, &local->loc, flags,
local->fd, 0);
out:
+ if (tmp_str)
+ GF_FREE (tmp_str);
+ if (tmp_path)
+ GF_FREE (tmp_path);
+
return 0;
}
+/**
+ * If the path is not present in the trash directory, it will recursively call
+ * this call-back and one by one directories will be created from the
+ * beginning
+ */
int32_t
trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- int32_t flags = 0;
- int32_t loop_count = 0;
- int i = 0;
- loc_t tmp_loc = {0,};
+ trash_local_t *local = NULL;
+ trash_private_t *priv = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *dir_name = NULL;
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {0,};
+ size_t count = 0;
+ int32_t flags = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {0,};
+ int ret = 0;
- local = frame->local;
- if (!local)
- goto out;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
loop_count = local->loop_count;
+ TRASH_UNSET_PID (frame, local);
+
tmp_str = gf_strdup (local->newpath);
if (!tmp_str) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
goto out;
}
@@ -875,16 +1433,42 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
break;
tmp_dirname = strchr (tmp_str + count + 1, '/');
}
- tmp_path = memdup (local->newpath, count);
+ tmp_path = gf_memdup (local->newpath, count + 1);
if (!tmp_path) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ tmp_path[count] = '\0';
+
+ loc_copy (&tmp_loc, &local->newloc);
+ tmp_loc.path = gf_strdup (tmp_path);
+ if (!tmp_loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
}
- tmp_loc.path = tmp_path;
- STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
- tmp_path, this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
+ /* Stores the name of the directory to be created */
+ tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ strcpy (real_path, priv->brick_path);
+ remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strcat (real_path, tmp_stat);
+
+ TRASH_SET_PID (frame, local);
+
+ STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
+ tmp_path, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path),
+ 0022, xdata);
+ loc_wipe (&tmp_loc);
goto out;
}
@@ -892,18 +1476,32 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dir_name = dirname (tmp_str);
if (strcmp ((char*)cookie, dir_name) == 0) {
flags = O_CREAT|O_EXCL|O_WRONLY;
- ia_prot_t prot = {0, };
-
- //Call create again once directory structure is created.
+ strcpy (real_path, priv->brick_path);
+ strcat (real_path, local->origpath);
+ /* Call create again once directory structure
+ is created. */
STACK_WIND (frame, trash_truncate_create_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
&local->newloc, flags,
- st_mode_from_ia (prot, local->loc.inode->ia_type),
- local->newfd);
+ get_permission (real_path),
+ 0022, local->newfd, xdata);
goto out;
}
}
+ if ((op_ret == -1) && (op_errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_ERROR, "Directory creation failed [%s]. "
+ "Therefore truncating %s without moving the "
+ "original copy to trash directory",
+ strerror(op_errno), local->loc.name);
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
+ goto out;
+ }
+
LOCK (&frame->lock);
{
loop_count = ++local->loop_count;
@@ -915,87 +1513,174 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
count = tmp_dirname - tmp_str;
if (count == 0)
count = 1;
-
i++;
- if ((i > loop_count) || (count > PATH_MAX))
+ if (i > loop_count)
break;
tmp_dirname = strchr (tmp_str + count + 1, '/');
}
- tmp_path = memdup (local->newpath, count);
+ tmp_path = gf_memdup (local->newpath, count + 1);
if (!tmp_path) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
}
- tmp_loc.path = tmp_path;
+ tmp_path[count] = '\0';
+
+ loc_copy (&tmp_loc, &local->newloc);
+ tmp_loc.path = gf_strdup (tmp_path);
+ if (!tmp_loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* Stores the name of the directory to be created */
+ tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ strcpy (real_path, priv->brick_path);
+ remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strcat (real_path, tmp_stat);
+
+ TRASH_SET_PID (frame, local);
STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &tmp_loc,
+ get_permission(real_path),
+ 0022, xdata);
out:
- GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
if (tmp_str)
GF_FREE (tmp_str);
+ if (tmp_path)
+ GF_FREE (tmp_path);
- return 0;
+ return ret;
}
int32_t
trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- char loc_newname[PATH_MAX] = {0,};
- time_t utime = 0;
- int32_t flags = 0;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ char loc_newname[PATH_MAX] = {0,};
+ int32_t flags = 0;
+ dentry_t *dir_entry = NULL;
+ inode_table_t *table = NULL;
+ int ret = 0;
priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
local = frame->local;
+ GF_VALIDATE_OR_GOTO ("trash", local, out);
+
+ table = local->loc.inode->table;
+
+ pthread_mutex_lock (&table->lock);
+ {
+ dir_entry = __dentry_search_arbit (local->loc.inode);
+ }
+ pthread_mutex_unlock (&table->lock);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"fstat on the file failed: %s",
strerror (op_errno));
- TRASH_STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, buf,
+ NULL, xdata);
+ goto out;
+ }
+
+ /* Only last hardlink will be moved to trash directory */
+ if (buf->ia_nlink > 1) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ &local->loc, local->fop_offset, xdata);
+ goto out;
}
- if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size)) {
- // If the file is too big, just unlink it.
- if (buf->ia_size > priv->max_trash_file_size)
- gf_log (this->name, GF_LOG_DEBUG, "%s: file too big, "
- "not moving to trash", local->loc.path);
+ /**
+ * If the file is too big or if it is extended truncate,
+ * just don't move it to trash directory.
+ */
+ if (buf->ia_size > (priv->max_trash_file_size) ||
+ buf->ia_size <= local->fop_offset) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: not moving to trash , "
+ "having inappropiate file size", local->loc.path);
STACK_WIND (frame, trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->truncate,
- &local->loc, local->fop_offset);
- return 0;
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ &local->loc, local->fop_offset, xdata);
+ goto out;
+ }
+
+ /* Retrieves the name of the file from the path */
+ local->loc.name = gf_strdup (strrchr (local->loc.path, '/'));
+ if (!local->loc.name) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
}
- strcpy (local->newpath, priv->trash_dir);
+ /* Stores new path for source file */
+ copy_trash_path (priv->newtrash_dir, (frame->root->pid < 0),
+ local->newpath);
strcat (local->newpath, local->loc.path);
- {
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
- strcat (local->newpath, timestr);
+ /* append timestamp to file name so that we can avoid
+ name collisions inside trash */
+ append_time_stamp (local->newpath);
+ if (strlen (local->newpath) > PATH_MAX) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ &local->loc, local->fop_offset, xdata);
+ goto out;
}
- strcpy (loc_newname,local->loc.name);
- strcat (loc_newname,timestr);
+ strcpy (loc_newname, local->loc.name);
+ append_time_stamp (loc_newname);
+ /* local->newloc represents old file(file inside trash),
+ where as local->loc represents truncated file. We need
+ to create new inode and fd for new file*/
local->newloc.name = gf_strdup (loc_newname);
+ if (!local->newloc.name) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
local->newloc.path = gf_strdup (local->newpath);
+ if (!local->newloc.path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
local->newloc.inode = inode_new (local->loc.inode->table);
- local->newloc.ino = local->newloc.inode->ino;
local->newfd = fd_create (local->newloc.inode, frame->root->pid);
+ /* Creating valid parent and pargfids for both files */
+
+ if (dir_entry == NULL) {
+ ret = EINVAL;
+ goto out;
+ }
+ local->loc.parent = inode_ref (dir_entry->parent);
+ gf_uuid_copy (local->loc.pargfid, dir_entry->parent->gfid);
+
+ local->newloc.parent = inode_ref (dir_entry->parent);
+ gf_uuid_copy (local->newloc.pargfid, dir_entry->parent->gfid);
+
flags = O_CREAT|O_EXCL|O_WRONLY;
STACK_WIND (frame, trash_truncate_create_cbk,
@@ -1003,565 +1688,883 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
FIRST_CHILD(this)->fops->create,
&local->newloc, flags,
st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type),
- local->newfd);
+ 0022, local->newfd, xdata);
- return 0;
+out:
+ return ret;
}
+/**
+ * Truncate can be explicitly called or implicitly by some other applications
+ * like text editors etc..
+ */
int32_t
trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- int32_t match = 0;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ int32_t match = 0;
+ char *pathbuf = NULL;
+ int ret = 0;
priv = this->private;
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, loc->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+ /* If trash is not active or not enabled through cli, then
+ * we bypass and wind back
+ */
+ if (!priv->state) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc,
+ offset, xdata);
+ goto out;
+ }
+
+ /* The files removed by gluster operations such as self-heal,
+ should be moved to trash directory, but files by client should
+ not be moved */
+ if ((frame->root->pid < 0) && !priv->internal) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc,
+ offset, xdata);
+ goto out;
}
+ /* This will be more accurate */
+ inode_path(loc->inode, NULL, &pathbuf);
- if ((strncmp (loc->path, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) || (offset) || (match)) {
+ /* Checks whether file is in trash directory or eliminate path.
+ * In all such cases it does not move to trash directory,
+ * truncate will be performed
+ */
+ match = check_whether_eliminate_path (priv->eliminate, pathbuf);
+
+ if ((strncmp (pathbuf, priv->newtrash_dir,
+ strlen (priv->newtrash_dir)) == 0) || (match)) {
if (match) {
gf_log (this->name, GF_LOG_DEBUG,
"%s: file not moved to trash as per option "
- "'eliminate'", loc->path);
+ "'eliminate path'", loc->path);
}
- // Trying to truncate from the trash can dir,
- // do the actual truncate without moving to trash-dir.
+ /* Trying to truncate from the trash-dir. So do the
+ * actual truncate without moving to trash-dir.
+ */
STACK_WIND (frame, trash_common_unwind_buf_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
+ FIRST_CHILD(this)->fops->truncate, loc, offset,
+ xdata);
goto out;
}
LOCK_INIT (&frame->lock);
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- TRASH_STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
+ TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL,
+ xdata);
+ ret = ENOMEM;
+ goto out;
}
- loc_copy (&local->loc, loc);
+ strcpy (local->origpath, pathbuf);
+ loc_copy (&local->loc, loc);
+ local->loc.path = pathbuf;
local->fop_offset = offset;
frame->local = local;
STACK_WIND (frame, trash_truncate_stat_cbk,
- this->children->xlator,
- this->children->xlator->fops->stat, loc);
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc,
+ xdata);
out:
- return 0;
+ return ret;
}
+/**
+ * When we call truncate from terminal it comes to ftruncate of trash-xlator,
+ * since truncate internally calls ftruncate and we receive the fd of the file.
+ * Other than that, it is also called by the rebalance operation.
+ */
int32_t
-trash_ftruncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to unlink new file: %s",
- local->newloc.path, strerror(op_errno));
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;/* file inside trash */
+ char *pathbuf = NULL;/* path of file from fd */
+ int32_t retval = 0;
+ int32_t match = 0;
+ int ret = 0;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+ /* If trash is not active or not enabled through cli, then
+ * we bypass and wind back
+ */
+ if (!priv->state) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd,
+ offset, xdata);
+ goto out;
}
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
- local->fd, local->fop_offset);
+ /* The files removed by gluster operations such as self-heal,
+ * should be moved to trash directory, but files by client
+ * should not be moved
+ */
+ if ((frame->root->pid < 0) && !priv->internal) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd,
+ offset, xdata);
+ goto out;
+ }
+ /* This will be more accurate */
+ retval = inode_path (fd->inode, NULL, &pathbuf);
- return 0;
-}
+ /* Checking the eliminate path */
-int32_t
-trash_ftruncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- trash_local_t *local = NULL;
+ /* Checks whether file is trash directory or eliminate path or
+ * invalid fd. In all such cases it does not move to trash directory,
+ * ftruncate will be performed
+ */
+ match = check_whether_eliminate_path (priv->eliminate, pathbuf);
+ if ((strncmp (pathbuf, priv->newtrash_dir,
+ strlen (priv->newtrash_dir)) == 0) || match ||
+ !retval) {
- local = frame->local;
+ if (match) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: file matches eliminate path, "
+ "not moved to trash", pathbuf);
+ }
- if (op_ret == -1) {
- STACK_WIND (frame, trash_ftruncate_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
- return 0;
+ /* Trying to ftruncate from the trash-dir. So do the
+ * actual ftruncate without moving to trash-dir
+ */
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd, offset, xdata);
+ goto out;
}
- if (local->cur_offset < local->fsize) {
- local->cur_offset += GF_BLOCK_READV_SIZE;
- STACK_WIND (frame, trash_ftruncate_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset);
- return 0;
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL,
+ NULL, xdata);
+ ret = -1;
+ goto out;
}
- STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- local->fop_offset);
+ strcpy (local->origpath, pathbuf);
- return 0;
-}
+ /* To convert fd to location */
+ frame->local=local;
+
+ local->loc.path = pathbuf;
+ local->loc.inode = inode_ref (fd->inode);
+ gf_uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ local->fop_offset = offset;
+ /* Else remains same to truncate code, so from here flow goes
+ * to truncate_stat
+ */
+ STACK_WIND (frame, trash_truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+out:
+ return ret;
+}
+
+/**
+ * The mkdir call is intercepted to avoid creation of
+ * trash directory in the mount by the user
+ */
int32_t
-trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobuf)
+trash_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
- local->fsize = stbuf->ia_size;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ trash_private_t *priv = NULL;
- if (op_ret == -1) {
- STACK_WIND (frame, trash_ftruncate_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
- return 0;
- }
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
- STACK_WIND (frame, trash_ftruncate_writev_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, NULL);
+ if (!check_whether_trash_directory (loc->path, priv->newtrash_dir)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mkdir issued on %s, which is not permitted",
+ priv->newtrash_dir);
+ op_errno = EPERM;
+ op_ret = -1;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
+ NULL, NULL, NULL, NULL, xdata);
+ } else {
+ STACK_WIND (frame, trash_common_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ }
+out:
return 0;
}
-
-int32_t
-trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+/**
+ * The rename call is intercepted to avoid renaming
+ * of trash directory in the mount by the user
+ */
+int
+trash_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_path = NULL;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- dir_name = dirname (tmp_str);
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ trash_private_t *priv = NULL;
- tmp_path = gf_strdup (dir_name);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
- tmp_path, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- &tmp_loc, 0755);
- GF_FREE (tmp_str);
- return 0;
- }
+ if (!check_whether_trash_directory (oldloc->path, priv->newtrash_dir)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rename issued on %s, which is not permitted",
+ priv->newtrash_dir);
+ op_errno = EPERM;
+ op_ret = -1;
- if (op_ret == -1) {
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- local->fd, local->fop_offset);
- return 0;
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+ } else {
+ STACK_WIND (frame, trash_common_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
}
-
- STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- (size_t)GF_BLOCK_READV_SIZE, local->cur_offset);
-
+out:
return 0;
}
-
+/**
+ * The rmdir call is intercepted to avoid deletion of
+ * trash directory in the mount by the user
+ */
int32_t
-trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+trash_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- int32_t flags = 0;
- int32_t loop_count = 0;
- int i = 0;
- loc_t tmp_loc = {0,};
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ trash_private_t *priv = NULL;
- local = frame->local;
- if (!local)
- goto out;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
- loop_count = local->loop_count;
+ if (!check_whether_trash_directory (loc->path, priv->newtrash_dir)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rmdir issued on %s, which is not permitted",
+ priv->newtrash_dir);
+ op_errno = EPERM;
+ op_ret = -1;
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- goto out;
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ NULL, NULL, xdata);
+ } else {
+ STACK_WIND (frame, trash_common_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
}
+out:
+ return 0;
+}
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- i++;
- if (i > loop_count)
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
- STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk,
- tmp_path, this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
+/**
+ * Volume set options are handled by the reconfigure function.
+ * Here we check whether each option is set or not; if it is
+ * set, then the corresponding modifications will be made.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ uint64_t max_fsize = 0;
+ int ret = 0;
+ char *tmp = NULL;
+ char *tmp_str = NULL;
+ trash_private_t *priv = NULL;
+ loc_t old_loc = {0, };
+ loc_t new_loc = {0, };
+ call_frame_t *frame = NULL;
+ char trash_dir[PATH_MAX] = {0,};
- goto out;
- }
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
+ GF_OPTION_RECONF ("trash", priv->state, options, bool, out);
+
+ GF_OPTION_RECONF ("trash-dir", tmp, options, str, out);
+ if (tmp) {
+ sprintf(trash_dir, "/%s/", tmp);
+ if (strcmp(priv->newtrash_dir, trash_dir) != 0) {
+
+ /* When user set a new name for trash directory, trash
+ * xlator will perform a rename operation on old trash
+ * directory to the new one using a STACK_WIND from here.
+ * This option can be configured only when volume is in
+ * started state
+ */
+
+ GF_FREE (priv->newtrash_dir);
+
+ priv->newtrash_dir = gf_strdup (trash_dir);
+ if (!priv->newtrash_dir) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Renaming %s -> %s from reconfigure",
+ priv->oldtrash_dir, priv->newtrash_dir);
+
+ if (!priv->newtrash_dir) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ frame = create_frame (this, this->ctx->pool);
+ if (frame == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create frame");
+ ret = ENOMEM;
+ goto out;
+ }
- if (op_ret == 0) {
- dir_name = dirname (tmp_str);
- if (strcmp ((char*)cookie, dir_name) == 0) {
- ia_prot_t prot = {0, };
- flags = O_CREAT|O_EXCL|O_WRONLY;
+ /* assign new location values to new_loc members */
+ gf_uuid_copy (new_loc.gfid, trash_gfid);
+ gf_uuid_copy (new_loc.pargfid, root_gfid);
+ ret = extract_trash_directory (priv->newtrash_dir,
+ &new_loc.name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+ new_loc.path = gf_strdup (priv->newtrash_dir);
+ if (!new_loc.path) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+
+ /* assign old location values to old_loc members */
+ gf_uuid_copy (old_loc.gfid, trash_gfid);
+ gf_uuid_copy (old_loc.pargfid, root_gfid);
+ ret = extract_trash_directory (priv->oldtrash_dir,
+ &old_loc.name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+ old_loc.path = gf_strdup (priv->oldtrash_dir);
+ if (!old_loc.path) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
- //Call create again once directory structure is created.
- STACK_WIND (frame, trash_ftruncate_create_cbk,
+ old_loc.inode = inode_ref (priv->trash_inode);
+ gf_uuid_copy(old_loc.inode->gfid, old_loc.gfid);
+
+ STACK_WIND (frame, trash_reconf_rename_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- &local->newloc, flags,
- st_mode_from_ia (prot, local->loc.inode->ia_type),
- local->newfd);
- goto out;
+ FIRST_CHILD(this)->fops->rename,
+ &old_loc, &new_loc, options);
+ GF_FREE (priv->oldtrash_dir);
+
+ priv->oldtrash_dir = gf_strdup(priv->newtrash_dir);
+ if (!priv->oldtrash_dir) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
}
}
+ tmp = NULL;
- LOCK (&frame->lock);
- {
- loop_count = ++local->loop_count;
- }
- UNLOCK (&frame->lock);
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
+ GF_OPTION_RECONF ("trash-internal-op", priv->internal, options,
+ bool, out);
- i++;
- if ((i > loop_count) || (count > PATH_MAX))
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ GF_OPTION_RECONF ("trash-max-filesize", max_fsize, options,
+ size_uint64, out);
+ if (max_fsize) {
+ if (max_fsize > GF_ALLOWED_MAX_FILE_SIZE) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Size specified for max-size(in MB) is too "
+ "large so using 1GB as max-size (NOT IDEAL)");
+ priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE;
+ } else
+ priv->max_trash_file_size = max_fsize;
+ gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size",
+ priv->max_trash_file_size);
}
- tmp_loc.path = tmp_path;
+ GF_OPTION_RECONF ("trash-eliminate-path", tmp, options, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no option specified for 'eliminate', using NULL");
+ } else {
+ if (priv->eliminate)
+ wipe_eliminate_path (&priv->eliminate);
+
+ tmp_str = gf_strdup (tmp);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ ret = store_eliminate_path (tmp_str, &priv->eliminate);
- STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755);
+ }
out:
- GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
if (tmp_str)
GF_FREE (tmp_str);
+ loc_wipe (&new_loc);
+ loc_wipe (&old_loc);
- return 0;
+ return ret;
}
-
-int32_t
-trash_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+/**
+ * Notify is used to create the trash directory with fixed gfid
+ * using STACK_WIND only when posix xlator is up
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
-
- priv = this->private;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: %s",local->newloc.path, strerror(op_errno));
+ trash_private_t *priv = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+ uuid_t *tgfid_ptr = NULL;
+ loc_t loc = {0, };
+ loc_t old_loc = {0, };
+ call_frame_t *frame = NULL;
- TRASH_STACK_UNWIND (frame, -1, op_errno, buf, NULL);
- return 0;
- }
- if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size))
- {
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->ftruncate,
- local->fd, local->fop_offset);
- return 0;
- }
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("trash", priv, out);
+
+ /* Check whether posix is up or not */
+ if (event == GF_EVENT_CHILD_UP) {
+ frame = create_frame(this, this->ctx->pool);
+ if (frame == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create frame");
+ ret = ENOMEM;
+ goto out;
+ }
+ dict = dict_new ();
+ if (!dict) {
+ ret = ENOMEM;
+ goto out;
+ }
+ priv->trash_itable = inode_table_new (0, this);
+
+ /* Here there are two possibilities: if the trash directory already
+ * exists, then we need to perform a rename operation on the
+ * old one. Otherwise, we need to create the trash directory.
+ * For both, we need to pass a location variable, gfid of parent
+ * and a frame for calling STACK_WIND. The location variable
+ * requires name, path, gfid and inode
+ */
+ if (!priv->oldtrash_dir) {
+ loc.inode = inode_new (priv->trash_itable);
+ gf_uuid_copy (loc.gfid, trash_gfid);
- STACK_WIND (frame, trash_ftruncate_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, &local->newloc,
- ( O_CREAT | O_EXCL | O_WRONLY ),
- st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type),
- local->newfd);
+ gf_log (this->name, GF_LOG_DEBUG, "nameless lookup for"
+ "old trash directory");
+ STACK_WIND (frame, trash_notify_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ &loc, dict);
+ gf_log (this->name, GF_LOG_DEBUG, "old_trash_dir %s",
+ priv->oldtrash_dir);
+ loc_wipe (&loc);
+ }
- return 0;
-}
+ if (priv->oldtrash_dir == NULL) {
+ ret = EINVAL;
+ goto out;
+ }
+ if (strcmp (priv->oldtrash_dir, priv->newtrash_dir) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Creating trash "
+ "directory %s from notify",
+ priv->newtrash_dir);
+
+ tgfid_ptr = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!tgfid_ptr) {
+ ret = ENOMEM;
+ goto out;
+ }
+ gf_uuid_copy (*tgfid_ptr, trash_gfid);
+
+ gf_uuid_copy (loc.gfid, trash_gfid);
+ gf_uuid_copy (loc.pargfid, root_gfid);
+ ret = extract_trash_directory (priv->newtrash_dir,
+ &loc.name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+ loc.path = gf_strdup (priv->newtrash_dir);
+ if (!loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
-int32_t
-trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- dentry_t *dir_entry = NULL;
- struct tm *tm = NULL;
- char *pathbuf = NULL;
- inode_t *newinode = NULL;
- time_t utime = 0;
- char timestr[256];
- int32_t retval = 0;
- int32_t match = 0;
+ priv->trash_inode = inode_new (priv->trash_itable);
+ priv->trash_inode->ia_type = IA_IFDIR;
+ loc.inode = inode_ref (priv->trash_inode);
+
+ /* Fixed gfid is set for trash directory with
+ * this function
+ */
+ ret = dict_set_dynptr (dict, "gfid-req", tgfid_ptr,
+ sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting key gfid-req failed");
+ goto out;
+ }
- priv = this->private;
+ /* The mkdir call for creating trash directory */
+ STACK_WIND (frame, trash_notify_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &loc, 0755,
+ 0022, dict);
+ } else {
+ /* assign new location values to new_loc members */
+ gf_log (this->name, GF_LOG_DEBUG, "Renaming %s -> %s"
+ " from notify", priv->oldtrash_dir,
+ priv->newtrash_dir);
+ gf_uuid_copy (loc.gfid, trash_gfid);
+ gf_uuid_copy (loc.pargfid, root_gfid);
+ ret = extract_trash_directory (priv->newtrash_dir,
+ &loc.name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+ loc.path = gf_strdup (priv->newtrash_dir);
+ if (!loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ /* assign old location values to old_loc members */
+ gf_uuid_copy (old_loc.gfid, trash_gfid);
+ gf_uuid_copy (old_loc.pargfid, root_gfid);
+ ret = extract_trash_directory (priv->oldtrash_dir,
+ &old_loc.name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ goto out;
+ }
+ old_loc.path = gf_strdup (priv->oldtrash_dir);
+ if (!old_loc.path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
- dir_entry = __dentry_search_arbit (fd->inode);
- retval = inode_path (fd->inode, NULL, &pathbuf);
+ old_loc.inode = inode_ref (priv->trash_inode);
+ gf_uuid_copy(old_loc.inode->gfid, old_loc.gfid);
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, dir_entry->name, 0) == 0) {
- match++;
- break;
+ STACK_WIND (frame, trash_notify_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ &old_loc, &loc, dict);
+ GF_FREE (priv->oldtrash_dir);
+
+ priv->oldtrash_dir = gf_strdup(priv->newtrash_dir);
+ if (!priv->oldtrash_dir) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
}
- trav = trav->next;
}
+ } else {
+ ret = default_notify (this, event, data);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "default notify event failed");
}
- if ((strncmp (pathbuf, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) ||
- (offset >= priv->max_trash_file_size) ||
- (!retval) ||
- match) {
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
- }
-
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- TRASH_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
-
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, pathbuf);
- strcat (local->newpath, timestr);
-
- local->fd = fd_ref (fd);
- newinode = inode_new (fd->inode->table);
- local->newfd = fd_create (newinode, frame->root->pid);
- frame->local=local;
-
- local->newloc.inode = newinode;
- local->newloc.path = local->newpath;
-
- local->loc.inode = inode_ref (fd->inode);
- local->loc.ino = fd->inode->ino;
- local->loc.path = pathbuf;
+out:
+ if (ret && tgfid_ptr)
+ GF_FREE (tgfid_ptr);
+ if (dict)
+ dict_unref (dict);
+ loc_wipe (&loc);
+ loc_wipe (&old_loc);
+
+ return ret;
+}
- local->fop_offset = offset;
- local->cur_offset = offset;
+/**
+ * mem_acct_init - set up memory accounting for the trash translator
+ *
+ * Hands gf_trash_mt_end + 1 to xlator_mem_acct_init() and returns its
+ * result. ret starts at -1, which is what is returned if the validation
+ * macro jumps to out.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
- STACK_WIND (frame, trash_ftruncate_fstat_cbk, this->children->xlator,
- this->children->xlator->fops->fstat, fd);
+ GF_VALIDATE_OR_GOTO ("trash", this, out);
- return 0;
+ ret = xlator_mem_acct_init (this, gf_trash_mt_end + 1);
+ if (ret != 0) {
+ /* the literals are concatenated: keep the separating space */
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init "
+ "failed");
+ return ret;
+ }
+out:
+ return ret;
}
/**
- * trash_init -
+ * trash_init - build the trash translator's private state
+ *
+ * Reads the "trash", "trash-dir", "trash-eliminate-path",
+ * "trash-max-filesize", "trash-internal-op" and "brick-path" options,
+ * allocates the private structure and the local memory pool and, on
+ * success, publishes the result through this->private.
+ *
+ * Returns 0 on success. On failure returns -1 or a positive errno value
+ * (ENOMEM, EINVAL, ENAMETOOLONG) after releasing what was allocated here.
*/
int32_t
init (xlator_t *this)
{
- int32_t ret = 0;
- data_t *data = NULL;
- trash_private_t *_priv = NULL;
- trash_elim_pattern_t *trav = NULL;
- char *tmp_str = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char trash_dir[PATH_MAX] = {0,};
- uint64_t max_trash_file_size64 = 0;
-
- /* Create .trashcan directory in init */
+ trash_private_t *priv = NULL;
+ int ret = -1;
+ int len = 0;
+ char *tmp = NULL;
+ char *tmp_str = NULL;
+ char trash_dir[PATH_MAX] = {0,};
+ uint64_t max_trash_file_size64 = 0;
+ data_t *data = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trash", this, out);
+
if (!this->children || this->children->next) {
gf_log (this->name, GF_LOG_ERROR,
"not configured with exactly one child. exiting");
- return -1;
+ ret = -1;
+ goto out;
}
if (!this->parents) {
gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ "dangling volume. check volfile");
}
- _priv = GF_CALLOC (1, sizeof (*_priv), gf_trash_mt_trash_private_t);
- if (!_priv) {
+ priv = GF_CALLOC (1, sizeof (*priv), gf_trash_mt_trash_private_t);
+ if (!priv) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- return -1;
+ ret = ENOMEM;
+ goto out;
}
- data = dict_get (this->options, "trash-dir");
- if (!data) {
- gf_log (this->name, GF_LOG_NORMAL,
+ /* Trash priv data members are initialized through the following
+ * set of statements
+ */
+ GF_OPTION_INIT ("trash", priv->state, bool, out);
+
+ GF_OPTION_INIT ("trash-dir", tmp, str, out);
+
+ /* We store trash dir value as path for easier manipulation*/
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_INFO,
"no option specified for 'trash-dir', "
"using \"/.trashcan/\"");
- _priv->trash_dir = gf_strdup ("/.trashcan");
+ priv->newtrash_dir = gf_strdup ("/.trashcan/");
+ if (!priv->newtrash_dir) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
} else {
- /* Need a path with '/' as the first char, if not
- given, append it */
- if (data->data[0] == '/') {
- _priv->trash_dir = gf_strdup (data->data);
- } else {
- /* TODO: Make sure there is no ".." in the path */
- strcpy (trash_dir, "/");
- strcat (trash_dir, data->data);
- _priv->trash_dir = gf_strdup (trash_dir);
+ /* Bounded formatting: the option value is administrator supplied
+ * and must not be able to overflow trash_dir.
+ */
+ len = snprintf (trash_dir, sizeof (trash_dir), "/%s/", tmp);
+ if (len < 0 || (size_t) len >= sizeof (trash_dir)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "value of option 'trash-dir' is too long");
+ ret = ENAMETOOLONG;
+ goto out;
+ }
+ priv->newtrash_dir = gf_strdup (trash_dir);
+ if (!priv->newtrash_dir) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
}
}
+ tmp = NULL;
- data = dict_get (this->options, "eliminate-pattern");
- if (!data) {
- gf_log (this->name, GF_LOG_TRACE,
+ GF_OPTION_INIT ("trash-eliminate-path", tmp, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_INFO,
"no option specified for 'eliminate', using NULL");
} else {
- tmp_str = gf_strdup (data->data);
+ tmp_str = gf_strdup (tmp);
if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory");
+ ret = ENOMEM;
+ goto out;
}
+ /* NOTE(review): the result is stored in ret but never examined;
+ * confirm store_eliminate_path()'s return convention and
+ * whether a failure should abort init.
+ */
+ ret = store_eliminate_path (tmp_str, &priv->eliminate);
- /* Match Filename to option specified in eliminate. */
- component = strtok_r (tmp_str, "|", &strtokptr);
- while (component) {
- trav = GF_CALLOC (1, sizeof (*trav),
- gf_trash_mt_trash_elim_pattern_t);
- if (!trav) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- break;
- }
- trav->pattern = component;
- trav->next = _priv->eliminate;
- _priv->eliminate = trav;
-
- component = strtok_r (NULL, "|", &strtokptr);
- }
}
+ tmp = NULL;
- /* TODO: do gf_string2sizet () */
- data = dict_get (this->options, "max-trashable-file-size");
- if (!data) {
- gf_log (this->name, GF_LOG_DEBUG,
+ GF_OPTION_INIT ("trash-max-filesize", max_trash_file_size64,
+ size_uint64, out);
+ if (!max_trash_file_size64) {
+ gf_log (this->name, GF_LOG_ERROR,
"no option specified for 'max-trashable-file-size', "
"using default = %lld MB",
GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB);
- _priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE;
+ priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE;
} else {
- ret = gf_string2bytesize (data->data,
- &max_trash_file_size64);
if( max_trash_file_size64 > GF_ALLOWED_MAX_FILE_SIZE ) {
gf_log (this->name, GF_LOG_DEBUG,
"Size specified for max-size(in MB) is too "
"large so using 1GB as max-size (NOT IDEAL)");
- _priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE;
+ priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE;
} else
- _priv->max_trash_file_size = max_trash_file_size64;
+ priv->max_trash_file_size = max_trash_file_size64;
gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size",
- _priv->max_trash_file_size);
+ priv->max_trash_file_size);
}
- this->private = (void *)_priv;
- return 0;
+ GF_OPTION_INIT ("trash-internal-op", priv->internal, bool, out);
+
+ this->local_pool = mem_pool_new (trash_local_t, 64);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* For creating directories inside trash with proper permissions,
+ * we need to perform stat on that directories, for this we use
+ * brick path
+ */
+ data = dict_get (this->options, "brick-path");
+ if (!data) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no option specified for 'brick-path'");
+ /* a missing option is a configuration error, not an
+ * allocation failure
+ */
+ ret = EINVAL;
+ goto out;
+ }
+ priv->brick_path = gf_strdup (data->data);
+ if (!priv->brick_path) {
+ ret = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "brick path is %s", priv->brick_path);
+
+ this->private = (void *)priv;
+ ret = 0;
+
+out:
+ if (tmp_str)
+ GF_FREE (tmp_str);
+ if (ret) {
+ if (priv) {
+ if (priv->newtrash_dir)
+ GF_FREE (priv->newtrash_dir);
+ if (priv->oldtrash_dir)
+ GF_FREE (priv->oldtrash_dir);
+ if (priv->brick_path)
+ GF_FREE (priv->brick_path);
+ if (priv->eliminate)
+ wipe_eliminate_path (&priv->eliminate);
+ GF_FREE (priv);
+ }
+ /* the very first validation can jump here with this == NULL */
+ if (this) {
+ mem_pool_destroy (this->local_pool);
+ this->local_pool = NULL;
+ }
+ }
+ return ret;
}
+/**
+ * trash_fini - release everything init() attached to the translator
+ *
+ * Frees the strings and the eliminate list held by the private structure,
+ * the structure itself and the local memory pool, then clears both
+ * pointers on @this so nothing is left referring to freed memory.
+ */
void
fini (xlator_t *this)
{
trash_private_t *priv = NULL;
+ GF_VALIDATE_OR_GOTO ("trash", this, out);
priv = this->private;
- if (priv)
- GF_FREE (priv);
+ if (priv) {
+ if (priv->newtrash_dir)
+ GF_FREE (priv->newtrash_dir);
+ if (priv->oldtrash_dir)
+ GF_FREE (priv->oldtrash_dir);
+ if (priv->brick_path)
+ GF_FREE (priv->brick_path);
+ if (priv->eliminate)
+ wipe_eliminate_path (&priv->eliminate);
+ GF_FREE (priv);
+ }
+ mem_pool_destroy (this->local_pool);
+ /* do not leave a dangling pool pointer behind */
+ this->local_pool = NULL;
+ this->private = NULL;
+out:
return;
}
+/* File operations for which the trash translator installs its own
+ * handlers (the trash_* functions named on the right).
+ */
struct xlator_fops fops = {
- .unlink = trash_unlink,
- .rename = trash_rename,
- .truncate = trash_truncate,
- .ftruncate = trash_ftruncate,
+ .unlink = trash_unlink,
+ .truncate = trash_truncate,
+ .ftruncate = trash_ftruncate,
+ .rmdir = trash_rmdir,
+ .mkdir = trash_mkdir,
+ .rename = trash_rename,
};
+/* The trash translator registers no callbacks: the table is empty. */
struct xlator_cbks cbks = {
};
+/* Option table of the trash translator. The keys "trash", "trash-dir",
+ * "trash-eliminate-path", "trash-max-filesize" and "trash-internal-op"
+ * are the ones init() reads through GF_OPTION_INIT. The last entry has a
+ * NULL key (presumably the table terminator - confirm against the option
+ * parser).
+ */
struct volume_options options[] = {
- { .key = { "trash-directory" },
- .type = GF_OPTION_TYPE_PATH,
+ { .key = { "trash" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable/disable trash translator",
+ },
+ { .key = { "trash-dir" },
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = ".trashcan",
+ .description = "Directory for trash files",
+ },
+ { .key = { "trash-eliminate-path" },
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Eliminate paths to be excluded "
+ "from trashing",
},
- { .key = { "eliminate-pattern" },
- .type = GF_OPTION_TYPE_STR,
+ { .key = { "trash-max-filesize" },
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "5MB",
+ .description = "Maximum size of file that can be "
+ "moved to trash",
},
- { .key = { "max-trashable-file-size" },
- .type = GF_OPTION_TYPE_SIZET,
+ { .key = { "trash-internal-op" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable/disable trash translator for "
+ "internal operations",
},
- { .key = {NULL} },
+ { .key = {NULL} },
};
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
index e1a1c314dc8..088c1b9a286 100644
--- a/xlators/features/trash/src/trash.h
+++ b/xlators/features/trash/src/trash.h
@@ -1,30 +1,15 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_H__
#define __TRASH_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -47,7 +32,6 @@
#define GF_ALLOWED_MAX_FILE_SIZE (1 * GF_UNIT_GB)
#endif
-
struct trash_struct {
fd_t *fd; /* for the fd of existing file */
fd_t *newfd; /* for the newly created file */
@@ -55,35 +39,60 @@ struct trash_struct {
loc_t newloc; /* to store the location for the new file */
size_t fsize; /* for keeping the size of existing file */
off_t cur_offset; /* current offset for read and write ops */
- off_t fop_offset;
+ off_t fop_offset; /* original offset received with the fop */
+ pid_t pid;
char origpath[PATH_MAX];
char newpath[PATH_MAX];
int32_t loop_count;
- struct stat preparent;
- struct stat postparent;
+ gf_boolean_t is_set_pid;
+ struct iatt preparent;
+ struct iatt postparent;
+ gf_boolean_t ctr_link_count_req;
};
typedef struct trash_struct trash_local_t;
-struct _trash_elim_pattern;
-typedef struct _trash_elim_pattern {
- struct _trash_elim_pattern *next;
- char *pattern;
-} trash_elim_pattern_t;
+/* One node of a singly linked list of paths, chained through next.
+ * The list head is kept in trash_priv.eliminate.
+ */
+struct _trash_elim_path {
+ struct _trash_elim_path *next; /* following node */
+ char *path; /* path held by this node */
+};
+typedef struct _trash_elim_path trash_elim_path;
+/* Private state of one trash translator instance (this->private). */
struct trash_priv {
- char *trash_dir;
- trash_elim_pattern_t *eliminate;
+ char *oldtrash_dir; /* copy of newtrash_dir taken after a trash dir rename */
+ char *newtrash_dir; /* "/<trash-dir option>/", default "/.trashcan/" */
+ char *brick_path; /* value of the "brick-path" option */
+ trash_elim_path *eliminate; /* list built from "trash-eliminate-path" */
size_t max_trash_file_size;
+ gf_boolean_t state; /* "trash" option */
+ gf_boolean_t internal; /* "trash-internal-op" option */
+ inode_t *trash_inode; /* NOTE(review): presumably the trash dir inode - confirm */
+ inode_table_t *trash_itable; /* NOTE(review): presumably its inode table - confirm */
};
typedef struct trash_priv trash_private_t;
-#define TRASH_STACK_UNWIND(frame, params ...) do { \
- trash_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND (frame, params); \
- trash_local_wipe (__local); \
- } while (0)
-
+/* Save the caller's pid from frame->root->pid in local->pid and replace
+ * it with GF_SERVER_PID_TRASH. Asserts that the pid has not been swapped
+ * already and records the swap in local->is_set_pid.
+ */
+#define TRASH_SET_PID(frame, local) do { \
+ GF_ASSERT (!local->is_set_pid); \
+ if (!local->is_set_pid) { \
+ local->pid = frame->root->pid; \
+ frame->root->pid = GF_SERVER_PID_TRASH; \
+ local->is_set_pid = _gf_true; \
+ } \
+} while (0)
+
+/* Undo TRASH_SET_PID: put the saved pid back into frame->root->pid and
+ * clear local->is_set_pid. Asserts that a swap is in effect.
+ */
+#define TRASH_UNSET_PID(frame, local) do { \
+ GF_ASSERT (local->is_set_pid); \
+ if (local->is_set_pid) { \
+ frame->root->pid = local->pid; \
+ local->is_set_pid = _gf_false; \
+ } \
+} while (0)
+
+/* Detach frame->local, unwind the frame for fop "op" with
+ * STACK_UNWIND_STRICT, then hand the detached local to
+ * trash_local_wipe().
+ */
+#define TRASH_STACK_UNWIND(op, frame, params ...) do { \
+ trash_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ trash_local_wipe (__local); \
+ } while (0)
#endif /* __TRASH_H__ */
#endif /* __TRASH_H__ */
diff --git a/xlators/features/upcall/Makefile.am b/xlators/features/upcall/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/upcall/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/upcall/src/Makefile.am b/xlators/features/upcall/src/Makefile.am
new file mode 100644
index 00000000000..7f63e792281
--- /dev/null
+++ b/xlators/features/upcall/src/Makefile.am
@@ -0,0 +1,21 @@
+xlator_LTLIBRARIES = upcall.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+upcall_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+upcall_la_SOURCES = upcall.c upcall-internal.c
+
+upcall_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+
+noinst_HEADERS = upcall.h upcall-mem-types.h upcall-messages.h \
+ upcall-cache-invalidation.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/upcall/src/upcall-cache-invalidation.h b/xlators/features/upcall/src/upcall-cache-invalidation.h
new file mode 100644
index 00000000000..62b458fa295
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-cache-invalidation.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UPCALL_CACHE_INVALIDATION_H__
+#define __UPCALL_CACHE_INVALIDATION_H__
+
+/* The time period for which a client will be notified of cache_invalidation
+ * events post its last access */
+#define CACHE_INVALIDATION_TIMEOUT "60"
+
+/* xlator options */
+gf_boolean_t is_cache_invalidation_enabled(xlator_t *this);
+int32_t get_cache_invalidation_timeout(xlator_t *this);
+
+#endif /* __UPCALL_CACHE_INVALIDATION_H__ */
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
new file mode 100644
index 00000000000..f3c81aff15c
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -0,0 +1,662 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "statedump.h"
+#include "syncop.h"
+
+#include "upcall.h"
+#include "upcall-mem-types.h"
+#include "glusterfs3-xdr.h"
+#include "protocol-common.h"
+#include "defaults.h"
+
+/*
+ * Tell whether at least one upcall feature is switched on.
+ * The only feature consulted is cache_invalidation; a translator
+ * without private data counts as disabled.
+ */
+gf_boolean_t
+is_upcall_enabled(xlator_t *this) {
+        upcall_private_t *conf = this->private;
+
+        if (conf && conf->cache_invalidation_enabled)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Return the configured cache_invalidation_timeout, or 0 when the
+ * translator has no private data.
+ */
+int32_t
+get_cache_invalidation_timeout(xlator_t *this) {
+        upcall_private_t *conf = this->private;
+
+        return conf ? conf->cache_invalidation_timeout : 0;
+}
+
+/*
+ * Locked wrapper around __add_upcall_client(): takes the inode
+ * context's client_list_lock, appends a new entry for the client and
+ * returns it (NULL when the entry could not be created).
+ */
+upcall_client_t*
+add_upcall_client (call_frame_t *frame, client_t *client,
+                   upcall_inode_ctx_t *up_inode_ctx)
+{
+        upcall_client_t *new_entry = NULL;
+        pthread_mutex_t *list_lock = &up_inode_ctx->client_list_lock;
+
+        pthread_mutex_lock (list_lock);
+        new_entry = __add_upcall_client (frame, client, up_inode_ctx);
+        pthread_mutex_unlock (list_lock);
+
+        return new_entry;
+}
+
+/*
+ * Allocate a client entry for client->client_uid, stamp it with the
+ * current time and the configured invalidation timeout, and append it to
+ * up_inode_ctx->client_list. The function does no locking itself; the
+ * callers in this file hold client_list_lock around it.
+ *
+ * Returns the new entry, or NULL when memory could not be allocated. In
+ * that case nothing is added to the list.
+ */
+upcall_client_t*
+__add_upcall_client (call_frame_t *frame, client_t *client,
+                     upcall_inode_ctx_t *up_inode_ctx)
+{
+        upcall_client_t *up_client_entry = NULL;
+
+        up_client_entry = GF_CALLOC (1, sizeof(*up_client_entry),
+                                     gf_upcall_mt_upcall_client_entry_t);
+        if (!up_client_entry) {
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_NO_MEMORY,
+                        "Memory allocation failed");
+                return NULL;
+        }
+        INIT_LIST_HEAD (&up_client_entry->client_list);
+
+        up_client_entry->client_uid = gf_strdup(client->client_uid);
+        if (!up_client_entry->client_uid) {
+                /* An entry without a uid would later be fed to strcmp()
+                 * by the list walkers, so do not link it at all.
+                 */
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_NO_MEMORY,
+                        "Memory allocation failed");
+                GF_FREE (up_client_entry);
+                return NULL;
+        }
+        up_client_entry->access_time = time(NULL);
+        up_client_entry->expire_time_attr =
+                        get_cache_invalidation_timeout(frame->this);
+
+        list_add_tail (&up_client_entry->client_list,
+                       &up_inode_ctx->client_list);
+
+        gf_log (THIS->name, GF_LOG_DEBUG, "upcall_entry_t client added - %s",
+                up_client_entry->client_uid);
+
+        return up_client_entry;
+}
+
+/*
+ * Look up the entry whose client_uid equals client->client_uid in the
+ * inode context's client list. A match gets its access_time refreshed
+ * and is returned; otherwise a new entry is created through
+ * __add_upcall_client() and returned (possibly NULL).
+ */
+upcall_client_t*
+__get_upcall_client (call_frame_t *frame, client_t *client,
+                     upcall_inode_ctx_t *up_inode_ctx)
+{
+        upcall_client_t *entry = NULL;
+        upcall_client_t *next  = NULL;
+
+        list_for_each_entry_safe (entry, next,
+                                  &up_inode_ctx->client_list,
+                                  client_list) {
+                if (strcmp(client->client_uid, entry->client_uid) != 0)
+                        continue;
+
+                /* known client: refresh its last-access stamp */
+                entry->access_time = time(NULL);
+                gf_log (THIS->name, GF_LOG_DEBUG,
+                        "upcall_entry_t client found - %s",
+                        entry->client_uid);
+                return entry;
+        }
+
+        /* not seen before: create one */
+        return __add_upcall_client (frame, client, up_inode_ctx);
+}
+
+/*
+ * Attach a fresh upcall_inode_ctx_t to the inode unless one is already
+ * stored, and link it into priv->inode_ctx_list under inode_ctx_lk.
+ * Uses the __inode_ctx_* variants; the caller in this file holds
+ * inode->lock around it.
+ *
+ * Returns 0 when a context exists afterwards (already present or newly
+ * set), -ENOMEM on allocation failure, or the non-zero result of
+ * __inode_ctx_set() when storing fails. In the last case the unpublished
+ * context is released again.
+ */
+int
+__upcall_inode_ctx_set (inode_t *inode, xlator_t *this)
+{
+        upcall_inode_ctx_t *inode_ctx   = NULL;
+        upcall_private_t   *priv        = NULL;
+        int                 ret         = -1;
+        uint64_t            ctx         = 0;
+
+        priv = this->private;
+        GF_ASSERT(priv);
+
+        ret = __inode_ctx_get (inode, this, &ctx);
+
+        /* a context is already attached: nothing to do */
+        if (!ret)
+                goto out;
+
+        inode_ctx = GF_CALLOC (1, sizeof (upcall_inode_ctx_t),
+                               gf_upcall_mt_upcall_inode_ctx_t);
+
+        if (!inode_ctx) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        pthread_mutex_init (&inode_ctx->client_list_lock, NULL);
+        INIT_LIST_HEAD (&inode_ctx->inode_ctx_list);
+        INIT_LIST_HEAD (&inode_ctx->client_list);
+        inode_ctx->destroy = 0;
+        gf_uuid_copy (inode_ctx->gfid, inode->gfid);
+
+        ctx = (long) inode_ctx;
+        ret = __inode_ctx_set (inode, this, &ctx);
+        if (ret) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "failed to set inode ctx (%p)", inode);
+                /* nobody else can see the context yet: free it here
+                 * instead of leaking it
+                 */
+                pthread_mutex_destroy (&inode_ctx->client_list_lock);
+                GF_FREE (inode_ctx);
+                goto out;
+        }
+
+        /* add this inode_ctx to the global list */
+        LOCK (&priv->inode_ctx_lk);
+        {
+                list_add_tail (&inode_ctx->inode_ctx_list,
+                               &priv->inode_ctx_list);
+        }
+        UNLOCK (&priv->inode_ctx_lk);
+out:
+        return ret;
+}
+
+/*
+ * Fetch the upcall context stored on the inode, creating it first when
+ * none is stored yet. Returns NULL when creation or the re-read fails.
+ */
+upcall_inode_ctx_t *
+__upcall_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        uint64_t  raw = 0;
+
+        if (__inode_ctx_get (inode, this, &raw) < 0) {
+                /* nothing stored yet: create the context, then re-read */
+                if (__upcall_inode_ctx_set (inode, this) < 0)
+                        return NULL;
+
+                if (__inode_ctx_get (inode, this, &raw) < 0)
+                        return NULL;
+        }
+
+        return (upcall_inode_ctx_t *) (long) raw;
+}
+
+/*
+ * Locked variant of __upcall_inode_ctx_get(): performs the lookup (and
+ * creation, if needed) while holding inode->lock.
+ */
+upcall_inode_ctx_t *
+upcall_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        upcall_inode_ctx_t *found = NULL;
+
+        LOCK (&inode->lock);
+        found = __upcall_inode_ctx_get (inode, this);
+        UNLOCK (&inode->lock);
+
+        return found;
+}
+
+/*
+ * Drop every client entry of the inode context whose last access is
+ * more than twice the cache-invalidation timeout in the past. The walk
+ * runs under client_list_lock, which is released on every path.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * __upcall_cleanup_client_entry() if an entry could not be cleaned up.
+ */
+int
+upcall_cleanup_expired_clients (xlator_t *this,
+                                upcall_inode_ctx_t *up_inode_ctx) {
+
+        upcall_client_t *up_client = NULL;
+        upcall_client_t *tmp       = NULL;
+        int              ret       = -1;
+        time_t           timeout   = 0;
+        time_t           t_expired = 0;
+
+        timeout = get_cache_invalidation_timeout(this);
+
+        pthread_mutex_lock (&up_inode_ctx->client_list_lock);
+        {
+                list_for_each_entry_safe (up_client,
+                                          tmp,
+                                          &up_inode_ctx->client_list,
+                                          client_list) {
+                        t_expired = time(NULL) -
+                                    up_client->access_time;
+
+                        if (t_expired <= (2*timeout))
+                                continue;
+
+                        /* log first: the entry and its uid string are
+                         * freed by the cleanup call below
+                         */
+                        gf_log (THIS->name, GF_LOG_TRACE,
+                                "Cleaning up client_entry(%s)",
+                                up_client->client_uid);
+
+                        ret = __upcall_cleanup_client_entry (up_client);
+                        if (ret) {
+                                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                                        UPCALL_MSG_INTERNAL_ERROR,
+                                        "Client entry cleanup failed (%p)",
+                                        up_client);
+                                goto unlock;
+                        }
+                }
+
+                ret = 0;
+        }
+unlock:
+        /* single exit so the mutex is never left held */
+        pthread_mutex_unlock (&up_inode_ctx->client_list_lock);
+
+        return ret;
+}
+
+/*
+ * Unlink one client entry from its list and free it together with its
+ * client_uid string. Always returns 0.
+ */
+int
+__upcall_cleanup_client_entry (upcall_client_t *up_client)
+{
+        /* detach before the memory goes away */
+        list_del_init (&up_client->client_list);
+
+        GF_FREE (up_client->client_uid);
+        GF_FREE (up_client);
+
+        return 0;
+}
+
+/*
+ * Release every client entry hanging off the given inode context.
+ * Does no locking itself; upcall_cleanup_inode_ctx() calls it with
+ * client_list_lock held. Always returns 0.
+ */
+int
+__upcall_cleanup_inode_ctx_client_list (upcall_inode_ctx_t *inode_ctx)
+{
+        upcall_client_t *entry = NULL;
+        upcall_client_t *next  = NULL;
+
+        list_for_each_entry_safe (entry, next,
+                                  &inode_ctx->client_list, client_list)
+                __upcall_cleanup_client_entry (entry);
+
+        return 0;
+}
+
+/*
+ * Detach the upcall context from the inode, notify and drop all of its
+ * clients, and flag the context for destruction (the context itself is
+ * freed by the reaper thread, not here).
+ *
+ * Returns the result of inode_ctx_del(); a negative value means nothing
+ * was detached.
+ */
+int
+upcall_cleanup_inode_ctx (xlator_t *this, inode_t *inode)
+{
+        uint64_t            raw       = 0;
+        upcall_inode_ctx_t *inode_ctx = NULL;
+        upcall_private_t   *priv      = this->private;
+        int                 ret       = 0;
+
+        GF_ASSERT(priv);
+
+        ret = inode_ctx_del (inode, this, &raw);
+        if (ret < 0) {
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_INTERNAL_ERROR,
+                        "Failed to del upcall_inode_ctx (%p)",
+                        inode);
+                return ret;
+        }
+
+        inode_ctx = (upcall_inode_ctx_t *)(long) raw;
+        if (!inode_ctx)
+                return ret;
+
+        /* Invalidate all the upcall cache entries */
+        upcall_cache_forget (this, inode, inode_ctx);
+
+        /* The lock is needed: the reaper thread may be cleaning up
+         * client entries of this context at the same time.
+         */
+        pthread_mutex_lock (&inode_ctx->client_list_lock);
+        if (!list_empty (&inode_ctx->client_list))
+                __upcall_cleanup_inode_ctx_client_list (inode_ctx);
+        pthread_mutex_unlock (&inode_ctx->client_list_lock);
+
+        /* Mark the inode_ctx to be destroyed */
+        inode_ctx->destroy = 1;
+        gf_msg_debug ("upcall", 0, "set upcall_inode_ctx (%p) to destroy mode",
+                      inode_ctx);
+
+        return ret;
+}
+
+/*
+ * Reaper thread body; data is the upcall xlator. Until priv->fini is
+ * set, it repeatedly walks priv->inode_ctx_list, expires stale client
+ * entries of every context and frees the contexts whose destroy flag is
+ * set, then sleeps for half the cache-invalidation timeout.
+ *
+ * The whole walk runs under priv->inode_ctx_lk: new contexts are linked
+ * into the list under that lock, so walking it unlocked would race with
+ * those insertions. Always returns NULL.
+ */
+void *
+upcall_reaper_thread (void *data)
+{
+        upcall_private_t   *priv      = NULL;
+        upcall_inode_ctx_t *inode_ctx = NULL;
+        upcall_inode_ctx_t *tmp       = NULL;
+        xlator_t           *this      = NULL;
+        time_t              timeout   = 0;
+        time_t              pause     = 0;
+
+        this = (xlator_t *)data;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        while (!priv->fini) {
+                LOCK (&priv->inode_ctx_lk);
+                {
+                        list_for_each_entry_safe (inode_ctx, tmp,
+                                                  &priv->inode_ctx_list,
+                                                  inode_ctx_list) {
+
+                                /* cleanup expired clients */
+                                upcall_cleanup_expired_clients (this,
+                                                                inode_ctx);
+
+                                if (!inode_ctx->destroy) {
+                                        continue;
+                                }
+
+                                /* client list would have been cleaned up*/
+                                gf_msg_debug ("upcall", 0,
+                                              "Freeing upcall_inode_ctx (%p)",
+                                              inode_ctx);
+                                list_del_init (&inode_ctx->inode_ctx_list);
+                                pthread_mutex_destroy (&inode_ctx->client_list_lock);
+                                GF_FREE (inode_ctx);
+                                inode_ctx = NULL;
+                        }
+                }
+                UNLOCK (&priv->inode_ctx_lk);
+
+                /* don't do a very busy loop: with a timeout below 2
+                 * the halved value would be 0, so sleep at least 1s
+                 */
+                timeout = get_cache_invalidation_timeout (this);
+                pause = timeout / 2;
+                if (pause < 1)
+                        pause = 1;
+                sleep (pause);
+        }
+
+        return NULL;
+}
+
+/*
+ * Start the reaper thread for this xlator; the thread handle is kept in
+ * priv->reaper_thr. Returns whatever pthread_create() returns.
+ */
+int
+upcall_reaper_thread_init (xlator_t *this)
+{
+        upcall_private_t *priv = this->private;
+
+        GF_ASSERT (priv);
+
+        return pthread_create (&priv->reaper_thr, NULL,
+                               upcall_reaper_thread, this);
+}
+
+/*
+ * Callback taking a dict, a key, its value and an opaque pointer:
+ * removes the key from the dict when is_virtual_xattr() reports it as
+ * virtual. v and tmp are unused. Always returns 0.
+ */
+int
+up_filter_virtual_xattr (dict_t *d, char *k, data_t *v, void *tmp)
+{
+        if (is_virtual_xattr (k) != _gf_true)
+                return 0;
+
+        dict_del (d, k);
+        return 0;
+}
+
+/*
+ * Record that "client" touched "inode" and notify the other clients
+ * registered on the inode's upcall context.
+ *
+ * The context is taken from frame->local when available, otherwise
+ * looked up (or created) on the inode. Under client_list_lock every
+ * entry of the context is visited: the entry of the calling client only
+ * gets its access_time refreshed, every other entry is passed to
+ * upcall_client_cache_invalidate() unless the flags consist of nothing
+ * but UP_ATIME. If the calling client has no entry yet, one is added.
+ *
+ * stbuf may be NULL; it is only consulted to fill in a still-null gfid
+ * of the context. Without a usable gfid nothing is sent.
+ *
+ * Since sending notifications for cache_invalidation is a best effort,
+ * any errors during the process are logged and ignored.
+ */
+void
+upcall_cache_invalidate (call_frame_t *frame, xlator_t *this, client_t *client,
+                         inode_t *inode, uint32_t flags, struct iatt *stbuf,
+                         struct iatt *p_stbuf, struct iatt *oldp_stbuf,
+                         dict_t *xattr)
+{
+        upcall_client_t    *up_client_entry = NULL;
+        upcall_client_t    *tmp             = NULL;
+        upcall_inode_ctx_t *up_inode_ctx    = NULL;
+        upcall_local_t     *local           = NULL;
+        gf_boolean_t        found           = _gf_false;
+
+        if (!is_upcall_enabled(this))
+                return;
+
+        /* server-side generated fops like quota/marker will not have any
+         * client associated with them. Ignore such fops.
+         */
+        if (!client) {
+                gf_msg_debug ("upcall", 0, "Internal fop - client NULL");
+                return;
+        }
+
+        /* frame->local may not have been set up for this fop */
+        local = frame->local;
+        if (local)
+                up_inode_ctx = local->upcall_inode_ctx;
+
+        if (!up_inode_ctx)
+                up_inode_ctx = upcall_inode_ctx_get (inode, this);
+
+        if (!up_inode_ctx) {
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_INTERNAL_ERROR,
+                        "upcall_inode_ctx_get failed (%p)",
+                        inode);
+                return;
+        }
+
+        /* In case of LOOKUP, if first time, inode created shall be
+         * invalid till it gets linked to inode table. Read gfid from
+         * the stat returned in such cases. Callers may pass no stat at
+         * all, so only copy when one is present.
+         */
+        if (gf_uuid_is_null (up_inode_ctx->gfid) && stbuf) {
+                /* That means inode must have been invalid when this inode_ctx
+                 * is created. Copy the gfid value from stbuf instead.
+                 */
+                gf_uuid_copy (up_inode_ctx->gfid, stbuf->ia_gfid);
+        }
+
+        GF_VALIDATE_OR_GOTO ("upcall_cache_invalidate",
+                             !(gf_uuid_is_null (up_inode_ctx->gfid)), out);
+        pthread_mutex_lock (&up_inode_ctx->client_list_lock);
+        {
+                list_for_each_entry_safe (up_client_entry, tmp,
+                                          &up_inode_ctx->client_list,
+                                          client_list) {
+
+                        /* Do not send UPCALL event if same client. */
+                        if (!strcmp(client->client_uid,
+                                    up_client_entry->client_uid)) {
+                                up_client_entry->access_time = time(NULL);
+                                found = _gf_true;
+                                continue;
+                        }
+
+                        /*
+                         * Ignore sending notifications in case of only UP_ATIME
+                         */
+                        if (!(flags & ~(UP_ATIME))) {
+                                if (found)
+                                        break;
+                                else /* we still need to find current client entry*/
+                                        continue;
+                        }
+
+                        /* any other client */
+
+                        /* XXX: Send notifications asynchronously
+                         * instead of in the I/O path - BZ 1200264
+                         *  Also if the file is frequently accessed, set
+                         *  expire_time_attr to 0.
+                         */
+                        upcall_client_cache_invalidate (this,
+                                                        up_inode_ctx->gfid,
+                                                        up_client_entry,
+                                                        flags, stbuf,
+                                                        p_stbuf, oldp_stbuf,
+                                                        xattr);
+                }
+
+                if (!found) {
+                        up_client_entry = __add_upcall_client (frame,
+                                                               client,
+                                                               up_inode_ctx);
+                }
+        }
+        pthread_mutex_unlock (&up_inode_ctx->client_list_lock);
+out:
+        return;
+}
+
+/*
+ * Send a cache-invalidation upcall for gfid to one client entry,
+ * provided the client accessed the file within the cache-invalidation
+ * timeout. An entry is cleaned up when the notify call fails, or when
+ * it has been idle for more than twice the timeout. Nothing is done
+ * for a null gfid.
+ */
+void
+upcall_client_cache_invalidate (xlator_t *this, uuid_t gfid,
+                                upcall_client_t *up_client_entry,
+                                uint32_t flags, struct iatt *stbuf,
+                                struct iatt *p_stbuf,
+                                struct iatt *oldp_stbuf, dict_t *xattr)
+{
+        struct gf_upcall                    notification = {0,};
+        struct gf_upcall_cache_invalidation payload      = {0,};
+        time_t                              window       = 0;
+        int                                 rc           = -1;
+        time_t  idle = time(NULL) - up_client_entry->access_time;
+
+        GF_VALIDATE_OR_GOTO ("upcall_client_cache_invalidate",
+                             !(gf_uuid_is_null (gfid)), out);
+        window = get_cache_invalidation_timeout(this);
+
+        if (idle >= window) {
+                /* client has been quiet for too long: no notification */
+                gf_log (THIS->name, GF_LOG_TRACE,
+                        "Cache invalidation notification NOT sent to %s",
+                        up_client_entry->client_uid);
+
+                if (idle > (2*window)) {
+                        /* Cleanup the entry */
+                        __upcall_cleanup_client_entry (up_client_entry);
+                }
+                goto out;
+        }
+
+        /* Build the request and send the notify call */
+        payload.flags = flags;
+        payload.expire_time_attr = up_client_entry->expire_time_attr;
+        if (stbuf)
+                payload.stat = *stbuf;
+        if (p_stbuf)
+                payload.p_stat = *p_stbuf;
+        if (oldp_stbuf)
+                payload.oldp_stat = *oldp_stbuf;
+        payload.dict = xattr;
+
+        notification.client_uid = up_client_entry->client_uid;
+        gf_uuid_copy (notification.gfid, gfid);
+        notification.data = &payload;
+        notification.event_type = GF_UPCALL_CACHE_INVALIDATION;
+
+        gf_log (THIS->name, GF_LOG_TRACE,
+                "Cache invalidation notification sent to %s",
+                up_client_entry->client_uid);
+
+        /* Need to send inode flags */
+        rc = this->notify (this, GF_EVENT_UPCALL, &notification);
+
+        /*
+         * notify may fail as the client could have been
+         * dis(re)connected. Cleanup the client entry.
+         */
+        if (rc < 0)
+                __upcall_cleanup_client_entry (up_client_entry);
+out:
+        return;
+}
+
+/*
+ * Called while an upcall_inode_ctx is being cleaned up (in case of
+ * 'inode_forget'). Sends "UP_FORGET" to every client on the context so
+ * that they invalidate their cache entry and do a fresh lookup next
+ * time any I/O comes in. A NULL context is ignored.
+ */
+void
+upcall_cache_forget (xlator_t *this, inode_t *inode, upcall_inode_ctx_t *up_inode_ctx)
+{
+        upcall_client_t *entry = NULL;
+        upcall_client_t *next  = NULL;
+
+        if (!up_inode_ctx)
+                return;
+
+        pthread_mutex_lock (&up_inode_ctx->client_list_lock);
+        list_for_each_entry_safe (entry, next,
+                                  &up_inode_ctx->client_list,
+                                  client_list) {
+                /* Stamp the entry with the current time so that the
+                 * notification is sent
+                 */
+                entry->access_time = time(NULL);
+
+                upcall_client_cache_invalidate(this, up_inode_ctx->gfid,
+                                               entry, UP_FORGET,
+                                               NULL, NULL, NULL, NULL);
+        }
+        pthread_mutex_unlock (&up_inode_ctx->client_list_lock);
+}
diff --git a/xlators/features/upcall/src/upcall-mem-types.h b/xlators/features/upcall/src/upcall-mem-types.h
new file mode 100644
index 00000000000..55793ec65ca
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UPCALL_MEM_TYPES_H__
+#define __UPCALL_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory-accounting type ids of the upcall translator. Numbering starts
+ * right after gf_common_mt_end; gf_upcall_mt_end marks the end of the
+ * range.
+ */
+enum gf_upcall_mem_types_ {
+ gf_upcall_mt_conf_t = gf_common_mt_end + 1,
+ gf_upcall_mt_private_t,
+ gf_upcall_mt_upcall_inode_ctx_t, /* upcall_inode_ctx_t allocations */
+ gf_upcall_mt_upcall_client_entry_t, /* upcall_client_t allocations */
+ gf_upcall_mt_end
+};
+#endif
+
diff --git a/xlators/features/upcall/src/upcall-messages.h b/xlators/features/upcall/src/upcall-messages.h
new file mode 100644
index 00000000000..0cfdfd68b77
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-messages.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _UPCALL_MESSAGES_H_
+#define _UPCALL_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file upcall-messages.h
+ * \brief UPCALL log-message IDs and their descriptions.
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE_UPCALL GLFS_MSGID_COMP_UPCALL
+/* Must match the number of UPCALL_MSG_* IDs defined below (see rule 2 in the
+ * note above); with a smaller value GLFS_MSGID_END falls inside the range of
+ * message IDs that are actually in use. */
+#define GLFS_NUM_MESSAGES 3
+#define GLFS_MSGID_END (GLFS_COMP_BASE_UPCALL + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_UPCALL, "Invalid: Start of messages"
+
+/*!
+ * @messageid 110001
+ * @diagnosis Out of Memory
+ * @recommendedaction None
+ */
+#define UPCALL_MSG_NO_MEMORY (GLFS_COMP_BASE_UPCALL + 1)
+#define UPCALL_MSG_INTERNAL_ERROR (GLFS_COMP_BASE_UPCALL + 2)
+#define UPCALL_MSG_NOTIFY_FAILED (GLFS_COMP_BASE_UPCALL + 3)
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_UPCALL_MESSAGES_H_ */
diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c
new file mode 100644
index 00000000000..76f8ee4923d
--- /dev/null
+++ b/xlators/features/upcall/src/upcall.c
@@ -0,0 +1,2315 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "statedump.h"
+#include "syncop.h"
+
+#include "upcall.h"
+#include "upcall-mem-types.h"
+#include "glusterfs3-xdr.h"
+#include "protocol-common.h"
+#include "defaults.h"
+
+/*
+ * open callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT for
+ * local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+
+        return 0;
+}
+
+
+/*
+ * open fop: asks upcall_local_init() for a local tied to fd->inode, then
+ * winds to the first child with up_open_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         fd_t *fd, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (open, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_open_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd, xdata);
+
+        return 0;
+}
+
+/*
+ * writev callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_WRITE_FLAGS and the
+ * post-op attributes for local->inode. The fop result is unwound to the
+ * parent unchanged.
+ */
+int32_t
+up_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+        client_t       *client = NULL;
+        uint32_t        flags  = 0;
+        upcall_local_t *local  = NULL;
+
+        /* Same guard as every other callback in this file: skip the
+         * invalidation path and just unwind when the macro diverts. */
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        client = frame->root->client;
+        local = frame->local;
+
+        if ((op_ret < 0) || !local) {
+                goto out;
+        }
+        flags = UP_WRITE_FLAGS;
+        upcall_cache_invalidate (frame, this, client, local->inode, flags,
+                                 postbuf, NULL, NULL, NULL);
+
+out:
+        UPCALL_STACK_UNWIND (writev, frame, op_ret, op_errno,
+                             prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * writev fop: asks upcall_local_init() for a local tied to fd->inode, then
+ * winds to the first child with up_writev_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           struct iovec *vector, int count, off_t off, uint32_t flags,
+           struct iobref *iobref, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (writev, frame, -1, ENOMEM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_writev_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, off, flags, iobref, xdata);
+
+        return 0;
+}
+
+
+/*
+ * readv callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT and the
+ * returned attributes for local->inode. The fop result is unwound to the
+ * parent unchanged.
+ */
+int32_t
+up_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno,
+              struct iovec *vector, int count, struct iatt *stbuf,
+              struct iobref *iobref, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         stbuf, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (readv, frame, op_ret, op_errno, vector,
+                             count, stbuf, iobref, xdata);
+
+        return 0;
+}
+
+/*
+ * readv fop: asks upcall_local_init() for a local tied to fd->inode, then
+ * winds to the first child with up_readv_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_readv (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, size_t size, off_t offset,
+          uint32_t flags, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_readv_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset, flags, xdata);
+
+        return 0;
+}
+
+/*
+ * lk callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT for
+ * local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+           int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+           dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
+
+        return 0;
+}
+
+/*
+ * lk fop: asks upcall_local_init() for a local tied to fd->inode, then
+ * winds to the first child with up_lk_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_lk (call_frame_t *frame, xlator_t *this,
+       fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (lk, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_lk_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lk,
+                    fd, cmd, flock, xdata);
+        return 0;
+}
+
+/*
+ * truncate callback (also passed to STACK_WIND by up_ftruncate): when the
+ * fop succeeded and the frame carries an upcall local, calls
+ * upcall_cache_invalidate() with UP_WRITE_FLAGS and the post-op attributes
+ * for local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_WRITE_FLAGS,
+                                         postbuf, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+                             prebuf, postbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * truncate fop: asks upcall_local_init() for a local tied to loc->inode,
+ * then winds to the first child with up_truncate_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+             dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (truncate, frame, -1, ENOMEM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_truncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+                    loc, offset, xdata);
+
+        return 0;
+}
+
+/*
+ * setattr callback (also passed to STACK_WIND by up_fsetattr): when the fop
+ * succeeded and the frame carries an upcall local, calls
+ * upcall_cache_invalidate() with UP_ATTR_FLAGS and the post-op attributes
+ * for local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct iatt *statpre,
+                struct iatt *statpost, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                /* XXX: setattr -> UP_SIZE or UP_OWN or UP_MODE or UP_TIMES
+                 * or INODE_UPDATE (or UP_PERM esp incase of ACLs ->
+                 * INODE_INVALIDATE). Need to check what attr is changed and
+                 * accordingly pass UP_FLAGS. Bug1200271.
+                 */
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_ATTR_FLAGS,
+                                         statpost, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+                             statpre, statpost, xdata);
+
+        return 0;
+}
+
+/*
+ * setattr fop: asks upcall_local_init() for a local tied to loc->inode,
+ * then winds to the first child with up_setattr_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (setattr, frame, -1, ENOMEM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_setattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setattr,
+                    loc, stbuf, valid, xdata);
+
+        return 0;
+}
+
+/*
+ * rename callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() for local->inode with
+ * UP_RENAME_FLAGS | UP_PARENT_DENTRY_FLAGS, passing the renamed entry's
+ * attributes plus the post-op attributes of the new and old parents. The
+ * fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+               struct iatt *preoldparent, struct iatt *postoldparent,
+               struct iatt *prenewparent, struct iatt *postnewparent,
+               dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode,
+                                         (UP_RENAME_FLAGS |
+                                          UP_PARENT_DENTRY_FLAGS),
+                                         stbuf, postnewparent, postoldparent,
+                                         NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (rename, frame, op_ret, op_errno,
+                             stbuf, preoldparent, postoldparent,
+                             prenewparent, postnewparent, xdata);
+
+        return 0;
+}
+
+/*
+ * rename fop: asks upcall_local_init() for a local tied to oldloc->inode,
+ * copies oldloc into local->rename_oldloc, then winds to the first child
+ * with up_rename_cbk as the callback. If the local cannot be set up, the
+ * fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF may jump straight to
+ * the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_rename (call_frame_t *frame, xlator_t *this,
+           loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = upcall_local_init (frame, this, NULL, NULL, oldloc->inode,
+                                   NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        /* Keep a copy of the source location in the local. */
+        loc_copy (&local->rename_oldloc, oldloc);
+
+out:
+        STACK_WIND (frame, up_rename_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+                    oldloc, newloc, xdata);
+
+        return 0;
+}
+
+/*
+ * unlink callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() for local->inode with
+ * UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS and the parent's post-op
+ * attributes. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode,
+                                         (UP_NLINK_FLAGS |
+                                          UP_PARENT_DENTRY_FLAGS),
+                                         NULL, postparent, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+                             preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * unlink fop: asks upcall_local_init() for a local tied to loc->inode, then
+ * winds to the first child with up_unlink_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+           dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (unlink, frame, -1, ENOMEM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_unlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                    loc, xflag, xdata);
+
+        return 0;
+}
+
+/*
+ * link callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() for local->inode with
+ * UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS, the entry's attributes and the
+ * parent's post-op attributes. The fop result is unwound to the parent
+ * unchanged.
+ */
+int32_t
+up_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+             struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode,
+                                         (UP_NLINK_FLAGS |
+                                          UP_PARENT_DENTRY_FLAGS),
+                                         stbuf, postparent, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (link, frame, op_ret, op_errno,
+                             inode, stbuf, preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * link fop: asks upcall_local_init() for a local tied to oldloc->inode,
+ * then winds to the first child with up_link_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+         loc_t *newloc, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, oldloc->inode,
+                                NULL)) {
+                UPCALL_STACK_UNWIND (link, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_link_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+                    oldloc, newloc, xdata);
+
+        return 0;
+}
+
+/*
+ * rmdir callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() for local->inode with
+ * UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS and the parent's post-op
+ * attributes. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode,
+                                         (UP_NLINK_FLAGS |
+                                          UP_PARENT_DENTRY_FLAGS),
+                                         NULL, postparent, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+                             preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * rmdir fop: asks upcall_local_init() for a local tied to loc->inode, then
+ * winds to the first child with up_rmdir_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+          dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (rmdir, frame, -1, ENOMEM,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_rmdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+                    loc, flags, xdata);
+
+        return 0;
+}
+
+/*
+ * mkdir callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_TIMES and the parent's
+ * post-op attributes for local->inode (presumably the parent directory,
+ * since up_mkdir hands loc->parent to upcall_local_init()). The fop result
+ * is unwound to the parent unchanged.
+ */
+int32_t
+up_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno, inode_t *inode,
+              struct iatt *stbuf, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                /* Invalidate the parent's entry. */
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_TIMES,
+                                         postparent, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+                             inode, stbuf, preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * mkdir fop: asks upcall_local_init() for a local tied to loc->parent, then
+ * winds to the first child with up_mkdir_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_mkdir (call_frame_t *frame, xlator_t *this,
+          loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->parent, NULL)) {
+                UPCALL_STACK_UNWIND (mkdir, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_mkdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                    loc, mode, umask, params);
+
+        return 0;
+}
+
+/*
+ * create callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_TIMES and the parent's
+ * post-op attributes for local->inode (presumably the parent directory,
+ * since up_create hands loc->parent to upcall_local_init()). The fop result
+ * is unwound to the parent unchanged.
+ */
+int32_t
+up_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, fd_t *fd, inode_t *inode,
+               struct iatt *stbuf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                /* The file is newly created, so no notification is sent
+                 * for it; only the parent's entry is invalidated. */
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_TIMES,
+                                         postparent, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (create, frame, op_ret, op_errno, fd,
+                             inode, stbuf, preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * create fop: asks upcall_local_init() for a local tied to loc->parent,
+ * then winds to the first child with up_create_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_create (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, int32_t flags, mode_t mode,
+           mode_t umask, fd_t *fd, dict_t *params)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->parent, NULL)) {
+                UPCALL_STACK_UNWIND (create, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_create_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+                    loc, flags, mode, umask, fd, params);
+
+        return 0;
+}
+
+/*
+ * lookup callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT and the
+ * returned attributes for local->inode. The fop result is unwound to the
+ * parent unchanged.
+ */
+int32_t
+up_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno,
+               inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+               struct iatt *postparent)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         stbuf, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf,
+                             xattr, postparent);
+
+        return 0;
+}
+
+/*
+ * lookup fop: asks upcall_local_init() for a local tied to loc->inode, then
+ * winds to the first child with up_lookup_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_lookup (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, dict_t *xattr_req)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_lookup_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup,
+                    loc, xattr_req);
+
+        return 0;
+}
+
+/*
+ * stat callback (also passed to STACK_WIND by up_fstat): when the fop
+ * succeeded and the frame carries an upcall local, calls
+ * upcall_cache_invalidate() with UP_UPDATE_CLIENT and the returned
+ * attributes for local->inode. The fop result is unwound to the parent
+ * unchanged.
+ */
+int32_t
+up_stat_cbk (call_frame_t *frame, void *cookie,
+             xlator_t *this, int32_t op_ret, int32_t op_errno,
+             struct iatt *buf, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         buf, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (stat, frame, op_ret, op_errno, buf,
+                             xdata);
+
+        return 0;
+}
+
+/*
+ * stat fop: asks upcall_local_init() for a local tied to loc->inode, then
+ * winds to the first child with up_stat_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_stat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                    loc, xdata);
+
+        return 0;
+}
+
+/*
+ * fstat fop: asks upcall_local_init() for a local tied to fd->inode, then
+ * winds to the first child, reusing up_stat_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_fstat (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_stat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+                    fd, xdata);
+
+        return 0;
+}
+
+/*
+ * ftruncate fop: asks upcall_local_init() for a local tied to fd->inode,
+ * then winds to the first child, reusing up_truncate_cbk as the callback.
+ * If the local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_ftruncate (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, off_t offset, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_truncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+                    fd, offset, xdata);
+
+        return 0;
+}
+
+/*
+ * access callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT for
+ * local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * access fop: asks upcall_local_init() for a local tied to loc->inode, then
+ * winds to the first child with up_access_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_access (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, int32_t mask, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (access, frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_access_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->access,
+                    loc, mask, xdata);
+
+        return 0;
+}
+
+/*
+ * readlink callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT and the
+ * returned attributes for local->inode. The fop result is unwound to the
+ * parent unchanged.
+ */
+int32_t
+up_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, const char *path,
+                 struct iatt *stbuf, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         stbuf, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf,
+                             xdata);
+
+        return 0;
+}
+
+/*
+ * readlink fop: asks upcall_local_init() for a local tied to loc->inode,
+ * then winds to the first child with up_readlink_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_readlink (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, size_t size, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_readlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink,
+                    loc, size, xdata);
+
+        return 0;
+}
+
+/*
+ * mknod callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_TIMES and the parent's
+ * post-op attributes for local->inode (presumably the parent directory,
+ * since up_mknod hands loc->parent to upcall_local_init()). The fop result
+ * is unwound to the parent unchanged.
+ */
+int32_t
+up_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, inode_t *inode,
+              struct iatt *buf, struct iatt *preparent,
+              struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                /* Invalidate the parent's entry. */
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_TIMES,
+                                         postparent, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * mknod fop: asks upcall_local_init() for a local tied to loc->parent, then
+ * winds to the first child with up_mknod_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+          mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->parent, NULL)) {
+                UPCALL_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_mknod_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+                    loc, mode, rdev, umask, xdata);
+
+        return 0;
+}
+
+/*
+ * symlink callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_TIMES and the parent's
+ * post-op attributes for local->inode (presumably the parent directory,
+ * since up_symlink hands loc->parent to upcall_local_init()). The fop
+ * result is unwound to the parent unchanged.
+ */
+int32_t
+up_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                /* Invalidate the parent's entry. */
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_TIMES,
+                                         postparent, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+
+        return 0;
+}
+
+/*
+ * symlink fop: asks upcall_local_init() for a local tied to loc->parent,
+ * then winds to the first child with up_symlink_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_symlink (call_frame_t *frame, xlator_t *this,
+            const char *linkpath, loc_t *loc, mode_t umask,
+            dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->parent, NULL)) {
+                UPCALL_STACK_UNWIND (symlink, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_symlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                    linkpath, loc, umask, xdata);
+
+        return 0;
+}
+
+/*
+ * opendir callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT for
+ * local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd,
+                dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
+
+        return 0;
+}
+
+/*
+ * opendir fop: asks upcall_local_init() for a local tied to loc->inode,
+ * then winds to the first child with up_opendir_cbk as the callback. If the
+ * local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_opendir (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (opendir, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_opendir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+                    loc, fd, xdata);
+
+        return 0;
+}
+
+/*
+ * statfs callback: when the fop succeeded and the frame carries an upcall
+ * local, calls upcall_cache_invalidate() with UP_UPDATE_CLIENT for
+ * local->inode. The fop result is unwound to the parent unchanged.
+ */
+int32_t
+up_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+               dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
+
+        return 0;
+}
+
+/*
+ * statfs fop: asks upcall_local_init() for a local tied to loc->inode, then
+ * winds to the first child with up_statfs_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_statfs (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL)) {
+                UPCALL_STACK_UNWIND (statfs, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_statfs_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
+                    loc, xdata);
+
+        return 0;
+}
+
+/*
+ * readdir callback (also passed to STACK_WIND by up_readdirp): when the fop
+ * succeeded and the frame carries an upcall local, calls
+ * upcall_cache_invalidate() with UP_UPDATE_CLIENT for local->inode. The fop
+ * result is unwound to the parent unchanged.
+ */
+int32_t
+up_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, xdata);
+
+        return 0;
+}
+
+/*
+ * readdir fop: asks upcall_local_init() for a local tied to fd->inode, then
+ * winds to the first child with up_readdir_cbk as the callback. If the local
+ * cannot be set up, the fop is unwound with -1/ENOMEM. EXIT_IF_UPCALL_OFF
+ * may jump straight to the wind (presumably when upcall is disabled).
+ */
+int32_t
+up_readdir (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (readdir, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_readdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+                    fd, size, off, xdata);
+
+        return 0;
+}
+
+/*
+ * readdirp fop: asks upcall_local_init() for a local tied to fd->inode,
+ * then winds to the first child, reusing up_readdir_cbk as the callback. If
+ * the local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_readdirp (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t off, dict_t *dict)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (readdirp, frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_readdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+                    fd, size, off, dict);
+
+        return 0;
+}
+
+/*
+ * fsetattr fop: asks upcall_local_init() for a local tied to fd->inode,
+ * then winds to the first child, reusing up_setattr_cbk as the callback. If
+ * the local cannot be set up, the fop is unwound with -1/ENOMEM.
+ * EXIT_IF_UPCALL_OFF may jump straight to the wind (presumably when upcall
+ * is disabled).
+ */
+int32_t
+up_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        if (!upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL)) {
+                UPCALL_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL);
+                return 0;
+        }
+
+out:
+        STACK_WIND (frame, up_setattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
+                    fd, stbuf, valid, xdata);
+
+        return 0;
+}
+
+/*
+ * fallocate callback: when the fop succeeded and the frame carries an
+ * upcall local, calls upcall_cache_invalidate() with UP_WRITE_FLAGS and the
+ * post-op attributes for local->inode. The fop result is unwound to the
+ * parent unchanged.
+ */
+int32_t
+up_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *pre,
+                 struct iatt *post, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        local = frame->local;
+
+        /* Nothing to do for a failed fop or a frame without a local. */
+        if (local && (op_ret >= 0)) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_WRITE_FLAGS,
+                                         post, NULL, NULL, NULL);
+        }
+
+out:
+        UPCALL_STACK_UNWIND (fallocate, frame, op_ret, op_errno, pre,
+                             post, xdata);
+
+        return 0;
+}
+
+/*
+ * fallocate fop. Prepares the upcall local for fd->inode when upcall is
+ * enabled and winds to the child; unwinds with ENOMEM if that fails.
+ */
+int32_t
+up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+             int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, wind);
+
+        local = upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (fallocate, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL);
+                return 0;
+        }
+
+wind:
+        STACK_WIND (frame, up_fallocate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate,
+                    fd, mode, offset, len, xdata);
+
+        return 0;
+}
+
+/*
+ * discard callback. A successful reply triggers a cache invalidation with
+ * UP_WRITE_FLAGS and the post-op iatt; the reply is then passed upwards.
+ */
+int32_t
+up_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *pre,
+               struct iatt *post, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret >= 0) && local) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_WRITE_FLAGS,
+                                         post, NULL, NULL, NULL);
+        }
+
+unwind:
+        UPCALL_STACK_UNWIND (discard, frame, op_ret, op_errno, pre,
+                             post, xdata);
+
+        return 0;
+}
+
+/*
+ * discard fop. Prepares the upcall local for fd->inode when upcall is
+ * enabled and winds to the child; unwinds with ENOMEM if that fails.
+ */
+int32_t
+up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+           off_t offset, size_t len, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, wind);
+
+        local = upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (discard, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL);
+                return 0;
+        }
+
+wind:
+        STACK_WIND (frame, up_discard_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+                    fd, offset, len, xdata);
+
+        return 0;
+}
+
+/*
+ * zerofill callback. A successful reply triggers a cache invalidation with
+ * UP_WRITE_FLAGS and the post-op iatt; the reply is then passed upwards.
+ */
+int32_t
+up_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *pre,
+                struct iatt *post, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret >= 0) && local) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_WRITE_FLAGS,
+                                         post, NULL, NULL, NULL);
+        }
+
+unwind:
+        UPCALL_STACK_UNWIND (zerofill, frame, op_ret, op_errno, pre,
+                             post, xdata);
+
+        return 0;
+}
+
+/*
+ * zerofill fop. Prepares the upcall local for fd->inode when upcall is
+ * enabled and winds to the child; unwinds with ENOMEM if that fails.
+ */
+int
+up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+            off_t offset, off_t len, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, wind);
+
+        local = upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (zerofill, frame, -1, ENOMEM, NULL,
+                                     NULL, NULL);
+                return 0;
+        }
+
+wind:
+        STACK_WIND (frame, up_zerofill_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+                    fd, offset, len, xdata);
+
+        return 0;
+}
+
+
+/*
+ * seek callback. On success the call is made with UP_UPDATE_CLIENT and no
+ * iatt/xattr payload, then the reply is unwound to the parent.
+ */
+int32_t
+up_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+             int op_errno, off_t offset, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret >= 0) && local) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+unwind:
+        UPCALL_STACK_UNWIND (seek, frame, op_ret, op_errno, offset, xdata);
+
+        return 0;
+}
+
+
+/*
+ * seek fop. Prepares the upcall local for fd->inode when upcall is enabled
+ * and winds to the child; unwinds with ENOMEM if that fails.
+ */
+int32_t
+up_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+         gf_seek_what_t what, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, wind);
+
+        local = upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (seek, frame, -1, ENOMEM, 0, NULL);
+                return 0;
+        }
+
+wind:
+        STACK_WIND (frame, up_seek_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata);
+
+        return 0;
+}
+
+
+/*
+ * setxattr callback. On success: run up_filter_virtual_xattr over the saved
+ * xattr dict, stat the saved loc on the child to pick up fresh times (adds
+ * UP_TIMES when the stat succeeds) and send the invalidation with the
+ * xattrs attached. A failure of the filter pass is reported via op_ret.
+ */
+int32_t
+up_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+        uint32_t        flags = UP_XATTR;
+        struct iatt     stbuf = {0, };
+        int             ret   = 0;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret < 0) || !local)
+                goto unwind;
+
+        /* Remove the virtual xattrs from the dict */
+        ret = dict_foreach (local->xattr, up_filter_virtual_xattr, NULL);
+        if (ret < 0) {
+                op_ret = ret;
+                goto unwind;
+        }
+
+        if (syncop_stat (FIRST_CHILD(frame->this), &local->loc, &stbuf,
+                         NULL, NULL) == 0)
+                flags |= UP_TIMES;
+
+        upcall_cache_invalidate (frame, this, frame->root->client,
+                                 local->inode, flags, &stbuf, NULL, NULL,
+                                 local->xattr);
+
+unwind:
+        UPCALL_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * setxattr fop. When upcall is enabled, keep a private copy of the xattr
+ * dict in the frame local (the callback forwards it with the invalidation)
+ * and wind to the child.
+ *
+ * Returns 0 always; failures are reported by unwinding with ENOMEM.
+ */
+int32_t
+up_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+             int32_t flags, dict_t *xdata)
+{
+        int32_t          op_errno = -1;
+        upcall_local_t  *local    = NULL;
+        dict_t          *xattr    = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        xattr = dict_copy_with_ref (dict, NULL);
+        if (!xattr) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local = upcall_local_init (frame, this, loc, NULL, loc->inode, xattr);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+out:
+        STACK_WIND (frame, up_setxattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+                    xdata);
+
+        return 0;
+
+err:
+        /* upcall_local_init() fails before it refs the dict, so the copy
+         * still carries only our reference and nothing else will release
+         * it: drop it here instead of leaking it. */
+        if (xattr)
+                dict_unref (xattr);
+
+        UPCALL_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fsetxattr callback. On success: run up_filter_virtual_xattr over the
+ * saved xattr dict, fstat the saved fd on the child (adds UP_TIMES when the
+ * fstat succeeds) and send the invalidation with the xattrs attached. A
+ * failure of the filter pass is reported via op_ret.
+ */
+int32_t
+up_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+        uint32_t        flags = UP_XATTR;
+        struct iatt     stbuf = {0,};
+        int             ret   = 0;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret < 0) || !local)
+                goto unwind;
+
+        /* Remove the virtual xattrs from the dict */
+        ret = dict_foreach (local->xattr, up_filter_virtual_xattr, NULL);
+        if (ret < 0) {
+                op_ret = ret;
+                goto unwind;
+        }
+
+        if (syncop_fstat (FIRST_CHILD(frame->this), local->fd, &stbuf, NULL,
+                          NULL) == 0)
+                flags |= UP_TIMES;
+
+        upcall_cache_invalidate (frame, this, frame->root->client,
+                                 local->inode, flags, &stbuf, NULL, NULL,
+                                 local->xattr);
+
+unwind:
+        UPCALL_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * fsetxattr fop. When upcall is enabled, keep a private copy of the xattr
+ * dict and a ref on the fd in the frame local (both are used by the
+ * callback) and wind to the child.
+ *
+ * Returns 0 always; failures are reported by unwinding with ENOMEM.
+ */
+int32_t
+up_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+              int32_t flags, dict_t *xdata)
+{
+        int32_t          op_errno = -1;
+        upcall_local_t  *local    = NULL;
+        dict_t          *xattr    = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        xattr = dict_copy_with_ref (dict, NULL);
+        if (!xattr) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local = upcall_local_init (frame, this, NULL, fd, fd->inode, xattr);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+out:
+        STACK_WIND (frame, up_fsetxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
+                    fd, dict, flags, xdata);
+
+        return 0;
+
+err:
+        /* upcall_local_init() fails before it refs the dict, so the copy
+         * still carries only our reference and nothing else will release
+         * it: drop it here instead of leaking it. */
+        if (xattr)
+                dict_unref (xattr);
+
+        UPCALL_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fremovexattr callback. On success, fstat the saved fd on the child (adds
+ * UP_TIMES when that succeeds) and send an UP_XATTR_RM invalidation that
+ * carries the saved xattr dict.
+ */
+int32_t
+up_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+        uint32_t        flags = UP_XATTR_RM;
+        struct iatt     stbuf = {0,};
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret < 0) || !local)
+                goto unwind;
+
+        if (syncop_fstat (FIRST_CHILD(frame->this), local->fd, &stbuf, NULL,
+                          NULL) == 0)
+                flags |= UP_TIMES;
+
+        upcall_cache_invalidate (frame, this, frame->root->client,
+                                 local->inode, flags, &stbuf, NULL, NULL,
+                                 local->xattr);
+
+unwind:
+        UPCALL_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno,
+                             xdata);
+        return 0;
+}
+
+
+/*
+ * fremovexattr fop. When upcall is enabled, record the removed key in a
+ * one-entry dict (key -> "") stored in the frame local together with a ref
+ * on the fd, then wind to the child.
+ *
+ * Returns 0 always; failures are reported by unwinding with ENOMEM.
+ */
+int32_t
+up_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+        int32_t          op_errno = -1;
+        upcall_local_t  *local    = NULL;
+        dict_t          *xattr    = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        xattr = dict_for_key_value (name, "", 1);
+        if (!xattr) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local = upcall_local_init (frame, this, NULL, fd, fd->inode, xattr);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+out:
+        STACK_WIND (frame, up_fremovexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr,
+                    fd, name, xdata);
+        return 0;
+
+err:
+        /* upcall_local_init() fails before it refs the dict, so the dict
+         * still carries only our reference and nothing else will release
+         * it: drop it here instead of leaking it. */
+        if (xattr)
+                dict_unref (xattr);
+
+        UPCALL_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * removexattr callback. On success, stat the saved loc on the child (adds
+ * UP_TIMES when that succeeds) and send an UP_XATTR_RM invalidation that
+ * carries the saved xattr dict.
+ */
+int32_t
+up_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+        uint32_t        flags = UP_XATTR_RM;
+        struct iatt     stbuf = {0,};
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret < 0) || !local)
+                goto unwind;
+
+        if (syncop_stat (FIRST_CHILD(frame->this), &local->loc, &stbuf, NULL,
+                         NULL) == 0)
+                flags |= UP_TIMES;
+
+        upcall_cache_invalidate (frame, this, frame->root->client,
+                                 local->inode, flags, &stbuf, NULL, NULL,
+                                 local->xattr);
+
+unwind:
+        UPCALL_STACK_UNWIND (removexattr, frame, op_ret, op_errno,
+                             xdata);
+        return 0;
+}
+
+
+/*
+ * removexattr fop. When upcall is enabled, record the removed key in a
+ * one-entry dict (key -> "") stored in the frame local together with a copy
+ * of the loc, then wind to the child.
+ *
+ * Returns 0 always; failures are reported by unwinding with ENOMEM.
+ */
+int32_t
+up_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+        int32_t          op_errno = -1;
+        upcall_local_t  *local    = NULL;
+        dict_t          *xattr    = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, out);
+
+        xattr = dict_for_key_value (name, "", 1);
+        if (!xattr) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local = upcall_local_init (frame, this, loc, NULL, loc->inode, xattr);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+out:
+        STACK_WIND (frame, up_removexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+                    loc, name, xdata);
+        return 0;
+
+err:
+        /* upcall_local_init() fails before it refs the dict, so the dict
+         * still carries only our reference and nothing else will release
+         * it: drop it here instead of leaking it. */
+        if (xattr)
+                dict_unref (xattr);
+
+        UPCALL_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fgetxattr callback. On success the call is made with UP_UPDATE_CLIENT and
+ * no iatt/xattr payload, then the reply is unwound to the parent.
+ */
+int32_t
+up_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *dict,
+                  dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret >= 0) && local) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+unwind:
+        UPCALL_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno,
+                             dict, xdata);
+        return 0;
+}
+
+
+/*
+ * fgetxattr fop. Prepares the upcall local for fd->inode when upcall is
+ * enabled and winds to the child; unwinds with ENOMEM if that fails.
+ */
+int32_t
+up_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              const char *name, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, wind);
+
+        local = upcall_local_init (frame, this, NULL, NULL, fd->inode, NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (fgetxattr, frame, -1, ENOMEM,
+                                     NULL, NULL);
+                return 0;
+        }
+
+wind:
+        STACK_WIND (frame, up_fgetxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr,
+                    fd, name, xdata);
+        return 0;
+}
+
+
+/*
+ * getxattr callback. On success the call is made with UP_UPDATE_CLIENT and
+ * no iatt/xattr payload, then the reply is unwound to the parent.
+ */
+int32_t
+up_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *dict,
+                 dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, unwind);
+
+        local = frame->local;
+        if ((op_ret >= 0) && local) {
+                upcall_cache_invalidate (frame, this, frame->root->client,
+                                         local->inode, UP_UPDATE_CLIENT,
+                                         NULL, NULL, NULL, NULL);
+        }
+
+unwind:
+        UPCALL_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+                             dict, xdata);
+        return 0;
+}
+
+/*
+ * getxattr fop. Prepares the upcall local for loc->inode when upcall is
+ * enabled and winds to the child; unwinds with ENOMEM if that fails.
+ */
+int32_t
+up_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             const char *name, dict_t *xdata)
+{
+        upcall_local_t *local = NULL;
+
+        EXIT_IF_UPCALL_OFF (this, wind);
+
+        local = upcall_local_init (frame, this, NULL, NULL, loc->inode, NULL);
+        if (local == NULL) {
+                UPCALL_STACK_UNWIND (getxattr, frame, -1, ENOMEM,
+                                     NULL, NULL);
+                return 0;
+        }
+
+wind:
+        STACK_WIND (frame, up_getxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
+                    loc, name, xdata);
+        return 0;
+}
+
+
+/*
+ * Set up memory accounting for this xlator with room for
+ * gf_upcall_mt_end + 1 types. Returns -1 for a NULL xlator, otherwise the
+ * result of xlator_mem_acct_init() (a warning is logged when non-zero).
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (this == NULL)
+                return -1;
+
+        ret = xlator_mem_acct_init (this, gf_upcall_mt_end + 1);
+        if (ret != 0) {
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_NO_MEMORY,
+                        "Memory allocation failed");
+        }
+
+        return ret;
+}
+
+/*
+ * Release everything held by an upcall local (inode ref, xattr dict, both
+ * locs, fd ref) and return the local to its mem-pool. NULL is a no-op.
+ */
+void
+upcall_local_wipe (xlator_t *this, upcall_local_t *local)
+{
+        if (!local)
+                return;
+
+        inode_unref (local->inode);
+
+        if (local->xattr) {
+                /* There will be 2 refs at this point, hence dict_destroy:
+                 * 1. taken by dict_copy_with_ref
+                 * 2. taken by upcall_local_init ()
+                 */
+                dict_destroy (local->xattr);
+        }
+
+        loc_wipe (&local->rename_oldloc);
+        loc_wipe (&local->loc);
+
+        if (local->fd)
+                fd_unref (local->fd);
+
+        mem_put (local);
+}
+
+/*
+ * Allocate a zeroed upcall local from THIS->local_pool, take refs on the
+ * inode and (when given) the xattr dict and fd, copy the loc when given,
+ * look up the upcall inode ctx, and attach the local to the frame.
+ *
+ * Returns the new local, or NULL when the pool allocation fails (in which
+ * case no reference has been taken and the frame is untouched).
+ */
+upcall_local_t *
+upcall_local_init (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+                   inode_t *inode, dict_t *xattr)
+{
+        upcall_local_t *local = mem_get0 (THIS->local_pool);
+
+        if (!local)
+                return NULL;
+
+        local->inode = inode_ref (inode);
+
+        if (xattr)
+                local->xattr = dict_ref (xattr);
+
+        /* Shall we get inode_ctx and store it here itself? */
+        local->upcall_inode_ctx = upcall_inode_ctx_get (inode, this);
+
+        if (loc)
+                loc_copy (&local->loc, loc);
+
+        if (fd)
+                local->fd = fd_ref (fd);
+
+        frame->local = local;
+
+        return local;
+}
+
+/*
+ * Apply changed volume options. Re-reads "cache-invalidation" and
+ * "cache-invalidation-timeout"; if cache invalidation is now on and the
+ * reaper thread has not been started yet, start it.
+ *
+ * Returns 0 on success, -1 if an option could not be reconfigured, or the
+ * error from upcall_reaper_thread_init().
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        upcall_private_t *priv = NULL;
+        int               ret  = -1;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_OPTION_RECONF ("cache-invalidation", priv->cache_invalidation_enabled,
+                          options, bool, out);
+        GF_OPTION_RECONF ("cache-invalidation-timeout", priv->cache_invalidation_timeout,
+                          options, int32, out);
+
+        ret = 0;
+
+        if (priv->cache_invalidation_enabled &&
+            !priv->reaper_init_done) {
+                ret = upcall_reaper_thread_init (this);
+
+                if (ret) {
+                        gf_msg ("upcall", GF_LOG_WARNING, 0,
+                                UPCALL_MSG_INTERNAL_ERROR,
+                                "reaper_thread creation failed (%s)."
+                                " Disabling cache_invalidation",
+                                strerror(errno));
+                } else {
+                        /* Only record the thread as started when it really
+                         * was: a failed attempt must stay retryable on the
+                         * next reconfigure and must not be treated as a
+                         * live thread. */
+                        priv->reaper_init_done = 1;
+                }
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Xlator init: allocate the private structure, read the options, set up the
+ * inode-ctx list/lock and the local mem-pool, and start the reaper thread
+ * when cache invalidation is enabled.
+ *
+ * Returns 0 on success. On any failure everything set up so far is
+ * released, this->private is reset to NULL and a non-zero value is
+ * returned.
+ */
+int
+init (xlator_t *this)
+{
+        int               ret         = -1;
+        upcall_private_t *priv        = NULL;
+        gf_boolean_t      lock_inited = _gf_false;
+
+        priv = GF_CALLOC (1, sizeof (*priv),
+                          gf_upcall_mt_private_t);
+        if (!priv) {
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_NO_MEMORY,
+                        "Memory allocation failed");
+                goto out;
+        }
+
+        GF_OPTION_INIT ("cache-invalidation", priv->cache_invalidation_enabled,
+                        bool, out);
+        GF_OPTION_INIT ("cache-invalidation-timeout",
+                        priv->cache_invalidation_timeout, int32, out);
+
+        LOCK_INIT (&priv->inode_ctx_lk);
+        lock_inited = _gf_true;
+        INIT_LIST_HEAD (&priv->inode_ctx_list);
+
+        this->private = priv;
+        priv->fini = 0;
+        priv->reaper_init_done = 0;
+
+        this->local_pool = mem_pool_new (upcall_local_t, 512);
+        if (!this->local_pool) {
+                /* every fop allocates its local from this pool */
+                gf_msg ("upcall", GF_LOG_WARNING, 0,
+                        UPCALL_MSG_NO_MEMORY,
+                        "Memory allocation failed");
+                ret = -1;
+                goto out;
+        }
+        ret = 0;
+
+        if (priv->cache_invalidation_enabled) {
+                ret = upcall_reaper_thread_init (this);
+
+                if (ret) {
+                        gf_msg ("upcall", GF_LOG_WARNING, 0,
+                                UPCALL_MSG_INTERNAL_ERROR,
+                                "reaper_thread creation failed (%s)."
+                                " Disabling cache_invalidation",
+                                strerror(errno));
+                } else {
+                        priv->reaper_init_done = 1;
+                }
+        }
+out:
+        if (ret) {
+                if (this->local_pool) {
+                        mem_pool_destroy (this->local_pool);
+                        this->local_pool = NULL;
+                }
+                if (lock_inited)
+                        LOCK_DESTROY (&priv->inode_ctx_lk);
+                /* never leave this->private pointing at freed memory */
+                this->private = NULL;
+                GF_FREE (priv);
+        }
+
+        return ret;
+}
+
+/*
+ * Xlator teardown: detach the private structure, set its fini flag, wait
+ * for the reaper thread (if one was started), then destroy the lock and
+ * free the private structure. Always returns 0.
+ */
+int
+fini (xlator_t *this)
+{
+        upcall_private_t *priv = NULL;
+
+        priv = this->private;
+        if (!priv) {
+                return 0;
+        }
+        this->private = NULL;
+
+        priv->fini = 1;
+
+        /* reaper_init_done is only set after a reaper thread start was
+         * attempted; when cache invalidation was never enabled reaper_thr
+         * is still zero-filled and must not be handed to pthread_join(). */
+        if (priv->reaper_init_done)
+                pthread_join (priv->reaper_thr, NULL);
+
+        LOCK_DESTROY (&priv->inode_ctx_lk);
+
+        /* Do we need to cleanup the inode_ctxs? IMO not required
+         * as inode_forget would have been done on all the inodes
+         * before calling xlator_fini */
+        GF_FREE (priv);
+
+        return 0;
+}
+
+/* forget callback: drop the upcall inode ctx of the inode. Always 0. */
+int
+upcall_forget (xlator_t *this, inode_t *inode)
+{
+        (void) upcall_cleanup_inode_ctx (this, inode);
+
+        return 0;
+}
+
+/* release callback: nothing is kept per fd, so there is nothing to do. */
+int
+upcall_release (xlator_t *this, fd_t *fd)
+{
+        return 0;
+}
+
+/*
+ * Event handler. GF_EVENT_UPCALL requests are validated and passed on via
+ * default_notify(); a failure there is logged and returned. Every other
+ * event is handed to default_notify() and reported as success.
+ */
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        int               ret    = -1;
+        struct gf_upcall *up_req = NULL;
+
+        if (event != GF_EVENT_UPCALL) {
+                default_notify (this, event, data);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG, "Upcall Notify event = %d",
+                event);
+
+        up_req = (struct gf_upcall *) data;
+
+        GF_VALIDATE_OR_GOTO(this->name, up_req, out);
+
+        ret = default_notify (this, event, up_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        UPCALL_MSG_NOTIFY_FAILED,
+                        "Failed to notify cache invalidation"
+                        " to client(%s)",
+                        up_req->client_uid);
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* File-operation table of the upcall xlator. Entries inside
+ * NOT_SUPPORTED are compiled out unless that macro is defined. */
+struct xlator_fops fops = {
+ /* fops which change only "ATIME" do not result
+ * in any cache invalidation. Hence upcall
+ * notifications are not sent in this case.
+ * However, we need to store/update the
+ * client info in the upcall state to be able
+ * to notify them in case of any changes done
+ * to the data.
+ *
+ * The fops below do not trigger upcall
+ * notifications but will add/update
+ * clients info in the upcall inode ctx.*/
+ .lookup = up_lookup,
+ .open = up_open,
+ .statfs = up_statfs,
+ .opendir = up_opendir,
+ .readdir = up_readdir,
+ .readdirp = up_readdirp,
+ .stat = up_stat,
+ .fstat = up_fstat,
+ .access = up_access,
+ .readlink = up_readlink,
+ .readv = up_readv,
+ .lk = up_lk,
+ .seek = up_seek,
+
+ /* fops doing write */
+ .truncate = up_truncate,
+ .ftruncate = up_ftruncate,
+ .writev = up_writev,
+ .zerofill = up_zerofill,
+ .fallocate = up_fallocate,
+ .discard = up_discard,
+
+ /* fops changing attributes */
+ .fsetattr = up_fsetattr,
+ .setattr = up_setattr,
+
+ /* fops affecting parent dirent */
+ .mknod = up_mknod,
+ .create = up_create,
+ .symlink = up_symlink,
+ .mkdir = up_mkdir,
+
+ /* fops affecting both file and parent
+ * cache entries */
+ .unlink = up_unlink,
+ .link = up_link,
+ .rmdir = up_rmdir,
+ .rename = up_rename,
+
+ /* xattr fops */
+ .setxattr = up_setxattr,
+ .fsetxattr = up_fsetxattr,
+ .getxattr = up_getxattr,
+ .fgetxattr = up_fgetxattr,
+ .fremovexattr = up_fremovexattr,
+ .removexattr = up_removexattr,
+
+#ifdef NOT_SUPPORTED
+ /* internal lk fops */
+ .inodelk = up_inodelk,
+ .finodelk = up_finodelk,
+ .entrylk = up_entrylk,
+ .fentrylk = up_fentrylk,
+
+ /* Below fops follow 'WRITE' which
+ * would have already sent upcall
+ * notifications */
+ .flush = up_flush,
+ .fsync = up_fsync,
+ .fsyncdir = up_fsyncdir,
+
+ .xattrop = up_xattrop,
+ .fxattrop = up_fxattrop,
+#endif
+};
+
+/* Callback table: forget cleans up the upcall inode ctx, release is a
+ * no-op. */
+struct xlator_cbks cbks = {
+ .forget = upcall_forget,
+ .release = upcall_release,
+};
+
+/* Volume options understood by this xlator; both are read in init() and
+ * re-read in reconfigure(). The list ends with the {NULL} key entry. */
+struct volume_options options[] = {
+ { .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When \"on\", sends cache-invalidation"
+ " notifications."
+ },
+ { .key = {"cache-invalidation-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = CACHE_INVALIDATION_TIMEOUT,
+ .description = "After 'timeout' seconds since the time"
+ " client accessed any file, cache-invalidation"
+ " notifications are no longer sent to that client."
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/upcall/src/upcall.h b/xlators/features/upcall/src/upcall.h
new file mode 100644
index 00000000000..1616825580f
--- /dev/null
+++ b/xlators/features/upcall/src/upcall.h
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __UPCALL_H__
+#define __UPCALL_H__
+
+#include "compat-errno.h"
+#include "upcall-mem-types.h"
+#include "client_t.h"
+#include "upcall-messages.h"
+#include "upcall-cache-invalidation.h"
+#include "upcall-utils.h"
+
+/* Jump to 'label' when is_upcall_enabled() reports the feature as off for
+ * xlator 'this'; otherwise fall through. */
+#define EXIT_IF_UPCALL_OFF(this, label) do { \
+ if (!is_upcall_enabled(this)) \
+ goto label; \
+} while (0)
+
+/* Unwind 'fop' on 'frame' with 'params' and release the frame's upcall
+ * local. The local is detached from the frame before the unwind and wiped
+ * only after it, so it is not reached through the frame once the unwind
+ * has run. A NULL frame skips the detach. */
+#define UPCALL_STACK_UNWIND(fop, frame, params ...) do { \
+ upcall_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ upcall_local_wipe (__xl, __local); \
+} while (0)
+
+/* Destroy the call stack that 'frame' belongs to and release the frame's
+ * upcall local. Unlike UPCALL_STACK_UNWIND, 'frame' is dereferenced
+ * unconditionally, so it must not be NULL. */
+#define UPCALL_STACK_DESTROY(frame) do { \
+ upcall_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ upcall_local_wipe (__xl, __local); \
+} while (0)
+
+/* Per-xlator private state, stored in this->private. */
+struct _upcall_private_t {
+ gf_boolean_t cache_invalidation_enabled; /* "cache-invalidation" option */
+ int32_t cache_invalidation_timeout; /* "cache-invalidation-timeout" option */
+ struct list_head inode_ctx_list; /* list head, initialised in init() */
+ gf_lock_t inode_ctx_lk; /* presumably guards inode_ctx_list - TODO confirm */
+ int32_t reaper_init_done; /* set once a reaper thread start was attempted */
+ pthread_t reaper_thr; /* joined in fini() */
+ int32_t fini; /* set to 1 by fini() before joining the reaper */
+};
+typedef struct _upcall_private_t upcall_private_t;
+
+/* One client entry, linked through client_list. */
+struct _upcall_client_t {
+ struct list_head client_list;
+ /* strdup'ed copy of the client_uid; must be freed explicitly */
+ char *client_uid;
+ time_t access_time; /* time last accessed */
+ /* the amount of time which client can cache this entry */
+ uint32_t expire_time_attr;
+};
+typedef struct _upcall_client_t upcall_client_t;
+
+/* Upcall entries are maintained in inode_ctx */
+struct _upcall_inode_ctx_t {
+ struct list_head inode_ctx_list; /* presumably the link into priv->inode_ctx_list - TODO confirm */
+ struct list_head client_list; /* list of upcall_client_t entries */
+ pthread_mutex_t client_list_lock; /* mutex for clients list
+ of this upcall entry */
+ int destroy; /* NOTE(review): looks like a pending-destroy flag - confirm against users */
+ uuid_t gfid; /* gfid of the entry */
+};
+typedef struct _upcall_inode_ctx_t upcall_inode_ctx_t;
+
+/* Per-call state hung off frame->local; created by upcall_local_init() and
+ * released by upcall_local_wipe(). */
+struct upcall_local {
+ /* XXX: need to check if we can store
+ * pointers in 'local' which may get freed
+ * in future by other thread
+ */
+ upcall_inode_ctx_t *upcall_inode_ctx;
+ inode_t *inode; /* ref'ed in upcall_local_init() */
+ loc_t rename_oldloc;
+ loc_t loc; /* required for stat in *xattr_cbk */
+ fd_t *fd; /* required for fstat in *xattr_cbk */
+ dict_t *xattr; /* xattrs forwarded with the invalidation, may be NULL */
+};
+typedef struct upcall_local upcall_local_t;
+
+void upcall_local_wipe (xlator_t *this, upcall_local_t *local);
+upcall_local_t *upcall_local_init (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, inode_t *inode,
+ dict_t *xattr);
+
+upcall_client_t *add_upcall_client (call_frame_t *frame, client_t *client,
+ upcall_inode_ctx_t *up_inode_ctx);
+upcall_client_t *__add_upcall_client (call_frame_t *frame, client_t *client,
+ upcall_inode_ctx_t *up_inode_ctx);
+upcall_client_t *__get_upcall_client (call_frame_t *frame, client_t *client,
+ upcall_inode_ctx_t *up_inode_ctx);
+int __upcall_cleanup_client_entry (upcall_client_t *up_client);
+int upcall_cleanup_expired_clients (xlator_t *this,
+ upcall_inode_ctx_t *up_inode_ctx);
+
+int __upcall_inode_ctx_set (inode_t *inode, xlator_t *this);
+upcall_inode_ctx_t *__upcall_inode_ctx_get (inode_t *inode, xlator_t *this);
+upcall_inode_ctx_t *upcall_inode_ctx_get (inode_t *inode, xlator_t *this);
+int upcall_cleanup_inode_ctx (xlator_t *this, inode_t *inode);
+void upcall_cache_forget (xlator_t *this, inode_t *inode,
+ upcall_inode_ctx_t *up_inode_ctx);
+
+void *upcall_reaper_thread (void *data);
+int upcall_reaper_thread_init (xlator_t *this);
+
+/* Xlator options */
+gf_boolean_t is_upcall_enabled (xlator_t *this);
+
+/* Cache invalidation specific */
+void upcall_cache_invalidate (call_frame_t *frame, xlator_t *this,
+ client_t *client, inode_t *inode,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *p_stbuf,
+ struct iatt *oldp_stbuf, dict_t *xattr);
+void upcall_client_cache_invalidate (xlator_t *xl, uuid_t gfid,
+ upcall_client_t *up_client_entry,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *p_stbuf,
+ struct iatt *oldp_stbuf, dict_t *xattr);
+
+int up_filter_virtual_xattr (dict_t *d, char *k, data_t *v, void *tmp);
+
+#endif /* __UPCALL_H__ */
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
new file mode 100644
index 00000000000..627d74070e6
--- /dev/null
+++ b/xlators/lib/src/libxlator.c
@@ -0,0 +1,512 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "mem-types.h"
+#include "libxlator.h"
+
+
+/* Default gauge for xtime aggregation, indexed by result kind. As read by
+ * evaluate_marker_results(): a positive entry g requires count >= g, a
+ * negative entry -g requires count < g, zero imposes no constraint. Here:
+ * at least one FOUND, and no result of any other kind. */
+int marker_xtime_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = -1,
+ [MCNT_ENODATA] = -1,
+ [MCNT_ENOTCONN] = -1,
+ [MCNT_ENOENT] = -1,
+ [MCNT_EOTHER] = -1,
+};
+
+/* Default gauge for volume-mark (uuid) aggregation, indexed by result kind.
+ * As read by evaluate_marker_results(): at least one FOUND is required and
+ * the other result kinds are unconstrained (gauge 0). */
+int marker_uuid_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = 0,
+ [MCNT_ENODATA] = 0,
+ [MCNT_ENOTCONN] = 0,
+ [MCNT_ENOENT] = 0,
+ [MCNT_EOTHER] = 0,
+};
+
+/* errno reported by evaluate_marker_results() for each result kind; EINVAL
+ * is the generic value that a later, more specific entry may replace. */
+static int marker_idx_errno_map[] = {
+ [MCNT_FOUND] = EINVAL,
+ [MCNT_NOTFOUND] = EINVAL,
+ [MCNT_ENOENT] = ENOENT,
+ [MCNT_ENOTCONN] = ENOTCONN,
+ [MCNT_ENODATA] = ENODATA,
+ [MCNT_EOTHER] = EINVAL,
+ [MCNT_MAX] = 0,
+};
+
+/* Copy the two 32-bit words of oldtimbuf into newtimebuf, unchanged */
+static void
+update_timebuf (uint32_t *oldtimbuf, uint32_t *newtimebuf)
+{
+        int i = 0;
+
+        for (i = 0; i < 2; i++)
+                newtimebuf[i] = oldtimbuf[i];
+}
+
+/* Convert the two 32-bit words of oldtimbuf from network to host byte
+ * order, storing the result in newtimebuf */
+static void
+get_hosttime (uint32_t *oldtimbuf, uint32_t *newtimebuf)
+{
+        int i = 0;
+
+        for (i = 0; i < 2; i++)
+                newtimebuf[i] = ntohl (oldtimbuf[i]);
+}
+
+
+
+/* Check that name is exactly "<MARKER_XATTR_PREFIX>.<uuid>.xtime".
+ * Returns 0 on a match, -1 otherwise (including an empty or NULL uuid). */
+int
+match_uuid_local (const char *name, char *uuid)
+{
+        const char *rest = NULL;
+
+        if (!uuid || !*uuid)
+                return -1;
+
+        /* prefix, followed by a '.' separator */
+        rest = strtail ((char *)name, MARKER_XATTR_PREFIX);
+        if (!rest || rest[0] != '.')
+                return -1;
+
+        /* volume uuid, followed by the ".xtime" suffix and nothing else */
+        rest = strtail ((char *)(rest + 1), uuid);
+        if (!rest || strcmp (rest, ".xtime") != 0)
+                return -1;
+
+        return 0;
+}
+
+static void
+marker_local_incr_errcount (xl_marker_local_t *local, int op_errno)
+{
+ marker_result_idx_t i = -1;
+
+ if (!local)
+ return;
+
+ switch (op_errno) {
+ case ENODATA:
+ i = MCNT_ENODATA;
+ break;
+ case ENOENT:
+ i = MCNT_ENOENT;
+ break;
+ case ENOTCONN:
+ i = MCNT_ENOTCONN;
+ break;
+ default:
+ i = MCNT_EOTHER;
+ break;
+ }
+
+ local->count[i]++;
+}
+
+/*
+ * Check the per-kind result counters against the gauge and derive an errno.
+ *
+ * A gauge entry g > 0 is violated when count < g; an entry g < 0 is
+ * violated when count >= -g. The first violation selects the errno mapped
+ * to that kind; if that is only the generic EINVAL, later kinds with a
+ * non-zero count may supply a more specific errno. Returns 0 when the
+ * gauge is satisfied.
+ */
+static int
+evaluate_marker_results (int *gauge, int *count)
+{
+        int           idx       = 0;
+        int           op_errno  = 0;
+        gf_boolean_t  violated  = _gf_false;
+        gf_boolean_t  too_few   = _gf_false;
+        gf_boolean_t  too_many  = _gf_false;
+
+        for (idx = 0; idx < MCNT_MAX; idx++) {
+                if (!violated) {
+                        too_few  = (gauge[idx] > 0) &&
+                                   (count[idx] < gauge[idx]);
+                        too_many = (gauge[idx] < 0) &&
+                                   (count[idx] >= -gauge[idx]);
+                        if (too_few || too_many) {
+                                violated = _gf_true;
+                                /* generic action: adopt corresponding errno */
+                                op_errno = marker_idx_errno_map[idx];
+                        }
+                } else if (count[idx] > 0) {
+                        /* policy already violated: look for a more
+                         * informative errno in the remaining counters */
+                        op_errno = marker_idx_errno_map[idx];
+                }
+
+                if ((op_errno != 0) && (op_errno != EINVAL))
+                        break;
+        }
+
+        return op_errno;
+}
+
+/*
+ * Finish an aggregated marker getxattr: restore the caller's frame->local,
+ * place the aggregated value under key in dict when at least one reply was
+ * found (allocating a dict if none was passed in), evaluate the gauge to
+ * get the final op_ret/op_errno, unwind through the xlator-specific hook or
+ * a plain getxattr unwind, and free the marker local.
+ */
+static void
+cluster_marker_unwind (call_frame_t *frame, char *key, void *value, size_t size,
+                       dict_t *dict)
+{
+        xl_marker_local_t *local    = frame->local;
+        gf_boolean_t       own_dict = _gf_false;
+        int32_t            op_ret   = 0;
+        int32_t            op_errno = 0;
+
+        frame->local = local->xl_local;
+
+        if (local->count[MCNT_FOUND]) {
+                if (dict == NULL) {
+                        dict = dict_new();
+                        if (dict == NULL) {
+                                op_ret = -1;
+                                op_errno = ENOMEM;
+                                goto unwind;
+                        }
+                        own_dict = _gf_true;
+                }
+
+                if (dict_set_static_bin (dict, key, value, size)) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+        }
+
+        op_errno = evaluate_marker_results (local->gauge, local->count);
+        op_ret = op_errno ? -1 : 0;
+
+unwind:
+        if (local->xl_specf_unwind) {
+                local->xl_specf_unwind (frame, op_ret,
+                                        op_errno, dict, NULL);
+        } else {
+                STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+                                     dict, NULL);
+        }
+
+        /* value may point into local, so free it only after the unwind */
+        GF_FREE (local);
+        if (own_dict)
+                dict_unref (dict);
+}
+
+/* Aggregate all the <volid>.xtime attrs of the cluster and send the max.
+ * Each reply updates the counters in the marker local under frame->lock;
+ * the reply that brings call_count to zero performs the final unwind. */
+int32_t
+cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+        xl_marker_local_t *local             = frame->local;
+        uint32_t          *net_timebuf       = NULL;
+        uint32_t           host_timebuf[2]   = {0,};
+        char               marker_xattr[128] = {0};
+        int32_t            callcnt           = 0;
+        gf_boolean_t       is_newer          = _gf_false;
+
+        snprintf (marker_xattr, sizeof (marker_xattr), "%s.%s.%s",
+                  MARKER_XATTR_PREFIX, local->vol_uuid, XTIME);
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret) {
+                        marker_local_incr_errcount (local, op_errno);
+                        goto unlock;
+                }
+
+                if (dict_get_ptr (dict, marker_xattr, (void **)&net_timebuf)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "Unable to get <uuid>.xtime attr");
+                        local->count[MCNT_NOTFOUND]++;
+                        goto unlock;
+                }
+
+                if (!local->count[MCNT_FOUND]) {
+                        /* first value seen: adopt it as the running max */
+                        get_hosttime (net_timebuf, local->host_timebuf);
+                        update_timebuf (net_timebuf, local->net_timebuf);
+                        local->count[MCNT_FOUND]++;
+                        goto unlock;
+                }
+
+                /* compare in host order; word 0 first, word 1 breaks ties */
+                get_hosttime (net_timebuf, host_timebuf);
+                is_newer = (host_timebuf[0] > local->host_timebuf[0]) ||
+                           ((host_timebuf[0] == local->host_timebuf[0]) &&
+                            (host_timebuf[1] >= local->host_timebuf[1]));
+                if (is_newer) {
+                        update_timebuf (net_timebuf, local->net_timebuf);
+                        update_timebuf (host_timebuf, local->host_timebuf);
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 0)
+                cluster_marker_unwind (frame, marker_xattr, local->net_timebuf,
+                                       8, dict);
+
+        return 0;
+}
+
+/*
+ * Aggregate the volume-mark replies of all children. Under frame->lock each
+ * reply either becomes the stored mark (first one found) or replaces it
+ * when it carries a non-zero retval or a timestamp that is not older. Once
+ * a mark with a non-zero retval is stored it is kept. Replies whose
+ * major/minor differ from the stored mark are ignored. The reply that
+ * brings call_count to zero performs the final unwind.
+ */
+int32_t
+cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+        int32_t             callcnt  = 0;
+        struct volume_mark *volmark  = NULL;
+        struct volume_mark *copy     = NULL;
+        xl_marker_local_t  *local    = NULL;
+        int32_t             ret      = -1;
+        char               *vol_uuid = NULL;
+        gf_boolean_t        newer    = _gf_false;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+                vol_uuid = local->vol_uuid;
+
+                if (op_ret) {
+                        marker_local_incr_errcount (local, op_errno);
+                        goto unlock;
+                }
+
+                ret = dict_get_bin (dict, GF_XATTR_MARKER_KEY,
+                                    (void *)&volmark);
+                if (ret)
+                        goto unlock;
+
+                if (local->count[MCNT_FOUND]) {
+                        /* a mark of a different version is not comparable */
+                        if ((local->volmark->major != volmark->major) ||
+                            (local->volmark->minor != volmark->minor))
+                                goto unlock;
+
+                        /* a stored mark with a retval is never replaced */
+                        if (local->retval)
+                                goto unlock;
+
+                        newer = (volmark->sec > local->volmark->sec) ||
+                                ((volmark->sec == local->volmark->sec) &&
+                                 (volmark->usec >= local->volmark->usec));
+                        if (!volmark->retval && !newer)
+                                goto unlock;
+
+                        /* Duplicate first and swap only on success, so a
+                         * failed allocation keeps the previous mark instead
+                         * of leaving a NULL that later replies would
+                         * dereference. */
+                        copy = memdup (volmark, sizeof (*volmark));
+                        if (!copy) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "out of memory, keeping previous "
+                                        "volume mark");
+                                goto unlock;
+                        }
+                        GF_FREE (local->volmark);
+                        local->volmark = copy;
+                        if (volmark->retval)
+                                local->retval = volmark->retval;
+                } else {
+                        local->volmark = memdup (volmark, sizeof (*volmark));
+                        VALIDATE_OR_GOTO (local->volmark, unlock);
+                        gf_uuid_unparse (volmark->uuid, vol_uuid);
+                        if (volmark->retval)
+                                local->retval = volmark->retval;
+                        local->count[MCNT_FOUND]++;
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 0)
+                cluster_marker_unwind (frame, GF_XATTR_MARKER_KEY,
+                                       local->volmark, sizeof (*local->volmark),
+                                       dict);
+
+        return 0;
+}
+
+/*
+ * Fold one stime value into the minimum kept under key in dst.
+ *
+ * If dst has no entry for key yet, a zero-filled 8-byte buffer is allocated
+ * and stored there first. The incoming value replaces the stored one when,
+ * compared in host byte order (word 0 first, then word 1), it is strictly
+ * smaller. Returns 0 on success and a negative value on failure.
+ *
+ * NOTE(review): a freshly created entry starts at zero, so no value can be
+ * smaller than it - confirm that callers always seed dst beforehand.
+ */
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+        int           ret                   = -1;
+        uint32_t     *net_timebuf           = NULL;
+        uint32_t     *value_timebuf         = NULL;
+        uint32_t      host_timebuf[2]       = {0,};
+        uint32_t      host_value_timebuf[2] = {0,};
+        gf_boolean_t  is_older              = _gf_false;
+
+        /* stime should be minimum of all the other nodes */
+        ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+        if (ret < 0) {
+                net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+                                         gf_common_mt_char);
+                if (!net_timebuf)
+                        return ret;
+
+                ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "key=%s: dict set failed", key);
+                        /* the dict did not take the buffer, so free it */
+                        GF_FREE (net_timebuf);
+                        return ret;
+                }
+        }
+
+        value_timebuf = data_to_bin (value);
+        if (!value_timebuf) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "key=%s: getting value of stime failed", key);
+                return -1;
+        }
+
+        get_hosttime (value_timebuf, host_value_timebuf);
+        get_hosttime (net_timebuf, host_timebuf);
+
+        /* can't use 'min()' macro here as we need to compare two fields
+           in the array, selectively */
+        is_older = (host_value_timebuf[0] < host_timebuf[0]) ||
+                   ((host_value_timebuf[0] == host_timebuf[0]) &&
+                    (host_value_timebuf[1] < host_timebuf[1]));
+        if (is_older)
+                update_timebuf (value_timebuf, net_timebuf);
+
+        return 0;
+}
+
+/*
+ * Fold one stime VALUE into the maximum kept under KEY in DST.
+ *
+ * If DST has no entry for KEY, a buffer of sizeof (int64_t) bytes is
+ * allocated with GF_CALLOC and stored in the dict first.  Both time buffers
+ * are converted with get_hosttime() and compared field by field; when VALUE
+ * is strictly larger it is copied over the stored buffer with
+ * update_timebuf().
+ *
+ * Returns 0 on success, -ENOMEM when the buffer cannot be allocated, the
+ * negative dict_set_bin() result when it cannot be stored, and -EINVAL when
+ * VALUE carries no data.
+ */
+int
+gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+        int       ret                   = -ENOMEM;
+        uint32_t *net_timebuf           = NULL;
+        uint32_t *value_timebuf         = NULL;
+        uint32_t  host_timebuf[2]       = {0,};
+        uint32_t  host_value_timebuf[2] = {0,};
+
+        /* stime should be maximum of all the other nodes */
+        ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+        if (ret < 0) {
+                net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+                                         gf_common_mt_char);
+                if (!net_timebuf) {
+                        /*
+                         * ret still holds the lookup error at this point;
+                         * report the allocation failure instead.
+                         */
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "key=%s: dict set failed", key);
+                        goto error;
+                }
+        }
+
+        value_timebuf = data_to_bin (value);
+        if (!value_timebuf) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "key=%s: getting value of stime failed", key);
+                ret = -EINVAL;
+                goto out;
+        }
+
+        get_hosttime (value_timebuf, host_value_timebuf);
+        get_hosttime (net_timebuf, host_timebuf);
+
+        /* can't use 'max()' macro here as we need to compare two fields
+           in the array, selectively */
+        if ((host_value_timebuf[0] > host_timebuf[0]) ||
+            ((host_value_timebuf[0] == host_timebuf[0]) &&
+             (host_value_timebuf[1] > host_timebuf[1]))) {
+                update_timebuf (value_timebuf, net_timebuf);
+        }
+
+        ret = 0;
+out:
+        return ret;
+error:
+        /* Reached only when the new buffer could not be set in the dict. */
+        GF_FREE (net_timebuf);
+
+        return ret;
+}
+
+/* Count the entries of xl->children by walking the list to its NULL end. */
+static int
+_get_children_count (xlator_t *xl)
+{
+        int            total = 0;
+        xlator_list_t *child = xl->children;
+
+        while (child) {
+                total++;
+                child = child->next;
+        }
+
+        return total;
+}
+
+/*
+ * Fan a marker getxattr out to the subvolumes chosen by populate_args() and
+ * aggregate the replies in cluster_markeruuid_cbk / cluster_markerxtime_cbk.
+ *
+ * frame         - call frame; frame->local is saved in the new marker local
+ *                 (xl_local) and replaced by it.
+ * loc, name     - passed unchanged to each subvolume's getxattr.
+ * vol_uuid      - stored in the marker local; also compared against name via
+ *                 match_uuid_local() to recognise an xtime request.
+ * unwind        - stored as xl_specf_unwind; on allocation failure it is
+ *                 called directly with (-1, ENOMEM) when non-NULL.
+ * populate_args - fills subvols[] (sized from the xlator's child count) and
+ *                 returns how many entries to wind to; may adjust gauge[].
+ *
+ * Returns -EINVAL when the caller pid is not GF_CLIENT_PID_GSYNCD, when name
+ * is NULL, or when name is neither GF_XATTR_MARKER_KEY nor a match for
+ * vol_uuid.  Returns 0 otherwise, including the allocation-failure path,
+ * where the failure is reported through the unwind instead.
+ */
+int
+cluster_handle_marker_getxattr (call_frame_t *frame, loc_t *loc,
+ const char *name, char *vol_uuid,
+ xlator_specf_unwind_t unwind,
+ int (*populate_args) (call_frame_t *frame,
+ int type, int *gauge,
+ xlator_t **subvols))
+{
+ xlator_t *this = frame->this;
+ xlator_t **subvols = NULL;
+ int num_subvols = 0;
+ int type = 0;
+ int i = 0;
+ int gauge[MCNT_MAX] = {0};
+ xl_marker_local_t *local = NULL;
+
+ if (GF_CLIENT_PID_GSYNCD != frame->root->pid)
+ return -EINVAL;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ /* Pick the request kind and its default gauge from the xattr name. */
+ if (strcmp (GF_XATTR_MARKER_KEY, name) == 0) {
+ type = MARKER_UUID_TYPE;
+ memcpy (gauge, marker_uuid_default_gauge, sizeof (gauge));
+ } else if (match_uuid_local (name, vol_uuid) == 0) {
+ type = MARKER_XTIME_TYPE;
+ memcpy (gauge, marker_xtime_default_gauge, sizeof (gauge));
+ } else {
+ return -EINVAL;
+ }
+
+ /* subvols[] lives on this function's stack and has room for every child. */
+ num_subvols = _get_children_count (this);
+ subvols = alloca (num_subvols * sizeof (*subvols));
+ num_subvols = populate_args (frame, type, gauge, subvols);
+
+ local = GF_CALLOC (sizeof (struct marker_str), 1,
+ gf_common_mt_libxl_marker_local);
+
+ if (!local)
+ goto fail;
+
+ local->xl_local = frame->local;
+ local->call_count = num_subvols;
+ local->xl_specf_unwind = unwind;
+ local->vol_uuid = vol_uuid;
+ memcpy (local->gauge, gauge, sizeof (local->gauge));
+
+ frame->local = local;
+
+ /* NOTE(review): if populate_args() returns 0 nothing is wound and no
+ * callback unwinds the frame - confirm callers never hit this. */
+ for (i = 0; i < num_subvols; i++) {
+ if (MARKER_UUID_TYPE == type)
+ STACK_WIND (frame, cluster_markeruuid_cbk,
+ subvols[i],
+ subvols[i]->fops->getxattr,
+ loc, name, NULL);
+ else if (MARKER_XTIME_TYPE == type)
+ STACK_WIND (frame, cluster_markerxtime_cbk,
+ subvols[i],
+ subvols[i]->fops->getxattr,
+ loc, name, NULL);
+ }
+
+ return 0;
+fail:
+ if (unwind)
+ unwind (frame, -1, ENOMEM, NULL, NULL);
+ else
+ default_getxattr_failure_cbk (frame, ENOMEM);
+ return 0;
+}
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
new file mode 100644
index 00000000000..53ea404cd73
--- /dev/null
+++ b/xlators/lib/src/libxlator.h
@@ -0,0 +1,149 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _LIBXLATOR_H
+#define _LIBXLATOR_H
+
+
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat.h"
+#include "compat-errno.h"
+
+
+/* Extended-attribute name pieces used by the marker helpers. */
+#define MARKER_XATTR_PREFIX "trusted.glusterfs"
+#define XTIME "xtime"
+#define VOLUME_MARK "volume-mark"
+/* Adjacent string literals concatenate: "trusted.glusterfs.volume-mark". */
+#define GF_XATTR_MARKER_KEY MARKER_XATTR_PREFIX "." VOLUME_MARK
+/* presumably the length of a textual UUID without the trailing NUL - TODO confirm */
+#define UUID_SIZE 36
+/* Request kinds passed as 'type' to the populate_args callback of
+ * cluster_handle_marker_getxattr(). */
+#define MARKER_UUID_TYPE 1
+#define MARKER_XTIME_TYPE 2
+
+/* Caller-supplied unwind function handed to cluster_handle_marker_getxattr()
+ * and kept in struct marker_str as xl_specf_unwind. */
+typedef int32_t (*xlator_specf_unwind_t) (call_frame_t *frame,
+ int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata);
+
+
+/*
+ * Volume mark as read from the GF_XATTR_MARKER_KEY xattr value.  The struct
+ * is packed, so fields follow each other without padding.
+ */
+struct volume_mark {
+ uint8_t major; /* replies with differing major/minor are not merged */
+ uint8_t minor;
+ uint8_t uuid[16]; /* binary uuid; unparsed to text by the uuid callback */
+ uint8_t retval; /* non-zero value is copied into marker_str.retval */
+ uint32_t sec; /* sec/usec decide which of two marks is newer */
+ uint32_t usec;
+}__attribute__ ((__packed__));
+
+
+/*
+ * The enumerated type here
+ * is used to index two kinds
+ * of integer arrays:
+ * - gauges
+ * - counters
+
+ * A counter is used internally,
+ * in getxattr callbacks, to count
+ * the results, categorized as
+ * the enum names suggest. So values
+ * in the counter are always non-negative.
+
+ * Gauges are part of the API.
+ * The caller passes one to the
+ * top-level aggregator function,
+ * cluster_getmarkerattr(). The gauge
+ * defines an evaluation policy for the
+ * counter. That is, at the
+ * end of the aggregation process
+ * the gauge is matched against the
+ * counter, and the policy
+ * represented by the gauge decides
+ * whether to return with success or failure,
+ * and in latter case, what particular failure
+ * case (errno).
+
+ * The rules are the following: for some index i,
+ * - if gauge[i] == 0, no requirement is set
+ * against counter[i];
+ * - if gauge[i] > 0, counter[i] >= gauge[i]
+ * is required;
+ * - if gauge[i] < 0, counter[i] < |gauge[i]|
+ * is required.
+
+ * If the requirement is not met, then i is mapped
+ * to the respective errno (MCNT_ENOENT -> ENOENT),
+ * or in lack of that, EINVAL.
+
+ * Cf. evaluate_marker_results() and marker_idx_errno_map[]
+ * in libxlator.c
+
+ * We provide two default gauges, one intended for xtime
+ * aggregation, the other for volume mark aggregation. The
+ * policies they represent agree with the hard-coded
+ * one prior to gauges. Cf. marker_xtime_default_gauge
+ * and marker_uuid_default_gauge in libxlator.c
+ */
+
+/* Index into the gauge[] and count[] arrays of struct marker_str;
+ * MCNT_MAX is the number of slots and sizes both arrays. */
+typedef enum {
+ MCNT_FOUND,
+ MCNT_NOTFOUND,
+ MCNT_ENODATA,
+ MCNT_ENOTCONN,
+ MCNT_ENOENT,
+ MCNT_EOTHER,
+ MCNT_MAX
+} marker_result_idx_t;
+
+extern int marker_xtime_default_gauge[];
+extern int marker_uuid_default_gauge[];
+
+/*
+ * Per-request aggregation state; installed as frame->local by
+ * cluster_handle_marker_getxattr() and updated by the marker callbacks.
+ */
+struct marker_str {
+ struct volume_mark *volmark; /* mark kept so far by the uuid callback */
+ data_t *data;
+
+ uint32_t host_timebuf[2];
+ uint32_t net_timebuf[2]; /* passed to the unwind by the xtime callback */
+ int32_t call_count; /* replies still outstanding */
+ int gauge[MCNT_MAX]; /* policy, see the comment above the enum */
+ int count[MCNT_MAX]; /* result counters, indexed like gauge[] */
+
+ xlator_specf_unwind_t xl_specf_unwind; /* caller's unwind, may be NULL */
+ void *xl_local; /* frame->local as it was before being replaced */
+ char *vol_uuid; /* caller's buffer; receives the unparsed uuid */
+ uint8_t retval; /* first non-zero volume_mark.retval seen */
+};
+
+typedef struct marker_str xl_marker_local_t;
+
+int32_t
+cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
+
+int
+cluster_handle_marker_getxattr (call_frame_t *frame, loc_t *loc,
+ const char *name, char *vol_uuid,
+ xlator_specf_unwind_t unwind,
+ int (*populate_args) (call_frame_t *frame,
+ int type, int *gauge,
+ xlator_t **subvols));
+int
+match_uuid_local (const char *name, char *uuid);
+
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
+
+int
+gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
+
+#endif /* !_LIBXLATOR_H */
diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am
index 385ff553f59..df06760b409 100644
--- a/xlators/meta/src/Makefile.am
+++ b/xlators/meta/src/Makefile.am
@@ -1,10 +1,43 @@
-xlator_PROGRAMS = meta.so
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/
+xlator_LTLIBRARIES = meta.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator
-meta_so_SOURCES = meta.c tree.c misc.c view.c
-noinst_HEADERS = meta.h tree.h misc.h view.h
+meta_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+meta_la_SOURCES = meta.c meta-helpers.c meta-defaults.c \
+ root-dir.c \
+ graphs-dir.c \
+ frames-file.c \
+ graph-dir.c \
+ active-link.c \
+ xlator-dir.c \
+ top-link.c \
+ logging-dir.c \
+ logfile-link.c \
+ loglevel-file.c \
+ process_uuid-file.c \
+ volfile-file.c \
+ view-dir.c \
+ subvolumes-dir.c \
+ subvolume-link.c \
+ type-file.c \
+ version-file.c \
+ options-dir.c \
+ option-file.c \
+ cmdline-file.c \
+ name-file.c \
+ private-file.c \
+ history-file.c \
+ mallinfo-file.c \
+ meminfo-file.c \
+ measure-file.c \
+ profile-file.c
-CLEANFILES =
+meta_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = meta.h meta-hooks.h meta-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/meta/src/active-link.c b/xlators/meta/src/active-link.c
new file mode 100644
index 00000000000..dfa26b695b3
--- /dev/null
+++ b/xlators/meta/src/active-link.c
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+
+/* link_fill for the "active" symlink: prints the graph_uuid of the context's
+ * active graph into strfd.  Always returns 0. */
+static int
+active_link_fill (xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+        glusterfs_ctx_t *ctx = this->ctx;
+
+        strprintf (strfd, "%s", ctx->active->graph_uuid);
+
+        return 0;
+}
+
+
+/* Ops installed by meta_active_link_hook(); only link_fill is provided. */
+struct meta_ops active_link_ops = {
+ .link_fill = active_link_fill
+};
+
+
+/* Hook for the "active" entry: attaches active_link_ops to loc->inode.
+ * Always returns 0. */
+int
+meta_active_link_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &active_link_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/cmdline-file.c b/xlators/meta/src/cmdline-file.c
new file mode 100644
index 00000000000..941b8073f4f
--- /dev/null
+++ b/xlators/meta/src/cmdline-file.c
@@ -0,0 +1,43 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "globals.h"
+#include "lkowner.h"
+
+
+/* file_fill for the cmdline file: when the context has a cmdlinestr, prints
+ * it as a one-member JSON-style object.  Returns strfd->size. */
+static int
+cmdline_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        glusterfs_ctx_t *ctx = this->ctx;
+
+        if (!ctx->cmdlinestr)
+                return strfd->size;
+
+        strprintf (strfd, "{ \n \"Cmdlinestr\": \"%s\"\n}", ctx->cmdlinestr);
+
+        return strfd->size;
+}
+
+
+/* Ops installed by meta_cmdline_file_hook(); only file_fill is provided. */
+static struct meta_ops cmdline_file_ops = {
+ .file_fill = cmdline_file_fill,
+};
+
+
+/* Hook that attaches cmdline_file_ops to loc->inode.  Always returns 0. */
+int
+meta_cmdline_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &cmdline_file_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/frames-file.c b/xlators/meta/src/frames-file.c
new file mode 100644
index 00000000000..ebac3d9cbaa
--- /dev/null
+++ b/xlators/meta/src/frames-file.c
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "globals.h"
+#include "lkowner.h"
+
+/*
+ * file_fill for the frames file: prints every call stack on
+ * this->ctx->pool->all_frames, and every frame of each stack, as a
+ * JSON-style document into strfd.  pool->lock is held for the whole walk.
+ *
+ * Returns strfd->size, or -1 when any argument is NULL.
+ */
+static int
+frames_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        struct call_pool *pool  = NULL;
+        call_stack_t     *stack = NULL;
+        call_frame_t     *frame = NULL;
+        int               i     = 0;
+        int               j     = 1;
+
+        if (!this || !file || !strfd)
+                return -1;
+
+        pool = this->ctx->pool;
+
+        LOCK (&pool->lock);
+        {
+                strprintf (strfd, "{ \n\t\"Stack\": [\n");
+                list_for_each_entry (stack, &pool->all_frames, all_frames) {
+                        strprintf (strfd, "\t {\n");
+                        strprintf (strfd, "\t\t\"Number\": %d,\n", ++i);
+                        strprintf (strfd, "\t\t\"Frame\": [\n");
+                        j = 1;
+                        list_for_each_entry (frame, &stack->myframes, frames) {
+                                strprintf (strfd, "\t\t {\n");
+                                strprintf (strfd, "\t\t\t\"Number\": %d,\n",
+                                           j++);
+                                strprintf (strfd,
+                                           "\t\t\t\"Xlator\": \"%s\",\n",
+                                           frame->this->name);
+                                /*
+                                 * tv_usec is a count of microseconds, so it
+                                 * must be zero-padded to six digits to form
+                                 * a correct decimal fraction (5s + 42us is
+                                 * 5.000042, not 5.42).
+                                 */
+                                if (frame->begin.tv_sec)
+                                        strprintf (strfd,
+                                                   "\t\t\t\"Creation_time\": %d.%06d,\n",
+                                                   (int)frame->begin.tv_sec,
+                                                   (int)frame->begin.tv_usec);
+                                strprintf (strfd, " \t\t\t\"Refcount\": %d,\n",
+                                           frame->ref_count);
+                                if (frame->parent)
+                                        strprintf (strfd, "\t\t\t\"Parent\": \"%s\",\n",
+                                                   frame->parent->this->name);
+                                if (frame->wind_from)
+                                        strprintf (strfd, "\t\t\t\"Wind_from\": \"%s\",\n",
+                                                   frame->wind_from);
+                                if (frame->wind_to)
+                                        strprintf (strfd, "\t\t\t\"Wind_to\": \"%s\",\n",
+                                                   frame->wind_to);
+                                if (frame->unwind_from)
+                                        strprintf (strfd, "\t\t\t\"Unwind_from\": \"%s\",\n",
+                                                   frame->unwind_from);
+                                if (frame->unwind_to)
+                                        strprintf (strfd, "\t\t\t\"Unwind_to\": \"%s\",\n",
+                                                   frame->unwind_to);
+                                strprintf (strfd, "\t\t\t\"Complete\": %d\n",
+                                           frame->complete);
+                                /* no separator after the last frame */
+                                if (list_is_last (&frame->frames,
+                                                  &stack->myframes))
+                                        strprintf (strfd, "\t\t }\n");
+                                else
+                                        strprintf (strfd, "\t\t },\n");
+                        }
+                        strprintf (strfd, "\t\t],\n");
+                        strprintf (strfd, "\t\t\"Unique\": %"PRId64",\n",
+                                   stack->unique);
+                        strprintf (strfd, "\t\t\"Type\": \"%s\",\n",
+                                   gf_fop_list[stack->op]);
+                        strprintf (strfd, "\t\t\"UID\": %d,\n",
+                                   stack->uid);
+                        strprintf (strfd, "\t\t\"GID\": %d,\n",
+                                   stack->gid);
+                        strprintf (strfd, "\t\t\"LK_owner\": \"%s\"\n",
+                                   lkowner_utoa (&stack->lk_owner));
+                        /* no separator after the last stack (pool->cnt of them) */
+                        if (i == (int)pool->cnt)
+                                strprintf (strfd, "\t }\n");
+                        else
+                                strprintf (strfd, "\t },\n");
+                }
+                strprintf (strfd, "\t],\n");
+                strprintf (strfd, "\t\"Call_Count\": %d\n",
+                           (int)pool->cnt);
+                strprintf (strfd, "}");
+        }
+        UNLOCK (&pool->lock);
+
+        return strfd->size;
+}
+
+
+/* Ops installed by meta_frames_file_hook(); only file_fill is provided. */
+static struct meta_ops frames_file_ops = {
+ .file_fill = frames_file_fill,
+};
+
+
+/* Hook that attaches frames_file_ops to loc->inode.  Always returns 0. */
+int
+meta_frames_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &frames_file_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/graph-dir.c b/xlators/meta/src/graph-dir.c
new file mode 100644
index 00000000000..541e806ddb5
--- /dev/null
+++ b/xlators/meta/src/graph-dir.c
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/* Fixed entries of a graph directory (referenced by graph_dir_ops): a "top"
+ * symlink and a "volfile" regular file.  A NULL name ends the table. */
+static struct meta_dirent graph_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ { .name = "top",
+ .type = IA_IFLNK,
+ .hook = meta_top_link_hook,
+ },
+ { .name = "volfile",
+ .type = IA_IFREG,
+ .hook = meta_volfile_file_hook,
+ },
+ { .name = NULL }
+};
+
+
+/*
+ * dir_fill for a graph directory: builds one IA_IFDIR entry per xlator of
+ * the graph stored as the inode's meta context, each with a duplicated
+ * xlator name and meta_xlator_dir_hook as hook.
+ *
+ * On success stores the new array in *dp and returns the number of entries.
+ * Returns -1, leaving *dp untouched, when the inode has no graph attached
+ * (meta_graph_dir_hook stores NULL if the lookup fails) or when an
+ * allocation fails; names duplicated so far are released in that case.
+ */
+static int
+graph_dir_fill (xlator_t *this, inode_t *inode, struct meta_dirent **dp)
+{
+        struct meta_dirent *dirents = NULL;
+        glusterfs_graph_t  *graph   = NULL;
+        int                 i       = 0;
+        int                 count   = 0;
+        xlator_t           *xl      = NULL;
+
+        graph = meta_ctx_get (inode, this);
+        if (!graph)
+                return -1;
+
+        for (xl = graph->first; xl; xl = xl->next)
+                count++;
+
+        dirents = GF_CALLOC (sizeof (*dirents), count, gf_meta_mt_dirents_t);
+        if (!dirents)
+                return -1;
+
+        i = 0;
+        for (xl = graph->first; xl; xl = xl->next) {
+                dirents[i].name = gf_strdup (xl->name);
+                if (!dirents[i].name)
+                        goto err;
+                dirents[i].type = IA_IFDIR;
+                dirents[i].hook = meta_xlator_dir_hook;
+                i++;
+        }
+
+        *dp = dirents;
+        return i;
+err:
+        /* entries [0, i) own a duplicated name */
+        while (i-- > 0)
+                GF_FREE ((void *)dirents[i].name);
+        GF_FREE (dirents);
+
+        return -1;
+}
+
+
+/* Ops installed by meta_graph_dir_hook(): the fixed entries above plus the
+ * entries produced by graph_dir_fill(). */
+struct meta_ops graph_dir_ops = {
+ .fixed_dirents = graph_dir_dirents,
+ .dir_fill = graph_dir_fill,
+};
+
+
+/* Return the graph on this->ctx->graphs whose graph_uuid equals graph_uuid,
+ * or NULL when none matches. */
+static glusterfs_graph_t *
+glusterfs_graph_lookup (xlator_t *this, const char *graph_uuid)
+{
+        glusterfs_graph_t *candidate = NULL;
+
+        list_for_each_entry (candidate, &this->ctx->graphs, list) {
+                if (!strcmp (graph_uuid, candidate->graph_uuid))
+                        return candidate;
+        }
+
+        return NULL;
+}
+
+
+/* Hook for a per-graph directory: looks the graph up by loc->name, attaches
+ * graph_dir_ops to loc->inode and stores the graph (NULL when the lookup
+ * found nothing) as the inode's meta context.  Always returns 0. */
+int
+meta_graph_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     dict_t *xdata)
+{
+        glusterfs_graph_t *found = glusterfs_graph_lookup (this, loc->name);
+
+        meta_ops_set (loc->inode, this, &graph_dir_ops);
+        meta_ctx_set (loc->inode, this, (void *) found);
+
+        return 0;
+}
diff --git a/xlators/meta/src/graphs-dir.c b/xlators/meta/src/graphs-dir.c
new file mode 100644
index 00000000000..e5f1319ec26
--- /dev/null
+++ b/xlators/meta/src/graphs-dir.c
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/* Fixed entries of the graphs directory (referenced by graphs_dir_ops): an
+ * "active" symlink.  A NULL name ends the table. */
+static struct meta_dirent graphs_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ { .name = "active",
+ .type = IA_IFLNK,
+ .hook = meta_active_link_hook,
+ },
+ { .name = NULL }
+};
+
+
+/*
+ * dir_fill for the graphs directory: builds one IA_IFDIR entry per graph on
+ * this->ctx->graphs, named after the graph's graph_uuid and hooked to
+ * meta_graph_dir_hook.
+ *
+ * On success stores the new array in *dp and returns the number of entries.
+ * Returns -1, leaving *dp untouched, when an allocation fails; names
+ * duplicated so far are released in that case.
+ */
+static int
+graphs_dir_fill (xlator_t *this, inode_t *dir, struct meta_dirent **dp)
+{
+        glusterfs_graph_t  *graph        = NULL;
+        int                 graphs_count = 0;
+        int                 i            = 0;
+        struct meta_dirent *dirents      = NULL;
+
+        list_for_each_entry (graph, &this->ctx->graphs, list) {
+                graphs_count++;
+        }
+
+        /* the three spare slots are kept from the original sizing */
+        dirents = GF_CALLOC (sizeof (*dirents), graphs_count + 3,
+                             gf_meta_mt_dirents_t);
+        if (!dirents)
+                return -1;
+
+        i = 0;
+        list_for_each_entry (graph, &this->ctx->graphs, list) {
+                dirents[i].name = gf_strdup (graph->graph_uuid);
+                if (!dirents[i].name)
+                        goto err;
+                dirents[i].type = IA_IFDIR;
+                dirents[i].hook = meta_graph_dir_hook;
+                i++;
+        }
+
+        *dp = dirents;
+
+        return i;
+err:
+        /* entries [0, i) own a duplicated name */
+        while (i-- > 0)
+                GF_FREE ((void *)dirents[i].name);
+        GF_FREE (dirents);
+
+        return -1;
+}
+
+
+/* Ops installed by meta_graphs_dir_hook(): the fixed entries above plus the
+ * entries produced by graphs_dir_fill(). */
+struct meta_ops graphs_dir_ops = {
+ .fixed_dirents = graphs_dir_dirents,
+ .dir_fill = graphs_dir_fill
+};
+
+
+/* Hook that attaches graphs_dir_ops to loc->inode.  Always returns 0. */
+int
+meta_graphs_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                      dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &graphs_dir_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/history-file.c b/xlators/meta/src/history-file.c
new file mode 100644
index 00000000000..eadc9821f83
--- /dev/null
+++ b/xlators/meta/src/history-file.c
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "statedump.h"
+
+
+/* file_fill for the history file: passes the xlator stored as the inode's
+ * meta context to gf_proc_dump_xlator_history().  Returns strfd->size. */
+static int
+history_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        xlator_t *target = meta_ctx_get (file, this);
+
+        gf_proc_dump_xlator_history (target, strfd);
+
+        return strfd->size;
+}
+
+
+/* Ops installed by meta_history_file_hook(); only file_fill is provided. */
+static struct meta_ops history_file_ops = {
+ .file_fill = history_file_fill,
+};
+
+
+/* Hook that attaches history_file_ops to loc->inode and copies the parent
+ * inode's meta context onto it.  Always returns 0. */
+int
+meta_history_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        void *parent_ctx = NULL;
+
+        meta_ops_set (loc->inode, this, &history_file_ops);
+
+        parent_ctx = meta_ctx_get (loc->parent, this);
+        meta_ctx_set (loc->inode, this, parent_ctx);
+
+        return 0;
+}
diff --git a/xlators/meta/src/logfile-link.c b/xlators/meta/src/logfile-link.c
new file mode 100644
index 00000000000..d7b16b92eae
--- /dev/null
+++ b/xlators/meta/src/logfile-link.c
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+
+/* link_fill for the "logfile" symlink: prints the context's log.filename
+ * into strfd.  Always returns 0. */
+static int
+logfile_link_fill (xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+        glusterfs_ctx_t *ctx = this->ctx;
+
+        strprintf (strfd, "%s", ctx->log.filename);
+
+        return 0;
+}
+
+
+/* Ops installed by meta_logfile_link_hook(); only link_fill is provided. */
+struct meta_ops logfile_link_ops = {
+ .link_fill = logfile_link_fill
+};
+
+
+/* Hook for the "logfile" entry: attaches logfile_link_ops to loc->inode.
+ * Always returns 0. */
+int
+meta_logfile_link_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &logfile_link_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/logging-dir.c b/xlators/meta/src/logging-dir.c
new file mode 100644
index 00000000000..cfd0c123308
--- /dev/null
+++ b/xlators/meta/src/logging-dir.c
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/* Fixed entries of the logging directory (referenced by logging_dir_ops): a
+ * "logfile" symlink and a "loglevel" regular file.  A NULL name ends the
+ * table. */
+static struct meta_dirent logging_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ { .name = "logfile",
+ .type = IA_IFLNK,
+ .hook = meta_logfile_link_hook,
+ },
+ { .name = "loglevel",
+ .type = IA_IFREG,
+ .hook = meta_loglevel_file_hook,
+ },
+ { .name = NULL }
+};
+
+
+/* Ops installed by meta_logging_dir_hook(); the directory has fixed entries
+ * only (no dir_fill). */
+struct meta_ops logging_dir_ops = {
+ .fixed_dirents = logging_dir_dirents,
+};
+
+
+/* Hook that attaches logging_dir_ops to loc->inode.  Always returns 0. */
+int
+meta_logging_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &logging_dir_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/loglevel-file.c b/xlators/meta/src/loglevel-file.c
new file mode 100644
index 00000000000..f9c5a993d73
--- /dev/null
+++ b/xlators/meta/src/loglevel-file.c
@@ -0,0 +1,54 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+
+
+/* file_fill for the "loglevel" file: prints the context's log.loglevel as a
+ * decimal number followed by a newline.  Returns strfd->size. */
+static int
+loglevel_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        glusterfs_ctx_t *ctx = this->ctx;
+
+        strprintf (strfd, "%d\n", ctx->log.loglevel);
+
+        return strfd->size;
+}
+
+
+/*
+ * file_write for the "loglevel" file: parses an integer from the first iovec
+ * and, when it lies in [GF_LOG_NONE, GF_LOG_TRACE], makes it the log level.
+ *
+ * The payload is copied into a bounded, NUL-terminated buffer before
+ * parsing, because nothing here guarantees that iov_base is a C string.
+ * Input that does not start with a number is ignored rather than being read
+ * as 0.
+ *
+ * Returns the total length of the iovec array (the whole write is reported
+ * as consumed), or 0 when there is nothing to read.
+ */
+static int
+loglevel_file_write (xlator_t *this, fd_t *fd, struct iovec *iov, int count)
+{
+        long int  level   = -1;
+        char      buf[32] = {0,};
+        char     *end     = NULL;
+        size_t    len     = 0;
+        size_t    i       = 0;
+
+        if (!iov || count <= 0)
+                return 0;
+
+        if (iov[0].iov_base) {
+                len = iov[0].iov_len;
+                if (len >= sizeof (buf))
+                        len = sizeof (buf) - 1;
+                for (i = 0; i < len; i++)
+                        buf[i] = ((const char *)iov[0].iov_base)[i];
+                buf[len] = '\0';
+
+                level = strtol (buf, &end, 0);
+                /* end == buf means no digits were consumed */
+                if (end != buf &&
+                    level >= GF_LOG_NONE && level <= GF_LOG_TRACE)
+                        gf_log_set_loglevel (level);
+        }
+
+        return iov_length (iov, count);
+}
+
+
+/* Ops installed by meta_loglevel_file_hook(): the file can be read
+ * (file_fill) and written (file_write). */
+static struct meta_ops loglevel_file_ops = {
+ .file_fill = loglevel_file_fill,
+ .file_write = loglevel_file_write,
+};
+
+
+/* Hook for the "loglevel" entry: attaches loglevel_file_ops to loc->inode.
+ * Always returns 0. */
+int
+meta_loglevel_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                         dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &loglevel_file_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/mallinfo-file.c b/xlators/meta/src/mallinfo-file.c
new file mode 100644
index 00000000000..a1aec25e3a6
--- /dev/null
+++ b/xlators/meta/src/mallinfo-file.c
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "statedump.h"
+
+
+/* file_fill for the mallinfo file: lets gf_proc_dump_mallinfo() write into
+ * strfd and returns strfd->size.  'this' and 'file' are not used. */
+static int
+mallinfo_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ gf_proc_dump_mallinfo (strfd);
+ return strfd->size;
+}
+
+
+/* Ops installed by meta_mallinfo_file_hook(); only file_fill is provided. */
+static struct meta_ops mallinfo_file_ops = {
+ .file_fill = mallinfo_file_fill,
+};
+
+
+/* Hook that attaches mallinfo_file_ops to loc->inode.  Always returns 0. */
+int
+meta_mallinfo_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                         dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &mallinfo_file_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/measure-file.c b/xlators/meta/src/measure-file.c
new file mode 100644
index 00000000000..7fe9ff390c0
--- /dev/null
+++ b/xlators/meta/src/measure-file.c
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+
+
+/* file_fill for the measure file: prints the context's measure_latency flag
+ * as a decimal number followed by a newline.  Returns strfd->size. */
+static int
+measure_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        glusterfs_ctx_t *ctx = this->ctx;
+
+        strprintf (strfd, "%d\n", ctx->measure_latency);
+
+        return strfd->size;
+}
+
+
+/*
+ * file_write for the measure file: parses an integer from the first iovec
+ * and stores its truth value (0 or 1) in this->ctx->measure_latency.
+ *
+ * The payload is copied into a bounded, NUL-terminated buffer before
+ * parsing, because nothing here guarantees that iov_base is a C string.
+ * Input that does not start with a number leaves the flag unchanged rather
+ * than clearing it.
+ *
+ * Returns the total length of the iovec array (the whole write is reported
+ * as consumed), or 0 when there is nothing to read.
+ */
+static int
+measure_file_write (xlator_t *this, fd_t *fd, struct iovec *iov, int count)
+{
+        long int  num     = -1;
+        char      buf[32] = {0,};
+        char     *end     = NULL;
+        size_t    len     = 0;
+        size_t    i       = 0;
+
+        if (!iov || count <= 0)
+                return 0;
+
+        if (iov[0].iov_base) {
+                len = iov[0].iov_len;
+                if (len >= sizeof (buf))
+                        len = sizeof (buf) - 1;
+                for (i = 0; i < len; i++)
+                        buf[i] = ((const char *)iov[0].iov_base)[i];
+                buf[len] = '\0';
+
+                num = strtol (buf, &end, 0);
+                /* end == buf means no digits were consumed */
+                if (end != buf)
+                        this->ctx->measure_latency = !!num;
+        }
+
+        return iov_length (iov, count);
+}
+
+/* Ops installed by meta_measure_file_hook(): the file can be read
+ * (file_fill) and written (file_write). */
+static struct meta_ops measure_file_ops = {
+ .file_fill = measure_file_fill,
+ .file_write = measure_file_write,
+};
+
+
+/* Hook that attaches measure_file_ops to loc->inode.  Always returns 0. */
+int
+meta_measure_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &measure_file_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/meminfo-file.c b/xlators/meta/src/meminfo-file.c
new file mode 100644
index 00000000000..900976ada3b
--- /dev/null
+++ b/xlators/meta/src/meminfo-file.c
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "statedump.h"
+
+
+/* file_fill for the meminfo file: passes the xlator stored as the inode's
+ * meta context to gf_proc_dump_xlator_meminfo().  Returns strfd->size. */
+static int
+meminfo_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        xlator_t *target = meta_ctx_get (file, this);
+
+        gf_proc_dump_xlator_meminfo (target, strfd);
+
+        return strfd->size;
+}
+
+
+/* Ops installed by meta_meminfo_file_hook(); only file_fill is provided. */
+static struct meta_ops meminfo_file_ops = {
+ .file_fill = meminfo_file_fill,
+};
+
+
+/* Hook that attaches meminfo_file_ops to loc->inode and copies the parent
+ * inode's meta context onto it.  Always returns 0. */
+int
+meta_meminfo_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        void *parent_ctx = NULL;
+
+        meta_ops_set (loc->inode, this, &meminfo_file_ops);
+
+        parent_ctx = meta_ctx_get (loc->parent, this);
+        meta_ctx_set (loc->inode, this, parent_ctx);
+
+        return 0;
+}
diff --git a/xlators/meta/src/meta-defaults.c b/xlators/meta/src/meta-defaults.c
new file mode 100644
index 00000000000..5a8558291ba
--- /dev/null
+++ b/xlators/meta/src/meta-defaults.c
@@ -0,0 +1,636 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+#include "compat-errno.h"
+
+int
+meta_default_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{ /* Default handler: fgetxattr is always refused with EPERM. */
+ return default_fgetxattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{ /* Default handler: fsetxattr is always refused with EPERM. */
+ return default_fsetxattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{ /* Default handler: setxattr is always refused with EPERM. */
+ return default_setxattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{ /* Default handler: statfs is always refused with EPERM. */
+ return default_statfs_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{ /* Default handler: fsyncdir is always refused with EPERM. */
+ return default_fsyncdir_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{ /* Default handler: opendir always succeeds, unwinding with the caller's fd and xdata. */
+ META_STACK_UNWIND (opendir, frame, 0, 0, fd, xdata);
+ return 0;
+}
+
+/*
+ * Default fstat: build an iatt for the fd's inode (using the type
+ * already recorded on that inode) and unwind successfully with it.
+ */
+int
+meta_default_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    dict_t *xdata)
+{
+        inode_t     *inode = fd->inode;
+        struct iatt  stbuf = {0, };
+
+        meta_iatt_fill (&stbuf, inode, inode->ia_type);
+
+        META_STACK_UNWIND (fstat, frame, 0, 0, &stbuf, xdata);
+
+        return 0;
+}
+
+int
+meta_default_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{ /* Default handler: fsync is always refused with EPERM. */
+ return default_fsync_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_flush (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata)
+{ /* Default handler: flush does nothing and always unwinds success. */
+ META_STACK_UNWIND (flush, frame, 0, 0, xdata);
+ return 0;
+}
+
+/*
+ * Default writev: forward the vector to the inode's file_write op.
+ *
+ * If the inode has no ops or no file_write, the fop fails with EPERM.
+ * Otherwise a non-negative result from file_write is unwound as the
+ * byte count, and a negative result is unwound as -1 with the negated
+ * value as errno.  The pre/post iatts are zero-filled placeholders.
+ */
+int
+meta_default_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     struct iovec *vector, int32_t count, off_t off,
+                     uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+        struct meta_ops *ops      = meta_ops_get (fd->inode, this);
+        struct iatt      unused   = {0, };
+        int              written  = 0;
+        int              op_ret   = -1;
+        int              op_errno = 0;
+
+        if (!ops || !ops->file_write)
+                return default_writev_failure_cbk (frame, EPERM);
+
+        written = ops->file_write (this, fd, vector, count);
+        if (written >= 0)
+                op_ret = written;
+        else
+                op_errno = -written;
+
+        META_STACK_UNWIND (writev, frame, op_ret, op_errno,
+                           &unused, &unused, xdata);
+        return 0;
+}
+
+/*
+ * Default readv: serve a read from the content cached on the fd.
+ *
+ * The content is generated by meta_file_fill() when the cached size is
+ * still zero.  The requested window is clamped to the cached size, so a
+ * read at or past the end returns 0 bytes.
+ *
+ * Failure cases: EINVAL for a negative offset, ENODATA when no per-fd
+ * context can be obtained, ENOMEM when buffer setup fails.
+ *
+ * Reference handling: iobref_add() takes its own reference on the
+ * iobuf, so the local iobuf reference is dropped right after a
+ * successful add, and the iobref is dropped once the reply has been
+ * unwound.  Without these two unrefs every read leaks a buffer.
+ */
+int
+meta_default_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                    off_t offset, uint32_t flags, dict_t *xdata)
+{
+        meta_fd_t     *meta_fd     = NULL;
+        struct iovec   iov         = {};
+        struct iobuf  *iobuf       = NULL;
+        struct iobref *iobref      = NULL;
+        off_t          copy_offset = 0;
+        size_t         copy_size   = 0;
+        struct iatt    iatt        = {};
+
+        /* a negative offset would index before the cached data */
+        if (offset < 0)
+                return default_readv_failure_cbk (frame, EINVAL);
+
+        meta_fd = meta_fd_get (fd, this);
+        if (!meta_fd)
+                return default_readv_failure_cbk (frame, ENODATA);
+
+        if (!meta_fd->size)
+                meta_file_fill (this, fd);
+
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+        if (!iobuf)
+                return default_readv_failure_cbk (frame, ENOMEM);
+
+        iobref = iobref_new ();
+        if (!iobref) {
+                iobuf_unref (iobuf);
+                return default_readv_failure_cbk (frame, ENOMEM);
+        }
+
+        if (iobref_add (iobref, iobuf) != 0) {
+                iobref_unref (iobref);
+                iobuf_unref (iobuf);
+                return default_readv_failure_cbk (frame, ENOMEM);
+        }
+
+        /* the iobref now holds the buffer; release our own reference */
+        iobuf_unref (iobuf);
+
+        iov.iov_base = iobuf_ptr (iobuf);
+
+        /* clamp the window so it never runs past the cached content */
+        copy_offset = min (meta_fd->size, offset);
+        copy_size = min (size, (meta_fd->size - copy_offset));
+
+        if (copy_size)
+                memcpy (iov.iov_base, meta_fd->data + copy_offset, copy_size);
+        iov.iov_len = copy_size;
+
+        META_STACK_UNWIND (readv, frame, copy_size, 0, &iov, 1, &iatt, iobref, 0);
+
+        iobref_unref (iobref);
+
+        return 0;
+}
+
+
+/*
+ * Default open: always succeeds.  The reply dictionary comes from
+ * meta_direct_io_mode(), which marks it with "direct-io-mode" (and may
+ * return NULL, in which case no reply dictionary is sent).
+ */
+int
+meta_default_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   int32_t flags, fd_t *fd, dict_t *xdata)
+{
+        dict_t *reply = meta_direct_io_mode (xdata, frame);
+
+        META_STACK_UNWIND (open, frame, 0, 0, fd, reply);
+
+        return 0;
+}
+
+int
+meta_default_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{ /* Default handler: create is always refused with EPERM. */
+ return default_create_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{ /* Default handler: link is always refused with EPERM. */
+ return default_link_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{ /* Default handler: rename is always refused with EPERM. */
+ return default_rename_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{ /* Default handler: symlink is always refused with EPERM. */
+ return default_symlink_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{ /* Default handler: rmdir is always refused with EPERM. */
+ return default_rmdir_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{ /* Default handler: unlink is always refused with EPERM. */
+ return default_unlink_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{ /* Default handler: mkdir is always refused with EPERM. */
+ return default_mkdir_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{ /* Default handler: mknod is always refused with EPERM. */
+ return default_mknod_failure_cbk (frame, EPERM);
+}
+
+/*
+ * Default readlink: ask the inode's link_fill op for the link target.
+ *
+ * The target is written into a temporary strfd, which is closed again
+ * after the reply has been unwound.
+ *
+ * Unwinds:
+ *   - EPERM   when the inode has no ops or no link_fill op,
+ *   - ENOMEM  when the strfd cannot be opened,
+ *   - ENODATA when link_fill produced no data,
+ *   - success otherwise, with strlen() of the target as op_ret and a
+ *     symlink iatt.
+ */
+int
+meta_default_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       size_t size, dict_t *xdata)
+{
+        struct meta_ops *ops   = NULL;
+        strfd_t         *strfd = NULL;
+        struct iatt      iatt  = { };
+
+        ops = meta_ops_get (loc->inode, this);
+        /* meta_ops_get returns NULL for an inode with no ops attached */
+        if (!ops || !ops->link_fill) {
+                META_STACK_UNWIND (readlink, frame, -1, EPERM, 0, 0, 0);
+                return 0;
+        }
+
+        strfd = strfd_open ();
+        if (!strfd) {
+                META_STACK_UNWIND (readlink, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        ops->link_fill (this, loc->inode, strfd);
+
+        meta_iatt_fill (&iatt, loc->inode, IA_IFLNK);
+
+        if (strfd->data)
+                META_STACK_UNWIND (readlink, frame, strlen (strfd->data), 0,
+                                   strfd->data, &iatt, xdata);
+        else
+                META_STACK_UNWIND (readlink, frame, -1, ENODATA, 0, 0, 0);
+
+        strfd_close (strfd);
+
+        return 0;
+}
+
+int
+meta_default_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{ /* Default handler: access is always refused with EPERM. */
+ return default_access_failure_cbk (frame, EPERM);
+}
+
+/*
+ * Default ftruncate: nothing is actually truncated.  The fop unwinds
+ * success, reporting the same regular-file iatt as both the pre- and
+ * post-operation attributes.
+ */
+int
+meta_default_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                        off_t offset, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+
+        meta_iatt_fill (&stbuf, fd->inode, IA_IFREG);
+
+        META_STACK_UNWIND (ftruncate, frame, 0, 0, &stbuf, &stbuf, xdata);
+
+        return 0;
+}
+
+int
+meta_default_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{ /* Default handler: getxattr is always refused with EPERM. */
+ return default_getxattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{ /* Default handler: xattrop is always refused with EPERM. */
+ return default_xattrop_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{ /* Default handler: fxattrop is always refused with EPERM. */
+ return default_fxattrop_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{ /* Default handler: removexattr is always refused with EPERM. */
+ return default_removexattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{ /* Default handler: fremovexattr is always refused with EPERM. */
+ return default_fremovexattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{ /* Default handler: lk is always refused with EPERM. */
+ return default_lk_failure_cbk (frame, EPERM);
+}
+
+
+int
+meta_default_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
+{ /* Default handler: inodelk is always refused with EPERM. */
+ return default_inodelk_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
+{ /* Default handler: finodelk is always refused with EPERM. */
+ return default_finodelk_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{ /* Default handler: entrylk is always refused with EPERM. */
+ return default_entrylk_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{ /* Default handler: fentrylk is always refused with EPERM. */
+ return default_fentrylk_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata)
+{ /* Default handler: rchecksum is always refused with EPERM. */
+ return default_rchecksum_failure_cbk (frame, EPERM);
+}
+
+
+/*
+ * Default readdir for directories served by this xlator.
+ *
+ * A directory listing is the inode's static fixed_dirents followed by
+ * the dynamic entries cached on the fd by meta_dir_fill().  `off` is
+ * the index of the first entry to return in that combined sequence;
+ * each returned entry carries d_off = index + 1 so the caller can
+ * resume from it.  Entries are added until the estimated reply size
+ * would exceed `size` or the listing is exhausted.
+ *
+ * Unwinds with the number of entries returned, or with -1 and
+ *   - EINVAL for a negative offset,
+ *   - ENOMEM when the inode has no ops, the fd context cannot be
+ *     obtained, or the very first entry cannot be allocated.
+ *
+ * If an entry allocation fails after some entries were already
+ * collected, the partial list is returned instead of retrying: the
+ * outer loop has no increment of its own, so merely leaving the inner
+ * loop would spin forever on the same index.
+ */
+int
+meta_default_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                      size_t size, off_t off, dict_t *xdata)
+{
+        meta_fd_t          *meta_fd       = NULL;
+        int                 i             = 0;
+        gf_dirent_t         head;
+        gf_dirent_t        *list          = NULL;
+        int                 ret           = 0;
+        int                 op_errno      = ENOMEM;
+        int                 this_size     = 0;
+        int                 filled_size   = 0;
+        int                 fixed_size    = 0;
+        int                 dyn_size      = 0;
+        struct meta_dirent *fixed_dirents = NULL;
+        struct meta_dirent *dyn_dirents   = NULL;
+        struct meta_dirent *dirents       = NULL;
+        struct meta_dirent *end           = NULL;
+        struct meta_ops    *ops           = NULL;
+
+        INIT_LIST_HEAD (&head.list);
+
+        /* a negative offset would index before the dirent arrays */
+        if (off < 0) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        ops = meta_ops_get (fd->inode, this);
+        if (!ops)
+                goto err;
+
+        meta_fd = meta_fd_get (fd, this);
+        if (!meta_fd)
+                goto err;
+
+        meta_dir_fill (this, fd);
+
+        fixed_dirents = ops->fixed_dirents;
+        fixed_size = fixed_dirents_len (fixed_dirents);
+
+        dyn_dirents = meta_fd->dirents;
+        dyn_size = meta_fd->size;
+
+        for (i = off; i < (fixed_size + dyn_size);) {
+                /* pick the array that index i falls into */
+                if (i >= fixed_size) {
+                        dirents = dyn_dirents + (i - fixed_size);
+                        end = dyn_dirents + dyn_size;
+                } else {
+                        dirents = fixed_dirents + i;
+                        end = fixed_dirents + fixed_size;
+                }
+
+                while (dirents < end) {
+                        this_size = sizeof (gf_dirent_t) +
+                                strlen (dirents->name) + 1;
+                        if (this_size + filled_size > size)
+                                goto unwind;
+
+                        list = gf_dirent_for_name (dirents->name);
+                        if (!list) {
+                                /* nothing collected yet: report the failure */
+                                if (ret == 0)
+                                        goto err;
+                                /* otherwise hand back what we have */
+                                goto unwind;
+                        }
+
+                        list->d_off = i + 1;
+                        list->d_ino = i + 42;
+                        switch (dirents->type) {
+                        case IA_IFDIR: list->d_type = DT_DIR; break;
+                        case IA_IFCHR: list->d_type = DT_CHR; break;
+                        case IA_IFBLK: list->d_type = DT_BLK; break;
+                        case IA_IFIFO: list->d_type = DT_FIFO; break;
+                        case IA_IFLNK: list->d_type = DT_LNK; break;
+                        case IA_IFREG: list->d_type = DT_REG; break;
+                        case IA_IFSOCK: list->d_type = DT_SOCK; break;
+                        case IA_INVAL: list->d_type = DT_UNKNOWN; break;
+                        default: list->d_type = DT_UNKNOWN; break;
+                        }
+
+                        list_add_tail (&list->list, &head.list);
+                        ret++; i++; dirents++;
+                        filled_size += this_size;
+                }
+        }
+
+unwind:
+        META_STACK_UNWIND (readdir, frame, ret, 0, &head, xdata);
+
+        gf_dirent_free (&head);
+
+        return 0;
+err:
+        META_STACK_UNWIND (readdir, frame, -1, op_errno, 0, 0);
+        return 0;
+}
+
+
+int
+meta_default_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
+{ /* Default handler: readdirp is served by meta_default_readdir with the same arguments. */
+ return meta_default_readdir (frame, this, fd, size, off, xdata);
+}
+
+int
+meta_default_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+{ /* Default handler: setattr is always refused with EPERM. */
+ return default_setattr_failure_cbk (frame, EPERM);
+}
+
+/*
+ * Default truncate: nothing is actually truncated.  The fop unwinds
+ * success, reporting the same regular-file iatt as both the pre- and
+ * post-operation attributes.
+ */
+int
+meta_default_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       off_t offset, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+
+        meta_iatt_fill (&stbuf, loc->inode, IA_IFREG);
+
+        META_STACK_UNWIND (truncate, frame, 0, 0, &stbuf, &stbuf, xdata);
+
+        return 0;
+}
+
+/*
+ * Default stat: build an iatt for loc->inode (using the type already
+ * recorded on that inode) and unwind successfully with it.
+ */
+int
+meta_default_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   dict_t *xdata)
+{
+        inode_t     *inode = loc->inode;
+        struct iatt  stbuf = {0, };
+
+        meta_iatt_fill (&stbuf, inode, inode->ia_type);
+
+        META_STACK_UNWIND (stat, frame, 0, 0, &stbuf, xdata);
+
+        return 0;
+}
+
+int
+meta_default_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{ /* Default lookup: match loc->name against the parent's fixed dirents,
+     then against the entries produced by its dir_fill op; on a match
+     run the entry's hook and unwind success, otherwise unwind ENOENT. */
+ struct meta_ops *ops = NULL;
+ struct meta_dirent *dirent = NULL;
+ struct meta_dirent *dp = NULL; /* dynamic entries handed back by dir_fill */
+ int i = 0;
+ int ret = 0; /* dir_fill's return value, used below as the length of dp */
+
+ if (!loc->name) /* no name to resolve: delegate to meta_inode_discover */
+ return meta_inode_discover (frame, this, loc, xdata);
+
+ ops = meta_ops_get (loc->parent, this);
+ if (!ops)
+ return default_lookup_failure_cbk (frame, EPERM);
+
+ for (dirent = ops->fixed_dirents; dirent && dirent->name; dirent++) { /* array ends at the first NULL name */
+ if (strcmp (dirent->name, loc->name) == 0)
+ goto hook;
+ }
+
+ dirent = NULL; /* forget the terminator entry before trying dynamic entries */
+ if (ops->dir_fill)
+ ret = ops->dir_fill (this, loc->parent, &dp);
+
+ for (i = 0; i < ret; i++) {
+ if (strcmp (dp[i].name, loc->name) == 0) {
+ dirent = &dp[i];
+ goto hook;
+ }
+ }
+hook:
+ if (dirent && dirent->hook) { /* a match without a hook is reported as ENOENT too */
+ struct iatt parent = { };
+ struct iatt iatt = { };
+
+ dirent->hook (frame, this, loc, xdata);
+
+ meta_iatt_fill (&iatt, loc->inode, dirent->type);
+
+ META_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &iatt,
+ xdata, &parent);
+ } else {
+ META_STACK_UNWIND (lookup, frame, -1, ENOENT, 0, 0, 0, 0);
+ }
+
+ for (i = 0; i < ret; i++) /* release the dynamic entries; dirent may point into dp, so this comes last */
+ GF_FREE ((void *)dp[i].name);
+ GF_FREE (dp);
+
+ return 0;
+}
+
+int
+meta_default_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{ /* Default handler: fsetattr is always refused with EPERM. */
+ return default_fsetattr_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len,
+ dict_t *xdata)
+{ /* Default handler: fallocate is always refused with EPERM. */
+ return default_fallocate_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_discard (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{ /* Default handler: discard is always refused with EPERM. */
+ return default_discard_failure_cbk (frame, EPERM);
+}
+
+int
+meta_default_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata)
+{ /* Default handler: zerofill is always refused with EPERM. */
+ return default_zerofill_failure_cbk (frame, EPERM);
+}
+
+/*
+ * Install meta_default_<name> into slot <name> of the fops table f, but
+ * only if that slot is still NULL.  f is parenthesized so that any
+ * pointer expression (e.g. &ops->fops) can be passed safely.
+ */
+#define SET_META_DEFAULT_FOP(f,name) do { if (!(f)->name) (f)->name = meta_default_##name ; } while (0)
+
+struct xlator_fops *
+meta_defaults_init (struct xlator_fops *fops)
+{ /* Fill every still-NULL slot listed below with its meta_default_* handler; returns fops. */
+ SET_META_DEFAULT_FOP (fops,create);
+ SET_META_DEFAULT_FOP (fops,open);
+ SET_META_DEFAULT_FOP (fops,stat);
+ SET_META_DEFAULT_FOP (fops,readlink);
+ SET_META_DEFAULT_FOP (fops,mknod);
+ SET_META_DEFAULT_FOP (fops,mkdir);
+ SET_META_DEFAULT_FOP (fops,unlink);
+ SET_META_DEFAULT_FOP (fops,rmdir);
+ SET_META_DEFAULT_FOP (fops,symlink);
+ SET_META_DEFAULT_FOP (fops,rename);
+ SET_META_DEFAULT_FOP (fops,link);
+ SET_META_DEFAULT_FOP (fops,truncate);
+ SET_META_DEFAULT_FOP (fops,readv);
+ SET_META_DEFAULT_FOP (fops,writev);
+ SET_META_DEFAULT_FOP (fops,statfs);
+ SET_META_DEFAULT_FOP (fops,flush);
+ SET_META_DEFAULT_FOP (fops,fsync);
+ SET_META_DEFAULT_FOP (fops,setxattr);
+ SET_META_DEFAULT_FOP (fops,getxattr);
+ SET_META_DEFAULT_FOP (fops,fsetxattr);
+ SET_META_DEFAULT_FOP (fops,fgetxattr);
+ SET_META_DEFAULT_FOP (fops,removexattr);
+ SET_META_DEFAULT_FOP (fops,fremovexattr);
+ SET_META_DEFAULT_FOP (fops,opendir);
+ SET_META_DEFAULT_FOP (fops,readdir);
+ SET_META_DEFAULT_FOP (fops,readdirp);
+ SET_META_DEFAULT_FOP (fops,fsyncdir);
+ SET_META_DEFAULT_FOP (fops,access);
+ SET_META_DEFAULT_FOP (fops,ftruncate);
+ SET_META_DEFAULT_FOP (fops,fstat);
+ SET_META_DEFAULT_FOP (fops,lk);
+ SET_META_DEFAULT_FOP (fops,inodelk);
+ SET_META_DEFAULT_FOP (fops,finodelk);
+ SET_META_DEFAULT_FOP (fops,entrylk);
+ SET_META_DEFAULT_FOP (fops,fentrylk);
+ SET_META_DEFAULT_FOP (fops,lookup);
+ SET_META_DEFAULT_FOP (fops,rchecksum);
+ SET_META_DEFAULT_FOP (fops,xattrop);
+ SET_META_DEFAULT_FOP (fops,fxattrop);
+ SET_META_DEFAULT_FOP (fops,setattr);
+ SET_META_DEFAULT_FOP (fops,fsetattr);
+ SET_META_DEFAULT_FOP (fops,fallocate);
+ SET_META_DEFAULT_FOP (fops,discard);
+ SET_META_DEFAULT_FOP (fops,zerofill);
+
+ return fops; /* same pointer that was passed in */
+}
diff --git a/xlators/meta/src/meta-helpers.c b/xlators/meta/src/meta-helpers.c
new file mode 100644
index 00000000000..e681af59bf7
--- /dev/null
+++ b/xlators/meta/src/meta-helpers.c
@@ -0,0 +1,350 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+
+/*
+ * Return the meta_fd_t attached to fd for this xlator, allocating and
+ * attaching a zeroed one on first use.  Both the lookup and the attach
+ * happen under fd->lock.
+ *
+ * Returns NULL if the allocation fails or if the new context cannot be
+ * stored on the fd; in the latter case it is freed instead of being
+ * handed out unattached, where it would leak and be re-created on each
+ * call.
+ */
+meta_fd_t *
+meta_fd_get (fd_t *fd, xlator_t *this)
+{
+        uint64_t   value   = 0;
+        meta_fd_t *meta_fd = NULL;
+
+        LOCK (&fd->lock);
+        {
+                __fd_ctx_get (fd, this, &value);
+                if (!value) {
+                        meta_fd = GF_CALLOC (1, sizeof (*meta_fd),
+                                             gf_meta_mt_fd_t);
+                        if (!meta_fd)
+                                goto unlock;
+
+                        value = (long) meta_fd;
+                        if (__fd_ctx_set (fd, this, value) != 0) {
+                                GF_FREE (meta_fd);
+                                meta_fd = NULL;
+                                goto unlock;
+                        }
+                }
+
+                meta_fd = (void *) value;
+        }
+unlock:
+        UNLOCK (&fd->lock);
+
+        return meta_fd;
+}
+
+
+/*
+ * Free the per-fd context attached by meta_fd_get(): the cached
+ * dirents (each name, then the array), the cached file data, and the
+ * context itself.
+ *
+ * An fd that never had a context attached yields a NULL context; that
+ * case is now checked before any member is touched.  Always returns 0.
+ */
+int
+meta_fd_release (fd_t *fd, xlator_t *this)
+{
+        uint64_t   value   = 0;
+        meta_fd_t *meta_fd = NULL;
+        int        i       = 0;
+
+        fd_ctx_get (fd, this, &value);
+        meta_fd = (void *) value;
+
+        /* nothing was ever attached to this fd */
+        if (!meta_fd)
+                return 0;
+
+        if (meta_fd->dirents) {
+                for (i = 0; i < meta_fd->size; i++)
+                        GF_FREE ((void *)meta_fd->dirents[i].name);
+                GF_FREE (meta_fd->dirents);
+        }
+
+        GF_FREE (meta_fd->data);
+        GF_FREE (meta_fd);
+
+        return 0;
+}
+
+
+/*
+ * Fetch the meta_ops pointer kept in the second inode-context slot for
+ * this xlator.  Yields NULL when the slot holds zero.
+ */
+struct meta_ops *
+meta_ops_get (inode_t *inode, xlator_t *this)
+{
+        uint64_t raw = 0;
+
+        inode_ctx_get2 (inode, this, NULL, &raw);
+
+        return (void *) raw;
+}
+
+
+/*
+ * Return the fops table embedded in the inode's meta_ops, or
+ * default_fops when the inode has no meta_ops attached.
+ */
+struct xlator_fops *
+meta_fops_get (inode_t *inode, xlator_t *this)
+{
+        struct meta_ops *ops = meta_ops_get (inode, this);
+
+        if (ops)
+                return &ops->fops;
+
+        return default_fops;
+}
+
+
+/*
+ * Attach ops to the inode (second inode-context slot) after filling
+ * any unset entries of ops->fops with the meta defaults.  Returns the
+ * result of inode_ctx_set2().
+ */
+int
+meta_ops_set (inode_t *inode, xlator_t *this, struct meta_ops *ops)
+{
+        uint64_t ops_value = 0;
+
+        meta_defaults_init (&ops->fops);
+
+        ops_value = (long) ops;
+
+        return inode_ctx_set2 (inode, this, NULL, &ops_value);
+}
+
+/*
+ * Fetch the opaque pointer kept in the first inode-context slot for
+ * this xlator.  Yields NULL when the slot holds zero.
+ */
+void *
+meta_ctx_get (inode_t *inode, xlator_t *this)
+{
+        uint64_t raw = 0;
+
+        inode_ctx_get2 (inode, this, &raw, NULL);
+
+        return (void *) raw;
+}
+
+
+/*
+ * Store ctx in the first inode-context slot for this xlator.  Returns
+ * the result of inode_ctx_set2().
+ */
+int
+meta_ctx_set (inode_t *inode, xlator_t *this, void *ctx)
+{
+        uint64_t ctx_value = (long) ctx;
+
+        return inode_ctx_set2 (inode, this, &ctx_value, NULL);
+}
+
+
+/*
+ * Release a frame-local: drop the reference on its xdata dictionary,
+ * if it holds one, and free the structure.  A NULL local is ignored.
+ */
+void
+meta_local_cleanup (meta_local_t *local, xlator_t *this)
+{
+        if (local) {
+                if (local->xdata)
+                        dict_unref (local->xdata);
+
+                GF_FREE (local);
+        }
+}
+
+
+/*
+ * Return the frame's meta_local_t, allocating a zeroed one and storing
+ * it in frame->local on first use.  Returns NULL if that allocation
+ * fails.
+ */
+meta_local_t *
+meta_local (call_frame_t *frame)
+{
+        meta_local_t *local = frame->local;
+
+        if (local)
+                return local;
+
+        local = GF_CALLOC (1, sizeof(*local), gf_meta_mt_local_t);
+        frame->local = local;
+
+        return local;
+}
+
+
+/*
+ * Produce a dictionary carrying "direct-io-mode" = 1.
+ *
+ * The caller's xdata is reused when given.  Otherwise a new dictionary
+ * is created and parked in the frame-local's xdata field, which
+ * meta_local_cleanup() later unrefs.  Returns NULL if the frame-local
+ * or the dictionary cannot be allocated, or if setting the key fails.
+ */
+dict_t *
+meta_direct_io_mode (dict_t *xdata, call_frame_t *frame)
+{
+        meta_local_t *local = NULL;
+        dict_t       *rsp   = xdata;
+
+        if (!rsp) {
+                local = meta_local (frame);
+                if (!local)
+                        return NULL;
+
+                rsp = dict_new();
+                local->xdata = rsp;
+                if (!rsp)
+                        return NULL;
+        }
+
+        if (dict_set_int8 (rsp, "direct-io-mode", 1) == 0)
+                return rsp;
+
+        return NULL;
+}
+
+
+static void
+meta_uuid_copy (uuid_t dst, uuid_t src)
+{ /* Copy src into dst; if dst is then the null uuid, generate a fresh one in place. */
+ gf_uuid_copy (dst, src);
+ if (gf_uuid_is_null (dst))
+ gf_uuid_generate (dst);
+}
+
+
+/*
+ * Fill iatt with synthetic attributes for an inode of the given type:
+ *   - directories: mode 0755, nlink 2
+ *   - symlinks:    mode 0777, nlink 1
+ *   - anything else: mode 0644, nlink 1
+ * Owner and group are 0 and the size is 0.  The gfid is taken from the
+ * inode (a new one is generated when the inode's gfid is null) and the
+ * inode number is derived from it.  All three timestamps are set to
+ * the current time.
+ */
+static void
+default_meta_iatt_fill (struct iatt *iatt, inode_t *inode, ia_type_t type)
+{
+        struct timeval now = { };
+
+        iatt->ia_type = type;
+
+        if (type == IA_IFDIR) {
+                iatt->ia_prot = ia_prot_from_st_mode (0755);
+                iatt->ia_nlink = 2;
+        } else if (type == IA_IFLNK) {
+                iatt->ia_prot = ia_prot_from_st_mode (0777);
+                iatt->ia_nlink = 1;
+        } else {
+                iatt->ia_prot = ia_prot_from_st_mode (0644);
+                iatt->ia_nlink = 1;
+        }
+
+        iatt->ia_uid = 0;
+        iatt->ia_gid = 0;
+        iatt->ia_size = 0;
+
+        meta_uuid_copy (iatt->ia_gfid, inode->gfid);
+        iatt->ia_ino = gfid_to_ino (iatt->ia_gfid);
+
+        gettimeofday (&now, 0);
+
+        iatt->ia_atime = now.tv_sec;
+        iatt->ia_ctime = iatt->ia_atime;
+        iatt->ia_mtime = iatt->ia_ctime;
+
+        iatt->ia_atime_nsec = (now.tv_usec * 1000);
+        iatt->ia_ctime_nsec = iatt->ia_atime_nsec;
+        iatt->ia_mtime_nsec = iatt->ia_ctime_nsec;
+}
+
+
+/*
+ * Fill iatt for inode: use the inode's own iatt_fill op when it has
+ * one, otherwise the built-in defaults for the given type.  An inode
+ * with no meta_ops attached leaves iatt untouched.
+ */
+void
+meta_iatt_fill (struct iatt *iatt, inode_t *inode, ia_type_t type)
+{
+        struct meta_ops *ops = meta_ops_get (inode, THIS);
+
+        if (!ops)
+                return;
+
+        if (ops->iatt_fill) {
+                ops->iatt_fill (THIS, inode, iatt);
+                return;
+        }
+
+        default_meta_iatt_fill (iatt, inode, type);
+}
+
+
+/*
+ * Answer a lookup that carries no name: report loc->inode itself, with
+ * an iatt built for the type already recorded on it and a zero-filled
+ * post-parent iatt.  Always unwinds success and returns 0.
+ */
+int
+meta_inode_discover (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     dict_t *xdata)
+{
+        struct iatt stbuf      = {0, };
+        struct iatt parent_buf = {0, };
+
+        meta_iatt_fill (&stbuf, loc->inode, loc->inode->ia_type);
+
+        META_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &stbuf, xdata,
+                           &parent_buf);
+        return 0;
+}
+
+
+/*
+ * Make sure the fd has its file content cached and return the cached
+ * size.
+ *
+ * If data is already cached it is reused.  Otherwise the inode's
+ * file_fill op (when present) writes into a temporary strfd; unless
+ * that op reports a negative result, the strfd's buffer and size are
+ * moved onto the fd context so the strfd can be closed without taking
+ * the buffer with it.
+ *
+ * Returns -1 when the fd context, the strfd or the inode's ops cannot
+ * be obtained.
+ */
+int
+meta_file_fill (xlator_t *this, fd_t *fd)
+{
+        meta_fd_t       *mfd    = NULL;
+        strfd_t         *buf    = NULL;
+        struct meta_ops *ops    = NULL;
+        int              filled = 0;
+
+        mfd = meta_fd_get (fd, this);
+        if (!mfd)
+                return -1;
+
+        if (mfd->data)
+                return mfd->size;
+
+        buf = strfd_open ();
+        if (!buf)
+                return -1;
+
+        ops = meta_ops_get (fd->inode, this);
+        if (!ops) {
+                strfd_close (buf);
+                return -1;
+        }
+
+        if (ops->file_fill)
+                filled = ops->file_fill (this, fd->inode, buf);
+
+        if (filled >= 0) {
+                /* take ownership of the buffer before the strfd goes away */
+                mfd->data = buf->data;
+                mfd->size = buf->size;
+
+                buf->data = NULL;
+        }
+
+        strfd_close (buf);
+
+        return mfd->size;
+}
+
+
+/*
+ * Make sure the fd has its dynamic directory entries cached and return
+ * the cached entry count.
+ *
+ * Entries already cached are reused.  Otherwise the inode's dir_fill
+ * op (when present) is asked for them; whatever array it hands back is
+ * stored on the fd context together with the count it returned.
+ *
+ * Returns -1 when the fd context or the inode's ops cannot be
+ * obtained.
+ */
+int
+meta_dir_fill (xlator_t *this, fd_t *fd)
+{
+        meta_fd_t          *mfd     = NULL;
+        struct meta_ops    *ops     = NULL;
+        struct meta_dirent *entries = NULL;
+        int                 count   = 0;
+
+        mfd = meta_fd_get (fd, this);
+        if (!mfd)
+                return -1;
+
+        if (mfd->dirents)
+                return mfd->size;
+
+        ops = meta_ops_get (fd->inode, this);
+        if (!ops)
+                return -1;
+
+        if (ops->dir_fill)
+                count = ops->dir_fill (this, fd->inode, &entries);
+
+        if (entries) {
+                mfd->dirents = entries;
+                mfd->size = count;
+        }
+
+        return mfd->size;
+}
+
+
+/*
+ * Count the entries of a meta_dirent array that is terminated by an
+ * entry whose name is NULL.  A NULL array counts as empty.
+ */
+int
+fixed_dirents_len (struct meta_dirent *dirents)
+{
+        int n = 0;
+
+        if (!dirents)
+                return 0;
+
+        while (dirents[n].name)
+                n++;
+
+        return n;
+}
diff --git a/xlators/meta/src/meta-hooks.h b/xlators/meta/src/meta-hooks.h
new file mode 100644
index 00000000000..bcf3643d223
--- /dev/null
+++ b/xlators/meta/src/meta-hooks.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __META_HOOKS_H
+#define __META_HOOKS_H
+#include "xlator.h"
+
+#define DECLARE_HOOK(name) int meta_##name##_hook (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) /* prototype for meta_<name>_hook */
+
+DECLARE_HOOK(root_dir);
+DECLARE_HOOK(graphs_dir);
+DECLARE_HOOK(frames_file);
+DECLARE_HOOK(graph_dir);
+DECLARE_HOOK(active_link);
+DECLARE_HOOK(xlator_dir);
+DECLARE_HOOK(top_link);
+DECLARE_HOOK(logging_dir);
+DECLARE_HOOK(logfile_link);
+DECLARE_HOOK(loglevel_file);
+DECLARE_HOOK(process_uuid_file);
+DECLARE_HOOK(volfile_file);
+DECLARE_HOOK(view_dir);
+DECLARE_HOOK(subvolumes_dir);
+DECLARE_HOOK(subvolume_link);
+DECLARE_HOOK(type_file);
+DECLARE_HOOK(version_file);
+DECLARE_HOOK(options_dir);
+DECLARE_HOOK(option_file);
+DECLARE_HOOK(cmdline_file);
+DECLARE_HOOK(name_file);
+DECLARE_HOOK(private_file);
+DECLARE_HOOK(mallinfo_file);
+DECLARE_HOOK(history_file);
+DECLARE_HOOK(master_dir);
+DECLARE_HOOK(meminfo_file);
+DECLARE_HOOK(measure_file);
+DECLARE_HOOK(profile_file);
+
+#endif
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
index a9ec9435a4a..e8a31856e71 100644
--- a/xlators/meta/src/meta-mem-types.h
+++ b/xlators/meta/src/meta-mem-types.h
@@ -1,34 +1,25 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_MEM_TYPES_H__
#define __META_MEM_TYPES_H__
#include "mem-types.h"
enum gf_meta_mem_types_ {
- gf_meta_mt__open_local = gf_common_mt_end + 1,
- gf_meta_mt_dir_entry_t,
- gf_meta_mt_meta_dirent_t,
- gf_meta_mt_meta_private_t,
- gf_meta_mt_stat,
+ gf_meta_mt_priv_t = gf_common_mt_end + 1,
+ gf_meta_mt_fd_t,
+ gf_meta_mt_fd_data_t,
+ gf_meta_mt_strfd_t,
+ gf_meta_mt_dirents_t,
+ gf_meta_mt_local_t,
gf_meta_mt_end
};
#endif
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index face2fe70f1..25720136714 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -1,1307 +1,283 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
#include "xlator.h"
+#include "defaults.h"
-#include "meta.h"
-#include "view.h"
#include "meta-mem-types.h"
+#include "meta.h"
-int32_t
-meta_getattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-meta_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- if (file->fops && file->fops->getattr) {
- STACK_WIND (frame, meta_getattr_cbk,
- this, file->fops->getattr, path);
- return 0;
- }
- else {
- STACK_UNWIND (frame, 0, 0, file->stbuf);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_getattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getattr,
- path);
- return 0;
- }
-}
-
-int32_t
-meta_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_chmod (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode)
-{
- STACK_WIND (frame,
- meta_chmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- path,
- mode);
- return 0;
-}
-
-int32_t
-meta_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_chown (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- uid_t uid,
- gid_t gid)
-{
- STACK_WIND (frame,
- meta_chown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- path,
- uid,
- gid);
- return 0;
-}
-
+#include "meta-hooks.h"
-int32_t
-meta_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-meta_truncate (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- off_t offset)
+int
+meta_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame,
- meta_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- path,
- offset);
- return 0;
-}
+ inode_t *inode = NULL;
+ if (META_HOOK (loc) || IS_META_ROOT_GFID (loc->gfid)) {
+ struct iatt iatt = { };
+ struct iatt parent = { };
-int32_t
-meta_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
+ meta_root_dir_hook (frame, this, loc, xdata);
-int32_t
-meta_ftruncate (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- off_t offset)
-{
- STACK_WIND (frame,
- meta_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
-}
+ meta_iatt_fill (&iatt, loc->inode, IA_IFDIR);
+ gf_uuid_parse (META_ROOT_GFID, iatt.ia_gfid);
+ META_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &iatt,
+ xdata, &parent);
+ return 0;
+ }
-int32_t
-meta_utimes_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_utimes (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- struct timespec *buf)
-{
- STACK_WIND (frame,
- meta_utimes_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimes,
- path,
- buf);
- return 0;
-}
+ if (loc->parent)
+ inode = loc->parent;
+ else
+ inode = loc->inode;
+ META_FOP (inode, lookup, frame, this, loc, xdata);
-int32_t
-meta_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-
-int32_t
-meta_access (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode)
-{
- STACK_WIND (frame,
- meta_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- path,
- mode);
- return 0;
-}
-
-int32_t
-meta_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *dest)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dest);
- return 0;
-}
-
-int32_t
-meta_readlink (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- size_t size)
-{
- STACK_WIND (frame,
- meta_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- path,
- size);
- return 0;
-}
-
-int32_t
-meta_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_mknod (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode,
- dev_t dev)
-{
- STACK_WIND (frame,
- meta_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- path,
- mode,
- dev);
- return 0;
+ return 0;
}
-int32_t
-meta_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-meta_mkdir (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode)
+int
+meta_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- meta_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- path,
- mode);
- return 0;
-}
+ META_FOP (fd->inode, opendir, frame, this, loc, fd, xdata);
-int32_t
-meta_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-
-int32_t
-meta_unlink (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- STACK_WIND (frame,
- meta_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- path);
- return 0;
+ return 0;
}
-int32_t
-meta_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-int32_t
-meta_rmdir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
+int
+meta_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- meta_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- path);
- return 0;
-}
+ META_FOP (fd->inode, open, frame, this, loc, flags, fd, xdata);
-int32_t
-meta_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ return 0;
}
-int32_t
-meta_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *oldpath,
- const char *newpath)
-{
- STACK_WIND (frame,
- meta_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- oldpath,
- newpath);
- return 0;
-}
-int32_t
-meta_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP (fd->inode, readv, frame, this, fd, size, offset, flags, xdata);
-int32_t
-meta_rename (call_frame_t *frame,
- xlator_t *this,
- const char *oldpath,
- const char *newpath)
-{
- STACK_WIND (frame,
- meta_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldpath,
- newpath);
- return 0;
+ return 0;
}
-int32_t
-meta_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-meta_link (call_frame_t *frame,
- xlator_t *this,
- const char *oldpath,
- const char *newpath)
+int
+meta_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame,
- meta_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldpath,
- newpath);
- return 0;
-}
-
-struct _open_local {
- const char *path;
-};
+ META_FOP (fd->inode, flush, frame, this, fd, xdata);
-int32_t
-meta_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *ctx, struct stat *buf)
-{
- struct _open_local *local = frame->local;
- if (local)
- dict_set (ctx, this->name, str_to_data (local->path));
- STACK_UNWIND (frame, op_ret, op_errno, ctx, buf);
- return 0;
-}
-
-int32_t
-meta_open (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- if (file->fops && file->fops->open) {
- struct _open_local *local = GF_CALLOC (1, sizeof (struct _open_local), gf_meta_mt__open_local);
- ERR_ABORT (local);
- local->path = gf_strdup (path);
- frame->local = local;
- STACK_WIND (frame, meta_open_cbk,
- this, file->fops->open,
- path, flags, mode);
- return 0;
- }
- else {
- dict_t *ctx = get_new_dict ();
- dict_ref (ctx);
- dict_set (ctx, this->name, str_to_data (gf_strdup (path)));
- STACK_UNWIND (frame, 0, 0, ctx, file->stbuf);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- path, flags, mode);
- return 0;
- }
-}
-
-int32_t
-meta_create (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- if (file->fops && file->fops->create) {
- struct _open_local *local = GF_CALLOC (1, sizeof (struct _open_local), gf_meta_mt__open_local);
- ERR_ABORT (local);
- local->path = gf_strdup (path);
- frame->local = local;
- STACK_WIND (frame, meta_open_cbk,
- this, file->fops->create,
- path, flags, mode);
- return 0;
- }
- else {
- STACK_UNWIND (frame, -1, 0, NULL, NULL);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
- path, flags, mode);
- return 0;
- }
+ return 0;
}
-int32_t
-meta_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count);
- return 0;
-}
-int32_t
-meta_readv (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- size_t size,
- off_t offset)
+int
+meta_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file && file->fops && file->fops->readv) {
- STACK_WIND (frame, meta_readv_cbk,
- this, file->fops->readv,
- fd, size, offset);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
- }
-}
+ META_FOP (loc->inode, stat, frame, this, loc, xdata);
-int32_t
-meta_writev_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ return 0;
}
-int32_t
-meta_writev (call_frame_t *frame, xlator_t *this,
- dict_t *fd,
- struct iovec *vector, int32_t count, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file && file->fops && file->fops->writev) {
- STACK_WIND (frame, meta_writev_cbk,
- this, file->fops->writev,
- fd, vector, count, offset);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset);
- return 0;
- }
-}
-int32_t
-meta_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP (fd->inode, fstat, frame, this, fd, xdata);
-int32_t
-meta_flush (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- if (file->fops && file->fops->flush) {
- STACK_WIND (frame, meta_flush_cbk,
- this, file->fops->flush,
- fd);
- return 0;
- }
- else {
- STACK_UNWIND (frame, 0, 0);
return 0;
- }
- }
- }
- else {
- STACK_WIND (frame, meta_flush_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
- }
-}
-
-int32_t
-meta_release_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
}
-int32_t
-meta_release (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- dict_unref (fd);
- STACK_UNWIND (frame, 0, 0);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_release_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->release,
- fd);
- return 0;
- }
-}
-int32_t
-meta_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP (fd->inode, readdir, frame, this, fd, size, offset, xdata);
-int32_t
-meta_fsync (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- int32_t flags)
-{
- STACK_WIND (frame,
- meta_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
+ return 0;
}
-int32_t
-meta_fgetattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-meta_fgetattr (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
+int
+meta_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame,
- meta_fgetattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetattr,
- fd);
- return 0;
-}
+ META_FOP (fd->inode, readdirp, frame, this, fd, size, offset, xdata);
-int32_t
-meta_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *fd)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+ return 0;
}
-int32_t
-meta_opendir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *dir = lookup_meta_entry (root, path, NULL);
-
- if (dir) {
- dict_t *ctx = get_new_dict ();
- dict_set (ctx, this->name, str_to_data (gf_strdup (path)));
- STACK_UNWIND (frame, 0, 0, ctx);
- return 0;
- }
- else {
- STACK_WIND (frame, meta_opendir_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
- path);
- return 0;
- }
-}
-int32_t
-meta_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
+int
+meta_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- meta_private_t *priv = (meta_private_t *)this->private;
-
- if ((int) cookie == 1) {
- dir_entry_t *dir = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_meta_mt_dir_entry_t);
- ERR_ABORT (dir);
-
- dir->name = gf_strdup (".meta");
- memcpy (&dir->buf, priv->tree->stbuf, sizeof (struct stat));
- dir->next = entries->next;
- entries->next = dir;
-
- STACK_UNWIND (frame, op_ret, op_errno, entries, count+1);
- return 0;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
+ META_FOP (loc->inode, readlink, frame, this, loc, size, xdata);
-int32_t
-meta_readdir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
-
- meta_dirent_t *dir = lookup_meta_entry (root, path, NULL);
- if (dir) {
- if (dir->fops && dir->fops->readdir) {
- STACK_WIND (frame, meta_readdir_cbk,
- this, dir->fops->readdir, path);
- return 0;
- }
- else {
- int count = 0;
- dir = dir->children;
- dir_entry_t *entries = NULL;
-
- while (dir) {
- dir_entry_t *d = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_meta_mt_dir_entry_t);
- ERR_ABORT (d);
- d->name = dir->name;
- d->buf = *dir->stbuf;
- d->next = entries;
- entries = d;
- count++;
- dir = dir->next;
- }
-
- dir_entry_t *header = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_meta_mt_dir_entry_t);
- ERR_ABORT (header);
- header->next = entries;
- STACK_UNWIND (frame, 0, 0, header, count);
- return 0;
- }
- }
- else {
- if (!strcmp (path, "/")) {
- STACK_WIND_COOKIE (frame, meta_readdir_cbk,
- (int) 1, /* cookie to tell _cbk to add .meta entry */
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
- path);
- }
- else {
- STACK_WIND (frame, meta_readdir_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
- path);
- }
- }
- return 0;
+ return 0;
}
-int32_t
-meta_releasedir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-int32_t
-meta_releasedir (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
+int
+meta_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- meta_releasedir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->releasedir,
- fd);
- return 0;
+ META_FOP (fd->inode, writev, frame, this, fd, iov, count, offset, flags,
+ iobref, xdata);
+ return 0;
}
-int32_t
-meta_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-int32_t
-meta_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- int32_t flags)
+int
+meta_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- meta_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags);
- return 0;
-}
+ META_FOP (loc->inode, truncate, frame, this, loc, offset, xdata);
-int32_t
-meta_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ return 0;
}
-int32_t
-meta_statfs (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- STACK_WIND (frame,
- meta_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- path);
- return 0;
-}
-int32_t
-meta_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP (fd->inode, ftruncate, frame, this, fd, offset, xdata);
-int32_t
-meta_setxattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- const char *name,
- const char *value,
- size_t size,
- int32_t flags)
-{
- STACK_WIND (frame,
- meta_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- path,
- name,
- value,
- size,
- flags);
- return 0;
+ return 0;
}
int32_t
-meta_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *value)
+meta_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- value);
- return 0;
-}
+ META_FOP (fd->inode, fsync, frame, this, fd, flags, xdata);
-int32_t
-meta_getxattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- const char *name,
- size_t size)
-{
- STACK_WIND (frame,
- meta_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- path,
- name,
- size);
- return 0;
+ return 0;
}
int32_t
-meta_listxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *value)
+meta_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- value);
- return 0;
-}
+ META_FOP (fd->inode, fsyncdir, frame, this, fd, flags, xdata);
-int32_t
-meta_listxattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- size_t size)
-{
- STACK_WIND (frame,
- meta_listxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->listxattr,
- path,
- size);
- return 0;
+ return 0;
}
-int32_t
-meta_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_forget (xlator_t *this, inode_t *inode)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ return 0;
}
-int32_t
-meta_removexattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- const char *name)
-{
- STACK_WIND (frame,
- meta_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- path,
- name);
- return 0;
-}
-int32_t
-meta_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
+int
+meta_release (xlator_t *this, fd_t *fd)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
+ return meta_fd_release (fd, this);
}
-int32_t
-meta_lk (call_frame_t *frame,
- xlator_t *this,
- dict_t *file,
- int32_t cmd,
- struct flock *lock)
-{
- STACK_WIND (frame,
- meta_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- file,
- cmd,
- lock);
- return 0;
-}
-static void
-add_xlator_to_tree (meta_dirent_t *tree, xlator_t *this,
- const char *prefix)
+int
+meta_releasedir (xlator_t *this, fd_t *fd)
{
- char *dir;
- gf_asprintf (&dir, "%s/%s", prefix, this->name);
-
- char *children;
- gf_asprintf (&children, "%s/%s", dir, "subvolumes");
-
- char *type;
- gf_asprintf (&type, "%s/%s", dir, "type");
-
- char *view;
- gf_asprintf (&view, "%s/%s", dir, "view");
-
- insert_meta_entry (tree, dir, S_IFDIR, NULL, NULL);
- insert_meta_entry (tree, children, S_IFDIR, NULL, NULL);
- meta_dirent_t *v = insert_meta_entry (tree, view, S_IFDIR, NULL,
- &meta_xlator_view_fops);
- v->view_xlator = this;
- meta_dirent_t *t = insert_meta_entry (tree, type, S_IFREG, NULL,
- &meta_xlator_type_fops);
- t->view_xlator = this;
-
- xlator_list_t *trav = this->children;
- while (trav) {
- add_xlator_to_tree (tree, trav->xlator, children);
- trav = trav->next;
- }
+ return meta_fd_release (fd, this);
}
-static void
-build_meta_tree (xlator_t *this)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- priv->tree = GF_CALLOC (1, sizeof (meta_dirent_t),
- gf_meta_mt_meta_dirent_t);
- ERR_ABORT (priv->tree);
- priv->tree->name = gf_strdup (".meta");
- priv->tree->stbuf = new_stbuf ();
- priv->tree->stbuf->st_mode = S_IFDIR | S_IRUSR | S_IRGRP | S_IROTH |
- S_IXUSR | S_IXGRP | S_IXOTH;
-
- insert_meta_entry (priv->tree, "/.meta/version",
- S_IFREG, NULL, &meta_version_fops);
-
- insert_meta_entry (priv->tree, "/.meta/xlators",
- S_IFDIR, NULL, NULL);
-
- xlator_list_t *trav = this->children;
- while (trav) {
- add_xlator_to_tree (priv->tree, trav->xlator, "/.meta/xlators");
- trav = trav->next;
- }
-}
-int32_t
+int
mem_acct_init (xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
if (!this)
return ret;
ret = xlator_mem_acct_init (this, gf_meta_mt_end + 1);
-
+
if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Memory accounting init failed");
return ret;
}
return ret;
}
-int32_t
+/* init: allocate the private state and load the "meta-dir-name" option into it; returns 0 on success, -1 on failure. */
+int
init (xlator_t *this)
{
- if (this->parent != NULL) {
- gf_log ("meta", GF_LOG_ERROR, "FATAL: meta should be the root of the xlator tree");
- return -1;
- }
-
- meta_private_t *priv = GF_CALLOC (1, sizeof (meta_private_t),
- gf_meta_mt_meta_private_t);
- ERR_ABORT (priv);
-
- data_t *directory = dict_get (this->options, "directory");
- if (directory) {
- priv->directory = gf_strdup (data_to_str (directory));
- }
- else {
- priv->directory = ".meta";
- }
-
- this->private = priv;
- build_meta_tree (this);
-
- return 0;
+ meta_priv_t *priv = NULL;
+ priv = GF_CALLOC (1, sizeof (*priv), gf_meta_mt_priv_t);
+ if (!priv)
+ return -1;
+ GF_OPTION_INIT ("meta-dir-name", priv->meta_dir_name, str, err);
+ this->private = priv;
+ return 0;
+err:
+ /* Option parsing failed: free priv and fail init instead of returning 0. */
+ GF_FREE (priv);
+ return -1;
}
-int32_t
+
+int
fini (xlator_t *this)
{
- return 0;
+ return 0;
}
+
struct xlator_fops fops = {
- .getattr = meta_getattr,
- .readlink = meta_readlink,
- .mknod = meta_mknod,
- .mkdir = meta_mkdir,
- .unlink = meta_unlink,
- .rmdir = meta_rmdir,
- .symlink = meta_symlink,
- .rename = meta_rename,
- .link = meta_link,
- .chmod = meta_chmod,
- .chown = meta_chown,
- .truncate = meta_truncate,
- .utimes = meta_utimes,
- .open = meta_open,
- .readv = meta_readv,
- .writev = meta_writev,
- .statfs = meta_statfs,
- .flush = meta_flush,
- .release = meta_release,
- .fsync = meta_fsync,
- .setxattr = meta_setxattr,
- .getxattr = meta_getxattr,
- .listxattr = meta_listxattr,
- .removexattr = meta_removexattr,
- .opendir = meta_opendir,
- .readdir = meta_readdir,
- .releasedir = meta_releasedir,
- .fsyncdir = meta_fsyncdir,
- .access = meta_access,
- .ftruncate = meta_ftruncate,
- .fgetattr = meta_fgetattr,
- .create = meta_create,
- .lk = meta_lk,
+ .lookup = meta_lookup,
+ .opendir = meta_opendir,
+ .open = meta_open,
+ .readv = meta_readv,
+ .flush = meta_flush,
+ .stat = meta_stat,
+ .fstat = meta_fstat,
+ .readdir = meta_readdir,
+ .readdirp = meta_readdirp,
+ .readlink = meta_readlink,
+ .writev = meta_writev,
+ .truncate = meta_truncate,
+ .ftruncate = meta_ftruncate,
+ .fsync = meta_fsync,
+ .fsyncdir = meta_fsyncdir
+};
+
+
+struct xlator_cbks cbks = {
+ .forget = meta_forget,
+ .release = meta_release,
+ .releasedir = meta_releasedir,
+};
+
+
+struct volume_options options[] = {
+ { .key = {"meta-dir-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = DEFAULT_META_DIR_NAME,
+ .description = "Name of default meta directory."
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 7f44162cc10..d9c56c656ad 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -1,48 +1,121 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_H__
#define __META_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-struct _meta_dirent {
- const char *name;
- int type;
- struct _meta_dirent *children;
- struct _meta_dirent *parent;
- struct _meta_dirent *next;
- struct stat *stbuf;
- xlator_t *view_xlator;
- struct xlator_fops *fops;
+#include "strfd.h"
+
+#define DEFAULT_META_DIR_NAME ".meta"
+
+#define META_ROOT_GFID "ba926388-bb9c-4eec-ad60-79dba4cc083a"
+
+#define IS_META_ROOT_GFID(g) (strcmp (uuid_utoa(g), META_ROOT_GFID) == 0)
+
+typedef int (*meta_hook_t) (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+typedef struct {
+ dict_t *xdata;
+} meta_local_t;
+
+typedef struct {
+ char *meta_dir_name;
+} meta_priv_t;
+
+struct meta_dirent {
+ const char *name;
+ ia_type_t type;
+ meta_hook_t hook;
+};
+
+#define DOT_DOTDOT { .name = ".", .type = IA_IFDIR }, { .name = "..", .type = IA_IFDIR }
+
+struct meta_ops {
+ struct meta_dirent *fixed_dirents;
+ int (*dir_fill) (xlator_t *this, inode_t *dir, struct meta_dirent **entries);
+ int (*file_fill) (xlator_t *this, inode_t *file, strfd_t *strfd);
+ int (*iatt_fill) (xlator_t *this, inode_t *inode, struct iatt *iatt);
+ int (*link_fill) (xlator_t *this, inode_t *inode, strfd_t *strfd);
+ int (*file_write) (xlator_t *this, fd_t *fd, struct iovec *iov, int count);
+ struct xlator_fops fops;
+ struct xlator_cbks cbks;
};
-typedef struct _meta_dirent meta_dirent_t;
typedef struct {
- const char *directory;
- meta_dirent_t *tree;
-} meta_private_t;
+ char *data;
+ struct meta_dirent *dirents;
+ size_t size;
+} meta_fd_t;
+
+
+#define COUNT(arr) (sizeof(arr)/sizeof(arr[0]))
+
+#define META_HOOK(loc) (__is_root_gfid (loc->pargfid) && !strcmp (loc->name, META_PRIV(THIS)->meta_dir_name))
+
+#define META_PRIV(t) ((meta_priv_t *)(t->private))
+/* META_STACK_UNWIND: detach frame->local, unwind via STACK_UNWIND_STRICT, then pass the detached local to meta_local_cleanup(). */
+#define META_STACK_UNWIND(fop, frame, params ...) \
+ do { \
+ meta_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ meta_local_cleanup (__local, __this); \
+ } \
+ } while (0)
+
+/* META_FOP: fetch the fops table for inode i via meta_fops_get() and invoke its 'fop' member with (fr, t, params). */
+#define META_FOP(i, fop, fr, t, params ...) do { \
+ struct xlator_fops *_fops = NULL; \
+ \
+ _fops = meta_fops_get (i, t); \
+ \
+ _fops->fop (fr, t, params); \
+ } while (0)
+
+
+void meta_iatt_fill (struct iatt *iatt, inode_t *inode, ia_type_t type);
+
+int meta_inode_discover (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int meta_ops_set (inode_t *inode, xlator_t *this, struct meta_ops *ops);
+
+struct xlator_fops *meta_fops_get (inode_t *inode, xlator_t *this);
+struct xlator_cbks *meta_cbks_get (inode_t *inode, xlator_t *this);
+struct meta_ops *meta_ops_get (inode_t *inode, xlator_t *this);
+
+int meta_ctx_set (inode_t *inode, xlator_t *this, void *ctx);
+
+void *meta_ctx_get (inode_t *inode, xlator_t *this);
+
+
+void meta_local_cleanup (meta_local_t *local, xlator_t *this);
+
+struct xlator_fops *meta_defaults_init (struct xlator_fops *fops);
+
+meta_fd_t *meta_fd_get (fd_t *fd, xlator_t *this);
+
+int meta_fd_release (fd_t *fd, xlator_t *this);
+
+dict_t *meta_direct_io_mode (dict_t *xdata, call_frame_t *frame);
+
+meta_local_t *meta_local (call_frame_t *frame);
+
+int meta_file_fill (xlator_t *this, fd_t *fd);
-#include "tree.h"
-#include "misc.h"
+int meta_dir_fill (xlator_t *this, fd_t *fd);
+int fixed_dirents_len (struct meta_dirent *dirents);
#endif /* __META_H__ */
diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c
deleted file mode 100644
index 062e741f25d..00000000000
--- a/xlators/meta/src/misc.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <unistd.h>
-#include <sys/uio.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "meta.h"
-
-#define min(x,y) ((x) < (y) ? (x) : (y))
-
-/* /.meta/version */
-static const char *version_str = PACKAGE_NAME " " PACKAGE_VERSION "\n";
-
-int32_t
-meta_version_readv (call_frame_t *frame, xlator_t *this,
- dict_t *fd, size_t size, off_t offset)
-{
- static int version_size;
- version_size = strlen (version_str);
-
- struct iovec vec;
- vec.iov_base = version_str + offset;
- vec.iov_len = min (version_size - offset, size);
-
- STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1);
- return 0;
-}
-
-int32_t
-meta_version_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
- file->stbuf->st_size = strlen (version_str);
- STACK_UNWIND (frame, 0, 0, file->stbuf);
-}
-
-struct xlator_fops meta_version_fops = {
- .readv = meta_version_readv,
- .getattr = meta_version_getattr
-};
-
diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h
deleted file mode 100644
index 8ede1328b3d..00000000000
--- a/xlators/meta/src/misc.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __MISC_H__
-#define __MISC_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-struct xlator_fops meta_version_fops;
-
-#endif /* __MISC_H__ */
diff --git a/xlators/meta/src/name-file.c b/xlators/meta/src/name-file.c
new file mode 100644
index 00000000000..44c359ef5c8
--- /dev/null
+++ b/xlators/meta/src/name-file.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "globals.h"
+#include "lkowner.h"
+
+/* file_fill callback: prints the name of the xlator held as this inode's meta context, plus a newline, into strfd; returns strfd->size. */
+static int
+name_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get (file, this);
+ /* NOTE(review): xl is dereferenced unchecked -- confirm meta_ctx_get() cannot return NULL here. */
+ strprintf (strfd, "%s\n", xl->name);
+
+ return strfd->size;
+}
+
+/* Ops table for the "name" file: only file_fill is provided. */
+static struct meta_ops name_file_ops = {
+ .file_fill = name_file_fill,
+};
+
+/* Hook: installs name_file_ops on loc->inode and gives it the same meta context as loc->parent; always returns 0. */
+int
+meta_name_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set (loc->inode, this, &name_file_ops);
+
+ meta_ctx_set (loc->inode, this, meta_ctx_get (loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/option-file.c b/xlators/meta/src/option-file.c
new file mode 100644
index 00000000000..5a8465c5d8b
--- /dev/null
+++ b/xlators/meta/src/option-file.c
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/*
+ * file_fill callback for a single option file: prints the string form of
+ * the data_t stored as this inode's meta context, followed by a newline.
+ *
+ * Returns strfd->size after the write.
+ */
+static int
+option_file_fill (xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+        data_t *value = meta_ctx_get (inode, this);
+
+        strprintf (strfd, "%s\n", data_to_str (value));
+        return strfd->size;
+}
+
+
+/* Operations for an option file; only the file_fill callback is set. */
+static struct meta_ops option_file_ops = {
+        .file_fill = option_file_fill,
+};
+
+
+/*
+ * Hook for an entry of an "options" directory: looks loc->name up in the
+ * options dict of the xlator stored on the parent inode, stores the
+ * resulting data_t as this inode's context and installs option_file_ops.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ *
+ * NOTE(review): the dict_get result is stored without a NULL check;
+ * presumably loc->name always names an existing option -- confirm.
+ */
+int
+meta_option_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xdata)
+{
+        xlator_t *owner = meta_ctx_get (loc->parent, this);
+        data_t   *value = dict_get (owner->options, (char *) loc->name);
+
+        meta_ctx_set (loc->inode, this, value);
+        meta_ops_set (loc->inode, this, &option_file_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/options-dir.c b/xlators/meta/src/options-dir.c
new file mode 100644
index 00000000000..e637afb1f73
--- /dev/null
+++ b/xlators/meta/src/options-dir.c
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/*
+ * dict_foreach callback: fills the meta_dirent that the caller's cursor
+ * currently points at with a copy of the key, then advances the cursor to
+ * the next slot.
+ *
+ * data is a struct meta_dirent ** cursor.  dict and value are not used.
+ * Always returns 0.
+ *
+ * NOTE(review): the gf_strdup result is not checked.
+ */
+static int
+dict_key_add (dict_t *dict, char *key, data_t *value, void *data)
+{
+        struct meta_dirent **cursor = data;
+        struct meta_dirent  *slot   = *cursor;
+
+        slot->name = gf_strdup (key);
+        slot->type = IA_IFREG;
+        slot->hook = meta_option_file_hook;
+
+        *cursor = slot + 1;
+        return 0;
+}
+
+
+/*
+ * dir_fill callback for an "options" directory: builds one regular-file
+ * dirent per key of the options dict belonging to the xlator stored as
+ * this inode's meta context.
+ *
+ * On success stores the newly allocated array in *dp and returns the
+ * number of entries; returns -1 if the allocation fails.
+ */
+static int
+options_dir_fill (xlator_t *this, inode_t *inode, struct meta_dirent **dp)
+{
+        struct meta_dirent *dirent  = NULL;
+        struct meta_dirent *direntp = NULL;
+        xlator_t           *xl      = NULL;
+
+        xl = meta_ctx_get (inode, this);
+
+        /* element count first, element size second */
+        dirent = GF_CALLOC (xl->options->count, sizeof (*dirent),
+                            gf_meta_mt_dirents_t);
+        if (!dirent)
+                return -1;
+
+        /* dict_key_add fills one slot per key and advances direntp */
+        direntp = dirent;
+
+        dict_foreach (xl->options, dict_key_add, &direntp);
+
+        *dp = dirent;
+
+        return xl->options->count;
+}
+
+
+/* Operations for an "options" directory; only dir_fill is set. */
+static struct meta_ops options_dir_ops = {
+        .dir_fill = options_dir_fill,
+};
+
+
+/*
+ * Hook for the "options" entry: gives the new inode the same context
+ * pointer as its parent (read back as an xlator_t by options_dir_fill) and
+ * installs options_dir_ops.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_options_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+        meta_ops_set (inode, this, &options_dir_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/private-file.c b/xlators/meta/src/private-file.c
new file mode 100644
index 00000000000..8d12b467d75
--- /dev/null
+++ b/xlators/meta/src/private-file.c
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "statedump.h"
+
+
+/*
+ * file_fill callback for the "private" file: passes the xlator stored as
+ * this inode's meta context to gf_proc_dump_xlator_private together with
+ * strfd (presumably the dump is written into strfd).
+ *
+ * Returns strfd->size afterwards.
+ */
+static int
+private_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        xlator_t *target = meta_ctx_get (file, this);
+
+        gf_proc_dump_xlator_private (target, strfd);
+        return strfd->size;
+}
+
+
+/* Operations for the "private" file; only file_fill is set. */
+static struct meta_ops private_file_ops = {
+        .file_fill = private_file_fill
+};
+
+
+/*
+ * Hook for the "private" entry: installs private_file_ops on the new inode
+ * and gives it the same context pointer as its parent directory.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_private_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &private_file_ops);
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+
+        return 0;
+}
diff --git a/xlators/meta/src/process_uuid-file.c b/xlators/meta/src/process_uuid-file.c
new file mode 100644
index 00000000000..3210de1d484
--- /dev/null
+++ b/xlators/meta/src/process_uuid-file.c
@@ -0,0 +1,41 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "globals.h"
+#include "lkowner.h"
+
+
+/*
+ * file_fill callback for the "process_uuid" file: prints
+ * this->ctx->process_uuid followed by a newline.  The inode argument is
+ * not consulted.
+ *
+ * Returns strfd->size after the write.
+ */
+static int
+process_uuid_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        strprintf (strfd, "%s\n", this->ctx->process_uuid);
+
+        return strfd->size;
+}
+
+
+/* Operations for the "process_uuid" file; only file_fill is set. */
+static struct meta_ops process_uuid_file_ops = {
+        .file_fill = process_uuid_file_fill
+};
+
+
+/*
+ * Hook for the "process_uuid" entry: installs process_uuid_file_ops on the
+ * new inode.  No context is attached.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_process_uuid_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                             dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &process_uuid_file_ops);
+        return 0;
+}
diff --git a/xlators/meta/src/profile-file.c b/xlators/meta/src/profile-file.c
new file mode 100644
index 00000000000..7a8d0bf810e
--- /dev/null
+++ b/xlators/meta/src/profile-file.c
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "statedump.h"
+
+
+/*
+ * file_fill callback for the "profile" file: passes the xlator stored as
+ * this inode's meta context to gf_proc_dump_xlator_profile together with
+ * strfd (presumably the dump is written into strfd).
+ *
+ * Returns strfd->size afterwards.
+ */
+static int
+profile_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        xlator_t *target = meta_ctx_get (file, this);
+
+        gf_proc_dump_xlator_profile (target, strfd);
+        return strfd->size;
+}
+
+
+/* Operations for the "profile" file; only file_fill is set. */
+static struct meta_ops profile_file_ops = {
+        .file_fill = profile_file_fill
+};
+
+
+/*
+ * Hook for the "profile" entry: installs profile_file_ops on the new inode
+ * and gives it the same context pointer as its parent directory.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_profile_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &profile_file_ops);
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+
+        return 0;
+}
diff --git a/xlators/meta/src/root-dir.c b/xlators/meta/src/root-dir.c
new file mode 100644
index 00000000000..b57313fd9ad
--- /dev/null
+++ b/xlators/meta/src/root-dir.c
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/*
+ * Fixed entries of the meta root directory.  Each entry gives a child's
+ * name, its inode type and the hook function associated with it.  The
+ * last entry has a NULL name (presumably the list terminator).
+ */
+static struct meta_dirent root_dir_dirents[] = {
+        DOT_DOTDOT,
+
+        { .name = "graphs",
+          .type = IA_IFDIR,
+          .hook = meta_graphs_dir_hook },
+        { .name = "frames",
+          .type = IA_IFREG,
+          .hook = meta_frames_file_hook },
+        { .name = "logging",
+          .type = IA_IFDIR,
+          .hook = meta_logging_dir_hook },
+        { .name = "process_uuid",
+          .type = IA_IFREG,
+          .hook = meta_process_uuid_file_hook },
+        { .name = "version",
+          .type = IA_IFREG,
+          .hook = meta_version_file_hook },
+        { .name = "cmdline",
+          .type = IA_IFREG,
+          .hook = meta_cmdline_file_hook },
+        { .name = "mallinfo",
+          .type = IA_IFREG,
+          .hook = meta_mallinfo_file_hook },
+        { .name = "master",
+          .type = IA_IFDIR,
+          .hook = meta_master_dir_hook },
+        { .name = "measure_latency",
+          .type = IA_IFREG,
+          .hook = meta_measure_file_hook },
+
+        { .name = NULL }
+};
+
+
+/* Operations for the meta root directory: a fixed dirent table. */
+static struct meta_ops meta_root_dir_ops = {
+        .fixed_dirents = root_dir_dirents,
+};
+
+
+/*
+ * Hook for the meta root directory: installs meta_root_dir_ops on the
+ * inode.  No context is attached.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_root_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &meta_root_dir_ops);
+        return 0;
+}
diff --git a/xlators/meta/src/subvolume-link.c b/xlators/meta/src/subvolume-link.c
new file mode 100644
index 00000000000..018d42a53a1
--- /dev/null
+++ b/xlators/meta/src/subvolume-link.c
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+
+/*
+ * link_fill callback for a subvolume link: prints "../../<name>", where
+ * <name> is the name of the xlator stored as this inode's meta context.
+ *
+ * Always returns 0.
+ */
+static int
+subvolume_link_fill (xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+        xlator_t *target = meta_ctx_get (inode, this);
+
+        strprintf (strfd, "../../%s", target->name);
+        return 0;
+}
+
+
+/* Operations for a subvolume link; only link_fill is set. */
+struct meta_ops subvolume_link_ops = {
+        .link_fill = subvolume_link_fill,
+};
+
+
+/*
+ * Hook for a numbered entry of a "subvolumes" directory: converts
+ * loc->name to an integer, walks the children list of the xlator stored
+ * on the parent inode to that position, stores the child xlator found
+ * there as this inode's context and installs subvolume_link_ops.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ *
+ * NOTE(review): if the number does not match any child position, a NULL
+ * context is stored, and subvolume_link_fill dereferences the context
+ * without checking it.
+ */
+int
+meta_subvolume_link_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                          dict_t *xdata)
+{
+        xlator_t      *parent_xl = NULL;
+        xlator_t      *target    = NULL;
+        xlator_list_t *child     = NULL;
+        int            wanted    = 0;
+        int            position  = 0;
+
+        wanted = strtol (loc->name, 0, 0);
+        parent_xl = meta_ctx_get (loc->parent, this);
+
+        for (child = parent_xl->children; child;
+             child = child->next, position++) {
+                if (position != wanted)
+                        continue;
+
+                target = child->xlator;
+                break;
+        }
+
+        meta_ctx_set (loc->inode, this, target);
+        meta_ops_set (loc->inode, this, &subvolume_link_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/subvolumes-dir.c b/xlators/meta/src/subvolumes-dir.c
new file mode 100644
index 00000000000..00218b1a8ec
--- /dev/null
+++ b/xlators/meta/src/subvolumes-dir.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/*
+ * dir_fill callback for a "subvolumes" directory: builds one symlink
+ * dirent per child of the xlator stored as this inode's meta context.
+ * The entries are named by position: "0", "1", ...
+ *
+ * On success stores the newly allocated array in *dp and returns the
+ * number of entries; returns -1 if the allocation fails.
+ *
+ * NOTE(review): the gf_strdup results are not checked.
+ */
+static int
+subvolumes_dir_fill (xlator_t *this, inode_t *dir, struct meta_dirent **dp)
+{
+        struct meta_dirent *dirents = NULL;
+        xlator_t           *xl      = NULL;
+        xlator_list_t      *subv    = NULL;
+        int                 i       = 0;
+        int                 count   = 0;
+
+        xl = meta_ctx_get (dir, this);
+
+        /* first pass: count the children to size the array */
+        for (subv = xl->children; subv; subv = subv->next)
+                count++;
+
+        /* element count first, element size second */
+        dirents = GF_CALLOC (count, sizeof (*dirents), gf_meta_mt_dirents_t);
+        if (!dirents)
+                return -1;
+
+        /* second pass: one entry per child, named by its position */
+        for (subv = xl->children; subv; subv = subv->next) {
+                char num[16] = {0,};
+
+                snprintf (num, sizeof (num), "%d", i);
+
+                dirents[i].name = gf_strdup (num);
+                dirents[i].type = IA_IFLNK;
+                dirents[i].hook = meta_subvolume_link_hook;
+                i++;
+        }
+
+        *dp = dirents;
+
+        return count;
+}
+
+
+/* Operations for a "subvolumes" directory; only dir_fill is set. */
+static struct meta_ops subvolumes_dir_ops = {
+        .dir_fill = subvolumes_dir_fill,
+};
+
+
+/*
+ * Hook for the "subvolumes" entry: gives the new inode the same context
+ * pointer as its parent (read back as an xlator_t by subvolumes_dir_fill)
+ * and installs subvolumes_dir_ops.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_subvolumes_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                          dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+        meta_ops_set (inode, this, &subvolumes_dir_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/top-link.c b/xlators/meta/src/top-link.c
new file mode 100644
index 00000000000..97cec0a2b62
--- /dev/null
+++ b/xlators/meta/src/top-link.c
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+
+/*
+ * link_fill callback for the "top" link: prints the name of the xlator
+ * that graph->top points at, where graph is the glusterfs_graph_t stored
+ * as this inode's meta context.
+ *
+ * Always returns 0.
+ */
+static int
+top_link_fill (xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+        glusterfs_graph_t *graph = meta_ctx_get (inode, this);
+        xlator_t          *top   = (xlator_t *) graph->top;
+
+        strprintf (strfd, "%s", top->name);
+        return 0;
+}
+
+
+/* Operations for the "top" link; only link_fill is set. */
+struct meta_ops top_link_ops = {
+        .link_fill = top_link_fill,
+};
+
+
+/*
+ * Hook for the "top" entry: installs top_link_ops on the new inode and
+ * gives it the same context pointer as its parent directory (read back as
+ * a glusterfs_graph_t by top_link_fill).
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_top_link_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &top_link_ops);
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+
+        return 0;
+}
diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c
deleted file mode 100644
index 787f27da4f0..00000000000
--- a/xlators/meta/src/tree.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-
-#include "meta.h"
-#include "meta-mem-types.h"
-
-static int
-is_meta_path (const char *path)
-{
- while (*path == '/')
- path++;
- if (!strncmp (path, ".meta", strlen (".meta")))
- return 1;
- return 0;
-}
-
-struct stat *
-new_stbuf (void)
-{
- static int next_inode = 0;
- struct stat *stbuf = GF_CALLOC (1, sizeof (struct stat), gf_meta_mt_stat);
-
- ERR_ABORT (stbuf);
-
- stbuf->st_dev = 0;
- stbuf->st_ino = next_inode++;
- stbuf->st_mode = S_IRUSR | S_IRGRP | S_IROTH;
- stbuf->st_nlink = 1;
- stbuf->st_uid = 0;
- stbuf->st_gid = 0;
- stbuf->st_rdev = 0;
- stbuf->st_size = 0;
- stbuf->st_blksize = 0;
- stbuf->st_blocks = 0;
- stbuf->st_atime = time (NULL);
- stbuf->st_atim.tv_nsec = 0;
- stbuf->st_mtime = stbuf->st_atime;
- stbuf->st_mtim.tv_nsec = 0;
- stbuf->st_ctime = stbuf->st_ctime;
- stbuf->st_ctim.tv_nsec = 0;
-
- return stbuf;
-}
-
-/* find an entry among the siblings of an entry */
-static meta_dirent_t *
-find_entry (meta_dirent_t *node, const char *dir)
-{
- meta_dirent_t *trav = node;
- while (trav) {
- if (!strcmp (trav->name, dir))
- return trav;
- trav = trav->next;
- }
- return NULL;
-}
-
-/*
- * Return the meta_dirent_t corresponding to the pathname.
- *
- * If pathname does not exist in the meta tree, try to return
- * its highest parent that does exist. The part of the
- * pathname that is left over is returned in the value-result
- * variable {remain}.
- * For example, for "/.meta/xlators/brick1/view/foo/bar/baz",
- * return the entry for "/.meta/xlators/brick1/view"
- * and set remain to "/bar/baz"
- */
-
-meta_dirent_t *
-lookup_meta_entry (meta_dirent_t *root, const char *path,
- char **remain)
-{
- char *_path = gf_strdup (path);
-
- if (!is_meta_path (path))
- return NULL;
-
- meta_dirent_t *trav = root;
- char *dir = strtok (_path, "/");
- dir = strtok (NULL, "/");
-
- while (dir) {
- meta_dirent_t *ntrav;
- ntrav = find_entry (trav->children, dir);
- if (!ntrav) {
- /* we have reached bottom of the meta tree.
- Unknown dragons lie further below */
- if (remain) {
- char *piece = dir;
- while (piece) {
- char *tmp = *remain;
- if (*remain)
- gf_asprintf (remain, "/%s/%s", *remain, piece);
- else
- gf_asprintf (remain, "/%s", piece);
- if (tmp) GF_FREE (tmp);
- piece = strtok (NULL, "/");
- }
- }
- return trav;
- }
- dir = strtok (NULL, "/");
- trav = ntrav;
- }
-
- GF_FREE (_path);
- return trav;
-}
-
-meta_dirent_t *
-insert_meta_entry (meta_dirent_t *root, const char *path,
- int type, struct stat *stbuf, struct xlator_fops *fops)
-{
- if (!is_meta_path (path))
- return NULL;
- char *slashpos = strrchr (path, '/');
- char *dir = strndup (path, slashpos - path);
- meta_dirent_t *parent = lookup_meta_entry (root, dir, NULL);
- if (!dir)
- return NULL;
-
- meta_dirent_t *new = GF_CALLOC (1, sizeof (meta_dirent_t),
- gf_meta_mt_meta_dirent_t);
- ERR_ABORT (new);
- new->name = gf_strdup (slashpos+1);
- new->type = type;
- new->parent = parent;
- new->next = parent->children;
- parent->children = new;
- if (stbuf)
- new->stbuf = stbuf;
- else
- new->stbuf = new_stbuf ();
-
- new->stbuf->st_mode |= type;
- new->fops = fops;
- return new;
-}
-
-int main (void)
-{
- meta_dirent_t *root = GF_CALLOC (1, sizeof (meta_dirent_t),
- gf_meta_mt_meta_dirent_t);
- ERR_ABORT (root);
- root->name = gf_strdup (".meta");
-
- insert_meta_entry (root, "/.meta/version", S_IFREG, NULL, NULL);
- return 0;
-}
diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h
deleted file mode 100644
index bb2ffb976cb..00000000000
--- a/xlators/meta/src/tree.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __TREE_H__
-#define __TREE_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-meta_dirent_t *
-insert_meta_entry (meta_dirent_t *root, const char *path,
- int type, struct stat *stbuf, struct xlator_fops *fops);
-meta_dirent_t *
-lookup_meta_entry (meta_dirent_t *root, const char *path,
- char **remain);
-
-#endif /* __TREE_H__ */
diff --git a/xlators/meta/src/type-file.c b/xlators/meta/src/type-file.c
new file mode 100644
index 00000000000..f27e4b0a777
--- /dev/null
+++ b/xlators/meta/src/type-file.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "globals.h"
+#include "lkowner.h"
+
+
+/*
+ * file_fill callback for the "type" file: prints the type string of the
+ * xlator stored as this inode's meta context, followed by a newline.
+ *
+ * Returns strfd->size after the write.
+ */
+static int
+type_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        xlator_t *owner = meta_ctx_get (file, this);
+
+        strprintf (strfd, "%s\n", owner->type);
+        return strfd->size;
+}
+
+
+/* Operations for the "type" file; only file_fill is set. */
+static struct meta_ops type_file_ops = {
+        .file_fill = type_file_fill
+};
+
+
+/*
+ * Hook for the "type" entry: installs type_file_ops on the new inode and
+ * gives it the same context pointer as its parent directory.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_type_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &type_file_ops);
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+
+        return 0;
+}
diff --git a/xlators/meta/src/version-file.c b/xlators/meta/src/version-file.c
new file mode 100644
index 00000000000..ace419ea439
--- /dev/null
+++ b/xlators/meta/src/version-file.c
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+#include "globals.h"
+#include "lkowner.h"
+
+
+/*
+ * file_fill callback for the "version" file: prints a small brace-
+ * delimited record whose "Package Version" field is PACKAGE_VERSION.
+ * Neither this nor the inode is consulted.
+ *
+ * Returns strfd->size after the write.
+ */
+static int
+version_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        strprintf (strfd, "{ \n \"Package Version\": \"%s\"\n}",
+                   PACKAGE_VERSION);
+
+        return strfd->size;
+}
+
+
+/* Operations for the "version" file; only file_fill is set. */
+static struct meta_ops version_file_ops = {
+        .file_fill = version_file_fill
+};
+
+
+/*
+ * Hook for the "version" entry: installs version_file_ops on the new
+ * inode.  No context is attached.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_version_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &version_file_ops);
+        return 0;
+}
diff --git a/xlators/meta/src/view-dir.c b/xlators/meta/src/view-dir.c
new file mode 100644
index 00000000000..dc208cb5b65
--- /dev/null
+++ b/xlators/meta/src/view-dir.c
@@ -0,0 +1,40 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/*
+ * Fixed entries of a "view" directory: only DOT_DOTDOT, followed by an
+ * entry with a NULL name (presumably the list terminator).
+ */
+static struct meta_dirent view_dir_dirents[] = {
+        DOT_DOTDOT,
+        { .name = NULL },
+};
+
+
+/* Operations for a "view" directory: a fixed dirent table. */
+static struct meta_ops view_dir_ops = {
+        .fixed_dirents = view_dir_dirents,
+};
+
+
+/*
+ * Hook for the "view" entry: gives the new inode the same context pointer
+ * as its parent directory and installs view_dir_ops.
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_view_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+        meta_ops_set (inode, this, &view_dir_ops);
+
+        return 0;
+}
diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c
deleted file mode 100644
index cbb0b710a97..00000000000
--- a/xlators/meta/src/view.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-
-#include "meta.h"
-
-/*
- * This file contains fops for the files and directories in
- * an xlator directory
- */
-
-/* /.meta/xlators/.../type */
-
-int32_t
-meta_xlator_type_readv (call_frame_t *frame, xlator_t *this,
- dict_t *fd, size_t size, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
- xlator_t *view_xlator = file->view_xlator;
-
- int type_size;
- type_size = strlen (view_xlator->type);
-
- struct iovec vec;
- vec.iov_base = view_xlator->type + offset;
- vec.iov_len = min (type_size - offset, size);
-
- STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1);
- return 0;
- }
-}
-
-int32_t
-meta_xlator_type_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
- xlator_t *view_xlator = file->view_xlator;
- file->stbuf->st_size = strlen (view_xlator->type);
-
- STACK_UNWIND (frame, 0, 0, file->stbuf);
- return 0;
-}
-
-struct xlator_fops meta_xlator_type_fops = {
- .readv = meta_xlator_type_readv,
- .getattr = meta_xlator_type_getattr
-};
-
-/*
- * fops for the "view" directory
- * {xlator}/view shows the filesystem as it appears
- * to {xlator}
- */
-
-static int32_t
-meta_xlator_view_getattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-meta_xlator_view_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, &op_path);
-
- if (op_path) {
- STACK_WIND (frame, meta_xlator_view_getattr_cbk, file->view_xlator,
- file->view_xlator->fops->getattr,
- op_path);
- }
- else {
- STACK_UNWIND (frame, 0, 0, file->stbuf);
- }
-
- return 0;
-}
-
-static int32_t
-meta_xlator_view_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dir_entry_t *entries, int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-int32_t
-meta_xlator_view_readdir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *dir = lookup_meta_entry (root, path, &op_path);
-
- STACK_WIND (frame, meta_xlator_view_readdir_cbk,
- dir->view_xlator, dir->view_xlator->fops->readdir,
- op_path ? op_path : "/");
- return 0;
-}
-
-static int32_t
-meta_xlator_view_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *ctx, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, ctx, buf);
- return 0;
-}
-
-int32_t
-meta_xlator_view_open (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, &op_path);
- STACK_WIND (frame, meta_xlator_view_open_cbk,
- file->view_xlator, file->view_xlator->fops->open,
- op_path, flags, mode);
- return 0;
-}
-
-int32_t
-meta_xlator_view_create (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, &op_path);
- STACK_WIND (frame, meta_xlator_view_open_cbk,
- file->view_xlator, file->view_xlator->fops->create,
- op_path, flags, mode);
- return 0;
-}
-
-static int32_t
-meta_xlator_view_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count);
- return 0;
-}
-
-int32_t
-meta_xlator_view_readv (call_frame_t *frame, xlator_t *this,
- dict_t *fd, size_t size, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- STACK_WIND (frame, meta_xlator_view_readv_cbk,
- file->view_xlator, file->view_xlator->fops->readv,
- fd, size, offset);
- return 0;
- }
-
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
- return 0;
-}
-
-static int32_t
-meta_xlator_view_writev_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-meta_xlator_view_writev (call_frame_t *frame, xlator_t *this,
- dict_t *fd,
- struct iovec *vector, int32_t count, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- STACK_WIND (frame, meta_xlator_view_writev_cbk,
- file->view_xlator, file->view_xlator->fops->writev,
- fd, vector, count, offset);
- return 0;
- }
-
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
- return 0;
-}
-
-struct xlator_fops meta_xlator_view_fops = {
- .getattr = meta_xlator_view_getattr,
- .readdir = meta_xlator_view_readdir,
- .open = meta_xlator_view_open,
- .create = meta_xlator_view_create,
- .readv = meta_xlator_view_readv,
- .writev = meta_xlator_view_writev
-};
diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h
deleted file mode 100644
index d26d42e26c4..00000000000
--- a/xlators/meta/src/view.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __VIEW_H__
-#define __VIEW_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-struct xlator_fops meta_xlator_type_fops;
-struct xlator_fops meta_xlator_view_fops;
-
-#endif /* __VIEW_H__ */
diff --git a/xlators/meta/src/volfile-file.c b/xlators/meta/src/volfile-file.c
new file mode 100644
index 00000000000..c6027658fee
--- /dev/null
+++ b/xlators/meta/src/volfile-file.c
@@ -0,0 +1,86 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "strfd.h"
+
+
+
+/*
+ * dict_foreach callback: prints one " option <key> <value>" line for a
+ * single dict entry, taking the value text from value->data.
+ *
+ * strfd is the output buffer handed through dict_foreach.  The dict
+ * argument is not used.  Always returns 0.
+ */
+static int
+xldump_options (dict_t *this, char *key, data_t *value, void *strfd)
+{
+        strprintf (strfd, " option %s %s\n", key, value->data);
+
+        return 0;
+}
+
+
+/*
+ * Prints a single " subvolumes <name> <name> ..." line listing the
+ * children of the given xlator.  Prints nothing when there are none.
+ */
+static void
+xldump_subvolumes (xlator_t *this, void *strfd)
+{
+        xlator_list_t *child = this->children;
+
+        if (child == NULL)
+                return;
+
+        strprintf (strfd, " subvolumes");
+
+        while (child != NULL) {
+                strprintf (strfd, " %s", child->xlator->name);
+                child = child->next;
+        }
+
+        strprintf (strfd, "\n");
+}
+
+
+/*
+ * Prints one "volume ... end-volume" stanza for a single xlator: its
+ * name, its type, one line per option and the subvolumes line, followed
+ * by a blank line.
+ *
+ * Passed as the per-xlator callback to xlator_foreach_depth_first.
+ */
+static void
+xldump (xlator_t *each, void *strfd)
+{
+        /* header */
+        strprintf (strfd, "volume %s\n", each->name);
+        strprintf (strfd, " type %s\n", each->type);
+
+        /* body */
+        dict_foreach (each->options, xldump_options, strfd);
+        xldump_subvolumes (each, strfd);
+
+        /* trailer */
+        strprintf (strfd, "end-volume\n");
+        strprintf (strfd, "\n");
+}
+
+
+/*
+ * file_fill callback for the "volfile" file: runs xldump on the xlators
+ * reached from graph->top through xlator_foreach_depth_first, where graph
+ * is the glusterfs_graph_t stored as this inode's meta context.
+ *
+ * Returns strfd->size afterwards.
+ */
+static int
+volfile_file_fill (xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+        glusterfs_graph_t *graph = meta_ctx_get (file, this);
+
+        xlator_foreach_depth_first (graph->top, xldump, strfd);
+        return strfd->size;
+}
+
+
+/* Operations for the "volfile" file; only file_fill is set. */
+static struct meta_ops volfile_file_ops = {
+        .file_fill = volfile_file_fill
+};
+
+
+/*
+ * Hook for the "volfile" entry: installs volfile_file_ops on the new inode
+ * and gives it the same context pointer as its parent directory (read
+ * back as a glusterfs_graph_t by volfile_file_fill).
+ *
+ * frame and xdata are not used.  Always returns 0.
+ */
+int
+meta_volfile_file_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                        dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ops_set (inode, this, &volfile_file_ops);
+        meta_ctx_set (inode, this, meta_ctx_get (loc->parent, this));
+
+        return 0;
+}
diff --git a/xlators/meta/src/xlator-dir.c b/xlators/meta/src/xlator-dir.c
new file mode 100644
index 00000000000..910e82b3871
--- /dev/null
+++ b/xlators/meta/src/xlator-dir.c
@@ -0,0 +1,95 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+
+/*
+ * Fixed entries of an xlator directory. Each entry names the hook that is
+ * attached to it; the table ends with a NULL-name sentinel.
+ */
+static struct meta_dirent xlator_dir_dirents[] = {
+        DOT_DOTDOT,
+
+        { .name = "view",       .type = IA_IFDIR, .hook = meta_view_dir_hook },
+        { .name = "type",       .type = IA_IFREG, .hook = meta_type_file_hook },
+        { .name = "name",       .type = IA_IFREG, .hook = meta_name_file_hook },
+        { .name = "subvolumes", .type = IA_IFDIR,
+          .hook = meta_subvolumes_dir_hook },
+        { .name = "options",    .type = IA_IFDIR,
+          .hook = meta_options_dir_hook },
+        { .name = "private",    .type = IA_IFREG,
+          .hook = meta_private_file_hook },
+        { .name = "history",    .type = IA_IFREG,
+          .hook = meta_history_file_hook },
+        { .name = "meminfo",    .type = IA_IFREG,
+          .hook = meta_meminfo_file_hook },
+        { .name = "profile",    .type = IA_IFREG,
+          .hook = meta_profile_file_hook },
+
+        { .name = NULL }
+};
+
+
+/* meta operations for an xlator directory: a fixed list of entries. */
+static struct meta_ops xlator_dir_ops = { .fixed_dirents = xlator_dir_dirents };
+
+
+/*
+ * Lookup hook for a per-xlator directory: find the xlator called
+ * loc->name in the graph stored as the parent's meta context, store it as
+ * the new inode's context and attach xlator_dir_ops. Always returns 0.
+ */
+int
+meta_xlator_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                      dict_t *xdata)
+{
+        glusterfs_graph_t *graph = meta_ctx_get (loc->parent, this);
+        xlator_t          *found = NULL;
+
+        found = xlator_search_by_name (graph->first, loc->name);
+
+        meta_ctx_set (loc->inode, this, found);
+        meta_ops_set (loc->inode, this, &xlator_dir_ops);
+
+        return 0;
+}
+
+
+/*
+ * Lookup hook for the master directory: use this->ctx->master as the new
+ * inode's meta context and attach xlator_dir_ops. Always returns 0.
+ */
+int
+meta_master_dir_hook (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                      dict_t *xdata)
+{
+        inode_t *inode = loc->inode;
+
+        meta_ctx_set (inode, this, this->ctx->master);
+        meta_ops_set (inode, this, &xlator_dir_ops);
+
+        return 0;
+}
diff --git a/xlators/mgmt/Makefile.am b/xlators/mgmt/Makefile.am
new file mode 100644
index 00000000000..bf09b07c309
--- /dev/null
+++ b/xlators/mgmt/Makefile.am
@@ -0,0 +1,3 @@
+# Management xlators: the only sub-directory to descend into is glusterd.
+SUBDIRS = glusterd
+
+# No extra files to remove on "make clean".
+CLEANFILES =
diff --git a/xlators/protocol/legacy/server/Makefile.am b/xlators/mgmt/glusterd/Makefile.am
index d471a3f9243..d471a3f9243 100644
--- a/xlators/protocol/legacy/server/Makefile.am
+++ b/xlators/mgmt/glusterd/Makefile.am
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
new file mode 100644
index 00000000000..f3381e34930
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -0,0 +1,67 @@
+# glusterd is built as a libtool module (-module) and installed under the
+# versioned xlator/mgmt directory.
+xlator_LTLIBRARIES = glusterd.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
+glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\""
+glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+# Translation units compiled into glusterd.la.
+glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+ glusterd-volgen.c glusterd-rebalance.c glusterd-ganesha.c \
+ glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \
+ glusterd-replace-brick.c glusterd-log-ops.c \
+ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
+ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
+ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
+ glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \
+ glusterd-snapshot-utils.c glusterd-conn-mgmt.c \
+ glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \
+ glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \
+ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
+ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c
+
+
+# Libraries linked into the module; -llvm2app is added only when the BD
+# xlator is enabled.
+glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS)
+if ENABLE_BD_XLATOR
+glusterd_la_LIBADD += -llvm2app
+endif
+
+# Headers that belong to this directory but are not installed.
+noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
+ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
+ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \
+ glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \
+ glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \
+ glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \
+ glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \
+ glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \
+ glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \
+ glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \
+ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+
+# Include paths and install-location macros passed to every compile.
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/rbtree \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/mount \
+ -I$(CONTRIBDIR)/userspace-rcu \
+ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\" \
+ -DCONFDIR=\"$(sysconfdir)/ganesha\" \
+ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS)
+
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(URCU_CFLAGS) $(URCU_CDS_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir) $(URCU_LIBS) $(URCU_CDS_LIBS)
+
+# No extra files to remove on "make clean".
+CLEANFILES =
+
+# When the glusterd work directory is installed, create it, move any
+# pre-existing $(sysconfdir)/glusterd into it and leave a compatibility
+# symlink at $(sysconfdir)/glusterd. Every path that is written is under
+# $(DESTDIR) so that staged installs do not touch the running system, and
+# the link target is the final path (without $(DESTDIR)) so the link is
+# still valid once the staged tree has been relocated.
+# Failures of the move/link steps are deliberately ignored (|| true).
+install-data-hook:
+if GF_INSTALL_GLUSTERD_WORKDIR
+	$(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)
+	(stat $(DESTDIR)$(sysconfdir)/glusterd && \
+	mv $(DESTDIR)$(sysconfdir)/glusterd $(DESTDIR)$(GLUSTERD_WORKDIR)) || true;
+	(ln -sf $(GLUSTERD_WORKDIR) $(DESTDIR)$(sysconfdir)/glusterd) || true;
+endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
new file mode 100644
index 00000000000..ee96ccbff80
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
@@ -0,0 +1,207 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-bitd-svc.h"
+#include "glusterd-svc-helper.h"
+
+/*
+ * Install the bitd-specific manager, start and stop callbacks on @svc.
+ */
+void
+glusterd_bitdsvc_build (glusterd_svc_t *svc)
+{
+        svc->start   = glusterd_bitdsvc_start;
+        svc->stop    = glusterd_bitdsvc_stop;
+        svc->manager = glusterd_bitdsvc_manager;
+}
+
+/*
+ * Initialise @svc as the "bitd" service via the generic
+ * glusterd_svc_init(); its return value is passed through unchanged.
+ */
+int
+glusterd_bitdsvc_init (glusterd_svc_t *svc)
+{
+        int ret = glusterd_svc_init (svc, bitd_svc_name);
+
+        return ret;
+}
+
+/*
+ * Build the path of the bitd volfile below conf->workdir and (re)generate
+ * the volfile there from build_bitd_graph.
+ *
+ * Returns the result of glusterd_create_global_volfile(); a failure is
+ * logged as GD_MSG_VOLFILE_CREATE_FAIL.
+ */
+static int
+glusterd_bitdsvc_create_volfile ()
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *conf = this->private;
+        char             filepath[PATH_MAX] = {0,};
+        int              ret = -1;
+
+        GF_ASSERT (conf);
+
+        glusterd_svc_build_volfile_path (bitd_svc_name, conf->workdir,
+                                         filepath, sizeof (filepath));
+
+        ret = glusterd_create_global_volfile (build_bitd_graph, filepath,
+                                              NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Failed to create volfile");
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Manager callback of the bitd service.
+ *
+ * Initialises the service on first use. Afterwards the daemon is either
+ * stopped with SIGTERM (when glusterd_should_i_stop_bitd() says so), or
+ * its volfile is regenerated and the daemon is restarted: stop with
+ * SIGKILL, start with @flags, then connect to it.
+ *
+ * @svc   service object; svc->inited is set once initialisation succeeds
+ * @data  not used by this function
+ * @flags passed to svc->start()
+ *
+ * Returns 0 on success, otherwise the non-zero result of the first step
+ * that failed.
+ */
+int
+glusterd_bitdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
+{
+        int       ret = 0;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!svc->inited) {
+                ret = glusterd_bitdsvc_init (svc);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BITD_INIT_FAIL, "Failed to init "
+                                "bitd service");
+                        goto out;
+                } else {
+                        svc->inited = _gf_true;
+                        gf_msg_debug (this->name, 0, "BitD service "
+                                      "initialized");
+                }
+        }
+
+        if (glusterd_should_i_stop_bitd ()) {
+                ret = svc->stop (svc, SIGTERM);
+        } else {
+                /* new volfile first, then a full restart to pick it up */
+                ret = glusterd_bitdsvc_create_volfile ();
+                if (ret)
+                        goto out;
+
+                ret = svc->stop (svc, SIGKILL);
+                if (ret)
+                        goto out;
+
+                ret = svc->start (svc, flags);
+                if (ret)
+                        goto out;
+
+                ret = glusterd_conn_connect (&(svc->conn));
+                if (ret)
+                        goto out;
+        }
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Start the bitd daemon through glusterd_svc_start(), passing
+ * "--global-timer-wheel" as "cmdarg0" in a temporary dictionary.
+ *
+ * Returns -1 when the dictionary cannot be allocated, otherwise the
+ * result of dict_set_str() (on failure) or of glusterd_svc_start().
+ */
+int
+glusterd_bitdsvc_start (glusterd_svc_t *svc, int flags)
+{
+        int     ret = -1;
+        dict_t *cmdict = dict_new ();
+
+        if (!cmdict)
+                return ret;
+
+        ret = dict_set_str (cmdict, "cmdarg0", "--global-timer-wheel");
+        if (!ret)
+                ret = glusterd_svc_start (svc, flags, cmdict);
+
+        /* the dictionary is released on success and on failure alike */
+        dict_unref (cmdict);
+
+        return ret;
+}
+
+/*
+ * Stop the bitd daemon by sending @sig through the generic
+ * glusterd_svc_stop(); its return value is passed through unchanged.
+ */
+int
+glusterd_bitdsvc_stop (glusterd_svc_t *svc, int sig)
+{
+        int ret = glusterd_svc_stop (svc, sig);
+
+        return ret;
+}
+
+/*
+ * Bring the running bitd daemon in line with the current configuration.
+ *
+ *  - bitd should not run at all        -> hand over to the service manager
+ *  - volfile unchanged                 -> nothing to do
+ *  - only options changed              -> rewrite the volfile and notify
+ *                                         via glusterd_fetchspec_notify()
+ *  - topology changed                  -> hand over to the service manager
+ *
+ * Returns 0 on success, -1 when the xlator or its private data is
+ * missing, otherwise the non-zero result of the step that failed.
+ */
+int
+glusterd_bitdsvc_reconfigure ()
+{
+        int              ret = -1;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *priv = NULL;
+        gf_boolean_t     identical = _gf_false;
+
+        this = THIS;
+        /* "this" is what is being validated, so its name cannot be used
+         * as the log domain here. */
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        if (glusterd_should_i_stop_bitd ())
+                goto manager;
+        /*
+         * Check both OLD and NEW volfiles, if they are SAME by size
+         * and cksum i.e. "character-by-character". If YES, then
+         * NOTHING has been changed, just return.
+         */
+        ret = glusterd_svc_check_volfile_identical (priv->bitd_svc.name,
+                                                    build_bitd_graph,
+                                                    &identical);
+        if (ret)
+                goto out;
+        if (identical) {
+                ret = 0;
+                goto out;
+        }
+
+        /*
+         * They are not identical. Find out if the topology is changed
+         * OR just the volume options. If just the options which got
+         * changed, then inform the xlator to reconfigure the options.
+         */
+        identical = _gf_false; /* RESET the FLAG */
+        ret = glusterd_svc_check_topology_identical (priv->bitd_svc.name,
+                                                     build_bitd_graph,
+                                                     &identical);
+        if (ret)
+                goto out; /* not able to compare due to some corruption */
+
+        /* Topology is not changed, but just the options. But write the
+         * options to bitd volfile, so that bitd will be reconfigured.
+         */
+        if (identical) {
+                ret = glusterd_bitdsvc_create_volfile ();
+                if (ret == 0) { /* Only if above PASSES */
+                        ret = glusterd_fetchspec_notify (THIS);
+                }
+                goto out;
+        }
+
+manager:
+        /*
+         * bitd volfile's topology has been changed. bitd server needs
+         * to be RESTARTED to ACT on the changed volfile.
+         */
+        ret = priv->bitd_svc.manager (&(priv->bitd_svc), NULL,
+                                      PROC_START_NO_WAIT);
+
+out:
+        /* reached with this == NULL when the first validation fails */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d",
+                      ret);
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h
new file mode 100644
index 00000000000..7f276fb0b5c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h
@@ -0,0 +1,40 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_BITD_SVC_H_
+#define _GLUSTERD_BITD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+/* Service name handed to the generic glusterd_svc_* helpers. */
+#define bitd_svc_name "bitd"
+
+/* Install the bitd manager/start/stop callbacks on @svc. */
+void
+glusterd_bitdsvc_build (glusterd_svc_t *svc);
+
+/* Initialise @svc as the bitd service. */
+int
+glusterd_bitdsvc_init (glusterd_svc_t *svc);
+
+/* Manager callback: stop bitd, or regenerate its volfile and restart. */
+int
+glusterd_bitdsvc_manager (glusterd_svc_t *svc, void *data, int flags);
+
+/* Start the bitd daemon with the given process-start @flags. */
+int
+glusterd_bitdsvc_start (glusterd_svc_t *svc, int flags);
+
+/* Stop the bitd daemon by sending it signal @sig. */
+int
+glusterd_bitdsvc_stop (glusterd_svc_t *svc, int sig);
+
+/* Reconfigure or restart bitd after a configuration change.
+ * Declared with (void) so that calls with arguments are rejected at
+ * compile time. */
+int
+glusterd_bitdsvc_reconfigure (void);
+
+/* NOTE(review): no definition of this function is visible next to the
+ * other bitd service functions - confirm the prototype is still needed. */
+void
+glusterd_bitdsvc_build_volfile_path (char *server, char *workdir,
+                                     char *volfile, size_t len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
new file mode 100644
index 00000000000..6e91106c8e5
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
@@ -0,0 +1,709 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+#include "syscall.h"
+#include "byte-order.h"
+#include "compat-errno.h"
+#include "glusterd-scrub-svc.h"
+#include "glusterd-messages.h"
+
+#include <sys/wait.h>
+#include <dlfcn.h>
+
+/*
+ * Printable name of each bitrot sub-command, indexed by its option type.
+ * Option types without an entry here are left as NULL.
+ */
+const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = {
+        [GF_BITROT_OPTION_TYPE_NONE]           = "none",
+        [GF_BITROT_OPTION_TYPE_ENABLE]         = "enable",
+        [GF_BITROT_OPTION_TYPE_DISABLE]        = "disable",
+        [GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE] = "scrub-throttle",
+        [GF_BITROT_OPTION_TYPE_SCRUB_FREQ]     = "scrub-frequency",
+        [GF_BITROT_OPTION_TYPE_SCRUB]          = "scrub",
+        [GF_BITROT_OPTION_TYPE_EXPIRY_TIME]    = "expiry-time",
+};
+
+/*
+ * Handler body for the CLI bitrot request.
+ *
+ * Decodes the request dictionary, reads "volname" and "type", checks that
+ * the cluster op-version supports the command and then starts the
+ * operation as a synctask: GD_OP_SCRUB_STATUS when the request is a
+ * "scrub status" query, GD_OP_BITROT otherwise.
+ *
+ * On any failure an error response is sent to the CLI; its text is msg,
+ * or "Bitrot operation failed" when no specific message was prepared.
+ *
+ * Returns the result of the last step performed; once an error response
+ * has been sent this is the return value of
+ * glusterd_op_send_cli_response().
+ */
+int
+__glusterd_handle_bitrot (rpcsvc_request_t *req)
+{
+        int32_t          ret = -1;
+        gf_cli_req       cli_req = { {0,} };
+        dict_t          *dict = NULL;
+        glusterd_op_t    cli_op = GD_OP_BITROT;
+        char            *volname = NULL;
+        char            *scrub = NULL;
+        const char      *op_name = "unknown";
+        int32_t          type = 0;
+        char             msg[2048] = {0,};
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg, sizeof (msg), "Unable to decode the "
+                                  "command");
+                        goto out;
+                } else {
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Unable to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name, "
+                        "while handling bitrot command");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Unable to get type of command");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get type of cmd, "
+                        "while handling bitrot command");
+                goto out;
+        }
+
+        if (conf->op_version < GD_OP_VERSION_3_7_0) {
+                /* "type" comes straight from the request and has not been
+                 * validated: only use it as an index when it is inside
+                 * the table and the slot actually holds a name. */
+                if (type >= 0 && type < GF_BITROT_OPTION_TYPE_MAX &&
+                    gd_bitrot_op_list[type] != NULL)
+                        op_name = gd_bitrot_op_list[type];
+
+                snprintf (msg, sizeof (msg), "Cannot execute command. The "
+                          "cluster is operating at version %d. Bitrot command "
+                          "%s is unavailable in this version", conf->op_version,
+                          op_name);
+                ret = -1;
+                goto out;
+        }
+
+        if (type == GF_BITROT_CMD_SCRUB_STATUS) {
+                /* Backward compatibility handling for scrub status command*/
+                if (conf->op_version < GD_OP_VERSION_3_7_7) {
+                        snprintf (msg, sizeof (msg), "Cannot execute command. "
+                                  "The cluster is operating at version %d. "
+                                  "Bitrot scrub status command unavailable in "
+                                  "this version", conf->op_version);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_get_str (dict, "scrub-value", &scrub);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get scrub value.");
+                        ret = -1;
+                        goto out;
+                }
+
+                if (!strncmp (scrub, "status", strlen ("status"))) {
+                        ret = glusterd_op_begin_synctask (req,
+                                                          GD_OP_SCRUB_STATUS,
+                                                          dict);
+                        goto out;
+                }
+        }
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_BITROT, dict);
+
+out:
+        if (ret) {
+                if (msg[0] == '\0')
+                        snprintf (msg, sizeof (msg), "Bitrot operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, msg);
+        }
+
+        return ret;
+}
+
+/*
+ * Entry point for the bitrot request: runs __glusterd_handle_bitrot()
+ * through glusterd_big_locked_handler() and returns its result.
+ */
+int
+glusterd_handle_bitrot (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req, __glusterd_handle_bitrot);
+
+        return ret;
+}
+
+/*
+ * Store the requested scrub throttle in the volume options and
+ * reconfigure the scrub service.
+ *
+ * @volinfo   volume whose option dictionary is updated
+ * @dict      request dictionary; "scrub-throttle-value" is read from it
+ * @key       volume option name under which the value is stored
+ * @op_errstr not used by this function
+ *
+ * Returns 0 on success, non-zero when the value cannot be read or
+ * stored, or when the scrub service cannot be reconfigured.
+ */
+static int
+glusterd_bitrot_scrub_throttle (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                char *key, char **op_errstr)
+{
+        int32_t   ret = -1;
+        char     *scrub_throttle = NULL;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "scrub-throttle-value", &scrub_throttle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch scrub-"
+                        "throttle value");
+                goto out;
+        }
+
+        /* The value is owned by the request dictionary, so the volume
+         * dictionary gets its own copy; the helper does the duplication
+         * (as glusterd_bitrot_expiry_time() already relies on). */
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, key,
+                                          scrub_throttle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set option %s",
+                        key);
+                goto out;
+        }
+
+        ret = glusterd_scrubsvc_reconfigure ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SCRUBSVC_RECONF_FAIL,
+                        "Failed to reconfigure scrub "
+                        "services");
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Store the requested scrub frequency in the volume options and
+ * reconfigure the scrub service.
+ *
+ * @volinfo   volume whose option dictionary is updated
+ * @dict      request dictionary; "scrub-frequency-value" is read from it
+ * @key       volume option name under which the value is stored
+ * @op_errstr not used by this function
+ *
+ * Returns 0 on success, non-zero when the value cannot be read or
+ * stored, or when the scrub service cannot be reconfigured.
+ */
+static int
+glusterd_bitrot_scrub_freq (glusterd_volinfo_t *volinfo, dict_t *dict,
+                            char *key, char **op_errstr)
+{
+        int32_t   ret = -1;
+        char     *scrub_freq = NULL;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "scrub-frequency-value", &scrub_freq);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch scrub-"
+                        "freq value");
+                goto out;
+        }
+
+        /* The value is owned by the request dictionary, so the volume
+         * dictionary gets its own copy; the helper does the duplication
+         * (as glusterd_bitrot_expiry_time() already relies on). */
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, key, scrub_freq);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set option %s",
+                        key);
+                goto out;
+        }
+
+        ret = glusterd_scrubsvc_reconfigure ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SCRUBSVC_RECONF_FAIL,
+                        "Failed to reconfigure scrub "
+                        "services");
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Store the requested scrubber state in the volume options and
+ * reconfigure the scrub service. A request value of "resume" is stored
+ * as "Active"; every other value is stored as given.
+ *
+ * @volinfo   volume whose option dictionary is updated
+ * @dict      request dictionary; "scrub-value" is read from it
+ * @key       volume option name under which the state is stored
+ * @op_errstr not used by this function
+ *
+ * Returns 0 on success, non-zero when the value cannot be read or
+ * stored, or when the scrub service cannot be reconfigured.
+ */
+static int
+glusterd_bitrot_scrub (glusterd_volinfo_t *volinfo, dict_t *dict,
+                       char *key, char **op_errstr)
+{
+        int32_t     ret = -1;
+        char       *scrub_value = NULL;
+        const char *state = NULL;
+        xlator_t   *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "scrub-value", &scrub_value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch scrub "
+                        "value");
+                goto out;
+        }
+
+        state = strcmp (scrub_value, "resume") ? scrub_value : "Active";
+
+        /* The volume dictionary gets its own copy of the state string;
+         * the helper does the duplication (as
+         * glusterd_bitrot_expiry_time() already relies on). */
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, key, state);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set option %s",
+                        key);
+                goto out;
+        }
+
+        ret = glusterd_scrubsvc_reconfigure ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SCRUBSVC_RECONF_FAIL,
+                        "Failed to reconfigure scrub "
+                        "services");
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Store the requested expiry time in the volume options (as a decimal
+ * string) and reconfigure the bitd service.
+ *
+ * @volinfo   volume whose option dictionary is updated
+ * @dict      request dictionary; the uint32 "expiry-time" is read from it
+ * @key       volume option name under which the value is stored
+ * @op_errstr not used by this function
+ *
+ * Returns 0 on success, non-zero when the value cannot be read or
+ * stored, or when the bitd service cannot be reconfigured.
+ */
+static int
+glusterd_bitrot_expiry_time (glusterd_volinfo_t *volinfo, dict_t *dict,
+                             char *key, char **op_errstr)
+{
+        int32_t   ret = -1;
+        uint32_t  expiry_time = 0;
+        xlator_t *this = NULL;
+        char      dkey[1024] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_uint32 (dict, "expiry-time", &expiry_time);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get bitrot expiry"
+                        " timer value.");
+                goto out;
+        }
+
+        /* The value is unsigned: printing it with %d would turn anything
+         * above INT_MAX into a negative number. */
+        snprintf (dkey, sizeof (dkey), "%u", expiry_time);
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, key, dkey);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set option %s",
+                        key);
+                goto out;
+        }
+
+        ret = glusterd_bitdsvc_reconfigure ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BITDSVC_RECONF_FAIL,
+                        "Failed to reconfigure bitrot "
+                        "services");
+                goto out;
+        }
+out:
+        return ret;
+}
+
+/*
+ * Turn bitrot detection on for @volinfo: set VKEY_FEATURES_BITROT to "on"
+ * and "features.scrub" to "Active" in the volume options.
+ *
+ * Fails when the volume is not started or bitrot is already enabled; in
+ * those cases *op_errstr explains why. For any other failure a generic
+ * message naming the volume is stored in *op_errstr.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_bitrot_enable (glusterd_volinfo_t *volinfo, char **op_errstr)
+{
+        int32_t   ret = -1;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        if (glusterd_is_volume_started (volinfo) == 0) {
+                *op_errstr = gf_strdup ("Volume is stopped, start volume "
+                                        "to enable bitrot.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_is_bitrot_enabled (volinfo);
+        if (ret) {
+                *op_errstr = gf_strdup ("Bitrot is already enabled");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, VKEY_FEATURES_BITROT,
+                                          "on");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "dict set failed");
+                goto out;
+        }
+
+        /*Once bitrot is enable scrubber should be in Active state*/
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, "features.scrub",
+                                          "Active");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set option "
+                        "features.scrub value");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* volinfo is NULL here when its own validation above failed, so
+         * it must be checked before the volume name is used. */
+        if (ret && volinfo && op_errstr && !*op_errstr)
+                gf_asprintf (op_errstr, "Enabling bitrot on volume %s has been "
+                             "unsuccessful", volinfo->volname);
+        return ret;
+}
+
+/*
+ * Turn bitrot detection off for @volinfo: set VKEY_FEATURES_BITROT to
+ * "off" and "features.scrub" to "Inactive" in the volume options.
+ *
+ * On failure a generic message naming the volume is stored in
+ * *op_errstr unless it already holds a message.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_bitrot_disable (glusterd_volinfo_t *volinfo, char **op_errstr)
+{
+        int32_t   ret = -1;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, VKEY_FEATURES_BITROT,
+                                          "off");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "dict set failed");
+                goto out;
+        }
+
+        /*Once bitrot disabled scrubber should be Inactive state*/
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, "features.scrub",
+                                          "Inactive");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "features.scrub value");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* volinfo is NULL here when one of the validations above failed
+         * before or on it, so it must be checked before the volume name
+         * is used. */
+        if (ret && volinfo && op_errstr && !*op_errstr)
+                gf_asprintf (op_errstr, "Disabling bitrot on volume %s has "
+                             "been unsuccessful", volinfo->volname);
+        return ret;
+}
+
+/*
+ * Decide whether the bitrot/scrubber daemon has nothing to do here.
+ *
+ * Returns _gf_false as soon as one volume is found that has bitrot
+ * enabled, is started and has a brick for which
+ * glusterd_is_local_brick() is true; returns _gf_true when no such
+ * volume exists.
+ */
+gf_boolean_t
+glusterd_should_i_stop_bitd ()
+{
+        glusterd_conf_t      *conf = THIS->private;
+        glusterd_volinfo_t   *vol = NULL;
+        glusterd_brickinfo_t *brick = NULL;
+        xlator_t             *this = THIS;
+
+        GF_ASSERT (this);
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                /* only started volumes with bitrot enabled matter */
+                if (!glusterd_is_bitrot_enabled (vol) ||
+                    vol->status != GLUSTERD_STATUS_STARTED)
+                        continue;
+
+                /* one local brick is enough to keep the daemon; if this
+                 * volume has none, the remaining volumes are checked */
+                cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                        if (glusterd_is_local_brick (this, vol, brick))
+                                return _gf_false;
+                }
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Run the bitd and scrub service managers after an enable or disable
+ * operation; every other opcode is a no-op.
+ *
+ * Returns 0 for opcodes that need no service action, otherwise the
+ * result of the bitd manager if it failed, else the result of the scrub
+ * manager.
+ */
+static int
+glusterd_manage_bitrot (int opcode)
+{
+        int              ret = -1;
+        xlator_t        *this = THIS;
+        glusterd_conf_t *priv = NULL;
+
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (opcode != GF_BITROT_OPTION_TYPE_ENABLE &&
+            opcode != GF_BITROT_OPTION_TYPE_DISABLE)
+                return 0;
+
+        ret = priv->bitd_svc.manager (&(priv->bitd_svc), NULL,
+                                      PROC_START_NO_WAIT);
+        if (ret)
+                return ret;
+
+        ret = priv->scrub_svc.manager (&(priv->scrub_svc), NULL,
+                                       PROC_START_NO_WAIT);
+
+        return ret;
+}
+
+/*
+ * Commit phase of a bitrot operation.
+ *
+ * Looks up the volume named by "volname" in @dict, applies the
+ * sub-command selected by "type" to the volume options, runs the
+ * bitd/scrub service managers where needed, regenerates the volfiles and
+ * finally stores the volinfo with an incremented version.
+ *
+ * @dict      operation dictionary ("volname", "type" and the
+ *            sub-command specific value)
+ * @op_errstr receives an error message for some failures
+ * @rsp_dict  not used by this function
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_bitrot (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        glusterd_volinfo_t *volinfo = NULL;
+        int32_t             ret = -1;
+        char               *volname = NULL;
+        int                 type = -1;
+        glusterd_conf_t    *priv = NULL;
+        xlator_t           *this = NULL;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get type from "
+                        "dict");
+                goto out;
+        }
+
+        switch (type) {
+        case GF_BITROT_OPTION_TYPE_ENABLE:
+                ret = glusterd_bitrot_enable (volinfo, op_errstr);
+                if (ret < 0)
+                        goto out;
+                break;
+
+        case GF_BITROT_OPTION_TYPE_DISABLE:
+                ret = glusterd_bitrot_disable (volinfo, op_errstr);
+                if (ret < 0)
+                        goto out;
+
+                break;
+
+        case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE:
+                ret = glusterd_bitrot_scrub_throttle (volinfo, dict,
+                                                      "features.scrub-throttle",
+                                                      op_errstr);
+                if (ret)
+                        goto out;
+                break;
+
+        case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
+                ret = glusterd_bitrot_scrub_freq (volinfo, dict,
+                                                  "features.scrub-freq",
+                                                  op_errstr);
+                if (ret)
+                        goto out;
+                break;
+
+        case GF_BITROT_OPTION_TYPE_SCRUB:
+                ret = glusterd_bitrot_scrub (volinfo, dict, "features.scrub",
+                                             op_errstr);
+                if (ret)
+                        goto out;
+                break;
+
+        case GF_BITROT_OPTION_TYPE_EXPIRY_TIME:
+                ret = glusterd_bitrot_expiry_time (volinfo, dict,
+                                                   "features.expiry-time",
+                                                   op_errstr);
+                if (ret)
+                        goto out;
+                /* explicit break: this case used to fall through into
+                 * the next one, which only breaks */
+                break;
+
+        case GF_BITROT_CMD_SCRUB_STATUS:
+                /* no volume option is changed by this sub-command */
+                break;
+
+        default:
+                gf_asprintf (op_errstr, "Bitrot command failed. Invalid "
+                             "opcode");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_manage_bitrot (type);
+        if (ret)
+                goto out;
+
+        ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "Unable to re-create "
+                        "volfiles");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Failed to store volinfo for "
+                              "bitrot");
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Staging (validation) phase of a bitrot operation.
+ *
+ * Rejects the request when the volume does not exist or is not started,
+ * when the operation type is missing, when bitrot is not enabled (for
+ * everything except "enable"), or when a scrub command asks for the
+ * state the scrubber is already in.
+ *
+ * Returns 0 when the operation may proceed, non-zero otherwise; most
+ * failures also store an explanatory message in *op_errstr.
+ */
+int
+glusterd_op_stage_bitrot (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int                 ret = 0;
+        int                 type = 0;
+        char               *volname = NULL;
+        char               *scrub_cmd = NULL;
+        char               *scrub_cmd_from_dict = NULL;
+        char                msg[2048] = {0,};
+        gf_boolean_t        same_state = _gf_false;
+        gf_boolean_t        resume_on_active = _gf_false;
+        xlator_t           *this = THIS;
+        glusterd_conf_t    *priv = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+                goto out;
+        }
+
+        if (!glusterd_is_volume_started (volinfo)) {
+                *op_errstr = gf_strdup ("Volume is stopped, start volume "
+                                        "before executing bit rot command.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get type for operation");
+
+                *op_errstr = gf_strdup ("Staging stage failed for bitrot "
+                                        "operation.");
+                goto out;
+        }
+
+        /* everything except "enable" needs bitrot to be enabled already */
+        if (type != GF_BITROT_OPTION_TYPE_ENABLE &&
+            !glusterd_is_bitrot_enabled (volinfo)) {
+                ret = -1;
+                gf_asprintf (op_errstr, "Bitrot is not enabled on volume %s",
+                             volname);
+                goto out;
+        }
+
+        /* only the scrub command needs further checks; ret is 0 here */
+        if (type != GF_BITROT_OPTION_TYPE_SCRUB)
+                goto out;
+
+        ret = dict_get_str (volinfo->dict, "features.scrub",
+                            &scrub_cmd_from_dict);
+        if (ret) {
+                /* no stored scrubber state to compare against */
+                ret = 0;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "scrub-value", &scrub_cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get scrub-value");
+                *op_errstr = gf_strdup ("Staging failed for bitrot "
+                                        "operation. Please check log file "
+                                        "for more details.");
+                goto out;
+        }
+
+        /* If scrubber is resume then value of scrubber will be
+         * "Active" in the dictionary. */
+        same_state = (strcmp (scrub_cmd_from_dict, scrub_cmd) == 0);
+        resume_on_active =
+                (strncmp ("Active", scrub_cmd_from_dict,
+                          strlen ("Active")) == 0) &&
+                (strncmp ("resume", scrub_cmd, strlen ("resume")) == 0);
+
+        if (same_state || resume_on_active) {
+                snprintf (msg, sizeof (msg),
+                          "Scrub is already %sd for volume %s", scrub_cmd,
+                          volinfo->volname);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+ out:
+        if (ret && op_errstr && *op_errstr)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_STAGE_BITROT_FAIL, "%s", *op_errstr);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
new file mode 100644
index 00000000000..24317726079
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -0,0 +1,3025 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-store.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-messages.h"
+#include "glusterd-server-quorum.h"
+#include "run.h"
+#include "glusterd-volgen.h"
+#include <sys/signal.h>
+
+/* misc */
+
+/*
+ * Tell the caller whether the cluster op-version allows tier operations.
+ *
+ * Returns _gf_true when conf->op_version is at least GD_OP_VERSION_3_7_0,
+ * _gf_false otherwise (also the result when either GF_VALIDATE_OR_GOTO
+ * check jumps to the failure label).
+ *
+ * When the answer is negative, 'op_errstr' is non-NULL and the private
+ * config was obtained, an explanatory message is written into 'op_errstr'.
+ * The write is an unbounded sprintf, so the caller owns the job of passing
+ * a buffer that is large enough for the message.
+ */
+gf_boolean_t
+glusterd_is_tiering_supported (char *op_errstr)
+{
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, unsupported);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, unsupported);
+
+        /* Anything at or above 3.7.0 can do tiering: nothing to report. */
+        if (conf->op_version >= GD_OP_VERSION_3_7_0)
+                return _gf_true;
+
+unsupported:
+        /* conf is still NULL if validation failed before it was fetched;
+         * in that case there is no version to print. */
+        if (op_errstr != NULL && conf)
+                sprintf (op_errstr, "Tier operation failed. The cluster is "
+                         "operating at version %d. Tiering"
+                         " is unavailable in this version.",
+                         conf->op_version);
+
+        return _gf_false;
+}
+
+/*
+ * Link 'brickinfo' into volinfo->bricks at the position computed from
+ * 'count', the number of new bricks that have already been added during
+ * the current add-brick operation.
+ *
+ *   stripe_cnt  - new stripe count; when non-zero the stripe formula is used
+ *                 and 'replica_cnt' is ignored.
+ *   replica_cnt - new replica count; used only when 'stripe_cnt' is zero.
+ *
+ * With G = size of an existing group (dist_leaf_count for stripe,
+ * replica_count for replica) and A = number of bricks being added to each
+ * group, the 1-based target index is
+ *
+ *       idx = ((count / A) * G) + (count + G)
+ *
+ * The new brick is linked right after the idx-th entry of the list.  If the
+ * list holds fewer than idx entries nothing is inserted.  Always returns 0.
+ *
+ * NOTE(review): A is used as a divisor, so the caller has to guarantee that
+ * the requested count really is larger than the current one.
+ */
+int
+add_brick_at_right_order (glusterd_brickinfo_t *brickinfo,
+                          glusterd_volinfo_t *volinfo, int count,
+                          int32_t stripe_cnt, int32_t replica_cnt)
+{
+        glusterd_brickinfo_t *cursor    = NULL;
+        int                   group     = 0;
+        int                   per_group = 0;
+        int                   target    = 0;
+        int                   position  = 0;
+
+        if (stripe_cnt) {
+                /* stripe count is being raised */
+                group     = volinfo->dist_leaf_count;
+                per_group = (stripe_cnt * volinfo->replica_count) - group;
+        } else {
+                /* replica count is being raised */
+                group     = volinfo->replica_count;
+                per_group = replica_cnt - group;
+        }
+
+        target = ((count / per_group) * group) + (count + group);
+
+        cds_list_for_each_entry (cursor, &volinfo->bricks, brick_list) {
+                /* walk forward until the target-th brick is reached */
+                if (++position < target)
+                        continue;
+
+                gf_msg_debug (THIS->name, 0, "brick:%s index=%d, count=%d",
+                              cursor->path, target, count);
+
+                cds_list_add (&brickinfo->brick_list, &cursor->brick_list);
+                break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Validate the 'stripe N' argument of an add-brick request.
+ *
+ *   volinfo      - volume being expanded
+ *   stripe_count - stripe count requested on the command line
+ *   total_bricks - brick count the volume would have after the operation
+ *   type         - out: set to the new volume type when the operation
+ *                  converts the volume (distribute -> stripe,
+ *                  replicate -> stripe-replicate); untouched otherwise
+ *   err_str      - out: explanation, filled in whenever -1 is returned
+ *   err_len      - size of err_str
+ *
+ * Returns  0 when the request is valid and changes the layout,
+ *          1 when the requested stripe count equals the current one and the
+ *            brick total is a multiple of dist_leaf_count,
+ *         -1 when the request is rejected.
+ */
+static int
+gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
+                                int total_bricks, int *type, char *err_str,
+                                size_t err_len)
+{
+        int ret = -1;
+
+        switch (volinfo->type) {
+        case GF_CLUSTER_TYPE_NONE:
+                if ((volinfo->brick_count * stripe_count) == total_bricks) {
+                        /* Change the volume type */
+                        *type = GF_CLUSTER_TYPE_STRIPE;
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                GD_MSG_VOL_TYPE_CHANGING_INFO,
+                                "Changing the type of volume %s from "
+                                "'distribute' to 'stripe'", volinfo->volname);
+                        ret = 0;
+                        goto out;
+                }
+                snprintf (err_str, err_len, "Incorrect number of "
+                          "bricks (%d) supplied for stripe count (%d).",
+                          (total_bricks - volinfo->brick_count),
+                          stripe_count);
+                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                goto out;
+
+        case GF_CLUSTER_TYPE_REPLICATE:
+                if (!(total_bricks % (volinfo->replica_count * stripe_count))) {
+                        /* Change the volume type */
+                        *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                GD_MSG_VOL_TYPE_CHANGING_INFO,
+                                "Changing the type of volume %s from "
+                                "'replicate' to 'replicate-stripe'",
+                                volinfo->volname);
+                        ret = 0;
+                        goto out;
+                }
+                snprintf (err_str, err_len, "Incorrect number of "
+                          "bricks (%d) supplied for changing volume's "
+                          "stripe count to %d, need at least %d bricks",
+                          (total_bricks - volinfo->brick_count),
+                          stripe_count,
+                          (volinfo->replica_count * stripe_count));
+                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                goto out;
+
+        case GF_CLUSTER_TYPE_STRIPE:
+        case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                if (stripe_count < volinfo->stripe_count) {
+                        snprintf (err_str, err_len,
+                                  "Incorrect stripe count (%d) supplied. "
+                                  "Volume already has stripe count (%d)",
+                                  stripe_count, volinfo->stripe_count);
+                        gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", err_str);
+                        goto out;
+                }
+                if ((stripe_count == volinfo->stripe_count) &&
+                    !(total_bricks % volinfo->dist_leaf_count)) {
+                        /* its same as the one which exists */
+                        ret = 1;
+                        goto out;
+                }
+                /* We have to make sure before and after 'add-brick',
+                   the number or subvolumes for distribute will remain
+                   same, when stripe count is given */
+                if ((stripe_count > volinfo->stripe_count) &&
+                    ((volinfo->brick_count * (stripe_count *
+                                              volinfo->replica_count)) ==
+                     (total_bricks * volinfo->dist_leaf_count))) {
+                        /* Change the dist_leaf_count */
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                GD_MSG_STRIPE_COUNT_CHANGE_INFO,
+                                "Changing the stripe count of "
+                                "volume %s from %d to %d",
+                                volinfo->volname,
+                                volinfo->stripe_count, stripe_count);
+                        ret = 0;
+                        goto out;
+                }
+                break;
+
+        case GF_CLUSTER_TYPE_DISPERSE:
+                snprintf (err_str, err_len, "Volume %s cannot be converted "
+                          "from dispersed to striped-"
+                          "dispersed", volinfo->volname);
+                gf_msg(THIS->name, GF_LOG_ERROR, EPERM,
+                       GD_MSG_OP_NOT_PERMITTED, "%s", err_str);
+                goto out;
+
+        default:
+                break;
+        }
+
+        /* Reaching this point means no branch accepted the request and none
+         * of them produced a message.  Fill one in so the caller (which logs
+         * err_str on -1) does not report an empty reason. */
+        snprintf (err_str, err_len, "Incorrect number of "
+                  "bricks (%d) supplied for stripe count (%d).",
+                  (total_bricks - volinfo->brick_count), stripe_count);
+
+out:
+        return ret;
+}
+
+/*
+ * Validate the 'replica N' argument of an add-brick request.
+ *
+ *   volinfo       - volume being expanded
+ *   replica_count - replica count requested on the command line
+ *   arbiter_count - arbiter count requested on the command line
+ *   total_bricks  - brick count the volume would have after the operation
+ *   type          - out: set to the new volume type when the operation
+ *                   converts the volume (distribute -> replicate,
+ *                   stripe -> stripe-replicate); untouched otherwise
+ *   err_str       - out: explanation, filled in whenever -1 is returned
+ *   err_len       - size of err_str
+ *
+ * Returns  0 when the request is valid and changes the layout,
+ *          1 when the requested replica count equals the current one and
+ *            the brick total is a multiple of dist_leaf_count,
+ *         -1 when the request is rejected.
+ */
+static int
+gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
+                                 int arbiter_count, int total_bricks, int *type,
+                                 char *err_str, int err_len)
+{
+        int ret = -1;
+
+        /* replica count is set */
+        switch (volinfo->type) {
+        case GF_CLUSTER_TYPE_NONE:
+                if ((volinfo->brick_count * replica_count) == total_bricks) {
+                        /* Change the volume type */
+                        *type = GF_CLUSTER_TYPE_REPLICATE;
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                GD_MSG_VOL_TYPE_CHANGING_INFO,
+                                "Changing the type of volume %s from "
+                                "'distribute' to 'replica'", volinfo->volname);
+                        ret = 0;
+                        goto out;
+                }
+                snprintf (err_str, err_len, "Incorrect number of "
+                          "bricks (%d) supplied for replica count (%d).",
+                          (total_bricks - volinfo->brick_count),
+                          replica_count);
+                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                goto out;
+
+        case GF_CLUSTER_TYPE_STRIPE:
+                if (!(total_bricks % (volinfo->dist_leaf_count * replica_count))) {
+                        /* Change the volume type */
+                        *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                GD_MSG_VOL_TYPE_CHANGING_INFO,
+                                "Changing the type of volume %s from "
+                                "'stripe' to 'replicate-stripe'",
+                                volinfo->volname);
+                        ret = 0;
+                        goto out;
+                }
+                snprintf (err_str, err_len, "Incorrect number of "
+                          "bricks (%d) supplied for changing volume's "
+                          "replica count to %d, need at least %d "
+                          "bricks",
+                          (total_bricks - volinfo->brick_count),
+                          replica_count, (volinfo->dist_leaf_count *
+                                          replica_count));
+                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                goto out;
+
+        case GF_CLUSTER_TYPE_REPLICATE:
+        case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                if (replica_count < volinfo->replica_count) {
+                        snprintf (err_str, err_len,
+                                  "Incorrect replica count (%d) supplied. "
+                                  "Volume already has (%d)",
+                                  replica_count, volinfo->replica_count);
+                        gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", err_str);
+                        goto out;
+                }
+                if (replica_count == volinfo->replica_count) {
+                        if (arbiter_count && !volinfo->arbiter_count) {
+                                snprintf (err_str, err_len,
+                                          "Cannot convert replica 3 volume "
+                                          "to arbiter volume.");
+                                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                                goto out;
+                        }
+                        if (!(total_bricks % volinfo->dist_leaf_count)) {
+                                ret = 1;
+                                goto out;
+                        }
+                }
+                /* We have to make sure before and after 'add-brick',
+                   the number or subvolumes for distribute will remain
+                   same, when replica count is given */
+                if ((replica_count > volinfo->replica_count) &&
+                    ((total_bricks * volinfo->dist_leaf_count) ==
+                     (volinfo->brick_count * (replica_count *
+                                              volinfo->stripe_count)))) {
+                        /* Change the dist_leaf_count */
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                GD_MSG_REPLICA_COUNT_CHANGE_INFO,
+                                "Changing the replica count of "
+                                "volume %s from %d to %d",
+                                volinfo->volname, volinfo->replica_count,
+                                replica_count);
+                        ret = 0;
+                        goto out;
+                }
+                break;
+
+        case GF_CLUSTER_TYPE_DISPERSE:
+                snprintf (err_str, err_len, "Volume %s cannot be converted "
+                          "from dispersed to replicated-"
+                          "dispersed", volinfo->volname);
+                gf_msg(THIS->name, GF_LOG_ERROR, EPERM,
+                       GD_MSG_OP_NOT_PERMITTED, "%s", err_str);
+                goto out;
+
+        default:
+                break;
+        }
+
+        /* Reaching this point means no branch accepted the request and none
+         * of them produced a message.  Fill one in so the caller (which logs
+         * err_str on -1) does not report an empty reason. */
+        snprintf (err_str, err_len, "Incorrect number of "
+                  "bricks (%d) supplied for replica count (%d).",
+                  (total_bricks - volinfo->brick_count), replica_count);
+
+out:
+        return ret;
+}
+
+/*
+ * Validate the 'replica N' argument of a remove-brick request.
+ *
+ *   volinfo       - volume being shrunk
+ *   replica_count - replica count requested on the command line
+ *   brick_count   - number of bricks named for removal
+ *   err_str       - out: explanation for a rejected request
+ *   err_len       - size of err_str
+ *
+ * Returns  0 when the replica count is being reduced and the number of
+ *            bricks matches (also for volume types not listed in the
+ *            switch),
+ *          1 when the option is a no-op: tier volumes, or the requested
+ *            count equals the current one and brick_count is a multiple of
+ *            dist_leaf_count,
+ *         -1 when the request is rejected; err_str is filled and the
+ *            reason is logged here.
+ */
+static int
+gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
+                                int32_t replica_count,
+                                int32_t brick_count, char *err_str,
+                                size_t err_len)
+{
+        int ret           = -1;
+        int replica_nodes = 0;
+
+        switch (volinfo->type) {
+        case GF_CLUSTER_TYPE_TIER:
+                ret = 1;
+                goto out;
+
+        case GF_CLUSTER_TYPE_NONE:
+        case GF_CLUSTER_TYPE_STRIPE:
+        case GF_CLUSTER_TYPE_DISPERSE:
+                snprintf (err_str, err_len,
+                          "replica count (%d) option given for non replicate "
+                          "volume %s", replica_count, volinfo->volname);
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOL_NOT_REPLICA, "%s", err_str);
+                goto out;
+
+        case GF_CLUSTER_TYPE_REPLICATE:
+        case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                /* in remove brick, you can only reduce the replica count */
+                if (replica_count > volinfo->replica_count) {
+                        snprintf (err_str, err_len,
+                                  "given replica count (%d) option is more "
+                                  "than volume %s's replica count (%d)",
+                                  replica_count, volinfo->volname,
+                                  volinfo->replica_count);
+                        gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", err_str);
+                        goto out;
+                }
+                if (replica_count == volinfo->replica_count) {
+                        /* This means the 'replica N' option on CLI was
+                           redundant. Check if the total number of bricks given
+                           for removal is same as 'dist_leaf_count' */
+                        if (brick_count % volinfo->dist_leaf_count) {
+                                snprintf (err_str, err_len,
+                                          "number of bricks provided (%d) is "
+                                          "not valid. need at least %d "
+                                          "(or %dxN)", brick_count,
+                                          volinfo->dist_leaf_count,
+                                          volinfo->dist_leaf_count);
+                                gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+                                        GD_MSG_INVALID_ENTRY, "%s",
+                                        err_str);
+                                goto out;
+                        }
+                        ret = 1;
+                        goto out;
+                }
+
+                /* Number of bricks that must go: one per dropped replica in
+                 * every replica set. */
+                replica_nodes = ((volinfo->brick_count /
+                                  volinfo->replica_count) *
+                                 (volinfo->replica_count - replica_count));
+
+                /* replica_nodes is a divisor below; a non-positive value can
+                 * only come from inconsistent volume data and is rejected
+                 * instead of dividing by it. */
+                if ((replica_nodes <= 0) || (brick_count % replica_nodes)) {
+                        snprintf (err_str, err_len,
+                                  "need %d(xN) bricks for reducing replica "
+                                  "count of the volume from %d to %d",
+                                  replica_nodes, volinfo->replica_count,
+                                  replica_count);
+                        /* Logged here like every other rejection in this
+                         * function; the caller does not log on its own. */
+                        gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", err_str);
+                        goto out;
+                }
+                break;
+
+        default:
+                break;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Handler functions */
+/*
+ * RPC handler body for the CLI 'add-brick' (and 'attach-tier') request.
+ *
+ * Decodes the request into a dict, validates volume name, brick count and
+ * the optional replica/stripe/arbiter counts against the current volume
+ * layout, records any resulting type / count change in the dict and then
+ * starts the GD_OP_ADD_BRICK transaction (synctask framework for clusters
+ * at op-version <= GD_OP_VERSION_3_7_5, mgmt_v3 otherwise).
+ *
+ * Every failure path must reach 'out' with a non-zero 'ret': that is what
+ * triggers the error reply to the CLI.  After the reply has been sent the
+ * function returns 0 so that no second reply is generated.
+ *
+ * Returns 0 once an error reply was sent, otherwise the result of the
+ * transaction start call.
+ */
+int
+__glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+        int32_t             ret           = -1;
+        gf_cli_req          cli_req       = {{0,}};
+        dict_t             *dict          = NULL;
+        char               *bricks        = NULL;
+        char               *volname       = NULL;
+        int                 brick_count   = 0;
+        void               *cli_rsp       = NULL;
+        char                err_str[2048] = {0,};
+        gf_cli_rsp          rsp           = {0,};
+        glusterd_volinfo_t *volinfo       = NULL;
+        xlator_t           *this          = NULL;
+        int                 total_bricks  = 0;
+        int32_t             replica_count = 0;
+        int32_t             arbiter_count = 0;
+        int32_t             stripe_count  = 0;
+        int                 type          = 0;
+        glusterd_conf_t    *conf          = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+
+        GF_ASSERT (req);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                snprintf (err_str, sizeof (err_str), "Garbage args received");
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_ADD_BRICK_REQ_RECVD, "Received add brick req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get volume "
+                          "name");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        if (!(ret = glusterd_check_volume_exists (volname))) {
+                ret = -1;
+                snprintf (err_str, sizeof (err_str), "Volume %s does not exist",
+                          volname);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &brick_count);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get volume "
+                          "brick count");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        /* The three counts below are optional: a failed lookup simply
+         * leaves the corresponding variable at 0. */
+        ret = dict_get_int32 (dict, "replica-count", &replica_count);
+        if (!ret) {
+                gf_msg (this->name, GF_LOG_INFO, errno,
+                        GD_MSG_DICT_GET_SUCCESS, "replica-count is %d",
+                        replica_count);
+        }
+
+        ret = dict_get_int32 (dict, "arbiter-count", &arbiter_count);
+        if (!ret) {
+                gf_msg (this->name, GF_LOG_INFO, errno,
+                        GD_MSG_DICT_GET_SUCCESS, "arbiter-count is %d",
+                        arbiter_count);
+        }
+
+        ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
+        if (!ret) {
+                gf_msg (this->name, GF_LOG_INFO, errno,
+                        GD_MSG_DICT_GET_SUCCESS, "stripe-count is %d",
+                        stripe_count);
+        }
+
+        if (!dict_get (dict, "force")) {
+                /* 'ret' still holds the result of the optional
+                 * "stripe-count" lookup and may well be 0; force a failure
+                 * so that the error reply below is actually sent. */
+                ret = -1;
+                snprintf (err_str, sizeof (err_str), "Failed to get flag");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get volinfo "
+                          "for volume name %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "%s", err_str);
+                goto out;
+
+        }
+
+        total_bricks = volinfo->brick_count + brick_count;
+
+        if (dict_get (dict, "attach-tier")) {
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Volume %s is already a tier.", volname);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_ALREADY_TIER, "%s", err_str);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (glusterd_is_tiering_supported(err_str) == _gf_false) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VERSION_UNSUPPORTED,
+                                "Tiering not supported at this version");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_get_int32 (dict, "hot-type", &type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "failed to get type from dictionary");
+                        goto out;
+                }
+
+                goto brick_val;
+        }
+
+        ret = glusterd_disallow_op_for_tier (volinfo, GD_OP_ADD_BRICK, -1);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Add-brick operation is "
+                          "not supported on a tiered volume %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_UNSUPPORTED, "%s", err_str);
+                goto out;
+        }
+
+        if (!stripe_count && !replica_count) {
+                if (volinfo->type == GF_CLUSTER_TYPE_NONE)
+                        goto brick_val;
+
+                if ((volinfo->brick_count < volinfo->dist_leaf_count) &&
+                    (total_bricks <= volinfo->dist_leaf_count))
+                        goto brick_val;
+
+                if ((brick_count % volinfo->dist_leaf_count) != 0) {
+                        snprintf (err_str, sizeof (err_str), "Incorrect number "
+                                  "of bricks supplied %d with count %d",
+                                  brick_count, volinfo->dist_leaf_count);
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_REPLICA, "%s", err_str);
+                        ret = -1;
+                        goto out;
+                }
+                goto brick_val;
+                /* done with validation.. below section is if stripe|replica
+                   count is given */
+        }
+
+        /* These bricks needs to be added one per a replica or stripe volume */
+        if (stripe_count) {
+                ret = gd_addbr_validate_stripe_count (volinfo, stripe_count,
+                                                      total_bricks, &type,
+                                                      err_str,
+                                                      sizeof (err_str));
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_COUNT_VALIDATE_FAILED, "%s", err_str);
+                        goto out;
+                }
+
+                /* if stripe count is same as earlier, set it back to 0 */
+                if (ret == 1)
+                        stripe_count = 0;
+
+                ret = dict_set_int32 (dict, "stripe-count", stripe_count);
+                if (ret) {
+                        /* this is a dict *set* failure, log it as such */
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_SET_FAILED,
+                                "failed to set the stripe-count in dict");
+                        goto out;
+                }
+                goto brick_val;
+        }
+
+        ret = gd_addbr_validate_replica_count (volinfo, replica_count,
+                                               arbiter_count, total_bricks,
+                                               &type, err_str,
+                                               sizeof (err_str));
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COUNT_VALIDATE_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        /* if replica count is same as earlier, set it back to 0 */
+        if (ret == 1)
+                replica_count = 0;
+
+        ret = dict_set_int32 (dict, "replica-count", replica_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set the replica-count in dict");
+                goto out;
+        }
+
+brick_val:
+        ret = dict_get_str (dict, "bricks", &bricks);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get volume "
+                          "bricks");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        if (type != volinfo->type) {
+                ret = dict_set_int32 (dict, "type", type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_SET_FAILED,
+                                "failed to set the new type in dict");
+                        goto out;
+                }
+        }
+
+        if (conf->op_version <= GD_OP_VERSION_3_7_5) {
+                gf_msg_debug (this->name, 0, "The cluster is operating at "
+                              "version less than or equal to %d. Falling back "
+                              "to syncop framework.",
+                              GD_OP_VERSION_3_7_5);
+                ret = glusterd_op_begin_synctask (req, GD_OP_ADD_BRICK, dict);
+        } else {
+                ret = glusterd_mgmt_v3_initiate_all_phases (req,
+                                                            GD_OP_ADD_BRICK,
+                                                            dict);
+        }
+
+out:
+        if (ret) {
+                rsp.op_ret = -1;
+                rsp.op_errno = 0;
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str), "Operation failed");
+                rsp.op_errstr = err_str;
+                cli_rsp = &rsp;
+                glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+                                 (xdrproc_t)xdr_gf_cli_rsp, dict);
+                ret = 0; //sent error to cli, prevent second reply
+        }
+
+        free (cli_req.dict.dict_val); //its malloced by xdr
+
+        return ret;
+}
+
+/*
+ * Public entry point for the add-brick request: passes 'req' together with
+ * __glusterd_handle_add_brick to glusterd_big_locked_handler and returns
+ * whatever that call returns.
+ * NOTE(review): going by its name the helper runs the handler while holding
+ * glusterd's big lock - confirm against its definition.
+ */
+int
+glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+}
+
+/*
+ * Allocate the per-subvolume counter array used by the subvol_matcher_*
+ * helpers: 'count' ints obtained from GF_CALLOC, stored in *subvols.
+ *
+ * Returns 0 when the allocation succeeded, -1 when GF_CALLOC returned NULL
+ * (*subvols is NULL in that case).
+ */
+static int
+subvol_matcher_init (int **subvols, int count)
+{
+        int *counters = NULL;
+
+        counters = GF_CALLOC (count, sizeof(int), gf_gld_mt_int);
+        *subvols = counters;
+
+        return (counters != NULL) ? 0 : -1;
+}
+
+/*
+ * Account for one brick that is about to be removed.
+ *
+ * Walks volinfo->bricks until an entry with the same hostname and path as
+ * 'brickinfo' is found, derives the sub-volume number from the entry's
+ * position (position / dist_leaf_count) and increments that slot of
+ * 'subvols'.  Nothing is changed when the brick is not in the list.
+ */
+static void
+subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo,
+                       glusterd_brickinfo_t *brickinfo)
+{
+        glusterd_brickinfo_t *cursor = NULL;
+        int32_t               subvol = 0;
+        int                   index  = 0;
+
+        cds_list_for_each_entry (cursor, &volinfo->bricks, brick_list) {
+
+                if (!strcmp (cursor->hostname, brickinfo->hostname) &&
+                    !strcmp (cursor->path, brickinfo->path)) {
+                        gf_msg_debug (THIS->name, 0, LOGSTR_FOUND_BRICK,
+                                      brickinfo->hostname, brickinfo->path,
+                                      volinfo->volname);
+                        subvol = (index / volinfo->dist_leaf_count);
+                        subvols[subvol]++;
+                        break;
+                }
+
+                /* not this one, move on to the next list position */
+                index++;
+        }
+
+}
+
+/*
+ * Check the per-subvolume removal counters collected by
+ * subvol_matcher_update.
+ *
+ * With a non-zero 'replica_count' (replica count is being reduced) every
+ * sub-volume must lose exactly (volinfo->replica_count - replica_count)
+ * bricks.  Otherwise each counter has to be a multiple of dist_leaf_count,
+ * i.e. sub-volumes are removed as a whole or not at all.
+ *
+ * Returns 0 when the selection is acceptable, -1 (with err_str filled in)
+ * otherwise.  'vol_type' is only used for the error text.
+ */
+static int
+subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
+                       size_t err_len, char *vol_type, int replica_count)
+{
+        int idx      = 0;
+        int status   = 0;
+        int expected = volinfo->replica_count-replica_count;
+
+        if (replica_count) {
+                for (idx = 0; idx < volinfo->subvol_count; idx++) {
+                        if (subvols[idx] == expected)
+                                continue;
+
+                        status = -1;
+                        snprintf (err_str, err_len, "Remove exactly %d"
+                                  " brick(s) from each subvolume.", expected);
+                        break;
+                }
+                return status;
+        }
+
+        /* The first counter is inspected unconditionally, the remaining
+         * ones while idx stays below subvol_count. */
+        do {
+                if (subvols[idx] % volinfo->dist_leaf_count != 0) {
+                        status = -1;
+                        snprintf (err_str, err_len,
+                                  "Bricks not from same subvol for %s", vol_type);
+                        break;
+                }
+        } while (++idx < volinfo->subvol_count);
+
+        return status;
+}
+
+/*
+ * Release the counter array obtained from subvol_matcher_init by handing it
+ * to GF_FREE.
+ * NOTE(review): __glusterd_handle_remove_brick calls this even when the
+ * array was never allocated, so GF_FREE is presumably NULL-safe - confirm.
+ */
+static void
+subvol_matcher_destroy (int *subvols)
+{
+ GF_FREE (subvols);
+}
+
+/*
+ * Fill 'dict' with the hot-tier bricks of a tiered volume so that a detach
+ * can be processed like a remove-brick of exactly those bricks.
+ *
+ * The brick list is walked backwards; the first
+ * volinfo->tier_info.cold_brick_count entries seen that way are skipped and
+ * every further entry is stored as "brick<N>" = "<hostname>:<path>"
+ * (N starting at 1).  Finally "count" is set to the number of bricks stored.
+ *
+ * Each value is a gf_strdup'ed copy handed over with dict_set_dynstr, so
+ * the dict owns the string.  (The previous strdup + dict_set_str pairing
+ * left nobody responsible for freeing the copy.)
+ *
+ * Returns the number of hot bricks stored, or -1 when a copy could not be
+ * allocated or any dict update failed.
+ */
+static int
+glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+        char                  key[256]      = {0,};
+        char                  value[256]    = {0,};
+        char                 *brick_str     = NULL;
+        int                   brick_num     = 0;
+        int                   hot_brick_num = 0;
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        int                   ret           = 0;
+
+        /* cold tier bricks at tail of list so use reverse iteration */
+        cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                brick_num++;
+                if (brick_num <= volinfo->tier_info.cold_brick_count)
+                        continue;
+
+                hot_brick_num++;
+                snprintf (key, sizeof (key), "brick%d", hot_brick_num);
+                snprintf (value, sizeof (value), "%s:%s",
+                          brickinfo->hostname,
+                          brickinfo->path);
+
+                brick_str = gf_strdup (value);
+                if (!brick_str)
+                        return -1;
+
+                /* NOTE(review): brick_str is deliberately not freed when
+                 * the call fails - whether dict_set_dynstr has already
+                 * released it on its error paths is not visible from
+                 * here, and a rare leak is preferable to a double free. */
+                ret = dict_set_dynstr (dict, key, brick_str);
+                if (ret)
+                        return -1;
+        }
+
+        ret = dict_set_int32(dict, "count", hot_brick_num);
+        if (ret)
+                return -1;
+
+        return hot_brick_num;
+}
+
+/*
+ * Extra remove-brick checks for volumes that carry arbiter bricks.
+ *
+ * Only replicate / stripe-replicate volumes with a non-zero arbiter_count
+ * and an explicit 'replica_count' in the request are examined:
+ *
+ *   replica_count == 2: every brick in 'brickinfo_list' must be the last
+ *       brick of its brick group (as reported by
+ *       get_last_brick_of_brick_group).
+ *   replica_count == 1: for every sub-volume, the last brick of the group
+ *       must be among the bricks being removed.
+ *
+ * Returns 0 when the request passes (or the checks do not apply) and -1
+ * otherwise.  err_str is filled in for rule violations; it is left
+ * untouched when the scratch array cannot be allocated.
+ */
+static int
+glusterd_remove_brick_validate_arbiters (glusterd_volinfo_t *volinfo,
+                                         int32_t count, int32_t replica_count,
+                                         glusterd_brickinfo_t **brickinfo_list,
+                                         char *err_str, size_t err_len)
+{
+        glusterd_brickinfo_t *candidate  = NULL;
+        glusterd_brickinfo_t *group_tail = NULL;
+        char                 *tail_seen  = NULL;
+        int                   idx        = 0;
+        int                   status     = 0;
+
+        if ((volinfo->type != GF_CLUSTER_TYPE_REPLICATE) &&
+            (volinfo->type != GF_CLUSTER_TYPE_STRIPE_REPLICATE))
+                return 0;
+
+        if (!replica_count || !volinfo->arbiter_count)
+                return 0;
+
+        switch (replica_count) {
+        case 2:
+                /* If it is an arbiter to replica 2 conversion, only permit
+                 * removal of the arbiter brick.*/
+                for (idx = 0; idx < count; idx++) {
+                        candidate = brickinfo_list[idx];
+                        group_tail = get_last_brick_of_brick_group (volinfo,
+                                                                    candidate);
+                        if (group_tail == candidate)
+                                continue;
+
+                        snprintf (err_str, err_len, "Remove arbiter "
+                                  "brick(s) only when converting from "
+                                  "arbiter to replica 2 subvolume.");
+                        return -1;
+                }
+                break;
+
+        case 1:
+                /* If it is an arbiter to plain distribute conversion, in every
+                 * replica subvol, the arbiter has to be one of the bricks that
+                 * are removed. */
+                tail_seen = GF_CALLOC (volinfo->subvol_count,
+                                       sizeof (*tail_seen),
+                                       gf_common_mt_char);
+                if (!tail_seen)
+                        return -1;
+
+                /* mark each group whose last brick is part of the request */
+                for (idx = 0; idx < count; idx++) {
+                        candidate = brickinfo_list[idx];
+                        group_tail = get_last_brick_of_brick_group (volinfo,
+                                                                    candidate);
+                        if (group_tail == candidate)
+                                tail_seen[candidate->group] = 1;
+                }
+
+                for (idx = 0; idx < volinfo->subvol_count; idx++) {
+                        if (tail_seen[idx])
+                                continue;
+
+                        snprintf (err_str, err_len, "Removed bricks "
+                                  "must contain arbiter when converting"
+                                  " to plain distrubute.");
+                        status = -1;
+                        break;
+                }
+
+                GF_FREE (tail_seen);
+                break;
+
+        default:
+                break;
+        }
+
+        return status;
+}
+
+/*
+ * RPC handler body for the CLI 'remove-brick' (and tier detach) request.
+ *
+ * Decodes the request into a dict, looks up the volume, validates the
+ * optional replica-count change and the brick selection (brick count per
+ * sub-volume, arbiter rules) and finally starts the GD_OP_REMOVE_BRICK
+ * transaction through the synctask framework.
+ *
+ * For tier volumes the brick list in the dict is rebuilt from the hot tier
+ * by glusterd_set_detach_bricks and 'count' is replaced by its result.
+ *
+ * Every failure path reaches 'out' with a non-zero 'ret', which triggers
+ * the error reply to the CLI; the function then returns 0 so that no second
+ * reply is generated.
+ */
+int
+__glusterd_handle_remove_brick (rpcsvc_request_t *req)
+{
+        int32_t                ret            = -1;
+        gf_cli_req             cli_req        = {{0,}};
+        dict_t                *dict           = NULL;
+        int32_t                count          = 0;
+        char                  *brick          = NULL;
+        char                   key[256]       = {0,};
+        int                    i              = 1;
+        glusterd_volinfo_t    *volinfo        = NULL;
+        glusterd_brickinfo_t  *brickinfo      = NULL;
+        glusterd_brickinfo_t **brickinfo_list = NULL;
+        int                   *subvols        = NULL;
+        char                   err_str[2048]  = {0};
+        gf_cli_rsp             rsp            = {0,};
+        void                  *cli_rsp        = NULL;
+        char                   vol_type[256]  = {0,};
+        int32_t                replica_count  = 0;
+        char                  *volname        = NULL;
+        xlator_t              *this           = NULL;
+        int                    cmd            = -1;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                snprintf (err_str, sizeof (err_str), "Received garbage args");
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_REM_BRICK_REQ_RECVD,
+                "Received rem brick req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get volume "
+                          "name");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get brick "
+                          "count");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str),"Volume %s does not exist",
+                          volname);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
+                goto out;
+        }
+
+        if ((volinfo->type == GF_CLUSTER_TYPE_TIER) &&
+            (glusterd_is_tiering_supported(err_str) == _gf_false)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VERSION_UNSUPPORTED,
+                        "Tiering not supported at this version");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "command", &cmd);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get cmd "
+                          "ccommand");
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = glusterd_disallow_op_for_tier (volinfo, GD_OP_REMOVE_BRICK, cmd);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str),
+                          "Removing brick from a Tier volume is not allowed");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_UNSUPPORTED, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "replica-count", &replica_count);
+        if (!ret) {
+                /* the lookup succeeded: log it with the success id, as the
+                 * add-brick handler does for its counts */
+                gf_msg (this->name, GF_LOG_INFO, errno,
+                        GD_MSG_DICT_GET_SUCCESS,
+                        "request to change replica-count to %d", replica_count);
+                ret = gd_rmbr_validate_replica_count (volinfo, replica_count,
+                                                      count, err_str,
+                                                      sizeof (err_str));
+                if (ret < 0) {
+                        /* logging and error msg are done in above function
+                           itself */
+                        goto out;
+                }
+                dict_del (dict, "replica-count");
+                if (ret) {
+                        /* positive result: the option was redundant */
+                        replica_count = 0;
+                } else {
+                        ret = dict_set_int32 (dict, "replica-count",
+                                              replica_count);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "failed to set the replica_count "
+                                        "in dict");
+                                goto out;
+                        }
+                }
+        }
+
+        /* 'vol_type' is used for giving the meaning full error msg for user */
+        if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+                strcpy (vol_type, "replica");
+        } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) {
+                strcpy (vol_type, "stripe");
+        } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
+                strcpy (vol_type, "stripe-replicate");
+        } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+                strcpy (vol_type, "disperse");
+        } else {
+                strcpy (vol_type, "distribute");
+        }
+
+        /* Do not allow remove-brick if the volume is a stripe volume*/
+        if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
+            (volinfo->brick_count == volinfo->stripe_count)) {
+                snprintf (err_str, sizeof (err_str),
+                          "Removing brick from a stripe volume is not allowed");
+                gf_msg (this->name, GF_LOG_ERROR, EPERM,
+                        GD_MSG_OP_NOT_PERMITTED, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        if (!replica_count &&
+            (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) &&
+            (volinfo->brick_count == volinfo->dist_leaf_count)) {
+                snprintf (err_str, sizeof(err_str),
+                          "Removing bricks from stripe-replicate"
+                          " configuration is not allowed without reducing "
+                          "replica or stripe count explicitly.");
+                gf_msg (this->name, GF_LOG_ERROR, EPERM,
+                        GD_MSG_OP_NOT_PERMITTED_AC_REQD, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        if (!replica_count &&
+            (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+            (volinfo->brick_count == volinfo->dist_leaf_count)) {
+                snprintf (err_str, sizeof (err_str),
+                          "Removing bricks from replicate configuration "
+                          "is not allowed without reducing replica count "
+                          "explicitly.");
+                gf_msg (this->name, GF_LOG_ERROR, EPERM,
+                        GD_MSG_OP_NOT_PERMITTED_AC_REQD, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        /* Do not allow remove-brick if the bricks given is less than
+           the replica count or stripe count */
+        if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+            (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
+                if (volinfo->dist_leaf_count &&
+                    (count % volinfo->dist_leaf_count)) {
+                        snprintf (err_str, sizeof (err_str), "Remove brick "
+                                  "incorrect brick count of %d for %s %d",
+                                  count, vol_type, volinfo->dist_leaf_count);
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", err_str);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* subvol match is not required for tiered volume*/
+        if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+            (volinfo->type != GF_CLUSTER_TYPE_TIER) &&
+            (volinfo->subvol_count > 1)) {
+                ret = subvol_matcher_init (&subvols, volinfo->subvol_count);
+                if (ret)
+                        goto out;
+        }
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                count = glusterd_set_detach_bricks(dict, volinfo);
+
+        /* 'count' sizes the allocation below.  A negative value - sent by
+         * the client or returned as -1 by glusterd_set_detach_bricks -
+         * must not be turned into an allocation size. */
+        if (count < 0) {
+                snprintf (err_str, sizeof (err_str), "Invalid brick count "
+                          "(%d) for volume %s", count, volname);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        brickinfo_list = GF_CALLOC (count, sizeof (*brickinfo_list),
+                                    gf_common_mt_pointer);
+        if (!brickinfo_list) {
+                ret = -1;
+                goto out;
+        }
+
+        while ( i <= count) {
+                snprintf (key, sizeof (key), "brick%d", i);
+                ret = dict_get_str (dict, key, &brick);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str), "Unable to get %s",
+                                  key);
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                        goto out;
+                }
+                gf_msg_debug (this->name, 0, "Remove brick count %d brick:"
+                              " %s", i, brick);
+
+                ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
+                                                             &brickinfo,
+                                                             _gf_false);
+
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str), "Incorrect brick "
+                                  "%s for volume %s", brick, volname);
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_BRICK_NOT_FOUND, "%s", err_str);
+                        goto out;
+                }
+                /* dict keys are 1-based, the array is 0-based */
+                brickinfo_list[i-1] = brickinfo;
+
+                i++;
+                if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
+                    (volinfo->brick_count <= volinfo->dist_leaf_count))
+                        continue;
+
+                /* Find which subvolume the brick belongs to.
+                 * subvol match is not required for tiered volume
+                 *
+                 * 'subvols' is only allocated when subvol_count > 1, a
+                 * condition that is not identical to the one guarding this
+                 * point, hence the explicit NULL check.
+                 */
+                if ((volinfo->type != GF_CLUSTER_TYPE_TIER) && subvols)
+                        subvol_matcher_update (subvols, volinfo, brickinfo);
+        }
+
+        /* Check if the bricks belong to the same subvolumes.*/
+        /* subvol match is not required for tiered volume*/
+        if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+            (volinfo->type != GF_CLUSTER_TYPE_TIER) &&
+            (volinfo->subvol_count > 1)) {
+                ret = subvol_matcher_verify (subvols, volinfo,
+                                             err_str, sizeof(err_str),
+                                             vol_type, replica_count);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_remove_brick_validate_arbiters (volinfo, count,
+                                                       replica_count,
+                                                       brickinfo_list,
+                                                       err_str,
+                                                       sizeof (err_str));
+        if (ret)
+                goto out;
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict);
+
+out:
+        if (ret) {
+                rsp.op_ret = -1;
+                rsp.op_errno = 0;
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_OP_FAILED, "%s", err_str);
+                rsp.op_errstr = err_str;
+                cli_rsp = &rsp;
+                glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+                                 (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+                ret = 0; //sent error to cli, prevent second reply
+
+        }
+
+        if (brickinfo_list)
+                GF_FREE (brickinfo_list);
+        subvol_matcher_destroy (subvols);
+        free (cli_req.dict.dict_val); //its malloced by xdr
+
+        return ret;
+}
+
+/*
+ * Public entry point for the remove-brick request: passes 'req' together
+ * with __glusterd_handle_remove_brick to glusterd_big_locked_handler and
+ * returns whatever that call returns.
+ * NOTE(review): going by its name the helper runs the handler while holding
+ * glusterd's big lock - confirm against its definition.
+ */
+int
+glusterd_handle_remove_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_remove_brick);
+}
+
+/*
+ * Per-entry callback for dict_foreach(): restarts the gsync session between
+ * param->volinfo and one slave, provided that session is currently running
+ * on this node.
+ *
+ * @this:  dictionary being iterated (not used here)
+ * @key:   key of the current entry (not used here)
+ * @value: entry value; the slave specification is the text that follows the
+ *         first ':' in value->data
+ * @data:  a glusterd_gsync_status_temp_t supplying volinfo and rsp_dict
+ *
+ * Returns 0 when there is nothing to do (no ':' in the value, session not
+ * running locally, no local brick paths) or when the restart succeeded, and
+ * non-zero on failure.
+ */
+static int
+_glusterd_restart_gsync_session (dict_t *this, char *key,
+                                 data_t *value, void *data)
+{
+        char                          *slave      = NULL;
+        char                          *slave_buf  = NULL;
+        char                          *path_list  = NULL;
+        char                          *slave_vol  = NULL;
+        char                          *slave_host = NULL;
+        char                          *slave_url  = NULL;
+        char                          *conf_path  = NULL;
+        char                          *errmsg     = NULL;
+        int                            ret        = -1;
+        glusterd_gsync_status_temp_t  *param      = NULL;
+        gf_boolean_t                   is_running = _gf_false;
+
+        param = (glusterd_gsync_status_temp_t *)data;
+
+        GF_ASSERT (param);
+        GF_ASSERT (param->volinfo);
+
+        slave = strchr(value->data, ':');
+        if (slave) {
+                /* step past the ':' so slave points at the slave spec */
+                slave++;
+                slave_buf = gf_strdup (slave);
+                if (!slave_buf) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Failed to gf_strdup");
+                        ret = -1;
+                        goto out;
+                }
+        }
+        else
+                return 0;
+
+        ret = dict_set_dynstr (param->rsp_dict, "slave", slave_buf);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to store slave");
+                if (slave_buf)
+                        GF_FREE(slave_buf);
+                goto out;
+        }
+
+        /* Pass the address of a real char * as the error-message out
+         * parameter. The previous code passed a NULL char ** and then
+         * dereferenced it below, which crashed on every failure.
+         * NOTE(review): ownership of errmsg is not visible from here; if the
+         * callee heap-allocates it, it should be released after logging. */
+        ret = glusterd_get_slave_details_confpath (param->volinfo,
+                                                   param->rsp_dict, &slave_url,
+                                                   &slave_host, &slave_vol,
+                                                   &conf_path, &errmsg);
+        if (ret) {
+                if (errmsg)
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL,
+                                "%s", errmsg);
+                else
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL,
+                                "Unable to fetch slave or confpath details.");
+                goto out;
+        }
+
+        /* In cases that gsyncd is not running, we will not invoke it
+         * because of add-brick. */
+        ret = glusterd_check_gsync_running_local (param->volinfo->volname,
+                                                  slave, conf_path,
+                                                  &is_running);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_GSYNC_VALIDATION_FAIL, "gsync running validation failed.");
+                goto out;
+        }
+        if (_gf_false == is_running) {
+                gf_msg_debug ("glusterd", 0, "gsync session for %s and %s is"
+                              " not running on this node. Hence not restarting.",
+                              param->volinfo->volname, slave);
+                ret = 0;
+                goto out;
+        }
+
+        /* Only the resulting path_list decides what happens next; the return
+         * code of this call is overwritten on both paths below. */
+        ret = glusterd_get_local_brickpaths (param->volinfo, &path_list);
+        if (!path_list) {
+                gf_msg_debug ("glusterd", 0, "This node not being part of"
+                              " volume should not be running gsyncd. Hence"
+                              " no gsyncd process to restart.");
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_check_restart_gsync_session (param->volinfo, slave,
+                                                    param->rsp_dict, path_list,
+                                                    conf_path, 0);
+        if (ret)
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_GSYNC_RESTART_FAIL,
+                        "Unable to restart gsync session.");
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d.", ret);
+        return ret;
+}
+
+/* op-sm */
+
+/*
+ * Worker for add-brick: creates a brickinfo for each of the @count bricks
+ * named in @bricks, links it into @volinfo, updates the volume's type and
+ * count fields from @dict and, when the volume is started, generates the
+ * brick volfiles and starts the new bricks.
+ *
+ * @volinfo: volume being extended (asserted non-NULL)
+ * @count:   number of bricks expected in @bricks
+ * @bricks:  brick list; the first character is skipped and the remainder is
+ *           split on ' ' and '\n'
+ * @dict:    operation dictionary; the optional keys "stripe-count",
+ *           "replica-count", "arbiter-count", "type" and "attach-tier" are
+ *           consulted, and "brick<N>.mount_dir" is required for every brick
+ *           when conf->op_version >= GD_OP_VERSION_3_6_0
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
+                                char *bricks, dict_t *dict)
+{
+        char                         *brick = NULL;
+        int32_t                       i = 1;
+        char                         *brick_list = NULL;
+        char                         *free_ptr1 = NULL;
+        char                         *free_ptr2 = NULL;
+        char                         *saveptr = NULL;
+        int32_t                       ret = -1;
+        int32_t                       stripe_count = 0;
+        int32_t                       replica_count = 0;
+        int32_t                       arbiter_count = 0;
+        int32_t                       type = 0;
+        glusterd_brickinfo_t         *brickinfo = NULL;
+        glusterd_gsync_status_temp_t  param = {0, };
+        gf_boolean_t                  restart_needed = 0;
+        char                          msg[1024] __attribute__((unused)) = {0, };
+        int                           caps = 0;
+        int                           brickid = 0;
+        char                          key[PATH_MAX] = "";
+        char                         *brick_mount_dir = NULL;
+        xlator_t                     *this = NULL;
+        glusterd_conf_t              *conf = NULL;
+        gf_boolean_t                  is_valid_add_brick = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (volinfo);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (bricks) {
+                brick_list = gf_strdup (bricks);
+                free_ptr1 = brick_list;
+        }
+
+        if (count) {
+                /* brick_list is NULL when bricks was NULL or the copy
+                 * failed; brick_list+1 must not be formed in that case. */
+                if (!brick_list) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL,
+                                "Unable to get a copy of the brick list");
+                        ret = -1;
+                        goto out;
+                }
+                brick = strtok_r (brick_list+1, " \n", &saveptr);
+        }
+
+        if (dict) {
+                ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
+                if (!ret)
+                        gf_msg (THIS->name, GF_LOG_INFO, errno,
+                                GD_MSG_DICT_GET_SUCCESS,
+                                "stripe-count is set %d", stripe_count);
+
+                ret = dict_get_int32 (dict, "replica-count", &replica_count);
+                if (!ret)
+                        gf_msg (THIS->name, GF_LOG_INFO, errno,
+                                GD_MSG_DICT_GET_SUCCESS,
+                                "replica-count is set %d", replica_count);
+                ret = dict_get_int32 (dict, "arbiter-count", &arbiter_count);
+                if (!ret)
+                        gf_msg (THIS->name, GF_LOG_INFO, errno,
+                                GD_MSG_DICT_GET_SUCCESS,
+                                "arbiter-count is set %d", arbiter_count);
+                ret = dict_get_int32 (dict, "type", &type);
+                if (!ret)
+                        gf_msg (THIS->name, GF_LOG_INFO, errno,
+                                GD_MSG_DICT_GET_SUCCESS,
+                                "type is set %d, need to change it", type);
+        }
+
+        brickid = glusterd_get_next_available_brickid (volinfo);
+        if (brickid < 0) {
+                /* ret still holds the result of the last optional dict
+                 * lookup above, which may be 0; report a real failure. */
+                ret = -1;
+                goto out;
+        }
+        while ( i <= count) {
+                ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo,
+                                                         _gf_true, NULL);
+                if (ret)
+                        goto out;
+
+                GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (brickinfo, volinfo,
+                                                      brickid++);
+
+                /* A bricks mount dir is required only by snapshots which were
+                 * introduced in gluster-3.6.0
+                 */
+                if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+                        brick_mount_dir = NULL;
+
+                        snprintf (key, sizeof(key), "brick%d.mount_dir", i);
+                        ret = dict_get_str (dict, key, &brick_mount_dir);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "%s not present", key);
+                                goto out;
+                        }
+                        /* snprintf always NUL-terminates, unlike strncpy
+                         * with the full buffer size as the limit */
+                        snprintf (brickinfo->mount_dir,
+                                  sizeof(brickinfo->mount_dir), "%s",
+                                  brick_mount_dir);
+                }
+
+                ret = glusterd_resolve_brick (brickinfo);
+                if (ret)
+                        goto out;
+
+                /* hot tier bricks are added to head of brick list */
+                if (dict_get (dict, "attach-tier")) {
+                        cds_list_add (&brickinfo->brick_list, &volinfo->bricks);
+                } else if (stripe_count || replica_count) {
+                        add_brick_at_right_order (brickinfo, volinfo, (i - 1),
+                                                  stripe_count, replica_count);
+                } else {
+                        cds_list_add_tail (&brickinfo->brick_list,
+                                           &volinfo->bricks);
+                }
+                brick = strtok_r (NULL, " \n", &saveptr);
+                i++;
+                volinfo->brick_count++;
+
+        }
+
+        /* Gets changed only if the options are given in add-brick cli */
+        if (type)
+                volinfo->type = type;
+
+        if (replica_count) {
+                volinfo->replica_count = replica_count;
+        }
+        if (arbiter_count) {
+                volinfo->arbiter_count = arbiter_count;
+        }
+        if (stripe_count) {
+                volinfo->stripe_count = stripe_count;
+        }
+        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+        /* backward compatibility */
+        volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 0:
+                              volinfo->dist_leaf_count);
+
+        volinfo->subvol_count = (volinfo->brick_count /
+                                 volinfo->dist_leaf_count);
+
+        ret = 0;
+        /* Nothing needs starting on a volume that is not running. */
+        if (GLUSTERD_STATUS_STARTED != volinfo->status)
+                goto generate_volfiles;
+
+        ret = generate_brick_volfiles (volinfo);
+        if (ret)
+                goto out;
+
+        /* Second pass over a fresh copy of the list: the first strtok_r
+         * pass modified the earlier copy in place. */
+        brick_list = gf_strdup (bricks);
+        free_ptr2 = brick_list;
+        i = 1;
+
+        if (count) {
+                if (!brick_list) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL,
+                                "Unable to get a copy of the brick list");
+                        ret = -1;
+                        goto out;
+                }
+                brick = strtok_r (brick_list+1, " \n", &saveptr);
+        }
+#ifdef HAVE_BD_XLATOR
+        if (brickinfo->vg[0])
+                caps = CAPS_BD | CAPS_THIN |
+                        CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+#endif
+
+        /* This check needs to be added to distinguish between
+         * attach-tier commands and add-brick commands.
+         * When a tier is attached, adding is done via add-brick
+         * and setting of pending xattrs shouldn't be done for
+         * attach-tiers as they are virtually new volumes.
+         */
+        if (glusterd_is_volume_replicate (volinfo)) {
+                if (replica_count &&
+                    !dict_get (dict, "attach-tier") &&
+                    conf->op_version >= GD_OP_VERSION_3_7_10) {
+                        is_valid_add_brick = _gf_true;
+                        ret = generate_dummy_client_volfiles (volinfo);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOLFILE_CREATE_FAIL,
+                                        "Failed to create volfile.");
+                                goto out;
+                        }
+                }
+        }
+
+        while (i <= count) {
+                ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+                                                              &brickinfo,
+                                                              _gf_true);
+                if (ret)
+                        goto out;
+#ifdef HAVE_BD_XLATOR
+                /* Check for VG/thin pool if its BD volume */
+                if (brickinfo->vg[0]) {
+                        ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_CRITICAL, 0,
+                                        GD_MSG_INVALID_VG, "%s", msg);
+                                goto out;
+                        }
+                        /* if anyone of the brick does not have thin support,
+                           disable it for entire volume */
+                        caps &= brickinfo->caps;
+                } else
+                        caps = 0;
+#endif
+
+                if (gf_uuid_is_null (brickinfo->uuid)) {
+                        ret = glusterd_resolve_brick (brickinfo);
+                        if (ret) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                        GD_MSG_RESOLVE_BRICK_FAIL, FMTSTR_RESOLVE_BRICK,
+                                        brickinfo->hostname, brickinfo->path);
+                                goto out;
+                        }
+                }
+
+                /* if the volume is a replicate volume, do: */
+                if (is_valid_add_brick) {
+                        if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+                                ret = glusterd_handle_replicate_brick_ops (
+                                                           volinfo, brickinfo,
+                                                           GD_OP_ADD_BRICK);
+                                if (ret < 0)
+                                        goto out;
+                        }
+                }
+                ret = glusterd_brick_start (volinfo, brickinfo,
+                                            _gf_true);
+                if (ret)
+                        goto out;
+                i++;
+                brick = strtok_r (NULL, " \n", &saveptr);
+
+                /* Check if the brick is added in this node, and set
+                 * the restart_needed flag. */
+                if ((!gf_uuid_compare (brickinfo->uuid, MY_UUID)) &&
+                    !restart_needed) {
+                        restart_needed = 1;
+                        gf_msg_debug ("glusterd", 0,
+                                      "Restart gsyncd session, if it's already "
+                                      "running.");
+                }
+        }
+
+        /* If the restart_needed flag is set, restart gsyncd sessions for that
+         * particular master with all the slaves. */
+        if (restart_needed) {
+                param.rsp_dict = dict;
+                param.volinfo = volinfo;
+                dict_foreach (volinfo->gsync_slaves,
+                              _glusterd_restart_gsync_session, &param);
+        }
+        volinfo->caps = caps;
+
+generate_volfiles:
+        if (conf->op_version <= GD_OP_VERSION_3_7_5) {
+                ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        } else {
+                /*
+                 * The cluster is operating at version greater than
+                 * gluster-3.7.5. So no need to sent volfile fetch
+                 * request in commit phase, the same will be done
+                 * in post validate phase with v3 framework.
+                 */
+        }
+
+out:
+        GF_FREE (free_ptr1);
+        GF_FREE (free_ptr2);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/*
+ * Apply remove-brick to a single brick of @volinfo.
+ *
+ * The brick named by @brick is looked up and resolved, and the volume's
+ * defrag statistics are reset. If the brick's uuid matches MY_UUID and
+ * @need_migrate is non-NULL, *need_migrate is set to 1. With @force set the
+ * brick is stopped and the result of that stop is returned; otherwise the
+ * brick is only flagged as decommissioned.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+                                  int force, int *need_migrate)
+{
+        glusterd_brickinfo_t *target = NULL;
+        glusterd_conf_t      *conf   = NULL;
+        int32_t               rc     = -1;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brick);
+
+        conf = THIS->private;
+        GF_ASSERT (conf);
+
+        rc = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+                                                     &target, _gf_false);
+        if (rc)
+                goto out;
+
+        rc = glusterd_resolve_brick (target);
+        if (rc)
+                goto out;
+
+        glusterd_volinfo_reset_defrag_stats (volinfo);
+
+        /* Only if the brick is in this glusterd, do the rebalance */
+        if (!gf_uuid_compare (target->uuid, MY_UUID) && need_migrate)
+                *need_migrate = 1;
+
+        if (!force) {
+                /* non-forced removal: just mark the brick */
+                target->decommissioned = 1;
+                rc = 0;
+                goto out;
+        }
+
+        rc = glusterd_brick_stop (volinfo, target, _gf_true);
+        if (rc)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_STOP_FAIL, "Unable to stop "
+                        "glusterfs, ret: %d", rc);
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Stage (validation) step of add-brick.
+ *
+ * Looks up the volume named by "volname" in @dict, validates its id, then
+ * checks the preconditions below, and finally validates every brick in the
+ * "bricks" string ("count" entries):
+ *   - op-version requirements when a replica count is supplied;
+ *   - a replicate volume must not be stopped while the replica count is
+ *     being raised;
+ *   - arbiter conversion needs op-version >= GD_OP_VERSION_3_8_0;
+ *   - server quorum (originator only, op-version > GD_OP_VERSION_3_7_5);
+ *   - no rebalance may be in progress;
+ *   - no uncommitted remove-brick task when attaching a tier.
+ * For bricks whose uuid matches MY_UUID the brick path is validated and
+ * created and, when op-version >= GD_OP_VERSION_3_6_0, the brick's mount
+ * directory is stored in @rsp_dict under "brick<N>.mount_dir".
+ * "brick_count" in @rsp_dict receives the 1-based index of the last such
+ * brick (0 if there was none).
+ *
+ * @dict:      operation dictionary
+ * @op_errstr: receives a gf_strdup'ed message on most failures
+ * @rsp_dict:  response dictionary filled as described above
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int                   ret = 0;
+        char                 *volname = NULL;
+        int                   count = 0;
+        int                   replica_count = 0;
+        int                   arbiter_count = 0;
+        int                   i = 0;
+        int32_t               local_brick_count = 0;
+        char                 *bricks = NULL;
+        char                 *brick_list = NULL;
+        char                 *saveptr = NULL;
+        char                 *free_ptr = NULL;
+        char                 *brick = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        glusterd_volinfo_t   *volinfo = NULL;
+        xlator_t             *this = NULL;
+        char                  msg[2048] = {0,};
+        char                  key[PATH_MAX] = "";
+        gf_boolean_t          brick_alloc = _gf_false;
+        gf_boolean_t          is_force = _gf_false;
+        glusterd_conf_t      *conf = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND,
+                        "Unable to find volume: %s", volname);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        /* Both counts are optional; a failed lookup leaves them at 0. */
+        ret = dict_get_int32 (dict, "replica-count", &replica_count);
+        if (ret) {
+                gf_msg_debug (THIS->name, 0,
+                              "Unable to get replica count");
+        }
+
+        ret = dict_get_int32 (dict, "arbiter-count", &arbiter_count);
+        if (ret) {
+                gf_msg_debug (THIS->name, 0,
+                              "No arbiter count present in the dict");
+        }
+
+        if (replica_count > 0) {
+                ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
+                                        msg, sizeof(msg));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OP_VERSION_MISMATCH, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        goto out;
+                }
+        }
+
+        if (glusterd_is_volume_replicate (volinfo)) {
+                /* Do not allow add-brick for stopped volumes when replica-count
+                 * is being increased.
+                 */
+                if (conf->op_version >= GD_OP_VERSION_3_7_10 &&
+                    !dict_get (dict, "attach-tier") &&
+                    replica_count &&
+                    GLUSTERD_STATUS_STOPPED == volinfo->status) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), " Volume must not be in"
+                                  " stopped state when replica-count needs to "
+                                  " be increased.");
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        goto out;
+                }
+                /* op-version check for replica 2 to arbiter conversion. If we
+                 * dont have this check, an older peer added as arbiter brick
+                 * will not have the arbiter xlator in its volfile. */
+                if ((conf->op_version < GD_OP_VERSION_3_8_0) &&
+                    (arbiter_count == 1) && (replica_count == 3)) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "Cluster op-version must "
+                                  "be >= 30800 to add arbiter brick to a "
+                                  "replica 2 volume.");
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        goto out;
+                }
+        }
+
+        if (conf->op_version > GD_OP_VERSION_3_7_5 &&
+            is_origin_glusterd (dict)) {
+                ret = glusterd_validate_quorum (this, GD_OP_ADD_BRICK, dict,
+                                                op_errstr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                GD_MSG_SERVER_QUORUM_NOT_MET,
+                                "Server quorum not met. Rejecting operation.");
+                        goto out;
+                }
+        } else {
+                /* Case 1: conf->op_version <= GD_OP_VERSION_3_7_5
+                 *         in this case the add-brick is running
+                 *         syncop framework that will do a quorum
+                 *         check by default
+                 * Case 2: We don't need to do quorum check on every
+                 *         node, only originator glusterd need to
+                 *         check for quorum
+                 * So nothing need to be done in else
+                 */
+        }
+
+        if (glusterd_is_defrag_on(volinfo)) {
+                snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
+                          "progress. Please retry after completion", volname);
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OIP_RETRY_LATER, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        if (dict_get(dict, "attach-tier")) {
+
+                /*
+                 * This check is needed because of add/remove brick
+                 * is not supported on a tiered volume. So once a tier
+                 * is attached we cannot commit or stop the remove-brick
+                 * task. Please change this comment once we start supporting
+                 * add/remove brick on a tiered volume.
+                 */
+                if (!gd_is_remove_brick_committed (volinfo)) {
+
+                        snprintf (msg, sizeof (msg), "An earlier remove-brick "
+                                  "task exists for volume %s. Either commit it"
+                                  " or stop it before attaching a tier.",
+                                  volinfo->volname);
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OLD_REMOVE_BRICK_EXISTS, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get count");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "bricks", &bricks);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get bricks");
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        /* Work on a private copy: strtok_r modifies the string it scans. */
+        if (bricks) {
+                brick_list = gf_strdup (bricks);
+                free_ptr = brick_list;
+        }
+
+        if (count) {
+                /* brick_list+1 must not be formed from a NULL pointer */
+                if (!brick_list) {
+                        snprintf (msg, sizeof (msg), "Failed to duplicate "
+                                  "the brick list");
+                        gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        ret = -1;
+                        goto out;
+                }
+                brick = strtok_r (brick_list+1, " \n", &saveptr);
+        }
+
+
+        while ( i < count) {
+                if (!glusterd_store_is_valid_brickpath (volname, brick) ||
+                    !glusterd_is_valid_volfpath (volname, brick)) {
+                        snprintf (msg, sizeof (msg), "brick path %s is "
+                                  "too long", brick);
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRKPATH_TOO_LONG, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+
+                        ret = -1;
+                        goto out;
+
+                }
+
+                ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo,
+                                                         _gf_true, NULL);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_NOT_FOUND,
+                                "Add-brick: Unable"
+                                " to get brickinfo");
+                        goto out;
+                }
+                /* from here until the delete below, out: owns brickinfo */
+                brick_alloc = _gf_true;
+
+                ret = glusterd_new_brick_validate (brick, brickinfo, msg,
+                                                   sizeof (msg));
+                if (ret) {
+                        *op_errstr = gf_strdup (msg);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+#ifdef HAVE_BD_XLATOR
+                        if (brickinfo->vg[0]) {
+                                ret = glusterd_is_valid_vg (brickinfo, 1, msg);
+                                if (ret) {
+                                        gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                                                GD_MSG_INVALID_VG, "%s",
+                                                msg);
+                                        *op_errstr = gf_strdup (msg);
+                                        goto out;
+                                }
+                        }
+#endif
+
+                        ret = glusterd_validate_and_create_brickpath (brickinfo,
+                                                          volinfo->volume_id,
+                                                          op_errstr, is_force);
+                        if (ret)
+                                goto out;
+
+                        /* A bricks mount dir is required only by snapshots which were
+                         * introduced in gluster-3.6.0
+                         */
+                        if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+                                ret = glusterd_get_brick_mount_dir
+                                        (brickinfo->path, brickinfo->hostname,
+                                         brickinfo->mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+                                                "Failed to get brick mount_dir");
+                                        goto out;
+                                }
+
+                                snprintf (key, sizeof(key), "brick%d.mount_dir",
+                                          i + 1);
+                                ret = dict_set_dynstr_with_alloc
+                                        (rsp_dict, key, brickinfo->mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "Failed to set %s", key);
+                                        goto out;
+                                }
+                        }
+
+                        local_brick_count = i + 1;
+                }
+
+                glusterd_brickinfo_delete (brickinfo);
+                brick_alloc = _gf_false;
+                brickinfo = NULL;
+                brick = strtok_r (NULL, " \n", &saveptr);
+                i++;
+        }
+
+        ret = dict_set_int32 (rsp_dict, "brick_count",
+                              local_brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set local_brick_count");
+                goto out;
+        }
+
+out:
+        GF_FREE (free_ptr);
+        if (brick_alloc && brickinfo)
+                glusterd_brickinfo_delete (brickinfo);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validate the bricks stored in @dict under "brick1" .. "brick<brick_count>"
+ * for the remove-brick command @cmd on @volinfo.
+ *
+ * Each brick must belong to the volume. A commit (GF_OP_CMD_COMMIT or
+ * GF_OP_CMD_DETACH_COMMIT) is refused for bricks that are not decommissioned,
+ * and a detach commit is also refused while the volume's defrag status is
+ * GF_DEFRAG_STATUS_STARTED. Local bricks must be started for the start
+ * commands; for all other bricks the owning peer must be known and connected.
+ *
+ * On failure *errstr receives a gf_strdup'ed explanation.
+ * Returns 0 when every brick passes, non-zero otherwise.
+ */
+static int
+glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
+                                       dict_t *dict,
+                                       glusterd_volinfo_t *volinfo,
+                                       char **errstr)
+{
+        char                  reason[2048]  = {0,};
+        char                  dict_key[256] = {0,};
+        char                 *brick_name    = NULL;
+        glusterd_brickinfo_t *bi            = NULL;
+        glusterd_peerinfo_t  *peer          = NULL;
+        gf_boolean_t          starting      = _gf_false;
+        int                   idx           = 1;
+        int                   ret           = -1;
+
+        starting = ((cmd == GF_OP_CMD_START) ||
+                    (cmd == GF_OP_CMD_DETACH_START));
+
+        /* Check whether all the nodes of the bricks to be removed are
+         * up, if not fail the operation */
+        for (idx = 1; idx <= brick_count; idx++) {
+                snprintf (dict_key, sizeof (dict_key), "brick%d", idx);
+                ret = dict_get_str (dict, dict_key, &brick_name);
+                if (ret) {
+                        snprintf (reason, sizeof (reason),
+                                  "Unable to get %s", dict_key);
+                        *errstr = gf_strdup (reason);
+                        goto out;
+                }
+
+                ret = glusterd_volume_brickinfo_get_by_brick (brick_name,
+                                                              volinfo, &bi,
+                                                              _gf_false);
+                if (ret) {
+                        snprintf (reason, sizeof (reason), "Incorrect brick "
+                                  "%s for volume %s", brick_name,
+                                  volinfo->volname);
+                        *errstr = gf_strdup (reason);
+                        goto out;
+                }
+
+                /* Do not allow commit if the bricks are not decommissioned
+                 * if its a remove brick commit or detach-tier commit
+                 */
+                if (cmd == GF_OP_CMD_COMMIT && !bi->decommissioned) {
+                        snprintf (reason, sizeof (reason), "Brick %s "
+                                  "is not decommissioned. "
+                                  "Use start or force option", brick_name);
+                        goto reject;
+                }
+
+                if (cmd == GF_OP_CMD_DETACH_COMMIT) {
+                        if (!bi->decommissioned) {
+                                snprintf (reason, sizeof (reason), "Bricks in Hot "
+                                          "tier are not decommissioned yet. Use "
+                                          "gluster volume tier <VOLNAME> "
+                                          "detach start to start the decommission process");
+                                goto reject;
+                        }
+                        if (volinfo->rebal.defrag_status ==
+                            GF_DEFRAG_STATUS_STARTED) {
+                                snprintf (reason, sizeof (reason), "Bricks in Hot "
+                                          "tier are not decommissioned yet. Wait for "
+                                          "the detach to complete using gluster volume "
+                                          "tier <VOLNAME> status.");
+                                goto reject;
+                        }
+                }
+
+                if (glusterd_is_local_brick (THIS, volinfo, bi)) {
+                        if (starting && bi->status != GF_BRICK_STARTED) {
+                                snprintf (reason, sizeof (reason), "Found stopped "
+                                          "brick %s", brick_name);
+                                goto reject;
+                        }
+                        /* local brick: no peer to check */
+                        continue;
+                }
+
+                /* peer is only dereferenced between lock and unlock */
+                rcu_read_lock ();
+                peer = glusterd_peerinfo_find_by_uuid (bi->uuid);
+                if (!peer) {
+                        snprintf (reason, sizeof(reason), "Host node of the "
+                                  "brick %s is not in cluster", brick_name);
+                        rcu_read_unlock ();
+                        goto reject;
+                }
+                if (!peer->connected) {
+                        snprintf (reason, sizeof(reason), "Host node of the "
+                                  "brick %s is down", brick_name);
+                        rcu_read_unlock ();
+                        goto reject;
+                }
+                rcu_read_unlock ();
+        }
+
+        goto out;
+
+reject:
+        /* common tail for every validation failure reported as -1 */
+        *errstr = gf_strdup (reason);
+        ret = -1;
+
+out:
+        return ret;
+}
+
+/*
+ * Stage (validation) step of remove-brick / detach-tier.
+ *
+ * Reads "volname", "command" and "count" from @dict, refuses to remove all
+ * bricks of the volume, and then applies per-command checks:
+ *   - DETACH_START / START: volume type and state checks, no earlier
+ *     uncommitted remove-brick, no rebalance in progress, client op-version
+ *     support, brick validation and task-id generation/lookup;
+ *   - DETACH_COMMIT / COMMIT: migration-state checks (COMMIT), brick
+ *     validation and a check that geo-replication is not active;
+ *   - DETACH_COMMIT_FORCE: the volume must be a tier volume;
+ *   - STATUS, STOP, STOP_DETACH_TIER, COMMIT_FORCE: no further checks.
+ *
+ * @dict:      operation dictionary
+ * @op_errstr: receives a heap-allocated message on most failures
+ *
+ * Returns 0 when the operation may proceed and non-zero otherwise.
+ */
+int
+glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
+{
+        int                   ret         = -1;
+        char                 *volname     = NULL;
+        glusterd_volinfo_t   *volinfo     = NULL;
+        char                 *errstr      = NULL;
+        int32_t               brick_count = 0;
+        char                  msg[2048]   = {0,};
+        int32_t               flag        = 0;
+        gf1_op_commands       cmd         = GF_OP_CMD_NONE;
+        char                 *task_id_str = NULL;
+        xlator_t             *this        = NULL;
+        gsync_status_param_t  param       = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
+                                msg, sizeof(msg));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_VERSION_MISMATCH, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                /* this is a failed lookup, so log it as GET, not SET */
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "Volume %s does not exist", volname);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        ret = dict_get_int32 (dict, "command", &flag);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get brick command");
+                goto out;
+        }
+        cmd = flag;
+
+        ret = dict_get_int32 (dict, "count", &brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get brick count");
+                goto out;
+        }
+
+        ret = 0;
+        if (volinfo->brick_count == brick_count) {
+                errstr = gf_strdup ("Deleting all the bricks of the "
+                                    "volume is not allowed");
+                ret = -1;
+                goto out;
+        }
+
+        /* Every "goto out" inside the switch reports failure unless the
+         * case sets ret itself. */
+        ret = -1;
+        switch (cmd) {
+        case GF_OP_CMD_NONE:
+                errstr = gf_strdup ("no remove-brick command issued");
+                goto out;
+
+        case GF_OP_CMD_STATUS:
+                ret = 0;
+                goto out;
+
+        case GF_OP_CMD_DETACH_START:
+                if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                        snprintf (msg, sizeof(msg), "volume %s is not a tier "
+                                  "volume", volinfo->volname);
+                        errstr = gf_strdup (msg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_TIER, "%s", errstr);
+                        goto out;
+                }
+                /* fall through: detach-start shares the start checks */
+
+        case GF_OP_CMD_START:
+        {
+                if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+                    dict_get (dict, "replica-count")) {
+                        snprintf (msg, sizeof(msg), "Migration of data is not "
+                                  "needed when reducing replica count. Use the"
+                                  " 'force' option");
+                        errstr = gf_strdup (msg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_USE_THE_FORCE, "%s", errstr);
+                        goto out;
+                }
+
+                if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+                        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                                snprintf (msg, sizeof (msg), "Volume %s needs "
+                                          "to be started before detach-tier "
+                                          "(you can use 'force' or 'commit' "
+                                          "to override this behavior)",
+                                          volinfo->volname);
+                        } else {
+                                snprintf (msg, sizeof (msg), "Volume %s needs "
+                                          "to be started before remove-brick "
+                                          "(you can use 'force' or 'commit' "
+                                          "to override this behavior)",
+                                          volinfo->volname);
+                        }
+                        errstr = gf_strdup (msg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_STARTED, "%s", errstr);
+                        goto out;
+                }
+                if (!gd_is_remove_brick_committed (volinfo)) {
+                        snprintf (msg, sizeof (msg), "An earlier remove-brick "
+                                  "task exists for volume %s. Either commit it"
+                                  " or stop it before starting a new task.",
+                                  volinfo->volname);
+                        errstr = gf_strdup (msg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OLD_REMOVE_BRICK_EXISTS, "Earlier remove-brick"
+                                " task exists for volume %s.",
+                                volinfo->volname);
+                        goto out;
+                }
+                if (glusterd_is_defrag_on(volinfo)) {
+                        errstr = gf_strdup("Rebalance is in progress. Please "
+                                           "retry after completion");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OIP_RETRY_LATER, "%s", errstr);
+                        goto out;
+                }
+
+                /* Check if the connected clients are all of version
+                 * glusterfs-3.6 and higher. This is needed to prevent some data
+                 * loss issues that could occur when older clients are connected
+                 * when rebalance is run.
+                 */
+                ret = glusterd_check_client_op_version_support
+                        (volname, GD_OP_VERSION_3_6_0, NULL);
+                if (ret) {
+                        /* Keep the formatter's return value out of ret: it
+                         * is not an error code for this function
+                         * (presumably a length, as with asprintf). */
+                        (void) gf_asprintf (op_errstr, "Volume %s has one or "
+                                            "more connected clients of a version"
+                                            " lower than GlusterFS-v3.6.0. "
+                                            "Starting remove-brick in this state "
+                                            "could lead to data loss.\nPlease "
+                                            "disconnect those clients before "
+                                            "attempting this command again.",
+                                            volname);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+                                                             dict, volinfo,
+                                                             &errstr);
+                if (ret)
+                        goto out;
+
+                if (is_origin_glusterd (dict)) {
+                        ret = glusterd_generate_and_set_task_id
+                                (dict, GF_REMOVE_BRICK_TID_KEY);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_TASKID_GEN_FAIL,
+                                        "Failed to generate task-id");
+                                goto out;
+                        }
+                } else {
+                        /* a missing task id is only worth a warning */
+                        ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
+                                            &task_id_str);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Missing remove-brick-id");
+                                ret = 0;
+                        }
+                }
+                break;
+        }
+
+        case GF_OP_CMD_STOP:
+        case GF_OP_CMD_STOP_DETACH_TIER:
+                ret = 0;
+                break;
+
+        case GF_OP_CMD_DETACH_COMMIT:
+                if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                        snprintf (msg, sizeof(msg), "volume %s is not a tier "
+                                  "volume", volinfo->volname);
+                        errstr = gf_strdup (msg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_TIER, "%s", errstr);
+                        goto out;
+                }
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+                                                             dict, volinfo,
+                                                             &errstr);
+                if (ret)
+                        goto out;
+
+                /* If geo-rep is configured, for this volume, it should be
+                 * stopped.
+                 */
+                param.volinfo = volinfo;
+                ret = glusterd_check_geo_rep_running (&param, op_errstr);
+                if (ret || param.is_active) {
+                        ret = -1;
+                        goto out;
+                }
+                break;
+
+        case GF_OP_CMD_COMMIT:
+                if (volinfo->decommission_in_progress) {
+                        errstr = gf_strdup ("use 'force' option as migration "
+                                            "is in progress");
+                        goto out;
+                }
+
+                if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_FAILED) {
+                        errstr = gf_strdup ("use 'force' option as migration "
+                                            "has failed");
+                        goto out;
+                }
+
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+                                                             dict, volinfo,
+                                                             &errstr);
+                if (ret)
+                        goto out;
+
+                /* If geo-rep is configured, for this volume, it should be
+                 * stopped.
+                 */
+                param.volinfo = volinfo;
+                ret = glusterd_check_geo_rep_running (&param, op_errstr);
+                if (ret || param.is_active) {
+                        ret = -1;
+                        goto out;
+                }
+
+                break;
+
+        case GF_OP_CMD_DETACH_COMMIT_FORCE:
+                if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                        snprintf (msg, sizeof(msg), "volume %s is not a tier "
+                                  "volume", volinfo->volname);
+                        errstr = gf_strdup (msg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_TIER, "%s", errstr);
+                        goto out;
+                }
+                /* fall through: nothing else to check for a forced commit */
+        case GF_OP_CMD_COMMIT_FORCE:
+                break;
+        }
+        ret = 0;
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        if (ret && errstr) {
+                /* hand the message to the caller, or release it when the
+                 * caller did not ask for one */
+                if (op_errstr)
+                        *op_errstr = errstr;
+                else
+                        GF_FREE (errstr);
+        }
+
+        return ret;
+}
+
+/*
+ * Migration callback for remove-brick (presumably invoked when the defrag
+ * started for a remove-brick reports @status - confirm against the caller).
+ *
+ * As compiled today the whole status-handling body is disabled with #if 0,
+ * so @status is not examined: the function only clears
+ * volinfo->decommission_in_progress and returns 0.
+ */
+int
+glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status)
+{
+ int ret = 0;
+
+#if 0 /* TODO: enable this behavior once cluster-wide awareness comes for
+ defrag cbk function */
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+
+ switch (status) {
+ case GF_DEFRAG_STATUS_PAUSED:
+ case GF_DEFRAG_STATUS_FAILED:
+ /* No changes required in the volume file.
+ everything should remain as is */
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ /* Fall back to the old volume file */
+ cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ }
+ break;
+
+ case GF_DEFRAG_STATUS_COMPLETE:
+ /* Done with the task, you can remove the brick from the
+ volume file */
+ cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s",
+ brickinfo->path);
+ brickinfo->decommissioned = 0;
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ /*TODO: use the 'atomic' flavour of brick_stop*/
+ ret = glusterd_brick_stop (volinfo, brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to stop glusterfs (%d)", ret);
+ }
+ }
+ glusterd_delete_brick (volinfo, brickinfo);
+ }
+ break;
+
+ default:
+ GF_ASSERT (!"cbk function called with wrong status");
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to write volume files (%d)", ret);
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to store volume info (%d)", ret);
+
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_check_generate_start_nfs ();
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to start nfs process (%d)", ret);
+ }
+
+#endif
+
+ /* The only live effect: mark the decommission as no longer running. */
+ volinfo->decommission_in_progress = 0;
+ return ret;
+}
+
+/*
+ * Prepare @volinfo and @dict for attaching a hot tier of @count bricks.
+ *
+ * The volume's current layout is copied into tier_info as the cold tier, the
+ * hot tier's replica count, brick count and type are recorded, "type" in
+ * @dict is set to GF_CLUSTER_TYPE_TIER, and the volume options
+ * "features.ctr-enabled" and "cluster.tier-mode" are set.
+ *
+ * @bricks is accepted but not used here.
+ *
+ * Returns 0 when all three dictionary updates succeed, otherwise the result
+ * of the first one that failed.
+ */
+static int
+glusterd_op_perform_attach_tier (dict_t *dict,
+                                 glusterd_volinfo_t *volinfo,
+                                 int count,
+                                 char *bricks)
+{
+        int rc           = 0;
+        int hot_replicas = 0;
+        int hot_type     = 0;
+
+        /*
+         * Keep the pre-attach (cold) layout around until the graph has been
+         * generated; if something fails before that point the structure
+         * reverts to its original state.
+         */
+        volinfo->tier_info.cold_type = volinfo->type;
+        volinfo->tier_info.cold_brick_count = volinfo->brick_count;
+        volinfo->tier_info.cold_replica_count = volinfo->replica_count;
+        volinfo->tier_info.cold_disperse_count = volinfo->disperse_count;
+        volinfo->tier_info.cold_redundancy_count = volinfo->redundancy_count;
+        volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
+
+        /* Without an explicit replica-count the hot tier counts as 1. */
+        rc = dict_get_int32 (dict, "replica-count", &hot_replicas);
+        volinfo->tier_info.hot_replica_count = rc ? 1 : hot_replicas;
+        volinfo->tier_info.hot_brick_count = count;
+
+        /* The lookup result is not inspected; hot_type stays at its initial
+         * 0 unless dict_get_int32 stores a value. */
+        rc = dict_get_int32 (dict, "hot-type", &hot_type);
+        volinfo->tier_info.hot_type = hot_type;
+
+        rc = dict_set_int32 (dict, "type", GF_CLUSTER_TYPE_TIER);
+        if (rc)
+                return rc;
+
+        rc = dict_set_str (volinfo->dict, "features.ctr-enabled", "on");
+        if (rc)
+                return rc;
+
+        return dict_set_str (volinfo->dict, "cluster.tier-mode", "cache");
+}
+
+/*
+ * Commit step of add-brick.
+ *
+ * Reads "volname", "count" and "bricks" from @dict, records tier details
+ * when "attach-tier" is present, adds the bricks through
+ * glusterd_op_perform_add_bricks(), stores the volinfo when the cluster
+ * op-version is <= GD_OP_VERSION_3_7_5, and finally runs
+ * glusterd_svcs_manager() if the volume is started.
+ *
+ * @dict:      operation dictionary
+ * @op_errstr: not written by this function
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_op_add_brick (dict_t *dict, char **op_errstr)
+{
+        int                 ret     = 0;
+        char               *volname = NULL;
+        glusterd_conf_t    *priv    = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+        xlator_t           *this    = NULL;
+        char               *bricks  = NULL;
+        int32_t             count   = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+
+        if (ret) {
+                /* this is a lookup failure, not an allocation failure */
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND,
+                        "Unable to find volume: %s", volname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get count");
+                goto out;
+        }
+
+
+        ret = dict_get_str (dict, "bricks", &bricks);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get bricks");
+                goto out;
+        }
+
+        if (dict_get(dict, "attach-tier")) {
+                gf_msg_debug (THIS->name, 0, "Adding tier");
+                /* glusterd_op_perform_add_bricks() reads "type" from dict,
+                 * so do not add bricks if the tier setup could not be
+                 * recorded. */
+                ret = glusterd_op_perform_attach_tier (dict, volinfo, count,
+                                                       bricks);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL,
+                                "Unable to record tier details for volume %s",
+                                volname);
+                        goto out;
+                }
+        }
+
+        ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_ADD_FAIL, "Unable to add bricks");
+                goto out;
+        }
+        if (priv->op_version <= GD_OP_VERSION_3_7_5) {
+                ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret)
+                        goto out;
+        } else {
+                /*
+                 * The cluster is operating at version greater than
+                 * gluster-3.7.5. So no need to store volfiles
+                 * in commit phase, the same will be done
+                 * in post validate phase with v3 framework.
+                 */
+        }
+
+        if (GLUSTERD_STATUS_STARTED == volinfo->status)
+                ret = glusterd_svcs_manager (volinfo);
+
+out:
+        return ret;
+}
+
+/*
+ * Copy the saved cold-tier layout values from volinfo->tier_info back into
+ * the volume's own type/count fields. The assignments are independent of
+ * each other.
+ */
+static void
+glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo)
+{
+        /* layout counts */
+        volinfo->dist_leaf_count  = volinfo->tier_info.cold_dist_leaf_count;
+        volinfo->redundancy_count = volinfo->tier_info.cold_redundancy_count;
+        volinfo->disperse_count   = volinfo->tier_info.cold_disperse_count;
+        volinfo->replica_count    = volinfo->tier_info.cold_replica_count;
+
+        /* cluster type */
+        volinfo->type             = volinfo->tier_info.cold_type;
+}
+
+/*
+ * Commit-phase handler for remove-brick and detach-tier.
+ *
+ * The "command" value in @dict selects the action:
+ *   - NONE: fails; STATUS: succeeds without doing anything else.
+ *   - STOP / STOP_DETACH_TIER: clears the decommissioned flag on all bricks,
+ *     regenerates volfiles and stores the volinfo.
+ *   - START / DETACH_START: records the task id (if present) and later
+ *     starts a rebalance when the removed bricks need one.
+ *   - COMMIT / COMMIT_FORCE / DETACH_COMMIT(_FORCE): removes the bricks with
+ *     force set; the detach variants first restore the cold-tier layout and
+ *     drop tier-specific volume options.
+ *
+ * @dict:      operation dictionary ("volname", "command", "count",
+ *             "brick<N>", optional "replica-count" and "commit-hash")
+ * @op_errstr: receives a copy of the error text when the operation fails
+ *             and an error text was produced
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
+{
+        int                   ret            = -1;
+        char                 *volname        = NULL;
+        glusterd_volinfo_t   *volinfo        = NULL;
+        char                 *brick          = NULL;
+        int32_t               count          = 0;
+        int32_t               i              = 1;
+        char                  key[256]       = {0,};
+        int32_t               flag           = 0;
+        char                  err_str[4096]  = {0,};
+        int                   need_rebalance = 0;
+        int                   force          = 0;
+        gf1_op_commands       cmd            = 0;
+        int32_t               replica_count  = 0;
+        glusterd_brickinfo_t *brickinfo      = NULL;
+        glusterd_brickinfo_t *tmp            = NULL;
+        char                 *task_id_str    = NULL;
+        xlator_t             *this           = NULL;
+        dict_t               *bricks_dict    = NULL;
+        char                 *brick_tmpstr   = NULL;
+        int                   start_remove   = 0;
+        uint32_t              commit_hash    = 0;
+        int                   defrag_cmd     = 0;
+        int                   detach_commit  = 0;
+        void                 *tier_info      = NULL;
+        char                 *cold_shd_key   = NULL;
+        char                 *hot_shd_key    = NULL;
+        int                   delete_key     = 1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "volname", &volname);
+
+        if (ret) {
+                /* A failed dict lookup, not a brick-add failure. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                /* The lookup failed; this is not an allocation failure. */
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "Unable to find volume %s",
+                        volname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "command", &flag);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get command");
+                goto out;
+        }
+        cmd = flag;
+
+        if ((GF_OP_CMD_START == cmd) ||
+            (GF_OP_CMD_DETACH_START == cmd))
+                start_remove = 1;
+
+        /* Set task-id, if available, in ctx dict for operations other than
+         * start
+         */
+
+        if (is_origin_glusterd (dict) && (!start_remove)) {
+                if (!gf_uuid_is_null (volinfo->rebal.rebalance_id)) {
+                        ret = glusterd_copy_uuid_to_dict
+                                (volinfo->rebal.rebalance_id, dict,
+                                 GF_REMOVE_BRICK_TID_KEY);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+                                        "Failed to set remove-brick-id");
+                                goto out;
+                        }
+                }
+        }
+
+        /* Clear task-id, rebal.op and stored bricks on committing/stopping
+         * remove-brick */
+        if ((!start_remove) && (cmd != GF_OP_CMD_STATUS)) {
+                gf_uuid_clear (volinfo->rebal.rebalance_id);
+                volinfo->rebal.op = GD_OP_NONE;
+                dict_unref (volinfo->rebal.dict);
+                volinfo->rebal.dict = NULL;
+        }
+
+        ret = -1;
+        switch (cmd) {
+        case GF_OP_CMD_NONE:
+                goto out;
+
+        case GF_OP_CMD_STATUS:
+                ret = 0;
+                goto out;
+
+        case GF_OP_CMD_STOP:
+        case GF_OP_CMD_STOP_DETACH_TIER:
+        {
+                /* Fall back to the old volume file */
+                cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+                                              brick_list) {
+                        if (!brickinfo->decommissioned)
+                                continue;
+                        brickinfo->decommissioned = 0;
+                }
+                ret = glusterd_create_volfiles_and_notify_services (volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL,
+                                "failed to create volfiles");
+                        goto out;
+                }
+
+                ret = glusterd_store_volinfo (volinfo,
+                                             GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOLINFO_SET_FAIL,
+                                "failed to store volinfo");
+                        goto out;
+                }
+
+                ret = 0;
+                goto out;
+        }
+
+        case GF_OP_CMD_DETACH_START:
+        case GF_OP_CMD_START:
+                /* Reset defrag status to 'NOT STARTED' whenever a
+                 * remove-brick/rebalance command is issued to remove
+                 * stale information from previous run.
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+                ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+                if (ret) {
+                        gf_msg_debug (this->name, errno,
+                                      "Missing remove-brick-id");
+                        ret = 0;
+                } else {
+                        gf_uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+                        volinfo->rebal.op = GD_OP_REMOVE_BRICK;
+                }
+                force = 0;
+                break;
+
+        case GF_OP_CMD_COMMIT:
+                force = 1;
+                break;
+
+        case GF_OP_CMD_DETACH_COMMIT:
+        case GF_OP_CMD_DETACH_COMMIT_FORCE:
+                glusterd_op_perform_detach_tier (volinfo);
+                detach_commit = 1;
+
+                /* Disabling ctr when detaching a tier, since
+                 * currently tier is the only consumer of ctr.
+                 * Revisit this code when this constraint no
+                 * longer exist.
+                 */
+                dict_del (volinfo->dict, "features.ctr-enabled");
+                dict_del (volinfo->dict, "cluster.tier-mode");
+
+                hot_shd_key = gd_get_shd_key (volinfo->tier_info.hot_type);
+                cold_shd_key = gd_get_shd_key (volinfo->tier_info.cold_type);
+                if (hot_shd_key) {
+                        /*
+                         * Since post detach, shd graph will not contain hot
+                         * tier. So we need to clear option set for hot tier.
+                         * For a tiered volume there can be different key
+                         * for both hot and cold. If hot tier is shd compatible
+                         * then we need to remove the configured value when
+                         * detaching a tier, only if the key's are different or
+                         * cold key is NULL. So we will set delete_key first,
+                         * and if cold key is not null and they are equal then
+                         * we will clear the flag. Otherwise we will delete the
+                         * key.
+                         */
+                        if (cold_shd_key)
+                                delete_key = strcmp (hot_shd_key, cold_shd_key);
+                        if (delete_key)
+                                dict_del (volinfo->dict, hot_shd_key);
+                }
+                /* fall through */
+
+        case GF_OP_CMD_COMMIT_FORCE:
+
+                if (volinfo->decommission_in_progress) {
+                        if (volinfo->rebal.defrag) {
+                                LOCK (&volinfo->rebal.defrag->lock);
+                                /* Fake 'rebalance-complete' so the graph change
+                                   happens right away */
+                                volinfo->rebal.defrag_status =
+                                                GF_DEFRAG_STATUS_COMPLETE;
+
+                                UNLOCK (&volinfo->rebal.defrag->lock);
+                        }
+                        /* Graph change happens in rebalance _cbk function,
+                           no need to do anything here */
+                        /* TODO: '_cbk' function is not doing anything for now */
+                }
+
+                ret = 0;
+                force = 1;
+                break;
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get count");
+                goto out;
+        }
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                count = glusterd_set_detach_bricks(dict, volinfo);
+
+        /* Save the list of bricks for later usage only on starting a
+         * remove-brick. Right now this is required for displaying the task
+         * parameters with task status in volume status.
+         */
+
+        if (start_remove) {
+                bricks_dict = dict_new ();
+                if (!bricks_dict) {
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_int32 (bricks_dict, "count", count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to save remove-brick count");
+                        goto out;
+                }
+        }
+
+        while ( i <= count) {
+                snprintf (key, 256, "brick%d", i);
+                ret = dict_get_str (dict, key, &brick);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED, "Unable to get %s",
+                                key);
+                        goto out;
+                }
+
+                if (start_remove) {
+                        brick_tmpstr = gf_strdup (brick);
+                        if (!brick_tmpstr) {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                        GD_MSG_NO_MEMORY,
+                                        "Failed to duplicate brick name");
+                                goto out;
+                        }
+                        ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to add brick to dict");
+                                goto out;
+                        }
+                        /* Cleared so the GF_FREE at 'out' does not free the
+                         * string just stored in bricks_dict. */
+                        brick_tmpstr = NULL;
+                }
+
+                ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
+                                                        &need_rebalance);
+                if (ret)
+                        goto out;
+                i++;
+        }
+
+        if (detach_commit) {
+                /* Clear related information from volinfo */
+                tier_info = ((void *)(&volinfo->tier_info));
+                memset (tier_info, 0, sizeof (volinfo->tier_info));
+        }
+
+        if (start_remove)
+                volinfo->rebal.dict = dict_ref (bricks_dict);
+
+        ret = dict_get_int32 (dict, "replica-count", &replica_count);
+        if (!ret) {
+                /* Success path: there is no error number to report. */
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "changing replica count %d to %d on volume %s",
+                        volinfo->replica_count, replica_count,
+                        volinfo->volname);
+                volinfo->replica_count = replica_count;
+                /* A reduction in replica count implies an arbiter volume
+                 * earlier is now no longer one. */
+                if (volinfo->arbiter_count)
+                        volinfo->arbiter_count = 0;
+                volinfo->sub_count = replica_count;
+                volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+                /*
+                 * volinfo->type and sub_count have already been set for
+                 * volumes undergoing a detach operation, they should not
+                 * be modified here.
+                 */
+                if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH_COMMIT) &&
+                    (cmd != GF_OP_CMD_DETACH_COMMIT_FORCE)) {
+                        if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+                                volinfo->type = GF_CLUSTER_TYPE_NONE;
+                                /* backward compatibility */
+                                volinfo->sub_count = 0;
+                        } else {
+                                volinfo->type = GF_CLUSTER_TYPE_STRIPE;
+                                /* backward compatibility */
+                                volinfo->sub_count = volinfo->dist_leaf_count;
+                        }
+                }
+        }
+        volinfo->subvol_count = (volinfo->brick_count /
+                                 volinfo->dist_leaf_count);
+
+        ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
+                goto out;
+        }
+
+        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOLINFO_STORE_FAIL, "failed to store volinfo");
+                goto out;
+        }
+
+        if (start_remove &&
+            volinfo->status == GLUSTERD_STATUS_STARTED) {
+                ret = glusterd_svcs_reconfigure ();
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_NFS_RECONF_FAIL,
+                                "Unable to reconfigure NFS-Server");
+                        goto out;
+                }
+        }
+
+        /* Need to reset the defrag/rebalance status accordingly */
+        switch (volinfo->rebal.defrag_status) {
+        case GF_DEFRAG_STATUS_FAILED:
+        case GF_DEFRAG_STATUS_COMPLETE:
+                volinfo->rebal.defrag_status = 0;
+                break;
+        default:
+                break;
+        }
+        if (!force && need_rebalance) {
+                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+                        volinfo->rebal.commit_hash = commit_hash;
+                }
+                /* perform the rebalance operations */
+                defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
+                if (cmd == GF_OP_CMD_DETACH_START)
+                        defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
+                ret = glusterd_handle_defrag_start
+                        (volinfo, err_str, sizeof (err_str),
+                         defrag_cmd,
+                         glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
+
+                if (!ret)
+                        volinfo->decommission_in_progress = 1;
+
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_REBALANCE_START_FAIL,
+                                "failed to start the rebalance");
+                }
+        } else {
+                if (GLUSTERD_STATUS_STARTED == volinfo->status)
+                        ret = glusterd_svcs_manager (volinfo);
+        }
+out:
+        if (ret && err_str[0] && op_errstr)
+                *op_errstr = gf_strdup (err_str);
+
+        GF_FREE (brick_tmpstr);
+        if (bricks_dict)
+                dict_unref (bricks_dict);
+
+        return ret;
+}
+
+/*
+ * Stage (validation) step of the barrier operation.
+ *
+ * Verifies that @dict names an existing, started volume and carries a
+ * boolean "barrier" value. On the volume-related and barrier-value failures
+ * an explanatory message is written to @op_errstr.
+ *
+ * Returns 0 when validation passes, non-zero otherwise.
+ */
+int
+glusterd_op_stage_barrier (dict_t *dict, char **op_errstr)
+{
+        int                 ret     = -1;
+        xlator_t           *this    = NULL;
+        char               *name    = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+
+        GF_ASSERT (dict);
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* 1. the volume name must be in the request */
+        ret = dict_get_str (dict, "volname", &name);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Volname not present in "
+                        "dict");
+                goto out;
+        }
+
+        /* 2. the volume must exist */
+        ret = glusterd_volinfo_find (name, &volinfo);
+        if (ret != 0) {
+                gf_asprintf (op_errstr, "Volume %s does not exist", name);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", *op_errstr);
+                goto out;
+        }
+
+        /* 3. ... and be started */
+        if (!glusterd_is_volume_started (volinfo)) {
+                gf_asprintf (op_errstr, "Volume %s is not started", name);
+                ret = -1;
+                goto out;
+        }
+
+        /* 4. the barrier value must parse as a boolean */
+        ret = dict_get_str_boolean (dict, "barrier", -1);
+        if (ret != -1) {
+                ret = 0;
+        } else {
+                gf_asprintf (op_errstr, "Barrier op for volume %s not present "
+                             "in dict", name);
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", *op_errstr);
+        }
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit step of the barrier operation.
+ *
+ * Copies the "barrier" value from @dict into the volume's option dict as
+ * "features.barrier", updates the volume op-versions, regenerates the
+ * volfiles and stores the volinfo.
+ *
+ * Returns 0 on success, non-zero on failure; some failures also set
+ * @op_errstr.
+ */
+int
+glusterd_op_barrier (dict_t *dict, char **op_errstr)
+{
+        int                 ret       = -1;
+        xlator_t           *this      = NULL;
+        char               *name      = NULL;
+        glusterd_volinfo_t *volinfo   = NULL;
+        char               *op_value  = NULL;
+
+        GF_ASSERT (dict);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "volname", &name);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "Volname not present in "
+                        "dict");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (name, &volinfo);
+        if (ret != 0) {
+                gf_asprintf (op_errstr, "Volume %s does not exist", name);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", *op_errstr);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "barrier", &op_value);
+        if (ret != 0) {
+                gf_asprintf (op_errstr, "Barrier op for volume %s not present "
+                             "in dict", name);
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED, "%s", *op_errstr);
+                goto out;
+        }
+
+        /* Record the requested value as a volume option. */
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, "features.barrier",
+                                          op_value);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set barrier op in"
+                        " volume option dict");
+                goto out;
+        }
+
+        gd_update_volume_op_versions (volinfo);
+
+        /* Regenerate volfiles, then persist the volume only if that worked. */
+        ret = glusterd_create_volfiles (volinfo);
+        if (ret == 0) {
+                ret = glusterd_store_volinfo (volinfo,
+                                          GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "Failed to create volfiles");
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Request handler for attach-tier: hands @req to
+ * glusterd_big_locked_handler together with __glusterd_handle_add_brick
+ * and returns that call's result.
+ */
+int
+glusterd_handle_attach_tier (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+}
+
+/*
+ * Request handler for detach-tier: hands @req to
+ * glusterd_big_locked_handler together with __glusterd_handle_remove_brick
+ * and returns that call's result.
+ */
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_remove_brick);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-helper.c b/xlators/mgmt/glusterd/src/glusterd-conn-helper.c
new file mode 100644
index 00000000000..bfa9d02aa1b
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-helper.c
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-svc-mgmt.h"
+
+#define _LGPL_SOURCE
+#include <urcu/rculist.h>
+
+/*
+ * Map a connection object back to the glusterd_svc_t it belongs to.
+ *
+ * Uses cds_list_entry with the member name "conn", i.e. @conn is expected
+ * to be the `conn` member of a glusterd_svc_t.
+ * NOTE(review): presumably a pure container-of computation, so a @conn that
+ * is not embedded in a glusterd_svc_t yields a bogus pointer - confirm.
+ */
+glusterd_svc_t *
+glusterd_conn_get_svc_object (glusterd_conn_t *conn)
+{
+ return cds_list_entry (conn, glusterd_svc_t, conn);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-helper.h b/xlators/mgmt/glusterd/src/glusterd-conn-helper.h
new file mode 100644
index 00000000000..80468d6de75
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-helper.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_CONN_HELPER_H_
+#define _GLUSTERD_CONN_HELPER_H_
+
+#include "rpc-clnt.h"
+
+#include "glusterd-conn-mgmt.h"
+
+glusterd_svc_t *
+glusterd_conn_get_svc_object (glusterd_conn_t *conn);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
new file mode 100644
index 00000000000..607a0655432
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "rpc-clnt.h"
+#include "glusterd.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-conn-helper.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+
+/*
+ * Initialise @conn as an rpc client over the unix socket @sockpath.
+ *
+ * Builds the transport options, creates the rpc client named after the
+ * owning service, registers glusterd_conn_common_notify as its notify
+ * callback and records @sockpath, @frame_timeout and @notify in @conn.
+ *
+ * @conn:          connection to initialise
+ * @sockpath:      unix socket path, copied into conn->sockpath
+ * @frame_timeout: passed to the transport options and stored in @conn
+ * @notify:        per-service callback invoked for rpc client events
+ *
+ * Returns 0 on success, non-zero on failure. On failure after the rpc
+ * client was created, the client reference is dropped.
+ */
+int
+glusterd_conn_init (glusterd_conn_t *conn, char *sockpath,
+                    int frame_timeout, glusterd_conn_notify_t notify)
+{
+        int                  ret      = -1;
+        dict_t              *options  = NULL;
+        struct rpc_clnt     *rpc      = NULL;
+        xlator_t            *this     = THIS;
+        glusterd_svc_t      *svc      = NULL;
+
+        if (!this)
+                goto out;
+
+        svc = glusterd_conn_get_svc_object (conn);
+        if (!svc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SVC_GET_FAIL, "Failed to get the service");
+                goto out;
+        }
+
+        ret = rpc_transport_unix_options_build (&options, sockpath,
+                                                frame_timeout);
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (options, "transport.socket.ignore-enoent", "on");
+        if (ret) {
+                /* @options has not been handed to the rpc layer yet, so
+                 * nobody else will release it: drop it here. */
+                dict_unref (options);
+                options = NULL;
+                goto out;
+        }
+
+        /* @options is free'd by rpc_transport when destroyed */
+        rpc = rpc_clnt_new (options, this, (char *)svc->name, 16);
+        if (!rpc) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = rpc_clnt_register_notify (rpc, glusterd_conn_common_notify,
+                                        conn);
+        if (ret)
+                goto out;
+
+        ret = snprintf (conn->sockpath, sizeof (conn->sockpath), "%s",
+                        sockpath);
+        if (ret < 0)
+                goto out;
+        else
+                ret = 0;
+
+        conn->frame_timeout = frame_timeout;
+        conn->rpc = rpc;
+        conn->notify = notify;
+out:
+        if (ret) {
+                if (rpc) {
+                        rpc_clnt_unref (rpc);
+                        rpc = NULL;
+                }
+        }
+        return ret;
+}
+
+/*
+ * Tear down @conn by dropping its reference on conn->rpc.
+ * Always returns 0.
+ */
+int
+glusterd_conn_term (glusterd_conn_t *conn)
+{
+ rpc_clnt_unref (conn->rpc);
+ return 0;
+}
+
+/*
+ * Start the rpc client held by @conn; returns the result of
+ * rpc_clnt_start.
+ */
+int
+glusterd_conn_connect (glusterd_conn_t *conn)
+{
+ return rpc_clnt_start (conn->rpc);
+}
+
+/*
+ * Disconnect the rpc client held by @conn. The result of
+ * rpc_clnt_disconnect is not propagated; this always returns 0.
+ */
+int
+glusterd_conn_disconnect (glusterd_conn_t *conn)
+{
+ rpc_clnt_disconnect (conn->rpc);
+
+ return 0;
+}
+
+
+/*
+ * rpc notify trampoline: @mydata is the glusterd_conn_t registered with the
+ * rpc client; the event is forwarded to that connection's own notify
+ * callback. @rpc and @data are not used.
+ *
+ * Returns the callback's result, or 0 when no connection was supplied.
+ */
+int
+__glusterd_conn_common_notify (struct rpc_clnt *rpc, void *mydata,
+                               rpc_clnt_event_t event, void *data)
+{
+        glusterd_conn_t *connection = mydata;
+
+        if (connection)
+                return connection->notify (connection, event);
+
+        /* Silently ignoring this error, exactly like the current
+         * implementation */
+        return 0;
+}
+
+/*
+ * Notify callback registered on the rpc client: passes all arguments to
+ * glusterd_big_locked_notify together with __glusterd_conn_common_notify
+ * and returns that call's result.
+ */
+int
+glusterd_conn_common_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify
+ (rpc, mydata, event, data,
+ __glusterd_conn_common_notify);
+}
+
+/*
+ * Derive a socket file path for the node identified by @uuid.
+ *
+ * Formats "<rundir>/run-<uuid>" into a local buffer and passes it to
+ * glusterd_set_socket_filepath, which fills @socketpath (at most @len
+ * bytes). Always returns 0.
+ */
+int32_t
+glusterd_conn_build_socket_filepath (char *rundir, uuid_t uuid,
+                                     char *socketpath, int len)
+{
+        char run_path[PATH_MAX] = {0,};
+
+        snprintf (run_path, sizeof (run_path), "%s/run-%s",
+                  rundir, uuid_utoa (uuid));
+        glusterd_set_socket_filepath (run_path, socketpath, len);
+
+        return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
new file mode 100644
index 00000000000..5820419dbf5
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_CONN_MGMT_H_
+#define _GLUSTERD_CONN_MGMT_H_
+
+#include "rpc-clnt.h"
+
+typedef struct glusterd_conn_ glusterd_conn_t;
+
+typedef int (*glusterd_conn_notify_t)
+ (glusterd_conn_t *conn, rpc_clnt_event_t event);
+
+/* Per-service connection state. */
+struct glusterd_conn_ {
+ /* rpc client used for this connection */
+ struct rpc_clnt *rpc;
+ /* socket path the connection was initialised with */
+ char sockpath[PATH_MAX];
+ /* frame timeout the connection was initialised with */
+ int frame_timeout;
+ /* Existing daemons tend to specialize their respective
+ * notify implementations, so ... */
+ glusterd_conn_notify_t notify;
+};
+
+int
+glusterd_conn_init (glusterd_conn_t *conn, char *sockpath,
+ int frame_timeout, glusterd_conn_notify_t notify);
+
+int
+glusterd_conn_term (glusterd_conn_t *conn);
+
+int
+glusterd_conn_connect (glusterd_conn_t *conn);
+
+int
+glusterd_conn_disconnect (glusterd_conn_t *conn);
+
+int
+glusterd_conn_common_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+
+int32_t
+glusterd_conn_build_socket_filepath (char *rundir, uuid_t uuid,
+ char *socketpath, int len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h
new file mode 100644
index 00000000000..55d44a5c6a1
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-errno.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_ERRNO_H
+#define _GLUSTERD_ERRNO_H
+
+/* glusterd operation error codes; values are consecutive from 30800. */
+enum glusterd_op_errno {
+ EG_INTRNL = 30800, /* Internal Error */
+ EG_OPNOTSUP = 30801, /* Gluster Op Not Supported */
+ EG_ANOTRANS = 30802, /* Another Transaction in Progress */
+ EG_BRCKDWN = 30803, /* One or more brick is down */
+ EG_NODEDWN = 30804, /* One or more node is down */
+ EG_HRDLMT = 30805, /* Hard Limit is reached */
+ EG_NOVOL = 30806, /* Volume does not exist */
+ EG_NOSNAP = 30807, /* Snap does not exist */
+ EG_RBALRUN = 30808, /* Rebalance is running */
+ EG_VOLRUN = 30809, /* Volume is running */
+ EG_VOLSTP = 30810, /* Volume is not running */
+ EG_VOLEXST = 30811, /* Volume exists */
+ EG_SNAPEXST = 30812, /* Snapshot exists */
+ EG_ISSNAP = 30813, /* Volume is a snap volume */
+ EG_GEOREPRUN = 30814, /* Geo-Replication is running */
+ EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */
+};
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
new file mode 100644
index 00000000000..d34ec05c5f5
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -0,0 +1,882 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+
+#include "common-utils.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "syscall.h"
+
+#include <ctype.h>
+
+#define SHARED_STORAGE_MNT "/var/run/gluster/shared_storage/nfs-ganesha"
+
+int start_ganesha (char **op_errstr);
+
+
+/* One way of driving a system service through a service-manager binary. */
+typedef struct service_command {
+ /* path of the service-manager executable */
+ char *binary;
+ /* name of the service handed to that executable */
+ char *service;
+ /* runs the binary for this entry with the given command string */
+ int (*action) (struct service_command *, char *);
+} service_command;
+
+/* parsing_ganesha_ha_conf will allocate the returned string
+ * to be freed (GF_FREE) by the caller
+ * return NULL if error or not found */
+/*
+ * Look up @key in GANESHA_HA_CONF and return its value.
+ *
+ * Lines are read one at a time; lines starting with '#' are skipped and
+ * leading blanks are ignored. The first line that starts with @key must be
+ * followed directly by '=' (no spaces, as in a shell assignment); otherwise
+ * parsing stops with an error instead of trying later lines. One opening
+ * quote is skipped and the value ends at the next quote, whitespace or end
+ * of line. An empty value ('KEY=""' or 'KEY=') yields an empty string.
+ *
+ * Returns a newly allocated string the caller must release with GF_FREE,
+ * or NULL on error or when the key is not present.
+ */
+static char*
+parsing_ganesha_ha_conf(const char *key) {
+#define MAX_LINE 1024
+        char    scratch[MAX_LINE * 2] = {0,};
+        char   *value = NULL, *pointer = NULL, *end_pointer = NULL;
+        FILE   *fp;
+
+        fp = fopen (GANESHA_HA_CONF, "r");
+        if (fp == NULL) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "couldn't open the file %s",
+                        GANESHA_HA_CONF);
+                goto end_ret;
+        }
+        while ((pointer = fgets (scratch, MAX_LINE, fp)) != NULL) {
+                /* Read config file until we get matching "^[[:space:]]*key" */
+                if (*pointer == '#') {
+                        continue;
+                }
+                /* <ctype.h> functions need an unsigned char value */
+                while (isblank ((unsigned char)*pointer)) {
+                        pointer++;
+                }
+                if (strncmp (pointer, key, strlen (key))) {
+                        continue;
+                }
+                pointer += strlen (key);
+                /* key found : if we fail to parse, we'll return an error
+                 * rather than trying next one
+                 * - supposition : conf file is bash compatible : no space
+                 *   around the '=' */
+                if (*pointer != '=') {
+                        /* a syntax problem, no system error to report */
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GET_CONFIG_INFO_FAILED,
+                                "Parsing %s failed at key %s",
+                                GANESHA_HA_CONF, key);
+                        goto end_close;
+                }
+                pointer++;  /* jump the '=' */
+
+                if (*pointer == '"' || *pointer == '\'') {
+                        /* dont get the quote */
+                        pointer++;
+                }
+                end_pointer = pointer;
+                /* stop at the next closing quote or blank/newline; test the
+                 * first character too so an empty value stays empty and the
+                 * scan never steps over the terminating NUL */
+                while (!(*end_pointer == '\'' || *end_pointer == '"' ||
+                         isspace ((unsigned char)*end_pointer) ||
+                         *end_pointer == '\0')) {
+                        end_pointer++;
+                }
+                *end_pointer = '\0';
+
+                /* got it. copy it and return */
+                value = gf_strdup (pointer);
+                break;
+        }
+
+end_close:
+        fclose(fp);
+end_ret:
+        return value;
+}
+
+/*
+ * Run "<binary> <command> <service>" for @sc (command before service name)
+ * and return the result of runner_run.
+ */
+static int
+sc_systemctl_action (struct service_command *sc, char *command)
+{
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, sc->binary, command, sc->service, NULL);
+ return runner_run (&runner);
+}
+
+/*
+ * Run "<binary> <service> <command>" for @sc (service name before command)
+ * and return the result of runner_run.
+ */
+static int
+sc_service_action (struct service_command *sc, char *command)
+{
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, sc->binary, sc->service, command, NULL);
+ return runner_run (&runner);
+}
+
+/*
+ * Apply @action (a service-manager command string) to the nfs-ganesha
+ * service using the first service manager found on this host.
+ *
+ * The candidates are tried in table order; the first one whose binary
+ * exists is invoked through its own action callback, which knows the
+ * argument order that binary expects.
+ *
+ * Returns the result of the invoked action, or -1 when none of the known
+ * service managers is present.
+ */
+static int
+manage_service (char *action)
+{
+        struct stat             stbuf       = {0,};
+        int                     i           = 0;
+        int                     ret         = 0;
+        struct service_command  sc_list[]   = {
+                { .binary  = "/usr/bin/systemctl",
+                  .service = "nfs-ganesha",
+                  .action  = sc_systemctl_action
+                },
+                { .binary  = "/sbin/invoke-rc.d",
+                  .service = "nfs-ganesha",
+                  .action  = sc_service_action
+                },
+                { .binary  = "/sbin/service",
+                  .service = "nfs-ganesha",
+                  .action  = sc_service_action
+                },
+                { .binary  = NULL
+                }
+        };
+
+        while (sc_list[i].binary != NULL) {
+                ret = sys_stat (sc_list[i].binary, &stbuf);
+                if (ret == 0) {
+                        gf_msg_debug (THIS->name, 0,
+                                      "%s found.", sc_list[i].binary);
+                        /* dispatch through the table instead of comparing
+                         * binary paths */
+                        return sc_list[i].action (&sc_list[i], action);
+                }
+                i++;
+        }
+        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                GD_MSG_UNRECOGNIZED_SVC_MNGR,
+                "Could not %s NFS-Ganesha.Service manager for distro"
+                " not recognized.", action);
+        /* make the failure code explicit rather than relying on the last
+         * sys_stat result */
+        return -1;
+}
+/* Check if ganesha.enable is set to 'on', that checks if
+ * a particular volume is exported via NFS-Ganesha */
+/*
+ * Tell whether @volinfo has the "ganesha.enable" option set to "on",
+ * i.e. whether the volume is exported via NFS-Ganesha.
+ *
+ * Returns _gf_true only when the option can be read and equals "on".
+ */
+gf_boolean_t
+glusterd_check_ganesha_export (glusterd_volinfo_t *volinfo) {
+
+        char *setting = NULL;
+
+        if (glusterd_volinfo_get (volinfo, "ganesha.enable", &setting) != 0)
+                return _gf_false;
+
+        if (!setting || strcmp (setting, "on") != 0)
+                return _gf_false;
+
+        gf_msg_debug (THIS->name, 0, "ganesha.enable set"
+                      " to %s", setting);
+        return _gf_true;
+}
+
+
+/*
+ * Validate and act on a "ganesha.enable" volume-set request.
+ *
+ * Keys other than "ganesha.enable" are accepted untouched (returns 0).
+ * For "ganesha.enable" the value must be exactly "on" or "off"; anything
+ * else sets @errstr and returns -1. A valid value is handed to
+ * glusterd_handle_ganesha_op, whose result is returned.
+ */
+int
+glusterd_check_ganesha_cmd (char *key, char *value, char **errstr, dict_t *dict)
+{
+        int       ret  = 0;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (key);
+        GF_ASSERT (value);
+
+        /* not our option: nothing to check */
+        if (strcmp (key, "ganesha.enable") != 0)
+                return 0;
+
+        if ((strcmp (value, "on") != 0) && (strcmp (value, "off") != 0)) {
+                gf_asprintf (errstr, "Invalid value"
+                             " for volume set command. Use on/off only.");
+                return -1;
+        }
+
+        ret = glusterd_handle_ganesha_op (dict, errstr, key, value);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NFS_GNS_OP_HANDLE_FAIL,
+                        "Handling NFS-Ganesha"
+                        " op failed.");
+
+        return ret;
+}
+
+/*
+ * Stage (validation) step for the global nfs-ganesha option.
+ *
+ * Reads the requested boolean "value" from @dict and compares it with the
+ * value currently stored under GLUSTERD_STORE_KEY_GANESHA_GLOBAL in the
+ * glusterd options. If the option was never set, validation succeeds
+ * without further action. Requesting the state that is already in effect
+ * fails. When enabling, NFS-Ganesha is started.
+ *
+ * Returns 0 on success, non-zero on failure; on failure @op_errstr is set
+ * if it was not set already.
+ */
+int
+glusterd_op_stage_set_ganesha (dict_t *dict, char **op_errstr)
+{
+        int                  ret    = -1;
+        int                  value  = -1;
+        gf_boolean_t         option = _gf_false;
+        char                *str    = NULL;
+        glusterd_conf_t     *priv   = NULL;
+        xlator_t            *this   = NULL;
+
+        GF_ASSERT (dict);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        value = dict_get_str_boolean (dict, "value", _gf_false);
+        if (value == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "value not present.");
+                goto out;
+        }
+        /* This dict_get will fail if the user had never set the key before */
+        /*Ignoring the ret value and proceeding */
+        ret = dict_get_str (priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
+        if (ret) {
+                /* Treat every failure code alike: @str is only usable when
+                 * the lookup succeeded and must not reach
+                 * gf_string2boolean otherwise. */
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        GD_MSG_DICT_GET_FAILED, "Global dict not present.");
+                ret = 0;
+                goto out;
+        }
+        /* Validity of the value is already checked */
+        ret = gf_string2boolean (str, &option);
+        /* Check if the feature is already enabled, fail in that case */
+        if (value == option) {
+                gf_asprintf (op_errstr, "nfs-ganesha is already %sd.", str);
+                ret = -1;
+                goto out;
+        }
+
+        if (value) {
+                ret = start_ganesha (op_errstr);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_NFS_GNS_START_FAIL,
+                                "Could not start NFS-Ganesha");
+
+                }
+        }
+
+out:
+
+        if (ret) {
+                if (!(*op_errstr)) {
+                        *op_errstr = gf_strdup ("Error, Validation Failed");
+                        gf_msg_debug (this->name, 0,
+                                      "Error, Cannot Validate option :%s",
+                                      GLUSTERD_STORE_KEY_GANESHA_GLOBAL);
+                } else {
+                        gf_msg_debug (this->name, 0,
+                                      "Error, Cannot Validate option");
+                }
+        }
+        return ret;
+}
+
+/*
+ * Commit step for the global nfs-ganesha option.
+ *
+ * Takes "key" and "value" from @dict, performs the NFS-Ganesha set-up for
+ * them, records the value under GLUSTERD_STORE_KEY_GANESHA_GLOBAL in the
+ * glusterd options, bumps the global option version and stores the
+ * options.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_set_ganesha (dict_t *dict, char **errstr)
+{
+        int                  ret         = 0;
+        xlator_t            *this        = NULL;
+        glusterd_conf_t     *priv        = NULL;
+        char                *opt_key     = NULL;
+        char                *opt_value   = NULL;
+        char                *version_str = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* fetch the request */
+        ret = dict_get_str (dict, "key", &opt_key);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Couldn't get key in global option set");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "value", &opt_value);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Couldn't get value in global option set");
+                goto out;
+        }
+
+        /* act on it */
+        if (glusterd_handle_ganesha_op (dict, errstr, opt_key, opt_value)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NFS_GNS_SETUP_FAIL,
+                        "Initial NFS-Ganesha set up failed");
+                ret = -1;
+                goto out;
+        }
+
+        /* remember the new state and bump the option version */
+        ret = dict_set_dynstr_with_alloc (priv->opts,
+                                          GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+                                          opt_value);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set"
+                        " nfs-ganesha in dict.");
+                goto out;
+        }
+
+        ret = glusterd_get_next_global_opt_version_str (priv->opts,
+                                                        &version_str);
+        if (ret != 0) {
+                gf_msg_debug (THIS->name, 0, "Could not fetch "
+                              " global op version");
+                goto out;
+        }
+
+        ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+                            version_str);
+        if (ret != 0)
+                goto out;
+
+        /* persist */
+        ret = glusterd_store_options (this, priv->opts);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_FAIL, "Failed to store options");
+
+out:
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/* Following 2 functions parse GANESHA_HA_CONF
+ * The sample file looks like below,
+ * HA_NAME="ganesha-ha-360"
+ * HA_VOL_NAME="ha-state"
+ * HA_VOL_MNT="/mount-point"
+ * HA_VOL_SERVER="server1"
+ * HA_CLUSTER_NODES="server1,server2"
+ * VIP_rhs_1="10.x.x.x"
+ * VIP_rhs_2="10.x.x.x." */
+
+/*
+ * Tell whether the HA_VOL_SERVER entry of GANESHA_HA_CONF refers to this
+ * machine.
+ *
+ * Returns _gf_false when the entry cannot be read; otherwise the result of
+ * gf_is_local_addr for the configured host.
+ */
+gf_boolean_t
+is_ganesha_host (void)
+{
+        xlator_t     *this      = THIS;
+        char         *ha_server = NULL;
+        gf_boolean_t  is_local  = _gf_false;
+
+        ha_server = parsing_ganesha_ha_conf ("HA_VOL_SERVER");
+        if (!ha_server) {
+                gf_msg (this->name, GF_LOG_INFO, errno,
+                        GD_MSG_GET_CONFIG_INFO_FAILED,
+                        "couldn't get HA_VOL_SERVER from file %s",
+                        GANESHA_HA_CONF);
+                return _gf_false;
+        }
+
+        is_local = gf_is_local_addr (ha_server);
+        if (is_local)
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_NFS_GNS_HOST_FOUND,
+                        "ganesha host found "
+                        "Hostname is %s", ha_server);
+
+        /* the parsed value is ours to release */
+        GF_FREE (ha_server);
+        return is_local;
+}
+
+/* Check if the localhost is listed as one of nfs-ganesha nodes.
+ *
+ * Reads the comma separated HA_CLUSTER_NODES entry of GANESHA_HA_CONF and
+ * stops at the first host for which gf_is_local_addr() is true.
+ * Returns that result; _gf_false when the entry cannot be read or no
+ * host matches. */
+gf_boolean_t
+check_host_list (void)
+{
+        glusterd_conf_t *priv     = NULL;
+        char            *hostname = NULL;
+        char            *hostlist = NULL;
+        char            *saveptr  = NULL;
+        int              ret      = _gf_false;
+        xlator_t        *this     = NULL;
+
+        this = THIS;
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        hostlist = parsing_ganesha_ha_conf ("HA_CLUSTER_NODES");
+        if (hostlist == NULL) {
+                gf_msg (this->name, GF_LOG_INFO, errno,
+                        GD_MSG_GET_CONFIG_INFO_FAILED,
+                        "couldn't get HA_CLUSTER_NODES from file %s",
+                        GANESHA_HA_CONF);
+                return _gf_false;
+        }
+
+        /* Hostlist is a comma separated list now. strtok_r keeps the
+         * tokenizer state in saveptr instead of hidden static storage,
+         * so concurrent tokenizing in other threads cannot corrupt it. */
+        hostname = strtok_r (hostlist, ",", &saveptr);
+        while (hostname != NULL) {
+                ret = gf_is_local_addr (hostname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_NFS_GNS_HOST_FOUND,
+                                "ganesha host found "
+                                "Hostname is %s", hostname);
+                        break;
+                }
+                hostname = strtok_r (NULL, ",", &saveptr);
+        }
+
+        GF_FREE (hostlist);
+        return ret;
+
+}
+
+int
+create_export_config (char *volname, char **op_errstr)
+{
+ runner_t runner = {0,};
+ int ret = -1;
+
+ GF_ASSERT(volname);
+ runinit (&runner);
+ runner_add_args (&runner, "sh",
+ GANESHA_PREFIX"/create-export-ganesha.sh",
+ CONFDIR, volname, NULL);
+ ret = runner_run(&runner);
+
+ if (ret && op_errstr)
+ gf_asprintf (op_errstr, "Failed to create"
+ " NFS-Ganesha export config file.");
+
+ return ret;
+}
+
+int
+copy_export_config (char *volname, char **op_errstr)
+{
+ runner_t runner = {0,};
+ int ret = -1;
+
+ GF_ASSERT(volname);
+ runinit (&runner);
+ runner_add_args (&runner, "sh",
+ GANESHA_PREFIX"/copy-export-ganesha.sh",
+ CONFDIR, volname, NULL);
+ ret = runner_run(&runner);
+
+ if (ret && op_errstr)
+ gf_asprintf (op_errstr, "Failed to copy"
+ " NFS-Ganesha export config file.");
+
+ return ret;
+}
+/* Exports and unexports a particular volume via NFS-Ganesha.
+ *
+ * volname   - volume to act on; NULL fails validation
+ * value     - boolean string; true exports, false unexports
+ * op_errstr - optional out-parameter receiving an allocated error message
+ * reboot    - when true the volume/option state checks are skipped and a
+ *             dbus-send.sh failure is tolerated
+ *
+ * Returns 0 on success, non-zero on failure. */
+int
+ganesha_manage_export (char *volname, char *value, char **op_errstr,
+                       gf_boolean_t reboot)
+{
+        runner_t                 runner = {0,};
+        int                      ret = -1;
+        glusterd_volinfo_t      *volinfo = NULL;
+        dict_t                  *vol_opts = NULL;
+        xlator_t                *this = NULL;
+        glusterd_conf_t         *priv = NULL;
+        gf_boolean_t             option = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (value);
+        GF_ASSERT (priv);
+        GF_VALIDATE_OR_GOTO (this->name, volname, out);
+
+        ret = gf_string2boolean (value, &option);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "invalid value.");
+                goto out;
+        }
+
+        /* *
+         * In case of reboot, the following checks are already made before
+         * calling ganesha_manage_export, so repeating them is redundant.
+         */
+        if (!reboot) {
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND,
+                                FMTSTR_CHECK_VOL_EXISTS, volname);
+                        goto out;
+                }
+
+                ret = glusterd_check_ganesha_export (volinfo);
+                if (ret && option) {
+                        if (op_errstr)
+                                gf_asprintf (op_errstr, "ganesha.enable "
+                                             "is already 'on'.");
+                        ret = -1;
+                        goto out;
+
+                } else if (!option && !ret) {
+                        if (op_errstr)
+                                gf_asprintf (op_errstr, "ganesha.enable "
+                                             "is already 'off'.");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+        /* *
+         * In case of restart, there is a chance that the global option was
+         * turned off with a volume set command. Still we may need to clean
+         * up the configuration files.
+         * Otherwise check if the global option is enabled, only then proceed.
+         * */
+        if (!(reboot && !option)) {
+                ret = dict_get_str_boolean (priv->opts,
+                            GLUSTERD_STORE_KEY_GANESHA_GLOBAL, _gf_false);
+                if (ret == -1) {
+                        gf_msg_debug (this->name, 0, "Failed to get "
+                                      "global option dict.");
+                        if (op_errstr)
+                                gf_asprintf (op_errstr, "The option "
+                                             "nfs-ganesha should be "
+                                             "enabled before setting "
+                                             "ganesha.enable.");
+                        goto out;
+                }
+                if (!ret) {
+                        if (op_errstr)
+                                gf_asprintf (op_errstr, "The option "
+                                             "nfs-ganesha should be "
+                                             "enabled before setting "
+                                             "ganesha.enable.");
+                        ret = -1;
+                        goto out;
+                }
+        }
+        /* Create the export file only when ganesha.enable "on" is executed */
+        if (option) {
+                if (reboot)
+                        ret = copy_export_config (volname, op_errstr);
+                else
+                        ret = create_export_config (volname, op_errstr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_EXPORT_FILE_CREATE_FAIL,
+                                "Failed to create/copy "
+                                "export file for NFS-Ganesha\n");
+                        goto out;
+                }
+        }
+
+        if (check_host_list()) {
+                /* The runner is initialised only where it is actually run,
+                 * so the early exits above never leave behind a runner
+                 * that was initialised but never run or ended. */
+                runinit (&runner);
+                runner_add_args (&runner, "sh", GANESHA_PREFIX"/dbus-send.sh",
+                                 CONFDIR, value, volname, NULL);
+                ret = runner_run (&runner);
+                if (ret) {
+                        if (op_errstr)
+                                gf_asprintf(op_errstr, "Dynamic export"
+                                            " addition/deletion failed."
+                                            " Please see log file for details");
+                        /* *
+                         * In reboot scenarios we cannot guarantee that
+                         * nfs-ganesha is running on this node, so the
+                         * dynamic export may fail.
+                         */
+                        if (reboot)
+                                ret = 0;
+                        else
+                                goto out;
+                }
+        }
+
+        /* *
+         * cache-invalidation should be on when a volume is exported
+         * and off when a volume is unexported. It is not required
+         * for reboot scenarios, already it will be copied.
+         * */
+        if (!reboot) {
+                vol_opts = volinfo->dict;
+                ret = dict_set_dynstr_with_alloc (vol_opts,
+                                 "features.cache-invalidation", value);
+                if (ret) {
+                        /* Stop here: otherwise the store result below would
+                         * mask this failure and could replace (leak) the
+                         * message just written to op_errstr. */
+                        if (op_errstr)
+                                gf_asprintf (op_errstr, "Cache-invalidation "
+                                             "could not be set to %s.", value);
+                        goto out;
+                }
+                ret = glusterd_store_volinfo (volinfo,
+                                GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret && op_errstr)
+                        gf_asprintf (op_errstr, "failed to store volinfo for %s"
+                                     , volinfo->volname);
+
+        }
+out:
+        return ret;
+}
+
+/* Run "ganesha-ha.sh teardown" when this node is the ganesha host.
+ * Returns 0 when nothing was run, otherwise the runner_run() result. */
+int
+tear_down_cluster(void)
+{
+        runner_t ha_script = {0,};
+
+        if (!is_ganesha_host())
+                return 0;
+
+        runinit (&ha_script);
+        runner_add_args (&ha_script, "sh",
+                         GANESHA_PREFIX"/ganesha-ha.sh", "teardown",
+                         CONFDIR, NULL);
+        return runner_run(&ha_script);
+}
+
+
+/* Run "ganesha-ha.sh setup" when this node is the ganesha host.
+ * Returns 0 when nothing was run, otherwise the runner_run() result. */
+int
+setup_cluster(void)
+{
+        runner_t ha_script = {0,};
+
+        if (!is_ganesha_host())
+                return 0;
+
+        runinit (&ha_script);
+        runner_add_args (&ha_script, "sh", GANESHA_PREFIX"/ganesha-ha.sh",
+                         "setup", CONFDIR, NULL);
+        return runner_run (&ha_script);
+}
+
+
+/* Disable NFS-Ganesha: tear down the HA cluster, stop the service, run the
+ * "cleanup" step of ganesha-ha.sh and switch features.cache-invalidation
+ * and ganesha.enable to "off" on every volume.
+ *
+ * op_errstr must be non-NULL; it receives an allocated message when the
+ * cluster teardown or the service stop fails. Per-volume failures are only
+ * logged. Returns the result of the last step performed. */
+static int
+teardown (char **op_errstr)
+{
+        runner_t                 runner = {0,};
+        int                      ret = -1;
+        glusterd_volinfo_t      *volinfo = NULL;
+        glusterd_conf_t         *priv = NULL;
+        dict_t                  *vol_opts = NULL;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        ret = tear_down_cluster();
+        if (ret == -1) {
+                gf_asprintf (op_errstr, "Cleanup of NFS-Ganesha"
+                             " HA config failed.");
+                goto out;
+        }
+        ret = stop_ganesha (op_errstr);
+        if (ret) {
+                /* stop_ganesha() may already have stored a message; writing
+                 * over it would leak that allocation. */
+                if (!*op_errstr)
+                        gf_asprintf (op_errstr,
+                                     "Could not stop NFS-Ganesha.");
+                goto out;
+        }
+
+        runinit (&runner);
+        runner_add_args (&runner, "sh", GANESHA_PREFIX"/ganesha-ha.sh",
+                         "cleanup", CONFDIR, NULL);
+        ret = runner_run (&runner);
+        if (ret)
+                gf_msg_debug (THIS->name, 0, "Could not clean up"
+                              " NFS-Ganesha related config");
+
+        cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                vol_opts = volinfo->dict;
+                /* All the volumes exported via NFS-Ganesha will be
+                   unexported, hence setting the appropriate keys */
+                ret = dict_set_str (vol_opts, "features.cache-invalidation",
+                                    "off");
+                if (ret)
+                        gf_msg (THIS->name, GF_LOG_WARNING, errno,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not set features.cache-invalidation "
+                                "to off for %s", volinfo->volname);
+
+                ret = dict_set_str (vol_opts, "ganesha.enable", "off");
+                if (ret)
+                        gf_msg (THIS->name, GF_LOG_WARNING, errno,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not set ganesha.enable to off for %s",
+                                volinfo->volname);
+
+                ret = glusterd_store_volinfo (volinfo,
+                                GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret)
+                        gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOLINFO_SET_FAIL,
+                                "failed to store volinfo for %s",
+                                volinfo->volname);
+        }
+out:
+        return ret;
+}
+
+/* Stop the NFS-Ganesha service when this node is in the ganesha host list.
+ * On failure an allocated message is stored in *op_errstr.
+ * Returns 0 when nothing had to be done or the stop succeeded. */
+int
+stop_ganesha (char **op_errstr)
+{
+        int ret = 0;
+
+        if (check_host_list ()) {
+                ret = manage_service ("stop");
+                if (ret)
+                        /* the original literals concatenated to
+                         * "could notbe stopped." */
+                        gf_asprintf (op_errstr, "NFS-Ganesha service could not"
+                                     " be stopped.");
+        }
+        return ret;
+
+}
+
+/* Prepare for and start NFS-Ganesha.
+ *
+ * Sets NFS_DISABLE_MAP_KEY to "on" on every volume and stores it, stops the
+ * Gluster-NFS service if it was initialised, then starts the NFS-Ganesha
+ * service when this node is in the ganesha host list.
+ * On failure *op_errstr may receive an allocated message.
+ *
+ * NOTE(review): ret starts at -1 and is only overwritten by the steps that
+ * actually run, so with no volumes, an uninitialised nfs_svc and a node
+ * outside the host list this returns -1 - confirm that is intended. */
+int
+start_ganesha (char **op_errstr)
+{
+        int                     ret = -1;
+        dict_t                  *vol_opts = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        glusterd_conf_t         *priv = NULL;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                vol_opts = volinfo->dict;
+                /* Gluster-nfs has to be disabled across the trusted pool */
+                /* before attempting to start nfs-ganesha */
+                ret = dict_set_str (vol_opts, NFS_DISABLE_MAP_KEY, "on");
+                if (ret)
+                        goto out;
+
+                ret = glusterd_store_volinfo (volinfo,
+                                GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret) {
+                        *op_errstr = gf_strdup ("Failed to store the "
+                                                "Volume information");
+                        goto out;
+                }
+        }
+
+        /* If the nfs svc is not initialized it means that the service is not
+         * running, hence we can skip the process of stopping gluster-nfs
+         * service
+         */
+        if (priv->nfs_svc.inited) {
+                ret = priv->nfs_svc.stop (&(priv->nfs_svc), SIGKILL);
+                if (ret) {
+                        ret = -1;
+                        /* space added: literals used to join as "couldnot" */
+                        gf_asprintf (op_errstr, "Gluster-NFS service could "
+                                     "not be stopped, exiting.");
+                        goto out;
+                }
+        }
+        if (check_host_list()) {
+                ret = manage_service ("start");
+                if (ret)
+                        /* space added between the two sentences */
+                        gf_asprintf (op_errstr, "NFS-Ganesha failed to start. "
+                                     "Please see log file for details");
+        }
+
+out:
+        return ret;
+}
+
+/* Prepare this node for NFS-Ganesha: create SHARED_STORAGE_MNT (an existing
+ * directory is fine) and, when the node is in the ganesha host list, run the
+ * HA cluster setup.
+ * Returns -1 when mkdir fails, 0 when the node is not in the host list,
+ * otherwise the setup_cluster() result (with *op_errstr set on -1). */
+static int
+pre_setup (char **op_errstr)
+{
+        int    ret = 0;
+
+        ret = sys_mkdir (SHARED_STORAGE_MNT, 0775);
+
+        if ((-1 == ret) && (EEXIST != errno)) {
+                /* log under the xlator name, not the literal "THIS->name" */
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "mkdir() failed on path %s,",
+                        SHARED_STORAGE_MNT);
+                goto out;
+        }
+
+        ret = check_host_list();
+
+        if (ret) {
+                ret = setup_cluster();
+                if (ret == -1)
+                        gf_asprintf (op_errstr, "Failed to set up HA "
+                                     "config for NFS-Ganesha. "
+                                     "Please check the log file for details");
+        }
+
+out:
+        return ret;
+}
+
+/* Dispatch a ganesha key/value set operation.
+ *
+ * "ganesha.enable" (un)exports the volume named by dict["volname"].
+ * GLUSTERD_STORE_KEY_GANESHA_GLOBAL runs pre_setup() when value is true and
+ * teardown() otherwise. Any other key only has its value validated as a
+ * boolean. Returns the result of the last step performed. */
+int
+glusterd_handle_ganesha_op (dict_t *dict, char **op_errstr,
+                            char *key, char *value)
+{
+        int32_t       rc      = -1;
+        char         *volname = NULL;
+        gf_boolean_t  enable  = _gf_false;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (key);
+        GF_ASSERT (value);
+
+        if (!strcmp (key, "ganesha.enable")) {
+                rc = dict_get_str (dict, "volname", &volname);
+                if (rc) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get volume name");
+                        return rc;
+                }
+                rc = ganesha_manage_export (volname, value, op_errstr,
+                                            _gf_false);
+                if (rc < 0)
+                        return rc;
+        }
+
+        /* It is possible that the key might not be set */
+        rc = gf_string2boolean (value, &enable);
+        if (rc == -1) {
+                gf_asprintf (op_errstr, "Invalid value in key-value pair.");
+                return rc;
+        }
+
+        if (strcmp (key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) != 0)
+                return rc;
+
+        if (enable)
+                rc = pre_setup (op_errstr);
+        else
+                rc = teardown (op_errstr);
+
+        return rc;
+}
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
new file mode 100644
index 00000000000..55e249643c0
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -0,0 +1,6521 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-svc-helper.h"
+#include "run.h"
+#include "syscall.h"
+#include "glusterd-messages.h"
+
+#include <signal.h>
+
+/* Forward declaration; the definition is not in this part of the file. */
+static int
+dict_get_param (dict_t *dict, char *key, char **param);
+
+/* Table of gsync config options that accept only a fixed set of values.
+ * Each entry gives the option name, how many values are listed, whether the
+ * entry is flagged case sensitive, and the values themselves.
+ * The entry with a NULL op_name terminates the table.
+ * NOTE(review): presumably consulted when validating config values - the
+ * consumer is not in this part of the file. */
+struct gsync_config_opt_vals_ gsync_confopt_vals[] = {
+ {.op_name = "change_detector",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"xsync", "changelog"},
+ },
+ {.op_name = "special_sync_mode",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"partial", "recover"}
+ },
+ {.op_name = "log-level",
+ .no_of_pos_vals = 5,
+ .case_sensitive = _gf_false,
+ .values = {"critical", "error", "warning", "info", "debug"}
+ },
+ {.op_name = "use-tarssh",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}
+ },
+ {.op_name = "ignore_deletes",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}
+ },
+ /* both the underscore and the hyphen spelling are listed */
+ {.op_name = "use_meta_volume",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}
+ },
+ {.op_name = "use-meta-volume",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}
+ },
+ {.op_name = NULL,
+ },
+};
+
+/* NULL-terminated list of gsync option names treated as reserved.
+ * NOTE(review): presumably these may not be changed by users - the code
+ * that checks this list is not in this part of the file. */
+static char *gsync_reserved_opts[] = {
+ "gluster-command-dir",
+ "pid-file",
+ "state-file",
+ "session-owner",
+ "state-socket-unencoded",
+ "socketdir",
+ "local-id",
+ "local-path",
+ "slave-id",
+ NULL
+};
+
+/* NULL-terminated list of gsync option names.
+ * NOTE(review): going by the name, changing these should not require a
+ * gsyncd restart - confirm against the code that reads this list. */
+static char *gsync_no_restart_opts[] = {
+ "checkpoint",
+ NULL
+};
+
+/* CLI RPC handler body for GD_OP_SYS_EXEC.
+ *
+ * Decodes the request, unserializes its dict (when one was sent), stores
+ * this node's uuid in it under "host-uuid" and hands the operation to
+ * glusterd_op_begin_synctask(). On any failure an error response is sent
+ * to the CLI. Returns the result of the last step performed. */
+int
+__glusterd_handle_sys_exec (rpcsvc_request_t *req)
+{
+        int32_t                 ret     = 0;
+        dict_t                  *dict   = NULL;
+        gf_cli_req              cli_req = {{0},};
+        glusterd_op_t           cli_op = GD_OP_SYS_EXEC;
+        glusterd_conf_t         *priv   = NULL;
+        char                    *host_uuid = NULL;
+        char                    err_str[2048] = {0,};
+        xlator_t                *this = NULL;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* ret still holds the non-negative decode result
+                         * here; report the failure explicitly */
+                        snprintf (err_str, sizeof (err_str), "Unable to "
+                                  "allocate a dictionary for the command");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                } else {
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+
+                host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+                if (host_uuid == NULL) {
+                        snprintf (err_str, sizeof (err_str), "Failed to get "
+                                  "the uuid of local glusterd");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        return ret;
+}
+
+/* CLI RPC handler body for GD_OP_COPY_FILE.
+ *
+ * Decodes the request, unserializes its dict (when one was sent), stores
+ * this node's uuid in it under "host-uuid" and hands the operation to
+ * glusterd_op_begin_synctask(). On any failure an error response is sent
+ * to the CLI. Returns the result of the last step performed. */
+int
+__glusterd_handle_copy_file (rpcsvc_request_t *req)
+{
+        int32_t                 ret     = 0;
+        dict_t                  *dict   = NULL;
+        gf_cli_req              cli_req = {{0},};
+        glusterd_op_t           cli_op = GD_OP_COPY_FILE;
+        glusterd_conf_t         *priv   = NULL;
+        char                    *host_uuid = NULL;
+        char                    err_str[2048] = {0,};
+        xlator_t                *this = NULL;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* ret still holds the non-negative decode result
+                         * here; report the failure explicitly */
+                        snprintf (err_str, sizeof (err_str), "Unable to "
+                                  "allocate a dictionary for the command");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        /* space added: literals used to join as
+                         * "failed tounserialize" */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                } else {
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+
+                host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+                if (host_uuid == NULL) {
+                        snprintf (err_str, sizeof (err_str), "Failed to get "
+                                  "the uuid of local glusterd");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        return ret;
+}
+
+/* CLI RPC handler body for geo-replication commands.
+ *
+ * Decodes the request, unserializes its dict (when one was sent), stores
+ * this node's uuid under "host-uuid", and starts the operation through
+ * glusterd_op_begin_synctask(). The op is GD_OP_GSYNC_CREATE for the
+ * "create" command type and GD_OP_GSYNC_SET for every other type.
+ * A missing "master"/"slave" key is only logged; a missing "type" fails.
+ * On any failure an error response is sent to the CLI. */
+int
+__glusterd_handle_gsync_set (rpcsvc_request_t *req)
+{
+        int32_t                 ret     = 0;
+        dict_t                  *dict   = NULL;
+        gf_cli_req              cli_req = {{0},};
+        glusterd_op_t           cli_op = GD_OP_GSYNC_SET;
+        char                    *master = NULL;
+        char                    *slave  = NULL;
+        int                     type = 0;
+        glusterd_conf_t         *priv   = NULL;
+        char                    *host_uuid = NULL;
+        char                    err_str[2048] = {0,};
+        xlator_t                *this = NULL;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* ret still holds the non-negative decode result
+                         * here; report the failure explicitly */
+                        snprintf (err_str, sizeof (err_str), "Unable to "
+                                  "allocate a dictionary for the command");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                } else {
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+
+                host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+                if (host_uuid == NULL) {
+                        snprintf (err_str, sizeof (err_str), "Failed to get "
+                                  "the uuid of local glusterd");
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+                if (ret)
+                        goto out;
+
+        }
+
+        /* master and slave are looked up only so that their absence is
+         * logged; the values are not used further in this handler */
+        ret = dict_get_str (dict, "master", &master);
+        if (ret < 0)
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+                        "master not found, while handling "GEOREP" options");
+
+        ret = dict_get_str (dict, "slave", &slave);
+        if (ret < 0)
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+                        "slave not found, while handling "GEOREP" options");
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Command type not found "
+                          "while handling "GEOREP" options");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", err_str);
+                goto out;
+        }
+
+        /* Only "create" maps to its own op. The former switch also copied
+         * an operation name into a buffer that was never read. */
+        if (type == GF_GSYNC_OPTION_TYPE_CREATE)
+                cli_op = GD_OP_GSYNC_CREATE;
+
+        ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        return ret;
+}
+
+/* RPC entry point: runs __glusterd_handle_sys_exec through
+ * glusterd_big_locked_handler() and returns its result. */
+int
+glusterd_handle_sys_exec (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_sys_exec);
+}
+
+/* RPC entry point: runs __glusterd_handle_copy_file through
+ * glusterd_big_locked_handler() and returns its result. */
+int
+glusterd_handle_copy_file (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_copy_file);
+}
+
+/* RPC entry point: runs __glusterd_handle_gsync_set through
+ * glusterd_big_locked_handler() and returns its result. */
+int
+glusterd_handle_gsync_set (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_gsync_set);
+}
+
+/*****
+ *
+ * glusterd_urltransform* internal API
+ *
+ *****/
+
+/* Initialise runner as a gsyncd invocation whose first option is
+ * "--<transname>-url"; URLs are appended afterwards with
+ * glusterd_urltransform_add(). */
+static void
+glusterd_urltransform_init (runner_t *runner, const char *transname)
+{
+ runinit (runner);
+ runner_add_arg (runner, GSYNCD_PREFIX"/gsyncd");
+ runner_argprintf (runner, "--%s-url", transname);
+}
+
+/* Append one URL argument to a runner prepared by
+ * glusterd_urltransform_init(). */
+static void
+glusterd_urltransform_add (runner_t *runner, const char *url)
+{
+ runner_add_arg (runner, url);
+}
+
+/* Cut a stored slave entry down to the slave URL.
+ *
+ * Entry format:
+ *   master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid
+ * *slave is pointed just past the first ':' and slv_url is modified in
+ * place so the string ends before ":slave_voluuid". Entries without the
+ * uuid suffix are left as they are.
+ * Returns 0 on success, -1 when the first ':' or the "::" is missing. */
+static int32_t
+parse_slave_url (char *slv_url, char **slave)
+{
+        char     *sep  = NULL;
+        xlator_t *this = THIS;
+
+        sep = strchr (slv_url, ':');
+        if (sep == NULL) {
+                *slave = NULL;
+                return -1;
+        }
+        *slave = sep + 1;
+
+        /* find the host/volume separator, then the ':' after the volume */
+        sep = strstr (*slave, "::");
+        if (sep == NULL)
+                return -1;
+
+        sep = strchr (sep + 2, ':');
+        if (sep != NULL)
+                *sep = '\0';
+        else
+                gf_msg_debug (this->name, 0, "old slave: %s!", *slave);
+
+        gf_msg_debug (this->name, 0, "parsed slave: %s!", *slave);
+        return 0;
+}
+
+/* dict_foreach() callback: parse the slave URL out of one dict value and
+ * append it to the runner passed in data.
+ * Returns 0 on success, -1 when THIS is unset or the value cannot be
+ * parsed. */
+static int
+_glusterd_urltransform_add_iter (dict_t *dict, char *key, data_t *value, void *data)
+{
+        runner_t *runner = (runner_t *)data;
+        char      slv_url[VOLINFO_SLAVE_URL_MAX] = {0};
+        char     *slave = NULL;
+        xlator_t *this = NULL;
+        int32_t   ret = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        gf_msg_debug (this->name, 0, "value->data %s", value->data);
+
+        /* Copy at most size-1 bytes: the buffer is zero-filled, so the
+         * last byte stays NUL even when the value is too long. */
+        strncpy (slv_url, value->data, sizeof(slv_url) - 1);
+        ret = parse_slave_url (slv_url, &slave);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                        "Error in parsing slave: %s!", value->data);
+                goto out;
+        }
+
+        runner_add_arg (runner, slave);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Free the first n strings of linearr and then the array itself. */
+static void
+glusterd_urltransform_free (char **linearr, unsigned n)
+{
+        unsigned idx = 0;
+
+        while (idx < n) {
+                GF_FREE (linearr[idx]);
+                idx++;
+        }
+
+        GF_FREE (linearr);
+}
+
+/* Run the prepared gsyncd url-transform command and collect its stdout,
+ * one line per array slot, trailing newline removed.
+ *
+ * On success *linearrp receives the array (release it with
+ * glusterd_urltransform_free()) and the number of lines is returned.
+ * On failure -1 is returned and *linearrp is left untouched. A line
+ * that does not end in '\n' within 1024 bytes counts as a failure. */
+static int
+glusterd_urltransform (runner_t *runner, char ***linearrp)
+{
+        char **linearr = NULL;
+        char *line = NULL;
+        unsigned arr_len = 32;
+        unsigned arr_idx = 0;
+        gf_boolean_t error = _gf_false;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        linearr = GF_CALLOC (arr_len, sizeof (char *), gf_gld_mt_linearr);
+        if (!linearr) {
+                error = _gf_true;
+                goto out;
+        }
+
+        runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
+        if (runner_start (runner) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SPAWNING_CHILD_FAILED,
+                        "spawning child failed");
+
+                error = _gf_true;
+                goto out;
+        }
+
+        arr_idx = 0;
+        for (;;) {
+                size_t len;
+                line = GF_MALLOC (1024, gf_gld_mt_linebuf);
+                if (!line) {
+                        error = _gf_true;
+                        goto out;
+                }
+
+                if (fgets (line, 1024, runner_chio (runner, STDOUT_FILENO)) ==
+                    NULL) {
+                        /* end of output: this buffer was never stored */
+                        GF_FREE (line);
+                        break;
+                }
+
+                len = strlen (line);
+                if (len == 0 || line[len - 1] != '\n') {
+                        GF_FREE (line);
+                        error = _gf_true;
+                        goto out;
+                }
+                line[len - 1] = '\0';
+
+                if (arr_idx == arr_len) {
+                        void *p = NULL;
+                        unsigned new_len = arr_len << 1;
+
+                        /* the size is in bytes, not in elements */
+                        p = GF_REALLOC (linearr, new_len * sizeof (char *));
+                        if (!p) {
+                                GF_FREE (line);
+                                error = _gf_true;
+                                goto out;
+                        }
+                        linearr = p;
+                        arr_len = new_len;
+                }
+                linearr[arr_idx] = line;
+
+                arr_idx++;
+        }
+
+ out:
+
+        /* XXX chpid field is not exported by run API
+         * but runner_end() does not abort the invoked
+         * process (ie. it might block in waitpid(2))
+         * so we resort to a manual kill via the private field
+         */
+        if (error && runner->chpid > 0)
+                kill (runner->chpid, SIGKILL);
+
+        if (runner_end (runner) != 0)
+                error = _gf_true;
+
+        if (error) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_READ_CHILD_DATA_FAILED,
+                        "reading data from child failed");
+                glusterd_urltransform_free (linearr, arr_idx);
+                return -1;
+        }
+
+        *linearrp = linearr;
+        return arr_idx;
+}
+
+/* Run the url transform named transname on a single url.
+ * Return value and *linearrp are those of glusterd_urltransform(). */
+static int
+glusterd_urltransform_single (const char *url, const char *transname,
+ char ***linearrp)
+{
+ runner_t runner = {0,};
+
+ glusterd_urltransform_init (&runner, transname);
+ glusterd_urltransform_add (&runner, url);
+ return glusterd_urltransform (&runner, linearrp);
+}
+
+
+/* Cursor state for looking up a dict key by its iteration position
+ * (filled in by _dict_mark_atindex). */
+struct dictidxmark {
+ unsigned isrch; /* position being searched for */
+ unsigned ithis; /* position of the entry currently visited */
+ char *ikey; /* key found at position isrch, if any */
+};
+
+
+/* Slave details: host, user, index and slave volume uuid.
+ * NOTE(review): the "old_" prefix suggests these describe a previously
+ * configured slave - the users of this struct are not in this part of
+ * the file, so confirm there. */
+struct slave_vol_config {
+ char old_slvhost[_POSIX_HOST_NAME_MAX+1];
+ char old_slvuser[_POSIX_LOGIN_NAME_MAX];
+ unsigned old_slvidx;
+ char slave_voluuid[GF_UUID_BUF_SIZE];
+};
+
+/* dict_foreach() callback: record the key once the running position in
+ * the dictidxmark cursor equals the searched one, then advance the
+ * position. Always returns 0. */
+static int
+_dict_mark_atindex (dict_t *dict, char *key, data_t *value, void *data)
+{
+        struct dictidxmark *cursor = data;
+        unsigned            pos    = cursor->ithis;
+
+        cursor->ithis = pos + 1;
+        if (pos == cursor->isrch)
+                cursor->ikey = key;
+
+        return 0;
+}
+
+/* Return the key visited at position i of a dict_foreach() walk over
+ * dict, or NULL when no entry has that position. */
+static char *
+dict_get_by_index (dict_t *dict, unsigned i)
+{
+        struct dictidxmark cursor = { .isrch = i, .ithis = 0, .ikey = NULL };
+
+        dict_foreach (dict, _dict_mark_atindex, &cursor);
+
+        return cursor.ikey;
+}
+
+/* Look up slaveurl among the slaves configured for vol.
+ *
+ * All configured slave URLs plus slaveurl are canonicalized in a single
+ * gsyncd run; the last output line (slaveurl) is compared with the others.
+ * Returns the matching position and sets *slavekey to the matching
+ * gsync_slaves key, -1 when there is no match, and -2 when the
+ * canonicalization fails. */
+static int
+glusterd_get_slave (glusterd_volinfo_t *vol, const char *slaveurl, char **slavekey)
+{
+        runner_t runner = {0,};
+        int n = 0;
+        int i = 0;
+        char **linearr = NULL;
+        int32_t ret = 0;
+
+        glusterd_urltransform_init (&runner, "canonicalize");
+        ret = dict_foreach (vol->gsync_slaves, _glusterd_urltransform_add_iter,
+                            &runner);
+        if (ret < 0)
+                return -2;
+
+        glusterd_urltransform_add (&runner, slaveurl);
+
+        n = glusterd_urltransform (&runner, &linearr);
+        if (n == -1)
+                return -2;
+
+        for (i = 0; i < n - 1; i++) {
+                if (strcmp (linearr[i], linearr[n - 1]) == 0)
+                        break;
+        }
+        /* release every returned line; passing i here freed only the
+         * lines before the match and leaked the rest */
+        glusterd_urltransform_free (linearr, n);
+
+        if (i < n - 1)
+                *slavekey = dict_get_by_index (vol->gsync_slaves, i);
+        else
+                i = -1;
+
+        return i;
+}
+
+/* Start runner with its stdout piped back, let fcbk consume that stream
+ * (resbuf/blen is its scratch buffer, data its context), then end the
+ * runner. Returns 0 when both fcbk and runner_end() report success,
+ * -1 otherwise (including when the child cannot be started). */
+static int
+glusterd_query_extutil_generic (char *resbuf, size_t blen, runner_t *runner, void *data,
+                                int (*fcbk)(char *resbuf, size_t blen, FILE *fp, void *data))
+{
+        xlator_t *this   = THIS;
+        int       failed = 0;
+
+        GF_ASSERT (this);
+
+        runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
+        if (runner_start (runner) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SPAWNING_CHILD_FAILED,
+                        "spawning child failed");
+
+                return -1;
+        }
+
+        failed = fcbk (resbuf, blen, runner_chio (runner, STDOUT_FILENO), data);
+        /* the runner is always ended, even when the callback failed */
+        failed |= runner_end (runner);
+        if (!failed)
+                return 0;
+
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_READ_CHILD_DATA_FAILED,
+                "reading data from child failed");
+        return -1;
+}
+
+/* Stream callback: read one line from fp into resbuf (at most blen bytes)
+ * and drop its trailing newline. data is unused.
+ * Returns -1 when errno is set after the read, 0 otherwise. */
+static int
+_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data)
+{
+        errno = 0;
+        if (fgets (resbuf, blen, fp) != NULL) {
+                char *eol = resbuf + strlen (resbuf);
+
+                if (eol != resbuf && eol[-1] == '\n')
+                        eol[-1] = '\0'; /* strip off \n */
+        }
+
+        return (errno == 0) ? 0 : -1;
+}
+
+/* Run runner and store the first line of its output in resbuf.
+ * Up to PATH_MAX bytes may be written, so resbuf must be at least that
+ * large. Returns 0 on success, -1 on failure. */
+static int
+glusterd_query_extutil (char *resbuf, runner_t *runner)
+{
+ return glusterd_query_extutil_generic (resbuf, PATH_MAX, runner, NULL,
+ _fcbk_singleline);
+}
+
+/* Ask gsyncd (--slavevoluuid-get) for the volume uuid of
+ * slave_host::slave_vol and store the answer in vol_uuid.
+ * Returns the glusterd_query_extutil() result, or -1 when THIS or its
+ * private data is missing.
+ * NOTE(review): glusterd_query_extutil() may write up to PATH_MAX bytes
+ * into vol_uuid - confirm every caller passes a buffer that large. */
+static int
+glusterd_get_slave_voluuid (char *slave_host, char *slave_vol, char *vol_uuid)
+{
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+ runinit (&runner);
+ runner_add_arg (&runner, GSYNCD_PREFIX"/gsyncd");
+ runner_add_arg (&runner, "--slavevoluuid-get");
+ runner_argprintf (&runner, "%s::%s", slave_host, slave_vol);
+
+ /* big_lock is released while the external command runs and taken
+ * again afterwards */
+ synclock_unlock (&priv->big_lock);
+ ret = glusterd_query_extutil (vol_uuid, &runner);
+ synclock_lock (&priv->big_lock);
+
+out:
+ return ret;
+}
+
+
+/* Stream callback: read "key: value" lines from fp and store each pair in
+ * the dict passed as data (the value is duplicated).
+ *
+ * Trailing whitespace is stripped; lines left with at most one character
+ * are skipped. A remaining line without ':' is an error.
+ * Returns 0 on success, -1 on a malformed line, an allocation or dict
+ * failure, or when errno is set once reading stops. */
+static int
+_fcbk_conftodict (char *resbuf, size_t blen, FILE *fp, void *data)
+{
+        char   *ptr = NULL;
+        dict_t *dict = data;
+        char   *v = NULL;
+        size_t  len = 0;
+
+        for (;;) {
+                errno = 0;
+                ptr = fgets (resbuf, blen, fp);
+                if (!ptr)
+                        break;
+
+                /* Strip trailing space by length, so an all-whitespace
+                 * line can never move the cursor before resbuf. */
+                len = strlen (resbuf);
+                while (len > 0 && isspace ((unsigned char)resbuf[len - 1]))
+                        resbuf[--len] = '\0';
+                if (len <= 1)
+                        /* skip empty line */
+                        continue;
+
+                v = strchr (resbuf, ':');
+                if (!v)
+                        return -1;
+                *v++ = '\0';
+                while (isspace ((unsigned char)*v))
+                        v++;
+                v = gf_strdup (v);
+                if (!v)
+                        return -1;
+                if (dict_set_dynstr (dict, resbuf, v) != 0) {
+                        GF_FREE (v);
+                        return -1;
+                }
+        }
+
+        return errno ? -1 : 0;
+}
+
+/* Run "gsyncd -c <conf_path> ... --config-get-all" for the master/slave
+ * pair and load the reported "key: value" lines into dict.
+ * Returns 0 on success, -1 on failure. */
+static int
+glusterd_gsync_get_config (char *master, char *slave, char *conf_path, dict_t *dict)
+{
+ /* key + value, where value must be able to accommodate a path */
+ char resbuf[256 + PATH_MAX] = {0,};
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, "--config-get-all", NULL);
+
+ return glusterd_query_extutil_generic (resbuf, sizeof (resbuf),
+ &runner, dict, _fcbk_conftodict);
+}
+
+static int
+_fcbk_statustostruct (char *resbuf, size_t blen, FILE *fp,
+ void *data)
+{
+ char *ptr = NULL;
+ char *v = NULL;
+ char *k = NULL;
+ gf_gsync_status_t *sts_val = NULL;
+
+ sts_val = (gf_gsync_status_t *)data;
+
+ for (;;) {
+ errno = 0;
+ ptr = fgets (resbuf, blen, fp);
+ if (!ptr)
+ break;
+
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace (*v))
+ /* strip trailing space */
+ *v-- = '\0';
+ if (v == resbuf)
+ /* skip empty line */
+ continue;
+ v = strchr (resbuf, ':');
+ if (!v)
+ return -1;
+ *v++ = '\0';
+ while (isspace (*v))
+ v++;
+ v = gf_strdup (v);
+ if (!v)
+ return -1;
+
+ k = gf_strdup (resbuf);
+ if (!k) {
+ GF_FREE (v);
+ return -1;
+ }
+
+ if (strcmp (k, "worker_status") == 0) {
+ memcpy (sts_val->worker_status, v,
+ strlen(v));
+ sts_val->worker_status[strlen(v)] = '\0';
+ } else if (strcmp (k, "slave_node") == 0) {
+ memcpy (sts_val->slave_node, v,
+ strlen(v));
+ sts_val->slave_node[strlen(v)] = '\0';
+ } else if (strcmp (k, "crawl_status") == 0) {
+ memcpy (sts_val->crawl_status, v,
+ strlen(v));
+ sts_val->crawl_status[strlen(v)] = '\0';
+ } else if (strcmp (k, "last_synced") == 0) {
+ memcpy (sts_val->last_synced, v,
+ strlen(v));
+ sts_val->last_synced[strlen(v)] = '\0';
+ } else if (strcmp (k, "last_synced_utc") == 0) {
+ memcpy (sts_val->last_synced_utc, v,
+ strlen(v));
+ sts_val->last_synced_utc[strlen(v)] = '\0';
+ } else if (strcmp (k, "entry") == 0) {
+ memcpy (sts_val->entry, v,
+ strlen(v));
+ sts_val->entry[strlen(v)] = '\0';
+ } else if (strcmp (k, "data") == 0) {
+ memcpy (sts_val->data, v,
+ strlen(v));
+ sts_val->data[strlen(v)] = '\0';
+ } else if (strcmp (k, "meta") == 0) {
+ memcpy (sts_val->meta, v,
+ strlen(v));
+ sts_val->meta[strlen(v)] = '\0';
+ } else if (strcmp (k, "failures") == 0) {
+ memcpy (sts_val->failures, v,
+ strlen(v));
+ sts_val->failures[strlen(v)] = '\0';
+ } else if (strcmp (k, "checkpoint_time") == 0) {
+ memcpy (sts_val->checkpoint_time, v,
+ strlen(v));
+ sts_val->checkpoint_time[strlen(v)] = '\0';
+ } else if (strcmp (k, "checkpoint_time_utc") == 0) {
+ memcpy (sts_val->checkpoint_time_utc, v,
+ strlen(v));
+ sts_val->checkpoint_time_utc[strlen(v)] = '\0';
+ } else if (strcmp (k, "checkpoint_completed") == 0) {
+ memcpy (sts_val->checkpoint_completed, v,
+ strlen(v));
+ sts_val->checkpoint_completed[strlen(v)] = '\0';
+ } else if (strcmp (k, "checkpoint_completion_time") == 0) {
+ memcpy (sts_val->checkpoint_completion_time, v,
+ strlen(v));
+ sts_val->checkpoint_completion_time[strlen(v)] = '\0';
+ } else if (strcmp (k, "checkpoint_completion_time_utc") == 0) {
+ memcpy (sts_val->checkpoint_completion_time_utc, v,
+ strlen(v));
+ sts_val->checkpoint_completion_time_utc[strlen(v)] =
+ '\0';
+ }
+ GF_FREE(v);
+ GF_FREE(k);
+ }
+
+ return errno ? -1 : 0;
+}
+
+
+/* Run "gsyncd -c <conf_path> ... --status-get --path <brick_path>" for the
+ * master/slave pair and fill sts_val from the reported "key: value" lines.
+ * Returns 0 on success, -1 on failure. */
+static int
+glusterd_gsync_get_status (char *master, char *slave, char *conf_path,
+ char *brick_path, gf_gsync_status_t *sts_val)
+{
+ /* key + value, where value must be able to accommodate a path */
+ char resbuf[256 + PATH_MAX] = {0,};
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, "--status-get", NULL);
+ runner_add_args (&runner, "--path", brick_path, NULL);
+
+ return glusterd_query_extutil_generic (resbuf, sizeof (resbuf),
+ &runner, sts_val,
+ _fcbk_statustostruct);
+}
+
+/*
+ * Ask gsyncd for the value of the "<param>-file" config option of a
+ * master/slave session and store the answer in @prmfile.
+ *
+ * The command line is assembled as
+ *   gsyncd -c <conf_path> --iprefix=<DATADIR> :<master> <slave>
+ *          --config-get <param>-file
+ * and is executed through glusterd_query_extutil().
+ *
+ * Returns whatever glusterd_query_extutil() returns.
+ */
+static int
+glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master,
+                               char *slave, char *conf_path)
+{
+        runner_t gsyncd_cmd = {0,};
+        int      rc = -1;
+
+        runinit (&gsyncd_cmd);
+
+        /* executable and config file */
+        runner_add_args (&gsyncd_cmd, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+        runner_argprintf (&gsyncd_cmd, "%s", conf_path);
+
+        /* session identification */
+        runner_argprintf (&gsyncd_cmd, "--iprefix=%s", DATADIR);
+        runner_argprintf (&gsyncd_cmd, ":%s", master);
+
+        /* requested option */
+        runner_add_args (&gsyncd_cmd, slave, "--config-get", NULL);
+        runner_argprintf (&gsyncd_cmd, "%s-file", param);
+
+        rc = glusterd_query_extutil (prmfile, &gsyncd_cmd);
+        return rc;
+}
+
+/*
+ * Resolve the pidfile path of the gsyncd session between @master and
+ * @slave into @pidfile and open that file.
+ *
+ * @conf_path is used as the gsyncd config when it can be lstat()ed;
+ * otherwise the template config under priv->workdir is used and
+ * *@is_template_in_use is set to _gf_true. The flag is only ever written
+ * with _gf_true here, so the caller has to initialise it.
+ *
+ * Return value:
+ *   result of open (pidfile, O_RDWR) on the normal path (an fd, or -1
+ *   when the open fails);
+ *   -2 when no pidfile string could be obtained even with the template
+ *   config;
+ *   the failing lstat() result when the template config is missing too;
+ *   the initial -1 when @master or @slave fails validation (assuming
+ *   GF_VALIDATE_OR_GOTO only jumps to the label - TODO confirm).
+ */
+static int
+gsyncd_getpidfile (char *master, char *slave, char *pidfile,
+ char *conf_path, gf_boolean_t *is_template_in_use)
+{
+ char temp_conf_path[PATH_MAX] = "";
+ char *working_conf_path = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ struct stat stbuf = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (this->private);
+ GF_ASSERT (conf_path);
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO ("gsync", master, out);
+ GF_VALIDATE_OR_GOTO ("gsync", slave, out);
+
+ /* path of the fallback (template) config below the work directory */
+ snprintf (temp_conf_path, sizeof(temp_conf_path) - 1,
+ "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+
+ /* prefer the caller's config file if it exists */
+ ret = sys_lstat (conf_path, &stbuf);
+ if (!ret) {
+ gf_msg_debug (this->name, 0, "Using passed config template(%s).",
+ conf_path);
+ working_conf_path = conf_path;
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, ENOENT,
+ GD_MSG_FILE_OP_FAILED,
+ "Config file (%s) missing. Looking for template "
+ "config file (%s)", conf_path, temp_conf_path);
+ ret = sys_lstat (temp_conf_path, &stbuf);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+ GD_MSG_FILE_OP_FAILED,
+ "Template config file (%s) missing.",
+ temp_conf_path);
+ goto out;
+ }
+ gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG,
+ "Using default config template(%s).",
+ temp_conf_path);
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ }
+
+ /* re-entered at most once: the retry below switches to the template
+ * config and sets the flag, so a second failure takes the -2 branch */
+fetch_data:
+
+ ret = glusterd_gsync_get_param_file (pidfile, "pid", master,
+ slave, working_conf_path);
+ if ((ret == -1) || strlen(pidfile) == 0) {
+ if (*is_template_in_use == _gf_false) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PIDFILE_CREATE_FAILED,
+ "failed to create the pidfile string. "
+ "Trying default config template");
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ goto fetch_data;
+ } else {
+ /* -2 lets callers tell this apart from a failed open() */
+ ret = -2;
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PIDFILE_CREATE_FAILED, "failed to "
+ "create the pidfile string from template "
+ "config");
+ goto out;
+ }
+ }
+
+ gf_msg_debug (this->name, 0, "pidfile = %s", pidfile);
+
+ /* on success the descriptor is handed to the caller */
+ ret = open (pidfile, O_RDWR);
+ out:
+ return ret;
+}
+
+/*
+ * Test whether the pidfile behind @fd is locked by another process.
+ *
+ * Returns 0 when lockf (F_TEST) fails with EAGAIN or EACCES (gsyncd keeps
+ * the pidfile locked), -1 in every other case.
+ */
+static int
+gsync_status_byfd (int fd)
+{
+        int lock_rc = 0;
+
+        GF_ASSERT (fd >= -1);
+
+        lock_rc = lockf (fd, F_TEST, 0);
+        if (lock_rc != -1)
+                return -1;
+
+        if (errno != EAGAIN && errno != EACCES)
+                return -1;
+
+        /* gsyncd keeps the pidfile locked */
+        return 0;
+}
+
+/*
+ * gsync_status: probe whether gsyncd is running for a master/slave pair.
+ *
+ * The running state is reported through *@status, not through the return
+ * value:
+ *   *status ==  0  the pidfile is locked, i.e. gsyncd is running
+ *   *status == -1  gsyncd is not running (or the pidfile could not be
+ *                  opened)
+ *
+ * *@is_template_in_use is passed through to gsyncd_getpidfile().
+ *
+ * Returns 0 when the probe was carried out, -1 when gsyncd_getpidfile()
+ * reported -2 (no pidfile path could be determined); *status is left
+ * untouched in that case.
+ */
+int
+gsync_status (char *master, char *slave, char *conf_path,
+              int *status, gf_boolean_t *is_template_in_use)
+{
+        char pidfile[PATH_MAX] = {0,};
+        int  fd = -1;
+
+        fd = gsyncd_getpidfile (master, slave, pidfile,
+                                conf_path, is_template_in_use);
+        if (fd == -2)
+                return -1;
+
+        *status = gsync_status_byfd (fd);
+
+        /* gsyncd_getpidfile() may hand back -1; only close a real fd */
+        if (fd >= 0)
+                sys_close (fd);
+
+        return 0;
+}
+
+
+/*
+ * Store a private copy of @value under @key in volinfo->dict.
+ *
+ * Returns 0 on success, -1 when the copy cannot be allocated, otherwise
+ * the non-zero result of dict_set_dynstr().
+ */
+static int32_t
+glusterd_gsync_volinfo_dict_set (glusterd_volinfo_t *volinfo,
+                                 char *key, char *value)
+{
+        xlator_t *this       = NULL;
+        char     *value_copy = NULL;
+        int32_t   ret        = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        value_copy = gf_strdup (value);
+        if (value_copy == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+                        "Unable to allocate memory");
+                return ret;
+        }
+
+        ret = dict_set_dynstr (volinfo->dict, key, value_copy);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to set dict");
+
+        return ret;
+}
+
+/*
+ * Check that gsyncd can be spawned for the given session by running
+ *   gsyncd --verify spawning :<master> <slave>
+ * with its stdout redirected to a pipe.
+ *
+ * Returns 0 when the child could be started and runner_end() reported 0,
+ * -1 otherwise.
+ */
+static int
+glusterd_verify_gsyncd_spawn (char *master, char *slave)
+{
+        xlator_t *this        = NULL;
+        runner_t  spawn_check = {0,};
+        int       ret         = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        runinit (&spawn_check);
+        runner_add_args (&spawn_check, GSYNCD_PREFIX"/gsyncd",
+                         "--verify", "spawning", NULL);
+        runner_argprintf (&spawn_check, ":%s", master);
+        runner_add_args (&spawn_check, slave, NULL);
+        runner_redir (&spawn_check, STDOUT_FILENO, RUN_PIPE);
+
+        if (runner_start (&spawn_check)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SPAWNING_CHILD_FAILED,
+                        "spawning child failed");
+                ret = -1;
+        } else if (runner_end (&spawn_check) != 0) {
+                ret = -1;
+        }
+
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Compare option name @name against reference name @ref over their full
+ * length. Two characters match when they are equal, or when @ref has
+ * @ref_alt where @name has @name_alt (used to tolerate '-' vs '_').
+ */
+static gf_boolean_t
+gsync_opt_name_equal (const char *ref, const char *name,
+                      char ref_alt, char name_alt)
+{
+        int i = 0;
+
+        for (i = 0; ref[i] && name[i]; i++) {
+                if (ref[i] == name[i] ||
+                    (ref[i] == ref_alt && name[i] == name_alt))
+                        continue;
+                return _gf_false;
+        }
+
+        /* a mere common prefix is not a match: both must end here */
+        return (ref[i] == '\0' && name[i] == '\0') ? _gf_true : _gf_false;
+}
+
+/*
+ * Validate a geo-replication "config" request held in @dict.
+ *
+ * Keys read from @dict: "subop", "slave", "op_name" and, for the set
+ * sub-operations, "op_value". @volname is used as the master when the
+ * ability to spawn gsyncd has to be verified.
+ *
+ * Checks performed, in order:
+ *   - "subop" and "slave" are present ("get-all" needs nothing more);
+ *   - "op_name" is present and accepted by "gsyncd --config-check";
+ *   - "subop" is one of get / set / del / set-glob / del-glob;
+ *   - a set sub-operation carries "op_value";
+ *   - "op_name" is not listed in gsync_reserved_opts;
+ *   - if "op_name" is listed in gsync_confopt_vals, "op_value" is one of
+ *     the values allowed there.
+ *
+ * Returns 0 when the request is acceptable, and also when gsyncd itself
+ * cannot be spawned to verify the option name. Returns -1 otherwise, with
+ * *@op_errstr set to an allocated message.
+ */
+static int
+gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)
+{
+        char                         **resopt    = NULL;
+        int                            i         = 0;
+        int                            ret       = -1;
+        char                          *subop     = NULL;
+        char                          *slave     = NULL;
+        char                          *op_name   = NULL;
+        char                          *op_value  = NULL;
+        char                          *t         = NULL;
+        char                           errmsg[PATH_MAX] = "";
+        gf_boolean_t                   val_match = _gf_true;
+        struct gsync_config_opt_vals_ *conf_vals = NULL;
+        xlator_t                      *this      = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (dict_get_str (dict, "subop", &subop) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+                        "missing subop");
+                *op_errstr = gf_strdup ("Invalid config request");
+                return -1;
+        }
+
+        if (dict_get_str (dict, "slave", &slave) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+                        GEOREP" CONFIG: no slave given");
+                *op_errstr = gf_strdup ("Slave required");
+                return -1;
+        }
+
+        if (strcmp (subop, "get-all") == 0)
+                return 0;
+
+        if (dict_get_str (dict, "op_name", &op_name) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+                        "option name missing");
+                *op_errstr = gf_strdup ("Option name missing");
+                return -1;
+        }
+
+        if (runcmd (GSYNCD_PREFIX"/gsyncd", "--config-check", op_name, NULL)) {
+                /* the check may have failed because gsyncd cannot run at
+                 * all; in that case the option name is not judged here */
+                ret = glusterd_verify_gsyncd_spawn (volname, slave);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GSYNCD_SPAWN_FAILED, "Unable to spawn "
+                                "gsyncd");
+                        return 0;
+                }
+
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        GD_MSG_INVALID_ENTRY,
+                        "Invalid option %s", op_name);
+                *op_errstr = gf_strdup ("Invalid option");
+
+                return -1;
+        }
+
+        if (strcmp (subop, "get") == 0)
+                return 0;
+
+        /* only "set"/"del", optionally suffixed with "-glob", remain */
+        t = strtail (subop, "set");
+        if (!t)
+                t = strtail (subop, "del");
+        if (!t || (t[0] && strcmp (t, "-glob") != 0)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_SUBOP_NOT_FOUND,
+                        "unknown subop %s", subop);
+                *op_errstr = gf_strdup ("Invalid config request");
+                return -1;
+        }
+
+        if (strtail (subop, "set") &&
+            dict_get_str (dict, "op_value", &op_value) != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+                        "missing value for set");
+                *op_errstr = gf_strdup ("missing value");
+                /* an error string without an error return would be
+                 * reported as success by the caller */
+                return -1;
+        }
+
+        /* match option name against reserved options, modulo -/_
+         * difference
+         */
+        for (resopt = gsync_reserved_opts; *resopt; resopt++) {
+                if (!gsync_opt_name_equal (*resopt, op_name, '-', '_'))
+                        continue;
+
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_RESERVED_OPTION,
+                        "Reserved option %s", op_name);
+                *op_errstr = gf_strdup ("Reserved option");
+
+                return -1;
+        }
+
+        /* Check options in gsync_confopt_vals for invalid values */
+        for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) {
+                if (!gsync_opt_name_equal (conf_vals->op_name, op_name,
+                                           '_', '-'))
+                        continue;
+
+                /* "del" carries no value, nothing left to validate */
+                if (!op_value)
+                        return 0;
+
+                val_match = _gf_false;
+                for (i = 0; i < conf_vals->no_of_pos_vals; i++) {
+                        if (conf_vals->case_sensitive) {
+                                if (!strcmp (conf_vals->values[i], op_value))
+                                        val_match = _gf_true;
+                        } else {
+                                if (!strcasecmp (conf_vals->values[i],
+                                                 op_value))
+                                        val_match = _gf_true;
+                        }
+                }
+
+                if (!val_match) {
+                        /* snprintf() terminates the buffer itself; its
+                         * return value must not be used as an index since
+                         * it may exceed the buffer on truncation */
+                        snprintf (errmsg, sizeof (errmsg),
+                                  "Invalid value(%s) for"
+                                  " option %s", op_value,
+                                  op_name);
+
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", errmsg);
+                        *op_errstr = gf_strdup (errmsg);
+                        return -1;
+                }
+        }
+
+        return 0;
+}
+
+/* Forward declaration so that _get_status_mst_slv() below can call this
+ * function ahead of its definition. */
+static int
+glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
+ char *slave, char *conf_path,
+ dict_t *rsp_dict, char *node);
+
+/*
+ * Per-entry callback (dict_foreach signature) that collects the
+ * geo-replication status of one slave entry.
+ *
+ * @value->data holds the stored slave entry; @data is a
+ * glusterd_gsync_status_temp_t carrying the master volinfo, the response
+ * dict and the node. The slave is parsed out of the entry, the session's
+ * gsyncd.conf path is built below priv->workdir, and both are passed to
+ * glusterd_get_gsync_status_mst_slv().
+ *
+ * Returns the result of glusterd_get_gsync_status_mst_slv(), or -1 on any
+ * earlier failure.
+ *
+ * NOTE(review): slave_url/slave_host/slave_vol are filled in by
+ * glusterd_get_slave_info() and not released here - confirm who owns them.
+ */
+static int
+_get_status_mst_slv (dict_t *dict, char *key, data_t *value, void *data)
+{
+        glusterd_gsync_status_temp_t *param      = NULL;
+        char                         *slave      = NULL;
+        char                         *slave_url  = NULL;
+        char                         *slave_vol  = NULL;
+        char                         *slave_host = NULL;
+        char                         *errmsg     = NULL;
+        char                          conf_path[PATH_MAX] = "";
+        int                           ret        = -1;
+        glusterd_conf_t              *priv       = NULL;
+        xlator_t                     *this       = NULL;
+        char                          slv_url[VOLINFO_SLAVE_URL_MAX] = {0};
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        param = (glusterd_gsync_status_temp_t *)data;
+
+        GF_VALIDATE_OR_GOTO (this->name, param, out);
+        GF_VALIDATE_OR_GOTO (this->name, param->volinfo, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        /* bounded copy that is always NUL-terminated */
+        snprintf (slv_url, sizeof (slv_url), "%s", value->data);
+        ret = parse_slave_url (slv_url, &slave);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                        "Error in parsing slave: %s!", value->data);
+                goto out;
+        }
+
+        ret = glusterd_get_slave_info (slave, &slave_url,
+                                       &slave_host, &slave_vol, &errmsg);
+        if (ret) {
+                if (errmsg)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVEINFO_FETCH_ERROR,
+                                "Unable to fetch slave details. Error: %s",
+                                errmsg);
+                else
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVEINFO_FETCH_ERROR,
+                                "Unable to fetch slave details.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = snprintf (conf_path, sizeof (conf_path),
+                        "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+                        priv->workdir, param->volinfo->volname,
+                        slave_host, slave_vol);
+        /* snprintf() returns the untruncated length (or a negative value),
+         * so it is validated instead of being used as an index */
+        if (ret < 0 || (size_t) ret >= sizeof (conf_path)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CONF_PATH_ASSIGN_FAILED,
+                        "Unable to assign conf_path.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_get_gsync_status_mst_slv (param->volinfo,
+                                                 slave, conf_path,
+                                                 param->rsp_dict,
+                                                 param->node);
+out:
+
+        if (errmsg)
+                GF_FREE (errmsg);
+
+        /* "this" may be NULL when the very first validation failed */
+        gf_msg_debug (this ? this->name : "glusterd", 0,
+                      "Returning %d.", ret);
+        return ret;
+}
+
+
+/*
+ * Per-entry callback (dict_foreach signature) that tracks the highest
+ * slave index seen so far.
+ *
+ * @key is scanned with the pattern "slave%d"; when the number found is
+ * larger than the int that @data points to, that int is updated. A key
+ * that does not match leaves the candidate at 0. Always returns 0.
+ */
+static int
+_get_max_gsync_slave_num (dict_t *dict, char *key, data_t *value, void *data)
+{
+        int *highest   = (int *)data;
+        int  candidate = 0;
+
+        sscanf (key, "slave%d", &candidate);
+
+        if (*highest < candidate)
+                *highest = candidate;
+
+        return 0;
+}
+
+/*
+ * Per-entry callback (dict_foreach signature) that looks for a slave
+ * entry whose slave volume uuid equals slave_cfg->slave_voluuid, where
+ * slave_cfg is the struct slave_vol_config passed in @data.
+ *
+ * Returns
+ *    0  this entry carries a different uuid;
+ *   -1  the uuid matches; slave_cfg->old_slvidx receives the index
+ *       scanned from @key ("slave<N>");
+ *   -2  the entry is missing, empty or has fewer than five ':'.
+ */
+static int
+_get_slave_idx_slave_voluuid (dict_t *dict, char *key, data_t *value,
+                              void *data)
+{
+        char                    *slave_info = NULL;
+        xlator_t                *this       = NULL;
+        struct slave_vol_config *slave_cfg  = NULL;
+        int                      i          = 0;
+        int                      ret        = -1;
+        unsigned                 tmp_slvnum = 0;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        slave_cfg = data;
+
+        if (value)
+                slave_info = value->data;
+
+        if (!(slave_info) || strlen (slave_info) == 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_SLAVE,
+                        "Invalid slave in dict");
+                ret = -2;
+                goto out;
+        }
+
+        /* slave format:
+         * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid
+         * skip past the fifth ':' to reach slave_voluuid */
+        while (i++ < 5) {
+                slave_info = strchr (slave_info, ':');
+                if (slave_info)
+                        slave_info++;
+                else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                                "slave_info becomes NULL!");
+                        ret = -2;
+                        goto out;
+                }
+        }
+        if (strcmp (slave_info, slave_cfg->slave_voluuid) == 0) {
+                gf_msg_debug (this->name, 0, "Same slave volume "
+                              "already present %s",
+                              slave_cfg->slave_voluuid);
+                ret = -1;
+
+                /* tmp_slvnum is unsigned, so scan and print it with %u */
+                sscanf (key, "slave%u", &tmp_slvnum);
+                slave_cfg->old_slvidx = tmp_slvnum;
+
+                gf_msg_debug (this->name, 0, "and "
+                              "its index is: %u", tmp_slvnum);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Remove every entry for @slave from volinfo->gsync_slaves and persist
+ * the volinfo.
+ *
+ * glusterd_get_slave() is called repeatedly; each key it reports is
+ * deleted until the lookup fails. When the very first lookup fails its
+ * result is incremented and returned without storing anything, so -1
+ * becomes 0 and -2 becomes -1 (presumably "not found" and "error" -
+ * confirm against glusterd_get_slave()).
+ *
+ * Returns 0 on success, otherwise the (non-zero) failure described above
+ * or the result of glusterd_store_volinfo(), in which case *@op_errstr is
+ * set to an allocated message.
+ */
+static int
+glusterd_remove_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
+                               char **op_errstr)
+{
+        int       zero_slave_entries = _gf_true;
+        int       ret                = 0;
+        char     *slavekey           = NULL;
+        xlator_t *this               = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+
+        for (;;) {
+                ret = glusterd_get_slave (volinfo, slave, &slavekey);
+                if (ret < 0) {
+                        if (zero_slave_entries) {
+                                ret++;
+                                goto out;
+                        }
+                        /* nothing was found this round, so there is no
+                         * key to delete any more */
+                        break;
+                }
+                zero_slave_entries = _gf_false;
+                dict_del (volinfo->gsync_slaves, slavekey);
+        }
+
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                *op_errstr = gf_strdup ("Failed to store the Volume "
+                                        "information");
+                goto out;
+        }
+ out:
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+
+}
+
+/*
+ * Look up the stored entry for @slave in vol->gsync_slaves and parse the
+ * text in front of its first ':' into @uuid.
+ *
+ * Returns the result of gf_uuid_parse() when an entry was found, -1 when
+ * the slave is unknown, the lookup failed or the entry is malformed.
+ */
+static int
+glusterd_gsync_get_uuid (char *slave, glusterd_volinfo_t *vol,
+                         uuid_t uuid)
+{
+        int       ret        = 0;
+        char     *slavekey   = NULL;
+        char     *slaveentry = NULL;
+        char     *t          = NULL;
+        xlator_t *this       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (vol);
+        GF_ASSERT (slave);
+
+        ret = glusterd_get_slave (vol, slave, &slavekey);
+        if (ret < 0) {
+                /* XXX colliding cases of failure and non-extant
+                 * slave... now just doing this as callers of this
+                 * function can make sense only of -1 and 0 as retvals;
+                 * getting at the proper semanticals will involve
+                 * fixing callers as well.
+                 */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (vol->gsync_slaves, slavekey, &slaveentry);
+        GF_ASSERT (ret == 0);
+        /* do not rely on the assertion alone before dereferencing */
+        if (ret || !slaveentry) {
+                ret = -1;
+                goto out;
+        }
+
+        t = strchr (slaveentry, ':');
+        GF_ASSERT (t);
+        if (!t) {
+                ret = -1;
+                goto out;
+        }
+
+        /* temporarily cut the entry at the ':' so only the uuid part is
+         * parsed, then restore it */
+        *t = '\0';
+        ret = gf_uuid_parse (slaveentry, uuid);
+        *t = ':';
+
+ out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Per-entry callback (dict_foreach signature) that upgrades a slave entry
+ * from the old format to the new one by appending the slave volume uuid.
+ *
+ *   old: master_node_uuid:ssh://slave_host::slave_vol
+ *   new: master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid
+ *
+ * Entries with exactly four ':' are treated as old format. For those the
+ * slave volume uuid is fetched with glusterd_get_slave_voluuid(), the
+ * entry is replaced under the same key and the gf_boolean_t that @data
+ * points to is set to _gf_true.
+ *
+ * Returns 0 when nothing had to be done, when the entry was updated, and
+ * also when the remote uuid could not be fetched (deliberately not treated
+ * as a failure). Returns non-zero on parse, allocation or dict errors.
+ */
+static int
+update_slave_voluuid (dict_t *dict, char *key, data_t *value, void *data)
+{
+        char         *slave           = NULL;
+        char         *slave_url       = NULL;
+        char         *slave_vol       = NULL;
+        char         *slave_host      = NULL;
+        char         *errmsg          = NULL;
+        xlator_t     *this            = NULL;
+        int           ret             = -1;
+        char          slv_url[VOLINFO_SLAVE_URL_MAX] = {0};
+        char          slave_voluuid[GF_UUID_BUF_SIZE] = {0};
+        char         *slave_info      = NULL;
+        char         *new_value       = NULL;
+        char         *same_key        = NULL;
+        int           cnt             = 0;
+        gf_boolean_t *voluuid_updated = NULL;
+
+        this = THIS;
+
+        voluuid_updated = data;
+        slave_info = value->data;
+        gf_msg_debug (this->name, 0, "slave_info: %s!", slave_info);
+
+        /* count the ':' separators in the entry */
+        while (slave_info) {
+                slave_info = strchr (slave_info, ':');
+                if (slave_info)
+                        cnt++;
+                else
+                        break;
+
+                slave_info++;
+        }
+
+        gf_msg_debug (this->name, 0, "cnt: %d", cnt);
+        /* check whether old slave format and update vol uuid if old format.
+         * With volume uuid, number of ':' is 5 and is 4 without.
+         */
+        if (cnt == 4) {
+                /* bounded copy that is always NUL-terminated */
+                snprintf (slv_url, sizeof (slv_url), "%s", value->data);
+
+                ret = parse_slave_url (slv_url, &slave);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                                "Error in parsing slave: %s!", value->data);
+                        goto out;
+                }
+
+                ret = glusterd_get_slave_info (slave, &slave_url,
+                                               &slave_host, &slave_vol,
+                                               &errmsg);
+                if (ret) {
+                        if (errmsg)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                                        "Unable to fetch slave details. "
+                                        "Error: %s", errmsg);
+                        else
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                                        "Unable to fetch slave details.");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_get_slave_voluuid (slave_host, slave_vol,
+                                                  slave_voluuid);
+                if ((ret) || (strlen(slave_voluuid) == 0)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_REMOTE_VOL_UUID_FAIL,
+                                "Unable to get remote volume uuid "
+                                "slavehost:%s slavevol:%s",
+                                slave_host, slave_vol);
+                        /* Avoiding failure due to remote vol uuid fetch */
+                        ret = 0;
+                        goto out;
+                }
+
+                /* build both strings before dict_del(): they are derived
+                 * from the entry that is about to be removed */
+                ret = gf_asprintf (&new_value, "%s:%s",
+                                   value->data, slave_voluuid);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Unable to allocate memory");
+                        goto out;
+                }
+
+                ret = gf_asprintf (&same_key, "%s", key);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Unable to allocate memory");
+                        GF_FREE (new_value);
+                        goto out;
+                }
+
+                /* delete old key and add new value */
+                dict_del (dict, key);
+
+                /* set new value for the same key
+                 * NOTE(review): same_key is not released afterwards; if
+                 * the dict keeps its own copy of the key this leaks -
+                 * confirm against dict_set_dynstr() */
+                ret = dict_set_dynstr (dict, same_key, new_value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_REMOTE_VOL_UUID_FAIL,
+                                "Error in setting dict value "
+                                "new_value :%s", new_value);
+                        goto out;
+                }
+                *voluuid_updated = _gf_true;
+        }
+
+        ret = 0;
+out:
+        if (errmsg)
+                GF_FREE (errmsg);
+
+        gf_msg_debug (this->name, 0, "Returning %d.", ret);
+        return ret;
+}
+
+/*
+ * Run update_slave_voluuid() over every entry of volinfo->gsync_slaves
+ * and persist the volinfo when at least one entry was rewritten.
+ *
+ * Returns 0 on success, non-zero when validation, the dict walk or
+ * glusterd_store_volinfo() fails.
+ */
+static int
+glusterd_update_slave_voluuid_slaveinfo (glusterd_volinfo_t *volinfo)
+{
+        int          ret             = -1;
+        xlator_t    *this            = NULL;
+        gf_boolean_t voluuid_updated = _gf_false;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+        ret = dict_foreach (volinfo->gsync_slaves, update_slave_voluuid,
+                            &voluuid_updated);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REMOTE_VOL_UUID_FAIL, "Error in updating "
+                        "volinfo");
+                goto out;
+        }
+
+        if (_gf_true == voluuid_updated) {
+                ret = glusterd_store_volinfo (volinfo,
+                                              GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_STORE_FAIL, "Error in storing "
+                                "volinfo");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        /* "this" may be NULL when the very first validation failed */
+        gf_msg_debug (this ? this->name : "glusterd", 0,
+                      "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Report through *@is_run whether gsyncd for the @master/@slave session
+ * is running on this node.
+ *
+ * *@is_run is set to _gf_true only when gsync_status() returns 0 and
+ * reports status 0; otherwise it stays _gf_false.
+ *
+ * Returns -1 when gsync_status() returns -1, 0 in all other cases.
+ */
+int
+glusterd_check_gsync_running_local (char *master, char *slave,
+                                    char *conf_path,
+                                    gf_boolean_t *is_run)
+{
+        xlator_t    *this          = NULL;
+        gf_boolean_t template_used = _gf_false;
+        int          run_state     = 0;
+        int          ret           = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (master);
+        GF_ASSERT (slave);
+        GF_ASSERT (is_run);
+
+        *is_run = _gf_false;
+
+        ret = gsync_status (master, slave, conf_path,
+                            &run_state, &template_used);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_VALIDATE_FAILED,
+                        GEOREP" validation failed");
+        } else {
+                if (ret == 0 && run_state == 0)
+                        *is_run = _gf_true;
+                ret = 0;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+
+}
+
+/*
+ * Record a geo-replication slave for @volinfo and persist the volinfo.
+ *
+ * The stored value has the form "<host_uuid>:<normalized slave
+ * url>:<slave_voluuid>". If an entry with the same slave volume uuid
+ * already exists it is replaced under its existing "slave<N>" key;
+ * otherwise the entry is added as "slave<max + 1>".
+ *
+ * If glusterd_get_slave() already knows @slave: with @is_force the
+ * function returns 0 without storing anything, without it the request is
+ * rejected and *@op_errstr is set.
+ *
+ * Returns 0 on success, non-zero on failure (including a NULL
+ * @slave_voluuid).
+ */
+static int
+glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
+                              char *host_uuid, char *slave_voluuid,
+                              char **op_errstr, gf_boolean_t is_force)
+{
+        /* start from failure so that the validation jump below does not
+         * report success */
+        int                     ret        = -1;
+        int                     maxslv     = 0;
+        char                  **linearr    = NULL;
+        char                   *value      = NULL;
+        char                   *slavekey   = NULL;
+        char                   *slaveentry = NULL;
+        char                    key[512]   = {0, };
+        char                   *t          = NULL;
+        xlator_t               *this       = NULL;
+        struct slave_vol_config slave1     = {{0},};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        GF_ASSERT (host_uuid);
+        GF_VALIDATE_OR_GOTO (this->name, slave_voluuid, out);
+
+        ret = glusterd_get_slave (volinfo, slave, &slavekey);
+        switch (ret) {
+        case -2:
+                ret = -1;
+                goto out;
+        case -1:
+                break;
+        default:
+                if (!is_force)
+                        GF_ASSERT (ret > 0);
+                ret = dict_get_str (volinfo->gsync_slaves, slavekey,
+                                    &slaveentry);
+                GF_ASSERT (ret == 0);
+
+                /* same-name + same-uuid slave entries should have been
+                 * filtered out in glusterd_op_verify_gsync_start_options(),
+                 * so we can assert an uuid mismatch
+                 */
+                t = strtail (slaveentry, host_uuid);
+                if (!is_force)
+                        GF_ASSERT (!t || *t != ':');
+
+                if (is_force) {
+                        gf_msg_debug (this->name, 0, GEOREP" has already "
+                                      "been invoked for the %s (master) and "
+                                      "%s (slave). Allowing without saving "
+                                      "info again due to force command.",
+                                      volinfo->volname, slave);
+                        ret = 0;
+                        goto out;
+                }
+
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_INVOKE_ERROR,
+                        GEOREP" has already been invoked for "
+                        "the %s (master) and %s (slave) from a different "
+                        "machine", volinfo->volname, slave);
+                *op_errstr = gf_strdup (GEOREP" already running in "
+                                        "another machine");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_urltransform_single (slave, "normalize", &linearr);
+        if (ret == -1)
+                goto out;
+
+        ret = gf_asprintf (&value, "%s:%s:%s", host_uuid,
+                           linearr[0], slave_voluuid);
+
+        glusterd_urltransform_free (linearr, 1);
+        if (ret == -1)
+                goto out;
+
+        /* Given the slave volume uuid, check and get any existing slave.
+         * snprintf() keeps the copy NUL-terminated even for an over-long
+         * input. */
+        snprintf (slave1.slave_voluuid, sizeof (slave1.slave_voluuid),
+                  "%s", slave_voluuid);
+        ret = dict_foreach (volinfo->gsync_slaves,
+                            _get_slave_idx_slave_voluuid, &slave1);
+
+        if (ret == 0) { /* New slave */
+                dict_foreach (volinfo->gsync_slaves, _get_max_gsync_slave_num,
+                              &maxslv);
+                snprintf (key, sizeof (key), "slave%d", maxslv + 1);
+
+                ret = dict_set_dynstr (volinfo->gsync_slaves, key, value);
+                if (ret) {
+                        GF_FREE (value);
+                        goto out;
+                }
+        } else if (ret == -1) { /* Existing slave */
+                snprintf (key, sizeof (key), "slave%d", slave1.old_slvidx);
+
+                /* Delete present slave info(with old hostname) */
+                dict_del (volinfo->gsync_slaves, key);
+
+                gf_msg_debug (this->name, 0, "Replacing key:%s with new value"
+                              ":%s", key, value);
+
+                /* Add new slave's value, with the same slave index */
+                ret = dict_set_dynstr (volinfo->gsync_slaves, key, value);
+                if (ret) {
+                        GF_FREE (value);
+                        goto out;
+                }
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REMOTE_VOL_UUID_FAIL,
+                        "_get_slave_idx_slave_voluuid failed!");
+                GF_FREE (value);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                *op_errstr = gf_strdup ("Failed to store the Volume "
+                                        "information");
+                goto out;
+        }
+        ret = 0;
+ out:
+        return ret;
+}
+
+/*
+ * Validate a geo-replication "start" request for @volinfo and @slave.
+ *
+ * Checks, in order:
+ *   - the master volume is started;
+ *   - the directory containing @statefile exists (the statefile itself
+ *     may be absent during upgrade);
+ *   - the slave is recorded in the volinfo, i.e. the session was created;
+ *   - gsyncd is not already running for the session (skipped with
+ *     @is_force);
+ *   - the pidfile path came from @conf_path and not from the template
+ *     config;
+ *   - gsyncd can be spawned.
+ *
+ * Returns 0 when the session may be started, non-zero otherwise. On
+ * failure *@op_errstr is set to an allocated message whenever one was
+ * composed.
+ */
+static int
+glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo,
+                                        char *slave, char *conf_path,
+                                        char *statefile, char **op_errstr,
+                                        gf_boolean_t is_force)
+{
+        int          ret                = -1;
+        int          ret_status         = 0;
+        gf_boolean_t is_template_in_use = _gf_false;
+        char         msg[2048]          = {0};
+        uuid_t       uuid               = {0};
+        xlator_t    *this               = NULL;
+        struct stat  stbuf              = {0,};
+        char         statefiledir[PATH_MAX] = {0,};
+        char        *statedir           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (conf_path);
+        GF_ASSERT (this && this->private);
+
+        if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+                snprintf (msg, sizeof (msg), "Volume %s needs to be started "
+                          "before "GEOREP" start", volinfo->volname);
+                goto out;
+        }
+
+        /* check session directory as statefile may not present
+         * during upgrade. dirname() may modify its argument, hence the
+         * (always NUL-terminated) private copy. */
+        snprintf (statefiledir, sizeof (statefiledir), "%s", statefile);
+        statedir = dirname (statefiledir);
+
+        ret = sys_lstat (statedir, &stbuf);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Session between %s and %s has"
+                          " not been created. Please create session and retry.",
+                          volinfo->volname, slave);
+                gf_msg (this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+                        "%s statefile: %s", msg, statefile);
+                /* *op_errstr is filled from msg at "out"; duplicating it
+                 * here as well would leak the first copy */
+                goto out;
+        }
+
+        /* Check if the gsync slave info is stored. If not
+         * session has not been created */
+        ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Session between %s and %s has"
+                          " not been created. Please create session and retry.",
+                          volinfo->volname, slave);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SESSION_CREATE_ERROR,
+                        "%s", msg);
+                goto out;
+        }
+
+        /*Check if the gsync is already started in cmd. inited host
+         * If so initiate add it into the glusterd's priv*/
+        ret = gsync_status (volinfo->volname, slave, conf_path,
+                            &ret_status, &is_template_in_use);
+        if (ret == 0) {
+                if ((ret_status == 0) && !is_force) {
+                        snprintf (msg, sizeof (msg), GEOREP " session between"
+                                  " %s & %s already started", volinfo->volname,
+                                  slave);
+                        ret = -1;
+                        goto out;
+                }
+        } else if (ret == -1) {
+                snprintf (msg, sizeof (msg), GEOREP" start option "
+                          "validation failed ");
+                goto out;
+        }
+
+        if (is_template_in_use == _gf_true) {
+                snprintf (msg, sizeof (msg), GEOREP" start "
+                          "failed : pid-file entry missing "
+                          "in config file.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+        if (ret && !is_force) {
+                snprintf (msg, sizeof (msg), "Unable to spawn gsyncd");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED,
+                        "%s", msg);
+        }
+out:
+        if (ret && (msg[0] != '\0')) {
+                *op_errstr = gf_strdup (msg);
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Set *@flag to _gf_true when volinfo->gsync_slaves holds at least one
+ * entry, to _gf_false otherwise.
+ */
+void
+glusterd_check_geo_rep_configured (glusterd_volinfo_t *volinfo,
+                                   gf_boolean_t *flag)
+{
+        GF_ASSERT (volinfo);
+        GF_ASSERT (flag);
+
+        *flag = (volinfo->gsync_slaves->count) ? _gf_true : _gf_false;
+}
+
+/*
+ * is_geo_rep_active:
+ *      Fetches the gsyncd configuration of the session, reads the monitor
+ *      status from the configured state_file and stores 0 in *is_active
+ *      when that status is "Stopped" or "Created", 1 for anything else.
+ *      A status file that cannot be read counts as status "defunct",
+ *      i.e. as active.
+ *
+ * RETURN VALUE:
+ *       0: *is_active has been set.
+ *      -1: the dict could not be created, or the configuration or the
+ *          state_file name could not be obtained.
+ */
+
+static int
+is_geo_rep_active (glusterd_volinfo_t *volinfo, char *slave,
+                   char *conf_path, int *is_active)
+{
+        xlator_t *this       = NULL;
+        dict_t   *conf_dict  = NULL;
+        char     *master_vol = NULL;
+        char     *state_path = NULL;
+        char      mon_state[PATH_MAX] = "";
+        int       inactive   = 0;
+        int       ret        = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        master_vol = volinfo->volname;
+
+        conf_dict = dict_new ();
+        if (conf_dict == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+                        "Not able to create dict.");
+                goto out;
+        }
+
+        if (glusterd_gsync_get_config (master_vol, slave, conf_path,
+                                       conf_dict)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GET_CONFIG_INFO_FAILED,
+                        "Unable to get configuration data "
+                        "for %s(master), %s(slave)", master_vol, slave);
+                ret = -1;
+                goto out;
+        }
+
+        if (dict_get_param (conf_dict, "state_file", &state_path)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get state_file's name "
+                        "for %s(master), %s(slave). Please check gsync "
+                        "config file.", master_vol, slave);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_gsync_read_frm_status (state_path, mon_state,
+                                              sizeof (mon_state));
+        if (ret <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STAT_FILE_READ_FAILED,
+                        "Unable to read the status file for %s(master), "
+                        "%s(slave)", master_vol, slave);
+                strncpy (mon_state, "defunct", sizeof (mon_state));
+        }
+
+        inactive = (strcmp (mon_state, "Stopped") == 0) ||
+                   (strcmp (mon_state, "Created") == 0);
+        *is_active = inactive ? 0 : 1;
+
+        ret = 0;
+out:
+        if (conf_dict)
+                dict_destroy (conf_dict);
+        return ret;
+}
+
+/*
+ * _get_slave_status:
+ *      Called for each slave in the volume from dict_foreach.
+ *      It calls is_geo_rep_active to get the monitor status.
+ *      @data is a gsync_status_param_t; once param->is_active is set by
+ *      an earlier call, later calls return immediately.
+ *
+ * RETURN VALUE:
+ *       0: On successful read of state_file from is_geo_rep_active.
+ *          When it is found geo-rep is already active from previous calls.
+ *          When there is no slave.
+ *      -1: On error, including a config path that does not fit into the
+ *          local buffer.
+ */
+
+int
+_get_slave_status (dict_t *dict, char *key, data_t *value, void *data)
+{
+        gsync_status_param_t *param      = NULL;
+        char                 *slave      = NULL;
+        char                 *slave_url  = NULL;
+        char                 *slave_vol  = NULL;
+        char                 *slave_host = NULL;
+        char                 *errmsg     = NULL;
+        char                  conf_path[PATH_MAX] = "";
+        int                   ret        = -1;
+        glusterd_conf_t      *priv       = NULL;
+        xlator_t             *this       = NULL;
+
+        param = (gsync_status_param_t *)data;
+
+        GF_ASSERT (param);
+        GF_ASSERT (param->volinfo);
+        if (param->is_active) {
+                ret = 0;
+                goto out;
+        }
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                goto out;
+        }
+
+        /* the slave part starts right after the first ':' of the entry */
+        slave = strchr (value->data, ':');
+        if (!slave) {
+                ret = 0;
+                goto out;
+        }
+        slave++;
+
+        ret = glusterd_get_slave_info (slave, &slave_url,
+                                       &slave_host, &slave_vol, &errmsg);
+        if (ret) {
+                if (errmsg)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVEINFO_FETCH_ERROR, "Unable to fetch"
+                                " slave details. Error: %s", errmsg);
+                else
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVEINFO_FETCH_ERROR,
+                                "Unable to fetch slave details.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = snprintf (conf_path, sizeof (conf_path),
+                        "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+                        priv->workdir, param->volinfo->volname,
+                        slave_host, slave_vol);
+        /* snprintf() returns the untruncated length, which may exceed the
+         * buffer; treat truncation like a formatting error instead of
+         * indexing conf_path with it */
+        if (ret < 0 || (size_t) ret >= sizeof (conf_path)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CONF_PATH_ASSIGN_FAILED,
+                        "Unable to assign conf_path.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = is_geo_rep_active (param->volinfo, slave, conf_path,
+                                 &param->is_active);
+out:
+        GF_FREE(errmsg);
+        return ret;
+}
+
+/* glusterd_check_geo_rep_running:
+ *      Walks the slaves of param->volinfo (if any are configured) and
+ *      determines whether a geo-rep session is active.
+ *
+ * RETURN VALUE:
+ *       0: the check ran; param->is_active tells whether a session is
+ *          active. When one is, *op_errstr holds a message.
+ *      -1: the status of a session could not be determined; *op_errstr
+ *          holds a message.
+ *      The caller has to free *op_errstr in both of these cases.
+ */
+
+int
+glusterd_check_geo_rep_running (gsync_status_param_t *param, char **op_errstr)
+{
+        xlator_t    *this       = NULL;
+        gf_boolean_t configured = _gf_false;
+        char         text[2048] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (param);
+        GF_ASSERT (param->volinfo);
+        GF_ASSERT (op_errstr);
+
+        glusterd_check_geo_rep_configured (param->volinfo, &configured);
+        if (!configured)
+                return 0;
+
+        if (dict_foreach (param->volinfo->gsync_slaves,
+                          _get_slave_status, param)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                        "_get_slave_satus failed");
+                snprintf (text, sizeof(text), GEOREP" Unable to"
+                          " get the status of active "GEOREP""
+                          " session for the volume '%s'.\n"
+                          " Please check the log file for"
+                          " more info.", param->volinfo->volname);
+                *op_errstr = gf_strdup (text);
+                return -1;
+        }
+
+        if (param->is_active) {
+                snprintf (text, sizeof(text), GEOREP" sessions"
+                          " are active for the volume %s.\nStop"
+                          " "GEOREP " sessions involved in this"
+                          " volume. Use 'volume "GEOREP
+                          " status' command for more info.",
+                          param->volinfo->volname);
+                *op_errstr = gf_strdup (text);
+        }
+
+        return 0;
+}
+
+/*
+ * Verify that gsyncd for the @volinfo/@slave session is running on this
+ * node.
+ *
+ * Checks, in order: the master volume is started; a pidfile path can be
+ * determined; the pidfile is locked (gsync_status_byfd); the pidfile path
+ * came from @conf_path rather than from the template config.
+ *
+ * Returns 0 when the session is running here, -1 otherwise. On failure
+ * *@op_errstr is set to an allocated message whenever one was composed.
+ * The pidfile descriptor opened for the check is closed before returning.
+ */
+static int
+glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo,
+                                  char *slave, char *conf_path,
+                                  char **op_errstr)
+{
+        int          pfd                = -1;
+        int          ret                = -1;
+        char         msg[2048]          = {0};
+        char         pidfile[PATH_MAX]  = {0,};
+        gf_boolean_t is_template_in_use = _gf_false;
+        xlator_t    *this               = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (THIS && THIS->private);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        GF_ASSERT (conf_path);
+        GF_ASSERT (op_errstr);
+
+        if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+                snprintf (msg, sizeof (msg), "Volume %s needs to be started "
+                          "before "GEOREP" start", volinfo->volname);
+
+                goto out;
+        }
+
+        pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile,
+                                 conf_path, &is_template_in_use);
+        if (pfd == -2) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED,
+                        GEOREP" stop validation failed for %s & %s",
+                        volinfo->volname, slave);
+                ret = -1;
+                goto out;
+        }
+        if (gsync_status_byfd (pfd) == -1) {
+                snprintf (msg, sizeof (msg), GEOREP" session b/w %s & %s is "
+                          "not running on this node.", volinfo->volname,
+                          slave);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_INACTIVE,
+                        "%s", msg);
+                ret = -1;
+                /* monitor gsyncd already dead */
+                goto out;
+        }
+
+        if (is_template_in_use) {
+                snprintf (msg, sizeof (msg), "pid-file entry missing in "
+                          "the config file(%s).", conf_path);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND,
+                        "%s", msg);
+                ret = -1;
+                goto out;
+        }
+
+        if (pfd < 0)
+                goto out;
+
+        ret = 0;
+out:
+        /* gsyncd_getpidfile() returns an open descriptor on success; it
+         * is only needed for the lock test above */
+        if (pfd >= 0)
+                sys_close (pfd);
+
+        if (ret && (msg[0] != '\0')) {
+                *op_errstr = gf_strdup (msg);
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Validate the options of a geo-replication "status" request in @dict.
+ *
+ * Both "master" and "slave" are optional: when "master" is absent nothing
+ * is checked; when only "master" is given it must name an existing
+ * volume; when "slave" is given as well, the slave details and config
+ * path must be resolvable through glusterd_get_slave_details_confpath().
+ *
+ * Returns 0 when the request is acceptable, -1 otherwise, in which case
+ * *@op_errstr is set (for the slave lookup it is passed on to
+ * glusterd_get_slave_details_confpath()).
+ *
+ * NOTE(review): the slave_url/slave_host/slave_vol/conf_path values
+ * obtained here are neither used nor released - confirm who owns them.
+ */
+static int
+glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr)
+{
+        char               *slave      = NULL;
+        char               *volname    = NULL;
+        char                errmsg[PATH_MAX] = {0, };
+        gf_boolean_t        exists     = _gf_false;
+        glusterd_volinfo_t *volinfo    = NULL;
+        int                 ret        = 0;
+        char               *conf_path  = NULL;
+        char               *slave_url  = NULL;
+        char               *slave_host = NULL;
+        char               *slave_vol  = NULL;
+        glusterd_conf_t    *priv       = NULL;
+        xlator_t           *this       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (this)
+                priv = this->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                *op_errstr = gf_strdup ("glusterd defunct");
+                /* an error string without an error return would be
+                 * reported as success */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "master", &volname);
+        if (ret < 0) {
+                /* no master given: nothing to validate */
+                ret = 0;
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if ((ret) || (!exists)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+                        "volume name does not exist");
+                snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+                          " exist", volname);
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave", &slave);
+        if (ret < 0) {
+                /* no slave given: the master check above is sufficient */
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_url,
+                                                   &slave_host, &slave_vol,
+                                                   &conf_path, op_errstr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                        "Unable to fetch slave or confpath details.");
+                ret = -1;
+                goto out;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/*
+ * Pull the common geo-replication arguments out of @dict.
+ *
+ * Each of @master, @slave and @host_uuid is optional: a NULL output pointer
+ * means the corresponding key ("master", "slave", "host-uuid") is not
+ * looked up.  Lookup stops at the first missing key, in that order.
+ *
+ * @op_errstr: receives a gf_strdup()'d "<arg> not found" message on failure.
+ *
+ * Returns 0 when every requested key was found, otherwise the negative
+ * value returned by the failing dict_get_str().
+ */
+int
+glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr,
+                            char **master, char **slave, char **host_uuid)
+{
+        xlator_t *this = THIS;
+        int       ret  = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        if (master &&
+            ((ret = dict_get_str (dict, "master", master)) < 0)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED, "master not found");
+                *op_errstr = gf_strdup ("master not found");
+                goto out;
+        }
+
+        if (slave &&
+            ((ret = dict_get_str (dict, "slave", slave)) < 0)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED, "slave not found");
+                *op_errstr = gf_strdup ("slave not found");
+                goto out;
+        }
+
+        if (host_uuid &&
+            ((ret = dict_get_str (dict, "host-uuid", host_uuid)) < 0)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED, "host_uuid not found");
+                *op_errstr = gf_strdup ("host_uuid not found");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Stage-time validation for "system:: execute <command>".
+ *
+ * The command name taken from @dict ("command") must not contain a '/',
+ * and GSYNCD_PREFIX"/peer_<command>" must be an executable regular file.
+ *
+ * @dict:      request dictionary; the "command" key is required.
+ * @op_errstr: receives a gf_strdup()'d error message on every failure path.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr)
+{
+        char             errmsg[PATH_MAX]       = "";
+        char            *command                = NULL;
+        char             command_path[PATH_MAX] = "";
+        struct stat      st                     = {0,};
+        int              ret                    = -1;
+        int              len                    = 0;
+        glusterd_conf_t *conf                   = NULL;
+        xlator_t        *this                   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (conf->op_version < 2) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+                        "Op Version not supported.");
+                snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+                          " support the required op version.");
+                /* *op_errstr is filled from errmsg at "out"; duplicating it
+                 * here as well would leak the first copy. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "command", &command);
+        if (ret) {
+                strcpy (errmsg, "internal error");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get command from dict");
+                goto out;
+        }
+
+        /* enforce local occurrence of the command */
+        if (strchr (command, '/')) {
+                strcpy (errmsg, "invalid command name");
+                ret = -1;
+                goto out;
+        }
+
+        /* The command name comes from the request, so the path must be
+         * built with a bound; an over-long name is rejected rather than
+         * silently truncated to some other path. */
+        len = snprintf (command_path, sizeof (command_path),
+                        GSYNCD_PREFIX"/peer_%s", command);
+        if ((len < 0) || ((size_t) len >= sizeof (command_path))) {
+                strcpy (errmsg, "invalid command name");
+                ret = -1;
+                goto out;
+        }
+
+        /* check if it's executable */
+        ret = sys_access (command_path, X_OK);
+        if (!ret)
+                /* check if it's a regular file */
+                ret = sys_stat (command_path, &st);
+        if (!ret && !S_ISREG (st.st_mode))
+                ret = -1;
+
+out:
+        if (ret) {
+                if (errmsg[0] == '\0') {
+                        if (command)
+                                snprintf (errmsg, sizeof (errmsg),
+                                          "gsync peer_%s command not found.",
+                                          command);
+                        else
+                                snprintf (errmsg, sizeof (errmsg), "%s",
+                                          "gsync peer command was not "
+                                          "specified");
+                }
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_CMD_ERROR,
+                        "%s", errmsg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Stage-time validation for the geo-replication "copy-file" operation.
+ *
+ * The file checks run only on the node whose uuid equals the "host-uuid"
+ * entry of @dict; every other node returns 0 once the common checks pass.
+ * On the originating node the "source" file must resolve (via realpath)
+ * to a location underneath the glusterd working directory and must be a
+ * regular file.
+ *
+ * @op_errstr: receives a gf_strdup()'d message on most failure paths.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr)
+{
+        char             abs_filename[PATH_MAX]      = "";
+        char             errmsg[PATH_MAX]            = "";
+        char             realpath_filename[PATH_MAX] = {0,};
+        char             realpath_workdir[PATH_MAX]  = {0,};
+        char             workdir[PATH_MAX]           = {0,};
+        char             uuid_str [64]               = {0};
+        char            *filename                    = NULL;
+        char            *host_uuid                   = NULL;
+        struct stat      stbuf                       = {0,};
+        glusterd_conf_t *priv                        = NULL;
+        xlator_t        *this                        = NULL;
+        int              ret                         = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (THIS)
+                priv = THIS->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                *op_errstr = gf_strdup ("glusterd defunct");
+                goto out;
+        }
+
+        if (priv->op_version < 2) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+                        "Op Version not supported.");
+                snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+                          " support the required op version.");
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "host-uuid", &host_uuid);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch host-uuid from dict.");
+                goto out;
+        }
+
+        /* Only the node named by host-uuid inspects the source file. */
+        uuid_utoa_r (MY_UUID, uuid_str);
+        if (strcmp (uuid_str, host_uuid) != 0) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "source", &filename);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch filename from dict.");
+                *op_errstr = gf_strdup ("command unsuccessful");
+                goto out;
+        }
+        snprintf (abs_filename, sizeof(abs_filename),
+                  "%s/%s", priv->workdir, filename);
+
+        if (realpath (priv->workdir, realpath_workdir) == NULL) {
+                snprintf (errmsg, sizeof (errmsg), "Failed to get "
+                          "realpath of %s: %s", priv->workdir,
+                          strerror (errno));
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        if (realpath (abs_filename, realpath_filename) == NULL) {
+                snprintf (errmsg, sizeof (errmsg), "Failed to get "
+                          "realpath of %s: %s", filename,
+                          strerror (errno));
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        /* The trailing slash keeps a sibling such as
+         * /var/lib/glusterd_bad from matching the prefix comparison. */
+        snprintf (workdir, sizeof(workdir), "%s/", realpath_workdir);
+
+        /* Refuse to copy anything that lives outside $workdir. */
+        if (strncmp (workdir, realpath_filename, strlen (workdir)) != 0) {
+                snprintf (errmsg, sizeof (errmsg), "Source file"
+                          " is outside of %s directory", priv->workdir);
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_lstat (abs_filename, &stbuf);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg), "Source file"
+                          " does not exist in %s", priv->workdir);
+                *op_errstr = gf_strdup (errmsg);
+                goto out;
+        }
+
+        if (!S_ISREG(stbuf.st_mode)) {
+                snprintf (errmsg, sizeof (errmsg), "Source file"
+                          " is not a regular file.");
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SRC_FILE_ERROR,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Look up the "state_file" entry of a geo-replication session's config.
+ *
+ * @conf_path is tried first; when it cannot be lstat'ed, or when reading it
+ * or finding "state_file" in it fails, the template config under the
+ * glusterd working directory is tried once instead and
+ * *@is_template_in_use is set to _gf_true.
+ *
+ * @statefile: receives a gf_strdup()'d copy of the state file name whenever
+ *             one was obtained.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
+                             char *conf_path, char **statefile,
+                             gf_boolean_t *is_template_in_use)
+{
+        char             temp_conf_path[PATH_MAX] = "";
+        char            *working_conf_path        = NULL;
+        char            *master                   = NULL;
+        char            *buf                      = NULL;
+        dict_t          *confd                    = NULL;
+        glusterd_conf_t *priv                     = NULL;
+        struct stat      stbuf                    = {0,};
+        xlator_t        *this                     = NULL;
+        int              ret                      = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (this->private);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (conf_path);
+        GF_ASSERT (is_template_in_use);
+
+        master = volinfo->volname;
+
+        confd = dict_new ();
+        if (!confd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+                        "Unable to create new dict");
+                goto out;
+        }
+
+        priv = THIS->private;
+
+        snprintf (temp_conf_path, sizeof(temp_conf_path) - 1,
+                  "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+
+        ret = sys_lstat (conf_path, &stbuf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOENT,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Config file (%s) missing. Looking for template config"
+                        " file (%s)", conf_path, temp_conf_path);
+                ret = sys_lstat (temp_conf_path, &stbuf);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                                GD_MSG_FILE_OP_FAILED, "Template "
+                                "config file (%s) missing.", temp_conf_path);
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG,
+                        "Using default config template(%s).", temp_conf_path);
+                working_conf_path = temp_conf_path;
+                *is_template_in_use = _gf_true;
+        } else {
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_CONFIG_INFO,
+                        "Using passed config template(%s).",
+                        conf_path);
+                working_conf_path = conf_path;
+        }
+
+        /* At most two passes: the chosen config first, then (only if that
+         * was not already the template) the template config. */
+        for (;;) {
+                ret = glusterd_gsync_get_config (master, slave,
+                                                 working_conf_path, confd);
+                if (ret) {
+                        if (*is_template_in_use != _gf_false) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_GET_CONFIG_INFO_FAILED,
+                                        "Unable to get configuration data "
+                                        "for %s(master), %s(slave) from "
+                                        "template config",
+                                        master, slave);
+                                goto out;
+                        }
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GET_CONFIG_INFO_FAILED,
+                                "Unable to get configuration data "
+                                "for %s(master), %s(slave). "
+                                "Trying template config.",
+                                master, slave);
+                } else {
+                        ret = dict_get_param (confd, "state_file", &buf);
+                        if (!ret)
+                                break;
+
+                        if (*is_template_in_use != _gf_false) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_GET_STATEFILE_NAME_FAILED,
+                                        "Unable to get state_file's "
+                                        "name from template.");
+                                goto out;
+                        }
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get state_file's name. "
+                                "Trying template config.");
+                }
+
+                /* Fall back to the template and go around once more. */
+                working_conf_path = temp_conf_path;
+                *is_template_in_use = _gf_true;
+        }
+
+        ret = 0;
+out:
+        if (buf) {
+                *statefile = gf_strdup(buf);
+                if (!*statefile)
+                        ret = -1;
+        }
+
+        if (confd)
+                dict_destroy (confd);
+
+        gf_msg_debug (this->name, 0, "Returning %d ", ret);
+        return ret;
+}
+
+/*
+ * Run "gsyncd --create <status>" for the given session so that its status
+ * file gets created.
+ *
+ * The big lock of glusterd is released while the external command runs and
+ * re-acquired afterwards.
+ *
+ * Returns 0 on success, -1 on failure (including a NULL @status).
+ */
+int
+glusterd_create_status_file (char *master, char *slave, char *slave_host,
+                             char *slave_vol, char *status)
+{
+        runner_t         runner = {0,};
+        glusterd_conf_t *priv   = NULL;
+        xlator_t        *this   = THIS;
+        int              ret    = -1;
+
+        GF_ASSERT (this);
+
+        if (THIS)
+                priv = THIS->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                goto out;
+        }
+
+        if (status == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_STATUS_NULL,
+                        "Status Empty");
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "slave = %s", slave);
+
+        /* gsyncd --create <status> -c <conf> --iprefix=<dir> :<master> <slave> */
+        runinit (&runner);
+        runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--create",
+                         status, "-c", NULL);
+        runner_argprintf (&runner, "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+                          priv->workdir, master, slave_host, slave_vol);
+        runner_argprintf (&runner, "--iprefix=%s", DATADIR);
+        runner_argprintf (&runner, ":%s", master);
+        runner_add_args (&runner, slave, NULL);
+
+        synclock_unlock (&priv->big_lock);
+        ret = runner_run (&runner);
+        synclock_lock (&priv->big_lock);
+
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STATUSFILE_CREATE_FAILED,
+                        "Creating status file failed.");
+                ret = -1;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Run gverify.sh against the slave to check that it is usable for a
+ * geo-replication session.
+ *
+ * @slave_url is "[user@]host"; when no user is given "root" is used.
+ * When the script fails, its log file is read: a leading "FORCE_BLOCKER|"
+ * token marks an error that "force" must not override, and the remaining
+ * text becomes the error message.
+ *
+ * @op_errstr:        receives a gf_strdup()'d message taken from the log.
+ * @is_force_blocker: set to 1 or 0 once the log has been parsed; it is left
+ *                    untouched when the log cannot be read.
+ *
+ * The big lock of glusterd is released while the script runs.  The log file
+ * is unlinked before returning.
+ *
+ * Returns 0 when the slave verified successfully, -1 otherwise.
+ */
+static int
+glusterd_verify_slave (char *volname, char *slave_url, char *slave_vol,
+                       int ssh_port, char **op_errstr,
+                       gf_boolean_t *is_force_blocker)
+{
+        int32_t          ret                     = -1;
+        runner_t         runner                  = {0,};
+        char             log_file_path[PATH_MAX] = "";
+        char             buf[PATH_MAX]           = "";
+        char            *tmp                     = NULL;
+        char            *slave_url_buf           = NULL;
+        char            *save_ptr                = NULL;
+        char            *slave_user              = NULL;
+        char            *slave_ip                = NULL;
+        glusterd_conf_t *priv                    = NULL;
+        xlator_t        *this                    = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (volname);
+        GF_ASSERT (slave_url);
+        GF_ASSERT (slave_vol);
+
+        /* Fetch the slave_user and slave_ip from the slave_url.
+         * If the slave_user is not present, use "root".
+         */
+        if (strstr(slave_url, "@")) {
+                slave_url_buf = gf_strdup (slave_url);
+                if (!slave_url_buf)
+                        goto out;
+
+                slave_user = strtok_r (slave_url_buf, "@", &save_ptr);
+                slave_ip = strtok_r (NULL, "@", &save_ptr);
+        } else {
+                slave_user = "root";
+                slave_ip = slave_url;
+        }
+
+        if (!slave_user || !slave_ip) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID,
+                        "Invalid slave url.");
+                goto out;
+        }
+
+        snprintf (log_file_path, sizeof(log_file_path),
+                  DEFAULT_LOG_FILE_DIRECTORY"/create_verify_log");
+
+        runinit (&runner);
+        runner_add_args  (&runner, GSYNCD_PREFIX"/gverify.sh", NULL);
+        runner_argprintf (&runner, "%s", volname);
+        runner_argprintf (&runner, "%s", slave_user);
+        runner_argprintf (&runner, "%s", slave_ip);
+        runner_argprintf (&runner, "%s", slave_vol);
+        runner_argprintf (&runner, "%d", ssh_port);
+        runner_argprintf (&runner, "%s", log_file_path);
+        gf_msg_debug (this->name, 0, "gverify Args = %s %s %s %s %s %s %s",
+                      runner.argv[0], runner.argv[1], runner.argv[2],
+                      runner.argv[3], runner.argv[4], runner.argv[5],
+                      runner.argv[6]);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        synclock_unlock (&priv->big_lock);
+        ret = runner_run (&runner);
+        synclock_lock (&priv->big_lock);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE,
+                        "Not a valid slave");
+                ret = glusterd_gsync_read_frm_status (log_file_path,
+                                                      buf, sizeof(buf));
+                if (ret <= 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_READ_ERROR,
+                                "Unable to read from %s", log_file_path);
+                        /* The script has already failed: a 0 from the read
+                         * helper must not be returned as success. */
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Tokenize the error message from gverify.sh to figure out
+                 * if the error is a force blocker or not. */
+                tmp = strtok_r (buf, "|", &save_ptr);
+                if (!tmp) {
+                        /* Log held nothing but delimiters: no flag and no
+                         * message to hand back. */
+                        *is_force_blocker = 0;
+                        ret = -1;
+                        goto out;
+                }
+
+                if (strcmp (tmp, "FORCE_BLOCKER") != 0) {
+                        /* No FORCE_BLOCKER flag present so all that is
+                         * present is the error message. */
+                        *is_force_blocker = 0;
+                        *op_errstr = gf_strdup (tmp);
+                        ret = -1;
+                        goto out;
+                }
+
+                *is_force_blocker = 1;
+
+                /* Copy rest of the error message to op_errstr */
+                tmp = strtok_r (NULL, "|", &save_ptr);
+                if (tmp)
+                        *op_errstr = gf_strdup (tmp);
+                ret = -1;
+                goto out;
+        }
+        ret = 0;
+out:
+        GF_FREE (slave_url_buf);
+        sys_unlink (log_file_path);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/**
+ * Extract the host part of a "[user@]host" slave url.
+ *
+ * @slave_url: url to parse; it remains unmodified (a private copy is
+ *             tokenized).
+ * @hostname:  optional; on success receives a gf_strdup()'d host name that
+ *             the caller must free.  On failure the value found in
+ *             *@hostname is freed and reset to NULL.
+ *             NOTE(review): this presumes callers pass either NULL or a
+ *             heap pointer in *@hostname - confirm against callers.
+ * @op_errstr: optional; receives a gf_strdup()'d message when the host
+ *             part itself contains an '@'.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+glusterd_geo_rep_parse_slave (char *slave_url,
+                              char **hostname, char **op_errstr)
+{
+        int       ret              = -1;
+        char     *tmp              = NULL;
+        char     *save_ptr         = NULL;
+        char     *host             = NULL;
+        char      errmsg[PATH_MAX] = "";
+        char     *saved_url        = NULL;
+        xlator_t *this             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (slave_url);
+        GF_ASSERT (*slave_url);
+
+        saved_url = gf_strdup (slave_url);
+        if (!saved_url)
+                goto out;
+
+        /* Checking if hostname has user specified */
+        host = strstr (saved_url, "@");
+        if (!host) { /* no user specified */
+                if (hostname) {
+                        *hostname = gf_strdup (saved_url);
+                        if (!*hostname)
+                                goto out;
+                }
+
+                ret = 0;
+                goto out;
+        }
+
+        /* Moving the host past the '@' and checking if the
+         * actual hostname also has '@' */
+        host++;
+        if (strstr (host, "@")) {
+                gf_msg_debug (this->name, 0, "host = %s", host);
+                /* snprintf() always terminates the buffer.  Its return
+                 * value is the untruncated length, so it must not be used
+                 * as an index into errmsg. */
+                snprintf (errmsg, sizeof(errmsg) - 1,
+                          "Invalid Hostname (%s).", host);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", errmsg);
+                ret = -1;
+                if (op_errstr)
+                        *op_errstr = gf_strdup (errmsg);
+                goto out;
+        }
+
+        ret = -1;
+
+        /**
+         * preliminary check for valid slave format: the second '@'
+         * separated token is the host.
+         */
+        (void) strtok_r (saved_url, "@", &save_ptr);
+        tmp = strtok_r (NULL, "@", &save_ptr);
+        if (!tmp)
+                goto out;
+        if (hostname) {
+                *hostname = gf_strdup (tmp);
+                if (!*hostname)
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        GF_FREE (saved_url);
+        if (ret && hostname) {
+                GF_FREE (*hostname);
+                /* Do not leave the caller with a dangling pointer. */
+                *hostname = NULL;
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * dict_foreach() callback over the gsync_slaves entries of a volume.
+ *
+ * @value holds a slave description of the form
+ *     master_node_uuid:ssh://[user@]slave_host::slave_vol:slave_voluuid
+ * and @data is a struct slave_vol_config whose slave_voluuid is the uuid
+ * being searched for.
+ *
+ * Returns:
+ *    0  no match for this entry (or the entry is empty),
+ *   -1  the entry's volume uuid matches; old_slvuser and old_slvhost of
+ *       @data have been filled in from the entry (also returned when THIS
+ *       is NULL),
+ *   -2  the entry is malformed.
+ */
+static int
+get_slavehost_from_voluuid (dict_t *dict, char *key, data_t *value, void *data)
+{
+        char                    *slave_info = NULL;
+        char                    *tmp        = NULL;
+        char                    *slave_host = NULL;
+        xlator_t                *this       = NULL;
+        struct slave_vol_config *slave_vol  = NULL;
+        size_t                   len        = 0;
+        int                      i          = 0;
+        int                      ret        = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        slave_vol = data;
+        slave_info = value->data;
+
+        gf_msg_debug (this->name, 0, "slave_info:%s !", slave_info);
+
+        if (!(slave_info) || strlen (slave_info) == 0) {
+                /* no slaves present, peace  */
+                ret = 0;
+                goto out;
+        }
+
+        /* slave format:
+         * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid
+         * Skip past five ':' so that slave_info points at the uuid. */
+        while (i++ < 5) {
+                slave_info = strchr (slave_info, ':');
+                if (slave_info)
+                        slave_info++;
+                else
+                        break;
+        }
+
+        if (!(slave_info) || strlen(slave_info) == 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                        "slave_info format is wrong!");
+                ret = -2;
+                goto out;
+        }
+
+        if (strcmp (slave_info, slave_vol->slave_voluuid) != 0) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = -1;
+
+        /* get corresponding slave host for reference */
+        slave_host = strstr (value->data, "://");
+        if (!slave_host) {
+                /* Without the scheme separator there is no host to read;
+                 * report a malformed entry instead of dereferencing NULL. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                        "Invalid slave_host!");
+                ret = -2;
+                goto out;
+        }
+        slave_host += 3;
+
+        /* To go past username in non-root geo-rep session.
+         * NOTE(review): the copies below are not bounded by the size of
+         * the destination fields, whose declaration is not visible here -
+         * confirm they can hold any user/host found in an entry. */
+        tmp = strchr (slave_host, '@');
+        if (tmp) {
+                len = (size_t) (tmp - slave_host);
+                strncpy (slave_vol->old_slvuser, slave_host, len);
+                /* strncpy() copied exactly len bytes and no terminator;
+                 * the NUL belongs at index len. */
+                slave_vol->old_slvuser[len] = '\0';
+                slave_host = tmp + 1;
+        } else
+                strcpy (slave_vol->old_slvuser, "root");
+
+        tmp = strchr (slave_host, ':');
+        if (!tmp) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVE_VOL_PARSE_FAIL,
+                        "Invalid slave_host!");
+                ret = -2;
+                goto out;
+        }
+
+        len = (size_t) (tmp - slave_host);
+        strncpy (slave_vol->old_slvhost, slave_host, len);
+        slave_vol->old_slvhost[len] = '\0';
+
+out:
+        return ret;
+}
+
+/*
+ * Search the gsync_slaves of @volinfo for a session whose slave volume
+ * uuid equals slave1->slave_voluuid, by running
+ * get_slavehost_from_voluuid() over every entry.
+ *
+ * @slave_host and @slave_vol are not consulted here.
+ *
+ * Returns whatever dict_foreach() reports, or -1 when @volinfo is NULL.
+ */
+static int
+glusterd_get_slavehost_from_voluuid (glusterd_volinfo_t *volinfo,
+                                     char *slave_host, char *slave_vol,
+                                     struct slave_vol_config *slave1)
+{
+        xlator_t *this = THIS;
+        int       ret  = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+        ret = dict_foreach (volinfo->gsync_slaves,
+                            get_slavehost_from_voluuid, slave1);
+out:
+        return ret;
+}
+
+/*
+ * Stage-time validation for "geo-replication <master> <slave> create".
+ *
+ * Checks performed, in order:
+ *   - the common arguments are present and the op-version is supported;
+ *   - the master volume exists and the slave details can be resolved;
+ *   - on the originating node only (host-uuid equals this node's uuid):
+ *     all peers of the volume are up (unless "force"), the slave passes
+ *     gverify.sh (unless "no_verify", or "force" with a non-blocking
+ *     error), and for "push_pem" the common pem file and the create
+ *     hook-script exist as regular files;
+ *   - the session's state file name can be obtained and no session
+ *     directory exists yet (unless "force");
+ *   - the slave volume uuid can be fetched, and no session with the same
+ *     slave volume uuid exists (unless "force", in which case the old
+ *     session must not be running when the slave host or user differs);
+ *   - gsyncd can be spawned.
+ *
+ * Keys stored into @dict: "statefile", "slave_voluuid" and, for an
+ * existing session, "old_slavehost" and "existing_session".
+ *
+ * @op_errstr: receives a gf_strdup()'d error message on failure.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr)
+{
+        char                   *down_peerstr              = NULL;
+        char                   *slave                     = NULL;
+        char                   *volname                   = NULL;
+        char                   *host_uuid                 = NULL;
+        char                   *statefile                 = NULL;
+        char                   *slave_url                 = NULL;
+        char                   *slave_host                = NULL;
+        char                   *slave_vol                 = NULL;
+        char                   *conf_path                 = NULL;
+        char                    errmsg[PATH_MAX]          = "";
+        char                    common_pem_file[PATH_MAX] = "";
+        char                    hook_script[PATH_MAX]     = "";
+        char                    uuid_str [64]             = "";
+        int                     ret                       = -1;
+        int                     is_pem_push               = -1;
+        int                     ssh_port                  = 22;
+        gf_boolean_t            is_force                  = -1;
+        gf_boolean_t            is_no_verify              = -1;
+        gf_boolean_t            is_force_blocker          = -1;
+        gf_boolean_t            exists                    = _gf_false;
+        gf_boolean_t            is_template_in_use        = _gf_false;
+        glusterd_conf_t        *conf                      = NULL;
+        glusterd_volinfo_t     *volinfo                   = NULL;
+        struct stat             stbuf                     = {0,};
+        xlator_t               *this                      = NULL;
+        struct slave_vol_config slave1                    = {{0},};
+        char                    old_slave_url[SLAVE_URL_INFO_MAX] = {0};
+        char                    old_confpath[PATH_MAX]    = {0};
+        gf_boolean_t            is_running                = _gf_false;
+        char                   *statedir                  = NULL;
+        char                    statefiledir[PATH_MAX]    = {0,};
+        gf_boolean_t            is_different_slavehost    = _gf_false;
+        gf_boolean_t            is_different_username     = _gf_false;
+        char                   *slave_user                = NULL;
+        char                   *save_ptr                  = NULL;
+        char                   *slave_url_buf             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = glusterd_op_gsync_args_get (dict, op_errstr, &volname,
+                                          &slave, &host_uuid);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_ARG_FETCH_ERROR,
+                        "Unable to fetch arguments");
+                gf_msg_debug (this->name, 0, "Returning %d", ret);
+                return -1;
+        }
+
+        if (conf->op_version < 2) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+                        "Op Version not supported.");
+                snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+                          " support the required op version.");
+                /* *op_errstr is filled from errmsg at "out"; duplicating it
+                 * here as well would leak the first copy. */
+                ret = -1;
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if ((ret) || (!exists)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+                        "volume name does not exist");
+                snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+                          " exist", volname);
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg_debug (this->name, 0, "Returning %d", ret);
+                return -1;
+        }
+
+        ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_url,
+                                                   &slave_host, &slave_vol,
+                                                   &conf_path, op_errstr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                        "Unable to fetch slave or confpath details.");
+                ret = -1;
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        /* The checks in this branch run on the originating node only. */
+        uuid_utoa_r (MY_UUID, uuid_str);
+        if (!strcmp (uuid_str, host_uuid)) {
+                ret = glusterd_are_vol_all_peers_up (volinfo,
+                                                     &conf->peers,
+                                                     &down_peerstr);
+                if ((ret == _gf_false) && !is_force) {
+                        snprintf (errmsg, sizeof (errmsg), "Peer %s,"
+                                  " which is a part of %s volume, is"
+                                  " down. Please bring up the peer and"
+                                  " retry.", down_peerstr,
+                                  volinfo->volname);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PEER_DISCONNECTED,
+                                "%s", errmsg);
+                        *op_errstr = gf_strdup (errmsg);
+                        GF_FREE (down_peerstr);
+                        down_peerstr = NULL;
+                        gf_msg_debug (this->name, 0, "Returning %d", ret);
+                        return -1;
+                } else if (ret == _gf_false) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_PEER_DISCONNECTED,
+                                "Peer %s, which is a part of %s volume, is"
+                                " down. Force creating geo-rep session."
+                                " On bringing up the peer, re-run"
+                                " \"gluster system:: execute"
+                                " gsec_create\" and \"gluster volume"
+                                " geo-replication %s %s create push-pem"
+                                " force\"", down_peerstr, volinfo->volname,
+                                volinfo->volname, slave);
+                        GF_FREE (down_peerstr);
+                        down_peerstr = NULL;
+                }
+
+                ret = dict_get_int32 (dict, "ssh_port", &ssh_port);
+                if (ret < 0 && ret != -ENOENT) {
+                        snprintf (errmsg, sizeof (errmsg),
+                                  "Fetching ssh_port failed while "
+                                  "handling "GEOREP" options");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "%s", errmsg);
+                        goto out;
+                }
+
+                is_no_verify = dict_get_str_boolean (dict, "no_verify", _gf_false);
+
+                if (!is_no_verify) {
+                        /* Checking if slave host is pingable, has proper passwordless
+                         * ssh login setup, slave volume is created, slave vol is empty,
+                         * and if it has enough memory and bypass in case of force if
+                         * the error is not a force blocker */
+                        ret = glusterd_verify_slave (volname, slave_url, slave_vol,
+                                                     ssh_port, op_errstr,
+                                                     &is_force_blocker);
+                        if (ret) {
+                                /* The verifier does not always provide a
+                                 * message; never hand NULL to "%s". */
+                                if (is_force && !is_force_blocker) {
+                                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                                GD_MSG_INVALID_SLAVE,
+                                                "%s is not a valid slave "
+                                                "volume. Error: %s. Force "
+                                                "creating geo-rep"
+                                                " session.", slave,
+                                                *op_errstr ? *op_errstr
+                                                           : "(null)");
+                                } else {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_INVALID_SLAVE,
+                                                "%s is not a valid slave "
+                                                "volume. Error: %s",
+                                                slave,
+                                                *op_errstr ? *op_errstr
+                                                           : "(null)");
+                                        ret = -1;
+
+                                        goto out;
+                                }
+                        }
+                }
+
+                ret = dict_get_int32 (dict, "push_pem", &is_pem_push);
+                if (!ret && is_pem_push) {
+                        /* snprintf() terminates the buffers itself; its
+                         * return value is the untruncated length and must
+                         * not be used as an index. */
+                        snprintf (common_pem_file,
+                                  sizeof(common_pem_file) - 1,
+                                  "%s"GLUSTERD_COMMON_PEM_PUB_FILE,
+                                  conf->workdir);
+
+                        snprintf (hook_script, sizeof(hook_script) - 1,
+                                  "%s"GLUSTERD_CREATE_HOOK_SCRIPT,
+                                  conf->workdir);
+
+                        ret = sys_lstat (common_pem_file, &stbuf);
+                        if (ret) {
+                                snprintf (errmsg, sizeof (errmsg), "%s"
+                                          " required for push-pem is"
+                                          " not present. Please run"
+                                          " \"gluster system:: execute"
+                                          " gsec_create\"", common_pem_file);
+                                gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                                        GD_MSG_FILE_OP_FAILED,
+                                        "%s", errmsg);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* Must be tested while stbuf still describes the
+                         * pem file, i.e. before the hook-script lstat. */
+                        if (!S_ISREG(stbuf.st_mode)) {
+                                snprintf (errmsg, sizeof (errmsg), "%s"
+                                          " required for push-pem is"
+                                          " not a regular file. Please run"
+                                          " \"gluster system:: execute"
+                                          " gsec_create\"", common_pem_file);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_REG_FILE_MISSING, "%s", errmsg);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = sys_lstat (hook_script, &stbuf);
+                        if (ret) {
+                                snprintf (errmsg, sizeof (errmsg),
+                                          "The hook-script (%s) required "
+                                          "for push-pem is not present. "
+                                          "Please install the hook-script "
+                                          "and retry", hook_script);
+                                gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                                        GD_MSG_FILE_OP_FAILED, "%s", errmsg);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* A non-regular hook-script is still rejected, now
+                         * with a message that names the right file. */
+                        if (!S_ISREG(stbuf.st_mode)) {
+                                snprintf (errmsg, sizeof (errmsg),
+                                          "The hook-script (%s) required "
+                                          "for push-pem is not a regular "
+                                          "file. Please install the "
+                                          "hook-script and retry",
+                                          hook_script);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_REG_FILE_MISSING, "%s", errmsg);
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        ret = glusterd_get_statefile_name (volinfo, slave,
+                                           conf_path, &statefile,
+                                           &is_template_in_use);
+        if (ret) {
+                if (!strstr(slave, "::"))
+                        snprintf (errmsg, sizeof (errmsg),
+                                  "%s is not a valid slave url.", slave);
+                else
+                        snprintf (errmsg, sizeof (errmsg), "Please check gsync "
+                                  "config file. Unable to get statefile's name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STATEFILE_NAME_NOT_FOUND,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "statefile", statefile);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to store statefile path");
+                goto out;
+        }
+
+        /* dirname() may modify its argument, so work on a private copy;
+         * snprintf() guarantees the copy is terminated. */
+        snprintf (statefiledir, sizeof (statefiledir), "%s", statefile);
+        statedir = dirname (statefiledir);
+
+        ret = sys_lstat (statedir, &stbuf);
+        if (!ret && !is_force) {
+                snprintf (errmsg, sizeof (errmsg), "Session between %s"
+                          " and %s is already created.",
+                          volinfo->volname, slave);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SESSION_ALREADY_EXIST,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        } else if (!ret)
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION,
+                        "Session between %s and %s is already created. Force"
+                        " creating again.", volinfo->volname, slave);
+
+        ret = glusterd_get_slave_voluuid (slave_host, slave_vol,
+                                          slave1.slave_voluuid);
+        if ((ret) || (strlen(slave1.slave_voluuid) == 0)) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to get remote volume uuid.");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REMOTE_VOL_UUID_FAIL, "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (dict, "slave_voluuid",
+                                          slave1.slave_voluuid);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to set slave volume uuid in the dict");
+                goto out;
+        }
+
+        /* Check whether session is already created using slave volume uuid */
+        ret = glusterd_get_slavehost_from_voluuid (volinfo, slave_host,
+                                                   slave_vol, &slave1);
+        if (ret == -1) {
+                if (!is_force) {
+                        snprintf (errmsg, sizeof (errmsg), "Session between %s"
+                                  " and %s:%s is already created! Cannot create "
+                                  "with new slave:%s again!",
+                                  volinfo->volname, slave1.old_slvhost,
+                                  slave_vol, slave_host);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FORCE_CREATE_SESSION, "Session between"
+                                " %s and %s:%s is already created! "
+                                "Cannot create with new slave:%s again!",
+                                volinfo->volname, slave1.old_slvhost,
+                                slave_vol, slave_host);
+                        goto out;
+                }
+
+                /* Now, check whether session is already started.If so, warn!*/
+                is_different_slavehost =
+                        (strcmp (slave_host, slave1.old_slvhost) != 0)
+                        ? _gf_true : _gf_false;
+
+                if (strstr (slave_url, "@")) {
+                        slave_url_buf = gf_strdup (slave_url);
+                        if (!slave_url_buf) {
+                                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                        GD_MSG_NO_MEMORY,
+                                        "Unable to allocate memory");
+                                ret = -1;
+                                goto out;
+                        }
+                        slave_user = strtok_r (slave_url_buf, "@", &save_ptr);
+                        if (!slave_user) {
+                                /* A url made of '@' only has no user part;
+                                 * do not pass NULL to strcmp() below. */
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SLAVE_URL_INVALID,
+                                        "Invalid slave url.");
+                                ret = -1;
+                                goto out;
+                        }
+                } else
+                        slave_user = "root";
+                is_different_username =
+                        (strcmp (slave_user, slave1.old_slvuser) != 0)
+                        ? _gf_true : _gf_false;
+
+                /* Do the check, only if different slave host/slave user */
+                if (is_different_slavehost || is_different_username) {
+                        (void) snprintf (old_confpath, sizeof(old_confpath) - 1,
+                                         "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+                                         conf->workdir, volinfo->volname,
+                                         slave1.old_slvhost, slave_vol);
+
+                        /* construct old slave url with (old) slave host */
+                        (void) snprintf (old_slave_url,
+                                         sizeof(old_slave_url) - 1,
+                                         "%s::%s", slave1.old_slvhost,
+                                         slave_vol);
+
+                        ret = glusterd_check_gsync_running_local (volinfo->volname,
+                                      old_slave_url, old_confpath, &is_running);
+                        if (_gf_true == is_running) {
+                                (void) snprintf (errmsg, sizeof(errmsg), "Geo"
+                                      "-replication session between %s and %s"
+                                      " is still active. Please stop the "
+                                      "session and retry.",
+                                      volinfo->volname, old_slave_url);
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                ret = dict_set_dynstr_with_alloc (dict, "old_slavehost",
+                                                  slave1.old_slvhost);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set old_slavehost in the dict");
+                        goto out;
+                }
+
+                ret = dict_set_int32 (dict, "existing_session", _gf_true);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set existing_session in the dict");
+                        goto out;
+                }
+        } else if (ret == -2) {
+                snprintf (errmsg, sizeof (errmsg), "get_slavehost_from_voluuid"
+                          " failed %s %s!!", slave_host, slave_vol);
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION,
+                        "get_slavehost_from_voluuid failed %s %s!!",
+                        slave_host, slave_vol);
+                goto out;
+        }
+
+        ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg), "Unable to spawn gsyncd.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        ret = 0;
+out:
+
+        /* Single place where errmsg is turned into the caller's error. */
+        if (ret && errmsg[0] != '\0')
+                *op_errstr = gf_strdup (errmsg);
+        if (slave_url_buf)
+                GF_FREE (slave_url_buf);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Pre-condition check for geo-rep pause/resume.
+ *
+ * Reads the monitor status from @statefile and rejects a pause of a
+ * session whose status contains "Paused", as well as a resume of a session
+ * whose status does not.
+ *
+ * @op_errstr: receives a gf_strdup()'d message when a check fails.
+ *
+ * Return: 0 on success
+ *        -1 on any check failed.
+ */
+static int
+gd_pause_resume_validation (int type, glusterd_volinfo_t *volinfo,
+                            char *slave, char *statefile, char **op_errstr)
+{
+        char monitor_status[NAME_MAX] = {0,};
+        char errmsg[PATH_MAX]         = {0,};
+        int  is_paused                = 0;
+        int  ret                      = 0;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        GF_ASSERT (statefile);
+        GF_ASSERT (op_errstr);
+
+        ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+                                              sizeof (monitor_status));
+        if (ret <= 0) {
+                snprintf (errmsg, sizeof(errmsg), "Pause check Failed:"
+                          " Geo-rep session is not setup");
+                ret = -1;
+                goto out;
+        }
+
+        is_paused = (strstr (monitor_status, "Paused") != NULL);
+
+        if ((type == GF_GSYNC_OPTION_TYPE_PAUSE) && is_paused) {
+                snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                          " session between %s and %s already Paused.",
+                          volinfo->volname, slave);
+                ret = -1;
+        } else if ((type == GF_GSYNC_OPTION_TYPE_RESUME) && !is_paused) {
+                snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                          " session between %s and %s is not Paused.",
+                          volinfo->volname, slave);
+                ret = -1;
+        } else {
+                ret = 0;
+        }
+
+out:
+        if (ret && (errmsg[0] != '\0'))
+                *op_errstr = gf_strdup (errmsg);
+        return ret;
+}
+
+/*
+ * glusterd_op_stage_gsync_set:
+ *      Staging (validation) step for the geo-replication operation whose
+ *      command code is stored in @dict under the "type" key.
+ *
+ *      A STATUS request is delegated entirely to
+ *      glusterd_verify_gsync_status_opts(). For every other command the
+ *      volume, slave and config path are resolved, the statefile name is
+ *      looked up (and stored in @dict under "statefile" when found), and
+ *      the per-command checks in the switch below are run.
+ *
+ * @dict:      request dictionary; read for "type" and "force", and updated
+ *             with "statefile".
+ * @op_errstr: on failure, receives a gf_strdup()'ed message when one was
+ *             formatted locally; helpers called from here may also set it.
+ *
+ * Returns 0 when the operation may proceed, non-zero otherwise.
+ */
+int
+glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
+{
+        int                 ret                    = 0;
+        int                 type                   = 0;
+        int                 pfd                    = -1;
+        char               *volname                = NULL;
+        char               *slave                  = NULL;
+        char               *slave_url              = NULL;
+        char               *slave_host             = NULL;
+        char               *slave_vol              = NULL;
+        char               *down_peerstr           = NULL;
+        char               *statefile              = NULL;
+        char                statefiledir[PATH_MAX] = {0,};
+        char               *statedir               = NULL;
+        char               *path_list              = NULL;
+        char               *conf_path              = NULL;
+        gf_boolean_t        exists                 = _gf_false;
+        glusterd_volinfo_t *volinfo                = NULL;
+        char                errmsg[PATH_MAX]       = {0,};
+        char                pidfile[PATH_MAX]      = {0,};
+        dict_t             *ctx                    = NULL;
+        gf_boolean_t        is_force               = _gf_false;
+        gf_boolean_t        is_running             = _gf_false;
+        gf_boolean_t        is_template_in_use     = _gf_false;
+        uuid_t              uuid                   = {0};
+        char                uuid_str[64]           = {0};
+        char               *host_uuid              = NULL;
+        xlator_t           *this                   = NULL;
+        glusterd_conf_t    *conf                   = NULL;
+        struct stat         stbuf                  = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+                        "command type not found");
+                *op_errstr = gf_strdup ("command unsuccessful");
+                goto out;
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+                ret = glusterd_verify_gsync_status_opts (dict, op_errstr);
+                goto out;
+        }
+
+        ret = glusterd_op_gsync_args_get (dict, op_errstr,
+                                          &volname, &slave, &host_uuid);
+        if (ret)
+                goto out;
+
+        uuid_utoa_r (MY_UUID, uuid_str);
+
+        if (conf->op_version < 2) {
+                snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+                          " support the required op version.");
+                ret = -1;
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if ((ret) || (!exists)) {
+                snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+                          " exist", volname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_url,
+                                                   &slave_host, &slave_vol,
+                                                   &conf_path, op_errstr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                        "Unable to fetch slave or confpath details.");
+                ret = -1;
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        ret = glusterd_get_statefile_name (volinfo, slave,
+                                           conf_path, &statefile,
+                                           &is_template_in_use);
+        if (ret) {
+                if (!strstr(slave, "::")) {
+                        snprintf (errmsg, sizeof(errmsg),
+                                  "%s is not a valid slave url.", slave);
+                        ret = -1;
+                        goto out;
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVE_URL_INVALID,
+                                "state_file entry missing in config file (%s)",
+                                conf_path);
+
+                        if ((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force) {
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_STOP_FORCE, "Allowing stop "
+                                        "force to bypass missing statefile "
+                                        "entry in config file (%s), and "
+                                        "template file", conf_path);
+                                ret = 0;
+                        } else
+                                goto out;
+                }
+        } else {
+                ret = dict_set_str (dict, "statefile", statefile);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to store statefile path");
+                        goto out;
+                }
+        }
+
+        /* Allowing stop force to bypass the statefile check
+         * as this command acts as a fail safe method to stop geo-rep
+         * session. */
+        if (!((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force)) {
+
+                /* check session directory as statefile may not present
+                 * during upgrade */
+                /* snprintf always NUL-terminates, unlike strncpy with a
+                 * full-size bound; dirname() needs a terminated string. */
+                snprintf (statefiledir, sizeof(statefiledir), "%s",
+                          statefile);
+                statedir = dirname (statefiledir);
+
+                ret = sys_lstat (statedir, &stbuf);
+                if (ret) {
+                        snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                                  " session between %s and %s does not exist.",
+                                  volinfo->volname, slave);
+                        gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                                GD_MSG_FILE_OP_FAILED,
+                                "%s. statefile = %s", errmsg, statefile);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Check if all peers that are a part of the volume are up or not */
+        if ((type == GF_GSYNC_OPTION_TYPE_DELETE) ||
+            ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) ||
+            (type == GF_GSYNC_OPTION_TYPE_PAUSE) ||
+            (type == GF_GSYNC_OPTION_TYPE_RESUME)) {
+                /* Only the node whose uuid matches host_uuid runs this
+                 * peer check. */
+                if (!strcmp (uuid_str, host_uuid)) {
+                        ret = glusterd_are_vol_all_peers_up (volinfo,
+                                                             &conf->peers,
+                                                             &down_peerstr);
+                        if (ret == _gf_false) {
+                                snprintf (errmsg, sizeof (errmsg), "Peer %s,"
+                                          " which is a part of %s volume, is"
+                                          " down. Please bring up the peer and"
+                                          " retry.", down_peerstr,
+                                          volinfo->volname);
+                                ret = -1;
+                                GF_FREE (down_peerstr);
+                                down_peerstr = NULL;
+                                goto out;
+                        }
+                }
+        }
+
+        switch (type) {
+        case GF_GSYNC_OPTION_TYPE_START:
+                if (is_template_in_use) {
+                        snprintf (errmsg, sizeof(errmsg), "state-file entry "
+                                  "missing in the config file(%s).",
+                                  conf_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_op_verify_gsync_start_options (volinfo, slave,
+                                                              conf_path,
+                                                              statefile,
+                                                              op_errstr,
+                                                              is_force);
+                if (ret)
+                        goto out;
+                ctx = glusterd_op_get_ctx();
+                if (ctx) {
+                        /* gsyncd does a fuse mount to start
+                         * the geo-rep session */
+                        if (!glusterd_is_fuse_available ()) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                        GD_MSG_GEO_REP_START_FAILED, "Unable "
+                                        "to open /dev/fuse (%s), "
+                                        "geo-replication start failed",
+                                        strerror (errno));
+                                snprintf (errmsg, sizeof(errmsg),
+                                          "fuse unvailable");
+                                ret = -1;
+                                goto out;
+                        }
+                }
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_STOP:
+                if (!is_force) {
+                        if (is_template_in_use) {
+                                snprintf (errmsg, sizeof(errmsg),
+                                          "state-file entry missing in "
+                                          "the config file(%s).", conf_path);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = glusterd_op_verify_gsync_running (volinfo, slave,
+                                                                conf_path,
+                                                                op_errstr);
+                        if (ret) {
+                                /* "Not running" is only an error on nodes
+                                 * for which a local brick path list is
+                                 * returned. */
+                                ret = glusterd_get_local_brickpaths (volinfo,
+                                                                     &path_list);
+                                if (path_list)
+                                        ret = -1;
+                        }
+                }
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_PAUSE:
+        case GF_GSYNC_OPTION_TYPE_RESUME:
+                if (is_template_in_use) {
+                        snprintf (errmsg, sizeof(errmsg),
+                                  "state-file entry missing in "
+                                  "the config file(%s).", conf_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_op_verify_gsync_running (volinfo, slave,
+                                                        conf_path, op_errstr);
+                if (ret) {
+                        ret = glusterd_get_local_brickpaths (volinfo,
+                                                             &path_list);
+                        if (path_list) {
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                if (!is_force) {
+                        ret = gd_pause_resume_validation (type, volinfo, slave,
+                                                          statefile, op_errstr);
+                        if (ret) {
+                                ret = glusterd_get_local_brickpaths (volinfo,
+                                                                     &path_list);
+                                if (path_list) {
+                                        ret = -1;
+                                        goto out;
+                                }
+                        }
+                }
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_CONFIG:
+                if (is_template_in_use) {
+                        snprintf (errmsg, sizeof(errmsg), "state-file entry "
+                                  "missing in the config file(%s).",
+                                  conf_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                pfd = gsyncd_getpidfile (volname, slave, pidfile,
+                                         conf_path, &is_template_in_use);
+                /* Only the is_template_in_use flag is consumed here, so
+                 * release the descriptor at once instead of leaking it. */
+                if (pfd >= 0) {
+                        sys_close (pfd);
+                        pfd = -1;
+                }
+                if (is_template_in_use) {
+                        snprintf (errmsg, sizeof(errmsg), "pid-file entry "
+                                  "missing in the config file(%s).",
+                                  conf_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = gsync_verify_config_options (dict, op_errstr, volname);
+                goto out;
+
+        case GF_GSYNC_OPTION_TYPE_DELETE:
+                /* Check if the gsync session is still running
+                 * If so ask the user to stop geo-replication first.*/
+                if (is_template_in_use) {
+                        snprintf (errmsg, sizeof(errmsg), "state-file entry "
+                                  "missing in the config file(%s).",
+                                  conf_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+                if (ret) {
+                        snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                                  " session between %s and %s does not exist.",
+                                  volinfo->volname, slave);
+                        ret = -1;
+                        goto out;
+                } else {
+                        ret = glusterd_check_gsync_running_local (volinfo->volname,
+                                                                  slave, conf_path,
+                                                                  &is_running);
+                        if (_gf_true == is_running) {
+                                snprintf (errmsg, sizeof (errmsg), GEOREP
+                                          " session between %s & %s is "
+                                          "still active. Please stop the "
+                                          "session and retry.",
+                                          volinfo->volname, slave);
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg),
+                                  "Unable to spawn gsyncd");
+                }
+
+                break;
+        }
+
+out:
+        if (path_list)
+                GF_FREE (path_list);
+
+        if (ret && errmsg[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+                        "%s", errmsg);
+                *op_errstr = gf_strdup (errmsg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * gd_pause_or_resume_gsync:
+ *      Pause (SIGSTOP) or resume (SIGCONT) the gsyncd process group whose
+ *      pid is stored in the session pidfile, and record the new state in
+ *      the session status file ("Paused" / "Started").
+ *
+ * @dict:      must carry the "statefile" path.
+ * @op_errstr: receives a gf_strdup()'ed message for the failures that
+ *             leave the process state and the status file inconsistent,
+ *             and for a missing pid-file config entry.
+ * @is_pause:  _gf_true to pause, _gf_false to resume.
+ *
+ * Returns 0 on success, and also when gsyncd is found not running or
+ * the pidfile descriptor is unusable; non-zero on failure.
+ */
+static int
+gd_pause_or_resume_gsync (dict_t *dict, char *master, char *slave,
+                          char *slave_host, char *slave_vol, char *conf_path,
+                          char **op_errstr, gf_boolean_t is_pause)
+{
+        int32_t         ret                      = 0;
+        int             pfd                      = -1;
+        pid_t           pid                      = 0;
+        char            pidfile[PATH_MAX]        = {0,};
+        char            errmsg[PATH_MAX]         = "";
+        char            buf[1024]                = {0,};
+        gf_boolean_t    is_template_in_use       = _gf_false;
+        char            monitor_status[NAME_MAX] = {0,};
+        char           *statefile                = NULL;
+        xlator_t       *this                     = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (master);
+        GF_ASSERT (slave);
+        GF_ASSERT (slave_host);
+        GF_ASSERT (slave_vol);
+        GF_ASSERT (conf_path);
+
+        pfd = gsyncd_getpidfile (master, slave, pidfile,
+                                 conf_path, &is_template_in_use);
+        if (pfd == -2) {
+                snprintf (errmsg, sizeof(errmsg),
+                          "pid-file entry mising in config file and "
+                          "template config file.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND,
+                        "%s", errmsg);
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        if (gsync_status_byfd (pfd) == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+                        "gsyncd b/w %s & %s is not running", master, slave);
+                /* monitor gsyncd already dead */
+                goto out;
+        }
+
+        if (pfd < 0)
+                goto out;
+
+        /* Prepare to update status file*/
+        ret = dict_get_str (dict, "statefile", &statefile);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Pause/Resume Failed: Unable to fetch statefile path");
+                goto out;
+        }
+        ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+                                              sizeof (monitor_status));
+        if (ret <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STAT_FILE_READ_FAILED, "Pause/Resume Failed: "
+                        "Unable to read status file for %s(master)"
+                        " %s(slave)", master, slave);
+                /* An empty status file yields 0 here; make sure that is
+                 * still reported to the caller as a failure. */
+                ret = -1;
+                goto out;
+        }
+
+        /* Leave the last byte untouched so buf stays NUL-terminated. */
+        ret = sys_read (pfd, buf, sizeof (buf) - 1);
+        if (ret > 0) {
+                pid = strtol (buf, NULL, 10);
+                if (pid <= 1) {
+                        /* kill (-0, ...) would signal glusterd's own
+                         * process group and kill (-1, ...) every process
+                         * we may signal; never act on such a pid. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PID_KILL_FAIL,
+                                "Invalid pid (%d) read from pidfile %s",
+                                (int) pid, pidfile);
+                        ret = -1;
+                        goto out;
+                }
+                if (is_pause) {
+                        ret = kill (-pid, SIGSTOP);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_PID_KILL_FAIL, "Failed"
+                                        " to pause gsyncd. Error: %s",
+                                        strerror (errno));
+                                goto out;
+                        }
+                        /*On pause force, if status is already paused
+                          do not update status again*/
+                        if (strstr (monitor_status, "Paused"))
+                                goto out;
+
+                        ret = glusterd_create_status_file ( master, slave,
+                                                            slave_host, slave_vol,
+                                                            "Paused");
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_UPDATE_STATEFILE_FAILED,
+                                        "Unable to update state_file."
+                                        " Error : %s", strerror (errno));
+                                /* If status cannot be updated resume back */
+                                if (kill (-pid, SIGCONT)) {
+                                        snprintf (errmsg, sizeof(errmsg),
+                                                  "Pause successful but could "
+                                                  "not update status file. "
+                                                  "Please use 'resume force' to"
+                                                  " resume back and retry pause"
+                                                  " to reflect in status");
+                                        gf_msg (this->name, GF_LOG_ERROR,
+                                                errno,
+                                                GD_MSG_PID_KILL_FAIL,
+                                                "Resume back Failed. Error:"
+                                                "%s", strerror (errno));
+                                        *op_errstr = gf_strdup (errmsg);
+                                }
+                                goto out;
+                        }
+                } else {
+                        ret = glusterd_create_status_file (master, slave,
+                                                           slave_host,
+                                                           slave_vol,
+                                                           "Started");
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_UPDATE_STATEFILE_FAILED,
+                                        "Resume Failed: Unable to update "
+                                        "state_file. Error : %s",
+                                        strerror (errno));
+                                goto out;
+                        }
+                        ret = kill (-pid, SIGCONT);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_PID_KILL_FAIL,
+                                        "Resumed Failed: Unable to send"
+                                        " SIGCONT. Error: %s",
+                                        strerror (errno));
+                                /* Process can't be resumed, update status
+                                 * back to paused. */
+                                ret = glusterd_create_status_file (master,
+                                                                   slave,
+                                                                   slave_host,
+                                                                   slave_vol,
+                                                                   monitor_status);
+                                if (ret) {
+                                        snprintf (errmsg, sizeof(errmsg),
+                                                  "Resume failed!!! Status "
+                                                  "inconsistent. Please use "
+                                                  "'resume force' to resume and"
+                                                  " reach consistent state");
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_STATUS_UPDATE_FAILED,
+                                                "Updating status back to paused"
+                                                " Failed. Error: %s",
+                                                strerror (errno));
+                                        *op_errstr = gf_strdup (errmsg);
+                                }
+                                /* The resume itself failed; a successful
+                                 * status rollback must not turn that into
+                                 * a success for the caller. */
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+        ret = 0;
+
+out:
+        if (pfd >= 0)
+                sys_close (pfd);
+        return ret;
+}
+
+/*
+ * stop_gsync:
+ *      Stop the gsyncd process group of the @master/@slave session: send
+ *      SIGTERM to the group whose pid is stored in the pidfile, poll up to
+ *      20 times at 50ms intervals for the monitor to exit, then send
+ *      SIGKILL and unlink the pidfile.
+ *
+ * @msg:       not used by this function.
+ * @op_errstr: receives a gf_strdup()'ed message when the pid-file entry is
+ *             missing from both the config and the template config file.
+ * @is_force:  when set, a not-running monitor or a failing SIGTERM does
+ *             not abort the stop sequence.
+ *
+ * Returns 0 on success (including "nothing to stop"), non-zero otherwise.
+ */
+static int
+stop_gsync (char *master, char *slave, char **msg,
+            char *conf_path, char **op_errstr,
+            gf_boolean_t is_force)
+{
+        int32_t         ret                = 0;
+        int             pfd                = -1;
+        pid_t           pid                = 0;
+        char            pidfile[PATH_MAX]  = {0,};
+        char            errmsg[PATH_MAX]   = "";
+        char            buf[1024]          = {0,};
+        int             i                  = 0;
+        gf_boolean_t    is_template_in_use = _gf_false;
+        xlator_t       *this               = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (this->private);
+
+        pfd = gsyncd_getpidfile (master, slave, pidfile,
+                                 conf_path, &is_template_in_use);
+        if (pfd == -2) {
+                snprintf (errmsg, sizeof(errmsg),
+                          "pid-file entry mising in config file and "
+                          "template config file.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND,
+                        "%s", errmsg);
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+        if (gsync_status_byfd (pfd) == -1 && !is_force) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+                        "gsyncd b/w %s & %s is not running", master,
+                        slave);
+                /* monitor gsyncd already dead */
+                goto out;
+        }
+
+        if (pfd < 0)
+                goto out;
+
+        /* Leave the last byte untouched so buf stays NUL-terminated. */
+        ret = sys_read (pfd, buf, sizeof (buf) - 1);
+        if (ret > 0) {
+                pid = strtol (buf, NULL, 10);
+                if (pid <= 1) {
+                        /* kill (-0, ...) would signal glusterd's own
+                         * process group and kill (-1, ...) every process
+                         * we may signal; never act on such a pid. */
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_PID_KILL_FAIL,
+                                "Invalid pid (%d) read from pidfile %s",
+                                (int) pid, pidfile);
+                        if (!is_force) {
+                                ret = -1;
+                                goto out;
+                        }
+                } else {
+                        ret = kill (-pid, SIGTERM);
+                        if (ret && !is_force) {
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        GD_MSG_PID_KILL_FAIL,
+                                        "failed to kill gsyncd");
+                                goto out;
+                        }
+                        for (i = 0; i < 20; i++) {
+                                if (gsync_status_byfd (pfd) == -1) {
+                                        /* monitor gsyncd is dead but worker
+                                         * may still be alive, give some more
+                                         * time before SIGKILL (hack)
+                                         */
+                                        usleep (50000);
+                                        break;
+                                }
+                                usleep (50000);
+                        }
+                        kill (-pid, SIGKILL);
+                }
+                sys_unlink (pidfile);
+        }
+        ret = 0;
+
+out:
+        if (pfd >= 0)
+                sys_close (pfd);
+
+        return ret;
+}
+
+/*
+ * gsync_conf_bool_value:
+ *      Map the boolean spellings accepted in a gsyncd config value.
+ *
+ * RETURN VALUE:
+ *      1  : "true", "1" or "yes"
+ *      0  : "false", "0" or "no"
+ *      -1 : any other string
+ */
+static int
+gsync_conf_bool_value (const char *str)
+{
+        if (!strcmp(str,"true") || !strcmp(str,"1") || !strcmp(str,"yes"))
+                return 1;
+
+        if (!strcmp(str,"false") || !strcmp(str,"0") || !strcmp(str,"no"))
+                return 0;
+
+        return -1;
+}
+
+/*
+ * glusterd_gsync_op_already_set:
+ *      This function checks whether op_value is the same as the value
+ *      currently stored for op_name in the gsyncd.conf file.
+ *
+ * RETURN VALUE:
+ *      0  : op_value matches the conf file.
+ *      1  : op_value does not match the conf file, or op_name was not
+ *           found in the conf file.
+ *      -1 : error
+ */
+
+int
+glusterd_gsync_op_already_set (char* master, char* slave, char* conf_path,
+                               char* op_name, char* op_value)
+{
+        dict_t   *conf_dict   = NULL;
+        char     *conf_value  = NULL;
+        int32_t   conf_bool   = 0;
+        int32_t   cli_bool    = 0;
+        int32_t   ret         = -1;
+        xlator_t *this        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf_dict = dict_new ();
+        if (!conf_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+                        "Not able to create dict.");
+                return -1;
+        }
+
+        ret = glusterd_gsync_get_config (master, slave, conf_path,
+                                         conf_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GET_CONFIG_INFO_FAILED,
+                        "Unable to get configuration data for %s(master), "
+                        "%s(slave)", master, slave);
+                goto out;
+        }
+
+        ret = dict_get_param (conf_dict, op_name, &conf_value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get op_value for %s(master), %s(slave). "
+                        "Please check gsync config file.", master, slave);
+                ret = 1;
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "val_cli:%s val_conf:%s", op_value,
+                      conf_value);
+
+        conf_bool = gsync_conf_bool_value (conf_value);
+        if (conf_bool >= 0) {
+                /* The stored value is a boolean: on the CLI side anything
+                 * other than a "true" spelling counts as false. */
+                cli_bool = (gsync_conf_bool_value (op_value) == 1) ? 1 : 0;
+                ret = (cli_bool == conf_bool) ? 0 : 1;
+        } else {
+                /* Not a boolean: plain string comparison. */
+                ret = (strcmp(conf_value,op_value) == 0) ? 0 : 1;
+        }
+
+out:
+        dict_unref(conf_dict);
+        return ret;
+}
+
+/*
+ * glusterd_gsync_configure:
+ *      Apply a geo-replication "config" sub-operation by running
+ *      gsyncd with --config-<subop> for the option named in @dict.
+ *
+ *      "get" and "get-all" are deferred to the cli and return 0 at once.
+ *      For "set"-type sub-operations (other than "checkpoint") the call is
+ *      skipped when the option already has the requested value. When the
+ *      option is not listed in gsync_no_restart_opts and @volinfo is
+ *      given, the session is restarted after a successful change.
+ *
+ * @dict:      supplies "subop", "op_name", "op_value" (for set),
+ *             "conf_path" and, when a new state_file has to be created,
+ *             "slave_host" / "slave_vol".
+ * @op_errstr: receives an allocated status or error message.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,
+                          char *path_list, dict_t *dict,
+                          dict_t *resp_dict, char **op_errstr)
+{
+        int32_t          ret              = -1;
+        char            *op_name          = NULL;
+        char            *op_value         = NULL;
+        runner_t         runner           = {0,};
+        glusterd_conf_t *priv             = NULL;
+        char            *subop            = NULL;
+        char            *master           = NULL;
+        char            *conf_path        = NULL;
+        char            *slave_host       = NULL;
+        char            *slave_vol        = NULL;
+        struct stat      stbuf            = {0, };
+        gf_boolean_t     restart_required = _gf_true;
+        char           **resopt           = NULL;
+        gf_boolean_t     op_already_set   = _gf_false;
+        xlator_t        *this             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (slave);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (dict);
+        GF_ASSERT (resp_dict);
+
+        ret = dict_get_str (dict, "subop", &subop);
+        if (ret != 0)
+                goto out;
+
+        if (strcmp (subop, "get") == 0 || strcmp (subop, "get-all") == 0) {
+                /* deferred to cli */
+                gf_msg_debug (this->name, 0, "Returning 0");
+                return 0;
+        }
+
+        ret = dict_get_str (dict, "op_name", &op_name);
+        if (ret != 0)
+                goto out;
+
+        if (strtail (subop, "set")) {
+                ret = dict_get_str (dict, "op_value", &op_value);
+                if (ret != 0)
+                        goto out;
+        }
+
+        priv = THIS->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                *op_errstr = gf_strdup ("glusterd defunct");
+                /* ret is 0 from the lookups above; without this the
+                 * failure would be reported as success and the restart
+                 * logic at "out" would run. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "conf_path", &conf_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch conf file path.");
+                goto out;
+        }
+
+        master = volinfo ? volinfo->volname : "";
+
+        /* Done before the runner is initialised so that the early exits
+         * below do not leave an initialised but never-run runner behind. */
+        if ( strcmp(op_name,"checkpoint") != 0 && strtail (subop, "set")) {
+                ret = glusterd_gsync_op_already_set(master,slave,conf_path,
+                                                    op_name,op_value);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_GSYNCD_OP_SET_FAILED,
+                                "glusterd_gsync_op_already_set failed.");
+                        gf_asprintf (op_errstr, GEOREP" config-%s failed for "
+                                     "%s %s", subop, master, slave);
+                        goto out;
+                }
+                if (ret == 0) {
+                        gf_msg_debug (this->name, 0, "op_value is already set");
+                        op_already_set = _gf_true;
+                        goto out;
+                }
+        }
+
+        runinit (&runner);
+        runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+        runner_argprintf (&runner, "%s", conf_path);
+        runner_argprintf (&runner, "--iprefix=%s", DATADIR);
+        if (volinfo)
+                runner_argprintf (&runner, ":%s", master);
+        runner_add_arg (&runner, slave);
+        runner_argprintf (&runner, "--config-%s", subop);
+        runner_add_arg (&runner, op_name);
+        if (op_value)
+                runner_add_arg (&runner, op_value);
+
+        /* The big lock is dropped while the external command runs. */
+        synclock_unlock (&priv->big_lock);
+        ret = runner_run (&runner);
+        synclock_lock (&priv->big_lock);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_ERROR,
+                        "gsyncd failed to %s %s option for "
+                        "%s %s peers", subop, op_name, master,
+                        slave);
+
+                gf_asprintf (op_errstr, GEOREP" config-%s failed for %s %s",
+                             subop, master, slave);
+
+                goto out;
+        }
+
+        if ((!strcmp (op_name, "state_file")) && (op_value)) {
+
+                /* A new state_file location that does not exist yet is
+                 * created with a transitional status. */
+                ret = sys_lstat (op_value, &stbuf);
+                if (ret) {
+                        ret = dict_get_str (dict, "slave_host", &slave_host);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Unable to fetch slave host.");
+                                goto out;
+                        }
+
+                        ret = dict_get_str (dict, "slave_vol", &slave_vol);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Unable to fetch slave volume name.");
+                                goto out;
+                        }
+
+                        /* NOTE(review): volinfo is dereferenced here
+                         * although it is treated as optional above --
+                         * confirm callers never pass NULL for a
+                         * state_file change. */
+                        ret = glusterd_create_status_file (volinfo->volname,
+                                                           slave, slave_host,
+                                                           slave_vol,
+                                                           "Switching Status "
+                                                           "File");
+                        if (ret || sys_lstat (op_value, &stbuf)) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_FILE_OP_FAILED, "Unable to "
+                                        "create %s. Error : %s", op_value,
+                                        strerror (errno));
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        ret = 0;
+        gf_asprintf (op_errstr, "config-%s successful", subop);
+
+out:
+        if (!ret && volinfo && !op_already_set) {
+                for (resopt = gsync_no_restart_opts; *resopt; resopt++) {
+                        restart_required = _gf_true;
+                        if (!strcmp ((*resopt), op_name)){
+                                restart_required = _gf_false;
+                                break;
+                        }
+                }
+
+                if (restart_required) {
+                        ret = glusterd_check_restart_gsync_session (volinfo, slave,
+                                                                    resp_dict, path_list,
+                                                                    conf_path, 0);
+                        if (ret)
+                                *op_errstr = gf_strdup ("internal error");
+                }
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * glusterd_gsync_read_frm_status:
+ *      Read the gsyncd status file at @path into @buf (capacity @blen) as
+ *      a NUL-terminated string with trailing whitespace removed.
+ *
+ * RETURN VALUE:
+ *      > 0 : number of bytes read from the file; @buf holds the status.
+ *      0   : the file is empty.
+ *      -1  : the file could not be opened or read, its content is empty
+ *            once whitespace is stripped, starts with a NUL byte, or does
+ *            not fit in @buf.
+ */
+int
+glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
+{
+        int              ret       = 0;
+        int              status_fd = -1;
+        size_t           len       = 0;
+        xlator_t        *this      = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (path);
+        GF_ASSERT (buf);
+
+        /* blen - 1 below would wrap around for an empty buffer. */
+        if (blen == 0)
+                return -1;
+
+        status_fd = open (path, O_RDONLY);
+        if (status_fd == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+                        "Unable to read gsyncd status file %s", path);
+                return -1;
+        }
+        ret = sys_read (status_fd, buf, blen - 1);
+        if (ret > 0) {
+                len = strnlen (buf, ret);
+                /* Reject content that starts with NUL or fills the whole
+                 * buffer (it may have been truncated). */
+                if (len == 0 || len == blen - 1) {
+                        ret = -1;
+                } else {
+                        /* Terminate explicitly rather than relying on the
+                         * caller having zeroed the buffer. */
+                        buf[len] = '\0';
+                        /* Strip trailing whitespace without ever stepping
+                         * before the start of buf. */
+                        while (len > 0 &&
+                               isspace ((unsigned char) buf[len - 1]))
+                                buf[--len] = '\0';
+                        if (len == 0)
+                                ret = -1;
+                }
+        } else if (ret == 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+                        "Status file of gsyncd is empty");
+        else /* ret < 0 */
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+                        "Status file of gsyncd is corrupt");
+
+        sys_close (status_fd);
+        return ret;
+}
+
+/*
+ * dict_get_param:
+ *      Look up @key in @dict. If it is not found, retry with an alternate
+ *      spelling of the key in which, from the first '-' or '_' onwards,
+ *      every '-' and '_' is replaced by the opposite of that first
+ *      separator (so "state-file" also matches "state_file" and vice
+ *      versa).
+ *
+ * Returns 0 and stores the value in @param on success; otherwise -1
+ * (allocation failure or no separator in @key) or the result of the
+ * second dict_get_str() call.
+ */
+static int
+dict_get_param (dict_t *dict, char *key, char **param)
+{
+        char *alt_key   = NULL;
+        char *first_sep = NULL;
+        char *cursor    = NULL;
+        char  swap_to   = '\0';
+        int   ret       = -1;
+
+        if (dict_get_str (dict, key, param) == 0)
+                return 0;
+
+        alt_key = gf_strdup (key);
+        if (!alt_key)
+                return -1;
+
+        first_sep = strpbrk (alt_key, "-_");
+        if (first_sep) {
+                swap_to = (*first_sep == '-') ? '_' : '-';
+                for (cursor = first_sep; *cursor != '\0'; cursor++) {
+                        if (*cursor == '-' || *cursor == '_')
+                                *cursor = swap_to;
+                }
+
+                ret = dict_get_str (dict, alt_key, param);
+        }
+
+        GF_FREE (alt_key);
+        return ret;
+}
+
+/*
+ * glusterd_fetch_values_from_config:
+ *      Load the gsyncd configuration for @master/@slave from @confpath
+ *      into @confd and hand back the requested entries. Each of
+ *      @statefile, @georep_session_wrkng_dir and @socketfile is optional;
+ *      pass NULL to skip it.
+ *
+ * Returns 0 on success, non-zero if the configuration cannot be read or
+ * a requested entry is missing.
+ */
+int
+glusterd_fetch_values_from_config (char *master, char *slave,
+                                   char *confpath, dict_t *confd,
+                                   char **statefile,
+                                   char **georep_session_wrkng_dir,
+                                   char **socketfile)
+{
+        xlator_t *this = NULL;
+        int       ret  = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_gsync_get_config (master, slave, confpath,
+                                         confd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GET_CONFIG_INFO_FAILED,
+                        "Unable to get configuration data for %s(master), "
+                        "%s(slave)", master, slave);
+                goto out;
+        }
+
+        ret = statefile ? dict_get_param (confd, "state_file", statefile)
+                        : 0;
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get state_file's name "
+                        "for %s(master), %s(slave). "
+                        "Please check gsync config file.",
+                        master, slave);
+                goto out;
+        }
+
+        ret = georep_session_wrkng_dir
+                      ? dict_get_param (confd, "georep_session_working_dir",
+                                        georep_session_wrkng_dir)
+                      : 0;
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get geo-rep session's "
+                        "working directory name for %s(master), "
+                        "%s(slave). Please check gsync config file.",
+                        master, slave);
+                goto out;
+        }
+
+        ret = socketfile ? dict_get_param (confd, "state_socket_unencoded",
+                                           socketfile)
+                         : 0;
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get socket file's name "
+                        "for %s(master), %s(slave). "
+                        "Please check gsync config file.",
+                        master, slave);
+                goto out;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * glusterd_read_status_file:
+ *      For every brick of @volinfo hosted on this node, build a
+ *      gf_gsync_status_t record for the session with @slave and store it
+ *      in @dict under "status_value<N>"; the running record count is kept
+ *      in @dict under "gsync-count".
+ *
+ *      @conf_path is used when it exists; otherwise the template config
+ *      under the glusterd working directory is used, and every record's
+ *      worker status is reported as "Config Corrupted".
+ *
+ * @node: not used by this function.
+ *
+ * Returns -1 if the scratch dict cannot be created. Otherwise it always
+ * returns 0, even when collecting the status failed part-way; the
+ * records added to @dict are the only indication of what was gathered.
+ */
+int
+glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,
+                           char *conf_path, dict_t *dict, char *node)
+{
+        char                    temp_conf_path[PATH_MAX]   = "";
+        char                   *working_conf_path          = NULL;
+        char                   *georep_session_wrkng_dir   = NULL;
+        char                   *master                     = NULL;
+        char                    sts_val_name[1024]         = "";
+        char                    monitor_status[NAME_MAX]   = "";
+        char                   *statefile                  = NULL;
+        char                   *socketfile                 = NULL;
+        dict_t                 *confd                      = NULL;
+        char                   *slavekey                   = NULL;
+        char                   *slaveentry                 = NULL;
+        char                   *slaveuser                  = NULL;
+        char                   *saveptr                    = NULL;
+        char                   *temp                       = NULL;
+        char                   *temp_inp                   = NULL;
+        int                     gsync_count                = 0;
+        int                     ret                        = 0;
+        glusterd_brickinfo_t   *brickinfo                  = NULL;
+        gf_gsync_status_t      *sts_val                    = NULL;
+        gf_boolean_t            is_template_in_use         = _gf_false;
+        glusterd_conf_t        *priv                       = NULL;
+        struct stat             stbuf                      = {0,};
+        xlator_t               *this                       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (this->private);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (conf_path);
+
+        master = volinfo->volname;
+
+        confd = dict_new ();
+        if (!confd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+                        "Not able to create dict.");
+                return -1;
+        }
+
+        priv = THIS->private;
+
+        snprintf (temp_conf_path, sizeof(temp_conf_path) - 1,
+                  "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+
+        ret = sys_lstat (conf_path, &stbuf);
+        if (!ret) {
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_CONFIG_INFO,
+                        "Using passed config template(%s).",
+                        conf_path);
+                working_conf_path = conf_path;
+        } else {
+                gf_msg (this->name, GF_LOG_WARNING, ENOENT,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Config file (%s) missing. Looking for template "
+                        "config file (%s)", conf_path, temp_conf_path);
+                ret = sys_lstat (temp_conf_path, &stbuf);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                                GD_MSG_FILE_OP_FAILED, "Template "
+                                "config file (%s) missing.", temp_conf_path);
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG,
+                        "Using default config template(%s).", temp_conf_path);
+                working_conf_path = temp_conf_path;
+                is_template_in_use = _gf_true;
+        }
+
+fetch_data:
+        ret = glusterd_fetch_values_from_config (master, slave,
+                                                 working_conf_path,
+                                                 confd,
+                                                 &statefile,
+                                                 &georep_session_wrkng_dir,
+                                                 &socketfile);
+        if (ret) {
+                if (is_template_in_use == _gf_false) {
+                        /* Retry exactly once with the template config. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FETCH_CONFIG_VAL_FAILED,
+                                "Unable to fetch config values "
+                                "for %s(master), %s(slave). "
+                                "Trying default config template",
+                                master, slave);
+                        working_conf_path = temp_conf_path;
+                        is_template_in_use = _gf_true;
+                        goto fetch_data;
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FETCH_CONFIG_VAL_FAILED, "Unable to "
+                                "fetch config values for %s(master), "
+                                "%s(slave)", master, slave);
+                        goto out;
+                }
+        }
+
+        ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+                                              sizeof (monitor_status));
+        if (ret <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STAT_FILE_READ_FAILED,
+                        "Unable to read the status file for %s(master), "
+                        "%s(slave) statefile: %s", master, slave,
+                        statefile);
+                snprintf (monitor_status, sizeof (monitor_status),
+                          "defunct");
+        }
+
+        ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+        if (ret)
+                gsync_count = 0;
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* Only bricks hosted on this node are reported. */
+                if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+                        continue;
+
+                /* sts_val is owned here until dict_set_bin() succeeds;
+                 * every failure path below relies on the cleanup at
+                 * "out" to release it. */
+                sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t),
+                                     gf_common_mt_gsync_status_t);
+                if (!sts_val) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Out Of Memory");
+                        goto out;
+                }
+
+                /* Slave Key */
+                ret = glusterd_get_slave (volinfo, slave, &slavekey);
+                if (ret < 0)
+                        goto out;
+
+                /* All copies below are bounded by the destination field
+                 * so an over-long value is truncated instead of
+                 * overflowing the record. */
+                snprintf (sts_val->slavekey, sizeof(sts_val->slavekey),
+                          "%s", slavekey);
+
+                /* Master Volume */
+                snprintf (sts_val->master, sizeof(sts_val->master),
+                          "%s", master);
+
+                /* Master Brick Node */
+                snprintf (sts_val->node, sizeof(sts_val->node),
+                          "%s", brickinfo->hostname);
+
+                /* Master Brick Path */
+                snprintf (sts_val->brick, sizeof(sts_val->brick),
+                          "%s", brickinfo->path);
+
+                /* Brick Host UUID */
+                snprintf (sts_val->brick_host_uuid,
+                          sizeof(sts_val->brick_host_uuid),
+                          "%s", uuid_utoa(brickinfo->uuid));
+
+                /* Slave */
+                snprintf (sts_val->slave, sizeof(sts_val->slave),
+                          "%s", slave);
+
+                /* Defaults for everything gsyncd may not report. */
+                snprintf (sts_val->slave_node,
+                          sizeof(sts_val->slave_node), "N/A");
+
+                snprintf (sts_val->worker_status,
+                          sizeof(sts_val->worker_status), "N/A");
+
+                snprintf (sts_val->crawl_status,
+                          sizeof(sts_val->crawl_status), "N/A");
+
+                snprintf (sts_val->last_synced,
+                          sizeof(sts_val->last_synced), "N/A");
+
+                snprintf (sts_val->last_synced_utc,
+                          sizeof(sts_val->last_synced_utc), "N/A");
+
+                snprintf (sts_val->entry, sizeof(sts_val->entry), "N/A");
+
+                snprintf (sts_val->data, sizeof(sts_val->data), "N/A");
+
+                snprintf (sts_val->meta, sizeof(sts_val->meta), "N/A");
+
+                snprintf (sts_val->failures, sizeof(sts_val->failures), "N/A");
+
+                snprintf (sts_val->checkpoint_time,
+                          sizeof(sts_val->checkpoint_time), "N/A");
+
+                snprintf (sts_val->checkpoint_time_utc,
+                          sizeof(sts_val->checkpoint_time_utc), "N/A");
+
+                snprintf (sts_val->checkpoint_completed,
+                          sizeof(sts_val->checkpoint_completed), "N/A");
+
+                snprintf (sts_val->checkpoint_completion_time,
+                          sizeof(sts_val->checkpoint_completion_time),
+                          "N/A");
+
+                snprintf (sts_val->checkpoint_completion_time_utc,
+                          sizeof(sts_val->checkpoint_completion_time_utc),
+                          "N/A");
+
+                /* Get all the other values from Gsyncd */
+                ret = glusterd_gsync_get_status (master, slave, conf_path,
+                                                 brickinfo->path, sts_val);
+
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GET_STATUS_DATA_FAIL,
+                                "Unable to get status data "
+                                "for %s(master), %s(slave), %s(brick)",
+                                master, slave, brickinfo->path);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (is_template_in_use) {
+                        snprintf (sts_val->worker_status,
+                                  sizeof(sts_val->worker_status),
+                                  "Config Corrupted");
+                }
+
+                ret = dict_get_str (volinfo->gsync_slaves, slavekey,
+                                    &slaveentry);
+                if (ret < 0)
+                        goto out;
+
+                snprintf (sts_val->session_slave,
+                          sizeof(sts_val->session_slave), "%s", slaveentry);
+
+                /* strtok_r modifies its input, so work on a copy. */
+                temp_inp = gf_strdup(slaveentry);
+                if (!temp_inp)
+                        goto out;
+
+                if (strstr(temp_inp, "@") == NULL) {
+                        slaveuser = "root";
+                } else {
+                        temp = strtok_r(temp_inp, "//", &saveptr);
+                        temp = strtok_r(NULL, "/", &saveptr);
+                        slaveuser = strtok_r(temp, "@", &saveptr);
+                }
+                snprintf (sts_val->slave_user, sizeof(sts_val->slave_user),
+                          "%s", slaveuser);
+
+                /* slaveuser may point into temp_inp; it has been copied,
+                 * so the scratch string can go before the next brick. */
+                GF_FREE (temp_inp);
+                temp_inp = NULL;
+
+                snprintf (sts_val_name, sizeof (sts_val_name),
+                          "status_value%d", gsync_count);
+                ret = dict_set_bin (dict, sts_val_name, sts_val,
+                                    sizeof(gf_gsync_status_t));
+                if (ret)
+                        goto out;
+
+                /* Ownership of the record has passed to dict. */
+                gsync_count++;
+                sts_val = NULL;
+        }
+
+        ret = dict_set_int32 (dict, "gsync-count", gsync_count);
+
+out:
+        if (sts_val)
+                GF_FREE (sts_val);
+        GF_FREE (temp_inp);
+        dict_unref (confd);
+
+        return 0;
+}
+
+/*
+ * glusterd_check_restart_gsync_session:
+ *      Restart the local gsyncd of the @volinfo/@slave session if it is
+ *      currently running: stop it, drop @slave from
+ *      volinfo->gsync_active_slaves, start it again and, on success,
+ *      re-add @slave with the value "running".
+ *
+ *      If the running check succeeds and reports that gsyncd is not
+ *      running, nothing is done.
+ *
+ * Returns 0 on success or when there was nothing to restart, non-zero
+ * otherwise.
+ */
+int
+glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
+                                      dict_t *resp_dict, char *path_list,
+                                      char *conf_path, gf_boolean_t is_force)
+{
+
+        int                 ret             = 0;
+        glusterd_conf_t    *priv            = NULL;
+        char               *status_msg      = NULL;
+        gf_boolean_t        is_running      = _gf_false;
+        char               *op_errstr       = NULL;
+        char               *key             = NULL;
+        xlator_t           *this            = NULL;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        key = slave;
+
+        ret = glusterd_check_gsync_running_local (volinfo->volname,
+                                                  slave, conf_path,
+                                                  &is_running);
+        if (!ret && (_gf_true != is_running))
+                /* gsynd not running, nothing to do */
+                goto out;
+
+        ret = stop_gsync (volinfo->volname, slave, &status_msg,
+                          conf_path, &op_errstr,
+                          is_force);
+        if (ret == 0 && status_msg)
+                ret = dict_set_str (resp_dict, "gsync-status",
+                                    status_msg);
+        if (ret == 0) {
+                dict_del (volinfo->gsync_active_slaves, key);
+                ret = glusterd_start_gsync (volinfo, slave, path_list,
+                                            conf_path, uuid_utoa(MY_UUID),
+                                            NULL, _gf_false);
+                if (!ret) {
+                        /* Add slave to the dict indicating geo-rep session is
+                         * running.*/
+                        ret = dict_set_dynstr_with_alloc (
+                                            volinfo->gsync_active_slaves,
+                                            key, "running");
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Unable to set"
+                                        " key:%s value:running in dict. But "
+                                        "the config succeeded.", key);
+                                goto out;
+                        }
+                }
+        }
+
+ out:
+        /* stop_gsync() hands back an allocated message on some failures;
+         * it is not forwarded to anyone, so release it here. */
+        if (op_errstr)
+                GF_FREE (op_errstr);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * glusterd_marker_changelog_create_volfile:
+ *      Regenerate the volfiles of @volinfo and notify services, persist
+ *      the volinfo with an incremented version and, if the volume is
+ *      started, run the services manager for it.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int32_t
+glusterd_marker_changelog_create_volfile (glusterd_volinfo_t *volinfo)
+{
+        xlator_t *this   = NULL;
+        int32_t   status = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (glusterd_create_volfiles_and_notify_services (volinfo)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Unable to create volfile for setting of marker "
+                        "while '"GEOREP" start'");
+                return -1;
+        }
+
+        status = glusterd_store_volinfo (volinfo,
+                                         GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (status)
+                return status;
+
+        /* Services only need to be touched for a started volume. */
+        if (GLUSTERD_STATUS_STARTED != volinfo->status)
+                return 0;
+
+        status = glusterd_svcs_manager (volinfo);
+        return status;
+}
+
+/*
+ * glusterd_set_gsync_knob:
+ *      Make sure the boolean volume option @key is enabled on @volinfo.
+ *      If it is currently off, it is set to "on" and *@vc is set to 1 to
+ *      tell the caller that the volfile changed; otherwise *@vc is left
+ *      untouched.
+ *
+ * Returns 0 on success, non-zero if the option cannot be read or set.
+ */
+static int
+glusterd_set_gsync_knob (glusterd_volinfo_t *volinfo, char *key, int *vc)
+{
+        xlator_t *this    = NULL;
+        int       enabled = _gf_false;
+        int       ret     = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (this->private);
+
+        enabled = glusterd_volinfo_get_boolean (volinfo, key);
+        if (enabled == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GET_KEY_FAILED,
+                        "failed to get key %s from volinfo", key);
+        } else if (enabled == _gf_false) {
+                *vc = 1;
+                ret = glusterd_gsync_volinfo_dict_set (volinfo,
+                                                       key, "on");
+        } else {
+                /* Already enabled: nothing to change. */
+                ret = 0;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Enable the volume options geo-replication depends on, in this order:
+ * VKEY_MARKER_XTIME, VKEY_MARKER_XTIME_FORCE and VKEY_CHANGELOG.  If any of
+ * them had to be switched on, the volfiles are regenerated afterwards.
+ *
+ * Returns 0 on success, or the first non-zero result encountered.
+ */
+static int
+glusterd_set_gsync_confs (glusterd_volinfo_t *volinfo)
+{
+        /**
+         * VKEY_MARKER_XTIME_FORCE: enable ignore-pid-check blindly as it
+         * could be needed for cascading setups.
+         */
+        char   *knobs[]        = { VKEY_MARKER_XTIME,
+                                   VKEY_MARKER_XTIME_FORCE,
+                                   VKEY_CHANGELOG };
+        size_t  idx            = 0;
+        int     knob_changed   = 0;
+        int     rc             = -1;
+
+        for (idx = 0; idx < sizeof (knobs) / sizeof (knobs[0]); idx++) {
+                rc = glusterd_set_gsync_knob (volinfo, knobs[idx],
+                                              &knob_changed);
+                if (rc)
+                        return rc;
+        }
+
+        if (knob_changed)
+                rc = glusterd_marker_changelog_create_volfile (volinfo);
+
+        return rc;
+}
+
+/*
+ * Collect geo-rep status for one master volume / slave pair into rsp_dict.
+ *
+ * When glusterd_gsync_get_uuid() fails the session is treated as inactive:
+ * the statefile name is looked up and lstat()ed, and if either step fails
+ * the function logs at INFO level and returns 0 without reading any status.
+ * Otherwise the result of glusterd_read_status_file() is returned.
+ */
+static int
+glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
+                                   char *slave, char *conf_path,
+                                   dict_t *rsp_dict, char *node)
+{
+        xlator_t     *this            = THIS;
+        char         *statefile       = NULL;
+        gf_boolean_t  template_in_use = _gf_false;
+        struct stat   st              = {0, };
+        uuid_t        uuid            = {0, };
+        int           ret             = 0;
+
+        GF_ASSERT (this);
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        GF_ASSERT (this->private);
+
+        if (glusterd_gsync_get_uuid (slave, volinfo, uuid) != 0) {
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_SESSION_INACTIVE,
+                        "geo-replication status %s %s : session is not "
+                        "active", volinfo->volname, slave);
+
+                if (glusterd_get_statefile_name (volinfo, slave, conf_path,
+                                                 &statefile,
+                                                 &template_in_use) != 0) {
+                        if (strstr (slave, "::") == NULL) {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        GD_MSG_SLAVE_URL_INVALID,
+                                        "%s is not a valid slave url.", slave);
+                        } else {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        GD_MSG_GET_STATEFILE_NAME_FAILED,
+                                        "Unable to get statefile's name");
+                        }
+                        /* Not an error for the status command. */
+                        ret = 0;
+                        goto out;
+                }
+
+                if (sys_lstat (statefile, &st) != 0) {
+                        gf_msg (this->name, GF_LOG_INFO, ENOENT,
+                                GD_MSG_FILE_OP_FAILED,
+                                "%s statefile not present.", statefile);
+                        ret = 0;
+                        goto out;
+                }
+        }
+
+        ret = glusterd_read_status_file (volinfo, slave, conf_path,
+                                         rsp_dict, node);
+out:
+        if (statefile)
+                GF_FREE (statefile);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Walk every entry of volinfo->gsync_slaves with dict_foreach(), handing
+ * each one to _get_status_mst_slv together with the response dict, the
+ * volinfo and the node name.  Always returns 0.
+ */
+static int
+glusterd_get_gsync_status_mst (glusterd_volinfo_t *volinfo, dict_t *rsp_dict,
+                               char *node)
+{
+        glusterd_gsync_status_temp_t walk_ctx = {
+                .rsp_dict = rsp_dict,
+                .volinfo  = volinfo,
+                .node     = node,
+        };
+
+        GF_ASSERT (volinfo);
+
+        dict_foreach (volinfo->gsync_slaves, _get_status_mst_slv, &walk_ctx);
+
+        return 0;
+}
+
+/*
+ * Gather geo-rep status for every volume on priv->volumes.  Stops at, and
+ * returns, the first non-zero result of glusterd_get_gsync_status_mst();
+ * returns 0 if every volume was processed (or there are none).
+ */
+static int
+glusterd_get_gsync_status_all (dict_t *rsp_dict, char *node)
+{
+        xlator_t           *this = THIS;
+        glusterd_conf_t    *priv = NULL;
+        glusterd_volinfo_t *vol  = NULL;
+        int32_t             rc   = 0;
+
+        GF_ASSERT (this);
+
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry (vol, &priv->volumes, vol_list) {
+                rc = glusterd_get_gsync_status_mst (vol, rsp_dict, node);
+                if (rc)
+                        break;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning with %d", rc);
+        return rc;
+}
+
+/*
+ * Entry point for the geo-rep "status" operation.
+ *
+ * Dispatch depends on which keys are present in `dict`:
+ *   - no "master"            : status of every volume
+ *   - "master" but no "slave": status of every slave of that volume
+ *   - "master" and "slave"   : status of that single session ("conf_path"
+ *                              must then be present as well)
+ *
+ * The local host name (or "N/A" if it cannot be obtained) is passed down as
+ * the node name.  On an unknown master volume *op_errstr is set and -1 is
+ * returned.
+ */
+static int
+glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        char               *slave = NULL;
+        char               *volname = NULL;
+        char               *conf_path = NULL;
+        char                errmsg[PATH_MAX] = {0, };
+        gf_boolean_t        exists = _gf_false;
+        glusterd_volinfo_t *volinfo = NULL;
+        int                 ret = 0;
+        char                my_hostname[256] = {0,};
+        xlator_t           *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Hand gethostname() one byte less than the buffer: if the name is
+         * truncated it is unspecified whether it is NUL-terminated, and the
+         * zero-initialised last byte then guarantees termination. */
+        ret = gethostname (my_hostname, sizeof (my_hostname) - 1);
+        if (ret) {
+                /* stick to N/A */
+                (void) strcpy (my_hostname, "N/A");
+        }
+
+        ret = dict_get_str (dict, "master", &volname);
+        if (ret < 0){
+                ret = glusterd_get_gsync_status_all (rsp_dict, my_hostname);
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if ((ret) || (!exists)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+                        "volume name does not exist");
+                snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+                          " exist", volname);
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+
+        ret = dict_get_str (dict, "slave", &slave);
+        if (ret < 0) {
+                ret = glusterd_get_gsync_status_mst (volinfo,
+                                                     rsp_dict, my_hostname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "conf_path", &conf_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch conf file path.");
+                goto out;
+        }
+
+        ret = glusterd_get_gsync_status_mst_slv (volinfo, slave, conf_path,
+                                                 rsp_dict, my_hostname);
+
+ out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Delete a geo-rep session: run "gsyncd --delete" for the master/slave pair
+ * and then remove the session directory
+ * <workdir>/GEOREP/<master>_<slave_host>_<slave_vol>.
+ *
+ * volinfo may be NULL, in which case the master name is the empty string
+ * and no ":<master>" argument is passed to gsyncd.
+ *
+ * The big lock is released while gsyncd runs and re-acquired afterwards.
+ * A session directory that does not exist (ENOENT) is not an error.
+ *
+ * Returns 0 on success (and sets *op_errstr to "delete successful"),
+ * non-zero on failure.
+ */
+static int
+glusterd_gsync_delete (glusterd_volinfo_t *volinfo, char *slave,
+                       char *slave_host, char *slave_vol, char *path_list,
+                       dict_t *dict, dict_t *resp_dict, char **op_errstr)
+{
+        int32_t          ret = -1;
+        int              len = 0;
+        runner_t         runner = {0,};
+        glusterd_conf_t *priv = NULL;
+        char            *master = NULL;
+        char            *gl_workdir = NULL;
+        char             geo_rep_dir[PATH_MAX] = "";
+        char            *conf_path = NULL;
+        xlator_t        *this = NULL;
+        uint32_t         reset_sync_time = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (slave);
+        GF_ASSERT (slave_host);
+        GF_ASSERT (slave_vol);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (dict);
+        GF_ASSERT (resp_dict);
+
+        if (THIS)
+                priv = THIS->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                *op_errstr = gf_strdup ("glusterd defunct");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "conf_path", &conf_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch conf file path.");
+                goto out;
+        }
+
+        gl_workdir = priv->workdir;
+        master = "";
+        runinit (&runner);
+        runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+                         "--delete", "-c", NULL);
+        runner_argprintf (&runner, "%s", conf_path);
+        runner_argprintf (&runner, "--iprefix=%s", DATADIR);
+
+        runner_argprintf (&runner, "--path-list=%s", path_list);
+
+        /* "reset-sync-time" is optional; a failed lookup just skips it. */
+        ret = dict_get_uint32 (dict, "reset-sync-time", &reset_sync_time);
+        if (!ret && reset_sync_time) {
+                runner_add_args (&runner, "--reset-sync-time", NULL);
+        }
+
+        if (volinfo) {
+                master = volinfo->volname;
+                runner_argprintf (&runner, ":%s", master);
+        }
+        runner_add_arg (&runner, slave);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        /* Do not hold the big lock while the external command runs. */
+        synclock_unlock (&priv->big_lock);
+        ret = runner_run (&runner);
+        synclock_lock (&priv->big_lock);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_DEL_FAILED,
+                        "gsyncd failed to delete session info for %s and "
+                        "%s peers", master, slave);
+
+                gf_asprintf (op_errstr, "gsyncd failed to "
+                             "delete session info for %s and %s peers",
+                             master, slave);
+
+                goto out;
+        }
+
+        /* Use `master` rather than volinfo->volname: volinfo may be NULL
+         * here (see the check above), and `master` is identical to the
+         * volume name whenever volinfo is set. */
+        len = snprintf (geo_rep_dir, sizeof (geo_rep_dir),
+                        "%s/"GEOREP"/%s_%s_%s", gl_workdir,
+                        master, slave_host, slave_vol);
+        if ((len < 0) || ((size_t) len >= sizeof (geo_rep_dir))) {
+                /* Truncated or failed: never index the buffer with the
+                 * snprintf() result, and never rmdir a truncated path. */
+                gf_msg (this->name, GF_LOG_ERROR, ENAMETOOLONG,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Geo Rep Dir path is too long");
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_rmdir (geo_rep_dir);
+        if (ret) {
+                if (errno == ENOENT)
+                        gf_msg_debug (this->name, 0, "Geo Rep Dir(%s) Not Present.",
+                                      geo_rep_dir);
+                else {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DIR_OP_FAILED,
+                                "Unable to delete Geo Rep Dir(%s). Error: %s",
+                                geo_rep_dir, strerror (errno));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+        gf_asprintf (op_errstr, "delete successful");
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Run the helper GSYNCD_PREFIX"/peer_<command>" with the arguments found in
+ * `dict` and return its standard output line by line in `rsp_dict`.
+ *
+ * Input keys : "command", optional "cmd_args_count" and "cmd_arg_1" ..
+ *              "cmd_arg_<count>".
+ * Output keys: "output_count" and "output_1" .. "output_<count>", one per
+ *              line read, with the trailing newline removed.
+ *
+ * The big lock is released while the child runs and is re-acquired on every
+ * path that leaves that section.
+ *
+ * Returns 0 on success, non-zero on failure; *op_errstr is set for the
+ * failures that carry a message.
+ */
+int
+glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        char             buf[PATH_MAX] = "";
+        char             cmd_arg_name[PATH_MAX] = "";
+        char             output_name[PATH_MAX] = "";
+        char             errmsg[PATH_MAX] = "";
+        char            *ptr = NULL;
+        char            *bufp = NULL;
+        char            *command = NULL;
+        char           **cmd_args = NULL;
+        int              ret = -1;
+        int              i = -1;
+        int              cmd_args_count = 0;
+        int              output_count = 0;
+        size_t           line_len = 0;
+        glusterd_conf_t *priv = NULL;
+        runner_t         runner = {0,};
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+
+        if (THIS)
+                priv = THIS->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                *op_errstr = gf_strdup ("glusterd defunct");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "command", &command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get command from dict");
+                goto out;
+        }
+
+        /* The argument count is optional; without it no arguments are
+         * passed to the helper. */
+        ret = dict_get_int32 (dict, "cmd_args_count", &cmd_args_count);
+        if (ret)
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+                        "No cmd_args_count");
+
+        if (cmd_args_count) {
+                cmd_args = GF_CALLOC (cmd_args_count, sizeof (char*),
+                                      gf_common_mt_char);
+                if (!cmd_args) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Unable to calloc. Errno = %s",
+                                strerror(errno));
+                        /* ret is 0 here from the successful dict lookup;
+                         * make sure the failure is reported to the caller. */
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Dict keys are 1-based, the array is 0-based. */
+                for (i=1; i <= cmd_args_count; i++) {
+                        memset (cmd_arg_name, '\0', sizeof(cmd_arg_name));
+                        snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+                                  "cmd_arg_%d", i);
+                        ret = dict_get_str (dict, cmd_arg_name, &cmd_args[i-1]);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED, "Unable to get"
+                                        " %s in dict", cmd_arg_name);
+                                goto out;
+                        }
+                }
+        }
+
+        runinit (&runner);
+        runner_argprintf (&runner, GSYNCD_PREFIX"/peer_%s", command);
+        for (i=0; i < cmd_args_count; i++)
+                runner_add_arg (&runner, cmd_args[i]);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        /* Do not hold the big lock while the child process runs. */
+        synclock_unlock (&priv->big_lock);
+        ret = runner_start (&runner);
+        if (ret == -1) {
+                snprintf (errmsg, sizeof (errmsg), "Unable to "
+                          "execute command. Error : %s",
+                          strerror (errno));
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_CMD_EXEC_FAIL, "%s",
+                        errmsg);
+                ret = -1;
+                synclock_lock (&priv->big_lock);
+                goto out;
+        }
+
+        do {
+                ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO));
+                if (ptr) {
+                        /* Continue numbering from whatever is already in
+                         * rsp_dict; start at 1 if nothing is there yet. */
+                        ret = dict_get_int32 (rsp_dict, "output_count", &output_count);
+                        if (ret)
+                                output_count = 1;
+                        else
+                                output_count++;
+                        memset (output_name, '\0', sizeof (output_name));
+                        snprintf (output_name, sizeof (output_name),
+                                  "output_%d", output_count);
+                        /* Strip the trailing newline.  Guard against an
+                         * empty string (e.g. a line that starts with a NUL
+                         * byte) so buf[-1] is never touched. */
+                        line_len = strlen (buf);
+                        if ((line_len > 0) && (buf[line_len - 1] == '\n'))
+                                buf[line_len - 1] = '\0';
+                        bufp = gf_strdup (buf);
+                        if (!bufp)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_STRDUP_FAILED,
+                                        "gf_strdup failed.");
+                        ret = dict_set_dynstr (rsp_dict, output_name, bufp);
+                        if (ret) {
+                                GF_FREE (bufp);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "output set "
+                                        "failed.");
+                        }
+                        ret = dict_set_int32 (rsp_dict, "output_count", output_count);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "output_count "
+                                        "set failed.");
+                }
+        } while (ptr);
+
+        ret = runner_end (&runner);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg), "Unable to "
+                          "end. Error : %s",
+                          strerror (errno));
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_UNABLE_TO_END, "%s",
+                        errmsg);
+                ret = -1;
+                synclock_lock (&priv->big_lock);
+                goto out;
+        }
+        synclock_lock (&priv->big_lock);
+
+        ret = 0;
+out:
+        /* Only the pointer array is owned here; the strings themselves
+         * were obtained with dict_get_str() and are not freed. */
+        if (cmd_args) {
+                GF_FREE (cmd_args);
+                cmd_args = NULL;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Distribute the file <workdir>/<source> from one node to all others.
+ *
+ * On the node whose UUID equals the "host-uuid" key, the file is read and
+ * its contents, size and mode are stored in `dict` under
+ * "common_pem_contents", "contents_size" and "file_mode".  On every other
+ * node those three keys are read back and the file is (re)created with the
+ * received contents and mode.
+ *
+ * Returns 0 on success, non-zero on failure; *op_errstr is set for the
+ * failures that carry a message.
+ */
+int
+glusterd_op_copy_file (dict_t *dict, char **op_errstr)
+{
+        char             abs_filename[PATH_MAX] = "";
+        char             errmsg[PATH_MAX] = "";
+        char            *filename = NULL;
+        char            *host_uuid = NULL;
+        char             uuid_str [64] = {0};
+        char            *contents = NULL;
+        char             buf[1024] = "";
+        int              ret = -1;
+        int              fd = -1;
+        int              bytes_writen = 0;
+        int              bytes_read = 0;
+        int              contents_size = -1;
+        int              file_mode = -1;
+        glusterd_conf_t *priv = NULL;
+        struct stat      stbuf = {0,};
+        gf_boolean_t     free_contents = _gf_true;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (THIS)
+                priv = THIS->private;
+        if (priv == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+                        "priv of glusterd not present");
+                *op_errstr = gf_strdup ("glusterd defunct");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "host-uuid", &host_uuid);
+        if (ret < 0)
+                goto out;
+
+        ret = dict_get_str (dict, "source", &filename);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch filename from dict.");
+                *op_errstr = gf_strdup ("command unsuccessful");
+                goto out;
+        }
+        snprintf (abs_filename, sizeof(abs_filename),
+                  "%s/%s", priv->workdir, filename);
+
+        uuid_utoa_r (MY_UUID, uuid_str);
+        if (!strcmp (uuid_str, host_uuid)) {
+                /* This node is the source: read the file into the dict. */
+                ret = sys_lstat (abs_filename, &stbuf);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Source file"
+                                  " does not exist in %s", priv->workdir);
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                                GD_MSG_FILE_OP_FAILED, "%s", errmsg);
+                        goto out;
+                }
+
+                contents = GF_CALLOC(1, stbuf.st_size+1, gf_common_mt_char);
+                if (!contents) {
+                        snprintf (errmsg, sizeof (errmsg),
+                                  "Unable to allocate memory");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                fd = open (abs_filename, O_RDONLY);
+                if (fd < 0) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to open %s",
+                                  abs_filename);
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FILE_OP_FAILED,
+                                "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                do {
+                        ret = sys_read (fd, buf, sizeof(buf));
+                        if (ret > 0) {
+                                /* `contents` was sized from the earlier
+                                 * lstat; if the file has grown since, stop
+                                 * instead of writing past the allocation. */
+                                if (ret > stbuf.st_size - bytes_read) {
+                                        snprintf (errmsg, sizeof (errmsg),
+                                                  "%s changed size while "
+                                                  "being read", abs_filename);
+                                        *op_errstr = gf_strdup (errmsg);
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_READ_ERROR,
+                                                "%s", errmsg);
+                                        ret = -1;
+                                        goto out;
+                                }
+                                memcpy (contents+bytes_read, buf, ret);
+                                bytes_read += ret;
+                                memset (buf, '\0', sizeof(buf));
+                        }
+                } while (ret > 0);
+
+                /* Also covers a read error (ret < 0) and a shrunken file. */
+                if (bytes_read != stbuf.st_size) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to read all "
+                                  "the data from %s", abs_filename);
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_READ_ERROR,
+                                "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_int32 (dict, "contents_size", stbuf.st_size);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to set"
+                                  " contents size in dict.");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "%s", errmsg);
+                        goto out;
+                }
+
+                ret = dict_set_int32 (dict, "file_mode",
+                                      (int32_t)stbuf.st_mode);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to set"
+                                  " file mode in dict.");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "%s", errmsg);
+                        goto out;
+                }
+
+                ret = dict_set_bin (dict, "common_pem_contents",
+                                    contents, stbuf.st_size);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to set"
+                                  " pem contents in dict.");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "%s", errmsg);
+                        goto out;
+                }
+                /* The buffer is now referenced by the dict; do not free it
+                 * at `out`. */
+                free_contents = _gf_false;
+        } else {
+                /* This node is a receiver: `contents` will point into the
+                 * dict, so it must never be freed here. */
+                free_contents = _gf_false;
+                ret = dict_get_bin (dict, "common_pem_contents",
+                                    (void **) &contents);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to get"
+                                  " pem contents in dict.");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "%s", errmsg);
+                        goto out;
+                }
+                ret = dict_get_int32 (dict, "contents_size", &contents_size);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to get"
+                                  " contents size in dict.");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "%s", errmsg);
+                        goto out;
+                }
+
+                ret = dict_get_int32 (dict, "file_mode", &file_mode);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to get"
+                                  " file mode in dict.");
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "%s", errmsg);
+                        goto out;
+                }
+
+                /* Created owner-only first; the real mode is applied with
+                 * fchmod once the contents are written. */
+                fd = open (abs_filename, O_WRONLY | O_TRUNC | O_CREAT, 0600);
+                if (fd < 0) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to open %s",
+                                  abs_filename);
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FILE_OP_FAILED, "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                bytes_writen = sys_write (fd, contents, contents_size);
+
+                /* A short write is treated as a failure; it is not retried. */
+                if (bytes_writen != contents_size) {
+                        snprintf (errmsg, sizeof (errmsg), "Failed to write"
+                                  " to %s", abs_filename);
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FILE_OP_FAILED, "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                sys_fchmod (fd, file_mode);
+        }
+
+        ret = 0;
+out:
+        if (fd != -1)
+                sys_close (fd);
+
+        if (free_contents)
+                GF_FREE(contents);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit handler for the geo-replication operations.
+ *
+ * The "type" key of `dict` selects the operation: status, config, delete,
+ * start, stop, pause or resume.  Status is delegated straight away; all
+ * other operations need "slave", "slave_url", "slave_host", "slave_vol" and
+ * "conf_path".  If "master" is present its volinfo and local brick path
+ * list are looked up as well.
+ *
+ * volinfo->gsync_active_slaves is kept in step with the session state: the
+ * slave key is added ("running") on start/resume and removed on
+ * pause/stop or when start/resume fails.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int32_t             ret = -1;
+        int32_t             type = -1;
+        char               *host_uuid = NULL;
+        char               *slave = NULL;
+        char               *slave_url = NULL;
+        char               *slave_vol = NULL;
+        char               *slave_host = NULL;
+        char               *volname = NULL;
+        char               *path_list = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+        glusterd_conf_t    *priv = NULL;
+        gf_boolean_t        is_force = _gf_false;
+        char               *status_msg = NULL;
+        gf_boolean_t        is_running = _gf_false;
+        char               *conf_path = NULL;
+        char                errmsg[PATH_MAX] = "";
+        char               *key = NULL;
+        xlator_t           *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret < 0)
+                goto out;
+
+        ret = dict_get_str (dict, "host-uuid", &host_uuid);
+        if (ret < 0)
+                goto out;
+
+        if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+                ret = glusterd_get_gsync_status (dict, op_errstr, rsp_dict);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave", &slave);
+        if (ret < 0)
+                goto out;
+
+        /* The slave string doubles as the key in gsync_active_slaves. */
+        key = slave;
+
+        ret = dict_get_str (dict, "slave_url", &slave_url);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch slave url.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave_host", &slave_host);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch slave hostname.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave_vol", &slave_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch slave volume name.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "conf_path", &conf_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch conf file path.");
+                goto out;
+        }
+
+        /* "master" is optional; without it volinfo stays NULL. */
+        if (dict_get_str (dict, "master", &volname) == 0) {
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_DICT_GET_FAILED, "Volinfo for"
+                                " %s (master) not found", volname);
+                        goto out;
+                }
+
+                /* NOTE(review): this return value is overwritten by every
+                 * branch below before it is ever examined. */
+                ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+                ret = glusterd_gsync_configure (volinfo, slave, path_list,
+                                                dict, rsp_dict, op_errstr);
+                if (!ret) {
+                        ret = dict_set_str (rsp_dict, "conf_path", conf_path);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Unable to store conf_file_path.");
+                                goto out;
+                        }
+                }
+                goto out;
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_DELETE) {
+                /* NOTE(review): is_force still holds its initial _gf_false
+                 * here because "force" is only read from the dict further
+                 * down; confirm whether that is intended. */
+                ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr);
+                if (ret && !is_force && path_list)
+                        goto out;
+
+                ret = glusterd_gsync_delete (volinfo, slave, slave_host,
+                                             slave_vol, path_list, dict,
+                                             rsp_dict, op_errstr);
+                goto out;
+        }
+
+        /* Every remaining operation needs a master volume. */
+        if (!volinfo) {
+                ret = -1;
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        if (type == GF_GSYNC_OPTION_TYPE_START) {
+                /* Add slave to the dict indicating geo-rep session is running*/
+                ret = dict_set_dynstr_with_alloc (volinfo->gsync_active_slaves,
+                                                  key, "running");
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Unable to set key:%s"
+                                " value:running in the dict", key);
+                        goto out;
+                }
+
+                /* If slave volume uuid is not present in gsync_slaves
+                 * update it*/
+                ret = glusterd_update_slave_voluuid_slaveinfo (volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_REMOTE_VOL_UUID_FAIL, "Error in updating"
+                                " slave volume uuid for old slave info");
+                        goto out;
+                }
+
+                ret = glusterd_start_gsync (volinfo, slave, path_list,
+                                            conf_path, host_uuid, op_errstr,
+                                            _gf_false);
+
+                /* Delete added slave in the dict if start fails*/
+                if (ret)
+                        dict_del (volinfo->gsync_active_slaves, key);
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_STOP ||
+            type == GF_GSYNC_OPTION_TYPE_PAUSE ||
+            type == GF_GSYNC_OPTION_TYPE_RESUME) {
+                ret = glusterd_check_gsync_running_local (volinfo->volname,
+                                                          slave, conf_path,
+                                                          &is_running);
+                if (!ret && !is_force && path_list &&
+                    (_gf_true != is_running)) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_GSYNCD_OP_SET_FAILED, GEOREP" is not "
+                                "set up for %s(master) and %s(slave)",
+                                volname, slave);
+                        /* Allocate with gf_strdup like every other
+                         * op_errstr in this file, so the string comes from
+                         * the same allocator as the rest. */
+                        *op_errstr = gf_strdup (GEOREP" is not set up");
+                        goto out;
+                }
+
+                if (type == GF_GSYNC_OPTION_TYPE_PAUSE) {
+                        ret = gd_pause_or_resume_gsync (dict, volname, slave,
+                                                        slave_host, slave_vol,
+                                                        conf_path, op_errstr,
+                                                        _gf_true);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_PAUSE_FAILED,
+                                        GEOREP" Pause Failed");
+                        else
+                                dict_del (volinfo->gsync_active_slaves, key);
+
+                } else if (type == GF_GSYNC_OPTION_TYPE_RESUME) {
+
+                        /* Add slave to the dict indicating geo-rep session is
+                         * running*/
+                        ret = dict_set_dynstr_with_alloc (
+                                            volinfo->gsync_active_slaves,
+                                            key, "running");
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Unable to set "
+                                        "key:%s value:running in dict", key);
+                                goto out;
+                        }
+
+                        ret = gd_pause_or_resume_gsync (dict, volname, slave,
+                                                        slave_host, slave_vol,
+                                                        conf_path, op_errstr,
+                                                        _gf_false);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_RESUME_FAILED,
+                                        GEOREP" Resume Failed");
+                                /* Undo the "running" marker added above. */
+                                dict_del (volinfo->gsync_active_slaves, key);
+                        }
+                } else {
+
+                        ret = stop_gsync (volname, slave, &status_msg,
+                                          conf_path, op_errstr, is_force);
+
+                        if (ret == 0 && status_msg)
+                                ret = dict_set_str (rsp_dict, "gsync-status",
+                                                    status_msg);
+                        if (!ret) {
+                                /* A failure to write the status file is
+                                 * logged and returned, but the slave is
+                                 * still removed from the active set. */
+                                ret = glusterd_create_status_file (
+                                                         volinfo->volname,
+                                                         slave, slave_host,
+                                                         slave_vol,"Stopped");
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_UPDATE_STATEFILE_FAILED,
+                                                "Unable to update state_file. "
+                                                "Error : %s", strerror (errno));
+                                }
+                                dict_del (volinfo->gsync_active_slaves, key);
+                        }
+                }
+        }
+
+out:
+        if (path_list) {
+                GF_FREE (path_list);
+                path_list = NULL;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Resolve the "slave" key of `dict` into its URL, host and volume parts and
+ * build the session's gsyncd.conf path
+ * (<workdir>/GEOREP/<master>_<slave_host>_<slave_vol>/gsyncd.conf).
+ *
+ * The four results are returned through slave_url, slave_host, slave_vol
+ * and conf_path, and are also stored in `dict` under "slave_url",
+ * "slave_host", "slave_vol" and "conf_path".
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo,
+                                     dict_t *dict, char **slave_url,
+                                     char **slave_host, char **slave_vol,
+                                     char **conf_path, char **op_errstr)
+{
+        int              ret = -1;
+        int              len = 0;
+        char             confpath[PATH_MAX] = "";
+        glusterd_conf_t *priv = NULL;
+        char            *slave = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "slave", &slave);
+        if (ret || !slave) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch slave from dict");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_get_slave_info (slave, slave_url,
+                                       slave_host, slave_vol, op_errstr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                        "Unable to fetch slave details.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "slave_url", *slave_url);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to store slave IP.");
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "slave_host", *slave_host);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to store slave hostname");
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "slave_vol", *slave_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to store slave volume name.");
+                goto out;
+        }
+
+        len = snprintf (confpath, sizeof (confpath),
+                        "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+                        priv->workdir, volinfo->volname,
+                        *slave_host, *slave_vol);
+        if ((len < 0) || ((size_t) len >= sizeof (confpath))) {
+                /* Never index the buffer with the snprintf() result: on
+                 * truncation it is larger than the buffer.  A truncated
+                 * conf path would also point at the wrong file. */
+                gf_msg (this->name, GF_LOG_ERROR, ENAMETOOLONG,
+                        GD_MSG_INVALID_ENTRY,
+                        "Conf file path is too long");
+                ret = -1;
+                goto out;
+        }
+
+        *conf_path = gf_strdup (confpath);
+        if (!(*conf_path)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+                        "Unable to gf_strdup. Error: %s", strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "conf_path", *conf_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Unable to store conf_path");
+                goto out;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+
+}
+
+/*
+ * Split a slave specification into its parts.
+ *
+ * The slave string is first normalised with glusterd_urltransform_single().
+ * The second "/"-separated token of the normalised URL is then split on
+ * ":"; the first piece becomes *slave_url (and is parsed for *hostname by
+ * glusterd_geo_rep_parse_slave()), the second piece becomes *slave_vol.
+ *
+ * On success 0 is returned and *slave_url / *slave_vol are newly allocated
+ * strings owned by the caller.  On failure a non-zero value is returned;
+ * *slave_url is released and reset to NULL if it had already been set.
+ *
+ * NOTE(review): the array returned through `linearr` is not released in
+ * this function; confirm who owns it.
+ */
+int
+glusterd_get_slave_info (char *slave,
+                         char **slave_url, char **hostname,
+                         char **slave_vol, char **op_errstr)
+{
+        char     *tmp = NULL;
+        char     *save_ptr = NULL;
+        char    **linearr = NULL;
+        int32_t   ret = -1;
+        char      errmsg[PATH_MAX] = "";
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_urltransform_single (slave, "normalize",
+                                            &linearr);
+        if (ret == -1) {
+                /* snprintf() always terminates; do not reuse its return
+                 * value as an index or as this function's result. */
+                snprintf (errmsg, sizeof (errmsg),
+                          "Invalid Url: %s", slave);
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_NORMALIZE_URL_FAIL,
+                        "Failed to normalize url");
+                goto out;
+        }
+
+        /* Skip the first "/"-delimited token and work on the second one. */
+        tmp = strtok_r (linearr[0], "/", &save_ptr);
+        tmp = strtok_r (NULL, "/", &save_ptr);
+        slave = strtok_r (tmp, ":", &save_ptr);
+        if (slave) {
+                ret = glusterd_geo_rep_parse_slave (slave, hostname, op_errstr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SLAVE_URL_INVALID,
+                                "Invalid slave url: %s", *op_errstr);
+                        goto out;
+                }
+                gf_msg_debug (this->name, 0, "Hostname : %s", *hostname);
+
+                *slave_url = gf_strdup (slave);
+                if (!*slave_url) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STRDUP_FAILED,
+                                "Failed to gf_strdup");
+                        ret = -1;
+                        goto out;
+                }
+                gf_msg_debug (this->name, 0, "Slave URL : %s", *slave_url);
+                ret = 0;
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid slave name");
+                /* ret still holds the (non -1) urltransform result here. */
+                ret = -1;
+                goto out;
+        }
+
+        slave = strtok_r (NULL, ":", &save_ptr);
+        if (slave) {
+                *slave_vol = gf_strdup (slave);
+                if (!*slave_vol) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STRDUP_FAILED,
+                                "Failed to gf_strdup");
+                        ret = -1;
+                        GF_FREE (*slave_url);
+                        /* Do not hand a dangling pointer to the caller. */
+                        *slave_url = NULL;
+                        goto out;
+                }
+                gf_msg_debug (this->name, 0, "Slave Vol : %s", *slave_vol);
+                ret = 0;
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid slave name");
+                /* ret was set to 0 above; without this the missing volume
+                 * would be reported as success with *slave_vol unset. */
+                ret = -1;
+                GF_FREE (*slave_url);
+                *slave_url = NULL;
+                goto out;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Initialise `runner` and load it with the common prefix of a gsyncd
+ * "--config-set-rx" invocation:
+ *
+ *     GSYNCD_PREFIX"/gsyncd" -c <conf_path> --config-set-rx
+ *
+ * Callers append the option-specific arguments afterwards.
+ */
+static void
+runinit_gsyncd_setrx (runner_t *runner, char *conf_path)
+{
+ runinit (runner);
+ /* Executable and the "-c" switch; its value follows as the next arg. */
+ runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (runner, "%s", conf_path);
+ runner_add_arg (runner, "--config-set-rx");
+}
+
+/*
+ * Probe for a usable gsyncd by running GSYNCD_PREFIX"/gsyncd --version" and
+ * looking for the word "gsyncd" in the first line of its output.
+ *
+ * Returns 0 if the probe succeeds and -1 otherwise.  On failure
+ * *valid_state is set to 0 when the binary does not exist (ENOENT) and to
+ * -1 for every other problem; on success *valid_state is left untouched.
+ */
+static int
+glusterd_check_gsync_present (int *valid_state)
+{
+        xlator_t *this          = THIS;
+        runner_t  runner        = {0,};
+        char      line[PATH_MAX] = {0, };
+        char     *got           = NULL;
+        int       rc            = 0;
+
+        GF_ASSERT (this);
+
+        runinit (&runner);
+        runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+
+        rc = runner_start (&runner);
+        if (rc == -1) {
+                if (errno != ENOENT) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_MODULE_ERROR,
+                                GEOREP" module not working as desired");
+                        *valid_state = -1;
+                } else {
+                        gf_msg ("glusterd", GF_LOG_INFO, ENOENT,
+                                GD_MSG_MODULE_NOT_INSTALLED, GEOREP" module "
+                                "not installed in the system");
+                        *valid_state = 0;
+                }
+                goto out;
+        }
+
+        /* No output at all and output without "gsyncd" are handled alike. */
+        got = fgets (line, sizeof (line), runner_chio (&runner, STDOUT_FILENO));
+        if ((got == NULL) || (strstr (line, "gsyncd") == NULL)) {
+                rc = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR,
+                        GEOREP" module not working as desired");
+                *valid_state = -1;
+                goto out;
+        }
+
+        rc = 0;
+
+ out:
+        runner_end (&runner);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+static int
+create_conf_file (glusterd_conf_t *conf, char *conf_path)
+#define RUN_GSYNCD_CMD do { \
+ ret = runner_run_reuse (&runner); \
+ if (ret == -1) { \
+ runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \
+ runner_end (&runner); \
+ goto out; \
+ } \
+ runner_end (&runner); \
+} while (0)
+{
+ int ret = 0;
+ runner_t runner = {0,};
+ char georepdir[PATH_MAX] = {0,};
+ int valid_state = 0;
+
+ valid_state = -1;
+ ret = glusterd_check_gsync_present (&valid_state);
+ if (-1 == ret) {
+ ret = valid_state;
+ goto out;
+ }
+
+ ret = snprintf (georepdir, sizeof(georepdir) - 1, "%s/"GEOREP,
+ conf->workdir);
+ georepdir[ret] = '\0';
+
+ /************
+ * master pre-configuration
+ ************/
+
+ /* remote-gsyncd */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd",
+ ".", "^ssh:", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-params",
+ "aux-gfid-mount acl",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ssh-command */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "ssh-command");
+ runner_argprintf (&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/secret.pem", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ssh-command tar */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "ssh-command-tar");
+ runner_argprintf (&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/tar_ssh.pem", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* pid-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "pid-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/monitor.pid", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* geo-rep-working-dir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "georep-session-working-dir");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "state-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/monitor.status", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-detail-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "state-detail-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-socket */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "state-socket-unencoded");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* socketdir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* changelog-log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "changelog-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}-changes.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "gluster-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ignore-deletes */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "ignore-deletes", "false", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* special-sync-mode */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* change-detector == changelog */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg(&runner, "working-dir");
+ runner_argprintf(&runner, "%s/${mastervol}/${eSlave}",
+ DEFAULT_GLUSTERFSD_MISC_DIRETORY);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /************
+ * slave pre-configuration
+ ************/
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-params",
+ "aux-gfid-mount acl",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* MountBroker log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "log-file-mbr",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "gluster-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ out:
+ return ret ? -1 : 0;
+}
+
+/*
+ * Create the directories and files a geo-replication session relies on.
+ *
+ * Performed in order:
+ *   1. session directory  <workdir>/GEOREP/<mastervol>_<slave_host>_<slave_vol>
+ *   2. log directory      DEFAULT_LOG_FILE_DIRECTORY/GEOREP/<mastervol>
+ *   3. the config file named by "conf_path" in @dict (left alone when it
+ *      already exists)
+ *   4. the status file named by "statefile" in @dict, created with the
+ *      status "Created" (left alone when it already exists)
+ *
+ * @volinfo     master volume; only volname is read here
+ * @dict        must carry the "conf_path" and "statefile" strings
+ * @slave       passed through to glusterd_create_status_file()
+ * @slave_host  slave host, used in the session directory name
+ * @slave_vol   slave volume, used in the session directory name
+ * @op_errstr   receives a gf_strdup()'d message when creating a directory or
+ *              file fails
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+glusterd_create_essential_dir_files (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                     char *slave, char *slave_host,
+                                     char *slave_vol, char **op_errstr)
+{
+        int                ret              = -1;
+        char              *conf_path        = NULL;
+        char              *statefile        = NULL;
+        char               buf[PATH_MAX]    = "";
+        char               errmsg[PATH_MAX] = "";
+        glusterd_conf_t   *conf             = NULL;
+        struct stat        stbuf            = {0,};
+        xlator_t          *this             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+
+        ret = dict_get_str (dict, "conf_path", &conf_path);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to fetch conf file path.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "statefile", &statefile);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to fetch statefile path.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        /* snprintf() already NUL-terminates; its return value is only used
+         * to detect an encoding error or truncation. Indexing buf with it
+         * (as was done before) writes out of bounds in exactly those cases. */
+        ret = snprintf (buf, sizeof (buf), "%s/"GEOREP"/%s_%s_%s",
+                        conf->workdir, volinfo->volname, slave_host, slave_vol);
+        if ((ret < 0) || ((size_t) ret >= sizeof (buf))) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Session working directory path is too long.");
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DIR_OP_FAILED,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = mkdir_p (buf, 0777, _gf_true);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg), "Unable to create %s"
+                          ". Error : %s", buf, strerror (errno));
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        ret = snprintf (buf, sizeof (buf),
+                        DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/%s",
+                        volinfo->volname);
+        if ((ret < 0) || ((size_t) ret >= sizeof (buf))) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Session log directory path is too long.");
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DIR_OP_FAILED,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = mkdir_p (buf, 0777, _gf_true);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg), "Unable to create %s"
+                          ". Error : %s", buf, strerror (errno));
+                *op_errstr = gf_strdup (errmsg);
+                gf_msg (this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        ret = sys_lstat (conf_path, &stbuf);
+        if (!ret) {
+                gf_msg_debug (this->name, 0, "Session already running."
+                              " Not creating config file again.");
+        } else {
+                ret = create_conf_file (conf, conf_path);
+                if (ret || sys_lstat (conf_path, &stbuf)) {
+                        snprintf (errmsg, sizeof (errmsg), "Failed to create"
+                                  " config file(%s).", conf_path);
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED, "%s", errmsg);
+                        /* create_conf_file() may have returned 0 while the
+                         * file is still missing; never report that as
+                         * success. */
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = sys_lstat (statefile, &stbuf);
+        if (!ret) {
+                gf_msg_debug (this->name, 0, "Session already running."
+                              " Not creating status file again.");
+                goto out;
+        } else {
+                ret = glusterd_create_status_file (volinfo->volname, slave,
+                                                   slave_host, slave_vol,
+                                                   "Created");
+                if (ret || sys_lstat (statefile, &stbuf)) {
+                        snprintf (errmsg, sizeof (errmsg), "Unable to create %s"
+                                  ". Error : %s", statefile, strerror (errno));
+                        *op_errstr = gf_strdup (errmsg);
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED, "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit step of "geo-replication create".
+ *
+ * Reads the session parameters from @dict (volume/slave via
+ * glusterd_op_gsync_args_get(), plus "slave_vol", "slave_url", "slave_host",
+ * optional "ssh_port", "force", "push_pem", "slave_voluuid" and, for an
+ * existing session, "old_slavehost"), then:
+ *   - builds the "hooks_args" string and stores it in @dict,
+ *   - for an existing session, renames the old session working directory to
+ *     the one derived from the new slave host,
+ *   - creates the session directories/files,
+ *   - records the slave in the volume info,
+ *   - enables marker/changelog through glusterd_set_gsync_confs().
+ *
+ * @dict      operation dictionary (read, and extended with "hooks_args")
+ * @op_errstr receives a gf_strdup()'d error message on failure
+ * @rsp_dict  not used by this function
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        char                common_pem_file[PATH_MAX] = "";
+        char                errmsg[PATH_MAX]          = {0,};
+        char                hooks_args[PATH_MAX]      = "";
+        char                uuid_str [64]             = "";
+        char               *host_uuid                 = NULL;
+        char               *slave_url                 = NULL;
+        char               *slave_url_buf             = NULL;
+        char               *slave_user                = NULL;
+        char               *slave_ip                  = NULL;
+        char               *save_ptr                  = NULL;
+        char               *slave_host                = NULL;
+        char               *slave_vol                 = NULL;
+        char               *arg_buf                   = NULL;
+        char               *volname                   = NULL;
+        char               *slave                     = NULL;
+        int32_t             ret                       = -1;
+        int32_t             is_pem_push               = -1;
+        int32_t             ssh_port                  = 22;
+        int32_t             rename_errno              = 0;
+        gf_boolean_t        is_force                  = -1;
+        glusterd_conf_t    *conf                      = NULL;
+        glusterd_volinfo_t *volinfo                   = NULL;
+        xlator_t           *this                      = NULL;
+        char                old_working_dir[PATH_MAX] = {0};
+        char                new_working_dir[PATH_MAX] = {0};
+        char               *slave_info                = NULL;
+        char               *slave_voluuid             = NULL;
+        char               *old_slavehost             = NULL;
+        gf_boolean_t        is_existing_session       = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        ret = glusterd_op_gsync_args_get (dict, op_errstr,
+                                          &volname, &slave, &host_uuid);
+        if (ret)
+                goto out;
+
+        snprintf (common_pem_file, sizeof(common_pem_file),
+                  "%s"GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+                        "Volinfo for %s (master) not found", volname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave_vol", &slave_vol);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to fetch slave volume name.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave_url", &slave_url);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to fetch slave IP.");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        /* Fetch the slave_user and slave_ip from the slave_url.
+         * If the slave_user is not present. Use "root"
+         */
+        if (strstr(slave_url, "@")) {
+                slave_url_buf = gf_strdup (slave_url);
+                if (!slave_url_buf) {
+                        ret = -1;
+                        goto out;
+                }
+                /* Tokenize the private copy: strtok_r() writes NULs into its
+                 * input, and slave_url is owned by the dict. The copy stays
+                 * alive until "out", after the last use of both tokens. */
+                slave_user = strtok_r (slave_url_buf, "@", &save_ptr);
+                slave_ip = strtok_r (NULL, "@", &save_ptr);
+        } else {
+                slave_user = "root";
+                slave_ip = slave_url;
+        }
+
+        if (!slave_user || !slave_ip) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID,
+                        "Invalid slave url.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave_host", &slave_host);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to fetch slave host");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        /* ssh_port is optional: a missing key keeps the default of 22. */
+        ret = dict_get_int32 (dict, "ssh_port", &ssh_port);
+        if (ret < 0 && ret != -ENOENT) {
+                snprintf (errmsg, sizeof (errmsg), "Fetching ssh_port failed");
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        /* Only the node whose uuid matches host_uuid passes real arguments
+         * to the hooks script; every other node passes a placeholder. */
+        uuid_utoa_r (MY_UUID, uuid_str);
+        if (!strcmp (uuid_str, host_uuid)) {
+                ret = dict_get_int32 (dict, "push_pem", &is_pem_push);
+                if (!ret && is_pem_push) {
+                        gf_msg_debug (this->name, 0, "Trying to setup"
+                                      " pem files in slave");
+                        is_pem_push = 1;
+                } else
+                        is_pem_push = 0;
+
+                snprintf(hooks_args, sizeof(hooks_args),
+                         "is_push_pem=%d,pub_file=%s,slave_user=%s,slave_ip=%s,"
+                         "slave_vol=%s,ssh_port=%d", is_pem_push,
+                         common_pem_file, slave_user, slave_ip, slave_vol,
+                         ssh_port);
+        } else
+                snprintf(hooks_args, sizeof(hooks_args),
+                         "This argument will stop the hooks script");
+
+        arg_buf = gf_strdup (hooks_args);
+        if (!arg_buf) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+                        "Failed to gf_strdup");
+                if (is_force) {
+                        ret = 0;
+                        goto create_essentials;
+                }
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "hooks_args", arg_buf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set hooks_args in dict.");
+                if (is_force) {
+                        ret = 0;
+                        goto create_essentials;
+                }
+                goto out;
+        }
+
+create_essentials:
+        /* Fetch slave volume uuid, to get stored in volume info. */
+        ret = dict_get_str (dict, "slave_voluuid", &slave_voluuid);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Unable to fetch slave volume uuid from dict");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "%s", errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        is_existing_session = dict_get_str_boolean (dict, "existing_session",
+                                                    _gf_false);
+        if (is_existing_session) {
+                ret = dict_get_str (dict, "old_slavehost", &old_slavehost);
+                if (ret) {
+                        snprintf (errmsg, sizeof (errmsg),
+                                  "Unable to fetch old_slavehost");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "%s", errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Rename existing geo-rep session with new Slave Host */
+                snprintf (old_working_dir,
+                          sizeof (old_working_dir) - 1,
+                          "%s/"GEOREP"/%s_%s_%s", conf->workdir,
+                          volinfo->volname, old_slavehost,
+                          slave_vol);
+
+                snprintf (new_working_dir,
+                          sizeof (new_working_dir) - 1,
+                          "%s/"GEOREP"/%s_%s_%s", conf->workdir,
+                          volinfo->volname, slave_host, slave_vol);
+
+                ret = sys_rename (old_working_dir, new_working_dir);
+                /* Capture errno right away; the logging calls below may
+                 * overwrite it before it is inspected or printed. */
+                rename_errno = errno;
+                if (!ret) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_FORCE_CREATE_SESSION,
+                                "rename of old working dir %s to "
+                                "new working dir %s is done! ",
+                                old_working_dir, new_working_dir);
+                } else {
+                        if (rename_errno == ENOENT) {
+                                /* log error, but proceed with directory
+                                 * creation below */
+                                gf_msg_debug (this->name, 0,
+                                              "old_working_dir(%s) "
+                                              "not present.",
+                                              old_working_dir);
+                        } else {
+                                snprintf (errmsg, sizeof (errmsg),
+                                          "rename of old working dir %s to "
+                                          "new working dir %s failed! Error: %s",
+                                          old_working_dir, new_working_dir,
+                                          strerror (rename_errno));
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        GD_MSG_FORCE_CREATE_SESSION,
+                                        "rename of old working dir %s to "
+                                        "new working dir %s failed! Error: %s!",
+                                        old_working_dir, new_working_dir,
+                                        strerror (rename_errno));
+
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        ret = glusterd_create_essential_dir_files (volinfo, dict, slave,
+                                                   slave_host, slave_vol,
+                                                   op_errstr);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_slave_in_info (volinfo, slave,
+                                            host_uuid, slave_voluuid,
+                                            op_errstr, is_force);
+        if (ret) {
+                snprintf (errmsg, sizeof (errmsg), "Unable to store"
+                          " slave info.");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_STORE_ERROR,
+                        "%s", errmsg);
+                goto out;
+        }
+
+        /* Enable marker and changelog */
+        ret = glusterd_set_gsync_confs (volinfo);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_MARKER_START_FAIL, "marker/changelog"
+                        " start failed");
+                snprintf (errmsg, sizeof (errmsg),
+                          "Index initialization failed");
+
+                ret = -1;
+                goto out;
+        }
+
+out:
+        if (ret && errmsg[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+                        "%s", errmsg);
+                *op_errstr = gf_strdup (errmsg);
+        }
+
+        GF_FREE (slave_url_buf);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
new file mode 100644
index 00000000000..0524ec48fca
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_GEO_REP_H_
+#define _GLUSTERD_GEO_REP_H_
+
+/* Location of the gsyncd config template under GEOREP; the #ifndef lets a
+ * definition made before this header is included take precedence. */
+#ifndef GSYNC_CONF_TEMPLATE
+#define GSYNC_CONF_TEMPLATE GEOREP"/gsyncd_template.conf"
+#endif
+
+/* Size bound for a string of the form
+ * <slave host>::<slave volume>
+ * i.e. host name limit + volume name limit + 3 extra bytes. */
+#define SLAVE_URL_INFO_MAX (_POSIX_HOST_NAME_MAX + GD_VOLUME_NAME_MAX + 3)
+
+/* Size bound for a stored slave info string. slave info format:
+ * <master host uuid>:ssh://{<slave_user>@}<slave host>::<slave volume>
+ * :<slave volume uuid>
+ * Two uuid buffers, a login name, the slave URL part above and 10 bytes for
+ * the remaining literal characters are accounted for. */
+#define VOLINFO_SLAVE_URL_MAX (_POSIX_LOGIN_NAME_MAX + (2*GF_UUID_BUF_SIZE) \
+ + SLAVE_URL_INFO_MAX + 10)
+
+/* Bundle of a response dict, a volume and a node name.
+ * NOTE(review): the name suggests it is scratch state handed to a callback
+ * while collecting geo-rep status -- confirm against its users. */
+typedef struct glusterd_gsync_status_temp {
+ dict_t *rsp_dict;
+ glusterd_volinfo_t *volinfo;
+ char *node;
+} glusterd_gsync_status_temp_t;
+
+/* Pairs a volume with an "is_active" integer flag; this is the parameter type
+ * of glusterd_check_geo_rep_running() declared in this header.
+ * NOTE(review): is_active presumably reports whether a session is running --
+ * confirm against the definition. */
+typedef struct gsync_status_param {
+ int is_active;
+ glusterd_volinfo_t *volinfo;
+} gsync_status_param_t;
+
+/* Declarations only; the definitions are not part of this header. */
+
+/* NOTE(review): status and is_template_in_use are pointer arguments and look
+ * like out-parameters -- confirm against the definition. */
+int
+gsync_status (char *master, char *slave, char *conf_path,
+ int *status, gf_boolean_t *is_template_in_use);
+
+/* NOTE(review): flag is presumably written with the result of the check --
+ * confirm against the definition. */
+void
+glusterd_check_geo_rep_configured (glusterd_volinfo_t *volinfo,
+ gf_boolean_t *flag);
+/* Has the (dict, key, value, data) shape of a dict_foreach() callback. */
+int
+_get_slave_status (dict_t *dict, char *key, data_t *value, void *data);
+/* Takes a gsync_status_param_t and an error-string out-pointer. */
+int
+glusterd_check_geo_rep_running (gsync_status_param_t *param, char **op_errstr);
+#endif
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
new file mode 100644
index 00000000000..91ae6237c54
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -0,0 +1,5405 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "syscall.h"
+#include "timer.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "run.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-store.h"
+#include "glusterd-locks.h"
+#include "glusterd-snapshot-utils.h"
+
+#include "glusterd1-xdr.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "rpc-clnt.h"
+#include "glusterd-volgen.h"
+#include "glusterd-mountbroker.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+
+#include <sys/resource.h>
+#include <inttypes.h>
+
+#include "common-utils.h"
+
+#include "globals.h"
+#include "glusterd-syncop.h"
+#include "glusterd-messages.h"
+
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
+extern glusterd_op_info_t opinfo;
+
+/*
+ * Invoke @notify_fn (rpc, mydata, event, data) while holding the big_lock of
+ * the current xlator's glusterd configuration, and hand back its result.
+ */
+int
+glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata,
+                            rpc_clnt_event_t event,
+                            void *data, rpc_clnt_notify_t notify_fn)
+{
+        glusterd_conf_t *conf   = NULL;
+        int              status = -1;
+
+        conf = THIS->private;
+
+        synclock_lock (&conf->big_lock);
+        {
+                status = notify_fn (rpc, mydata, event, data);
+        }
+        synclock_unlock (&conf->big_lock);
+
+        return status;
+}
+
+/*
+ * Invoke the RPC actor @actor_fn on @req while holding the big_lock of the
+ * current xlator's glusterd configuration, and hand back its result.
+ */
+int
+glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn)
+{
+        glusterd_conf_t *conf   = NULL;
+        int              status = -1;
+
+        conf = THIS->private;
+
+        synclock_lock (&conf->big_lock);
+        {
+                status = actor_fn (req);
+        }
+        synclock_unlock (&conf->big_lock);
+
+        return status;
+}
+
+/*
+ * Handle an incoming friend request.
+ *
+ * The peer is looked up by @uuid and by the remote host name obtained from
+ * @req. An unknown peer is answered with GF_PROBE_UNKNOWN_PEER. For a known
+ * peer a GD_FRIEND_EVENT_RCVD_FRIEND_REQ event is created, given a context
+ * holding the uuid, a copy of @hostname, @req and the dict unserialized from
+ * friend_req->vols, and injected into the friend state machine.
+ *
+ * Returns 0 on success, GLUSTERD_CONNECTION_AWAITED when the event was
+ * injected but peerinfo->connected is 0, and the failing step's non-zero
+ * value otherwise. On any other non-zero return the context, dict, event and
+ * the friend_req->vols.vols_val buffer are released here.
+ */
+static int
+glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid,
+ char *hostname, int port,
+ gd1_mgmt_friend_req *friend_req)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_req_ctx_t *ctx = NULL;
+ char rhost[UNIX_PATH_MAX + 1] = {0};
+ uuid_t friend_uuid = {0};
+ dict_t *dict = NULL;
+
+ /* NOTE(review): friend_uuid is filled here but not read again in this
+ * function. */
+ gf_uuid_parse (uuid_utoa (uuid), friend_uuid);
+ if (!port)
+ port = GF_DEFAULT_BASE_PORT;
+
+ /* The return value is overwritten below without being checked; rhost
+ * stays zero-initialised if nothing was written to it. */
+ ret = glusterd_remote_hostname_get (req, rhost, sizeof (rhost));
+
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find (uuid, rhost);
+
+ if (peerinfo == NULL) {
+ ret = glusterd_xfer_friend_add_resp (req, hostname, rhost, port,
+ -1, GF_PROBE_UNKNOWN_PEER);
+ /* Release the payload now and clear the pointer so the cleanup at
+ * "out" does not free it a second time. */
+ if (friend_req->vols.vols_val) {
+ free (friend_req->vols.vols_val);
+ friend_req->vols.vols_val = NULL;
+ }
+ goto out;
+ }
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_RCVD_FRIEND_REQ, &event);
+
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_NEW_GET_FAIL,
+ "event generation failed: %d", ret);
+ goto out;
+ }
+
+ event->peername = gf_strdup (peerinfo->hostname);
+ gf_uuid_copy (event->peerid, peerinfo->uuid);
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t);
+
+ if (!ctx) {
+ gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+ GD_MSG_NO_MEMORY, "Unable to allocate memory");
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy (ctx->uuid, uuid);
+ if (hostname)
+ ctx->hostname = gf_strdup (hostname);
+ ctx->req = req;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (friend_req->vols.vols_val,
+ friend_req->vols.vols_len,
+ &dict);
+
+ if (ret)
+ goto out;
+ else
+ /* Record the raw buffer on the dict; the cleanup at "out" checks this
+ * field to decide whether it must free vols_val itself. */
+ dict->extra_stdfree = friend_req->vols.vols_val;
+
+ ctx->vols = dict;
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event (event);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject event %d, "
+ "ret = %d", event->event, ret);
+ goto out;
+ }
+
+ ret = 0;
+ if (peerinfo && (0 == peerinfo->connected))
+ ret = GLUSTERD_CONNECTION_AWAITED;
+
+out:
+ rcu_read_unlock ();
+
+ /* GLUSTERD_CONNECTION_AWAITED is not a failure: the event was injected,
+ * so nothing is released in that case. */
+ if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) {
+ if (ctx && ctx->hostname)
+ GF_FREE (ctx->hostname);
+ GF_FREE (ctx);
+ if (dict) {
+ /* Free vols_val directly only when it was not attached to the dict
+ * above. */
+ if ((!dict->extra_stdfree) &&
+ friend_req->vols.vols_val)
+ free (friend_req->vols.vols_val);
+ dict_unref (dict);
+ } else {
+ free (friend_req->vols.vols_val);
+ }
+ if (event)
+ GF_FREE (event->peername);
+ GF_FREE (event);
+ }
+
+
+ return ret;
+}
+
+/*
+ * Handle an incoming remove-friend request.
+ *
+ * An unknown peer only gets a remove response. For a known peer a
+ * GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND event carrying the uuid, the host name
+ * and @req is injected into the friend state machine.
+ *
+ * Returns 0 on success; on a non-zero return the event and its context are
+ * released before returning.
+ */
+static int
+glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid,
+                              char *hostname, int port)
+{
+        int                         ret      = -1;
+        glusterd_peerinfo_t        *peer     = NULL;
+        glusterd_friend_sm_event_t *sm_event = NULL;
+        glusterd_friend_req_ctx_t  *req_ctx  = NULL;
+
+        port = port ? port : GF_DEFAULT_BASE_PORT;
+
+        rcu_read_lock ();
+
+        peer = glusterd_peerinfo_find (uuid, hostname);
+        if (!peer) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+                        GD_MSG_REQ_FROM_UNKNOWN_PEER,
+                        "Received remove-friend from unknown peer %s",
+                        hostname);
+                ret = glusterd_xfer_friend_remove_resp (req, hostname,
+                                                        port);
+                goto out;
+        }
+
+        ret = glusterd_friend_sm_new_event
+                        (GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, &sm_event);
+        if (ret != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_NEW_GET_FAIL,
+                        "event generation failed: %d", ret);
+                goto out;
+        }
+
+        sm_event->peername = gf_strdup (hostname);
+        gf_uuid_copy (sm_event->peerid, uuid);
+
+        req_ctx = GF_CALLOC (1, sizeof (*req_ctx), gf_gld_mt_friend_req_ctx_t);
+        if (req_ctx == NULL) {
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Unable to allocate memory");
+                ret = -1;
+                goto out;
+        }
+
+        /* Fill in the request context and attach it to the event. */
+        gf_uuid_copy (req_ctx->uuid, uuid);
+        if (hostname != NULL)
+                req_ctx->hostname = gf_strdup (hostname);
+        req_ctx->req = req;
+        sm_event->ctx = req_ctx;
+
+        ret = glusterd_friend_sm_inject_event (sm_event);
+        if (ret != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL, "Unable to inject event %d, "
+                        "ret = %d", sm_event->event, ret);
+                goto out;
+        }
+
+out:
+        rcu_read_unlock ();
+
+        if (ret == 0)
+                return ret;
+
+        /* Failure: nothing was handed over, so release what was built. */
+        if (req_ctx && req_ctx->hostname)
+                GF_FREE (req_ctx->hostname);
+        GF_FREE (req_ctx);
+        if (sm_event)
+                GF_FREE (sm_event->peername);
+        GF_FREE (sm_event);
+
+        return ret;
+}
+
+/* State passed to _build_option_key() through dict_foreach(). */
+struct args_pack {
+ dict_t *dict; /* dictionary the "volume<N>.option.<key>" entries go into */
+ int vol_count; /* volume index <N> used when building those keys */
+ int opt_count; /* number of options stored successfully so far */
+};
+
+/*
+ * dict_foreach() callback: copy option @k with value @v into the args_pack
+ * passed as @tmp, under the key "volume<vol_count>.option.<k>", and count it
+ * when the store succeeds.
+ *
+ * Skipped without being copied:
+ *   - GLUSTERD_GLOBAL_OPT_VERSION,
+ *   - "features.limit-usage" / "features.soft-limit" when the cluster
+ *     op-version is above GD_OP_VERSION_MIN,
+ *   - "snap-max-hard-limit" / "snap-max-soft-limit", which are system
+ *     options managed by snapshot config and must not show up in
+ *     gluster volume info.
+ *
+ * Always returns 0. @d is not used.
+ */
+static int
+_build_option_key (dict_t *d, char *k, data_t *v, void *tmp)
+{
+        char               reconfig_key[256] = {0, };
+        struct args_pack  *args              = NULL;
+        xlator_t          *this              = NULL;
+        glusterd_conf_t   *priv              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        args = tmp;
+
+        if (!strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION))
+                return 0;
+
+        if ((priv->op_version > GD_OP_VERSION_MIN) &&
+            (!strcmp (k, "features.limit-usage") ||
+             !strcmp (k, "features.soft-limit")))
+                return 0;
+
+        if (!strcmp (k, "snap-max-hard-limit") ||
+            !strcmp (k, "snap-max-soft-limit"))
+                return 0;
+
+        snprintf (reconfig_key, sizeof (reconfig_key), "volume%d.option.%s",
+                  args->vol_count, k);
+
+        /* A failed store is tolerated; the option is simply not counted. */
+        if (dict_set_str (args->dict, reconfig_key, v->data) == 0)
+                args->opt_count++;
+
+        return 0;
+}
+
+/* Store @value in @dict under the key "volume<count>.<field>". */
+static int
+gd_tier_detail_set_int32 (dict_t *dict, int count, const char *field,
+                          int32_t value)
+{
+        char key[256] = {0,};
+
+        snprintf (key, sizeof (key), "volume%d.%s", count, field);
+        return dict_set_int32 (dict, key, value);
+}
+
+/*
+ * Add the tier-specific counters of @volinfo to @dict, each under a
+ * "volume<count>.<field>" key: the cold tier's type, brick, dist, replica,
+ * arbiter, disperse and redundancy counts, then the hot tier's type, brick
+ * and replica counts.
+ *
+ * Returns 0 on success, or the first non-zero dict_set_int32() result.
+ */
+int
+glusterd_add_tier_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+                                         dict_t *dict, int count)
+{
+        int ret = -1;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (dict);
+
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_type",
+                                        volinfo->tier_info.cold_type);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_brick_count",
+                                        volinfo->tier_info.cold_brick_count);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_dist_count",
+                                        volinfo->tier_info.cold_dist_leaf_count);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_replica_count",
+                                        volinfo->tier_info.cold_replica_count);
+        if (ret)
+                goto out;
+
+        /* The arbiter count is read from the volume itself, not tier_info. */
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_arbiter_count",
+                                        volinfo->arbiter_count);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_disperse_count",
+                                        volinfo->tier_info.cold_disperse_count);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "cold_redundancy_count",
+                                        volinfo->tier_info.cold_redundancy_count);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "hot_type",
+                                        volinfo->tier_info.hot_type);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "hot_brick_count",
+                                        volinfo->tier_info.hot_brick_count);
+        if (ret)
+                goto out;
+
+        ret = gd_tier_detail_set_int32 (dict, count, "hot_replica_count",
+                                        volinfo->tier_info.hot_replica_count);
+        if (ret)
+                goto out;
+
+out:
+        return ret;
+
+}
+
+/*
+ * Flag the arbiter bricks of @volinfo in @volumes by setting
+ * "volume<count>.brick<i>.isArbiter" to 1 for the last brick of every
+ * replica group.
+ *
+ * For a tiered volume only the cold tier is considered: brick numbering
+ * starts after the hot bricks and groups have cold_replica_count members.
+ * Nothing is added when the group size is 1 or arbiter_count is not 1.
+ *
+ * Returns 0 on success, or the failing dict_set_int32() result.
+ */
+int
+glusterd_add_arbiter_info_to_bricks (glusterd_volinfo_t *volinfo,
+                                     dict_t *volumes, int count)
+{
+        char key[256]    = {0, };
+        int  brick_idx   = 0;
+        int  first_brick = 0;
+        int  group_size  = 0;
+        int  ret         = 0;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                /*TODO: Add info for hot tier once attach tier of arbiter
+                 * volumes is supported. */
+
+                /* cold tier */
+                group_size  = volinfo->tier_info.cold_replica_count;
+                first_brick = volinfo->tier_info.hot_brick_count + 1;
+        } else {
+                group_size  = volinfo->replica_count;
+                first_brick = 1;
+        }
+
+        if ((group_size == 1) || (volinfo->arbiter_count != 1))
+                return 0;
+
+        for (brick_idx = first_brick; brick_idx <= volinfo->brick_count;
+             brick_idx++) {
+                /* Only the last brick of each group is the arbiter. */
+                if ((brick_idx - first_brick + 1) % group_size != 0)
+                        continue;
+
+                snprintf (key, sizeof (key), "volume%d.brick%d.isArbiter",
+                          count, brick_idx);
+                ret = dict_set_int32 (volumes, key, 1);
+                if (ret)
+                        return ret;
+        }
+
+        return 0;
+}
+
+/*
+ * Serialize the details of @volinfo into @volumes using keys prefixed with
+ * "volume<count>.": name, type, status, the various brick/replica/disperse
+ * counts, transport, volume id, rebalance command, (with HAVE_BD_XLATOR) the
+ * BD capabilities, every brick as "<host>:<path>" with its uuid, the arbiter
+ * flags, and finally the volume's and glusterd's options together with their
+ * count.
+ *
+ * Returns 0 on success and non-zero on failure; a failed string duplication
+ * is reported as -1.
+ */
+int
+glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+                                    dict_t  *volumes, int count)
+{
+
+        int                     ret             = -1;
+        char                    key[256]        = {0, };
+        glusterd_brickinfo_t   *brickinfo       = NULL;
+        char                   *buf             = NULL;
+        int                     i               = 1;
+        dict_t                 *dict            = NULL;
+        glusterd_conf_t        *priv            = NULL;
+        char                   *volume_id_str   = NULL;
+        struct args_pack        pack            = {0,};
+        xlator_t               *this            = NULL;
+        GF_UNUSED int           caps            = 0;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (volumes);
+
+        this = THIS;
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        snprintf (key, sizeof (key), "volume%d.name", count);
+        ret = dict_set_str (volumes, key, volinfo->volname);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.type", count);
+        ret = dict_set_int32 (volumes, key, volinfo->type);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.status", count);
+        ret = dict_set_int32 (volumes, key, volinfo->status);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.brick_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->brick_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.hot_brick_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->tier_info.hot_brick_count);
+        if (ret)
+                goto out;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                ret = glusterd_add_tier_volume_detail_to_dict (volinfo,
+                                                        volumes, count);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (key, sizeof (key), "volume%d.dist_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->dist_leaf_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.stripe_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->stripe_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.replica_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->replica_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.disperse_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->disperse_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.redundancy_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->redundancy_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.arbiter_count", count);
+        ret = dict_set_int32 (volumes, key, volinfo->arbiter_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.transport", count);
+        ret = dict_set_int32 (volumes, key, volinfo->transport_type);
+        if (ret)
+                goto out;
+
+        volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id));
+        if (!volume_id_str) {
+                /* ret is still 0 from the previous call; without this the
+                 * allocation failure would be reported as success. */
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "volume%d.volume_id", count);
+        ret = dict_set_dynstr (volumes, key, volume_id_str);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "volume%d.rebalance", count);
+        ret = dict_set_int32 (volumes, key, volinfo->rebal.defrag_cmd);
+        if (ret)
+                goto out;
+
+#ifdef HAVE_BD_XLATOR
+        if (volinfo->caps) {
+                caps = 0;
+                snprintf (key, sizeof (key), "volume%d.xlator0", count);
+                buf = GF_MALLOC (256, gf_common_mt_char);
+                if (!buf) {
+                        ret = ENOMEM;
+                        goto out;
+                }
+                if (volinfo->caps & CAPS_BD)
+                        snprintf (buf, 256, "BD");
+                ret = dict_set_dynstr (volumes, key, buf);
+                if (ret) {
+                        GF_FREE (buf);
+                        goto out;
+                }
+
+                if (volinfo->caps & CAPS_THIN) {
+                        snprintf (key, sizeof (key), "volume%d.xlator0.caps%d",
+                                  count, caps++);
+                        buf = GF_MALLOC (256, gf_common_mt_char);
+                        if (!buf) {
+                                ret = ENOMEM;
+                                goto out;
+                        }
+                        snprintf (buf, 256, "thin");
+                        ret = dict_set_dynstr (volumes, key, buf);
+                        if (ret) {
+                                GF_FREE (buf);
+                                goto out;
+                        }
+                }
+
+                if (volinfo->caps & CAPS_OFFLOAD_COPY) {
+                        snprintf (key, sizeof (key), "volume%d.xlator0.caps%d",
+                                  count, caps++);
+                        buf = GF_MALLOC (256, gf_common_mt_char);
+                        if (!buf) {
+                                ret = ENOMEM;
+                                goto out;
+                        }
+                        snprintf (buf, 256, "offload_copy");
+                        ret = dict_set_dynstr (volumes, key, buf);
+                        if (ret) {
+                                GF_FREE (buf);
+                                goto out;
+                        }
+                }
+
+                if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) {
+                        snprintf (key, sizeof (key), "volume%d.xlator0.caps%d",
+                                  count, caps++);
+                        buf = GF_MALLOC (256, gf_common_mt_char);
+                        if (!buf) {
+                                ret = ENOMEM;
+                                goto out;
+                        }
+                        snprintf (buf, 256, "offload_snapshot");
+                        ret = dict_set_dynstr (volumes, key, buf);
+                        if (ret)  {
+                                GF_FREE (buf);
+                                goto out;
+                        }
+                }
+
+                if (volinfo->caps & CAPS_OFFLOAD_ZERO) {
+                        snprintf (key, sizeof (key), "volume%d.xlator0.caps%d",
+                                  count, caps++);
+                        buf = GF_MALLOC (256, gf_common_mt_char);
+                        if (!buf) {
+                                ret = ENOMEM;
+                                goto out;
+                        }
+                        snprintf (buf, 256, "offload_zerofill");
+                        ret = dict_set_dynstr (volumes, key, buf);
+                        if (ret)  {
+                                GF_FREE (buf);
+                                goto out;
+                        }
+                }
+
+        }
+#endif
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                char    brick[1024] = {0,};
+                char    brick_uuid[64] = {0,};
+                snprintf (key, sizeof (key), "volume%d.brick%d", count, i);
+                snprintf (brick, sizeof (brick), "%s:%s", brickinfo->hostname,
+                          brickinfo->path);
+                buf = gf_strdup (brick);
+                if (!buf) {
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_dynstr (volumes, key, buf);
+                if (ret)
+                        goto out;
+                snprintf (key, sizeof (key), "volume%d.brick%d.uuid", count, i);
+                snprintf (brick_uuid, sizeof (brick_uuid), "%s",
+                          uuid_utoa (brickinfo->uuid));
+                buf = gf_strdup (brick_uuid);
+                if (!buf) {
+                        /* ret is 0 here; make the failure visible. */
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_dynstr (volumes, key, buf);
+                if (ret)
+                        goto out;
+
+#ifdef HAVE_BD_XLATOR
+                if (volinfo->caps & CAPS_BD) {
+                        snprintf (key, sizeof (key), "volume%d.vg%d", count, i);
+                        snprintf (brick, sizeof (brick), "%s", brickinfo->vg);
+                        buf = gf_strdup (brick);
+                        if (!buf) {
+                                ret = -1;
+                                goto out;
+                        }
+                        ret = dict_set_dynstr (volumes, key, buf);
+                        if (ret)
+                                goto out;
+                }
+#endif
+                i++;
+        }
+        ret = glusterd_add_arbiter_info_to_bricks (volinfo, volumes, count);
+        if (ret)
+                goto out;
+
+        /* A volume without an options dict is complete at this point. */
+        dict = volinfo->dict;
+        if (!dict) {
+                ret = 0;
+                goto out;
+        }
+
+        /* Collect the volume's own options, then glusterd's options, into
+         * the same "volume<count>.option.*" namespace. */
+        pack.dict = volumes;
+        pack.vol_count = count;
+        pack.opt_count = 0;
+        dict_foreach (dict, _build_option_key, (void *) &pack);
+        dict_foreach (priv->opts, _build_option_key, &pack);
+
+        snprintf (key, sizeof (key), "volume%d.opt_count", pack.vol_count);
+        ret = dict_set_int32 (volumes, key, pack.opt_count);
+out:
+        return ret;
+}
+
+/*
+ * glusterd_op_txn_begin - start an op state machine transaction for a
+ * command handled on this node.
+ *
+ * Steps, in order:
+ *   1. generate a transaction id and store it in the request dict (@ctx);
+ *   2. store the originator uuid in the dict;
+ *   3. take the local lock: glusterd_lock() when priv->op_version is below
+ *      GD_OP_VERSION_3_6_0, otherwise a mgmt_v3 "vol" lock on the "volname"
+ *      found in the dict (no lock is taken when the dict has no volname);
+ *   4. save the transaction opinfo under the transaction id;
+ *   5. inject the first state machine event.
+ *
+ * @req:     request that triggered the operation
+ * @op:      operation being started; asserted to lie strictly between
+ *           GD_OP_NONE and GD_OP_MAX
+ * @ctx:     request dictionary (used as a dict_t *); asserted non-NULL
+ * @err_str: buffer that receives a user-facing message when locking fails
+ * @err_len: size of @err_str
+ *
+ * Returns 0 on success and non-zero on failure.  A lock acquired here is
+ * released again before a failure is returned.
+ */
+int32_t
+glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+                       char *err_str, size_t err_len)
+{
+        int32_t                      ret         = -1;
+        dict_t                      *dict        = NULL;
+        xlator_t                    *this        = NULL;
+        glusterd_conf_t             *priv        = NULL;
+        int32_t                      locked      = 0;
+        char                        *tmp         = NULL;
+        char                        *volname     = NULL;
+        uuid_t                      *txn_id      = NULL;
+        glusterd_op_info_t           txn_op_info = {{0},};
+        glusterd_op_sm_event_type_t  event_type  = GD_OP_EVENT_NONE;
+        uint32_t                     op_errno    = 0;
+
+        GF_ASSERT (req);
+        GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX));
+        GF_ASSERT (NULL != ctx);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        dict = ctx;
+
+        /* Generate a transaction-id for this operation and
+         * save it in the dict. This transaction id distinguishes
+         * each transaction, and helps separate opinfos in the
+         * op state machine. */
+        ret = glusterd_generate_txn_id (dict, &txn_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_IDGEN_FAIL,
+                        "Failed to generate transaction id");
+                goto out;
+        }
+
+        /* Save the MY_UUID as the originator_uuid. This originator_uuid
+         * will be used by is_origin_glusterd() to determine if a node
+         * is the originator node for a command. */
+        ret = glusterd_set_originator_uuid (dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUID_SET_FAIL,
+                        "Failed to set originator_uuid.");
+                goto out;
+        }
+
+        /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+        if (priv->op_version < GD_OP_VERSION_3_6_0) {
+                ret = glusterd_lock (MY_UUID);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GLUSTERD_LOCK_FAIL,
+                                "Unable to acquire lock on localhost, ret: %d",
+                                ret);
+                        snprintf (err_str, err_len,
+                                  "Another transaction is in progress. "
+                                  "Please try again after sometime.");
+                        goto out;
+                }
+        } else {
+                /* If no volname is given as a part of the command, locks will
+                 * not be held */
+                ret = dict_get_str (dict, "volname", &tmp);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_INFO, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "No Volume name present. "
+                                "Locks not being held.");
+                        goto local_locking_done;
+                }
+
+                /* Use a copy of volname, as cli response will be
+                 * sent before the unlock, and the volname in the
+                 * dict, might be removed */
+                volname = gf_strdup (tmp);
+                if (!volname) {
+                        /* dict_get_str() just succeeded, so ret is 0 here;
+                         * without this the allocation failure would be
+                         * returned to the caller as success. */
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_mgmt_v3_lock (volname, MY_UUID, &op_errno,
+                                             "vol");
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Unable to acquire lock for %s", volname);
+                        snprintf (err_str, err_len,
+                                  "Another transaction is in progress for %s. "
+                                  "Please try again after sometime.", volname);
+                        goto out;
+                }
+        }
+
+        locked = 1;
+        gf_msg_debug (this->name, 0, "Acquired lock on localhost");
+
+local_locking_done:
+        /* If no volname is given as a part of the command, locks will
+         * not be held, hence sending stage event. */
+        if (volname || (priv->op_version < GD_OP_VERSION_3_6_0))
+                event_type = GD_OP_EVENT_START_LOCK;
+        else {
+                txn_op_info.state.state = GD_OP_STATE_LOCK_SENT;
+                event_type = GD_OP_EVENT_ALL_ACC;
+        }
+
+        /* Save opinfo for this transaction with the transaction id */
+        glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req);
+
+        ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+                if (ctx)
+                        dict_unref (ctx);
+                goto out;
+        }
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL, "Failed to acquire cluster"
+                        " lock.");
+                goto out;
+        }
+
+out:
+        if (locked && ret) {
+                /* Based on the op-version, we release the
+                 * cluster or mgmt_v3 lock */
+                if (priv->op_version < GD_OP_VERSION_3_6_0)
+                        glusterd_unlock (MY_UUID);
+                else {
+                        ret = glusterd_mgmt_v3_unlock (volname, MY_UUID,
+                                                       "vol");
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                        "Unable to release lock for %s",
+                                        volname);
+                        /* the transaction failed regardless of how the
+                         * unlock went */
+                        ret = -1;
+                }
+        }
+
+        if (volname)
+                GF_FREE (volname);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cluster_lock - handle a cluster LOCK request from a peer.
+ *
+ * Decodes the request, rejects it when the sending uuid is not found in the
+ * peer list, then builds a lock context plus a fresh op dict, stores the
+ * transaction opinfo under priv->global_txn_id and injects GD_OP_EVENT_LOCK.
+ * The friend and op state machines are run before returning, except on the
+ * early return taken when the lock context cannot be allocated.
+ *
+ * @req: the incoming request; asserted non-NULL.
+ *
+ * Returns 0 when the event was injected, non-zero otherwise.
+ */
+int
+__glusterd_handle_cluster_lock (rpcsvc_request_t *req)
+{
+        dict_t                      *op_ctx      = NULL;
+        int32_t                      ret         = -1;
+        gd1_mgmt_cluster_lock_req    lock_req    = {{0},};
+        glusterd_op_lock_ctx_t      *ctx         = NULL;
+        glusterd_op_sm_event_type_t  op          = GD_OP_EVENT_LOCK;
+        glusterd_op_info_t           txn_op_info = {{0},};
+        glusterd_conf_t             *priv        = NULL;
+        uuid_t                      *txn_id      = NULL;
+        xlator_t                    *this        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        txn_id = &priv->global_txn_id;
+
+        ret = xdr_to_generic (req->msg[0], &lock_req,
+                              (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode lock "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "Received LOCK from uuid: %s",
+                      uuid_utoa (lock_req.uuid));
+
+        /* ret becomes 1 when the sender is unknown, 0 when it is a peer */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find_by_uuid (lock_req.uuid) == NULL);
+        rcu_read_unlock ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (lock_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+
+        if (!ctx) {
+                //respond here
+                return -1;
+        }
+
+        gf_uuid_copy (ctx->uuid, lock_req.uuid);
+        ctx->req = req;
+        ctx->dict = NULL;
+
+        op_ctx = dict_new ();
+        if (!op_ctx) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Unable to set new dict");
+                /* ret is still 0 from the peer check; report the failure
+                 * and release the context nobody else has seen yet. */
+                ret = -1;
+                GF_FREE (ctx);
+                ctx = NULL;
+                goto out;
+        }
+
+        glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req);
+
+        ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+                dict_unref (txn_op_info.op_ctx);
+                /* ctx was never handed to the state machine */
+                GF_FREE (ctx);
+                ctx = NULL;
+                goto out;
+        }
+
+        /* NOTE(review): ctx is not freed when the inject below fails because
+         * this block cannot see whether a failed inject retains the pointer;
+         * confirm ownership and free it here too if it does not. */
+        ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL,
+                        "Failed to inject event GD_OP_EVENT_LOCK");
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/*
+ * Entry point for the cluster lock request: passes @req and the real
+ * handler, __glusterd_handle_cluster_lock(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_cluster_lock (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cluster_lock);
+
+        return ret;
+}
+
+/*
+ * glusterd_req_ctx_create - build the request context for an op received
+ * from a peer.
+ *
+ * Allocates a new dict and a glusterd_req_ctx_t (accounted under @mem_type),
+ * copies @uuid and @op into the context and unserializes @buf_val/@buf_len
+ * into the dict.  On success the context (holding the dict and @rpc_req) is
+ * stored in *@req_ctx_out and 0 is returned.  On failure both the dict and
+ * the context are released, *@req_ctx_out is left untouched and a non-zero
+ * value is returned.
+ */
+static int
+glusterd_req_ctx_create (rpcsvc_request_t *rpc_req,
+                         int op, uuid_t uuid,
+                         char *buf_val, size_t buf_len,
+                         gf_gld_mem_types_t mem_type,
+                         glusterd_req_ctx_t **req_ctx_out)
+{
+        xlator_t           *this         = THIS;
+        glusterd_req_ctx_t *new_ctx      = NULL;
+        dict_t             *op_dict      = NULL;
+        char                uuid_buf[50] = {0,};
+        int                 ret          = -1;
+
+        GF_ASSERT (this);
+
+        gf_uuid_unparse (uuid, uuid_buf);
+        gf_msg_debug (this->name, 0, "Received op from uuid %s", uuid_buf);
+
+        op_dict = dict_new ();
+        if (!op_dict)
+                goto out;
+
+        new_ctx = GF_CALLOC (1, sizeof (*new_ctx), mem_type);
+        if (!new_ctx)
+                goto out;
+
+        gf_uuid_copy (new_ctx->uuid, uuid);
+        new_ctx->op = op;
+
+        ret = dict_unserialize (buf_val, buf_len, &op_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize the dictionary");
+                goto out;
+        }
+
+        /* everything succeeded: publish the context to the caller */
+        new_ctx->dict = op_dict;
+        new_ctx->req  = rpc_req;
+        *req_ctx_out  = new_ctx;
+        ret = 0;
+out:
+        if (ret) {
+                /* undo whatever part of the setup was completed */
+                if (op_dict)
+                        dict_unref (op_dict);
+                GF_FREE (new_ctx);
+        }
+        return ret;
+}
+
+/*
+ * __glusterd_handle_stage_op - handle a STAGE op request from a peer.
+ *
+ * Decodes the request, rejects it when the sending uuid is not found in the
+ * peer list, builds the request context (which unserializes the op dict),
+ * makes sure a transaction opinfo exists for the transaction id and injects
+ * GD_OP_EVENT_STAGE_OP.  The xdr-allocated payload buffer is freed and the
+ * friend and op state machines are run before returning.
+ *
+ * The peer check is done before the request context is created so that
+ * nothing is allocated (and then leaked on the early exit) on behalf of a
+ * sender that is not part of the cluster; __glusterd_handle_commit_op uses
+ * the same order.
+ *
+ * @req: the incoming request; asserted non-NULL.
+ *
+ * Returns 0 when the event was injected, non-zero otherwise.
+ */
+int
+__glusterd_handle_stage_op (rpcsvc_request_t *req)
+{
+        int32_t                      ret         = -1;
+        glusterd_req_ctx_t          *req_ctx     = NULL;
+        gd1_mgmt_stage_op_req        op_req      = {{0},};
+        xlator_t                    *this        = NULL;
+        uuid_t                      *txn_id      = NULL;
+        glusterd_op_info_t           txn_op_info = {{0},};
+        glusterd_op_sm_state_info_t  state       = {0,};
+        glusterd_conf_t             *priv        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        /* default transaction id, used unless the request dict carries one */
+        txn_id = &priv->global_txn_id;
+
+        ret = xdr_to_generic (req->msg[0], &op_req,
+                              (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode stage "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* ret becomes 1 when the sender is unknown, 0 when it is a peer */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL);
+        rcu_read_unlock ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (op_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid,
+                                       op_req.buf.buf_val, op_req.buf.buf_len,
+                                       gf_gld_mt_op_stage_ctx_t, &req_ctx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_CTX_CREATE_FAIL, "Failed to create req_ctx");
+                goto out;
+        }
+
+        /* The result is deliberately not checked: ret is overwritten below.
+         * NOTE(review): presumably txn_id keeps the global id when the key
+         * is absent - confirm against dict_get_bin(). */
+        ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
+        gf_msg_debug (this->name, 0, "transaction ID = %s",
+                      uuid_utoa (*txn_id));
+
+        /* In cases where there is no volname, the receivers won't have a
+         * transaction opinfo created, as for those operations, the locking
+         * phase where the transaction opinfos are created, won't be called. */
+        ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "No transaction's opinfo set");
+
+                state.state = GD_OP_STATE_LOCKED;
+                glusterd_txn_opinfo_init (&txn_op_info, &state, &op_req.op,
+                                          req_ctx->dict, req);
+
+                ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_TRANS_OPINFO_SET_FAIL,
+                                "Unable to set transaction's opinfo");
+                        dict_unref (req_ctx->dict);
+                        goto out;
+                }
+        }
+
+        ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP,
+                                           txn_id, req_ctx);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL,
+                        "Failed to inject event GD_OP_EVENT_STAGE_OP");
+
+ out:
+        free (op_req.buf.buf_val);//malloced by xdr
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+        return ret;
+}
+
+/*
+ * Entry point for the stage op request: passes @req and the real handler,
+ * __glusterd_handle_stage_op(), to glusterd_big_locked_handler() and returns
+ * that call's result.
+ */
+int
+glusterd_handle_stage_op (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_stage_op);
+
+        return ret;
+}
+
+
+/*
+ * __glusterd_handle_commit_op - handle a COMMIT op request from a peer.
+ *
+ * Decodes the request, rejects it when the sending uuid is not found in the
+ * peer list, builds the request context from the payload and injects
+ * GD_OP_EVENT_COMMIT_OP under the transaction id.  The xdr-allocated payload
+ * buffer is freed and the friend and op state machines are run before
+ * returning.
+ *
+ * Returns the result of the event injection, or a non-zero value when an
+ * earlier step failed.
+ */
+int
+__glusterd_handle_commit_op (rpcsvc_request_t *req)
+{
+        xlator_t               *this       = THIS;
+        glusterd_conf_t        *priv       = NULL;
+        glusterd_req_ctx_t     *commit_ctx = NULL;
+        uuid_t                 *txn_id     = NULL;
+        gd1_mgmt_commit_op_req  commit_req = {{0},};
+        int32_t                 ret        = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        /* default transaction id, used unless the request dict carries one */
+        txn_id = &priv->global_txn_id;
+
+        ret = xdr_to_generic (req->msg[0], &commit_req,
+                              (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode commit "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* only peers of this cluster may drive the op state machine */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find_by_uuid (commit_req.uuid) == NULL)
+                      ? -1 : 0;
+        rcu_read_unlock ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+                        "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (commit_req.uuid));
+                goto out;
+        }
+
+        //the structures should always be equal
+        GF_ASSERT (sizeof (gd1_mgmt_commit_op_req) == sizeof (gd1_mgmt_stage_op_req));
+        ret = glusterd_req_ctx_create (req, commit_req.op, commit_req.uuid,
+                                       commit_req.buf.buf_val,
+                                       commit_req.buf.buf_len,
+                                       gf_gld_mt_op_commit_ctx_t, &commit_ctx);
+        if (ret)
+                goto out;
+
+        /* result intentionally overwritten by the inject call below */
+        ret = dict_get_bin (commit_ctx->dict, "transaction_id",
+                            (void **)&txn_id);
+        gf_msg_debug (this->name, 0, "transaction ID = %s",
+                      uuid_utoa (*txn_id));
+
+        ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP,
+                                           txn_id, commit_ctx);
+
+out:
+        free (commit_req.buf.buf_val);//malloced by xdr
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+        return ret;
+}
+
+/*
+ * Entry point for the commit op request: passes @req and the real handler,
+ * __glusterd_handle_commit_op(), to glusterd_big_locked_handler() and
+ * returns that call's result.
+ */
+int
+glusterd_handle_commit_op (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_commit_op);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_probe - handle the CLI peer probe request.
+ *
+ * Decodes the request, reads "hostname" and "port" from its dict and then
+ * answers directly when
+ *   - server quorum is required by some volume but not met,
+ *   - the host is this node (compared against the configured
+ *     "transport.socket.bind-address" when one is set, otherwise against the
+ *     local addresses), or
+ *   - the host is already a known peer with that address.
+ * Otherwise the probe is started with glusterd_probe_begin().  The friend
+ * and op state machines are run on exit unless the probe is still waiting
+ * for its connection.
+ *
+ * Returns 0 when the request was answered or the probe was started, a
+ * non-zero value otherwise.
+ */
+int
+__glusterd_handle_cli_probe (rpcsvc_request_t *req)
+{
+        int32_t              ret       = -1;
+        gf_cli_req           cli_req   = {{0,},};
+        glusterd_peerinfo_t *peer      = NULL;
+        gf_boolean_t         run_fsm   = _gf_true;
+        xlator_t            *this      = NULL;
+        char                *bind_addr = NULL;
+        dict_t              *dict      = NULL;
+        char                *hostname  = NULL;
+        int                  port      = 0;
+        int                  op_errno  = 0;
+
+        GF_ASSERT (req);
+        this = THIS;
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                /* the request could not be decoded */
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+                        "xdr decoding error");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len, &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "Failed to "
+                                "unserialize req-buffer to dictionary");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (dict, "hostname", &hostname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_NOTFOUND_IN_DICT,
+                        "Failed to get hostname");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "port", &port);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PORT_NOTFOUND_IN_DICT, "Failed to get port");
+                goto out;
+        }
+
+        /* refuse to grow the pool while server quorum is lost */
+        if (glusterd_is_any_volume_in_server_quorum (this) &&
+            !does_gd_meet_server_quorum (this)) {
+                glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET,
+                                              NULL, hostname, port, dict);
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_SERVER_QUORUM_NOT_MET,
+                        "Server quorum not met. Rejecting operation.");
+                ret = 0;
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD,
+                "Received CLI probe req %s %d", hostname, port);
+
+        /* is the probed host this very node? */
+        if (dict_get_str (this->options, "transport.socket.bind-address",
+                          &bind_addr) != 0) {
+                ret = gf_is_local_addr (hostname);
+        } else {
+                gf_msg_debug ("glusterd", 0,
+                              "only checking probe address vs. bind address");
+                ret = gf_is_same_address (bind_addr, hostname);
+        }
+        if (ret) {
+                glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST,
+                                              NULL, hostname, port, dict);
+                ret = 0;
+                goto out;
+        }
+
+        /* is the probed host already a peer known under this address? */
+        rcu_read_lock ();
+
+        peer = glusterd_peerinfo_find_by_hostname (hostname);
+        ret = (peer && gd_peer_has_address (peer, hostname));
+
+        rcu_read_unlock ();
+
+        if (ret) {
+                gf_msg_debug ("glusterd", 0,
+                              "Probe host %s port %d "
+                              "already a peer", hostname, port);
+                glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL,
+                                              hostname, port, dict);
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_probe_begin (req, hostname, port, dict, &op_errno);
+
+        if (ret == GLUSTERD_CONNECTION_AWAITED) {
+                //fsm should be run after connection establishes
+                run_fsm = _gf_false;
+                ret = 0;
+        } else if (ret == -1) {
+                /* the probe failed outright: tell the CLI why */
+                glusterd_xfer_cli_probe_resp (req, -1, op_errno,
+                                              NULL, hostname, port, dict);
+        }
+
+out:
+        free (cli_req.dict.dict_val);
+
+        if (run_fsm) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI probe request: passes @req and the real handler,
+ * __glusterd_handle_cli_probe(), to glusterd_big_locked_handler() and
+ * returns that call's result.
+ */
+int
+glusterd_handle_cli_probe (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_probe);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_deprobe - handle the CLI peer detach request.
+ *
+ * Decodes the request, reads "hostname", "port" and "flags" from its dict,
+ * resolves the hostname to a peer uuid and refuses the detach when
+ *   - the hostname does not resolve to a uuid (GF_DEPROBE_NOT_FRIEND),
+ *   - the uuid is this node's own (GF_DEPROBE_LOCALHOST),
+ *   - glusterd_chk_peers_connected_befriended() fails and
+ *     GF_CLI_FLAG_OP_FORCE is not set (GF_DEPROBE_FRIEND_DOWN),
+ *   - glusterd_friend_contains_vol_bricks() returns 1 for some volume
+ *     (GF_DEPROBE_BRICK_EXIST), or
+ *   - server quorum is required but not met and GF_CLI_FLAG_OP_FORCE is not
+ *     set (GF_DEPROBE_QUORUM_NOT_MET).
+ * Otherwise the detach is started with glusterd_deprobe_begin().  Whenever
+ * ret is non-zero at the end, a deprobe response carrying ret and op_errno
+ * is sent and its result becomes the return value.
+ */
+int
+__glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
+{
+        int32_t             ret      = -1;
+        gf_cli_req          cli_req  = {{0,},};
+        uuid_t              uuid     = {0};
+        int                 op_errno = 0;
+        xlator_t           *this     = NULL;
+        glusterd_conf_t    *priv     = NULL;
+        dict_t             *dict     = NULL;
+        char               *hostname = NULL;
+        int                 port     = 0;
+        int                 flags    = 0;
+        glusterd_volinfo_t *vol      = NULL;
+        glusterd_volinfo_t *next_vol = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                /* the request could not be decoded */
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len, &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "Failed to "
+                                "unserialize req-buffer to dictionary");
+                        goto out;
+                }
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD,
+                "Received CLI deprobe req");
+
+        ret = dict_get_str (dict, "hostname", &hostname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_NOTFOUND_IN_DICT,
+                        "Failed to get hostname");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "port", &port);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PORT_NOTFOUND_IN_DICT, "Failed to get port");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "flags", &flags);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_FLAGS_NOTFOUND_IN_DICT, "Failed to get flags");
+                goto out;
+        }
+
+        ret = glusterd_hostname_to_uuid (hostname, uuid);
+        if (ret) {
+                op_errno = GF_DEPROBE_NOT_FRIEND;
+                goto out;
+        }
+
+        /* a node cannot detach itself */
+        if (gf_uuid_compare (uuid, MY_UUID) == 0) {
+                op_errno = GF_DEPROBE_LOCALHOST;
+                ret = -1;
+                goto out;
+        }
+
+        /* Check if peers are connected, except peer being
+         * detached*/
+        if (!(flags & GF_CLI_FLAG_OP_FORCE) &&
+            !glusterd_chk_peers_connected_befriended (uuid)) {
+                ret = -1;
+                op_errno = GF_DEPROBE_FRIEND_DOWN;
+                goto out;
+        }
+
+        /* Check for if volumes exist with some bricks on the peer being
+         * detached. It's not a problem if a volume contains none or all
+         * of its bricks on the peer being detached
+         */
+        cds_list_for_each_entry_safe (vol, next_vol, &priv->volumes,
+                                      vol_list) {
+                ret = glusterd_friend_contains_vol_bricks (vol, uuid);
+                if (ret == 1) {
+                        op_errno = GF_DEPROBE_BRICK_EXIST;
+                        goto out;
+                }
+        }
+
+        if (!(flags & GF_CLI_FLAG_OP_FORCE) &&
+            glusterd_is_any_volume_in_server_quorum (this) &&
+            !does_gd_meet_server_quorum (this)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_SERVER_QUORUM_NOT_MET,
+                        "Server quorum not met. Rejecting operation.");
+                ret = -1;
+                op_errno = GF_DEPROBE_QUORUM_NOT_MET;
+                goto out;
+        }
+
+        /* a null uuid is passed on as NULL rather than as an empty uuid */
+        if (gf_uuid_is_null (uuid)) {
+                ret = glusterd_deprobe_begin (req, hostname, port, NULL, dict,
+                                              &op_errno);
+        } else {
+                ret = glusterd_deprobe_begin (req, hostname, port, uuid, dict,
+                                              &op_errno);
+        }
+
+out:
+        free (cli_req.dict.dict_val);
+
+        if (ret) {
+                ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, NULL,
+                                                      hostname, dict);
+        }
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI deprobe request: passes @req and the real handler,
+ * __glusterd_handle_cli_deprobe(), to glusterd_big_locked_handler() and
+ * returns that call's result.
+ */
+int
+glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_deprobe);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_list_friends - handle the CLI peer list request.
+ *
+ * Decodes the request, unserializes its dict when one was sent and passes
+ * the request, the dict (possibly NULL) and the request flags to
+ * glusterd_list_friends().  The dict is unreferenced and the friend and op
+ * state machines are run before returning.
+ *
+ * Ownership of the xdr-allocated dict buffer moves to the dict (through
+ * extra_stdfree) only once unserializing succeeded; on the earlier failure
+ * paths it is freed here.
+ *
+ * Returns the result of glusterd_list_friends(), or a negative value when
+ * the request could not be decoded.
+ */
+int
+__glusterd_handle_cli_list_friends (rpcsvc_request_t *req)
+{
+        int32_t                ret     = -1;
+        gf1_cli_peer_list_req  cli_req = {0,};
+        dict_t                *dict    = NULL;
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf1_cli_peer_list_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_CLI_REQ_RECVD,
+                "Received cli list req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        ret = glusterd_list_friends (req, dict, cli_req.flags);
+
+out:
+        if (dict)
+                dict_unref (dict);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI peer list request: passes @req and the real
+ * handler, __glusterd_handle_cli_list_friends(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_cli_list_friends (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_cli_list_friends);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_get_volume - handle the CLI "get volume" request.
+ *
+ * Decodes the request, unserializes its dict when one was sent, reads the
+ * "flags" entry and passes request, dict and flags to
+ * glusterd_get_volumes().  The dict is unreferenced and the friend and op
+ * state machines are run before returning.
+ *
+ * Ownership of the xdr-allocated dict buffer moves to the dict (through
+ * extra_stdfree) only once unserializing succeeded; on the earlier failure
+ * paths it is freed here.
+ *
+ * Returns the result of glusterd_get_volumes(), or a non-zero value when an
+ * earlier step failed.
+ */
+static int
+__glusterd_handle_cli_get_volume (rpcsvc_request_t *req)
+{
+        int32_t     ret     = -1;
+        gf_cli_req  cli_req = {{0,}};
+        int32_t     flags   = 0;
+        dict_t     *dict    = NULL;
+        xlator_t   *this    = NULL;
+
+        GF_ASSERT (req);
+        this = THIS;
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_GET_VOL_REQ_RCVD,
+                "Received get vol req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        ret = dict_get_int32 (dict, "flags", &flags);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_FLAGS_NOTFOUND_IN_DICT, "failed to get flags");
+                goto out;
+        }
+        ret = glusterd_get_volumes (req, dict, flags);
+
+out:
+        if (dict)
+                dict_unref (dict);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI "get volume" request: passes @req and the real
+ * handler, __glusterd_handle_cli_get_volume(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_cli_get_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_cli_get_volume);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_uuid_reset - handle the CLI request to reset this
+ * node's uuid.
+ *
+ * The reset is refused when priv->volumes or priv->peers is non-empty.
+ * Otherwise the current uuid is remembered, glusterd_uuid_generate_save()
+ * is called, and the operation is treated as failed when the uuid did not
+ * change.  A gf_cli_rsp is always sent through glusterd_to_cli(); on
+ * failure it carries op_ret -1 and an error string.
+ *
+ * Ownership of the xdr-allocated dict buffer moves to the dict (through
+ * extra_stdfree) only once unserializing succeeded; on the earlier failure
+ * paths it is freed here.
+ *
+ * Returns 0 on every path, including failures: the out: block resets ret
+ * to 0 after recording the failure in the response.
+ */
+int
+__glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req)
+{
+        int              ret           = -1;
+        dict_t          *dict          = NULL;
+        xlator_t        *this          = NULL;
+        glusterd_conf_t *priv          = NULL;
+        uuid_t           uuid          = {0};
+        gf_cli_rsp       rsp           = {0,};
+        gf_cli_req       cli_req       = {{0,}};
+        char             msg_str[2048] = {0,};
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg_debug ("glusterd", 0, "Received uuid reset req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg_str, sizeof (msg_str), "Unable to decode "
+                                  "the buffer");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        /* In the above section if dict_unserialize is successful, ret is set
+         * to zero.
+         */
+        ret = -1;
+        // Do not allow peer reset if there are any volumes in the cluster
+        if (!cds_list_empty (&priv->volumes)) {
+                snprintf (msg_str, sizeof (msg_str), "volumes are already "
+                          "present in the cluster. Resetting uuid is not "
+                          "allowed");
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOLS_ALREADY_PRESENT, "%s", msg_str);
+                goto out;
+        }
+
+        // Do not allow peer reset if trusted storage pool is already formed
+        if (!cds_list_empty (&priv->peers)) {
+                snprintf (msg_str, sizeof (msg_str),"trusted storage pool "
+                          "has been already formed. Please detach this peer "
+                          "from the pool and reset its uuid.");
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_TSP_ALREADY_FORMED, "%s", msg_str);
+                goto out;
+        }
+
+        /* remember the old uuid so an unchanged uuid can be detected */
+        gf_uuid_copy (uuid, priv->uuid);
+        ret = glusterd_uuid_generate_save ();
+
+        if (!gf_uuid_compare (uuid, MY_UUID)) {
+                snprintf (msg_str, sizeof (msg_str), "old uuid and the new uuid"
+                          " are same. Try gluster peer reset again");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUIDS_SAME_RETRY, "%s", msg_str);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        if (ret) {
+                rsp.op_ret = -1;
+                if (msg_str[0] == '\0')
+                        snprintf (msg_str, sizeof (msg_str), "Operation "
+                                  "failed");
+                rsp.op_errstr = msg_str;
+                /* the failure travels in the response, not the return value */
+                ret = 0;
+        } else {
+                rsp.op_errstr = "";
+        }
+
+        glusterd_to_cli (req, &rsp, NULL, 0, NULL,
+                         (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI uuid reset request: passes @req and the real
+ * handler, __glusterd_handle_cli_uuid_reset(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_cli_uuid_reset);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_uuid_get - handle the CLI request for this node's
+ * uuid.
+ *
+ * Builds a response dict holding MY_UUID in string form under the key
+ * "uuid", serializes it into the gf_cli_rsp and sends the response through
+ * glusterd_to_cli().  On failure the response carries op_ret -1 and an
+ * error string instead.
+ *
+ * Cleanup done here:
+ *   - the xdr-allocated request buffer is freed when the request dict never
+ *     took ownership of it (dict_new() or dict_unserialize() failed);
+ *   - the response dict is unreferenced and the serialized response buffer
+ *     released once the reply has been handed to glusterd_to_cli(); both
+ *     were previously leaked on every call.
+ *
+ * Always returns 0; failures are reported to the CLI in the response.
+ */
+int
+__glusterd_handle_cli_uuid_get (rpcsvc_request_t *req)
+{
+        int              ret           = -1;
+        dict_t          *dict          = NULL;
+        dict_t          *rsp_dict      = NULL;
+        xlator_t        *this          = NULL;
+        glusterd_conf_t *priv          = NULL;
+        gf_cli_rsp       rsp           = {0,};
+        gf_cli_req       cli_req       = {{0,}};
+        char             msg_str[2048] = {0,};
+        char             uuid_str[64]  = {0,};
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg_debug ("glusterd", 0, "Received uuid get req");
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg_str, sizeof (msg_str), "Unable to decode "
+                                  "the buffer");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                ret = -1;
+                goto out;
+        }
+
+        /* uuid_str lives on this stack frame; rsp_dict is released before
+         * returning, so the dict never outlives the buffer. */
+        uuid_utoa_r (MY_UUID, uuid_str);
+        ret = dict_set_str (rsp_dict, "uuid", uuid_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set uuid in "
+                        "dictionary.");
+                goto out;
+        }
+
+        ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+                                           &rsp.dict.dict_len);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "Failed to serialize "
+                        "dictionary.");
+                goto out;
+        }
+        ret = 0;
+out:
+        if (ret) {
+                rsp.op_ret = -1;
+                if (msg_str[0] == '\0')
+                        snprintf (msg_str, sizeof (msg_str), "Operation "
+                                  "failed");
+                rsp.op_errstr = msg_str;
+
+        } else {
+                rsp.op_errstr = "";
+
+        }
+
+        glusterd_to_cli (req, &rsp, NULL, 0, NULL,
+                         (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+        /* the reply has been handed over; drop what was built for it */
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+        if (rsp.dict.dict_val)
+                GF_FREE (rsp.dict.dict_val);
+
+        return 0;
+}
+/*
+ * Entry point for the CLI uuid get request: passes @req and the real
+ * handler, __glusterd_handle_cli_uuid_get(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_cli_uuid_get (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_cli_uuid_get);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_cli_list_volume - handle the CLI "volume list" request.
+ *
+ * Builds a dict with one "volume<N>" entry per volume in priv->volumes
+ * (N counting from 0) plus a "count" entry, serializes it into the
+ * gf_cli_rsp and submits the reply.  rsp.op_ret carries the outcome and
+ * rsp.op_errstr is "Error listing volumes" on failure.
+ *
+ * The serialized buffer is released after the reply has been submitted; it
+ * was previously leaked on every call.
+ *
+ * Always returns 0; failures are reported to the CLI in the response.
+ */
+int
+__glusterd_handle_cli_list_volume (rpcsvc_request_t *req)
+{
+        int                 ret       = -1;
+        dict_t             *dict      = NULL;
+        glusterd_conf_t    *priv      = NULL;
+        glusterd_volinfo_t *volinfo   = NULL;
+        int                 count     = 0;
+        char                key[1024] = {0,};
+        gf_cli_rsp          rsp       = {0,};
+
+        GF_ASSERT (req);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "volume%d", count);
+                ret = dict_set_str (dict, key, volinfo->volname);
+                if (ret)
+                        goto out;
+                count++;
+        }
+
+        ret = dict_set_int32 (dict, "count", count);
+        if (ret)
+                goto out;
+
+        ret = dict_allocate_and_serialize (dict, &rsp.dict.dict_val,
+                                           &rsp.dict.dict_len);
+        if (ret)
+                goto out;
+
+        ret = 0;
+
+out:
+        rsp.op_ret = ret;
+        if (ret)
+                rsp.op_errstr = "Error listing volumes";
+        else
+                rsp.op_errstr = "";
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_cli_rsp);
+        /* the outcome already travelled in rsp.op_ret */
+        ret = 0;
+
+        if (dict)
+                dict_unref (dict);
+
+        /* the reply has been submitted; release the serialized copy */
+        if (rsp.dict.dict_val)
+                GF_FREE (rsp.dict.dict_val);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI volume list request: passes @req and the real
+ * handler, __glusterd_handle_cli_list_volume(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_cli_list_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_cli_list_volume);
+
+        return ret;
+}
+
+/*
+ * glusterd_op_begin - forwards all of its arguments unchanged to
+ * glusterd_op_txn_begin() and returns that function's result.
+ */
+int32_t
+glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+                   char *err_str, size_t err_len)
+{
+        int txn_ret = glusterd_op_txn_begin (req, op, ctx, err_str, err_len);
+
+        return txn_ret;
+}
+
+/*
+ * __glusterd_handle_ganesha_cmd - handle the CLI request that maps to
+ * GD_OP_GANESHA.
+ *
+ * Decodes the request, unserializes its dict when one was sent and starts
+ * the operation with glusterd_op_begin_synctask().  When any step fails a
+ * CLI response carrying the error string is sent and its result becomes the
+ * return value.  The dict is unreferenced before returning.
+ *
+ * Ownership of the xdr-allocated dict buffer moves to the dict (through
+ * extra_stdfree) only once unserializing succeeded; on the earlier failure
+ * paths it is freed here.
+ */
+int
+__glusterd_handle_ganesha_cmd (rpcsvc_request_t *req)
+{
+        int32_t        ret           = -1;
+        gf_cli_req     cli_req       = { {0,} } ;
+        dict_t        *dict          = NULL;
+        glusterd_op_t  cli_op        = GD_OP_GANESHA;
+        char           err_str[2048] = {0,};
+        xlator_t      *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Failed to decode "
+                          "request received from cli");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_str);
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        gf_msg_trace (this->name, 0, "Received global option request");
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_GANESHA, dict);
+out:
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        if (dict)
+                dict_unref(dict);
+
+        return ret;
+}
+
+
+/*
+ * Entry point for the ganesha command: passes @req and the real handler,
+ * __glusterd_handle_ganesha_cmd(), to glusterd_big_locked_handler() and
+ * returns that call's result.
+ */
+int
+glusterd_handle_ganesha_cmd (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_ganesha_cmd);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_reset_volume - handle the CLI "volume reset" request.
+ *
+ * Decodes the request, unserializes its dict when one was sent, requires a
+ * "volname" entry and starts GD_OP_RESET_VOLUME with
+ * glusterd_op_begin_synctask().  When any step fails a CLI response
+ * carrying the error string is sent and its result becomes the return
+ * value.
+ *
+ * Ownership of the xdr-allocated dict buffer moves to the dict (through
+ * extra_stdfree) only once unserializing succeeded; on the earlier failure
+ * paths it is freed here.
+ */
+static int
+__glusterd_handle_reset_volume (rpcsvc_request_t *req)
+{
+        int32_t        ret           = -1;
+        gf_cli_req     cli_req       = {{0,}};
+        dict_t        *dict          = NULL;
+        glusterd_op_t  cli_op        = GD_OP_RESET_VOLUME;
+        char          *volname       = NULL;
+        char           err_str[2048] = {0,};
+        xlator_t      *this          = NULL;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        gf_msg (this->name, GF_LOG_INFO, 0, 0,
+                "Received reset vol req");
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Failed to decode request "
+                          "received from cli");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_str);
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get volume "
+                          "name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLNAME_NOTFOUND_IN_DICT, "%s", err_str);
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "Received volume reset request for "
+                      "volume %s", volname);
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_RESET_VOLUME, dict);
+
+out:
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+
+        return ret;
+}
+
+/*
+ * Entry point for the CLI volume reset request: passes @req and the real
+ * handler, __glusterd_handle_reset_volume(), to
+ * glusterd_big_locked_handler() and returns that call's result.
+ */
+int
+glusterd_handle_reset_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_reset_volume);
+
+        return ret;
+}
+
+/*
+ * __glusterd_handle_set_volume - handle the CLI "volume set" request.
+ *
+ * Decodes the request, unserializes its dict when one was sent and reads
+ * "volname" from it.  When the volume name is "help" or "help-xml" the
+ * request is answered through glusterd_volset_help() and a CLI response is
+ * always sent.  Otherwise "key1" and "value1" must be present and
+ * GD_OP_SET_VOLUME is started with glusterd_op_begin_synctask(); a CLI
+ * response is sent from here only when a step failed.
+ *
+ * Ownership of the xdr-allocated dict buffer moves to the dict (through
+ * extra_stdfree) only once unserializing succeeded; on the earlier failure
+ * paths it is freed here.
+ *
+ * Returns the result of the last call made: the synctask start, or the CLI
+ * response when one was sent.
+ */
+int
+__glusterd_handle_set_volume (rpcsvc_request_t *req)
+{
+        int32_t        ret           = -1;
+        gf_cli_req     cli_req       = {{0,}};
+        dict_t        *dict          = NULL;
+        glusterd_op_t  cli_op        = GD_OP_SET_VOLUME;
+        char          *key           = NULL;
+        char          *value         = NULL;
+        char          *volname       = NULL;
+        char          *op_errstr     = NULL;
+        gf_boolean_t   help          = _gf_false;
+        char           err_str[2048] = {0,};
+        xlator_t      *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Failed to decode "
+                          "request received from cli");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_str);
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* nobody owns the xdr buffer yet */
+                        free (cli_req.dict.dict_val);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        /* the dict never took ownership of the buffer */
+                        free (cli_req.dict.dict_val);
+                        goto out;
+                }
+
+                /* from here on the dict frees the buffer when destroyed */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get volume "
+                          "name while handling volume set command");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        /* "help" and "help-xml" are answered here, not applied to a volume */
+        if (strcmp (volname, "help") == 0 ||
+            strcmp (volname, "help-xml") == 0) {
+                ret = glusterd_volset_help (dict, &op_errstr);
+                help = _gf_true;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "key1", &key);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get key while"
+                          " handling volume set for %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "value1", &value);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get value while"
+                          " handling volume set for %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "Received volume set request for "
+                      "volume %s", volname);
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_SET_VOLUME, dict);
+
+out:
+        if (help)
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, dict,
+                                                     (op_errstr)? op_errstr:"");
+        else if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        if (op_errstr)
+                GF_FREE (op_errstr);
+
+        return ret;
+}
+
+/* RPC entry point for "volume set"; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_set_volume (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_set_volume);
+}
+
+/*
+ * Handler body for the CLI "volume sync" request.
+ *
+ * Decodes the request, unserializes its dictionary, and requires a
+ * "hostname" plus either a "volname" or a "flags" entry.  Syncing from a
+ * local address is rejected.  On success the operation is started with
+ * glusterd_op_begin_synctask().
+ *
+ * On any failure the error is sent to the CLI from here and 0 is returned
+ * so that the caller does not send a second reply.
+ */
+int
+__glusterd_handle_sync_volume (rpcsvc_request_t *req)
+{
+        int32_t                          ret     = -1;
+        gf_cli_req                       cli_req = {{0,}};
+        dict_t                          *dict    = NULL;
+        /* was "{0.}": a floating constant, almost certainly a typo */
+        gf_cli_rsp                       cli_rsp = {0,};
+        char                             msg[2048] = {0,};
+        char                            *volname = NULL;
+        gf1_cli_sync_volume              flags   = 0;
+        char                            *hostname = NULL;
+        xlator_t                        *this    = NULL;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict  = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg, sizeof (msg), "Unable to decode the "
+                                  "command");
+                        goto out;
+                } else {
+                        /* the dictionary now owns the XDR buffer */
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+        }
+
+        ret = dict_get_str (dict, "hostname", &hostname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to get hostname");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_NOTFOUND_IN_DICT, "%s", msg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                /* no volume name: the request must carry flags instead */
+                ret = dict_get_int32 (dict, "flags", (int32_t*)&flags);
+                if (ret) {
+                        snprintf (msg, sizeof (msg), "Failed to get volume name"
+                                  " or flags");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FLAGS_NOTFOUND_IN_DICT, "%s", msg);
+                        goto out;
+                }
+        }
+
+        /* volname is still NULL when only flags were supplied and they do
+         * not include GF_CLI_SYNC_ALL; never hand NULL to "%s". */
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_VOL_SYNC_REQ_RCVD, "Received volume sync req "
+                "for volume %s",
+                (flags & GF_CLI_SYNC_ALL) ? "all"
+                                          : (volname ? volname : "(null)"));
+
+        if (gf_is_local_addr (hostname)) {
+                ret = -1;
+                snprintf (msg, sizeof (msg), "sync from localhost"
+                          " not allowed");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SYNC_FROM_LOCALHOST_UNALLOWED, "%s", msg);
+                goto out;
+        }
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_SYNC_VOLUME, dict);
+
+out:
+        if (ret) {
+                cli_rsp.op_ret = -1;
+                cli_rsp.op_errstr = msg;
+                if (msg[0] == '\0')
+                        snprintf (msg, sizeof (msg), "Operation failed");
+                glusterd_to_cli (req, &cli_rsp, NULL, 0, NULL,
+                                 (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+                ret = 0; //sent error to cli, prevent second reply
+        }
+
+        return ret;
+}
+
+/* RPC entry point for "volume sync"; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_sync_volume (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_sync_volume);
+}
+
+/*
+ * Send the reply to an FSM-log query.
+ *
+ * op_ret and op_errstr are copied into the response.  When op_ret is 0 the
+ * dictionary is serialized into the fsm_log payload; the result of that
+ * serialization is not acted upon.  The submit result is only logged at
+ * debug level and the function always returns 0.
+ */
+int
+glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret,
+                            char *op_errstr, dict_t *dict)
+{
+        gf1_cli_fsm_log_rsp     reply = {0};
+        int                     rc    = -1;
+
+        GF_ASSERT (req);
+        GF_ASSERT (op_errstr);
+
+        reply.op_errstr = op_errstr;
+        reply.op_ret    = op_ret;
+
+        if (reply.op_ret == 0) {
+                rc = dict_allocate_and_serialize (dict,
+                                                  &reply.fsm_log.fsm_log_val,
+                                                  &reply.fsm_log.fsm_log_len);
+        }
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
+        GF_FREE (reply.fsm_log.fsm_log_val);
+
+        gf_msg_debug ("glusterd", 0, "Responded, ret: %d", rc);
+
+        return 0;
+}
+
+/*
+ * Handler body for the CLI FSM-log query.
+ *
+ * An empty name in the request selects the op state-machine log taken from
+ * the xlator's private conf; any other name is looked up as a peer hostname
+ * and that peer's state-machine log is returned.  The reply is always sent
+ * from here, so 0 is returned to avoid a double reply.
+ */
+int
+__glusterd_handle_fsm_log (rpcsvc_request_t *req)
+{
+        xlator_t               *this     = NULL;
+        glusterd_conf_t        *conf     = NULL;
+        glusterd_peerinfo_t    *peerinfo = NULL;
+        dict_t                 *dict     = NULL;
+        gf1_cli_fsm_log_req     cli_req  = {0,};
+        char                    msg[2048] = {0};
+        int32_t                 ret      = -1;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("xlator", (this != NULL), out);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf1_cli_fsm_log_req);
+        if (ret < 0) {
+                /* request could not be decoded */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received from client.");
+                req->rpc_err = GARBAGE_ARGS;
+                snprintf (msg, sizeof (msg), "Garbage request");
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (dict == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        if (strcmp (cli_req.name, "") == 0) {
+                conf = this->private;
+                ret = glusterd_sm_tr_log_add_to_dict (dict, &conf->op_sm_log);
+                goto out;
+        }
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find_by_hostname (cli_req.name);
+        if (peerinfo != NULL) {
+                ret = glusterd_sm_tr_log_add_to_dict (dict,
+                                                      &peerinfo->sm_log);
+        } else {
+                ret = -1;
+                snprintf (msg, sizeof (msg), "%s is not a peer",
+                          cli_req.name);
+        }
+
+        rcu_read_unlock ();
+
+out:
+        (void)glusterd_fsm_log_send_resp (req, ret, msg, dict);
+        free (cli_req.name); /* malloced by xdr */
+        if (dict)
+                dict_unref (dict);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return 0; /* reply already sent; 0 avoids a double reply */
+}
+
+/* RPC entry point for the FSM-log query; dispatches the handler body
+ * through glusterd_big_locked_handler(). */
+int
+glusterd_handle_fsm_log (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_fsm_log);
+}
+
+/*
+ * Reply to a cluster lock request with the given status and this node's
+ * uuid.  The submit result is only logged; the function always returns 0.
+ */
+int
+glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+        gd1_mgmt_cluster_lock_rsp       reply = {{0},};
+        int                             rc    = -1;
+
+        GF_ASSERT (req);
+
+        glusterd_get_uuid (&reply.uuid);
+        reply.op_ret = status;
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+
+        gf_msg_debug (THIS->name, 0, "Responded to lock, ret: %d", rc);
+
+        return 0;
+}
+
+/*
+ * Reply to a cluster unlock request with the given status and this node's
+ * uuid.  Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+        gd1_mgmt_cluster_unlock_rsp     reply = {{0},};
+        int                             rc    = -1;
+
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+
+        gf_msg_debug (THIS->name, 0, "Responded to unlock, ret: %d", rc);
+
+        return rc;
+}
+
+/*
+ * Reply to a mgmt_v3 lock request.
+ *
+ * The response carries this node's uuid, the transaction id and the given
+ * status; when the status is non-zero the current errno is reported as
+ * op_errno.  Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id,
+                                    int32_t status)
+{
+        gd1_mgmt_v3_lock_rsp    reply = {{0},};
+        int                     rc    = -1;
+
+        GF_ASSERT (req);
+        GF_ASSERT (txn_id);
+
+        glusterd_get_uuid (&reply.uuid);
+        reply.op_ret = status;
+        if (reply.op_ret != 0)
+                reply.op_errno = errno;
+        gf_uuid_copy (reply.txn_id, *txn_id);
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+
+        gf_msg_debug (THIS->name, 0, "Responded to mgmt_v3 lock, ret: %d",
+                      rc);
+
+        return rc;
+}
+
+/*
+ * Reply to a mgmt_v3 unlock request.
+ *
+ * The response carries the given status (with the current errno as
+ * op_errno when the status is non-zero), this node's uuid and the
+ * transaction id.  Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id,
+                                      int32_t status)
+{
+        gd1_mgmt_v3_unlock_rsp  reply = {{0},};
+        int                     rc    = -1;
+
+        GF_ASSERT (req);
+        GF_ASSERT (txn_id);
+
+        reply.op_ret = status;
+        if (reply.op_ret != 0)
+                reply.op_errno = errno;
+        glusterd_get_uuid (&reply.uuid);
+        gf_uuid_copy (reply.txn_id, *txn_id);
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+
+        gf_msg_debug (THIS->name, 0, "Responded to mgmt_v3 unlock, ret: %d",
+                      rc);
+
+        return rc;
+}
+
+/*
+ * Handler body for a cluster UNLOCK request from a peer.
+ *
+ * The request is decoded and the sender's uuid must belong to a known
+ * peer, otherwise the request is ignored with -1.  For a known peer an
+ * op-lock context is allocated and GD_OP_EVENT_UNLOCK is injected using
+ * the global transaction id.
+ *
+ * Note: an allocation failure returns -1 straight away, without running
+ * the friend/op state machines that the other exit paths run.
+ */
+int
+__glusterd_handle_cluster_unlock (rpcsvc_request_t *req)
+{
+        xlator_t                        *this         = NULL;
+        glusterd_conf_t                 *priv         = NULL;
+        glusterd_op_lock_ctx_t          *ctx          = NULL;
+        uuid_t                          *txn_id       = NULL;
+        gd1_mgmt_cluster_unlock_req      unlock_req   = {{0}, };
+        gf_boolean_t                     unknown_peer = _gf_false;
+        int32_t                          ret          = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        txn_id = &priv->global_txn_id;
+
+        ret = xdr_to_generic (req->msg[0], &unlock_req,
+                              (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode unlock request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Received UNLOCK from uuid: %s",
+                      uuid_utoa (unlock_req.uuid));
+
+        rcu_read_lock ();
+        if (glusterd_peerinfo_find_by_uuid (unlock_req.uuid) == NULL)
+                unknown_peer = _gf_true;
+        rcu_read_unlock ();
+
+        if (unknown_peer) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND,
+                        "%s doesn't belong to the cluster. Ignoring request.",
+                        uuid_utoa (unlock_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+        if (ctx == NULL) {
+                /* TODO (carried over): respond here */
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "No memory.");
+                return -1;
+        }
+
+        gf_uuid_copy (ctx->uuid, unlock_req.uuid);
+        ctx->req  = req;
+        ctx->dict = NULL;
+
+        ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx);
+
+out:
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/* RPC entry point for cluster unlock; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_cluster_unlock (rpcsvc_request_t *req)
+{
+        int     ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_cluster_unlock);
+
+        return ret;
+}
+
+/*
+ * Reply to a stage-op request.
+ *
+ * The response carries the status, this node's uuid, the op code, the
+ * error string (an empty string when op_errstr is NULL) and the serialized
+ * rsp_dict.  If serialization fails, no reply is submitted and the error
+ * is returned; otherwise the result of glusterd_submit_reply() is returned.
+ */
+int
+glusterd_op_stage_send_resp (rpcsvc_request_t   *req,
+                             int32_t op, int32_t status,
+                             char *op_errstr, dict_t *rsp_dict)
+{
+        xlator_t               *this  = NULL;
+        gd1_mgmt_stage_op_rsp   reply = {{0},};
+        int                     rc    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+        reply.op = op;
+        reply.op_errstr = op_errstr ? op_errstr : "";
+
+        rc = dict_allocate_and_serialize (rsp_dict, &reply.dict.dict_val,
+                                          &reply.dict.dict_len);
+        if (rc < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "failed to get serialized length of dict");
+                return rc;
+        }
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+
+        gf_msg_debug (this->name, 0, "Responded to stage, ret: %d", rc);
+        GF_FREE (reply.dict.dict_val);
+
+        return rc;
+}
+
+/*
+ * Reply to a commit-op request.
+ *
+ * The response carries the status, this node's uuid, the op code and the
+ * error string (an empty string when op_errstr is NULL).  rsp_dict is
+ * serialized into the response only when it is non-NULL; if that
+ * serialization fails no reply is submitted.  The serialized buffer is
+ * freed on every path.
+ */
+int
+glusterd_op_commit_send_resp (rpcsvc_request_t *req,
+                              int32_t op, int32_t status, char *op_errstr,
+                              dict_t *rsp_dict)
+{
+        xlator_t               *this  = NULL;
+        gd1_mgmt_commit_op_rsp  reply = {{0}, };
+        int                     rc    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+        reply.op = op;
+        reply.op_errstr = op_errstr ? op_errstr : "";
+
+        if (rsp_dict != NULL) {
+                rc = dict_allocate_and_serialize (rsp_dict,
+                                                  &reply.dict.dict_val,
+                                                  &reply.dict.dict_len);
+                if (rc < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                                "failed to get serialized length of dict");
+                        goto out;
+                }
+        }
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+
+        gf_msg_debug (this->name, 0, "Responded to commit, ret: %d", rc);
+
+out:
+        GF_FREE (reply.dict.dict_val);
+        return rc;
+}
+
+/*
+ * Handler body for an incoming friend (probe) request from a peer.
+ *
+ * Decodes the request and passes it to glusterd_handle_friend_req().  When
+ * that call reports GLUSTERD_CONNECTION_AWAITED the result is turned into
+ * 0 and the state machines are not run here; on every other path they are.
+ */
+int
+__glusterd_handle_incoming_friend_req (rpcsvc_request_t *req)
+{
+        gd1_mgmt_friend_req     friend_req  = {{0},};
+        gf_boolean_t            run_fsm_now = _gf_true;
+        int32_t                 ret         = -1;
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &friend_req,
+                              (xdrproc_t)xdr_gd1_mgmt_friend_req);
+        if (ret < 0) {
+                /* request could not be decoded */
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received from friend");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_PROBE_RCVD,
+                "Received probe from uuid: %s", uuid_utoa (friend_req.uuid));
+
+        ret = glusterd_handle_friend_req (req, friend_req.uuid,
+                                          friend_req.hostname,
+                                          friend_req.port, &friend_req);
+        if (ret == GLUSTERD_CONNECTION_AWAITED) {
+                /* fsm should be run after connection establishes */
+                run_fsm_now = _gf_false;
+                ret = 0;
+        }
+
+out:
+        free (friend_req.hostname); /* malloced by xdr */
+
+        if (run_fsm_now) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        return ret;
+}
+
+/* RPC entry point for incoming friend requests; dispatches the handler
+ * body through glusterd_big_locked_handler(). */
+int
+glusterd_handle_incoming_friend_req (rpcsvc_request_t *req)
+{
+        int     ret = glusterd_big_locked_handler (req,
+                                __glusterd_handle_incoming_friend_req);
+
+        return ret;
+}
+
+/*
+ * Handler body for an incoming unfriend request from a peer.
+ *
+ * Decodes the request, resolves the sender's hostname with
+ * glusterd_remote_hostname_get() and passes uuid, hostname and port to
+ * glusterd_handle_unfriend_req().  The XDR-allocated request fields are
+ * freed and the state machines are run on every path.
+ */
+int
+__glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req)
+{
+        gd1_mgmt_friend_req     friend_req = {{0},};
+        char                    peer_host[UNIX_PATH_MAX + 1] = {0,};
+        int32_t                 ret        = -1;
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &friend_req,
+                              (xdrproc_t)xdr_gd1_mgmt_friend_req);
+        if (ret < 0) {
+                /* request could not be decoded */
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received.");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_UNFRIEND_REQ_RCVD,
+                "Received unfriend from uuid: %s",
+                uuid_utoa (friend_req.uuid));
+
+        ret = glusterd_remote_hostname_get (req, peer_host,
+                                            sizeof (peer_host));
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_RESOLVE_FAIL,
+                        "Unable to get the remote hostname");
+                goto out;
+        }
+
+        ret = glusterd_handle_unfriend_req (req, friend_req.uuid,
+                                            peer_host, friend_req.port);
+
+out:
+        /* both buffers were malloced by xdr */
+        free (friend_req.hostname);
+        free (friend_req.vols.vols_val);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/* RPC entry point for incoming unfriend requests; dispatches the handler
+ * body through glusterd_big_locked_handler(). */
+int
+glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req)
+{
+        int     ret = glusterd_big_locked_handler (req,
+                                __glusterd_handle_incoming_unfriend_req);
+
+        return ret;
+}
+
+/*
+ * Handle the "delete" variant of a friend update: read "hostname" from
+ * the dictionary and remove that friend via glusterd_friend_remove().
+ * Returns the dict lookup error, or the result of the removal.
+ */
+int
+glusterd_handle_friend_update_delete (dict_t *dict)
+{
+        char           *peer_host = NULL;
+        int32_t         ret       = -1;
+
+        GF_ASSERT (dict);
+
+        ret = dict_get_str (dict, "hostname", &peer_host);
+        if (ret == 0)
+                ret = glusterd_friend_remove (NULL, peer_host);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Add hostname to the peer's address list with gd_add_address_to_peer()
+ * and, when store_update is set, persist the peer with
+ * glusterd_store_peerinfo().  Returns 0 on success or the failing call's
+ * result.
+ */
+int
+glusterd_peer_hostname_update (glusterd_peerinfo_t *peerinfo,
+                               const char *hostname, gf_boolean_t store_update)
+{
+        int     rc = 0;
+
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (hostname);
+
+        rc = gd_add_address_to_peer (peerinfo, hostname);
+        if (rc != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL,
+                        "Couldn't add address to the peer info");
+        } else if (store_update) {
+                rc = glusterd_store_peerinfo (peerinfo);
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Handler body for a friend-update request from a peer.
+ *
+ * The sender must be a known peer.  The request's dictionary provides
+ * "count" and "op".  For GD_FRIEND_UPDATE_DEL the delete helper is called.
+ * Otherwise each "friend<N>" entry (N = 1..count) is processed: entries
+ * carrying this node's own uuid are skipped, unknown uuids are turned into
+ * new peers in the BEFRIENDED state, and known peers are updated from the
+ * dictionary and stored.  Processing stops at the first failure.
+ *
+ * A reply carrying this node's uuid is always submitted, and the value
+ * returned is the result of that submit.
+ */
+int
+__glusterd_handle_friend_update (rpcsvc_request_t *req)
+{
+        xlator_t                       *this       = NULL;
+        glusterd_conf_t                *priv       = NULL;
+        glusterd_peerinfo_t            *peerinfo   = NULL;
+        dict_t                         *dict       = NULL;
+        char                           *uuid_buf   = NULL;
+        gd1_mgmt_friend_update          friend_req = {{0},};
+        gd1_mgmt_friend_update_rsp      rsp        = {{0},};
+        glusterd_peerctx_args_t         args       = {0};
+        uuid_t                          uuid       = {0,};
+        char                            key[100]   = {0,};
+        int32_t                         op         = 0;
+        int                             count      = 0;
+        int                             i          = 1;
+        int32_t                         ret        = -1;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = xdr_to_generic (req->msg[0], &friend_req,
+                              (xdrproc_t)xdr_gd1_mgmt_friend_update);
+        if (ret < 0) {
+                /* request could not be decoded */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* Only accept updates from peers we already know */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (friend_req.uuid, NULL) == NULL) ? -1
+                                                                       : 0;
+        rcu_read_unlock ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_REQ_FROM_UNKNOWN_PEER,
+                        "Received friend update request from unknown peer %s",
+                        uuid_utoa (friend_req.uuid));
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_FRIEND_UPDATE_RCVD,
+                "Received friend update from uuid: %s",
+                uuid_utoa (friend_req.uuid));
+
+        if (friend_req.friends.friends_len) {
+                /* Rebuild the dictionary sent by the peer */
+                dict = dict_new ();
+
+                ret = dict_unserialize (friend_req.friends.friends_val,
+                                        friend_req.friends.friends_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to unserialize req-buffer to "
+                                "dictionary");
+                        goto out;
+                }
+
+                /* the dictionary now owns the XDR buffer */
+                dict->extra_stdfree = friend_req.friends.friends_val;
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret)
+                goto out;
+
+        ret = dict_get_int32 (dict, "op", &op);
+        if (ret)
+                goto out;
+
+        if (op == GD_FRIEND_UPDATE_DEL) {
+                ret = glusterd_handle_friend_update_delete (dict);
+                goto out;
+        }
+
+        args.mode = GD_MODE_ON;
+        for (i = 1; i <= count; i++) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "friend%d.uuid", i);
+                ret = dict_get_str (dict, key, &uuid_buf);
+                if (ret)
+                        goto out;
+                gf_uuid_parse (uuid_buf, uuid);
+
+                if (gf_uuid_compare (uuid, MY_UUID) == 0) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_UUID_RECEIVED,
+                                "Received my uuid as Friend");
+                        continue;
+                }
+
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "friend%d", i);
+
+                rcu_read_lock ();
+                peerinfo = glusterd_peerinfo_find (uuid, NULL);
+                if (peerinfo == NULL) {
+                        /* No peer with this uuid yet: build one from the
+                         * dictionary and add it to the list.
+                         */
+                        peerinfo = gd_peerinfo_from_dict (dict, key);
+                        if (peerinfo == NULL) {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_PEERINFO_CREATE_FAIL,
+                                        "Could not create peerinfo from dict "
+                                        "for prefix %s", key);
+                        } else {
+                                /* A new peer is added as a friend; the
+                                 * friend state machine will take care of
+                                 * correcting the state as required.
+                                 */
+                                peerinfo->state.state =
+                                        GD_FRIEND_STATE_BEFRIENDED;
+
+                                ret = glusterd_friend_add_from_peerinfo
+                                                (peerinfo, 0, &args);
+                        }
+                } else {
+                        /* Existing peer: refresh it from the dictionary */
+                        ret = gd_update_peerinfo_from_dict (peerinfo, dict,
+                                                            key);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_PEER_INFO_UPDATE_FAIL,
+                                        "Failed to update peer %s",
+                                        peerinfo->hostname);
+                        } else {
+                                ret = glusterd_store_peerinfo (peerinfo);
+                                if (ret)
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_PEERINFO_CREATE_FAIL,
+                                                "Failed to store peerinfo");
+                        }
+                }
+                rcu_read_unlock ();
+
+                if (ret)
+                        break;
+
+                peerinfo = NULL;
+        }
+
+out:
+        gf_uuid_copy (rsp.uuid, MY_UUID);
+        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
+        if (dict) {
+                /* free the XDR buffer only if the dict did not take it */
+                if (!dict->extra_stdfree && friend_req.friends.friends_val)
+                        free (friend_req.friends.friends_val);
+                dict_unref (dict);
+        } else {
+                free (friend_req.friends.friends_val); /* malloced by xdr */
+        }
+
+        /* peerinfo is only non-NULL here when an iteration failed */
+        if (peerinfo)
+                glusterd_peerinfo_cleanup (peerinfo);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/* RPC entry point for friend updates; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_friend_update (rpcsvc_request_t *req)
+{
+        int     ret = glusterd_big_locked_handler (req,
+                                        __glusterd_handle_friend_update);
+
+        return ret;
+}
+
+/*
+ * Handler body for a probe query from another node.
+ *
+ * A probe whose uuid equals this node's uuid is answered with
+ * GF_PROBE_SAME_UUID.  Otherwise the sender's hostname is resolved; if the
+ * sender is unknown while this node already has peers the reply is
+ * GF_PROBE_ANOTHER_CLUSTER, and if this node has no peers the sender is
+ * added as a friend in the PROBE_RCVD state.
+ *
+ * Once a reply has been submitted, 0 is returned.  Decode and hostname
+ * resolution failures return their error without replying.
+ */
+int
+__glusterd_handle_probe_query (rpcsvc_request_t *req)
+{
+        xlator_t                *this      = NULL;
+        glusterd_conf_t         *conf      = NULL;
+        glusterd_peerinfo_t     *peerinfo  = NULL;
+        gd1_mgmt_probe_req       probe_req = {{0},};
+        gd1_mgmt_probe_rsp       rsp       = {{0},};
+        glusterd_peerctx_args_t  args      = {0};
+        char                     remote_hostname[UNIX_PATH_MAX + 1] = {0,};
+        int                      port      = 0;
+        int32_t                  ret       = -1;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("xlator", (this != NULL), out);
+
+        ret = xdr_to_generic (req->msg[0], &probe_req,
+                              (xdrproc_t)xdr_gd1_mgmt_probe_req);
+        if (ret < 0) {
+                /* request could not be decoded */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode probe request");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        conf = this->private;
+        port = probe_req.port ? probe_req.port : GF_DEFAULT_BASE_PORT;
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_PROBE_RCVD,
+                "Received probe from uuid: %s", uuid_utoa (probe_req.uuid));
+
+        /* A uuid collision is reported back to the prober as an error so
+         * the user can act on it.
+         */
+        if (gf_uuid_compare (probe_req.uuid, MY_UUID) == 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUIDS_SAME_RETRY,
+                        "Peer uuid %s is same as local uuid. Please check "
+                        "the uuid of both the peers from %s/%s",
+                        uuid_utoa (probe_req.uuid),
+                        GLUSTERD_DEFAULT_WORKDIR, GLUSTERD_INFO_FILE);
+                rsp.op_ret = -1;
+                rsp.op_errno = GF_PROBE_SAME_UUID;
+                rsp.port = port;
+                goto respond;
+        }
+
+        ret = glusterd_remote_hostname_get (req, remote_hostname,
+                                            sizeof (remote_hostname));
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_RESOLVE_FAIL,
+                        "Unable to get the remote hostname");
+                goto out;
+        }
+
+        rcu_read_lock ();
+        peerinfo = glusterd_peerinfo_find (probe_req.uuid, remote_hostname);
+        if (peerinfo == NULL) {
+                if (!cds_list_empty (&conf->peers)) {
+                        /* unknown sender, but we already have peers */
+                        rsp.op_ret = -1;
+                        rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER;
+                } else {
+                        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                                GD_MSG_PEER_NOT_FOUND,
+                                "Unable to find peerinfo for host: %s (%d)",
+                                remote_hostname, port);
+                        args.mode = GD_MODE_ON;
+                        ret = glusterd_friend_add (remote_hostname, port,
+                                                   GD_FRIEND_STATE_PROBE_RCVD,
+                                                   NULL, &peerinfo, 0, &args);
+                        if (ret) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                        GD_MSG_PEER_ADD_FAIL,
+                                        "Failed to add peer %s",
+                                        remote_hostname);
+                                rsp.op_errno = GF_PROBE_ADD_FAILED;
+                        }
+                }
+        }
+        rcu_read_unlock ();
+
+respond:
+        gf_uuid_copy (rsp.uuid, MY_UUID);
+
+        rsp.hostname = probe_req.hostname;
+        rsp.op_errstr = "";
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+        ret = 0;
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_RESPONSE_INFO,
+                "Responded to %s, op_ret: %d, op_errno: %d, ret: %d",
+                remote_hostname, rsp.op_ret, rsp.op_errno, ret);
+
+out:
+        free (probe_req.hostname); /* malloced by xdr */
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/* RPC entry point for probe queries; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int glusterd_handle_probe_query (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_probe_query);
+}
+
+/*
+ * Handler body for the CLI "volume profile" request.
+ *
+ * Decodes the request, unserializes its dictionary, requires "volname"
+ * and "op", then starts the operation with glusterd_op_begin().  The state
+ * machines are run and the XDR buffer freed on every path; on failure an
+ * error response is sent to the CLI and that call's result is returned.
+ */
+int
+__glusterd_handle_cli_profile_volume (rpcsvc_request_t *req)
+{
+        xlator_t       *this          = NULL;
+        dict_t         *dict          = NULL;
+        char           *volname       = NULL;
+        gf_cli_req      cli_req       = {{0,}};
+        glusterd_op_t   cli_op        = GD_OP_PROFILE_VOLUME;
+        char            err_str[2048] = {0,};
+        int32_t         op            = 0;
+        int32_t         ret           = -1;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                /* request could not be decoded */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len > 0) {
+                dict = dict_new();
+                if (dict == NULL)
+                        goto out;
+                /* result not checked here; the lookups below fail on a
+                 * dictionary that was not filled in */
+                dict_unserialize (cli_req.dict.dict_val,
+                                  cli_req.dict.dict_len, &dict);
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str),
+                          "Unable to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLNAME_NOTFOUND_IN_DICT, "%s", err_str);
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_VOL_PROFILE_REQ_RCVD,
+                "Received volume profile req for volume %s", volname);
+
+        ret = dict_get_int32 (dict, "op", &op);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get operation");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = glusterd_op_begin (req, cli_op, dict, err_str, sizeof (err_str));
+
+out:
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        free (cli_req.dict.dict_val);
+
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* RPC entry point for "volume profile"; dispatches the handler body
+ * through glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_profile_volume (rpcsvc_request_t *req)
+{
+        int     ret = glusterd_big_locked_handler (req,
+                                __glusterd_handle_cli_profile_volume);
+
+        return ret;
+}
+
+/*
+ * Handler body for the CLI "getwd" request: replies with the workdir
+ * stored in the xlator's private conf, runs the state machines and
+ * returns 0.
+ */
+int
+__glusterd_handle_getwd (rpcsvc_request_t *req)
+{
+        glusterd_conf_t        *priv  = NULL;
+        gf1_cli_getwd_rsp       reply = {0,};
+
+        GF_ASSERT (req);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_GETWD_REQ_RCVD, "Received getwd req");
+
+        reply.wd = priv->workdir;
+
+        glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return 0;
+}
+
+/* RPC entry point for "getwd"; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_getwd (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_getwd);
+}
+
+/*
+ * Handler body for the CLI mount request.
+ *
+ * Decodes the request, unserializes its dictionary and calls
+ * glusterd_do_mount() with the big lock released around the call.  A reply
+ * is always submitted from here (with an empty path when none was
+ * produced) and 0 is returned.
+ *
+ * Failures are reported with a positive errno value in op_errno; the
+ * unserialize failure path previously reported -EINVAL, unlike the decode
+ * failure path just above it.
+ */
+int
+__glusterd_handle_mount (rpcsvc_request_t *req)
+{
+        gf1_cli_mount_req mnt_req = {0,};
+        gf1_cli_mount_rsp rsp     = {0,};
+        dict_t *dict              = NULL;
+        int ret                   = 0;
+        glusterd_conf_t     *priv   = NULL;
+
+        GF_ASSERT (req);
+        priv = THIS->private;
+
+        ret = xdr_to_generic (req->msg[0], &mnt_req,
+                              (xdrproc_t)xdr_gf1_cli_mount_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode mount "
+                        "request received");
+                req->rpc_err = GARBAGE_ARGS;
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_MOUNT_REQ_RCVD,
+                "Received mount req");
+
+        if (mnt_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict  = dict_new ();
+
+                ret = dict_unserialize (mnt_req.dict.dict_val,
+                                        mnt_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        rsp.op_ret = -1;
+                        /* errno values are positive */
+                        rsp.op_errno = EINVAL;
+                        goto out;
+                } else {
+                        /* the dictionary now owns the XDR buffer */
+                        dict->extra_stdfree = mnt_req.dict.dict_val;
+                }
+        }
+
+        /* the big lock is dropped around the mount itself */
+        synclock_unlock (&priv->big_lock);
+        rsp.op_ret = glusterd_do_mount (mnt_req.label, dict,
+                                        &rsp.path, &rsp.op_errno);
+        synclock_lock (&priv->big_lock);
+
+ out:
+        if (!rsp.path)
+                rsp.path = "";
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf1_cli_mount_rsp);
+        ret = 0;
+
+        if (dict)
+                dict_unref (dict);
+        /* the "" placeholder above is a literal and must not be freed */
+        if (*rsp.path)
+                GF_FREE (rsp.path);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/* RPC entry point for mount requests; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_mount (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_mount);
+}
+
+/*
+ * Handler body for the CLI umount request.
+ *
+ * The path to unmount is accepted only when its parent directory is
+ * "<mountbroker-root>/" followed by MB_HIVE; anything else is refused with
+ * EACCES.  The unmount itself (lazy, or by running _PATH_UMOUNT) is done
+ * with the big lock released.  After a successful unmount the resolved
+ * mount point directory is removed and the requested path is unlinked.
+ *
+ * A reply is always submitted from here and 0 is returned.  The path
+ * string allocated by XDR decoding is released before returning.
+ */
+int
+__glusterd_handle_umount (rpcsvc_request_t *req)
+{
+        gf1_cli_umount_req umnt_req = {0,};
+        gf1_cli_umount_rsp rsp      = {0,};
+        char *mountbroker_root      = NULL;
+        char mntp[PATH_MAX]         = {0,};
+        char *path                  = NULL;
+        runner_t runner             = {0,};
+        int ret                     = 0;
+        xlator_t *this              = THIS;
+        gf_boolean_t dir_ok         = _gf_false;
+        char *pdir                  = NULL;
+        char *t                     = NULL;
+        glusterd_conf_t     *priv   = NULL;
+
+        GF_ASSERT (req);
+        GF_ASSERT (this);
+        priv = this->private;
+
+        ret = xdr_to_generic (req->msg[0], &umnt_req,
+                              (xdrproc_t)xdr_gf1_cli_umount_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                /* the two literals used to join as "umountrequest" */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode umount "
+                        "request");
+                req->rpc_err = GARBAGE_ARGS;
+                rsp.op_ret = -1;
+                goto out;
+        }
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_UMOUNT_REQ_RCVD,
+                "Received umount req");
+
+        if (dict_get_str (this->options, "mountbroker-root",
+                          &mountbroker_root) != 0) {
+                rsp.op_errno = ENOENT;
+                goto out;
+        }
+
+        /* check if it is allowed to umount path */
+        path = gf_strdup (umnt_req.path);
+        if (!path) {
+                rsp.op_errno = ENOMEM;
+                goto out;
+        }
+        dir_ok = _gf_false;
+        /* dirname() may modify its argument, hence the private copy */
+        pdir = dirname (path);
+        t = strtail (pdir, mountbroker_root);
+        if (t && *t == '/') {
+                t = strtail(++t, MB_HIVE);
+                if (t && !*t)
+                        dir_ok = _gf_true;
+        }
+        GF_FREE (path);
+        if (!dir_ok) {
+                rsp.op_errno = EACCES;
+                goto out;
+        }
+
+        /* the big lock is dropped around the unmount itself */
+        synclock_unlock (&priv->big_lock);
+
+        if (umnt_req.lazy) {
+                rsp.op_ret = gf_umount_lazy (this->name, umnt_req.path, 0);
+        } else {
+                runinit (&runner);
+                runner_add_args (&runner, _PATH_UMOUNT, umnt_req.path, NULL);
+                rsp.op_ret = runner_run (&runner);
+        }
+
+        synclock_lock (&priv->big_lock);
+        if (rsp.op_ret == 0) {
+                if (realpath (umnt_req.path, mntp))
+                        sys_rmdir (mntp);
+                else {
+                        rsp.op_ret = -1;
+                        rsp.op_errno = errno;
+                }
+                if (sys_unlink (umnt_req.path) != 0) {
+                        rsp.op_ret = -1;
+                        rsp.op_errno = errno;
+                }
+        }
+
+ out:
+        if (rsp.op_errno)
+                rsp.op_ret = -1;
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf1_cli_umount_rsp);
+        ret = 0;
+
+        /* malloced by xdr; stays NULL (a no-op here) if decoding failed */
+        free (umnt_req.path);
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        return ret;
+}
+
+/* RPC entry point for umount requests; dispatches the handler body through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_umount (rpcsvc_request_t *req)
+{
+        return glusterd_big_locked_handler (req,
+                                            __glusterd_handle_umount);
+}
+
+/*
+ * Remove the friend identified by uuid and/or hostname.
+ *
+ * Returns -1 when no matching peer exists.  Otherwise the peer's volumes
+ * are cleaned up (a failure there is only logged as a warning) and the
+ * result of glusterd_peerinfo_cleanup() is returned.
+ */
+int
+glusterd_friend_remove (uuid_t uuid, char *hostname)
+{
+        glusterd_peerinfo_t    *peerinfo = NULL;
+        int                     ret      = -1;
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find (uuid, hostname);
+        if (peerinfo != NULL) {
+                ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid);
+                if (ret)
+                        gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOL_CLEANUP_FAIL,
+                                "Volumes cleanup failed");
+        }
+
+        rcu_read_unlock ();
+
+        if (peerinfo != NULL) {
+                /* glusterd_peerinfo_cleanup must be called from outside a
+                 * critical section, so the read lock is released first.
+                 */
+                ret = glusterd_peerinfo_cleanup (peerinfo);
+        }
+
+        gf_msg_debug (THIS->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Create an rpc client from the given options, register notify_fn with
+ * notify_data on it and start it.
+ *
+ * *rpc is set to the new client as soon as the notify registration has
+ * been attempted, before its result is checked.  If registration or start
+ * fails, the client is unreferenced and the error is returned; *rpc is
+ * not reset in that case.
+ */
+int
+glusterd_rpc_create (struct rpc_clnt **rpc,
+                     dict_t *options,
+                     rpc_clnt_notify_t notify_fn,
+                     void *notify_data)
+{
+        xlator_t               *this = NULL;
+        struct rpc_clnt        *clnt = NULL;
+        int                     ret  = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (options);
+
+        /* TODO: is a request pool size of 16 enough? */
+        clnt = rpc_clnt_new (options, this, this->name, 16);
+        if (clnt == NULL)
+                goto out;
+
+        ret = rpc_clnt_register_notify (clnt, notify_fn, notify_data);
+        *rpc = clnt;
+        if (ret == 0)
+                ret = rpc_clnt_start (clnt);
+
+out:
+        if (ret && clnt)
+                (void) rpc_clnt_unref (clnt);
+
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Read the keepalive interval, keepalive time and tcp user timeout from
+ * the xlator options into the caller's variables.
+ *
+ * Lookup failures are ignored, so an output is left as the caller set it
+ * when its option is absent.  Always returns 0.
+ */
+int
+glusterd_transport_keepalive_options_get (int *interval, int *time,
+                                          int *timeout)
+{
+        xlator_t       *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        (void) dict_get_int32 (this->options,
+                               "transport.socket.keepalive-interval",
+                               interval);
+        (void) dict_get_int32 (this->options,
+                               "transport.socket.keepalive-time",
+                               time);
+        (void) dict_get_int32 (this->options,
+                               "transport.tcp-user-timeout",
+                               timeout);
+
+        return 0;
+}
+
+/*
+ * Build the transport options dictionary for connecting to hostname:port.
+ *
+ * A port of 0 selects GLUSTERD_DEFAULT_PORT.  On top of the default inet
+ * options, "frame-timeout" is set to 600 and keepalive options are applied
+ * when an interval or time is configured.  *options is set once the
+ * frame-timeout has been stored, even if applying the keepalive options
+ * then fails; that failure is still returned.
+ */
+int
+glusterd_transport_inet_options_build (dict_t **options, const char *hostname,
+                                       int port)
+{
+        dict_t         *opts       = NULL;
+        int32_t         ka_intvl   = -1;
+        int32_t         ka_time    = -1;
+        int32_t         ka_timeout = -1;
+        int             ret        = 0;
+
+        GF_ASSERT (options);
+        GF_ASSERT (hostname);
+
+        if (port == 0)
+                port = GLUSTERD_DEFAULT_PORT;
+
+        /* Start from the default transport options */
+        ret = rpc_transport_inet_options_build (&opts, hostname, port);
+        if (ret)
+                goto out;
+
+        /* frame-timeout is set to 10 minutes: the default of 30 minutes is
+         * too long compared with the 2 minute cli timeout, and users should
+         * not have to wait long after a cli timeout before resuming normal
+         * operations.
+         */
+        ret = dict_set_int32 (opts, "frame-timeout", 600);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set frame-timeout");
+                goto out;
+        }
+
+        /* Apply keepalive options when configured */
+        glusterd_transport_keepalive_options_get (&ka_intvl, &ka_time,
+                                                  &ka_timeout);
+
+        if ((ka_intvl > 0) || (ka_time > 0))
+                ret = rpc_transport_keepalive_options_set (opts, ka_intvl,
+                                                           ka_time,
+                                                           ka_timeout);
+        *options = opts;
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Create the rpc client used to talk to a peer.
+ *
+ * A peer context is allocated and filled with the optional args, the
+ * peer's uuid, a copy of its hostname and its generation number.  The
+ * transport options are built for the peer's hostname and port; the local
+ * bind address and ping-timeout are copied over from the xlator options
+ * when present, and SSL is enabled when management encryption is on.
+ *
+ * On success the peer context is left with the rpc client as its notify
+ * data; on failure the context is freed here.
+ */
+int
+glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+                            glusterd_peerctx_args_t *args)
+{
+        glusterd_peerctx_t     *peerctx = NULL;
+        dict_t                 *options = NULL;
+        data_t                 *opt     = NULL;
+        int                     ret     = -1;
+
+        peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t);
+        if (peerctx == NULL)
+                goto out;
+
+        if (args != NULL)
+                peerctx->args = *args;
+
+        gf_uuid_copy (peerctx->peerid, peerinfo->uuid);
+        peerctx->peername = gf_strdup (peerinfo->hostname);
+        /* A peerinfo's generation number can be used to uniquely identify
+         * a peerinfo.
+         */
+        peerctx->peerinfo_gen = peerinfo->generation;
+
+        ret = glusterd_transport_inet_options_build (&options,
+                                                     peerinfo->hostname,
+                                                     peerinfo->port);
+        if (ret)
+                goto out;
+
+        /*
+         * For simulated multi-node testing the RPC endpoint must be created
+         * with the same address the peer would use to reach us.
+         */
+        if (this->options) {
+                opt = dict_get (this->options,
+                                "transport.socket.bind-address");
+                if (opt != NULL)
+                        ret = dict_set (options,
+                                        "transport.socket.source-addr", opt);
+
+                opt = dict_get (this->options, "ping-timeout");
+                if (opt != NULL)
+                        ret = dict_set (options, "ping-timeout", opt);
+        }
+
+        /* Encrypt the client connection when management encryption is
+         * enabled.
+         */
+        if (this->ctx->secure_mgmt) {
+                ret = dict_set_str (options, "transport.socket.ssl-enabled",
+                                    "on");
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "failed to set ssl-enabled in dict");
+                        goto out;
+                }
+        }
+
+        ret = glusterd_rpc_create (&peerinfo->rpc, options,
+                                   glusterd_peer_rpc_notify, peerctx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_CREATE_FAIL,
+                        "failed to create rpc for peer %s",
+                        peerinfo->hostname);
+                goto out;
+        }
+
+        /* the context was handed to the rpc client; do not free it below */
+        peerctx = NULL;
+        ret = 0;
+
+out:
+        GF_FREE (peerctx);
+        return ret;
+}
+
+/*
+ * Create a new peerinfo for @hoststr:@port in state @state, link it into the
+ * peer list and, unless @restore is set, persist it and create its RPC.
+ *
+ * On success *friend points at the new peerinfo and 0 is returned. If
+ * storing the peerinfo or creating the RPC fails, the peerinfo is cleaned
+ * up, *friend is reset to NULL and the error is returned.
+ */
+int
+glusterd_friend_add (const char *hoststr, int port,
+                     glusterd_friend_sm_state_t state,
+                     uuid_t *uuid,
+                     glusterd_peerinfo_t **friend,
+                     gf_boolean_t restore,
+                     glusterd_peerctx_args_t *args)
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *conf = this->private;
+        int              ret  = 0;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (hoststr);
+        GF_ASSERT (friend);
+
+        *friend = glusterd_peerinfo_new (state, uuid, hoststr, port);
+        if (!*friend) {
+                ret = -1;
+                goto out;
+        }
+
+        /*
+         * The peer must be linked into the list BEFORE
+         * glusterd_friend_rpc_create is called. Even when that call
+         * succeeds, the callback that unlinks and frees the peer may already
+         * have run (notably for an invalid peer name); adding afterwards
+         * would insert something that had just been freed, and we would
+         * likely crash later.
+         */
+        cds_list_add_tail_rcu (&(*friend)->uuid_list, &conf->peers);
+
+        /* Restore first builds the complete list of peers and only then
+         * creates the rpcs, so that quorum is tracked in a race-free manner:
+         * creating the rpc per peer during restore would call rpc_notify
+         * while the friend list is only partially constructed, leading to
+         * wrong quorum calculations. */
+        if (!restore) {
+                ret = glusterd_store_peerinfo (*friend);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PEERINFO_CREATE_FAIL,
+                                "Failed to store peerinfo");
+                } else {
+                        ret = glusterd_friend_rpc_create (this, *friend, args);
+                }
+        }
+
+        if (ret) {
+                (void) glusterd_peerinfo_cleanup (*friend);
+                *friend = NULL;
+        }
+
+out:
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_CONNECT_RETURNED, "connect returned %d", ret);
+        return ret;
+}
+
+/* glusterd_friend_add_from_peerinfo() adds a new peer into the local friends
+ * list from a pre created @peerinfo object. It otherwise works similarly to
+ * glusterd_friend_add()
+ *
+ * @friend:  pre-created peerinfo; must not be NULL.
+ * @restore: when set, the peer is only linked into the list; it is neither
+ *           stored nor given an RPC.
+ * @args:    passed through to glusterd_friend_rpc_create().
+ *
+ * Returns 0 on success and non-zero on failure, including -1 when @friend is
+ * NULL. This function does not clean up @friend on failure.
+ */
+int
+glusterd_friend_add_from_peerinfo (glusterd_peerinfo_t *friend,
+                                   gf_boolean_t restore,
+                                   glusterd_peerctx_args_t *args)
+{
+        /* Start out as failure so a rejected NULL @friend is not reported
+         * as success. */
+        int              ret  = -1;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+
+        this = THIS;
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_VALIDATE_OR_GOTO (this->name, (friend != NULL), out);
+
+        ret = 0;
+
+        /*
+         * We can't add to the list after calling glusterd_friend_rpc_create,
+         * even if it succeeds, because by then the callback to take it back
+         * off and free might have happened already (notably in the case of an
+         * invalid peer name). That would mean we're adding something that had
+         * just been freed, and we're likely to crash later.
+         */
+        cds_list_add_tail_rcu (&friend->uuid_list, &conf->peers);
+
+        //restore needs to first create the list of peers, then create rpcs
+        //to keep track of quorum in race-free manner. In restore for each peer
+        //rpc-create calls rpc_notify when the friend-list is partially
+        //constructed, leading to wrong quorum calculations.
+        if (!restore) {
+                ret = glusterd_store_peerinfo (friend);
+                if (ret == 0) {
+                        ret = glusterd_friend_rpc_create (this, friend, args);
+                }
+                else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PEERINFO_CREATE_FAIL,
+                                "Failed to store peerinfo");
+                }
+        }
+
+out:
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_CONNECT_RETURNED,
+                "connect returned %d", ret);
+        return ret;
+}
+
+int /* glusterd_probe_begin: begin probing the peer named @hoststr.
+ *
+ * The host is looked up in the peer list while the RCU read lock is held:
+ *  - unknown host: a new peer is added through glusterd_friend_add(), with
+ *    @req and @dict passed along in the peerctx args. If the add succeeded
+ *    but the peer is not connected yet, GLUSTERD_CONNECTION_AWAITED is
+ *    returned.
+ *  - connected, befriended peer: if it is being detached, -1 is returned
+ *    and *op_errno (when @op_errno is non-NULL) is set to
+ *    GF_PROBE_FRIEND_DETACHING. Otherwise the peer's hostname is updated, a
+ *    GD_FRIEND_EVENT_NEW_NAME event is injected and the CLI is answered
+ *    with GF_PROBE_SUCCESS.
+ *  - any other known peer: the CLI is answered with GF_PROBE_FRIEND and 0
+ *    is returned.
+ */
+glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ dict_t *dict, int *op_errno)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_args_t args = {0};
+ glusterd_friend_sm_event_t *event = NULL;
+
+ GF_ASSERT (hoststr);
+
+ rcu_read_lock (); /* released at 'out'; every path below ends there */
+ peerinfo = glusterd_peerinfo_find (NULL, hoststr); /* lookup by hostname only */
+
+ if (peerinfo == NULL) {
+ gf_msg ("glusterd", GF_LOG_INFO, 0,
+ GD_MSG_PEER_NOT_FOUND, "Unable to find peerinfo"
+ " for host: %s (%d)", hoststr, port);
+ args.mode = GD_MODE_ON;
+ args.req = req;
+ args.dict = dict;
+ ret = glusterd_friend_add (hoststr, port,
+ GD_FRIEND_STATE_DEFAULT,
+ NULL, &peerinfo, 0, &args);
+ if ((!ret) && (!peerinfo->connected)) { /* added, but not connected yet */
+ ret = GLUSTERD_CONNECTION_AWAITED;
+ }
+
+ } else if (peerinfo->connected &&
+ (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) {
+ if (peerinfo->detaching) {
+ ret = -1;
+ if (op_errno)
+ *op_errno = GF_PROBE_FRIEND_DETACHING;
+ goto out;
+ }
+ ret = glusterd_peer_hostname_update (peerinfo, hoststr,
+ _gf_false);
+ if (ret)
+ goto out;
+ // Injecting a NEW_NAME event to update cluster
+ ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_NEW_NAME,
+ &event);
+ if (!ret) {
+ event->peername = gf_strdup (peerinfo->hostname); /* NOTE(review): result is not checked for NULL */
+ gf_uuid_copy (event->peerid, peerinfo->uuid);
+
+ ret = glusterd_friend_sm_inject_event (event);
+ glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS,
+ NULL, (char*)hoststr,
+ port, dict); /* sent regardless of the inject result */
+ }
+ } else {
+ glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL,
+ (char*)hoststr, port, dict);
+ ret = 0;
+ }
+
+out:
+ rcu_read_unlock ();
+ gf_msg_debug ("glusterd", 0, "returning %d", ret);
+ return ret;
+}
+
+/*
+ * Begin detaching the peer identified by @uuid / @hoststr.
+ *
+ * The peer is looked up under the RCU read lock. If it exists, has an rpc
+ * and is not already being detached, a GD_FRIEND_EVENT_INIT_REMOVE_FRIEND
+ * event carrying a probe context (@hoststr, @port, @req, @dict) is injected
+ * and the peer is marked as detaching.
+ *
+ * Returns 0 when the event was injected, non-zero otherwise. When the peer
+ * is already being detached, *op_errno (if @op_errno is non-NULL) is set to
+ * GF_DEPROBE_FRIEND_DETACHING.
+ */
+int
+glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+                        uuid_t uuid, dict_t *dict, int *op_errno)
+{
+        int                         ret      = -1;
+        glusterd_peerinfo_t        *peerinfo = NULL;
+        glusterd_friend_sm_event_t *event    = NULL;
+        glusterd_probe_ctx_t       *ctx      = NULL;
+
+        GF_ASSERT (hoststr);
+        GF_ASSERT (req);
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find (uuid, hoststr);
+        if (peerinfo == NULL) {
+                ret = -1;
+                gf_msg ("glusterd", GF_LOG_INFO, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Unable to find peerinfo"
+                        " for host: %s %d", hoststr, port);
+                goto out;
+        }
+
+        if (!peerinfo->rpc) {
+                //handle this case
+                goto out; /* ret is still -1 */
+        }
+
+        if (peerinfo->detaching) {
+                ret = -1;
+                if (op_errno)
+                        *op_errno = GF_DEPROBE_FRIEND_DETACHING;
+                goto out;
+        }
+
+        ret = glusterd_friend_sm_new_event
+                (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, &event);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_NEW_GET_FAIL,
+                        "Unable to get new event");
+                goto out;
+        }
+
+        ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
+
+        if (!ctx) {
+                /* ret is 0 from the successful event allocation above;
+                 * without this reset the caller would be told the detach
+                 * had started although nothing was injected.
+                 * NOTE(review): the freshly created event is not released
+                 * on this path - confirm how it should be freed. */
+                ret = -1;
+                goto out;
+        }
+
+        ctx->hostname = gf_strdup (hoststr);
+        ctx->port = port;
+        ctx->req = req;
+        ctx->dict = dict;
+
+        event->ctx = ctx;
+
+        event->peername = gf_strdup (hoststr);
+        gf_uuid_copy (event->peerid, uuid);
+
+        ret = glusterd_friend_sm_inject_event (event);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL, "Unable to inject event %d, "
+                        "ret = %d", event->event, ret);
+                goto out;
+        }
+        peerinfo->detaching = _gf_true;
+
+out:
+        rcu_read_unlock ();
+        return ret;
+}
+
+
+/*
+ * Reply to a friend-remove request: sends a gd1_mgmt_friend_rsp carrying
+ * op_ret 0, our own uuid and the given @hostname / @port.
+ *
+ * Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port)
+{
+        glusterd_conf_t     *conf = NULL;
+        xlator_t            *this = NULL;
+        int32_t              ret  = -1;
+        gd1_mgmt_friend_rsp  rsp  = {{0}, };
+
+        GF_ASSERT (hostname);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+
+        /* Fill in the response: always success, identified by our uuid. */
+        rsp.op_ret   = 0;
+        rsp.hostname = hostname;
+        rsp.port     = port;
+        gf_uuid_copy (rsp.uuid, MY_UUID);
+
+        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_RESPONSE_INFO,
+                "Responded to %s (%d), ret: %d", hostname, port, ret);
+
+        return ret;
+}
+
+
+/*
+ * Reply to a friend-add request with a gd1_mgmt_friend_rsp carrying our
+ * uuid, @op_ret / @op_errno, a copy of @myhostname and @port.
+ * @remote_hostname is only used for logging.
+ *
+ * Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname,
+                               char *remote_hostname, int port, int32_t op_ret,
+                               int32_t op_errno)
+{
+        glusterd_conf_t     *conf = NULL;
+        xlator_t            *this = NULL;
+        int32_t              ret  = -1;
+        gd1_mgmt_friend_rsp  rsp  = {{0}, };
+
+        GF_ASSERT (myhostname);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+
+        gf_uuid_copy (rsp.uuid, MY_UUID);
+        rsp.port     = port;
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = op_errno;
+        /* The response carries its own copy of the name; freed below. */
+        rsp.hostname = gf_strdup (myhostname);
+
+        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_RESPONSE_INFO,
+                "Responded to %s (%d), ret: %d, op_ret: %d", remote_hostname,
+                port, ret, op_ret);
+
+        GF_FREE (rsp.hostname);
+        return ret;
+}
+
+/*
+ * Fill @errstr (of size @len) with the message to show for a probe result.
+ *
+ * A non-empty @op_errstr always wins and is copied verbatim. Otherwise the
+ * message is derived from @op_errno, interpreted according to whether the
+ * probe succeeded (@op_ret == 0) or failed. @errstr is left untouched when
+ * the probe succeeded with @op_errno == 0.
+ *
+ * GF_PROBE_FRIEND_DETACHING is handled for both outcomes because
+ * glusterd_probe_begin() reports it together with a failing return value;
+ * without the failure-side case it would be rendered through strerror().
+ */
+static void
+set_probe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr,
+                     size_t len, char *hostname, int port)
+{
+        if ((op_errstr) && (strcmp (op_errstr, ""))) {
+                snprintf (errstr, len, "%s", op_errstr);
+                return;
+        }
+
+        if (!op_ret) {
+                switch (op_errno) {
+                        case GF_PROBE_LOCALHOST:
+                                snprintf (errstr, len, "Probe on localhost not "
+                                          "needed");
+                                break;
+
+                        case GF_PROBE_FRIEND:
+                                snprintf (errstr, len, "Host %s port %d already"
+                                          " in peer list", hostname, port);
+                                break;
+
+                        case GF_PROBE_FRIEND_DETACHING:
+                                snprintf (errstr, len, "Peer is already being "
+                                          "detached from cluster.\n"
+                                          "Check peer status by running "
+                                          "gluster peer status");
+                                break;
+                        default:
+                                if (op_errno != 0)
+                                        snprintf (errstr, len, "Probe returned "
+                                                  "with %s",
+                                                  strerror (op_errno));
+                                break;
+                }
+        } else {
+                switch (op_errno) {
+                        case GF_PROBE_ANOTHER_CLUSTER:
+                                snprintf (errstr, len, "%s is either already "
+                                          "part of another cluster or having "
+                                          "volumes configured", hostname);
+                                break;
+
+                        case GF_PROBE_VOLUME_CONFLICT:
+                                snprintf (errstr, len, "Atleast one volume on "
+                                          "%s conflicts with existing volumes "
+                                          "in the cluster", hostname);
+                                break;
+
+                        case GF_PROBE_UNKNOWN_PEER:
+                                snprintf (errstr, len, "%s responded with "
+                                          "'unknown peer' error, this could "
+                                          "happen if %s doesn't have localhost "
+                                          "in its peer database", hostname,
+                                          hostname);
+                                break;
+
+                        case GF_PROBE_ADD_FAILED:
+                                snprintf (errstr, len, "Failed to add peer "
+                                          "information on %s", hostname);
+                                break;
+
+                        case GF_PROBE_SAME_UUID:
+                                snprintf (errstr, len, "Peer uuid (host %s) is "
+                                          "same as local uuid", hostname);
+                                break;
+
+                        case GF_PROBE_QUORUM_NOT_MET:
+                                snprintf (errstr, len, "Cluster quorum is not "
+                                          "met. Changing peers is not allowed "
+                                          "in this state");
+                                break;
+
+                        case GF_PROBE_MISSED_SNAP_CONFLICT:
+                                snprintf (errstr, len, "Failed to update "
+                                          "list of missed snapshots from "
+                                          "peer %s", hostname);
+                                break;
+
+                        case GF_PROBE_SNAP_CONFLICT:
+                                snprintf (errstr, len, "Conflict in comparing "
+                                          "list of snapshots from "
+                                          "peer %s", hostname);
+                                break;
+
+                        case GF_PROBE_FRIEND_DETACHING:
+                                /* Reported with a failing op_ret by
+                                 * glusterd_probe_begin(); use the same text
+                                 * as on the success side rather than
+                                 * strerror() of an enum value. */
+                                snprintf (errstr, len, "Peer is already being "
+                                          "detached from cluster.\n"
+                                          "Check peer status by running "
+                                          "gluster peer status");
+                                break;
+
+                        default:
+                                snprintf (errstr, len, "Probe returned with "
+                                          "%s", strerror (op_errno));
+                                break;
+                }
+        }
+}
+
+/*
+ * Send the CLI response for a peer probe.
+ *
+ * The error string is derived from @op_ret / @op_errno / @op_errstr via
+ * set_probe_error_str(), the outcome is written to the command log together
+ * with the "cmd-str" stored in @dict (when present), and the reply is
+ * submitted on @req. @dict, when non-NULL, is unref'd before returning.
+ *
+ * Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret,
+                              int32_t op_errno, char *op_errstr, char *hostname,
+                              int port, dict_t *dict)
+{
+        gf_cli_rsp  rsp          = {0,};
+        int32_t     ret          = -1;
+        char        errstr[2048] = {0,};
+        char       *cmd_str      = NULL;
+        xlator_t   *this         = THIS;
+
+        GF_ASSERT (req);
+        GF_ASSERT (this);
+
+        (void) set_probe_error_str (op_ret, op_errno, op_errstr, errstr,
+                                    sizeof (errstr), hostname, port);
+
+        if (dict) {
+                ret = dict_get_str (dict, "cmd-str", &cmd_str);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_CMDSTR_NOTFOUND_IN_DICT, "Failed to get "
+                                "command string");
+        }
+
+        rsp.op_ret = op_ret;
+        rsp.op_errno = op_errno;
+        rsp.op_errstr = (errstr[0] != '\0') ? errstr : "";
+
+        /* cmd_str stays NULL when there is no dict or no "cmd-str" key;
+         * passing NULL to a %s conversion is undefined behaviour, so log an
+         * explicit placeholder instead. */
+        gf_cmd_log ("", "%s : %s %s %s", cmd_str ? cmd_str : "(null)",
+                    (op_ret) ? "FAILED" : "SUCCESS",
+                    (errstr[0] != '\0') ? ":" : " ",
+                    (errstr[0] != '\0') ? errstr : " ");
+
+        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gf_cli_rsp);
+
+        if (dict)
+                dict_unref (dict);
+        gf_msg_debug (this->name, 0, "Responded to CLI, ret: %d", ret);
+
+        return ret;
+}
+
+/*
+ * Fill @errstr (of size @len) with the message to show for a detach result.
+ *
+ * A non-empty @op_errstr is copied verbatim. Otherwise, and only when the
+ * detach failed (@op_ret != 0), the message is chosen from @op_errno.
+ * @errstr is left untouched for a successful detach.
+ */
+static void
+set_deprobe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr,
+                       size_t len, char *hostname)
+{
+        if ((op_errstr) && (strcmp (op_errstr, ""))) {
+                snprintf (errstr, len, "%s", op_errstr);
+                return;
+        }
+
+        /* Nothing to report for a successful detach. */
+        if (!op_ret)
+                return;
+
+        switch (op_errno) {
+        case GF_DEPROBE_LOCALHOST:
+                snprintf (errstr, len, "%s is localhost", hostname);
+                break;
+
+        case GF_DEPROBE_NOT_FRIEND:
+                snprintf (errstr, len, "%s is not part of cluster", hostname);
+                break;
+
+        case GF_DEPROBE_BRICK_EXIST:
+                snprintf (errstr, len,
+                          "Brick(s) with the peer %s exist in cluster",
+                          hostname);
+                break;
+
+        case GF_DEPROBE_FRIEND_DOWN:
+                snprintf (errstr, len,
+                          "One of the peers is probably down. Check with "
+                          "'peer status'");
+                break;
+
+        case GF_DEPROBE_QUORUM_NOT_MET:
+                snprintf (errstr, len,
+                          "Cluster quorum is not met. Changing peers is not "
+                          "allowed in this state");
+                break;
+
+        case GF_DEPROBE_FRIEND_DETACHING:
+                snprintf (errstr, len,
+                          "Peer is already being detached from cluster.\n"
+                          "Check peer status by running gluster peer status");
+                break;
+
+        default:
+                snprintf (errstr, len, "Detach returned with %s",
+                          strerror (op_errno));
+                break;
+        }
+}
+
+
+/*
+ * Send the CLI response for a peer detach.
+ *
+ * The error string is derived from @op_ret / @op_errno / @op_errstr via
+ * set_deprobe_error_str(), the outcome is written to the command log
+ * together with the "cmd-str" stored in @dict (when present), and the reply
+ * is submitted on @req. @dict is not unref'd here.
+ *
+ * Returns the result of glusterd_submit_reply().
+ */
+int
+glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret,
+                                int32_t op_errno, char *op_errstr,
+                                char *hostname, dict_t *dict)
+{
+        gf_cli_rsp  rsp          = {0,};
+        int32_t     ret          = -1;
+        char       *cmd_str      = NULL;
+        char        errstr[2048] = {0,};
+
+        GF_ASSERT (req);
+
+        (void) set_deprobe_error_str (op_ret, op_errno, op_errstr, errstr,
+                                      sizeof (errstr), hostname);
+
+        if (dict) {
+                ret = dict_get_str (dict, "cmd-str", &cmd_str);
+                if (ret)
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_CMDSTR_NOTFOUND_IN_DICT, "Failed to get "
+                                "command string");
+        }
+
+        rsp.op_ret = op_ret;
+        rsp.op_errno = op_errno;
+        rsp.op_errstr = (errstr[0] != '\0') ? errstr : "";
+
+        /* cmd_str stays NULL when there is no dict or no "cmd-str" key;
+         * passing NULL to a %s conversion is undefined behaviour, so log an
+         * explicit placeholder instead. */
+        gf_cmd_log ("", "%s : %s %s %s", cmd_str ? cmd_str : "(null)",
+                    (op_ret) ? "FAILED" : "SUCCESS",
+                    (errstr[0] != '\0') ? ":" : " ",
+                    (errstr[0] != '\0') ? errstr : " ");
+
+        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gf_cli_rsp);
+
+        gf_msg_debug (THIS->name, 0, "Responded to CLI, ret: %d", ret);
+
+        return ret;
+}
+
+/*
+ * Answer a CLI peer-list request.
+ *
+ * Every peer in the peer list is added to a fresh dictionary through
+ * gd_add_peer_detail_to_dict(); when @flags is GF_CLI_LIST_POOL_NODES an
+ * extra "localhost" entry carrying our own uuid is appended. The dictionary
+ * is serialized into the response, whose op_ret reflects any failure.
+ *
+ * The reply is always submitted and the function always returns 0.
+ */
+int32_t
+glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags)
+{
+        gf1_cli_peer_list_rsp  rsp             = {0,};
+        char                   my_uuid_str[64] = {0,};
+        char                   key[256]        = {0,};
+        glusterd_peerinfo_t   *entry           = NULL;
+        glusterd_conf_t       *priv            = NULL;
+        dict_t                *friends         = NULL;
+        int32_t                count           = 0;
+        int32_t                ret             = -1;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        friends = dict_new ();
+        if (!friends) {
+                gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out of Memory");
+                goto out;
+        }
+
+        /* An empty peer list is not an error, so start from success. */
+        ret = 0;
+
+        rcu_read_lock ();
+        if (!cds_list_empty (&priv->peers)) {
+                cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) {
+                        count++;
+                        ret = gd_add_peer_detail_to_dict (entry,
+                                                          friends, count);
+                        if (ret)
+                                break;
+                }
+        }
+        rcu_read_unlock ();
+
+        if (ret)
+                goto out;
+
+        if (flags == GF_CLI_LIST_POOL_NODES) {
+                /* Describe this node itself as one more entry. */
+                count++;
+
+                snprintf (key, sizeof (key), "friend%d.uuid", count);
+                uuid_utoa_r (MY_UUID, my_uuid_str);
+                ret = dict_set_str (friends, key, my_uuid_str);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "friend%d.hostname", count);
+                ret = dict_set_str (friends, key, "localhost");
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "friend%d.connected", count);
+                ret = dict_set_int32 (friends, key, 1);
+                if (ret)
+                        goto out;
+        }
+
+        ret = dict_set_int32 (friends, "count", count);
+        if (ret)
+                goto out;
+
+        ret = dict_allocate_and_serialize (friends, &rsp.friends.friends_val,
+                                           &rsp.friends.friends_len);
+        if (ret)
+                goto out;
+
+        ret = 0;
+out:
+        if (friends)
+                dict_unref (friends);
+
+        rsp.op_ret = ret;
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
+        GF_FREE (rsp.friends.friends_val);
+
+        /* Failures are reported to the CLI through rsp.op_ret only. */
+        return 0;
+}
+
+int32_t /* glusterd_get_volumes: answer a CLI "get volume(s)" request.
+ *
+ * Depending on @flags the response dictionary is filled with details of
+ * all volumes (GF_CLI_GET_VOLUME_ALL), of the volume following the one
+ * named by "volname" in @dict (GF_CLI_GET_NEXT_VOLUME; the first volume
+ * when "volname" is absent), or of the single volume named by "volname"
+ * (GF_CLI_GET_VOLUME). The number of volumes added is stored under "count".
+ *
+ * The reply is always submitted and the function always returns 0; errors
+ * are conveyed to the CLI through rsp.op_ret / op_errstr / op_errno.
+ */
+glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags)
+{
+ int32_t ret = -1;
+ int32_t ret_bkp = 0; /* set to -1 to report "Volume does not exist" at 'out' */
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *entry = NULL;
+ int32_t count = 0; /* number of volumes added to 'volumes' */
+ dict_t *volumes = NULL;
+ gf_cli_rsp rsp = {0,};
+ char *volname = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ volumes = dict_new ();
+ if (!volumes) {
+ gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+ GD_MSG_NO_MEMORY, "Out of Memory");
+ goto out;
+ }
+
+ if (cds_list_empty (&priv->volumes)) {
+ if (flags == GF_CLI_GET_VOLUME) /* a specific volume was asked for but none exist */
+ ret_bkp = -1;
+ ret = 0;
+ goto respond;
+ }
+ if (flags == GF_CLI_GET_VOLUME_ALL) {
+ cds_list_for_each_entry (entry, &priv->volumes, vol_list) {
+ ret = glusterd_add_volume_detail_to_dict (entry,
+ volumes, count);
+ if (ret)
+ goto respond;
+
+ count++;
+
+ }
+
+ } else if (flags == GF_CLI_GET_NEXT_VOLUME) {
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) { /* no volname given: start from the first volume in the list */
+ if (priv->volumes.next) {
+ entry = cds_list_entry (priv->volumes.next,
+ typeof (*entry),
+ vol_list);
+ }
+ } else {
+ ret = glusterd_volinfo_find (volname, &entry);
+ if (ret)
+ goto respond;
+ entry = cds_list_entry (entry->vol_list.next,
+ typeof (*entry),
+ vol_list); /* advance to the volume after 'volname' */
+ }
+
+ if (&entry->vol_list == &priv->volumes) { /* wrapped around to the list head: no next volume */
+ goto respond;
+ } else {
+ ret = glusterd_add_volume_detail_to_dict (entry,
+ volumes, count);
+ if (ret)
+ goto respond;
+
+ count++;
+ }
+ } else if (flags == GF_CLI_GET_VOLUME) {
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret)
+ goto respond;
+
+ ret = glusterd_volinfo_find (volname, &entry);
+ if (ret) {
+ ret_bkp = ret; /* remembered because 'respond' overwrites ret */
+ goto respond;
+ }
+
+ ret = glusterd_add_volume_detail_to_dict (entry,
+ volumes, count);
+ if (ret)
+ goto respond;
+
+ count++;
+ }
+
+respond:
+ ret = dict_set_int32 (volumes, "count", count); /* overwrites any earlier ret; only ret_bkp survives */
+ if (ret)
+ goto out;
+ ret = dict_allocate_and_serialize (volumes, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret_bkp == -1) {
+ rsp.op_ret = ret_bkp;
+ rsp.op_errstr = "Volume does not exist";
+ rsp.op_errno = EG_NOVOL;
+ } else {
+ rsp.op_ret = ret;
+ rsp.op_errstr = "";
+ }
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ ret = 0; /* the handler itself always reports success to its caller */
+
+ if (volumes)
+ dict_unref (volumes);
+
+ GF_FREE (rsp.dict.dict_val); /* serialized buffer allocated by dict_allocate_and_serialize */
+ return ret;
+}
+
+/*
+ * Handler for the CLI "volume status" request.
+ *
+ * Decodes the request, unserializes its dictionary, rejects status
+ * sub-commands (quotad, snapd, bitd, scrub) that the current cluster
+ * op-version does not allow, and otherwise starts a GD_OP_STATUS_VOLUME
+ * synctask. On any failure a CLI response carrying an error string is sent.
+ */
+int
+__glusterd_handle_status_volume (rpcsvc_request_t *req)
+{
+        char             err_str[2048] = {0,};
+        gf_cli_req       cli_req       = {{0,}};
+        glusterd_op_t    cli_op        = GD_OP_STATUS_VOLUME;
+        glusterd_conf_t *conf          = NULL;
+        xlator_t        *this          = NULL;
+        dict_t          *dict          = NULL;
+        char            *volname       = NULL;
+        uint32_t         cmd           = 0;
+        int32_t          ret           = -1;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len > 0) {
+                dict = dict_new();
+                if (!dict)
+                        goto out;
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len, &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to unserialize buffer");
+                        snprintf (err_str, sizeof (err_str),
+                                  "Unable to decode the command");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_uint32 (dict, "cmd", &cmd);
+        if (ret)
+                goto out;
+
+        /* A volume name is only required when the request is not for all
+         * volumes. */
+        if (!(cmd & GF_CLI_STATUS_ALL)) {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Unable to get volume name");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_FOUND, "%s", err_str);
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_STATUS_VOL_REQ_RCVD,
+                        "Received status volume req for volume %s", volname);
+        }
+
+        /* Reject sub-commands the cluster op-version cannot serve. */
+        if ((cmd & GF_CLI_STATUS_QUOTAD) &&
+            (conf->op_version == GD_OP_VERSION_MIN)) {
+                snprintf (err_str, sizeof (err_str),
+                          "The cluster is operating at version 1. Getting "
+                          "the status of quotad is not allowed in this "
+                          "state.");
+                ret = -1;
+                goto out;
+        }
+
+        if ((cmd & GF_CLI_STATUS_SNAPD) &&
+            (conf->op_version < GD_OP_VERSION_3_6_0)) {
+                snprintf (err_str, sizeof (err_str),
+                          "The cluster is operating at a lesser version than "
+                          "%d. Getting the status of snapd is not allowed in "
+                          "this state",
+                          GD_OP_VERSION_3_6_0);
+                ret = -1;
+                goto out;
+        }
+
+        if ((cmd & GF_CLI_STATUS_BITD) &&
+            (conf->op_version < GD_OP_VERSION_3_7_0)) {
+                snprintf (err_str, sizeof (err_str),
+                          "The cluster is operating at a lesser version than "
+                          "%d. Getting the status of bitd is not allowed in "
+                          "this state",
+                          GD_OP_VERSION_3_7_0);
+                ret = -1;
+                goto out;
+        }
+
+        if ((cmd & GF_CLI_STATUS_SCRUB) &&
+            (conf->op_version < GD_OP_VERSION_3_7_0)) {
+                snprintf (err_str, sizeof (err_str),
+                          "The cluster is operating at a lesser version than "
+                          "%d. Getting the status of scrub is not allowed in "
+                          "this state",
+                          GD_OP_VERSION_3_7_0);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_STATUS_VOLUME, dict);
+
+out:
+        if (ret) {
+                /* Fall back to a generic message when none was prepared. */
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        free (cli_req.dict.dict_val);
+
+        return ret;
+}
+
+/* Entry point for "volume status": runs the real handler through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_status_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_status_volume);
+
+        return ret;
+}
+
+/*
+ * Handler for the CLI "volume clear-locks" request.
+ *
+ * Decodes the request, unserializes its (mandatory) dictionary, checks that
+ * it names a volume and starts a GD_OP_CLEARLOCKS_VOLUME synctask. On any
+ * failure a CLI response carrying an error string is sent.
+ */
+int
+__glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req)
+{
+        char           err_str[2048] = {0,};
+        gf_cli_req     cli_req       = {{0,}};
+        glusterd_op_t  cli_op        = GD_OP_CLEARLOCKS_VOLUME;
+        xlator_t      *this          = NULL;
+        dict_t        *dict          = NULL;
+        char          *volname       = NULL;
+        int32_t        ret           = -1;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* A request without a dictionary cannot name a volume. */
+        if (!cli_req.dict.dict_len) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLI_REQ_EMPTY, "Empty cli request.");
+                goto out;
+        }
+
+        dict = dict_new ();
+
+        ret = dict_unserialize (cli_req.dict.dict_val,
+                                cli_req.dict.dict_len,
+                                &dict);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize req-buffer to dictionary");
+                snprintf (err_str, sizeof (err_str),
+                          "unable to decode the command");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str),
+                          "Unable to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLNAME_NOTFOUND_IN_DICT, "%s", err_str);
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_CLRCLK_VOL_REQ_RCVD,
+                "Received clear-locks volume req for volume %s", volname);
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_CLEARLOCKS_VOLUME, dict);
+
+out:
+        if (ret) {
+                /* Fall back to a generic message when none was prepared. */
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+        free (cli_req.dict.dict_val);
+
+        return ret;
+}
+
+/* Entry point for "volume clear-locks": runs the real handler through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                       __glusterd_handle_cli_clearlocks_volume);
+
+        return ret;
+}
+
+/*
+ * Resolve the volinfo a brick belongs to from its brick id.
+ *
+ * @brickid is split at its first ':'; the part before the separator is
+ * parsed as a volume uuid, which is looked up among the regular volumes
+ * first and among snapshot volumes second.
+ *
+ * Returns 0 with *volinfo set on success, non-zero otherwise (including
+ * when @brickid contains no ':').
+ */
+static int
+get_volinfo_from_brickid (char *brickid, glusterd_volinfo_t **volinfo)
+{
+        xlator_t *this        = NULL;
+        char     *brickid_dup = NULL;
+        char     *sep         = NULL;
+        uuid_t    volid       = {0};
+        int       ret         = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brickid);
+
+        /* Work on a private copy: the separator is overwritten below. */
+        brickid_dup = gf_strdup (brickid);
+        if (!brickid_dup)
+                goto out;
+
+        sep = strchr (brickid_dup, ':');
+        if (!sep) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_NOT_FOUND,
+                        "Invalid brickid");
+                goto out;
+        }
+
+        /* Terminate the copy at the separator so that it holds only the
+         * volume-id text. */
+        *sep = '\0';
+        gf_uuid_parse (brickid_dup, volid);
+
+        ret = glusterd_volinfo_find_by_volume_id (volid, volinfo);
+        if (ret) {
+                /* Check if it is a snapshot volume */
+                ret = glusterd_snap_volinfo_find_by_volume_id (volid, volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOLINFO_GET_FAIL,
+                                "Failed to find volinfo");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        GF_FREE (brickid_dup);
+        return ret;
+}
+
+/*
+ * Handler for the CLI "volume barrier" request.
+ *
+ * Decodes the request, unserializes its (mandatory) dictionary, checks that
+ * it names a volume and starts a GD_OP_BARRIER synctask. On any failure an
+ * "Operation failed" CLI response is sent.
+ */
+static int
+__glusterd_handle_barrier (rpcsvc_request_t *req)
+{
+        gf_cli_req  cli_req = {{0,}};
+        xlator_t   *this    = NULL;
+        dict_t     *dict    = NULL;
+        char       *volname = NULL;
+        int         ret     = -1;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT(this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* The request must carry a dictionary. */
+        if (!cli_req.dict.dict_len) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new();
+        if (!dict) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_unserialize (cli_req.dict.dict_val, cli_req.dict.dict_len,
+                                &dict);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "Failed to unserialize request dictionary.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLNAME_NOTFOUND_IN_DICT,
+                        "Volname not present in dict");
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_BARRIER_VOL_REQ_RCVD,
+                "Received barrier volume request for volume %s", volname);
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_BARRIER, dict);
+
+out:
+        if (ret) {
+                ret = glusterd_op_send_cli_response (GD_OP_BARRIER, ret, 0, req,
+                                                     dict, "Operation failed");
+        }
+        free (cli_req.dict.dict_val);
+        return ret;
+}
+
+/* Entry point for "volume barrier": runs the real handler through
+ * glusterd_big_locked_handler(). */
+int
+glusterd_handle_barrier (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req, __glusterd_handle_barrier);
+
+        return ret;
+}
+
+/*
+ * Answer a CLI "volume get" request.
+ *
+ * @dict must carry "volname" and "key". For the key "all" every volume
+ * option is collected through glusterd_get_default_val_for_volopt().
+ * Otherwise a single "key1"/"value1" pair is added to @dict and "count" is
+ * set to 1:
+ *  - keys matching GD_HOOKS_SPECIFIC_KEY are read from the volume's dict;
+ *  - "cluster.op-version", "config.memory-accounting" and
+ *    "config.transport" are answered from glusterd / volinfo state;
+ *  - any other known option is read from priv->opts, falling back to
+ *    glusterd_get_default_val_for_volopt().
+ * An unknown option yields an error, with a "Did you mean" hint appended
+ * when glusterd_check_option_exists() suggests a name.
+ *
+ * @dict is serialized into the reply, which is always submitted. The
+ * return value is the result of that serialization; option lookup failures
+ * are reported to the CLI through rsp.op_ret / rsp.op_errstr.
+ */
+int32_t
+glusterd_get_volume_opts (rpcsvc_request_t *req, dict_t *dict)
+{
+        int32_t             ret                 = -1;
+        int32_t             count               = 1;
+        int                 exists              = 0;
+        int                 len                 = 0;
+        char               *key                 = NULL;
+        char               *orig_key            = NULL;
+        char               *key_fixed           = NULL;
+        char               *volname             = NULL;
+        char               *value               = NULL;
+        char                err_str[2048]       = {0,};
+        char                dict_key[50]        = {0,};
+        xlator_t           *this                = NULL;
+        glusterd_conf_t    *priv                = NULL;
+        glusterd_volinfo_t *volinfo             = NULL;
+        gf_cli_rsp          rsp                 = {0,};
+        char                op_version_buff[10] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get volume "
+                          "name while handling get volume option command");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLNAME_NOTFOUND_IN_DICT, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "key", &key);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get key "
+                          "while handling get volume option for %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "Received get volume opt request for "
+                      "volume %s", volname);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (err_str, sizeof(err_str),
+                          FMTSTR_CHECK_VOL_EXISTS, volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                goto out;
+        }
+        if (strcmp(key, "all")) {
+                if (fnmatch (GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) {
+                        /* Hook-specific keys live in the volume's own dict. */
+                        snprintf (dict_key, sizeof (dict_key), "key%d", count);
+                        ret = dict_set_str(dict, dict_key, key);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Failed to "
+                                        "set %s in dictionary", key);
+                                goto out;
+                        }
+                        snprintf (dict_key, sizeof (dict_key), "value%d",
+                                  count);
+                        ret = dict_get_str (volinfo->dict, key, &value);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED, "Failed to "
+                                        "get %s in dictionary", key);
+                                goto out;
+                        }
+                        ret = dict_set_str(dict, dict_key, value);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Failed to "
+                                        "set %s in dictionary", key);
+                                goto out;
+                        }
+                } else {
+                        exists = glusterd_check_option_exists (key, &key_fixed);
+                        if (!exists) {
+                                len = snprintf (err_str, sizeof (err_str),
+                                                "Option with name: %s does "
+                                                "not exist", key);
+                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                        GD_MSG_UNKNOWN_KEY, "%s",
+                                        err_str);
+                                /* Append the suggestion after the message
+                                 * just written. The offset used to be 'ret',
+                                 * which is 0 here, so the hint replaced the
+                                 * message instead of following it. */
+                                if (key_fixed && (len > 0) &&
+                                    ((size_t) len < sizeof (err_str)))
+                                        snprintf (err_str + len,
+                                                  sizeof (err_str) - len,
+                                                  "\nDid you mean %s?",
+                                                  key_fixed);
+                                ret = -1;
+                                goto out;
+                        }
+                        if (key_fixed) {
+                                orig_key = key;
+                                key = key_fixed;
+                        }
+                        if (strcmp (key, "cluster.op-version") == 0) {
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "key%d", count);
+                                ret = dict_set_str(dict, dict_key, key);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set %s in dictionary",
+                                                key);
+                                        goto out;
+                                }
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "value%d", count);
+                                snprintf (op_version_buff,
+                                          sizeof (op_version_buff), "%d",
+                                          priv->op_version);
+                                ret = dict_set_str (dict, dict_key,
+                                                    op_version_buff);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set value for key %s in "
+                                                "dictionary", key);
+                                        goto out;
+                                }
+                        } else if (strcmp (key,
+                                           "config.memory-accounting") == 0) {
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "key%d", count);
+                                ret = dict_set_str(dict, dict_key, key);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set %s in dictionary",
+                                                key);
+                                        goto out;
+                                }
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "value%d", count);
+
+                                if (volinfo->memory_accounting)
+                                        ret = dict_set_str(dict, dict_key,
+                                                           "Enabled");
+                                else
+                                        ret = dict_set_str(dict, dict_key,
+                                                           "Disabled");
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set value for key %s in "
+                                                "dictionary", key);
+                                        goto out;
+                                }
+                        } else if (strcmp (key, "config.transport") == 0) {
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "key%d", count);
+                                ret = dict_set_str(dict, dict_key, key);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set %s in dictionary",
+                                                key);
+                                        goto out;
+                                }
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "value%d", count);
+
+                                if (volinfo->transport_type
+                                    == GF_TRANSPORT_RDMA)
+                                        ret = dict_set_str(dict, dict_key,
+                                                           "rdma");
+                                else if (volinfo->transport_type
+                                         == GF_TRANSPORT_TCP)
+                                        ret = dict_set_str(dict, dict_key,
+                                                           "tcp");
+                                else if (volinfo->transport_type ==
+                                         GF_TRANSPORT_BOTH_TCP_RDMA)
+                                        ret = dict_set_str(dict, dict_key,
+                                                           "tcp,rdma");
+                                else
+                                        ret = dict_set_str(dict, dict_key,
+                                                           "none");
+
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set value for key %s in "
+                                                "dictionary", key);
+                                        goto out;
+                                }
+                        } else {
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "key%d", count);
+                                ret = dict_set_str(dict, dict_key, key);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED, "Failed"
+                                                " to set %s in dictionary",
+                                                key);
+                                        goto out;
+                                }
+                                snprintf (dict_key, sizeof (dict_key),
+                                          "value%d", count);
+                                ret = dict_get_str (priv->opts, key, &value);
+                                if (!ret) {
+                                        ret = dict_set_str(dict, dict_key,
+                                                           value);
+                                        if (ret) {
+                                                gf_msg (this->name,
+                                                        GF_LOG_ERROR, 0,
+                                                        GD_MSG_DICT_SET_FAILED,
+                                                        "Failed to set %s in "
+                                                        " dictionary", key);
+                                                goto out;
+                                        }
+                                } else {
+                                        ret = glusterd_get_default_val_for_volopt
+                                                                (dict,
+                                                                 _gf_false,
+                                                                 key, orig_key,
+                                                                 volinfo->dict,
+                                                                 &rsp.op_errstr);
+                                        if (ret && !rsp.op_errstr) {
+                                                snprintf (err_str,
+                                                          sizeof(err_str),
+                                                          "Failed to fetch the "
+                                                          "value of %s, check "
+                                                          "log file for more"
+                                                          " details", key);
+                                        }
+                                        /* NOTE(review): a failure here is
+                                         * overwritten by the "count" update
+                                         * below, so the prepared error is
+                                         * not reported - confirm whether
+                                         * this is intended. */
+                                }
+                        }
+                }
+                /* Request is for a single option, explicitly set count to 1
+                 * in the dictionary.
+                 */
+                ret = dict_set_int32 (dict, "count", 1);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set count "
+                                "value in the dictionary");
+                        goto out;
+                }
+        } else {
+                /* Handle the "all" volume option request */
+                ret = glusterd_get_default_val_for_volopt (dict, _gf_true, NULL,
+                                                           NULL, volinfo->dict,
+                                                           &rsp.op_errstr);
+                if (ret && !rsp.op_errstr) {
+                        snprintf (err_str, sizeof(err_str),
+                                  "Failed to fetch the value of all volume "
+                                  "options, check log file for more details");
+                }
+
+        }
+
+out:
+        if (ret) {
+                if (!rsp.op_errstr)
+                        rsp.op_errstr = err_str;
+                rsp.op_ret = ret;
+        }
+        else {
+                rsp.op_errstr = "";
+                rsp.op_ret = 0;
+        }
+
+        ret = dict_allocate_and_serialize (dict, &rsp.dict.dict_val,
+                                           &rsp.dict.dict_len);
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_cli_rsp);
+        /* The serialized buffer is not needed once the reply has been
+         * submitted; release it, as the other reply helpers in this file
+         * do. */
+        GF_FREE (rsp.dict.dict_val);
+        return ret;
+}
+
+/*
+ * Actor body for the CLI "get volume option" request.
+ *
+ * Decodes the gf_cli_req carried in req->msg[0], unserializes its dictionary
+ * payload when one is present, and hands the request to
+ * glusterd_get_volume_opts().
+ *
+ * Returns the result of glusterd_get_volume_opts() on the normal path, or a
+ * negative value when the request cannot be decoded, the dictionary cannot be
+ * allocated, or the payload cannot be unserialized.  The dictionary reference
+ * taken here is dropped before returning.
+ */
+int
+__glusterd_handle_get_vol_opt (rpcsvc_request_t *req)
+{
+        int32_t     ret = -1;
+        gf_cli_req  cli_req = {{0,}};
+        dict_t     *dict = NULL;
+        char        err_str[2048] = {0,};
+        xlator_t   *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Failed to decode "
+                          "request received from cli");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_str);
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* Report the allocation failure for what it is
+                         * instead of letting it surface as a decode error.
+                         */
+                        snprintf (err_str, sizeof (err_str), "Unable to "
+                                  "allocate memory for the command");
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "%s", err_str);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                }
+
+                /* Attach the decoded buffer to the dict; presumably it is
+                 * released together with the dict -- confirm in dict.c.
+                 */
+                dict->extra_stdfree = cli_req.dict.dict_val;
+        }
+        ret = glusterd_get_volume_opts (req, dict);
+
+out:
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+/*
+ * RPC actor registered for GLUSTER_CLI_GET_VOL_OPT.  Passes the request to
+ * glusterd_big_locked_handler() with __glusterd_handle_get_vol_opt as the
+ * function to run and returns that call's result.
+ */
+int
+glusterd_handle_get_vol_opt (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __glusterd_handle_get_vol_opt);
+
+        return ret;
+}
+/*
+ * Resolve a brick id string of the form "<volume-uuid>:<brick>" to the
+ * matching brickinfo.
+ *
+ * The volume is looked up by uuid first among the regular volumes and then
+ * among the snapshot volumes; the brick part is then resolved inside that
+ * volume.  On success *brickinfo is set by
+ * glusterd_volume_brickinfo_get_by_brick() and 0 is returned; any failure
+ * (allocation, missing ':' separator, unknown volume or brick) yields a
+ * non-zero value.  The input string itself is never modified.
+ */
+static int
+get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo)
+{
+        glusterd_volinfo_t  *vol = NULL;
+        char                *id_copy = NULL;
+        char                *sep = NULL;
+        uuid_t               vol_uuid = {0};
+        int                  ret = -1;
+
+        /* Work on a private copy because the string is split in place */
+        id_copy = gf_strdup (brickid);
+        if (!id_copy)
+                goto out;
+
+        sep = strchr (id_copy, ':');
+        if (!sep)
+                goto out;
+
+        /* id_copy now holds the uuid text, sep + 1 the brick part */
+        *sep = '\0';
+        gf_uuid_parse (id_copy, vol_uuid);
+
+        ret = glusterd_volinfo_find_by_volume_id (vol_uuid, &vol);
+        if (ret) {
+                /* Check if it a snapshot volume */
+                ret = glusterd_snap_volinfo_find_by_volume_id (vol_uuid, &vol);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_volume_brickinfo_get_by_brick (sep + 1, vol,
+                                                      brickinfo,
+                                                      _gf_false);
+out:
+        GF_FREE (id_copy);
+        return ret;
+}
+
+/*
+ * Handle an RPC client event on the connection to a brick process.
+ *
+ * @mydata is the brick id string attached to the rpc client; it is resolved
+ * to a brickinfo through get_brickinfo_from_brickid().
+ *
+ *  - RPC_CLNT_DESTROY:    frees @mydata.  This is done before the brickinfo
+ *                         lookup so the string is released even when the
+ *                         brick can no longer be resolved.
+ *  - RPC_CLNT_CONNECT:    if the brick has a snapshot pending
+ *                         (snap_status == -1) the brick is stopped instead of
+ *                         being marked started; otherwise the connection and
+ *                         brick are marked up and GF_EVENT_CHILD_UP is sent
+ *                         through default_notify().
+ *  - RPC_CLNT_DISCONNECT: marks the connection down and the brick stopped.
+ *
+ * Returns 0 when @mydata is NULL, when the brick cannot be resolved, and on
+ * DESTROY; otherwise the result of the last operation performed for the
+ * event.
+ */
+int
+__glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+                             rpc_clnt_event_t event, void *data)
+{
+        char                 *brickid = NULL;
+        int                   ret = 0;
+        glusterd_conf_t      *conf = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        glusterd_volinfo_t   *volinfo = NULL;
+        xlator_t             *this = NULL;
+
+        brickid = mydata;
+        if (!brickid)
+                return 0;
+
+        /* Freeing the brick id must not depend on the brick still being
+         * resolvable, otherwise the string leaks once the volume is gone.
+         */
+        if (RPC_CLNT_DESTROY == event) {
+                GF_FREE (mydata);
+                return 0;
+        }
+
+        ret = get_brickinfo_from_brickid (brickid, &brickinfo);
+        if (ret)
+                return 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        switch (event) {
+        case RPC_CLNT_CONNECT:
+                /* If a node on coming back up, already starts a brick
+                 * before the handshake, and the notification comes after
+                 * the handshake is done, then we need to check if this
+                 * is a restored brick with a snapshot pending. If so, we
+                 * need to stop the brick
+                 */
+                if (brickinfo->snap_status == -1) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_SNAPSHOT_PENDING,
+                                "Snapshot is pending on %s:%s. "
+                                "Hence not starting the brick",
+                                brickinfo->hostname,
+                                brickinfo->path);
+                        ret = get_volinfo_from_brickid (brickid, &volinfo);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOLINFO_GET_FAIL,
+                                        "Failed to get volinfo from "
+                                        "brickid(%s)", brickid);
+                                goto out;
+                        }
+
+                        ret = glusterd_brick_stop (volinfo, brickinfo,
+                                                   _gf_false);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_BRICK_STOP_FAIL,
+                                        "Unable to stop %s:%s",
+                                        brickinfo->hostname, brickinfo->path);
+                                goto out;
+                        }
+
+                        break;
+                }
+                rpc_clnt_set_connected (&rpc->conn);
+                gf_msg_debug (this->name, 0, "Connected to %s:%s",
+                              brickinfo->hostname, brickinfo->path);
+                glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
+                ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+                break;
+
+        case RPC_CLNT_DISCONNECT:
+                rpc_clnt_unset_connected (&rpc->conn);
+                /* Only log when the brick was believed to be running */
+                if (glusterd_is_brick_started (brickinfo))
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_BRICK_DISCONNECTED,
+                                "Brick %s:%s has disconnected from glusterd.",
+                                brickinfo->hostname, brickinfo->path);
+
+                glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+                break;
+
+        default:
+                gf_msg_trace (this->name, 0,
+                              "got some other RPC event %d", event);
+                break;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Brick rpc-client notification entry point.  Forwards all four arguments to
+ * glusterd_big_locked_notify() together with __glusterd_brick_rpc_notify as
+ * the function to invoke, and returns that call's result.
+ */
+int
+glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+                           rpc_clnt_event_t event, void *data)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_notify (rpc, mydata, event, data,
+                                          __glusterd_brick_rpc_notify);
+
+        return ret;
+}
+
+/*
+ * Answer the pending CLI probe request for a peer with a failure and inject
+ * a GD_FRIEND_EVENT_REMOVE_FRIEND event for that peer.
+ *
+ * The peer is looked up by the generation stored in @peerctx while holding
+ * the RCU read lock.  Returns 0 when the peer is no longer present or when
+ * no request is attached to @peerctx; otherwise returns the result of
+ * creating or injecting the event.  @op_errno is forwarded to the CLI
+ * response.
+ */
+int
+glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx, int32_t op_errno)
+{
+        int                          ret = -1;
+        glusterd_friend_sm_event_t  *rm_event = NULL;
+        glusterd_peerinfo_t         *peer = NULL;
+        rpcsvc_request_t            *cli_req = NULL;
+        char                        *err_text = NULL;
+        dict_t                      *rsp_dict = NULL;
+
+        GF_ASSERT (peerctx);
+
+        rcu_read_lock ();
+        peer = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+        if (!peer) {
+                gf_msg_debug (THIS->name, 0, "Could not find peer %s(%s). "
+                              "Peer could have been deleted.", peerctx->peername,
+                              uuid_utoa (peerctx->peerid));
+                ret = 0;
+                goto out;
+        }
+
+        cli_req = peerctx->args.req;
+        rsp_dict = peerctx->args.dict;
+        err_text = peerctx->errstr;
+
+        ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND,
+                                            &rm_event);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL,
+                        "Unable to create event for removing peer %s",
+                        peer->hostname);
+                goto out;
+        }
+
+        if (!cli_req) {
+                /* NOTE(review): rm_event is neither injected nor released on
+                 * this path -- confirm who owns it.
+                 */
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_EVENT_NEW_GET_FAIL,
+                        "Unable to find the request for responding "
+                        "to User (%s)", peer->hostname);
+                goto out;
+        }
+
+        glusterd_xfer_cli_probe_resp (cli_req, -1, op_errno, err_text,
+                                      peer->hostname,
+                                      peer->port, rsp_dict);
+
+        /* NOTE(review): the gf_strdup() result is not checked here */
+        rm_event->peername = gf_strdup (peer->hostname);
+        gf_uuid_copy (rm_event->peerid, peer->uuid);
+        ret = glusterd_friend_sm_inject_event (rm_event);
+
+out:
+        rcu_read_unlock ();
+        return ret;
+}
+
+/*
+ * Handle an RPC client event on the connection to a peer glusterd.
+ *
+ * @mydata is the glusterd_peerctx_t attached to the rpc client.
+ *
+ *  - RPC_CLNT_DESTROY:    frees the peerctx (errstr, peername and the
+ *                         context itself) and returns 0 without any lookup.
+ *  - RPC_CLNT_CONNECT:    marks the connection and peer as connected, gives
+ *                         the peer a new generation number and starts the
+ *                         handshake through glusterd_peer_dump_version().
+ *  - RPC_CLNT_DISCONNECT: skipped if the connection is already marked
+ *                         disconnected; otherwise calls the unlock routines
+ *                         for the peer, updates its quorum contribution and,
+ *                         for a peer still in GD_FRIEND_STATE_DEFAULT, calls
+ *                         glusterd_friend_remove_notify().
+ *
+ * The peerinfo is looked up by generation under the RCU read lock.  After
+ * the lock is dropped the friend and op state machines are run on every path
+ * through "out", including the lookup-failure path, and
+ * glusterd_do_quorum_action() is called when the quorum contribution changed.
+ *
+ * Returns 0 when @mydata is NULL and on DESTROY, -1 when the peer cannot be
+ * found, otherwise the last value assigned to ret for the event.
+ */
+int
+__glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+ int32_t op_errno = ENOTCONN;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ /* Not initialised here; presumably filled by glusterd_get_lock_owner()
+  * before it is read -- TODO confirm.
+  */
+ uuid_t uuid;
+
+ peerctx = mydata;
+ if (!peerctx)
+ return 0;
+
+ this = THIS;
+ conf = this->private;
+
+ /* Handled before the peerinfo lookup, so the context is released even
+  * when the peer can no longer be found.
+  */
+ if (RPC_CLNT_DESTROY == event) {
+ GF_FREE (peerctx->errstr);
+ GF_FREE (peerctx->peername);
+ GF_FREE (peerctx);
+ return 0;
+ }
+
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ /* Peerinfo should be available at this point if its a connect
+ * event. Not finding it means that something terrible has
+ * happened. For non-connect event we might end up having a null
+ * peerinfo, so log at debug level.
+ */
+ gf_msg (THIS->name, (RPC_CLNT_CONNECT == event) ?
+ GF_LOG_CRITICAL : GF_LOG_DEBUG, ENOENT,
+ GD_MSG_PEER_NOT_FOUND, "Could not find peer "
+ "%s(%s)", peerctx->peername,
+ uuid_utoa (peerctx->peerid));
+
+ ret = -1;
+ goto out;
+ }
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ rpc_clnt_set_connected (&rpc->conn);
+ gf_msg_debug (this->name, 0, "got RPC_CLNT_CONNECT");
+ peerinfo->connected = 1;
+ peerinfo->quorum_action = _gf_true;
+ /* Take the next value of the global generation counter and keep
+  * the peerctx in step with it so later lookups still match.
+  */
+ peerinfo->generation = uatomic_add_return
+ (&conf->generation, 1);
+ peerctx->peerinfo_gen = peerinfo->generation;
+
+ ret = glusterd_peer_dump_version (this, rpc, peerctx);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_HANDSHAKE_FAILED,
+ "glusterd handshake failed");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ /* If DISCONNECT event is already processed, skip the further
+ * ones
+ */
+ if (is_rpc_clnt_disconnected (&rpc->conn))
+ break;
+
+ rpc_clnt_unset_connected (&rpc->conn);
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_PEER_DISCONNECTED,
+ "Peer <%s> (<%s>), in state <%s>, has disconnected "
+ "from glusterd.",
+ peerinfo->hostname, uuid_utoa (peerinfo->uuid),
+ glusterd_friend_sm_state_name_get (peerinfo->state.state));
+
+ if (peerinfo->connected) {
+ /* Below op-version 3.6.0 the single cluster lock is released if
+  * this peer is its owner; otherwise the mgmt_v3 "vol" unlock is
+  * attempted for every volume on behalf of the peer.
+  */
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ glusterd_get_lock_owner (&uuid);
+ if (!gf_uuid_is_null (uuid) &&
+ !gf_uuid_compare (peerinfo->uuid, uuid))
+ glusterd_unlock (peerinfo->uuid);
+ } else {
+ cds_list_for_each_entry (volinfo,
+ &conf->volumes,
+ vol_list) {
+ ret = glusterd_mgmt_v3_unlock
+ (volinfo->volname,
+ peerinfo->uuid,
+ "vol");
+ if (ret)
+ gf_msg (this->name,
+ GF_LOG_WARNING, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Lock not released "
+ "for %s",
+ volinfo->volname);
+ }
+ }
+
+ /* Unlock failures above are only logged; ret is reset to 0 */
+ op_errno = GF_PROBE_ANOTHER_CLUSTER;
+ ret = 0;
+ }
+
+ if ((peerinfo->quorum_contrib != QUORUM_DOWN) &&
+ (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) {
+ peerinfo->quorum_contrib = QUORUM_DOWN;
+ quorum_action = _gf_true;
+ peerinfo->quorum_action = _gf_false;
+ }
+
+ /* Remove peer if it is not a friend and connection/handshake
+ * fails, and notify cli. Happens only during probe.
+ */
+ if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) {
+ glusterd_friend_remove_notify (peerctx, op_errno);
+ goto out;
+ }
+
+ peerinfo->connected = 0;
+ break;
+ }
+
+ default:
+ gf_msg_trace (this->name, 0,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+out:
+ rcu_read_unlock ();
+
+ /* Run outside the RCU read-side critical section */
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ return ret;
+}
+
+/*
+ * Peer rpc-client notification entry point.  Forwards all four arguments to
+ * glusterd_big_locked_notify() together with __glusterd_peer_rpc_notify as
+ * the function to invoke, and returns that call's result.
+ */
+int
+glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+                          rpc_clnt_event_t event, void *data)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_notify (rpc, mydata, event, data,
+                                          __glusterd_peer_rpc_notify);
+
+        return ret;
+}
+
+/*
+ * Actor used for the NULL procedure of the mgmt and peer programs: ignores
+ * the request and always returns 0.
+ */
+int
+glusterd_null (rpcsvc_request_t *req)
+{
+        (void) req;
+
+        return 0;
+}
+
+/*
+ * Actor table of the "GlusterD svc mgmt" program, indexed by procedure
+ * number.  Each entry lists the procedure name, its number and the handler
+ * function; the meaning of the remaining positional fields depends on the
+ * rpcsvc_actor_t layout, which is declared elsewhere.
+ */
+rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
+ [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, 0, DRC_NA},
+};
+
+/*
+ * Program descriptor that exposes gd_svc_mgmt_actors under
+ * GD_MGMT_PROGRAM / GD_MGMT_VERSION.
+ */
+struct rpcsvc_program gd_svc_mgmt_prog = {
+ .progname = "GlusterD svc mgmt",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_VERSION,
+ .numactors = GLUSTERD_MGMT_MAXVALUE,
+ .actors = gd_svc_mgmt_actors,
+ .synctask = _gf_true,
+};
+
+/*
+ * Actor table of the "GlusterD svc peer" program, indexed by procedure
+ * number.  Each entry lists the procedure name, its number and the handler
+ * function; the meaning of the remaining positional fields depends on the
+ * rpcsvc_actor_t layout, which is declared elsewhere.  Every entry carries
+ * the same constant as its procedure number that is used as its index.
+ */
+rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
+ [GLUSTERD_FRIEND_NULL] = { "NULL", GLUSTERD_FRIEND_NULL, glusterd_null, NULL, 0, DRC_NA},
+ [GLUSTERD_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_PROBE_QUERY, glusterd_handle_probe_query, NULL, 0, DRC_NA},
+ [GLUSTERD_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, 0, DRC_NA},
+ [GLUSTERD_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, 0, DRC_NA},
+ [GLUSTERD_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, 0, DRC_NA},
+};
+
+/*
+ * Program descriptor that exposes gd_svc_peer_actors under
+ * GD_FRIEND_PROGRAM / GD_FRIEND_VERSION.  Unlike the mgmt and cli programs
+ * in this file, .synctask is _gf_false here.
+ */
+struct rpcsvc_program gd_svc_peer_prog = {
+ .progname = "GlusterD svc peer",
+ .prognum = GD_FRIEND_PROGRAM,
+ .progver = GD_FRIEND_VERSION,
+ .numactors = GLUSTERD_FRIEND_MAXVALUE,
+ .actors = gd_svc_peer_actors,
+ .synctask = _gf_false,
+};
+
+
+
+/*
+ * Actor table of the "GlusterD svc cli" program, indexed by procedure
+ * number.  Each entry lists the procedure name, its number and the handler
+ * function; the meaning of the remaining positional fields depends on the
+ * rpcsvc_actor_t layout, which is declared elsewhere.
+ *
+ * GETWD, MOUNT and UMOUNT are the only entries whose fifth field is 1.
+ * NOTE(review): presumably that field marks procedures allowed for
+ * unprivileged callers -- confirm against rpcsvc_actor_t.
+ *
+ * A few name strings differ from their index constant (DEPROBE is named
+ * "FRIEND_REMOVE", LOG_ROTATE "LOG FILENAME", PROFILE_VOLUME "STATS_VOLUME",
+ * CLRLOCKS_VOLUME "CLEARLOCKS_VOLUME").
+ */
+rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_UUID_RESET] = { "UUID_RESET", GLUSTER_CLI_UUID_RESET, glusterd_handle_cli_uuid_reset, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GSYNC_SET] = { "GSYNC_SET", GLUSTER_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME", GLUSTER_CLI_BARRIER_VOLUME, glusterd_handle_barrier, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GANESHA] = { "GANESHA" , GLUSTER_CLI_GANESHA, glusterd_handle_ganesha_cmd, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", GLUSTER_CLI_GET_VOL_OPT, glusterd_handle_get_vol_opt, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT, glusterd_handle_bitrot, NULL, 0, DRC_NA},
+};
+
+/*
+ * Program descriptor that exposes the full gd_svc_cli_actors table under
+ * GLUSTER_CLI_PROGRAM / GLUSTER_CLI_VERSION.
+ */
+struct rpcsvc_program gd_svc_cli_prog = {
+ .progname = "GlusterD svc cli",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numactors = GLUSTER_CLI_MAXVALUE,
+ .actors = gd_svc_cli_actors,
+ .synctask = _gf_true,
+};
+
+/**
+ * This set of RPC progs are deemed to be trusted. Most of the actors support
+ * read only queries, the only exception being MOUNT/UMOUNT which is required
+ * by geo-replication to support unprivileged master -> slave sessions.
+ */
+/* Subset of the cli actor table: the array is sized GLUSTER_CLI_MAXVALUE but
+ * only the procedures listed below are populated; all other slots are left
+ * zero-initialised.  The populated entries use the same handlers and field
+ * values as the corresponding entries of gd_svc_cli_actors.
+ */
+rpcsvc_actor_t gd_svc_cli_trusted_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, 1, DRC_NA},
+};
+
+/*
+ * Program descriptor for the restricted cli actor table.  It uses the same
+ * program number and version as gd_svc_cli_prog but points at
+ * gd_svc_cli_trusted_actors.
+ */
+struct rpcsvc_program gd_svc_cli_trusted_progs = {
+ .progname = "GlusterD svc cli read-only",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numactors = GLUSTER_CLI_MAXVALUE,
+ .actors = gd_svc_cli_trusted_actors,
+ .synctask = _gf_true,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
new file mode 100644
index 00000000000..0ea66a027bf
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -0,0 +1,2289 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "defaults.h"
+#include "glusterfs.h"
+#include "syscall.h"
+#include "compat-errno.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-snapd-svc-helper.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-messages.h"
+
+#include "glusterfs3.h"
+#include "protocol-common.h"
+#include "rpcsvc.h"
+#include "rpc-common-xdr.h"
+
+extern struct rpc_clnt_program gd_peer_prog;
+extern struct rpc_clnt_program gd_mgmt_prog;
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+
+#define TRUSTED_PREFIX "trusted-"
+#define GD_PEER_ID_KEY "peer-id"
+
+typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data);
+
+/*
+ * Extract the snapshot volume name and volinfo from a volfile path.
+ *
+ * @volpath has one of the forms
+ *     /snaps/<snapname>/<volname>.<hostname>
+ *     /snaps/<snapname>/<parent-volname>
+ *
+ * On success returns 0, stores the snapshot volinfo in *volinfo and a newly
+ * allocated copy of the volume name in *volname (the caller releases it with
+ * GF_FREE).  On failure returns non-zero and leaves *volname NULL.
+ *
+ * The caller must pass *volname initialised (normally to NULL): the cleanup
+ * path inspects it even when nothing was assigned here.
+ */
+static int
+get_snap_volname_and_volinfo (const char *volpath, char **volname,
+                              glusterd_volinfo_t **volinfo)
+{
+        int              ret = -1;
+        char            *save_ptr = NULL;
+        char            *str_token = NULL;
+        char            *snapname = NULL;
+        char            *volname_token = NULL;
+        char            *vol = NULL;
+        glusterd_snap_t *snap = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (volpath);
+        GF_ASSERT (volname);
+        GF_ASSERT (volinfo);
+
+        /* strtok_r() writes into its input, so tokenize a private copy.
+         * snapname, volname_token and vol all point into this buffer.
+         */
+        str_token = gf_strdup (volpath);
+        if (NULL == str_token) {
+                goto out;
+        }
+
+        /* Input volname will have below formats:
+         * /snaps/<snapname>/<volname>.<hostname>
+         * or
+         * /snaps/<snapname>/<parent-volname>
+         * We need to extract snapname and parent_volname */
+
+        /*split string by "/" */
+        strtok_r (str_token, "/", &save_ptr);
+        snapname  = strtok_r(NULL, "/", &save_ptr);
+        if (!snapname) {
+                gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+                       GD_MSG_INVALID_ENTRY,
+                       "Invalid path: %s", volpath);
+                goto out;
+        }
+
+        volname_token = strtok_r(NULL, "/", &save_ptr);
+        if (!volname_token) {
+                gf_msg (this->name, GF_LOG_ERROR,
+                        EINVAL, GD_MSG_INVALID_ENTRY,
+                        "Invalid path: %s", volpath);
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+                       GD_MSG_SNAP_NOT_FOUND, "Failed to "
+                       "fetch snap %s", snapname);
+                goto out;
+        }
+
+        /* Find if its a parent volume name or snap volume
+         * name. This function will succeed if volname_token
+         * is a parent volname
+         */
+        ret = glusterd_volinfo_find (volname_token, volinfo);
+        if (ret) {
+                *volname = gf_strdup (volname_token);
+                if (NULL == *volname) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_snap_volinfo_find (volname_token, snap,
+                                                  volinfo);
+                if (ret) {
+                        /* Split the volume name */
+                        vol = strtok_r (volname_token, ".", &save_ptr);
+                        if (!vol) {
+                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                        GD_MSG_INVALID_ENTRY, "Invalid "
+                                        "volname (%s)", volname_token);
+                                goto out;
+                        }
+
+                        ret = glusterd_snap_volinfo_find (vol, snap, volinfo);
+                        if (ret) {
+                                gf_msg(this->name, GF_LOG_ERROR, 0,
+                                       GD_MSG_SNAP_INFO_FAIL, "Failed to "
+                                       "fetch snap volume from volname (%s)",
+                                       vol);
+                                goto out;
+                        }
+                }
+        } else {
+                /*volname_token is parent volname*/
+                ret = glusterd_snap_volinfo_find_from_parent_volname (
+                                volname_token, snap, volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_INFO_FAIL, "Failed to "
+                                "fetch snap volume from parent "
+                                "volname (%s)", volname_token);
+                        goto out;
+                }
+
+                /* Since volname_token is a parent volname we should
+                 * get the snap volname here*/
+                *volname = gf_strdup ((*volinfo)->volname);
+                if (NULL == *volname) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+out:
+        if (ret && NULL != *volname) {
+                GF_FREE (*volname);
+                *volname = NULL;
+        }
+        /* The tokenized copy is scratch space only; *volname was duplicated
+         * separately, so the buffer can be released on every path.
+         */
+        if (str_token)
+                GF_FREE (str_token);
+        return ret;
+}
+
+/*
+ * Build "<volume-dir>/<filename>" into @path, where the volume directory is
+ * produced by GLUSTERD_GET_VOLUME_DIR for @volinfo.
+ *
+ * @path_len is the size of the @path buffer.  Returns 0 on success and -1
+ * when THIS or its private data is missing, or when the resulting path does
+ * not fit in @path_len bytes (a truncated path is never reported as success).
+ */
+int32_t
+glusterd_get_client_per_brick_volfile (glusterd_volinfo_t *volinfo,
+                                       char *filename, char *path, int path_len)
+{
+        char             workdir[PATH_MAX] = {0,};
+        glusterd_conf_t *priv = NULL;
+        int32_t          ret = -1;
+        int              len = 0;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", THIS, out);
+        priv = THIS->private;
+        GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+
+        GLUSTERD_GET_VOLUME_DIR (workdir, volinfo, priv);
+
+        len = snprintf (path, path_len, "%s/%s", workdir, filename);
+        if ((len < 0) || (len >= path_len)) {
+                /* Output error or truncation: the caller would otherwise
+                 * go on to probe a path that is not the one requested.
+                 */
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Translate the volfile id sent by a client into the path of the volfile on
+ * disk, written into @path (at most @path_len bytes).
+ *
+ * The id is matched against these forms, in order:
+ *   "snapd/<vol>"            -> glusterd_svc_build_snapd_volfile()
+ *   "gluster/<svc>"          -> glusterd_svc_build_volfile_path()
+ *   "/snaps/<snap>/<vol>"    -> snapshot volume, resolved through
+ *                               get_snap_volname_and_volinfo()
+ *   "rebalance/<vol>"        -> glusterd_get_rebalance_volfile()
+ *   "client_per_brick/<id>"  -> glusterd_get_client_per_brick_volfile(),
+ *                               then checked with sys_access()
+ *   anything else            -> plain volume id, with or without a leading
+ *                               '/', looked up under "<workdir>/vols"
+ *
+ * For snapshot and plain ids "<prefix>/<volname>/<id>.vol" is tried first;
+ * if that file does not exist, "<prefix>/<volname>/<trusted><id>-fuse.vol"
+ * is tried, with a transport suffix appended to the id when it contains no
+ * '.'.  @trusted_str is ignored (treated as NULL) when
+ * glusterd_auth_get_username() returns NULL/0 for the volume.
+ *
+ * Returns the last value assigned to ret: 0 on success, or the -1/non-zero
+ * result of the failing lookup, stat or access call.
+ * NOTE(review): the return type is size_t although -1 is returned on
+ * failure; callers presumably compare against 0 -- confirm.
+ */
+static size_t
+build_volfile_path (char *volume_id, char *path,
+ size_t path_len, char *trusted_str)
+{
+ struct stat stbuf = {0,};
+ int32_t ret = -1;
+ char *vol = NULL;
+ char *dup_volname = NULL;
+ char *save_ptr = NULL;
+ char *free_ptr = NULL;
+ char *volname = NULL;
+ char *volid_ptr = NULL;
+ char dup_volid[PATH_MAX] = {0,};
+ char path_prefix[PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (volume_id);
+ GF_ASSERT (path);
+
+ /* "snapd/<volname>": volfile of the snapd service of a volume */
+ volid_ptr = strstr (volume_id, "snapd/");
+ if (volid_ptr) {
+ volid_ptr = strchr (volid_ptr, '/');
+ if (!volid_ptr) {
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
+
+ ret = glusterd_volinfo_find (volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo");
+ goto out;
+ }
+ glusterd_svc_build_snapd_volfile (volinfo, path, path_len);
+ ret = 0;
+ goto out;
+
+ }
+
+ /* "gluster/<name>": path is built from the workdir, no volinfo lookup */
+ volid_ptr = strstr (volume_id, "gluster/");
+ if (volid_ptr) {
+ volid_ptr = strchr (volid_ptr, '/');
+ if (!volid_ptr) {
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
+
+ glusterd_svc_build_volfile_path (volid_ptr,
+ priv->workdir,
+ path, path_len);
+ ret = 0;
+ goto out;
+
+ }
+
+ /* "/snaps/<snapname>/<volname>": snapshot volume */
+ volid_ptr = strstr (volume_id, "/snaps/");
+ if (volid_ptr) {
+ ret = get_snap_volname_and_volinfo (volid_ptr, &volname,
+ &volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_INFO_FAIL, "Failed to get snap"
+ " volinfo from path (%s)", volume_id);
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (path_prefix, sizeof (path_prefix), "%s/snaps/%s",
+ priv->workdir, volinfo->snapshot->snapname);
+
+ volid_ptr = volname;
+ /* this is to ensure that volname recvd from
+ get_snap_volname_and_volinfo is free'd */
+ free_ptr = volname;
+ goto gotvolinfo;
+
+ }
+
+ /* "rebalance/<volname>": rebalance volfile of a volume */
+ volid_ptr = strstr (volume_id, "rebalance/");
+ if (volid_ptr) {
+ volid_ptr = strchr (volid_ptr, '/');
+ if (!volid_ptr) {
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
+
+ ret = glusterd_volinfo_find (volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo");
+ goto out;
+ }
+ glusterd_get_rebalance_volfile (volinfo, path, path_len);
+ ret = 0;
+ goto out;
+ }
+
+ /* "client_per_brick/<file>": the volume name is the part of <file>
+  * before the first '.', while the full <file> is used as file name.
+  */
+ volid_ptr = strstr (volume_id, "client_per_brick/");
+ if (volid_ptr) {
+ volid_ptr = strchr (volid_ptr, '/');
+ if (!volid_ptr) {
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
+
+ dup_volname = gf_strdup (volid_ptr);
+ if (!dup_volname) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ GD_MSG_NO_MEMORY,
+ "strdup failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* Split the volume name */
+ vol = strtok_r (dup_volname, ".", &save_ptr);
+ if (!vol) {
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_volinfo_find (vol, &volinfo);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo");
+ goto out;
+ }
+ ret = glusterd_get_client_per_brick_volfile (volinfo, volid_ptr,
+ path, path_len);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_NO_MEMORY, "failed to get volinfo path");
+ goto out;
+ }
+
+ /* The result is whether the computed file exists */
+ ret = sys_access (path, F_OK);
+ goto out;
+ }
+
+ if (volume_id[0] == '/') {
+ /* Normal behavior */
+ volid_ptr = volume_id;
+ volid_ptr++;
+
+ } else {
+ /* Bringing in NFS like behavior for mount command, */
+ /* With this, one can mount a volume with below cmd */
+ /* bash# mount -t glusterfs server:/volume /mnt/pnt */
+ volid_ptr = volume_id;
+ }
+
+ snprintf (path_prefix, sizeof (path_prefix), "%s/vols",
+ priv->workdir);
+
+ ret = glusterd_volinfo_find (volid_ptr, &volinfo);
+
+ if (ret) {
+ /* No volume with the full id; retry with the part before the
+  * first '.' as the volume name.
+  */
+ dup_volname = gf_strdup (volid_ptr);
+ if (!dup_volname) {
+ ret = -1;
+ goto out;
+ }
+ /* Split the volume name */
+ vol = strtok_r (dup_volname, ".", &save_ptr);
+ if (!vol) {
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_volinfo_find (vol, &volinfo);
+ if (ret)
+ goto out;
+ }
+
+gotvolinfo:
+ /* Drop the trusted prefix when the volume has no auth username */
+ if (!glusterd_auth_get_username (volinfo))
+ trusted_str = NULL;
+
+ ret = snprintf (path, path_len, "%s/%s/%s.vol", path_prefix,
+ volinfo->volname, volid_ptr);
+ if (ret == -1)
+ goto out;
+
+ ret = sys_stat (path, &stbuf);
+
+ if ((ret == -1) && (errno == ENOENT)) {
+ /* NOTE(review): the copy may fill dup_volid up to PATH_MAX - 1
+  * bytes, leaving no room for the suffix strcat() appends below --
+  * confirm that ids are always much shorter than PATH_MAX.
+  */
+ strncpy (dup_volid, volid_ptr, (PATH_MAX - 1));
+ if (!strchr (dup_volid, '.')) {
+ switch (volinfo->transport_type) {
+ case GF_TRANSPORT_TCP:
+ strcat (dup_volid, ".tcp");
+ break;
+ case GF_TRANSPORT_RDMA:
+ strcat (dup_volid, ".rdma");
+ break;
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ strcat (dup_volid, ".tcp");
+ break;
+ default:
+ /* This value does not survive: ret is reassigned by the
+  * sys_stat() call after the switch.
+  */
+ ret = -1;
+ break;
+ }
+ }
+ snprintf (path, path_len, "%s/%s/%s%s-fuse.vol",
+ path_prefix, volinfo->volname,
+ (trusted_str ? trusted_str : ""),
+ dup_volid);
+ ret = sys_stat (path, &stbuf);
+ }
+out:
+ if (dup_volname)
+ GF_FREE (dup_volname);
+ if (free_ptr)
+ GF_FREE (free_ptr);
+ return ret;
+}
+
+/* Read the client's op-version range and optional brick name from the xdata
+ * of a getspec request.
+ *
+ * The min/max op-versions default to 1 and are stored into @peerinfo on every
+ * path, including failures; clients of versions <= 3.3 send no op-versions
+ * and therefore keep the default.  When xdata carries "brick_name",
+ * *brick_name is set through dict_get_str().
+ *
+ * Returns 0 when there is no xdata, when everything was read, or when only
+ * the brick name is missing; non-zero when the dictionary cannot be created
+ * or unserialized or an op-version key is missing.
+ *
+ * NOTE(review): the dictionary created here is not unref'd before returning;
+ * *brick_name presumably points into it -- confirm ownership with the caller.
+ */
+int32_t
+glusterd_get_args_from_dict (gf_getspec_req *args, peer_info_t *peerinfo,
+                             char **brick_name)
+{
+        dict_t    *xdata = NULL;
+        int        max_ver = 1;
+        int        min_ver = 1;
+        int32_t    ret = -1;
+        xlator_t  *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (args);
+        GF_ASSERT (peerinfo);
+
+        /* Nothing was sent along with the request: keep the defaults */
+        if (!args->xdata.xdata_len) {
+                ret = 0;
+                goto out;
+        }
+
+        xdata = dict_new ();
+        if (!xdata)
+                goto out;
+
+        ret = dict_unserialize (args->xdata.xdata_val,
+                                args->xdata.xdata_len, &xdata);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "Failed to unserialize request dictionary");
+                goto out;
+        }
+
+        ret = dict_get_int32 (xdata, "min-op-version", &min_ver);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get client-min-op-version");
+                goto out;
+        }
+
+        ret = dict_get_int32 (xdata, "max-op-version", &max_ver);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get client-max-op-version");
+                goto out;
+        }
+
+        /* The brick name is optional; its absence is not an error */
+        ret = dict_get_str (xdata, "brick_name", brick_name);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "No brick name present");
+                ret = 0;
+        } else {
+                gf_msg_debug (this->name, 0, "brick_name = %s", *brick_name);
+        }
+
+out:
+        peerinfo->max_op_version = max_ver;
+        peerinfo->min_op_version = min_ver;
+
+        return ret;
+}
+
+/* Take the LVM snapshot that was missed for one brick of a snapshot volume.
+ *
+ * The snap is looked up by missed_snapinfo->snap_uuid, the snap volume by
+ * snap_opinfo->snap_vol_id, and the brick by its 1-based position
+ * snap_opinfo->brick_num in the snap volume's brick list. The brick must
+ * still be marked pending (snap_status == -1). The snapshot device path is
+ * then recorded in the brickinfo, the LVM snapshot is taken, the snap brick
+ * is created and mounted, the snap volinfo is stored and the brick started.
+ *
+ * Failures to update the mount options or the file-system label are logged
+ * but do not fail the operation.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int32_t
+glusterd_create_missed_snap (glusterd_missed_snap_info *missed_snapinfo,
+                             glusterd_snap_op_t *snap_opinfo)
+{
+        char                 *device      = NULL;
+        char                 *snap_device = NULL;
+        glusterd_conf_t      *priv        = NULL;
+        glusterd_snap_t      *snap        = NULL;
+        glusterd_volinfo_t   *snap_vol    = NULL;
+        glusterd_volinfo_t   *volinfo     = NULL;
+        glusterd_brickinfo_t *brickinfo   = NULL;
+        glusterd_brickinfo_t *entry       = NULL;
+        int32_t               ret         = -1;
+        int32_t               i           = 0;
+        uuid_t                snap_uuid   = {0,};
+        xlator_t             *this        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (missed_snapinfo);
+        GF_ASSERT (snap_opinfo);
+
+        gf_uuid_parse (missed_snapinfo->snap_uuid, snap_uuid);
+
+        /* Find the snap-object */
+        snap = glusterd_find_snap_by_id (snap_uuid);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "Unable to find the snap with snap_uuid %s",
+                        missed_snapinfo->snap_uuid);
+                ret = -1;
+                goto out;
+        }
+
+        /* Find the snap_vol */
+        cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+                if (!strcmp (volinfo->volname,
+                             snap_opinfo->snap_vol_id)) {
+                        snap_vol = volinfo;
+                        break;
+                }
+        }
+
+        if (!snap_vol) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND,
+                        "Unable to find the snap_vol(%s) "
+                        "for snap(%s)", snap_opinfo->snap_vol_id,
+                        snap->snapname);
+                ret = -1;
+                goto out;
+        }
+
+        /* Find the missed brick in the snap volume. A separate cursor is
+         * used because, when the loop runs to completion, the cursor does
+         * not point at a real brick and must not be dereferenced.
+         */
+        cds_list_for_each_entry (entry, &snap_vol->bricks, brick_list) {
+                i++;
+                if (i == snap_opinfo->brick_num) {
+                        brickinfo = entry;
+                        break;
+                }
+        }
+
+        if (!brickinfo) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_GET_INFO_FAIL,
+                        "Unable to find brick number %d in the snap_vol(%s) "
+                        "for snap(%s)", snap_opinfo->brick_num,
+                        snap_vol->volname, snap->snapname);
+                ret = -1;
+                goto out;
+        }
+
+        if (brickinfo->snap_status != -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_STATUS_NOT_PENDING,
+                        "The snap status of the missed "
+                        "brick(%s) is not pending", brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Fetch the device path */
+        device = glusterd_get_brick_mount_device (snap_opinfo->brick_path);
+        if (!device) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_GET_INFO_FAIL,
+                        "Getting device name for the "
+                        "brick %s:%s failed", brickinfo->hostname,
+                        snap_opinfo->brick_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Keep the result in its own pointer: assigning it back to 'device'
+         * would lose the string fetched above before it is freed.
+         * NOTE(review): assumes glusterd_build_snap_device_path () does not
+         * take ownership of its 'device' argument - confirm.
+         */
+        snap_device = glusterd_build_snap_device_path (device,
+                                                       snap_vol->volname,
+                                                       snap_opinfo->brick_num - 1);
+        if (!snap_device) {
+                gf_msg (this->name, GF_LOG_ERROR, ENXIO,
+                        GD_MSG_SNAP_DEVICE_NAME_GET_FAIL,
+                        "cannot copy the snapshot "
+                        "device name (volname: %s, snapname: %s)",
+                        snap_vol->volname, snap->snapname);
+                ret = -1;
+                goto out;
+        }
+
+        /* snprintf always NUL-terminates, unlike strncpy with a full-size
+         * bound.
+         */
+        snprintf (brickinfo->device_path, sizeof (brickinfo->device_path),
+                  "%s", snap_device);
+
+        /* Update the backend file-system type of snap brick in
+         * snap volinfo. */
+        ret = glusterd_update_mntopts (snap_opinfo->brick_path, brickinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRK_MOUNTOPTS_FAIL, "Failed to update "
+                        "mount options for %s brick", brickinfo->path);
+                /* We should not fail snapshot operation if we fail to get
+                 * the file-system type */
+        }
+
+        ret = glusterd_take_lvm_snapshot (brickinfo, snap_opinfo->brick_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED,
+                        "Failed to take snapshot of %s",
+                        snap_opinfo->brick_path);
+                goto out;
+        }
+
+        /* After the snapshot both the origin brick (LVM brick) and
+         * the snapshot brick will have the same file-system label. This
+         * will cause lot of problems at mount time. Therefore we must
+         * generate a new label for the snapshot brick
+         */
+        ret = glusterd_update_fs_label (brickinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_SET_INFO_FAIL, "Failed to update "
+                        "file-system label for %s brick", brickinfo->path);
+                /* Failing to update label should not cause snapshot failure.
+                 * Currently label is updated only for XFS and ext2/ext3/ext4
+                 * file-system.
+                 */
+        }
+
+        /* Create and mount the snap brick */
+        ret = glusterd_snap_brick_create (snap_vol, brickinfo,
+                                          snap_opinfo->brick_num - 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_CREATION_FAIL, "Failed to "
+                        " create and mount the brick(%s) for the snap %s",
+                        snap_opinfo->brick_path,
+                        snap_vol->snapshot->snapname);
+                goto out;
+        }
+
+        brickinfo->snap_status = 0;
+        ret = glusterd_store_volinfo (snap_vol,
+                                      GLUSTERD_VOLINFO_VER_AC_NONE);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_STORE_FAIL, "Failed to store snapshot "
+                        "volinfo (%s) for snap %s", snap_vol->volname,
+                        snap->snapname);
+                goto out;
+        }
+
+        ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRICK_DISCONNECTED, "starting the "
+                        "brick %s:%s for the snap %s failed",
+                        brickinfo->hostname, brickinfo->path,
+                        snap->snapname);
+                goto out;
+        }
+out:
+        if (device)
+                GF_FREE (device);
+        if (snap_device)
+                GF_FREE (snap_device);
+
+        return ret;
+}
+
+/* Look into priv->missed_snaps_list to see if the given brick_name has any
+ * missed snapshot creates recorded for the local node, and perform those
+ * that are still pending.
+ *
+ * A pending create is marked done whether or not the create succeeded; the
+ * missed-snaps store is rewritten when any entry was changed.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * glusterd_store_update_missed_snaps () when persisting the list fails.
+ */
+int32_t
+glusterd_take_missing_brick_snapshots (char *brick_name)
+{
+        xlator_t                  *this       = NULL;
+        glusterd_conf_t           *priv       = NULL;
+        glusterd_missed_snap_info *missed     = NULL;
+        glusterd_snap_op_t        *op         = NULL;
+        char                      *local_uuid = NULL;
+        gf_boolean_t               dirty      = _gf_false;
+        int32_t                    ret        = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (brick_name);
+
+        local_uuid = uuid_utoa (MY_UUID);
+
+        cds_list_for_each_entry (missed, &priv->missed_snaps_list,
+                                 missed_snaps) {
+                /* Skip missed snap ops that belong to other nodes */
+                if (strcmp (local_uuid, missed->node_uuid) != 0)
+                        continue;
+
+                cds_list_for_each_entry (op, &missed->snap_ops,
+                                         snap_ops_list) {
+                        /* Only a create op for the brick in question is
+                         * of interest
+                         */
+                        if (op->op != GF_SNAP_OPTION_TYPE_CREATE)
+                                continue;
+                        if (strcmp (brick_name, op->brick_path) != 0)
+                                continue;
+
+                        if (op->status == GD_MISSED_SNAP_PENDING) {
+                                ret = glusterd_create_missed_snap (missed, op);
+                                if (ret) {
+                                        /* The entry is marked done even on
+                                         * failure: other snapshots and the
+                                         * brick itself are not affected,
+                                         * only this snap brick stays
+                                         * pending.
+                                         */
+                                        gf_msg (this->name,
+                                                GF_LOG_ERROR, 0,
+                                                GD_MSG_MISSED_SNAP_CREATE_FAIL,
+                                                "Failed to create "
+                                                "missed snap for %s",
+                                                brick_name);
+                                }
+                                op->status = GD_MISSED_SNAP_DONE;
+                                dirty = _gf_true;
+                        }
+
+                        /* One snap-id won't have more than one missed
+                         * create for the same brick path, so stop looking
+                         * through this snap's ops.
+                         */
+                        break;
+                }
+        }
+
+        if (dirty != _gf_true)
+                return 0;
+
+        ret = glusterd_store_update_missed_snaps ();
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                        "Failed to update missed_snaps_list");
+
+        return ret;
+}
+
+/* Checks if the client supports the volume, i.e. whether the volume's
+ * client_op_version lies within the client's [min_op_version,
+ * max_op_version] range.
+ *
+ * Returns _gf_true when the client is acceptable. Otherwise returns
+ * _gf_false and sets *op_errno to ENOTSUP.
+ */
+static gf_boolean_t
+_client_supports_volume (peer_info_t *peerinfo, int32_t *op_errno)
+{
+        glusterd_volinfo_t *volinfo = NULL;
+
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (op_errno);
+
+        /* Only check when the volfile being requested is a volume. Not
+         * finding a volinfo implies that the volfile requested for is not of
+         * a gluster volume. A non volume volfile is requested by the local
+         * gluster services like shd and nfs-server. These need not be
+         * checked as they will be running at the same op-version as glusterd
+         * and will be able to support all the features
+         */
+        if (glusterd_volinfo_find (peerinfo->volname, &volinfo) != 0)
+                return _gf_true;
+
+        if ((peerinfo->min_op_version <= volinfo->client_op_version) &&
+            (peerinfo->max_op_version >= volinfo->client_op_version))
+                return _gf_true;
+
+        *op_errno = ENOTSUP;
+        gf_msg ("glusterd", GF_LOG_INFO, ENOTSUP,
+                GD_MSG_UNSUPPORTED_VERSION,
+                "Client %s (%d -> %d) doesn't support required "
+                "op-version (%d). Rejecting volfile request.",
+                peerinfo->identifier, peerinfo->min_op_version,
+                peerinfo->max_op_version, volinfo->client_op_version);
+
+        return _gf_false;
+}
+
+/* GETSPEC handler: reply to the client with the contents of the volfile it
+ * asked for in args.key.
+ *
+ * The requested name (minus a leading '/') is recorded in the transport's
+ * peerinfo->volname, the remaining arguments are read through
+ * glusterd_get_args_from_dict () and the client is checked with
+ * _client_supports_volume (). Requests from a local or empty peer address
+ * are served the TRUSTED_PREFIX volfile. If the request carries a brick
+ * name, pending missed snapshot creates for that brick are taken before
+ * replying.
+ *
+ * The reply's op_ret is the number of bytes read on success and negative on
+ * failure. The function itself always returns 0.
+ */
+int
+__server_getspec (rpcsvc_request_t *req)
+{
+        int32_t               ret        = -1;
+        int32_t               op_ret     = -1;
+        int32_t               op_errno   = 0;
+        int32_t               spec_fd    = -1;
+        size_t                file_len   = 0;
+        char                  filename[PATH_MAX] = {0,};
+        struct stat           stbuf      = {0,};
+        char                 *brick_name = NULL;
+        char                 *volume     = NULL;
+        char                 *tmp        = NULL;
+        rpc_transport_t      *trans      = NULL;
+        gf_getspec_req        args       = {0,};
+        gf_getspec_rsp        rsp        = {0,};
+        char                  addrstr[RPCSVC_PEER_STRLEN] = {0};
+        peer_info_t          *peerinfo   = NULL;
+        xlator_t             *this       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gf_getspec_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto fail;
+        }
+
+        peerinfo = &req->trans->peerinfo;
+
+        volume = args.key;
+        /* Need to strip leading '/' from volnames. This was introduced to
+         * support nfs style mount parameters for native gluster mount.
+         *
+         * The name comes from the client, so the copy is bounded by the
+         * destination and always NUL-terminated; an over-long name is
+         * rejected instead of being truncated silently.
+         * NOTE(review): sizeof assumes peerinfo->volname is a fixed-size
+         * array member - confirm against peer_info_t.
+         */
+        ret = snprintf (peerinfo->volname, sizeof (peerinfo->volname), "%s",
+                        (volume[0] == '/') ? &volume[1] : volume);
+        if ((ret < 0) || ((size_t)ret >= sizeof (peerinfo->volname))) {
+                gf_msg_debug (this->name, 0, "volume name in getspec "
+                              "request is too long or could not be copied "
+                              "(ret: %d)", ret);
+                ret = -1;
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        ret = glusterd_get_args_from_dict (&args, peerinfo, &brick_name);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get args from dict");
+                goto fail;
+        }
+
+        if (!_client_supports_volume (peerinfo, &op_errno)) {
+                ret = -1;
+                goto fail;
+        }
+
+        trans = req->trans;
+        /* addrstr will be empty for cli socket connections */
+        ret = rpcsvc_transport_peername (trans, addrstr, sizeof (addrstr));
+        if (ret)
+                goto fail;
+
+        /* Drop the ":port" suffix, if any */
+        tmp = strrchr (addrstr, ':');
+        if (tmp)
+                *tmp = '\0';
+
+        /* The trusted volfiles are given to the glusterd owned process like
+         * NFS server, self-heal daemon etc., so that they are not
+         * inadvertently blocked by a auth.{allow,reject} setting. The
+         * trusted volfile is not meant for external users.
+         * For unix domain socket, address will be empty.
+         */
+        if (strlen (addrstr) == 0 || gf_is_local_addr (addrstr)) {
+                ret = build_volfile_path (volume, filename,
+                                          sizeof (filename),
+                                          TRUSTED_PREFIX);
+        } else {
+                ret = build_volfile_path (volume, filename,
+                                          sizeof (filename), NULL);
+        }
+
+        if (ret == 0) {
+                /* to allocate the proper buffer to hold the file data */
+                ret = sys_stat (filename, &stbuf);
+                if (ret < 0) {
+                        /* save errno before logging can overwrite it */
+                        op_errno = errno;
+                        gf_msg ("glusterd", GF_LOG_ERROR, op_errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Unable to stat %s (%s)",
+                                filename, strerror (op_errno));
+                        goto fail;
+                }
+
+                spec_fd = open (filename, O_RDONLY);
+                if (spec_fd < 0) {
+                        op_errno = errno;
+                        gf_msg ("glusterd", GF_LOG_ERROR, op_errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Unable to open %s (%s)",
+                                filename, strerror (op_errno));
+                        /* ret is still 0 from the successful stat; without
+                         * this the client would be told the request
+                         * succeeded.
+                         */
+                        ret = -1;
+                        goto fail;
+                }
+                ret = file_len = stbuf.st_size;
+        } else {
+                op_errno = ENOENT;
+                goto fail;
+        }
+
+        if (file_len) {
+                rsp.spec = CALLOC (file_len+1, sizeof (char));
+                if (!rsp.spec) {
+                        ret = -1;
+                        op_errno = ENOMEM;
+                        goto fail;
+                }
+                ret = sys_read (spec_fd, rsp.spec, file_len);
+        }
+
+        if (brick_name) {
+                gf_msg_debug (this->name, 0,
+                              "Look for missing snap creates for %s",
+                              brick_name);
+                op_ret = glusterd_take_missing_brick_snapshots (brick_name);
+                if (op_ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_CREATE_FAIL,
+                                "Failed to take missing brick snapshots");
+                        ret = -1;
+                        goto fail;
+                }
+        }
+
+        /* convert to XDR */
+fail:
+        /* 0 is a valid descriptor, so test against -1 (the initial value) */
+        if (spec_fd >= 0)
+                sys_close (spec_fd);
+
+        rsp.op_ret = ret;
+
+        if (op_errno)
+                rsp.op_errno = gf_errno_to_error (op_errno);
+
+        if (!rsp.spec)
+                rsp.spec = strdup ("");
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_getspec_rsp);
+        free (args.key);//malloced by xdr
+        free (rsp.spec);
+
+        return 0;
+}
+
+/* GETSPEC entry point used in the actor tables of this file. It hands the
+ * request to __server_getspec through glusterd_big_locked_handler ()
+ * (which, going by its name, runs the handler under glusterd's big lock).
+ */
+int
+server_getspec (rpcsvc_request_t *req)
+{
+        int ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __server_getspec);
+
+        return ret;
+}
+
+/* EVENT_NOTIFY handler: decode the request, unserialize its optional dict
+ * and dispatch on args.op.
+ *
+ * For GF_EN_DEFRAG_STATUS with a dict, the dict is passed to
+ * glusterd_defrag_event_notify_handle () and no reply is submitted from
+ * here; in every other case a reply carrying op_ret/op_errno is sent.
+ * Always returns 0.
+ */
+int32_t
+__server_event_notify (rpcsvc_request_t *req)
+{
+        int32_t              ret      = -1;
+        int32_t              op_errno = 0;
+        gf_event_notify_req  args     = {0,};
+        gf_event_notify_rsp  rsp      = {0,};
+        dict_t              *dict     = NULL;
+        gf_boolean_t         need_rsp = _gf_true;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gf_event_notify_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto fail;
+        }
+
+        if (args.dict.dict_len) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* Go through the common exit path so that the
+                         * client still gets a reply and the xdr buffer is
+                         * released.
+                         */
+                        ret = -1;
+                        op_errno = ENOMEM;
+                        goto fail;
+                }
+                ret = dict_unserialize (args.dict.dict_val,
+                                        args.dict.dict_len, &dict);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "Failed to unserialize req");
+                        goto fail;
+                }
+        }
+
+        switch (args.op) {
+        case GF_EN_DEFRAG_STATUS:
+                gf_msg ("glusterd", GF_LOG_INFO, 0,
+                        GD_MSG_DEFRAG_STATUS_UPDATED,
+                        "received defrag status updated");
+                if (dict) {
+                        glusterd_defrag_event_notify_handle (dict);
+                        need_rsp = _gf_false;
+                }
+                break;
+        default:
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_OP_UNSUPPORTED, "Unknown op received in event "
+                        "notify");
+                ret = -1;
+                break;
+        }
+
+fail:
+        rsp.op_ret = ret;
+
+        if (op_errno)
+                rsp.op_errno = gf_errno_to_error (op_errno);
+
+        if (need_rsp)
+                glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                                       (xdrproc_t)xdr_gf_event_notify_rsp);
+        if (dict)
+                dict_unref (dict);
+        free (args.dict.dict_val);//malloced by xdr
+
+        return 0;
+}
+
+/* EVENTNOTIFY entry point used in gluster_handshake_actors. It hands the
+ * request to __server_event_notify through glusterd_big_locked_handler ().
+ */
+int32_t
+server_event_notify (rpcsvc_request_t *req)
+{
+        int32_t ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __server_event_notify);
+
+        return ret;
+}
+
+/* Check whether this glusterd can switch to cluster_op_version, as
+ * requested by the peer identified by peerid (used only in log messages).
+ *
+ * Returns 0 when the version is acceptable and -1 when it is above
+ * GD_OP_VERSION_MAX, or below the current op-version while volumes exist.
+ */
+int
+gd_validate_cluster_op_version (xlator_t *this, int cluster_op_version,
+                                char *peerid)
+{
+        glusterd_conf_t *conf = this->private;
+
+        if (cluster_op_version > GD_OP_VERSION_MAX) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_VERSION_MISMATCH,
+                        "operating version %d is more than the maximum "
+                        "supported (%d) on the machine (as per peer request "
+                        "from %s)", cluster_op_version, GD_OP_VERSION_MAX,
+                        peerid);
+                return -1;
+        }
+
+        /* The peer can only reduce its op-version when it doesn't have any
+         * volumes. Reducing op-version when it already contains volumes can
+         * lead to inconsistencies in the cluster
+         */
+        if ((cluster_op_version < conf->op_version) &&
+            !cds_list_empty (&conf->volumes)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_VERS_ADJUST_FAIL,
+                        "cannot reduce operating version to %d from current "
+                        "version %d as volumes exist (as per peer request from "
+                        "%s)", cluster_op_version, conf->op_version, peerid);
+                return -1;
+        }
+
+        return 0;
+}
+
+/* Validate if glusterd can serve the management handshake request
+ *
+ * Requests are allowed if,
+ *  - glusterd has no peers & no volumes, or
+ *  - the request came from a known peer
+ * A known peer is identified using the following steps
+ *  - the dict is checked for a peer uuid, which if present is matched with
+ *    the peer list, else
+ *  - the incoming request address is matched with the peer list
+ *
+ * Returns _gf_true when the request may be served, _gf_false otherwise.
+ */
+gf_boolean_t
+gd_validate_mgmt_hndsk_req (rpcsvc_request_t *req, dict_t *dict)
+{
+        xlator_t            *this      = NULL;
+        glusterd_peerinfo_t *peer      = NULL;
+        char                *uuid_str  = NULL;
+        uuid_t               peer_uuid = {0,};
+        char                 hostname[UNIX_PATH_MAX + 1] = {0,};
+        gf_boolean_t         known     = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!(glusterd_have_peers () || glusterd_have_volumes ()))
+                return _gf_true;
+
+        /* Try to match uuid only if available, don't fail as older peers
+         * will not send a uuid
+         */
+        if (dict_get_str (dict, GD_PEER_ID_KEY, &uuid_str) == 0) {
+                gf_uuid_parse (uuid_str, peer_uuid);
+                rcu_read_lock ();
+                known = (glusterd_peerinfo_find (peer_uuid, NULL) != NULL);
+                rcu_read_unlock ();
+                if (known)
+                        return _gf_true;
+        }
+
+        /* If you cannot get the hostname, you cannot authenticate */
+        if (glusterd_remote_hostname_get (req, hostname, sizeof (hostname)))
+                return _gf_false;
+
+        /* If peer object is not found it indicates that request is from an
+         * unknown peer, if its found, validate whether its uuid is also
+         * available in the peerinfo list. There could be a case where
+         * hostname is available in the peerinfo list but the uuid has
+         * changed of the node due to a reinstall, in that case the
+         * validation should fail!
+         */
+        rcu_read_lock ();
+        peer = glusterd_peerinfo_find (NULL, hostname);
+        if (peer) {
+                if (glusterd_peerinfo_find (peer_uuid, NULL) != NULL) {
+                        known = _gf_true;
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_HANDSHAKE_REQ_REJECTED,
+                                "Request from peer %s "
+                                "has an entry in peerinfo, but uuid does not match",
+                                req->trans->peerinfo.identifier);
+                }
+        }
+        rcu_read_unlock ();
+
+        if (!known) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HANDSHAKE_REQ_REJECTED, "Rejecting management "
+                        "handshake request from unknown peer %s",
+                        req->trans->peerinfo.identifier);
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/* MGMT-VERS handler: after validating the requester with
+ * gd_validate_mgmt_hndsk_req (), reply with a dict holding this glusterd's
+ * current op-version (GD_OP_VERSION_KEY) and the supported range
+ * (GD_MIN_OP_VERSION_KEY / GD_MAX_OP_VERSION_KEY).
+ *
+ * Failures are reported to the peer through the reply's op_ret/op_errno.
+ * Always returns 0.
+ */
+int
+__glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req)
+{
+        dict_t            *dict      = NULL;
+        xlator_t          *this      = NULL;
+        glusterd_conf_t   *conf      = NULL;
+        int                ret       = -1;
+        int                op_errno  = EINVAL;
+        gf_mgmt_hndsk_req  args      = {{0,},};
+        gf_mgmt_hndsk_rsp  rsp       = {0,};
+        dict_t            *args_dict = NULL;
+
+        this = THIS;
+        conf = this->private;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, args_dict, args.hndsk.hndsk_val,
+                                      (args.hndsk.hndsk_len), ret, op_errno,
+                                      out);
+
+        /* Check if we can service the request */
+        if (!gd_validate_mgmt_hndsk_req (req, args_dict)) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                /* ret is not negative at this point; flag the failure
+                 * explicitly so it is not reported as success.
+                 */
+                ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, GD_OP_VERSION_KEY, conf->op_version);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set operating version");
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, GD_MIN_OP_VERSION_KEY, GD_OP_VERSION_MIN);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set %s", GD_MIN_OP_VERSION_KEY);
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, GD_MAX_OP_VERSION_KEY, GD_OP_VERSION_MAX);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set %s", GD_MAX_OP_VERSION_KEY);
+                goto out;
+        }
+
+        ret = 0;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, dict, (&rsp.hndsk.hndsk_val),
+                                    rsp.hndsk.hndsk_len, op_errno, out);
+out:
+
+        rsp.op_ret = ret;
+        rsp.op_errno = op_errno;
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+
+        ret = 0;
+
+        if (dict)
+                dict_unref (dict);
+
+        /* The request dict is only needed for validation above; drop it
+         * here, as __glusterd_mgmt_hndsk_versions_ack does with clnt_dict.
+         */
+        if (args_dict)
+                dict_unref (args_dict);
+
+        if (args.hndsk.hndsk_val)
+                free (args.hndsk.hndsk_val);
+
+        if (rsp.hndsk.hndsk_val)
+                GF_FREE (rsp.hndsk.hndsk_val);
+
+        return ret;
+}
+
+/* MGMT-VERS entry point used in glusterd_mgmt_hndsk_actors. It hands the
+ * request to __glusterd_mgmt_hndsk_versions through
+ * glusterd_big_locked_handler ().
+ */
+int
+glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req)
+{
+        int ret = 0;
+
+        ret = glusterd_big_locked_handler (req,
+                                           __glusterd_mgmt_hndsk_versions);
+
+        return ret;
+}
+
+/* MGMT-VERS-ACK handler: read the op-version chosen by the peer from the
+ * request dict, validate it with gd_validate_cluster_op_version (), adopt
+ * it as conf->op_version and persist it with glusterd_store_global_info ().
+ *
+ * The outcome is sent back in the reply's op_ret/op_errno. Always
+ * returns 0.
+ */
+int
+__glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req)
+{
+        xlator_t          *this            = NULL;
+        glusterd_conf_t   *conf            = NULL;
+        dict_t            *clnt_dict       = NULL;
+        char              *peer_id         = NULL;
+        gf_mgmt_hndsk_req  args            = {{0,},};
+        gf_mgmt_hndsk_rsp  rsp             = {0,};
+        int                peer_op_version = 0;
+        int                op_errno        = EINVAL;
+        int                ret             = -1;
+
+        this = THIS;
+        conf = this->private;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, clnt_dict, args.hndsk.hndsk_val,
+                                      (args.hndsk.hndsk_len), ret, op_errno,
+                                      out);
+
+        /* Identifier of the requesting peer, used for logging/validation */
+        peer_id = req->trans->peerinfo.identifier;
+
+        ret = dict_get_int32 (clnt_dict, GD_OP_VERSION_KEY, &peer_op_version);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get the op-version key peer=%s",
+                        peer_id);
+                goto out;
+        }
+
+        ret = gd_validate_cluster_op_version (this, peer_op_version, peer_id);
+        if (ret)
+                goto out;
+
+        /* As this is ACK from the Cluster for the versions supported,
+           can set the op-version of 'this' glusterd to the one
+           received. */
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_VERS_INFO, "using the op-version %d",
+                peer_op_version);
+        conf->op_version = peer_op_version;
+
+        ret = glusterd_store_global_info (this);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLOBAL_OP_VERSION_SET_FAIL,
+                        "Failed to store op-version");
+        }
+
+out:
+        rsp.op_ret = ret;
+        rsp.op_errno = op_errno;
+
+        glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+
+        if (clnt_dict)
+                dict_unref (clnt_dict);
+
+        if (args.hndsk.hndsk_val)
+                free (args.hndsk.hndsk_val);
+
+        return 0;
+}
+
+/* MGMT-VERS-ACK entry point used in glusterd_mgmt_hndsk_actors. It hands
+ * the request to __glusterd_mgmt_hndsk_versions_ack through
+ * glusterd_big_locked_handler ().
+ */
+int
+glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req)
+{
+        int ret = 0;
+
+        ret = glusterd_big_locked_handler (req,
+                                           __glusterd_mgmt_hndsk_versions_ack);
+
+        return ret;
+}
+
+/* GETVOLUMEINFO handler: answer a query about the volume named by the
+ * "volname" key of the request dict. The "flags" key selects what is
+ * returned; when GF_GET_VOLUME_UUID is set the reply dict carries the
+ * volume id under "volume_id".
+ *
+ * A request with flags == 0 is answered with success and no data. A
+ * request whose flags contain no bit handled here is rejected with EINVAL.
+ * Results are reported through the reply's op_ret/op_errno. Always
+ * returns 0.
+ */
+int
+__server_get_volume_info (rpcsvc_request_t *req)
+{
+        int                     ret           = -1;
+        int32_t                 op_errno      = ENOENT;
+        gf_get_volume_info_req  vol_info_req  = {{0,}};
+        gf_get_volume_info_rsp  vol_info_rsp  = {0,};
+        char                   *volname       = NULL;
+        char                   *req_buf       = NULL;
+        glusterd_volinfo_t     *volinfo       = NULL;
+        dict_t                 *dict          = NULL;
+        dict_t                 *dict_rsp      = NULL;
+        char                   *volume_id_str = NULL;
+        int32_t                 flags         = 0;
+
+        ret = xdr_to_generic (req->msg[0], &vol_info_req,
+                              (xdrproc_t)xdr_gf_get_volume_info_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_VOL_INFO_REQ_RECVD, "Received get volume info req");
+
+        /* The decoded request buffer is owned by this function until it is
+         * handed to the dict through extra_stdfree below.
+         */
+        req_buf = vol_info_req.dict.dict_val;
+
+        if (vol_info_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict  = dict_new ();
+                if (!dict) {
+                        gf_msg ("glusterd", GF_LOG_WARNING, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Out of Memory");
+                        op_errno = ENOMEM;
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (vol_info_req.dict.dict_val,
+                                        vol_info_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        op_errno = -ret;
+                        ret = -1;
+                        goto out;
+                }
+
+                /* the dict releases the buffer from now on */
+                dict->extra_stdfree = vol_info_req.dict.dict_val;
+                req_buf = NULL;
+        }
+
+        ret = dict_get_int32 (dict, "flags", &flags);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, -ret,
+                        GD_MSG_DICT_GET_FAILED, "failed to get flags");
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        if (!flags) {
+                /* Nothing to query about. Just return success */
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_FLAG_SET, "No flags set");
+                ret = 0;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                op_errno = EINVAL;
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                op_errno = EINVAL;
+                ret = -1;
+                goto out;
+        }
+
+        /* Test the bit with '&'; '|' made this condition always true. */
+        if (flags & (int32_t)GF_GET_VOLUME_UUID) {
+                /* Create the reply dict before duplicating the id so that
+                 * a failed dict_new () cannot leak the string.
+                 */
+                dict_rsp = dict_new ();
+                if (!dict_rsp) {
+                        gf_msg ("glusterd", GF_LOG_WARNING, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Out of Memory");
+                        op_errno = ENOMEM;
+                        ret = -1;
+                        goto out;
+                }
+
+                volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id));
+                if (!volume_id_str) {
+                        op_errno = ENOMEM;
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (dict_rsp, "volume_id", volume_id_str);
+                if (ret) {
+                        op_errno = -ret;
+                        ret = -1;
+                        goto out;
+                }
+        } else {
+                /* none of the requested flags is handled here */
+                op_errno = EINVAL;
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_allocate_and_serialize (dict_rsp,
+                                           &vol_info_rsp.dict.dict_val,
+                                           &vol_info_rsp.dict.dict_len);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+out:
+        vol_info_rsp.op_ret = ret;
+        vol_info_rsp.op_errno = op_errno;
+        vol_info_rsp.op_errstr = "";
+        glusterd_submit_reply (req, &vol_info_rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_get_volume_info_rsp);
+        ret = 0;
+
+        if (dict) {
+                dict_unref (dict);
+        }
+
+        /* still ours only when it was never attached to the dict */
+        if (req_buf) {
+                free (req_buf);
+        }
+
+        if (dict_rsp) {
+                dict_unref (dict_rsp);
+        }
+
+        if (vol_info_rsp.dict.dict_val) {
+                GF_FREE (vol_info_rsp.dict.dict_val);
+        }
+        return ret;
+}
+
+/* GETVOLUMEINFO entry point used in gluster_handshake_actors. It hands the
+ * request to __server_get_volume_info through
+ * glusterd_big_locked_handler ().
+ */
+int
+server_get_volume_info (rpcsvc_request_t *req)
+{
+        int ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __server_get_volume_info);
+
+        return ret;
+}
+
+
+/*
+ * glusterd function to get the list of snapshot names and uuids
+ *
+ * The volume is named by the "volname" key of the request dict; the reply
+ * is filled in by glusterd_snapshot_get_volnames_uuids (). Results are
+ * reported through the reply's op_ret/op_errno. Always returns 0.
+ */
+int
+__server_get_snap_info (rpcsvc_request_t *req)
+{
+        int                       ret           = -1;
+        int                       op_errno      = ENOENT;
+        gf_getsnap_name_uuid_req  snap_info_req = {{0,}};
+        gf_getsnap_name_uuid_rsp  snap_info_rsp = {0,};
+        dict_t                   *dict          = NULL;
+        dict_t                   *dict_rsp      = NULL;
+        char                     *volname       = NULL;
+        char                     *req_buf       = NULL;
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &snap_info_req,
+                              (xdrproc_t)xdr_gf_getsnap_name_uuid_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode get snap info request");
+                goto out;
+        }
+
+        /* The decoded request buffer is owned by this function until it is
+         * handed to the dict through extra_stdfree below.
+         */
+        req_buf = snap_info_req.dict.dict_val;
+
+        if (snap_info_req.dict.dict_len) {
+                dict = dict_new ();
+                if (!dict) {
+                        op_errno = ENOMEM;
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (snap_info_req.dict.dict_val,
+                                        snap_info_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "Failed to unserialize dictionary");
+                        op_errno = EINVAL;
+                        ret = -1;
+                        goto out;
+                }
+
+                /* the dict releases the buffer from now on */
+                dict->extra_stdfree = snap_info_req.dict.dict_val;
+                req_buf = NULL;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                op_errno = EINVAL;
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to retrieve volname");
+                ret = -1;
+                goto out;
+        }
+
+        dict_rsp = dict_new ();
+        if (!dict_rsp) {
+                op_errno = ENOMEM;
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_snapshot_get_volnames_uuids (dict_rsp, volname,
+                                                    &snap_info_rsp);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND,
+                        "Error getting snapshot volume names and uuids : %s",
+                        volname);
+                op_errno = EINVAL;
+        }
+
+out:
+        snap_info_rsp.op_ret = ret;
+        snap_info_rsp.op_errno = op_errno;
+        snap_info_rsp.op_errstr = "";
+        glusterd_submit_reply (req, &snap_info_rsp, NULL, 0, NULL,
+                               (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp);
+
+        if (dict) {
+                dict_unref (dict);
+        }
+
+        /* still ours only when it was never attached to the dict */
+        if (req_buf) {
+                free (req_buf);
+        }
+
+        if (dict_rsp) {
+                dict_unref (dict_rsp);
+        }
+
+        if (snap_info_rsp.dict.dict_val) {
+                GF_FREE (snap_info_rsp.dict.dict_val);
+        }
+
+        return 0;
+}
+
+/* GETSNAPINFO entry point used in gluster_handshake_actors. It hands the
+ * request to __server_get_snap_info through glusterd_big_locked_handler ().
+ */
+int
+server_get_snap_info (rpcsvc_request_t *req)
+{
+        int ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __server_get_snap_info);
+
+        return ret;
+}
+
+/* Actor table of the handshake program, indexed by procedure number. Each
+ * entry starts with the procedure's name, its number and the function that
+ * serves it.
+ */
+rpcsvc_actor_t gluster_handshake_actors[GF_HNDSK_MAXVALUE] = {
+        [GF_HNDSK_NULL]              = {"NULL",
+                                        GF_HNDSK_NULL,
+                                        NULL,
+                                        NULL, 0, DRC_NA},
+        [GF_HNDSK_GETSPEC]           = {"GETSPEC",
+                                        GF_HNDSK_GETSPEC,
+                                        server_getspec,
+                                        NULL, 0, DRC_NA},
+        [GF_HNDSK_EVENT_NOTIFY]      = {"EVENTNOTIFY",
+                                        GF_HNDSK_EVENT_NOTIFY,
+                                        server_event_notify,
+                                        NULL, 0, DRC_NA},
+        [GF_HNDSK_GET_VOLUME_INFO]   = {"GETVOLUMEINFO",
+                                        GF_HNDSK_GET_VOLUME_INFO,
+                                        server_get_volume_info,
+                                        NULL, 0, DRC_NA},
+        [GF_HNDSK_GET_SNAPSHOT_INFO] = {"GETSNAPINFO",
+                                        GF_HNDSK_GET_SNAPSHOT_INFO,
+                                        server_get_snap_info,
+                                        NULL, 0, DRC_NA},
+};
+
+
+/* RPC service program description built on gluster_handshake_actors */
+struct rpcsvc_program gluster_handshake_prog = {
+        .progname  = "Gluster Handshake",
+        .prognum   = GLUSTER_HNDSK_PROGRAM,
+        .progver   = GLUSTER_HNDSK_VERSION,
+        .actors    = gluster_handshake_actors,
+        .numactors = GF_HNDSK_MAXVALUE,
+};
+
+/* A minimal RPC program just for the cli getspec command: GETSPEC is the
+ * only slot of the actor table that is filled in.
+ */
+rpcsvc_actor_t gluster_cli_getspec_actors[GF_HNDSK_MAXVALUE] = {
+        [GF_HNDSK_GETSPEC] = {"GETSPEC",
+                              GF_HNDSK_GETSPEC,
+                              server_getspec,
+                              NULL, 0, DRC_NA},
+};
+
+/* Same program number and version as gluster_handshake_prog */
+struct rpcsvc_program gluster_cli_getspec_prog = {
+        .progname  = "Gluster Handshake (CLI Getspec)",
+        .prognum   = GLUSTER_HNDSK_PROGRAM,
+        .progver   = GLUSTER_HNDSK_VERSION,
+        .actors    = gluster_cli_getspec_actors,
+        .numactors = GF_HNDSK_MAXVALUE,
+};
+
+
+/* Procedure names of the dump program, indexed by procedure number */
+char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = {
+        [GF_DUMP_NULL] = "NULL",
+        [GF_DUMP_DUMP] = "DUMP",
+        [GF_DUMP_PING] = "PING",
+};
+
+/* Client-side description of the dump program */
+rpc_clnt_prog_t glusterd_dump_prog = {
+        .progname  = "GLUSTERD-DUMP",
+        .prognum   = GLUSTER_DUMP_PROGRAM,
+        .progver   = GLUSTER_DUMP_VERSION,
+        .procnames = glusterd_dump_proc,
+};
+
+
+/* Actor table of the management handshake program, indexed by procedure
+ * number. Each entry starts with the procedure's name, its number and the
+ * function that serves it.
+ */
+rpcsvc_actor_t glusterd_mgmt_hndsk_actors[GD_MGMT_HNDSK_MAXVALUE] = {
+        [GD_MGMT_HNDSK_NULL] = {
+                "NULL", GD_MGMT_HNDSK_NULL,
+                NULL, NULL, 0, DRC_NA
+        },
+        [GD_MGMT_HNDSK_VERSIONS] = {
+                "MGMT-VERS", GD_MGMT_HNDSK_VERSIONS,
+                glusterd_mgmt_hndsk_versions, NULL, 0, DRC_NA
+        },
+        [GD_MGMT_HNDSK_VERSIONS_ACK] = {
+                "MGMT-VERS-ACK", GD_MGMT_HNDSK_VERSIONS_ACK,
+                glusterd_mgmt_hndsk_versions_ack, NULL, 0, DRC_NA
+        },
+};
+
+/* RPC service program description built on glusterd_mgmt_hndsk_actors */
+struct rpcsvc_program glusterd_mgmt_hndsk_prog = {
+        .progname  = "Gluster MGMT Handshake",
+        .prognum   = GD_MGMT_HNDSK_PROGRAM,
+        .progver   = GD_MGMT_HNDSK_VERSION,
+        .actors    = glusterd_mgmt_hndsk_actors,
+        .numactors = GD_MGMT_HNDSK_MAXVALUE,
+};
+
+/* Procedure names of the management handshake program, indexed by
+ * procedure number
+ */
+char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = {
+        [GD_MGMT_HNDSK_NULL]         = "NULL",
+        [GD_MGMT_HNDSK_VERSIONS]     = "MGMT-VERS",
+        [GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK",
+};
+
+/* Client-side description of the management handshake program */
+rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = {
+        .progname  = "Gluster MGMT Handshake",
+        .prognum   = GD_MGMT_HNDSK_PROGRAM,
+        .progver   = GD_MGMT_HNDSK_VERSION,
+        .procnames = glusterd_mgmt_hndsk_proc,
+};
+
+
+/* Build a GD_FRIEND_EVENT_CONNECTED event for the peer referred to by
+ * peerctx and inject it into the friend state machine.
+ *
+ * The event carries a probe context holding the peer's hostname and port
+ * together with the request and dict saved in peerctx->args.
+ *
+ * Returns 0 on success and a non-zero value on failure. On a failure that
+ * happens before the event is handed to
+ * glusterd_friend_sm_inject_event (), everything allocated here is
+ * released.
+ */
+static int
+glusterd_event_connected_inject (glusterd_peerctx_t *peerctx)
+{
+        glusterd_friend_sm_event_t *event     = NULL;
+        glusterd_probe_ctx_t       *ctx       = NULL;
+        glusterd_peerinfo_t        *peerinfo  = NULL;
+        gf_boolean_t                handed_on = _gf_false;
+        int                         ret       = -1;
+
+        GF_ASSERT (peerctx);
+
+        ret = glusterd_friend_sm_new_event
+                        (GD_FRIEND_EVENT_CONNECTED, &event);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_NEW_GET_FAIL, "Unable to get new event");
+                goto out;
+        }
+
+        ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
+
+        if (!ctx) {
+                ret = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Memory not available");
+                goto out;
+        }
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+        if (!peerinfo) {
+                ret = -1;
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+                        peerctx->peername, uuid_utoa (peerctx->peerid));
+                goto unlock;
+        }
+
+        ctx->hostname = gf_strdup (peerinfo->hostname);
+        event->peername = gf_strdup (peerinfo->hostname);
+        if (!ctx->hostname || !event->peername) {
+                ret = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Memory not available");
+                goto unlock;
+        }
+
+        ctx->port = peerinfo->port;
+        ctx->req = peerctx->args.req;
+        ctx->dict = peerctx->args.dict;
+
+        gf_uuid_copy (event->peerid, peerinfo->uuid);
+        event->ctx = ctx;
+
+        /* From here on the event (and ctx through it) is handed to the
+         * state machine; it is not freed here even if the inject call
+         * reports an error, as before.
+         */
+        handed_on = _gf_true;
+        ret = glusterd_friend_sm_inject_event (event);
+
+        if (ret)
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_INJECT_FAIL, "Unable to inject "
+                        "EVENT_CONNECTED ret = %d", ret);
+unlock:
+        rcu_read_unlock ();
+
+out:
+        if (!handed_on) {
+                if (ctx) {
+                        if (ctx->hostname)
+                                GF_FREE (ctx->hostname);
+                        GF_FREE (ctx);
+                }
+                /* NOTE(review): assumes glusterd_friend_sm_new_event ()
+                 * allocates the event with GF_CALLOC - confirm.
+                 */
+                if (event) {
+                        if (event->peername)
+                                GF_FREE (event->peername);
+                        GF_FREE (event);
+                }
+        }
+
+        gf_msg_debug ("glusterd", 0, "returning %d", ret);
+        return ret;
+}
+
+
+/* Check whether a peer can be part of the cluster, based on the op-version
+ * values it sent in dict (GD_OP_VERSION_KEY, GD_MAX_OP_VERSION_KEY and
+ * GD_MIN_OP_VERSION_KEY).
+ *
+ * The peer is accepted when this glusterd's conf->op_version lies within
+ * the peer's [min, max] range. On rejection an explanatory message is
+ * allocated into *errstr.
+ *
+ * Returns 0 when the peer is accepted and a non-zero value otherwise
+ * (including when an argument is NULL or a key is missing from dict).
+ */
+int
+gd_validate_peer_op_version (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+                             dict_t *dict, char **errstr)
+{
+        int              ret                 = -1;
+        glusterd_conf_t *conf                = NULL;
+        int32_t          peer_op_version     = 0;
+        int32_t          peer_min_op_version = 0;
+        int32_t          peer_max_op_version = 0;
+
+        /* Any missing argument makes validation impossible. Return
+         * straight away: the exit path below dereferences both 'this' and
+         * 'peerinfo' for logging.
+         */
+        if (!dict || !this || !peerinfo)
+                return -1;
+
+        conf = this->private;
+
+        ret = dict_get_int32 (dict, GD_OP_VERSION_KEY, &peer_op_version);
+        if (ret)
+                goto out;
+
+        ret = dict_get_int32 (dict, GD_MAX_OP_VERSION_KEY,
+                              &peer_max_op_version);
+        if (ret)
+                goto out;
+
+        ret = dict_get_int32 (dict, GD_MIN_OP_VERSION_KEY,
+                              &peer_min_op_version);
+        if (ret)
+                goto out;
+
+        ret = -1;
+        /* Check if peer can support our op_version */
+        if ((peer_max_op_version < conf->op_version) ||
+            (peer_min_op_version > conf->op_version)) {
+                /* the result of gf_asprintf is not used; the function
+                 * fails with -1 either way
+                 */
+                ret = gf_asprintf (errstr, "Peer %s does not support required "
+                                   "op-version", peerinfo->hostname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug (this->name , 0, "Peer %s %s", peerinfo->hostname,
+                      ((ret < 0) ? "rejected" : "accepted"));
+        return ret;
+}
+
+/* Callback for the reply to the MGMT-VERS-ACK request sent to a peer.
+ *
+ * On a successful ack the peer's mgmt, peer and mgmt_v3 programs are set,
+ * GF_EVENT_CHILD_UP is notified and, depending on peerctx->args.mode, a
+ * connected event is injected. On any failure after the peer was found,
+ * the transport to the peer is disconnected and an error string is saved
+ * in peerctx->errstr where one is available.
+ *
+ * The frame is destroyed and the friend state machine is run before
+ * returning. Always returns 0.
+ */
+int
+__glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov,
+                                        int count, void *myframe)
+{
+        xlator_t            *this     = THIS;
+        call_frame_t        *frame    = myframe;
+        glusterd_peerctx_t  *peerctx  = frame->local;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        gf_mgmt_hndsk_rsp    rsp      = {0,};
+        char                 errmsg[1024] = {0,};
+        int                  op_errno = EINVAL;
+        int                  ret      = -1;
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+        if (peerinfo == NULL) {
+                gf_msg_debug (this->name, 0, "Could not find peer %s(%s)",
+                              peerctx->peername, uuid_utoa (peerctx->peerid));
+                ret = -1;
+                goto out;
+        }
+
+        if (req->rpc_status == -1) {
+                snprintf (errmsg, sizeof (errmsg),
+                          "Error through RPC layer, retry again later");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_LAYER_ERROR, "%s", errmsg);
+                peerctx->errstr = gf_strdup (errmsg);
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+        if (ret < 0) {
+                snprintf (errmsg, sizeof (errmsg), "Failed to decode XDR");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", errmsg);
+                peerctx->errstr = gf_strdup (errmsg);
+                goto out;
+        }
+
+        op_errno = rsp.op_errno;
+        if (rsp.op_ret == -1) {
+                ret = -1;
+                snprintf (errmsg, sizeof (errmsg),
+                          "Failed to get handshake ack from remote server");
+                gf_msg (frame->this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_HANDSHAKE_ACK, "%s", errmsg);
+                peerctx->errstr = gf_strdup (errmsg);
+                goto out;
+        }
+
+        /* TODO: this is hardcoded as of now, but I don't forsee any problems
+         * with this as long as we are properly handshaking operating versions
+         */
+        peerinfo->mgmt = &gd_mgmt_prog;
+        peerinfo->peer = &gd_peer_prog;
+        peerinfo->mgmt_v3 = &gd_mgmt_v3_prog;
+
+        /* The results of the two calls below are not propagated: the
+         * callback reports success once the ack itself was good.
+         */
+        (void) default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+        if (peerctx->args.mode == GD_MODE_ON) {
+                (void) glusterd_event_connected_inject (peerctx);
+                peerctx->args.req = NULL;
+        } else if (peerctx->args.mode == GD_MODE_SWITCH_ON) {
+                peerctx->args.mode = GD_MODE_ON;
+        } else {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_UNKNOWN_MODE, "unknown mode %d",
+                        peerctx->args.mode);
+        }
+
+        ret = 0;
+out:
+
+        /* drop the connection when the handshake did not complete */
+        if (peerinfo && (ret != 0))
+                rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+
+        rcu_read_unlock ();
+
+        frame->local = NULL;
+        STACK_DESTROY (frame->root);
+
+        if (rsp.hndsk.hndsk_val)
+                free (rsp.hndsk.hndsk_val);
+
+        glusterd_friend_sm ();
+
+        return 0;
+}
+
+/* Callback entry point for the MGMT-VERS-ACK reply. It hands the reply to
+ * __glusterd_mgmt_hndsk_version_ack_cbk through glusterd_big_locked_cbk ().
+ */
+int
+glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov,
+                                     int count, void *myframe)
+{
+        int ret = 0;
+
+        ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                       __glusterd_mgmt_hndsk_version_ack_cbk);
+
+        return ret;
+}
+
+/*
+ * Handle the peer's reply to the GD_MGMT_HNDSK_VERSIONS request.
+ *
+ * Decodes the response, validates the peer's operating version with
+ * gd_validate_peer_op_version() and, if acceptable, reuses the same frame to
+ * send GD_MGMT_HNDSK_VERSIONS_ACK carrying conf->op_version.
+ *
+ * Whenever 'ret' is non-zero at 'out' the frame is destroyed and the peer's
+ * transport is disconnected; every error path below therefore makes sure
+ * 'ret' is non-zero before jumping there.  Always returns 0.
+ */
+int
+__glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov,
+                                   int count, void *myframe)
+{
+        int                  ret      = -1;
+        int                  op_errno = EINVAL;
+        gf_mgmt_hndsk_rsp    rsp      = {0,};
+        gf_mgmt_hndsk_req    arg      = {{0,}};
+        xlator_t            *this     = NULL;
+        call_frame_t        *frame    = NULL;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        glusterd_peerctx_t  *peerctx  = NULL;
+        dict_t              *dict     = NULL;
+        dict_t              *rsp_dict = NULL;
+        glusterd_conf_t     *conf     = NULL;
+        char                 msg[1024] = {0,};
+
+        this = THIS;
+        conf = this->private;
+        frame = myframe;
+        peerctx = frame->local;
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+        if (!peerinfo) {
+                ret = -1;
+                gf_msg_debug (this->name, 0, "Could not find peer %s(%s)",
+                              peerctx->peername, uuid_utoa (peerctx->peerid));
+                goto out;
+        }
+
+        if (-1 == req->rpc_status) {
+                ret = -1;
+                snprintf (msg, sizeof (msg),
+                          "Error through RPC layer, retry again later");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_LAYER_ERROR, "%s", msg);
+                peerctx->errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+        if (ret < 0) {
+                snprintf (msg, sizeof (msg), "Failed to decode management "
+                          "handshake response");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", msg);
+                peerctx->errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, dict, rsp.hndsk.hndsk_val,
+                                      rsp.hndsk.hndsk_len, ret, op_errno,
+                                      out);
+
+        op_errno = rsp.op_errno;
+        if (-1 == rsp.op_ret) {
+                /* 'ret' may be 0 here after a successful decode; force the
+                 * failure so the cleanup at 'out' runs, as the ack callback
+                 * in this file does for the same check. */
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        GD_MSG_VERS_GET_FAIL,
+                        "failed to get the 'versions' from peer (%s)",
+                        req->conn->trans->peerinfo.identifier);
+                goto out;
+        }
+
+        /* Check if peer can be part of cluster */
+        ret = gd_validate_peer_op_version (this, peerinfo, dict,
+                                           &peerctx->errstr);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_VERSION_MISMATCH,
+                        "failed to validate the operating version of peer (%s)",
+                        peerinfo->hostname);
+                goto out;
+        }
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                /* validation above left 'ret' at success */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int32 (rsp_dict, GD_OP_VERSION_KEY, conf->op_version);
+        if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       GD_MSG_DICT_SET_FAILED,
+                       "failed to set operating version in dict");
+                goto out;
+        }
+
+        /* The serialize macro is given the 'out' label and op_errno but not
+         * 'ret' (presumably it jumps to 'out' on failure -- confirm), so mark
+         * the failure beforehand; on success 'ret' is overwritten by the
+         * submit call below. */
+        ret = -1;
+        GF_PROTOCOL_DICT_SERIALIZE (this, rsp_dict, (&arg.hndsk.hndsk_val),
+                                    arg.hndsk.hndsk_len, op_errno, out);
+
+        ret = glusterd_submit_request (peerinfo->rpc, &arg, frame,
+                                       &gd_clnt_mgmt_hndsk_prog,
+                                       GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this,
+                                       glusterd_mgmt_hndsk_version_ack_cbk,
+                                       (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+
+out:
+        if (ret) {
+                /* peerctx is not owned by the frame: detach it first */
+                frame->local = NULL;
+                STACK_DESTROY (frame->root);
+                if (peerinfo)
+                        rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+        }
+
+        rcu_read_unlock ();
+
+        /* filled in by the XDR decoder, hence libc free() */
+        if (rsp.hndsk.hndsk_val)
+                free (rsp.hndsk.hndsk_val);
+
+        if (arg.hndsk.hndsk_val)
+                GF_FREE (arg.hndsk.hndsk_val);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        return 0;
+}
+/*
+ * RPC callback registered for the GD_MGMT_HNDSK_VERSIONS request.
+ * Forwards its arguments unchanged to glusterd_big_locked_cbk together with
+ * __glusterd_mgmt_hndsk_version_cbk (presumably run under glusterd's big
+ * lock -- confirm against glusterd_big_locked_cbk) and returns that result.
+ */
+int
+glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_mgmt_hndsk_version_cbk);
+}
+
+/*
+ * Start the op-version handshake with a peer.
+ *
+ * Builds a request dict holding this node's UUID under GD_PEER_ID_KEY,
+ * serializes it and submits GD_MGMT_HNDSK_VERSIONS on the peer's rpc; the
+ * reply is handled by glusterd_mgmt_hndsk_version_cbk.
+ *
+ * @this:    glusterd xlator.
+ * @peerctx: peer context; stored in frame->local for the callback and never
+ *           owned by the frame.
+ *
+ * Returns 0 once the request has been handed to glusterd_submit_request(),
+ * non-zero if nothing was submitted (allocation, dict, serialization or
+ * peer-lookup failure).  In the failure case the frame is destroyed here.
+ */
+int
+glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx)
+{
+        call_frame_t        *frame    = NULL;
+        gf_mgmt_hndsk_req    req      = {{0,},};
+        glusterd_peerinfo_t *peerinfo = NULL;
+        dict_t              *req_dict = NULL;
+        int                  ret      = -1;
+        int                  op_errno = EINVAL;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        frame->local = peerctx;
+
+        req_dict = dict_new ();
+        if (!req_dict)
+                goto out;
+
+        ret = dict_set_dynstr (req_dict, GD_PEER_ID_KEY,
+                               gf_strdup (uuid_utoa (MY_UUID)));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set peer ID in dict");
+                goto out;
+        }
+
+        /* From here until the request is submitted every exit is a failure.
+         * The serialize macro receives the 'out' label and op_errno but not
+         * 'ret', so without this a serialization failure (and the
+         * peer-not-found case below) would return 0 with the frame leaked. */
+        ret = -1;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, req_dict, (&req.hndsk.hndsk_val),
+                                    req.hndsk.hndsk_len, op_errno, out);
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+        if (!peerinfo) {
+                gf_msg_debug (THIS->name, 0, "Could not find peer %s(%s)",
+                              peerctx->peername, uuid_utoa (peerctx->peerid));
+                goto unlock;
+        }
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
+                                       &gd_clnt_mgmt_hndsk_prog,
+                                       GD_MGMT_HNDSK_VERSIONS, NULL, this,
+                                       glusterd_mgmt_hndsk_version_cbk,
+                                       (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+        /* As in the original, the submit result is deliberately ignored.
+         * NOTE(review): presumably the callback owns the frame once the
+         * request has been handed over -- confirm. */
+        ret = 0;
+unlock:
+        rcu_read_unlock ();
+out:
+        if (ret && frame) {
+                /* Detach the caller's peerctx before tearing the frame down,
+                 * exactly as the callbacks in this file do. */
+                frame->local = NULL;
+                STACK_DESTROY (frame->root);
+        }
+
+        /* The serialized buffer and the request dict are only needed while
+         * submitting; release them as the version callback does for its own
+         * request. */
+        if (req.hndsk.hndsk_val)
+                GF_FREE (req.hndsk.hndsk_val);
+
+        if (req_dict)
+                dict_unref (req_dict);
+
+        return ret;
+}
+
+/*
+ * Walk the program list advertised by a peer and point peerinfo->mgmt and
+ * peerinfo->peer at the local program tables whose number and version match.
+ * Entries matching neither are reported at debug level.  The programs that
+ * end up selected (including mgmt_v3, if already set) are logged.
+ *
+ * Returns -1 when either argument is NULL, 0 otherwise.
+ */
+int
+glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo,
+                                gf_prog_detail *prog)
+{
+        gf_prog_detail *cur       = NULL;
+        int             supported = 0;
+
+        if (!peerinfo || !prog)
+                return -1;
+
+        for (cur = prog; cur; cur = cur->next) {
+                supported = 0;
+
+                if ((gd_mgmt_prog.prognum == cur->prognum) &&
+                    (gd_mgmt_prog.progver == cur->progver)) {
+                        peerinfo->mgmt = &gd_mgmt_prog;
+                        supported = 1;
+                }
+
+                if ((gd_peer_prog.prognum == cur->prognum) &&
+                    (gd_peer_prog.progver == cur->progver)) {
+                        peerinfo->peer = &gd_peer_prog;
+                        supported = 1;
+                }
+
+                if (!supported) {
+                        gf_msg_debug ("glusterd", 0,
+                                      "%s (%"PRId64":%"PRId64") not supported",
+                                      cur->progname, cur->prognum,
+                                      cur->progver);
+                }
+        }
+
+        if (peerinfo->mgmt) {
+                gf_msg ("glusterd", GF_LOG_INFO, 0,
+                        GD_MSG_VERS_INFO,
+                        "Using Program %s, Num (%d), Version (%d)",
+                        peerinfo->mgmt->progname, peerinfo->mgmt->prognum,
+                        peerinfo->mgmt->progver);
+        }
+
+        if (peerinfo->peer) {
+                gf_msg ("glusterd", GF_LOG_INFO, 0,
+                        GD_MSG_VERS_INFO,
+                        "Using Program %s, Num (%d), Version (%d)",
+                        peerinfo->peer->progname, peerinfo->peer->prognum,
+                        peerinfo->peer->progver);
+        }
+
+        if (peerinfo->mgmt_v3) {
+                gf_msg ("glusterd", GF_LOG_INFO, 0,
+                        GD_MSG_VERS_INFO,
+                        "Using Program %s, Num (%d), Version (%d)",
+                        peerinfo->mgmt_v3->progname,
+                        peerinfo->mgmt_v3->prognum,
+                        peerinfo->mgmt_v3->progver);
+        }
+
+        return 0;
+}
+
+/*
+ * Report whether the program list contains the management handshake program
+ * (GD_MGMT_HNDSK_PROGRAM at GD_MGMT_HNDSK_VERSION).
+ */
+static gf_boolean_t
+_mgmt_hndsk_prog_present (gf_prog_detail *prog) {
+        gf_prog_detail *cur = NULL;
+
+        GF_ASSERT (prog);
+
+        for (cur = prog; cur != NULL; cur = cur->next) {
+                if ((cur->prognum == GD_MGMT_HNDSK_PROGRAM) &&
+                    (cur->progver == GD_MGMT_HNDSK_VERSION))
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+/*
+ * Handle the reply to the GF_DUMP_DUMP request sent by
+ * glusterd_peer_dump_version().
+ *
+ * Flow: look the peer up by generation, bail out on an RPC-layer error,
+ * decode the program list, then either
+ *   - start the op-version handshake when the peer advertises the mgmt
+ *     handshake program,
+ *   - fail when it does not and conf->op_version > 1, or
+ *   - select the mgmt programs directly, notify GF_EVENT_CHILD_UP and act
+ *     on peerctx->args.mode.
+ *
+ * At 'out' a non-zero ret disconnects the peer's transport; the friend and
+ * op state machines are run, the decoded program list is released, and the
+ * frame is destroyed with frame->local detached first.  Always returns 0.
+ */
+int
+__glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_dump_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ gf_prog_detail *trav = NULL;
+ gf_prog_detail *next = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[1024] = {0,};
+
+ this = THIS;
+ conf = this->private;
+ frame = myframe;
+ peerctx = frame->local;
+
+ rcu_read_lock (); /* held until just after the disconnect check at 'out' */
+
+ peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ gf_msg_debug (this->name, 0, "Couldn't find peer %s(%s)",
+ peerctx->peername, uuid_utoa (peerctx->peerid));
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ snprintf (msg, sizeof (msg),
+ "Error through RPC layer, retry again later");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RPC_LAYER_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ snprintf (msg, sizeof (msg), "Failed to decode XDR");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REQ_DECODE_FAIL, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+ if (-1 == rsp.op_ret) {
+ snprintf (msg, sizeof (msg),
+ "Failed to get the 'versions' from remote server");
+ gf_msg (frame->this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VERS_GET_FAIL, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (_mgmt_hndsk_prog_present (rsp.prog)) {
+ gf_msg_debug (this->name, 0,
+ "Proceeding to op-version handshake with peer %s",
+ peerinfo->hostname);
+ ret = glusterd_mgmt_handshake (this, peerctx); /* uses a frame of its own */
+ goto out;
+ } else if (conf->op_version > 1) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "Peer %s does not support required op-version",
+ peerinfo->hostname);
+ peerctx->errstr = gf_strdup (msg);
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VERSION_UNSUPPORTED, "%s", msg);
+ goto out;
+ }
+
+ /* Make sure we assign the proper program to peer */
+ ret = glusterd_set_clnt_mgmt_program (peerinfo, rsp.prog);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MGMT_PGM_SET_FAIL,
+ "failed to set the mgmt program");
+ goto out;
+ }
+
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+ if (GD_MODE_ON == peerctx->args.mode) {
+ ret = glusterd_event_connected_inject (peerctx);
+ peerctx->args.req = NULL;
+ } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) {
+ peerctx->args.mode = GD_MODE_ON;
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_UNKNOWN_MODE, "unknown mode %d",
+ peerctx->args.mode);
+ }
+
+ ret = 0; /* results of the notify/inject calls above are not propagated */
+
+out:
+ if (ret != 0 && peerinfo)
+ rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+
+ rcu_read_unlock ();
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ /* don't use GF_FREE, buffer was allocated by libc */
+ if (rsp.prog) {
+ trav = rsp.prog;
+ while (trav) {
+ next = trav->next;
+ free (trav->progname);
+ free (trav);
+ trav = next;
+ }
+ }
+
+ frame->local = NULL; /* peerctx must not go away with the frame */
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+/*
+ * RPC callback registered for the GF_DUMP_DUMP request.
+ * Forwards its arguments unchanged to glusterd_big_locked_cbk together with
+ * __glusterd_peer_dump_version_cbk (presumably run under glusterd's big
+ * lock -- confirm against glusterd_big_locked_cbk) and returns that result.
+ */
+int
+glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_peer_dump_version_cbk);
+}
+
+/*
+ * Ask a peer which RPC programs it supports (GF_DUMP_DUMP).
+ *
+ * @this:    glusterd xlator.
+ * @rpc:     not used by this function; the request goes out on the rpc of
+ *           the peerinfo found via peerctx->peerinfo_gen.
+ * @peerctx: peer context handed to the callback through frame->local; it is
+ *           never owned by the frame.
+ *
+ * Returns the result of glusterd_submit_request(), or -1 when the frame
+ * cannot be created, peerctx is NULL or the peer cannot be found.  On a
+ * non-zero result the frame is destroyed here.
+ */
+int
+glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc,
+                            glusterd_peerctx_t *peerctx)
+{
+        call_frame_t        *frame    = NULL;
+        gf_dump_req          req      = {0,};
+        glusterd_peerinfo_t *peerinfo = NULL;
+        int                  ret      = -1;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        frame->local = peerctx;
+        if (!peerctx)
+                goto out;
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen);
+        if (!peerinfo) {
+                gf_msg_debug (this->name, 0, "Couldn't find peer %s(%s)",
+                              peerctx->peername, uuid_utoa (peerctx->peerid));
+                goto unlock;
+        }
+
+        req.gfs_id = 0xcafe;
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
+                                       &glusterd_dump_prog, GF_DUMP_DUMP,
+                                       NULL, this,
+                                       glusterd_peer_dump_version_cbk,
+                                       (xdrproc_t)xdr_gf_dump_req);
+unlock:
+        rcu_read_unlock ();
+out:
+        if (ret && frame) {
+                /* Detach the caller's peerctx before destroying the frame,
+                 * as the callbacks in this file do, so that it is not
+                 * released along with the frame.
+                 * NOTE(review): relies on STACK_DESTROY releasing a non-NULL
+                 * frame->local -- confirm. */
+                frame->local = NULL;
+                STACK_DESTROY (frame->root);
+        }
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
new file mode 100644
index 00000000000..cb3d38d2358
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -0,0 +1,598 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "dict.h"
+#include "xlator.h"
+#include "logging.h"
+#include "run.h"
+#include "defaults.h"
+#include "syscall.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "glusterd-messages.h"
+
+#include <fnmatch.h>
+/*
+ * Hook directory name for each glusterd operation, indexed by the GD_OP_*
+ * value.  An empty string marks an operation without a hooks directory:
+ * glusterd_hooks_create_hooks_directory() skips entries of length zero.
+ */
+#define EMPTY ""
+char glusterd_hook_dirnames[GD_OP_MAX][256] =
+{
+ [GD_OP_NONE] = EMPTY,
+ [GD_OP_CREATE_VOLUME] = "create",
+ [GD_OP_START_BRICK] = EMPTY,
+ [GD_OP_STOP_BRICK] = EMPTY,
+ [GD_OP_DELETE_VOLUME] = "delete",
+ [GD_OP_START_VOLUME] = "start",
+ [GD_OP_STOP_VOLUME] = "stop",
+ [GD_OP_DEFRAG_VOLUME] = EMPTY,
+ [GD_OP_ADD_BRICK] = "add-brick",
+ [GD_OP_REMOVE_BRICK] = "remove-brick",
+ [GD_OP_REPLACE_BRICK] = EMPTY,
+ [GD_OP_SET_VOLUME] = "set",
+ [GD_OP_RESET_VOLUME] = "reset",
+ [GD_OP_SYNC_VOLUME] = EMPTY,
+ [GD_OP_LOG_ROTATE] = EMPTY,
+ [GD_OP_GSYNC_CREATE] = "gsync-create",
+ [GD_OP_GSYNC_SET] = EMPTY,
+ [GD_OP_PROFILE_VOLUME] = EMPTY,
+ [GD_OP_QUOTA] = EMPTY,
+ [GD_OP_STATUS_VOLUME] = EMPTY,
+ [GD_OP_REBALANCE] = EMPTY,
+ [GD_OP_HEAL_VOLUME] = EMPTY,
+ [GD_OP_STATEDUMP_VOLUME] = EMPTY,
+ [GD_OP_LIST_VOLUME] = EMPTY,
+ [GD_OP_CLEARLOCKS_VOLUME] = EMPTY,
+ [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY,
+};
+#undef EMPTY
+
+/*
+ * A hook script is enabled when its file name starts with 'S' and does not
+ * match the "*.rpmsave" or "*.rpmnew" patterns.
+ */
+static gf_boolean_t
+glusterd_is_hook_enabled (char *script)
+{
+        if (script[0] != 'S')
+                return _gf_false;
+
+        if (fnmatch ("*.rpmsave", script, 0) == 0)
+                return _gf_false;
+
+        if (fnmatch ("*.rpmnew", script, 0) == 0)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* mkdir_p() the given path with mode 0777, logging a critical message on
+ * failure.  Returns mkdir_p()'s result. */
+static int
+gd_hooks_make_dir (char *dir)
+{
+        int rc = mkdir_p (dir, 0777, _gf_true);
+
+        if (rc)
+                gf_msg (THIS->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "Unable to create %s",
+                        dir);
+
+        return rc;
+}
+
+/*
+ * Create the hooks directory tree under basedir:
+ *   <basedir>/hooks,
+ *   the versioned directory produced by GLUSTERD_GET_HOOKS_DIR, and below it
+ *   <cmd>, <cmd>/pre and <cmd>/post for every operation that has a non-empty
+ *   hooks sub-directory name.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the mkdir_p() call
+ * that failed.
+ */
+int
+glusterd_hooks_create_hooks_directory (char *basedir)
+{
+        glusterd_conf_t *priv = THIS->private;
+        const char      *stage_names[GD_COMMIT_HOOK_MAX] = {
+                [GD_COMMIT_HOOK_PRE]  = "pre",
+                [GD_COMMIT_HOOK_POST] = "post",
+        };
+        char             version_dir[PATH_MAX] = {0, };
+        char             path[PATH_MAX]        = {0, };
+        char            *cmd_subdir            = NULL;
+        int              stage                 = GD_COMMIT_HOOK_NONE;
+        int              op                    = GD_OP_NONE;
+        int              ret                   = -1;
+
+        snprintf (path, sizeof (path), "%s/hooks", basedir);
+        ret = gd_hooks_make_dir (path);
+        if (ret)
+                return ret;
+
+        GLUSTERD_GET_HOOKS_DIR (version_dir, GLUSTERD_HOOK_VER, priv);
+        ret = gd_hooks_make_dir (version_dir);
+        if (ret)
+                return ret;
+
+        for (op = GD_OP_NONE+1; op < GD_OP_MAX; op++) {
+                cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op);
+                if (cmd_subdir[0] == '\0')
+                        continue;
+
+                snprintf (path, sizeof (path), "%s/%s", version_dir,
+                          cmd_subdir);
+                ret = gd_hooks_make_dir (path);
+                if (ret)
+                        return ret;
+
+                for (stage = GD_COMMIT_HOOK_PRE; stage < GD_COMMIT_HOOK_MAX;
+                     stage++) {
+                        snprintf (path, sizeof (path), "%s/%s/%s",
+                                  version_dir, cmd_subdir, stage_names[stage]);
+                        ret = gd_hooks_make_dir (path);
+                        if (ret)
+                                return ret;
+                }
+        }
+
+        return 0;
+}
+/*
+ * Return the hooks sub-directory name for an operation: the
+ * glusterd_hook_dirnames entry at index op (an empty string when the
+ * operation has none).  Asserts GD_OP_NONE < op < GD_OP_MAX.
+ */
+char*
+glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op)
+{
+ GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX));
+
+ return glusterd_hook_dirnames[op];
+}
+/* Append "--gd-workdir=<priv->workdir>" to the runner's argument list. */
+void
+glusterd_hooks_add_working_dir (runner_t *runner, glusterd_conf_t *priv)
+{
+ runner_argprintf (runner, "--gd-workdir=%s", priv->workdir);
+}
+/* Append "--volume-op=<op>" to the runner's argument list. */
+void
+glusterd_hooks_add_op (runner_t *runner, char *op)
+{
+ runner_argprintf (runner, "--volume-op=%s", op);
+}
+/* Append "--version=<GLUSTERD_HOOK_VER>" to the runner's argument list. */
+void
+glusterd_hooks_add_hooks_version (runner_t* runner)
+{
+ runner_argprintf (runner, "--version=%d", GLUSTERD_HOOK_VER);
+}
+
+/*
+ * Append the value stored under "hooks_args" in dict, if any, as one extra
+ * argument for the hook script.  Does nothing when THIS, dict or runner is
+ * NULL.
+ */
+static void
+glusterd_hooks_add_custom_args (dict_t *dict, runner_t *runner)
+{
+        xlator_t *this       = THIS;
+        char     *hooks_args = NULL;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, runner, out);
+
+        if (dict_get_str (dict, "hooks_args", &hooks_args) != 0) {
+                gf_msg_debug (this->name, 0,
+                              "No Hooks Arguments.");
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Hooks Args = %s", hooks_args);
+        }
+
+        if (hooks_args != NULL)
+                runner_argprintf (runner, "%s", hooks_args);
+
+out:
+        return;
+}
+
+
+/*
+ * Add the "volume set" options found in dict to the hook script arguments:
+ * "-o" followed by one "<key>=<value>" argument per key<i>/value<i> pair,
+ * for i = 1, 2, ... until a key or value is missing, and finally the custom
+ * "hooks_args", if any.
+ *
+ * Returns the dict_get_int32() error when "count" is absent; 0 otherwise,
+ * including when "count" is 0 (in which case nothing is added).
+ */
+int
+glusterd_hooks_set_volume_args (dict_t *dict, runner_t *runner)
+{
+        char  lookup[1024] = {0,};
+        char *opt_key      = NULL;
+        char *opt_val      = NULL;
+        int   count        = 0;
+        int   idx          = 0;
+        int   ret          = -1;
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret)
+                return ret;
+
+        /* This will not happen unless op_ctx
+         * is corrupted*/
+        if (count == 0)
+                return 0;
+
+        runner_add_arg (runner, "-o");
+
+        for (idx = 1; ; idx++) {
+                snprintf (lookup, sizeof (lookup), "key%d", idx);
+                if (dict_get_str (dict, lookup, &opt_key))
+                        break;
+
+                snprintf (lookup, sizeof (lookup), "value%d", idx);
+                if (dict_get_str (dict, lookup, &opt_val))
+                        break;
+
+                runner_argprintf (runner, "%s=%s", opt_key, opt_val);
+        }
+
+        glusterd_hooks_add_custom_args (dict, runner);
+
+        return 0;
+}
+
+/*
+ * Append the operation-specific arguments for a hook script.
+ *
+ * @runner: runner whose argument list is extended.
+ * @op:     operation the hooks are run for.
+ * @op_ctx: operation context; read for GD_OP_SET_VOLUME and
+ *          GD_OP_GSYNC_CREATE only.
+ * @type:   whether these are pre- or post-commit hooks.
+ *
+ * "--first" (start) and "--last" (stop) are derived from the number of
+ * volumes currently started, as counted here.
+ *
+ * Returns 0, or the result of glusterd_hooks_set_volume_args() for
+ * GD_OP_SET_VOLUME.
+ */
+static int
+glusterd_hooks_add_op_args (runner_t *runner, glusterd_op_t op,
+                            dict_t *op_ctx, glusterd_commit_hook_type_t type)
+{
+        int                 vol_count = 0;
+        gf_boolean_t        truth     = _gf_false;
+        glusterd_volinfo_t *voliter   = NULL;
+        glusterd_conf_t    *priv      = NULL;
+        int                 ret       = 0;
+
+        priv = THIS->private;
+        cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+                if (glusterd_is_volume_started (voliter))
+                        vol_count++;
+        }
+
+        switch (op) {
+        case GD_OP_START_VOLUME:
+                /* first volume: none started before the commit, exactly one
+                 * started after it */
+                if ((type == GD_COMMIT_HOOK_PRE && vol_count == 0) ||
+                    (type == GD_COMMIT_HOOK_POST && vol_count == 1))
+                        truth = _gf_true;
+
+                runner_argprintf (runner, "--first=%s",
+                                  truth? "yes":"no");
+
+                glusterd_hooks_add_hooks_version (runner);
+                glusterd_hooks_add_op (runner, "start");
+                glusterd_hooks_add_working_dir (runner, priv);
+
+                break;
+
+        case GD_OP_STOP_VOLUME:
+                /* last volume: exactly one started before the commit, none
+                 * started after it */
+                if ((type == GD_COMMIT_HOOK_PRE && vol_count == 1) ||
+                    (type == GD_COMMIT_HOOK_POST && vol_count == 0))
+                        truth = _gf_true;
+
+                runner_argprintf (runner, "--last=%s",
+                                  truth? "yes":"no");
+                break;
+
+        case GD_OP_SET_VOLUME:
+                ret = glusterd_hooks_set_volume_args (op_ctx, runner);
+                glusterd_hooks_add_working_dir (runner, priv);
+                break;
+
+        case GD_OP_GSYNC_CREATE:
+                glusterd_hooks_add_custom_args (op_ctx, runner);
+                break;
+
+        case GD_OP_ADD_BRICK:
+                glusterd_hooks_add_hooks_version (runner);
+                glusterd_hooks_add_op (runner, "add-brick");
+                glusterd_hooks_add_working_dir (runner, priv);
+                break;
+
+        case GD_OP_RESET_VOLUME:
+                glusterd_hooks_add_hooks_version (runner);
+                glusterd_hooks_add_op (runner, "reset");
+                glusterd_hooks_add_working_dir (runner, priv);
+                break;
+
+        default:
+                break;
+
+        }
+
+        return ret;
+}
+
+/*
+ * Run every enabled hook script found in hooks_path, in sorted order.
+ *
+ * @hooks_path: directory holding the scripts.
+ * @op:         operation the hooks are run for.
+ * @op_ctx:     operation context; must contain "volname".
+ * @type:       pre- or post-commit.
+ *
+ * Each script is invoked as "<hooks_path>/<script> --volname=<volname>"
+ * followed by the operation-specific arguments.  A script that fails to
+ * execute is logged and does not stop the remaining scripts.
+ *
+ * Returns 0 when all scripts were attempted, non-zero when the volume name
+ * is missing, the directory cannot be opened, memory runs out or the
+ * operation-specific arguments cannot be built.
+ */
+int
+glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
+                          glusterd_commit_hook_type_t type)
+{
+        xlator_t       *this       = NULL;
+        runner_t        runner     = {0,};
+        DIR            *hookdir    = NULL;
+        struct dirent  *entry      = NULL;
+        struct dirent   scratch[2] = {{0,},};
+        char           *volname    = NULL;
+        char          **lines      = NULL;
+        char          **tmp        = NULL;
+        int             N          = 8; /*arbitrary*/
+        int             lineno     = 0;
+        int             line_count = 0;
+        int             ret        = -1;
+
+        this = THIS;
+
+        ret = dict_get_str (op_ctx, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get volname "
+                        "from operation context");
+                goto out;
+        }
+
+        hookdir = sys_opendir (hooks_path);
+        if (!hookdir) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Failed to open dir %s",
+                        hooks_path);
+                goto out;
+        }
+
+        lines = GF_CALLOC (1, N * sizeof (*lines), gf_gld_mt_charptr);
+        if (!lines) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = -1;
+        line_count = 0;
+        GF_FOR_EACH_ENTRY_IN_DIR (entry, hookdir, scratch);
+        while (entry) {
+                if (line_count == N-1) {
+                        /* Grow through a temporary: on failure the old array
+                         * stays valid and is released at 'out' instead of
+                         * being lost. */
+                        tmp = GF_REALLOC (lines, 2 * N * sizeof (*lines));
+                        if (!tmp)
+                                goto out;
+                        lines = tmp;
+                        N *= 2;
+                }
+
+                if (glusterd_is_hook_enabled (entry->d_name)) {
+                        lines[line_count] = gf_strdup (entry->d_name);
+                        /* never hand a NULL entry to qsort or the runner */
+                        if (!lines[line_count])
+                                goto out;
+                        line_count++;
+                }
+
+                GF_FOR_EACH_ENTRY_IN_DIR (entry, hookdir, scratch);
+        }
+
+        lines[line_count] = NULL;
+
+        /* Trimming the array is only an optimisation; if it fails the
+         * larger buffer remains usable. */
+        tmp = GF_REALLOC (lines, (line_count + 1) * sizeof (*lines));
+        if (tmp)
+                lines = tmp;
+
+        qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines);
+
+        for (lineno = 0; lineno < line_count; lineno++) {
+
+                runinit (&runner);
+                runner_argprintf (&runner, "%s/%s", hooks_path, lines[lineno]);
+                /*Add future command line arguments to hook scripts below*/
+                runner_argprintf (&runner, "--volname=%s", volname);
+                ret = glusterd_hooks_add_op_args (&runner, op, op_ctx, type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_ADD_OP_ARGS_FAIL, "Failed to add "
+                                "command specific arguments");
+                        /* Release the arguments collected so far.
+                         * NOTE(review): assumes runner_end() is safe on a
+                         * runner that was initialised but never run --
+                         * confirm against run.c. */
+                        runner_end (&runner);
+                        goto out;
+                }
+
+                ret = runner_run_reuse (&runner);
+                if (ret) {
+                        runner_log (&runner, this->name, GF_LOG_ERROR,
+                                    "Failed to execute script");
+                } else {
+                        runner_log (&runner, this->name, GF_LOG_INFO,
+                                    "Ran script");
+                }
+                runner_end (&runner);
+        }
+
+        ret = 0;
+out:
+        if (lines) {
+                /* Only the first line_count slots are known to hold valid
+                 * pointers; slots past them may be uninitialised after a
+                 * grow. */
+                for (lineno = 0; lineno < line_count; lineno++)
+                        GF_FREE (lines[lineno]);
+
+                GF_FREE (lines);
+        }
+
+        if (hookdir)
+                sys_closedir (hookdir);
+
+        return ret;
+}
+
+/*
+ * Queue a post-commit hook run: build a stub for (scriptdir, op, op_ctx),
+ * append it to the hooks list under the hooks mutex and signal the
+ * condition variable.
+ *
+ * Returns 0 on success, or the glusterd_hooks_stub_init() error.
+ */
+int
+glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op,
+                                  dict_t *op_ctx)
+{
+        glusterd_conf_t          *conf       = THIS->private;
+        glusterd_hooks_private_t *hooks_priv = conf->hooks_priv;
+        glusterd_hooks_stub_t    *new_stub   = NULL;
+        int                       rc         = -1;
+
+        rc = glusterd_hooks_stub_init (&new_stub, scriptdir, op, op_ctx);
+        if (rc)
+                return rc;
+
+        pthread_mutex_lock (&hooks_priv->mutex);
+        {
+                hooks_priv->waitcount++;
+                cds_list_add_tail (&new_stub->all_hooks, &hooks_priv->list);
+                pthread_cond_signal (&hooks_priv->cond);
+        }
+        pthread_mutex_unlock (&hooks_priv->mutex);
+
+        return 0;
+}
+
+/*
+ * Allocate and fill a hooks stub: a copy of scriptdir, the operation and
+ * the dict returned by dict_copy_with_ref() for op_ctx.
+ *
+ * On success stores the new stub in *stub and returns 0.  On any failure
+ * logs an error, hands whatever was allocated to
+ * glusterd_hooks_stub_cleanup() and returns -1 without touching *stub.
+ */
+int
+glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir,
+                          glusterd_op_t op, dict_t *op_ctx)
+{
+        glusterd_hooks_stub_t *fresh = NULL;
+
+        GF_ASSERT (stub);
+        if (stub == NULL)
+                goto fail;
+
+        fresh = GF_CALLOC (1, sizeof (*fresh),
+                           gf_gld_mt_hooks_stub_t);
+        if (fresh == NULL)
+                goto fail;
+
+        CDS_INIT_LIST_HEAD (&fresh->all_hooks);
+        fresh->op = op;
+
+        fresh->scriptdir = gf_strdup (scriptdir);
+        if (fresh->scriptdir == NULL)
+                goto fail;
+
+        fresh->op_ctx = dict_copy_with_ref (op_ctx, fresh->op_ctx);
+        if (fresh->op_ctx == NULL)
+                goto fail;
+
+        *stub = fresh;
+        return 0;
+
+fail:
+        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                GD_MSG_POST_HOOK_STUB_INIT_FAIL, "Failed to initialize "
+                "post hooks stub");
+        glusterd_hooks_stub_cleanup (fresh);
+
+        return -1;
+}
+
+/*
+ * Release a hooks stub: drop its reference on op_ctx (if set) and free
+ * scriptdir and the stub itself.  A NULL stub only produces a warning.
+ */
+void
+glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub)
+{
+        if (stub != NULL) {
+                if (stub->op_ctx)
+                        dict_unref (stub->op_ctx);
+
+                GF_FREE (stub->scriptdir);
+                GF_FREE (stub);
+                return;
+        }
+
+        gf_msg_callingfn (THIS->name, GF_LOG_WARNING, 0,
+                          GD_MSG_HOOK_STUB_NULL,
+                          "hooks_stub is NULL");
+}
+/*
+ * Thread body started by glusterd_hooks_spawn_worker(); args is the xlator,
+ * which is installed as THIS.
+ *
+ * Loops forever: waits on the hooks condition variable until the list is
+ * non-empty, removes the first stub while holding the mutex, then runs that
+ * stub's scripts as GD_COMMIT_HOOK_POST with the mutex released and frees
+ * the stub.  The trailing return is never reached.
+ */
+static void*
+hooks_worker (void *args)
+{
+ glusterd_conf_t *conf = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+ glusterd_hooks_stub_t *stub = NULL;
+
+ THIS = args;
+ conf = THIS->private;
+ hooks_priv = conf->hooks_priv;
+
+ for (;;) {
+ pthread_mutex_lock (&hooks_priv->mutex);
+ {
+ while (cds_list_empty (&hooks_priv->list)) { /* re-check after every wake-up */
+ pthread_cond_wait (&hooks_priv->cond,
+ &hooks_priv->mutex);
+ }
+ stub = cds_list_entry (hooks_priv->list.next,
+ glusterd_hooks_stub_t,
+ all_hooks);
+ cds_list_del_init (&stub->all_hooks);
+ hooks_priv->waitcount--;
+
+ }
+ pthread_mutex_unlock (&hooks_priv->mutex);
+
+ glusterd_hooks_run_hooks (stub->scriptdir, stub->op,
+ stub->op_ctx, GD_COMMIT_HOOK_POST); /* result is ignored */
+ glusterd_hooks_stub_cleanup (stub);
+ }
+
+ return NULL;
+}
+
+/*
+ * Allocate the hooks bookkeeping structure and initialise its mutex,
+ * condition variable, list head and wait counter.
+ *
+ * Stores the result in *new and returns 0; returns -1 when new is NULL or
+ * the allocation fails.
+ */
+int
+glusterd_hooks_priv_init (glusterd_hooks_private_t **new)
+{
+        glusterd_hooks_private_t *priv = NULL;
+
+        if (new == NULL)
+                return -1;
+
+        priv = GF_CALLOC (1, sizeof (*priv),
+                          gf_gld_mt_hooks_priv_t);
+        if (priv == NULL)
+                return -1;
+
+        pthread_mutex_init (&priv->mutex, NULL);
+        pthread_cond_init (&priv->cond, NULL);
+        CDS_INIT_LIST_HEAD (&priv->list);
+        priv->waitcount = 0;
+
+        *new = priv;
+
+        return 0;
+}
+
+/*
+ * Set up the post-hooks machinery: allocate the hooks private structure,
+ * publish it in conf->hooks_priv and start the hooks_worker thread.
+ *
+ * Returns 0 on success, the glusterd_hooks_priv_init() error, or the error
+ * number returned by pthread_create().
+ */
+int
+glusterd_hooks_spawn_worker (xlator_t *this)
+{
+        int                       ret        = -1;
+        glusterd_conf_t          *conf       = NULL;
+        glusterd_hooks_private_t *hooks_priv = NULL;
+
+
+        ret = glusterd_hooks_priv_init (&hooks_priv);
+        if (ret)
+                goto out;
+
+        conf = this->private;
+        conf->hooks_priv = hooks_priv;
+        ret = pthread_create (&hooks_priv->worker, NULL, hooks_worker,
+                              (void *)this);
+        /* pthread_create() reports failure through its return value and
+         * does not set errno, so log 'ret' rather than a stale errno. */
+        if (ret)
+                gf_msg (this->name, GF_LOG_CRITICAL, ret,
+                        GD_MSG_SPAWN_THREADS_FAIL, "Failed to spawn post "
+                        "hooks worker thread");
+out:
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.h b/xlators/mgmt/glusterd/src/glusterd-hooks.h
new file mode 100644
index 00000000000..7bab6adb626
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_HOOKS_H_
+#define _GLUSTERD_HOOKS_H_
+
+#include <fnmatch.h>
+/*
+ * Format "<priv->workdir>/hooks/<version>" into path, writing at most
+ * PATH_MAX bytes.
+ * NOTE(review): the expansion already ends in ';' and 'priv' is not
+ * parenthesised, so this is only safe as a full statement with a plain
+ * identifier for priv.
+ */
+#define GLUSTERD_GET_HOOKS_DIR(path, version, priv) \
+ snprintf (path, PATH_MAX, "%s/hooks/%d", priv->workdir,\
+ version);
+
+#define GLUSTERD_HOOK_VER 1
+
+#define GD_HOOKS_SPECIFIC_KEY "user.*"
+/*
+ * Whether hook scripts run before (PRE) or after (POST) an operation is
+ * committed.  NONE is the zero value; MAX is the upper bound used for
+ * iteration and array sizing.
+ */
+typedef enum glusterd_commit_hook_type {
+ GD_COMMIT_HOOK_NONE = 0,
+ GD_COMMIT_HOOK_PRE,
+ GD_COMMIT_HOOK_POST,
+ GD_COMMIT_HOOK_MAX
+} glusterd_commit_hook_type_t;
+/*
+ * State of the post-hooks queue: a list of pending stubs guarded by mutex,
+ * the condition variable signalled on each enqueue, and the worker thread
+ * that drains the list.
+ */
+typedef struct hooks_private {
+ struct cds_list_head list;
+ int waitcount; //debug purposes
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t worker;
+} glusterd_hooks_private_t;
+/*
+ * One queued post-hook run: list linkage (all_hooks), the directory whose
+ * scripts are to be run, the operation, and the operation context passed to
+ * the scripts.
+ */
+typedef struct hooks_stub {
+ struct cds_list_head all_hooks;
+ char *scriptdir;
+ glusterd_op_t op;
+ dict_t *op_ctx;
+
+} glusterd_hooks_stub_t;
+
+
+/*
+ * Report whether key matches GD_HOOKS_SPECIFIC_KEY ("user.*"); a matching
+ * key is also logged at debug level.
+ */
+static inline gf_boolean_t
+is_key_glusterd_hooks_friendly (char *key)
+{
+        /* This is very specific to hooks friendly behavior */
+        if (fnmatch (GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) != 0)
+                return _gf_false;
+
+        gf_msg_debug (THIS->name, 0, "user namespace key %s", key);
+
+        return _gf_true;
+}
+
+int
+glusterd_hooks_create_hooks_directory (char *basedir);
+
+char *
+glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op);
+
+int
+glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type);
+int
+glusterd_hooks_spawn_worker (xlator_t *this);
+
+int
+glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir,
+ glusterd_op_t op, dict_t *op_ctx);
+void
+glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub);
+
+int
+glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op,
+ dict_t *op_ctx);
+int
+glusterd_hooks_priv_init (glusterd_hooks_private_t **new);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c
new file mode 100644
index 00000000000..146092db79b
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.c
@@ -0,0 +1,714 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-locks.h"
+#include "glusterd-errno.h"
+#include "run.h"
+#include "syscall.h"
+#include "glusterd-messages.h"
+
+#include <signal.h>
+
+#define GF_MAX_LOCKING_ENTITIES 3
+
+/* Valid entities that the mgmt_v3 lock can hold locks upon *
+ * To add newer entities to be locked, we can just add more *
+ * entries to this table along with the type and default value */
+glusterd_valid_entities valid_types[] = {
+ { "vol", _gf_true },
+ { "snap", _gf_false },
+ { "global", _gf_false},
+ { NULL }, /* sentinel: the loops over this table stop at the first NULL type; glusterd_multiple_mgmt_v3_lock() compares against GF_MAX_LOCKING_ENTITIES, so keep it equal to the number of real rows */
+};
+
+/* Return _gf_true when type equals one of the entity names listed in
+ * valid_types[], _gf_false otherwise. */
+gf_boolean_t
+glusterd_mgmt_v3_is_type_valid (char *type)
+{
+        glusterd_valid_entities *entry = NULL;
+
+        GF_ASSERT (type);
+
+        /* The table is terminated by an entry whose type is NULL */
+        for (entry = valid_types; entry->type; entry++) {
+                if (strcmp (type, entry->type) == 0)
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/* Create the global mgmt_v3 lock list (a dict) when glusterd is spawned.
+ * Returns 0 on success and -1 when the dict could not be created. */
+int32_t
+glusterd_mgmt_v3_lock_init ()
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *priv = NULL;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        priv->mgmt_v3_lock = dict_new ();
+
+        return (priv->mgmt_v3_lock != NULL) ? 0 : -1;
+}
+
+/* Drop the reference on the global mgmt_v3 lock list (dict) during
+ * glusterd cleanup. Does nothing when the dict was never created. */
+void
+glusterd_mgmt_v3_lock_fini ()
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *priv = NULL;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!priv->mgmt_v3_lock)
+                return;
+
+        dict_unref (priv->mgmt_v3_lock);
+}
+
+/* Look up the owner of the mgmt_v3 lock stored under key.
+ * On success *uuid receives the owner's uuid, or the all-zero uuid when
+ * nothing is stored under key. Returns 0 on success and -1 when key or
+ * uuid is NULL. */
+int32_t
+glusterd_get_mgmt_v3_lock_owner (char *key, uuid_t *uuid)
+{
+        glusterd_mgmt_v3_lock_obj  *held       = NULL;
+        glusterd_conf_t            *priv       = NULL;
+        xlator_t                   *this       = THIS;
+        uuid_t                      null_owner = {0,};
+        int32_t                     ret        = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!key || !uuid) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "key or uuid is null.");
+                goto out;
+        }
+
+        /* A missing entry is not an error: it means "no owner" */
+        if (dict_get_bin (priv->mgmt_v3_lock, key, (void **) &held) == 0)
+                gf_uuid_copy (*uuid, held->lock_owner);
+        else
+                gf_uuid_copy (*uuid, null_owner);
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Release, on behalf of uuid, the locks of the entities named in dict under
+ * the keys "<type>name1" .. "<type>name<locked_count>".
+ * Every entry is attempted even when an earlier one fails. Returns 0 when
+ * all of them were released, otherwise the error of the last failure. */
+static int32_t
+glusterd_release_multiple_locks_per_entity (dict_t *dict, uuid_t uuid,
+                                            int32_t locked_count,
+                                            char *type)
+{
+        char       key[PATH_MAX] = "";
+        char      *entity        = NULL;
+        int32_t    idx           = -1;
+        int32_t    last_err      = 0;
+        int32_t    ret           = -1;
+        xlator_t  *this          = THIS;
+
+        GF_ASSERT(this);
+        GF_ASSERT (dict);
+        GF_ASSERT (type);
+
+        if (locked_count == 0) {
+                gf_msg_debug (this->name, 0,
+                              "No %s locked as part of this transaction",
+                              type);
+                goto out;
+        }
+
+        for (idx = 0; idx < locked_count; idx++) {
+                /* Dict keys are 1-based: volname1, volname2, snapname1, ... */
+                snprintf (key, sizeof (key), "%sname%d", type, idx + 1);
+
+                ret = dict_get_str (dict, key, &entity);
+                if (ret == 0) {
+                        ret = glusterd_mgmt_v3_unlock (entity, uuid, type);
+                        if (ret == 0)
+                                continue;
+
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                "Failed to release lock for %s.",
+                                entity);
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get %s locked_count = %d",
+                                key, locked_count);
+                }
+
+                /* Remember the failure, but keep releasing the rest */
+                last_err = ret;
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", last_err);
+        return last_err;
+}
+
+/* Acquire, on behalf of uuid, locks on the `count` entities of the given
+ * type that dict names under "<type>name1" .. "<type>name<count>" (for
+ * type "vol": volname1, volname2, ...).
+ * Locking stops at the first failure; the locks taken up to that point are
+ * released again and -1 is returned. Returns 0 only when every lock was
+ * acquired. */
+static int32_t
+glusterd_acquire_multiple_locks_per_entity (dict_t *dict, uuid_t uuid,
+                                            uint32_t *op_errno,
+                                            int32_t count, char *type)
+{
+        char       key[PATH_MAX] = "";
+        char      *entity        = NULL;
+        int32_t    idx           = -1;
+        int32_t    ret           = -1;
+        int32_t    acquired      = 0;
+        xlator_t  *this          = THIS;
+
+        GF_ASSERT(this);
+        GF_ASSERT (dict);
+        GF_ASSERT (type);
+
+        /* `acquired` only advances for iterations that did not break out */
+        for (idx = 0; idx < count; idx++, acquired++) {
+                snprintf (key, sizeof (key), "%sname%d", type, idx + 1);
+
+                ret = dict_get_str (dict, key, &entity);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get %s count = %d",
+                                key, count);
+                        break;
+                }
+
+                ret = glusterd_mgmt_v3_lock (entity, uuid, op_errno, type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Failed to acquire lock for %s %s "
+                                "on behalf of %s. Reversing "
+                                "this transaction", type, entity,
+                                uuid_utoa(uuid));
+                        break;
+                }
+        }
+
+        if (acquired == count) {
+                /* Every lock was taken */
+                ret = 0;
+        } else {
+                /* Undo the partial acquisition and report failure */
+                if (glusterd_release_multiple_locks_per_entity (dict, uuid,
+                                                                acquired,
+                                                                type)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+                                "Failed to release multiple %s locks",
+                                type);
+                }
+                ret = -1;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Release the locks held for one entity type on behalf of uuid.
+ * Nothing is done when "hold_<type>_locks" (falling back to default_value)
+ * is false. Otherwise, when dict carries "<type>count" the elements
+ * "<type>name1".."<type>nameN" are unlocked, else the single element named
+ * by "<type>name" is unlocked. Returns 0 on success, non-zero on failure. */
+static int32_t
+glusterd_mgmt_v3_unlock_entity (dict_t *dict, uuid_t uuid, char *type,
+                                gf_boolean_t default_value)
+{
+        char          key[PATH_MAX] = "";
+        char         *entity        = NULL;
+        int32_t       count         = -1;
+        int32_t       ret           = -1;
+        gf_boolean_t  hold_locks    = _gf_false;
+        xlator_t     *this          = THIS;
+
+        GF_ASSERT(this);
+        GF_ASSERT (dict);
+        GF_ASSERT (type);
+
+        snprintf (key, sizeof (key), "hold_%s_locks", type);
+        hold_locks = dict_get_str_boolean (dict, key, default_value);
+        if (hold_locks == _gf_false) {
+                /* No locks were held for this entity, nothing to release */
+                ret = 0;
+                goto out;
+        }
+
+        /* Presence of volcount/snapcount/... selects the multi-element form */
+        snprintf (key, sizeof (key), "%scount", type);
+        if (dict_get_int32 (dict, key, &count) == 0) {
+                ret = glusterd_release_multiple_locks_per_entity (dict,
+                                                                  uuid,
+                                                                  count,
+                                                                  type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+                                "Failed to release all %s locks", type);
+                        goto out;
+                }
+        } else {
+                /* No count: exactly one element name has to be unlocked */
+                snprintf (key, sizeof (key), "%sname", type);
+                ret = dict_get_str (dict, key, &entity);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch %sname", type);
+                        goto out;
+                }
+
+                ret = glusterd_mgmt_v3_unlock (entity, uuid, type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                "Failed to release lock for %s %s "
+                                "on behalf of %s.", type, entity,
+                                uuid_utoa(uuid));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Acquire the locks for one entity type on behalf of uuid.
+ * Nothing is done when "hold_<type>_locks" (falling back to default_value)
+ * is false. Otherwise, when dict carries "<type>count" the elements
+ * "<type>name1".."<type>nameN" are locked, else the single element named
+ * by "<type>name" is locked. Returns 0 on success, non-zero on failure. */
+static int32_t
+glusterd_mgmt_v3_lock_entity (dict_t *dict, uuid_t uuid, uint32_t *op_errno,
+                              char *type, gf_boolean_t default_value)
+{
+        char          key[PATH_MAX] = "";
+        char         *entity        = NULL;
+        int32_t       count         = -1;
+        int32_t       ret           = -1;
+        gf_boolean_t  hold_locks    = _gf_false;
+        xlator_t     *this          = THIS;
+
+        GF_ASSERT(this);
+        GF_ASSERT (dict);
+        GF_ASSERT (type);
+
+        snprintf (key, sizeof (key), "hold_%s_locks", type);
+        hold_locks = dict_get_str_boolean (dict, key, default_value);
+        if (hold_locks == _gf_false) {
+                /* Not holding locks for this particular entity */
+                ret = 0;
+                goto out;
+        }
+
+        /* Presence of volcount/snapcount/... selects the multi-element form */
+        snprintf (key, sizeof (key), "%scount", type);
+        if (dict_get_int32 (dict, key, &count) == 0) {
+                ret = glusterd_acquire_multiple_locks_per_entity (dict,
+                                                                  uuid,
+                                                                  op_errno,
+                                                                  count,
+                                                                  type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL,
+                                "Failed to acquire all %s locks", type);
+                        goto out;
+                }
+        } else {
+                /* No count: exactly one element name has to be locked */
+                snprintf (key, sizeof (key), "%sname", type);
+                ret = dict_get_str (dict, key, &entity);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch %sname", type);
+                        goto out;
+                }
+
+                ret = glusterd_mgmt_v3_lock (entity, uuid, op_errno, type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Failed to acquire lock for %s %s "
+                                "on behalf of %s.", type, entity,
+                                uuid_utoa(uuid));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Release the locks of every entity type in valid_types[] (volumes, snaps,
+ * ...) on behalf of uuid, as described by dict.
+ * All types are attempted even when one fails. Returns 0 when everything
+ * was released, -1 when dict is NULL, otherwise the error of the last
+ * entity type that failed. */
+int32_t
+glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid)
+{
+        glusterd_valid_entities *entity = NULL;
+        int32_t                  ret    = -1;
+        int32_t                  status = 0;
+        xlator_t                *this   = THIS;
+
+        GF_ASSERT(this);
+
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY, "dict is null.");
+                /* ret still holds its initial -1 */
+                goto out;
+        }
+
+        for (entity = valid_types; entity->type; entity++) {
+                ret = glusterd_mgmt_v3_unlock_entity (dict, uuid,
+                                                      entity->type,
+                                                      entity->default_value);
+                if (!ret)
+                        continue;
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+                        "Unable to unlock all %s",
+                        entity->type);
+                status = ret;
+        }
+
+        ret = status;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Acquire the locks of every entity type in valid_types[] (volumes, snaps,
+ * ...) on behalf of uuid, as described by dict.
+ * Types are locked in table order. When one fails, the types locked so far
+ * are unlocked again and -1 is returned. Returns 0 only when all
+ * GF_MAX_LOCKING_ENTITIES types were locked. */
+int32_t
+glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid, uint32_t *op_errno)
+{
+        int32_t    idx    = -1;
+        int32_t    ret    = -1;
+        int32_t    locked = 0;
+        xlator_t  *this   = THIS;
+
+        GF_ASSERT(this);
+
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY, "dict is null.");
+                /* ret still holds its initial -1 */
+                goto out;
+        }
+
+        /* Lock one entity type after the other */
+        while (valid_types[locked].type) {
+                ret = glusterd_mgmt_v3_lock_entity
+                                        (dict, uuid, op_errno,
+                                         valid_types[locked].type,
+                                         valid_types[locked].default_value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL,
+                                "Unable to lock all %s",
+                                valid_types[locked].type);
+                        break;
+                }
+                locked++;
+        }
+
+        if (locked == GF_MAX_LOCKING_ENTITIES) {
+                /* All locking ops went through */
+                ret = 0;
+                goto out;
+        }
+
+        /* Roll back the entity types that were locked before the failure */
+        for (idx = 0; idx < locked; idx++) {
+                ret = glusterd_mgmt_v3_unlock_entity
+                                        (dict, uuid,
+                                         valid_types[idx].type,
+                                         valid_types[idx].default_value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+                                "Unable to unlock all %s",
+                                valid_types[idx].type);
+                }
+        }
+        ret = -1;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/* Acquire the mgmt_v3 lock for entity `name` of kind `type` on behalf of
+ * `uuid`.
+ *
+ * The lock is recorded in priv->mgmt_v3_lock under the key "<name>_<type>".
+ * On success a backtrace is additionally stored, best effort, under
+ * "debug.last-success-bt-<name>-<type>".
+ *
+ * Returns 0 on success and non-zero (normally -1) on failure: NULL name or
+ * type, a type not listed in valid_types[], a key that does not fit in
+ * PATH_MAX, an allocation or dict failure, or the lock already being held.
+ * In the last case *op_errno is set to EG_ANOTRANS (when op_errno is
+ * non-NULL). */
+int32_t
+glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, uint32_t *op_errno,
+                       char *type)
+{
+        char                        key[PATH_MAX] = "";
+        int32_t                     ret           = -1;
+        glusterd_mgmt_v3_lock_obj  *lock_obj      = NULL;
+        glusterd_conf_t            *priv          = NULL;
+        gf_boolean_t                is_valid      = _gf_true;
+        uuid_t                      owner         = {0};
+        xlator_t                   *this          = NULL;
+        char                       *bt            = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!name || !type) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "name or type is null.");
+                ret = -1;
+                goto out;
+        }
+
+        is_valid = glusterd_mgmt_v3_is_type_valid (type);
+        if (is_valid != _gf_true) {
+                gf_msg_callingfn (this->name, GF_LOG_ERROR,
+                                  EINVAL, GD_MSG_INVALID_ENTRY,
+                                  "Invalid entity. Cannot perform locking "
+                                  "operation on %s types", type);
+                ret = -1;
+                goto out;
+        }
+
+        /* snprintf() returns the length the full string would have had, so
+         * comparing it with strlen(name) + 1 + strlen(type) can never spot
+         * truncation. Compare against the buffer size instead, otherwise two
+         * over-long names could silently share one truncated lock key. */
+        ret = snprintf (key, sizeof(key), "%s_%s", name, type);
+        if (ret < 0 || (size_t) ret >= sizeof(key)) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CREATE_KEY_FAIL, "Unable to create key");
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Trying to acquire lock of %s %s for %s as %s",
+                      type, name, uuid_utoa (uuid), key);
+
+        ret = glusterd_get_mgmt_v3_lock_owner (key, &owner);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "Unable to get mgmt_v3 lock owner");
+                goto out;
+        }
+
+        /* If the lock has already been held for the given volume
+         * we fail */
+        if (!gf_uuid_is_null (owner)) {
+                gf_msg_callingfn (this->name, GF_LOG_WARNING,
+                                  0, GD_MSG_LOCK_ALREADY_HELD,
+                                  "Lock for %s held by %s",
+                                  name, uuid_utoa (owner));
+                ret = -1;
+                /* Tolerate callers that are not interested in the errno */
+                if (op_errno)
+                        *op_errno = EG_ANOTRANS;
+                goto out;
+        }
+
+        lock_obj = GF_CALLOC (1, sizeof(glusterd_mgmt_v3_lock_obj),
+                              gf_common_mt_mgmt_v3_lock_obj_t);
+        if (!lock_obj) {
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_copy (lock_obj->lock_owner, uuid);
+
+        ret = dict_set_bin (priv->mgmt_v3_lock, key, lock_obj,
+                            sizeof(glusterd_mgmt_v3_lock_obj));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set lock owner in mgmt_v3 lock");
+                GF_FREE (lock_obj);
+                goto out;
+        }
+
+        /* Record a backtrace of the successful lock for debugging. This is
+         * best effort only: failing to store it does not fail the lock. */
+        if ((bt = gf_backtrace_save (NULL))) {
+                snprintf (key, sizeof (key), "debug.last-success-bt-%s-%s",
+                          name, type);
+                ret = dict_set_dynstr_with_alloc (priv->mgmt_v3_lock, key, bt);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to save "
+                                "the back trace for lock %s-%s granted to %s",
+                                name, type, uuid_utoa (uuid));
+                ret = 0;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Lock for %s %s successfully held by %s",
+                      type, name, uuid_utoa (uuid));
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Release the mgmt_v3 lock for entity `name` of kind `type`, which must be
+ * held by `uuid`.
+ *
+ * Removes the "<name>_<type>" entry from priv->mgmt_v3_lock together with
+ * the matching "debug.last-success-bt-<name>-<type>" backtrace entry.
+ *
+ * Returns 0 on success and -1 on failure: NULL name or type, a type not
+ * listed in valid_types[], a key formatting error, the lock not being held,
+ * or the lock being held by a different owner. */
+int32_t
+glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type)
+{
+        char                 key[PATH_MAX] = "";
+        int32_t              ret           = -1;
+        gf_boolean_t         is_valid      = _gf_true;
+        glusterd_conf_t     *priv          = NULL;
+        uuid_t               owner         = {0};
+        xlator_t            *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!name || !type) {
+                /* The check covers both arguments, so say so in the log */
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "name or type is null.");
+                ret = -1;
+                goto out;
+        }
+
+        is_valid = glusterd_mgmt_v3_is_type_valid (type);
+        if (is_valid != _gf_true) {
+                gf_msg_callingfn (this->name, GF_LOG_ERROR, EINVAL,
+                                  GD_MSG_INVALID_ENTRY,
+                                  "Invalid entity. Cannot perform unlocking "
+                                  "operation on %s types", type);
+                ret = -1;
+                goto out;
+        }
+
+        /* snprintf() returns the untruncated length, so the only failure
+         * the previous length comparison could detect was a negative
+         * return. A merely truncated key is deliberately still used, so
+         * that it matches whatever key the lock path stored. */
+        ret = snprintf (key, sizeof(key), "%s_%s",
+                        name, type);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CREATE_KEY_FAIL, "Unable to create key");
+                ret = -1;
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Trying to release lock of %s %s for %s as %s",
+                      type, name, uuid_utoa (uuid), key);
+
+        ret = glusterd_get_mgmt_v3_lock_owner (key, &owner);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "Unable to get mgmt_v3 lock owner");
+                goto out;
+        }
+
+        if (gf_uuid_is_null (owner)) {
+                gf_msg_callingfn (this->name, GF_LOG_WARNING,
+                                  0, GD_MSG_LOCK_NOT_HELD,
+                                  "Lock for %s %s not held", type, name);
+                ret = -1;
+                goto out;
+        }
+
+        if (gf_uuid_compare (uuid, owner)) {
+                gf_msg_callingfn (this->name, GF_LOG_WARNING,
+                                  0, GD_MSG_LOCK_OWNER_MISMATCH,
+                                  "Lock owner mismatch. "
+                                  "Lock for %s %s held by %s",
+                                  type, name, uuid_utoa (owner));
+                /* Report -1 like every other failure path instead of
+                 * leaking the (possibly positive) comparison result */
+                ret = -1;
+                goto out;
+        }
+
+        /* Removing the mgmt_v3 lock from the global list */
+        dict_del (priv->mgmt_v3_lock, key);
+
+        /* Remove the backtrace key as well */
+        ret = snprintf (key, sizeof(key), "debug.last-success-bt-%s-%s", name,
+                        type);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CREATE_KEY_FAIL, "Unable to create backtrace "
+                        "key");
+                ret = -1;
+                goto out;
+        }
+        dict_del (priv->mgmt_v3_lock, key);
+
+        gf_msg_debug (this->name, 0,
+                      "Lock for %s %s successfully released",
+                      type, name);
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h
new file mode 100644
index 00000000000..437053d9f38
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_LOCKS_H_
+#define _GLUSTERD_LOCKS_H_
+
+typedef struct glusterd_mgmt_v3_lock_object_ { /* value stored per lock key in the mgmt_v3 lock dict */
+ uuid_t lock_owner; /* uuid the lock was granted to; an all-zero uuid is treated as "not held" */
+} glusterd_mgmt_v3_lock_obj;
+
+typedef struct glusterd_mgmt_v3_lock_valid_entities { /* row type of the valid_types[] table */
+ char *type; /* Entity type like vol, snap */
+ gf_boolean_t default_value; /* Used when the request dict has *
+ * no "hold_<type>_locks" key: *
+ * whether locks are held for *
+ * that entity */
+} glusterd_valid_entities;
+
+int32_t
+glusterd_mgmt_v3_lock_init ();
+
+void
+glusterd_mgmt_v3_lock_fini ();
+
+int32_t
+glusterd_get_mgmt_v3_lock_owner (char *volname, uuid_t *uuid);
+
+int32_t
+glusterd_mgmt_v3_lock (const char *key, uuid_t uuid, uint32_t *op_errno,
+ char *type);
+
+int32_t
+glusterd_mgmt_v3_unlock (const char *key, uuid_t uuid, char *type);
+
+int32_t
+glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid, uint32_t *op_errno);
+
+int32_t
+glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
new file mode 100644
index 00000000000..d04492af7cc
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
@@ -0,0 +1,285 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "syscall.h"
+
+#include <signal.h>
+
+/* CLI request handler for log rotate.
+ * Decodes the request, unserializes its dict, stamps it with a "rotate-key"
+ * (the current time) and starts the GD_OP_LOG_ROTATE op. When anything
+ * fails, an error response is sent to the CLI and the result of sending it
+ * is returned. */
+int
+__glusterd_handle_log_rotate (rpcsvc_request_t *req)
+{
+        gf_cli_req     cli_req   = {{0,}};
+        glusterd_op_t  cli_op    = GD_OP_LOG_ROTATE;
+        char           msg[2048] = {0,};
+        dict_t        *dict      = NULL;
+        char          *volname   = NULL;
+        xlator_t      *this      = NULL;
+        int32_t        ret       = -1;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                /* The message could not be decoded */
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Rebuild the request dictionary from its wire form */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg, sizeof (msg), "Unable to decode the "
+                                  "command");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_LOG_ROTATE_REQ_RECVD,
+                "Received log rotate req "
+                "for volume %s", volname);
+
+        /* The key later becomes the suffix of the rotated log file */
+        ret = dict_set_uint64 (dict, "rotate-key", (uint64_t)time (NULL));
+        if (ret)
+                goto out;
+
+        ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+        if (ret != 0) {
+                if (!msg[0])
+                        snprintf (msg, sizeof (msg), "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, msg);
+        }
+
+        free (cli_req.dict.dict_val);
+        return ret;
+}
+
+/* RPC entry point for log rotate: runs __glusterd_handle_log_rotate()
+ * through glusterd_big_locked_handler() and returns its result. */
+int
+glusterd_handle_log_rotate (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_log_rotate);
+
+        return ret;
+}
+
+/* op-sm */
+/* Stage-phase validation of a log rotate request.
+ * Checks that the volume named by "volname" exists and is started and, when
+ * the dict also carries "brick", that the brick belongs to the volume.
+ * Returns 0 when the request is acceptable. On a validation failure a
+ * non-zero value is returned and *op_errstr receives an allocated message
+ * (it is left untouched when "volname" itself is missing). */
+int
+glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr)
+{
+        int                  ret       = -1;
+        char                *volname   = NULL;
+        glusterd_volinfo_t  *volinfo   = NULL;
+        gf_boolean_t         exists    = _gf_false;
+        char                 msg[2048] = {0};
+        char                *brick     = NULL;
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        /* volinfo is used below, so a failed lookup has to be treated like
+         * a missing volume instead of being ignored */
+        if (!exists || ret || !volinfo) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exist",
+                          volname);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        if (_gf_false == glusterd_is_volume_started (volinfo)) {
+                snprintf (msg, sizeof (msg), "Volume %s needs to be started before"
+                          " log rotate.", volname);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_STARTED, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "brick", &brick);
+        /* If no brick is specified, do log-rotate for
+           all the bricks in the volume */
+        if (ret) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, NULL,
+                                                      _gf_false);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Incorrect brick %s "
+                          "for volume %s", brick, volname);
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
+
+
+/* Commit phase of log rotate.
+ * For every brick of the volume that belongs to this node (or only the
+ * brick named by "brick" when the dict carries one): read the brick's pid
+ * from its pidfile, rename its log file to "<logfile>.<rotate-key>" and send
+ * the brick process a SIGHUP.
+ * Returns 0 on success, and also when no local brick matched; non-zero on
+ * failure. */
+int
+glusterd_op_log_rotate (dict_t *dict)
+{
+        int                    ret                = -1;
+        glusterd_conf_t       *priv               = NULL;
+        glusterd_volinfo_t    *volinfo            = NULL;
+        glusterd_brickinfo_t  *brickinfo          = NULL;
+        xlator_t              *this               = NULL;
+        char                  *volname            = NULL;
+        char                  *brick              = NULL;
+        char                   logfile[PATH_MAX]  = {0,};
+        char                   pidfile[PATH_MAX]  = {0,};
+        FILE                  *file               = NULL;
+        pid_t                  pid                = 0;
+        uint64_t               key                = 0;
+        int                    valid_brick        = 0;
+        glusterd_brickinfo_t  *tmpbrkinfo         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "volname not found");
+                goto out;
+        }
+
+        ret = dict_get_uint64 (dict, "rotate-key", &key);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "rotate key not found");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "brick", &brick);
+        /* If no brick is specified, do log-rotate for
+           all the bricks in the volume */
+        if (ret)
+                goto cont;
+
+        ret = glusterd_brickinfo_new_from_brick (brick, &tmpbrkinfo,
+                                                 _gf_false, NULL);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_NOT_FOUND,
+                        "cannot get brickinfo from brick");
+                goto out;
+        }
+
+cont:
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                goto out;
+
+        ret = -1;
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* Only bricks hosted on this node are handled here */
+                if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+                        continue;
+
+                if (brick &&
+                    (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) ||
+                     strcmp (tmpbrkinfo->path,brickinfo->path)))
+                        continue;
+
+                valid_brick = 1;
+
+                GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+                file = fopen (pidfile, "r+");
+                if (!file) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED, "Unable to open pidfile: %s",
+                                pidfile);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = fscanf (file, "%d", &pid);
+                if (ret <= 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED, "Unable to read pidfile: %s",
+                                pidfile);
+                        ret = -1;
+                        /* file is still open; it is closed at out: */
+                        goto out;
+                }
+                fclose (file);
+                file = NULL;
+
+                /* kill() treats pid 0 and negative pids as "a process
+                 * group" / "every process", so never pass such a value on
+                 * from a corrupt pidfile */
+                if (pid <= 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Invalid pid %d in pidfile: %s", pid,
+                                pidfile);
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (logfile, PATH_MAX, "%s.%"PRIu64,
+                          brickinfo->logfile, key);
+
+                /* A failed rename is only warned about; the brick is still
+                 * signalled */
+                ret = sys_rename (brickinfo->logfile, logfile);
+                if (ret)
+                        gf_msg ("glusterd", GF_LOG_WARNING, errno,
+                                GD_MSG_FILE_OP_FAILED, "rename failed");
+
+                ret = kill (pid, SIGHUP);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_PID_KILL_FAIL, "Unable to SIGHUP to %d", pid);
+                        goto out;
+                }
+                ret = 0;
+
+                /* If request was for brick, only one iteration is enough */
+                if (brick)
+                        break;
+        }
+
+        /* No brick of this volume lives on this node: nothing to do */
+        if (ret && !valid_brick)
+                ret = 0;
+
+out:
+        /* Close the pidfile if an error path left it open */
+        if (file)
+                fclose (file);
+
+        if (tmpbrkinfo)
+                glusterd_brickinfo_delete (tmpbrkinfo);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
new file mode 100644
index 00000000000..ed171b69b66
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GLUSTERD_MEM_TYPES_H__
+#define __GLUSTERD_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+typedef enum gf_gld_mem_types_ { /* glusterd memory-type IDs, numbered consecutively after gf_common_mt_end (presumably from mem-types.h -- confirm) */
+ gf_gld_mt_dir_entry_t = gf_common_mt_end + 1,
+ gf_gld_mt_volfile_ctx = gf_common_mt_end + 2,
+ gf_gld_mt_glusterd_state_t = gf_common_mt_end + 3,
+ gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 4,
+ gf_gld_mt_locker = gf_common_mt_end + 5,
+ gf_gld_mt_string = gf_common_mt_end + 6,
+ gf_gld_mt_lock_table = gf_common_mt_end + 7,
+ gf_gld_mt_char = gf_common_mt_end + 8,
+ gf_gld_mt_glusterd_connection_t = gf_common_mt_end + 9,
+ gf_gld_mt_resolve_comp = gf_common_mt_end + 10,
+ gf_gld_mt_peerinfo_t = gf_common_mt_end + 11,
+ gf_gld_mt_friend_sm_event_t = gf_common_mt_end + 12,
+ gf_gld_mt_friend_req_ctx_t = gf_common_mt_end + 13,
+ gf_gld_mt_friend_update_ctx_t = gf_common_mt_end + 14,
+ gf_gld_mt_op_sm_event_t = gf_common_mt_end + 15,
+ gf_gld_mt_op_lock_ctx_t = gf_common_mt_end + 16,
+ gf_gld_mt_op_stage_ctx_t = gf_common_mt_end + 17,
+ gf_gld_mt_op_commit_ctx_t = gf_common_mt_end + 18,
+ gf_gld_mt_mop_stage_req_t = gf_common_mt_end + 19,
+ gf_gld_mt_probe_ctx_t = gf_common_mt_end + 20,
+ gf_gld_mt_create_volume_ctx_t = gf_common_mt_end + 21,
+ gf_gld_mt_start_volume_ctx_t = gf_common_mt_end + 22,
+ gf_gld_mt_stop_volume_ctx_t = gf_common_mt_end + 23,
+ gf_gld_mt_delete_volume_ctx_t = gf_common_mt_end + 24,
+ gf_gld_mt_glusterd_volinfo_t = gf_common_mt_end + 25,
+ gf_gld_mt_glusterd_brickinfo_t = gf_common_mt_end + 26,
+ gf_gld_mt_peer_hostname_t = gf_common_mt_end + 27,
+ gf_gld_mt_ifreq = gf_common_mt_end + 28,
+ gf_gld_mt_store_handle_t = gf_common_mt_end + 29,
+ gf_gld_mt_store_iter_t = gf_common_mt_end + 30,
+ gf_gld_mt_defrag_info = gf_common_mt_end + 31,
+ gf_gld_mt_log_filename_ctx_t = gf_common_mt_end + 32,
+ gf_gld_mt_log_locate_ctx_t = gf_common_mt_end + 33,
+ gf_gld_mt_log_rotate_ctx_t = gf_common_mt_end + 34,
+ gf_gld_mt_peerctx_t = gf_common_mt_end + 35,
+ gf_gld_mt_sm_tr_log_t = gf_common_mt_end + 36,
+ gf_gld_mt_pending_node_t = gf_common_mt_end + 37,
+ gf_gld_mt_brick_rsp_ctx_t = gf_common_mt_end + 38,
+ gf_gld_mt_mop_brick_req_t = gf_common_mt_end + 39,
+ gf_gld_mt_op_allack_ctx_t = gf_common_mt_end + 40,
+ gf_gld_mt_linearr = gf_common_mt_end + 41,
+ gf_gld_mt_linebuf = gf_common_mt_end + 42,
+ gf_gld_mt_mount_pattern = gf_common_mt_end + 43,
+ gf_gld_mt_mount_comp_container = gf_common_mt_end + 44,
+ gf_gld_mt_mount_component = gf_common_mt_end + 45,
+ gf_gld_mt_mount_spec = gf_common_mt_end + 46,
+ gf_gld_mt_georep_meet_spec = gf_common_mt_end + 47,
+ gf_gld_mt_nodesrv_t = gf_common_mt_end + 48,
+ gf_gld_mt_charptr = gf_common_mt_end + 49,
+ gf_gld_mt_hooks_stub_t = gf_common_mt_end + 50,
+ gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51,
+ gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52,
+ gf_gld_mt_int = gf_common_mt_end + 53,
+ gf_gld_mt_snap_t = gf_common_mt_end + 54,
+ gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55,
+ gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56,
+ gf_gld_mt_local_peers_t = gf_common_mt_end + 57,
+ gf_gld_mt_end = gf_common_mt_end + 58, /* highest value; NOTE(review): presumably an end marker that must stay last when new types are added -- confirm */
+} gf_gld_mem_types_t;
+#endif
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
new file mode 100644
index 00000000000..ba40b8f7628
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -0,0 +1,4679 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_MESSAGES_H_
+#define _GLUSTERD_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file glusterd-messages.h
+ * \brief Glusterd log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD /* base value; every GD_MSG_* ID below is an offset from it */
+
+#define GLFS_NUM_MESSAGES 578 /* must be incremented whenever a message is added (rule 2 above) */
+
+#define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* first value past the GLFS_NUM_MESSAGES offsets */
+/* Messages with message IDs; the start sentinel pairs this component's base with a marker string */
+#define glfs_msg_start_x GLUSTERD_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid 106001
+ * @diagnosis Operation could not be performed because the server quorum was not
+ * met
+ * @recommendedaction Ensure that other peer nodes are online and reachable from
+ * the local peer node
+ */
+#define GD_MSG_SERVER_QUORUM_NOT_MET (GLUSTERD_COMP_BASE + 1)
+
+/*!
+ * @messageid 106002
+ * @diagnosis The local bricks belonging to the volume were killed because
+ * the server-quorum was not met
+ * @recommendedaction Ensure that other peer nodes are online and reachable from
+ * the local peer node
+ */
+#define GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS (GLUSTERD_COMP_BASE + 2)
+
+/*!
+ * @messageid 106003
+ * @diagnosis The local bricks belonging to the named volume were (re)started
+ * because the server-quorum was met
+ * @recommendedaction None
+ */
+#define GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS (GLUSTERD_COMP_BASE + 3)
+
+/*!
+ * @messageid 106004
+ * @diagnosis Glusterd on the peer might be down or unreachable
+ * @recommendedaction Check if glusterd is running on the peer node or if
+ * the firewall rules are not blocking port 24007
+ */
+#define GD_MSG_PEER_DISCONNECTED (GLUSTERD_COMP_BASE + 4)
+
+/*!
+ * @messageid 106005
+ * @diagnosis Brick process might be down
+ * @recommendedaction Check brick log files to get more information on the cause
+ * for the brick's offline status. To bring the brick back
+ * online, run gluster volume start <VOLNAME> force
+ */
+#define GD_MSG_BRICK_DISCONNECTED (GLUSTERD_COMP_BASE + 5)
+
+/*!
+ * @messageid 106006
+ * @diagnosis NFS Server or Self-heal daemon might be down
+ * @recommendedaction Check nfs or self-heal daemon log files to get more
+ * information on the cause for the brick's offline status.
+ * To bring the brick back online, run gluster volume
+ * start <VOLNAME> force
+ */
+#define GD_MSG_NODE_DISCONNECTED (GLUSTERD_COMP_BASE + 6)
+
+/*!
+ * @messageid 106007
+ * @diagnosis Rebalance process might be down
+ * @recommendedaction None
+ */
+#define GD_MSG_REBALANCE_DISCONNECTED (GLUSTERD_COMP_BASE + 7)
+
+/*!
+ * @messageid 106008
+ * @diagnosis Volume cleanup failed
+ * @recommendedaction None
+ */
+#define GD_MSG_VOL_CLEANUP_FAIL (GLUSTERD_COMP_BASE + 8)
+
+/*!
+ * @messageid 106009
+ * @diagnosis Volume version mismatch while adding a peer
+ * @recommendedaction None
+ */
+#define GD_MSG_VOL_VERS_MISMATCH (GLUSTERD_COMP_BASE + 9)
+
+/*!
+ * @messageid 106010
+ * @diagnosis Volume checksum mismatch while adding a peer
+ * @recommendedaction Check for which node the checksum mismatch happens
+ * and delete the volume configuration files from it and
+ * restart glusterd
+ */
+#define GD_MSG_CKSUM_VERS_MISMATCH (GLUSTERD_COMP_BASE + 10)
+
+/*!
+ * @messageid 106011
+ * @diagnosis A volume quota-conf version mismatch occurred while adding a peer
+ * @recommendedaction None
+ */
+#define GD_MSG_QUOTA_CONFIG_VERS_MISMATCH (GLUSTERD_COMP_BASE + 11)
+
+/*!
+ * @messageid 106012
+ * @diagnosis A quota-conf checksum mismatch occurred while adding a peer
+ * @recommendedaction Check for which node the checksum mismatch happens
+ * and delete the volume configuration files from it and
+ * restart glusterd
+ */
+#define GD_MSG_QUOTA_CONFIG_CKSUM_MISMATCH (GLUSTERD_COMP_BASE + 12)
+
+/*!
+ * @messageid 106013
+ * @diagnosis Brick process could not be terminated
+ * @recommendedaction Find the pid of the brick process from the log file and
+ * manually kill it
+ */
+#define GD_MSG_BRICK_STOP_FAIL (GLUSTERD_COMP_BASE + 13)
+
+/*!
+ * @messageid 106014
+ * @diagnosis One of the listed services: NFS Server, Quota Daemon, Self Heal
+ * Daemon, or brick process could not be brought offline
+ * @recommendedaction Find the pid of the process from the log file and
+ * manually kill it
+ */
+#define GD_MSG_SVC_KILL_FAIL (GLUSTERD_COMP_BASE + 14)
+
+/*!
+ * @messageid 106015
+ * @diagnosis The process could not be killed with the specified PID
+ * @recommendedaction None
+ */
+#define GD_MSG_PID_KILL_FAIL (GLUSTERD_COMP_BASE + 15)
+
+/*!
+ * @messageid 106016
+ * @diagnosis Rebalance socket file is not found
+ * @recommendedaction Rebalance failed as the socket file for rebalance is
+ * missing. Restart the rebalance process
+ */
+#define GD_MSG_REBAL_NO_SOCK_FILE (GLUSTERD_COMP_BASE + 16)
+
+/*!
+ * @messageid 106017
+ * @diagnosis Unix options could not be set
+ * @recommendedaction Server is out of memory and needs a restart
+ */
+#define GD_MSG_UNIX_OP_BUILD_FAIL (GLUSTERD_COMP_BASE + 17)
+
+/*!
+ * @messageid 106018
+ * @diagnosis RPC creation failed
+ * @recommendedaction Rebalance failed as glusterd could not establish an RPC
+ * connection. Check the log file for the exact reason of the
+ * failure and then restart the rebalance process
+ */
+#define GD_MSG_RPC_CREATE_FAIL (GLUSTERD_COMP_BASE + 18)
+
+/*!
+ * @messageid 106019
+ * @diagnosis The default options on volume could not be set with the volume
+ * create and volume reset commands
+ * @recommendedaction Check glusterd log files to see the exact reason for
+ * failure to set default options
+ */
+#define GD_MSG_FAIL_DEFAULT_OPT_SET (GLUSTERD_COMP_BASE + 19)
+
+/*!
+ * @messageid 106020
+ * @diagnosis Failed to release cluster wide lock for one of the peers
+ * @recommendedaction Restart the glusterd service on the node where the command
+ * was issued
+ */
+#define GD_MSG_CLUSTER_UNLOCK_FAILED (GLUSTERD_COMP_BASE + 20)
+
+/*!
+ * @messageid 106021
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_MEMORY (GLUSTERD_COMP_BASE + 21)
+
+/*!
+ * @messageid 106022
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNSUPPORTED_VERSION (GLUSTERD_COMP_BASE + 22)
+
+/*!
+ * @messageid 106023
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_COMMAND_NOT_FOUND (GLUSTERD_COMP_BASE + 23)
+
+/*!
+ * @messageid 106024
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPSHOT_OP_FAILED (GLUSTERD_COMP_BASE + 24)
+
+/*!
+ * @messageid 106025
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INVALID_ENTRY (GLUSTERD_COMP_BASE + 25)
+
+/*!
+ * @messageid 106027
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): offset 26 is skipped between GD_MSG_INVALID_ENTRY and this ID - confirm intentional
+ */
+#define GD_MSG_VOL_NOT_FOUND (GLUSTERD_COMP_BASE + 27)
+
+/*!
+ * @messageid 106028
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REG_COMPILE_FAILED (GLUSTERD_COMP_BASE + 28)
+
+/*!
+ * @messageid 106029
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FILE_OP_FAILED (GLUSTERD_COMP_BASE + 29)
+
+/*!
+ * @messageid 106030
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CREATION_FAIL (GLUSTERD_COMP_BASE + 30)
+
+/*!
+ * @messageid 106031
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_OP_FAILED (GLUSTERD_COMP_BASE + 31)
+
+/*!
+ * @messageid 106032
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CREATE_DIR_FAILED (GLUSTERD_COMP_BASE + 32)
+
+/*!
+ * @messageid 106033
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DIR_OP_FAILED (GLUSTERD_COMP_BASE + 33)
+
+/*!
+ * @messageid 106034
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_STOP_FAILED (GLUSTERD_COMP_BASE + 34)
+
+/*!
+ * @messageid 106035
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_CLI_RESP (GLUSTERD_COMP_BASE + 35)
+
+
+/*!
+ * @messageid 106036
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_INIT_FAILED (GLUSTERD_COMP_BASE + 36)
+
+
+/*!
+ * @messageid 106037
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_LIST_GET_FAIL (GLUSTERD_COMP_BASE + 37)
+
+
+/*!
+ * @messageid 106038
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): "UNOUNT" looks like a typo for "UNMOUNT" - confirm before renaming
+ */
+#define GD_MSG_UNOUNT_FAILED (GLUSTERD_COMP_BASE + 38)
+
+/*!
+ * @messageid 106039
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_DESTROY_FAILED (GLUSTERD_COMP_BASE + 39)
+
+/*!
+ * @messageid 106040
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CLEANUP_FAIL (GLUSTERD_COMP_BASE + 40)
+
+/*!
+ * @messageid 106041
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_ACTIVATE_FAIL (GLUSTERD_COMP_BASE + 41)
+
+/*!
+ * @messageid 106042
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_DEACTIVATE_FAIL (GLUSTERD_COMP_BASE + 42)
+
+/*!
+ * @messageid 106043
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_RESTORE_FAIL (GLUSTERD_COMP_BASE + 43)
+
+/*!
+ * @messageid 106044
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_REMOVE_FAIL (GLUSTERD_COMP_BASE + 44)
+
+/*!
+ * @messageid 106045
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CONFIG_FAIL (GLUSTERD_COMP_BASE + 45)
+
+/*!
+ * @messageid 106046
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_STATUS_FAIL (GLUSTERD_COMP_BASE + 46)
+
+/*!
+ * @messageid 106047
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_INIT_FAIL (GLUSTERD_COMP_BASE + 47)
+
+/*!
+ * @messageid 106048
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLINFO_SET_FAIL (GLUSTERD_COMP_BASE + 48)
+
+/*!
+ * @messageid 106049
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLINFO_GET_FAIL (GLUSTERD_COMP_BASE + 49)
+
+/*!
+ * @messageid 106050
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_CREATION_FAIL (GLUSTERD_COMP_BASE + 50)
+
+/*!
+ * @messageid 106051
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_GET_INFO_FAIL (GLUSTERD_COMP_BASE + 51)
+
+/*!
+ * @messageid 106052
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_NEW_INFO_FAIL (GLUSTERD_COMP_BASE + 52)
+
+/*!
+ * @messageid 106053
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LVS_FAIL (GLUSTERD_COMP_BASE + 53)
+
+/*!
+ * @messageid 106054
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SETXATTR_FAIL (GLUSTERD_COMP_BASE + 54)
+
+/*!
+ * @messageid 106055
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UMOUNTING_SNAP_BRICK (GLUSTERD_COMP_BASE + 55)
+
+/*!
+ * @messageid 106056
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_UNSUPPORTED (GLUSTERD_COMP_BASE + 56)
+
+/*!
+ * @messageid 106057
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_NOT_FOUND (GLUSTERD_COMP_BASE + 57)
+
+/*!
+ * @messageid 106058
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FS_LABEL_UPDATE_FAIL (GLUSTERD_COMP_BASE + 58)
+
+/*!
+ * @messageid 106059
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LVM_MOUNT_FAILED (GLUSTERD_COMP_BASE + 59)
+
+/*!
+ * @messageid 106060
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_SET_FAILED (GLUSTERD_COMP_BASE + 60)
+
+/*!
+ * @messageid 106061
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CANONICALIZE_FAIL (GLUSTERD_COMP_BASE + 61)
+
+/*!
+ * @messageid 106062
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_GET_FAILED (GLUSTERD_COMP_BASE + 62)
+
+/*!
+ * @messageid 106063
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_INFO_FAIL (GLUSTERD_COMP_BASE + 63)
+
+/*!
+ * @messageid 106064
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_VOL_CONFIG_FAIL (GLUSTERD_COMP_BASE + 64)
+
+/*!
+ * @messageid 106065
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_OBJECT_STORE_FAIL (GLUSTERD_COMP_BASE + 65)
+
+/*!
+ * @messageid 106066
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_UNSERIALIZE_FAIL (GLUSTERD_COMP_BASE + 66)
+
+/*!
+ * @messageid 106067
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_RESTORE_REVERT_FAIL (GLUSTERD_COMP_BASE + 67)
+
+/*!
+ * @messageid 106068
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_LIST_SET_FAIL (GLUSTERD_COMP_BASE + 68)
+
+/*!
+ * @messageid 106069
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLFILE_CREATE_FAIL (GLUSTERD_COMP_BASE + 69)
+
+/*!
+ * @messageid 106070
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLINFO_REMOVE_FAIL (GLUSTERD_COMP_BASE + 70)
+
+/*!
+ * @messageid 106071
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_DELETE_FAIL (GLUSTERD_COMP_BASE + 71)
+
+/*!
+ * @messageid 106072
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPSHOT_PENDING (GLUSTERD_COMP_BASE + 72)
+
+/*!
+ * @messageid 106073
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_PATH_UNMOUNTED (GLUSTERD_COMP_BASE + 73)
+
+/*!
+ * @messageid 106074
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_ADD_FAIL (GLUSTERD_COMP_BASE + 74)
+
+/*!
+ * @messageid 106075
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_SET_INFO_FAIL (GLUSTERD_COMP_BASE + 75)
+
+/*!
+ * @messageid 106076
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LVCREATE_FAIL (GLUSTERD_COMP_BASE + 76)
+
+/*!
+ * @messageid 106077
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VG_GET_FAIL (GLUSTERD_COMP_BASE + 77)
+
+/*!
+ * @messageid 106078
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TPOOL_GET_FAIL (GLUSTERD_COMP_BASE + 78)
+
+/*!
+ * @messageid 106079
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LVM_REMOVE_FAILED (GLUSTERD_COMP_BASE + 79)
+
+/*!
+ * @messageid 106080
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSEDSNAP_INFO_SET_FAIL (GLUSTERD_COMP_BASE + 80)
+
+/*!
+ * @messageid 106081
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_MOUNTOPTS_FAIL (GLUSTERD_COMP_BASE + 81)
+
+/*!
+ * @messageid 106082
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_LIST_STORE_FAIL (GLUSTERD_COMP_BASE + 82)
+
+/*!
+ * @messageid 106083
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INVALID_MISSED_SNAP_ENTRY (GLUSTERD_COMP_BASE + 83)
+
+/*!
+ * @messageid 106084
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_GET_FAIL (GLUSTERD_COMP_BASE + 84)
+
+/*!
+ * @messageid 106085
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_CREATE_FAIL (GLUSTERD_COMP_BASE + 85)
+
+/*!
+ * @messageid 106086
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DUP_ENTRY (GLUSTERD_COMP_BASE + 86)
+
+/*!
+ * @messageid 106087
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_STATUS_DONE (GLUSTERD_COMP_BASE + 87)
+
+/*!
+ * @messageid 106088
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_EXEC_PERMS (GLUSTERD_COMP_BASE + 88)
+
+/*!
+ * @messageid 106089
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLOBAL_OP_VERSION_SET_FAIL (GLUSTERD_COMP_BASE + 89)
+
+/*!
+ * @messageid 106090
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HARD_LIMIT_SET_FAIL (GLUSTERD_COMP_BASE + 90)
+
+/*!
+ * @messageid 106091
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_SUCCESS (GLUSTERD_COMP_BASE + 91)
+
+/*!
+ * @messageid 106092
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STORE_FAIL (GLUSTERD_COMP_BASE + 92)
+
+/*!
+ * @messageid 106093
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLOBAL_OP_VERSION_GET_FAIL (GLUSTERD_COMP_BASE + 93)
+
+/*!
+ * @messageid 106094
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GEOREP_GET_FAILED (GLUSTERD_COMP_BASE + 94)
+
+/*!
+ * @messageid 106095
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_UMOUNT_FAIL (GLUSTERD_COMP_BASE + 95)
+
+/*!
+ * @messageid 106096
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUORUM_CHECK_FAIL (GLUSTERD_COMP_BASE + 96)
+
+/*!
+ * @messageid 106097
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUORUM_COUNT_IGNORED (GLUSTERD_COMP_BASE + 97)
+
+/*!
+ * @messageid 106098
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_MOUNT_FAIL (GLUSTERD_COMP_BASE + 98)
+
+/*!
+ * @messageid 106099
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RSP_DICT_USE_FAIL (GLUSTERD_COMP_BASE + 99)
+
+/*!
+ * @messageid 106100
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_IMPORT_FAIL (GLUSTERD_COMP_BASE + 100)
+
+/*!
+ * @messageid 106101
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CONFLICT (GLUSTERD_COMP_BASE + 101)
+
+/*!
+ * @messageid 106102
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_DELETE (GLUSTERD_COMP_BASE + 102)
+
+/*!
+ * @messageid 106103
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTA_CONFIG_IMPORT_FAIL (GLUSTERD_COMP_BASE + 103)
+
+/*!
+ * @messageid 106104
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPDIR_CREATE_FAIL (GLUSTERD_COMP_BASE + 104)
+
+/*!
+ * @messageid 106105
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_PRESENT (GLUSTERD_COMP_BASE + 105)
+
+/*!
+ * @messageid 106106
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUID_NULL (GLUSTERD_COMP_BASE + 106)
+
+/*!
+ * @messageid 106107
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TSTAMP_SET_FAIL (GLUSTERD_COMP_BASE + 107)
+
+/*!
+ * @messageid 106108
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RESP_AGGR_FAIL (GLUSTERD_COMP_BASE + 108)
+
+/*!
+ * @messageid 106109
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_EMPTY (GLUSTERD_COMP_BASE + 109)
+
+/*!
+ * @messageid 106110
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_CREATE_FAIL (GLUSTERD_COMP_BASE + 110)
+
+/*!
+ * @messageid 106111
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_STOP_FAIL (GLUSTERD_COMP_BASE + 111)
+
+/*!
+ * @messageid 106112
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SOFT_LIMIT_REACHED (GLUSTERD_COMP_BASE + 112)
+
+/*!
+ * @messageid 106113
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_START_FAIL (GLUSTERD_COMP_BASE + 113)
+
+/*!
+ * @messageid 106114
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_CREATE_FAIL (GLUSTERD_COMP_BASE + 114)
+
+/*!
+ * @messageid 106115
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_INIT_FAIL (GLUSTERD_COMP_BASE + 115)
+
+/*!
+ * @messageid 106116
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_OP_FAIL (GLUSTERD_COMP_BASE + 116)
+
+/*!
+ * @messageid 106117
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL (GLUSTERD_COMP_BASE + 117)
+
+/*!
+ * @messageid 106118
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_UNLOCK_FAIL (GLUSTERD_COMP_BASE + 118)
+
+/*!
+ * @messageid 106119
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_LOCK_GET_FAIL (GLUSTERD_COMP_BASE + 119)
+
+/*!
+ * @messageid 106120
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_LOCKDOWN_FAIL (GLUSTERD_COMP_BASE + 120)
+
+/*!
+ * @messageid 106121
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_POST_VALIDATION_FAIL (GLUSTERD_COMP_BASE + 121)
+
+/*!
+ * @messageid 106122
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PRE_VALIDATION_FAIL (GLUSTERD_COMP_BASE + 122)
+
+/*!
+ * @messageid 106123
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_COMMIT_OP_FAIL (GLUSTERD_COMP_BASE + 123)
+
+/*!
+ * @messageid 106124
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_LIST_CREATE_FAIL (GLUSTERD_COMP_BASE + 124)
+
+/*!
+ * @messageid 106125
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_OP_FAIL (GLUSTERD_COMP_BASE + 125)
+
+/*!
+ * @messageid 106126
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OPINFO_SET_FAIL (GLUSTERD_COMP_BASE + 126)
+
+/*!
+ * @messageid 106127
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_EVENT_UNLOCK_FAIL (GLUSTERD_COMP_BASE + 127)
+
+/*!
+ * @messageid 106128
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_OP_RESP_FAIL (GLUSTERD_COMP_BASE + 128)
+
+/*!
+ * @messageid 106129
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_NOT_FOUND (GLUSTERD_COMP_BASE + 129)
+
+/*!
+ * @messageid 106130
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REQ_DECODE_FAIL (GLUSTERD_COMP_BASE + 130)
+
+/*!
+ * @messageid 106131
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_SERL_LENGTH_GET_FAIL (GLUSTERD_COMP_BASE + 131)
+
+/*!
+ * @messageid 106132
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ALREADY_STOPPED (GLUSTERD_COMP_BASE + 132)
+
+/*!
+ * @messageid 106133
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PRE_VALD_RESP_FAIL (GLUSTERD_COMP_BASE + 133)
+
+/*!
+ * @messageid 106134
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_GET_FAIL (GLUSTERD_COMP_BASE + 134)
+
+/*!
+ * @messageid 106135
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLFILE_NOT_FOUND (GLUSTERD_COMP_BASE + 135)
+
+/*!
+ * @messageid 106136
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_EVENT_LOCK_FAIL (GLUSTERD_COMP_BASE + 136)
+
+/*!
+ * @messageid 106137
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NON_STRIPE_VOL (GLUSTERD_COMP_BASE + 137)
+
+/*!
+ * @messageid 106138
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_OBJ_GET_FAIL (GLUSTERD_COMP_BASE + 138)
+
+/*!
+ * @messageid 106139
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTA_DISABLED (GLUSTERD_COMP_BASE + 139)
+
+/*!
+ * @messageid 106140
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CACHE_MINMAX_SIZE_INVALID (GLUSTERD_COMP_BASE + 140)
+
+/*!
+ * @messageid 106141
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTA_GET_STAT_FAIL (GLUSTERD_COMP_BASE + 141)
+
+/*!
+ * @messageid 106142
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SUBVOLUMES_EXCEED (GLUSTERD_COMP_BASE + 142)
+
+/*!
+ * @messageid 106143
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_ADD (GLUSTERD_COMP_BASE + 143)
+
+/*!
+ * @messageid 106144
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_REMOVE (GLUSTERD_COMP_BASE + 144)
+
+/*!
+ * @messageid 106145
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CREATE_KEY_FAIL (GLUSTERD_COMP_BASE + 145)
+
+/*!
+ * @messageid 106146
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL (GLUSTERD_COMP_BASE + 146)
+
+/*!
+ * @messageid 106147
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL (GLUSTERD_COMP_BASE + 147)
+
+/*!
+ * @messageid 106148
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RESP_FROM_UNKNOWN_PEER (GLUSTERD_COMP_BASE + 148)
+
+/*!
+ * @messageid 106149
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): "MOUNDIRS" looks like a typo for "MOUNTDIRS" - confirm before renaming
+ */
+#define GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL (GLUSTERD_COMP_BASE + 149)
+
+/*!
+ * @messageid 106150
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GFID_VALIDATE_SET_FAIL (GLUSTERD_COMP_BASE + 150)
+
+/*!
+ * @messageid 106151
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_LOCK_FAIL (GLUSTERD_COMP_BASE + 151)
+
+/*!
+ * @messageid 106152
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_UNLOCK_FAIL (GLUSTERD_COMP_BASE + 152)
+
+/*!
+ * @messageid 106153
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMT_OP_FAIL (GLUSTERD_COMP_BASE + 153)
+
+/*!
+ * @messageid 106154
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TRANS_OPINFO_CLEAR_FAIL (GLUSTERD_COMP_BASE + 154)
+
+/*!
+ * @messageid 106155
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_LOCK_FAIL (GLUSTERD_COMP_BASE + 155)
+
+/*!
+ * @messageid 106156
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TRANS_OPINFO_SET_FAIL (GLUSTERD_COMP_BASE + 156)
+
+/*!
+ * @messageid 106157
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TRANS_IDGEN_FAIL (GLUSTERD_COMP_BASE + 157)
+
+/*!
+ * @messageid 106158
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RPC_FAILURE (GLUSTERD_COMP_BASE + 158)
+
+/*!
+ * @messageid 106159
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_VERS_ADJUST_FAIL (GLUSTERD_COMP_BASE + 159)
+
+/*!
+ * @messageid 106160
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_DEVICE_NAME_GET_FAIL (GLUSTERD_COMP_BASE + 160)
+
+/*!
+ * @messageid 106161
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): offset 161 is also used by GD_MSG_MGMT_PGM_SET_FAIL - IDs must be unique (rule 1)
+ */
+#define GD_MSG_SNAP_STATUS_NOT_PENDING (GLUSTERD_COMP_BASE + 161)
+
+/*!
+ * @messageid 106161
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): offset 161 is also used by GD_MSG_SNAP_STATUS_NOT_PENDING - IDs must be unique (rule 1)
+ */
+#define GD_MSG_MGMT_PGM_SET_FAIL (GLUSTERD_COMP_BASE + 161)
+
+/*!
+ * @messageid 106162
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_EVENT_INJECT_FAIL (GLUSTERD_COMP_BASE + 162)
+
+/*!
+ * @messageid 106163
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VERS_INFO (GLUSTERD_COMP_BASE + 163)
+
+/*!
+ * @messageid 106164
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_INFO_REQ_RECVD (GLUSTERD_COMP_BASE + 164)
+
+/*!
+ * @messageid 106165
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VERS_GET_FAIL (GLUSTERD_COMP_BASE + 165)
+
+/*!
+ * @messageid 106166
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_EVENT_NEW_GET_FAIL (GLUSTERD_COMP_BASE + 166)
+
+/*!
+ * @messageid 106167
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RPC_LAYER_ERROR (GLUSTERD_COMP_BASE + 167)
+
+/*!
+ * @messageid 106168
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_HANDSHAKE_ACK (GLUSTERD_COMP_BASE + 168)
+
+/*!
+ * @messageid 106169
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_VERSION_MISMATCH (GLUSTERD_COMP_BASE + 169)
+
+/*!
+ * @messageid 106170
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HANDSHAKE_REQ_REJECTED (GLUSTERD_COMP_BASE + 170)
+
+/*!
+ * @messageid 106171
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNKNOWN_MODE (GLUSTERD_COMP_BASE + 171)
+
+/*!
+ * @messageid 106172
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DEFRAG_STATUS_UPDATED (GLUSTERD_COMP_BASE + 172)
+
+/*!
+ * @messageid 106173
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_FLAG_SET (GLUSTERD_COMP_BASE + 173)
+
+/*!
+ * @messageid 106174
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VERSION_UNSUPPORTED (GLUSTERD_COMP_BASE + 174)
+
+/*!
+ * @messageid 106175
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUID_SET_FAIL (GLUSTERD_COMP_BASE + 175)
+
+/*!
+ * @messageid 106176
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MOUNT_REQ_FAIL (GLUSTERD_COMP_BASE + 176)
+
+/*!
+ * @messageid 106177
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL (GLUSTERD_COMP_BASE + 177)
+
+/*!
+ * @messageid 106178
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_VERS_STORE_FAIL (GLUSTERD_COMP_BASE + 178)
+
+/*!
+ * @messageid 106179
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): "AUTOMIC" looks like a typo for "ATOMIC" - confirm before renaming
+ */
+#define GD_MSG_SNAP_AUTOMIC_UPDATE_FAIL (GLUSTERD_COMP_BASE + 179)
+
+/*!
+ * @messageid 106180
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPINFO_WRITE_FAIL (GLUSTERD_COMP_BASE + 180)
+
+/*!
+ * @messageid 106181
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPINFO_CREATE_FAIL (GLUSTERD_COMP_BASE + 181)
+
+/*!
+ * @messageid 106182
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_INFO_STORE_FAIL (GLUSTERD_COMP_BASE + 182)
+
+/*!
+ * @messageid 106183
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_MNTPATH_MOUNT_FAIL (GLUSTERD_COMP_BASE + 183)
+
+/*!
+ * @messageid 106184
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_MNTPATH_GET_FAIL (GLUSTERD_COMP_BASE + 184)
+
+/*!
+ * @messageid 106185
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_BRK_MNT_RECREATE_FAIL (GLUSTERD_COMP_BASE + 185)
+
+/*!
+ * @messageid 106186
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_RESOLVE_BRICK_FAIL (GLUSTERD_COMP_BASE + 186)
+
+/*!
+ * @messageid 106187
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RESOLVE_BRICK_FAIL (GLUSTERD_COMP_BASE + 187)
+
+/*!
+ * @messageid 106188
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_MNT_RECREATE_FAIL (GLUSTERD_COMP_BASE + 188)
+
+/*!
+ * @messageid 106189
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TMP_FILE_UNLINK_FAIL (GLUSTERD_COMP_BASE + 189)
+
+/*!
+ * @messageid 106190
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_VALS_WRITE_FAIL (GLUSTERD_COMP_BASE + 190)
+
+/*!
+ * @messageid 106191
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STORE_HANDLE_GET_FAIL (GLUSTERD_COMP_BASE + 191)
+
+/*!
+ * @messageid 106192
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STORE_HANDLE_WRITE_FAIL (GLUSTERD_COMP_BASE + 192)
+
+/*!
+ * @messageid 106193
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_LIST_STORE_HANDLE_GET_FAIL \
+ (GLUSTERD_COMP_BASE + 193)
+
+/*!
+ * @messageid 106194
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MISSED_SNAP_LIST_EMPTY (GLUSTERD_COMP_BASE + 194)
+
+/*!
+ * @messageid 106195
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_VOL_RETRIEVE_FAIL (GLUSTERD_COMP_BASE + 195)
+
+/*!
+ * @messageid 106196
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPSHOT_UPDATE_FAIL (GLUSTERD_COMP_BASE + 196)
+
+/*!
+ * @messageid 106197
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_PORT_STORE_FAIL (GLUSTERD_COMP_BASE + 197)
+
+/*!
+ * @messageid 106198
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CKSUM_STORE_FAIL (GLUSTERD_COMP_BASE + 198)
+
+/*!
+ * @messageid 106199
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STORE_HANDLE_CREATE_FAIL (GLUSTERD_COMP_BASE + 199)
+
+/*!
+ * @messageid 106200
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HANDLE_NULL (GLUSTERD_COMP_BASE + 200)
+
+/*!
+ * @messageid 106201
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_RESTORE_FAIL (GLUSTERD_COMP_BASE + 201)
+
+/*!
+ * @messageid 106202
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NAME_TOO_LONG (GLUSTERD_COMP_BASE + 202)
+
+/*!
+ * @messageid 106203
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUID_PARSE_FAIL (GLUSTERD_COMP_BASE + 203)
+
+/*!
+ * @messageid 106204
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNKNOWN_KEY (GLUSTERD_COMP_BASE + 204)
+
+/*!
+ * @messageid 106205
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STORE_ITER_DESTROY_FAIL (GLUSTERD_COMP_BASE + 205)
+
+/*!
+ * @messageid 106206
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STORE_ITER_GET_FAIL (GLUSTERD_COMP_BASE + 206)
+
+/*!
+ * @messageid 106207
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLINFO_UPDATE_FAIL (GLUSTERD_COMP_BASE + 207)
+
+/*!
+ * @messageid 106208
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PARSE_BRICKINFO_FAIL (GLUSTERD_COMP_BASE + 208)
+
+/*!
+ * @messageid 106209
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VERS_STORE_FAIL (GLUSTERD_COMP_BASE + 209)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HEADER_ADD_FAIL (GLUSTERD_COMP_BASE + 210)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTA_CONF_WRITE_FAIL (GLUSTERD_COMP_BASE + 211)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTA_CONF_CORRUPT (GLUSTERD_COMP_BASE + 212)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FORK_FAIL (GLUSTERD_COMP_BASE + 213)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CKSUM_COMPUTE_FAIL (GLUSTERD_COMP_BASE + 214)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VERS_CKSUM_STORE_FAIL (GLUSTERD_COMP_BASE + 215)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GETXATTR_FAIL (GLUSTERD_COMP_BASE + 216)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CONVERSION_FAILED (GLUSTERD_COMP_BASE + 217)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_NOT_DISTRIBUTE (GLUSTERD_COMP_BASE + 218)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_STOPPED (GLUSTERD_COMP_BASE + 219)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OPCTX_GET_FAIL (GLUSTERD_COMP_BASE + 220)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TASKID_GEN_FAIL (GLUSTERD_COMP_BASE + 221)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REBALANCE_ID_MISSING (GLUSTERD_COMP_BASE + 222)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME (GLUSTERD_COMP_BASE + 223)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DEFRAG_STATUS_UPDATE_FAIL (GLUSTERD_COMP_BASE + 224)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUID_GEN_STORE_FAIL (GLUSTERD_COMP_BASE + 225)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUID_STORE_FAIL (GLUSTERD_COMP_BASE + 226)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_INIT (GLUSTERD_COMP_BASE + 227)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MODULE_NOT_INSTALLED (GLUSTERD_COMP_BASE + 228)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MODULE_NOT_WORKING (GLUSTERD_COMP_BASE + 229)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_WRITE_ACCESS_GRANT_FAIL (GLUSTERD_COMP_BASE + 230)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DIRPATH_TOO_LONG (GLUSTERD_COMP_BASE + 231)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOGGROUP_INVALID (GLUSTERD_COMP_BASE + 232)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DIR_PERM_LIBERAL (GLUSTERD_COMP_BASE + 233)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DIR_PERM_STRICT (GLUSTERD_COMP_BASE + 234)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ * Was + 234 (clash with GD_MSG_DIR_PERM_STRICT); + 249 looked free - confirm.
+ */
+#define GD_MSG_MOUNT_SPEC_INSTALL_FAIL (GLUSTERD_COMP_BASE + 249)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_SOCK_LISTENER_START_FAIL (GLUSTERD_COMP_BASE + 235)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DIR_NOT_FOUND (GLUSTERD_COMP_BASE + 236)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FAILED_INIT_SHDSVC (GLUSTERD_COMP_BASE + 237)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FAILED_INIT_NFSSVC (GLUSTERD_COMP_BASE + 238)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FAILED_INIT_QUOTASVC (GLUSTERD_COMP_BASE + 239)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RPC_INIT_FAIL (GLUSTERD_COMP_BASE + 240)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RPCSVC_REG_NOTIFY_RETURNED (GLUSTERD_COMP_BASE + 241)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RPC_TRANSPORT_COUNT_GET_FAIL (GLUSTERD_COMP_BASE + 242)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RPC_LISTENER_CREATE_FAIL (GLUSTERD_COMP_BASE + 243)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_VERS_RESTORE_FAIL (GLUSTERD_COMP_BASE + 244)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SELF_HEALD_DISABLED (GLUSTERD_COMP_BASE + 245)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PRIV_NULL (GLUSTERD_COMP_BASE + 246)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GSYNC_VALIDATION_FAIL (GLUSTERD_COMP_BASE + 247)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL (GLUSTERD_COMP_BASE + 248)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_NOT_PERMITTED_AC_REQD (GLUSTERD_COMP_BASE + 250)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_NOT_PERMITTED (GLUSTERD_COMP_BASE + 251)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REBALANCE_START_FAIL (GLUSTERD_COMP_BASE + 252)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_RECONF_FAIL (GLUSTERD_COMP_BASE + 253)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REMOVE_BRICK_ID_SET_FAIL (GLUSTERD_COMP_BASE + 254)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_MOUNTDIR_GET_FAIL (GLUSTERD_COMP_BASE + 255)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_NOT_FOUND (GLUSTERD_COMP_BASE + 256)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRKPATH_TOO_LONG (GLUSTERD_COMP_BASE + 257)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL (GLUSTERD_COMP_BASE + 258)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL (GLUSTERD_COMP_BASE + 259)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL (GLUSTERD_COMP_BASE + 260)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_PORT_NUM_GET_FAIL (GLUSTERD_COMP_BASE + 261)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_STATEDUMP_FAIL (GLUSTERD_COMP_BASE + 262)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_GRAPH_CHANGE_NOTIFY_FAIL (GLUSTERD_COMP_BASE + 263)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INVALID_VG (GLUSTERD_COMP_BASE + 264)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_OP_FAILED (GLUSTERD_COMP_BASE + 265)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL (GLUSTERD_COMP_BASE + 266)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STALE_PEERINFO_REMOVE_FAIL (GLUSTERD_COMP_BASE + 267)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TRANS_ID_GET_FAIL (GLUSTERD_COMP_BASE + 268)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RES_DECODE_FAIL (GLUSTERD_COMP_BASE + 269)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_ALREADY_EXIST (GLUSTERD_COMP_BASE + 270)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BAD_BRKORDER (GLUSTERD_COMP_BASE + 271)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BAD_BRKORDER_CHECK_FAIL (GLUSTERD_COMP_BASE + 272)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_SELECT_FAIL (GLUSTERD_COMP_BASE + 273)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_LOCK_RESP_FROM_PEER (GLUSTERD_COMP_BASE + 274)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_LOCK_FROM_UUID_REJCT (GLUSTERD_COMP_BASE + 275)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STAGE_FROM_UUID_REJCT (GLUSTERD_COMP_BASE + 276)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNLOCK_FROM_UUID_REJCT (GLUSTERD_COMP_BASE + 277)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_UNLOCK_FROM_UUID_REJCT (GLUSTERD_COMP_BASE + 278)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_COMMIT_FROM_UUID_REJCT (GLUSTERD_COMP_BASE + 279)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_NOT_STARTED (GLUSTERD_COMP_BASE + 280)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_NOT_REPLICA (GLUSTERD_COMP_BASE + 281)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OLD_REMOVE_BRICK_EXISTS (GLUSTERD_COMP_BASE + 283)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_USE_THE_FORCE (GLUSTERD_COMP_BASE + 284)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OIP (GLUSTERD_COMP_BASE + 285)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OIP_RETRY_LATER (GLUSTERD_COMP_BASE + 286)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GSYNC_RESTART_FAIL (GLUSTERD_COMP_BASE + 287)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_FROM_UUID_REJCT (GLUSTERD_COMP_BASE + 288)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL (GLUSTERD_COMP_BASE + 289)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HOSTNAME_RESOLVE_FAIL (GLUSTERD_COMP_BASE + 290)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_COUNT_VALIDATE_FAILED (GLUSTERD_COMP_BASE + 291)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SPAWNING_CHILD_FAILED (GLUSTERD_COMP_BASE + 292)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_READ_CHILD_DATA_FAILED (GLUSTERD_COMP_BASE + 293)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DEFAULT_TEMP_CONFIG (GLUSTERD_COMP_BASE + 294)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PIDFILE_CREATE_FAILED (GLUSTERD_COMP_BASE + 295)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GSYNCD_SPAWN_FAILED (GLUSTERD_COMP_BASE + 296)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SUBOP_NOT_FOUND (GLUSTERD_COMP_BASE + 297)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RESERVED_OPTION (GLUSTERD_COMP_BASE + 298)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_PRIV_NOT_FOUND (GLUSTERD_COMP_BASE + 299)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SLAVEINFO_FETCH_ERROR (GLUSTERD_COMP_BASE + 300)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VALIDATE_FAILED (GLUSTERD_COMP_BASE + 301)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INVOKE_ERROR (GLUSTERD_COMP_BASE + 302)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SESSION_CREATE_ERROR (GLUSTERD_COMP_BASE + 303)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STOP_FORCE (GLUSTERD_COMP_BASE + 304)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GET_CONFIG_INFO_FAILED (GLUSTERD_COMP_BASE + 305)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STAT_FILE_READ_FAILED (GLUSTERD_COMP_BASE + 306)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CONF_PATH_ASSIGN_FAILED (GLUSTERD_COMP_BASE + 307)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SESSION_INACTIVE (GLUSTERD_COMP_BASE + 308)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PIDFILE_NOT_FOUND (GLUSTERD_COMP_BASE + 309)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_CMD_ERROR (GLUSTERD_COMP_BASE + 310)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SRC_FILE_ERROR (GLUSTERD_COMP_BASE + 311)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GET_STATEFILE_NAME_FAILED (GLUSTERD_COMP_BASE + 312)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATUS_NULL (GLUSTERD_COMP_BASE + 313)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATUSFILE_CREATE_FAILED (GLUSTERD_COMP_BASE + 314)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SLAVE_URL_INVALID (GLUSTERD_COMP_BASE + 315)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INVALID_SLAVE (GLUSTERD_COMP_BASE + 316)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_READ_ERROR (GLUSTERD_COMP_BASE + 317)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ARG_FETCH_ERROR (GLUSTERD_COMP_BASE + 318)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REG_FILE_MISSING (GLUSTERD_COMP_BASE + 319)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATEFILE_NAME_NOT_FOUND (GLUSTERD_COMP_BASE + 320)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GEO_REP_START_FAILED (GLUSTERD_COMP_BASE + 321)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GSYNCD_ERROR (GLUSTERD_COMP_BASE + 322)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UPDATE_STATEFILE_FAILED (GLUSTERD_COMP_BASE + 323)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATUS_UPDATE_FAILED (GLUSTERD_COMP_BASE + 324)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GSYNCD_OP_SET_FAILED (GLUSTERD_COMP_BASE + 325)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BUFFER_EMPTY (GLUSTERD_COMP_BASE + 326)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CONFIG_INFO (GLUSTERD_COMP_BASE + 327)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FETCH_CONFIG_VAL_FAILED (GLUSTERD_COMP_BASE + 328)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GSYNCD_PARSE_ERROR (GLUSTERD_COMP_BASE + 329)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SESSION_ALREADY_EXIST (GLUSTERD_COMP_BASE + 330)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FORCE_CREATE_SESSION (GLUSTERD_COMP_BASE + 331)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GET_KEY_FAILED (GLUSTERD_COMP_BASE + 332)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SESSION_DEL_FAILED (GLUSTERD_COMP_BASE + 333)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CMD_EXEC_FAIL (GLUSTERD_COMP_BASE + 334)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STRDUP_FAILED (GLUSTERD_COMP_BASE + 335)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNABLE_TO_END (GLUSTERD_COMP_BASE + 336)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PAUSE_FAILED (GLUSTERD_COMP_BASE + 337)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NORMALIZE_URL_FAIL (GLUSTERD_COMP_BASE + 338)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MODULE_ERROR (GLUSTERD_COMP_BASE + 339)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SLAVEINFO_STORE_ERROR (GLUSTERD_COMP_BASE + 340)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MARKER_START_FAIL (GLUSTERD_COMP_BASE + 341)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RESUME_FAILED (GLUSTERD_COMP_BASE + 342)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERFS_START_FAIL (GLUSTERD_COMP_BASE + 343)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERFS_STOP_FAIL (GLUSTERD_COMP_BASE + 344)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RBOP_STATE_STORE_FAIL (GLUSTERD_COMP_BASE + 345)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PUMP_XLATOR_DISABLED (GLUSTERD_COMP_BASE + 346)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ABORT_OP_FAIL (GLUSTERD_COMP_BASE + 347)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PAUSE_OP_FAIL (GLUSTERD_COMP_BASE + 348)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_VOL_FILE_GEN_FAIL (GLUSTERD_COMP_BASE + 349)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HANDSHAKE_FAILED (GLUSTERD_COMP_BASE + 350)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLI_REQ_EMPTY (GLUSTERD_COMP_BASE + 351)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_ADD_FAIL (GLUSTERD_COMP_BASE + 352)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SYNC_FROM_LOCALHOST_UNALLOWED (GLUSTERD_COMP_BASE + 353)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUIDS_SAME_RETRY (GLUSTERD_COMP_BASE + 354)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TSP_ALREADY_FORMED (GLUSTERD_COMP_BASE + 355)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLS_ALREADY_PRESENT (GLUSTERD_COMP_BASE + 356)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REQ_CTX_CREATE_FAIL (GLUSTERD_COMP_BASE + 357)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_INFO_UPDATE_FAIL (GLUSTERD_COMP_BASE + 358)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEERINFO_CREATE_FAIL (GLUSTERD_COMP_BASE + 359)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REQ_FROM_UNKNOWN_PEER (GLUSTERD_COMP_BASE + 360)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATUS_REPLY_STRING_CREATE_FAIL (GLUSTERD_COMP_BASE + 361)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TOKENIZE_FAIL (GLUSTERD_COMP_BASE + 362)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LAZY_UMOUNT_FAIL (GLUSTERD_COMP_BASE + 363)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_SERVER_START_FAIL (GLUSTERD_COMP_BASE + 364)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_SERVER_STOP_FAIL (GLUSTERD_COMP_BASE + 365)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_CLEANUP_FAIL (GLUSTERD_COMP_BASE + 366)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_ALREADY_STARTED (GLUSTERD_COMP_BASE + 367)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_BRICKINFO_GET_FAIL (GLUSTERD_COMP_BASE + 368)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BAD_FORMAT (GLUSTERD_COMP_BASE + 369)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_CMD_FAIL (GLUSTERD_COMP_BASE + 370)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_NOT_STARTED_OR_PAUSED (GLUSTERD_COMP_BASE + 371)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_NOT_STARTED (GLUSTERD_COMP_BASE + 372)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_PAUSED_ALREADY (GLUSTERD_COMP_BASE + 373)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_FREE_PORTS (GLUSTERD_COMP_BASE + 374)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_EVENT_STATE_TRANSITION_FAIL (GLUSTERD_COMP_BASE + 375)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HANDLER_RETURNED (GLUSTERD_COMP_BASE + 376)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_COMPARE_CONFLICT (GLUSTERD_COMP_BASE + 377)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_DETACH_CLEANUP_FAIL (GLUSTERD_COMP_BASE + 378)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STALE_VOL_REMOVE_FAIL (GLUSTERD_COMP_BASE + 379)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_AC_ERROR (GLUSTERD_COMP_BASE + 380)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_FAIL (GLUSTERD_COMP_BASE + 381)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL (GLUSTERD_COMP_BASE + 382)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLUSTERD_UNLOCK_FAIL (GLUSTERD_COMP_BASE + 383)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RBOP_START_FAIL (GLUSTERD_COMP_BASE + 384)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNKNOWN_RESPONSE (GLUSTERD_COMP_BASE + 385)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_COMMIT_REQ_SEND_FAIL (GLUSTERD_COMP_BASE + 386)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OPCTX_UPDATE_FAIL (GLUSTERD_COMP_BASE + 387)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OPCTX_NULL (GLUSTERD_COMP_BASE + 388)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_COPY_FAIL (GLUSTERD_COMP_BASE + 389)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SHD_STATUS_SET_FAIL (GLUSTERD_COMP_BASE + 390)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REPLICA_INDEX_GET_FAIL (GLUSTERD_COMP_BASE + 391)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_SERVER_NOT_RUNNING (GLUSTERD_COMP_BASE + 392)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STAGE_REQ_SEND_FAIL (GLUSTERD_COMP_BASE + 393)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_REQ_SEND_FAIL (GLUSTERD_COMP_BASE + 394)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLNAMES_GET_FAIL (GLUSTERD_COMP_BASE + 395)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_TASK_ID (GLUSTERD_COMP_BASE + 396)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ADD_REMOVE_BRICK_FAIL (GLUSTERD_COMP_BASE + 397)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_RESTART_FAIL (GLUSTERD_COMP_BASE + 398)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_SET_FAIL (GLUSTERD_COMP_BASE + 399)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTAD_NOT_RUNNING (GLUSTERD_COMP_BASE + 400)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XLATOR_COUNT_GET_FAIL (GLUSTERD_COMP_BASE + 401)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TRANS_OPINFO_GET_FAIL (GLUSTERD_COMP_BASE + 402)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TRANS_ID_INVALID (GLUSTERD_COMP_BASE + 403)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_OPTIONS_GIVEN (GLUSTERD_COMP_BASE + 404)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAPD_NOT_RUNNING (GLUSTERD_COMP_BASE + 405)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ADD_ADDRESS_TO_PEER_FAIL (GLUSTERD_COMP_BASE + 406)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_ADDRESS_GET_FAIL (GLUSTERD_COMP_BASE + 407)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GETADDRINFO_FAIL (GLUSTERD_COMP_BASE + 408)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEERINFO_DELETE_FAIL (GLUSTERD_COMP_BASE + 409)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_KEY_NULL (GLUSTERD_COMP_BASE + 410)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SPAWN_SVCS_FAIL (GLUSTERD_COMP_BASE + 411)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_ITER_FAIL (GLUSTERD_COMP_BASE + 412)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TASK_STATUS_UPDATE_FAIL (GLUSTERD_COMP_BASE + 413)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_ID_MISMATCH (GLUSTERD_COMP_BASE + 414)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STR_TO_BOOL_FAIL (GLUSTERD_COMP_BASE + 415)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_MNT_BRICKS_MISMATCH (GLUSTERD_COMP_BASE + 416)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_SRC_BRICKS_MISMATCH (GLUSTERD_COMP_BASE + 417)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MNTENTRY_GET_FAIL (GLUSTERD_COMP_BASE + 418)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INODE_SIZE_GET_FAIL (GLUSTERD_COMP_BASE + 419)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NO_STATEFILE_ENTRY (GLUSTERD_COMP_BASE + 420)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PMAP_UNSET_FAIL (GLUSTERD_COMP_BASE + 421)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GLOBAL_OPT_IMPORT_FAIL (GLUSTERD_COMP_BASE + 422)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ * NOTE(review): spelled GD_MSD_ (not GD_MSG_); renaming needs all users updated.
+ */
+#define GD_MSD_BRICK_DISCONNECT_FAIL (GLUSTERD_COMP_BASE + 423)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_DETAILS_IMPORT_FAIL (GLUSTERD_COMP_BASE + 424)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICKINFO_CREATE_FAIL (GLUSTERD_COMP_BASE + 425)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL (GLUSTERD_COMP_BASE + 426)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CKSUM_GET_FAIL (GLUSTERD_COMP_BASE + 427)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICKPATH_ROOT_GET_FAIL (GLUSTERD_COMP_BASE + 428)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HOSTNAME_TO_UUID_FAIL (GLUSTERD_COMP_BASE + 429)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REPLY_SUBMIT_FAIL (GLUSTERD_COMP_BASE + 430)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SERIALIZE_MSG_FAIL (GLUSTERD_COMP_BASE + 431)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ENCODE_FAIL (GLUSTERD_COMP_BASE + 432)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RB_DST_BRICKS_MISMATCH (GLUSTERD_COMP_BASE + 433)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XLATOR_VOLOPT_DYNLOAD_ERROR (GLUSTERD_COMP_BASE + 434)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLNAME_NOTFOUND_IN_DICT (GLUSTERD_COMP_BASE + 435)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FLAGS_NOTFOUND_IN_DICT (GLUSTERD_COMP_BASE + 436)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HOSTNAME_NOTFOUND_IN_DICT (GLUSTERD_COMP_BASE + 437)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PORT_NOTFOUND_IN_DICT (GLUSTERD_COMP_BASE + 438)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CMDSTR_NOTFOUND_IN_DICT (GLUSTERD_COMP_BASE + 439)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_OBJ_NEW_FAIL (GLUSTERD_COMP_BASE + 440)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_BACKEND_MAKE_FAIL (GLUSTERD_COMP_BASE + 441)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CLONE_FAILED (GLUSTERD_COMP_BASE + 442)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CLONE_PREVAL_FAILED (GLUSTERD_COMP_BASE + 443)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_CLONE_POSTVAL_FAILED (GLUSTERD_COMP_BASE + 444)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLINFO_STORE_FAIL (GLUSTERD_COMP_BASE + 445)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL (GLUSTERD_COMP_BASE + 446)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_TYPE_CHANGING_INFO (GLUSTERD_COMP_BASE + 447)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRKPATH_MNTPNT_MISMATCH (GLUSTERD_COMP_BASE + 448)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TASKS_COUNT_MISMATCH (GLUSTERD_COMP_BASE + 449)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_WRONG_OPTS_SETTING (GLUSTERD_COMP_BASE + 450)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PATH_ALREADY_PART_OF_VOL (GLUSTERD_COMP_BASE + 451)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_VALIDATE_FAIL (GLUSTERD_COMP_BASE + 452)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_READIN_FILE_FAILED (GLUSTERD_COMP_BASE + 453)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_IMPORT_PRDICT_DICT (GLUSTERD_COMP_BASE + 454)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_OPTS_IMPORT_FAIL (GLUSTERD_COMP_BASE + 455)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_IMPORT_FAIL (GLUSTERD_COMP_BASE + 456)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLINFO_IMPORT_FAIL (GLUSTERD_COMP_BASE + 457)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRICK_ID_GEN_FAILED (GLUSTERD_COMP_BASE + 458)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GET_STATUS_DATA_FAIL (GLUSTERD_COMP_BASE + 459)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BITROT_NOT_RUNNING (GLUSTERD_COMP_BASE + 460)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SCRUBBER_NOT_RUNNING (GLUSTERD_COMP_BASE + 461)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SRC_BRICK_PORT_UNAVAIL (GLUSTERD_COMP_BASE + 462)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BITD_INIT_FAIL (GLUSTERD_COMP_BASE + 463)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SCRUB_INIT_FAIL (GLUSTERD_COMP_BASE + 464)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VAR_RUN_DIR_INIT_FAIL (GLUSTERD_COMP_BASE + 465)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VAR_RUN_DIR_FIND_FAIL (GLUSTERD_COMP_BASE + 466)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SCRUBSVC_RECONF_FAIL (GLUSTERD_COMP_BASE + 467)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BITDSVC_RECONF_FAIL (GLUSTERD_COMP_BASE + 468)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_START_FAIL (GLUSTERD_COMP_BASE + 469)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_SETUP_FAIL (GLUSTERD_COMP_BASE + 470)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNRECOGNIZED_SVC_MNGR (GLUSTERD_COMP_BASE + 471)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_OP_HANDLE_FAIL (GLUSTERD_COMP_BASE + 472)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_EXPORT_FILE_CREATE_FAIL (GLUSTERD_COMP_BASE + 473)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_HOST_FOUND (GLUSTERD_COMP_BASE + 474)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REBALANCE_CMD_IN_TIER_VOL (GLUSTERD_COMP_BASE + 475)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INCOMPATIBLE_VALUE (GLUSTERD_COMP_BASE + 476)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GENERATED_UUID (GLUSTERD_COMP_BASE + 477)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FILE_DESC_LIMIT_SET (GLUSTERD_COMP_BASE + 478)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CURR_WORK_DIR_INFO (GLUSTERD_COMP_BASE + 479)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STRIPE_COUNT_CHANGE_INFO (GLUSTERD_COMP_BASE + 480)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REPLICA_COUNT_CHANGE_INFO (GLUSTERD_COMP_BASE + 481)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ADD_BRICK_REQ_RECVD (GLUSTERD_COMP_BASE + 482)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_ALREADY_TIER (GLUSTERD_COMP_BASE + 483)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REM_BRICK_REQ_RECVD (GLUSTERD_COMP_BASE + 484)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_NOT_TIER (GLUSTERD_COMP_BASE + 485)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOG_ROTATE_REQ_RECVD (GLUSTERD_COMP_BASE + 486)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLI_REQ_RECVD (GLUSTERD_COMP_BASE + 487)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GET_VOL_REQ_RCVD (GLUSTERD_COMP_BASE + 488)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_SYNC_REQ_RCVD (GLUSTERD_COMP_BASE + 489)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PROBE_RCVD (GLUSTERD_COMP_BASE + 490)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UNFRIEND_REQ_RCVD (GLUSTERD_COMP_BASE + 491)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FRIEND_UPDATE_RCVD (GLUSTERD_COMP_BASE + 492)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RESPONSE_INFO (GLUSTERD_COMP_BASE + 493)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_PROFILE_REQ_RCVD (GLUSTERD_COMP_BASE + 494)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GETWD_REQ_RCVD (GLUSTERD_COMP_BASE + 495)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MOUNT_REQ_RCVD (GLUSTERD_COMP_BASE + 496)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UMOUNT_REQ_RCVD (GLUSTERD_COMP_BASE + 497)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CONNECT_RETURNED (GLUSTERD_COMP_BASE + 498)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATUS_VOL_REQ_RCVD (GLUSTERD_COMP_BASE + 499)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLRCLK_VOL_REQ_RCVD (GLUSTERD_COMP_BASE + 500)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BARRIER_VOL_REQ_RCVD (GLUSTERD_COMP_BASE + 501)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_UUID_RECEIVED (GLUSTERD_COMP_BASE + 502)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD (GLUSTERD_COMP_BASE + 503)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BRK_PORT_NO_ADD_INDO (GLUSTERD_COMP_BASE + 504)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REPLACE_BRK_REQ_RCVD (GLUSTERD_COMP_BASE + 505)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ADD_OP_ARGS_FAIL (GLUSTERD_COMP_BASE + 506)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_POST_HOOK_STUB_INIT_FAIL (GLUSTERD_COMP_BASE + 507)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HOOK_STUB_NULL (GLUSTERD_COMP_BASE + 508)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SPAWN_THREADS_FAIL (GLUSTERD_COMP_BASE + 509)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STALE_VOL_DELETE_INFO (GLUSTERD_COMP_BASE + 510)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PROBE_REQ_RESP_RCVD (GLUSTERD_COMP_BASE + 511)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HOST_PRESENT_ALREADY (GLUSTERD_COMP_BASE + 512)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_VERS_INFO (GLUSTERD_COMP_BASE + 513)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_VERS_SET_INFO (GLUSTERD_COMP_BASE + 514)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NEW_NODE_STATE_CREATION (GLUSTERD_COMP_BASE + 515)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ALREADY_MOUNTED (GLUSTERD_COMP_BASE + 516)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL (GLUSTERD_COMP_BASE + 517)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_STOP_FAIL (GLUSTERD_COMP_BASE + 518)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_RESET_FAIL (GLUSTERD_COMP_BASE + 519)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SHARED_STRG_SET_FAIL (GLUSTERD_COMP_BASE + 520)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_TRANSPORT_TYPE_CHANGE (GLUSTERD_COMP_BASE + 521)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PEER_COUNT_GET_FAIL (GLUSTERD_COMP_BASE + 522)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_INSUFFICIENT_UP_NODES (GLUSTERD_COMP_BASE + 523)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_STATS_VOL_FAIL (GLUSTERD_COMP_BASE + 524)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_ID_SET_FAIL (GLUSTERD_COMP_BASE + 525)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_RESET_VOL_FAIL (GLUSTERD_COMP_BASE + 526)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_BITROT_FAIL (GLUSTERD_COMP_BASE + 527)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_QUOTA_FAIL (GLUSTERD_COMP_BASE + 528)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_DELETE_VOL_FAIL (GLUSTERD_COMP_BASE + 529)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HANDLE_HEAL_CMD_FAIL (GLUSTERD_COMP_BASE + 530)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_CLRCLK_SND_CMD_FAIL (GLUSTERD_COMP_BASE + 531)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DISPERSE_CLUSTER_FOUND (GLUSTERD_COMP_BASE + 532)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_HEAL_VOL_REQ_RCVD (GLUSTERD_COMP_BASE + 533)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATEDUMP_VOL_REQ_RCVD (GLUSTERD_COMP_BASE + 534)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_THINPOOLS_FOR_THINLVS (GLUSTERD_COMP_BASE + 535)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_CREATE_VOL_FAIL (GLUSTERD_COMP_BASE + 536)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_OP_STAGE_START_VOL_FAIL (GLUSTERD_COMP_BASE + 537)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL (GLUSTERD_COMP_BASE + 538)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_TASK_ID_INFO (GLUSTERD_COMP_BASE + 539)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DEREGISTER_SUCCESS (GLUSTERD_COMP_BASE + 540)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATEDUMP_OPTS_RCVD (GLUSTERD_COMP_BASE + 541)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATEDUMP_INFO (GLUSTERD_COMP_BASE + 542)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RECOVERING_CORRUPT_CONF (GLUSTERD_COMP_BASE + 543)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_RETRIEVED_UUID (GLUSTERD_COMP_BASE + 544)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XLATOR_CREATE_FAIL (GLUSTERD_COMP_BASE + 545)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GRAPH_ENTRY_ADD_FAIL (GLUSTERD_COMP_BASE + 546)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ERROR_ENCOUNTERED (GLUSTERD_COMP_BASE + 547)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FILTER_RUN_FAILED (GLUSTERD_COMP_BASE + 548)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DEFAULT_OPT_INFO (GLUSTERD_COMP_BASE + 549)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MARKER_STATUS_GET_FAIL (GLUSTERD_COMP_BASE + 550)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MARKER_DISABLE_FAIL (GLUSTERD_COMP_BASE + 551)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GRAPH_FEATURE_ADD_FAIL (GLUSTERD_COMP_BASE + 552)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XLATOR_SET_OPT_FAIL (GLUSTERD_COMP_BASE + 553)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_BUILD_GRAPH_FAILED (GLUSTERD_COMP_BASE + 554)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XML_TEXT_WRITE_FAIL (GLUSTERD_COMP_BASE + 555)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XML_DOC_START_FAIL (GLUSTERD_COMP_BASE + 556)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XML_ELE_CREATE_FAIL (GLUSTERD_COMP_BASE + 557)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOLUME_INCONSISTENCY (GLUSTERD_COMP_BASE + 558)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_XLATOR_LINK_FAIL (GLUSTERD_COMP_BASE + 559)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REMOTE_HOST_GET_FAIL (GLUSTERD_COMP_BASE + 560)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_GRAPH_SET_OPT_FAIL (GLUSTERD_COMP_BASE + 561)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ROOT_SQUASH_ENABLED (GLUSTERD_COMP_BASE + 562)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_ROOT_SQUASH_FAILED (GLUSTERD_COMP_BASE + 563)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_OWNER_MISMATCH (GLUSTERD_COMP_BASE + 564)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_NOT_HELD (GLUSTERD_COMP_BASE + 565)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_LOCK_ALREADY_HELD (GLUSTERD_COMP_BASE + 566)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_START_SUCCESS (GLUSTERD_COMP_BASE + 567)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_STOP_SUCCESS (GLUSTERD_COMP_BASE + 568)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PARAM_NULL (GLUSTERD_COMP_BASE + 569)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_STOP_FAIL (GLUSTERD_COMP_BASE + 570)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define GD_MSG_SHARED_STORAGE_DOES_NOT_EXIST (GLUSTERD_COMP_BASE + 571)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define GD_MSG_SNAP_PAUSE_TIER_FAIL (GLUSTERD_COMP_BASE + 572)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SNAP_RESUME_TIER_FAIL (GLUSTERD_COMP_BASE + 573)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_FILE_NOT_FOUND (GLUSTERD_COMP_BASE + 574)
+
+/*!
+ * @messageid 106575
+ * @diagnosis Brick failed to start with the given port, so it picks a fresh
+ *            port on its own and tries to restart the brick with the new port
+ * @recommendedaction Ensure the new port is not blocked by firewall
+ */
+#define GD_MSG_RETRY_WITH_NEW_PORT (GLUSTERD_COMP_BASE + 575)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_REMOTE_VOL_UUID_FAIL (GLUSTERD_COMP_BASE + 576)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SLAVE_VOL_PARSE_FAIL (GLUSTERD_COMP_BASE + 577)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_DICT_GET_SUCCESS (GLUSTERD_COMP_BASE + 578)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_GLUSTERD_MESSAGES_H_ */
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
new file mode 100644
index 00000000000..5b7f0fa3c25
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
@@ -0,0 +1,1015 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-messages.h"
+
+/* Handler that ignores its request and unconditionally reports success. */
+static int
+glusterd_mgmt_v3_null (rpcsvc_request_t *req)
+{
+        (void) req;     /* intentionally unused */
+
+        return 0;
+}
+
+/*
+ * Send the reply for a mgmt_v3 lock request.
+ *
+ * @req:      request being answered
+ * @status:   value stored in the reply's op_ret
+ * @op_errno: value stored in the reply's op_errno, only when status is
+ *            non-zero
+ *
+ * Returns whatever glusterd_submit_reply () returns.
+ */
+static int
+glusterd_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, int32_t status,
+                                 uint32_t op_errno)
+{
+        xlator_t               *this   = THIS;
+        gd1_mgmt_v3_lock_rsp    reply  = {{0},};
+        int                     sent   = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        if (reply.op_ret != 0) {
+                /* The errno is only carried along with a failure status. */
+                reply.op_errno = op_errno;
+        }
+
+        glusterd_get_uuid (&reply.uuid);
+
+        sent = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                      (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+
+        gf_msg_debug (this->name, 0,
+                      "Responded to mgmt_v3 lock, ret: %d", sent);
+
+        return sent;
+}
+
+/*
+ * Acquire the mgmt_v3 locks described by ctx->dict on behalf of ctx->uuid
+ * and immediately send the outcome back on req.
+ *
+ * lock_req is accepted but not used by this function.
+ *
+ * Returns the result of sending the response, not of taking the locks; a
+ * lock failure is logged and forwarded as the reply's status.
+ */
+static int
+glusterd_synctasked_mgmt_v3_lock (rpcsvc_request_t *req,
+                                  gd1_mgmt_v3_lock_req *lock_req,
+                                  glusterd_op_lock_ctx_t *ctx)
+{
+        xlator_t  *this      = THIS;
+        uint32_t   op_errno  = 0;
+        int32_t    lock_ret  = -1;
+        int32_t    resp_ret  = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (ctx);
+        GF_ASSERT (ctx->dict);
+
+        /* Trying to acquire multiple mgmt_v3 locks */
+        lock_ret = glusterd_multiple_mgmt_v3_lock (ctx->dict, ctx->uuid,
+                                                   &op_errno);
+        if (lock_ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                        "Failed to acquire mgmt_v3 locks for %s",
+                        uuid_utoa (ctx->uuid));
+        }
+
+        /* The lock outcome is passed on as the status of the reply. */
+        resp_ret = glusterd_mgmt_v3_lock_send_resp (req, lock_ret, op_errno);
+
+        gf_msg_trace (this->name, 0, "Returning %d", resp_ret);
+        return resp_ret;
+}
+
+/*
+ * Route a mgmt_v3 lock request through the op state machine: record the
+ * transaction's opinfo under lock_req->txn_id, inject GD_OP_EVENT_LOCK with
+ * ctx as its payload, then run the friend and op state machines.
+ *
+ * The two state machines are run even when an earlier step failed.
+ *
+ * Returns 0 on success, otherwise the error from setting the opinfo or from
+ * injecting the event.
+ */
+static int
+glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req,
+                                        gd1_mgmt_v3_lock_req *lock_req,
+                                        glusterd_op_lock_ctx_t *ctx)
+{
+        xlator_t            *this     = THIS;
+        glusterd_op_info_t   opinfo   = {{0},};
+        int32_t              ret      = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        glusterd_txn_opinfo_init (&opinfo, NULL, &lock_req->op, ctx->dict,
+                                  req);
+
+        ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &opinfo);
+        if (ret == 0) {
+                ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK,
+                                                   &lock_req->txn_id, ctx);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OP_EVENT_LOCK_FAIL,
+                                "Failed to inject event GD_OP_EVENT_LOCK");
+                }
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+        }
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Actor body for a mgmt_v3 lock request received from a peer.
+ *
+ * Decodes the request, rejects senders that are not known peers, builds a
+ * lock context (sender uuid, request, unserialized dictionary) and then
+ * takes one of two paths depending on the "is_synctasked" key of that
+ * dictionary:
+ *   - set:   lock and reply directly via glusterd_synctasked_mgmt_v3_lock ();
+ *   - unset: hand the context to the op state machine.
+ *
+ * The context is freed here when an error occurred or when the synctasked
+ * path was taken; otherwise it is left alive for the state machine.
+ *
+ * Returns 0 on the synctasked path (even if locking failed), otherwise the
+ * result of the failing step or of the state-machine path.
+ */
+static int
+glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req)
+{
+        gd1_mgmt_v3_lock_req    lock_req        = {{0},};
+        int32_t                 ret             = -1;
+        glusterd_op_lock_ctx_t *ctx             = NULL;
+        xlator_t               *this            = NULL;
+        gf_boolean_t            is_synctasked   = _gf_false;
+        gf_boolean_t            free_ctx        = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        /* Decode the wire request; a negative result means it was garbage. */
+        ret = xdr_to_generic (req->msg[0], &lock_req,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode lock "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "Received mgmt_v3 lock req "
+                      "from uuid: %s", uuid_utoa (lock_req.uuid));
+
+        /* Only peers that are already known may take locks. */
+        if (glusterd_peerinfo_find_by_uuid (lock_req.uuid) == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (lock_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+        if (!ctx) {
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_copy (ctx->uuid, lock_req.uuid);
+        ctx->req = req;
+
+        ctx->dict = dict_new ();
+        if (!ctx->dict) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_unserialize (lock_req.dict.dict_val,
+                                lock_req.dict.dict_len, &ctx->dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize the dictionary");
+                goto out;
+        }
+
+        /* A missing "is_synctasked" key is treated as false. */
+        is_synctasked = dict_get_str_boolean (ctx->dict,
+                                              "is_synctasked", _gf_false);
+        if (is_synctasked) {
+                ret = glusterd_synctasked_mgmt_v3_lock (req, &lock_req, ctx);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Failed to acquire mgmt_v3_locks");
+                        /* Ignore the return code, as it shouldn't be propagated
+                         * from the handler function so as to avoid double
+                         * deletion of the req
+                         */
+                        ret = 0;
+                }
+
+                /* The above function does not take ownership of ctx.
+                 * Therefore we need to free the ctx explicitly. */
+                free_ctx = _gf_true;
+        }
+        else {
+                /* Shouldn't ignore the return code here, and it should
+                 * be propagated from the handler function as in failure
+                 * case it doesn't delete the req object
+                 */
+                ret = glusterd_op_state_machine_mgmt_v3_lock (req, &lock_req,
+                                                              ctx);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Failed to acquire mgmt_v3_locks");
+        }
+
+out:
+
+        /* Release ctx on any error, or when the synctasked path is done
+         * with it; on state-machine success it is deliberately kept. */
+        if (ctx && (ret || free_ctx)) {
+                if (ctx->dict)
+                        dict_unref (ctx->dict);
+
+                GF_FREE (ctx);
+        }
+
+        /* Plain free (): this buffer was filled in by the XDR decode above,
+         * not by GF_CALLOC. It is still NULL if decoding never ran. */
+        free (lock_req.dict.dict_val);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Build and send the reply for a mgmt_v3 pre-validation request.
+ *
+ * @req:       request being answered
+ * @op:        operation code echoed back in the reply
+ * @status:    value stored in the reply's op_ret
+ * @op_errstr: error text for the reply; NULL is sent as an empty string
+ * @rsp_dict:  dictionary serialized into the reply
+ * @op_errno:  value stored in the reply's op_errno
+ *
+ * Returns the negative result of the serialization if that fails (no reply
+ * is submitted in that case), otherwise the result of
+ * glusterd_submit_reply ().
+ */
+static int
+glusterd_mgmt_v3_pre_validate_send_resp (rpcsvc_request_t *req,
+                                         int32_t op, int32_t status,
+                                         char *op_errstr, dict_t *rsp_dict,
+                                         uint32_t op_errno)
+{
+        xlator_t                *this   = THIS;
+        gd1_mgmt_v3_pre_val_rsp  reply  = {{0},};
+        int                      ret    = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+        reply.op = op;
+        reply.op_errno = op_errno;
+        reply.op_errstr = op_errstr ? op_errstr : "";
+
+        ret = dict_allocate_and_serialize (rsp_dict, &reply.dict.dict_val,
+                                           &reply.dict.dict_len);
+        if (ret >= 0) {
+                ret = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                        (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
+
+                /* The serialized buffer is only needed for the submit. */
+                GF_FREE (reply.dict.dict_val);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "failed to get serialized length of dict");
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Responded to pre validation, ret: %d", ret);
+        return ret;
+}
+
+/*
+ * Actor body for a mgmt_v3 pre-validation request received from a peer.
+ *
+ * Decodes the request, rejects senders that are not known peers, runs
+ * gd_mgmt_v3_pre_validate_fn () on the unserialized dictionary and sends the
+ * outcome back with glusterd_mgmt_v3_pre_validate_send_resp ().
+ *
+ * Returns 0 on every path except failure to allocate the response
+ * dictionary, which returns -1. All locally acquired resources are released
+ * on every path.
+ */
+static int
+glusterd_handle_pre_validate_fn (rpcsvc_request_t *req)
+{
+        int32_t                  ret         = -1;
+        int                      handler_ret = 0;
+        gd1_mgmt_v3_pre_val_req  op_req      = {{0},};
+        xlator_t                *this        = NULL;
+        char                    *op_errstr   = NULL;
+        dict_t                  *dict        = NULL;
+        dict_t                  *rsp_dict    = NULL;
+        uint32_t                 op_errno    = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &op_req,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode pre validation "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* Only peers that are already known may drive operations. */
+        if (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (op_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_unserialize (op_req.dict.dict_val,
+                                op_req.dict.dict_len, &dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize the dictionary");
+                goto out;
+        }
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to get new dictionary");
+                /* Still report -1 to the caller, but go through the common
+                 * cleanup so that dict and the decoded request buffer are
+                 * not leaked. */
+                handler_ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): op_req.op comes from the peer and is used below as
+         * an index into gd_op_list without a range check - confirm it is
+         * validated elsewhere. */
+        ret = gd_mgmt_v3_pre_validate_fn (op_req.op, dict, &op_errstr,
+                                          rsp_dict, &op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL,
+                        "Pre Validation failed on operation %s",
+                        gd_op_list[op_req.op]);
+        }
+
+        /* The validation outcome is sent to the peer as the reply status. */
+        ret = glusterd_mgmt_v3_pre_validate_send_resp (req, op_req.op,
+                                                       ret, op_errstr,
+                                                       rsp_dict, op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_OP_RESP_FAIL,
+                        "Failed to send Pre Validation "
+                        "response for operation %s",
+                        gd_op_list[op_req.op]);
+                goto out;
+        }
+
+out:
+        if (op_errstr && (strcmp (op_errstr, "")))
+                GF_FREE (op_errstr);
+
+        /* Plain free (): this buffer was filled in by the XDR decode. */
+        free (op_req.dict.dict_val);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        /* Return 0 from handler to avoid double deletion of req obj; the
+         * only non-zero case is the allocation failure flagged above. */
+        return handler_ret;
+}
+
+/*
+ * Build and send the reply for a mgmt_v3 brick-op request.
+ *
+ * @req:       request being answered
+ * @op:        operation code echoed back in the reply
+ * @status:    value stored in the reply's op_ret
+ * @op_errstr: error text for the reply; NULL is sent as an empty string
+ * @rsp_dict:  dictionary serialized into the reply
+ *
+ * Returns the negative result of the serialization if that fails (no reply
+ * is submitted in that case), otherwise the result of
+ * glusterd_submit_reply ().
+ */
+static int
+glusterd_mgmt_v3_brick_op_send_resp (rpcsvc_request_t *req,
+                                     int32_t op, int32_t status,
+                                     char *op_errstr, dict_t *rsp_dict)
+{
+        xlator_t                 *this   = THIS;
+        gd1_mgmt_v3_brick_op_rsp  reply  = {{0},};
+        int                       ret    = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+        reply.op = op;
+        reply.op_errstr = op_errstr ? op_errstr : "";
+
+        ret = dict_allocate_and_serialize (rsp_dict, &reply.dict.dict_val,
+                                           &reply.dict.dict_len);
+        if (ret >= 0) {
+                ret = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                       (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
+
+                /* The serialized buffer is only needed for the submit. */
+                GF_FREE (reply.dict.dict_val);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "failed to get serialized length of dict");
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Responded to brick op, ret: %d", ret);
+        return ret;
+}
+
+/*
+ * Actor body for a mgmt_v3 brick-op request received from a peer.
+ *
+ * Decodes the request, rejects senders that are not known peers, runs
+ * gd_mgmt_v3_brick_op_fn () on the unserialized dictionary and sends the
+ * outcome back with glusterd_mgmt_v3_brick_op_send_resp ().
+ *
+ * Returns 0 on every path except failure to allocate the response
+ * dictionary, which returns -1. All locally acquired resources are released
+ * on every path.
+ */
+static int
+glusterd_handle_brick_op_fn (rpcsvc_request_t *req)
+{
+        int32_t                  ret         = -1;
+        int                      handler_ret = 0;
+        gd1_mgmt_v3_brick_op_req op_req      = {{0},};
+        xlator_t                *this        = NULL;
+        char                    *op_errstr   = NULL;
+        dict_t                  *dict        = NULL;
+        dict_t                  *rsp_dict    = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &op_req,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode brick op "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* Only peers that are already known may drive operations. */
+        if (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (op_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_unserialize (op_req.dict.dict_val,
+                                op_req.dict.dict_len, &dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize the dictionary");
+                goto out;
+        }
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to get new dictionary");
+                /* Still report -1 to the caller, but go through the common
+                 * cleanup so that dict and the decoded request buffer are
+                 * not leaked. */
+                handler_ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): op_req.op comes from the peer and is used below as
+         * an index into gd_op_list without a range check - confirm it is
+         * validated elsewhere. */
+        ret = gd_mgmt_v3_brick_op_fn (op_req.op, dict, &op_errstr,
+                                      rsp_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_OP_FAIL,
+                        "Brick Op failed on operation %s",
+                        gd_op_list[op_req.op]);
+        }
+
+        /* The brick-op outcome is sent to the peer as the reply status. */
+        ret = glusterd_mgmt_v3_brick_op_send_resp (req, op_req.op,
+                                                   ret, op_errstr,
+                                                   rsp_dict);
+        if (ret) {
+                /* NOTE(review): the sibling handlers log this failure with
+                 * GD_MSG_MGMTV3_OP_RESP_FAIL; the pre-validation id here
+                 * looks like a copy-paste leftover - confirm before
+                 * changing it. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALD_RESP_FAIL,
+                        "Failed to send brick op "
+                        "response for operation %s",
+                        gd_op_list[op_req.op]);
+                goto out;
+        }
+
+out:
+        if (op_errstr && (strcmp (op_errstr, "")))
+                GF_FREE (op_errstr);
+
+        /* Plain free (): this buffer was filled in by the XDR decode. */
+        free (op_req.dict.dict_val);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        /* Return 0 from handler to avoid double deletion of req obj; the
+         * only non-zero case is the allocation failure flagged above. */
+        return handler_ret;
+}
+
+/*
+ * Build and send the reply for a mgmt_v3 commit request.
+ *
+ * @req:       request being answered
+ * @op:        operation code echoed back in the reply
+ * @status:    value stored in the reply's op_ret
+ * @op_errstr: error text for the reply; NULL is sent as an empty string
+ * @op_errno:  value stored in the reply's op_errno
+ * @rsp_dict:  dictionary serialized into the reply
+ *
+ * Returns the negative result of the serialization if that fails (no reply
+ * is submitted in that case), otherwise the result of
+ * glusterd_submit_reply ().
+ */
+static int
+glusterd_mgmt_v3_commit_send_resp (rpcsvc_request_t *req,
+                                   int32_t op, int32_t status,
+                                   char *op_errstr, uint32_t op_errno,
+                                   dict_t *rsp_dict)
+{
+        xlator_t               *this   = THIS;
+        gd1_mgmt_v3_commit_rsp  reply  = {{0},};
+        int                     ret    = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+        reply.op = op;
+        reply.op_errno = op_errno;
+        reply.op_errstr = op_errstr ? op_errstr : "";
+
+        ret = dict_allocate_and_serialize (rsp_dict, &reply.dict.dict_val,
+                                           &reply.dict.dict_len);
+        if (ret >= 0) {
+                ret = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                         (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
+
+                /* The serialized buffer is only needed for the submit. */
+                GF_FREE (reply.dict.dict_val);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "failed to get serialized length of dict");
+        }
+
+        gf_msg_debug (this->name, 0, "Responded to commit, ret: %d", ret);
+        return ret;
+}
+
+/*
+ * Actor body for a mgmt_v3 commit request received from a peer.
+ *
+ * Decodes the request, rejects senders that are not known peers, runs
+ * gd_mgmt_v3_commit_fn () on the unserialized dictionary and sends the
+ * outcome back with glusterd_mgmt_v3_commit_send_resp ().
+ *
+ * Returns 0 on every path except failure to allocate the response
+ * dictionary, which returns -1. All locally acquired resources are released
+ * on every path.
+ */
+static int
+glusterd_handle_commit_fn (rpcsvc_request_t *req)
+{
+        int32_t                  ret         = -1;
+        int                      handler_ret = 0;
+        gd1_mgmt_v3_commit_req   op_req      = {{0},};
+        xlator_t                *this        = NULL;
+        char                    *op_errstr   = NULL;
+        dict_t                  *dict        = NULL;
+        dict_t                  *rsp_dict    = NULL;
+        uint32_t                 op_errno    = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &op_req,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_commit_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode commit "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* Only peers that are already known may drive operations. */
+        if (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (op_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_unserialize (op_req.dict.dict_val,
+                                op_req.dict.dict_len, &dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize the dictionary");
+                goto out;
+        }
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to get new dictionary");
+                /* Still report -1 to the caller, but go through the common
+                 * cleanup so that dict and the decoded request buffer are
+                 * not leaked. */
+                handler_ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): op_req.op comes from the peer and is used below as
+         * an index into gd_op_list without a range check - confirm it is
+         * validated elsewhere. */
+        ret = gd_mgmt_v3_commit_fn (op_req.op, dict, &op_errstr,
+                                    &op_errno, rsp_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_OP_FAIL,
+                        "commit failed on operation %s",
+                        gd_op_list[op_req.op]);
+        }
+
+        /* The commit outcome is sent to the peer as the reply status. */
+        ret = glusterd_mgmt_v3_commit_send_resp (req, op_req.op,
+                                                 ret, op_errstr,
+                                                 op_errno, rsp_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_OP_RESP_FAIL,
+                        "Failed to send commit "
+                        "response for operation %s",
+                        gd_op_list[op_req.op]);
+                goto out;
+        }
+
+out:
+        if (op_errstr && (strcmp (op_errstr, "")))
+                GF_FREE (op_errstr);
+
+        /* Plain free (): this buffer was filled in by the XDR decode. */
+        free (op_req.dict.dict_val);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        /* Return 0 from handler to avoid double deletion of req obj; the
+         * only non-zero case is the allocation failure flagged above. */
+        return handler_ret;
+}
+
+/*
+ * Build and send the reply for a mgmt_v3 post-validation request.
+ *
+ * @req:       request being answered
+ * @op:        operation code echoed back in the reply
+ * @status:    value stored in the reply's op_ret
+ * @op_errstr: error text for the reply; NULL is sent as an empty string
+ * @rsp_dict:  dictionary serialized into the reply
+ *
+ * Returns the negative result of the serialization if that fails (no reply
+ * is submitted in that case), otherwise the result of
+ * glusterd_submit_reply ().
+ */
+static int
+glusterd_mgmt_v3_post_validate_send_resp (rpcsvc_request_t *req,
+                                          int32_t op, int32_t status,
+                                          char *op_errstr, dict_t *rsp_dict)
+{
+        xlator_t                 *this   = THIS;
+        gd1_mgmt_v3_post_val_rsp  reply  = {{0},};
+        int                       ret    = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        glusterd_get_uuid (&reply.uuid);
+        reply.op = op;
+        reply.op_errstr = op_errstr ? op_errstr : "";
+
+        ret = dict_allocate_and_serialize (rsp_dict, &reply.dict.dict_val,
+                                           &reply.dict.dict_len);
+        if (ret >= 0) {
+                ret = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                       (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
+
+                /* The serialized buffer is only needed for the submit. */
+                GF_FREE (reply.dict.dict_val);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "failed to get serialized length of dict");
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Responded to post validation, ret: %d", ret);
+        return ret;
+}
+
+/*
+ * Handler body for a post-validation request received from a peer.
+ *
+ * Decodes the request, rejects it when the sender's uuid is not a known
+ * peer, unserializes the request dictionary, runs
+ * gd_mgmt_v3_post_validate_fn() and sends the result back with
+ * glusterd_mgmt_v3_post_validate_send_resp().
+ *
+ * Returns 0 on every path that reaches the common cleanup (see the comment
+ * at the end of the function), and -1 only when the response dictionary
+ * cannot be allocated.
+ */
+static int
+glusterd_handle_post_validate_fn (rpcsvc_request_t *req)
+{
+        int32_t                    ret       = -1;
+        gd1_mgmt_v3_post_val_req   op_req    = {{0},};
+        xlator_t                  *this      = NULL;
+        char                      *op_errstr = NULL;
+        dict_t                    *dict      = NULL;
+        dict_t                    *rsp_dict  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &op_req,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL,
+                        "Failed to decode post validation "
+                        "request received from peer");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+                        "belong to the cluster. Ignoring request.",
+                        uuid_utoa (op_req.uuid));
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        ret = dict_unserialize (op_req.dict.dict_val,
+                                op_req.dict.dict_len, &dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_UNSERIALIZE_FAIL,
+                        "failed to unserialize the dictionary");
+                goto out;
+        }
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to get new dictionary");
+                /* This path returns -1 directly and therefore skips the
+                 * common cleanup below; release what has been acquired so
+                 * far so the request dictionary and the decoded payload
+                 * are not leaked.
+                 */
+                dict_unref (dict);
+                free (op_req.dict.dict_val);
+                return -1;
+        }
+
+        ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict,
+                                           &op_errstr, rsp_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_POST_VALIDATION_FAIL,
+                        "Post Validation failed on operation %s",
+                        gd_op_list[op_req.op]);
+        }
+
+        ret = glusterd_mgmt_v3_post_validate_send_resp (req, op_req.op,
+                                                        ret, op_errstr,
+                                                        rsp_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_OP_RESP_FAIL,
+                        "Failed to send Post Validation "
+                        "response for operation %s",
+                        gd_op_list[op_req.op]);
+                goto out;
+        }
+
+out:
+        if (op_errstr && (strcmp (op_errstr, "")))
+                GF_FREE (op_errstr);
+
+        /* dict_val is released with plain free(), not GF_FREE */
+        free (op_req.dict.dict_val);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        /* Return 0 from handler to avoid double deletion of req obj */
+        return 0;
+}
+
+/*
+ * Submit the mgmt_v3 unlock reply for @req.
+ *
+ * @status is sent as op_ret; when it is non-zero the current errno is
+ * sent as op_errno. Returns the result of glusterd_submit_reply().
+ */
+static int
+glusterd_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+        gd1_mgmt_v3_unlock_rsp  reply = {{0},};
+        int                     rc    = -1;
+        xlator_t               *this  = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        reply.op_ret = status;
+        if (reply.op_ret) {
+                reply.op_errno = errno;
+        }
+
+        glusterd_get_uuid (&reply.uuid);
+
+        rc = glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+
+        gf_msg_debug (this->name, 0,
+                      "Responded to mgmt_v3 unlock, ret: %d", rc);
+
+        return rc;
+}
+
+/*
+ * Release the mgmt_v3 locks described by ctx->dict on behalf of ctx->uuid
+ * and immediately send the unlock reply for @req.
+ *
+ * The unlock result is what gets reported to the peer; the value returned
+ * to the caller is the result of sending that reply. @unlock_req is not
+ * read here.
+ */
+static int
+glusterd_syctasked_mgmt_v3_unlock (rpcsvc_request_t *req,
+                                   gd1_mgmt_v3_unlock_req *unlock_req,
+                                   glusterd_op_lock_ctx_t *ctx)
+{
+        int32_t    unlock_rc = -1;
+        int32_t    send_rc   = -1;
+        xlator_t  *this      = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (ctx);
+
+        /* Trying to release multiple mgmt_v3 locks */
+        unlock_rc = glusterd_multiple_mgmt_v3_unlock (ctx->dict, ctx->uuid);
+        if (unlock_rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_UNLOCK_FAIL,
+                        "Failed to release mgmt_v3 locks for %s",
+                        uuid_utoa(ctx->uuid));
+        }
+
+        send_rc = glusterd_mgmt_v3_unlock_send_resp (req, unlock_rc);
+
+        gf_msg_trace (this->name, 0, "Returning %d", send_rc);
+        return send_rc;
+}
+
+
+/*
+ * Handle an unlock request through the op state machine: inject
+ * GD_OP_EVENT_UNLOCK for the request's transaction id with @ctx as the
+ * event context, then run the friend and op state machines.
+ *
+ * Returns the result of the event injection.
+ */
+static int
+glusterd_op_state_machine_mgmt_v3_unlock (rpcsvc_request_t *req,
+                                          gd1_mgmt_v3_unlock_req *lock_req,
+                                          glusterd_op_lock_ctx_t *ctx)
+{
+        int32_t    inject_rc = -1;
+        xlator_t  *this      = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        inject_rc = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK,
+                                                 &lock_req->txn_id, ctx);
+        if (inject_rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_EVENT_UNLOCK_FAIL,
+                        "Failed to inject event GD_OP_EVENT_UNLOCK");
+        }
+
+        /* Both state machines are run regardless of the injection result. */
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        gf_msg_trace (this->name, 0, "Returning %d", inject_rc);
+        return inject_rc;
+}
+
+/*
+ * Handler body for a mgmt_v3 unlock request received from a peer.
+ *
+ * Decodes the request, rejects it when the sender's uuid is not a known
+ * peer, builds a lock context (sender uuid, request, unserialized
+ * dictionary) and then takes one of two paths depending on the
+ * "is_synctasked" key of that dictionary:
+ *   - synctasked: unlock and reply inline; ctx is freed here afterwards.
+ *   - otherwise:  hand ctx to the op state machine; ctx is freed here only
+ *                 when that hand-off fails.
+ *
+ * Returns 0 on the synctasked path (failures there are deliberately
+ * masked), otherwise the result of the failing step or of the state
+ * machine hand-off.
+ */
+static int
+glusterd_handle_mgmt_v3_unlock_fn (rpcsvc_request_t *req)
+{
+ gd1_mgmt_v3_unlock_req lock_req = {{0},};
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_synctasked = _gf_false;
+ gf_boolean_t free_ctx = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REQ_DECODE_FAIL, "Failed to decode unlock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0, "Received volume unlock req "
+ "from uuid: %s", uuid_utoa (lock_req.uuid));
+
+ /* Only peers known to this node may release locks. */
+ if (glusterd_peerinfo_find_by_uuid (lock_req.uuid) == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PEER_NOT_FOUND, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+ if (!ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy (ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+
+ ctx->dict = dict_new ();
+ if (!ctx->dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (lock_req.dict.dict_val,
+ lock_req.dict.dict_len, &ctx->dict);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ /* Defaults to _gf_false when the key is absent. */
+ is_synctasked = dict_get_str_boolean (ctx->dict,
+ "is_synctasked", _gf_false);
+ if (is_synctasked) {
+ ret = glusterd_syctasked_mgmt_v3_unlock (req, &lock_req, ctx);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3_locks");
+ /* Ignore the return code, as it shouldn't be propagated
+ * from the handler function so as to avoid double
+ * deletion of the req
+ */
+ ret = 0;
+ }
+
+ /* The above function does not take ownership of ctx.
+ * Therefore we need to free the ctx explicitly. */
+ free_ctx = _gf_true;
+ }
+ else {
+ /* Shouldn't ignore the return code here, and it should
+ * be propagated from the handler function as in failure
+ * case it doesn't delete the req object
+ */
+ ret = glusterd_op_state_machine_mgmt_v3_unlock (req, &lock_req,
+ ctx);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3_locks");
+ }
+
+out:
+
+ /* ctx is released here on any failure, or when the synctasked path
+ * asked for it; otherwise it is left alone after the state machine
+ * hand-off.
+ */
+ if (ctx && (ret || free_ctx)) {
+ if (ctx->dict)
+ dict_unref (ctx->dict);
+
+ GF_FREE (ctx);
+ }
+
+ free (lock_req.dict.dict_val);
+
+ gf_msg_trace (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Actor for GLUSTERD_MGMT_V3_LOCK: runs glusterd_handle_mgmt_v3_lock_fn
+ * for @req through glusterd_big_locked_handler() and returns its result.
+ */
+int
+glusterd_handle_mgmt_v3_lock (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               glusterd_handle_mgmt_v3_lock_fn);
+
+        return ret;
+}
+
+/*
+ * Actor for GLUSTERD_MGMT_V3_PRE_VALIDATE: runs
+ * glusterd_handle_pre_validate_fn for @req through
+ * glusterd_big_locked_handler() and returns its result.
+ */
+static int
+glusterd_handle_pre_validate (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               glusterd_handle_pre_validate_fn);
+
+        return ret;
+}
+
+/*
+ * Actor for GLUSTERD_MGMT_V3_BRICK_OP: runs glusterd_handle_brick_op_fn
+ * for @req through glusterd_big_locked_handler() and returns its result.
+ */
+static int
+glusterd_handle_brick_op (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               glusterd_handle_brick_op_fn);
+
+        return ret;
+}
+
+/*
+ * Actor for GLUSTERD_MGMT_V3_COMMIT: runs glusterd_handle_commit_fn for
+ * @req through glusterd_big_locked_handler() and returns its result.
+ */
+static int
+glusterd_handle_commit (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               glusterd_handle_commit_fn);
+
+        return ret;
+}
+
+/*
+ * Actor for GLUSTERD_MGMT_V3_POST_VALIDATE: runs
+ * glusterd_handle_post_validate_fn for @req through
+ * glusterd_big_locked_handler() and returns its result.
+ */
+static int
+glusterd_handle_post_validate (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               glusterd_handle_post_validate_fn);
+
+        return ret;
+}
+
+/*
+ * Actor for GLUSTERD_MGMT_V3_UNLOCK: runs
+ * glusterd_handle_mgmt_v3_unlock_fn for @req through
+ * glusterd_big_locked_handler() and returns its result.
+ */
+int
+glusterd_handle_mgmt_v3_unlock (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               glusterd_handle_mgmt_v3_unlock_fn);
+
+        return ret;
+}
+
+/*
+ * Actor table for the mgmt_v3 service program. Entries are placed with
+ * designated initializers, so the array index of each entry is its
+ * GLUSTERD_MGMT_V3_* procedure number; each entry names the procedure,
+ * repeats its number and points at the handler that serves it.
+ */
+rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
+ [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_mgmt_v3_null, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_LOCK] = { "MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK, glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_PRE_VALIDATE] = { "PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE, glusterd_handle_pre_validate, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_BRICK_OP] = { "BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP, glusterd_handle_brick_op, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_COMMIT] = { "COMMIT", GLUSTERD_MGMT_V3_COMMIT, glusterd_handle_commit, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_POST_VALIDATE] = { "POST_VAL", GLUSTERD_MGMT_V3_POST_VALIDATE, glusterd_handle_post_validate, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_UNLOCK] = { "MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK, glusterd_handle_mgmt_v3_unlock, NULL, 0, DRC_NA},
+};
+
+/*
+ * Service-side program descriptor for mgmt_v3: program number
+ * GD_MGMT_PROGRAM at version GD_MGMT_V3_VERSION, dispatching through
+ * gd_svc_mgmt_v3_actors, with the synctask flag enabled.
+ */
+struct rpcsvc_program gd_svc_mgmt_v3_prog = {
+ .progname = "GlusterD svc mgmt v3",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_V3_VERSION,
+ .numactors = GLUSTERD_MGMT_V3_MAXVALUE,
+ .actors = gd_svc_mgmt_v3_actors,
+ .synctask = _gf_true,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
new file mode 100644
index 00000000000..092283a7daf
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -0,0 +1,2411 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-volgen.h"
+#include "glusterd-store.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+#include "glusterd-hooks.h"
+
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+
+/*
+ * Record a failed peer response in @args and append a human readable line
+ * describing it to args->errstr.
+ *
+ * Nothing is recorded when @op_ret is zero. Otherwise args->op_ret and
+ * args->op_errno are overwritten, a line of the form
+ * "<phase> failed on <peer>. <detail>" is built (the peer is named by its
+ * hostname when @peerid resolves to a known peer, else by @uuid; the
+ * detail is @op_errstr when it is non-empty, else a generic hint), the
+ * line is logged, and args->errstr is replaced by the previous text plus
+ * the new line.
+ *
+ * @op_code selects the phase wording (one of GLUSTERD_MGMT_V3_*).
+ */
+void
+gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno,
+                           char *op_errstr, int op_code, uuid_t peerid,
+                           u_char *uuid)
+{
+        const char           *phase     = NULL;
+        const char           *detail    = "Please check log file for details.";
+        char                 *peer_name = NULL;
+        char                  phase_msg[PATH_MAX] = "";
+        char                  combined[PATH_MAX]  = "";
+        xlator_t             *this      = THIS;
+        glusterd_peerinfo_t  *peer      = NULL;
+
+        GF_ASSERT (this);
+        GF_ASSERT (args);
+        GF_ASSERT (uuid);
+
+        if (!op_ret)
+                return;
+
+        args->op_ret = op_ret;
+        args->op_errno = op_errno;
+
+        /* The peer entry is only dereferenced inside the RCU read section. */
+        rcu_read_lock ();
+        peer = glusterd_peerinfo_find (peerid, NULL);
+        peer_name = gf_strdup (peer ? peer->hostname : uuid_utoa (uuid));
+        rcu_read_unlock ();
+
+        if (op_errstr && op_errstr[0] != '\0')
+                detail = op_errstr;
+
+        switch (op_code) {
+        case GLUSTERD_MGMT_V3_LOCK:
+                phase = "Locking failed";
+                break;
+        case GLUSTERD_MGMT_V3_PRE_VALIDATE:
+                phase = "Pre Validation failed";
+                break;
+        case GLUSTERD_MGMT_V3_BRICK_OP:
+                phase = "Brick ops failed";
+                break;
+        case GLUSTERD_MGMT_V3_COMMIT:
+                phase = "Commit failed";
+                break;
+        case GLUSTERD_MGMT_V3_POST_VALIDATE:
+                phase = "Post Validation failed";
+                break;
+        case GLUSTERD_MGMT_V3_UNLOCK:
+                phase = "Unlocking failed";
+                break;
+        default:
+                phase = "Unknown error!";
+                break;
+        }
+
+        snprintf (phase_msg, sizeof (phase_msg), "%s on %s. %s",
+                  phase, peer_name, detail);
+
+        if (args->errstr) {
+                /* Keep earlier failures and add this one on a new line. */
+                snprintf (combined, sizeof (combined), "%s\n%s",
+                          args->errstr, phase_msg);
+                GF_FREE (args->errstr);
+                args->errstr = NULL;
+        } else {
+                snprintf (combined, sizeof (combined), "%s", phase_msg);
+        }
+
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_MGMTV3_OP_FAIL, "%s", phase_msg);
+        args->errstr = gf_strdup (combined);
+
+        GF_FREE (peer_name);
+}
+
+/*
+ * Run the pre-validation step of operation @op on the local node.
+ *
+ * Dispatches to the per-operation stage/prevalidate routine for snapshot,
+ * replace-brick, add-brick and volume start; any other operation is
+ * accepted without checks.
+ *
+ * Returns 0 on success, the failing routine's result otherwise, or -1 when
+ * @op_errno is NULL.
+ */
+int32_t
+gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict,
+                            char **op_errstr, dict_t *rsp_dict,
+                            uint32_t *op_errno)
+{
+        int32_t    rc   = -1;
+        xlator_t  *this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        switch (op) {
+        case GD_OP_SNAP:
+                rc = glusterd_snapshot_prevalidate (dict, op_errstr,
+                                                    rsp_dict, op_errno);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "Snapshot Prevalidate Failed");
+                }
+                break;
+
+        case GD_OP_REPLACE_BRICK:
+                rc = glusterd_op_stage_replace_brick (dict, op_errstr,
+                                                      rsp_dict);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "Replace-brick prevalidation failed.");
+                }
+                break;
+
+        case GD_OP_ADD_BRICK:
+                rc = glusterd_op_stage_add_brick (dict, op_errstr, rsp_dict);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "ADD-brick prevalidation failed.");
+                }
+                break;
+
+        case GD_OP_START_VOLUME:
+                rc = glusterd_op_stage_start_volume (dict, op_errstr,
+                                                     rsp_dict);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "Volume start prevalidation failed.");
+                }
+                break;
+
+        default:
+                /* Operations without a pre-validation step succeed. */
+                rc = 0;
+                break;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "OP = %d. Returning %d", op, rc);
+        return rc;
+}
+
+/*
+ * Run the brick-op step of operation @op on the local node.
+ *
+ * Only snapshot has a brick-op step; every other operation succeeds
+ * without doing anything. Returns 0 on success or the result of
+ * glusterd_snapshot_brickop() on failure.
+ */
+int32_t
+gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict,
+                        char **op_errstr, dict_t *rsp_dict)
+{
+        int32_t    rc   = 0;
+        xlator_t  *this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+
+        if (op == GD_OP_SNAP) {
+                rc = glusterd_snapshot_brickop (dict, op_errstr, rsp_dict);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_BRICK_OP_FAIL,
+                                "snapshot brickop failed");
+                }
+        }
+
+        gf_msg_trace (this->name, 0, "OP = %d. Returning %d", op, rc);
+        return rc;
+}
+
+/*
+ * Run the commit step of operation @op on the local node.
+ *
+ * The pre-commit hook is run first, then the per-operation commit routine
+ * for snapshot, replace-brick, add-brick and volume start; any other
+ * operation succeeds without further work.
+ *
+ * Returns 0 on success, the failing routine's result otherwise, or -1 when
+ * @op_errno is NULL (in which case neither the hook nor a commit runs).
+ */
+int32_t
+gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict,
+                      char **op_errstr, uint32_t *op_errno,
+                      dict_t *rsp_dict)
+{
+        int32_t    rc   = -1;
+        xlator_t  *this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+        GF_ASSERT (rsp_dict);
+
+        glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_PRE);
+
+        switch (op) {
+        case GD_OP_SNAP:
+                rc = glusterd_snapshot (dict, op_errstr, op_errno, rsp_dict);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_COMMIT_OP_FAIL,
+                                "Snapshot Commit Failed");
+                }
+                break;
+
+        case GD_OP_REPLACE_BRICK:
+                rc = glusterd_op_replace_brick (dict, rsp_dict);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_COMMIT_OP_FAIL,
+                                "Replace-brick commit failed.");
+                }
+                break;
+
+        case GD_OP_ADD_BRICK:
+                rc = glusterd_op_add_brick (dict, op_errstr);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_COMMIT_OP_FAIL,
+                                "Add-brick commit failed.");
+                }
+                break;
+
+        case GD_OP_START_VOLUME:
+                rc = glusterd_op_start_volume (dict, op_errstr);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_COMMIT_OP_FAIL,
+                                "Volume start commit failed.");
+                }
+                break;
+
+        default:
+                /* Operations without a commit step succeed. */
+                rc = 0;
+                break;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "OP = %d. Returning %d", op, rc);
+        return rc;
+}
+
+/*
+ * Run the post-validation step of operation @op on the local node.
+ *
+ * @op_ret is the outcome of the preceding phases; the post-commit hook is
+ * run only when it is 0. Per operation:
+ *   - snapshot:     glusterd_snapshot_postvalidate()
+ *   - add-brick:    regenerate volfiles/notify services and store volinfo
+ *   - volume start: for tier volumes, set up defrag info (unless a
+ *                   remove-brick is recorded in rebal.op) and restart
+ *                   rebalance
+ * Any other operation succeeds without further work.
+ *
+ * Returns 0 on success or the failing step's result.
+ */
+int32_t
+gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict,
+                             char **op_errstr, dict_t *rsp_dict)
+{
+        int32_t              ret     = -1;
+        xlator_t            *this    = NULL;
+        char                *volname = NULL;
+        glusterd_volinfo_t  *volinfo = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+
+        if (op_ret == 0)
+                glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_POST);
+
+        switch (op) {
+        case GD_OP_SNAP:
+        {
+                ret = glusterd_snapshot_postvalidate (dict, op_ret,
+                                                      op_errstr,
+                                                      rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_POST_VALIDATION_FAIL,
+                                "postvalidate operation failed");
+                        goto out;
+                }
+                break;
+        }
+        case GD_OP_ADD_BRICK:
+        {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Unable to get"
+                                " volume name");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        /* The lookup failed; report it as such rather
+                         * than as an allocation failure.
+                         */
+                        gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND, "Unable to "
+                                "find volume %s", volname);
+                        goto out;
+                }
+                ret = glusterd_create_volfiles_and_notify_services (
+                                                                volinfo);
+                if (ret)
+                        goto out;
+                ret = glusterd_store_volinfo (volinfo,
+                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret)
+                        goto out;
+                break;
+
+        }
+        case GD_OP_START_VOLUME:
+        {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Unable to get"
+                                " volume name");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        /* The lookup failed; report it as such rather
+                         * than as an allocation failure.
+                         */
+                        gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND, "Unable to "
+                                "find volume %s", volname);
+                        goto out;
+                }
+
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                        if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+                                glusterd_defrag_info_set (volinfo, dict,
+                                                  GF_DEFRAG_CMD_START_TIER,
+                                                  GF_DEFRAG_CMD_START,
+                                                  GD_OP_REBALANCE);
+                        }
+                        glusterd_restart_rebalance_for_volume (volinfo);
+                }
+                break;
+        }
+
+        default:
+                break;
+        }
+
+        ret = 0;
+
+out:
+        gf_msg_trace (this->name, 0, "OP = %d. Returning %d", op, ret);
+        return ret;
+}
+
+/*
+ * Reply callback for a mgmt_v3 lock request sent to a peer.
+ *
+ * @myframe carries the struct syncargs of the waiting task in
+ * frame->local and the heap-allocated peer uuid in frame->cookie; both
+ * are detached from the frame here and the uuid is freed before
+ * returning. The outcome (op_ret/op_errno, defaulting to -1 when the
+ * reply is missing or cannot be decoded, ENOTCONN when the rpc failed) is
+ * folded into the syncargs via gd_mgmt_v3_collate_errors(), and the
+ * waiting task is woken with synctask_barrier_wake().
+ *
+ * Always returns 0.
+ */
+int32_t
+gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ /* Even though the lock command has failed, while collating the errors
+ (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be
+ used. @args is obtained from frame->local. So before checking the
+ status of the request and going out if its a failure, args should be
+ set to frame->local. Otherwise, while collating args will be NULL.
+ This applies to other phases such as prevalidate, brickop, commit and
+ postvalidate also.
+ */
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ /* No error string is passed for the lock phase. */
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_LOCK, *peerid, rsp.uuid);
+ GF_FREE (peerid);
+
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/*
+ * RPC reply callback registered for mgmt_v3 lock requests: runs
+ * gd_mgmt_v3_lock_cbk_fn through glusterd_big_locked_cbk() and returns
+ * its result.
+ */
+int32_t
+gd_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov,
+                     int count, void *myframe)
+{
+        int32_t ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               gd_mgmt_v3_lock_cbk_fn);
+
+        return ret;
+}
+
+/*
+ * Send a mgmt_v3 lock request for operation @op to @peerinfo.
+ *
+ * @op_ctx is serialized into the request, @my_uuid identifies the sender,
+ * and a heap copy of the peer's uuid is passed along as the request
+ * cookie (gd_mgmt_v3_lock_cbk_fn frees it). @args is the syncargs the
+ * callback reports into. @recv_uuid is not used here.
+ *
+ * Returns 0 when the request was submitted, non-zero otherwise.
+ */
+int
+gd_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx,
+                 glusterd_peerinfo_t *peerinfo,
+                 struct syncargs *args, uuid_t my_uuid,
+                 uuid_t recv_uuid)
+{
+        gd1_mgmt_v3_lock_req   lock_req    = {{0},};
+        int32_t                rc          = -1;
+        xlator_t              *this        = THIS;
+        uuid_t                *peer_cookie = NULL;
+
+        GF_ASSERT (this);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (args);
+
+        rc = dict_allocate_and_serialize (op_ctx,
+                                          &lock_req.dict.dict_val,
+                                          &lock_req.dict.dict_len);
+        if (rc != 0)
+                goto out;
+
+        gf_uuid_copy (lock_req.uuid, my_uuid);
+        lock_req.op = op;
+
+        GD_ALLOC_COPY_UUID (peer_cookie, peerinfo->uuid, rc);
+        if (rc != 0)
+                goto out;
+
+        rc = gd_syncop_submit_request (peerinfo->rpc, &lock_req, args,
+                                       peer_cookie,
+                                       &gd_mgmt_v3_prog,
+                                       GLUSTERD_MGMT_V3_LOCK,
+                                       gd_mgmt_v3_lock_cbk,
+                                       (xdrproc_t) xdr_gd1_mgmt_v3_lock_req);
+out:
+        /* The serialized payload is no longer needed once submit returns. */
+        GF_FREE (lock_req.dict.dict_val);
+        gf_msg_trace (this->name, 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Acquire the mgmt_v3 locks named in @dict cluster-wide for operation @op.
+ *
+ * The locks are first taken on the local node; on success *is_acquired is
+ * set to _gf_true. A lock request is then sent to every peer that is
+ * connected, was present before the transaction started
+ * (generation <= @txn_generation) and, except for GD_OP_SYNC_VOLUME, is in
+ * the befriended state. The function waits for all replies.
+ *
+ * @op_errstr: on failure receives a newly allocated "Another transaction
+ *             is in progress" message (or NULL if that allocation fails).
+ *             The peers' collated error text is logged, not returned.
+ * @op_errno:  receives the local lock errno and, once peers were
+ *             contacted, the collated peer errno.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int
+glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict,
+                                    char **op_errstr, uint32_t *op_errno,
+                                    gf_boolean_t *is_acquired,
+                                    uint32_t txn_generation)
+{
+        glusterd_peerinfo_t  *peerinfo     = NULL;
+        int32_t               ret          = -1;
+        int32_t               peer_cnt     = 0;
+        struct syncargs       args         = {0};
+        uuid_t                peer_uuid    = {0};
+        xlator_t             *this         = NULL;
+        glusterd_conf_t      *conf         = NULL;
+        char                 *peers_errstr = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (is_acquired);
+
+        /* Trying to acquire multiple mgmt_v3 locks on local node */
+        ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID, op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                        "Failed to acquire mgmt_v3 locks on localhost");
+                goto out;
+        }
+
+        *is_acquired = _gf_true;
+
+        /* Sending mgmt_v3 lock req to other nodes in the cluster */
+        gd_syncargs_init (&args, NULL);
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+                gd_mgmt_v3_lock (op, dict, peerinfo, &args,
+                                 MY_UUID, peer_uuid);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.errstr) {
+                /* Remember our own copy so it can be released if the
+                 * failure path below replaces *op_errstr.
+                 */
+                peers_errstr = gf_strdup (args.errstr);
+                *op_errstr = peers_errstr;
+        }
+
+        ret = args.op_ret;
+        *op_errno = args.op_errno;
+
+        gf_msg_debug (this->name, 0, "Sent lock op req for %s "
+                      "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+        if (ret) {
+                if (*op_errstr)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL, "%s",
+                                *op_errstr);
+
+                /* gf_asprintf() below overwrites *op_errstr; free the copy
+                 * made above first so it is not leaked.
+                 */
+                if (peers_errstr) {
+                        GF_FREE (peers_errstr);
+                        peers_errstr = NULL;
+                        *op_errstr = NULL;
+                }
+
+                ret = gf_asprintf (op_errstr,
+                                   "Another transaction is in progress "
+                                   "Please try again after sometime.");
+
+                if (ret == -1)
+                        *op_errstr = NULL;
+
+                ret = -1;
+        }
+
+        return ret;
+}
+
+/*
+ * Merge one pre-validation response dictionary @rsp into the aggregate
+ * dictionary @aggr, using the aggregation routine that matches @op.
+ *
+ * Snapshot, replace-brick, volume start and add-brick are supported; any
+ * other operation is rejected as invalid.
+ *
+ * Returns 0 on success, the aggregation routine's result on failure, or
+ * -1 for an unsupported operation.
+ */
+int
+glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op,
+                                     dict_t *aggr, dict_t *rsp)
+{
+        int32_t    rc   = 0;
+        xlator_t  *this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (aggr);
+        GF_ASSERT (rsp);
+
+        switch (op) {
+        case GD_OP_SNAP:
+                rc = glusterd_snap_pre_validate_use_rsp_dict (aggr, rsp);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "Failed to aggregate prevalidate "
+                                "response dictionaries.");
+                }
+                break;
+
+        case GD_OP_REPLACE_BRICK:
+                rc = glusterd_rb_use_rsp_dict (aggr, rsp);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "Failed to aggregate prevalidate "
+                                "response dictionaries.");
+                }
+                break;
+
+        case GD_OP_START_VOLUME:
+        case GD_OP_ADD_BRICK:
+                rc = glusterd_aggr_brick_mount_dirs (aggr, rsp);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL, "Failed to "
+                                "aggregate brick mount dirs");
+                }
+                break;
+
+        default:
+                rc = -1;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid op (%s)",
+                        gd_op_list[op]);
+                break;
+        }
+
+        return rc;
+}
+
+/*
+ * Reply callback for a pre-validation request sent to a peer.
+ *
+ * @myframe carries the struct syncargs of the waiting task in
+ * frame->local and the heap-allocated peer uuid in frame->cookie; both
+ * are detached from the frame here and the uuid is freed before
+ * returning. When the reply carries a dictionary it is unserialized and
+ * merged into args->dict under args->lock_dict. The outcome is folded
+ * into the syncargs via gd_mgmt_v3_collate_errors() and the waiting task
+ * is woken with synctask_barrier_wake().
+ *
+ * Always returns 0.
+ */
+int32_t
+gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_pre_val_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ free (rsp.dict.dict_val);
+ goto out;
+ } else {
+ /* NOTE(review): presumably hands the buffer to the dict so it is
+ * free()d when the dict is destroyed - confirm against dict_t. */
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+ /* args->dict is shared between callbacks; serialize the merge. */
+ pthread_mutex_lock (&args->lock_dict);
+ {
+ ret = glusterd_pre_validate_aggr_rsp_dict (rsp.op, args->dict,
+ rsp_dict);
+ }
+ pthread_mutex_unlock (&args->lock_dict);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ /* An aggregation failure turns a successful peer reply into a
+ * failure; a failed peer reply keeps its own codes. */
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ *peerid, rsp.uuid);
+
+ if (rsp.op_errstr)
+ free (rsp.op_errstr);
+ GF_FREE (peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/*
+ * RPC reply callback registered for pre-validation requests: runs
+ * gd_mgmt_v3_pre_validate_cbk_fn through glusterd_big_locked_cbk() and
+ * returns its result.
+ */
+int32_t
+gd_mgmt_v3_pre_validate_cbk (struct rpc_req *req, struct iovec *iov,
+                             int count, void *myframe)
+{
+        int32_t ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               gd_mgmt_v3_pre_validate_cbk_fn);
+
+        return ret;
+}
+
+/*
+ * Send a pre-validation request for operation @op to @peerinfo.
+ *
+ * @op_ctx is serialized into the request, @my_uuid identifies the sender,
+ * and a heap copy of the peer's uuid is passed along as the request
+ * cookie (gd_mgmt_v3_pre_validate_cbk_fn frees it). @args is the syncargs
+ * the callback reports into. @recv_uuid is not used here.
+ *
+ * Returns 0 when the request was submitted, non-zero otherwise.
+ */
+int
+gd_mgmt_v3_pre_validate_req (glusterd_op_t op, dict_t *op_ctx,
+                             glusterd_peerinfo_t *peerinfo,
+                             struct syncargs *args, uuid_t my_uuid,
+                             uuid_t recv_uuid)
+{
+        int32_t                   rc          = -1;
+        gd1_mgmt_v3_pre_val_req   preval_req  = {{0},};
+        xlator_t                 *this        = THIS;
+        uuid_t                   *peer_cookie = NULL;
+
+        GF_ASSERT (this);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (args);
+
+        rc = dict_allocate_and_serialize (op_ctx,
+                                          &preval_req.dict.dict_val,
+                                          &preval_req.dict.dict_len);
+        if (rc != 0)
+                goto out;
+
+        gf_uuid_copy (preval_req.uuid, my_uuid);
+        preval_req.op = op;
+
+        GD_ALLOC_COPY_UUID (peer_cookie, peerinfo->uuid, rc);
+        if (rc != 0)
+                goto out;
+
+        rc = gd_syncop_submit_request (peerinfo->rpc, &preval_req, args,
+                                       peer_cookie,
+                                       &gd_mgmt_v3_prog,
+                                       GLUSTERD_MGMT_V3_PRE_VALIDATE,
+                                       gd_mgmt_v3_pre_validate_cbk,
+                                       (xdrproc_t) xdr_gd1_mgmt_v3_pre_val_req);
+out:
+        /* The serialized payload is no longer needed once submit returns. */
+        GF_FREE (preval_req.dict.dict_val);
+        gf_msg_trace (this->name, 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Run the pre-validation phase of operation @op across the cluster.
+ *
+ * Pre-validation is first run on the local node and its response is
+ * aggregated into @req_dict. A pre-validation request is then sent to
+ * every peer that is connected, was present before the transaction
+ * started (generation <= @txn_generation) and, except for
+ * GD_OP_SYNC_VOLUME, is in the befriended state; the function waits for
+ * all replies.
+ *
+ * @op_errstr: receives an allocated error message on failure (a generic
+ *             one when local validation failed without providing any, or
+ *             a copy of the peers' collated error text).
+ * @op_errno:  receives the local or collated peer errno; must not be NULL.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict,
+                               char **op_errstr, uint32_t *op_errno,
+                               uint32_t txn_generation)
+{
+        int32_t               ret       = -1;
+        int32_t               peer_cnt  = 0;
+        dict_t               *rsp_dict  = NULL;
+        glusterd_peerinfo_t  *peerinfo  = NULL;
+        struct syncargs       args      = {0};
+        uuid_t                peer_uuid = {0};
+        xlator_t             *this      = NULL;
+        glusterd_conf_t      *conf      = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (req_dict);
+        GF_ASSERT (op_errstr);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to create response dictionary");
+                goto out;
+        }
+
+        /* Pre Validation on local node */
+        ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr,
+                                          rsp_dict, op_errno);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL,
+                        "Pre Validation failed for "
+                        "operation %s on local node",
+                        gd_op_list[op]);
+
+                if (*op_errstr == NULL) {
+                        ret = gf_asprintf (op_errstr,
+                                           "Pre-validation failed "
+                                           "on localhost. Please "
+                                           "check log file for details");
+                        if (ret == -1)
+                                *op_errstr = NULL;
+
+                        ret = -1;
+                }
+                goto out;
+        }
+
+        ret = glusterd_pre_validate_aggr_rsp_dict (op, req_dict,
+                                                   rsp_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL, "%s",
+                        "Failed to aggregate response from "
+                        " node/brick");
+                goto out;
+        }
+
+        /* The local response has been merged; it is not needed while
+         * waiting on the peers.
+         */
+        dict_unref (rsp_dict);
+        rsp_dict = NULL;
+
+        /* Sending Pre Validation req to other nodes in the cluster */
+        gd_syncargs_init (&args, req_dict);
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+                gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args,
+                                             MY_UUID, peer_uuid);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL,
+                        "Pre Validation failed on peers");
+
+                if (args.errstr)
+                        *op_errstr = gf_strdup (args.errstr);
+        }
+
+        ret = args.op_ret;
+        *op_errno = args.op_errno;
+
+        gf_msg_debug (this->name, 0, "Sent pre valaidation req for %s "
+                      "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+        /* Still set only when a local failure jumped here before the
+         * explicit unref above; release it so it is not leaked.
+         */
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        return ret;
+}
+
+/*
+ * Build the request dictionary that is sent to peers for operation @op.
+ *
+ * A new dictionary is created and, for snapshot, volume start, add-brick
+ * and replace-brick, filled with a copy of @dict. For the latter three
+ * the volume id is first added to @dict via glusterd_dict_set_volid()
+ * unless the volume name is "all". Other operations get an empty
+ * dictionary.
+ *
+ * @req:       receives the new dictionary on success; the caller owns the
+ *             reference. Left untouched on failure.
+ * @op_errstr: passed through to glusterd_dict_set_volid().
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict,
+                                glusterd_op_t op)
+{
+        int32_t    ret      = -1;
+        dict_t    *req_dict = NULL;
+        xlator_t  *this     = NULL;
+        char      *volname  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (dict);
+
+        req_dict = dict_new ();
+        if (!req_dict)
+                goto out;
+
+        switch (op) {
+        case GD_OP_SNAP:
+                dict_copy (dict, req_dict);
+                break;
+        case GD_OP_START_VOLUME:
+        case GD_OP_ADD_BRICK:
+        case GD_OP_REPLACE_BRICK:
+        {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "volname is not present in "
+                                "operation ctx");
+                        goto out;
+                }
+
+                if (strcasecmp (volname, "all")) {
+                        ret = glusterd_dict_set_volid (dict,
+                                                       volname,
+                                                       op_errstr);
+                        if (ret)
+                                goto out;
+                }
+                dict_copy (dict, req_dict);
+        }
+                break;
+        default:
+                break;
+        }
+
+        *req = req_dict;
+        ret = 0;
+out:
+        /* On failure the dictionary was never handed to the caller, so
+         * drop the reference taken by dict_new() to avoid leaking it.
+         */
+        if (ret && req_dict)
+                dict_unref (req_dict);
+
+        return ret;
+}
+
+/*
+ * Reply callback for a brick-op request sent to a peer.
+ *
+ * @myframe carries the struct syncargs of the waiting task in
+ * frame->local and the heap-allocated peer uuid in frame->cookie; both
+ * are detached from the frame here and the uuid is freed before
+ * returning. The outcome (op_ret/op_errno, defaulting to -1 when the
+ * reply is missing or cannot be decoded, ENOTCONN when the rpc failed) is
+ * folded into the syncargs via gd_mgmt_v3_collate_errors(), and the
+ * waiting task is woken with synctask_barrier_wake().
+ *
+ * Always returns 0.
+ */
+int32_t
+gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_brick_op_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ /* If the operation failed, then iov can be NULL. So better check the
+ status of the operation and then worry about iov (if the status of
+ the command is success)
+ */
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_BRICK_OP, *peerid,
+ rsp.uuid);
+
+ /* Reply fields are released with plain free(), not GF_FREE. */
+ if (rsp.op_errstr)
+ free (rsp.op_errstr);
+
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ GF_FREE (peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/*
+ * RPC callback registered for brick-op requests. It forwards the reply to
+ * gd_mgmt_v3_brick_op_cbk_fn through glusterd_big_locked_cbk() and returns
+ * whatever that call returns.
+ */
+int32_t
+gd_mgmt_v3_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+                         int count, void *myframe)
+{
+        int32_t status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                                  gd_mgmt_v3_brick_op_cbk_fn);
+
+        return status;
+}
+
+/*
+ * Send a GLUSTERD_MGMT_V3_BRICK_OP request to one peer.
+ *
+ * @op:        operation code placed in the request.
+ * @op_ctx:    dictionary serialized into the request body.
+ * @peerinfo:  destination peer; its rpc handle and uuid are used.
+ * @args:      syncargs handed to the submit call for the reply callback.
+ * @my_uuid:   uuid copied into the request as the sender.
+ * @recv_uuid: not used by this function.
+ *
+ * Returns 0 on success and non-zero when serialization, the uuid copy or
+ * the submit fails.
+ */
+int
+gd_mgmt_v3_brick_op_req (glusterd_op_t op, dict_t *op_ctx,
+                         glusterd_peerinfo_t *peerinfo,
+                         struct syncargs *args, uuid_t my_uuid,
+                         uuid_t recv_uuid)
+{
+        int32_t                   ret    = -1;
+        gd1_mgmt_v3_brick_op_req  req    = {{0},};
+        xlator_t                 *this   = NULL;
+        /* A pointer, so initialise it with NULL like the sibling *_req
+         * helpers do rather than with a brace initializer. */
+        uuid_t                   *peerid = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (args);
+
+        ret = dict_allocate_and_serialize (op_ctx,
+                                           &req.dict.dict_val,
+                                           &req.dict.dict_len);
+        if (ret)
+                goto out;
+
+        gf_uuid_copy (req.uuid, my_uuid);
+        req.op = op;
+
+        /* The copy travels as the frame cookie; the reply callback
+         * releases it. */
+        GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+        if (ret)
+                goto out;
+
+        ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerid,
+                                        &gd_mgmt_v3_prog,
+                                        GLUSTERD_MGMT_V3_BRICK_OP,
+                                        gd_mgmt_v3_brick_op_cbk,
+                                        (xdrproc_t) xdr_gd1_mgmt_v3_brick_op_req);
+out:
+        GF_FREE (req.dict.dict_val);
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Brick-op phase of a mgmt_v3 transaction: run the brick op on the local
+ * node, then send it to every eligible peer and wait for all replies.
+ *
+ * @op:             operation being performed.
+ * @req_dict:       request payload used locally and sent to peers.
+ * @op_errstr:      out parameter for an allocated error string.
+ * @txn_generation: peer-list generation saved at transaction start; peers
+ *                  with a higher generation are skipped.
+ *
+ * Returns 0 on success (including the case of no eligible peers), -1 when
+ * the response dict cannot be created, the non-zero local result when the
+ * local step fails, and otherwise the aggregated peer result
+ * (args.op_ret).
+ */
+int
+glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, char **op_errstr,
+                           uint32_t txn_generation)
+{
+        int32_t              ret       = -1;
+        int32_t              peer_cnt  = 0;
+        dict_t              *rsp_dict  = NULL;
+        glusterd_peerinfo_t *peerinfo  = NULL;
+        struct syncargs      args      = {0};
+        uuid_t               peer_uuid = {0};
+        xlator_t            *this      = NULL;
+        glusterd_conf_t     *conf      = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (req_dict);
+        GF_ASSERT (op_errstr);
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to create response dictionary");
+                goto out;
+        }
+
+        /* Perform brick op on local node */
+        ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr,
+                                      rsp_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_OP_FAIL,
+                        "Brick ops failed for "
+                        "operation %s on local node",
+                        gd_op_list[op]);
+
+                if (*op_errstr == NULL) {
+                        ret = gf_asprintf (op_errstr,
+                                           "Brick ops failed "
+                                           "on localhost. Please "
+                                           "check log file for details");
+                        if (ret == -1)
+                                *op_errstr = NULL;
+
+                        ret = -1;
+                }
+                goto out;
+        }
+
+        dict_unref (rsp_dict);
+        rsp_dict = NULL;
+
+        /* Sending brick op req to other nodes in the cluster */
+        gd_syncargs_init (&args, NULL);
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+                gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args,
+                                         MY_UUID, peer_uuid);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_OP_FAIL,
+                        "Brick ops failed on peers");
+
+                if (args.errstr)
+                        *op_errstr = gf_strdup (args.errstr);
+        }
+
+        ret = args.op_ret;
+
+        gf_msg_debug (this->name, 0, "Sent brick op req for %s "
+                      "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+        /* rsp_dict is still set only when the local step failed; drop the
+         * reference here so that path does not leak it. */
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+        return ret;
+}
+
+/*
+ * Reply handler for a GLUSTERD_MGMT_V3_COMMIT request.
+ *
+ * Decodes the reply, unserializes the response dict when one was sent,
+ * merges it into args->dict under args->lock_dict, records the outcome
+ * through gd_mgmt_v3_collate_errors() and wakes the barrier the sender is
+ * waiting on. Always returns 0.
+ */
+int32_t
+gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov,
+                          int count, void *myframe)
+{
+        int32_t                 ret      = -1;
+        struct syncargs        *args     = NULL;
+        gd1_mgmt_v3_commit_rsp  rsp      = {{0},};
+        call_frame_t           *frame    = NULL;
+        int32_t                 op_ret   = -1;
+        int32_t                 op_errno = -1;
+        dict_t                 *rsp_dict = NULL;
+        xlator_t               *this     = NULL;
+        uuid_t                 *peerid   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (myframe);
+
+        frame = myframe;
+        args = frame->local;
+        peerid = frame->cookie;
+        frame->local = NULL;
+        frame->cookie = NULL;
+
+        if (-1 == req->rpc_status) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+                                        EINVAL);
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
+        if (ret < 0)
+                goto out;
+
+        if (rsp.dict.dict_len) {
+                /* Unserialize the dictionary */
+                rsp_dict = dict_new ();
+
+                ret = dict_unserialize (rsp.dict.dict_val,
+                                        rsp.dict.dict_len,
+                                        &rsp_dict);
+                if (ret < 0) {
+                        free (rsp.dict.dict_val);
+                        goto out;
+                } else {
+                        /* The dict now owns the serialized buffer. */
+                        rsp_dict->extra_stdfree = rsp.dict.dict_val;
+                }
+        }
+
+        gf_uuid_copy (args->uuid, rsp.uuid);
+        pthread_mutex_lock (&args->lock_dict);
+        {
+                ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict,
+                                                     rsp_dict);
+        }
+        pthread_mutex_unlock (&args->lock_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RESP_AGGR_FAIL, "%s",
+                        "Failed to aggregate response from "
+                        " node/brick");
+                /* Report the aggregation failure only when the peer
+                 * itself reported success. */
+                if (!rsp.op_ret)
+                        op_ret = ret;
+                else {
+                        op_ret = rsp.op_ret;
+                        op_errno = rsp.op_errno;
+                }
+        } else {
+                op_ret = rsp.op_ret;
+                op_errno = rsp.op_errno;
+        }
+
+out:
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        gd_mgmt_v3_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+                                   GLUSTERD_MGMT_V3_COMMIT, *peerid, rsp.uuid);
+
+        /* Release the decoded error string, as the brick-op and
+         * post-validate callbacks do after collating errors. Without
+         * this the string is leaked on every reply that carries one. */
+        if (rsp.op_errstr)
+                free (rsp.op_errstr);
+
+        GF_FREE (peerid);
+        /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+         * the caller function.
+         */
+        if (req->rpc_status != -1)
+                STACK_DESTROY (frame->root);
+        synctask_barrier_wake(args);
+        return 0;
+}
+
+/*
+ * RPC callback registered for commit requests. It forwards the reply to
+ * gd_mgmt_v3_commit_cbk_fn through glusterd_big_locked_cbk() and returns
+ * whatever that call returns.
+ */
+int32_t
+gd_mgmt_v3_commit_cbk (struct rpc_req *req, struct iovec *iov,
+                       int count, void *myframe)
+{
+        int32_t status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                                  gd_mgmt_v3_commit_cbk_fn);
+
+        return status;
+}
+
+/*
+ * Send a GLUSTERD_MGMT_V3_COMMIT request to one peer.
+ *
+ * op_ctx is serialized into the request, my_uuid is recorded as the
+ * sender, and a copy of the peer's uuid is passed to the submit call as
+ * the cookie. recv_uuid is not used. Returns 0 on success and non-zero
+ * on failure.
+ */
+int
+gd_mgmt_v3_commit_req (glusterd_op_t op, dict_t *op_ctx,
+                       glusterd_peerinfo_t *peerinfo,
+                       struct syncargs *args, uuid_t my_uuid,
+                       uuid_t recv_uuid)
+{
+        xlator_t               *this    = THIS;
+        gd1_mgmt_v3_commit_req  payload = {{0},};
+        uuid_t                 *cookie  = NULL;
+        int32_t                 ret     = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (args);
+
+        ret = dict_allocate_and_serialize (op_ctx, &payload.dict.dict_val,
+                                           &payload.dict.dict_len);
+        if (ret != 0)
+                goto out;
+
+        payload.op = op;
+        gf_uuid_copy (payload.uuid, my_uuid);
+
+        GD_ALLOC_COPY_UUID (cookie, peerinfo->uuid, ret);
+        if (ret != 0)
+                goto out;
+
+        ret = gd_syncop_submit_request (peerinfo->rpc, &payload, args, cookie,
+                                        &gd_mgmt_v3_prog,
+                                        GLUSTERD_MGMT_V3_COMMIT,
+                                        gd_mgmt_v3_commit_cbk,
+                                        (xdrproc_t) xdr_gd1_mgmt_v3_commit_req);
+out:
+        /* The serialized buffer is released on every path. */
+        GF_FREE (payload.dict.dict_val);
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit phase of a mgmt_v3 transaction: commit on the local node, merge
+ * the local response into op_ctx, then send the commit to every eligible
+ * peer and wait for all replies.
+ *
+ * @op:             operation being performed.
+ * @op_ctx:         dict that local and peer responses are aggregated into.
+ * @req_dict:       request payload used locally and sent to peers.
+ * @op_errstr:      out parameter for an allocated error string.
+ * @op_errno:       out parameter; must not be NULL. Set from the peers'
+ *                  aggregated errno once replies have been collected.
+ * @txn_generation: peer-list generation saved at transaction start; peers
+ *                  with a higher generation are skipped.
+ *
+ * Returns 0 on success (including the case of no eligible peers), -1 when
+ * op_errno is NULL or the response dict cannot be created, the non-zero
+ * local result when the local commit or the aggregation fails, and
+ * otherwise the aggregated peer result (args.op_ret).
+ */
+int
+glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+                         char **op_errstr, uint32_t *op_errno,
+                         uint32_t txn_generation)
+{
+        int32_t              ret       = -1;
+        int32_t              peer_cnt  = 0;
+        dict_t              *rsp_dict  = NULL;
+        glusterd_peerinfo_t *peerinfo  = NULL;
+        struct syncargs      args      = {0};
+        uuid_t               peer_uuid = {0};
+        xlator_t            *this      = NULL;
+        glusterd_conf_t     *conf      = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (req_dict);
+        GF_ASSERT (op_errstr);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to create response dictionary");
+                goto out;
+        }
+
+        /* Commit on local node */
+        ret = gd_mgmt_v3_commit_fn (op, req_dict, op_errstr,
+                                    op_errno, rsp_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_OP_FAIL,
+                        "Commit failed for "
+                        "operation %s on local node",
+                        gd_op_list[op]);
+
+                if (*op_errstr == NULL) {
+                        ret = gf_asprintf (op_errstr,
+                                           "Commit failed "
+                                           "on localhost. Please "
+                                           "check log file for details.");
+                        if (ret == -1)
+                                *op_errstr = NULL;
+
+                        ret = -1;
+                }
+                goto out;
+        }
+
+        ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx,
+                                             rsp_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RESP_AGGR_FAIL, "%s",
+                        "Failed to aggregate response from "
+                        " node/brick");
+                goto out;
+        }
+
+        dict_unref (rsp_dict);
+        rsp_dict = NULL;
+
+        /* Sending commit req to other nodes in the cluster */
+        gd_syncargs_init (&args, op_ctx);
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+                gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args,
+                                       MY_UUID, peer_uuid);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_OP_FAIL,
+                        "Commit failed on peers");
+
+                if (args.errstr)
+                        *op_errstr = gf_strdup (args.errstr);
+        }
+
+        ret = args.op_ret;
+        *op_errno = args.op_errno;
+
+        gf_msg_debug (this->name, 0, "Sent commit req for %s to %d "
+                      "peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+        /* rsp_dict is still set only when the local commit or the
+         * aggregation failed; drop the reference so those paths do not
+         * leak it. */
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+        return ret;
+}
+
+/*
+ * Reply handler for a GLUSTERD_MGMT_V3_POST_VALIDATE request.
+ *
+ * Takes the syncargs and the peer uuid copy off the frame, decodes the
+ * reply when the RPC succeeded, records the outcome through
+ * gd_mgmt_v3_collate_errors() and wakes the barrier the sender is waiting
+ * on. Always returns 0.
+ */
+int32_t
+gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
+                                 int count, void *myframe)
+{
+        xlator_t                 *this     = THIS;
+        call_frame_t             *frame    = myframe;
+        struct syncargs          *args     = NULL;
+        uuid_t                   *peerid   = NULL;
+        gd1_mgmt_v3_post_val_rsp  reply    = {{0},};
+        int32_t                   decoded  = -1;
+        int32_t                   op_ret   = -1;
+        int32_t                   op_errno = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (myframe);
+
+        args = frame->local;
+        peerid = frame->cookie;
+        frame->local = NULL;
+        frame->cookie = NULL;
+
+        if (req->rpc_status == -1) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+                                        EINVAL);
+
+        decoded = xdr_to_generic (*iov, &reply,
+                                  (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
+        if (decoded < 0)
+                goto out;
+
+        gf_uuid_copy (args->uuid, reply.uuid);
+        op_ret = reply.op_ret;
+        op_errno = reply.op_errno;
+
+out:
+        gd_mgmt_v3_collate_errors (args, op_ret, op_errno, reply.op_errstr,
+                                   GLUSTERD_MGMT_V3_POST_VALIDATE, *peerid,
+                                   reply.uuid);
+
+        /* free (NULL) is a no-op, so no guards are needed. */
+        free (reply.op_errstr);
+        free (reply.dict.dict_val);
+        GF_FREE (peerid);
+
+        /* With rpc_status == -1 the caller destroys the stack. */
+        if (req->rpc_status != -1)
+                STACK_DESTROY (frame->root);
+        synctask_barrier_wake(args);
+        return 0;
+}
+
+/*
+ * RPC callback registered for post-validate requests. It forwards the
+ * reply to gd_mgmt_v3_post_validate_cbk_fn through
+ * glusterd_big_locked_cbk() and returns whatever that call returns.
+ */
+int32_t
+gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov,
+                              int count, void *myframe)
+{
+        int32_t status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                                  gd_mgmt_v3_post_validate_cbk_fn);
+
+        return status;
+}
+
+/*
+ * Send a GLUSTERD_MGMT_V3_POST_VALIDATE request to one peer.
+ *
+ * op_ctx is serialized into the request together with op and op_ret,
+ * my_uuid is recorded as the sender, and a copy of the peer's uuid is
+ * passed to the submit call as the cookie. recv_uuid is not used.
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+gd_mgmt_v3_post_validate_req (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
+                              glusterd_peerinfo_t *peerinfo,
+                              struct syncargs *args, uuid_t my_uuid,
+                              uuid_t recv_uuid)
+{
+        xlator_t                 *this    = THIS;
+        gd1_mgmt_v3_post_val_req  payload = {{0},};
+        uuid_t                   *cookie  = NULL;
+        int32_t                   ret     = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (args);
+
+        ret = dict_allocate_and_serialize (op_ctx, &payload.dict.dict_val,
+                                           &payload.dict.dict_len);
+        if (ret != 0)
+                goto out;
+
+        payload.op = op;
+        payload.op_ret = op_ret;
+        gf_uuid_copy (payload.uuid, my_uuid);
+
+        GD_ALLOC_COPY_UUID (cookie, peerinfo->uuid, ret);
+        if (ret != 0)
+                goto out;
+
+        ret = gd_syncop_submit_request (peerinfo->rpc, &payload, args, cookie,
+                                        &gd_mgmt_v3_prog,
+                                        GLUSTERD_MGMT_V3_POST_VALIDATE,
+                                        gd_mgmt_v3_post_validate_cbk,
+                                        (xdrproc_t) xdr_gd1_mgmt_v3_post_val_req);
+out:
+        /* The serialized buffer is released on every path. */
+        GF_FREE (payload.dict.dict_val);
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Post-validate phase of a mgmt_v3 transaction: copy dict into req_dict,
+ * run post-validation on the local node, then send it to every eligible
+ * peer and wait for all replies.
+ *
+ * @op:             operation being performed.
+ * @op_ret:         result of the preceding phases, forwarded locally and
+ *                  to peers.
+ * @dict:           operation context whose contents are copied into
+ *                  req_dict.
+ * @req_dict:       request payload; the function fails if it is NULL.
+ * @op_errstr:      out parameter for an allocated error string.
+ * @txn_generation: peer-list generation saved at transaction start; peers
+ *                  with a higher generation are skipped.
+ *
+ * Returns 0 on success (including the case of no eligible peers), -1 when
+ * req_dict is NULL or the response dict cannot be created, the non-zero
+ * local result when local post-validation fails, and otherwise the
+ * aggregated peer result (args.op_ret).
+ */
+int
+glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict,
+                                dict_t *req_dict, char **op_errstr,
+                                uint32_t txn_generation)
+{
+        int32_t              ret       = -1;
+        int32_t              peer_cnt  = 0;
+        dict_t              *rsp_dict  = NULL;
+        glusterd_peerinfo_t *peerinfo  = NULL;
+        struct syncargs      args      = {0};
+        uuid_t               peer_uuid = {0};
+        xlator_t            *this      = NULL;
+        glusterd_conf_t     *conf      = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (dict);
+        GF_VALIDATE_OR_GOTO (this->name, req_dict, out);
+        GF_ASSERT (op_errstr);
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to create response dictionary");
+                goto out;
+        }
+
+        /* Copy the contents of dict like missed snaps info to req_dict */
+        dict_copy (dict, req_dict);
+
+        /* Post Validation on local node */
+        ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr,
+                                           rsp_dict);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_POST_VALIDATION_FAIL,
+                        "Post Validation failed for "
+                        "operation %s on local node",
+                        gd_op_list[op]);
+
+                if (*op_errstr == NULL) {
+                        ret = gf_asprintf (op_errstr,
+                                           "Post-validation failed "
+                                           "on localhost. Please check "
+                                           "log file for details");
+                        if (ret == -1)
+                                *op_errstr = NULL;
+
+                        ret = -1;
+                }
+                goto out;
+        }
+
+        dict_unref (rsp_dict);
+        rsp_dict = NULL;
+
+        /* Sending Post Validation req to other nodes in the cluster */
+        gd_syncargs_init (&args, req_dict);
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+                gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo,
+                                              &args, MY_UUID, peer_uuid);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_POST_VALIDATION_FAIL,
+                        "Post Validation failed on peers");
+
+                if (args.errstr)
+                        *op_errstr = gf_strdup (args.errstr);
+        }
+
+        ret = args.op_ret;
+
+        gf_msg_debug (this->name, 0, "Sent post valaidation req for %s "
+                      "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+        /* rsp_dict is still set only when local post-validation failed;
+         * drop the reference so that path does not leak it. */
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+        return ret;
+}
+
+/*
+ * Reply handler for a GLUSTERD_MGMT_V3_UNLOCK request.
+ *
+ * Takes the syncargs and the peer uuid copy off the frame, decodes the
+ * reply when the RPC succeeded, records the outcome through
+ * gd_mgmt_v3_collate_errors() (no error string is passed for unlock) and
+ * wakes the barrier the sender is waiting on. Always returns 0.
+ */
+int32_t
+gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+                          int count, void *myframe)
+{
+        xlator_t               *this     = THIS;
+        call_frame_t           *frame    = myframe;
+        struct syncargs        *args     = NULL;
+        uuid_t                 *peerid   = NULL;
+        gd1_mgmt_v3_unlock_rsp  reply    = {{0},};
+        int32_t                 decoded  = -1;
+        int32_t                 op_ret   = -1;
+        int32_t                 op_errno = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (myframe);
+
+        args = frame->local;
+        peerid = frame->cookie;
+        frame->local = NULL;
+        frame->cookie = NULL;
+
+        if (req->rpc_status == -1) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+                                        EINVAL);
+
+        decoded = xdr_to_generic (*iov, &reply,
+                                  (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+        if (decoded < 0)
+                goto out;
+
+        gf_uuid_copy (args->uuid, reply.uuid);
+        op_ret = reply.op_ret;
+        op_errno = reply.op_errno;
+
+out:
+        gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+                                   GLUSTERD_MGMT_V3_UNLOCK, *peerid,
+                                   reply.uuid);
+
+        /* free (NULL) is a no-op, so no guard is needed. */
+        free (reply.dict.dict_val);
+        GF_FREE (peerid);
+
+        /* With rpc_status == -1 the caller destroys the stack. */
+        if (req->rpc_status != -1)
+                STACK_DESTROY (frame->root);
+        synctask_barrier_wake(args);
+        return 0;
+}
+
+/*
+ * RPC callback registered for unlock requests. It forwards the reply to
+ * gd_mgmt_v3_unlock_cbk_fn through glusterd_big_locked_cbk() and returns
+ * whatever that call returns.
+ */
+int32_t
+gd_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+                       int count, void *myframe)
+{
+        int32_t status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                                  gd_mgmt_v3_unlock_cbk_fn);
+
+        return status;
+}
+
+/*
+ * Send a GLUSTERD_MGMT_V3_UNLOCK request to one peer.
+ *
+ * op_ctx is serialized into the request, my_uuid is recorded as the
+ * sender, and a copy of the peer's uuid is passed to the submit call as
+ * the cookie. recv_uuid is not used. Returns 0 on success and non-zero
+ * on failure.
+ */
+int
+gd_mgmt_v3_unlock (glusterd_op_t op, dict_t *op_ctx,
+                   glusterd_peerinfo_t *peerinfo,
+                   struct syncargs *args, uuid_t my_uuid,
+                   uuid_t recv_uuid)
+{
+        xlator_t               *this    = THIS;
+        gd1_mgmt_v3_unlock_req  payload = {{0},};
+        uuid_t                 *cookie  = NULL;
+        int32_t                 ret     = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (args);
+
+        ret = dict_allocate_and_serialize (op_ctx, &payload.dict.dict_val,
+                                           &payload.dict.dict_len);
+        if (ret != 0)
+                goto out;
+
+        payload.op = op;
+        gf_uuid_copy (payload.uuid, my_uuid);
+
+        GD_ALLOC_COPY_UUID (cookie, peerinfo->uuid, ret);
+        if (ret != 0)
+                goto out;
+
+        ret = gd_syncop_submit_request (peerinfo->rpc, &payload, args, cookie,
+                                        &gd_mgmt_v3_prog,
+                                        GLUSTERD_MGMT_V3_UNLOCK,
+                                        gd_mgmt_v3_unlock_cbk,
+                                        (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req);
+out:
+        /* The serialized buffer is released on every path. */
+        GF_FREE (payload.dict.dict_val);
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Unlock phase for peers: send a mgmt_v3 unlock request to every eligible
+ * peer and wait for all replies.
+ *
+ * Nothing is sent, and -1 is returned, when is_acquired is false. With no
+ * eligible peers the result is 0. Otherwise the aggregated peer result
+ * (args.op_ret) is returned. The peers' error string is copied into
+ * *op_errstr only when the transaction so far succeeded (op_ret == 0).
+ */
+int
+glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, dict_t *dict,
+                                     int32_t op_ret, char **op_errstr,
+                                     gf_boolean_t is_acquired,
+                                     uint32_t txn_generation)
+{
+        xlator_t            *this      = THIS;
+        glusterd_conf_t     *conf      = NULL;
+        glusterd_peerinfo_t *peer      = NULL;
+        struct syncargs      args      = {0};
+        uuid_t               peer_uuid = {0};
+        int32_t              sent      = 0;
+        int32_t              ret       = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        /* No lock was taken during this transaction: nothing to release. */
+        if (!is_acquired)
+                goto out;
+
+        gd_syncargs_init (&args, NULL);
+        synctask_barrier_init((&args));
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                /* Skip peers that joined after the transaction started,
+                 * are disconnected, or (except for sync-volume) are not
+                 * befriended. */
+                if (peer->generation > txn_generation ||
+                    !peer->connected ||
+                    (op != GD_OP_SYNC_VOLUME &&
+                     peer->state.state != GD_FRIEND_STATE_BEFRIENDED))
+                        continue;
+
+                gd_mgmt_v3_unlock (op, dict, peer, &args,
+                                   MY_UUID, peer_uuid);
+                sent++;
+        }
+        rcu_read_unlock ();
+
+        if (sent == 0) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), sent);
+
+        if (args.op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_UNLOCK_FAIL,
+                        "Unlock failed on peers");
+
+                if (!op_ret && args.errstr)
+                        *op_errstr = gf_strdup (args.errstr);
+        }
+
+        ret = args.op_ret;
+
+        gf_msg_debug (this->name, 0, "Sent unlock op req for %s "
+                      "to %d peers. Returning %d", gd_op_list[op], sent, ret);
+
+out:
+        return ret;
+}
+
+/*
+ * Drive a complete mgmt_v3 transaction for op: lockdown, payload build,
+ * pre-validate, commit, post-validate, peer unlock, local unlock and
+ * finally the CLI response.
+ *
+ * req is the CLI request the response is sent for; dict is the operation
+ * context. The outcome is reported to the CLI through
+ * glusterd_op_send_cli_response(); the function itself always returns 0.
+ */
+int32_t
+glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ /* Save the peer list generation */
+ txn_generation = conf->generation;
+ cmm_smp_rmb ();
+ /* This read memory barrier makes sure that this assignment happens here
+ * only and is not reordered and optimized by either the compiler or the
+ * processor.
+ */
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (dict, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
+ /* The dict did not take the buffer, so free it here. */
+ GF_FREE (originator_uuid);
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32 (dict, "is_synctasked", _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_CREATE_FAIL, "Unable to create dict");
+ goto out;
+ }
+ dict_copy (dict, tmp_dict);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr,
+ &op_errno, &is_acquired,
+ txn_generation);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PRE_VALIDATION_FAIL, "Pre Validation Failed");
+ goto out;
+ }
+
+ /* COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_COMMIT_OP_FAIL, "Commit Op Failed");
+ goto out;
+ }
+
+ /* POST-COMMIT VALIDATE PHASE */
+ /* As of now, post_validate is not trying to cleanup any failed
+ commands. So as of now, I am sending 0 (op_ret as 0).
+ */
+ ret = glusterd_mgmt_v3_post_validate (op, 0, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_POST_VALIDATION_FAIL, "Post Validation Failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /* op_ret carries the transaction result from here on; ret is reused
+ * for the unlock steps below. */
+ op_ret = ret;
+ /* UNLOCK PHASE FOR PEERS*/
+ (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret,
+ &op_errstr, is_acquired,
+ txn_generation);
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ /* A failure with no specific errno is reported as EG_INTRNL. */
+ if (op_ret && (op_errno == 0))
+ op_errno = EG_INTRNL;
+
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response (op, op_ret, op_errno, req,
+ dict, op_errstr);
+
+ if (req_dict)
+ dict_unref (req_dict);
+
+ if (tmp_dict)
+ dict_unref (tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE (op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * Set the barrier option for the volume named by "volname1" in dict.
+ *
+ * option is stored under "barrier" in dict and under "features.barrier"
+ * in the volume's option dict. The volume's op-versions are then updated,
+ * its volfiles regenerated and the volinfo stored with a version
+ * increment. Returns 0 on success and non-zero on the first failure.
+ */
+int32_t
+glusterd_set_barrier_value (dict_t *dict, char *option)
+{
+        xlator_t           *this    = THIS;
+        glusterd_volinfo_t *volinfo = NULL;
+        char               *name    = NULL;
+        int32_t             ret     = -1;
+
+        GF_ASSERT (this);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (option);
+
+        /* TODO : Change this when we support multiple volume.
+         * As of now only snapshot of single volume is supported,
+         * Hence volname1 is directly fetched
+         */
+        ret = dict_get_str (dict, "volname1", &name);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Volname not present in "
+                        "dict");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (name, &volinfo);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "Volume %s not found ",
+                        name);
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (dict, "barrier", option);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set barrier op "
+                        "in request dictionary");
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, "features.barrier",
+                                          option);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set barrier op "
+                        "in volume option dict");
+                goto out;
+        }
+
+        gd_update_volume_op_versions (volinfo);
+
+        ret = glusterd_create_volfiles (volinfo);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Failed to create volfiles");
+                goto out;
+        }
+
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Drive a complete mgmt_v3 snapshot transaction for op.
+ *
+ * Phases, in order: lockdown, payload build, pre-validate, volume quorum
+ * check, "pre" brick op, commit, "post" brick op (unbarrier), snapshot
+ * volume quorum check (only if the commit succeeded), post-validate, peer
+ * unlock, local unlock and finally the CLI response.
+ *
+ * req is the CLI request the response is sent for; dict is the operation
+ * context. The outcome is reported to the CLI through
+ * glusterd_op_send_cli_response(); the function itself always returns 0.
+ */
+int32_t
+glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
+                                       dict_t *dict)
+{
+        int32_t          ret             = -1;
+        int32_t          op_ret          = -1;
+        dict_t          *req_dict        = NULL;
+        dict_t          *tmp_dict        = NULL;
+        glusterd_conf_t *conf            = NULL;
+        char            *op_errstr       = NULL;
+        xlator_t        *this            = NULL;
+        gf_boolean_t     is_acquired     = _gf_false;
+        uuid_t          *originator_uuid = NULL;
+        gf_boolean_t     success         = _gf_false;
+        char            *cli_errstr      = NULL;
+        uint32_t         txn_generation  = 0;
+        uint32_t         op_errno        = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        /* Save the peer list generation */
+        txn_generation = conf->generation;
+        cmm_smp_rmb ();
+        /* This read memory barrier makes sure that this assignment happens here
+         * only and is not reordered and optimized by either the compiler or the
+         * processor.
+         */
+
+        /* Save the MY_UUID as the originator_uuid. This originator_uuid
+         * will be used by is_origin_glusterd() to determine if a node
+         * is the originator node for a command. */
+        originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+                                     gf_common_mt_uuid_t);
+        if (!originator_uuid) {
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_copy (*originator_uuid, MY_UUID);
+        ret = dict_set_bin (dict, "originator_uuid",
+                            originator_uuid, sizeof (uuid_t));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set originator_uuid.");
+                /* The dict did not take the buffer, so free it here. */
+                GF_FREE (originator_uuid);
+                goto out;
+        }
+
+        /* Marking the operation as complete synctasked */
+        ret = dict_set_int32 (dict, "is_synctasked", _gf_true);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set synctasked flag.");
+                goto out;
+        }
+
+        /* Use a copy at local unlock as cli response will be sent before
+         * the unlock and the volname in the dict might be removed */
+        tmp_dict = dict_new();
+        if (!tmp_dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL, "Unable to create dict");
+                goto out;
+        }
+        dict_copy (dict, tmp_dict);
+
+        /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+        ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr,
+                                                  &op_errno, &is_acquired,
+                                                  txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+                        "mgmt_v3 lockdown failed.");
+                goto out;
+        }
+
+        /* BUILD PAYLOAD */
+        ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD,
+                        gd_op_list[op]);
+                if (op_errstr == NULL)
+                        gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+                goto out;
+        }
+
+        /* PRE-COMMIT VALIDATE PHASE */
+        ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr,
+                                             &op_errno, txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL, "Pre Validation Failed");
+                goto out;
+        }
+
+        /* quorum check of the volume is done here */
+        ret = glusterd_snap_quorum_check (req_dict, _gf_false, &op_errstr,
+                                          &op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_QUORUM_CHECK_FAIL, "Volume quorum check failed");
+                goto out;
+        }
+
+        /* Set the operation type as pre, so that differentiation can be
+         * made whether the brickop is sent during pre-commit or post-commit
+         */
+        ret = dict_set_dynstr_with_alloc (req_dict, "operation-type", "pre");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "operation-type in dictionary");
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_brick_op (op, req_dict, &op_errstr,
+                                         txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_OP_FAIL, "Brick Ops Failed");
+                goto unbarrier;
+        }
+
+        /* COMMIT OP PHASE */
+        /* TODO: As of now, the plan is to do quorum check before sending the
+           commit fop and if the quorum succeeds, then commit is sent to all
+           the other glusterds.
+           snap create functionality now creates the in memory and on disk
+           objects for the snapshot (marking them as incomplete), takes the lvm
+           snapshot and then updates the status of the in memory and on disk
+           snap objects as complete. Suppose one of the glusterds goes down
+           after taking the lvm snapshot, but before updating the snap object,
+           then treat it as a snapshot create failure and trigger cleanup.
+           i.e the number of commit responses received by the originator
+           glusterd should be the same as the number of peers it has sent the
+           request to (i.e npeers variable). If not, then originator glusterd
+           will initiate cleanup in post-validate fop.
+           Question: What if one of the other glusterds goes down as explained
+           above and along with it the originator glusterd also goes down?
+           Who will initiate the cleanup?
+        */
+        ret = dict_set_int32 (req_dict, "cleanup", 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "failed to set dict");
+                goto unbarrier;
+        }
+
+        ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr,
+                                       &op_errno, txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_OP_FAIL, "Commit Op Failed");
+                /* If the main op fails, we should save the error string.
+                   Because, op_errstr will be used for unbarrier and
+                   unlock ops also. We might lose the actual error that
+                   caused the failure.
+                */
+                cli_errstr = op_errstr;
+                op_errstr = NULL;
+                goto unbarrier;
+        }
+
+        success = _gf_true;
+unbarrier:
+        /* Set the operation type as post, so that differentiation can be
+         * made whether the brickop is sent during pre-commit or post-commit
+         */
+        ret = dict_set_dynstr_with_alloc (req_dict, "operation-type", "post");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "operation-type in dictionary");
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_brick_op (op, req_dict, &op_errstr,
+                                         txn_generation);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_OP_FAIL, "Brick Ops Failed");
+                goto out;
+        }
+
+        /*Do a quorum check if the commit phase is successful*/
+        if (success) {
+                //quorum check of the snapshot volume
+                ret = glusterd_snap_quorum_check (dict, _gf_true, &op_errstr,
+                                                  &op_errno);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_QUORUM_CHECK_FAIL,
+                                "Snapshot Volume quorum check failed");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        op_ret = ret;
+
+        /* ret was overwritten by the unbarrier steps, so a failed commit
+         * must be forced back to an error here. */
+        if (success == _gf_false)
+                op_ret = -1;
+
+        /* POST-COMMIT VALIDATE PHASE */
+        ret = glusterd_mgmt_v3_post_validate (op, op_ret, dict, req_dict,
+                                              &op_errstr, txn_generation);
+        if (ret) {
+                /* Logged under the post-validation message id, matching
+                 * the text; the pre-validation id used here before was
+                 * wrong. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_POST_VALIDATION_FAIL, "Post Validation Failed");
+                op_ret = -1;
+        }
+
+        /* UNLOCK PHASE FOR PEERS*/
+        (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret,
+                                                    &op_errstr, is_acquired,
+                                                    txn_generation);
+
+        /* If the commit op (snapshot taking) failed, then the error is stored
+           in cli_errstr and unbarrier is called. Suppose, if unbarrier also
+           fails, then the error happened in unbarrier is logged and freed.
+           The error happened in commit op, which is stored in cli_errstr
+           is sent to cli.
+        */
+        if (cli_errstr) {
+                GF_FREE (op_errstr);
+                op_errstr = cli_errstr;
+        }
+
+        /* LOCAL VOLUME(S) UNLOCK */
+        if (is_acquired) {
+                /* Trying to release multiple mgmt_v3 locks */
+                ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                "Failed to release mgmt_v3 locks on localhost");
+                        op_ret = ret;
+                }
+        }
+
+        /* A failure with no specific errno is reported as EG_INTRNL. */
+        if (op_ret && (op_errno == 0))
+                op_errno = EG_INTRNL;
+
+        /* SEND CLI RESPONSE */
+        glusterd_op_send_cli_response (op, op_ret, op_errno, req,
+                                       dict, op_errstr);
+
+        if (req_dict)
+                dict_unref (req_dict);
+
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        if (op_errstr) {
+                GF_FREE (op_errstr);
+                op_errstr = NULL;
+        }
+
+        return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
new file mode 100644
index 00000000000..bf87ec710f1
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_MGMT_H_
+#define _GLUSTERD_MGMT_H_
+
+/*
+ * Prototypes of the glusterd mgmt_v3 transaction helpers.  Only the
+ * signatures are declared here; the behavior of each function is
+ * defined by the implementing source file.
+ */
+
+void
+gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno,
+                           char *op_errstr, int op_code, uuid_t peerid,
+                           u_char *uuid);
+
+/* Handlers named after a transaction phase (pre-validate, brick-op,
+ * commit, post-validate). */
+int32_t
+gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict,
+                            char **op_errstr, dict_t *rsp_dict,
+                            uint32_t *op_errno);
+
+int32_t
+gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict,
+                        char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict,
+                      char **op_errstr, uint32_t *op_errno,
+                      dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict,
+                             char **op_errstr, dict_t *rsp_dict);
+
+/* Entry points that take the originating request, the op and its dict. */
+int32_t
+glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
+                                      dict_t *dict);
+
+int32_t
+glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
+                                       dict_t *dict);
+
+int
+glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src);
+
+int32_t
+glusterd_set_barrier_value (dict_t *dict, char *option);
+
+int
+glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict,
+                                    char **op_errstr, uint32_t *op_errno,
+                                    gf_boolean_t *is_acquired,
+                                    uint32_t txn_generation);
+
+int
+glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict,
+                                glusterd_op_t op);
+
+int
+glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict,
+                               char **op_errstr, uint32_t *op_errno,
+                               uint32_t txn_generation);
+
+int
+glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+                         char **op_errstr, uint32_t *op_errno,
+                         uint32_t txn_generation);
+
+int
+glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, dict_t *dict,
+                                     int32_t op_ret, char **op_errstr,
+                                     gf_boolean_t is_acquired,
+                                     uint32_t txn_generation);
+
+int32_t
+glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid);
+
+#endif /* _GLUSTERD_MGMT_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
new file mode 100644
index 00000000000..7c069ced984
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
@@ -0,0 +1,698 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+#include <fnmatch.h>
+#include <pwd.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "list.h"
+#include "logging.h"
+#include "syscall.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "run.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "common-utils.h"
+#include "glusterd-mountbroker.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-messages.h"
+
+/*
+ * Walk the entries of @dict stored under the decimal keys "0", "1", "2", ...
+ * and call @fn (value, @data) on each of them, in that order.
+ *
+ * The walk ends at the first key whose lookup fails: a lookup result of
+ * -ENOENT ends it with 0, any other lookup error is returned unchanged.
+ * A non-zero return from @fn also ends the walk and is handed back to
+ * the caller.
+ */
+static int
+seq_dict_foreach (dict_t *dict,
+                  int (*fn)(char *str, void *data),
+                  void *data)
+{
+        char  keybuf[] = "4294967296"; // 1<<32
+        char *value    = NULL;
+        int   seq      = 0;
+        int   rc       = 0;
+
+        while (1) {
+                snprintf (keybuf, sizeof (keybuf), "%d", seq);
+
+                rc = dict_get_str (dict, keybuf, &value);
+                if (rc != 0) {
+                        if (rc == -ENOENT)
+                                return 0;
+                        return rc;
+                }
+
+                rc = fn (value, data);
+                if (rc != 0)
+                        return rc;
+
+                seq++;
+        }
+}
+
+/*
+ * Parse the textual mount pattern description @pdesc into
+ * @mspec->patterns / @mspec->len.
+ *
+ * A description is a sequence of patterns, optionally separated by '&'.
+ * Each pattern is an optional '-' (sets the negative flag), one of the
+ * condition keywords "SUB(", "SUP(", "EQL(", "MEET(" or "SUB+(", a list
+ * of components separated by whitespace or '|', and a closing ')'.
+ * "SUB+(" is a SUB pattern that additionally starts with the component
+ * pointers of the most recent "SUP(" pattern (when there was one).
+ *
+ * @pdesc is modified in place: '|' separators are overwritten with
+ * spaces, and characters are temporarily replaced by NUL while a
+ * component is being duplicated.
+ *
+ * Returns 0 on success (including a description that is empty once
+ * skipwhite() has been applied, in which case @mspec is left untouched)
+ * and -1 on allocation failure or syntax error; a syntax error is also
+ * logged.
+ *
+ * SYNTAX_ERR is a function-local error code; it is defined between the
+ * declarator and the body and undefined right after the function.
+ */
+int
+parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc)
+#define SYNTAX_ERR -2
+{
+ char *curs = NULL;
+ char *c2 = NULL;
+ char sc = '\0';
+ char **cc = NULL;
+ gf_mount_pattern_t *pat = NULL;
+ int pnum = 0;
+ int ret = 0;
+ int lastsup = -1;
+ int incl = -1;
+ char **pcc = NULL;
+ int pnc = 0;
+
+ skipwhite (&pdesc);
+
+ /* a bow to theory */
+ if (!*pdesc)
+ return 0;
+
+ /* count the patterns: one per closing paren in the description */
+ mspec->len = 0;
+ for (curs = pdesc; *curs; curs++) {
+ if (*curs == ')')
+ mspec->len++;
+ }
+
+ mspec->patterns = GF_CALLOC (mspec->len, sizeof (*mspec->patterns),
+ gf_gld_mt_mount_pattern);
+ if (!mspec->patterns) {
+ ret = -1;
+ goto out;
+ }
+
+ pat = mspec->patterns;
+ curs = pdesc;
+ skipwhite (&curs);
+ /* one iteration per pattern; left via break at end of input or
+ * via goto out on error */
+ for (;;) {
+ /* index of the pattern whose components get included, or -1 */
+ incl = -1;
+
+ /* check for pattern signedness modifier */
+ if (*curs == '-') {
+ pat->negative = _gf_true;
+ curs++;
+ }
+
+ /* now should come condition specifier,
+ * then opening paren
+ */
+ c2 = nwstrtail (curs, "SUB(");
+ if (c2) {
+ pat->condition = SET_SUB;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "SUP(");
+ if (c2) {
+ pat->condition = SET_SUPER;
+ /* remember this pattern for a later "SUB+(" */
+ lastsup = pat - mspec->patterns;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "EQL(");
+ if (c2) {
+ pat->condition = SET_EQUAL;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "MEET(");
+ if (c2) {
+ pat->condition = SET_INTERSECT;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "SUB+(");
+ if (c2) {
+ pat->condition = SET_SUB;
+ incl = lastsup;
+ goto got_cond;
+ }
+
+ ret = SYNTAX_ERR;
+ goto out;
+
+ got_cond:
+ curs = c2;
+ skipwhite (&curs);
+ /* count the number of components for pattern */
+ pnum = *curs == ')' ? 0 : 1;
+ for (c2 = curs ;*c2 != ')';) {
+ /* a component may not start with a separator; this test
+ * also fires on the terminating NUL */
+ if (strchr ("&|", *c2)) {
+ ret = SYNTAX_ERR;
+ goto out;
+ }
+ while (!strchr ("|&)", *c2) && !isspace (*c2))
+ c2++;
+ skipwhite (&c2);
+ switch (*c2) {
+ case ')':
+ break;
+ case '\0':
+ case '&':
+ ret = SYNTAX_ERR;
+ goto out;
+ case '|':
+ /* turn the '|' separator into plain whitespace */
+ *c2 = ' ';
+ skipwhite (&c2);
+ /* fall through */
+ default:
+ pnum++;
+ }
+ }
+ if (incl >= 0) {
+ /* make room for the components of the included pattern */
+ pnc = 0;
+ for (pcc = mspec->patterns[incl].components; *pcc; pcc++)
+ pnc++;
+ pnum += pnc;
+ }
+ /* one extra zeroed slot acts as the NULL terminator */
+ pat->components = GF_CALLOC (pnum + 1, sizeof (*pat->components),
+ gf_gld_mt_mount_comp_container);
+ if (!pat->components) {
+ ret = -1;
+ goto out;
+ }
+
+ cc = pat->components;
+ /* copy over included component set (the string pointers are
+ * copied, the strings themselves are not duplicated) */
+ if (incl >= 0) {
+ memcpy (pat->components,
+ mspec->patterns[incl].components,
+ pnc * sizeof (*pat->components));
+ cc += pnc;
+ }
+ /* parse and add components */
+ c2 = ""; /* reset c2 */
+ while (*c2 != ')') {
+ c2 = curs;
+ while (!isspace (*c2) && *c2 != ')')
+ c2++;
+ /* terminate the component temporarily so it can be duplicated */
+ sc = *c2;
+ *c2 = '\0';;
+ *cc = gf_strdup (curs);
+ if (!*cc) {
+ ret = -1;
+ goto out;
+ }
+ *c2 = sc;
+ skipwhite (&c2);
+ curs = c2;
+ cc++;
+ }
+
+ /* step over the closing paren and an optional '&' */
+ curs++;
+ skipwhite (&curs);
+ if (*curs == '&') {
+ curs++;
+ skipwhite (&curs);
+ }
+
+ if (!*curs)
+ break;
+ pat++;
+ }
+
+ out:
+ if (ret == SYNTAX_ERR) {
+ gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY, "cannot parse mount patterns %s",
+ pdesc);
+ }
+
+ /* We have allocated a lot of memory here but do not bother freeing
+ * it on error: in that case we will terminate anyway
+ */
+ return ret ? -1 : 0;
+}
+#undef SYNTAX_ERR
+
+
+/*
+ * printf-style mount pattern description for geo-replication.
+ * make_georep_mountspec() fills the conversions, in order, with
+ * GF_CLIENT_PID_GSYNCD (%d), the user name (%s) and the list of
+ * "volfile-id=<volume>" entries it builds from the volume names (%s).
+ */
+const char *georep_mnt_desc_template =
+ "SUP("
+ "aux-gfid-mount "
+ "acl "
+ "volfile-server=localhost "
+ "client-pid=%d "
+ "user-map-root=%s "
+ ")"
+ "SUB+("
+ "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"*/* "
+ "log-level=* "
+ "volfile-id=* "
+ ")"
+ "MEET("
+ "%s"
+ ")";
+
+/*
+ * printf-style mount pattern description for hadoop mounts.
+ * make_ghadoop_mountspec() fills the conversions, in order, with the
+ * server (%s), GF_CLIENT_PID_HADOOP (%d), the volume name (%s) and the
+ * user name (%s).
+ */
+const char *hadoop_mnt_desc_template =
+ "SUP("
+ "volfile-server=%s "
+ "client-pid=%d "
+ "volfile-id=%s "
+ "user-map-root=%s "
+ ")"
+ "SUB+("
+ "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GHADOOP"*/* "
+ "log-level=* "
+ ")";
+
+/*
+ * Build the geo-replication mount specification for @user and the
+ * comma separated volume list @volnames, and parse it into @mspec.
+ *
+ * The volume list is turned into a space separated list of
+ * "volfile-id=<volume>" entries which, together with
+ * GF_CLIENT_PID_GSYNCD and @user, is formatted into
+ * georep_mnt_desc_template; the result goes to
+ * parse_mount_pattern_desc().
+ *
+ * Returns the parser's result, or -1 when any of the three temporary
+ * strings could not be allocated.  All temporaries are released before
+ * returning.
+ */
+int
+make_georep_mountspec (gf_mount_spec_t *mspec, const char *volnames,
+                       char *user)
+{
+        char   *desc       = NULL;
+        char   *meet       = NULL;
+        char   *copy       = NULL;
+        char   *tok        = NULL;
+        char   *scan       = NULL;
+        char   *state      = NULL;
+        char   *to_free[3] = {0,};
+        size_t  need       = 0;
+        int     remaining  = 0;
+        int     idx        = 0;
+        int     ret        = 0;
+
+        copy = gf_strdup ((char *)volnames);
+        if (!copy)
+                goto out;
+
+        /* number of volumes = number of commas + 1 */
+        remaining = 1;
+        for (scan = copy; *scan; scan++) {
+                if (*scan == ',')
+                        remaining++;
+        }
+
+        /* every comma becomes a space, every volume gains the prefix */
+        need = strlen (volnames) + remaining * strlen("volfile-id=");
+        meet = GF_CALLOC (1, need + 1, gf_gld_mt_georep_meet_spec);
+        if (!meet)
+                goto out;
+
+        tok = strtok_r (copy, ",", &state);
+        while (tok) {
+                strcat (meet, "volfile-id=");
+                strcat (meet, tok);
+                if (--remaining > 0)
+                        strcat (meet, " ");
+                tok = strtok_r (NULL, ",", &state);
+        }
+        GF_ASSERT (remaining == 0);
+
+        ret = gf_asprintf (&desc, georep_mnt_desc_template,
+                           GF_CLIENT_PID_GSYNCD, user, meet);
+        if (ret == -1) {
+                desc = NULL;
+                goto out;
+        }
+
+        ret = parse_mount_pattern_desc (mspec, desc);
+
+ out:
+        to_free[0] = meet;
+        to_free[1] = copy;
+        to_free[2] = desc;
+
+        /* a missing temporary means an allocation failed on the way */
+        for (idx = 0; idx < 3; idx++) {
+                if (to_free[idx] != NULL)
+                        GF_FREE (to_free[idx]);
+                else
+                        ret = -1;
+        }
+
+        return ret;
+}
+
+/*
+ * Build the hadoop mount specification and parse it into @mspec.
+ *
+ * hadoop_mnt_desc_template is formatted with @server,
+ * GF_CLIENT_PID_HADOOP, @volname and @user (in that order) and the
+ * result is handed to parse_mount_pattern_desc().
+ *
+ * Returns -1 when the description cannot be formatted, otherwise the
+ * parser's result.
+ */
+int
+make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname,
+                        char *user, char *server)
+{
+        char *hadoop_mnt_desc = NULL;
+        int   ret             = 0;
+
+        ret = gf_asprintf (&hadoop_mnt_desc, hadoop_mnt_desc_template,
+                           server, GF_CLIENT_PID_HADOOP, volname, user);
+        if (ret == -1)
+                return ret;
+
+        ret = parse_mount_pattern_desc (mspec, hadoop_mnt_desc);
+
+        /* The parser duplicates every component it keeps, so the
+         * formatted description is not needed any more; release it here
+         * instead of leaking it (make_georep_mountspec() releases its
+         * description the same way).
+         */
+        GF_FREE (hadoop_mnt_desc);
+
+        return ret;
+}
+
+/*
+ * Check whether the argument string @str matches the pattern component
+ * @patcomp.
+ *
+ * Both strings are compared literally up to and including the first
+ * '=' they have in common; identical strings match right away.
+ * Whatever remains after the literal prefix is compared with
+ * fnmatch(), the remainder of @patcomp being the pattern.
+ */
+static gf_boolean_t
+match_comp (char *str, char *patcomp)
+{
+        char *pat = patcomp;
+        char *arg = str;
+
+        GF_ASSERT (pat);
+        GF_ASSERT (arg);
+
+        for (; *pat == *arg; pat++, arg++) {
+                if (*pat == '\0')
+                        return _gf_true;
+                if (*pat == '=') {
+                        /* the key part is over; glob-match the values */
+                        pat++;
+                        arg++;
+                        break;
+                }
+        }
+
+        if (fnmatch (pat, arg, 0) == 0)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Result of comparing the mount arguments with the component list of a
+ * pattern; filled in by relate_sets().
+ */
+struct gf_set_descriptor {
+ /* priv[0]: some argument matched none of the components;
+ * priv[1]: some component was matched by none of the arguments */
+ gf_boolean_t priv[2];
+ /* at least one argument matched at least one component */
+ gf_boolean_t common;
+};
+
+/*
+ * seq_dict_foreach() callback: compare the argument @val with every
+ * entry of a NULL-terminated component list.
+ *
+ * @data is a two element array: [0] the struct gf_set_descriptor to
+ * update, [1] the component list.  A match sets ->common; an argument
+ * without any match sets ->priv[0].  Always returns 0 so that the walk
+ * continues.
+ */
+static int
+_gf_set_dict_iter1 (char *val, void *data)
+{
+        void                     **args    = data;
+        struct gf_set_descriptor  *desc    = args[0];
+        char                     **comp    = NULL;
+        gf_boolean_t               matched = _gf_false;
+
+        for (comp = args[1]; *comp; comp++) {
+                if (!match_comp (val, *comp))
+                        continue;
+                matched = _gf_true;
+                desc->common = _gf_true;
+        }
+
+        if (!matched)
+                desc->priv[0] = _gf_true;
+
+        return 0;
+}
+
+/*
+ * seq_dict_foreach() callback: record whether the argument @val matches
+ * one given component.
+ *
+ * @data is a two element array: [0] the gf_boolean_t that is set to
+ * _gf_true on a match (it is never cleared here), [1] the component.
+ * Always returns 0 so that the walk continues.
+ */
+static int
+_gf_set_dict_iter2 (char *val, void *data)
+{
+        void         **args      = data;
+        gf_boolean_t  *hit       = args[0];
+        char          *component = args[1];
+
+        if (!match_comp (val, component))
+                return 0;
+
+        *hit = _gf_true;
+        return 0;
+}
+
+/*
+ * Work out how the arguments in @argdict relate to the NULL-terminated
+ * component list @complist and store the outcome in @sd, which is
+ * zeroed first.
+ *
+ * The first pass checks every argument against the whole list (flags
+ * arguments without a match), the second pass checks every component
+ * against all arguments (flags components without a match).
+ */
+static void
+relate_sets (struct gf_set_descriptor *sd, dict_t *argdict, char **complist)
+{
+        void         *ctx[2] = {NULL, NULL};
+        gf_boolean_t  found  = _gf_false;
+        char        **comp   = NULL;
+
+        memset (sd, 0, sizeof (*sd));
+
+        ctx[0] = sd;
+        ctx[1] = complist;
+        seq_dict_foreach (argdict, _gf_set_dict_iter1, ctx);
+
+        for (comp = complist; *comp; comp++) {
+                found  = _gf_false;
+                ctx[0] = &found;
+                ctx[1] = *comp;
+                seq_dict_foreach (argdict, _gf_set_dict_iter2, ctx);
+
+                if (found)
+                        sd->common = _gf_true;
+                else
+                        sd->priv[1] = _gf_true;
+        }
+}
+
+/*
+ * seq_dict_foreach() callback: pick the uid out of a
+ * "user-map-root=<user>" argument.
+ *
+ * @data points to an int holding the uid found so far (negative while
+ * none has been found).  Arguments of any other form are skipped with
+ * 0.  Returns -EINVAL when the user is unknown to getpwnam() or when a
+ * uid has already been stored, otherwise stores the uid and returns 0.
+ */
+static int
+_arg_parse_uid (char *val, void *data)
+{
+        int           *uidp  = data;
+        struct passwd *entry = NULL;
+        char          *name  = NULL;
+
+        name = strtail (val, "user-map-root=");
+        if (name == NULL)
+                return 0;
+
+        entry = getpwnam (name);
+        if (entry == NULL)
+                return -EINVAL;
+
+        /* uid ambiguity, already found */
+        if (*uidp >= 0)
+                return -EINVAL;
+
+        *uidp = entry->pw_uid;
+        return 0;
+}
+
+/*
+ * Check the mount arguments in @argdict against every pattern of
+ * @mspec.
+ *
+ * Each pattern's condition is evaluated on the relation computed by
+ * relate_sets() and inverted when the pattern is negative; the first
+ * pattern that does not hold yields -EPERM.  When all patterns hold,
+ * the arguments are scanned by _arg_parse_uid(); a non-zero result of
+ * that scan is returned as is, otherwise the uid it stored is returned
+ * (still -1 when no argument supplied one).
+ */
+static int
+evaluate_mount_request (gf_mount_spec_t *mspec, dict_t *argdict)
+{
+        struct gf_set_descriptor  rel = {{0,},};
+        gf_mount_pattern_t       *pat = NULL;
+        gf_boolean_t              ok  = _gf_false;
+        int                       idx = 0;
+        int                       uid = -1;
+        int                       rc  = 0;
+
+        for (idx = 0; idx < mspec->len; idx++) {
+                pat = &mspec->patterns[idx];
+                relate_sets (&rel, argdict, pat->components);
+
+                switch (pat->condition) {
+                case SET_SUB:
+                        ok = !rel.priv[0];
+                        break;
+                case SET_SUPER:
+                        ok = !rel.priv[1];
+                        break;
+                case SET_EQUAL:
+                        ok = (!rel.priv[0] && !rel.priv[1]);
+                        break;
+                case SET_INTERSECT:
+                        ok = rel.common;
+                        break;
+                default:
+                        GF_ASSERT(!"unreached");
+                }
+
+                if (pat->negative)
+                        ok = !ok;
+
+                if (!ok)
+                        return -EPERM;
+        }
+
+        rc = seq_dict_foreach (argdict, _arg_parse_uid, &uid);
+
+        return (rc != 0) ? rc : uid;
+}
+
+/*
+ * seq_dict_foreach() callback: store in the char * that @data points to
+ * whatever strtail() reports for the "volfile-id=" prefix of @val.
+ * Returns 1 (which stops the walk) when that is non-NULL, 0 otherwise.
+ */
+static int
+_volname_get (char *val, void *data)
+{
+        char **out  = data;
+        char  *tail = strtail (val, "volfile-id=");
+
+        *out = tail;
+        if (tail)
+                return 1;
+
+        return 0;
+}
+
+/*
+ * seq_dict_foreach() callback: append @val, prefixed with "--", as an
+ * argument of the runner_t passed in @data.  Always returns 0.
+ */
+static int
+_runner_add (char *val, void *data)
+{
+        runner_argprintf ((runner_t *)data, "--%s", val);
+
+        return 0;
+}
+
+/*
+ * Serve a mount request for the mount spec named @label.
+ *
+ * The arguments in @argdict are checked against the spec and the uid is
+ * taken from them; the "volfile-id=" argument must name a started
+ * volume.  Then, below the "mountbroker-root" option directory:
+ *   - <root>/user<uid> is created if missing and must be a 0700
+ *     directory owned by uid:0,
+ *   - a fresh mount point <root>/user<uid>/mtpt-<label>-XXXXXX is made
+ *     with mkdtemp(),
+ *   - a "cookie" file is reserved in <root>/MB_HIVE with mkstemp() and
+ *     then replaced by a symlink pointing to the mount point,
+ *   - SBIN_DIR"/glusterfs" is run with every argument turned into a
+ *     "--<arg>" option, followed by the mount point.
+ *
+ * On success returns 0 and stores the cookie path in *@path (allocated
+ * string; presumably the caller releases it -- confirm against the
+ * callers).  On failure returns -1 with the reason in *@op_errno, and
+ * the cookie and mount point created so far are removed again.
+ */
+int
+glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno)
+{
+        glusterd_conf_t    *priv             = NULL;
+        char               *mountbroker_root = NULL;
+        gf_mount_spec_t    *mspec            = NULL;
+        int                 uid              = -ENOENT;
+        char               *volname          = NULL;
+        glusterd_volinfo_t *vol              = NULL;
+        char               *mtptemp          = NULL;
+        char               *mntlink          = NULL;
+        char               *cookieswitch     = NULL;
+        char               *cookie           = NULL;
+        char               *sla              = NULL;
+        struct stat         st               = {0,};
+        runner_t            runner           = {0,};
+        int                 ret              = 0;
+        xlator_t           *this             = THIS;
+        mode_t              orig_umask       = 0;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (op_errno);
+        *op_errno = 0;
+
+        if (dict_get_str (this->options, "mountbroker-root",
+                          &mountbroker_root) != 0) {
+                *op_errno = ENOENT;
+                goto out;
+        }
+
+        GF_ASSERT (label);
+        if (!*label) {
+                *op_errno = EINVAL;
+                goto out;
+        }
+
+        /* look up spec for label; uid keeps its -ENOENT default when no
+         * spec carries that label */
+        cds_list_for_each_entry (mspec, &priv->mount_specs,
+                                 speclist) {
+                if (strcmp (mspec->label, label) != 0)
+                        continue;
+                uid = evaluate_mount_request (mspec, argdict);
+                break;
+        }
+        if (uid < 0) {
+                *op_errno = -uid;
+                goto out;
+        }
+
+        /* some sanity check on arguments */
+        seq_dict_foreach (argdict, _volname_get, &volname);
+        if (!volname) {
+                *op_errno = EINVAL;
+                goto out;
+        }
+        if (glusterd_volinfo_find (volname, &vol) != 0 ||
+            !glusterd_is_volume_started (vol)) {
+                *op_errno = ENOENT;
+                goto out;
+        }
+
+        /* go do mount */
+
+        /** create actual mount dir */
+
+        /*** "overload" string name to be possible to used for cookie
+             creation, see below */
+        ret = gf_asprintf (&mtptemp, "%s/user%d/mtpt-%s-XXXXXX/cookie",
+                           mountbroker_root, uid, label);
+        if (ret == -1) {
+                mtptemp = NULL;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+        /*** hide cookie part */
+        cookieswitch = strrchr (mtptemp, '/');
+        *cookieswitch = '\0';
+
+        /* temporarily cut the string down to <root>/user<uid> */
+        sla = strrchr (mtptemp, '/');
+        *sla = '\0';
+        ret = sys_mkdir (mtptemp, 0700);
+        if (ret == 0)
+                ret = sys_chown (mtptemp, uid, 0);
+        else if (errno == EEXIST)
+                ret = 0;
+        if (ret == -1) {
+                *op_errno = errno;
+                goto out;
+        }
+        ret = sys_lstat (mtptemp, &st);
+        if (ret == -1) {
+                *op_errno = errno;
+                goto out;
+        }
+        /* the per-user directory must be a private directory of uid:0 */
+        if (!(S_ISDIR (st.st_mode) && (st.st_mode & ~S_IFMT) == 0700 &&
+              st.st_uid == uid && st.st_gid == 0)) {
+                *op_errno = EACCES;
+                goto out;
+        }
+        *sla = '/';
+
+        if (!mkdtemp (mtptemp)) {
+                *op_errno = errno;
+                goto out;
+        }
+
+        /** create private "cookie" symlink */
+
+        /*** occupy an entry in the hive dir via mkstemp */
+        ret = gf_asprintf (&cookie, "%s/"MB_HIVE"/mntXXXXXX",
+                           mountbroker_root);
+        if (ret == -1) {
+                cookie = NULL;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+        orig_umask = umask(S_IRWXG | S_IRWXO);
+        ret = mkstemp (cookie);
+        umask(orig_umask);
+        if (ret == -1) {
+                *op_errno = errno;
+                goto out;
+        }
+        sys_close (ret);
+
+        /*** assembly the path from cookie to mountpoint */
+        sla = strchr (sla - 1, '/');
+        GF_ASSERT (sla);
+        ret = gf_asprintf (&mntlink, "../user%d%s", uid, sla);
+        if (ret == -1) {
+                /* do not hand a possibly undefined pointer to the
+                 * cleanup below */
+                mntlink = NULL;
+                *op_errno = ENOMEM;
+                goto out;
+        }
+
+        /*** create cookie link in (to-be) mountpoint,
+             move it over to the final place */
+        *cookieswitch = '/';
+        ret = sys_symlink (mntlink, mtptemp);
+        if (ret != -1)
+                ret = sys_rename (mtptemp, cookie);
+        *cookieswitch = '\0';
+        if (ret == -1) {
+                *op_errno = errno;
+                goto out;
+        }
+
+        /** invoke glusterfs on the mountpoint */
+
+        runinit (&runner);
+        runner_add_arg (&runner, SBIN_DIR"/glusterfs");
+        seq_dict_foreach (argdict, _runner_add, &runner);
+        runner_add_arg (&runner, mtptemp);
+        ret = runner_run_reuse (&runner);
+        if (ret == -1) {
+                *op_errno = EIO; /* XXX hacky fake */
+                runner_log (&runner, "", GF_LOG_ERROR, "command failed");
+        }
+        runner_end (&runner);
+
+ out:
+
+        if (*op_errno) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_WARNING, *op_errno,
+                        GD_MSG_MOUNT_REQ_FAIL,
+                        "unsuccessful mount request (%s)",
+                        strerror (*op_errno));
+                if (mtptemp) {
+                        /* drop the cookie link inside the mount point
+                         * first, then the mount point itself */
+                        *cookieswitch = '/';
+                        sys_unlink (mtptemp);
+                        *cookieswitch = '\0';
+                        sys_rmdir (mtptemp);
+                }
+                if (cookie) {
+                        sys_unlink (cookie);
+                        GF_FREE (cookie);
+                }
+
+        } else {
+                ret = 0;
+                *path = cookie;
+        }
+
+        GF_FREE (mtptemp);
+        /* the link target is only needed while the symlink is created;
+         * it used to be leaked on every path */
+        GF_FREE (mntlink);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
new file mode 100644
index 00000000000..83267c203ca
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_MOUNTBROKER_H_
+#define _GLUSTERD_MOUNTBROKER_H_
+
+/* Name of the directory below the mountbroker root in which
+ * glusterd_do_mount() creates its cookie entries. */
+#define MB_HIVE "mb_hive"
+
+/* Relation a pattern demands between the mount arguments and its
+ * component list. */
+typedef enum {
+        SET_SUB = 1,
+        SET_SUPER,
+        SET_EQUAL,
+        SET_INTERSECT
+} gf_setrel_t;
+
+struct gf_mount_pattern {
+        char **components;      /* NULL-terminated list of components */
+        gf_setrel_t condition;  /* relation that has to hold */
+        gf_boolean_t negative;  /* invert the outcome of the condition */
+};
+typedef struct gf_mount_pattern gf_mount_pattern_t;
+
+struct gf_mount_spec {
+        struct cds_list_head speclist;  /* linkage in the list of specs */
+        char *label;                    /* name a mount request refers to */
+        gf_mount_pattern_t *patterns;   /* array of len patterns */
+        size_t len;
+};
+typedef struct gf_mount_spec gf_mount_spec_t;
+
+
+int parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc);
+
+int make_georep_mountspec (gf_mount_spec_t *mspec, const char *volname,
+                           char *user);
+int make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname,
+                            char *user, char *server);
+
+int glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno);
+
+#endif /* _GLUSTERD_MOUNTBROKER_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
new file mode 100644
index 00000000000..60b792ffac2
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-helper.h"
+
+/* Service name passed to glusterd_svc_init() and
+ * glusterd_svc_build_volfile_path() by the functions in this file. */
+static char *nfs_svc_name = "nfs";
+
+/*
+ * Decide whether the nfs service has to run: _gf_true as soon as one
+ * started volume is found for which the NFS_DISABLE_MAP_KEY lookup
+ * (default 1) yields 0, _gf_false when there is no such volume.
+ */
+static gf_boolean_t
+glusterd_nfssvc_need_start ()
+{
+        glusterd_conf_t    *conf = THIS->private;
+        glusterd_volinfo_t *vol  = NULL;
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                if (glusterd_is_volume_started (vol) &&
+                    !dict_get_str_boolean (vol->dict, NFS_DISABLE_MAP_KEY, 1))
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/* Initialise @svc under the nfs service name; returns the result of
+ * glusterd_svc_init(). */
+int
+glusterd_nfssvc_init (glusterd_svc_t *svc)
+{
+        int ret = glusterd_svc_init (svc, nfs_svc_name);
+
+        return ret;
+}
+
+/*
+ * (Re)generate the nfs volfile: build its path below the glusterd
+ * working directory and let glusterd_create_global_volfile() write the
+ * graph produced by build_nfs_graph there.  Returns that call's result.
+ */
+static int
+glusterd_nfssvc_create_volfile ()
+{
+        glusterd_conf_t *priv              = THIS->private;
+        char             volfile[PATH_MAX] = {0,};
+        int              ret               = -1;
+
+        glusterd_svc_build_volfile_path (nfs_svc_name, priv->workdir,
+                                         volfile, sizeof (volfile));
+
+        ret = glusterd_create_global_volfile (build_nfs_graph,
+                                              volfile, NULL);
+        return ret;
+}
+
+/*
+ * Manager callback of the nfs service.
+ *
+ * Initialises @svc on first use, stops the service with SIGKILL,
+ * regenerates the volfile and, when glusterd_nfssvc_need_start() says
+ * so, starts the service with @flags and connects to it.  @data is not
+ * used.  Returns 0 on success, otherwise the result of the step that
+ * failed.
+ */
+static int
+glusterd_nfssvc_manager (glusterd_svc_t *svc, void *data, int flags)
+{
+        int rc = -1;
+
+        if (!svc->inited) {
+                rc = glusterd_nfssvc_init (svc);
+                if (rc) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FAILED_INIT_NFSSVC, "Failed to init nfs "
+                                "service");
+                        goto out;
+                }
+
+                svc->inited = _gf_true;
+                gf_msg_debug (THIS->name, 0, "nfs service initialized");
+        }
+
+        rc = svc->stop (svc, SIGKILL);
+        if (rc)
+                goto out;
+
+        rc = glusterd_nfssvc_create_volfile ();
+        if (rc)
+                goto out;
+
+        /* nothing to start unless some started volume wants nfs */
+        if (!glusterd_nfssvc_need_start ())
+                goto out;
+
+        rc = svc->start (svc, flags);
+        if (rc)
+                goto out;
+
+        rc = glusterd_conn_connect (&(svc->conn));
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Start callback of the nfs service: glusterd_svc_start() with @flags
+ * and a NULL third argument. */
+static int
+glusterd_nfssvc_start (glusterd_svc_t *svc, int flags)
+{
+        int ret = glusterd_svc_start (svc, flags, NULL);
+
+        return ret;
+}
+
+/*
+ * Stop callback of the nfs service: stop @svc with signal @sig.  When
+ * the process was running before the stop and the stop succeeded,
+ * glusterd_nfs_pmap_deregister() is called as well.  Returns the result
+ * of glusterd_svc_stop().
+ */
+static int
+glusterd_nfssvc_stop (glusterd_svc_t *svc, int sig)
+{
+        gf_boolean_t was_running = _gf_false;
+        int          ret         = -1;
+
+        if (glusterd_proc_is_running (&(svc->proc)))
+                was_running = _gf_true;
+
+        ret = glusterd_svc_stop (svc, sig);
+        if (!ret && was_running)
+                glusterd_nfs_pmap_deregister ();
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Install the nfs specific manager/start/stop callbacks into @svc. */
+void
+glusterd_nfssvc_build (glusterd_svc_t *svc)
+{
+        svc->stop    = glusterd_nfssvc_stop;
+        svc->start   = glusterd_nfssvc_start;
+        svc->manager = glusterd_nfssvc_manager;
+}
+
+/*
+ * Bring the nfs service in line with the current configuration.
+ *
+ *  - volfile unchanged: nothing to do;
+ *  - only options changed (topology identical): rewrite the volfile
+ *    and call glusterd_fetchspec_notify();
+ *  - topology changed: run the service manager with
+ *    PROC_START_NO_WAIT.
+ *
+ * Returns 0 on success, otherwise the result of the step that failed.
+ */
+int
+glusterd_nfssvc_reconfigure ()
+{
+        gf_boolean_t     identical = _gf_false;
+        glusterd_conf_t *priv      = NULL;
+        xlator_t        *this      = NULL;
+        int              ret       = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO (this->name, this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        /*
+         * Compare the OLD and the NEW volfile by size and cksum, i.e.
+         * "character-by-character".  If they are the SAME, NOTHING has
+         * changed and ret is already 0 when leaving.
+         */
+        ret = glusterd_svc_check_volfile_identical (priv->nfs_svc.name,
+                                                    build_nfs_graph,
+                                                    &identical);
+        if (ret || identical)
+                goto out;
+
+        /*
+         * The volfiles differ.  Find out whether the topology changed
+         * or just the volume options.
+         */
+        identical = _gf_false; /* RESET the FLAG */
+        ret = glusterd_svc_check_topology_identical (priv->nfs_svc.name,
+                                                     build_nfs_graph,
+                                                     &identical);
+        if (ret)
+                goto out;
+
+        if (identical) {
+                /* Only options changed: write them to the NFS volfile
+                 * so that NFS gets reconfigured; notify only if the
+                 * write PASSES.
+                 */
+                ret = glusterd_nfssvc_create_volfile();
+                if (ret == 0)
+                        ret = glusterd_fetchspec_notify (THIS);
+        } else {
+                /* The topology changed: the NFS server needs to be
+                 * RESTARTED to ACT on the changed volfile.
+                 */
+                ret = priv->nfs_svc.manager (&(priv->nfs_svc), NULL,
+                                             PROC_START_NO_WAIT);
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
new file mode 100644
index 00000000000..6330b71ba7d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_NFS_SVC_H_
+#define _GLUSTERD_NFS_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+/* Install the nfs manager/start/stop callbacks into @svc. */
+void
+glusterd_nfssvc_build (glusterd_svc_t *svc);
+
+/* Initialise @svc as the nfs service. */
+int
+glusterd_nfssvc_init (glusterd_svc_t *svc);
+
+/* Re-evaluate the nfs volfile and reconfigure or restart the service
+ * as needed.  Declared with (void) so that it is a real prototype and
+ * calls with arguments are rejected by the compiler. */
+int
+glusterd_nfssvc_reconfigure (void);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
new file mode 100644
index 00000000000..e9f261c2fb3
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -0,0 +1,7877 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <time.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+#include <sys/mount.h>
+
+#include <libgen.h>
+#include "compat-uuid.h"
+
+#include "fnmatch.h"
+#include "xlator.h"
+#include "protocol-common.h"
+#include "glusterd.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "dict.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "glusterd-volgen.h"
+#include "glusterd-locks.h"
+#include "glusterd-messages.h"
+#include "glusterd-utils.h"
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "common-utils.h"
+#include "run.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-volgen.h"
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/wait.h>
+
+/* Defined in another translation unit. */
+extern char local_node_hostname[PATH_MAX];
+/* Forward declaration of a file-local helper. */
+static int
+glusterd_set_shared_storage (dict_t *dict, char *key, char *value,
+ char **op_errstr);
+
+/* Options that are valid for all volumes, i.e. for the volume name    *
+ * "all" (see ALL_VOLUME_OPTION_CHECK).  To add newer options for all  *
+ * volumes, just add more entries to this table and keep the NULL      *
+ * entry last: it ends the scan of the table.                          *
+ */
+glusterd_all_vol_opts valid_all_vol_opts[] = {
+ { GLUSTERD_QUORUM_RATIO_KEY },
+ { GLUSTERD_SHARED_STORAGE_KEY },
+ { NULL },
+};
+
+/*
+ * Reject an option that does not fit the kind of volume name given.
+ *
+ * key is looked up in valid_all_vol_opts.  When volname is "all" and
+ * key is not in the table, or volname is anything else and key is in
+ * the table, ret is set to -1, *op_errstr is set to a gf_strdup()ed
+ * message and control jumps to label.  Otherwise nothing is changed.
+ *
+ * Notes for callers: op_errstr has to be a char ** (it is
+ * dereferenced), key is evaluated once per table entry, and the
+ * expansion declares a local named i, so the arguments must not refer
+ * to a variable i of the caller.
+ */
+#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label) \
+ do { \
+ gf_boolean_t _all = !strcmp ("all", volname); \
+ gf_boolean_t _ratio = _gf_false; \
+ int32_t i = 0; \
+ \
+ for (i = 0; valid_all_vol_opts[i].option; i++) { \
+ if (!strcmp (key, valid_all_vol_opts[i].option)) { \
+ _ratio = _gf_true; \
+ break; \
+ } \
+ } \
+ \
+ if (_all && !_ratio) { \
+ ret = -1; \
+ *op_errstr = gf_strdup ("Not a valid option for all " \
+ "volumes"); \
+ goto label; \
+ } else if (!_all && _ratio) { \
+ ret = -1; \
+ *op_errstr = gf_strdup ("Not a valid option for " \
+ "single volume"); \
+ goto label; \
+ } \
+ } while (0)
+
+/* File-local list head; judging by the name it queues op state machine
+ * events -- confirm against its users. */
+static struct cds_list_head gd_op_sm_queue;
+synclock_t gd_op_sm_lock;
+/* Global op info, zero-initialised. */
+glusterd_op_info_t opinfo = {{0},};
+
+/* Forward declaration. */
+int
+glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected);
+
+
+/*
+ * Create the dict that holds the per-transaction opinfo objects and
+ * zero the global transaction id.  Returns 0 on success, -1 when the
+ * dict cannot be created.
+ */
+int32_t
+glusterd_txn_opinfo_dict_init ()
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *conf = NULL;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        conf->glusterd_txn_opinfo = dict_new ();
+        if (conf->glusterd_txn_opinfo == NULL)
+                return -1;
+
+        memset (conf->global_txn_id, '\0', sizeof(uuid_t));
+
+        return 0;
+}
+
+/* Drop the reference on the per-transaction opinfo dict, if there is
+ * one. */
+void
+glusterd_txn_opinfo_dict_fini ()
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *conf = NULL;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (conf->glusterd_txn_opinfo == NULL)
+                return;
+
+        dict_unref (conf->glusterd_txn_opinfo);
+}
+
+/*
+ * Fill @opinfo from the optional arguments.
+ *
+ * @state, @op and @req are copied only when non-NULL (the fields are
+ * left alone otherwise); ->op_ctx is always written: a new reference
+ * on @op_ctx, or NULL.  ->txn_generation is taken from the current
+ * conf->generation.
+ */
+void
+glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo,
+                          glusterd_op_sm_state_info_t *state, int *op,
+                          dict_t *op_ctx, rpcsvc_request_t *req)
+{
+        glusterd_conf_t *priv = NULL;
+
+        GF_ASSERT (opinfo);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        if (state != NULL)
+                opinfo->state = *state;
+
+        if (op != NULL)
+                opinfo->op = *op;
+
+        opinfo->op_ctx = (op_ctx != NULL) ? dict_ref(op_ctx) : NULL;
+
+        if (req != NULL)
+                opinfo->req = req;
+
+        opinfo->txn_generation = priv->generation;
+        cmm_smp_rmb ();
+}
+
+/*
+ * Allocate a transaction id, store it in *@txn_id and register it in
+ * @dict under "transaction_id".
+ *
+ * With an op-version below GD_OP_VERSION_3_6_0 the id is a copy of
+ * the global transaction id, otherwise a freshly generated uuid.
+ *
+ * Returns 0 on success.  On failure returns non-zero and leaves
+ * *@txn_id NULL (an id that was already allocated is released).
+ */
+int32_t
+glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id)
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *conf = NULL;
+        int32_t          ret  = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+        GF_ASSERT (dict);
+
+        *txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (*txn_id == NULL)
+                return ret;
+
+        if (conf->op_version >= GD_OP_VERSION_3_6_0)
+                gf_uuid_generate (**txn_id);
+        else
+                gf_uuid_copy (**txn_id, conf->global_txn_id);
+
+        ret = dict_set_bin (dict, "transaction_id",
+                            *txn_id, sizeof (**txn_id));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set transaction id.");
+                GF_FREE (*txn_id);
+                *txn_id = NULL;
+                return ret;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Transaction_id = %s", uuid_utoa (**txn_id));
+
+        return ret;
+}
+
+/*
+ * Copy the opinfo stored for transaction *@txn_id into *@opinfo.
+ *
+ * Returns 0 on success, -1 when @txn_id or @opinfo is NULL, otherwise
+ * the error of the dict lookup (no opinfo stored for that id).
+ */
+int32_t
+glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo)
+{
+        glusterd_txn_opinfo_obj *stored = NULL;
+        xlator_t                *this   = THIS;
+        glusterd_conf_t         *conf   = NULL;
+        int32_t                  ret    = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (txn_id == NULL || opinfo == NULL) {
+                gf_msg_callingfn (this->name, GF_LOG_ERROR, 0,
+                                  GD_MSG_TRANS_ID_GET_FAIL,
+                                  "Empty transaction id or opinfo received.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_bin(conf->glusterd_txn_opinfo,
+                           uuid_utoa (*txn_id),
+                           (void **) &stored);
+        if (ret == 0) {
+                (*opinfo) = stored->opinfo;
+
+                gf_msg_debug (this->name, 0,
+                              "Successfully got opinfo for transaction ID : %s",
+                              uuid_utoa (*txn_id));
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Store a copy of *@opinfo for transaction *@txn_id.
+ *
+ * When the transaction has no opinfo object in the dict yet, one is
+ * allocated and inserted first; an existing object is overwritten in
+ * place.
+ *
+ * Returns 0 on success and non-zero when @txn_id or @opinfo is NULL,
+ * or when the object cannot be allocated or inserted.
+ */
+int32_t
+glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo)
+{
+        int32_t                  ret        = -1;
+        glusterd_txn_opinfo_obj *opinfo_obj = NULL;
+        glusterd_conf_t         *priv       = NULL;
+        xlator_t                *this       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!txn_id) {
+                gf_msg_callingfn (this->name, GF_LOG_ERROR, 0,
+                                  GD_MSG_TRANS_ID_GET_FAIL,
+                                  "Empty transaction id received.");
+                ret = -1;
+                goto out;
+        }
+
+        /* Refuse a NULL opinfo up front, as glusterd_get_txn_opinfo()
+         * does; it would otherwise be dereferenced below, after an
+         * empty object may already have been inserted into the dict.
+         */
+        if (!opinfo) {
+                gf_msg_callingfn (this->name, GF_LOG_ERROR, 0,
+                                  GD_MSG_TRANS_ID_GET_FAIL,
+                                  "Empty opinfo received.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_bin(priv->glusterd_txn_opinfo,
+                           uuid_utoa (*txn_id),
+                           (void **) &opinfo_obj);
+        if (ret) {
+                /* first opinfo for this transaction: create its object */
+                opinfo_obj = GF_CALLOC (1, sizeof(glusterd_txn_opinfo_obj),
+                                        gf_common_mt_txn_opinfo_obj_t);
+                if (!opinfo_obj) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_bin(priv->glusterd_txn_opinfo,
+                                   uuid_utoa (*txn_id), opinfo_obj,
+                                   sizeof(glusterd_txn_opinfo_obj));
+                if (ret) {
+                        gf_msg_callingfn (this->name, GF_LOG_ERROR, errno,
+                                          GD_MSG_DICT_SET_FAILED,
+                                          "Unable to set opinfo for transaction"
+                                          " ID : %s", uuid_utoa (*txn_id));
+                        goto out;
+                }
+        }
+
+        opinfo_obj->opinfo = (*opinfo);
+
+        gf_msg_debug (this->name, 0,
+                      "Successfully set opinfo for transaction ID : %s",
+                      uuid_utoa (*txn_id));
+        ret = 0;
+out:
+        /* ret is only non-zero here while opinfo_obj is either NULL or
+         * a fresh object that did not make it into the dict */
+        if (ret)
+                if (opinfo_obj)
+                        GF_FREE (opinfo_obj);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Forget the opinfo stored for transaction *@txn_id: drop the
+ * reference on its op_ctx (if any) and delete the entry from the dict.
+ *
+ * Returns 0 on success, -1 when @txn_id is NULL, otherwise the error
+ * of the opinfo lookup.
+ */
+int32_t
+glusterd_clear_txn_opinfo (uuid_t *txn_id)
+{
+        glusterd_op_info_t  stored = {{0},};
+        xlator_t           *this   = THIS;
+        glusterd_conf_t    *conf   = NULL;
+        int32_t             ret    = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (txn_id == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_ID_GET_FAIL,
+                        "Empty transaction id received.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_get_txn_opinfo (txn_id, &stored);
+        if (ret != 0) {
+                gf_msg_callingfn (this->name, GF_LOG_ERROR, 0,
+                                  GD_MSG_TRANS_OPINFO_GET_FAIL,
+                                  "Unable to get transaction opinfo "
+                                  "for transaction ID : %s",
+                                  uuid_utoa (*txn_id));
+                goto out;
+        }
+
+        if (stored.op_ctx != NULL)
+                dict_unref (stored.op_ctx);
+
+        dict_del(conf->glusterd_txn_opinfo, uuid_utoa (*txn_id));
+
+        gf_msg_debug (this->name, 0,
+                      "Successfully cleared opinfo for transaction ID : %s",
+                      uuid_utoa (*txn_id));
+
+        ret = 0;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+static int glusterfs_port = GLUSTERD_DEFAULT_PORT;
+/*
+ * Printable names of the op state machine states, indexed by state
+ * number.  glusterd_op_sm_state_name_get() returns the entry at index
+ * GD_OP_STATE_MAX for out-of-range values -- presumably the trailing
+ * "Invalid" entry; keep the table in step with the state enum.
+ */
+static char *glusterd_op_sm_state_names[] = {
+ "Default",
+ "Lock sent",
+ "Locked",
+ "Stage op sent",
+ "Staged",
+ "Commit op sent",
+ "Committed",
+ "Unlock sent",
+ "Stage op failed",
+ "Commit op failed",
+ "Brick op sent",
+ "Brick op failed",
+ "Brick op Committed",
+ "Brick op Commit failed",
+ "Ack drain",
+ "Invalid",
+};
+
+/*
+ * Printable names of the op state machine events, indexed by event
+ * number.  glusterd_op_sm_event_name_get() returns the entry at index
+ * GD_OP_EVENT_MAX for out-of-range values -- presumably the trailing
+ * "GD_OP_EVENT_INVALID" entry; keep the table in step with the event
+ * enum.
+ */
+static char *glusterd_op_sm_event_names[] = {
+ "GD_OP_EVENT_NONE",
+ "GD_OP_EVENT_START_LOCK",
+ "GD_OP_EVENT_LOCK",
+ "GD_OP_EVENT_RCVD_ACC",
+ "GD_OP_EVENT_ALL_ACC",
+ "GD_OP_EVENT_STAGE_ACC",
+ "GD_OP_EVENT_COMMIT_ACC",
+ "GD_OP_EVENT_RCVD_RJT",
+ "GD_OP_EVENT_STAGE_OP",
+ "GD_OP_EVENT_COMMIT_OP",
+ "GD_OP_EVENT_UNLOCK",
+ "GD_OP_EVENT_START_UNLOCK",
+ "GD_OP_EVENT_ALL_ACK",
+ "GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP",
+ "GD_OP_EVENT_INVALID"
+};
+
+/*
+ * Map an op state-machine state to its printable name.  Any value outside
+ * [0, GD_OP_STATE_MAX) yields the entry stored at index GD_OP_STATE_MAX.
+ */
+char*
+glusterd_op_sm_state_name_get (int state)
+{
+        int idx = state;
+
+        if ((idx < 0) || (idx >= GD_OP_STATE_MAX))
+                idx = GD_OP_STATE_MAX;
+
+        return glusterd_op_sm_state_names[idx];
+}
+
+/*
+ * Map an op state-machine event to its printable name.  Any value outside
+ * [0, GD_OP_EVENT_MAX) yields the entry stored at index GD_OP_EVENT_MAX.
+ */
+char*
+glusterd_op_sm_event_name_get (int event)
+{
+        int idx = event;
+
+        if ((idx < 0) || (idx >= GD_OP_EVENT_MAX))
+                idx = GD_OP_EVENT_MAX;
+
+        return glusterd_op_sm_event_names[idx];
+}
+
+/* Release a lock context with GF_FREE; a NULL ctx is ignored. */
+void
+glusterd_destroy_lock_ctx (glusterd_op_lock_ctx_t *ctx)
+{
+        if (ctx != NULL) {
+                GF_FREE (ctx);
+        }
+}
+/* Store `status` in volinfo->status; volinfo is asserted to be non-NULL. */
+void
+glusterd_set_volume_status (glusterd_volinfo_t *volinfo,
+ glusterd_volume_status status)
+{
+ GF_ASSERT (volinfo);
+ volinfo->status = status;
+}
+
+/*
+ * Inject GD_OP_EVENT_ALL_ACC for the given transaction (with no event
+ * context) and return the result of glusterd_op_sm_inject_event().
+ */
+static int
+glusterd_op_sm_inject_all_acc (uuid_t *txn_id)
+{
+        int32_t rc = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC,
+                                                  txn_id, NULL);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Reject bitrot/scrub options given through 'volume set'.
+ *
+ * key    - option name; matched by prefix, with or without "features."
+ * value  - unused here
+ * errstr - buffer that receives a hint naming the 'volume bitrot' command
+ *          to use instead
+ * size   - size of errstr in bytes
+ *
+ * Returns -1 (and fills errstr) when the key is a bitrot/scrub option,
+ * 0 otherwise.
+ *
+ * The matches are prefix matches, so the longer scrub-* names must be
+ * tested before the bare "scrub" prefix; otherwise "scrub-throttle" is
+ * caught by the "scrub" branch and gets the pause/resume hint.
+ */
+static int
+glusterd_check_bitrot_cmd (char *key, char *value, char *errstr, size_t size)
+{
+        int ret = -1;
+
+        if ((!strncmp (key, "bitrot", strlen ("bitrot"))) ||
+            (!strncmp (key, "features.bitrot", strlen ("features.bitrot")))) {
+                snprintf (errstr, size, " 'gluster volume set <VOLNAME> %s' "
+                          "is invalid command. Use 'gluster volume bitrot "
+                          "<VOLNAME> {enable|disable}' instead.", key);
+                ret = -1;
+                goto out;
+        } else if ((!strncmp (key, "scrub-throttle",
+                              strlen ("scrub-throttle"))) ||
+                   (!strncmp (key, "features.scrub-throttle",
+                              strlen ("features.scrub-throttle")))) {
+                snprintf (errstr, size, " 'gluster volume set <VOLNAME> %s' is "
+                          "invalid command. Use 'gluster volume bitrot "
+                          "<VOLNAME> scrub-throttle {lazy|normal|aggressive}' "
+                          "instead.",
+                          key);
+                ret = -1;
+                goto out;
+        } else if ((!strncmp (key, "scrub-freq", strlen ("scrub-freq"))) ||
+                   (!strncmp (key, "features.scrub-freq",
+                              strlen ("features.scrub-freq")))) {
+                snprintf (errstr, size, " 'gluster volume "
+                          "set <VOLNAME> %s' is invalid command. Use 'gluster "
+                          "volume bitrot <VOLNAME> scrub-frequency"
+                          " {hourly|daily|weekly|biweekly|monthly}' instead.",
+                          key);
+                ret = -1;
+                goto out;
+        } else if ((!strncmp (key, "scrub", strlen ("scrub"))) ||
+                   (!strncmp (key, "features.scrub",
+                              strlen ("features.scrub")))) {
+                /* Catch-all for the remaining scrub* keys; keep it last. */
+                snprintf (errstr, size, " 'gluster volume set <VOLNAME> %s' is "
+                          "invalid command. Use 'gluster volume bitrot "
+                          "<VOLNAME> scrub {pause|resume}' instead.", key);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Reject the deprecated quota / inode-quota options of 'volume set'.
+ *
+ * For keys other than (features.)quota and (features.)inode-quota this
+ * returns 0.  For those keys the value is parsed as a boolean; a parse
+ * failure is returned as-is, otherwise errstr is filled with a hint naming
+ * the replacement command and -1 is returned.
+ */
+static int
+glusterd_check_quota_cmd (char *key, char *value, char *errstr, size_t size)
+{
+        gf_boolean_t enable         = _gf_false;
+        int          is_quota       = 0;
+        int          is_inode_quota = 0;
+        int          rc             = -1;
+
+        is_quota = (!strcmp (key, "quota") ||
+                    !strcmp (key, "features.quota"));
+        if (!is_quota)
+                is_inode_quota = (!strcmp (key, "inode-quota") ||
+                                  !strcmp (key, "features.inode-quota"));
+
+        if (!is_quota && !is_inode_quota)
+                return 0;
+
+        rc = gf_string2boolean (value, &enable);
+        if (rc)
+                return rc;
+
+        if (!enable) {
+                /* inode-quota disable not supported either,
+                 * so both keys point at quota disable
+                 */
+                snprintf (errstr, size, " 'gluster "
+                          "volume set <VOLNAME> %s %s' is "
+                          "deprecated. Use 'gluster volume "
+                          "quota <VOLNAME> disable' instead.",
+                          key, value);
+        } else if (is_quota) {
+                snprintf (errstr, size, " 'gluster "
+                          "volume set <VOLNAME> %s %s' is "
+                          "deprecated. Use 'gluster volume "
+                          "quota <VOLNAME> enable' instead.",
+                          key, value);
+        } else {
+                snprintf (errstr, size, " 'gluster "
+                          "volume set <VOLNAME> %s %s' is "
+                          "deprecated. Use 'gluster volume "
+                          "inode-quota <VOLNAME> enable' instead.",
+                          key, value);
+        }
+
+        return -1;
+}
+
+/*
+ * Build the brick-op request sent to a brick process for operation `op`.
+ *
+ * op        - glusterd operation; only the ops handled in the switch below
+ *             are supported, anything else fails
+ * brickinfo - brick the request is for (its path is used as the request
+ *             name for GD_OP_PROFILE_VOLUME)
+ * req       - out: receives the newly allocated request on success and is
+ *             left untouched on failure
+ * dict      - operation dictionary; serialized into the request input
+ *
+ * Depending on the op, the request name is a string literal, a pointer
+ * borrowed from brickinfo, or a gf_strdup() copy.  Only the copy is
+ * tracked (dup_name) so that it can be released if a later step fails.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
+                                 gd1_mgmt_brick_op_req **req, dict_t *dict)
+{
+        int                     ret = -1;
+        gd1_mgmt_brick_op_req  *brick_req = NULL;
+        char                   *volname = NULL;
+        char                   *dup_name = NULL;
+        char                    name[1024] = {0,};
+        gf_xl_afr_op_t          heal_op = GF_SHD_OP_INVALID;
+        xlator_t               *this = NULL;
+        glusterd_volinfo_t     *volinfo = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (op < GD_OP_MAX);
+        GF_ASSERT (op > GD_OP_NONE);
+        GF_ASSERT (req);
+
+
+        switch (op) {
+        case GD_OP_REMOVE_BRICK:
+        case GD_OP_STOP_VOLUME:
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+                brick_req->op = GLUSTERD_BRICK_TERMINATE;
+                brick_req->name = "";
+                break;
+        case GD_OP_PROFILE_VOLUME:
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_BRICK_XLATOR_INFO;
+                brick_req->name = brickinfo->path;
+
+                break;
+        case GD_OP_HEAL_VOLUME:
+        {
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
+                brick_req->name = "";
+                ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+                if (ret)
+                        goto out;
+                ret = dict_set_int32 (dict, "xl-op", heal_op);
+                /* Do not serialize a dict that is missing "xl-op". */
+                if (ret)
+                        goto out;
+        }
+                break;
+        case GD_OP_STATUS_VOLUME:
+        {
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+                brick_req->op = GLUSTERD_BRICK_STATUS;
+                brick_req->name = "";
+        }
+                break;
+        case GD_OP_REBALANCE:
+        case GD_OP_DEFRAG_BRICK_VOLUME:
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret)
+                        goto out;
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                /* volinfo is only valid when the lookup succeeded. */
+                if (ret)
+                        goto out;
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                        snprintf (name, sizeof (name), "%s-tier-dht", volname);
+                else
+                        snprintf (name, sizeof (name), "%s-dht", volname);
+                dup_name = gf_strdup (name);
+                if (!dup_name) {
+                        ret = -1;
+                        goto out;
+                }
+                brick_req->name = dup_name;
+
+                break;
+        case GD_OP_SNAP:
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_BRICK_BARRIER;
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret)
+                        goto out;
+                dup_name = gf_strdup (volname);
+                if (!dup_name) {
+                        ret = -1;
+                        goto out;
+                }
+                brick_req->name = dup_name;
+
+                break;
+        case GD_OP_BARRIER:
+                brick_req = GF_CALLOC (1, sizeof(*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+                brick_req->op = GLUSTERD_BRICK_BARRIER;
+                ret = dict_get_str(dict, "volname", &volname);
+                if (ret)
+                        goto out;
+                dup_name = gf_strdup (volname);
+                if (!dup_name) {
+                        ret = -1;
+                        goto out;
+                }
+                brick_req->name = dup_name;
+                break;
+
+        default:
+                /* Unsupported op: ret still holds its initial -1. */
+                goto out;
+                break;
+        }
+
+        ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val,
+                                           &brick_req->input.input_len);
+        if (ret)
+                goto out;
+        *req = brick_req;
+        ret = 0;
+
+out:
+        if (ret) {
+                /* Only the duplicated name is owned here; literal and
+                 * borrowed names must not be freed.
+                 */
+                GF_FREE (dup_name);
+                if (brick_req)
+                        GF_FREE (brick_req);
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Build the request sent to a node-level daemon for operation `op`.
+ *
+ * op   - glusterd operation; only GD_OP_PROFILE_VOLUME,
+ *        GD_OP_STATUS_VOLUME and GD_OP_SCRUB_STATUS are supported
+ * req  - out: receives the newly allocated request on success and is left
+ *        untouched on failure
+ * dict - operation dictionary; serialized into the request input
+ *
+ * For GD_OP_SCRUB_STATUS the request name is a gf_strdup() copy of the
+ * volume name; it is tracked in dup_name so it can be released if a later
+ * step fails.  The other ops use a string literal as the name.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+                                dict_t *dict)
+{
+        int                     ret = -1;
+        gd1_mgmt_brick_op_req  *brick_req = NULL;
+        char                   *volname = NULL;
+        char                   *dup_name = NULL;
+
+        GF_ASSERT (op < GD_OP_MAX);
+        GF_ASSERT (op > GD_OP_NONE);
+        GF_ASSERT (req);
+
+        switch (op) {
+        case GD_OP_PROFILE_VOLUME:
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_NODE_PROFILE;
+                brick_req->name = "";
+
+                break;
+
+        case GD_OP_STATUS_VOLUME:
+                brick_req = GF_CALLOC (1, sizeof (*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_NODE_STATUS;
+                brick_req->name = "";
+
+                break;
+
+        case GD_OP_SCRUB_STATUS:
+                brick_req = GF_CALLOC (1, sizeof(*brick_req),
+                                       gf_gld_mt_mop_brick_req_t);
+                if (!brick_req)
+                        goto out;
+
+                brick_req->op = GLUSTERD_NODE_BITROT;
+
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret)
+                        goto out;
+
+                dup_name = gf_strdup (volname);
+                if (!dup_name) {
+                        ret = -1;
+                        goto out;
+                }
+                brick_req->name = dup_name;
+                break;
+        default:
+                /* Unsupported op: ret still holds its initial -1. */
+                goto out;
+        }
+
+        ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val,
+                                           &brick_req->input.input_len);
+
+        if (ret)
+                goto out;
+
+        *req = brick_req;
+        ret = 0;
+
+out:
+        if (ret) {
+                /* Literal names are not owned; free only the copy. */
+                GF_FREE (dup_name);
+                if (brick_req)
+                        GF_FREE (brick_req);
+        }
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Validate the value of a quorum option against this xlator's option table.
+ *
+ * fullkey is expected in "<prefix>.<option>" form; the part after the first
+ * '.' is looked up with xlator_volume_option_get() and checked with
+ * xlator_option_validate().
+ *
+ * Returns 0 when fullkey is not a quorum option or the value validates,
+ * -1 when fullkey has no '.' or the option is not found in the option
+ * table, otherwise the result of xlator_option_validate().
+ */
+static int
+glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value,
+                                  char **op_errstr)
+{
+        int              ret = 0;
+        char            *key = NULL;
+        volume_option_t *opt = NULL;
+
+        if (!glusterd_is_quorum_option (fullkey))
+                goto out;
+        key = strchr (fullkey, '.');
+        if (key == NULL) {
+                ret = -1;
+                goto out;
+        }
+        key++;
+        opt = xlator_volume_option_get (this, key);
+        /* Never hand a missing option descriptor to the validator. */
+        if (!opt) {
+                ret = -1;
+                goto out;
+        }
+        ret = xlator_option_validate (this, key, value, opt, op_errstr);
+out:
+        return ret;
+}
+
+/*
+ * Validate a request to enable or disable the shared storage option.
+ *
+ * Keys other than GLUSTERD_SHARED_STORAGE_KEY are accepted untouched.
+ * For that key:
+ *   - the value must be exactly "enable" or "disable";
+ *   - "disable" requires the option to be recorded in conf->opts with a
+ *     value that does not start with "disable";
+ *   - "enable" requires that the GLUSTER_SHARED_STORAGE volume does not
+ *     exist yet and that more than one peer is counted as connected.
+ *
+ * errstr receives the failure text; it is written with a PATH_MAX bound,
+ * so the caller's buffer must be at least that large.
+ *
+ * Returns 0 when the request is acceptable, non-zero otherwise.
+ */
+static int
+glusterd_validate_shared_storage (char *key, char *value, char *errstr)
+{
+        xlator_t        *this        = NULL;
+        glusterd_conf_t *conf        = NULL;
+        char            *current     = NULL;
+        int32_t          peer_count  = -1;
+        int32_t          vol_present = -1;
+        int32_t          ret         = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, key, out);
+        GF_VALIDATE_OR_GOTO (this->name, value, out);
+        GF_VALIDATE_OR_GOTO (this->name, errstr, out);
+
+        ret = 0;
+
+        /* Not the shared storage option: nothing to validate. */
+        if (strcmp (key, GLUSTERD_SHARED_STORAGE_KEY) != 0)
+                goto out;
+
+        if ((strcmp (value, "enable") != 0) &&
+            (strcmp (value, "disable") != 0)) {
+                snprintf (errstr, PATH_MAX,
+                          "Invalid option(%s). Valid options "
+                          "are 'enable' and 'disable'", value);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+                ret = -1;
+                goto out;
+        }
+
+        if (strncmp (value, "disable", strlen ("disable")) == 0) {
+                ret = dict_get_str (conf->opts, GLUSTERD_SHARED_STORAGE_KEY,
+                                    &current);
+                if (ret ||
+                    (strncmp (current, "disable", strlen ("disable")) == 0)) {
+                        snprintf (errstr, PATH_MAX, "Shared storage volume "
+                                  "does not exist. Please enable shared storage"
+                                  " for creating shared storage volume.");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SHARED_STORAGE_DOES_NOT_EXIST, "%s",
+                                errstr);
+                        ret = -1;
+                }
+                goto out;
+        }
+
+        /* From here on the request is "enable". */
+        vol_present = glusterd_check_volume_exists (GLUSTER_SHARED_STORAGE);
+        if (vol_present) {
+                snprintf (errstr, PATH_MAX,
+                          "Shared storage volume("GLUSTER_SHARED_STORAGE
+                          ") already exists.");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_ALREADY_EXIST, "%s", errstr);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_count_connected_peers (&peer_count);
+        if (ret) {
+                snprintf (errstr, PATH_MAX,
+                          "Failed to calculate number of connected peers.");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_COUNT_GET_FAIL, "%s", errstr);
+                goto out;
+        }
+
+        if (peer_count <= 1) {
+                snprintf (errstr, PATH_MAX,
+                          "More than one node should "
+                          "be up/present in the cluster to enable this option");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INSUFFICIENT_UP_NODES, "%s", errstr);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Staging (validation) step of 'volume set'.
+ *
+ * dict      - request dictionary.  Read here: "count", "volname", the
+ *             "key<N>"/"value<N>" pairs and, on non-originator nodes,
+ *             "check-op-version", "new-op-version" and "op-version<N>".
+ *             Written here on the originator: "op-version<N>",
+ *             "new-op-version" and "check-op-version".
+ * op_errstr - out: receives a gf_strdup()'d error description on failure.
+ *
+ * Every key/value pair is run through the bitrot, quota, option-table,
+ * quorum, shared-storage and trash-dir checks below, and the op-version
+ * required by the whole request is computed along the way.  A volume name
+ * of "all" (case-insensitive) selects cluster-wide handling.
+ *
+ * Returns 0 when the request may proceed, non-zero otherwise.
+ */
+static int
+glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
+{
+        int ret = -1;
+        char *volname = NULL;
+        int exists = 0;
+        char *key = NULL;
+        char *key_fixed = NULL;
+        char *value = NULL;
+        char *val_dup = NULL;
+        char str[100] = {0, };
+        char *trash_path = NULL;
+        int trash_path_len = 0;
+        int count = 0;
+        int dict_count = 0;
+        char errstr[PATH_MAX] = {0, };
+        glusterd_volinfo_t *volinfo = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        dict_t *val_dict = NULL;
+        gf_boolean_t global_opt = _gf_false;
+        glusterd_volinfo_t *voliter = NULL;
+        glusterd_conf_t *priv = NULL;
+        xlator_t *this = NULL;
+        uint32_t new_op_version = GD_OP_VERSION_MIN;
+        uint32_t local_new_op_version = GD_OP_VERSION_MIN;
+        uint32_t local_new_client_op_version = GD_OP_VERSION_MIN;
+        uint32_t key_op_version = GD_OP_VERSION_MIN;
+        uint32_t local_key_op_version = GD_OP_VERSION_MIN;
+        gf_boolean_t origin_glusterd = _gf_true;
+        gf_boolean_t check_op_version = _gf_true;
+        gf_boolean_t trash_enabled = _gf_false;
+        gf_boolean_t all_vol = _gf_false;
+        struct stat stbuf = {0, };
+
+        GF_ASSERT (dict);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Scratch dict: holds one key/value at a time for the volfile
+         * validation near the end of each loop iteration.
+         */
+        val_dict = dict_new();
+        if (!val_dict)
+                goto out;
+
+        /* Check if we can support the required op-version
+         * This check is not done on the originator glusterd. The originator
+         * glusterd sets this value.
+         */
+        origin_glusterd = is_origin_glusterd (dict);
+
+        if (!origin_glusterd) {
+                /* Check for v3.3.x origin glusterd */
+                check_op_version = dict_get_str_boolean (dict,
+                                                         "check-op-version",
+                                                         _gf_false);
+
+                if (check_op_version) {
+                        ret = dict_get_uint32 (dict, "new-op-version",
+                                               &new_op_version);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Failed to get new_op_version");
+                                goto out;
+                        }
+
+                        if ((new_op_version > GD_OP_VERSION_MAX) ||
+                            (new_op_version < GD_OP_VERSION_MIN)) {
+                                ret = -1;
+                                snprintf (errstr, sizeof (errstr),
+                                          "Required op_version (%"PRIu32") is not "
+                                          "supported", new_op_version);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_UNSUPPORTED_VERSION, "%s",
+                                        errstr);
+                                goto out;
+                        }
+                }
+        }
+
+        ret = dict_get_int32 (dict, "count", &dict_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Count(dict),not set in Volume-Set");
+                goto out;
+        }
+
+        if (dict_count == 0) {
+                /*No options would be specified of volume set help */
+                if (dict_get (dict, "help" )) {
+                        ret = 0;
+                        goto out;
+                }
+
+                if (dict_get (dict, "help-xml" )) {
+#if (HAVE_LIB_XML)
+                        ret = 0;
+                        goto out;
+#else
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MODULE_NOT_INSTALLED,
+                                "libxml not present in the system");
+                        *op_errstr = gf_strdup ("Error: xml libraries not "
+                                                "present to produce xml-output");
+                        goto out;
+#endif
+                }
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_OPTIONS_GIVEN, "No options received ");
+                *op_errstr = gf_strdup ("Options not specified");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        if (strcasecmp (volname, "all") != 0) {
+                exists = glusterd_check_volume_exists (volname);
+                if (!exists) {
+                        snprintf (errstr, sizeof (errstr),
+                                  FMTSTR_CHECK_VOL_EXISTS, volname);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_FOUND, "%s", errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_FOUND,
+                                FMTSTR_CHECK_VOL_EXISTS, volname);
+                        goto out;
+                }
+
+                ret = glusterd_validate_volume_id (dict, volinfo);
+                if (ret)
+                        goto out;
+
+                local_new_op_version = volinfo->op_version;
+                local_new_client_op_version = volinfo->client_op_version;
+
+        } else {
+                all_vol = _gf_true;
+        }
+
+        /* Walk key1/value1, key2/value2, ... until a key is missing. */
+        for ( count = 1; ret != 1 ; count++ ) {
+                global_opt = _gf_false;
+                sprintf (str, "key%d", count);
+                ret = dict_get_str (dict, str, &key);
+                if (ret)
+                        break;
+
+                sprintf (str, "value%d", count);
+                ret = dict_get_str (dict, str, &value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "invalid key,value pair in 'volume set'");
+                        ret = -1;
+                        goto out;
+                }
+
+                if (strcmp (key, "config.memory-accounting") == 0) {
+                        gf_msg_debug (this->name, 0,
+                                      "enabling memory accounting for volume %s",
+                                      volname);
+                        ret = 0;
+                }
+
+                if (strcmp (key, "config.transport") == 0) {
+                        gf_msg_debug (this->name, 0,
+                                      "changing transport-type for volume %s",
+                                      volname);
+                        ret = 0;
+                        /* if value is none of 'tcp/rdma/tcp,rdma' error out */
+                        if (!((strcasecmp (value, "rdma") == 0) ||
+                              (strcasecmp (value, "tcp") == 0) ||
+                              (strcasecmp (value, "tcp,rdma") == 0) ||
+                              (strcasecmp (value, "rdma,tcp") == 0))) {
+                                ret = snprintf (errstr, sizeof (errstr),
+                                                "transport-type %s does "
+                                                "not exist", value);
+                                /* lets not bother about above return value,
+                                   its a failure anyways */
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                ret = glusterd_check_bitrot_cmd (key, value, errstr,
+                                                 sizeof (errstr));
+                if (ret)
+                        goto out;
+
+                ret = glusterd_check_quota_cmd (key, value, errstr, sizeof (errstr));
+                if (ret)
+                        goto out;
+
+                if (is_key_glusterd_hooks_friendly (key))
+                        continue;
+
+                ret = glusterd_volopt_validate (volinfo, dict, key, value,
+                                                op_errstr);
+                if (ret)
+                        goto out;
+
+                exists = glusterd_check_option_exists (key, &key_fixed);
+                if (exists == -1) {
+                        ret = -1;
+                        goto out;
+                }
+
+                if (!exists) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_INVALID_ENTRY,
+                                "Option with name: %s does not exist", key);
+                        ret = snprintf (errstr, sizeof (errstr),
+                                        "option : %s does not exist",
+                                        key);
+                        if (key_fixed)
+                                snprintf (errstr + ret, sizeof (errstr) - ret,
+                                          "\nDid you mean %s?", key_fixed);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (key_fixed)
+                        key = key_fixed;
+
+                /* Check if the key is cluster.op-version and set
+                 * local_new_op_version to the value given if possible.
+                 */
+                if (strcmp (key, "cluster.op-version") == 0) {
+                        if (!all_vol) {
+                                ret = -1;
+                                snprintf (errstr, sizeof (errstr), "Option \""
+                                          "%s\" is not valid for a single "
+                                          "volume", key);
+                                goto out;
+                        }
+                        /* Check if cluster.op-version is the only option being
+                         * set
+                         */
+                        if (count != 1) {
+                                ret = -1;
+                                snprintf (errstr, sizeof (errstr), "Option \""
+                                          "%s\" cannot be set along with other "
+                                          "options", key);
+                                goto out;
+                        }
+                        /* Just reusing the variable, but I'm using it for
+                         * storing the op-version from value
+                         */
+                        ret = gf_string2uint (value, &local_key_op_version);
+                        if (ret) {
+                                snprintf (errstr, sizeof (errstr), "invalid "
+                                          "number format \"%s\" in option "
+                                          "\"%s\"", value, key);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+                                goto out;
+                        }
+
+                        if (local_key_op_version > GD_OP_VERSION_MAX ||
+                            local_key_op_version < GD_OP_VERSION_MIN) {
+                                ret = -1;
+                                snprintf (errstr, sizeof (errstr),
+                                          "Required op_version (%"PRIu32") is not "
+                                          "supported", local_key_op_version);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VERSION_UNSUPPORTED,
+                                        "%s", errstr);
+                                goto out;
+                        }
+                        if (local_key_op_version > priv->op_version) {
+                                local_new_op_version = local_key_op_version;
+                        } else {
+                                ret = -1;
+                                snprintf (errstr, sizeof (errstr),
+                                          "Required op-version (%"PRIu32") should"
+                                          " not be equal or lower than current"
+                                          " cluster op-version (%d).",
+                                          local_key_op_version,
+                                          priv->op_version);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VERSION_UNSUPPORTED,
+                                        "%s", errstr);
+                                goto out;
+                        }
+
+                        goto cont;
+                }
+
+                ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out);
+                ret = glusterd_validate_quorum_options (this, key, value,
+                                                        op_errstr);
+                if (ret)
+                        goto out;
+
+                local_key_op_version = glusterd_get_op_version_for_key (key);
+                if (local_key_op_version > local_new_op_version)
+                        local_new_op_version = local_key_op_version;
+                if (gd_is_client_option (key) &&
+                    (local_key_op_version > local_new_client_op_version))
+                        local_new_client_op_version = local_key_op_version;
+
+                sprintf (str, "op-version%d", count);
+                if (origin_glusterd) {
+                        ret = dict_set_uint32 (dict, str, local_key_op_version);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to set key-op-version in dict");
+                                goto out;
+                        }
+                } else if (check_op_version) {
+                        ret = dict_get_uint32 (dict, str, &key_op_version);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Failed to get key-op-version from"
+                                        " dict");
+                                goto out;
+                        }
+                        if (local_key_op_version != key_op_version) {
+                                ret = -1;
+                                snprintf (errstr, sizeof (errstr),
+                                          "option: %s op-version mismatch",
+                                          key);
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_OP_VERSION_MISMATCH,
+                                        "%s, required op-version = %"PRIu32", "
+                                        "available op-version = %"PRIu32,
+                                        errstr, key_op_version,
+                                        local_key_op_version);
+                                goto out;
+                        }
+                }
+
+                if (glusterd_check_globaloption (key))
+                        global_opt = _gf_true;
+
+                if (volinfo) {
+                        ret = glusterd_volinfo_get (volinfo,
+                                                    VKEY_FEATURES_TRASH, &val_dup);
+                        if (val_dup) {
+                                ret = gf_string2boolean (val_dup,
+                                                         &trash_enabled);
+                                if (ret)
+                                        goto out;
+                        }
+                }
+
+                ret = glusterd_validate_shared_storage (key, value, errstr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL,
+                                "Failed to validate shared "
+                                "storage volume options");
+                        goto out;
+                }
+
+                if (!strcmp(key, "features.trash-dir") && trash_enabled) {
+                        if (strchr (value, '/')) {
+                                snprintf (errstr, sizeof (errstr),
+                                          "Path is not allowed as option");
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOL_SET_FAIL,
+                                        "Unable to set the options in 'volume "
+                                        "set': %s", errstr);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        list_for_each_entry (brickinfo, &volinfo->bricks,
+                                             brick_list) {
+                                /* Check for local brick */
+                                if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+                                        trash_path_len = strlen (value) +
+                                                   strlen (brickinfo->path) + 2;
+                                        trash_path = GF_CALLOC (1,
+                                                     trash_path_len,
+                                                     gf_common_mt_char);
+                                        /* Allocation can fail; never format
+                                         * into a NULL buffer.
+                                         */
+                                        if (!trash_path) {
+                                                ret = -1;
+                                                goto out;
+                                        }
+                                        snprintf (trash_path, trash_path_len,
+                                                  "%s/%s", brickinfo->path,
+                                                  value);
+
+                                        /* Checks whether a directory with
+                                           given option exists or not */
+                                        if (!sys_stat (trash_path, &stbuf)) {
+                                                snprintf (errstr,
+                                                          sizeof (errstr),
+                                                          "Path %s exists",
+                                                          value);
+                                                gf_msg (this->name,
+                                                        GF_LOG_ERROR,
+                                                        0, GD_MSG_VOL_SET_FAIL,
+                                                        "Unable to set the "
+                                                        "options in "
+                                                        "'volume set': %s",
+                                                        errstr);
+                                                ret = -1;
+                                                goto out;
+                                        } else {
+                                                gf_msg_debug (this->name, 0,
+                                                       "Directory with given "
+                                                       "name does not exists,"
+                                                       " continuing");
+                                        }
+
+                                        if (volinfo->status == GLUSTERD_STATUS_STARTED
+                                            && brickinfo->status != GF_BRICK_STARTED) {
+                                                /* If volume is in started state , checks
+                                                   whether bricks are online */
+                                                snprintf (errstr, sizeof (errstr),
+                                                          "One or more bricks are down");
+                                                gf_msg (this->name,
+                                                        GF_LOG_ERROR, 0,
+                                                        GD_MSG_VOL_SET_FAIL,
+                                                        "Unable to set the "
+                                                        "options in "
+                                                        "'volume set': %s",
+                                                        errstr);
+                                                ret = -1;
+                                                goto out;
+                                        }
+                                }
+                                if (trash_path) {
+                                        GF_FREE (trash_path);
+                                        trash_path = NULL;
+                                        trash_path_len = 0;
+                                }
+                        }
+                } else if (!strcmp(key, "features.trash-dir") && !trash_enabled) {
+                        snprintf (errstr, sizeof (errstr),
+                                  "Trash translator is not enabled. Use "
+                                  "volume set %s trash on", volname);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_SET_FAIL,
+                                "Unable to set the options in 'volume "
+                                "set': %s", errstr);
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_str (val_dict, key, value);
+
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set the options in 'volume set'");
+                        ret = -1;
+                        goto out;
+                }
+
+                *op_errstr = NULL;
+                if (!global_opt && !all_vol)
+                        ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr);
+                else if (!all_vol) {
+                        voliter = NULL;
+                        cds_list_for_each_entry (voliter, &priv->volumes,
+                                                 vol_list) {
+                                ret = glusterd_validate_globalopts (voliter,
+                                                                    val_dict,
+                                                                    op_errstr);
+                                if (ret)
+                                        break;
+                        }
+                }
+
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL,
+                                "Could not create "
+                                "temp volfile, some option failed: %s",
+                                *op_errstr);
+                        goto out;
+                }
+                /* val_dict refers to key, which may be key_fixed; drop the
+                 * entry before key_fixed is released below.
+                 */
+                dict_del (val_dict, key);
+
+                if (key_fixed) {
+                        GF_FREE (key_fixed);
+                        key_fixed = NULL;
+                }
+        }
+
+        /* Check if all the connected clients support the new client-op-version
+         */
+        ret = glusterd_check_client_op_version_support
+                (volname, local_new_client_op_version, op_errstr);
+        if (ret)
+                goto out;
+
+cont:
+        if (origin_glusterd) {
+                ret = dict_set_uint32 (dict, "new-op-version",
+                                       local_new_op_version);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set new-op-version in dict");
+                        goto out;
+                }
+                /* Set this value in dict so other peers know to check for
+                 * op-version. This is a hack for 3.3.x compatibility
+                 *
+                 * TODO: Remove this and the other places this is referred once
+                 * 3.3.x compatibility is not required
+                 */
+                ret = dict_set_uint32 (dict, "check-op-version",
+                                       _gf_true);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set check-op-version in dict");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        if (val_dict)
+                dict_unref (val_dict);
+
+        if (trash_path)
+                GF_FREE (trash_path);
+
+        GF_FREE (key_fixed);
+        /* A locally composed message takes precedence as the reported error. */
+        if (errstr[0] != '\0')
+                *op_errstr = gf_strdup (errstr);
+
+        if (ret) {
+                if (!(*op_errstr)) {
+                        *op_errstr = gf_strdup ("Error, Validation Failed");
+                        gf_msg_debug (this->name, 0,
+                                      "Error, Cannot Validate option :%s",
+                                      *op_errstr);
+                } else {
+                        gf_msg_debug (this->name, 0,
+                                      "Error, Cannot Validate option");
+                }
+        }
+        return ret;
+}
+
+/*
+ * Staging (validation) step of 'volume reset'.
+ *
+ * Reads "volname" and "key" from dict.  Unless the volume name is "all"
+ * (case-insensitive) the volume must exist and pass the volume-id check.
+ * Unless the key is "all" it must name a known option, must not be one of
+ * the quota keys, and must pass ALL_VOLUME_OPTION_CHECK.
+ *
+ * Any message composed in msg is logged and handed back through op_errstr
+ * as a gf_strdup()'d copy.
+ *
+ * Returns 0 when the reset may proceed, non-zero otherwise.
+ */
+static int
+glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)
+{
+        xlator_t           *this      = NULL;
+        glusterd_volinfo_t *volinfo   = NULL;
+        char               *volname   = NULL;
+        char               *key       = NULL;
+        char               *key_fixed = NULL;
+        char                msg[2048] = {0};
+        int                 exists    = 0;
+        int                 ret       = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        if (strcasecmp (volname, "all") != 0) {
+                exists = glusterd_check_volume_exists (volname);
+                if (!exists) {
+                        snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS,
+                                  volname);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS,
+                                  volname);
+                        goto out;
+                }
+
+                ret = glusterd_validate_volume_id (dict, volinfo);
+                if (ret)
+                        goto out;
+        }
+
+        ret = dict_get_str (dict, "key", &key);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get option key");
+                goto out;
+        }
+
+        /* Resetting every option needs no per-key validation. */
+        if (strcmp (key, "all") == 0)
+                goto out;
+
+        exists = glusterd_check_option_exists (key, &key_fixed);
+        if (exists == -1) {
+                ret = -1;
+                goto out;
+        }
+
+        if (!exists) {
+                ret = snprintf (msg, sizeof (msg),
+                                "Option %s does not exist", key);
+                if (key_fixed)
+                        snprintf (msg + ret, sizeof (msg) - ret,
+                                  "\nDid you mean %s?", key_fixed);
+                ret = -1;
+                goto out;
+        }
+
+        if (exists > 0) {
+                if (key_fixed)
+                        key = key_fixed;
+
+                /* 'gluster volume set/reset <VOLNAME>
+                 * features.quota/features.inode-quota' should
+                 * not be allowed as it is deprecated.
+                 * Setting and resetting quota/inode-quota features
+                 * should be allowed only through 'gluster volume quota
+                 * <VOLNAME> enable/disable'.
+                 * But, 'gluster volume set features.quota-deem-statfs'
+                 * can be turned on/off when quota is enabled.
+                 */
+                if (!strcmp (VKEY_FEATURES_INODE_QUOTA, key) ||
+                    !strcmp (VKEY_FEATURES_QUOTA, key)) {
+                        snprintf (msg, sizeof (msg), "'gluster volume "
+                                  "reset <VOLNAME> %s' is deprecated. "
+                                  "Use 'gluster volume quota <VOLNAME> "
+                                  "disable' instead.", key);
+                        ret = -1;
+                        goto out;
+                }
+                ALL_VOLUME_OPTION_CHECK (volname, key, ret,
+                                         op_errstr, out);
+        }
+
+out:
+        GF_FREE (key_fixed);
+
+        if (msg[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_STAGE_RESET_VOL_FAIL, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+
+
+/*
+ * Staging (validation) step of 'volume sync'.
+ *
+ * Reads "hostname" from dict.  When it is a local address, an optional
+ * "volname" must name an existing volume (no volname means "sync all").
+ * Otherwise the host must be a known peer that is currently connected;
+ * the peer lookup runs inside an RCU read-side section.
+ *
+ * On failure a gf_strdup()'d message is stored in *op_errstr where one is
+ * composed.  Returns 0 on success, non-zero otherwise.
+ */
+static int
+glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr)
+{
+        glusterd_volinfo_t  *volinfo    = NULL;
+        glusterd_peerinfo_t *peerinfo   = NULL;
+        char                *hostname   = NULL;
+        char                *volname    = NULL;
+        char                 msg[2048]  = {0,};
+        gf_boolean_t         vol_exists = _gf_false;
+        int                  ret        = -1;
+
+        ret = dict_get_str (dict, "hostname", &hostname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "hostname couldn't be "
+                          "retrieved from msg");
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        if (!gf_is_local_addr (hostname)) {
+                rcu_read_lock ();
+
+                peerinfo = glusterd_peerinfo_find (NULL, hostname);
+                if (peerinfo == NULL) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "%s, is not a friend",
+                                  hostname);
+                        *op_errstr = gf_strdup (msg);
+
+                } else if (!peerinfo->connected) {
+                        snprintf (msg, sizeof (msg), "%s, is not connected at "
+                                  "the moment", hostname);
+                        *op_errstr = gf_strdup (msg);
+                        ret = -1;
+                }
+
+                rcu_read_unlock ();
+                goto out;
+        }
+
+        //volname is not present in case of sync all
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                ret = 0;
+                goto out;
+        }
+
+        vol_exists = glusterd_check_volume_exists (volname);
+        if (!vol_exists) {
+                snprintf (msg, sizeof (msg), "Volume %s "
+                          "does not exist", volname);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                goto out;
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Staging (validation) step of 'volume status'.
+ *
+ * Reads the "cmd" bitmask from dict.  GF_CLI_STATUS_ALL needs no further
+ * checks.  Otherwise the cluster op-version must allow the requested
+ * daemon status (quotad, snapd), the volume named by "volname" must exist,
+ * pass the volume-id check and be started, and the service selected by the
+ * first matching cmd bit must be usable on that volume.
+ *
+ * On failure *op_errstr receives a gf_strdup()'d message: the one composed
+ * in msg, or a generic one when msg is empty.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int
+glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr)
+{
+        xlator_t             *this         = NULL;
+        glusterd_conf_t      *priv         = NULL;
+        glusterd_volinfo_t   *volinfo      = NULL;
+        glusterd_brickinfo_t *brickinfo    = NULL;
+        dict_t               *vol_opts     = NULL;
+        char                 *volname      = NULL;
+        char                 *brick        = NULL;
+        char                  msg[2048]    = {0,};
+        uint32_t              cmd          = 0;
+        gf_boolean_t          nfs_disabled = _gf_false;
+        gf_boolean_t          shd_enabled  = _gf_false;
+        int                   ret          = -1;
+
+        GF_ASSERT (dict);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT(priv);
+
+        ret = dict_get_uint32 (dict, "cmd", &cmd);
+        if (ret)
+                goto out;
+
+        /* 'status all' is not tied to a single volume. */
+        if (cmd & GF_CLI_STATUS_ALL)
+                goto out;
+
+        if ((cmd & GF_CLI_STATUS_QUOTAD) &&
+            (priv->op_version == GD_OP_VERSION_MIN)) {
+                snprintf (msg, sizeof (msg), "The cluster is operating at "
+                          "version 1. Getting the status of quotad is not "
+                          "allowed in this state.");
+                ret = -1;
+                goto out;
+        }
+
+        if ((cmd & GF_CLI_STATUS_SNAPD) &&
+            (priv->op_version < GD_OP_VERSION_3_6_0)) {
+                snprintf (msg, sizeof (msg), "The cluster is operating at "
+                          "version less than %d. Getting the "
+                          "status of snapd is not allowed in this state.",
+                          GD_OP_VERSION_3_6_0);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_is_volume_started (volinfo);
+        if (!ret) {
+                snprintf (msg, sizeof (msg), "Volume %s is not started",
+                          volname);
+                ret = -1;
+                goto out;
+        }
+
+        vol_opts = volinfo->dict;
+
+        /* Only the first matching service bit is examined. */
+        if (cmd & GF_CLI_STATUS_NFS) {
+                nfs_disabled = dict_get_str_boolean (vol_opts,
+                                                     NFS_DISABLE_MAP_KEY,
+                                                     _gf_false);
+                if (nfs_disabled) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg),
+                                  "NFS server is disabled for volume %s",
+                                  volname);
+                        goto out;
+                }
+        } else if (cmd & GF_CLI_STATUS_SHD) {
+                if (!glusterd_is_shd_compatible_volume (volinfo)) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg),
+                                  "Volume %s is not Self-heal compatible",
+                                  volname);
+                        goto out;
+                }
+                shd_enabled = gd_is_self_heal_enabled (volinfo, vol_opts);
+                if (!shd_enabled) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg),
+                                  "Self-heal Daemon is disabled for volume %s",
+                                  volname);
+                        goto out;
+                }
+        } else if (cmd & GF_CLI_STATUS_QUOTAD) {
+                if (!glusterd_is_volume_quota_enabled (volinfo)) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "Volume %s does not have "
+                                  "quota enabled", volname);
+                        goto out;
+                }
+        } else if (cmd & GF_CLI_STATUS_BITD) {
+                if (!glusterd_is_bitrot_enabled (volinfo)) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "Volume %s does not have "
+                                  "bitrot enabled", volname);
+                        goto out;
+                }
+        } else if (cmd & GF_CLI_STATUS_SCRUB) {
+                if (!glusterd_is_bitrot_enabled (volinfo)) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "Volume %s does not have "
+                                  "bitrot enabled. Scrubber will be enabled "
+                                  "automatically if bitrot is enabled",
+                                  volname);
+                        goto out;
+                }
+        } else if (cmd & GF_CLI_STATUS_SNAPD) {
+                if (!glusterd_is_snapd_enabled (volinfo)) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "Volume %s does not have "
+                                  "uss enabled", volname);
+                        goto out;
+                }
+        } else if (cmd & GF_CLI_STATUS_BRICK) {
+                ret = dict_get_str (dict, "brick", &brick);
+                if (ret)
+                        goto out;
+
+                ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+                                                              &brickinfo,
+                                                              _gf_false);
+                if (ret) {
+                        snprintf (msg, sizeof(msg), "No brick %s in"
+                                  " volume %s", brick, volname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+ out:
+        if (ret) {
+                if (msg[0] == '\0')
+                        *op_errstr = gf_strdup ("Validation Failed for Status");
+                else
+                        *op_errstr = gf_strdup (msg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning: %d", ret);
+        return ret;
+}
+
+
+/* Report whether profiling is enabled on @volinfo.
+ *
+ * Both VKEY_DIAG_CNT_FOP_HITS and VKEY_DIAG_LAT_MEASUREMENT are looked up
+ * through glusterd_volinfo_get_boolean(); a lookup that returns -1 leaves the
+ * corresponding flag off. Returns _gf_true only when both flags compare equal
+ * to _gf_true, _gf_false otherwise.
+ */
+static gf_boolean_t
+glusterd_is_profile_on (glusterd_volinfo_t *volinfo)
+{
+        gf_boolean_t    fop_hits = _gf_false;
+        gf_boolean_t    latency  = _gf_false;
+        int             rc       = -1;
+
+        GF_ASSERT (volinfo);
+
+        rc = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_CNT_FOP_HITS);
+        if (rc != -1)
+                fop_hits = rc;
+
+        rc = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_LAT_MEASUREMENT);
+        if (rc != -1)
+                latency = rc;
+
+        if ((latency != _gf_true) || (fop_hits != _gf_true))
+                return _gf_false;
+
+        return _gf_true;
+}
+/* Stage-phase validation of a 'volume profile' (stats) request.
+ *
+ * Reads "volname" and "op" from @dict, looks the volume up and checks the
+ * requested op against the current state:
+ *   - GF_CLI_STATS_START is refused when profiling is already on;
+ *   - GF_CLI_STATS_STOP and GF_CLI_STATS_INFO are refused when it is off;
+ *   - GF_CLI_STATS_TOP and GF_CLI_STATS_INFO are refused when the volume is
+ *     not started.
+ *
+ * Returns 0 when every check passes and a non-zero value otherwise. Whenever
+ * a message was composed in msg it is logged and a gf_strdup'd copy is stored
+ * in *op_errstr.
+ */
+static int
+glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ char msg[2048] = {0,};
+ int32_t stats_op = GF_CLI_STATS_NONE;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume name get failed");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((!exists) || (ret < 0)) { /* either lookup failing counts as "doesn't exist" */
+ snprintf (msg, sizeof (msg), "Volume %s, "
+ "doesn't exist", volname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret) /* NOTE(review): msg is still empty here, so *op_errstr is left untouched on this path */
+ goto out;
+
+ ret = dict_get_int32 (dict, "op", &stats_op);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume profile op get failed");
+ goto out;
+ }
+
+ if (GF_CLI_STATS_START == stats_op) {
+ if (_gf_true == glusterd_is_profile_on (volinfo)) {
+ snprintf (msg, sizeof (msg), "Profile on Volume %s is"
+ " already started", volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ }
+ if ((GF_CLI_STATS_STOP == stats_op) ||
+ (GF_CLI_STATS_INFO == stats_op)) {
+ if (_gf_false == glusterd_is_profile_on (volinfo)) {
+ snprintf (msg, sizeof (msg), "Profile on Volume %s is"
+ " not started", volinfo->volname);
+ ret = -1;
+
+ goto out;
+ }
+ }
+ if ((GF_CLI_STATS_TOP == stats_op) || /* INFO is covered by this check as well as the one above */
+ (GF_CLI_STATS_INFO == stats_op)) {
+ if (_gf_false == glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s is not started.",
+ volinfo->volname);
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_VOL_NOT_STARTED, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (msg[0] != '\0') { /* a non-empty msg is only ever produced on the failure paths above */
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_OP_STAGE_STATS_VOL_FAIL, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Delete one reconfigured option from dictionary @this unless it is protected.
+ *
+ * Has the callback shape expected by dict_foreach() and is also called
+ * directly. @data must point to an int32_t that is both input and output:
+ * a value of exactly 1 on entry disables the OPT_FLAG_FORCE protection check;
+ * otherwise GD_OP_PROTECTED / GD_OP_UNPROTECTED bits are OR-ed into it to
+ * tell the caller what was skipped or removed.
+ *
+ * Always returns 0.
+ */
+static int
+_delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data)
+{
+ int32_t *is_force = 0;
+
+ GF_ASSERT (data);
+ is_force = (int32_t*)data;
+
+ /* Keys which have the flag OPT_FLAG_NEVER_RESET
+ * should not be deleted
+ */
+
+ if (_gf_true == glusterd_check_voloption_flags (key,
+ OPT_FLAG_NEVER_RESET)) {
+ if (*is_force != 1) /* never-reset keys are kept even when forced; only the report bit depends on force */
+ *is_force = *is_force | GD_OP_PROTECTED;
+ goto out;
+ }
+
+ if (*is_force != 1) { /* NOTE(review): once a flag bit has been OR-ed in, the value can no longer equal 1 */
+ if (_gf_true == glusterd_check_voloption_flags (key,
+ OPT_FLAG_FORCE)) {
+ /* indicate to caller that we don't set the option
+ * due to being protected
+ */
+ *is_force = *is_force | GD_OP_PROTECTED;
+ goto out;
+ } else {
+ *is_force = *is_force | GD_OP_UNPROTECTED;
+ }
+ }
+
+ gf_msg_debug ("glusterd", 0, "deleting dict with key=%s,value=%s",
+ key, value->data);
+ dict_del (this, key);
+ /**Delete scrubber (pause/resume) option from the dictionary if bitrot
+ * option is going to be reset
+ * */
+ if (!strncmp (key, VKEY_FEATURES_BITROT, /* prefix comparison over the length of VKEY_FEATURES_BITROT */
+ strlen (VKEY_FEATURES_BITROT))) {
+ dict_del (this, VKEY_FEATURES_SCRUB);
+ }
+out:
+ return 0;
+}
+
+/* dict_foreach() callback used when resetting the cluster-wide options.
+ *
+ * Forwards every entry to _delete_reconfig_opt() except the
+ * GLUSTERD_GLOBAL_OPT_VERSION key, which is left in place. @data is the
+ * int32_t force/flag word expected by _delete_reconfig_opt().
+ *
+ * Always returns 0.
+ */
+static int
+_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data)
+{
+        GF_ASSERT (data);
+
+        /* The global option version entry is never deleted by a reset. */
+        if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) != 0)
+                _delete_reconfig_opt (this, key, value, data);
+
+        return 0;
+}
+
+/* Reset one option, or all options, stored in volinfo->dict and propagate
+ * the change.
+ *
+ * @key is either the literal "all" or a single option name. @is_force is the
+ * in/out flag word handed to _delete_reconfig_opt(). After the dictionary has
+ * been updated the defaults are re-enabled, op-versions are refreshed, the
+ * snapd service manager is invoked for non-snapshot volumes, volfiles are
+ * regenerated, the volinfo is stored and, for a started volume, the services
+ * are reconfigured.
+ *
+ * Returns 0 on success (including the case where @key has no value set) and
+ * non-zero on the first failing step.
+ */
+static int
+glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,
+                        int32_t *is_force)
+{
+        int                      ret           = 0;
+        data_t                  *value         = NULL;
+        xlator_t                *this          = NULL;
+        glusterd_svc_t          *svc           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (volinfo->dict);
+        GF_ASSERT (key);
+
+        /* Exact match only: a prefix test would treat any option whose name
+         * merely begins with "all" as a request to wipe every option. */
+        if (!strcmp (key, "all")) {
+                dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force);
+                ret = glusterd_enable_default_options (volinfo, NULL);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FAIL_DEFAULT_OPT_SET, "Failed to set "
+                                "default options on reset for volume %s",
+                                volinfo->volname);
+                        goto out;
+                }
+        } else {
+                value = dict_get (volinfo->dict, key);
+                if (!value) {
+                        /* Nothing to reset; ret is still 0 here. */
+                        gf_msg_debug (this->name, 0,
+                                      "no value set for option %s", key);
+                        goto out;
+                }
+                _delete_reconfig_opt (volinfo->dict, key, value, is_force);
+                ret = glusterd_enable_default_options (volinfo, key);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FAIL_DEFAULT_OPT_SET, "Failed to set "
+                                "default value for option '%s' on reset for "
+                                "volume %s", key, volinfo->volname);
+                        goto out;
+                }
+        }
+
+        gd_update_volume_op_versions (volinfo);
+        if (!volinfo->is_snap_volume) {
+                svc = &(volinfo->snapd.svc);
+                ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Unable to create volfile for"
+                        " 'volume reset'");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret)
+                goto out;
+
+        if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+                ret = glusterd_svcs_reconfigure ();
+                if (ret)
+                        goto out;
+        }
+
+        ret = 0;
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Handle 'volume reset all ...': reset one cluster-wide option, or all of
+ * them, in conf->opts.
+ *
+ * Reads "key" (mandatory) and "force" (defaults to 0) from @dict. A key other
+ * than "all" must be known to glusterd_check_option_exists(). When the
+ * GLUSTERD_STORE_KEY_GANESHA_GLOBAL option is on, the NFS-Ganesha cluster is
+ * torn down and the service stopped first; failures there are only logged.
+ * The new option set is written through a scratch dictionary with a bumped
+ * GLUSTERD_GLOBAL_OPT_VERSION before conf->opts itself is modified.
+ *
+ * Returns 0 on success, non-zero otherwise. glusterd_do_quorum_action() is
+ * run on exit when glusterd_is_quorum_changed() reported a change.
+ */
+static int
+glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict)
+{
+        char            *key            = NULL;
+        char            *key_fixed      = NULL;
+        int             ret             = -1;
+        int32_t         is_force        = 0;
+        glusterd_conf_t *conf           = NULL;
+        dict_t          *dup_opt        = NULL;
+        gf_boolean_t    all             = _gf_false;
+        char            *next_version   = NULL;
+        gf_boolean_t    quorum_action   = _gf_false;
+        gf_boolean_t    option          = _gf_false;
+        char            *op_errstr      = NULL;
+
+        conf = this->private;
+        ret = dict_get_str (dict, "key", &key);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get key");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "force", &is_force);
+        if (ret)
+                is_force = 0;
+
+        if (strcmp (key, "all")) {
+                ret = glusterd_check_option_exists (key, &key_fixed);
+                if (ret <= 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_INVALID_ENTRY, "Option %s does not "
+                                "exist", key);
+                        ret = -1;
+                        goto out;
+                }
+        } else {
+                all = _gf_true;
+        }
+
+        if (key_fixed)
+                key = key_fixed;
+        option = dict_get_str_boolean (conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+                                       _gf_false);
+        if (option) {
+                /* Best effort: both failures are logged as warnings and the
+                 * reset carries on. */
+                ret = tear_down_cluster();
+                if (ret == -1)
+                        gf_msg (THIS->name, GF_LOG_WARNING, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Could not tear down NFS-Ganesha cluster");
+                ret = stop_ganesha (&op_errstr);
+                if (ret)
+                        gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                                GD_MSG_NFS_GNS_STOP_FAIL,
+                                "Could not stop NFS-Ganesha service");
+        }
+
+        ret = -1;
+        dup_opt = dict_new ();
+        if (!dup_opt)
+                goto out;
+        if (!all) {
+                dict_copy (conf->opts, dup_opt);
+                dict_del (dup_opt, key);
+        }
+        ret = glusterd_get_next_global_opt_version_str (conf->opts,
+                                                        &next_version);
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_options (this, dup_opt);
+        if (ret)
+                goto out;
+
+        if (glusterd_is_quorum_changed (conf->opts, key, NULL))
+                quorum_action = _gf_true;
+
+        ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+                               next_version);
+        if (ret)
+                goto out;
+        else
+                next_version = NULL; /* now owned by conf->opts */
+
+        if (!all) {
+                dict_del (conf->opts, key);
+        } else {
+                dict_foreach (conf->opts, _delete_reconfig_global_opt,
+                              &is_force);
+        }
+out:
+        GF_FREE (key_fixed);
+        /* Release any error string stop_ganesha() stored; it is not reported
+         * to the caller. */
+        GF_FREE (op_errstr);
+        if (dup_opt)
+                dict_unref (dup_opt);
+
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        if (quorum_action)
+                glusterd_do_quorum_action ();
+        GF_FREE (next_version);
+        return ret;
+}
+/* Handle a 'volume reset' request described by @dict.
+ *
+ * A "volname" equal to "all" (case-insensitive) is delegated to
+ * glusterd_op_reset_all_volume_options(). Otherwise "force" (default 0) and
+ * "key" are read, the volume is looked up, a key other than "all" is checked
+ * with glusterd_check_option_exists(), and the reset itself is done by
+ * glusterd_options_reset(). Depending on the outcome a message is formatted
+ * into *op_rspstr.
+ *
+ * Returns 0 on success and non-zero on failure. glusterd_do_quorum_action()
+ * is run on exit when glusterd_is_quorum_changed() reported a change.
+ */
+static int
+glusterd_op_reset_volume (dict_t *dict, char **op_rspstr)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int32_t is_force = 0;
+ gf_boolean_t quorum_action = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all") == 0) {
+ ret = glusterd_op_reset_all_volume_options (this, dict);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "force", &is_force);
+ if (ret) /* a missing "force" entry simply means "not forced" */
+ is_force = 0;
+
+ ret = dict_get_str (dict, "key", &key);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to get option key");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ if (strcmp (key, "all") &&
+ glusterd_check_option_exists (key, &key_fixed) != 1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY,
+ "volinfo dict inconsistency: option %s not found",
+ key);
+ ret = -1;
+ goto out;
+ }
+ if (key_fixed) /* prefer the name filled in by glusterd_check_option_exists() */
+ key = key_fixed;
+
+ if (glusterd_is_quorum_changed (volinfo->dict, key, NULL))
+ quorum_action = _gf_true;
+ ret = glusterd_check_ganesha_export (volinfo);
+ if (ret) { /* non-zero is treated as "volume is exported": switch the export off first */
+ ret = ganesha_manage_export (volname, "off", op_rspstr, _gf_false);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_WARNING, 0,
+ GD_MSG_NFS_GNS_RESET_FAIL,
+ "Could not reset ganesha.enable key");
+ ret = 0; /* failure is only warned about; ret is reassigned by the next call anyway */
+ }
+ }
+
+ ret = glusterd_options_reset (volinfo, key, &is_force);
+ if (ret == -1) {
+ gf_asprintf(op_rspstr, "Volume reset : failed");
+ } else if (is_force & GD_OP_PROTECTED) { /* flag bits were OR-ed into is_force during the reset */
+ if (is_force & GD_OP_UNPROTECTED) {
+ gf_asprintf (op_rspstr, "All unprotected fields were"
+ " reset. To reset the protected fields,"
+ " use 'force'.");
+ } else {
+ ret = -1; /* nothing was reset: only protected options were involved */
+ gf_asprintf (op_rspstr, "'%s' is protected. To reset"
+ " use 'force'.", key);
+ }
+ }
+
+out:
+ GF_FREE (key_fixed);
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+
+ gf_msg_debug (this->name, 0, "'volume reset' returning %d", ret);
+ return ret;
+}
+
+/* Stop every brick on volinfo->bricks, in list order.
+ *
+ * Stops at the first brick for which glusterd_brick_stop() reports failure
+ * and returns -1; returns 0 when all bricks were stopped.
+ */
+int
+glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t    *brick  = NULL;
+        int                      failed = 0;
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                /*TODO: Need to change @del_brick in brick_stop to _gf_true
+                 * once we enable synctask in peer rpc prog */
+                if (glusterd_brick_stop (volinfo, brick, _gf_false)) {
+                        failed = 1;
+                        break;
+                }
+        }
+
+        return failed ? -1 : 0;
+}
+
+/* Start every brick on volinfo->bricks, in list order.
+ *
+ * The first failure of glusterd_brick_start() is logged and its return value
+ * is handed back to the caller; 0 is returned when every brick started.
+ */
+int
+glusterd_start_bricks (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t    *brick = NULL;
+        int                      rc    = -1;
+
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                rc = glusterd_brick_start (volinfo, brick, _gf_false);
+                if (!rc)
+                        continue;
+
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_DISCONNECTED,
+                        "Failed to start %s:%s for %s",
+                        brick->hostname, brick->path,
+                        volinfo->volname);
+                return rc;
+        }
+
+        return 0;
+}
+
+/* Handle 'volume set all <key> <value>': set one cluster-wide option.
+ *
+ * Reads "key1"/"value1" from @dict and validates the key with
+ * glusterd_check_option_exists(). glusterd_set_shared_storage() is given a
+ * chance to act on the key first. "cluster.op-version" is special: it only
+ * updates conf->op_version (when not lower than the current one) and
+ * re-stores every volinfo, and is not saved in conf->opts. Any other key is
+ * written through a scratch dictionary with a bumped
+ * GLUSTERD_GLOBAL_OPT_VERSION before conf->opts is updated.
+ *
+ * Returns 0 on success, non-zero otherwise. glusterd_do_quorum_action() is
+ * run on exit when glusterd_is_quorum_changed() reported a change.
+ */
+static int
+glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,
+                                    char **op_errstr)
+{
+        char            *key            = NULL;
+        char            *key_fixed      = NULL;
+        char            *value          = NULL;
+        char            *dup_value      = NULL;
+        int             ret             = -1;
+        glusterd_conf_t *conf           = NULL;
+        dict_t          *dup_opt        = NULL;
+        char            *next_version   = NULL;
+        gf_boolean_t    quorum_action   = _gf_false;
+        uint32_t        op_version      = 0;
+        glusterd_volinfo_t  *volinfo    = NULL;
+
+        conf = this->private;
+        ret = dict_get_str (dict, "key1", &key);
+        if (ret)
+                goto out;
+
+        ret = dict_get_str (dict, "value1", &value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "invalid key,value pair in 'volume set'");
+                goto out;
+        }
+
+        ret = glusterd_check_option_exists (key, &key_fixed);
+        if (ret <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNKNOWN_KEY, "Invalid key %s", key);
+                ret = -1;
+                goto out;
+        }
+
+        if (key_fixed)
+                key = key_fixed;
+
+        ret = glusterd_set_shared_storage (dict, key, value, op_errstr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SHARED_STRG_SET_FAIL,
+                        "Failed to set shared storage option");
+                goto out;
+        }
+
+        /* If the key is cluster.op-version, set conf->op_version to the value
+         * if needed and save it.
+         */
+        if (strcmp(key, "cluster.op-version") == 0) {
+                ret = gf_string2uint (value, &op_version);
+                if (ret)
+                        goto out;
+
+                if (op_version >= conf->op_version) {
+                        conf->op_version = op_version;
+                        ret = glusterd_store_global_info (this);
+                        if (ret) {
+                                /* NOTE(review): only logged; ret is
+                                 * reassigned by the loop below when any
+                                 * volume exists. Confirm that is intended. */
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_OP_VERS_STORE_FAIL,
+                                        "Failed to store op-version.");
+                        }
+                }
+                cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+                        ret = glusterd_store_volinfo
+                                (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                        if (ret)
+                                goto out;
+                }
+                /* No need to save cluster.op-version in conf->opts
+                 */
+                goto out;
+        }
+        ret = -1;
+        dup_opt = dict_new ();
+        if (!dup_opt)
+                goto out;
+        dict_copy (conf->opts, dup_opt);
+        ret = dict_set_str (dup_opt, key, value);
+        if (ret)
+                goto out;
+
+        ret = glusterd_get_next_global_opt_version_str (conf->opts,
+                                                        &next_version);
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_options (this, dup_opt);
+        if (ret)
+                goto out;
+
+        if (glusterd_is_quorum_changed (conf->opts, key, value))
+                quorum_action = _gf_true;
+
+        ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+                               next_version);
+        if (ret)
+                goto out;
+        else
+                next_version = NULL; /* now owned by conf->opts */
+
+        dup_value = gf_strdup (value);
+        if (!dup_value) {
+                /* ret is 0 from the call above; an allocation failure must
+                 * not be reported as success. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (conf->opts, key, dup_value);
+        if (ret)
+                goto out;
+        else
+                dup_value = NULL; /* Protect the allocation from GF_FREE */
+
+out:
+        GF_FREE (dup_value);
+        GF_FREE (key_fixed);
+        if (dup_opt)
+                dict_unref (dup_opt);
+
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        if (quorum_action)
+                glusterd_do_quorum_action ();
+        GF_FREE (next_version);
+        return ret;
+}
+
+/* Prepare the local node for a change of the shared-storage option.
+ *
+ * Does nothing and returns 0 unless @key equals GLUSTERD_SHARED_STORAGE_KEY.
+ * For that key the directory GLUSTER_SHARED_STORAGE_BRICK_DIR is removed and
+ * re-created, and a "hooks_args" string carrying the originator flag and
+ * local_node_hostname is stored in @dict.
+ *
+ * Returns 0 on success and non-zero on failure (-1 when argument validation
+ * or a directory operation fails). When a directory operation fails, a
+ * gf_strdup'd description is stored in *op_errstr.
+ */
+static int
+glusterd_set_shared_storage (dict_t *dict, char *key, char *value,
+                             char **op_errstr)
+{
+        int32_t       ret                  = -1;
+        char          hooks_args[PATH_MAX] = {0, };
+        char          errstr[PATH_MAX]     = {0, };
+        xlator_t     *this                 = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, key, out);
+        GF_VALIDATE_OR_GOTO (this->name, value, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        ret = 0;
+
+        if (strcmp (key, GLUSTERD_SHARED_STORAGE_KEY)) {
+                goto out;
+        }
+
+        /* Re-create the brick path so as to be *
+         * able to re-use it                    *
+         */
+        ret = recursive_rmdir (GLUSTER_SHARED_STORAGE_BRICK_DIR);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr),
+                          "Failed to remove shared "
+                          "storage brick(%s). "
+                          "Reason: %s", GLUSTER_SHARED_STORAGE_BRICK_DIR,
+                          strerror (errno));
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "%s", errstr);
+                ret = -1;
+                goto out;
+        }
+
+        ret = mkdir_p (GLUSTER_SHARED_STORAGE_BRICK_DIR, 0777, _gf_true);
+        if (-1 == ret) {
+                snprintf (errstr, sizeof (errstr),
+                          "Failed to create shared "
+                          "storage brick(%s). "
+                          "Reason: %s", GLUSTER_SHARED_STORAGE_BRICK_DIR,
+                          strerror (errno));
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "%s", errstr);
+                goto out;
+        }
+
+        /* Only the originator digit differs between nodes. */
+        snprintf (hooks_args, sizeof (hooks_args),
+                  "is_originator=%d,local_node_hostname=%s",
+                  is_origin_glusterd (dict) ? 1 : 0,
+                  local_node_hostname);
+
+        ret = dict_set_dynstr_with_alloc (dict, "hooks_args", hooks_args);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set"
+                        " hooks_args in dict.");
+                goto out;
+        }
+
+out:
+        if (ret && strlen(errstr)) {
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        return ret;
+}
+
+/* Apply the key/value pairs of a 'volume set' request carried in @dict.
+ *
+ * A "count" of 0 only runs glusterd_volset_help(). A "volname" of "all"
+ * (case-insensitive) is delegated to glusterd_op_set_all_volume_options().
+ * Otherwise every "key<N>"/"value<N>" pair (N starting at 1) is stored in
+ * volinfo->dict, or in the dict of every volume when
+ * glusterd_check_globaloption() reports the key as global. Afterwards the
+ * cluster op-version is raised if requested, and for the affected volume(s)
+ * volfiles are regenerated, the volinfo is stored and services are
+ * reconfigured.
+ *
+ * Returns 0 on success and non-zero on failure. glusterd_do_quorum_action()
+ * is run on exit when glusterd_is_quorum_changed() reported a change.
+ */
+static int
+glusterd_op_set_volume (dict_t *dict, char **errstr)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int count = 1;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char str[50] = {0, };
+ char *op_errstr = NULL;
+ gf_boolean_t global_opt = _gf_false;
+ gf_boolean_t global_opts_set = _gf_false;
+ glusterd_volinfo_t *voliter = NULL;
+ int32_t dict_count = 0;
+ gf_boolean_t check_op_version = _gf_false;
+ uint32_t new_op_version = 0;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_int32 (dict, "count", &dict_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Count(dict),not set in Volume-Set");
+ goto out;
+ }
+
+ if (dict_count == 0) { /* no pairs supplied: this is the help path */
+ ret = glusterd_volset_help (NULL, &op_errstr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_SET_FAIL, "%s",
+ (op_errstr)? op_errstr:
+ "Volume set help internal error");
+ }
+
+ GF_FREE(op_errstr);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all") == 0) {
+ ret = glusterd_op_set_all_volume_options (this, dict,
+ &op_errstr); /* NOTE(review): op_errstr is neither copied to *errstr nor freed on this path - confirm */
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ /* TODO: Remove this once v3.3 compatibility is not required */
+ check_op_version = dict_get_str_boolean (dict, "check-op-version",
+ _gf_false);
+
+ if (check_op_version) {
+ ret = dict_get_uint32 (dict, "new-op-version", &new_op_version);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to get new op-version from dict");
+ goto out;
+ }
+ }
+
+ for (count = 1; ret != -1 ; count++) { /* walks key1/value1, key2/value2, ... until a key is missing */
+
+ snprintf (str, sizeof str, "key%d", count);
+ ret = dict_get_str (dict, str, &key);
+ if (ret) /* normal loop exit: no more pairs */
+ break;
+
+ snprintf (str, sizeof str, "value%d", count);
+ ret = dict_get_str (dict, str, &value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "invalid key,value pair in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp (key, "config.memory-accounting") == 0) {
+ ret = gf_string2boolean (value,
+ &volinfo->memory_accounting); /* NOTE(review): this ret is overwritten below before it is examined */
+ }
+
+ if (strcmp (key, "config.transport") == 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_VOL_TRANSPORT_TYPE_CHANGE,
+ "changing transport-type for volume %s to %s",
+ volname, value);
+ ret = 0;
+ if (strcasecmp (value, "rdma") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_RDMA;
+ } else if (strcasecmp (value, "tcp") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_TCP;
+ } else if ((strcasecmp (value, "tcp,rdma") == 0) ||
+ (strcasecmp (value, "rdma,tcp") == 0)) {
+ volinfo->transport_type =
+ GF_TRANSPORT_BOTH_TCP_RDMA;
+ } else { /* any other transport string is rejected */
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_check_ganesha_cmd (key, value, errstr, dict);
+ if (ret == -1)
+ goto out;
+ if (!is_key_glusterd_hooks_friendly (key)) {
+ ret = glusterd_check_option_exists (key, &key_fixed);
+ GF_ASSERT (ret);
+ if (ret <= 0) {
+ key_fixed = NULL;
+ goto out;
+ }
+ }
+
+ global_opt = _gf_false;
+ if (glusterd_check_globaloption (key)) {
+ global_opt = _gf_true;
+ global_opts_set = _gf_true;
+ }
+
+ if (!global_opt) /* dict_set_dynstr below is given its own heap copy of the value */
+ value = gf_strdup (value);
+
+ if (!value) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+
+ if (glusterd_is_quorum_changed (volinfo->dict, key, value))
+ quorum_action = _gf_true;
+
+ if (global_opt) {
+ cds_list_for_each_entry (voliter, &priv->volumes,
+ vol_list) {
+ value = gf_strdup (value); /* one copy per volume; NOTE(review): result is not NULL-checked */
+ ret = dict_set_dynstr (voliter->dict, key,
+ value);
+ if (ret)
+ goto out;
+ }
+ } else {
+ ret = dict_set_dynstr (volinfo->dict, key, value);
+ if (ret)
+ goto out;
+ }
+
+ if (key_fixed) { /* released per iteration so the next pair starts clean */
+ GF_FREE (key_fixed);
+ key_fixed = NULL;
+ }
+ }
+
+ if (count == 1) { /* the loop broke on its very first key lookup */
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_NO_OPTIONS_GIVEN, "No options received ");
+ ret = -1;
+ goto out;
+ }
+
+ /* Update the cluster op-version before regenerating volfiles so that
+ * correct volfiles are generated
+ */
+ if (new_op_version > priv->op_version) {
+ priv->op_version = new_op_version;
+ ret = glusterd_store_global_info (this);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_OP_VERS_STORE_FAIL,
+ "Failed to store op-version");
+ goto out;
+ }
+ }
+ if (!global_opts_set) { /* only the named volume was touched */
+ gd_update_volume_op_versions (volinfo);
+
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_reconfigure ();
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
+ goto out;
+ }
+ }
+
+ } else { /* at least one global option was set: repeat the same steps for every volume */
+ cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ volinfo = voliter;
+ gd_update_volume_op_versions (volinfo);
+
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager (svc, volinfo,
+ PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_reconfigure ();
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_NFS_SERVER_START_FAIL,
+ "Unable to restart NFS-Server");
+ goto out;
+ }
+ }
+ }
+ }
+
+ out:
+ GF_FREE (key_fixed);
+ gf_msg_debug (this->name, 0, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ return ret;
+}
+
+
+/* Handle a 'volume sync' request: export local volume definitions.
+ *
+ * Reads "hostname" from @dict; when it is not a local address nothing is done
+ * and 0 is returned. An optional "volname" restricts the export to one
+ * volume, otherwise every volume in priv->volumes is exported. When
+ * @rsp_dict is NULL nothing is exported and 0 is returned. The volumes are
+ * added to @rsp_dict with glusterd_add_volume_to_dict() and their number is
+ * stored under "count".
+ *
+ * Returns 0 on success and non-zero on failure. *op_errstr is only set when
+ * "hostname" cannot be read.
+ */
+static int
+glusterd_op_sync_volume (dict_t *dict, char **op_errstr,
+                         dict_t *rsp_dict)
+{
+        int                     ret = -1;
+        char                    *volname = NULL;
+        char                    *hostname = NULL;
+        char                    msg[2048] = {0,};
+        int                     count = 1;
+        int                     vol_count = 0;
+        glusterd_conf_t         *priv = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        xlator_t                *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "hostname", &hostname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "hostname couldn't be "
+                          "retrieved from msg");
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        if (!gf_is_local_addr (hostname)) {
+                ret = 0;
+                goto out;
+        }
+
+        //volname is not present in case of sync all
+        ret = dict_get_str (dict, "volname", &volname);
+        if (!ret) {
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_FOUND, "Volume with name: %s "
+                                "not exists", volname);
+                        goto out;
+                }
+        }
+
+        if (!rsp_dict) {
+                //this should happen only on source
+                ret = 0;
+                goto out;
+        }
+
+        if (volname) {
+                ret = glusterd_add_volume_to_dict (volinfo, rsp_dict,
+                                                   1, "volume");
+                /* Same handling as the loop below: do not report a count
+                 * for a volume that could not be added. */
+                if (ret)
+                        goto out;
+                vol_count = 1;
+        } else {
+                cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                        ret = glusterd_add_volume_to_dict (volinfo, rsp_dict,
+                                                           count, "volume");
+                        if (ret)
+                                goto out;
+
+                        vol_count = count++;
+                }
+        }
+        ret = dict_set_int32 (rsp_dict, "count", vol_count);
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Switch profiling on for @volinfo by setting VKEY_DIAG_LAT_MEASUREMENT and
+ * then VKEY_DIAG_CNT_FOP_HITS to "on" in volinfo->dict.
+ *
+ * Stops at the first dict_set_str() failure, logs it and returns its result;
+ * returns 0 when both options were set.
+ */
+static int
+glusterd_add_profile_volume_options (glusterd_volinfo_t *volinfo)
+{
+        char    *profile_keys[] = { VKEY_DIAG_LAT_MEASUREMENT,
+                                    VKEY_DIAG_CNT_FOP_HITS };
+        int      nkeys          = sizeof (profile_keys) /
+                                  sizeof (profile_keys[0]);
+        int      idx            = 0;
+        int      ret            = -1;
+
+        GF_ASSERT (volinfo);
+
+        for (idx = 0; idx < nkeys; idx++) {
+                ret = dict_set_str (volinfo->dict, profile_keys[idx], "on");
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "failed to set the volume %s "
+                                "option %s value %s",
+                                volinfo->volname, profile_keys[idx], "on");
+                        break;
+                }
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Switch profiling off for @volinfo by deleting VKEY_DIAG_LAT_MEASUREMENT
+ * and VKEY_DIAG_CNT_FOP_HITS from volinfo->dict.
+ */
+static void
+glusterd_remove_profile_volume_options (glusterd_volinfo_t *volinfo)
+{
+        GF_ASSERT (volinfo);
+
+        dict_del (volinfo->dict, VKEY_DIAG_LAT_MEASUREMENT);
+        dict_del (volinfo->dict, VKEY_DIAG_CNT_FOP_HITS);
+}
+/* Carry out a 'volume profile' (stats) operation on the volume named in
+ * @dict.
+ *
+ * GF_CLI_STATS_START adds the profiling options to the volume and
+ * GF_CLI_STATS_STOP removes them; in both cases volfiles are regenerated, the
+ * volinfo is stored and, for a started volume, services are reconfigured.
+ * GF_CLI_STATS_INFO and GF_CLI_STATS_TOP need no work here and return 0.
+ *
+ * Returns 0 on success and non-zero on failure.
+ * NOTE(review): neither *op_errstr nor @rsp_dict is written by this function.
+ */
+static int
+glusterd_op_stats_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {0,};
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t stats_op = GF_CLI_STATS_NONE;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exists",
+ volname);
+
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_VOL_NOT_FOUND, "%s", msg); /* msg is only logged, not handed back through op_errstr */
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "op", &stats_op);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "volume profile op get failed");
+ goto out;
+ }
+
+ switch (stats_op) {
+ case GF_CLI_STATS_START:
+ ret = glusterd_add_profile_volume_options (volinfo);
+ if (ret)
+ goto out;
+ break;
+ case GF_CLI_STATS_STOP:
+ glusterd_remove_profile_volume_options (volinfo);
+ break;
+ case GF_CLI_STATS_INFO:
+ case GF_CLI_STATS_TOP:
+ //info is already collected in brick op.
+ //just goto out;
+ ret = 0;
+ goto out;
+ break;
+ default:
+ GF_ASSERT (0);
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY, "Invalid profile op: %d",
+ stats_op);
+ ret = -1;
+ goto out;
+ break;
+ }
+ ret = glusterd_create_volfiles_and_notify_services (volinfo); /* reached only for START and STOP */
+
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'"); /* NOTE(review): text says 'volume set' although this is the profile path */
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_svcs_reconfigure (); /* NOTE(review): this result is overwritten with 0 just below - confirm that is deliberate */
+
+ ret = 0;
+
+out:
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+/* Copy the brick list kept in volinfo->rebal.dict into @dict under @prefix.
+ *
+ * The source "count" is written as "<prefix>.count" and every "brick<N>"
+ * entry (N from 1 to count) as "<prefix>.brick<N>".
+ *
+ * Returns 0 on success, or the result of the first failing dictionary
+ * operation.
+ */
+static int
+_add_remove_bricks_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo,
+                            char *prefix)
+{
+        xlator_t        *this           = NULL;
+        char            *brick_path     = NULL;
+        char             src_key[1024]  = {0,};
+        char             dst_key[1024]  = {0,};
+        int              nbricks        = 0;
+        int              idx            = 0;
+        int              ret            = -1;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (prefix);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_int32 (volinfo->rebal.dict, "count", &nbricks);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get brick count");
+                return ret;
+        }
+
+        snprintf (dst_key, sizeof (dst_key), "%s.count", prefix);
+        ret = dict_set_int32 (dict, dst_key, nbricks);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set brick count in dict");
+                return ret;
+        }
+
+        idx = 1;
+        while (idx <= nbricks) {
+                memset (src_key, 0, sizeof (src_key));
+                snprintf (src_key, sizeof (src_key), "brick%d", idx);
+
+                ret = dict_get_str (volinfo->rebal.dict, src_key,
+                                    &brick_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get %s", src_key);
+                        return ret;
+                }
+
+                memset (dst_key, 0, sizeof (dst_key));
+                snprintf (dst_key, sizeof (dst_key), "%s.%s", prefix,
+                          src_key);
+                ret = dict_set_str (dict, dst_key, brick_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to add brick to dict");
+                        return ret;
+                }
+
+                brick_path = NULL;
+                idx++;
+        }
+
+        return ret;
+}
+
+/* This adds the respective task-id and all available parameters of a task into
+ * a dictionary
+ *
+ * For task number @index the keys "task<index>.type", "task<index>.id" and
+ * "task<index>.status" are written to @dict from volinfo->rebal. For
+ * GD_OP_DETACH_TIER and GD_OP_REMOVE_BRICK the brick list is added first
+ * under the "task<index>" prefix. Any @op other than those two,
+ * GD_OP_TIER_MIGRATE and GD_OP_REBALANCE is rejected.
+ *
+ * Returns 0 on success and non-zero on failure, including failure to
+ * allocate the task-id string.
+ */
+static int
+_add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
+{
+
+        int             ret = -1;
+        char            key[128] = {0,};
+        char            *uuid_str = NULL;
+        int             status = 0;
+        xlator_t        *this = NULL;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        switch (op) {
+        case GD_OP_DETACH_TIER:
+        case GD_OP_REMOVE_BRICK:
+                snprintf (key, sizeof (key), "task%d", index);
+                ret = _add_remove_bricks_to_dict (dict, volinfo, key);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_ADD_REMOVE_BRICK_FAIL,
+                                "Failed to add remove bricks to dict");
+                        goto out;
+                }
+                /* fallthrough: these tasks also report id and status */
+        case GD_OP_TIER_MIGRATE:
+        case GD_OP_REBALANCE:
+                uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id));
+                status = volinfo->rebal.defrag_status;
+                break;
+
+        default:
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_TASK_ID, "%s operation doesn't have a"
+                        " task_id", gd_op_list[op]);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "task%d.type", index);
+        ret = dict_set_str (dict, key, (char *)gd_op_list[op]);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Error setting task type in dict");
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "task%d.id", index);
+
+        if (!uuid_str) {
+                /* ret is 0 from dict_set_str above; a failed gf_strdup must
+                 * not be reported as success. */
+                ret = -1;
+                goto out;
+        }
+        ret = dict_set_dynstr (dict, key, uuid_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Error setting task id in dict");
+                goto out;
+        }
+        uuid_str = NULL; /* now owned by dict */
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "task%d.status", index);
+        ret = dict_set_int32 (dict, key, status);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Error setting task status in dict");
+                goto out;
+        }
+
+out:
+        if (uuid_str)
+                GF_FREE (uuid_str);
+        return ret;
+}
+
+/* Add the volume's rebalance-type task, if any, to @rsp_dict and record the
+ * number of tasks under "tasks".
+ *
+ * A task exists when volinfo->rebal.rebalance_id is not null. On a tier
+ * volume GD_OP_REMOVE_BRICK is reported as GD_OP_DETACH_TIER and
+ * GD_OP_REBALANCE as GD_OP_TIER_MIGRATE; any other rebal.op on a tier volume
+ * adds nothing and is treated as a failure. Other volume types report
+ * rebal.op unchanged.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+glusterd_aggregate_task_status (dict_t *rsp_dict, glusterd_volinfo_t *volinfo)
+{
+        xlator_t        *this   = NULL;
+        int              tasks  = 0;
+        int              ret    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!gf_uuid_is_null (volinfo->rebal.rebalance_id)) {
+                if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                        ret = _add_task_to_dict (rsp_dict, volinfo,
+                                                 volinfo->rebal.op, tasks);
+                } else if (volinfo->rebal.op == GD_OP_REMOVE_BRICK) {
+                        ret = _add_task_to_dict (rsp_dict, volinfo,
+                                                 GD_OP_DETACH_TIER, tasks);
+                } else if (volinfo->rebal.op == GD_OP_REBALANCE) {
+                        ret = _add_task_to_dict (rsp_dict, volinfo,
+                                                 GD_OP_TIER_MIGRATE, tasks);
+                }
+                /* else: ret keeps its initial -1 */
+
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to add task details to dict");
+                        return ret;
+                }
+                tasks++;
+        }
+
+        ret = dict_set_int32 (rsp_dict, "tasks", tasks);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Error setting tasks count in dict");
+                return ret;
+        }
+
+        return 0;
+}
+
+/*
+ * Fill rsp_dict with this node's part of the answer to a "volume status"
+ * request described by dict.
+ *
+ * The "cmd" bitmask read from dict selects what is reported: a single
+ * service (NFS, self-heal daemon, quotad, bitd, scrubber, snapd), one named
+ * brick, only the task list, or - when none of those selectors is set -
+ * every brick of the volume whose uuid matches MY_UUID, optionally followed
+ * by the auxiliary services enabled for the volume.
+ *
+ * In addition to the per-node entries the following summary keys are set on
+ * rsp_dict: "cmd", "hot_brick_count", "type", "brick-index-max",
+ * "other-count" and "count".
+ *
+ * op_errstr is not written by this function.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+glusterd_op_status_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ int node_count = 0;
+ int brick_index = -1;
+ int other_count = 0;
+ int hot_brick_count = -1;
+ int other_index = 0;
+ uint32_t cmd = 0;
+ char *volname = NULL;
+ char *brick = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *vol_opts = NULL;
+ gf_boolean_t nfs_disabled = _gf_false;
+ gf_boolean_t shd_enabled = _gf_false;
+ gf_boolean_t origin_glusterd = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ GF_ASSERT (dict);
+
+ origin_glusterd = is_origin_glusterd (dict);
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ /* NOTE(review): a failure of glusterd_get_all_volnames() is only
+ * logged here; ret is overwritten by the dict_set_uint32() call that
+ * follows this block. */
+ if (origin_glusterd) {
+ ret = 0;
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ ret = glusterd_get_all_volnames (rsp_dict);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLNAMES_GET_FAIL,
+ "failed to get all volume "
+ "names for status");
+ }
+ }
+
+ ret = dict_set_uint32 (rsp_dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ /* For "status all" nothing volume-specific is added; ret is 0 here. */
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_NOT_FOUND, "Volume with name: %s "
+ "does not exist", volname);
+ goto out;
+ }
+ vol_opts = volinfo->dict;
+
+ /* Exactly one of the branches below runs; the selectors are tested in
+ * the order written, so the first matching bit wins. */
+ if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ ret = glusterd_add_node_to_dict (priv->nfs_svc.name, rsp_dict,
+ 0, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ ret = glusterd_add_node_to_dict (priv->shd_svc.name, rsp_dict,
+ 0, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ ret = glusterd_add_node_to_dict (priv->quotad_svc.name,
+ rsp_dict, 0, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
+ ret = glusterd_add_node_to_dict (priv->bitd_svc.name,
+ rsp_dict, 0, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
+ ret = glusterd_add_node_to_dict (priv->scrub_svc.name,
+ rsp_dict, 0, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
+ ret = glusterd_add_snapd_to_dict (volinfo, rsp_dict,
+ other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_str (dict, "brick", &brick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick,
+ volinfo,
+ &brickinfo,
+ _gf_false);
+ if (ret)
+ goto out;
+
+ /* The brick's uuid differs from MY_UUID: nothing is added and
+ * the function returns with ret == 0. */
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+ goto out;
+
+ /* The return values of the two add-brick helpers are not
+ * checked. */
+ glusterd_add_brick_to_dict (volinfo, brickinfo, rsp_dict,
+ ++brick_index);
+ if (cmd & GF_CLI_STATUS_DETAIL)
+ glusterd_add_brick_detail_to_dict (volinfo, brickinfo,
+ rsp_dict,
+ brick_index);
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_TASKS) != 0) {
+ /* Task-only request: the summary keys set further below are
+ * skipped. */
+ ret = glusterd_aggregate_task_status (rsp_dict, volinfo);
+ goto out;
+
+ } else {
+ /* brick_index advances for every brick of the volume, including
+ * the ones skipped because their uuid differs from MY_UUID. */
+ cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+ brick_list) {
+ brick_index++;
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ glusterd_add_brick_to_dict (volinfo, brickinfo,
+ rsp_dict, brick_index);
+
+ if (cmd & GF_CLI_STATUS_DETAIL) {
+ glusterd_add_brick_detail_to_dict (volinfo,
+ brickinfo,
+ rsp_dict,
+ brick_index);
+ }
+ node_count++;
+ }
+
+ /* With no selector bits set, the enabled auxiliary services are
+ * appended at indices following the last brick index. */
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) {
+ other_index = brick_index + 1;
+ if (glusterd_is_snapd_enabled (volinfo)) {
+ ret = glusterd_add_snapd_to_dict (volinfo,
+ rsp_dict,
+ other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ other_index++;
+ node_count++;
+ }
+
+ nfs_disabled = dict_get_str_boolean (vol_opts,
+ NFS_DISABLE_MAP_KEY,
+ _gf_false);
+ if (!nfs_disabled) {
+ ret = glusterd_add_node_to_dict
+ (priv->nfs_svc.name,
+ rsp_dict,
+ other_index,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_index++;
+ other_count++;
+ node_count++;
+ }
+
+ if (glusterd_is_shd_compatible_volume (volinfo))
+ shd_enabled = gd_is_self_heal_enabled
+ (volinfo, vol_opts);
+ if (shd_enabled) {
+ ret = glusterd_add_node_to_dict
+ (priv->shd_svc.name, rsp_dict,
+ other_index, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ other_index++;
+ }
+
+ if (glusterd_is_volume_quota_enabled (volinfo)) {
+ ret = glusterd_add_node_to_dict
+ (priv->quotad_svc.name,
+ rsp_dict,
+ other_index,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ other_index++;
+ }
+
+ if (glusterd_is_bitrot_enabled (volinfo)) {
+ ret = glusterd_add_node_to_dict
+ (priv->bitd_svc.name,
+ rsp_dict,
+ other_index,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ other_index++;
+ }
+
+ /* For handling scrub status. Scrub daemon will be
+ * running automatically when bitrot is enable*/
+ /* The scrubber is the last entry, so other_index is not
+ * advanced after it. */
+ if (glusterd_is_bitrot_enabled (volinfo)) {
+ ret = glusterd_add_node_to_dict
+ (priv->scrub_svc.name,
+ rsp_dict,
+ other_index,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ }
+ }
+ }
+
+ /* hot_brick_count stays at -1 for non-tier volumes. */
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ hot_brick_count = volinfo->tier_info.hot_brick_count;
+ ret = dict_set_int32 (rsp_dict, "hot_brick_count", hot_brick_count);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (rsp_dict, "type", volinfo->type);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Error setting brick-index-max to dict");
+ goto out;
+ }
+ ret = dict_set_int32 (rsp_dict, "other-count", other_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Error setting other-count to dict");
+ goto out;
+ }
+ ret = dict_set_int32 (rsp_dict, "count", node_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Error setting node count to dict");
+ goto out;
+ }
+
+ /* Active tasks */
+ /* Tasks are added only for normal volume status request for either a
+ * single volume or all volumes
+ */
+ if (!glusterd_status_has_tasks (cmd))
+ goto out;
+
+ ret = glusterd_aggregate_task_status (rsp_dict, volinfo);
+ if (ret)
+ goto out;
+ ret = 0;
+
+out:
+ gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+/*
+ * State-machine action that does nothing: it only emits a debug message
+ * and reports success. Neither event nor ctx is used.
+ */
+static int
+glusterd_op_ac_none (glusterd_op_sm_event_t *event, void *ctx)
+{
+        const int status = 0;
+
+        gf_msg_debug (THIS->name, 0, "Returning with %d", status);
+
+        return status;
+}
+
+/*
+ * Record a locking failure in the global opinfo, store it for the
+ * transaction identified by txn_id, and inject a reject event so that
+ * unlocking is triggered right away.
+ *
+ * Returns the result of injecting the reject event; a failure to store the
+ * opinfo is only logged.
+ */
+static int
+glusterd_op_sm_locking_failed (uuid_t *txn_id)
+{
+        int store_ret = -1;
+
+        opinfo.op_ret    = -1;
+        opinfo.op_errstr = gf_strdup ("locking failed for one of the peer.");
+
+        store_ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (store_ret != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+        }
+
+        return glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, txn_id,
+                                            NULL);
+}
+
+/*
+ * State-machine action: send a lock request to every eligible peer.
+ *
+ * A peer is eligible when its generation is not newer than the
+ * transaction's, it is connected with a mgmt program set, and it is either
+ * befriended or the current op is sync-volume. Depending on
+ * priv->op_version either the cluster lock or the mgmt_v3 lock procedure
+ * is used.
+ *
+ * The number of requests sent is stored in opinfo.pending_count. If no
+ * request is pending, the all-acc event is injected immediately. On any
+ * failure glusterd_op_sm_locking_failed() is invoked and its result is
+ * returned.
+ *
+ * ctx is not used.
+ */
+static int
+glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int                   ret           = 0;
+        rpc_clnt_procedure_t *proc          = NULL;
+        glusterd_conf_t      *priv          = NULL;
+        xlator_t             *this          = NULL;
+        glusterd_peerinfo_t  *peerinfo      = NULL;
+        uint32_t              pending_count = 0;
+        dict_t               *dict          = NULL;
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > opinfo.txn_generation)
+                        continue;
+
+                if (!peerinfo->connected || !peerinfo->mgmt)
+                        continue;
+                if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+                    (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+                        continue;
+
+                /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+                if (priv->op_version < GD_OP_VERSION_3_6_0) {
+                        proc = &peerinfo->mgmt->proctable
+                                          [GLUSTERD_MGMT_CLUSTER_LOCK];
+                        if (proc->fn) {
+                                ret = proc->fn (NULL, this, peerinfo);
+                                if (ret) {
+                                        /* Log while still inside the RCU
+                                         * read-side section: peerinfo must
+                                         * not be dereferenced after
+                                         * rcu_read_unlock(). */
+                                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                                GD_MSG_LOCK_REQ_SEND_FAIL,
+                                                "Failed to send lock request "
+                                                "for operation 'Volume %s' to "
+                                                "peer %s",
+                                                gd_op_list[opinfo.op],
+                                                peerinfo->hostname);
+                                        rcu_read_unlock ();
+                                        goto out;
+                                }
+                                /* Mark the peer as locked*/
+                                peerinfo->locked = _gf_true;
+                                pending_count++;
+                        }
+                } else {
+                        proc = &peerinfo->mgmt_v3->proctable
+                                          [GLUSTERD_MGMT_V3_LOCK];
+                        if (proc->fn) {
+                                /* Take the reference only when a request is
+                                 * actually going to be built; otherwise it
+                                 * would never be released. */
+                                dict = glusterd_op_get_ctx ();
+                                dict_ref (dict);
+
+                                ret = dict_set_static_ptr (dict, "peerinfo",
+                                                           peerinfo);
+                                if (ret) {
+                                        rcu_read_unlock ();
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "failed to set peerinfo");
+                                        dict_unref (dict);
+                                        goto out;
+                                }
+
+                                ret = proc->fn (NULL, this, dict);
+                                if (ret) {
+                                        /* As above: log before leaving the
+                                         * RCU read-side section. */
+                                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                               GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL,
+                                                "Failed to send mgmt_v3 lock "
+                                                "request for operation "
+                                                "'Volume %s' to peer %s",
+                                                gd_op_list[opinfo.op],
+                                                peerinfo->hostname);
+                                        rcu_read_unlock ();
+                                        dict_unref (dict);
+                                        goto out;
+                                }
+                                /* Mark the peer as locked*/
+                                peerinfo->locked = _gf_true;
+                                pending_count++;
+                        }
+                }
+        }
+        rcu_read_unlock ();
+
+        opinfo.pending_count = pending_count;
+
+        ret = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+
+        if (!opinfo.pending_count)
+                ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+
+out:
+        if (ret)
+                ret = glusterd_op_sm_locking_failed (&event->txn_id);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * State-machine action: send an unlock request to every eligible peer that
+ * is currently marked as locked.
+ *
+ * Eligibility matches the lock path (generation, connection state, friend
+ * state or sync-volume op) with the extra condition peerinfo->locked.
+ * Depending on priv->op_version either the cluster unlock or the mgmt_v3
+ * unlock procedure is used.
+ *
+ * A failure for one peer is logged, recorded in opinfo.op_errstr and the
+ * loop moves on to the next peer. The number of requests sent is stored in
+ * opinfo.pending_count; if it is zero the all-acc event is injected
+ * immediately.
+ *
+ * ctx is not used.
+ */
+static int
+glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int                   ret           = 0;
+        rpc_clnt_procedure_t *proc          = NULL;
+        glusterd_conf_t      *priv          = NULL;
+        xlator_t             *this          = NULL;
+        glusterd_peerinfo_t  *peerinfo      = NULL;
+        uint32_t              pending_count = 0;
+        dict_t               *dict          = NULL;
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > opinfo.txn_generation)
+                        continue;
+
+                if (!peerinfo->connected || !peerinfo->mgmt ||
+                    !peerinfo->locked)
+                        continue;
+                if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+                    (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+                        continue;
+                /* Based on the op_version,
+                 * release the cluster or mgmt_v3 lock */
+                if (priv->op_version < GD_OP_VERSION_3_6_0) {
+                        proc = &peerinfo->mgmt->proctable
+                                          [GLUSTERD_MGMT_CLUSTER_UNLOCK];
+                        if (proc->fn) {
+                                ret = proc->fn (NULL, this, peerinfo);
+                                if (ret) {
+                                        /* NOTE(review): op_errstr is
+                                         * overwritten on every failure;
+                                         * an earlier string, if any, is
+                                         * not freed here. */
+                                        opinfo.op_errstr = gf_strdup
+                                               ("Unlocking failed for one of "
+                                                "the peer.");
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_CLUSTER_UNLOCK_FAILED,
+                                                "Unlocking failed for operation"
+                                                " volume %s on peer %s",
+                                                gd_op_list[opinfo.op],
+                                                peerinfo->hostname);
+                                        continue;
+                                }
+                                pending_count++;
+                                peerinfo->locked = _gf_false;
+                        }
+                } else {
+                        proc = &peerinfo->mgmt_v3->proctable
+                                          [GLUSTERD_MGMT_V3_UNLOCK];
+                        if (proc->fn) {
+                                /* Take the reference only when a request is
+                                 * actually going to be built; otherwise it
+                                 * would never be released. */
+                                dict = glusterd_op_get_ctx ();
+                                dict_ref (dict);
+
+                                ret = dict_set_static_ptr (dict, "peerinfo",
+                                                           peerinfo);
+                                if (ret) {
+                                        opinfo.op_errstr = gf_strdup
+                                          ("Unlocking failed for one of the "
+                                           "peer.");
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_CLUSTER_UNLOCK_FAILED,
+                                                "Unlocking failed for operation"
+                                                " volume %s on peer %s",
+                                                gd_op_list[opinfo.op],
+                                                peerinfo->hostname);
+                                        dict_unref (dict);
+                                        continue;
+                                }
+
+                                ret = proc->fn (NULL, this, dict);
+                                if (ret) {
+                                        opinfo.op_errstr = gf_strdup
+                                          ("Unlocking failed for one of the "
+                                           "peer.");
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_CLUSTER_UNLOCK_FAILED,
+                                                "Unlocking failed for operation"
+                                                " volume %s on peer %s",
+                                                gd_op_list[opinfo.op],
+                                                peerinfo->hostname);
+                                        dict_unref (dict);
+                                        continue;
+                                }
+                                pending_count++;
+                                peerinfo->locked = _gf_false;
+                        }
+                }
+        }
+        rcu_read_unlock ();
+
+        opinfo.pending_count = pending_count;
+
+        ret = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        if (!opinfo.pending_count)
+                ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * State-machine action: account for one received acknowledgement.
+ *
+ * Decrements opinfo.pending_count (never below zero), stores the opinfo for
+ * the event's transaction and, once nothing is pending any more, injects
+ * the all-ack event. ctx is not used.
+ */
+static int
+glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int status = 0;
+
+        if (opinfo.pending_count > 0) {
+                opinfo.pending_count--;
+        }
+
+        status = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (status != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+        }
+
+        if (!opinfo.pending_count) {
+                status = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+                                                      &event->txn_id, NULL);
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning with %d", status);
+
+        return status;
+}
+
+/*
+ * State-machine action that simply forwards to glusterd_op_ac_ack_drain()
+ * with the same arguments and returns its result.
+ */
+static int
+glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int status = glusterd_op_ac_ack_drain (event, ctx);
+
+        return status;
+}
+
+/*
+ * State-machine action: acquire a lock on behalf of the requester described
+ * by ctx (a glusterd_op_lock_ctx_t) and send the response.
+ *
+ * If the request carries no dict (it came from a node running an older
+ * op_version) the cluster lock is taken. Otherwise a mgmt_v3 lock is
+ * taken on the "volname" entry of the dict; only when "volname" is absent
+ * is the "globalname" entry tried instead. The dict reference held in
+ * lock_ctx is dropped after the response has been sent.
+ *
+ * Returns the result of the last lock/lookup step.
+ */
+static int
+glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int32_t                  ret        = 0;
+        char                    *volname    = NULL;
+        char                    *globalname = NULL;
+        glusterd_op_lock_ctx_t  *lock_ctx   = NULL;
+        xlator_t                *this       = NULL;
+        uint32_t                 op_errno   = 0;
+
+        GF_ASSERT (event);
+        GF_ASSERT (ctx);
+
+        this = THIS;
+
+        lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+
+        /* If the req came from a node running on older op_version
+         * the dict won't be present. Based on it acquiring a cluster
+         * or mgmt_v3 lock */
+        if (lock_ctx->dict == NULL) {
+                ret = glusterd_lock (lock_ctx->uuid);
+                glusterd_op_lock_send_resp (lock_ctx->req, ret);
+        } else {
+                ret = dict_get_str (lock_ctx->dict, "volname", &volname);
+                if (!ret) {
+                        /* Volume lock requested: the global name is not
+                         * consulted in this case. */
+                        ret = glusterd_mgmt_v3_lock (volname, lock_ctx->uuid,
+                                                     &op_errno, "vol");
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                        "Unable to acquire lock for %s",
+                                        volname);
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to acquire volname");
+
+                        ret = dict_get_str (lock_ctx->dict, "globalname",
+                                            &globalname);
+                        if (!ret) {
+                                ret = glusterd_mgmt_v3_lock (globalname,
+                                                             lock_ctx->uuid,
+                                                             &op_errno,
+                                                             "global");
+                                if (ret)
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                                "Unable to acquire lock for %s",
+                                                globalname);
+                        }
+                }
+
+                glusterd_op_mgmt_v3_lock_send_resp (lock_ctx->req,
+                                                    &event->txn_id, ret);
+
+                dict_unref (lock_ctx->dict);
+        }
+
+        gf_msg_debug (THIS->name, 0, "Lock Returned %d", ret);
+        return ret;
+}
+
+/*
+ * State-machine action: release a lock on behalf of the requester described
+ * by ctx (a glusterd_op_lock_ctx_t) and send the response.
+ *
+ * Without a dict in the request the cluster lock is released. With a dict,
+ * the mgmt_v3 lock on "volname" is released; only when "volname" is absent
+ * is "globalname" tried instead. The dict reference held in lock_ctx is
+ * dropped after the response has been sent. Finally a pending quorum
+ * action, if flagged in the daemon's private data, is carried out.
+ */
+static int
+glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int32_t                  ret      = 0;
+        char                    *vol      = NULL;
+        char                    *global   = NULL;
+        glusterd_op_lock_ctx_t  *lock_ctx = NULL;
+        glusterd_conf_t         *priv     = NULL;
+        xlator_t                *this     = NULL;
+
+        GF_ASSERT (event);
+        GF_ASSERT (ctx);
+
+        this     = THIS;
+        priv     = this->private;
+        lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+
+        if (!lock_ctx->dict) {
+                /* Request from a node on an older op_version: no dict, so
+                 * the cluster lock is the one to release. */
+                ret = glusterd_unlock (lock_ctx->uuid);
+                glusterd_op_unlock_send_resp (lock_ctx->req, ret);
+        } else {
+                ret = dict_get_str (lock_ctx->dict, "volname", &vol);
+                if (ret == 0) {
+                        ret = glusterd_mgmt_v3_unlock (vol, lock_ctx->uuid,
+                                                       "vol");
+                        if (ret != 0)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                        "Unable to release lock for %s",
+                                        vol);
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to acquire volname");
+
+                        ret = dict_get_str (lock_ctx->dict, "globalname",
+                                            &global);
+                        if (ret == 0) {
+                                ret = glusterd_mgmt_v3_unlock (global,
+                                                               lock_ctx->uuid,
+                                                               "global");
+                                if (ret != 0)
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                                "Unable to release lock for %s",
+                                                global);
+                        }
+                }
+
+                glusterd_op_mgmt_v3_unlock_send_resp (lock_ctx->req,
+                                                      &event->txn_id, ret);
+
+                dict_unref (lock_ctx->dict);
+        }
+
+        gf_msg_debug (this->name, 0, "Unlock Returned %d", ret);
+
+        if (priv->pending_quorum_action)
+                glusterd_do_quorum_action ();
+
+        return ret;
+}
+
+/*
+ * State-machine action: release the cluster lock held by the originator
+ * whose uuid is passed through ctx (a pointer to uuid_t).
+ *
+ * Returns the result of glusterd_unlock().
+ */
+static int
+glusterd_op_ac_local_unlock (glusterd_op_sm_event_t *event, void *ctx)
+{
+        uuid_t *owner  = NULL;
+        int     status = 0;
+
+        GF_ASSERT (event);
+        GF_ASSERT (ctx);
+
+        owner  = (uuid_t *) ctx;
+        status = glusterd_unlock (*owner);
+
+        gf_msg_debug (THIS->name, 0, "Unlock Returned %d", status);
+
+        return status;
+}
+
+/*
+ * State-machine action: account for one accepted lock response.
+ *
+ * Decrements opinfo.pending_count (never below zero) and stores the opinfo
+ * for the event's transaction. While responses are still pending the
+ * function returns right away; once the count reaches zero the all-acc
+ * event is injected. ctx is not used.
+ */
+static int
+glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int status = 0;
+
+        GF_ASSERT (event);
+
+        if (opinfo.pending_count > 0) {
+                opinfo.pending_count--;
+        }
+
+        status = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (status != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+        }
+
+        /* Still waiting for other peers: return without the debug trace,
+         * carrying the result of the opinfo store. */
+        if (opinfo.pending_count > 0) {
+                return status;
+        }
+
+        status = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC,
+                                              &event->txn_id, NULL);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", status);
+
+        return status;
+}
+
+/*
+ * Look up the volume called volname and store its volume id, formatted as
+ * a string, in dict under the key "vol-id".
+ *
+ * dict       dictionary that receives the "vol-id" entry
+ * volname    name of the volume to look up
+ * op_errstr  optional; when non-NULL and an error message was produced,
+ *            receives a newly allocated copy of that message
+ *
+ * Returns 0 on success and non-zero on failure (including a NULL dict or
+ * volname, for which no message is produced).
+ */
+int
+glusterd_dict_set_volid (dict_t *dict, char *volname, char **op_errstr)
+{
+        int                  ret      = -1;
+        glusterd_volinfo_t  *volinfo  = NULL;
+        char                *volid    = NULL;
+        char                 msg[1024] = {0,};
+        xlator_t            *this     = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!dict || !volname)
+                goto out;
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                goto out;
+        }
+        volid = gf_strdup (uuid_utoa (volinfo->volume_id));
+        if (!volid) {
+                ret = -1;
+                goto out;
+        }
+        ret = dict_set_dynstr (dict, "vol-id", volid);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to set volume id of volume"
+                          " %s", volname);
+                goto out;
+        }
+out:
+        if (msg[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_ID_SET_FAIL, "%s", msg);
+                /* The caller may not be interested in the message text. */
+                if (op_errstr)
+                        *op_errstr = gf_strdup (msg);
+        }
+        return ret;
+}
+
+/*
+ * Derive a non-zero commit hash from the current time and store it in dict
+ * under the key "commit-hash".
+ *
+ * Returns the result of dict_set_uint32().
+ */
+int
+gd_set_commit_hash (dict_t *dict)
+{
+        /* Zero-initialised so the value is well defined even if
+         * gettimeofday() were to fail. */
+        struct timeval  tv   = {0,};
+        uint32_t        hash = 0;
+
+        /*
+         * We need a commit hash that won't conflict with others we might have
+         * set, or zero which is the implicit value if we never have.  Using
+         * seconds<<3 like this ensures that we'll only get a collision if two
+         * consecutive rebalances are separated by exactly 2^29 seconds - about
+         * 17 years - and even then there's only a 1/8 chance of a collision in
+         * the low order bits.  It's far more likely that this code will have
+         * changed completely by then.  If not, call me in 2031.
+         *
+         * P.S. Time zone changes?  Yeah, right.
+         */
+        gettimeofday (&tv, NULL);
+
+        /* Convert to unsigned before shifting: shifting the signed seconds
+         * value could overflow where time_t is 32 bits wide. The unsigned
+         * shift wraps modulo 2^32, which is the intended result. */
+        hash = ((uint32_t) tv.tv_sec) << 3;
+
+        /*
+         * Make sure at least one of those low-order bits is set.  The extra
+         * shifting is because not all machines have sub-millisecond time
+         * resolution.
+         */
+        hash |= 1U << ((tv.tv_usec >> 10) % 3);
+
+        return dict_set_uint32 (dict, "commit-hash", hash);
+}
+
+/*
+ * Build the request dictionary (payload) for the operation being run.
+ *
+ * req        out: receives the payload dictionary on success; it is left
+ *            untouched on failure
+ * op_errstr  out: may receive an error message from
+ *            glusterd_dict_set_volid()
+ * op_ctx     optional operation context. When NULL, the op and context of
+ *            the current transaction are used; otherwise the op is read
+ *            from op_ctx under the key "sync-mgmt-operation" and op_ctx
+ *            itself is the context.
+ *
+ * Depending on the op the payload is either a copy of the context or a new
+ * reference to the context itself (set-volume, remove-brick). For most
+ * volume ops the volume id is added to the context first; remove-brick and
+ * rebalance additionally get a commit hash.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
+{
+        int                     ret = -1;
+        void                    *ctx = NULL;
+        dict_t                  *dict = NULL;
+        dict_t                  *req_dict = NULL;
+        glusterd_op_t           op = GD_OP_NONE;
+        char                    *volname = NULL;
+        uint32_t                status_cmd = GF_CLI_STATUS_NONE;
+        char                    *errstr = NULL;
+        xlator_t                *this = NULL;
+        gf_boolean_t            do_common = _gf_false;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        req_dict = dict_new ();
+        if (!req_dict)
+                goto out;
+
+        if (!op_ctx) {
+                op  = glusterd_op_get_op ();
+                ctx = (void*)glusterd_op_get_ctx ();
+                if (!ctx) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_NO_OPTIONS_GIVEN, "Null Context for "
+                                "op %d", op);
+                        ret = -1;
+                        goto out;
+                }
+
+        } else {
+#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation"
+                ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, (int32_t*)&op);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get volume"
+                                " operation");
+                        goto out;
+                }
+                ctx = op_ctx;
+#undef GD_SYNC_OPCODE_KEY
+        }
+
+        dict = ctx;
+        switch (op) {
+                case GD_OP_CREATE_VOLUME:
+                        {
+                                ++glusterfs_port;
+                                ret = dict_set_int32 (dict, "port",
+                                                      glusterfs_port);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "Failed to set port in "
+                                                "dictionary");
+                                        goto out;
+                                }
+                                dict_copy (dict, req_dict);
+                        }
+                        break;
+
+                case GD_OP_GSYNC_CREATE:
+                case GD_OP_GSYNC_SET:
+                        {
+                                /* A failure to extract the gsync arguments
+                                 * is not fatal here: the volume id is simply
+                                 * not added. */
+                                ret = glusterd_op_gsync_args_get (dict,
+                                                                  &errstr,
+                                                                  &volname,
+                                                                  NULL, NULL);
+                                if (ret == 0) {
+                                        ret = glusterd_dict_set_volid
+                                                (dict, volname, op_errstr);
+                                        if (ret)
+                                                goto out;
+                                }
+                                dict_copy (dict, req_dict);
+                        }
+                        break;
+
+                case GD_OP_SET_VOLUME:
+                        {
+                                ret = dict_get_str (dict, "volname", &volname);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                                GD_MSG_DICT_GET_FAILED,
+                                                "volname is not present in "
+                                                "operation ctx");
+                                        goto out;
+                                }
+                                /* "help", "help-xml" and "all" are not real
+                                 * volumes, so there is no id to add. */
+                                if (strcmp (volname, "help") &&
+                                    strcmp (volname, "help-xml") &&
+                                    strcasecmp (volname, "all")) {
+                                        ret = glusterd_dict_set_volid
+                                                (dict, volname, op_errstr);
+                                        if (ret)
+                                                goto out;
+                                }
+                                /* The context itself becomes the payload. */
+                                dict_destroy (req_dict);
+                                req_dict = dict_ref (dict);
+                        }
+                        break;
+
+                case GD_OP_REMOVE_BRICK:
+                        {
+                                ret = dict_get_str (dict, "volname", &volname);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                                GD_MSG_DICT_GET_FAILED,
+                                                "volname is not present in "
+                                                "operation ctx");
+                                        goto out;
+                                }
+
+                                ret = glusterd_dict_set_volid (dict, volname,
+                                                               op_errstr);
+                                if (ret)
+                                        goto out;
+
+                                /* Keep the failure code in ret so that the
+                                 * caller is not told the payload was built. */
+                                ret = gd_set_commit_hash (dict);
+                                if (ret)
+                                        goto out;
+
+                                /* The context itself becomes the payload. */
+                                dict_destroy (req_dict);
+                                req_dict = dict_ref (dict);
+                        }
+                        break;
+
+                case GD_OP_STATUS_VOLUME:
+                        {
+                                ret = dict_get_uint32 (dict, "cmd",
+                                                       &status_cmd);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_GET_FAILED,
+                                                "Status command not present "
+                                                "in op ctx");
+                                        goto out;
+                                }
+                                if (GF_CLI_STATUS_ALL & status_cmd) {
+                                        dict_copy (dict, req_dict);
+                                        break;
+                                }
+                                do_common = _gf_true;
+                        }
+                        break;
+
+                case GD_OP_DELETE_VOLUME:
+                case GD_OP_START_VOLUME:
+                case GD_OP_STOP_VOLUME:
+                case GD_OP_ADD_BRICK:
+                case GD_OP_REPLACE_BRICK:
+                case GD_OP_RESET_VOLUME:
+                case GD_OP_LOG_ROTATE:
+                case GD_OP_QUOTA:
+                case GD_OP_PROFILE_VOLUME:
+                case GD_OP_HEAL_VOLUME:
+                case GD_OP_STATEDUMP_VOLUME:
+                case GD_OP_CLEARLOCKS_VOLUME:
+                case GD_OP_DEFRAG_BRICK_VOLUME:
+                case GD_OP_BARRIER:
+                case GD_OP_BITROT:
+                case GD_OP_SCRUB_STATUS:
+                        {
+                                do_common = _gf_true;
+                        }
+                        break;
+
+                case GD_OP_REBALANCE:
+                        {
+                                /* As for remove-brick: propagate the
+                                 * failure code instead of whatever ret
+                                 * happened to hold. */
+                                ret = gd_set_commit_hash (dict);
+                                if (ret)
+                                        goto out;
+                                do_common = _gf_true;
+                        }
+                        break;
+
+                case GD_OP_SYNC_VOLUME:
+                case GD_OP_COPY_FILE:
+                case GD_OP_SYS_EXEC:
+                        {
+                                dict_copy (dict, req_dict);
+                        }
+                        break;
+
+                case GD_OP_GANESHA:
+                        {
+                                dict_copy (dict, req_dict);
+                        }
+                        break;
+
+                default:
+                        break;
+        }
+
+        /*
+         * This has been moved out of the switch so that multiple ops with
+         * other special needs can all "fall through" to it.
+         */
+        if (do_common) {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, -ret,
+                                GD_MSG_DICT_GET_FAILED,
+                                "volname is not present in "
+                                "operation ctx");
+                        goto out;
+                }
+
+                if (strcasecmp (volname, "all")) {
+                        ret = glusterd_dict_set_volid (dict,
+                                                       volname,
+                                                       op_errstr);
+                        if (ret)
+                                goto out;
+                }
+                dict_copy (dict, req_dict);
+        }
+
+        *req = req_dict;
+        ret = 0;
+
+out:
+        /* On failure the payload is not handed to the caller, so drop the
+         * reference held here to avoid leaking it. */
+        if (ret && req_dict)
+                dict_unref (req_dict);
+
+        return ret;
+}
+
+/*
+ * State-machine action: build the payload for the current operation, check
+ * server quorum, validate the stage locally and then send a stage request
+ * to every eligible peer.
+ *
+ * A peer is eligible when its generation is not newer than the
+ * transaction's, it is connected with a mgmt program set, and it is either
+ * befriended or the current op is sync-volume.
+ *
+ * The number of requests sent is stored in opinfo.pending_count. On failure
+ * opinfo.op_ret (and, where available, opinfo.op_errstr) is set and a
+ * reject event is injected. If no request is pending, the all-acc event is
+ * injected and its result becomes the return value.
+ *
+ * ctx is not used.
+ */
+static int
+glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ int ret1 = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ uint32_t pending_count = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ op = glusterd_op_get_op ();
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create rsp_dict");
+ ret = -1;
+ goto out;
+ }
+
+ /* Passing NULL as op_ctx makes the payload builder use the op and
+ * context of the current transaction. */
+ ret = glusterd_op_build_payload (&dict, &op_errstr, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_validate_quorum (this, op, dict, &op_errstr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ /* Validate the stage on this node before asking any peer. */
+ ret = glusterd_op_stage_validate (op, dict, &op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VALIDATE_FAILED, LOGSTR_STAGE_FAIL,
+ gd_op_list[op], "localhost",
+ (op_errstr) ? ":" : " ", (op_errstr) ? op_errstr : " ");
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_STAGE_FAIL,
+ "localhost");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP];
+ GF_ASSERT (proc);
+ if (proc->fn) {
+ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
+ if (ret) {
+ rcu_read_unlock ();
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED, "failed to "
+ "set peerinfo");
+ goto out;
+ }
+
+ ret = proc->fn (NULL, this, dict);
+ if (ret) {
+ /* NOTE(review): the loop moves on to the next peer, but ret
+ * keeps this failure unless a later send overwrites it; if
+ * this was the last eligible peer the failure path below is
+ * taken. */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_STAGE_REQ_SEND_FAIL, "Failed to "
+ "send stage request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+ rcu_read_unlock ();
+
+ opinfo.pending_count = pending_count;
+out:
+ if (ret)
+ opinfo.op_ret = ret;
+
+ /* A failure to store the opinfo is only logged; it does not change
+ * ret. */
+ ret1 = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+ if (ret1)
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ if (dict)
+ dict_unref (dict);
+ if (ret) {
+ glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ &event->txn_id, NULL);
+ opinfo.op_ret = ret;
+ }
+
+ gf_msg_debug (this->name, 0, "Sent stage op request for "
+ "'Volume %s' to %d peers", gd_op_list[op],
+ opinfo.pending_count);
+
+ /* NOTE(review): this also runs on the failure path when nothing is
+ * pending, in which case ret is replaced by the result of the
+ * injection. */
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+
+ gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+ return ret;
+
+}
+
+/*
+ * For every index i in [idx_min, idx_max), format a key from key_fmt and i
+ * and, if the dict holds a uuid string under that key which resolves to a
+ * known hostname, replace the value with that hostname.
+ *
+ * dict      dictionary to modify in place
+ * key_fmt   printf-style format taking one int (the index)
+ * idx_min   first index, inclusive
+ * idx_max   last index, exclusive
+ *
+ * Keys that are missing, values that do not parse as a uuid and uuids with
+ * no hostname are skipped; the original value is retained and this is not
+ * treated as an error. Only a failure to store a hostname is.
+ *
+ * Returns 0 on success, non-zero if storing a hostname failed, and -1 when
+ * the index range is empty.
+ */
+static int
+glusterd_op_volume_dict_uuid_to_hostname (dict_t *dict, const char *key_fmt,
+                                          int idx_min, int idx_max)
+{
+        int             ret = -1;
+        int             i = 0;
+        char            key[1024];
+        char            *uuid_str = NULL;
+        uuid_t          uuid = {0,};
+        char            *hostname = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (key_fmt);
+
+        for (i = idx_min; i < idx_max; i++) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), key_fmt, i);
+                ret = dict_get_str (dict, key, &uuid_str);
+                if (ret) {
+                        /* No such key: skip it. Reset ret so that a skipped
+                         * final index is not reported as a failure. */
+                        ret = 0;
+                        continue;
+                }
+
+                gf_msg_debug (this->name, 0, "Got uuid %s",
+                        uuid_str);
+
+                ret = gf_uuid_parse (uuid_str, uuid);
+                /* if parsing fails don't error out
+                 * let the original value be retained
+                 */
+                if (ret) {
+                        ret = 0;
+                        continue;
+                }
+
+                hostname = glusterd_uuid_to_hostname (uuid);
+                if (hostname) {
+                        gf_msg_debug (this->name, 0, "%s -> %s",
+                                uuid_str, hostname);
+                        ret = dict_set_dynstr (dict, key, hostname);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Error setting hostname %s to dict",
+                                        hostname);
+                                GF_FREE (hostname);
+                                goto out;
+                        }
+                }
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Translate a generic defrag status into its layout-fix counterpart and
+ * write the result back to dict under key.
+ *
+ * A zero status is left alone and nothing is written. Statuses without a
+ * layout-fix counterpart are written back unchanged. *status is updated to
+ * the translated value.
+ *
+ * Returns 0 when nothing had to be done or the value was stored, otherwise
+ * the error from dict_set_int32() (which is also logged as a warning).
+ */
+static int
+reassign_defrag_status (dict_t *dict, char *key, gf_defrag_status_t *status)
+{
+        gf_defrag_status_t  mapped = *status;
+        int                 rc     = 0;
+
+        if (!mapped)
+                return rc;
+
+        switch (mapped) {
+        case GF_DEFRAG_STATUS_STARTED:
+                mapped = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
+                break;
+        case GF_DEFRAG_STATUS_STOPPED:
+                mapped = GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED;
+                break;
+        case GF_DEFRAG_STATUS_COMPLETE:
+                mapped = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE;
+                break;
+        case GF_DEFRAG_STATUS_FAILED:
+                mapped = GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED;
+                break;
+        default:
+                /* No layout-fix equivalent: keep the value as it is. */
+                break;
+        }
+        *status = mapped;
+
+        rc = dict_set_int32(dict, key, *status);
+        if (rc != 0) {
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to reset defrag %s in dict", key);
+        }
+
+        return rc;
+}
+
+/*
+ * Rewrite the per-peer defrag status values ("status-1" ... "status-<count>")
+ * in dict so that the rebalance-status CLI command can tell a fix-layout
+ * run apart from a full rebalance.
+ *
+ * The volume is looked up through the "volname" entry of dict. Nothing is
+ * rewritten unless the volume's defrag command was a layout fix. The entry
+ * "status-1" is always processed, even when count is smaller than 1.
+ *
+ * Returns 0 on success and non-zero on the first failure.
+ */
+static int
+glusterd_op_check_peer_defrag_status (dict_t *dict, int count)
+{
+        glusterd_volinfo_t  *volinfo         = NULL;
+        gf_defrag_status_t   peer_status     = GF_DEFRAG_STATUS_NOT_STARTED;
+        char                 status_key[256] = {0,};
+        char                *volname         = NULL;
+        int                  ret             = -1;
+        int                  peer            = 1;
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                return ret;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                return ret;
+        }
+
+        /* Fix layout was not issued; the statuses need no translation. */
+        if (volinfo->rebal.defrag_cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)
+                return 0;
+
+        for (;;) {
+                memset (status_key, 0, sizeof (status_key));
+                snprintf (status_key, sizeof (status_key), "status-%d", peer);
+
+                ret = dict_get_int32 (dict, status_key,
+                                      (int32_t *)&peer_status);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "failed to get defrag %s", status_key);
+                        return ret;
+                }
+
+                ret = reassign_defrag_status (dict, status_key, &peer_status);
+                if (ret)
+                        return ret;
+
+                peer++;
+                if (peer > count)
+                        break;
+        }
+
+        return 0;
+}
+
+/* Decide whether the op_ctx of a volume status command needs to be
+   modified. The check is needed because the dictionary of some status
+   commands does not carry the keys that the op_ctx modification relies
+   on.
+
+   Special Cases:
+   - volume status all
+   - volume status
+
+   Regular Cases:
+   - volume status <volname> <brick>
+   - volume status <volname> mem
+   - volume status <volname> clients
+   - volume status <volname> inode
+   - volume status <volname> fd
+   - volume status <volname> callpool
+   - volume status <volname> tasks
+
+   Returns _gf_true only when no bit of GF_CLI_STATUS_MASK is set in cmd
+   and neither the brick flag nor the all flag is set.
+*/
+
+static gf_boolean_t
+glusterd_is_volume_status_modify_op_ctx (uint32_t cmd)
+{
+        /* Any specific status selector rules the modification out. */
+        if ((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE)
+                return _gf_false;
+
+        /* So do a single-brick request and "status all". */
+        if ((cmd & GF_CLI_STATUS_BRICK) || (cmd & GF_CLI_STATUS_ALL))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * For every brick index from 0 to brick_index_max, make sure op_ctx has a
+ * "brick<N>.rdma_port" entry. When it is missing, the value stored under
+ * "brick<N>.port" is stored under the rdma key instead and the
+ * "brick<N>.port" entry is set to an empty string.
+ *
+ * Processing stops at the first brick for which neither key can be read or
+ * a value cannot be stored.
+ *
+ * Returns 0 on success, non-zero on failure, and -1 when brick_index_max is
+ * negative (no brick is processed).
+ */
+int
+glusterd_op_modify_port_key (dict_t *op_ctx, int brick_index_max)
+{
+        char  *port_val         = NULL;
+        int    brick            = 0;
+        int    ret              = -1;
+        char   rdma_key[1024]   = {0};
+        char   legacy_key[1024] = {0};
+
+        for (brick = 0; brick <= brick_index_max; brick++) {
+                snprintf (rdma_key, sizeof (rdma_key),
+                          "brick%d.rdma_port", brick);
+                ret = dict_get_str (op_ctx, rdma_key, &port_val);
+                if (ret == 0)
+                        continue;
+
+                /* No rdma key: fall back to the value under the older
+                 * "port" key. */
+                snprintf (legacy_key, sizeof (legacy_key),
+                          "brick%d.port", brick);
+                ret = dict_get_str (op_ctx, legacy_key, &port_val);
+                if (ret != 0)
+                        break;
+
+                ret = dict_set_str (op_ctx, rdma_key, port_val);
+                if (ret != 0)
+                        break;
+
+                ret = dict_set_str (op_ctx, legacy_key, "\0");
+                if (ret != 0)
+                        break;
+        }
+
+        return ret;
+}
+
+/* This function is used to modify the op_ctx dict before sending it back
+ * to cli. This is useful in situations like changing the peer uuids to
+ * hostnames etc.
+ *
+ * @op:  operation whose response dictionary is being prepared
+ * @ctx: dictionary to modify; when NULL, the dictionary returned by
+ *       glusterd_op_get_ctx () is used instead
+ *
+ * Nothing is returned. A failure is only visible through the
+ * "op_ctx modification failed" warning written to the log.
+ */
+void
+glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx)
+{
+        int                  ret             = -1;
+        dict_t              *op_ctx          = NULL;
+        int                  brick_index_max = -1;
+        int                  other_count     = 0;
+        int                  count           = 0;
+        uint32_t             cmd             = GF_CLI_STATUS_NONE;
+        xlator_t            *this            = NULL;
+        glusterd_conf_t     *conf            = NULL;
+        char                *volname         = NULL;
+        glusterd_volinfo_t  *volinfo         = NULL;
+        char                *port            = NULL;
+        char                *uuid_str        = NULL;
+        char                *uuid            = NULL;
+        int                  i               = 0;
+        char                 key[1024]       = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (ctx)
+                op_ctx = ctx;
+        else
+                op_ctx = glusterd_op_get_ctx();
+
+        if (!op_ctx) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_OPCTX_NULL,
+                        "Operation context is not present.");
+                goto out;
+        }
+
+        switch (op) {
+        case GD_OP_STATUS_VOLUME:
+                ret = dict_get_uint32 (op_ctx, "cmd", &cmd);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                "Failed to get status cmd");
+                        goto out;
+                }
+
+                if (!glusterd_is_volume_status_modify_op_ctx (cmd)) {
+                        gf_msg_debug (this->name, 0,
+                                "op_ctx modification not required for status "
+                                "operation being performed");
+                        goto out;
+                }
+
+                ret = dict_get_int32 (op_ctx, "brick-index-max",
+                                      &brick_index_max);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                "Failed to get brick-index-max");
+                        goto out;
+                }
+
+                ret = dict_get_int32 (op_ctx, "other-count", &other_count);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                "Failed to get other-count");
+                        goto out;
+                }
+
+                count = brick_index_max + other_count + 1;
+
+                /*
+                 * a glusterd lesser than version 3.7 will be sending the
+                 * rdma port in older key. Changing that value from here
+                 * to support backward compatibility
+                 */
+                ret = dict_get_str (op_ctx, "volname", &volname);
+                if (ret)
+                        goto out;
+
+                /* Make sure every brick has an rdma_port entry, even if
+                 * it is only an empty string */
+                for (i = 0; i <= brick_index_max; i++) {
+                        snprintf (key, sizeof (key), "brick%d.rdma_port", i);
+                        ret = dict_get_str (op_ctx, key, &port);
+                        if (ret) {
+                                ret = dict_set_str (op_ctx, key, "\0");
+                                if (ret)
+                                        goto out;
+                        }
+                }
+
+                /* The lookup result must be checked: volinfo is
+                 * dereferenced right below */
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                                volname);
+                        goto out;
+                }
+
+                if (conf->op_version < GD_OP_VERSION_3_7_0 &&
+                    volinfo->transport_type == GF_TRANSPORT_RDMA) {
+                        ret = glusterd_op_modify_port_key (op_ctx,
+                                                           brick_index_max);
+                        if (ret)
+                                goto out;
+                }
+
+                /* add 'brick%d.peerid' into op_ctx with value of
+                   'brick%d.path'. nfs/sshd like services have this
+                   additional uuid */
+                for (i = brick_index_max + 1; i < count; i++) {
+                        snprintf (key, sizeof (key), "brick%d.path", i);
+                        ret = dict_get_str (op_ctx, key, &uuid_str);
+                        if (ret)
+                                continue;
+
+                        snprintf (key, sizeof (key), "brick%d.peerid", i);
+                        uuid = gf_strdup (uuid_str);
+                        if (!uuid) {
+                                gf_msg_debug (this->name, 0,
+                                        "unable to create dup of"
+                                        " uuid_str");
+                                continue;
+                        }
+                        /* The copy stays ours only if the set failed */
+                        ret = dict_set_dynstr (op_ctx, key, uuid);
+                        if (ret != 0)
+                                GF_FREE (uuid);
+                }
+
+                ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
+                                                                "brick%d.path",
+                                                                0, count);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_CONVERSION_FAILED,
+                                "Failed uuid to hostname conversion");
+
+                break;
+
+        case GD_OP_PROFILE_VOLUME:
+                ret = dict_get_str_boolean (op_ctx, "nfs", _gf_false);
+                if (!ret)
+                        goto out;
+
+                ret = dict_get_int32 (op_ctx, "count", &count);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                "Failed to get brick count");
+                        goto out;
+                }
+
+                ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
+                                                                "%d-brick",
+                                                                1, (count + 1));
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_CONVERSION_FAILED,
+                                "Failed uuid to hostname conversion");
+
+                break;
+
+        /* For both rebalance and remove-brick status, the glusterd op is the
+         * same
+         */
+        case GD_OP_DEFRAG_BRICK_VOLUME:
+        case GD_OP_SCRUB_STATUS:
+                ret = dict_get_int32 (op_ctx, "count", &count);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                "Failed to get count");
+                        goto out;
+                }
+
+                /* add 'node-name-%d' into op_ctx with value uuid_str.
+                   this will be used to convert to hostname later */
+                for (i = 1; i <= count; i++) {
+                        snprintf (key, sizeof (key), "node-uuid-%d", i);
+                        ret = dict_get_str (op_ctx, key, &uuid_str);
+                        if (ret)
+                                continue;
+
+                        snprintf (key, sizeof (key), "node-name-%d", i);
+                        uuid = gf_strdup (uuid_str);
+                        if (!uuid) {
+                                gf_msg_debug (this->name, 0,
+                                        "unable to create dup of"
+                                        " uuid_str");
+                                continue;
+                        }
+                        /* The copy stays ours only if the set failed */
+                        ret = dict_set_dynstr (op_ctx, key, uuid);
+                        if (ret != 0)
+                                GF_FREE (uuid);
+                }
+
+                ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
+                                                                "node-name-%d",
+                                                                1, (count + 1));
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_CONVERSION_FAILED,
+                                "Failed uuid to hostname conversion");
+
+                /* Since Both rebalance and bitrot scrub status are going to
+                 * use same code path till here, we should break in case
+                 * of scrub status */
+                if (op == GD_OP_SCRUB_STATUS) {
+                        break;
+                }
+
+                ret = glusterd_op_check_peer_defrag_status (op_ctx, count);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DEFRAG_STATUS_UPDATE_FAIL,
+                                "Failed to reset defrag status for fix-layout");
+                break;
+
+        default:
+                ret = 0;
+                gf_msg_debug (this->name, 0,
+                        "op_ctx modification not required");
+                break;
+
+        }
+
+out:
+        if (ret)
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_OPCTX_UPDATE_FAIL,
+                        "op_ctx modification failed");
+        return;
+}
+
+/* Build the hook script directory "<hooks dir>/<cmd subdir>/<pre|post>"
+ * for the given operation and either run the hooks found there
+ * (GD_COMMIT_HOOK_PRE) or enqueue a post stub (GD_COMMIT_HOOK_POST).
+ *
+ * @op:     operation being committed
+ * @op_ctx: dictionary handed over to the hook machinery
+ * @type:   which commit hook to trigger
+ *
+ * Returns -1 if the operation has an empty hooks sub-directory or if
+ * type is neither PRE nor POST; otherwise the result of the hook call.
+ */
+int
+glusterd_op_commit_hook (glusterd_op_t op, dict_t *op_ctx,
+                         glusterd_commit_hook_type_t type)
+{
+        glusterd_conf_t *priv                = THIS->private;
+        char             hookdir[PATH_MAX]   = {0, };
+        char             scriptdir[PATH_MAX] = {0, };
+        const char      *phase               = "";
+        char            *cmd_subdir          = NULL;
+        int              rc                  = -1;
+
+        /* GD_COMMIT_HOOK_NONE and GD_COMMIT_HOOK_MAX keep the empty name */
+        if (type == GD_COMMIT_HOOK_PRE)
+                phase = "pre";
+        else if (type == GD_COMMIT_HOOK_POST)
+                phase = "post";
+
+        cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op);
+        if (strlen (cmd_subdir) == 0)
+                return -1;
+
+        GLUSTERD_GET_HOOKS_DIR (hookdir, GLUSTERD_HOOK_VER, priv);
+        snprintf (scriptdir, sizeof (scriptdir), "%s/%s/%s",
+                  hookdir, cmd_subdir, phase);
+
+        if (type == GD_COMMIT_HOOK_PRE)
+                rc = glusterd_hooks_run_hooks (scriptdir, op, op_ctx, type);
+        else if (type == GD_COMMIT_HOOK_POST)
+                rc = glusterd_hooks_post_stub_enqueue (scriptdir, op, op_ctx);
+
+        return rc;
+}
+
+/* Commit action of the op state machine.
+ *
+ * Builds the op payload, performs the commit locally through
+ * glusterd_op_commit_perform () and then calls the
+ * GLUSTERD_MGMT_COMMIT_OP procedure of every eligible peer. The number
+ * of requests that were sent is stored in opinfo.pending_count.
+ *
+ * If ret is non-zero at the end, opinfo.op_ret is set and
+ * GD_OP_EVENT_RCVD_RJT is injected. If no request is pending,
+ * glusterd_op_sm_inject_all_acc () is called right away (after
+ * glusterd_op_modify_op_ctx () for everything but replace-brick).
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ *
+ * Returns the last status stored in ret; 0 when every step succeeded.
+ */
+static int
+glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ int ret1 = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ uint32_t pending_count = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ op = glusterd_op_get_op ();
+
+ ret = glusterd_op_build_payload (&dict, &op_errstr, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ /* Supply a default error string when the callee gave none */
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_op_commit_perform (op, dict, &op_errstr, NULL); //rsp_dict invalid for source
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_COMMIT_OP_FAIL, LOGSTR_COMMIT_FAIL,
+ gd_op_list[op], "localhost", (op_errstr) ? ":" : " ",
+ (op_errstr) ? op_errstr : " ");
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_COMMIT_FAIL,
+ "localhost");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ /* The peer list is walked inside an RCU read-side section; every
+ * exit from the loop below must be paired with rcu_read_unlock () */
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP];
+ GF_ASSERT (proc);
+ if (proc->fn) {
+ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
+ if (ret) {
+ rcu_read_unlock ();
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
+ }
+ /* A failed send is only logged and the peer is skipped; ret
+ * keeps that failure value unless a later iteration
+ * overwrites it */
+ ret = proc->fn (NULL, this, dict);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_COMMIT_REQ_SEND_FAIL,
+ "Failed to "
+ "send commit request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+ rcu_read_unlock ();
+
+ opinfo.pending_count = pending_count;
+ gf_msg_debug (this->name, 0, "Sent commit op req for 'Volume %s' "
+ "to %d peers", gd_op_list[op], opinfo.pending_count);
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (ret)
+ opinfo.op_ret = ret;
+
+ /* Tracked in ret1 so that a failure here does not mask ret */
+ ret1 = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+ if (ret1)
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ if (ret) {
+ glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ &event->txn_id, NULL);
+ opinfo.op_ret = ret;
+ }
+
+ /* Nothing is outstanding, so no peer response will drive the state
+ * machine forward; do it here */
+ if (!opinfo.pending_count) {
+ if (op == GD_OP_REPLACE_BRICK) {
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+ } else {
+ glusterd_op_modify_op_ctx (op, NULL);
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+ }
+ /* err: is the very next statement, so this jump is redundant */
+ goto err;
+ }
+
+err:
+ gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+ return ret;
+
+}
+
+/* Account for one received response: lower opinfo.pending_count (when
+ * positive), store opinfo for the transaction and, once nothing is
+ * pending any more, inject GD_OP_EVENT_STAGE_ACC.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ *
+ * Returns the result of the event injection, or the result of
+ * glusterd_set_txn_opinfo () while responses are still pending.
+ */
+static int
+glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int rc = 0;
+
+        GF_ASSERT (event);
+
+        if (opinfo.pending_count > 0)
+                --opinfo.pending_count;
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        /* Only the last outstanding response moves the state machine on */
+        if (!(opinfo.pending_count > 0))
+                rc = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC,
+                                                  &event->txn_id, NULL);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Account for one received response: lower opinfo.pending_count (when
+ * positive), store opinfo for the transaction and, once nothing is
+ * pending any more, inject GD_OP_EVENT_ALL_ACK.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ *
+ * Returns the result of the event injection, or the result of
+ * glusterd_set_txn_opinfo () while responses are still pending.
+ */
+static int
+glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int rc = 0;
+
+        GF_ASSERT (event);
+
+        if (opinfo.pending_count > 0)
+                --opinfo.pending_count;
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        /* Only the last outstanding response moves the state machine on */
+        if (!(opinfo.pending_count > 0))
+                rc = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+                                                  &event->txn_id, NULL);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Account for one received response: lower opinfo.pending_count (when
+ * positive), store opinfo for the transaction and, once nothing is
+ * pending any more, inject GD_OP_EVENT_ALL_ACK.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ *
+ * Returns the result of the event injection, or the result of
+ * glusterd_set_txn_opinfo () while responses are still pending.
+ */
+static int
+glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int rc = 0;
+
+        GF_ASSERT (event);
+
+        if (opinfo.pending_count > 0)
+                --opinfo.pending_count;
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        /* Only the last outstanding response moves the state machine on */
+        if (!(opinfo.pending_count > 0))
+                rc = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+                                                  &event->txn_id, NULL);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Process a brick op response carried in ctx: drop the responding node
+ * from opinfo.pending_bricks, record the first non-zero op_ret and the
+ * first error string in opinfo, and inject GD_OP_EVENT_ALL_ACK once no
+ * brick response is pending.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   glusterd_op_brick_rsp_ctx_t for this response. It is always
+ *         freed here, together with its rsp_dict reference; its
+ *         op_errstr is freed too unless opinfo took it over.
+ *
+ * Returns -1 if the node was not in the pending list, otherwise the
+ * result of the last call made.
+ */
+static int
+glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+        glusterd_op_brick_rsp_ctx_t *rsp         = NULL;
+        xlator_t                    *this        = THIS;
+        gf_boolean_t                 drop_errstr = _gf_false;
+        int                          rc          = 0;
+
+        GF_ASSERT (this);
+
+        GF_ASSERT (event);
+        GF_ASSERT (ctx);
+        rsp = ctx;
+
+        rc = glusterd_remove_pending_entry (&opinfo.pending_bricks,
+                                            rsp->pending_node->node);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNKNOWN_RESPONSE, "unknown response received ");
+                rc = -1;
+                drop_errstr = _gf_true;
+                goto cleanup;
+        }
+
+        if (opinfo.brick_pending_count > 0)
+                --opinfo.brick_pending_count;
+
+        if (!opinfo.op_ret)
+                opinfo.op_ret = rsp->op_ret;
+
+        /* opinfo keeps the first error string only; later ones are freed */
+        if (opinfo.op_errstr != NULL)
+                drop_errstr = _gf_true;
+        else
+                opinfo.op_errstr = rsp->op_errstr;
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        if (!(opinfo.brick_pending_count > 0))
+                rc = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+                                                  &event->txn_id,
+                                                  rsp->commit_ctx);
+
+cleanup:
+        if (rsp->rsp_dict)
+                dict_unref (rsp->rsp_dict);
+        if (drop_errstr && rsp->op_errstr)
+                GF_FREE (rsp->op_errstr);
+        GF_FREE (ctx);
+        gf_msg_debug (this->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Account for one received response: lower opinfo.pending_count (when
+ * positive) and store opinfo for the transaction. Once nothing is
+ * pending:
+ *  - for GD_OP_REPLACE_BRICK, glusterd_op_sm_inject_all_acc () is called
+ *    and, if that succeeds, nothing else is injected;
+ *  - otherwise the op_ctx is modified for the cli and
+ *    GD_OP_EVENT_COMMIT_ACC is injected.
+ * Whenever the status so far is non-zero, GD_OP_EVENT_RCVD_RJT is
+ * injected instead.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ *
+ * Returns the result of the last call made.
+ */
+static int
+glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+        xlator_t      *this = THIS;
+        glusterd_op_t  op   = GD_OP_NONE;
+        int            rc   = 0;
+
+        GF_ASSERT (this);
+        op = glusterd_op_get_op ();
+        GF_ASSERT (event);
+
+        if (opinfo.pending_count > 0)
+                --opinfo.pending_count;
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        if (!(opinfo.pending_count > 0) && op == GD_OP_REPLACE_BRICK) {
+                rc = glusterd_op_sm_inject_all_acc (&event->txn_id);
+                if (rc == 0)
+                        return rc; /* no commit ack for replace-brick */
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RBOP_START_FAIL,
+                        "Couldn't start replace-brick operation.");
+        }
+
+        if (rc != 0)
+                return glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+                                                    &event->txn_id, NULL);
+
+        if (!opinfo.pending_count) {
+                glusterd_op_modify_op_ctx (op, NULL);
+                rc = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC,
+                                                  &event->txn_id, NULL);
+        }
+        /* else: responses are still outstanding, nothing to do yet */
+
+        return rc;
+}
+
+/* Account for one received response: lower opinfo.pending_count (when
+ * positive), store opinfo for the transaction and, once nothing is
+ * pending any more, inject GD_OP_EVENT_ALL_ACC.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ *
+ * Returns the result of the event injection, or the result of
+ * glusterd_set_txn_opinfo () while responses are still pending.
+ */
+static int
+glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int rc = 0;
+
+        GF_ASSERT (event);
+
+        if (opinfo.pending_count > 0)
+                --opinfo.pending_count;
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        /* Still waiting for other responses (nothing is logged here) */
+        if (opinfo.pending_count > 0)
+                return rc;
+
+        rc = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC,
+                                          &event->txn_id, NULL);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Reset opinfo.op_errstr to NULL. The string it pointed to is not freed
+ * here. Always returns 0.
+ */
+int32_t
+glusterd_op_clear_errstr()
+{
+        opinfo.op_errstr = NULL;
+
+        return 0;
+}
+
+/* Store ctx as the op context in opinfo (opinfo.op_ctx).
+ * Always returns 0.
+ */
+int32_t
+glusterd_op_set_ctx (void *ctx)
+{
+        opinfo.op_ctx = ctx;
+        return 0;
+}
+
+/* Clear the op context by setting it to NULL through
+ * glusterd_op_set_ctx (). Always returns 0.
+ */
+int32_t
+glusterd_op_reset_ctx ()
+{
+        (void) glusterd_op_set_ctx (NULL);
+        return 0;
+}
+
+/* Finish a transaction: take a snapshot of the result held in opinfo,
+ * reset opinfo, release the lock, answer the cli, run a pending quorum
+ * action and finally clear the transaction's opinfo.
+ *
+ * @txn_id: id of the transaction whose opinfo is to be cleared
+ *
+ * Returns the result of glusterd_clear_txn_opinfo (). A failure to
+ * respond to the cli is logged but deliberately not returned.
+ */
+int32_t
+glusterd_op_txn_complete (uuid_t *txn_id)
+{
+        xlator_t         *this     = THIS;
+        glusterd_conf_t  *priv     = NULL;
+        rpcsvc_request_t *req      = NULL;
+        void             *ctx      = NULL;
+        char             *errstr   = NULL;
+        char             *volname  = NULL;
+        int32_t           op       = -1;
+        int32_t           op_ret   = 0;
+        int32_t           op_errno = 0;
+        int32_t           rc       = -1;
+
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Snapshot everything the response needs before opinfo is reset */
+        op       = glusterd_op_get_op ();
+        ctx      = glusterd_op_get_ctx ();
+        op_ret   = opinfo.op_ret;
+        op_errno = opinfo.op_errno;
+        req      = opinfo.req;
+        errstr   = opinfo.op_errstr;
+
+        opinfo.op_ret = 0;
+        opinfo.op_errno = 0;
+        glusterd_op_clear_op ();
+        glusterd_op_reset_ctx ();
+        glusterd_op_clear_errstr ();
+
+        /* Based on the op-version, we release the cluster or mgmt_v3 lock */
+        if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+                rc = dict_get_str (ctx, "volname", &volname);
+                if (rc != 0)
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "No Volume name present. "
+                                "Locks have not been held.");
+
+                if (volname != NULL) {
+                        rc = glusterd_mgmt_v3_unlock (volname, MY_UUID,
+                                                      "vol");
+                        if (rc != 0)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                        "Unable to release lock for %s",
+                                        volname);
+                }
+        } else {
+                rc = glusterd_unlock (MY_UUID);
+                /* unlock cant/shouldnt fail here!! */
+                if (rc != 0)
+                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                GD_MSG_GLUSTERD_UNLOCK_FAIL,
+                                "Unable to clear local lock, ret: %d", rc);
+                else
+                        gf_msg_debug (this->name, 0, "Cleared local lock");
+        }
+
+        rc = glusterd_op_send_cli_response (op, op_ret, op_errno, req, ctx,
+                                            errstr);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_CLI_RESP,
+                        "Responding to cli failed, "
+                        "ret: %d", rc);
+                /* Ignore this error, else state machine blocks */
+                rc = 0;
+        }
+
+        /* An empty error string is not freed */
+        if (errstr != NULL && errstr[0] != '\0')
+                GF_FREE (errstr);
+
+        if (priv->pending_quorum_action)
+                glusterd_do_quorum_action ();
+
+        /* Clearing the transaction opinfo */
+        rc = glusterd_clear_txn_opinfo (txn_id);
+        if (rc != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_CLEAR_FAIL,
+                        "Unable to clear transaction's opinfo");
+
+        gf_msg_debug (this->name, 0, "Returning %d", rc);
+        return rc;
+}
+
+/* Complete the transaction identified by event->txn_id through
+ * glusterd_op_txn_complete () and return its result.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   not used by this function
+ */
+static int
+glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int rc = 0;
+
+        GF_ASSERT (event);
+
+        rc = glusterd_op_txn_complete (&event->txn_id);
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Run stage validation for the request in ctx and send the stage
+ * response. The response dictionary carries a copy of the transaction
+ * id under "transaction_id"; the validation status travels in the
+ * response itself and is not this function's return value.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   glusterd_req_ctx_t describing the request
+ *
+ * Returns -1 when the response dictionary or the transaction id copy
+ * cannot be allocated, otherwise the result of the last call made.
+ */
+static int
+glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+        xlator_t           *this     = THIS;
+        glusterd_req_ctx_t *request  = NULL;
+        dict_t             *req_dict = NULL;
+        dict_t             *reply    = NULL;
+        uuid_t             *id_copy  = NULL;
+        char               *errstr   = NULL;
+        int32_t             status   = 0;
+        int                 rc       = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (ctx);
+
+        request = ctx;
+        req_dict = request->dict;
+
+        reply = dict_new ();
+        if (reply == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to get new dictionary");
+                return -1;
+        }
+
+        status = glusterd_op_stage_validate (request->op, req_dict, &errstr,
+                                             reply);
+        if (status != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VALIDATE_FAILED, "Stage failed on operation"
+                        " 'Volume %s', Status : %d", gd_op_list[request->op],
+                        status);
+
+        id_copy = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (id_copy == NULL) {
+                rc = -1;
+                goto done;
+        }
+        gf_uuid_copy (*id_copy, event->txn_id);
+
+        rc = dict_set_bin (reply, "transaction_id",
+                           id_copy, sizeof(*id_copy));
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set transaction id.");
+                /* The copy is freed here only when the set failed */
+                GF_FREE (id_copy);
+                goto done;
+        }
+
+        rc = glusterd_op_stage_send_resp (request->req, request->op,
+                                          status, errstr, reply);
+
+done:
+        /* An empty error string is not freed */
+        if (errstr != NULL && errstr[0] != '\0')
+                GF_FREE (errstr);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", rc);
+
+        if (reply != NULL)
+                dict_unref (reply);
+
+        return rc;
+}
+
+/* Report whether op is one of the operations listed below
+ * (profile, status, defrag-brick, heal, scrub status).
+ *
+ * @op: operation to test; asserted to lie strictly between GD_OP_NONE
+ *      and GD_OP_MAX
+ *
+ * Returns _gf_true for the listed operations, _gf_false otherwise.
+ */
+static gf_boolean_t
+glusterd_need_brick_op (glusterd_op_t op)
+{
+        GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
+
+        if (op == GD_OP_PROFILE_VOLUME ||
+            op == GD_OP_STATUS_VOLUME ||
+            op == GD_OP_DEFRAG_BRICK_VOLUME ||
+            op == GD_OP_HEAL_VOLUME ||
+            op == GD_OP_SCRUB_STATUS)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* Provide the response dictionary for a commit: a new reference to the
+ * current op context when glusterd_need_brick_op () says so for op,
+ * a freshly created dictionary otherwise.
+ *
+ * @op: operation being committed; asserted to lie strictly between
+ *      GD_OP_NONE and GD_OP_MAX
+ *
+ * Returns whatever dict_ref () or dict_new () returned.
+ */
+dict_t*
+glusterd_op_init_commit_rsp_dict (glusterd_op_t op)
+{
+        dict_t *op_ctx = NULL;
+
+        GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
+
+        if (!glusterd_need_brick_op (op))
+                return dict_new ();
+
+        op_ctx = glusterd_op_get_ctx ();
+        GF_ASSERT (op_ctx);
+
+        return dict_ref (op_ctx);
+}
+
+/* Perform the commit for the request in ctx and send the commit
+ * response. The response dictionary carries a copy of the transaction
+ * id under "transaction_id"; the commit status travels in the response
+ * itself and is not this function's return value.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   glusterd_req_ctx_t describing the request
+ *
+ * Returns -1 when the response dictionary or the transaction id copy
+ * cannot be obtained, otherwise the result of the last call made.
+ */
+static int
+glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+        xlator_t           *this     = THIS;
+        glusterd_req_ctx_t *request  = NULL;
+        dict_t             *req_dict = NULL;
+        dict_t             *reply    = NULL;
+        uuid_t             *id_copy  = NULL;
+        char               *errstr   = NULL;
+        int32_t             status   = 0;
+        int                 rc       = 0;
+
+        GF_ASSERT (this);
+        GF_ASSERT (ctx);
+
+        request = ctx;
+        req_dict = request->dict;
+
+        reply = glusterd_op_init_commit_rsp_dict (request->op);
+        if (reply == NULL)
+                return -1;
+
+        /* clear locks should be run only on originator glusterd, so the
+         * commit is skipped for it and status stays 0 */
+        if (request->op != GD_OP_CLEARLOCKS_VOLUME)
+                status = glusterd_op_commit_perform (request->op, req_dict,
+                                                     &errstr, reply);
+
+        if (status != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_OP_FAIL, "Commit of operation "
+                        "'Volume %s' failed: %d", gd_op_list[request->op],
+                        status);
+
+        id_copy = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (id_copy == NULL) {
+                rc = -1;
+                goto done;
+        }
+        gf_uuid_copy (*id_copy, event->txn_id);
+
+        rc = dict_set_bin (reply, "transaction_id",
+                           id_copy, sizeof(*id_copy));
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set transaction id.");
+                /* The copy is freed here only when the set failed */
+                GF_FREE (id_copy);
+                goto done;
+        }
+
+        rc = glusterd_op_commit_send_resp (request->req, request->op,
+                                           status, errstr, reply);
+
+done:
+        /* An empty error string is not freed */
+        if (errstr != NULL && errstr[0] != '\0')
+                GF_FREE (errstr);
+
+        if (reply != NULL)
+                dict_unref (reply);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", rc);
+
+        return rc;
+}
+
+/* Send a commit response built from the result stored in opinfo
+ * (op_ret, op_errstr) and the current op context, then free a non-empty
+ * opinfo.op_errstr and store opinfo for the transaction.
+ *
+ * @event: state machine event; its txn_id identifies the transaction
+ * @ctx:   glusterd_req_ctx_t describing the request
+ *
+ * Returns the result of glusterd_set_txn_opinfo (). NOTE(review): the
+ * status of sending the response is overwritten and never reported.
+ */
+static int
+glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+        glusterd_req_ctx_t *request = NULL;
+        dict_t             *op_ctx  = NULL;
+        int                 rc      = 0;
+
+        GF_ASSERT (ctx);
+        request = ctx;
+
+        op_ctx = glusterd_op_get_ctx ();
+
+        rc = glusterd_op_commit_send_resp (request->req, request->op,
+                                           opinfo.op_ret, opinfo.op_errstr,
+                                           op_ctx);
+
+        /* An empty error string is left in place */
+        if (opinfo.op_errstr != NULL && opinfo.op_errstr[0] != '\0') {
+                GF_FREE (opinfo.op_errstr);
+                opinfo.op_errstr = NULL;
+        }
+
+        rc = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (rc != 0)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        gf_msg_debug (THIS->name, 0, "Returning with %d", rc);
+        return rc;
+}
+
+/* Move opinfo to the state that the transition table prescribes for
+ * event_type and add the transition to the op state machine log kept in
+ * the glusterd configuration. A failure to log is ignored.
+ *
+ * @opinfo:     op info whose state is updated (this parameter hides the
+ *              file-level opinfo inside the function)
+ * @state:      transition table, indexed by event type
+ * @event_type: event that triggers the transition
+ *
+ * Always returns 0.
+ */
+static int
+glusterd_op_sm_transition_state (glusterd_op_info_t *opinfo,
+                                 glusterd_op_sm_t *state,
+                                 glusterd_op_sm_event_type_t event_type)
+{
+        glusterd_conf_t  *conf  = NULL;
+        glusterd_op_sm_t *entry = NULL;
+
+        GF_ASSERT (state);
+        GF_ASSERT (opinfo);
+
+        conf = THIS->private;
+        GF_ASSERT (conf);
+
+        entry = &state[event_type];
+
+        (void) glusterd_sm_tr_log_transition_add (&conf->op_sm_log,
+                                                  opinfo->state.state,
+                                                  entry->next_state,
+                                                  event_type);
+
+        opinfo->state.state = entry->next_state;
+        return 0;
+}
+
+/* Dispatch the stage (validation) step of an operation to the handler
+ * that belongs to it.
+ *
+ * @op:        operation to validate
+ * @dict:      request dictionary handed to the handler
+ * @op_errstr: handed to the handler so it can report an error string
+ * @rsp_dict:  response dictionary; only handed to the handlers that
+ *             take one
+ *
+ * Returns the handler's result. For an operation without a case below,
+ * "Unknown op" is logged and -1 (the initial value of ret) is returned.
+ */
+int32_t
+glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ xlator_t *this = THIS;
+
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ ret = glusterd_op_stage_create_volume (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_stage_start_volume (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stage_stop_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_DELETE_VOLUME:
+ ret = glusterd_op_stage_delete_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_op_stage_add_brick (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_stage_replace_brick (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_stage_set_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_GANESHA:
+ ret = glusterd_op_stage_set_ganesha (dict, op_errstr);
+ break;
+
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_stage_reset_volume (dict, op_errstr);
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_LOG_ROTATE:
+ ret = glusterd_op_stage_log_rotate (dict, op_errstr);
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_op_stage_sync_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ ret = glusterd_op_stage_gsync_create (dict, op_errstr);
+ break;
+
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_op_stage_gsync_set (dict, op_errstr);
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stage_stats_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_QUOTA:
+ ret = glusterd_op_stage_quota (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_op_stage_status_volume (dict, op_errstr);
+ break;
+
+ /* Both ops share the rebalance stage handler */
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_op_stage_rebalance (dict, op_errstr);
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_stage_heal_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STATEDUMP_VOLUME:
+ ret = glusterd_op_stage_statedump_volume (dict,
+ op_errstr);
+ break;
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_op_stage_clearlocks_volume (dict,
+ op_errstr);
+ break;
+
+ case GD_OP_COPY_FILE:
+ ret = glusterd_op_stage_copy_file (dict, op_errstr);
+ break;
+
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_op_stage_sys_exec (dict, op_errstr);
+ break;
+
+ case GD_OP_BARRIER:
+ ret = glusterd_op_stage_barrier (dict, op_errstr);
+ break;
+
+ /* Both ops share the bitrot stage handler */
+ case GD_OP_BITROT:
+ case GD_OP_SCRUB_STATUS:
+ ret = glusterd_op_stage_bitrot (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ /* ret keeps its initial -1 for an op that has no case above */
+ default:
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY, "Unknown op %s",
+ gd_op_list[op]);
+ }
+
+ gf_msg_debug (this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+
+/* Dispatch the commit step of an operation to the handler that belongs
+ * to it. glusterd_op_commit_hook () is called with GD_COMMIT_HOOK_PRE
+ * before the handler and, only if the handler returned 0, with
+ * GD_COMMIT_HOOK_POST afterwards. The result of both hook calls is
+ * ignored.
+ *
+ * @op:        operation to commit
+ * @dict:      request dictionary handed to the handler and the hooks
+ * @op_errstr: handed to the handlers that report an error string
+ * @rsp_dict:  response dictionary; only handed to the handlers that
+ *             take one
+ *
+ * Returns the handler's result. For an operation without a case below,
+ * "Unknown op" is logged and -1 (the initial value of ret) is returned.
+ */
+int32_t
+glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ xlator_t *this = THIS;
+
+ /* The return value of the pre hook is not checked */
+ glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_PRE);
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ ret = glusterd_op_create_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_start_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stop_volume (dict);
+ break;
+
+ case GD_OP_DELETE_VOLUME:
+ ret = glusterd_op_delete_volume (dict);
+ break;
+
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_op_add_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_replace_brick (dict, rsp_dict);
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_set_volume (dict, op_errstr);
+ break;
+ case GD_OP_GANESHA:
+ ret = glusterd_op_set_ganesha (dict, op_errstr);
+ break;
+
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_reset_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_remove_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_LOG_ROTATE:
+ ret = glusterd_op_log_rotate (dict);
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_op_sync_volume (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ ret = glusterd_op_gsync_create (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_op_gsync_set (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stats_volume (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_QUOTA:
+ ret = glusterd_op_quota (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_op_status_volume (dict, op_errstr, rsp_dict);
+ break;
+
+ /* Both ops share the rebalance commit handler */
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_heal_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STATEDUMP_VOLUME:
+ ret = glusterd_op_statedump_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_op_clearlocks_volume (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_COPY_FILE:
+ ret = glusterd_op_copy_file (dict, op_errstr);
+ break;
+
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_op_sys_exec (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_BARRIER:
+ ret = glusterd_op_barrier (dict, op_errstr);
+ break;
+
+ /* Both ops share the bitrot commit handler */
+ case GD_OP_BITROT:
+ case GD_OP_SCRUB_STATUS:
+ ret = glusterd_op_bitrot (dict, op_errstr, rsp_dict);
+ break;
+
+ /* ret keeps its initial -1 for an op that has no case above */
+ default:
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY, "Unknown op %s",
+ gd_op_list[op]);
+ break;
+ }
+
+ /* The post hook is requested only after a successful commit */
+ if (ret == 0)
+ glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_POST);
+
+ gf_msg_debug (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+
+/* Select every started brick of the volume named in dict by appending
+ * one pending node per brick to the selected list.
+ *
+ * @dict:      request dictionary; the volume name and flags are read
+ *             through glusterd_op_stop_volume_args_get ()
+ * @op_errstr: receives an allocated message if the volume is not found
+ * @selected:  list that the pending nodes are appended to
+ *
+ * Returns 0 on success, -1 if a pending node cannot be allocated, or
+ * the error of the failing lookup. Nodes that were already appended
+ * stay on the list when an error occurs.
+ */
+static int
+glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
+                                    struct cds_list_head *selected)
+{
+        glusterd_pending_node_t *node    = NULL;
+        glusterd_brickinfo_t    *brick   = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        char                    *volname = NULL;
+        int                      flags   = 0;
+        int                      rc      = 0;
+
+        rc = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+        if (rc != 0)
+                return rc;
+
+        rc = glusterd_volinfo_find (volname, &volinfo);
+        if (rc != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+                return rc;
+        }
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                if (!glusterd_is_brick_started (brick))
+                        continue;
+
+                node = GF_CALLOC (1, sizeof (*node),
+                                  gf_gld_mt_pending_node_t);
+                if (node == NULL)
+                        return -1;
+
+                node->node = brick;
+                node->type = GD_NODE_BRICK;
+                cds_list_add_tail (&node->list, selected);
+                node = NULL;
+        }
+
+        return rc;
+}
+
+/* Select the bricks that a remove-brick request has to talk to.
+ *
+ * For GF_OP_CMD_DETACH_START the selection is delegated to
+ * glusterd_bricks_select_rebalance_volume (). Otherwise, if the
+ * dictionary has a "force" key, every brick named by "brick1" ..
+ * "brick<count>" that is currently started gets a pending node appended
+ * to the selected list. Without a "force" key nothing is selected and 0
+ * is returned. The value of "force" itself is not looked at.
+ *
+ * @dict:      request dictionary ("volname", "count", "command",
+ *             "force", "brick%d")
+ * @op_errstr: only passed on to the rebalance selection
+ * @selected:  list that the pending nodes are appended to
+ *
+ * Returns 0 on success and non-zero on failure. Nodes that were already
+ * appended stay on the list when an error occurs.
+ */
+static int
+glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
+                                     struct cds_list_head *selected)
+{
+        int                      ret          = -1;
+        char                    *volname      = NULL;
+        glusterd_volinfo_t      *volinfo      = NULL;
+        glusterd_brickinfo_t    *brickinfo    = NULL;
+        char                    *brick        = NULL;
+        int32_t                  count        = 0;
+        int32_t                  i            = 1;
+        char                     key[256]     = {0,};
+        glusterd_pending_node_t *pending_node = NULL;
+        int32_t                  command      = 0;
+        int32_t                  force        = 0;
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                /* The lookup failed; this is not an allocation failure */
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, -ret,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get count");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "command", &command);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, -ret,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get command");
+                goto out;
+        }
+
+        if (command == GF_OP_CMD_DETACH_START)
+                return glusterd_bricks_select_rebalance_volume (dict,
+                                                                op_errstr,
+                                                                selected);
+
+        /* Only the presence of the key matters here */
+        ret = dict_get_int32 (dict, "force", &force);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_INFO, 0,
+                        GD_MSG_DICT_GET_FAILED, "force flag is not set");
+                ret = 0;
+                goto out;
+        }
+
+        for (i = 1; i <= count; i++) {
+                snprintf (key, sizeof (key), "brick%d", i);
+
+                ret = dict_get_str (dict, key, &brick);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Unable to get brick");
+                        goto out;
+                }
+
+                ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+                                                              &brickinfo,
+                                                              _gf_false);
+                if (ret)
+                        goto out;
+
+                if (!glusterd_is_brick_started (brickinfo))
+                        continue;
+
+                pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                          gf_gld_mt_pending_node_t);
+                if (!pending_node) {
+                        ret = -1;
+                        goto out;
+                }
+
+                pending_node->node = brickinfo;
+                pending_node->type = GD_NODE_BRICK;
+                cds_list_add_tail (&pending_node->list, selected);
+                pending_node = NULL;
+        }
+
+out:
+        return ret;
+}
+
+/* Allocate a pending node pointing at @node with the given @type and append
+ * it to @selected.  Returns 0 on success, -1 when the allocation fails. */
+static int
+gd_profile_select_add_node (struct cds_list_head *selected, void *node,
+                            gd_node_type type)
+{
+        glusterd_pending_node_t *pending_node = NULL;
+
+        pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                  gf_gld_mt_pending_node_t);
+        if (!pending_node)
+                return -1;
+
+        pending_node->node = node;
+        pending_node->type = type;
+        cds_list_add_tail (&pending_node->list, selected);
+
+        return 0;
+}
+
+/* Queue the NFS server as the target of a profile request.  Logs an error
+ * and returns -1 when priv->nfs_svc is not online; also returns -1 when the
+ * pending node cannot be allocated.  Returns 0 on success. */
+static int
+gd_profile_select_nfs (xlator_t *this, glusterd_conf_t *priv,
+                       struct cds_list_head *selected)
+{
+        if (!priv->nfs_svc.online) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NFS_SERVER_NOT_RUNNING,
+                        "NFS server"
+                        " is not running");
+                return -1;
+        }
+
+        return gd_profile_select_add_node (selected, &(priv->nfs_svc),
+                                           GD_NODE_NFS);
+}
+
+/* Queue every brick of @volinfo for which glusterd_is_brick_started() is
+ * true.  Returns 0 on success, -1 as soon as an allocation fails (nodes
+ * queued before the failure stay on @selected). */
+static int
+gd_profile_select_started_bricks (glusterd_volinfo_t *volinfo,
+                                  struct cds_list_head *selected)
+{
+        glusterd_brickinfo_t *brickinfo = NULL;
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (!glusterd_is_brick_started (brickinfo))
+                        continue;
+
+                if (gd_profile_select_add_node (selected, brickinfo,
+                                                GD_NODE_BRICK))
+                        return -1;
+        }
+
+        return 0;
+}
+
+/* Select the nodes a "volume profile" request has to be sent to.
+ *
+ * Reads "volname" and "op" from @dict.
+ *   - GF_CLI_STATS_START / GF_CLI_STATS_STOP: nothing is selected.
+ *   - GF_CLI_STATS_INFO: the NFS server when the "nfs" flag is set in @dict,
+ *     otherwise every started brick of the volume.
+ *   - GF_CLI_STATS_TOP: the NFS server when "nfs" is set; otherwise the
+ *     single brick named by "brick" (only if it is started), or every started
+ *     brick when no "brick" key is present.
+ *
+ * When the volume is unknown, *@op_errstr receives a gf_strdup()'d message.
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr,
+                                       struct cds_list_head *selected)
+{
+        int                   ret       = -1;
+        char                 *volname   = NULL;
+        char                  msg[2048] = {0,};
+        glusterd_conf_t      *priv      = NULL;
+        glusterd_volinfo_t   *volinfo   = NULL;
+        xlator_t             *this      = NULL;
+        int32_t               stats_op  = GF_CLI_STATS_NONE;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        char                 *brick     = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "volume name get failed");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exists",
+                          volname);
+
+                *op_errstr = gf_strdup (msg);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", msg);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "op", &stats_op);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "volume profile op get failed");
+                goto out;
+        }
+
+        switch (stats_op) {
+        case GF_CLI_STATS_START:
+        case GF_CLI_STATS_STOP:
+                /* No node is selected for start/stop; ret is already 0. */
+                break;
+
+        case GF_CLI_STATS_INFO:
+                if (dict_get_str_boolean (dict, "nfs", _gf_false)) {
+                        ret = gd_profile_select_nfs (this, priv, selected);
+                        break;
+                }
+
+                ret = gd_profile_select_started_bricks (volinfo, selected);
+                break;
+
+        case GF_CLI_STATS_TOP:
+                if (dict_get_str_boolean (dict, "nfs", _gf_false)) {
+                        ret = gd_profile_select_nfs (this, priv, selected);
+                        break;
+                }
+
+                ret = dict_get_str (dict, "brick", &brick);
+                if (!ret) {
+                        /* A specific brick was requested. */
+                        ret = glusterd_volume_brickinfo_get_by_brick
+                                        (brick, volinfo, &brickinfo,
+                                         _gf_true);
+                        if (ret)
+                                break;
+
+                        /* A brick that is not started is silently skipped
+                         * (ret stays 0). */
+                        if (!glusterd_is_brick_started (brickinfo))
+                                break;
+
+                        ret = gd_profile_select_add_node (selected, brickinfo,
+                                                          GD_NODE_BRICK);
+                        break;
+                }
+
+                /* No "brick" key: fall back to all started bricks. */
+                ret = gd_profile_select_started_bricks (volinfo, selected);
+                break;
+
+        default:
+                GF_ASSERT (0);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "Invalid profile op: %d",
+                        stats_op);
+                ret = -1;
+                break;
+        }
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Return the number of children per heal xlator of @volinfo:
+ * disperse_count for disperse volumes, replica_count for everything else. */
+int
+_get_hxl_children_count (glusterd_volinfo_t *volinfo)
+{
+        return (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+                       ? volinfo->disperse_count
+                       : volinfo->replica_count;
+}
+
+/* Record one heal xlator in @dict.
+ *
+ * Two entries are written:
+ *   "xl-<count>"                         -> "<volname>-<type>-<index>"
+ *   "<volname>-<type>-<index>"           -> index
+ * where <type> is "disperse" for disperse volumes and "replicate" otherwise.
+ * The xlator name is heap-allocated and handed to the dict via
+ * dict_set_dynstr().  Returns 0 on success, non-zero on failure.
+ */
+static int
+_add_hxlator_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int index,
+                      int count)
+{
+        char  xl_key[128] = {0,};
+        char *xl_name     = NULL;
+        char *flavour     = NULL;
+        int   rc          = -1;
+
+        flavour = (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) ? "disperse"
+                                                              : "replicate";
+
+        snprintf (xl_key, sizeof (xl_key), "xl-%d", count);
+
+        rc = gf_asprintf (&xl_name, "%s-%s-%d", volinfo->volname, flavour,
+                          index);
+        if (rc == -1)
+                return rc;
+
+        rc = dict_set_dynstr (dict, xl_key, xl_name);
+        if (rc)
+                return rc;
+
+        return dict_set_int32 (dict, xl_name, index);
+}
+
+/* Find which replica set the brick named by a per-replica command is in.
+ *
+ * The brick is identified by the "per-replica-cmd-hostname" and
+ * "per-replica-cmd-path" keys of @dict.  The result is the zero-based
+ * position of the matching brick in volinfo->bricks divided by
+ * volinfo->replica_count.  Returns -1 when @dict is NULL, a key is missing,
+ * or no brick matches.
+ */
+int
+get_replica_index_for_per_replica_cmd (glusterd_volinfo_t *volinfo,
+                                       dict_t *dict)
+{
+        glusterd_brickinfo_t *cur       = NULL;
+        char                 *want_host = NULL;
+        char                 *want_path = NULL;
+        int                   position  = 0;
+        int                   set_size  = -1;
+
+        if (!dict)
+                return -1;
+
+        if (dict_get_str (dict, "per-replica-cmd-hostname", &want_host))
+                return -1;
+
+        if (dict_get_str (dict, "per-replica-cmd-path", &want_path))
+                return -1;
+
+        set_size = volinfo->replica_count;
+
+        cds_list_for_each_entry (cur, &volinfo->bricks, brick_list) {
+                if (gf_uuid_is_null (cur->uuid))
+                        (void)glusterd_resolve_brick (cur);
+
+                if (strcmp (cur->path, want_path) == 0 &&
+                    strcmp (cur->hostname, want_host) == 0)
+                        return position / set_size;
+
+                position++;
+        }
+
+        /* No brick matched the requested hostname/path pair. */
+        return -1;
+}
+
+/* Select the heal xlator that owns the brick named by a per-replica command.
+ *
+ * The "per-replica-cmd-hostname" and "per-replica-cmd-path" keys must be
+ * present in @dict.  The brick list is walked while *@index (1-based; a
+ * value of 0 is bumped to 1 first) is advanced; the first brick that is
+ * local (uuid equals MY_UUID) AND whose path equals the requested path has
+ * its xlator stored in @dict as "xl-0".
+ *
+ * Previously only the uuid was compared, so the first local brick was picked
+ * even when a different local brick had been requested.
+ *
+ * Returns 1 when an xlator was selected, 0 when no brick matched and -1 when
+ * @dict is NULL or a key is missing.  The result of _add_hxlator_to_dict()
+ * is not checked.
+ */
+int
+_select_hxlator_with_matching_brick (xlator_t *this,
+                                     glusterd_volinfo_t *volinfo, dict_t *dict,
+                                     int *index)
+{
+        char                 *hostname     = NULL;
+        char                 *path         = NULL;
+        glusterd_brickinfo_t *brickinfo    = NULL;
+        int                   hxl_children = 0;
+
+        if (!dict ||
+            dict_get_str (dict, "per-replica-cmd-hostname", &hostname) ||
+            dict_get_str (dict, "per-replica-cmd-path", &path))
+                return -1;
+
+        hxl_children = _get_hxl_children_count (volinfo);
+        if ((*index) == 0)
+                (*index)++;
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (gf_uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+
+                /* Local brick with the requested path. */
+                if (!gf_uuid_compare (MY_UUID, brickinfo->uuid) &&
+                    !strcmp (brickinfo->path, path)) {
+                        _add_hxlator_to_dict (dict, volinfo,
+                                              ((*index) - 1)/hxl_children, 0);
+                        return 1;
+                }
+                (*index)++;
+        }
+
+        return 0;
+}
+/* Add to @dict every heal xlator that has at least one brick on this node.
+ *
+ * Bricks are walked in order while *@index (1-based; 0 is bumped to 1) is
+ * advanced.  Each run of hxl_children consecutive bricks forms one xlator;
+ * when the run ends and any of its bricks had a uuid equal to MY_UUID, the
+ * xlator is stored under "xl-<*hxlator_count>" and *@hxlator_count is
+ * incremented.  @this is unused; it is kept for interface compatibility.
+ */
+void
+_select_hxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo,
+                                    dict_t *dict, int *index,
+                                    int *hxlator_count)
+{
+        glusterd_brickinfo_t *brickinfo    = NULL;
+        int                   hxl_children = 0;
+        gf_boolean_t          add          = _gf_false;
+
+        hxl_children = _get_hxl_children_count (volinfo);
+
+        if ((*index) == 0)
+                (*index)++;
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (gf_uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+
+                if (!gf_uuid_compare (MY_UUID, brickinfo->uuid))
+                        add = _gf_true;
+
+                /* Last brick of the current xlator's group. */
+                if ((*index) % hxl_children == 0) {
+                        if (add) {
+                                _add_hxlator_to_dict (dict, volinfo,
+                                                      ((*index) - 1)/hxl_children,
+                                                      (*hxlator_count));
+                                (*hxlator_count)++;
+                        }
+                        add = _gf_false;
+                }
+
+                (*index)++;
+        }
+
+}
+
+/* Add to @dict the heal xlators this node handles for a full self-heal.
+ *
+ * Bricks are walked in groups of hxl_children (see
+ * _get_hxl_children_count()).  Within each group the largest brick uuid
+ * (per gf_uuid_compare()) is tracked; when the group ends and that uuid
+ * equals MY_UUID, the group's xlator is stored under
+ * "xl-<*hxlator_count>" and *@hxlator_count is incremented.  *@index is
+ * 1-based (0 is bumped to 1) and is advanced once per brick.  @this is
+ * unused; it is kept for interface compatibility.
+ *
+ * Returns the updated *@hxlator_count.
+ */
+int
+_select_hxlators_for_full_self_heal (xlator_t *this,
+                                     glusterd_volinfo_t *volinfo,
+                                     dict_t *dict, int *index,
+                                     int *hxlator_count)
+{
+        glusterd_brickinfo_t *brickinfo    = NULL;
+        int                   hxl_children = 0;
+        uuid_t                candidate    = {0};
+
+        if ((*index) == 0)
+                (*index)++;
+
+        /* Same disperse/replica choice the other selectors use. */
+        hxl_children = _get_hxl_children_count (volinfo);
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (gf_uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+
+                if (gf_uuid_compare (brickinfo->uuid, candidate) > 0)
+                        gf_uuid_copy (candidate, brickinfo->uuid);
+
+                /* Last brick of the current group: decide and reset. */
+                if ((*index) % hxl_children == 0) {
+                        if (!gf_uuid_compare (MY_UUID, candidate)) {
+                                _add_hxlator_to_dict (dict, volinfo,
+                                                      ((*index)-1)/hxl_children,
+                                                      (*hxlator_count));
+                                (*hxlator_count)++;
+                        }
+                        gf_uuid_clear (candidate);
+                }
+
+                (*index)++;
+        }
+        return *hxlator_count;
+}
+
+
+/* Select the bricks a snapshot brick-op is sent to: every brick of the
+ * volume named by "volname" in @dict whose uuid equals MY_UUID and which is
+ * started.  Each pending node records the brick's zero-based position in the
+ * volume's brick list.  @op_errstr is not written.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_bricks_select_snap (dict_t *dict, char **op_errstr,
+                             struct cds_list_head *selected)
+{
+        xlator_t                *this     = NULL;
+        glusterd_conf_t         *conf     = NULL;
+        glusterd_volinfo_t      *vol      = NULL;
+        glusterd_brickinfo_t    *cur      = NULL;
+        glusterd_pending_node_t *entry    = NULL;
+        char                    *volname  = NULL;
+        int                      position = -1;
+        int                      ret      = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get"
+                        " volname");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret)
+                goto out;
+
+        cds_list_for_each_entry (cur, &vol->bricks, brick_list) {
+                position++;
+
+                /* Only local bricks that are running are of interest. */
+                if (!gf_uuid_compare (cur->uuid, MY_UUID) &&
+                    glusterd_is_brick_started (cur)) {
+                        entry = GF_CALLOC (1, sizeof (*entry),
+                                           gf_gld_mt_pending_node_t);
+                        if (!entry) {
+                                ret = -1;
+                                goto out;
+                        }
+
+                        entry->node  = cur;
+                        entry->type  = GD_NODE_BRICK;
+                        entry->index = position;
+                        cds_list_add_tail (&entry->list, selected);
+                        entry = NULL;
+                }
+        }
+
+        ret = 0;
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning ret %d", ret);
+        return ret;
+}
+
+/* Report "self-heal-daemon is not running" for the local bricks of @volinfo.
+ *
+ * For every brick whose uuid equals MY_UUID two keys are written to @dict:
+ *   "<index>-status"     -> "self-heal-daemon is not running on <MY_UUID>"
+ *   "<index>-shd-status" -> "off"
+ * *@index is advanced once per brick that is skipped or filled in
+ * successfully.  When @type is PER_HEAL_XL only bricks belonging to the
+ * replica set identified by @req_dict (see
+ * get_replica_index_for_per_replica_cmd()) are reported.
+ *
+ * Returns 0 on success, non-zero when the replica index cannot be determined
+ * or a dict update fails.
+ */
+static int
+fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo,
+                                  cli_cmd_type type, int *index,
+                                  dict_t *req_dict)
+{
+        glusterd_brickinfo_t *brickinfo         = NULL;
+        char                  msg[1024]         = {0,};
+        char                  key[1024]         = {0,};
+        char                  value[1024]       = {0,};
+        int                   ret               = 0;
+        xlator_t             *this              = NULL;
+        int                   cmd_replica_index = -1;
+
+        this = THIS;
+        snprintf (msg, sizeof (msg), "self-heal-daemon is not running on");
+
+        if (type == PER_HEAL_XL) {
+                cmd_replica_index = get_replica_index_for_per_replica_cmd
+                                    (volinfo, req_dict);
+                if (cmd_replica_index == -1) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_REPLICA_INDEX_GET_FAIL,
+                                "Could not find the "
+                                "replica index for per replica type command");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (gf_uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+
+                /* Bricks hosted on other peers are skipped. */
+                if (gf_uuid_compare (MY_UUID, brickinfo->uuid)) {
+                        (*index)++;
+                        continue;
+                }
+
+                /* Per-replica commands only report the requested set. */
+                if (type == PER_HEAL_XL) {
+                        if (cmd_replica_index != ((*index)/volinfo->replica_count)) {
+                                (*index)++;
+                                continue;
+                        }
+
+                }
+                snprintf (key, sizeof (key), "%d-status", (*index));
+                snprintf (value, sizeof (value), "%s %s",msg,
+                          uuid_utoa(MY_UUID));
+                ret = dict_set_dynstr (dict, key, gf_strdup(value));
+                if (ret) {
+                        /* The literal used to concatenate to "Unable toset". */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Unable to "
+                                "set the dictionary for shd status msg");
+                        goto out;
+                }
+                snprintf (key, sizeof (key), "%d-shd-status", (*index));
+                ret = dict_set_str (dict, key, "off");
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Unable to"
+                                " set dictionary for shd status msg");
+                        goto out;
+                }
+
+                (*index)++;
+        }
+
+out:
+        return ret;
+
+}
+/* Select the heal xlators of @volinfo that a heal command applies to.
+ *
+ * For the index-summary and heal-count statistics ops, when the self-heal
+ * daemon is not online nothing is selected; instead the "not running" status
+ * of the local bricks is written to @rsp_dict and the result of that fill is
+ * returned (-1 if @rsp_dict is NULL).
+ *
+ * Otherwise the xlators are added to @dict by one of the _select_hxlator*
+ * helpers, chosen by @heal_op, with *@index and *@hxlator_count carried
+ * across calls.  In that case the return value is the updated
+ * *@hxlator_count.
+ */
+int
+glusterd_shd_select_brick_xlator (dict_t *dict, gf_xl_afr_op_t heal_op,
+                                  glusterd_volinfo_t *volinfo, int *index,
+                                  int *hxlator_count, dict_t *rsp_dict)
+{
+        int              ret  = -1;
+        glusterd_conf_t *priv = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Ops that only report status: handle an offline shd up front. */
+        switch (heal_op) {
+        case GF_SHD_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+                if (!priv->shd_svc.online) {
+                        if (!rsp_dict) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_OPCTX_NULL, "Received "
+                                        "empty ctx.");
+                                goto out;
+                        }
+
+                        ret = fill_shd_status_for_local_bricks (rsp_dict,
+                                                                volinfo,
+                                                                ALL_HEAL_XL,
+                                                                index,
+                                                                dict);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SHD_STATUS_SET_FAIL, "Unable to "
+                                        "fill the shd status for the local "
+                                        "bricks");
+                        goto out;
+                }
+                break;
+
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                if (!priv->shd_svc.online) {
+                        if (!rsp_dict) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_OPCTX_NULL, "Received "
+                                        "empty ctx.");
+                                goto out;
+                        }
+                        ret = fill_shd_status_for_local_bricks (rsp_dict,
+                                                                volinfo,
+                                                                PER_HEAL_XL,
+                                                                index,
+                                                                dict);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SHD_STATUS_SET_FAIL, "Unable to "
+                                        "fill the shd status for the local"
+                                        " bricks.");
+                        goto out;
+
+                }
+                break;
+
+        default:
+                break;
+        }
+
+        /* Pick the selection strategy that matches the heal op. */
+        switch (heal_op) {
+        case GF_SHD_OP_HEAL_FULL:
+                _select_hxlators_for_full_self_heal (this, volinfo, dict,
+                                                     index, hxlator_count);
+                break;
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                (*hxlator_count) += _select_hxlator_with_matching_brick (this,
+                                                                         volinfo,
+                                                                         dict,
+                                                                         index);
+                break;
+        default:
+                _select_hxlators_with_local_bricks (this, volinfo, dict,
+                                                    index, hxlator_count);
+                break;
+        }
+        ret = (*hxlator_count);
+out:
+        return ret;
+}
+
+
+/* Select the target of a "volume heal" request.
+ *
+ * Reads "volname" and "heal-op" from @dict, lets
+ * glusterd_shd_select_brick_xlator() add the relevant heal xlators to @dict
+ * and, when at least one xlator was selected, stores their number under
+ * "count" and queues the self-heal daemon (priv->shd_svc) on @selected.
+ * For tier volumes the selection runs twice, once per sub-tier volinfo
+ * built by glusterd_create_sub_tier_volinfo(), sharing the index and
+ * xlator count between both runs.
+ *
+ * When the volume is unknown, *@op_errstr receives a gf_strdup()'d message.
+ * Returns 0 on success (including "nothing to select") and non-zero on
+ * failure.  A missing or GF_SHD_OP_INVALID heal op is a failure.
+ */
+static int
+glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
+                                    struct cds_list_head *selected,
+                                    dict_t *rsp_dict)
+{
+        int                      ret           = -1;
+        char                    *volname       = NULL;
+        glusterd_conf_t         *priv          = NULL;
+        glusterd_volinfo_t      *volinfo       = NULL;
+        glusterd_volinfo_t      *dup_volinfo   = NULL;
+        xlator_t                *this          = NULL;
+        char                     msg[2048]     = {0,};
+        glusterd_pending_node_t *pending_node  = NULL;
+        gf_xl_afr_op_t           heal_op       = GF_SHD_OP_INVALID;
+        int                      hxlator_count = 0;
+        int                      index         = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "volume name get failed");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exist",
+                          volname);
+
+                *op_errstr = gf_strdup (msg);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", msg);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "heal-op", (int32_t *)&heal_op);
+        if (ret || (heal_op == GF_SHD_OP_INVALID)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "heal op invalid");
+                /* The lookup may have succeeded with an invalid op; do not
+                 * report that as success. */
+                if (!ret)
+                        ret = -1;
+                goto out;
+        }
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                /* First sub-tier volinfo. */
+                ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo,
+                                                        _gf_false, volname);
+                if (ret < 0)
+                        goto out;
+
+                ret = glusterd_shd_select_brick_xlator (dict, heal_op,
+                                                        dup_volinfo,
+                                                        &index, &hxlator_count,
+                                                        rsp_dict);
+                glusterd_volinfo_delete (dup_volinfo);
+                if (ret < 0)
+                        goto out;
+
+                /* Second sub-tier volinfo; index/count continue. */
+                ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo,
+                                                        _gf_true, volname);
+                if (ret < 0)
+                        goto out;
+                ret = glusterd_shd_select_brick_xlator (dict, heal_op,
+                                                        dup_volinfo,
+                                                        &index, &hxlator_count,
+                                                        rsp_dict);
+                glusterd_volinfo_delete (dup_volinfo);
+                if (ret < 0)
+                        goto out;
+        } else {
+                ret = glusterd_shd_select_brick_xlator (dict, heal_op,
+                                                        volinfo,
+                                                        &index, &hxlator_count,
+                                                        rsp_dict);
+                if (ret < 0)
+                        goto out;
+        }
+
+        /* Nothing selected: nothing to send, not an error. */
+        if (!hxlator_count)
+                goto out;
+        if (hxlator_count == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_XLATOR_COUNT_GET_FAIL, "Could not determine the "
+                        "translator count");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, "count", hxlator_count);
+        if (ret)
+                goto out;
+        pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                  gf_gld_mt_pending_node_t);
+        if (!pending_node) {
+                ret = -1;
+                goto out;
+        } else {
+                pending_node->node = &(priv->shd_svc);
+                pending_node->type = GD_NODE_SHD;
+                cds_list_add_tail (&pending_node->list, selected);
+                pending_node = NULL;
+        }
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning ret %d", ret);
+        return ret;
+
+}
+
+/* Select the target of a rebalance brick-op: a single pending node of type
+ * GD_NODE_REBALANCE that points at the volinfo of the volume named by
+ * "volname" in @dict.  When the volume is unknown, *@op_errstr receives a
+ * gf_strdup()'d message.  Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr,
+                                         struct cds_list_head *selected)
+{
+        xlator_t                *this         = NULL;
+        glusterd_volinfo_t      *vol          = NULL;
+        glusterd_pending_node_t *entry        = NULL;
+        char                    *volname      = NULL;
+        char                     errbuf[2048] = {0,};
+        int                      ret          = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "volume name get failed");
+                return ret;
+        }
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret) {
+                snprintf (errbuf, sizeof (errbuf), "Volume %s does not exist",
+                          volname);
+
+                *op_errstr = gf_strdup (errbuf);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", errbuf);
+                return ret;
+        }
+
+        entry = GF_CALLOC (1, sizeof (*entry), gf_gld_mt_pending_node_t);
+        if (!entry)
+                return -1;
+
+        entry->node = vol;
+        entry->type = GD_NODE_REBALANCE;
+        cds_list_add_tail (&entry->list, selected);
+
+        /* ret is still 0 from the successful volinfo lookup. */
+        return ret;
+}
+
+/* Allocate a pending node for @node with the given @type and @index and
+ * append it to @selected.  Returns 0 on success, -1 when the allocation
+ * fails (nothing is logged here; callers decide). */
+static int
+gd_status_select_add_node (struct cds_list_head *selected, void *node,
+                           gd_node_type type, int index)
+{
+        glusterd_pending_node_t *pending_node = NULL;
+
+        pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                  gf_gld_mt_pending_node_t);
+        if (!pending_node)
+                return -1;
+
+        pending_node->node  = node;
+        pending_node->type  = type;
+        pending_node->index = index;
+        cds_list_add_tail (&pending_node->list, selected);
+
+        return 0;
+}
+
+/* Select the nodes a "volume status" request has to be sent to.
+ *
+ * Reads "cmd" from @dict.  Nothing is selected (and 0 is returned) when the
+ * GF_CLI_STATUS_ALL bit is set or when the masked command is not one of the
+ * detail/service status types listed in the switch below.  Otherwise the
+ * volume named by "volname" is looked up and one of the following is queued
+ * on @selected, chosen by the first matching bit in @cmd:
+ *   - GF_CLI_STATUS_BRICK: the brick named by "brick", if it is local and
+ *     started (otherwise nothing is queued and 0 is returned);
+ *   - NFS / SHD / QUOTAD / BITD / SCRUB / SNAPD: the corresponding service,
+ *     which must be online (an error is logged and -1 returned otherwise);
+ *   - none of the above: every local, started brick, each with its
+ *     zero-based position in the volume's brick list as index.
+ *
+ * @op_errstr is not written.  Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr,
+                                      struct cds_list_head *selected)
+{
+        int                   ret         = -1;
+        int                   cmd         = 0;
+        int                   brick_index = -1;
+        char                 *volname     = NULL;
+        char                 *brickname   = NULL;
+        glusterd_volinfo_t   *volinfo     = NULL;
+        glusterd_brickinfo_t *brickinfo   = NULL;
+        xlator_t             *this        = NULL;
+        glusterd_conf_t      *priv        = NULL;
+
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_int32 (dict, "cmd", &cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get status type");
+                goto out;
+        }
+
+        if (cmd & GF_CLI_STATUS_ALL)
+                goto out;
+
+        switch (cmd & GF_CLI_STATUS_MASK) {
+        case GF_CLI_STATUS_MEM:
+        case GF_CLI_STATUS_CLIENTS:
+        case GF_CLI_STATUS_INODE:
+        case GF_CLI_STATUS_FD:
+        case GF_CLI_STATUS_CALLPOOL:
+        case GF_CLI_STATUS_NFS:
+        case GF_CLI_STATUS_SHD:
+        case GF_CLI_STATUS_QUOTAD:
+        case GF_CLI_STATUS_SNAPD:
+        case GF_CLI_STATUS_BITD:
+        case GF_CLI_STATUS_SCRUB:
+                break;
+        default:
+                /* Other status types need no brick op; ret is 0 here. */
+                goto out;
+        }
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volname");
+                goto out;
+        }
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                goto out;
+        }
+
+        if ( (cmd & GF_CLI_STATUS_BRICK) != 0) {
+                ret = dict_get_str (dict, "brick", &brickname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get brick");
+                        goto out;
+                }
+                ret = glusterd_volume_brickinfo_get_by_brick (brickname,
+                                                              volinfo,
+                                                              &brickinfo,
+                                                              _gf_false);
+                if (ret)
+                        goto out;
+
+                /* Remote or stopped brick: nothing to do, ret stays 0. */
+                if (gf_uuid_compare (brickinfo->uuid, MY_UUID)||
+                    !glusterd_is_brick_started (brickinfo))
+                        goto out;
+
+                ret = gd_status_select_add_node (selected, brickinfo,
+                                                 GD_NODE_BRICK, 0);
+        } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+                if (!priv->nfs_svc.online) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_NFS_SERVER_NOT_RUNNING,
+                                "NFS server is not running");
+                        goto out;
+                }
+                ret = gd_status_select_add_node (selected, &(priv->nfs_svc),
+                                                 GD_NODE_NFS, 0);
+        } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+                if (!priv->shd_svc.online) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SELF_HEALD_DISABLED,
+                                "Self-heal daemon is not running");
+                        goto out;
+                }
+                ret = gd_status_select_add_node (selected, &(priv->shd_svc),
+                                                 GD_NODE_SHD, 0);
+        } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+                if (!priv->quotad_svc.online) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_QUOTAD_NOT_RUNNING, "Quotad is not "
+                                "running");
+                        ret = -1;
+                        goto out;
+                }
+                ret = gd_status_select_add_node (selected, &(priv->quotad_svc),
+                                                 GD_NODE_QUOTAD, 0);
+        } else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
+                if (!priv->bitd_svc.online) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BITROT_NOT_RUNNING, "Bitrot is not "
+                                "running");
+                        ret = -1;
+                        goto out;
+                }
+                ret = gd_status_select_add_node (selected, &(priv->bitd_svc),
+                                                 GD_NODE_BITD, 0);
+        } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
+                if (!priv->scrub_svc.online) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SCRUBBER_NOT_RUNNING, "Scrubber is not "
+                                "running");
+                        ret = -1;
+                        goto out;
+                }
+                ret = gd_status_select_add_node (selected, &(priv->scrub_svc),
+                                                 GD_NODE_SCRUB, 0);
+        } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
+                if (!volinfo->snapd.svc.online) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPD_NOT_RUNNING, "snapd is not "
+                                "running");
+                        ret = -1;
+                        goto out;
+                }
+                ret = gd_status_select_add_node (selected,
+                                                 (void *)(&volinfo->snapd),
+                                                 GD_NODE_SNAPD, 0);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "failed to allocate "
+                                "memory for pending node");
+                        goto out;
+                }
+        } else {
+                /* Detail status for the whole volume: all local, started
+                 * bricks.  ret is 0 from the volinfo lookup and only
+                 * changes on allocation failure. */
+                cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                        brick_index++;
+                        if (gf_uuid_compare (brickinfo->uuid, MY_UUID) ||
+                            !glusterd_is_brick_started (brickinfo)) {
+                                continue;
+                        }
+                        if (gd_status_select_add_node (selected, brickinfo,
+                                                       GD_NODE_BRICK,
+                                                       brick_index)) {
+                                ret = -1;
+                                gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM,
+                                        GD_MSG_NO_MEMORY,
+                                        "Unable to allocate memory");
+                                goto out;
+                        }
+                }
+        }
+out:
+        return ret;
+}
+
+/* Select the target of a scrub-status request: the scrubber service
+ * (priv->scrub_svc), provided the volume named by "volname" in @dict exists
+ * and the scrubber is online.  An offline scrubber is not an error: nothing
+ * is queued and 0 is returned.  When the volume is unknown, *@op_errstr
+ * receives a gf_strdup()'d message.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_bricks_select_scrub (dict_t *dict, char **op_errstr,
+                              struct cds_list_head *selected)
+{
+        int                      ret          = -1;
+        char                    *volname      = NULL;
+        char                     msg[2048]    = {0,};
+        xlator_t                *this         = NULL;
+        glusterd_conf_t         *priv         = NULL;
+        glusterd_volinfo_t      *volinfo      = NULL;
+        glusterd_pending_node_t *pending_node = NULL;
+
+        /* Assert on this before it is dereferenced. */
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (dict);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get"
+                        " volname");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exist",
+                          volname);
+
+                *op_errstr = gf_strdup (msg);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "%s", msg);
+                goto out;
+        }
+
+        if (!priv->scrub_svc.online) {
+                ret = 0;
+                snprintf (msg, sizeof (msg), "Scrubber daemon is not running");
+
+                gf_msg_debug (this->name, 0, "%s", msg);
+                goto out;
+        }
+
+        pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                  gf_gld_mt_pending_node_t);
+        if (!pending_node) {
+                ret = -1;
+                goto out;
+        }
+
+        pending_node->node = &(priv->scrub_svc);
+        pending_node->type = GD_NODE_SCRUB;
+        cds_list_add_tail (&pending_node->list, selected);
+        pending_node = NULL;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+/* Pick the bricks that must receive a barrier request.
+ *
+ * The volume is taken from the "volname" key of @dict; every brick of that
+ * volume that lives on this peer (uuid equals MY_UUID) and is started gets
+ * a GD_NODE_BRICK pending node appended to @selected.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_bricks_select_barrier (dict_t *dict, struct cds_list_head *selected)
+{
+        glusterd_volinfo_t      *vol     = NULL;
+        glusterd_brickinfo_t    *cur     = NULL;
+        glusterd_pending_node_t *entry   = NULL;
+        char                    *volname = NULL;
+        int                      ret     = -1;
+
+        GF_ASSERT (dict);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get volname");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "Failed to find volume %s",
+                        volname);
+                goto out;
+        }
+
+        /* ret is 0 from here on unless an allocation fails. */
+        cds_list_for_each_entry (cur, &vol->bricks, brick_list) {
+                if (!gf_uuid_compare (cur->uuid, MY_UUID) &&
+                    glusterd_is_brick_started (cur)) {
+                        entry = GF_CALLOC (1, sizeof (*entry),
+                                           gf_gld_mt_pending_node_t);
+                        if (!entry) {
+                                ret = -1;
+                                goto out;
+                        }
+
+                        entry->node = cur;
+                        entry->type = GD_NODE_BRICK;
+                        cds_list_add_tail (&entry->list, selected);
+                        entry = NULL;
+                }
+        }
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* State-machine action: send the brick op for the current transaction.
+ *
+ * When @ctx is given it is used as the request context.  Otherwise a new
+ * context is allocated and filled with the current op, MY_UUID and the
+ * payload built by glusterd_op_build_payload(); if building the payload
+ * fails the error string is stored in opinfo.op_errstr, the freshly
+ * allocated context is released and the failure is returned.
+ *
+ * The request is then handed to the GLUSTERD_BRICK_OP procedure of
+ * priv->gfs_mgmt.  If afterwards neither opinfo.pending_count nor
+ * opinfo.brick_pending_count is set, the pending brick list is cleared and
+ * GD_OP_EVENT_ALL_ACK is injected.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int                   ret       = 0;
+        rpc_clnt_procedure_t *proc      = NULL;
+        glusterd_conf_t      *priv      = NULL;
+        xlator_t             *this      = NULL;
+        glusterd_op_t         op        = GD_OP_NONE;
+        glusterd_req_ctx_t   *req_ctx   = NULL;
+        char                 *op_errstr = NULL;
+
+        this = THIS;
+        priv = this->private;
+
+        if (ctx) {
+                req_ctx = ctx;
+        } else {
+                req_ctx = GF_CALLOC (1, sizeof (*req_ctx),
+                                     gf_gld_mt_op_allack_ctx_t);
+                /* The allocation used to be dereferenced unchecked. */
+                if (!req_ctx) {
+                        ret = -1;
+                        goto out;
+                }
+                op = glusterd_op_get_op ();
+                req_ctx->op = op;
+                gf_uuid_copy (req_ctx->uuid, MY_UUID);
+                ret = glusterd_op_build_payload (&req_ctx->dict, &op_errstr,
+                                                 NULL);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+                                LOGSTR_BUILD_PAYLOAD,
+                                gd_op_list[op]);
+                        if (op_errstr == NULL)
+                                gf_asprintf (&op_errstr,
+                                             OPERRSTR_BUILD_PAYLOAD);
+                        opinfo.op_errstr = op_errstr;
+                        /* The context was allocated here and has not been
+                         * handed to anyone yet, so release it. */
+                        GF_FREE (req_ctx);
+                        req_ctx = NULL;
+                        goto out;
+                }
+        }
+
+        proc = &priv->gfs_mgmt->proctable[GLUSTERD_BRICK_OP];
+        if (proc->fn) {
+                ret = proc->fn (NULL, this, req_ctx);
+                if (ret)
+                        goto out;
+        }
+
+        if (!opinfo.pending_count && !opinfo.brick_pending_count) {
+                glusterd_clear_pending_nodes (&opinfo.pending_bricks);
+                ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+                                                   &event->txn_id, req_ctx);
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+
+/* State-machine action: a brick op response was accepted.
+ *
+ * @ctx is a glusterd_op_brick_rsp_ctx_t.  The responding node is removed
+ * from opinfo.pending_bricks, opinfo.brick_pending_count is decremented
+ * (never below zero), the transaction opinfo is stored and the response is
+ * handed to glusterd_handle_node_rsp().  Once no brick responses are
+ * outstanding, GD_OP_EVENT_ALL_ACK is injected with the commit context.
+ *
+ * The response dict (if any) is unref'd and @ctx is freed before returning.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+        xlator_t                    *this       = NULL;
+        glusterd_op_brick_rsp_ctx_t *rsp_ctx    = NULL;
+        glusterd_req_ctx_t          *commit_ctx = NULL;
+        dict_t                      *op_ctx     = NULL;
+        void                        *node       = NULL;
+        char                        *op_errstr  = NULL;
+        glusterd_op_t                op         = GD_OP_NONE;
+        gd_node_type                 node_type  = GD_NODE_NONE;
+        int                          ret        = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (event);
+        GF_ASSERT (ctx);
+        rsp_ctx = ctx;
+
+        commit_ctx = rsp_ctx->commit_ctx;
+        GF_ASSERT (commit_ctx);
+
+        op        = commit_ctx->op;
+        op_ctx    = glusterd_op_get_ctx ();
+        node      = rsp_ctx->pending_node->node;
+        node_type = rsp_ctx->pending_node->type;
+
+        /* A response from a node we were not waiting for is an error. */
+        ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, node);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNKNOWN_RESPONSE, "unknown response received ");
+                ret = -1;
+                goto out;
+        }
+
+        if (opinfo.brick_pending_count > 0)
+                opinfo.brick_pending_count--;
+
+        /* Failure to store the opinfo is logged but not fatal here. */
+        ret = glusterd_set_txn_opinfo (&event->txn_id, &opinfo);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        glusterd_handle_node_rsp (commit_ctx->dict, node, op,
+                                  rsp_ctx->rsp_dict, op_ctx, &op_errstr,
+                                  node_type);
+
+        /* Only the last outstanding response triggers ALL_ACK. */
+        if (!(opinfo.brick_pending_count > 0))
+                ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+                                                   &event->txn_id,
+                                                   rsp_ctx->commit_ctx);
+
+out:
+        if (rsp_ctx->rsp_dict)
+                dict_unref (rsp_ctx->rsp_dict);
+        GF_FREE (rsp_ctx);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Dispatch to the per-operation selector that fills @selected with the
+ * pending nodes a brick op must be sent to.  Operations without a selector
+ * select nothing and yield 0.  @rsp_dict is only passed on to the heal
+ * selector.  Returns whatever the chosen selector returns.
+ */
+int32_t
+glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr,
+                           struct cds_list_head *selected, dict_t *rsp_dict)
+{
+        int rc = 0;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (op > GD_OP_NONE);
+        GF_ASSERT (op < GD_OP_MAX);
+
+        switch (op) {
+        case GD_OP_STOP_VOLUME:
+                rc = glusterd_bricks_select_stop_volume (dict, op_errstr,
+                                                         selected);
+                break;
+
+        case GD_OP_REMOVE_BRICK:
+                rc = glusterd_bricks_select_remove_brick (dict, op_errstr,
+                                                          selected);
+                break;
+
+        case GD_OP_PROFILE_VOLUME:
+                rc = glusterd_bricks_select_profile_volume (dict, op_errstr,
+                                                            selected);
+                break;
+
+        case GD_OP_HEAL_VOLUME:
+                rc = glusterd_bricks_select_heal_volume (dict, op_errstr,
+                                                         selected, rsp_dict);
+                break;
+
+        case GD_OP_STATUS_VOLUME:
+                rc = glusterd_bricks_select_status_volume (dict, op_errstr,
+                                                           selected);
+                break;
+
+        case GD_OP_DEFRAG_BRICK_VOLUME:
+                rc = glusterd_bricks_select_rebalance_volume (dict, op_errstr,
+                                                              selected);
+                break;
+
+        case GD_OP_BARRIER:
+                rc = glusterd_bricks_select_barrier (dict, selected);
+                break;
+
+        case GD_OP_SNAP:
+                rc = glusterd_bricks_select_snap (dict, op_errstr, selected);
+                break;
+
+        case GD_OP_SCRUB_STATUS:
+                rc = glusterd_bricks_select_scrub (dict, op_errstr, selected);
+                break;
+
+        default:
+                /* No brick op for this operation. */
+                break;
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Op state-machine table for GD_OP_STATE_DEFAULT.  One entry per event, in
+ * the order named by the trailing comments: a GD_OP_STATE_* value
+ * (presumably the state entered after the event - confirm against
+ * glusterd_op_sm_t) and the action handler to run. */
+glusterd_op_sm_t glusterd_op_state_default [] = {
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_NONE */
+        {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_send_lock}, /* EVENT_START_LOCK */
+        {GD_OP_STATE_LOCKED,    glusterd_op_ac_lock},      /* EVENT_LOCK */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_RCVD_ACC */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_ALL_ACC */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_STAGE_ACC */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_COMMIT_ACC */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_RCVD_RJT */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_STAGE_OP */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_COMMIT_OP */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_unlock},    /* EVENT_UNLOCK */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_START_UNLOCK */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_ALL_ACK */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_LOCAL_UNLOCK_NO_RESP */
+        {GD_OP_STATE_DEFAULT,   glusterd_op_ac_none},      /* EVENT_MAX */
+};
+
+/* Op state-machine table for GD_OP_STATE_LOCK_SENT.  One entry per event, in
+ * the order named by the trailing comments: a GD_OP_STATE_* value
+ * (presumably the state entered after the event - confirm against
+ * glusterd_op_sm_t) and the action handler to run. */
+glusterd_op_sm_t glusterd_op_state_lock_sent [] = {
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_NONE */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_START_LOCK */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_lock},              /* EVENT_LOCK */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_rcvd_lock_acc},     /* EVENT_RCVD_ACC */
+        {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op},     /* EVENT_ALL_ACC */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_STAGE_ACC */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_COMMIT_ACC */
+        {GD_OP_STATE_ACK_DRAIN,     glusterd_op_ac_send_unlock_drain}, /* EVENT_RCVD_RJT */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_STAGE_OP */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_COMMIT_OP */
+        {GD_OP_STATE_DEFAULT,       glusterd_op_ac_unlock},            /* EVENT_UNLOCK */
+        {GD_OP_STATE_ACK_DRAIN,     glusterd_op_ac_none},              /* EVENT_START_UNLOCK */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_ALL_ACK */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_LOCAL_UNLOCK_NO_RESP */
+        {GD_OP_STATE_LOCK_SENT,     glusterd_op_ac_none},              /* EVENT_MAX */
+};
+
+/*
+ * Transition table for GD_OP_STATE_LOCKED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_locked [] = {
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGED, glusterd_op_ac_stage_op}, //EVENT_STAGE_OP
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_STAGE_OP_SENT.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_stage_op_sent [] = {
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_rcvd_stage_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_STAGE_OP_FAILED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_stage_op_failed [] = {
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_STAGED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_staged [] = {
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_send_brick_op}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_BRICK_OP_SENT.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_brick_op_sent [] = {
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_send_commit_op}, //EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_BRICK_OP_FAILED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_brick_op_failed [] = {
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_BRICK_COMMITTED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_brick_committed [] = {
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_BRICK_COMMIT_FAILED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_brick_commit_failed [] = {
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_send_commit_failed}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_COMMIT_OP_FAILED.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_commit_op_failed [] = {
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_COMMIT_OP_SENT.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_commit_op_sent [] = {
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_rcvd_commit_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACC
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_COMMITED (the enumerator is spelled with
+ * a single 'T').
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_committed [] = {
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_UNLOCK_SENT.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_unlock_sent [] = {
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, //EVENT_ALL_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Transition table for GD_OP_STATE_ACK_DRAIN.
+ * Indexed by glusterd_op_sm_event_type_t; each row is {next state, handler}.
+ */
+glusterd_op_sm_t glusterd_op_state_ack_drain [] = {
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+/*
+ * Master table of per-state transition tables. glusterd_op_sm() indexes it
+ * with the current opinfo.state.state (a glusterd_op_sm_state_t), so the
+ * entries must stay in the same order as the enumerators of
+ * glusterd_op_sm_state_t: DEFAULT first, ACK_DRAIN last.
+ */
+glusterd_op_sm_t *glusterd_op_state_table [] = {
+ glusterd_op_state_default,
+ glusterd_op_state_lock_sent,
+ glusterd_op_state_locked,
+ glusterd_op_state_stage_op_sent,
+ glusterd_op_state_staged,
+ glusterd_op_state_commit_op_sent,
+ glusterd_op_state_committed,
+ glusterd_op_state_unlock_sent,
+ glusterd_op_state_stage_op_failed,
+ glusterd_op_state_commit_op_failed,
+ glusterd_op_state_brick_op_sent,
+ glusterd_op_state_brick_op_failed,
+ glusterd_op_state_brick_committed,
+ glusterd_op_state_brick_commit_failed,
+ glusterd_op_state_ack_drain
+};
+
+/*
+ * Allocate a new op state-machine event of type event_type (via GF_CALLOC)
+ * and return it through *new_event. The event's list node is initialised
+ * but the event is not placed on any queue.
+ *
+ * Returns 0 on success, -1 when the allocation fails (in which case
+ * *new_event is left untouched).
+ */
+int
+glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type,
+                          glusterd_op_sm_event_t **new_event)
+{
+        glusterd_op_sm_event_t *ev = NULL;
+
+        GF_ASSERT (new_event);
+        GF_ASSERT (GD_OP_EVENT_NONE <= event_type &&
+                   GD_OP_EVENT_MAX > event_type);
+
+        ev = GF_CALLOC (1, sizeof (*ev), gf_gld_mt_op_sm_event_t);
+        if (ev == NULL)
+                return -1;
+
+        ev->event = event_type;
+        CDS_INIT_LIST_HEAD (&ev->list);
+        *new_event = ev;
+
+        return 0;
+}
+
+/*
+ * Create an event of type event_type carrying ctx and append it to the
+ * tail of gd_op_sm_queue. When txn_id is non-NULL the transaction id is
+ * copied into the event.
+ *
+ * Returns 0 on success, or the error from glusterd_op_sm_new_event() when
+ * the event could not be created (nothing is queued in that case).
+ */
+int
+glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type,
+                             uuid_t *txn_id, void *ctx)
+{
+        glusterd_op_sm_event_t *ev = NULL;
+        int32_t                 rc = -1;
+
+        GF_ASSERT (event_type < GD_OP_EVENT_MAX &&
+                   event_type >= GD_OP_EVENT_NONE);
+
+        rc = glusterd_op_sm_new_event (event_type, &ev);
+        if (rc != 0)
+                return rc;
+
+        ev->ctx = ctx;
+        if (txn_id != NULL)
+                gf_uuid_copy (ev->txn_id, *txn_id);
+
+        gf_msg_debug (THIS->name, 0, "Enqueue event: '%s'",
+                      glusterd_op_sm_event_name_get (ev->event));
+        cds_list_add_tail (&ev->list, &gd_op_sm_queue);
+
+        return rc;
+}
+
+/*
+ * Release a request context: drop the reference on its dict (if any) and
+ * free the context itself. A NULL ctx is ignored.
+ */
+void
+glusterd_destroy_req_ctx (glusterd_req_ctx_t *ctx)
+{
+        if (ctx != NULL) {
+                if (ctx->dict != NULL)
+                        dict_unref (ctx->dict);
+                GF_FREE (ctx);
+        }
+}
+
+/*
+ * Free the uuid carried as context by a LOCAL_UNLOCK_NO_RESP event.
+ * A NULL ctx is ignored.
+ */
+void
+glusterd_destroy_local_unlock_ctx (uuid_t *ctx)
+{
+        if (ctx != NULL)
+                GF_FREE (ctx);
+}
+
+/*
+ * Destroy the context attached to an event, choosing the destructor from
+ * the event type:
+ *   LOCK / UNLOCK            -> glusterd_destroy_lock_ctx
+ *   STAGE_OP / ALL_ACK       -> glusterd_destroy_req_ctx
+ *   LOCAL_UNLOCK_NO_RESP     -> glusterd_destroy_local_unlock_ctx
+ * Contexts of every other event type are left alone. The event itself is
+ * not freed here. A NULL event is ignored.
+ */
+void
+glusterd_destroy_op_event_ctx (glusterd_op_sm_event_t *event)
+{
+        glusterd_op_sm_event_type_t kind;
+
+        if (event == NULL)
+                return;
+
+        kind = event->event;
+
+        if (kind == GD_OP_EVENT_LOCK || kind == GD_OP_EVENT_UNLOCK) {
+                glusterd_destroy_lock_ctx (event->ctx);
+        } else if (kind == GD_OP_EVENT_STAGE_OP ||
+                   kind == GD_OP_EVENT_ALL_ACK) {
+                glusterd_destroy_req_ctx (event->ctx);
+        } else if (kind == GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP) {
+                glusterd_destroy_local_unlock_ctx (event->ctx);
+        }
+}
+
+/*
+ * Run the op state machine: drain gd_op_sm_queue while holding
+ * gd_op_sm_lock.
+ *
+ * For every queued event the transaction's opinfo is loaded into the global
+ * 'opinfo', the handler for (current state, event type) is looked up in
+ * glusterd_op_state_table and invoked, the state is transitioned, and the
+ * opinfo is either cleared (UNLOCK event leading to the DEFAULT state) or
+ * stored back for the transaction. The event and its context are released
+ * once the event has been processed.
+ *
+ * An event whose opinfo cannot be fetched, or whose handler fails, is
+ * logged, released and skipped; processing continues with the next event.
+ *
+ * Returns 0 once the queue has been drained, the non-zero result of
+ * synclock_trylock() when the lock cannot be taken, or the error from
+ * glusterd_op_sm_transition_state() when a transition fails (the lock is
+ * released and remaining events stay queued).
+ */
+int
+glusterd_op_sm ()
+{
+        glusterd_op_sm_event_t          *event = NULL;
+        glusterd_op_sm_event_t          *tmp = NULL;
+        int                             ret = -1;
+        int                             lock_err = 0;
+        glusterd_op_sm_ac_fn            handler = NULL;
+        glusterd_op_sm_t                *state = NULL;
+        glusterd_op_sm_event_type_t     event_type = GD_OP_EVENT_NONE;
+        xlator_t                        *this = NULL;
+        glusterd_op_info_t              txn_op_info;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = synclock_trylock (&gd_op_sm_lock);
+        if (ret) {
+                /* Save errno once; later calls may overwrite it. */
+                lock_err = errno;
+                gf_msg (this->name, GF_LOG_ERROR, lock_err,
+                        GD_MSG_LOCK_FAIL, "lock failed due to %s",
+                        strerror (lock_err));
+                goto lock_failed;
+        }
+
+        /* Handlers may enqueue further events, hence the outer loop. */
+        while (!cds_list_empty (&gd_op_sm_queue)) {
+
+                cds_list_for_each_entry_safe (event, tmp, &gd_op_sm_queue,
+                                              list) {
+
+                        /* From here on this function owns the event. */
+                        cds_list_del_init (&event->list);
+                        event_type = event->event;
+                        gf_msg_debug (this->name, 0, "Dequeued event of "
+                                      "type: '%s'",
+                                      glusterd_op_sm_event_name_get(event_type));
+
+                        gf_msg_debug (this->name, 0, "transaction ID = %s",
+                                      uuid_utoa (event->txn_id));
+
+                        ret = glusterd_get_txn_opinfo (&event->txn_id,
+                                                       &txn_op_info);
+                        if (ret) {
+                                gf_msg_callingfn (this->name, GF_LOG_ERROR, 0,
+                                                  GD_MSG_TRANS_OPINFO_GET_FAIL,
+                                                  "Unable to get transaction "
+                                                  "opinfo for transaction ID :"
+                                                  "%s",
+                                                  uuid_utoa (event->txn_id));
+                                glusterd_destroy_op_event_ctx (event);
+                                GF_FREE (event);
+                                continue;
+                        } else
+                                opinfo = txn_op_info;
+
+                        state = glusterd_op_state_table[opinfo.state.state];
+
+                        GF_ASSERT (state);
+
+                        handler = state[event_type].handler;
+                        GF_ASSERT (handler);
+
+                        ret = handler (event, event->ctx);
+
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_HANDLER_RETURNED,
+                                        "handler returned: %d", ret);
+                                glusterd_destroy_op_event_ctx (event);
+                                GF_FREE (event);
+                                continue;
+                        }
+
+                        ret = glusterd_op_sm_transition_state (&opinfo, state,
+                                                               event_type);
+
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_EVENT_STATE_TRANSITION_FAIL,
+                                        "Unable to transition "
+                                        "state from '%s' to '%s'",
+                                        glusterd_op_sm_state_name_get(opinfo.state.state),
+                                        glusterd_op_sm_state_name_get(state[event_type].next_state));
+                                /*
+                                 * The event was already unlinked from the
+                                 * queue, so nothing else can release it:
+                                 * free it here, as every other exit from
+                                 * this iteration does.
+                                 */
+                                glusterd_destroy_op_event_ctx (event);
+                                GF_FREE (event);
+                                (void) synclock_unlock (&gd_op_sm_lock);
+                                return ret;
+                        }
+
+                        if ((state[event_type].next_state ==
+                             GD_OP_STATE_DEFAULT) &&
+                            (event_type == GD_OP_EVENT_UNLOCK)) {
+                                /* Clearing the transaction opinfo */
+                                ret = glusterd_clear_txn_opinfo(&event->txn_id);
+                                if (ret)
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_TRANS_OPINFO_CLEAR_FAIL,
+                                                "Unable to clear "
+                                                "transaction's opinfo");
+                        } else {
+                                ret = glusterd_set_txn_opinfo (&event->txn_id,
+                                                               &opinfo);
+                                if (ret)
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_TRANS_OPINFO_SET_FAIL,
+                                                "Unable to set "
+                                                "transaction's opinfo");
+                        }
+
+                        glusterd_destroy_op_event_ctx (event);
+                        GF_FREE (event);
+
+                }
+        }
+
+
+        (void) synclock_unlock (&gd_op_sm_lock);
+        ret = 0;
+
+lock_failed:
+
+        return ret;
+}
+
+/*
+ * Record op as the operation of the global opinfo. op is asserted to lie
+ * strictly between GD_OP_NONE and GD_OP_MAX. Always returns 0.
+ */
+int32_t
+glusterd_op_set_op (glusterd_op_t op)
+{
+        GF_ASSERT (op < GD_OP_MAX);
+        GF_ASSERT (op > GD_OP_NONE);
+
+        opinfo.op = op;
+        return 0;
+}
+
+/* Return the operation currently stored in the global opinfo. */
+int32_t
+glusterd_op_get_op ()
+{
+        int32_t current_op = opinfo.op;
+
+        return current_op;
+}
+
+/*
+ * Store req (asserted non-NULL) as the request of the global opinfo.
+ * Always returns 0.
+ */
+int32_t
+glusterd_op_set_req (rpcsvc_request_t *req)
+{
+        GF_ASSERT (req);
+
+        opinfo.req = req;
+
+        return 0;
+}
+
+/*
+ * Reset the operation of the global opinfo to GD_OP_NONE. The op argument
+ * is not consulted. Always returns 0.
+ */
+int32_t
+glusterd_op_clear_op (glusterd_op_t op)
+{
+        opinfo.op = GD_OP_NONE;
+        return 0;
+}
+
+/*
+ * Release an operation context and reset the global op context.
+ *
+ * When ctx is non-NULL and op is one of the operations listed below, ctx is
+ * passed to dict_unref(); any other op trips GF_ASSERT (0). In every case,
+ * including a NULL ctx, glusterd_op_reset_ctx() is then called.
+ *
+ * Always returns 0.
+ */
+int32_t
+glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
+{
+        if (!ctx)
+                goto reset;
+
+        switch (op) {
+        case GD_OP_CREATE_VOLUME:
+        case GD_OP_DELETE_VOLUME:
+        case GD_OP_STOP_VOLUME:
+        case GD_OP_ADD_BRICK:
+        case GD_OP_REMOVE_BRICK:
+        case GD_OP_REPLACE_BRICK:
+        case GD_OP_LOG_ROTATE:
+        case GD_OP_SYNC_VOLUME:
+        case GD_OP_SET_VOLUME:
+        case GD_OP_START_VOLUME:
+        case GD_OP_RESET_VOLUME:
+        case GD_OP_GSYNC_SET:
+        case GD_OP_QUOTA:
+        case GD_OP_PROFILE_VOLUME:
+        case GD_OP_STATUS_VOLUME:
+        case GD_OP_REBALANCE:
+        case GD_OP_HEAL_VOLUME:
+        case GD_OP_STATEDUMP_VOLUME:
+        case GD_OP_CLEARLOCKS_VOLUME:
+        case GD_OP_DEFRAG_BRICK_VOLUME:
+                dict_unref (ctx);
+                break;
+        default:
+                GF_ASSERT (0);
+                break;
+        }
+
+reset:
+        glusterd_op_reset_ctx ();
+        return 0;
+}
+
+/* Return the op context pointer stored in the global opinfo. */
+void *
+glusterd_op_get_ctx ()
+{
+        void *current_ctx = opinfo.op_ctx;
+
+        return current_ctx;
+}
+
+/*
+ * Initialise the op state machine's globals: the event queue list head
+ * and the synclock that guards glusterd_op_sm(). Always returns 0.
+ */
+int
+glusterd_op_sm_init ()
+{
+        CDS_INIT_LIST_HEAD (&gd_op_sm_queue);
+
+        synclock_init (&gd_op_sm_lock, SYNC_LOCK_DEFAULT);
+
+        return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
new file mode 100644
index 00000000000..19b1bd97e04
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -0,0 +1,302 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_OP_SM_H_
+#define _GLUSTERD_OP_SM_H_
+
+
+#include <pthread.h>
+#include "compat-uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "protocol-common.h"
+#include "glusterd-hooks.h"
+
+#define GD_OP_PROTECTED (0x02)
+#define GD_OP_UNPROTECTED (0x04)
+
+/*
+ * States of the glusterd op state machine.
+ *
+ * The numeric values are used as indices into glusterd_op_state_table in
+ * glusterd-op-sm.c, so the enumerator order here and the table order there
+ * must be kept in step. GD_OP_STATE_MAX is the terminating count; it has no
+ * transition table of its own.
+ */
+typedef enum glusterd_op_sm_state_ {
+ GD_OP_STATE_DEFAULT = 0,
+ GD_OP_STATE_LOCK_SENT,
+ GD_OP_STATE_LOCKED,
+ GD_OP_STATE_STAGE_OP_SENT,
+ GD_OP_STATE_STAGED,
+ GD_OP_STATE_COMMIT_OP_SENT,
+ GD_OP_STATE_COMMITED,
+ GD_OP_STATE_UNLOCK_SENT,
+ GD_OP_STATE_STAGE_OP_FAILED,
+ GD_OP_STATE_COMMIT_OP_FAILED,
+ GD_OP_STATE_BRICK_OP_SENT,
+ GD_OP_STATE_BRICK_OP_FAILED,
+ GD_OP_STATE_BRICK_COMMITTED,
+ GD_OP_STATE_BRICK_COMMIT_FAILED,
+ GD_OP_STATE_ACK_DRAIN,
+ GD_OP_STATE_MAX,
+} glusterd_op_sm_state_t;
+
+/*
+ * Event types accepted by the op state machine.
+ *
+ * The numeric values index the rows of each per-state transition table in
+ * glusterd-op-sm.c, so the enumerator order must match the row order of
+ * those tables. GD_OP_EVENT_MAX is the upper bound: glusterd_op_sm_new_event()
+ * asserts that an event type is strictly below it.
+ */
+typedef enum glusterd_op_sm_event_type_ {
+ GD_OP_EVENT_NONE = 0,
+ GD_OP_EVENT_START_LOCK,
+ GD_OP_EVENT_LOCK,
+ GD_OP_EVENT_RCVD_ACC,
+ GD_OP_EVENT_ALL_ACC,
+ GD_OP_EVENT_STAGE_ACC,
+ GD_OP_EVENT_COMMIT_ACC,
+ GD_OP_EVENT_RCVD_RJT,
+ GD_OP_EVENT_STAGE_OP,
+ GD_OP_EVENT_COMMIT_OP,
+ GD_OP_EVENT_UNLOCK,
+ GD_OP_EVENT_START_UNLOCK,
+ GD_OP_EVENT_ALL_ACK,
+ GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP,
+ GD_OP_EVENT_MAX
+} glusterd_op_sm_event_type_t;
+
+
+/*
+ * One queued op state-machine event.
+ *   list   - node linking the event into the state machine's event queue
+ *   ctx    - opaque context handed to the handler; how it is destroyed
+ *            depends on the event type (see glusterd_destroy_op_event_ctx)
+ *   event  - the event type
+ *   txn_id - id of the transaction the event belongs to
+ */
+struct glusterd_op_sm_event_ {
+ struct cds_list_head list;
+ void *ctx;
+ glusterd_op_sm_event_type_t event;
+ uuid_t txn_id;
+};
+
+typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t;
+
+typedef int (*glusterd_op_sm_ac_fn) (glusterd_op_sm_event_t *, void *);
+
+/*
+ * One row of a per-state transition table: the handler to run for an event
+ * and the state to move to afterwards.
+ */
+typedef struct glusterd_op_sm_ {
+ glusterd_op_sm_state_t next_state;
+ glusterd_op_sm_ac_fn handler;
+} glusterd_op_sm_t;
+
+typedef struct glusterd_op_sm_state_info_ {
+ glusterd_op_sm_state_t state;
+ struct timeval time;
+} glusterd_op_sm_state_info_t;
+
+struct glusterd_op_info_ {
+ glusterd_op_sm_state_info_t state;
+ int32_t pending_count;
+ int32_t brick_pending_count;
+ int32_t op_count;
+ /* op is an enum, glusterd_op_t or glusterd_op_sm_state_info_t */
+ int op;
+ struct cds_list_head op_peers;
+ void *op_ctx;
+ rpcsvc_request_t *req;
+ int32_t op_ret;
+ int32_t op_errno;
+ char *op_errstr;
+ struct cds_list_head pending_bricks;
+ uint32_t txn_generation;
+};
+
+typedef struct glusterd_op_info_ glusterd_op_info_t;
+
+struct glusterd_op_log_filename_ctx_ {
+ char volume_name[GD_VOLUME_NAME_MAX];
+ char brick[GD_VOLUME_NAME_MAX];
+ char path[PATH_MAX];
+};
+typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t;
+
+struct glusterd_op_lock_ctx_ {
+ uuid_t uuid;
+ dict_t *dict;
+ rpcsvc_request_t *req;
+};
+
+typedef struct glusterd_op_lock_ctx_ glusterd_op_lock_ctx_t;
+
+struct glusterd_req_ctx_ {
+ rpcsvc_request_t *req;
+ u_char uuid[16];
+ int op;
+ dict_t *dict;
+};
+
+typedef struct glusterd_req_ctx_ glusterd_req_ctx_t;
+
+typedef struct glusterd_op_brick_rsp_ctx_ {
+ int op_ret;
+ char *op_errstr;
+ dict_t *rsp_dict;
+ glusterd_req_ctx_t *commit_ctx;
+ glusterd_pending_node_t *pending_node;
+} glusterd_op_brick_rsp_ctx_t;
+
+typedef struct glusterd_pr_brick_rsp_conv_t {
+ int count;
+ dict_t *dict;
+} glusterd_pr_brick_rsp_conv_t;
+
+typedef struct glusterd_heal_rsp_conv_ {
+ dict_t *dict;
+ glusterd_volinfo_t *volinfo;
+ xlator_t *this;
+} glusterd_heal_rsp_conv_t;
+
+typedef struct glusterd_status_rsp_conv_ {
+ int count;
+ int brick_index_max;
+ int other_count;
+ dict_t *dict;
+} glusterd_status_rsp_conv_t;
+
+
+typedef struct glusterd_txn_opinfo_object_ {
+ glusterd_op_info_t opinfo;
+} glusterd_txn_opinfo_obj;
+
+typedef enum cli_cmd_type_ {
+ PER_HEAL_XL,
+ ALL_HEAL_XL,
+ } cli_cmd_type;
+
+typedef struct glusterd_all_volume_options {
+ char *option;
+} glusterd_all_vol_opts;
+
+int
+glusterd_op_commit_hook (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type);
+
+int
+glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type,
+ glusterd_op_sm_event_t **new_event);
+int
+glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type,
+ uuid_t *txn_id, void *ctx);
+
+int
+glusterd_op_sm_init ();
+
+int
+glusterd_op_sm ();
+
+int32_t
+glusterd_op_set_ctx (void *ctx);
+
+int32_t
+glusterd_op_set_op (glusterd_op_t op);
+
+int
+glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx);
+
+int32_t
+glusterd_op_stage_validate (glusterd_op_t op, dict_t *req, char **op_errstr,
+ dict_t *rsp_dict);
+
+int32_t
+glusterd_op_commit_perform (glusterd_op_t op, dict_t *req, char **op_errstr,
+ dict_t* dict);
+
+int32_t
+glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len);
+
+int32_t
+glusterd_op_txn_complete ();
+
+void *
+glusterd_op_get_ctx ();
+
+int32_t
+glusterd_op_set_req (rpcsvc_request_t *req);
+
+int32_t
+glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
+ int32_t op_errno, rpcsvc_request_t *req,
+ void *ctx, char *op_errstr);
+int32_t
+glusterd_op_get_op ();
+
+int32_t
+glusterd_op_clear_op ();
+
+int32_t
+glusterd_op_free_ctx (glusterd_op_t op, void *ctx);
+
+int
+glusterd_check_option_exists(char *optstring, char **completion);
+
+int
+set_xlator_option (dict_t *dict, char *key, char *value);
+
+char*
+glusterd_op_sm_state_name_get (int state);
+
+char*
+glusterd_op_sm_event_name_get (int event);
+int32_t
+glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected, dict_t *rsp_dict);
+int
+glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
+ gd1_mgmt_brick_op_req **req, dict_t *dict);
+int
+glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+ dict_t *dict);
+int32_t
+glusterd_handle_brick_rsp (void *pending_entry, glusterd_op_t op,
+ dict_t *rsp_dict, dict_t *ctx_dict, char **op_errstr,
+ gd_node_type type);
+
+dict_t*
+glusterd_op_init_commit_rsp_dict (glusterd_op_t op);
+
+void
+glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx);
+
+int32_t
+glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size,
+ int32_t blk_count, double *throughput, double *time);
+int32_t
+glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
+ int32_t blk_count, double *throughput, double *time);
+gf_boolean_t
+glusterd_is_volume_started (glusterd_volinfo_t *volinfo);
+int
+glusterd_start_bricks (glusterd_volinfo_t *volinfo);
+gf_boolean_t
+glusterd_are_all_volumes_stopped ();
+int
+glusterd_stop_bricks (glusterd_volinfo_t *volinfo);
+int
+glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
+ dict_t *op_ctx);
+#ifdef HAVE_BD_XLATOR
+int
+glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg);
+#endif
+
+int32_t
+glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo);
+
+int32_t
+glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo);
+
+int32_t
+glusterd_clear_txn_opinfo (uuid_t *txn_id);
+
+int32_t
+glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id);
+
+void
+glusterd_set_opinfo (char *errstr, int32_t op_errno, int32_t op_ret);
+
+int
+glusterd_dict_set_volid (dict_t *dict, char *volname, char **op_errstr);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
new file mode 100644
index 00000000000..1a97111d0f5
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
@@ -0,0 +1,1058 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd-peer-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-messages.h"
+#include "common-utils.h"
+
+/* Callback handed to call_rcu() by glusterd_peerinfo_cleanup(). @head is the
+ * rcu_head embedded in the peer's gd_rcu_head. Deletes the stored peer entry,
+ * frees every hostname attached to the peer and finally frees the peer.
+ */
+void
+glusterd_peerinfo_destroy (struct rcu_head *head)
+{
+        glusterd_peerinfo_t      *peer = NULL;
+        glusterd_peer_hostname_t *addr = NULL;
+        glusterd_peer_hostname_t *next = NULL;
+
+        /* This works as rcu_head is the first member of gd_rcu_head */
+        peer = caa_container_of ((gd_rcu_head *)head, glusterd_peerinfo_t,
+                                 rcu_head);
+
+        /* Restore the xlator saved when the callback was queued; some of
+         * the functions called below need THIS */
+        THIS = peer->rcu_head.this;
+
+        CDS_INIT_LIST_HEAD (&peer->uuid_list);
+
+        /* A failed store delete is only logged; teardown carries on */
+        if (glusterd_store_delete_peerinfo (peer) != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_PEERINFO_DELETE_FAIL,
+                        "Deleting peer info failed");
+        }
+
+        GF_FREE (peer->hostname);
+        peer->hostname = NULL;
+
+        /* _safe variant: each entry is unlinked and freed while iterating */
+        cds_list_for_each_entry_safe (addr, next, &peer->hostnames,
+                                      hostname_list) {
+                glusterd_peer_hostname_free (addr);
+        }
+
+        glusterd_sm_tr_log_delete (&peer->sm_log);
+        pthread_mutex_destroy (&peer->delete_lock);
+        GF_FREE (peer);
+}
+
+/* Unlink @peerinfo from the peer list and queue glusterd_peerinfo_destroy()
+ * through call_rcu(). Only the caller that wins delete_lock performs the
+ * teardown; any other caller returns straight away. Always returns 0.
+ */
+int32_t
+glusterd_peerinfo_cleanup (glusterd_peerinfo_t *peerinfo)
+{
+        glusterd_conf_t *conf        = NULL;
+        gf_boolean_t     need_quorum = _gf_false;
+
+        GF_ASSERT (peerinfo);
+        conf = THIS->private;
+
+        /* Someone else is already deleting the peer, so give up */
+        if (pthread_mutex_trylock (&peerinfo->delete_lock) != 0)
+                return 0;
+
+        /* Sample the quorum contribution now; the action itself is run
+         * only after the destroy callback has been queued */
+        if (peerinfo->quorum_contrib != QUORUM_NONE)
+                need_quorum = _gf_true;
+
+        if (peerinfo->rpc != NULL) {
+                peerinfo->rpc = glusterd_rpc_clnt_unref (conf, peerinfo->rpc);
+                peerinfo->rpc = NULL;
+        }
+
+        cds_list_del_rcu (&peerinfo->uuid_list);
+
+        /* Saving THIS, as it is needed by the callback function */
+        peerinfo->rcu_head.this = THIS;
+        call_rcu (&peerinfo->rcu_head.head, glusterd_peerinfo_destroy);
+
+        if (need_quorum)
+                glusterd_do_quorum_action ();
+
+        return 0;
+}
+
+/* glusterd_peerinfo_find_by_hostname returns the peer whose address matches
+ * @hoststr, or NULL if there is none.
+ *
+ * The cheap path is a string comparison against the stored hostnames. Only
+ * when that fails is @hoststr resolved with getaddrinfo() and each resolved
+ * address compared against the peers' resolved addresses.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_hostname (const char *hoststr)
+{
+        xlator_t            *this     = NULL;
+        glusterd_peerinfo_t *match    = NULL;
+        struct addrinfo     *resolved = NULL;
+        struct addrinfo     *cur      = NULL;
+        int                  rc       = -1;
+
+        this = THIS;
+        GF_ASSERT (hoststr);
+
+        /* Quick search by string match */
+        match = gd_peerinfo_find_from_hostname (hoststr);
+        if (match != NULL)
+                return match;
+
+        rc = getaddrinfo (hoststr, NULL, NULL, &resolved);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, rc,
+                        GD_MSG_GETADDRINFO_FAIL,
+                        "error in getaddrinfo: %s\n",
+                        gai_strerror(rc));
+        } else {
+                /* Thorough search: try every resolved address in turn */
+                for (cur = resolved; cur != NULL; cur = cur->ai_next) {
+                        match = gd_peerinfo_find_from_addrinfo (cur);
+                        if (match != NULL)
+                                break;
+                }
+        }
+
+        if (match == NULL)
+                gf_msg_debug (this->name, 0, "Unable to find friend: %s",
+                              hoststr);
+
+        if (resolved != NULL)
+                freeaddrinfo (resolved);
+
+        return match;
+}
+
+/* Resolve @hostname to a node uuid and copy it into @uuid.
+ * A known peer yields that peer's uuid; otherwise, if @hostname is a local
+ * address, MY_UUID is used. Returns 0 on success and -1 when neither applies.
+ */
+int
+glusterd_hostname_to_uuid (char *hostname, uuid_t uuid)
+{
+        GF_ASSERT (hostname);
+        GF_ASSERT (uuid);
+
+        xlator_t            *this = NULL;
+        glusterd_conf_t     *conf = NULL;
+        glusterd_peerinfo_t *peer = NULL;
+        int                  rc   = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        peer = glusterd_peerinfo_find_by_hostname (hostname);
+        if (peer != NULL) {
+                gf_uuid_copy (uuid, peer->uuid);
+                rc = 0;
+        } else if (gf_is_local_addr (hostname)) {
+                /* not a peer, but one of our own addresses */
+                gf_uuid_copy (uuid, MY_UUID);
+                rc = 0;
+        }
+
+        gf_msg_debug (this->name, 0, "returning %d", rc);
+        return rc;
+}
+
+/* glusterd_peerinfo_find_by_uuid looks up the peer whose uuid equals @uuid.
+ * Returns the peerinfo object, or NULL when @uuid is null or no peer matches.
+ * The peer list is walked inside an RCU read-side section; the pointer is
+ * returned after that section has been left.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_uuid (uuid_t uuid)
+{
+        xlator_t            *this  = NULL;
+        glusterd_conf_t     *conf  = NULL;
+        glusterd_peerinfo_t *peer  = NULL;
+        glusterd_peerinfo_t *match = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (gf_uuid_is_null (uuid))
+                return NULL;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                if (gf_uuid_compare (peer->uuid, uuid) != 0)
+                        continue;
+
+                gf_msg_debug (this->name, 0,
+                              "Friend found... state: %s",
+                              glusterd_friend_sm_state_name_get (peer->state.state));
+                match = peer; /* Probably should be rcu_dereferenced */
+                break;
+        }
+        rcu_read_unlock ();
+
+        if (match == NULL)
+                gf_msg_debug (this->name, 0,
+                              "Friend with uuid: %s, not found", uuid_utoa (uuid));
+
+        return match;
+}
+
+/* glusterd_peerinfo_find tries @uuid first (when given) and then @hostname
+ * (when given), returning the first peerinfo object that matches.
+ * Returns NULL if neither lookup finds a peer.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find (uuid_t uuid, const char *hostname)
+{
+        xlator_t            *this = NULL;
+        glusterd_peerinfo_t *peer = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (uuid) {
+                peer = glusterd_peerinfo_find_by_uuid (uuid);
+                if (peer != NULL)
+                        return peer;
+
+                gf_msg_debug (this->name, 0,
+                              "Unable to find peer by uuid: %s",
+                              uuid_utoa (uuid));
+        }
+
+        if (hostname) {
+                peer = glusterd_peerinfo_find_by_hostname (hostname);
+                if (peer != NULL)
+                        return peer;
+
+                gf_msg_debug (this->name, 0,
+                              "Unable to find hostname: %s", hostname);
+        }
+
+        return NULL;
+}
+
+/* glusterd_peerinfo_new will create a new peerinfo object and set its members'
+ * values using the passed parameters.
+ * @state    initial friend state-machine state
+ * @uuid     optional; copied into the peer when non-NULL
+ * @hostname optional; added as the first entry in peerinfo->hostnames and
+ *           also duplicated into peerinfo->hostname
+ * @port     stored in peerinfo->port
+ *
+ * It returns a pointer to peerinfo object if successful and returns NULL
+ * otherwise. The caller should take care of freeing the created peerinfo
+ * object. On any failure after allocation the half-built object is handed
+ * to glusterd_peerinfo_cleanup().
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_new (glusterd_friend_sm_state_t state, uuid_t *uuid,
+                       const char *hostname, int port)
+{
+        glusterd_peerinfo_t *new_peer = NULL;
+        int                  ret      = -1;
+        xlator_t            *this     = NULL;
+        glusterd_conf_t     *conf     = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        new_peer = GF_CALLOC (1, sizeof (*new_peer), gf_gld_mt_peerinfo_t);
+        if (!new_peer)
+                goto out;
+
+        CDS_INIT_LIST_HEAD (&new_peer->uuid_list);
+        CDS_INIT_LIST_HEAD (&new_peer->hostnames);
+
+        /* Initialise the lock before anything below can fail: the error
+         * path calls glusterd_peerinfo_cleanup(), which trylocks it.
+         */
+        pthread_mutex_init (&new_peer->delete_lock, NULL);
+
+        new_peer->state.state = state;
+
+        if (hostname) {
+                ret = gd_add_address_to_peer (new_peer, hostname);
+                if (ret)
+                        goto out;
+                /* Also set it to peerinfo->hostname. Doing this as we use
+                 * peerinfo->hostname in a lot of places and is really hard to
+                 * get everything right
+                 */
+                new_peer->hostname = gf_strdup (hostname);
+                if (!new_peer->hostname) {
+                        /* do not hand out a peer with a silently missing
+                         * hostname */
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        if (uuid) {
+                gf_uuid_copy (new_peer->uuid, *uuid);
+        }
+
+        ret = glusterd_sm_tr_log_init (&new_peer->sm_log,
+                                       glusterd_friend_sm_state_name_get,
+                                       glusterd_friend_sm_event_name_get,
+                                       GLUSTERD_TR_LOG_SIZE);
+        if (ret)
+                goto out;
+
+        if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED)
+                new_peer->quorum_contrib = QUORUM_WAITING;
+        new_peer->port = port;
+
+        /* Consume a generation number only once nothing can fail any more */
+        new_peer->generation = uatomic_add_return (&conf->generation, 1);
+out:
+        if (ret && new_peer) {
+                glusterd_peerinfo_cleanup (new_peer);
+                new_peer = NULL;
+        }
+        return new_peer;
+}
+
+/* Check that every peer is both connected and befriended. The peer whose uuid
+ * equals @skip_uuid (the peer being detached) is ignored; a null @skip_uuid
+ * skips nobody. Returns _gf_true when no offending peer is found.
+ */
+gf_boolean_t
+glusterd_chk_peers_connected_befriended (uuid_t skip_uuid)
+{
+        glusterd_conf_t     *conf   = NULL;
+        glusterd_peerinfo_t *peer   = NULL;
+        gf_boolean_t         all_ok = _gf_true;
+
+        conf = THIS->private;
+        GF_ASSERT (conf);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                /* the peer being detached does not count */
+                if (!gf_uuid_is_null (skip_uuid) &&
+                    gf_uuid_compare (skip_uuid, peer->uuid) == 0)
+                        continue;
+
+                if ((GD_FRIEND_STATE_BEFRIENDED == peer->state.state) &&
+                    peer->connected)
+                        continue;
+
+                /* first peer that is not befriended or not connected */
+                all_ok = _gf_false;
+                break;
+        }
+        rcu_read_unlock ();
+
+        gf_msg_debug (THIS->name, 0, "Returning %s",
+                      (all_ok?"TRUE":"FALSE"));
+        return all_ok;
+}
+
+/* Return a freshly duplicated hostname for @uuid, or NULL if no peer has that
+ * uuid (or the duplication failed). The local node's uuid (MY_UUID) resolves
+ * to "localhost" without consulting the peer list.
+ * The string comes from gf_strdup(), so the caller is expected to release it.
+ */
+char *
+glusterd_uuid_to_hostname (uuid_t uuid)
+{
+        char                *hostname = NULL;
+        glusterd_conf_t     *priv     = NULL;
+        glusterd_peerinfo_t *entry    = NULL;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        /* Return right away for the local uuid: falling through to the peer
+         * walk could overwrite this copy and leak it.
+         */
+        if (!gf_uuid_compare (MY_UUID, uuid))
+                return gf_strdup ("localhost");
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) {
+                if (!gf_uuid_compare (entry->uuid, uuid)) {
+                        hostname = gf_strdup (entry->hostname);
+                        break;
+                }
+        }
+        rcu_read_unlock ();
+
+        return hostname;
+}
+
+/* Return the textual uuid cached in @peerinfo->uuid_str, rendering it on first
+ * use. Returns NULL when @peerinfo is NULL or its uuid is null. The returned
+ * pointer refers to storage inside @peerinfo.
+ */
+char*
+gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo)
+{
+        if (peerinfo == NULL)
+                return NULL;
+
+        if (gf_uuid_is_null (peerinfo->uuid))
+                return NULL;
+
+        /* an empty cache means the string was never rendered */
+        if ('\0' == peerinfo->uuid_str[0])
+                uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str);
+
+        return peerinfo->uuid_str;
+}
+
+/* Returns _gf_true only when every peer in the peer list is connected.
+ * Returns _gf_false if any peer is disconnected, or if THIS or its private
+ * configuration is missing.
+ */
+gf_boolean_t
+glusterd_are_all_peers_up (void)
+{
+        glusterd_peerinfo_t *peerinfo = NULL;
+        xlator_t            *this     = NULL;
+        glusterd_conf_t     *conf     = NULL;
+        gf_boolean_t         peers_up = _gf_false;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        /* Assume success and let the first disconnected peer veto it; this
+         * keeps a single unlock on every path out of the read section.
+         */
+        peers_up = _gf_true;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                if (!peerinfo->connected) {
+                        peers_up = _gf_false;
+                        break;
+                }
+        }
+        rcu_read_unlock ();
+
+out:
+        return peers_up;
+}
+
+/* For every brick of @volinfo that is not owned by this node, find the owning
+ * peer in @peers and check that it is connected and befriended.
+ * Returns _gf_true when no owning peer fails that check. On the first failing
+ * peer, a duplicate of its hostname is stored in *@down_peerstr and _gf_false
+ * is returned.
+ */
+gf_boolean_t
+glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo,
+                               struct cds_list_head *peers,
+                               char **down_peerstr)
+{
+        glusterd_brickinfo_t *brick  = NULL;
+        glusterd_peerinfo_t  *peer   = NULL;
+        gf_boolean_t          all_up = _gf_false;
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                /* bricks on this node need no peer check */
+                if (gf_uuid_compare (brick->uuid, MY_UUID) == 0)
+                        continue;
+
+                rcu_read_lock ();
+                cds_list_for_each_entry_rcu (peer, peers, uuid_list) {
+                        if (gf_uuid_compare (peer->uuid, brick->uuid) != 0)
+                                continue;
+
+                        /* Found peer who owns the brick; it is fine only if
+                         * it is both connected and a friend */
+                        if (peer->connected &&
+                            (peer->state.state ==
+                             GD_FRIEND_STATE_BEFRIENDED))
+                                continue;
+
+                        *down_peerstr = gf_strdup (peer->hostname);
+                        gf_msg_debug (THIS->name, 0, "Peer %s is down. ",
+                                      peer->hostname);
+                        rcu_read_unlock ();
+                        goto out;
+                }
+                rcu_read_unlock ();
+        }
+
+        all_up = _gf_true;
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", all_up);
+        return all_up;
+}
+
+/* Allocate a hostname list entry holding a private copy of @hostname and
+ * return it through @name.
+ * Returns 0 on success. Returns -1, leaving *@name untouched, if either the
+ * entry or the string copy cannot be allocated.
+ */
+int32_t
+glusterd_peer_hostname_new (const char *hostname,
+                            glusterd_peer_hostname_t **name)
+{
+        glusterd_peer_hostname_t *peer_hostname = NULL;
+        int32_t                   ret           = -1;
+
+        GF_ASSERT (hostname);
+        GF_ASSERT (name);
+
+        peer_hostname = GF_CALLOC (1, sizeof (*peer_hostname),
+                                   gf_gld_mt_peer_hostname_t);
+
+        if (!peer_hostname)
+                goto out;
+
+        peer_hostname->hostname = gf_strdup (hostname);
+        if (!peer_hostname->hostname) {
+                /* An entry with a NULL hostname must not reach the list:
+                 * gd_peer_has_address() passes it straight to strcmp().
+                 */
+                GF_FREE (peer_hostname);
+                goto out;
+        }
+        CDS_INIT_LIST_HEAD (&peer_hostname->hostname_list);
+
+        *name = peer_hostname;
+        ret = 0;
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Unlink @name from whatever list it is on and free both its hostname string
+ * and the entry itself. A NULL @name is ignored.
+ */
+void
+glusterd_peer_hostname_free (glusterd_peer_hostname_t *name)
+{
+        if (name == NULL)
+                return;
+
+        /* del_init leaves the node pointing at itself before it is freed */
+        cds_list_del_init (&name->hostname_list);
+
+        GF_FREE (name->hostname);
+        name->hostname = NULL;
+
+        GF_FREE (name);
+}
+
+/* Report whether @address is already present in @peerinfo->hostnames.
+ * The comparison is an exact, case-sensitive strcmp(). Returns _gf_false when
+ * either argument is NULL.
+ */
+gf_boolean_t
+gd_peer_has_address (glusterd_peerinfo_t *peerinfo, const char *address)
+{
+        glusterd_peer_hostname_t *entry = NULL;
+        gf_boolean_t              known = _gf_false;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", (peerinfo != NULL), out);
+        GF_VALIDATE_OR_GOTO ("glusterd", (address != NULL), out);
+
+        cds_list_for_each_entry (entry, &peerinfo->hostnames,
+                                 hostname_list) {
+                if (strcmp (entry->hostname, address) != 0)
+                        continue;
+
+                known = _gf_true;
+                break;
+        }
+
+out:
+        return known;
+}
+
+/* Append @address to @peerinfo->hostnames unless it is already there.
+ * Returns 0 when the address is present afterwards, -1 if an argument is NULL,
+ * and otherwise whatever glusterd_peer_hostname_new() returned on failure.
+ */
+int
+gd_add_address_to_peer (glusterd_peerinfo_t *peerinfo, const char *address)
+{
+        glusterd_peer_hostname_t *entry = NULL;
+        int                       rc    = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", (peerinfo != NULL), out);
+        GF_VALIDATE_OR_GOTO ("glusterd", (address != NULL), out);
+
+        /* only create a new entry for an address we do not hold yet */
+        if (!gd_peer_has_address (peerinfo, address)) {
+                rc = glusterd_peer_hostname_new (address, &entry);
+                if (rc)
+                        goto out;
+
+                cds_list_add_tail_rcu (&entry->hostname_list,
+                                       &peerinfo->hostnames);
+        }
+
+        rc = 0;
+out:
+        return rc;
+}
+
+/* gd_add_friend_to_dict() adds details of @friend into @dict with the given
+ * @prefix. All the parameters are compulsory.
+ *
+ * The complete address list is added to the dict only if the cluster op-version
+ * is >= GD_OP_VERSION_3_6_0
+ *
+ * Keys written: "<prefix>.uuid" and "<prefix>.hostname" always; for
+ * op-version >= GD_OP_VERSION_3_6_0 also "<prefix>.hostname<N>" for each
+ * address (N counts from 0) and "<prefix>.address-count".
+ *
+ * Returns 0 on success, -1 if one of the initial parameter checks fails, and
+ * otherwise whatever the failing dict_set_* call returned.
+ */
+int
+gd_add_friend_to_dict (glusterd_peerinfo_t *friend, dict_t *dict,
+ const char *prefix)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[100] = {0,};
+ glusterd_peer_hostname_t *address = NULL;
+ int count = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("glusterd", (this != NULL), out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO (this->name, (friend != NULL), out);
+ GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+
+ snprintf (key, sizeof (key), "%s.uuid", prefix);
+ ret = dict_set_dynstr_with_alloc (dict, key, uuid_utoa (friend->uuid));
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+ goto out;
+ }
+
+ /* Setting the first hostname from the list with this key for backward
+ * compatibility
+ *
+ * NOTE(review): cds_list_entry() is handed the list head itself
+ * (&friend->hostnames), not friend->hostnames.next. If cds_list_entry()
+ * is a plain container-of, @address is derived from the head's own
+ * address rather than from the first node, and the NULL check below
+ * cannot detect an empty list. Confirm the intent before changing it.
+ */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.hostname", prefix);
+ address = cds_list_entry (&friend->hostnames, glusterd_peer_hostname_t,
+ hostname_list);
+ if (!address) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_ADDRESS_GET_FAIL,
+ "Could not retrieve first "
+ "address for peer");
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc (dict, key, address->hostname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+ goto out;
+ }
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ address = NULL;
+ count = 0;
+ cds_list_for_each_entry (address, &friend->hostnames, hostname_list) {
+ GF_VALIDATE_OR_GOTO (this->name, (address != NULL), out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.hostname%d", prefix, count);
+ ret = dict_set_dynstr_with_alloc (dict, key, address->hostname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+ goto out;
+ }
+ count++;
+ }
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.address-count", prefix);
+ ret = dict_set_int32 (dict, key, count);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+
+out:
+ gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d",
+ ret);
+ return ret;
+}
+
+/* gd_peerinfo_find_from_hostname compares @hoststr, ignoring case and looking
+ * at no more than 1024 characters, against every address stored for every peer.
+ * Returns the first peer with a matching address, or NULL if there is none
+ * (or if the configuration or @hoststr is missing).
+ */
+glusterd_peerinfo_t *
+gd_peerinfo_find_from_hostname (const char *hoststr)
+{
+        xlator_t                 *this  = NULL;
+        glusterd_conf_t          *conf  = NULL;
+        glusterd_peerinfo_t      *peer  = NULL;
+        glusterd_peerinfo_t      *match = NULL;
+        glusterd_peer_hostname_t *name  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (hoststr != NULL), out);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                cds_list_for_each_entry_rcu (name, &peer->hostnames,
+                                             hostname_list) {
+                        if (strncasecmp (name->hostname, hoststr, 1024) != 0)
+                                continue;
+
+                        gf_msg_debug (this->name, 0,
+                                      "Friend %s found.. state: %d",
+                                      name->hostname, peer->state.state);
+                        match = peer; /* Probably needs to be
+                                         dereferenced*/
+                        break;
+                }
+                /* a hit in the inner walk ends the outer walk too */
+                if (match != NULL)
+                        break;
+        }
+        rcu_read_unlock ();
+out:
+        return match;
+}
+
+/* gd_peerinfo_find_from_addrinfo resolves every address stored for every peer
+ * and compares each resolved socket address with @addr->ai_addr.
+ *
+ * NOTE: getaddrinfo is a blocking call and is issued once per stored address
+ * while the RCU read lock is held, so the calling thread may be blocked for a
+ * significant amount of time.
+ *
+ * Returns the first peer with a matching address, else NULL.
+ */
+glusterd_peerinfo_t *
+gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr)
+{
+        xlator_t                 *this     = NULL;
+        glusterd_conf_t          *conf     = NULL;
+        glusterd_peerinfo_t      *peer     = NULL;
+        glusterd_peerinfo_t      *match    = NULL;
+        glusterd_peer_hostname_t *name     = NULL;
+        struct addrinfo          *resolved = NULL;
+        struct addrinfo          *cur      = NULL;
+        int                       rc       = 0;
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (addr != NULL), out);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                cds_list_for_each_entry_rcu (name, &peer->hostnames,
+                                             hostname_list) {
+                        /* TODO: Cache the resolved addrinfos to improve
+                         * performance
+                         */
+                        rc = getaddrinfo (name->hostname, NULL, NULL,
+                                          &resolved);
+                        if (rc != 0) {
+                                /* An unresolvable address is not fatal;
+                                 * move on to the next one
+                                 */
+                                gf_msg_trace (this->name, 0,
+                                              "getaddrinfo for %s failed (%s)",
+                                              name->hostname, gai_strerror (rc));
+                                continue;
+                        }
+
+                        for (cur = resolved; cur != NULL; cur = cur->ai_next) {
+                                if (gf_compare_sockaddr (addr->ai_addr,
+                                                         cur->ai_addr)) {
+                                        match = peer; /* (de)referenced? */
+                                        break;
+                                }
+                        }
+
+                        /* release the result before deciding to stop */
+                        freeaddrinfo (resolved);
+                        if (match != NULL)
+                                break;
+                }
+                if (match != NULL)
+                        break;
+        }
+        rcu_read_unlock ();
+out:
+        return match;
+}
+
+/* gd_update_peerinfo_from_dict reads the peer details stored under @prefix in
+ * @dict and merges them into @peerinfo: "<prefix>.hostname" is added to the
+ * address list and replaces peerinfo->hostname; for op-version >=
+ * GD_OP_VERSION_3_6_0 the "<prefix>.address-count" addresses stored as
+ * "<prefix>.hostname<N>" are added as well.
+ * Returns 0 on success; on failure returns -1 or the non-zero result of the
+ * call that failed.
+ */
+int
+gd_update_peerinfo_from_dict (glusterd_peerinfo_t *peerinfo, dict_t *dict,
+                              const char *prefix)
+{
+        xlator_t        *this     = NULL;
+        glusterd_conf_t *conf     = NULL;
+        char             key[100] = {0,};
+        char            *addr     = NULL;
+        int              naddrs   = 0;
+        int              idx      = 0;
+        int              ret      = -1;
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (peerinfo != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+
+        /* Primary hostname: present regardless of op-version */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.hostname", prefix);
+        ret = dict_get_str (dict, key, &addr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Key %s not present in "
+                        "dictionary", key);
+                goto out;
+        }
+
+        ret = gd_add_address_to_peer (peerinfo, addr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_ADD_ADDRESS_TO_PEER_FAIL,
+                        "Could not add address to peer");
+                goto out;
+        }
+
+        /* Also set peerinfo->hostname to the first address */
+        if (peerinfo->hostname != NULL)
+                GF_FREE (peerinfo->hostname);
+        peerinfo->hostname = gf_strdup (addr);
+
+        /* Older clusters do not send the full address list */
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.address-count", prefix);
+        ret = dict_get_int32 (dict, key, &naddrs);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Key %s not present in "
+                        "dictionary", key);
+                goto out;
+        }
+
+        for (idx = 0; idx < naddrs; idx++) {
+                addr = NULL;
+
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "%s.hostname%d",prefix, idx);
+                ret = dict_get_str (dict, key, &addr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Key %s not present "
+                                "in dictionary", key);
+                        goto out;
+                }
+
+                ret = gd_add_address_to_peer (peerinfo, addr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_ADD_ADDRESS_TO_PEER_FAIL,
+                                "Could not add address to peer");
+                        goto out;
+                }
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* gd_peerinfo_from_dict builds a peerinfo object from the peer details stored
+ * under @prefix in @dict: the uuid comes from "<prefix>.uuid" and the
+ * addresses are filled in by gd_update_peerinfo_from_dict().
+ * Returns the new peerinfo object on success. On failure the partially built
+ * object is passed to glusterd_peerinfo_cleanup() and NULL is returned.
+ */
+glusterd_peerinfo_t *
+gd_peerinfo_from_dict (dict_t *dict, const char *prefix)
+{
+        xlator_t            *this     = NULL;
+        glusterd_conf_t     *conf     = NULL;
+        glusterd_peerinfo_t *peer     = NULL;
+        char                *uuid_txt = NULL;
+        char                 key[100] = {0,};
+        int                  rc       = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", (this != NULL), out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+
+        /* Start from an empty peer; uuid and addresses are filled in below */
+        peer = glusterd_peerinfo_new (GD_FRIEND_STATE_DEFAULT, NULL, NULL,
+                                      0);
+        if (NULL == peer) {
+                rc = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEERINFO_CREATE_FAIL,
+                        "Could not create peerinfo "
+                        "object");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.uuid", prefix);
+        rc = dict_get_str (dict, key, &uuid_txt);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Key %s not present in "
+                        "dictionary", key);
+                goto out;
+        }
+        gf_uuid_parse (uuid_txt, peer->uuid);
+
+        rc = gd_update_peerinfo_from_dict (peer, dict, prefix);
+
+out:
+        if ((peer != NULL) && (rc != 0)) {
+                glusterd_peerinfo_cleanup (peer);
+                peer = NULL;
+        }
+
+        return peer;
+}
+
+/* Store every address of @peerinfo in @dict as "<prefix>.hostname<N>" (N
+ * counting from 0) followed by "<prefix>.hostname_count".
+ * Does nothing and returns 0 when the cluster op-version is below
+ * GD_OP_VERSION_3_6_0. Returns 0 on success, -1 on a failed argument check,
+ * or the non-zero result of the failing dict call.
+ */
+int
+gd_add_peer_hostnames_to_dict (glusterd_peerinfo_t *peerinfo, dict_t *dict,
+                               const char *prefix)
+{
+        xlator_t                 *this     = NULL;
+        glusterd_conf_t          *conf     = NULL;
+        glusterd_peer_hostname_t *entry    = NULL;
+        char                      key[256] = {0,};
+        int                       total    = 0;
+        int                       rc       = -1;
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        /* The op-version gate is evaluated before the argument checks */
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                rc = 0;
+                goto out;
+        }
+
+        GF_VALIDATE_OR_GOTO (this->name, (peerinfo != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+
+        cds_list_for_each_entry (entry, &peerinfo->hostnames, hostname_list) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "%s.hostname%d", prefix, total);
+                rc = dict_set_dynstr_with_alloc (dict, key, entry->hostname);
+                if (rc)
+                        goto out;
+                total++;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.hostname_count", prefix);
+        rc = dict_set_int32 (dict, key, total);
+
+out:
+        return rc;
+}
+
+/* Store the details of @peerinfo in @friends under keys prefixed with
+ * "friend<count>": .uuid, .hostname, .port, .stateId, .state and .connected,
+ * followed by the address list written by gd_add_peer_hostnames_to_dict().
+ * Stops at the first failing call and returns its result; returns 0 when
+ * everything was stored.
+ */
+int
+gd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo, dict_t *friends,
+                            int count)
+{
+        char  key[256] = {0, };
+        char *uuid_txt = NULL;
+        int   rc       = -1;
+
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (friends);
+
+        snprintf (key, sizeof (key), "friend%d.uuid", count);
+        uuid_txt = gd_peer_uuid_str (peerinfo);
+        rc = dict_set_str (friends, key, uuid_txt);
+        if (rc)
+                return rc;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "friend%d.hostname", count);
+        rc = dict_set_str (friends, key, peerinfo->hostname);
+        if (rc)
+                return rc;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "friend%d.port", count);
+        rc = dict_set_int32 (friends, key, peerinfo->port);
+        if (rc)
+                return rc;
+
+        /* the state is stored twice: numeric id, then readable name */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "friend%d.stateId", count);
+        rc = dict_set_int32 (friends, key, peerinfo->state.state);
+        if (rc)
+                return rc;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "friend%d.state", count);
+        rc = dict_set_str (friends, key,
+                           glusterd_friend_sm_state_name_get(peerinfo->state.state));
+        if (rc)
+                return rc;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "friend%d.connected", count);
+        rc = dict_set_int32 (friends, key, (int32_t)peerinfo->connected);
+        if (rc)
+                return rc;
+
+        /* "friend<count>" itself is the prefix for the address list */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "friend%d", count);
+        return gd_add_peer_hostnames_to_dict (peerinfo, friends, key);
+}
+
+/* glusterd_peerinfo_find_by_generation looks up the peer whose generation
+ * number equals @generation. Returns the peerinfo object, or NULL when no
+ * peer carries that generation number.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_generation (uint32_t generation)
+{
+        xlator_t            *this  = NULL;
+        glusterd_conf_t     *conf  = NULL;
+        glusterd_peerinfo_t *peer  = NULL;
+        glusterd_peerinfo_t *match = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                if (peer->generation != generation)
+                        continue;
+
+                gf_msg_debug (this->name, 0,
+                              "Friend found... state: %s",
+                              glusterd_friend_sm_state_name_get (peer->state.state));
+                match = peer; /* Probably should be rcu_dereferenced */
+                break;
+        }
+        rcu_read_unlock ();
+
+        if (match == NULL)
+                gf_msg_debug (this->name, 0,
+                              "Friend with generation: %"PRIu32", not found",
+                              generation);
+
+        return match;
+}
+
+/* Count the entries in the peer list.
+ * Returns the number of peers, or 0 if THIS or its private configuration is
+ * missing.
+ */
+int
+glusterd_get_peers_count (void)
+{
+        int                  count = 0;
+        xlator_t            *this  = NULL;
+        glusterd_conf_t     *conf  = NULL;
+        glusterd_peerinfo_t *peer  = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list)
+                count++;
+        rcu_read_unlock ();
+
+out:
+        return count;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
new file mode 100644
index 00000000000..e74d1ed9536
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_PEER_UTILS_H
+#define _GLUSTERD_PEER_UTILS_H
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+
+int32_t
+glusterd_peerinfo_cleanup (glusterd_peerinfo_t *peerinfo);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_hostname (const char *hoststr);
+
+int
+glusterd_hostname_to_uuid (char *hostname, uuid_t uuid);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_uuid (uuid_t uuid);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_find (uuid_t uuid, const char *hostname);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_new (glusterd_friend_sm_state_t state, uuid_t *uuid,
+ const char *hostname, int port);
+
+gf_boolean_t
+glusterd_chk_peers_connected_befriended (uuid_t skip_uuid);
+
+char *
+glusterd_uuid_to_hostname (uuid_t uuid);
+
+char*
+gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
+
+/* Returns _gf_true only when every peer in the peer list is connected.
+ * Declared with (void) so that it is a real prototype. */
+gf_boolean_t
+glusterd_are_all_peers_up (void);
+
+gf_boolean_t
+glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo,
+ struct cds_list_head *peers,
+ char **down_peerstr);
+
+int32_t
+glusterd_peer_hostname_new (const char *hostname,
+ glusterd_peer_hostname_t **name);
+void
+glusterd_peer_hostname_free (glusterd_peer_hostname_t *name);
+
+gf_boolean_t
+gd_peer_has_address (glusterd_peerinfo_t *peerinfo, const char *address);
+
+int
+gd_add_address_to_peer (glusterd_peerinfo_t *peerinfo, const char *address);
+
+int
+gd_add_friend_to_dict (glusterd_peerinfo_t *friend, dict_t *dict,
+ const char *prefix);
+
+glusterd_peerinfo_t *
+gd_peerinfo_find_from_hostname (const char *hoststr);
+
+glusterd_peerinfo_t *
+gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr);
+
+int
+gd_update_peerinfo_from_dict (glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ const char *prefix);
+
+glusterd_peerinfo_t *
+gd_peerinfo_from_dict (dict_t *dict, const char *prefix);
+
+int
+gd_add_peer_hostnames_to_dict (glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ const char *prefix);
+int
+gd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo, dict_t *friends,
+ int count);
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_generation (uint32_t generation);
+
+/* Returns the number of entries in the peer list.
+ * Declared with (void) so that it is a real prototype. */
+int
+glusterd_get_peers_count (void);
+#endif /* _GLUSTERD_PEER_UTILS_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
new file mode 100644
index 00000000000..7ed03905774
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -0,0 +1,467 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "syscall.h"
+#include "compat-errno.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "glusterd-messages.h"
+#include "rpcsvc.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+
+/*
+ * Probe whether a TCP port can currently be bound.
+ *
+ * A throw-away IPv4 stream socket is bound to @port with an otherwise
+ * zeroed address and then closed again.
+ *
+ * Returns 1 when bind() succeeded, 0 when bind() failed, and -1 when the
+ * probe socket itself could not be created.
+ */
+int
+pmap_port_isfree (int port)
+{
+        struct sockaddr_in addr;
+        int                fd       = -1;
+        int                bind_ret = 0;
+
+        fd = socket (PF_INET, SOCK_STREAM, 0);
+        if (fd == -1)
+                return -1;
+
+        memset (&addr, 0, sizeof (addr));
+        addr.sin_family = PF_INET;
+        addr.sin_port = hton16 (port);
+
+        bind_ret = bind (fd, (struct sockaddr *)&addr, sizeof (addr));
+        sys_close (fd);
+
+        if (bind_ret != 0)
+                return 0;
+
+        return 1;
+}
+
+
+/*
+ * Allocate a zeroed port registry and classify every port from the
+ * configured base port up to GF_PORT_MAX as FREE or FOREIGN, depending on
+ * whether pmap_port_isfree() reports a non-zero value for it.
+ *
+ * Returns the new registry, or NULL if the allocation failed.
+ */
+static struct pmap_registry *
+pmap_registry_new (xlator_t *this)
+{
+        glusterd_conf_t      *conf     = this->private;
+        struct pmap_registry *registry = NULL;
+        int                   port     = 0;
+
+        registry = CALLOC (sizeof (*registry), 1);
+        if (registry == NULL)
+                return NULL;
+
+        registry->base_port = conf->base_port;
+        registry->last_alloc = registry->base_port;
+
+        port = registry->base_port;
+        while (port <= GF_PORT_MAX) {
+                if (pmap_port_isfree (port))
+                        registry->ports[port].type = GF_PMAP_PORT_FREE;
+                else
+                        registry->ports[port].type = GF_PMAP_PORT_FOREIGN;
+                port++;
+        }
+
+        return registry;
+}
+
+
+/*
+ * Return the port registry stored in this xlator's private data, creating
+ * it on first use.  Returns NULL when the registry does not exist yet and
+ * could not be created.
+ */
+struct pmap_registry *
+pmap_registry_get (xlator_t *this)
+{
+        glusterd_conf_t *conf = this->private;
+
+        if (conf->pmap)
+                return conf->pmap;
+
+        /* First use: build the registry.  On failure the slot stays NULL. */
+        conf->pmap = pmap_registry_new (this);
+
+        return conf->pmap;
+}
+
+
+/*
+ * Skip the current whitespace-delimited word and the whitespace that
+ * follows it.
+ *
+ * Returns a pointer to the first character of the next word, or to the
+ * terminating NUL when no further word exists.
+ */
+static char*
+nextword (char *str)
+{
+        /* The <ctype.h> classifiers require a value representable as
+         * unsigned char (or EOF); passing a negative plain char is
+         * undefined behaviour, hence the casts. */
+        while (*str && !isspace ((unsigned char)*str))
+                str++;
+        while (*str && isspace ((unsigned char)*str))
+                str++;
+
+        return str;
+}
+
+/*
+ * Find the port on which @brickname is registered with the given @type.
+ *
+ * Ports are scanned from last_alloc down to base_port.  A port's brickname
+ * entry is treated as a whitespace-separated list of words; a word matches
+ * when strtail() reports a hit and the remainder is empty or starts with
+ * whitespace (presumably strtail() returns the text following the matched
+ * prefix -- confirm against its definition).
+ *
+ * Returns the matching port number, or 0 when nothing matches, when
+ * @brickname is NULL, or when the registry is unavailable.
+ */
+int
+pmap_registry_search (xlator_t *this, const char *brickname,
+                      gf_pmap_port_type_t type)
+{
+        struct pmap_registry *pmap  = NULL;
+        int                   p     = 0;
+        char                 *brck  = NULL;
+        char                 *nbrck = NULL;
+
+        pmap = pmap_registry_get (this);
+        /* pmap_registry_get() returns NULL when the registry could not be
+         * allocated; a NULL name can never match anything. */
+        if (!pmap || !brickname)
+                return 0;
+
+        for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
+                if (!pmap->ports[p].brickname || pmap->ports[p].type != type)
+                        continue;
+
+                for (brck = pmap->ports[p].brickname;;) {
+                        nbrck = strtail (brck, brickname);
+                        /* cast: isspace() on a negative char is undefined */
+                        if (nbrck &&
+                            (!*nbrck || isspace ((unsigned char)*nbrck)))
+                                return p;
+                        brck = nextword (brck);
+                        if (!*brck)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Find the port whose registered transport pointer equals @xprt and whose
+ * type equals @type, scanning from last_alloc down to base_port.
+ *
+ * Returns the port number, or 0 when there is no such port or the registry
+ * is unavailable.
+ */
+int
+pmap_registry_search_by_xprt (xlator_t *this, void *xprt,
+                              gf_pmap_port_type_t type)
+{
+        struct pmap_registry *pmap = NULL;
+        int                   p    = 0;
+
+        pmap = pmap_registry_get (this);
+        /* pmap_registry_get() returns NULL if the registry cannot be
+         * allocated; report "not found" instead of dereferencing it. */
+        if (!pmap)
+                return 0;
+
+        for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
+                if (!pmap->ports[p].xprt)
+                        continue;
+                if (pmap->ports[p].xprt == xprt &&
+                    pmap->ports[p].type == type)
+                        return p;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Return the brick name registered on @port when that port is of type
+ * GF_PMAP_PORT_BRICKSERVER.
+ *
+ * The returned pointer is the registry's own string, not a copy.  Returns
+ * NULL when @port is outside [0, GF_PORT_MAX], when the registry is
+ * unavailable, or when the port is not a brick-server port.
+ */
+char *
+pmap_registry_search_by_port (xlator_t *this, int port)
+{
+        struct pmap_registry *pmap      = NULL;
+        char                 *brickname = NULL;
+
+        /* The port arrives from an RPC request in this file, so reject
+         * negative values as well as too-large ones before indexing. */
+        if (port < 0 || port > GF_PORT_MAX)
+                goto out;
+
+        pmap = pmap_registry_get (this);
+        if (!pmap)
+                goto out;
+
+        if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER)
+                brickname = pmap->ports[port].brickname;
+
+out:
+        return brickname;
+}
+
+
+/*
+ * Lease the lowest port at or above base_port that is marked FREE or
+ * FOREIGN in the registry and that a fresh bind probe confirms is free.
+ *
+ * The chosen port is marked GF_PMAP_PORT_LEASED and last_alloc is raised
+ * if needed.  Returns the port number, or 0 when no port could be leased
+ * or the registry is unavailable.
+ */
+int
+pmap_registry_alloc (xlator_t *this)
+{
+        struct pmap_registry *pmap = NULL;
+        int                   p    = 0;
+        int                   port = 0;
+
+        pmap = pmap_registry_get (this);
+        if (!pmap)
+                return 0;
+
+        for (p = pmap->base_port; p <= GF_PORT_MAX; p++) {
+                /* GF_PMAP_PORT_FOREIGN may be freed up ? */
+                if ((pmap->ports[p].type != GF_PMAP_PORT_FREE) &&
+                    (pmap->ports[p].type != GF_PMAP_PORT_FOREIGN))
+                        continue;
+
+                /* pmap_port_isfree() returns -1 when the probe socket
+                 * cannot be created; only a positive answer proves the
+                 * port is actually available. */
+                if (pmap_port_isfree (p) > 0) {
+                        pmap->ports[p].type = GF_PMAP_PORT_LEASED;
+                        port = p;
+                        break;
+                }
+        }
+
+        if (port > pmap->last_alloc)
+                pmap->last_alloc = port;
+
+        return port;
+}
+
+/*
+ * Record that @brickname, reached through transport @xprt, owns @port with
+ * the given @type.  Any brick name previously stored for the port is freed
+ * and replaced by a private copy of @brickname.
+ *
+ * Requests with a port outside [0, GF_PORT_MAX], or made while the registry
+ * is unavailable, are ignored.  Always returns 0.
+ */
+int
+pmap_registry_bind (xlator_t *this, int port, const char *brickname,
+                    gf_pmap_port_type_t type, void *xprt)
+{
+        struct pmap_registry *pmap = NULL;
+
+        pmap = pmap_registry_get (this);
+        if (!pmap)
+                goto out;
+
+        /* The port is supplied by an RPC sign-in request in this file; a
+         * negative value would index (and free) memory before ports[]. */
+        if (port < 0 || port > GF_PORT_MAX)
+                goto out;
+
+        free (pmap->ports[port].brickname);
+        /* strdup(NULL) is undefined; store NULL for a missing name. */
+        pmap->ports[port].brickname = brickname ? strdup (brickname) : NULL;
+        pmap->ports[port].type = type;
+        pmap->ports[port].xprt = xprt;
+
+        gf_msg ("pmap", GF_LOG_INFO, 0,
+                GD_MSG_BRICK_ADD, "adding brick %s on port %d",
+                brickname ? brickname : "(null)", port);
+
+        if (pmap->last_alloc < port)
+                pmap->last_alloc = port;
+out:
+        return 0;
+}
+
+/*
+ * Release a registry entry.
+ *
+ * The entry is located by, in order of preference: a non-zero @port; a
+ * @brickname containing '/' (looked up with pmap_registry_search); or a
+ * non-NULL @xprt (looked up with pmap_registry_search_by_xprt).  The
+ * entry's brick name is freed and the port is reset to GF_PMAP_PORT_FREE.
+ *
+ * Nothing happens when no registry exists, when a non-zero @port lies
+ * outside [1, GF_PORT_MAX], or when no entry is found.  Always returns 0.
+ */
+int
+pmap_registry_remove (xlator_t *this, int port, const char *brickname,
+                      gf_pmap_port_type_t type, void *xprt)
+{
+        struct pmap_registry *pmap = NULL;
+        int                   p    = 0;
+        glusterd_conf_t      *priv = NULL;
+
+        priv = this->private;
+        pmap = priv->pmap;
+        if (!pmap)
+                goto out;
+
+        if (port) {
+                /* The port comes from an RPC sign-out request in this
+                 * file; a negative value would free and overwrite memory
+                 * before ports[]. */
+                if (port < 0 || port > GF_PORT_MAX)
+                        goto out;
+
+                p = port;
+                goto remove;
+        }
+
+        if (brickname && strchr (brickname, '/')) {
+                p = pmap_registry_search (this, brickname, type);
+                if (p)
+                        goto remove;
+        }
+
+        if (xprt) {
+                p = pmap_registry_search_by_xprt (this, xprt, type);
+                if (p)
+                        goto remove;
+        }
+
+        goto out;
+remove:
+        gf_msg ("pmap", GF_LOG_INFO, 0,
+                GD_MSG_BRICK_REMOVE, "removing brick %s on port %d",
+                pmap->ports[p].brickname, p);
+
+        free (pmap->ports[p].brickname);
+
+        pmap->ports[p].type = GF_PMAP_PORT_FREE;
+        pmap->ports[p].brickname = NULL;
+        pmap->ports[p].xprt = NULL;
+
+out:
+        return 0;
+}
+
+/*
+ * RPC handler body: decode a port-by-brick request, look the brick up among
+ * the GF_PMAP_PORT_BRICKSERVER entries and reply with its port.
+ *
+ * op_ret is set to -1 in the reply when no port is found.  If the request
+ * cannot be decoded, rpc_err is set to GARBAGE_ARGS and the zeroed reply is
+ * submitted.  Always returns 0.
+ */
+int
+__gluster_pmap_portbybrick (rpcsvc_request_t *req)
+{
+        pmap_port_by_brick_req request = {0,};
+        pmap_port_by_brick_rsp reply   = {0,};
+        int                    found   = 0;
+        int                    status  = -1;
+
+        status = xdr_to_generic (req->msg[0], &request,
+                                 (xdrproc_t)xdr_pmap_port_by_brick_req);
+        if (status < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+        } else {
+                found = pmap_registry_search (THIS, request.brick,
+                                              GF_PMAP_PORT_BRICKSERVER);
+                if (found == 0)
+                        reply.op_ret = -1;
+
+                reply.port = found;
+        }
+
+        glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                               (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+        free (request.brick); /* allocated by the XDR decoder */
+
+        return 0;
+}
+
+
+/*
+ * RPC entry point for PORTBYBRICK: hands __gluster_pmap_portbybrick to
+ * glusterd_big_locked_handler and returns its result.
+ */
+int
+gluster_pmap_portbybrick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_portbybrick);
+}
+
+
+/*
+ * RPC handler body: decode a brick-by-port request and reply with the brick
+ * name registered on that port.
+ *
+ * When no brick-server entry exists for the port the reply carries an empty
+ * string and op_ret = -1.  If the request cannot be decoded, rpc_err is set
+ * to GARBAGE_ARGS and the zeroed reply is submitted.  Always returns 0.
+ */
+int
+__gluster_pmap_brickbyport (rpcsvc_request_t *req)
+{
+        pmap_brick_by_port_req request = {0,};
+        pmap_brick_by_port_rsp reply   = {0,};
+        int                    status  = -1;
+
+        status = xdr_to_generic (req->msg[0], &request,
+                                 (xdrproc_t)xdr_pmap_brick_by_port_req);
+        if (status >= 0) {
+                reply.brick = pmap_registry_search_by_port (THIS,
+                                                            request.port);
+                if (reply.brick == NULL) {
+                        reply.op_ret = -1;
+                        reply.brick = "";
+                }
+        } else {
+                req->rpc_err = GARBAGE_ARGS;
+        }
+
+        glusterd_submit_reply (req, &reply, NULL, 0, NULL,
+                               (xdrproc_t)xdr_pmap_brick_by_port_rsp);
+
+        return 0;
+}
+
+
+/*
+ * RPC entry point for BRICKBYPORT: hands __gluster_pmap_brickbyport to
+ * glusterd_big_locked_handler and returns its result.
+ */
+int
+gluster_pmap_brickbyport (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_brickbyport);
+}
+
+
+/*
+ * Set the signed_in flag of @brickinfo to @value.  @brickinfo is
+ * dereferenced unconditionally.  Always returns 0.
+ */
+static int
+glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t value)
+{
+ brickinfo->signed_in = value;
+
+ return 0;
+}
+
+/*
+ * RPC handler body for SIGNIN: registers the requesting brick's port in the
+ * port registry, replies to the caller, and marks the matching brickinfo as
+ * signed in when glusterd_get_brickinfo finds one.
+ *
+ * If the request cannot be decoded, rpc_err is set to GARBAGE_ARGS and the
+ * zeroed reply is submitted.  Always returns 0.
+ */
+int
+__gluster_pmap_signin (rpcsvc_request_t *req)
+{
+ pmap_signin_req args = {0,};
+ pmap_signin_rsp rsp = {0,};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = -1;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ /* The request's transport is stored with the registry entry. */
+ rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+ ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo);
+fail:
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signin_rsp);
+ free (args.brick);//malloced by xdr
+
+ /* On the decode-failure path ret is still negative, so the update is
+ * skipped; otherwise ret is the brickinfo lookup result. */
+ if (!ret)
+ glusterd_brick_update_signin (brickinfo, _gf_true);
+
+ return 0;
+}
+
+
+/*
+ * RPC entry point for SIGNIN: hands __gluster_pmap_signin to
+ * glusterd_big_locked_handler and returns its result.
+ */
+int
+gluster_pmap_signin (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_signin);
+}
+
+
+/*
+ * RPC handler body for SIGNOUT: removes the brick's registry entry, plus
+ * the "<brick>.rdma" entry when the request carries an rdma_port, marks the
+ * matching brickinfo as signed out when glusterd_get_brickinfo finds one,
+ * and replies to the caller.
+ *
+ * If the request cannot be decoded, rpc_err is set to GARBAGE_ARGS and the
+ * zeroed reply is submitted.  Always returns 0.
+ */
+int
+__gluster_pmap_signout (rpcsvc_request_t *req)
+{
+ pmap_signout_req args = {0,};
+ pmap_signout_rsp rsp = {0,};
+ int ret = -1;
+ char brick_path[PATH_MAX] = {0,};
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_signout_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+ ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo);
+ if (args.rdma_port) {
+ /* This second removal overwrites rsp.op_ret from the first one. */
+ snprintf(brick_path, PATH_MAX, "%s.rdma", args.brick);
+ rsp.op_ret = pmap_registry_remove (THIS, args.rdma_port,
+ brick_path, GF_PMAP_PORT_BRICKSERVER,
+ req->trans);
+ }
+
+ /* ret is the brickinfo lookup result at this point. */
+ if (!ret)
+ glusterd_brick_update_signin (brickinfo, _gf_false);
+
+fail:
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signout_rsp);
+ free (args.brick);//malloced by xdr
+
+ return 0;
+}
+
+/*
+ * RPC entry point for SIGNOUT: hands __gluster_pmap_signout to
+ * glusterd_big_locked_handler and returns its result.
+ */
+int
+gluster_pmap_signout (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_signout);
+}
+
+/*
+ * Actor table of the portmap RPC program, indexed by procedure number.
+ * Each entry names the procedure and its handler; GF_PMAP_NULL has none.
+ */
+rpcsvc_actor_t gluster_pmap_actors[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, 0, DRC_NA},
+ [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, gluster_pmap_portbybrick, NULL, 0, DRC_NA},
+ [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, gluster_pmap_brickbyport, NULL, 0, DRC_NA},
+ [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, gluster_pmap_signin, NULL, 0, DRC_NA},
+ [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, gluster_pmap_signout, NULL, 0, DRC_NA},
+};
+
+
+/*
+ * RPC program descriptor for the portmap service; its procedures are the
+ * entries of gluster_pmap_actors.
+ */
+struct rpcsvc_program gluster_pmap_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .actors = gluster_pmap_actors,
+ .numactors = GF_PMAP_MAXVALUE,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
new file mode 100644
index 00000000000..95ded04208d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_PMAP_H_
+#define _GLUSTERD_PMAP_H_
+
+#include <pthread.h>
+#include "compat-uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "rpcsvc.h"
+
+
+/* State kept for a single port in the registry. */
+struct pmap_port_status {
+ gf_pmap_port_type_t type; /* who holds the port (free, leased, brick, ...) */
+ char *brickname; /* name(s) registered on the port, or NULL */
+ void *xprt; /* transport supplied at bind time, or NULL */
+};
+
+/* Per-port registry; the ports array is indexed directly by port number. */
+struct pmap_registry {
+ int base_port; /* lowest port the registry manages */
+ int last_alloc; /* highest port handed out or bound so far */
+ struct pmap_port_status ports[65536];
+};
+
+int pmap_registry_alloc (xlator_t *this);
+int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt);
+int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt);
+int pmap_registry_search (xlator_t *this, const char *brickname,
+ gf_pmap_port_type_t type);
+struct pmap_registry *pmap_registry_get (xlator_t *this);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
new file mode 100644
index 00000000000..9f934629330
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <limits.h>
+#include <signal.h>
+
+#include "common-utils.h"
+#include "xlator.h"
+#include "logging.h"
+#include "glusterd-messages.h"
+#include "glusterd-proc-mgmt.h"
+
+/*
+ * Fill in @proc by copying each string argument into the matching
+ * fixed-size field with snprintf(), so over-long inputs are truncated.
+ *
+ * Returns 0 on success, or the negative snprintf() result of the first
+ * copy that failed; later fields are left untouched in that case.
+ */
+int
+glusterd_proc_init (glusterd_proc_t *proc, char *name, char *pidfile,
+                    char *logdir, char *logfile, char *volfile, char *volfileid,
+                    char *volfileserver)
+{
+        int written = -1;
+
+        written = snprintf (proc->name, sizeof (proc->name), "%s", name);
+        if (written < 0)
+                return written;
+
+        written = snprintf (proc->pidfile, sizeof (proc->pidfile), "%s",
+                            pidfile);
+        if (written < 0)
+                return written;
+
+        written = snprintf (proc->logdir, sizeof (proc->logdir), "%s",
+                            logdir);
+        if (written < 0)
+                return written;
+
+        written = snprintf (proc->logfile, sizeof (proc->logfile), "%s",
+                            logfile);
+        if (written < 0)
+                return written;
+
+        written = snprintf (proc->volfile, sizeof (proc->volfile), "%s",
+                            volfile);
+        if (written < 0)
+                return written;
+
+        written = snprintf (proc->volfileid, sizeof (proc->volfileid), "%s",
+                            volfileid);
+        if (written < 0)
+                return written;
+
+        written = snprintf (proc->volfileserver,
+                            sizeof (proc->volfileserver), "%s",
+                            volfileserver);
+        if (written < 0)
+                return written;
+
+        /* A non-negative character count means success. */
+        return 0;
+}
+
+/*
+ * Stop the daemon described by @proc by sending signal @sig to the pid
+ * that gf_is_service_running() reports for proc->pidfile.
+ *
+ * When @flags is PROC_STOP_FORCE, the function sleeps for one second and
+ * sends SIGKILL if the service is still reported as running.
+ *
+ * Returns 0 when the service was already stopped, when kill() reports
+ * ESRCH, or when the forced-stop sequence completes; otherwise the result
+ * of the failing kill().
+ */
+int
+glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags)
+{
+
+ /* NB: Copy-paste code from glusterd_service_stop, the source may be
+ * removed once all daemon management use proc */
+
+ int32_t ret = -1;
+ pid_t pid = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!gf_is_service_running (proc->pidfile, &pid)) {
+ ret = 0;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_ALREADY_STOPPED, "%s already stopped",
+ proc->name);
+ goto out;
+ }
+ gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+ "Stopping %s daemon running in pid: " "%d", proc->name, pid);
+
+ ret = kill (pid, sig);
+ if (ret) {
+ switch (errno) {
+ case ESRCH:
+ /* The process is already gone: treated as success. */
+ gf_msg_debug (this->name, 0, "%s is already "
+ "stopped", proc->name);
+ ret = 0;
+ goto out;
+ default:
+ /* Any other error is logged; ret stays non-zero. */
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SVC_KILL_FAIL, "Unable to kill %s "
+ "service, reason:%s", proc->name,
+ strerror (errno));
+ }
+ }
+ /* Without PROC_STOP_FORCE the result of the first kill() is final. */
+ if (flags != PROC_STOP_FORCE)
+ goto out;
+
+ /* Give the process a moment to exit before escalating to SIGKILL. */
+ sleep (1);
+ if (gf_is_service_running (proc->pidfile, NULL)) {
+ ret = kill (pid, SIGKILL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_PID_KILL_FAIL, "Unable to kill pid:%d, "
+ "reason:%s", pid, strerror(errno));
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Return the pid that gf_is_service_running() stores for proc->pidfile.
+ * The running/not-running answer itself is ignored; the result is -1 if
+ * the helper leaves the pid untouched.
+ */
+int
+glusterd_proc_get_pid (glusterd_proc_t *proc)
+{
+ int pid = -1;
+ (void) gf_is_service_running (proc->pidfile, &pid);
+ return pid;
+}
+
+/*
+ * Report whether the daemon described by @proc is running, as determined
+ * by gf_is_service_running() on proc->pidfile.
+ */
+int
+glusterd_proc_is_running (glusterd_proc_t *proc)
+{
+ return gf_is_service_running (proc->pidfile, NULL);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h
new file mode 100644
index 00000000000..f5235171816
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_PROC_MGMT_H_
+#define _GLUSTERD_PROC_MGMT_H_
+
+typedef struct glusterd_proc_ glusterd_proc_t;
+
+/*
+ * Flags describing how a daemon should be started or stopped.
+ * glusterd_proc_stop() escalates to SIGKILL only for PROC_STOP_FORCE.
+ */
+enum proc_flags {
+ PROC_NONE = 0,
+ PROC_START,
+ PROC_START_NO_WAIT,
+ PROC_STOP,
+ PROC_STOP_FORCE
+};
+
+/*
+ * Description of a managed daemon process.  All fields are fixed-size
+ * string buffers filled in by glusterd_proc_init().
+ */
+struct glusterd_proc_ {
+ char name[PATH_MAX];
+ char pidfile[PATH_MAX]; /* consulted to find the pid / running state */
+ char logdir[PATH_MAX];
+ char logfile[PATH_MAX];
+ char volfile[PATH_MAX];
+ char volfileserver[PATH_MAX];
+ char volfileid[256];
+};
+
+int
+glusterd_proc_init (glusterd_proc_t *proc, char *name, char *pidfile,
+ char *logdir, char *logfile, char *volfile, char *volfileid,
+ char *volfileserver);
+
+int
+glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags);
+
+int
+glusterd_proc_is_running (glusterd_proc_t *proc);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
new file mode 100644
index 00000000000..0d7113bc1a0
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -0,0 +1,2107 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "run.h"
+#include "syscall.h"
+#include "byte-order.h"
+#include "compat-errno.h"
+#include "quota-common-utils.h"
+
+#include <sys/wait.h>
+#include <dlfcn.h>
+
+#ifndef _PATH_SETFATTR
+# ifdef GF_LINUX_HOST_OS
+# define _PATH_SETFATTR "/usr/bin/setfattr"
+# endif
+# ifdef __NetBSD__
+# define _PATH_SETFATTR "/usr/pkg/bin/setfattr"
+# endif
+#endif
+
+/* Any negative pid to make it special client */
+#define QUOTA_CRAWL_PID "-100"
+
+/*
+ * Human-readable name of each quota sub-command, indexed by its
+ * GF_QUOTA_OPTION_TYPE_* value.  Values without an explicit initializer
+ * below, and the MAX sentinel, map to NULL.
+ */
+const char *gd_quota_op_list[GF_QUOTA_OPTION_TYPE_MAX + 1] = {
+ [GF_QUOTA_OPTION_TYPE_NONE] = "none",
+ [GF_QUOTA_OPTION_TYPE_ENABLE] = "enable",
+ [GF_QUOTA_OPTION_TYPE_DISABLE] = "disable",
+ [GF_QUOTA_OPTION_TYPE_LIMIT_USAGE] = "limit-usage",
+ [GF_QUOTA_OPTION_TYPE_REMOVE] = "remove",
+ [GF_QUOTA_OPTION_TYPE_LIST] = "list",
+ [GF_QUOTA_OPTION_TYPE_VERSION] = "version",
+ [GF_QUOTA_OPTION_TYPE_ALERT_TIME] = "alert-time",
+ [GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT] = "soft-timeout",
+ [GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT] = "hard-timeout",
+ [GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT] = "default-soft-limit",
+ [GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS] = "limit-objects",
+ [GF_QUOTA_OPTION_TYPE_LIST_OBJECTS] = "list-objects",
+ [GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS] = "remove-objects",
+ [GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS] = "enable-objects",
+ [GF_QUOTA_OPTION_TYPE_MAX] = NULL
+};
+
+int
+glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
+ char *gfid_str, int opcode, char **op_errstr);
+
+/*
+ * Decide whether quota sub-command @type may run at the cluster's current
+ * op-version.
+ *
+ * Returns _gf_true when the command is allowed, _gf_false otherwise.  On
+ * refusal, when @op_errstr is non-NULL and the configuration is available,
+ * *op_errstr receives an allocated explanation.
+ */
+gf_boolean_t
+glusterd_is_quota_supported (int32_t type, char **op_errstr)
+{
+        xlator_t        *this      = NULL;
+        glusterd_conf_t *conf      = NULL;
+        gf_boolean_t     supported = _gf_false;
+        const char      *opname    = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        if ((conf->op_version == GD_OP_VERSION_MIN) &&
+            (type > GF_QUOTA_OPTION_TYPE_VERSION))
+                goto out;
+
+        if ((conf->op_version < GD_OP_VERSION_3_7_0) &&
+            (type > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS))
+                goto out;
+
+        /* Quota Operations that change quota.conf shouldn't
+         * be allowed as the quota.conf format changes in 3.7
+         */
+        if ((conf->op_version < GD_OP_VERSION_3_7_0) &&
+            (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+             type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE ||
+             type == GF_QUOTA_OPTION_TYPE_REMOVE))
+                goto out;
+
+        /* Quota xattr version implemented in 3.7.6
+         * quota-version is incremented when quota is enabled
+         * Quota enable and disable performance enhancement has been done
+         * in version 3.7.12.
+         * so don't allow enabling/disabling quota in heterogeneous
+         * cluster during upgrade
+         */
+        if (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+            type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS ||
+            type == GF_QUOTA_OPTION_TYPE_DISABLE) {
+                if (conf->op_version < GD_OP_VERSION_3_7_12)
+                        goto out;
+        }
+
+        supported = _gf_true;
+
+out:
+        if (!supported && op_errstr != NULL && conf) {
+                /* @type is caller-supplied: only index the name table when
+                 * it is in range, and never hand a NULL entry to "%s". */
+                if (type >= 0 && type <= GF_QUOTA_OPTION_TYPE_MAX)
+                        opname = gd_quota_op_list[type];
+                if (!opname)
+                        opname = "unknown";
+
+                gf_asprintf (op_errstr, "Volume quota failed. The cluster is "
+                             "operating at version %d. Quota command"
+                             " %s is unavailable in this version.",
+                             conf->op_version, opname);
+        }
+
+        return supported;
+}
+
+/*
+ * RPC handler body for the CLI "volume quota" command.
+ *
+ * Decodes the request, unserializes its dictionary, reads "volname" and
+ * "type" from it, rejects commands that glusterd_is_quota_supported()
+ * refuses, and otherwise starts the GD_OP_QUOTA operation through
+ * glusterd_op_begin_synctask().
+ *
+ * On any failure a CLI response carrying an error message is sent, and the
+ * result of sending it is returned; otherwise the result of starting the
+ * operation is returned.
+ */
+int
+__glusterd_handle_quota (rpcsvc_request_t *req)
+{
+        int32_t          ret       = -1;
+        gf_cli_req       cli_req   = {{0,}};
+        dict_t          *dict      = NULL;
+        glusterd_op_t    cli_op    = GD_OP_QUOTA;
+        char            *volname   = NULL;
+        int32_t          type      = 0;
+        char             msg[2048] = {0,};
+        const char      *opname    = NULL;
+        xlator_t        *this      = NULL;
+        glusterd_conf_t *conf      = NULL;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg, sizeof (msg), "Unable to decode the "
+                                  "command");
+                        goto out;
+                } else {
+                        /* The dict takes over the decoded buffer. */
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Unable to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name, "
+                        "while handling quota command");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Unable to get type of command");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get type of cmd, "
+                        "while handling quota command");
+                goto out;
+        }
+
+        if (!glusterd_is_quota_supported (type, NULL)) {
+                /* "type" comes straight from the request dictionary: only
+                 * index the name table when it is in range, and never pass
+                 * a NULL entry to "%s". */
+                if (type >= 0 && type <= GF_QUOTA_OPTION_TYPE_MAX)
+                        opname = gd_quota_op_list[type];
+                if (!opname)
+                        opname = "unknown";
+
+                snprintf (msg, sizeof (msg), "Volume quota failed. The cluster "
+                          "is operating at version %d. Quota command"
+                          " %s is unavailable in this version.",
+                          conf->op_version, opname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_QUOTA, dict);
+
+out:
+        if (ret) {
+                if (msg[0] == '\0')
+                        snprintf (msg, sizeof (msg), "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, msg);
+        }
+
+        return ret;
+}
+
+/*
+ * RPC entry point for the quota command: hands __glusterd_handle_quota to
+ * glusterd_big_locked_handler and returns its result.
+ */
+int
+glusterd_handle_quota (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_quota);
+}
+
+/*
+ * Report whether the quota feature is switched on for @volinfo.
+ *
+ * Returns 0 when VKEY_FEATURES_QUOTA evaluates to true, -1 when it is
+ * false or when its value could not be determined (the latter is also
+ * logged).
+ */
+int32_t
+glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo)
+{
+        int enabled = _gf_false;
+
+        enabled = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
+
+        if (enabled == -1) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_QUOTA_GET_STAT_FAIL,
+                        "failed to get the quota status");
+                return -1;
+        }
+
+        return (enabled == _gf_false) ? -1 : 0;
+}
+
+/*
+ * Start a background file-system crawl for one brick.
+ *
+ * Steps, as implemented below:
+ * 1. create a unique temporary directory under the glusterd tmp path;
+ * 2. run a glusterfs client for this brick's client_per_brick volfile with
+ * that directory as its last argument (the big lock is dropped while
+ * the command runs);
+ * 3. fork twice so that the crawl runs in a grandchild; the grandchild
+ * chdir()s into the directory, starts "find", records the find pid in
+ * "<pid_dir>/<brickpath>.pid", lazily unmounts the directory and exits.
+ *
+ * For enable/enable-objects, find only walks the tree; for disable, it sets
+ * VIRTUAL_QUOTA_XATTR_CLEANUP_KEY on every entry.
+ *
+ * Returns 0 when the first child exited with EXIT_SUCCESS, -1 on any
+ * failure in the parent.
+ * NOTE(review): on failure after mkdtemp() the temporary directory is not
+ * removed here -- confirm whether something else cleans it up.
+ */
+int32_t
+_glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brick, int type,
+ char *pid_dir)
+{
+ pid_t pid;
+ int32_t ret = -1;
+ int status = 0;
+ char mountdir[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ char brickpath[PATH_MAX] = {0,};
+ char vol_id[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ runner_t runner = {0};
+ char *volfileserver = NULL;
+ FILE *pidfp = NULL;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", THIS, out);
+
+ GLUSTERD_GET_TMP_PATH (mountdir, "/");
+ ret = sys_mkdir (mountdir, 0777);
+ if (ret && errno != EEXIST) {
+ gf_msg (THIS->name, GF_LOG_WARNING, errno,
+ GD_MSG_MOUNT_REQ_FAIL, "failed to create temporary "
+ "directory %s", mountdir);
+ ret = -1;
+ goto out;
+ }
+
+ /* mkdtemp() replaces the trailing XXXXXX with a unique suffix. */
+ strcat (mountdir, "mntXXXXXX");
+ if (mkdtemp (mountdir) == NULL) {
+ gf_msg (THIS->name, GF_LOG_WARNING, errno,
+ GD_MSG_MOUNT_REQ_FAIL, "failed to create a temporary "
+ "mount directory: %s", mountdir);
+ ret = -1;
+ goto out;
+ }
+
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brick->path, brickpath);
+ snprintf (logfile, sizeof (logfile),
+ DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY"/%s.log",
+ brickpath);
+
+ /* Fall back to localhost when no bind-address option is set. */
+ if (dict_get_str (THIS->options, "transport.socket.bind-address",
+ &volfileserver) != 0)
+ volfileserver = "localhost";
+
+ snprintf (vol_id, sizeof (vol_id), "client_per_brick/%s.%s.%s.%s.vol",
+ volinfo->volname, "client", brick->hostname, brickpath);
+
+ runinit (&runner);
+
+ /* The two command lines differ only in the --use-readdirp value. */
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS)
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", volfileserver,
+ "--volfile-id", vol_id,
+ "--use-readdirp=yes",
+ "--client-pid", QUOTA_CRAWL_PID,
+ "-l", logfile, mountdir, NULL);
+ else
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", volfileserver,
+ "--volfile-id", vol_id,
+ "--use-readdirp=no",
+ "--client-pid", QUOTA_CRAWL_PID,
+ "-l", logfile, mountdir, NULL);
+
+ /* The big lock is released for the duration of the external command. */
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run_reuse (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret == -1) {
+ runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
+ runner_end (&runner);
+ goto out;
+ }
+ runner_end (&runner);
+
+ if ((pid = fork ()) < 0) {
+ gf_msg (THIS->name, GF_LOG_WARNING, 0,
+ GD_MSG_FORK_FAIL, "fork from parent failed");
+ ret = -1;
+ goto out;
+ } else if (pid == 0) {//first child
+ /* fork one more to not hold back main process on
+ * blocking call below
+ */
+ pid = fork ();
+ /* The first child exits at once: success if the second fork
+ * worked, failure otherwise.  Only the grandchild continues. */
+ if (pid)
+ _exit (pid > 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+
+ ret = chdir (mountdir);
+ if (ret == -1) {
+ gf_msg (THIS->name, GF_LOG_WARNING, errno,
+ GD_MSG_DIR_OP_FAILED, "chdir %s failed",
+ mountdir);
+ /* NOTE(review): exit() here, _exit() everywhere else in
+ * the forked code -- confirm this is intended. */
+ exit (EXIT_FAILURE);
+ }
+ runinit (&runner);
+
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS)
+ runner_add_args (&runner, "/usr/bin/find", ".", NULL);
+
+ else if (type == GF_QUOTA_OPTION_TYPE_DISABLE) {
+
+ /* NOTE(review): each variant passes a literal "\\" argument
+ * before ";"; presumably the runner executes without a shell,
+ * in which case the backslash reaches the xattr tool as an
+ * extra operand -- verify. */
+#if defined(GF_DARWIN_HOST_OS)
+ runner_add_args (&runner, "/usr/bin/find", ".",
+ "-exec", "/usr/bin/xattr", "-w",
+ VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "1",
+ "{}", "\\", ";", NULL);
+#elif defined(__FreeBSD__)
+ runner_add_args (&runner, "/usr/bin/find", ".",
+ "-exec", "/usr/sbin/setextattr",
+ EXTATTR_NAMESPACE_USER,
+ VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "1",
+ "{}", "\\", ";", NULL);
+#else
+ runner_add_args (&runner, "/usr/bin/find", ".",
+ "-exec", _PATH_SETFATTR, "-n",
+ VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "-v",
+ "1", "{}", "\\", ";", NULL);
+#endif
+
+ }
+
+ if (runner_start (&runner) == -1) {
+ gf_umount_lazy ("glusterd", mountdir, 1);
+ _exit (EXIT_FAILURE);
+ }
+
+ /* Record the crawler's pid; a failed fopen() is silently ignored. */
+ snprintf (pidfile, sizeof (pidfile), "%s/%s.pid", pid_dir,
+ brickpath);
+ pidfp = fopen (pidfile, "w");
+ if (pidfp) {
+ fprintf (pidfp, "%d\n", runner.chpid);
+ fflush (pidfp);
+ fclose (pidfp);
+ }
+
+#ifndef GF_LINUX_HOST_OS
+ runner_end (&runner); /* blocks in waitpid */
+#endif
+ gf_umount_lazy ("glusterd", mountdir, 1);
+
+ _exit (EXIT_SUCCESS);
+ }
+ /* Parent: reap the first child, which exits right after its own fork. */
+ ret = (waitpid (pid, &status, 0) == pid &&
+ WIFEXITED (status) && WEXITSTATUS (status) == EXIT_SUCCESS) ? 0 : -1;
+
+out:
+ return ret;
+}
+
+/*
+ * Stop every quota crawl recorded in the pid directory for (@volinfo,
+ * @type): each directory entry is treated as a pidfile, its service is
+ * stopped with SIGKILL through glusterd_service_stop_nolock(), and the
+ * pidfile is unlinked.  Does nothing if the directory cannot be opened.
+ */
+void
+glusterd_stop_all_quota_crawl_service (glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo, int type)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+ char pid_dir[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+
+ GLUSTERD_GET_QUOTA_CRAWL_PIDDIR (pid_dir, volinfo, type);
+
+ dir = sys_opendir (pid_dir);
+ if (dir == NULL)
+ return;
+
+ /* The macro fetches the next entry; the loop ends when it yields NULL.
+ * (Presumably it skips "." and ".." -- confirm in its definition.) */
+ GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+ while (entry) {
+ snprintf (pidfile, sizeof (pidfile), "%s/%s",
+ pid_dir, entry->d_name);
+
+ glusterd_service_stop_nolock ("quota_crawl", pidfile, SIGKILL,
+ _gf_true);
+ sys_unlink (pidfile);
+
+ GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+ }
+ sys_closedir (dir);
+}
+
+/*
+ * Start quota crawls for every brick of @volinfo that belongs to this node.
+ *
+ * Generates the per-brick client volfiles, makes sure the crawl log and pid
+ * directories exist, stops crawls that are already running (see the comment
+ * below), then launches one crawl per local brick.
+ *
+ * Returns 0 on success, or the non-zero result of the first step that
+ * failed; remaining bricks are not processed after a failure.
+ */
+int32_t
+glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo, int type)
+{
+ int32_t ret = -1;
+ glusterd_brickinfo_t *brick = NULL;
+ char pid_dir[PATH_MAX] = {0, };
+
+ GF_VALIDATE_OR_GOTO ("glusterd", THIS, out);
+
+ ret = glusterd_generate_client_per_brick_volfile (volinfo);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_OP_FAILED,
+ "failed to generate client volume file");
+ goto out;
+ }
+
+ ret = mkdir_p (DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, 0777, _gf_true);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_GLUSTERD_OP_FAILED,
+ "failed to create dir %s: %s",
+ DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, strerror (errno));
+ goto out;
+ }
+
+ GLUSTERD_GET_QUOTA_CRAWL_PIDDIR (pid_dir, volinfo, type);
+ ret = mkdir_p (pid_dir, 0777, _gf_true);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_GLUSTERD_OP_FAILED,
+ "failed to create dir %s: %s",
+ pid_dir, strerror (errno));
+ goto out;
+ }
+
+ /* When quota enable is performed, stop the already running enable crawl
+ * process and start a fresh crawl process. Let a disable process
+ * continue, if running, to clean up the older xattrs.
+ * When quota disable is performed, stop both enable/disable crawl
+ * processes and start a fresh crawl process to clean up the xattrs.
+ */
+ glusterd_stop_all_quota_crawl_service (priv, volinfo,
+ GF_QUOTA_OPTION_TYPE_ENABLE);
+ if (type == GF_QUOTA_OPTION_TYPE_DISABLE)
+ glusterd_stop_all_quota_crawl_service (priv, volinfo,
+ GF_QUOTA_OPTION_TYPE_DISABLE);
+
+ cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ /* Skip bricks whose uuid differs from this node's uuid. */
+ if (gf_uuid_compare (brick->uuid, MY_UUID))
+ continue;
+
+ ret = _glusterd_quota_initiate_fs_crawl (priv, volinfo, brick,
+ type, pid_dir);
+
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Store the volume's default soft limit in @rsp_dict under the key
+ * "default-soft-limit".  The value of "features.default-soft-limit" is used
+ * when the volume has one, otherwise "80%".
+ *
+ * Returns 0 on success, -1 when @rsp_dict is NULL or the value could not
+ * be duplicated, or the non-zero dict_set_dynstr() result on failure.
+ */
+int32_t
+glusterd_quota_get_default_soft_limit (glusterd_volinfo_t *volinfo,
+                                       dict_t *rsp_dict)
+{
+        int32_t          ret           = 0;
+        xlator_t        *this          = NULL;
+        glusterd_conf_t *conf          = NULL;
+        char            *default_limit = NULL;
+        char            *val           = NULL;
+
+        if (rsp_dict == NULL)
+                return -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        /* A failed lookup simply leaves default_limit NULL, which selects
+         * the built-in default below. */
+        (void) glusterd_volinfo_get (volinfo, "features.default-soft-limit",
+                                     &default_limit);
+
+        val = gf_strdup (default_limit ? default_limit : "80%");
+        if (!val) {
+                /* Do not store a NULL string in the response dict. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, "default-soft-limit", val);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set default "
+                        "soft-limit into dict");
+                goto out;
+        }
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Enable inode (object) quota on @volinfo.
+ *
+ * Requires the volume to be started, quota to be enabled, and inode quota
+ * not to be enabled already.  On success VKEY_FEATURES_INODE_QUOTA is set
+ * to "on" in the volume dict, *crawl is set to _gf_true and the quota
+ * configuration is stored.
+ *
+ * Returns 0 on success and non-zero on failure; on failure *op_errstr
+ * receives an allocated message unless one is already set.
+ */
+int32_t
+glusterd_inode_quota_enable (glusterd_volinfo_t *volinfo, char **op_errstr,
+                             gf_boolean_t *crawl)
+{
+        int32_t   ret  = -1;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, crawl, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        if (glusterd_is_volume_started (volinfo) == 0) {
+                *op_errstr = gf_strdup ("Volume is stopped, start volume "
+                                        "to enable inode quota.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+        if (ret != 0) {
+                *op_errstr = gf_strdup ("Quota is disabled. Enabling quota "
+                                        "will enable inode quota");
+                ret = -1;
+                goto out;
+        }
+
+        if (glusterd_is_volume_inode_quota_enabled (volinfo)) {
+                *op_errstr = gf_strdup ("Inode Quota is already enabled");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict,
+                                          VKEY_FEATURES_INODE_QUOTA, "on");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "dict set failed");
+                goto out;
+        }
+
+        *crawl = _gf_true;
+
+        /* NOTE(review): the store result is deliberately not propagated,
+         * as in the original code -- confirm that this is intended. */
+        (void) glusterd_store_quota_config (volinfo, NULL, NULL,
+                                            GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS,
+                                            op_errstr);
+
+        ret = 0;
+out:
+        /* volinfo may be NULL here when its validation above failed, so it
+         * must be checked before volname is read. */
+        if (ret && volinfo && op_errstr && !*op_errstr)
+                gf_asprintf (op_errstr, "Enabling inode quota on volume %s has "
+                             "been unsuccessful", volinfo->volname);
+        return ret;
+}
+
+/*
+ * Enable quota on @volinfo.
+ *
+ * Requires the volume to be started and quota not to be enabled already.
+ * On success VKEY_FEATURES_QUOTA, VKEY_FEATURES_INODE_QUOTA and
+ * "features.quota-deem-statfs" are set to "on" in the volume dict, *crawl
+ * is set to _gf_true and the quota configuration is stored.
+ *
+ * Returns 0 on success and non-zero on failure; on failure *op_errstr
+ * receives an allocated message unless one is already set.
+ */
+int32_t
+glusterd_quota_enable (glusterd_volinfo_t *volinfo, char **op_errstr,
+                       gf_boolean_t *crawl)
+{
+        int32_t   ret  = -1;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, crawl, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        if (glusterd_is_volume_started (volinfo) == 0) {
+                *op_errstr = gf_strdup ("Volume is stopped, start volume "
+                                        "to enable quota.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+        if (ret == 0) {
+                *op_errstr = gf_strdup ("Quota is already enabled");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, VKEY_FEATURES_QUOTA,
+                                          "on");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "dict set failed");
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict,
+                                          VKEY_FEATURES_INODE_QUOTA, "on");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "dict set failed");
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict,
+                                          "features.quota-deem-statfs",
+                                          "on");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "setting quota-deem-statfs"
+                        "in volinfo failed");
+                goto out;
+        }
+
+        *crawl = _gf_true;
+
+        /* NOTE(review): the store result is deliberately not propagated,
+         * as in the original code -- confirm that this is intended. */
+        (void) glusterd_store_quota_config (volinfo, NULL, NULL,
+                                            GF_QUOTA_OPTION_TYPE_ENABLE,
+                                            op_errstr);
+
+        ret = 0;
+out:
+        /* volinfo may be NULL here when its validation above failed, so it
+         * must be checked before volname is read. */
+        if (ret && volinfo && op_errstr && !*op_errstr)
+                gf_asprintf (op_errstr, "Enabling quota on volume %s has been "
+                             "unsuccessful", volinfo->volname);
+        return ret;
+}
+
+/*
+ * glusterd_quota_disable() switches off quota (and inode quota) for
+ * @volinfo.
+ *
+ * VKEY_FEATURES_QUOTA and VKEY_FEATURES_INODE_QUOTA are set to "off" in
+ * volinfo->dict, every quota tunable listed in quota_options[] that is
+ * currently set is deleted from the dict, the auxiliary mount of the volume
+ * is removed and the on-disk quota store is cleaned up.  *@crawl is set to
+ * _gf_true on success.
+ *
+ * Returns 0 on success and non-zero on failure, in which case *@op_errstr
+ * carries a message.
+ */
+int32_t
+glusterd_quota_disable (glusterd_volinfo_t *volinfo, char **op_errstr,
+                        gf_boolean_t *crawl)
+{
+        int32_t    ret            = -1;
+        int        i              =  0;
+        char      *value          = NULL;
+        xlator_t  *this           = NULL;
+        glusterd_conf_t *conf     = NULL;
+        char *quota_options[]     = {"features.soft-timeout",
+                                     "features.hard-timeout",
+                                     "features.alert-time",
+                                     "features.default-soft-limit",
+                                     "features.quota-deem-statfs",
+                                     "features.quota-timeout", NULL};
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        /* crawl is written below; validate it like the enable paths do */
+        GF_VALIDATE_OR_GOTO (this->name, crawl, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+        if (ret == -1) {
+                *op_errstr = gf_strdup ("Quota is already disabled");
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, VKEY_FEATURES_QUOTA,
+                                          "off");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "dict set failed");
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (volinfo->dict,
+                                          VKEY_FEATURES_INODE_QUOTA, "off");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "dict set failed");
+                goto out;
+        }
+
+        /* Drop every quota tunable that is set; a lookup failure is only
+         * logged and does not abort the disable.
+         */
+        for (i = 0; quota_options [i]; i++) {
+                ret = glusterd_volinfo_get (volinfo, quota_options[i], &value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "failed to get option"
+                                " %s", quota_options[i]);
+                } else {
+                        dict_del (volinfo->dict, quota_options[i]);
+                }
+        }
+
+        //Remove aux mount of the volume on every node in the cluster
+        ret = glusterd_remove_auxiliary_mount (volinfo->volname);
+        if (ret)
+                goto out;
+
+        *crawl = _gf_true;
+
+        (void) glusterd_clean_up_quota_store (volinfo);
+
+        ret = 0;
+out:
+        /* volinfo is NULL here when its own validation above failed */
+        if (ret && op_errstr && !*op_errstr)
+                gf_asprintf (op_errstr, "Disabling quota on volume %s has been "
+                             "unsuccessful",
+                             volinfo ? volinfo->volname : "(null)");
+        return ret;
+}
+
+/*
+ * glusterd_set_quota_limit() stores a quota limit as the extended attribute
+ * @key on @path, addressed through the auxiliary quota mount of @volname.
+ *
+ * @hard_limit is parsed as a byte size and @soft_limit, when given, as a
+ * percentage.  When @soft_limit is NULL the soft limit already stored in the
+ * xattr is kept, or -1 is used if the xattr does not exist yet.  Both values
+ * are converted with hton64() before being written.
+ *
+ * Returns 0 on success and non-zero on failure; most failure paths fill
+ * *@op_errstr.
+ */
+static int
+glusterd_set_quota_limit (char *volname, char *path, char *hard_limit,
+ char *soft_limit, char *key, char **op_errstr)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char abspath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ quota_limits_t existing_limit = {0,};
+ quota_limits_t new_limit = {0,};
+ double soft_limit_double = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Build the path of the directory inside the auxiliary mount */
+ GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (abspath, volname, path);
+ ret = gf_lstat_dir (abspath, NULL);
+ if (ret) {
+ gf_asprintf (op_errstr, "Failed to find the directory %s. "
+ "Reason : %s", abspath, strerror (errno));
+ goto out;
+ }
+
+ if (!soft_limit) {
+ /* No soft limit supplied: reuse the one currently stored */
+ ret = sys_lgetxattr (abspath, key, (void *)&existing_limit,
+ sizeof (existing_limit));
+ if (ret < 0) {
+ switch (errno) {
+#if defined(ENOATTR) && (ENOATTR != ENODATA)
+ case ENODATA: /* FALLTHROUGH */
+#endif
+ case ENOATTR:
+ /* xattr not present yet: soft limit becomes -1 */
+ existing_limit.sl = -1;
+ break;
+ default:
+ gf_asprintf (op_errstr, "Failed to get the "
+ "xattr %s from %s. Reason : %s",
+ key, abspath, strerror (errno));
+ goto out;
+ }
+ } else {
+ /* Stored values are converted back with ntoh64() */
+ existing_limit.hl = ntoh64 (existing_limit.hl);
+ existing_limit.sl = ntoh64 (existing_limit.sl);
+ }
+ new_limit.sl = existing_limit.sl;
+
+ } else {
+ ret = gf_string2percent (soft_limit, &soft_limit_double);
+ if (ret)
+ goto out;
+ new_limit.sl = soft_limit_double;
+ }
+
+ new_limit.sl = hton64 (new_limit.sl);
+
+ ret = gf_string2bytesize_int64 (hard_limit, &new_limit.hl);
+ if (ret)
+ goto out;
+
+ new_limit.hl = hton64 (new_limit.hl);
+
+ /* Write hard and soft limit together as one xattr value */
+ ret = sys_lsetxattr (abspath, key, (char *)(void *)&new_limit,
+ sizeof (new_limit), 0);
+ if (ret == -1) {
+ gf_asprintf (op_errstr, "setxattr of %s failed on %s."
+ " Reason : %s", key, abspath, strerror (errno));
+ goto out;
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* Advance the quota.conf version counter kept in @volinfo; always returns 0. */
+static int
+glusterd_update_quota_conf_version (glusterd_volinfo_t *volinfo)
+{
+        volinfo->quota_conf_version += 1;
+
+        return 0;
+}
+
+/* The function glusterd_find_gfid_match_3_6 () does the following:
+ * Given a buffer of gfids, the number of bytes read and the key gfid that needs
+ * to be found, the function compares 16 bytes at a time from @buf against
+ * @gfid.  It handles the older quota.conf layout of 16 bytes per record and
+ * exists for backward compatibility.
+ *
+ * What happens when the match is found:
+ * i. If the function was called as part of 'limit-usage' operation, the call
+ *    returns with write_byte_count = bytes_read
+ *ii. If the function was called as part of 'quota remove' operation, @buf
+ *    is modified in memory such that the match is deleted from the buffer, and
+ *    also @write_byte_count is set to original buf size minus the sixteen bytes
+ *    that was deleted as part of 'remove'.
+ *
+ * What happens when the match is not found in the current buffer:
+ * The function returns with write_byte_count = bytes_read, which means to say
+ * that the caller of this function must write the entire buffer to the tmp file
+ * and continue the search.
+ *
+ * Returns _gf_true when a match was found, _gf_false otherwise.
+ */
+static gf_boolean_t
+glusterd_find_gfid_match_3_6 (uuid_t gfid, unsigned char *buf,
+                              size_t bytes_read, int opcode,
+                              size_t *write_byte_count)
+{
+        size_t          gfid_index      = 0;
+        size_t          shift_count     = 0;
+        unsigned char   tmp_buf[17]     = {0,};
+
+        /* Only complete 16-byte records are examined.  The previous
+         * "index != bytes_read" bound read past the data (and could spin
+         * forever) whenever bytes_read was not a multiple of 16.
+         */
+        while (gfid_index + 16 <= bytes_read) {
+                memcpy ((void *)tmp_buf, (void *)&buf[gfid_index], 16);
+                if (!gf_uuid_compare (gfid, tmp_buf)) {
+                        if (opcode == GF_QUOTA_OPTION_TYPE_REMOVE) {
+                                /* close the gap left by the removed record */
+                                shift_count = bytes_read - (gfid_index + 16);
+                                memmove ((void *)&buf[gfid_index],
+                                         (void *)&buf[gfid_index+16],
+                                         shift_count);
+                                *write_byte_count = bytes_read - 16;
+                        } else {
+                                *write_byte_count = bytes_read;
+                        }
+                        return _gf_true;
+                }
+                gfid_index += 16;
+        }
+
+        *write_byte_count = bytes_read;
+
+        return _gf_false;
+}
+
+/*
+ * glusterd_find_gfid_match() searches @buf (@bytes_read bytes of quota.conf
+ * records) for a record whose gfid equals @gfid and whose type byte equals
+ * @gfid_type.  Records are 17 bytes each: 16 bytes of gfid followed by one
+ * type byte.  When the cluster op-version is below GD_OP_VERSION_3_7_0 the
+ * search is delegated to glusterd_find_gfid_match_3_6 (), which handles
+ * 16-byte records.
+ *
+ * On a match, for the REMOVE / REMOVE_OBJECTS opcodes the record is deleted
+ * from @buf in place and *@write_byte_count is set to bytes_read - 17; for any
+ * other opcode *@write_byte_count is set to bytes_read.  Without a match
+ * *@write_byte_count is set to bytes_read.
+ *
+ * Returns _gf_true when a match was found, _gf_false otherwise (including
+ * when the xlator context is unavailable, in which case *@write_byte_count
+ * is left untouched).
+ */
+static gf_boolean_t
+glusterd_find_gfid_match (uuid_t gfid, char gfid_type, unsigned char *buf,
+                          size_t bytes_read, int opcode,
+                          size_t *write_byte_count)
+{
+        size_t              gfid_index      = 0;
+        size_t              shift_count     = 0;
+        unsigned char       tmp_buf[17]     = {0,};
+        char                type            = 0;
+        xlator_t           *this            = NULL;
+        glusterd_conf_t    *conf            = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        if (conf->op_version < GD_OP_VERSION_3_7_0)
+                return glusterd_find_gfid_match_3_6 (gfid, buf, bytes_read,
+                                                     opcode, write_byte_count);
+
+        /* Only complete 17-byte records are examined.  The previous
+         * "index != bytes_read" bound read past the data (and could spin
+         * forever) whenever bytes_read was not a multiple of 17.
+         */
+        while (gfid_index + 17 <= bytes_read) {
+                memcpy ((void *)tmp_buf, (void *)&buf[gfid_index], 16);
+                type = buf[gfid_index + 16];
+
+                if (!gf_uuid_compare (gfid, tmp_buf) && type == gfid_type) {
+                        if (opcode == GF_QUOTA_OPTION_TYPE_REMOVE ||
+                            opcode == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) {
+                                /* close the gap left by the removed record */
+                                shift_count = bytes_read - (gfid_index + 17);
+                                memmove ((void *)&buf[gfid_index],
+                                         (void *)&buf[gfid_index + 17],
+                                         shift_count);
+                                *write_byte_count = bytes_read - 17;
+                        } else {
+                                *write_byte_count = bytes_read;
+                        }
+                        return _gf_true;
+                }
+                gfid_index += 17;
+        }
+
+        *write_byte_count = bytes_read;
+
+out:
+
+        return _gf_false;
+}
+
+/* The function glusterd_copy_to_tmp_file() reads the "remaining" bytes from
+ * the source fd and writes them to destination fd, in chunks of at most 128K
+ * bytes of read+write at a time.
+ *
+ * Each chunk must consist of whole quota.conf records.  A record is 17 bytes
+ * when the cluster op-version is at least GD_OP_VERSION_3_7_0 and 16 bytes
+ * otherwise, so the read size is rounded down to a multiple of the record
+ * size.  Previously the check was hard-coded to 16 and the read size to
+ * 131072, which is not a multiple of 17, so valid 17-byte-record files could
+ * be reported as corrupted.
+ *
+ * Returns 0 on success and -1 on a read/write failure or a partial record.
+ */
+
+static int
+glusterd_copy_to_tmp_file (int src_fd, int dst_fd)
+{
+        int               ret            = 0;
+        size_t            rec_sz         = 16;
+        size_t            chunk_sz       = 0;
+        ssize_t           bytes_read     = 0;
+        unsigned char     buf[131072]    = {0,};
+        xlator_t         *this           = NULL;
+        glusterd_conf_t  *conf           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (conf->op_version >= GD_OP_VERSION_3_7_0)
+                rec_sz = 17;
+
+        /* largest multiple of the record size that fits in buf */
+        chunk_sz = (sizeof (buf) / rec_sz) * rec_sz;
+
+        while ((bytes_read = sys_read (src_fd, (void *)buf, chunk_sz)) > 0) {
+                if (((size_t) bytes_read) % rec_sz != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_QUOTA_CONF_CORRUPT, "quota.conf "
+                                "corrupted");
+                        ret = -1;
+                        goto out;
+                }
+                ret = sys_write (dst_fd, (void *) buf, bytes_read);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_QUOTA_CONF_WRITE_FAIL,
+                                "write into quota.conf failed.");
+                        goto out;
+                }
+        }
+
+        /* A read error must not be mistaken for end-of-file, otherwise the
+         * copy would be silently truncated.
+         */
+        if (bytes_read < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "read from quota.conf failed.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * glusterd_store_quota_conf_upgrade() rewrites the quota.conf of @volinfo
+ * into a temporary file: a fresh header is written, then every gfid read
+ * from the existing file is written back tagged with
+ * GF_QUOTA_CONF_TYPE_USAGE.  On success the temporary file is renamed over
+ * quota.conf, the checksum is recomputed and the quota version and checksum
+ * are stored.  On failure the temporary file is unlinked.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_store_quota_conf_upgrade (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int fd = -1;
+ int conf_fd = -1;
+ unsigned char gfid[17] = {0,};
+ xlator_t *this = NULL;
+ char type = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ /* fd refers to the temporary file that receives the new contents */
+ fd = gf_store_mkstemp (volinfo->quota_conf_shandle);
+ if (fd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ conf_fd = open (volinfo->quota_conf_shandle->path, O_RDONLY);
+ if (conf_fd == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = quota_conf_skip_header (conf_fd);
+ if (ret)
+ goto out;
+
+ ret = glusterd_quota_conf_write_header (fd);
+ if (ret)
+ goto out;
+
+ /* Copy gfids one at a time; the loop ends when the reader returns 0
+ * (presumably end of file — confirm against quota_conf_read_gfid).
+ */
+ while (1) {
+ ret = quota_conf_read_gfid (conf_fd, gfid, &type, 1.1);
+ if (ret == 0)
+ break;
+ else if (ret < 0)
+ goto out;
+
+ ret = glusterd_quota_conf_write_gfid (fd, gfid,
+ GF_QUOTA_CONF_TYPE_USAGE);
+ if (ret < 0)
+ goto out;
+ }
+
+out:
+ if (conf_fd != -1)
+ sys_close (conf_fd);
+
+ if (ret && (fd > 0)) {
+ /* Failure after the temporary file was created: discard it */
+ gf_store_unlink_tmppath (volinfo->quota_conf_shandle);
+ } else if (!ret) {
+ ret = gf_store_rename_tmppath (volinfo->quota_conf_shandle);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED,
+ "Failed to rename "
+ "quota conf file");
+ return ret;
+ }
+
+ ret = glusterd_compute_cksum (volinfo, _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_CKSUM_COMPUTE_FAIL, "Failed to "
+ "compute cksum for quota conf file");
+ return ret;
+ }
+
+ ret = glusterd_store_save_quota_version_and_cksum (volinfo);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL, "Failed to "
+ "store quota version and cksum");
+ }
+
+ return ret;
+}
+
+/*
+ * glusterd_store_quota_config() rewrites the quota.conf of @volinfo for the
+ * quota operation @opcode.
+ *
+ * The new contents are assembled in a temporary file which is renamed over
+ * quota.conf on success and unlinked on failure:
+ *  - ENABLE / ENABLE_OBJECTS: only the header is written.
+ *  - LIMIT_USAGE / LIMIT_OBJECTS: the existing records are copied and the
+ *    gfid parsed from @gfid_str is appended if it is not present yet.
+ *  - REMOVE / REMOVE_OBJECTS: the existing records are copied without the
+ *    matching gfid; a missing gfid or an empty file is an error reported in
+ *    *@op_errstr (@path is used only in these messages).
+ * An old-format file is upgraded first when the cluster op-version is at
+ * least GD_OP_VERSION_3_7_0.  When the contents changed, the quota.conf
+ * version is bumped and the checksum is recomputed and stored.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
+                             char *gfid_str, int opcode, char **op_errstr)
+{
+        int                ret                   = -1;
+        int                fd                    = -1;
+        int                conf_fd               = -1;
+        ssize_t            bytes_read            = 0;
+        size_t             bytes_to_write        = 0;
+        size_t             read_sz               = 0;
+        unsigned char      buf[131072]           = {0,};
+        uuid_t             gfid                  = {0,};
+        xlator_t          *this                  = NULL;
+        gf_boolean_t       found                 = _gf_false;
+        gf_boolean_t       modified              = _gf_false;
+        gf_boolean_t       is_file_empty         = _gf_false;
+        gf_boolean_t       is_first_read         = _gf_true;
+        glusterd_conf_t   *conf                  = NULL;
+        float              version               = 0.0f;
+        char               type                  = 0;
+        int                quota_conf_line_sz    = 16;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        glusterd_store_create_quota_conf_sh_on_absence (volinfo);
+
+        conf_fd = open (volinfo->quota_conf_shandle->path, O_RDONLY);
+        if (conf_fd == -1) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = quota_conf_read_version (conf_fd, &version);
+        if (ret)
+                goto out;
+
+        if (version < 1.2f && conf->op_version >= GD_OP_VERSION_3_7_0) {
+                /* Upgrade quota.conf file to newer format */
+                sys_close (conf_fd);
+                /* forget the closed descriptor so that the cleanup at 'out'
+                 * does not close it a second time if the upgrade fails
+                 */
+                conf_fd = -1;
+
+                ret = glusterd_store_quota_conf_upgrade(volinfo);
+                if (ret)
+                        goto out;
+
+                conf_fd = open (volinfo->quota_conf_shandle->path, O_RDONLY);
+                if (conf_fd == -1) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = quota_conf_skip_header (conf_fd);
+                if (ret)
+                        goto out;
+        }
+
+        /* If op-ver is gt 3.7, then quota.conf will be upgraded, and 17 bytes
+         * stored in the new format. 16 bytes uuid and
+         * 1 byte type (usage/object)
+         */
+        if (conf->op_version >= GD_OP_VERSION_3_7_0)
+                quota_conf_line_sz++;
+
+        /* Read whole records only: sizeof (buf) is not a multiple of 17, so
+         * reading a full buffer would split a record and trip the
+         * corruption check below on perfectly valid files.
+         */
+        read_sz = (sizeof (buf) / quota_conf_line_sz) * quota_conf_line_sz;
+
+        fd = gf_store_mkstemp (volinfo->quota_conf_shandle);
+        if (fd < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_quota_conf_write_header (fd);
+        if (ret)
+                goto out;
+
+        /* Just create empty quota.conf file if create */
+        if (GF_QUOTA_OPTION_TYPE_ENABLE == opcode ||
+            GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS == opcode) {
+                modified = _gf_true;
+                goto out;
+        }
+
+        /* Check if gfid_str is given for opts other than ENABLE */
+        if (!gfid_str) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_parse (gfid_str, gfid);
+
+        if (opcode > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS)
+                type = GF_QUOTA_CONF_TYPE_OBJECTS;
+        else
+                type = GF_QUOTA_CONF_TYPE_USAGE;
+
+        for (;;) {
+                bytes_read = sys_read (conf_fd, (void *)&buf, read_sz);
+                if (bytes_read < 0) {
+                        /* a read error is not end-of-file: do not publish a
+                         * truncated quota.conf
+                         */
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "read from quota.conf failed.");
+                        ret = -1;
+                        goto out;
+                }
+                if (bytes_read == 0) {
+                        /*The flag @is_first_read is TRUE when the loop is
+                         * entered, and is set to false if the first read
+                         * reads non-zero bytes of data. The flag is used to
+                         * detect if quota.conf is an empty file, but for the
+                         * header. This is done to log appropriate error message
+                         * when 'quota remove' is attempted when there are no
+                         * limits set on the given volume.
+                         */
+                        if (is_first_read)
+                                is_file_empty = _gf_true;
+                        break;
+                }
+                if ((bytes_read % quota_conf_line_sz) != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_QUOTA_CONF_CORRUPT, "quota.conf "
+                                "corrupted");
+                        ret = -1;
+                        goto out;
+                }
+                found = glusterd_find_gfid_match (gfid, type, buf, bytes_read,
+                                                  opcode, &bytes_to_write);
+
+                ret = sys_write (fd, (void *) buf, bytes_to_write);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_QUOTA_CONF_WRITE_FAIL,
+                                "write into quota.conf failed.");
+                        goto out;
+                }
+
+                /*If the match is found in this iteration, copy the rest of
+                 * quota.conf into quota.conf.tmp and break.
+                 * Else continue with the search.
+                 */
+                if (found) {
+                        ret = glusterd_copy_to_tmp_file (conf_fd, fd);
+                        if (ret)
+                                goto out;
+                        break;
+                }
+                is_first_read = _gf_false;
+        }
+
+        switch (opcode) {
+        case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+                if (!found) {
+                        ret = glusterd_quota_conf_write_gfid (fd, gfid,
+                                                     GF_QUOTA_CONF_TYPE_USAGE);
+                        if (ret == -1) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_QUOTA_CONF_WRITE_FAIL,
+                                        "write into quota.conf failed. ");
+                                goto out;
+                        }
+                        modified = _gf_true;
+                }
+                break;
+        case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+                if (!found) {
+                        ret = glusterd_quota_conf_write_gfid (fd, gfid,
+                                                   GF_QUOTA_CONF_TYPE_OBJECTS);
+                        if (ret == -1) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_QUOTA_CONF_WRITE_FAIL,
+                                        "write into quota.conf failed. ");
+                                goto out;
+                        }
+                        modified = _gf_true;
+                }
+                break;
+
+        case GF_QUOTA_OPTION_TYPE_REMOVE:
+        case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+                if (is_file_empty) {
+                        gf_asprintf (op_errstr, "Cannot remove limit on"
+                                     " %s. The quota configuration file"
+                                     " for volume %s is empty.", path,
+                                     volinfo->volname);
+                        ret = -1;
+                        goto out;
+                } else {
+                        if (!found) {
+                                gf_asprintf (op_errstr, "Error. gfid %s"
+                                             " for path %s not found in"
+                                             " store", gfid_str, path);
+                                ret = -1;
+                                goto out;
+                        } else {
+                                modified = _gf_true;
+                        }
+                }
+                break;
+
+        default:
+                ret = 0;
+                break;
+        }
+
+        if (modified)
+                glusterd_update_quota_conf_version (volinfo);
+
+        ret = 0;
+out:
+        if (conf_fd != -1) {
+                sys_close (conf_fd);
+        }
+
+        if (ret && (fd >= 0)) {
+                gf_store_unlink_tmppath (volinfo->quota_conf_shandle);
+        } else if (!ret) {
+                /* The rename result used to be overwritten by the checksum
+                 * step whenever the file was modified; report it instead.
+                 */
+                ret = gf_store_rename_tmppath (volinfo->quota_conf_shandle);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Failed to rename "
+                                "quota conf file");
+                        return ret;
+                }
+
+                if (modified) {
+                        ret = glusterd_compute_cksum (volinfo, _gf_true);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_CKSUM_COMPUTE_FAIL, "Failed to "
+                                        "compute cksum for quota conf file");
+                                return ret;
+                        }
+
+                        ret = glusterd_store_save_quota_version_and_cksum
+                                                                      (volinfo);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VERS_CKSUM_STORE_FAIL,
+                                        "Failed to "
+                                        "store quota version and cksum");
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * glusterd_quota_limit_usage() applies a usage or object limit described by
+ * @dict ("path", "hard-limit", optional "soft-limit", "gfid") to @volinfo.
+ *
+ * On the originating glusterd the limit xattr is set through
+ * glusterd_set_quota_limit (), using QUOTA_LIMIT_KEY for
+ * GF_QUOTA_OPTION_TYPE_LIMIT_USAGE and QUOTA_LIMIT_OBJECTS_KEY for any other
+ * @opcode.  Afterwards the gfid is recorded in quota.conf via
+ * glusterd_store_quota_config ().
+ *
+ * Returns 0 on success and non-zero on failure, in which case *@op_errstr
+ * carries a message.
+ */
+int32_t
+glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict,
+                            int opcode, char **op_errstr)
+{
+        int32_t          ret                = -1;
+        char            *path               = NULL;
+        char            *hard_limit         = NULL;
+        char            *soft_limit         = NULL;
+        char            *gfid_str           = NULL;
+        xlator_t        *this               = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+        if (ret == -1) {
+                *op_errstr = gf_strdup ("Quota is disabled, please enable "
+                                        "quota");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "path", &path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch path");
+                goto out;
+        }
+        ret = gf_canonicalize_path (path);
+        if (ret)
+                goto out;
+
+        ret = dict_get_str (dict, "hard-limit", &hard_limit);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch hard limit");
+                goto out;
+        }
+
+        /* the soft limit is optional */
+        if (dict_get (dict, "soft-limit")) {
+                ret = dict_get_str (dict, "soft-limit", &soft_limit);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Unable to fetch "
+                                "soft limit");
+                        goto out;
+                }
+        }
+
+        /* only the originating glusterd writes the limit xattr */
+        if (is_origin_glusterd (dict)) {
+                if (opcode == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) {
+                        ret = glusterd_set_quota_limit (volinfo->volname, path,
+                                                        hard_limit, soft_limit,
+                                                        QUOTA_LIMIT_KEY,
+                                                        op_errstr);
+                } else {
+                        ret = glusterd_set_quota_limit (volinfo->volname, path,
+                                                        hard_limit, soft_limit,
+                                                        QUOTA_LIMIT_OBJECTS_KEY,
+                                                        op_errstr);
+                }
+                if (ret)
+                        goto out;
+        }
+
+        ret = dict_get_str (dict, "gfid", &gfid_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get gfid of path "
+                        "%s", path);
+                goto out;
+        }
+
+        ret = glusterd_store_quota_config (volinfo, path, gfid_str, opcode,
+                                           op_errstr);
+        if (ret)
+                goto out;
+
+        ret = 0;
+out:
+
+        /* This point is reachable with volinfo == NULL (failed validation)
+         * and with path == NULL (failure before the path was fetched), so
+         * neither may be used unguarded in the fallback message.
+         */
+        if (ret && op_errstr && !*op_errstr)
+                gf_asprintf (op_errstr, "Failed to set hard limit on path %s "
+                             "for volume %s", path ? path : "(null)",
+                             volinfo ? volinfo->volname : "(null)");
+        return ret;
+}
+
+/*
+ * glusterd_remove_quota_limit() deletes the quota limit xattr of @path,
+ * addressed through the auxiliary quota mount of @volname.  @type selects
+ * the xattr: QUOTA_LIMIT_KEY for GF_QUOTA_OPTION_TYPE_REMOVE and
+ * QUOTA_LIMIT_OBJECTS_KEY for GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS; any other
+ * type removes nothing.
+ *
+ * Returns 0 on success and non-zero on failure with *@op_errstr filled.
+ */
+static int
+glusterd_remove_quota_limit (char *volname, char *path, char **op_errstr,
+                             int type)
+{
+        glusterd_conf_t *priv                   = NULL;
+        xlator_t        *this                   = NULL;
+        const char      *limit_key              = NULL;
+        char             abspath[PATH_MAX]      = {0,};
+        int              ret                    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (abspath, volname, path);
+        ret = gf_lstat_dir (abspath, NULL);
+        if (ret != 0) {
+                gf_asprintf (op_errstr, "Failed to find the directory %s. "
+                             "Reason : %s", abspath, strerror (errno));
+                goto out;
+        }
+
+        /* pick the xattr that corresponds to the requested removal */
+        if (type == GF_QUOTA_OPTION_TYPE_REMOVE)
+                limit_key = QUOTA_LIMIT_KEY;
+        else if (type == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS)
+                limit_key = QUOTA_LIMIT_OBJECTS_KEY;
+
+        if (limit_key != NULL) {
+                ret = sys_lremovexattr (abspath, limit_key);
+                if (ret != 0) {
+                        gf_asprintf (op_errstr, "removexattr failed on %s. "
+                                     "Reason : %s", abspath, strerror (errno));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * glusterd_quota_remove_limits() removes the limit set on the "path" named
+ * in @dict for @volinfo: on the originating glusterd the limit xattr selected
+ * by @type is removed, then the "gfid" from @dict is dropped from quota.conf
+ * through glusterd_store_quota_config () using @opcode.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int32_t
+glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict,
+                              int opcode, char **op_errstr, int type)
+{
+        xlator_t        *this           = NULL;
+        char            *gfid_str       = NULL;
+        char            *path           = NULL;
+        int32_t          ret            = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
+
+        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+        if (-1 == ret) {
+                *op_errstr = gf_strdup ("Quota is disabled, please enable "
+                                        "quota");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "path", &path);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch path");
+                goto out;
+        }
+
+        ret = gf_canonicalize_path (path);
+        if (ret != 0)
+                goto out;
+
+        /* only the originating glusterd touches the xattr */
+        if (is_origin_glusterd (dict)) {
+                ret = glusterd_remove_quota_limit (volinfo->volname, path,
+                                                   op_errstr, type);
+                if (ret != 0)
+                        goto out;
+        }
+
+        ret = dict_get_str (dict, "gfid", &gfid_str);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get gfid of path "
+                        "%s", path);
+                goto out;
+        }
+
+        /* ret is already 0 when the store update succeeds */
+        ret = glusterd_store_quota_config (volinfo, path, gfid_str, opcode,
+                                           op_errstr);
+
+out:
+        return ret;
+}
+
+/*
+ * glusterd_set_quota_option() copies the string stored under "value" in
+ * @dict into volinfo->dict under @key.  It fails when quota is not enabled on
+ * the volume (reason placed in *@op_errstr) or when "value" is missing.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+glusterd_set_quota_option (glusterd_volinfo_t *volinfo, dict_t *dict,
+                           char *key, char **op_errstr)
+{
+        int        ret    = 0;
+        char      *value  = NULL;
+        xlator_t  *this   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+        if (ret == -1) {
+                gf_asprintf (op_errstr, "Cannot set %s. Quota on volume %s is "
+                                        "disabled", key, volinfo->volname);
+                return -1;
+        }
+
+        ret = dict_get_str (dict, "value", &value);
+        if(ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Option value absent.");
+                return -1;
+        }
+
+        /* Let the dict make its own copy of the value.  The previous
+         * gf_strdup () + dict_set_dynstr () pair did not check the
+         * allocation and leaked the copy when the set failed.
+         */
+        ret = dict_set_dynstr_with_alloc (volinfo->dict, key, value);
+        if(ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set option %s",
+                        key);
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * glusterd_quotad_op() adjusts the quotad service after a quota enable or
+ * disable: quotad is stopped with SIGTERM when
+ * glusterd_all_volumes_with_quota_stopped () reports true, otherwise its
+ * manager is invoked with PROC_START.  Every other @opcode is a no-op that
+ * returns 0.
+ */
+static int
+glusterd_quotad_op (int opcode)
+{
+        glusterd_conf_t *priv   = NULL;
+        xlator_t        *this   = NULL;
+        int              ret    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (opcode != GF_QUOTA_OPTION_TYPE_ENABLE &&
+            opcode != GF_QUOTA_OPTION_TYPE_DISABLE) {
+                ret = 0;
+                return ret;
+        }
+
+        if (glusterd_all_volumes_with_quota_stopped ()) {
+                ret = glusterd_svc_stop (&(priv->quotad_svc), SIGTERM);
+        } else {
+                ret = priv->quotad_svc.manager (&(priv->quotad_svc), NULL,
+                                                PROC_START);
+        }
+
+        return ret;
+}
+
+/*
+ * glusterd_op_quota() is the commit-phase handler of a quota command.
+ *
+ * It looks up the volume named by "volname" in @dict, reads the operation
+ * "type" and dispatches to the matching helper.  The limit, remove and list
+ * operations return straight from their helper.  For the remaining
+ * operations the quotad service is adjusted (when the op-version is above
+ * GD_OP_VERSION_MIN), the volinfo is stored, the volfiles are regenerated
+ * and, if a helper requested it and @rsp_dict is non-NULL, a filesystem crawl
+ * is started.
+ *
+ * Returns 0 on success and non-zero on failure; *@op_errstr may carry a
+ * message.
+ */
+int
+glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        glusterd_volinfo_t     *volinfo      = NULL;
+        int32_t                 ret          = -1;
+        char                   *volname      = NULL;
+        int                     type         = -1;
+        gf_boolean_t            start_crawl  = _gf_false;
+        glusterd_conf_t        *priv         = NULL;
+        xlator_t               *this         = NULL;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+                goto out;
+        }
+
+        /* The lookup result used to be ignored, leaving type at -1 */
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                *op_errstr = gf_strdup ("Volume quota failed, internal error, "
+                                        "unable to get type of operation");
+                goto out;
+        }
+
+        if (!glusterd_is_quota_supported (type, op_errstr)) {
+                ret = -1;
+                goto out;
+        }
+
+        switch (type) {
+                case GF_QUOTA_OPTION_TYPE_ENABLE:
+                        ret = glusterd_quota_enable (volinfo, op_errstr,
+                                                     &start_crawl);
+                        if (ret < 0)
+                                goto out;
+                        break;
+
+                case GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS:
+                        ret = glusterd_inode_quota_enable (volinfo, op_errstr,
+                                                           &start_crawl);
+                        if (ret < 0)
+                                goto out;
+                        break;
+
+                case GF_QUOTA_OPTION_TYPE_DISABLE:
+                        ret = glusterd_quota_disable (volinfo, op_errstr,
+                                                      &start_crawl);
+                        if (ret < 0)
+                                goto out;
+
+                        break;
+
+                /* limit/remove/list finish inside their helper */
+                case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+                case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+                        ret = glusterd_quota_limit_usage (volinfo, dict, type,
+                                                          op_errstr);
+                        goto out;
+
+                case GF_QUOTA_OPTION_TYPE_REMOVE:
+                case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+                        ret = glusterd_quota_remove_limits (volinfo, dict, type,
+                                                            op_errstr, type);
+                        goto out;
+
+                case GF_QUOTA_OPTION_TYPE_LIST:
+                case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS:
+                        ret = glusterd_check_if_quota_trans_enabled (volinfo);
+                        if (ret == -1) {
+                                *op_errstr = gf_strdup ("Cannot list limits, "
+                                                        "quota is disabled");
+                                goto out;
+                        }
+                        ret = glusterd_quota_get_default_soft_limit (volinfo,
+                                                                     rsp_dict);
+                        goto out;
+
+                case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+                        ret = glusterd_set_quota_option (volinfo, dict,
+                                                        "features.soft-timeout",
+                                                         op_errstr);
+                        if (ret)
+                                goto out;
+                        break;
+
+                case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+                        ret = glusterd_set_quota_option (volinfo, dict,
+                                                        "features.hard-timeout",
+                                                         op_errstr);
+                        if (ret)
+                                goto out;
+                        break;
+
+                case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+                        ret = glusterd_set_quota_option (volinfo, dict,
+                                                         "features.alert-time",
+                                                         op_errstr);
+                        if (ret)
+                                goto out;
+                        break;
+
+                case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+                        ret = glusterd_set_quota_option (volinfo, dict,
+                                                  "features.default-soft-limit",
+                                                  op_errstr);
+                        if (ret)
+                                goto out;
+                        break;
+
+                default:
+                        gf_asprintf (op_errstr, "Quota command failed. Invalid "
+                                     "opcode");
+                        ret = -1;
+                        goto out;
+        }
+
+        if (priv->op_version > GD_OP_VERSION_MIN) {
+                ret = glusterd_quotad_op (type);
+                if (ret)
+                        goto out;
+        }
+
+
+        /* The xattr version is bumped on enable and rolled back below if
+         * persisting or volfile generation fails.
+         */
+        if (GF_QUOTA_OPTION_TYPE_ENABLE == type)
+                volinfo->quota_xattr_version++;
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                if (GF_QUOTA_OPTION_TYPE_ENABLE == type)
+                        volinfo->quota_xattr_version--;
+                goto out;
+        }
+
+        ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "Unable to re-create "
+                                                  "volfiles");
+                if (GF_QUOTA_OPTION_TYPE_ENABLE == type) {
+                        /* rollback volinfo */
+                        volinfo->quota_xattr_version--;
+                        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                }
+
+                ret = -1;
+                goto out;
+        }
+
+        if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+                if (priv->op_version == GD_OP_VERSION_MIN)
+                        ret = priv->nfs_svc.manager (&(priv->nfs_svc), NULL, 0);
+        }
+
+        if (rsp_dict && start_crawl == _gf_true)
+                glusterd_quota_initiate_fs_crawl (priv, volinfo, type);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * glusterd_get_gfid_from_brick() fetches the 'trusted.gfid' attribute of @path
+ * from each brick in the backend and places the same in the rsp_dict with the
+ * keys being gfid0, gfid1, gfid2 and so on. The absence of @path in the backend
+ * is not treated as error.
+ *
+ * Bricks whose uuid differs from MY_UUID and bricks with a non-empty vg field
+ * are skipped.  The number of gfids stored is placed in @rsp_dict under
+ * "count".  Returns 0 on success and non-zero on failure.
+ */
+static int
+glusterd_get_gfid_from_brick (dict_t *dict, glusterd_volinfo_t *volinfo,
+ dict_t *rsp_dict, char **op_errstr)
+{
+ int ret = -1;
+ int count = 0;
+ char *path = NULL;
+ char backend_path[PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char key[256] = {0,};
+ char *gfid_str = NULL;
+ uuid_t gfid;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "path", &path);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Failed to get path");
+ goto out;
+ }
+
+ cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RESOLVE_BRICK_FAIL, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+
+ /* Skip bricks whose uuid is not MY_UUID */
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ /* Skip bricks that have a vg set */
+ if (brickinfo->vg[0])
+ continue;
+
+ /* Path of the directory as seen on the brick's backend */
+ snprintf (backend_path, sizeof (backend_path), "%s%s",
+ brickinfo->path, path);
+
+ ret = gf_lstat_dir (backend_path, NULL);
+ if (ret) {
+ /* Missing directory on this brick: log and move on */
+ gf_msg (this->name, GF_LOG_INFO, errno,
+ GD_MSG_DIR_OP_FAILED, "Failed to find "
+ "directory %s.", backend_path);
+ ret = 0;
+ continue;
+ }
+ ret = sys_lgetxattr (backend_path, GFID_XATTR_KEY, gfid, 16);
+ if (ret < 0) {
+ /* Unreadable gfid xattr: log and move on */
+ gf_msg (this->name, GF_LOG_INFO, errno,
+ GD_MSG_SETXATTR_FAIL, "Failed to get "
+ "extended attribute %s for directory %s. ",
+ GFID_XATTR_KEY, backend_path);
+ ret = 0;
+ continue;
+ }
+ snprintf (key, sizeof (key), "gfid%d", count);
+
+ gfid_str = gf_strdup (uuid_utoa (gfid));
+ if (!gfid_str) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, key, gfid_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED, "Failed to place "
+ "gfid of %s in dict", backend_path);
+ /* The string is freed here because the set failed */
+ GF_FREE (gfid_str);
+ goto out;
+ }
+ count++;
+ }
+
+ ret = dict_set_int32 (rsp_dict, "count", count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED, "Failed to set count");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * _glusterd_validate_quota_opts() validates the "value" stored in @dict for
+ * the quota tunable selected by @type (soft-timeout, hard-timeout, alert-time
+ * or default-soft-limit) against the option table loaded from the
+ * "features/quota" xlator.  Any other @type is rejected.
+ *
+ * Returns 0 when the value is valid and non-zero otherwise; @errstr is
+ * passed on to xlator_option_validate ().
+ */
+static int
+_glusterd_validate_quota_opts (dict_t *dict, int type, char **errstr)
+{
+ int ret = -1;
+ xlator_t *this = THIS;
+ void *quota_xl = NULL;
+ volume_opt_list_t opt_list = {{0},};
+ volume_option_t *opt = NULL;
+ char *key = NULL;
+ char *value = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (this);
+
+ /* Load the quota xlator's option list; quota_xl is released with
+ * dlclose () at 'out'.
+ */
+ ret = xlator_volopt_dynload ("features/quota", &quota_xl, &opt_list);
+ if (ret)
+ goto out;
+
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+ case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+ case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+ case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+ /* The option name is looked up by type in gd_quota_op_list */
+ key = (char *)gd_quota_op_list[type];
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+
+ opt = xlator_volume_option_get_list (&opt_list, key);
+ if (!opt) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_UNKNOWN_KEY, "Unknown option: %s", key);
+ goto out;
+ }
+ ret = dict_get_str (dict, "value", &value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Value not found for key %s",
+ key);
+ goto out;
+ }
+
+ ret = xlator_option_validate (this, key, value, opt, errstr);
+
+out:
+ if (quota_xl) {
+ dlclose (quota_xl);
+ quota_xl = NULL;
+ }
+ return ret;
+}
+
+/*
+ * glusterd_create_quota_auxiliary_mount() makes sure the auxiliary glusterfs
+ * client mount used for quota operations on @volname exists.
+ *
+ * Nothing is done when the pidfile shows the mount is already running.
+ * Otherwise the mount directory is created and SBIN_DIR"/glusterfs" is
+ * spawned with the quota client pid; priv->big_lock is released around the
+ * spawn and the following stat of the mount point.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+glusterd_create_quota_auxiliary_mount (xlator_t *this, char *volname)
+{
+ int ret = -1;
+ int retry = 0;
+ char mountdir[PATH_MAX] = {0,};
+ char pidfile_path[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ char qpid[16] = {0,};
+ char *volfileserver = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat buf = {0,};
+
+ GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+ GLUSTERFS_GET_AUX_MOUNT_PIDFILE (pidfile_path, volname);
+
+ if (gf_is_service_running (pidfile_path, NULL)) {
+ gf_msg_debug (this->name, 0, "Aux mount of volume %s is running"
+ " already", volname);
+ ret = 0;
+ goto out;
+ }
+
+ if (glusterd_is_fuse_available () == _gf_false) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MOUNT_REQ_FAIL, "Fuse unavailable");
+ ret = -1;
+ goto out;
+ }
+
+ GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (mountdir, volname, "/");
+ ret = sys_mkdir (mountdir, 0777);
+ /* An already existing mount directory is fine */
+ if (ret && errno != EEXIST) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_MOUNT_REQ_FAIL, "Failed to create auxiliary "
+ "mount directory %s", mountdir);
+ goto out;
+ }
+ snprintf (logfile, PATH_MAX-1, "%s/quota-mount-%s.log",
+ DEFAULT_LOG_FILE_DIRECTORY, volname);
+ snprintf(qpid, 15, "%d", GF_CLIENT_PID_QUOTA_MOUNT);
+
+ /* Fall back to "localhost" when no bind address is configured */
+ if (dict_get_str (this->options, "transport.socket.bind-address",
+ &volfileserver) != 0)
+ volfileserver = "localhost";
+
+ /* The big lock is dropped while the client is spawned and probed */
+ synclock_unlock (&priv->big_lock);
+ ret = runcmd (SBIN_DIR"/glusterfs",
+ "--volfile-server", volfileserver,
+ "--volfile-id", volname,
+ "-l", logfile,
+ "-p", pidfile_path,
+ "--client-pid", qpid,
+ mountdir,
+ NULL);
+ if (ret == 0) {
+ /* Block here till mount process is ready to accept FOPs.
+ * Else, if glusterd acquires biglock below before
+ * mount process is ready, then glusterd and mount process
+ * can get into a deadlock situation.
+ */
+ ret = sys_stat (mountdir, &buf);
+ if (ret < 0)
+ ret = -errno;
+ } else {
+ ret = -errno;
+ }
+
+ synclock_lock (&priv->big_lock);
+
+ if (ret) {
+ /* ret holds -errno here, hence the negation for logging */
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ GD_MSG_MOUNT_REQ_FAIL, "Failed to mount glusterfs "
+ "client. Please check the log file %s for more details",
+ logfile);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/*
+ * glusterd_op_stage_quota() is the staging-phase validator of a quota
+ * command described by @dict ("volname", "type" and operation specific
+ * keys).
+ *
+ * It checks that the volume exists and is started, that quota (and, for
+ * object operations, inode quota) is enabled where required and that the
+ * operation is supported.  For list/limit/remove operations issued on the
+ * originating glusterd the auxiliary quota mount is created.  The hard limit
+ * of a limit-usage request and the value of a tunable are validated, and for
+ * limit/remove operations the gfid of the path is collected from the local
+ * bricks into @rsp_dict.
+ *
+ * Returns 0 on success and non-zero on failure; *@op_errstr may carry a
+ * message, which is also logged.
+ */
+int
+glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int                 ret            = 0;
+        char               *volname        = NULL;
+        gf_boolean_t        exists         = _gf_false;
+        int                 type           = 0;
+        xlator_t           *this           = NULL;
+        glusterd_conf_t    *priv           = NULL;
+        glusterd_volinfo_t *volinfo        = NULL;
+        char               *hard_limit_str = NULL;
+        int64_t             hard_limit     = 0;
+        gf_boolean_t        get_gfid       = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        if (!exists) {
+                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+                ret = -1;
+                goto out;
+        }
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+                goto out;
+        }
+
+        if (!glusterd_is_volume_started (volinfo)) {
+                *op_errstr = gf_strdup ("Volume is stopped, start volume "
+                                        "before executing quota command.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                *op_errstr = gf_strdup ("Volume quota failed, internal error, "
+                                        "unable to get type of operation");
+                goto out;
+        }
+
+        if ((!glusterd_is_volume_quota_enabled (volinfo)) &&
+            (type != GF_QUOTA_OPTION_TYPE_ENABLE)) {
+                *op_errstr = gf_strdup ("Quota is disabled, please enable "
+                                        "quota");
+                ret = -1;
+                goto out;
+        }
+
+        /* types above VERSION_OBJECTS are the object (inode) operations */
+        if (type > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS) {
+                if (!glusterd_is_volume_inode_quota_enabled (volinfo) &&
+                    type != GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) {
+                        *op_errstr = gf_strdup ("Inode Quota is disabled, "
+                                                "please enable inode quota");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        if (!glusterd_is_quota_supported (type, op_errstr)) {
+                ret = -1;
+                goto out;
+        }
+
+        if ((GF_QUOTA_OPTION_TYPE_ENABLE != type) &&
+            (glusterd_check_if_quota_trans_enabled (volinfo) != 0)) {
+                ret = -1;
+                gf_asprintf (op_errstr, "Quota is not enabled on volume %s",
+                             volname);
+                goto out;
+        }
+
+        switch (type) {
+        case GF_QUOTA_OPTION_TYPE_LIST:
+        case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS:
+        case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+        case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+        case GF_QUOTA_OPTION_TYPE_REMOVE:
+        case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+                /* Quota auxiliary mount is needed by CLI
+                 * for list command and need by glusterd for
+                 * setting/removing limit
+                 */
+                if (is_origin_glusterd (dict)) {
+                        ret = glusterd_create_quota_auxiliary_mount (this,
+                                                                     volname);
+                        if (ret) {
+                                *op_errstr = gf_strdup ("Failed to start aux "
+                                                        "mount");
+                                goto out;
+                        }
+                }
+                break;
+        }
+
+        switch (type) {
+        case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+                ret = dict_get_str (dict, "hard-limit", &hard_limit_str);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get hard-limit from dict");
+                        goto out;
+                }
+                ret = gf_string2bytesize_int64 (hard_limit_str, &hard_limit);
+                if (ret) {
+                        /* The format takes two arguments (upper bound and
+                         * the offending string); only the string used to be
+                         * passed, which is undefined behaviour.
+                         */
+                        if (errno == ERANGE || hard_limit < 0)
+                                gf_asprintf (op_errstr, "Hard-limit "
+                                        "value out of range (0 - %"PRId64
+                                        "): %s", (int64_t) INT64_MAX,
+                                        hard_limit_str);
+                        else
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_CONVERSION_FAILED,
+                                        "Failed to convert hard-limit "
+                                        "string to value");
+                        goto out;
+                }
+                get_gfid = _gf_true;
+                break;
+        case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+                get_gfid = _gf_true;
+                break;
+
+        case GF_QUOTA_OPTION_TYPE_REMOVE:
+        case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+                get_gfid = _gf_true;
+                break;
+
+        case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+        case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+        case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+        case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+                ret = _glusterd_validate_quota_opts (dict, type, op_errstr);
+                if (ret)
+                        goto out;
+                break;
+
+        default:
+                break;
+        }
+
+        if (get_gfid == _gf_true) {
+                ret = glusterd_get_gfid_from_brick (dict, volinfo, rsp_dict,
+                                                    op_errstr);
+                if (ret)
+                        goto out;
+        }
+
+        ret = 0;
+
+ out:
+        if (ret && op_errstr && *op_errstr)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_STAGE_QUOTA_FAIL, "%s", *op_errstr);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+         return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
new file mode 100644
index 00000000000..f3475a3f0ec
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
@@ -0,0 +1,222 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-helper.h"
+
+/* Service name used for quotad: passed to glusterd_svc_init() and to
+ * glusterd_svc_build_volfile_path() in this file. */
+char *quotad_svc_name = "quotad";
+
+/* Populate the callback slots of the quotad service object: quotad supplies
+ * its own manager and start routines and uses glusterd_svc_stop for stop. */
+void
+glusterd_quotadsvc_build (glusterd_svc_t *svc)
+{
+        svc->stop    = glusterd_svc_stop;
+        svc->start   = glusterd_quotadsvc_start;
+        svc->manager = glusterd_quotadsvc_manager;
+}
+
+/* Initialise the quotad service object.
+ *
+ * @svc: service object to initialise.
+ *
+ * Delegates to glusterd_svc_init() with the quotad service name and returns
+ * its result unchanged; glusterd_quotadsvc_manager() treats any non-zero
+ * value as failure.
+ */
+int
+glusterd_quotadsvc_init (glusterd_svc_t *svc)
+{
+        return glusterd_svc_init (svc, quotad_svc_name);
+}
+
+/* (Re)generate the quotad volfile.
+ *
+ * The volfile path is built from the quotad service name and conf->workdir,
+ * then the global volfile is created from build_quotad_graph. Returns the
+ * result of glusterd_create_global_volfile().
+ */
+static int
+glusterd_quotadsvc_create_volfile ()
+{
+        glusterd_conf_t *priv                   = THIS->private;
+        char             volfile_path[PATH_MAX] = {0,};
+        int              rc                     = 0;
+
+        glusterd_svc_build_volfile_path (quotad_svc_name, priv->workdir,
+                                         volfile_path, sizeof (volfile_path));
+
+        rc = glusterd_create_global_volfile (build_quotad_graph,
+                                             volfile_path, NULL);
+        return rc;
+}
+
+/* Manage the quotad service: lazily initialise it, then stop or restart it.
+ *
+ * @svc:   quotad service object.
+ * @data:  optional glusterd_volinfo_t * the request is about (may be NULL).
+ * @flags: passed through to svc->start().
+ *
+ * The service is acted upon only when no volinfo was supplied, or when the
+ * supplied volume has quota enabled:
+ *   - if all volumes are stopped, or all volumes with quota are stopped,
+ *     the service is stopped;
+ *   - otherwise the volfile is regenerated and the service is stopped,
+ *     started again and its connection is (re)established.
+ *
+ * Returns 0 on success, or the first non-zero result of a failing step.
+ */
+int
+glusterd_quotadsvc_manager (glusterd_svc_t *svc, void *data, int flags)
+{
+        glusterd_volinfo_t *vol = NULL;
+        int                 ret = 0;
+
+        if (!svc->inited) {
+                ret = glusterd_quotadsvc_init (svc);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FAILED_INIT_QUOTASVC, "Failed to init "
+                                "quotad service");
+                        goto out;
+                }
+                svc->inited = _gf_true;
+                gf_msg_debug (THIS->name, 0, "quotad service "
+                              "initialized");
+        }
+
+        vol = data;
+
+        if (glusterd_are_all_volumes_stopped () ||
+            glusterd_all_volumes_with_quota_stopped ()) {
+                /* Nothing left that needs quotad: stop it, unless the
+                 * request is about a volume without quota. */
+                if (!vol || glusterd_is_volume_quota_enabled (vol))
+                        ret = svc->stop (svc, SIGTERM);
+                goto out;
+        }
+
+        /* A volume without quota does not affect quotad. */
+        if (vol && !glusterd_is_volume_quota_enabled (vol))
+                goto out;
+
+        ret = glusterd_quotadsvc_create_volfile ();
+        if (ret)
+                goto out;
+
+        ret = svc->stop (svc, SIGTERM);
+        if (ret)
+                goto out;
+
+        ret = svc->start (svc, flags);
+        if (ret)
+                goto out;
+
+        ret = glusterd_conn_connect (&(svc->conn));
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Start the quotad service with its fixed set of extra command-line options.
+ *
+ * @svc:   quotad service object.
+ * @flags: passed through to glusterd_svc_start().
+ *
+ * Each entry of the option table is stored in a temporary dict under the key
+ * "arg<index>" and the dict is handed to glusterd_svc_start(). Returns -1 if
+ * the dict cannot be allocated, otherwise the result of the first failing
+ * dict_set_str() or of glusterd_svc_start().
+ */
+int
+glusterd_quotadsvc_start (glusterd_svc_t *svc, int flags)
+{
+        char    *xl_opts[]   = {
+                "*replicate*.entry-self-heal=off",
+                "--xlator-option",
+                "*replicate*.metadata-self-heal=off",
+                "--xlator-option",
+                "*replicate*.data-self-heal=off",
+                "--xlator-option",
+                NULL
+        };
+        char     argname[16] = {0};
+        dict_t  *args        = NULL;
+        int      idx         = 0;
+        int      ret         = -1;
+
+        args = dict_new ();
+        if (!args)
+                goto out;
+
+        while (xl_opts[idx]) {
+                memset (argname, 0, sizeof (argname));
+                snprintf (argname, sizeof (argname), "arg%d", idx);
+                ret = dict_set_str (args, argname, xl_opts[idx]);
+                if (ret)
+                        goto out;
+                idx++;
+        }
+
+        ret = glusterd_svc_start (svc, flags, args);
+
+out:
+        if (args)
+                dict_unref (args);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Bring the quotad service in line with the current volume configuration.
+ *
+ * - If all volumes with quota are stopped, hand over to the service manager.
+ * - If the freshly generated volfile is identical to the existing one,
+ *   nothing is done.
+ * - If only options changed (topology identical), the volfile is rewritten
+ *   and glusterd_fetchspec_notify() is called so quotad can reconfigure.
+ * - Otherwise the topology changed and the service manager is invoked with
+ *   PROC_START_NO_WAIT.
+ *
+ * Returns 0 on success, non-zero (or -1 on validation failure) otherwise.
+ */
+int
+glusterd_quotadsvc_reconfigure ()
+{
+        int              ret       = -1;
+        xlator_t        *this      = NULL;
+        glusterd_conf_t *priv      = NULL;
+        gf_boolean_t     identical = _gf_false;
+
+        this = THIS;
+        /* A literal log name is required here: this->name cannot be read
+         * while 'this' itself is the pointer being validated. */
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        if (glusterd_all_volumes_with_quota_stopped ())
+                goto manager;
+
+        /*
+         * Check both OLD and NEW volfiles, if they are SAME by size
+         * and cksum i.e. "character-by-character". If YES, then
+         * NOTHING has been changed, just return.
+         */
+        ret = glusterd_svc_check_volfile_identical (priv->quotad_svc.name,
+                                                    build_quotad_graph,
+                                                    &identical);
+        if (ret)
+                goto out;
+
+        if (identical) {
+                ret = 0;
+                goto out;
+        }
+
+        /*
+         * They are not identical. Find out if the topology is changed
+         * OR just the volume options. If just the options which got
+         * changed, then inform the xlator to reconfigure the options.
+         */
+        identical = _gf_false; /* RESET the FLAG */
+        ret = glusterd_svc_check_topology_identical (priv->quotad_svc.name,
+                                                     build_quotad_graph,
+                                                     &identical);
+        if (ret)
+                goto out;
+
+        /* Topology is not changed, but just the options. But write the
+         * options to quotad volfile, so that quotad will be reconfigured.
+         */
+        if (identical) {
+                ret = glusterd_quotadsvc_create_volfile ();
+                if (ret == 0) {/* Only if above PASSES */
+                        ret = glusterd_fetchspec_notify (THIS);
+                }
+                goto out;
+        }
+manager:
+        /*
+         * quotad volfile's topology has been changed. quotad server needs
+         * to be RESTARTED to ACT on the changed volfile.
+         */
+        ret = priv->quotad_svc.manager (&(priv->quotad_svc), NULL,
+                                        PROC_START_NO_WAIT);
+
+out:
+        /* 'this' may be NULL if the first validation failed. */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h
new file mode 100644
index 00000000000..91da50dc36d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_QUOTAD_SVC_H_
+#define _GLUSTERD_QUOTAD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+/* Set the manager/start/stop callbacks of the quotad service object. */
+void
+glusterd_quotadsvc_build (glusterd_svc_t *svc);
+
+/* Initialise the quotad service object via glusterd_svc_init(). */
+int
+glusterd_quotadsvc_init (glusterd_svc_t *svc);
+
+/* Start quotad with its fixed extra command-line options; flags are passed
+ * through to glusterd_svc_start(). */
+int
+glusterd_quotadsvc_start (glusterd_svc_t *svc, int flags);
+
+/* Initialise the service if needed, then stop or restart quotad; data is an
+ * optional glusterd_volinfo_t pointer (may be NULL). */
+int
+glusterd_quotadsvc_manager (glusterd_svc_t *svc, void *data, int flags);
+
+/* Compare old and new quotad volfiles and rewrite the volfile or invoke the
+ * service manager as required. */
+int
+glusterd_quotadsvc_reconfigure ();
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-rcu.h b/xlators/mgmt/glusterd/src/glusterd-rcu.h
new file mode 100644
index 00000000000..15beac5a745
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-rcu.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_RCU_H
+#define _GLUSTERD_RCU_H
+
+#include <urcu-bp.h>
+#include <urcu/rculist.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+#include <urcu-call-rcu.h>
+
+#ifdef URCU_OLD
+#include "rculist-extra.h"
+#endif
+
+#include "xlator.h"
+
+/* gd_rcu_head is a composite struct, composed of struct rcu_head and a this
+ * pointer, which is used to pass the THIS pointer to call_rcu callbacks.
+ *
+ * Use this in place of struct rcu_head when embedding into another struct
+ */
+typedef struct glusterd_rcu_head_ {
+ /* RCU head to be embedded in place of a bare struct rcu_head */
+ struct rcu_head head;
+ /* xlator pointer carried along so call_rcu callbacks can obtain THIS */
+ xlator_t *this;
+} gd_rcu_head;
+
+#endif /* _GLUSTERD_RCU_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
new file mode 100644
index 00000000000..35fa4627d04
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -0,0 +1,1134 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/statvfs.h>
+
+#include "globals.h"
+#include "compat.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-store.h"
+#include "run.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+
+int32_t
+glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
+/* Check whether a rebalance (defrag) process may be started on a volume.
+ *
+ * @volinfo:   volume to check.
+ * @op_errstr: buffer that receives a human-readable reason on failure.
+ * @len:       size of op_errstr.
+ * @op:        operation on whose behalf the check is made.
+ *
+ * Fails (-1) when a remove-brick task is not yet committed (this check is
+ * skipped when op is GD_OP_REMOVE_BRICK) or when a defrag is already on.
+ * Returns 0 otherwise.
+ */
+int
+glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
+                                size_t len, glusterd_op_t op)
+{
+        xlator_t *this = THIS;
+        int       rc   = -1;
+
+        GF_ASSERT (this);
+
+        /* Check only if operation is not remove-brick */
+        if ((op != GD_OP_REMOVE_BRICK) &&
+            !gd_is_remove_brick_committed (volinfo)) {
+                gf_msg_debug (this->name, 0, "A remove-brick task on "
+                              "volume %s is not yet committed", volinfo->volname);
+                snprintf (op_errstr, len, "A remove-brick task on volume %s is"
+                          " not yet committed. Either commit or stop the "
+                          "remove-brick task.", volinfo->volname);
+        } else if (glusterd_is_defrag_on (volinfo)) {
+                gf_msg_debug (this->name, 0,
+                              "rebalance on volume %s already started",
+                              volinfo->volname);
+                snprintf (op_errstr, len, "Rebalance on %s is already started",
+                          volinfo->volname);
+        } else {
+                rc = 0;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", rc);
+        return rc;
+}
+
+
+/* RPC notification handler for the connection to a volume's rebalance
+ * process (invoked through glusterd_defrag_notify).
+ *
+ * @rpc:    rpc client the event belongs to.
+ * @mydata: the glusterd_volinfo_t registered with the rpc client.
+ * @event:  RPC_CLNT_CONNECT / RPC_CLNT_DISCONNECT / RPC_CLNT_DESTROY / other.
+ * @data:   unused here.
+ *
+ * Returns 0 in every path visible here (ret is never set to non-zero).
+ */
+int32_t
+__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ char pidfile[PATH_MAX];
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (!this)
+ return 0;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ volinfo = mydata;
+ if (!volinfo)
+ return 0;
+
+ /* NOTE(review): this early return also applies to RPC_CLNT_DESTROY, so
+ * the volinfo unref below is skipped when rebal.defrag is already NULL
+ * -- confirm this is intended. */
+ defrag = volinfo->rebal.defrag;
+ if (!defrag)
+ return 0;
+
+ /* Detach the defrag info from the volinfo up front on a disconnect of a
+ * connected process; the local 'defrag' pointer is still used (and
+ * finally freed) in the RPC_CLNT_DISCONNECT case below. */
+ if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
+ volinfo->rebal.defrag = NULL;
+
+ /* presumably fills pidfile with the rebalance pid-file path -- macro
+ * is defined outside this file */
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ /* NOTE(review): 'connected' is read here without defrag->lock. */
+ if (defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ gf_msg_debug (this->name, 0, "%s got RPC_CLNT_CONNECT",
+ rpc->conn.name);
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ if (!defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 0;
+ }
+ UNLOCK (&defrag->lock);
+
+ /* If the process is gone while the status still says STARTED,
+ * record the run as failed. */
+ if (!gf_is_service_running (pidfile, NULL)) {
+ if (volinfo->rebal.defrag_status ==
+ GF_DEFRAG_STATUS_STARTED) {
+ volinfo->rebal.defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ }
+ }
+
+ glusterd_store_perform_node_state_store (volinfo);
+
+ rpc_clnt_reconnect_cleanup (&defrag->rpc->conn);
+ glusterd_defrag_rpc_put (defrag);
+ if (defrag->cbk_fn)
+ defrag->cbk_fn (volinfo,
+ volinfo->rebal.defrag_status);
+
+ /* defrag was detached from volinfo above, so it is released here */
+ GF_FREE (defrag);
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_REBALANCE_DISCONNECTED,
+ "Rebalance process for volume %s has disconnected.",
+ volinfo->volname);
+ break;
+ }
+ case RPC_CLNT_DESTROY:
+ /* drops a volinfo reference; glusterd_rebalance_rpc_create takes one
+ * before creating the rpc object */
+ glusterd_volinfo_unref (volinfo);
+ break;
+ default:
+ gf_msg_trace (this->name, 0,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+/* Public RPC notify entry point for the rebalance connection: forwards all
+ * arguments to __glusterd_defrag_notify through glusterd_big_locked_notify
+ * and returns its result. */
+int32_t
+glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+                        rpc_clnt_event_t event, void *data)
+{
+        int32_t rc = 0;
+
+        rc = glusterd_big_locked_notify (rpc, mydata, event,
+                                         data, __glusterd_defrag_notify);
+        return rc;
+}
+
+/* Spawn the rebalance (or tier) process for a volume and connect to it.
+ *
+ * @volinfo:   volume to rebalance.
+ * @op_errstr: buffer receiving a reason when start validation fails.
+ * @len:       size of op_errstr.
+ * @cmd:       rebalance command; stored in the defrag info and passed to the
+ *             process as "*dht.rebalance-cmd".
+ * @cbk:       optional callback stored in defrag->cbk_fn.
+ * @op:        glusterd operation, recorded in volinfo->rebal.op.
+ *
+ * Steps: validate, allocate the defrag info if absent, mark the status as
+ * STARTED and persist the node state, create the defrag directory, build the
+ * glusterfs command line (optionally under valgrind), launch it without
+ * waiting, sleep 5 seconds and create the rpc connection to the process.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
+                              size_t len, int cmd, defrag_cbk_fn_t cbk,
+                              glusterd_op_t op)
+{
+        int                     ret = -1;
+        glusterd_defrag_info_t *defrag = NULL;
+        runner_t                runner = {0,};
+        glusterd_conf_t        *priv = NULL;
+        char                    defrag_path[PATH_MAX];
+        char                    sockfile[PATH_MAX] = {0,};
+        char                    pidfile[PATH_MAX] = {0,};
+        char                    logfile[PATH_MAX] = {0,};
+        char                    volname[PATH_MAX] = {0,};
+        char                    valgrind_logfile[PATH_MAX] = {0,};
+        char                   *volfileserver = NULL;
+
+        priv = THIS->private;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (op_errstr);
+
+
+        ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
+        if (ret)
+                goto out;
+        if (!volinfo->rebal.defrag)
+                volinfo->rebal.defrag =
+                        GF_CALLOC (1, sizeof (*volinfo->rebal.defrag),
+                                   gf_gld_mt_defrag_info);
+        if (!volinfo->rebal.defrag)
+                goto out;
+
+        defrag = volinfo->rebal.defrag;
+
+        defrag->cmd = cmd;
+
+        volinfo->rebal.defrag_cmd = cmd;
+        volinfo->rebal.op = op;
+
+        LOCK_INIT (&defrag->lock);
+
+        volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+        glusterd_volinfo_reset_defrag_stats (volinfo);
+        glusterd_store_perform_node_state_store (volinfo);
+
+        GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv);
+        ret = mkdir_p (defrag_path, 0777, _gf_true);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "Failed to create "
+                        "directory %s", defrag_path);
+                goto out;
+        }
+
+        GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo);
+        GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);
+        snprintf (logfile, PATH_MAX, "%s/%s-%s.log",
+                  DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname,
+                  (cmd == GF_DEFRAG_CMD_START_TIER ? "tier":"rebalance"));
+        runinit (&runner);
+
+        if (priv->valgrind) {
+                snprintf (valgrind_logfile, PATH_MAX,
+                          "%s/valgrind-%s-rebalance.log",
+                          DEFAULT_LOG_FILE_DIRECTORY,
+                          volinfo->volname);
+
+                runner_add_args (&runner, "valgrind", "--leak-check=full",
+                                 "--trace-children=yes", "--track-origins=yes",
+                                 NULL);
+                runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+        }
+
+        snprintf (volname, sizeof(volname), "rebalance/%s", volinfo->volname);
+
+        if (dict_get_str (THIS->options, "transport.socket.bind-address",
+                          &volfileserver) == 0) {
+                /* In the case of running multiple glusterds on a single
+                 * machine, we should ensure that the log file and unix
+                 * socket file are unique in the given cluster */
+
+                GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo,
+                                                   priv);
+                snprintf (logfile, PATH_MAX, "%s/%s-%s-%s.log",
+                          DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname,
+                          (cmd == GF_DEFRAG_CMD_START_TIER ?
+                           "tier":"rebalance"),
+                          uuid_utoa(MY_UUID));
+
+        } else {
+                volfileserver = "localhost";
+        }
+
+        runner_add_args (&runner, SBIN_DIR"/glusterfs",
+                         "-s", volfileserver, "--volfile-id", volname,
+                         "--xlator-option", "*dht.use-readdirp=yes",
+                         "--xlator-option", "*dht.lookup-unhashed=yes",
+                         "--xlator-option", "*dht.assert-no-child-down=yes",
+                         "--xlator-option", "*replicate*.data-self-heal=off",
+                         "--xlator-option",
+                         "*replicate*.metadata-self-heal=off",
+                         "--xlator-option", "*replicate*.entry-self-heal=off",
+                         "--xlator-option", "*dht.readdir-optimize=on",
+                         NULL);
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                runner_add_arg (&runner, "--xlator-option");
+                runner_argprintf (&runner,
+                                  "*tier-dht.xattr-name=trusted.tier.tier-dht");
+        }
+
+        runner_add_arg (&runner, "--xlator-option");
+        runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
+        runner_add_arg (&runner, "--xlator-option");
+        runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID));
+        runner_add_arg (&runner, "--xlator-option");
+        runner_argprintf (&runner, "*dht.commit-hash=%u",
+                          volinfo->rebal.commit_hash);
+        runner_add_arg (&runner, "--socket-file");
+        runner_argprintf (&runner, "%s",sockfile);
+        runner_add_arg (&runner, "--pid-file");
+        runner_argprintf (&runner, "%s",pidfile);
+        runner_add_arg (&runner, "-l");
+        /* logfile embeds the volume name; never use it as the format string
+         * itself, or a '%' in the path would be interpreted as a
+         * conversion specifier. */
+        runner_argprintf (&runner, "%s", logfile);
+        if (volinfo->memory_accounting)
+                runner_add_arg (&runner, "--mem-accounting");
+
+        ret = runner_run_nowait (&runner);
+        if (ret) {
+                gf_msg_debug ("glusterd", 0, "rebalance command failed");
+                goto out;
+        }
+
+        /* fixed delay before connecting to the freshly spawned process */
+        sleep (5);
+
+        ret = glusterd_rebalance_rpc_create (volinfo, _gf_false);
+
+        //FIXME: this cbk is passed as NULL in all occurrences. May be
+        //we never needed it.
+        if (cbk)
+                defrag->cbk_fn = cbk;
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Ensure volinfo->rebal.defrag exists and is initialised.
+ *
+ * @volinfo: volume whose defrag info is set up.
+ * @cbk:     optional callback stored in the new defrag info.
+ *
+ * If the defrag info is already present nothing is touched and 0 is
+ * returned. Otherwise it is allocated, its cmd is taken from
+ * volinfo->rebal.defrag_cmd, its lock is initialised and cbk (if any) is
+ * recorded. Returns -1 when the allocation fails, 0 otherwise.
+ */
+int
+glusterd_rebalance_defrag_init (glusterd_volinfo_t *volinfo,
+                                defrag_cbk_fn_t cbk)
+
+{
+        glusterd_defrag_info_t *info = NULL;
+
+        /* Already initialised: skip the initialization. */
+        if (volinfo->rebal.defrag)
+                return 0;
+
+        volinfo->rebal.defrag = GF_CALLOC (1, sizeof (*volinfo->rebal.defrag),
+                                           gf_gld_mt_defrag_info);
+        info = volinfo->rebal.defrag;
+        if (!info)
+                return -1;
+
+        info->cmd = volinfo->rebal.defrag_cmd;
+        LOCK_INIT (&info->lock);
+        if (cbk)
+                info->cbk_fn = cbk;
+
+        return 0;
+}
+
+/* Create the rpc client used to talk to a volume's rebalance process over
+ * its unix socket.
+ *
+ * @volinfo:   volume whose rebal.defrag info receives the rpc object.
+ * @reconnect: when true, first check that the socket file exists, falling
+ *             back to the old socket path if the new one is missing.
+ *
+ * Returns 0 on success or when an rpc object already exists; non-zero when
+ * no defrag info is present, no socket file is found (reconnect case), or
+ * option building / rpc creation fails.
+ */
+int
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
+ gf_boolean_t reconnect)
+{
+ dict_t *options = NULL;
+ char sockfile[PATH_MAX] = {0,};
+ int ret = -1;
+ glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ struct stat buf = {0,};
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ //rebalance process is not started
+ if (!defrag)
+ goto out;
+
+ //rpc obj for rebalance process already in place.
+ if (glusterd_defrag_rpc_get (defrag)) {
+ ret = 0;
+ /* release what the get above acquired -- presumably a reference;
+ * helper is defined elsewhere */
+ glusterd_defrag_rpc_put (defrag);
+ goto out;
+ }
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo);
+ /* If reconnecting check if defrag sockfile exists in the new location
+ * in /var/run/ , if it does not try the old location
+ */
+ if (reconnect) {
+ ret = sys_stat (sockfile, &buf);
+ /* TODO: Remove this once we don't need backward compatibility
+ * with the older path
+ */
+ /* NOTE(review): a stat failure with an errno other than ENOENT is
+ * not treated as an error; execution continues below. */
+ if (ret && (errno == ENOENT)) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ GD_MSG_FILE_OP_FAILED, "Rebalance sockfile "
+ "%s does not exist. Trying old path.",
+ sockfile);
+ GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo,
+ priv);
+ ret =sys_stat (sockfile, &buf);
+ if (ret && (ENOENT == errno)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REBAL_NO_SOCK_FILE, "Rebalance "
+ "sockfile %s does not exist", sockfile);
+ goto out;
+ }
+ }
+ }
+
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable. The
+ * default timeout of 30mins used for unreliable network connections is
+ * too long for unix domain socket connections.
+ */
+ ret = rpc_transport_unix_options_build (&options, sockfile, 600);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL,
+ "Unix options build failed");
+ goto out;
+ }
+
+ /* Reference held on behalf of the rpc object; __glusterd_defrag_notify
+ * drops one on RPC_CLNT_DESTROY.
+ * NOTE(review): if glusterd_rpc_create fails, nothing in this function
+ * drops the reference -- confirm whether the failure path of
+ * glusterd_rpc_create triggers RPC_CLNT_DESTROY. */
+ glusterd_volinfo_ref (volinfo);
+ ret = glusterd_rpc_create (&defrag->rpc, options,
+ glusterd_defrag_notify, volinfo);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
+ "Glusterd RPC creation failed");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Validate that a rebalance command can be run against a volume.
+ *
+ * @cmd:       rebalance command, forwarded to the tier check.
+ * @volname:   name of the volume to look up.
+ * @volinfo:   out parameter filled by glusterd_volinfo_find().
+ * @op_errstr: buffer that receives a reason on failure.
+ * @len:       size of op_errstr.
+ *
+ * The checks run in order and the first failure wins: the volume must
+ * exist, have more bricks than dist_leaf_count, be started, and the
+ * operation must not be disallowed for tier volumes. Returns 0 when all
+ * checks pass, -1 for the first three failures, and the result of
+ * glusterd_disallow_op_for_tier() for the last.
+ */
+int
+glusterd_rebalance_cmd_validate (int cmd, char *volname,
+                                 glusterd_volinfo_t **volinfo,
+                                 char *op_errstr, size_t len)
+{
+        int rc = -1;
+
+        if (glusterd_volinfo_find(volname, volinfo)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "Received rebalance on invalid"
+                        " volname %s", volname);
+                snprintf (op_errstr, len, "Volume %s does not exist",
+                          volname);
+        } else if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_DISTRIBUTE, "Volume %s is not a "
+                        "distribute type or contains only 1 brick", volname);
+                snprintf (op_errstr, len, "Volume %s is not a distribute "
+                          "volume or contains only 1 brick.\n"
+                          "Not performing rebalance", volname);
+        } else if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_STOPPED, "Received rebalance on stopped"
+                        " volname %s", volname);
+                snprintf (op_errstr, len, "Volume %s needs to "
+                          "be started to perform rebalance", volname);
+        } else {
+                rc = glusterd_disallow_op_for_tier (*volinfo, GD_OP_REBALANCE,
+                                                    cmd);
+                if (rc) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_REBALANCE_CMD_IN_TIER_VOL,
+                                "Received rebalance command "
+                                "on Tier volume %s", volname);
+                        snprintf (op_errstr, len, "Rebalance operations are not "
+                                  "supported on a tiered volume");
+                }
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/* CLI request handler for rebalance/defrag commands (invoked through
+ * glusterd_handle_defrag_volume).
+ *
+ * Decodes the request, unserializes its dictionary, reads "volname" and
+ * "rebalance-command", tags the dict with this node's uuid and begins either
+ * a GD_OP_DEFRAG_BRICK_VOLUME op (status/stop style commands) or a
+ * GD_OP_REBALANCE op. On any failure a CLI response carrying msg is sent.
+ *
+ * Always returns 0; failures are reported through the CLI response.
+ */
+int
+__glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ gf_cli_defrag_type cmd = 0;
+ char msg[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ /* NOTE(review): the dict_new() result is not checked here. */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg, sizeof (msg), "Unable to decode the "
+ "command");
+ goto out;
+ }
+ }
+
+ /* NOTE(review): dict is still NULL here when the request carried no
+ * dictionary -- relies on dict_get_str tolerating a NULL dict; confirm. */
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get volume name");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get command");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
+
+ /* 16 is the byte length passed for MY_UUID */
+ ret = dict_set_static_bin (dict, "node-uuid", MY_UUID, 16);
+ if (ret)
+ goto out;
+
+ /* Status/stop style commands go through GD_OP_DEFRAG_BRICK_VOLUME;
+ * everything else is a GD_OP_REBALANCE. */
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
+ (cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER) ||
+ (cmd == GF_DEFRAG_CMD_STOP) ||
+ (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) {
+ ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
+ dict, msg, sizeof (msg));
+ } else
+ ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict,
+ msg, sizeof (msg));
+
+out:
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ if (ret) {
+ /* fall back to a generic message when no step set one */
+ if (msg[0] == '\0')
+ snprintf (msg, sizeof (msg), "Operation failed");
+ ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0,
+ req, dict, msg);
+
+ }
+
+ free (cli_req.dict.dict_val);//malloced by xdr
+
+ return 0;
+}
+
+/* RPC entry point for rebalance/defrag CLI requests: hands the request to
+ * __glusterd_handle_defrag_volume through glusterd_big_locked_handler and
+ * returns its result. */
+int
+glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+{
+        int rc = 0;
+
+        rc = glusterd_big_locked_handler (req,
+                                          __glusterd_handle_defrag_volume);
+        return rc;
+}
+
+/* dict_foreach_fnmatch callback: verify that a brick named in the request
+ * belongs to the volume and is marked decommissioned.
+ *
+ * @dict:  dictionary being iterated (unused).
+ * @key:   matching key (unused).
+ * @value: dictionary value; value->data is the brick string.
+ * @data:  the glusterd_volinfo_t to validate against.
+ *
+ * Returns 0 when the brick is found and decommissioned, the lookup's
+ * non-zero result when the brick is not found, and -1 when the brick is not
+ * decommissioned.
+ */
+static int
+glusterd_brick_validation (dict_t *dict, char *key, data_t *value,
+                           void *data)
+{
+        glusterd_volinfo_t   *vol   = data;
+        glusterd_brickinfo_t *brick = NULL;
+        xlator_t             *this  = NULL;
+        int32_t               rc    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        rc = glusterd_volume_brickinfo_get_by_brick (value->data, vol,
+                                                     &brick,
+                                                     _gf_false);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_BRICK_NOT_FOUND,
+                        "Incorrect brick %s for "
+                        "volume %s", value->data, vol->volname);
+        } else if (!brick->decommissioned) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_BRICK_NOT_FOUND, "Incorrect brick %s for "
+                        "volume %s", value->data, vol->volname);
+                rc = -1;
+        }
+
+        return rc;
+}
+
+/* Staging (validation) phase of a rebalance operation.
+ *
+ * @dict:      operation dictionary; "volname" and "rebalance-command" are
+ *             required, other keys are read depending on the command.
+ * @op_errstr: out parameter receiving an allocated error string on failure.
+ *
+ * After the common checks in glusterd_rebalance_cmd_validate(), the command
+ * specific checks run in the switch below. Note that the start-type cases
+ * are chained without break statements, so START_TIER runs the START checks
+ * and those run the START_FORCE checks as well.
+ *
+ * Returns 0 when staging succeeds, non-zero otherwise.
+ */
+int
+glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
+{
+ char *volname = NULL;
+ char *cmd_str = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ char *task_id_str = NULL;
+ dict_t *op_ctx = NULL;
+ xlator_t *this = 0;
+ int32_t is_force = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "rebalance-command", &cmd);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
+ msg, sizeof (msg));
+ if (ret) {
+ gf_msg_debug (this->name, 0, "failed to validate");
+ goto out;
+ }
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START_TIER:
+ /* a missing "force" key is treated as not forced */
+ ret = dict_get_int32 (dict, "force", &is_force);
+ if (ret)
+ is_force = 0;
+
+ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+ gf_asprintf (op_errstr, "volume %s is not a tier "
+ "volume.", volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+ if ((!is_force) && glusterd_is_tier_daemon_running (volinfo)) {
+ ret = gf_asprintf (op_errstr, "A Tier daemon is "
+ "already running on volume %s",
+ volname);
+ ret = -1;
+ goto out;
+ }
+ /* no break: execution continues into the START checks below */
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ /* Check if the connected clients are all of version
+ * glusterfs-3.6 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run. This check can be bypassed by using
+ * 'force'
+ */
+ ret = glusterd_check_client_op_version_support
+ (volname, GD_OP_VERSION_3_6_0, NULL);
+ if (ret) {
+ /* NOTE(review): ret now holds gf_asprintf's return value rather
+ * than a fixed error code -- confirm it is always non-zero here. */
+ ret = gf_asprintf (op_errstr, "Volume %s has one or "
+ "more connected clients of a version"
+ " lower than GlusterFS-v3.6.0. "
+ "Starting rebalance in this state "
+ "could lead to data loss.\nPlease "
+ "disconnect those clients before "
+ "attempting this command again.",
+ volname);
+ goto out;
+ }
+
+ /* Every brick must be usable: local bricks must be started, and the
+ * peers hosting remote bricks must be known and connected. */
+ cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+ brick_list) {
+ if (glusterd_is_local_brick (THIS, volinfo, brickinfo)) {
+ if (brickinfo->status != GF_BRICK_STARTED) {
+ gf_asprintf (op_errstr, "Received"
+ " rebalance on volume with "
+ " stopped brick %s",
+ brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ /* peerinfo is only dereferenced inside the RCU read-side section;
+ * each exit path unlocks before leaving */
+ rcu_read_lock ();
+ peerinfo = glusterd_peerinfo_find_by_uuid
+ (brickinfo->uuid);
+ if (!peerinfo) {
+ gf_asprintf (op_errstr, "Host node %s "
+ "of brick %s doesn't "
+ "belong to cluster",
+ brickinfo->hostname,
+ brickinfo->path);
+ ret = -1;
+ rcu_read_unlock ();
+ goto out;
+ } else if (!peerinfo->connected) {
+ gf_asprintf (op_errstr, "Host node %s "
+ "of brick %s is down",
+ brickinfo->hostname,
+ brickinfo->path);
+ ret = -1;
+ rcu_read_unlock ();
+ goto out;
+ }
+ rcu_read_unlock ();
+ }
+ }
+
+ /* no break: execution continues into the START_FORCE handling */
+ case GF_DEFRAG_CMD_START_FORCE:
+ /* The originating glusterd generates the task id into the op
+ * context; other nodes only look for it in the dict. */
+ if (is_origin_glusterd (dict)) {
+ op_ctx = glusterd_op_get_ctx ();
+ if (!op_ctx) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_OPCTX_GET_FAIL,
+ "Failed to get op_ctx");
+ goto out;
+ }
+
+ ret = glusterd_generate_and_set_task_id
+ (op_ctx, GF_REBALANCE_TID_KEY);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TASKID_GEN_FAIL,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ /* a missing id is only warned about, not treated as failure */
+ snprintf (msg, sizeof (msg),
+ "Missing rebalance-id");
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+ ret = 0;
+ }
+ }
+ ret = glusterd_defrag_start_validate (volinfo, msg,
+ sizeof (msg),
+ GD_OP_REBALANCE);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "start validate failed");
+ goto out;
+ }
+ break;
+ case GF_DEFRAG_CMD_STATUS_TIER:
+ case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STOP:
+
+ ret = dict_get_str (dict, "cmd-str", &cmd_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Failed to get "
+ "command string");
+ ret = -1;
+ goto out;
+ }
+ /* The command string tells whether this status/stop refers to a
+ * rebalance or a remove-brick; it must match the stored op. */
+ if ((strstr(cmd_str, "rebalance") != NULL) &&
+ (volinfo->rebal.op != GD_OP_REBALANCE)) {
+ snprintf (msg, sizeof(msg), "Rebalance not started.");
+ ret = -1;
+ goto out;
+ }
+
+ if (strstr(cmd_str, "remove-brick") != NULL) {
+ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+ snprintf (msg, sizeof(msg), "remove-brick not "
+ "started.");
+ ret = -1;
+ goto out;
+ }
+
+ /* For remove-brick status/stop command check whether
+ * given input brick is part of volume or not.*/
+
+ ret = dict_foreach_fnmatch (dict, "brick*",
+ glusterd_brick_validation,
+ volinfo);
+ if (ret == -1) {
+ snprintf (msg, sizeof (msg), "Incorrect brick"
+ " for volume %s", volinfo->volname);
+ goto out;
+ }
+ }
+ if (cmd == GF_DEFRAG_CMD_STATUS_TIER) {
+ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+ snprintf (msg, sizeof(msg), "volume %s is not "
+ "a tier volume.", volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ break;
+
+ case GF_DEFRAG_CMD_STOP_DETACH_TIER:
+ case GF_DEFRAG_CMD_DETACH_STATUS:
+ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+ snprintf (msg, sizeof(msg), "volume %s is not "
+ "a tier volume.", volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+ snprintf (msg, sizeof(msg), "Detach-tier "
+ "not started");
+ ret = -1;
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ /* hand any message collected in msg back to the caller */
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup (msg);
+
+ return ret;
+}
+
+int
+glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char *volname = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ gf_boolean_t volfile_update = _gf_false;
+ char *task_id_str = NULL;
+ dict_t *ctx = NULL;
+ xlator_t *this = NULL;
+ uint32_t commit_hash;
+ int32_t is_force = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "volname not given");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "rebalance-command", &cmd);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "command not given");
+ goto out;
+ }
+
+
+ ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
+ msg, sizeof (msg));
+ if (ret) {
+ gf_msg_debug (this->name, 0, "cmd validate failed");
+ goto out;
+ }
+
+ /* Set task-id, if available, in op_ctx dict for operations other than
+ * start
+ */
+ if (cmd == GF_DEFRAG_CMD_STATUS ||
+ cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_STATUS_TIER) {
+ if (!gf_uuid_is_null (volinfo->rebal.rebalance_id)) {
+ ctx = glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_OPCTX_GET_FAIL,
+ "Failed to get op_ctx");
+ ret = -1;
+ goto out;
+ }
+
+ if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, ctx,
+ GF_REMOVE_BRICK_TID_KEY);
+ else
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, ctx,
+ GF_REBALANCE_TID_KEY);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TASKID_GEN_FAIL,
+ "Failed to set task-id");
+ goto out;
+ }
+ }
+ }
+
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_FORCE:
+ case GF_DEFRAG_CMD_START_TIER:
+
+
+ ret = dict_get_int32 (dict, "force", &is_force);
+ if (ret)
+ is_force = 0;
+ if (!is_force) {
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status =
+ GF_DEFRAG_STATUS_NOT_STARTED;
+
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse (task_id_str,
+ volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
+ }
+ if (!gd_should_i_start_rebalance (volinfo)) {
+ /* Store the rebalance-id and rebalance command
+ * even if the peer isn't starting a rebalance
+ * process. On peers where a rebalance process
+ * is started, glusterd_handle_defrag_start
+ * performs the storing.
+ * Storing this is needed for having
+ * 'volume status' work correctly.
+ */
+ glusterd_store_perform_node_state_store
+ (volinfo);
+ break;
+ }
+ if (dict_get_uint32 (dict, "commit-hash", &commit_hash)
+ == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_handle_defrag_start (volinfo, msg,
+ sizeof (msg),
+ cmd, NULL, GD_OP_REBALANCE);
+ break;
+ } else {
+ /* Reset defrag status to 'STARTED' so that the
+ * pid is checked and restarted accordingly.
+ * If the pid is not running it executes the
+ * "NOT_STARTED" case and restarts the process
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.op = GD_OP_REBALANCE;
+
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse (task_id_str,
+ volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
+ }
+ if (dict_get_uint32 (dict, "commit-hash", &commit_hash)
+ == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_restart_rebalance_for_volume (volinfo);
+ break;
+ }
+ case GF_DEFRAG_CMD_STOP:
+ case GF_DEFRAG_CMD_STOP_DETACH_TIER:
+ /* Clear task-id only on explicitly stopping rebalance.
+ * Also clear the stored operation, so it doesn't cause trouble
+ * with future rebalance/remove-brick starts
+ */
+ gf_uuid_clear (volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+
+ /* Fall back to the old volume file in case of decommission*/
+ cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ volfile_update = _gf_true;
+ }
+
+ if (volfile_update == _gf_false) {
+ ret = 0;
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo");
+ goto out;
+ }
+
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER &&
+ cmd == GF_OP_CMD_STOP_DETACH_TIER) {
+ glusterd_defrag_info_set (volinfo, dict,
+ GF_DEFRAG_CMD_START_TIER,
+ GF_DEFRAG_CMD_START,
+ GD_OP_REBALANCE);
+ glusterd_restart_rebalance_for_volume (volinfo);
+ }
+
+ ret = 0;
+ break;
+
+ case GF_DEFRAG_CMD_START_DETACH_TIER:
+ case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STATUS_TIER:
+ break;
+ default:
+ break;
+ }
+
+out:
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup (msg);
+
+ return ret;
+}
+
+/*
+ * Handle a defrag event notification carried in @dict.
+ *
+ * The "volname" entry must contain the substring "rebalance/"; everything
+ * after the first '/' following that match is used as the volume name to
+ * look up. The volume's defrag status is then refreshed from @dict.
+ *
+ * Returns 0 on success, non-zero on any failure.
+ */
+int32_t
+glusterd_defrag_event_notify_handle (dict_t *dict)
+{
+        xlator_t            *this    = THIS;
+        glusterd_volinfo_t  *volinfo = NULL;
+        char                *volname = NULL;
+        char                *prefix  = NULL;
+        char                *slash   = NULL;
+        int32_t              ret     = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get volname");
+                return ret;
+        }
+
+        /* The name must carry the rebalance prefix. */
+        prefix = strstr (volname, "rebalance/");
+        if (prefix == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
+                        "volname received (%s) is not prefixed with rebalance.",
+                        volname);
+                return -1;
+        }
+
+        /* Skip past the separator to reach the bare volume name. */
+        slash = strchr (prefix, '/');
+        if (slash == NULL)
+                return -1;
+        volname = slash + 1;
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "Failed to get volinfo for %s", volname);
+                return ret;
+        }
+
+        ret = glusterd_defrag_volume_status_update (volinfo, dict);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DEFRAG_STATUS_UPDATE_FAIL,
+                        "Failed to update status");
+        }
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
new file mode 100644
index 00000000000..2b2120738ca
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -0,0 +1,905 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterfs.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "glusterd-mgmt.h"
+#include "run.h"
+#include "syscall.h"
+
+#include <signal.h>
+
+#define GLUSTERD_GET_RB_MNTPT(path, len, volinfo) \
+ snprintf (path, len, \
+ DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \
+ volinfo->volname); /* Formats "<run-dir>/<volname>-<RB_CLIENT_MOUNTPOINT>" into path (at most len bytes); presumably the replace-brick client mount point. NOTE: the expansion already ends with ';'. */
+
+extern uuid_t global_txn_id;
+
+int
+glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict);
+/*
+ * RPC handler body for the CLI replace-brick request (invoked through
+ * glusterd_handle_replace_brick under the big lock).
+ *
+ * Decodes the XDR request, unserializes its dictionary, checks that the
+ * "volname", "operation", "src-brick" and "dst-brick" keys are present and
+ * then hands the dictionary to the mgmt_v3 replace-brick transaction.
+ *
+ * Returns a negative/non-zero value if the request could not be decoded or
+ * is missing a required key; otherwise the result of
+ * glusterd_mgmt_v3_initiate_replace_brick_cmd_phases().
+ */
+int
+__glusterd_handle_replace_brick (rpcsvc_request_t *req)
+{
+        int32_t          ret         = -1;
+        gf_cli_req       cli_req     = {{0,}};
+        dict_t          *dict        = NULL;
+        char            *src_brick   = NULL;
+        char            *dst_brick   = NULL;
+        int32_t          op          = 0;
+        char            *volname     = NULL;
+        char             msg[2048]   = {0,};
+        xlator_t        *this        = NULL;
+        /* Set once dict has been passed on to the transaction code. */
+        gf_boolean_t     handed_off  = _gf_false;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "Failed to decode "
+                        "request received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_REPLACE_BRK_REQ_RCVD,
+                "Received replace brick req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+                if (!dict) {
+                        /* ret is >= 0 here; make the failure visible. */
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_DICT_CREATE_FAIL,
+                                "Unable to create dict");
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (msg, sizeof (msg), "Unable to decode the "
+                                  "command");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Could not get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "operation", &op);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "dict_get on operation failed");
+                snprintf (msg, sizeof (msg), "Could not get operation");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "src-brick", &src_brick);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to get src brick");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                goto out;
+        }
+        gf_msg_debug (this->name, 0,
+                      "src brick=%s", src_brick);
+
+        ret = dict_get_str (dict, "dst-brick", &dst_brick);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to get dest brick");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick);
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD,
+                "Received replace brick commit-force "
+                "request operation");
+
+        /* From here on the transaction code is responsible for dict
+         * (it is passed on with the CLI response), so it must not be
+         * released below. */
+        handed_off = _gf_true;
+        ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (req,
+                                                GD_OP_REPLACE_BRICK, dict);
+
+out:
+        /* Drop our reference if the request failed before hand-off. */
+        if (dict && !handed_off)
+                dict_unref (dict);
+
+        free (cli_req.dict.dict_val);//malloced by xdr
+
+        return ret;
+}
+
+/*
+ * Public RPC entry point for replace-brick: runs the real handler,
+ * __glusterd_handle_replace_brick, via glusterd_big_locked_handler.
+ */
+int
+glusterd_handle_replace_brick (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req,
+                                           __glusterd_handle_replace_brick);
+        return ret;
+}
+
+/*
+ * Fetch the destination brick recorded on @volinfo for the current
+ * replace-brick operation into *@brickinfo.
+ *
+ * Returns 0 on success, -1 if either argument is NULL.
+ */
+static int
+glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo,
+                               glusterd_brickinfo_t **brickinfo)
+{
+        if (volinfo == NULL || brickinfo == NULL)
+                return -1;
+
+        *brickinfo = volinfo->rep_brick.dst_brick;
+
+        return 0;
+}
+
+/*
+ * Staging (validation) step of the replace-brick operation.
+ *
+ * Reads "src-brick", "dst-brick", "volname" and "operation" from @dict and
+ * verifies, in order, that:
+ *   - the volume exists, is started and is not disallowed for tiering,
+ *   - the destination brick path is not too long,
+ *   - geo-replication is not running and no rebalance is in progress,
+ *   - the operation is "GF_REPLACE_OP_COMMIT_FORCE" (anything else fails),
+ *   - the source brick belongs to the volume and fuse is available,
+ *   - the destination brick is of the form <HOSTNAME>:<export-dir> and
+ *     passes new-brick validation,
+ *   - the destination host is either local (brick path is validated and
+ *     created) or a connected, befriended peer.
+ *
+ * On success the source/destination brickinfo are recorded in
+ * volinfo->rep_brick, and for a local destination brick the mount dir and
+ * brick count are placed in @rsp_dict (op-version >= 3.6.0).
+ *
+ * @op_errstr receives a gf_strdup'd error message on most failure paths.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
+                                 dict_t *rsp_dict)
+{
+        int                      ret                = 0;
+        char                    *src_brick          = NULL;
+        char                    *dst_brick          = NULL;
+        char                    *volname            = NULL;
+        char                    *replace_op         = NULL;
+        glusterd_volinfo_t      *volinfo            = NULL;
+        glusterd_brickinfo_t    *src_brickinfo      = NULL;
+        char                    *host               = NULL;
+        char                    *path               = NULL;
+        char                     msg[2048]          = {0};
+        char                    *dup_dstbrick       = NULL;
+        glusterd_peerinfo_t     *peerinfo           = NULL;
+        glusterd_brickinfo_t    *dst_brickinfo      = NULL;
+        glusterd_conf_t         *priv               = NULL;
+        char                     pidfile[PATH_MAX]  = {0};
+        xlator_t                *this               = NULL;
+        gf_boolean_t             is_force           = _gf_false;
+        gsync_status_param_t     param              = {0,};
+        char                    *c                  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "src-brick", &src_brick);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get src brick");
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "src brick=%s", src_brick);
+
+        ret = dict_get_str (dict, "dst-brick", &dst_brick);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get dest brick");
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "operation", &replace_op);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "dict get on replace-brick operation failed");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "volume: %s does not exist",
+                          volname);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+                ret = -1;
+                snprintf (msg, sizeof (msg), "volume: %s is not started",
+                          volname);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = glusterd_disallow_op_for_tier (volinfo, GD_OP_REPLACE_BRICK, -1);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Replace brick commands are not "
+                          "supported on tiered volume %s", volname);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        if (!glusterd_store_is_valid_brickpath (volname, dst_brick) ||
+            !glusterd_is_valid_volfpath (volname, dst_brick)) {
+                snprintf (msg, sizeof (msg), "brick path %s is too "
+                          "long.", dst_brick);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRKPATH_TOO_LONG, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+
+                ret = -1;
+                goto out;
+        }
+
+        /* If geo-rep is configured, for this volume, it should be stopped. */
+        param.volinfo = volinfo;
+        ret = glusterd_check_geo_rep_running (&param, op_errstr);
+        if (ret || param.is_active) {
+                ret = -1;
+                goto out;
+        }
+
+        if (glusterd_is_defrag_on (volinfo)) {
+                snprintf (msg, sizeof (msg), "Volume name %s rebalance is in "
+                          "progress. Please retry after completion", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OIP_RETRY_LATER, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        /* Only the commit-force flavour of replace-brick is accepted. */
+        if (!strcmp (replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+                is_force = _gf_true;
+        } else {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
+                                                      &src_brickinfo,
+                                                      _gf_false);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "brick: %s does not exist in "
+                          "volume: %s", src_brick, volname);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        if (dict && !glusterd_is_fuse_available ()) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RB_CMD_FAIL, "Unable to open /dev/"
+                        "fuse (%s), replace-brick command failed",
+                        strerror (errno));
+                snprintf (msg, sizeof (msg), "Fuse unavailable\n "
+                          "Replace-brick failed");
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        if (gf_is_local_addr (src_brickinfo->hostname)) {
+                gf_msg_debug (this->name, 0,
+                              "I AM THE SOURCE HOST");
+                if (src_brickinfo->port && rsp_dict) {
+                        ret = dict_set_int32 (rsp_dict, "src-brick-port",
+                                              src_brickinfo->port);
+                        if (ret) {
+                                /* Best effort: failure is only logged. */
+                                gf_msg_debug (this->name, 0,
+                                              "Could not set src-brick-port=%d",
+                                              src_brickinfo->port);
+                        }
+                }
+
+                GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, src_brickinfo,
+                                            priv);
+
+        }
+
+        dup_dstbrick = gf_strdup (dst_brick);
+        if (!dup_dstbrick) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Memory allocation failed");
+                goto out;
+        }
+
+        /*
+         * IPv4 address contains '.' and ipv6 addresses contains ':'
+         * So finding the last occurrence of ':' to
+         * mark the start of brick path
+         */
+        c = strrchr (dup_dstbrick, ':');
+        if (c != NULL) {
+                c[0] = '\0';
+                host = dup_dstbrick;
+                /* The export dir starts right after the separator.
+                 * (The post-increment used previously left path pointing
+                 * at the terminator written above.) */
+                path = c + 1;
+        }
+
+        if (!host || !path) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BAD_FORMAT,
+                        "dst brick %s is not of form <HOSTNAME>:<export-dir>",
+                        dst_brick);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_brickinfo_new_from_brick (dst_brick, &dst_brickinfo,
+                                                 _gf_true, NULL);
+        if (ret)
+                goto out;
+
+        ret = glusterd_new_brick_validate (dst_brick, dst_brickinfo,
+                                           msg, sizeof (msg));
+        if (ret) {
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                /* Log the local buffer: *op_errstr may be NULL if the
+                 * duplication failed. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_VALIDATE_FAIL, "%s", msg);
+                goto out;
+        }
+
+        if (!strcmp (replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+                /* Remember both bricks for the commit phase. */
+                volinfo->rep_brick.src_brick = src_brickinfo;
+                volinfo->rep_brick.dst_brick = dst_brickinfo;
+        }
+
+        if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) {
+
+                ret = -1;
+                *op_errstr = gf_strdup ("Incorrect source or "
+                                        "destination brick");
+                if (*op_errstr)
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_BRICK_NOT_FOUND, "%s", *op_errstr);
+                goto out;
+        }
+
+        if (gf_is_local_addr (host)) {
+                ret = glusterd_validate_and_create_brickpath (dst_brickinfo,
+                                                  volinfo->volume_id,
+                                                  op_errstr, is_force);
+                if (ret)
+                        goto out;
+        }
+
+        if (!gf_is_local_addr (host)) {
+                /* Remote destination: the host must be a healthy peer. */
+                rcu_read_lock ();
+
+                peerinfo = glusterd_peerinfo_find (NULL, host);
+                if (peerinfo == NULL) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), "%s, is not a friend",
+                                  host);
+                        *op_errstr = gf_strdup (msg);
+
+                } else if (!peerinfo->connected) {
+                        snprintf (msg, sizeof (msg), "%s, is not connected at "
+                                  "the moment", host);
+                        *op_errstr = gf_strdup (msg);
+                        ret = -1;
+
+                } else if (GD_FRIEND_STATE_BEFRIENDED !=
+                                peerinfo->state.state) {
+                        snprintf (msg, sizeof (msg), "%s, is not befriended "
+                                  "at the moment", host);
+                        *op_errstr = gf_strdup (msg);
+                        ret = -1;
+                }
+                rcu_read_unlock ();
+
+                if (ret)
+                        goto out;
+
+        } else if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+                /* A bricks mount dir is required only by snapshots which were
+                 * introduced in gluster-3.6.0
+                 */
+                ret = glusterd_get_brick_mount_dir (dst_brickinfo->path,
+                                                    dst_brickinfo->hostname,
+                                                    dst_brickinfo->mount_dir);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+                                "Failed to get brick mount_dir");
+                        goto out;
+                }
+
+                ret = dict_set_dynstr_with_alloc (rsp_dict, "brick1.mount_dir",
+                                                  dst_brickinfo->mount_dir);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set brick1.mount_dir");
+                        goto out;
+                }
+
+                ret = dict_set_int32 (rsp_dict, "brick_count", 1);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set local_brick_count");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        GF_FREE (dup_dstbrick);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Stop the replace-brick destination brick process of @volinfo by sending
+ * SIGTERM to the process recorded in
+ * <workdir>/vols/<volname>/<RB_DSTBRICK_PIDFILE>.
+ *
+ * Returns whatever glusterd_service_stop() returns.
+ */
+static int
+rb_kill_destination_brick (glusterd_volinfo_t *volinfo,
+                           glusterd_brickinfo_t *dst_brickinfo)
+{
+        char                 pidfile[PATH_MAX] = {0,};
+        glusterd_conf_t     *conf              = THIS->private;
+        int                  ret               = 0;
+
+        snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s",
+                  conf->workdir, volinfo->volname,
+                  RB_DSTBRICK_PIDFILE);
+
+        ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true);
+
+        return ret;
+}
+
+/*
+ * Synchronise the destination brick port between @dst_brickinfo and the
+ * request/response dictionaries.
+ *
+ * If @req_dict carries "dst-brick-port", that value is copied into
+ * dst_brickinfo->port. When the destination brick's host is local, the
+ * (possibly updated) port is then written back as "dst-brick-port" into
+ * @rsp_dict and @req_dict, whichever of them are non-NULL.
+ *
+ * Returns 0 on success, or the error from the failing dict_set_int32().
+ */
+static int
+rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict,
+                         dict_t *req_dict, char *replace_op)
+{
+        int     ret      = 0;
+        int     dst_port = 0;
+
+        if (dict_get_int32 (req_dict, "dst-brick-port", &dst_port) == 0)
+                dst_brickinfo->port = dst_port;
+
+        /* Nothing more to do unless this node hosts the destination. */
+        if (!gf_is_local_addr (dst_brickinfo->hostname))
+                return ret;
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_BRK_PORT_NO_ADD_INDO,
+                "adding dst-brick port no");
+
+        if (rsp_dict != NULL) {
+                ret = dict_set_int32 (rsp_dict, "dst-brick-port",
+                                      dst_brickinfo->port);
+                if (ret != 0) {
+                        gf_msg_debug ("glusterd", 0,
+                                "Could not set dst-brick port no in rsp dict");
+                        return ret;
+                }
+        }
+
+        if (req_dict != NULL) {
+                ret = dict_set_int32 (req_dict, "dst-brick-port",
+                                      dst_brickinfo->port);
+                if (ret != 0) {
+                        gf_msg_debug ("glusterd", 0,
+                                      "Could not set dst-brick port no");
+                        return ret;
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * Swap @old_brick for @new_brick in @volinfo.
+ *
+ * A brickinfo is created and resolved for @new_brick, inherits the old
+ * brick's brick_id (and, for op-version >= 3.6.0, the "brick1.mount_dir"
+ * value from @dict), is linked into the brick list at the old brick's
+ * position, and the old brick is then removed. For replicate volumes the
+ * replicate brick ops are run when the new brick belongs to this node.
+ * Finally volfiles are regenerated and, if the volume is started, the new
+ * brick is started.
+ *
+ * Returns 0 on success, non-zero on failure. A brickinfo that was created
+ * but never linked into the volume is released on failure.
+ */
+static int
+glusterd_op_perform_replace_brick (glusterd_volinfo_t  *volinfo,
+                                   char *old_brick, char *new_brick,
+                                   dict_t *dict)
+{
+        char                    *brick_mount_dir = NULL;
+        glusterd_brickinfo_t    *old_brickinfo   = NULL;
+        glusterd_brickinfo_t    *new_brickinfo   = NULL;
+        int32_t                  ret             = -1;
+        xlator_t                *this            = NULL;
+        glusterd_conf_t         *conf            = NULL;
+        /* True once new_brickinfo is owned by volinfo's brick list. */
+        gf_boolean_t             linked          = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = glusterd_brickinfo_new_from_brick (new_brick, &new_brickinfo,
+                                                 _gf_true, NULL);
+        if (ret)
+                goto out;
+
+        ret = glusterd_resolve_brick (new_brickinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_volume_brickinfo_get_by_brick (old_brick,
+                                                      volinfo, &old_brickinfo,
+                                                      _gf_false);
+        if (ret)
+                goto out;
+
+        /* snprintf always NUL-terminates, unlike strncpy with the full
+         * destination size. */
+        snprintf (new_brickinfo->brick_id, sizeof (new_brickinfo->brick_id),
+                  "%s", old_brickinfo->brick_id);
+
+        /* A bricks mount dir is required only by snapshots which were
+         * introduced in gluster-3.6.0
+         */
+        if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+                ret = dict_get_str (dict, "brick1.mount_dir", &brick_mount_dir);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+                                "brick1.mount_dir not present");
+                        goto out;
+                }
+                snprintf (new_brickinfo->mount_dir,
+                          sizeof (new_brickinfo->mount_dir),
+                          "%s", brick_mount_dir);
+        }
+
+        cds_list_add_tail (&new_brickinfo->brick_list,
+                           &old_brickinfo->brick_list);
+        linked = _gf_true;
+
+        volinfo->brick_count++;
+
+        ret = glusterd_op_perform_remove_brick (volinfo, old_brick, 1, NULL);
+        if (ret)
+                goto out;
+
+        /* if the volume is a replicate volume, do: */
+        if (glusterd_is_volume_replicate (volinfo)) {
+                if (!gf_uuid_compare (new_brickinfo->uuid, MY_UUID)) {
+                        ret = glusterd_handle_replicate_brick_ops (volinfo,
+                                        new_brickinfo, GD_OP_REPLACE_BRICK);
+                        if (ret < 0)
+                                goto out;
+                }
+        }
+
+        ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        if (ret)
+                goto out;
+
+        if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+                ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false);
+                if (ret)
+                        goto out;
+        }
+
+out:
+        /* Avoid leaking a brickinfo that never made it into the volume. */
+        if (ret && new_brickinfo && !linked)
+                (void) glusterd_brickinfo_delete (new_brickinfo);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit step of the replace-brick operation.
+ *
+ * Reads "src-brick", "dst-brick", "volname" and "operation" from @dict,
+ * resolves the destination brick recorded on the volume during staging,
+ * propagates the destination port through @rsp_dict/@dict, and - for the
+ * only supported operation, "GF_REPLACE_OP_COMMIT_FORCE" - stops the
+ * destination brick if it is local, stops the volume's services, performs
+ * the brick swap, restarts the services, notifies clients to refetch the
+ * volfile and persists the volinfo.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)
+{
+        int                      ret             = 0;
+        char                    *replace_op      = NULL;
+        glusterd_volinfo_t      *volinfo         = NULL;
+        char                    *volname         = NULL;
+        xlator_t                *this            = NULL;
+        glusterd_conf_t         *priv            = NULL;
+        char                    *src_brick       = NULL;
+        char                    *dst_brick       = NULL;
+        glusterd_brickinfo_t    *src_brickinfo   = NULL;
+        glusterd_brickinfo_t    *dst_brickinfo   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "src-brick", &src_brick);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get src brick");
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "src brick=%s", src_brick);
+
+        ret = dict_get_str (dict, "dst-brick", &dst_brick);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get dst brick");
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "operation", &replace_op);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "dict_get on operation failed");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                /* This is a lookup failure, not an allocation failure. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "Failed to get volinfo for %s", volname);
+                goto out;
+        }
+
+        ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
+                                                      &src_brickinfo,
+                                                      _gf_false);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "Unable to get src-brickinfo");
+                goto out;
+        }
+
+        ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RB_BRICKINFO_GET_FAIL, "Unable to get "
+                        "replace brick destination brickinfo");
+                goto out;
+        }
+
+        ret = glusterd_resolve_brick (dst_brickinfo);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "Unable to resolve dst-brickinfo");
+                goto out;
+        }
+
+        ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
+                                       dict, replace_op);
+        if (ret)
+                goto out;
+
+        /* Only commit-force is supported. */
+        if (strcmp (replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+                ret = -1;
+                goto out;
+        }
+
+        if (gf_is_local_addr (dst_brickinfo->hostname)) {
+                gf_msg_debug (this->name, 0, "I AM THE DESTINATION HOST");
+                ret = rb_kill_destination_brick (volinfo, dst_brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                GD_MSG_BRK_CLEANUP_FAIL,
+                                "Unable to cleanup dst brick");
+                        goto out;
+                }
+        }
+
+        /* A failure to stop the services is logged but not fatal. */
+        ret = glusterd_svcs_stop (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NFS_SERVER_STOP_FAIL,
+                        "Unable to stop nfs server, ret: %d", ret);
+        }
+
+        ret = glusterd_op_perform_replace_brick (volinfo, src_brick,
+                                                 dst_brick, dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_BRICK_ADD_FAIL, "Unable to add dst-brick: "
+                        "%s to volume: %s", dst_brick, volinfo->volname);
+                /* Bring the services back up before bailing out. */
+                (void) glusterd_svcs_manager (volinfo);
+                goto out;
+        }
+
+        volinfo->rebal.defrag_status = 0;
+
+        /* A failure here is logged only; ret is overwritten below. */
+        ret = glusterd_svcs_manager (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_NFS_VOL_FILE_GEN_FAIL,
+                        "Failed to generate nfs volume file");
+        }
+
+        ret = glusterd_fetchspec_notify (THIS);
+
+        /* The staged destination brickinfo is no longer needed. */
+        glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick);
+        volinfo->rep_brick.src_brick = NULL;
+        volinfo->rep_brick.dst_brick = NULL;
+
+        if (!ret)
+                ret = glusterd_store_volinfo (volinfo,
+                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RBOP_STATE_STORE_FAIL, "Couldn't store"
+                        " replace brick operation's state");
+
+out:
+        return ret;
+}
+
+/*
+ * Drive the mgmt_v3 transaction for a replace-brick request:
+ * lockdown -> build payload -> pre-validate -> commit, followed by lock
+ * release and the CLI response.
+ *
+ * @dict is tagged with "originator_uuid" and "is_synctasked" and is passed
+ * to glusterd_op_send_cli_response() together with the overall result.
+ * A private copy of @dict is kept for releasing the local locks.
+ *
+ * Always returns 0; the outcome of the operation is reported to the CLI
+ * through the response instead.
+ */
+int
+glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (rpcsvc_request_t *req,
+                                                    glusterd_op_t op,
+                                                    dict_t *dict)
+{
+        int32_t              ret             = -1;
+        int32_t              op_ret          = -1;
+        uint32_t             txn_generation  = 0;
+        uint32_t             op_errno        = 0;
+        char                *op_errstr       = NULL;
+        dict_t              *req_dict        = NULL;
+        dict_t              *tmp_dict        = NULL;
+        uuid_t              *originator_uuid = NULL;
+        xlator_t            *this            = NULL;
+        glusterd_conf_t     *conf            = NULL;
+        gf_boolean_t         is_acquired     = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        txn_generation = conf->generation;
+        originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+                                     gf_common_mt_uuid_t);
+        if (!originator_uuid) {
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_copy (*originator_uuid, MY_UUID);
+        ret = dict_set_bin (dict, "originator_uuid",
+                            originator_uuid, sizeof (uuid_t));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set originator_uuid.");
+                GF_FREE (originator_uuid);
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, "is_synctasked", _gf_true);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set synctasked flag to true.");
+                goto out;
+        }
+
+        tmp_dict = dict_new();
+        if (!tmp_dict) {
+                /* ret is 0 from the call above; without this the failure
+                 * would be reported to the CLI as success. */
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL, "Unable to create dict");
+                goto out;
+        }
+        /* Keep a copy of the original request for unlocking later. */
+        dict_copy (dict, tmp_dict);
+
+        ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr,
+                                                  &op_errno, &is_acquired,
+                                                  txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+                        "mgmt_v3 lockdown failed.");
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD,
+                        gd_op_list[op]);
+                if (op_errstr == NULL)
+                        gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr,
+                                             &op_errno, txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL, "Pre Validation Failed");
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr,
+                                       &op_errno, txn_generation);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_OP_FAIL, "Commit Op Failed");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        op_ret = ret;
+
+        (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret,
+                                                    &op_errstr, is_acquired,
+                                                    txn_generation);
+
+        if (is_acquired) {
+                ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_UNLOCK_FAIL,
+                                "Failed to release mgmt_v3 locks on "
+                                "localhost.");
+                        op_ret = ret;
+                }
+        }
+        /* SEND CLI RESPONSE */
+        glusterd_op_send_cli_response (op, op_ret, op_errno, req,
+                                       dict, op_errstr);
+
+        if (req_dict)
+                dict_unref (req_dict);
+
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        if (op_errstr) {
+                GF_FREE (op_errstr);
+                op_errstr = NULL;
+        }
+
+        return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
new file mode 100644
index 00000000000..73646ec4bfc
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -0,0 +1,2452 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "rpc-clnt.h"
+#include "glusterd1-xdr.h"
+#include "cli1-xdr.h"
+
+#include "xdr-generic.h"
+
+#include "compat-errno.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-sm.h"
+#include "glusterd.h"
+#include "protocol-common.h"
+#include "glusterd-utils.h"
+#include "common-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-snapshot-utils.h"
+#include <sys/uio.h>
+
+
+#define SERVER_PATH_MAX (16 * 1024)
+
+
+extern glusterd_op_info_t opinfo;
+extern uuid_t global_txn_id;
+
+/*
+ * Build the generic CLI reply (gf_cli_rsp) for an operation and pass it to
+ * glusterd_to_cli().
+ *
+ * op        - operation being answered; selects the per-op handling below.
+ * op_ret    - copied into rsp.op_ret.
+ * op_errno  - copied into rsp.op_errno, unless a non-zero "status" value was
+ *             read from the context for the rebalance family of operations.
+ * req       - request handed through to glusterd_to_cli().
+ * op_ctx    - optional dict_t; when non-NULL it is serialized into the reply.
+ * op_errstr - fallback error string, used when no per-op string was picked.
+ *
+ * Always returns 0.
+ */
+int32_t
+glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
+                               int32_t op_errno, rpcsvc_request_t *req,
+                               void *op_ctx, char *op_errstr)
+{
+        gf_cli_rsp       rsp        = {0,};
+        dict_t          *ctx        = op_ctx;
+        xlator_t        *this       = THIS;
+        glusterd_conf_t *conf       = NULL;
+        char            *errstr     = NULL;
+        char            *serialized = NULL;
+        int32_t          status     = 0;
+        int32_t          count      = 0;
+        int32_t          ret        = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        switch (op) {
+        case GD_OP_DETACH_TIER:
+        case GD_OP_REMOVE_BRICK:
+        case GD_OP_COPY_FILE:
+                /* the context may carry its own error string */
+                if (ctx)
+                        ret = dict_get_str (ctx, "errstr", &errstr);
+                break;
+
+        case GD_OP_RESET_VOLUME:
+                if (op_ret && !op_errstr)
+                        errstr = "Error while resetting options";
+                break;
+
+        case GD_OP_TIER_MIGRATE:
+        case GD_OP_REBALANCE:
+        case GD_OP_DEFRAG_BRICK_VOLUME:
+                if (ctx && dict_get_int32 (ctx, "status", &status)) {
+                        gf_msg_trace (this->name, 0,
+                                      "failed to get status");
+                }
+                break;
+
+        case GD_OP_GSYNC_CREATE:
+        case GD_OP_GSYNC_SET:
+        case GD_OP_SYS_EXEC:
+                if (ctx) {
+                        ret = dict_get_str (ctx, "errstr", &errstr);
+                        /* swallow error here, that will be re-triggered
+                         * in cli */
+                        ret = dict_set_str (ctx, "glusterd_workdir",
+                                            conf->workdir);
+                }
+                break;
+
+        case GD_OP_PROFILE_VOLUME:
+                /* make sure the reply always has a "count" key */
+                if (ctx && dict_get_int32 (ctx, "count", &count)) {
+                        ret = dict_set_int32 (ctx, "count", 0);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "failed to set count in dictionary");
+                        }
+                }
+                break;
+
+        case GD_OP_START_BRICK:
+        case GD_OP_STOP_BRICK:
+                gf_msg_debug (this->name, 0, "op '%s' not supported",
+                              gd_op_list[op]);
+                break;
+
+        case GD_OP_NONE:
+        case GD_OP_MAX:
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_OP_UNSUPPORTED, "invalid operation");
+                break;
+
+        case GD_OP_CREATE_VOLUME:
+        case GD_OP_START_VOLUME:
+        case GD_OP_STOP_VOLUME:
+        case GD_OP_DELETE_VOLUME:
+        case GD_OP_DEFRAG_VOLUME:
+        case GD_OP_ADD_BRICK:
+        case GD_OP_LOG_ROTATE:
+        case GD_OP_SYNC_VOLUME:
+        case GD_OP_STATEDUMP_VOLUME:
+        case GD_OP_REPLACE_BRICK:
+        case GD_OP_STATUS_VOLUME:
+        case GD_OP_SET_VOLUME:
+        case GD_OP_GANESHA:
+        case GD_OP_LIST_VOLUME:
+        case GD_OP_CLEARLOCKS_VOLUME:
+        case GD_OP_HEAL_VOLUME:
+        case GD_OP_QUOTA:
+        case GD_OP_SNAP:
+        case GD_OP_BARRIER:
+        case GD_OP_BITROT:
+        case GD_OP_SCRUB_STATUS:
+                /* nothing specific to be done */
+                break;
+        }
+
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = op_errno;
+
+        /* per-op string wins over the caller's; never send a NULL string */
+        rsp.op_errstr = errstr ? errstr : op_errstr;
+        if (!rsp.op_errstr)
+                rsp.op_errstr = "";
+
+        if (ctx) {
+                ret = dict_allocate_and_serialize (ctx, &rsp.dict.dict_val,
+                                                   &rsp.dict.dict_len);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SERL_LENGTH_GET_FAIL, "failed to "
+                                "serialize buffer");
+                } else {
+                        /* remembered so it can be released after sending */
+                        serialized = rsp.dict.dict_val;
+                }
+        }
+
+        /* needed by 'rebalance status' */
+        if (status)
+                rsp.op_errno = status;
+
+        glusterd_to_cli (req, &rsp, NULL, 0, NULL,
+                         (xdrproc_t) xdr_gf_cli_rsp, ctx);
+        ret = 0;
+
+        GF_FREE (serialized);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Invoke the RPC callback `fn` with the caller's arguments while holding
+ * the big_lock found in THIS->private, and return whatever `fn` returned.
+ */
+int
+glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov,
+                         int count, void *myframe, fop_cbk_fn_t fn)
+{
+        glusterd_conf_t *conf   = THIS->private;
+        int              cbk_rc = -1;
+
+        synclock_lock (&conf->big_lock);
+        {
+                cbk_rc = fn (req, iov, count, myframe);
+        }
+        synclock_unlock (&conf->big_lock);
+
+        return cbk_rc;
+}
+
+/*
+ * Reply handler for a probe request sent to a peer.
+ *
+ * Decodes the gd1_mgmt_probe_rsp and then takes one of four paths:
+ *   - peer reported failure: answer the CLI (if any) and drop the peer;
+ *   - the uuid matches the peerinfo found for this reply and the cluster
+ *     op-version allows address lists: add the new hostname to that peer;
+ *   - the hostname differs from the one stored in the found peerinfo:
+ *     report GF_PROBE_FRIEND and drop the stale entry;
+ *   - otherwise: record the uuid and inject GD_FRIEND_EVENT_INIT_FRIEND_REQ.
+ *
+ * The probe context is taken from ((call_frame_t *)myframe)->local.
+ * Returns 0 on success (and then kicks both state machines), non-zero
+ * otherwise. The frame is always destroyed before returning.
+ */
+int
+__glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov,
+                      int count, void *myframe)
+{
+        gd1_mgmt_probe_rsp          rsp      = {{0},};
+        int                         ret      = 0;
+        glusterd_peerinfo_t        *peerinfo = NULL;
+        glusterd_friend_sm_event_t *event    = NULL;
+        glusterd_probe_ctx_t       *ctx      = NULL;
+        xlator_t                   *this     = NULL;
+        glusterd_conf_t            *conf     = NULL;
+
+        if (-1 == req->rpc_status) {
+                goto out;
+        }
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL, "error");
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_PROBE_REQ_RESP_RCVD,
+                "Received probe resp from uuid: %s, host: %s",
+                uuid_utoa (rsp.uuid), rsp.hostname);
+        if (rsp.op_ret != 0) {
+                ctx = ((call_frame_t *)myframe)->local;
+                ((call_frame_t *)myframe)->local = NULL;
+
+                GF_ASSERT (ctx);
+
+                if (ctx->req) {
+                        glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret,
+                                                      rsp.op_errno,
+                                                      rsp.op_errstr,
+                                                      ctx->hostname, ctx->port,
+                                                      ctx->dict);
+                }
+
+                glusterd_destroy_probe_ctx (ctx);
+                (void) glusterd_friend_remove (rsp.uuid, rsp.hostname);
+                ret = rsp.op_ret;
+                goto out;
+        }
+
+        rcu_read_lock ();
+        peerinfo = glusterd_peerinfo_find (rsp.uuid, rsp.hostname);
+        if (peerinfo == NULL) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peerd %s(%s)",
+                        rsp.hostname, uuid_utoa (rsp.uuid));
+                goto unlock;
+        }
+
+        /*
+         * In the case of a fresh probe rsp.uuid and peerinfo.uuid will not
+         * match, as peerinfo->uuid will be NULL.
+         *
+         * In the case of a peer probe being done to add a new network to a
+         * peer, rsp.uuid will match an existing peerinfo.uuid. If we have
+         * reached this stage it means that the current address/hostname being
+         * used isn't present in the found peerinfo. If it were, we would have
+         * found out earlier in the probe process and wouldn't even reach till
+         * here. So, we need to add the new hostname to the peer.
+         *
+         * This addition should only be done for cluster op-version >=
+         * GD_OP_VERSION_3_6_0 as address lists are only supported from then
+         * on. Also, this update should only be done when an explicit CLI
+         * probe command was used to begin the probe process.
+         */
+        if ((conf->op_version >= GD_OP_VERSION_3_6_0) &&
+            (gf_uuid_compare (rsp.uuid, peerinfo->uuid) == 0)) {
+                ctx = ((call_frame_t *)myframe)->local;
+                /* A missing context is treated as an error here, exactly
+                 * as the other branches of this function do, instead of
+                 * being dereferenced.
+                 */
+                if (!ctx) {
+                        ret = -1;
+                        goto unlock;
+                }
+                /* Presence of ctx->req implies this probe was started by a
+                 * cli probe command
+                 */
+                if (ctx->req == NULL)
+                        goto cont;
+
+                gf_msg_debug (this->name, 0, "Adding address '%s' to "
+                              "existing peer %s", rsp.hostname,
+                              uuid_utoa (rsp.uuid));
+
+                ret = glusterd_friend_remove (NULL, rsp.hostname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STALE_PEERINFO_REMOVE_FAIL,
+                                "Could not remove "
+                                "stale peerinfo with name %s", rsp.hostname);
+                        goto reply;
+                }
+
+                ret = gd_add_address_to_peer (peerinfo, rsp.hostname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL,
+                                "Couldn't add hostname to peer list");
+                        goto reply;
+                }
+
+                /* Injecting EVENT_NEW_NAME to send update */
+                ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_NEW_NAME,
+                                                    &event);
+                if (!ret) {
+                        event->peername = gf_strdup (peerinfo->hostname);
+                        gf_uuid_copy (event->peerid, peerinfo->uuid);
+
+                        ret = glusterd_friend_sm_inject_event (event);
+                }
+                rsp.op_errno = GF_PROBE_FRIEND;
+
+reply:
+                ctx = ((call_frame_t *)myframe)->local;
+                ((call_frame_t *)myframe)->local = NULL;
+
+                if (!ctx) {
+                        ret = -1;
+                        goto unlock;
+                }
+
+                if (ctx->req) {
+                        glusterd_xfer_cli_probe_resp (ctx->req, ret,
+                                                      rsp.op_errno,
+                                                      rsp.op_errstr,
+                                                      ctx->hostname, ctx->port,
+                                                      ctx->dict);
+                }
+
+                glusterd_destroy_probe_ctx (ctx);
+
+                goto unlock;
+
+        } else if (strncasecmp (rsp.hostname, peerinfo->hostname, 1024)) {
+                gf_msg (THIS->name, GF_LOG_INFO, 0,
+                        GD_MSG_HOST_PRESENT_ALREADY, "Host: %s  with uuid: %s "
+                        "already present in cluster with alias hostname: %s",
+                        rsp.hostname, uuid_utoa (rsp.uuid), peerinfo->hostname);
+
+                ctx = ((call_frame_t *)myframe)->local;
+                ((call_frame_t *)myframe)->local = NULL;
+
+                if (!ctx) {
+                        ret = -1;
+                        goto unlock;
+                }
+
+                rsp.op_errno = GF_PROBE_FRIEND;
+                if (ctx->req) {
+                        glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret,
+                                                      rsp.op_errno,
+                                                      rsp.op_errstr,
+                                                      ctx->hostname, ctx->port,
+                                                      ctx->dict);
+                }
+
+                glusterd_destroy_probe_ctx (ctx);
+                (void) glusterd_friend_remove (NULL, rsp.hostname);
+                ret = rsp.op_ret;
+
+                goto unlock;
+        }
+
+cont:
+        gf_uuid_copy (peerinfo->uuid, rsp.uuid);
+
+        ret = glusterd_friend_sm_new_event
+                        (GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL,
+                        "Unable to get event");
+                goto unlock;
+        }
+
+        event->peername = gf_strdup (peerinfo->hostname);
+        gf_uuid_copy (event->peerid, peerinfo->uuid);
+
+        /* the probe context travels on with the event */
+        event->ctx = ((call_frame_t *)myframe)->local;
+        ((call_frame_t *)myframe)->local = NULL;
+        ret = glusterd_friend_sm_inject_event (event);
+
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_PROBE_REQ_RESP_RCVD, "Received resp to probe req");
+
+unlock:
+        rcu_read_unlock ();
+
+out:
+        free (rsp.hostname);//malloced by xdr
+        /* NOTE(review): assumes op_errstr is an xdr-decoded (malloced)
+         * string like hostname; it stays NULL when nothing was decoded.
+         */
+        free (rsp.op_errstr);
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+
+        /* Attempt to start the state machine. Needed as no state machine could
+         * be running at time this RPC reply was received
+         */
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        return ret;
+}
+
+/*
+ * RPC reply entry point for probe requests: runs __glusterd_probe_cbk()
+ * through glusterd_big_locked_cbk() and returns its result.
+ */
+int
+glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov,
+                    int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_probe_cbk);
+
+        return cbk_ret;
+}
+
+
+/*
+ * Reply handler for a friend-add request.
+ *
+ * Decodes the gd1_mgmt_friend_rsp, looks up the answering peer and injects
+ * GD_FRIEND_EVENT_RCVD_ACC or GD_FRIEND_EVENT_RCVD_RJT (depending on the
+ * peer's op_ret) into the friend state machine. When the probe context
+ * stored in the frame carries a CLI request, the CLI is answered as well.
+ *
+ * Returns the result of the last step performed; on 0 both state machines
+ * are kicked. The probe context and the frame are always released.
+ */
+int
+__glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov,
+                           int count, void *myframe)
+{
+        gd1_mgmt_friend_rsp              rsp        = {{0},};
+        int                              ret        = -1;
+        glusterd_friend_sm_event_t      *event      = NULL;
+        glusterd_friend_sm_event_type_t  event_type = GD_FRIEND_EVENT_NONE;
+        glusterd_peerinfo_t             *peerinfo   = NULL;
+        int32_t                          op_ret     = -1;
+        int32_t                          op_errno   = -1;
+        glusterd_probe_ctx_t            *ctx        = NULL;
+        glusterd_friend_update_ctx_t    *ev_ctx     = NULL;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+        if (ret < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_RES_DECODE_FAIL, "error");
+                rsp.op_ret   = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        op_ret   = rsp.op_ret;
+        op_errno = rsp.op_errno;
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_RESPONSE_INFO,
+                "Received %s from uuid: %s, host: %s, port: %d",
+                (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname,
+                rsp.port);
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find (rsp.uuid, rsp.hostname);
+        if (peerinfo == NULL) {
+                ret = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER,
+                        "received friend add response from"
+                        " unknown peer uuid: %s", uuid_utoa (rsp.uuid));
+                goto unlock;
+        }
+
+        if (op_ret)
+                event_type = GD_FRIEND_EVENT_RCVD_RJT;
+        else
+                event_type = GD_FRIEND_EVENT_RCVD_ACC;
+
+        ret = glusterd_friend_sm_new_event (event_type, &event);
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_NEW_GET_FAIL,
+                        "Unable to get event");
+                goto unlock;
+        }
+
+        ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx),
+                            gf_gld_mt_friend_update_ctx_t);
+        if (!ev_ctx) {
+                /* NOTE(review): `event` was obtained above and is not
+                 * injected on this path; it looks like it is leaked here -
+                 * confirm how glusterd_friend_sm_new_event() allocates it.
+                 */
+                ret = -1;
+                goto unlock;
+        }
+
+        gf_uuid_copy (ev_ctx->uuid, rsp.uuid);
+        ev_ctx->hostname = gf_strdup (rsp.hostname);
+
+        event->peername = gf_strdup (peerinfo->hostname);
+        gf_uuid_copy (event->peerid, peerinfo->uuid);
+        event->ctx = ev_ctx;
+        ret = glusterd_friend_sm_inject_event (event);
+
+unlock:
+        rcu_read_unlock ();
+out:
+        ctx = ((call_frame_t *)myframe)->local;
+        ((call_frame_t *)myframe)->local = NULL;
+
+        GF_ASSERT (ctx);
+
+        /* reverse probe doesn't have req; a missing ctx must not be
+         * dereferenced either
+         */
+        if (ctx && ctx->req)
+                ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno,
+                                                    NULL, ctx->hostname,
+                                                    ctx->port, ctx->dict);
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        if (ctx)
+                glusterd_destroy_probe_ctx (ctx);
+        free (rsp.hostname);//malloced by xdr
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+        return ret;
+}
+
+/*
+ * RPC reply entry point for friend-add requests: runs
+ * __glusterd_friend_add_cbk() through glusterd_big_locked_cbk() and
+ * returns its result.
+ */
+int
+glusterd_friend_add_cbk (struct rpc_req *req, struct iovec *iov,
+                         int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_friend_add_cbk);
+
+        return cbk_ret;
+}
+
+/*
+ * Reply handler for a friend-remove (deprobe) request.
+ *
+ * Looks up the peer named in the probe context, injects
+ * GD_FRIEND_EVENT_REMOVE_FRIEND for it and answers the CLI through
+ * glusterd_xfer_cli_deprobe_resp(). The state machines are only kicked
+ * when the CLI reply succeeded and the RPC itself did not fail.
+ *
+ * Returns the result of glusterd_xfer_cli_deprobe_resp().
+ */
+int
+__glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov,
+                              int count, void *myframe)
+{
+        gd1_mgmt_friend_rsp          rsp         = {{0},};
+        call_frame_t                *frame       = myframe;
+        glusterd_conf_t             *conf        = NULL;
+        glusterd_probe_ctx_t        *ctx         = NULL;
+        glusterd_peerinfo_t         *peerinfo    = NULL;
+        glusterd_friend_sm_event_t  *event       = NULL;
+        gf_boolean_t                 move_sm_now = _gf_true;
+        int32_t                      op_ret      = -1;
+        int32_t                      op_errno    = 0;
+        int                          ret         = -1;
+
+        conf = THIS->private;
+        GF_ASSERT (conf);
+
+        /* take ownership of the probe context stored in the frame */
+        ctx = frame->local;
+        frame->local = NULL;
+        GF_ASSERT (ctx);
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = EINVAL;
+                move_sm_now  = _gf_false;
+                goto inject;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+        if (ret < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_RES_DECODE_FAIL, "error");
+                rsp.op_ret   = -1;
+                rsp.op_errno = EINVAL;
+                goto respond;
+        }
+
+        op_ret   = rsp.op_ret;
+        op_errno = rsp.op_errno;
+
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_RESPONSE_INFO,
+                "Received %s from uuid: %s, host: %s, port: %d",
+                (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname,
+                rsp.port);
+
+inject:
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find (rsp.uuid, ctx->hostname);
+        if (peerinfo == NULL) {
+                /* can happen as part of rpc clnt connection cleanup
+                 * when the frame timeout happens after 30 minutes
+                 */
+                ret = -1;
+                goto unlock;
+        }
+
+        ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND,
+                                            &event);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_EVENT_NEW_GET_FAIL,
+                        "Unable to get event");
+                goto unlock;
+        }
+
+        event->peername = gf_strdup (peerinfo->hostname);
+        gf_uuid_copy (event->peerid, peerinfo->uuid);
+
+        ret = glusterd_friend_sm_inject_event (event);
+        if (ret)
+                goto unlock;
+
+        /* friend_sm would be moved on CLNT_DISCONNECT, consequently
+         * cleaning up peerinfo. Else, we run the risk of triggering
+         * a clnt_destroy within saved_frames_unwind.
+         */
+        op_ret = 0;
+
+unlock:
+        rcu_read_unlock ();
+
+respond:
+        ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL,
+                                              ctx->hostname, ctx->dict);
+        if (!ret && move_sm_now) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        if (ctx) {
+                glusterd_broadcast_friend_delete (ctx->hostname, NULL);
+                glusterd_destroy_probe_ctx (ctx);
+        }
+
+        free (rsp.hostname);    /* malloced by xdr */
+        GLUSTERD_STACK_DESTROY (frame);
+        return ret;
+}
+
+/*
+ * RPC reply entry point for friend-remove requests: runs
+ * __glusterd_friend_remove_cbk() through glusterd_big_locked_cbk() and
+ * returns its result.
+ */
+int
+glusterd_friend_remove_cbk (struct rpc_req *req, struct iovec *iov,
+                            int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_friend_remove_cbk);
+
+        return cbk_ret;
+}
+
+/*
+ * Reply handler for a friend-update request.
+ *
+ * Only decodes the gd1_mgmt_friend_update_rsp and logs whether the step
+ * succeeded; no event is injected. Returns 0 when the reply was received
+ * and decoded, a non-zero value otherwise. The frame is always destroyed.
+ */
+int32_t
+__glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov,
+                              int count, void *myframe)
+{
+        int                         ret  = -1;
+        gd1_mgmt_friend_update_rsp  rsp  = {{0}, };
+        xlator_t                   *this = NULL;
+
+        GF_ASSERT (req);
+        this = THIS;
+
+        if (-1 == req->rpc_status) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_FAILURE, "RPC Error");
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
+        if (ret < 0) {
+                /* this is a decode failure, so say so in the log */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL, "Failed to decode friend"
+                        " update response");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* rsp.uuid is still all-zero when the reply could not be decoded */
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_RESPONSE_INFO, "Received %s from uuid: %s",
+                (ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
+
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+        return ret;
+}
+
+/*
+ * RPC reply entry point for friend-update requests: runs
+ * __glusterd_friend_update_cbk() through glusterd_big_locked_cbk() and
+ * returns its result.
+ */
+int
+glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov,
+                            int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_friend_update_cbk);
+
+        return cbk_ret;
+}
+
+/*
+ * Reply handler for a cluster lock request.
+ *
+ * Translates the peer's answer into GD_OP_EVENT_RCVD_ACC or
+ * GD_OP_EVENT_RCVD_RJT, stores the global opinfo for the global
+ * transaction id (priv->global_txn_id) and injects the event into the op
+ * state machine. RPC and decode failures are recorded in opinfo through
+ * glusterd_set_opinfo() and count as a reject. A reply from an unknown
+ * peer leaves the event type at GD_OP_EVENT_NONE.
+ *
+ * Returns the result of glusterd_op_sm_inject_event(); on 0 both state
+ * machines are kicked. The frame is always destroyed.
+ */
+int32_t
+__glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov,
+                             int count, void *myframe)
+{
+        gd1_mgmt_cluster_lock_rsp    rsp        = {{0},};
+        int                          ret        = -1;
+        int32_t                      op_ret     = -1;
+        glusterd_op_sm_event_type_t  event_type = GD_OP_EVENT_NONE;
+        xlator_t                    *this       = NULL;
+        uuid_t                      *txn_id     = NULL;
+        glusterd_conf_t             *priv       = NULL;
+        char                        *err_str    = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        txn_id = &priv->global_txn_id;
+
+        if (-1 == req->rpc_status) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_LOCK_RESP_FROM_PEER, "Lock response is not "
+                        "received from one of the peer");
+                err_str = "Lock response is not received from one of the peer";
+                glusterd_set_opinfo (err_str, ENETRESET, -1);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL, "Failed to decode "
+                        "cluster lock response received from peer");
+                err_str = "Failed to decode cluster lock response received from"
+                          " peer";
+                glusterd_set_opinfo (err_str, EINVAL, -1);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        op_ret = rsp.op_ret;
+
+        if (op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_LOCK_FROM_UUID_REJCT,
+                        "Received lock RJT from uuid: %s",
+                        uuid_utoa (rsp.uuid));
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Received lock ACC from uuid: %s",
+                              uuid_utoa (rsp.uuid));
+        }
+
+        /* only existence of the peer matters, so the lookup result is
+         * not kept beyond the read-side critical section
+         */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL);
+        rcu_read_unlock ();
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER,
+                        "cluster lock response received from unknown peer: %s. "
+                        "Ignoring response", uuid_utoa (rsp.uuid));
+                goto out;
+        }
+
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                opinfo.op_ret = op_ret;
+                opinfo.op_errstr = gf_strdup ("Another transaction could be in "
+                                              "progress. Please try again after"
+                                              " sometime.");
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+        }
+
+out:
+
+        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+        return ret;
+}
+
+/*
+ * RPC reply entry point for cluster lock requests: runs
+ * __glusterd_cluster_lock_cbk() through glusterd_big_locked_cbk() and
+ * returns its result.
+ */
+int32_t
+glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov,
+                           int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_cluster_lock_cbk);
+
+        return cbk_ret;
+}
+
+/*
+ * Record an operation result in the global opinfo: the return code, the
+ * errno and a gf_strdup()'d copy of errstr.
+ */
+void
+glusterd_set_opinfo (char *errstr, int32_t op_errno, int32_t op_ret)
+{
+        char *errstr_copy = gf_strdup (errstr);
+
+        opinfo.op_errstr = errstr_copy;
+        opinfo.op_errno  = op_errno;
+        opinfo.op_ret    = op_ret;
+}
+
+/*
+ * Reply handler for a mgmt_v3 lock request sent to a peer.
+ *
+ * The transaction id comes from frame->cookie until the reply is decoded,
+ * after which the id carried in the reply (rsp.txn_id) is used. The
+ * peer's answer is turned into GD_OP_EVENT_RCVD_ACC / GD_OP_EVENT_RCVD_RJT,
+ * the global opinfo is stored for the transaction and the event is
+ * injected into the op state machine.
+ *
+ * Returns the result of glusterd_op_sm_inject_event(); on 0 both state
+ * machines are kicked. The cookie and the frame are always released.
+ */
+static int32_t
+glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov,
+                                    int count, void *myframe)
+{
+        gd1_mgmt_v3_lock_rsp         rsp        = {{0},};
+        int                          ret        = -1;
+        int32_t                      op_ret     = -1;
+        glusterd_op_sm_event_type_t  event_type = GD_OP_EVENT_NONE;
+        xlator_t                    *this       = NULL;
+        call_frame_t                *frame      = NULL;
+        uuid_t                      *txn_id     = NULL;
+        uuid_t                      *cookie_id  = NULL;
+        char                        *err_str    = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        frame = myframe;
+        /* Detach the cookie from the frame but remember it: txn_id may be
+         * re-pointed at rsp.txn_id below, and the cookie still has to be
+         * freed at the end (freeing frame->cookie after it was set to NULL
+         * released nothing).
+         */
+        cookie_id = frame->cookie;
+        frame->cookie = NULL;
+        txn_id = cookie_id;
+
+        if (-1 == req->rpc_status) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_LOCK_RESP_FROM_PEER, "Lock response is not "
+                        "received from one of the peer");
+                err_str = "Lock response is not received from one of the peer";
+                glusterd_set_opinfo (err_str, ENETRESET, -1);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL, "Failed to decode "
+                        "mgmt_v3 lock response received from peer");
+                err_str = "Failed to decode mgmt_v3 lock response received from"
+                          " peer";
+                glusterd_set_opinfo (err_str, EINVAL, -1);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        op_ret = rsp.op_ret;
+
+        /* from here on the id reported by the peer is used */
+        txn_id = &rsp.txn_id;
+
+        if (op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_LOCK_FROM_UUID_REJCT,
+                        "Received mgmt_v3 lock RJT from uuid: %s",
+                        uuid_utoa (rsp.uuid));
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Received mgmt_v3 lock ACC from uuid: %s",
+                              uuid_utoa (rsp.uuid));
+        }
+
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL);
+        rcu_read_unlock ();
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER,
+                        "mgmt_v3 lock response received "
+                        "from unknown peer: %s. Ignoring response",
+                        uuid_utoa (rsp.uuid));
+                goto out;
+        }
+
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                opinfo.op_ret = op_ret;
+                opinfo.op_errstr = gf_strdup ("Another transaction could be in "
+                                              "progress. Please try again after"
+                                              " sometime.");
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+        }
+
+out:
+
+        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        /* txn_id may alias the cookie on the error paths, so release it
+         * only after its last use above
+         */
+        GF_FREE (cookie_id);
+        GLUSTERD_STACK_DESTROY (frame);
+        return ret;
+}
+
+/*
+ * RPC reply entry point for mgmt_v3 lock requests: runs
+ * glusterd_mgmt_v3_lock_peers_cbk_fn() through glusterd_big_locked_cbk()
+ * and returns its result.
+ */
+int32_t
+glusterd_mgmt_v3_lock_peers_cbk (struct rpc_req *req, struct iovec *iov,
+                                 int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                        glusterd_mgmt_v3_lock_peers_cbk_fn);
+
+        return cbk_ret;
+}
+
+/*
+ * Reply handler for a mgmt_v3 unlock request sent to a peer.
+ *
+ * The transaction id comes from frame->cookie until the reply is decoded,
+ * after which the id carried in the reply (rsp.txn_id) is used. The
+ * peer's answer is turned into GD_OP_EVENT_RCVD_ACC / GD_OP_EVENT_RCVD_RJT,
+ * the global opinfo is stored for the transaction and the event is
+ * injected into the op state machine.
+ *
+ * Returns the result of glusterd_op_sm_inject_event(); on 0 both state
+ * machines are kicked. The cookie and the frame are always released.
+ */
+static int32_t
+glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov,
+                                      int count, void *myframe)
+{
+        gd1_mgmt_v3_unlock_rsp       rsp        = {{0},};
+        int                          ret        = -1;
+        int32_t                      op_ret     = -1;
+        glusterd_op_sm_event_type_t  event_type = GD_OP_EVENT_NONE;
+        xlator_t                    *this       = NULL;
+        call_frame_t                *frame      = NULL;
+        uuid_t                      *txn_id     = NULL;
+        uuid_t                      *cookie_id  = NULL;
+        char                        *err_str    = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        frame = myframe;
+        /* Detach the cookie from the frame but remember it: txn_id may be
+         * re-pointed at rsp.txn_id below, and the cookie still has to be
+         * freed at the end (freeing frame->cookie after it was set to NULL
+         * released nothing).
+         */
+        cookie_id = frame->cookie;
+        frame->cookie = NULL;
+        txn_id = cookie_id;
+
+        if (-1 == req->rpc_status) {
+                err_str = "Unlock response not received from one of the peer.";
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLUSTER_UNLOCK_FAILED,
+                        "UnLock response is not received from one of the peer");
+                glusterd_set_opinfo (err_str, 0, 0);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+        if (ret < 0) {
+                /* the literals used to join as "...received frompeer" */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLUSTER_UNLOCK_FAILED,
+                        "Failed to decode mgmt_v3 unlock response received from"
+                        " peer");
+                err_str = "Failed to decode mgmt_v3 unlock response received "
+                          "from peer";
+                glusterd_set_opinfo (err_str, 0, 0);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        op_ret = rsp.op_ret;
+
+        /* from here on the id reported by the peer is used */
+        txn_id = &rsp.txn_id;
+
+        if (op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MGMTV3_UNLOCK_FROM_UUID_REJCT,
+                        "Received mgmt_v3 unlock RJT from uuid: %s",
+                        uuid_utoa (rsp.uuid));
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Received mgmt_v3 unlock ACC from uuid: %s",
+                              uuid_utoa (rsp.uuid));
+        }
+
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL);
+        rcu_read_unlock ();
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_CLUSTER_UNLOCK_FAILED,
+                        "mgmt_v3 unlock response received "
+                        "from unknown peer: %s. Ignoring response",
+                        uuid_utoa (rsp.uuid));
+                goto out;
+        }
+
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                opinfo.op_ret = op_ret;
+                opinfo.op_errstr = gf_strdup ("Another transaction could be in "
+                                              "progress. Please try again after"
+                                              " sometime.");
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+        }
+
+out:
+
+        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        /* txn_id may alias the cookie on the error paths, so release it
+         * only after its last use above
+         */
+        GF_FREE (cookie_id);
+        GLUSTERD_STACK_DESTROY (frame);
+        return ret;
+}
+
+/*
+ * RPC reply entry point for mgmt_v3 unlock requests: runs
+ * glusterd_mgmt_v3_unlock_peers_cbk_fn() through glusterd_big_locked_cbk()
+ * and returns its result.
+ */
+int32_t
+glusterd_mgmt_v3_unlock_peers_cbk (struct rpc_req *req, struct iovec *iov,
+                                   int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                        glusterd_mgmt_v3_unlock_peers_cbk_fn);
+
+        return cbk_ret;
+}
+
+/*
+ * Reply handler for a cluster unlock request.
+ *
+ * Translates the peer's answer into GD_OP_EVENT_RCVD_ACC or
+ * GD_OP_EVENT_RCVD_RJT, stores the global opinfo for the global
+ * transaction id (priv->global_txn_id) and injects the event into the op
+ * state machine. RPC and decode failures count as a reject. A reply from
+ * an unknown peer leaves the event type at GD_OP_EVENT_NONE.
+ *
+ * Returns the result of glusterd_op_sm_inject_event(); on 0 both state
+ * machines are kicked. The frame is always destroyed.
+ */
+int32_t
+__glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+                               int count, void *myframe)
+{
+        /* the reply type now matches the xdr routine used to decode it */
+        gd1_mgmt_cluster_unlock_rsp  rsp        = {{0},};
+        int                          ret        = -1;
+        int32_t                      op_ret     = -1;
+        glusterd_op_sm_event_type_t  event_type = GD_OP_EVENT_NONE;
+        xlator_t                    *this       = NULL;
+        uuid_t                      *txn_id     = NULL;
+        glusterd_conf_t             *priv       = NULL;
+        char                        *err_str    = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        txn_id = &priv->global_txn_id;
+
+        if (-1 == req->rpc_status) {
+                err_str = "Unlock response not received from one of the peer.";
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLUSTER_UNLOCK_FAILED,
+                        "UnLock response is not received from one of the peer");
+                glusterd_set_opinfo (err_str, 0, 0);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLUSTER_UNLOCK_FAILED,
+                        "Failed to decode unlock response received from peer");
+                err_str = "Failed to decode cluster unlock response received "
+                          "from peer";
+                glusterd_set_opinfo (err_str, 0, 0);
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        op_ret = rsp.op_ret;
+
+        if (op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNLOCK_FROM_UUID_REJCT,
+                        "Received unlock RJT from uuid: %s",
+                        uuid_utoa (rsp.uuid));
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Received unlock ACC from uuid: %s",
+                              uuid_utoa (rsp.uuid));
+        }
+
+        /* only existence of the peer matters, so the lookup result is
+         * not kept beyond the read-side critical section
+         */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL);
+        rcu_read_unlock ();
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_CLUSTER_UNLOCK_FAILED,
+                        "Unlock response received from unknown peer %s",
+                        uuid_utoa (rsp.uuid));
+                goto out;
+        }
+
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                opinfo.op_ret = op_ret;
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+        }
+
+out:
+
+        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+        return ret;
+}
+
+/*
+ * RPC reply entry point for cluster unlock requests: runs
+ * __glusterd_cluster_unlock_cbk() through glusterd_big_locked_cbk() and
+ * returns its result.
+ */
+int32_t
+glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+                             int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_cluster_unlock_cbk);
+
+        return cbk_ret;
+}
+
+/*
+ * Reply handler for a stage-op request sent to a peer.
+ *
+ * Decodes the gd1_mgmt_stage_op_rsp (and the dictionary it may carry),
+ * turns the outcome into GD_OP_EVENT_RCVD_ACC / GD_OP_EVENT_RCVD_RJT,
+ * stores the global opinfo for the transaction id found in frame->cookie
+ * and injects the event into the op state machine. On a reject,
+ * opinfo.op_errstr is set from the peer's error string, or from
+ * OPERRSTR_STAGE_FAIL with the peer's hostname/uuid when that is empty.
+ *
+ * Returns the result of glusterd_op_sm_inject_event(); on 0 both state
+ * machines are kicked. The reply buffers, the cookie and the frame are
+ * always released.
+ */
+int32_t
+__glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+                         int count, void *myframe)
+{
+        gd1_mgmt_stage_op_rsp        rsp           = {{0},};
+        int                          ret           = -1;
+        int32_t                      op_ret        = -1;
+        glusterd_op_sm_event_type_t  event_type    = GD_OP_EVENT_NONE;
+        glusterd_peerinfo_t         *peerinfo      = NULL;
+        dict_t                      *dict          = NULL;
+        char                         err_str[2048] = {0};
+        char                        *peer_str      = NULL;
+        xlator_t                    *this          = NULL;
+        glusterd_conf_t             *priv          = NULL;
+        uuid_t                      *txn_id        = NULL;
+        call_frame_t                *frame         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT(myframe);
+
+        frame = myframe;
+        txn_id = frame->cookie;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = EINVAL;
+                /* use standard allocation because to keep uniformity
+                   in freeing it */
+                rsp.op_errstr = strdup ("error");
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL, "Failed to decode stage "
+                        "response received from peer");
+                rsp.op_ret   = -1;
+                rsp.op_errno = EINVAL;
+                /* use standard allocation because to keep uniformity
+                   in freeing it */
+                rsp.op_errstr = strdup ("Failed to decode stage response "
+                                        "received from peer.");
+                goto out;
+        }
+
+        if (rsp.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (rsp.dict.dict_val,
+                                        rsp.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize rsp-buffer to dictionary");
+                        /* The event type is recomputed from rsp.op_ret
+                         * after `out`, so the reject has to be recorded
+                         * there; setting only event_type was overwritten
+                         * with ACC whenever the peer had reported success.
+                         */
+                        rsp.op_ret = -1;
+                        event_type = GD_OP_EVENT_RCVD_RJT;
+                        goto out;
+                } else {
+                        /* the dict now owns the xdr buffer */
+                        dict->extra_stdfree = rsp.dict.dict_val;
+                }
+        }
+
+out:
+        op_ret = rsp.op_ret;
+
+        if (op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STAGE_FROM_UUID_REJCT,
+                        "Received stage RJT from uuid: %s",
+                        uuid_utoa (rsp.uuid));
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Received stage ACC from uuid: %s",
+                              uuid_utoa (rsp.uuid));
+        }
+
+        /* peerinfo is only used inside this read-side critical section */
+        rcu_read_lock ();
+        peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL);
+        if (peerinfo == NULL) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER, "Stage response received "
+                        "from unknown peer: %s. Ignoring response.",
+                        uuid_utoa (rsp.uuid));
+        }
+
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                opinfo.op_ret = op_ret;
+                if (strcmp ("", rsp.op_errstr)) {
+                        opinfo.op_errstr = gf_strdup (rsp.op_errstr);
+                } else {
+                        if (peerinfo)
+                                peer_str = peerinfo->hostname;
+                        else
+                                peer_str = uuid_utoa (rsp.uuid);
+                        snprintf (err_str, sizeof (err_str),
+                                  OPERRSTR_STAGE_FAIL, peer_str);
+                        opinfo.op_errstr = gf_strdup (err_str);
+                }
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+        }
+
+        rcu_read_unlock ();
+
+
+        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        free (rsp.op_errstr); //malloced by xdr
+        if (dict) {
+                /* free the buffer here only when the dict did not adopt it */
+                if (!dict->extra_stdfree && rsp.dict.dict_val)
+                        free (rsp.dict.dict_val); //malloced by xdr
+                dict_unref (dict);
+        } else {
+                free (rsp.dict.dict_val); //malloced by xdr
+        }
+        GF_FREE (frame->cookie);
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+        return ret;
+}
+
+/*
+ * RPC reply entry point for stage-op requests: runs
+ * __glusterd_stage_op_cbk() through glusterd_big_locked_cbk() and returns
+ * its result.
+ */
+int32_t
+glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+                       int count, void *myframe)
+{
+        int cbk_ret = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                               __glusterd_stage_op_cbk);
+
+        return cbk_ret;
+}
+
+/*
+ * Process a peer's reply to a COMMIT_OP request.
+ *
+ * @req:     RPC request; rpc_status == -1 marks a failed call.
+ * @iov:     buffer holding the encoded gd1_mgmt_commit_op_rsp.
+ * @count:   not used by this function.
+ * @myframe: call frame of the request; frame->cookie holds the transaction
+ *           id and is released here together with the frame.
+ *
+ * The reply is decoded, its optional dictionary is unserialized, and for
+ * accepted PROFILE/REBALANCE/DEFRAG replies the dictionary is merged into
+ * the transaction's op_ctx.  An ACC or RJT event is then injected into the
+ * op state machine.  Returns the result of glusterd_op_sm_inject_event().
+ */
+int32_t
+__glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+                          int count, void *myframe)
+{
+        gd1_mgmt_commit_op_rsp rsp = {{0},};
+        int ret = -1;
+        int32_t op_ret = -1;
+        glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        dict_t *dict = NULL;
+        char err_str[2048] = {0};
+        char *peer_str = NULL;
+        xlator_t *this = NULL;
+        glusterd_conf_t *priv = NULL;
+        uuid_t *txn_id = NULL;
+        glusterd_op_info_t txn_op_info = {{0},};
+        call_frame_t *frame = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT(myframe);
+
+        frame = myframe;
+        txn_id = frame->cookie;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                /* use the standard allocator so the string can be released
+                   with free(), exactly like an xdr-allocated one */
+                rsp.op_errstr = strdup ("error");
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL, "Failed to decode commit "
+                        "response received from peer");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                /* use the standard allocator so the string can be released
+                   with free(), exactly like an xdr-allocated one */
+                rsp.op_errstr = strdup ("Failed to decode commit response "
+                                        "received from peer.");
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        if (rsp.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (rsp.dict.dict_val,
+                                        rsp.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize rsp-buffer to dictionary");
+                        event_type = GD_OP_EVENT_RCVD_RJT;
+                        goto out;
+                } else {
+                        /* from here on the dict is recorded as the owner of
+                           the xdr buffer (see the cleanup at the end) */
+                        dict->extra_stdfree = rsp.dict.dict_val;
+                }
+        }
+
+        op_ret = rsp.op_ret;
+
+        if (op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMIT_FROM_UUID_REJCT,
+                        "Received commit RJT from uuid: %s",
+                        uuid_utoa (rsp.uuid));
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Received commit ACC from uuid: %s",
+                              uuid_utoa (rsp.uuid));
+        }
+
+        ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_GET_FAIL,
+                        "Failed to get txn_op_info "
+                        "for txn_id = %s", uuid_utoa (*txn_id));
+        }
+
+        /* peerinfo is only dereferenced inside this read-side section */
+        rcu_read_lock ();
+        peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL);
+        if (peerinfo == NULL) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER, "Commit response for "
+                        "'Volume %s' received from unknown peer: %s",
+                        gd_op_list[opinfo.op], uuid_utoa (rsp.uuid));
+        }
+
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                opinfo.op_ret = op_ret;
+                if (strcmp ("", rsp.op_errstr)) {
+                        opinfo.op_errstr = gf_strdup(rsp.op_errstr);
+                } else {
+                        /* the peer sent no message: build a generic one */
+                        if (peerinfo)
+                                peer_str = peerinfo->hostname;
+                        else
+                                peer_str = uuid_utoa (rsp.uuid);
+                        snprintf (err_str, sizeof (err_str),
+                                  OPERRSTR_COMMIT_FAIL, peer_str);
+                        opinfo.op_errstr = gf_strdup (err_str);
+                }
+                if (!opinfo.op_errstr) {
+                        ret = -1;
+                        goto unlock;
+                }
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+                GF_ASSERT (rsp.op == txn_op_info.op);
+
+                switch (rsp.op) {
+
+                case GD_OP_PROFILE_VOLUME:
+                        ret = glusterd_profile_volume_use_rsp_dict (txn_op_info.op_ctx, dict);
+                        if (ret)
+                                goto unlock;
+                break;
+
+                case GD_OP_REBALANCE:
+                case GD_OP_DEFRAG_BRICK_VOLUME:
+                        ret = glusterd_volume_rebalance_use_rsp_dict (txn_op_info.op_ctx, dict);
+                        if (ret)
+                                goto unlock;
+                break;
+
+                default:
+                break;
+                }
+        }
+unlock:
+        rcu_read_unlock ();
+
+out:
+
+        /* NOTE: ret is overwritten below, so earlier failures only influence
+           the event type, not the value returned */
+        ret = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        free (rsp.op_errstr); //malloced by xdr
+        /* Release the xdr dictionary buffer unless the dict took it over via
+           extra_stdfree; same handling as __glusterd_stage_op_cbk.  Without
+           it the buffer leaks when unserialize fails or no dict exists. */
+        if (dict) {
+                if (!dict->extra_stdfree && rsp.dict.dict_val)
+                        free (rsp.dict.dict_val); //malloced by xdr
+                dict_unref (dict);
+        } else {
+                free (rsp.dict.dict_val); //malloced by xdr
+        }
+        GF_FREE (frame->cookie);
+        GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+        return ret;
+}
+
+/*
+ * RPC reply entry point for COMMIT_OP requests.  The reply is handed to
+ * __glusterd_commit_op_cbk through glusterd_big_locked_cbk (presumably so
+ * the handler runs under glusterd's big lock -- confirm against that
+ * helper).  Returns whatever glusterd_big_locked_cbk returns.
+ */
+int32_t
+glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+                        int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          __glusterd_commit_op_cbk);
+        return status;
+}
+
+/*
+ * Send a PROBE_QUERY request to a peer.
+ *
+ * 'data' is a dict that must carry "hostname" and "peerinfo"; a missing
+ * "port" falls back to GF_DEFAULT_BASE_PORT.  Returns -1 when any argument
+ * is NULL, the dict lookup error when a mandatory key is absent, otherwise
+ * the result of glusterd_submit_request().
+ */
+int32_t
+glusterd_rpc_probe (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        gd1_mgmt_probe_req probe_req = {{0},};
+        glusterd_peerinfo_t *peer = NULL;
+        glusterd_conf_t *conf = NULL;
+        dict_t *opts = NULL;
+        char *host = NULL;
+        int peer_port = 0;
+        int ret = 0;
+
+        if (!(frame && this && data)) {
+                ret = -1;
+                goto out;
+        }
+
+        opts = data;
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = dict_get_str (opts, "hostname", &host);
+        if (ret != 0)
+                goto out;
+
+        /* the port is optional */
+        if (dict_get_int32 (opts, "port", &peer_port) != 0)
+                peer_port = GF_DEFAULT_BASE_PORT;
+
+        ret = dict_get_ptr (opts, "peerinfo", VOID (&peer));
+        if (ret != 0)
+                goto out;
+
+        probe_req.port = peer_port;
+        probe_req.hostname = gf_strdup (host);
+        gf_uuid_copy (probe_req.uuid, MY_UUID);
+
+        ret = glusterd_submit_request (peer->rpc, &probe_req, frame,
+                                       peer->peer, GLUSTERD_PROBE_QUERY,
+                                       NULL, this, glusterd_probe_cbk,
+                                       (xdrproc_t)xdr_gd1_mgmt_probe_req);
+
+out:
+        /* the request keeps its own copy of the hostname; drop it here */
+        GF_FREE (probe_req.hostname);
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/*
+ * Send a FRIEND_ADD request to the peer named by a friend state-machine
+ * event.
+ *
+ * @frame: call frame used for the request (must not be NULL).
+ * @this:  glusterd xlator (must not be NULL).
+ * @data:  glusterd_friend_sm_event_t identifying the peer by id/name.
+ *
+ * The request carries this node's uuid, the peer's hostname and port, and a
+ * serialized dictionary with the exported volumes, the peer's hostname
+ * ("hostname_in_cluster") and, from op-version 3.6.0 on, the missed-snapshot
+ * and snapshot lists.
+ *
+ * peerinfo is only dereferenced inside RCU read-side sections.  The handshake
+ * dictionary is built outside of one, so the peer is looked up again before
+ * the request is submitted, mirroring glusterd_rpc_friend_remove which
+ * submits while holding the read lock.
+ *
+ * Returns 0 on success, -1 on invalid arguments, allocation failure or an
+ * unknown peer, otherwise the error of the failing helper.
+ */
+int32_t
+glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this,
+                         void *data)
+{
+        gd1_mgmt_friend_req req = {{0},};
+        int ret = 0;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        glusterd_conf_t *priv = NULL;
+        glusterd_friend_sm_event_t *event = NULL;
+        dict_t *peer_data = NULL;
+
+
+        if (!frame || !this || !data) {
+                ret = -1;
+                goto out;
+        }
+
+        event = data;
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+        if (!peerinfo) {
+                rcu_read_unlock ();
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+                        event->peername, uuid_utoa (event->peerid));
+                goto out;
+        }
+
+        gf_uuid_copy (req.uuid, MY_UUID);
+        req.hostname = gf_strdup (peerinfo->hostname);
+        req.port = peerinfo->port;
+
+        rcu_read_unlock ();
+        /* the pointer must not be used outside the read-side section */
+        peerinfo = NULL;
+
+        if (!req.hostname) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_add_volumes_to_export_dict (&peer_data);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to add list of volumes "
+                        "in the peer_data dict for handshake");
+                goto out;
+        }
+
+        /* use our private copy of the hostname, not the peerinfo field */
+        ret = dict_set_dynstr_with_alloc (peer_data,
+                                          "hostname_in_cluster",
+                                          req.hostname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to add hostname of the peer");
+                goto out;
+        }
+
+        if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+                ret = glusterd_add_missed_snaps_to_export_dict (peer_data);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                                "Unable to add list of missed snapshots "
+                                "in the peer_data dict for handshake");
+                        goto out;
+                }
+
+                ret = glusterd_add_snapshots_to_export_dict (peer_data);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_LIST_SET_FAIL,
+                                "Unable to add list of snapshots "
+                                "in the peer_data dict for handshake");
+                        goto out;
+                }
+        }
+
+        ret = dict_allocate_and_serialize (peer_data, &req.vols.vols_val,
+                                           &req.vols.vols_len);
+        if (ret)
+                goto out;
+
+        /* look the peer up again: it may have gone away while the
+           handshake dictionary was being built */
+        rcu_read_lock ();
+
+        peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+        if (!peerinfo) {
+                rcu_read_unlock ();
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+                        event->peername, uuid_utoa (event->peerid));
+                goto out;
+        }
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
+                                       GLUSTERD_FRIEND_ADD,
+                                       NULL, this, glusterd_friend_add_cbk,
+                                       (xdrproc_t)xdr_gd1_mgmt_friend_req);
+
+        rcu_read_unlock ();
+
+out:
+        GF_FREE (req.vols.vols_val);
+        GF_FREE (req.hostname);
+
+        if (peer_data)
+                dict_unref (peer_data);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Send a FRIEND_REMOVE request to the peer named by a friend state-machine
+ * event ('data').  The peer is looked up and the request submitted inside
+ * one RCU read-side section.  Returns -1 on NULL arguments or an unknown
+ * peer, otherwise the result of glusterd_submit_request().
+ */
+int32_t
+glusterd_rpc_friend_remove (call_frame_t *frame, xlator_t *this,
+                            void *data)
+{
+        gd1_mgmt_friend_req remove_req = {{0},};
+        glusterd_friend_sm_event_t *sm_event = NULL;
+        glusterd_peerinfo_t *peer = NULL;
+        glusterd_conf_t *conf = NULL;
+        int ret = 0;
+
+        if (!(frame && this && data)) {
+                ret = -1;
+                goto out;
+        }
+
+        sm_event = data;
+        conf = this->private;
+
+        GF_ASSERT (conf);
+
+        rcu_read_lock ();
+
+        peer = glusterd_peerinfo_find (sm_event->peerid, sm_event->peername);
+        if (peer) {
+                gf_uuid_copy (remove_req.uuid, MY_UUID);
+                remove_req.hostname = gf_strdup (peer->hostname);
+                remove_req.port = peer->port;
+
+                ret = glusterd_submit_request (peer->rpc, &remove_req, frame,
+                                               peer->peer,
+                                               GLUSTERD_FRIEND_REMOVE, NULL,
+                                               this, glusterd_friend_remove_cbk,
+                                               (xdrproc_t)xdr_gd1_mgmt_friend_req);
+        } else {
+                ret = -1;
+        }
+
+        rcu_read_unlock ();
+
+        /* only the pointer value is tested here, it is not dereferenced */
+        if (!peer)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+                        sm_event->peername, uuid_utoa (sm_event->peerid));
+out:
+        GF_FREE (remove_req.hostname);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/*
+ * Send a FRIEND_UPDATE request.
+ *
+ * @frame: not used; a private frame is created for the request.
+ * @this:  glusterd xlator.
+ * @data:  dict of friends that also carries the target under "peerinfo";
+ *         that key is removed before the dict is serialized.
+ *
+ * Returns 0 when 'data' is NULL (nothing is sent), -1 when no frame can be
+ * created, the dict error when "peerinfo" is missing or serialization fails,
+ * otherwise the result of glusterd_submit_request().
+ */
+int32_t
+glusterd_rpc_friend_update (call_frame_t *frame, xlator_t *this,
+                            void *data)
+{
+        gd1_mgmt_friend_update req = {{0},};
+        int ret = 0;
+        glusterd_conf_t *priv = NULL;
+        dict_t *friends = NULL;
+        call_frame_t *dummy_frame = NULL;
+        glusterd_peerinfo_t *peerinfo = NULL;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        friends = data;
+        if (!friends)
+                goto out;
+
+        ret = dict_get_ptr (friends, "peerinfo", VOID(&peerinfo));
+        if (ret)
+                goto out;
+        /* Don't want to send the pointer over */
+        dict_del (friends, "peerinfo");
+
+        ret = dict_allocate_and_serialize (friends, &req.friends.friends_val,
+                                           &req.friends.friends_len);
+        if (ret)
+                goto out;
+
+        gf_uuid_copy (req.uuid, MY_UUID);
+
+        dummy_frame = create_frame (this, this->ctx->pool);
+        /* frame creation can fail; do not submit without one (same check
+           as glusterd_cluster_lock) */
+        if (!dummy_frame) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+                                       peerinfo->peer,
+                                       GLUSTERD_FRIEND_UPDATE, NULL,
+                                       this, glusterd_friend_update_cbk,
+                                       (xdrproc_t)xdr_gd1_mgmt_friend_update);
+
+out:
+        GF_FREE (req.friends.friends_val);
+
+        if (ret && dummy_frame)
+                STACK_DESTROY (dummy_frame->root);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Send a CLUSTER_LOCK request to the peer passed in 'data'
+ * (a glusterd_peerinfo_t).  The 'frame' argument is not used; a private
+ * frame is created and destroyed again when the submission fails.
+ * Returns -1 when 'this' is NULL or no frame could be created, otherwise
+ * the result of glusterd_submit_request().
+ */
+int32_t
+glusterd_cluster_lock (call_frame_t *frame, xlator_t *this,
+                       void *data)
+{
+        gd1_mgmt_cluster_lock_req lock_req = {{0},};
+        call_frame_t *req_frame = NULL;
+        glusterd_peerinfo_t *peer = NULL;
+        glusterd_conf_t *conf = NULL;
+        int ret = -1;
+
+        if (this) {
+                peer = data;
+
+                conf = this->private;
+                GF_ASSERT (conf);
+
+                glusterd_get_uuid (&lock_req.uuid);
+
+                req_frame = create_frame (this, this->ctx->pool);
+                if (req_frame)
+                        ret = glusterd_submit_request (peer->rpc, &lock_req,
+                                                       req_frame, peer->mgmt,
+                                                       GLUSTERD_MGMT_CLUSTER_LOCK,
+                                                       NULL,
+                                                       this,
+                                                       glusterd_cluster_lock_cbk,
+                                                       (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        if (ret && req_frame)
+                STACK_DESTROY (req_frame->root);
+        return ret;
+}
+
+/*
+ * Send a MGMT_V3_LOCK request to one peer.
+ *
+ * @frame: frame to use, or NULL to have one created here.
+ * @this:  glusterd xlator.
+ * @data:  dict holding the target under "peerinfo" (removed before the dict
+ *         is serialized into the request) and the transaction id under
+ *         "transaction_id".
+ *
+ * A copy of the transaction id is attached to the frame as its cookie.
+ * Returns -1 on a NULL 'this' or an allocation failure, the dict error when
+ * a key is missing or serialization fails, otherwise the result of
+ * glusterd_submit_request().
+ */
+int32_t
+glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this,
+                             void *data)
+{
+        gd1_mgmt_v3_lock_req req = {{0},};
+        int ret = -1;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        glusterd_conf_t *priv = NULL;
+        dict_t *dict = NULL;
+        uuid_t *txn_id = NULL;
+
+        if (!this)
+                goto out;
+
+        dict = data;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+        if (ret)
+                goto out;
+
+        //peerinfo should not be in payload
+        dict_del (dict, "peerinfo");
+
+        glusterd_get_uuid (&req.uuid);
+
+        ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+                                           &req.dict.dict_len);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL, "Failed to serialize dict "
+                        "to request buffer");
+                goto out;
+        }
+
+        /* Sending valid transaction ID to peers */
+        ret = dict_get_bin (dict, "transaction_id",
+                            (void **)&txn_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_ID_GET_FAIL,
+                        "Failed to get transaction id.");
+                goto out;
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Transaction_id = %s", uuid_utoa (*txn_id));
+                gf_uuid_copy (req.txn_id, *txn_id);
+        }
+
+        if (!frame)
+                frame = create_frame (this, this->ctx->pool);
+
+        if (!frame) {
+                ret = -1;
+                goto out;
+        }
+        /* the reply callback reads the transaction id from the cookie */
+        frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!frame->cookie) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_copy (frame->cookie, req.txn_id);
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
+                                       peerinfo->mgmt_v3,
+                                       GLUSTERD_MGMT_V3_LOCK, NULL,
+                                       this, glusterd_mgmt_v3_lock_peers_cbk,
+                                       (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
+out:
+        /* release the serialized dict once submission has returned, as
+           glusterd_stage_op does for its request buffer */
+        GF_FREE (req.dict.dict_val);
+
+        /* 'this' is NULL on the first exit path above */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Send a MGMT_V3_UNLOCK request to one peer.
+ *
+ * @frame: frame to use, or NULL to have one created here.
+ * @this:  glusterd xlator.
+ * @data:  dict holding the target under "peerinfo" (removed before the dict
+ *         is serialized into the request) and the transaction id under
+ *         "transaction_id".
+ *
+ * A copy of the transaction id is attached to the frame as its cookie.
+ * Returns -1 on a NULL 'this' or an allocation failure, the dict error when
+ * a key is missing or serialization fails, otherwise the result of
+ * glusterd_submit_request().
+ */
+int32_t
+glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this,
+                               void *data)
+{
+        gd1_mgmt_v3_unlock_req req = {{0},};
+        int ret = -1;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        glusterd_conf_t *priv = NULL;
+        dict_t *dict = NULL;
+        uuid_t *txn_id = NULL;
+
+        if (!this)
+                goto out;
+
+        dict = data;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+        if (ret)
+                goto out;
+
+        //peerinfo should not be in payload
+        dict_del (dict, "peerinfo");
+
+        glusterd_get_uuid (&req.uuid);
+
+        ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+                                           &req.dict.dict_len);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "Failed to serialize dict "
+                        "to request buffer");
+                goto out;
+        }
+
+        /* Sending valid transaction ID to peers */
+        ret = dict_get_bin (dict, "transaction_id",
+                            (void **)&txn_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_ID_GET_FAIL,
+                        "Failed to get transaction id.");
+                goto out;
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Transaction_id = %s", uuid_utoa (*txn_id));
+                gf_uuid_copy (req.txn_id, *txn_id);
+        }
+
+        if (!frame)
+                frame = create_frame (this, this->ctx->pool);
+
+        if (!frame) {
+                ret = -1;
+                goto out;
+        }
+        /* the reply callback reads the transaction id from the cookie */
+        frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!frame->cookie) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_copy (frame->cookie, req.txn_id);
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
+                                       peerinfo->mgmt_v3,
+                                       GLUSTERD_MGMT_V3_UNLOCK, NULL,
+                                       this, glusterd_mgmt_v3_unlock_peers_cbk,
+                                       (xdrproc_t)
+                                       xdr_gd1_mgmt_v3_unlock_req);
+out:
+        /* release the serialized dict once submission has returned, as
+           glusterd_stage_op does for its request buffer */
+        GF_FREE (req.dict.dict_val);
+
+        /* 'this' is NULL on the first exit path above */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Send a CLUSTER_UNLOCK request to the peer passed in 'data'
+ * (a glusterd_peerinfo_t).  The 'frame' argument is not used; a private
+ * frame is created and destroyed again when the submission fails.
+ * Returns -1 when 'this' is NULL or no frame could be created, otherwise
+ * the result of glusterd_submit_request().
+ */
+int32_t
+glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this,
+                         void *data)
+{
+        gd1_mgmt_cluster_lock_req unlock_req = {{0},};
+        call_frame_t *req_frame = NULL;
+        glusterd_peerinfo_t *peer = NULL;
+        glusterd_conf_t *conf = NULL;
+        int ret = -1;
+
+        if (this) {
+                peer = data;
+                conf = this->private;
+                GF_ASSERT (conf);
+
+                glusterd_get_uuid (&unlock_req.uuid);
+
+                req_frame = create_frame (this, this->ctx->pool);
+                if (req_frame)
+                        ret = glusterd_submit_request (peer->rpc, &unlock_req,
+                                                       req_frame, peer->mgmt,
+                                                       GLUSTERD_MGMT_CLUSTER_UNLOCK,
+                                                       NULL,
+                                                       this,
+                                                       glusterd_cluster_unlock_cbk,
+                                                       (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+        }
+
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+
+        if (ret && req_frame)
+                STACK_DESTROY (req_frame->root);
+
+        return ret;
+}
+
+/*
+ * Send a STAGE_OP request to one peer.
+ *
+ * @frame: frame to use, or NULL to have one created here.
+ * @this:  glusterd xlator.
+ * @data:  dict holding the target under "peerinfo" (removed before the dict
+ *         is serialized into the request) and the transaction id under
+ *         "transaction_id".
+ *
+ * A copy of the transaction id is attached to the frame as its cookie.
+ * Returns -1 on a NULL 'this' or an allocation failure, the dict error when
+ * a key is missing or serialization fails, otherwise the result of
+ * glusterd_submit_request().
+ */
+int32_t
+glusterd_stage_op (call_frame_t *frame, xlator_t *this,
+                   void *data)
+{
+        gd1_mgmt_stage_op_req req = {{0,},};
+        int ret = -1;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        glusterd_conf_t *priv = NULL;
+        dict_t *dict = NULL;
+        uuid_t *txn_id = NULL;
+
+        if (!this) {
+                goto out;
+        }
+
+        dict = data;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+        if (ret)
+                goto out;
+
+        //peerinfo should not be in payload
+        dict_del (dict, "peerinfo");
+
+        glusterd_get_uuid (&req.uuid);
+        req.op = glusterd_op_get_op ();
+
+        ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
+                                           &req.buf.buf_len);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "Failed to serialize dict "
+                        "to request buffer");
+                goto out;
+        }
+        /* Sending valid transaction ID to peers */
+        ret = dict_get_bin (dict, "transaction_id",
+                            (void **)&txn_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_ID_GET_FAIL,
+                        "Failed to get transaction id.");
+                goto out;
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Transaction_id = %s", uuid_utoa (*txn_id));
+        }
+
+        if (!frame)
+                frame = create_frame (this, this->ctx->pool);
+
+        if (!frame) {
+                ret = -1;
+                goto out;
+        }
+        /* the reply callback reads the transaction id from the cookie */
+        frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!frame->cookie) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_copy (frame->cookie, *txn_id);
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
+                                       peerinfo->mgmt, GLUSTERD_MGMT_STAGE_OP,
+                                       NULL,
+                                       this, glusterd_stage_op_cbk,
+                                       (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
+
+out:
+        /* the serialized dict is released once submission has returned */
+        GF_FREE (req.buf.buf_val);
+
+        /* 'this' is NULL on the first exit path above */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Send a COMMIT_OP request to one peer.
+ *
+ * @frame: frame to use, or NULL to have one created here.
+ * @this:  glusterd xlator.
+ * @data:  dict holding the target under "peerinfo" (removed before the dict
+ *         is serialized into the request) and the transaction id under
+ *         "transaction_id".
+ *
+ * A copy of the transaction id is attached to the frame as its cookie.
+ * Returns -1 on a NULL 'this' or an allocation failure, the dict error when
+ * a key is missing or serialization fails, otherwise the result of
+ * glusterd_submit_request().
+ */
+int32_t
+glusterd_commit_op (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        gd1_mgmt_commit_op_req req = {{0,},};
+        int ret = -1;
+        glusterd_peerinfo_t *peerinfo = NULL;
+        glusterd_conf_t *priv = NULL;
+        dict_t *dict = NULL;
+        uuid_t *txn_id = NULL;
+
+        if (!this) {
+                goto out;
+        }
+
+        dict = data;
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+        if (ret)
+                goto out;
+
+        //peerinfo should not be in payload
+        dict_del (dict, "peerinfo");
+
+        glusterd_get_uuid (&req.uuid);
+        req.op = glusterd_op_get_op ();
+
+        ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
+                                           &req.buf.buf_len);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                        "Failed to serialize dict to "
+                        "request buffer");
+                goto out;
+        }
+        /* Sending valid transaction ID to peers */
+        ret = dict_get_bin (dict, "transaction_id",
+                            (void **)&txn_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_ID_GET_FAIL,
+                        "Failed to get transaction id.");
+                goto out;
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Transaction_id = %s", uuid_utoa (*txn_id));
+        }
+
+        if (!frame)
+                frame = create_frame (this, this->ctx->pool);
+
+        if (!frame) {
+                ret = -1;
+                goto out;
+        }
+        /* the reply callback reads the transaction id from the cookie */
+        frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!frame->cookie) {
+                ret = -1;
+                goto out;
+        }
+        gf_uuid_copy (frame->cookie, *txn_id);
+
+        ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
+                                       peerinfo->mgmt, GLUSTERD_MGMT_COMMIT_OP,
+                                       NULL,
+                                       this, glusterd_commit_op_cbk,
+                                       (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
+
+out:
+        /* the serialized dict is released once submission has returned */
+        GF_FREE (req.buf.buf_val);
+
+        /* 'this' is NULL on the first exit path above */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Process the reply to a brick-op request.
+ *
+ * @req:     RPC request; rpc_status == -1 marks a failed call.
+ * @iov:     buffer holding the encoded gd1_mgmt_brick_op_rsp.
+ * @count:   not used by this function.
+ * @myframe: call frame of the request; frame->local is the request context
+ *           and frame->cookie the pending node the request was sent for.
+ *
+ * The reply and its optional output dictionary are decoded; for
+ * GD_OP_STATUS_VOLUME the pending node's index is stored in the dictionary.
+ * A response context is then handed to the op state machine with an ACC or
+ * RJT event.  Returns the result of glusterd_op_sm_inject_event().
+ */
+int32_t
+__glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+                         int count, void *myframe)
+{
+        gd1_mgmt_brick_op_rsp rsp = {0};
+        int ret = -1;
+        int32_t op_ret = -1;
+        glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+        call_frame_t *frame = NULL;
+        glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+        dict_t *dict = NULL;
+        int index = 0;
+        glusterd_req_ctx_t *req_ctx = NULL;
+        glusterd_pending_node_t *node = NULL;
+        xlator_t *this = NULL;
+        uuid_t *txn_id = NULL;
+        glusterd_conf_t *priv = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (req);
+
+        /* default id; replaced below when the request dict carries one */
+        txn_id = &priv->global_txn_id;
+        frame = myframe;
+        req_ctx = frame->local;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                /* use the standard allocator so the string can be released
+                   with free(), exactly like an xdr-allocated one */
+                rsp.op_errstr = strdup ("error");
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RES_DECODE_FAIL,
+                        "Failed to decode brick op "
+                        "response received");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                rsp.op_errstr = strdup ("Unable to decode brick op response");
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                goto out;
+        }
+
+        if (rsp.output.output_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (rsp.output.output_val,
+                                        rsp.output.output_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "Failed to "
+                                "unserialize rsp-buffer to dictionary");
+                        event_type = GD_OP_EVENT_RCVD_RJT;
+                        goto out;
+                } else {
+                        dict->extra_stdfree = rsp.output.output_val;
+                }
+        }
+
+        op_ret = rsp.op_ret;
+
+        /* Add index to rsp_dict for GD_OP_STATUS_VOLUME */
+        if (GD_OP_STATUS_VOLUME == req_ctx->op) {
+                node = frame->cookie;
+                index = node->index;
+                ret = dict_set_int32 (dict, "index", index);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Error setting index on brick status rsp dict");
+                        rsp.op_ret = -1;
+                        /* the event type below is derived from op_ret, so it
+                           must fail too or this reply is reported as ACC */
+                        op_ret = -1;
+                        event_type = GD_OP_EVENT_RCVD_RJT;
+                        goto out;
+                }
+        }
+out:
+
+        if (req_ctx && req_ctx->dict) {
+                ret = dict_get_bin (req_ctx->dict, "transaction_id",
+                                    (void **)&txn_id);
+                gf_msg_debug (this->name, 0,
+                              "transaction ID = %s", uuid_utoa (*txn_id));
+        }
+
+        ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t);
+        GF_ASSERT (ev_ctx);
+        if (op_ret) {
+                event_type = GD_OP_EVENT_RCVD_RJT;
+                ev_ctx->op_ret = op_ret;
+                ev_ctx->op_errstr = gf_strdup(rsp.op_errstr);
+        } else {
+                event_type = GD_OP_EVENT_RCVD_ACC;
+        }
+        ev_ctx->pending_node = frame->cookie;
+        ev_ctx->rsp_dict = dict;
+        ev_ctx->commit_ctx = frame->local;
+        ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx);
+        if (!ret) {
+                glusterd_friend_sm ();
+                glusterd_op_sm ();
+        }
+
+        /* on success the dict travels with ev_ctx; drop it only on failure */
+        if (ret && dict)
+                dict_unref (dict);
+        free (rsp.op_errstr); //malloced by xdr
+        GLUSTERD_STACK_DESTROY (frame);
+        return ret;
+}
+
+/*
+ * RPC reply entry point for brick-op requests.  The reply is handed to
+ * __glusterd_brick_op_cbk through glusterd_big_locked_cbk (presumably so
+ * the handler runs under glusterd's big lock -- confirm against that
+ * helper).  Returns whatever glusterd_big_locked_cbk returns.
+ */
+int32_t
+glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+                       int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          __glusterd_brick_op_cbk);
+        return status;
+}
+
+/*
+ * Send the current operation to every brick/node selected for it.
+ *
+ * @frame: not used; one private frame is created per pending node.
+ * @this:  glusterd xlator.
+ * @data:  glusterd_req_ctx_t describing the operation (op and dict).
+ *
+ * The pending nodes are chosen by glusterd_op_bricks_select() into
+ * opinfo.pending_bricks.  For each of them a payload is built and submitted;
+ * the number of successful submissions is stored in
+ * opinfo.brick_pending_count.  On failure a RJT event is injected into the op
+ * state machine.  Returns 0 on success and a non-zero value on failure.
+ */
+int32_t
+glusterd_brick_op (call_frame_t *frame, xlator_t *this,
+                   void *data)
+{
+
+        gd1_mgmt_brick_op_req *req = NULL;
+        int ret = 0;
+        int ret1 = 0;
+        glusterd_conf_t *priv = NULL;
+        call_frame_t *dummy_frame = NULL;
+        char *op_errstr = NULL;
+        int pending_bricks = 0;
+        glusterd_pending_node_t *pending_node;
+        glusterd_req_ctx_t *req_ctx = NULL;
+        struct rpc_clnt *rpc = NULL;
+        dict_t *op_ctx = NULL;
+        uuid_t *txn_id = NULL;
+
+        if (!this) {
+                ret = -1;
+                goto out;
+        }
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* default id; replaced below when the request dict carries one */
+        txn_id = &priv->global_txn_id;
+
+        req_ctx = data;
+        GF_ASSERT (req_ctx);
+        CDS_INIT_LIST_HEAD (&opinfo.pending_bricks);
+
+        ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
+        gf_msg_debug (this->name, 0, "transaction ID = %s",
+                      uuid_utoa (*txn_id));
+
+        ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, &op_errstr,
+                                         &opinfo.pending_bricks, NULL);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_SELECT_FAIL, "Failed to select bricks "
+                        "while performing brick op during 'Volume %s'",
+                        gd_op_list[opinfo.op]);
+                opinfo.op_errstr = op_errstr;
+                goto out;
+        }
+
+        cds_list_for_each_entry (pending_node, &opinfo.pending_bricks, list) {
+                dummy_frame = create_frame (this, this->ctx->pool);
+                if (!dummy_frame)
+                        continue;
+
+                if ((pending_node->type == GD_NODE_NFS) ||
+                    (pending_node->type == GD_NODE_QUOTAD) ||
+                    (pending_node->type == GD_NODE_SNAPD) ||
+                    (pending_node->type == GD_NODE_SCRUB) ||
+                    ((pending_node->type == GD_NODE_SHD) &&
+                     (req_ctx->op == GD_OP_STATUS_VOLUME)))
+                        ret = glusterd_node_op_build_payload
+                                (req_ctx->op,
+                                 (gd1_mgmt_brick_op_req **)&req,
+                                 req_ctx->dict);
+                else {
+                        ret = glusterd_brick_op_build_payload
+                                (req_ctx->op, pending_node->node,
+                                 (gd1_mgmt_brick_op_req **)&req,
+                                 req_ctx->dict);
+                }
+
+                /* Both builders are checked: req is dereferenced below, so a
+                   failed build must not get that far.  The frame created for
+                   this node was never submitted and is released here. */
+                if (ret || !req) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+                                "Failed to "
+                                "build brick op payload during "
+                                "'Volume %s'", gd_op_list[req_ctx->op]);
+                        if (!ret)
+                                ret = -1;
+                        GLUSTERD_STACK_DESTROY (dummy_frame);
+                        goto out;
+                }
+
+                dummy_frame->local = data;
+                dummy_frame->cookie = pending_node;
+
+                rpc = glusterd_pending_node_get_rpc (pending_node);
+                if (!rpc) {
+                        if (pending_node->type == GD_NODE_REBALANCE) {
+                                opinfo.brick_pending_count = 0;
+                                ret = 0;
+                                if (req) {
+                                        GF_FREE (req->input.input_val);
+                                        GF_FREE (req);
+                                        req = NULL;
+                                }
+                                GLUSTERD_STACK_DESTROY (dummy_frame);
+
+                                op_ctx = glusterd_op_get_ctx ();
+                                if (!op_ctx)
+                                        goto out;
+                                glusterd_defrag_volume_node_rsp (req_ctx->dict,
+                                                                 NULL, op_ctx);
+
+                                goto out;
+                        }
+
+                        /* nothing was submitted: release the payload and
+                           the frame like the rebalance branch above */
+                        GF_FREE (req->input.input_val);
+                        GF_FREE (req);
+                        req = NULL;
+                        GLUSTERD_STACK_DESTROY (dummy_frame);
+
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RPC_FAILURE, "Brick Op failed "
+                                "due to rpc failure.");
+                        goto out;
+                }
+
+                ret = glusterd_submit_request (rpc, req, dummy_frame,
+                                               priv->gfs_mgmt,
+                                               req->op, NULL,
+                                               this, glusterd_brick_op_cbk,
+                                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+                if (req) {
+                        GF_FREE (req->input.input_val);
+                        GF_FREE (req);
+                        req = NULL;
+                }
+                if (!ret)
+                        pending_bricks++;
+
+                glusterd_pending_node_put_rpc (pending_node);
+        }
+
+        gf_msg_trace (this->name, 0, "Sent brick op req for operation "
+                      "'Volume %s' to %d bricks", gd_op_list[req_ctx->op],
+                      pending_bricks);
+        opinfo.brick_pending_count = pending_bricks;
+
+out:
+
+        if (ret)
+                opinfo.op_ret = ret;
+
+        /* NOTE(review): txn_id is still NULL here when 'this' was NULL */
+        ret1 = glusterd_set_txn_opinfo (txn_id, &opinfo);
+        if (ret1)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set "
+                        "transaction's opinfo");
+
+        if (ret) {
+                glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+                                             txn_id, data);
+                opinfo.op_ret = ret;
+        }
+
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Client-side procedure table of the brick-op program, indexed by
+ * GLUSTERD_BRICK_* procedure number: { procedure name, sender function }.
+ * Slots that are not listed stay zero-initialized. */
+struct rpc_clnt_procedure gd_brick_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", NULL },
+ [GLUSTERD_BRICK_OP] = {"BRICK_OP", glusterd_brick_op},
+};
+
+/* Client-side procedure table of the peer (friend) program, indexed by
+ * GLUSTERD_FRIEND_* / GLUSTERD_PROBE_QUERY procedure number:
+ * { procedure name, sender function }.  Slots that are not listed stay
+ * zero-initialized. */
+struct rpc_clnt_procedure gd_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
+ [GLUSTERD_FRIEND_NULL] = {"NULL", NULL },
+ [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_rpc_probe},
+ [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_rpc_friend_add},
+ [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd_rpc_friend_remove},
+ [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_rpc_friend_update},
+};
+
+/* Client-side procedure table of the mgmt program, indexed by
+ * GLUSTERD_MGMT_* procedure number: { procedure name, sender function }.
+ * Slots that are not listed stay zero-initialized. */
+struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
+ [GLUSTERD_MGMT_NULL] = {"NULL", NULL },
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd_cluster_lock},
+ [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd_cluster_unlock},
+ [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_stage_op},
+ [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op},
+};
+
+/* Client-side procedure table of the mgmt v3 program, indexed by
+ * GLUSTERD_MGMT_V3_* procedure number: { procedure name, sender function }.
+ * Slots that are not listed stay zero-initialized. */
+struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
+ [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL },
+ [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers},
+ [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", glusterd_mgmt_v3_unlock_peers},
+};
+
+/* RPC client program descriptor for "glusterd mgmt": program number,
+ * version and the gd_mgmt_actors procedure table with its size. */
+struct rpc_clnt_program gd_mgmt_prog = {
+ .progname = "glusterd mgmt",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_VERSION,
+ .proctable = gd_mgmt_actors,
+ .numproc = GLUSTERD_MGMT_MAXVALUE,
+};
+
+/* RPC client program descriptor for "brick operations": program number,
+ * version and the gd_brick_actors procedure table with its size. */
+struct rpc_clnt_program gd_brick_prog = {
+ .progname = "brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .proctable = gd_brick_actors,
+ .numproc = GLUSTERD_BRICK_MAXVALUE,
+};
+
+/* RPC client program descriptor for "Peer mgmt": program number, version
+ * and the gd_peer_actors procedure table with its size. */
+struct rpc_clnt_program gd_peer_prog = {
+ .progname = "Peer mgmt",
+ .prognum = GD_FRIEND_PROGRAM,
+ .progver = GD_FRIEND_VERSION,
+ .proctable = gd_peer_actors,
+ .numproc = GLUSTERD_FRIEND_MAXVALUE,
+};
+
+/* RPC client program descriptor for "glusterd mgmt v3".  It uses the same
+ * program number as gd_mgmt_prog (GD_MGMT_PROGRAM) with its own version,
+ * GD_MGMT_V3_VERSION, and the gd_mgmt_v3_actors procedure table. */
+struct rpc_clnt_program gd_mgmt_v3_prog = {
+ .progname = "glusterd mgmt v3",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_V3_VERSION,
+ .proctable = gd_mgmt_v3_actors,
+ .numproc = GLUSTERD_MGMT_V3_MAXVALUE,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
new file mode 100644
index 00000000000..3761dbadfd1
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
@@ -0,0 +1,207 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-scrub-svc.h"
+#include "glusterd-svc-helper.h"
+
+/* Service name of the scrubber; passed to glusterd_svc_init() and used to
+ * build the service's volfile path in this file. */
+char *scrub_svc_name = "scrub";
+
+/*
+ * Install the scrub-specific manager/start/stop handlers on a service
+ * object.
+ */
+void
+glusterd_scrubsvc_build (glusterd_svc_t *svc)
+{
+        /* the three assignments are independent of one another */
+        svc->stop    = glusterd_scrubsvc_stop;
+        svc->start   = glusterd_scrubsvc_start;
+        svc->manager = glusterd_scrubsvc_manager;
+}
+
+/*
+ * Initialize the service object under the scrub service name.
+ * Returns the result of glusterd_svc_init().
+ */
+int
+glusterd_scrubsvc_init (glusterd_svc_t *svc)
+{
+        int rc = 0;
+
+        rc = glusterd_svc_init (svc, scrub_svc_name);
+        return rc;
+}
+
+/*
+ * Generate the scrub volfile: the path is derived from the scrub service
+ * name and the configured working directory, and the file is produced by
+ * glusterd_create_global_volfile() with build_scrub_graph.
+ * Returns the result of glusterd_create_global_volfile().
+ */
+static int
+glusterd_scrubsvc_create_volfile ()
+{
+        xlator_t *xl = THIS;
+        glusterd_conf_t *priv = NULL;
+        char volfile[PATH_MAX] = {0,};
+        int rc = -1;
+
+        priv = xl->private;
+        GF_ASSERT (priv);
+
+        glusterd_svc_build_volfile_path (scrub_svc_name, priv->workdir,
+                                         volfile, sizeof (volfile));
+
+        rc = glusterd_create_global_volfile (build_scrub_graph,
+                                             volfile, NULL);
+        if (rc != 0)
+                gf_msg (xl->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "Failed to create volfile");
+
+        gf_msg_debug (xl->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/*
+ * Bring the scrub service into the state it should be in.
+ *
+ * The service object is initialized on first use.  When
+ * glusterd_should_i_stop_bitd() says so the service is stopped with SIGTERM;
+ * otherwise its volfile is regenerated and the service is stopped with
+ * SIGKILL, started with 'flags' and connected to.  'data' is not used.
+ * Returns 0 on success or the error of the first failing step.
+ */
+int
+glusterd_scrubsvc_manager (glusterd_svc_t *svc, void *data, int flags)
+{
+        int rc = -EINVAL;
+
+        if (!svc->inited) {
+                rc = glusterd_scrubsvc_init (svc);
+                if (rc) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SCRUB_INIT_FAIL, "Failed to init "
+                                "scrub service");
+                        goto done;
+                }
+
+                svc->inited = _gf_true;
+                gf_msg_debug (THIS->name, 0, "scrub service "
+                              "initialized");
+        }
+
+        if (glusterd_should_i_stop_bitd ()) {
+                rc = svc->stop (svc, SIGTERM);
+                goto done;
+        }
+
+        /* restart sequence: every step runs only if the previous succeeded */
+        rc = glusterd_scrubsvc_create_volfile ();
+        if (!rc)
+                rc = svc->stop (svc, SIGKILL);
+        if (!rc)
+                rc = svc->start (svc, flags);
+        if (!rc)
+                rc = glusterd_conn_connect (&(svc->conn));
+
+done:
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/*
+ * Start the scrub service with "--global-timer-wheel" passed as "cmdarg0"
+ * in the command dictionary.  Returns -1 when the dictionary cannot be
+ * created, the dict_set_str() error when the argument cannot be stored,
+ * otherwise the result of glusterd_svc_start().
+ */
+int
+glusterd_scrubsvc_start (glusterd_svc_t *svc, int flags)
+{
+        dict_t *args = NULL;
+        int rc = -1;
+
+        args = dict_new ();
+        if (args == NULL)
+                return rc;
+
+        rc = dict_set_str (args, "cmdarg0", "--global-timer-wheel");
+        if (rc == 0)
+                rc = glusterd_svc_start (svc, flags, args);
+
+        dict_unref (args);
+        return rc;
+}
+
+/*
+ * Stop the scrub service with the given signal.
+ * Returns the result of glusterd_svc_stop().
+ */
+int
+glusterd_scrubsvc_stop (glusterd_svc_t *svc, int sig)
+{
+        int rc = 0;
+
+        rc = glusterd_svc_stop (svc, sig);
+        return rc;
+}
+
+/*
+ * Apply a configuration change to the scrub service.
+ *
+ * - When glusterd_should_i_stop_bitd() is true the service manager is run
+ *   straight away.
+ * - When the newly generated volfile is identical to the existing one
+ *   nothing is done.
+ * - When only options changed (topology identical) the volfile is rewritten
+ *   and glusterd_fetchspec_notify() is called.
+ * - Otherwise the service manager is run with PROC_START_NO_WAIT.
+ *
+ * Returns 0 on success, -1 when the xlator or its private data is missing,
+ * otherwise the error of the failing helper.
+ */
+int
+glusterd_scrubsvc_reconfigure ()
+{
+        int ret = -1;
+        xlator_t *this = NULL;
+        glusterd_conf_t *priv = NULL;
+        gf_boolean_t identical = _gf_false;
+
+        this = THIS;
+        /* a literal name is used: 'this' is what is being validated, so it
+           cannot be dereferenced to obtain the log name */
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        if (glusterd_should_i_stop_bitd ())
+                goto manager;
+
+
+        /*
+         * Check both OLD and NEW volfiles, if they are SAME by size
+         * and cksum i.e. "character-by-character". If YES, then
+         * NOTHING has been changed, just return.
+         */
+        ret = glusterd_svc_check_volfile_identical (priv->scrub_svc.name,
+                                                    build_scrub_graph,
+                                                    &identical);
+        if (ret)
+                goto out;
+
+        if (identical) {
+                ret = 0;
+                goto out;
+        }
+
+        /*
+         * They are not identical. Find out if the topology is changed
+         * OR just the volume options. If just the options which got
+         * changed, then inform the xlator to reconfigure the options.
+         */
+        identical = _gf_false; /* RESET the FLAG */
+        ret = glusterd_svc_check_topology_identical (priv->scrub_svc.name,
+                                                     build_scrub_graph,
+                                                     &identical);
+        if (ret)
+                goto out;
+
+        /* Topology is not changed, but just the options. But write the
+         * options to scrub volfile, so that scrub will be reconfigured.
+         */
+        if (identical) {
+                ret = glusterd_scrubsvc_create_volfile ();
+                if (ret == 0) {/* Only if above PASSES */
+                        ret = glusterd_fetchspec_notify (THIS);
+                }
+                goto out;
+        }
+
+manager:
+        /*
+         * scrub volfile's topology has been changed. scrub server needs
+         * to be RESTARTED to ACT on the changed volfile.
+         */
+        ret = priv->scrub_svc.manager (&(priv->scrub_svc),
+                                       NULL,
+                                       PROC_START_NO_WAIT);
+
+out:
+        /* 'this' is NULL when the first validation above failed */
+        gf_msg_debug (this ? this->name : "glusterd", 0, "Returning %d", ret);
+        return ret;
+
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h
new file mode 100644
index 00000000000..dbdcf43529c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SCRUB_SVC_H_
+#define _GLUSTERD_SCRUB_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+/* Scrub service descriptor: the generic glusterd service object plus a
+ * store handle.
+ * NOTE(review): how `handle` is used is not visible in this header - confirm
+ * against the callers.
+ */
+typedef struct glusterd_scrubsvc_ glusterd_scrubsvc_t;
+
+struct glusterd_scrubsvc_{
+ glusterd_svc_t svc;
+ gf_store_handle_t *handle;
+};
+
+void
+glusterd_scrubsvc_build (glusterd_svc_t *svc);
+
+int
+glusterd_scrubsvc_init (glusterd_svc_t *svc);
+
+int
+glusterd_scrubsvc_manager (glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_scrubsvc_start (glusterd_svc_t *svc, int flags);
+
+int
+glusterd_scrubsvc_stop (glusterd_svc_t *svc, int sig);
+
+int
+glusterd_scrubsvc_reconfigure ();
+
+void
+glusterd_scrubsvc_build_volfile_path (char *server, char *workdir,
+ char *volfile, size_t len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
new file mode 100644
index 00000000000..ecf9d53b71e
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -0,0 +1,421 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-syncop.h"
+#include "glusterd-op-sm.h"
+
+/* Round X up to an int: when X has a positive fractional part the result is
+ * (int)(X + 1), otherwise it is (int)X. X is evaluated more than once, so do
+ * not pass an expression with side effects.
+ */
+#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X))
+
+/* Tell whether @op only reads state.
+ *
+ * True for GD_OP_STATUS_VOLUME, and for GD_OP_SET_VOLUME when the "volname"
+ * entry of @dict is "help" or "help-xml" and no "key1" entry can be fetched
+ * (i.e. the "volume set help" form). Everything else is false. @this is not
+ * used.
+ */
+static gf_boolean_t
+glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict)
+{
+        char *name      = NULL;
+        char *first_key = NULL;
+
+        if (op == GD_OP_STATUS_VOLUME)
+                return _gf_true;
+
+        if (op != GD_OP_SET_VOLUME)
+                return _gf_false;
+
+        /* The lookup result is not needed: only a non-NULL name matters. */
+        (void) dict_get_str (dict, "volname", &name);
+        if (!name)
+                return _gf_false;
+
+        if ((strcmp (name, "help") != 0) && (strcmp (name, "help-xml") != 0))
+                return _gf_false;
+
+        if (dict_get_str (dict, "key1", &first_key) < 0)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* Decide whether server-quorum validation has to run for @op.
+ *
+ * Validation is skipped (returns _gf_false) for read-only operations and
+ * for volume set/reset requests whose option is itself one of the quorum
+ * options. In every other case, including any lookup failure, it is
+ * required (returns _gf_true).
+ */
+gf_boolean_t
+glusterd_is_quorum_validation_required (xlator_t *this, glusterd_op_t op,
+                                        dict_t *dict)
+{
+        gf_boolean_t  required  = _gf_true;
+        char         *opt_name  = NULL;
+        char         *canonical = NULL;
+        int           ret       = -1;
+
+        if (glusterd_is_get_op (this, op, dict)) {
+                required = _gf_false;
+                goto out;
+        }
+
+        /* Only set and reset carry an option name, under different keys. */
+        switch (op) {
+        case GD_OP_SET_VOLUME:
+                ret = dict_get_str (dict, "key1", &opt_name);
+                break;
+        case GD_OP_RESET_VOLUME:
+                ret = dict_get_str (dict, "key", &opt_name);
+                break;
+        default:
+                goto out;
+        }
+        if (ret != 0)
+                goto out;
+
+        ret = glusterd_check_option_exists (opt_name, &canonical);
+        if (ret <= 0)
+                goto out;
+
+        /* Prefer the corrected spelling when the checker supplied one. */
+        if (canonical)
+                opt_name = canonical;
+
+        if (glusterd_is_quorum_option (opt_name))
+                required = _gf_false;
+out:
+        GF_FREE (canonical);
+        return required;
+}
+
+/* Reject a volume operation when server quorum is lost.
+ *
+ * Returns -1 and stores a newly allocated message in *@op_errstr only when
+ * all of the following hold: validation is required for @op, the volume
+ * named by "volname" in @dict exists, this node does not meet server quorum,
+ * and the volume takes part in server quorum. Returns 0 in every other case
+ * (missing volname or unknown volume included).
+ */
+int
+glusterd_validate_quorum (xlator_t *this, glusterd_op_t op,
+                          dict_t *dict, char **op_errstr)
+{
+        char               *volname = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+
+        if (!glusterd_is_quorum_validation_required (this, op, dict))
+                return 0;
+
+        if (dict_get_str (dict, "volname", &volname))
+                return 0;
+
+        if (glusterd_volinfo_find (volname, &volinfo))
+                return 0;
+
+        if (does_gd_meet_server_quorum (this))
+                return 0;
+
+        if (!glusterd_is_volume_in_server_quorum (volinfo))
+                return 0;
+
+        *op_errstr = gf_strdup ("Quorum not met. Volume operation not allowed.");
+        return -1;
+}
+
+/* Return _gf_true when @option is exactly GLUSTERD_QUORUM_TYPE_KEY or
+ * GLUSTERD_QUORUM_RATIO_KEY, _gf_false otherwise.
+ */
+gf_boolean_t
+glusterd_is_quorum_option (char *option)
+{
+        static const char * const quorum_keys[] = {GLUSTERD_QUORUM_TYPE_KEY,
+                                                   GLUSTERD_QUORUM_RATIO_KEY,
+                                                   NULL};
+        const char * const *cursor = NULL;
+
+        for (cursor = quorum_keys; *cursor != NULL; cursor++) {
+                if (!strcmp (option, *cursor))
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/* Tell whether setting @option to @value changes the quorum configuration
+ * stored in @options.
+ *
+ * @option must be "all" or one of the quorum keys, otherwise the answer is
+ * _gf_false. For "all", @value is compared against both the stored quorum
+ * type and the stored quorum ratio. The result is _gf_false when a stored
+ * value that was looked up equals @value, and _gf_true otherwise.
+ */
+gf_boolean_t
+glusterd_is_quorum_changed (dict_t *options, char *option, char *value)
+{
+        gf_boolean_t  is_all    = _gf_false;
+        char         *old_type  = NULL;
+        char         *new_type  = NULL;
+        char         *old_ratio = NULL;
+        char         *new_ratio = NULL;
+
+        if (strcmp ("all", option) == 0)
+                is_all = _gf_true;
+
+        if (!is_all && !glusterd_is_quorum_option (option))
+                return _gf_false;
+
+        /* A failed lookup simply leaves the "old" pointer NULL. */
+        if (is_all || !strcmp (GLUSTERD_QUORUM_TYPE_KEY, option)) {
+                new_type = value;
+                (void) dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY,
+                                     &old_type);
+        }
+
+        if (is_all || !strcmp (GLUSTERD_QUORUM_RATIO_KEY, option)) {
+                new_ratio = value;
+                (void) dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY,
+                                     &old_ratio);
+        }
+
+        if (old_type && new_type && !strcmp (old_type, new_type))
+                return _gf_false;
+
+        if (old_ratio && new_ratio && !strcmp (old_ratio, new_ratio))
+                return _gf_false;
+
+        if (!old_ratio && !new_ratio && !old_type && !new_type)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* A peer counts towards the quorum population when its contribution is
+ * QUORUM_UP or QUORUM_DOWN; any other value does not count.
+ */
+static gf_boolean_t
+_is_contributing_to_quorum (gd_quorum_contrib_t contrib)
+{
+        switch (contrib) {
+        case QUORUM_UP:
+        case QUORUM_DOWN:
+                return _gf_true;
+        default:
+                return _gf_false;
+        }
+}
+
+/* Quorum is met once the number of active members reaches @quorum_count. */
+gf_boolean_t
+does_quorum_meet (int active_count, int quorum_count)
+{
+        return (quorum_count <= active_count);
+}
+
+/* Compute the cluster-wide numbers used for the server-quorum decision.
+ *
+ * @active_count  optional out-parameter: this node plus every peer whose
+ *                quorum contribution is QUORUM_UP.
+ * @quorum_count  out-parameter: the number of active members needed for
+ *                quorum.
+ *
+ * The quorum population is this node plus every peer contributing to quorum
+ * (QUORUM_UP or QUORUM_DOWN). When GLUSTERD_QUORUM_RATIO_KEY is set in
+ * conf->opts and parses as a percentage, the required count is that
+ * percentage of the population, rounded up. Otherwise the default of "more
+ * than half" is used.
+ *
+ * Always returns 0.
+ */
+int
+glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count,
+                                    int *quorum_count)
+{
+        glusterd_peerinfo_t *peerinfo          = NULL;
+        glusterd_conf_t     *conf              = NULL;
+        int                  ret               = -1;
+        int                  inquorum_count    = 0;
+        char                *val               = NULL;
+        double               quorum_percentage = 0.0;
+        gf_boolean_t         ratio             = _gf_false;
+        int                  count             = 0;
+
+        conf = this->private;
+
+        /* Start with counting self */
+        inquorum_count = 1;
+        if (active_count)
+                *active_count = 1;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                if (_is_contributing_to_quorum (peerinfo->quorum_contrib))
+                        inquorum_count = inquorum_count + 1;
+                if (active_count && (peerinfo->quorum_contrib == QUORUM_UP))
+                        *active_count = *active_count + 1;
+        }
+        rcu_read_unlock ();
+
+        ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val);
+        if (ret == 0) {
+                /* Use the ratio only when it actually parses. Flagging it
+                 * before parsing left quorum_percentage at 0.0 on a bad
+                 * value, giving a required count of 0 (quorum always met).
+                 */
+                ret = gf_string2percent (val, &quorum_percentage);
+                if (ret == 0)
+                        ratio = _gf_true;
+        }
+
+        if (ratio)
+                count = CEILING_POS (inquorum_count *
+                                     quorum_percentage / 100.0);
+        else
+                count = (inquorum_count * 50 / 100) + 1;
+
+        *quorum_count = count;
+        ret = 0;
+
+        return ret;
+}
+
+/* A volume takes part in server quorum when its GLUSTERD_QUORUM_TYPE_KEY
+ * option exists and equals GLUSTERD_SERVER_QUORUM.
+ */
+gf_boolean_t
+glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo)
+{
+        char *type = NULL;
+
+        if (dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &type))
+                return _gf_false;
+
+        if (strcmp (type, GLUSTERD_SERVER_QUORUM) != 0)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* Return _gf_true as soon as one volume in conf->volumes takes part in
+ * server quorum; _gf_false when none does.
+ */
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum (xlator_t *this)
+{
+        glusterd_conf_t    *conf    = this->private;
+        glusterd_volinfo_t *volinfo = NULL;
+        gf_boolean_t        found   = _gf_false;
+
+        list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+                if (glusterd_is_volume_in_server_quorum (volinfo)) {
+                        found = _gf_true;
+                        break;
+                }
+        }
+
+        return found;
+}
+
+/* Tell whether this node currently sees enough active cluster members to
+ * satisfy server quorum.
+ *
+ * Returns _gf_true when the active count obtained from
+ * glusterd_get_quorum_cluster_counts() reaches the required quorum count,
+ * and _gf_false otherwise, including when the counts cannot be obtained.
+ */
+gf_boolean_t
+does_gd_meet_server_quorum (xlator_t *this)
+{
+        int quorum_count = 0;
+        int active_count = 0;
+
+        if (glusterd_get_quorum_cluster_counts (this, &active_count,
+                                                &quorum_count))
+                return _gf_false;
+
+        if (!does_quorum_meet (active_count, quorum_count))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* Apply the current server-quorum verdict to one volume.
+ *
+ * @meets_quorum  whether the cluster currently meets server quorum.
+ *
+ * A volume that is not started gets NOT_APPLICABLE_QUORUM and nothing else
+ * happens. Otherwise the new status is MEETS_QUORUM / DOESNT_MEET_QUORUM for
+ * volumes taking part in server quorum and NOT_APPLICABLE_QUORUM for the
+ * rest. Local bricks are touched only when the status actually changes:
+ * they are stopped on DOESNT_MEET_QUORUM and started for any other new
+ * status. The results of the brick start/stop calls are not checked.
+ */
+void
+glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
+                                  gf_boolean_t meets_quorum)
+{
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        gd_quorum_status_t    quorum_status = NOT_APPLICABLE_QUORUM;
+
+        if (volinfo->status != GLUSTERD_STATUS_STARTED) {
+                volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
+                return;
+        }
+
+        if (glusterd_is_volume_in_server_quorum (volinfo))
+                quorum_status = meets_quorum ? MEETS_QUORUM
+                                             : DOESNT_MEET_QUORUM;
+
+        /*
+         * The following check is added to prevent spurious brick starts when
+         * events occur that affect quorum.
+         * Example:
+         * There is a cluster of 10 peers. Volume is in quorum. User
+         * takes down one brick from the volume to perform maintenance.
+         * Suddenly one of the peers goes down. Cluster is still in quorum.
+         * But because of this 'peer going down' event, quorum is calculated
+         * and the bricks that are down are brought up again. In this process
+         * it also brings up the brick that is purposefully taken down.
+         */
+        if (volinfo->quorum_status == quorum_status)
+                return;
+
+        if (quorum_status == MEETS_QUORUM) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS,
+                        "Server quorum regained for volume %s. Starting local "
+                        "bricks.", volinfo->volname);
+        } else if (quorum_status == DOESNT_MEET_QUORUM) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS,
+                        "Server quorum lost for volume %s. Stopping local "
+                        "bricks.", volinfo->volname);
+        }
+
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (!glusterd_is_local_brick (this, volinfo, brickinfo))
+                        continue;
+                if (quorum_status == DOESNT_MEET_QUORUM)
+                        glusterd_brick_stop (volinfo, brickinfo, _gf_false);
+                else
+                        glusterd_brick_start (volinfo, brickinfo, _gf_false);
+        }
+
+        volinfo->quorum_status = quorum_status;
+}
+
+/* Re-evaluate server quorum and apply the verdict to every volume.
+ *
+ * Marks a quorum action as pending, takes the glusterd lock for this node's
+ * uuid, computes the cluster counts and calls
+ * glusterd_do_volume_quorum_action() for each volume in conf->volumes.
+ *
+ * Returns 0 on success, or the error from glusterd_lock() or
+ * glusterd_get_quorum_cluster_counts().
+ */
+int
+glusterd_do_quorum_action ()
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int active_count = 0;
+ int quorum_count = 0;
+ gf_boolean_t meets = _gf_false;
+
+ this = THIS;
+ conf = this->private;
+
+ /* The flag is raised before the lock attempt and cleared only on the
+ * unlock path below, so it stays set when glusterd_lock() fails.
+ * NOTE(review): presumably whoever holds the lock re-runs this
+ * function on release when the flag is set - confirm against the
+ * unlock callers.
+ */
+ conf->pending_quorum_action = _gf_true;
+ ret = glusterd_lock (conf->uuid);
+ if (ret)
+ goto out;
+
+ {
+ ret = glusterd_get_quorum_cluster_counts (this, &active_count,
+ &quorum_count);
+ if (ret)
+ goto unlock;
+
+ if (does_quorum_meet (active_count, quorum_count))
+ meets = _gf_true;
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ glusterd_do_volume_quorum_action (this, volinfo, meets);
+ }
+ }
+unlock:
+ /* The unlock result is deliberately ignored. */
+ (void)glusterd_unlock (conf->uuid);
+ conf->pending_quorum_action = _gf_false;
+out:
+ return ret;
+}
+
+/* Classify whether a brick of @volinfo may be started with respect to
+ * server quorum.
+ *
+ * Returns:
+ *   0 - the volume takes part in server quorum and quorum is not met
+ *   1 - the volume takes part in server quorum and quorum is met
+ *   2 - server quorum is not applicable to the volume
+ */
+
+int
+check_quorum_for_brick_start (glusterd_volinfo_t *volinfo,
+                              gf_boolean_t node_quorum)
+{
+        if (!glusterd_is_volume_in_server_quorum (volinfo))
+                return 2;
+
+        return node_quorum ? 1 : 0;
+}
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.h b/xlators/mgmt/glusterd/src/glusterd-server-quorum.h
new file mode 100644
index 00000000000..ea6a8bd6158
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_SERVER_QUORUM_H
+#define _GLUSTERD_SERVER_QUORUM_H
+
+int
+glusterd_validate_quorum (xlator_t *this, glusterd_op_t op, dict_t *dict,
+ char **op_errstr);
+
+gf_boolean_t
+glusterd_is_quorum_changed (dict_t *options, char *option, char *value);
+
+int
+glusterd_do_quorum_action ();
+
+int
+glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count,
+ int *quorum_count);
+
+gf_boolean_t
+glusterd_is_quorum_option (char *option);
+
+gf_boolean_t
+glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum (xlator_t *this);
+
+gf_boolean_t
+does_gd_meet_server_quorum (xlator_t *this);
+
+int
+check_quorum_for_brick_start (glusterd_volinfo_t *volinfo,
+ gf_boolean_t node_quorum);
+
+gf_boolean_t
+does_quorum_meet (int active_count, int quorum_count);
+
+#endif /* _GLUSTERD_SERVER_QUORUM_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
new file mode 100644
index 00000000000..0e664b5c786
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -0,0 +1,250 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-svc-helper.h"
+
+/* Service name of the shd service; used when initialising the service and
+ * when building its volfile path.
+ */
+char *shd_svc_name = "glustershd";
+
+/* Wire the shd-specific callbacks into @svc: the shd manager and start
+ * routines, and the generic glusterd_svc_stop() for stopping.
+ */
+void
+glusterd_shdsvc_build (glusterd_svc_t *svc)
+{
+        svc->stop    = glusterd_svc_stop;
+        svc->start   = glusterd_shdsvc_start;
+        svc->manager = glusterd_shdsvc_manager;
+}
+
+/* Initialise the shd service object by delegating to glusterd_svc_init()
+ * with the fixed service name shd_svc_name. Returns whatever
+ * glusterd_svc_init() returns.
+ */
+int
+glusterd_shdsvc_init (glusterd_svc_t *svc)
+{
+ return glusterd_svc_init (svc, shd_svc_name);
+}
+
+/* Generate the global shd volfile.
+ *
+ * Builds an override dict (background self-heal count 0; data, metadata and
+ * entry self-heal "on") and hands it, together with the volfile path under
+ * conf->workdir, to glusterd_create_global_volfile().
+ *
+ * Returns 0 on success, -1 when the dict cannot be allocated, or the error
+ * of the first failing step.
+ */
+static int
+glusterd_shdsvc_create_volfile ()
+{
+        static char     *forced_on[]        = {
+                "cluster.data-self-heal",
+                "cluster.metadata-self-heal",
+                "cluster.entry-self-heal",
+        };
+        char             filepath[PATH_MAX] = {0,};
+        glusterd_conf_t *conf               = THIS->private;
+        dict_t          *mod_dict           = NULL;
+        size_t           idx                = 0;
+        int              ret                = -1;
+
+        mod_dict = dict_new ();
+        if (!mod_dict)
+                goto out;
+
+        ret = dict_set_uint32 (mod_dict, "cluster.background-self-heal-count",
+                               0);
+        if (ret)
+                goto out;
+
+        for (idx = 0; idx < sizeof (forced_on) / sizeof (forced_on[0]);
+             idx++) {
+                ret = dict_set_str (mod_dict, forced_on[idx], "on");
+                if (ret)
+                        goto out;
+        }
+
+        glusterd_svc_build_volfile_path (shd_svc_name, conf->workdir,
+                                         filepath, sizeof (filepath));
+        ret = glusterd_create_global_volfile (build_shd_graph, filepath,
+                                              mod_dict);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "Failed to create volfile");
+
+out:
+        if (mod_dict)
+                dict_unref (mod_dict);
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Manage the shd service: initialise it on first use, then stop it or
+ * regenerate its volfile and restart it.
+ *
+ * @data   optional glusterd_volinfo_t the request is about, or NULL.
+ * @flags  passed through to svc->start().
+ *
+ * Nothing is done when @data names a volume that is not shd compatible.
+ * Otherwise, if all volumes (or all shd compatible volumes) are stopped the
+ * service is stopped. If not, the volfile is recreated and the service is
+ * stopped, started and reconnected.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+int
+glusterd_shdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
+{
+        glusterd_volinfo_t *volinfo     = NULL;
+        gf_boolean_t        all_stopped = _gf_false;
+        int                 ret         = 0;
+
+        if (!svc->inited) {
+                ret = glusterd_shdsvc_init (svc);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FAILED_INIT_SHDSVC, "Failed to init shd "
+                                "service");
+                        goto out;
+                }
+                svc->inited = _gf_true;
+                gf_msg_debug (THIS->name, 0, "shd service initialized");
+        }
+
+        volinfo = data;
+
+        if (glusterd_are_all_volumes_stopped () ||
+            glusterd_all_shd_compatible_volumes_stopped ())
+                all_stopped = _gf_true;
+
+        /* A specific volume that shd does not serve needs no action. */
+        if (volinfo && !glusterd_is_shd_compatible_volume (volinfo))
+                goto out;
+
+        if (all_stopped) {
+                ret = svc->stop (svc, SIGTERM);
+                goto out;
+        }
+
+        ret = glusterd_shdsvc_create_volfile ();
+        if (ret)
+                goto out;
+
+        ret = svc->stop (svc, SIGTERM);
+        if (ret)
+                goto out;
+
+        ret = svc->start (svc, flags);
+        if (ret)
+                goto out;
+
+        ret = glusterd_conn_connect (&(svc->conn));
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Start the shd service through glusterd_svc_start(), passing
+ * "--xlator-option" and "*replicate*.node-uuid=<uuid of this node>" as
+ * extra command-line arguments carried in a temporary dict.
+ *
+ * Returns -1 when the dict cannot be allocated, the negative snprintf()
+ * result when formatting fails, a dict_set_str() error, or otherwise the
+ * result of glusterd_svc_start().
+ */
+int
+glusterd_shdsvc_start (glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ char glusterd_uuid_option[PATH_MAX] = {0};
+ dict_t *cmdline = NULL;
+
+ cmdline = dict_new ();
+ if (!cmdline)
+ goto out;
+
+ /* Only an encoding error (negative result) is treated as failure;
+ * truncation is not checked.
+ */
+ ret = snprintf (glusterd_uuid_option, sizeof (glusterd_uuid_option),
+ "*replicate*.node-uuid=%s", uuid_utoa (MY_UUID));
+ if (ret < 0)
+ goto out;
+
+ /* Pass cmdline arguments as key-value pair. The key is merely
+ * a carrier and is not used. Since dictionary follows LIFO the value
+ * should be put in reverse order*/
+ ret = dict_set_str (cmdline, "arg2", glusterd_uuid_option);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (cmdline, "arg1", "--xlator-option");
+ if (ret)
+ goto out;
+
+ ret = glusterd_svc_start (svc, flags, cmdline);
+
+out:
+ /* The dict references the stack buffer above, so it is released
+ * before returning.
+ */
+ if (cmdline)
+ dict_unref (cmdline);
+
+ gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+
+/* Bring the running shd service in line with the current volfile.
+ *
+ * Unless every shd compatible volume is stopped, the freshly built shd
+ * volfile is compared with the existing one:
+ *   - identical volfile            -> nothing to do, return 0;
+ *   - same topology, new options   -> rewrite the volfile and call
+ *                                     glusterd_fetchspec_notify();
+ *   - anything else                -> hand over to the service manager.
+ *
+ * Returns 0 on success, or the error of the first step that failed.
+ */
+int
+glusterd_shdsvc_reconfigure ()
+{
+        xlator_t        *this = NULL;
+        glusterd_conf_t *priv = NULL;
+        gf_boolean_t     same = _gf_false;
+        int              ret  = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO (this->name, this, out);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        if (!glusterd_all_shd_compatible_volumes_stopped ()) {
+                /* Byte-for-byte comparison (size and cksum) of the old and
+                 * the new volfile; equal files mean nothing changed.
+                 */
+                ret = glusterd_svc_check_volfile_identical
+                                (priv->shd_svc.name, build_shd_graph, &same);
+                if (ret || same)
+                        goto out;
+
+                /* The files differ: find out whether only options changed
+                 * or the graph topology itself.
+                 */
+                same = _gf_false;
+                ret = glusterd_svc_check_topology_identical
+                                (priv->shd_svc.name, build_shd_graph, &same);
+                if (ret)
+                        goto out;
+
+                if (same) {
+                        /* Options only: write them out so that shd gets
+                         * reconfigured, then notify.
+                         */
+                        ret = glusterd_shdsvc_create_volfile ();
+                        if (!ret)
+                                ret = glusterd_fetchspec_notify (THIS);
+                        goto out;
+                }
+        }
+
+        /* Topology changed (or everything is stopped): the manager decides
+         * how to act on the changed volfile.
+         */
+        ret = priv->shd_svc.manager (&(priv->shd_svc), NULL,
+                                     PROC_START_NO_WAIT);
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
new file mode 100644
index 00000000000..38a3fd1afd1
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SHD_SVC_H_
+#define _GLUSTERD_SHD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+void
+glusterd_shdsvc_build (glusterd_svc_t *svc);
+
+int
+glusterd_shdsvc_init (glusterd_svc_t *svc);
+
+int
+glusterd_shdsvc_manager (glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_shdsvc_start (glusterd_svc_t *svc, int flags);
+
+int
+glusterd_shdsvc_reconfigure ();
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
new file mode 100644
index 00000000000..c1fb3181b90
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -0,0 +1,1500 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <time.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+
+#include <libgen.h>
+#include "compat-uuid.h"
+
+#include "fnmatch.h"
+#include "xlator.h"
+#include "protocol-common.h"
+#include "glusterd.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "glusterd-messages.h"
+#include "dict.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-server-quorum.h"
+
+char local_node_hostname[PATH_MAX] = {0, };
+
+static struct cds_list_head gd_friend_sm_queue;
+
+/* Printable names of the friend state machine states, indexed by state
+ * value. glusterd_friend_sm_state_name_get() uses the entry at index
+ * GD_FRIEND_STATE_MAX for out-of-range states, so the table must have
+ * GD_FRIEND_STATE_MAX + 1 entries.
+ */
+static char *glusterd_friend_sm_state_names[] = {
+ "Establishing Connection",
+ "Probe Sent to Peer",
+ "Probe Received from Peer",
+ "Peer in Cluster",
+ "Accepted peer request",
+ "Sent and Received peer request",
+ "Peer Rejected",
+ "Peer detach in progress",
+ "Probe Received from peer",
+ "Connected to Peer",
+ "Peer is connected and Accepted",
+ "Invalid State"
+};
+
+/* Printable names of the friend state machine events, indexed by event
+ * value. glusterd_friend_sm_event_name_get() uses the entry at index
+ * GD_FRIEND_EVENT_MAX for out-of-range events, so the table must have
+ * GD_FRIEND_EVENT_MAX + 1 entries.
+ */
+static char *glusterd_friend_sm_event_names[] = {
+ "GD_FRIEND_EVENT_NONE",
+ "GD_FRIEND_EVENT_PROBE",
+ "GD_FRIEND_EVENT_INIT_FRIEND_REQ",
+ "GD_FRIEND_EVENT_RCVD_ACC",
+ "GD_FRIEND_EVENT_LOCAL_ACC",
+ "GD_FRIEND_EVENT_RCVD_RJT",
+ "GD_FRIEND_EVENT_LOCAL_RJT",
+ "GD_FRIEND_EVENT_RCVD_FRIEND_REQ",
+ "GD_FRIEND_EVENT_INIT_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_CONNECTED",
+ "GD_FRIEND_EVENT_NEW_NAME",
+ "GD_FRIEND_EVENT_MAX"
+};
+
+/* Map a friend state value to its printable name; values outside
+ * [0, GD_FRIEND_STATE_MAX) map to the table's last entry.
+ */
+char*
+glusterd_friend_sm_state_name_get (int state)
+{
+        int idx = state;
+
+        if (idx < 0 || idx >= GD_FRIEND_STATE_MAX)
+                idx = GD_FRIEND_STATE_MAX;
+
+        return glusterd_friend_sm_state_names[idx];
+}
+
+/* Map a friend event value to its printable name; values outside
+ * [0, GD_FRIEND_EVENT_MAX) map to the table's last entry.
+ */
+char*
+glusterd_friend_sm_event_name_get (int event)
+{
+        int idx = event;
+
+        if (idx < 0 || idx >= GD_FRIEND_EVENT_MAX)
+                idx = GD_FRIEND_EVENT_MAX;
+
+        return glusterd_friend_sm_event_names[idx];
+}
+
+/* Free a probe context and the hostname it owns. NULL is accepted. */
+void
+glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx)
+{
+        if (ctx) {
+                GF_FREE (ctx->hostname);
+                GF_FREE (ctx);
+        }
+}
+
+/* Free a friend-request context: drop its reference on the vols dict (if
+ * any), then free the hostname and the context itself. NULL is accepted.
+ */
+void
+glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx)
+{
+        if (ctx) {
+                if (ctx->vols)
+                        dict_unref (ctx->vols);
+                GF_FREE (ctx->hostname);
+                GF_FREE (ctx);
+        }
+}
+
+/* Free a friend-update context and the hostname it owns. NULL is
+ * accepted.
+ */
+void
+glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx)
+{
+        if (ctx) {
+                GF_FREE (ctx->hostname);
+                GF_FREE (ctx);
+        }
+}
+
+/* Tell every connected peer that @hostname has been removed.
+ *
+ * Builds a friend-update request (op GD_FRIEND_UPDATE_DEL, the hostname and
+ * a count of 0) and sends it through each connected peer's
+ * GLUSTERD_FRIEND_UPDATE procedure. @uuid is not used.
+ *
+ * Returns -1 when the request dict cannot be allocated, the error of a
+ * failing dict operation, or otherwise the result of the last send that
+ * was attempted (0 when nothing was sent).
+ */
+int
+glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid)
+{
+        int                   ret      = 0;
+        rpc_clnt_procedure_t *proc     = NULL;
+        xlator_t             *this     = NULL;
+        glusterd_peerinfo_t  *peerinfo = NULL;
+        glusterd_conf_t      *priv     = NULL;
+        dict_t               *friends  = NULL;
+
+        this = THIS;
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        friends = dict_new ();
+        if (!friends) {
+                /* Allocation failure must not be reported as success. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int32 (friends, "op", GD_FRIEND_UPDATE_DEL);
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (friends, "hostname", hostname);
+        if (ret)
+                goto out;
+
+        ret = dict_set_int32 (friends, "count", 0);
+        if (ret)
+                goto out;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+                if (!peerinfo->connected || !peerinfo->peer)
+                        continue;
+
+                /* Setting a direct reference to peerinfo in the dict is okay
+                 * as it is only going to be used within this read critical
+                 * section (in glusterd_rpc_friend_update)
+                 */
+                ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "failed to set peerinfo");
+                        goto unlock;
+                }
+
+                /* Best effort: a failed send does not stop the broadcast. */
+                proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+                if (proc->fn) {
+                        ret = proc->fn (NULL, this, friends);
+                }
+        }
+unlock:
+        rcu_read_unlock ();
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+
+out:
+        if (friends)
+                dict_unref (friends);
+
+        return ret;
+}
+
+
+/* State-machine action that does nothing except log; always returns 0. */
+static int
+glusterd_ac_none (glusterd_friend_sm_event_t *event, void *ctx)
+{
+        const int status = 0;
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", status);
+
+        return status;
+}
+
+/* State-machine action for events that are not expected in the current
+ * state: logs the event number as an error and returns 0.
+ */
+static int
+glusterd_ac_error (glusterd_friend_sm_event_t *event, void *ctx)
+{
+        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                GD_MSG_AC_ERROR, "Received event %d ", event->event);
+
+        return 0;
+}
+
+/* State-machine action: queue a GD_FRIEND_EVENT_PROBE event for the peer
+ * that @event refers to.
+ *
+ * The peer is looked up inside an RCU read-side critical section. A new
+ * probe context holding copies of the peer's hostname and port is attached
+ * to the new event, which is then injected into the state machine. @ctx is
+ * only asserted to be non-NULL.
+ *
+ * Returns 0 on success; -1 when the peer is not found or the event or its
+ * context cannot be allocated; otherwise the result of
+ * glusterd_friend_sm_inject_event(). On any failure the new event and its
+ * context are freed here.
+ */
+static int
+glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_probe_ctx_t *new_ev_ctx = NULL;
+
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+ if (!peerinfo) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+ event->peername, uuid_utoa (event->peerid));
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_PROBE, &new_event);
+
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get new new_event");
+ ret = -1;
+ goto out;
+ }
+
+ new_ev_ctx = GF_CALLOC (1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t);
+
+ if (!new_ev_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Copy what is needed out of peerinfo while still inside the RCU
+ * read section. NOTE(review): the gf_strdup() results are not
+ * checked for NULL.
+ */
+ new_ev_ctx->hostname = gf_strdup (peerinfo->hostname);
+ new_ev_ctx->port = peerinfo->port;
+ new_ev_ctx->req = NULL;
+
+ new_event->peername = gf_strdup (peerinfo->hostname);
+ gf_uuid_copy (new_event->peerid, peerinfo->uuid);
+ new_event->ctx = new_ev_ctx;
+
+ ret = glusterd_friend_sm_inject_event (new_event);
+
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject new_event %d, "
+ "ret = %d", new_event->event, ret);
+ }
+
+out:
+ rcu_read_unlock ();
+
+ /* On failure nothing was handed over, so release everything that was
+ * allocated above.
+ */
+ if (ret) {
+ if (new_event)
+ GF_FREE (new_event->peername);
+ GF_FREE (new_event);
+ if (new_ev_ctx)
+ GF_FREE (new_ev_ctx->hostname);
+ GF_FREE (new_ev_ctx);
+ }
+ gf_msg_debug ("glusterd", 0, "returning with %d", ret);
+ return ret;
+}
+
+/* State-machine action: invoke the peer's GLUSTERD_FRIEND_ADD procedure
+ * for the peer that @event refers to.
+ *
+ * A call frame is created with @ctx as its local data and passed, together
+ * with @event, to the procedure. The peer lookup and the call happen inside
+ * an RCU read-side critical section.
+ *
+ * Returns the procedure's result. Returns 0 without sending anything when
+ * the peer is not found, has no rpc peer object, has no handler for the
+ * procedure, or the frame cannot be created.
+ */
+static int
+glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (event);
+
+ this = THIS;
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+ if (!peerinfo) {
+ /* NOTE(review): ret is still 0 here, so a missing peer is
+ * reported as success - confirm this is intended.
+ */
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)",
+ event->peername, uuid_utoa (event->peerid));
+ goto out;
+ }
+
+ if (!peerinfo->peer)
+ goto out;
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
+ if (proc->fn) {
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ frame->local = ctx;
+ ret = proc->fn (frame, this, event);
+ }
+
+out:
+ rcu_read_unlock ();
+
+ /* The frame is destroyed here only when the procedure failed. */
+ if (ret && frame)
+ STACK_DESTROY (frame->root);
+
+ gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+ return ret;
+}
+
+/* State-machine action: send a probe query to the peer named in the probe
+ * context @ctx.
+ *
+ * The peer is looked up by hostname inside an RCU read-side critical
+ * section. The request dict carries the hostname, the port and a direct
+ * pointer to the peerinfo, and is passed to the peer's
+ * GLUSTERD_PROBE_QUERY procedure. @event is not used.
+ *
+ * Returns 0 when the procedure succeeds. Returns -1 when the peer is not
+ * found, has no rpc peer object, has no handler, or the frame or dict
+ * cannot be created; otherwise the error of the failing dict operation or
+ * of the procedure.
+ */
+static int
+glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_probe_ctx_t *probe_ctx = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT (ctx);
+
+ probe_ctx = ctx;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ rcu_read_lock ();
+ peerinfo = glusterd_peerinfo_find (NULL, probe_ctx->hostname);
+ if (peerinfo == NULL) {
+ //We should not reach this state ideally
+ ret = -1;
+ goto out;
+ }
+
+ /* ret is still -1 on the early exits below. */
+ if (!peerinfo->peer)
+ goto out;
+ proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
+ if (proc->fn) {
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ frame->local = ctx;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ ret = dict_set_str (dict, "hostname", probe_ctx->hostname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "port", probe_ctx->port);
+ if (ret)
+ goto out;
+
+ /* The peerinfo reference being set here is going to be used
+ * only within this critical section, in glusterd_rpc_probe
+ * (ie. proc->fn).
+ */
+ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
+ }
+
+ ret = proc->fn (frame, this, dict);
+ if (ret)
+ goto out;
+
+ }
+
+out:
+ rcu_read_unlock ();
+
+ if (dict)
+ dict_unref (dict);
+ gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+
+ /* The frame is destroyed here only on failure. */
+ if (ret && frame)
+ STACK_DESTROY (frame->root);
+
+ return ret;
+}
+
+/* State-machine action: ask the peer that @event refers to to remove this
+ * node as a friend.
+ *
+ * If the peer is not connected, no request is sent. Instead a
+ * GD_FRIEND_EVENT_REMOVE_FRIEND event is injected locally and, when the
+ * event carries a probe context, the CLI deprobe response is sent, the
+ * deletion is broadcast to the other peers and the context is destroyed.
+ * Otherwise the peer's GLUSTERD_FRIEND_REMOVE procedure is invoked with a
+ * frame whose local data is @data.
+ *
+ * Returns 0 when the peer is not found, has no rpc peer object, has no
+ * handler, or the frame cannot be created; otherwise the result of the
+ * last step that set it.
+ */
+static int
+glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event,
+ void *data)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_probe_ctx_t *ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+
+ GF_ASSERT (event);
+
+ this = THIS;
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+ if (!peerinfo) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+ event->peername, uuid_utoa (event->peerid));
+ goto out;
+ }
+ ctx = event->ctx;
+
+ if (!peerinfo->connected) {
+ event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
+
+ ret = glusterd_friend_sm_new_event (event_type, &new_event);
+
+ if (!ret) {
+ /* NOTE(review): peername aliases peerinfo->hostname
+ * here, whereas glusterd_ac_reverse_probe_begin()
+ * stores a gf_strdup() copy - confirm who owns and
+ * frees event->peername.
+ */
+ new_event->peername = peerinfo->hostname;
+ gf_uuid_copy (new_event->peerid, peerinfo->uuid);
+ ret = glusterd_friend_sm_inject_event (new_event);
+ } else {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get event");
+ }
+
+ if (ctx) {
+ /* The status so far is reported to the CLI and ret
+ * is then replaced by the result of sending that
+ * response.
+ */
+ ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0,
+ NULL,
+ ctx->hostname,
+ ctx->dict);
+ glusterd_broadcast_friend_delete (ctx->hostname, NULL);
+ glusterd_destroy_probe_ctx (ctx);
+ }
+ goto out;
+ }
+
+ if (!peerinfo->peer)
+ goto out;
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
+ if (proc->fn) {
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ frame->local = data;
+ ret = proc->fn (frame, this, event);
+ }
+
+out:
+ rcu_read_unlock ();
+
+ gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+
+ /* The frame is destroyed here only when the procedure failed. */
+ if (ret && frame)
+ STACK_DESTROY (frame->root);
+
+ return ret;
+}
+
+/* A peer takes part in a friend update when it is the peer that triggered
+ * the update (@cur_peerinfo) or when it is in the
+ * GD_FRIEND_STATE_BEFRIENDED state.
+ */
+static gf_boolean_t
+glusterd_should_update_peer (glusterd_peerinfo_t *peerinfo,
+                             glusterd_peerinfo_t *cur_peerinfo)
+{
+        if (peerinfo == cur_peerinfo)
+                return _gf_true;
+
+        if (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* State-machine action: send a friend update (op GD_FRIEND_UPDATE_ADD) to
+ * the eligible peers.
+ *
+ * A peer is eligible when glusterd_should_update_peer() accepts it, i.e. it
+ * is the peer @event refers to or it is already befriended. The request
+ * dict lists every eligible peer as "friend<N>" plus the total "count", and
+ * is sent to each eligible peer that is connected. Everything runs inside
+ * one RCU read-side critical section. @ctx is not used.
+ *
+ * Returns -1 when the triggering peer is not found, the error of a failing
+ * dict operation, or otherwise the result of the last send attempted.
+ */
+static int
+glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *cur_peerinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ev_ctx = {{0}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[100] = {0,};
+ int32_t count = 0;
+
+ GF_ASSERT (event);
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ rcu_read_lock ();
+
+ cur_peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+ if (!cur_peerinfo) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+ event->peername, uuid_utoa (event->peerid));
+ ret = -1;
+ goto out;
+ }
+
+ ev_ctx.op = GD_FRIEND_UPDATE_ADD;
+
+ /* NOTE(review): ret is still 0 if dict_new() fails, so an allocation
+ * failure is reported as success - confirm whether that is intended.
+ */
+ friends = dict_new ();
+ if (!friends)
+ goto out;
+
+ snprintf (key, sizeof (key), "op");
+ ret = dict_set_int32 (friends, key, ev_ctx.op);
+ if (ret)
+ goto out;
+
+ /* First pass: describe every eligible peer in the request. */
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
+ continue;
+
+ count++;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d", count);
+ ret = gd_add_friend_to_dict (peerinfo, friends, key);
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_set_int32 (friends, "count", count);
+ if (ret)
+ goto out;
+
+ /* Second pass: send the request to each eligible, connected peer.
+ * A failed send does not stop the loop; ret keeps the last result.
+ */
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ if (!peerinfo->connected || !peerinfo->peer)
+ continue;
+
+ if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
+ continue;
+
+ ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
+ }
+
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, friends);
+ }
+ }
+
+ gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+
+out:
+ rcu_read_unlock ();
+
+ if (friends)
+ dict_unref (friends);
+
+ return ret;
+}
+
+/* Friend state-machine action: send a friend update only to the peer that
+ * caused this event.
+ *
+ * Builds the same dict as glusterd_ac_send_friend_update() (op, one
+ * "friend<N>" entry per eligible peer, "count"), but calls the
+ * GLUSTERD_FRIEND_UPDATE procedure of the triggering peer alone.
+ *
+ * @event: event whose peerid/peername identify the triggering peer.
+ * @ctx:   not used by this action.
+ *
+ * Returns 0 on success, and also 0 when the peer is not connected yet, in
+ * which case nothing is sent. Returns non-zero on failure.
+ */
+static int
+glusterd_ac_update_friend (glusterd_friend_sm_event_t *event, void *ctx)
+{
+        int                           ret          = 0;
+        glusterd_peerinfo_t          *cur_peerinfo = NULL;
+        glusterd_peerinfo_t          *peerinfo     = NULL;
+        rpc_clnt_procedure_t         *proc         = NULL;
+        xlator_t                     *this         = NULL;
+        glusterd_friend_update_ctx_t  ev_ctx       = {{0}};
+        glusterd_conf_t              *priv         = NULL;
+        dict_t                       *friends      = NULL;
+        char                          key[100]     = {0,};
+        int32_t                       count        = 0;
+
+        GF_ASSERT (event);
+
+        this = THIS;
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        rcu_read_lock ();
+
+        cur_peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+        if (!cur_peerinfo) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+                        event->peername, uuid_utoa (event->peerid));
+                ret = -1;
+                goto out;
+        }
+
+        /* Bail out early if peer is not connected.
+         * We cannot send requests to the peer until we have established our
+         * client connection to it.
+         */
+        if (!cur_peerinfo->connected || !cur_peerinfo->peer) {
+                ret = 0;
+                goto out;
+        }
+
+        ev_ctx.op = GD_FRIEND_UPDATE_ADD;
+
+        friends = dict_new ();
+        if (!friends) {
+                /* Report the allocation failure instead of returning the
+                 * initial 0, which the caller would treat as success.
+                 */
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "op");
+        ret = dict_set_int32 (friends, key, ev_ctx.op);
+        if (ret)
+                goto out;
+
+        /* Describe every eligible peer in the dict. */
+        cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+                if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
+                        continue;
+
+                count++;
+
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "friend%d", count);
+                ret = gd_add_friend_to_dict (peerinfo, friends, key);
+                if (ret)
+                        goto out;
+        }
+
+        ret = dict_set_int32 (friends, "count", count);
+        if (ret)
+                goto out;
+
+        ret = dict_set_static_ptr (friends, "peerinfo", cur_peerinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "failed to set peerinfo");
+                goto out;
+        }
+
+        proc = &cur_peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+        if (proc->fn)
+                ret = proc->fn (NULL, this, friends);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+out:
+        rcu_read_unlock ();
+
+        if (friends)
+                dict_unref (friends);
+
+        return ret;
+}
+
+/* Delete the volumes that became stale for the peer being detached (those
+ * whose bricks all live on other peers), then reconfigure the daemon
+ * services. Failures are logged and do not stop the cleanup.
+ */
+static void
+glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
+{
+        int                 ret      = -1;
+        glusterd_volinfo_t *vol      = NULL;
+        glusterd_volinfo_t *next_vol = NULL;
+        glusterd_svc_t     *snapd    = NULL;
+
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry_safe (vol, next_vol, &priv->volumes,
+                                      vol_list) {
+                /* By the time the detach reaches this point, the earlier
+                 * detach checks guarantee every volume is either entirely
+                 * on the detached peer or entirely off it. Only the latter
+                 * are stale, and those are safe to delete.
+                 */
+                if (glusterd_friend_contains_vol_bricks (vol, MY_UUID))
+                        continue;
+
+                gf_msg (THIS->name, GF_LOG_INFO, 0,
+                        GD_MSG_STALE_VOL_DELETE_INFO,
+                        "Deleting stale volume %s", vol->volname);
+
+                /* Snap volumes are skipped here; for regular volumes the
+                 * snapd service is asked to stop first.
+                 */
+                if (!vol->is_snap_volume) {
+                        snapd = &(vol->snapd.svc);
+                        ret = snapd->stop (snapd, SIGTERM);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SVC_STOP_FAIL,
+                                        "Failed to stop snapd daemon service");
+                        }
+                }
+
+                ret = glusterd_cleanup_snaps_for_volume (vol);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_DELETE_FAIL,
+                                "Error deleting snapshots for volume %s",
+                                vol->volname);
+                }
+
+                ret = glusterd_delete_volume (vol);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STALE_VOL_REMOVE_FAIL,
+                                "Error deleting stale volume");
+                }
+        }
+
+        /* Every daemon service is reconfigured once the peer is detached. */
+        ret = glusterd_svcs_reconfigure ();
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SVC_STOP_FAIL,
+                        "Failed to reconfigure all daemon services.");
+        }
+}
+
+/* Friend state-machine action run when a peer asks us to remove it.
+ *
+ * Replies to the request, queues a GD_FRIEND_EVENT_REMOVE_FRIEND event for
+ * every peer in priv->peers and, if all events were queued, cleans up the
+ * volumes that became stale.
+ *
+ * @event: not used by this action.
+ * @ctx:   the glusterd_friend_req_ctx_t of the incoming request.
+ *
+ * Returns the result of the last step executed (0 on success).
+ */
+static int
+glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
+                                      void *ctx)
+{
+        int                         ret       = 0;
+        gf_boolean_t                aborted   = _gf_false;
+        glusterd_peerinfo_t        *peerinfo  = NULL;
+        glusterd_friend_req_ctx_t  *ev_ctx    = NULL;
+        glusterd_friend_sm_event_t *new_event = NULL;
+        glusterd_conf_t            *priv      = NULL;
+
+        GF_ASSERT (ctx);
+        ev_ctx = ctx;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        ret = glusterd_xfer_friend_remove_resp (ev_ctx->req, ev_ctx->hostname,
+                                                ev_ctx->port);
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+                ret = glusterd_friend_sm_new_event
+                                (GD_FRIEND_EVENT_REMOVE_FRIEND, &new_event);
+                if (ret) {
+                        aborted = _gf_true;
+                        break;
+                }
+
+                new_event->peername = gf_strdup (peerinfo->hostname);
+                gf_uuid_copy (new_event->peerid, peerinfo->uuid);
+
+                ret = glusterd_friend_sm_inject_event (new_event);
+                if (ret) {
+                        aborted = _gf_true;
+                        break;
+                }
+
+                /* The queue owns the event from here on. */
+                new_event = NULL;
+        }
+        rcu_read_unlock ();
+
+        /* The stale-volume cleanup runs only when no event failed. */
+        if (!aborted)
+                glusterd_peer_detach_cleanup (priv);
+
+        /* Release an event that was created but never queued. */
+        if (new_event)
+                GF_FREE (new_event->peername);
+        GF_FREE (new_event);
+
+        gf_msg_debug (THIS->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Friend state-machine action: drop the peer named by @event.
+ *
+ * Cleans up the volumes tied to the peer's uuid and then the peerinfo
+ * itself. Failures are only logged; the action always returns 0.
+ *
+ * @event: event whose peerid/peername identify the peer to remove.
+ * @ctx:   not used by this action.
+ */
+static int
+glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx)
+{
+        int                  rc   = -1;
+        glusterd_peerinfo_t *peer = NULL;
+
+        GF_ASSERT (event);
+
+        rcu_read_lock ();
+
+        peer = glusterd_peerinfo_find (event->peerid, event->peername);
+        if (!peer) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND,
+                        "Could not find peer %s(%s)",
+                        event->peername, uuid_utoa (event->peerid));
+                rcu_read_unlock ();
+                return 0;
+        }
+
+        rc = glusterd_friend_remove_cleanup_vols (peer->uuid);
+        if (rc)
+                gf_msg (THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL,
+                        "Volumes cleanup failed");
+
+        /* Leave the read-side critical section first:
+         * glusterd_peerinfo_cleanup calls synchronize_rcu before it frees
+         * the peerinfo.
+         */
+        rcu_read_unlock ();
+
+        rc = glusterd_peerinfo_cleanup (peer);
+        if (rc) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_DETACH_CLEANUP_FAIL,
+                        "Cleanup returned: %d", rc);
+        }
+
+        return 0;
+}
+
+/*static int
+glusterd_ac_none (void *ctx)
+{
+ int ret = 0;
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}*/
+
+/* Friend state-machine action run when a peer sends us a friend request.
+ *
+ * Records the peer's uuid, compares the peer's volume data (and, from
+ * op-version 3.6.0 on, its missed-snaps list and snapshots) with ours,
+ * queues a LOCAL_ACC or LOCAL_RJT event accordingly and answers the request.
+ *
+ * @event: event whose peerid/peername identify the requesting peer.
+ * @ctx:   the glusterd_friend_req_ctx_t of the incoming request.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
+{
+        int                              ret        = 0;
+        glusterd_peerinfo_t             *peerinfo   = NULL;
+        glusterd_friend_req_ctx_t       *ev_ctx     = NULL;
+        glusterd_friend_update_ctx_t    *new_ev_ctx = NULL;
+        glusterd_friend_sm_event_t      *new_event  = NULL;
+        glusterd_friend_sm_event_type_t  event_type = GD_FRIEND_EVENT_NONE;
+        glusterd_conf_t                 *conf       = NULL;
+        int                              status     = 0;
+        int32_t                          op_ret     = -1;
+        int32_t                          op_errno   = 0;
+        xlator_t                        *this       = NULL;
+        char                            *hostname   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (event);
+        GF_ASSERT (ctx);
+        ev_ctx = ctx;
+
+        rcu_read_lock ();
+        peerinfo = glusterd_peerinfo_find (event->peerid, event->peername);
+        if (!peerinfo) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)",
+                        event->peername, uuid_utoa (event->peerid));
+                ret = -1;
+                rcu_read_unlock ();
+                goto out;
+        }
+
+        /* TODO: How do you do an atomic copy of uuid_t */
+        /* TODO: Updating within a read-critical section is also invalid
+         *       Update properly with updater synchronization
+         */
+        gf_uuid_copy (peerinfo->uuid, ev_ctx->uuid);
+
+        rcu_read_unlock ();
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        /* Passing the peername from the event. glusterd_compare_friend_data
+         * updates volumes and will use synchronize_rcu. If we were to pass
+         * peerinfo->hostname, we would have to do it under a read critical
+         * section which would lead to a deadlock
+         */
+
+        //Build comparison logic here.
+        ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
+                                            event->peername);
+        if (ret)
+                goto out;
+
+        if (GLUSTERD_VOL_COMP_RJT != status) {
+                event_type = GD_FRIEND_EVENT_LOCAL_ACC;
+                op_ret = 0;
+        } else {
+                event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+                op_errno = GF_PROBE_VOLUME_CONFLICT;
+                op_ret = -1;
+        }
+
+        /* Compare missed_snapshot list with the peer *
+         * if volume comparison is successful */
+        if ((op_ret == 0) &&
+            (conf->op_version >= GD_OP_VERSION_3_6_0)) {
+                ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                                "Failed to import peer's "
+                                "missed_snaps_list.");
+                        event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+                        op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
+                        op_ret = -1;
+                }
+
+                /* glusterd_compare_friend_snapshots and functions only require
+                 * a peers hostname and uuid. It also does updates, which
+                 * require use of synchronize_rcu. So we pass the hostname and
+                 * id from the event instead of the peerinfo object to prevent
+                 * deadlocks as above.
+                 */
+                ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
+                                                         event->peername,
+                                                         event->peerid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_COMPARE_CONFLICT,
+                                "Conflict in comparing peer's snapshots");
+                        event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+                        op_errno = GF_PROBE_SNAP_CONFLICT;
+                        op_ret = -1;
+                }
+        }
+
+        ret = glusterd_friend_sm_new_event (event_type, &new_event);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out of Memory");
+                /* new_event is still NULL here; it must not be used. */
+                goto out;
+        }
+
+        new_event->peername = gf_strdup (event->peername);
+        gf_uuid_copy (new_event->peerid, event->peerid);
+
+        new_ev_ctx = GF_CALLOC (1, sizeof (*new_ev_ctx),
+                                gf_gld_mt_friend_update_ctx_t);
+        if (!new_ev_ctx) {
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_copy (new_ev_ctx->uuid, ev_ctx->uuid);
+        new_ev_ctx->hostname = gf_strdup (ev_ctx->hostname);
+        new_ev_ctx->op = GD_FRIEND_UPDATE_ADD;
+
+        new_event->ctx = new_ev_ctx;
+
+        ret = dict_get_str (ev_ctx->vols, "hostname_in_cluster",
+                            &hostname);
+        if (ret || !hostname) {
+                gf_msg_debug (this->name, 0,
+                              "Unable to fetch local hostname from peer");
+        } else {
+                /* snprintf always NUL-terminates, unlike strncpy when the
+                 * source fills the whole buffer.
+                 */
+                snprintf (local_node_hostname, sizeof (local_node_hostname),
+                          "%s", hostname);
+        }
+
+        glusterd_friend_sm_inject_event (new_event);
+        /* The queue owns the event from here on. */
+        new_event = NULL;
+
+        ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname,
+                                             event->peername, ev_ctx->port,
+                                             op_ret, op_errno);
+
+out:
+        /* Release an event that was created but never queued. */
+        if (new_event)
+                GF_FREE (new_event->peername);
+        GF_FREE (new_event);
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Move the peer identified by @peerid/@peername to the state that the
+ * @state table assigns to @event_type, and record the transition in the
+ * peer's state-machine log.
+ *
+ * Returns 0 on success, -1 if the peer cannot be found.
+ */
+static int
+glusterd_friend_sm_transition_state (uuid_t peerid, char *peername,
+                                     glusterd_sm_t *state,
+                                     glusterd_friend_sm_event_type_t event_type)
+{
+        int                  rc   = -1;
+        glusterd_peerinfo_t *peer = NULL;
+
+        GF_ASSERT (state);
+        GF_ASSERT (peername);
+
+        rcu_read_lock ();
+
+        peer = glusterd_peerinfo_find (peerid, peername);
+        if (peer) {
+                /* A failure to log the transition is deliberately ignored. */
+                (void) glusterd_sm_tr_log_transition_add
+                                (&peer->sm_log, peer->state.state,
+                                 state[event_type].next_state, event_type);
+
+                uatomic_set (&peer->state.state,
+                             state[event_type].next_state);
+                rc = 0;
+        }
+
+        rcu_read_unlock ();
+        return rc;
+}
+
+
+/* Transition table for peers in GD_FRIEND_STATE_DEFAULT.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_default [] = {
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none},
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe},//EV_PROBE
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_NEW_NAME
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_PROBE_RCVD.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_probe_rcvd [] = {
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none},
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_PROBE
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_NEW_NAME
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_CONNECTED_RCVD.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_connected_rcvd [] = {
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_PROBE
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_NEW_NAME
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_CONNECTED_ACCEPTED.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_connected_accepted [] = {
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none},
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, //EV_PROBE
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_NEW_NAME
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_REQ_SENT.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_req_sent [] = {
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_REQ_RCVD.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_req_rcvd [] = {
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_BEFRIENDED.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_befriended [] = {
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_update_friend}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_update_friend}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_friend_add},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_REQ_SENT_RCVD.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_req_sent_rcvd [] = {
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_REJECTED.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_rejected [] = {
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_probe}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_add},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_REQ_ACCEPTED.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_req_accepted [] = {
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Transition table for peers in GD_FRIEND_STATE_UNFRIEND_SENT.
+ * Rows are indexed by event type; each row is {next state, action handler}.
+ */
+glusterd_sm_t glusterd_state_unfriend_sent [] = {
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_NEW_NAME
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_MAX
+};
+
+/* Per-state transition tables. glusterd_friend_sm() indexes this array with
+ * the peer's current state, so the entries must stay in the same order as
+ * the values of glusterd_friend_sm_state_t.
+ */
+glusterd_sm_t *glusterd_friend_state_table [] = {
+ glusterd_state_default,
+ glusterd_state_req_sent,
+ glusterd_state_req_rcvd,
+ glusterd_state_befriended,
+ glusterd_state_req_accepted,
+ glusterd_state_req_sent_rcvd,
+ glusterd_state_rejected,
+ glusterd_state_unfriend_sent,
+ glusterd_state_probe_rcvd,
+ glusterd_state_connected_rcvd,
+ glusterd_state_connected_accepted
+};
+
+/* Allocate a zeroed friend state-machine event of type @event_type and hand
+ * it back through @new_event. The event's list head is initialised so the
+ * event can be queued afterwards.
+ *
+ * Returns 0 on success, -1 if the allocation fails (in which case
+ * *new_event is left untouched).
+ */
+int
+glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type,
+                              glusterd_friend_sm_event_t **new_event)
+{
+        glusterd_friend_sm_event_t *event = NULL;
+
+        GF_ASSERT (new_event);
+        GF_ASSERT (GD_FRIEND_EVENT_NONE <= event_type &&
+                   GD_FRIEND_EVENT_MAX > event_type);
+
+        event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_friend_sm_event_t);
+        if (event == NULL)
+                return -1;
+
+        event->event = event_type;
+        CDS_INIT_LIST_HEAD (&event->list);
+        *new_event = event;
+
+        return 0;
+}
+
+/* Append @event to the tail of the friend state-machine queue
+ * (gd_friend_sm_queue). Always returns 0.
+ */
+int
+glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event)
+{
+        GF_ASSERT (event);
+
+        gf_msg_debug ("glusterd", 0, "Enqueue event: '%s'",
+                      glusterd_friend_sm_event_name_get (event->event));
+
+        cds_list_add_tail (&event->list, &gd_friend_sm_queue);
+        return 0;
+}
+
+/* Release the context attached to @event, choosing the destructor from the
+ * event type: request contexts for RCVD_FRIEND_REQ / RCVD_REMOVE_FRIEND,
+ * update contexts for LOCAL_ACC / LOCAL_RJT / RCVD_ACC / RCVD_RJT. Other
+ * event types are left alone, and so is a NULL @event. The event itself is
+ * not freed here.
+ */
+void
+glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event)
+{
+        if (!event)
+                return;
+
+        if (event->event == GD_FRIEND_EVENT_RCVD_FRIEND_REQ ||
+            event->event == GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND) {
+                glusterd_destroy_friend_req_ctx (event->ctx);
+        } else if (event->event == GD_FRIEND_EVENT_LOCAL_ACC ||
+                   event->event == GD_FRIEND_EVENT_LOCAL_RJT ||
+                   event->event == GD_FRIEND_EVENT_RCVD_ACC ||
+                   event->event == GD_FRIEND_EVENT_RCVD_RJT) {
+                glusterd_destroy_friend_update_ctx (event->ctx);
+        }
+}
+
+/* Tell whether the peer should now be counted towards quorum.
+ *
+ * @old_state:  the peer's state before the event was handled.
+ * @event_type: the event that was handled.
+ * @peerinfo:   the peer, already carrying its post-transition state.
+ *
+ * Returns _gf_true when the peer is BEFRIENDED and connected, except that a
+ * peer which was already BEFRIENDED only qualifies on an RCVD_ACC or
+ * LOCAL_ACC event.
+ */
+gf_boolean_t
+gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state,
+                            glusterd_friend_sm_event_type_t event_type,
+                            glusterd_peerinfo_t *peerinfo)
+{
+        /* When glusterd comes up with friends stored in BEFRIENDED state,
+         * hold off until the compare-data exchange has happened.
+         */
+        if ((old_state == GD_FRIEND_STATE_BEFRIENDED) &&
+            (event_type != GD_FRIEND_EVENT_RCVD_ACC) &&
+            (event_type != GD_FRIEND_EVENT_LOCAL_ACC))
+                return _gf_false;
+
+        if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) &&
+            peerinfo->connected)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* Run the friend state machine: drain gd_friend_sm_queue, and for every
+ * event look up the peer, call the handler that the peer's current state
+ * assigns to the event type, move the peer to the next state and store the
+ * peerinfo.
+ *
+ * Processing stops early when a handler returns GLUSTERD_CONNECTION_AWAITED
+ * (after that event has been fully handled), or when a state transition or
+ * the second peer lookup fails.
+ *
+ * Returns 0 unless the loop was left through one of those two failures, in
+ * which case the failing value is returned.
+ */
+int
+glusterd_friend_sm ()
+{
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_t *tmp = NULL;
+ int ret = -1;
+ glusterd_friend_sm_ac_fn handler = NULL;
+ glusterd_sm_t *state = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_type_t event_type = 0;
+ gf_boolean_t is_await_conn = _gf_false;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* The outer loop re-checks the queue because handlers may enqueue
+ * further events while the inner loop is running.
+ */
+ while (!cds_list_empty (&gd_friend_sm_queue)) {
+ cds_list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue,
+ list) {
+
+ cds_list_del_init (&event->list);
+ event_type = event->event;
+
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find (event->peerid,
+ event->peername);
+ if (!peerinfo) {
+ gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+ GD_MSG_PEER_NOT_FOUND, "Received"
+ " event %s with empty peer info",
+ glusterd_friend_sm_event_name_get (event_type));
+
+ /* NOTE(review): only the event is freed here; event->ctx is
+ * not passed to glusterd_destroy_friend_event_context as on
+ * the other discard paths - confirm whether it leaks.
+ */
+ GF_FREE (event);
+ rcu_read_unlock ();
+ continue;
+ }
+ gf_msg_debug ("glusterd", 0, "Dequeued event of type: '%s'",
+ glusterd_friend_sm_event_name_get (event_type));
+
+
+ old_state = peerinfo->state.state;
+
+ rcu_read_unlock ();
+ /* Giving up read-critical section here as we only need
+ * the current state to call the handler.
+ *
+ * We cannot continue into the handler in a read
+ * critical section as there are handlers who do
+ * updates, and could cause deadlocks.
+ */
+
+ state = glusterd_friend_state_table[old_state];
+
+ GF_ASSERT (state);
+
+ handler = state[event_type].handler;
+ GF_ASSERT (handler);
+
+ ret = handler (event, event->ctx);
+ /* "Connection awaited" counts as success, but no further events
+ * are processed once this one is done.
+ */
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ is_await_conn = _gf_true;
+ ret = 0;
+ }
+
+ /* A failed handler drops the event without a state transition. */
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_HANDLER_RETURNED,
+ "handler returned: "
+ "%d", ret);
+ glusterd_destroy_friend_event_context (event);
+ GF_FREE (event);
+ continue;
+ }
+
+ /* Remove events get no state transition and no peerinfo store. */
+ if ((GD_FRIEND_EVENT_REMOVE_FRIEND == event_type) ||
+ (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND == event_type)){
+ glusterd_destroy_friend_event_context (event);
+ GF_FREE (event);
+ continue;
+ }
+
+ ret = glusterd_friend_sm_transition_state
+ (event->peerid, event->peername, state,
+ event_type);
+
+ if (ret) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_STATE_TRANSITION_FAIL,
+ "Unable to transition"
+ " state from '%s' to '%s' for event '%s'",
+ glusterd_friend_sm_state_name_get(old_state),
+ glusterd_friend_sm_state_name_get(state[event_type].next_state),
+ glusterd_friend_sm_event_name_get(event_type));
+ /* NOTE(review): the event was already unlinked from the queue
+ * and is not freed on this path - looks like a leak, confirm.
+ */
+ goto out;
+ }
+
+ peerinfo = NULL;
+ /* We need to obtain peerinfo reference once again as we
+ * had exited the read critical section above.
+ */
+ rcu_read_lock ();
+ peerinfo = glusterd_peerinfo_find (event->peerid,
+ event->peername);
+ if (!peerinfo) {
+ rcu_read_unlock ();
+ /* A peer can only be deleted as an effect of
+ * this state machine, and two such state
+ * machines can never run at the same time.
+ * So if we cannot find the peerinfo here,
+ * something has gone terribly wrong.
+ */
+ ret = -1;
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_PEER_NOT_FOUND,
+ "Cannot find peer %s(%s)",
+ event->peername, uuid_utoa (event->peerid));
+ /* NOTE(review): the event is not freed on this path either -
+ * confirm.
+ */
+ goto out;
+ }
+ if (gd_does_peer_affect_quorum (old_state, event_type,
+ peerinfo)) {
+ peerinfo->quorum_contrib = QUORUM_UP;
+ if (peerinfo->quorum_action) {
+ peerinfo->quorum_action = _gf_false;
+ quorum_action = _gf_true;
+ }
+ }
+
+ /* The store result is not checked: ret is overwritten by the next
+ * iteration or by the "ret = 0" after the loops.
+ */
+ ret = glusterd_store_peerinfo (peerinfo);
+ rcu_read_unlock ();
+
+ glusterd_destroy_friend_event_context (event);
+ GF_FREE (event);
+ if (is_await_conn)
+ break;
+ }
+ if (is_await_conn)
+ break;
+ }
+
+ ret = 0;
+out:
+ if (quorum_action) {
+ /* When glusterd is restarted, it needs to wait until the 'friends' view
+ * of the volumes settle, before it starts any of the internal daemons.
+ *
+ * Every friend that was part of the cluster, would send its
+ * cluster-view, 'our' way. For every friend, who belongs to
+ * a partition which has a different cluster-view from our
+ * partition, we may update our cluster-view. For subsequent
+ * friends from that partition would agree with us, if the first
+ * friend wasn't rejected. For every first friend, whom we agreed with,
+ * we would need to start internal daemons/bricks belonging to the
+ * new volumes.
+ * glusterd_spawn_daemons calls functions that are idempotent. ie,
+ * the functions spawn process(es) only if they are not started yet.
+ *
+ * */
+ /* big_lock is dropped while the spawn synctask is launched and taken
+ * again before the quorum action runs.
+ */
+ synclock_unlock (&priv->big_lock);
+ glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
+ synclock_lock (&priv->big_lock);
+ glusterd_do_quorum_action ();
+ }
+ return ret;
+}
+
+
+/* Initialise the friend state-machine event queue (gd_friend_sm_queue) as
+ * an empty list. Always returns 0.
+ */
+int
+glusterd_friend_sm_init ()
+{
+ CDS_INIT_LIST_HEAD (&gd_friend_sm_queue);
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h
new file mode 100644
index 00000000000..9e4fe33b558
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.h
@@ -0,0 +1,222 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_SM_H_
+#define _GLUSTERD_SM_H_
+
+#include <pthread.h>
+#include "compat-uuid.h"
+
+#include "rpc-clnt.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+//#include "glusterd.h"
+#include "rpcsvc.h"
+#include "store.h"
+
+#include "glusterd-rcu.h"
+
+/* Quorum contribution of a peer; this is the type of the quorum_contrib
+ * member of struct glusterd_peerinfo_. */
+typedef enum gd_quorum_contribution_ {
+ QUORUM_NONE,
+ QUORUM_WAITING,
+ QUORUM_DOWN,
+ QUORUM_UP
+} gd_quorum_contrib_t;
+
+/* States of the friend state machine.  GD_FRIEND_STATE_DEFAULT is 0 and
+ * GD_FRIEND_STATE_MAX, being the last enumerator, equals the number of
+ * states listed before it. */
+typedef enum glusterd_friend_sm_state_ {
+ GD_FRIEND_STATE_DEFAULT = 0,
+ GD_FRIEND_STATE_REQ_SENT,
+ GD_FRIEND_STATE_REQ_RCVD,
+ GD_FRIEND_STATE_BEFRIENDED,
+ GD_FRIEND_STATE_REQ_ACCEPTED,
+ GD_FRIEND_STATE_REQ_SENT_RCVD,
+ GD_FRIEND_STATE_REJECTED,
+ GD_FRIEND_STATE_UNFRIEND_SENT,
+ GD_FRIEND_STATE_PROBE_RCVD,
+ GD_FRIEND_STATE_CONNECTED_RCVD,
+ GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ GD_FRIEND_STATE_MAX
+} glusterd_friend_sm_state_t;
+
+/* A friend state paired with a timestamp.
+ * NOTE(review): transition_time is presumably the time `state` was entered --
+ * confirm against the code that updates it. */
+typedef struct glusterd_peer_state_info_ {
+ glusterd_friend_sm_state_t state;
+ struct timeval transition_time;
+}glusterd_peer_state_info_t;
+
+/* One hostname entry; hostname_list is the list linkage of the entry.
+ * NOTE(review): presumably chained on the `hostnames` list head of
+ * struct glusterd_peerinfo_ -- confirm. */
+typedef struct glusterd_peer_hostname_ {
+ char *hostname;
+ struct cds_list_head hostname_list;
+} glusterd_peer_hostname_t;
+
+/* One recorded state-machine transition: state before, event, state after
+ * and a time_t stamp.  States and events are stored as plain ints. */
+typedef struct glusterd_sm_transition_ {
+ int old_state;
+ int event;
+ int new_state;
+ time_t time;
+} glusterd_sm_transition_t;
+
+/* Log of state-machine transitions plus two callbacks that map a state or
+ * event number to a string.
+ * NOTE(review): the exact roles of current/size/count (index of the latest
+ * entry, capacity, entries in use?) are not visible in this header --
+ * confirm against the implementation. */
+typedef struct glusterd_sm_tr_log_ {
+ glusterd_sm_transition_t *transitions;
+ size_t current;
+ size_t size;
+ size_t count;
+ char* (*state_name_get) (int);
+ char* (*event_name_get) (int);
+} glusterd_sm_tr_log_t;
+
+/* Per-peer record: identity (uuid, hostname(s), port), RPC client and
+ * program handles, friend state and transition log, quorum bookkeeping,
+ * and the members needed for RCU-based cleanup. */
+struct glusterd_peerinfo_ {
+ uuid_t uuid;
+ char uuid_str[50]; /* Retrieve this using
+ * gd_peer_uuid_str ()
+ */
+ glusterd_peer_state_info_t state;
+ char *hostname;
+ struct cds_list_head hostnames;
+ int port;
+ struct cds_list_head uuid_list;
+ struct cds_list_head op_peers_list;
+ struct rpc_clnt *rpc;
+ rpc_clnt_prog_t *mgmt;
+ rpc_clnt_prog_t *peer;
+ rpc_clnt_prog_t *mgmt_v3;
+ int connected;
+ gf_store_handle_t *shandle;
+ glusterd_sm_tr_log_t sm_log;
+ gf_boolean_t quorum_action;
+ gd_quorum_contrib_t quorum_contrib;
+ gf_boolean_t locked;
+ gf_boolean_t detaching;
+ /* Members required for proper cleanup using RCU */
+ gd_rcu_head rcu_head;
+ pthread_mutex_t delete_lock;
+ uint32_t generation;
+};
+
+typedef struct glusterd_peerinfo_ glusterd_peerinfo_t;
+
+/* List node holding a pointer to a peerinfo; op_peers_list is the node's
+ * list linkage. */
+typedef struct glusterd_local_peers_ {
+ glusterd_peerinfo_t *peerinfo;
+ struct cds_list_head op_peers_list;
+} glusterd_local_peers_t;
+
+/* Mode value carried in the `mode` member of glusterd_peerctx_args_t.
+ * NOTE(review): "ev gen" presumably means event generation -- confirm. */
+typedef enum glusterd_ev_gen_mode_ {
+ GD_MODE_OFF,
+ GD_MODE_ON,
+ GD_MODE_SWITCH_ON
+} glusterd_ev_gen_mode_t;
+
+/* Arguments bundled into a glusterd_peerctx_t: an RPC service request,
+ * a glusterd_ev_gen_mode_t and a dict. */
+typedef struct glusterd_peer_ctx_args_ {
+ rpcsvc_request_t *req;
+ glusterd_ev_gen_mode_t mode;
+ dict_t *dict;
+} glusterd_peerctx_args_t;
+
+/* Peer context: identifies a peer by uuid and name instead of by pointer,
+ * and carries the request arguments and an error string.
+ * NOTE(review): peerinfo_gen presumably mirrors glusterd_peerinfo_.generation
+ * so a stale context can be detected -- confirm. */
+typedef struct glusterd_peer_ctx_ {
+ glusterd_peerctx_args_t args;
+ uuid_t peerid;
+ char *peername;
+ uint32_t peerinfo_gen;
+ char *errstr;
+} glusterd_peerctx_t;
+
+/* Events accepted by the friend state machine.  GD_FRIEND_EVENT_NONE is 0
+ * and GD_FRIEND_EVENT_MAX, being the last enumerator, equals the number of
+ * enumerators listed before it. */
+typedef enum glusterd_friend_sm_event_type_ {
+ GD_FRIEND_EVENT_NONE = 0,
+ GD_FRIEND_EVENT_PROBE,
+ GD_FRIEND_EVENT_INIT_FRIEND_REQ,
+ GD_FRIEND_EVENT_RCVD_ACC,
+ GD_FRIEND_EVENT_LOCAL_ACC,
+ GD_FRIEND_EVENT_RCVD_RJT,
+ GD_FRIEND_EVENT_LOCAL_RJT,
+ GD_FRIEND_EVENT_RCVD_FRIEND_REQ,
+ GD_FRIEND_EVENT_INIT_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_CONNECTED,
+ GD_FRIEND_EVENT_NEW_NAME,
+ GD_FRIEND_EVENT_MAX
+} glusterd_friend_sm_event_type_t;
+
+
+/* Friend-update operation codes.
+ * NOTE(review): presumably the values stored in the int `op` member of
+ * glusterd_friend_update_ctx_t -- confirm. */
+typedef enum glusterd_friend_update_op_ {
+ GD_FRIEND_UPDATE_NONE = 0,
+ GD_FRIEND_UPDATE_ADD,
+ GD_FRIEND_UPDATE_DEL,
+} glusterd_friend_update_op_t;
+
+
+/* One friend state machine event: `list` is its list linkage, the peer is
+ * named by uuid and name, `ctx` is an opaque event-specific context and
+ * `event` is the event type. */
+struct glusterd_friend_sm_event_ {
+ struct cds_list_head list;
+ uuid_t peerid;
+ char *peername;
+ void *ctx;
+ glusterd_friend_sm_event_type_t event;
+};
+
+typedef struct glusterd_friend_sm_event_ glusterd_friend_sm_event_t;
+
+/* Action handler signature: receives the event and an opaque context and
+ * returns an int. */
+typedef int (*glusterd_friend_sm_ac_fn) (glusterd_friend_sm_event_t *, void *);
+
+/* One state-table entry: the state to move to and the handler to run. */
+typedef struct glusterd_sm_ {
+ glusterd_friend_sm_state_t next_state;
+ glusterd_friend_sm_ac_fn handler;
+} glusterd_sm_t;
+
+/* Context for a friend request: peer uuid, hostname and port, the RPC
+ * request and a dict of volumes.  Released with
+ * glusterd_destroy_friend_req_ctx (). */
+typedef struct glusterd_friend_req_ctx_ {
+ uuid_t uuid;
+ char *hostname;
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *vols;
+} glusterd_friend_req_ctx_t;
+
+/* Context for a friend update: peer uuid, hostname and an operation code.
+ * Released with glusterd_destroy_friend_update_ctx (). */
+typedef struct glusterd_friend_update_ctx_ {
+ uuid_t uuid;
+ char *hostname;
+ int op;
+} glusterd_friend_update_ctx_t;
+
+/* Context for a probe: target hostname and port, the RPC request and a
+ * dict.  Released with glusterd_destroy_probe_ctx (). */
+typedef struct glusterd_probe_ctx_ {
+ char *hostname;
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *dict;
+} glusterd_probe_ctx_t;
+/* Friend state machine entry points. */
+
+/* Produces an event of the given type through *new_event.
+ * NOTE(review): presumably allocates the event -- confirm ownership. */
+int
+glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type,
+ glusterd_friend_sm_event_t **new_event);
+/* NOTE(review): presumably queues the event for glusterd_friend_sm () --
+ * confirm. */
+int
+glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event);
+
+int
+glusterd_friend_sm_init ();
+
+int
+glusterd_friend_sm ();
+
+/* Destructors for the context structures declared above. */
+void
+glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx);
+
+void
+glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx);
+
+/* These two have the signature of the state_name_get / event_name_get
+ * callbacks in glusterd_sm_tr_log_t. */
+char*
+glusterd_friend_sm_state_name_get (int state);
+
+char*
+glusterd_friend_sm_event_name_get (int event);
+
+int
+glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid);
+void
+glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c
new file mode 100644
index 00000000000..826b4ca7463
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c
@@ -0,0 +1,63 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-snapd-svc-helper.h"
+
+/* Write "<volume dir>/run" into @path (at most @path_len bytes, as bounded
+ * by snprintf).  The volume directory comes from GLUSTERD_GET_VOLUME_DIR
+ * for @volinfo and the current xlator's private conf. */
+void
+glusterd_svc_build_snapd_rundir (glusterd_volinfo_t *volinfo,
+                                 char *path, int path_len)
+{
+        glusterd_conf_t *priv             = NULL;
+        char             voldir[PATH_MAX] = {0,};
+
+        priv = THIS->private;
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+
+        snprintf (path, path_len, "%s/run", voldir);
+}
+
+/* Build "<snapd rundir>/run-<this node's uuid>" and pass it, together with
+ * @path and @path_len, to glusterd_set_socket_filepath (), which fills
+ * @path. */
+void
+glusterd_svc_build_snapd_socket_filepath (glusterd_volinfo_t *volinfo,
+                                          char *path, int path_len)
+{
+        char run_dir[PATH_MAX]  = {0,};
+        char sock_src[PATH_MAX] = {0,};
+
+        glusterd_svc_build_snapd_rundir (volinfo, run_dir, sizeof (run_dir));
+
+        snprintf (sock_src, sizeof (sock_src), "%s/run-%s", run_dir,
+                  uuid_utoa (MY_UUID));
+
+        glusterd_set_socket_filepath (sock_src, path, path_len);
+}
+
+/* Write "<snapd rundir>/<volname>-snapd.pid" into @path (bounded by
+ * @path_len). */
+void
+glusterd_svc_build_snapd_pidfile (glusterd_volinfo_t *volinfo,
+                                  char *path, int path_len)
+{
+        char run_dir[PATH_MAX] = {0,};
+
+        glusterd_svc_build_snapd_rundir (volinfo, run_dir, sizeof (run_dir));
+        snprintf (path, path_len, "%s/%s-snapd.pid", run_dir,
+                  volinfo->volname);
+}
+
+/* Write "<volume dir>/<volname>-snapd.vol" into @path (bounded by
+ * @path_len).  The volume directory comes from GLUSTERD_GET_VOLUME_DIR. */
+void
+glusterd_svc_build_snapd_volfile (glusterd_volinfo_t *volinfo,
+                                  char *path, int path_len)
+{
+        glusterd_conf_t *priv             = NULL;
+        char             voldir[PATH_MAX] = {0,};
+
+        priv = THIS->private;
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+
+        snprintf (path, path_len, "%s/%s-snapd.vol", voldir, volinfo->volname);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h
new file mode 100644
index 00000000000..4c452b91658
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SNAPD_SVC_HELPER_H_
+#define _GLUSTERD_SNAPD_SVC_HELPER_H_
+
+#include "glusterd.h"
+
+/* Path builders for the per-volume snapd service.  Each one writes a
+ * string into @path, bounded by @path_len. */
+
+/* "<volume dir>/run" */
+void
+glusterd_svc_build_snapd_rundir (glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+/* Socket path derived from "<rundir>/run-<node uuid>" via
+ * glusterd_set_socket_filepath (). */
+void
+glusterd_svc_build_snapd_socket_filepath (glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+/* "<rundir>/<volname>-snapd.pid" */
+void
+glusterd_svc_build_snapd_pidfile (glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+/* "<volume dir>/<volname>-snapd.vol" */
+void
+glusterd_svc_build_snapd_volfile (glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
new file mode 100644
index 00000000000..830dc1a706d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
@@ -0,0 +1,439 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-proc-mgmt.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-snapd-svc-helper.h"
+#include "glusterd-snapshot-utils.h"
+#include "syscall.h"
+
+/* Service name: copied into svc->name and passed to glusterd_proc_init ()
+ * by glusterd_snapdsvc_init (). */
+char *snapd_svc_name = "snapd";
+
+/* Write "<DEFAULT_LOG_FILE_DIRECTORY>/snaps/<volname>" into @logdir, bounded
+ * by @len. */
+static void
+glusterd_svc_build_snapd_logdir (char *logdir, char *volname, size_t len)
+{
+        const char *log_root = DEFAULT_LOG_FILE_DIRECTORY;
+
+        snprintf (logdir, len, "%s/snaps/%s", log_root, volname);
+}
+
+/* Write "<logdir>/snapd.log" into @logfile, bounded by @len. */
+static void
+glusterd_svc_build_snapd_logfile (char *logfile, char *logdir, size_t len)
+{
+        static const char fmt[] = "%s/snapd.log";
+
+        snprintf (logfile, len, fmt, logdir);
+}
+
+/* Install the snapd callbacks on @svc: snapd-specific manager and start,
+ * and the generic glusterd_svc_stop for stop. */
+void
+glusterd_snapdsvc_build (glusterd_svc_t *svc)
+{
+        svc->stop    = glusterd_svc_stop;
+        svc->start   = glusterd_snapdsvc_start;
+        svc->manager = glusterd_snapdsvc_manager;
+}
+
+/*
+ * Initialise the snapd service object embedded in a volume.
+ *
+ * @data is used as a glusterd_volinfo_t *; it is dereferenced without a
+ * NULL check.  The function names the service, creates its run directory,
+ * initialises the connection object on the snapd socket path, creates the
+ * log directory and initialises the process object.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+glusterd_snapdsvc_init (void *data)
+{
+ int ret = -1;
+ char rundir[PATH_MAX] = {0,};
+ char sockpath[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ char volfile[PATH_MAX] = {0,};
+ char logdir[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ char volfileid[256] = {0};
+ glusterd_svc_t *svc = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_conn_notify_t notify = NULL;
+ xlator_t *this = NULL;
+ char *volfileserver = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ volinfo = data;
+
+ svc = &(volinfo->snapd.svc);
+
+ /* On success snprintf leaves a positive count in ret; it is overwritten
+ * by the calls further down before the function returns. */
+ ret = snprintf (svc->name, sizeof (svc->name), "%s", snapd_svc_name);
+ if (ret < 0)
+ goto out;
+
+ notify = glusterd_snapdsvc_rpc_notify;
+
+ /* The result of glusterd_svc_create_rundir () is not checked here. */
+ glusterd_svc_build_snapd_rundir (volinfo, rundir, sizeof (rundir));
+ glusterd_svc_create_rundir (rundir);
+
+ /* Initialize the connection mgmt */
+ glusterd_svc_build_snapd_socket_filepath (volinfo, sockpath,
+ sizeof (sockpath));
+ /* NOTE(review): 600 is presumably a timeout in seconds -- confirm
+ * against glusterd_conn_init (). */
+ ret = glusterd_conn_init (&(svc->conn), sockpath, 600, notify);
+ if (ret)
+ goto out;
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_snapd_pidfile (volinfo, pidfile, sizeof (pidfile));
+ glusterd_svc_build_snapd_volfile (volinfo, volfile, sizeof (volfile));
+ glusterd_svc_build_snapd_logdir (logdir, volinfo->volname,
+ sizeof (logdir));
+ /* An already existing log directory is not treated as an error. */
+ ret = mkdir_p (logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Unable to create logdir %s",
+ logdir);
+ goto out;
+ }
+ glusterd_svc_build_snapd_logfile (logfile, logdir, sizeof (logfile));
+ snprintf (volfileid, sizeof (volfileid), "snapd/%s", volinfo->volname);
+
+ /* Fall back to "localhost" when no bind-address option is set. */
+ if (dict_get_str (this->options, "transport.socket.bind-address",
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+ ret = glusterd_proc_init (&(svc->proc), snapd_svc_name, pidfile, logdir,
+ logfile, volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Bring the snapd service of a volume in line with the volume's settings.
+ *
+ * @svc   the snapd service object
+ * @data  used as the owning glusterd_volinfo_t *
+ * @flags passed through to svc->start ()
+ *
+ * The service is initialised on first use.  If snapd is enabled and the
+ * volume is started, the volfile is (re)created, snapd is started and the
+ * connection is opened.  If snapd is enabled but the volume is not started,
+ * or snapd is disabled, a running snapd is stopped.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+glusterd_snapdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ volinfo = data;
+
+ if (!svc->inited) {
+ ret = glusterd_snapdsvc_init (volinfo);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAPD_INIT_FAIL, "Failed to initialize "
+ "snapd service for volume %s",
+ volinfo->volname);
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug (THIS->name, 0, "snapd service "
+ "initialized");
+ }
+ }
+
+ /* -1 means the option could not be read; any other non-zero value is
+ * treated as "enabled" below. */
+ ret = glusterd_is_snapd_enabled (volinfo);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLINFO_GET_FAIL, "Failed to read volume "
+ "options");
+ goto out;
+ }
+
+ if (ret) {
+ /* snapd enabled, but the volume itself is not started: make
+ * sure no snapd is left running and return. */
+ if (!glusterd_is_volume_started (volinfo)) {
+ if (glusterd_proc_is_running (&svc->proc)) {
+ ret = svc->stop (svc, SIGTERM);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAPD_STOP_FAIL,
+ "Couldn't stop snapd for "
+ "volume: %s",
+ volinfo->volname);
+ } else {
+ /* Since snapd is not running set ret to 0 */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ ret = glusterd_snapdsvc_create_volfile (volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAPD_CREATE_FAIL, "Couldn't create "
+ "snapd volfile for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = svc->start (svc, flags);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAPD_START_FAIL, "Couldn't start "
+ "snapd for volume: %s", volinfo->volname);
+ goto out;
+ }
+
+ /* A volinfo reference is taken before connecting and dropped
+ * again right away if the connect call fails. */
+ glusterd_volinfo_ref (volinfo);
+ ret = glusterd_conn_connect (&(svc->conn));
+ if (ret) {
+ glusterd_volinfo_unref (volinfo);
+ goto out;
+ }
+
+ } else if (glusterd_proc_is_running (&svc->proc)) {
+ /* snapd disabled but still running: stop it and clear the
+ * recorded port. */
+ ret = svc->stop (svc, SIGTERM);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAPD_STOP_FAIL,
+ "Couldn't stop snapd for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+ volinfo->snapd.port = 0;
+ }
+
+out:
+ gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+/*
+ * Start the snapd process for the volume that embeds @svc.
+ *
+ * @svc    snapd service object; it must be the `svc` member of a
+ *         glusterd_snapdsvc_t that is itself the `snapd` member of a
+ *         glusterd_volinfo_t, because both owners are recovered with
+ *         cds_list_entry ().
+ * @flags  PROC_START_NO_WAIT runs the process without waiting; any other
+ *         value runs it with runner_run () while priv->big_lock is
+ *         released.
+ *
+ * Returns 0 if snapd is already running or was started, non-zero on
+ * failure.
+ */
+int32_t
+glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags)
+{
+        int                  ret                        = -1;
+        runner_t             runner                     = {0,};
+        glusterd_conf_t     *priv                       = NULL;
+        xlator_t            *this                       = NULL;
+        char                 valgrind_logfile[PATH_MAX] = {0};
+        int                  snapd_port                 = 0;
+        char                 msg[1024]                  = {0,};
+        char                 snapd_id[PATH_MAX]         = {0,};
+        glusterd_volinfo_t  *volinfo                    = NULL;
+        glusterd_snapdsvc_t *snapd                      = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (glusterd_proc_is_running (&svc->proc)) {
+                ret = 0;
+                goto out;
+        }
+
+        /* Get volinfo->snapd from svc object */
+        snapd = cds_list_entry (svc, glusterd_snapdsvc_t, svc);
+        if (!snapd) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPD_OBJ_GET_FAIL, "Failed to get snapd object "
+                        "from snapd service");
+                goto out;
+        }
+
+        /* Get volinfo from snapd */
+        volinfo = cds_list_entry (snapd, glusterd_volinfo_t, snapd);
+        if (!volinfo) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo "
+                        "from snapd");
+                goto out;
+        }
+
+        ret = sys_access (svc->proc.volfile, F_OK);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_DEBUG, 0,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "snapd Volfile %s is not present", svc->proc.volfile);
+                /* If glusterd is down on one of the nodes and during
+                 * that time "USS is enabled" for the first time. After some
+                 * time when the glusterd which was down comes back it tries
+                 * to look for the snapd volfile and it does not find snapd
+                 * volfile and because of this starting of snapd fails.
+                 * Therefore, if volfile is not present then create a fresh
+                 * volfile.
+                 */
+                ret = glusterd_snapdsvc_create_volfile (volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL, "Couldn't create "
+                                "snapd volfile for volume: %s",
+                                volinfo->volname);
+                        goto out;
+                }
+        }
+
+        /* NOTE(review): the early exits below leave without running the
+         * runner; confirm whether its argument list needs an explicit
+         * runner_end () on those paths. */
+        runinit (&runner);
+
+        if (priv->valgrind) {
+                snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log",
+                          svc->proc.logdir);
+
+                runner_add_args (&runner, "valgrind", "--leak-check=full",
+                                 "--trace-children=yes", "--track-origins=yes",
+                                 NULL);
+                runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+        }
+
+        snprintf (snapd_id, sizeof (snapd_id), "snapd-%s", volinfo->volname);
+        runner_add_args (&runner, SBIN_DIR"/glusterfsd",
+                         "-s", svc->proc.volfileserver,
+                         "--volfile-id", svc->proc.volfileid,
+                         "-p", svc->proc.pidfile,
+                         "-l", svc->proc.logfile,
+                         "--brick-name", snapd_id,
+                         "-S", svc->conn.sockpath, NULL);
+
+        /* Do a pmap registry remove on the older connected port */
+        if (volinfo->snapd.port) {
+                ret = pmap_registry_remove (this, volinfo->snapd.port,
+                                            snapd_id, GF_PMAP_PORT_BRICKSERVER,
+                                            NULL);
+                if (ret) {
+                        snprintf (msg, sizeof (msg), "Failed to remove pmap "
+                                  "registry for older signin");
+                        /* The message used to be formatted and then
+                         * dropped; report it so the failure is visible. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPD_START_FAIL, "%s", msg);
+                        goto out;
+                }
+        }
+
+        snapd_port = pmap_registry_alloc (THIS);
+        if (!snapd_port) {
+                snprintf (msg, sizeof (msg), "Could not allocate port "
+                          "for snapd service for volume %s",
+                          volinfo->volname);
+                runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+                ret = -1;
+                goto out;
+        }
+
+        volinfo->snapd.port = snapd_port;
+
+        runner_add_arg (&runner, "--brick-port");
+        runner_argprintf (&runner, "%d", snapd_port);
+        runner_add_arg (&runner, "--xlator-option");
+        runner_argprintf (&runner, "%s-server.listen-port=%d",
+                          volinfo->volname, snapd_port);
+        runner_add_arg (&runner, "--no-mem-accounting");
+
+        snprintf (msg, sizeof (msg),
+                  "Starting the snapd service for volume %s", volinfo->volname);
+        runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+
+        if (flags == PROC_START_NO_WAIT) {
+                ret = runner_run_nowait (&runner);
+        } else {
+                /* big_lock is released for the duration of the blocking
+                 * run and taken again afterwards. */
+                synclock_unlock (&priv->big_lock);
+                {
+                        ret = runner_run (&runner);
+                }
+                synclock_lock (&priv->big_lock);
+        }
+
+out:
+        return ret;
+}
+
+/* Run the snapd manager (with PROC_START_NO_WAIT) for every volume in
+ * conf->volumes whose status is GLUSTERD_STATUS_STARTED.  Stops at the first
+ * failure and returns that error; returns 0 otherwise. */
+int
+glusterd_snapdsvc_restart ()
+{
+        xlator_t            *this      = THIS;
+        glusterd_conf_t     *conf      = NULL;
+        glusterd_volinfo_t  *vol       = NULL;
+        glusterd_svc_t      *snapd_svc = NULL;
+        int                  ret       = 0;
+
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                /* Only started volumes get a per-volume snapd. */
+                if (vol->status != GLUSTERD_STATUS_STARTED)
+                        continue;
+
+                snapd_svc = &(vol->snapd.svc);
+                ret = snapd_svc->manager (snapd_svc, vol, PROC_START_NO_WAIT);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPD_START_FAIL,
+                                "Couldn't resolve snapd for "
+                                "vol: %s on restart", vol->volname);
+                        break;
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * RPC notification callback for the snapd connection.
+ *
+ * @conn   connection object; it must be the `conn` member of a
+ *         glusterd_svc_t, which is recovered with cds_list_entry ().
+ * @event  RPC client event.
+ *
+ * CONNECT marks the service online, DISCONNECT marks it offline (logging
+ * only when it was online) and DESTROY drops one reference on the owning
+ * volinfo.  Any other event is only traced.
+ *
+ * Returns 0, or -1 when an owning object could not be obtained.
+ */
+int
+glusterd_snapdsvc_rpc_notify (glusterd_conn_t *conn, rpc_clnt_event_t event)
+{
+        int                  ret     = 0;
+        glusterd_svc_t      *svc     = NULL;
+        xlator_t            *this    = NULL;
+        glusterd_volinfo_t  *volinfo = NULL;
+        glusterd_snapdsvc_t *snapd   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        svc = cds_list_entry (conn, glusterd_svc_t, conn);
+        if (!svc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SVC_GET_FAIL, "Failed to get the service");
+                return -1;
+        }
+
+        switch (event) {
+        case RPC_CLNT_CONNECT:
+                gf_msg_debug (this->name, 0, "%s has connected with "
+                              "glusterd.", svc->name);
+                svc->online = _gf_true;
+                break;
+
+        case RPC_CLNT_DISCONNECT:
+                if (svc->online) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_NODE_DISCONNECTED, "%s has disconnected "
+                                "from glusterd.", svc->name);
+                        svc->online = _gf_false;
+                }
+                break;
+
+        case RPC_CLNT_DESTROY:
+                snapd = cds_list_entry (svc, glusterd_snapdsvc_t, svc);
+                if (!snapd) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPD_OBJ_GET_FAIL, "Failed to get the "
+                                "snapd object");
+                        return -1;
+                }
+
+                volinfo = cds_list_entry (snapd, glusterd_volinfo_t, snapd);
+                if (!volinfo) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "Failed to get the "
+                                "volinfo object");
+                        return -1;
+                }
+                /* NOTE(review): presumably pairs with the
+                 * glusterd_volinfo_ref () taken before
+                 * glusterd_conn_connect () in the manager -- confirm. */
+                glusterd_volinfo_unref (volinfo);
+                /* Do not fall into the default case: DESTROY is handled
+                 * here and is not "some other" event. */
+                break;
+
+        default:
+                gf_msg_trace (this->name, 0,
+                              "got some other RPC event %d", event);
+                break;
+        }
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
new file mode 100644
index 00000000000..40dae848f58
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SNAPD_SVC_H_
+#define _GLUSTERD_SNAPD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+typedef struct glusterd_snapdsvc_ glusterd_snapdsvc_t;
+
+/* Per-volume snapd service.  `svc` is the generic service object that
+ * glusterd-snapd-svc.c maps back to this structure with cds_list_entry ();
+ * `port` is the port recorded by glusterd_snapdsvc_start (). */
+struct glusterd_snapdsvc_{
+ glusterd_svc_t svc;
+ int port;
+ gf_store_handle_t *handle;
+};
+
+/* Install the snapd manager/start/stop callbacks on @svc. */
+void
+glusterd_snapdsvc_build (glusterd_svc_t *svc);
+
+/* @data is a glusterd_volinfo_t *; sets up its snapd service object. */
+int
+glusterd_snapdsvc_init (void *data);
+
+/* Start or stop snapd according to the volume's state; @data is the
+ * glusterd_volinfo_t * and @flags is handed to the start callback. */
+int
+glusterd_snapdsvc_manager (glusterd_svc_t *svc, void *data, int flags);
+
+/* Spawn the snapd process; PROC_START_NO_WAIT in @flags avoids waiting. */
+int
+glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags);
+
+/* Run the snapd manager for every started volume. */
+int
+glusterd_snapdsvc_restart ();
+
+/* RPC event callback registered for the snapd connection. */
+int
+glusterd_snapdsvc_rpc_notify (glusterd_conn_t *conn, rpc_clnt_event_t event);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
new file mode 100644
index 00000000000..1765df3d0ef
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -0,0 +1,4093 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
+#include <dlfcn.h>
+
+#include "dict.h"
+#include "syscall.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-store.h"
+#include "glusterd-volgen.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-snapd-svc-helper.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+
+/*
+ * glusterd_snap_geo_rep_restore (defined further down in this file):
+ * This function restores the atime and mtime of marker.tstamp,
+ * if present, from the snapshot's marker.tstamp file.
+ */
+
+/*
+ * Unlink a snap object from its lists, destroy its lock and free it.
+ *
+ * @snap  snap object to delete; NULL is rejected with a warning.
+ *
+ * A failure to destroy the lock is logged but does not stop the object
+ * from being freed.
+ *
+ * Returns 0 on success and -1 when @snap is NULL.
+ */
+int32_t
+glusterd_snapobject_delete (glusterd_snap_t *snap)
+{
+        if (snap == NULL) {
+                gf_msg(THIS->name, GF_LOG_WARNING, 0,
+                       GD_MSG_PARAM_NULL, "snap is NULL");
+                return -1;
+        }
+
+        cds_list_del_init (&snap->snap_list);
+        cds_list_del_init (&snap->volumes);
+        if (LOCK_DESTROY(&snap->lock))
+                /* The two literals are concatenated, so the space before
+                 * "of" has to be part of one of them. */
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_LOCK_DESTROY_FAILED,
+                        "Failed destroying lock "
+                        "of snap %s", snap->snapname);
+
+        GF_FREE (snap->description);
+        GF_FREE (snap);
+
+        return 0;
+}
+
+
+/*
+ * This function is to be called only from glusterd_peer_detach_cleanup()
+ * as it continues to delete snaps in spite of a failure while deleting
+ * one, as we don't want to fail peer_detach in such a case.
+ *
+ * For every snap volume on volinfo->snap_volumes it removes the volume
+ * from the store, deletes the volinfo, removes the owning snap from the
+ * store and deletes the snap object.  A failing step is logged and the
+ * remaining steps for that snap volume are skipped.
+ *
+ * Returns 0 if everything succeeded, otherwise the return value of the
+ * last step that failed.
+ */
+int
+glusterd_cleanup_snaps_for_volume (glusterd_volinfo_t *volinfo)
+{
+        int32_t                  op_ret         = 0;
+        int32_t                  ret            = 0;
+        xlator_t                *this           = NULL;
+        glusterd_volinfo_t      *snap_vol       = NULL;
+        glusterd_volinfo_t      *dummy_snap_vol = NULL;
+        glusterd_snap_t         *snap           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        cds_list_for_each_entry_safe (snap_vol, dummy_snap_vol,
+                                      &volinfo->snap_volumes,
+                                      snapvol_list) {
+                /* Remember the owning snap up front: snap_vol is expected
+                 * to be released by glusterd_volinfo_delete () and must not
+                 * be dereferenced once that call has succeeded. */
+                snap = snap_vol->snapshot;
+
+                ret = glusterd_store_delete_volume (snap_vol);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               GD_MSG_VOL_DELETE_FAIL, "Failed to remove "
+                               "volume %s from store", snap_vol->volname);
+                        op_ret = ret;
+                        continue;
+                }
+
+                ret = glusterd_volinfo_delete (snap_vol);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               GD_MSG_VOL_DELETE_FAIL, "Failed to remove "
+                               "volinfo %s ", snap_vol->volname);
+                        op_ret = ret;
+                        continue;
+                }
+
+                ret = glusterd_store_delete_snap (snap);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               GD_MSG_VOL_DELETE_FAIL, "Failed to remove "
+                               "snap %s from store", snap->snapname);
+                        op_ret = ret;
+                        continue;
+                }
+
+                ret = glusterd_snapobject_delete (snap);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOL_DELETE_FAIL, "Failed to delete "
+                                "snap object %s", snap->snapname);
+                        op_ret = ret;
+                        continue;
+                }
+        }
+
+        return op_ret;
+}
+
+
+
+/*
+ * When VKEY_MARKER_XTIME is enabled on @new_volinfo, copy the timestamps of
+ * the snapshot volume's marker.tstamp onto @new_volinfo's marker.tstamp
+ * using gf_set_timestamp ().
+ *
+ * Returns 0 when the option is off or cannot be read, otherwise the result
+ * of gf_set_timestamp ().
+ */
+int
+glusterd_snap_geo_rep_restore (glusterd_volinfo_t *snap_volinfo,
+                               glusterd_volinfo_t *new_volinfo)
+{
+        char             vol_tstamp_file[PATH_MAX]  = {0,};
+        char             snap_tstamp_file[PATH_MAX] = {0,};
+        glusterd_conf_t *priv                       = NULL;
+        xlator_t        *this                       = THIS;
+        int              indexing                   = 0;
+        int              ret                        = 0;
+
+        GF_ASSERT (this);
+        GF_ASSERT (snap_volinfo);
+        GF_ASSERT (new_volinfo);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* The mtime of 'marker.tstamp' only needs restoring when geo-rep
+         * indexing is switched on. */
+        indexing = glusterd_volinfo_get_boolean (new_volinfo,
+                                                 VKEY_MARKER_XTIME);
+        if (indexing == -1) {
+                gf_msg_debug (this->name, 0, "Failed"
+                              " to check whether geo-rep-indexing enabled or not");
+                return 0;
+        }
+
+        if (indexing != 1) {
+                return ret;
+        }
+
+        GLUSTERD_GET_VOLUME_DIR (vol_tstamp_file, new_volinfo, priv);
+        strncat (vol_tstamp_file, "/marker.tstamp",
+                 PATH_MAX - strlen(vol_tstamp_file) - 1);
+
+        GLUSTERD_GET_VOLUME_DIR (snap_tstamp_file, snap_volinfo, priv);
+        strncat (snap_tstamp_file, "/marker.tstamp",
+                 PATH_MAX - strlen(snap_tstamp_file) - 1);
+
+        ret = gf_set_timestamp (snap_tstamp_file, vol_tstamp_file);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TSTAMP_SET_FAIL,
+                        "Unable to set atime and mtime of %s as of %s",
+                        vol_tstamp_file, snap_tstamp_file);
+        }
+
+        return ret;
+}
+
+/* This function will copy snap volinfo to the new
+ * passed volinfo and regenerate backend store files
+ * for the restored snap.
+ *
+ * @param dict          dict that may carry per-brick values under the keys
+ *                      snap<volcount>.brick<N>.{path,snap_status,
+ *                      device_path,fs_type,mnt_opts}; a missing key leaves
+ *                      the duplicated brickinfo value untouched
+ * @param rsp_dict      dict that receives missed-snapshot entries for bricks
+ *                      whose snap_status is -1
+ * @param new_volinfo   new volinfo
+ * @param snap_volinfo  volinfo of snap volume
+ * @param volcount      volume index used in the dict keys
+ *
+ * @return 0 on success and a non-zero value on failure
+ *
+ * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves
+ */
+int32_t
+glusterd_snap_volinfo_restore (dict_t *dict, dict_t *rsp_dict,
+                               glusterd_volinfo_t *new_volinfo,
+                               glusterd_volinfo_t *snap_volinfo,
+                               int32_t volcount)
+{
+        char                    *value          = NULL;
+        char                     key[PATH_MAX]  = "";
+        int32_t                  brick_count    = -1;
+        int32_t                  ret            = -1;
+        xlator_t                *this           = NULL;
+        glusterd_brickinfo_t    *brickinfo      = NULL;
+        glusterd_brickinfo_t    *new_brickinfo  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out);
+
+        brick_count = 0;
+        cds_list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) {
+                brick_count++;
+                ret = glusterd_brickinfo_new (&new_brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_NEW_INFO_FAIL, "Failed to create "
+                                "new brickinfo");
+                        goto out;
+                }
+
+                /* Duplicate brickinfo */
+                ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup "
+                                "brickinfo");
+                        goto out;
+                }
+
+                /* Fetch values if present in dict These values won't
+                 * be present in case of a missed restore. In that case
+                 * it's fine to use the local node's value
+                 *
+                 * The string fields are filled with snprintf () so that
+                 * they are always NUL-terminated, even when the dict value
+                 * is as long as or longer than the destination.
+                 */
+                snprintf (key, sizeof (key), "snap%d.brick%d.path",
+                          volcount, brick_count);
+                ret = dict_get_str (dict, key, &value);
+                if (!ret)
+                        snprintf (new_brickinfo->path,
+                                  sizeof(new_brickinfo->path), "%s", value);
+
+                snprintf (key, sizeof (key), "snap%d.brick%d.snap_status",
+                          volcount, brick_count);
+                ret = dict_get_int32 (dict, key, &new_brickinfo->snap_status);
+
+                snprintf (key, sizeof (key), "snap%d.brick%d.device_path",
+                          volcount, brick_count);
+                ret = dict_get_str (dict, key, &value);
+                if (!ret)
+                        snprintf (new_brickinfo->device_path,
+                                  sizeof(new_brickinfo->device_path), "%s",
+                                  value);
+
+                snprintf (key, sizeof (key), "snap%d.brick%d.fs_type",
+                          volcount, brick_count);
+                ret = dict_get_str (dict, key, &value);
+                if (!ret)
+                        snprintf (new_brickinfo->fstype,
+                                  sizeof(new_brickinfo->fstype), "%s", value);
+
+                snprintf (key, sizeof (key), "snap%d.brick%d.mnt_opts",
+                          volcount, brick_count);
+                ret = dict_get_str (dict, key, &value);
+                if (!ret)
+                        snprintf (new_brickinfo->mnt_opts,
+                                  sizeof(new_brickinfo->mnt_opts), "%s",
+                                  value);
+
+                /* If the brick is not of this peer, or snapshot is missed *
+                 * for the brick do not replace the xattr for it */
+                if ((!gf_uuid_compare (brickinfo->uuid, MY_UUID)) &&
+                    (brickinfo->snap_status != -1)) {
+                        /* We need to replace the volume id of all the bricks
+                         * to the volume id of the origin volume. new_volinfo
+                         * has the origin volume's volume id*/
+                        ret = sys_lsetxattr (new_brickinfo->path,
+                                             GF_XATTR_VOL_ID_KEY,
+                                             new_volinfo->volume_id,
+                                             sizeof (new_volinfo->volume_id),
+                                             XATTR_REPLACE);
+                        if (ret == -1) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SETXATTR_FAIL, "Failed to "
+                                        "set extended attribute %s on %s. "
+                                        "Reason: %s, snap: %s",
+                                        GF_XATTR_VOL_ID_KEY,
+                                        new_brickinfo->path, strerror (errno),
+                                        new_volinfo->volname);
+                                goto out;
+                        }
+                }
+
+                /* If a snapshot is pending for this brick then
+                 * restore should also be pending
+                 */
+                if (brickinfo->snap_status == -1) {
+                        /* Adding missed delete to the dict */
+                        ret = glusterd_add_missed_snaps_to_dict
+                                                (rsp_dict,
+                                                 snap_volinfo,
+                                                 brickinfo,
+                                                 brick_count,
+                                                 GF_SNAP_OPTION_TYPE_RESTORE);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+                                        "Failed to add missed snapshot info "
+                                        "for %s:%s in the rsp_dict",
+                                        brickinfo->hostname,
+                                        brickinfo->path);
+                                goto out;
+                        }
+                }
+
+                cds_list_add_tail (&new_brickinfo->brick_list,
+                                   &new_volinfo->bricks);
+                /* ownership of new_brickinfo is passed to new_volinfo */
+                new_brickinfo = NULL;
+        }
+
+        /* Regenerate all volfiles */
+        ret = glusterd_create_volfiles_and_notify_services (new_volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Failed to regenerate volfiles");
+                goto out;
+        }
+
+        /* Restore geo-rep marker.tstamp's timestamp */
+        ret = glusterd_snap_geo_rep_restore (snap_volinfo, new_volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TSTAMP_SET_FAIL,
+                        "Geo-rep: marker.tstamp's timestamp restoration failed");
+                goto out;
+        }
+
+out:
+        /* A brickinfo that was created but never linked into new_volinfo
+         * is still owned here and has to be released on failure. */
+        if (ret && (NULL != new_brickinfo)) {
+                (void) glusterd_brickinfo_delete (new_brickinfo);
+        }
+
+        return ret;
+}
+
+/*
+ * Search the volumes of every snapshot in priv->snapshots for one whose
+ * volume_id equals @volume_id and store it in *volinfo.
+ *
+ * Returns 0 when found; -1 when @volume_id is null or nothing matches.
+ */
+int
+glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id,
+                                         glusterd_volinfo_t **volinfo)
+{
+        xlator_t            *this     = THIS;
+        glusterd_conf_t     *priv     = NULL;
+        glusterd_snap_t     *cur_snap = NULL;
+        glusterd_volinfo_t  *cur_vol  = NULL;
+        int32_t              ret      = -1;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (volinfo);
+
+        if (gf_uuid_is_null(volume_id)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_UUID_NULL, "Volume UUID is NULL");
+                goto out;
+        }
+
+        cds_list_for_each_entry (cur_snap, &priv->snapshots, snap_list) {
+                cds_list_for_each_entry (cur_vol, &cur_snap->volumes,
+                                         vol_list) {
+                        if (gf_uuid_compare (volume_id,
+                                             cur_vol->volume_id) == 0) {
+                                *volinfo = cur_vol;
+                                ret = 0;
+                                goto out;
+                        }
+                }
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_NOT_FOUND,
+                "Snap volume not found");
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Look through @snap's volumes for the one named @snap_volname and store
+ * it in *volinfo.
+ *
+ * Returns 0 when found, -1 (after logging a warning) otherwise.
+ */
+int32_t
+glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap,
+                            glusterd_volinfo_t **volinfo)
+{
+        xlator_t            *this      = THIS;
+        glusterd_conf_t     *priv      = NULL;
+        glusterd_volinfo_t  *candidate = NULL;
+        int32_t              ret       = -1;
+
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (snap);
+        GF_ASSERT (snap_volname);
+
+        cds_list_for_each_entry (candidate, &snap->volumes, vol_list) {
+                if (strcmp (candidate->volname, snap_volname) != 0)
+                        continue;
+
+                *volinfo = candidate;
+                ret = 0;
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                GD_MSG_SNAP_NOT_FOUND, "Snap volume %s not found",
+                snap_volname);
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Find, within one snapshot, the snap volume whose parent (origin) volume
+ * is named origin_volname.
+ *
+ * Returns 0 and stores the match in *volinfo; returns -1 when no volume of
+ * the snapshot has that parent. The miss is only logged at debug level.
+ */
+int32_t
+glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname,
+                                      glusterd_snap_t *snap,
+                                      glusterd_volinfo_t **volinfo)
+{
+        int32_t              ret      = -1;
+        xlator_t            *this     = NULL;
+        glusterd_volinfo_t  *snap_vol = NULL;
+        glusterd_conf_t     *priv     = NULL;
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (snap);
+        GF_ASSERT (origin_volname);
+
+        cds_list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+                if (!strcmp (snap_vol->parent_volname, origin_volname)) {
+                        ret = 0;
+                        *volinfo = snap_vol;
+                        goto out;
+                }
+        }
+
+        /* The closing parenthesis was missing from this message. */
+        gf_msg_debug (this->name, 0, "Snap volume not found (snap: %s, "
+                      "origin-volume: %s)", snap->snapname, origin_volname);
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Exports a brick's snapshot details only if required.
+ *
+ * The details are exported only when the cluster op-version is at least
+ * GD_OP_VERSION_3_6_0; for older op-versions nothing is added and 0 is
+ * returned.
+ *
+ * Keys written, each prefixed with "<prefix>.": snap_status, device_path,
+ * fs_type, mnt_opts and mount_dir.
+ *
+ * Returns 0 on success, the failing dict_set_*() result otherwise, and -1
+ * when conf, dict, prefix or brickinfo is NULL.
+ */
+int
+gd_add_brick_snap_details_to_dict (dict_t *dict, char *prefix,
+                                   glusterd_brickinfo_t *brickinfo)
+{
+        int              ret      = -1;
+        xlator_t        *this     = NULL;
+        glusterd_conf_t *conf     = NULL;
+        char             key[256] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (brickinfo != NULL), out);
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap_status", prefix);
+        ret = dict_set_int32 (dict, key, brickinfo->snap_status);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_STATUS_FAIL,
+                        "Failed to set snap_status for %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.device_path", prefix);
+        ret = dict_set_str (dict, key, brickinfo->device_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set snap_device for %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.fs_type", prefix);
+        ret = dict_set_str (dict, key, brickinfo->fstype);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set fstype for %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.mnt_opts", prefix);
+        ret = dict_set_str (dict, key, brickinfo->mnt_opts);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRK_MOUNTOPTS_FAIL,
+                        "Failed to set mnt_opts for %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.mount_dir", prefix);
+        ret = dict_set_str (dict, key, brickinfo->mount_dir);
+        if (ret)
+                /* This is a set failure, so report it with the SET id
+                 * (it was previously logged as GD_MSG_DICT_GET_FAILED). */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set mount_dir for %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+
+out:
+        return ret;
+}
+
+/* Exports a volume's snapshot details only if required.
+ *
+ * The details are exported only when the cluster op-version is at least
+ * GD_OP_VERSION_3_6_0; for older op-versions nothing is added and 0 is
+ * returned.
+ *
+ * Keys written, each prefixed with "<prefix>.": restored_from_snap,
+ * parent_volname (only when the volume has a non-empty parent_volname),
+ * is_snap_volume and snap-max-hard-limit.
+ *
+ * Returns 0 on success, the failing dict_set_*() result otherwise, and -1
+ * when conf, dict, volinfo or prefix is NULL.
+ */
+int
+gd_add_vol_snap_details_to_dict (dict_t *dict, char *prefix,
+                                 glusterd_volinfo_t *volinfo)
+{
+        int              ret      = -1;
+        xlator_t        *this     = NULL;
+        glusterd_conf_t *conf     = NULL;
+        char             key[256] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (volinfo != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.restored_from_snap", prefix);
+        ret = dict_set_dynstr_with_alloc
+                                  (dict, key,
+                                   uuid_utoa (volinfo->restored_from_snap));
+        if (ret) {
+                /* Trailing space added: the two literals used to join
+                 * into "for volume%s". */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to set %s for volume "
+                        "%s", key, volinfo->volname);
+                goto out;
+        }
+
+        if (strlen (volinfo->parent_volname) > 0) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "%s.parent_volname", prefix);
+                ret = dict_set_dynstr_with_alloc (dict, key,
+                                                  volinfo->parent_volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Unable to set %s "
+                                "for volume %s", key, volinfo->volname);
+                        goto out;
+                }
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.is_snap_volume", prefix);
+        ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to set %s for volume "
+                        "%s", key, volinfo->volname);
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.snap-max-hard-limit", prefix);
+        ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to set %s for volume "
+                        "%s", key, volinfo->volname);
+        }
+
+out:
+        return ret;
+}
+
+/* Serialise this node's missed-snapshot list into peer_data.
+ *
+ * Each pending operation becomes one "missed_snaps_<n>" entry whose value
+ * is "<node_uuid>:<snap_uuid>=<snap_vol_id>:<brick_num>:<brick_path>:
+ * <op>:<status>"; the number of entries is stored as "missed_snap_count".
+ *
+ * Returns 0 on success or the failing dict_set_*() result.
+ */
+int32_t
+glusterd_add_missed_snaps_to_export_dict (dict_t *peer_data)
+{
+        xlator_t                  *this             = THIS;
+        glusterd_conf_t           *priv             = NULL;
+        glusterd_missed_snap_info *missed           = NULL;
+        glusterd_snap_op_t        *pending_op       = NULL;
+        int32_t                    nr_entries       = 0;
+        int32_t                    ret              = -1;
+        char                       entry_key[PATH_MAX] = "";
+        char                       entry_val[PATH_MAX] = "";
+
+        GF_ASSERT (this);
+        GF_ASSERT (peer_data);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* One dict entry per (missed snap, operation) pair. */
+        cds_list_for_each_entry (missed, &priv->missed_snaps_list,
+                                 missed_snaps) {
+                cds_list_for_each_entry (pending_op, &missed->snap_ops,
+                                         snap_ops_list) {
+                        snprintf (entry_key, sizeof(entry_key),
+                                  "missed_snaps_%d", nr_entries);
+                        snprintf (entry_val, sizeof(entry_val),
+                                  "%s:%s=%s:%d:%s:%d:%d",
+                                  missed->node_uuid,
+                                  missed->snap_uuid,
+                                  pending_op->snap_vol_id,
+                                  pending_op->brick_num,
+                                  pending_op->brick_path,
+                                  pending_op->op,
+                                  pending_op->status);
+
+                        ret = dict_set_dynstr_with_alloc (peer_data,
+                                                          entry_key,
+                                                          entry_val);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Unable to set %s",
+                                        entry_key);
+                                goto out;
+                        }
+                        nr_entries++;
+                }
+        }
+
+        ret = dict_set_int32 (peer_data, "missed_snap_count", nr_entries);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set missed_snap_count");
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Export one snapshot into peer_data under the key prefix
+ * "snap<snap_count>".
+ *
+ * Every volume of the snapshot is added through
+ * glusterd_add_volume_to_dict() (plus its quota conf when quota is
+ * enabled), followed by the snapshot attributes: host_bricks, volcount,
+ * snapname, snap_id, the description (stored under "<prefix>.snapid"),
+ * time_stamp, snap_restored and snap_status.
+ *
+ * Returns 0 on success or the first non-zero result of a failing step.
+ */
+int32_t
+glusterd_add_snap_to_dict (glusterd_snap_t *snap, dict_t *peer_data,
+                           int32_t snap_count)
+{
+        xlator_t             *this               = THIS;
+        glusterd_volinfo_t   *vol                = NULL;
+        glusterd_brickinfo_t *brick              = NULL;
+        gf_boolean_t          hosts_local_brick  = _gf_false;
+        int32_t               nr_vols            = 0;
+        int32_t               ret                = -1;
+        char                  key[NAME_MAX]      = "";
+        char                  prefix[NAME_MAX]   = "";
+
+        GF_ASSERT (this);
+        GF_ASSERT (snap);
+        GF_ASSERT (peer_data);
+
+        snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
+
+        cds_list_for_each_entry (vol, &snap->volumes, vol_list) {
+                nr_vols++;
+
+                ret = glusterd_add_volume_to_dict (vol, peer_data,
+                                                   nr_vols, prefix);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to add snap:%s volume:%s "
+                                "to peer_data dict for handshake",
+                                snap->snapname, vol->volname);
+                        goto out;
+                }
+
+                if (glusterd_is_volume_quota_enabled (vol)) {
+                        ret = glusterd_vol_add_quota_conf_to_dict (vol,
+                                                                   peer_data,
+                                                                   nr_vols,
+                                                                   prefix);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to add quota conf for "
+                                        "snap:%s volume:%s to peer_data "
+                                        "dict for handshake", snap->snapname,
+                                        vol->volname);
+                                goto out;
+                        }
+                }
+
+                /* Note whether any brick of this volume lives on this node. */
+                cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                        if (gf_uuid_compare (brick->uuid, MY_UUID) == 0) {
+                                hosts_local_brick = _gf_true;
+                                break;
+                        }
+                }
+        }
+
+        snprintf (key, sizeof(key), "%s.host_bricks", prefix);
+        ret = dict_set_int8 (peer_data, key, (int8_t) hosts_local_brick);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set host_bricks for snap %s",
+                        snap->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key), "%s.volcount", prefix);
+        ret = dict_set_int32 (peer_data, key, nr_vols);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set volcount for snap %s",
+                        snap->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key), "%s.snapname", prefix);
+        ret = dict_set_dynstr_with_alloc (peer_data, key, snap->snapname);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set snapname for snap %s",
+                        snap->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key), "%s.snap_id", prefix);
+        ret = dict_set_dynstr_with_alloc (peer_data, key,
+                                          uuid_utoa (snap->snap_id));
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set snap_id for snap %s",
+                        snap->snapname);
+                goto out;
+        }
+
+        /* The description is optional; its key is "<prefix>.snapid",
+         * which is distinct from "<prefix>.snap_id" above. */
+        if (snap->description) {
+                snprintf (key, sizeof(key), "%s.snapid", prefix);
+                ret = dict_set_dynstr_with_alloc (peer_data, key,
+                                                  snap->description);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set description for snap %s",
+                                snap->snapname);
+                        goto out;
+                }
+        }
+
+        snprintf (key, sizeof(key), "%s.time_stamp", prefix);
+        ret = dict_set_int64 (peer_data, key, (int64_t)snap->time_stamp);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set time_stamp for snap %s",
+                        snap->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key), "%s.snap_restored", prefix);
+        ret = dict_set_int8 (peer_data, key, snap->snap_restored);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set snap_restored for snap %s",
+                        snap->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key), "%s.snap_status", prefix);
+        ret = dict_set_int32 (peer_data, key, snap->snap_status);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set snap_status for snap %s",
+                        snap->snapname);
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Export every snapshot known to this node into peer_data.
+ *
+ * Snapshots are numbered from 1 in list order and added through
+ * glusterd_add_snap_to_dict(); the total is stored as "snap_count".
+ *
+ * Returns 0 on success or the first non-zero result of a failing step.
+ */
+int32_t
+glusterd_add_snapshots_to_export_dict (dict_t *peer_data)
+{
+        xlator_t        *this     = THIS;
+        glusterd_conf_t *priv     = NULL;
+        glusterd_snap_t *cur_snap = NULL;
+        int32_t          nr_snaps = 0;
+        int32_t          ret      = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (peer_data);
+
+        cds_list_for_each_entry (cur_snap, &priv->snapshots, snap_list) {
+                nr_snaps++;
+                ret = glusterd_add_snap_to_dict (cur_snap, peer_data,
+                                                 nr_snaps);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to add snap(%s) to the "
+                                " peer_data dict for handshake",
+                                cur_snap->snapname);
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (peer_data, "snap_count", nr_snaps);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap_count");
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Imports the snapshot details of a brick if required and available.
+ *
+ * Snapshot details are imported only when the cluster op-version is at
+ * least GD_OP_VERSION_3_6_0; for older op-versions nothing is read and 0
+ * is returned.
+ *
+ * Keys read, each prefixed with "<prefix>.": snap_status, device_path,
+ * fs_type, mnt_opts and mount_dir. All of them are mandatory; a missing
+ * key is logged and its dict_get_*() result is returned.
+ *
+ * The string values come from a peer's payload, so each one is copied
+ * with a bounded, always NUL-terminated snprintf(); a value longer than
+ * the destination is truncated.
+ * NOTE(review): this assumes device_path, fstype and mnt_opts are
+ * fixed-size char arrays inside glusterd_brickinfo_t, as mount_dir is
+ * treated here — confirm against the struct definition.
+ *
+ * Returns 0 on success and -1 when conf, dict, prefix or brickinfo is
+ * NULL.
+ */
+int
+gd_import_new_brick_snap_details (dict_t *dict, char *prefix,
+                                  glusterd_brickinfo_t *brickinfo)
+{
+        int              ret         = -1;
+        xlator_t        *this        = NULL;
+        glusterd_conf_t *conf        = NULL;
+        char             key[512]    = {0,};
+        char            *snap_device = NULL;
+        char            *fs_type     = NULL;
+        char            *mnt_opts    = NULL;
+        char            *mount_dir   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (brickinfo != NULL), out);
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap_status", prefix);
+        ret = dict_get_int32 (dict, key, &brickinfo->snap_status);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload", key);
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.device_path", prefix);
+        ret = dict_get_str (dict, key, &snap_device);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload", key);
+                goto out;
+        }
+        snprintf (brickinfo->device_path, sizeof (brickinfo->device_path),
+                  "%s", snap_device);
+
+        snprintf (key, sizeof (key), "%s.fs_type", prefix);
+        ret = dict_get_str (dict, key, &fs_type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload", key);
+                goto out;
+        }
+        snprintf (brickinfo->fstype, sizeof (brickinfo->fstype),
+                  "%s", fs_type);
+
+        snprintf (key, sizeof (key), "%s.mnt_opts", prefix);
+        ret = dict_get_str (dict, key, &mnt_opts);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload", key);
+                goto out;
+        }
+        snprintf (brickinfo->mnt_opts, sizeof (brickinfo->mnt_opts),
+                  "%s", mnt_opts);
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.mount_dir", prefix);
+        ret = dict_get_str (dict, key, &mount_dir);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "%s missing in payload", key);
+                goto out;
+        }
+        snprintf (brickinfo->mount_dir, sizeof (brickinfo->mount_dir),
+                  "%s", mount_dir);
+
+out:
+        return ret;
+}
+
+/*
+ * Reads a volume's snapshot attributes out of a peer payload.
+ *
+ * Nothing is read (and 0 is returned) while cluster.op_version is below
+ * GD_OP_VERSION_3_6_0, the op-version from which volume snapshot is
+ * supported. Otherwise "<prefix>.is_snap_volume",
+ * "<prefix>.restored_from_snap" and "<prefix>.snap-max-hard-limit" are all
+ * required; a missing key is logged together with volname and the
+ * dict_get_*() result is returned.
+ *
+ * Returns -1 when conf, dict, volinfo, prefix or volname is NULL.
+ */
+int
+gd_import_volume_snap_details (dict_t *dict, glusterd_volinfo_t *volinfo,
+                               char *prefix, char *volname)
+{
+        xlator_t        *this          = THIS;
+        glusterd_conf_t *conf          = NULL;
+        char            *snap_uuid_str = NULL;
+        char             key[256]      = {0,};
+        int              ret           = -1;
+
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (dict != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (volinfo != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (prefix != NULL), out);
+        GF_VALIDATE_OR_GOTO (this->name, (volname != NULL), out);
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.is_snap_volume", prefix);
+        ret = dict_get_uint32 (dict, key, &volinfo->is_snap_volume);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload "
+                        "for %s", key, volname);
+                goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.restored_from_snap", prefix);
+        ret = dict_get_str (dict, key, &snap_uuid_str);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload "
+                        "for %s", key, volname);
+                goto out;
+        }
+
+        /* The parse result is not checked. */
+        gf_uuid_parse (snap_uuid_str, volinfo->restored_from_snap);
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.snap-max-hard-limit", prefix);
+        ret = dict_get_uint64 (dict, key, &volinfo->snap_max_hard_limit);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s missing in payload "
+                        "for %s", key, volname);
+        }
+out:
+        return ret;
+}
+
+/* Perform one missed snapshot operation (delete or restore) on this node.
+ *
+ * GF_SNAP_OPTION_TYPE_DELETE removes the snap via glusterd_snap_remove().
+ * GF_SNAP_OPTION_TYPE_RESTORE restores every volume of the snap onto its
+ * parent volume, detaches and unrefs the parent's old volinfo and runs the
+ * restore cleanup. Any other op is rejected as an invalid entry.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int32_t
+glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op)
+{
+        dict_t             *dict           = NULL;
+        int32_t             ret            = -1;
+        glusterd_conf_t    *priv           = NULL;
+        glusterd_volinfo_t *snap_volinfo   = NULL;
+        glusterd_volinfo_t *volinfo        = NULL;
+        glusterd_volinfo_t *tmp            = NULL;
+        xlator_t           *this           = NULL;
+        uuid_t              null_uuid      = {0};
+        char               *parent_volname = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (snap);
+
+        dict = dict_new();
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL, "Unable to create dict");
+                ret = -1;
+                goto out;
+        }
+
+        switch (op) {
+        case GF_SNAP_OPTION_TYPE_DELETE:
+                ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_false,
+                                            _gf_false);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL,
+                                "Failed to remove snap");
+                        goto out;
+                }
+
+                break;
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                cds_list_for_each_entry_safe (snap_volinfo, tmp, &snap->volumes,
+                                              vol_list) {
+                        parent_volname = gf_strdup
+                                              (snap_volinfo->parent_volname);
+                        if (!parent_volname) {
+                                /* ret still holds 0 from a previous,
+                                 * successful iteration; report failure. */
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = glusterd_volinfo_find (parent_volname, &volinfo);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOLINFO_GET_FAIL,
+                                        "Could not get volinfo of %s",
+                                        parent_volname);
+                                goto out;
+                        }
+
+                        volinfo->version--;
+                        gf_uuid_copy (volinfo->restored_from_snap, null_uuid);
+
+                        /* gd_restore_snap_volume() uses the dict and volcount
+                         * to fetch snap brick info from other nodes, which were
+                         * collected during prevalidation. As this is an ad-hoc
+                         * op and only local node's data matter, hence sending
+                         * volcount as 0 and re-using the same dict because we
+                         * need not record any missed creates in the rsp_dict.
+                         */
+                        ret = gd_restore_snap_volume (dict, dict, volinfo,
+                                                      snap_volinfo, 0);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_RESTORE_FAIL,
+                                        "Failed to restore snap for %s",
+                                        snap->snapname);
+                                /* Undo the version decrement made above. */
+                                volinfo->version++;
+                                goto out;
+                        }
+
+                        /* Restore is successful therefore delete the original
+                         * volume's volinfo. If the volinfo is already restored
+                         * then we should delete the backend LVMs.
+                         * NOTE(review): restored_from_snap was cleared just
+                         * above, so this branch runs only if
+                         * gd_restore_snap_volume() sets it again on volinfo
+                         * -- confirm. */
+                        if (!gf_uuid_is_null (volinfo->restored_from_snap)) {
+                                ret = glusterd_lvm_snapshot_remove (dict,
+                                                                    volinfo);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_SNAP_REMOVE_FAIL,
+                                                "Failed to remove LVM backend");
+                                        goto out;
+                                }
+                        }
+
+                        /* Detach the volinfo from priv->volumes, so that no new
+                         * command can ref it any more and then unref it.
+                         */
+                        cds_list_del_init (&volinfo->vol_list);
+                        glusterd_volinfo_unref (volinfo);
+
+                        ret = glusterd_snapshot_restore_cleanup (dict,
+                                                                 parent_volname,
+                                                                 snap);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_CLEANUP_FAIL,
+                                        "Failed to perform snapshot restore "
+                                        "cleanup for %s volume",
+                                        parent_volname);
+                                goto out;
+                        }
+
+                        GF_FREE (parent_volname);
+                        parent_volname = NULL;
+                }
+
+                break;
+        default:
+                /* The entry must be a create, delete, or
+                 * restore entry
+                 */
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid missed snap entry");
+                ret = -1;
+                goto out;
+        }
+
+out:
+        /* dict is NULL when dict_new() failed; do not unref it then. */
+        if (dict)
+                dict_unref (dict);
+        if (parent_volname) {
+                GF_FREE (parent_volname);
+                parent_volname = NULL;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Replay, on this node, the delete/restore operations recorded in the
+ * missed-snaps list.
+ *
+ * Only entries whose node_uuid is this node's UUID and whose snapshot
+ * still exists locally are considered. For each such snapshot the first
+ * pending non-create operation is actually executed; every pending
+ * non-create entry of that snapshot is then marked GD_MISSED_SNAP_DONE.
+ *
+ * Returns 0 on success, or the result of the first failing
+ * glusterd_perform_missed_op() call.
+ */
+int32_t
+glusterd_perform_missed_snap_ops ()
+{
+        xlator_t                  *this          = THIS;
+        glusterd_conf_t           *priv          = NULL;
+        glusterd_missed_snap_info *missed        = NULL;
+        glusterd_snap_op_t        *pending_op    = NULL;
+        glusterd_snap_t           *target_snap   = NULL;
+        uuid_t                     target_id     = {0,};
+        int32_t                    snap_op_state = -1;
+        int32_t                    ret           = -1;
+
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry (missed, &priv->missed_snaps_list,
+                                 missed_snaps) {
+                /* Entries recorded for other nodes are not ours to run. */
+                if (strcmp (missed->node_uuid, uuid_utoa (MY_UUID)) != 0)
+                        continue;
+
+                gf_uuid_parse (missed->snap_uuid, target_id);
+                target_snap = glusterd_find_snap_by_id (target_id);
+                if (target_snap == NULL) {
+                        /* Without the snap there can be no pending delete
+                         * or restore for this snap_uuid.
+                         */
+                        gf_msg_debug (this->name, 0,
+                                      "Not a pending delete or restore op");
+                        continue;
+                }
+
+                snap_op_state = GD_MISSED_SNAP_PENDING;
+                cds_list_for_each_entry (pending_op, &missed->snap_ops,
+                                         snap_ops_list) {
+                        /* Skip entries that are already done ... */
+                        if (pending_op->status == GD_MISSED_SNAP_DONE)
+                                continue;
+                        /* ... and create entries. */
+                        if (pending_op->op == GF_SNAP_OPTION_TYPE_CREATE)
+                                continue;
+
+                        /* The op is executed once per snap; the remaining
+                         * entries of the same snap are only marked done.
+                         */
+                        if (snap_op_state == GD_MISSED_SNAP_PENDING) {
+                                ret = glusterd_perform_missed_op
+                                                          (target_snap,
+                                                           pending_op->op);
+                                if (ret != 0) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_SNAPSHOT_OP_FAILED,
+                                                "Failed to perform missed snap op");
+                                        goto out;
+                                }
+                                snap_op_state = GD_MISSED_SNAP_DONE;
+                        }
+
+                        pending_op->status = GD_MISSED_SNAP_DONE;
+                }
+        }
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Merge a friend's missed-snap list into the local one, replay whatever
+ * became actionable on this node, and persist the resulting list.
+ *
+ * A payload without "missed_snap_count" is treated as "no missed snaps"
+ * and yields 0. A failure while replaying the operations is logged but
+ * does not stop the list from being stored; the returned value is then
+ * that of glusterd_store_update_missed_snaps().
+ */
+int32_t
+glusterd_import_friend_missed_snap_list (dict_t *peer_data)
+{
+        xlator_t        *this       = THIS;
+        glusterd_conf_t *priv       = NULL;
+        int32_t          nr_entries = -1;
+        int32_t          ret        = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (peer_data);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Add the friends missed_snaps entries to the in-memory list */
+        ret = dict_get_int32 (peer_data, "missed_snap_count", &nr_entries);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_MISSED_SNAP_GET_FAIL,
+                        "No missed snaps");
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_add_missed_snaps_to_list (peer_data, nr_entries);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                        "Failed to add missed snaps to list");
+                goto out;
+        }
+
+        /* Deliberately no bail-out on failure here: some of the missed
+         * ops may already have been performed, so the current list must
+         * still be persisted below.
+         */
+        ret = glusterd_perform_missed_snap_ops ();
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED,
+                        "Failed to perform snap operations");
+
+        ret = glusterd_store_update_missed_snaps ();
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                        "Failed to update missed_snaps_list");
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * This function will set boolean "conflict" to true if peer snap
+ * has a version greater than snap version of local node. Otherwise
+ * boolean "conflict" will be set to false.
+ *
+ * For i in 1..volcount the peer's version is read from peer_data under
+ * "<prefix><i>.version" and compared with the version of the first volume
+ * of the local snap. The loop stops at the first higher peer version.
+ * When volcount is less than 1 nothing is compared and *conflict is left
+ * untouched.
+ *
+ * Returns 0 on success and -1 when a version key is missing from
+ * peer_data or the local snap has no volume.
+ */
+int
+glusterd_check_peer_has_higher_snap_version (dict_t *peer_data,
+                                     char *peer_snap_name, int volcount,
+                                     gf_boolean_t *conflict, char *prefix,
+                                     glusterd_snap_t *snap, char *hostname)
+{
+        glusterd_volinfo_t *snap_volinfo = NULL;
+        char                key[256]     = {0};
+        int                 version      = 0, i = 0;
+        int                 ret          = 0;
+        xlator_t           *this         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (snap);
+        GF_ASSERT (peer_data);
+        GF_ASSERT (conflict);
+
+        for (i = 1; i <= volcount; i++) {
+                snprintf (key, sizeof (key), "%s%d.version", prefix, i);
+                ret = dict_get_int32 (peer_data, key, &version);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "failed to get "
+                                "version of snap volume = %s", peer_snap_name);
+                        return -1;
+                }
+
+                /* cds_list_entry() only does pointer arithmetic on the
+                 * node it is given, so it cannot signal an empty list by
+                 * returning NULL. Test for emptiness explicitly instead
+                 * of reading through the list head.
+                 */
+                if (cds_list_empty (&snap->volumes)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "Failed to get snap "
+                                "volinfo %s", snap->snapname);
+                        return -1;
+                }
+
+                /* TODO : As of now there is only one volume in snapshot.
+                 * Change this when multiple volume snapshot is introduced
+                 */
+                snap_volinfo = cds_list_entry (snap->volumes.next,
+                                               glusterd_volinfo_t, vol_list);
+
+                if (version > snap_volinfo->version) {
+                        /* Mismatch detected */
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_VOL_VERS_MISMATCH,
+                                "Version of volume %s differ. "
+                                "local version = %d, remote version = %d "
+                                "on peer %s", snap_volinfo->volname,
+                                snap_volinfo->version, version, hostname);
+                        *conflict = _gf_true;
+                        break;
+                } else {
+                        *conflict = _gf_false;
+                }
+        }
+        return 0;
+}
+
+/* Classify a peer's snapshot against the local snapshot list.
+ *
+ * *snap receives the local snapshot named peer_snap_name, or NULL when
+ * there is none. *conflict is set to _gf_true only when such a local
+ * snapshot exists and its snap_id differs from peer_snap_id. It is
+ * _gf_false both when the ids match and when the snapshot is missing
+ * locally.
+ */
+void
+glusterd_is_peer_snap_conflicting (char *peer_snap_name, char *peer_snap_id,
+                                   gf_boolean_t *conflict,
+                                   glusterd_snap_t **snap, char *hostname)
+{
+        xlator_t *this    = THIS;
+        uuid_t    peer_id = {0,};
+
+        GF_ASSERT (this);
+        GF_ASSERT (peer_snap_name);
+        GF_ASSERT (peer_snap_id);
+        GF_ASSERT (conflict);
+        GF_ASSERT (snap);
+        GF_ASSERT (hostname);
+
+        *snap = glusterd_find_snap_by_name (peer_snap_name);
+        if (*snap == NULL) {
+                /* Peer contains snapshots missing on the current node */
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_MISSED_SNAP_PRESENT,
+                        "Snapshot %s from peer %s missing on localhost",
+                        peer_snap_name, hostname);
+                *conflict = _gf_false;
+                return;
+        }
+
+        gf_uuid_parse (peer_snap_id, peer_id);
+        if (gf_uuid_compare (peer_id, (*snap)->snap_id) != 0) {
+                /* Same snapname locally, but a different snap_id. */
+                gf_msg_debug (this->name, 0,
+                              "Snapshot %s from peer %s conflicts with "
+                              "snapshot in localhost", peer_snap_name,
+                              hostname);
+                *conflict = _gf_true;
+                return;
+        }
+
+        /* Same snapname and same snap_id: the snap is already here. */
+        gf_msg_debug (this->name, 0,
+                      "Snapshot %s from peer %s present in "
+                      "localhost", peer_snap_name, hostname);
+        *conflict = _gf_false;
+}
+
+/* Report whether at least one brick of any volume in the given snapshot
+ * has a uuid equal to MY_UUID, i.e. is hosted on this node.
+ */
+gf_boolean_t
+glusterd_are_snap_bricks_local (glusterd_snap_t *snap)
+{
+        xlator_t             *this     = THIS;
+        glusterd_volinfo_t   *vol      = NULL;
+        glusterd_brickinfo_t *brick    = NULL;
+        gf_boolean_t          is_local = _gf_false;
+
+        GF_ASSERT (this);
+        GF_ASSERT (snap);
+
+        cds_list_for_each_entry (vol, &snap->volumes, vol_list) {
+                cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                        if (gf_uuid_compare (brick->uuid, MY_UUID) == 0) {
+                                is_local = _gf_true;
+                                break;
+                        }
+                }
+
+                /* A single local brick settles the question. */
+                if (is_local)
+                        break;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", is_local);
+        return is_local;
+}
+
+/* Tell whether the missed-snaps list holds a still-pending delete or
+ * restore for the given peer and snap_id.
+ *
+ * Returns _gf_true when an entry for (peerid, peer_snap_id) has an
+ * operation of type delete or restore whose status is
+ * GD_MISSED_SNAP_PENDING; _gf_false otherwise.
+ */
+gf_boolean_t
+glusterd_peer_has_missed_snap_delete (uuid_t peerid, char *peer_snap_id)
+{
+        xlator_t                  *this          = THIS;
+        glusterd_conf_t           *priv          = NULL;
+        glusterd_missed_snap_info *missed        = NULL;
+        glusterd_snap_op_t        *pending_op    = NULL;
+        char                      *peer_uuid     = NULL;
+        gf_boolean_t               missed_delete = _gf_false;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (peer_snap_id);
+
+        peer_uuid = uuid_utoa (peerid);
+
+        cds_list_for_each_entry (missed, &priv->missed_snaps_list,
+                                 missed_snaps) {
+                /* Only entries for this peer and this snap_id matter. */
+                if (strcmp (peer_uuid, missed->node_uuid) != 0)
+                        continue;
+                if (strcmp (peer_snap_id, missed->snap_uuid) != 0)
+                        continue;
+
+                cds_list_for_each_entry (pending_op, &missed->snap_ops,
+                                         snap_ops_list) {
+                        if ((pending_op->op != GF_SNAP_OPTION_TYPE_DELETE) &&
+                            (pending_op->op != GF_SNAP_OPTION_TYPE_RESTORE))
+                                continue;
+
+                        if (pending_op->status == GD_MISSED_SNAP_PENDING) {
+                                missed_delete = _gf_true;
+                                goto out;
+                        }
+                }
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", missed_delete);
+        return missed_delete;
+}
+
+/* Generate and store snap volfiles for an imported snap object.
+ *
+ * Steps, in order: store the snap volinfo, generate the brick volfiles,
+ * generate the trusted and then the regular client volfiles, look up the
+ * parent volume and add the snap volume to it, and finally store the snap
+ * volinfo once more.
+ *
+ * peer_snap_name is used only in log messages.
+ *
+ * Returns 0 on success or the first non-zero result of a failing step.
+ */
+int32_t
+glusterd_gen_snap_volfiles (glusterd_volinfo_t *snap_vol, char *peer_snap_name)
+{
+        int32_t             ret            = -1;
+        xlator_t           *this           = NULL;
+        glusterd_volinfo_t *parent_volinfo = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (peer_snap_name);
+
+        ret = glusterd_store_volinfo (snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_SET_FAIL, "Failed to store snapshot "
+                        "volinfo (%s) for snap %s", snap_vol->volname,
+                        peer_snap_name);
+                goto out;
+        }
+
+        ret = generate_brick_volfiles (snap_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "generating the brick volfiles for the "
+                        "snap %s failed", peer_snap_name);
+                goto out;
+        }
+
+        ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "generating the trusted client volfiles for "
+                        "the snap %s failed", peer_snap_name);
+                goto out;
+        }
+
+        ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "generating the client volfiles for the "
+                        "snap %s failed", peer_snap_name);
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (snap_vol->parent_volname,
+                                     &parent_volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "Parent volinfo "
+                        "not found for %s volume of snap %s",
+                        snap_vol->volname, peer_snap_name);
+                goto out;
+        }
+
+        glusterd_list_add_snapvol (parent_volinfo, snap_vol);
+
+        /* Store again now that the snap volume is linked to its parent. */
+        ret = glusterd_store_volinfo (snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_SET_FAIL,
+                        "Failed to store snap volinfo");
+                goto out;
+        }
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Import one snapshot advertised by a peer and add it to priv->snapshots.
+ *
+ * peer_data      - dict received from the peer
+ * snap_count     - index of the snapshot inside peer_data; every key is
+ *                  read with the prefix "snap<snap_count>"
+ * peer_snap_name - snapshot name, copied into the new snap object
+ * peer_snap_id   - snapshot uuid in string form
+ *
+ * Returns 0 on success, and also when the snapshot is marked for
+ * decommission and is therefore dropped instead of being imported. On any
+ * failure the partially built snap is handed to glusterd_snap_remove () and
+ * a non-zero value is returned.
+ */
+int32_t
+glusterd_import_friend_snap (dict_t *peer_data, int32_t snap_count,
+                             char *peer_snap_name, char *peer_snap_id)
+{
+        char                 buf[NAME_MAX]    = "";
+        char                 prefix[NAME_MAX] = "";
+        dict_t              *dict             = NULL;
+        glusterd_snap_t     *snap             = NULL;
+        glusterd_volinfo_t  *snap_vol         = NULL;
+        glusterd_conf_t     *priv             = NULL;
+        int32_t              ret              = -1;
+        int32_t              volcount         = -1;
+        int32_t              i                = -1;
+        int8_t               snap_restored    = 0;
+        xlator_t            *this             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (peer_data);
+        GF_ASSERT (peer_snap_name);
+        GF_ASSERT (peer_snap_id);
+
+        snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
+
+        snap = glusterd_new_snap_object ();
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "Could not create "
+                        "the snap object for snap %s", peer_snap_name);
+                goto out;
+        }
+
+        /* Scratch dict handed to glusterd_snap_remove () on the
+         * decommission and failure paths. */
+        dict = dict_new ();
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to create dict");
+                ret = -1;
+                goto out;
+        }
+
+        strncpy (snap->snapname, peer_snap_name, sizeof (snap->snapname) - 1);
+        gf_uuid_parse (peer_snap_id, snap->snap_id);
+
+        /* The description is optional: the lookup result is deliberately
+         * not checked.
+         * NOTE(review): the key "<prefix>.snapid" feeds snap->description,
+         * while the snapshot id is read elsewhere as "<prefix>.snap_id".
+         * Confirm against the code that fills peer_data that this is the
+         * intended key. The stored pointer refers to memory owned by
+         * peer_data - confirm its lifetime as well. */
+        snprintf (buf, sizeof(buf), "%s.snapid", prefix);
+        ret = dict_get_str (peer_data, buf, &snap->description);
+
+        snprintf (buf, sizeof(buf), "%s.time_stamp", prefix);
+        ret = dict_get_int64 (peer_data, buf, &snap->time_stamp);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get time_stamp for snap %s",
+                        peer_snap_name);
+                goto out;
+        }
+
+        /* Read into a real int8_t and assign afterwards. Writing a single
+         * byte through a cast pointer into a wider field only yields the
+         * right value on little-endian hosts. */
+        snprintf (buf, sizeof(buf), "%s.snap_restored", prefix);
+        ret = dict_get_int8 (peer_data, buf, &snap_restored);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get snap_restored for snap %s",
+                        peer_snap_name);
+                goto out;
+        }
+        snap->snap_restored = snap_restored;
+
+        snprintf (buf, sizeof(buf), "%s.snap_status", prefix);
+        ret = dict_get_int32 (peer_data, buf, (int32_t *) &snap->snap_status);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get snap_status for snap %s",
+                        peer_snap_name);
+                goto out;
+        }
+
+        /* If the snap is scheduled to be decommissioned, then
+         * don't accept the snap */
+        if (snap->snap_status == GD_SNAP_STATUS_DECOMMISSION) {
+                gf_msg_debug (this->name, 0,
+                              "The snap(%s) is scheduled to be decommissioned "
+                              "Not accepting the snap.", peer_snap_name);
+                glusterd_snap_remove (dict, snap,
+                                      _gf_true, _gf_true, _gf_false);
+                /* ret == 0 keeps the cleanup at "out" from removing the
+                 * snap a second time. */
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (buf, sizeof(buf), "%s.volcount", prefix);
+        ret = dict_get_int32 (peer_data, buf, &volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volcount for snap %s",
+                        peer_snap_name);
+                goto out;
+        }
+
+        ret = glusterd_store_create_snap_dir (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPDIR_CREATE_FAIL,
+                        "Failed to create snap dir");
+                goto out;
+        }
+
+        glusterd_list_add_order (&snap->snap_list, &priv->snapshots,
+                                 glusterd_compare_snap_time);
+
+        /* Volumes inside peer_data are numbered from 1. */
+        for (i = 1; i <= volcount; i++) {
+                ret = glusterd_import_volinfo (peer_data, i,
+                                               &snap_vol, prefix);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_SET_FAIL,
+                                "Failed to import snap volinfo for "
+                                "snap %s", peer_snap_name);
+                        goto out;
+                }
+
+                snap_vol->snapshot = snap;
+
+                ret = glusterd_gen_snap_volfiles (snap_vol, peer_snap_name);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL,
+                                "Failed to generate snap vol files "
+                                "for snap %s", peer_snap_name);
+                        goto out;
+                }
+
+                /* Brick start/stop is best effort; results are ignored. */
+                if (glusterd_is_volume_started (snap_vol)) {
+                        (void) glusterd_start_bricks (snap_vol);
+                } else {
+                        (void) glusterd_stop_bricks(snap_vol);
+                }
+
+                ret = glusterd_import_quota_conf (peer_data, i,
+                                                  snap_vol, prefix);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_QUOTA_CONFIG_IMPORT_FAIL,
+                                "Failed to import quota conf "
+                                "for snap %s", peer_snap_name);
+                        goto out;
+                }
+
+                snap_vol = NULL;
+        }
+
+        ret = glusterd_store_snap (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "Could not store snap "
+                        "object %s", peer_snap_name);
+                goto out;
+        }
+
+out:
+        /* snap is NULL only when the snap object could not be created;
+         * there is nothing to tear down in that case. */
+        if (ret && snap)
+                glusterd_snap_remove (dict, snap,
+                                      _gf_true, _gf_true, _gf_false);
+
+        if (dict)
+                dict_unref (dict);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* During a peer-handshake, after the volumes have synced, and the list of
+ * missed snapshots have synced, the node will perform the pending deletes
+ * and restores on this list. At this point, the current snapshot list in
+ * the node will be updated, and hence in case of conflicts arising during
+ * snapshot handshake, the peer hosting the bricks will be given precedence.
+ * If there is a conflict and both peers are in the same state, i.e. either
+ * both are hosting bricks or neither is, then no decision can be taken and
+ * a peer-reject will happen.
+ *
+ * glusterd_compare_and_update_snap() implements the following algorithm to
+ * perform the above task:
+ * Step 1: Start.
+ * Step 2: Check if the peer is missing a delete or restore on the said snap.
+ * If yes, goto step 7.
+ * Step 3: Check if there is a conflict between the peer's data and the
+ * local snap. If no, goto step 5.
+ * Step 4: As there is a conflict, check if both the peer and the local nodes
+ * are hosting bricks. Based on the results perform the following:
+ * Peer Hosts Bricks Local Node Hosts Bricks Action
+ * Yes Yes Goto Step 8
+ * No No Goto Step 8
+ * Yes No Goto Step 9
+ * No Yes Goto Step 7
+ * Step 5: Check if the local node is missing the peer's data.
+ * If yes, goto step 10.
+ * Step 6: Check if the snap volume version is lesser than peer_data
+ * if yes goto step 9
+ * Step 7: It's a no-op. Goto step 11
+ * Step 8: Peer Reject. Goto step 11
+ * Step 9: Delete local node's data.
+ * Step 10: Accept Peer Data.
+ * Step 11: Stop
+ *
+ */
+/* Reconcile one snapshot advertised by a peer with the local node.
+ *
+ * peer_data  - dict received from the peer
+ * snap_count - index of the snapshot in peer_data (key prefix "snap<N>")
+ * peername   - peer's name; used in log messages and passed on to the
+ *              comparison helpers
+ * peerid     - peer's uuid; used to look up missed snapshot deletes
+ *
+ * Outcomes:
+ *  - the peer missed a delete on this snap: no-op, returns 0;
+ *  - no conflict and the snap is unknown locally: the peer's data is
+ *    imported;
+ *  - no conflict and the snap exists locally: the peer's data replaces the
+ *    local snap info only when the peer is reported to have a higher snap
+ *    version (the backend LVM is kept in that case);
+ *  - conflict: the side hosting bricks wins; when both or neither host
+ *    bricks the peer is rejected with -1.
+ *
+ * Returns 0 on success or no-op, non-zero on failure or peer reject.
+ */
+int32_t
+glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
+                                  char *peername, uuid_t peerid)
+{
+        char              buf[NAME_MAX]     = "";
+        char              prefix[NAME_MAX]  = "";
+        char             *peer_snap_name    = NULL;
+        char             *peer_snap_id      = NULL;
+        dict_t           *dict              = NULL;
+        glusterd_snap_t  *snap              = NULL;
+        gf_boolean_t      conflict          = _gf_false;
+        gf_boolean_t      is_local          = _gf_false;
+        gf_boolean_t      is_hosted         = _gf_false;
+        gf_boolean_t      missed_delete     = _gf_false;
+        gf_boolean_t      remove_lvm        = _gf_true;
+        int8_t            peer_hosts_bricks = 0;
+        int32_t           ret               = -1;
+        int32_t           volcount          = 0;
+        xlator_t         *this              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (peer_data);
+        GF_ASSERT (peername);
+
+        snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
+
+        /* Fetch the peer's snapname */
+        snprintf (buf, sizeof(buf), "%s.snapname", prefix);
+        ret = dict_get_str (peer_data, buf, &peer_snap_name);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch snapname from peer: %s",
+                        peername);
+                goto out;
+        }
+
+        /* Fetch the peer's snap_id */
+        snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
+        ret = dict_get_str (peer_data, buf, &peer_snap_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch snap_id from peer: %s",
+                        peername);
+                goto out;
+        }
+
+        snprintf (buf, sizeof(buf), "%s.volcount", prefix);
+        ret = dict_get_int32 (peer_data, buf, &volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volcount for snap %s",
+                        peer_snap_name);
+                goto out;
+        }
+
+        /* Check if the peer has missed a snap delete or restore
+         * resulting in stale data for the snap in question
+         */
+        missed_delete = glusterd_peer_has_missed_snap_delete (peerid,
+                                                              peer_snap_id);
+        if (missed_delete == _gf_true) {
+                /* Peer has missed delete on the missing/conflicting snap_id */
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_MISSED_SNAP_DELETE,
+                        "Peer %s has missed a delete "
+                        "on snap %s", peername, peer_snap_name);
+                ret = 0;
+                goto out;
+        }
+
+        /* Check if there is a conflict, and if the
+         * peer data is already present
+         */
+        glusterd_is_peer_snap_conflicting (peer_snap_name, peer_snap_id,
+                                           &conflict, &snap, peername);
+        if (conflict == _gf_false) {
+                if (!snap) {
+                        /* Peer has snap with the same snapname
+                         * and snap_id, which local node doesn't have.
+                         */
+                        goto accept_peer_data;
+                }
+                /* Peer has snap with the same snapname
+                 * and snap_id. Now check if peer has a
+                 * snap with higher snap version than local
+                 * node has.
+                 */
+                ret = glusterd_check_peer_has_higher_snap_version (peer_data,
+                                                    peer_snap_name, volcount,
+                                                    &conflict, prefix, snap,
+                                                    peername);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOL_VERS_MISMATCH, "Failed "
+                                "to check version of snap volume");
+                        goto out;
+                }
+                if (conflict == _gf_true) {
+                        /*
+                         * Snap version of peer is higher than snap
+                         * version of local node.
+                         *
+                         * Remove data in local node and accept peer data.
+                         * We just need to heal snap info of local node, So
+                         * When removing data from local node, make sure
+                         * we are not removing backend lvm of the snap.
+                         */
+                        remove_lvm = _gf_false;
+                        goto remove_my_data;
+                } else {
+                        ret = 0;
+                        goto out;
+                }
+        }
+
+        /* There is a conflict. Check if the current node is
+         * hosting bricks for the conflicted snap.
+         */
+        is_local = glusterd_are_snap_bricks_local (snap);
+
+        /* Check if the peer is hosting any bricks for the
+         * conflicting snap.
+         *
+         * The flag is read into a real int8_t and normalised afterwards:
+         * storing one byte through a cast pointer into a gf_boolean_t only
+         * gives the expected value on little-endian hosts, and the result
+         * is compared with is_local below.
+         */
+        snprintf (buf, sizeof(buf), "%s.host_bricks", prefix);
+        ret = dict_get_int8 (peer_data, buf, &peer_hosts_bricks);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch host_bricks from peer: %s "
+                        "for %s", peername, peer_snap_name);
+                goto out;
+        }
+        is_hosted = peer_hosts_bricks ? _gf_true : _gf_false;
+
+        /* As there is a conflict at this point of time, the data of the
+         * node that hosts a brick takes precedence. If both the local
+         * node and the peer are in the same state, i.e if both of them
+         * are either hosting or not hosting the bricks, for the snap,
+         * then it's a peer reject
+         */
+        if (is_hosted == is_local) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CONFLICT,
+                        "Conflict in snapshot %s with peer %s",
+                        peer_snap_name, peername);
+                ret = -1;
+                goto out;
+        }
+
+        if (is_hosted == _gf_false) {
+                /* If there was a conflict, and the peer is not hosting
+                 * any brick, then don't accept peer data
+                 */
+                gf_msg_debug (this->name, 0,
+                              "Peer doesn't hosts bricks for conflicting "
+                              "snap(%s). Not accepting peer data.",
+                              peer_snap_name);
+                ret = 0;
+                goto out;
+        }
+
+        /* The peer is hosting a brick in case of conflict
+         * And local node isn't. Hence remove local node's
+         * data and accept peer data
+         */
+
+        gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting "
+                      "snap(%s). Removing local data. Accepting peer data.",
+                      peer_snap_name);
+        remove_lvm = _gf_true;
+
+remove_my_data:
+
+        /* Scratch dict required by glusterd_snap_remove (). */
+        dict = dict_new();
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Unable to create dict");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
+                                    _gf_false);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL,
+                        "Failed to remove snap %s", snap->snapname);
+                goto out;
+        }
+
+accept_peer_data:
+
+        /* Accept Peer Data */
+        ret = glusterd_import_friend_snap (peer_data, snap_count,
+                                           peer_snap_name, peer_snap_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_IMPORT_FAIL,
+                        "Failed to import snap %s from peer %s",
+                        peer_snap_name, peername);
+                goto out;
+        }
+
+out:
+        if (dict)
+                dict_unref (dict);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Reconcile every snapshot listed in peer_data with the local node.
+ *
+ * The number of snapshots is read from the "snap_count" key; snapshots are
+ * numbered from 1 and handed one at a time to
+ * glusterd_compare_and_update_snap (). Processing stops at the first
+ * failure. Returns 0 when every snapshot was handled, non-zero otherwise.
+ */
+int32_t
+glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
+                                   uuid_t peerid)
+{
+        xlator_t  *this  = THIS;
+        int32_t    total = 0;
+        int32_t    ret   = -1;
+        int        idx   = 1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (peer_data);
+        GF_ASSERT (peername);
+
+        ret = dict_get_int32 (peer_data, "snap_count", &total);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
+                goto out;
+        }
+
+        while (idx <= total) {
+                /* One snapshot from peer_data per iteration */
+                ret = glusterd_compare_and_update_snap (peer_data, idx,
+                                                        peername, peerid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPSHOT_OP_FAILED,
+                                "Failed to compare snapshots with peer %s",
+                                peername);
+                        break;
+                }
+                idx++;
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Describe the snapshot daemon of a volume in dict, using the same
+ * "brick<count>.<field>" keys a brick entry would use:
+ *   .hostname - the literal "Snapshot Daemon"
+ *   .path     - string form of MY_UUID
+ *   .port     - volinfo->snapd.port
+ *   .pid      - pid reported by gf_is_service_running () for the pidfile
+ *   .status   - value returned by gf_is_service_running ()
+ *
+ * Returns 0 on success, or the first non-zero dict_set_* result.
+ */
+int32_t
+glusterd_add_snapd_to_dict (glusterd_volinfo_t *volinfo,
+                            dict_t *dict, int32_t count)
+{
+        char              pidfile[PATH_MAX] = {0};
+        char              base_key[1024]    = {0};
+        char              key[1024]         = {0};
+        int32_t           running           = -1;
+        int32_t           pid               = -1;
+        int               ret               = -1;
+        glusterd_conf_t  *priv              = NULL;
+        xlator_t         *this              = NULL;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+
+        snprintf (base_key, sizeof (base_key), "brick%d", count);
+
+        snprintf (key, sizeof (key), "%s.hostname", base_key);
+        ret = dict_set_str (dict, key, "Snapshot Daemon");
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s.path", base_key);
+        ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (MY_UUID)));
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s.port", base_key);
+        ret = dict_set_int32 (dict, key, volinfo->snapd.port);
+        if (ret)
+                goto out;
+
+        /* Liveness and pid both come from the daemon's pidfile. */
+        glusterd_svc_build_snapd_pidfile (volinfo, pidfile, sizeof (pidfile));
+        running = gf_is_service_running (pidfile, &pid);
+
+        snprintf (key, sizeof (key), "%s.pid", base_key);
+        ret = dict_set_int32 (dict, key, pid);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s.status", base_key);
+        ret = dict_set_int32 (dict, key, running);
+
+out:
+        if (ret)
+                gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/* Copy the uint64 stored under key in src to the same key in dst.
+ * Logs and returns the non-zero dict result when either step fails. */
+static int
+glusterd_snap_config_copy_uint64 (dict_t *dst, dict_t *src, char *key)
+{
+        uint64_t  value = 0;
+        int       ret   = -1;
+
+        ret = dict_get_uint64 (src, key, &value);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get %s", key);
+                return ret;
+        }
+
+        ret = dict_set_uint64 (dst, key, value);
+        if (ret)
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set %s", key);
+
+        return ret;
+}
+
+/* Merge a snapshot-config response (src) into the aggregate dict (dst).
+ *
+ * Only the GF_SNAP_CONFIG_DISPLAY command (read from dst's
+ * "config-command" key) carries data to merge: the global hard and soft
+ * limits, "voldisplaycount", and for each displayed volume its name and
+ * its three per-volume limits. A response without the hard-limit key is
+ * treated as a dummy response and ignored.
+ *
+ * Returns 0 on success (including the nothing-to-merge cases), -1 when
+ * either dict is NULL, or the non-zero result of the failing dict call.
+ */
+int
+glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+        /* Per-volume uint64 keys, in the order they are copied. */
+        static const char *const vol_limit_keys[] = {
+                "snap-max-hard-limit",
+                "active-hard-limit",
+                "snap-max-soft-limit",
+        };
+        char      buf[PATH_MAX]   = "";
+        char     *volname         = NULL;
+        int       ret             = -1;
+        int       config_command  = 0;
+        size_t    k               = 0;
+        uint64_t  i               = 0;
+        uint64_t  hard_limit      = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+        uint64_t  soft_limit      = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+        uint64_t  voldisplaycount = 0;
+
+        if (!dst || !src) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY, "Source or Destination "
+                        "dict is empty.");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dst, "config-command", &config_command);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get config-command type");
+                goto out;
+        }
+
+        switch (config_command) {
+        case GF_SNAP_CONFIG_DISPLAY:
+                ret = dict_get_uint64 (src,
+                                       GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                                       &hard_limit);
+                if (!ret) {
+                        ret = dict_set_uint64 (dst,
+                                       GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                                       hard_limit);
+                        if (ret) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Unable to set snap_max_hard_limit");
+                                goto out;
+                        }
+                } else {
+                        /* Received dummy response from other nodes */
+                        ret = 0;
+                        goto out;
+                }
+
+                ret = dict_get_uint64 (src,
+                                       GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+                                       &soft_limit);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get snap_max_soft_limit");
+                        goto out;
+                }
+
+                ret = dict_set_uint64 (dst,
+                                       GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+                                       soft_limit);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set snap_max_soft_limit");
+                        goto out;
+                }
+
+                ret = dict_get_uint64 (src, "voldisplaycount",
+                                       &voldisplaycount);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get voldisplaycount");
+                        goto out;
+                }
+
+                ret = dict_set_uint64 (dst, "voldisplaycount",
+                                       voldisplaycount);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set voldisplaycount");
+                        goto out;
+                }
+
+                for (i = 0; i < voldisplaycount; i++) {
+                        snprintf (buf, sizeof(buf),
+                                  "volume%"PRIu64"-volname", i);
+                        ret = dict_get_str (src, buf, &volname);
+                        if (ret) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Unable to get %s", buf);
+                                goto out;
+                        }
+                        /* volname points into src. Store a private copy so
+                         * that dst does not keep a pointer into another
+                         * dict's storage. */
+                        ret = dict_set_dynstr_with_alloc (dst, buf, volname);
+                        if (ret) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Unable to set %s", buf);
+                                goto out;
+                        }
+
+                        for (k = 0; k < sizeof (vol_limit_keys) /
+                                        sizeof (vol_limit_keys[0]); k++) {
+                                snprintf (buf, sizeof(buf),
+                                          "volume%"PRIu64"-%s", i,
+                                          vol_limit_keys[k]);
+                                ret = glusterd_snap_config_copy_uint64 (dst,
+                                                                        src,
+                                                                        buf);
+                                if (ret)
+                                        goto out;
+                        }
+                }
+
+                break;
+        default:
+                break;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Copy per-brick online status from a response dict (src) into the
+ * aggregate dict (dst).
+ *
+ * The snapshot command type is read from dst's "type" key; nothing is
+ * merged for a delete. Status keys are named
+ * "<prefix><vol>.brick<order>.status", where <prefix> is "clone" when dst
+ * carries a "clonename" and "snap-vol" otherwise. Volumes are numbered from
+ * 1; a volume without a "snap-vol<vol>_brickcount" entry in src is skipped.
+ *
+ * Returns 0 on success, -1 when either dict is NULL, or the non-zero result
+ * of the failing dict call.
+ */
+int
+glusterd_merge_brick_status (dict_t *dst, dict_t *src)
+{
+        xlator_t  *this                 = THIS;
+        char       status_key[PATH_MAX] = {0, };
+        char       key_prefix[PATH_MAX] = {0, };
+        char       count_key[PATH_MAX]  = {0, };
+        char       order_key[PATH_MAX]  = {0, };
+        char      *clonename            = NULL;
+        int64_t    volume_count         = 0;
+        int64_t    vol                  = 0;
+        int64_t    brick                = 0;
+        int64_t    brick_count          = 0;
+        int64_t    brick_order          = 0;
+        int32_t    snap_command         = 0;
+        int32_t    brick_online         = 0;
+        int        ret                  = -1;
+
+        GF_ASSERT (this);
+
+        if (!dst || !src) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY,
+                        "Source or Destination dict is empty.");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dst, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "unable to get the type of the snapshot command");
+                goto out;
+        }
+
+        if (snap_command == GF_SNAP_OPTION_TYPE_DELETE) {
+                gf_msg_debug (this->name, 0, "snapshot delete command."
+                              " Need not merge the status of the bricks");
+                ret = 0;
+                goto out;
+        }
+
+        /* A clone reports status under "clone", everything else under
+         * "snap-vol". */
+        ret = dict_get_str (dst, "clonename", &clonename);
+        snprintf (key_prefix, sizeof (key_prefix), "%s",
+                  ret ? "snap-vol" : "clone");
+
+        ret = dict_get_int64 (src, "volcount", &volume_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get the volume count");
+                goto out;
+        }
+
+        for (vol = 1; vol <= volume_count; vol++) {
+                snprintf (count_key, sizeof(count_key) - 1,
+                          "snap-vol%"PRId64"_brickcount", vol);
+                ret = dict_get_int64 (src, count_key, &brick_count);
+                if (ret) {
+                        gf_msg_trace (this->name, 0,
+                                      "No bricks for this volume in this dict (%s)",
+                                      count_key);
+                        continue;
+                }
+
+                for (brick = 0; brick < brick_count; brick++) {
+                        /* The brick's order selects its status key */
+                        snprintf (order_key, sizeof(order_key) - 1,
+                                  "snap-vol%"PRId64".brick%"PRId64".order",
+                                  vol, brick);
+
+                        ret = dict_get_int64 (src, order_key, &brick_order);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Failed to get brick order (%s)",
+                                        order_key);
+                                goto out;
+                        }
+
+                        snprintf (status_key, sizeof (status_key) - 1,
+                                  "%s%"PRId64".brick%"PRId64".status",
+                                  key_prefix, vol, brick_order);
+
+                        brick_online = 0;
+                        ret = dict_get_int32 (src, status_key, &brick_online);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "failed to get the brick status (%s)",
+                                        status_key);
+                                goto out;
+                        }
+
+                        ret = dict_set_int32 (dst, status_key, brick_online);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "failed to set the brick status (%s)",
+                                        status_key);
+                                goto out;
+                        }
+                }
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* Fold a node's snapshot response (src) into the originator's dict (dst).
+ *
+ * In order: brick status is merged, "snapuuid" is copied, the optional
+ * "soft-limit-reach" flag is copied when present, and every
+ * "missed_snaps_<n>" entry of src is appended to dst after the entries it
+ * already has, with dst's "missed_snap_count" updated to the new total.
+ *
+ * Returns 0 on success (also when src lists no missed snaps), non-zero on
+ * failure.
+ */
+int
+glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+        xlator_t  *this               = THIS;
+        char       name_buf[PATH_MAX] = "";
+        char      *buf                = NULL;
+        char      *tmp_str            = NULL;
+        int32_t    src_missed         = -1;
+        int32_t    dst_missed         = -1;
+        int32_t    idx                = -1;
+        int32_t    ret                = -1;
+        int8_t     soft_limit_flag    = -1;
+
+        GF_ASSERT (this);
+
+        if (!dst || !src) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY,
+                        "Source or Destination dict is empty.");
+                goto out;
+        }
+
+        ret = glusterd_merge_brick_status (dst, src);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_SET_INFO_FAIL,
+                        "failed to merge brick status");
+                goto out;
+        }
+
+        ret = dict_get_str (src, "snapuuid", &buf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to get snap UUID");
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (dst, "snapuuid", buf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set snap uuid in dict");
+                goto out;
+        }
+
+        /* "soft-limit-reach" is forwarded only when src carries it */
+        if (dict_get_int8 (src, "soft-limit-reach", &soft_limit_flag) == 0) {
+                ret = dict_set_int8 (dst, "soft-limit-reach", soft_limit_flag);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set soft_limit_flag");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_int32 (src, "missed_snap_count", &src_missed);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "No missed snaps");
+                ret = 0;
+                goto out;
+        }
+
+        /* First response to report missed snaps: start counting at 0 */
+        if (dict_get_int32 (dst, "missed_snap_count", &dst_missed))
+                dst_missed = 0;
+
+        for (idx = 0; idx < src_missed; idx++) {
+                snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", idx);
+                ret = dict_get_str (src, name_buf, &buf);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch %s", name_buf);
+                        goto out;
+                }
+
+                /* Entry is re-keyed with the next free index in dst */
+                snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+                          dst_missed);
+
+                tmp_str = gf_strdup (buf);
+                if (!tmp_str) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (dst, name_buf, tmp_str);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set %s", name_buf);
+                        goto out;
+                }
+
+                /* dst owns the copy from here on */
+                tmp_str = NULL;
+                dst_missed++;
+        }
+
+        ret = dict_set_int32 (dst, "missed_snap_count", dst_missed);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set dst_missed_snap_count");
+                goto out;
+        }
+
+out:
+        /* NOTE(review): tmp_str is non-NULL here only after a failed
+         * dict_set_dynstr (); confirm that call does not already release
+         * the string on failure. */
+        if (ret && tmp_str)
+                GF_FREE(tmp_str);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Dispatch a snapshot response dict (src) to the merge routine matching
+ * the command type stored under dst's "type" key:
+ *   create / delete / clone -> glusterd_snap_create_use_rsp_dict ()
+ *   config                  -> glusterd_snap_config_use_rsp_dict ()
+ *   anything else           -> src is copied into dst with dict_copy ()
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+        int32_t  snap_command = 0;
+        int      ret          = -1;
+
+        if (!dst || !src) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY,
+                        "Source or Destination dict is empty.");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dst, "type", &snap_command);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "unable to get the type of the snapshot command");
+                goto out;
+        }
+
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+        case GF_SNAP_OPTION_TYPE_DELETE:
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_snap_create_use_rsp_dict (dst, src);
+                break;
+        case GF_SNAP_OPTION_TYPE_CONFIG:
+                ret = glusterd_snap_config_use_rsp_dict (dst, src);
+                break;
+        default:
+                /* copy the response dictinary's contents to the dict to be
+                 * sent back to the cli */
+                dict_copy (src, dst);
+                ret = 0;
+                break;
+        }
+
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_RSP_DICT_USE_FAIL,
+                        "Unable to use rsp dict");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* List comparator for snapshots: returns the difference in seconds
+ * between the time stamps of the snaps embedding list1 and list2
+ * (list1 minus list2), converted to int.
+ */
+int
+glusterd_compare_snap_time (struct cds_list_head *list1,
+                            struct cds_list_head *list2)
+{
+        glusterd_snap_t  *lhs = NULL;
+        glusterd_snap_t  *rhs = NULL;
+
+        GF_ASSERT (list1);
+        GF_ASSERT (list2);
+
+        lhs = cds_list_entry (list1, glusterd_snap_t, snap_list);
+        rhs = cds_list_entry (list2, glusterd_snap_t, snap_list);
+
+        return (int) difftime (lhs->time_stamp, rhs->time_stamp);
+}
+
+/* List comparator for snapshot volumes: returns the difference in seconds
+ * between the time stamps of the snapshots owning the two volumes
+ * (list1 minus list2), converted to int.
+ */
+int
+glusterd_compare_snap_vol_time (struct cds_list_head *list1,
+                                struct cds_list_head *list2)
+{
+        glusterd_volinfo_t  *lhs = NULL;
+        glusterd_volinfo_t  *rhs = NULL;
+
+        GF_ASSERT (list1);
+        GF_ASSERT (list2);
+
+        lhs = cds_list_entry (list1, glusterd_volinfo_t, snapvol_list);
+        rhs = cds_list_entry (list2, glusterd_volinfo_t, snapvol_list);
+
+        return (int) difftime (lhs->snapshot->time_stamp,
+                               rhs->snapshot->time_stamp);
+}
+
+/* Allocate a zeroed glusterd_missed_snap_info with both of its list heads
+ * initialised, and hand it back through *missed_snapinfo.
+ * Returns 0 on success, -1 when the allocation fails (the out parameter is
+ * left untouched in that case).
+ */
+int32_t
+glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo)
+{
+        xlator_t                   *this  = THIS;
+        glusterd_missed_snap_info  *entry = NULL;
+        int32_t                     ret   = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (missed_snapinfo);
+
+        entry = GF_CALLOC (1, sizeof(*entry), gf_gld_mt_missed_snapinfo_t);
+        if (entry) {
+                CDS_INIT_LIST_HEAD (&entry->missed_snaps);
+                CDS_INIT_LIST_HEAD (&entry->snap_ops);
+
+                *missed_snapinfo = entry;
+                ret = 0;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Allocate a glusterd_snap_op_t with brick_num, op and status set to -1
+ * and its list head initialised, and hand it back through *snap_op.
+ * Returns 0 on success, -1 when the allocation fails (the out parameter is
+ * left untouched in that case).
+ */
+int32_t
+glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op)
+{
+        xlator_t            *this  = THIS;
+        glusterd_snap_op_t  *entry = NULL;
+        int32_t              ret   = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (snap_op);
+
+        entry = GF_CALLOC (1, sizeof(*entry), gf_gld_mt_missed_snapinfo_t);
+        if (entry) {
+                /* -1 marks each field as "not set yet" */
+                entry->brick_num = -1;
+                entry->op = -1;
+                entry->status = -1;
+                CDS_INIT_LIST_HEAD (&entry->snap_ops_list);
+
+                *snap_op = entry;
+                ret = 0;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Tell whether the option opts appears as a whole, comma-separated token
+ * in the mount option string str.
+ * Returns _gf_false for a NULL or empty str and when str cannot be
+ * duplicated; str itself is never modified.
+ */
+gf_boolean_t
+mntopts_exists (const char *str, const char *opts)
+{
+        gf_boolean_t  found  = _gf_false;
+        char         *copy   = NULL;
+        char         *cursor = NULL;
+        char         *opt    = NULL;
+
+        GF_ASSERT (opts);
+
+        if (str != NULL && str[0] != '\0')
+                copy = gf_strdup (str);
+
+        if (copy != NULL) {
+                /* strtok_r () writes into its input, hence the copy */
+                for (opt = strtok_r (copy, ",", &cursor); opt != NULL;
+                     opt = strtok_r (NULL, ",", &cursor)) {
+                        if (strcmp (opt, opts) == 0) {
+                                found = _gf_true;
+                                break;
+                        }
+                }
+        }
+
+        GF_FREE (copy);
+        return found;
+}
+
+/* Mount the snapshot device of a brick at brick_mount_path.
+ *
+ * The mount options start from brickinfo->mnt_opts. For an "xfs" brick the
+ * "nouuid" option is appended unless already present, because a snapshot
+ * shares the file-system UUID of its origin and XFS refuses to mount a
+ * duplicate UUID.
+ *
+ * Returns 0 when the mount command succeeds, non-zero otherwise. -1 is
+ * returned without running mount when the option string does not fit the
+ * local buffer.
+ */
+int32_t
+glusterd_mount_lvm_snapshot (glusterd_brickinfo_t *brickinfo,
+                             char *brick_mount_path)
+{
+        char       msg[NAME_MAX]                      = "";
+        /* Room for 1023 bytes of options plus the appended ",nouuid" */
+        char       mnt_opts[1024 + sizeof (",nouuid")] = "";
+        size_t     used                               = 0;
+        int        len                                = 0;
+        int32_t    ret                                = -1;
+        runner_t   runner                             = {0, };
+        xlator_t  *this                               = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brick_mount_path);
+        GF_ASSERT (brickinfo);
+
+        /* Build the option string before the runner is initialised so an
+         * early exit leaves nothing to clean up. Every write is bounded;
+         * a truncated option list is rejected rather than used. */
+        len = snprintf (mnt_opts, sizeof (mnt_opts), "%s",
+                        brickinfo->mnt_opts);
+        if (len < 0 || (size_t) len >= sizeof (mnt_opts)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_MOUNT_FAIL, "mount options for "
+                        "device %s are too long", brickinfo->device_path);
+                goto out;
+        }
+        used = (size_t) len;
+
+        /* XFS file-system does not allow to mount file-system with duplicate
+         * UUID. File-system UUID of snapshot and its origin volume is same.
+         * Therefore to mount such a snapshot in XFS we need to pass nouuid
+         * option
+         */
+        if (!strcmp (brickinfo->fstype, "xfs") &&
+            !mntopts_exists (mnt_opts, "nouuid")) {
+                len = snprintf (mnt_opts + used, sizeof (mnt_opts) - used,
+                                "%snouuid", used > 0 ? "," : "");
+                if (len < 0 || (size_t) len >= sizeof (mnt_opts) - used) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_MOUNT_FAIL, "mount options for "
+                                "device %s are too long",
+                                brickinfo->device_path);
+                        goto out;
+                }
+        }
+
+        runinit (&runner);
+        snprintf (msg, sizeof (msg), "mount %s %s",
+                  brickinfo->device_path, brick_mount_path);
+
+        if (strlen (mnt_opts) > 0) {
+                runner_add_args (&runner, "mount", "-o", mnt_opts,
+                                brickinfo->device_path, brick_mount_path, NULL);
+        } else {
+                runner_add_args (&runner, "mount", brickinfo->device_path,
+                                 brick_mount_path, NULL);
+        }
+
+        runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+        ret = runner_run (&runner);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_MOUNT_FAIL, "mounting the snapshot "
+                        "logical device %s failed (error: %s)",
+                        brickinfo->device_path, strerror (errno));
+                goto out;
+        } else
+                gf_msg_debug (this->name, 0, "mounting the snapshot "
+                        "logical device %s successful", brickinfo->device_path);
+
+out:
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Decide whether one replica/disperse subvolume may be snapshotted.
+ *
+ * Quorum is currently reported as met only when no brick of the subvolume
+ * is down (down_count == 0). When a brick is down, *op_errstr receives an
+ * allocated message and *op_errno is set to EG_BRCKDWN.
+ *
+ * first_brick_on, snap_force, quorum_count and quorum_type are accepted
+ * for interface compatibility but not consulted: the quorum-count based
+ * evaluation that used them sat behind the down_count check, where both
+ * branches already left the function, so it could never run. It has been
+ * dropped; see the TODO below.
+ *
+ * Returns _gf_true when quorum is met, _gf_false otherwise (also when
+ * volinfo, dict or op_errno is NULL).
+ */
+gf_boolean_t
+glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                  int down_count, gf_boolean_t first_brick_on,
+                                  int8_t snap_force, int quorum_count,
+                                  char *quorum_type, char **op_errstr,
+                                  uint32_t *op_errno)
+{
+        gf_boolean_t  quorum_met        = _gf_false;
+        char          err_str[PATH_MAX] = {0, };
+        xlator_t     *this              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!volinfo || !dict) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        /* In a n-way replication where n >= 3 we should not take a snapshot
+         * if even one brick is down, irrespective of the quorum being met.
+         * TODO: Remove this restriction once n-way replication is
+         * supported with snapshot, and evaluate quorum_type / quorum_count
+         * against the number of bricks that are up.
+         */
+        if (down_count) {
+                snprintf (err_str, sizeof (err_str), "One or more bricks may "
+                          "be down.");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_DISCONNECTED, "%s", err_str);
+                /* The error string is optional for the caller */
+                if (op_errstr)
+                        *op_errstr = gf_strdup (err_str);
+                *op_errno = EG_BRCKDWN;
+                goto out;
+        }
+
+        quorum_met = _gf_true;
+
+out:
+        return quorum_met;
+}
+
+/* Check whether enough bricks of a volume are online for a snapshot.
+ *
+ * Brick status is read from dict under
+ * "<key_prefix><index>.brick<N>.status"; a missing key counts as a brick
+ * that is down.
+ *
+ *  - Volumes that are neither disperse nor replicate with a replica count
+ *    of at least 3: every brick must be online. The first offline brick
+ *    sets *op_errstr / *op_errno and returns 1.
+ *  - All other volumes: bricks are examined one subvolume
+ *    (dist_leaf_count bricks) at a time and the verdict for each subvolume
+ *    comes from glusterd_volume_quorum_calculate (); the first subvolume
+ *    without quorum returns -1.
+ *
+ * Returns 0 when quorum is met, non-zero otherwise. NULL volinfo, dict or
+ * op_errno now yields -1 instead of being reported as "quorum met".
+ */
+int32_t
+glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
+                              dict_t *dict, char *key_prefix,
+                              int8_t snap_force, int quorum_count,
+                              char *quorum_type, char **op_errstr,
+                              uint32_t *op_errno)
+{
+        int               ret                = -1;
+        xlator_t         *this               = NULL;
+        int64_t           i                  = 0;
+        int64_t           j                  = 0;
+        char              key[1024]          = {0, };
+        int               down_count         = 0;
+        gf_boolean_t      first_brick_on     = _gf_true;
+        glusterd_conf_t  *priv               = NULL;
+        gf_boolean_t      quorum_met         = _gf_false;
+        int               distribute_subvols = 0;
+        int32_t           brick_online       = 0;
+        char              err_str[PATH_MAX]  = {0, };
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!volinfo || !dict) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        /* Arguments are usable; failures from here on set ret themselves */
+        ret = 0;
+
+        if ((!glusterd_is_volume_replicate (volinfo) ||
+             volinfo->replica_count < 3) &&
+            (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {
+                for (i = 0; i < volinfo->brick_count ; i++) {
+                        /* for a pure distribute volume, and replica volume
+                           with replica count 2, quorum is not met if even
+                           one of its subvolumes is down
+                        */
+                        snprintf (key, sizeof (key),
+                                  "%s%"PRId64".brick%"PRId64".status",
+                                  key_prefix, index, i);
+                        ret = dict_get_int32 (dict, key, &brick_online);
+                        if (ret || !brick_online) {
+                                ret = 1;
+                                snprintf (err_str, sizeof (err_str), "quorum "
+                                          "is not met");
+                                gf_msg (this->name, GF_LOG_ERROR,
+                                        0, GD_MSG_SERVER_QUORUM_NOT_MET, "%s",
+                                        err_str);
+                                *op_errstr = gf_strdup (err_str);
+                                *op_errno = EG_BRCKDWN;
+                                goto out;
+                        }
+                }
+                ret = 0;
+                quorum_met = _gf_true;
+        } else {
+                distribute_subvols = volinfo->brick_count /
+                                     volinfo->dist_leaf_count;
+                for (j = 0; j < distribute_subvols; j++) {
+                        /* TODO: Handle distributed striped replicate volumes
+                           Currently only distributed replicate volumes are
+                           handled.
+                        */
+                        for (i = 0; i < volinfo->dist_leaf_count; i++) {
+                                snprintf (key, sizeof (key),
+                                          "%s%"PRId64".brick%"PRId64".status",
+                                          key_prefix, index,
+                                          (j * volinfo->dist_leaf_count) + i);
+                                ret = dict_get_int32 (dict, key, &brick_online);
+                                if (ret || !brick_online) {
+                                        if (i == 0)
+                                                first_brick_on = _gf_false;
+                                        down_count++;
+                                }
+                        }
+
+                        quorum_met = glusterd_volume_quorum_calculate (volinfo,
+                                                               dict,
+                                                               down_count,
+                                                               first_brick_on,
+                                                               snap_force,
+                                                               quorum_count,
+                                                               quorum_type,
+                                                               op_errstr,
+                                                               op_errno);
+                        /* goto out if quorum is not met */
+                        if (!quorum_met) {
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* Fresh counters for the next subvolume */
+                        down_count = 0;
+                        first_brick_on = _gf_true;
+                }
+        }
+
+        if (quorum_met) {
+                gf_msg_debug (this->name, 0, "volume %s is in quorum",
+                              volinfo->volname);
+                ret = 0;
+        }
+
+out:
+        return ret;
+}
+
+/*
+ * Derive the quorum count and quorum type for @volinfo and run
+ * glusterd_volume_quorum_check() with them.
+ *
+ * Default quorum count:
+ *   - replicate volume: replica_count/2 for an even replica count,
+ *     replica_count/2 + 1 for an odd one;
+ *   - disperse volume: disperse_count - redundancy_count;
+ *   - anything else: every brick (brick_count).
+ *
+ * If the volume option "cluster.quorum-type" is "fixed" and
+ * "cluster.quorum-count" can be read, that count replaces the default,
+ * except on a disperse volume where a count smaller than the default is
+ * ignored. Whenever the fixed count is missing or ignored, quorum_type
+ * is reset to NULL before it is passed on.
+ *
+ * Returns the result of glusterd_volume_quorum_check(), or -1 if
+ * @op_errno is NULL.
+ */
+int32_t
+glusterd_snap_common_quorum_calculate (glusterd_volinfo_t *volinfo,
+                                       dict_t *dict, int64_t index,
+                                       char *key_prefix,
+                                       int8_t snap_force,
+                                       gf_boolean_t snap_volume,
+                                       char **op_errstr,
+                                       uint32_t *op_errno)
+{
+        xlator_t *this         = NULL;
+        char     *quorum_type  = NULL;
+        int32_t   ret          = -1;
+        int32_t   fixed_count  = 0;
+        int       quorum_count = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        /* Default quorum count, by volume type. */
+        if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+                quorum_count = (volinfo->replica_count % 2 == 0)
+                                       ? volinfo->replica_count / 2
+                                       : volinfo->replica_count / 2 + 1;
+        } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+                quorum_count = volinfo->disperse_count -
+                               volinfo->redundancy_count;
+        } else {
+                quorum_count = volinfo->brick_count;
+        }
+
+        ret = dict_get_str (volinfo->dict, "cluster.quorum-type",
+                            &quorum_type);
+        if (ret == 0 && strcmp (quorum_type, "fixed") == 0) {
+                ret = dict_get_int32 (volinfo->dict,
+                                      "cluster.quorum-count", &fixed_count);
+                if (ret != 0) {
+                        /* No usable count: fall back to the default. */
+                        quorum_type = NULL;
+                } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE &&
+                           fixed_count < quorum_count) {
+                        /* A disperse volume needs at least its minimum
+                         * functional number of bricks. */
+                        gf_msg(this->name, GF_LOG_INFO, 0,
+                               GD_MSG_QUORUM_COUNT_IGNORED,
+                               "Ignoring small quorum-count "
+                               "(%d) on dispersed volume", fixed_count);
+                        quorum_type = NULL;
+                } else {
+                        quorum_count = fixed_count;
+                }
+        }
+
+        ret = glusterd_volume_quorum_check (volinfo, index, dict,
+                                            key_prefix, snap_force,
+                                            quorum_count, quorum_type,
+                                            op_errstr, op_errno);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOL_NOT_FOUND, "volume %s "
+                        "is not in quorum", volinfo->volname);
+
+out:
+        return ret;
+}
+
+/*
+ * Quorum check for a snapshot clone operation.
+ *
+ * Verifies that the glusterds meet server quorum, then, for each of the
+ * "volcount" volumes named in @dict, runs
+ * glusterd_snap_common_quorum_calculate() on the matching volinfo.
+ *
+ * When @snap_volume is true the snapshot named by "snapname" is looked up
+ * and the last volume in its volume list is checked; otherwise the volume
+ * named by "clonename" is looked up with glusterd_volinfo_find().
+ *
+ * On a server-quorum failure *op_errstr receives an allocated message and
+ * *op_errno is set to EG_NODEDWN.
+ *
+ * Returns 0 when quorum is met, non-zero otherwise.
+ */
+int32_t
+glusterd_snap_quorum_check_for_clone (dict_t *dict, gf_boolean_t snap_volume,
+                                      char **op_errstr, uint32_t *op_errno)
+{
+        char                err_str[PATH_MAX]    = {0, };
+        char                key_prefix[PATH_MAX] = {0, };
+        char               *snapname             = NULL;
+        glusterd_snap_t    *snap                 = NULL;
+        glusterd_volinfo_t *volinfo              = NULL;
+        glusterd_volinfo_t *tmp_volinfo          = NULL;
+        char               *volname              = NULL;
+        int64_t             volcount             = 0;
+        int64_t             i                    = 0;
+        int32_t             ret                  = -1;
+        xlator_t           *this                 = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY, "dict is NULL");
+                goto out;
+        }
+
+        if (snap_volume) {
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "failed to "
+                                "get snapname");
+                        goto out;
+                }
+
+                snap = glusterd_find_snap_by_name (snapname);
+                if (!snap) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_NOT_FOUND, "failed to "
+                                "get the snapshot %s", snapname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Do a quorum check of glusterds also. Because, the missed snapshot
+         * information will be saved by glusterd and if glusterds are not in
+         * quorum, then better fail the snapshot
+         */
+        if (!does_gd_meet_server_quorum (this)) {
+                snprintf (err_str, sizeof (err_str),
+                          "glusterds are not in quorum");
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SERVER_QUORUM_NOT_MET, "%s", err_str);
+                *op_errstr = gf_strdup (err_str);
+                *op_errno = EG_NODEDWN;
+                ret = -1;
+                goto out;
+        } else
+                gf_msg_debug (this->name, 0, "glusterds are in quorum");
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to get "
+                        "volcount");
+                goto out;
+        }
+
+        for (i = 1; i <= volcount; i++) {
+                /* The clone name is stored under one fixed key, so the
+                 * same name is fetched on every iteration. */
+                ret = dict_get_str (dict, "clonename", &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "failed to "
+                                "get clonename");
+                        goto out;
+                }
+
+                if (snap_volume && snap) {
+                        /* Walk the whole list; the last entry wins. */
+                        cds_list_for_each_entry (tmp_volinfo, &snap->volumes,
+                                                 vol_list) {
+                                volinfo = tmp_volinfo;
+                        }
+
+                        /* An empty volume list leaves volinfo NULL; fail
+                         * here instead of dereferencing it below. */
+                        if (!volinfo) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_NOT_FOUND,
+                                        "failed to get snap volume "
+                                        "for snap %s", snapname);
+                                ret = -1;
+                                goto out;
+                        }
+                } else {
+                        ret = glusterd_volinfo_find (volname, &volinfo);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOL_NOT_FOUND,
+                                        "failed to find the volume %s",
+                                        volname);
+                                goto out;
+                        }
+                }
+
+                snprintf (key_prefix, sizeof (key_prefix),
+                          "%s", snap_volume?"vol":"clone");
+
+                ret = glusterd_snap_common_quorum_calculate (volinfo,
+                                                             dict, i,
+                                                             key_prefix,
+                                                             0,
+                                                             snap_volume,
+                                                             op_errstr,
+                                                             op_errno);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOL_NOT_FOUND, "volume %s "
+                                "is not in quorum", volinfo->volname);
+                        goto out;
+                }
+        }
+out:
+        return ret;
+}
+
+
+/*
+ * Quorum check for a snapshot create operation.
+ *
+ * Checks glusterd server quorum first, then every volume listed in @dict
+ * ("volcount" entries, keyed "volname<N>" or, when @snap_volume is true,
+ * "snap-volname<N>") through glusterd_snap_common_quorum_calculate().
+ * The force flag is taken from the optional "flags" entry of @dict.
+ *
+ * On a server-quorum failure *op_errstr receives an allocated message and
+ * *op_errno is set to EG_NODEDWN.
+ *
+ * Returns 0 when quorum is met, non-zero otherwise.
+ */
+int32_t
+glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
+                                       char **op_errstr, uint32_t *op_errno)
+{
+        xlator_t           *this                 = NULL;
+        glusterd_snap_t    *snap                 = NULL;
+        glusterd_volinfo_t *volinfo              = NULL;
+        char               *snapname             = NULL;
+        char               *volname              = NULL;
+        char                err_str[PATH_MAX]    = {0, };
+        char                key_prefix[PATH_MAX] = {0, };
+        char                key[PATH_MAX]        = {0, };
+        int64_t             volcount             = 0;
+        int64_t             vol_idx              = 0;
+        int32_t             flags                = 0;
+        int32_t             ret                  = -1;
+        int8_t              snap_force           = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (dict == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY, "dict is NULL");
+                goto out;
+        }
+
+        if (snap_volume) {
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "failed to get snapname");
+                        goto out;
+                }
+
+                snap = glusterd_find_snap_by_name (snapname);
+                if (snap == NULL) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_NOT_FOUND,
+                                "failed to get the snapshot %s", snapname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* "flags" is optional; a missing entry simply means no force. */
+        ret = dict_get_int32 (dict, "flags", &flags);
+        if (ret == 0 && (flags & GF_CLI_FLAG_OP_FORCE))
+                snap_force = 1;
+
+        /* Missed-snapshot information is stored by glusterd, so the
+         * snapshot is failed when the glusterds themselves are not in
+         * quorum.
+         */
+        if (!does_gd_meet_server_quorum (this)) {
+                snprintf (err_str, sizeof (err_str),
+                          "glusterds are not in quorum");
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SERVER_QUORUM_NOT_MET, "%s", err_str);
+                *op_errstr = gf_strdup (err_str);
+                *op_errno = EG_NODEDWN;
+                ret = -1;
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "glusterds are in quorum");
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to get volcount");
+                goto out;
+        }
+
+        for (vol_idx = 1; vol_idx <= volcount; vol_idx++) {
+                snprintf (key, sizeof (key), "%s%"PRId64,
+                          snap_volume ? "snap-volname" : "volname", vol_idx);
+                ret = dict_get_str (dict, key, &volname);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "failed to get volname");
+                        goto out;
+                }
+
+                if (!snap_volume) {
+                        ret = glusterd_volinfo_find (volname, &volinfo);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOL_NOT_FOUND,
+                                        "failed to find the volume %s",
+                                        volname);
+                                goto out;
+                        }
+                } else {
+                        ret = glusterd_snap_volinfo_find (volname, snap,
+                                                          &volinfo);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_NOT_FOUND,
+                                        "failed to get snap volume %s "
+                                        "for snap %s", volname,
+                                        snapname);
+                                goto out;
+                        }
+                }
+
+                snprintf (key_prefix, sizeof (key_prefix),
+                          "%s", snap_volume ? "snap-vol" : "vol");
+
+                ret = glusterd_snap_common_quorum_calculate (volinfo, dict,
+                                                             vol_idx,
+                                                             key_prefix,
+                                                             snap_force,
+                                                             snap_volume,
+                                                             op_errstr,
+                                                             op_errno);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_VOL_NOT_FOUND,
+                                "volume %s is not in quorum",
+                                volinfo->volname);
+                        goto out;
+                }
+        }
+out:
+        return ret;
+}
+
+/*
+ * Entry point for snapshot quorum checks; dispatches on the snapshot
+ * command stored under "type" in @dict.
+ *
+ *   - create: glusterd_snap_quorum_check_for_create()
+ *   - clone:  glusterd_snap_quorum_check_for_clone(), called with
+ *             @snap_volume inverted
+ *   - delete/restore: only glusterd server quorum is checked; on failure
+ *             *op_errstr receives an allocated message and *op_errno is
+ *             set to EG_NODEDWN
+ *   - any other command: no check is performed
+ *
+ * Returns 0 when quorum is met (or no check applies), non-zero otherwise.
+ */
+int32_t
+glusterd_snap_quorum_check (dict_t *dict, gf_boolean_t snap_volume,
+                            char **op_errstr, uint32_t *op_errno)
+{
+        int32_t   ret               = -1;
+        xlator_t *this              = NULL;
+        int32_t   snap_command      = 0;
+        char      err_str[PATH_MAX] = {0, };
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY, "dict is NULL");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "unable to get the type of "
+                        "the snapshot command");
+                goto out;
+        }
+
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+                ret = glusterd_snap_quorum_check_for_create (dict, snap_volume,
+                                                             op_errstr,
+                                                             op_errno);
+                if (ret) {
+                        /* The split literal previously lacked the space
+                         * between "check" and "failed". */
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_QUORUM_CHECK_FAIL, "Quorum check "
+                                "failed during snapshot create command");
+                        goto out;
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_snap_quorum_check_for_clone (dict, !snap_volume,
+                                                            op_errstr,
+                                                            op_errno);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_QUORUM_CHECK_FAIL, "Quorum check "
+                                "failed during snapshot clone command");
+                        goto out;
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_DELETE:
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                if (!does_gd_meet_server_quorum (this)) {
+                        ret = -1;
+                        snprintf (err_str, sizeof (err_str),
+                                  "glusterds are not in quorum");
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SERVER_QUORUM_NOT_MET, "%s",
+                                err_str);
+                        *op_errstr = gf_strdup (err_str);
+                        *op_errno = EG_NODEDWN;
+                        goto out;
+                }
+
+                gf_msg_debug (this->name, 0, "glusterds are in "
+                              "quorum");
+                break;
+        default:
+                break;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Force-unmount @path by running "_PATH_UMOUNT -f <path>" through the
+ * runner framework.
+ *
+ * Returns the value of runner_run(); a non-zero result is logged as an
+ * error together with strerror(errno).
+ */
+int32_t
+glusterd_umount (const char *path)
+{
+        xlator_t *this          = NULL;
+        runner_t  runner        = {0, };
+        char      msg[NAME_MAX] = "";
+        int32_t   ret           = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (path);
+
+        /* Text passed to runner_log() for the command being run. */
+        snprintf (msg, sizeof (msg), "umount path %s", path);
+
+        runinit (&runner);
+        runner_add_args (&runner, _PATH_UMOUNT, "-f", path, NULL);
+        runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+
+        ret = runner_run (&runner);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_GLUSTERD_UMOUNT_FAIL,
+                        "umounting %s failed (%s)",
+                        path, strerror (errno));
+        }
+
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Copy the contents of @source to @destination.
+ *
+ * The destination is created with the permission bits (mode & 0777) of
+ * the source, as reported by lstat. Data is copied in chunks of
+ * sizeof (buffer) bytes.
+ *
+ * Returns 0 on success and -1 on any failure (stat, open, create, read,
+ * or a failed/short write).
+ */
+int32_t
+glusterd_copy_file (const char *source, const char *destination)
+{
+        int32_t      ret          = -1;
+        xlator_t    *this         = NULL;
+        char         buffer[1024] = "";
+        int          src_fd       = -1;
+        int          dest_fd      = -1;
+        int          read_len     = -1;
+        struct stat  stbuf        = {0,};
+        mode_t       dest_mode    = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (source);
+        GF_ASSERT (destination);
+
+        /* The stat is done to get the permission bits of the source. */
+        ret = sys_lstat (source, &stbuf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "%s not found", source);
+                goto out;
+        }
+
+        dest_mode = stbuf.st_mode & 0777;
+
+        src_fd = open (source, O_RDONLY);
+        if (src_fd < 0) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "Unable to open file %s",
+                        source);
+                goto out;
+        }
+
+        dest_fd = sys_creat (destination, dest_mode);
+        if (dest_fd < 0) {
+                ret = -1;
+                /* Report the real errno so the cause shows in the log. */
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Unble to open a file %s", destination);
+                goto out;
+        }
+
+        do {
+                ret = sys_read (src_fd, buffer, sizeof (buffer));
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED, "Error reading file "
+                                "%s", source);
+                        goto out;
+                }
+                read_len = ret;
+                if (read_len == 0)
+                        break;  /* end of file; ret is 0 == success */
+
+                ret = sys_write (dest_fd, buffer, read_len);
+                if (ret != read_len) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FILE_OP_FAILED, "Error writing in "
+                                "file %s", destination);
+                        /* A short write leaves a positive count in ret;
+                         * force a negative error for "ret < 0" callers. */
+                        ret = -1;
+                        goto out;
+                }
+        } while (ret > 0);
+out:
+        /* 0 is a valid descriptor, so test against -1, not 0. */
+        if (src_fd >= 0)
+                sys_close (src_fd);
+
+        if (dest_fd >= 0)
+                sys_close (dest_fd);
+        return ret;
+}
+
+/*
+ * Copy every entry of directory @source (except "." and "..") into
+ * directory @destination using glusterd_copy_file(). The copy is not
+ * recursive.
+ *
+ * Returns 0 when the whole directory was processed, including the case
+ * where it holds no entries to copy. Returns non-zero if the directory
+ * cannot be opened or read, a path does not fit in PATH_MAX, or a file
+ * copy fails.
+ */
+int32_t
+glusterd_copy_folder (const char *source, const char *destination)
+{
+        int32_t         ret                 = -1;
+        int             saved_errno         = 0;
+        xlator_t       *this                = NULL;
+        DIR            *dir_ptr             = NULL;
+        struct dirent  *entry               = NULL;
+        struct dirent   scratch[2]          = {{0,},};
+        char            src_path[PATH_MAX]  = {0,};
+        char            dest_path[PATH_MAX] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (source);
+        GF_ASSERT (destination);
+
+        dir_ptr = sys_opendir (source);
+        if (!dir_ptr) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Unable to open %s", source);
+                goto out;
+        }
+
+        for (;;) {
+                errno = 0;
+                entry = sys_readdir (dir_ptr, scratch);
+                if (!entry || errno != 0) {
+                        /* errno distinguishes a read failure from the
+                         * normal end of the directory. */
+                        saved_errno = errno;
+                        if (saved_errno != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, saved_errno,
+                                        GD_MSG_DIR_OP_FAILED,
+                                        "Unable to read %s", source);
+                                ret = -1;
+                        } else {
+                                ret = 0;
+                        }
+                        break;
+                }
+
+                if (strcmp (entry->d_name, ".") == 0 ||
+                    strcmp (entry->d_name, "..") == 0)
+                        continue;
+
+                /* Reject truncated paths rather than copy a wrong file. */
+                ret = snprintf (src_path, sizeof (src_path), "%s/%s",
+                                source, entry->d_name);
+                if (ret < 0 || (size_t) ret >= sizeof (src_path)) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = snprintf (dest_path, sizeof (dest_path), "%s/%s",
+                                destination, entry->d_name);
+                if (ret < 0 || (size_t) ret >= sizeof (dest_path)) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_copy_file (src_path, dest_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Could not copy "
+                                "%s to %s", src_path, dest_path);
+                        goto out;
+                }
+        }
+out:
+        if (dir_ptr)
+                (void) sys_closedir (dir_ptr);
+
+        return ret;
+}
+
+/*
+ * Build the geo-replication session name and slave string for the slave
+ * stored under @slave_key in @gsync_slaves_dict.
+ *
+ * The dict value has the form
+ *     "master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid"
+ *
+ * Outputs (both written with a PATH_MAX bound, so the caller must supply
+ * buffers of at least PATH_MAX bytes):
+ *   @session  "<origin_volname>_<slave_host>_<slave_vol>", where a
+ *             leading "root@" is dropped from slave_host
+ *   @slave    "<slave_host>::<slave_vol>" (slave_host kept as stored)
+ *
+ * Returns 0 on success, non-zero on lookup, parse or allocation failure.
+ */
+int32_t
+glusterd_get_geo_rep_session (char *slave_key, char *origin_volname,
+                              dict_t *gsync_slaves_dict, char *session,
+                              char *slave)
+{
+        int32_t   ret        = -1;
+        char     *token      = NULL;
+        char     *temp       = NULL;
+        char     *ip         = NULL;
+        char     *ip_i       = NULL;
+        char     *buffer     = NULL;
+        xlator_t *this       = NULL;
+        char     *slave_temp = NULL;
+        char     *save_ptr   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (slave_key);
+        GF_ASSERT (origin_volname);
+        GF_ASSERT (gsync_slaves_dict);
+
+        ret = dict_get_str (gsync_slaves_dict, slave_key, &buffer);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to "
+                        "get value for key %s", slave_key);
+                goto out;
+        }
+
+        /* strtok_r() modifies its input, so parse a private copy. */
+        temp = gf_strdup (buffer);
+        if (!temp) {
+                ret = -1;
+                goto out;
+        }
+
+        /* geo-rep session string format being parsed:
+         * "master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid"
+         */
+        token = strtok_r (temp, "/", &save_ptr);
+        if (!token) {
+                ret = -1;
+                goto out;
+        }
+
+        token = strtok_r (NULL, ":", &save_ptr);
+        if (!token) {
+                ret = -1;
+                goto out;
+        }
+        /* The token still carries the second '/' of "//"; skip it. */
+        token++;
+
+        ip = gf_strdup (token);
+        if (!ip) {
+                ret = -1;
+                goto out;
+        }
+        ip_i = ip;
+
+        token = strtok_r (NULL, ":", &save_ptr);
+        if (!token) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Check the pointer that was just allocated. */
+        slave_temp = gf_strdup (token);
+        if (!slave_temp) {
+                ret = -1;
+                goto out;
+        }
+
+        /* If 'ip' has 'root@slavehost', point to 'slavehost' as
+         * working directory for root users are created without
+         * 'root@'. Matching the full "root@" prefix keeps ip_i inside
+         * the string even when the host part is just "root". */
+        if (strncmp (ip, "root@", 5) == 0)
+                ip_i = ip + 5;
+
+        ret = snprintf (session, PATH_MAX, "%s_%s_%s",
+                        origin_volname, ip_i, slave_temp);
+        if (ret < 0) /* Negative value is an error */
+                goto out;
+
+        ret = snprintf (slave, PATH_MAX, "%s::%s", ip, slave_temp);
+        if (ret < 0) {
+                goto out;
+        }
+
+        ret = 0; /* Success */
+
+out:
+        if (temp)
+                GF_FREE (temp);
+
+        if (ip)
+                GF_FREE (ip);
+
+        if (slave_temp)
+                GF_FREE (slave_temp);
+
+        return ret;
+}
+
+/*
+ * Copy the quota configuration of @src_vol into the volume directory of
+ * @dest_vol.
+ *
+ * "quota.conf" is optional: when it is absent the function returns 0
+ * without touching *conf_present. When it is present, both "quota.conf"
+ * and "quota.cksum" are copied; a missing "quota.cksum" is then an
+ * error. *conf_present is set to _gf_true only after both copies
+ * succeed.
+ *
+ * Returns 0 on success (or when there is nothing to copy), non-zero on
+ * failure.
+ */
+int32_t
+glusterd_copy_quota_files (glusterd_volinfo_t *src_vol,
+                           glusterd_volinfo_t *dest_vol,
+                           gf_boolean_t *conf_present) {
+
+        xlator_t        *this                = NULL;
+        glusterd_conf_t *priv                = NULL;
+        struct stat      stbuf               = {0,};
+        char             src_dir[PATH_MAX]   = "";
+        char             dest_dir[PATH_MAX]  = "";
+        char             src_path[PATH_MAX]  = "";
+        char             dest_path[PATH_MAX] = "";
+        int32_t          ret                 = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (src_vol);
+        GF_ASSERT (dest_vol);
+
+        GLUSTERD_GET_VOLUME_DIR (src_dir, src_vol, priv);
+        GLUSTERD_GET_VOLUME_DIR (dest_dir, dest_vol, priv);
+
+        /* ---- quota.conf ---- */
+        ret = snprintf (src_path, sizeof (src_path), "%s/quota.conf",
+                        src_dir);
+        if (ret < 0)
+                goto out;
+
+        /* No quota.conf means quota is not enabled; nothing to copy. */
+        if (sys_lstat (src_path, &stbuf) != 0) {
+                ret = 0;
+                gf_msg_debug (this->name, 0, "%s not found", src_path);
+                goto out;
+        }
+
+        ret = snprintf (dest_path, sizeof (dest_path), "%s/quota.conf",
+                        dest_dir);
+        if (ret < 0)
+                goto out;
+
+        ret = glusterd_copy_file (src_path, dest_path);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Failed to copy %s in %s",
+                        src_path, dest_path);
+                goto out;
+        }
+
+        /* ---- quota.cksum ---- */
+        ret = snprintf (src_path, sizeof (src_path), "%s/quota.cksum",
+                        src_dir);
+        if (ret < 0)
+                goto out;
+
+        /* With quota.conf present, quota.cksum must exist as well;
+         * its absence fails the operation. */
+        ret = sys_lstat (src_path, &stbuf);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_FILE_NOT_FOUND, "%s not found", src_path);
+                goto out;
+        }
+
+        ret = snprintf (dest_path, sizeof (dest_path), "%s/quota.cksum",
+                        dest_dir);
+        if (ret < 0)
+                goto out;
+
+        ret = glusterd_copy_file (src_path, dest_path);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Failed to copy %s in %s",
+                        src_path, dest_path);
+                goto out;
+        }
+
+        *conf_present = _gf_true;
+out:
+        return ret;
+
+}
+
+/* *
+ * There are two possibilities here: the destination is either a snapshot
+ * or a clone. For a snapshot, the nfs-ganesha export file is copied to
+ * the snap directory. For a clone, a new export file is created for the
+ * clone in GANESHA_EXPORT_DIRECTORY, replacing occurrences of the volume
+ * name with the clone name.
+ */
+/*
+ * Copy the nfs-ganesha export file of @src_vol for @dest_vol.
+ *
+ * Source file:
+ *   - snapshot volume: <snap dir>/export.<snapname>.conf
+ *   - regular volume:  GANESHA_EXPORT_DIRECTORY/export.<volname>.conf
+ * A missing source file (ENOENT) means there is nothing to copy and is
+ * treated as success.
+ *
+ * Destination:
+ *   - snapshot volume: the file is copied verbatim into the snap dir.
+ *   - otherwise (clone): a new export file is written in
+ *     GANESHA_EXPORT_DIRECTORY in which every occurrence of the source
+ *     volume name (the parent volume name when the source is a snapshot)
+ *     is replaced by @dest_vol's name. The replacement works one
+ *     fgets() buffer at a time.
+ *
+ * Returns 0 on success or when there is nothing to copy, non-zero on
+ * failure.
+ */
+int
+glusterd_copy_nfs_ganesha_file (glusterd_volinfo_t *src_vol,
+                                glusterd_volinfo_t *dest_vol)
+{
+
+        int32_t          ret                 = -1;
+        char             snap_dir[PATH_MAX]  = {0,};
+        char             src_path[PATH_MAX]  = {0,};
+        char             dest_path[PATH_MAX] = {0,};
+        char             buffer[BUFSIZ]      = {0,};
+        char            *find_ptr            = NULL;
+        char            *buff_ptr            = NULL;
+        char            *tmp_ptr             = NULL;
+        xlator_t        *this                = NULL;
+        glusterd_conf_t *priv                = NULL;
+        struct stat      stbuf               = {0,};
+        FILE            *src                 = NULL;
+        FILE            *dest                = NULL;
+
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("snapshot", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, src_vol, out);
+        GF_VALIDATE_OR_GOTO (this->name, dest_vol, out);
+
+        if (src_vol->is_snap_volume) {
+                GLUSTERD_GET_SNAP_DIR (snap_dir, src_vol->snapshot, priv);
+                ret = snprintf (src_path, PATH_MAX, "%s/export.%s.conf",
+                                snap_dir, src_vol->snapshot->snapname);
+        } else {
+                ret = snprintf (src_path, PATH_MAX, "%s/export.%s.conf",
+                                GANESHA_EXPORT_DIRECTORY, src_vol->volname);
+        }
+        if (ret < 0 || ret >= PATH_MAX)
+                goto out;
+
+        ret = sys_lstat (src_path, &stbuf);
+        if (ret) {
+                /* *
+                 * If export file is not present, volume is not exported
+                 * via ganesha. So it is not necessary to copy that during
+                 * snapshot.
+                 */
+                if (errno == ENOENT) {
+                        ret = 0;
+                        gf_msg_debug (this->name, 0, "%s not found", src_path);
+                } else
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Stat on %s failed with %s",
+                                src_path, strerror (errno));
+                goto out;
+        }
+
+        if (dest_vol->is_snap_volume) {
+                memset (snap_dir, 0 , PATH_MAX);
+                GLUSTERD_GET_SNAP_DIR (snap_dir, dest_vol->snapshot, priv);
+                ret = snprintf (dest_path, sizeof (dest_path),
+                                "%s/export.%s.conf", snap_dir,
+                                dest_vol->snapshot->snapname);
+                if (ret < 0)
+                        goto out;
+
+                ret = glusterd_copy_file (src_path, dest_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Failed to copy %s in %s",
+                                src_path, dest_path);
+                        goto out;
+                }
+
+        } else {
+                ret = snprintf (dest_path, sizeof (dest_path),
+                                "%s/export.%s.conf", GANESHA_EXPORT_DIRECTORY,
+                                dest_vol->volname);
+                if (ret < 0)
+                        goto out;
+
+                /* Open the source first so that an unreadable source
+                 * does not leave an empty destination file behind. */
+                src = fopen (src_path, "r");
+                if (!src) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Unable to open file %s", src_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                dest = fopen (dest_path, "w");
+                if (!dest) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Unable to open file %s", dest_path);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* *
+                 * if the source volume is snapshot, the export conf file
+                 * consists of original volname
+                 */
+                if (src_vol->is_snap_volume)
+                        find_ptr = gf_strdup (src_vol->parent_volname);
+                else
+                        find_ptr = gf_strdup (src_vol->volname);
+
+                if (!find_ptr) {
+                        /* ret still holds snprintf's positive length. */
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Replacing volname with clonename */
+                while (fgets(buffer, BUFSIZ, src)) {
+                        buff_ptr = buffer;
+                        while ((tmp_ptr = strstr(buff_ptr, find_ptr))) {
+                                while (buff_ptr < tmp_ptr)
+                                        fputc((int)*buff_ptr++, dest);
+                                fputs(dest_vol->volname, dest);
+                                buff_ptr += strlen(find_ptr);
+                        }
+                        fputs(buff_ptr, dest);
+                        memset (buffer, 0, BUFSIZ);
+                }
+
+                /* Report success as 0 rather than snprintf's length. */
+                ret = 0;
+        }
+out:
+        if (src)
+                fclose (src);
+        if (dest)
+                fclose (dest);
+        if (find_ptr)
+                GF_FREE(find_ptr);
+
+        return ret;
+}
+
+/*
+ * Restore the geo-replication session directories saved with the
+ * snapshot volume @snap_vol.
+ *
+ * For every "slave<N>" entry in snap_vol->gsync_slaves the session name
+ * is derived with glusterd_get_geo_rep_session() and the directory
+ *     <snap geo-rep dir>/<session>
+ * is copied to
+ *     <workdir>/GEOREP/<session>
+ * using glusterd_copy_folder().
+ *
+ * Returns 0 on success, non-zero on the first failure.
+ */
+int32_t
+glusterd_restore_geo_rep_files (glusterd_volinfo_t *snap_vol)
+{
+        int32_t             ret                   = -1;
+        char                src_path[PATH_MAX]    = "";
+        char                dest_path[PATH_MAX]   = "";
+        xlator_t           *this                  = NULL;
+        char               *origin_volname        = NULL;
+        glusterd_volinfo_t *origin_vol            = NULL;
+        int                 i                     = 0;
+        char                key[PATH_MAX]         = "";
+        char                session[PATH_MAX]     = "";
+        char                slave[PATH_MAX]       = "";
+        char                snapgeo_dir[PATH_MAX] = "";
+        glusterd_conf_t    *priv                  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (snap_vol);
+
+        origin_volname = gf_strdup (snap_vol->parent_volname);
+        if (!origin_volname) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (origin_volname, &origin_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "Unable to fetch "
+                        "volinfo for volname %s", origin_volname);
+                goto out;
+        }
+
+        for (i = 1 ; i <= snap_vol->gsync_slaves->count; i++) {
+                ret = snprintf (key, sizeof (key), "slave%d", i);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                /* "origin_vol" is used here because geo-replication saves
+                 * the session in the form of master_ip_slave.
+                 * As we need the master volume to be same even after
+                 * restore, we are passing the origin volume name.
+                 *
+                 * "snap_vol->gsync_slaves" contain the slave information
+                 * when the snapshot was taken, hence we have to restore all
+                 * those slaves information when we do snapshot restore.
+                 */
+                ret = glusterd_get_geo_rep_session (key, origin_vol->volname,
+                                                    snap_vol->gsync_slaves,
+                                                    session, slave);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GEOREP_GET_FAILED,
+                                "Failed to get geo-rep session");
+                        goto out;
+                }
+
+                GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot,
+                                              priv);
+                ret = snprintf (src_path, sizeof (src_path),
+                                "%s/%s", snapgeo_dir, session);
+                if (ret < 0)
+                        goto out;
+
+                ret = snprintf (dest_path, sizeof (dest_path),
+                                "%s/%s/%s", priv->workdir, GEOREP,
+                                session);
+                if (ret < 0)
+                        goto out;
+
+                ret = glusterd_copy_folder (src_path, dest_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DIR_OP_FAILED, "Could not copy "
+                                "%s to %s", src_path, dest_path);
+                        goto out;
+                }
+        }
+out:
+        /* Release the duplicated name; the old code asserted on it
+         * instead of freeing it, leaking the allocation. */
+        if (origin_volname)
+                GF_FREE (origin_volname);
+
+        return ret;
+}
+
+/*
+ * Restore the nfs-ganesha export file saved with snapshot @snap.
+ *
+ * Copies <snap dir>/export.<snapname>.conf to
+ * GANESHA_EXPORT_DIRECTORY/export.<src_vol volname>.conf. A missing
+ * source file (ENOENT) means there is nothing to restore and yields 0.
+ *
+ * Returns 0 on success or when there is nothing to restore, non-zero
+ * otherwise.
+ */
+int
+glusterd_restore_nfs_ganesha_file (glusterd_volinfo_t *src_vol,
+                                   glusterd_snap_t *snap)
+{
+
+        xlator_t        *this                = NULL;
+        glusterd_conf_t *priv                = NULL;
+        struct stat      stbuf               = {0,};
+        char             snap_dir[PATH_MAX]  = "";
+        char             src_path[PATH_MAX]  = "";
+        char             dest_path[PATH_MAX] = "";
+        int32_t          ret                 = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("snapshot", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, src_vol, out);
+        GF_VALIDATE_OR_GOTO (this->name, snap, out);
+
+        GLUSTERD_GET_SNAP_DIR (snap_dir, snap, priv);
+
+        ret = snprintf (src_path, sizeof (src_path), "%s/export.%s.conf",
+                        snap_dir, snap->snapname);
+        if (ret < 0)
+                goto out;
+
+        ret = sys_lstat (src_path, &stbuf);
+        if (ret != 0) {
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Stat on %s failed with %s",
+                                src_path, strerror (errno));
+                } else {
+                        /* No saved export file: nothing to restore. */
+                        ret = 0;
+                        gf_msg_debug (this->name, 0, "%s not found", src_path);
+                }
+                goto out;
+        }
+
+        ret = snprintf (dest_path, sizeof (dest_path), "%s/export.%s.conf",
+                        GANESHA_EXPORT_DIRECTORY, src_vol->volname);
+        if (ret < 0)
+                goto out;
+
+        ret = glusterd_copy_file (src_path, dest_path);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Failed to copy %s in %s",
+                        src_path, dest_path);
+        }
+
+out:
+        return ret;
+
+}
+/* Snapd functions */
+/*
+ * Report whether "features.uss" is enabled for @volinfo.
+ *
+ * Returns the boolean value stored under "features.uss" in the volume's
+ * dict, 0 when the key is absent, and -1 when the lookup fails.
+ */
+int
+glusterd_is_snapd_enabled (glusterd_volinfo_t *volinfo)
+{
+        xlator_t *this    = THIS;
+        int       enabled = 0;
+
+        /* -2 is passed as the default so that "key missing" can be told
+         * apart from the -1 error return. */
+        enabled = dict_get_str_boolean (volinfo->dict, "features.uss", -2);
+
+        switch (enabled) {
+        case -2:
+                gf_msg_debug (this->name, 0, "Key features.uss not "
+                              "present in the dict for volume %s",
+                              volinfo->volname);
+                enabled = 0;
+                break;
+        case -1:
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get 'features.uss'"
+                        " from dict for volume %s", volinfo->volname);
+                break;
+        default:
+                break;
+        }
+
+        return enabled;
+}
+
+
+/*
+ * Check whether @volinfo has reached its snapshot soft limit and, if so,
+ * flag that in @dict.
+ *
+ * The effective hard limit is the smaller of the volume's
+ * snap_max_hard_limit and the configured hard limit; the soft limit is
+ * the configured soft-limit percentage of that value. When snap_count
+ * is at or above the soft limit and auto-delete is not enabled, a
+ * warning is logged and "soft-limit-reach" is set in @dict.
+ *
+ * Returns 0 unless setting the flag in @dict fails, in which case the
+ * dict_set_int8() error is returned.
+ */
+int32_t
+glusterd_is_snap_soft_limit_reached (glusterd_volinfo_t *volinfo, dict_t *dict)
+{
+        xlator_t        *this                = NULL;
+        glusterd_conf_t *priv                = NULL;
+        uint64_t         opt_max_hard        = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+        uint64_t         opt_max_soft        = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+        uint64_t         effective_max_limit = 0;
+        uint64_t         soft_limit          = 0;
+        int              auto_delete         = 0;
+        int32_t          ret                 = -1;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* snap-max-hard-limit and snap-max-soft-limit are optional, so a
+         * missing value is not an error; the defaults above stay in place.
+         */
+        gd_get_snap_conf_values_if_present (priv->opts, &opt_max_hard,
+                                            &opt_max_soft);
+
+        /* "auto-delete" may not have been set explicitly either; a
+         * missing key falls back to _gf_false.
+         */
+        auto_delete = dict_get_str_boolean (priv->opts,
+                                            GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+                                            _gf_false);
+
+        effective_max_limit =
+                (volinfo->snap_max_hard_limit < opt_max_hard)
+                        ? volinfo->snap_max_hard_limit
+                        : opt_max_hard;
+
+        soft_limit = (opt_max_soft * effective_max_limit)/100;
+
+        /* Below the soft limit, or auto-delete is on: nothing to flag. */
+        if (volinfo->snap_count < soft_limit || auto_delete == _gf_true) {
+                ret = 0;
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0,
+                GD_MSG_SOFT_LIMIT_REACHED, "Soft-limit "
+                "(value = %"PRIu64") of volume %s is reached. "
+                "Snapshot creation is not possible once effective "
+                "hard-limit (value = %"PRIu64") is reached.",
+                soft_limit, volinfo->volname, effective_max_limit);
+
+        ret = dict_set_int8 (dict, "soft-limit-reach", _gf_true);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to "
+                        "set soft limit exceed flag in "
+                        "response dictionary");
+        }
+out:
+        return ret;
+}
+
+/* This function sets the parameters sys_hard_limit and sys_soft_limit
+ * to the values stored in the dictionary. If a value is not present,
+ * the corresponding parameter is left untouched, so the caller is
+ * expected to pre-load it with the default. Hence this function does
+ * not return any value.
+ */
+/*
+ * Read the snapshot hard and soft limits from @dict.
+ *
+ * @sys_hard_limit and @sys_soft_limit are passed straight to
+ * dict_get_uint64(). A key that cannot be read is only reported in a
+ * debug message and is not treated as an error.
+ */
+void
+gd_get_snap_conf_values_if_present (dict_t *dict, uint64_t *sys_hard_limit,
+                                    uint64_t *sys_soft_limit)
+{
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (dict);
+
+        /* "snap-max-hard-limit" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence not erroring out if Key is not found.
+         */
+        if (dict_get_uint64 (dict, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                             sys_hard_limit)) {
+                /* The split literal previously ran "in" and "dictionary"
+                 * together. */
+                gf_msg_debug (this->name, 0, "%s is not present in "
+                              "dictionary",
+                              GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+        }
+
+        /* "snap-max-soft-limit" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence not erroring out if Key is not found.
+         */
+        if (dict_get_uint64 (dict, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+                             sys_soft_limit)) {
+                gf_msg_debug (this->name, 0, "%s is not present in "
+                              "dictionary",
+                              GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+        }
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
new file mode 100644
index 00000000000..c0e7e8e218d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
@@ -0,0 +1,166 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_SNAP_UTILS_H
+#define _GLUSTERD_SNAP_UTILS_H
+
+int32_t
+glusterd_snap_volinfo_find (char *volname, glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname,
+ glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo);
+
+int
+glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id,
+ glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_add_snapd_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *dict, int32_t count);
+
+int
+glusterd_compare_snap_time (struct cds_list_head *, struct cds_list_head *);
+
+int
+glusterd_compare_snap_vol_time (struct cds_list_head *, struct cds_list_head *);
+
+int32_t
+glusterd_snap_volinfo_restore (dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ int32_t volcount);
+int32_t
+glusterd_snapobject_delete (glusterd_snap_t *snap);
+
+int32_t
+glusterd_cleanup_snaps_for_volume (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo);
+
+int32_t
+glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op);
+
+int32_t
+glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_number, int32_t op);
+
+int32_t
+glusterd_add_missed_snaps_to_export_dict (dict_t *peer_data);
+
+int32_t
+glusterd_import_friend_missed_snap_list (dict_t *peer_data);
+
+int
+gd_restore_snap_volume (dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *orig_vol,
+ glusterd_volinfo_t *snap_vol,
+ int32_t volcount);
+
+int32_t
+glusterd_mount_lvm_snapshot (glusterd_brickinfo_t *brickinfo,
+ char *brick_mount_path);
+
+int32_t
+glusterd_umount (const char *path);
+
+int32_t
+glusterd_add_snapshots_to_export_dict (dict_t *peer_data);
+
+int32_t
+glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
+ uuid_t peerid);
+
+int32_t
+glusterd_store_create_snap_dir (glusterd_snap_t *snap);
+
+int32_t
+glusterd_copy_file (const char *source, const char *destination);
+
+int32_t
+glusterd_copy_folder (const char *source, const char *destination);
+
+int32_t
+glusterd_get_geo_rep_session (char *slave_key, char *origin_volname,
+ dict_t *gsync_slaves_dict, char *session,
+ char *slave);
+
+int32_t
+glusterd_restore_geo_rep_files (glusterd_volinfo_t *snap_vol);
+
+int
+glusterd_restore_nfs_ganesha_file (glusterd_volinfo_t *src_vol,
+ glusterd_snap_t *snap);
+int32_t
+glusterd_copy_quota_files (glusterd_volinfo_t *src_vol,
+ glusterd_volinfo_t *dest_vol,
+ gf_boolean_t *conf_present);
+
+int
+glusterd_copy_nfs_ganesha_file (glusterd_volinfo_t *src_vol,
+ glusterd_volinfo_t *dest_vol);
+
+int
+glusterd_snap_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+
+int
+gd_add_vol_snap_details_to_dict (dict_t *dict, char *prefix,
+ glusterd_volinfo_t *volinfo);
+
+int
+gd_add_brick_snap_details_to_dict (dict_t *dict, char *prefix,
+ glusterd_brickinfo_t *brickinfo);
+
+int
+gd_import_new_brick_snap_details (dict_t *dict, char *prefix,
+ glusterd_brickinfo_t *brickinfo);
+
+int
+gd_import_volume_snap_details (dict_t *dict, glusterd_volinfo_t *volinfo,
+ char *prefix, char *volname);
+
+int32_t
+glusterd_snap_quorum_check (dict_t *dict, gf_boolean_t snap_volume,
+ char **op_errstr, uint32_t *op_errno);
+
+int32_t
+glusterd_snap_brick_create (glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_count);
+
+int
+glusterd_snapshot_restore_cleanup (dict_t *rsp_dict,
+ char *volname,
+ glusterd_snap_t *snap);
+
+void
+glusterd_get_snapd_dir (glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+int
+glusterd_is_snapd_enabled (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_check_and_set_config_limit (glusterd_conf_t *priv);
+
+int32_t
+glusterd_is_snap_soft_limit_reached (glusterd_volinfo_t *volinfo,
+ dict_t *dict);
+
+void
+gd_get_snap_conf_values_if_present (dict_t *opts, uint64_t *sys_hard_limit,
+ uint64_t *sys_soft_limit);
+
+#endif
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
new file mode 100644
index 00000000000..5d5bdead416
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -0,0 +1,9990 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <signal.h>
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
+
+#ifdef __NetBSD__
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#endif
+
+#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/mount.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#endif
+
+#include <regex.h>
+
+#include "globals.h"
+#include "compat.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "run.h"
+#include "glusterd-volgen.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-syncop.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-snapd-svc.h"
+
+#include "glusterfs3.h"
+
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+
+#include "lvm-defaults.h"
+
+char snap_mount_dir[PATH_MAX];
+/* Argument bundle for snapshot creation.
+ * NOTE(review): presumably handed to a per-brick worker/callback; the code
+ * that fills and consumes this struct is not visible here - confirm against
+ * the users of snap_create_args_t. */
+struct snap_create_args_ {
+ xlator_t *this;
+ dict_t *dict;
+ dict_t *rsp_dict;
+ glusterd_volinfo_t *snap_vol;
+ glusterd_brickinfo_t *brickinfo;
+ struct syncargs *args;
+ int32_t volcount;
+ int32_t brickcount;
+ int32_t brickorder;
+};
+
+/* This structure is used to store unsupported options and their values
+ * for snapshotted volume.
+ */
+struct gd_snap_unsupported_opt_t {
+ char *key; /* name of the unsupported option */
+ char *value; /* value stored for that option */
+};
+
+typedef struct snap_create_args_ snap_create_args_t;
+
+/* Build the device path of a snapshot LV.
+ *
+ * The volume group owning @device is queried with
+ * "/sbin/lvs --noheadings -o vg_name <device>" and the result is formatted
+ * as "/dev/<volume-group>/<snapname>_<brickcount>".
+ *
+ * @param device      device backing the brick
+ * @param snapname    name used for the snapshot LV
+ * @param brickcount  numeric suffix appended to the LV name
+ *
+ * @return newly allocated path string (allocated with gf_strdup), or NULL
+ *         when an argument is NULL, the lvs query fails, or allocation fails
+ */
+
+char *
+glusterd_build_snap_device_path (char *device, char *snapname,
+                                 int32_t brickcount)
+{
+        char        vg_name[PATH_MAX]  = "";
+        char        dev_path[PATH_MAX] = "";
+        char        log_msg[1024]      = "";
+        char       *result             = NULL;
+        runner_t    runner             = {0,};
+        xlator_t   *this               = THIS;
+
+        GF_ASSERT (this);
+
+        if (device == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "device is NULL");
+                return NULL;
+        }
+
+        if (snapname == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "snapname is NULL");
+                return NULL;
+        }
+
+        /* Ask LVM which volume group the device belongs to */
+        runinit (&runner);
+        runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o",
+                         "vg_name", device, NULL);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        snprintf (log_msg, sizeof (log_msg),
+                  "Get volume group for device %s", device);
+        runner_log (&runner, this->name, GF_LOG_DEBUG, log_msg);
+
+        if (runner_start (&runner) == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_VG_GET_FAIL,
+                        "Failed to get volume group for device %s", device);
+                runner_end (&runner);
+                return NULL;
+        }
+
+        /* First line of the command output is the volume group name */
+        if (fgets (vg_name, sizeof (vg_name),
+                   runner_chio (&runner, STDOUT_FILENO)) == NULL ||
+            vg_name[0] == '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_VG_GET_FAIL,
+                        "Failed to get volume group for snap %s", snapname);
+                runner_end (&runner);
+                return NULL;
+        }
+        runner_end (&runner);
+
+        snprintf (dev_path, sizeof (dev_path), "/dev/%s/%s_%d",
+                  gf_trim (vg_name), snapname, brickcount);
+
+        result = gf_strdup (dev_path);
+        if (result == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                        GD_MSG_NO_MEMORY,
+                        "Cannot copy the snapshot device name for snapname: %s",
+                        snapname);
+        }
+
+        return result;
+}
+
+/* Walk the bricks of @vol and, for every brick hosted on another node whose
+ * peer is either not connected or not in the befriended state, record a
+ * missed-snapshot entry for operation @op in @rsp_dict.
+ *
+ * Returns 0 on success, or the non-zero value returned by
+ * glusterd_add_missed_snaps_to_dict() on the first failure.
+ */
+static int32_t
+glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol,
+                           struct cds_list_head *peers, int32_t op)
+{
+        xlator_t              *this      = THIS;
+        glusterd_brickinfo_t  *brick     = NULL;
+        glusterd_peerinfo_t   *peer      = NULL;
+        int32_t                brick_num = 0;
+        int32_t                ret       = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (peers);
+        GF_ASSERT (vol);
+
+        cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                /* 1-based position of this brick within the volume */
+                brick_num++;
+
+                /* Bricks hosted on this node cannot be "missed" */
+                if (gf_uuid_compare (brick->uuid, MY_UUID) == 0)
+                        continue;
+
+                rcu_read_lock ();
+                cds_list_for_each_entry_rcu (peer, peers, uuid_list) {
+                        /* Only the peer owning the brick is of interest */
+                        if (gf_uuid_compare (peer->uuid, brick->uuid) != 0)
+                                continue;
+
+                        /* A connected, befriended peer handles the
+                         * operation itself */
+                        if (peer->connected &&
+                            (peer->state.state == GD_FRIEND_STATE_BEFRIENDED))
+                                continue;
+
+                        ret = glusterd_add_missed_snaps_to_dict (rsp_dict,
+                                                                 vol, brick,
+                                                                 brick_num,
+                                                                 op);
+                        if (ret == 0)
+                                continue;
+
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_CREATE_FAIL,
+                                "Failed to add missed snapshot info for "
+                                "%s:%s in the rsp_dict", brick->hostname,
+                                brick->path);
+                        rcu_read_unlock ();
+                        goto out;
+                }
+                rcu_read_unlock ();
+        }
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Publish the snapshot limits of a single volume into @rsp_dict.
+ *
+ * Sets the keys "volume<N>-volname", "volume<N>-snap-max-hard-limit",
+ * "volume<N>-active-hard-limit" and "volume<N>-snap-max-soft-limit", where
+ * <N> is @vol_index. The active hard limit is the smaller of the volume's
+ * own hard limit and @opt_hard_max; the soft limit is @opt_soft_max percent
+ * of the active hard limit.
+ *
+ * On failure the reason is written to @err_str (at most @err_len bytes) and
+ * the non-zero dict error is returned.
+ */
+static int
+snap_max_limits_add_volume (dict_t *rsp_dict, glusterd_volinfo_t *volinfo,
+                            uint64_t vol_index, uint64_t opt_hard_max,
+                            uint64_t opt_soft_max, char *err_str,
+                            size_t err_len)
+{
+        char      key[PATH_MAX]     = "";
+        uint64_t  snap_max_limit    = volinfo->snap_max_hard_limit;
+        uint64_t  active_hard_limit = 0;
+        uint64_t  soft_limit_value  = 0;
+        int       ret               = -1;
+
+        active_hard_limit = (snap_max_limit > opt_hard_max) ? opt_hard_max
+                                                            : snap_max_limit;
+        soft_limit_value = (opt_soft_max * active_hard_limit) / 100;
+
+        snprintf (key, sizeof (key), "volume%"PRIu64"-volname", vol_index);
+        ret = dict_set_str (rsp_dict, key, volinfo->volname);
+        if (ret)
+                goto fail;
+
+        snprintf (key, sizeof (key),
+                  "volume%"PRIu64"-snap-max-hard-limit", vol_index);
+        ret = dict_set_uint64 (rsp_dict, key, snap_max_limit);
+        if (ret)
+                goto fail;
+
+        snprintf (key, sizeof (key),
+                  "volume%"PRIu64"-active-hard-limit", vol_index);
+        ret = dict_set_uint64 (rsp_dict, key, active_hard_limit);
+        if (ret)
+                goto fail;
+
+        snprintf (key, sizeof (key),
+                  "volume%"PRIu64"-snap-max-soft-limit", vol_index);
+        ret = dict_set_uint64 (rsp_dict, key, soft_limit_value);
+        if (ret)
+                goto fail;
+
+        return 0;
+fail:
+        snprintf (err_str, err_len, "Failed to set %s", key);
+        return ret;
+}
+
+/* Fill @rsp_dict with the snapshot configuration to be displayed.
+ *
+ * @param rsp_dict   response dictionary that receives the values
+ * @param volname    volume to report on; NULL reports every volume that is
+ *                   not itself a snap volume
+ * @param op_errstr  caller-provided buffer that receives the error text on
+ *                   failure
+ * @param len        size of @op_errstr in bytes
+ *
+ * Besides the per-volume keys, "voldisplaycount", the system-wide hard and
+ * soft limits, and the auto-delete / activate-on-create settings are set.
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+snap_max_limits_display_commit (dict_t *rsp_dict, char *volname,
+                                char *op_errstr, int len)
+{
+        char                 err_str[PATH_MAX] = "";
+        glusterd_conf_t     *conf              = NULL;
+        glusterd_volinfo_t  *volinfo           = NULL;
+        int                  ret               = -1;
+        uint64_t             count             = 0;
+        xlator_t            *this              = NULL;
+        uint64_t             opt_hard_max      = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+        uint64_t             opt_soft_max      = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+        char                *auto_delete       = "disable";
+        char                *snap_activate     = "disable";
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_errstr);
+
+        conf = this->private;
+
+        GF_ASSERT (conf);
+
+        /* config values snap-max-hard-limit and snap-max-soft-limit are
+         * optional and hence we are not erroring out if values are not
+         * present
+         */
+        gd_get_snap_conf_values_if_present (conf->opts, &opt_hard_max,
+                                            &opt_soft_max);
+
+        if (!volname) {
+                /* For system limit: report every non-snap volume */
+                cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+                        if (volinfo->is_snap_volume == _gf_true)
+                                continue;
+
+                        ret = snap_max_limits_add_volume (rsp_dict, volinfo,
+                                                          count, opt_hard_max,
+                                                          opt_soft_max,
+                                                          err_str,
+                                                          sizeof (err_str));
+                        if (ret)
+                                goto out;
+                        count++;
+                }
+        } else {
+                /* For one volume */
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX, "Volume (%s) does not "
+                                  "exist", volname);
+                        goto out;
+                }
+
+                ret = snap_max_limits_add_volume (rsp_dict, volinfo, count,
+                                                  opt_hard_max, opt_soft_max,
+                                                  err_str, sizeof (err_str));
+                if (ret)
+                        goto out;
+                count++;
+        }
+
+        ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set voldisplaycount");
+                goto out;
+        }
+
+        ret = dict_set_uint64 (rsp_dict,
+                               GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                               opt_hard_max);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set %s in response dictionary",
+                          GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+                goto out;
+        }
+
+        ret = dict_set_uint64 (rsp_dict,
+                               GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+                               opt_soft_max);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set %s in response dictionary",
+                          GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+                goto out;
+        }
+
+        /* "auto-delete" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence the lookup result is deliberately ignored.
+         */
+        (void) dict_get_str (conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+                             &auto_delete);
+
+        ret = dict_set_dynstr_with_alloc (rsp_dict,
+                                          GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+                                          auto_delete);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set %s in response dictionary",
+                          GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE);
+                goto out;
+        }
+
+        /* "snap-activate-on-create" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence the lookup result is deliberately ignored.
+         */
+        (void) dict_get_str (conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+                             &snap_activate);
+
+        ret = dict_set_dynstr_with_alloc (rsp_dict,
+                                          GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+                                          snap_activate);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set %s in response dictionary",
+                          GLUSTERD_STORE_KEY_SNAP_ACTIVATE);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret) {
+                /* snprintf always NUL-terminates, unlike strncpy */
+                if (len > 0)
+                        snprintf (op_errstr, len, "%s", err_str);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "%s", err_str);
+        }
+        return ret;
+}
+
+
+/* scandir() filter used by glusterd_copy_geo_rep_session_files.
+ * Rejects NULL entries and the "." and ".." directory entries; every other
+ * entry is accepted.
+ */
+int
+file_select (const struct dirent *entry)
+{
+        const char *name = NULL;
+
+        if (!entry)
+                return (FALSE);
+
+        name = entry->d_name;
+        if (!strcmp (name, ".") || !strcmp (name, ".."))
+                return (FALSE);
+
+        return (TRUE);
+}
+
+/* Copy the geo-replication session files of @session into the snapshot's
+ * own geo-rep directory.
+ *
+ * Only files in "<workdir>/<GEOREP>/<session>" whose names end in "status"
+ * or "conf" are copied; the destination directory is created first.
+ *
+ * @param session   name of the geo-rep session directory
+ * @param snap_vol  snapshot volume; its snapshot name selects the
+ *                  destination directory
+ *
+ * @return 0 on success, non-zero on failure (including the case where the
+ *         session directory has no entries)
+ */
+int32_t
+glusterd_copy_geo_rep_session_files (char *session,
+                                     glusterd_volinfo_t *snap_vol)
+{
+        int32_t          ret                          = -1;
+        char             snap_session_dir[PATH_MAX]   = "";
+        char             georep_session_dir[PATH_MAX] = "";
+        regex_t          reg_exp                      = {0,};
+        gf_boolean_t     reg_compiled                 = _gf_false;
+        int              file_count                   = -1;
+        struct dirent  **files                        = NULL;
+        xlator_t        *this                         = NULL;
+        int              i                            = 0;
+        char             src_path[PATH_MAX]           = "";
+        char             dest_path[PATH_MAX]          = "";
+        glusterd_conf_t *priv                         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (session);
+        GF_ASSERT (snap_vol);
+
+        ret = snprintf (georep_session_dir, sizeof (georep_session_dir),
+                        "%s/%s/%s", priv->workdir, GEOREP,
+                        session);
+        if (ret < 0) { /* Negative value is an error */
+                goto out;
+        }
+
+        ret = snprintf (snap_session_dir, sizeof (snap_session_dir),
+                        "%s/%s/%s/%s/%s", priv->workdir,
+                        GLUSTERD_VOL_SNAP_DIR_PREFIX,
+                        snap_vol->snapshot->snapname, GEOREP, session);
+        if (ret < 0) { /* Negative value is an error */
+                goto out;
+        }
+
+        ret = mkdir_p (snap_session_dir, 0777, _gf_true);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Creating directory %s failed", snap_session_dir);
+                goto out;
+        }
+
+        ret = regcomp (&reg_exp, "(.*status$)|(.*conf$)\0", REG_EXTENDED);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REG_COMPILE_FAILED,
+                        "Failed to compile the regular expression");
+                goto out;
+        }
+        /* From here on the pattern owns memory that regfree() must release */
+        reg_compiled = _gf_true;
+
+        /* If there are no files in a particular session then fail it*/
+        file_count = scandir (georep_session_dir, &files, file_select,
+                              alphasort);
+        if (file_count <= 0) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, ENOENT,
+                        GD_MSG_FILE_OP_FAILED, "Session files not present "
+                        "in %s", georep_session_dir);
+                goto out;
+        }
+
+        /* Now compare the file name with regular expression to see if
+         * there is a match
+         */
+        for (i = 0 ; i < file_count; i++) {
+                if (regexec (&reg_exp, files[i]->d_name, 0, NULL, 0))
+                        continue;
+
+                ret = snprintf (src_path, sizeof (src_path), "%s/%s",
+                                georep_session_dir, files[i]->d_name);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                ret = snprintf (dest_path , sizeof (dest_path), "%s/%s",
+                                snap_session_dir, files[i]->d_name);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                /* NOTE(review): the failure below is reported with
+                 * ENOMEM/GD_MSG_NO_MEMORY although it is a file copy
+                 * failure - confirm whether a file-op message id fits
+                 * better. */
+                ret = glusterd_copy_file (src_path, dest_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Could not copy file %s of session %s",
+                                files[i]->d_name, session);
+                        goto out;
+                }
+        }
+out:
+        /* files are malloc'd by scandir, free them */
+        if (file_count > 0) {
+                while (file_count--) {
+                        free(files[file_count]);
+                }
+                free(files);
+        }
+
+        if (reg_compiled)
+                regfree (&reg_exp);
+
+        return ret;
+}
+
+/* This function will take backup of the volume store
+ * of the to-be restored volume. This will help us to
+ * revert the operation if it fails.
+ *
+ * The volume directory is renamed to
+ * "<workdir>/<GLUSTERD_TRASH>/vols-<volname>.deleted" and an empty volume
+ * directory is re-created in its place. If anything fails after the rename,
+ * the rename is undone; the rollback is skipped when the rename never
+ * happened, so the live volume directory is left untouched.
+ *
+ * @param volinfo volinfo of the origin volume
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_snapshot_backup_vol (glusterd_volinfo_t *volinfo)
+{
+        char             pathname[PATH_MAX]    = {0,};
+        int              ret                   = -1;
+        int              op_ret                = 0;
+        char             delete_path[PATH_MAX] = {0,};
+        char             trashdir[PATH_MAX]    = {0,};
+        gf_boolean_t     vol_dir_moved         = _gf_false;
+        glusterd_conf_t *priv                  = NULL;
+        xlator_t        *this                  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (volinfo);
+
+        GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv);
+
+        snprintf (delete_path, sizeof (delete_path),
+                  "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir,
+                  volinfo->volname);
+
+        snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
+                  priv->workdir);
+
+        /* Create trash folder if it is not there */
+        ret = sys_mkdir (trashdir, 0777);
+        if (ret && errno != EEXIST) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Failed to create trash directory, reason : %s",
+                        strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        /* Move the origin volume folder to the backup location */
+        ret = sys_rename (pathname, delete_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Failed to rename snap "
+                        "directory %s to %s", pathname, delete_path);
+                goto out;
+        }
+        vol_dir_moved = _gf_true;
+
+        /* Re-create an empty origin volume folder so that restore can
+         * happen. */
+        ret = sys_mkdir (pathname, 0777);
+        if (ret && errno != EEXIST) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Failed to create origin "
+                        "volume directory (%s), reason : %s",
+                        pathname, strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* Save the actual return value */
+        op_ret = ret;
+        if (ret) {
+                /* Revert the changes in case of failure. The volume
+                 * directory is only touched if it was actually moved away;
+                 * otherwise pathname still is the live volume store. */
+                if (vol_dir_moved) {
+                        ret = sys_rmdir (pathname);
+                        if (ret) {
+                                gf_msg_debug (this->name, 0,
+                                              "Failed to rmdir: %s,err: %s",
+                                              pathname, strerror (errno));
+                        }
+
+                        ret = sys_rename (delete_path, pathname);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_FILE_OP_FAILED,
+                                        "Failed to rename directory %s to %s",
+                                        delete_path, pathname);
+                        }
+                }
+
+                /* Best effort: only succeeds when the trash dir is empty */
+                ret = sys_rmdir (trashdir);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "Failed to rmdir: %s, Reason: %s",
+                                      trashdir, strerror (errno));
+                }
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", op_ret);
+
+        return op_ret;
+}
+
+/* Copy the geo-rep session files of every slave session of @origin_vol
+ * into the snapshot that @snap_vol belongs to.
+ *
+ * Returns 0 immediately when the origin volume has no gsync_slaves
+ * dictionary. Otherwise the snapshot's geo-rep directory is created and,
+ * for keys "slave1" .. "slave<count>", the session is looked up and its
+ * files are copied. The first failure is returned to the caller.
+ */
+int32_t
+glusterd_copy_geo_rep_files (glusterd_volinfo_t *origin_vol,
+                             glusterd_volinfo_t *snap_vol, dict_t *rsp_dict)
+{
+        char             snapgeo_dir[PATH_MAX] = "";
+        char             session[PATH_MAX]     = "";
+        char             slave[PATH_MAX]       = "";
+        char             key[PATH_MAX]         = "";
+        glusterd_conf_t *priv                  = NULL;
+        xlator_t        *this                  = THIS;
+        int              slave_idx             = 1;
+        int32_t          ret                   = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (origin_vol);
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (rsp_dict);
+
+        /* No slave dictionary (e.g. the volume is itself a slave volume):
+         * nothing to copy.
+         */
+        if (origin_vol->gsync_slaves == NULL)
+                return 0;
+
+        GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot, priv);
+
+        ret = sys_mkdir (snapgeo_dir, 0777);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Creating directory %s failed", snapgeo_dir);
+                return ret;
+        }
+
+        while (slave_idx <= origin_vol->gsync_slaves->count) {
+                ret = snprintf (key, sizeof (key), "slave%d", slave_idx);
+                if (ret < 0) /* Negative value is an error */
+                        break;
+
+                ret = glusterd_get_geo_rep_session (key, origin_vol->volname,
+                                                    origin_vol->gsync_slaves,
+                                                    session, slave);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GEOREP_GET_FAILED,
+                                "Failed to get geo-rep session");
+                        break;
+                }
+
+                ret = glusterd_copy_geo_rep_session_files (session, snap_vol);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Failed to copy files related to session %s",
+                                session);
+                        break;
+                }
+
+                slave_idx++;
+        }
+
+        return ret;
+}
+
+/* This function will restore a snapshot volumes
+ *
+ * For every volume of the snapshot, the parent volume is looked up, the
+ * keys "snapuuid", "volname" and "volid" are set in the response
+ * dictionary, missed restores are recorded (on the originator glusterd
+ * only), and the snapshot volume is restored over its parent. The parent's
+ * volinfo is then detached and unreferenced.
+ *
+ * @param dict          dictionary containing snapshot restore request
+ * @param op_errstr     In case of any failure error message will be returned
+ *                      in this variable
+ * @param rsp_dict      response dictionary
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int                     ret             = -1;
+        int32_t                 volcount        = -1;
+        char                    *snapname       = NULL;
+        xlator_t                *this           = NULL;
+        glusterd_volinfo_t      *snap_volinfo   = NULL;
+        glusterd_volinfo_t      *tmp            = NULL;
+        glusterd_volinfo_t      *parent_volinfo = NULL;
+        glusterd_snap_t         *snap           = NULL;
+        glusterd_conf_t         *priv           = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get snap name");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (NULL == snap) {
+                ret = gf_asprintf (op_errstr, "Snapshot (%s) does not exist",
+                                   snapname);
+                if (ret < 0) {
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "%s", *op_errstr);
+                ret = -1;
+                goto out;
+        }
+
+        volcount = 0;
+        cds_list_for_each_entry_safe (snap_volinfo, tmp, &snap->volumes,
+                                      vol_list) {
+                volcount++;
+                ret = glusterd_volinfo_find (snap_volinfo->parent_volname,
+                                             &parent_volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND,
+                                "Could not get volinfo of %s",
+                                snap_volinfo->parent_volname);
+                        goto out;
+                }
+
+                ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
+                                                  uuid_utoa (snap->snap_id));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set snap "
+                                "uuid in response dictionary for %s snapshot",
+                                snap->snapname);
+                        goto out;
+                }
+
+                ret = dict_set_dynstr_with_alloc (rsp_dict, "volname",
+                                                  snap_volinfo->parent_volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set "
+                                "volname in response dictionary for %s snapshot",
+                                snap->snapname);
+                        goto out;
+                }
+
+                ret = dict_set_dynstr_with_alloc (rsp_dict, "volid",
+                                        uuid_utoa (parent_volinfo->volume_id));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set "
+                                "volid in response dictionary for %s snapshot",
+                                snap->snapname);
+                        goto out;
+                }
+
+                if (is_origin_glusterd (dict) == _gf_true) {
+                        /* From origin glusterd check if      *
+                         * any peers with snap bricks is down */
+                        ret = glusterd_find_missed_snap
+                                               (rsp_dict, snap_volinfo,
+                                                &priv->peers,
+                                                GF_SNAP_OPTION_TYPE_RESTORE);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MISSED_SNAP_GET_FAIL,
+                                        "Failed to find missed snap restores");
+                                goto out;
+                        }
+                }
+
+                ret = gd_restore_snap_volume (dict, rsp_dict, parent_volinfo,
+                                              snap_volinfo, volcount);
+                if (ret) {
+                        /* No need to update op_errstr because it is assumed
+                         * that the called function will do that in case of
+                         * failure.
+                         */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_RESTORE_FAIL, "Failed to restore "
+                                "snap for %s", snapname);
+                        goto out;
+                }
+
+                /* Restore is successful therefore delete the original volume's
+                 * volinfo. If the volinfo is already restored then we should
+                 * delete the backend LVMs */
+                if (!gf_uuid_is_null (parent_volinfo->restored_from_snap)) {
+                        ret = glusterd_lvm_snapshot_remove (rsp_dict,
+                                                            parent_volinfo);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_LVM_REMOVE_FAILED,
+                                        "Failed to remove LVM backend");
+                        }
+                }
+
+                /* Detach the volinfo from priv->volumes, so that no new
+                 * command can ref it any more and then unref it.
+                 * This is done even when the LVM removal above failed; the
+                 * failure is reported right after.
+                 */
+                cds_list_del_init (&parent_volinfo->vol_list);
+                glusterd_volinfo_unref (parent_volinfo);
+
+                if (ret)
+                        goto out;
+        }
+
+        ret = 0;
+
+        /* TODO: Need to check if we need to delete the snap after the
+         * operation is successful or not. Also need to persist the state
+         * of restore operation in the store.
+         */
+out:
+        return ret;
+}
+
+/* Set the string @value under the key "snap<volcount>.brick<brick_count>.
+ * <field>" in @rsp_dict, logging an error if the dictionary rejects it.
+ * Returns the dict_set_str() result.
+ */
+static int
+gd_restore_prevalidate_set_brick_str (xlator_t *this, dict_t *rsp_dict,
+                                      int32_t volcount, int32_t brick_count,
+                                      const char *field, char *value)
+{
+        char    key[PATH_MAX] = {0, };
+        int     ret           = -1;
+
+        snprintf (key, sizeof (key), "snap%d.brick%d.%s",
+                  volcount, brick_count, field);
+        ret = dict_set_str (rsp_dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set %s", key);
+        }
+        return ret;
+}
+
+/* This function is called before actual restore is taken place. This function
+ * will validate whether the snapshot volumes are ready to be restored or not.
+ *
+ * It checks that the snapshot exists and has not been restored already,
+ * that every volume named in the request exists and is stopped, takes a
+ * backup of each volume's store directory, and finally publishes the
+ * details of the snapshot bricks hosted on this node in the response
+ * dictionary.
+ *
+ * @param dict          dictionary containing snapshot restore request
+ * @param op_errstr     In case of any failure error message will be returned
+ *                      in this variable
+ * @param op_errno      receives a glusterd specific error code on failure
+ * @param rsp_dict      response dictionary
+ * @return Negative value on Failure and 0 in success
+ */
+int32_t
+glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr,
+                                       uint32_t *op_errno, dict_t *rsp_dict)
+{
+        int                     ret             = -1;
+        int32_t                 i               = 0;
+        int32_t                 volcount        = 0;
+        int32_t                 brick_count     = 0;
+        gf_boolean_t            snap_restored   = _gf_false;
+        char                    key[PATH_MAX]   = {0, };
+        char                    *volname        = NULL;
+        char                    *snapname       = NULL;
+        glusterd_volinfo_t      *volinfo        = NULL;
+        glusterd_brickinfo_t    *brickinfo      = NULL;
+        glusterd_snap_t         *snap           = NULL;
+        xlator_t                *this           = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get "
+                        "snap name");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (NULL == snap) {
+                ret = gf_asprintf (op_errstr, "Snapshot (%s) does not exist",
+                                   snapname);
+                /* NOTE(review): EG_SNAPEXST is reported for a snapshot that
+                 * does NOT exist - confirm whether a "no such snapshot"
+                 * code is available and intended here. */
+                *op_errno = EG_SNAPEXST;
+                if (ret < 0) {
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND, "%s", *op_errstr);
+                ret = -1;
+                goto out;
+        }
+
+        snap_restored = snap->snap_restored;
+
+        if (snap_restored) {
+                ret = gf_asprintf (op_errstr, "Snapshot (%s) is already "
+                                  "restored", snapname);
+                if (ret < 0) {
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED, "%s", *op_errstr);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_str (rsp_dict, "snapname", snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "snap name(%s)", snapname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "volcount", &volcount);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get volume count");
+                goto out;
+        }
+
+        /* Snapshot restore will only work if all the volumes,
+           that are part of the snapshot, are stopped. */
+        for (i = 1; i <= volcount; ++i) {
+                snprintf (key, sizeof (key), "volname%d", i);
+                ret = dict_get_str (dict, key, &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to "
+                                "get volume name");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        ret = gf_asprintf (op_errstr, "Volume (%s) "
+                                           "does not exist", volname);
+                        *op_errno = EG_NOVOL;
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (glusterd_is_volume_started (volinfo)) {
+                        ret = gf_asprintf (op_errstr, "Volume (%s) has been "
+                        "started. Volume needs to be stopped before restoring "
+                        "a snapshot.", volname);
+                        *op_errno = EG_VOLRUN;
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPSHOT_OP_FAILED, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Take backup of the volinfo folder */
+                ret = glusterd_snapshot_backup_vol (volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_OP_FAILED,
+                                "Failed to backup "
+                                "volume backend files for %s volume",
+                                volinfo->volname);
+                        goto out;
+                }
+        }
+
+        /* Get brickinfo for snap_volumes */
+        volcount = 0;
+        cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+                volcount++;
+                brick_count = 0;
+
+                cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                        brick_count++;
+                        /* Only bricks hosted on this node are reported */
+                        if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+                                continue;
+
+                        ret = gd_restore_prevalidate_set_brick_str
+                                        (this, rsp_dict, volcount,
+                                         brick_count, "path",
+                                         brickinfo->path);
+                        if (ret)
+                                goto out;
+
+                        snprintf (key, sizeof (key),
+                                  "snap%d.brick%d.snap_status",
+                                  volcount, brick_count);
+                        ret = dict_set_int32 (rsp_dict, key,
+                                              brickinfo->snap_status);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to set %s", key);
+                                goto out;
+                        }
+
+                        ret = gd_restore_prevalidate_set_brick_str
+                                        (this, rsp_dict, volcount,
+                                         brick_count, "device_path",
+                                         brickinfo->device_path);
+                        if (ret)
+                                goto out;
+
+                        ret = gd_restore_prevalidate_set_brick_str
+                                        (this, rsp_dict, volcount,
+                                         brick_count, "fs_type",
+                                         brickinfo->fstype);
+                        if (ret)
+                                goto out;
+
+                        ret = gd_restore_prevalidate_set_brick_str
+                                        (this, rsp_dict, volcount,
+                                         brick_count, "mnt_opts",
+                                         brickinfo->mnt_opts);
+                        if (ret)
+                                goto out;
+                }
+
+                snprintf (key, sizeof (key), "snap%d.brick_count", volcount);
+                ret = dict_set_int32 (rsp_dict, key, brick_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set %s", key);
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (rsp_dict, "volcount", volcount);
+        if (ret) {
+                /* Name the key that actually failed; the key buffer still
+                 * holds the last per-snap key at this point. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set %s", "volcount");
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/* Validate a requested snap-max-hard-limit.
+ *
+ * Rejects the request when @volname names an existing snap volume, or when
+ * @value exceeds the allowed maximum. The maximum is
+ * GLUSTERD_SNAPS_MAX_HARD_LIMIT, except that for a non-zero @value on a
+ * named volume it is the system-wide limit stored in conf->opts (when
+ * present).
+ *
+ * @param dict       request dictionary (only asserted to be non-NULL here)
+ * @param volname    volume the limit is for, or NULL for the system limit
+ * @param value      requested hard limit
+ * @param op_errstr  receives an allocated copy of the error text on failure
+ *
+ * @return 0 when the value is acceptable, -1 otherwise
+ */
+int
+snap_max_hard_limits_validate (dict_t *dict, char *volname,
+                               uint64_t value, char **op_errstr)
+{
+        char                 err_str[PATH_MAX] = "";
+        xlator_t            *this              = THIS;
+        glusterd_conf_t     *conf              = NULL;
+        glusterd_volinfo_t  *volinfo           = NULL;
+        uint64_t             system_limit      = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+        uint64_t             upper_bound       = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+        int                  ret               = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        conf = this->private;
+
+        GF_ASSERT (conf);
+
+        /* A snap volume cannot carry its own hard limit */
+        if (volname &&
+            (glusterd_volinfo_find (volname, &volinfo) == 0) &&
+            volinfo->is_snap_volume) {
+                ret = -1;
+                snprintf (err_str, PATH_MAX,
+                          "%s is a snap volume. Configuring "
+                          "snap-max-hard-limit for a snap "
+                          "volume is prohibited.", volname);
+                goto out;
+        }
+
+        /* The system-wide limit is optional; when it is absent the default
+         * stays in effect and this is not treated as an error.
+         */
+        if (dict_get_uint64 (conf->opts,
+                             GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                             &system_limit) != 0) {
+                gf_msg_debug (this->name, 0, "%s is not present in "
+                              "opts dictionary",
+                              GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+        }
+
+        /* A per-volume limit may not exceed the system-wide limit */
+        if (value && volname)
+                upper_bound = system_limit;
+
+        if (value > upper_bound) {
+                ret = -1;
+                snprintf (err_str, PATH_MAX, "Invalid snap-max-hard-limit "
+                          "%"PRIu64 ". Expected range 1 - %"PRIu64,
+                          value, upper_bound);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret != 0) {
+                *op_errstr = gf_strdup (err_str);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED, "%s", err_str);
+        }
+        return ret;
+}
+
+/* Pre-validate a "snapshot config" request carried in dict.
+ *
+ * Only GF_SNAP_CONFIG_TYPE_SET requests are checked; any other
+ * "config-command" value returns 0 straight away. Checks run in this
+ * order and stop at the first failure:
+ *   1. if "volname" is present, the volume must exist;
+ *   2. a non-zero hard limit is passed to snap_max_hard_limits_validate;
+ *   3. a non-zero soft limit must not exceed
+ *      GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT;
+ *   4. if neither limit was supplied, exactly one of the auto-delete /
+ *      activate-on-create keys must be present, hold a valid boolean and
+ *      differ from the value currently stored in conf->opts.
+ *
+ * @param dict       request dictionary
+ * @param op_errstr  receives a gf_strdup'ed copy of the error text when
+ *                   this function filled err_str
+ * @param op_errno   receives an error code on most failure paths
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_config_prevalidate (dict_t *dict, char **op_errstr,
+ uint32_t *op_errno)
+{
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int config_command = 0;
+ char err_str[PATH_MAX] = {0,};
+ glusterd_conf_t *conf = NULL;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ int32_t cur_auto_delete = 0;
+ int32_t req_auto_delete = 0;
+ int32_t cur_snap_activate = 0;
+ int32_t req_snap_activate = 0;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ ret = dict_get_int32 (dict, "config-command", &config_command);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "failed to get config-command type");
+ goto out;
+ }
+
+ /* Nothing to validate unless values are being set. */
+ if (config_command != GF_SNAP_CONFIG_TYPE_SET) {
+ ret = 0;
+ goto out;
+ }
+
+ /* The return value is not checked here; only volname being set
+ * decides whether the volume lookup runs. */
+ ret = dict_get_str (dict, "volname", &volname);
+ if (volname) {
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume (%s) does not exist.", volname);
+ *op_errno = EG_NOVOL;
+ goto out;
+ }
+ }
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present (dict, &hard_limit, &soft_limit);
+
+ if (hard_limit) {
+ /* Validations for snap-max-hard-limits */
+ /* op_errstr is handed to the callee; err_str stays empty on
+ * this failure path, so the out: block does not touch it. */
+ ret = snap_max_hard_limits_validate (dict, volname,
+ hard_limit, op_errstr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_HARD_LIMIT_SET_FAIL,
+ "snap-max-hard-limit validation failed.");
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (soft_limit) {
+ max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT;
+ if (soft_limit > max_limit) {
+ ret = -1;
+ snprintf (err_str, PATH_MAX, "Invalid "
+ "snap-max-soft-limit ""%"
+ PRIu64 ". Expected range 1 - %"PRIu64,
+ soft_limit, max_limit);
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ /* When a limit was supplied the auto-delete / activate-on-create
+ * keys below are not examined at all. */
+ if (hard_limit || soft_limit) {
+ ret = 0;
+ goto out;
+ }
+
+ if (dict_get(dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE)) {
+ req_auto_delete = dict_get_str_boolean (dict,
+ GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+ _gf_false);
+ if (req_auto_delete < 0) {
+ ret = -1;
+ snprintf (err_str, sizeof (err_str), "Please enter a "
+ "valid boolean value for auto-delete");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ /* Ignoring the error as the auto-delete is optional and
+ might not be present in the options dictionary.*/
+ cur_auto_delete = dict_get_str_boolean (conf->opts,
+ GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+ _gf_false);
+
+ /* Requesting the value that is already in effect is an error. */
+ if (cur_auto_delete == req_auto_delete) {
+ ret = -1;
+ if (cur_auto_delete == _gf_true)
+ snprintf (err_str, sizeof (err_str),
+ "auto-delete is already enabled");
+ else
+ snprintf (err_str, sizeof (err_str),
+ "auto-delete is already disabled");
+ *op_errno = EINVAL;
+ goto out;
+ }
+ } else if (dict_get(dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE)) {
+ req_snap_activate = dict_get_str_boolean (dict,
+ GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ _gf_false);
+ if (req_snap_activate < 0) {
+ ret = -1;
+ snprintf (err_str, sizeof (err_str), "Please enter a "
+ "valid boolean value for activate-on-create");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ /* Ignoring the error as the activate-on-create is optional and
+ might not be present in the options dictionary.*/
+ cur_snap_activate = dict_get_str_boolean (conf->opts,
+ GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ _gf_false);
+
+ /* Requesting the value that is already in effect is an error. */
+ if (cur_snap_activate == req_snap_activate) {
+ ret = -1;
+ if (cur_snap_activate == _gf_true)
+ snprintf (err_str, sizeof (err_str),
+ "activate-on-create is already enabled");
+ else
+ snprintf (err_str, sizeof (err_str),
+ "activate-on-create is already disabled");
+ *op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ /* A SET request that carries none of the known options. */
+ ret = -1;
+ snprintf (err_str, sizeof (err_str), "Invalid option");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ /* Only publish an error string when this function produced one. */
+ if (ret && err_str[0] != '\0') {
+ gf_msg (this->name, loglevel, 0,
+ GD_MSG_SNAPSHOT_OP_FAILED, "%s", err_str);
+ *op_errstr = gf_strdup (err_str);
+ }
+
+ return ret;
+}
+
+/* This function will be called from RPC handler routine.
+ * It dispatches a snapshot config request according to the
+ * "config-command" value stored in the dictionary:
+ *   - GF_SNAP_CONFIG_TYPE_SET runs glusterd_mgmt_v3_initiate_all_phases
+ *     (after setting "hold_vol_locks" to _gf_false when no "volname"
+ *     key is present);
+ *   - GF_SNAP_CONFIG_DISPLAY fills the dictionary through
+ *     snap_max_limits_display_commit and sends the CLI response itself;
+ *   - any other value is logged as unknown and fails with -1.
+ *
+ * @param req RPC request object. Required for sending a response back.
+ * @param op glusterd operation. Required for sending a response back.
+ * @param dict pointer to dictionary which will contain both
+ * request and response key-pair values.
+ * @param err_str buffer that receives an error message on some failures
+ * @param len size of err_str, used to bound the writes into it
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_handle_snapshot_config (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int32_t ret = -1;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ int config_command = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_VALIDATE_OR_GOTO (this->name, req, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+ /* TODO : Type of lock to be taken when we are setting
+ * limits system wide
+ */
+ ret = dict_get_int32 (dict, "config-command", &config_command);
+ if (ret) {
+ snprintf (err_str, len,
+ "Failed to get config-command type");
+ goto out;
+ }
+
+ /* volname is optional: the result is overwritten in every switch
+ * branch below, only the pointer is consulted. */
+ ret = dict_get_str (dict, "volname", &volname);
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (!volname) {
+ ret = dict_set_int32 (dict, "hold_vol_locks",
+ _gf_false);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Unable to set hold_vol_locks value "
+ "as _gf_false");
+ goto out;
+ }
+
+ }
+ ret = glusterd_mgmt_v3_initiate_all_phases (req, op, dict);
+ break;
+ case GF_SNAP_CONFIG_DISPLAY:
+ /* Reading data from local node only */
+ ret = snap_max_limits_display_commit (dict, volname,
+ err_str, len);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_HARD_LIMIT_SET_FAIL,
+ "snap-max-limit "
+ "display commit failed.");
+ goto out;
+ }
+
+ /* If everything is successful then send the response
+ * back to cli
+ */
+ ret = glusterd_op_send_cli_response (op, 0, 0, req, dict,
+ err_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_NO_CLI_RESP, "Failed to send cli "
+ "response");
+ goto out;
+ }
+
+ break;
+ default:
+ /* err_str is left untouched on this path. */
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_COMMAND_NOT_FOUND, "Unknown config type");
+ ret = -1;
+ break;
+ }
+out:
+ return ret;
+}
+/* Copy the per-brick pre-validation data for snapshot create/clone
+ * from a response dictionary (src) into the aggregate dictionary (dst).
+ *
+ * For every volume index 1..volcount and every brick index j below
+ * "vol<N>_brickcount", the brickdir, fstype, mnt_opts and
+ * brick_snapdevice values are read from src under index j and stored in
+ * dst under the brick's "order" value read from src. The brick status is
+ * read from and written to the same key, built from the order value.
+ *
+ * @param dst dictionary that receives the re-keyed values
+ * @param src dictionary holding the values keyed by local brick index
+ * @return 0 on success, non-zero when a required key cannot be read or
+ *         a value cannot be stored
+ */
+int
+glusterd_snap_create_clone_pre_val_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ char *snap_brick_dir = NULL;
+ char *snap_device = NULL;
+ char key[PATH_MAX] = "";
+ char *value = "";
+ char snapbrckcnt[PATH_MAX] = "";
+ char snapbrckord[PATH_MAX] = "";
+ int ret = -1;
+ int64_t i = -1;
+ int64_t j = -1;
+ int64_t volume_count = 0;
+ int64_t brick_count = 0;
+ int64_t brick_order = 0;
+ xlator_t *this = NULL;
+ int32_t brick_online = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dst);
+ GF_ASSERT (src);
+
+ ret = dict_get_int64 (src, "volcount", &volume_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "failed to "
+ "get the volume count");
+ goto out;
+ }
+
+ for (i = 0; i < volume_count; i++) {
+ memset (snapbrckcnt, '\0', sizeof(snapbrckcnt));
+ /* The snprintf result stored in ret is overwritten by the
+ * dict_get_int64 call on the next line. */
+ ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1,
+ "vol%"PRId64"_brickcount", i+1);
+ ret = dict_get_int64 (src, snapbrckcnt, &brick_count);
+ if (ret) {
+ /* A missing brick count is not an error: move on to
+ * the next volume. */
+ gf_msg_trace (this->name, 0,
+ "No bricks for this volume in this dict");
+ continue;
+ }
+
+ for (j = 0; j < brick_count; j++) {
+ /* Fetching data from source dict */
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".brickdir%"PRId64, i+1, j);
+ ret = dict_get_ptr (src, key,
+ (void **)&snap_brick_dir);
+ if (ret) {
+ /* Skips every remaining key of this brick. */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ /* Fetching brick order from source dict */
+ snprintf (snapbrckord, sizeof(snapbrckord) - 1,
+ "vol%"PRId64".brick%"PRId64".order", i+1, j);
+ ret = dict_get_int64 (src, snapbrckord, &brick_order);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get brick order");
+ goto out;
+ }
+
+ /* From here on values are stored in dst under
+ * brick_order instead of the source index j. */
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".brickdir%"PRId64, i+1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc (dst, key,
+ snap_brick_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".fstype%"PRId64, i+1, j);
+ ret = dict_get_str (src, key, &value);
+ if (ret) {
+ /* Skips mnt_opts, snap device and status for
+ * this brick. */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".fstype%"PRId64, i+1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc (dst, key, value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".mnt_opts%"PRId64, i+1, j);
+ ret = dict_get_str (src, key, &value);
+ if (ret) {
+ /* Skips snap device and status for this brick. */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".mnt_opts%"PRId64, i+1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc (dst, key, value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ /* Unlike the keys above, a missing snap device is fatal. */
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".brick_snapdevice%"PRId64,
+ i+1, j);
+ ret = dict_get_ptr (src, key,
+ (void **)&snap_device);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_device");
+ goto out;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%"PRId64".brick_snapdevice%"PRId64,
+ i+1, brick_order);
+ ret = dict_set_dynstr_with_alloc (dst, key,
+ snap_device);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ /* The status key is built from brick_order and is used
+ * unchanged for both the read from src and the write
+ * to dst. */
+ snprintf (key, sizeof (key),
+ "vol%"PRId64".brick%"PRId64".status", i+1, brick_order);
+ ret = dict_get_int32 (src, key, &brick_online);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "failed to "
+ "get the brick status");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dst, key, brick_online);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED, "failed to "
+ "set the brick status");
+ goto out;
+ }
+ brick_online = 0;
+ }
+ }
+ /* Reaching this point means no fatal error occurred; any non-zero
+ * ret left behind by a skipped (continue) entry is discarded. */
+ ret = 0;
+out:
+
+ gf_msg_trace (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Aggregate brickinfo's of the snap volumes to be restored from.
+ *
+ * For every volume index i in 1.."volcount" and every brick index j in
+ * 1.."snap<i>.brick_count", the path, snap_status, device_path, fs_type
+ * and mnt_opts entries are copied from src to dst under the same keys.
+ * A brick whose path key is missing from src is skipped; once the path
+ * is present, every other key of that brick is mandatory.
+ *
+ * @param dst dictionary receiving the aggregated brick entries
+ * @param src response dictionary to read the entries from
+ * @return 0 on success (including when src has no "volcount"),
+ *         non-zero on failure
+ */
+int32_t
+glusterd_snap_restore_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ char key[PATH_MAX] = "";
+ char *strvalue = NULL;
+ int32_t value = -1;
+ int32_t i = -1;
+ int32_t j = -1;
+ int32_t vol_count = -1;
+ int32_t brickcount = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!dst || !src) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY, "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (src, "volcount", &vol_count);
+ if (ret) {
+ /* No volume count means there is nothing to aggregate. */
+ gf_msg_debug (this->name, 0, "No volumes");
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 1; i <= vol_count; i++) {
+ snprintf (key, sizeof (key), "snap%d.brick_count", i);
+ ret = dict_get_int32 (src, key, &brickcount);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+
+ for (j = 1; j <= brickcount; j++) {
+ snprintf (key, sizeof (key), "snap%d.brick%d.path",
+ i, j);
+ ret = dict_get_str (src, key, &strvalue);
+ if (ret) {
+ /* The brickinfo will be present in
+ * another rsp_dict */
+ gf_msg_debug (this->name, 0,
+ "%s not present", key);
+ ret = 0;
+ continue;
+ }
+ ret = dict_set_dynstr_with_alloc (dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ /* The remaining keys are required once the path exists. */
+ snprintf (key, sizeof (key),
+ "snap%d.brick%d.snap_status", i, j);
+ ret = dict_get_int32 (src, key, &value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_int32 (dst, key, value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key),
+ "snap%d.brick%d.device_path", i, j);
+ ret = dict_get_str (src, key, &strvalue);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc (dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key),
+ "snap%d.brick%d.fs_type", i, j);
+ ret = dict_get_str (src, key, &strvalue);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc (dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key),
+ "snap%d.brick%d.mnt_opts", i, j);
+ ret = dict_get_str (src, key, &strvalue);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc (dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set %s", key);
+ goto out;
+ }
+ }
+ }
+
+out:
+ gf_msg_trace (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Merge a snapshot pre-validation response (src) into the aggregate
+ * dictionary (dst).
+ *
+ * The snapshot command is read from the "type" key of dst. Create and
+ * clone responses are handled by
+ * glusterd_snap_create_clone_pre_val_use_rsp_dict, restore responses by
+ * glusterd_snap_restore_use_rsp_dict; every other command needs no
+ * aggregation and succeeds without touching either dictionary.
+ *
+ * @param dst aggregate dictionary, also the source of the command type
+ * @param src response dictionary to merge
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+        int             ret             = -1;
+        int32_t         snap_command    = 0;
+        xlator_t       *this            = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!dst || !src) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "Source or Destination "
+                        "dict is empty.");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dst, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "unable to get the type of "
+                        "the snapshot command");
+                goto out;
+        }
+
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_snap_create_clone_pre_val_use_rsp_dict (dst,
+                                                                       src);
+                if (ret) {
+                        /* Same message id as the restore branch below:
+                         * the failure is "could not use the rsp dict",
+                         * not a plain dictionary lookup failure. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RSP_DICT_USE_FAIL, "Unable to use "
+                                "rsp dict");
+                        goto out;
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                ret = glusterd_snap_restore_use_rsp_dict (dst, src);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RSP_DICT_USE_FAIL, "Unable to use "
+                                "rsp dict");
+                        goto out;
+                }
+                break;
+        default:
+                break;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Record whether a brick process is currently running.
+ *
+ * The brick's pidfile path is built with GLUSTERD_GET_BRICK_PIDFILE and
+ * the result of gf_is_service_running on it is stored in dict as an
+ * int32 under key_prefix.
+ *
+ * @param dict       dictionary that receives the status
+ * @param volinfo    volume owning the brick
+ * @param brickinfo  brick to inspect
+ * @param key_prefix full key to store the status under; must not be NULL
+ * @return 0 on success, -1 when key_prefix is NULL, otherwise the
+ *         non-zero result of dict_set_int32
+ */
+int
+glusterd_add_brick_status_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo,
+                                   glusterd_brickinfo_t *brickinfo,
+                                   char *key_prefix)
+{
+        char                    pidfile[PATH_MAX]       = {0, };
+        glusterd_conf_t        *conf                    = NULL;
+        xlator_t               *this                    = NULL;
+        pid_t                   brick_pid               = 0;
+        int32_t                 is_running              = 0;
+        int                     status                  = -1;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (key_prefix == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "key prefix is NULL");
+                return status;
+        }
+
+        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
+
+        is_running = gf_is_service_running (pidfile, &brick_pid);
+
+        status = dict_set_int32 (dict, key_prefix, is_running);
+        if (status != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set %s", key_prefix);
+                return status;
+        }
+
+        return 0;
+}
+
+/* This function will check whether the given device
+ * is a thinly provisioned LV or not.
+ *
+ * It runs "/sbin/lvs --noheadings -o pool_lv <device>" and treats a
+ * non-empty (after trimming) first output line as the thin pool name.
+ *
+ * @param device    LV device path
+ * @param op_errno  set to EG_NOTTHINP whenever _gf_false is returned;
+ *                  left untouched when it is NULL
+ *
+ * @return _gf_true if LV is thin else _gf_false (also returned when an
+ *         argument is NULL or the lvs command cannot be run or read)
+ */
+gf_boolean_t
+glusterd_is_thinp_brick (char *device, uint32_t *op_errno)
+{
+        int             ret                     = -1;
+        char            msg [1024]              = "";
+        char            pool_name [PATH_MAX]    = "";
+        char           *ptr                     = NULL;
+        xlator_t       *this                    = NULL;
+        runner_t        runner                  = {0,};
+        gf_boolean_t    is_thin                 = _gf_false;
+
+        this = THIS;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, device, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        snprintf (msg, sizeof (msg), "Get thin pool name for device %s",
+                  device);
+
+        runinit (&runner);
+
+        runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o", "pool_lv",
+                         device, NULL);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+
+        ret = runner_start (&runner);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_TPOOL_GET_FAIL, "Failed to get thin pool "
+                        "name for device %s", device);
+                runner_end (&runner);
+                goto out;
+        }
+
+        ptr = fgets(pool_name, sizeof(pool_name),
+                    runner_chio (&runner, STDOUT_FILENO));
+        if (!ptr || !strlen(pool_name)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_TPOOL_GET_FAIL, "Failed to get pool name "
+                        "for device %s", device);
+                runner_end (&runner);
+                ret = -1;
+                goto out;
+        }
+
+        runner_end (&runner);
+
+        /* Trim all the whitespaces. */
+        ptr = gf_trim (pool_name);
+
+        /* If the LV has thin pool associated with this
+         * then it is a thinly provisioned LV else it is
+         * regular LV */
+        if (0 != ptr [0]) {
+                is_thin = _gf_true;
+        }
+
+out:
+        /* This label is also reached when op_errno itself failed the
+         * NULL validation above (or before it was validated at all), so
+         * it must be checked before being written through. */
+        if (!is_thin && op_errno)
+                *op_errno = EG_NOTTHINP;
+
+        return is_thin;
+}
+
+/* Pause the tier daemon of a tiered volume ahead of a snapshot.
+ *
+ * For a volume whose type is not GF_CLUSTER_TYPE_TIER this is a no-op
+ * that returns 0. Otherwise a temporary dictionary carrying
+ * "rebalance-command" = GF_DEFRAG_CMD_PAUSE_TIER and the volume name is
+ * passed to gd_brick_op_phase.
+ *
+ * @param this    glusterd xlator
+ * @param volinfo volume to pause
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_pause_tier (xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+        int                     ret             = -1;
+        dict_t                 *dict            = NULL;
+        char                   *op_errstr       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                ret = 0;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, "rebalance-command",
+                              GF_DEFRAG_CMD_PAUSE_TIER);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set rebalance-command");
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "volname", volinfo->volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set volname");
+                goto out;
+        }
+
+        ret = gd_brick_op_phase (GD_OP_DEFRAG_BRICK_VOLUME, NULL,
+                                 dict, &op_errstr);
+        if (ret) {
+                /* op_errstr may still be NULL on failure; never hand a
+                 * NULL pointer to a %s conversion. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_PAUSE_TIER_FAIL,
+                        "Failed to pause tier. Errstr=%s",
+                        op_errstr ? op_errstr : "(null)");
+                goto out;
+        }
+
+out:
+        /* NOTE(review): op_errstr is not released here; confirm whether
+         * gd_brick_op_phase hands ownership of it to the caller. */
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+
+/* Resume the tier daemon of every tiered volume named in snap_dict.
+ *
+ * snap_dict must hold a positive "volcount" and "volname<N>" entries for
+ * N in 1..volcount. For each named volume of type GF_CLUSTER_TYPE_TIER a
+ * GF_DEFRAG_CMD_RESUME_TIER brick op is issued through
+ * gd_brick_op_phase; other volumes are skipped.
+ *
+ * @param this      glusterd xlator
+ * @param snap_dict dictionary listing the volumes of the snapshot
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_resume_tier (xlator_t *this, dict_t *snap_dict)
+{
+        int                     ret             = -1;
+        dict_t                 *dict            = NULL;
+        int64_t                 volcount        = 0;
+        char                    key[PATH_MAX]   = "";
+        char                   *volname         = NULL;
+        int                     i               = 0;
+        char                   *op_errstr       = NULL;
+        glusterd_volinfo_t     *volinfo         = NULL;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, snap_dict, out);
+
+        ret = dict_get_int64 (snap_dict, "volcount", &volcount);
+        if (ret) {
+                goto out;
+        }
+        if (volcount <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict) {
+                /* ret is 0 at this point (the volcount lookup succeeded);
+                 * an allocation failure must not be reported as success. */
+                ret = -1;
+                goto out;
+        }
+
+        for (i = 1; i <= volcount; i++) {
+                snprintf (key, sizeof (key), "volname%d", i);
+                ret = dict_get_str (snap_dict, key, &volname);
+                if (ret) {
+                        /* Report the key that was looked up; volname is
+                         * not valid when the lookup fails. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get key %s", key);
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret)
+                        goto out;
+
+                if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                        continue;
+
+                ret = dict_set_int32 (dict, "rebalance-command",
+                                      GF_DEFRAG_CMD_RESUME_TIER);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set rebalance-command");
+
+                        goto out;
+                }
+
+                ret = dict_set_str (dict, "volname", volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set volname");
+                        goto out;
+                }
+
+                ret = gd_brick_op_phase (GD_OP_DEFRAG_BRICK_VOLUME, NULL,
+                                         dict, &op_errstr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_RESUME_TIER_FAIL,
+                                "Failed to resume tier");
+                        goto out;
+                }
+        }
+
+out:
+        /* NOTE(review): op_errstr is not released here; confirm whether
+         * gd_brick_op_phase hands ownership of it to the caller. */
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+
+/* Shared pre-validation for snapshot create and snapshot clone.
+ *
+ * Walks the bricks of volinfo and, for each brick owned by this node
+ * (brickinfo->uuid equal to MY_UUID), checks that it is started, finds
+ * its mount device, verifies thin provisioning (create only) and stores
+ * the snap device path, fstype, mount options, mount dir, brick order
+ * and brick status in rsp_dict. Finally "vol<volcount>_brickcount" is
+ * set to the number of local bricks processed.
+ *
+ * @param rsp_dict     dictionary receiving the per-brick entries
+ * @param flags        CLI flags; GF_CLI_FLAG_OP_FORCE lets a create skip
+ *                     bricks that are not started
+ * @param snapname     snapshot name, used in error text; must not be NULL
+ * @param err_str      error buffer, written with a PATH_MAX bound
+ * @param snap_volname name used to build the snap device path
+ * @param volcount     volume index used in the brickcount key
+ * @param volinfo      volume whose bricks are validated; must not be NULL
+ * @param loglevel     lowered to GF_LOG_WARNING when the snap device
+ *                     path cannot be built
+ * @param clone        non-zero for clone, zero for create
+ * @param op_errno     receives an error code on some failures
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snap_create_clone_common_prevalidate (dict_t *rsp_dict, int flags,
+                                               char *snapname, char *err_str,
+                                               char *snap_volname,
+                                               int64_t volcount,
+                                               glusterd_volinfo_t *volinfo,
+                                               gf_loglevel_t *loglevel,
+                                               int clone, uint32_t *op_errno)
+{
+        char                    *device         = NULL;
+        char                    *orig_device    = NULL;
+        char                     key[PATH_MAX]  = "";
+        int                      ret            = -1;
+        int64_t                  i              = 1;
+        int64_t                  brick_order    = 0;
+        int64_t                  brick_count    = 0;
+        xlator_t                *this           = NULL;
+        glusterd_conf_t         *conf           = NULL;
+        glusterd_brickinfo_t    *brickinfo      = NULL;
+
+        this = THIS;
+        conf = this->private;
+        GF_ASSERT (conf);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!snapname || !volinfo) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Failed to validate "
+                        "snapname or volume information");
+                ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): the per-brick keys below use i, which is fixed
+         * at 1, while the brickcount key after the loop uses volcount;
+         * confirm this is intended for multi-volume snapshots. */
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                 brick_list) {
+                /* Bricks of other nodes only advance the order. */
+                if (gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+                        brick_order++;
+                        continue;
+                }
+
+                if (!glusterd_is_brick_started (brickinfo)) {
+                        if (!clone && (flags & GF_CLI_FLAG_OP_FORCE)) {
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_BRICK_DISCONNECTED,
+                                        "brick %s:%s is not started",
+                                        brickinfo->hostname,
+                                        brickinfo->path);
+                                brick_order++;
+                                brick_count++;
+                                continue;
+                        }
+                        if (!clone) {
+                                snprintf (err_str, PATH_MAX,
+                                          "One or more bricks are not running. "
+                                          "Please run volume status command to see "
+                                          "brick status.\n"
+                                          "Please start the stopped brick "
+                                          "and then issue snapshot create "
+                                          "command or use [force] option in "
+                                          "snapshot create to override this "
+                                          "behavior.");
+                        } else {
+                                snprintf (err_str, PATH_MAX,
+                                          "One or more bricks are not running. "
+                                          "Please run snapshot status command to see "
+                                          "brick status.\n"
+                                          "Please start the stopped brick "
+                                          "and then issue snapshot clone "
+                                          "command ");
+                        }
+                        *op_errno = EG_BRCKDWN;
+                        ret = -1;
+                        goto out;
+                }
+
+                /* The mount device string is kept in its own variable so
+                 * that it can be released once the snap device path has
+                 * been derived from it. */
+                orig_device = glusterd_get_brick_mount_device
+                                                  (brickinfo->path);
+                if (!orig_device) {
+                        snprintf (err_str, PATH_MAX,
+                                  "getting device name for the brick "
+                                  "%s:%s failed", brickinfo->hostname,
+                                  brickinfo->path);
+                        ret = -1;
+                        goto out;
+                }
+                if (!clone) {
+                        if (!glusterd_is_thinp_brick (orig_device, op_errno)) {
+                                snprintf (err_str, PATH_MAX,
+                                          "Snapshot is supported only for "
+                                          "thin provisioned LV. Ensure that "
+                                          "all bricks of %s are thinly "
+                                          "provisioned LV.", volinfo->volname);
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                device = glusterd_build_snap_device_path (orig_device,
+                                                          snap_volname,
+                                                          brick_count);
+                if (!device) {
+                        snprintf (err_str, PATH_MAX,
+                                  "cannot copy the snapshot device "
+                                  "name (volname: %s, snapname: %s)",
+                                  volinfo->volname, snapname);
+                        *loglevel = GF_LOG_WARNING;
+                        ret = -1;
+                        goto out;
+                }
+
+                GF_FREE (orig_device);
+                orig_device = NULL;
+
+                snprintf (key, sizeof(key),
+                          "vol%"PRId64".brick_snapdevice%"PRId64,
+                          i, brick_count);
+                ret = dict_set_dynstr (rsp_dict, key, device);
+                if (ret) {
+                        /* device is released exactly once, at out:.
+                         * NOTE(review): confirm dict_set_dynstr does not
+                         * itself release the value when it fails. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set %s", key);
+                        goto out;
+                }
+                /* Ownership moved to rsp_dict. */
+                device = NULL;
+
+                /* A failure to refresh mount options is logged only. */
+                ret = glusterd_update_mntopts (brickinfo->path,
+                                               brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRK_MOUNTOPTS_FAIL, "Failed to "
+                                "update mount options for %s brick",
+                                brickinfo->path);
+                }
+
+                snprintf (key, sizeof(key), "vol%"PRId64".fstype%"
+                          PRId64, i, brick_count);
+                ret = dict_set_dynstr_with_alloc (rsp_dict, key,
+                                                  brickinfo->fstype);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set %s", key);
+                        goto out;
+                }
+
+                snprintf (key, sizeof(key), "vol%"PRId64".mnt_opts%"
+                          PRId64, i, brick_count);
+                ret = dict_set_dynstr_with_alloc (rsp_dict, key,
+                                                  brickinfo->mnt_opts);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set %s", key);
+                        goto out;
+                }
+
+                snprintf (key, sizeof(key), "vol%"PRId64".brickdir%"PRId64, i,
+                          brick_count);
+                ret = dict_set_dynstr_with_alloc (rsp_dict, key,
+                                                  brickinfo->mount_dir);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set %s", key);
+                        goto out;
+                }
+
+                snprintf (key, sizeof(key) - 1,
+                          "vol%"PRId64".brick%"PRId64".order", i, brick_count);
+                ret = dict_set_int64 (rsp_dict, key, brick_order);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set %s", key);
+                        goto out;
+                }
+
+                /* The status key is indexed by brick_order, unlike the
+                 * keys above which use the local brick_count. */
+                snprintf (key, sizeof (key),
+                          "vol%"PRId64".brick%"PRId64".status", i, brick_order);
+
+                ret = glusterd_add_brick_status_to_dict (rsp_dict,
+                                                         volinfo,
+                                                         brickinfo,
+                                                         key);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "failed to "
+                                "add brick status to dict");
+                        goto out;
+                }
+                brick_count++;
+                brick_order++;
+        }
+        snprintf (key, sizeof(key) - 1, "vol%"PRId64"_brickcount", volcount);
+        ret = dict_set_int64 (rsp_dict, key, brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set %s",
+                        key);
+                goto out;
+        }
+        ret = 0;
+out:
+        if (orig_device)
+                GF_FREE (orig_device);
+
+        if (device)
+                GF_FREE (device);
+
+        return ret;
+
+}
+
+
+/* Pre-validate a snapshot clone request.
+ *
+ * Reads "clonename" and "snapname" from dict, rejects a clone name that
+ * matches an existing volume, looks up the snapshot and its (single)
+ * snap volume, requires the "vol1_volid" key to be present, and runs the
+ * common create/clone brick validation, which fills rsp_dict. On success
+ * "volcount" = 1 is stored in rsp_dict.
+ *
+ * @param dict       request dictionary
+ * @param op_errstr  receives a gf_strdup'ed copy of the error text when
+ *                   err_str was filled
+ * @param rsp_dict   response dictionary to populate
+ * @param op_errno   receives an error code on some failures
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_clone_prevalidate (dict_t *dict, char **op_errstr,
+                                     dict_t *rsp_dict, uint32_t *op_errno)
+{
+        char                    *clonename          = NULL;
+        char                    *snapname           = NULL;
+        char                     key[PATH_MAX]      = "";
+        glusterd_snap_t         *snap               = NULL;
+        char                     err_str[PATH_MAX]  = "";
+        int                      ret                = -1;
+        int64_t                  volcount           = 1;
+        glusterd_volinfo_t      *snap_vol           = NULL;
+        xlator_t                *this               = NULL;
+        uuid_t                  *snap_volid         = NULL;
+        gf_loglevel_t            loglevel           = GF_LOG_ERROR;
+
+        this = THIS;
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (dict);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        ret = dict_get_str (dict, "clonename", &clonename);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to "
+                          "get the clone name");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get snapname");
+                goto out;
+        }
+
+        if (glusterd_check_volume_exists(clonename)) {
+                ret = -1;
+                snprintf (err_str, sizeof (err_str), "Volume with name:%s "
+                          "already exists", clonename);
+                *op_errno = EG_VOLEXST;
+                goto out;
+        }
+        /* need to find snap volinfo*/
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                ret = -1;
+                snprintf (err_str, sizeof (err_str), "Failed to find :%s "
+                          "snap", snapname);
+                goto out;
+        }
+
+        /* TODO : As of now there is only one volume in snapshot.
+         * Change this when multiple volume snapshot is introduced
+         */
+        snap_vol = list_entry (snap->volumes.next,
+                               glusterd_volinfo_t, vol_list);
+        if (!snap_vol) {
+                /* ret still holds 0 from the successful lookups above;
+                 * make sure this failure is reported to the caller. */
+                ret = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get snap "
+                        "volinfo %s", snap->snapname);
+                goto out;
+        }
+
+        /* The value is not used further here; the lookup only enforces
+         * that the key is present in the request. */
+        snprintf (key, sizeof(key) - 1, "vol1_volid");
+        ret = dict_get_bin (dict, key, (void **)&snap_volid);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch snap_volid");
+                goto out;
+        }
+
+        /* Adding snap bricks mount paths to the dict */
+        ret = glusterd_snap_create_clone_common_prevalidate (rsp_dict, 0,
+                                                             snapname, err_str,
+                                                             clonename, 1,
+                                                             snap_vol,
+                                                             &loglevel,
+                                                             1, op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRE_VALIDATION_FAIL, "Failed to pre validate");
+                goto out;
+        }
+
+        ret = dict_set_int64 (rsp_dict, "volcount", volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set volcount");
+                goto out;
+        }
+
+out:
+
+        if (ret && err_str[0] != '\0') {
+                gf_msg (this->name, loglevel, 0,
+                        GD_MSG_SNAP_CLONE_PREVAL_FAILED, "%s", err_str);
+                *op_errstr = gf_strdup (err_str);
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/* Pre-validate a snapshot create request.
+ *
+ * Reads "volcount", "snapname" and "flags" from dict and rejects a
+ * snapshot name that already exists. Then, for each "volname<N>", the
+ * volume must exist, be started, have no rebalance running, have no
+ * active geo-replication session, not itself be a snap volume and be
+ * below the effective snapshot hard limit (the smaller of the volume's
+ * own limit and the configured/default global limit). The snap volume id
+ * from "vol<N>_volid" names the snap device, the common brick validation
+ * fills rsp_dict, and the volume's tier (if any) is paused. On success
+ * "volcount" is copied into rsp_dict.
+ *
+ * @param dict       request dictionary
+ * @param op_errstr  receives a gf_strdup'ed copy of the error text when
+ *                   err_str was filled
+ * @param rsp_dict   response dictionary to populate
+ * @param op_errno   receives an error code on some failures
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr,
+                                      dict_t *rsp_dict, uint32_t *op_errno)
+{
+        char                    *volname                = NULL;
+        char                    *snapname               = NULL;
+        char                     key[PATH_MAX]          = "";
+        char                     snap_volname[64]       = "";
+        char                     err_str[PATH_MAX]      = "";
+        int                      ret                    = -1;
+        int64_t                  i                      = 0;
+        int64_t                  volcount               = 0;
+        glusterd_volinfo_t      *volinfo                = NULL;
+        xlator_t                *this                   = NULL;
+        uuid_t                  *snap_volid             = NULL;
+        gf_loglevel_t            loglevel               = GF_LOG_ERROR;
+        glusterd_conf_t         *conf                   = NULL;
+        /* Unsigned: it is only ever assigned from, compared with and
+         * printed (PRIu64) as an unsigned 64-bit limit. */
+        uint64_t                 effective_max_limit    = 0;
+        int                      flags                  = 0;
+        uint64_t                 opt_hard_max           = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+        this = THIS;
+        GF_ASSERT (op_errstr);
+        conf = this->private;
+        GF_ASSERT (conf);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to "
+                          "get the volume count");
+                goto out;
+        }
+        if (volcount <= 0) {
+                snprintf (err_str, sizeof (err_str),
+                          "Invalid volume count %"PRId64" supplied", volcount);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get snapname");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "flags", &flags);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get flags");
+                goto out;
+        }
+
+        if (glusterd_find_snap_by_name (snapname)) {
+                ret = -1;
+                snprintf (err_str, sizeof (err_str), "Snapshot %s already "
+                          "exists", snapname);
+                *op_errno = EG_SNAPEXST;
+                goto out;
+        }
+
+        for (i = 1; i <= volcount; i++) {
+                snprintf (key, sizeof (key), "volname%"PRId64, i);
+                ret = dict_get_str (dict, key, &volname);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "failed to get volume name");
+                        goto out;
+                }
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Volume (%s) does not exist ", volname);
+                        *op_errno = EG_NOVOL;
+                        goto out;
+                }
+
+                /* Every state check below fails with this value. */
+                ret = -1;
+                if (!glusterd_is_volume_started (volinfo)) {
+                        snprintf (err_str, sizeof (err_str), "volume %s is "
+                                  "not started", volinfo->volname);
+                        loglevel = GF_LOG_WARNING;
+                        *op_errno = EG_VOLSTP;
+                        goto out;
+                }
+
+                if (glusterd_is_defrag_on (volinfo)) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "rebalance process is running for the "
+                                  "volume %s", volname);
+                        loglevel = GF_LOG_WARNING;
+                        *op_errno = EG_RBALRUN;
+                        goto out;
+                }
+
+                if (gd_vol_is_geo_rep_active (volinfo)) {
+                         snprintf (err_str, sizeof (err_str),
+                                   "geo-replication session is running for "
+                                   "the volume %s. Session needs to be "
+                                   "stopped before taking a snapshot.",
+                                   volname);
+                         loglevel = GF_LOG_WARNING;
+                         *op_errno = EG_GEOREPRUN;
+                         goto out;
+                }
+
+                if (volinfo->is_snap_volume == _gf_true) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Volume %s is a snap volume", volname);
+                        loglevel = GF_LOG_WARNING;
+                        *op_errno = EG_ISSNAP;
+                        goto out;
+                }
+
+                /* "snap-max-hard-limit" might not be set by user explicitly,
+                 * in that case it's better to consider the default value.
+                 * Hence not erroring out if Key is not found.
+                 */
+                ret = dict_get_uint64 (conf->opts,
+                                      GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                                      &opt_hard_max);
+                if (ret) {
+                        ret = 0;
+                        gf_msg_debug (this->name, 0, "%s is not present "
+                                "in opts dictionary",
+                                GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+                }
+
+                /* The effective limit is the smaller of the two limits. */
+                if (volinfo->snap_max_hard_limit < opt_hard_max)
+                        effective_max_limit = volinfo->snap_max_hard_limit;
+                else
+                        effective_max_limit = opt_hard_max;
+
+                if (volinfo->snap_count >= effective_max_limit) {
+                        ret = -1;
+                        snprintf (err_str, sizeof (err_str),
+                                  "The number of existing snaps has reached "
+                                  "the effective maximum limit of %"PRIu64", "
+                                  "for the volume (%s). Please delete few "
+                                  "snapshots before taking further snapshots.",
+                                  effective_max_limit, volname);
+                        loglevel = GF_LOG_WARNING;
+                        *op_errno = EG_HRDLMT;
+                        goto out;
+                }
+
+                snprintf (key, sizeof(key) - 1, "vol%"PRId64"_volid", i);
+                ret = dict_get_bin (dict, key, (void **)&snap_volid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch snap_volid");
+                        goto out;
+                }
+
+                /* snap volume uuid is used as lvm snapshot name.
+                   This will avoid restrictions on snapshot names
+                   provided by user */
+                GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *snap_volid);
+
+                ret = glusterd_snap_create_clone_common_prevalidate (rsp_dict,
+                                                                flags,
+                                                                snapname,
+                                                                err_str,
+                                                                snap_volname,
+                                                                i,
+                                                                volinfo,
+                                                                &loglevel,
+                                                                0, op_errno);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PRE_VALIDATION_FAIL,
+                                "Failed to pre validate");
+                        goto out;
+                }
+
+                ret = glusterd_snapshot_pause_tier (this, volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_PAUSE_TIER_FAIL,
+                                "Failed to pause tier in snap prevalidate.");
+                        goto out;
+                }
+
+        }
+
+        ret = dict_set_int64 (rsp_dict, "volcount", volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set volcount");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (ret && err_str[0] != '\0') {
+                gf_msg (this->name, loglevel, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED, "%s", err_str);
+                *op_errstr = gf_strdup (err_str);
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Allocate and initialise an empty snapshot object.
+ *
+ * The object is zero-allocated, its lock is initialised, both list
+ * heads (snap_list and volumes) are set up empty, the name is the empty
+ * string and the status is GD_SNAP_STATUS_INIT.
+ *
+ * @return the new object, or NULL when the allocation or the lock
+ *         initialisation fails
+ */
+glusterd_snap_t*
+glusterd_new_snap_object (void)
+{
+        glusterd_snap_t        *snap    = NULL;
+
+        snap = GF_CALLOC (1, sizeof (*snap), gf_gld_mt_snap_t);
+
+        if (snap) {
+                if (LOCK_INIT (&snap->lock)) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_LOCK_INIT_FAILED, "Failed initiating"
+                                " snap lock");
+                        GF_FREE (snap);
+                        return NULL;
+                }
+
+                CDS_INIT_LIST_HEAD (&snap->snap_list);
+                CDS_INIT_LIST_HEAD (&snap->volumes);
+                snap->snapname[0] = 0;
+                snap->snap_status = GD_SNAP_STATUS_INIT;
+        }
+
+        return snap;
+
+}
+
+/* Link a snapshot volume into its owning snapshot and its origin volume.
+ *
+ * snap_vol is appended to the volumes list of snap_vol->snapshot and,
+ * under origin_vol's lock, inserted into origin_vol->snap_volumes in the
+ * order given by glusterd_compare_snap_vol_time; origin_vol->snap_count
+ * is incremented in the same critical section.
+ *
+ * Returns 0 on success and -1 when either argument is NULL.
+ */
+int32_t
+glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol,
+                           glusterd_volinfo_t *snap_vol)
+{
+        glusterd_snap_t        *owner   = NULL;
+        int                     status  = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", origin_vol, out);
+        GF_VALIDATE_OR_GOTO ("glusterd", snap_vol, out);
+
+        owner = snap_vol->snapshot;
+        GF_ASSERT (owner);
+
+        /* Snapshot side: plain append, done outside the volume lock. */
+        cds_list_add_tail (&snap_vol->vol_list, &owner->volumes);
+
+        /* Origin volume side: ordered insert plus counter update. */
+        LOCK (&origin_vol->lock);
+        {
+                glusterd_list_add_order (&snap_vol->snapvol_list,
+                                         &origin_vol->snap_volumes,
+                                         glusterd_compare_snap_vol_time);
+
+                origin_vol->snap_count++;
+        }
+        UNLOCK (&origin_vol->lock);
+
+        gf_msg_debug (THIS->name, 0, "Snapshot %s added to the list",
+                      owner->snapname);
+        status = 0;
+out:
+        return status;
+}
+
+/* Search the global snapshot list (priv->snapshots) for a snapshot
+ * whose snapname equals the given string.
+ *
+ * Returns the matching snap object, or NULL when there is none.
+ */
+glusterd_snap_t*
+glusterd_find_snap_by_name (char *snapname)
+{
+        glusterd_conf_t *priv  = NULL;
+        glusterd_snap_t *entry = NULL;
+        glusterd_snap_t *found = NULL;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (snapname);
+
+        cds_list_for_each_entry (entry, &priv->snapshots, snap_list) {
+                if (strcmp (entry->snapname, snapname) != 0)
+                        continue;
+
+                gf_msg_debug (THIS->name, 0, "Found snap %s (%s)",
+                              entry->snapname, uuid_utoa (entry->snap_id));
+                found = entry;
+                break;
+        }
+
+        return found;
+}
+
+/* Search the global snapshot list (priv->snapshots) for a snapshot
+ * with the given snap_id.
+ *
+ * Returns the matching snap object, or NULL when snap_id is the null
+ * UUID or no snapshot carries it.
+ */
+glusterd_snap_t*
+glusterd_find_snap_by_id (uuid_t snap_id)
+{
+        glusterd_conf_t *priv  = NULL;
+        glusterd_snap_t *entry = NULL;
+        glusterd_snap_t *found = NULL;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        /* A null UUID can never identify a snapshot. */
+        if (gf_uuid_is_null(snap_id))
+                return NULL;
+
+        cds_list_for_each_entry (entry, &priv->snapshots, snap_list) {
+                if (gf_uuid_compare (entry->snap_id, snap_id) != 0)
+                        continue;
+
+                gf_msg_debug (THIS->name, 0, "Found snap %s (%s)",
+                              entry->snapname, uuid_utoa (entry->snap_id));
+                found = entry;
+                break;
+        }
+
+        return found;
+}
+
+/* Tear down the backend LVM snapshot that backs one brick of a
+ * snapshot volume.
+ *
+ * Steps, in order:
+ *   1. SIGKILL the brick process if its pidfile reports it running
+ *      (ESRCH from kill() is tolerated).
+ *   2. Unmount mount_pt, retrying up to three times with a one second
+ *      pause.  The unmount is skipped when the brick root cannot be
+ *      determined or differs from mount_pt.
+ *   3. Run "LVM_REMOVE -f snap_device".
+ *
+ * @param snap_vol     snapshot volume that owns the brick
+ * @param brickinfo    brick whose snapshot LV is being removed
+ * @param mount_pt     mount point the brick is expected to live on
+ * @param snap_device  device path handed to LVM_REMOVE
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,
+                                 glusterd_brickinfo_t *brickinfo,
+                                 const char *mount_pt, const char *snap_device)
+{
+        int              ret               = -1;
+        xlator_t        *this              = NULL;
+        glusterd_conf_t *priv              = NULL;
+        runner_t         runner            = {0,};
+        char             msg[1024]         = {0, };
+        char             pidfile[PATH_MAX] = {0, };
+        pid_t            pid               = -1;
+        int              retry_count       = 0;
+        char            *mnt_pt            = NULL;
+        gf_boolean_t     unmount           = _gf_true;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!brickinfo) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "brickinfo NULL");
+                goto out;
+        }
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (mount_pt);
+        GF_ASSERT (snap_device);
+
+        GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv);
+        if (gf_is_service_running (pidfile, &pid)) {
+                ret = kill (pid, SIGKILL);
+                if (ret && errno != ESRCH) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_PID_KILL_FAIL, "Unable to kill pid "
+                                "%d reason : %s", pid, strerror(errno));
+                        goto out;
+                }
+        }
+
+        /* Check if the brick is mounted and then try unmounting the brick */
+        ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRICK_PATH_UNMOUNTED, "Getting the root "
+                        "of the brick for volume %s (snap %s) failed. "
+                        "Removing lv (%s).", snap_vol->volname,
+                        snap_vol->snapshot->snapname, snap_device);
+                /* The brick path is already unmounted. Remove the lv only *
+                 * Need not fail the operation */
+                ret = 0;
+                unmount = _gf_false;
+        }
+
+        if ((unmount == _gf_true) && (strcmp (mnt_pt, mount_pt))) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRICK_PATH_UNMOUNTED,
+                        "Lvm is not mounted for brick %s:%s. "
+                        "Removing lv (%s).", brickinfo->hostname,
+                        brickinfo->path, snap_device);
+                /* The brick path is already unmounted. Remove the lv only *
+                 * Need not fail the operation */
+                unmount = _gf_false;
+        }
+
+        /* umount cannot be done when the brick process is still in the
+           process of shutdown, so give three re-tries */
+        while ((unmount == _gf_true) && (retry_count < 3)) {
+                retry_count++;
+                /* umount2 system call doesn't cleanup mtab entry after
+                   un-mount. So use external umount command */
+                ret = glusterd_umount(mount_pt);
+                if (!ret)
+                        break;
+
+                gf_msg_debug (this->name, 0, "umount failed for "
+                              "path %s (brick: %s): %s. Retry(%d)", mount_pt,
+                              brickinfo->path, strerror (errno), retry_count);
+
+                sleep (1);
+        }
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNOUNT_FAILED, "umount failed for "
+                        "path %s (brick: %s): %s.", mount_pt,
+                        brickinfo->path, strerror (errno));
+                goto out;
+        }
+
+        runinit (&runner);
+        snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, "
+                  "device: %s", brickinfo->hostname, brickinfo->path,
+                  snap_device);
+        runner_add_args (&runner, LVM_REMOVE, "-f", snap_device, NULL);
+        runner_log (&runner, "", GF_LOG_DEBUG, msg);
+
+        ret = runner_run (&runner);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL, "removing snapshot of the "
+                        "brick (%s:%s) of device %s failed",
+                        brickinfo->hostname, brickinfo->path, snap_device);
+                goto out;
+        }
+
+out:
+        /* Release the brick root string so it is not leaked on any path.
+         * NOTE(review): assumes glusterd_get_brick_root() hands back a
+         * GF_*-allocated string owned by the caller - confirm against
+         * its definition.  mnt_pt stays NULL when it was never set. */
+        GF_FREE (mnt_pt);
+        return ret;
+}
+
+/* Remove the backend LVM snapshots of every local brick of snap_vol
+ * and, afterwards, the directory that held the brick mounts.
+ *
+ * Bricks that belong to other nodes are skipped.  For each local brick:
+ *   - a brick whose mount path cannot be found or no longer exists is
+ *     skipped without failing the operation;
+ *   - a brick whose snapshot was pending (snap_status == -1) has no LV;
+ *     when rsp_dict is given and snap_vol is a snap volume, a "missed
+ *     delete" entry is recorded in rsp_dict instead;
+ *   - otherwise the LV is removed if the brick has a device path that
+ *     exists.  A failing LV removal is remembered in "err" but does not
+ *     stop the loop.
+ *
+ * @param rsp_dict  response dictionary for missed-snap entries; may be NULL
+ * @param snap_vol  snapshot (or restored) volume to clean up
+ * @return 0 on success, non-zero if any step failed
+ */
+int32_t
+glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol)
+{
+        int32_t               brick_count          = -1;
+        int32_t               ret                  = -1;
+        int32_t               err                  = 0;
+        glusterd_brickinfo_t *brickinfo            = NULL;
+        xlator_t             *this                 = NULL;
+        char                  brick_dir[PATH_MAX]  = "";
+        char                 *tmp                  = NULL;
+        char                 *brick_mount_path     = NULL;
+        gf_boolean_t          is_brick_dir_present = _gf_false;
+        struct stat           stbuf                = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (snap_vol);
+
+        if ((snap_vol->is_snap_volume == _gf_false) &&
+            (gf_uuid_is_null (snap_vol->restored_from_snap))) {
+                gf_msg_debug (this->name, 0,
+                              "Not a snap volume, or a restored snap volume.");
+                ret = 0;
+                goto out;
+        }
+
+        cds_list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+                brick_count++;
+                if (gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+                        gf_msg_debug (this->name, 0,
+                                      "%s:%s belongs to a different node",
+                                      brickinfo->hostname, brickinfo->path);
+                        continue;
+                }
+
+                /* Fetch the brick mount path from the brickinfo->path */
+                ret = glusterd_find_brick_mount_path (brickinfo->path,
+                                                      &brick_mount_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_GET_INFO_FAIL,
+                                "Failed to find brick_mount_path for %s",
+                                brickinfo->path);
+                        ret = 0;
+                        continue;
+                }
+
+                /* From here on brick_mount_path is owned by this iteration;
+                 * every way out of the iteration goes through next_brick
+                 * (or the function's out label) so that it is released. */
+
+                ret = sys_lstat (brick_mount_path, &stbuf);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "Brick %s:%s already deleted.",
+                                      brickinfo->hostname, brickinfo->path);
+                        ret = 0;
+                        goto next_brick;
+                }
+
+                if (brickinfo->snap_status == -1) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_SNAPSHOT_PENDING,
+                                "snapshot was pending. lvm not present "
+                                "for brick %s:%s of the snap %s.",
+                                brickinfo->hostname, brickinfo->path,
+                                snap_vol->snapshot->snapname);
+
+                        if (rsp_dict &&
+                            (snap_vol->is_snap_volume == _gf_true)) {
+                                /* Adding missed delete to the dict */
+                                ret = glusterd_add_missed_snaps_to_dict
+                                                   (rsp_dict,
+                                                    snap_vol,
+                                                    brickinfo,
+                                                    brick_count + 1,
+                                                    GF_SNAP_OPTION_TYPE_DELETE);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_MISSED_SNAP_CREATE_FAIL,
+                                                "Failed to add missed snapshot "
+                                                "info for %s:%s in the "
+                                                "rsp_dict", brickinfo->hostname,
+                                                brickinfo->path);
+                                        goto out;
+                                }
+                        }
+
+                        goto next_brick;
+                }
+
+                /* Check if the brick has a LV associated with it */
+                if (strlen(brickinfo->device_path) == 0) {
+                        gf_msg_debug (this->name, 0,
+                                      "Brick (%s:%s) does not have a LV "
+                                      "associated with it. Removing the brick path",
+                                      brickinfo->hostname, brickinfo->path);
+                        goto remove_brick_path;
+                }
+
+                /* Verify if the device path exists or not */
+                ret = sys_stat (brickinfo->device_path, &stbuf);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "LV (%s) for brick (%s:%s) not present. "
+                                      "Removing the brick path",
+                                      brickinfo->device_path,
+                                      brickinfo->hostname, brickinfo->path);
+                        /* Making ret = 0 as absence of device path should *
+                         * not fail the remove operation */
+                        ret = 0;
+                        goto remove_brick_path;
+                }
+
+                ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo,
+                                                       brick_mount_path,
+                                                       brickinfo->device_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL, "Failed to "
+                                "remove the snapshot %s (%s)",
+                                brickinfo->path, brickinfo->device_path);
+                        err = -1; /* We need to record this failure */
+                }
+
+remove_brick_path:
+                /* After removing the brick dir fetch the parent path
+                 * i.e /var/run/gluster/snaps/<snap-vol-id>/
+                 */
+                if (is_brick_dir_present == _gf_false) {
+                        /* Need to fetch brick_dir to be removed from
+                         * brickinfo->path, as in a restored volume,
+                         * snap_vol won't have the non-hyphenated snap_vol_id
+                         */
+                        tmp = strstr (brick_mount_path, "brick");
+                        if (!tmp) {
+                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                        GD_MSG_INVALID_ENTRY,
+                                        "Invalid brick %s", brickinfo->path);
+                                goto next_brick;
+                        }
+
+                        /* Copies only the prefix before "brick"; brick_dir
+                         * was zero-initialised, which is what terminates
+                         * the copied prefix. */
+                        strncpy (brick_dir, brick_mount_path,
+                                 (size_t) (tmp - brick_mount_path));
+
+                        /* Peers not hosting bricks will have _gf_false */
+                        is_brick_dir_present = _gf_true;
+                }
+
+next_brick:
+                GF_FREE (brick_mount_path);
+                brick_mount_path = NULL;
+        }
+
+        if (is_brick_dir_present == _gf_true) {
+                ret = recursive_rmdir (brick_dir);
+                if (ret) {
+                        if (errno == ENOTEMPTY) {
+                                /* Will occur when multiple glusterds
+                                 * are running in the same node
+                                 */
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        GD_MSG_DIR_OP_FAILED,
+                                        "Failed to rmdir: %s, err: %s. "
+                                        "More than one glusterd running "
+                                        "on this node.",
+                                        brick_dir, strerror (errno));
+                                ret = 0;
+                        } else {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_DIR_OP_FAILED,
+                                        "Failed to rmdir: %s, err: %s",
+                                        brick_dir, strerror (errno));
+                        }
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        /* A recorded LV removal failure overrides whatever ret holds. */
+        if (err) {
+                ret = err;
+        }
+        GF_FREE (brick_mount_path);
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/* Remove one snapshot volume: stop its local bricks, optionally remove
+ * the backend LVM snapshots, delete it from the store, decrement the
+ * parent volume's snap_count and drop a reference on the volinfo.
+ *
+ * Without "force" the first failing step aborts the removal.  With
+ * "force" every step is attempted and the last recorded failure is
+ * returned at the end.
+ *
+ * @param rsp_dict    response dictionary, passed to the LVM removal
+ * @param snap_vol    snapshot volume to remove
+ * @param remove_lvm  whether the backend LVM snapshots are removed too
+ * @param force       keep going after a failing step
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snap_volume_remove (dict_t *rsp_dict,
+                             glusterd_volinfo_t *snap_vol,
+                             gf_boolean_t remove_lvm,
+                             gf_boolean_t force)
+{
+        int                   ret        = -1;
+        int                   save_ret   = 0;
+        glusterd_brickinfo_t *brickinfo  = NULL;
+        glusterd_volinfo_t   *origin_vol = NULL;
+        xlator_t             *this       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (snap_vol);
+
+        if (!snap_vol) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "snap_vol in NULL");
+                ret = -1;
+                goto out;
+        }
+
+        cds_list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+                if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+                        continue;
+
+                ret = glusterd_brick_stop (snap_vol, brickinfo, _gf_false);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               GD_MSG_BRICK_STOP_FAIL, "Failed to stop "
+                               "brick for volume %s", snap_vol->volname);
+                        save_ret = ret;
+
+                        /* Don't clean up the snap on error when
+                           force flag is disabled */
+                        if (!force)
+                                goto out;
+                }
+        }
+
+        /* Only remove the backend lvm when required */
+        if (remove_lvm) {
+                ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               GD_MSG_SNAP_REMOVE_FAIL, "Failed to remove "
+                               "lvm snapshot volume %s", snap_vol->volname);
+                        save_ret = ret;
+                        if (!force)
+                                goto out;
+                }
+        }
+
+        ret = glusterd_store_delete_volume (snap_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOL_DELETE_FAIL, "Failed to remove volume %s "
+                        "from store", snap_vol->volname);
+                save_ret = ret;
+                if (!force)
+                        goto out;
+        }
+
+        if (!cds_list_empty (&snap_vol->snapvol_list)) {
+                ret = glusterd_volinfo_find (snap_vol->parent_volname,
+                                             &origin_vol);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND, "Failed to get "
+                                "parent volinfo %s for volume %s",
+                                snap_vol->parent_volname, snap_vol->volname);
+                        save_ret = ret;
+                        if (!force)
+                                goto out;
+                }
+                /* With force set we reach this point even when the parent
+                 * lookup failed; origin_vol is then still NULL and must
+                 * not be dereferenced. */
+                if (origin_vol)
+                        origin_vol->snap_count--;
+        }
+
+        glusterd_volinfo_unref (snap_vol);
+
+        if (save_ret)
+                ret = save_ret;
+out:
+        gf_msg_trace (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/* Remove a snapshot: every snapshot volume it contains, its entry in
+ * the store (skipped for clones) and finally the in-memory snap object.
+ *
+ * Without "force" the first failure in volume or store removal aborts
+ * the operation.  With "force" the remaining steps are still attempted
+ * and a recorded store-removal failure is returned at the end.
+ *
+ * @param rsp_dict    response dictionary handed to the volume removal
+ * @param snap        snapshot to remove
+ * @param remove_lvm  whether backend LVM snapshots are removed too
+ * @param force       keep going after a failing step
+ * @param is_clone    non-zero when the snap belongs to a clone
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snap_remove (dict_t *rsp_dict,
+                      glusterd_snap_t *snap,
+                      gf_boolean_t remove_lvm,
+                      gf_boolean_t force,
+                      gf_boolean_t is_clone)
+{
+        xlator_t           *this     = NULL;
+        glusterd_volinfo_t *vol      = NULL;
+        glusterd_volinfo_t *next_vol = NULL;
+        int                 save_ret = 0;
+        int                 ret      = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (snap);
+
+        if (snap == NULL) {
+                gf_msg(this->name, GF_LOG_WARNING, EINVAL,
+                       GD_MSG_INVALID_ENTRY, "snap is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        cds_list_for_each_entry_safe (vol, next_vol, &snap->volumes,
+                                      vol_list) {
+                ret = glusterd_snap_volume_remove (rsp_dict, vol,
+                                                   remove_lvm, force);
+                if (!ret || force)
+                        continue;
+
+                /* Not forced: leave the rest of the snap untouched. */
+                gf_msg(this->name, GF_LOG_WARNING, 0,
+                       GD_MSG_SNAP_REMOVE_FAIL,
+                       "Failed to remove volinfo %s for snap %s",
+                       vol->volname, snap->snapname);
+                save_ret = ret;
+                goto out;
+        }
+
+        /* Clones keep no snap info under /var/lib/glusterd/snaps/, so
+         * there is nothing to delete from the store for them. */
+        if (!is_clone) {
+                ret = glusterd_store_delete_snap (snap);
+                if (ret) {
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               GD_MSG_SNAP_REMOVE_FAIL,
+                               "Failed to remove snap %s from store",
+                               snap->snapname);
+                        save_ret = ret;
+                        if (!force)
+                                goto out;
+                }
+        }
+
+        ret = glusterd_snapobject_delete (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL,
+                        "Failed to delete snap object %s", snap->snapname);
+        }
+
+        /* An earlier recorded failure wins over the last step's result. */
+        if (save_ret != 0)
+                ret = save_ret;
+out:
+        gf_msg_trace (THIS->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/* Store the details of one snapshot volume in "dict".
+ *
+ * Every key is prefixed with keyprefix.  Always written:
+ *   <keyprefix>.volname, .vol-id, .vol-status,
+ *   <keyprefix>.snaps-available, .snapcount
+ * Written only when "detail" is non-zero:
+ *   <keyprefix>.origin-volname
+ *
+ * snaps-available is the effective hard limit (the smaller of the
+ * snap-max-hard-limit found in conf->opts, defaulting to
+ * GLUSTERD_SNAPS_MAX_HARD_LIMIT, and the parent volume's
+ * snap_max_hard_limit) minus the parent's snap_count, never below 0.
+ *
+ * @param dict       dictionary to fill
+ * @param snap_vol   snapshot volume being described
+ * @param keyprefix  prefix for every key written
+ * @param detail     non-zero to add the parent volume name as well
+ * @return 0 on success, non-zero on failure
+ */
+static int
+glusterd_snapshot_get_snapvol_detail (dict_t *dict,
+                                      glusterd_volinfo_t *snap_vol,
+                                      char *keyprefix, int detail)
+{
+        int                 ret           = -1;
+        int                 snap_limit    = 0;
+        char                key[PATH_MAX] = {0,};
+        char               *value         = NULL;
+        glusterd_volinfo_t *origin_vol    = NULL;
+        glusterd_conf_t    *conf          = NULL;
+        xlator_t           *this          = NULL;
+        uint64_t            opt_hard_max  = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+        this = THIS;
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (keyprefix);
+
+        /* Volume Name */
+        value = gf_strdup (snap_vol->volname);
+        if (!value)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s.volname", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "volume name in dictionary: %s", key);
+                goto out;
+        }
+
+        /* Volume ID */
+        value = gf_strdup (uuid_utoa (snap_vol->volume_id));
+        if (NULL == value) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.vol-id", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_MEMORY, "Failed to set "
+                        "volume id in dictionary: %s", key);
+                goto out;
+        }
+        value = NULL;
+
+        /* volume status */
+        snprintf (key, sizeof (key), "%s.vol-status", keyprefix);
+        switch (snap_vol->status) {
+        case GLUSTERD_STATUS_STARTED:
+                ret = dict_set_str (dict, key, "Started");
+                break;
+        case GLUSTERD_STATUS_STOPPED:
+                ret = dict_set_str (dict, key, "Stopped");
+                break;
+        case GD_SNAP_STATUS_NONE:
+                ret = dict_set_str (dict, key, "None");
+                break;
+        default:
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid volume status");
+                ret = -1;
+                goto out;
+        }
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set volume status"
+                        " in dictionary: %s", key);
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (snap_vol->parent_volname, &origin_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "failed to get the parent "
+                        "volinfo for the volume %s", snap_vol->volname);
+                goto out;
+        }
+
+        /* "snap-max-hard-limit" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence not erroring out if Key is not found.
+         */
+        ret = dict_get_uint64 (conf->opts,
+                               GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                               &opt_hard_max);
+        if (ret) {
+                ret = 0;
+                gf_msg_debug (this->name, 0, "%s is not present in "
+                              "opts dictionary",
+                              GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+        }
+
+        if (opt_hard_max < origin_vol->snap_max_hard_limit) {
+                snap_limit = opt_hard_max;
+                gf_msg_debug (this->name, 0, "system snap-max-hard-limit is"
+                              " lesser than volume snap-max-hard-limit, "
+                              "snap-max-hard-limit value is set to %d",
+                              snap_limit);
+        } else {
+                snap_limit = origin_vol->snap_max_hard_limit;
+                gf_msg_debug (this->name, 0, "volume snap-max-hard-limit is"
+                              " lesser than system snap-max-hard-limit, "
+                              "snap-max-hard-limit value is set to %d",
+                              snap_limit);
+        }
+
+        snprintf (key, sizeof (key), "%s.snaps-available", keyprefix);
+        if (snap_limit > origin_vol->snap_count)
+                ret = dict_set_int32 (dict, key,
+                                      snap_limit - origin_vol->snap_count);
+        else
+                ret = dict_set_int32 (dict, key, 0);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set available snaps");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snapcount", keyprefix);
+        ret = dict_set_int32 (dict, key, origin_vol->snap_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Could not save snapcount");
+                goto out;
+        }
+
+        /* Brief output requested: ret is 0 here, nothing more to add. */
+        if (!detail)
+                goto out;
+
+        /* Parent volume name */
+        value = gf_strdup (snap_vol->parent_volname);
+        if (!value) {
+                /* ret still holds 0 from the previous dict_set; without
+                 * this the allocation failure would be reported as
+                 * success. */
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.origin-volname", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set parent "
+                        "volume name in dictionary: %s", key);
+                goto out;
+        }
+        value = NULL;
+
+        ret = 0;
+out:
+        if (value)
+                GF_FREE (value);
+
+        return ret;
+}
+
+/* Store the details of one snapshot in "dict".
+ *
+ * Every key is prefixed with keyprefix: .snapname, .snap-id,
+ * .snap-time, .snap-desc (only when the snap has a description),
+ * .snap-status and .vol-count, plus one <keyprefix>.vol<N> group per
+ * volume written by glusterd_snapshot_get_snapvol_detail().
+ *
+ * When "volinfo" is given only that volume is described (as vol1, in
+ * brief form); otherwise every volume of the snapshot is described in
+ * detailed form.
+ *
+ * @param dict       dictionary to fill
+ * @param snap       snapshot being described
+ * @param keyprefix  prefix for every key written
+ * @param volinfo    single snapshot volume to report, or NULL for all
+ * @return 0 on success, non-zero on failure
+ */
+static int
+glusterd_snapshot_get_snap_detail (dict_t *dict, glusterd_snap_t *snap,
+                                   char *keyprefix, glusterd_volinfo_t *volinfo)
+{
+        int                 ret           = -1;
+        int                 volcount      = 0;
+        char                key[PATH_MAX] = {0,};
+        char                timestr[64]   = {0,};
+        char               *value         = NULL;
+        glusterd_volinfo_t *snap_vol      = NULL;
+        glusterd_volinfo_t *tmp_vol       = NULL;
+        xlator_t           *this          = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (snap);
+        GF_ASSERT (keyprefix);
+
+        /* Snap Name */
+        value = gf_strdup (snap->snapname);
+        if (!value)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "snap name in dictionary");
+                goto out;
+        }
+
+        /* Snap ID */
+        value = gf_strdup (uuid_utoa (snap->snap_id));
+        if (NULL == value) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap-id", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "snap id in dictionary");
+                goto out;
+        }
+        value = NULL;
+
+        /* Snap creation time, formatted with gf_timefmt_FT */
+        gf_time_fmt (timestr, sizeof timestr, snap->time_stamp,
+                     gf_timefmt_FT);
+        value = gf_strdup (timestr);
+
+        if (NULL == value) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap-time", keyprefix);
+        ret = dict_set_dynstr (dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set "
+                        "snap time stamp in dictionary");
+                goto out;
+        }
+        value = NULL;
+
+        /* If snap description is provided then add that into dictionary */
+        if (NULL != snap->description) {
+                value = gf_strdup (snap->description);
+                if (NULL == value) {
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (key, sizeof (key), "%s.snap-desc", keyprefix);
+                ret = dict_set_dynstr (dict, key, value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set "
+                                "snap description in dictionary");
+                        goto out;
+                }
+                value = NULL;
+        }
+
+        snprintf (key, sizeof (key), "%s.snap-status", keyprefix);
+        switch (snap->snap_status) {
+        case GD_SNAP_STATUS_INIT:
+                ret = dict_set_str (dict, key, "Init");
+                break;
+        case GD_SNAP_STATUS_IN_USE:
+                ret = dict_set_str (dict, key, "In-use");
+                break;
+        case GD_SNAP_STATUS_DECOMMISSION:
+                ret = dict_set_str (dict, key, "Decommisioned");
+                break;
+        case GD_SNAP_STATUS_RESTORED:
+                ret = dict_set_str (dict, key, "Restored");
+                break;
+        case GD_SNAP_STATUS_NONE:
+                ret = dict_set_str (dict, key, "None");
+                break;
+        default:
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid snap status");
+                ret = -1;
+                goto out;
+        }
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap status "
+                        "in dictionary");
+                goto out;
+        }
+
+        if (volinfo) {
+                volcount = 1;
+                snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount);
+                ret = glusterd_snapshot_get_snapvol_detail (dict,
+                                                        volinfo, key, 0);
+                if (ret) {
+                        /* snap_vol is still NULL on this path; the volume
+                         * being described is volinfo. */
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_DICT_GET_FAILED, "Failed to "
+                                "get volume detail %s for snap %s",
+                                volinfo->volname, snap->snapname);
+                        goto out;
+                }
+                goto done;
+        }
+
+        cds_list_for_each_entry_safe (snap_vol, tmp_vol, &snap->volumes,
+                                      vol_list) {
+                volcount++;
+                snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount);
+                ret = glusterd_snapshot_get_snapvol_detail (dict,
+                                                        snap_vol, key, 1);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to "
+                                "get volume detail %s for snap %s",
+                                snap_vol->volname, snap->snapname);
+                        goto out;
+                }
+        }
+
+done:
+        snprintf (key, sizeof (key), "%s.vol-count", keyprefix);
+        ret = dict_set_int32 (dict, key, volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set %s",
+                        key);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (value)
+                GF_FREE (value);
+
+        return ret;
+}
+
+/* Describe every snapshot in priv->snapshots inside "dict".
+ *
+ * Snapshot number N is written under the key prefix "snapN" (counting
+ * from 1) with all of its volumes, and the total goes into "snapcount".
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+glusterd_snapshot_get_all_snap_info (dict_t *dict)
+{
+        xlator_t        *this          = THIS;
+        glusterd_conf_t *priv          = this->private;
+        glusterd_snap_t *cur           = NULL;
+        glusterd_snap_t *next          = NULL;
+        char             key[PATH_MAX] = {0,};
+        int              snapcount     = 0;
+        int              ret           = -1;
+
+        GF_ASSERT (priv);
+
+        /* General parameter validation */
+        GF_ASSERT (dict);
+
+        cds_list_for_each_entry_safe (cur, next, &priv->snapshots,
+                                      snap_list) {
+                snprintf (key, sizeof (key), "snap%d", ++snapcount);
+                ret = glusterd_snapshot_get_snap_detail (dict, cur, key, NULL);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get snapdetail for snap %s",
+                                cur->snapname);
+                        return ret;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "snapcount", snapcount);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snapcount");
+                return ret;
+        }
+
+        return 0;
+}
+
+/* Describe every snapshot taken of the volume "volname" inside "dict".
+ *
+ * Writes "snaps-available" (effective hard limit minus the volume's
+ * snap_count, never below 0), "origin-volname", one "snapN" group per
+ * snapshot volume (brief form) and "snapcount".
+ *
+ * @param dict     dictionary to fill
+ * @param volname  name of the origin volume
+ * @param err_str  buffer that receives the message when the volume
+ *                 does not exist
+ * @param len      size of err_str
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_get_info_by_volume (dict_t *dict, char *volname,
+                                      char *err_str, size_t len)
+{
+        int                 ret           = -1;
+        int                 snapcount     = 0;
+        int                 snap_limit    = 0;
+        char               *value         = NULL;
+        char                key[PATH_MAX] = "";
+        glusterd_volinfo_t *volinfo       = NULL;
+        glusterd_volinfo_t *snap_vol      = NULL;
+        glusterd_volinfo_t *tmp_vol       = NULL;
+        glusterd_conf_t    *conf          = NULL;
+        xlator_t           *this          = NULL;
+        uint64_t            opt_hard_max  = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+        this = THIS;
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (volname);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (err_str, len, "Volume (%s) does not exist", volname);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "%s", err_str);
+                goto out;
+        }
+
+        /* "snap-max-hard-limit" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence not erroring out if Key is not found.
+         */
+        ret = dict_get_uint64 (conf->opts,
+                               GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                               &opt_hard_max);
+        if (ret) {
+                ret = 0;
+                gf_msg_debug (this->name, 0, "%s is not present in "
+                              "opts dictionary",
+                              GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+        }
+
+        if (opt_hard_max < volinfo->snap_max_hard_limit) {
+                snap_limit = opt_hard_max;
+                gf_msg_debug (this->name, 0, "system snap-max-hard-limit is"
+                              " lesser than volume snap-max-hard-limit, "
+                              "snap-max-hard-limit value is set to %d",
+                              snap_limit);
+        } else {
+                snap_limit = volinfo->snap_max_hard_limit;
+                gf_msg_debug (this->name, 0, "volume snap-max-hard-limit is"
+                              " lesser than system snap-max-hard-limit, "
+                              "snap-max-hard-limit value is set to %d",
+                              snap_limit);
+        }
+
+        if (snap_limit > volinfo->snap_count)
+                ret = dict_set_int32 (dict, "snaps-available",
+                                      snap_limit - volinfo->snap_count);
+        else
+                ret = dict_set_int32 (dict, "snaps-available", 0);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set available snaps");
+                goto out;
+        }
+
+        /* Origin volume name */
+        value = gf_strdup (volinfo->volname);
+        if (!value) {
+                /* ret still holds 0 from the previous dict_set; without
+                 * this the allocation failure would be reported as
+                 * success. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (dict, "origin-volname", value);
+        if (ret) {
+                /* The key buffer has not been filled in yet at this
+                 * point, so name the key that failed explicitly. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set parent "
+                        "volume name in dictionary: %s", "origin-volname");
+                goto out;
+        }
+        value = NULL;
+
+        cds_list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes,
+                                      snapvol_list) {
+                snapcount++;
+                snprintf (key, sizeof (key), "snap%d", snapcount);
+                ret = glusterd_snapshot_get_snap_detail (dict,
+                                                         snap_vol->snapshot,
+                                                         key, snap_vol);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get "
+                                "snapdetail for snap %s",
+                                snap_vol->snapshot->snapname);
+                        goto out;
+                }
+        }
+        ret = dict_set_int32 (dict, "snapcount", snapcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snapcount");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (value)
+                GF_FREE (value);
+
+        return ret;
+}
+
+/* RPC-side handler for "snapshot info".
+ *
+ * Reads the "sub-cmd" key from dict and fills dict with the requested
+ * information:
+ *   GF_SNAP_INFO_TYPE_ALL   every snapshot known to glusterd
+ *   GF_SNAP_INFO_TYPE_SNAP  the snapshot named by the "snapname" key
+ *   GF_SNAP_INFO_TYPE_VOL   all snapshots of the volume named by "volname"
+ * It then records "snap-driven" (0 for the per-volume form, 1 otherwise)
+ * and sends the dictionary back to the cli.
+ *
+ * @param req      RPC request object, needed to send the response
+ * @param op       glusterd operation, needed to send the response
+ * @param dict     carries the request keys and receives the response keys
+ * @param err_str  buffer for a user-visible error message
+ * @param len      size of err_str
+ * @return 0 on success, non-zero on error (the caller then sends the
+ *         response)
+ */
+int
+glusterd_handle_snapshot_info (rpcsvc_request_t *req, glusterd_op_t op,
+                               dict_t *dict, char *err_str, size_t len)
+{
+        xlator_t        *this        = NULL;
+        glusterd_snap_t *snap        = NULL;
+        char            *snapname    = NULL;
+        char            *volname     = NULL;
+        int32_t          cmd         = GF_SNAP_INFO_TYPE_ALL;
+        int8_t           snap_driven = 1;
+        int              ret         = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, req, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        ret = dict_get_int32 (dict, "sub-cmd", &cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get type of snapshot info");
+                goto out;
+        }
+
+        if (cmd == GF_SNAP_INFO_TYPE_ALL) {
+                ret = glusterd_snapshot_get_all_snap_info (dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get info of all snaps");
+                        goto out;
+                }
+        } else if (cmd == GF_SNAP_INFO_TYPE_SNAP) {
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get snap name");
+                        goto out;
+                }
+
+                /* Exactly one snapshot is reported in this mode. */
+                ret = dict_set_int32 (dict, "snapcount", 1);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set snapcount");
+                        goto out;
+                }
+
+                snap = glusterd_find_snap_by_name (snapname);
+                if (snap == NULL) {
+                        snprintf (err_str, len,
+                                  "Snapshot (%s) does not exist",
+                                  snapname);
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_SNAP_NOT_FOUND,
+                                "%s", err_str);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_snapshot_get_snap_detail (dict, snap,
+                                                         "snap1", NULL);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_NOT_FOUND,
+                                "Failed to get snap detail of snap %s",
+                                snap->snapname);
+                        goto out;
+                }
+        } else if (cmd == GF_SNAP_INFO_TYPE_VOL) {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_NOT_FOUND,
+                                "Failed to get volname");
+                        goto out;
+                }
+
+                ret = glusterd_snapshot_get_info_by_volume (dict,
+                                                volname, err_str, len);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND,
+                                "Failed to get volume info of volume %s",
+                                volname);
+                        goto out;
+                }
+                snap_driven = 0;
+        }
+
+        ret = dict_set_int8 (dict, "snap-driven", snap_driven);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap-driven");
+                goto out;
+        }
+
+        /* Everything was gathered, so answer the cli from here.  On any
+         * failure above the caller is the one that sends the response. */
+        ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_CLI_RESP, "Failed to send cli response");
+        }
+
+out:
+        return ret;
+}
+
+/* Put the name of every snapshot in priv->snapshots into "dict".
+ *
+ * The N-th name (counting from 1) is stored as a copy under
+ * "snapnameN"; the total is stored under "snapcount".
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_snapshot_get_all_snapnames (dict_t *dict)
+{
+        xlator_t        *this          = THIS;
+        glusterd_conf_t *priv          = this->private;
+        glusterd_snap_t *cur           = NULL;
+        glusterd_snap_t *next          = NULL;
+        char            *name_copy     = NULL;
+        char             key[PATH_MAX] = {0,};
+        int              snapcount     = 0;
+        int              ret           = -1;
+
+        GF_ASSERT (priv);
+        GF_ASSERT (dict);
+
+        cds_list_for_each_entry_safe (cur, next, &priv->snapshots,
+                                      snap_list) {
+                snapcount++;
+
+                name_copy = gf_strdup (cur->snapname);
+                if (name_copy == NULL) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "strdup failed");
+                        return -1;
+                }
+
+                snprintf (key, sizeof (key), "snapname%d", snapcount);
+                ret = dict_set_dynstr (dict, key, name_copy);
+                if (ret != 0) {
+                        GF_FREE (name_copy);
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set %s",
+                                key);
+                        return ret;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "snapcount", snapcount);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snapcount");
+                return ret;
+        }
+
+        return 0;
+}
+
+/* Put the names of all snapshots of one volume into "dict".
+ *
+ * Walks volinfo->snap_volumes; the name of the snapshot that owns the
+ * N-th entry (counting from 1) is stored under "snapnameN" via
+ * dict_set_dynstr_with_alloc(), and the total under "snapcount".
+ *
+ * @param dict     dictionary to fill
+ * @param volinfo  origin volume whose snapshots are listed
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_get_vol_snapnames (dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+        int                 ret           = -1;
+        int                 snapcount     = 0;
+        char                key[PATH_MAX] = {0,};
+        glusterd_volinfo_t *snap_vol      = NULL;
+        glusterd_volinfo_t *tmp_vol       = NULL;
+        xlator_t           *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry_safe (snap_vol, tmp_vol,
+                                      &volinfo->snap_volumes, snapvol_list) {
+                snapcount++;
+                snprintf (key, sizeof (key), "snapname%d", snapcount);
+
+                /* No buffer of our own is handed to the dictionary here,
+                 * so there is nothing to release on failure. */
+                ret = dict_set_dynstr_with_alloc (dict, key,
+                                                  snap_vol->snapshot->snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to "
+                                "set %s", key);
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "snapcount", snapcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snapcount");
+                goto out;
+        }
+
+        ret = 0;
+out:
+
+        return ret;
+}
+
+/* RPC-side handler for "snapshot list".
+ *
+ * When dict carries a "volname" the snapshots of that volume are
+ * listed, otherwise every snapshot is.  On success the filled
+ * dictionary is sent back to the cli from here.
+ *
+ * @param req       RPC request object, needed to send the response
+ * @param op        glusterd operation, needed to send the response
+ * @param dict      carries the request keys and receives the names
+ * @param err_str   buffer for a user-visible error message
+ * @param len       size of err_str
+ * @param op_errno  set to EG_NOVOL when the named volume does not exist
+ * @return 0 on success, non-zero on error (the caller then sends the
+ *         response)
+ */
+int
+glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op,
+                               dict_t *dict, char *err_str, size_t len,
+                               uint32_t *op_errno)
+{
+        xlator_t           *this    = THIS;
+        glusterd_volinfo_t *volinfo = NULL;
+        char               *volname = NULL;
+        int                 ret     = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, req, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        /* "volname" is optional; only the resulting pointer is looked
+         * at, the return value of the lookup is not. */
+        ret = dict_get_str (dict, "volname", &volname);
+
+        if (volname != NULL) {
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        snprintf (err_str, len,
+                                  "Volume (%s) does not exist", volname);
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND,
+                                "%s", err_str);
+                        *op_errno = EG_NOVOL;
+                        goto out;
+                }
+
+                ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_SNAP_LIST_GET_FAIL,
+                                "Failed to get snapshot list for volume %s",
+                                volname);
+                        goto out;
+                }
+        } else {
+                ret = glusterd_snapshot_get_all_snapnames (dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_SNAP_LIST_GET_FAIL,
+                                "Failed to get snapshot list");
+                        goto out;
+                }
+        }
+
+        /* The list is complete, so answer the cli from here.  On any
+         * failure above the caller is the one that sends the response. */
+        ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NO_CLI_RESP, "Failed to send cli response");
+        }
+
+out:
+        return ret;
+}
+
+/* Snapshot create handler, executed on the originator node.
+ *
+ * Validates the request in @dict and decorates it with the values the
+ * mgmt_v3 snap phases read later:
+ *   - "snap-time"            current time
+ *   - "snapname"             suffixed with a GMT timestamp unless the
+ *                            "no-timestamp" flag is set
+ *   - "snap-id"              freshly generated uuid
+ *   - per volume N (1..volcount): "volume<N>_username",
+ *     "volume<N>_password", "vol<N>_volid" and "snap-volname<N>"
+ * and then calls glusterd_mgmt_v3_initiate_snap_phases().
+ *
+ * @param req      RPC request object
+ * @param op       gluster operation
+ * @param dict     dictionary containing the snapshot create request
+ * @param err_str  populated when the snapname is too long
+ * @param len      length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op,
+                                 dict_t *dict, char *err_str, size_t len)
+{
+        int           ret                                  = -1;
+        char         *volname                              = NULL;
+        char         *snapname                             = NULL;
+        int64_t       volcount                             = 0;
+        xlator_t     *this                                 = NULL;
+        char          key[PATH_MAX]                        = "";
+        char         *username                             = NULL;
+        char         *password                             = NULL;
+        uuid_t       *uuid_ptr                             = NULL;
+        uuid_t        tmp_uuid                             = {0};
+        int           i                                    = 0;
+        /* Plain int: dict_get_str_boolean reports failure as -1, which a
+         * gf_boolean_t is not meant to carry. */
+        int           no_timestamp                         = 0;
+        char          snap_volname[GD_VOLUME_NAME_MAX]     = {0, };
+        char          new_snapname[GLUSTERD_MAX_SNAP_NAME] = {0, };
+        char          gmt_snaptime[GLUSTERD_MAX_SNAP_NAME] = {0, };
+        time_t        snap_time;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to "
+                        "get the volume count");
+                goto out;
+        }
+        if (volcount <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid volume count %"PRId64
+                        " supplied", volcount);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to get the snapname");
+                goto out;
+        }
+
+        no_timestamp = dict_get_str_boolean (dict, "no-timestamp", _gf_false);
+        if (no_timestamp == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+                        "no-timestamp flag ");
+                /* ret is still 0 from the lookup above; without this the
+                 * handler would report success while doing nothing. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int64 (dict, "snap-time", (int64_t)time(&snap_time));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to set snap-time");
+                goto out;
+        }
+
+        if (!no_timestamp) {
+                /* NOTE(review): snprintf truncates to the buffer size, so
+                 * the length check further down cannot fire for a
+                 * timestamped name; an over-long name loses part of its
+                 * suffix instead. Confirm whether that is intended. */
+                strftime (gmt_snaptime, sizeof (gmt_snaptime),
+                          "_GMT-%Y.%m.%d-%H.%M.%S", gmtime(&snap_time));
+                snprintf (new_snapname, sizeof (new_snapname), "%s%s",
+                          snapname, gmt_snaptime);
+                ret = dict_set_dynstr_with_alloc (dict, "snapname",
+                                                  new_snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Unable to update "
+                                "snap-name");
+                        goto out;
+                }
+                snapname = new_snapname;
+        }
+
+        if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) {
+                snprintf (err_str, len, "snapname cannot exceed 255 "
+                          "characters");
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!uuid_ptr) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out Of Memory");
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_generate (*uuid_ptr);
+        ret = dict_set_bin (dict, "snap-id", uuid_ptr, sizeof(uuid_t));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to set snap-id");
+                GF_FREE (uuid_ptr);
+                goto out;
+        }
+        /* The dict holds the buffer from here on. */
+        uuid_ptr = NULL;
+
+        for (i = 1; i <= volcount; i++) {
+                snprintf (key, sizeof (key), "volname%d", i);
+                ret = dict_get_str (dict, key, &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get volume name");
+                        goto out;
+                }
+
+                /* generate internal username and password for the snap*/
+                gf_uuid_generate (tmp_uuid);
+                username = gf_strdup (uuid_utoa (tmp_uuid));
+                if (!username) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Out Of Memory");
+                        ret = -1;
+                        goto out;
+                }
+                snprintf (key, sizeof(key), "volume%d_username", i);
+                ret = dict_set_dynstr (dict, key, username);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set snap "
+                                "username for volume %s", volname);
+                        GF_FREE (username);
+                        goto out;
+                }
+
+                gf_uuid_generate (tmp_uuid);
+                password = gf_strdup (uuid_utoa (tmp_uuid));
+                if (!password) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Out Of Memory");
+                        ret = -1;
+                        goto out;
+                }
+                snprintf (key, sizeof(key), "volume%d_password", i);
+                ret = dict_set_dynstr (dict, key, password);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set snap "
+                                "password for volume %s", volname);
+                        GF_FREE (password);
+                        goto out;
+                }
+
+                uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+                if (!uuid_ptr) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Out Of Memory");
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (key, sizeof(key) - 1, "vol%d_volid", i);
+                gf_uuid_generate (*uuid_ptr);
+                ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set snap_volid");
+                        GF_FREE (uuid_ptr);
+                        goto out;
+                }
+                GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *uuid_ptr);
+                snprintf (key, sizeof (key), "snap-volname%d", i);
+                ret = dict_set_dynstr_with_alloc (dict, key, snap_volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set snap volname");
+                        /* uuid_ptr was accepted by dict_set_bin above and
+                         * is still referenced by the dict; freeing it here
+                         * would leave the dict with a dangling pointer. */
+                        goto out;
+                }
+                uuid_ptr = NULL;
+        }
+
+        ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_INIT_FAIL,
+                        "Failed to initiate snap "
+                        "phases");
+        }
+
+out:
+        return ret;
+}
+
+/* Snapshot status handler, executed on the originator node.
+ *
+ * The request is passed unchanged to the mgmt v3 framework, which runs
+ * the snap phases for it.
+ *
+ * @param req      RPC request object
+ * @param op       gluster operation
+ * @param dict     dictionary containing snapshot status request
+ * @param err_str  error string buffer (only asserted non-NULL here)
+ * @param len      length of err_str buffer (not used here)
+ *
+ * @return 0 on success, otherwise the non-zero result of
+ *         glusterd_mgmt_v3_initiate_snap_phases()
+ */
+int
+glusterd_handle_snapshot_status (rpcsvc_request_t *req, glusterd_op_t op,
+                                 dict_t *dict, char *err_str, size_t len)
+{
+        int        ret  = -1;
+        xlator_t  *this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+
+        ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_INIT_FAIL,
+                        "Failed to initiate snap phases");
+
+        return ret;
+}
+
+/* Snapshot clone handler, executed on the originator node.
+ *
+ * Decorates the request in @dict with the values the mgmt_v3 snap phases
+ * read later and then calls glusterd_mgmt_v3_initiate_snap_phases():
+ *   - "volname1"           copy of the clone name (used for volume locking)
+ *   - "clone-id"           freshly generated uuid
+ *   - "volume1_username" / "volume1_password"  generated credentials
+ *   - "vol1_volid"         freshly generated uuid
+ *   - "clone-volname0"     see the review note in the body
+ *
+ * @param req      RPC request object
+ * @param op       gluster operation
+ * @param dict     dictionary containing the snapshot clone request
+ * @param err_str  error string buffer (only asserted non-NULL here)
+ * @param len      length of err_str buffer (not used here)
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_clone (rpcsvc_request_t *req, glusterd_op_t op,
+                                dict_t *dict, char *err_str, size_t len)
+{
+        int           ret                              = -1;
+        char         *clonename                        = NULL;
+        char         *snapname                         = NULL;
+        xlator_t     *this                             = NULL;
+        char          key[PATH_MAX]                    = "";
+        char         *username                         = NULL;
+        char         *password                         = NULL;
+        char         *volname                          = NULL;
+        uuid_t       *uuid_ptr                         = NULL;
+        uuid_t        tmp_uuid                         = {0};
+        int           i                                = 0;
+        char          snap_volname[GD_VOLUME_NAME_MAX] = {0, };
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+
+        ret = dict_get_str (dict, "clonename", &clonename);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to "
+                        "get the clone name");
+                goto out;
+        }
+
+        /*We need to take a volume lock on clone name*/
+        volname = gf_strdup (clonename);
+        if (!volname) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out Of Memory");
+                ret = -1;
+                goto out;
+        }
+        snprintf (key, sizeof(key), "volname1");
+        ret = dict_set_dynstr (dict, key, volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set clone "
+                        "name for volume locking");
+                GF_FREE (volname);
+                goto out;
+        }
+
+        /* Only the presence of the snapname is verified here. */
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "failed to get the snapname");
+                goto out;
+        }
+
+        uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!uuid_ptr) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out Of Memory");
+                ret = -1;
+                goto out;
+        }
+
+        gf_uuid_generate (*uuid_ptr);
+        ret = dict_set_bin (dict, "clone-id", uuid_ptr, sizeof(uuid_t));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to set clone-id");
+                GF_FREE (uuid_ptr);
+                goto out;
+        }
+        /* The dict holds the buffer from here on. */
+        uuid_ptr = NULL;
+
+        gf_uuid_generate (tmp_uuid);
+        username = gf_strdup (uuid_utoa (tmp_uuid));
+        if (!username) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out Of Memory");
+                ret = -1;
+                goto out;
+        }
+        snprintf (key, sizeof(key), "volume1_username");
+        ret = dict_set_dynstr (dict, key, username);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set clone "
+                        "username for volume %s", clonename);
+                GF_FREE (username);
+                goto out;
+        }
+
+        gf_uuid_generate (tmp_uuid);
+        password = gf_strdup (uuid_utoa (tmp_uuid));
+        if (!password) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out Of Memory");
+                ret = -1;
+                goto out;
+        }
+        snprintf (key, sizeof(key), "volume1_password");
+        ret = dict_set_dynstr (dict, key, password);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set clone "
+                        "password for volume %s", clonename);
+                GF_FREE (password);
+                goto out;
+        }
+
+        uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!uuid_ptr) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out Of Memory");
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof(key) - 1, "vol1_volid");
+        gf_uuid_generate (*uuid_ptr);
+        ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set clone_volid");
+                GF_FREE (uuid_ptr);
+                goto out;
+        }
+        /* The dict holds the buffer from here on. */
+        uuid_ptr = NULL;
+
+        /* NOTE(review): i is still 0 and snap_volname is never filled in,
+         * so this stores an empty string under "clone-volname0". Kept as
+         * is because the consumers of that key are not visible here;
+         * confirm whether the key is needed at all. */
+        snprintf (key, sizeof (key), "clone-volname%d", i);
+        ret = dict_set_dynstr_with_alloc (dict, key, snap_volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set snap volname");
+                /* No free here: the volid buffer was accepted by
+                 * dict_set_bin above and is still referenced by the dict. */
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_INIT_FAIL, "Failed to initiate "
+                        "snap phases");
+        }
+
+out:
+        return ret;
+}
+
+
+/* Snapshot restore handler, executed on the originator node.
+ *
+ * Looks up the snapshot named in the request, records the parent volume
+ * of each of its snap volumes in @dict as "volname<N>" (N starts at 1),
+ * stores their number as "volcount" and then hands the request to the
+ * mgmt_v3 framework.
+ *
+ * @param req       RPC request object
+ * @param op        gluster operation
+ * @param dict      dictionary containing snapshot restore request
+ * @param err_str   receives a message when the snapshot does not exist
+ * @param op_errno  set to EG_NOSNAP when the snapshot does not exist
+ * @param len       length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op,
+                                  dict_t *dict, char *err_str,
+                                  uint32_t *op_errno, size_t len)
+{
+        int                  ret            = -1;
+        int32_t              vol_idx        = 0;
+        char                 key[PATH_MAX]  = "";
+        char                *snapname       = NULL;
+        char                *parent_copy    = NULL;
+        glusterd_snap_t     *snap           = NULL;
+        glusterd_volinfo_t  *snap_volinfo   = NULL;
+        glusterd_conf_t     *conf           = NULL;
+        xlator_t            *this           = THIS;
+
+        GF_ASSERT (this);
+        conf = this->private;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get snapname");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (snap == NULL) {
+                snprintf (err_str, len, "Snapshot (%s) does not exist",
+                          snapname);
+                *op_errno = EG_NOSNAP;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        list_for_each_entry (snap_volinfo, &snap->volumes, vol_list) {
+                snprintf (key, sizeof (key), "volname%d", ++vol_idx);
+
+                parent_copy = gf_strdup (snap_volinfo->parent_volname);
+                if (parent_copy == NULL) {
+                        ret = -1;
+                        goto out;
+                }
+
+                /* On success the dict keeps the copy; it is released here
+                 * only when the dict refused it. */
+                ret = dict_set_dynstr (dict, key, parent_copy);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not set parent volume name %s in the dict",
+                                snap_volinfo->parent_volname);
+                        GF_FREE (parent_copy);
+                        goto out;
+                }
+                parent_copy = NULL;
+        }
+
+        ret = dict_set_int32 (dict, "volcount", vol_idx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Could not save volume count");
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_INIT_FAIL,
+                        "Failed to initiate snap phases");
+
+out:
+        return ret;
+}
+
+/* Build a snap object from the request in @dict, persist it with
+ * glusterd_store_snap() and insert it into priv->snapshots.
+ *
+ * Reads "snapname", "snap-id" and "snap-time" (all required) and
+ * "description" (optional) from @dict. The request is rejected when an
+ * existing snap already uses the same name or the same id.
+ *
+ * @param dict      request dictionary
+ * @param rsp_dict  passed to glusterd_snap_remove() if a partially built
+ *                  snap has to be torn down
+ *
+ * @return the new snap object, or NULL on any failure
+ */
+glusterd_snap_t*
+glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict)
+{
+ char *snapname = NULL;
+ uuid_t *snap_id = NULL;
+ char *description = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int64_t time_stamp = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ /* Fetch snapname, description, id and time from dict */
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to fetch snapname");
+ goto out;
+ }
+
+ /* Ignore ret value for description*/
+ ret = dict_get_str (dict, "description", &description);
+
+ ret = dict_get_bin (dict, "snap-id", (void **)&snap_id);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to fetch snap_id");
+ goto out;
+ }
+
+ ret = dict_get_int64 (dict, "snap-time", &time_stamp);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to fetch snap-time");
+ goto out;
+ }
+ if (time_stamp <= 0) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY, "Invalid time-stamp: %"PRId64,
+ time_stamp);
+ goto out;
+ }
+
+ /* ret is 0 on entry to this loop; it is set to -1 only when a snap
+  * with the same name or the same id is found, and doubles as the
+  * "duplicate found" flag tested right after the loop. */
+ cds_list_for_each_entry (snap, &priv->snapshots, snap_list) {
+ if (!strcmp (snap->snapname, snapname) ||
+ !gf_uuid_compare (snap->snap_id, *snap_id)) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_CREATION_FAIL,
+ "Found duplicate snap %s (%s)",
+ snap->snapname, uuid_utoa (snap->snap_id));
+ ret = -1;
+ break;
+ }
+ }
+ if (ret) {
+ /* snap still points at the existing entry; clear it so the
+  * cleanup at out: does not remove that entry. */
+ snap = NULL;
+ goto out;
+ }
+
+ /* ret is still 0 here, so a failed allocation skips the cleanup at
+  * out: and simply returns the NULL snap. */
+ snap = glusterd_new_snap_object ();
+ if (!snap) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_CREATION_FAIL, "Could not create "
+ "the snap object for snap %s", snapname);
+ goto out;
+ }
+
+ /* NOTE(review): unbounded copy; assumes snapname fits in
+  * snap->snapname - confirm the length is validated on every node
+  * that runs this function. */
+ strcpy (snap->snapname, snapname);
+ gf_uuid_copy (snap->snap_id, *snap_id);
+ snap->time_stamp = (time_t)time_stamp;
+ /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot
+ is taken and snap is really ready to use, set the status to
+ GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete
+ snapshots and cleaning them up.
+ */
+ snap->snap_status = GD_SNAP_STATUS_INIT;
+ if (description) {
+ snap->description = gf_strdup (description);
+ if (snap->description == NULL) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_CREATION_FAIL,
+ "Saving the Snapshot Description Failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_store_snap (snap);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_CREATION_FAIL, "Could not store snap"
+ "object %s", snap->snapname);
+ goto out;
+ }
+
+ /* Insert using glusterd_compare_snap_time as the ordering callback. */
+ glusterd_list_add_order (&snap->snap_list, &priv->snapshots,
+ glusterd_compare_snap_time);
+
+ gf_msg_trace (this->name, 0, "Snapshot %s added to the list",
+ snap->snapname);
+
+ ret = 0;
+
+out:
+ /* A non-zero ret with a non-NULL snap means the object was created
+  * but could not be completed: tear it down and report failure. */
+ if (ret) {
+ if (snap)
+ glusterd_snap_remove (rsp_dict, snap,
+ _gf_true, _gf_true,
+ _gf_false);
+ snap = NULL;
+ }
+
+ return snap;
+}
+
+/* Append one missed-snapshot record for @brickinfo to @rsp_dict.
+ *
+ * The record is a string of the form
+ *   <brick node uuid>:<snap uuid>=<snap volname>:<brick number>:
+ *   <brick path>:<op>:<GD_MISSED_SNAP_PENDING>
+ * stored under "missed_snaps_<N>", where N is the current value of
+ * "missed_snap_count" in @rsp_dict (0 when the key is absent). The
+ * counter is then written back incremented by one.
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict,
+                                   glusterd_volinfo_t *snap_vol,
+                                   glusterd_brickinfo_t *brickinfo,
+                                   int32_t brick_number, int32_t op)
+{
+        int32_t    ret                 = -1;
+        int32_t    entry_count         = -1;
+        char      *snap_id_str         = NULL;
+        char       entry[PATH_MAX]     = "";
+        char       entry_key[PATH_MAX] = "";
+        xlator_t  *this                = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (brickinfo);
+
+        /* A private copy of the snap id string is taken before uuid_utoa
+         * is called again for the brick uuid below. */
+        snap_id_str = gf_strdup (uuid_utoa (snap_vol->snapshot->snap_id));
+        if (snap_id_str == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (entry, sizeof(entry),
+                  "%s:%s=%s:%d:%s:%d:%d", uuid_utoa(brickinfo->uuid),
+                  snap_id_str, snap_vol->volname, brick_number,
+                  brickinfo->path, op, GD_MISSED_SNAP_PENDING);
+
+        /* A missing counter means this is the first entry. */
+        if (dict_get_int32 (rsp_dict, "missed_snap_count", &entry_count))
+                entry_count = 0;
+
+        snprintf (entry_key, sizeof(entry_key), "missed_snaps_%d",
+                  entry_count);
+        ret = dict_set_dynstr_with_alloc (rsp_dict, entry_key, entry);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set missed_snap_entry (%s) in the rsp_dict.",
+                        entry);
+                goto out;
+        }
+
+        /* Publish the updated number of entries. */
+        entry_count++;
+        ret = dict_set_int32 (rsp_dict, "missed_snap_count", entry_count);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set missed_snap_count for %s in the rsp_dict.",
+                        entry);
+
+out:
+        GF_FREE (snap_id_str);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Take an LVM snapshot of the device backing @origin_brick_path.
+ *
+ * The function first runs "lvcreate --help" and scans its output for
+ * "setactivationskip"; when the option is present the snapshot is
+ * created with "--setactivationskip n". The snapshot is named
+ * brickinfo->device_path.
+ *
+ * @param brickinfo          snap brick whose device_path names the
+ *                           snapshot to create
+ * @param origin_brick_path  path of the brick being snapshotted
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_take_lvm_snapshot (glusterd_brickinfo_t *brickinfo,
+                            char *origin_brick_path)
+{
+        char             msg[NAME_MAX]  = "";
+        char             buf[PATH_MAX]  = "";
+        char            *ptr            = NULL;
+        char            *origin_device  = NULL;
+        int              ret            = -1;
+        gf_boolean_t     match          = _gf_false;
+        runner_t         runner         = {0,};
+        xlator_t        *this           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (origin_brick_path);
+
+        origin_device = glusterd_get_brick_mount_device (origin_brick_path);
+        if (!origin_device) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_GET_INFO_FAIL, "getting device name for "
+                        "the brick %s failed", origin_brick_path);
+                goto out;
+        }
+
+        /* Figuring out if setactivationskip flag is supported or not */
+        runinit (&runner);
+        snprintf (msg, sizeof (msg), "running lvcreate help");
+        runner_add_args (&runner, LVM_CREATE, "--help", NULL);
+        runner_log (&runner, "", GF_LOG_DEBUG, msg);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        ret = runner_start (&runner);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_LVCREATE_FAIL,
+                        "Failed to run lvcreate help");
+                runner_end (&runner);
+                goto out;
+        }
+
+        /* Looking for setactivationskip in lvcreate --help */
+        do {
+                ptr = fgets(buf, sizeof(buf),
+                            runner_chio (&runner, STDOUT_FILENO));
+                if (ptr) {
+                        if (strstr(buf, "setactivationskip")) {
+                                match = _gf_true;
+                                break;
+                        }
+                }
+        } while (ptr != NULL);
+        runner_end (&runner);
+
+        /* Taking the actual snapshot */
+        runinit (&runner);
+        snprintf (msg, sizeof (msg), "taking snapshot of the brick %s",
+                  origin_brick_path);
+        if (match == _gf_true)
+                runner_add_args (&runner, LVM_CREATE, "-s", origin_device,
+                                 "--setactivationskip", "n", "--name",
+                                 brickinfo->device_path, NULL);
+        else
+                runner_add_args (&runner, LVM_CREATE, "-s", origin_device,
+                                 "--name", brickinfo->device_path, NULL);
+        runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+        ret = runner_run (&runner);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "taking snapshot of the "
+                        "brick (%s) of device %s failed",
+                        origin_brick_path, origin_device);
+        }
+
+out:
+        /* NOTE(review): assumes glusterd_get_brick_mount_device returns a
+         * heap-allocated copy that the caller must release (it is not
+         * freed anywhere else in this function) - confirm against its
+         * definition. The failure message above has already been logged
+         * by the time the name is released. */
+        GF_FREE (origin_device);
+
+        return ret;
+}
+
+/* Create and populate the mount point of one snapshot brick.
+ *
+ * Steps, in order:
+ *   1. create <snap_mount_dir>/<snap volname>/brick<brick_count + 1>
+ *   2. mount the snapshot device there via glusterd_mount_lvm_snapshot()
+ *   3. stat the brick path
+ *   4. replace the volume-id extended attribute on the brick path with
+ *      the snap volume's id
+ * When any step fails, the mount point is unmounted again through
+ * glusterd_umount().
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snap_brick_create (glusterd_volinfo_t *snap_volinfo,
+                            glusterd_brickinfo_t *brickinfo,
+                            int32_t brick_count)
+{
+        int32_t           ret                   = -1;
+        char              mount_path[PATH_MAX]  = "";
+        struct stat       brick_stat            = {0, };
+        glusterd_conf_t  *priv                  = NULL;
+        xlator_t         *this                  = THIS;
+
+        priv = this->private;
+
+        GF_ASSERT (snap_volinfo);
+        GF_ASSERT (brickinfo);
+
+        snprintf (mount_path, sizeof (mount_path),
+                  "%s/%s/brick%d", snap_mount_dir, snap_volinfo->volname,
+                  brick_count + 1);
+
+        ret = mkdir_p (mount_path, 0777, _gf_true);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "creating the brick directory %s for the snapshot %s(device: %s) failed",
+                        mount_path, snap_volinfo->volname,
+                        brickinfo->device_path);
+                goto out;
+        }
+
+        /* The snap logical device is mounted with the runner-based helper
+         * for now. The mount(2) equivalent would be:
+         *   mount (device, snap_brick_mount_path, entry->mnt_type,
+         *          MS_MGC_VAL, "nouuid");
+         */
+        ret = glusterd_mount_lvm_snapshot (brickinfo, mount_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_LVM_MOUNT_FAILED,
+                        "Failed to mount lvm snapshot.");
+                goto out;
+        }
+
+        ret = sys_stat (brickinfo->path, &brick_stat);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "stat of the brick %s(brick mount: %s) failed (%s)",
+                        brickinfo->path, mount_path, strerror (errno));
+                goto out;
+        }
+
+        ret = sys_lsetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY,
+                             snap_volinfo->volume_id, 16, XATTR_REPLACE);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SETXATTR_FAIL,
+                        "Failed to set extended attribute %s on %s. Reason: %s, snap: %s",
+                        GF_XATTR_VOL_ID_KEY, brickinfo->path,
+                        strerror (errno), snap_volinfo->volname);
+                goto out;
+        }
+
+out:
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_UMOUNTING_SNAP_BRICK,
+                        "unmounting the snap brick mount %s", mount_path);
+                /* The external umount command is used because the umount2
+                 * system call leaves the mtab entry behind. */
+                glusterd_umount (mount_path);
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Create the brickinfo of one snapshot brick and append it to
+ * snap_vol->bricks.
+ *
+ * The original brick path is recorded in @dict under
+ * "vol<volcount>.origin_brickpath<brick_count>". File-system type, mount
+ * options, brick directory and snapshot device are then read from @dict
+ * (keys "vol<volcount>.fstype<N>", ".mnt_opts<N>", ".brickdir<N>" and
+ * ".brick_snapdevice<N>"). Missing values, or a local brick that is not
+ * running, mark the brick as a missed snapshot, which is reported through
+ * @rsp_dict.
+ *
+ * @param dict                request dictionary
+ * @param rsp_dict            receives missed-snapshot entries
+ * @param snap_vol            snap volume the new brick is added to
+ * @param original_brickinfo  brick of the origin volume; its fstype and
+ *                            mnt_opts are refreshed from @dict as well
+ * @param volcount            index of the volume in the request
+ * @param brick_count         zero-based index of the brick
+ * @param clone               non-zero to assign a fresh brick id instead
+ *                            of inheriting the original brick's id
+ *
+ * @return 0 on success, non-zero on failure
+ */
+static int32_t
+glusterd_add_brick_to_snap_volume (dict_t *dict, dict_t *rsp_dict,
+                                   glusterd_volinfo_t  *snap_vol,
+                                   glusterd_brickinfo_t *original_brickinfo,
+                                   int64_t volcount, int32_t brick_count,
+                                   int clone)
+{
+        char                    key[PATH_MAX]             = "";
+        char                   *value                     = NULL;
+        char                   *snap_brick_dir            = NULL;
+        char                    snap_brick_path[PATH_MAX] = "";
+        char                   *snap_device               = NULL;
+        glusterd_brickinfo_t   *snap_brickinfo            = NULL;
+        gf_boolean_t            add_missed_snap           = _gf_false;
+        int32_t                 ret                       = -1;
+        xlator_t               *this                      = NULL;
+        char                    abspath[PATH_MAX]         = {0};
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (original_brickinfo);
+
+        snprintf (key, sizeof(key), "vol%"PRId64".origin_brickpath%d",
+                  volcount, brick_count);
+        ret = dict_set_dynstr_with_alloc (dict, key, original_brickinfo->path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set %s", key);
+                goto out;
+        }
+
+        ret = glusterd_brickinfo_new (&snap_brickinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_NEW_INFO_FAIL,
+                        "initializing the brick for the snap "
+                        "volume failed (snapname: %s)",
+                        snap_vol->snapshot->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key) - 1, "vol%"PRId64".fstype%d", volcount,
+                  brick_count);
+        ret = dict_get_str (dict, key, &value);
+        if (!ret) {
+                /* Update the fstype in original brickinfo as well. The
+                 * value comes from the dict, so the copy is bounded by the
+                 * destination size, like the copy into the snap brick. */
+                snprintf (original_brickinfo->fstype,
+                          sizeof (original_brickinfo->fstype), "%s", value);
+                strncpy (snap_brickinfo->fstype, value,
+                         (sizeof (snap_brickinfo->fstype) - 1));
+        } else {
+                if (is_origin_glusterd (dict) == _gf_true)
+                        add_missed_snap = _gf_true;
+        }
+
+        snprintf (key, sizeof(key) - 1, "vol%"PRId64".mnt_opts%d", volcount,
+                  brick_count);
+        ret = dict_get_str (dict, key, &value);
+        if (!ret) {
+                /* Update the mnt_opts in original brickinfo as well */
+                /* NOTE(review): unbounded copies of a dict-supplied value;
+                 * the size of mnt_opts is not visible here - confirm. */
+                strcpy (original_brickinfo->mnt_opts, value);
+                strcpy (snap_brickinfo->mnt_opts, value);
+        } else {
+                if (is_origin_glusterd (dict) == _gf_true)
+                        add_missed_snap = _gf_true;
+        }
+
+        snprintf (key, sizeof(key) - 1, "vol%"PRId64".brickdir%d", volcount,
+                  brick_count);
+        ret = dict_get_str (dict, key, &snap_brick_dir);
+        if (ret) {
+                /* Using original brickinfo here because it will be a
+                 * pending snapshot and storing the original brickinfo
+                 * will help in mapping while recreating the missed snapshot
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_NOT_FOUND, "Unable to fetch "
+                        "snap mount path(%s). Adding to missed_snap_list", key);
+                snap_brickinfo->snap_status = -1;
+
+                snap_brick_dir = original_brickinfo->mount_dir;
+
+                /* In the originator node add snaps missed
+                 * from different nodes to the dict
+                 */
+                if (is_origin_glusterd (dict) == _gf_true)
+                        add_missed_snap = _gf_true;
+        }
+
+        if ((snap_brickinfo->snap_status != -1) &&
+            (!gf_uuid_compare (original_brickinfo->uuid, MY_UUID)) &&
+            (!glusterd_is_brick_started (original_brickinfo))) {
+                /* In case if the brick goes down after prevalidate. */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRICK_DISCONNECTED, "brick %s:%s is not"
+                        " started (snap: %s)",
+                        original_brickinfo->hostname,
+                        original_brickinfo->path,
+                        snap_vol->snapshot->snapname);
+
+                snap_brickinfo->snap_status = -1;
+                add_missed_snap = _gf_true;
+        }
+
+        if (add_missed_snap) {
+                ret = glusterd_add_missed_snaps_to_dict (rsp_dict,
+                                                snap_vol,
+                                                original_brickinfo,
+                                                brick_count + 1,
+                                                GF_SNAP_OPTION_TYPE_CREATE);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+                                "Failed to add missed"
+                                " snapshot info for %s:%s in the rsp_dict",
+                                original_brickinfo->hostname,
+                                original_brickinfo->path);
+                        goto out;
+                }
+        }
+
+        /* Create brick-path in the format /var/run/gluster/snaps/ *
+         * <snap-uuid>/<original-brick#>/snap-brick-dir *
+         */
+        snprintf (snap_brick_path, sizeof(snap_brick_path),
+                  "%s/%s/brick%d%s", snap_mount_dir,
+                  snap_vol->volname, brick_count+1,
+                  snap_brick_dir);
+
+        snprintf (key, sizeof(key), "vol%"PRId64".brick_snapdevice%d",
+                  volcount, brick_count);
+        ret = dict_get_str (dict, key, &snap_device);
+        if (ret) {
+                /* If the device name is empty, so will be the brick path
+                 * Hence the missed snap has already been added above
+                 */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_NOT_FOUND, "Unable to fetch "
+                        "snap device (%s). Leaving empty", key);
+        } else
+                strcpy (snap_brickinfo->device_path, snap_device);
+
+        /* The lookup result above is deliberately not fatal; ret is
+         * overwritten here. */
+        ret = gf_canonicalize_path (snap_brick_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CANONICALIZE_FAIL,
+                        "Failed to canonicalize path");
+                goto out;
+        }
+
+        strcpy (snap_brickinfo->hostname, original_brickinfo->hostname);
+        strcpy (snap_brickinfo->path, snap_brick_path);
+
+        if (!realpath (snap_brick_path, abspath)) {
+                /* ENOENT indicates that brick path has not been created which
+                 * is a valid scenario */
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                GD_MSG_BRICKINFO_CREATE_FAIL, "realpath () "
+                                "failed for brick %s. The underlying filesystem"
+                                " may be in bad state", snap_brick_path);
+                        ret = -1;
+                        goto out;
+                }
+        }
+        /* NOTE(review): this copy never writes a terminator; it presumably
+         * relies on glusterd_brickinfo_new returning zeroed memory -
+         * confirm. */
+        strncpy (snap_brickinfo->real_path, abspath, strlen(abspath));
+
+        strcpy (snap_brickinfo->mount_dir, original_brickinfo->mount_dir);
+        gf_uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid);
+        /* AFR changelog names are based on brick_id and hence the snap
+         * volume's bricks must retain the same ID */
+        cds_list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks);
+
+        if (clone) {
+                GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (snap_brickinfo, snap_vol,
+                                                      brick_count);
+        } else
+                strcpy (snap_brickinfo->brick_id, original_brickinfo->brick_id);
+
+out:
+        /* Every failing goto happens before the brick is linked into
+         * snap_vol->bricks, so releasing it here cannot leave a dangling
+         * list entry. */
+        if (ret && snap_brickinfo)
+                GF_FREE (snap_brickinfo);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Give the file system on a snapshot brick a new label.
+ *
+ * A fresh uuid, rendered without hyphens, is used as the label. It is
+ * cut to 12 characters and applied with xfs_admin for xfs, or cut to 16
+ * characters and applied with tune2fs for ext2/ext3/ext4. Any other
+ * file-system type is reported as unsupported.
+ *
+ * @param brickinfo brickinfo of the snap volume
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_update_fs_label (glusterd_brickinfo_t *brickinfo)
+{
+        int32_t       ret              = -1;
+        int           label_len        = 0;
+        const char   *label_tool       = NULL;
+        char          msg [PATH_MAX]   = "";
+        char          label [NAME_MAX] = "";
+        uuid_t        uuid             = {0,};
+        runner_t      runner           = {0,};
+        xlator_t     *this             = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (brickinfo);
+
+        /* The label is derived from a newly generated uuid. */
+        gf_uuid_generate (uuid);
+        GLUSTERD_GET_UUID_NOHYPHEN (label, uuid);
+
+        runinit (&runner);
+
+        /* Pick the tool and the label length for the file system. */
+        if (0 == strcmp (brickinfo->fstype, "xfs")) {
+                label_tool = "xfs_admin";
+                label_len  = 12;
+        } else if (0 == strcmp (brickinfo->fstype, "ext4") ||
+                   0 == strcmp (brickinfo->fstype, "ext3") ||
+                   0 == strcmp (brickinfo->fstype, "ext2")) {
+                label_tool = "tune2fs";
+                label_len  = 16;
+        } else {
+                gf_msg (this->name, GF_LOG_WARNING, EOPNOTSUPP,
+                        GD_MSG_OP_UNSUPPORTED,
+                        "Changing file-system label of %s file-system is not supported as of now",
+                        brickinfo->fstype);
+                runner_end (&runner);
+                return -1;
+        }
+
+        /* Cut the label down to the length chosen above. */
+        label [label_len] = '\0';
+        snprintf (msg, sizeof (msg),
+                  "Changing filesystem label of %s brick to %s",
+                  brickinfo->path, label);
+        runner_add_args (&runner, label_tool, "-L", label,
+                         brickinfo->device_path, NULL);
+
+        runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+        ret = runner_run (&runner);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_FS_LABEL_UPDATE_FAIL,
+                        "Failed to change filesystem label of %s brick to %s",
+                        brickinfo->path, label);
+
+        return ret;
+}
+
+/* Snapshot a single brick and bring the snapshot brick up.
+ *
+ * The origin brick path is read from @dict under
+ * "vol<volcount>.origin_brickpath<brick_count>". The LVM snapshot is
+ * taken, the snapshot's file-system label is regenerated (a failure
+ * there is logged but does not fail the snapshot), and the snapshot
+ * brick is then created through glusterd_snap_brick_create().
+ *
+ * @return 0 on success, non-zero on failure
+ */
+static int32_t
+glusterd_take_brick_snapshot (dict_t *dict, glusterd_volinfo_t *snap_vol,
+                              glusterd_brickinfo_t *brickinfo,
+                              int32_t volcount, int32_t brick_count)
+{
+        int32_t    ret            = -1;
+        char       key[PATH_MAX]  = "";
+        char      *origin_path    = NULL;
+        xlator_t  *this           = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (snap_vol);
+        GF_ASSERT (brickinfo);
+
+        /* Without a device path there is nothing to snapshot. */
+        if (brickinfo->device_path[0] == '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY,
+                        "Device path is empty brick %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof(key) - 1, "vol%d.origin_brickpath%d", volcount,
+                  brick_count);
+        ret = dict_get_str (dict, key, &origin_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch brick path (%s)", key);
+                goto out;
+        }
+
+        ret = glusterd_take_lvm_snapshot (brickinfo, origin_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL,
+                        "Failed to take snapshot of brick %s:%s",
+                        brickinfo->hostname, origin_path);
+                goto out;
+        }
+
+        /* The snapshot starts out with the same file-system label as the
+         * origin brick, which causes trouble at mount time, so a new
+         * label is generated. Only XFS and ext2/ext3/ext4 are handled, and
+         * a failure here must not fail the snapshot: log and carry on.
+         */
+        ret = glusterd_update_fs_label (brickinfo);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_FS_LABEL_UPDATE_FAIL,
+                        "Failed to update file-system label for %s brick",
+                        brickinfo->path);
+
+        /* Create the complete brick. */
+        ret = glusterd_snap_brick_create (snap_vol, brickinfo, brick_count);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_CREATION_FAIL,
+                        "not able to create the brick for the snap %s, volume %s",
+                        snap_vol->snapshot->snapname, snap_vol->volname);
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Save and remove options listed in unsupported_opt from volinfo->dict.
+ *
+ * unsupported_opt is a table terminated by an entry whose key is NULL.
+ * For every entry for which glusterd_volinfo_get() yields a non-NULL
+ * value, a private copy of that value is stored in the entry and the key
+ * is deleted from volinfo->dict.
+ *
+ * Returns 0 on success, -1 if volinfo is NULL or a copy cannot be
+ * allocated. Copies stored before a failure remain in the table.
+ */
+static int
+glusterd_snap_clear_unsupported_opt (glusterd_volinfo_t *volinfo,
+                          struct gd_snap_unsupported_opt_t *unsupported_opt)
+{
+        struct gd_snap_unsupported_opt_t *opt = NULL;
+        int                               ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+
+        for (opt = unsupported_opt; opt->key; opt++) {
+                /* The lookup result lands in opt->value; the return code
+                 * of the lookup itself is not inspected. */
+                glusterd_volinfo_get (volinfo, opt->key, &opt->value);
+                if (!opt->value)
+                        continue;
+
+                /* Replace the looked-up pointer with our own copy before
+                 * the key is deleted from the dict. */
+                opt->value = gf_strdup (opt->value);
+                if (!opt->value)
+                        goto out;
+
+                dict_del (volinfo->dict, opt->key);
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Put the values saved in unsupported_opt back into volinfo->dict.
+ *
+ * unsupported_opt is a table terminated by an entry whose key is NULL.
+ * Entries without a saved value are skipped. After a successful
+ * dict_set_dynstr() the entry's value is reset to NULL.
+ *
+ * Returns 0 on success, -1 if volinfo is NULL, or the dict_set_dynstr()
+ * error code of the first entry that could not be set.
+ */
+static int
+glusterd_snap_set_unsupported_opt (glusterd_volinfo_t *volinfo,
+                          struct gd_snap_unsupported_opt_t *unsupported_opt)
+{
+        struct gd_snap_unsupported_opt_t *opt = NULL;
+        int                               ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+
+        for (opt = unsupported_opt; opt->key; opt++) {
+                if (opt->value) {
+                        ret = dict_set_dynstr (volinfo->dict, opt->key,
+                                               opt->value);
+                        if (ret != 0) {
+                                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "dict set failed");
+                                goto out;
+                        }
+                        opt->value = NULL;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Create the volinfo of a snapshot (or clone) volume from origin_vol.
+ *
+ * The function duplicates origin_vol, assigns the volume id, username and
+ * password found in dict, adds one snap brick per origin brick, stores the
+ * new volinfo and generates its brick and client volfiles.
+ *
+ * @param origin_vol volume being snapshotted (or the snap volume being
+ *                   cloned when clone is non-zero)
+ * @param snap       snapshot object the new volume belongs to
+ * @param dict       request dictionary; read keys are
+ *                   "volume<volcount>_username", "volume<volcount>_password",
+ *                   "vol<volcount>_volid" and, for clones, "clonename"
+ * @param rsp_dict   response dictionary handed to the helpers
+ * @param volcount   index of the volume inside dict
+ * @param clone      non-zero when a clone is created instead of a snapshot
+ *
+ * @return the new volinfo on success, NULL on failure. On failure any
+ *         partially built volume is removed through
+ *         glusterd_snap_volume_remove().
+ */
+glusterd_volinfo_t *
+glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
+                      dict_t *dict, dict_t *rsp_dict, int64_t volcount,
+                      int clone)
+{
+        char                    key[PATH_MAX]   = "";
+        char                   *username        = NULL;
+        char                   *password        = NULL;
+        glusterd_brickinfo_t   *brickinfo       = NULL;
+        glusterd_conf_t        *priv            = NULL;
+        glusterd_volinfo_t     *snap_vol        = NULL;
+        uuid_t                 *snap_volid      = NULL;
+        int32_t                 ret             = -1;
+        int32_t                 brick_count     = 0;
+        xlator_t               *this            = NULL;
+        char                   *clonename       = NULL;
+        gf_boolean_t            conf_present    = _gf_false;
+        int                     i               = 0;
+
+        /* Options that are saved and removed from a snap volume while its
+         * volfiles are generated, and put back afterwards. */
+        struct gd_snap_unsupported_opt_t  unsupported_opt[] = {
+                {.key   = VKEY_FEATURES_QUOTA,
+                 .value = NULL},
+                {.key   = VKEY_FEATURES_INODE_QUOTA,
+                 .value = NULL},
+                {.key   = "feature.deem-statfs",
+                 .value = NULL},
+                {.key   = "features.quota-deem-statfs",
+                 .value = NULL},
+                {.key   = NULL,
+                 .value = NULL}
+        };
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (dict);
+        GF_ASSERT (origin_vol);
+        GF_ASSERT (rsp_dict);
+
+        /* fetch username, password and vol_id from dict*/
+        snprintf (key, sizeof(key), "volume%"PRId64"_username", volcount);
+        ret = dict_get_str (dict, key, &username);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get %s for "
+                        "snap %s", key, snap->snapname);
+                goto out;
+        }
+        snprintf (key, sizeof(key), "volume%"PRId64"_password", volcount);
+        ret = dict_get_str (dict, key, &password);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get %s for "
+                        "snap %s", key, snap->snapname);
+                goto out;
+        }
+
+        snprintf (key, sizeof(key) - 1, "vol%"PRId64"_volid", volcount);
+        ret = dict_get_bin (dict, key, (void **)&snap_volid);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to fetch snap_volid");
+                goto out;
+        }
+
+        /* We are not setting the username and password here as
+         * we need to set the user name and password passed in
+         * the dictionary
+         */
+        ret = glusterd_volinfo_dup (origin_vol, &snap_vol, _gf_false);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_OP_FAILED, "Failed to duplicate volinfo "
+                        "for the snapshot %s", snap->snapname);
+                goto out;
+        }
+
+        /* uuid is used as lvm snapshot name.
+           This will avoid restrictions on snapshot names provided by user */
+        gf_uuid_copy (snap_vol->volume_id, *snap_volid);
+        snap_vol->is_snap_volume = _gf_true;
+        snap_vol->snapshot = snap;
+
+        if (clone) {
+                snap_vol->is_snap_volume = _gf_false;
+                ret = dict_get_str (dict, "clonename", &clonename);
+                if (ret) {
+                        /* Report the key that was actually looked up; the
+                         * key buffer still holds the volid key here. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get %s "
+                                "for snap %s", "clonename", snap->snapname);
+                        goto out;
+                }
+                cds_list_add_tail (&snap_vol->vol_list, &snap->volumes);
+                /* NOTE(review): unbounded copy; assumes clonename was
+                 * length-checked earlier - confirm against callers. */
+                strcpy(snap_vol->volname, clonename);
+                gf_uuid_copy (snap_vol->restored_from_snap,
+                              origin_vol->snapshot->snap_id);
+
+        } else {
+                GLUSTERD_GET_UUID_NOHYPHEN (snap_vol->volname, *snap_volid);
+                strcpy (snap_vol->parent_volname, origin_vol->volname);
+                ret = glusterd_list_add_snapvol (origin_vol, snap_vol);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_LIST_SET_FAIL, "could not add the "
+                                "snap volume %s to the list",
+                                snap_vol->volname);
+                        goto out;
+                }
+                /* TODO : Sync before taking a snapshot */
+                /* Copy the status and config files of geo-replication before
+                 * taking a snapshot. During restore operation these files needs
+                 * to be copied back in /var/lib/glusterd/georeplication/
+                 */
+                ret = glusterd_copy_geo_rep_files (origin_vol, snap_vol,
+                                                   rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_OP_FAILED, "Failed to copy "
+                                "geo-rep config and status files for volume %s",
+                                origin_vol->volname);
+                        goto out;
+                }
+        }
+
+        ret = glusterd_copy_nfs_ganesha_file (origin_vol, snap_vol);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_OP_FAILED, "Failed to copy export "
+                        "file for volume %s", origin_vol->volname);
+                goto out;
+        }
+        glusterd_auth_set_username (snap_vol, username);
+        glusterd_auth_set_password (snap_vol, password);
+
+        /* Adding snap brickinfos to the snap volinfo */
+        brick_count = 0;
+        cds_list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) {
+                ret = glusterd_add_brick_to_snap_volume (dict, rsp_dict,
+                                                         snap_vol, brickinfo,
+                                                         volcount, brick_count,
+                                                         clone);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL,
+                                "Failed to add the snap brick for "
+                                "%s:%s to the snap volume",
+                                brickinfo->hostname, brickinfo->path);
+                        goto out;
+                }
+                brick_count++;
+        }
+
+        /* During snapshot creation if I/O is in progress,
+         * then barrier value is enabled. Hence during snapshot create
+         * and in-turn snapshot restore the barrier value is set to enable.
+         * Because of this further I/O on the mount point fails.
+         * Hence remove the barrier key from newly created snap volinfo
+         * before storing and generating the brick volfiles. Also update
+         * the snap vol's version after removing the barrier key.
+         */
+        dict_del (snap_vol->dict, "features.barrier");
+        gd_update_volume_op_versions (snap_vol);
+
+        ret = glusterd_store_volinfo (snap_vol,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_SET_FAIL, "Failed to store snapshot "
+                        "volinfo (%s) for snap %s", snap_vol->volname,
+                        snap->snapname);
+                goto out;
+        }
+
+        ret = glusterd_copy_quota_files (origin_vol, snap_vol, &conf_present);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_VOL_CONFIG_FAIL, "Failed to copy quota "
+                        "config and cksum for volume %s", origin_vol->volname);
+                goto out;
+        }
+
+        /* Only real snapshot volumes (not clones) get the unsupported
+         * options removed while the volfiles are generated. */
+        if (snap_vol->is_snap_volume) {
+                ret = glusterd_snap_clear_unsupported_opt (snap_vol,
+                                                           unsupported_opt);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_OP_FAILED, "Failed to clear quota "
+                                "option for the snap %s (volume: %s)",
+                                snap->snapname, origin_vol->volname);
+                        goto out;
+                }
+        }
+
+        ret = generate_brick_volfiles (snap_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "generating the brick "
+                        "volfiles for the snap %s (volume: %s) failed",
+                        snap->snapname, origin_vol->volname);
+                goto reset_option;
+        }
+
+        ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "generating the trusted "
+                        "client volfiles for the snap %s (volume: %s) failed",
+                        snap->snapname, origin_vol->volname);
+                goto reset_option;
+        }
+
+        ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "generating the client "
+                        "volfiles for the snap %s (volume: %s) failed",
+                        snap->snapname, origin_vol->volname);
+                goto reset_option;
+        }
+
+reset_option:
+        /* Put the saved options back, on success as well as after a
+         * volfile generation failure. */
+        if (snap_vol->is_snap_volume) {
+                if (glusterd_snap_set_unsupported_opt (snap_vol,
+                                                       unsupported_opt)) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOL_OP_FAILED, "Failed to reset quota "
+                                "option for the snap %s (volume: %s)",
+                                snap->snapname, origin_vol->volname);
+                }
+        }
+out:
+        if (ret) {
+                /* Release option values that are still held in the table. */
+                for (i = 0; unsupported_opt[i].key; i++)
+                        GF_FREE (unsupported_opt[i].value);
+
+                if (snap_vol)
+                        glusterd_snap_volume_remove (rsp_dict, snap_vol,
+                                                     _gf_true, _gf_true);
+                snap_vol = NULL;
+        }
+
+        return snap_vol;
+}
+
+/* Prevalidate function shared by snapshot activate and deactivate.
+ *
+ * Pass is_op_activate as _gf_true for activate and _gf_false for
+ * deactivate.
+ *
+ * @param dict           request dictionary; "snapname" is required and,
+ *                       for activate, "flags" as well
+ * @param op_errstr      receives an allocated error string on failure
+ * @param op_errno       receives EG_NOSNAP or EINVAL on failure
+ * @param rsp_dict       not used by this function
+ * @param is_op_activate selects which operation is validated
+ *
+ * @return 0 when the operation may proceed (this includes activating an
+ *         already started snapshot with GF_CLI_FLAG_OP_FORCE), non-zero
+ *         otherwise.
+ */
+int
+glusterd_snapshot_activate_deactivate_prevalidate (dict_t *dict,
+                                                   char **op_errstr,
+                                                   uint32_t *op_errno,
+                                                   dict_t *rsp_dict,
+                                                   gf_boolean_t is_op_activate)
+{
+        int32_t                 ret                     = -1;
+        char                   *snapname                = NULL;
+        xlator_t               *this                    = NULL;
+        glusterd_snap_t        *snap                    = NULL;
+        glusterd_volinfo_t     *snap_volinfo            = NULL;
+        char                    err_str[PATH_MAX]       = "";
+        glusterd_volume_status  volume_status = GLUSTERD_STATUS_STOPPED;
+        int                     flags                   = 0;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!dict || !op_errstr) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Getting the snap name "
+                        "failed");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                snprintf (err_str, sizeof (err_str), "Snapshot (%s) does not "
+                          "exist.", snapname);
+                *op_errno = EG_NOSNAP;
+                ret = -1;
+                goto out;
+        }
+
+        /*If its activation of snap then fetch the flags*/
+        if (is_op_activate) {
+                ret = dict_get_int32 (dict, "flags", &flags);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to get flags");
+                        goto out;
+                }
+        }
+
+        /* TODO : As of now there is only volume in snapshot.
+         * Change this when multiple volume snapshot is introduced
+         */
+        /* cds_list_entry() is plain pointer arithmetic and never yields
+         * NULL, so an empty volume list has to be detected explicitly
+         * before the first entry is taken. */
+        if (cds_list_empty (&snap->volumes)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "Unable to fetch snap_volinfo");
+                ret = -1;
+                goto out;
+        }
+        snap_volinfo = cds_list_entry (snap->volumes.next, glusterd_volinfo_t,
+                                       vol_list);
+
+        /*TODO: When multiple snapvolume are involved a cummulative
+         * logic is required to tell whether is snapshot is
+         * started/partially started/stopped*/
+        if (is_op_activate) {
+                volume_status = GLUSTERD_STATUS_STARTED;
+        }
+
+        if (snap_volinfo->status == volume_status) {
+                if (is_op_activate) {
+                        /* if flag is to GF_CLI_FLAG_OP_FORCE
+                         * try to start the snap volume, even
+                         * if the volume_status is GLUSTERD_STATUS_STARTED.
+                         * By doing so we try to bring
+                         * back the brick processes that are down*/
+                        if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+                                snprintf (err_str, sizeof (err_str),
+                                          "Snapshot %s is already activated.",
+                                          snapname);
+                                *op_errno = EINVAL;
+                                ret = -1;
+                        }
+                } else {
+                        snprintf (err_str, sizeof (err_str),
+                               "Snapshot %s is already deactivated.", snapname);
+                        *op_errno = EINVAL;
+                        ret = -1;
+                }
+                goto out;
+        }
+        ret = 0;
+out:
+
+        /* Hand any prepared message back to the caller as op_errstr. */
+        if (ret && err_str[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED, "%s", err_str);
+                *op_errstr = gf_strdup (err_str);
+        }
+
+        return ret;
+}
+
+/* Handle "snapshot delete volume <volname>" on the originator.
+ *
+ * Looks up the volume named by "volname" in dict and lets
+ * glusterd_snapshot_get_vol_snapnames() record its snapshot names in
+ * dict.
+ *
+ * @param dict     request dictionary, must contain "volname"
+ * @param err_str  buffer that receives a message when the volume is unknown
+ * @param op_errno set to EG_NOVOL when the volume is unknown
+ * @param len      size of err_str
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_handle_snapshot_delete_vol (dict_t *dict, char *err_str,
+                                     uint32_t *op_errno, int len)
+{
+        int32_t                 ret             = -1;
+        glusterd_volinfo_t     *volinfo         = NULL;
+        xlator_t               *this            = NULL;
+        char                   *volname         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get "
+                        "volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (err_str, len, "Volume (%s) does not exist", volname);
+                *op_errno = EG_NOVOL;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "Failed to get volinfo of "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_LIST_GET_FAIL,
+                        "Failed to get snapshot list for volume %s", volname);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Handle "snapshot delete all" on the originator.
+ *
+ * Stores the name of every snapshot in priv->snapshots in dict as
+ * "snapname1" .. "snapname<n>" and the number of snapshots as "snapcount".
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int32_t
+glusterd_handle_snapshot_delete_all (dict_t *dict)
+{
+        xlator_t        *this               = THIS;
+        glusterd_conf_t *priv               = NULL;
+        glusterd_snap_t *cur                = NULL;
+        glusterd_snap_t *next               = NULL;
+        char             name_key[PATH_MAX] = "";
+        int32_t          snap_idx           = 0;
+        int32_t          ret                = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (dict);
+
+        cds_list_for_each_entry_safe (cur, next, &priv->snapshots,
+                                      snap_list) {
+                /* Keys are numbered from 1 to n, like in other code paths. */
+                ++snap_idx;
+                ret = snprintf (name_key, sizeof (name_key), "snapname%d",
+                                snap_idx);
+                if (ret < 0)
+                        goto out;
+
+                ret = dict_set_dynstr_with_alloc (dict, name_key,
+                                                  cur->snapname);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not save snap name");
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "snapcount", snap_idx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Could not save snapcount");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* Handle deletion of a single named snapshot on the originator.
+ *
+ * The parent volume name of every volume in the snapshot is stored in dict
+ * as "volname<n>" together with "volcount", so that mgmt_v3 locks can be
+ * taken, and the mgmt_v3 snap phases are then initiated.
+ *
+ * @param req      RPC request object
+ * @param op       gluster operation
+ * @param dict     request dictionary, must contain "snapname"
+ * @param err_str  buffer that receives a message when the snapshot is
+ *                 unknown
+ * @param op_errno set to EG_NOSNAP when the snapshot is unknown
+ * @param len      size of err_str
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_handle_snapshot_delete_type_snap (rpcsvc_request_t *req,
+                                           glusterd_op_t op,
+                                           dict_t *dict, char *err_str,
+                                           uint32_t *op_errno, size_t len)
+{
+        int32_t                 ret             = -1;
+        int64_t                 volcount        = 0;
+        char                   *snapname        = NULL;
+        char                   *volname         = NULL;
+        char                    key[PATH_MAX]   = "";
+        glusterd_snap_t        *snap            = NULL;
+        glusterd_volinfo_t     *snap_vol        = NULL;
+        glusterd_volinfo_t     *tmp             = NULL;
+        xlator_t               *this            = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+        /* op_errno is written below; validate it like the sibling
+         * handlers do. */
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get snapname");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                snprintf (err_str, len, "Snapshot (%s) does not exist",
+                          snapname);
+                *op_errno = EG_NOSNAP;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        /* Set volnames in the dict to get mgmt_v3 lock */
+        cds_list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) {
+                volcount++;
+                volname = gf_strdup (snap_vol->parent_volname);
+                if (!volname) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "strdup failed");
+                        goto out;
+                }
+
+                snprintf (key, sizeof (key), "volname%"PRId64, volcount);
+                ret = dict_set_dynstr (dict, key, volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set "
+                                "volume name in dictionary");
+                        GF_FREE (volname);
+                        goto out;
+                }
+                /* The copy now belongs to dict. */
+                volname = NULL;
+        }
+        ret = dict_set_int64 (dict, "volcount", volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set volcount");
+                goto out;
+        }
+
+        ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_INIT_FAIL, "Failed to initiate snap "
+                        "phases");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* This is a snapshot remove handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt v3 framework to do the actual remove on all the bricks
+ *
+ * The "sub-cmd" value in dict selects the delete type. For the ALL and
+ * VOL types the CLI response is sent directly from here. An unknown
+ * delete type is reported as a failure with *op_errno set to EINVAL.
+ *
+ * @param req           RPC request object
+ * @param op            gluster operation
+ * @param dict          dictionary containing snapshot remove request
+ * @param err_str       In case of an err this string should be populated
+ * @param op_errno      receives the error code on failure
+ * @param len           length of err_str buffer
+ *
+ * @return              Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_delete (rpcsvc_request_t *req, glusterd_op_t op,
+                                 dict_t *dict, char *err_str,
+                                 uint32_t *op_errno, size_t len)
+{
+        int                      ret            = -1;
+        xlator_t                *this           = NULL;
+        int32_t                  delete_cmd     = -1;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        ret = dict_get_int32 (dict, "sub-cmd", &delete_cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "Failed to get sub-cmd");
+                goto out;
+        }
+
+        switch (delete_cmd) {
+        case GF_SNAP_DELETE_TYPE_SNAP:
+        case GF_SNAP_DELETE_TYPE_ITER:
+                ret = glusterd_handle_snapshot_delete_type_snap (req, op, dict,
+                                                                 err_str,
+                                                                 op_errno, len);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL, "Failed to handle "
+                                "snapshot delete for type SNAP");
+                        goto out;
+                }
+                break;
+
+        case GF_SNAP_DELETE_TYPE_ALL:
+                ret = glusterd_handle_snapshot_delete_all (dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL, "Failed to handle "
+                                "snapshot delete for type ALL");
+                        goto out;
+                }
+                break;
+
+        case GF_SNAP_DELETE_TYPE_VOL:
+                ret = glusterd_handle_snapshot_delete_vol (dict, err_str,
+                                                           op_errno, len);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL, "Failed to handle "
+                                "snapshot delete for type VOL");
+                        goto out;
+                }
+                break;
+
+        default:
+                /* An unknown delete type must not be reported as success:
+                 * nothing was done and no response was sent. */
+                *op_errno = EINVAL;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Wrong snapshot delete type");
+                ret = -1;
+                goto out;
+        }
+
+        /* ALL and VOL only collect snapshot names; answer the CLI here. */
+        if ( ret == 0 && (delete_cmd == GF_SNAP_DELETE_TYPE_ALL ||
+                          delete_cmd == GF_SNAP_DELETE_TYPE_VOL)) {
+                ret = glusterd_op_send_cli_response (op, 0, 0, req, dict,
+                                                     err_str);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_NO_CLI_RESP, "Failed to send cli "
+                                "response");
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Prevalidate a snapshot remove request.
+ *
+ * Checks that the snapshot named by "snapname" in dict exists and stores
+ * its uuid string under "snapuuid" in dict. rsp_dict is not touched by
+ * this function.
+ *
+ * Returns 0 on success; non-zero on failure, with *op_errno set to
+ * EG_NOSNAP when the snapshot does not exist.
+ */
+int
+glusterd_snapshot_remove_prevalidate (dict_t *dict, char **op_errstr,
+                                      uint32_t *op_errno, dict_t *rsp_dict)
+{
+        xlator_t        *this      = THIS;
+        glusterd_snap_t *found     = NULL;
+        char            *snap_name = NULL;
+        int32_t          ret       = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (dict == NULL || op_errstr == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snap_name);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Getting the snap name failed");
+                goto out;
+        }
+
+        found = glusterd_find_snap_by_name (snap_name);
+        if (found == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "Snapshot (%s) does not exist", snap_name);
+                *op_errno = EG_NOSNAP;
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (dict, "snapuuid",
+                                          uuid_utoa (found->snap_id));
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set snap uuid in response dictionary "
+                        "for %s snapshot", found->snapname);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Prevalidate a snapshot status request.
+ *
+ * The "sub-cmd" value in dict selects what is validated: nothing for the
+ * ALL type, the existence of "snapname" for the SNAP type and the
+ * existence of "volname" for the VOL type. An unknown sub-command is
+ * rejected with *op_errno set to EINVAL.
+ *
+ * @param dict      request dictionary
+ * @param op_errstr receives an allocated message when the snapshot or
+ *                  volume does not exist
+ * @param op_errno  receives EG_NOSNAP, EG_NOVOL or EINVAL on failure
+ * @param rsp_dict  not used by this function
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_status_prevalidate (dict_t *dict, char **op_errstr,
+                                      uint32_t *op_errno, dict_t *rsp_dict)
+{
+        int                     ret             = -1;
+        char                   *snapname        = NULL;
+        glusterd_conf_t        *conf            = NULL;
+        xlator_t               *this            = NULL;
+        int32_t                 cmd             = -1;
+        glusterd_volinfo_t     *volinfo         = NULL;
+        char                   *volname         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+        GF_ASSERT (op_errstr);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "Input dict is NULL");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "sub-cmd", &cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Could not fetch status cmd");
+                goto out;
+        }
+
+        switch (cmd) {
+        case GF_SNAP_STATUS_TYPE_ALL:
+        {
+                break;
+        }
+        case GF_SNAP_STATUS_TYPE_SNAP:
+        {
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Could not fetch snapname");
+                        goto out;
+                }
+
+                if (!glusterd_find_snap_by_name (snapname)) {
+                        ret = gf_asprintf (op_errstr, "Snapshot (%s) "
+                                           "does not exist", snapname);
+                        *op_errno = EG_NOSNAP;
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        /* gf_asprintf succeeded; still report failure. */
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_SNAP_NOT_FOUND,
+                                "Snapshot (%s) does not exist",
+                                snapname);
+                        goto out;
+                }
+                break;
+        }
+        case GF_SNAP_STATUS_TYPE_VOL:
+        {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Could not fetch volname");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        ret = gf_asprintf (op_errstr, "Volume (%s) "
+                                           "does not exist", volname);
+                        *op_errno = EG_NOVOL;
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        /* gf_asprintf succeeded; still report failure. */
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND, "Volume "
+                                "%s not present", volname);
+                        goto out;
+                }
+                break;
+
+        }
+        default:
+        {
+                /* An unknown sub-command is an error, not a success. */
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_COMMAND_NOT_FOUND, "Invalid command");
+                *op_errno = EINVAL;
+                ret = -1;
+                goto out;
+        }
+        }
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* Commit phase of snapshot activate.
+ *
+ * Starts the (single) volume of the snapshot named by "snapname" in dict
+ * with the "flags" found in dict and stores the snapshot uuid string as
+ * "snapuuid" in rsp_dict.
+ *
+ * @param dict      request dictionary ("snapname", "flags")
+ * @param op_errstr must not be NULL; not written by this function
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snapshot_activate_commit (dict_t *dict, char **op_errstr,
+                                   dict_t *rsp_dict)
+{
+        int32_t                   ret                  = -1;
+        char                     *snapname             = NULL;
+        glusterd_snap_t          *snap                 = NULL;
+        glusterd_volinfo_t       *snap_volinfo         = NULL;
+        xlator_t                 *this                 = NULL;
+        int                       flags                = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_errstr);
+
+        if (!dict || !op_errstr) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Getting the snap name "
+                        "failed");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "flags", &flags);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get flags");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "Snapshot (%s) does not exist", snapname);
+                ret = -1;
+                goto out;
+        }
+
+        /* TODO : As of now there is only volume in snapshot.
+         * Change this when multiple volume snapshot is introduced
+         */
+        /* cds_list_entry() is plain pointer arithmetic and never yields
+         * NULL, so an empty volume list has to be detected explicitly. */
+        if (cds_list_empty (&snap->volumes)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "Unable to fetch snap_volinfo");
+                ret = -1;
+                goto out;
+        }
+        snap_volinfo = cds_list_entry (snap->volumes.next, glusterd_volinfo_t,
+                                       vol_list);
+
+        ret = glusterd_start_volume (snap_volinfo, flags, _gf_true);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_ACTIVATE_FAIL,
+                        "Failed to activate snap volume %s of the snap %s",
+                        snap_volinfo->volname, snap->snapname);
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
+                                          uuid_utoa (snap->snap_id));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap "
+                        "uuid in response dictionary for %s snapshot",
+                        snap->snapname);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Commit phase of snapshot deactivate.
+ *
+ * Stops the (single) volume of the snapshot named by "snapname" in dict
+ * and stores the snapshot uuid string as "snapuuid" in rsp_dict.
+ *
+ * @param dict      request dictionary ("snapname")
+ * @param op_errstr must not be NULL; not written by this function
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snapshot_deactivate_commit (dict_t *dict, char **op_errstr,
+                                     dict_t *rsp_dict)
+{
+        int32_t                   ret                  = -1;
+        char                     *snapname             = NULL;
+        glusterd_snap_t          *snap                 = NULL;
+        glusterd_volinfo_t       *snap_volinfo         = NULL;
+        xlator_t                 *this                 = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_errstr);
+
+        if (!dict || !op_errstr) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Getting the snap name "
+                        "failed");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "Snapshot (%s) does not exist", snapname);
+                ret = -1;
+                goto out;
+        }
+
+        /* TODO : As of now there is only volume in snapshot.
+         * Change this when multiple volume snapshot is introduced
+         */
+        /* cds_list_entry() is plain pointer arithmetic and never yields
+         * NULL, so an empty volume list has to be detected explicitly. */
+        if (cds_list_empty (&snap->volumes)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "Unable to fetch snap_volinfo");
+                ret = -1;
+                goto out;
+        }
+        snap_volinfo = cds_list_entry (snap->volumes.next, glusterd_volinfo_t,
+                                       vol_list);
+
+        ret = glusterd_stop_volume (snap_volinfo);
+        if (ret) {
+                /* The two literal pieces used to be joined without a
+                 * separating space. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_DEACTIVATE_FAIL, "Failed to deactivate "
+                        "snap %s", snapname);
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
+                                          uuid_utoa (snap->snap_id));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap "
+                        "uuid in response dictionary for %s snapshot",
+                        snap->snapname);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Commit phase of snapshot remove.
+ *
+ * Marks the snapshot named by "snapname" in dict as decommissioned,
+ * persists that state, records missed deletes when running on the
+ * originator, removes the snapshot and reports "snapuuid" and "snapname"
+ * in rsp_dict.
+ *
+ * @param dict      request dictionary ("snapname")
+ * @param op_errstr must not be NULL; not written by this function
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr,
+                                 dict_t *rsp_dict)
+{
+        int32_t                   ret                  = -1;
+        char                     *snapname             = NULL;
+        char                     *dup_snapname         = NULL;
+        glusterd_snap_t          *snap                 = NULL;
+        glusterd_conf_t          *priv                 = NULL;
+        glusterd_volinfo_t       *snap_volinfo         = NULL;
+        xlator_t                 *this                 = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_errstr);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!dict || !op_errstr) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Getting the snap name "
+                        "failed");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "Snapshot (%s) does not exist", snapname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
+                                          uuid_utoa (snap->snap_id));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap uuid in "
+                        "response dictionary for %s snapshot",
+                        snap->snapname);
+                goto out;
+        }
+
+        /* Save the snap status as GD_SNAP_STATUS_DECOMMISSION so
+         * that if the node goes down the snap would be removed
+         */
+        snap->snap_status = GD_SNAP_STATUS_DECOMMISSION;
+        ret = glusterd_store_snap (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_OBJECT_STORE_FAIL, "Failed to "
+                        "store snap object %s", snap->snapname);
+                goto out;
+        } else
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_OP_SUCCESS, "Successfully marked "
+                        "snap %s for decommission.", snap->snapname);
+
+        if (is_origin_glusterd (dict) == _gf_true) {
+                /* TODO : As of now there is only volume in snapshot.
+                 * Change this when multiple volume snapshot is introduced
+                 */
+                /* cds_list_entry() is plain pointer arithmetic and never
+                 * yields NULL, so an empty volume list has to be detected
+                 * explicitly. */
+                if (cds_list_empty (&snap->volumes)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL,
+                                "Unable to fetch snap_volinfo");
+                        ret = -1;
+                        goto out;
+                }
+                snap_volinfo = cds_list_entry (snap->volumes.next,
+                                               glusterd_volinfo_t,
+                                               vol_list);
+
+                /* From origin glusterd check if      *
+                 * any peers with snap bricks is down */
+                ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo,
+                                                 &priv->peers,
+                                                 GF_SNAP_OPTION_TYPE_DELETE);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_GET_FAIL,
+                                "Failed to find missed snap deletes");
+                        goto out;
+                }
+        }
+
+        ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false,
+                                    _gf_false);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL, "Failed to remove snap %s",
+                        snapname);
+                goto out;
+        }
+
+        /* snapname points into dict; rsp_dict gets its own copy. */
+        dup_snapname = gf_strdup (snapname);
+        if (!dup_snapname) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Strdup failed");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, "snapname", dup_snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set the snapname");
+                GF_FREE (dup_snapname);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Remove a half-created snapshot after a failed create.
+ *
+ * Reads "volname" and "snapname" from dict. A snapshot that cannot be
+ * found, or whose removal fails, is not treated as an error.
+ *
+ * Returns 0 on success (including the two cases above) and non-zero when
+ * the arguments are NULL or a dict lookup fails.
+ */
+int32_t
+glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        xlator_t        *this      = THIS;
+        glusterd_conf_t *conf      = NULL;
+        glusterd_snap_t *stale     = NULL;
+        char            *vol_name  = NULL;
+        char            *snap_name = NULL;
+        int32_t          ret       = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (dict == NULL || op_errstr == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "input parameters NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &vol_name);
+        if (ret != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snap_name);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "getting the snap name failed (volume: %s)",
+                        vol_name);
+                goto out;
+        }
+
+        /* A missing snap means the failure happened at staging, or in
+         * commit before the snap object was created; there is nothing to
+         * clean up then, so report success.
+         */
+        stale = glusterd_find_snap_by_name (snap_name);
+        if (stale == NULL) {
+                gf_msg (this->name, GF_LOG_INFO, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND, "Snapshot (%s) does not exist",
+                        snap_name);
+                ret = 0;
+                goto out;
+        }
+
+        /* Removal of a half cooked snapshot is best effort: a failure is
+         * only logged at debug level. */
+        if (glusterd_snap_remove (rsp_dict, stale, _gf_true, _gf_true,
+                                  _gf_false))
+                gf_msg_debug (this->name, 0, "removing the snap %s failed",
+                              snap_name);
+
+        ret = 0;
+
+out:
+
+        return ret;
+}
+
+/* Post-validate step run after a successful snapshot create or delete.
+ *
+ * If dict carries "missed_snap_count", the missed snap operations it
+ * describes are added to the missed snap list and the list is written
+ * to the store. A dict without that key is not an error.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int32_t
+glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr,
+                                              dict_t *rsp_dict)
+{
+        xlator_t *this         = THIS;
+        int32_t   missed_count = -1;
+        int32_t   ret          = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_errstr);
+
+        if (dict_get_int32 (dict, "missed_snap_count", &missed_count)) {
+                gf_msg_debug (this->name, 0, "No missed snaps");
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_add_missed_snaps_to_list (dict, missed_count);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+                        "Failed to add missed snaps to list");
+                goto out;
+        }
+
+        ret = glusterd_store_update_missed_snaps ();
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+                        "Failed to update missed_snaps_list");
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Synctask body: takes the backend snapshot of one brick and records the
+ * outcome in the response dict.
+ *
+ * opaque is a snap_create_args_t describing the brick. The status key is
+ * "clone<vol>.brick<order>.status" when the request dict carries a
+ * "clonename", and "snap-vol<vol>.brick<order>.status" otherwise. The stored
+ * value is 1 when the snapshot succeeded and 0 when it failed.
+ *
+ * Returns the result of glusterd_take_brick_snapshot(), or -1 when the
+ * status could not be stored in the response dict.
+ */
+int
+glusterd_take_brick_snapshot_task (void *opaque)
+{
+        int                  ret           = 0;
+        int32_t              succeeded     = 0;
+        snap_create_args_t  *task          = NULL;
+        char                *clonename     = NULL;
+        char                 key[PATH_MAX] = "";
+
+        GF_ASSERT (opaque);
+
+        task = (snap_create_args_t *) opaque;
+        THIS = task->this;
+
+        /* Presence of a clonename decides which key family is used */
+        if (dict_get_str (task->dict, "clonename", &clonename) == 0) {
+                snprintf (key, sizeof (key), "clone%d.brick%d.status",
+                          task->volcount, task->brickorder);
+        } else {
+                snprintf (key, sizeof (key), "snap-vol%d.brick%d.status",
+                          task->volcount, task->brickorder);
+        }
+
+        ret = glusterd_take_brick_snapshot (task->dict, task->snap_vol,
+                                            task->brickinfo, task->volcount,
+                                            task->brickorder);
+        if (ret != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "Failed to "
+                        "take backend snapshot for brick "
+                        "%s:%s volume(%s)", task->brickinfo->hostname,
+                        task->brickinfo->path,
+                        task->snap_vol->volname);
+        }
+
+        succeeded = (ret != 0) ? 0 : 1;
+        if (dict_set_int32 (task->rsp_dict, key, succeeded) != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "failed to "
+                        "add %s to dict", key);
+                ret = -1;
+        }
+
+        return ret;
+}
+
+/* Completion callback for glusterd_take_brick_snapshot_task.
+ *
+ * Propagates a non-zero task result into the shared syncargs, releases the
+ * per-task argument structure and wakes the barrier the scheduler waits on.
+ * Always returns 0.
+ */
+int32_t
+glusterd_take_brick_snapshot_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+        snap_create_args_t  *task = NULL;
+        struct syncargs     *args = NULL;
+
+        GF_ASSERT (opaque);
+
+        task = (snap_create_args_t *) opaque;
+        /* Fetch the shared syncargs before the task structure is freed */
+        args = task->args;
+
+        if (ret != 0) {
+                args->op_ret = ret;
+        }
+
+        GF_FREE (opaque);
+        synctask_barrier_wake(args);
+
+        return 0;
+}
+
+/* Spawns one synctask per local brick of every volume in the snap to take
+ * the backend snapshot, then waits for all of them on a barrier.
+ *
+ * For each brick the current position is stored in rsp_dict under
+ * "snap-vol<vol>.brick<n>.order". Bricks owned by other nodes are skipped.
+ * Local bricks whose snap_status is -1 are skipped too, with their
+ * "snap-vol<vol>.brick<order>.status" set to 0. The number of local bricks
+ * per volume is stored under "snap-vol<vol>_brickcount".
+ *
+ * Returns args.op_ret once all tasks are done, or the first scheduling error.
+ * On a scheduling error the tasks already spawned are still waited for,
+ * because they reference the stack-allocated syncargs.
+ */
+int32_t
+glusterd_schedule_brick_snapshot (dict_t *dict, dict_t *rsp_dict,
+                                  glusterd_snap_t *snap)
+{
+        int                     ret           = -1;
+        int                     is_local      = 0;
+        int32_t                 vol_idx       = 0;
+        int32_t                 local_bricks  = 0;
+        int32_t                 brick_pos     = 0;
+        int32_t                 spawned       = 0;
+        char                    key[PATH_MAX] = "";
+        xlator_t               *this          = NULL;
+        glusterd_volinfo_t     *snap_vol      = NULL;
+        glusterd_brickinfo_t   *brickinfo     = NULL;
+        struct syncargs         args          = {0};
+        snap_create_args_t     *task          = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT(dict);
+        GF_ASSERT(snap);
+
+        synctask_barrier_init ((&args));
+        cds_list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+                vol_idx++;
+                local_bricks = 0;
+                brick_pos = 0;
+                cds_list_for_each_entry (brickinfo, &snap_vol->bricks,
+                                         brick_list) {
+                        snprintf (key, sizeof(key) - 1,
+                                  "snap-vol%d.brick%d.order", vol_idx,
+                                  local_bricks);
+                        ret = dict_set_int32 (rsp_dict, key, brick_pos);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to set %s", key);
+                                goto out;
+                        }
+
+                        is_local = !gf_uuid_compare (brickinfo->uuid, MY_UUID);
+
+                        /* Bricks of other nodes are not handled here */
+                        if (!is_local) {
+                                brick_pos++;
+                                continue;
+                        }
+
+                        /* Local brick with snap_status -1: no task is
+                         * spawned, its status is recorded as 0 */
+                        if (brickinfo->snap_status == -1) {
+                                local_bricks++;
+                                snprintf (key, sizeof (key),
+                                          "snap-vol%d.brick%d.status",
+                                          vol_idx, brick_pos);
+                                ret = dict_set_int32 (rsp_dict, key, 0);
+                                if (ret) {
+                                        gf_msg (this->name,
+                                                GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "failed to add %s to "
+                                                "dict", key);
+                                        goto out;
+                                }
+                                brick_pos++;
+                                continue;
+                        }
+
+                        task = GF_CALLOC (1, sizeof (*task),
+                                          gf_gld_mt_snap_create_args_t);
+                        if (!task) {
+                                ret = -1;
+                                goto out;
+                        }
+
+                        task->this       = this;
+                        task->dict       = dict;
+                        task->rsp_dict   = rsp_dict;
+                        task->snap_vol   = snap_vol;
+                        task->brickinfo  = brickinfo;
+                        task->volcount   = vol_idx;
+                        task->brickcount = local_bricks;
+                        task->brickorder = brick_pos;
+                        task->args       = &args;
+
+                        ret = synctask_new (this->ctx->env,
+                                            glusterd_take_brick_snapshot_task,
+                                            glusterd_take_brick_snapshot_cbk,
+                                            NULL, task);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_CREATION_FAIL, "Failed to "
+                                        "spawn task for snapshot create");
+                                /* The callback never runs for this task,
+                                 * so release its arguments here */
+                                GF_FREE (task);
+                                goto out;
+                        }
+                        spawned++;
+                        local_bricks++;
+                        brick_pos++;
+                }
+
+                snprintf (key, sizeof (key), "snap-vol%d_brickcount", vol_idx);
+                ret = dict_set_int64 (rsp_dict, key, local_bricks);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "failed to "
+                                "add %s to dict", key);
+                        goto out;
+                }
+        }
+
+        synctask_barrier_wait ((&args), spawned);
+        /* Everything has been waited for; keep the exit path from waiting
+         * a second time */
+        spawned = 0;
+
+        if (args.op_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "Failed to create snapshot");
+        }
+
+        ret = args.op_ret;
+out:
+        if (ret && spawned) {
+                synctask_barrier_wait ((&args), spawned);
+        }
+
+        return ret;
+}
+
+/* Creates the in-memory snap object used while cloning a snapshot.
+ *
+ * Reads "clonename" and "clone-id" from dict, allocates a new snap object
+ * and fills in its name and id.
+ *
+ * Returns the new snap object, or NULL when a key is missing, the clone
+ * name does not fit into snap->snapname, or the object cannot be allocated.
+ */
+glusterd_snap_t*
+glusterd_create_snap_object_for_clone (dict_t *dict, dict_t *rsp_dict)
+{
+        char             *snapname = NULL;
+        uuid_t           *snap_id  = NULL;
+        glusterd_snap_t  *snap     = NULL;
+        xlator_t         *this     = NULL;
+        int               ret      = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        /* Fetch the clone name and id from dict */
+        ret = dict_get_str (dict, "clonename", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch clonename");
+                goto out;
+        }
+
+        /* Reject names that would overflow the destination before any
+         * object exists, so nothing has to be torn down on this path.
+         * sizeof does not evaluate its operand, so snap being NULL here
+         * is fine.
+         * NOTE(review): assumes snap->snapname is a fixed-size char
+         * array (it was the target of a plain strcpy) -- confirm against
+         * the glusterd_snap_t definition. */
+        if (strlen (snapname) >= sizeof (snap->snapname)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "clonename %s is too long",
+                        snapname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_bin (dict, "clone-id", (void **)&snap_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch clone_id");
+                goto out;
+        }
+
+        snap = glusterd_new_snap_object ();
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_OBJ_NEW_FAIL, "Could not create "
+                        "the snap object for snap %s", snapname);
+                /* ret still holds 0 from the lookup above */
+                ret = -1;
+                goto out;
+        }
+
+        /* Length was validated above; snprintf keeps the copy bounded */
+        snprintf (snap->snapname, sizeof (snap->snapname), "%s", snapname);
+        gf_uuid_copy (snap->snap_id, *snap_id);
+
+        ret = 0;
+
+out:
+        if (ret) {
+                if (snap)
+                        glusterd_snap_remove (rsp_dict, snap,
+                                              _gf_true, _gf_true, _gf_true);
+                snap = NULL;
+        }
+
+        return snap;
+}
+
+
+/* Commit phase of a snapshot clone.
+ *
+ * Reads "clonename" and "snapname" (the parent snapshot) from dict, echoes
+ * the clone name into rsp_dict, creates a snap object for the clone, builds
+ * the clone volume from the first volume of the parent snapshot, takes the
+ * backend snapshots of its bricks, and finally moves the new volume from
+ * the snap object's list into priv->volumes (ordered by volume name).
+ * "volcount" (always 1) and "snapuuid" are stored in rsp_dict.
+ *
+ * Returns 0 on success. On failure the partially built snap object is
+ * removed and a non-zero value is returned.
+ */
+int32_t
+glusterd_snapshot_clone_commit (dict_t *dict, char **op_errstr,
+                                dict_t *rsp_dict)
+{
+        int                     ret         = -1;
+        int64_t                 volcount    = 0;
+        char                   *snapname    = NULL;
+        char                   *volname     = NULL;
+        char                   *tmp_name    = NULL;
+        xlator_t               *this        = NULL;
+        glusterd_snap_t        *snap_parent = NULL;
+        glusterd_snap_t        *snap        = NULL;
+        glusterd_volinfo_t     *origin_vol  = NULL;
+        glusterd_volinfo_t     *snap_vol    = NULL;
+        glusterd_conf_t        *priv        = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT(dict);
+        GF_ASSERT(op_errstr);
+        GF_ASSERT(rsp_dict);
+        priv = this->private;
+        GF_ASSERT(priv);
+
+        ret = dict_get_str (dict, "clonename", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch clonename");
+                goto out;
+        }
+        tmp_name = gf_strdup (snapname);
+        if (!tmp_name) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out of memory");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, "clonename", tmp_name);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set clonename in rsp_dict");
+                GF_FREE (tmp_name);
+                goto out;
+        }
+        tmp_name = NULL;
+
+        /* "snapname" holds the name of the snapshot being cloned */
+        ret = dict_get_str (dict, "snapname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get snap name");
+                goto out;
+        }
+
+        /* The lookup can fail; check before touching the volume list */
+        snap_parent = glusterd_find_snap_by_name (volname);
+        if (!snap_parent) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_SNAP_NOT_FOUND, "Snapshot (%s) does not exist",
+                        volname);
+                ret = -1;
+                goto out;
+        }
+
+        /* TODO : As of now there is only one volume in snapshot.
+         * Change this when multiple volume snapshot is introduced
+         */
+        /* cds_list_entry on an empty list yields a bogus non-NULL pointer,
+         * so emptiness has to be tested on the list itself */
+        if (cds_list_empty (&snap_parent->volumes)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "Failed to get snap "
+                        "volinfo %s", snap_parent->snapname);
+                ret = -1;
+                goto out;
+        }
+        origin_vol = cds_list_entry (snap_parent->volumes.next,
+                                     glusterd_volinfo_t, vol_list);
+
+        snap = glusterd_create_snap_object_for_clone (dict, rsp_dict);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_OBJ_NEW_FAIL, "creating the "
+                        "snap object %s failed", snapname);
+                ret = -1;
+                goto out;
+        }
+
+        snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict,
+                                         rsp_dict, 1, 1);
+        if (!snap_vol) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "taking the "
+                        "snapshot of the volume %s failed", volname);
+                goto out;
+        }
+
+        volcount = 1;
+        ret = dict_set_int64 (rsp_dict, "volcount", volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set volcount");
+                goto out;
+        }
+
+        ret = glusterd_schedule_brick_snapshot (dict, rsp_dict, snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_BACKEND_MAKE_FAIL, "Failed to take backend "
+                        "snapshot %s", snap->snapname);
+                goto out;
+        }
+
+        /* Detach the clone volume from the snap object's volume list */
+        cds_list_del_init (&snap_vol->vol_list);
+        ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
+                                          uuid_utoa (snap->snap_id));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap "
+                        "uuid in response dictionary for %s snapshot",
+                        snap->snapname);
+                goto out;
+        }
+
+        /* ...and insert it into priv->volumes, ordered by name */
+        glusterd_list_add_order (&snap_vol->vol_list, &priv->volumes,
+                                 glusterd_compare_volume_name);
+
+        ret = 0;
+
+out:
+        if (ret) {
+                if (snap)
+                        glusterd_snap_remove (rsp_dict, snap,
+                                              _gf_true, _gf_true,
+                                              _gf_true);
+                snap = NULL;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/* Commit phase of snapshot create.
+ *
+ * Reads "volcount", "snapname" and "volname<i>" from dict, creates the snap
+ * object, builds one snap volume per origin volume and takes the backend
+ * snapshots of the bricks. "snapname", "volcount" and "snapuuid" are stored
+ * in rsp_dict. Depending on the GLUSTERD_STORE_KEY_SNAP_ACTIVATE option in
+ * priv->opts the snap volumes are either stored as stopped, or their bricks
+ * are started and the volumes are stored as started.
+ *
+ * Returns 0 on success. On failure the snap object is removed and a
+ * non-zero value is returned.
+ */
+int32_t
+glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
+                                 uint32_t *op_errno, dict_t *rsp_dict)
+{
+        int                     ret           = -1;
+        int64_t                 idx           = 0;
+        int64_t                 volcount      = 0;
+        int32_t                 activate      = 0;
+        char                   *snapname      = NULL;
+        char                   *volname       = NULL;
+        char                   *name_copy     = NULL;
+        char                    key[PATH_MAX] = "";
+        xlator_t               *this          = NULL;
+        glusterd_snap_t        *snap          = NULL;
+        glusterd_volinfo_t     *origin_vol    = NULL;
+        glusterd_volinfo_t     *snap_vol      = NULL;
+        glusterd_brickinfo_t   *brickinfo     = NULL;
+        glusterd_conf_t        *priv          = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT(dict);
+        GF_ASSERT(op_errstr);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+        GF_ASSERT(rsp_dict);
+        priv = this->private;
+        GF_ASSERT(priv);
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get the volume count");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch snapname");
+                goto out;
+        }
+
+        /* Echo the snap name back to the caller through rsp_dict */
+        name_copy = gf_strdup (snapname);
+        if (name_copy == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out of memory");
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, "snapname", name_copy);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set snapname in rsp_dict");
+                GF_FREE (name_copy);
+                goto out;
+        }
+        name_copy = NULL;
+
+        snap = glusterd_create_snap_object (dict, rsp_dict);
+        if (snap == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "creating the"
+                        "snap object %s failed", snapname);
+                ret = -1;
+                goto out;
+        }
+
+        /* Volume names are stored under 1-based keys */
+        for (idx = 1; idx <= volcount; idx++) {
+                snprintf (key, sizeof (key), "volname%"PRId64, idx);
+                ret = dict_get_str (dict, key, &volname);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "failed to get volume name");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &origin_vol);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_VOL_NOT_FOUND,
+                                "failed to get the volinfo for "
+                                "the volume %s", volname);
+                        goto out;
+                }
+
+                if (is_origin_glusterd (dict)) {
+                        ret = glusterd_is_snap_soft_limit_reached (origin_vol,
+                                                                   rsp_dict);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAPSHOT_OP_FAILED, "Failed to "
+                                        "check soft limit exceeded or not, "
+                                        "for volume %s ", origin_vol->volname);
+                                goto out;
+                        }
+                }
+
+                snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict,
+                                                 rsp_dict, idx, 0);
+                if (snap_vol == NULL) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CREATION_FAIL, "taking the "
+                                "snapshot of the volume %s failed", volname);
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int64 (rsp_dict, "volcount", volcount);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set volcount");
+                goto out;
+        }
+
+        ret = glusterd_schedule_brick_snapshot (dict, rsp_dict, snap);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "Failed to take backend "
+                        "snapshot %s", snap->snapname);
+                goto out;
+        }
+
+        ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
+                                          uuid_utoa (snap->snap_id));
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snap "
+                        "uuid in response dictionary for %s snapshot",
+                        snap->snapname);
+                goto out;
+        }
+
+        activate = dict_get_str_boolean (priv->opts,
+                                         GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+                                         _gf_false);
+        if (!activate) {
+                /* Activation not requested: persist the volumes as stopped */
+                cds_list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+                        snap_vol->status = GLUSTERD_STATUS_STOPPED;
+                        ret = glusterd_store_volinfo (snap_vol,
+                                             GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOLINFO_SET_FAIL,
+                                        "Failed to store snap volinfo %s",
+                                        snap_vol->volname);
+                                goto out;
+                        }
+                }
+        } else {
+                /* Start every brick, then persist the volumes as started */
+                cds_list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+                        cds_list_for_each_entry (brickinfo, &snap_vol->bricks,
+                                                 brick_list) {
+                                ret = glusterd_brick_start (snap_vol,
+                                                            brickinfo,
+                                                            _gf_false);
+                                if (ret != 0) {
+                                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                                GD_MSG_BRICK_DISCONNECTED,
+                                                "starting "
+                                                "the brick %s:%s for the "
+                                                "snap %s "
+                                                "(volume: %s) failed",
+                                                brickinfo->hostname,
+                                                brickinfo->path,
+                                                snap_vol->snapshot->snapname,
+                                                snap_vol->volname);
+                                        goto out;
+                                }
+                        }
+
+                        snap_vol->status = GLUSTERD_STATUS_STARTED;
+                        ret = glusterd_store_volinfo (snap_vol,
+                                             GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOLINFO_SET_FAIL,
+                                        "Failed to store "
+                                        "snap volinfo %s", snap_vol->volname);
+                                goto out;
+                        }
+                }
+
+                ret = 0;
+        }
+
+out:
+        if (ret) {
+                if (snap) {
+                        glusterd_snap_remove (rsp_dict, snap,
+                                              _gf_true, _gf_true,
+                                              _gf_false);
+                }
+                snap = NULL;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Commits a new snap-max-hard-limit value.
+ *
+ * With volname == NULL the limit is system wide: it is written to
+ * conf->opts, the global option version is bumped and the options are
+ * stored. Otherwise the limit is set on the named volume and that volinfo
+ * is stored.
+ *
+ * Returns 0 on success. On failure a non-zero value is returned, the reason
+ * is logged and a copy of it is handed to the caller through *op_errstr.
+ */
+int
+snap_max_hard_limit_set_commit (dict_t *dict, uint64_t value,
+                                char *volname, char **op_errstr)
+{
+        char                 err_str[PATH_MAX] = "";
+        glusterd_conf_t     *conf              = NULL;
+        glusterd_volinfo_t  *volinfo           = NULL;
+        int                  ret               = -1;
+        xlator_t            *this              = NULL;
+        char                *next_version      = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        conf = this->private;
+
+        GF_ASSERT (conf);
+
+        /* TODO: Initiate auto deletion when there is a limit change */
+        if (!volname) {
+                /* For system limit.
+                 * Every failure below fills err_str so that neither the
+                 * log nor *op_errstr ends up with an empty message. */
+                ret = dict_set_uint64 (conf->opts,
+                                       GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                                       value);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str), "Failed to store "
+                                  "%s in the options",
+                                  GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+                        goto out;
+                }
+
+                ret = glusterd_get_next_global_opt_version_str (conf->opts,
+                                                                &next_version);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Failed to get next global opt-version");
+                        goto out;
+                }
+
+                ret = dict_set_str (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+                                    next_version);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Failed to set next global opt-version");
+                        goto out;
+                }
+
+                ret = glusterd_store_options (this, conf->opts);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "Failed to store options");
+                        goto out;
+                }
+        } else {
+                /* For one volume */
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str), "Failed to get the"
+                                  " volinfo for volume %s", volname);
+                        goto out;
+                }
+
+                volinfo->snap_max_hard_limit = value;
+
+                ret = glusterd_store_volinfo (volinfo,
+                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+                if (ret) {
+                        snprintf (err_str, sizeof (err_str), "Failed to store "
+                                  "snap-max-hard-limit for volume %s", volname);
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        if (ret) {
+                *op_errstr = gf_strdup (err_str);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_OP_FAILED, "%s", err_str);
+        }
+        return ret;
+}
+
+/* Commit phase of "snapshot config".
+ *
+ * Only GF_SNAP_CONFIG_TYPE_SET does any work; other command types return 0.
+ * When a hard and/or soft limit is present in dict, those limits are
+ * committed. Otherwise exactly one of the auto-delete and
+ * activate-on-create options is expected, and its value is copied into
+ * conf->opts. Whenever a system-wide option was touched here, the global
+ * option version is bumped and the options are stored.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_snapshot_config_commit (dict_t *dict, char **op_errstr,
+                                 dict_t *rsp_dict)
+{
+        char             *volname      = NULL;
+        xlator_t         *this         = NULL;
+        int               ret          = -1;
+        glusterd_conf_t  *conf         = NULL;
+        int               cmd_type     = 0;
+        uint64_t          hard_limit   = 0;
+        uint64_t          soft_limit   = 0;
+        char             *next_version = NULL;
+        char             *auto_delete  = NULL;
+        char             *activate     = NULL;
+        gf_boolean_t      store_global = _gf_false;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        conf = this->private;
+
+        GF_ASSERT (conf);
+
+        ret = dict_get_int32 (dict, "config-command", &cmd_type);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND,
+                        "failed to get config-command type");
+                goto out;
+        }
+        if (cmd_type != GF_SNAP_CONFIG_TYPE_SET) {
+                ret = 0;
+                goto out;
+        }
+
+        /* volname is optional; the result of the lookup is not checked */
+        (void) dict_get_str (dict, "volname", &volname);
+
+        /* config values snap-max-hard-limit and snap-max-soft-limit are
+         * optional and hence we are not erroring out if values are not
+         * present
+         */
+        gd_get_snap_conf_values_if_present (dict, &hard_limit,
+                                            &soft_limit);
+
+        if (hard_limit != 0) {
+                /* Commit ops for snap-max-hard-limit */
+                ret = snap_max_hard_limit_set_commit (dict, hard_limit, volname,
+                                                      op_errstr);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_HARD_LIMIT_SET_FAIL,
+                                "snap-max-hard-limit set commit failed.");
+                        goto out;
+                }
+        }
+
+        if (soft_limit != 0) {
+                /* For system limit */
+                store_global = _gf_true;
+                ret = dict_set_uint64 (conf->opts,
+                                       GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+                                       soft_limit);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to save %s in the dictionary",
+                                GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+                        goto out;
+                }
+        }
+
+        if (hard_limit != 0 || soft_limit != 0) {
+                /* Limits handled above; the other options are not examined */
+                ret = 0;
+        } else if (dict_get_str (dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+                                 &auto_delete) == 0) {
+                store_global = _gf_true;
+                ret = dict_set_dynstr_with_alloc (conf->opts,
+                                        GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+                                        auto_delete);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Could not "
+                                "save auto-delete value in conf->opts");
+                        goto out;
+                }
+        } else if (dict_get_str (dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+                                 &activate) == 0) {
+                store_global = _gf_true;
+                ret = dict_set_dynstr_with_alloc (conf->opts,
+                                        GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+                                        activate);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Could not save "
+                                "snap-activate-on-create value in conf->opts");
+                        goto out;
+                }
+        } else {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Invalid option");
+                goto out;
+        }
+
+        if (store_global) {
+                ret = glusterd_get_next_global_opt_version_str (conf->opts,
+                                                                &next_version);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GLOBAL_OP_VERSION_GET_FAIL,
+                                "Failed to get next global opt-version");
+                        goto out;
+                }
+
+                ret = dict_set_str (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+                                    next_version);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GLOBAL_OP_VERSION_SET_FAIL,
+                                "Failed to set next global opt-version");
+                        goto out;
+                }
+
+                ret = glusterd_store_options (this, conf->opts);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STORE_FAIL,
+                                "Failed to store options");
+                        goto out;
+                }
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Runs "lvs" on the device backing a brick and stores the reported volume
+ * group name, data percentage and logical volume size in rsp_dict under
+ * "<key_prefix>.vgname", "<key_prefix>.data" and "<key_prefix>.lvsize".
+ *
+ * The incoming value of the device argument is not used; the device is
+ * looked up from brickinfo->path.
+ *
+ * Returns 0 on success, non-zero when the device cannot be determined, the
+ * command cannot be started, or a value cannot be stored.
+ */
+int
+glusterd_get_brick_lvm_details (dict_t *rsp_dict,
+                                glusterd_brickinfo_t *brickinfo, char *volname,
+                                char *device, char *key_prefix)
+{
+
+        int                  ret            = -1;
+        glusterd_conf_t     *priv           = NULL;
+        runner_t             runner         = {0,};
+        xlator_t            *this           = NULL;
+        char                 msg[PATH_MAX]  = "";
+        char                 buf[PATH_MAX]  = "";
+        char                *token          = NULL;
+        char                *saveptr        = NULL;
+        char                 key[PATH_MAX]  = "";
+        char                *value          = NULL;
+
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (volname);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        device = glusterd_get_brick_mount_device (brickinfo->path);
+        if (!device) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL,
+                        "Getting device name for "
+                        "the brick %s:%s failed", brickinfo->hostname,
+                        brickinfo->path);
+                goto out;
+        }
+        runinit (&runner);
+        snprintf (msg, sizeof (msg), "running lvs command, "
+                  "for getting snap status");
+        /* Using lvs command fetch the Volume Group name,
+         * Percentage of data filled and Logical Volume size
+         *
+         * "-o" argument is used to get the desired information,
+         * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size",
+         * will get us Volume Group name and Logical Volume size.
+         *
+         * Here separator used is ":",
+         * for the above given command with separator ":",
+         * The output will be "vgname:lvsize"
+         */
+        runner_add_args (&runner, LVS, device, "--noheading", "-o",
+                         "vg_name,data_percent,lv_size",
+                         "--separator", ":", NULL);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        runner_log (&runner, "", GF_LOG_DEBUG, msg);
+        ret = runner_start (&runner);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_LVS_FAIL,
+                        "Could not perform lvs action");
+                goto end;
+        }
+
+        while (fgets (buf, sizeof (buf),
+                      runner_chio (&runner, STDOUT_FILENO)) != NULL) {
+                /* strtok_r keeps the tokenizer state local, unlike strtok */
+                token = strtok_r (buf, ":", &saveptr);
+                if (token != NULL) {
+                        /* Skip leading spaces; an all-blank field is
+                         * rejected */
+                        while (token[0] == ' ')
+                                token++;
+                        if (token[0] == '\0') {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                        GD_MSG_INVALID_ENTRY,
+                                        "Invalid vg entry");
+                                goto end;
+                        }
+                        value = gf_strdup (token);
+                        if (!value) {
+                                ret = -1;
+                                goto end;
+                        }
+                        ret = snprintf (key, sizeof (key), "%s.vgname",
+                                        key_prefix);
+                        if (ret < 0) {
+                                goto end;
+                        }
+
+                        ret = dict_set_dynstr (rsp_dict, key, value);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Could not save vgname ");
+                                goto end;
+                        }
+                        /* rsp_dict owns the string from here on */
+                        value = NULL;
+                }
+
+                token = strtok_r (NULL, ":", &saveptr);
+                if (token != NULL) {
+                        value = gf_strdup (token);
+                        if (!value) {
+                                ret = -1;
+                                goto end;
+                        }
+                        ret = snprintf (key, sizeof (key), "%s.data",
+                                        key_prefix);
+                        if (ret < 0) {
+                                goto end;
+                        }
+
+                        ret = dict_set_dynstr (rsp_dict, key, value);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Could not save data percent ");
+                                goto end;
+                        }
+                        value = NULL;
+                }
+
+                token = strtok_r (NULL, ":", &saveptr);
+                if (token != NULL) {
+                        value = gf_strdup (token);
+                        if (!value) {
+                                ret = -1;
+                                goto end;
+                        }
+                        ret = snprintf (key, sizeof (key), "%s.lvsize",
+                                        key_prefix);
+                        if (ret < 0) {
+                                goto end;
+                        }
+
+                        ret = dict_set_dynstr (rsp_dict, key, value);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Could not save lv size");
+                                goto end;
+                        }
+                        value = NULL;
+                }
+        }
+
+        ret = 0;
+
+end:
+        runner_end (&runner);
+
+out:
+        if (ret && value) {
+                GF_FREE (value);
+        }
+
+        /* NOTE(review): assumes glusterd_get_brick_mount_device() returns
+         * memory owned by the caller (GF_FREE-able) -- confirm against its
+         * definition. Without this the string is lost on every call. */
+        if (device) {
+                GF_FREE (device);
+        }
+
+        return ret;
+}
+
+/* Stores the status of one snapshot brick in rsp_dict.
+ *
+ * Keys written, all prefixed with "<keyprefix>.brick<index>":
+ *   .path    "<hostname>:<brick path>"
+ *   .vgname  "Pending Snapshot" when brickinfo->snap_status is -1; nothing
+ *            else is reported for such a brick
+ *   .status  "No" when the brick is stopped, "Yes" otherwise
+ *   .pid     the value filled in by gf_is_service_running (only for
+ *            bricks that are not stopped)
+ * followed by the LVM details from glusterd_get_brick_lvm_details().
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
+                                  char *keyprefix, int index,
+                                  glusterd_volinfo_t *snap_volinfo,
+                                  glusterd_brickinfo_t *brickinfo)
+{
+        int               ret                  = -1;
+        xlator_t         *this                 = NULL;
+        glusterd_conf_t  *priv                 = NULL;
+        char              key[PATH_MAX]        = "";
+        char             *device               = NULL;
+        char             *value                = NULL;
+        char              brick_path[PATH_MAX] = "";
+        char              pidfile[PATH_MAX]    = "";
+        pid_t             pid                  = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (keyprefix);
+        GF_ASSERT (snap_volinfo);
+        GF_ASSERT (brickinfo);
+
+        ret = snprintf (key, sizeof (key), "%s.brick%d.path", keyprefix,
+                        index);
+        if (ret < 0) {
+                goto out;
+        }
+
+        ret = snprintf (brick_path, sizeof (brick_path),
+                        "%s:%s", brickinfo->hostname, brickinfo->path);
+        if (ret < 0) {
+                goto out;
+        }
+
+        value = gf_strdup (brick_path);
+        if (!value) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, key, value);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Unable to store "
+                        "brick_path %s", brickinfo->path);
+                goto out;
+        }
+        /* rsp_dict owns the string now; later error paths must not free it */
+        value = NULL;
+
+        if (brickinfo->snap_status == -1) {
+                /* Setting vgname as "Pending Snapshot" */
+                value = gf_strdup ("Pending Snapshot");
+                if (!value) {
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (key, sizeof (key), "%s.brick%d.vgname",
+                          keyprefix, index);
+                ret = dict_set_dynstr (rsp_dict, key, value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not save vgname ");
+                        goto out;
+                }
+                value = NULL;
+
+                ret = 0;
+                goto out;
+        }
+
+        ret = snprintf (key, sizeof (key), "%s.brick%d.status",
+                        keyprefix, index);
+        if (ret < 0) {
+                goto out;
+        }
+
+        /* Let the dict make and own its copy of the status string, as the
+         * other duplicated strings in this file do. The earlier
+         * gf_strdup + dict_set_str pairing looks like it left the copy
+         * unowned.
+         * NOTE(review): confirm dict_set_str does not take ownership. */
+        ret = dict_set_dynstr_with_alloc (rsp_dict, key,
+                        (brickinfo->status == GF_BRICK_STOPPED) ? "No" : "Yes");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Could not save brick status");
+                goto out;
+        }
+
+        if (brickinfo->status != GF_BRICK_STOPPED) {
+                GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo,
+                                            brickinfo, priv);
+                /* Only the pid is of interest; it keeps its initial value
+                 * of -1 unless the helper fills it in */
+                (void) gf_is_service_running (pidfile, &pid);
+
+                ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
+                                keyprefix, index);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                ret = dict_set_int32 (rsp_dict, key, pid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not save pid %d", pid);
+                        goto out;
+                }
+        }
+
+        ret = snprintf (key, sizeof (key), "%s.brick%d",
+                        keyprefix, index);
+        if (ret < 0) {
+                goto out;
+        }
+
+        ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo,
+                                              snap_volinfo->volname,
+                                              device, key);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_GET_INFO_FAIL, "Failed to get "
+                        "brick LVM details");
+                goto out;
+        }
+out:
+        if (ret && value) {
+                GF_FREE (value);
+        }
+
+        return ret;
+}
+
+/* Stores the status of every local brick of every volume of a snapshot.
+ *
+ * For volume number v (0-based) the keys are prefixed with
+ * "<keyprefix>.vol<v>"; the per-brick data is written by
+ * glusterd_get_single_brick_status(), and "<keyprefix>.vol<v>.brickcount"
+ * holds the number of bricks of that volume. "<keyprefix>.volcount" holds
+ * the number of volumes. Bricks that are not local are counted but not
+ * reported.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_get_single_snap_status (char **op_errstr, dict_t *rsp_dict,
+                                 char *keyprefix, glusterd_snap_t *snap)
+{
+        int                      ret                = -1;
+        xlator_t                *this               = NULL;
+        char                     key[PATH_MAX]      = "";
+        char                     brickkey[PATH_MAX] = "";
+        glusterd_volinfo_t      *snap_volinfo       = NULL;
+        glusterd_volinfo_t      *tmp_volinfo        = NULL;
+        glusterd_brickinfo_t    *brickinfo          = NULL;
+        int                      volcount           = 0;
+        int                      brickcount         = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (keyprefix);
+        GF_ASSERT (snap);
+
+        cds_list_for_each_entry_safe (snap_volinfo, tmp_volinfo, &snap->volumes,
+                                      vol_list) {
+                /* Brick indices and the brick count are per volume, so the
+                 * counter restarts with every volume instead of carrying
+                 * over the bricks of the previous ones */
+                brickcount = 0;
+
+                ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix,
+                                volcount);
+                if (ret < 0) {
+                        goto out;
+                }
+                cds_list_for_each_entry (brickinfo, &snap_volinfo->bricks,
+                                         brick_list) {
+                        /* Non-local bricks still consume an index */
+                        if (!glusterd_is_local_brick (this, snap_volinfo,
+                                                      brickinfo)) {
+                                brickcount++;
+                                continue;
+                        }
+
+                        ret = glusterd_get_single_brick_status (op_errstr,
+                                                   rsp_dict, key, brickcount,
+                                                   snap_volinfo, brickinfo);
+
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_STATUS_FAIL, "Getting "
+                                        "single snap status failed");
+                                goto out;
+                        }
+                        brickcount++;
+                }
+                ret = snprintf (brickkey, sizeof (brickkey), "%s.brickcount",
+                                key);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                ret = dict_set_int32 (rsp_dict, brickkey, brickcount);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Could not save brick count");
+                        goto out;
+                }
+                volcount++;
+        }
+
+        ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+        if (ret < 0) {
+                goto out;
+        }
+
+        ret = dict_set_int32 (rsp_dict, key, volcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Could not save volcount");
+                goto out;
+        }
+
+out:
+
+        return ret;
+}
+
+/* Stores the name, UUID and brick status of one snap object in rsp_dict.
+ *
+ * Keys written: "<keyprefix>.snapname", "<keyprefix>.uuid", everything
+ * produced by glusterd_get_single_snap_status(), and finally
+ * "<keyprefix>.volcount", which is set to 1.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_get_each_snap_object_status (char **op_errstr, dict_t *rsp_dict,
+                                      glusterd_snap_t *snap, char *keyprefix)
+{
+        int        ret           = -1;
+        char       key[PATH_MAX] = "";
+        char      *dup           = NULL;
+        xlator_t  *this          = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (snap);
+        GF_ASSERT (keyprefix);
+
+        /* TODO : Get all the snap volume info present in snap object,
+         * as of now, There will be only one snapvolinfo per snap object
+         */
+
+        /* Snap name */
+        ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+        if (ret < 0)
+                goto out;
+
+        dup = gf_strdup (snap->snapname);
+        if (!dup) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, key, dup);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Could not save "
+                        "snap name");
+                goto out;
+        }
+        /* Stored: must not be freed on the error path any more */
+        dup = NULL;
+
+        /* Snap UUID */
+        ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix);
+        if (ret < 0)
+                goto out;
+
+        dup = gf_strdup (uuid_utoa (snap->snap_id));
+        if (!dup) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, key, dup);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Could not save "
+                        "snap UUID");
+                goto out;
+        }
+        dup = NULL;
+
+        /* Per-volume and per-brick details */
+        ret = glusterd_get_single_snap_status (op_errstr, rsp_dict, keyprefix,
+                                               snap);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_STATUS_FAIL,
+                        "Could not get single snap status");
+                goto out;
+        }
+
+        ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+        if (ret < 0)
+                goto out;
+
+        ret = dict_set_int32 (rsp_dict, key, 1);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Could not save volcount");
+                goto out;
+        }
+
+out:
+        if (ret && dup) {
+                GF_FREE (dup);
+        }
+
+        return ret;
+}
+
+/* Lists the snapshots of one volume in rsp_dict.
+ *
+ * Every entry of the volume's snap_volumes list contributes its snapshot
+ * name under "status.snap<i>.snapname" (i starting at 0); the number of
+ * entries is stored under "status.snapcount".
+ *
+ * Returns 0 on success, non-zero when the volume is not found or a value
+ * cannot be stored.
+ */
+int
+glusterd_get_snap_status_of_volume (char **op_errstr, dict_t *rsp_dict,
+                                    char *volname, char *keyprefix)
+{
+        int                  ret           = -1;
+        int                  snap_idx      = 0;
+        char                 key[PATH_MAX] = "";
+        glusterd_volinfo_t  *snap_vol      = NULL;
+        glusterd_volinfo_t  *next_vol      = NULL;
+        glusterd_volinfo_t  *volinfo       = NULL;
+        xlator_t            *this          = NULL;
+        glusterd_conf_t     *priv          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (volname);
+        GF_ASSERT (keyprefix);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "Failed to get volinfo of "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        cds_list_for_each_entry_safe (snap_vol, next_vol,
+                                      &volinfo->snap_volumes, snapvol_list) {
+                ret = snprintf (key, sizeof (key),
+                                "status.snap%d.snapname", snap_idx);
+                if (ret < 0)
+                        goto out;
+
+                ret = dict_set_dynstr_with_alloc (rsp_dict, key,
+                                                snap_vol->snapshot->snapname);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Could not save "
+                                "snap name");
+                        goto out;
+                }
+
+                snap_idx++;
+        }
+
+        /* snap_idx now equals the number of names stored above */
+        ret = dict_set_int32 (rsp_dict, "status.snapcount", snap_idx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to save snapcount");
+                ret = -1;
+        }
+
+out:
+        return ret;
+}
+
+/* Lists every snapshot known to this glusterd in rsp_dict.
+ *
+ * Each entry of priv->snapshots contributes its name under
+ * "status.snap<i>.snapname" (i starting at 0); the number of entries is
+ * stored under "status.snapcount".
+ *
+ * Returns 0 on success, non-zero when a value cannot be stored.
+ */
+int
+glusterd_get_all_snapshot_status (dict_t *dict, char **op_errstr,
+                                  dict_t *rsp_dict)
+{
+        int32_t           snap_idx      = 0;
+        int               ret           = -1;
+        char              key[PATH_MAX] = "";
+        glusterd_conf_t  *priv          = NULL;
+        glusterd_snap_t  *snap          = NULL;
+        glusterd_snap_t  *next_snap     = NULL;
+        xlator_t         *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        cds_list_for_each_entry_safe (snap, next_snap, &priv->snapshots,
+                                      snap_list) {
+                ret = snprintf (key, sizeof (key),
+                                "status.snap%d.snapname", snap_idx);
+                if (ret < 0)
+                        goto out;
+
+                ret = dict_set_dynstr_with_alloc (rsp_dict, key,
+                                                  snap->snapname);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Could not save "
+                                "snap name");
+                        goto out;
+                }
+
+                snap_idx++;
+        }
+
+        /* snap_idx now equals the number of names stored above */
+        ret = dict_set_int32 (rsp_dict, "status.snapcount", snap_idx);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Could not save snapcount");
+                goto out;
+        }
+
+        ret = 0;
+out :
+        return ret;
+}
+
+
+/* Commit phase of "snapshot status": fill rsp_dict according to "sub-cmd".
+ *
+ * The "sub-cmd" value read from dict is copied into rsp_dict and then
+ * selects what is gathered:
+ *   GF_SNAP_STATUS_TYPE_ALL  - glusterd_get_all_snapshot_status()
+ *   GF_SNAP_STATUS_TYPE_SNAP - status of the snapshot named by "snapname",
+ *                              stored with the "status.snap0" prefix, and
+ *                              "status.snapcount" set to 1
+ *   GF_SNAP_STATUS_TYPE_VOL  - glusterd_get_snap_status_of_volume() for the
+ *                              volume named by "volname", "status.vol0"
+ *                              prefix
+ * Any other value adds nothing further and the function returns 0.
+ *
+ * @param dict      request dictionary
+ * @param op_errstr set to an allocated message when the named snapshot
+ *                  does not exist
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_status_commit (dict_t *dict, char **op_errstr,
+                                 dict_t *rsp_dict)
+{
+        xlator_t        *this     = NULL;
+        int              ret      = -1;
+        glusterd_conf_t *conf     = NULL;
+        int32_t          cmd      = -1;
+        char            *snapname = NULL;
+        glusterd_snap_t *snap     = NULL;
+        char            *volname  = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        conf = this->private;
+
+        GF_ASSERT (conf);
+        ret = dict_get_int32 (dict, "sub-cmd", &cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get status cmd type");
+                goto out;
+        }
+
+        ret = dict_set_int32 (rsp_dict, "sub-cmd", cmd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Could not save status cmd in rsp dictionary");
+                goto out;
+        }
+
+        switch (cmd) {
+        case GF_SNAP_STATUS_TYPE_ALL:
+        {
+                ret = glusterd_get_all_snapshot_status (dict, op_errstr,
+                                                        rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_STATUS_FAIL, "Unable to "
+                                "get snapshot status");
+                        goto out;
+                }
+                break;
+        }
+        case GF_SNAP_STATUS_TYPE_SNAP:
+        {
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Unable to "
+                                "get snap name");
+                        goto out;
+                }
+
+                snap = glusterd_find_snap_by_name (snapname);
+                if (!snap) {
+                        ret = gf_asprintf (op_errstr, "Snapshot (%s) "
+                                           "does not exist", snapname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "Unable to "
+                                "get snap volinfo");
+                        goto out;
+                }
+
+                ret = glusterd_get_each_snap_object_status (op_errstr,
+                                        rsp_dict, snap, "status.snap0");
+                if (ret) {
+                        /* Log the snapshot's name; the previous code passed
+                         * a pointer that was always NULL to "%s". */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_STATUS_FAIL, "Unable to "
+                                "get status of snap %s", snapname);
+                        goto out;
+                }
+
+                ret = dict_set_int32 (rsp_dict, "status.snapcount", 1);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Unable to "
+                                "set snapcount to 1");
+                        goto out;
+                }
+                break;
+        }
+        case GF_SNAP_STATUS_TYPE_VOL:
+        {
+                ret = dict_get_str (dict, "volname", &volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Unable to"
+                                " get volume name");
+                        goto out;
+                }
+
+                ret = glusterd_get_snap_status_of_volume (op_errstr,
+                                        rsp_dict, volname, "status.vol0");
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_STATUS_FAIL, "Function :"
+                                " glusterd_get_snap_status_of_volume "
+                                "failed");
+                        goto out;
+                }
+                break;
+        }
+        default:
+                /* Other sub-commands need nothing more in rsp_dict */
+                break;
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Enforce the snapshot soft limit on the volumes listed in dict.
+ *
+ * For each "volname<i>" (i = 1 .. "volcount") the soft limit is computed as
+ * opt_max_soft percent of the smaller of the volume's snap_max_hard_limit
+ * and the system-wide hard limit. When volinfo->snap_count exceeds that
+ * limit, the snapshot owning the first entry of volinfo->snap_volumes is
+ * marked GD_SNAP_STATUS_DECOMMISSION, stored, and removed.
+ *
+ * @param dict     request dictionary holding "volcount" and "volname<i>"
+ * @param rsp_dict response dictionary handed to glusterd_snap_remove()
+ *
+ * @return the result of the last operation performed (0 on success)
+ */
+int32_t
+glusterd_handle_snap_limit (dict_t *dict, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ uint64_t effective_max_limit = 0;
+ int64_t volcount = 0;
+ int i = 0;
+ char *volname = NULL;
+ char key[PATH_MAX] = {0, };
+ glusterd_volinfo_t *volinfo = NULL;
+ uint64_t limit = 0;
+ int64_t count = 0;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ glusterd_volinfo_t *other_volinfo = NULL;
+ uint64_t opt_max_hard = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ uint64_t opt_max_soft = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_int64 (dict, "volcount", &volcount);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "failed to get the volcount");
+ goto out;
+ }
+
+ /* Volume names in dict are numbered from 1 */
+ for (i = 1; i <= volcount; i++) {
+ snprintf (key, sizeof (key), "volname%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "failed to get the "
+ "volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_VOL_NOT_FOUND, "volinfo for %s "
+ "not found", volname);
+ goto out;
+ }
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present (priv->opts, &opt_max_hard,
+ &opt_max_soft);
+
+ /* The minimum of the 2 limits i.e system wide limit and
+ volume wide limit will be considered
+ */
+ if (volinfo->snap_max_hard_limit < opt_max_hard)
+ effective_max_limit = volinfo->snap_max_hard_limit;
+ else
+ effective_max_limit = opt_max_hard;
+
+ /* opt_max_soft is applied as a percentage of the hard limit */
+ limit = (opt_max_soft * effective_max_limit)/100;
+
+ /* NOTE(review): a volume under its soft limit ends the whole loop
+ * (ret is 0 here), so later volumes are not examined - confirm
+ * this is intended rather than moving on to the next volume.
+ */
+ count = volinfo->snap_count - limit;
+ if (count <= 0)
+ goto out;
+
+ /* Pick the first entry on the volume's snap_volumes list
+ * (presumably the oldest snapshot - TODO confirm list ordering)
+ */
+ tmp_volinfo = cds_list_entry (volinfo->snap_volumes.next,
+ glusterd_volinfo_t, snapvol_list);
+ snap = tmp_volinfo->snapshot;
+ GF_ASSERT (snap);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SOFT_LIMIT_REACHED, "Soft-limit "
+ "(value = %"PRIu64") of volume %s is reached. "
+ "Deleting snapshot %s.", limit, volinfo->volname,
+ snap->snapname);
+
+ /* NOTE(review): snap->lock is released after glusterd_snap_remove()
+ * has run on the same snap - verify the object is still valid at
+ * the UNLOCK below.
+ */
+ LOCK (&snap->lock);
+ {
+ snap->snap_status = GD_SNAP_STATUS_DECOMMISSION;
+ ret = glusterd_store_snap (snap);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_OBJECT_STORE_FAIL, "could "
+ "not store snap object %s",
+ snap->snapname);
+ goto unlock;
+ }
+
+ ret = glusterd_snap_remove (rsp_dict, snap,
+ _gf_true, _gf_true,
+ _gf_false);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_REMOVE_FAIL,
+ "failed to remove snap %s",
+ snap->snapname);
+ }
+ unlock: UNLOCK (&snap->lock);
+ }
+
+out:
+ return ret;
+}
+
+/* Post-validation step of "snapshot clone".
+ *
+ * Looks up the clone volume named by "clonename" and the snap object hanging
+ * off it. If the operation failed (op_ret != 0) and the dict asks for
+ * "cleanup", the snap is removed and 0 is returned. If the operation
+ * succeeded, only the snap object is deleted and the clone volume's
+ * snapshot pointer is cleared.
+ *
+ * @param dict      request dictionary
+ * @param op_ret    result of the clone operation
+ * @param op_errstr error string slot (not used here)
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snapshot_clone_postvalidate (dict_t *dict, int32_t op_ret,
+                                      char **op_errstr, dict_t *rsp_dict)
+{
+        xlator_t           *this      = NULL;
+        glusterd_conf_t    *priv      = NULL;
+        glusterd_volinfo_t *clone_vol = NULL;
+        glusterd_snap_t    *snap      = NULL;
+        char               *clonename = NULL;
+        int32_t             cleanup   = 0;
+        int                 ret       = -1;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "clonename", &clonename);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch "
+                        "clonename");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (clonename, &clone_vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "unable to find clone "
+                        "%s volinfo", clonename);
+                goto out;
+        }
+
+        if (clone_vol)
+                snap = clone_vol->snapshot;
+
+        if (op_ret) {
+                /* Failed clone: remove everything when cleanup was asked
+                 * for. The outcome of the cleanup is deliberately not
+                 * propagated; nothing below applies to a failed clone.
+                 */
+                ret = dict_get_int32 (dict, "cleanup", &cleanup);
+                if (!ret && cleanup && snap)
+                        glusterd_snap_remove (rsp_dict, snap,
+                                              _gf_true, _gf_true,
+                                              _gf_true);
+                ret = 0;
+                goto out;
+        }
+
+        /* Successful clone: the snap object is no longer needed */
+        ret = glusterd_snapobject_delete (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL, "Failed to delete "
+                        "snap object %s", snap->snapname);
+                goto out;
+        }
+        clone_vol->snapshot = NULL;
+
+out:
+        return ret;
+}
+
+
+/* Post-validation step of "snapshot create".
+ *
+ * On a failed create (op_ret != 0) the optional "cleanup" request in dict is
+ * honoured and nothing else is done. On a successful create the snap named
+ * by "snapname" is marked GD_SNAP_STATUS_IN_USE and stored, the post-validate
+ * snap update is run, the soft-limit handler is invoked when the
+ * auto-delete option is enabled, and finally tiering is resumed.
+ *
+ * @param dict      request dictionary
+ * @param op_ret    result of the create operation
+ * @param op_errstr error string slot passed on to helpers
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret,
+                                       char **op_errstr, dict_t *rsp_dict)
+{
+        xlator_t        *this     = NULL;
+        glusterd_conf_t *priv     = NULL;
+        int              ret      = -1;
+        int32_t          cleanup  = 0;
+        glusterd_snap_t *snap     = NULL;
+        char            *snapname = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (op_ret) {
+                ret = dict_get_int32 (dict, "cleanup", &cleanup);
+                if (!ret && cleanup) {
+                        ret = glusterd_do_snap_cleanup (dict, op_errstr,
+                                                        rsp_dict);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_SNAP_CLEANUP_FAIL, "cleanup "
+                                        "operation failed");
+                                goto out;
+                        }
+                }
+                /* Irrespective of status of cleanup its better
+                 * to return from this function. As the functions
+                 * following this block is not required to be
+                 * executed in case of failure scenario.
+                 */
+                ret = 0;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "snapname", &snapname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to fetch "
+                        "snapname");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (snapname);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_NOT_FOUND, "unable to find snap "
+                        "%s", snapname);
+                /* ret is still 0 from dict_get_str(); report the failure */
+                ret = -1;
+                goto out;
+        }
+
+        snap->snap_status = GD_SNAP_STATUS_IN_USE;
+        ret = glusterd_store_snap (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_OBJECT_STORE_FAIL, "Could not store snap "
+                        "object %s", snap->snapname);
+                goto out;
+        }
+
+        ret = glusterd_snapshot_update_snaps_post_validate (dict,
+                                                            op_errstr,
+                                                            rsp_dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CREATION_FAIL, "Failed to "
+                        "create snapshot");
+                goto out;
+        }
+
+        /* "auto-delete" might not be set by user explicitly,
+         * in that case it's better to consider the default value.
+         * Hence not erroring out if Key is not found.
+         */
+        ret = dict_get_str_boolean (priv->opts,
+                                    GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+                                    _gf_false);
+        if (_gf_true == ret) {
+                /* Errors of auto-delete are ignored: ret is overwritten
+                 * by the resume-tier call below. */
+                ret = glusterd_handle_snap_limit (dict, rsp_dict);
+        }
+
+        ret = glusterd_snapshot_resume_tier (this, dict);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_RESUME_TIER_FAIL,
+                        "Failed to resume tier in snapshot postvalidate.");
+        }
+
+out:
+        return ret;
+}
+
+/* Commit-phase dispatcher for snapshot commands.
+ *
+ * Reads the command "type" from dict and forwards to the matching commit
+ * handler. A handler failure is logged and its return value is returned
+ * unchanged. For a failed delete that left *op_errstr unset, a generic
+ * "might not be in an usable state" message is allocated and -1 returned.
+ *
+ * @param dict      request dictionary
+ * @param op_errstr error string slot
+ * @param op_errno  error number slot (must be non-NULL)
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_snapshot (dict_t *dict, char **op_errstr,
+                   uint32_t *op_errno, dict_t *rsp_dict)
+{
+        xlator_t        *this           = NULL;
+        glusterd_conf_t *priv           = NULL;
+        char            *snap_name      = NULL;
+        char             temp[PATH_MAX] = "";
+        int32_t          snap_command   = 0;
+        int              ret            = -1;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_int32 (dict, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "unable to get the type of "
+                        "the snapshot command");
+                goto out;
+        }
+
+        /* Every case leaves the handler's result in ret; a zero result
+         * falls through to the common return as success.
+         */
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+                ret = glusterd_snapshot_create_commit (dict, op_errstr,
+                                                       op_errno, rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_CREATION_FAIL, "Failed to "
+                                "create snapshot");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_snapshot_clone_commit (dict, op_errstr,
+                                                      rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_CLONE_FAILED, "Failed to "
+                                "clone snapshot");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_CONFIG:
+                ret = glusterd_snapshot_config_commit (dict, op_errstr,
+                                                       rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_CONFIG_FAIL,
+                                "snapshot config failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_DELETE:
+                ret = glusterd_snapshot_remove_commit (dict, op_errstr,
+                                                       rsp_dict);
+                if (!ret)
+                        break;
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL, "Failed to "
+                        "delete snapshot");
+
+                /* The handler already supplied an error string */
+                if (*op_errstr)
+                        break;
+
+                if (dict_get_str (dict, "snapname", &snap_name)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get snapname");
+                        snap_name = "NA";
+                }
+
+                snprintf (temp, sizeof (temp), "Snapshot %s might "
+                          "not be in an usable state.", snap_name);
+
+                *op_errstr = gf_strdup (temp);
+                ret = -1;
+                break;
+
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                ret = glusterd_snapshot_restore (dict, op_errstr,
+                                                 rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_RESTORE_FAIL, "Failed to "
+                                "restore snapshot");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_ACTIVATE:
+                ret = glusterd_snapshot_activate_commit (dict, op_errstr,
+                                                         rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_ACTIVATE_FAIL, "Failed to "
+                                "activate snapshot");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+                ret = glusterd_snapshot_deactivate_commit (dict, op_errstr,
+                                                           rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_DEACTIVATE_FAIL, "Failed to "
+                                "deactivate snapshot");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_STATUS:
+                ret = glusterd_snapshot_status_commit (dict, op_errstr,
+                                                       rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_STATUS_FAIL, "Failed to "
+                                "show snapshot status");
+                break;
+
+        default:
+                /* NOTE(review): ret is still 0 from dict_get_int32(), so an
+                 * unrecognised command is logged but returned as success -
+                 * confirm this is intended.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "invalid snap command");
+                break;
+        }
+
+out:
+        return ret;
+}
+
+/* Brick-op phase for snapshot commands.
+ *
+ * Only "snapshot create" does work here: depending on "operation-type"
+ * ("pre" or "post") the barrier value in dict is set to "enable" or
+ * "disable", and gd_brick_op_phase() is then run once per volume listed as
+ * "volname1" .. "volname<volcount>", with the current name published under
+ * the temporary key "volname". All other command types return 0.
+ *
+ * @param dict      request dictionary
+ * @param op_errstr error string slot passed to gd_brick_op_phase()
+ * @param rsp_dict  response dictionary (only asserted here)
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int       ret          = -1;
+        int64_t   vol_count    = 0;
+        int64_t   count        = 1;
+        char      key[1024]    = {0,};
+        char     *volname      = NULL;
+        int32_t   snap_command = 0;
+        xlator_t *this         = NULL;
+        char     *op_type      = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_int32 (dict, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "unable to get the type of "
+                        "the snapshot command");
+                goto out;
+        }
+
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+
+                /* op_type will tell us whether it is a pre-commit
+                 * operation or a post-commit one
+                 */
+                ret = dict_get_str (dict, "operation-type", &op_type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to fetch "
+                                "operation type");
+                        goto out;
+                }
+
+                if (strcmp (op_type, "pre") == 0) {
+                        /* BRICK OP PHASE for enabling barrier, Enable barrier
+                         * if its a pre-commit operation
+                         */
+                        ret = glusterd_set_barrier_value (dict, "enable");
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Failed to "
+                                        "set barrier value as enable in dict");
+                                goto out;
+                        }
+                } else if (strcmp (op_type, "post") == 0) {
+                        /* BRICK OP PHASE for disabling barrier, Disable barrier
+                         * if its a post-commit operation
+                         */
+                        ret = glusterd_set_barrier_value (dict, "disable");
+                        if (ret) {
+                                /* A set failed: use the SET message id, as
+                                 * the "pre" branch does. */
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Failed to "
+                                        "set barrier value as disable in "
+                                        "dict");
+                                goto out;
+                        }
+                } else {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "Invalid op_type");
+                        goto out;
+                }
+
+                ret = dict_get_int64 (dict, "volcount", &vol_count);
+                if (ret)
+                        goto out;
+                while (count <= vol_count) {
+                        snprintf (key, sizeof (key), "volname%"PRId64, count);
+                        ret = dict_get_str (dict, key, &volname);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Unable to get volname");
+                                goto out;
+                        }
+                        /* Publish the current volume under "volname" for
+                         * the brick-op call below */
+                        ret = dict_set_str (dict, "volname", volname);
+                        if (ret)
+                                goto out;
+
+                        ret = gd_brick_op_phase (GD_OP_SNAP, NULL, dict,
+                                                 op_errstr);
+                        if (ret)
+                                goto out;
+                        volname = NULL;
+                        count++;
+                }
+
+                /* Remove the temporary key once every volume is done */
+                dict_del (dict, "volname");
+                ret = 0;
+                break;
+        case GF_SNAP_OPTION_TYPE_DELETE:
+                break;
+        default:
+                break;
+        }
+
+out:
+        return ret;
+}
+
+/* Pre-validation dispatcher for snapshot commands.
+ *
+ * Reads the command "type" from dict and forwards to the matching
+ * pre-validation handler. A handler failure is logged and its return value
+ * is returned unchanged.
+ *
+ * @param dict      request dictionary
+ * @param op_errstr error string slot
+ * @param rsp_dict  response dictionary
+ * @param op_errno  error number slot (must be non-NULL)
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr,
+                               dict_t *rsp_dict, uint32_t *op_errno)
+{
+        xlator_t *this         = NULL;
+        int       snap_command = 0;
+        int       ret          = -1;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+
+        ret = dict_get_int32 (dict, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "unable to get the type of "
+                        "the snapshot command");
+                goto out;
+        }
+
+        /* Every case leaves the handler's result in ret; a zero result
+         * falls through to the common return as success.
+         */
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+                ret = glusterd_snapshot_create_prevalidate (dict, op_errstr,
+                                                            rsp_dict, op_errno);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CREATION_FAIL, "Snapshot create "
+                                "pre-validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_snapshot_clone_prevalidate (dict, op_errstr,
+                                                           rsp_dict, op_errno);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CLONE_PREVAL_FAILED,
+                                "Snapshot clone "
+                                "pre-validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_CONFIG:
+                ret = glusterd_snapshot_config_prevalidate (dict, op_errstr,
+                                                            op_errno);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CONFIG_FAIL, "Snapshot config "
+                                "pre-validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr,
+                                                             op_errno,
+                                                             rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_RESTORE_FAIL, "Snapshot restore "
+                                "validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_ACTIVATE:
+                ret = glusterd_snapshot_activate_deactivate_prevalidate (dict,
+                                                                op_errstr,
+                                                                op_errno,
+                                                                rsp_dict,
+                                                                _gf_true);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_ACTIVATE_FAIL, "Snapshot activate "
+                                "validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+                ret = glusterd_snapshot_activate_deactivate_prevalidate (dict,
+                                                                op_errstr,
+                                                                op_errno,
+                                                                rsp_dict,
+                                                                _gf_false);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_DEACTIVATE_FAIL,
+                                "Snapshot deactivate validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_DELETE:
+                ret = glusterd_snapshot_remove_prevalidate (dict, op_errstr,
+                                                            op_errno, rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL, "Snapshot remove "
+                                "validation failed");
+                break;
+
+        case GF_SNAP_OPTION_TYPE_STATUS:
+                ret = glusterd_snapshot_status_prevalidate (dict, op_errstr,
+                                                            op_errno, rsp_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_STATUS_FAIL, "Snapshot status "
+                                "validation failed");
+                break;
+
+        default:
+                /* NOTE(review): *op_errno is set to EINVAL but ret is still
+                 * 0 from dict_get_int32(), so the caller sees success -
+                 * confirm this is intended.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        GD_MSG_COMMAND_NOT_FOUND, "invalid snap command");
+                *op_errno = EINVAL;
+                break;
+        }
+
+out:
+        return ret;
+}
+
+/* Remove the backup ("trash") copy of a volume's directory.
+ *
+ * Used when a restore succeeded and the backup is no longer needed, and
+ * when a restore failed before commit so there is nothing to revert.
+ * A backup directory that does not exist is not an error.
+ *
+ * @param volname name of the volume which is being restored
+ *
+ * @return 0 on success or non-zero on failure
+ */
+int
+glusterd_remove_trashpath (char *volname)
+{
+        xlator_t        *this                   = NULL;
+        glusterd_conf_t *priv                   = NULL;
+        struct stat      stbuf                  = {0, };
+        char             delete_path[PATH_MAX]  = {0,};
+        int              ret                    = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (volname);
+
+        snprintf (delete_path, sizeof (delete_path),
+                  "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir,
+                  volname);
+
+        ret = lstat (delete_path, &stbuf);
+        if (ret) {
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DIR_OP_FAILED, "Failed to lstat "
+                                "backup dir (%s)", delete_path);
+                        goto out;
+                }
+                /* Nothing to delete: the trash dir is already gone */
+                ret = 0;
+                goto out;
+        }
+
+        /* Delete the backup copy of volume folder; ret is 0 on success */
+        ret = recursive_rmdir (delete_path);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Failed to remove "
+                        "backup dir (%s)", delete_path);
+out:
+        return ret;
+}
+
+/* Clean up after a successful snapshot restore: remove the snap entry and
+ * then the backup copy of the volume directory created during restore.
+ *
+ * @param rsp_dict Response dictionary
+ * @param volname  name of the volume which is being restored
+ * @param snap     snap object
+ *
+ * @return 0 on success or non-zero on failure
+ */
+int
+glusterd_snapshot_restore_cleanup (dict_t *rsp_dict,
+                                   char *volname,
+                                   glusterd_snap_t *snap)
+{
+        xlator_t *this = NULL;
+        int       ret  = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (volname);
+        GF_ASSERT (snap);
+
+        /* Now delete the snap entry. */
+        ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true,
+                                    _gf_false);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SNAP_REMOVE_FAIL, "Failed to delete "
+                        "snap %s", snap->snapname);
+                goto out;
+        }
+
+        /* Delete the backup copy of volume folder; ret is 0 on success */
+        ret = glusterd_remove_trashpath(volname);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Failed to remove "
+                        "backup dir");
+out:
+        return ret;
+}
+
+/* Revert a snapshot restore that failed part-way.
+ *
+ * The volume's directory under the store is deleted, the backup copy in the
+ * trash directory is renamed back into place, the volume is re-read from the
+ * store, and the snap_volumes list plus snap_count of the old volinfo are
+ * carried over to the re-read one. The old volinfo is then unreferenced.
+ *
+ * @param volinfo volinfo of the origin volume
+ *
+ * @return 0 on success and non-zero on failure
+ */
+int
+glusterd_snapshot_revert_partial_restored_vol (glusterd_volinfo_t *volinfo)
+{
+        int                 ret                  = 0;
+        char                pathname [PATH_MAX]  = {0,};
+        char                trash_path[PATH_MAX] = {0,};
+        glusterd_volinfo_t *reverted_vol         = NULL;
+        glusterd_volinfo_t *snap_vol             = NULL;
+        glusterd_volinfo_t *tmp_vol              = NULL;
+        glusterd_conf_t    *priv                 = NULL;
+        xlator_t           *this                 = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (volinfo);
+
+        GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv);
+
+        snprintf (trash_path, sizeof (trash_path),
+                  "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir,
+                  volinfo->volname);
+
+        /* Since snapshot restore failed we cannot rely on the volume
+         * data stored under vols folder. Therefore delete the origin
+         * volume's backend folder.*/
+        ret = recursive_rmdir (pathname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Failed to remove "
+                        "%s directory", pathname);
+                goto out;
+        }
+
+        /* Now move the backup copy of the vols to its original
+         * location.*/
+        ret = sys_rename (trash_path, pathname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Failed to rename folder "
+                        "from %s to %s", trash_path, pathname);
+                goto out;
+        }
+
+        /* Retrieve the volume from the store */
+        reverted_vol = glusterd_store_retrieve_volume (volinfo->volname, NULL);
+        if (NULL == reverted_vol) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_OP_FAILED, "Failed to load restored "
+                        "%s volume", volinfo->volname);
+                /* ret is still 0 from sys_rename(); report the failure */
+                ret = -1;
+                goto out;
+        }
+
+        /* Retrieve the snap_volumes list from the older volinfo */
+        reverted_vol->snap_count = volinfo->snap_count;
+        cds_list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes,
+                                      snapvol_list) {
+                cds_list_add_tail (&snap_vol->snapvol_list,
+                                   &reverted_vol->snap_volumes);
+        }
+
+        /* Since we retrieved the volinfo from store now we don't
+         * want the older volinfo. Therefore delete the older volinfo */
+        glusterd_volinfo_unref (volinfo);
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Revert a failed snapshot restore, starting from the snap object.
+ *
+ * The parent volume name is taken from the first volume on snap->volumes,
+ * the parent's volinfo is looked up, and the partial restore is reverted
+ * via glusterd_snapshot_revert_partial_restored_vol().
+ *
+ * @param snap snapshot object of the restored snap
+ *
+ * @return 0 on success and non-zero on failure
+ */
+int
+glusterd_snapshot_revert_restore_from_snap (glusterd_snap_t *snap)
+{
+        int                 ret                = -1;
+        char                volname [PATH_MAX] = {0,};
+        glusterd_volinfo_t *snap_volinfo       = NULL;
+        glusterd_volinfo_t *volinfo            = NULL;
+        xlator_t           *this               = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (snap);
+
+        /* TODO : As of now there is only one volume in snapshot.
+         * Change this when multiple volume snapshot is introduced
+         */
+        snap_volinfo = cds_list_entry (snap->volumes.next, glusterd_volinfo_t,
+                                       vol_list);
+
+        /* Bounded copy: never writes past volname. A name too long to fit
+         * would be truncated and then fail the lookup below.
+         */
+        snprintf (volname, sizeof (volname), "%s",
+                  snap_volinfo->parent_volname);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND, "Could not get volinfo of "
+                        "%s", snap_volinfo->parent_volname);
+                goto out;
+        }
+
+        ret = glusterd_snapshot_revert_partial_restored_vol (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_RESTORE_REVERT_FAIL,
+                        "Failed to revert snapshot "
+                        "restore operation for %s volume", volname);
+                goto out;
+        }
+out:
+        return ret;
+}
+
+/* Post-operation step of snapshot restore, called from post-validation.
+ *
+ * When the restore succeeded (op_ret == 0) the snap entry and the backup
+ * copy are cleaned up. When it failed, the "cleanup" flag in dict decides:
+ * if it is absent or 0 only the backup copy is removed; otherwise the
+ * partial restore is reverted and the snap is marked
+ * GD_SNAP_STATUS_IN_USE and stored again.
+ *
+ * @param dict      dictionary object
+ * @param op_ret    return value of the restore operation
+ * @param op_errstr error string (not used here)
+ * @param rsp_dict  Response dictionary
+ *
+ * @return 0 on success and non-zero on failure
+ */
+int
+glusterd_snapshot_restore_postop (dict_t *dict, int32_t op_ret,
+                                  char **op_errstr, dict_t *rsp_dict)
+{
+        int                 ret     = -1;
+        char               *name    = NULL;
+        char               *volname = NULL;
+        int                 cleanup = 0;
+        glusterd_snap_t    *snap    = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+        xlator_t           *this    = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_str (dict, "snapname", &name);
+        if (ret) {
+                /* name is not set on this path, so it must not be
+                 * formatted into the message */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "getting the snap "
+                        "name failed");
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_name (name);
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_NOT_FOUND,
+                        "Snapshot (%s) does not exist", name);
+                ret = -1;
+                goto out;
+        }
+
+        /* TODO: fix this when multiple volume support will come */
+        ret = dict_get_str (dict, "volname1", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND,
+                        "Volume (%s) does not exist ", volname);
+                goto out;
+        }
+
+        /* On success perform the cleanup operation */
+        if (0 == op_ret) {
+                ret = glusterd_snapshot_restore_cleanup (rsp_dict,
+                                                         volname,
+                                                         snap);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_CLEANUP_FAIL, "Failed to perform "
+                                "snapshot restore cleanup for %s volume",
+                                volname);
+                        goto out;
+                }
+        } else { /* On failure revert snapshot restore */
+                ret = dict_get_int32 (dict, "cleanup", &cleanup);
+                /* Without a cleanup request there is nothing to revert:
+                 * only the backup copy has to go */
+                if (ret || (0 == cleanup)) {
+                        /* Delete the backup copy of volume folder */
+                        ret = glusterd_remove_trashpath(volinfo->volname);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_DIR_OP_FAILED,
+                                        "Failed to remove backup dir");
+                                goto out;
+                        }
+                        ret = 0;
+                        goto out;
+                }
+
+                ret = glusterd_snapshot_revert_partial_restored_vol (volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_RESTORE_REVERT_FAIL,
+                                "Failed to revert "
+                                "restore operation for %s volume", volname);
+                        goto out;
+                }
+
+                snap->snap_status = GD_SNAP_STATUS_IN_USE;
+                /* We need to save this in disk */
+                ret = glusterd_store_snap (snap);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_OBJECT_STORE_FAIL,
+                                "Could not store snap object for %s snap",
+                                snap->snapname);
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Post-validation dispatcher for snapshot commands.
+ *
+ * Reads the command "type" from dict, runs the matching post-validation
+ * handler and, for commands that change snapshot state, calls
+ * glusterd_fetchsnap_notify() afterwards. Status, config, info and list
+ * need no post-validation.
+ *
+ * @param dict      request dictionary
+ * @param op_ret    result of the preceding operation
+ * @param op_errstr error string slot passed on to helpers
+ * @param rsp_dict  response dictionary
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,
+                                dict_t *rsp_dict)
+{
+        int       snap_command = 0;
+        xlator_t *this         = NULL;
+        int       ret          = -1;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_int32 (dict, "type", &snap_command);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "unable to get the type of "
+                        "the snapshot command");
+                goto out;
+        }
+
+        switch (snap_command) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+                ret = glusterd_snapshot_create_postvalidate (dict, op_ret,
+                                                             op_errstr,
+                                                             rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CREATION_FAIL, "Snapshot create "
+                                "post-validation failed");
+                        goto out;
+                }
+                glusterd_fetchsnap_notify (this);
+                break;
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_snapshot_clone_postvalidate (dict, op_ret,
+                                                            op_errstr,
+                                                            rsp_dict);
+                if (ret) {
+                        /* Name the clone operation, not create */
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CLONE_POSTVAL_FAILED,
+                                "Snapshot clone "
+                                "post-validation failed");
+                        goto out;
+                }
+                glusterd_fetchsnap_notify (this);
+                break;
+        case GF_SNAP_OPTION_TYPE_DELETE:
+                if (op_ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "op_ret = %d. Not performing delete "
+                                      "post_validate", op_ret);
+                        ret = 0;
+                        goto out;
+                }
+                ret = glusterd_snapshot_update_snaps_post_validate (dict,
+                                                                    op_errstr,
+                                                                    rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, "Failed to "
+                                "update missed snaps list");
+                        goto out;
+                }
+                glusterd_fetchsnap_notify (this);
+                break;
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                ret = glusterd_snapshot_update_snaps_post_validate (dict,
+                                                                    op_errstr,
+                                                                    rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_RESTORE_FAIL, "Failed to "
+                                "update missed snaps list");
+                        goto out;
+                }
+
+                ret = glusterd_snapshot_restore_postop (dict, op_ret,
+                                                        op_errstr, rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_RESTORE_FAIL, "Failed to "
+                                "perform snapshot restore post-op");
+                        goto out;
+                }
+                glusterd_fetchsnap_notify (this);
+                break;
+        case GF_SNAP_OPTION_TYPE_ACTIVATE:
+        case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+                glusterd_fetchsnap_notify (this);
+                break;
+        case GF_SNAP_OPTION_TYPE_STATUS:
+        case GF_SNAP_OPTION_TYPE_CONFIG:
+        case GF_SNAP_OPTION_TYPE_INFO:
+        case GF_SNAP_OPTION_TYPE_LIST:
+                /*Nothing to be done. But want to
+                 * avoid the default case warning*/
+                ret = 0;
+                break;
+        default:
+                /* NOTE(review): ret is still 0 from dict_get_int32(), so an
+                 * unrecognised command is logged but returned as success -
+                 * confirm this is intended.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "invalid snap command");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Check that an lvm command can be used: the path must stat successfully,
+ * be a regular file, and have the owner-execute bit set.
+ *
+ * @param lvm_cmd path of the command to check (NULL yields _gf_false)
+ *
+ * @return _gf_true when all checks pass, _gf_false otherwise
+ */
+
+static gf_boolean_t
+glusterd_is_lvm_cmd_available (char *lvm_cmd)
+{
+        struct stat st = {0,};
+
+        if (!lvm_cmd)
+                return _gf_false;
+
+        if (sys_stat (lvm_cmd, &st) != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "stat fails on %s, exiting. (errno = %d (%s))",
+                        lvm_cmd, errno, strerror(errno));
+                return _gf_false;
+        }
+
+        /* stat succeeded from here on, so only the mode needs checking */
+        if (!S_ISREG(st.st_mode)) {
+                gf_msg (THIS->name, GF_LOG_CRITICAL, EINVAL,
+                        GD_MSG_COMMAND_NOT_FOUND,
+                        "Provided command %s is not a regular file,"
+                        "exiting", lvm_cmd);
+                return _gf_false;
+        }
+
+        if (!(st.st_mode & S_IXUSR)) {
+                gf_msg (THIS->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_NO_EXEC_PERMS,
+                        "Provided command %s has no exec permissions,"
+                        "exiting", lvm_cmd);
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Handle a snapshot request.
+ *
+ * Decodes the request payload into a dict, stores the local node's uuid in
+ * it under "host-uuid", verifies the cluster op-version and that LVM_CREATE
+ * is usable, and then dispatches on the dict's "type" value to the matching
+ * snapshot handler.
+ *
+ * @param req  the incoming rpc request
+ *
+ * @return 0 when the selected handler returned 0. On any failure an error
+ *         response carrying err_str and op_errno is sent with
+ *         glusterd_op_send_cli_response() and that call's result is
+ *         returned.
+ */
+int
+glusterd_handle_snapshot_fn (rpcsvc_request_t *req)
+{
+        int32_t               ret            = 0;
+        dict_t               *dict           = NULL;
+        gf_cli_req            cli_req        = {{0},};
+        glusterd_op_t         cli_op         = GD_OP_SNAP;
+        int                   type           = 0;
+        glusterd_conf_t      *conf           = NULL;
+        char                 *host_uuid      = NULL;
+        char                  err_str[2048]  = {0,};
+        xlator_t             *this           = NULL;
+        uint32_t              op_errno       = 0;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req,
+                              (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len > 0) {
+                dict = dict_new ();
+                if (!dict) {
+                        /* ret still holds the non-negative decode result;
+                         * force a failure so the error response is sent. */
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                }
+
+                dict->extra_stdfree = cli_req.dict.dict_val;
+
+                host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+                if (host_uuid == NULL) {
+                        snprintf (err_str, sizeof (err_str), "Failed to get "
+                                  "the uuid of local glusterd");
+                        ret = -1;
+                        goto out;
+                }
+                ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+                if (ret) {
+                        /* The dict did not take the string; free it here. */
+                        GF_FREE (host_uuid);
+                        goto out;
+                }
+
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "request dict length is %d",
+                        cli_req.dict.dict_len);
+                /* Same as above: make sure this is reported as a failure
+                 * instead of relying on the decode result left in ret. */
+                ret = -1;
+                goto out;
+        }
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                snprintf (err_str, sizeof (err_str), "Cluster operating version"
+                          " is lesser than the supported version "
+                          "for a snapshot");
+                op_errno = EG_OPNOTSUP;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNSUPPORTED_VERSION,
+                        "%s (%d < %d)", err_str,
+                        conf->op_version, GD_OP_VERSION_3_6_0);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Command type not found");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "%s", err_str);
+                goto out;
+        }
+
+        if (!glusterd_is_lvm_cmd_available (LVM_CREATE)) {
+                snprintf (err_str, sizeof (err_str), "LVM commands not found,"
+                          " snapshot functionality is disabled");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_COMMAND_NOT_FOUND, "%s", err_str);
+                ret = -1;
+                goto out;
+        }
+
+        switch (type) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+                ret = glusterd_handle_snapshot_create (req, cli_op, dict,
+                                                err_str, sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CREATION_FAIL,
+                                "Snapshot create failed: %s", err_str);
+                }
+                break;
+
+        case GF_SNAP_OPTION_TYPE_CLONE:
+                ret = glusterd_handle_snapshot_clone (req, cli_op, dict,
+                                                err_str, sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CLONE_FAILED, "Snapshot clone "
+                                "failed: %s", err_str);
+                }
+                break;
+
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+                ret = glusterd_handle_snapshot_restore (req, cli_op, dict,
+                                                err_str, &op_errno,
+                                                sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_RESTORE_FAIL,
+                                "Snapshot restore failed: %s", err_str);
+                }
+
+                break;
+        case GF_SNAP_OPTION_TYPE_INFO:
+                ret = glusterd_handle_snapshot_info (req, cli_op, dict,
+                                                err_str, sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_INFO_FAIL,
+                                "Snapshot info failed");
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_LIST:
+                ret = glusterd_handle_snapshot_list (req, cli_op, dict,
+                                                err_str, sizeof (err_str),
+                                                &op_errno);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_LIST_GET_FAIL,
+                                "Snapshot list failed");
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_CONFIG:
+                ret = glusterd_handle_snapshot_config (req, cli_op, dict,
+                                                err_str, sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_CONFIG_FAIL,
+                                "snapshot config failed");
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_DELETE:
+                ret = glusterd_handle_snapshot_delete (req, cli_op, dict,
+                                                err_str, &op_errno,
+                                                sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL,
+                                "Snapshot delete failed: %s", err_str);
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_ACTIVATE:
+                ret = glusterd_mgmt_v3_initiate_snap_phases (req, cli_op,
+                                                             dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_ACTIVATE_FAIL,
+                                "Snapshot activate failed: %s", err_str);
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+                ret = glusterd_mgmt_v3_initiate_snap_phases (req, cli_op,
+                                                             dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_DEACTIVATE_FAIL,
+                                "Snapshot deactivate failed: %s", err_str);
+                }
+                break;
+        case GF_SNAP_OPTION_TYPE_STATUS:
+                ret = glusterd_handle_snapshot_status (req, cli_op, dict,
+                                                       err_str,
+                                                       sizeof (err_str));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_STATUS_FAIL,
+                                "Snapshot status failed: %s", err_str);
+                }
+                break;
+        default:
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_COMMAND_NOT_FOUND, "Unkown snapshot request "
+                        "type (%d)", type);
+                ret = -1; /* Failure */
+        }
+
+out:
+        if (ret) {
+                /* Fall back to a generic message and errno when the failing
+                 * step did not provide its own. */
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+
+                if (op_errno == 0)
+                        op_errno = EG_INTRNL;
+
+                ret = glusterd_op_send_cli_response (cli_op, ret, op_errno, req,
+                                                     dict, err_str);
+        }
+
+        return ret;
+}
+
+/* Run glusterd_handle_snapshot_fn for the request through
+ * glusterd_big_locked_handler and hand back whatever it returns. */
+int
+glusterd_handle_snapshot (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn);
+
+        return ret;
+}
+
+/*
+ * Free a missed snap op together with the strings it owns.
+ *
+ * Both brick_path and snap_vol_id are gf_strdup()'d when the op is built in
+ * glusterd_add_new_entry_to_list(), so both are released here. A NULL
+ * snap_op is ignored.
+ */
+static void
+glusterd_free_snap_op (glusterd_snap_op_t *snap_op)
+{
+        if (snap_op) {
+                if (snap_op->brick_path)
+                        GF_FREE (snap_op->brick_path);
+
+                if (snap_op->snap_vol_id)
+                        GF_FREE (snap_op->snap_vol_id);
+
+                GF_FREE (snap_op);
+        }
+}
+
+/*
+ * Release a missed-snap info object: every snap op on its snap_ops list,
+ * its node_uuid and snap_uuid strings, and finally the object itself.
+ * A NULL argument is ignored.
+ */
+static void
+glusterd_free_missed_snapinfo (glusterd_missed_snap_info *missed_snapinfo)
+{
+        glusterd_snap_op_t *op_iter = NULL;
+        glusterd_snap_op_t *op_next = NULL;
+
+        if (!missed_snapinfo)
+                return;
+
+        /* The _safe iterator keeps the next entry in op_next, so freeing the
+         * current one inside the loop is fine. */
+        cds_list_for_each_entry_safe (op_iter, op_next,
+                                      &missed_snapinfo->snap_ops,
+                                      snap_ops_list) {
+                glusterd_free_snap_op (op_iter);
+        }
+
+        if (missed_snapinfo->node_uuid)
+                GF_FREE (missed_snapinfo->node_uuid);
+
+        if (missed_snapinfo->snap_uuid)
+                GF_FREE (missed_snapinfo->snap_uuid);
+
+        GF_FREE (missed_snapinfo);
+}
+
+/* Look for duplicates and accordingly update the list.
+ *
+ * Walks missed_snapinfo->snap_ops looking for an entry with the same
+ * snap_vol_id as missed_snap_op and either merges the new op into that
+ * entry, drops it as a duplicate, or appends it to the list.
+ *
+ * missed_snap_op is consumed on every path: it is either freed with
+ * glusterd_free_snap_op() or linked into missed_snapinfo->snap_ops, so the
+ * caller must not use or free it afterwards.
+ *
+ * @param missed_snapinfo  existing missed-snap info whose snap_ops list is
+ *                         searched and possibly updated
+ * @param missed_snap_op   the new op to merge or append
+ *
+ * @return 0 on every path that reaches the end of the function
+ */
+int32_t
+glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo,
+ glusterd_snap_op_t *missed_snap_op)
+{
+ int32_t ret = -1;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ gf_boolean_t match = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_snapinfo);
+ GF_ASSERT(missed_snap_op);
+
+ cds_list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list) {
+ /* If the entry is not for the same snap_vol_id
+ * then continue
+ */
+ if (strcmp (snap_opinfo->snap_vol_id,
+ missed_snap_op->snap_vol_id))
+ continue;
+
+ /* Same brick path and same op: this is the same entry. */
+ if ((!strcmp (snap_opinfo->brick_path,
+ missed_snap_op->brick_path)) &&
+ (snap_opinfo->op == missed_snap_op->op)) {
+ /* If two entries have conflicting status
+ * GD_MISSED_SNAP_DONE takes precedence
+ */
+ if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) &&
+ (missed_snap_op->status == GD_MISSED_SNAP_DONE)) {
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_MISSED_SNAP_STATUS_DONE,
+ "Updating missed snap status "
+ "for %s:%s=%s:%d:%s:%d as DONE",
+ missed_snapinfo->node_uuid,
+ missed_snapinfo->snap_uuid,
+ snap_opinfo->snap_vol_id,
+ snap_opinfo->brick_num,
+ snap_opinfo->brick_path,
+ snap_opinfo->op);
+ ret = 0;
+ glusterd_free_snap_op (missed_snap_op);
+ goto out;
+ }
+ /* Any other status combination is treated as a plain
+ * duplicate and handled after the loop. */
+ match = _gf_true;
+ break;
+ } else if ((snap_opinfo->brick_num ==
+ missed_snap_op->brick_num) &&
+ (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) &&
+ ((missed_snap_op->op ==
+ GF_SNAP_OPTION_TYPE_DELETE) ||
+ (missed_snap_op->op ==
+ GF_SNAP_OPTION_TYPE_RESTORE))) {
+ /* Optimizing create and delete entries for the same
+ * brick and same node
+ */
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_MISSED_SNAP_STATUS_DONE,
+ "Updating missed snap status "
+ "for %s:%s=%s:%d:%s:%d as DONE",
+ missed_snapinfo->node_uuid,
+ missed_snapinfo->snap_uuid,
+ snap_opinfo->snap_vol_id,
+ snap_opinfo->brick_num,
+ snap_opinfo->brick_path,
+ snap_opinfo->op);
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ ret = 0;
+ glusterd_free_snap_op (missed_snap_op);
+ goto out;
+ }
+ }
+
+ if (match == _gf_true) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_DUP_ENTRY,
+ "Duplicate entry. Not updating");
+ glusterd_free_snap_op (missed_snap_op);
+ } else {
+ /* No matching entry: the list takes ownership of the new op. */
+ cds_list_add_tail (&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Add new missed snap entry to the missed_snaps list.
+ *
+ * Builds a snap op from the given fields and attaches it to the entry of
+ * priv->missed_snaps_list whose "node_uuid:snap_uuid" equals missed_info.
+ * When no such entry exists a new one is created and appended to the list;
+ * otherwise the op is handed to glusterd_update_missed_snap_entry().
+ *
+ * @param missed_info  "node_uuid:snap_uuid" string. NOTE: when a new entry
+ *                     has to be created this buffer is split in place with
+ *                     strtok_r(), so it must be writable and is modified.
+ * @param snap_vol_id  snap volume id; duplicated with gf_strdup()
+ * @param brick_num    brick number stored in the op
+ * @param brick_path   brick path; duplicated with gf_strdup()
+ * @param snap_op      operation stored in the op
+ * @param snap_status  status stored in the op
+ *
+ * @return 0 on success, non-zero on failure; on failure the op (and a newly
+ *         created missed snapinfo, if any) is freed before returning
+ */
+int32_t
+glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id,
+ int32_t brick_num, char *brick_path,
+ int32_t snap_op, int32_t snap_status)
+{
+ char *buf = NULL;
+ char *save_ptr = NULL;
+ char node_snap_info[PATH_MAX] = "";
+ int32_t ret = -1;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *missed_snap_op = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t free_missed_snap_info = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_info);
+ GF_ASSERT(snap_vol_id);
+ GF_ASSERT(brick_path);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Create the snap_op object consisting of the *
+ * snap id and the op */
+ ret = glusterd_missed_snap_op_new (&missed_snap_op);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to create new missed snap object.");
+ ret = -1;
+ goto out;
+ }
+
+ missed_snap_op->snap_vol_id = gf_strdup(snap_vol_id);
+ if (!missed_snap_op->snap_vol_id) {
+ ret = -1;
+ goto out;
+ }
+ missed_snap_op->brick_path = gf_strdup(brick_path);
+ if (!missed_snap_op->brick_path) {
+ ret = -1;
+ goto out;
+ }
+ missed_snap_op->brick_num = brick_num;
+ missed_snap_op->op = snap_op;
+ missed_snap_op->status = snap_status;
+
+ /* Look for other entries for the same node and same snap */
+ cds_list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps) {
+ snprintf (node_snap_info, sizeof(node_snap_info),
+ "%s:%s", missed_snapinfo->node_uuid,
+ missed_snapinfo->snap_uuid);
+ if (!strcmp (node_snap_info, missed_info)) {
+ /* Found missed snapshot info for *
+ * the same node and same snap */
+ match = _gf_true;
+ break;
+ }
+ }
+
+ if (match == _gf_false) {
+ /* First snap op missed for the brick */
+ /* missed_snapinfo is overwritten here, so the loop cursor left
+ * over from the search above is not used. */
+ ret = glusterd_missed_snapinfo_new (&missed_snapinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to create missed snapinfo");
+ goto out;
+ }
+ /* From here on the new snapinfo is ours to free on failure. */
+ free_missed_snap_info = _gf_true;
+ /* Split "node_uuid:snap_uuid" in place (modifies missed_info). */
+ buf = strtok_r (missed_info, ":", &save_ptr);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ missed_snapinfo->node_uuid = gf_strdup(buf);
+ if (!missed_snapinfo->node_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ buf = strtok_r (NULL, ":", &save_ptr);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ missed_snapinfo->snap_uuid = gf_strdup(buf);
+ if (!missed_snapinfo->snap_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Link the op into the new snapinfo, and the snapinfo into the
+ * global list; both are now owned by the list. */
+ cds_list_add_tail (&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ cds_list_add_tail (&missed_snapinfo->missed_snaps,
+ &priv->missed_snaps_list);
+
+ ret = 0;
+ goto out;
+ } else {
+ /* The callee takes ownership of missed_snap_op. */
+ ret = glusterd_update_missed_snap_entry (missed_snapinfo,
+ missed_snap_op);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to update existing missed snap entry.");
+ goto out;
+ }
+ }
+
+out:
+ if (ret) {
+ glusterd_free_snap_op (missed_snap_op);
+
+ /* Only free a snapinfo created by this call, never one that
+ * was found in the existing list. */
+ if (missed_snapinfo &&
+ (free_missed_snap_info == _gf_true))
+ glusterd_free_missed_snapinfo (missed_snapinfo);
+ }
+
+ gf_msg_trace (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Add missing snap entries to the in-memory conf->missed_snap_list.
+ *
+ * For every i in [0, missed_snap_count) the string stored in dict under
+ * "missed_snaps_<i>" is parsed as
+ *   <node-id>:<snap-uuid>=<snap-vol-id>:<brick-num>:<brick-path>:<op>:<status>
+ * and handed to glusterd_add_new_entry_to_list().
+ *
+ * @param dict               dictionary holding the "missed_snaps_<i>" keys
+ * @param missed_snap_count  number of entries to read from dict
+ *
+ * @return 0 on success; non-zero when a key is missing, an entry is
+ *         malformed, memory allocation fails or the entry cannot be added
+ */
+int32_t
+glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count)
+{
+        char                          *buf                     = NULL;
+        char                          *tmp                     = NULL;
+        char                          *save_ptr                = NULL;
+        char                          *nodeid                  = NULL;
+        char                          *snap_uuid               = NULL;
+        char                          *snap_vol_id             = NULL;
+        char                          *brick_num_str           = NULL;
+        char                          *brick_path              = NULL;
+        char                          *snap_op_str             = NULL;
+        char                          *snap_status_str         = NULL;
+        char                           missed_info[PATH_MAX]   = "";
+        char                           name_buf[PATH_MAX]      = "";
+        int32_t                        i                       = -1;
+        int32_t                        ret                     = -1;
+        int32_t                        brick_num               = -1;
+        int32_t                        snap_op                 = -1;
+        int32_t                        snap_status             = -1;
+        gf_boolean_t                   entry_valid             = _gf_false;
+        glusterd_conf_t               *priv                    = NULL;
+        xlator_t                      *this                    = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT(dict);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* We can update the missed_snaps_list without acquiring *
+         * any additional locks as big lock will be held.        */
+        for (i = 0; i < missed_snap_count; i++) {
+                snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+                          i);
+                ret = dict_get_str (dict, name_buf, &buf);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch %s", name_buf);
+                        goto out;
+                }
+
+                gf_msg_debug (this->name, 0, "missed_snap_entry = %s",
+                              buf);
+
+                /* Need to make a duplicate string coz the same dictionary *
+                 * is resent to the non-originator nodes */
+                tmp = gf_strdup (buf);
+                if (!tmp) {
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Fetch the node-id, snap-id, brick_num,
+                 * brick_path, snap_op and snap status
+                 */
+                nodeid          = strtok_r (tmp, ":", &save_ptr);
+                snap_uuid       = strtok_r (NULL, "=", &save_ptr);
+                snap_vol_id     = strtok_r (NULL, ":", &save_ptr);
+                brick_num_str   = strtok_r (NULL, ":", &save_ptr);
+                brick_path      = strtok_r (NULL, ":", &save_ptr);
+                snap_op_str     = strtok_r (NULL, ":", &save_ptr);
+                snap_status_str = strtok_r (NULL, ":", &save_ptr);
+
+                /* Every token must be present before any of them is passed
+                 * to atoi(): a truncated entry makes strtok_r() return NULL
+                 * and atoi(NULL) is undefined behaviour. */
+                entry_valid = (nodeid && snap_uuid && snap_vol_id &&
+                               brick_num_str && brick_path && snap_op_str &&
+                               snap_status_str) ? _gf_true : _gf_false;
+                if (entry_valid == _gf_true) {
+                        brick_num   = atoi (brick_num_str);
+                        snap_op     = atoi (snap_op_str);
+                        snap_status = atoi (snap_status_str);
+
+                        if (brick_num < 1 || snap_op < 1 ||
+                            snap_status < 1)
+                                entry_valid = _gf_false;
+                }
+
+                if (entry_valid == _gf_false) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_INVALID_MISSED_SNAP_ENTRY,
+                                "Invalid missed_snap_entry");
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (missed_info, sizeof(missed_info), "%s:%s",
+                          nodeid, snap_uuid);
+
+                ret = glusterd_add_new_entry_to_list (missed_info,
+                                                      snap_vol_id,
+                                                      brick_num,
+                                                      brick_path,
+                                                      snap_op,
+                                                      snap_status);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                                "Failed to store missed snaps_list");
+                        goto out;
+                }
+
+                GF_FREE (tmp);
+                tmp = NULL;
+        }
+
+        ret = 0;
+out:
+        /* tmp is only non-NULL here when the loop bailed out mid-entry. */
+        if (tmp)
+                GF_FREE (tmp);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* This function will restore origin volume to its snap.
+ * The restore operation will simply replace the Gluster origin
+ * volume with the snap volume.
+ * TODO: Multi-volume delete to be done.
+ * Cleanup in case of restore failure is pending.
+ *
+ * A new volinfo is duplicated from snap_vol, given the identity (name,
+ * volume id, snap count, version, status) of orig_vol, added to
+ * conf->volumes and stored. On success the snap volumes listed under
+ * orig_vol are added to the new volinfo's snap_volumes list.
+ *
+ * @param dict request dictionary, passed to glusterd_snap_volinfo_restore
+ * @param rsp_dict response dictionary, passed to
+ * glusterd_snap_volinfo_restore
+ * @param orig_vol volinfo of origin volume
+ * @param snap_vol volinfo of snapshot volume
+ * @param volcount passed to glusterd_snap_volinfo_restore
+ *
+ * @return 0 on success and negative value on error
+ */
+int
+gd_restore_snap_volume (dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *orig_vol,
+ glusterd_volinfo_t *snap_vol,
+ int32_t volcount)
+{
+ int ret = -1;
+ glusterd_volinfo_t *new_volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *temp_volinfo = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+ gf_boolean_t conf_present = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ GF_VALIDATE_OR_GOTO (this->name, orig_vol, out);
+ GF_VALIDATE_OR_GOTO (this->name, snap_vol, out);
+ snap = snap_vol->snapshot;
+ GF_VALIDATE_OR_GOTO (this->name, snap, out);
+
+ /* Set the status to under restore so that if the
+ * node goes down during restore and comes back
+ * the state of the volume can be reverted correctly
+ */
+ snap->snap_status = GD_SNAP_STATUS_UNDER_RESTORE;
+
+ /* We need to save this in disk so that if node goes
+ * down the status is in updated state.
+ */
+ ret = glusterd_store_snap (snap);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_FILE_OP_FAILED,
+ "Could not store snap "
+ "object for %s snap of %s volume", snap_vol->volname,
+ snap_vol->parent_volname);
+ goto out;
+ }
+
+ /* Snap volume must be stopped before performing the
+ * restore operation.
+ */
+ ret = glusterd_stop_volume (snap_vol);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_STOP_FAILED,
+ "Failed to stop "
+ "snap volume");
+ goto out;
+ }
+
+ /* Create a new volinfo for the restored volume */
+ ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_OP_FAILED, "Failed to create volinfo");
+ goto out;
+ }
+
+ /* Following entries need to be derived from origin volume. */
+ strcpy (new_volinfo->volname, orig_vol->volname);
+ gf_uuid_copy (new_volinfo->volume_id, orig_vol->volume_id);
+ new_volinfo->snap_count = orig_vol->snap_count;
+ gf_uuid_copy (new_volinfo->restored_from_snap,
+ snap_vol->snapshot->snap_id);
+
+ /* Use the same version as the original version */
+ new_volinfo->version = orig_vol->version;
+
+ /* Copy the snap vol info to the new_volinfo.*/
+ ret = glusterd_snap_volinfo_restore (dict, rsp_dict, new_volinfo,
+ snap_vol, volcount);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_RESTORE_FAIL, "Failed to restore snap");
+ goto out;
+ }
+
+ /* In case a new node is added to the peer, after a snapshot was
+ * taken, the geo-rep files are not synced to that node. This
+ * leads to the failure of snapshot restore. Hence, ignoring the
+ * missing geo-rep files in the new node, and proceeding with
+ * snapshot restore. Once the restore is successful, the missing
+ * geo-rep files can be generated with "gluster volume geo-rep
+ * <master-vol> <slave-vol> create push-pem force"
+ */
+ ret = glusterd_restore_geo_rep_files (snap_vol);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore "
+ "geo-rep files for snap %s",
+ snap_vol->snapshot->snapname);
+ }
+
+ /* Unlike the geo-rep step above, a failure here aborts the restore
+ * even though it is only logged at WARNING level.
+ */
+ ret = glusterd_restore_nfs_ganesha_file (orig_vol, snap);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore "
+ "nfs-ganesha export file for snap %s",
+ snap_vol->snapshot->snapname);
+ goto out;
+ }
+
+ /* Need not save cksum, as we will copy cksum file in *
+ * this function *
+ */
+ ret = glusterd_copy_quota_files (snap_vol, orig_vol, &conf_present);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_RESTORE_FAIL, "Failed to restore "
+ "quota files for snap %s",
+ snap_vol->snapshot->snapname);
+ goto out;
+ }
+
+ /* New volinfo always shows the status as created. Therefore
+ * set the status to the original volume's status. */
+ glusterd_set_volume_status (new_volinfo, orig_vol->status);
+
+ cds_list_add_tail (&new_volinfo->vol_list, &conf->volumes);
+
+ ret = glusterd_store_volinfo (new_volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOL_OP_FAILED, "Failed to store volinfo");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ /* In case of any failure we should free new_volinfo. Doing
+ * this will also remove the entry we added in conf->volumes
+ * if it was added there.
+ */
+ if (new_volinfo)
+ (void)glusterd_volinfo_delete (new_volinfo);
+ } else {
+ /* Hand the origin volume's snap volumes over to the restored
+ * volinfo.
+ */
+ cds_list_for_each_entry_safe (voliter, temp_volinfo,
+ &orig_vol->snap_volumes,
+ snapvol_list) {
+ cds_list_add_tail (&voliter->snapvol_list,
+ &new_volinfo->snap_volumes);
+ }
+ }
+
+ return ret;
+}
+
+
+
+/*
+ * Collect name, id and volume name of every started snapshot volume of a
+ * volume.
+ *
+ * For each snap volume of volname whose status is GLUSTERD_STATUS_STARTED
+ * the keys "snapname.<n>", "snap-id.<n>" and "snap-volname.<n>" (n counted
+ * from 1) are set in dict, followed by "snap-count". The dict is then
+ * serialized into snap_info_rsp->dict.
+ *
+ * @param dict           dictionary that receives the keys
+ * @param volname        name of the volume whose snapshots are listed
+ * @param snap_info_rsp  response to fill; op_ret, op_errno and op_errstr
+ *                       are set on every path where it is non-NULL
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_snapshot_get_volnames_uuids (dict_t *dict,
+           char *volname,
+           gf_getsnap_name_uuid_rsp  *snap_info_rsp)
+{
+        int                 ret             = -1;
+        int                 snapcount       = 0;
+        char                key[PATH_MAX]   = {0,};
+        glusterd_volinfo_t *snap_vol        = NULL;
+        glusterd_volinfo_t *volinfo         = NULL;
+        glusterd_volinfo_t *tmp_vol         = NULL;
+        xlator_t           *this            = NULL;
+        int                 op_errno        = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (volname);
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, dict, out,
+                                        op_errno, EINVAL);
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, volname, out,
+                                        op_errno, EINVAL);
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, snap_info_rsp, out,
+                                        op_errno, EINVAL);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_VOL_NOT_FOUND,
+                        "Failed to get volinfo of volume %s",
+                        volname);
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        cds_list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes,
+                                      snapvol_list) {
+
+                /* Only started snap volumes are reported. */
+                if (GLUSTERD_STATUS_STARTED != snap_vol->status)
+                        continue;
+
+                snapcount++;
+
+                /* Set Snap Name */
+                snprintf (key, sizeof (key), "snapname.%d", snapcount);
+                ret = dict_set_dynstr_with_alloc (dict, key,
+                                            snap_vol->snapshot->snapname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set "
+                                "snap name in dictionary");
+                        goto out;
+                }
+
+                /* Set Snap ID */
+                snprintf (key, sizeof (key), "snap-id.%d", snapcount);
+                ret = dict_set_dynstr_with_alloc (dict, key,
+                                     uuid_utoa(snap_vol->snapshot->snap_id));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set "
+                                "snap id in dictionary");
+                        goto out;
+                }
+
+                /* Snap Volname which is used to activate the snap vol */
+                snprintf (key, sizeof (key), "snap-volname.%d", snapcount);
+                ret = dict_set_dynstr_with_alloc (dict, key, snap_vol->volname);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED, "Failed to set "
+                                "snap volname in dictionary");
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "snap-count", snapcount);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set snapcount");
+                op_errno = -ret;
+                goto out;
+        }
+
+        ret = dict_allocate_and_serialize (dict, &snap_info_rsp->dict.dict_val,
+                                           &snap_info_rsp->dict.dict_len);
+        if (ret) {
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        /* The validation above jumps here when snap_info_rsp is NULL, so it
+         * must not be dereferenced unconditionally. */
+        if (snap_info_rsp) {
+                snap_info_rsp->op_ret = ret;
+                snap_info_rsp->op_errno = op_errno;
+                snap_info_rsp->op_errstr = "";
+        }
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c
new file mode 100644
index 00000000000..a01a6b8bfed
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "statedump.h"
+#include "glusterd.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-locks.h"
+#include "glusterd-messages.h"
+
+/*
+ * Write the basic attributes of one peer to the statedump.
+ *
+ * Each value is written under a key built from "<input_key><index>" and the
+ * attribute name (uuid, hostname, port, state, quorum-action,
+ * quorum-contrib, detaching, locked). The xpeers argument is not used.
+ */
+static void
+glusterd_dump_peer (glusterd_peerinfo_t *peerinfo, char *input_key, int index,
+                    gf_boolean_t xpeers)
+{
+        char   key[GF_DUMP_MAX_BUF_LEN]   = {0,};
+        char   peer_prefix[50]            = {0,};
+
+        /* key is zero-filled, so copying at most size-1 bytes leaves it
+         * NUL-terminated. */
+        strncpy (key, input_key, sizeof (key) - 1);
+
+        snprintf (peer_prefix, sizeof (peer_prefix), "%s%d", key, index);
+
+        gf_proc_dump_build_key (key, peer_prefix, "uuid");
+        gf_proc_dump_write (key, "%s", uuid_utoa (peerinfo->uuid));
+
+        gf_proc_dump_build_key (key, peer_prefix, "hostname");
+        gf_proc_dump_write (key, "%s", peerinfo->hostname);
+
+        gf_proc_dump_build_key (key, peer_prefix, "port");
+        gf_proc_dump_write (key, "%d", peerinfo->port);
+
+        gf_proc_dump_build_key (key, peer_prefix, "state");
+        gf_proc_dump_write (key, "%d", peerinfo->state.state);
+
+        gf_proc_dump_build_key (key, peer_prefix, "quorum-action");
+        gf_proc_dump_write (key, "%d", peerinfo->quorum_action);
+
+        gf_proc_dump_build_key (key, peer_prefix, "quorum-contrib");
+        gf_proc_dump_write (key, "%d", peerinfo->quorum_contrib);
+
+        gf_proc_dump_build_key (key, peer_prefix, "detaching");
+        gf_proc_dump_write (key, "%d", peerinfo->detaching);
+
+        gf_proc_dump_build_key (key, peer_prefix, "locked");
+        gf_proc_dump_write (key, "%d", peerinfo->locked);
+}
+
+
+/*
+ * Write the rpc connection statistics of one peer to the statedump.
+ *
+ * Nothing is written when the peer has no rpc object. Otherwise the peer
+ * name (when it can be resolved), the connected flag, the byte counters of
+ * the transport and the ping/message counters are written under keys built
+ * from "<input_key><index>".
+ */
+static void
+glusterd_dump_peer_rpcstat (glusterd_peerinfo_t *peerinfo, char *input_key,
+                            int index)
+{
+        char                    key[GF_DUMP_MAX_BUF_LEN]       = {0,};
+        char                    peer_prefix[50]                = {0,};
+        char                    peername[RPCSVC_PEER_STRLEN]   = {0,};
+        rpc_clnt_t             *clnt                           = NULL;
+        rpc_clnt_connection_t  *conn                           = NULL;
+
+        strncpy (key, input_key, sizeof (key) - 1);
+
+        /* Dump the rpc connection statistics */
+        clnt = peerinfo->rpc;
+        if (!clnt)
+                return;
+
+        conn = &clnt->conn;
+        snprintf (peer_prefix, sizeof (peer_prefix), "%s%d", key, index);
+
+        /* The peer name is only dumped when the lookup succeeds. */
+        if (rpcsvc_transport_peername (conn->trans, peername,
+                                       sizeof (peername)) == 0) {
+                gf_proc_dump_build_key (key, peer_prefix, "rpc.peername");
+                gf_proc_dump_write (key, "%s", peername);
+        }
+
+        gf_proc_dump_build_key (key, peer_prefix, "rpc.connected");
+        gf_proc_dump_write (key, "%d", conn->connected);
+
+        gf_proc_dump_build_key (key, peer_prefix, "rpc.total-bytes-read");
+        gf_proc_dump_write (key, "%"PRIu64, conn->trans->total_bytes_read);
+
+        gf_proc_dump_build_key (key, peer_prefix, "rpc.total-bytes-written");
+        gf_proc_dump_write (key, "%"PRIu64, conn->trans->total_bytes_write);
+
+        gf_proc_dump_build_key (key, peer_prefix, "rpc.ping_msgs_sent");
+        gf_proc_dump_write (key, "%"PRIu64, conn->pingcnt);
+
+        gf_proc_dump_build_key (key, peer_prefix, "rpc.msgs_sent");
+        gf_proc_dump_write (key, "%"PRIu64, conn->msgcnt);
+}
+
+
+/*
+ * Write one statedump section per transport on conf->xprt_list.
+ *
+ * The list is walked with conf->xprt_lock held; entries are numbered from 1
+ * and dumped under "glusterd.client<n>" with their identifier, volname and
+ * max/min op-version.
+ */
+static void
+glusterd_dump_client_details (glusterd_conf_t *conf)
+{
+        char              key[GF_DUMP_MAX_BUF_LEN]   = {0,};
+        char              client_prefix[50]          = {0,};
+        rpc_transport_t  *trans                      = NULL;
+        int               client_no                  = 1;
+
+        pthread_mutex_lock (&conf->xprt_lock);
+
+        list_for_each_entry (trans, &conf->xprt_list, list) {
+                snprintf (client_prefix, sizeof (client_prefix),
+                          "glusterd.client%d", client_no);
+
+                gf_proc_dump_build_key (key, client_prefix, "identifier");
+                gf_proc_dump_write (key, "%s", trans->peerinfo.identifier);
+
+                gf_proc_dump_build_key (key, client_prefix, "volname");
+                gf_proc_dump_write (key, "%s", trans->peerinfo.volname);
+
+                gf_proc_dump_build_key (key, client_prefix, "max-op-version");
+                gf_proc_dump_write (key, "%u",
+                                    trans->peerinfo.max_op_version);
+
+                gf_proc_dump_build_key (key, client_prefix, "min-op-version");
+                gf_proc_dump_write (key, "%u",
+                                    trans->peerinfo.min_op_version);
+
+                client_no++;
+        }
+
+        pthread_mutex_unlock (&conf->xprt_lock);
+}
+
+
+/* The following function is just for dumping mgmt_v3_lock dictionary, any other
+ * dict passed to this API will not work.
+ *
+ * Every member of the dict is appended to a local buffer as "\n\t<key>:<value>".
+ * For keys containing "debug.last-success-bt" the value is printed as a
+ * string; for all other keys the value is read as a
+ * glusterd_mgmt_v3_lock_obj and its lock_owner uuid is printed. The buffer
+ * is then written to the statedump under the glusterd "mgmt_v3_lock" key.
+ *
+ * If the buffer fills up the output is truncated at that point and what was
+ * collected so far is still written. If snprintf reports an error or writes
+ * nothing, the function returns without writing anything.
+ */
+
+static void
+glusterd_dict_mgmt_v3_lock_statedump (dict_t *dict)
+{
+        int          ret                       = 0;
+        size_t       dumplen                   = 0;
+        size_t       remaining                 = 0;
+        data_pair_t *trav                      = NULL;
+        char         key[GF_DUMP_MAX_BUF_LEN]  = {0,};
+        char         dump[64*1024]             = {0,};
+
+        if (!dict) {
+                gf_msg_callingfn ("glusterd", GF_LOG_WARNING, EINVAL,
+                                  GD_MSG_DICT_EMPTY,
+                                  "dict NULL");
+                goto out;
+        }
+        for (trav = dict->members_list; trav; trav = trav->next) {
+                remaining = sizeof (dump) - dumplen;
+
+                if (strstr (trav->key, "debug.last-success-bt") != NULL) {
+                        ret = snprintf (&dump[dumplen], remaining,
+                                        "\n\t%s:%s", trav->key,
+                                        trav->value->data);
+                } else {
+                        ret = snprintf (&dump[dumplen], remaining,
+                                        "\n\t%s:%s", trav->key,
+                                        uuid_utoa (((glusterd_mgmt_v3_lock_obj *)
+                                                   (trav->value->data))->lock_owner));
+                }
+                if ((ret < 0) || !ret)
+                        return;
+
+                /* snprintf returns the length it wanted to write. When that
+                 * does not fit, the buffer is full: stop here, otherwise
+                 * dumplen would run past the buffer and the next size
+                 * computation would wrap around. */
+                if ((size_t) ret >= remaining) {
+                        dumplen = sizeof (dump) - 1;
+                        break;
+                }
+                dumplen += (size_t) ret;
+        }
+
+        if (dumplen) {
+                gf_proc_dump_build_key (key, "glusterd", "mgmt_v3_lock");
+                gf_proc_dump_write (key, "%s", dump);
+        }
+
+out:
+        return;
+}
+
+
+/*
+ * Write glusterd's private state to the statedump.
+ *
+ * Adds an "xlator.glusterd" "priv" section and, with priv->mutex held,
+ * writes the node uuid, working directory, op-versions, ping timeout, the
+ * online flags of the shd/nfs/quotad/bitd/scrub services, the peers, the
+ * connected clients, the mgmt_v3 lock dictionary and the options dict.
+ *
+ * @param this  the glusterd xlator
+ *
+ * @return 0 in every case, including when this or this->private is NULL
+ */
+int
+glusterd_dump_priv (xlator_t *this)
+{
+ /* NOTE(review): index, peerinfo and volinfo are not referenced directly
+ * in this function; presumably GLUSTERD_DUMP_PEERS expands to code that
+ * uses them by name - confirm before renaming or removing them. */
+ int index = 1;
+ glusterd_conf_t *priv = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ gf_proc_dump_build_key (key, "xlator.glusterd", "priv");
+ gf_proc_dump_add_section (key);
+
+ /* Everything below reads priv, so it is done under priv->mutex. */
+ pthread_mutex_lock (&priv->mutex);
+ {
+ gf_proc_dump_build_key (key, "glusterd", "my-uuid");
+ gf_proc_dump_write (key, "%s", uuid_utoa (priv->uuid));
+
+ gf_proc_dump_build_key (key, "glusterd", "working-directory");
+ gf_proc_dump_write (key, "%s", priv->workdir);
+
+ gf_proc_dump_build_key (key, "glusterd", "max-op-version");
+ gf_proc_dump_write (key, "%d", GD_OP_VERSION_MAX);
+
+ gf_proc_dump_build_key (key, "glusterd", "min-op-version");
+ gf_proc_dump_write (key, "%d", GD_OP_VERSION_MIN);
+
+ gf_proc_dump_build_key (key, "glusterd", "current-op-version");
+ gf_proc_dump_write (key, "%d", priv->op_version);
+
+ gf_proc_dump_build_key (key, "glusterd", "ping-timeout");
+ gf_proc_dump_write (key, "%d", priv->ping_timeout);
+
+ gf_proc_dump_build_key (key, "glusterd", "shd.online");
+ gf_proc_dump_write (key, "%d", priv->shd_svc.online);
+
+ gf_proc_dump_build_key (key, "glusterd", "nfs.online");
+ gf_proc_dump_write (key, "%d", priv->nfs_svc.online);
+
+ gf_proc_dump_build_key (key, "glusterd", "quotad.online");
+ gf_proc_dump_write (key, "%d", priv->quotad_svc.online);
+
+ gf_proc_dump_build_key (key, "glusterd", "bitd.online");
+ gf_proc_dump_write (key, "%d", priv->bitd_svc.online);
+
+ gf_proc_dump_build_key (key, "glusterd", "scrub.online");
+ gf_proc_dump_write (key, "%d", priv->scrub_svc.online);
+
+ /* Peers, connected clients, mgmt_v3 locks and options. */
+ GLUSTERD_DUMP_PEERS (&priv->peers, uuid_list, _gf_false);
+ glusterd_dump_client_details (priv);
+ glusterd_dict_mgmt_v3_lock_statedump(priv->mgmt_v3_lock);
+ dict_dump_to_statedump (priv->opts, "options", "glusterd");
+ }
+ pthread_mutex_unlock (&priv->mutex);
+
+out:
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.h b/xlators/mgmt/glusterd/src/glusterd-statedump.h
new file mode 100644
index 00000000000..3ac8659f293
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-statedump.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_STATEDUMP_H_
+#define _GLUSTERD_STATEDUMP_H_
+
+#include "xlator.h"
+
+/* Write the private state of the glusterd xlator (this->private) to the
+ * statedump. Defined in glusterd-statedump.c, where it returns 0 on every
+ * path. */
+int
+glusterd_dump_priv (xlator_t *this);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
new file mode 100644
index 00000000000..deaa0892afe
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -0,0 +1,4657 @@
+/*
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd-op-sm.h"
+#include <inttypes.h>
+
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "syscall.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-hooks.h"
+#include "store.h"
+#include "glusterd-store.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-messages.h"
+
+#include "rpc-clnt.h"
+#include "common-utils.h"
+#include "quota-common-utils.h"
+
+#include <sys/resource.h>
+#include <inttypes.h>
+#include <dirent.h>
+
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
+
+/*
+ * Replace every '/' in @str with '-', in place.
+ *
+ * A NULL @str is ignored.  The search resumes just past each replaced
+ * character instead of restarting from the beginning of the string, so
+ * the string is walked only once.
+ */
+void
+glusterd_replace_slash_with_hyphen (char *str)
+{
+        char *ptr = NULL;
+
+        if (!str)
+                return;
+
+        for (ptr = strchr (str, '/'); ptr; ptr = strchr (ptr + 1, '/'))
+                *ptr = '-';
+}
+
+/*
+ * Create the directory that holds the brick store files of @volinfo.
+ * The path comes from GLUSTERD_GET_BRICK_DIR; the return value is
+ * whatever gf_store_mkdir() reports for it.
+ */
+int32_t
+glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo)
+{
+        glusterd_conf_t *priv = NULL;
+        char             brick_dir[PATH_MAX] = {0,};
+
+        GF_ASSERT (volinfo);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        GLUSTERD_GET_BRICK_DIR (brick_dir, volinfo, priv);
+
+        return gf_store_mkdir (brick_dir);
+}
+
+/*
+ * Derive the store key of a brick from its path: copy brickinfo->path
+ * into @key_vol_brick (bounded by @len) and turn every '/' into '-'.
+ * @len is asserted to be at least PATH_MAX.
+ */
+static void
+glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo,
+                                  char *key_vol_brick, size_t len)
+{
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (key_vol_brick);
+        GF_ASSERT (len >= PATH_MAX);
+
+        snprintf (key_vol_brick, len, "%s", brickinfo->path);
+
+        /* flatten the path separators in the copy */
+        glusterd_replace_slash_with_hyphen (key_vol_brick);
+}
+
+/*
+ * Compose the store file name of a brick, "<hostname>:<key>", where
+ * <key> is the brick path with '/' replaced by '-'.  The result is
+ * written to @brickfname (bounded by @len, asserted >= PATH_MAX).
+ */
+static void
+glusterd_store_brickinfofname_set (glusterd_brickinfo_t *brickinfo,
+                                   char *brickfname, size_t len)
+{
+        char hyphenated_path[PATH_MAX] = {0};
+
+        GF_ASSERT (brickfname);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (len >= PATH_MAX);
+
+        glusterd_store_key_vol_brick_set (brickinfo, hyphenated_path,
+                                          sizeof (hyphenated_path));
+
+        snprintf (brickfname, len, "%s:%s", brickinfo->hostname,
+                  hyphenated_path);
+}
+
+/*
+ * Build the full path of the store file of @brickinfo: the brick
+ * directory of @volinfo (GLUSTERD_GET_BRICK_DIR) joined with the brick
+ * store file name.  Output goes to @brickpath, bounded by @len
+ * (asserted >= PATH_MAX).
+ */
+static void
+glusterd_store_brickinfopath_set (glusterd_volinfo_t *volinfo,
+                                  glusterd_brickinfo_t *brickinfo,
+                                  char *brickpath, size_t len)
+{
+        glusterd_conf_t *priv = NULL;
+        char             dirpath[PATH_MAX] = {0,};
+        char             fname[PATH_MAX] = {0};
+
+        GF_ASSERT (brickpath);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (len >= PATH_MAX);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        GLUSTERD_GET_BRICK_DIR (dirpath, volinfo, priv);
+        glusterd_store_brickinfofname_set (brickinfo, fname, sizeof (fname));
+
+        snprintf (brickpath, len, "%s/%s", dirpath, fname);
+}
+
+/*
+ * Write "<volume dir>/snapd.info" into @snapd_path (bounded by @len,
+ * asserted >= PATH_MAX).  The volume directory comes from
+ * GLUSTERD_GET_VOLUME_DIR.
+ */
+static void
+glusterd_store_snapd_path_set (glusterd_volinfo_t *volinfo,
+                               char *snapd_path, size_t len)
+{
+        glusterd_conf_t *priv = NULL;
+        char             voldir[PATH_MAX] = {0, };
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (len >= PATH_MAX);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+
+        snprintf (snapd_path, len, "%s/snapd.info", voldir);
+}
+
+/*
+ * Validate the path of the brick described by @brick for volume
+ * @volname.
+ *
+ * Returns 1 when the path passes every length check and 0 otherwise.
+ * 0 is also returned when the temporary brickinfo/volinfo cannot be
+ * created or when @volname does not fit in volinfo->volname.
+ *
+ * Checks performed on the brick path:
+ *   - the whole path must be shorter than PATH_MAX (one trailing "/" is
+ *     not counted);
+ *   - every '/'-separated component must be shorter than
+ *     _POSIX_PATH_MAX.
+ *
+ * The temporary brickinfo and volinfo are released before returning.
+ */
+gf_boolean_t
+glusterd_store_is_valid_brickpath (char *volname, char *brick)
+{
+        glusterd_brickinfo_t *brickinfo = NULL;
+        glusterd_volinfo_t   *volinfo = NULL;
+        int32_t               ret = 0;
+        size_t                volname_len = strlen (volname);
+        xlator_t             *this = NULL;
+        size_t                bpath_len = 0;
+        size_t                bpath_limit = PATH_MAX;
+        const char            delim[2] = "/";
+        char                 *sub_dir = NULL;
+        char                 *saveptr = NULL;
+        char                 *brickpath_ptr = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo, _gf_false,
+                                                 NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRICK_CREATION_FAIL, "Failed to create brick "
+                        "info for brick %s", brick);
+                ret = 0;
+                goto out;
+        }
+        ret = glusterd_volinfo_new (&volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL, "Failed to create volinfo");
+                ret = 0;
+                goto out;
+        }
+        if (volname_len >= sizeof (volinfo->volname)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_NAME_TOO_LONG, "volume name too long");
+                ret = 0;
+                goto out;
+        }
+        memcpy (volinfo->volname, volname, volname_len+1);
+
+        /* Check whether brickpath is less than PATH_MAX */
+        ret = 1;
+        bpath_len = strlen (brickinfo->path);
+
+        /* A trailing "/" is not considered in the length check.  The
+         * emptiness test keeps the index below from reading before the
+         * start of the buffer when the path is "".
+         */
+        if ((bpath_len > 0) && (brickinfo->path[bpath_len - 1] == '/'))
+                bpath_limit = (size_t)PATH_MAX + 1;
+
+        if (bpath_len >= bpath_limit) {
+                ret = 0;
+                goto out;
+        }
+
+        /* The following validation checks whether each sub directory in
+         * the brick path meets the POSIX max length validation.
+         * strtok_r() modifies brickinfo->path, which is fine because the
+         * brickinfo is a scratch object deleted below.
+         */
+        brickpath_ptr = brickinfo->path;
+        sub_dir = strtok_r (brickpath_ptr, delim, &saveptr);
+
+        while (sub_dir != NULL) {
+                if (strlen(sub_dir) >= _POSIX_PATH_MAX) {
+                        ret = 0;
+                        goto out;
+                }
+                sub_dir = strtok_r (NULL, delim, &saveptr);
+        }
+
+out:
+        if (brickinfo)
+                glusterd_brickinfo_delete (brickinfo);
+        if (volinfo)
+                glusterd_volinfo_unref (volinfo);
+
+        return ret;
+}
+
+/*
+ * Record a brick in the volume store file open on @vol_fd.  The key is
+ * "<GLUSTERD_STORE_KEY_VOL_BRICK>-<brick_count>" and the value is the
+ * brick's store file name.  Returns the result of gf_store_save_value().
+ */
+int32_t
+glusterd_store_volinfo_brick_fname_write (int vol_fd,
+                                          glusterd_brickinfo_t *brickinfo,
+                                          int32_t brick_count)
+{
+        char fname[PATH_MAX] = {0,};
+        char brick_key[PATH_MAX] = {0,};
+
+        snprintf (brick_key, sizeof (brick_key), "%s-%d",
+                  GLUSTERD_STORE_KEY_VOL_BRICK, brick_count);
+        glusterd_store_brickinfofname_set (brickinfo, fname, sizeof (fname));
+
+        return gf_store_save_value (vol_fd, brick_key, fname);
+}
+
+/*
+ * Make sure brickinfo->shandle refers to the brick's store file by
+ * handing the computed path to gf_store_handle_create_on_absence(),
+ * whose result is returned.
+ */
+int32_t
+glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo,
+                                                glusterd_brickinfo_t *brickinfo)
+{
+        char store_path[PATH_MAX] = {0,};
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        glusterd_store_brickinfopath_set (volinfo, brickinfo, store_path,
+                                          sizeof (store_path));
+
+        return gf_store_handle_create_on_absence (&brickinfo->shandle,
+                                                  store_path);
+}
+
+/*
+ * Make sure volinfo->snapd.handle refers to the volume's snapd.info
+ * file; returns the result of gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_create_snapd_shandle_on_absence (glusterd_volinfo_t *volinfo)
+{
+        char info_path[PATH_MAX] = {0,};
+
+        GF_ASSERT (volinfo);
+
+        glusterd_store_snapd_path_set (volinfo, info_path,
+                                       sizeof (info_path));
+
+        return gf_store_handle_create_on_absence (&volinfo->snapd.handle,
+                                                  info_path);
+}
+
+/* Store the snapshot details of a brick, only if required.
+ *
+ * Nothing is written, and 0 is returned, when the cluster op-version is
+ * below GD_OP_VERSION_3_6_0.  Otherwise the device path, mount
+ * directory, fs type and mount options are written when they are
+ * non-empty, and the snap status is always written last.
+ *
+ * Returns 0 on success, the non-zero result of the first failing write,
+ * or -1 when conf, fd or brickinfo fails validation.
+ */
+int
+gd_store_brick_snap_details_write (int fd, glusterd_brickinfo_t *brickinfo)
+{
+        char             value[256] = {0,};
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = NULL;
+        int              ret = -1;
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (fd > 0), out);
+        GF_VALIDATE_OR_GOTO (this->name, (brickinfo != NULL), out);
+
+        /* older clusters do not store snapshot details at all */
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        if (brickinfo->device_path[0] != '\0') {
+                snprintf (value, sizeof (value), "%s",
+                          brickinfo->device_path);
+                ret = gf_store_save_value (fd,
+                                GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, value);
+                if (ret)
+                        goto out;
+        }
+
+        if (brickinfo->mount_dir[0] != '\0') {
+                snprintf (value, sizeof (value), "%s", brickinfo->mount_dir);
+                ret = gf_store_save_value (fd,
+                                GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR, value);
+                if (ret)
+                        goto out;
+        }
+
+        if (brickinfo->fstype[0] != '\0') {
+                snprintf (value, sizeof (value), "%s", brickinfo->fstype);
+                ret = gf_store_save_value (fd,
+                                GLUSTERD_STORE_KEY_BRICK_FSTYPE, value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FS_LABEL_UPDATE_FAIL, "Failed to save "
+                                "brick fs type of brick %s", brickinfo->path);
+                        goto out;
+                }
+        }
+
+        if (brickinfo->mnt_opts[0] != '\0') {
+                snprintf (value, sizeof (value), "%s", brickinfo->mnt_opts);
+                ret = gf_store_save_value (fd,
+                                GLUSTERD_STORE_KEY_BRICK_MNTOPTS, value);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRK_MOUNTOPTS_FAIL, "Failed to save "
+                                "brick mnt opts of brick %s", brickinfo->path);
+                        goto out;
+                }
+        }
+
+        /* the snap status is stored unconditionally */
+        snprintf (value, sizeof (value), "%d", brickinfo->snap_status);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+                                   value);
+
+out:
+        return ret;
+}
+
+/*
+ * Write every persisted field of @brickinfo to the store file open on
+ * @fd: hostname, path, real path, port, rdma port, decommissioned
+ * flag, brick id, the snapshot details, and the volume group name when
+ * one is set.
+ *
+ * Returns 0 on success or the non-zero result of the first failing
+ * write.  Every write is checked, so a failure while storing the port
+ * or the rdma port is no longer masked by a later successful write.
+ *
+ * NOTE(review): GLUSTERD_STORE_KEY_BRICK_REAL_PATH is written with
+ * brickinfo->path, exactly as before -- confirm this is intended.
+ */
+int32_t
+glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)
+{
+        char    value[256] = {0,};
+        int32_t ret = 0;
+
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (fd > 0);
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+                                   brickinfo->hostname);
+        if (ret)
+                goto out;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH,
+                                   brickinfo->path);
+        if (ret)
+                goto out;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
+                                   brickinfo->path);
+        if (ret)
+                goto out;
+
+        snprintf (value, sizeof(value), "%d", brickinfo->port);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, value);
+        if (ret)
+                goto out;
+
+        snprintf (value, sizeof(value), "%d", brickinfo->rdma_port);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+                                   value);
+        if (ret)
+                goto out;
+
+        snprintf (value, sizeof(value), "%d", brickinfo->decommissioned);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+                                   value);
+        if (ret)
+                goto out;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_ID,
+                                   brickinfo->brick_id);
+        if (ret)
+                goto out;
+
+        ret = gd_store_brick_snap_details_write (fd, brickinfo);
+        if (ret)
+                goto out;
+
+        /* the volume group name is optional */
+        if (!brickinfo->vg[0])
+                goto out;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_VGNAME,
+                                   brickinfo->vg);
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Write the snapd port of @volinfo to the store file open on @fd.
+ * A failure is logged; the result of gf_store_save_value() is
+ * returned either way.
+ */
+int32_t
+glusterd_store_snapd_write (int fd, glusterd_volinfo_t *volinfo)
+{
+        xlator_t *this = NULL;
+        int32_t   ret = 0;
+        char      port_str[256] = {0,};
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (fd > 0);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        snprintf (port_str, sizeof (port_str), "%d", volinfo->snapd.port);
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAPD_PORT,
+                                   port_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPD_PORT_STORE_FAIL,
+                        "failed to store the snapd "
+                        "port of volume %s", volinfo->volname);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Write @brickinfo into the temporary file of its store handle.
+ *
+ * Returns -1 when the temporary file cannot be created, otherwise the
+ * result of glusterd_store_brickinfo_write().  On a write failure the
+ * temporary file is unlinked.  This function does not rename the
+ * temporary file into place.
+ */
+int32_t
+glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo)
+{
+        int32_t ret = -1;
+        int     tmp_fd = -1;
+
+        GF_ASSERT (brickinfo);
+
+        tmp_fd = gf_store_mkstemp (brickinfo->shandle);
+        if (tmp_fd > 0)
+                ret = glusterd_store_brickinfo_write (tmp_fd, brickinfo);
+        else
+                ret = -1;
+
+        /* only clean up a temporary file that was actually created */
+        if (ret && (tmp_fd > 0))
+                gf_store_unlink_tmppath (brickinfo->shandle);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Persist the snapd information of @volinfo: write it to the temporary
+ * file of volinfo->snapd.handle and rename that file into place.
+ *
+ * Returns 0 on success.  Returns -1 when the temporary file cannot be
+ * created, otherwise the non-zero result of the failing write or
+ * rename; after a failed write or rename the temporary file is
+ * unlinked.
+ */
+int32_t
+glusterd_store_perform_snapd_store (glusterd_volinfo_t *volinfo)
+{
+        int       fd = -1;
+        int32_t   ret = -1;
+        xlator_t *this = NULL;
+
+        GF_ASSERT (volinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        fd = gf_store_mkstemp (volinfo->snapd.handle);
+        if (fd <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "failed to create the "
+                        "temporary file for the snapd store handle of volume "
+                        "%s", volinfo->volname);
+                goto out;
+        }
+
+        ret = glusterd_store_snapd_write (fd, volinfo);
+        if (ret) {
+                /* message fixed: the closing parenthesis was missing */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPD_PORT_STORE_FAIL,
+                        "failed to write snapd port "
+                        "info to store handle (volume: %s)", volinfo->volname);
+                goto out;
+        }
+
+        ret = gf_store_rename_tmppath (volinfo->snapd.handle);
+
+out:
+        if (ret && (fd > 0))
+                gf_store_unlink_tmppath (volinfo->snapd.handle);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Store one brick of @volinfo.  In order: record the brick's file name
+ * in the volume store file (@vol_fd), create the brick directory,
+ * create the brick store handle if absent, and write the brick
+ * details.  The sequence stops at the first step that fails and that
+ * step's result is returned.
+ */
+int32_t
+glusterd_store_brickinfo (glusterd_volinfo_t *volinfo,
+                          glusterd_brickinfo_t *brickinfo, int32_t brick_count,
+                          int vol_fd)
+{
+        int32_t ret = -1;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        ret = glusterd_store_volinfo_brick_fname_write (vol_fd, brickinfo,
+                                                        brick_count);
+        if (!ret)
+                ret = glusterd_store_create_brick_dir (volinfo);
+
+        if (!ret)
+                ret = glusterd_store_create_brick_shandle_on_absence (volinfo,
+                                                                brickinfo);
+
+        if (!ret)
+                ret = glusterd_store_perform_brick_store (brickinfo);
+
+        gf_msg_debug (THIS->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Persist the snapd details of @volinfo: create the snapd store handle
+ * if needed, then write and commit the snapd information.  Any failure
+ * is logged and the temporary file of the snapd handle is unlinked.
+ * Returns 0 on success, non-zero otherwise.
+ */
+int32_t
+glusterd_store_snapd_info (glusterd_volinfo_t *volinfo)
+{
+        xlator_t *this = NULL;
+        int32_t   ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_store_create_snapd_shandle_on_absence (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_HANDLE_CREATE_FAIL,
+                        "failed to create store "
+                        "handle for snapd (volume: %s)", volinfo->volname);
+        } else {
+                ret = glusterd_store_perform_snapd_store (volinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPD_INFO_STORE_FAIL,
+                                "failed to store snapd info "
+                                "of the volume %s", volinfo->volname);
+                }
+        }
+
+        /* drop any leftover temporary file on failure */
+        if (ret)
+                gf_store_unlink_tmppath (volinfo->snapd.handle);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Remove the store file of @brickinfo from under @delete_path and
+ * destroy the brick's store handle.
+ *
+ * The file removed is
+ *   <delete_path>/<GLUSTERD_BRICK_INFO_DIR>/<hostname>:<path with '/' as '-'>
+ *
+ * Returns 0 when the file was unlinked or did not exist, -1 when the
+ * unlink failed for another reason or when the path copy could not be
+ * allocated.  The store handle is destroyed on every path.
+ */
+int32_t
+glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path)
+{
+        int32_t          ret = -1;
+        glusterd_conf_t *priv = NULL;
+        char             brickpath[PATH_MAX] = {0,};
+        char            *tmppath = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brickinfo);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        tmppath = gf_strdup (brickinfo->path);
+        if (!tmppath) {
+                /* previously a failed allocation was dereferenced */
+                gf_msg_debug (this->name, 0, "Unable to duplicate brick "
+                              "path %s", brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        /* same flattening as used when the store file name is built */
+        glusterd_replace_slash_with_hyphen (tmppath);
+
+        snprintf (brickpath, sizeof (brickpath),
+                  "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path,
+                  brickinfo->hostname, tmppath);
+
+        GF_FREE (tmppath);
+
+        ret = sys_unlink (brickpath);
+
+        /* a store file that is already gone is not an error */
+        if ((ret < 0) && (errno != ENOENT)) {
+                gf_msg_debug (this->name, 0, "Unlink failed on %s",
+                              brickpath);
+                ret = -1;
+                goto out;
+        } else {
+                ret = 0;
+        }
+
+out:
+        if (brickinfo->shandle) {
+                gf_store_handle_destroy (brickinfo->shandle);
+                brickinfo->shandle = NULL;
+        }
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Remove the store files of all bricks of @volinfo from under
+ * @delete_path, then remove any remaining entries of the brick info
+ * directory and finally the directory itself.
+ *
+ * Returns the non-zero result of the first brick whose store file could
+ * not be deleted; otherwise the result of removing the directory.
+ * Failures to unlink leftover entries are only logged at debug level.
+ *
+ * The directory stream is used and closed only when it could actually
+ * be opened; the directory removal is still attempted afterwards, so
+ * its result is reported as before.
+ */
+int32_t
+glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path)
+{
+        int32_t               ret = 0;
+        glusterd_brickinfo_t *tmp = NULL;
+        glusterd_conf_t      *priv = NULL;
+        xlator_t             *this = NULL;
+        DIR                  *dir = NULL;
+        struct dirent        *entry = NULL;
+        struct dirent         scratch[2] = {{0,},};
+        char                  path[PATH_MAX] = {0,};
+        char                  brickdir[PATH_MAX] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+                ret = glusterd_store_delete_brick (tmp, delete_path);
+                if (ret)
+                        goto out;
+        }
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path,
+                  GLUSTERD_BRICK_INFO_DIR);
+
+        dir = sys_opendir (brickdir);
+        if (dir) {
+                GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+
+                while (entry) {
+                        snprintf (path, sizeof (path), "%s/%s",
+                                  brickdir, entry->d_name);
+                        ret = sys_unlink (path);
+                        if (ret && errno != ENOENT) {
+                                gf_msg_debug (this->name, 0,
+                                              "Unable to unlink %s", path);
+                        }
+                        GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+                }
+
+                sys_closedir (dir);
+        } else {
+                gf_msg_debug (this->name, 0, "Unable to open %s", brickdir);
+        }
+
+        ret = sys_rmdir (brickdir);
+
+out:
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * dict_foreach() callback: write one key/value pair of the dictionary
+ * to the store file described by @data, a gf_store_handle_t whose fd
+ * must be open.
+ *
+ * Returns 0 on success and -1 when an argument is unusable or the write
+ * fails.
+ */
+static int
+_storeslaves (dict_t *this, char *key, data_t *value, void *data)
+{
+        gf_store_handle_t *handle = NULL;
+        xlator_t          *xl = NULL;
+        int32_t            ret = 0;
+
+        xl = THIS;
+        GF_ASSERT (xl);
+
+        handle = (gf_store_handle_t*)data;
+
+        GF_ASSERT (handle);
+        GF_ASSERT (handle->fd > 0);
+        GF_ASSERT (handle->path);
+        GF_ASSERT (key);
+        GF_ASSERT (value && value->data);
+
+        /* the assertions above may not stop execution, so re-check */
+        if (!handle || (handle->fd <= 0) || !handle->path ||
+            !key || !value || !value->data)
+                return -1;
+
+        gf_msg_debug (xl->name, 0, "Storing in volinfo:key= %s, val=%s",
+                      key, value->data);
+
+        ret = gf_store_save_value (handle->fd, key, (char*)value->data);
+        if (!ret)
+                return 0;
+
+        gf_msg (xl->name, GF_LOG_ERROR, 0,
+                GD_MSG_STORE_HANDLE_WRITE_FAIL,
+                "Unable to write into store"
+                " handle for path: %s", handle->path);
+        return -1;
+}
+
+
+/*
+ * dict_foreach() callback: write one volume option to the store file
+ * described by @data, a gf_store_handle_t whose fd must be open.
+ *
+ * A key is stored when is_key_glusterd_hooks_friendly() accepts it or
+ * when glusterd_check_option_exists() returns 1 for it; any other key
+ * is discarded and 0 is returned.
+ *
+ * Returns 0 on success or discard, -1 when an argument is unusable or
+ * the write fails.
+ */
+int _storeopts (dict_t *this, char *key, data_t *value, void *data)
+{
+        gf_store_handle_t *handle = NULL;
+        xlator_t          *xl = NULL;
+        int32_t            exists = 0;
+        int32_t            ret = 0;
+
+        xl = THIS;
+        GF_ASSERT (xl);
+
+        handle = (gf_store_handle_t*)data;
+
+        GF_ASSERT (handle);
+        GF_ASSERT (handle->fd > 0);
+        GF_ASSERT (handle->path);
+        GF_ASSERT (key);
+        GF_ASSERT (value && value->data);
+
+        /* the assertions above may not stop execution, so re-check */
+        if (!handle || (handle->fd <= 0) || !handle->path ||
+            !key || !value || !value->data)
+                return -1;
+
+        if (is_key_glusterd_hooks_friendly (key))
+                exists = 1;
+        else
+                exists = glusterd_check_option_exists (key, NULL);
+
+        if (exists != 1) {
+                gf_msg_debug (xl->name, 0, "Discarding:key= %s, val=%s",
+                              key, value->data);
+                return 0;
+        }
+
+        gf_msg_debug (xl->name, 0, "Storing in volinfo:key= %s, "
+                      "val=%s", key, value->data);
+
+        ret = gf_store_save_value (handle->fd, key, (char*)value->data);
+        if (!ret)
+                return 0;
+
+        gf_msg (xl->name, GF_LOG_ERROR, 0,
+                GD_MSG_STORE_HANDLE_WRITE_FAIL,
+                "Unable to write into store"
+                " handle for path: %s", handle->path);
+        return -1;
+}
+
+/* Store the snapshot details of a volume, only if required.
+ *
+ * Nothing is written, and 0 is returned, when the cluster op-version is
+ * below GD_OP_VERSION_3_6_0.  Otherwise the parent volume name, the id
+ * of the snapshot the volume was restored from and the snap-max-hard-
+ * limit are written to @fd, and the snapd information is stored through
+ * glusterd_store_snapd_info().
+ *
+ * Returns 0 on success, non-zero on the first failure, and -1 when
+ * conf, fd or volinfo fails validation.  Every failure is logged; the
+ * log no longer dereferences @volinfo when it is the NULL argument that
+ * failed validation.
+ */
+int
+glusterd_volume_write_snap_details (int fd, glusterd_volinfo_t *volinfo)
+{
+        int              ret = -1;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+        char             buf[PATH_MAX] = {0,};
+
+        this = THIS;
+        GF_ASSERT (this != NULL);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        GF_VALIDATE_OR_GOTO (this->name, (fd > 0), out);
+        GF_VALIDATE_OR_GOTO (this->name, (volinfo != NULL), out);
+
+        /* older clusters do not store snapshot details at all */
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_FAIL, "Failed to store "
+                        GLUSTERD_STORE_KEY_PARENT_VOLNAME);
+                goto out;
+        }
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP,
+                                   uuid_utoa (volinfo->restored_from_snap));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_HANDLE_WRITE_FAIL,
+                        "Unable to write restored_from_snap");
+                goto out;
+        }
+
+        memset (buf, 0, sizeof (buf));
+        snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->snap_max_hard_limit);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                                   buf);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HARD_LIMIT_SET_FAIL,
+                        "Unable to write snap-max-hard-limit");
+                goto out;
+        }
+
+        ret = glusterd_store_snapd_info (volinfo);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPD_INFO_STORE_FAIL, "snapd info store failed "
+                        "volume: %s", volinfo->volname);
+
+out:
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPINFO_WRITE_FAIL,
+                        "Failed to write snap details"
+                        " for volume %s",
+                        volinfo ? volinfo->volname : "(null)");
+        return ret;
+}
+
+/*
+ * Write the tier layout of @volinfo to the store file open on @fd.
+ *
+ * Volumes whose type is not GF_CLUSTER_TYPE_TIER have nothing to write
+ * and yield 0.  For tier volumes the cold brick/replica/disperse/
+ * redundancy counts, the hot brick/replica counts and the hot and cold
+ * types are written in that order; the first failing write stops the
+ * sequence and its result is returned.
+ */
+int32_t
+glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo)
+{
+        char    value[PATH_MAX] = "";
+        int32_t ret = -1;
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                return 0;
+
+        snprintf (value, sizeof (value), "%d",
+                  volinfo->tier_info.cold_brick_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_COUNT, value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d",
+                  volinfo->tier_info.cold_replica_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                                   value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d",
+                  volinfo->tier_info.cold_disperse_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                                   value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d",
+                  volinfo->tier_info.cold_redundancy_count);
+        ret = gf_store_save_value (fd,
+                                   GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT,
+                                   value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d",
+                  volinfo->tier_info.hot_brick_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_COUNT, value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d",
+                  volinfo->tier_info.hot_replica_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                                   value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d", volinfo->tier_info.hot_type);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_TYPE, value);
+        if (ret)
+                return ret;
+
+        snprintf (value, sizeof (value), "%d", volinfo->tier_info.cold_type);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_TYPE, value);
+
+        return ret;
+}
+
+/*
+ * Write the fixed (non-option) attributes of @volinfo to the store
+ * file open on @fd: type, brick/sub/stripe/replica counts, status,
+ * version, transport, volume id, credentials, op-versions, caps, and
+ * the tier and snapshot details.
+ *
+ * Fields gated on the cluster op-version:
+ *   - arbiter count (only when non-zero) and quota xattr version:
+ *     GD_OP_VERSION_3_7_6 and above;
+ *   - disperse and redundancy counts: GD_OP_VERSION_3_6_0 and above.
+ * Username, password and caps are written only when set.
+ *
+ * Returns 0 on success or the non-zero result of the first failing
+ * step; -1 when conf is NULL.  A failure while writing the tier details
+ * is now reported instead of being overwritten by the result of the
+ * snapshot details that follow.
+ */
+int32_t
+glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
+{
+        char            *str = NULL;
+        char             buf[PATH_MAX] = "";
+        int32_t          ret = -1;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (fd > 0);
+        GF_ASSERT (volinfo);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->type);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->brick_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->status);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->sub_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->stripe_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->replica_count);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
+                                   buf);
+        if (ret)
+                goto out;
+
+        if ((conf->op_version >= GD_OP_VERSION_3_7_6) &&
+            volinfo->arbiter_count) {
+                snprintf (buf, sizeof (buf), "%d", volinfo->arbiter_count);
+                ret = gf_store_save_value (fd,
+                                           GLUSTERD_STORE_KEY_VOL_ARBITER_CNT,
+                                           buf);
+                if (ret)
+                        goto out;
+        }
+
+        if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+                snprintf (buf, sizeof (buf), "%d", volinfo->disperse_count);
+                ret = gf_store_save_value (fd,
+                                           GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+                                           buf);
+                if (ret)
+                        goto out;
+
+                snprintf (buf, sizeof (buf), "%d", volinfo->redundancy_count);
+                ret = gf_store_save_value (fd,
+                                        GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+                                        buf);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->version);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->transport_type);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf);
+        if (ret)
+                goto out;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID,
+                                   uuid_utoa (volinfo->volume_id));
+        if (ret)
+                goto out;
+
+        str = glusterd_auth_get_username (volinfo);
+        if (str) {
+                ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_USERNAME,
+                                           str);
+                if (ret)
+                        goto out;
+        }
+
+        str = glusterd_auth_get_password (volinfo);
+        if (str) {
+                ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PASSWORD,
+                                           str);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->op_version);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+                                   buf);
+        if (ret)
+                goto out;
+
+        if (volinfo->caps) {
+                snprintf (buf, sizeof (buf), "%d", volinfo->caps);
+                ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CAPS,
+                                           buf);
+                if (ret)
+                        goto out;
+        }
+
+        if (conf->op_version >= GD_OP_VERSION_3_7_6) {
+                snprintf (buf, sizeof (buf), "%d",
+                          volinfo->quota_xattr_version);
+                ret = gf_store_save_value (fd,
+                                           GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
+                                           buf);
+                if (ret)
+                        goto out;
+        }
+
+        /* do not let the snap details below hide a tier write failure */
+        ret = glusterd_volume_write_tier_details (fd, volinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_volume_write_snap_details (fd, volinfo);
+
+out:
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_VALS_WRITE_FAIL, "Unable to write volume "
+                        "values for %s", volinfo->volname);
+        return ret;
+}
+
+/*
+ * Fill @voldirpath with the store directory of @volinfo, as produced by
+ * GLUSTERD_GET_VOLUME_DIR.
+ *
+ * NOTE(review): @len is accepted but not used here; the bound applied
+ * is whatever the macro uses -- confirm callers pass PATH_MAX buffers.
+ */
+static void
+glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath,
+                               size_t len)
+{
+        glusterd_conf_t *priv = NULL;
+
+        GF_ASSERT (volinfo);
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        GLUSTERD_GET_VOLUME_DIR (voldirpath, volinfo, priv);
+}
+
+/*
+ * Create the store directory of @volinfo and return the result of
+ * gf_store_mkdir() for it.
+ */
+static int32_t
+glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo)
+{
+        char    voldir[PATH_MAX] = {0,};
+        int32_t ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        glusterd_store_voldirpath_set (volinfo, voldir, sizeof (voldir));
+
+        ret = gf_store_mkdir (voldir);
+        gf_msg_debug (THIS->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/*
+ * Create the store directory of @snap (path from GLUSTERD_GET_SNAP_DIR)
+ * with mkdir_p() and mode 0755.  A failure is logged with errno.
+ * Returns the result of mkdir_p().
+ */
+int32_t
+glusterd_store_create_snap_dir (glusterd_snap_t *snap)
+{
+        glusterd_conf_t *priv = NULL;
+        char             snap_dir[PATH_MAX] = {0,};
+        int32_t          ret = -1;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (snap);
+
+        GLUSTERD_GET_SNAP_DIR (snap_dir, snap, priv);
+
+        ret = mkdir_p (snap_dir, 0755, _gf_true);
+        if (!ret)
+                return ret;
+
+        gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                GD_MSG_CREATE_DIR_FAILED, "Failed to create snaps dir "
+                "%s", snap_dir);
+        return ret;
+}
+
+/*
+ * Write @volinfo to the store file open on @fd: first the fixed
+ * attributes, then every entry of volinfo->dict (through _storeopts)
+ * and of volinfo->gsync_slaves (through _storeslaves).
+ *
+ * The return value is that of the fixed-attribute write only; the
+ * results of the two dict_foreach() walks are not examined.
+ */
+int32_t
+glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo)
+{
+        gf_store_handle_t *handle = NULL;
+        int32_t            ret = -1;
+
+        GF_ASSERT (fd > 0);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (volinfo->shandle);
+
+        handle = volinfo->shandle;
+
+        ret = glusterd_volume_exclude_options_write (fd, volinfo);
+        if (!ret) {
+                /* the callbacks take the descriptor from the handle */
+                handle->fd = fd;
+                dict_foreach (volinfo->dict, _storeopts, handle);
+                dict_foreach (volinfo->gsync_slaves, _storeslaves, handle);
+                handle->fd = 0;
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Write the attributes of @snap to the temporary file of its store
+ * handle: snap id, status, restored flag, description (when present)
+ * and time stamp.
+ *
+ * Returns -1 when the temporary file cannot be created, otherwise the
+ * result of the last write attempted (0 on success).
+ */
+int32_t
+glusterd_store_snapinfo_write (glusterd_snap_t *snap)
+{
+        char    value[PATH_MAX] = "";
+        int     fd = 0;
+        int32_t ret = -1;
+
+        GF_ASSERT (snap);
+
+        fd = gf_store_mkstemp (snap->shandle);
+        if (fd <= 0)
+                goto out;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_ID,
+                                   uuid_utoa (snap->snap_id));
+        if (ret)
+                goto out;
+
+        snprintf (value, sizeof (value), "%d", snap->snap_status);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_STATUS, value);
+        if (ret)
+                goto out;
+
+        snprintf (value, sizeof (value), "%d", snap->snap_restored);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_RESTORED,
+                                   value);
+        if (ret)
+                goto out;
+
+        /* the description is optional */
+        if (snap->description) {
+                ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_DESC,
+                                           snap->description);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (value, sizeof (value), "%ld", snap->time_stamp);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP,
+                                   value);
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Write "<volume dir>/<GLUSTERD_VOLUME_INFO_FILE>" into @volfpath,
+ * bounded by @len (asserted <= PATH_MAX).
+ */
+static void
+glusterd_store_volfpath_set (glusterd_volinfo_t *volinfo, char *volfpath,
+                             size_t len)
+{
+        char voldir[PATH_MAX] = {0,};
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (volfpath);
+        GF_ASSERT (len <= PATH_MAX);
+
+        glusterd_store_voldirpath_set (volinfo, voldir, sizeof (voldir));
+
+        snprintf (volfpath, len, "%s/%s", voldir, GLUSTERD_VOLUME_INFO_FILE);
+}
+
+/*
+ * Write "<volume dir>/<GLUSTERD_NODE_STATE_FILE>" into
+ * @node_statepath, bounded by @len (asserted <= PATH_MAX).
+ */
+static void
+glusterd_store_node_state_path_set (glusterd_volinfo_t *volinfo,
+                                    char *node_statepath, size_t len)
+{
+        char voldir[PATH_MAX] = {0,};
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (node_statepath);
+        GF_ASSERT (len <= PATH_MAX);
+
+        glusterd_store_voldirpath_set (volinfo, voldir, sizeof (voldir));
+
+        snprintf (node_statepath, len, "%s/%s", voldir,
+                  GLUSTERD_NODE_STATE_FILE);
+}
+
+/*
+ * Write "<volume dir>/<GLUSTERD_VOLUME_QUOTA_CONFIG>" into
+ * @quota_conf_path, bounded by @len (asserted <= PATH_MAX).
+ */
+static void
+glusterd_store_quota_conf_path_set (glusterd_volinfo_t *volinfo,
+                                    char *quota_conf_path, size_t len)
+{
+        char voldir[PATH_MAX] = {0,};
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (quota_conf_path);
+        GF_ASSERT (len <= PATH_MAX);
+
+        glusterd_store_voldirpath_set (volinfo, voldir, sizeof (voldir));
+
+        snprintf (quota_conf_path, len, "%s/%s", voldir,
+                  GLUSTERD_VOLUME_QUOTA_CONFIG);
+}
+
+/*
+ * Write the path of the missed-snaps list file,
+ * "<workdir>/snaps/<GLUSTERD_MISSED_SNAPS_LIST_FILE>", into
+ * @missed_snaps_list, bounded by @len (asserted <= PATH_MAX).
+ */
+static void
+glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list,
+                                           size_t len)
+{
+        glusterd_conf_t *conf = THIS->private;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (missed_snaps_list);
+        GF_ASSERT (len <= PATH_MAX);
+
+        snprintf (missed_snaps_list, len, "%s/snaps/"
+                  GLUSTERD_MISSED_SNAPS_LIST_FILE, conf->workdir);
+}
+
+/*
+ * Write the path of the info file of @snap,
+ * "<workdir>/snaps/<snapname>/<GLUSTERD_SNAP_INFO_FILE>", into
+ * @snap_fpath, bounded by @len (asserted <= PATH_MAX).
+ */
+static void
+glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath,
+                              size_t len)
+{
+        glusterd_conf_t *conf = THIS->private;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (snap);
+        GF_ASSERT (snap_fpath);
+        GF_ASSERT (len <= PATH_MAX);
+
+        snprintf (snap_fpath, len, "%s/snaps/%s/%s", conf->workdir,
+                  snap->snapname, GLUSTERD_SNAP_INFO_FILE);
+}
+
+/*
+ * Make sure volinfo->shandle refers to the volume's info file; returns
+ * the result of gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_create_vol_shandle_on_absence (glusterd_volinfo_t *volinfo)
+{
+        char info_path[PATH_MAX] = {0};
+
+        GF_ASSERT (volinfo);
+
+        glusterd_store_volfpath_set (volinfo, info_path, sizeof (info_path));
+
+        return gf_store_handle_create_on_absence (&volinfo->shandle,
+                                                  info_path);
+}
+
+/*
+ * Make sure volinfo->node_state_shandle refers to the volume's node
+ * state file; returns the result of
+ * gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_create_nodestate_sh_on_absence (glusterd_volinfo_t *volinfo)
+{
+        char state_path[PATH_MAX] = {0};
+
+        GF_ASSERT (volinfo);
+
+        glusterd_store_node_state_path_set (volinfo, state_path,
+                                            sizeof (state_path));
+
+        return gf_store_handle_create_on_absence (&volinfo->node_state_shandle,
+                                                  state_path);
+}
+
+/*
+ * Make sure volinfo->quota_conf_shandle refers to the volume's quota
+ * configuration file; returns the result of
+ * gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_create_quota_conf_sh_on_absence (glusterd_volinfo_t *volinfo)
+{
+        char conf_path[PATH_MAX] = {0};
+
+        GF_ASSERT (volinfo);
+
+        glusterd_store_quota_conf_path_set (volinfo, conf_path,
+                                            sizeof (conf_path));
+
+        return gf_store_handle_create_on_absence (&volinfo->quota_conf_shandle,
+                                                  conf_path);
+}
+
+/*
+ * Make sure priv->missed_snaps_list_shandle refers to the missed-snaps
+ * list file; returns the result of
+ * gf_store_handle_create_on_absence().
+ */
+static int32_t
+glusterd_store_create_missed_snaps_list_shandle_on_absence ()
+{
+        xlator_t        *this = NULL;
+        glusterd_conf_t *priv = NULL;
+        char             list_path[PATH_MAX] = "";
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        glusterd_store_missed_snaps_list_path_set (list_path,
+                                                   sizeof (list_path));
+
+        return gf_store_handle_create_on_absence
+                                        (&priv->missed_snaps_list_shandle,
+                                         list_path);
+}
+
+/*
+ * Make sure snap->shandle refers to the snapshot's info file; returns
+ * the result of gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap)
+{
+        char info_path[PATH_MAX] = {0};
+
+        GF_ASSERT (snap);
+
+        glusterd_store_snapfpath_set (snap, info_path, sizeof (info_path));
+
+        return gf_store_handle_create_on_absence (&snap->shandle, info_path);
+}
+
+/*
+ * Store every brick of @volinfo, numbering them from 0 in list order.
+ * Stops at the first brick that fails to store and returns its result;
+ * returns 0 when all bricks were stored or the list is empty.
+ */
+int32_t
+glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd)
+{
+        glusterd_brickinfo_t *brick = NULL;
+        int32_t               idx = 0;
+        int32_t               ret = 0;
+
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                ret = glusterd_store_brickinfo (volinfo, brick, idx, vol_fd);
+                if (ret)
+                        break;
+                idx++;
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* dict_foreach() callback: @data points to an int file descriptor; the
+ * pair (@key, value->data) is written to it with gf_store_save_value().
+ * @dict is not used. */
+int
+_gd_store_rebalance_dict (dict_t *dict, char *key, data_t *value, void *data)
+{
+        int    *fd_ptr = data;
+
+        return gf_store_save_value (*fd_ptr, key, value->data);
+}
+
+/* Write the rebalance state held in volinfo->rebal to @fd as key/value
+ * pairs via gf_store_save_value().
+ *
+ * Nothing is written and 0 is returned when the recorded defrag command is
+ * GF_DEFRAG_CMD_STATUS.  Otherwise the fields are written in a fixed order
+ * and the first failing write aborts the sequence with its return value.
+ * Entries of volinfo->rebal.dict, if any, are written last; the result of
+ * that dict_foreach() walk is not examined. */
+int32_t
+glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo)
+{
+        char    text[PATH_MAX] = {0, };
+        int     ret            = -1;
+
+        GF_ASSERT (fd > 0);
+        GF_ASSERT (volinfo);
+
+        if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) {
+                ret = 0;
+                goto out;
+        }
+
+        /* Command, status and operation are stored as decimal ints. */
+        snprintf (text, sizeof (text), "%d", volinfo->rebal.defrag_cmd);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%d", volinfo->rebal.defrag_status);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS,
+                                   text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%d", volinfo->rebal.op);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, text);
+        if (ret)
+                goto out;
+
+        gf_uuid_unparse (volinfo->rebal.rebalance_id, text);
+        ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, text);
+        if (ret)
+                goto out;
+
+        /* Counters are stored as unsigned 64-bit decimals. */
+        snprintf (text, sizeof (text), "%"PRIu64,
+                  volinfo->rebal.rebalance_files);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES,
+                                   text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%"PRIu64,
+                  volinfo->rebal.rebalance_data);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE,
+                                   text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%"PRIu64,
+                  volinfo->rebal.lookedup_files);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED,
+                                   text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%"PRIu64,
+                  volinfo->rebal.rebalance_failures);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+                                   text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%"PRIu64,
+                  volinfo->rebal.skipped_files);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED,
+                                   text);
+        if (ret)
+                goto out;
+
+        snprintf (text, sizeof (text), "%lf", volinfo->rebal.rebalance_time);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME,
+                                   text);
+        if (ret)
+                goto out;
+
+        if (volinfo->rebal.dict) {
+                dict_foreach (volinfo->rebal.dict, _gd_store_rebalance_dict,
+                              &fd);
+        }
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Write the node state of @volinfo to a temporary file obtained from
+ * gf_store_mkstemp() on volinfo->node_state_shandle and, if the write
+ * succeeds, rename the temporary file into place.  When any step after
+ * the temporary file was obtained fails, the temporary path is unlinked.
+ * Returns 0 on success, non-zero otherwise. */
+int32_t
+glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo)
+{
+        int32_t ret    = -1;
+        int     tmp_fd = -1;
+
+        GF_ASSERT (volinfo);
+
+        tmp_fd = gf_store_mkstemp (volinfo->node_state_shandle);
+        if (tmp_fd <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_store_node_state_write (tmp_fd, volinfo);
+        if (!ret)
+                ret = gf_store_rename_tmppath (volinfo->node_state_shandle);
+
+out:
+        if (ret && (tmp_fd > 0))
+                gf_store_unlink_tmppath (volinfo->node_state_shandle);
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Write the volume info and then all brick infos of @volinfo to a
+ * temporary file obtained from gf_store_mkstemp() on volinfo->shandle.
+ * The temporary file is not renamed here.  When a write fails after the
+ * temporary file was obtained, the temporary path is unlinked.
+ * Returns 0 on success, non-zero otherwise. */
+int32_t
+glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo)
+{
+        int32_t ret    = -1;
+        int     tmp_fd = -1;
+
+        GF_ASSERT (volinfo);
+
+        tmp_fd = gf_store_mkstemp (volinfo->shandle);
+        if (tmp_fd <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_store_volinfo_write (tmp_fd, volinfo);
+        if (!ret)
+                ret = glusterd_store_brickinfos (volinfo, tmp_fd);
+
+out:
+        if (ret && (tmp_fd > 0))
+                gf_store_unlink_tmppath (volinfo->shandle);
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Apply version action @ac to volinfo->version: increment, decrement, or
+ * leave it untouched for GLUSTERD_VOLINFO_VER_AC_NONE (and for any value
+ * not handled below). */
+void
+glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo,
+                                         glusterd_volinfo_ver_ac_t ac)
+{
+        GF_ASSERT (volinfo);
+
+        if (ac == GLUSTERD_VOLINFO_VER_AC_INCREMENT)
+                volinfo->version++;
+        else if (ac == GLUSTERD_VOLINFO_VER_AC_DECREMENT)
+                volinfo->version--;
+}
+
+/* Call gf_store_unlink_tmppath() on the store handle of every brick on
+ * volinfo->bricks. */
+void
+glusterd_store_bricks_cleanup_tmp (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t   *brick = NULL;
+
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list)
+                gf_store_unlink_tmppath (brick->shandle);
+}
+
+/* Unlink the temporary store paths belonging to @volinfo: first those of
+ * its bricks, then the volume handle, the node-state handle and the snapd
+ * handle, in that order. */
+void
+glusterd_store_volume_cleanup_tmp (glusterd_volinfo_t *volinfo)
+{
+        GF_ASSERT (volinfo);
+
+        glusterd_store_bricks_cleanup_tmp (volinfo);
+        gf_store_unlink_tmppath (volinfo->shandle);
+        gf_store_unlink_tmppath (volinfo->node_state_shandle);
+        gf_store_unlink_tmppath (volinfo->snapd.handle);
+}
+
+/* Rename the temporary store file of every brick on volinfo->bricks into
+ * place, stopping at the first failure.  Returns the result of the last
+ * rename attempted; with an empty brick list no rename is attempted and
+ * the initial value -1 is returned. */
+int32_t
+glusterd_store_brickinfos_atomic_update (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t   *brick = NULL;
+        int                     ret   = -1;
+
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                ret = gf_store_rename_tmppath (brick->shandle);
+                if (ret)
+                        break;
+        }
+
+        return ret;
+}
+
+/* Rename the temporary store file of volinfo->shandle into place.  A
+ * failure is logged with the current errno.  Returns the result of
+ * gf_store_rename_tmppath(). */
+int32_t
+glusterd_store_volinfo_atomic_update (glusterd_volinfo_t *volinfo)
+{
+        int     ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        ret = gf_store_rename_tmppath (volinfo->shandle);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "Couldn't rename "
+                        "temporary file(s)");
+        }
+
+        return ret;
+}
+
+/* Rename the temporary brick store files of @volinfo into place and, only
+ * if that succeeds, the temporary volume info file as well.  Returns 0 on
+ * success or the first non-zero result. */
+int32_t
+glusterd_store_volume_atomic_update (glusterd_volinfo_t *volinfo)
+{
+        int     ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        ret = glusterd_store_brickinfos_atomic_update (volinfo);
+        if (!ret)
+                ret = glusterd_store_volinfo_atomic_update (volinfo);
+
+        return ret;
+}
+
+/* Rename the temporary store file of snap->shandle into place.  A failure
+ * is logged with the current errno.  Returns the result of
+ * gf_store_rename_tmppath(). */
+int32_t
+glusterd_store_snap_atomic_update (glusterd_snap_t *snap)
+{
+        int     rc = -1;
+
+        GF_ASSERT (snap);
+
+        rc = gf_store_rename_tmppath (snap->shandle);
+        if (rc) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "Couldn't rename "
+                        "temporary file(s)");
+        }
+
+        return rc;
+}
+
+/* Persist @snap: create its directory, make sure its store handle exists,
+ * write the snap info and rename the temporary file into place.  Each
+ * failing step is logged and aborts the sequence.  On failure the
+ * temporary path of snap->shandle is unlinked if the handle is set.
+ * Returns 0 on success, non-zero otherwise. */
+int32_t
+glusterd_store_snap (glusterd_snap_t *snap)
+{
+        int32_t rc = -1;
+
+        GF_ASSERT (snap);
+
+        rc = glusterd_store_create_snap_dir (snap);
+        if (rc) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPDIR_CREATE_FAIL,
+                        "Failed to create snap dir");
+                goto out;
+        }
+
+        rc = glusterd_store_create_snap_shandle_on_absence (snap);
+        if (rc) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPINFO_CREATE_FAIL,
+                        "Failed to create snap info "
+                        "file");
+                goto out;
+        }
+
+        rc = glusterd_store_snapinfo_write (snap);
+        if (rc) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPINFO_WRITE_FAIL,
+                        "Failed to write snap info");
+                goto out;
+        }
+
+        rc = glusterd_store_snap_atomic_update (snap);
+        if (rc) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_AUTOMIC_UPDATE_FAIL,
+                        "Failed to do automic update");
+        }
+
+out:
+        if (rc && snap->shandle)
+                gf_store_unlink_tmppath (snap->shandle);
+
+        gf_msg_trace (THIS->name, 0, "Returning %d", rc);
+        return rc;
+}
+
+/* Persist @volinfo after applying version action @ac.
+ *
+ * Steps, each aborting the sequence on failure: create the volume
+ * directory, ensure the volume and node-state store handles exist, write
+ * volume and brick info to temporary files, rename them into place, store
+ * the node state, and finally compute the checksum.
+ *
+ * The version is decremented again only when the rename step fails; other
+ * failures leave the version as modified by @ac.  On any failure the
+ * volume's temporary store paths are unlinked.
+ * Returns 0 on success, non-zero otherwise. */
+int32_t
+glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac)
+{
+        int32_t rc = -1;
+
+        GF_ASSERT (volinfo);
+
+        glusterd_perform_volinfo_version_action (volinfo, ac);
+
+        rc = glusterd_store_create_volume_dir (volinfo);
+        if (rc)
+                goto out;
+
+        rc = glusterd_store_create_vol_shandle_on_absence (volinfo);
+        if (rc)
+                goto out;
+
+        rc = glusterd_store_create_nodestate_sh_on_absence (volinfo);
+        if (rc)
+                goto out;
+
+        rc = glusterd_store_perform_volume_store (volinfo);
+        if (rc)
+                goto out;
+
+        rc = glusterd_store_volume_atomic_update (volinfo);
+        if (rc) {
+                glusterd_perform_volinfo_version_action (volinfo,
+                                        GLUSTERD_VOLINFO_VER_AC_DECREMENT);
+                goto out;
+        }
+
+        rc = glusterd_store_perform_node_state_store (volinfo);
+        if (rc)
+                goto out;
+
+        /* checksum should be computed at the end */
+        rc = glusterd_compute_cksum (volinfo, _gf_false);
+
+out:
+        if (rc)
+                glusterd_store_volume_cleanup_tmp (volinfo);
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/* Remove the on-disk store of @volinfo by renaming its volume directory to
+ * "<workdir>/GLUSTERD_TRASH/<volume-id>.deleted" and then recursively
+ * removing the trash directory.
+ *
+ * volinfo->shandle is destroyed and cleared on every path.  Only a failed
+ * rename makes the function return -1; a failure to create or to remove
+ * the trash directory is logged but still reported as 0. */
+int32_t
+glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
+{
+        char             pathname[PATH_MAX]    = {0,};
+        char             delete_path[PATH_MAX] = {0,};
+        char             trashdir[PATH_MAX]    = {0,};
+        int32_t          ret                   = 0;
+        gf_boolean_t     rename_fail           = _gf_false;
+        glusterd_conf_t *priv                  = NULL;
+        xlator_t        *this                  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (volinfo);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv);
+
+        snprintf (delete_path, sizeof (delete_path),
+                  "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir,
+                  uuid_utoa (volinfo->volume_id));
+
+        snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
+                  priv->workdir);
+
+        /* An already existing trash directory is fine. */
+        ret = sys_mkdir (trashdir, 0777);
+        if (ret && errno != EEXIST) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "Failed to create trash "
+                        "directory");
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_rename (pathname, delete_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Failed to rename volume "
+                        "directory for volume %s", volinfo->volname);
+                rename_fail = _gf_true;
+                goto out;
+        }
+
+        ret = recursive_rmdir (trashdir);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Failed to rmdir: %s",
+                              trashdir);
+        }
+
+out:
+        if (volinfo->shandle) {
+                gf_store_handle_destroy (volinfo->shandle);
+                volinfo->shandle = NULL;
+        }
+
+        /* The result depends solely on whether the rename failed. */
+        if (rename_fail == _gf_true)
+                ret = -1;
+        else
+                ret = 0;
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* TODO: clean up the duplicated code and implement a generic function for
+ * deleting a snap/volume depending on a parameter flag.
+ *
+ * Remove the on-disk store of @snap by renaming its directory to
+ * "<workdir>/GLUSTERD_TRASH/snap-<snap-id>.deleted", removing each entry
+ * found directly inside it, and then removing that directory and the trash
+ * directory.
+ *
+ * snap->shandle is destroyed and cleared on every path.  Only a failed
+ * rename makes the function return -1; every other failure is logged and
+ * still reported as 0. */
+int32_t
+glusterd_store_delete_snap (glusterd_snap_t *snap)
+{
+        char             pathname[PATH_MAX]    = {0,};
+        char             path[PATH_MAX]        = {0,};
+        char             delete_path[PATH_MAX] = {0,};
+        char             trashdir[PATH_MAX]    = {0,};
+        int32_t          ret                   = 0;
+        gf_boolean_t     rename_fail           = _gf_false;
+        glusterd_conf_t *priv                  = NULL;
+        xlator_t        *this                  = NULL;
+        DIR             *dir                   = NULL;
+        struct dirent   *entry                 = NULL;
+        struct dirent    scratch[2]            = {{0,},};
+        struct stat      st                    = {0, };
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (snap);
+        GLUSTERD_GET_SNAP_DIR (pathname, snap, priv);
+
+        snprintf (delete_path, sizeof (delete_path),
+                  "%s/"GLUSTERD_TRASH"/snap-%s.deleted", priv->workdir,
+                  uuid_utoa (snap->snap_id));
+
+        snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
+                  priv->workdir);
+
+        /* An already existing trash directory is fine. */
+        ret = sys_mkdir (trashdir, 0777);
+        if (ret && errno != EEXIST) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "Failed to create trash "
+                        "directory");
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_rename (pathname, delete_path);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Failed to rename snap "
+                        "directory %s to %s", pathname, delete_path);
+                rename_fail = _gf_true;
+                goto out;
+        }
+
+        dir = sys_opendir (delete_path);
+        if (!dir) {
+                gf_msg_debug (this->name, 0, "Failed to open directory %s.",
+                              delete_path);
+                ret = 0;
+                goto out;
+        }
+
+        GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+        while (entry) {
+                snprintf (path, sizeof (path), "%s/%s", delete_path,
+                          entry->d_name);
+
+                ret = sys_stat (path, &st);
+                if (ret == -1) {
+                        /* Entries that cannot be stat'ed are skipped. */
+                        gf_msg_debug (this->name, 0, "Failed to stat "
+                                      "entry %s", path);
+                } else {
+                        if (S_ISDIR (st.st_mode))
+                                ret = sys_rmdir (path);
+                        else
+                                ret = sys_unlink (path);
+
+                        if (ret) {
+                                gf_msg_debug (this->name, 0,
+                                              " Failed to remove "
+                                              "%s", path);
+                        }
+
+                        gf_msg_debug (this->name, 0, "%s %s",
+                                      ret ? "Failed to remove":"Removed",
+                                      entry->d_name);
+                }
+
+                memset (path, 0, sizeof(path));
+                GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+        }
+
+        ret = sys_closedir (dir);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Failed to close dir %s.",
+                              delete_path);
+        }
+
+        ret = sys_rmdir (delete_path);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Failed to rmdir: %s",
+                              delete_path);
+        }
+
+        ret = sys_rmdir (trashdir);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Failed to rmdir: %s",
+                              trashdir);
+        }
+
+out:
+        if (snap->shandle) {
+                gf_store_handle_destroy (snap->shandle);
+                snap->shandle = NULL;
+        }
+
+        /* The result depends solely on whether the rename failed. */
+        if (rename_fail == _gf_true)
+                ret = -1;
+        else
+                ret = 0;
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Write glusterd's global information (this node's UUID and the current
+ * op-version) to "<workdir>/GLUSTERD_INFO_FILE".
+ *
+ * The store handle cached in conf->handle is reused, or created and
+ * cached when absent.  The file is made world readable, the values are
+ * written to a temporary file and the temporary file is renamed into
+ * place.  On failure the temporary path is unlinked if one was opened.
+ * handle->fd is reset to 0 before returning whenever it was positive.
+ * Returns 0 on success, non-zero otherwise. */
+int
+glusterd_store_global_info (xlator_t *this)
+{
+        char                     path[PATH_MAX]     = {0,};
+        char                     op_version_str[15] = {0,};
+        char                    *uuid_str           = NULL;
+        gf_store_handle_t       *handle             = NULL;
+        glusterd_conf_t         *conf               = NULL;
+        int                      ret                = -1;
+
+        conf = this->private;
+
+        uuid_str = gf_strdup (uuid_utoa (MY_UUID));
+        if (!uuid_str)
+                goto out;
+
+        if (conf->handle) {
+                handle = conf->handle;
+        } else {
+                snprintf (path, sizeof (path), "%s/%s", conf->workdir,
+                          GLUSTERD_INFO_FILE);
+                ret = gf_store_handle_new (path, &handle);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STORE_HANDLE_GET_FAIL,
+                                "Unable to get store handle");
+                        goto out;
+                }
+
+                conf->handle = handle;
+        }
+
+        /* These options need to be available for all users */
+        ret = sys_chmod (handle->path, 0644);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "chmod error for %s",
+                        GLUSTERD_INFO_FILE);
+                goto out;
+        }
+
+        handle->fd = gf_store_mkstemp (handle);
+        if (handle->fd <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY,
+                                   uuid_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_UUID_SET_FAIL,
+                        "Storing uuid failed ret = %d", ret);
+                goto out;
+        }
+
+        snprintf (op_version_str, sizeof (op_version_str), "%d",
+                  conf->op_version);
+        ret = gf_store_save_value (handle->fd, GD_OP_VERSION_KEY,
+                                   op_version_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_VERS_STORE_FAIL,
+                        "Storing op-version failed ret = %d", ret);
+                goto out;
+        }
+
+        ret = gf_store_rename_tmppath (handle);
+
+out:
+        if (handle && (handle->fd > 0)) {
+                if (ret)
+                        gf_store_unlink_tmppath (handle);
+                handle->fd = 0;
+        }
+
+        GF_FREE (uuid_str);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL,
+                        "Failed to store glusterd global-info");
+        }
+
+        return ret;
+}
+
+/* Read the op-version stored under GD_OP_VERSION_KEY in the glusterd info
+ * file and return it through @op_version.
+ *
+ * The store handle cached in priv->handle is reused, or retrieved from
+ * "<workdir>/GLUSTERD_INFO_FILE" and cached when absent.  Returns non-zero
+ * when the handle or the key cannot be retrieved.
+ *
+ * NOTE(review): when the stored text does not parse as a positive number,
+ * a warning is logged but the function still returns 0 without writing
+ * *op_version, because the return code of the preceding successful
+ * retrieval is left in place. */
+int
+glusterd_retrieve_op_version (xlator_t *this, int *op_version)
+{
+        char                     path[PATH_MAX] = {0,};
+        char                    *stored         = NULL;
+        char                    *endp           = NULL;
+        gf_store_handle_t       *handle         = NULL;
+        glusterd_conf_t         *priv           = NULL;
+        int                      parsed         = 0;
+        int                      ret            = -1;
+
+        priv = this->private;
+
+        if (!priv->handle) {
+                snprintf (path, sizeof (path), "%s/%s", priv->workdir,
+                          GLUSTERD_INFO_FILE);
+                ret = gf_store_handle_retrieve (path, &handle);
+
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Unable to get store "
+                                      "handle!");
+                        goto out;
+                }
+
+                priv->handle = handle;
+        }
+
+        ret = gf_store_retrieve_value (priv->handle, GD_OP_VERSION_KEY,
+                                       &stored);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "No previous op_version present");
+                goto out;
+        }
+
+        /* At most one trailing character is tolerated after the digits. */
+        parsed = strtol (stored, &endp, 10);
+        if ((parsed <= 0) || (endp && strlen (endp) > 1)) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        GD_MSG_UNSUPPORTED_VERSION, "invalid version number");
+                goto out;
+        }
+
+        *op_version = parsed;
+        ret = 0;
+
+out:
+        if (stored)
+                GF_FREE (stored);
+
+        return ret;
+}
+
+/* Read the limit stored under @key in the glusterd info file and return
+ * it through @limit.
+ *
+ * The store handle cached in priv->handle is reused, or retrieved from
+ * "<workdir>/GLUSTERD_INFO_FILE" and cached when absent.  Returns non-zero
+ * when the handle or the key cannot be retrieved.
+ *
+ * NOTE(review): when the stored text does not parse as a non-zero number,
+ * a warning is logged but the function still returns 0 without writing
+ * *limit, because the return code of the preceding successful retrieval
+ * is left in place.  Kept as is; confirm what callers expect before
+ * changing it. */
+int
+glusterd_retrieve_sys_snap_max_limit (xlator_t *this, uint64_t *limit,
+                                      char *key)
+{
+        char                    *limit_str      = NULL;
+        glusterd_conf_t         *priv           = NULL;
+        int                      ret            = -1;
+        uint64_t                 tmp_limit      = 0;
+        char                    *tmp            = NULL;
+        char                     path[PATH_MAX] = {0,};
+        gf_store_handle_t       *handle         = NULL;
+
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+        GF_ASSERT (limit);
+        GF_ASSERT (key);
+
+        if (!priv->handle) {
+                snprintf (path, sizeof (path), "%s/%s", priv->workdir,
+                          GLUSTERD_INFO_FILE);
+                ret = gf_store_handle_retrieve (path, &handle);
+
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Unable to get store "
+                                      "handle!");
+                        goto out;
+                }
+
+                priv->handle = handle;
+        }
+
+        ret = gf_store_retrieve_value (priv->handle,
+                                       key,
+                                       &limit_str);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "No previous %s present", key);
+                goto out;
+        }
+
+        /* At most one trailing character is tolerated after the digits.
+         * tmp_limit is unsigned, so only zero can be rejected here. */
+        tmp_limit = strtoul (limit_str, &tmp, 10);
+        if ((tmp_limit == 0) || (tmp && strlen (tmp) > 1)) {
+                /* This is a limit, not a version: name the offending key
+                 * instead of reporting an "invalid version number". */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        GD_MSG_UNSUPPORTED_VERSION,
+                        "invalid value stored for %s", key);
+                goto out;
+        }
+
+        *limit = tmp_limit;
+
+        ret = 0;
+out:
+        if (limit_str)
+                GF_FREE (limit_str);
+
+        return ret;
+}
+
+/* Initialise conf->op_version from the store.
+ *
+ * When a stored op-version is retrieved it must lie within
+ * [GD_OP_VERSION_MIN, GD_OP_VERSION_MAX]; otherwise -1 is returned.
+ * When none can be retrieved, the op-version is chosen from whether a
+ * UUID is present in the store (see the comment below) and 0 is
+ * returned. */
+int
+glusterd_restore_op_version (xlator_t *this)
+{
+        glusterd_conf_t *conf   = NULL;
+        int              stored = 0;
+        int              ret    = 0;
+
+        conf = this->private;
+
+        ret = glusterd_retrieve_op_version (this, &stored);
+        if (ret) {
+                /* op-version can be missing from the store file in 2 cases,
+                 * 1. This is a new install of glusterfs
+                 * 2. This is an upgrade of glusterfs from a version without
+                 *    op-version to a version with op-version
+                 *    (eg. 3.3 -> 3.4)
+                 *
+                 * A new install is told apart from an upgrade by checking
+                 * for the presence of this node's peer-id in the store
+                 * file.  If the peer-id is present, the installation is an
+                 * upgrade; else, it is a new install.
+                 *
+                 * For case 1, set op-version to GD_OP_VERSION_MAX.
+                 * For case 2, set op-version to GD_OP_VERSION_MIN.
+                 */
+                if (glusterd_retrieve_uuid()) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_OP_VERS_SET_INFO,
+                                "Detected new install. Setting"
+                                " op-version to maximum : %d",
+                                GD_OP_VERSION_MAX);
+                        conf->op_version = GD_OP_VERSION_MAX;
+                } else {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_OP_VERS_SET_INFO,
+                                "Upgrade detected. Setting"
+                                " op-version to minimum : %d",
+                                GD_OP_VERSION_MIN);
+                        conf->op_version = GD_OP_VERSION_MIN;
+                }
+                ret = 0;
+                goto out;
+        }
+
+        if ((stored < GD_OP_VERSION_MIN) || (stored > GD_OP_VERSION_MAX)) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_UNSUPPORTED_VERSION,
+                        "wrong op-version (%d) retrieved", stored);
+                ret = -1;
+                goto out;
+        }
+
+        conf->op_version = stored;
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_OP_VERS_INFO,
+                "retrieved op-version: %d", conf->op_version);
+
+out:
+        return ret;
+}
+
+/* Read this node's UUID, stored under GLUSTERD_STORE_UUID_KEY in the
+ * glusterd info file, and parse it into priv->uuid.
+ *
+ * The store handle cached in priv->handle is reused, or retrieved from
+ * "<workdir>/GLUSTERD_INFO_FILE" and cached when absent.  Returns 0 when
+ * the key was retrieved, non-zero when the handle or the key is not
+ * available.  The result of gf_uuid_parse() is not examined. */
+int32_t
+glusterd_retrieve_uuid ()
+{
+        char                    *uuid_str       = NULL;
+        int32_t                  ret            = -1;
+        gf_store_handle_t       *handle         = NULL;
+        glusterd_conf_t         *priv           = NULL;
+        xlator_t                *this           = NULL;
+        char                     path[PATH_MAX] = {0,};
+
+        this = THIS;
+        priv = this->private;
+
+        if (!priv->handle) {
+                snprintf (path, sizeof (path), "%s/%s", priv->workdir,
+                          GLUSTERD_INFO_FILE);
+                ret = gf_store_handle_retrieve (path, &handle);
+
+                if (ret) {
+                        /* The two literals used to concatenate without a
+                         * separating space ("storehandle!"). */
+                        gf_msg_debug (this->name, 0, "Unable to get store "
+                                      "handle!");
+                        goto out;
+                }
+
+                priv->handle = handle;
+        }
+
+        ret = gf_store_retrieve_value (priv->handle, GLUSTERD_STORE_UUID_KEY,
+                                       &uuid_str);
+
+        if (ret) {
+                gf_msg_debug (this->name, 0, "No previous uuid is present");
+                goto out;
+        }
+
+        gf_uuid_parse (uuid_str, priv->uuid);
+
+out:
+        GF_FREE (uuid_str);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Restore the snapshot daemon (snapd) information of @volinfo from its
+ * snapd info file; currently only the port is read.
+ *
+ * Returns 0 without touching the store when the cluster op-version is
+ * below GD_OP_VERSION_3_6_0 or when "features.uss" is not enabled on the
+ * volume.  Otherwise returns 0 once the whole file has been read, and
+ * non-zero on any failure.
+ *
+ * Each key/value pair handed out by the store iterator is freed after
+ * use, and the iterator is released on every exit path. */
+int
+glusterd_store_retrieve_snapd (glusterd_volinfo_t *volinfo)
+{
+        int                      ret               = -1;
+        char                    *key               = NULL;
+        char                    *value             = NULL;
+        char                     volpath[PATH_MAX] = {0,};
+        char                     path[PATH_MAX]    = {0,};
+        xlator_t                *this              = NULL;
+        glusterd_conf_t         *conf              = NULL;
+        gf_store_iter_t         *iter              = NULL;
+        gf_store_op_errno_t      op_errno          = GD_STORE_SUCCESS;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = THIS->private;
+        GF_ASSERT (volinfo);
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0) {
+                ret = 0;
+                goto out;
+        }
+
+        /*
+         * This is needed for upgrade situations. Say a volume is created with
+         * an older version of glusterfs and upgraded to a glusterfs version
+         * equal to or greater than GD_OP_VERSION_3_6_0. The older glusterd
+         * would not have created the snapd.info file related to the snapshot
+         * daemon for user serviceable snapshots. So as part of the upgrade,
+         * when the new glusterd starts and restores the volume, it tries to
+         * read the snapd related info from the snapd.info file. But since
+         * there was no such file till now, the restore operation fails. To
+         * prevent that, check whether the user serviceable snapshots feature
+         * is enabled before restoring snapd. If it is disabled, simply exit
+         * by returning success (without even checking for snapd.info).
+         */
+
+        if (!dict_get_str_boolean (volinfo->dict, "features.uss", _gf_false)) {
+                ret = 0;
+                goto out;
+        }
+
+        GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf);
+
+        snprintf (path, sizeof (path), "%s/%s", volpath,
+                  GLUSTERD_VOLUME_SNAPD_INFO_FILE);
+
+        ret = gf_store_handle_retrieve (path, &volinfo->snapd.handle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HANDLE_NULL, "volinfo handle is NULL");
+                goto out;
+        }
+
+        ret = gf_store_iter_new (volinfo->snapd.handle, &iter);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_GET_FAIL, "Failed to get new store "
+                        "iter");
+                goto out;
+        }
+
+        ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_GET_FAIL, "Failed to get next store "
+                        "iter");
+                goto out;
+        }
+
+        while (!ret) {
+                if (!strncmp (key, GLUSTERD_STORE_KEY_SNAPD_PORT,
+                              strlen (GLUSTERD_STORE_KEY_SNAPD_PORT))) {
+                        volinfo->snapd.port = atoi (value);
+                }
+
+                /* Release the pair before asking for the next one, as the
+                 * other retrieve functions in this file do. */
+                GF_FREE (key);
+                GF_FREE (value);
+                key = NULL;
+                value = NULL;
+
+                ret = gf_store_iter_get_next (iter, &key, &value,
+                                              &op_errno);
+        }
+
+        /* ret is non-zero here; anything but EOF is a real error. */
+        if (op_errno != GD_STORE_EOF)
+                goto out;
+
+        ret = gf_store_iter_destroy (iter);
+        /* Never hand the iterator to the cleanup below a second time. */
+        iter = NULL;
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_DESTROY_FAIL,
+                        "Failed to destroy store "
+                        "iter");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        GF_FREE (key);
+        GF_FREE (value);
+        if (iter)
+                (void) gf_store_iter_destroy (iter);
+
+        return ret;
+}
+
+/* Restore the bricks of @volinfo from the store.
+ *
+ * For each of the volinfo->brick_count bricks the brick's store file name
+ * is looked up in the volume info file (key
+ * "GLUSTERD_STORE_KEY_VOL_BRICK-<index>"), the brick file is read key by
+ * key into a freshly allocated brickinfo, and the brickinfo is appended to
+ * volinfo->bricks.  Ports below priv->base_port are reset to 0; other
+ * ports push the port-map registry's last_alloc forward.  Bricks without
+ * a stored id get one assigned, and for local bricks of regular volumes a
+ * missing real_path is resolved with realpath().
+ *
+ * Returns 0 on success and non-zero on the first failure.  Store
+ * iterators are released on every exit path.
+ *
+ * NOTE(review): a brickinfo that was allocated but not yet linked into
+ * volinfo->bricks is not released when an error aborts the loop. */
+int32_t
+glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
+{
+        int32_t                  ret                = 0;
+        glusterd_brickinfo_t    *brickinfo          = NULL;
+        gf_store_iter_t         *iter               = NULL;
+        char                    *key                = NULL;
+        char                    *value              = NULL;
+        char                     brickdir[PATH_MAX] = {0,};
+        char                     path[PATH_MAX]     = {0,};
+        glusterd_conf_t         *priv               = NULL;
+        int32_t                  brick_count        = 0;
+        char                     tmpkey[4096]       = {0,};
+        gf_store_iter_t         *tmpiter            = NULL;
+        char                    *tmpvalue           = NULL;
+        char                     abspath[PATH_MAX]  = {0};
+        struct pmap_registry    *pmap               = NULL;
+        xlator_t                *this               = NULL;
+        int                      brickid            = 0;
+        gf_store_op_errno_t      op_errno           = GD_STORE_SUCCESS;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (volinfo->volname);
+
+        this = THIS;
+        priv = this->private;
+
+        GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv);
+
+        ret = gf_store_iter_new (volinfo->shandle, &tmpiter);
+
+        if (ret)
+                goto out;
+
+        while (brick_count < volinfo->brick_count) {
+                ret = glusterd_brickinfo_new (&brickinfo);
+
+                if (ret)
+                        goto out;
+                snprintf (tmpkey, sizeof (tmpkey), "%s-%d",
+                          GLUSTERD_STORE_KEY_VOL_BRICK,brick_count);
+                ret = gf_store_iter_get_matching (tmpiter, tmpkey, &tmpvalue);
+                if (ret) {
+                        /* Without the brick's file name there is nothing to
+                         * read; do not format a missing value into path. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STORE_ITER_GET_FAIL, "Unable to find "
+                                "key %s in the volume store", tmpkey);
+                        goto out;
+                }
+                snprintf (path, sizeof (path), "%s/%s", brickdir, tmpvalue);
+
+                GF_FREE (tmpvalue);
+
+                tmpvalue = NULL;
+
+                ret = gf_store_handle_retrieve (path, &brickinfo->shandle);
+
+                if (ret)
+                        goto out;
+
+                ret = gf_store_iter_new (brickinfo->shandle, &iter);
+
+                if (ret)
+                        goto out;
+
+                ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, op_errno,
+                                GD_MSG_STORE_ITER_GET_FAIL, "Unable to iterate "
+                                "the store for brick: %s", path);
+                        goto out;
+                }
+                while (!ret) {
+                        /* String fields are copied with snprintf() so that
+                         * an over-long stored value is truncated but always
+                         * NUL-terminated; the bounds are unchanged. */
+                        if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+                                      strlen (GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
+                                snprintf (brickinfo->hostname, 1024, "%s",
+                                          value);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PATH,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_PATH))) {
+                                snprintf (brickinfo->path,
+                                          sizeof (brickinfo->path), "%s",
+                                          value);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) {
+                                snprintf (brickinfo->real_path,
+                                          sizeof (brickinfo->real_path), "%s",
+                                          value);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PORT,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_PORT))) {
+                                gf_string2int (value, &brickinfo->port);
+
+                                if (brickinfo->port < priv->base_port) {
+                                        /* This is required to adhere to the
+                                           IANA standards */
+                                        brickinfo->port = 0;
+                                } else {
+                                        /* This is required to have proper ports
+                                           assigned to bricks after restart */
+                                        pmap = pmap_registry_get (THIS);
+                                        if (pmap->last_alloc <= brickinfo->port)
+                                                pmap->last_alloc =
+                                                        brickinfo->port + 1;
+                                }
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
+                                gf_string2int (value, &brickinfo->rdma_port);
+
+                                if (brickinfo->rdma_port < priv->base_port) {
+                                        /* This is required to adhere to the
+                                           IANA standards */
+                                        brickinfo->rdma_port = 0;
+                                } else {
+                                        /* This is required to have proper ports
+                                           assigned to bricks after restart */
+                                        pmap = pmap_registry_get (THIS);
+                                        if (pmap->last_alloc <=
+                                            brickinfo->rdma_port)
+                                                pmap->last_alloc =
+                                                        brickinfo->rdma_port +1;
+                                }
+
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+                                gf_string2int (value, &brickinfo->decommissioned);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) {
+                                snprintf (brickinfo->device_path,
+                                          sizeof (brickinfo->device_path),
+                                          "%s", value);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR))) {
+                                snprintf (brickinfo->mount_dir,
+                                          sizeof (brickinfo->mount_dir),
+                                          "%s", value);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+                                gf_string2int (value, &brickinfo->snap_status);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_FSTYPE,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_FSTYPE))) {
+                                snprintf (brickinfo->fstype,
+                                          sizeof (brickinfo->fstype), "%s",
+                                          value);
+                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_MNTOPTS,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_MNTOPTS))) {
+                                snprintf (brickinfo->mnt_opts,
+                                          sizeof (brickinfo->mnt_opts), "%s",
+                                          value);
+                        } else if (!strncmp (key,
+                                             GLUSTERD_STORE_KEY_BRICK_VGNAME,
+                                             strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) {
+                                snprintf (brickinfo->vg,
+                                          sizeof (brickinfo->vg), "%s",
+                                          value);
+                        } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {
+                                snprintf (brickinfo->brick_id,
+                                          sizeof (brickinfo->brick_id), "%s",
+                                          value);
+                        } else {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_UNKNOWN_KEY, "Unknown key: %s",
+                                        key);
+                        }
+
+                        GF_FREE (key);
+                        GF_FREE (value);
+                        key = NULL;
+                        value = NULL;
+
+                        ret = gf_store_iter_get_next (iter, &key, &value,
+                                                      &op_errno);
+                }
+
+                if (op_errno != GD_STORE_EOF) {
+                        gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                                GD_MSG_PARSE_BRICKINFO_FAIL,
+                                "Error parsing brickinfo: "
+                                "op_errno=%d", op_errno);
+                        goto out;
+                }
+                ret = gf_store_iter_destroy (iter);
+                /* Never hand the iterator to the cleanup at out again. */
+                iter = NULL;
+
+                if (ret)
+                        goto out;
+
+                if (brickinfo->brick_id[0] == '\0') {
+                       /* This is an old volume upgraded to op_version 4 */
+                        GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (brickinfo, volinfo,
+                                                              brickid++);
+                }
+                /* Populate brickinfo->real_path for normal volumes, for
+                 * snapshot or snapshot restored volume this would be done post
+                 * creating the brick mounts
+                 */
+                if (brickinfo->real_path[0] == '\0' && !volinfo->is_snap_volume
+                    && gf_uuid_is_null (volinfo->restored_from_snap)) {
+                        /* By now if the brick is a local brick then it will be
+                         * able to resolve which is the only thing we want now
+                         * for checking  whether the brickinfo->uuid matches
+                         * with MY_UUID for realpath check. Hence do not handle
+                         * error
+                         */
+                        (void)glusterd_resolve_brick (brickinfo);
+                        if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+                                if (!realpath (brickinfo->path, abspath)) {
+                                        gf_msg (this->name, GF_LOG_CRITICAL,
+                                                errno,
+                                                GD_MSG_BRICKINFO_CREATE_FAIL,
+                                                "realpath() failed for brick %s"
+                                                ". The underlying file system "
+                                                "may be in bad state",
+                                                brickinfo->path);
+                                        ret = -1;
+                                        goto out;
+                                }
+                                /* Bound the copy by the destination and
+                                 * always terminate it; the old copy was
+                                 * bounded by strlen (abspath) and never
+                                 * wrote a terminator. */
+                                snprintf (brickinfo->real_path,
+                                          sizeof (brickinfo->real_path), "%s",
+                                          abspath);
+                        }
+                }
+                cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
+                brick_count++;
+        }
+
+        assign_brick_groups (volinfo);
+        ret = gf_store_iter_destroy (tmpiter);
+        tmpiter = NULL;
+        if (ret)
+                goto out;
+out:
+        /* Release whatever an aborted pass left behind. */
+        if (iter)
+                (void) gf_store_iter_destroy (iter);
+        if (tmpiter)
+                (void) gf_store_iter_destroy (tmpiter);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/* Restore the rebalance state of @volinfo from its node-state file.
+ *
+ * Known keys are parsed into volinfo->rebal; every other key/value pair is
+ * collected into a dictionary that becomes volinfo->rebal.dict.  The file
+ * counters are written as unsigned 64-bit decimals and the run time as a
+ * floating point number, so they are parsed with strtoull()/strtod()
+ * rather than atoi(), which would truncate them.
+ *
+ * Returns 0 once the whole file has been read and non-zero on any
+ * failure.  On failure volinfo->rebal.dict is unreferenced and cleared.
+ * The store iterator and the current key/value pair are released on every
+ * exit path. */
+int32_t
+glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo)
+{
+        int32_t                  ret               = -1;
+        gf_store_iter_t         *iter              = NULL;
+        char                    *key               = NULL;
+        char                    *value             = NULL;
+        char                    *dup_value         = NULL;
+        char                     volpath[PATH_MAX] = {0,};
+        glusterd_conf_t         *priv              = NULL;
+        char                     path[PATH_MAX]    = {0,};
+        gf_store_op_errno_t      op_errno          = GD_STORE_SUCCESS;
+        dict_t                  *tmp_dict          = NULL;
+        xlator_t                *this              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (volinfo);
+
+        GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
+        snprintf (path, sizeof (path), "%s/%s", volpath,
+                  GLUSTERD_NODE_STATE_FILE);
+
+        ret = gf_store_handle_retrieve (path, &volinfo->node_state_shandle);
+        if (ret)
+                goto out;
+
+        ret = gf_store_iter_new (volinfo->node_state_shandle, &iter);
+
+        if (ret)
+                goto out;
+
+        ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        if (ret)
+                goto out;
+
+        while (ret == 0) {
+                if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+                              strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
+                        volinfo->rebal.defrag_cmd = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS))) {
+                        volinfo->rebal.defrag_status = atoi (value);
+                } else if (!strncmp (key, GF_REBALANCE_TID_KEY,
+                                     strlen (GF_REBALANCE_TID_KEY))) {
+                        gf_uuid_parse (value, volinfo->rebal.rebalance_id);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP,
+                                     strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) {
+                        volinfo->rebal.op = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) {
+                        volinfo->rebal.rebalance_files =
+                                strtoull (value, NULL, 10);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) {
+                        volinfo->rebal.rebalance_data =
+                                strtoull (value, NULL, 10);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) {
+                        volinfo->rebal.lookedup_files =
+                                strtoull (value, NULL, 10);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) {
+                        volinfo->rebal.rebalance_failures =
+                                strtoull (value, NULL, 10);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) {
+                        volinfo->rebal.skipped_files =
+                                strtoull (value, NULL, 10);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) {
+                        volinfo->rebal.rebalance_time = strtod (value, NULL);
+                } else {
+                        /* Unknown keys are kept verbatim in a dictionary. */
+                        if (!tmp_dict) {
+                                tmp_dict = dict_new ();
+                                if (!tmp_dict) {
+                                        ret = -1;
+                                        goto out;
+                                }
+                        }
+                        dup_value = gf_strdup (value);
+                        if (!dup_value) {
+                                ret = -1;
+                                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                        GD_MSG_NO_MEMORY,
+                                        "Failed to strdup value string");
+                                goto out;
+                        }
+                        ret = dict_set_str (tmp_dict, key, dup_value);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Error setting data in rebal "
+                                        "dict.");
+                                goto out;
+                        }
+                        dup_value = NULL;
+                }
+
+                GF_FREE (key);
+                GF_FREE (value);
+                key = NULL;
+                value = NULL;
+
+                ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        }
+        if (tmp_dict)
+                volinfo->rebal.dict = dict_ref (tmp_dict);
+
+        if (op_errno != GD_STORE_EOF) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_store_iter_destroy (iter);
+        /* Never hand the iterator to the cleanup at out again. */
+        iter = NULL;
+
+        if (ret)
+                goto out;
+
+out:
+        /* The pair is still held when an error aborted the loop body. */
+        GF_FREE (key);
+        GF_FREE (value);
+        if (iter)
+                (void) gf_store_iter_destroy (iter);
+        if (dup_value)
+                GF_FREE (dup_value);
+        if (ret && volinfo->rebal.dict) {
+                dict_unref (volinfo->rebal.dict);
+                /* Do not leave a pointer to a dictionary whose reference
+                 * has just been dropped. */
+                volinfo->rebal.dict = NULL;
+        }
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+
+/*
+ * Read the persisted volume info file of @volinfo and fill the in-memory
+ * structure from its key/value pairs.
+ *
+ * Recognised keys are parsed into the matching volinfo fields.  Keys that
+ * contain "slave" are stored in volinfo->gsync_slaves.  Every other key is
+ * checked with is_key_glusterd_hooks_friendly()/glusterd_check_option_exists()
+ * and, when accepted, stored in volinfo->dict.  After the scan, counts are
+ * derived from the volume type for backward compatibility.
+ *
+ * Returns 0 on success and non-zero on failure.  The key/value pair being
+ * processed and the store iterator are released on every exit path.
+ */
+int
+glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
+{
+        int                  ret       = -1;
+        int                  exists    = 0;
+        char                *key       = NULL;
+        char                *value     = NULL;
+        char                *dup_value = NULL;
+        char                 volpath[PATH_MAX] = {0,};
+        char                 path[PATH_MAX]    = {0,};
+        xlator_t            *this      = NULL;
+        glusterd_conf_t     *conf      = NULL;
+        gf_store_iter_t     *iter      = NULL;
+        gf_store_op_errno_t  op_errno  = GD_STORE_SUCCESS;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = THIS->private;
+        GF_ASSERT (volinfo);
+
+        GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf);
+
+        snprintf (path, sizeof (path), "%s/%s", volpath,
+                  GLUSTERD_VOLUME_INFO_FILE);
+
+        ret = gf_store_handle_retrieve (path, &volinfo->shandle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HANDLE_NULL, "volinfo handle is NULL");
+                goto out;
+        }
+
+        ret = gf_store_iter_new (volinfo->shandle, &iter);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_GET_FAIL, "Failed to get new store "
+                        "iter");
+                goto out;
+        }
+
+        ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_GET_FAIL, "Failed to get next store "
+                        "iter");
+                goto out;
+        }
+
+        while (!ret) {
+                gf_msg_debug (this->name, 0, "key = %s value = %s", key, value);
+                if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE,
+                              strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) {
+                        volinfo->type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_COUNT))) {
+                        volinfo->brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STATUS,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_STATUS))) {
+                        volinfo->status = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_VERSION,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_VERSION))) {
+                        volinfo->version = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_PORT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_PORT))) {
+                        volinfo->port = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) {
+                        volinfo->sub_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_STRIPE_CNT))) {
+                        volinfo->stripe_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
+                        volinfo->replica_count = atoi (value);
+                } else if (!strcmp (key, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT)) {
+                        volinfo->arbiter_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
+                        volinfo->disperse_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) {
+                        volinfo->redundancy_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
+                        volinfo->transport_type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_ID,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_ID))) {
+                        /* A bad uuid is only warned about; ret is
+                         * overwritten by the next iterator call. */
+                        ret = gf_uuid_parse (value, volinfo->volume_id);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_UUID_PARSE_FAIL,
+                                        "failed to parse uuid");
+
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_USERNAME,
+                                     strlen (GLUSTERD_STORE_KEY_USERNAME))) {
+
+                        glusterd_auth_set_username (volinfo, value);
+
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_PASSWORD,
+                                     strlen (GLUSTERD_STORE_KEY_PASSWORD))) {
+
+                        glusterd_auth_set_password (volinfo, value);
+
+                } else if (strstr (key, "slave")) {
+                        ret = dict_set_dynstr (volinfo->gsync_slaves, key,
+                                               gf_strdup (value));
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED, "Error in "
+                                        "dict_set_str");
+                                goto out;
+                        }
+                        gf_msg_debug (this->name, 0, "Parsed as "GEOREP" "
+                                      " slave:key=%s,value:%s", key, value);
+
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) {
+                        volinfo->op_version = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) {
+                        volinfo->client_op_version = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) {
+                        volinfo->caps = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+                                     strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) {
+                        volinfo->snap_max_hard_limit = (uint64_t) atoll (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP))) {
+                        ret = gf_uuid_parse (value, volinfo->restored_from_snap);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_UUID_PARSE_FAIL,
+                                        "failed to parse restored snap's uuid");
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME,
+                                     strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
+                        strncpy (volinfo->parent_volname, value,
+                                 sizeof(volinfo->parent_volname) - 1);
+                /* The tier keys below are compared over the length of the
+                 * constant, like every other key in this chain.  Bounding
+                 * the compare by strlen (key) would let any key that is a
+                 * mere prefix of the constant match. */
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_COUNT))) {
+                        volinfo->tier_info.cold_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT))) {
+                        volinfo->tier_info.cold_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT))) {
+                        volinfo->tier_info.cold_disperse_count = atoi (value);
+                } else if (!strncmp (key,
+                                     GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT))) {
+                        volinfo->tier_info.cold_redundancy_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_HOT_COUNT))) {
+                        volinfo->tier_info.hot_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT))) {
+                        volinfo->tier_info.hot_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE,
+                                     strlen (GLUSTERD_STORE_KEY_HOT_TYPE))) {
+                        volinfo->tier_info.hot_type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_TYPE))) {
+                        volinfo->tier_info.cold_type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
+                                     strlen (GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION))) {
+                        volinfo->quota_xattr_version = atoi (value);
+                } else {
+
+                        if (is_key_glusterd_hooks_friendly (key)) {
+                                exists = 1;
+
+                        } else {
+                                exists = glusterd_check_option_exists (key,
+                                                                       NULL);
+                        }
+
+                        switch (exists) {
+                        case -1:
+                                ret = -1;
+                                goto out;
+
+                        case 0:
+                                /*Ignore GLUSTERD_STORE_KEY_VOL_BRICK since
+                                  glusterd_store_retrieve_bricks gets it later*/
+                                if (!strstr (key, GLUSTERD_STORE_KEY_VOL_BRICK))
+                                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                                GD_MSG_UNKNOWN_KEY,
+                                                "Unknown key: %s", key);
+                                break;
+
+                        case 1:
+                                /*The following strcmp check is to ensure that
+                                 * glusterd does not restore the quota limits
+                                 * into volinfo->dict post upgradation from 3.3
+                                 * to 3.4 as the same limits will now be stored
+                                 * in xattrs on the respective directories.
+                                 */
+                                if (!strcmp (key, "features.limit-usage"))
+                                        break;
+                                /* Hand the copy over with dict_set_dynstr,
+                                 * as the "slave" branch above does, so the
+                                 * dict owns it; dict_set_str appears not to
+                                 * take ownership, which would leak the copy
+                                 * (confirm against dict.c). */
+                                dup_value = gf_strdup (value);
+                                if (!dup_value) {
+                                        ret = -1;
+                                        gf_msg (this->name, GF_LOG_ERROR,
+                                                ENOMEM, GD_MSG_NO_MEMORY,
+                                                "Failed to strdup value string");
+                                        goto out;
+                                }
+                                ret = dict_set_dynstr (volinfo->dict, key,
+                                                       dup_value);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "Error in "
+                                                "dict_set_str");
+                                        goto out;
+                                }
+                                gf_msg_debug (this->name, 0, "Parsed as Volume-"
+                                              "set:key=%s,value:%s", key, value);
+                                break;
+                        }
+                }
+
+                GF_FREE (key);
+                GF_FREE (value);
+                key = NULL;
+                value = NULL;
+
+                ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        }
+
+        /* backward compatibility */
+        {
+
+                switch (volinfo->type) {
+
+                case GF_CLUSTER_TYPE_NONE:
+                        volinfo->stripe_count  = 1;
+                        volinfo->replica_count = 1;
+                        break;
+
+                case GF_CLUSTER_TYPE_STRIPE:
+                        volinfo->stripe_count  = volinfo->sub_count;
+                        volinfo->replica_count = 1;
+                        break;
+
+                case GF_CLUSTER_TYPE_REPLICATE:
+                        volinfo->stripe_count  = 1;
+                        volinfo->replica_count = volinfo->sub_count;
+                        break;
+
+                case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                        /* Introduced in 3.3 */
+                        GF_ASSERT (volinfo->stripe_count > 0);
+                        GF_ASSERT (volinfo->replica_count > 0);
+                        break;
+
+                case GF_CLUSTER_TYPE_DISPERSE:
+                        GF_ASSERT (volinfo->disperse_count > 0);
+                        GF_ASSERT (volinfo->redundancy_count > 0);
+                        break;
+
+                case GF_CLUSTER_TYPE_TIER:
+                        if (volinfo->tier_info.cold_type ==
+                                                GF_CLUSTER_TYPE_DISPERSE)
+                                volinfo->tier_info.cold_dist_leaf_count
+                                        = volinfo->disperse_count;
+                        else
+                                volinfo->tier_info.cold_dist_leaf_count
+                                        = glusterd_calc_dist_leaf_count (
+                                                volinfo->tier_info.
+                                                cold_replica_count,
+                                                1);
+
+                        break;
+
+                default:
+                        GF_ASSERT (0);
+                        break;
+                }
+
+                volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+                volinfo->subvol_count = (volinfo->brick_count /
+                                         volinfo->dist_leaf_count);
+
+                /* Only calculate volume op-versions if they are not found */
+                if (!volinfo->op_version && !volinfo->client_op_version)
+                        gd_update_volume_op_versions (volinfo);
+        }
+
+        /* The loop ended on a non-zero ret; anything but EOF is an error. */
+        if (op_errno != GD_STORE_EOF)
+                goto out;
+
+        ret = gf_store_iter_destroy (iter);
+        iter = NULL;
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_DESTROY_FAIL,
+                        "Failed to destroy store "
+                        "iter");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* Release whatever an early exit left behind. */
+        if (key)
+                GF_FREE (key);
+        if (value)
+                GF_FREE (value);
+        if (iter)
+                (void) gf_store_iter_destroy (iter);
+        return ret;
+}
+
+/*
+ * Rebuild the in-memory volinfo for @volname from the store.
+ *
+ * When @snap is non-NULL the volume is marked as a snapshot volume and is
+ * attached to its parent volume; otherwise it is inserted, ordered by name,
+ * into priv->volumes.  Returns the new volinfo, or NULL on any failure.
+ */
+glusterd_volinfo_t*
+glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap)
+{
+        xlator_t            *this       = NULL;
+        glusterd_conf_t     *priv       = NULL;
+        glusterd_volinfo_t  *volinfo    = NULL;
+        glusterd_volinfo_t  *parent_vol = NULL;
+        int32_t              ret        = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (volname);
+
+        ret = glusterd_volinfo_new (&volinfo);
+        if (ret)
+                goto out;
+
+        strncpy (volinfo->volname, volname, GD_VOLUME_NAME_MAX);
+        volinfo->snapshot = snap;
+        if (snap)
+                volinfo->is_snap_volume = _gf_true;
+
+        /* Load the volume info file first; the later steps rely on it. */
+        ret = glusterd_store_update_volinfo (volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_UPDATE_FAIL, "Failed to update volinfo "
+                        "for %s volume", volname);
+                goto out;
+        }
+
+        ret = glusterd_store_retrieve_bricks (volinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_retrieve_snapd (volinfo);
+        if (ret)
+                goto out;
+
+        /* Volume checksum first, quota checksum after the quota files. */
+        ret = glusterd_compute_cksum (volinfo, _gf_false);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_retrieve_quota_version (volinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_create_quota_conf_sh_on_absence (volinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_compute_cksum (volinfo, _gf_true);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_save_quota_version_and_cksum (volinfo);
+        if (ret)
+                goto out;
+
+        if (snap) {
+                ret = glusterd_volinfo_find (volinfo->parent_volname,
+                                             &parent_vol);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "Parent volinfo "
+                                "not found for %s volume", volname);
+                        goto out;
+                }
+                glusterd_list_add_snapvol (parent_vol, volinfo);
+        } else {
+                glusterd_list_add_order (&volinfo->vol_list, &priv->volumes,
+                                         glusterd_compare_volume_name);
+        }
+
+out:
+        if (ret) {
+                if (volinfo)
+                        glusterd_volinfo_unref (volinfo);
+                volinfo = NULL;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+
+        return volinfo;
+}
+
+/* Write "<workdir>/options" into @path, which holds @len bytes. */
+static void
+glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len)
+{
+        const char *workdir = conf->workdir;
+
+        snprintf (path, len, "%s/options", workdir);
+}
+
+/*
+ * dict_foreach() callback: save one key/value pair to the fd of the store
+ * handle passed in @data.  The save result is ignored; always returns 0.
+ */
+int
+_store_global_opts (dict_t *this, char *key, data_t *value, void *data)
+{
+        gf_store_handle_t *handle  = data;
+        char              *val_str = (char*)value->data;
+
+        gf_store_save_value (handle->fd, key, val_str);
+
+        return 0;
+}
+
+/*
+ * Persist every entry of @opts to "<workdir>/options".
+ *
+ * The entries are written to a temporary file, which is then renamed into
+ * place.  If a failure happens after the temporary file was created, the
+ * file is unlinked.  Returns 0 on success, non-zero otherwise.
+ */
+int32_t
+glusterd_store_options (xlator_t *this, dict_t *opts)
+{
+        char                optpath[PATH_MAX] = {0};
+        glusterd_conf_t    *conf              = this->private;
+        gf_store_handle_t  *handle            = NULL;
+        int                 tmp_fd            = -1;
+        int32_t             ret               = -1;
+
+        glusterd_store_set_options_path (conf, optpath, sizeof (optpath));
+
+        ret = gf_store_handle_new (optpath, &handle);
+        if (ret)
+                goto out;
+
+        tmp_fd = gf_store_mkstemp (handle);
+        if (tmp_fd <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        /* The callback reads the fd from the handle; reset it afterwards. */
+        handle->fd = tmp_fd;
+        dict_foreach (opts, _store_global_opts, handle);
+        handle->fd = 0;
+
+        ret = gf_store_rename_tmppath (handle);
+
+out:
+        if ((ret < 0) && (tmp_fd > 0))
+                gf_store_unlink_tmppath (handle);
+        gf_store_handle_destroy (handle);
+        return ret;
+}
+
+/*
+ * Load "<workdir>/options" into conf->opts.
+ *
+ * Each value string is handed over to the dict with dict_set_dynstr(); the
+ * key is freed here.  Returns 0 once the iterator reports EOF, non-zero on
+ * any other outcome.
+ */
+int32_t
+glusterd_store_retrieve_options (xlator_t *this)
+{
+        char                 optpath[PATH_MAX] = {0};
+        glusterd_conf_t     *conf              = this->private;
+        gf_store_handle_t   *handle            = NULL;
+        gf_store_iter_t     *iter              = NULL;
+        char                *key               = NULL;
+        char                *value             = NULL;
+        gf_store_op_errno_t  op_errno          = 0;
+        int                  ret               = -1;
+
+        glusterd_store_set_options_path (conf, optpath, sizeof (optpath));
+
+        ret = gf_store_handle_retrieve (optpath, &handle);
+        if (ret)
+                goto out;
+
+        ret = gf_store_iter_new (handle, &iter);
+        if (ret)
+                goto out;
+
+        for (ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+             !ret;
+             ret = gf_store_iter_get_next (iter, &key, &value, &op_errno)) {
+                ret = dict_set_dynstr (conf->opts, key, value);
+                if (ret) {
+                        GF_FREE (key);
+                        GF_FREE (value);
+                        goto out;
+                }
+                /* value now belongs to the dict; only the key is ours. */
+                GF_FREE (key);
+                key = NULL;
+                value = NULL;
+        }
+
+        if (op_errno == GD_STORE_EOF)
+                ret = 0;
+out:
+        gf_store_iter_destroy (iter);
+        gf_store_handle_destroy (handle);
+        return ret;
+}
+
+/*
+ * Restore every volume found under the volumes directory, or under the
+ * directory of @snap when @snap is non-NULL.
+ *
+ * For snapshots the "geo-replication" and "info" entries are skipped.  A
+ * volume without a readable node_state gets a fresh one written.  Returns 0
+ * on success and -1 when the directory cannot be opened or a volume cannot
+ * be restored.
+ */
+int32_t
+glusterd_store_retrieve_volumes (xlator_t *this, glusterd_snap_t  *snap)
+{
+        char                 dirpath[PATH_MAX] = {0,};
+        struct dirent        scratch[2]        = {{0,},};
+        struct dirent       *entry             = NULL;
+        DIR                 *dir               = NULL;
+        glusterd_conf_t     *priv              = NULL;
+        glusterd_volinfo_t  *volinfo           = NULL;
+        int                  skip              = 0;
+        int32_t              ret               = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        if (!snap)
+                snprintf (dirpath, PATH_MAX, "%s/%s", priv->workdir,
+                          GLUSTERD_VOLUME_DIR_PREFIX);
+        else
+                snprintf (dirpath, PATH_MAX, "%s/snaps/%s", priv->workdir,
+                          snap->snapname);
+
+        dir = sys_opendir (dirpath);
+
+        if (!dir) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED, "Unable to open dir %s", dirpath);
+                goto out;
+        }
+
+        GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+
+        while (entry) {
+                /* These entries of a snap directory are not volumes. */
+                skip = (snap &&
+                        ((!strcmp (entry->d_name, "geo-replication")) ||
+                         (!strcmp (entry->d_name, "info"))));
+
+                if (!skip) {
+                        volinfo = glusterd_store_retrieve_volume (entry->d_name,
+                                                                  snap);
+                        if (!volinfo) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOL_RESTORE_FAIL,
+                                        "Unable to restore "
+                                        "volume: %s", entry->d_name);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = glusterd_store_retrieve_node_state (volinfo);
+                        if (ret) {
+                                /* Backward compatibility */
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        GD_MSG_NEW_NODE_STATE_CREATION,
+                                        "Creating a new node_state "
+                                        "for volume: %s.", entry->d_name);
+                                glusterd_store_create_nodestate_sh_on_absence (volinfo);
+                                ret = glusterd_store_perform_node_state_store (volinfo);
+                        }
+                }
+
+                GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+        }
+
+        ret = 0;
+
+out:
+        if (dir)
+                sys_closedir (dir);
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/*
+ * Figure out the brick mount path from the brick path.
+ *
+ * A copy of @brick_path is cut at the first '/' that follows the first
+ * occurrence of "brick" and returned through @brick_mount_path; the caller
+ * frees it with GF_FREE.  On failure *brick_mount_path is set to NULL.
+ *
+ * Returns 0 on success, -1 when the copy cannot be allocated or the path
+ * contains no "brick" component.
+ */
+int32_t
+glusterd_find_brick_mount_path (char *brick_path, char **brick_mount_path)
+{
+        char      *ptr   = NULL;
+        char      *slash = NULL;
+        int32_t    ret   = -1;
+        xlator_t  *this  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brick_path);
+        GF_ASSERT (brick_mount_path);
+
+        *brick_mount_path = gf_strdup (brick_path);
+        if (!*brick_mount_path) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Finding the pointer to the end of
+         * /var/run/gluster/snaps/<snap-uuid>
+         */
+        ptr = strstr (*brick_mount_path, "brick");
+        if (!ptr) {
+                /* Snapshot bricks must have brick num as part
+                 * of the brickpath
+                 */
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY,
+                        "Invalid brick path(%s)", brick_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Terminate the copy at the end of
+         * /var/run/gluster/snaps/<snap-uuid>/<brick_num>;
+         * a path with nothing after the brick component is kept whole.
+         */
+        slash = strchr (ptr, '/');
+        if (slash)
+                *slash = '\0';
+
+        ret = 0;
+out:
+        if (ret && *brick_mount_path) {
+                GF_FREE (*brick_mount_path);
+                *brick_mount_path = NULL;
+        }
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Make sure @brick_mount_path is mounted.
+ *
+ * If a mount entry already exists for the path nothing is done.  Otherwise
+ * brickinfo->device_path is activated with "lvchange -ay" and the lvm
+ * snapshot is mounted at @brick_mount_path.  Returns 0 on success.
+ */
+int32_t
+glusterd_mount_brick_paths (char *brick_mount_path,
+                            glusterd_brickinfo_t *brickinfo)
+{
+        char              mnt_buf[PATH_MAX] = {0, };
+        struct mntent     mnt_store         = {0, };
+        struct mntent    *mnt               = NULL;
+        runner_t          runner            = {0, };
+        glusterd_conf_t  *priv              = NULL;
+        xlator_t         *this              = NULL;
+        int32_t           ret               = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brick_mount_path);
+        GF_ASSERT (brickinfo);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Nothing to do when the path is already a mount point */
+        mnt = glusterd_get_mnt_entry_info (brick_mount_path, mnt_buf,
+                                           sizeof (mnt_buf), &mnt_store);
+        if (mnt) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_ALREADY_MOUNTED,
+                        "brick_mount_path (%s) already mounted.",
+                        brick_mount_path);
+                ret = 0;
+                goto out;
+        }
+
+        /* TODO RHEL 6.5 has the logical volumes inactive by default
+         * on reboot. Hence activating the logical vol. Check behaviour
+         * on other systems
+         */
+        /* Activate the snapshot */
+        runinit (&runner);
+        runner_add_args (&runner, "lvchange", "-ay", brickinfo->device_path,
+                         NULL);
+        ret = runner_run (&runner);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SNAP_ACTIVATE_FAIL,
+                        "Failed to activate %s.",
+                        brickinfo->device_path);
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0,
+                      "Activating %s successful", brickinfo->device_path);
+
+        /* Mount the snapshot */
+        ret = glusterd_mount_lvm_snapshot (brickinfo, brick_mount_path);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_MOUNT_FAIL,
+                        "Failed to mount lvm snapshot.");
+
+out:
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Recreate and mount the brick mount paths of the local snapshot bricks of
+ * @volinfo.
+ *
+ * Bricks of other nodes, bricks whose snapshot is pending and bricks
+ * without a device path are skipped.  A mount failure is logged and the
+ * loop goes on with the remaining steps; every other failure aborts the
+ * loop.  Returns 0 on success and non-zero on failure.
+ */
+static int32_t
+glusterd_recreate_vol_brick_mounts (xlator_t  *this,
+                                    glusterd_volinfo_t *volinfo)
+{
+        char                  *brick_mount_path = NULL;
+        glusterd_brickinfo_t  *brickinfo        = NULL;
+        int32_t                ret              = -1;
+        struct stat            st_buf           = {0, };
+        char                   abspath[PATH_MAX] = {0};
+
+        GF_ASSERT (this);
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* If the brick is not of this node, or its
+                 * snapshot is pending, or the brick is not
+                 * a snapshotted brick, we continue
+                 */
+                if ((gf_uuid_compare (brickinfo->uuid, MY_UUID)) ||
+                    (brickinfo->snap_status == -1) ||
+                    (strlen(brickinfo->device_path) == 0))
+                        continue;
+
+                /* Fetch the brick mount path from the brickinfo->path */
+                ret = glusterd_find_brick_mount_path (brickinfo->path,
+                                                      &brick_mount_path);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRK_MNTPATH_GET_FAIL,
+                                "Failed to find brick_mount_path for %s",
+                                brickinfo->path);
+                        goto out;
+                }
+
+                /* Check if the brickinfo path is present.
+                 * If not create the brick_mount_path */
+                ret = sys_lstat (brickinfo->path, &st_buf);
+                if (ret) {
+                        if (errno == ENOENT) {
+                                ret = mkdir_p (brick_mount_path, 0777,
+                                               _gf_true);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                                GD_MSG_CREATE_DIR_FAILED,
+                                                "Failed to create %s. ",
+                                                brick_mount_path);
+                                        goto out;
+                                }
+                        } else {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        GD_MSG_FILE_OP_FAILED,
+                                        "Brick Path(%s) not valid. ",
+                                        brickinfo->path);
+                                goto out;
+                        }
+                }
+
+                /* Check if brick_mount_path is already mounted.
+                 * If not, mount the device_path at the brick_mount_path */
+                ret = glusterd_mount_brick_paths (brick_mount_path, brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRK_MNTPATH_MOUNT_FAIL,
+                                "Failed to mount brick_mount_path");
+                }
+                if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+                        if (brickinfo->real_path[0] == '\0') {
+                                if (!realpath (brickinfo->path, abspath)) {
+                                        gf_msg (this->name, GF_LOG_CRITICAL,
+                                                errno,
+                                                GD_MSG_BRICKINFO_CREATE_FAIL,
+                                                "realpath() failed for brick %s"
+                                                ". The underlying file system "
+                                                "may be in bad state",
+                                                brickinfo->path);
+                                        ret = -1;
+                                        goto out;
+                                }
+                                /* Copy the terminator too: strncpy bounded
+                                 * by strlen (abspath) never writes it.
+                                 * NOTE(review): assumes real_path can hold
+                                 * PATH_MAX bytes like abspath - confirm. */
+                                memcpy (brickinfo->real_path, abspath,
+                                        strlen (abspath) + 1);
+                        }
+                }
+
+                if (brick_mount_path) {
+                        GF_FREE (brick_mount_path);
+                        brick_mount_path = NULL;
+                }
+        }
+
+        ret = 0;
+out:
+        if (ret && brick_mount_path)
+                GF_FREE (brick_mount_path);
+
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Call glusterd_resolve_brick() on every brick of every volume of @snap.
+ * Stops at the first failure.  Returns 0 when all bricks resolve and
+ * non-zero otherwise (including when @snap is NULL).
+ */
+int32_t
+glusterd_resolve_snap_bricks (xlator_t  *this, glusterd_snap_t *snap)
+{
+        glusterd_brickinfo_t  *brick = NULL;
+        glusterd_volinfo_t    *vol   = NULL;
+        int32_t                ret   = -1;
+
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, snap, out);
+
+        cds_list_for_each_entry (vol, &snap->volumes, vol_list) {
+                cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                        ret = glusterd_resolve_brick (brick);
+                        if (!ret)
+                                continue;
+
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL,
+                                "resolve brick failed in restore");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/*
+ * Read the info file of @snap from the store and fill in its id, restored
+ * flag, status, description and time stamp.
+ *
+ * Returns 0 on success and non-zero on failure.  The store iterator is
+ * destroyed on every exit path.
+ */
+int
+glusterd_store_update_snap (glusterd_snap_t *snap)
+{
+        int                  ret      = -1;
+        char                *key      = NULL;
+        char                *value    = NULL;
+        char                 snappath[PATH_MAX] = {0,};
+        char                 path[PATH_MAX]     = {0,};
+        xlator_t            *this     = NULL;
+        glusterd_conf_t     *conf     = NULL;
+        gf_store_iter_t     *iter     = NULL;
+        gf_store_op_errno_t  op_errno = GD_STORE_SUCCESS;
+
+        this = THIS;
+        conf = this->private;
+        GF_ASSERT (snap);
+
+        GLUSTERD_GET_SNAP_DIR (snappath, snap, conf);
+
+        snprintf (path, sizeof (path), "%s/%s", snappath,
+                  GLUSTERD_SNAP_INFO_FILE);
+
+        ret = gf_store_handle_retrieve (path, &snap->shandle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HANDLE_NULL, "snap handle is NULL");
+                goto out;
+        }
+
+        ret = gf_store_iter_new (snap->shandle, &iter);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_GET_FAIL, "Failed to get new store "
+                        "iter");
+                goto out;
+        }
+
+        ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_GET_FAIL, "Failed to get next store "
+                        "iter");
+                goto out;
+        }
+
+        while (!ret) {
+                gf_msg_debug (this->name, 0, "key = %s value = %s",
+                              key, value);
+
+                if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_ID,
+                              strlen (GLUSTERD_STORE_KEY_SNAP_ID))) {
+                        /* A bad uuid is only warned about; ret is
+                         * overwritten by the next iterator call. */
+                        ret = gf_uuid_parse (value, snap->snap_id);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_UUID_PARSE_FAIL,
+                                        "Failed to parse uuid");
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_RESTORED,
+                                     strlen (GLUSTERD_STORE_KEY_SNAP_RESTORED))) {
+                        snap->snap_restored = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_STATUS,
+                                     strlen (GLUSTERD_STORE_KEY_SNAP_STATUS))) {
+                        snap->snap_status = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_DESC,
+                                     strlen (GLUSTERD_STORE_KEY_SNAP_DESC))) {
+                        snap->description = gf_strdup (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP,
+                                     strlen (GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) {
+                        snap->time_stamp = atoi (value);
+                }
+
+                GF_FREE (key);
+                GF_FREE (value);
+                key = NULL;
+                value = NULL;
+
+                ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+        }
+
+        /* The loop ended on a non-zero ret; anything but EOF is an error. */
+        if (op_errno != GD_STORE_EOF)
+                goto out;
+
+        ret = gf_store_iter_destroy (iter);
+        iter = NULL;
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_ITER_DESTROY_FAIL,
+                        "Failed to destroy store "
+                        "iter");
+        }
+
+out:
+        /* Error paths arrive here with the iterator still open. */
+        if (iter)
+                (void) gf_store_iter_destroy (iter);
+        return ret;
+}
+
+/*
+ * Restore the snapshot called @snapname from the store: create the snap
+ * object, load its info file and its volumes, then insert it into
+ * priv->snapshots ordered with glusterd_compare_snap_time.
+ *
+ * Returns 0 on success, non-zero on failure.
+ * NOTE(review): the snap object is not released on the error paths here -
+ * confirm whether that is intended.
+ */
+int32_t
+glusterd_store_retrieve_snap (char *snapname)
+{
+        int32_t           ret  = -1;
+        glusterd_snap_t  *snap = NULL;
+        glusterd_conf_t  *priv = NULL;
+        xlator_t         *this = NULL;
+
+        this = THIS;
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (snapname);
+
+        snap = glusterd_new_snap_object ();
+        if (!snap) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_OBJECT_STORE_FAIL, "Failed to create "
+                        " snap object");
+                goto out;
+        }
+
+        /* Bound the copy by the destination and always terminate it;
+         * strncpy limited by strlen (snapname) did neither.
+         * NOTE(review): assumes snap->snapname is an embedded char array,
+         * so that sizeof yields its capacity - confirm in glusterd.h. */
+        snprintf (snap->snapname, sizeof (snap->snapname), "%s", snapname);
+
+        ret = glusterd_store_update_snap (snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAPSHOT_UPDATE_FAIL,
+                        "Failed to update snapshot "
+                        "for %s snap", snapname);
+                goto out;
+        }
+
+        ret = glusterd_store_retrieve_volumes (this, snap);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_VOL_RETRIEVE_FAIL, "Failed to retrieve "
+                        "snap volumes for snap %s", snapname);
+                goto out;
+        }
+
+        /* TODO: list_add_order can do 'N-square' comparisons and
+           is not efficient. Find a better solution to store the snap
+           in order */
+        glusterd_list_add_order (&snap->snap_list, &priv->snapshots,
+                                 glusterd_compare_snap_time);
+
+out:
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Read the missed_snap_list and update the in-memory structs.
+ *
+ * Each line is split into a node-info key and a value of the form
+ * "<snap_vol_id>:<brick_num>:<brick_path>:<snap_op>:<snap_status>"; every
+ * entry is added with glusterd_add_new_entry_to_list().  A missing list
+ * file is not an error.  A line with missing fields is rejected as an
+ * invalid entry.
+ *
+ * Returns 0 on success and non-zero on failure.  The file is closed on
+ * every exit path.
+ */
+int32_t
+glusterd_store_retrieve_missed_snaps_list (xlator_t  *this)
+{
+        char                   buf[PATH_MAX]    = "";
+        char                   path[PATH_MAX]   = "";
+        char                  *snap_vol_id      = NULL;
+        char                  *missed_node_info = NULL;
+        char                  *brick_path       = NULL;
+        char                  *value            = NULL;
+        char                  *save_ptr         = NULL;
+        char                  *tok              = NULL;
+        FILE                  *fp               = NULL;
+        int32_t                brick_num        = -1;
+        int32_t                snap_op          = -1;
+        int32_t                snap_status      = -1;
+        int32_t                ret              = -1;
+        glusterd_conf_t       *priv             = NULL;
+        gf_store_op_errno_t    store_errno      = GD_STORE_SUCCESS;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Get the path of the missed_snap_list */
+        glusterd_store_missed_snaps_list_path_set (path, sizeof(path));
+
+        fp = fopen (path, "r");
+        if (!fp) {
+                /* If errno is ENOENT then there are no missed snaps yet */
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED,
+                                "Failed to open %s. ",
+                                path);
+                } else {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_MISSED_SNAP_LIST_EMPTY,
+                                "No missed snaps list.");
+                        ret = 0;
+                }
+                goto out;
+        }
+
+        do {
+                ret = gf_store_read_and_tokenize (fp, buf, sizeof (buf),
+                                                  &missed_node_info, &value,
+                                                  &store_errno);
+                if (ret) {
+                        if (store_errno == GD_STORE_EOF) {
+                                gf_msg_debug (this->name,
+                                        0,
+                                        "EOF for missed_snap_list");
+                                ret = 0;
+                                break;
+                        }
+                        gf_msg (this->name, GF_LOG_ERROR, store_errno,
+                                GD_MSG_MISSED_SNAP_GET_FAIL,
+                                "Failed to fetch data from "
+                                "missed_snaps_list.");
+                        goto out;
+                }
+
+                /* Fetch the brick_num, brick_path, snap_op and snap status.
+                 * A missing field leaves its number at -1 (or its string at
+                 * NULL), so the check below rejects the line; atoi() is
+                 * never handed a NULL token. */
+                brick_num   = -1;
+                snap_op     = -1;
+                snap_status = -1;
+                brick_path  = NULL;
+
+                snap_vol_id = value ? strtok_r (value, ":", &save_ptr) : NULL;
+
+                tok = snap_vol_id ? strtok_r (NULL, ":", &save_ptr) : NULL;
+                if (tok) {
+                        brick_num  = atoi (tok);
+                        brick_path = strtok_r (NULL, ":", &save_ptr);
+                }
+
+                tok = brick_path ? strtok_r (NULL, ":", &save_ptr) : NULL;
+                if (tok) {
+                        snap_op = atoi (tok);
+                        tok = strtok_r (NULL, ":", &save_ptr);
+                }
+                if (tok)
+                        snap_status = atoi (tok);
+
+                if (!missed_node_info || !brick_path || !snap_vol_id ||
+                    brick_num < 1 || snap_op < 1 ||
+                    snap_status < 1) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_MISSED_SNAP_ENTRY,
+                                "Invalid missed_snap_entry");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_add_new_entry_to_list (missed_node_info,
+                                                      snap_vol_id,
+                                                      brick_num,
+                                                      brick_path,
+                                                      snap_op,
+                                                      snap_status);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                                "Failed to store missed snaps_list");
+                        goto out;
+                }
+
+        } while (store_errno == GD_STORE_SUCCESS);
+
+        ret = 0;
+out:
+        /* The stream was opened read-only; nothing to flush. */
+        if (fp)
+                fclose (fp);
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Restore every snapshot stored under "<workdir>/snaps", then load the
+ * missed snaps list.
+ *
+ * A missing snaps directory is not an error.  The missed snaps list file
+ * itself is not treated as a snapshot.  Returns 0 on success.
+ */
+int32_t
+glusterd_store_retrieve_snaps (xlator_t  *this)
+{
+        char              snapdir[PATH_MAX] = {0,};
+        struct dirent     scratch[2]        = {{0,},};
+        struct dirent    *entry             = NULL;
+        DIR              *dir               = NULL;
+        glusterd_conf_t  *priv              = NULL;
+        int32_t           ret               = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        snprintf (snapdir, PATH_MAX, "%s/snaps", priv->workdir);
+
+        dir = sys_opendir (snapdir);
+
+        if (!dir) {
+                /* If snaps dir doesn't exists ignore the error for
+                   backward compatibility */
+                if (errno != ENOENT) {
+                        ret = -1;
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DIR_OP_FAILED, "Unable to open dir %s",
+                                snapdir);
+                }
+                goto out;
+        }
+
+        GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+
+        while (entry) {
+                /* Every entry except the missed snaps list is a snapshot */
+                if (strcmp (entry->d_name,
+                            GLUSTERD_MISSED_SNAPS_LIST_FILE) != 0) {
+                        ret = glusterd_store_retrieve_snap (entry->d_name);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_RESTORE_FAIL,
+                                        "Unable to restore snapshot: %s",
+                                        entry->d_name);
+                                goto out;
+                        }
+                }
+                GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+        }
+
+        /* Retrieve missed_snaps_list */
+        ret = glusterd_store_retrieve_missed_snaps_list (this);
+        if (ret)
+                gf_msg_debug (this->name, 0,
+                              "Failed to retrieve missed_snaps_list");
+
+out:
+        if (dir)
+                sys_closedir (dir);
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/*
+ * Writes all the contents of conf->missed_snap_list to @fd.
+ *
+ * One "<node_uuid>:<snap_uuid>" key with a
+ * "<snap_vol_id>:<brick_num>:<brick_path>:<op>:<status>" value is saved per
+ * pending snap operation.  Returns 0 on success, non-zero on the first
+ * failed write.
+ */
+int32_t
+glusterd_store_write_missed_snapinfo (int32_t fd)
+{
+        char                        entry_key[PATH_MAX] = "";
+        char                        entry_val[PATH_MAX] = "";
+        glusterd_missed_snap_info  *missed              = NULL;
+        glusterd_snap_op_t         *op                  = NULL;
+        glusterd_conf_t            *priv                = NULL;
+        xlator_t                   *this                = NULL;
+        int32_t                     ret                 = -1;
+
+        this = THIS;
+        GF_ASSERT(this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Write the missed_snap_entry */
+        cds_list_for_each_entry (missed, &priv->missed_snaps_list,
+                                 missed_snaps) {
+                cds_list_for_each_entry (op, &missed->snap_ops,
+                                         snap_ops_list) {
+                        snprintf (entry_key, sizeof(entry_key), "%s:%s",
+                                  missed->node_uuid, missed->snap_uuid);
+                        snprintf (entry_val, sizeof(entry_val),
+                                  "%s:%d:%s:%d:%d",
+                                  op->snap_vol_id, op->brick_num,
+                                  op->brick_path, op->op, op->status);
+
+                        ret = gf_store_save_value (fd, entry_key, entry_val);
+                        if (!ret)
+                                continue;
+
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+                                "Failed to write missed snapinfo");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Write the in-memory conf->missed_snap_list to disk.
+ *
+ * The list is written to a temporary file which is then renamed over the
+ * missed snaps list file.  If a step fails after the temporary file was
+ * created, the file is unlinked and -1 is returned.
+ */
+int32_t
+glusterd_store_update_missed_snaps ()
+{
+        xlator_t         *this   = NULL;
+        glusterd_conf_t  *priv   = NULL;
+        int32_t           tmp_fd = -1;
+        int32_t           ret    = -1;
+
+        this = THIS;
+        GF_ASSERT(this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = glusterd_store_create_missed_snaps_list_shandle_on_absence ();
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSED_SNAP_LIST_STORE_HANDLE_GET_FAIL,
+                        "Unable to obtain "
+                        "missed_snaps_list store handle.");
+                goto out;
+        }
+
+        tmp_fd = gf_store_mkstemp (priv->missed_snaps_list_shandle);
+        if (tmp_fd <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Failed to create tmp file");
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_store_write_missed_snapinfo (tmp_fd);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MISSED_SNAP_CREATE_FAIL,
+                        "Failed to write missed snaps to disk");
+                goto out;
+        }
+
+        ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Failed to rename the tmp file");
+
+out:
+        if (ret && (tmp_fd > 0)) {
+                /* Drop the half-written temporary file; still report -1. */
+                if (gf_store_unlink_tmppath (priv->missed_snaps_list_shandle)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_TMP_FILE_UNLINK_FAIL,
+                                "Failed to unlink the tmp file");
+                }
+                ret = -1;
+        }
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Remove the on-disk peer file(s) of @peerinfo and destroy its store
+ * handle.
+ *
+ * A peer without uuid is stored under its hostname.  A peer with uuid may
+ * have a hostname-named file as well as a uuid-named one: the hostname file
+ * is tried first and, if that unlink succeeds, the uuid file is left
+ * untouched.  A NULL @peerinfo, or one with neither uuid nor hostname, is
+ * a no-op.  Returns 0 on success (a missing uuid/hostname file counts as
+ * success), non-zero otherwise.
+ */
+int32_t
+glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo)
+{
+        int32_t           ret                     = -1;
+        glusterd_conf_t  *priv                    = NULL;
+        xlator_t         *this                    = NULL;
+        char              peerdir[PATH_MAX]       = {0,};
+        char              filepath[PATH_MAX]      = {0,};
+        char              hostname_path[PATH_MAX] = {0,};
+
+        /* Resolve 'this' before any jump to out: the debug log there
+         * dereferences it even on the NULL-peerinfo path. */
+        this = THIS;
+
+        if (!peerinfo) {
+                ret = 0;
+                goto out;
+        }
+
+        priv = this->private;
+
+        snprintf (peerdir, PATH_MAX, "%s/peers", priv->workdir);
+
+
+        if (gf_uuid_is_null (peerinfo->uuid)) {
+
+                if (peerinfo->hostname) {
+                        snprintf (filepath, PATH_MAX, "%s/%s", peerdir,
+                                  peerinfo->hostname);
+                } else {
+                       ret = 0;
+                       goto out;
+                }
+        } else {
+
+                snprintf (filepath, PATH_MAX, "%s/%s", peerdir,
+                          uuid_utoa (peerinfo->uuid));
+                snprintf (hostname_path, PATH_MAX, "%s/%s",
+                          peerdir, peerinfo->hostname);
+
+                ret = sys_unlink (hostname_path);
+
+                if (!ret)
+                        goto out;
+        }
+
+        ret = sys_unlink (filepath);
+        if (ret && (errno == ENOENT))
+                ret = 0;
+
+out:
+        if (peerinfo && peerinfo->shandle) {
+                gf_store_handle_destroy (peerinfo->shandle);
+                peerinfo->shandle = NULL;
+        }
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/*
+ * Write "<workdir>/peers" into @path.  @len is the size of @path and is
+ * asserted to be at least PATH_MAX.
+ */
+void
+glusterd_store_peerinfo_dirpath_set (char *path, size_t len)
+{
+        glusterd_conf_t  *conf = NULL;
+
+        GF_ASSERT (path);
+        GF_ASSERT (len >= PATH_MAX);
+
+        conf = THIS->private;
+
+        snprintf (path, len, "%s/peers", conf->workdir);
+}
+
+/* Create the peers directory under the working directory. */
+int32_t
+glusterd_store_create_peer_dir ()
+{
+        char     peers_dir[PATH_MAX];
+        int32_t  ret = 0;
+
+        glusterd_store_peerinfo_dirpath_set (peers_dir, sizeof (peers_dir));
+
+        ret = gf_store_mkdir (peers_dir);
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Write "<peers dir>/<uuid of peerinfo>" into @peerfpath.  @len is the size
+ * of @peerfpath and is asserted to be at least PATH_MAX.
+ */
+static void
+glusterd_store_uuid_peerpath_set (glusterd_peerinfo_t *peerinfo, char *peerfpath,
+                                  size_t len)
+{
+        char  uuid_str[50] = {0};
+        char  peers_dir[PATH_MAX];
+
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (peerfpath);
+        GF_ASSERT (len >= PATH_MAX);
+
+        glusterd_store_peerinfo_dirpath_set (peers_dir, sizeof (peers_dir));
+
+        gf_uuid_unparse (peerinfo->uuid, uuid_str);
+
+        snprintf (peerfpath, len, "%s/%s", peers_dir, uuid_str);
+}
+
+/*
+ * Format the hostname-named store path of @peerinfo,
+ * "<peer store dir>/<peerinfo->hostname>", into @peerfpath.
+ *
+ * @len is the size of @peerfpath and is asserted to be at least PATH_MAX.
+ */
+static void
+glusterd_store_hostname_peerpath_set (glusterd_peerinfo_t *peerinfo,
+                                      char *peerfpath, size_t len)
+{
+        char store_dir[PATH_MAX];
+
+        GF_ASSERT (peerinfo);
+        GF_ASSERT (peerfpath);
+        GF_ASSERT (len >= PATH_MAX);
+
+        glusterd_store_peerinfo_dirpath_set (store_dir, sizeof (store_dir));
+
+        snprintf (peerfpath, len, "%s/%s", store_dir, peerinfo->hostname);
+}
+
+/*
+ * Point peerinfo->shandle at the hostname-named store file, going through
+ * gf_store_handle_create_on_absence().
+ *
+ * Returns the result of gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_peerinfo_hostname_shandle_create (glusterd_peerinfo_t *peerinfo)
+{
+        char store_path[PATH_MAX];
+
+        glusterd_store_hostname_peerpath_set (peerinfo, store_path,
+                                              sizeof (store_path));
+
+        return gf_store_handle_create_on_absence (&peerinfo->shandle,
+                                                  store_path);
+}
+
+/*
+ * Point peerinfo->shandle at the uuid-named store file, going through
+ * gf_store_handle_create_on_absence().
+ *
+ * Returns the result of gf_store_handle_create_on_absence().
+ */
+int32_t
+glusterd_store_peerinfo_uuid_shandle_create (glusterd_peerinfo_t *peerinfo)
+{
+        char store_path[PATH_MAX];
+
+        glusterd_store_uuid_peerpath_set (peerinfo, store_path,
+                                          sizeof (store_path));
+
+        return gf_store_handle_create_on_absence (&peerinfo->shandle,
+                                                  store_path);
+}
+
+/*
+ * If a hostname-named store file exists for @peerinfo, drop the current
+ * store handle and unlink that file.
+ *
+ * Returns the sys_stat() result when the file cannot be stat'ed (nothing is
+ * changed in that case), otherwise the result of sys_unlink().
+ */
+int32_t
+glusterd_peerinfo_hostname_shandle_check_destroy (glusterd_peerinfo_t *peerinfo)
+{
+        struct stat st = {0,};
+        char        host_path[PATH_MAX];
+        int32_t     rc = -1;
+
+        glusterd_store_hostname_peerpath_set (peerinfo, host_path,
+                                              sizeof (host_path));
+
+        rc = sys_stat (host_path, &st);
+        if (rc)
+                return rc;
+
+        if (peerinfo->shandle)
+                gf_store_handle_destroy (peerinfo->shandle);
+        peerinfo->shandle = NULL;
+
+        return sys_unlink (host_path);
+}
+
+/*
+ * Create the store handle for @peerinfo: hostname-named while the peer has
+ * no uuid, uuid-named otherwise. In the uuid case any hostname-named file is
+ * removed first; the outcome of that removal is not reported.
+ *
+ * Returns the result of the handle-creation call.
+ */
+int32_t
+glusterd_store_create_peer_shandle (glusterd_peerinfo_t *peerinfo)
+{
+        GF_ASSERT (peerinfo);
+
+        if (!gf_uuid_is_null (peerinfo->uuid)) {
+                /* Result deliberately unused, as in the original flow */
+                (void) glusterd_peerinfo_hostname_shandle_check_destroy (peerinfo);
+                return glusterd_store_peerinfo_uuid_shandle_create (peerinfo);
+        }
+
+        return glusterd_store_peerinfo_hostname_shandle_create (peerinfo);
+}
+
+/*
+ * Write the key/value records of @peerinfo to the store file open on @fd:
+ * the uuid, the numeric state, and one GLUSTERD_STORE_KEY_PEER_HOSTNAME<i>
+ * record (i starting at 1) per entry of peerinfo->hostnames.
+ *
+ * Returns 0 on success; a negative gf_asprintf() result or the non-zero
+ * gf_store_save_value() result on failure.
+ */
+int32_t
+glusterd_store_peer_write (int fd, glusterd_peerinfo_t *peerinfo)
+{
+        char                      buf[50] = {0};
+        int32_t                   ret = 0;
+        int32_t                   i = 1;
+        glusterd_peer_hostname_t *hostname = NULL;
+        char                     *key = NULL;
+
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID,
+                                   uuid_utoa (peerinfo->uuid));
+        if (ret)
+                goto out;
+
+        snprintf (buf, sizeof (buf), "%d", peerinfo->state.state);
+        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf);
+        if (ret)
+                goto out;
+
+        cds_list_for_each_entry (hostname, &peerinfo->hostnames,
+                                 hostname_list) {
+                ret = gf_asprintf (&key, GLUSTERD_STORE_KEY_PEER_HOSTNAME"%d",
+                                   i);
+                if (ret < 0)
+                        goto out;
+                ret = gf_store_save_value (fd, key, hostname->hostname);
+                if (ret)
+                        goto out;
+                GF_FREE (key);
+                key = NULL;
+                i++;
+        }
+
+out:
+        /* Non-NULL only when a save failed mid-loop; without this the
+         * formatted key was leaked on that path. */
+        if (key)
+                GF_FREE (key);
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Persist @peerinfo through its store handle: open a temporary file, write
+ * the peer records into it and rename it into place. If writing or renaming
+ * fails the temporary path is unlinked.
+ *
+ * Returns 0 on success, -1 when the temporary file cannot be opened,
+ * otherwise the failing step's return value.
+ */
+int32_t
+glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo)
+{
+        int     tmp_fd = -1;
+        int32_t rc = -1;
+
+        GF_ASSERT (peerinfo);
+
+        tmp_fd = gf_store_mkstemp (peerinfo->shandle);
+        if (tmp_fd > 0) {
+                rc = glusterd_store_peer_write (tmp_fd, peerinfo);
+                if (!rc)
+                        rc = gf_store_rename_tmppath (peerinfo->shandle);
+
+                if (rc)
+                        gf_store_unlink_tmppath (peerinfo->shandle);
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Store @peerinfo on disk in three steps: make sure the peer directory
+ * exists, create the peer's store handle, then write the peer file.
+ *
+ * Returns 0 on success or the first non-zero step result.
+ */
+int32_t
+glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo)
+{
+        int32_t rc = -1;
+
+        GF_ASSERT (peerinfo);
+
+        rc = glusterd_store_create_peer_dir ();
+        if (!rc)
+                rc = glusterd_store_create_peer_shandle (peerinfo);
+        if (!rc)
+                rc = glusterd_store_perform_peer_store (peerinfo);
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", rc);
+        return rc;
+}
+
+/*
+ * Rebuild the in-memory peer list from the peer store directory
+ * ("<workdir>/<GLUSTERD_PEER_DIR_PREFIX>").
+ *
+ * Every directory entry is read as a key/value store file: a fresh peerinfo
+ * is filled from the uuid, state and hostname records, its primary hostname
+ * is set to the first address read, and it is added through
+ * glusterd_friend_add_from_peerinfo(). Once all files are loaded an rpc
+ * object is created for every peer in priv->peers.
+ *
+ * Returns 0 on success, non-zero on the first failure.
+ */
+int32_t
+glusterd_store_retrieve_peers (xlator_t *this)
+{
+        int32_t                   ret = 0;
+        glusterd_conf_t          *priv = NULL;
+        DIR                      *dir = NULL;
+        struct dirent            *entry = NULL;
+        struct dirent             scratch[2] = {{0,},};
+        char                      path[PATH_MAX] = {0,};
+        glusterd_peerinfo_t      *peerinfo = NULL;
+        gf_store_handle_t        *shandle = NULL;
+        char                      filepath[PATH_MAX] = {0,};
+        gf_store_iter_t          *iter = NULL;
+        char                     *key = NULL;
+        char                     *value = NULL;
+        glusterd_peerctx_args_t   args = {0};
+        gf_store_op_errno_t       op_errno = GD_STORE_SUCCESS;
+        glusterd_peer_hostname_t *address = NULL;
+
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
+                  GLUSTERD_PEER_DIR_PREFIX);
+
+        dir = sys_opendir (path);
+
+        if (!dir) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Unable to open dir %s", path);
+                ret = -1;
+                goto out;
+        }
+
+        GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+
+        while (entry) {
+                snprintf (filepath, PATH_MAX, "%s/%s", path, entry->d_name);
+                ret = gf_store_handle_retrieve (filepath, &shandle);
+                if (ret)
+                        goto out;
+
+                ret = gf_store_iter_new (shandle, &iter);
+                if (ret)
+                        goto out;
+
+                ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+                if (ret)
+                        goto out;
+
+                /* Create an empty peerinfo object before reading in the
+                 * details
+                 */
+                peerinfo = glusterd_peerinfo_new (GD_FRIEND_STATE_DEFAULT, NULL,
+                                                  NULL, 0);
+                if (peerinfo == NULL) {
+                        /* The first record was already read; release it
+                         * instead of leaking it on this exit. */
+                        GF_FREE (key);
+                        GF_FREE (value);
+                        key = NULL;
+                        value = NULL;
+                        ret = -1;
+                        goto out;
+                }
+
+                while (!ret) {
+
+                        if (!strncmp (GLUSTERD_STORE_KEY_PEER_UUID, key,
+                                      strlen (GLUSTERD_STORE_KEY_PEER_UUID))) {
+                                if (value)
+                                        gf_uuid_parse (value, peerinfo->uuid);
+                        } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_STATE,
+                                    key,
+                                    strlen (GLUSTERD_STORE_KEY_PEER_STATE))) {
+                                peerinfo->state.state = atoi (value);
+                        } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_HOSTNAME,
+                                   key,
+                                   strlen (GLUSTERD_STORE_KEY_PEER_HOSTNAME))) {
+                                ret = gd_add_address_to_peer (peerinfo, value);
+                        } else {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_UNKNOWN_KEY, "Unknown key: %s",
+                                        key);
+                        }
+
+                        GF_FREE (key);
+                        GF_FREE (value);
+                        key = NULL;
+                        value = NULL;
+
+                        ret = gf_store_iter_get_next (iter, &key, &value,
+                                                      &op_errno);
+                }
+                /* Anything other than end-of-file is a read error */
+                if (op_errno != GD_STORE_EOF) {
+                        goto out;
+                }
+
+                (void) gf_store_iter_destroy (iter);
+                /* Cleared so the exit path never destroys it twice */
+                iter = NULL;
+
+                /* Set first hostname from peerinfo->hostnames to
+                 * peerinfo->hostname
+                 *
+                 * cds_list_entry() on an empty list yields a bogus non-NULL
+                 * pointer, so emptiness is tested on the list head itself.
+                 */
+                if (peerinfo->hostnames.next == &peerinfo->hostnames) {
+                        ret = -1;
+                        goto out;
+                }
+                address = cds_list_entry (peerinfo->hostnames.next,
+                                          glusterd_peer_hostname_t,
+                                          hostname_list);
+                peerinfo->hostname = gf_strdup (address->hostname);
+                if (!peerinfo->hostname) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_friend_add_from_peerinfo (peerinfo, 1, NULL);
+                if (ret)
+                        goto out;
+
+                peerinfo->shandle = shandle;
+                peerinfo = NULL;
+                GF_FOR_EACH_ENTRY_IN_DIR (entry, dir, scratch);
+        }
+
+        args.mode = GD_MODE_ON;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+                ret = glusterd_friend_rpc_create (this, peerinfo, &args);
+                if (ret)
+                        break;
+        }
+        rcu_read_unlock ();
+        /* The cursor may point at a live list member; never clean it up */
+        peerinfo = NULL;
+
+out:
+        /* Only set when an error interrupted the handling of one file */
+        if (iter)
+                (void) gf_store_iter_destroy (iter);
+
+        if (peerinfo)
+                glusterd_peerinfo_cleanup (peerinfo);
+
+        if (dir)
+                sys_closedir (dir);
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/* Bricks for snap volumes are hosted at /var/run/gluster/snaps.
+ * A restored volume points to the bricks of the snap volume it was
+ * restored from, so after a node restart these paths have to be
+ * recreated and re-mounted.
+ *
+ * Walks the volumes restored from a snapshot first, then every volume of
+ * every snapshot, and stops at the first failure.
+ *
+ * Returns 0 on success, otherwise the failing
+ * glusterd_recreate_vol_brick_mounts() result.
+ */
+int32_t
+glusterd_recreate_all_snap_brick_mounts (xlator_t *this)
+{
+        glusterd_conf_t    *priv = NULL;
+        glusterd_volinfo_t *volinfo = NULL;
+        glusterd_snap_t    *snap = NULL;
+        int32_t             ret = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* Volumes restored from snaps: a null restored_from_snap uuid
+         * marks a volume that was never restored, so it is skipped */
+        cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                if (!gf_uuid_is_null (volinfo->restored_from_snap)) {
+                        ret = glusterd_recreate_vol_brick_mounts (this,
+                                                                  volinfo);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_BRK_MNT_RECREATE_FAIL,
+                                        "Failed to recreate brick mounts "
+                                        "for %s", volinfo->volname);
+                                goto out;
+                        }
+                }
+        }
+
+        /* Volumes that belong to snapshots */
+        cds_list_for_each_entry (snap, &priv->snapshots, snap_list) {
+                cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+                        ret = glusterd_recreate_vol_brick_mounts (this,
+                                                                  volinfo);
+                        if (!ret)
+                                continue;
+
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRK_MNT_RECREATE_FAIL,
+                                "Failed to recreate brick mounts "
+                                "for %s", snap->snapname);
+                        goto out;
+                }
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* When the snapshot command from cli is received, the on disk and
+ * in memory structures for the snapshot are created with the status
+ * marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is taken,
+ * the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd dies
+ * after taking the backend snapshot but before updating the status,
+ * then when glusterd comes up it should treat that snapshot as a
+ * failed snapshot and clean it up.
+ *
+ * A restore operation starts by setting the status to
+ * GD_SNAP_STATUS_RESTORED. If the server goes down before the status
+ * is changed back, the partial restore has to be reverted.
+ *
+ * Returns 0 on success, -1 if the scratch dict cannot be created,
+ * otherwise the result of the failing revert/remove call.
+ */
+int32_t
+glusterd_snap_cleanup (xlator_t *this)
+{
+        glusterd_conf_t *priv = NULL;
+        glusterd_snap_t *snap = NULL;
+        glusterd_snap_t *next_snap = NULL;
+        dict_t          *dict = NULL;
+        int32_t          ret = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        dict = dict_new();
+        if (!dict) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_CREATE_FAIL,
+                        "Failed to create dict");
+                ret = -1;
+                goto out;
+        }
+
+        cds_list_for_each_entry_safe (snap, next_snap, &priv->snapshots,
+                                      snap_list) {
+                /* Healthy snapshots need no attention */
+                if (snap->snap_status == GD_SNAP_STATUS_IN_USE)
+                        continue;
+
+                if (snap->snap_status == GD_SNAP_STATUS_RESTORED) {
+                        ret = glusterd_snapshot_revert_restore_from_snap (snap);
+                        if (!ret)
+                                continue;
+
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_RESTORE_REVERT_FAIL,
+                                "Failed to "
+                                "revert partially restored snapshot "
+                                "(%s)", snap->snapname);
+                        goto out;
+                }
+
+                /* Any other status: remove the snapshot */
+                ret = glusterd_snap_remove (dict, snap,
+                                            _gf_true, _gf_true,
+                                            _gf_false);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL,
+                                "Failed to remove the snapshot %s",
+                                snap->snapname);
+                        goto out;
+                }
+        }
+
+out:
+        if (dict)
+                dict_unref (dict);
+
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Resolve every brick of every volume in priv->volumes, then the bricks of
+ * every snapshot in priv->snapshots. Stops at the first failure.
+ *
+ * Returns 0 on success, otherwise the failing resolver's result.
+ */
+int32_t
+glusterd_resolve_all_bricks (xlator_t *this)
+{
+        glusterd_conf_t      *priv = NULL;
+        glusterd_volinfo_t   *volinfo = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        glusterd_snap_t      *snap = NULL;
+        int32_t               ret = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GF_ASSERT (priv);
+
+        /* Bricks of regular volumes */
+        cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                        ret = glusterd_resolve_brick (brickinfo);
+                        if (!ret)
+                                continue;
+
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL,
+                                "resolve brick failed in restore");
+                        goto out;
+                }
+        }
+
+        /* Bricks of snapshot volumes */
+        cds_list_for_each_entry (snap, &priv->snapshots, snap_list) {
+                ret = glusterd_resolve_snap_bricks (this, snap);
+                if (!ret)
+                        continue;
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_RESOLVE_BRICK_FAIL,
+                        "resolving the snap bricks"
+                        " failed for snap: %s",
+                        snap->snapname);
+                goto out;
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Restore glusterd state from the on-disk store, in this order: volumes,
+ * peers, snapshots, brick resolution, snapshot cleanup and finally the
+ * snapshot brick mounts. The first failing step aborts the sequence.
+ *
+ * Returns 0 on success, otherwise the failing step's result.
+ */
+int32_t
+glusterd_restore ()
+{
+        xlator_t *this = NULL;
+        int32_t   ret = -1;
+
+        this = THIS;
+
+        ret = glusterd_store_retrieve_volumes (this, NULL);
+        if (!ret)
+                ret = glusterd_store_retrieve_peers (this);
+
+        /* While retrieving snapshots, a snapshot whose status is not
+         * GD_SNAP_STATUS_IN_USE gets cleaned up. That requires stopping
+         * the snapshot volume's bricks, which requires resolving them,
+         * and resolving fails unless the peers are known. Hence the
+         * snapshots are retrieved only after the peers.
+         */
+        if (!ret)
+                ret = glusterd_store_retrieve_snaps (this);
+
+        if (!ret)
+                ret = glusterd_resolve_all_bricks (this);
+
+        if (ret)
+                goto out;
+
+        ret = glusterd_snap_cleanup (this);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_CLEANUP_FAIL, "Failed to perform "
+                        "a cleanup of the snapshots");
+                goto out;
+        }
+
+        ret = glusterd_recreate_all_snap_brick_mounts (this);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_BRK_MNT_RECREATE_FAIL, "Failed to recreate "
+                        "all snap brick mounts");
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Read the "version" record of the volume's quota checksum file
+ * ("<volume dir>/<GLUSTERD_VOL_QUOTA_CKSUM_FILE>") into
+ * volinfo->quota_conf_version.
+ *
+ * A missing "version" record is not an error. When the stored text makes
+ * strtoul() report ERANGE or EINVAL, quota_conf_version is left untouched
+ * and the function still returns the 0 left by the successful read.
+ *
+ * Returns 0 in those cases and on success, or the non-zero
+ * gf_store_handle_new() result when the store handle cannot be created.
+ */
+int
+glusterd_store_retrieve_quota_version (glusterd_volinfo_t *volinfo)
+{
+        int                ret = -1;
+        uint32_t           version = 0;
+        char               cksum_path[PATH_MAX] = {0,};
+        char               path[PATH_MAX] = {0,};
+        char              *version_str = NULL;
+        char              *tmp = NULL;
+        xlator_t          *this = NULL;
+        glusterd_conf_t   *conf = NULL;
+        gf_store_handle_t *handle = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GLUSTERD_GET_VOLUME_DIR (path, volinfo, conf);
+        snprintf (cksum_path, sizeof (cksum_path), "%s/%s", path,
+                  GLUSTERD_VOL_QUOTA_CKSUM_FILE);
+
+        ret = gf_store_handle_new (cksum_path, &handle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STORE_HANDLE_GET_FAIL,
+                        "Unable to get store handle "
+                        "for %s", cksum_path);
+                goto out;
+        }
+
+        ret = gf_store_retrieve_value (handle, "version", &version_str);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Version absent");
+                ret = 0;
+                goto out;
+        }
+
+        /* strtoul() only sets errno on failure, so clear it first;
+         * otherwise a stale ERANGE/EINVAL from an earlier call would
+         * reject a perfectly valid version. */
+        errno = 0;
+        version = strtoul (version_str, &tmp, 10);
+        if ((errno == ERANGE) || (errno == EINVAL)) {
+                gf_msg_debug (this->name, 0, "Invalid version number");
+                goto out;
+        }
+        volinfo->quota_conf_version = version;
+        ret = 0;
+
+out:
+        if (version_str)
+                GF_FREE (version_str);
+        gf_store_handle_destroy (handle);
+        return ret;
+}
+
+/*
+ * Write volinfo->quota_conf_cksum and volinfo->quota_conf_version as the
+ * "cksum" and "version" records of the volume's quota checksum file. The
+ * records go to a temporary file that is renamed into place; the temporary
+ * path is unlinked when a step fails with a negative result.
+ *
+ * Returns 0 on success, -1 when the temporary file cannot be opened,
+ * otherwise the failing step's result.
+ */
+int
+glusterd_store_save_quota_version_and_cksum (glusterd_volinfo_t *volinfo)
+{
+        char               voldir[PATH_MAX] = {0};
+        char               cksum_path[PATH_MAX] = {0,};
+        char               text[256] = {0};
+        gf_store_handle_t *shandle = NULL;
+        glusterd_conf_t   *conf = NULL;
+        xlator_t          *this = NULL;
+        int                fd = -1;
+        int32_t            ret = -1;
+
+        this = THIS;
+        conf = this->private;
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, conf);
+        snprintf (cksum_path, sizeof (cksum_path), "%s/%s", voldir,
+                  GLUSTERD_VOL_QUOTA_CKSUM_FILE);
+
+        ret = gf_store_handle_new (cksum_path, &shandle);
+        if (ret)
+                goto out;
+
+        fd = gf_store_mkstemp (shandle);
+        if (fd <= 0) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Record 1: checksum */
+        snprintf (text, sizeof (text)-1, "%u", volinfo->quota_conf_cksum);
+        ret = gf_store_save_value (fd, "cksum", text);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CKSUM_STORE_FAIL, "Failed to store cksum");
+                goto out;
+        }
+
+        /* Record 2: version */
+        memset (text, 0, sizeof (text));
+        snprintf (text, sizeof (text)-1, "%u", volinfo->quota_conf_version);
+        ret = gf_store_save_value (fd, "version", text);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VERS_STORE_FAIL, "Failed to store version");
+                goto out;
+        }
+
+        ret = gf_store_rename_tmppath (shandle);
+
+out:
+        if ((ret < 0) && (fd > 0))
+                gf_store_unlink_tmppath (shandle);
+        gf_store_handle_destroy (shandle);
+        return ret;
+}
+
+/*
+ * Write the quota.conf header to @fd: QUOTA_CONF_HEADER_1_1 when the cluster
+ * op-version is below GD_OP_VERSION_3_7_0, QUOTA_CONF_HEADER otherwise.
+ *
+ * Returns 0 when the whole header was written, -1 otherwise (an error is
+ * logged in that case).
+ */
+int32_t
+glusterd_quota_conf_write_header (int fd)
+{
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = NULL;
+        char            *header = NULL;
+        int              header_len = 0;
+        int              ret = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("quota", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        /* Pick the header text first, then do a single write */
+        if (conf->op_version < GD_OP_VERSION_3_7_0)
+                header = QUOTA_CONF_HEADER_1_1;
+        else
+                header = QUOTA_CONF_HEADER;
+
+        header_len = strlen (header);
+        ret = gf_nwrite (fd, header, header_len);
+
+        /* Anything but a complete write is a failure */
+        ret = (ret == header_len) ? 0 : -1;
+
+out:
+        if (ret < 0)
+                gf_msg_callingfn ("quota", GF_LOG_ERROR, 0,
+                                  GD_MSG_QUOTA_CONF_WRITE_FAIL,
+                                  "failed to write "
+                                  "header to a quota conf");
+
+        return ret;
+}
+
+/*
+ * Append one gfid entry to the quota.conf open on @fd: the 16 bytes at @buf,
+ * followed by the single @type byte when the cluster op-version is at least
+ * GD_OP_VERSION_3_7_0.
+ *
+ * Returns 0 when every byte was written, -1 otherwise (an error is logged
+ * in that case).
+ */
+int32_t
+glusterd_quota_conf_write_gfid (int fd, void *buf, char type)
+{
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = NULL;
+        int              ret = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("quota", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        if (gf_nwrite (fd, buf, 16) != 16) {
+                ret = -1;
+                goto out;
+        }
+
+        /* The type byte is only written from op-version 3.7.0 onwards */
+        if ((conf->op_version >= GD_OP_VERSION_3_7_0) &&
+            (gf_nwrite (fd, &type, 1) != 1)) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (ret < 0)
+                gf_msg_callingfn ("quota", GF_LOG_ERROR, 0,
+                                  GD_MSG_QUOTA_CONF_WRITE_FAIL,
+                                  "failed to write "
+                                  "gfid %s to a quota conf", uuid_utoa (buf));
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
new file mode 100644
index 00000000000..afb04cb5ec6
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -0,0 +1,195 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_HA_H_
+#define _GLUSTERD_HA_H_
+
+#include <pthread.h>
+#include "compat-uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "run.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "rpcsvc.h"
+
+typedef enum glusterd_store_ver_ac_{ /* type of the 'ac' argument of glusterd_store_volinfo() and glusterd_perform_volinfo_version_action() */
+ GLUSTERD_VOLINFO_VER_AC_NONE = 0, /* NOTE(review): presumably leaves the volinfo version unchanged -- confirm */
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1, /* NOTE(review): presumably bumps the volinfo version -- confirm */
+ GLUSTERD_VOLINFO_VER_AC_DECREMENT = 2, /* NOTE(review): presumably lowers the volinfo version -- confirm */
+} glusterd_volinfo_ver_ac_t;
+
+
+#define GLUSTERD_STORE_UUID_KEY "UUID"
+
+#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
+#define GLUSTERD_STORE_KEY_VOL_COUNT "count"
+#define GLUSTERD_STORE_KEY_VOL_STATUS "status"
+#define GLUSTERD_STORE_KEY_VOL_PORT "port"
+#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
+#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
+#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count"
+#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
+#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
+#define GLUSTERD_STORE_KEY_VOL_ARBITER_CNT "arbiter_count"
+#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
+#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
+#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
+#define GLUSTERD_STORE_KEY_VOL_ID "volume-id"
+#define GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP "restored_from_snap"
+#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status"
+#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src"
+#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst"
+#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS "status"
+#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op"
+#define GLUSTERD_STORE_KEY_USERNAME "username"
+#define GLUSTERD_STORE_KEY_PASSWORD "password"
+#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname"
+#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
+#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
+#define GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION "quota-version"
+
+#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type"
+#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count"
+#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count"
+#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count"
+#define GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT "cold_redundancy_count"
+#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type"
+#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count"
+#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count"
+
+#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
+#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
+#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
+#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp"
+#define GLUSTERD_STORE_KEY_SNAP_STATUS "status"
+#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored"
+#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit"
+#define GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE "auto-delete"
+#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit"
+#define GLUSTERD_STORE_KEY_SNAPD_PORT "snapd-port"
+#define GLUSTERD_STORE_KEY_SNAP_ACTIVATE "snap-activate-on-create"
+#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
+
+#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"
+#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
+#define GLUSTERD_STORE_KEY_BRICK_REAL_PATH "real_path"
+#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port"
+#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port"
+#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"
+#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg"
+#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path"
+#define GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR "mount_dir"
+#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status"
+#define GLUSTERD_STORE_KEY_BRICK_FSTYPE "fs-type"
+#define GLUSTERD_STORE_KEY_BRICK_MNTOPTS "mnt-opts"
+#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id"
+
+#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" /* peer store file: key of the peer's uuid record */
+#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" /* peer store file: key prefix of address records; the writer appends a 1-based index */
+#define GLUSTERD_STORE_KEY_PEER_STATE "state" /* peer store file: key of the numeric peer state record */
+
+#define GLUSTERD_STORE_KEY_VOL_CAPS "caps"
+
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES "rebalanced-files"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE "size"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED "scanned"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES "failures"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED "skipped"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME "run-time"
+
+int32_t
+glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac);
+
+int32_t
+glusterd_store_delete_volume (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_delete_snap (glusterd_snap_t *snap);
+
+int32_t
+glusterd_retrieve_uuid ();
+
+int32_t
+glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo);
+
+int32_t
+glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo);
+
+int32_t
+glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo,
+ char *delete_path);
+
+int32_t
+glusterd_restore ();
+
+void
+glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac);
+gf_boolean_t
+glusterd_store_is_valid_brickpath (char *volname, char *brick);
+
+int32_t
+glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_retrieve_op_version (xlator_t *this, int *op_version);
+
+int
+glusterd_store_global_info (xlator_t *this);
+
+int32_t
+glusterd_store_retrieve_options (xlator_t *this);
+
+int32_t
+glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_options (xlator_t *this, dict_t *opts);
+
+void
+glusterd_replace_slash_with_hyphen (char *str);
+
+int32_t
+glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_create_quota_conf_sh_on_absence (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_store_retrieve_quota_version (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_store_save_quota_version_and_cksum (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_snap (glusterd_snap_t *snap);
+
+int32_t
+glusterd_store_update_missed_snaps ();
+
+glusterd_volinfo_t*
+glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap);
+
+int
+glusterd_restore_op_version (xlator_t *this);
+
+int32_t
+glusterd_quota_conf_write_header (int fd);
+
+int32_t
+glusterd_quota_conf_write_gfid (int fd, void *buf, char type);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
new file mode 100644
index 00000000000..44ee6d08d68
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -0,0 +1,251 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterfs.h"
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-bitd-svc.h"
+#include "glusterd-scrub-svc.h"
+#include "glusterd-svc-helper.h"
+#include "syscall.h"
+
+/*
+ * Reconfigure the node-level services in order: nfs, shd, and (unless the
+ * cluster op-version equals GD_OP_VERSION_MIN) quotad, bitd and scrub.
+ *
+ * Returns 0 on success or the first non-zero reconfigure result.
+ */
+int
+glusterd_svcs_reconfigure ()
+{
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = THIS;
+        int              ret = 0;
+
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = glusterd_nfssvc_reconfigure ();
+        if (ret)
+                return ret;
+
+        ret = glusterd_shdsvc_reconfigure ();
+        if (ret)
+                return ret;
+
+        /* The remaining services are skipped at the minimum op-version */
+        if (conf->op_version == GD_OP_VERSION_MIN)
+                return ret;
+
+        ret = glusterd_quotadsvc_reconfigure ();
+        if (ret)
+                return ret;
+
+        ret = glusterd_bitdsvc_reconfigure ();
+        if (ret)
+                return ret;
+
+        return glusterd_scrubsvc_reconfigure ();
+}
+
+/*
+ * Stop the node-level services in order: nfs with SIGKILL, then shd, quotad,
+ * bitd and scrub with SIGTERM. The first failure ends the sequence.
+ *
+ * Returns 0 on success or the first non-zero glusterd_svc_stop() result.
+ */
+int
+glusterd_svcs_stop ()
+{
+        glusterd_conf_t *priv = NULL;
+        xlator_t        *this = NULL;
+        int              ret = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = glusterd_svc_stop (&(priv->nfs_svc), SIGKILL);
+        if (ret)
+                return ret;
+
+        ret = glusterd_svc_stop (&(priv->shd_svc), SIGTERM);
+        if (ret)
+                return ret;
+
+        ret = glusterd_svc_stop (&(priv->quotad_svc), SIGTERM);
+        if (ret)
+                return ret;
+
+        ret = glusterd_svc_stop (&(priv->bitd_svc), SIGTERM);
+        if (ret)
+                return ret;
+
+        return glusterd_svc_stop (&(priv->scrub_svc), SIGTERM);
+}
+
+/*
+ * Invoke the manager callback of each node-level service with
+ * PROC_START_NO_WAIT: nfs, shd, and (unless the cluster op-version equals
+ * GD_OP_VERSION_MIN) quotad, bitd and scrub. Nothing is done for a snapshot
+ * volume. A -EINVAL result from any manager other than nfs is treated as
+ * success.
+ *
+ * @volinfo: passed to the shd and quotad managers; may be NULL.
+ *
+ * Returns 0 on success or the first remaining non-zero manager result.
+ */
+int
+glusterd_svcs_manager (glusterd_volinfo_t *volinfo)
+{
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = THIS;
+        int              ret = 0;
+
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (volinfo && volinfo->is_snap_volume)
+                return 0;
+
+        ret = conf->nfs_svc.manager (&(conf->nfs_svc), NULL,
+                                     PROC_START_NO_WAIT);
+        if (ret)
+                return ret;
+
+        ret = conf->shd_svc.manager (&(conf->shd_svc), volinfo,
+                                     PROC_START_NO_WAIT);
+        if (ret && (ret != -EINVAL))
+                return ret;
+
+        /* The remaining services are skipped at the minimum op-version */
+        if (conf->op_version == GD_OP_VERSION_MIN)
+                return 0;
+
+        ret = conf->quotad_svc.manager (&(conf->quotad_svc), volinfo,
+                                        PROC_START_NO_WAIT);
+        if (ret && (ret != -EINVAL))
+                return ret;
+
+        ret = conf->bitd_svc.manager (&(conf->bitd_svc), NULL,
+                                      PROC_START_NO_WAIT);
+        if (ret && (ret != -EINVAL))
+                return ret;
+
+        ret = conf->scrub_svc.manager (&(conf->scrub_svc), NULL,
+                                       PROC_START_NO_WAIT);
+        if (ret == -EINVAL)
+                ret = 0;
+
+        return ret;
+}
+
+
+/*
+ * Generate a fresh volfile for service @svc_name into a temporary file under
+ * /tmp using @builder, and compare it with the service's current volfile
+ * through glusterd_check_files_identical(), which receives @identical.
+ * The temporary file is removed before returning.
+ *
+ * Returns -1 when the temporary file cannot be created, otherwise the result
+ * of volfile generation or of the comparison call.
+ */
+int
+glusterd_svc_check_volfile_identical (char *svc_name,
+                                      glusterd_graph_builder_t builder,
+                                      gf_boolean_t *identical)
+{
+        char             current_vol[PATH_MAX] = {0,};
+        char             scratch_vol[PATH_MAX] = {0,};
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = NULL;
+        int              scratch_fd = -1;
+        int              ret = -1;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (identical);
+        conf = this->private;
+
+        glusterd_svc_build_volfile_path (svc_name, conf->workdir,
+                                         current_vol, sizeof (current_vol));
+
+        snprintf (scratch_vol, sizeof (scratch_vol), "/tmp/g%s-XXXXXX",
+                  svc_name);
+
+        scratch_fd = mkstemp (scratch_vol);
+        if (scratch_fd < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        GD_MSG_FILE_OP_FAILED, "Unable to create temp file"
+                        " %s:(%s)", scratch_vol, strerror (errno));
+                /* Nothing was created, so there is nothing to clean up */
+                return ret;
+        }
+
+        ret = glusterd_create_global_volfile (builder, scratch_vol, NULL);
+        if (!ret)
+                ret = glusterd_check_files_identical (current_vol, scratch_vol,
+                                                      identical);
+
+        sys_unlink (scratch_vol);
+        sys_close (scratch_fd);
+
+        return ret;
+}
+
+/*
+ * Generate a fresh volfile for service @svc_name into a temporary file under
+ * /tmp using @builder, and compare its topology with the service's current
+ * volfile through glusterd_check_topology_identical(), which receives
+ * @identical. The temporary file is closed and removed before returning.
+ *
+ * Returns -1 when @identical, THIS or its private data is NULL or when the
+ * temporary file cannot be created; otherwise the result of volfile
+ * generation or of the comparison call.
+ */
+int
+glusterd_svc_check_topology_identical (char *svc_name,
+                                       glusterd_graph_builder_t builder,
+                                       gf_boolean_t *identical)
+{
+        char             current_vol[PATH_MAX] = {0,};
+        char             scratch_vol[PATH_MAX] = {0,};
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = THIS;
+        int              scratch_fd = -1;
+        int              ret = -1;
+
+        if (!identical || !this || !this->private)
+                goto out;
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        /* Path of the volfile currently in use */
+        glusterd_svc_build_volfile_path (svc_name, conf->workdir,
+                                         current_vol, sizeof (current_vol));
+
+        /* Scratch file that receives the regenerated volfile */
+        snprintf (scratch_vol, sizeof (scratch_vol), "/tmp/g%s-XXXXXX",
+                  svc_name);
+        scratch_fd = mkstemp (scratch_vol);
+        if (scratch_fd < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        GD_MSG_FILE_OP_FAILED, "Unable to create temp file"
+                        " %s:(%s)", scratch_vol, strerror (errno));
+                goto out;
+        }
+
+        ret = glusterd_create_global_volfile (builder, scratch_vol, NULL);
+        if (!ret)
+                ret = glusterd_check_topology_identical (current_vol,
+                                                         scratch_vol,
+                                                         identical);
+
+        /* Reached only once the scratch file exists */
+        sys_close (scratch_fd);
+        sys_unlink (scratch_vol);
+
+out:
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
new file mode 100644
index 00000000000..b5aafefc1b5
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SVC_HELPER_H_
+#define _GLUSTERD_SVC_HELPER_H_
+
+#include "glusterd.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-volgen.h"
+
+int
+glusterd_svcs_reconfigure ();
+
+int
+glusterd_svcs_stop ();
+
+int
+glusterd_svcs_manager (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_svc_check_volfile_identical (char *svc_name,
+ glusterd_graph_builder_t builder,
+ gf_boolean_t *identical);
+int
+glusterd_svc_check_topology_identical (char *svc_name,
+ glusterd_graph_builder_t builder,
+ gf_boolean_t *identical);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
new file mode 100644
index 00000000000..454c2a453b2
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
@@ -0,0 +1,338 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "globals.h"
+#include "run.h"
+#include "glusterd.h"
+#include "glusterfs.h"
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-proc-mgmt.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-messages.h"
+#include "syscall.h"
+
+/*
+ * Create the run directory `rundir` through mkdir_p() with mode 0777.
+ *
+ * An error is logged only when mkdir_p() returns -1 for a reason other
+ * than EEXIST. The value returned by mkdir_p() is handed back unchanged.
+ */
+int
+glusterd_svc_create_rundir (char *rundir)
+{
+        int     rc = mkdir_p (rundir, 0777, _gf_true);
+
+        if (rc != -1 || errno == EEXIST)
+                return rc;
+
+        gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                GD_MSG_CREATE_DIR_FAILED, "Unable to create rundir %s",
+                rundir);
+
+        return rc;
+}
+
+/* Format "<logdir>/<server>.log" into `logfile`, bounded by `len` bytes. */
+static void
+glusterd_svc_build_logfile_path (char *server, char *logdir, char *logfile,
+                                 size_t len)
+{
+        (void) snprintf (logfile, len, "%s/%s.log", logdir, server);
+}
+
+/* Format the volfile id "gluster/<server>" into `volfileid`, bounded by
+ * `len` bytes. */
+static void
+glusterd_svc_build_volfileid_path (char *server, char *volfileid, size_t len)
+{
+        (void) snprintf (volfileid, len, "gluster/%s", server);
+}
+
+/*
+ * Shared initialisation of a service object.
+ *
+ * Copies svc_name into svc->name, creates the run directory, initialises
+ * the connection object on a socket path derived from rundir and MY_UUID,
+ * and initialises the process object with the pidfile, logfile, volfile,
+ * volfile-id and volfile-server values built here.
+ *
+ * When `notify` is NULL, glusterd_svc_common_rpc_notify is used.
+ *
+ * Returns 0 on success, otherwise the failing status of snprintf(),
+ * glusterd_conn_init() or glusterd_proc_init().
+ */
+static int
+glusterd_svc_init_common (glusterd_svc_t *svc,
+ char *svc_name, char *workdir,
+ char *rundir, char *logdir,
+ glusterd_conn_notify_t notify)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char pidfile[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ char volfile[PATH_MAX] = {0,};
+ char sockfpath[PATH_MAX] = {0,};
+ char volfileid[256] = {0};
+ char *volfileserver = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Only a negative (error) result aborts; a positive length left in
+ * ret is overwritten by glusterd_conn_init() below. */
+ ret = snprintf (svc->name, sizeof (svc->name), "%s", svc_name);
+ if (ret < 0)
+ goto out;
+
+ if (!notify)
+ notify = glusterd_svc_common_rpc_notify;
+
+ /* The return value is not checked here. */
+ glusterd_svc_create_rundir (rundir);
+
+ /* Initialize the connection mgmt */
+ glusterd_conn_build_socket_filepath (rundir, MY_UUID,
+ sockfpath, sizeof (sockfpath));
+
+ /* NOTE(review): 600 is presumably a timeout value - confirm against
+ * glusterd_conn_init(). */
+ ret = glusterd_conn_init (&(svc->conn), sockfpath, 600, notify);
+ if (ret)
+ goto out;
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_pidfile_path (svc_name, workdir, pidfile,
+ sizeof(pidfile));
+ glusterd_svc_build_volfile_path (svc_name, workdir, volfile,
+ sizeof (volfile));
+
+ glusterd_svc_build_logfile_path (svc_name, logdir, logfile,
+ sizeof (logfile));
+ glusterd_svc_build_volfileid_path (svc_name, volfileid,
+ sizeof(volfileid));
+
+ /* Fall back to "localhost" when no bind-address option is set. */
+ if (dict_get_str (this->options, "transport.socket.bind-address",
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+
+ ret = glusterd_proc_init (&(svc->proc), svc_name, pidfile, logdir,
+ logfile, volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+
+/*
+ * dict_foreach() callback used by glusterd_svc_start(): appends the
+ * entry's value to the runner passed in through `data`. Always returns 0.
+ */
+static int
+svc_add_args (dict_t *cmdline, char *arg, data_t *value, void *data)
+{
+        runner_add_arg ((runner_t *) data, value->data);
+
+        return 0;
+}
+
+/*
+ * Initialise `svc` for the service called `svc_name`: the run directory is
+ * derived from the glusterd working directory, logs go to
+ * DEFAULT_LOG_FILE_DIRECTORY and the default RPC notify handler is used.
+ * Returns whatever glusterd_svc_init_common() returns.
+ */
+int glusterd_svc_init (glusterd_svc_t *svc, char *svc_name)
+{
+        char             rundir[PATH_MAX] = {0,};
+        xlator_t        *this             = THIS;
+        glusterd_conf_t *priv             = NULL;
+
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        glusterd_svc_build_rundir (svc_name, priv->workdir, rundir,
+                                   sizeof (rundir));
+
+        return glusterd_svc_init_common (svc, svc_name, priv->workdir,
+                                         rundir, DEFAULT_LOG_FILE_DIRECTORY,
+                                         NULL);
+}
+
+/*
+ * Start the process behind `svc`.
+ *
+ * Returns 0 straight away when the process is already running. Fails when
+ * the service volfile is not accessible. Otherwise a glusterfs command
+ * line is assembled (optionally wrapped in valgrind, plus any values in
+ * `cmdline`) and run: without waiting when flags == PROC_START_NO_WAIT,
+ * else synchronously with the big lock released for the duration.
+ *
+ * Returns the status of the access check or of the runner call.
+ */
+int
+glusterd_svc_start (glusterd_svc_t *svc, int flags, dict_t *cmdline)
+{
+ int ret = -1;
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char valgrind_logfile[PATH_MAX] = {0};
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (glusterd_proc_is_running (&(svc->proc))) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = sys_access (svc->proc.volfile, F_OK);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_NOT_FOUND, "Volfile %s is not present",
+ svc->proc.volfile);
+ goto out;
+ }
+
+ runinit (&runner);
+
+ if (priv->valgrind) {
+ /* NOTE(review): the valgrind log path is formed under
+ * svc->proc.logfile, which glusterd_svc_init_common() passes to
+ * glusterd_proc_init() as "<logdir>/<server>.log" - a file, not a
+ * directory. The log directory was presumably intended; confirm. */
+ snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
+ svc->proc.logfile, svc->name);
+
+ runner_add_args (&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", svc->proc.volfileserver,
+ "--volfile-id", svc->proc.volfileid,
+ "-p", svc->proc.pidfile,
+ "-l", svc->proc.logfile,
+ "-S", svc->conn.sockpath,
+ NULL);
+
+ /* Append every value of the caller-supplied dict as an extra argument. */
+ if (cmdline)
+ dict_foreach (cmdline, svc_add_args, (void *) &runner);
+
+ /* Logged before the runner is invoked, so it does not reflect the
+ * outcome of the start. */
+ gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
+ "Starting %s service", svc->name);
+
+ if (flags == PROC_START_NO_WAIT) {
+ ret = runner_run_nowait (&runner);
+ } else {
+ /* Release the big lock while blocked in runner_run(). */
+ synclock_unlock (&priv->big_lock);
+ {
+ ret = runner_run (&runner);
+ }
+ synclock_lock (&priv->big_lock);
+ }
+
+out:
+ gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+/*
+ * Stop the process behind `svc` with signal `sig` (PROC_STOP_FORCE).
+ *
+ * When the stop succeeds the connection is disconnected, the service is
+ * marked offline, its socket file is unlinked (result ignored) and an
+ * informational message is logged. Returns glusterd_proc_stop()'s status.
+ */
+int glusterd_svc_stop (glusterd_svc_t *svc, int sig)
+{
+        int     ret = glusterd_proc_stop (&(svc->proc), sig, PROC_STOP_FORCE);
+
+        if (!ret) {
+                glusterd_conn_disconnect (&(svc->conn));
+
+                svc->online = _gf_false;
+                (void) glusterd_unlink_file ((char *)svc->conn.sockpath);
+
+                gf_msg (THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+                        "%s service is stopped", svc->name);
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Format "<rundir>/<server>.pid" into `path`, where <rundir> is the
+ * service run directory below `workdir`. `len` is asserted to be PATH_MAX.
+ */
+void
+glusterd_svc_build_pidfile_path (char *server, char *workdir, char *path,
+                                 size_t len)
+{
+        char    rundir[PATH_MAX] = {0};
+
+        GF_ASSERT (len == PATH_MAX);
+
+        glusterd_svc_build_rundir (server, workdir, rundir, sizeof (rundir));
+        (void) snprintf (path, len, "%s/%s.pid", rundir, server);
+}
+
+/*
+ * Format the volfile path of service `server` into `volfile`:
+ * "<svcdir>/<server>.vol" for quotad, "<svcdir>/<server>-server.vol" for
+ * everything else. `len` is asserted to be PATH_MAX.
+ */
+void
+glusterd_svc_build_volfile_path (char *server, char *workdir, char *volfile,
+                                 size_t len)
+{
+        char    svcdir[PATH_MAX] = {0,};
+
+        GF_ASSERT (len == PATH_MAX);
+
+        glusterd_svc_build_svcdir (server, workdir, svcdir, sizeof (svcdir));
+
+        if (strcmp (server, "quotad") != 0) {
+                snprintf (volfile, len, "%s/%s-server.vol", svcdir, server);
+                return;
+        }
+
+        /* quotad has different volfile name */
+        snprintf (volfile, len, "%s/%s.vol", svcdir, server);
+}
+
+/* Format "<workdir>/<server>" into `path`. `len` is asserted to be
+ * PATH_MAX. */
+void
+glusterd_svc_build_svcdir (char *server, char *workdir, char *path, size_t len)
+{
+        GF_ASSERT (len == PATH_MAX);
+
+        (void) snprintf (path, len, "%s/%s", workdir, server);
+}
+
+/* Format "<svcdir>/run" into `path`, where <svcdir> is the service
+ * directory below `workdir`. `len` is asserted to be PATH_MAX. */
+void
+glusterd_svc_build_rundir (char *server, char *workdir, char *path, size_t len)
+{
+        char    svcdir[PATH_MAX] = {0};
+
+        GF_ASSERT (len == PATH_MAX);
+
+        glusterd_svc_build_svcdir (server, workdir, svcdir, sizeof (svcdir));
+        (void) snprintf (path, len, "%s/run", svcdir);
+}
+
+/*
+ * Regenerate a service volfile through `create_volfile` and, when that
+ * succeeds, call glusterd_fetchspec_notify(). Returns the first non-zero
+ * status, or the status of the notify call.
+ */
+int
+glusterd_svc_reconfigure (int (*create_volfile) ())
+{
+        int     status = create_volfile ();
+
+        if (status)
+                return status;
+
+        return glusterd_fetchspec_notify (THIS);
+}
+
+/*
+ * Default RPC notification handler for a service connection.
+ *
+ * Marks the owning service online on RPC_CLNT_CONNECT and offline on
+ * RPC_CLNT_DISCONNECT (logging only when it was online); any other event
+ * is traced. Returns -1 when the owning service cannot be obtained, else 0.
+ */
+int
+glusterd_svc_common_rpc_notify (glusterd_conn_t *conn,
+                                rpc_clnt_event_t event)
+{
+        xlator_t        *this = THIS;
+        glusterd_svc_t  *svc  = NULL;
+
+        GF_ASSERT (this);
+
+        /* Get the parent object i.e. svc using list_entry macro */
+        svc = cds_list_entry (conn, glusterd_svc_t, conn);
+        if (!svc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SVC_GET_FAIL, "Failed to get the service");
+                return -1;
+        }
+
+        if (event == RPC_CLNT_CONNECT) {
+                gf_msg_debug (this->name, 0, "%s has connected with "
+                              "glusterd.", svc->name);
+                svc->online = _gf_true;
+        } else if (event == RPC_CLNT_DISCONNECT) {
+                if (svc->online) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_NODE_DISCONNECTED, "%s has disconnected "
+                                "from glusterd.", svc->name);
+                        svc->online = _gf_false;
+                }
+        } else {
+                gf_msg_trace (this->name, 0,
+                              "got some other RPC event %d", event);
+        }
+
+        return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
new file mode 100644
index 00000000000..fe7a19385cd
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SVC_MGMT_H_
+#define _GLUSTERD_SVC_MGMT_H_
+
+#include "glusterd-proc-mgmt.h"
+#include "glusterd-conn-mgmt.h"
+
+/* Forward declaration so the callback typedefs below can refer to the
+ * service type before the struct body is defined. */
+struct glusterd_svc_;
+typedef struct glusterd_svc_ glusterd_svc_t;
+
+/* Per-service callback signatures stored in struct glusterd_svc_. */
+typedef void (*glusterd_svc_build_t) (glusterd_svc_t *svc);
+
+typedef int (*glusterd_svc_manager_t) (glusterd_svc_t *svc,
+ void *data, int flags);
+typedef int (*glusterd_svc_start_t) (glusterd_svc_t *svc, int flags);
+typedef int (*glusterd_svc_stop_t) (glusterd_svc_t *svc, int sig);
+
+/* One managed service: its name, connection and process state, the
+ * callbacks that drive it, and two status flags. */
+struct glusterd_svc_ {
+ /* service name */
+ char name[PATH_MAX];
+ /* connection state of the service */
+ glusterd_conn_t conn;
+ /* process state of the service */
+ glusterd_proc_t proc;
+ /* service-specific callbacks */
+ glusterd_svc_build_t build;
+ glusterd_svc_manager_t manager;
+ glusterd_svc_start_t start;
+ glusterd_svc_stop_t stop;
+ /* NOTE(review): presumably tracks whether the service is currently
+ * connected to glusterd - confirm against the RPC notify handler */
+ gf_boolean_t online;
+ /* NOTE(review): presumably set once initialisation has run - confirm */
+ gf_boolean_t inited;
+};
+
+/* Create the run directory `rundir` for a service. */
+int
+glusterd_svc_create_rundir (char *rundir);
+
+/* Initialise `svc` for the service named `svc_name`. */
+int
+glusterd_svc_init (glusterd_svc_t *svc, char *svc_name);
+
+/* Start the service process; `cmdline` may carry extra arguments and may
+ * be NULL. */
+int
+glusterd_svc_start (glusterd_svc_t *svc, int flags, dict_t *cmdline);
+
+/* Stop the service process using signal `sig`. */
+int
+glusterd_svc_stop (glusterd_svc_t *svc, int sig);
+
+/* Path builders: each writes into the caller's buffer (`path`/`volfile`)
+ * of `len` bytes. */
+void
+glusterd_svc_build_pidfile_path (char *server, char *workdir,
+ char *path, size_t len);
+
+void
+glusterd_svc_build_volfile_path (char *server, char *workdir,
+ char *volfile, size_t len);
+
+void
+glusterd_svc_build_svcdir (char *server, char *workdir,
+ char *path, size_t len);
+
+void
+glusterd_svc_build_rundir (char *server, char *workdir,
+ char *path, size_t len);
+
+/* Reconfigure a service; `create_volfile` is the callback that regenerates
+ * its volfile. */
+int
+glusterd_svc_reconfigure (int (*create_volfile) ());
+
+/* RPC notification handler for a service connection. */
+int
+glusterd_svc_common_rpc_notify (glusterd_conn_t *conn,
+ rpc_clnt_event_t event);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
new file mode 100644
index 00000000000..7c5721f25d0
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -0,0 +1,1978 @@
+/*
+ Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+#include "glusterd-mgmt.h"
+
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-locks.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+
+extern glusterd_op_info_t opinfo;
+
+/*
+ * Wait on the barrier in `args` for `count` wake-ups. The big lock is
+ * released for the duration of the wait and re-taken afterwards; the
+ * barrier is destroyed before returning.
+ */
+void
+gd_synctask_barrier_wait (struct syncargs *args, int count)
+{
+        glusterd_conf_t *priv = NULL;
+
+        priv = THIS->private;
+
+        synclock_unlock (&priv->big_lock);
+        synctask_barrier_wait (args, count);
+        synclock_lock (&priv->big_lock);
+
+        syncbarrier_destroy (&args->barrier);
+}
+
+/*
+ * Fold a failed peer response into `args`.
+ *
+ * Does nothing when op_ret is 0. Otherwise op_ret/op_errno are stored in
+ * args, a message naming the failing peer (hostname when the peer is
+ * known, else the textual form of `uuid`) is built for the given op_code,
+ * logged, and appended on a new line to any message already held in
+ * args->errstr. args->errstr is replaced by a freshly allocated string.
+ *
+ * An op_code that is not one of the four handled operations yields an
+ * empty per-operation message.
+ */
+static void
+gd_collate_errors (struct syncargs *args, int op_ret, int op_errno,
+                   char *op_errstr, int op_code, uuid_t peerid, u_char *uuid)
+{
+        char                 err_str[PATH_MAX] = "Please check log file for details.";
+        char                 op_err[PATH_MAX]  = "";
+        char                *peer_str          = NULL;
+        const char          *peer_name         = NULL;
+        glusterd_peerinfo_t *peerinfo          = NULL;
+
+        if (op_ret) {
+                args->op_ret = op_ret;
+                args->op_errno = op_errno;
+
+                /* peerinfo is only dereferenced inside the RCU read
+                 * section, so the name is copied before unlocking. */
+                rcu_read_lock ();
+                peerinfo = glusterd_peerinfo_find (peerid, NULL);
+                if (peerinfo)
+                        peer_str = gf_strdup (peerinfo->hostname);
+                else
+                        peer_str = gf_strdup (uuid_utoa (uuid));
+                rcu_read_unlock ();
+
+                /* gf_strdup() may fail; never pass NULL to a %s conversion */
+                peer_name = peer_str ? peer_str : "<unknown>";
+
+                /* snprintf() always NUL-terminates within the size it is
+                 * given. Its return value is the untruncated length and
+                 * must not be used as an index into the buffer. */
+                if (op_errstr && strcmp (op_errstr, ""))
+                        snprintf (err_str, sizeof (err_str),
+                                  "Error: %s", op_errstr);
+
+                switch (op_code) {
+                case GLUSTERD_MGMT_CLUSTER_LOCK:
+                        snprintf (op_err, sizeof (op_err),
+                                  "Locking failed on %s. %s",
+                                  peer_name, err_str);
+                        break;
+                case GLUSTERD_MGMT_CLUSTER_UNLOCK:
+                        snprintf (op_err, sizeof (op_err),
+                                  "Unlocking failed on %s. %s",
+                                  peer_name, err_str);
+                        break;
+                case GLUSTERD_MGMT_STAGE_OP:
+                        snprintf (op_err, sizeof (op_err),
+                                  "Staging failed on %s. %s",
+                                  peer_name, err_str);
+                        break;
+                case GLUSTERD_MGMT_COMMIT_OP:
+                        snprintf (op_err, sizeof (op_err),
+                                  "Commit failed on %s. %s",
+                                  peer_name, err_str);
+                        break;
+                default:
+                        /* unhandled operation: op_err stays empty */
+                        break;
+                }
+
+                if (args->errstr) {
+                        snprintf (err_str, sizeof (err_str),
+                                  "%s\n%s", args->errstr, op_err);
+                        GF_FREE (args->errstr);
+                        args->errstr = NULL;
+                } else {
+                        snprintf (err_str, sizeof (err_str), "%s", op_err);
+                }
+
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_MGMT_OP_FAIL, "%s", op_err);
+                args->errstr = gf_strdup (err_str);
+        }
+
+        GF_FREE (peer_str);
+
+        return;
+}
+
+/* Prepare `args` for use: initialise its dict mutex with default
+ * attributes and attach `op_ctx` as its dict. */
+void
+gd_syncargs_init (struct syncargs *args, dict_t *op_ctx)
+{
+        pthread_mutex_init (&args->lock_dict, NULL);
+        args->dict = op_ctx;
+}
+
+/* Release a stage-op request and its serialized buffer. NULL is accepted. */
+static void
+gd_stage_op_req_free (gd1_mgmt_stage_op_req *req)
+{
+        if (req) {
+                GF_FREE (req->buf.buf_val);
+                GF_FREE (req);
+        }
+}
+
+/* Release a commit-op request and its serialized buffer. NULL is accepted. */
+static void
+gd_commit_op_req_free (gd1_mgmt_commit_op_req *req)
+{
+        if (req) {
+                GF_FREE (req->buf.buf_val);
+                GF_FREE (req);
+        }
+}
+
+/*
+ * Release a brick-op request, its input buffer and its name. The name is
+ * freed only when it is not the empty string. NULL is accepted.
+ */
+static void
+gd_brick_op_req_free (gd1_mgmt_brick_op_req *req)
+{
+        if (req) {
+                if (strcmp (req->name, "") != 0)
+                        GF_FREE (req->name);
+
+                GF_FREE (req->input.input_val);
+                GF_FREE (req);
+        }
+}
+
+/*
+ * Serialize `req` with `xdrproc` and submit it on `rpc` as procedure
+ * `procnum` of `prog`, with `cbkfn` as the reply callback.
+ *
+ * `local` and `cookie` are stored on a newly created call frame
+ * (frame->local / frame->cookie).
+ *
+ * Returns the result of rpc_clnt_submit(), or -1 when `req` is NULL, an
+ * allocation fails or serialization fails. On a non-zero result the frame
+ * created here is destroyed before returning.
+ */
+int
+gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local,
+ void *cookie, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int count = 0;
+ struct iovec iov = {0, };
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+
+ GF_ASSERT (rpc);
+ if (!req)
+ goto out;
+
+ /* Size the I/O buffer from the encoded size of the request. */
+ req_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf)
+ goto out;
+
+ iobref = iobref_new ();
+ if (!iobref)
+ goto out;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1)
+ goto out;
+
+ /* On success ret holds the encoded length. */
+ iov.iov_len = ret;
+ count = 1;
+
+ frame->local = local;
+ frame->cookie = cookie;
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn,
+ &iov, count, NULL, 0, iobref,
+ frame, NULL, 0, NULL, 0, NULL);
+
+ /* TODO: do we need to start ping also? */
+
+out:
+ /* NOTE(review): early exits reach here with iobref and/or iobuf still
+ * NULL; this relies on both unref helpers tolerating NULL - confirm. */
+ iobref_unref (iobref);
+ iobuf_unref (iobuf);
+
+ if (ret && frame)
+ STACK_DESTROY (frame->root);
+ return ret;
+}
+
+/* Defined in glusterd-rpc-ops.c */
+extern struct rpc_clnt_program gd_mgmt_prog;
+extern struct rpc_clnt_program gd_brick_prog;
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+/*
+ * Merge a peer's response dict `rsp` into the aggregate dict `aggr` using
+ * the helper that belongs to operation `op`.
+ *
+ * Operations without a helper (including GD_OP_GSYNC_CREATE) leave both
+ * dicts untouched and return 0. Otherwise the helper's status is returned.
+ */
+int
+glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp)
+{
+        int       ret  = 0;
+        xlator_t *this = THIS;
+
+        GF_ASSERT (this);
+
+        switch (op) {
+        case GD_OP_CREATE_VOLUME:
+        case GD_OP_ADD_BRICK:
+        case GD_OP_START_VOLUME:
+                ret = glusterd_aggr_brick_mount_dirs (aggr, rsp);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL, "Failed to "
+                                "aggregate brick mount dirs");
+                break;
+
+        case GD_OP_REPLACE_BRICK:
+                ret = glusterd_rb_use_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_SYNC_VOLUME:
+                ret = glusterd_sync_use_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_GSYNC_SET:
+                ret = glusterd_gsync_use_rsp_dict (aggr, rsp, NULL);
+                break;
+
+        case GD_OP_STATUS_VOLUME:
+                ret = glusterd_volume_status_copy_to_op_ctx_dict (aggr, rsp);
+                break;
+
+        case GD_OP_HEAL_VOLUME:
+                ret = glusterd_volume_heal_use_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_CLEARLOCKS_VOLUME:
+                ret = glusterd_use_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_QUOTA:
+                ret = glusterd_volume_quota_copy_to_op_ctx_dict (aggr, rsp);
+                break;
+
+        case GD_OP_SYS_EXEC:
+                ret = glusterd_sys_exec_output_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_SNAP:
+                ret = glusterd_snap_use_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_SCRUB_STATUS:
+                ret = glusterd_volume_bitrot_scrub_use_rsp_dict (aggr, rsp);
+                break;
+
+        case GD_OP_GSYNC_CREATE:
+        default:
+                /* nothing to aggregate */
+                break;
+        }
+
+        return ret;
+}
+
+/*
+ * Reply handler for a mgmt-v3 lock request.
+ *
+ * Takes the syncargs from frame->local and the heap-allocated peer id from
+ * frame->cookie, decodes the response, records the outcome through
+ * gd_mgmt_v3_collate_errors(), frees the peer id and wakes the waiting
+ * task. Always returns 0; the result is delivered through `args`.
+ */
+int32_t
+gd_syncop_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ /* peerinfo is declared but not used in this function */
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ /* Detach local and cookie from the frame; both are handled here. */
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ /* NOTE(review): presumably jumps to out with op_errno = EINVAL when
+ * iov is NULL - confirm against the macro definition. */
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ /* op_ret is still -1 on every early exit above, so those paths are
+ * passed on as failures. */
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_LOCK, *peerid, rsp.uuid);
+
+ GF_FREE (peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/* RPC callback entry point: dispatches to gd_syncop_mgmt_v3_lock_cbk_fn
+ * through glusterd_big_locked_cbk(). */
+int32_t
+gd_syncop_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov,
+                            int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          gd_syncop_mgmt_v3_lock_cbk_fn);
+        return status;
+}
+
+/*
+ * Send a mgmt-v3 lock request for operation `op` to `peerinfo`.
+ *
+ * The request carries the serialized `op_ctx`, `my_uuid` and `txn_id`.
+ * `args` becomes the frame's local and a heap copy of the peer's uuid
+ * becomes its cookie; the reply callback frees that copy.
+ * `recv_uuid` is not referenced in this function.
+ *
+ * Returns 0 when the request was submitted, non-zero otherwise.
+ */
+int
+gd_syncop_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid, uuid_t txn_id)
+{
+ int ret = -1;
+ gd1_mgmt_v3_lock_req req = {{0},};
+ uuid_t *peerid = NULL;
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ gf_uuid_copy (req.uuid, my_uuid);
+ gf_uuid_copy (req.txn_id, txn_id);
+ req.op = op;
+
+ /* NOTE(review): peerid is freed by the reply callback; whether it is
+ * also released when submission fails before the callback is invoked
+ * cannot be told from here - confirm. */
+ GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+ if (ret)
+ goto out;
+
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerid,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_LOCK,
+ gd_syncop_mgmt_v3_lock_cbk,
+ (xdrproc_t)
+ xdr_gd1_mgmt_v3_lock_req);
+out:
+ /* The serialized dict is released on every path. */
+ GF_FREE (req.dict.dict_val);
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Reply handler for a mgmt-v3 unlock request.
+ *
+ * Takes the syncargs from frame->local and the heap-allocated peer id from
+ * frame->cookie, decodes the response, records the outcome through
+ * gd_mgmt_v3_collate_errors(), frees the peer id and wakes the waiting
+ * task. Always returns 0; the result is delivered through `args`.
+ */
+int32_t
+gd_syncop_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ /* peerinfo is declared but not used in this function */
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_unlock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ /* Detach local and cookie from the frame; both are handled here. */
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ /* NOTE(review): presumably jumps to out with op_errno = EINVAL when
+ * iov is NULL - confirm against the macro definition. */
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ /* op_ret is still -1 on every early exit above, so those paths are
+ * passed on as failures. */
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_UNLOCK, *peerid, rsp.uuid);
+
+ GF_FREE (peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/* RPC callback entry point: dispatches to gd_syncop_mgmt_v3_unlock_cbk_fn
+ * through glusterd_big_locked_cbk(). */
+int32_t
+gd_syncop_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+                              int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          gd_syncop_mgmt_v3_unlock_cbk_fn);
+        return status;
+}
+
+/*
+ * Send a mgmt-v3 unlock request to `peerinfo`.
+ *
+ * The request carries the serialized `op_ctx`, `my_uuid` and `txn_id`.
+ * `args` becomes the frame's local and a heap copy of the peer's uuid
+ * becomes its cookie; the reply callback frees that copy.
+ * `recv_uuid` is not referenced in this function.
+ *
+ * Returns 0 when the request was submitted, non-zero otherwise.
+ */
+int
+gd_syncop_mgmt_v3_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid, uuid_t txn_id)
+{
+ int ret = -1;
+ gd1_mgmt_v3_unlock_req req = {{0},};
+ uuid_t *peerid = NULL;
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ gf_uuid_copy (req.uuid, my_uuid);
+ gf_uuid_copy (req.txn_id, txn_id);
+
+ /* Heap copy of the peer uuid, handed to the callback as the cookie. */
+ GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+ if (ret)
+ goto out;
+
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerid,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ gd_syncop_mgmt_v3_unlock_cbk,
+ (xdrproc_t)
+ xdr_gd1_mgmt_v3_unlock_req);
+out:
+ /* The serialized dict is released on every path. */
+ GF_FREE (req.dict.dict_val);
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Reply handler for a cluster lock request.
+ *
+ * Decodes the response and, when the peer named by the cookie is still
+ * known, marks it as locked if the peer reported success. If the peer is
+ * no longer known the response is treated as a failure. The outcome is
+ * recorded through gd_collate_errors(), the peer id is freed and the
+ * waiting task is woken. Always returns 0.
+ */
+int32_t
+_gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_cluster_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ /* Detach local and cookie from the frame; both are handled here. */
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+
+ /* peerinfo is looked up and modified inside the RCU read section. */
+ rcu_read_lock ();
+ peerinfo = glusterd_peerinfo_find (*peerid, NULL);
+ if (peerinfo) {
+ /* Set peer as locked, so we unlock only the locked peers */
+ if (rsp.op_ret == 0)
+ peerinfo->locked = _gf_true;
+ } else {
+ rsp.op_ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer with "
+ "ID %s", uuid_utoa (*peerid));
+ }
+ rcu_read_unlock ();
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_CLUSTER_LOCK, *peerid, rsp.uuid);
+
+ GF_FREE (peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/* RPC callback entry point: dispatches to _gd_syncop_mgmt_lock_cbk through
+ * glusterd_big_locked_cbk(). */
+int32_t
+gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          _gd_syncop_mgmt_lock_cbk);
+        return status;
+}
+
+/*
+ * Send a cluster lock request carrying `my_uuid` to `peerinfo`.
+ *
+ * `args` becomes the frame's local and a heap copy of the peer's uuid its
+ * cookie. `recv_uuid` is not referenced. Returns the status of the uuid
+ * copy when that fails, otherwise the status of the submission.
+ */
+int
+gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+                     uuid_t my_uuid, uuid_t recv_uuid)
+{
+        int                        ret    = -1;
+        uuid_t                    *peerid = NULL;
+        gd1_mgmt_cluster_lock_req  req    = {{0},};
+
+        gf_uuid_copy (req.uuid, my_uuid);
+
+        GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+        if (ret)
+                return ret;
+
+        return gd_syncop_submit_request (peerinfo->rpc, &req, args, peerid,
+                                         &gd_mgmt_prog,
+                                         GLUSTERD_MGMT_CLUSTER_LOCK,
+                                         gd_syncop_mgmt_lock_cbk,
+                                         (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req);
+}
+
+/*
+ * Reply handler for a cluster unlock request.
+ *
+ * Decodes the response and, when the peer named by the cookie is still
+ * known, clears its locked flag regardless of the status the peer
+ * reported. If the peer is no longer known the response is treated as a
+ * failure. The outcome is recorded through gd_collate_errors(), the peer
+ * id is freed and the waiting task is woken. Always returns 0.
+ */
+int32_t
+_gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_cluster_unlock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ /* Detach local and cookie from the frame; both are handled here. */
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy (args->uuid, rsp.uuid);
+
+ /* peerinfo is looked up and modified inside the RCU read section. */
+ rcu_read_lock ();
+ peerinfo = glusterd_peerinfo_find (*peerid, NULL);
+ if (peerinfo) {
+ peerinfo->locked = _gf_false;
+ } else {
+ rsp.op_ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_PEER_NOT_FOUND, "Could not find peer with "
+ "ID %s", uuid_utoa (*peerid));
+ }
+ rcu_read_unlock ();
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK, *peerid, rsp.uuid);
+
+ GF_FREE (peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+/* RPC callback entry point: dispatches to _gd_syncop_mgmt_unlock_cbk
+ * through glusterd_big_locked_cbk(). */
+int32_t
+gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+                           int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          _gd_syncop_mgmt_unlock_cbk);
+        return status;
+}
+
+
+/*
+ * Send a cluster unlock request carrying `my_uuid` to `peerinfo`.
+ *
+ * `args` becomes the frame's local and a heap copy of the peer's uuid its
+ * cookie; the reply callback frees that copy. `recv_uuid` is not
+ * referenced.
+ *
+ * Returns 0 when the request was submitted, non-zero otherwise.
+ */
+int
+gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+                       uuid_t my_uuid, uuid_t recv_uuid)
+{
+        int                          ret    = -1;
+        gd1_mgmt_cluster_unlock_req  req    = {{0},};
+        uuid_t                      *peerid = NULL;
+
+        gf_uuid_copy (req.uuid, my_uuid);
+        GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+        if (ret)
+                goto out;
+
+        /* Serialize with the XDR routine that matches the request type
+         * (gd1_mgmt_cluster_unlock_req), not the lock request's routine. */
+        ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerid,
+                                        &gd_mgmt_prog,
+                                        GLUSTERD_MGMT_CLUSTER_UNLOCK,
+                                        gd_syncop_mgmt_unlock_cbk,
+                                        (xdrproc_t) xdr_gd1_mgmt_cluster_unlock_req);
+out:
+        return ret;
+}
+
+/*
+ * Reply handler for a stage-op request.
+ *
+ * Decodes the response, unserializes its dict when one is present, checks
+ * that the responding uuid belongs to a known peer and, for the operations
+ * listed below, merges the response dict into args->dict under
+ * args->lock_dict. The outcome is recorded through gd_collate_errors(),
+ * the peer id from the cookie is freed and the waiting task is woken.
+ * Always returns 0; the result is delivered through `args`.
+ */
+int32_t
+_gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+                         int count, void *myframe)
+{
+        int                    ret      = -1;
+        gd1_mgmt_stage_op_rsp  rsp      = {{0},};
+        struct syncargs       *args     = NULL;
+        xlator_t              *this     = NULL;
+        dict_t                *rsp_dict = NULL;
+        call_frame_t          *frame    = NULL;
+        int                    op_ret   = -1;
+        int                    op_errno = -1;
+        uuid_t                *peerid   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Detach local and cookie from the frame; both are handled here. */
+        frame = myframe;
+        args = frame->local;
+        peerid = frame->cookie;
+        frame->local = NULL;
+        frame->cookie = NULL;
+
+        if (-1 == req->rpc_status) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+                                        EINVAL);
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+        if (ret < 0)
+                goto out;
+
+        if (rsp.dict.dict_len) {
+                /* Unserialize the dictionary */
+                rsp_dict = dict_new ();
+
+                ret = dict_unserialize (rsp.dict.dict_val,
+                                        rsp.dict.dict_len,
+                                        &rsp_dict);
+                if (ret < 0) {
+                        /* The success branch hands this same buffer to
+                         * extra_stdfree, so it belongs to the C allocator
+                         * and must be released with free(), not GF_FREE(). */
+                        free (rsp.dict.dict_val);
+                        goto out;
+                } else {
+                        rsp_dict->extra_stdfree = rsp.dict.dict_val;
+                }
+        }
+
+        /* Reject responses whose uuid does not match any known peer. */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL);
+        rcu_read_unlock ();
+        if (ret) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER, "Staging response "
+                        "for 'Volume %s' received from unknown "
+                        "peer: %s", gd_op_list[rsp.op],
+                        uuid_utoa (rsp.uuid));
+                goto out;
+        }
+
+        gf_uuid_copy (args->uuid, rsp.uuid);
+        if (rsp.op == GD_OP_REPLACE_BRICK || rsp.op == GD_OP_QUOTA ||
+            rsp.op == GD_OP_CREATE_VOLUME || rsp.op == GD_OP_ADD_BRICK ||
+            rsp.op == GD_OP_START_VOLUME) {
+                /* args->dict is shared between callbacks; serialize the
+                 * merge. A merge failure is logged but does not change
+                 * the reported op_ret. */
+                pthread_mutex_lock (&args->lock_dict);
+                {
+                        ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict,
+                                                             rsp_dict);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_RESP_AGGR_FAIL, "%s",
+                                        "Failed to aggregate response from "
+                                        " node/brick");
+                }
+                pthread_mutex_unlock (&args->lock_dict);
+        }
+
+        op_ret = rsp.op_ret;
+        op_errno = rsp.op_errno;
+
+out:
+        gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+                           GLUSTERD_MGMT_STAGE_OP, *peerid, rsp.uuid);
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+        GF_FREE (peerid);
+        /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+         * the caller function.
+         */
+        if (req->rpc_status != -1)
+                STACK_DESTROY (frame->root);
+        synctask_barrier_wake(args);
+        return 0;
+}
+
+/* RPC callback entry point: dispatches to _gd_syncop_stage_op_cbk through
+ * glusterd_big_locked_cbk(). */
+int32_t
+gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+                        int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          _gd_syncop_stage_op_cbk);
+        return status;
+}
+
+
+/*
+ * Send a stage-op request for operation `op` to `peerinfo`.
+ *
+ * The request carries `my_uuid`, `op` and the serialized `dict_out`.
+ * `args` becomes the frame's local and a heap copy of the peer's uuid its
+ * cookie. `recv_uuid` and `op_ctx` are not referenced in this function.
+ *
+ * Returns 0 when the request was submitted, non-zero otherwise. The
+ * request object is released before returning on every path.
+ */
+int
+gd_syncop_mgmt_stage_op (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid, int op,
+ dict_t *dict_out, dict_t *op_ctx)
+{
+ gd1_mgmt_stage_op_req *req = NULL;
+ int ret = -1;
+ uuid_t *peerid = NULL;
+
+ req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_stage_req_t);
+ if (!req)
+ goto out;
+
+ gf_uuid_copy (req->uuid, my_uuid);
+ req->op = op;
+
+ ret = dict_allocate_and_serialize (dict_out,
+ &req->buf.buf_val, &req->buf.buf_len);
+ if (ret)
+ goto out;
+
+ /* Heap copy of the peer uuid, handed to the callback as the cookie. */
+ GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+ if (ret)
+ goto out;
+
+ ret = gd_syncop_submit_request (peerinfo->rpc, req, args, peerid,
+ &gd_mgmt_prog, GLUSTERD_MGMT_STAGE_OP,
+ gd_syncop_stage_op_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_stage_op_req);
+out:
+ /* gd_stage_op_req_free() accepts NULL, so this is safe on the
+ * allocation-failure path as well. */
+ gd_stage_op_req_free (req);
+ return ret;
+
+}
+
+/*
+ * Reply handler for a brick-op request.
+ *
+ * Writes the outcome straight into the syncargs taken from frame->local:
+ * op_ret/op_errno default to -1/EINVAL (ENOTCONN when the RPC itself
+ * failed) and are replaced by the response values once it decodes. A
+ * non-empty output payload is unserialized into a new args->dict and the
+ * response's error string is duplicated into args->errstr.
+ * Always returns 0 after waking the waiter.
+ */
+int32_t
+_gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ struct syncargs *args = NULL;
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ frame = myframe;
+ args = frame->local;
+ frame->local = NULL;
+
+ /* initialize */
+ args->op_ret = -1;
+ args->op_errno = EINVAL;
+
+ if (-1 == req->rpc_status) {
+ args->op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, args->op_errno,
+ EINVAL);
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.output.output_len) {
+ args->dict = dict_new ();
+ if (!args->dict) {
+ ret = -1;
+ args->op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.output.output_val,
+ rsp.output.output_len,
+ &args->dict);
+ if (ret < 0)
+ goto out;
+ }
+
+ args->op_ret = rsp.op_ret;
+ args->op_errno = rsp.op_errno;
+ args->errstr = gf_strdup (rsp.op_errstr);
+
+out:
+ /* The decoded buffers are released with plain free(); the error
+ * string only when it is non-NULL and non-empty. */
+ if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0))
+ free (rsp.op_errstr);
+ free (rsp.output.output_val);
+
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY (frame->root);
+ __wake (args);
+
+ return 0;
+}
+
+/* RPC callback entry point: dispatches to _gd_syncop_brick_op_cbk through
+ * glusterd_big_locked_cbk(). */
+int32_t
+gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+                        int count, void *myframe)
+{
+        int32_t status = 0;
+
+        status = glusterd_big_locked_cbk (req, iov, count, myframe,
+                                          _gd_syncop_brick_op_cbk);
+        return status;
+}
+
+/*
+ * Run operation `op` against the brick or node described by `pnode` over
+ * `rpc` and process the reply.
+ *
+ * The payload is built with the node builder for NFS, quotad and scrub
+ * nodes (and for SHD when op is GD_OP_STATUS_VOLUME), otherwise with the
+ * brick builder. A non-empty error string from the reply is handed to the
+ * caller through *errstr when errstr is non-NULL. For
+ * GD_OP_STATUS_VOLUME the node index is stored in the reply dict. On
+ * success the reply is passed to glusterd_handle_node_rsp().
+ *
+ * Returns args.op_ret (initially -1) and sets errno to args.op_errno
+ * (initially ENOTCONN).
+ */
+int
+gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
+ int op, dict_t *dict_out, dict_t *op_ctx,
+ char **errstr)
+{
+ struct syncargs args = {0, };
+ gd1_mgmt_brick_op_req *req = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ args.op_ret = -1;
+ args.op_errno = ENOTCONN;
+
+ if ((pnode->type == GD_NODE_NFS) ||
+ (pnode->type == GD_NODE_QUOTAD) || (pnode->type == GD_NODE_SCRUB) ||
+ ((pnode->type == GD_NODE_SHD) && (op == GD_OP_STATUS_VOLUME))) {
+ ret = glusterd_node_op_build_payload (op, &req, dict_out);
+
+ } else {
+ ret = glusterd_brick_op_build_payload (op, pnode->node, &req,
+ dict_out);
+
+ }
+
+ /* A payload failure is reported through the initial args values
+ * (-1 / ENOTCONN), not through ret. */
+ if (ret)
+ goto out;
+
+ /* NOTE(review): GD_SYNCOP presumably submits the request and blocks
+ * until gd_syncop_brick_op_cbk has filled in args - confirm against
+ * the macro definition. */
+ GD_SYNCOP (rpc, (&args), NULL, gd_syncop_brick_op_cbk, req,
+ &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req);
+
+ /* Ownership of a non-empty error string moves to the caller;
+ * otherwise it is released here. */
+ if (args.errstr) {
+ if ((strlen(args.errstr) > 0) && errstr)
+ *errstr = args.errstr;
+ else
+ GF_FREE (args.errstr);
+ }
+
+ if (GD_OP_STATUS_VOLUME == op) {
+ /* NOTE(review): args.dict is only created by the callback when
+ * the reply carries output; this call relies on dict_set_int32()
+ * rejecting a NULL dict - confirm. */
+ ret = dict_set_int32 (args.dict, "index", pnode->index);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Error setting index on brick status"
+ " rsp dict");
+ args.op_ret = -1;
+ goto out;
+ }
+ }
+ if (args.op_ret == 0)
+ glusterd_handle_node_rsp (dict_out, pnode->node, op,
+ args.dict, op_ctx, errstr,
+ pnode->type);
+
+out:
+ errno = args.op_errno;
+ if (args.dict)
+ dict_unref (args.dict);
+ gd_brick_op_req_free (req);
+ return args.op_ret;
+
+}
+
+/*
+ * Handle a peer's reply to a commit-op request.
+ *
+ * Decodes the reply, unserializes any response dictionary, rejects replies
+ * whose uuid glusterd_peerinfo_find() does not resolve, aggregates the
+ * response into args->dict where applicable, records the outcome through
+ * gd_collate_errors() and finally wakes the synctask barrier.
+ *
+ * req     - RPC request this reply belongs to; rpc_status == -1 is treated
+ *           as ENOTCONN
+ * iov     - encoded reply
+ * count   - not used by this function
+ * myframe - call frame; frame->local is the struct syncargs and
+ *           frame->cookie is the heap-allocated peer uuid, freed here
+ *
+ * Always returns 0; the result is reported through args.
+ */
+int32_t
+_gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+                          int count, void *myframe)
+{
+        int                     ret      = -1;
+        gd1_mgmt_commit_op_rsp  rsp      = {{0},};
+        struct syncargs        *args     = NULL;
+        xlator_t               *this     = NULL;
+        dict_t                 *rsp_dict = NULL;
+        call_frame_t           *frame    = NULL;
+        int                     op_ret   = -1;
+        int                     op_errno = -1;
+        int                     type     = GF_QUOTA_OPTION_TYPE_NONE;
+        uuid_t                 *peerid   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Detach local/cookie from the frame so they are not touched again
+         * when the frame is destroyed. */
+        frame = myframe;
+        args = frame->local;
+        peerid = frame->cookie;
+        frame->local = NULL;
+        frame->cookie = NULL;
+
+        if (-1 == req->rpc_status) {
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, iov, out, op_errno,
+                                        EINVAL);
+
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+        if (ret < 0) {
+                goto out;
+        }
+
+        if (rsp.dict.dict_len) {
+                /* Unserialize the dictionary */
+                rsp_dict = dict_new ();
+                if (!rsp_dict) {
+                        ret = -1;
+                        op_errno = ENOMEM;
+                        free (rsp.dict.dict_val);
+                        goto out;
+                }
+
+                ret = dict_unserialize (rsp.dict.dict_val,
+                                        rsp.dict.dict_len,
+                                        &rsp_dict);
+                if (ret < 0) {
+                        /* The success path hands this same buffer to
+                         * extra_stdfree, and the brick-op callback releases
+                         * its decoded buffer with free(); plain free() is
+                         * therefore the matching release, not GF_FREE. */
+                        free (rsp.dict.dict_val);
+                        goto out;
+                } else {
+                        rsp_dict->extra_stdfree = rsp.dict.dict_val;
+                }
+        }
+
+        /* A NULL lookup result means the sender is not a known peer. */
+        rcu_read_lock ();
+        ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == 0);
+        rcu_read_unlock ();
+        if (ret) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_RESP_FROM_UNKNOWN_PEER, "Commit response "
+                        "for 'Volume %s' received from unknown "
+                        "peer: %s", gd_op_list[rsp.op],
+                        uuid_utoa (rsp.uuid));
+                goto out;
+        }
+
+        gf_uuid_copy (args->uuid, rsp.uuid);
+        if (rsp.op == GD_OP_QUOTA) {
+                ret = dict_get_int32 (args->dict, "type", &type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get "
+                                "opcode");
+                        goto out;
+                }
+        }
+
+        /* Quota replies are aggregated only for the list sub-command. */
+        if ((rsp.op != GD_OP_QUOTA) || (type == GF_QUOTA_OPTION_TYPE_LIST)) {
+                pthread_mutex_lock (&args->lock_dict);
+                {
+                        ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict,
+                                                             rsp_dict);
+                        if (ret)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_RESP_AGGR_FAIL, "%s",
+                                        "Failed to aggregate response from "
+                                        " node/brick");
+                }
+                pthread_mutex_unlock (&args->lock_dict);
+        }
+
+        op_ret = rsp.op_ret;
+        op_errno = rsp.op_errno;
+
+out:
+        gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+                           GLUSTERD_MGMT_COMMIT_OP, *peerid, rsp.uuid);
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+        GF_FREE (peerid);
+        /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+         * the caller function.
+         */
+        if (req->rpc_status != -1)
+                STACK_DESTROY (frame->root);
+        synctask_barrier_wake(args);
+
+        return 0;
+}
+
+/*
+ * RPC reply callback registered for commit-op requests.
+ *
+ * Forwards all four arguments, together with _gd_syncop_commit_op_cbk, to
+ * glusterd_big_locked_cbk and returns whatever that call returns.
+ */
+int32_t
+gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_commit_op_cbk);
+}
+
+
+/*
+ * Build a commit-op request for one peer and submit it.
+ *
+ * The request carries my_uuid, the op code and a serialized copy of
+ * dict_out.  A heap copy of the peer's uuid is passed along as the request
+ * cookie.  recv_uuid and op_ctx are not used by this function.
+ *
+ * Returns 0 when the request was submitted, non-zero on any earlier
+ * failure.  The request structure is released before returning either way.
+ */
+int
+gd_syncop_mgmt_commit_op (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+                          uuid_t my_uuid, uuid_t recv_uuid,
+                          int op, dict_t *dict_out, dict_t *op_ctx)
+{
+        uuid_t                 *peerid = NULL;
+        gd1_mgmt_commit_op_req *req    = NULL;
+        int                     ret    = -1;
+
+        req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_commit_req_t);
+        if (req) {
+                req->op = op;
+                gf_uuid_copy (req->uuid, my_uuid);
+
+                ret = dict_allocate_and_serialize (dict_out,
+                                                   &req->buf.buf_val,
+                                                   &req->buf.buf_len);
+
+                /* Each later step runs only if everything before it
+                 * succeeded. */
+                if (!ret) {
+                        GD_ALLOC_COPY_UUID (peerid, peerinfo->uuid, ret);
+                }
+
+                if (!ret) {
+                        ret = gd_syncop_submit_request (peerinfo->rpc, req,
+                                        args, peerid, &gd_mgmt_prog,
+                                        GLUSTERD_MGMT_COMMIT_OP,
+                                        gd_syncop_commit_op_cbk,
+                                        (xdrproc_t) xdr_gd1_mgmt_commit_op_req);
+                }
+        }
+
+        gd_commit_op_req_free (req);
+        return ret;
+}
+
+
+/*
+ * Lock phase of a synctask transaction: ask every eligible peer for a lock
+ * and wait for all replies.
+ *
+ * conf         - glusterd configuration holding the peer list
+ * op           - operation being run
+ * op_ctx       - operation context (used for mgmt_v3 locks)
+ * op_errstr    - receives an allocated error message when locking fails
+ * txn_id       - transaction id (used for mgmt_v3 locks)
+ * txn_opinfo   - transaction info; peers with a generation newer than
+ *                txn_generation are skipped
+ * cluster_lock - true to use the cluster-wide lock, false for mgmt_v3 locks
+ *
+ * Returns 0 when no peer had to be contacted, otherwise the aggregated
+ * op_ret collected from the peers.
+ */
+int
+gd_lock_op_phase (glusterd_conf_t  *conf, glusterd_op_t op, dict_t *op_ctx,
+                  char **op_errstr, uuid_t txn_id,
+                  glusterd_op_info_t *txn_opinfo, gf_boolean_t cluster_lock)
+{
+        int                     ret       = -1;
+        int                     peer_cnt  = 0;
+        uuid_t                  peer_uuid = {0};
+        xlator_t               *this      = NULL;
+        glusterd_peerinfo_t    *peerinfo  = NULL;
+        struct syncargs         args      = {0};
+
+        this = THIS;
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_opinfo->txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+
+                if (cluster_lock) {
+                        /* Reset lock status */
+                        peerinfo->locked = _gf_false;
+                        gd_syncop_mgmt_lock (peerinfo, &args,
+                                             MY_UUID, peer_uuid);
+                } else
+                        gd_syncop_mgmt_v3_lock (op, op_ctx, peerinfo, &args,
+                                                MY_UUID, peer_uuid, txn_id);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.op_ret) {
+                if (args.errstr)
+                         *op_errstr = gf_strdup (args.errstr);
+                else {
+                        ret = gf_asprintf (op_errstr, "Another transaction "
+                                           "could be in progress. Please try "
+                                           "again after sometime.");
+                        if (ret == -1)
+                                *op_errstr = NULL;
+
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PEER_LOCK_FAIL,
+                                "Failed to acquire lock");
+
+                }
+        }
+
+        ret = args.op_ret;
+
+        gf_msg_debug (this->name, 0, "Sent lock op req for 'Volume %s' "
+                "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+        /* The caller only ever receives a duplicate of args.errstr, so the
+         * original has to be released here, the same way
+         * gd_commit_op_phase does.  args is zero-initialised, so this is a
+         * no-op on the early-exit path. */
+        GF_FREE (args.errstr);
+        args.errstr = NULL;
+
+        return ret;
+}
+
+/*
+ * Stage phase of a synctask transaction: validate the operation locally,
+ * then ask every eligible peer to stage it and wait for the replies.
+ *
+ * op         - operation being staged
+ * op_ctx     - operation context
+ * req_dict   - request payload sent to the peers
+ * op_errstr  - receives an allocated error message on failure
+ * txn_opinfo - transaction info; peers with a generation newer than
+ *              txn_generation are skipped
+ *
+ * Responses are aggregated into req_dict for create-volume, add-brick and
+ * start-volume, and into op_ctx for every other operation.
+ *
+ * Returns 0 on success, non-zero when quorum, local validation, response
+ * aggregation or any peer's staging failed.
+ */
+int
+gd_stage_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+                   char **op_errstr, glusterd_op_info_t *txn_opinfo)
+{
+        int                     ret             = -1;
+        int                     peer_cnt        = 0;
+        dict_t                 *rsp_dict        = NULL;
+        char                   *hostname        = NULL;
+        xlator_t               *this            = NULL;
+        glusterd_conf_t        *conf            = NULL;
+        glusterd_peerinfo_t    *peerinfo        = NULL;
+        uuid_t                  tmp_uuid        = {0};
+        char                   *errstr          = NULL;
+        struct syncargs         args            = {0};
+        dict_t                 *aggr_dict       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        rsp_dict = dict_new ();
+        if (!rsp_dict)
+                goto out;
+
+        if ((op == GD_OP_CREATE_VOLUME) || (op == GD_OP_ADD_BRICK) ||
+            (op == GD_OP_START_VOLUME))
+                aggr_dict = req_dict;
+        else
+                aggr_dict = op_ctx;
+
+        ret = glusterd_validate_quorum (this, op, req_dict, op_errstr);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_SERVER_QUORUM_NOT_MET,
+                        "Server quorum not met. Rejecting operation.");
+                goto out;
+        }
+
+        /* Local validation first; a failure is attributed to "localhost". */
+        ret = glusterd_op_stage_validate (op, req_dict, op_errstr, rsp_dict);
+        if (ret) {
+                hostname = "localhost";
+                goto stage_done;
+        }
+
+        if ((op == GD_OP_REPLACE_BRICK || op == GD_OP_QUOTA ||
+             op == GD_OP_CREATE_VOLUME || op == GD_OP_ADD_BRICK ||
+             op == GD_OP_START_VOLUME)) {
+                ret = glusterd_syncop_aggr_rsp_dict (op, aggr_dict, rsp_dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESP_AGGR_FAIL, "%s",
+                                "Failed to aggregate response from node/brick");
+                        goto out;
+                }
+        }
+        dict_unref (rsp_dict);
+        rsp_dict = NULL;
+
+stage_done:
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VALIDATE_FAILED, LOGSTR_STAGE_FAIL,
+                        gd_op_list[op], hostname, (*op_errstr) ? ":" : " ",
+                        (*op_errstr) ? *op_errstr : " ");
+                if (*op_errstr == NULL)
+                        gf_asprintf (op_errstr, OPERRSTR_STAGE_FAIL, hostname);
+                goto out;
+        }
+
+        gd_syncargs_init (&args, aggr_dict);
+        synctask_barrier_init((&args));
+        peer_cnt = 0;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+                /* Only send requests to peers who were available before the
+                 * transaction started
+                 */
+                if (peerinfo->generation > txn_opinfo->txn_generation)
+                        continue;
+
+                if (!peerinfo->connected)
+                        continue;
+                if (op != GD_OP_SYNC_VOLUME &&
+                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+
+                ret = gd_syncop_mgmt_stage_op (peerinfo, &args,
+                                               MY_UUID, tmp_uuid,
+                                               op, req_dict, op_ctx);
+                peer_cnt++;
+        }
+        rcu_read_unlock ();
+
+        if (0 == peer_cnt) {
+                ret = 0;
+                goto out;
+        }
+
+
+        gf_msg_debug (this->name, 0, "Sent stage op req for 'Volume %s' "
+                "to %d peers", gd_op_list[op], peer_cnt);
+
+        gd_synctask_barrier_wait((&args), peer_cnt);
+
+        if (args.errstr)
+                 *op_errstr = gf_strdup (args.errstr);
+        else if (dict_get_str (aggr_dict, "errstr", &errstr) == 0)
+                *op_errstr = gf_strdup (errstr);
+
+        ret = args.op_ret;
+
+out:
+        if ((ret == 0) && (op == GD_OP_QUOTA)) {
+                ret = glusterd_validate_and_set_gfid (op_ctx, req_dict,
+                                                      op_errstr);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GFID_VALIDATE_SET_FAIL,
+                                "Failed to validate and set gfid");
+        }
+
+        if (rsp_dict)
+                dict_unref (rsp_dict);
+
+        /* The caller only ever receives a duplicate of args.errstr, so the
+         * original has to be released here, the same way
+         * gd_commit_op_phase does.  args is zero-initialised, so this is a
+         * no-op on the early-exit paths. */
+        GF_FREE (args.errstr);
+        args.errstr = NULL;
+
+        return ret;
+}
+
+/*
+ * Commit phase of a synctask transaction: perform the commit locally, then
+ * ask every eligible peer to commit and wait for the replies.
+ *
+ * op         - operation being committed
+ * op_ctx     - operation context; also the aggregation target for responses
+ * req_dict   - request payload sent to the peers
+ * op_errstr  - receives an allocated error message on failure
+ * txn_opinfo - transaction info; peers with a generation newer than
+ *              txn_generation are skipped
+ *
+ * Returns 0 on success (after calling glusterd_op_modify_op_ctx), non-zero
+ * when the local commit, response aggregation or any peer's commit failed.
+ */
+int
+gd_commit_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, glusterd_op_info_t *txn_opinfo)
+{
+ dict_t *rsp_dict = NULL;
+ int peer_cnt = -1;
+ int ret = -1;
+ char *hostname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ uuid_t tmp_uuid = {0};
+ char *errstr = NULL;
+ struct syncargs args = {0};
+ int type = GF_QUOTA_OPTION_TYPE_NONE;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Local commit first; a failure is attributed to "localhost". */
+ ret = glusterd_op_commit_perform (op, req_dict, op_errstr, rsp_dict);
+ if (ret) {
+ hostname = "localhost";
+ goto commit_done;
+ }
+
+ if (op == GD_OP_QUOTA) {
+ ret = dict_get_int32 (op_ctx, "type", &type);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Failed to get "
+ "opcode");
+ goto out;
+ }
+ }
+
+ /* Aggregate the local response for quota list/list-objects, and for
+ * every operation other than sync-volume and quota. */
+ if (((op == GD_OP_QUOTA) && ((type == GF_QUOTA_OPTION_TYPE_LIST) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS))) ||
+ ((op != GD_OP_SYNC_VOLUME) && (op != GD_OP_QUOTA))) {
+
+ ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx,
+ rsp_dict);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate "
+ "response from node/brick");
+ goto out;
+ }
+ }
+
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+commit_done:
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_COMMIT_OP_FAIL, LOGSTR_COMMIT_FAIL,
+ gd_op_list[op], hostname, (*op_errstr) ? ":" : " ",
+ (*op_errstr) ? *op_errstr : " ");
+ if (*op_errstr == NULL)
+ gf_asprintf (op_errstr, OPERRSTR_COMMIT_FAIL,
+ hostname);
+ goto out;
+ }
+
+ gd_syncargs_init (&args, op_ctx);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_opinfo->txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ /* The per-peer return value is overwritten below; the outcome
+ * is taken from args.op_ret after the barrier. */
+ ret = gd_syncop_mgmt_commit_op (peerinfo, &args,
+ MY_UUID, tmp_uuid,
+ op, req_dict, op_ctx);
+ peer_cnt++;
+ }
+ rcu_read_unlock ();
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+ ret = args.op_ret;
+ /* Prefer the collated peer error; fall back to "errstr" in op_ctx. */
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ else if (dict_get_str (op_ctx, "errstr", &errstr) == 0)
+ *op_errstr = gf_strdup (errstr);
+
+ gf_msg_debug (this->name, 0, "Sent commit op req for 'Volume %s' "
+ "to %d peers", gd_op_list[op], peer_cnt);
+out:
+ if (!ret)
+ glusterd_op_modify_op_ctx (op, op_ctx);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ /* The caller received a duplicate above, so release the original. */
+ GF_FREE (args.errstr);
+ args.errstr = NULL;
+
+ return ret;
+}
+
+/*
+ * Unlock phase of a synctask transaction: release the locks taken on the
+ * peers and then the local lock.
+ *
+ * conf         - glusterd configuration holding the peer list
+ * op           - operation that was run
+ * op_ret       - in/out: left untouched if already non-zero, otherwise
+ *                set to the unlock result
+ * req          - not referenced in this function
+ * op_ctx       - operation context (read for "hold_global_locks")
+ * op_errstr    - not referenced in this function
+ * volname      - name the local mgmt_v3 lock was taken on
+ * is_acquired  - false means no lock was taken and nothing is unlocked
+ * txn_id       - transaction id (used for mgmt_v3 unlock requests)
+ * txn_opinfo   - transaction info; peers with a generation newer than
+ *                txn_generation are skipped
+ * cluster_lock - true for the cluster-wide lock, false for mgmt_v3 locks
+ *
+ * Always returns 0; failures are reported through *op_ret.
+ */
+int
+gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
+ rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr,
+ char *volname, gf_boolean_t is_acquired, uuid_t txn_id,
+ glusterd_op_info_t *txn_opinfo, gf_boolean_t cluster_lock)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uuid_t tmp_uuid = {0};
+ int peer_cnt = 0;
+ int ret = -1;
+ xlator_t *this = NULL;
+ struct syncargs args = {0};
+ int32_t global = 0;
+ char *type = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ /* If the lock has not been held during this
+ * transaction, do not send unlock requests */
+ if (!is_acquired) {
+ ret = 0;
+ goto out;
+ }
+
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+
+ if (cluster_lock) {
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &conf->peers,
+ uuid_list) {
+ /* Only send requests to peers who were available before
+ * the transaction started
+ */
+ if (peerinfo->generation > txn_opinfo->txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ /* Only unlock peers that were locked */
+ if (peerinfo->locked) {
+ gd_syncop_mgmt_unlock (peerinfo, &args,
+ MY_UUID, tmp_uuid);
+ peer_cnt++;
+ }
+ }
+ rcu_read_unlock ();
+ } else {
+
+ /* The lookup result is not checked: 'global' stays 0 when the
+ * call leaves it unmodified, and 'ret' is overwritten below. */
+ ret = dict_get_int32 (op_ctx, "hold_global_locks", &global);
+ if (global)
+ type = "global";
+ else
+ type = "vol";
+ if (volname || global) {
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &conf->peers,
+ uuid_list) {
+ /* Only send requests to peers who were
+ * available before the transaction started
+ */
+ if (peerinfo->generation >
+ txn_opinfo->txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo,
+ &args, MY_UUID,
+ tmp_uuid, txn_id);
+ peer_cnt++;
+ }
+ rcu_read_unlock ();
+ }
+ }
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ ret = args.op_ret;
+
+ gf_msg_debug (this->name, 0, "Sent unlock op req for 'Volume %s' "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_UNLOCK_FAIL, "Failed to unlock "
+ "on some peer(s)");
+ }
+
+ /* NOTE(review): args.errstr is never released here, unlike in
+ * gd_commit_op_phase; confirm whether the unlock callbacks can set it. */
+out:
+ /* If unlock failed, and op_ret was previously set
+ * priority is given to the op_ret. If op_ret was
+ * not set, and unlock failed, then set op_ret */
+ if (!*op_ret)
+ *op_ret = ret;
+
+ if (is_acquired) {
+ /* Based on the op-version,
+ * we release the cluster or mgmt_v3 lock
+ * and clear the op */
+
+ glusterd_op_clear_op (op);
+ if (cluster_lock)
+ glusterd_unlock (MY_UUID);
+ else {
+ if (type) {
+ ret = glusterd_mgmt_v3_unlock (volname, MY_UUID,
+ type);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Unable to release lock for %s",
+ volname);
+ }
+ }
+ }
+
+ /* Second pass: pick up a failure of the local unlock just above. */
+ if (!*op_ret)
+ *op_ret = ret;
+
+ /*
+ * If there are any quorum events while the OP is in progress, process
+ * them.
+ */
+ if (conf->pending_quorum_action)
+ glusterd_do_quorum_action ();
+
+ return 0;
+}
+
+/*
+ * Count the entries on a list of pending nodes.
+ *
+ * bricks - list head whose entries are glusterd_pending_node_t linked
+ *          through their 'list' member
+ *
+ * Returns the number of entries (0 for an empty list).
+ */
+int
+gd_get_brick_count (struct cds_list_head *bricks)
+{
+        glusterd_pending_node_t *node  = NULL;
+        int                      total = 0;
+
+        cds_list_for_each_entry (node, bricks, list)
+                total += 1;
+
+        return total;
+}
+
+/*
+ * Brick-op phase of a synctask transaction: select the bricks/nodes the
+ * operation applies to and send the operation to each one in turn.
+ *
+ * op        - operation being run
+ * op_ctx    - operation context; receives aggregated responses
+ * req_dict  - request dictionary
+ * op_errstr - receives an error message on failure
+ *
+ * Returns 0 on success, non-zero on the first failure.
+ */
+int
+gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr)
+{
+ glusterd_pending_node_t *pending_node = NULL;
+ struct cds_list_head selected = {0,};
+ xlator_t *this = NULL;
+ int brick_count = 0;
+ int ret = -1;
+ rpc_clnt_t *rpc = NULL;
+ dict_t *rsp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ int32_t cmd = GF_OP_CMD_NONE;
+
+ this = THIS;
+ conf = this->private;
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ CDS_INIT_LIST_HEAD (&selected);
+ ret = glusterd_op_bricks_select (op, req_dict, op_errstr, &selected,
+ rsp_dict);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_OP_FAIL, "%s",
+ (*op_errstr) ? *op_errstr : "Brick op failed. Check "
+ "glusterd log file for more details.");
+ goto out;
+ }
+
+ /* Only heal-volume merges the selection response into op_ctx. */
+ if (op == GD_OP_HEAL_VOLUME) {
+ ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict);
+ if (ret)
+ goto out;
+ }
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+ brick_count = 0;
+ cds_list_for_each_entry (pending_node, &selected, list) {
+ rpc = glusterd_pending_node_get_rpc (pending_node);
+ if (!rpc) {
+ /* A rebalance node without an rpc is not an error:
+ * record the node response and stop. */
+ if (pending_node->type == GD_NODE_REBALANCE) {
+ ret = 0;
+ glusterd_defrag_volume_node_rsp (req_dict,
+ NULL, op_ctx);
+ goto out;
+ }
+
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RPC_FAILURE, "Brick Op failed "
+ "due to rpc failure.");
+ goto out;
+ }
+
+ /* Redirect operation to be detach tier via rebalance flow. */
+ ret = dict_get_int32 (req_dict, "command", &cmd);
+ if (!ret) {
+ if (cmd == GF_OP_CMD_DETACH_START) {
+ op = GD_OP_REBALANCE;
+ ret = dict_set_int32 (req_dict, "rebalance-command",
+ GF_DEFRAG_CMD_START_DETACH_TIER);
+ if (ret)
+ goto out;
+ }
+ }
+ ret = gd_syncop_mgmt_brick_op (rpc, pending_node, op, req_dict,
+ op_ctx, op_errstr);
+ /* Undo the detach-start redirection before the next node. */
+ if (cmd == GF_OP_CMD_DETACH_START) {
+ op = GD_OP_REMOVE_BRICK;
+ dict_del (req_dict, "rebalance-command");
+ }
+ if (ret)
+ goto out;
+
+ brick_count++;
+ glusterd_pending_node_put_rpc (pending_node);
+ }
+
+ /* The loop ran to completion, so every rpc was already put back;
+ * clear the cursor so the cleanup below does not put one again. */
+ pending_node = NULL;
+ ret = 0;
+out:
+ /* Non-NULL only when the loop was left early via goto. */
+ if (pending_node)
+ glusterd_pending_node_put_rpc (pending_node);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+ gf_msg_debug (this->name, 0, "Sent op req to %d bricks",
+ brick_count);
+ return ret;
+}
+
+/*
+ * Run one management operation as a synctask transaction.
+ *
+ * Reads the operation code from op_ctx (GD_SYNC_OPCODE_KEY), generates a
+ * transaction id, takes the local lock (cluster lock for op-versions below
+ * GD_OP_VERSION_3_6_0, otherwise a mgmt_v3 "global" or "vol" lock), then
+ * runs the lock, stage, brick-op and commit phases in order.  The unlock
+ * phase and the CLI response are always executed, whatever failed.
+ *
+ * op_ctx - operation context dictionary
+ * req    - originating CLI request, used for the response
+ *
+ * Returns nothing; the outcome is delivered through
+ * glusterd_op_send_cli_response.
+ */
+void
+gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
+{
+        int                         ret              = -1;
+        int                         op_ret           = -1;
+        dict_t                      *req_dict        = NULL;
+        glusterd_conf_t             *conf            = NULL;
+        glusterd_op_t               op               = GD_OP_NONE;
+        int32_t                     tmp_op           = 0;
+        char                        *op_errstr       = NULL;
+        char                        *tmp             = NULL;
+        char                        *global          = NULL;
+        char                        *volname         = NULL;
+        xlator_t                    *this            = NULL;
+        gf_boolean_t                is_acquired      = _gf_false;
+        gf_boolean_t                is_global        = _gf_false;
+        uuid_t                      *txn_id          = NULL;
+        glusterd_op_info_t          txn_opinfo       = {{0},};
+        uint32_t                    op_errno         = 0;
+        gf_boolean_t                cluster_lock     = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get volume "
+                        "operation");
+                goto out;
+        }
+        op = tmp_op;
+
+        /* Generate a transaction-id for this operation and
+         * save it in the dict */
+        ret = glusterd_generate_txn_id (op_ctx, &txn_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_IDGEN_FAIL,
+                        "Failed to generate transaction id");
+                goto out;
+        }
+
+        /* Save opinfo for this transaction with the transaction id */
+        glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL);
+        ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TRANS_OPINFO_SET_FAIL,
+                        "Unable to set transaction's opinfo");
+
+        gf_msg_debug (this->name, 0,
+                      "Transaction ID : %s", uuid_utoa (*txn_id));
+
+        /* Save the MY_UUID as the originator_uuid */
+        ret = glusterd_set_originator_uuid (op_ctx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUID_SET_FAIL,
+                        "Failed to set originator_uuid.");
+                goto out;
+        }
+
+        if (conf->op_version < GD_OP_VERSION_3_6_0)
+                cluster_lock = _gf_true;
+
+        /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+        if (cluster_lock) {
+                ret = glusterd_lock (MY_UUID);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GLUSTERD_LOCK_FAIL,
+                                "Unable to acquire lock");
+                        gf_asprintf (&op_errstr,
+                                     "Another transaction is in progress. "
+                                     "Please try again after sometime.");
+                        goto out;
+                }
+        } else {
+
+                ret = dict_get_str (op_ctx, "globalname", &global);
+                if (!ret) {
+                        is_global = _gf_true;
+                        goto global;
+                }
+
+                /* If no volname is given as a part of the command, locks will
+                 * not be held */
+                ret = dict_get_str (op_ctx, "volname", &tmp);
+                if (ret) {
+                        gf_msg_debug ("glusterd", 0,
+                                "Failed to get volume "
+                                "name");
+                        goto local_locking_done;
+                } else {
+                        /* Use a copy of volname, as cli response will be
+                         * sent before the unlock, and the volname in the
+                         * dict, might be removed */
+                        volname = gf_strdup (tmp);
+                        if (!volname) {
+                                /* ret is 0 here (the lookup succeeded), so
+                                 * it must be set explicitly; otherwise the
+                                 * CLI would be told the operation succeeded
+                                 * although nothing was run. */
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                ret = glusterd_mgmt_v3_lock (volname, MY_UUID,
+                                             &op_errno, "vol");
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Unable to acquire lock for %s", volname);
+                        gf_asprintf (&op_errstr,
+                                     "Another transaction is in progress "
+                                     "for %s. Please try again after sometime.",
+                                     volname);
+                        goto out;
+                }
+        }
+
+global:
+        if (is_global) {
+                ret = glusterd_mgmt_v3_lock (global, MY_UUID, &op_errno,
+                                             "global");
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_MGMTV3_LOCK_GET_FAIL,
+                                "Unable to acquire lock for %s", global);
+                        gf_asprintf (&op_errstr,
+                                     "Another transaction is in progress "
+                                     "for %s. Please try again after sometime.",
+                                     global);
+                        is_global = _gf_false;
+                        goto out;
+                }
+        }
+
+        is_acquired = _gf_true;
+
+local_locking_done:
+
+        /* If no volname is given as a part of the command, locks will
+         * not be held */
+        if (volname || cluster_lock || is_global) {
+                ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, *txn_id,
+                                        &txn_opinfo, cluster_lock);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_PEER_LOCK_FAIL,
+                                "Locking Peers Failed.");
+                        goto out;
+                }
+        }
+
+        ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+                        LOGSTR_BUILD_PAYLOAD,
+                        gd_op_list[op]);
+                if (op_errstr == NULL)
+                        gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+                goto out;
+        }
+
+        ret = gd_stage_op_phase (op, op_ctx, req_dict, &op_errstr, &txn_opinfo);
+        if (ret)
+                goto out;
+
+        ret = gd_brick_op_phase (op, op_ctx, req_dict, &op_errstr);
+        if (ret)
+                goto out;
+
+        ret = gd_commit_op_phase (op, op_ctx, req_dict, &op_errstr,
+                                  &txn_opinfo);
+        if (ret)
+                goto out;
+
+        ret = 0;
+out:
+        op_ret = ret;
+        /* The unlock phase needs the transaction id, so it is skipped when
+         * the id was never generated. */
+        if (txn_id) {
+                if (global)
+                        (void) gd_unlock_op_phase (conf, op, &op_ret, req, op_ctx,
+                                                   op_errstr, global, is_acquired,
+                                                   *txn_id, &txn_opinfo,
+                                                   cluster_lock);
+                else
+                        (void) gd_unlock_op_phase (conf, op, &op_ret, req, op_ctx,
+                                                   op_errstr, volname, is_acquired,
+                                                   *txn_id, &txn_opinfo,
+                                                   cluster_lock);
+
+
+                /* Clearing the transaction opinfo */
+                ret = glusterd_clear_txn_opinfo (txn_id);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_TRANS_OPINFO_CLEAR_FAIL,
+                                "Unable to clear transaction's "
+                                "opinfo for transaction ID : %s",
+                                uuid_utoa (*txn_id));
+        }
+
+        /* Report a generic internal error when no specific errno was set. */
+        if (op_ret && (op_errno == 0))
+                op_errno = EG_INTRNL;
+
+        glusterd_op_send_cli_response (op, op_ret, op_errno, req,
+                                       op_ctx, op_errstr);
+
+        if (volname)
+                GF_FREE (volname);
+
+        if (req_dict)
+                dict_unref (req_dict);
+
+        if (op_errstr) {
+                GF_FREE (op_errstr);
+                op_errstr = NULL;
+        }
+
+        return;
+}
+
+/*
+ * Entry point for running a management operation as a synctask.
+ *
+ * Stores the operation code in the dictionary under GD_SYNC_OPCODE_KEY and
+ * then hands the dictionary and the request to gd_sync_task_begin.
+ *
+ * req  - originating request
+ * op   - operation code to run
+ * dict - operation context dictionary (passed as void *)
+ *
+ * Returns 0 once gd_sync_task_begin has been called, or the non-zero
+ * dict_set_int32 result if the operation code could not be stored.
+ */
+int32_t
+glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op,
+                            void *dict)
+{
+        int              ret = 0;
+
+        ret = dict_set_int32 (dict, GD_SYNC_OPCODE_KEY, op);
+        if (ret) {
+                /* This is a set failure, so log it under the SET message
+                 * id rather than the GET one. */
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "dict set failed for setting operations");
+                goto out;
+        }
+
+        gd_sync_task_begin (dict, req);
+        ret = 0;
+out:
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
new file mode 100644
index 00000000000..f3425c2f538
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
@@ -0,0 +1,83 @@
+/*
+ Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __RPC_SYNCOP_H
+#define __RPC_SYNCOP_H
+
+#include "syncop.h"
+#include "glusterd-sm.h"
+#include "glusterd.h"
+
+#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation"
+
+/* gd_syncop_* */
+/*
+ * Submit one RPC from inside a synctask and yield until it completes.
+ *
+ * 'stb' is a pointer to the struct syncargs used for the call.  The big
+ * lock is dropped around the submit/yield and re-taken afterwards.  If the
+ * submit fails, stb->errstr receives an allocated message instead of
+ * yielding.
+ *
+ * The expansion declares locals named ret, task and conf, so the arguments
+ * must not refer to caller variables with those names.
+ */
+#define GD_SYNCOP(rpc, stb, cookie, cbk, req, prog, procnum, xdrproc) do {     \
+        int ret = 0;                                                           \
+        struct  synctask        *task = NULL;                                  \
+        glusterd_conf_t         *conf= THIS->private;                          \
+                                                                               \
+        task = synctask_get ();                                                \
+        (stb)->task = task;                                                    \
+                                                                               \
+        /*This is to ensure that the brick_op_cbk is able to                   \
+         * take the big lock*/                                                 \
+        synclock_unlock (&conf->big_lock);                                     \
+        ret = gd_syncop_submit_request (rpc, req, stb, cookie,                 \
+                                        prog, procnum, cbk,                    \
+                                        (xdrproc_t)xdrproc);                   \
+        if (!ret)                                                              \
+                synctask_yield ((stb)->task);                                  \
+        else                                                                   \
+                gf_asprintf (&(stb)->errstr, "%s failed. Check log file"       \
+                             " for more details", (prog)->progname);           \
+        synclock_lock (&conf->big_lock);                                       \
+        } while (0)
+
+/*
+ * Allocate *dst_ptr with GF_CALLOC and copy 'uuid' into it.
+ *
+ * Sets 'ret' to 0 on success and to -1 when the allocation fails (dst_ptr
+ * is then NULL).  dst_ptr and ret are expanded unparenthesized, so pass
+ * plain variable names.
+ */
+#define GD_ALLOC_COPY_UUID(dst_ptr, uuid, ret) do { \
+ dst_ptr = GF_CALLOC (1, sizeof (*dst_ptr), gf_common_mt_uuid_t); \
+ if (dst_ptr) { \
+ gf_uuid_copy (*dst_ptr, uuid); \
+ ret = 0; \
+ } else { \
+ ret = -1; \
+ } \
+} while (0)
+
+int gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local,
+ void *cookie, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+int gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg,
+ uuid_t my_uuid, uuid_t recv_uuid);
+
+int gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg,
+ uuid_t my_uuid, uuid_t recv_uuid);
+
+int gd_syncop_mgmt_stage_op (glusterd_peerinfo_t *peerinfo,
+ struct syncargs *arg, uuid_t my_uuid,
+ uuid_t recv_uuid, int op, dict_t *dict_out,
+ dict_t *op_ctx);
+
+int gd_syncop_mgmt_commit_op (glusterd_peerinfo_t *peerinfo,
+ struct syncargs *arg, uuid_t my_uuid,
+ uuid_t recv_uuid, int op, dict_t *dict_out,
+ dict_t *op_ctx);
+
+void
+gd_synctask_barrier_wait (struct syncargs *args, int count);
+
+int
+gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr);
+
+int
+glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp);
+
+void
+gd_syncargs_init (struct syncargs *args, dict_t *op_ctx);
+#endif /* __RPC_SYNCOP_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
new file mode 100644
index 00000000000..a03b041a4e8
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -0,0 +1,11510 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
+#include <dlfcn.h>
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+#endif
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "xlator.h"
+#include "logging.h"
+#include "glusterd-messages.h"
+#include "timer.h"
+#include "defaults.h"
+#include "compat.h"
+#include "syncop.h"
+#include "run.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "syscall.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-volgen.h"
+#include "glusterd-pmap.h"
+#include "glusterfs-acl.h"
+#include "glusterd-syncop.h"
+#include "glusterd-locks.h"
+#include "glusterd-messages.h"
+#include "glusterd-volgen.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-bitd-svc.h"
+#include "glusterd-server-quorum.h"
+#include "quota-common-utils.h"
+
+#include "xdr-generic.h"
+#include <sys/resource.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <rpc/pmap_clnt.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/statvfs.h>
+#include <ifaddrs.h>
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
+#ifdef GF_SOLARIS_HOST_OS
+#include <sys/sockio.h>
+#endif
+
+#define NFS_PROGRAM 100003
+#define NFSV3_VERSION 3
+
+#define MOUNT_PROGRAM 100005
+#define MOUNTV3_VERSION 3
+#define MOUNTV1_VERSION 1
+
+#define NLM_PROGRAM 100021
+#define NLMV4_VERSION 4
+#define NLMV1_VERSION 1
+
+extern struct volopt_map_entry glusterd_volopt_map[];
+
+static glusterd_lock_t lock;
+
+/*
+ * Call fn once for every brick of volinfo, stopping at the first failure.
+ *
+ * volinfo  - volume whose 'bricks' list is walked
+ * mod_dict - passed through to fn unchanged
+ * data     - passed through to fn unchanged
+ * fn       - callback invoked as fn (volinfo, brickinfo, mod_dict, data)
+ *
+ * Returns 0 when every call returned 0 (or there were no bricks), otherwise
+ * the first non-zero value returned by fn.
+ */
+static int
+_brick_for_each (glusterd_volinfo_t *volinfo, dict_t *mod_dict,
+                 void *data,
+                 int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *,
+                            dict_t *mod_dict, void *))
+{
+        xlator_t             *this   = THIS;
+        glusterd_brickinfo_t *brick  = NULL;
+        int                   status = 0;
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                gf_msg_debug (this->name, 0, "Found a brick - %s:%s",
+                              brick->hostname, brick->path);
+
+                status = fn (volinfo, brick, mod_dict, data);
+                if (status != 0)
+                        break;
+        }
+
+        return status;
+}
+
+/*
+ * Call fn for every brick of a volume.
+ *
+ * For non-tier volumes the bricks are walked directly with a NULL mod_dict.
+ * For tier volumes two sub-volinfos are built in turn with
+ * glusterd_create_sub_tier_volinfo: the first (flag _gf_true) is walked
+ * with a mod_dict carrying "hot-brick" = "on", the second (flag _gf_false)
+ * with a NULL mod_dict.
+ *
+ * volinfo - volume to walk
+ * data    - passed through to fn unchanged
+ * fn      - callback invoked as fn (volinfo, brickinfo, mod_dict, data)
+ *
+ * Returns 0 on success, otherwise the first non-zero result encountered.
+ */
+int
+glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data,
+               int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *,
+                          dict_t *mod_dict, void *))
+{
+        dict_t             *mod_dict    = NULL;
+        glusterd_volinfo_t *dup_volinfo = NULL;
+        int                 ret         = 0;
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                ret = _brick_for_each (volinfo, NULL, data, fn);
+                if (ret)
+                        goto out;
+        } else {
+                ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo,
+                                                        _gf_true,
+                                                        volinfo->volname);
+                if (ret)
+                        goto out;
+
+                mod_dict = dict_new();
+                if (!mod_dict) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_str (mod_dict, "hot-brick", "on");
+                if (ret)
+                        goto out;
+
+                ret = _brick_for_each (dup_volinfo, mod_dict, data, fn);
+                if (ret)
+                        goto out;
+
+                /* Release the first sub-volinfo the same way the exit path
+                 * does; a bare GF_FREE would free only the top-level
+                 * structure and leak whatever it owns. */
+                glusterd_volinfo_delete (dup_volinfo);
+                dup_volinfo = NULL;
+
+                ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo,
+                                                        _gf_false,
+                                                        volinfo->volname);
+                if (ret)
+                        goto out;
+                ret = _brick_for_each (dup_volinfo, NULL, data, fn);
+                if (ret)
+                        goto out;
+        }
+out:
+        if (dup_volinfo)
+                glusterd_volinfo_delete (dup_volinfo);
+
+        if (mod_dict)
+                dict_unref (mod_dict);
+        return ret;
+}
+
+/*
+ * Copy the owner uuid of the file-static cluster lock into *uuid.
+ * Always returns 0.
+ */
+int32_t
+glusterd_get_lock_owner (uuid_t *uuid)
+{
+ gf_uuid_copy (*uuid, lock.owner) ;
+ return 0;
+}
+
+/*
+ * Record 'owner' as the owner of the file-static cluster lock.
+ * Always returns 0.
+ */
+static int32_t
+glusterd_set_lock_owner (uuid_t owner)
+{
+ gf_uuid_copy (lock.owner, owner);
+ //TODO: set timestamp
+ return 0;
+}
+
+/*
+ * Clear the owner of the file-static cluster lock.
+ * The 'owner' argument is not consulted.  Always returns 0.
+ */
+static int32_t
+glusterd_unset_lock_owner (uuid_t owner)
+{
+ gf_uuid_clear (lock.owner);
+ //TODO: set timestamp
+ return 0;
+}
+
+/*
+ * Probe for the FUSE device node ("/dev/puffs" on NetBSD, "/dev/fuse"
+ * elsewhere).
+ *
+ * Returns _gf_true only when the node can be opened read-write and then
+ * closed again without error; _gf_false otherwise.
+ */
+gf_boolean_t
+glusterd_is_fuse_available ()
+{
+#ifdef __NetBSD__
+        const char *devnode = "/dev/puffs";
+#else
+        const char *devnode = "/dev/fuse";
+#endif
+        int         handle  = open (devnode, O_RDWR);
+
+        if (handle < 0)
+                return _gf_false;
+
+        if (sys_close (handle) != 0)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * Take the cluster lock on behalf of 'uuid'.
+ *
+ * Succeeds only when the lock currently has no owner.
+ *
+ * Returns 0 when the lock was taken, -1 when another owner holds it.
+ */
+int32_t
+glusterd_lock (uuid_t   uuid)
+{
+        xlator_t *this = THIS;
+        uuid_t    holder;
+        char      requester_buf[50];
+        char      holder_buf[50];
+        int       ret = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (uuid);
+
+        glusterd_get_lock_owner (&holder);
+
+        if (gf_uuid_is_null (holder)) {
+                /* Lock is free: claim it. */
+                ret = glusterd_set_lock_owner (uuid);
+                if (ret == 0) {
+                        gf_msg_debug (this->name, 0, "Cluster lock held by"
+                                 " %s", uuid_utoa (uuid));
+                }
+                return ret;
+        }
+
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_GLUSTERD_LOCK_FAIL, "Unable to get lock"
+                " for uuid: %s, lock held by: %s",
+                uuid_utoa_r (uuid, requester_buf),
+                uuid_utoa_r (holder, holder_buf));
+        return ret;
+}
+
+
+/*
+ * Release the cluster lock on behalf of 'uuid'.
+ *
+ * Returns 0 when the lock was released, -1 when no lock is held, the
+ * non-zero gf_uuid_compare result when a different owner holds it, or the
+ * non-zero result of clearing the owner.
+ */
+int32_t
+glusterd_unlock (uuid_t uuid)
+{
+        xlator_t *this = THIS;
+        uuid_t    holder;
+        char      requester_buf[50];
+        char      holder_buf[50];
+        int32_t   ret = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (uuid);
+
+        glusterd_get_lock_owner (&holder);
+
+        if (gf_uuid_is_null (holder)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_LOCK_FAIL, "Cluster lock not held!");
+                return ret;
+        }
+
+        /* Only the current owner may release the lock. */
+        ret = gf_uuid_compare (uuid, holder);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_LOCK_FAIL, "Cluster lock held by %s ,"
+                        "unlock req from %s!", uuid_utoa_r (holder, holder_buf),
+                        uuid_utoa_r (uuid, requester_buf));
+                return ret;
+        }
+
+        ret = glusterd_unset_lock_owner (uuid);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_UNLOCK_FAIL, "Unable to clear cluster "
+                        "lock");
+                return ret;
+        }
+
+        return 0;
+}
+
+
+/* Copy this node's UUID (MY_UUID) into *uuid.
+ *
+ * Always returns 0.
+ */
+int
+glusterd_get_uuid (uuid_t *uuid)
+{
+        /* MY_UUID is a macro; 'priv' is kept in scope for it. */
+        glusterd_conf_t *priv = THIS->private;
+
+        GF_ASSERT (priv);
+
+        gf_uuid_copy (*uuid, MY_UUID);
+
+        return 0;
+}
+
+/* Serialize @req with @xdrproc and hand it to the RPC client layer.
+ *
+ * @rpc      rpc client to submit on
+ * @req      request structure to encode; may be NULL, in which case no
+ *           payload is attached
+ * @frame    call frame forwarded to rpc_clnt_submit ()
+ * @prog     rpc program, @procnum procedure number
+ * @iobref   optional iobref; a temporary one is created (and released
+ *           before returning) when NULL and a payload exists
+ * @this     xlator whose ctx provides the iobuf pool
+ * @cbkfn    reply callback
+ * @xdrproc  XDR encoder for @req
+ *
+ * Returns -1 only when buffer allocation or serialization fails before
+ * the request is submitted; 0 otherwise (see the comment above the final
+ * assignment for why the submit result is deliberately ignored).
+ */
+int
+glusterd_submit_request (struct rpc_clnt *rpc, void *req,
+                         call_frame_t *frame, rpc_clnt_prog_t *prog,
+                         int procnum, struct iobref *iobref,
+                         xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+        char            new_iobref = 0;
+        int             ret = -1;
+        int             count = 0;
+        ssize_t         req_size = 0;
+        struct iobuf   *iobuf = NULL;
+        struct iovec    iov = {0, };
+
+        GF_ASSERT (rpc);
+        GF_ASSERT (this);
+
+        if (req) {
+                req_size = xdr_sizeof (xdrproc, req);
+                iobuf = iobuf_get2 (this->ctx->iobuf_pool, req_size);
+                if (!iobuf) {
+                        goto out;
+                }
+
+                if (!iobref) {
+                        iobref = iobref_new ();
+                        if (!iobref) {
+                                goto out;
+                        }
+
+                        new_iobref = 1;
+                }
+
+                iobref_add (iobref, iobuf);
+
+                iov.iov_base = iobuf->ptr;
+                iov.iov_len = iobuf_pagesize (iobuf);
+
+                /* Create the xdr payload */
+                ret = xdr_serialize_generic (iov, req, xdrproc);
+                if (ret == -1) {
+                        goto out;
+                }
+                iov.iov_len = ret;
+                count = 1;
+        }
+
+        /* Send the msg */
+        rpc_clnt_submit (rpc, prog, procnum, cbkfn, &iov, count, NULL, 0,
+                         iobref, frame, NULL, 0, NULL, 0, NULL);
+
+        /* Unconditionally set ret to 0 here. This is to guard against a double
+         * STACK_DESTROY in case of a failure in rpc_clnt_submit AFTER the
+         * request is sent over the wire: once in the callback function of the
+         * request and once in the error codepath of some of the callers of
+         * glusterd_submit_request().
+         */
+        ret = 0;
+out:
+        if (new_iobref) {
+                iobref_unref (iobref);
+        }
+
+        /* iobuf is still NULL when there was no request body or when the
+         * allocation failed; only drop a reference that is actually held.
+         */
+        if (iobuf)
+                iobuf_unref (iobuf);
+
+        return ret;
+}
+
+
+
+
+/* Encode the reply structure @arg with @xdrproc into a freshly obtained
+ * iobuf and describe the encoded bytes in @outmsg.
+ *
+ * @req      request being answered; its svc->ctx supplies the iobuf pool
+ * @arg      reply structure to encode
+ * @outmsg   filled with the buffer base and the encoded length
+ * @xdrproc  XDR encoder for @arg
+ *
+ * Returns the iobuf holding the encoded reply (the caller owns that
+ * reference), or NULL if the buffer could not be obtained or encoding
+ * failed.
+ */
+struct iobuf *
+glusterd_serialize_reply (rpcsvc_request_t *req, void *arg,
+                          struct iovec *outmsg, xdrproc_t xdrproc)
+{
+        struct iobuf   *iob = NULL;
+        ssize_t         retlen = -1;
+        ssize_t         rsp_size = 0;
+
+        /* First, get the io buffer into which the reply in arg will
+         * be serialized.
+         */
+        rsp_size = xdr_sizeof (xdrproc, arg);
+        iob = iobuf_get2 (req->svc->ctx->iobuf_pool, rsp_size);
+        if (!iob) {
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY,
+                        "Failed to get iobuf");
+                /* Nothing was acquired, so there is nothing to unref. */
+                return NULL;
+        }
+
+        iobuf_to_iovec (iob, outmsg);
+        /* Use the given serializer to translate the given C structure in arg
+         * to XDR format which will be written into the buffer in outmsg.
+         */
+        /* retlen is used to receive the error since size_t is unsigned and we
+         * need -1 for error notification during encoding.
+         */
+        retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
+        if (retlen == -1) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_ENCODE_FAIL, "Failed to encode message");
+                iobuf_unref (iob);
+                return NULL;
+        }
+
+        outmsg->iov_len = retlen;
+
+        return iob;
+}
+
+/* Serialize the reply @arg and submit it for @req through
+ * rpcsvc_submit_generic (), together with the optional @payload vectors.
+ *
+ * When @iobref is NULL a temporary iobref is created for the duration of
+ * the call.  A serialization failure is logged but the submission is
+ * still attempted with an empty reply vector.
+ *
+ * Returns 0 on successful submission, -1 otherwise.
+ */
+int
+glusterd_submit_reply (rpcsvc_request_t *req, void *arg,
+                       struct iovec *payload, int payloadcount,
+                       struct iobref *iobref, xdrproc_t xdrproc)
+{
+        struct iobuf   *reply_buf = NULL;
+        struct iovec    rsp = {0,};
+        char            own_iobref = 0;
+        int             ret = -1;
+
+        if (!req) {
+                GF_ASSERT (req);
+                return ret;
+        }
+
+        if (!iobref) {
+                iobref = iobref_new ();
+                if (!iobref) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "out of memory");
+                        return ret;
+                }
+
+                own_iobref = 1;
+        }
+
+        reply_buf = glusterd_serialize_reply (req, arg, &rsp, xdrproc);
+        if (reply_buf) {
+                iobref_add (iobref, reply_buf);
+        } else {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_SERIALIZE_MSG_FAIL, "Failed to serialize reply");
+        }
+
+        ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
+                                     iobref);
+
+        /* The message has been handed to the RPC layer, which is expected
+         * to hold its own reference on the buffer; ours is dropped below.
+         */
+        if (ret == -1) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REPLY_SUBMIT_FAIL, "Reply submission failed");
+        } else {
+                ret = 0;
+        }
+
+        if (own_iobref) {
+                iobref_unref (iobref);
+        }
+
+        if (reply_buf)
+                iobuf_unref (reply_buf);
+
+        return ret;
+}
+
+/* Report whether glusterd_volinfo_find () succeeds for @volname. */
+gf_boolean_t
+glusterd_check_volume_exists (char *volname)
+{
+        glusterd_volinfo_t *found = NULL;
+        int32_t             rc = glusterd_volinfo_find (volname, &found);
+
+        return (rc == 0);
+}
+
+/* Drop one reference on @volinfo under its reflock.
+ *
+ * When the count reaches zero the volinfo is destroyed with
+ * glusterd_volinfo_delete () and NULL is returned; otherwise @volinfo is
+ * returned unchanged.
+ */
+glusterd_volinfo_t *
+glusterd_volinfo_unref (glusterd_volinfo_t *volinfo)
+{
+        int remaining = -1;
+
+        pthread_mutex_lock (&volinfo->reflock);
+        {
+                volinfo->refcnt -= 1;
+                remaining = volinfo->refcnt;
+        }
+        pthread_mutex_unlock (&volinfo->reflock);
+
+        if (remaining != 0)
+                return volinfo;
+
+        glusterd_volinfo_delete (volinfo);
+        return NULL;
+}
+
+/* Take one reference on @volinfo under its reflock and return it. */
+glusterd_volinfo_t *
+glusterd_volinfo_ref (glusterd_volinfo_t *volinfo)
+{
+        pthread_mutex_lock (&volinfo->reflock);
+        {
+                volinfo->refcnt += 1;
+        }
+        pthread_mutex_unlock (&volinfo->reflock);
+
+        return volinfo;
+}
+
+/* Allocate and initialise an empty volinfo.
+ *
+ * The new object gets initialised list heads, three empty dicts (dict,
+ * gsync_slaves, gsync_active_slaves), a parent volume name of "N/A",
+ * the default snapshot hard limit and one reference, and is stored in
+ * *volinfo.
+ *
+ * Returns 0 on success, -1 if any allocation fails (in which case
+ * *volinfo is not written).
+ */
+int32_t
+glusterd_volinfo_new (glusterd_volinfo_t **volinfo)
+{
+        glusterd_volinfo_t *vol = NULL;
+        int32_t             ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        vol = GF_CALLOC (1, sizeof(*vol), gf_gld_mt_glusterd_volinfo_t);
+        if (vol == NULL)
+                goto out;
+
+        LOCK_INIT (&vol->lock);
+        CDS_INIT_LIST_HEAD (&vol->vol_list);
+        CDS_INIT_LIST_HEAD (&vol->snapvol_list);
+        CDS_INIT_LIST_HEAD (&vol->bricks);
+        CDS_INIT_LIST_HEAD (&vol->snap_volumes);
+
+        vol->dict = dict_new ();
+        if (vol->dict == NULL) {
+                GF_FREE (vol);
+                goto out;
+        }
+
+        vol->gsync_slaves = dict_new ();
+        if (vol->gsync_slaves == NULL) {
+                dict_unref (vol->dict);
+                GF_FREE (vol);
+                goto out;
+        }
+
+        vol->gsync_active_slaves = dict_new ();
+        if (vol->gsync_active_slaves == NULL) {
+                dict_unref (vol->dict);
+                dict_unref (vol->gsync_slaves);
+                GF_FREE (vol);
+                goto out;
+        }
+
+        snprintf (vol->parent_volname, GD_VOLUME_NAME_MAX, "N/A");
+
+        vol->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+        vol->xl = THIS;
+
+        /* Install the snapd service builder and run it once. */
+        vol->snapd.svc.build = glusterd_snapdsvc_build;
+        vol->snapd.svc.build (&(vol->snapd.svc));
+
+        pthread_mutex_init (&vol->reflock, NULL);
+        *volinfo = glusterd_volinfo_ref (vol);
+
+        ret = 0;
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Create a new volinfo and copy the layout counters, quota/snapshot
+ * settings and the three option dicts of @volinfo into it.  Bricks are
+ * not copied.
+ *
+ * @param volinfo       volinfo which will be duplicated
+ * @param dup_volinfo   receives the newly created volinfo
+ * @param set_userauth  if true, the auth username/password are copied too
+ *
+ * @return 0 on success else -1
+ */
+int32_t
+glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
+                      glusterd_volinfo_t **dup_volinfo,
+                      gf_boolean_t set_userauth)
+{
+        int32_t                 ret = -1;
+        xlator_t               *this = THIS;
+        glusterd_volinfo_t     *copy = NULL;
+
+        GF_ASSERT (this);
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out);
+
+        ret = glusterd_volinfo_new (&copy);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_SET_FAIL, "not able to create the "
+                        "duplicate volinfo for the volume %s",
+                        volinfo->volname);
+                goto out;
+        }
+
+        /* Plain scalar members. */
+        copy->type                = volinfo->type;
+        copy->replica_count       = volinfo->replica_count;
+        copy->arbiter_count       = volinfo->arbiter_count;
+        copy->stripe_count        = volinfo->stripe_count;
+        copy->disperse_count      = volinfo->disperse_count;
+        copy->redundancy_count    = volinfo->redundancy_count;
+        copy->dist_leaf_count     = volinfo->dist_leaf_count;
+        copy->sub_count           = volinfo->sub_count;
+        copy->subvol_count        = volinfo->subvol_count;
+        copy->transport_type      = volinfo->transport_type;
+        copy->brick_count         = volinfo->brick_count;
+        copy->tier_info           = volinfo->tier_info;
+        copy->quota_conf_version  = volinfo->quota_conf_version;
+        copy->quota_xattr_version = volinfo->quota_xattr_version;
+        copy->snap_max_hard_limit = volinfo->snap_max_hard_limit;
+        copy->quota_conf_cksum    = volinfo->quota_conf_cksum;
+
+        /* Option dictionaries. */
+        dict_copy (volinfo->dict, copy->dict);
+        dict_copy (volinfo->gsync_slaves, copy->gsync_slaves);
+        dict_copy (volinfo->gsync_active_slaves,
+                   copy->gsync_active_slaves);
+        gd_update_volume_op_versions (copy);
+
+        if (set_userauth) {
+                glusterd_auth_set_username (copy, volinfo->auth.username);
+                glusterd_auth_set_password (copy, volinfo->auth.password);
+        }
+
+        *dup_volinfo = copy;
+        ret = 0;
+out:
+        if ((ret != 0) && (copy != NULL)) {
+                (void) glusterd_volinfo_delete (copy);
+        }
+        return ret;
+}
+
+/* Copy the identifying fields of @brickinfo into the already allocated
+ * @dup_brickinfo: host, paths, device/fs details, uuid, ports, log file,
+ * brick id, mount dir and status fields.  The copied path is
+ * canonicalized in place.
+ *
+ * @param brickinfo     Source brickinfo
+ * @param dup_brickinfo Destination brickinfo
+ *
+ * @return 0 on success else -1 (a failure may leave @dup_brickinfo
+ *         partially filled)
+ */
+int32_t
+glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo,
+                        glusterd_brickinfo_t *dup_brickinfo)
+{
+        int32_t         ret = -1;
+        xlator_t       *this = THIS;
+
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, brickinfo, out);
+        GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out);
+
+        /* Fixed-size string members. */
+        strcpy (dup_brickinfo->hostname, brickinfo->hostname);
+        strcpy (dup_brickinfo->path, brickinfo->path);
+        strcpy (dup_brickinfo->real_path, brickinfo->real_path);
+        strcpy (dup_brickinfo->device_path, brickinfo->device_path);
+        strcpy (dup_brickinfo->fstype, brickinfo->fstype);
+        strcpy (dup_brickinfo->mnt_opts, brickinfo->mnt_opts);
+
+        ret = gf_canonicalize_path (dup_brickinfo->path);
+        if (ret != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CANONICALIZE_FAIL, "Failed to canonicalize "
+                        "brick path");
+                goto out;
+        }
+        gf_uuid_copy (dup_brickinfo->uuid, brickinfo->uuid);
+
+        dup_brickinfo->port = brickinfo->port;
+        dup_brickinfo->rdma_port = brickinfo->rdma_port;
+
+        /* The log file name is heap allocated, so it needs its own copy. */
+        if (brickinfo->logfile != NULL) {
+                dup_brickinfo->logfile = gf_strdup (brickinfo->logfile);
+                if (dup_brickinfo->logfile == NULL) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        strcpy (dup_brickinfo->brick_id, brickinfo->brick_id);
+        strcpy (dup_brickinfo->mount_dir, brickinfo->mount_dir);
+        dup_brickinfo->status = brickinfo->status;
+        dup_brickinfo->snap_status = brickinfo->snap_status;
+out:
+        return ret;
+}
+/* Build a volinfo describing one half of a tiered volume.
+ *
+ * The new volinfo is a duplicate of @volinfo (including auth data) that
+ * is renamed to @new_volname and holds copies of only one tier's bricks:
+ * the first tier_info.hot_brick_count bricks when @is_hot_tier is true,
+ * the remaining bricks otherwise.  Type and the count fields are then
+ * overridden from the matching hot_* / cold_* members of tier_info.
+ *
+ * @param volinfo      tiered volume to split
+ * @param dup_volinfo  receives the new volinfo; reset to NULL on failure
+ * @param is_hot_tier  select the hot (true) or cold (false) tier
+ * @param new_volname  name given to the new volinfo
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_create_sub_tier_volinfo (glusterd_volinfo_t *volinfo,
+                                  glusterd_volinfo_t **dup_volinfo,
+                                  gf_boolean_t is_hot_tier,
+                                  const char *new_volname)
+{
+        glusterd_brickinfo_t   *brickinfo = NULL;
+        glusterd_brickinfo_t   *brickinfo_dup = NULL;
+        gd_tier_info_t         *tier_info = NULL;
+        int                     i = 0;
+        int                     ret = -1;
+
+        tier_info = &(volinfo->tier_info);
+
+        ret = glusterd_volinfo_dup (volinfo, dup_volinfo, _gf_true);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+                        "Failed to create volinfo");
+                return ret;
+        }
+
+        gf_uuid_copy ((*dup_volinfo)->volume_id, volinfo->volume_id);
+        (*dup_volinfo)->is_snap_volume = volinfo->is_snap_volume;
+        (*dup_volinfo)->status = volinfo->status;
+        (*dup_volinfo)->snapshot = volinfo->snapshot;
+
+        memcpy (&(*dup_volinfo)->tier_info, &volinfo->tier_info,
+                sizeof (volinfo->tier_info));
+
+        /* NOTE(review): unbounded copy; assumes callers pass a name that
+         * fits in volname -- confirm. */
+        strcpy ((*dup_volinfo)->volname, new_volname);
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                i++;
+
+                /* Bricks 1..hot_brick_count belong to the hot tier, the
+                 * rest to the cold tier. */
+                if (is_hot_tier) {
+                        if (i > volinfo->tier_info.hot_brick_count)
+                                break;
+                } else {
+                        if (i <= volinfo->tier_info.hot_brick_count)
+                                continue;
+                }
+
+                ret = glusterd_brickinfo_new (&brickinfo_dup);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_NEW_INFO_FAIL, "Failed to create "
+                                "new brickinfo");
+                        goto out;
+                }
+
+                /* Do not link a half-copied brick into the new volume:
+                 * release it and fail the whole operation instead. */
+                ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_NEW_INFO_FAIL, "Failed to "
+                                "duplicate brickinfo");
+                        (void) glusterd_brickinfo_delete (brickinfo_dup);
+                        brickinfo_dup = NULL;
+                        goto out;
+                }
+
+                cds_list_add_tail (&brickinfo_dup->brick_list,
+                                   &((*dup_volinfo)->bricks));
+        }
+
+        if (is_hot_tier) {
+                (*dup_volinfo)->type = tier_info->hot_type;
+                (*dup_volinfo)->replica_count = tier_info->hot_replica_count;
+                (*dup_volinfo)->brick_count = tier_info->hot_brick_count;
+                (*dup_volinfo)->dist_leaf_count =
+                        glusterd_get_dist_leaf_count(*dup_volinfo);
+
+        } else {
+                (*dup_volinfo)->type = tier_info->cold_type;
+                (*dup_volinfo)->replica_count = tier_info->cold_replica_count;
+                (*dup_volinfo)->disperse_count = tier_info->cold_disperse_count;
+                (*dup_volinfo)->redundancy_count = tier_info->cold_redundancy_count;
+                (*dup_volinfo)->dist_leaf_count = tier_info->cold_dist_leaf_count;
+                (*dup_volinfo)->brick_count = tier_info->cold_brick_count;
+        }
+out:
+        if (ret && *dup_volinfo) {
+                glusterd_volinfo_delete (*dup_volinfo);
+                *dup_volinfo = NULL;
+        }
+
+        return ret;
+
+}
+
+/*
+ * gd_vol_is_geo_rep_active:
+ *      Tells whether the volume has at least one entry in its
+ *      gsync_active_slaves dict, i.e. a running geo-rep session.
+ *
+ * Return Value:
+ *      _gf_true : the dict exists and its count is greater than zero.
+ *      _gf_false: otherwise.
+ */
+
+gf_boolean_t
+gd_vol_is_geo_rep_active (glusterd_volinfo_t *volinfo)
+{
+        GF_ASSERT (volinfo);
+
+        if (!volinfo->gsync_active_slaves)
+                return _gf_false;
+
+        if (volinfo->gsync_active_slaves->count > 0)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* Free the auth username and password strings held by @volinfo. */
+void
+glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) {
+
+        GF_ASSERT (volinfo);
+
+        /* Both strings are released; the pointers are not reset here. */
+        GF_FREE (volinfo->auth.username);
+        GF_FREE (volinfo->auth.password);
+}
+
+/* Return the auth username stored in @volinfo (not a copy). */
+char *
+glusterd_auth_get_username (glusterd_volinfo_t *volinfo) {
+        char *stored = NULL;
+
+        GF_ASSERT (volinfo);
+
+        stored = volinfo->auth.username;
+        return stored;
+}
+
+/* Return the auth password stored in @volinfo (not a copy). */
+char *
+glusterd_auth_get_password (glusterd_volinfo_t *volinfo) {
+        char *stored = NULL;
+
+        GF_ASSERT (volinfo);
+
+        stored = volinfo->auth.password;
+        return stored;
+}
+
+/* Store a private copy of @username as the auth username of @volinfo.
+ *
+ * Any previously stored pointer is overwritten without being freed, as
+ * before.
+ *
+ * Returns 0 on success, -1 if the copy could not be allocated (the
+ * field is NULL in that case).
+ */
+int32_t
+glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username) {
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (username);
+
+        volinfo->auth.username = gf_strdup (username);
+        /* Do not report success when the duplicate could not be made. */
+        if (!volinfo->auth.username)
+                return -1;
+
+        return 0;
+}
+
+/* Store a private copy of @password as the auth password of @volinfo.
+ *
+ * Any previously stored pointer is overwritten without being freed, as
+ * before.
+ *
+ * Returns 0 on success, -1 if the copy could not be allocated (the
+ * field is NULL in that case).
+ */
+int32_t
+glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password) {
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (password);
+
+        volinfo->auth.password = gf_strdup (password);
+        /* Do not report success when the duplicate could not be made. */
+        if (!volinfo->auth.password)
+                return -1;
+
+        return 0;
+}
+
+/* Unlink @brickinfo from whatever brick list it is on and free it,
+ * together with its logfile string.
+ *
+ * Always returns 0.
+ */
+int32_t
+glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo)
+{
+        GF_ASSERT (brickinfo);
+
+        cds_list_del_init (&brickinfo->brick_list);
+
+        GF_FREE (brickinfo->logfile);
+        GF_FREE (brickinfo);
+
+        return 0;
+}
+
+/* Delete every brickinfo linked on @volinfo->bricks.
+ *
+ * Stops at the first deletion that reports an error and returns that
+ * error; returns 0 when all bricks were deleted (or there were none).
+ */
+int32_t
+glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t   *cur = NULL;
+        glusterd_brickinfo_t   *next = NULL;
+        int32_t                 ret = 0;
+
+        GF_ASSERT (volinfo);
+
+        /* The _safe iterator is required: each entry is freed in the loop. */
+        cds_list_for_each_entry_safe (cur, next, &volinfo->bricks,
+                                      brick_list) {
+                ret = glusterd_brickinfo_delete (cur);
+                if (ret != 0)
+                        break;
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Unlink @volinfo from the volume list and drop one reference on it.
+ *
+ * Always returns 0.
+ */
+int
+glusterd_volinfo_remove (glusterd_volinfo_t *volinfo)
+{
+        cds_list_del_init (&volinfo->vol_list);
+
+        /* The returned pointer is intentionally ignored. */
+        (void) glusterd_volinfo_unref (volinfo);
+
+        return 0;
+}
+
+/* Destroy @volinfo: unlink it from the volume and snapshot-volume
+ * lists, delete its bricks, drop its dicts, free its strings and
+ * finally free the structure itself.
+ *
+ * Returns 0 on success.  If deleting the bricks fails, that error is
+ * returned and the remaining teardown is skipped.
+ */
+int32_t
+glusterd_volinfo_delete (glusterd_volinfo_t *volinfo)
+{
+        int32_t ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        cds_list_del_init (&volinfo->vol_list);
+        cds_list_del_init (&volinfo->snapvol_list);
+
+        ret = glusterd_volume_brickinfos_delete (volinfo);
+        if (ret != 0)
+                goto out;
+
+        if (volinfo->dict != NULL)
+                dict_unref (volinfo->dict);
+        if (volinfo->gsync_slaves != NULL)
+                dict_unref (volinfo->gsync_slaves);
+        if (volinfo->gsync_active_slaves != NULL)
+                dict_unref (volinfo->gsync_active_slaves);
+
+        GF_FREE (volinfo->logdir);
+
+        if (volinfo->rebal.dict != NULL)
+                dict_unref (volinfo->rebal.dict);
+
+        gf_store_handle_destroy (volinfo->quota_conf_shandle);
+
+        glusterd_auth_cleanup (volinfo);
+
+        pthread_mutex_destroy (&volinfo->reflock);
+        GF_FREE (volinfo);
+        ret = 0;
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Allocate a zeroed brickinfo with an initialised (empty) brick_list
+ * link and store it in *brickinfo.
+ *
+ * Returns 0 on success, -1 if the allocation fails (in which case
+ * *brickinfo is not written).
+ */
+int32_t
+glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
+{
+        glusterd_brickinfo_t   *fresh = NULL;
+        int32_t                 ret = -1;
+
+        GF_ASSERT (brickinfo);
+
+        fresh = GF_CALLOC (1, sizeof(*fresh),
+                           gf_gld_mt_glusterd_brickinfo_t);
+        if (fresh != NULL) {
+                CDS_INIT_LIST_HEAD (&fresh->brick_list);
+                *brickinfo = fresh;
+                ret = 0;
+        }
+
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Compute the next free numeric brick id for @volinfo.
+ *
+ * Each existing brick_id is expected to end in "-<number>"; the result
+ * is one more than the largest such number, or 0 when the volume has no
+ * bricks.
+ *
+ * Returns the new id (>= 0), or a negative value when a brick_id has no
+ * '-' separator or its suffix cannot be parsed as an integer.
+ */
+int
+glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t   *brickinfo = NULL;
+        char                   *token = NULL;
+        int                     brickid = 0;
+        int                     max_brickid = -1;
+        int                     ret = -1;
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                token = strrchr (brickinfo->brick_id, '-');
+                if (!token) {
+                        /* strrchr () returns NULL when there is no '-';
+                         * stepping past it would dereference NULL + 1. */
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ID_GEN_FAILED,
+                                "Unable to generate brick ID");
+                        return -1;
+                }
+
+                ret = gf_string2int32 (token + 1, &brickid);
+                if (ret < 0) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ID_GEN_FAILED,
+                                "Unable to generate brick ID");
+                        return ret;
+                }
+                if (brickid > max_brickid)
+                        max_brickid = brickid;
+        }
+
+        return max_brickid + 1 ;
+}
+
+/* Fill @brickinfo->uuid by resolving @brickinfo->hostname with
+ * glusterd_hostname_to_uuid () and return that call's result.
+ */
+int32_t
+glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t       *this = THIS;
+        int32_t         ret = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (brickinfo);
+
+        ret = glusterd_hostname_to_uuid (brickinfo->hostname,
+                                         brickinfo->uuid);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Compute the brick directory relative to the root reported by
+ * glusterd_get_brick_root () for @brickpath.
+ *
+ * Only done when @hostname resolves to this node (MY_UUID); for bricks
+ * of other peers @mount_dir is left untouched and 0 is returned.
+ *
+ * @param brickpath  absolute brick path
+ * @param hostname   host the brick lives on
+ * @param mount_dir  output buffer of at least PATH_MAX bytes; receives
+ *                   "/<remainder of brickpath below the root>"
+ *
+ * @return 0 on success, non-zero if the hostname cannot be resolved, the
+ *         brick root cannot be determined, or the root is not a prefix
+ *         of @brickpath
+ */
+int32_t
+glusterd_get_brick_mount_dir (char *brickpath, char *hostname, char *mount_dir)
+{
+        char           *mnt_pt = NULL;
+        char           *brick_dir = NULL;
+        int32_t         ret = -1;
+        uuid_t          brick_uuid = {0, };
+        xlator_t       *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brickpath);
+        GF_ASSERT (hostname);
+        GF_ASSERT (mount_dir);
+
+        ret = glusterd_hostname_to_uuid (hostname, brick_uuid);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_HOSTNAME_TO_UUID_FAIL,
+                        "Failed to convert hostname %s to uuid",
+                        hostname);
+                goto out;
+        }
+
+        if (!gf_uuid_compare (brick_uuid, MY_UUID)) {
+                /* NOTE(review): whether mnt_pt is owned by this function
+                 * (and must be freed) is not visible here -- confirm
+                 * against glusterd_get_brick_root (). */
+                ret = glusterd_get_brick_root (brickpath, &mnt_pt);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_BRICKPATH_ROOT_GET_FAIL,
+                                "Could not get the root of the brick path %s",
+                                brickpath);
+                        goto out;
+                }
+
+                if (strncmp (brickpath, mnt_pt, strlen(mnt_pt))) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_BRKPATH_MNTPNT_MISMATCH,
+                                "brick: %s brick mount: %s",
+                                brickpath, mnt_pt);
+                        ret = -1;
+                        goto out;
+                }
+
+                brick_dir = &brickpath[strlen (mnt_pt)];
+                /* Skip the separator between root and remainder only if
+                 * it is really there.  Stepping unconditionally walks
+                 * past the terminator when brickpath equals the root and
+                 * eats a path character when the root ends in '/'
+                 * (e.g. "/"). */
+                if (*brick_dir == '/')
+                        brick_dir++;
+
+                snprintf (mount_dir, PATH_MAX, "/%s", brick_dir);
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Build a brickinfo from a "<host>:<path>" brick specification.
+ *
+ * @param brick                brick specification string (not modified)
+ * @param brickinfo            receives the new brickinfo on success
+ * @param construct_real_path  when true, resolve the host to a uuid and,
+ *                             for bricks on this node, fill real_path
+ *                             via realpath(3)
+ * @param op_errstr            optional; receives an allocated message
+ *                             when the host cannot be resolved
+ *
+ * @return 0 on success, non-zero on failure.  On failure nothing is
+ *         stored in *brickinfo and no allocation is left behind.
+ */
+int32_t
+glusterd_brickinfo_new_from_brick (char *brick,
+                                   glusterd_brickinfo_t **brickinfo,
+                                   gf_boolean_t construct_real_path,
+                                   char **op_errstr)
+{
+        char                   *hostname = NULL;
+        char                   *path = NULL;
+        char                   *tmp_host = NULL;
+        char                   *tmp_path = NULL;
+        char                   *vg = NULL;
+        int32_t                 ret = -1;
+        glusterd_brickinfo_t   *new_brickinfo = NULL;
+        xlator_t               *this = NULL;
+        char                    abspath[PATH_MAX] = {0};
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brick);
+        GF_ASSERT (brickinfo);
+
+        /* Host and path are parsed from separate scratch copies of the
+         * brick string.  A failed copy is an error too: continuing
+         * would leave hostname/path NULL. */
+        tmp_host = gf_strdup (brick);
+        if (!tmp_host || !get_host_name (tmp_host, &hostname))
+                goto out;
+        tmp_path = gf_strdup (brick);
+        if (!tmp_path || !get_path_name (tmp_path, &path))
+                goto out;
+
+        GF_ASSERT (hostname);
+        GF_ASSERT (path);
+
+        ret = glusterd_brickinfo_new (&new_brickinfo);
+        if (ret)
+                goto out;
+
+#ifdef HAVE_BD_XLATOR
+        vg = strchr (path, '?');
+        /* ? is used as a delimiter for vg */
+        if (vg) {
+                strncpy (new_brickinfo->vg, vg + 1, PATH_MAX - 1);
+                *vg = '\0';
+        }
+        new_brickinfo->caps = CAPS_BD;
+#else
+        vg = NULL; /* Avoid compiler warnings when BD not enabled */
+#endif
+        ret = gf_canonicalize_path (path);
+        if (ret)
+                goto out;
+        /* NOTE(review): the 1024-byte limits assume both fields are at
+         * least that large; strncpy does not terminate on truncation --
+         * confirm against the struct definition. */
+        strncpy (new_brickinfo->hostname, hostname, 1024);
+        strncpy (new_brickinfo->path, path, 1024);
+
+        if (construct_real_path) {
+                ret = glusterd_hostname_to_uuid (new_brickinfo->hostname,
+                                                 new_brickinfo->uuid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_HOSTNAME_TO_UUID_FAIL,
+                                "Failed to convert hostname %s to uuid",
+                                hostname);
+                        if (op_errstr)
+                                gf_asprintf (op_errstr, "Host %s is not in \' "
+                                             "Peer in Cluster\' state",
+                                             new_brickinfo->hostname);
+                        goto out;
+                }
+        }
+
+        if (construct_real_path &&
+            !gf_uuid_compare (new_brickinfo->uuid, MY_UUID)
+            && new_brickinfo->real_path[0] == '\0') {
+                if (!realpath (new_brickinfo->path, abspath)) {
+                        /* ENOENT indicates that brick path has not been created
+                         * which is a valid scenario */
+                        if (errno != ENOENT) {
+                                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                        GD_MSG_BRICKINFO_CREATE_FAIL, "realpath"
+                                        " () failed for brick %s. The "
+                                        "underlying filesystem may be in bad "
+                                        "state", new_brickinfo->path);
+                                ret = -1;
+                                goto out;
+                        }
+                }
+                strncpy (new_brickinfo->real_path, abspath, strlen(abspath));
+        }
+
+        *brickinfo = new_brickinfo;
+
+        ret = 0;
+out:
+        /* The half-built brickinfo was never handed to the caller, so it
+         * has to be released here on every failure path. */
+        if (ret && new_brickinfo)
+                (void) glusterd_brickinfo_delete (new_brickinfo);
+
+        /* Both scratch copies are released independently of each other. */
+        GF_FREE (tmp_host);
+        GF_FREE (tmp_path);
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Path-aware prefix test between two strings.
+ *
+ * Returns _gf_true when the shorter string equals the start of the
+ * longer one and the longer one continues with '/', or when both have
+ * the same length and are equal.  Returns _gf_false otherwise.
+ */
+static gf_boolean_t
+_is_prefix (char *str1, char *str2)
+{
+        GF_ASSERT (str1);
+        GF_ASSERT (str2);
+
+        int             len1 = strlen (str1);
+        int             len2 = strlen (str2);
+        int             common = min (len1, len2);
+        char           *longer = NULL;
+        gf_boolean_t    prefix = _gf_true;
+
+        /* Both strings hold at least 'common' characters, so a bounded
+         * compare covers exactly the shared leading part. */
+        if (strncmp (str1, str2, common) != 0)
+                prefix = _gf_false;
+
+        if (len1 == len2)
+                return prefix;
+
+        longer = (len1 < len2) ? str2 : str1;
+
+        /* The longer path must continue with a separator; otherwise
+         * "/a/b" would count as a prefix of "/a/bc". */
+        if (longer[common] != '/')
+                prefix = _gf_false;
+
+        return prefix;
+}
+
+/* Checks if @path is available on the peer identified by @uuid.
+ * 'Availability' is determined from the current state of the volumes in
+ * the cluster: the path is unavailable when it is in a prefix relation
+ * (see _is_prefix) with the real_path of any brick of that peer.
+ * Returns _gf_false as well when realpath(3) fails for a reason other
+ * than ENOENT. */
+gf_boolean_t
+glusterd_is_brickpath_available (uuid_t uuid, char *path)
+{
+        glusterd_brickinfo_t   *brick = NULL;
+        glusterd_volinfo_t     *vol = NULL;
+        glusterd_conf_t        *priv = THIS->private;
+        gf_boolean_t            available = _gf_false;
+        char                    resolved[PATH_MAX+1] = {0};
+
+        strncpy (resolved, path, PATH_MAX);
+        /* path may not yet exist */
+        if (!realpath (path, resolved)) {
+                if (errno != ENOENT) {
+                        goto out;
+                }
+                /* When realpath(3) fails, the output buffer is undefined. */
+                strncpy(resolved,path,PATH_MAX);
+        }
+
+        cds_list_for_each_entry (vol, &priv->volumes, vol_list) {
+                cds_list_for_each_entry (brick, &vol->bricks,
+                                         brick_list) {
+                        /* Only bricks hosted by the given peer matter. */
+                        if (!gf_uuid_compare (uuid, brick->uuid) &&
+                            _is_prefix (brick->real_path, resolved))
+                                goto out;
+                }
+        }
+        available = _gf_true;
+out:
+        return available;
+}
+
+#ifdef HAVE_BD_XLATOR
+/*
+ * Sets a tag of the format "<GF_XATTR_VOL_ID_KEY>:<uuid>" on the brick's
+ * VG. It is used to avoid using the same VG for another brick.
+ *
+ * @volume_id - id of the volume, @brick - brick info (brick->vg names
+ * the VG), @msg - buffer receiving an error message for the caller,
+ * @msg_size - size of @msg in bytes
+ *
+ * Returns 0 on success, -1 on failure (with @msg filled in).
+ */
+int
+glusterd_bd_set_vg_tag (unsigned char *volume_id, glusterd_brickinfo_t *brick,
+                        char *msg, int msg_size)
+{
+        lvm_t           handle = NULL;
+        vg_t            vg = NULL;
+        char           *uuid = NULL;
+        int             ret = -1;
+
+        gf_asprintf (&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY,
+                     uuid_utoa (volume_id));
+        /* The messages below are bounded by msg_size: sizeof (*msg) is
+         * the size of one char and would always yield an empty string. */
+        if (!uuid) {
+                snprintf (msg, msg_size, "Could not allocate memory "
+                          "for tag");
+                return -1;
+        }
+
+        handle = lvm_init (NULL);
+        if (!handle) {
+                snprintf (msg, msg_size, "lvm_init failed");
+                goto out;
+        }
+
+        vg = lvm_vg_open (handle, brick->vg, "w", 0);
+        if (!vg) {
+                snprintf (msg, msg_size, "Could not open VG %s",
+                          brick->vg);
+                goto out;
+        }
+
+        if (lvm_vg_add_tag (vg, uuid) < 0) {
+                snprintf (msg, msg_size, "Could not set tag %s for "
+                          "VG %s", uuid, brick->vg);
+                goto out;
+        }
+        lvm_vg_write (vg);
+        ret = 0;
+out:
+        GF_FREE (uuid);
+
+        if (vg)
+                lvm_vg_close (vg);
+        if (handle)
+                lvm_quit (handle);
+
+        return ret;
+}
+#endif
+
+
+/* Create the brick directory if needed and validate its placement.
+ *
+ * Steps: mkdir the brick path (an existing path is accepted as long as
+ * it is a directory); unless @is_force is set, refuse a brick that is
+ * itself a mount point or that lives on the same device as "/"; tag the
+ * VG when BD support is compiled in and the brick names one; finally
+ * let glusterd_check_and_set_brick_xattr () check and set the volume-id
+ * on the brick.
+ *
+ * On failure a directory created by this call is removed again, and
+ * *op_errstr is set to the local message unless it was already set.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo,
+                                        uuid_t volume_id, char **op_errstr,
+                                        gf_boolean_t is_force)
+{
+        int             ret = -1;
+        char            parentdir[PATH_MAX] = {0,};
+        struct stat     parent_st = {0,};
+        struct stat     brick_st = {0,};
+        struct stat     root_st = {0,};
+        char            msg[2048] = {0,};
+        gf_boolean_t    created_here = _gf_false;
+
+        ret = sys_mkdir (brickinfo->path, 0777);
+        if (ret == 0) {
+                created_here = _gf_true;
+        } else if (errno != EEXIST) {
+                snprintf (msg, sizeof (msg), "Failed to create brick "
+                          "directory for brick %s:%s. Reason : %s ",
+                          brickinfo->hostname, brickinfo->path,
+                          strerror (errno));
+                goto out;
+        }
+
+        ret = sys_lstat (brickinfo->path, &brick_st);
+        if (ret != 0) {
+                snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s",
+                          brickinfo->path, strerror (errno));
+                goto out;
+        }
+
+        /* A pre-existing path must be a directory. */
+        if (!created_here && !S_ISDIR (brick_st.st_mode)) {
+                snprintf (msg, sizeof (msg), "The provided path %s which is "
+                          "already present, is not a directory",
+                          brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (parentdir, sizeof (parentdir), "%s/..", brickinfo->path);
+
+        ret = sys_lstat ("/", &root_st);
+        if (ret != 0) {
+                snprintf (msg, sizeof (msg), "lstat failed on /. Reason : %s",
+                          strerror (errno));
+                goto out;
+        }
+
+        ret = sys_lstat (parentdir, &parent_st);
+        if (ret != 0) {
+                snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s",
+                          parentdir, strerror (errno));
+                goto out;
+        }
+
+        /* A different device than the parent means the brick path is a
+         * mount point itself. */
+        if (!is_force && (brick_st.st_dev != parent_st.st_dev)) {
+                snprintf (msg, sizeof (msg), "The brick %s:%s is a "
+                          "mount point. Please create a sub-directory "
+                          "under the mount point and use that as the "
+                          "brick directory. Or use 'force' at the end "
+                          "of the command if you want to override this "
+                          "behavior.", brickinfo->hostname,
+                          brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Same device as "/" means the brick sits on the root partition. */
+        if (!is_force && (parent_st.st_dev == root_st.st_dev)) {
+                snprintf (msg, sizeof (msg), "The brick %s:%s "
+                          "is being created in the root partition. It "
+                          "is recommended that you don't use the "
+                          "system's root partition for storage backend."
+                          " Or use 'force' at the end of the command if"
+                          " you want to override this behavior.",
+                          brickinfo->hostname, brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+#ifdef HAVE_BD_XLATOR
+        if (brickinfo->vg[0]) {
+                ret = glusterd_bd_set_vg_tag (volume_id, brickinfo, msg,
+                                              sizeof(msg));
+                if (ret)
+                        goto out;
+        }
+#endif
+        ret = glusterd_check_and_set_brick_xattr (brickinfo->hostname,
+                                                  brickinfo->path, volume_id,
+                                                  op_errstr, is_force);
+        if (ret != 0)
+                goto out;
+
+        ret = 0;
+
+out:
+        /* Undo the mkdir if the brick was rejected afterwards. */
+        if (ret && created_here)
+                sys_rmdir (brickinfo->path);
+        if (ret && !*op_errstr && msg[0] != '\0')
+                *op_errstr = gf_strdup (msg);
+
+        return ret;
+}
+
+/* Look up the brick of @volinfo that lives on a given peer at @path.
+ *
+ * The peer is identified by @uuid when given, otherwise by resolving
+ * @hostname.  Bricks whose uuid is still unset are resolved on the fly;
+ * a resolution failure aborts the search.
+ *
+ * On a match *brickinfo (if non-NULL) is set to the brick found.
+ *
+ * Returns 0 when a matching brick exists, non-zero otherwise.
+ */
+int32_t
+glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+                               glusterd_volinfo_t *volinfo,
+                               glusterd_brickinfo_t **brickinfo)
+{
+        glusterd_brickinfo_t   *candidate = NULL;
+        uuid_t                  peer_uuid = {0};
+        int32_t                 ret = -1;
+        xlator_t               *this = THIS;
+
+        if (!uuid) {
+                ret = glusterd_hostname_to_uuid (hostname, peer_uuid);
+                if (ret)
+                        goto out;
+        } else {
+                gf_uuid_copy (peer_uuid, uuid);
+        }
+
+        ret = -1;
+        cds_list_for_each_entry (candidate, &volinfo->bricks, brick_list) {
+
+                if ((gf_uuid_is_null (candidate->uuid)) &&
+                    (glusterd_resolve_brick (candidate) != 0))
+                        goto out;
+
+                /* Different peer: not the brick being looked for. */
+                if (gf_uuid_compare (peer_uuid, candidate->uuid))
+                        continue;
+
+                if (strcmp (candidate->path, path) != 0)
+                        continue;
+
+                gf_msg_debug (this->name, 0, LOGSTR_FOUND_BRICK,
+                              candidate->hostname, candidate->path,
+                              volinfo->volname);
+                ret = 0;
+                if (brickinfo)
+                        *brickinfo = candidate;
+                break;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Look up a brick of @volinfo from a "<host>:<path>" string.
+ *
+ * The string is parsed into a temporary brickinfo whose hostname and
+ * path are then handed to glusterd_volume_brickinfo_get (); the
+ * temporary is deleted before returning.
+ *
+ * Returns 0 when the brick is part of the volume, non-zero otherwise.
+ */
+int32_t
+glusterd_volume_brickinfo_get_by_brick (char *brick,
+                                        glusterd_volinfo_t *volinfo,
+                                        glusterd_brickinfo_t **brickinfo,
+                                        gf_boolean_t construct_real_path)
+{
+        int32_t                 ret = -1;
+        glusterd_brickinfo_t   *parsed = NULL;
+
+        GF_ASSERT (brick);
+        GF_ASSERT (volinfo);
+
+        ret = glusterd_brickinfo_new_from_brick (brick, &parsed,
+                                                 construct_real_path, NULL);
+        if (ret == 0) {
+                ret = glusterd_volume_brickinfo_get (NULL, parsed->hostname,
+                                                     parsed->path, volinfo,
+                                                     brickinfo);
+                (void) glusterd_brickinfo_delete (parsed);
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Return the 'decommissioned' flag of the brick @hostname:@path in
+ * @volinfo, or _gf_false when no such brick is found.
+ */
+gf_boolean_t
+glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname,
+                                  char *path)
+{
+        glusterd_brickinfo_t   *brick = NULL;
+        int                     ret = -1;
+
+        ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo,
+                                             &brick);
+        if (ret != 0)
+                return _gf_false;
+
+        return brick->decommissioned;
+}
+
+/* Find the volume whose volume_id equals @volume_id in the list of
+ * known volumes.
+ *
+ * On a match *volinfo is set to the volume found.
+ *
+ * Returns 0 when found, -1 when not found or @volume_id is NULL.
+ */
+int
+glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo)
+{
+        int32_t                 ret = -1;
+        xlator_t               *this = NULL;
+        glusterd_volinfo_t     *entry = NULL;
+        glusterd_conf_t        *priv = NULL;
+
+        if (!volume_id)
+                return -1;
+
+        this = THIS;
+        priv = this->private;
+
+        cds_list_for_each_entry (entry, &priv->volumes, vol_list) {
+                if (gf_uuid_compare (volume_id, entry->volume_id) == 0) {
+                        *volinfo = entry;
+                        ret = 0;
+                        gf_msg_debug (this->name, 0, "Volume %s found",
+                                      entry->volname);
+                        break;
+                }
+        }
+        return ret;
+}
+
+/* Find the volume named @volname in the list of known volumes.
+ *
+ * On a match *volinfo is set to the volume found; it is left untouched
+ * otherwise.
+ *
+ * Returns 0 when found, -1 when not found.
+ */
+int32_t
+glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo)
+{
+        glusterd_volinfo_t     *entry = NULL;
+        int32_t                 ret = -1;
+        xlator_t               *this = NULL;
+        glusterd_conf_t        *priv = NULL;
+
+        GF_ASSERT (volname);
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry (entry, &priv->volumes, vol_list) {
+                if (strcmp (entry->volname, volname) != 0)
+                        continue;
+
+                gf_msg_debug (this->name, 0, "Volume %s found",
+                              volname);
+                ret = 0;
+                *volinfo = entry;
+                break;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Stop the service whose pid is recorded in @pidfile by sending @sig.
+ *
+ * @param service     name used in log messages
+ * @param pidfile     pidfile checked with gf_is_service_running ()
+ * @param sig         signal sent first
+ * @param force_kill  when true, wait one second and send SIGKILL if the
+ *                    service is still reported as running
+ *
+ * Returns 0 when the service is (already) stopped or, with @force_kill,
+ * once the follow-up check/kill went through; otherwise the failing
+ * kill () result.
+ */
+int32_t
+glusterd_service_stop (const char *service, char *pidfile, int sig,
+                       gf_boolean_t force_kill)
+{
+        int32_t         ret = -1;
+        pid_t           pid = -1;
+        xlator_t       *this = THIS;
+
+        GF_ASSERT (this);
+        if (!gf_is_service_running (pidfile, &pid)) {
+                ret = 0;
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_ALREADY_STOPPED,
+                        "%s already stopped", service);
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "Stopping gluster %s running in pid: "
+                      "%d", service, pid);
+
+        ret = kill (pid, sig);
+        if (ret) {
+                if (errno == ESRCH) {
+                        /* The process vanished in the meantime. */
+                        gf_msg_debug (this->name, 0, "%s is already stopped",
+                                      service);
+                        ret = 0;
+                        goto out;
+                }
+
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SVC_KILL_FAIL, "Unable to kill %s "
+                        "service, reason:%s", service,
+                        strerror (errno));
+        }
+        if (!force_kill)
+                goto out;
+
+        /* Give the service a moment to exit before escalating. */
+        sleep (1);
+        if (gf_is_service_running (pidfile, NULL)) {
+                ret = kill (pid, SIGKILL);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_PID_KILL_FAIL, "Unable to kill pid:%d, "
+                                "reason:%s", pid, strerror(errno));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Stop the service whose pid is stored in @pidfile by sending @sig,
+ * reading the pid straight from the file.
+ *
+ * @param service     name used in log messages
+ * @param pidfile     file holding the pid of the service
+ * @param sig         signal sent first
+ * @param force_kill  when true, wait one second and send SIGKILL if the
+ *                    process still exists
+ *
+ * Returns 0 when the pidfile cannot be opened, the process is not
+ * running, or the process was signalled successfully; non-zero when
+ * signalling failed or the pidfile holds an unusable pid.  If the pid
+ * cannot be parsed, the fscanf () result (0 or EOF) is returned, as
+ * before.
+ */
+int32_t
+glusterd_service_stop_nolock (const char *service, char *pidfile, int sig,
+                              gf_boolean_t force_kill)
+{
+        int32_t         ret = -1;
+        pid_t           pid = -1;
+        xlator_t       *this = NULL;
+        FILE           *file = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        file = fopen (pidfile, "r+");
+        if (!file) {
+                /* No readable pidfile means there is no pid to act on.
+                 * Falling through with pid == -1 would make kill ()
+                 * signal every process this daemon may signal. */
+                gf_msg_debug (this->name, 0,
+                              "Unable to open pidfile: %s", pidfile);
+                ret = 0;
+                goto out;
+        }
+
+        ret = fscanf (file, "%d", &pid);
+        if (ret <= 0) {
+                gf_msg_debug (this->name, 0,
+                              "Unable to read pidfile: %s", pidfile);
+                goto out;
+        }
+
+        if (pid <= 0) {
+                /* 0 and negative values address process groups (or all
+                 * processes) in kill (); never pass them on. */
+                gf_msg_debug (this->name, 0,
+                              "Invalid pid %d in pidfile: %s", pid, pidfile);
+                ret = -1;
+                goto out;
+        }
+
+        if (kill (pid, 0) < 0) {
+                ret = 0;
+                gf_msg_debug (this->name, 0, "%s process not running: (%d) %s",
+                              service, pid, strerror (errno));
+                goto out;
+        }
+        gf_msg_debug (this->name, 0, "Stopping gluster %s service running with "
+                      "pid: %d", service, pid);
+
+        ret = kill (pid, sig);
+        if (ret) {
+                switch (errno) {
+                case ESRCH:
+                        gf_msg_debug (this->name, 0, "%s is already stopped",
+                                      service);
+                        ret = 0;
+                        goto out;
+                default:
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_SVC_KILL_FAIL, "Unable to kill %s "
+                                "service, reason:%s", service,
+                                strerror (errno));
+                }
+        }
+        if (!force_kill)
+                goto out;
+
+        /* Give the service a moment to exit before escalating. */
+        sleep (1);
+        if (kill(pid, 0) == 0) {
+                ret = kill (pid, SIGKILL);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_PID_KILL_FAIL, "Unable to kill pid:%d, "
+                                "reason:%s", pid, strerror(errno));
+                        goto out;
+                }
+        }
+
+        ret = 0;
+
+out:
+        if (file)
+                fclose (file);
+
+        return ret;
+}
+/* Build a socket path for @sock_filepath: the string is run through
+ * md5_wrapper() and the resulting digest text becomes the file name,
+ * "<GLUSTERD_SOCK_DIR>/<digest>.socket". The result is written to
+ * @sockpath, bounded by @len.
+ */
+void
+glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len)
+{
+        char    digest[MD5_DIGEST_LENGTH*2+1] = {0,};
+        size_t  input_len                     = strlen (sock_filepath);
+
+        md5_wrapper ((unsigned char *) sock_filepath, input_len, digest);
+
+        snprintf (sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, digest);
+}
+
+/* Compute the unix-socket path of a brick into @sockpath (@len bytes).
+ *
+ * The string "<volume dir>/run/<hostname>-<brick path without slashes>"
+ * is composed and handed to glusterd_set_socket_filepath(), which turns
+ * it into the final socket file name. @len is asserted to be large enough
+ * for that final name.
+ */
+void
+glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
+                                    glusterd_brickinfo_t *brickinfo,
+                                    char *sockpath, size_t len)
+{
+        char              flat_brick_path[PATH_MAX] = {0,};
+        char              seed[PATH_MAX]            = {0,};
+        char              voldir[PATH_MAX]          = {0,};
+        xlator_t         *this                      = NULL;
+        glusterd_conf_t  *priv                      = NULL;
+        int               needed                    = 0;
+
+        /* directory + "/" + digest text + ".socket" + NUL */
+        needed = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") +
+                 MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1;
+        GF_ASSERT (len >= needed);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, flat_brick_path);
+
+        snprintf (seed, PATH_MAX, "%s/run/%s-%s",
+                  voldir, brickinfo->hostname, flat_brick_path);
+
+        glusterd_set_socket_filepath (seed, sockpath, len);
+}
+
+/* A connection is set up only if the brick is not already connected;
+ * reconnections are taken care of by the rpc layer.
+ */
+int32_t
+glusterd_brick_connect (glusterd_volinfo_t  *volinfo,
+                        glusterd_brickinfo_t *brickinfo, char *socketpath)
+{
+        int               ret        = 0;
+        char              vol_uuid[64];
+        char             *brick_key  = NULL;
+        dict_t           *rpc_opts   = NULL;
+        struct rpc_clnt  *clnt       = NULL;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (socketpath);
+
+        /* An rpc handle is already attached to the brick: nothing to do. */
+        if (brickinfo->rpc != NULL)
+                goto out;
+
+        /* Setting frame-timeout to 10mins (600seconds).
+         * Unix domain sockets ensures that the connection is reliable.
+         * The default timeout of 30mins used for unreliable network
+         * connections is too long for unix domain socket connections.
+         */
+        ret = rpc_transport_unix_options_build (&rpc_opts, socketpath, 600);
+        if (ret)
+                goto out;
+
+        /* The notify callback receives "<volume-id>:<hostname>:<path>"
+         * as its data pointer. */
+        uuid_utoa_r (volinfo->volume_id, vol_uuid);
+        ret = gf_asprintf (&brick_key, "%s:%s:%s", vol_uuid,
+                           brickinfo->hostname, brickinfo->path);
+        if (ret < 0)
+                goto out;
+
+        ret = glusterd_rpc_create (&clnt, rpc_opts,
+                                   glusterd_brick_rpc_notify, brick_key);
+        if (ret) {
+                GF_FREE (brick_key);
+                goto out;
+        }
+
+        brickinfo->rpc = clnt;
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Create "<volume dir>/run" (and missing parents) for @volinfo.
+ * Returns the result of mkdir_p(); a failure is logged.
+ */
+static int
+_mk_rundir_p (glusterd_volinfo_t *volinfo)
+{
+        char              voldir[PATH_MAX]  = {0,};
+        char              run_path[PATH_MAX] = {0,};
+        glusterd_conf_t  *priv              = NULL;
+        xlator_t         *this              = NULL;
+        int               rc                = -1;
+
+        this = THIS;
+        priv = this->private;
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+        snprintf (run_path, sizeof (run_path)-1, "%s/run", voldir);
+
+        rc = mkdir_p (run_path, 0777, _gf_true);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED, "Failed to create rundir");
+        }
+
+        return rc;
+}
+
+/* Start the glusterfsd process for @brickinfo, unless a process for its
+ * pidfile is already running, and then connect glusterd to the brick's
+ * unix socket.
+ *
+ * @wait: when true the child is run synchronously, with big_lock released
+ *        for the duration, and a -EADDRINUSE result triggers a retry on a
+ *        newly allocated port; when false the child is started through
+ *        runner_run_nowait().
+ *
+ * Returns 0 on success, including the case where the brick is skipped
+ * because a snapshot is pending on it, and non-zero on failure.
+ */
+int32_t
+glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
+                                 glusterd_brickinfo_t *brickinfo,
+                                 gf_boolean_t wait)
+{
+        int32_t                 ret = -1;
+        xlator_t                *this = NULL;
+        glusterd_conf_t         *priv = NULL;
+        char                    pidfile[PATH_MAX+1] = {0,};
+        char                    volfile[PATH_MAX] = {0,};
+        runner_t                runner = {0,};
+        char                    exp_path[PATH_MAX] = {0,};
+        char                    logfile[PATH_MAX] = {0,};
+        int                     port = 0;
+        int                     rdma_port = 0;
+        char                    *bind_address = NULL;
+        char                    socketpath[PATH_MAX] = {0};
+        char                    glusterd_uuid[1024] = {0,};
+        char                    valgrind_logfile[PATH_MAX] = {0};
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (brickinfo->snap_status == -1) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_SNAPSHOT_PENDING,
+                        "Snapshot is pending on %s:%s. "
+                        "Hence not starting the brick",
+                        brickinfo->hostname,
+                        brickinfo->path);
+                ret = 0;
+                goto out;
+        }
+
+        ret = _mk_rundir_p (volinfo);
+        if (ret)
+                goto out;
+
+        glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath,
+                                            sizeof (socketpath));
+
+        /* A brick process is already running: only (re)connect to it. */
+        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+        if (gf_is_service_running (pidfile, NULL))
+                goto connect;
+
+        port = brickinfo->port;
+        if (!port)
+                port = pmap_registry_alloc (THIS);
+
+        /* Build the exp_path, before starting the glusterfsd even in
+           valgrind mode. Otherwise all the glusterfsd processes start
+           writing the valgrind log to the same file.
+        */
+        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path);
+
+        /* The log file name does not depend on the port, so resolve it once
+         * before the retry loop. A failed duplication must abort here:
+         * a NULL logfile would terminate the runner_add_args() argument
+         * list early and silently drop the options that follow "-l".
+         */
+        if (volinfo->logdir) {
+                snprintf (logfile, PATH_MAX, "%s/%s.log",
+                          volinfo->logdir, exp_path);
+        } else {
+                snprintf (logfile, PATH_MAX, "%s/bricks/%s.log",
+                          DEFAULT_LOG_FILE_DIRECTORY, exp_path);
+        }
+        if (!brickinfo->logfile) {
+                brickinfo->logfile = gf_strdup (logfile);
+                if (!brickinfo->logfile) {
+                        gf_msg_debug (this->name, 0, "Out of memory while "
+                                      "setting log file of brick %s:%s",
+                                      brickinfo->hostname, brickinfo->path);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+retry:
+        runinit (&runner);
+
+        if (priv->valgrind) {
+                /* Run bricks with valgrind */
+                if (volinfo->logdir) {
+                        snprintf (valgrind_logfile, PATH_MAX,
+                                  "%s/valgrind-%s-%s.log",
+                                  volinfo->logdir,
+                                  volinfo->volname, exp_path);
+                } else {
+                        snprintf (valgrind_logfile, PATH_MAX,
+                                  "%s/bricks/valgrind-%s-%s.log",
+                                  DEFAULT_LOG_FILE_DIRECTORY,
+                                  volinfo->volname, exp_path);
+                }
+
+                runner_add_args (&runner, "valgrind", "--leak-check=full",
+                                 "--trace-children=yes", "--track-origins=yes",
+                                 NULL);
+                runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+        }
+
+        if (volinfo->is_snap_volume) {
+                snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s",
+                          GLUSTERD_VOL_SNAP_DIR_PREFIX,
+                          volinfo->snapshot->snapname, volinfo->volname,
+                          brickinfo->hostname, exp_path);
+        } else {
+                snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname,
+                          brickinfo->hostname, exp_path);
+        }
+
+        (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s",
+                         uuid_utoa (MY_UUID));
+        runner_add_args (&runner, SBIN_DIR"/glusterfsd",
+                         "-s", brickinfo->hostname, "--volfile-id", volfile,
+                         "-p", pidfile, "-S", socketpath,
+                         "--brick-name", brickinfo->path,
+                         "-l", brickinfo->logfile,
+                         "--xlator-option", glusterd_uuid,
+                         NULL);
+
+        runner_add_arg (&runner, "--brick-port");
+        if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
+                runner_argprintf (&runner, "%d", port);
+        } else {
+                rdma_port = brickinfo->rdma_port;
+                if (!rdma_port)
+                        rdma_port = pmap_registry_alloc (THIS);
+                runner_argprintf (&runner, "%d,%d", port, rdma_port);
+                runner_add_arg (&runner, "--xlator-option");
+                runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d",
+                                  volinfo->volname, rdma_port);
+        }
+
+        runner_add_arg (&runner, "--xlator-option");
+        runner_argprintf (&runner, "%s-server.listen-port=%d",
+                          volinfo->volname, port);
+
+        if (dict_get_str (this->options, "transport.socket.bind-address",
+                          &bind_address) == 0) {
+                runner_add_arg (&runner, "--xlator-option");
+                runner_argprintf (&runner, "transport.socket.bind-address=%s",
+                                  bind_address);
+        }
+
+        if (volinfo->transport_type == GF_TRANSPORT_RDMA)
+                runner_argprintf (&runner, "--volfile-server-transport=rdma");
+        else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA)
+                runner_argprintf (&runner,
+                                  "--volfile-server-transport=socket,rdma");
+
+        if (volinfo->memory_accounting)
+                runner_add_arg (&runner, "--mem-accounting");
+
+        runner_log (&runner, "", 0, "Starting GlusterFS");
+        if (wait) {
+                /* big_lock is released while the child runs. */
+                synclock_unlock (&priv->big_lock);
+                ret = runner_run (&runner);
+                synclock_lock (&priv->big_lock);
+
+                if (ret == -EADDRINUSE) {
+                        /* retry after getting a new port */
+                        gf_msg (this->name, GF_LOG_WARNING, -ret,
+                                GD_MSG_SRC_BRICK_PORT_UNAVAIL,
+                                "Port %d is used by other process", port);
+
+                        port = pmap_registry_alloc (this);
+                        if (!port) {
+                                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                        GD_MSG_NO_FREE_PORTS,
+                                        "Couldn't allocate a port");
+                                ret = -1;
+                                goto out;
+                        }
+                        gf_msg (this->name, GF_LOG_NOTICE, 0,
+                                GD_MSG_RETRY_WITH_NEW_PORT,
+                                "Retrying to start brick %s with new port %d",
+                                brickinfo->path, port);
+                        goto retry;
+                }
+        } else {
+                ret = runner_run_nowait (&runner);
+        }
+
+        if (ret)
+                goto out;
+
+        /* Remember the ports only once the brick has been started. */
+        brickinfo->port = port;
+        brickinfo->rdma_port = rdma_port;
+
+connect:
+        ret = glusterd_brick_connect (volinfo, brickinfo, socketpath);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_DISCONNECTED,
+                        "Failed to connect to brick %s:%s on %s",
+                        brickinfo->hostname, brickinfo->path, socketpath);
+                goto out;
+        }
+out:
+        return ret;
+}
+
+/* Remove the unix-socket file belonging to @brickinfo of @volinfo.
+ * Returns whatever glusterd_unlink_file() returns for the socket path.
+ */
+int32_t
+glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo,
+                                   glusterd_brickinfo_t *brickinfo)
+{
+        char                    socketpath[PATH_MAX] = {0};
+        xlator_t                *this = NULL;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* The socket path is derived entirely by the helper; the volume
+         * directory that used to be computed here was never read. */
+        glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath,
+                                            sizeof (socketpath));
+
+        return glusterd_unlink_file (socketpath);
+}
+
+/* Detach the rpc handle from @brickinfo and drop glusterd's reference on
+ * it. Returns 0, or -1 (after logging) when @brickinfo is NULL.
+ */
+int32_t
+glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
+{
+        rpc_clnt_t       *clnt = NULL;
+        glusterd_conf_t  *priv = THIS->private;
+
+        GF_ASSERT (brickinfo);
+
+        if (brickinfo == NULL) {
+                gf_msg_callingfn ("glusterd", GF_LOG_WARNING, EINVAL,
+                                  GD_MSG_BRICK_NOT_FOUND, "!brickinfo");
+                return -1;
+        }
+
+        /* Clear the brick's pointer first, then release the handle. */
+        clnt           = brickinfo->rpc;
+        brickinfo->rpc = NULL;
+
+        if (clnt != NULL)
+                glusterd_rpc_clnt_unref (priv, clnt);
+
+        return 0;
+}
+
+/* Stop the brick process of @brickinfo if @volinfo is in started state.
+ *
+ * @del_brick: when true the brick is first unlinked from its brick list
+ *             and finally handed to glusterd_delete_brick().
+ *
+ * Returns the result of glusterd_service_stop(), or 0 when the volume is
+ * not started.
+ */
+int32_t
+glusterd_volume_stop_glusterfs (glusterd_volinfo_t  *volinfo,
+                                glusterd_brickinfo_t   *brickinfo,
+                                gf_boolean_t del_brick)
+{
+        int               ret               = 0;
+        char              pidfile[PATH_MAX] = {0,};
+        glusterd_conf_t  *priv              = NULL;
+        xlator_t         *this              = NULL;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+
+        if (del_brick)
+                cds_list_del_init (&brickinfo->brick_list);
+
+        if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+                (void) glusterd_brick_disconnect (brickinfo);
+
+                GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+                ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false);
+                if (!ret) {
+                        /* Only a brick that really stopped is marked as
+                         * such and loses its socket file. */
+                        glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+                        (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo);
+                }
+        }
+
+        if (del_brick)
+                glusterd_delete_brick (volinfo, brickinfo);
+
+        return ret;
+}
+
+/* Release the first @n strings stored in @line, then the array itself. */
+static void
+free_lines (char **line, size_t n)
+{
+        size_t idx = 0;
+
+        while (idx < n) {
+                GF_FREE (line[idx]);
+                idx++;
+        }
+
+        GF_FREE (line);
+}
+
+/* Read @filepath line by line into a newly allocated, NULL-terminated
+ * array of newly allocated strings.
+ *
+ * @line_count: set to the number of lines on success; untouched on failure.
+ *
+ * Returns the array, or NULL on failure (which is logged). Lines longer
+ * than the internal buffer are split by fgets() into several entries.
+ */
+char **
+glusterd_readin_file (const char *filepath, int *line_count)
+{
+        int         ret                    = -1;
+        int         n                      = 8;
+        int         counter                = 0;
+        char        buffer[PATH_MAX + 256] = {0};
+        char      **lines                  = NULL;
+        FILE       *fp                     = NULL;
+        void       *p;
+
+        fp = fopen (filepath, "r");
+        if (!fp)
+                goto out;
+
+        lines = GF_CALLOC (1, n * sizeof (*lines), gf_gld_mt_charptr);
+        if (!lines)
+                goto out;
+
+        for (counter = 0; fgets (buffer, sizeof (buffer), fp); counter++) {
+
+                /* Always keep one spare slot for the terminating NULL. */
+                if (counter == n-1) {
+                        n *= 2;
+                        p = GF_REALLOC (lines, n * sizeof (char *));
+                        if (!p) {
+                                /* Only slots [0, counter) hold strings;
+                                 * slots added by an earlier realloc are
+                                 * uninitialized and must not be freed. */
+                                free_lines (lines, counter);
+                                lines = NULL;
+                                goto out;
+                        }
+                        lines = p;
+                }
+
+                lines[counter] = gf_strdup (buffer);
+                if (!lines[counter]) {
+                        /* A NULL here would look like the end of the
+                         * array to consumers; fail instead. */
+                        free_lines (lines, counter);
+                        lines = NULL;
+                        goto out;
+                }
+        }
+
+        lines[counter] = NULL;
+        /* Reduce allocation to minimal size.  */
+        p = GF_REALLOC (lines, (counter + 1) * sizeof (char *));
+        if (!p) {
+                free_lines (lines, counter);
+                lines = NULL;
+                goto out;
+        }
+        lines = p;
+
+        *line_count = counter;
+        ret = 0;
+
+ out:
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_READIN_FILE_FAILED, "%s", strerror (errno));
+        if (fp)
+                fclose (fp);
+
+        return lines;
+}
+
+/* qsort() comparator for an array of C strings: @a and @b point at the
+ * array elements (i.e. at char pointers), which are compared by strcmp().
+ */
+int
+glusterd_compare_lines (const void *a, const void *b)
+{
+        const char *lhs = *(char * const *) a;
+        const char *rhs = *(char * const *) b;
+
+        return strcmp (lhs, rhs);
+}
+
+/* Read all lines of @src_filepath, sort them with strcmp() ordering and
+ * write them, in that order, to the descriptor @dest_fd.
+ *
+ * Returns 0 on success; -1 (or the negative sys_write() result) on
+ * invalid arguments, read failure or write failure.
+ */
+int
+glusterd_sort_and_redirect (const char *src_filepath, int dest_fd)
+{
+        int            ret        = -1;
+        int            line_count = 0;
+        int            counter    = 0;
+        char         **lines      = NULL;
+
+
+        if (!src_filepath || dest_fd < 0)
+                goto out;
+
+        lines = glusterd_readin_file (src_filepath, &line_count);
+        if (!lines)
+                goto out;
+
+        qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines);
+
+        for (counter = 0; lines[counter]; counter++) {
+
+                ret = sys_write (dest_fd, lines[counter],
+                                 strlen (lines[counter]));
+                if (ret < 0)
+                        goto out;
+
+                GF_FREE (lines[counter]);
+        }
+
+        ret = 0;
+ out:
+        if (lines) {
+                /* After a failed write the strings from the failing index
+                 * onwards are still allocated; release them as well. On
+                 * success counter already sits on the NULL terminator. */
+                for (; lines[counter]; counter++)
+                        GF_FREE (lines[counter]);
+                GF_FREE (lines);
+        }
+
+        return ret;
+}
+
+/* Compute a checksum for a volume's info file or quota configuration.
+ *
+ * @cksum_path:    checksum file; created/truncated here.
+ * @filepath:      file to be checksummed.
+ * @is_quota_conf: when false, @filepath is first sorted line-wise into a
+ *                 temporary file, that file is checksummed, the line
+ *                 "info=<cksum>" is written to @cksum_path and the final
+ *                 value is the checksum of @cksum_path's descriptor. When
+ *                 true, @filepath is checksummed directly and nothing is
+ *                 written to @cksum_path before it is checksummed.
+ * @cs:            receives the resulting checksum on success.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_volume_compute_cksum (glusterd_volinfo_t  *volinfo, char *cksum_path,
+                               char *filepath, gf_boolean_t is_quota_conf,
+                               uint32_t *cs)
+{
+        int32_t                 ret                     = -1;
+        uint32_t                cksum                   = 0;
+        int                     fd                      = -1;
+        int                     sort_fd                 = -1;
+        char                    sort_filepath[PATH_MAX] = {0};
+        char                   *cksum_path_final        = NULL;
+        char                    buf[4096]               = {0,};
+        gf_boolean_t            unlink_sortfile         = _gf_false;
+        glusterd_conf_t        *priv                    = NULL;
+        xlator_t               *this                    = NULL;
+        mode_t                  orig_umask              = 0;
+
+        GF_ASSERT (volinfo);
+        this = THIS;
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0600);
+
+        if (-1 == fd) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "Unable to open %s,"
+                        " errno: %d", cksum_path, errno);
+                ret = -1;
+                goto out;
+        }
+
+        if (!is_quota_conf) {
+                snprintf (sort_filepath, sizeof (sort_filepath),
+                          "/tmp/%s.XXXXXX", volinfo->volname);
+
+                /* Restrict the temporary file to its owner. */
+                orig_umask = umask(S_IRWXG | S_IRWXO);
+                sort_fd = mkstemp (sort_filepath);
+                umask(orig_umask);
+                if (sort_fd < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_FILE_OP_FAILED, "Could not generate "
+                                "temp file, reason: %s for volume: %s",
+                                strerror (errno), volinfo->volname);
+                        ret = -1;
+                        goto out;
+                } else {
+                        unlink_sortfile = _gf_true;
+                }
+
+                /* sort the info file, result in sort_filepath */
+
+                ret = glusterd_sort_and_redirect (filepath, sort_fd);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_FILE_OP_FAILED, "sorting info file "
+                                "failed");
+                        goto out;
+                }
+
+                ret = sys_close (sort_fd);
+                /* Whatever close reported, the descriptor must not be
+                 * closed a second time in the cleanup path. */
+                sort_fd = -1;
+                if (ret)
+                        goto out;
+        }
+
+        cksum_path_final = is_quota_conf ? filepath : sort_filepath;
+
+        ret = get_checksum_for_path (cksum_path_final, &cksum);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CKSUM_GET_FAIL, "unable to get "
+                        "checksum for path: %s", cksum_path_final);
+                goto out;
+        }
+        if (!is_quota_conf) {
+                snprintf (buf, sizeof (buf), "%s=%u\n", "info", cksum);
+                ret = sys_write (fd, buf, strlen (buf));
+                if (ret <= 0) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = get_checksum_for_file (fd, &cksum);
+        if (ret)
+                goto out;
+
+        *cs = cksum;
+
+out:
+        /* Close the temporary descriptor on every error path that left it
+         * open, and treat descriptor 0 as valid. */
+        if (sort_fd >= 0)
+                sys_close (sort_fd);
+        if (fd >= 0)
+                sys_close (fd);
+        if (unlink_sortfile)
+                sys_unlink (sort_filepath);
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+
+        return ret;
+}
+
+/* Compute the checksum of @volinfo's info file, or of its quota
+ * configuration when @is_quota_conf is true, and store it in
+ * volinfo->cksum or volinfo->quota_conf_cksum respectively.
+ * Returns 0 on success, non-zero on failure.
+ */
+int glusterd_compute_cksum (glusterd_volinfo_t *volinfo,
+                            gf_boolean_t is_quota_conf)
+{
+        int               ret                  = -1;
+        uint32_t          computed             = 0;
+        char              cksum_path[PATH_MAX] = {0,};
+        char              path[PATH_MAX]       = {0,};
+        char              filepath[PATH_MAX]   = {0,};
+        const char       *cksum_name           = NULL;
+        const char       *data_name            = NULL;
+        glusterd_conf_t  *conf                 = NULL;
+        xlator_t         *this                 = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GLUSTERD_GET_VOLUME_DIR (path, volinfo, conf);
+
+        /* Pick the checksum file and the file it is computed from. */
+        if (is_quota_conf) {
+                cksum_name = GLUSTERD_VOL_QUOTA_CKSUM_FILE;
+                data_name  = GLUSTERD_VOLUME_QUOTA_CONFIG;
+        } else {
+                cksum_name = GLUSTERD_CKSUM_FILE;
+                data_name  = GLUSTERD_VOLUME_INFO_FILE;
+        }
+        snprintf (cksum_path, sizeof (cksum_path), "%s/%s", path, cksum_name);
+        snprintf (filepath, sizeof (filepath), "%s/%s", path, data_name);
+
+        ret = glusterd_volume_compute_cksum (volinfo, cksum_path, filepath,
+                                             is_quota_conf, &computed);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CKSUM_COMPUTE_FAIL, "Failed to compute checksum "
+                        "for volume %s", volinfo->volname);
+                return ret;
+        }
+
+        if (is_quota_conf)
+                volinfo->quota_conf_cksum = computed;
+        else
+                volinfo->cksum = computed;
+
+        return 0;
+}
+
+/* dict_foreach() callback: copies one (@key, @value) pair into the dict
+ * described by @data (a glusterd_dict_ctx_t) as two entries,
+ * "<prefix>.<key_name><N>" -> key and "<prefix>.<val_name><N>" -> value,
+ * then advances the context's counter N.
+ *
+ * The return value is the result of storing the value entry only; a
+ * failure to store the key entry is logged but not returned.
+ */
+int
+_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)
+{
+        glusterd_dict_ctx_t  *ctx           = data;
+        char                  entry[512]    = {0,};
+        int                   ret           = -1;
+
+        /* "<prefix>.<key_name><N>" carries the option name. */
+        snprintf (entry, sizeof (entry), "%s.%s%d", ctx->prefix,
+                  ctx->key_name, ctx->opt_count);
+        ret = dict_set_str (ctx->dict, entry, key);
+        if (ret != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "option add for %s%d %s",
+                        ctx->key_name, ctx->opt_count, key);
+        }
+
+        /* "<prefix>.<val_name><N>" carries the option value. */
+        snprintf (entry, sizeof (entry), "%s.%s%d", ctx->prefix,
+                  ctx->val_name, ctx->opt_count);
+        ret = dict_set_str (ctx->dict, entry, value->data);
+        if (ret != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "option add for %s%d %s",
+                        ctx->val_name, ctx->opt_count, value->data);
+        }
+
+        ctx->opt_count++;
+
+        return ret;
+}
+
+/* Store hostname and path of every brick of @volinfo in @dict under the
+ * keys "<i>-hostname" and "<i>-path", with i counting from 0 in list
+ * order. Returns 0 on success or the first dict_set_str() failure.
+ */
+int32_t
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict,
+                                        glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t  *brick     = NULL;
+        char                   key[256]  = {0};
+        int                    position  = 0;
+        int                    ret       = 0;
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                snprintf (key, sizeof (key), "%d-hostname", position);
+                ret = dict_set_str (dict, key, brick->hostname);
+                if (ret)
+                        break;
+
+                snprintf (key, sizeof (key), "%d-path", position);
+                ret = dict_set_str (dict, key, brick->path);
+                if (ret)
+                        break;
+
+                position++;
+        }
+
+        return ret;
+}
+
+/* Serialize @volinfo into @dict under keys of the form
+ * "<prefix><count>.<field>".
+ *
+ * The prefix represents the type of volume to be added.
+ * It will be "volume" for normal volumes, and snap# like
+ * snap1, snap2, for snapshot volumes
+ *
+ * Key names are read back by peers and are therefore kept verbatim,
+ * including the historical spelling "ckusm".
+ * NOTE(review): "volume%d.rebal-dict-count" is built with a literal
+ * "volume" instead of @prefix; it is left unchanged because the reader of
+ * that key is not visible here - confirm before altering.
+ *
+ * Returns 0 on success, non-zero as soon as one entry cannot be stored.
+ */
+int32_t
+glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
+                             dict_t  *dict, int32_t count,
+                             char *prefix)
+{
+        int32_t                 ret               = -1;
+        char                    pfx[512]          = {0,};
+        char                    key[512]          = {0,};
+        glusterd_brickinfo_t   *brickinfo         = NULL;
+        int32_t                 i                 = 1;
+        char                   *volume_id_str     = NULL;
+        char                   *str               = NULL;
+        glusterd_dict_ctx_t     ctx               = {0};
+        char                   *rebalance_id_str  = NULL;
+        xlator_t               *this              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (prefix);
+
+        /* snprintf() always NUL-terminates within sizeof (key), so the
+         * buffer does not need to be cleared between keys. */
+
+        snprintf (key, sizeof (key), "%s%d.name", prefix, count);
+        ret = dict_set_str (dict, key, volinfo->volname);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.type", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->type);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.brick_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->brick_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.version", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->version);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.status", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->status);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.sub_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->sub_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.stripe_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->stripe_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.replica_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->replica_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.arbiter_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->arbiter_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->disperse_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->redundancy_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.ckusm", prefix, count);
+        ret = dict_set_int64 (dict, key, volinfo->cksum);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.transport_type", prefix, count);
+        ret = dict_set_uint32 (dict, key, volinfo->transport_type);
+        if (ret)
+                goto out;
+
+        /* tiering related variables */
+
+        snprintf (key, sizeof (key), "%s%d.cold_brick_count", prefix, count);
+        ret = dict_set_uint32 (dict, key, volinfo->tier_info.cold_brick_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.cold_type", prefix, count);
+        ret = dict_set_uint32 (dict, key, volinfo->tier_info.cold_type);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.cold_replica_count", prefix, count);
+        ret = dict_set_uint32 (dict, key,
+                               volinfo->tier_info.cold_replica_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.cold_disperse_count", prefix, count);
+        ret = dict_set_uint32 (dict, key,
+                               volinfo->tier_info.cold_disperse_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.cold_redundancy_count",
+                  prefix, count);
+        ret = dict_set_uint32 (dict, key,
+                               volinfo->tier_info.cold_redundancy_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.cold_dist_count", prefix, count);
+        ret = dict_set_uint32 (dict, key,
+                               volinfo->tier_info.cold_dist_leaf_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.hot_brick_count", prefix, count);
+        ret = dict_set_uint32 (dict, key, volinfo->tier_info.hot_brick_count);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.hot_type", prefix, count);
+        ret = dict_set_uint32 (dict, key, volinfo->tier_info.hot_type);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.hot_replica_count", prefix, count);
+        ret = dict_set_uint32 (dict, key,
+                               volinfo->tier_info.hot_replica_count);
+        if (ret)
+                goto out;
+
+        /* Snapshot details are added below the bare "<prefix><count>". */
+        snprintf (key, sizeof (key), "%s%d", prefix, count);
+        ret = gd_add_vol_snap_details_to_dict (dict, key, volinfo);
+        if (ret)
+                goto out;
+
+        volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id));
+        if (!volume_id_str) {
+                ret = -1;
+                goto out;
+        }
+        snprintf (key, sizeof (key), "%s%d.volume_id", prefix, count);
+        ret = dict_set_dynstr (dict, key, volume_id_str);
+        if (ret)
+                goto out;
+        /* Stored successfully: cleared so the cleanup path does not free
+         * it again. */
+        volume_id_str = NULL;
+
+        snprintf (key, sizeof (key), "%s%d.username", prefix, count);
+        str = glusterd_auth_get_username (volinfo);
+        if (str) {
+                ret = dict_set_dynstr (dict, key, gf_strdup (str));
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d.password", prefix, count);
+        str = glusterd_auth_get_password (volinfo);
+        if (str) {
+                ret = dict_set_dynstr (dict, key, gf_strdup (str));
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d.rebalance", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd);
+        if (ret)
+                goto out;
+
+        rebalance_id_str = gf_strdup (uuid_utoa
+                        (volinfo->rebal.rebalance_id));
+        if (!rebalance_id_str) {
+                ret = -1;
+                goto out;
+        }
+        snprintf (key, sizeof (key), "%s%d.rebalance-id", prefix, count);
+        ret = dict_set_dynstr (dict, key, rebalance_id_str);
+        if (ret)
+                goto out;
+        rebalance_id_str = NULL;
+
+        snprintf (key, sizeof (key), "%s%d.rebalance-op", prefix, count);
+        ret = dict_set_uint32 (dict, key, volinfo->rebal.op);
+        if (ret)
+                goto out;
+
+        if (volinfo->rebal.dict) {
+                snprintf (pfx, sizeof (pfx), "%s%d", prefix, count);
+                ctx.dict = dict;
+                ctx.prefix = pfx;
+                ctx.opt_count = 1;
+                ctx.key_name = "rebal-dict-key";
+                ctx.val_name = "rebal-dict-value";
+
+                dict_foreach (volinfo->rebal.dict, _add_dict_to_prdict, &ctx);
+                /* The counter started at 1; turn it into a count. */
+                ctx.opt_count--;
+                snprintf (key, sizeof (key), "volume%d.rebal-dict-count", count);
+                ret = dict_set_int32 (dict, key, ctx.opt_count);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (pfx, sizeof (pfx), "%s%d", prefix, count);
+        ctx.dict = dict;
+        ctx.prefix = pfx;
+        ctx.opt_count = 1;
+        ctx.key_name = "key";
+        ctx.val_name = "value";
+        GF_ASSERT (volinfo->dict);
+
+        dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx);
+        ctx.opt_count--;
+        snprintf (key, sizeof (key), "%s%d.opt-count", prefix, count);
+        ret = dict_set_int32 (dict, key, ctx.opt_count);
+        if (ret)
+                goto out;
+
+        ctx.dict = dict;
+        ctx.prefix = pfx;
+        ctx.opt_count = 1;
+        ctx.key_name = "slave-num";
+        ctx.val_name = "slave-val";
+        GF_ASSERT (volinfo->gsync_slaves);
+
+        dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx);
+        ctx.opt_count--;
+
+        snprintf (key, sizeof (key), "%s%d.gsync-count", prefix, count);
+        ret = dict_set_int32 (dict, key, ctx.opt_count);
+        if (ret)
+                goto out;
+
+        /* Bricks are numbered from 1 in list order. */
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                snprintf (key, sizeof (key), "%s%d.brick%d.hostname",
+                          prefix, count, i);
+                ret = dict_set_str (dict, key, brickinfo->hostname);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "%s%d.brick%d.path",
+                          prefix, count, i);
+                ret = dict_set_str (dict, key, brickinfo->path);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "%s%d.brick%d.decommissioned",
+                          prefix, count, i);
+                ret = dict_set_int32 (dict, key, brickinfo->decommissioned);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "%s%d.brick%d.brick_id",
+                          prefix, count, i);
+                ret = dict_set_str (dict, key, brickinfo->brick_id);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "%s%d.brick%d.uuid",
+                          prefix, count, i);
+                ret = dict_set_dynstr_with_alloc (dict, key,
+                                                  uuid_utoa(brickinfo->uuid));
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "%s%d.brick%d", prefix, count, i);
+                ret = gd_add_brick_snap_details_to_dict (dict, key, brickinfo);
+                if (ret)
+                        goto out;
+
+                i++;
+        }
+
+        /* Add volume op-versions to dict. This prevents volume inconsistencies
+         * in the cluster
+         */
+        snprintf (key, sizeof (key), "%s%d.op-version", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->op_version);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.client-op-version", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->client_op_version);
+        if (ret)
+                goto out;
+
+        /*Add volume Capability (BD Xlator) to dict*/
+        snprintf (key, sizeof (key), "%s%d.caps", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->caps);
+        /* This result used to be overwritten by the next call without
+         * being looked at. */
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.quota-xattr-version", prefix, count);
+        ret = dict_set_int32 (dict, key, volinfo->quota_xattr_version);
+out:
+        GF_FREE (volume_id_str);
+        GF_FREE (rebalance_id_str);
+
+        gf_msg_debug (this->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Export the quota configuration of @volinfo into @load.
+ *
+ * Every gfid record read from the quota configuration file is stored as
+ * "<prefix><vol_idx>.gfid<N>" and "<prefix><vol_idx>.gfid-type<N>";
+ * afterwards the record count, volinfo->quota_conf_cksum and
+ * volinfo->quota_conf_version are stored under ".gfid-count",
+ * ".quota-cksum" and ".quota-version".
+ *
+ * The prefix represents the type of volume to be added.
+ * It will be "volume" for normal volumes, and snap# like
+ * snap1, snap2, for snapshot volumes
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_vol_add_quota_conf_to_dict (glusterd_volinfo_t *volinfo, dict_t* load,
+                                     int vol_idx, char *prefix)
+{
+        int             conf_fd         = -1;
+        unsigned char   gfid[16]        = {0};
+        char            key[PATH_MAX]   = {0};
+        int             nr_gfids        = 0;
+        int             ret             = -1;
+        xlator_t       *this            = NULL;
+        char            gfid_type       = 0;
+        float           conf_version    = 0.0f;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (prefix);
+
+        ret = glusterd_store_create_quota_conf_sh_on_absence (volinfo);
+        if (ret)
+                goto out;
+
+        conf_fd = open (volinfo->quota_conf_shandle->path, O_RDONLY);
+        if (conf_fd == -1) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = quota_conf_read_version (conf_fd, &conf_version);
+        if (ret)
+                goto out;
+
+        while (1) {
+                ret = quota_conf_read_gfid (conf_fd, gfid, &gfid_type,
+                                            conf_version);
+                /* 0 ends the record stream, a negative value is an error. */
+                if (ret == 0)
+                        break;
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                GD_MSG_QUOTA_CONF_CORRUPT, "Quota "
+                                "configuration store may be corrupt.");
+                        goto out;
+                }
+
+                snprintf (key, sizeof(key)-1, "%s%d.gfid%d", prefix,
+                          vol_idx, nr_gfids);
+                ret = dict_set_dynstr_with_alloc (load, key, uuid_utoa (gfid));
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof(key)-1, "%s%d.gfid-type%d", prefix,
+                          vol_idx, nr_gfids);
+                ret = dict_set_int8 (load, key, gfid_type);
+                if (ret)
+                        goto out;
+
+                nr_gfids++;
+        }
+
+        snprintf (key, sizeof(key)-1, "%s%d.gfid-count", prefix, vol_idx);
+        key[sizeof(key)-1] = '\0';
+        ret = dict_set_int32 (load, key, nr_gfids);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof(key)-1, "%s%d.quota-cksum", prefix, vol_idx);
+        key[sizeof(key)-1] = '\0';
+        ret = dict_set_uint32 (load, key, volinfo->quota_conf_cksum);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof(key)-1, "%s%d.quota-version", prefix, vol_idx);
+        key[sizeof(key)-1] = '\0';
+        ret = dict_set_uint32 (load, key, volinfo->quota_conf_version);
+        if (ret)
+                goto out;
+
+        ret = 0;
+out:
+        if (conf_fd != -1)
+                sys_close (conf_fd);
+        return ret;
+}
+
+/* Build a new dict describing every volume known to this glusterd
+ * (numbered from 1 under the "volume" prefix, including the quota
+ * configuration of quota-enabled volumes), the volume count under
+ * "count", and the global options under the "global" prefix.
+ *
+ * On success the dict is handed over through @peer_data and 0 is
+ * returned; on failure the dict is unreferenced and non-zero is returned.
+ */
+int32_t
+glusterd_add_volumes_to_export_dict (dict_t **peer_data)
+{
+        int32_t               ret         = -1;
+        dict_t               *export_dict = NULL;
+        glusterd_conf_t      *priv        = NULL;
+        glusterd_volinfo_t   *vol         = NULL;
+        int32_t               nr_volumes  = 0;
+        glusterd_dict_ctx_t   opt_ctx     = {0};
+        xlator_t             *this        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        export_dict = dict_new ();
+        if (!export_dict)
+                goto out;
+
+        cds_list_for_each_entry (vol, &priv->volumes, vol_list) {
+                nr_volumes++;
+                ret = glusterd_add_volume_to_dict (vol, export_dict,
+                                                   nr_volumes, "volume");
+                if (ret)
+                        goto out;
+
+                /* Quota data is exported only for quota-enabled volumes. */
+                if (glusterd_is_volume_quota_enabled (vol)) {
+                        ret = glusterd_vol_add_quota_conf_to_dict (vol,
+                                                                   export_dict,
+                                                                   nr_volumes,
+                                                                   "volume");
+                        if (ret)
+                                goto out;
+                }
+        }
+
+        ret = dict_set_int32 (export_dict, "count", nr_volumes);
+        if (ret)
+                goto out;
+
+        opt_ctx.dict      = export_dict;
+        opt_ctx.prefix    = "global";
+        opt_ctx.opt_count = 1;
+        opt_ctx.key_name  = "key";
+        opt_ctx.val_name  = "val";
+        dict_foreach (priv->opts, _add_dict_to_prdict, &opt_ctx);
+        /* The counter started at 1; turn it into a count. */
+        opt_ctx.opt_count--;
+
+        ret = dict_set_int32 (export_dict, "global-opt-count",
+                              opt_ctx.opt_count);
+        if (ret)
+                goto out;
+
+        *peer_data = export_dict;
+out:
+        if (ret)
+                dict_unref (export_dict);
+
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Compare a peer's description of one volume with the local copy.
+ *
+ * peer_data - dictionary sent by the peer
+ * count     - index of the volume in peer_data (keys are "volume<count>.*")
+ * status    - out: GLUSTERD_VOL_COMP_UPDATE_REQ, GLUSTERD_VOL_COMP_SCS or
+ *             GLUSTERD_VOL_COMP_RJT
+ * hostname  - peer name, only used in log messages
+ *
+ * Returns 0 once *status has been decided.  If the volume name, version or
+ * checksum key cannot be read, the lookup's non-zero result is returned and
+ * *status is left untouched.  Missing quota keys are tolerated.
+ */
+int32_t
+glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
+                                int32_t *status, char *hostname)
+{
+
+        int32_t              ret           = -1;
+        char                 key[512]      = {0,};
+        glusterd_volinfo_t  *volinfo       = NULL;
+        char                *volname       = NULL;
+        uint32_t             cksum         = 0;
+        uint32_t             quota_cksum   = 0;
+        uint32_t             quota_version = 0;
+        int32_t              version       = 0;
+        xlator_t            *this          = NULL;
+
+        GF_ASSERT (peer_data);
+        GF_ASSERT (status);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        snprintf (key, sizeof (key), "volume%d.name", count);
+        ret = dict_get_str (peer_data, key, &volname);
+        if (ret)
+                goto out;
+
+        /* A volume that is not found locally has to be imported. */
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+                ret = 0;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "volume%d.version", count);
+        ret = dict_get_int32 (peer_data, key, &version);
+        if (ret)
+                goto out;
+
+        if (version > volinfo->version) {
+                //Mismatch detected
+                ret = 0;
+                gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_VOL_VERS_MISMATCH,
+                        "Version of volume %s differ. local version = %d, "
+                        "remote version = %d on peer %s", volinfo->volname,
+                        volinfo->version, version, hostname);
+                *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+                goto out;
+        } else if (version < volinfo->version) {
+                /* The local copy is newer; nothing to import. */
+                *status = GLUSTERD_VOL_COMP_SCS;
+                goto out;
+        }
+
+        //Now, versions are same, compare cksums.
+        //("ckusm" is the spelling of the key used in the payload)
+        snprintf (key, sizeof (key), "volume%d.ckusm", count);
+        ret = dict_get_uint32 (peer_data, key, &cksum);
+        if (ret)
+                goto out;
+
+        if (cksum != volinfo->cksum) {
+                ret = 0;
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_VERS_MISMATCH,
+                        "Version of Cksums %s differ. local cksum = %u, remote "
+                        "cksum = %u on peer %s", volinfo->volname,
+                        volinfo->cksum, cksum, hostname);
+                *status = GLUSTERD_VOL_COMP_RJT;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "volume%d.quota-version", count);
+        ret = dict_get_uint32 (peer_data, key, &quota_version);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "quota-version key absent for"
+                              " volume %s in peer %s's response",
+                              volinfo->volname, hostname);
+                ret = 0;
+        } else {
+                if (quota_version > volinfo->quota_conf_version) {
+                        //Mismatch detected
+                        ret = 0;
+                        /* Both versions are unsigned 32-bit values, so they
+                         * are printed with %u. */
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_QUOTA_CONFIG_VERS_MISMATCH,
+                                "Quota configuration versions of volume %s "
+                                "differ. local version = %u, remote version = "
+                                "%u on peer %s", volinfo->volname,
+                                volinfo->quota_conf_version,
+                                quota_version, hostname);
+                        *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+                        goto out;
+                } else if (quota_version < volinfo->quota_conf_version) {
+                        *status = GLUSTERD_VOL_COMP_SCS;
+                        goto out;
+                }
+        }
+
+        //Now, quota versions are same (or absent), compare quota cksums.
+        snprintf (key, sizeof (key), "volume%d.quota-cksum", count);
+        ret = dict_get_uint32 (peer_data, key, &quota_cksum);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "quota checksum absent for "
+                              "volume %s in peer %s's response",
+                              volinfo->volname, hostname);
+                ret = 0;
+        } else {
+                if (quota_cksum != volinfo->quota_conf_cksum) {
+                        ret = 0;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_QUOTA_CONFIG_CKSUM_MISMATCH, "Cksums of "
+                                "quota configuration of volume %s differ. local"
+                                " cksum = %u, remote cksum = %u on peer %s",
+                                volinfo->volname, volinfo->quota_conf_cksum,
+                                quota_cksum, hostname);
+                        *status = GLUSTERD_VOL_COMP_RJT;
+                        goto out;
+                }
+        }
+        *status = GLUSTERD_VOL_COMP_SCS;
+
+out:
+        gf_msg_debug (this->name, 0, "Returning with ret: %d, status: %d",
+                      ret, *status);
+        return ret;
+}
+
+/* Copy opt_count key/value pairs from peer_data into dst_dict.
+ *
+ * Pair i (counted from 1) is read from "<prefix>.<key_prefix><i>" (the option
+ * name) and "<prefix>.<value_prefix><i>" (its value).  The value is duplicated
+ * before being stored in dst_dict under the option name.
+ *
+ * Returns 0 when every pair was copied, non-zero on the first failure.
+ */
+static int32_t
+import_prdict_dict (dict_t *peer_data, dict_t *dst_dict, char *key_prefix,
+                    char *value_prefix, int opt_count, char *prefix)
+{
+        char     lookup[512]  = {0,};
+        char     errstr[2048] = {0};
+        char    *name         = NULL;
+        char    *value        = NULL;
+        char    *value_copy   = NULL;
+        int32_t  ret          = 0;
+        int      idx          = 0;
+
+        for (idx = 1; idx <= opt_count; idx++) {
+                snprintf (lookup, sizeof (lookup), "%s.%s%d",
+                          prefix, key_prefix, idx);
+                ret = dict_get_str (peer_data, lookup, &name);
+                if (ret) {
+                        snprintf (errstr, sizeof (errstr),
+                                  "Volume dict key not specified");
+                        goto out;
+                }
+
+                snprintf (lookup, sizeof (lookup), "%s.%s%d",
+                          prefix, value_prefix, idx);
+                ret = dict_get_str (peer_data, lookup, &value);
+                if (ret) {
+                        snprintf (errstr, sizeof (errstr),
+                                  "Volume dict value not specified");
+                        goto out;
+                }
+
+                value_copy = gf_strdup (value);
+                if (!value_copy) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (dst_dict, name, value_copy);
+                if (ret) {
+                        snprintf (errstr, sizeof (errstr),
+                                  "Volume set %s %s unsuccessful",
+                                  name, value_copy);
+                        goto out;
+                }
+        }
+
+out:
+        if (errstr[0])
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_IMPORT_PRDICT_DICT, "%s", errstr);
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+
+}
+
+
+/* Restart bricks, gsyncd sessions and rebalance, then restart the snapd
+ * service and return that last call's result.  opaque is not used.
+ *
+ * conf->big_lock is taken here and not released in this function.
+ * NOTE(review): presumably the task's completion path drops it - confirm
+ * against the caller before adding an unlock here.
+ */
+int
+glusterd_spawn_daemons (void *opaque)
+{
+        glusterd_conf_t *priv = THIS->private;
+
+        synclock_lock (&priv->big_lock);
+
+        glusterd_restart_bricks (priv);
+        glusterd_restart_gsyncds (priv);
+        glusterd_restart_rebalance (priv);
+
+        return glusterd_snapdsvc_restart ();
+}
+
+
+/* Import the option dictionary and the gsync slave list of one volume.
+ *
+ * "<prefix><count>.opt-count" pairs of key<i>/value<i> are copied into
+ * volinfo->dict, then "<prefix><count>.gsync-count" pairs of
+ * slave-num<i>/slave-val<i> are copied into volinfo->gsync_slaves.
+ *
+ * Returns 0 on success, non-zero if a count is missing or a copy fails.
+ */
+int32_t
+glusterd_import_friend_volume_opts (dict_t *peer_data, int count,
+                                    glusterd_volinfo_t *volinfo,
+                                    char *prefix)
+{
+        char     lookup[512]     = {0,};
+        char     errstr[2048]    = {0};
+        char     vol_prefix[1024] = {0};
+        int      n_opts          = 0;
+        int      n_gsync         = 0;
+        int32_t  ret             = -1;
+
+        GF_ASSERT (peer_data);
+        GF_ASSERT (volinfo);
+
+        /* Both imports share the "<prefix><count>" key prefix. */
+        snprintf (vol_prefix, sizeof (vol_prefix), "%s%d", prefix, count);
+
+        snprintf (lookup, sizeof (lookup), "%s%d.opt-count", prefix, count);
+        ret = dict_get_int32 (peer_data, lookup, &n_opts);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr),
+                          "Volume option count not specified for %s",
+                          volinfo->volname);
+                goto out;
+        }
+
+        ret = import_prdict_dict (peer_data, volinfo->dict, "key", "value",
+                                  n_opts, vol_prefix);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr),
+                          "Unable to import options dict specified for %s",
+                          volinfo->volname);
+                goto out;
+        }
+
+        snprintf (lookup, sizeof (lookup), "%s%d.gsync-count", prefix, count);
+        ret = dict_get_int32 (peer_data, lookup, &n_gsync);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr),
+                          "Gsync count not specified for %s",
+                          volinfo->volname);
+                goto out;
+        }
+
+        ret = import_prdict_dict (peer_data, volinfo->gsync_slaves,
+                                  "slave-num", "slave-val", n_gsync,
+                                  vol_prefix);
+        if (ret)
+                snprintf (errstr, sizeof (errstr),
+                          "Unable to import gsync sessions specified for %s",
+                          volinfo->volname);
+
+out:
+        if (errstr[0])
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_OPTS_IMPORT_FAIL, "%s", errstr);
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Copy src into the fixed-size buffer dst.  Returns 0 on success and -1 when
+ * src, including its terminator, does not fit in dstlen bytes.
+ */
+static int
+gd_brick_field_copy (char *dst, size_t dstlen, const char *src)
+{
+        int len = snprintf (dst, dstlen, "%s", src);
+
+        return (len < 0 || (size_t) len >= dstlen) ? -1 : 0;
+}
+
+/* Build a new brickinfo from the "<prefix><vol_count>.brick<brick_count>.*"
+ * keys of peer_data and hand it back through *brickinfo.
+ *
+ * The prefix represents the type of volume to be added.
+ * It will be "volume" for normal volumes, and snap# like
+ * snap1, snap2, for snapshot volumes
+ *
+ * hostname, path and uuid are mandatory; brick_id and decommissioned are
+ * optional.  Strings from the payload are copied with bounds checking and
+ * an over-long value fails the import.
+ *
+ * Returns 0 on success.  On failure *brickinfo is not written and the
+ * partially built brickinfo is released.
+ */
+int32_t
+glusterd_import_new_brick (dict_t *peer_data, int32_t vol_count,
+                           int32_t brick_count,
+                           glusterd_brickinfo_t **brickinfo,
+                           char *prefix)
+{
+        char                    key[512]        = {0,};
+        int                     ret             = -1;
+        char                    *hostname       = NULL;
+        char                    *path           = NULL;
+        char                    *brick_id       = NULL;
+        int                     decommissioned  = 0;
+        glusterd_brickinfo_t    *new_brickinfo  = NULL;
+        char                    msg[2048]       = {0};
+        char                    *brick_uuid_str = NULL;
+
+        GF_ASSERT (peer_data);
+        GF_ASSERT (vol_count >= 0);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (prefix);
+
+        snprintf (key, sizeof (key), "%s%d.brick%d.hostname",
+                  prefix, vol_count, brick_count);
+        ret = dict_get_str (peer_data, key, &hostname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "%s missing in payload", key);
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d.brick%d.path",
+                  prefix, vol_count, brick_count);
+        ret = dict_get_str (peer_data, key, &path);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "%s missing in payload", key);
+                goto out;
+        }
+
+        /* brick_id is optional: it stays NULL when the key is absent. */
+        snprintf (key, sizeof (key), "%s%d.brick%d.brick_id",
+                  prefix, vol_count, brick_count);
+        (void) dict_get_str (peer_data, key, &brick_id);
+
+        /* For backward compatibility a missing "decommissioned" key is not
+         * an error; the value then stays 0. */
+        snprintf (key, sizeof (key), "%s%d.brick%d.decommissioned",
+                  prefix, vol_count, brick_count);
+        (void) dict_get_int32 (peer_data, key, &decommissioned);
+
+        ret = glusterd_brickinfo_new (&new_brickinfo);
+        if (ret)
+                goto out;
+
+        if (gd_brick_field_copy (new_brickinfo->path,
+                                 sizeof (new_brickinfo->path), path) ||
+            gd_brick_field_copy (new_brickinfo->hostname,
+                                 sizeof (new_brickinfo->hostname), hostname) ||
+            (brick_id &&
+             gd_brick_field_copy (new_brickinfo->brick_id,
+                                  sizeof (new_brickinfo->brick_id),
+                                  brick_id))) {
+                snprintf (msg, sizeof (msg), "%s%d.brick%d: hostname, path "
+                          "or brick_id too long in payload", prefix,
+                          vol_count, brick_count);
+                ret = -1;
+                goto out;
+        }
+        new_brickinfo->decommissioned = decommissioned;
+
+        snprintf (key, sizeof (key), "%s%d.brick%d", prefix, vol_count,
+                  brick_count);
+        ret = gd_import_new_brick_snap_details (peer_data, key, new_brickinfo);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "%s%d.brick%d.uuid",
+                  prefix, vol_count, brick_count);
+        ret = dict_get_str (peer_data, key, &brick_uuid_str);
+        if (ret)
+                goto out;
+        gf_uuid_parse (brick_uuid_str, new_brickinfo->uuid);
+
+        *brickinfo = new_brickinfo;
+out:
+        /* Ownership only moves to the caller on success. */
+        if (ret && new_brickinfo)
+                (void) glusterd_brickinfo_delete (new_brickinfo);
+        if (msg[0])
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_IMPORT_FAIL, "%s", msg);
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Import bricks 1..new_volinfo->brick_count of volume vol_count from
+ * peer_data and append each one to new_volinfo->bricks.
+ *
+ * The prefix names the kind of volume being added: "volume" for normal
+ * volumes, and snap# (snap1, snap2, ...) for snapshot volumes.
+ *
+ * Returns 0 when all bricks were imported, otherwise the result of the
+ * brick import that failed.
+ */
+static int32_t
+glusterd_import_bricks (dict_t *peer_data, int32_t vol_count,
+                        glusterd_volinfo_t *new_volinfo, char *prefix)
+{
+        glusterd_brickinfo_t *imported = NULL;
+        int                   brickid  = 0;
+        int                   idx      = 0;
+        int                   ret      = -1;
+
+        GF_ASSERT (peer_data);
+        GF_ASSERT (vol_count >= 0);
+        GF_ASSERT (new_volinfo);
+        GF_ASSERT (prefix);
+
+        for (idx = 1; idx <= new_volinfo->brick_count; idx++) {
+                ret = glusterd_import_new_brick (peer_data, vol_count, idx,
+                                                 &imported, prefix);
+                if (ret)
+                        goto out;
+
+                if (imported->brick_id[0] == '\0')
+                        /* The payload carried no brick id: we were probed
+                           from a peer having op-version less than
+                           GD_OP_VER_PERSISTENT_AFR_XATTRS */
+                        GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (imported,
+                                                              new_volinfo,
+                                                              brickid++);
+
+                cds_list_add_tail (&imported->brick_list,
+                                   &new_volinfo->bricks);
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Rebuild the quota configuration store of new_volinfo from peer_data.
+ *
+ * The prefix represents the type of volume to be added.
+ * It will be "volume" for normal volumes, and snap# like
+ * snap1, snap2, for snapshot volumes
+ *
+ * When quota is disabled on the volume the quota store is cleaned up and 0
+ * is returned.  Otherwise a temporary store file is filled with a header and
+ * the "<prefix><vol_idx>.gfid<i>" entries, renamed into place, and the
+ * checksum and version are recomputed and saved.
+ *
+ * Returns 0 on success and non-zero on failure; on failure the temporary
+ * file is unlinked and the quota store handle is destroyed.
+ */
+int
+glusterd_import_quota_conf (dict_t *peer_data, int vol_idx,
+                            glusterd_volinfo_t *new_volinfo,
+                            char *prefix)
+{
+        int       gfid_idx      = 0;
+        int       gfid_count    = 0;
+        int       ret           = -1;
+        int       fd            = -1;
+        char      key[PATH_MAX] = {0};
+        char     *gfid_str      = NULL;
+        uuid_t    gfid          = {0,};
+        xlator_t *this          = NULL;
+        int8_t    gfid_type     = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (peer_data);
+        GF_ASSERT (prefix);
+
+        if (!glusterd_is_volume_quota_enabled (new_volinfo)) {
+                (void) glusterd_clean_up_quota_store (new_volinfo);
+                return 0;
+        }
+
+        ret = glusterd_store_create_quota_conf_sh_on_absence (new_volinfo);
+        if (ret)
+                goto out;
+
+        fd = gf_store_mkstemp (new_volinfo->quota_conf_shandle);
+        if (fd < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Checksum and version are optional in the payload; a failed
+         * lookup is only logged at debug level. */
+        snprintf (key, sizeof (key), "%s%d.quota-cksum", prefix, vol_idx);
+        ret = dict_get_uint32 (peer_data, key, &new_volinfo->quota_conf_cksum);
+        if (ret)
+                gf_msg_debug (this->name, 0, "Failed to get quota cksum");
+
+        snprintf (key, sizeof (key), "%s%d.quota-version", prefix, vol_idx);
+        ret = dict_get_uint32 (peer_data, key,
+                               &new_volinfo->quota_conf_version);
+        if (ret)
+                gf_msg_debug (this->name, 0, "Failed to get quota "
+                              "version");
+
+        snprintf (key, sizeof (key), "%s%d.gfid-count", prefix, vol_idx);
+        ret = dict_get_int32 (peer_data, key, &gfid_count);
+        if (ret)
+                goto out;
+
+        ret = glusterd_quota_conf_write_header (fd);
+        if (ret)
+                goto out;
+
+        for (gfid_idx = 0; gfid_idx < gfid_count; gfid_idx++) {
+
+                snprintf (key, sizeof (key), "%s%d.gfid%d",
+                          prefix, vol_idx, gfid_idx);
+                ret = dict_get_str (peer_data, key, &gfid_str);
+                if (ret)
+                        goto out;
+
+                /* A missing type falls back to GF_QUOTA_CONF_TYPE_USAGE. */
+                snprintf (key, sizeof (key), "%s%d.gfid-type%d",
+                          prefix, vol_idx, gfid_idx);
+                ret = dict_get_int8 (peer_data, key, &gfid_type);
+                if (ret)
+                        gfid_type = GF_QUOTA_CONF_TYPE_USAGE;
+
+                gf_uuid_parse (gfid_str, gfid);
+                ret = glusterd_quota_conf_write_gfid (fd, gfid,
+                                                      (char)gfid_type);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                GD_MSG_QUOTA_CONF_WRITE_FAIL, "Unable to write "
+                                "gfid %s into quota.conf for %s", gfid_str,
+                                new_volinfo->volname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* If the rename fails the new quota.conf is not in place, so the
+         * failure is propagated instead of being overwritten with 0. */
+        ret = gf_store_rename_tmppath (new_volinfo->quota_conf_shandle);
+        if (ret)
+                goto out;
+
+out:
+        if (!ret) {
+                ret = glusterd_compute_cksum (new_volinfo, _gf_true);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_CKSUM_COMPUTE_FAIL,
+                                "Failed to compute checksum");
+                        goto clear_quota_conf;
+                }
+
+                ret = glusterd_store_save_quota_version_and_cksum (new_volinfo);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL,
+                                "Failed to save quota version and checksum");
+        }
+
+clear_quota_conf:
+        /* 0 is a valid descriptor, so test for >= 0. */
+        if (ret && (fd >= 0)) {
+                gf_store_unlink_tmppath (new_volinfo->quota_conf_shandle);
+                (void) gf_store_handle_destroy
+                                        (new_volinfo->quota_conf_shandle);
+                new_volinfo->quota_conf_shandle = NULL;
+        }
+
+        return ret;
+}
+
+/* Import the rebalance dictionary of volume number count from dict into a
+ * freshly created volinfo->rebal.dict.
+ *
+ * A missing "volume<count>.rebal-dict-count" key is not an error (older
+ * peers do not send it); the function then returns 0 without touching
+ * volinfo.
+ *
+ * Returns 0 on success, non-zero on failure.  On failure the new dictionary
+ * is released and volinfo->rebal.dict is reset to NULL so that no stale
+ * pointer is left behind.
+ */
+int
+gd_import_friend_volume_rebal_dict (dict_t *dict, int count,
+                                    glusterd_volinfo_t *volinfo)
+{
+        int  ret        = -1;
+        char key[256]   = {0,};
+        int  dict_count = 0;
+        char prefix[64] = {0};
+
+        GF_ASSERT (dict);
+        GF_ASSERT (volinfo);
+
+        snprintf (key, sizeof (key), "volume%d.rebal-dict-count", count);
+        ret = dict_get_int32 (dict, key, &dict_count);
+        if (ret) {
+                /* Older peers will not have this dict */
+                ret = 0;
+                goto out;
+        }
+
+        volinfo->rebal.dict = dict_new ();
+        if (!volinfo->rebal.dict) {
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (prefix, sizeof (prefix), "volume%d", count);
+        ret = import_prdict_dict (dict, volinfo->rebal.dict, "rebal-dict-key",
+                                  "rebal-dict-value", dict_count, prefix);
+out:
+        if (ret && volinfo->rebal.dict) {
+                dict_unref (volinfo->rebal.dict);
+                /* Do not leave a pointer to the released dictionary. */
+                volinfo->rebal.dict = NULL;
+        }
+        gf_msg_debug (THIS->name, 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Build a new volinfo from the "<prefix><count>.*" keys of peer_data and
+ * return it through *volinfo.
+ *
+ * The prefix represents the type of volume to be added.
+ * It will be "volume" for normal volumes, and snap# like
+ * snap1, snap2, for snapshot volumes
+ *
+ * Mandatory keys: name, type, brick_count, version, status, sub_count,
+ * ckusm, volume_id, transport_type, rebalance.  The layout and tier counts,
+ * rebalance-id, rebalance-op, op-versions, caps and quota-xattr-version are
+ * optional because older peers do not send them.
+ *
+ * Returns 0 on success and non-zero on failure, in which case *volinfo is
+ * not written.
+ * NOTE(review): the partially built volinfo is not released on the failure
+ * paths; the original did not release it either - confirm which destructor
+ * is appropriate before adding one.
+ */
+int32_t
+glusterd_import_volinfo (dict_t *peer_data, int count,
+                         glusterd_volinfo_t **volinfo,
+                         char *prefix)
+{
+        int                  ret               = -1;
+        int                  len               = 0;
+        size_t               idx               = 0;
+        char                 key[256]          = {0};
+        char                 msg[2048]         = {0};
+        char                *parent_volname    = NULL;
+        char                *volname           = NULL;
+        char                *volume_id_str     = NULL;
+        char                *rebalance_id_str  = NULL;
+        char                *str               = NULL;
+        glusterd_volinfo_t  *new_volinfo       = NULL;
+        int                  op_version        = 0;
+        int                  client_op_version = 0;
+
+        GF_ASSERT (peer_data);
+        GF_ASSERT (volinfo);
+        GF_ASSERT (prefix);
+
+        snprintf (key, sizeof (key), "%s%d.name", prefix, count);
+        ret = dict_get_str (peer_data, key, &volname);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "%s missing in payload", key);
+                goto out;
+        }
+
+        ret = glusterd_volinfo_new (&new_volinfo);
+        if (ret)
+                goto out;
+
+        /* Bound the copy by the destination and reject names that do not
+         * fit, instead of bounding it by the length of the source. */
+        len = snprintf (new_volinfo->volname, sizeof (new_volinfo->volname),
+                        "%s", volname);
+        if (len < 0 || (size_t) len >= sizeof (new_volinfo->volname)) {
+                snprintf (msg, sizeof (msg), "volume name %s in payload is "
+                          "too long", volname);
+                ret = -1;
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d.type", prefix, count);
+        ret = dict_get_int32 (peer_data, key, &new_volinfo->type);
+        if (ret)
+                goto missing_key;
+
+        /* Optional; snprintf truncates like the original strncpy did but
+         * always terminates the string. */
+        snprintf (key, sizeof (key), "%s%d.parent_volname", prefix, count);
+        ret = dict_get_str (peer_data, key, &parent_volname);
+        if (!ret)
+                snprintf (new_volinfo->parent_volname,
+                          sizeof (new_volinfo->parent_volname), "%s",
+                          parent_volname);
+
+        snprintf (key, sizeof (key), "%s%d.brick_count", prefix, count);
+        ret = dict_get_int32 (peer_data, key, &new_volinfo->brick_count);
+        if (ret)
+                goto missing_key;
+
+        snprintf (key, sizeof (key), "%s%d.version", prefix, count);
+        ret = dict_get_int32 (peer_data, key, &new_volinfo->version);
+        if (ret)
+                goto missing_key;
+
+        snprintf (key, sizeof (key), "%s%d.status", prefix, count);
+        ret = dict_get_int32 (peer_data, key, (int32_t *)&new_volinfo->status);
+        if (ret)
+                goto missing_key;
+
+        snprintf (key, sizeof (key), "%s%d.sub_count", prefix, count);
+        ret = dict_get_int32 (peer_data, key, &new_volinfo->sub_count);
+        if (ret)
+                goto missing_key;
+
+        {
+                /* Not having one of these keys is not an error (the peer
+                 * may be of an old version).  Entries flagged as tier are
+                 * reported at debug level, the others at info level.
+                 */
+                struct {
+                        const char *field;
+                        int32_t    *dst;
+                        int         tier;
+                } optional[] = {
+                        {"stripe_count", &new_volinfo->stripe_count, 0},
+                        {"replica_count", &new_volinfo->replica_count, 0},
+                        {"arbiter_count", &new_volinfo->arbiter_count, 0},
+                        {"disperse_count", &new_volinfo->disperse_count, 0},
+                        {"redundancy_count",
+                         &new_volinfo->redundancy_count, 0},
+                        {"dist_count", &new_volinfo->dist_leaf_count, 0},
+                        {"hot_brick_count",
+                         &new_volinfo->tier_info.hot_brick_count, 1},
+                        {"hot_type", &new_volinfo->tier_info.hot_type, 1},
+                        {"hot_replica_count",
+                         &new_volinfo->tier_info.hot_replica_count, 1},
+                        {"cold_brick_count",
+                         &new_volinfo->tier_info.cold_brick_count, 1},
+                        {"cold_type", &new_volinfo->tier_info.cold_type, 1},
+                        {"cold_replica_count",
+                         &new_volinfo->tier_info.cold_replica_count, 1},
+                        {"cold_disperse_count",
+                         &new_volinfo->tier_info.cold_disperse_count, 1},
+                        {"cold_redundancy_count",
+                         &new_volinfo->tier_info.cold_redundancy_count, 1},
+                        {"cold_dist_count",
+                         &new_volinfo->tier_info.cold_dist_leaf_count, 1},
+                };
+
+                for (idx = 0;
+                     idx < sizeof (optional) / sizeof (optional[0]); idx++) {
+                        snprintf (key, sizeof (key), "%s%d.%s", prefix, count,
+                                  optional[idx].field);
+                        ret = dict_get_int32 (peer_data, key,
+                                              optional[idx].dst);
+                        if (!ret)
+                                continue;
+                        if (optional[idx].tier) {
+                                gf_msg_debug (THIS->name, 0,
+                                              "peer is possibly old version");
+                        } else {
+                                gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "peer is possibly old version");
+                        }
+                }
+        }
+
+        new_volinfo->subvol_count = new_volinfo->brick_count/
+                                    glusterd_get_dist_leaf_count (new_volinfo);
+
+        /* "ckusm" is the spelling of the key used in the payload. */
+        snprintf (key, sizeof (key), "%s%d.ckusm", prefix, count);
+        ret = dict_get_uint32 (peer_data, key, &new_volinfo->cksum);
+        if (ret)
+                goto missing_key;
+
+        snprintf (key, sizeof (key), "%s%d.volume_id", prefix, count);
+        ret = dict_get_str (peer_data, key, &volume_id_str);
+        if (ret)
+                goto missing_key;
+
+        gf_uuid_parse (volume_id_str, new_volinfo->volume_id);
+
+        /* Credentials are optional, but setting them must succeed when
+         * they are present. */
+        snprintf (key, sizeof (key), "%s%d.username", prefix, count);
+        ret = dict_get_str (peer_data, key, &str);
+        if (!ret) {
+                ret = glusterd_auth_set_username (new_volinfo, str);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d.password", prefix, count);
+        ret = dict_get_str (peer_data, key, &str);
+        if (!ret) {
+                ret = glusterd_auth_set_password (new_volinfo, str);
+                if (ret)
+                        goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d.transport_type", prefix, count);
+        ret = dict_get_uint32 (peer_data, key, &new_volinfo->transport_type);
+        if (ret)
+                goto missing_key;
+
+        snprintf (key, sizeof (key), "%s%d.rebalance", prefix, count);
+        ret = dict_get_uint32 (peer_data, key, &new_volinfo->rebal.defrag_cmd);
+        if (ret)
+                goto missing_key;
+
+        /* rebalance-id and rebalance-op are not present in older glusterfs
+         * versions, so their absence is not an error.
+         */
+        snprintf (key, sizeof (key), "%s%d.rebalance-id", prefix, count);
+        ret = dict_get_str (peer_data, key, &rebalance_id_str);
+        if (!ret)
+                gf_uuid_parse (rebalance_id_str,
+                               new_volinfo->rebal.rebalance_id);
+
+        snprintf (key, sizeof (key), "%s%d.rebalance-op", prefix, count);
+        (void) dict_get_uint32 (peer_data, key,
+                                (uint32_t *) &new_volinfo->rebal.op);
+
+        ret = gd_import_friend_volume_rebal_dict (peer_data, count,
+                                                  new_volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to import rebalance dict "
+                          "for volume.");
+                goto out;
+        }
+
+        snprintf (key, sizeof (key), "%s%d", prefix, count);
+        ret = gd_import_volume_snap_details (peer_data, new_volinfo, key,
+                                             volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_SNAP_DETAILS_IMPORT_FAIL,
+                        "Failed to import snapshot "
+                        "details for volume %s", volname);
+                goto out;
+        }
+
+        ret = glusterd_import_friend_volume_opts (peer_data, count,
+                                                  new_volinfo, prefix);
+        if (ret)
+                goto out;
+
+        /* Import the volume's op-versions if available else set it to 1.
+         * Not having op-versions implies this information was obtained from a
+         * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version
+         * 1 and all volumes are at op-versions 1.
+         *
+         * Either both the volume op-versions should be absent or both should be
+         * present. Only one being present is a failure
+         */
+        snprintf (key, sizeof (key), "%s%d.op-version", prefix, count);
+        (void) dict_get_int32 (peer_data, key, &op_version);
+        snprintf (key, sizeof (key), "%s%d.client-op-version", prefix, count);
+        (void) dict_get_int32 (peer_data, key, &client_op_version);
+
+        if (op_version && client_op_version) {
+                new_volinfo->op_version = op_version;
+                new_volinfo->client_op_version = client_op_version;
+        } else if (op_version || client_op_version) {
+                ret = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Only one volume op-version found");
+                goto out;
+        } else {
+                new_volinfo->op_version = 1;
+                new_volinfo->client_op_version = 1;
+        }
+
+        /*These are not present in older glusterfs versions, so the lookup
+         * results are ignored*/
+        snprintf (key, sizeof (key), "%s%d.caps", prefix, count);
+        (void) dict_get_int32 (peer_data, key, &new_volinfo->caps);
+
+        snprintf (key, sizeof (key), "%s%d.quota-xattr-version", prefix, count);
+        (void) dict_get_int32 (peer_data, key,
+                               &new_volinfo->quota_xattr_version);
+
+        ret = glusterd_import_bricks (peer_data, count, new_volinfo, prefix);
+        if (ret)
+                goto out;
+
+        *volinfo = new_volinfo;
+        goto out;
+
+missing_key:
+        /* Shared failure path of the mandatory keys; key still holds the
+         * name of the lookup that failed. */
+        snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+                  key, volname);
+out:
+        if (msg[0])
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_IMPORT_FAIL, "%s", msg);
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Disconnect every started brick of volinfo.
+ *
+ * Stops at the first brick whose disconnect fails and returns that result;
+ * returns 0 when no disconnect failed.
+ */
+int32_t
+glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t *brick = NULL;
+        int                   ret   = 0;
+
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                if (!glusterd_is_brick_started (brick))
+                        continue;
+
+                ret = glusterd_brick_disconnect (brick);
+                if (!ret)
+                        continue;
+
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSD_BRICK_DISCONNECT_FAIL,
+                        "Failed to disconnect %s:%s", brick->hostname,
+                        brick->path);
+                break;
+        }
+
+        return ret;
+}
+
+/* For every brick of new_volinfo that also exists in old_volinfo, carry
+ * over the port and the resolved path.
+ *
+ * When the old brick has no real_path recorded, it is resolved with
+ * realpath() from the new brick's path; failure to resolve is treated as
+ * an error.  Bricks that are not found in old_volinfo are left untouched.
+ *
+ * Returns 0 on success, -1 if realpath() fails or a path does not fit.
+ */
+int32_t
+glusterd_volinfo_copy_brickinfo (glusterd_volinfo_t *old_volinfo,
+                                 glusterd_volinfo_t *new_volinfo)
+{
+        glusterd_brickinfo_t   *new_brickinfo = NULL;
+        glusterd_brickinfo_t   *old_brickinfo = NULL;
+        glusterd_conf_t        *priv = NULL;
+        int                     ret = 0;
+        int                     len = 0;
+        xlator_t               *this = NULL;
+        char                    abspath[PATH_MAX] = {0};
+        const char             *resolved = NULL;
+
+        GF_ASSERT (new_volinfo);
+        GF_ASSERT (old_volinfo);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry (new_brickinfo, &new_volinfo->bricks,
+                                 brick_list) {
+                ret = glusterd_volume_brickinfo_get (new_brickinfo->uuid,
+                                                     new_brickinfo->hostname,
+                                                     new_brickinfo->path,
+                                                     old_volinfo,
+                                                     &old_brickinfo);
+                if (ret)
+                        continue;
+
+                new_brickinfo->port = old_brickinfo->port;
+
+                /* Test the first character: comparing the array itself
+                 * with '\0' is a pointer comparison that is never true. */
+                if (old_brickinfo->real_path[0] == '\0') {
+                        if (!realpath (new_brickinfo->path, abspath)) {
+                                /* Here an ENOENT should also be a
+                                 * failure as the brick is expected to
+                                 * be in existence
+                                 */
+                                gf_msg (this->name, GF_LOG_CRITICAL,
+                                        errno,
+                                        GD_MSG_BRICKINFO_CREATE_FAIL,
+                                        "realpath () failed for brick "
+                                        "%s. The underlying filesystem "
+                                        "may be in bad state",
+                                        new_brickinfo->path);
+                                ret = -1;
+                                goto out;
+                        }
+                        resolved = abspath;
+                } else {
+                        resolved = old_brickinfo->real_path;
+                }
+
+                /* Bounded, always-terminated copy into the destination. */
+                len = snprintf (new_brickinfo->real_path,
+                                sizeof (new_brickinfo->real_path), "%s",
+                                resolved);
+                if (len < 0 ||
+                    (size_t) len >= sizeof (new_brickinfo->real_path)) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* Stop the bricks of old_volinfo that are stale with respect to
+ * new_volinfo.  Nothing is done when old_volinfo is not started.
+ *
+ * A brick is stale when it is not part of new_volinfo, or when it is part
+ * of it with snap_status == -1 (pending a snap).  A failure to stop a brick
+ * is logged and does not abort the walk; the function always returns 0.
+ */
+int32_t
+glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo,
+                                    glusterd_volinfo_t *old_volinfo)
+{
+        glusterd_brickinfo_t *match = NULL;
+        glusterd_brickinfo_t *brick = NULL;
+        int                   ret   = 0;
+
+        GF_ASSERT (new_volinfo);
+        GF_ASSERT (old_volinfo);
+
+        if (glusterd_is_volume_started (old_volinfo) == _gf_false)
+                goto out;
+
+        cds_list_for_each_entry (brick, &old_volinfo->bricks, brick_list) {
+                ret = glusterd_volume_brickinfo_get (brick->uuid,
+                                                     brick->hostname,
+                                                     brick->path,
+                                                     new_volinfo,
+                                                     &match);
+                /* Still part of the new volume and not pending a snap:
+                 * keep it running. */
+                if (!ret && (match->snap_status != -1))
+                        continue;
+
+                /*TODO: may need to switch to 'atomic' flavour of
+                 * brick_stop, once we make peer rpc program also
+                 * synctask enabled*/
+                ret = glusterd_brick_stop (old_volinfo, brick, _gf_false);
+                if (ret)
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_STOP_FAIL,
+                                "Failed to stop brick %s:%s",
+                                brick->hostname, brick->path);
+        }
+        ret = 0;
+out:
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Tear down stale_volinfo after valid_volinfo has superseded it.
+ *
+ * In order: the snapshot volumes are appended to valid_volinfo, the lvm
+ * snapshot of a differing restore is removed, bricks are stopped and
+ * disconnected, the bricks and the store handle are deleted, snapd is
+ * handed to its service manager, and the volinfo is removed.
+ * Failures of the individual steps are ignored; always returns 0.
+ */
+int32_t
+glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo,
+                              glusterd_volinfo_t *valid_volinfo)
+{
+        glusterd_volinfo_t *snap_vol  = NULL;
+        glusterd_volinfo_t *next_vol  = NULL;
+        glusterd_svc_t     *snapd_svc = NULL;
+        xlator_t           *this      = NULL;
+
+        GF_ASSERT (stale_volinfo);
+        GF_ASSERT (valid_volinfo);
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Copy snap_volumes list from stale_volinfo to valid_volinfo */
+        valid_volinfo->snap_count = 0;
+        cds_list_for_each_entry_safe (snap_vol, next_vol,
+                                      &stale_volinfo->snap_volumes,
+                                      snapvol_list) {
+                cds_list_add_tail (&snap_vol->snapvol_list,
+                                   &valid_volinfo->snap_volumes);
+                valid_volinfo->snap_count++;
+        }
+
+        /* The stale volume was restored from a snapshot other than the one
+         * recorded in valid_volinfo: remove its lvm snapshot. */
+        if ((!gf_uuid_is_null (stale_volinfo->restored_from_snap)) &&
+            (gf_uuid_compare (stale_volinfo->restored_from_snap,
+                              valid_volinfo->restored_from_snap))) {
+                if (glusterd_lvm_snapshot_remove (NULL, stale_volinfo)) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_SNAP_REMOVE_FAIL,
+                                "Failed to remove lvm snapshot for restored "
+                                "volume %s", stale_volinfo->volname);
+                }
+        }
+
+        /* If stale volume is in started state, stop the stale bricks if the new
+         * volume is started else, stop all bricks.
+         * We dont want brick_rpc_notify to access already deleted brickinfo,
+         * so disconnect all bricks from stale_volinfo (unconditionally), since
+         * they are being deleted subsequently.
+         */
+        if (glusterd_is_volume_started (stale_volinfo)) {
+                if (!glusterd_is_volume_started (valid_volinfo))
+                        (void) glusterd_stop_bricks (stale_volinfo);
+                else
+                        (void) glusterd_volinfo_stop_stale_bricks
+                                        (valid_volinfo, stale_volinfo);
+
+                (void) glusterd_volume_disconnect_all_bricks (stale_volinfo);
+        }
+
+        /* Delete all the bricks and stores and vol files. They will be created
+         * again by the valid_volinfo. Volume store delete should not be
+         * performed because some of the bricks could still be running,
+         * keeping pid files under run directory
+         */
+        (void) glusterd_delete_all_bricks (stale_volinfo);
+        if (stale_volinfo->shandle) {
+                sys_unlink (stale_volinfo->shandle->path);
+                (void) gf_store_handle_destroy (stale_volinfo->shandle);
+                stale_volinfo->shandle = NULL;
+        }
+
+        /* Marking volume as stopped, so that svc manager stops snapd
+         * and we are deleting the volume.
+         */
+        stale_volinfo->status = GLUSTERD_STATUS_STOPPED;
+
+        if (!stale_volinfo->is_snap_volume) {
+                snapd_svc = &(stale_volinfo->snapd.svc);
+                (void) snapd_svc->manager (snapd_svc, stale_volinfo,
+                                           PROC_START_NO_WAIT);
+        }
+        (void) glusterd_volinfo_remove (stale_volinfo);
+
+        return 0;
+}
+
+/* Carry the rebalance bookkeeping of the locally known volume (old_volinfo)
+ * over to the volinfo that is replacing it (new_volinfo).
+ *
+ * Any live connection to the old rebalance process is disconnected first.
+ * If the old volume has a rebalance id that differs from the new one, the
+ * old rebalance process is stopped and nothing is copied.
+ *
+ * Returns 0 when the counters were copied to new_volinfo, -1 when the
+ * rebalance tasks differ and nothing was copied.
+ */
+int
+gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo,
+                                    glusterd_volinfo_t *new_volinfo)
+{
+        int                   ret = -1;
+        glusterd_rebalance_t *old = NULL;
+        glusterd_rebalance_t *new = NULL;
+
+        GF_ASSERT (old_volinfo);
+        GF_ASSERT (new_volinfo);
+
+        old = &(old_volinfo->rebal);
+        new = &(new_volinfo->rebal);
+
+        //Disconnect from rebalance process
+        if (glusterd_defrag_rpc_get (old->defrag)) {
+                rpc_transport_disconnect (old->defrag->rpc->conn.trans);
+                glusterd_defrag_rpc_put (old->defrag);
+        }
+
+        if (!gf_uuid_is_null (old->rebalance_id) &&
+            gf_uuid_compare (old->rebalance_id, new->rebalance_id)) {
+                (void)gd_stop_rebalance_process (old_volinfo);
+                goto out;
+        }
+
+        /* If the tasks match, copy the status and other information of the
+         * rebalance process from old_volinfo to new_volinfo
+         */
+        new->defrag_status      = old->defrag_status;
+        new->rebalance_files    = old->rebalance_files;
+        new->rebalance_data     = old->rebalance_data;
+        new->lookedup_files     = old->lookedup_files;
+        new->skipped_files      = old->skipped_files;
+        new->rebalance_failures = old->rebalance_failures;
+        new->rebalance_time     = old->rebalance_time;
+
+        /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
+         * import
+         * a new defrag object should come to life with rebalance being restarted
+         */
+
+        /* The copy succeeded; previously ret was left at -1 on this path. */
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Import the volume at index `count` of peer_data and make it the local
+ * definition of that volume.
+ *
+ * Steps, in order: build a new volinfo from peer_data; if a volume of the
+ * same name already exists locally, carry over its rebalance and brick
+ * information and delete it as stale; start bricks (and snapd when enabled)
+ * if the new volume is started; reconcile the ganesha export; store the
+ * volinfo; create volfiles and notify services; import the quota conf; and
+ * finally insert the new volinfo into priv->volumes ordered by name.
+ *
+ * Returns 0 on success, and also when glusterd_import_volinfo succeeds
+ * without producing a volinfo. Returns the failing step's non-zero value
+ * otherwise.
+ *
+ * NOTE(review): on the error paths after a successful import, new_volinfo
+ * is neither added to priv->volumes nor released here - confirm who owns
+ * it in that case.
+ */
+int32_t
+glusterd_import_friend_volume (dict_t *peer_data, size_t count)
+{
+
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *old_volinfo = NULL;
+ glusterd_volinfo_t *new_volinfo = NULL;
+ glusterd_svc_t *svc = NULL;
+ gf_boolean_t newexportvalue = _gf_false;
+ gf_boolean_t oldexportvalue = _gf_false;
+ char *value = NULL;
+
+ GF_ASSERT (peer_data);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ ret = glusterd_import_volinfo (peer_data, count,
+ &new_volinfo, "volume");
+ if (ret)
+ goto out;
+
+ /* A successful import may still yield no volinfo; ret is 0 here, so
+ * the function reports success without importing anything.
+ */
+ if (!new_volinfo) {
+ gf_msg_debug (this->name, 0,
+ "Not importing snap volume");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
+ if (0 == ret) {
+ /* Remember the old export state before old_volinfo is deleted */
+ oldexportvalue = glusterd_check_ganesha_export (old_volinfo);
+
+ /* Ref count the old_volinfo such that deleting it doesn't crash
+ * if its been already in use by other thread
+ */
+ glusterd_volinfo_ref (old_volinfo);
+ (void) gd_check_and_update_rebalance_info (old_volinfo,
+ new_volinfo);
+
+ /* Copy brick ports & real_path from the old volinfo always.
+ * The old_volinfo will be cleaned up and this information
+ * could be lost
+ */
+ (void) glusterd_volinfo_copy_brickinfo (old_volinfo,
+ new_volinfo);
+
+ (void) glusterd_delete_stale_volume (old_volinfo, new_volinfo);
+ glusterd_volinfo_unref (old_volinfo);
+ }
+
+ /* Best effort: failures to start bricks or snapd are ignored here */
+ if (glusterd_is_volume_started (new_volinfo)) {
+ (void) glusterd_start_bricks (new_volinfo);
+ if (glusterd_is_snapd_enabled (new_volinfo)) {
+ svc = &(new_volinfo->snapd.svc);
+ (void) svc->manager (svc, new_volinfo,
+ PROC_START_NO_WAIT);
+ }
+ }
+
+ ret = glusterd_volinfo_get (new_volinfo, "ganesha.enable", &value);
+ if (ret)
+ goto out;
+ ret = gf_string2boolean (value, &newexportvalue);
+ if (ret)
+ goto out;
+
+ /* *
+ * if new and old export value is off, then there is no point in calling
+ * ganesha_manage_export
+ */
+ if (!((newexportvalue == oldexportvalue) &&
+ newexportvalue == _gf_false)) {
+ ret = ganesha_manage_export (new_volinfo->volname, value,
+ NULL, _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_NFS_GNS_OP_HANDLE_FAIL,
+ "Returning from ganesha_manage_export with"
+ " ret: %d for volume %s ganesha.enable %s",
+ ret, new_volinfo->volname,
+ value);
+ goto out;
+ }
+ }
+ ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
+ "volinfo for volume %s", new_volinfo->volname);
+ goto out;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (new_volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_import_quota_conf (peer_data, count,
+ new_volinfo, "volume");
+ if (ret)
+ goto out;
+
+ /* Only a fully imported volume is inserted into the volume list */
+ glusterd_list_add_order (&new_volinfo->vol_list, &priv->volumes,
+ glusterd_compare_volume_name);
+
+out:
+ gf_msg_debug ("glusterd", 0, "Returning with ret: %d", ret);
+ return ret;
+}
+
+/* Import every volume listed in peer_data, indices 1 .. "count".
+ * Stops at the first volume that fails to import and returns that error;
+ * returns 0 when all volumes were imported (or "count" is not positive).
+ */
+int32_t
+glusterd_import_friend_volumes (dict_t *peer_data)
+{
+        int32_t nvols = 0;
+        int32_t ret   = -1;
+        int     idx   = 0;
+
+        GF_ASSERT (peer_data);
+
+        ret = dict_get_int32 (peer_data, "count", &nvols);
+        if (ret)
+                goto out;
+
+        for (idx = 1; idx <= nvols; idx++) {
+                ret = glusterd_import_friend_volume (peer_data, idx);
+                if (ret)
+                        break;
+        }
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+/* Read GLUSTERD_QUORUM_RATIO_KEY from opts and parse it as a percentage
+ * into *quorum. Returns 0 on success, otherwise the non-zero result of the
+ * lookup or of the conversion.
+ */
+int
+glusterd_get_global_server_quorum_ratio (dict_t *opts, double *quorum)
+{
+        char *ratio = NULL;
+        int   rc    = -1;
+
+        rc = dict_get_str (opts, GLUSTERD_QUORUM_RATIO_KEY, &ratio);
+        if (rc == 0)
+                rc = gf_string2percent (ratio, quorum);
+
+        return rc;
+}
+
+/* Read GLUSTERD_GLOBAL_OPT_VERSION from opts and parse it as an unsigned
+ * integer into *version. Returns 0 on success, otherwise the non-zero
+ * result of the lookup or of the conversion.
+ */
+int
+glusterd_get_global_opt_version (dict_t *opts, uint32_t *version)
+{
+        char *text = NULL;
+        int   rc   = -1;
+
+        rc = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &text);
+        if (rc == 0)
+                rc = gf_string2uint (text, version);
+
+        return rc;
+}
+
+/* Produce the decimal string for (current global option version + 1).
+ *
+ * On success *version_str points to a string allocated with gf_strdup and
+ * 0 is returned. On failure a non-zero value is returned; this now includes
+ * the allocation failing, which previously returned 0 with *version_str
+ * set to NULL.
+ */
+int
+glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str)
+{
+        int      ret                = -1;
+        char     version_string[64] = {0};
+        uint32_t version            = 0;
+
+        ret = glusterd_get_global_opt_version (opts, &version);
+        if (ret)
+                goto out;
+
+        /* uint32_t arithmetic: wraps to 0 after UINT32_MAX */
+        version++;
+        snprintf (version_string, sizeof (version_string), "%"PRIu32, version);
+
+        *version_str = gf_strdup (version_string);
+        if (!*version_str)
+                ret = -1;
+out:
+        return ret;
+}
+
+/* Import the global options a peer sent in friend_data.
+ *
+ * If friend_data has no "global-opt-count" the peer is an old version and
+ * there is nothing to do. Otherwise the options are unpacked into a fresh
+ * dict; when the peer's option version is newer than the local one the
+ * options are stored, conf->opts is replaced, and bricks are restarted if
+ * the server quorum ratio changed.
+ *
+ * Returns 0 on success, non-zero on failure. Failure to allocate the
+ * import dict is now reported as -1; it previously returned 0.
+ */
+int32_t
+glusterd_import_global_opts (dict_t *friend_data)
+{
+        xlator_t        *this           = NULL;
+        glusterd_conf_t *conf           = NULL;
+        int              ret            = -1;
+        dict_t          *import_options = NULL;
+        int              count          = 0;
+        uint32_t         local_version  = 0;
+        uint32_t         remote_version = 0;
+        double           old_quorum     = 0.0;
+        double           new_quorum     = 0.0;
+
+        this = THIS;
+        conf = this->private;
+
+        ret = dict_get_int32 (friend_data, "global-opt-count", &count);
+        if (ret) {
+                //old version peer
+                ret = 0;
+                goto out;
+        }
+
+        import_options = dict_new ();
+        if (!import_options) {
+                /* ret is 0 from the lookup above; make the failure visible */
+                ret = -1;
+                goto out;
+        }
+        ret = import_prdict_dict (friend_data, import_options, "key", "val",
+                                  count, "global");
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLOBAL_OPT_IMPORT_FAIL, "Failed to import"
+                        " global options");
+                goto out;
+        }
+
+        /* Not handling ret since server-quorum-ratio might not yet be set */
+        (void) glusterd_get_global_server_quorum_ratio (conf->opts,
+                                                        &old_quorum);
+        (void) glusterd_get_global_server_quorum_ratio (import_options,
+                                                        &new_quorum);
+
+        ret = glusterd_get_global_opt_version (conf->opts, &local_version);
+        if (ret)
+                goto out;
+        ret = glusterd_get_global_opt_version (import_options, &remote_version);
+        if (ret)
+                goto out;
+
+        if (remote_version > local_version) {
+                ret = glusterd_store_options (this, import_options);
+                if (ret)
+                        goto out;
+                dict_unref (conf->opts);
+                conf->opts = dict_ref (import_options);
+
+                /* If server quorum ratio has changed, restart bricks to
+                 * recompute if quorum is met. If quorum is not met bricks are
+                 * not started and those already running are stopped
+                 */
+                if (old_quorum != new_quorum) {
+                        ret = glusterd_restart_bricks (conf);
+                        if (ret) {
+                                gf_msg ("glusterd", GF_LOG_INFO, 0,
+                                        GD_MSG_SERVER_QUORUM_NOT_MET,
+                                        "Restarting bricks failed");
+                                goto out;
+                        }
+                }
+        }
+
+        ret = 0;
+out:
+        /* Drops the local reference; conf->opts holds its own if adopted */
+        if (import_options)
+                dict_unref (import_options);
+        return ret;
+}
+
+/* Compare the volumes a peer advertises in peer_data with the local ones.
+ *
+ * Global options are imported first. Each volume 1 .. "count" is then
+ * compared; *status carries the verdict of the last comparison. A rejected
+ * volume ends the walk immediately with return value 0. If any volume
+ * needed an update, all peer volumes are imported and the services manager
+ * is run.
+ *
+ * Returns 0 on success, non-zero when a step fails.
+ */
+int32_t
+glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
+                              char *hostname)
+{
+        int32_t          ret          = -1;
+        int32_t          count        = 0;
+        int              idx          = 0;
+        gf_boolean_t     needs_import = _gf_false;
+        xlator_t        *this         = NULL;
+        glusterd_conf_t *priv         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (peer_data);
+        GF_ASSERT (status);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = glusterd_import_global_opts (peer_data);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLOBAL_OPT_IMPORT_FAIL, "Importing global "
+                        "options failed");
+                goto out;
+        }
+
+        ret = dict_get_int32 (peer_data, "count", &count);
+        if (ret)
+                goto out;
+
+        for (idx = 1; idx <= count; idx++) {
+                ret = glusterd_compare_friend_volume (peer_data, idx, status,
+                                                      hostname);
+                if (ret)
+                        goto out;
+
+                /* A rejection is a valid verdict, not an error */
+                if (GLUSTERD_VOL_COMP_RJT == *status) {
+                        ret = 0;
+                        goto out;
+                }
+
+                if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status)
+                        needs_import = _gf_true;
+        }
+
+        if (needs_import) {
+                ret = glusterd_import_friend_volumes (peer_data);
+                if (ret)
+                        goto out;
+
+                glusterd_svcs_manager (NULL);
+        }
+
+out:
+        gf_msg_debug (this->name, 0,
+                      "Returning with ret: %d, status: %d", ret, *status);
+        return ret;
+}
+
+/* Take a reference on defrag->rpc while holding defrag->lock.
+ * Returns whatever rpc_clnt_ref returns, or NULL when defrag is NULL.
+ */
+struct rpc_clnt*
+glusterd_defrag_rpc_get (glusterd_defrag_info_t *defrag)
+{
+        struct rpc_clnt *ref = NULL;
+
+        if (defrag) {
+                LOCK (&defrag->lock);
+                ref = rpc_clnt_ref (defrag->rpc);
+                UNLOCK (&defrag->lock);
+        }
+
+        return ref;
+}
+
+/* Drop a reference on defrag->rpc while holding defrag->lock and store the
+ * value rpc_clnt_unref returns back into defrag->rpc.
+ * Returns that value, or NULL when defrag is NULL.
+ */
+struct rpc_clnt*
+glusterd_defrag_rpc_put (glusterd_defrag_info_t *defrag)
+{
+        struct rpc_clnt *remaining = NULL;
+
+        if (defrag) {
+                LOCK (&defrag->lock);
+                defrag->rpc = rpc_clnt_unref (defrag->rpc);
+                remaining = defrag->rpc;
+                UNLOCK (&defrag->lock);
+        }
+
+        return remaining;
+}
+
+/* Return the rpc client that belongs to a pending node, chosen by the
+ * node's type. For a rebalance node the rpc is obtained through
+ * glusterd_defrag_rpc_get, i.e. with a reference taken; the other types
+ * return the stored pointer as is. Returns NULL when pending_node or its
+ * node pointer is NULL; an unknown type trips GF_ASSERT.
+ */
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
+{
+        struct rpc_clnt      *rpc       = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        glusterd_volinfo_t   *volinfo   = NULL;
+        glusterd_svc_t       *svc       = NULL;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out);
+        GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out);
+
+        switch (pending_node->type) {
+        case GD_NODE_BRICK:
+                brickinfo = pending_node->node;
+                rpc = brickinfo->rpc;
+                break;
+
+        case GD_NODE_SHD:
+        case GD_NODE_NFS:
+        case GD_NODE_QUOTAD:
+        case GD_NODE_SCRUB:
+                svc = pending_node->node;
+                rpc = svc->conn.rpc;
+                break;
+
+        case GD_NODE_REBALANCE:
+                volinfo = pending_node->node;
+                rpc = glusterd_defrag_rpc_get (volinfo->rebal.defrag);
+                break;
+
+        case GD_NODE_SNAPD:
+                volinfo = pending_node->node;
+                rpc = volinfo->snapd.svc.conn.rpc;
+                break;
+
+        default:
+                GF_ASSERT (0);
+                break;
+        }
+
+out:
+        return rpc;
+}
+
+/* Release the rpc reference obtained for a pending node. Only rebalance
+ * nodes are handled here; every other node type is a no-op.
+ */
+void
+glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node)
+{
+        glusterd_volinfo_t *volinfo = NULL;
+
+        if (pending_node->type == GD_NODE_REBALANCE) {
+                volinfo = pending_node->node;
+                glusterd_defrag_rpc_put (volinfo->rebal.defrag);
+        }
+}
+
+/* Remove the file at sockfpath. A file that does not exist counts as
+ * success. Returns 0 on success, otherwise the sys_unlink result after
+ * logging the error.
+ */
+int32_t
+glusterd_unlink_file (char *sockfpath)
+{
+        int rc = sys_unlink (sockfpath);
+
+        if (rc == 0)
+                return 0;
+
+        /* Nothing to remove is not an error */
+        if (errno == ENOENT)
+                return 0;
+
+        gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                GD_MSG_FILE_OP_FAILED, "Failed to remove %s"
+                " error: %s", sockfpath, strerror (errno));
+
+        return rc;
+}
+
+/* Call pmap_unset for each program/version pair listed below (MOUNT v3/v1,
+ * NFS v3, NLM v4/v1, ACL v3), logging success or failure for every pair.
+ * Failures are only logged; all pairs are always attempted.
+ */
+void
+glusterd_nfs_pmap_deregister ()
+{
+        const struct {
+                unsigned long  prog;
+                unsigned long  vers;
+                const char    *done;
+                const char    *failed;
+        } regs[] = {
+                { MOUNT_PROGRAM, MOUNTV3_VERSION,
+                  "De-registered MOUNTV3 successfully",
+                  "De-register MOUNTV3 is unsuccessful" },
+                { MOUNT_PROGRAM, MOUNTV1_VERSION,
+                  "De-registered MOUNTV1 successfully",
+                  "De-register MOUNTV1 is unsuccessful" },
+                { NFS_PROGRAM, NFSV3_VERSION,
+                  "De-registered NFSV3 successfully",
+                  "De-register NFSV3 is unsuccessful" },
+                { NLM_PROGRAM, NLMV4_VERSION,
+                  "De-registered NLM v4 successfully",
+                  "De-registration of NLM v4 failed" },
+                { NLM_PROGRAM, NLMV1_VERSION,
+                  "De-registered NLM v1 successfully",
+                  "De-registration of NLM v1 failed" },
+                { ACL_PROGRAM, ACLV3_VERSION,
+                  "De-registered ACL v3 successfully",
+                  "De-registration of ACL v3 failed" },
+        };
+        size_t i = 0;
+
+        for (i = 0; i < sizeof (regs) / sizeof (regs[0]); i++) {
+                if (pmap_unset (regs[i].prog, regs[i].vers))
+                        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                                GD_MSG_DEREGISTER_SUCCESS,
+                                "%s", regs[i].done);
+                else
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_PMAP_UNSET_FAIL,
+                                "%s", regs[i].failed);
+        }
+}
+
+/* Describe one node-level service (shd, nfs, quotad, bitd or scrub) in
+ * dict using brick-style keys so the cli can print it like a brick:
+ *
+ *   brick<count>.hostname = display name of the service
+ *   brick<count>.path     = uuid of this node
+ *   brick<count>.port     = NFS port for the nfs service, 0 otherwise
+ *   brick<count>.pid      = pid read from the pidfile, -1 if not running
+ *   brick<count>.status   = whether the service is running
+ *
+ * server is matched against the service names kept in the glusterd conf.
+ * An unknown name now fails with -1; it previously dereferenced a NULL
+ * service pointer.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_add_node_to_dict (char *server, dict_t *dict, int count,
+                           dict_t *vol_opts)
+{
+        int              ret               = -1;
+        char             pidfile[PATH_MAX] = {0,};
+        gf_boolean_t     running           = _gf_false;
+        int              pid               = -1;
+        int              port              = 0;
+        glusterd_svc_t  *svc               = NULL;
+        char            *display_name      = NULL;
+        char             key[1024]         = {0,};
+        xlator_t        *this              = NULL;
+        glusterd_conf_t *priv              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        glusterd_svc_build_pidfile_path (server, priv->workdir, pidfile,
+                                         sizeof (pidfile));
+
+        /* Resolve the service object and its display name in one pass */
+        if (strcmp (server, priv->shd_svc.name) == 0) {
+                svc = &(priv->shd_svc);
+                display_name = "Self-heal Daemon";
+        } else if (strcmp (server, priv->nfs_svc.name) == 0) {
+                svc = &(priv->nfs_svc);
+                display_name = "NFS Server";
+        } else if (strcmp (server, priv->quotad_svc.name) == 0) {
+                svc = &(priv->quotad_svc);
+                display_name = "Quota Daemon";
+        } else if (strcmp (server, priv->bitd_svc.name) == 0) {
+                svc = &(priv->bitd_svc);
+                display_name = "Bitrot Daemon";
+        } else if (strcmp (server, priv->scrub_svc.name) == 0) {
+                svc = &(priv->scrub_svc);
+                display_name = "Scrubber Daemon";
+        }
+
+        if (!svc) {
+                gf_msg_debug (this->name, 0, "Unknown service %s", server);
+                ret = -1;
+                goto out;
+        }
+
+        //Consider service to be running only when glusterd sees it Online
+        if (svc->online)
+                running = gf_is_service_running (pidfile, &pid);
+
+        /* For nfs-servers/self-heal-daemon setting
+         * brick<n>.hostname = "NFS Server" / "Self-heal Daemon"
+         * brick<n>.path = uuid
+         * brick<n>.port = 0
+         *
+         * This might be confusing, but cli displays the name of
+         * the brick as hostname+path, so this will make more sense
+         * when output.
+         */
+        snprintf (key, sizeof (key), "brick%d.hostname", count);
+        ret = dict_set_str (dict, key, display_name);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "brick%d.path", count);
+        ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (MY_UUID)));
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "brick%d.port", count);
+        /* Port is available only for the NFS server.
+         * Self-heal daemon doesn't provide any port for access
+         * by entities other than gluster.
+         */
+        if (svc == &(priv->nfs_svc)) {
+                if (dict_get (vol_opts, "nfs.port")) {
+                        ret = dict_get_int32 (vol_opts, "nfs.port", &port);
+                        if (ret)
+                                goto out;
+                } else
+                        port = GF_NFS3_PORT;
+        }
+        ret = dict_set_int32 (dict, key, port);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "brick%d.pid", count);
+        ret = dict_set_int32 (dict, key, pid);
+        if (ret)
+                goto out;
+
+        snprintf (key, sizeof (key), "brick%d.status", count);
+        ret = dict_set_int32 (dict, key, running);
+        if (ret)
+                goto out;
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Write the host name of the peer that sent req into remote_host, a
+ * buffer of len bytes.
+ *
+ * The host part of the transport's peer identifier is used; when
+ * gf_get_hostname_from_ip can map it to a name, that name is used instead.
+ * The result is always NUL-terminated and never exceeds len bytes; a name
+ * that does not fit is truncated. The previous strncpy ignored len and
+ * left the buffer unterminated.
+ *
+ * Returns 0 on success, -1 when no host name could be derived or len is
+ * not positive.
+ */
+int
+glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len)
+{
+        GF_ASSERT (req);
+        GF_ASSERT (remote_host);
+        GF_ASSERT (req->trans);
+
+        char *name     = NULL;
+        char *hostname = NULL;
+        char *tmp_host = NULL;
+        char *canon    = NULL;
+        int   ret      = 0;
+
+        if (len <= 0)
+                return -1;
+
+        name = req->trans->peerinfo.identifier;
+        tmp_host = gf_strdup (name);
+        if (tmp_host)
+                get_host_name (tmp_host, &hostname);
+
+        GF_ASSERT (hostname);
+        if (!hostname) {
+                memset (remote_host, 0, len);
+                ret = -1;
+                goto out;
+        }
+
+        if ((gf_get_hostname_from_ip (hostname, &canon) == 0) && canon) {
+                /* From here tmp_host owns canon so the cleanup frees it */
+                GF_FREE (tmp_host);
+                tmp_host = hostname = canon;
+        }
+
+        /* Bounded, always NUL-terminated copy */
+        snprintf (remote_host, len, "%s", hostname);
+
+out:
+        GF_FREE (tmp_host);
+        return ret;
+}
+
+/* Returns _gf_true when no volume in the glusterd volume list is in the
+ * started state, _gf_false as soon as one started volume is found.
+ */
+gf_boolean_t
+glusterd_are_all_volumes_stopped ()
+{
+        xlator_t           *this        = NULL;
+        glusterd_conf_t    *conf        = NULL;
+        glusterd_volinfo_t *vol         = NULL;
+        gf_boolean_t        all_stopped = _gf_true;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                if (vol->status == GLUSTERD_STATUS_STARTED) {
+                        all_stopped = _gf_false;
+                        break;
+                }
+        }
+
+        return all_stopped;
+}
+
+/* Returns _gf_true when none of the volumes that
+ * glusterd_is_shd_compatible_volume accepts is in the started state.
+ */
+gf_boolean_t
+glusterd_all_shd_compatible_volumes_stopped ()
+{
+        xlator_t           *this        = NULL;
+        glusterd_conf_t    *conf        = NULL;
+        glusterd_volinfo_t *vol         = NULL;
+        gf_boolean_t        all_stopped = _gf_true;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                if (glusterd_is_shd_compatible_volume (vol) &&
+                    (vol->status == GLUSTERD_STATUS_STARTED)) {
+                        all_stopped = _gf_false;
+                        break;
+                }
+        }
+
+        return all_stopped;
+}
+
+/* Returns _gf_true when none of the volumes that have quota enabled is in
+ * the started state.
+ */
+gf_boolean_t
+glusterd_all_volumes_with_quota_stopped ()
+{
+        xlator_t           *this        = NULL;
+        glusterd_conf_t    *conf        = NULL;
+        glusterd_volinfo_t *vol         = NULL;
+        gf_boolean_t        all_stopped = _gf_true;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                if (glusterd_is_volume_quota_enabled (vol) &&
+                    (vol->status == GLUSTERD_STATUS_STARTED)) {
+                        all_stopped = _gf_false;
+                        break;
+                }
+        }
+
+        return all_stopped;
+}
+
+/* Returns _gf_true when the glusterd volume list is non-empty. Returns
+ * _gf_false when the list is empty or when the xlator or its private
+ * data is missing.
+ */
+gf_boolean_t
+glusterd_have_volumes ()
+{
+        gf_boolean_t     have = _gf_false;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", (this != NULL), out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, (conf != NULL), out);
+
+        have = !cds_list_empty (&conf->volumes);
+out:
+        return have;
+}
+
+/* Count the entries in the glusterd volume list and return that number. */
+int
+glusterd_volume_count_get (void)
+{
+        xlator_t           *this  = NULL;
+        glusterd_conf_t    *conf  = NULL;
+        glusterd_volinfo_t *vol   = NULL;
+        int32_t             nvols = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list)
+                nvols++;
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", nvols);
+        return nvols;
+}
+
+/* Search every volume for a brick matching uuid/hostname/path using
+ * glusterd_volume_brickinfo_get. Stops at the first match and returns 0
+ * with *brickinfo filled in by that call. Otherwise returns the result of
+ * the last lookup, or -1 when there are no volumes.
+ */
+int
+glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+                        glusterd_brickinfo_t **brickinfo)
+{
+        xlator_t           *this = NULL;
+        glusterd_conf_t    *conf = NULL;
+        glusterd_volinfo_t *vol  = NULL;
+        int                 rc   = -1;
+
+        GF_ASSERT (path);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                rc = glusterd_volume_brickinfo_get (uuid, hostname, path,
+                                                    vol, brickinfo);
+                if (rc == 0)
+                        break;  /* found */
+        }
+
+        return rc;
+}
+
+/* Start the brick process for brickinfo of volinfo if the brick belongs
+ * to this node.
+ *
+ * A brick whose uuid is still unset is resolved first. Bricks owned by
+ * another node are skipped and count as success.
+ *
+ * Returns 0 on success or skip, -1 when either argument is NULL, otherwise
+ * the non-zero result of the failing step.
+ */
+int
+glusterd_brick_start (glusterd_volinfo_t *volinfo,
+                      glusterd_brickinfo_t *brickinfo,
+                      gf_boolean_t wait)
+{
+        xlator_t *this = NULL;
+        int       ret  = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!brickinfo || !volinfo)
+                goto out;
+
+        if (gf_uuid_is_null (brickinfo->uuid)) {
+                ret = glusterd_resolve_brick (brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL,
+                                FMTSTR_RESOLVE_BRICK,
+                                brickinfo->hostname, brickinfo->path);
+                        goto out;
+                }
+        }
+
+        /* Not our brick: nothing to start on this node */
+        if (gf_uuid_compare (brickinfo->uuid, MY_UUID) != 0) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_DISCONNECTED,
+                        "Unable to start brick %s:%s",
+                        brickinfo->hostname, brickinfo->path);
+
+out:
+        gf_msg_debug (this->name, 0, "returning %d ", ret);
+        return ret;
+}
+
+/* Restart the bricks of every started volume and every started snapshot
+ * volume, subject to server quorum.
+ *
+ * The conf argument is ignored; the configuration is taken from
+ * THIS->private. For each started volume the quorum is checked first:
+ * when it is not met the volume's bricks are stopped (regular volumes) or
+ * left alone (snapshot volumes). Brick start failures are not propagated.
+ *
+ * Returns 0 once the walk completes, or the error from
+ * glusterd_get_quorum_cluster_counts. The result of the last quorum check
+ * (1 or 2) used to leak out as the return value, which callers read as a
+ * failure.
+ */
+int
+glusterd_restart_bricks (glusterd_conf_t *conf)
+{
+        int                   ret          = 0;
+        glusterd_volinfo_t   *volinfo      = NULL;
+        glusterd_brickinfo_t *brickinfo    = NULL;
+        glusterd_snap_t      *snap         = NULL;
+        gf_boolean_t          start_svcs   = _gf_false;
+        xlator_t             *this         = NULL;
+        int                   active_count = 0;
+        int                   quorum_count = 0;
+        gf_boolean_t          node_quorum  = _gf_false;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        ret = glusterd_get_quorum_cluster_counts (this, &active_count,
+                                                  &quorum_count);
+        if (ret)
+                goto out;
+
+        if (does_quorum_meet (active_count, quorum_count))
+                node_quorum = _gf_true;
+
+        cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+                if (volinfo->status != GLUSTERD_STATUS_STARTED)
+                        continue;
+                /* Run the services manager once, on the first started volume */
+                if (start_svcs == _gf_false) {
+                        start_svcs = _gf_true;
+                        glusterd_svcs_manager (NULL);
+                }
+                gf_msg_debug (this->name, 0, "starting the volume %s",
+                              volinfo->volname);
+
+                /* Check the quorum, if quorum is not met, don't start the
+                   bricks. Stop bricks in case they are running.
+                */
+                ret = check_quorum_for_brick_start (volinfo, node_quorum);
+                if (ret == 0) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_SERVER_QUORUM_NOT_MET, "Skipping brick "
+                                "restart for volume %s as quorum is not met",
+                                volinfo->volname);
+                        (void) glusterd_stop_bricks (volinfo);
+                        continue;
+                } else if (ret == 2 && conf->restart_done == _gf_true) {
+                        /* If glusterd has been restarted and quorum is not
+                         * applicable then do not restart the bricks as this
+                         * might start bricks brought down purposely, say for
+                         * maintenance
+                         */
+                        continue;
+                } else {
+                        cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                                 brick_list) {
+                                glusterd_brick_start (volinfo, brickinfo,
+                                                      _gf_false);
+                        }
+                        conf->restart_done = _gf_true;
+                }
+        }
+
+        cds_list_for_each_entry (snap, &conf->snapshots, snap_list) {
+                cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+                        if (volinfo->status != GLUSTERD_STATUS_STARTED)
+                                continue;
+                        /* Check the quorum, if quorum is not met, don't start the
+                           bricks
+                        */
+                        ret = check_quorum_for_brick_start (volinfo,
+                                                            node_quorum);
+                        if (ret == 0) {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        GD_MSG_SERVER_QUORUM_NOT_MET, "Skipping"
+                                        " brick restart for volume %s as "
+                                        "quorum is not met", volinfo->volname);
+                                continue;
+                        }
+                        if (start_svcs == _gf_false) {
+                                start_svcs = _gf_true;
+                                glusterd_svcs_manager (volinfo);
+                        }
+                        gf_msg_debug (this->name, 0, "starting the snap "
+                                      "volume %s", volinfo->volname);
+                        cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                                 brick_list) {
+                                glusterd_brick_start (volinfo, brickinfo,
+                                                      _gf_false);
+                        }
+                }
+        }
+
+        /* ret still holds the last quorum-check verdict; the walk itself
+         * completed, so report success.
+         */
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* dict_foreach callback: restart the geo-replication session described by
+ * one entry of a volume's gsync_slaves dict, if it was running before.
+ *
+ * value->data has the form "<uuid>:<slave>"; data is the owning
+ * glusterd_volinfo_t. The last recorded session status decides what
+ * happens: "Created", "Stopped" and "Config Corrupted" sessions are not
+ * restarted, a status containing "Paused" is restarted paused, anything
+ * else is restarted normally and recorded in gsync_active_slaves.
+ *
+ * Hardening in this version:
+ *  - a uuid part that does not fit uuid_str is skipped instead of
+ *    overflowing the buffer;
+ *  - a truncated gsyncd.conf path is an error instead of an out-of-bounds
+ *    write to confpath;
+ *  - a slave with no '/' (NULL key) is an error instead of being passed
+ *    to the dict as a NULL key.
+ *
+ * NOTE(review): slave_url, slave_host and slave_vol are filled in by
+ * glusterd_get_slave_info and never released here - confirm ownership.
+ */
+int
+_local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
+{
+        char               *path_list          = NULL;
+        char               *slave              = NULL;
+        char               *slave_url          = NULL;
+        char               *slave_vol          = NULL;
+        char               *slave_host         = NULL;
+        char               *statefile          = NULL;
+        char                buf[1024]          = "faulty";
+        int                 uuid_len           = 0;
+        int                 ret                = 0;
+        int                 op_ret             = 0;
+        int                 ret_status         = 0;
+        char                uuid_str[64]       = {0};
+        glusterd_volinfo_t *volinfo            = NULL;
+        char                confpath[PATH_MAX] = "";
+        char               *op_errstr          = NULL;
+        glusterd_conf_t    *priv               = NULL;
+        gf_boolean_t        is_template_in_use = _gf_false;
+        gf_boolean_t        is_paused          = _gf_false;
+        char               *key1               = NULL;
+        xlator_t           *this1              = NULL;
+
+        this1 = THIS;
+        GF_ASSERT (this1);
+        priv = this1->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (data);
+
+        volinfo = data;
+        slave = strchr(value->data, ':');
+        if (slave)
+                slave ++;
+        else
+                return 0;
+        uuid_len = (slave - value->data - 1);
+
+        /* Skip entries whose uuid part cannot fit, like other malformed
+         * entries; uuid_str is zero-filled so the copy stays terminated.
+         */
+        if ((size_t) uuid_len >= sizeof (uuid_str)) {
+                gf_msg_debug (this1->name, 0, "uuid part of slave entry "
+                              "is too long (%d bytes), skipping", uuid_len);
+                return 0;
+        }
+        strncpy (uuid_str, (char*)value->data, uuid_len);
+
+        /* Getting Local Brickpaths */
+        ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+
+        /*Generating the conf file path needed by gsyncd */
+        ret = glusterd_get_slave_info (slave, &slave_url, &slave_host,
+                                       &slave_vol, &op_errstr);
+        if (ret) {
+                gf_msg (this1->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SLAVEINFO_FETCH_ERROR,
+                        "Unable to fetch slave details.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = snprintf (confpath, sizeof(confpath) - 1,
+                        "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+                        priv->workdir, volinfo->volname,
+                        slave_host, slave_vol);
+        /* snprintf returns the untruncated length; refuse a cut-off path */
+        if ((ret < 0) || ((size_t) ret >= sizeof (confpath) - 1)) {
+                gf_msg_debug (this1->name, 0, "gsyncd.conf path for "
+                              "volume %s does not fit in PATH_MAX",
+                              volinfo->volname);
+                ret = -1;
+                goto out;
+        }
+        confpath[ret] = '\0';
+
+        /* Fetching the last status of the node */
+        ret = glusterd_get_statefile_name (volinfo, slave,
+                                           confpath, &statefile,
+                                           &is_template_in_use);
+        if (ret) {
+                if (!strstr(slave, "::"))
+                        gf_msg (this1->name, GF_LOG_INFO, 0,
+                                GD_MSG_SLAVE_URL_INVALID,
+                                "%s is not a valid slave url.", slave);
+                else
+                        gf_msg (this1->name, GF_LOG_INFO, 0,
+                                GD_MSG_GET_STATEFILE_NAME_FAILED,
+                                "Unable to get"
+                                " statefile's name");
+                goto out;
+        }
+
+        /* If state-file entry is missing from the config file,
+         * do not start gsyncd on restart */
+        if (is_template_in_use) {
+                gf_msg (this1->name, GF_LOG_INFO, 0,
+                        GD_MSG_NO_STATEFILE_ENTRY,
+                        "state-file entry is missing in config file."
+                        "Not Restarting");
+                goto out;
+        }
+
+        is_template_in_use = _gf_false;
+
+        ret = gsync_status (volinfo->volname, slave, confpath,
+                            &ret_status, &is_template_in_use);
+        if (ret == -1) {
+                gf_msg (this1->name, GF_LOG_INFO, 0,
+                        GD_MSG_GSYNC_VALIDATION_FAIL,
+                        GEOREP" start option validation failed ");
+                ret = 0;
+                goto out;
+        }
+
+        if (is_template_in_use == _gf_true) {
+                gf_msg (this1->name, GF_LOG_INFO, 0,
+                        GD_MSG_PIDFILE_NOT_FOUND,
+                        "pid-file entry is missing in config file."
+                        "Not Restarting");
+                ret = 0;
+                goto out;
+        }
+
+        ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf));
+        if (ret <= 0) {
+                gf_msg (this1->name, GF_LOG_ERROR, 0,
+                        GD_MSG_STAT_FILE_READ_FAILED,
+                        "Unable to read the status");
+                goto out;
+        }
+
+        /* Move the pointer two characters ahead to surpass '//' */
+        if ((key1 = strchr (slave, '/')))
+                key1 = key1 + 2;
+
+        /* Looks for the last status, to find if the session was running
+         * when the node went down. If the session was just created or
+         * stopped, do not restart the geo-rep session */
+        if ((!strcmp (buf, "Created")) ||
+            (!strcmp (buf, "Stopped"))) {
+                gf_msg (this1->name, GF_LOG_INFO, 0,
+                        GD_MSG_GEO_REP_START_FAILED,
+                        "Geo-Rep Session was not started between "
+                        "%s and %s::%s. Not Restarting", volinfo->volname,
+                        slave_url, slave_vol);
+                goto out;
+        } else if (strstr(buf, "Paused")) {
+                is_paused = _gf_true;
+        } else if ((!strcmp (buf, "Config Corrupted"))) {
+                gf_msg (this1->name, GF_LOG_INFO, 0,
+                        GD_MSG_RECOVERING_CORRUPT_CONF,
+                        "Recovering from a corrupted config. "
+                        "Not Restarting. Use start (force) to "
+                        "start the session between %s and %s::%s.",
+                        volinfo->volname,
+                        slave_url, slave_vol);
+                goto out;
+        }
+
+        if (is_paused) {
+                glusterd_start_gsync (volinfo, slave, path_list, confpath,
+                                      uuid_str, NULL, _gf_true);
+        }
+        else {
+                /* Without a '/' in the slave there is no key to record */
+                if (!key1) {
+                        gf_msg_debug (this1->name, 0, "slave %s has no "
+                                      "'/' separator, not restarting", slave);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Add slave to the dict indicating geo-rep session is running*/
+                ret = dict_set_dynstr_with_alloc (volinfo->gsync_active_slaves,
+                                                  key1, "running");
+                if (ret) {
+                        gf_msg (this1->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set key:%s"
+                                " value:running in the dict", key1);
+                        goto out;
+                }
+                ret = glusterd_start_gsync (volinfo, slave, path_list, confpath,
+                                            uuid_str, NULL, _gf_false);
+                if (ret)
+                        dict_del (volinfo->gsync_active_slaves, key1);
+        }
+
+out:
+        if (statefile)
+                GF_FREE (statefile);
+
+        /* A config that fell back to the template is flagged as corrupted */
+        if (is_template_in_use) {
+                op_ret = glusterd_create_status_file (volinfo->volname, slave,
+                                                      slave_host, slave_vol,
+                                                      "Config Corrupted");
+                if (op_ret) {
+                        gf_msg (this1->name, GF_LOG_ERROR, 0,
+                                GD_MSG_STATUSFILE_CREATE_FAILED,
+                                "Unable to create status file"
+                                ". Error : %s", strerror (errno));
+                        ret = op_ret;
+                }
+        }
+
+        GF_FREE (path_list);
+        GF_FREE (op_errstr);
+
+        return ret;
+}
+
+/* Run _local_gsyncd_start over every entry of volinfo->gsync_slaves.
+ * Always returns 0; per-slave failures are not propagated.
+ */
+int
+glusterd_volume_restart_gsyncds (glusterd_volinfo_t *volinfo)
+{
+        GF_ASSERT (volinfo);
+
+        (void) dict_foreach (volinfo->gsync_slaves, _local_gsyncd_start,
+                             volinfo);
+
+        return 0;
+}
+
+/* Restart the geo-replication sessions of every volume in conf->volumes.
+ * Always returns 0.
+ */
+int
+glusterd_restart_gsyncds (glusterd_conf_t *conf)
+{
+        glusterd_volinfo_t *vol = NULL;
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list)
+                glusterd_volume_restart_gsyncds (vol);
+
+        return 0;
+}
+
+/* Product of the replica and stripe counts, where a count of 0 is treated
+ * as 1.
+ */
+int
+glusterd_calc_dist_leaf_count (int rcount, int scount)
+{
+        int replicas = rcount;
+        int stripes  = scount;
+
+        if (replicas == 0)
+                replicas = 1;
+        if (stripes == 0)
+                stripes = 1;
+
+        return replicas * stripes;
+}
+
+/* For a disperse volume returns its disperse_count; for any other type
+ * returns glusterd_calc_dist_leaf_count of its replica and stripe counts.
+ */
+int
+glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)
+{
+        if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+                return volinfo->disperse_count;
+
+        return glusterd_calc_dist_leaf_count (volinfo->replica_count,
+                                              volinfo->stripe_count);
+}
+
+/* Find the brick owned by this node (uuid equal to MY_UUID) whose path is
+ * brickname and whose port is port, searching all volumes.
+ * On a match stores it in *brickinfo and returns 0; otherwise returns -1
+ * and leaves *brickinfo untouched.
+ */
+int
+glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port,
+                        glusterd_brickinfo_t **brickinfo)
+{
+        glusterd_conf_t      *conf  = NULL;
+        glusterd_volinfo_t   *vol   = NULL;
+        glusterd_brickinfo_t *brick = NULL;
+
+        GF_ASSERT (brickname);
+        GF_ASSERT (this);
+
+        conf = this->private;
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                        if ((gf_uuid_compare (brick->uuid, MY_UUID) == 0) &&
+                            (strcmp (brick->path, brickname) == 0) &&
+                            (brick->port == port)) {
+                                *brickinfo = brick;
+                                return 0;
+                        }
+                }
+        }
+
+        return -1;
+}
+
+/* Return the brick at zero-based position pos in volinfo->bricks, or NULL
+ * when the list has fewer than pos + 1 bricks.
+ */
+glusterd_brickinfo_t*
+glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos)
+{
+        glusterd_brickinfo_t *brick = NULL;
+        uint32_t              idx   = 0;
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                if (idx == pos)
+                        return brick;
+                idx++;
+        }
+
+        return NULL;
+}
+
+/* Record status in brickinfo->status and emit a debug message saying
+ * whether the brick is now considered started or stopped. Any status
+ * other than GF_BRICK_STARTED is logged as stopped.
+ */
+void
+glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo,
+                           gf_brick_status_t status)
+{
+        GF_ASSERT (brickinfo);
+
+        brickinfo->status = status;
+
+        if (status != GF_BRICK_STARTED) {
+                gf_msg_debug ("glusterd", 0, "Setting brick %s:%s status "
+                              "to stopped", brickinfo->hostname, brickinfo->path);
+                return;
+        }
+
+        gf_msg_debug ("glusterd", 0, "Setting brick %s:%s status "
+                      "to started", brickinfo->hostname, brickinfo->path);
+}
+
+/* Returns true when brickinfo->status equals GF_BRICK_STARTED. */
+gf_boolean_t
+glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo)
+{
+        gf_boolean_t started = _gf_false;
+
+        GF_ASSERT (brickinfo);
+
+        started = (brickinfo->status == GF_BRICK_STARTED);
+        return started;
+}
+
+/* Check whether brickinfo is owned by the peer whose uuid is pointed to
+ * by uuid (a uuid_t passed as void *). A brick with an unset uuid is
+ * resolved first; a failed resolve asserts and counts as "does not
+ * belong".
+ *
+ * Returns 0 when the uuids match, -1 otherwise.
+ */
+int
+glusterd_friend_brick_belongs (glusterd_volinfo_t *volinfo,
+                               glusterd_brickinfo_t *brickinfo, void* uuid)
+{
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (uuid);
+
+        if (gf_uuid_is_null (brickinfo->uuid) &&
+            glusterd_resolve_brick (brickinfo)) {
+                GF_ASSERT (0);
+                return -1;
+        }
+
+        if (gf_uuid_compare (brickinfo->uuid, *((uuid_t *)uuid)) == 0)
+                return 0;
+
+        return -1;
+}
+
+/* Find the topmost ancestor directory of path that is still on the same
+ * device (st_dev) as path itself.
+ *
+ * Works on a private copy of path: trailing components are cut off one at
+ * a time and each shortened path is stat'ed; the walk stops at the first
+ * ancestor whose st_dev differs, and the last cut is undone.
+ *
+ * On success returns 0 and stores the result in *mount_point; the string
+ * comes from gf_strdup and is handed to the caller. Returns -1 when path
+ * is NULL, the copy cannot be allocated, or a stat fails; the copy is
+ * freed in that case.
+ */
+int
+glusterd_get_brick_root (char *path, char **mount_point)
+{
+ char *ptr = NULL;
+ char *mnt_pt = NULL;
+ struct stat brickstat = {0};
+ struct stat buf = {0};
+
+ if (!path)
+ goto err;
+ mnt_pt = gf_strdup (path);
+ if (!mnt_pt)
+ goto err;
+ if (sys_stat (mnt_pt, &brickstat))
+ goto err;
+
+ /* Strip one trailing component per iteration; stop once only the
+ * leading '/' is left (ptr == mnt_pt) or no '/' remains.
+ */
+ while ((ptr = strrchr (mnt_pt, '/')) &&
+ ptr != mnt_pt) {
+
+ *ptr = '\0';
+ if (sys_stat (mnt_pt, &buf)) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED, "error in "
+ "stat: %s", strerror (errno));
+ goto err;
+ }
+
+ /* Crossed onto another device: restore the component just cut,
+ * the path before the cut is the answer.
+ */
+ if (brickstat.st_dev != buf.st_dev) {
+ *ptr = '/';
+ break;
+ }
+ }
+
+ /* Only the first-level component is left; decide between it and "/"
+ * by comparing against the device of the root directory.
+ */
+ if (ptr == mnt_pt) {
+ if (sys_stat ("/", &buf)) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED, "error in "
+ "stat: %s", strerror (errno));
+ goto err;
+ }
+ if (brickstat.st_dev == buf.st_dev)
+ strcpy (mnt_pt, "/");
+ }
+
+ *mount_point = mnt_pt;
+ return 0;
+
+ err:
+ GF_FREE (mnt_pt);
+ return -1;
+}
+
+/* Locate pattern in stream and return a pointer to the run of digits that
+ * nwstrtail yields after it.
+ *
+ * stream is modified in place: the first character after the digits is
+ * overwritten with NUL so the returned pointer is a terminated string
+ * inside stream. Returns NULL when pattern is not found; otherwise returns
+ * whatever nwstrtail returned.
+ */
+static char*
+glusterd_parse_inode_size (char *stream, char *pattern)
+{
+        char *needle = NULL;
+        char *trail  = NULL;
+
+        needle = strstr (stream, pattern);
+        if (!needle)
+                goto out;
+
+        needle = nwstrtail (needle, pattern);
+
+        trail = needle;
+        /* isdigit is undefined for negative char values; go via
+         * unsigned char
+         */
+        while (trail && isdigit ((unsigned char) *trail))
+                trail++;
+        if (trail)
+                *trail = '\0';
+
+out:
+        return needle;
+}
+
+/* Table of backend filesystems, one row per filesystem type, terminated
+ * by an all-NULL row.
+ *
+ *   fs_type_name    - filesystem type name a brick's fs_name is compared to
+ *   fs_tool_name    - tool binary name, looked up under /usr/sbin then /sbin
+ *   fs_tool_arg     - optional argument placed before the device, or NULL
+ *   fs_tool_pattern - presumably the text preceding the inode size in the
+ *                     tool's output - TODO confirm against the parser
+ *   fs_tool_pkg     - presumably the package providing the tool, for
+ *                     diagnostics - TODO confirm
+ *
+ * btrfs is listed with no tool at all.
+ */
+static struct fs_info {
+ char *fs_type_name;
+ char *fs_tool_name;
+ char *fs_tool_arg;
+ char *fs_tool_pattern;
+ char *fs_tool_pkg;
+} glusterd_fs[] = {
+ /* some linux have these in /usr/sbin/and others in /sbin/? */
+ { "xfs", "xfs_info", NULL, "isize=", "xfsprogs" },
+ { "ext3", "tune2fs", "-l", "Inode size:", "e2fsprogs" },
+ { "ext4", "tune2fs", "-l", "Inode size:", "e2fsprogs" },
+ { "btrfs", NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL, NULL}
+};
+
+/*
+ * Store the inode size of brick <count>'s backing device in @dict under
+ * "brick<count>.inode_size".
+ *
+ * The device and filesystem type are read from "brick<count>.device" and
+ * "brick<count>.fs_name" in @dict. The size is obtained by running the
+ * tool listed for that filesystem in glusterd_fs[] and parsing its output;
+ * results are remembered per device in a function-static dict so the tool
+ * runs at most once per device.
+ *
+ * Returns the result of the last dict/runner operation (0 on success).
+ * When no usable tool is found the function logs an error and returns
+ * the still-zero ret from the preceding dict lookup without adding a key.
+ */
+static int
+glusterd_add_inode_size_to_dict (dict_t *dict, int count)
+{
+        int             ret               = -1;
+        char            key[1024]         = {0};
+        char            buffer[4096]      = {0};
+        char           *device            = NULL;
+        char           *fs_name           = NULL;
+        char           *cur_word          = NULL;
+        char           *trail             = NULL;
+        runner_t        runner            = {0, };
+        struct fs_info *fs                = NULL;
+        char            fs_tool_name[256] = {0, };
+        static dict_t  *cached_fs         = NULL;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "brick%d.device", count);
+        ret = dict_get_str (dict, key, &device);
+        if (ret)
+                goto out;
+
+        if (cached_fs) {
+                if (dict_get_str (cached_fs, device, &cur_word) == 0) {
+                        goto cached;
+                }
+        } else {
+                cached_fs = dict_new ();
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "brick%d.fs_name", count);
+        ret = dict_get_str (dict, key, &fs_name);
+        if (ret)
+                goto out;
+
+        runinit (&runner);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+
+        for (fs = glusterd_fs ; fs->fs_type_name; fs++) {
+                if (strcmp (fs_name, fs->fs_type_name) != 0)
+                        continue;
+
+                /*
+                 * Some entries (btrfs) list no tool at all; do not feed a
+                 * NULL pointer to "%s" when building the path.
+                 */
+                if (!fs->fs_tool_name)
+                        break;
+
+                snprintf (fs_tool_name, sizeof (fs_tool_name),
+                          "/usr/sbin/%s", fs->fs_tool_name);
+                if (sys_access (fs_tool_name, R_OK|X_OK) == 0) {
+                        runner_add_arg (&runner, fs_tool_name);
+                } else {
+                        snprintf (fs_tool_name, sizeof (fs_tool_name),
+                                  "/sbin/%s", fs->fs_tool_name);
+                        if (sys_access (fs_tool_name, R_OK|X_OK) == 0)
+                                runner_add_arg (&runner, fs_tool_name);
+                }
+                break;
+        }
+
+        if (runner.argv[0]) {
+                if (fs->fs_tool_arg)
+                        runner_add_arg (&runner, fs->fs_tool_arg);
+                runner_add_arg (&runner, device);
+        } else {
+                /*
+                 * fs points either at an entry without a tool or at the
+                 * table terminator (unknown filesystem type); in both
+                 * cases the tool and package names are NULL.
+                 * NOTE(review): the runner set up by runinit() is not
+                 * ended on this path -- confirm whether that leaks.
+                 */
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INODE_SIZE_GET_FAIL,
+                        "could not find %s to get "
+                        "inode size for %s (%s): %s package missing?",
+                        fs->fs_tool_name ? fs->fs_tool_name
+                                         : "a suitable tool",
+                        device, fs_name,
+                        fs->fs_tool_pkg ? fs->fs_tool_pkg : "matching");
+                goto out;
+        }
+
+        ret = runner_start (&runner);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CMD_EXEC_FAIL, "failed to execute "
+                        "\"%s\"", fs->fs_tool_name);
+                /*
+                 * Runner_start might return an error after the child has
+                 * been forked, e.g. if the program isn't there.  In that
+                 * case, we still need to call runner_end to reap the
+                 * child and free resources.  Fortunately, that seems to
+                 * be harmless for other kinds of failures.
+                 */
+                (void) runner_end (&runner);
+                goto out;
+        }
+
+        /* Scan the tool's output line by line until the pattern matches. */
+        for (;;) {
+                if (fgets (buffer, sizeof (buffer),
+                           runner_chio (&runner, STDOUT_FILENO)) == NULL)
+                        break;
+                trail = strrchr (buffer, '\n');
+                if (trail)
+                        *trail = '\0';
+
+                cur_word =
+                        glusterd_parse_inode_size (buffer, fs->fs_tool_pattern);
+
+                if (cur_word)
+                        break;
+        }
+
+        ret = runner_end (&runner);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_CMD_EXEC_FAIL,
+                        "%s exited with non-zero exit status",
+                        fs->fs_tool_name);
+
+                goto out;
+        }
+        if (!cur_word) {
+                ret = -1;
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INODE_SIZE_GET_FAIL,
+                        "Unable to retrieve inode size using %s",
+                        fs->fs_tool_name);
+                goto out;
+        }
+
+        if (dict_set_dynstr_with_alloc (cached_fs, device, cur_word)) {
+                /* not fatal if not entered into the cache */
+                gf_msg_debug (THIS->name, 0,
+                              "failed to cache fs inode size for %s", device);
+        }
+
+cached:
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "brick%d.inode_size", count);
+
+        ret = dict_set_dynstr_with_alloc (dict, key, cur_word);
+
+out:
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INODE_SIZE_GET_FAIL, "failed to get inode size");
+        return ret;
+}
+
+/*
+ * Look up the mount table entry whose mount directory equals @mnt_pt.
+ *
+ * Entries are read from _PATH_MOUNTED with getmntent_r(), using @entry_ptr
+ * and @buff (of @buflen bytes) as caller-provided storage. Entries of type
+ * "rootfs" are ignored.
+ *
+ * Returns the matching entry (the pointer getmntent_r() handed back) or
+ * NULL when the table cannot be opened or no entry matches.
+ */
+struct mntent *
+glusterd_get_mnt_entry_info (char *mnt_pt, char *buff, int buflen,
+                             struct mntent *entry_ptr)
+{
+        struct mntent *found = NULL;
+        FILE          *table = NULL;
+
+        GF_ASSERT (mnt_pt);
+        GF_ASSERT (buff);
+        GF_ASSERT (entry_ptr);
+
+        table = setmntent (_PATH_MOUNTED, "r");
+        if (!table)
+                return NULL;
+
+        while ((found = getmntent_r (table, entry_ptr, buff, buflen)) != NULL) {
+                if (strcmp (found->mnt_dir, mnt_pt) == 0 &&
+                    strcmp (found->mnt_type, "rootfs") != 0)
+                        break;
+        }
+
+        endmntent (table);
+        return found;
+}
+
+/*
+ * Add the mount details of a brick to @dict.
+ *
+ * The brick's mount point is located with glusterd_get_brick_root() and
+ * its mount table entry with glusterd_get_mnt_entry_info(). Three keys are
+ * then set, each prefixed with "brick<count>": ".device" (mnt_fsname),
+ * ".fs_name" (mnt_type) and ".mnt_options" (mnt_opts).
+ *
+ * Returns 0 on success, non-zero if any lookup or dict operation fails.
+ */
+static int
+glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo,
+                                  dict_t *dict, int count)
+{
+        int            ret            = -1;
+        int            idx            = 0;
+        char           key[1024]      = {0};
+        char           buff [PATH_MAX] = {0};
+        char           base_key[1024] = {0};
+        struct mntent  save_entry     = {0};
+        char          *mnt_pt         = NULL;
+        struct mntent *entry          = NULL;
+        const char    *suffixes[3]    = {"device", "fs_name", "mnt_options"};
+        char          *values[3]      = {NULL, NULL, NULL};
+
+        snprintf (base_key, sizeof (base_key), "brick%d", count);
+
+        ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
+        if (ret)
+                goto out;
+
+        entry = glusterd_get_mnt_entry_info (mnt_pt, buff, sizeof (buff),
+                                             &save_entry);
+        if (!entry) {
+                ret = -1;
+                goto out;
+        }
+
+        /* device file, fs type and mount options, in that order */
+        values[0] = entry->mnt_fsname;
+        values[1] = entry->mnt_type;
+        values[2] = entry->mnt_opts;
+
+        for (idx = 0; idx < 3; idx++) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "%s.%s", base_key,
+                          suffixes[idx]);
+
+                ret = dict_set_dynstr_with_alloc (dict, key, values[idx]);
+                if (ret)
+                        break;
+        }
+
+out:
+        GF_FREE (mnt_pt);
+
+        return ret;
+}
+
+/*
+ * Return the device (mnt_fsname of the mount table entry) backing the
+ * filesystem that holds @brick_path.
+ *
+ * The result is a gf_strdup()'ed string owned by the caller, or NULL when
+ * the mount point or its mount table entry cannot be determined (both
+ * cases are logged) or the copy cannot be allocated.
+ */
+char*
+glusterd_get_brick_mount_device (char *brick_path)
+{
+        int            ret           = -1;
+        char          *mnt_pt        = NULL;
+        char          *device        = NULL;
+        char           buff [PATH_MAX] = "";
+        struct mntent *entry         = NULL;
+        struct mntent  save_entry    = {0,};
+        xlator_t      *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brick_path);
+
+        ret = glusterd_get_brick_root (brick_path, &mnt_pt);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICKPATH_ROOT_GET_FAIL,
+                        "Failed to get mount point "
+                        "for %s brick", brick_path);
+                goto out;
+        }
+
+        entry = glusterd_get_mnt_entry_info (mnt_pt, buff, sizeof (buff),
+                                             &save_entry);
+        if (NULL == entry) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MNTENTRY_GET_FAIL,
+                        "Failed to get mnt entry "
+                        "for %s mount path", mnt_pt);
+                goto out;
+        }
+
+        /* get the fs_name/device */
+        device = gf_strdup (entry->mnt_fsname);
+
+out:
+        /*
+         * glusterd_get_brick_root() hands back an allocated string; it is
+         * only needed for the lookup above, so release it on every path.
+         */
+        GF_FREE (mnt_pt);
+
+        return device;
+}
+
+/*
+ * Add filesystem statistics and mount information of a brick to @dict.
+ *
+ * Keys are prefixed with "brick<count>". From statvfs() on the brick path
+ * the function stores ".block_size", ".free", ".total" and, only when
+ * non-zero, ".total_inodes" and ".free_inodes". It then adds the mount
+ * details and the inode size through the respective helpers.
+ *
+ * Returns 0 on success, non-zero on the first failure.
+ */
+int
+glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo,
+                                   glusterd_brickinfo_t *brickinfo,
+                                   dict_t *dict, int count)
+{
+        int             ret          = -1;
+        uint64_t        value        = 0;
+        char            key[1024]    = {0};
+        char            prefix[1024] = {0};
+        struct statvfs  vfs          = {0};
+        xlator_t       *this         = NULL;
+
+        this = THIS;
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (dict);
+
+        snprintf (prefix, sizeof (prefix), "brick%d", count);
+
+        ret = sys_statvfs (brickinfo->path, &vfs);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED, "statfs error: %s ",
+                        strerror (errno));
+                goto out;
+        }
+
+        /* file system block size */
+        value = vfs.f_bsize;
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.block_size", prefix);
+        ret = dict_set_uint64 (dict, key, value);
+        if (ret)
+                goto out;
+
+        /* free space in brick */
+        value = vfs.f_bfree * vfs.f_bsize;
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.free", prefix);
+        ret = dict_set_uint64 (dict, key, value);
+        if (ret)
+                goto out;
+
+        /* total space of brick */
+        value = vfs.f_blocks * vfs.f_bsize;
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.total", prefix);
+        ret = dict_set_uint64 (dict, key, value);
+        if (ret)
+                goto out;
+
+        /* inodes: total and free counts only for ext2/3/4 and xfs */
+        value = vfs.f_files;
+        if (value) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "%s.total_inodes", prefix);
+                ret = dict_set_uint64 (dict, key, value);
+                if (ret)
+                        goto out;
+        }
+
+        value = vfs.f_ffree;
+        if (value) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "%s.free_inodes", prefix);
+                ret = dict_set_uint64 (dict, key, value);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_add_brick_mount_details (brickinfo, dict, count);
+        if (ret)
+                goto out;
+
+        ret = glusterd_add_inode_size_to_dict (dict, count);
+out:
+        if (ret)
+                gf_msg_debug (this->name, 0, "Error adding brick"
+                              " detail to dict: %s", strerror (errno));
+        return ret;
+}
+
+/*
+ * Describe a brick in @dict under keys prefixed with "brick<count>":
+ * ".hostname", ".path", ".peerid", ".port", ".rdma_port", ".pid" and
+ * ".status".
+ *
+ * Port reporting depends on the volume transport: for RDMA-only volumes
+ * the brick port is stored as the rdma port and the plain port is 0; for
+ * TCP+RDMA the brick's rdma_port is used; otherwise the rdma port is 0.
+ * The pid and online status come from gf_is_service_running() on the
+ * brick's pidfile, and are only queried when the brick is marked started.
+ *
+ * Returns 0 on success, non-zero on the first dict failure.
+ */
+int32_t
+glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
+                            glusterd_brickinfo_t *brickinfo,
+                            dict_t *dict, int32_t count)
+{
+        int              ret               = -1;
+        int32_t          pid               = -1;
+        int32_t          tcp_port          = 0;
+        int32_t          rdma_port         = 0;
+        char             key[1024]         = {0};
+        char             base_key[1024]    = {0};
+        char             pidfile[PATH_MAX] = {0};
+        xlator_t        *this              = NULL;
+        glusterd_conf_t *priv              = NULL;
+        gf_boolean_t     brick_online      = _gf_false;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+
+        snprintf (base_key, sizeof (base_key), "brick%d", count);
+
+        snprintf (key, sizeof (key), "%s.hostname", base_key);
+        ret = dict_set_str (dict, key, brickinfo->hostname);
+        if (ret)
+                goto out;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.path", base_key);
+        ret = dict_set_str (dict, key, brickinfo->path);
+        if (ret)
+                goto out;
+
+        /* add peer uuid */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.peerid", base_key);
+        ret = dict_set_dynstr_with_alloc (dict, key,
+                                          uuid_utoa (brickinfo->uuid));
+        if (ret)
+                goto out;
+
+        /* An RDMA-only brick has no TCP port to report. */
+        if (volinfo->transport_type == GF_TRANSPORT_RDMA)
+                tcp_port = 0;
+        else
+                tcp_port = brickinfo->port;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.port", base_key);
+        ret = dict_set_int32 (dict, key, tcp_port);
+        if (ret)
+                goto out;
+
+        if (volinfo->transport_type == GF_TRANSPORT_RDMA)
+                rdma_port = brickinfo->port;
+        else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA)
+                rdma_port = brickinfo->rdma_port;
+        else
+                rdma_port = 0;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.rdma_port", base_key);
+        ret = dict_set_int32 (dict, key, rdma_port);
+        if (ret)
+                goto out;
+
+        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+
+        if (glusterd_is_brick_started (brickinfo))
+                brick_online = gf_is_service_running (pidfile, &pid);
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.pid", base_key);
+        ret = dict_set_int32 (dict, key, pid);
+        if (ret)
+                goto out;
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.status", base_key);
+        ret = dict_set_int32 (dict, key, brick_online);
+
+out:
+        if (ret)
+                gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Store the name of every volume known to glusterd in @dict as "vol0",
+ * "vol1", ... and the number of volumes as "vol_count".
+ *
+ * Returns 0 on success; a dict failure is logged and returned.
+ */
+int32_t
+glusterd_get_all_volnames (dict_t *dict)
+{
+        int                 ret      = -1;
+        int32_t             nr_vols  = 0;
+        char                key[256] = {0};
+        glusterd_volinfo_t *volinfo  = NULL;
+        glusterd_conf_t    *priv     = NULL;
+
+        priv = THIS->private;
+        GF_ASSERT (priv);
+
+        cds_list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "vol%d", nr_vols);
+
+                ret = dict_set_str (dict, key, volinfo->volname);
+                if (ret)
+                        goto out;
+
+                nr_vols++;
+        }
+
+        ret = dict_set_int32 (dict, "vol_count", nr_vols);
+
+out:
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "failed to get all "
+                        "volume names for status");
+        return ret;
+}
+
+/*
+ * Apply @func to every brick of every volume and require that each call
+ * returns @status.
+ *
+ * Returns 0 when all calls returned @status (or there are no bricks), -1
+ * as soon as one call returns something else; remaining bricks are not
+ * visited in that case.
+ */
+int
+glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
+                                void *ctx)
+{
+        glusterd_conf_t      *priv    = NULL;
+        glusterd_volinfo_t   *vol     = NULL;
+        glusterd_brickinfo_t *brick   = NULL;
+        int                   ret     = 0;
+        xlator_t             *this    = NULL;
+
+        this = THIS;
+        priv = this->private;
+
+        cds_list_for_each_entry (vol, &priv->volumes, vol_list) {
+                cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                        if (func (vol, brick, ctx) != status) {
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+out:
+        gf_msg_debug ("glusterd", 0, "returning %d", ret);
+        return ret;
+}
+
+
+/*
+ * Stop the glusterfs process serving a brick.
+ *
+ * A brick without a uuid is resolved first. If the brick is not owned by
+ * this node (uuid differs from MY_UUID) nothing is stopped; the brick is
+ * only deleted when @del_brick is set, and 0 is returned. Otherwise the
+ * stop is delegated to glusterd_volume_stop_glusterfs().
+ *
+ * Returns 0 on success, -1 when @volinfo or @brickinfo is NULL, or the
+ * error of the failing step.
+ */
+int
+glusterd_brick_stop (glusterd_volinfo_t *volinfo,
+                     glusterd_brickinfo_t *brickinfo,
+                     gf_boolean_t del_brick)
+{
+        int              ret  = -1;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (!brickinfo || !volinfo)
+                goto out;
+
+        if (gf_uuid_is_null (brickinfo->uuid)) {
+                ret = glusterd_resolve_brick (brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL, FMTSTR_RESOLVE_BRICK,
+                                brickinfo->hostname, brickinfo->path);
+                        goto out;
+                }
+        }
+
+        /* Bricks hosted on other nodes have no local process to stop. */
+        if (gf_uuid_compare (brickinfo->uuid, MY_UUID) != 0) {
+                ret = 0;
+                if (del_brick)
+                        glusterd_delete_brick (volinfo, brickinfo);
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "About to stop glusterfs"
+                      " for brick %s:%s", brickinfo->hostname,
+                      brickinfo->path);
+
+        ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo, del_brick);
+        if (ret)
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_BRICK_STOP_FAIL, "Unable to stop"
+                        " brick: %s:%s", brickinfo->hostname,
+                        brickinfo->path);
+
+out:
+        gf_msg_debug (this->name, 0, "returning %d ", ret);
+        return ret;
+}
+
+/*
+ * A tier daemon counts as running when the volume is of type tier, has a
+ * defrag object attached and that defrag was started with
+ * GF_DEFRAG_CMD_START_TIER.
+ */
+gf_boolean_t
+glusterd_is_tier_daemon_running (glusterd_volinfo_t *volinfo)
+{
+        gf_boolean_t running = _gf_false;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                if (volinfo->rebal.defrag &&
+                    volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_START_TIER)
+                        running = _gf_true;
+        }
+
+        return running;
+}
+
+
+/*
+ * Report whether a rebalance (defrag) is in progress on the volume.
+ *
+ * A running tier daemon is deliberately not treated as a rebalance:
+ * with the current design rebalance is not supported on tiered volumes.
+ *
+ * Returns 1 when a defrag object is attached and it is not the tier
+ * daemon, 0 otherwise.
+ */
+int
+glusterd_is_defrag_on (glusterd_volinfo_t *volinfo)
+{
+        int defrag_on = 0;
+
+        if (!glusterd_is_tier_daemon_running (volinfo))
+                defrag_on = (volinfo->rebal.defrag != NULL);
+
+        return defrag_on;
+}
+
+/*
+ * Validate that a brick can be added.
+ *
+ * When @brickinfo is NULL a temporary brickinfo is built from the @brick
+ * string and destroyed before returning. The brick's host must resolve;
+ * a local brick's path must be available according to
+ * glusterd_is_brickpath_available(); a remote brick's peer must be known,
+ * connected and in GD_FRIEND_STATE_BEFRIENDED.
+ *
+ * On failure a description is written to @op_errstr (at most @len bytes)
+ * where one is available. Whatever @op_errstr holds on exit is logged if
+ * it is non-empty. Returns 0 when the brick is acceptable, non-zero
+ * otherwise.
+ */
+int
+glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo,
+                             char *op_errstr, size_t len)
+{
+        glusterd_brickinfo_t *candidate      = NULL;
+        int                   ret            = -1;
+        gf_boolean_t          owns_candidate = _gf_false;
+        glusterd_peerinfo_t  *peer           = NULL;
+        glusterd_conf_t      *priv           = NULL;
+        xlator_t             *this           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (brick);
+        GF_ASSERT (op_errstr);
+
+        if (brickinfo) {
+                candidate = brickinfo;
+        } else {
+                ret = glusterd_brickinfo_new_from_brick (brick, &candidate,
+                                                         _gf_true, NULL);
+                if (ret)
+                        goto out;
+                owns_candidate = _gf_true;
+        }
+
+        ret = glusterd_resolve_brick (candidate);
+        if (ret) {
+                snprintf(op_errstr, len, "Host %s is not in \'Peer "
+                         "in Cluster\' state", candidate->hostname);
+                goto out;
+        }
+
+        if (gf_uuid_compare (MY_UUID, candidate->uuid) == 0) {
+                /* brick is local */
+                if (!glusterd_is_brickpath_available (candidate->uuid,
+                                                      candidate->path)) {
+                        snprintf(op_errstr, len, "Brick: %s not available."
+                                 " Brick may be containing or be contained "
+                                 "by an existing brick", brick);
+                        ret = -1;
+                        goto out;
+                }
+        } else {
+                /* brick lives on a peer: the peer must be usable */
+                peer = glusterd_peerinfo_find_by_uuid (candidate->uuid);
+                if (peer == NULL) {
+                        ret = -1;
+                        snprintf (op_errstr, len, "Failed to find host %s",
+                                  candidate->hostname);
+                        goto out;
+                }
+
+                if (!peer->connected) {
+                        snprintf(op_errstr, len, "Host %s not connected",
+                                 candidate->hostname);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (peer->state.state != GD_FRIEND_STATE_BEFRIENDED) {
+                        snprintf(op_errstr, len, "Host %s is not in \'Peer "
+                                 "in Cluster\' state",
+                                 candidate->hostname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        if (owns_candidate)
+                glusterd_brickinfo_delete (candidate);
+        if (op_errstr[0] != '\0')
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_VALIDATE_FAIL, "%s", op_errstr);
+        gf_msg_debug (this->name, 0, "returning %d ", ret);
+        return ret;
+}
+
+/*
+ * Verify that @src and @dst are the bricks recorded in the volume's
+ * replace-brick state (compared by hostname and path).
+ *
+ * Returns 0 when both match, -1 when no replace-brick source/destination
+ * is recorded or either brick differs (the mismatch is logged).
+ */
+int
+glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo,
+                          glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst)
+{
+        glusterd_replace_brick_t *state        = NULL;
+        int                       src_mismatch = 0;
+        int                       dst_mismatch = 0;
+
+        GF_ASSERT (volinfo);
+
+        state = &volinfo->rep_brick;
+
+        if (!state->src_brick || !state->dst_brick)
+                return -1;
+
+        src_mismatch = strcmp (state->src_brick->hostname, src->hostname) ||
+                       strcmp (state->src_brick->path, src->path);
+        if (src_mismatch) {
+                gf_msg("glusterd", GF_LOG_ERROR, 0,
+                       GD_MSG_RB_SRC_BRICKS_MISMATCH,
+                       "Replace brick src bricks differ");
+                return -1;
+        }
+
+        dst_mismatch = strcmp (state->dst_brick->hostname, dst->hostname) ||
+                       strcmp (state->dst_brick->path, dst->path);
+        if (dst_mismatch) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_RB_DST_BRICKS_MISMATCH,
+                        "Replace brick dst bricks differ");
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Probe whether the extended attribute @xattr exists on @path.
+ * path needs to be absolute; works only on gfid, volume-id (the value is
+ * read into a 16-byte uuid buffer).
+ *
+ * Returns 0 with *present set to _gf_true when the attribute could be
+ * read, and 0 with *present set to _gf_false when the lookup failed with
+ * ENODATA / ENOATTR / ENOTSUP. Any other failure, or a NULL argument,
+ * returns a negative value and leaves *present untouched.
+ */
+static int
+glusterd_is_uuid_present (char *path, char *xattr, gf_boolean_t *present)
+{
+        int    ret   = -1;
+        uuid_t value = {0,};
+
+        GF_ASSERT (path);
+        GF_ASSERT (xattr);
+        GF_ASSERT (present);
+
+        if (!path || !xattr || !present)
+                return ret;
+
+        ret = sys_lgetxattr (path, xattr, &value, 16);
+        if (ret >= 0) {
+                *present = _gf_true;
+                return 0;
+        }
+
+        /* "No such attribute" style errors mean absent, not failure. */
+        switch (errno) {
+#if defined(ENODATA)
+        case ENODATA: /* FALLTHROUGH */
+#endif
+#if defined(ENOATTR) && (ENOATTR != ENODATA)
+        case ENOATTR: /* FALLTHROUGH */
+#endif
+        case ENOTSUP:
+                *present = _gf_false;
+                ret = 0;
+                break;
+        default:
+                break;
+        }
+
+        return ret;
+}
+
+/*
+ * Determine whether @path or one of its ancestor directories already
+ * carries a gluster xattr (GFID_XATTR_KEY or GF_XATTR_VOL_ID_KEY).
+ * path needs to be absolute.
+ *
+ * The check starts at @path and moves up with dirname() until a marked
+ * directory is found or "/" is reached; "/" itself is checked last.
+ * On a complete walk *in_use receives the result and 0 is returned.
+ * A failing xattr probe returns non-zero. When dirname() yields "."
+ * the function leaves early with the last probe's return value and
+ * without writing *in_use.
+ *
+ * *in_use is read on every exit path, including those that never wrote
+ * it, so the caller must initialise it (the caller in this file passes
+ * _gf_false). Whenever a message is produced (probe failure or path in
+ * use) it is logged and a gf_strdup()'ed copy is stored in *op_errstr.
+ */
+static int
+glusterd_is_path_in_use (char *path, gf_boolean_t *in_use, char **op_errstr)
+{
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t used = _gf_false;
+ char dir[PATH_MAX] = {0,};
+ char *curdir = NULL;
+ char msg[2048] = {0};
+ char *keys[3] = {GFID_XATTR_KEY,
+ GF_XATTR_VOL_ID_KEY,
+ NULL};
+
+ GF_ASSERT (path);
+ if (!path)
+ goto out;
+
+ strncpy (dir, path, (sizeof (dir) - 1)); /* dir is zero-filled, so it stays NUL-terminated */
+ curdir = dir;
+ do {
+ for (i = 0; !used && keys[i]; i++) {
+ ret = glusterd_is_uuid_present (curdir, keys[i], &used);
+ if (ret)
+ goto out;
+ }
+
+ if (used)
+ break;
+
+ curdir = dirname (curdir); /* step up to the parent directory */
+ if (!strcmp (curdir, "."))
+ goto out;
+
+
+ } while (strcmp (curdir, "/"));
+
+ if (!strcmp (curdir, "/")) { /* the loop stops before probing "/" itself */
+ for (i = 0; !used && keys[i]; i++) {
+ ret = glusterd_is_uuid_present (curdir, keys[i], &used);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = 0;
+ *in_use = used;
+out:
+ if (ret) { /* keys[i] is the attribute whose probe failed (keys[0] for a NULL path) */
+ snprintf (msg, sizeof (msg), "Failed to get extended "
+ "attribute %s, reason: %s", keys[i],
+ strerror (errno));
+ }
+
+ if (*in_use) {
+ if (!strcmp (path, curdir)) {
+ snprintf (msg, sizeof (msg), "%s is already part of a "
+ "volume", path);
+ } else {
+ snprintf (msg, sizeof (msg), "parent directory %s is "
+ "already part of a volume", curdir);
+ }
+ }
+
+ if (strlen (msg)) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_PATH_ALREADY_PART_OF_VOL, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+
+ return ret;
+}
+
+/*
+ * Verify that @path can serve as a brick and stamp it with the volume id.
+ *
+ * Steps: (1) set and remove a scratch xattr to prove the backend supports
+ * extended attributes; (2) reject the path if it, or a parent, already
+ * belongs to a volume, unless @is_force is set; (3) write @uuid (16
+ * bytes) as GF_XATTR_VOL_ID_KEY, using XATTR_CREATE unless forced.
+ *
+ * Returns 0 on success, non-zero otherwise. Error text produced here is
+ * handed to the caller as a gf_strdup()'ed string in *op_errstr.
+ */
+int
+glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid,
+                                    char **op_errstr, gf_boolean_t is_force)
+{
+        int          ret       = -1;
+        char         msg[2048] = {0,};
+        gf_boolean_t in_use    = _gf_false;
+        int          flags     = 0;
+
+        /* Check for xattr support in backend fs */
+        ret = sys_lsetxattr (path, "trusted.glusterfs.test",
+                             "working", 8, 0);
+        if (ret == -1) {
+                snprintf (msg, sizeof (msg), "Glusterfs is not"
+                          " supported on brick: %s:%s.\nSetting"
+                          " extended attributes failed, reason:"
+                          " %s.", host, path, strerror(errno));
+                goto out;
+        }
+        sys_lremovexattr (path, "trusted.glusterfs.test");
+
+        ret = glusterd_is_path_in_use (path, &in_use, op_errstr);
+        if (ret)
+                goto out;
+
+        if (!is_force) {
+                if (in_use) {
+                        ret = -1;
+                        goto out;
+                }
+                /* Without force, never overwrite an existing volume id. */
+                flags = XATTR_CREATE;
+        }
+
+        ret = sys_lsetxattr (path, GF_XATTR_VOL_ID_KEY, uuid, 16,
+                             flags);
+        if (ret == -1) {
+                snprintf (msg, sizeof (msg), "Failed to set extended "
+                          "attributes %s, reason: %s",
+                          GF_XATTR_VOL_ID_KEY, strerror (errno));
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (msg[0] != '\0')
+                *op_errstr = gf_strdup (msg);
+
+        return ret;
+}
+
+/*
+ * Export transition @i of a state-machine log into @dict.
+ *
+ * Four keys are written, numbered by @count: "log<count>-old-state",
+ * "log<count>-event", "log<count>-new-state" (names obtained through the
+ * log's name callbacks) and "log<count>-time" (formatted timestamp).
+ *
+ * Returns 0 on success, non-zero on the first dict failure.
+ */
+int
+glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict,
+                                           glusterd_sm_tr_log_t *log, int i,
+                                           int count)
+{
+        int   ret         = -1;
+        char  key[512]    = {0};
+        char  timestr[64] = {0,};
+        char *name        = NULL;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (log);
+
+        /* state before the transition */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "log%d-old-state", count);
+        name = log->state_name_get (log->transitions[i].old_state);
+        ret = dict_set_str (dict, key, name);
+        if (ret)
+                goto out;
+
+        /* event that triggered it */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "log%d-event", count);
+        name = log->event_name_get (log->transitions[i].event);
+        ret = dict_set_str (dict, key, name);
+        if (ret)
+                goto out;
+
+        /* state after the transition */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "log%d-new-state", count);
+        name = log->state_name_get (log->transitions[i].new_state);
+        ret = dict_set_str (dict, key, name);
+        if (ret)
+                goto out;
+
+        /* when it happened */
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "log%d-time", count);
+        gf_time_fmt (timestr, sizeof timestr, log->transitions[i].time,
+                     gf_timefmt_FT);
+        ret = dict_set_dynstr_with_alloc (dict, key, timestr);
+
+out:
+        gf_msg_debug ("glusterd", 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Export all recorded transitions of a circular state-machine log into
+ * @dict, oldest first, followed by the number of entries under "count".
+ *
+ * When the log is full the oldest entry sits right after `current`;
+ * otherwise entries start at slot 0. An empty log writes nothing and
+ * returns 0.
+ */
+int
+glusterd_sm_tr_log_add_to_dict (dict_t *dict,
+                                glusterd_sm_tr_log_t *circular_log)
+{
+        int                   ret      = -1;
+        int                   pos      = 0;
+        int                   first    = 0;
+        int                   last     = 0;
+        int                   slot     = 0;
+        int                   exported = 0;
+        char                  key[256] = {0};
+        glusterd_sm_tr_log_t *log      = NULL;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (circular_log);
+
+        log = circular_log;
+        if (!log->count)
+                return 0;
+
+        if (log->count == log->size)
+                first = log->current + 1;
+
+        last = first + log->count;
+        for (pos = first; pos < last; pos++, exported++) {
+                slot = pos % log->count;
+                ret = glusterd_sm_tr_log_transition_add_to_dict (dict, log,
+                                                                 slot,
+                                                                 exported);
+                if (ret)
+                        goto out;
+        }
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "count");
+        ret = dict_set_int32 (dict, key, log->count);
+
+out:
+        gf_msg_debug ("glusterd", 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Initialise a state-machine transition log with room for @size entries.
+ *
+ * Allocates the zeroed transitions array and records the callbacks used
+ * to turn state and event numbers into names.
+ *
+ * Returns 0 on success, -1 on a NULL/zero argument or allocation failure
+ * (in which case @log is left unmodified).
+ */
+int
+glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log,
+                         char * (*state_name_get) (int),
+                         char * (*event_name_get) (int),
+                         size_t  size)
+{
+        glusterd_sm_transition_t *slots = NULL;
+        int                       ret   = -1;
+
+        GF_ASSERT (size > 0);
+        GF_ASSERT (log && state_name_get && event_name_get);
+
+        if (!log || !state_name_get || !event_name_get || (size <= 0))
+                goto out;
+
+        slots = GF_CALLOC (size, sizeof (*slots), gf_gld_mt_sm_tr_log_t);
+        if (slots) {
+                log->transitions    = slots;
+                log->size           = size;
+                log->state_name_get = state_name_get;
+                log->event_name_get = event_name_get;
+                ret = 0;
+        }
+
+out:
+        gf_msg_debug ("glusterd", 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Release the transitions array of a state-machine log. The log
+ * structure itself is not freed. A NULL @log is ignored.
+ */
+void
+glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log)
+{
+        if (!log)
+                return;
+
+        GF_FREE (log->transitions);
+        /*
+         * Do not leave a dangling pointer behind: the transition-add
+         * routine bails out on a NULL transitions array, so clearing it
+         * makes a late add (or a second delete) harmless.
+         */
+        log->transitions = NULL;
+}
+
+/*
+ * Record a state transition in a circular log.
+ *
+ * The first entry goes to slot 0; later entries go to the slot after
+ * `current`, wrapping at `size`, so the oldest entry is overwritten once
+ * the log is full. The entry is stamped with the current time.
+ *
+ * Returns 0 on success, -1 when @log is NULL or has no transitions array.
+ */
+int
+glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log,
+                                   int old_state, int new_state,
+                                   int event)
+{
+        glusterd_sm_transition_t *slots = NULL;
+        glusterd_sm_transition_t *entry = NULL;
+        int                       ret   = -1;
+        int                       next  = 0;
+        xlator_t                 *this  = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (log);
+        if (!log)
+                goto out;
+
+        slots = log->transitions;
+        if (!slots)
+                goto out;
+
+        if (log->count)
+                next = (log->current + 1) % log->size;
+
+        entry = &slots[next];
+        entry->old_state = old_state;
+        entry->new_state = new_state;
+        entry->event     = event;
+        time (&entry->time);
+
+        log->current = next;
+        if (log->count < log->size)
+                log->count++;
+
+        ret = 0;
+        gf_msg_debug (this->name, 0, "Transitioning from '%s' to '%s' "
+                      "due to event '%s'", log->state_name_get (old_state),
+                      log->state_name_get (new_state),
+                      log->event_name_get (event));
+out:
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Remove and free the first pending node in @list whose `node` member is
+ * @elem. Always returns 0, whether or not a match was found.
+ */
+int
+glusterd_remove_pending_entry (struct cds_list_head *list, void *elem)
+{
+        glusterd_pending_node_t *cursor = NULL;
+        glusterd_pending_node_t *next   = NULL;
+        int                      ret    = 0;
+
+        cds_list_for_each_entry_safe (cursor, next, list, list) {
+                if (cursor->node != elem)
+                        continue;
+
+                cds_list_del_init (&cursor->list);
+                GF_FREE (cursor);
+                ret = 0;
+                break;
+        }
+
+        gf_msg_debug (THIS->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Unlink and free every pending node on @list. Always returns 0.
+ */
+int
+glusterd_clear_pending_nodes (struct cds_list_head *list)
+{
+        glusterd_pending_node_t *cursor = NULL;
+        glusterd_pending_node_t *next   = NULL;
+
+        /* _safe variant: the current entry is freed inside the loop */
+        cds_list_for_each_entry_safe (cursor, next, list, list) {
+                cds_list_del_init (&cursor->list);
+                GF_FREE (cursor);
+        }
+
+        return 0;
+}
+
+/*
+ * Delete a volume: first its on-disk store, then, only if that
+ * succeeded, the in-memory volinfo.
+ *
+ * Returns the result of glusterd_store_delete_volume().
+ */
+int32_t
+glusterd_delete_volume (glusterd_volinfo_t *volinfo)
+{
+        int ret = -1;
+
+        GF_ASSERT (volinfo);
+
+        ret = glusterd_store_delete_volume (volinfo);
+        if (!ret)
+                glusterd_volinfo_remove (volinfo);
+
+        gf_msg_debug (THIS->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Remove a brick from a volume: delete its volfile and its store entry
+ * under the volume directory, destroy the brickinfo and decrement the
+ * volume's brick count. Results of the individual steps are not checked;
+ * the function always returns 0.
+ */
+int32_t
+glusterd_delete_brick (glusterd_volinfo_t* volinfo,
+                       glusterd_brickinfo_t *brickinfo)
+{
+        char             voldir[PATH_MAX] = {0,};
+        glusterd_conf_t *priv             = THIS->private;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv);
+
+        glusterd_delete_volfile (volinfo, brickinfo);
+        glusterd_store_delete_brick (brickinfo, voldir);
+        glusterd_brickinfo_delete (brickinfo);
+        volinfo->brick_count--;
+
+        return 0;
+}
+
+/*
+ * Delete every brick of a volume via glusterd_delete_brick().
+ *
+ * Returns the result of the last deletion, or 0 when the volume has no
+ * bricks.
+ */
+int32_t
+glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo)
+{
+        int                   ret    = 0;
+        glusterd_brickinfo_t *cursor = NULL;
+        glusterd_brickinfo_t *next   = NULL;
+
+        GF_ASSERT (volinfo);
+
+        /* _safe variant: each brick is destroyed while iterating */
+        cds_list_for_each_entry_safe (cursor, next, &volinfo->bricks,
+                                      brick_list) {
+                ret = glusterd_delete_brick (volinfo, cursor);
+        }
+
+        return ret;
+}
+/*
+ * Build a single string of "--path=<brick path> " tokens, one for each
+ * brick of @volinfo whose uuid equals MY_UUID.
+ *
+ * Returns the number of such bricks. When that number is non-zero the
+ * GF_CALLOC()'ed string is stored in *pathlist (presumably for the
+ * caller to GF_FREE() -- confirm against callers). Returns 0 without
+ * touching *pathlist when there are no local bricks or an argument is
+ * NULL, and -1 when an allocation fails.
+ */
+int
+glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist)
+{
+ char **path_tokens = NULL;
+ char *tmp_path_list = NULL;
+ char path[PATH_MAX] = "";
+ int32_t count = 0;
+ int32_t pathlen = 0;
+ int32_t total_len = 0;
+ int32_t ret = 0;
+ int i = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ if ((!volinfo) || (!pathlist))
+ goto out;
+
+ path_tokens = GF_CALLOC (sizeof(char*), volinfo->brick_count,
+ gf_gld_mt_charptr);
+ if (!path_tokens) {
+ gf_msg_debug ("glusterd", 0, "Could not allocate memory.");
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID)) /* skip bricks owned by other nodes */
+ continue;
+
+ pathlen = snprintf (path, sizeof(path),
+ "--path=%s ", brickinfo->path);
+ if (pathlen < sizeof(path))
+ path[pathlen] = '\0';
+ else
+ path[sizeof(path)-1] = '\0';
+ path_tokens[count] = gf_strdup (path);
+ if (!path_tokens[count]) {
+ gf_msg_debug ("glusterd", 0,
+ "Could not allocate memory.");
+ ret = -1;
+ goto out;
+ }
+ count++;
+ total_len += pathlen; /* snprintf's return value, i.e. the untruncated length */
+ }
+
+ tmp_path_list = GF_CALLOC (sizeof(char), total_len + 1,
+ gf_gld_mt_char);
+ if (!tmp_path_list) {
+ gf_msg_debug ("glusterd", 0, "Could not allocate memory.");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++)
+ strcat (tmp_path_list, path_tokens[i]);
+
+ if (count)
+ *pathlist = tmp_path_list;
+
+ ret = count;
+out:
+ if (path_tokens) {
+ for (i = 0; i < count; i++) {
+ GF_FREE (path_tokens[i]);
+ }
+ }
+
+ GF_FREE (path_tokens);
+ path_tokens = NULL;
+
+ if (ret == 0) { /* nothing was handed to the caller, so drop the empty list */
+ gf_msg_debug ("glusterd", 0, "No Local Bricks Present.");
+ GF_FREE (tmp_path_list);
+ tmp_path_list = NULL;
+ }
+
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+/*
+ * Start the geo-replication (gsyncd) monitor for a master volume / slave
+ * pair on this node.
+ *
+ * Nothing is started when @path_list is NULL (no local bricks) or when
+ * gsync_status() reports status 0 (presumably "already running" --
+ * confirm against gsync_status). If the status check found that the
+ * template config is in use the start fails with an explanatory
+ * *op_errstr.
+ *
+ * Otherwise gsyncd is run twice: once with "--config-set session-owner"
+ * and the master volume id, then with "--monitor" (plus
+ * "--pause-on-start" when @is_pause is set). priv->big_lock is released
+ * around each runner_run() call and re-acquired afterwards.
+ *
+ * @glusterd_uuid_str is not referenced in this function body.
+ * Returns 0 on success or when there was nothing to do, non-zero
+ * otherwise.
+ */
+int
+glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
+ char *path_list, char *conf_path,
+ char *glusterd_uuid_str,
+ char **op_errstr, gf_boolean_t is_pause)
+{
+ int32_t ret = 0;
+ int32_t status = 0;
+ char uuid_str [64] = {0};
+ runner_t runner = {0,};
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int errcode = 0;
+ gf_boolean_t is_template_in_use = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+
+ if (!path_list) {
+ ret = 0;
+ gf_msg_debug ("glusterd", 0, "No Bricks in this node."
+ " Not starting gsyncd.");
+ goto out;
+ }
+
+ ret = gsync_status (master_vol->volname, slave, conf_path,
+ &status, &is_template_in_use);
+ if (status == 0)
+ goto out;
+
+ if (is_template_in_use == _gf_true) {
+ gf_asprintf (op_errstr, GEOREP" start failed for %s %s : "
+ "pid-file entry missing in config file",
+ master_vol->volname, slave);
+ ret = -1;
+ goto out;
+ }
+
+ uuid_utoa_r (master_vol->volume_id, uuid_str); /* uuid_str now holds the volume id, replacing MY_UUID */
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ path_list, "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master_vol->volname);
+ runner_add_args (&runner, slave, "--config-set", "session-owner",
+ uuid_str, NULL);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret == -1) {
+ errcode = -1; /* selects the generic "internal error" message at out */
+ goto out;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ path_list, "--monitor", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf (&runner, ":%s", master_vol->volname);
+ runner_argprintf (&runner, "--glusterd-uuid=%s",
+ uuid_utoa (priv->uuid));
+ runner_add_arg (&runner, slave);
+ if (is_pause)
+ runner_add_arg (&runner, "--pause-on-start");
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret == -1) {
+ gf_asprintf (op_errstr, GEOREP" start failed for %s %s",
+ master_vol->volname, slave);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if ((ret != 0) && errcode == -1) {
+ if (op_errstr)
+ *op_errstr = gf_strdup ("internal error, cannot start "
+ "the " GEOREP " session");
+ }
+
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Regenerate the brick, trusted-client and client volfiles of every
+ * volume in @conf.
+ *
+ * A failure is logged and does not stop the remaining work. Returns 0
+ * when everything succeeded, otherwise the error of the last step that
+ * failed.
+ */
+int32_t
+glusterd_recreate_volfiles (glusterd_conf_t *conf)
+{
+        glusterd_volinfo_t *vol      = NULL;
+        int                 step_ret = 0;
+        int                 last_err = 0;
+
+        GF_ASSERT (conf);
+
+        cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                step_ret = generate_brick_volfiles (vol);
+                if (step_ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL, "Failed to "
+                                "regenerate brick volfiles for %s",
+                                vol->volname);
+                        last_err = step_ret;
+                }
+
+                step_ret = generate_client_volfiles (vol, GF_CLIENT_TRUSTED);
+                if (step_ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL, "Failed to "
+                                "regenerate trusted client volfiles for %s",
+                                vol->volname);
+                        last_err = step_ret;
+                }
+
+                step_ret = generate_client_volfiles (vol, GF_CLIENT_OTHER);
+                if (step_ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_VOLFILE_CREATE_FAIL, "Failed to "
+                                "regenerate client volfiles for %s",
+                                vol->volname);
+                        last_err = step_ret;
+                }
+        }
+
+        return last_err;
+}
+
+/*
+ * Handle glusterd being started in upgrade or downgrade mode.
+ *
+ * Setting both flags is an error (-1). In upgrade mode all volfiles are
+ * regenerated. In either mode, when no error occurred, the process sends
+ * itself SIGTERM afterwards. With neither flag set nothing happens.
+ *
+ * Returns 0 on success or the volfile regeneration error.
+ */
+int32_t
+glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf,
+                                   gf_boolean_t upgrade, gf_boolean_t downgrade)
+{
+        int          ret        = 0;
+        gf_boolean_t regenerate = _gf_false;
+        gf_boolean_t terminate  = _gf_false;
+
+        if (_gf_true == upgrade)
+                regenerate = _gf_true;
+
+        if (upgrade && downgrade) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_WRONG_OPTS_SETTING, "Both upgrade and downgrade"
+                        " options are set. Only one should be on");
+                return -1;
+        }
+
+        if (upgrade || downgrade)
+                terminate = _gf_true;
+
+        if (regenerate)
+                ret = glusterd_recreate_volfiles (conf);
+
+        /* One-shot mode: exit once the work completed without error. */
+        if (terminate && (ret == 0))
+                kill (getpid(), SIGTERM);
+
+        return ret;
+}
+
+/*
+ * Return 1 for the replicate and stripe-replicate cluster types, 0 for
+ * anything else.
+ */
+static inline int
+glusterd_is_replica_volume (int type)
+{
+        switch (type) {
+        case GF_CLUSTER_TYPE_REPLICATE:
+        case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                return 1;
+        default:
+                return 0;
+        }
+}
+/*
+ * Tell whether a volume replicates data. A tiered volume does so when
+ * either its cold or its hot tier is of a replica type.
+ */
+gf_boolean_t
+glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo)
+{
+        int cold = 0;
+        int hot  = 0;
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                return glusterd_is_replica_volume ((volinfo->type));
+
+        cold = glusterd_is_replica_volume (volinfo->tier_info.cold_type);
+        hot  = glusterd_is_replica_volume (volinfo->tier_info.hot_type);
+
+        return cold | hot;
+}
+
+ /*
+  * Returns _gf_true when @type is replicate, stripe-replicate or disperse,
+  * and _gf_false for every other cluster type.
+  */
+ gf_boolean_t
+ glusterd_is_shd_compatible_type (int type)
+ {
+         if (type == GF_CLUSTER_TYPE_REPLICATE ||
+             type == GF_CLUSTER_TYPE_STRIPE_REPLICATE ||
+             type == GF_CLUSTER_TYPE_DISPERSE)
+                 return _gf_true;
+
+         return _gf_false;
+ }
+
+ /*
+  * Volume-level wrapper around glusterd_is_shd_compatible_type().  A tier
+  * volume qualifies when either its cold or its hot tier type does; any other
+  * volume is judged by its own type.
+  */
+ gf_boolean_t
+ glusterd_is_shd_compatible_volume (glusterd_volinfo_t *volinfo)
+ {
+         int tier_compat = 0;
+
+         if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                 return glusterd_is_shd_compatible_type (volinfo->type);
+
+         tier_compat =
+                 glusterd_is_shd_compatible_type (volinfo->tier_info.cold_type) |
+                 glusterd_is_shd_compatible_type (volinfo->tier_info.hot_type);
+         return tier_compat;
+ }
+
+ /*
+  * Write the statedump options file at @dumpoptions_path.
+  *
+  * @options is a space separated option list.  Every token other than the
+  * nfs service name is written as a "<token>=yes" line.  If the nfs service
+  * name shows up a second time the file is unlinked and 0 is returned.
+  * Nothing is written when @option_cnt is 0, or when it is 1 and @options is
+  * exactly "nfs ".
+  *
+  * Returns 0 on success, -1 when the file cannot be opened or the option
+  * string cannot be duplicated.
+  */
+ int
+ glusterd_set_dump_options (char *dumpoptions_path, char *options,
+                            int option_cnt)
+ {
+         int              ret         = 0;
+         char            *dup_options = NULL;
+         char            *option      = NULL;
+         char            *tmpptr      = NULL;
+         FILE            *fp          = NULL;
+         int              nfs_cnt     = 0;
+         xlator_t        *this        = NULL;
+         glusterd_conf_t *priv        = NULL;
+
+         this = THIS;
+         GF_ASSERT (this);
+
+         priv = this->private;
+         GF_ASSERT (priv);
+
+         if (0 == option_cnt ||
+             (option_cnt == 1 && (!strcmp (options, "nfs ")))) {
+                 ret = 0;
+                 goto out;
+         }
+
+         fp = fopen (dumpoptions_path, "w");
+         if (!fp) {
+                 ret = -1;
+                 goto out;
+         }
+
+         /* strtok_r() modifies its input, so tokenize a private copy. */
+         dup_options = gf_strdup (options);
+         if (!dup_options) {
+                 /* Without this check a NULL would reach "%s" and strtok_r(). */
+                 gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                         GD_MSG_NO_MEMORY, "Out Of Memory");
+                 ret = -1;
+                 goto out;
+         }
+
+         gf_msg ("glusterd", GF_LOG_INFO, 0,
+                 GD_MSG_STATEDUMP_OPTS_RCVD,
+                 "Received following statedump options: %s",
+                 dup_options);
+
+         for (option = strtok_r (dup_options, " ", &tmpptr); option;
+              option = strtok_r (NULL, " ", &tmpptr)) {
+                 if (strcmp (option, priv->nfs_svc.name)) {
+                         fprintf (fp, "%s=yes\n", option);
+                         continue;
+                 }
+
+                 /* The nfs key itself is never written to the file. */
+                 if (nfs_cnt > 0) {
+                         sys_unlink (dumpoptions_path);
+                         ret = 0;
+                         goto out;
+                 }
+                 nfs_cnt++;
+         }
+
+ out:
+         if (fp)
+                 fclose (fp);
+         GF_FREE (dup_options);
+         return ret;
+ }
+
+ /*
+  * Ask a brick process hosted on this node to take a statedump.
+  *
+  * A brick whose uuid differs from MY_UUID is skipped with a return of 0.
+  * Otherwise the pid is read from the brick pidfile, the options are written
+  * to DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.<pid>.options", SIGUSR1 is sent
+  * to that pid and, after a one second sleep, the options file is unlinked.
+  *
+  * @op_errstr is not written by this function.
+  * Returns 0 on success, non-zero on failure.
+  */
+ int
+ glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
+                           glusterd_brickinfo_t *brickinfo,
+                           char *options, int option_cnt, char **op_errstr)
+ {
+         int              ret                        = -1;
+         pid_t            pid                        = -1;
+         FILE            *pidfile                    = NULL;
+         xlator_t        *this                       = NULL;
+         glusterd_conf_t *conf                       = NULL;
+         char             dumpoptions_path[PATH_MAX] = {0,};
+         char             pidfile_path[PATH_MAX]     = {0,};
+
+         this = THIS;
+         GF_ASSERT (this);
+         conf = this->private;
+         GF_ASSERT (conf);
+
+         /* The brick uuid may not have been resolved yet. */
+         if (gf_uuid_is_null (brickinfo->uuid)) {
+                 ret = glusterd_resolve_brick (brickinfo);
+                 if (ret != 0) {
+                         gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                 GD_MSG_RESOLVE_BRICK_FAIL,
+                                 "Cannot resolve brick %s:%s",
+                                 brickinfo->hostname, brickinfo->path);
+                         goto out;
+                 }
+         }
+
+         /* Bricks owned by another node are none of our business. */
+         if (gf_uuid_compare (brickinfo->uuid, MY_UUID) != 0) {
+                 ret = 0;
+                 goto out;
+         }
+
+         GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf);
+
+         pidfile = fopen (pidfile_path, "r");
+         if (pidfile == NULL) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED, "Unable to open pidfile: %s",
+                         pidfile_path);
+                 ret = -1;
+                 goto out;
+         }
+
+         ret = fscanf (pidfile, "%d", &pid);
+         if (ret <= 0) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED,
+                         "Unable to get pid of brick process");
+                 ret = -1;
+                 goto out;
+         }
+
+         snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+                   DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
+
+         ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
+         if (ret < 0) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                         GD_MSG_BRK_STATEDUMP_FAIL,
+                         "error while parsing the statedump "
+                         "options");
+                 ret = -1;
+                 goto out;
+         }
+
+         gf_msg ("glusterd", GF_LOG_INFO, 0,
+                 GD_MSG_STATEDUMP_INFO,
+                 "Performing statedump on brick with pid %d",
+                 pid);
+
+         kill (pid, SIGUSR1);
+
+         /* Wait a second before the options file is removed below. */
+         sleep (1);
+         ret = 0;
+ out:
+         sys_unlink (dumpoptions_path);
+         if (pidfile != NULL)
+                 fclose (pidfile);
+         return ret;
+ }
+
+ /*
+  * Ask the nfs server process to take a statedump.
+  *
+  * The first space separated token of @options must be the nfs service name;
+  * otherwise an explanatory message is stored in *op_errstr and -1 is
+  * returned.  The pid is then read from the nfs pidfile, the options are
+  * written to DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.<pid>.options", SIGUSR1
+  * is sent to that pid and, after a one second sleep, the options file is
+  * unlinked.
+  *
+  * Returns 0 on success, -1 on failure.
+  */
+ int
+ glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr)
+ {
+         int              ret                        = -1;
+         xlator_t        *this                       = NULL;
+         glusterd_conf_t *conf                       = NULL;
+         char             pidfile_path[PATH_MAX]     = {0,};
+         char             path[PATH_MAX]             = {0,};
+         FILE            *pidfile                    = NULL;
+         pid_t            pid                        = -1;
+         char             dumpoptions_path[PATH_MAX] = {0,};
+         char            *option                     = NULL;
+         char            *tmpptr                     = NULL;
+         char            *dup_options                = NULL;
+         char             msg[256]                   = {0,};
+
+         this = THIS;
+         GF_ASSERT (this);
+         conf = this->private;
+         GF_ASSERT (conf);
+
+         /* strtok_r() modifies its input, so tokenize a private copy. */
+         dup_options = gf_strdup (options);
+         if (!dup_options) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                         GD_MSG_NO_MEMORY, "Out Of Memory");
+                 ret = -1;
+                 goto out;
+         }
+
+         /* strtok_r() yields NULL when the string holds no token at all;
+          * that must not reach strcmp(). */
+         option = strtok_r (dup_options, " ", &tmpptr);
+         if (!option || strcmp (option, conf->nfs_svc.name)) {
+                 snprintf (msg, sizeof (msg), "for nfs statedump, options should"
+                           " be after the key nfs");
+                 if (op_errstr)
+                         *op_errstr = gf_strdup (msg);
+                 ret = -1;
+                 goto out;
+         }
+
+         GLUSTERD_GET_NFS_DIR (path, conf);
+         GLUSTERD_GET_NFS_PIDFILE (pidfile_path, path);
+
+         pidfile = fopen (pidfile_path, "r");
+         if (!pidfile) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED, "Unable to open pidfile: %s",
+                         pidfile_path);
+                 ret = -1;
+                 goto out;
+         }
+
+         ret = fscanf (pidfile, "%d", &pid);
+         if (ret <= 0) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED,
+                         "Unable to get pid of nfs process");
+                 ret = -1;
+                 goto out;
+         }
+
+         snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+                   DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
+         ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
+         if (ret < 0) {
+                 gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                         GD_MSG_BRK_STATEDUMP_FAIL,
+                         "error while parsing the statedump "
+                         "options");
+                 ret = -1;
+                 goto out;
+         }
+
+         gf_msg ("glusterd", GF_LOG_INFO, 0,
+                 GD_MSG_STATEDUMP_INFO,
+                 "Performing statedump on nfs server with "
+                 "pid %d", pid);
+
+         kill (pid, SIGUSR1);
+
+         /* Wait a second before the options file is removed below. */
+         sleep (1);
+
+         ret = 0;
+ out:
+         if (pidfile)
+                 fclose (pidfile);
+         sys_unlink (dumpoptions_path);
+         GF_FREE (dup_options);
+         return ret;
+ }
+
+ /*
+  * Ask the quotad process to take a statedump.
+  *
+  * The first space separated token of @options must be the quotad service
+  * name; otherwise an explanatory message is stored in *op_errstr and -1 is
+  * returned.  The pid is then read from the quotad pidfile, the options are
+  * written to DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.<pid>.options", SIGUSR1
+  * is sent to that pid and, after a one second sleep, the options file is
+  * unlinked.
+  *
+  * Returns 0 on success, -1 on failure.
+  */
+ int
+ glusterd_quotad_statedump (char *options, int option_cnt, char **op_errstr)
+ {
+         int              ret                        = -1;
+         xlator_t        *this                       = NULL;
+         glusterd_conf_t *conf                       = NULL;
+         char             pidfile_path[PATH_MAX]     = {0,};
+         char             path[PATH_MAX]             = {0,};
+         FILE            *pidfile                    = NULL;
+         pid_t            pid                        = -1;
+         char             dumpoptions_path[PATH_MAX] = {0,};
+         char            *option                     = NULL;
+         char            *tmpptr                     = NULL;
+         char            *dup_options                = NULL;
+         char             msg[256]                   = {0,};
+
+         this = THIS;
+         GF_ASSERT (this);
+         conf = this->private;
+         GF_ASSERT (conf);
+
+         /* strtok_r() modifies its input, so tokenize a private copy. */
+         dup_options = gf_strdup (options);
+         if (!dup_options) {
+                 gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                         GD_MSG_NO_MEMORY, "Out Of Memory");
+                 ret = -1;
+                 goto out;
+         }
+
+         /* strtok_r() yields NULL when the string holds no token at all;
+          * that must not reach strcmp(). */
+         option = strtok_r (dup_options, " ", &tmpptr);
+         if (!option || strcmp (option, conf->quotad_svc.name)) {
+                 snprintf (msg, sizeof (msg), "for quotad statedump, options "
+                           "should be after the key 'quotad'");
+                 if (op_errstr)
+                         *op_errstr = gf_strdup (msg);
+                 ret = -1;
+                 goto out;
+         }
+
+         GLUSTERD_GET_QUOTAD_DIR (path, conf);
+         GLUSTERD_GET_QUOTAD_PIDFILE (pidfile_path, path);
+
+         pidfile = fopen (pidfile_path, "r");
+         if (!pidfile) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED, "Unable to open pidfile: %s",
+                         pidfile_path);
+                 ret = -1;
+                 goto out;
+         }
+
+         ret = fscanf (pidfile, "%d", &pid);
+         if (ret <= 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED, "Unable to get pid of quotad "
+                         "process");
+                 ret = -1;
+                 goto out;
+         }
+
+         snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+                   DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
+         ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
+         if (ret < 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_BRK_STATEDUMP_FAIL, "error while parsing "
+                         "statedump options");
+                 ret = -1;
+                 goto out;
+         }
+
+         gf_msg (this->name, GF_LOG_INFO, 0,
+                 GD_MSG_STATEDUMP_INFO,
+                 "Performing statedump on quotad with "
+                 "pid %d", pid);
+
+         kill (pid, SIGUSR1);
+
+         /* Wait a second before the options file is removed below. */
+         sleep (1);
+
+         ret = 0;
+ out:
+         if (pidfile)
+                 fclose (pidfile);
+         sys_unlink (dumpoptions_path);
+         GF_FREE (dup_options);
+         return ret;
+ }
+
+ /* Reports how many of the volume's bricks belong to the given peer.
+  * Returns,
+  * 2 - every brick of the volume has the peer's uuid
+  * 1 - some, but not all, bricks have the peer's uuid
+  * 0 - no brick has the peer's uuid
+  */
+ int
+ glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo,
+                                      uuid_t friend_uuid)
+ {
+         glusterd_brickinfo_t *brick   = NULL;
+         int                   matched = 0;
+         int                   verdict = 0;
+
+         GF_ASSERT (volinfo);
+
+         cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                 if (gf_uuid_compare (brick->uuid, friend_uuid) == 0)
+                         matched++;
+         }
+
+         if (matched == 0)
+                 verdict = 0;
+         else if (matched == volinfo->brick_count)
+                 verdict = 2;
+         else
+                 verdict = 1;
+
+         gf_msg_debug (THIS->name, 0, "Returning %d", verdict);
+         return verdict;
+ }
+
+ /* Remove the volumes that became stale when the peer identified by uuid was
+  * detached: a volume whose bricks all belong to that peer is deleted.  For
+  * non-snapshot volumes that have no brick on this node, the snapd service is
+  * stopped first.  Once the walk is over all daemon services are
+  * reconfigured; a reconfigure failure is logged but not returned.
+  *
+  * Returns 0 on success, or the glusterd_delete_volume() error that ended the
+  * walk.
+  */
+ int
+ glusterd_friend_remove_cleanup_vols (uuid_t uuid)
+ {
+         int                 ret   = -1;
+         glusterd_conf_t    *priv  = NULL;
+         glusterd_svc_t     *snapd = NULL;
+         glusterd_volinfo_t *vol   = NULL;
+         glusterd_volinfo_t *next  = NULL;
+
+         priv = THIS->private;
+         GF_ASSERT (priv);
+
+         /* The _safe iterator is required: volumes may be deleted in the loop. */
+         cds_list_for_each_entry_safe (vol, next, &priv->volumes, vol_list) {
+                 /* Stop the snapd daemon service if no brick is local. */
+                 if (!glusterd_friend_contains_vol_bricks (vol, MY_UUID) &&
+                     !vol->is_snap_volume) {
+                         snapd = &(vol->snapd.svc);
+                         ret = snapd->stop (snapd, SIGTERM);
+                         if (ret != 0) {
+                                 gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                         GD_MSG_SVC_STOP_FAIL, "Failed "
+                                         "to stop snapd daemon service");
+                         }
+                 }
+
+                 if (glusterd_friend_contains_vol_bricks (vol, uuid) != 2)
+                         continue;
+
+                 gf_msg (THIS->name, GF_LOG_INFO, 0,
+                         GD_MSG_STALE_VOL_DELETE_INFO,
+                         "Deleting stale volume %s", vol->volname);
+                 ret = glusterd_delete_volume (vol);
+                 if (ret != 0) {
+                         gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                 GD_MSG_STALE_VOL_REMOVE_FAIL,
+                                 "Error deleting stale volume");
+                         goto out;
+                 }
+         }
+
+         /* Reconfigure all daemon services upon peer detach */
+         ret = glusterd_svcs_reconfigure ();
+         if (ret != 0) {
+                 gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                         GD_MSG_SVC_STOP_FAIL,
+                         "Failed to reconfigure all daemon services.");
+         }
+         ret = 0;
+ out:
+         gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+         return ret;
+ }
+
+ /*
+  * Build "<volume dir>/<volname>-bitd.vol" into @filepath, which is written
+  * with a PATH_MAX bound.  Always returns 0.
+  */
+ int
+ glusterd_get_bitd_filepath (char *filepath, glusterd_volinfo_t *volinfo)
+ {
+         char             path[PATH_MAX] = {0,};
+         glusterd_conf_t *priv           = THIS->private;
+
+         GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+         snprintf (filepath, PATH_MAX,
+                   "%s/%s-bitd.vol", path, volinfo->volname);
+
+         return 0;
+ }
+
+ /*
+  * Build the client volfile path for @volinfo into @filepath (written with a
+  * PATH_MAX bound): "<volume dir>/<volname>.tcp-fuse.vol" for TCP and
+  * "<volume dir>/<volname>.rdma-fuse.vol" for RDMA.
+  *
+  * Returns 0 on success; -1, with @filepath untouched, for any other
+  * transport type.
+  */
+ int
+ glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,
+                               gf_transport_type type)
+ {
+         char             path[PATH_MAX] = {0,};
+         glusterd_conf_t *priv           = NULL;
+
+         priv = THIS->private;
+
+         GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+         if (type == GF_TRANSPORT_TCP) {
+                 snprintf (filepath, PATH_MAX,
+                           "%s/%s.tcp-fuse.vol", path, volinfo->volname);
+                 return 0;
+         }
+
+         if (type == GF_TRANSPORT_RDMA) {
+                 snprintf (filepath, PATH_MAX,
+                           "%s/%s.rdma-fuse.vol", path, volinfo->volname);
+                 return 0;
+         }
+
+         return -1;
+ }
+
+ /*
+  * Build the trusted client volfile path for @volinfo into @filepath (written
+  * with a PATH_MAX bound): "<volume dir>/trusted-<volname>.tcp-fuse.vol" for
+  * TCP and "<volume dir>/trusted-<volname>.rdma-fuse.vol" for RDMA.
+  *
+  * Returns 0 on success; -1, with @filepath untouched, for any other
+  * transport type.
+  */
+ int
+ glusterd_get_trusted_client_filepath (char *filepath,
+                                       glusterd_volinfo_t *volinfo,
+                                       gf_transport_type type)
+ {
+         char             path[PATH_MAX] = {0,};
+         glusterd_conf_t *priv           = NULL;
+
+         priv = THIS->private;
+
+         GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+         if (type == GF_TRANSPORT_TCP) {
+                 snprintf (filepath, PATH_MAX, "%s/trusted-%s.tcp-fuse.vol",
+                           path, volinfo->volname);
+                 return 0;
+         }
+
+         if (type == GF_TRANSPORT_RDMA) {
+                 snprintf (filepath, PATH_MAX, "%s/trusted-%s.rdma-fuse.vol",
+                           path, volinfo->volname);
+                 return 0;
+         }
+
+         return -1;
+ }
+
+ /*
+  * For tier volumes only: a defrag status of FAILED or STOPPED is rewritten
+  * to STARTED.  Any other status, and any non-tier volume, is left alone.
+  */
+ void glusterd_update_tier_status (glusterd_volinfo_t *volinfo) {
+
+         glusterd_rebalance_t *tier_rebal = &volinfo->rebal;
+
+         if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                 return;
+
+         /*
+          * If tier process status is stopped or failed, then
+          * manually changing the status.
+          */
+         if (tier_rebal->defrag_status == GF_DEFRAG_STATUS_FAILED ||
+             tier_rebal->defrag_status == GF_DEFRAG_STATUS_STOPPED)
+                 tier_rebal->defrag_status = GF_DEFRAG_STATUS_STARTED;
+ }
+
+ /*
+  * Build a client volfile path under /tmp for @volinfo into @filepath
+  * (written with a PATH_MAX bound): "/tmp/<volname>.tcp-fuse.vol" for the TCP
+  * and TCP+RDMA transports, "/tmp/<volname>.rdma-fuse.vol" for RDMA.
+  *
+  * Returns 0 on success; -1, with @filepath untouched, for any other
+  * transport type.
+  */
+ int
+ glusterd_get_dummy_client_filepath (char *filepath,
+                                     glusterd_volinfo_t *volinfo,
+                                     gf_transport_type type)
+ {
+         int ret = 0;
+
+         switch (type) {
+         case GF_TRANSPORT_TCP:
+         case GF_TRANSPORT_BOTH_TCP_RDMA:
+                 snprintf (filepath, PATH_MAX,
+                           "/tmp/%s.tcp-fuse.vol", volinfo->volname);
+                 break;
+
+         case GF_TRANSPORT_RDMA:
+                 snprintf (filepath, PATH_MAX,
+                           "/tmp/%s.rdma-fuse.vol", volinfo->volname);
+                 break;
+         default:
+                 ret = -1;
+                 break;
+         }
+
+         return ret;
+ }
+
+ /*
+  * Resume rebalance handling for @volinfo according to its stored defrag
+  * status.
+  *
+  *  - COMPLETE / STOPPED / FAILED: nothing is started; the initial return
+  *    value of -1 is left untouched.
+  *  - STARTED: if the process named by the defrag pidfile is running, the
+  *    defrag state is initialised and an rpc connection to it is created;
+  *    otherwise handling continues as for NOT_STARTED.
+  *  - NOT_STARTED: glusterd_handle_defrag_start() is called; on failure the
+  *    status is set to FAILED.
+  *
+  * For tier volumes glusterd_update_tier_status() is applied first, which
+  * turns a STOPPED or FAILED status into STARTED.
+  *
+  * Returns the result of the last step taken, or -1 when nothing was done.
+  */
+ int
+ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
+                                 size_t len, int cmd, defrag_cbk_fn_t cbk)
+ {
+         xlator_t        *this              = NULL;
+         glusterd_conf_t *priv              = NULL;
+         char             pidfile[PATH_MAX] = {0,};
+         int              ret               = -1;
+         pid_t            pid               = 0;
+
+         this = THIS;
+         GF_ASSERT (this);
+
+         priv = this->private;
+         if (!priv)
+                 return ret;
+
+         /* Don't start the rebalance process if the status is already
+          * completed, stopped or failed. If the status is started, check if
+          * there is an existing process already and connect to it. If not, then
+          * start the rebalance process
+          */
+
+         /*
+          * Changing the status of tier process to start the daemon
+          * forcefully.
+          */
+         glusterd_update_tier_status (volinfo);
+
+         switch (volinfo->rebal.defrag_status) {
+         case GF_DEFRAG_STATUS_COMPLETE:
+         case GF_DEFRAG_STATUS_STOPPED:
+         case GF_DEFRAG_STATUS_FAILED:
+                 break;
+         case GF_DEFRAG_STATUS_STARTED:
+                 GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+                 if (gf_is_service_running (pidfile, &pid)) {
+                         ret = glusterd_rebalance_defrag_init (volinfo, cbk);
+                         if (ret) {
+                                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                                         GD_MSG_REBALANCE_START_FAIL,
+                                         "Failed to initialize defrag. "
+                                         "Not starting rebalance process for "
+                                         "%s.", volinfo->volname);
+                                 goto out;
+                         }
+                         ret = glusterd_rebalance_rpc_create (volinfo, _gf_true);
+                         break;
+                 }
+                 /* No running process behind the pidfile: fall through and
+                  * start one. */
+         case GF_DEFRAG_STATUS_NOT_STARTED:
+                 ret = glusterd_handle_defrag_start (volinfo, op_errstr, len,
+                                                     cmd, cbk, volinfo->rebal.op);
+                 if (ret)
+                         volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED;
+                 break;
+         default:
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_REBALANCE_START_FAIL,
+                         "Unknown defrag status (%d). "
+                         "Not starting rebalance process for %s.",
+                         volinfo->rebal.defrag_status, volinfo->volname);
+                 break;
+         }
+ out:
+         return ret;
+
+ }
+
+ /*
+  * Record @cmd, @status and @op in the volume's rebalance state.
+  *
+  * If the stored rebalance id is null nothing else happens.  Otherwise, on
+  * the originator glusterd a task id is generated into @dict, and the task
+  * id found under GF_REBALANCE_TID_KEY is parsed into the rebalance id.  A
+  * missing task id is only logged as a warning.
+  */
+ void
+ glusterd_defrag_info_set (glusterd_volinfo_t *volinfo, dict_t *dict, int cmd,
+                           int status, int op)
+ {
+
+         xlator_t             *this        = NULL;
+         int                   ret         = -1;
+         char                 *task_id_str = NULL;
+         glusterd_rebalance_t *rebal       = NULL;
+
+         this = THIS;
+         rebal = &volinfo->rebal;
+
+         rebal->defrag_cmd = cmd;
+         rebal->defrag_status = status;
+         rebal->op = op;
+
+         if (gf_uuid_is_null (rebal->rebalance_id))
+                 return;
+
+         if (is_origin_glusterd (dict)) {
+
+                 ret = glusterd_generate_and_set_task_id(dict,
+                                                         GF_REBALANCE_TID_KEY);
+                 if (ret) {
+                         gf_msg (this->name, GF_LOG_ERROR, 0,
+                                 GD_MSG_TASKID_GEN_FAIL,
+                                 "Failed to generate task-id");
+                         goto out;
+                 }
+         }
+         ret = dict_get_str (dict, GF_REBALANCE_TID_KEY,
+                             &task_id_str);
+         if (ret) {
+                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                         GD_MSG_REBALANCE_ID_MISSING, "Missing rebalance-id");
+                 /* Not treated as an error: skip the debug message below. */
+                 ret = 0;
+                 goto out;
+         }
+
+         gf_uuid_parse (task_id_str, rebal->rebalance_id);
+ out:
+
+         if (ret) {
+                 gf_msg_debug (this->name, 0,
+                               "Rebalance start validate failed");
+         }
+         return;
+
+ }
+
+
+ /*
+  * Restart (or reconnect to) the rebalance process of a single volume.
+  *
+  * When gd_should_i_start_rebalance() says this node should not start it,
+  * the status is set to NOT_STARTED, the node state is stored for tier
+  * volumes, and 0 is returned.  A volume without a stored defrag command is
+  * marked FAILED and -1 is returned.  Otherwise the result of
+  * glusterd_volume_defrag_restart() is returned.
+  */
+ int
+ glusterd_restart_rebalance_for_volume (glusterd_volinfo_t *volinfo)
+ {
+         int  ret = -1;
+         char op_errstr[PATH_MAX];
+
+         if (!gd_should_i_start_rebalance (volinfo)) {
+
+                 /* Store the rebalance-id and rebalance command even if
+                  * the peer isn't starting a rebalance process. On peers
+                  * where a rebalance process is started,
+                  * glusterd_handle_defrag_start performs the storing.
+                  *
+                  * Storing this is needed for having 'volume status'
+                  * work correctly.
+                  */
+                 volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+                 if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                         glusterd_store_perform_node_state_store (volinfo);
+
+                 return 0;
+         }
+
+         if (!volinfo->rebal.defrag_cmd) {
+                 volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED;
+                 return -1;
+         }
+
+         ret = glusterd_volume_defrag_restart (volinfo, op_errstr, PATH_MAX,
+                                 volinfo->rebal.defrag_cmd,
+                                 volinfo->rebal.op == GD_OP_REMOVE_BRICK ?
+                                 glusterd_remove_brick_migrate_cbk : NULL);
+         if (ret)
+                 return ret;
+
+         /* If remove brick is started then ensure that on a glusterd
+          * restart decommission_is_in_progress is set to avoid remove
+          * brick commit to happen when rebalance is not completed.
+          */
+         if (volinfo->rebal.op == GD_OP_REMOVE_BRICK &&
+             volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED)
+                 volinfo->decommission_in_progress = 1;
+
+         return ret;
+ }
+ /*
+  * Run glusterd_restart_rebalance_for_volume() on every volume of @conf.
+  * Per-volume results are ignored; the function always returns 0.
+  */
+ int
+ glusterd_restart_rebalance (glusterd_conf_t *conf)
+ {
+         glusterd_volinfo_t *vol = NULL;
+
+         cds_list_for_each_entry (vol, &conf->volumes, vol_list) {
+                 glusterd_restart_rebalance_for_volume (vol);
+         }
+
+         return 0;
+ }
+
+ /* Zero every rebalance counter kept in volinfo->rebal. */
+ void
+ glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo)
+ {
+         glusterd_rebalance_t *stats = NULL;
+
+         GF_ASSERT (volinfo);
+
+         stats = &volinfo->rebal;
+
+         /* file counters */
+         stats->lookedup_files     = 0;
+         stats->rebalance_files    = 0;
+         stats->skipped_files      = 0;
+         stats->rebalance_failures = 0;
+
+         /* volume of data moved and time spent */
+         stats->rebalance_data     = 0;
+         stats->rebalance_time     = 0;
+ }
+
+ /*
+  * Tells whether @brickinfo belongs to this node, i.e. whether its uuid
+  * equals MY_UUID.  A brick with a null uuid is resolved first; if that
+  * fails the brick is reported as not local.
+  *
+  * @this and @volinfo are not consulted by this function.
+  */
+ gf_boolean_t
+ glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo,
+                          glusterd_brickinfo_t *brickinfo)
+ {
+         gf_boolean_t local = _gf_false;
+         int          ret   = 0;
+
+         if (gf_uuid_is_null (brickinfo->uuid)) {
+                 ret = glusterd_resolve_brick (brickinfo);
+                 if (ret)
+                         goto out;
+         }
+         local = !gf_uuid_compare (brickinfo->uuid, MY_UUID);
+ out:
+         return local;
+ }
+ /*
+  * Check that the "vol-id" string carried in @op_dict names the same volume
+  * id as @volinfo.
+  *
+  * Returns 0 when the ids match; non-zero when the key is missing, the
+  * string does not parse as a uuid, or the ids differ (-1 in that last case).
+  */
+ int
+ glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo)
+ {
+         xlator_t *this      = THIS;
+         char     *id_text   = NULL;
+         uuid_t    parsed_id = {0, };
+         int       ret       = -1;
+
+         GF_ASSERT (this);
+
+         ret = dict_get_str (op_dict, "vol-id", &id_text);
+         if (ret != 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_DICT_GET_FAILED,
+                         "Failed to get volume id for "
+                         "volume %s", volinfo->volname);
+                 return ret;
+         }
+
+         ret = gf_uuid_parse (id_text, parsed_id);
+         if (ret != 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_UUID_PARSE_FAIL,
+                         "Failed to parse volume id "
+                         "for volume %s", volinfo->volname);
+                 return ret;
+         }
+
+         if (gf_uuid_compare (parsed_id, volinfo->volume_id) != 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_VOL_ID_MISMATCH, "Volume ids of volume %s - %s"
+                         " and %s - are different. Possibly a split brain among "
+                         "peers.", volinfo->volname, id_text,
+                         uuid_utoa (volinfo->volume_id));
+                 return -1;
+         }
+
+         return ret;
+ }
+
+ /*
+  * Copy the rebalance / tier counters found in @rsp_dict into @volinfo.
+  *
+  * Each key is optional: a missing key is only traced, and a value of zero
+  * never overwrites what the volume already holds.
+  *
+  * The return value is the result of the final lookup ("run-time") only.
+  */
+ int
+ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
+                                       dict_t *rsp_dict)
+ {
+         xlator_t           *this       = THIS;
+         int                 ret        = 0;
+         gf_defrag_status_t  status     = GF_DEFRAG_STATUS_NOT_STARTED;
+         double              run_time   = 0;
+         uint64_t            files      = 0;
+         uint64_t            size       = 0;
+         uint64_t            lookup     = 0;
+         uint64_t            failures   = 0;
+         uint64_t            skipped    = 0;
+         uint64_t            promoted   = 0;
+         uint64_t            demoted    = 0;
+
+         ret = dict_get_uint64 (rsp_dict, "files", &files);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get file count");
+         if (files)
+                 volinfo->rebal.rebalance_files = files;
+
+         ret = dict_get_uint64 (rsp_dict, "size", &size);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get size of xfer");
+         if (size)
+                 volinfo->rebal.rebalance_data = size;
+
+         ret = dict_get_uint64 (rsp_dict, "lookups", &lookup);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get lookedup file count");
+         if (lookup)
+                 volinfo->rebal.lookedup_files = lookup;
+
+         ret = dict_get_int32 (rsp_dict, "status", (int32_t *)&status);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get status");
+         if (status)
+                 volinfo->rebal.defrag_status = status;
+
+         ret = dict_get_uint64 (rsp_dict, "failures", &failures);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get failure count");
+         if (failures)
+                 volinfo->rebal.rebalance_failures = failures;
+
+         ret = dict_get_uint64 (rsp_dict, "skipped", &skipped);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get skipped count");
+         if (skipped)
+                 volinfo->rebal.skipped_files = skipped;
+
+         ret = dict_get_uint64 (rsp_dict, "promoted", &promoted);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get promoted count");
+         if (promoted)
+                 volinfo->tier_info.promoted = promoted;
+
+         ret = dict_get_uint64 (rsp_dict, "demoted", &demoted);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get demoted count");
+         if (demoted)
+                 volinfo->tier_info.demoted = demoted;
+
+         /* Looked up last: its result is what the function returns. */
+         ret = dict_get_double (rsp_dict, "run-time", &run_time);
+         if (ret)
+                 gf_msg_trace (this->name, 0,
+                               "failed to get run-time");
+         if (run_time)
+                 volinfo->rebal.rebalance_time = run_time;
+
+         return ret;
+ }
+
+ /*
+  * Construct a graph from each of the two volfiles and store in *identical
+  * the result of is_graph_topology_equal() on them.
+  *
+  * Returns 0 when the comparison was carried out, -1 on any failure (invalid
+  * argument, file that cannot be opened, graph that cannot be constructed);
+  * *identical is only written on success.
+  */
+ int
+ glusterd_check_topology_identical (const char *filename1,
+                                    const char *filename2,
+                                    gf_boolean_t *identical)
+ {
+         xlator_t          *this     = THIS;
+         int                ret      = -1;
+         FILE              *volfile1 = NULL;
+         FILE              *volfile2 = NULL;
+         glusterfs_graph_t *graph1   = NULL;
+         glusterfs_graph_t *graph2   = NULL;
+
+         /* Without an xlator there is not even a name to log under. */
+         if (!this)
+                 return (-1);
+
+         /* Reject NULL arguments. */
+         GF_VALIDATE_OR_GOTO (this->name, filename1, out);
+         GF_VALIDATE_OR_GOTO (this->name, filename2, out);
+         GF_VALIDATE_OR_GOTO (this->name, identical, out);
+
+         /* Open both volfiles before any graph is built. */
+         volfile1 = fopen (filename1, "r");
+         if (volfile1 == NULL) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED,
+                         "fopen() on file: %s failed "
+                         "(%s)", filename1, strerror (errno));
+                 goto out;
+         }
+
+         volfile2 = fopen (filename2, "r");
+         if (volfile2 == NULL) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED,
+                         "fopen() on file: %s failed "
+                         "(%s)", filename2, strerror (errno));
+                 goto out;
+         }
+
+         /* One graph per volfile. */
+         graph1 = glusterfs_graph_construct(volfile1);
+         if (graph1 == NULL)
+                 goto out;
+
+         graph2 = glusterfs_graph_construct(volfile2);
+         if (graph2 == NULL)
+                 goto out;
+
+         /* Only the topology of the two graphs is compared. */
+         *identical = is_graph_topology_equal(graph1, graph2);
+         ret = 0;
+ out:
+         if (volfile1)
+                 fclose(volfile1);
+         if (volfile2)
+                 fclose(volfile2);
+         if (graph1)
+                 glusterfs_graph_destroy(graph1);
+         if (graph2)
+                 glusterfs_graph_destroy(graph2);
+
+         gf_msg_debug (this->name, 0, "Returning with %d", ret);
+         return ret;
+ }
+
+ /*
+  * Decide whether two files have the same content.
+  *
+  * Files of different size are reported as not identical straight away;
+  * otherwise the checksums from get_checksum_for_path() are compared.
+  *
+  * Returns 0 when *identical was set, non-zero when a stat or checksum call
+  * failed (in which case *identical is left untouched).
+  */
+ int
+ glusterd_check_files_identical (char *filename1, char *filename2,
+                                 gf_boolean_t *identical)
+ {
+         int          ret  = -1;
+         xlator_t    *this = NULL;
+         struct stat  st1  = {0,};
+         struct stat  st2  = {0,};
+         uint32_t     sum1 = 0;
+         uint32_t     sum2 = 0;
+
+         GF_ASSERT (filename1);
+         GF_ASSERT (filename2);
+         GF_ASSERT (identical);
+
+         this = THIS;
+
+         ret = sys_stat (filename1, &st1);
+         if (ret != 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED,
+                         "stat on file: %s failed "
+                         "(%s)", filename1, strerror (errno));
+                 goto out;
+         }
+
+         ret = sys_stat (filename2, &st2);
+         if (ret != 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         GD_MSG_FILE_OP_FAILED,
+                         "stat on file: %s failed "
+                         "(%s)", filename2, strerror (errno));
+                 goto out;
+         }
+
+         /* Different sizes settle the question without reading the files. */
+         if (st1.st_size != st2.st_size) {
+                 *identical = _gf_false;
+                 goto out;
+         }
+
+         ret = get_checksum_for_path (filename1, &sum1);
+         if (ret != 0)
+                 goto out;
+
+         ret = get_checksum_for_path (filename2, &sum2);
+         if (ret != 0)
+                 goto out;
+
+         *identical = (sum1 == sum2) ? _gf_true : _gf_false;
+
+ out:
+         gf_msg_debug (this->name, 0, "Returning with %d", ret);
+         return ret;
+ }
+
+ /*
+  * Serve the "help" / "help-xml" variants of volume set.
+  *
+  * When @dict is NULL the op context from glusterd_op_get_ctx() is used; if
+  * there is none either, 0 is returned.  With "help" in the dict the option
+  * help is produced as plain text, with "help-xml" as xml (which fails when
+  * the build has no libxml).  With neither key present -1 is returned.
+  *
+  * On failure *op_errstr, when provided, may be set to an allocated message.
+  */
+ int
+ glusterd_volset_help (dict_t *dict, char **op_errstr)
+ {
+         int           ret     = -1;
+         gf_boolean_t  xml_out = _gf_false;
+         xlator_t     *this    = THIS;
+
+         if (!dict) {
+                 dict = glusterd_op_get_ctx ();
+                 if (!dict) {
+                         ret = 0;
+                         goto out;
+                 }
+         }
+
+         if (dict_get (dict, "help" )) {
+                 xml_out = _gf_false;
+         } else {
+                 if (!dict_get (dict, "help-xml" ))
+                         goto out;
+
+                 xml_out = _gf_true;
+ #if (HAVE_LIB_XML)
+                 ret = 0;
+ #else
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_MODULE_NOT_INSTALLED,
+                         "libxml not present in the system");
+                 if (op_errstr)
+                         *op_errstr = gf_strdup ("Error: xml libraries not "
+                                                 "present to produce "
+                                                 "xml-output");
+                 goto out;
+ #endif
+         }
+
+         ret = glusterd_get_volopt_content (dict, xml_out);
+         if (ret && op_errstr)
+                 *op_errstr = gf_strdup ("Failed to get volume options help");
+  out:
+
+         gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+         return ret;
+ }
+
+ /*
+  * Send the cli response @arg for @req and log the outcome of the command.
+  *
+  * The command string is read from "cmd-str" in @dict; when present, a
+  * SUCCESS or FAILED line (with arg->op_errstr, if any) goes to the command
+  * log.  The reply is then submitted and @dict, if non-NULL, is unref'd.
+  *
+  * The return value is the result of the "cmd-str" lookup only; the outcome
+  * of submitting the reply is not reflected in it.
+  *
+  * NOTE(review): @dict is handed to dict_get_str() before the NULL check at
+  * the end of the function - confirm that dict_get_str() tolerates NULL.
+  */
+ int
+ glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload,
+                  int payloadcount, struct iobref *iobref, xdrproc_t xdrproc,
+                  dict_t *dict)
+ {
+         int       ret       = -1;
+         char     *cmd       = NULL;
+         int       op_ret    = 0;
+         char     *op_errstr = NULL;
+         xlator_t *this      = NULL;
+
+         this = THIS;
+         GF_ASSERT (this);
+
+         op_ret = arg->op_ret;
+         op_errstr = arg->op_errstr;
+
+         ret = dict_get_str (dict, "cmd-str", &cmd);
+         if (ret)
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_DICT_GET_FAILED,
+                         "Failed to get command "
+                         "string");
+
+         if (cmd) {
+                 if (op_ret)
+                         gf_cmd_log ("", "%s : FAILED %s %s", cmd,
+                                     (op_errstr)? ":" : " ",
+                                     (op_errstr)? op_errstr : " ");
+                 else
+                         gf_cmd_log ("", "%s : SUCCESS", cmd);
+         }
+
+         glusterd_submit_reply (req, arg, payload, payloadcount, iobref,
+                                (xdrproc_t) xdrproc);
+         if (dict)
+                 dict_unref (dict);
+
+         return ret;
+ }
+
+ /*
+  * Copy the "gsync-status" string, if @src has one, into @dst.
+  *
+  * A missing key in @src is not an error.  Returns 0 on success, non-zero
+  * when the value cannot be stored in @dst.
+  */
+ static int32_t
+ glusterd_append_gsync_status (dict_t *dst, dict_t *src)
+ {
+         int   ret      = 0;
+         char *stop_msg = NULL;
+
+         ret = dict_get_str (src, "gsync-status", &stop_msg);
+         if (ret) {
+                 ret = 0;
+                 goto out;
+         }
+
+         ret = dict_set_dynstr_with_alloc (dst, "gsync-status", stop_msg);
+         if (ret) {
+                 gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                         GD_MSG_DICT_SET_FAILED,
+                         "Unable to set the stop "
+                         "message in the ctx dictionary");
+                 goto out;
+         }
+
+         ret = 0;
+  out:
+         gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+         return ret;
+
+ }
+
+ /*
+  * Append the gsync status entries of @src to those already held by @dst.
+  *
+  * Entries are stored under "status_value<N>" with their number under
+  * "gsync-count".  Each source entry is duplicated and stored in @dst at
+  * index <dst count + i>, after which "gsync-count" in @dst is updated to the
+  * combined total.
+  *
+  * A NULL @src, or one without entries, is not an error.  Returns 0 on
+  * success and non-zero on failure, including allocation failure.
+  */
+ int32_t
+ glusterd_append_status_dicts (dict_t *dst, dict_t *src)
+ {
+         char               sts_val_name[PATH_MAX] = {0, };
+         int                dst_count              = 0;
+         int                src_count              = 0;
+         int                i                      = 0;
+         int                ret                    = 0;
+         gf_gsync_status_t *sts_val                = NULL;
+         gf_gsync_status_t *dst_sts_val            = NULL;
+
+         GF_ASSERT (dst);
+
+         if (src == NULL)
+                 goto out;
+
+         ret = dict_get_int32 (dst, "gsync-count", &dst_count);
+         if (ret)
+                 dst_count = 0;
+
+         ret = dict_get_int32 (src, "gsync-count", &src_count);
+         if (ret || !src_count) {
+                 gf_msg_debug ("glusterd", 0, "Source brick empty");
+                 ret = 0;
+                 goto out;
+         }
+
+         for (i = 0; i < src_count; i++) {
+                 snprintf (sts_val_name, sizeof(sts_val_name),
+                           "status_value%d", i);
+
+                 ret = dict_get_bin (src, sts_val_name, (void **) &sts_val);
+                 if (ret)
+                         goto out;
+
+                 dst_sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t),
+                                          gf_common_mt_gsync_status_t);
+                 if (!dst_sts_val) {
+                         gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                                 GD_MSG_NO_MEMORY, "Out Of Memory");
+                         /* ret is still 0 from dict_get_bin(); without this
+                          * the failure would be reported as success. */
+                         ret = -1;
+                         goto out;
+                 }
+
+                 memcpy (dst_sts_val, sts_val, sizeof(gf_gsync_status_t));
+
+                 snprintf (sts_val_name, sizeof(sts_val_name),
+                           "status_value%d", i + dst_count);
+
+                 ret = dict_set_bin (dst, sts_val_name, dst_sts_val,
+                                     sizeof(gf_gsync_status_t));
+                 if (ret) {
+                         /* The copy was not stored in @dst, so free it here. */
+                         GF_FREE (dst_sts_val);
+                         goto out;
+                 }
+         }
+
+         ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count);
+
+  out:
+         gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+         return ret;
+
+ }
+
+ /*
+  * Copy every "brick<N>.mount_dir" string (N = 1 .. brick_count) found in
+  * @rsp_dict into @aggr.
+  *
+  * A missing "brick_count", or a missing entry for a particular brick, is
+  * not an error.  Returns 0 on success, non-zero when a value cannot be
+  * stored in @aggr.
+  */
+ int32_t
+ glusterd_aggr_brick_mount_dirs (dict_t *aggr, dict_t *rsp_dict)
+ {
+         xlator_t *this           = THIS;
+         char     *mount_dir      = NULL;
+         char      key[PATH_MAX]  = "";
+         int32_t   total          = -1;
+         int32_t   idx            = -1;
+         int32_t   ret            = -1;
+
+         GF_ASSERT (this);
+         GF_ASSERT (aggr);
+         GF_ASSERT (rsp_dict);
+
+         ret = dict_get_int32 (rsp_dict, "brick_count", &total);
+         if (ret) {
+                 gf_msg_debug (this->name, 0, "No brick_count present");
+                 ret = 0;
+                 goto out;
+         }
+
+         idx = 1;
+         while (idx <= total) {
+                 mount_dir = NULL;
+                 snprintf (key, sizeof(key), "brick%d.mount_dir", idx);
+                 idx++;
+
+                 ret = dict_get_str (rsp_dict, key, &mount_dir);
+                 if (ret) {
+                         /* Coz the info will come from a different node */
+                         gf_msg_debug (this->name, 0,
+                                       "%s not present", key);
+                         continue;
+                 }
+
+                 ret = dict_set_dynstr_with_alloc (aggr, key, mount_dir);
+                 if (ret) {
+                         gf_msg (this->name, GF_LOG_ERROR, 0,
+                                 GD_MSG_DICT_SET_FAILED,
+                                 "Failed to set %s", key);
+                         goto out;
+                 }
+         }
+
+         ret = 0;
+ out:
+         gf_msg_trace (this->name, 0, "Returning %d ", ret);
+         return ret;
+ }
+
+ /*
+  * Fold a gsync response into the aggregate dictionary.
+  *
+  * The target is @aggr when given, otherwise the op context from
+  * glusterd_op_get_ctx().  From @rsp_dict (if any) the status entries, the
+  * "gsync-status" string and the "conf_path" string are copied over.  A
+  * non-empty @op_errstr is stored under "errstr".
+  *
+  * Returns 0 on success, non-zero on failure.
+  */
+ int32_t
+ glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr)
+ {
+         dict_t *ctx       = aggr;
+         char   *conf_path = NULL;
+         int     ret       = 0;
+
+         if (!ctx) {
+                 ctx = glusterd_op_get_ctx ();
+                 if (!ctx) {
+                         gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                 GD_MSG_OPCTX_GET_FAIL,
+                                 "Operation Context is not present");
+                         GF_ASSERT (0);
+                 }
+         }
+
+         if (rsp_dict) {
+                 ret = glusterd_append_status_dicts (ctx, rsp_dict);
+                 if (ret)
+                         goto out;
+
+                 ret = glusterd_append_gsync_status (ctx, rsp_dict);
+                 if (ret)
+                         goto out;
+
+                 /* conf_path is optional in the response. */
+                 ret = dict_get_str (rsp_dict, "conf_path", &conf_path);
+                 if (ret == 0 && conf_path != NULL) {
+                         ret = dict_set_dynstr_with_alloc (ctx, "conf_path",
+                                                           conf_path);
+                         if (ret) {
+                                 gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                         GD_MSG_DICT_SET_FAILED,
+                                         "Unable to store conf path.");
+                                 goto out;
+                         }
+                 }
+         }
+
+         if (op_errstr && strcmp ("", op_errstr) != 0) {
+                 ret = dict_set_dynstr_with_alloc (ctx, "errstr",
+                                                   op_errstr);
+                 if (ret)
+                         goto out;
+         }
+
+         ret = 0;
+  out:
+         gf_msg_debug ("glusterd", 0, "Returning %d ", ret);
+         return ret;
+ }
+
+ /*
+  * Fold a replace-brick response into the aggregate dictionary.
+  *
+  * The target is @aggr when given, otherwise the op context from
+  * glusterd_op_get_ctx().  The brick mount dirs of @rsp_dict are aggregated,
+  * and non-zero "src-brick-port" / "dst-brick-port" values found in it are
+  * stored in the target.
+  *
+  * Returns 0 on success, non-zero on failure.
+  */
+ int32_t
+ glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+ {
+         xlator_t *this     = THIS;
+         dict_t   *ctx      = aggr;
+         int32_t   src_port = 0;
+         int32_t   dst_port = 0;
+         int       ret      = 0;
+
+         GF_ASSERT (this);
+
+         if (!ctx) {
+                 ctx = glusterd_op_get_ctx ();
+                 if (!ctx) {
+                         gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                 GD_MSG_OPCTX_GET_FAIL,
+                                 "Operation Context is not present");
+                         GF_ASSERT (0);
+                 }
+         }
+
+         if (rsp_dict) {
+                 /* Both ports are optional; a failed lookup leaves them 0. */
+                 ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port);
+                 if (!ret) {
+                         gf_msg_debug ("glusterd", 0,
+                                       "src-brick-port=%d found", src_port);
+                 }
+
+                 ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port);
+                 if (!ret) {
+                         gf_msg_debug ("glusterd", 0,
+                                       "dst-brick-port=%d found", dst_port);
+                 }
+
+                 ret = glusterd_aggr_brick_mount_dirs (ctx, rsp_dict);
+                 if (ret) {
+                         gf_msg (this->name, GF_LOG_ERROR, 0,
+                                 GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL,
+                                 "Failed to "
+                                 "aggregate brick mount dirs");
+                         goto out;
+                 }
+         }
+
+         if (src_port != 0) {
+                 ret = dict_set_int32 (ctx, "src-brick-port", src_port);
+                 if (ret) {
+                         gf_msg_debug ("glusterd", 0,
+                                       "Could not set src-brick");
+                         goto out;
+                 }
+         }
+
+         if (dst_port != 0) {
+                 ret = dict_set_int32 (ctx, "dst-brick-port", dst_port);
+                 if (ret) {
+                         gf_msg_debug ("glusterd", 0,
+                                       "Could not set dst-brick");
+                         goto out;
+                 }
+         }
+
+ out:
+         return ret;
+
+ }
+
+ /*
+  * Import the friend volumes carried in @rsp_dict.  @aggr is not used.
+  * Returns 0 when @rsp_dict is NULL, otherwise the result of
+  * glusterd_import_friend_volumes().
+  */
+ int32_t
+ glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+ {
+         GF_ASSERT (rsp_dict);
+
+         if (rsp_dict == NULL)
+                 return 0;
+
+         return glusterd_import_friend_volumes (rsp_dict);
+ }
+
+ /*
+  * dict_foreach() callback used while merging a peer's profile response.
+  *
+  * @key is parsed as "<brick number><suffix>".  A copy of @value is stored
+  * in the aggregate dict (rsp_ctx->dict) under
+  * "<rsp_ctx->count + brick number><suffix>".  The "count" key is skipped.
+  *
+  * @data points to a glusterd_pr_brick_rsp_conv_t.  Always returns 0.
+  */
+ static int
+ _profile_volume_add_friend_rsp (dict_t *this, char *key, data_t *value,
+                                 void *data)
+ {
+         char                          new_key[256]   = {0};
+         glusterd_pr_brick_rsp_conv_t *rsp_ctx        = NULL;
+         data_t                       *new_value      = NULL;
+         int                           brick_count    = 0;
+         /* Zeroed so a key that fails to parse never yields garbage. */
+         char                          brick_key[256] = {0};
+
+         if (strcmp (key, "count") == 0)
+                 return 0;
+         /* The field width keeps an over-long key from overflowing brick_key. */
+         sscanf (key, "%d%255s", &brick_count, brick_key);
+         rsp_ctx = data;
+         new_value = data_copy (value);
+         GF_ASSERT (new_value);
+         snprintf (new_key, sizeof (new_key), "%d%s",
+                   rsp_ctx->count + brick_count, brick_key);
+         dict_set (rsp_ctx->dict, new_key, new_value);
+         return 0;
+ }
+
+ /*
+  * Merge a peer's profile response into the aggregate dictionary @aggr.
+  *
+  * Every entry of @rsp_dict is re-keyed past the brick count already held by
+  * @aggr (see _profile_volume_add_friend_rsp), and "count" in @aggr is then
+  * replaced by the sum of both counts.
+  *
+  * A response without "count" is not an error.  Returns 0 on success, -1
+  * when @aggr is NULL, or the error from storing the new count.
+  */
+ int
+ glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+ {
+         glusterd_pr_brick_rsp_conv_t  rsp_ctx    = {0};
+         xlator_t                     *this       = NULL;
+         int32_t                       rsp_bricks = 0;
+         int32_t                       have       = 0;
+         int                           ret        = 0;
+
+         GF_ASSERT (rsp_dict);
+         this = THIS;
+         GF_ASSERT (this);
+
+         ret = dict_get_int32 (rsp_dict, "count", &rsp_bricks);
+         if (ret) {
+                 /* no bricks in the rsp */
+                 ret = 0;
+                 goto out;
+         }
+
+         if (!aggr) {
+                 gf_msg (this->name, GF_LOG_ERROR, 0,
+                         GD_MSG_OPCTX_GET_FAIL,
+                         "Operation Context is not present");
+                 ret = -1;
+                 goto out;
+         }
+
+         /* A missing "count" in the aggregate simply leaves it at 0. */
+         ret = dict_get_int32 (aggr, "count", &have);
+         rsp_ctx.dict  = aggr;
+         rsp_ctx.count = have;
+         dict_foreach (rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx);
+
+         dict_del (aggr, "count");
+         ret = dict_set_int32 (aggr, "count", have + rsp_bricks);
+ out:
+         return ret;
+ }
+
+/* dict_foreach() callback used by
+ * glusterd_volume_status_copy_to_op_ctx_dict(): copy one pair of a status
+ * response into the aggregate dictionary.
+ *
+ * Keys of the form "brick<index>.<suffix>" whose index is greater than
+ * brick_index_max are re-keyed with the index shifted by other_count; every
+ * other key is stored under its original name.
+ *
+ * @this:  not used by this function.
+ * @key:   key of the current pair.
+ * @value: value of the current pair; a copy of it is stored.
+ * @data:  glusterd_status_rsp_conv_t holding the destination dictionary,
+ *         brick_index_max and other_count.
+ *
+ * Always returns 0; a failed dict_set() is only logged.
+ */
+static int
+glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value,
+                                     void *data)
+{
+        glusterd_status_rsp_conv_t *rsp_ctx         = NULL;
+        data_t                     *new_value       = NULL;
+        char                        brick_key[1024] = {0,};
+        char                        new_key[1024]   = {0,};
+        int32_t                     index           = 0;
+        int32_t                     ret             = 0;
+
+        /* Skip the following keys, they are already present in the ctx_dict */
+        /* Also, skip all the task related pairs. They will be added to the
+         * ctx_dict later
+         */
+        if (!strcmp (key, "count") || !strcmp (key, "cmd") ||
+            !strcmp (key, "brick-index-max") || !strcmp (key, "other-count") ||
+            !strncmp (key, "task", 4))
+                return 0;
+
+        rsp_ctx = data;
+        new_value = data_copy (value);
+        GF_ASSERT (new_value);
+
+        /* The field width keeps an over-long key from overflowing
+         * brick_key. When the key does not match, index stays 0 and
+         * brick_key stays empty. */
+        sscanf (key, "brick%d.%1023s", &index, brick_key);
+
+        if (index > rsp_ctx->brick_index_max) {
+                snprintf (new_key, sizeof (new_key), "brick%d.%s",
+                          index + rsp_ctx->other_count, brick_key);
+        } else {
+                /* snprintf always NUL-terminates, unlike strncpy. */
+                snprintf (new_key, sizeof (new_key), "%s", key);
+        }
+
+        ret = dict_set (rsp_ctx->dict, new_key, new_value);
+        if (ret)
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Unable to set key: %s in dict",
+                        key);
+
+        return 0;
+}
+
+/* dict_foreach() callback: copy a pair into the dictionary passed as data
+ * when its key starts with "task"; ignore every other pair.
+ *
+ * @this:  not used by this function.
+ * @key:   key of the current pair.
+ * @value: value of the current pair; a copy of it is stored.
+ * @data:  destination dictionary (dict_t *).
+ *
+ * Returns 0 for ignored pairs, otherwise the result of dict_set().
+ */
+static int
+glusterd_volume_status_copy_tasks_to_ctx_dict (dict_t *this, char *key,
+                                               data_t *value, void *data)
+{
+        dict_t *target = data;
+        data_t *copy   = NULL;
+
+        if (strncmp (key, "task", 4) != 0)
+                return 0;
+
+        GF_ASSERT (target);
+
+        copy = data_copy (value);
+        GF_ASSERT (copy);
+
+        return dict_set (target, key, copy);
+}
+
+/* Merge the task statuses found in rsp_dict into ctx_dict.
+ *
+ * When ctx_dict has no "tasks" entry yet, every "task*" pair of rsp_dict is
+ * copied over as-is. Otherwise each task of rsp_dict is matched by id with a
+ * task of ctx_dict, and the status stored in ctx_dict is replaced when the
+ * status from rsp_dict has equal or higher precedence.
+ *
+ * @ctx_dict: aggregate dictionary that is updated.
+ * @rsp_dict: response dictionary that is read.
+ *
+ * Returns 0 on success and non-zero on failure: a missing key, differing
+ * task counts, a task of rsp_dict without a counterpart in ctx_dict, or a
+ * failed update.
+ */
+int
+glusterd_volume_status_aggregate_tasks_status (dict_t *ctx_dict,
+                                               dict_t *rsp_dict)
+{
+        int              ret            = -1;
+        xlator_t        *this           = NULL;
+        int              local_count    = 0;
+        int              remote_count   = 0;
+        int              i              = 0;
+        int              j              = 0;
+        char             key[128]       = {0,};
+        char            *task_type      = NULL;
+        int              local_status   = 0;
+        int              remote_status  = 0;
+        char            *local_task_id  = NULL;
+        char            *remote_task_id = NULL;
+        /* Rebalance has 5 states,
+         * NOT_STARTED, STARTED, STOPPED, COMPLETE, FAILED
+         * The precedence used to determine the aggregate status
+         * is as below,
+         * STARTED > FAILED > STOPPED > COMPLETE > NOT_STARTED
+         */
+        /* TODO: Move this to a common place utilities that both
+         * CLI and glusterd need.
+         * Till then if the below algorithm is changed, change
+         * it in cli_xml_output_vol_rebalance_status in
+         * cli-xml-output.c
+         */
+        const int        rank[]         = {
+                [GF_DEFRAG_STATUS_STARTED]     = 1,
+                [GF_DEFRAG_STATUS_FAILED]      = 2,
+                [GF_DEFRAG_STATUS_STOPPED]     = 3,
+                [GF_DEFRAG_STATUS_COMPLETE]    = 4,
+                [GF_DEFRAG_STATUS_NOT_STARTED] = 5
+        };
+        const int        nranks         = (int) (sizeof (rank) /
+                                                 sizeof (rank[0]));
+
+        GF_ASSERT (ctx_dict);
+        GF_ASSERT (rsp_dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_int32 (rsp_dict, "tasks", &remote_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get remote task count");
+                goto out;
+        }
+        /* Local count will not be present when this is called for the first
+         * time with the origins rsp_dict
+         */
+        ret = dict_get_int32 (ctx_dict, "tasks", &local_count);
+        if (ret) {
+                ret = dict_foreach (rsp_dict,
+                                glusterd_volume_status_copy_tasks_to_ctx_dict,
+                                ctx_dict);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to copy tasks to ctx_dict.");
+                goto out;
+        }
+
+        if (local_count != remote_count) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_TASKS_COUNT_MISMATCH,
+                        "Local tasks count (%d) and "
+                        "remote tasks count (%d) do not match. Not aggregating "
+                        "tasks status.", local_count, remote_count);
+                ret = -1;
+                goto out;
+        }
+
+        /* Update the tasks statuses. For every remote tasks, search for the
+         * local task, and update the local task status based on the remote
+         * status.
+         */
+        for (i = 0; i < remote_count; i++) {
+
+                snprintf (key, sizeof (key), "task%d.type", i);
+                ret = dict_get_str (rsp_dict, key, &task_type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get task type from rsp dict");
+                        goto out;
+                }
+
+                /* Skip replace-brick status as it is going to be the same on
+                 * all peers. rb_status is set by the replace brick commit
+                 * function on all peers based on the replace brick command.
+                 * We return the value of rb_status as the status for a
+                 * replace-brick task in a 'volume status' command.
+                 */
+                if (!strcmp (task_type, "Replace brick"))
+                        continue;
+
+                snprintf (key, sizeof (key), "task%d.status", i);
+                ret = dict_get_int32 (rsp_dict, key, &remote_status);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get task status from rsp dict");
+                        goto out;
+                }
+                snprintf (key, sizeof (key), "task%d.id", i);
+                ret = dict_get_str (rsp_dict, key, &remote_task_id);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get task id from rsp dict");
+                        goto out;
+                }
+                for (j = 0; j < local_count; j++) {
+                        snprintf (key, sizeof (key), "task%d.id", j);
+                        ret = dict_get_str (ctx_dict, key, &local_task_id);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Failed to get local task-id");
+                                goto out;
+                        }
+
+                        if (strncmp (remote_task_id, local_task_id,
+                                     strlen (remote_task_id))) {
+                                /* Quit if a matching local task is not found */
+                                if (j == (local_count - 1)) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_TASKS_COUNT_MISMATCH,
+                                                "Could not find matching local "
+                                                "task for task %s",
+                                                remote_task_id);
+                                        /* ret is 0 from the lookup above;
+                                         * report the failure to the caller. */
+                                        ret = -1;
+                                        goto out;
+                                }
+                                continue;
+                        }
+
+                        snprintf (key, sizeof (key), "task%d.status", j);
+                        ret = dict_get_int32 (ctx_dict, key, &local_status);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "Failed to get local task status");
+                                goto out;
+                        }
+
+                        /* Both statuses were read from dictionaries and are
+                         * about to be used as array indices; never index
+                         * rank[] with a value outside the table. */
+                        if (remote_status < 0 || remote_status >= nranks ||
+                            local_status < 0 || local_status >= nranks) {
+                                gf_msg_debug (this->name, 0,
+                                              "Task status outside the known "
+                                              "range (local %d, remote %d). "
+                                              "Keeping the local status",
+                                              local_status, remote_status);
+                                break;
+                        }
+
+                        if (rank[remote_status] <= rank[local_status]) {
+                                ret = dict_set_int32 (ctx_dict, key,
+                                                      remote_status);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_TASK_STATUS_UPDATE_FAIL,
+                                                "Failed to "
+                                                "update task status");
+                                        goto out;
+                                }
+                        }
+                        break;
+                }
+        }
+
+out:
+        return ret;
+}
+
+/* Return _gf_true when cmd has no bit of GF_CLI_STATUS_MASK set (it equals
+ * GF_CLI_STATUS_NONE after masking) and has GF_CLI_STATUS_VOL set; return
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+glusterd_status_has_tasks (int cmd) {
+        if ((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE)
+                return _gf_false;
+
+        return (cmd & GF_CLI_STATUS_VOL) ? _gf_true : _gf_false;
+}
+
+/* Merge a 'volume status' response into the operation context dictionary.
+ *
+ * @aggr:     destination dictionary; when NULL the dictionary returned by
+ *            glusterd_op_get_ctx (GD_OP_STATUS_VOLUME) is used instead.
+ * @rsp_dict: response dictionary that is read.
+ *
+ * Steps, in order:
+ *  - for a GF_CLI_STATUS_ALL command on the originator, copy "vol_count" and
+ *    the "vol<i>" names;
+ *  - unless the command has GF_CLI_STATUS_TASKS set, copy the node entries
+ *    through glusterd_volume_status_add_peer_rsp() and update "count",
+ *    "other-count", "brick-index-max", "hot_brick_count" and "type";
+ *  - aggregate the task statuses when the command carries tasks.
+ *
+ * Returns 0 on success (including a response without "count") and non-zero
+ * on failure.
+ */
+int
+glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+        int                             ret             = 0;
+        glusterd_status_rsp_conv_t      rsp_ctx         = {0};
+        int32_t                         cmd             = GF_CLI_STATUS_NONE;
+        int32_t                         node_count      = 0;
+        int32_t                         other_count     = 0;
+        int32_t                         brick_index_max = -1;
+        int32_t                         hot_brick_count = -1;
+        int32_t                         type            = -1;
+        int32_t                         rsp_node_count  = 0;
+        int32_t                         rsp_other_count = 0;
+        int                             vol_count       = -1;
+        int                             i               = 0;
+        dict_t                         *ctx_dict        = NULL;
+        char                            key[PATH_MAX]   = {0,};
+        char                           *volname         = NULL;
+        glusterd_volinfo_t             *volinfo         = NULL;
+
+        GF_ASSERT (rsp_dict);
+
+        if (aggr) {
+                ctx_dict = aggr;
+        } else {
+                ctx_dict = glusterd_op_get_ctx (GD_OP_STATUS_VOLUME);
+        }
+
+        ret = dict_get_int32 (ctx_dict, "cmd", &cmd);
+        if (ret)
+                goto out;
+
+        if ((cmd & GF_CLI_STATUS_ALL) && is_origin_glusterd (ctx_dict)) {
+                ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count);
+                if (ret == 0) {
+                        ret = dict_set_int32 (ctx_dict, "vol_count",
+                                              vol_count);
+                        if (ret)
+                                goto out;
+
+                        for (i = 0; i < vol_count; i++) {
+                                snprintf (key, sizeof (key), "vol%d", i);
+                                ret = dict_get_str (rsp_dict, key, &volname);
+                                if (ret)
+                                        goto out;
+
+                                ret = dict_set_str (ctx_dict, key, volname);
+                                if (ret)
+                                        goto out;
+                        }
+                } else {
+                        /* Ignore the error as still the aggregation applies in
+                         * case its a task sub command */
+                        ret = 0;
+                }
+        }
+
+        if ((cmd & GF_CLI_STATUS_TASKS) != 0)
+                goto aggregate_tasks;
+
+        ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count);
+        if (ret) {
+                ret = 0; //no bricks in the rsp
+                goto out;
+        }
+
+        ret = dict_get_int32 (rsp_dict, "other-count", &rsp_other_count);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get other count from rsp_dict");
+                goto out;
+        }
+
+        /* The results of these two lookups are not checked. */
+        ret = dict_get_int32 (ctx_dict, "count", &node_count);
+        ret = dict_get_int32 (ctx_dict, "other-count", &other_count);
+        if (!dict_get (ctx_dict, "brick-index-max")) {
+                ret = dict_get_int32 (rsp_dict, "brick-index-max",
+                                      &brick_index_max);
+                if (ret)
+                        goto out;
+                ret = dict_set_int32 (ctx_dict, "brick-index-max",
+                                      brick_index_max);
+                if (ret)
+                        goto out;
+
+        } else {
+                ret = dict_get_int32 (ctx_dict, "brick-index-max",
+                                      &brick_index_max);
+        }
+
+        rsp_ctx.count = node_count;
+        rsp_ctx.brick_index_max = brick_index_max;
+        rsp_ctx.other_count = other_count;
+        rsp_ctx.dict = ctx_dict;
+
+        dict_foreach (rsp_dict, glusterd_volume_status_add_peer_rsp, &rsp_ctx);
+
+        ret = dict_set_int32 (ctx_dict, "count", node_count + rsp_node_count);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to update node count");
+                goto out;
+        }
+
+        ret = dict_set_int32 (ctx_dict, "other-count",
+                              (other_count + rsp_other_count));
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to update other-count");
+                goto out;
+        }
+
+        ret = dict_get_str (ctx_dict, "volname", &volname);
+        if (ret) {
+                /* This is a failed lookup, so it is reported with the GET
+                 * message id. */
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get volname");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL,
+                        "Failed to get volinfo for volume: %s",
+                        volname);
+                goto out;
+        }
+
+        /* For non-tier volumes hot_brick_count and type keep their initial
+         * -1 and are stored as such below. */
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                ret = dict_get_int32 (rsp_dict, "hot_brick_count",
+                                      &hot_brick_count);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get hot brick count from rsp_dict");
+                        goto out;
+                }
+
+                ret = dict_get_int32 (rsp_dict, "type", &type);
+                if (ret) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get type from rsp_dict");
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (ctx_dict, "hot_brick_count",
+                              hot_brick_count);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to update hot_brick_count");
+                goto out;
+        }
+
+        ret = dict_set_int32 (ctx_dict, "type", type);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to update type");
+                goto out;
+        }
+
+aggregate_tasks:
+        /* Tasks are only present for a normal status command for a volume or
+         * for an explicit tasks status command for a volume
+         */
+        if (!(cmd & GF_CLI_STATUS_ALL) &&
+            (((cmd & GF_CLI_STATUS_TASKS) != 0) ||
+             glusterd_status_has_tasks (cmd)))
+                ret = glusterd_volume_status_aggregate_tasks_status (ctx_dict,
+                                                                     rsp_dict);
+
+out:
+        return ret;
+}
+
+/* Merge one scrub-status response into the aggregate dictionary.
+ *
+ * The per-node entries of rsp_dict are stored under the index
+ * <rsp_dict "count">; they are copied into aggr under the index
+ * <rsp_dict "count"> + <aggr "count">, and aggr's "count" is set to that sum.
+ * The volume-wide entries ("bitrot_log_file", "scrub_log_file",
+ * "features.scrub-freq", "features.scrub-throttle", "features.scrub") are
+ * copied under their own names.
+ *
+ * @aggr:     aggregate dictionary; must contain "volname".
+ * @rsp_dict: response dictionary that is read.
+ *
+ * Returns 0 on success, and also when rsp_dict has no "count". Returns
+ * non-zero when the volume cannot be resolved or a volume-wide entry cannot
+ * be stored. Failures to store per-node entries are only logged.
+ */
+int
+glusterd_volume_bitrot_scrub_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+        int                      ret             = -1;
+        int                      j               = 0;
+        uint64_t                 value           = 0;
+        uint64_t                 bad_files       = 0;
+        char                     key[256]        = {0,};
+        char                    *last_scrub_time = NULL;
+        char                    *scrub_time      = NULL;
+        char                    *volname         = NULL;
+        char                    *node_uuid       = NULL;
+        char                    *node_uuid_str   = NULL;
+        char                    *bitd_log        = NULL;
+        char                    *scrub_log       = NULL;
+        char                    *scrub_freq      = NULL;
+        char                    *scrub_state     = NULL;
+        char                    *scrub_impact    = NULL;
+        char                    *bad_gfid_str    = NULL;
+        xlator_t                *this            = NULL;
+        glusterd_conf_t         *priv            = NULL;
+        glusterd_volinfo_t      *volinfo         = NULL;
+        int                      src_count       = 0;
+        int                      dst_count       = 0;
+        int                      new_index       = 0;
+        int8_t                   scrub_running   = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (aggr, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+                        "Unable to find volinfo for volume: %s", volname);
+                goto out;
+        }
+
+        /* The result of this lookup is not checked. */
+        ret = dict_get_int32 (aggr, "count", &dst_count);
+
+        ret = dict_get_int32 (rsp_dict, "count", &src_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "failed to get count value");
+                ret = 0;
+                goto out;
+        }
+
+        new_index = src_count + dst_count;
+
+        ret = dict_set_int32 (aggr, "count", new_index);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set count in dictionary");
+
+        snprintf (key, sizeof (key), "node-uuid-%d", src_count);
+        ret = dict_get_str (rsp_dict, key, &node_uuid);
+        if (!ret) {
+                node_uuid_str = gf_strdup (node_uuid);
+                snprintf (key, sizeof (key), "node-uuid-%d", new_index);
+                ret = dict_set_dynstr (aggr, key, node_uuid_str);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "failed to set node-uuid");
+                }
+        }
+
+        snprintf (key, sizeof (key), "scrub-running-%d", src_count);
+        ret = dict_get_int8 (rsp_dict, key, &scrub_running);
+        if (!ret) {
+                snprintf (key, sizeof (key), "scrub-running-%d", new_index);
+                ret = dict_set_int8 (aggr, key, scrub_running);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-running value");
+                }
+        }
+
+        snprintf (key, sizeof (key), "scrubbed-files-%d", src_count);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "scrubbed-files-%d", new_index);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrubbed-file value");
+                }
+        }
+
+        snprintf (key, sizeof (key), "unsigned-files-%d", src_count);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "unsigned-files-%d", new_index);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "unsigned-file value");
+                }
+        }
+
+        snprintf (key, sizeof (key), "last-scrub-time-%d", src_count);
+        ret = dict_get_str (rsp_dict, key, &last_scrub_time);
+        if (!ret) {
+                scrub_time = gf_strdup (last_scrub_time);
+                snprintf (key, sizeof (key), "last-scrub-time-%d", new_index);
+                ret = dict_set_dynstr (aggr, key, scrub_time);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "last scrub time value");
+                }
+        }
+
+        snprintf (key, sizeof (key), "scrub-duration-%d", src_count);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "scrub-duration-%d", new_index);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrubbed-duration value");
+                }
+        }
+
+        snprintf (key, sizeof (key), "error-count-%d", src_count);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "error-count-%d", new_index);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set error "
+                                      "count value");
+                }
+
+                /* The key index is formatted with %d, so cap the number of
+                 * iterations at INT32_MAX. This keeps the signed counter
+                 * from overflowing when the reported count is larger. */
+                bad_files = (value > (uint64_t) INT32_MAX) ?
+                            (uint64_t) INT32_MAX : value;
+
+                /* Storing all the bad files in the dictionary */
+                for (j = 0; (uint64_t) j < bad_files; j++) {
+                        snprintf (key, sizeof (key), "quarantine-%d-%d", j,
+                                  src_count);
+                        ret = dict_get_str (rsp_dict, key, &bad_gfid_str);
+                        if (!ret) {
+                                snprintf (key, sizeof (key),
+                                          "quarantine-%d-%d", j, new_index);
+                                ret = dict_set_dynstr_with_alloc (aggr, key,
+                                                                bad_gfid_str);
+                                if (ret) {
+                                        gf_msg_debug (this->name, 0,
+                                                      "Failed to set "
+                                                      "bad file gfid");
+                                }
+                        }
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, "bitrot_log_file", &bitd_log);
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc (aggr, "bitrot_log_file",
+                                                  bitd_log);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "bitrot log file location");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, "scrub_log_file", &scrub_log);
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc (aggr, "scrub_log_file",
+                                                  scrub_log);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrubber log file location");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, "features.scrub-freq", &scrub_freq);
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc (aggr, "features.scrub-freq",
+                                                  scrub_freq);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-frequency value to dictionary");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, "features.scrub-throttle", &scrub_impact);
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc (aggr,
+                                                  "features.scrub-throttle",
+                                                  scrub_impact);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-throttle value to dictionary");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, "features.scrub", &scrub_state);
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc (aggr, "features.scrub",
+                                                  scrub_state);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub state value to dictionary");
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Add this node's scrub status to the aggregate dictionary.
+ *
+ * aggr's "count" is incremented and used as the index <i> of the entries
+ * written: "node-uuid-<i>", "scrub-running-<i>", "scrubbed-files-<i>",
+ * "unsigned-files-<i>", "last-scrub-time-<i>", "scrub-duration-<i>",
+ * "error-count-<i>" and "quarantine-<j>-<i>". The log file locations and the
+ * volume's scrub options are stored under their own names; "biweekly" and
+ * "lazy" are stored when the volume has no scrub-freq / scrub-throttle set.
+ *
+ * @aggr:     aggregate dictionary; must contain "volname".
+ * @rsp_dict: response dictionary that is read.
+ *
+ * Returns non-zero when a log file location cannot be stored or the volume
+ * cannot be resolved, and 0 otherwise. Other failures are only logged.
+ */
+int
+glusterd_bitrot_volume_node_rsp (dict_t *aggr, dict_t *rsp_dict)
+{
+        int                      ret             = -1;
+        uint64_t                 value           = 0;
+        uint64_t                 bad_files       = 0;
+        char                     key[256]        = {0,};
+        char                     buf[1024]       = {0,};
+        int32_t                  i               = 0;
+        int32_t                  j               = 0;
+        char                    *last_scrub_time = NULL;
+        char                    *scrub_time      = NULL;
+        char                    *volname         = NULL;
+        char                    *scrub_freq      = NULL;
+        char                    *scrub_state     = NULL;
+        char                    *scrub_impact    = NULL;
+        char                    *bad_gfid_str    = NULL;
+        xlator_t                *this            = NULL;
+        glusterd_conf_t         *priv            = NULL;
+        glusterd_volinfo_t      *volinfo         = NULL;
+        int8_t                   scrub_running   = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_set_str (aggr, "bitrot_log_file",
+                            (priv->bitd_svc.proc.logfile));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set bitrot log file location");
+                goto out;
+        }
+
+        ret = dict_set_str (aggr, "scrub_log_file",
+                            (priv->scrub_svc.proc.logfile));
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set scrubber log file location");
+                goto out;
+        }
+
+        ret = dict_get_str (aggr, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+                        "Unable to find volinfo for volume: %s", volname);
+                goto out;
+        }
+
+        /* The result of this lookup is not checked. */
+        ret = dict_get_int32 (aggr, "count", &i);
+        i++;
+
+        ret = dict_set_int32 (aggr, "count", i);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set count");
+
+        snprintf (buf, sizeof (buf), "%s", uuid_utoa (MY_UUID));
+
+        snprintf (key, sizeof (key), "node-uuid-%d", i);
+        ret = dict_set_dynstr_with_alloc (aggr, key, buf);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "failed to set node-uuid");
+
+        ret = dict_get_str (volinfo->dict, "features.scrub-freq", &scrub_freq);
+        if (!ret) {
+                ret = dict_set_str (aggr, "features.scrub-freq", scrub_freq);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-frequency value to dictionary");
+                }
+        } else {
+                /* By Default scrub-frequency is bi-weekly. So when user
+                 * enable bitrot then scrub-frequency value will not be
+                 * present in volinfo->dict. Setting by-default value of
+                 * scrub-frequency explicitly for presenting it to scrub
+                 * status.
+                 */
+                ret = dict_set_dynstr_with_alloc (aggr, "features.scrub-freq",
+                                                  "biweekly");
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-frequency value to dictionary");
+                }
+        }
+
+        ret = dict_get_str (volinfo->dict, "features.scrub-throttle",
+                            &scrub_impact);
+        if (!ret) {
+                ret = dict_set_str (aggr, "features.scrub-throttle",
+                                    scrub_impact);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-throttle value to dictionary");
+                }
+        } else {
+                /* By Default scrub-throttle is lazy. So when user
+                 * enable bitrot then scrub-throttle value will not be
+                 * present in volinfo->dict. Setting by-default value of
+                 * scrub-throttle explicitly for presenting it to
+                 * scrub status.
+                 */
+                ret = dict_set_dynstr_with_alloc (aggr,
+                                                  "features.scrub-throttle",
+                                                  "lazy");
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-throttle value to dictionary");
+                }
+        }
+
+        ret = dict_get_str (volinfo->dict, "features.scrub", &scrub_state);
+        if (!ret) {
+                ret = dict_set_str (aggr, "features.scrub", scrub_state);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub state value to dictionary");
+                }
+        }
+
+        ret = dict_get_int8 (rsp_dict, "scrub-running", &scrub_running);
+        if (!ret) {
+                snprintf (key, sizeof (key), "scrub-running-%d", i);
+                /* NOTE(review): read as int8 but stored as uint64 here;
+                 * kept as in the original - confirm against the readers of
+                 * this key. */
+                ret = dict_set_uint64 (aggr, key, scrub_running);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrub-running value");
+                }
+        }
+
+        ret = dict_get_uint64 (rsp_dict, "scrubbed-files", &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "scrubbed-files-%d", i);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrubbed-file value");
+                }
+        }
+
+        ret = dict_get_uint64 (rsp_dict, "unsigned-files", &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "unsigned-files-%d", i);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "unsigned-file value");
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, "last-scrub-time", &last_scrub_time);
+        if (!ret) {
+                snprintf (key, sizeof (key), "last-scrub-time-%d", i);
+
+                scrub_time = gf_strdup (last_scrub_time);
+                ret = dict_set_dynstr (aggr, key, scrub_time);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "last scrub time value");
+                }
+        }
+
+        ret = dict_get_uint64 (rsp_dict, "scrub-duration", &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "scrub-duration-%d", i);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set "
+                                      "scrubbed-duration value");
+                }
+        }
+
+        ret = dict_get_uint64 (rsp_dict, "total-count", &value);
+        if (!ret) {
+                snprintf (key, sizeof (key), "error-count-%d", i);
+                ret = dict_set_uint64 (aggr, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0, "Failed to set error "
+                                      "count value");
+                }
+
+                /* The key index is formatted with %d, so cap the number of
+                 * iterations at INT32_MAX. This keeps the signed counter
+                 * from overflowing when the reported count is larger. */
+                bad_files = (value > (uint64_t) INT32_MAX) ?
+                            (uint64_t) INT32_MAX : value;
+
+                /* Storing all the bad files in the dictionary */
+                for (j = 0; (uint64_t) j < bad_files; j++) {
+                        snprintf (key, sizeof (key), "quarantine-%d", j);
+                        ret = dict_get_str (rsp_dict, key, &bad_gfid_str);
+                        if (!ret) {
+                                snprintf (key, sizeof (key),
+                                          "quarantine-%d-%d", j, i);
+                                ret = dict_set_dynstr_with_alloc (aggr, key,
+                                                                bad_gfid_str);
+                                if (ret) {
+                                        gf_msg_debug (this->name, 0,
+                                                      "Failed to set "
+                                                      "bad file gfid");
+                                }
+                        }
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Merge one rebalance response into the aggregate dictionary.
+ *
+ * The entries of rsp_dict are stored under the index <rsp_dict "count">.
+ * They are copied into aggr under an index derived from the position of the
+ * responding node in the peer list: counting starts at 2 and advances once
+ * for every peer that precedes the match. When rsp_dict has no node-uuid
+ * entry the index stays 2. aggr's "count" is raised to that index when it is
+ * smaller.
+ *
+ * @aggr:     aggregate dictionary; must contain "volname".
+ * @rsp_dict: response dictionary that is read.
+ *
+ * Returns non-zero when "volname" is missing or the volume cannot be found,
+ * and 0 otherwise. Failures to copy individual entries are only logged, and
+ * a NULL aggr is logged but still yields 0.
+ */
+int
+glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+        enum {
+                RSP_FIELD_UINT64,
+                RSP_FIELD_INT32,
+                RSP_FIELD_DOUBLE
+        };
+        /* Entries copied from "<name>-<index>" to "<name>-<current_index>",
+         * with the debug message logged when the store fails. */
+        static const struct {
+                const char *name;
+                int         kind;
+                const char *errmsg;
+        } fields[] = {
+                {"files",    RSP_FIELD_UINT64, "failed to set the file count"},
+                {"size",     RSP_FIELD_UINT64,
+                 "failed to set the size of migration"},
+                {"lookups",  RSP_FIELD_UINT64,
+                 "failed to set lookuped file count"},
+                {"status",   RSP_FIELD_INT32,  "failed to set status"},
+                {"failures", RSP_FIELD_UINT64, "failed to set failure count"},
+                {"skipped",  RSP_FIELD_UINT64, "failed to set skipped count"},
+                {"run-time", RSP_FIELD_DOUBLE, "failed to set run-time"},
+                {"demoted",  RSP_FIELD_UINT64, "failed to set demoted count"},
+                {"promoted", RSP_FIELD_UINT64, "failed to set promoted count"},
+        };
+        char                 key[256]       = {0,};
+        char                 new_key[256]   = {0,};
+        char                *node_uuid      = NULL;
+        char                *node_uuid_str  = NULL;
+        char                *volname        = NULL;
+        dict_t              *ctx_dict       = NULL;
+        double               elapsed_time   = 0;
+        glusterd_conf_t     *conf           = NULL;
+        glusterd_peerinfo_t *peerinfo       = NULL;
+        glusterd_volinfo_t  *volinfo        = NULL;
+        int                  ret            = 0;
+        int                  set_ret        = 0;
+        int32_t              index          = 0;
+        int32_t              count          = 0;
+        int32_t              current_index  = 2;
+        int32_t              value32        = 0;
+        uint64_t             value          = 0;
+        char                *peer_uuid_str  = NULL;
+        xlator_t            *this           = NULL;
+        size_t               f              = 0;
+
+        GF_ASSERT (rsp_dict);
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+
+        if (!aggr) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OPCTX_GET_FAIL,
+                        "Operation Context is not present");
+                /* ret is still 0 here, so this path reports success. */
+                goto out;
+        }
+        ctx_dict = aggr;
+
+        ret = dict_get_str (ctx_dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                goto out;
+
+        /* A missing "count" is logged; index then keeps its initial 0. */
+        ret = dict_get_int32 (rsp_dict, "count", &index);
+        if (ret)
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get index");
+
+        snprintf (key, sizeof (key), "node-uuid-%d", index);
+        ret = dict_get_str (rsp_dict, key, &node_uuid);
+        if (!ret) {
+                /* Finding the index of the node-uuid in the peer-list.
+                 * The comparison uses the string owned by rsp_dict, so it
+                 * does not depend on the allocation below succeeding. */
+                rcu_read_lock ();
+                cds_list_for_each_entry_rcu (peerinfo, &conf->peers,
+                                             uuid_list) {
+                        peer_uuid_str = gd_peer_uuid_str (peerinfo);
+                        if (strcmp (peer_uuid_str, node_uuid) == 0)
+                                break;
+
+                        current_index++;
+                }
+                rcu_read_unlock ();
+
+                /* Setting the largest index value as the total count. */
+                ret = dict_get_int32 (ctx_dict, "count", &count);
+                if (count < current_index) {
+                        ret = dict_set_int32 (ctx_dict, "count", current_index);
+                        if (ret)
+                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to set count");
+                }
+
+                /* Setting the same index for the node, as is in the peerlist.*/
+                snprintf (key, sizeof (key), "node-uuid-%d", current_index);
+                node_uuid_str = gf_strdup (node_uuid);
+                if (node_uuid_str)
+                        ret = dict_set_dynstr (ctx_dict, key, node_uuid_str);
+                else
+                        ret = -1;
+                if (ret) {
+                        gf_msg_debug (THIS->name, 0,
+                                      "failed to set node-uuid");
+                }
+        }
+
+        for (f = 0; f < sizeof (fields) / sizeof (fields[0]); f++) {
+                snprintf (key, sizeof (key), "%s-%d", fields[f].name, index);
+                snprintf (new_key, sizeof (new_key), "%s-%d", fields[f].name,
+                          current_index);
+
+                /* An entry missing from rsp_dict is skipped silently. */
+                set_ret = 0;
+                switch (fields[f].kind) {
+                case RSP_FIELD_UINT64:
+                        if (dict_get_uint64 (rsp_dict, key, &value) == 0)
+                                set_ret = dict_set_uint64 (ctx_dict, new_key,
+                                                           value);
+                        break;
+                case RSP_FIELD_INT32:
+                        if (dict_get_int32 (rsp_dict, key, &value32) == 0)
+                                set_ret = dict_set_int32 (ctx_dict, new_key,
+                                                          value32);
+                        break;
+                case RSP_FIELD_DOUBLE:
+                        if (dict_get_double (rsp_dict, key,
+                                             &elapsed_time) == 0)
+                                set_ret = dict_set_double (ctx_dict, new_key,
+                                                           elapsed_time);
+                        break;
+                default:
+                        break;
+                }
+
+                if (set_ret) {
+                        gf_msg_debug (THIS->name, 0, "%s", fields[f].errmsg);
+                }
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/* Append the "output_<n>" strings of src to dst.
+ *
+ * Entries "output_1" .. "output_<src count>" of src are duplicated into dst
+ * as "output_<dst count + 1>" onwards, then dst's "output_count" is set to
+ * the sum of both counts.
+ *
+ * @dst: dictionary that receives the entries.
+ * @src: dictionary that is read.
+ *
+ * Returns 0 when either dictionary is NULL or src has no "output_count";
+ * otherwise the result of the first failing dictionary call, or of the final
+ * "output_count" update.
+ */
+int
+glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src)
+{
+        char   name[PATH_MAX] = "";
+        char  *text           = NULL;
+        int    ret            = 0;
+        int    idx            = 0;
+        int    src_total      = 0;
+        int    dst_total      = 0;
+
+        if (!dst || !src) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_EMPTY,
+                        "Source or Destination "
+                        "dict is empty.");
+                goto out;
+        }
+
+        /* The result of the lookup in dst is not checked. */
+        (void) dict_get_int32 (dst, "output_count", &dst_total);
+
+        ret = dict_get_int32 (src, "output_count", &src_total);
+        if (ret) {
+                gf_msg_debug ("glusterd", 0, "No output from source");
+                ret = 0;
+                goto out;
+        }
+
+        idx = 1;
+        while (idx <= src_total) {
+                snprintf (name, sizeof (name) - 1, "output_%d", idx);
+                ret = dict_get_str (src, name, &text);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch %s",
+                                name);
+                        goto out;
+                }
+
+                snprintf (name, sizeof (name) - 1, "output_%d",
+                          idx + dst_total);
+                ret = dict_set_dynstr (dst, name, gf_strdup (text));
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Unable to set %s",
+                                name);
+                        goto out;
+                }
+
+                idx++;
+        }
+
+        ret = dict_set_int32 (dst, "output_count", dst_total + src_total);
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Copy every pair of rsp_dict into aggr with dict_copy().
+ *
+ * @aggr:     destination dictionary; nothing is copied when it is NULL.
+ * @rsp_dict: source dictionary.
+ *
+ * Always returns 0.
+ */
+int
+glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+        /* The current op is fetched but not used further. */
+        glusterd_op_t  op = glusterd_op_get_op ();
+
+        GF_ASSERT (aggr);
+        GF_ASSERT (rsp_dict);
+
+        if (aggr != NULL)
+                dict_copy (rsp_dict, aggr);
+
+        return 0;
+}
+
+/* Copy a heal response into the operation context.
+ *
+ * The transaction id stored in aggr is used to fetch the transaction's
+ * opinfo; rsp_dict is then copied into aggr, or into the opinfo's op_ctx
+ * when aggr is NULL.
+ *
+ * @aggr:     dictionary holding "transaction_id"; also the copy target.
+ * @rsp_dict: source dictionary.
+ *
+ * Returns 0 on success, or the error from the transaction id lookup or
+ * from glusterd_get_txn_opinfo().
+ */
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+        int                  ret     = 0;
+        dict_t              *target  = NULL;
+        uuid_t              *txn_id  = NULL;
+        glusterd_op_info_t   opinfo  = {{0},};
+
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_bin (aggr, "transaction_id", (void **)&txn_id);
+        if (ret)
+                return ret;
+        gf_msg_debug (THIS->name, 0, "transaction ID = %s",
+                      uuid_utoa (*txn_id));
+
+        ret = glusterd_get_txn_opinfo (txn_id, &opinfo);
+        if (ret) {
+                gf_msg_callingfn (THIS->name, GF_LOG_ERROR, 0,
+                                  GD_MSG_TRANS_OPINFO_GET_FAIL,
+                                  "Unable to get transaction opinfo "
+                                  "for transaction ID : %s",
+                                  uuid_utoa (*txn_id));
+                return ret;
+        }
+
+        GF_ASSERT (GD_OP_HEAL_VOLUME == opinfo.op);
+
+        target = aggr ? aggr : opinfo.op_ctx;
+        if (target)
+                dict_copy (rsp_dict, target);
+
+        return ret;
+}
+
+/* Store a copy of one key/value pair in the conversion context's dictionary
+ * under the key "<count>-<key>".
+ *
+ * @this:  not used by this function.
+ * @key:   key of the current pair.
+ * @value: value of the current pair; a copy of it is stored.
+ * @data:  glusterd_pr_brick_rsp_conv_t holding the destination dictionary
+ *         and the count used as the key prefix.
+ *
+ * Always returns 0.
+ */
+int
+_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
+                               void *data)
+{
+        glusterd_pr_brick_rsp_conv_t *conv              = data;
+        char                          prefixed_key[256] = {0};
+        data_t                       *copy              = data_copy (value);
+
+        GF_ASSERT (copy);
+
+        snprintf (prefixed_key, sizeof (prefixed_key), "%d-%s", conv->count,
+                  key);
+        dict_set (conv->dict, prefixed_key, copy);
+
+        return 0;
+}
+
+/* Aggregates a quota response (@rsp_dict) into the op context @dict.
+ *
+ * For quota operations other than limit-usage, limit-objects, remove and
+ * remove-objects, @rsp_dict is copied into @dict as-is. For those four
+ * operations the "gfid<N>" strings of @rsp_dict are duplicated into @dict
+ * after the entries @dict already counts, and "count" in @dict is set to the
+ * combined total.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+glusterd_volume_quota_copy_to_op_ctx_dict (dict_t *dict, dict_t *rsp_dict)
+{
+        int        ret            = -1;
+        int        idx            = 0;
+        int        existing       = 0;
+        int        incoming       = 0;
+        int        carries_gfids  = 0;
+        int        type           = GF_QUOTA_OPTION_TYPE_NONE;
+        char      *gfid           = NULL;
+        char      *gfid_copy      = NULL;
+        char       key[256]       = {0,};
+        xlator_t  *this           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get quota opcode");
+                return ret;
+        }
+
+        carries_gfids = (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) ||
+                        (type == GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS) ||
+                        (type == GF_QUOTA_OPTION_TYPE_REMOVE) ||
+                        (type == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS);
+        if (!carries_gfids) {
+                dict_copy (rsp_dict, dict);
+                return 0;
+        }
+
+        ret = dict_get_int32 (rsp_dict, "count", &incoming);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get the count of "
+                        "gfids from the rsp dict");
+                return ret;
+        }
+
+        /* "count" is absent from the op_ctx when this runs after
+         * self-staging on the originator; that is not an error, the
+         * running total simply stays at zero.
+         */
+        ret = dict_get_int32 (dict, "count", &existing);
+        if (ret) {
+                gf_msg_debug (this->name, 0, "Failed to get count of gfids"
+                              " from req dict. This could be because count is not yet"
+                              " copied from rsp_dict into op_ctx");
+        }
+
+        idx = 0;
+        while (idx < incoming) {
+                snprintf (key, sizeof (key) - 1, "gfid%d", idx);
+                ret = dict_get_str (rsp_dict, key, &gfid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Failed to get gfid "
+                                "from rsp dict");
+                        return ret;
+                }
+
+                /* Re-key the entry so it lands after the existing ones. */
+                snprintf (key, sizeof (key) - 1, "gfid%d", idx + existing);
+
+                gfid_copy = gf_strdup (gfid);
+                if (!gfid_copy)
+                        return -1;
+
+                ret = dict_set_dynstr (dict, key, gfid_copy);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to set gfid "
+                                "from rsp dict into req dict");
+                        GF_FREE (gfid_copy);
+                        return ret;
+                }
+
+                idx++;
+        }
+
+        ret = dict_set_int32 (dict, "count", incoming + existing);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set aggregated "
+                        "count in req dict");
+        }
+
+        return ret;
+}
+
+/* Folds one node's profile response into @op_ctx.
+ *
+ * The running "count" in @op_ctx is bumped (or started at 1), the node name
+ * is stored under "<count>-brick" ("host:path" for a brick, this node's UUID
+ * string for NFS), and every key of @rsp_dict is copied into @op_ctx with a
+ * "<count>-" prefix.
+ *
+ * Returns the result of storing the updated "count".
+ */
+int
+glusterd_profile_volume_brick_rsp (void *pending_entry,
+                                   dict_t *rsp_dict, dict_t *op_ctx,
+                                   char **op_errstr, gd_node_type type)
+{
+        int                           ret                       = 0;
+        glusterd_pr_brick_rsp_conv_t  conv                      = {0};
+        int32_t                       slot                      = 0;
+        char                          node_name[PATH_MAX+1024]  = {0};
+        char                          name_key[256]             = {0};
+        char                         *node_name_dup             = NULL;
+        glusterd_brickinfo_t         *brickinfo                 = NULL;
+        xlator_t                     *this                      = NULL;
+        glusterd_conf_t              *priv                      = NULL;
+
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (pending_entry);
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* The first response gets slot 1; later ones take the next slot. */
+        ret = dict_get_int32 (op_ctx, "count", &slot);
+        if (ret)
+                slot = 1;
+        else
+                slot++;
+
+        snprintf (name_key, sizeof (name_key), "%d-brick", slot);
+
+        switch (type) {
+        case GD_NODE_BRICK:
+                brickinfo = pending_entry;
+                snprintf (node_name, sizeof (node_name), "%s:%s",
+                          brickinfo->hostname, brickinfo->path);
+                break;
+        case GD_NODE_NFS:
+                snprintf (node_name, sizeof (node_name), "%s",
+                          uuid_utoa (MY_UUID));
+                break;
+        default:
+                break;
+        }
+
+        node_name_dup = gf_strdup (node_name);
+        GF_ASSERT (node_name_dup);
+        ret = dict_set_dynstr (op_ctx, name_key, node_name_dup);
+
+        conv.count = slot;
+        conv.dict = op_ctx;
+        dict_foreach (rsp_dict, _profile_volume_add_brick_rsp, &conv);
+
+        dict_del (op_ctx, "count");
+        ret = dict_set_int32 (op_ctx, "count", slot);
+
+        return ret;
+}
+
+//input-key: <replica-id>:<child-id>-*
+//output-key: <brick-id>-*
+/* dict_foreach() callback that re-keys one self-heal-daemon response entry.
+ *
+ * @key is split at its first two '-' characters: the text before the first
+ * is parsed as the replica id, the text between the first and second as the
+ * child id. The value is copied into rsp_ctx->dict under
+ * "<brick-id><rest-of-key>", where
+ * brick-id = replica-id * replica_count + child-id.
+ * For "-status" keys the entry is only copied when the brick is local.
+ *
+ * Keys whose numeric fields do not fit the scratch buffer are ignored
+ * instead of overflowing it. Always returns 0 so iteration continues.
+ */
+int
+_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data)
+{
+        char                       new_key[256]  = {0,};
+        char                       int_str[16]   = {0};
+        data_t                    *new_value     = NULL;
+        char                      *rxl_end       = NULL;
+        char                      *rxl_child_end = NULL;
+        glusterd_volinfo_t        *volinfo       = NULL;
+        int                        rxl_id        = 0;
+        int                        rxl_child_id  = 0;
+        int                        brick_id      = 0;
+        int                        int_len       = 0;
+        int                        ret           = 0;
+        glusterd_heal_rsp_conv_t  *rsp_ctx       = NULL;
+        glusterd_brickinfo_t      *brickinfo     = NULL;
+
+        rsp_ctx = data;
+        rxl_end = strchr (key, '-');
+        if (!rxl_end)
+                goto out;
+
+        int_len = (int) (rxl_end - key);
+        /* The field must fit int_str together with its terminator. */
+        if (int_len >= (int) sizeof (int_str))
+                goto out;
+        strncpy (int_str, key, int_len);
+        int_str[int_len] = '\0';
+        ret = gf_string2int (int_str, &rxl_id);
+        if (ret)
+                goto out;
+
+        rxl_child_end = strchr (rxl_end + 1, '-');
+        if (!rxl_child_end)
+                goto out;
+
+        int_len = (int) (rxl_child_end - (rxl_end + 1));
+        if (int_len >= (int) sizeof (int_str))
+                goto out;
+        strncpy (int_str, rxl_end + 1, int_len);
+        int_str[int_len] = '\0';
+        ret = gf_string2int (int_str, &rxl_child_id);
+        if (ret)
+                goto out;
+
+        volinfo = rsp_ctx->volinfo;
+        brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
+
+        if (!strcmp (rxl_child_end, "-status")) {
+                brickinfo = glusterd_get_brickinfo_by_position (volinfo,
+                                                                brick_id);
+                if (!brickinfo)
+                        goto out;
+                if (!glusterd_is_local_brick (rsp_ctx->this, volinfo,
+                                              brickinfo))
+                        goto out;
+        }
+        new_value = data_copy (value);
+        snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end);
+        dict_set (rsp_ctx->dict, new_key, new_value);
+
+out:
+        return 0;
+}
+
+/* dict_foreach() callback that re-keys one heal-statistics response entry.
+ *
+ * @key is split at its first three '-' characters into a leading name, a
+ * replica id and a child id. When the brick at position
+ * replica-id * replica_count + child-id exists and is local, the value is
+ * copied into rsp_ctx->dict under "<name>-<brick-id><rest-of-key>".
+ *
+ * Keys whose fields do not fit the scratch buffers are ignored instead of
+ * overflowing them. Always returns 0 so iteration continues.
+ */
+int
+_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t
+                                        *value, void *data)
+{
+        char                       new_key[256]          = {0,};
+        char                       int_str[16]           = {0,};
+        char                       key_begin_string[128] = {0,};
+        data_t                    *new_value             = NULL;
+        char                      *rxl_end               = NULL;
+        char                      *rxl_child_end         = NULL;
+        glusterd_volinfo_t        *volinfo               = NULL;
+        char                      *key_begin_str         = NULL;
+        int                        rxl_id                = 0;
+        int                        rxl_child_id          = 0;
+        int                        brick_id              = 0;
+        int                        int_len               = 0;
+        int                        ret                   = 0;
+        glusterd_heal_rsp_conv_t  *rsp_ctx               = NULL;
+        glusterd_brickinfo_t      *brickinfo             = NULL;
+
+        rsp_ctx = data;
+        key_begin_str = strchr (key, '-');
+        if (!key_begin_str)
+                goto out;
+
+        int_len = (int) (key_begin_str - key);
+        /* The prefix must fit key_begin_string with its terminator. */
+        if (int_len >= (int) sizeof (key_begin_string))
+                goto out;
+        strncpy (key_begin_string, key, int_len);
+        key_begin_string[int_len] = '\0';
+
+        rxl_end = strchr (key_begin_str + 1, '-');
+        if (!rxl_end)
+                goto out;
+
+        int_len = (int) (rxl_end - (key_begin_str + 1));
+        if (int_len >= (int) sizeof (int_str))
+                goto out;
+        strncpy (int_str, key_begin_str + 1, int_len);
+        int_str[int_len] = '\0';
+        ret = gf_string2int (int_str, &rxl_id);
+        if (ret)
+                goto out;
+
+        rxl_child_end = strchr (rxl_end + 1, '-');
+        if (!rxl_child_end)
+                goto out;
+
+        int_len = (int) (rxl_child_end - (rxl_end + 1));
+        if (int_len >= (int) sizeof (int_str))
+                goto out;
+        strncpy (int_str, rxl_end + 1, int_len);
+        int_str[int_len] = '\0';
+        ret = gf_string2int (int_str, &rxl_child_id);
+        if (ret)
+                goto out;
+
+        volinfo = rsp_ctx->volinfo;
+        brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
+
+        brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id);
+        if (!brickinfo)
+                goto out;
+        if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo))
+                goto out;
+
+        new_value = data_copy (value);
+        snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string,
+                  brick_id, rxl_child_end);
+        dict_set (rsp_ctx->dict, new_key, new_value);
+
+out:
+        return 0;
+
+}
+
+/* Merges a heal response (@rsp_dict) into @op_ctx.
+ *
+ * "volname" and "heal-op" are read from @req_dict and the volume is looked
+ * up. Every entry of @rsp_dict is then passed through the statistics
+ * re-keying callback when heal-op is GF_SHD_OP_STATISTICS, and through the
+ * plain heal re-keying callback otherwise.
+ *
+ * Returns 0 on success, non-zero when a lookup fails.
+ */
+int
+glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict,
+                                dict_t *op_ctx, char **op_errstr)
+{
+        int                       ret     = 0;
+        int                       heal_op = -1;
+        char                     *volname = NULL;
+        glusterd_volinfo_t       *volinfo = NULL;
+        glusterd_heal_rsp_conv_t  conv    = {0};
+
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (op_errstr);
+
+        ret = dict_get_str (req_dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                return ret;
+        }
+
+        ret = dict_get_int32 (req_dict, "heal-op", &heal_op);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get heal_op");
+                return ret;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                return ret;
+
+        conv.dict = op_ctx;
+        conv.volinfo = volinfo;
+        conv.this = THIS;
+
+        if (heal_op != GF_SHD_OP_STATISTICS) {
+                dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &conv);
+        } else {
+                dict_foreach (rsp_dict,
+                              _heal_volume_add_shd_rsp_of_statistics, &conv);
+        }
+
+        return ret;
+}
+
+/* dict_foreach() callback: stores a copy of @value in the context dictionary
+ * under "brick<count>.<key>", with count and dictionary taken from the
+ * glusterd_pr_brick_rsp_conv_t passed as @data. Always returns 0.
+ */
+int
+_status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
+                              void *data)
+{
+        glusterd_pr_brick_rsp_conv_t  *conv          = data;
+        data_t                        *value_copy    = NULL;
+        char                           prefixed[256] = {0,};
+
+        value_copy = data_copy (value);
+        snprintf (prefixed, sizeof (prefixed), "brick%d.%s", conv->count, key);
+        dict_set (conv->dict, prefixed, value_copy);
+
+        return 0;
+}
+
+/* Folds one node's status response into @op_ctx.
+ *
+ * The node "index" is read from (and then removed from) @rsp_dict; every
+ * remaining key is copied into @op_ctx as "brick<index>.<key>". The "count"
+ * in @op_ctx is started at 0 or incremented and written back.
+ *
+ * Returns 0 on success, non-zero when "index" is missing or "count" cannot
+ * be stored.
+ */
+int
+glusterd_status_volume_brick_rsp (dict_t *rsp_dict, dict_t *op_ctx,
+                                  char **op_errstr)
+{
+        int                           ret        = 0;
+        int                           node_index = 0;
+        int32_t                       total      = 0;
+        glusterd_pr_brick_rsp_conv_t  conv       = {0};
+
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_ctx);
+        GF_ASSERT (op_errstr);
+
+        if (dict_get_int32 (op_ctx, "count", &total))
+                total = 0;
+        else
+                total++;
+
+        ret = dict_get_int32 (rsp_dict, "index", &node_index);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Couldn't get node index");
+                return ret;
+        }
+        /* Drop "index" so it is not copied along with the payload. */
+        dict_del (rsp_dict, "index");
+
+        conv.dict = op_ctx;
+        conv.count = node_index;
+        dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &conv);
+
+        ret = dict_set_int32 (op_ctx, "count", total);
+        return ret;
+}
+
+/* Records this node's rebalance (defrag) status in @op_ctx.
+ *
+ * The volume named by "volname" in @req_dict is looked up; when @rsp_dict is
+ * given, the volume's rebalance status is refreshed from it. The "count" in
+ * @op_ctx is then incremented and the node UUID plus the rebalance and tier
+ * counters are stored under "<name>-<count>" keys.
+ *
+ * Individual dict-set failures are logged and do not stop the remaining
+ * values from being written; the function returns the result of the last
+ * operation performed.
+ */
+int
+glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
+                                 dict_t *op_ctx)
+{
+        int                  ret       = 0;
+        char                *volname   = NULL;
+        glusterd_volinfo_t  *volinfo   = NULL;
+        char                 key[256]  = {0,};
+        int32_t              i         = 0;
+        char                 buf[1024] = {0,};
+        char                *node_str  = NULL;
+
+        GF_ASSERT (req_dict);
+
+        ret = dict_get_str (req_dict, "volname", &volname);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+
+        if (ret)
+                goto out;
+
+        if (rsp_dict) {
+                ret = glusterd_defrag_volume_status_update (volinfo,
+                                                            rsp_dict);
+        }
+
+        if (!op_ctx) {
+                /* NOTE(review): op_ctx is NULL on this path, so the copy
+                 * has no caller-visible destination - confirm the intent.
+                 */
+                dict_copy (rsp_dict, op_ctx);
+                goto out;
+        }
+
+        /* A missing "count" leaves i at 0, so numbering starts at 1. */
+        ret = dict_get_int32 (op_ctx, "count", &i);
+        i++;
+
+        ret = dict_set_int32 (op_ctx, "count", i);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set count");
+
+        snprintf (buf, sizeof (buf), "%s", uuid_utoa (MY_UUID));
+        node_str = gf_strdup (buf);
+
+        snprintf (key, sizeof (key), "node-uuid-%d", i);
+        ret = dict_set_dynstr (op_ctx, key, node_str);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set node-uuid");
+
+        snprintf (key, sizeof (key), "files-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_files);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set file count");
+
+        snprintf (key, sizeof (key), "size-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_data);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set size of xfer");
+
+        snprintf (key, sizeof (key), "lookups-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.lookedup_files);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set lookedup file count");
+
+        snprintf (key, sizeof (key), "status-%d", i);
+        ret = dict_set_int32 (op_ctx, key, volinfo->rebal.defrag_status);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set status");
+
+        snprintf (key, sizeof (key), "failures-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_failures);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set failure count");
+
+        snprintf (key, sizeof (key), "skipped-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set skipped count");
+
+        snprintf (key, sizeof (key), "run-time-%d", i);
+        ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set run-time");
+
+        snprintf (key, sizeof (key), "promoted-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->tier_info.promoted);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set promoted count");
+
+        snprintf (key, sizeof (key), "demoted-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->tier_info.demoted);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set demoted count");
+
+out:
+        return ret;
+}
+/* Dispatches a node response to the aggregator matching @op.
+ *
+ * Profile, status, heal and scrub-status operations return their
+ * aggregator's result. The defrag aggregator is called but its return value
+ * is not propagated. Any other operation is a no-op returning 0.
+ */
+int32_t
+glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry,
+                          glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
+                          char **op_errstr, gd_node_type type)
+{
+        int  ret = 0;
+
+        GF_ASSERT (op_errstr);
+
+        if (op == GD_OP_PROFILE_VOLUME) {
+                ret = glusterd_profile_volume_brick_rsp (pending_entry,
+                                                         rsp_dict, op_ctx,
+                                                         op_errstr, type);
+        } else if (op == GD_OP_STATUS_VOLUME) {
+                ret = glusterd_status_volume_brick_rsp (rsp_dict, op_ctx,
+                                                        op_errstr);
+        } else if (op == GD_OP_DEFRAG_BRICK_VOLUME) {
+                glusterd_defrag_volume_node_rsp (req_dict,
+                                                 rsp_dict, op_ctx);
+        } else if (op == GD_OP_HEAL_VOLUME) {
+                ret = glusterd_heal_volume_brick_rsp (req_dict, rsp_dict,
+                                                      op_ctx, op_errstr);
+        } else if (op == GD_OP_SCRUB_STATUS) {
+                ret = glusterd_bitrot_volume_node_rsp (op_ctx, rsp_dict);
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Stores a heap-allocated copy of this node's UUID in @dict under
+ * "originator_uuid". Returns 0 on success and non-zero on failure; the copy
+ * is freed here when it could not be stored.
+ */
+int32_t
+glusterd_set_originator_uuid (dict_t *dict)
+{
+        int      ret     = -1;
+        uuid_t  *my_copy = NULL;
+
+        GF_ASSERT (dict);
+
+        my_copy = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+        if (!my_copy)
+                return -1;
+
+        gf_uuid_copy (*my_copy, MY_UUID);
+
+        ret = dict_set_bin (dict, "originator_uuid", my_copy,
+                            sizeof (uuid_t));
+        if (!ret)
+                return 0;
+
+        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                GD_MSG_DICT_SET_FAILED,
+                "Failed to set originator_uuid.");
+        GF_FREE (my_copy);
+
+        return ret;
+}
+
+/* Should be used only when an operation is in progress, as that is the only
+ * time a lock_owner is set
+ */
+/* Reports whether this glusterd originated the command described by @dict.
+ *
+ * When @dict carries "originator_uuid" it is compared against this node's
+ * UUID. Otherwise (the command came from an older glusterd that does not set
+ * it) the current lock owner is compared instead; if the lock owner cannot
+ * be fetched the answer is false.
+ */
+gf_boolean_t
+is_origin_glusterd (dict_t *dict)
+{
+        gf_boolean_t  ret             = _gf_false;
+        uuid_t        lock_owner      = {0,};
+        uuid_t       *originator_uuid = NULL;
+
+        GF_ASSERT (dict);
+
+        ret = dict_get_bin (dict, "originator_uuid",
+                            (void **) &originator_uuid);
+        if (!ret)
+                return !gf_uuid_compare (MY_UUID, *originator_uuid);
+
+        /* No originator recorded: fall back to the lock owner. */
+        ret = glusterd_get_lock_owner (&lock_owner);
+        if (ret)
+                return _gf_false;
+
+        return !gf_uuid_compare (MY_UUID, lock_owner);
+}
+
+/* Generates a fresh UUID and stores its string form in @dict under @key.
+ * Returns 0 on success and non-zero on failure; on failure the string copy
+ * is freed here.
+ */
+int
+glusterd_generate_and_set_task_id (dict_t *dict, char *key)
+{
+        int        ret        = -1;
+        uuid_t     new_id     = {0,};
+        char      *new_id_str = NULL;
+        xlator_t  *this       = NULL;
+
+        GF_ASSERT (dict);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        gf_uuid_generate (new_id);
+        new_id_str = gf_strdup (uuid_utoa (new_id));
+        if (!new_id_str) {
+                ret = -1;
+                goto cleanup;
+        }
+
+        ret = dict_set_dynstr (dict, key, new_id_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set %s in dict",
+                        key);
+        } else {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_TASK_ID_INFO, "Generated task-id %s for key %s",
+                        new_id_str, key);
+        }
+
+cleanup:
+        if (ret)
+                GF_FREE (new_id_str);
+        return ret;
+}
+
+/* Stores the string form of @uuid in @dict under @key.
+ *
+ * Returns 0 on success, -1 when the string copy cannot be allocated, and the
+ * dict_set_dynstr() error when the value cannot be stored, so callers can
+ * tell that the key was not set.
+ */
+int
+glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key)
+{
+        int    ret         = -1;
+        char   tmp_str[40] = {0,};
+        char  *task_id_str = NULL;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (key);
+
+        gf_uuid_unparse (uuid, tmp_str);
+        task_id_str = gf_strdup (tmp_str);
+        if (!task_id_str)
+                return -1;
+
+        ret = dict_set_dynstr (dict, key, task_id_str);
+        if (ret) {
+                GF_FREE (task_id_str);
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Error setting uuid in dict with key %s", key);
+        }
+
+        /* Propagate the failure instead of always reporting success. */
+        return ret;
+}
+
+/* dict_foreach() callback that raises the op-versions of the volinfo passed
+ * as @data to the op-version required by option @key.
+ *
+ * Xlator and boolean options only count when their value parses as a
+ * boolean and is enabled. The client op-version is raised only for client
+ * options. Always returns 0.
+ */
+int
+_update_volume_op_versions (dict_t *this, char *key, data_t *value, void *data)
+{
+        int                  required = 0;
+        int                  ret      = -1;
+        gf_boolean_t         enabled  = _gf_true;
+        glusterd_volinfo_t  *volinfo  = NULL;
+
+        GF_ASSERT (data);
+        volinfo = data;
+
+        required = glusterd_get_op_version_for_key (key);
+
+        if (gd_is_xlator_option (key) || gd_is_boolean_option (key)) {
+                ret = gf_string2boolean (value->data, &enabled);
+                if (ret || !enabled)
+                        return 0;
+        }
+
+        if (volinfo->op_version < required)
+                volinfo->op_version = required;
+
+        if (gd_is_client_option (key) &&
+            (volinfo->client_op_version < required))
+                volinfo->client_op_version = required;
+
+        return 0;
+}
+
+/* Recomputes @volinfo's op_version and client_op_version.
+ *
+ * Both are reset to 1, raised according to every option set on the volume,
+ * raised to at least 2 when the cluster op-version is >= 2 and open-behind
+ * is not disabled, and raised to at least GD_OP_VERSION_3_6_0 for disperse
+ * volumes.
+ */
+void
+gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
+{
+        glusterd_conf_t  *conf       = NULL;
+        gf_boolean_t      ob_enabled = _gf_false;
+
+        GF_ASSERT (volinfo);
+
+        conf = THIS->private;
+        GF_ASSERT (conf);
+
+        /* Start from the minimum and let the options raise it. */
+        volinfo->op_version = 1;
+        volinfo->client_op_version = 1;
+
+        dict_foreach (volinfo->dict, _update_volume_op_versions, volinfo);
+
+        /* Special case for open-behind: it is enabled automatically, so
+         * unless it has been explicitly disabled the volume op-versions
+         * must account for it once the cluster op-version allows it.
+         *
+         * TODO: Remove once we have a general way to update automatically
+         * enabled features
+         */
+        if (conf->op_version >= 2) {
+                ob_enabled = dict_get_str_boolean (volinfo->dict,
+                                                   "performance.open-behind",
+                                                   _gf_true);
+                if (ob_enabled && (volinfo->op_version < 2))
+                        volinfo->op_version = 2;
+                if (ob_enabled && (volinfo->client_op_version < 2))
+                        volinfo->client_op_version = 2;
+        }
+
+        if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE)
+                return;
+
+        if (volinfo->op_version < GD_OP_VERSION_3_6_0)
+                volinfo->op_version = GD_OP_VERSION_3_6_0;
+        if (volinfo->client_op_version < GD_OP_VERSION_3_6_0)
+                volinfo->client_op_version = GD_OP_VERSION_3_6_0;
+}
+
+/* Verifies that the cluster op-version is at least @min_op_version.
+ * Returns 0 when it is; otherwise writes an explanation into @msg (at most
+ * @msglen bytes), logs it and returns -1.
+ */
+int
+op_version_check (xlator_t *this, int min_op_version, char *msg, int msglen)
+{
+        glusterd_conf_t  *priv = NULL;
+
+        GF_ASSERT (this);
+        GF_ASSERT (msg);
+
+        priv = this->private;
+        if (priv->op_version >= min_op_version)
+                return 0;
+
+        snprintf (msg, msglen, "One or more nodes do not support "
+                  "the required op-version. Cluster op-version must "
+                  "atleast be %d.", min_op_version);
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_UNSUPPORTED_VERSION, "%s", msg);
+
+        return -1;
+}
+
+
+/* A task is committed/completed once the task-id for it is cleared */
+gf_boolean_t
+gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo)
+{
+        gf_boolean_t  committed = _gf_true;
+
+        GF_ASSERT (volinfo);
+
+        /* A remove-brick that still has a task-id has not been committed. */
+        if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) &&
+            !gf_uuid_is_null (volinfo->rebal.rebalance_id))
+                committed = _gf_false;
+
+        return committed;
+}
+
+/* Returns true when @op is a volume-status operation whose "cmd" in @dict
+ * has the GF_CLI_STATUS_TASKS bit set. Returns false for any other op or
+ * when "cmd" cannot be read (the latter is logged).
+ */
+gf_boolean_t
+glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict)
+{
+        int       ret = -1;
+        uint32_t  cmd = GF_CLI_STATUS_NONE;
+
+        if (op != GD_OP_STATUS_VOLUME)
+                return _gf_false;
+
+        ret = dict_get_uint32 (dict, "cmd", &cmd);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Failed to get opcode");
+                return _gf_false;
+        }
+
+        if (cmd & GF_CLI_STATUS_TASKS)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/* Tells if rebalance needs to be started for the given volume on the peer
+ *
+ * Rebalance should be started on a peer only if an involved brick is present on
+ * the peer.
+ *
+ * For a normal rebalance, if any one brick of the given volume is present on
+ * the peer, the rebalance process should be started.
+ *
+ * For a rebalance as part of a remove-brick operation, the rebalance process
+ * should be started only if one of the bricks being removed is present on the
+ * peer
+ */
+gf_boolean_t
+gd_should_i_start_rebalance (glusterd_volinfo_t *volinfo) {
+        int                    ret       = -1;
+        int                    total     = 0;
+        int                    idx       = 0;
+        char                   key[1023] = {0,};
+        char                  *brickname = NULL;
+        glusterd_brickinfo_t  *brick     = NULL;
+
+        if (volinfo->rebal.op == GD_OP_REBALANCE) {
+                /* Plain rebalance: any brick on this node is enough. */
+                cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                        if (gf_uuid_compare (MY_UUID, brick->uuid) == 0)
+                                return _gf_true;
+                }
+                return _gf_false;
+        }
+
+        if (volinfo->rebal.op != GD_OP_REMOVE_BRICK)
+                return _gf_false;
+
+        /* Remove-brick: only bricks that are being removed count. */
+        ret = dict_get_int32 (volinfo->rebal.dict, "count", &total);
+        if (ret)
+                return _gf_false;
+
+        for (idx = 1; idx <= total; idx++) {
+                memset (key, 0, sizeof (key));
+                snprintf (key, sizeof (key), "brick%d", idx);
+
+                ret = dict_get_str (volinfo->rebal.dict, key, &brickname);
+                if (ret)
+                        return _gf_false;
+
+                ret = glusterd_volume_brickinfo_get_by_brick (brickname,
+                                                              volinfo,
+                                                              &brick,
+                                                              _gf_false);
+                if (ret)
+                        return _gf_false;
+
+                if (gf_uuid_compare (MY_UUID, brick->uuid) == 0)
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/* Returns the boolean value of the VKEY_FEATURES_QUOTA option of @volinfo. */
+int
+glusterd_is_volume_quota_enabled (glusterd_volinfo_t *volinfo)
+{
+        int  enabled = glusterd_volinfo_get_boolean (volinfo,
+                                                     VKEY_FEATURES_QUOTA);
+
+        return enabled;
+}
+
+/* Returns the boolean value of the VKEY_FEATURES_INODE_QUOTA option of
+ * @volinfo.
+ */
+int
+glusterd_is_volume_inode_quota_enabled (glusterd_volinfo_t *volinfo)
+{
+        int  enabled = glusterd_volinfo_get_boolean (volinfo,
+                                                     VKEY_FEATURES_INODE_QUOTA);
+
+        return enabled;
+}
+
+/* Returns the boolean value of the VKEY_FEATURES_BITROT option of @volinfo. */
+int
+glusterd_is_bitrot_enabled (glusterd_volinfo_t *volinfo)
+{
+        int  enabled = glusterd_volinfo_get_boolean (volinfo,
+                                                     VKEY_FEATURES_BITROT);
+
+        return enabled;
+}
+
+/* Validates the gfids collected in @op_ctx for a quota operation and stores
+ * the agreed gfid in @req_dict under "gfid".
+ *
+ * Only limit-usage, limit-objects, remove and remove-objects operations are
+ * checked; any other quota opcode returns 0 without touching @req_dict.
+ *
+ * Returns 0 on success, -ENOENT when "count" in @op_ctx is 0 (with
+ * @op_errstr set), -1 on a gfid mismatch (with @op_errstr set) or on
+ * allocation/iteration failure, and the dict error when a lookup or the
+ * final set fails.
+ */
+int
+glusterd_validate_and_set_gfid (dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr)
+{
+ int ret = -1;
+ int count = 0;
+ int i = 0;
+ int op_code = GF_QUOTA_OPTION_TYPE_NONE;
+ uuid_t uuid1 = {0};
+ uuid_t uuid2 = {0,};
+ char *path = NULL;
+ char key[256] = {0,};
+ char *uuid1_str = NULL;
+ char *uuid1_str_dup = NULL;
+ char *uuid2_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_int32 (op_ctx, "type", &op_code);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get quota opcode");
+ goto out;
+ }
+
+ /* Nothing to validate for opcodes other than these four. */
+ if ((op_code != GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) &&
+ (op_code != GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS) &&
+ (op_code != GF_QUOTA_OPTION_TYPE_REMOVE) &&
+ (op_code != GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_str (op_ctx, "path", &path);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get path");
+ goto out;
+ }
+
+ ret = dict_get_int32 (op_ctx, "count", &count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get count");
+ goto out;
+ }
+
+ /* If count is 0, fail the command with ENOENT.
+ *
+ * If count is 1, treat gfid0 as the gfid on which the operation
+ * is to be performed and resume the command.
+ *
+ * if count > 1, get the 0th gfid from the op_ctx and,
+ * compare it with the remaining 'count -1' gfids.
+ * If they are found to be the same, set gfid0 in the op_ctx and
+ * resume the operation, else error out.
+ */
+
+ if (count == 0) {
+ gf_asprintf (op_errstr, "Failed to get trusted.gfid attribute "
+ "on path %s. Reason : %s", path,
+ strerror (ENOENT));
+ ret = -ENOENT;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key) - 1, "gfid%d", 0);
+
+ ret = dict_get_str (op_ctx, key, &uuid1_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get key '%s'",
+ key);
+ goto out;
+ }
+
+ gf_uuid_parse (uuid1_str, uuid1);
+
+ /* Every remaining gfid must equal gfid0. */
+ for (i = 1; i < count; i++) {
+ snprintf (key, sizeof (key)-1, "gfid%d", i);
+
+ ret = dict_get_str (op_ctx, key, &uuid2_str);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Failed to get key "
+ "'%s'", key);
+ goto out;
+ }
+
+ gf_uuid_parse (uuid2_str, uuid2);
+
+ if (gf_uuid_compare (uuid1, uuid2)) {
+ gf_asprintf (op_errstr, "gfid mismatch between %s and "
+ "%s for path %s", uuid1_str, uuid2_str,
+ path);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* i == count only when the loop ran to completion (or count is 1);
+ * any other value of count, e.g. a negative one, lands in the else
+ * branch.
+ */
+ if (i == count) {
+ uuid1_str_dup = gf_strdup (uuid1_str);
+ if (!uuid1_str_dup) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (req_dict, "gfid", uuid1_str_dup);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set gfid");
+ GF_FREE (uuid1_str_dup);
+ goto out;
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ITER_FAIL,
+ "Failed to iterate through %d"
+ " entries in the req dict", count);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Removes the on-disk quota configuration and checksum files of @volinfo
+ * from its volume directory, destroys the quota store handle and resets the
+ * quota configuration version to 0.
+ */
+void
+glusterd_clean_up_quota_store (glusterd_volinfo_t *volinfo)
+{
+        xlator_t         *this                  = NULL;
+        glusterd_conf_t  *conf                  = NULL;
+        char              voldir[PATH_MAX]      = {0,};
+        char              conf_file[PATH_MAX]   = {0,};
+        char              cksum_file[PATH_MAX]  = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, conf);
+
+        snprintf (conf_file, sizeof (conf_file), "%s/%s", voldir,
+                  GLUSTERD_VOLUME_QUOTA_CONFIG);
+        sys_unlink (conf_file);
+
+        snprintf (cksum_file, sizeof (cksum_file), "%s/%s", voldir,
+                  GLUSTERD_VOL_QUOTA_CKSUM_FILE);
+        sys_unlink (cksum_file);
+
+        gf_store_handle_destroy (volinfo->quota_conf_shandle);
+        volinfo->quota_conf_shandle = NULL;
+        volinfo->quota_conf_version = 0;
+}
+
+/* Lazily unmounts the quota auxiliary mount of volume @volname.
+ *
+ * Returns 0 when the auxiliary mount is not running, when the unmount
+ * succeeds, or when the unmount fails with EBADF (the mount is already
+ * gone); otherwise returns the gf_umount_lazy() error.
+ */
+int
+glusterd_remove_auxiliary_mount (char *volname)
+{
+        int        ret                = -1;
+        int        umount_errno       = 0;
+        char       mountdir[PATH_MAX] = {0,};
+        char       pidfile[PATH_MAX]  = {0,};
+        xlator_t  *this               = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GLUSTERFS_GET_AUX_MOUNT_PIDFILE (pidfile, volname);
+
+        if (!gf_is_service_running (pidfile, NULL)) {
+                gf_msg_debug (this->name, 0, "Aux mount of volume %s "
+                              "absent, hence returning", volname);
+                return 0;
+        }
+
+        GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (mountdir, volname, "/");
+        ret = gf_umount_lazy (this->name, mountdir, 1);
+        if (ret) {
+                /* Capture errno first: logging and strerror() may
+                 * overwrite it before the EBADF check below.
+                 */
+                umount_errno = errno;
+
+                gf_msg (this->name, GF_LOG_ERROR, umount_errno,
+                        GD_MSG_LAZY_UMOUNT_FAIL, "umount on %s failed, "
+                        "reason : %s", mountdir, strerror (umount_errno));
+
+                /* Hide EBADF as it means the mount is already gone */
+                if (umount_errno == EBADF)
+                        ret = 0;
+        }
+
+        return ret;
+}
+
+/* Stops the rebalance process of the given volume
+ */
+int
+gd_stop_rebalance_process (glusterd_volinfo_t *volinfo)
+{
+        xlator_t         *this                   = NULL;
+        glusterd_conf_t  *conf                   = NULL;
+        char              defrag_pid[PATH_MAX]   = {0,};
+
+        GF_ASSERT (volinfo);
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        /* Resolve the rebalance pidfile and ask the service to terminate. */
+        GLUSTERD_GET_DEFRAG_PID_FILE (defrag_pid, volinfo, conf);
+
+        return glusterd_service_stop ("rebalance", defrag_pid, SIGTERM,
+                                      _gf_true);
+}
+
+/* Drops one reference on @rpc with glusterd's big lock released.
+ *
+ * The big lock is unlocked, the reconnect state of the connection is cleaned
+ * up, the reference is dropped, and the big lock is taken again. Returns
+ * whatever rpc_clnt_unref() returned.
+ */
+rpc_clnt_t *
+glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc)
+{
+        rpc_clnt_t  *after_unref = NULL;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (rpc);
+
+        synclock_unlock (&conf->big_lock);
+
+        (void) rpc_clnt_reconnect_cleanup (&rpc->conn);
+        after_unref = rpc_clnt_unref (rpc);
+
+        synclock_lock (&conf->big_lock);
+
+        return after_unref;
+}
+
+/* List comparator: orders two volinfo list nodes by strcmp() of their
+ * volume names.
+ */
+int32_t
+glusterd_compare_volume_name(struct cds_list_head *list1,
+                             struct cds_list_head *list2)
+{
+        glusterd_volinfo_t  *lhs = cds_list_entry (list1, glusterd_volinfo_t,
+                                                   vol_list);
+        glusterd_volinfo_t  *rhs = cds_list_entry (list2, glusterd_volinfo_t,
+                                                   vol_list);
+
+        return strcmp(lhs->volname, rhs->volname);
+}
+
+/* Default synctask completion callback: releases glusterd's big lock and
+ * passes @ret through unchanged.
+ */
+static int
+gd_default_synctask_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+        glusterd_conf_t  *conf = NULL;
+
+        conf = THIS->private;
+        synclock_unlock (&conf->big_lock);
+
+        return ret;
+}
+
+/* Spawns @fn as a synctask with gd_default_synctask_cbk as its completion
+ * callback and @opaque as its argument. A failure to spawn is logged at
+ * CRITICAL level; nothing is returned to the caller.
+ */
+void
+glusterd_launch_synctask (synctask_fn_t fn, void *opaque)
+{
+        xlator_t         *this = THIS;
+        glusterd_conf_t  *priv = this->private;
+        int               ret  = -1;
+
+        /* synclock_lock must be called from within synctask, @fn must call
+         * it before it starts with its work */
+        ret = synctask_new (this->ctx->env, fn, gd_default_synctask_cbk, NULL,
+                            opaque);
+        if (!ret)
+                return;
+
+        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                GD_MSG_SPAWN_SVCS_FAIL, "Failed to spawn bricks"
+                " and other volume related services");
+}
+
+/*
+ * glusterd_enable_default_options enables certain options by default on the
+ * given volume based on the cluster op-version. This is called only during
+ * volume create or during volume reset
+ *
+ * @volinfo - volume on which to enable the default options
+ * @option - option to be set to default. If NULL, all possible options will be
+ * set to default
+ *
+ * Returns 0 on success and -1 on failure. If @option is given, but doesn't match
+ * any of the options that could be set, it is a success.
+ */
+/*
+ * TODO: Make this able to parse the volume-set table to set options
+ * Currently, the check and set for any option which wants to make use of this
+ * 'framework' needs to be done here manually. This would mean more work for the
+ * developer. This little extra work can be avoided if we make it possible to
+ * parse the volume-set table to get the options which could be set and their
+ * default values
+ */
+int
+glusterd_enable_default_options (glusterd_volinfo_t *volinfo, char *option)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ /* This section is compiled in only when GD_OP_VERSION_3_8_0 is
+ * defined for the build.
+ */
+#ifdef GD_OP_VERSION_3_8_0
+ if (conf->op_version >= GD_OP_VERSION_3_8_0) {
+ /* nfs.disable needs to be enabled for new volumes with
+ * >= gluster version 3.7 (for now) 3.8 later
+ */
+ if (!option || !strcmp (NFS_DISABLE_MAP_KEY, option)) {
+ ret = dict_set_dynstr_with_alloc (volinfo->dict,
+ NFS_DISABLE_MAP_KEY, "on");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set option '"
+ NFS_DISABLE_MAP_KEY "' on volume "
+ "%s", volinfo->volname);
+ goto out;
+ }
+ }
+ }
+#endif
+
+ if (conf->op_version >= GD_OP_VERSION_3_7_0) {
+ /* Set needed volume options in volinfo->dict
+ * For ex.,
+ *
+ * if (!option || !strcmp("someoption", option) {
+ * ret = dict_set_str(volinfo->dict, "someoption", "on");
+ * ...
+ * }
+ * */
+
+ /* readdir-ahead needs to be enabled for new volumes with
+ * >= gluster version 3.7
+ */
+ if (!option || !strcmp ("performance.readdir-ahead", option)) {
+ ret = dict_set_dynstr_with_alloc (volinfo->dict,
+ "performance.readdir-ahead", "on");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set option "
+ "'performance.readdir-ahead' on volume "
+ "%s", volinfo->volname);
+ goto out;
+ }
+ }
+
+ /* Option 'features.quota-deem-statfs' should not be turned off
+ * with 'gluster volume reset <VOLNAME>', since quota features
+ * can be reset only with 'gluster volume quota <VOLNAME>
+ * disable'.
+ */
+
+ /* Only re-enabled while quota is enabled on the volume. */
+ if (!option || !strcmp ("features.quota-deem-statfs", option)) {
+ if (glusterd_is_volume_quota_enabled(volinfo)) {
+ ret = dict_set_dynstr_with_alloc (volinfo->dict,
+ "features.quota-deem-statfs", "on");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set option "
+ "'features.quota-deem-statfs' "
+ "on volume %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ }
+
+ /* features.ctr-enabled is turned on only for tier volumes. */
+ if (!option || !strcmp ("features.ctr-enabled", option)) {
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ ret = dict_set_dynstr_with_alloc (volinfo->dict,
+ "features.ctr-enabled", "on");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set option "
+ "'features.ctr-enabled' "
+ "on volume %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ }
+
+ }
+out:
+ return ret;
+}
+
+/* Writes the path of @volinfo's rebalance volfile,
+ * "<volume-dir>/<volname>-rebalance.vol", into @path (at most @path_len
+ * bytes).
+ */
+void
+glusterd_get_rebalance_volfile (glusterd_volinfo_t *volinfo,
+                                char *path, int path_len)
+{
+        glusterd_conf_t  *priv             = NULL;
+        char              voldir[PATH_MAX] = {0,};
+
+        priv = THIS->private;
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+
+        snprintf (path, path_len, "%s/%s-rebalance.vol", voldir,
+                  volinfo->volname);
+}
+
+/* This function will update the backend file-system
+ * type and the mount options in origin and snap brickinfo.
+ * This will be later used to perform file-system specific operation
+ * during LVM snapshot.
+ *
+ * Both strings are copied with snprintf so that they are always
+ * NUL-terminated and never written past the end of the brickinfo
+ * buffers; an over-long value is truncated.
+ *
+ * @param brick_path   brickpath for which fstype to be found
+ * @param brickinfo    brickinfo of snap/origin volume
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_update_mntopts (char *brick_path, glusterd_brickinfo_t *brickinfo)
+{
+        int32_t         ret            = -1;
+        char           *mnt_pt         = NULL;
+        char            buff[PATH_MAX] = "";
+        struct mntent  *entry          = NULL;
+        struct mntent   save_entry     = {0,};
+        xlator_t       *this           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (brick_path);
+        GF_ASSERT (brickinfo);
+
+        ret = glusterd_get_brick_root (brick_path, &mnt_pt);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICKPATH_ROOT_GET_FAIL,
+                        "getting the root "
+                        "of the brick (%s) failed ", brick_path);
+                goto out;
+        }
+
+        entry = glusterd_get_mnt_entry_info (mnt_pt, buff, sizeof (buff),
+                                             &save_entry);
+        if (!entry) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_MNTENTRY_GET_FAIL,
+                        "getting the mount entry for "
+                        "the brick (%s) failed", brick_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Bounded copies: the mount table is external input and may hold
+         * strings longer than the fixed-size brickinfo fields. */
+        snprintf (brickinfo->fstype, sizeof (brickinfo->fstype), "%s",
+                  entry->mnt_type);
+        snprintf (brickinfo->mnt_opts, sizeof (brickinfo->mnt_opts), "%s",
+                  entry->mnt_opts);
+
+        ret = 0;
+out:
+        /* mnt_pt is owned by this function once the lookup has run;
+         * it is still NULL if that lookup failed. */
+        GF_FREE (mnt_pt);
+        return ret;
+}
+
+/* Fetch the translator-defined default value for the volume option
+ * described by @vme.
+ *
+ * On success *def_val is set to a gf_strdup'ed copy of the default (the
+ * literal "(null)" when the translator defines none); the caller owns that
+ * copy. *def_val is not written on the -2 paths or when the key lookup
+ * fails.
+ *
+ * @return  0 on success,
+ *         -2 when the translator's option table cannot be loaded or the
+ *            option is not found in it,
+ *         -1 when the option key cannot be derived from @vme or the copy of
+ *            the default value cannot be allocated.
+ */
+int
+glusterd_get_value_for_vme_entry (struct volopt_map_entry *vme, char **def_val)
+{
+        int                  ret            = -1;
+        char                *key            = NULL;
+        xlator_t            *this           = NULL;
+        char                *descr          = NULL;
+        char                *local_def_val  = NULL;
+        void                *dl_handle      = NULL;
+        volume_opt_list_t    vol_opt_handle = {{0},};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        CDS_INIT_LIST_HEAD (&vol_opt_handle.list);
+
+        if (_get_xlator_opt_key_from_vme (vme, &key)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GET_KEY_FAILED,
+                        "Failed to get %s key from "
+                        "volume option entry", vme->key);
+                goto out;
+        }
+
+        ret = xlator_volopt_dynload (vme->voltype, &dl_handle, &vol_opt_handle);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_XLATOR_VOLOPT_DYNLOAD_ERROR,
+                        "xlator_volopt_dynload error "
+                        "(%d)", ret);
+                ret = -2;
+                goto cont;
+        }
+
+        ret = xlator_option_info_list (&vol_opt_handle,key,
+                                       &local_def_val, &descr);
+        if (ret) {
+                /*Swallow Error if option not found*/
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GET_KEY_FAILED,
+                        "Failed to get option for %s "
+                        "key", key);
+                ret = -2;
+                goto cont;
+        }
+        if (!local_def_val)
+                local_def_val = "(null)";
+
+        /* Copy before the library is unloaded below. An allocation failure
+         * must be reported, otherwise the caller sees success with a NULL
+         * value. */
+        *def_val = gf_strdup (local_def_val);
+        if (!*def_val)
+                ret = -1;
+
+cont:
+        if (dl_handle) {
+                dlclose (dl_handle);
+                dl_handle = NULL;
+                vol_opt_handle.given_opt = NULL;
+        }
+        if (key) {
+                _free_xlator_opt_key (key);
+                key = NULL;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Store the effective value of one volume option (@input_key), or of every
+ * option in glusterd_volopt_map when @all_opts is set, into @ctx as
+ * "key<N>"/"value<N>" pairs plus a final "count".
+ *
+ * For each option the value is looked up, in order, in priv->opts, in
+ * @vol_dict, in the map entry's built-in value and finally in the
+ * translator's own option table.
+ *
+ * @param ctx        dictionary receiving the key/value pairs
+ * @param all_opts   report every option instead of only @input_key
+ * @param input_key  option to look up; must be non-NULL unless @all_opts
+ * @param orig_key   name used in the "does not exist" error string
+ * @param vol_dict   per-volume option dictionary (must be non-NULL)
+ * @param op_errstr  set to an allocated message when a single requested
+ *                   option is not present in the map
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_get_default_val_for_volopt (dict_t *ctx, gf_boolean_t all_opts,
+                                     char *input_key, char *orig_key,
+                                     dict_t *vol_dict, char **op_errstr)
+{
+        struct volopt_map_entry *vme               = NULL;
+        int                      ret               = -1;
+        int                      count             = 0;
+        char                     err_str[PATH_MAX] = "";
+        xlator_t                *this              = NULL;
+        char                    *def_val           = NULL;
+        /* Value allocated by glusterd_get_value_for_vme_entry; owned here
+         * and released once the dictionary holds its own copy. */
+        char                    *alloc_val         = NULL;
+        char                     dict_key[50]      = {0,};
+        gf_boolean_t             key_found         = _gf_false;
+        glusterd_conf_t         *priv              = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, vol_dict, out);
+
+        /* Check whether key is passed for a single option */
+        if (!all_opts && !input_key) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_KEY_NULL,
+                        "Key is NULL");
+                goto out;
+        }
+
+        for (vme = &glusterd_volopt_map[0]; vme->key; vme++) {
+                if (!all_opts && strcmp (vme->key, input_key))
+                        continue;
+                key_found = _gf_true;
+                /* First look for the key in the priv->opts for global option
+                 * and then into vol_dict, if its not present then look for
+                 * translator default value */
+                ret = dict_get_str (priv->opts, vme->key, &def_val);
+                if (!def_val)
+                        ret = dict_get_str (vol_dict, vme->key, &def_val);
+                if (!def_val && vme->value)
+                        def_val = vme->value;
+                if (!def_val) {
+                        ret = glusterd_get_value_for_vme_entry (vme,
+                                                                &alloc_val);
+                        /* When listing everything, options the translator
+                         * does not know about are simply skipped. */
+                        if (all_opts && (ret == -2))
+                                continue;
+                        /* Never hand a NULL value to the dictionary. */
+                        if (!ret && !alloc_val)
+                                ret = -1;
+                        if (ret)
+                                goto out;
+                        def_val = alloc_val;
+                }
+                count++;
+                snprintf (dict_key, sizeof (dict_key), "key%d", count);
+                ret = dict_set_str(ctx, dict_key, vme->key);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to "
+                                "set %s in dictionary", vme->key);
+                        goto out;
+                }
+                snprintf (dict_key, sizeof (dict_key), "value%d", count);
+                ret = dict_set_dynstr_with_alloc (ctx, dict_key, def_val);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "Failed to "
+                                "set %s for key %s in dictionary", def_val,
+                                vme->key);
+                        goto out;
+                }
+                /* The dictionary stored its own copy of the value. */
+                GF_FREE (alloc_val);
+                alloc_val = NULL;
+                def_val = NULL;
+                if (!all_opts)
+                        break;
+
+        }
+        if (!all_opts && !key_found)
+                goto out;
+
+        ret = dict_set_int32 (ctx, "count", count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set count "
+                        "in dictionary");
+        }
+
+out:
+        /* Covers the error exits taken while a translator default was
+         * still held. */
+        GF_FREE (alloc_val);
+        if (ret && !all_opts && !key_found) {
+                snprintf (err_str, sizeof (err_str),
+                          "option %s does not exist", orig_key);
+                *op_errstr = gf_strdup (err_str);
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Build the help text for all documented volume options and store it in
+ * @ctx under "help-str".
+ *
+ * Entries of type NO_DOC/GLOBAL_NO_DOC are skipped. For the others the
+ * description and default come from the map entry itself or, when the entry
+ * has no description, from the translator's option table. Options whose
+ * translator cannot be loaded or that are not found there are skipped.
+ *
+ * @param ctx      dictionary that receives the "help-str" value
+ * @param xml_out  emit XML (needs HAVE_LIB_XML) instead of plain text
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out)
+{
+        void                    *dl_handle = NULL;
+        volume_opt_list_t        vol_opt_handle = {{0},};
+        char                    *key = NULL;
+        struct volopt_map_entry *vme = NULL;
+        int                      ret = -1;
+        char                    *def_val = NULL;
+        char                    *descr = NULL;
+        char                     output_string[51200] = {0, };
+        char                    *output = NULL;
+        char                     tmp_str[2048] = {0, };
+#if (HAVE_LIB_XML)
+        xmlTextWriterPtr         writer = NULL;
+        xmlBufferPtr             buf = NULL;
+
+        if (xml_out) {
+                ret = init_sethelp_xml_doc (&writer, &buf);
+                if (ret) /*logging done in init_xml_lib*/
+                        goto out;
+        }
+#endif
+
+        CDS_INIT_LIST_HEAD (&vol_opt_handle.list);
+
+        for (vme = &glusterd_volopt_map[0]; vme->key; vme++) {
+
+                if ((vme->type == NO_DOC) || (vme->type == GLOBAL_NO_DOC))
+                        continue;
+
+                /* Each entry starts clean; only failures below make the
+                 * check at the end of the iteration abort the loop. */
+                ret = 0;
+
+                if (vme->description) {
+                        descr = vme->description;
+                        def_val = vme->value;
+                } else {
+                        if (_get_xlator_opt_key_from_vme (vme, &key)) {
+                                gf_msg_debug ("glusterd", 0, "Failed to "
+                                        "get %s key from volume option entry",
+                                        vme->key);
+                                /* Some error while getting key */
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = xlator_volopt_dynload (vme->voltype,
+                                                     &dl_handle,
+                                                     &vol_opt_handle);
+
+                        if (ret) {
+                                gf_msg_debug ("glusterd", 0,
+                                        "xlator_volopt_dynload error(%d)", ret);
+                                ret = 0;
+                                goto cont;
+                        }
+
+                        ret = xlator_option_info_list (&vol_opt_handle, key,
+                                                       &def_val, &descr);
+                        if (ret) { /*Swallow Error i.e if option not found*/
+                                gf_msg_debug ("glusterd", 0,
+                                        "Failed to get option for %s key", key);
+                                ret = 0;
+                                goto cont;
+                        }
+                }
+
+                if (xml_out) {
+#if (HAVE_LIB_XML)
+                        if (xml_add_volset_element (writer,vme->key,
+                                                    def_val, descr)) {
+                                ret = -1;
+                                goto cont;
+                        }
+#else
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_MODULE_NOT_INSTALLED,
+                                "Libxml not present");
+#endif
+                } else {
+                        snprintf (tmp_str, sizeof (tmp_str), "Option: %s\nDefault "
+                                        "Value: %s\nDescription: %s\n\n",
+                                        vme->key, def_val, descr);
+                        /* Bounded append: never write past output_string,
+                         * even if the option table outgrows the buffer. */
+                        strncat (output_string, tmp_str,
+                                 sizeof (output_string) -
+                                 strlen (output_string) - 1);
+                }
+cont:
+                if (dl_handle) {
+                        dlclose (dl_handle);
+                        dl_handle = NULL;
+                        vol_opt_handle.given_opt = NULL;
+                }
+                if (key) {
+                        _free_xlator_opt_key (key);
+                        key = NULL;
+                }
+                if (ret)
+                        goto out;
+        }
+
+#if (HAVE_LIB_XML)
+        if ((xml_out) &&
+            (ret = end_sethelp_xml_doc (writer)))
+                goto out;
+#else
+        if (xml_out)
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_MODULE_NOT_INSTALLED,
+                        "Libxml not present");
+#endif
+
+        if (!xml_out)
+                output = gf_strdup (output_string);
+        else
+#if (HAVE_LIB_XML)
+                output = gf_strdup ((char *)buf->content);
+#else
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_MODULE_NOT_INSTALLED,
+                        "Libxml not present");
+#endif
+
+        if (NULL == output) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (ctx, "help-str", output);
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/* Check that every client transport connected for @volname can operate at
+ * @op_version, i.e. that @op_version lies within the client's
+ * [min_op_version, max_op_version] range.
+ *
+ * The first offending client is logged and, when @op_errstr is non-NULL,
+ * described in a newly allocated message stored in *op_errstr.
+ *
+ * @return 0 when all matching clients support @op_version, -1 otherwise
+ */
+int
+glusterd_check_client_op_version_support (char *volname, uint32_t op_version,
+                                          char **op_errstr)
+{
+        int              ret  = 0;
+        xlator_t        *this = NULL;
+        glusterd_conf_t *priv = NULL;
+        rpc_transport_t *xprt = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        priv = this->private;
+        GF_ASSERT(priv);
+
+        pthread_mutex_lock (&priv->xprt_lock);
+        list_for_each_entry (xprt, &priv->xprt_list, list) {
+                if (strcmp (volname, xprt->peerinfo.volname))
+                        continue;
+                if ((op_version <= xprt->peerinfo.max_op_version) &&
+                    (op_version >= xprt->peerinfo.min_op_version))
+                        continue;
+
+                /* Report while xprt_lock is still held: the list entry is
+                 * only protected by that lock, so it must not be touched
+                 * after the unlock below. */
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UNSUPPORTED_VERSION,
+                        "Client %s is running with min_op_version as %d and "
+                        "max_op_version as %d and don't support the required "
+                        "op-version %d", xprt->peerinfo.identifier,
+                        xprt->peerinfo.min_op_version,
+                        xprt->peerinfo.max_op_version, op_version);
+                /* Best effort: the result is -1 whether or not the message
+                 * could be allocated. */
+                if (op_errstr)
+                        (void) gf_asprintf (op_errstr, "One of the client %s is "
+                                            "running with op-version %d and "
+                                            "doesn't support the required "
+                                            "op-version %d. This client needs to"
+                                            " be upgraded or disconnected "
+                                            "before running this command again",
+                                            xprt->peerinfo.identifier,
+                                            xprt->peerinfo.max_op_version,
+                                            op_version);
+                break;
+        }
+        pthread_mutex_unlock (&priv->xprt_lock);
+
+        return ret;
+}
+
+/* Tell whether this glusterd currently has any entry in its peer list. */
+gf_boolean_t
+glusterd_have_peers ()
+{
+        xlator_t        *this = THIS;
+        glusterd_conf_t *conf = NULL;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        if (cds_list_empty (&conf->peers))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* Tell whether @volinfo is in the GLUSTERD_STATUS_STARTED state. */
+gf_boolean_t
+glusterd_is_volume_started (glusterd_volinfo_t *volinfo)
+{
+        GF_ASSERT (volinfo);
+
+        if (volinfo->status != GLUSTERD_STATUS_STARTED)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* Insert @new into the list @head at the position chosen by @compare.
+   The element goes in front of the first existing entry for which
+   compare (new, entry) <= 0, or at the tail if there is none.
+   For ascending order @compare should return:
+   0: if both the arguments are equal
+   >0: if first argument is greater than second argument
+   <0: if first argument is less than second argument */
+void
+glusterd_list_add_order (struct cds_list_head *new, struct cds_list_head *head,
+                        int (*compare)(struct cds_list_head *,
+                                       struct cds_list_head *))
+{
+        struct cds_list_head *cursor = NULL;
+
+        cds_list_for_each_rcu (cursor, head) {
+                /* Keep walking while @new sorts after the current entry. */
+                if (compare (new, cursor) > 0)
+                        continue;
+                break;
+        }
+
+        /* Link @new right after the predecessor of the stopping point. */
+        cds_list_add_rcu (new, rcu_dereference (cursor->prev));
+}
+
+
+/* Decide whether operation @op (with sub-command @cmd) may run on @volinfo.
+ * Only volumes of type GF_CLUSTER_TYPE_TIER are restricted:
+ *   - add-brick and replace-brick are always refused,
+ *   - rebalance and remove-brick are allowed only for the tier/detach
+ *     sub-commands listed below,
+ *   - every other operation is allowed.
+ *
+ * @return 0 when the operation is permitted, -1 when it is not
+ */
+int
+glusterd_disallow_op_for_tier (glusterd_volinfo_t *volinfo, glusterd_op_t op,
+                               int cmd)
+{
+        xlator_t *this = THIS;
+        int       ret  = 0;
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                goto out;
+
+        if ((op == GD_OP_ADD_BRICK) || (op == GD_OP_REPLACE_BRICK)) {
+                ret = -1;
+                gf_msg_debug (this->name, 0,
+                              "Operation not permitted on tiered volume %s",
+                              volinfo->volname);
+        } else if (op == GD_OP_REBALANCE) {
+                switch (cmd) {
+                case GF_DEFRAG_CMD_START_TIER:
+                case GF_DEFRAG_CMD_STATUS_TIER:
+                case GF_DEFRAG_CMD_START_DETACH_TIER:
+                case GF_DEFRAG_CMD_STOP_DETACH_TIER:
+                case GF_DEFRAG_CMD_STATUS:
+                case GF_DEFRAG_CMD_DETACH_STATUS:
+                        ret = 0;
+                        break;
+                default:
+                        gf_msg_debug (this->name, 0,
+                                      "Rebalance Operation not permitted"
+                                      " on tiered volume %s",
+                                      volinfo->volname);
+                        ret = -1;
+                        break;
+                }
+        } else if (op == GD_OP_REMOVE_BRICK) {
+                switch (cmd) {
+                case GF_OP_CMD_DETACH_COMMIT_FORCE:
+                case GF_OP_CMD_DETACH_COMMIT:
+                case GF_OP_CMD_DETACH_START:
+                case GF_DEFRAG_CMD_STOP_DETACH_TIER:
+                        ret = 0;
+                        break;
+                default:
+                        gf_msg_debug (this->name, 0,
+                                      "Remove brick operation not "
+                                      "permitted on tiered volume %s",
+                                      volinfo->volname);
+                        ret = -1;
+                        break;
+                }
+        }
+        /* Any other operation leaves ret at 0. */
+out:
+        return ret;
+}
+
+/* Count the peers that are both connected and in the
+ * GD_FRIEND_STATE_BEFRIENDED state. The count starts at 1, so the result is
+ * one more than the number of such peers.
+ *
+ * @param count  output; written only once all arguments are validated
+ * @return 0 on success, -1 when THIS, its private data or @count is NULL
+ */
+int32_t
+glusterd_count_connected_peers (int32_t *count)
+{
+        xlator_t            *this = THIS;
+        glusterd_conf_t     *conf = NULL;
+        glusterd_peerinfo_t *peer = NULL;
+        int32_t              ret  = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+        GF_VALIDATE_OR_GOTO (this->name, count, out);
+
+        *count = 1;
+
+        rcu_read_lock ();
+        cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) {
+                /* Only connected friends are counted */
+                if (!peer->connected)
+                        continue;
+                if (peer->state.state != GD_FRIEND_STATE_BEFRIENDED)
+                        continue;
+                *count += 1;
+        }
+        rcu_read_unlock ();
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* Map a cluster @type to the name of the volume option that controls its
+ * self-heal daemon. Returns a string literal (not to be freed), or NULL for
+ * types other than replicate, stripe-replicate and disperse.
+ */
+char*
+gd_get_shd_key (int type)
+{
+        if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+            (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))
+                return "cluster.self-heal-daemon";
+
+        if (type == GF_CLUSTER_TYPE_DISPERSE)
+                return "cluster.disperse-self-heal-daemon";
+
+        return NULL;
+}
+
+/* Prepare a brick of a replicate volume after add-brick or replace-brick.
+ *
+ * Steps, in order:
+ *   1. set the GF_AFR_DIRTY xattr on the brick path,
+ *   2. create a temporary mount directory,
+ *   3. build a glusterfs client command line for that directory (volfile
+ *      server for replace-brick, dummy client volfile for add-brick),
+ *   4. run it with priv->big_lock released,
+ *   5. set GF_AFR_REPLACE_BRICK / GF_AFR_ADD_BRICK (value: the brick id) on
+ *      the mount and lazily unmount it,
+ *   6. take priv->big_lock again.
+ *
+ * @param op  must be GD_OP_REPLACE_BRICK or GD_OP_ADD_BRICK; anything else
+ *            is rejected before any side effect takes place
+ * @return 0 on success, non-zero on failure
+ */
+int
+glusterd_handle_replicate_brick_ops (glusterd_volinfo_t *volinfo,
+                                     glusterd_brickinfo_t *brickinfo,
+                                     glusterd_op_t op)
+{
+        int32_t                    ret               = -1;
+        char                       tmpmount[]        = "/tmp/mntXXXXXX";
+        char                       logfile[PATH_MAX] = {0,};
+        int                        dirty[3]          = {0,};
+        runner_t                   runner            = {0};
+        glusterd_conf_t           *priv              = NULL;
+        char                      *pid               = NULL;
+        char                       vpath[PATH_MAX]   = {0,};
+        char                      *volfileserver     = NULL;
+
+        priv = THIS->private;
+        GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+
+        /* Without this check an unexpected op would reach runner_run()
+         * below with a runner that was never set up by runinit(). */
+        if ((op != GD_OP_REPLACE_BRICK) && (op != GD_OP_ADD_BRICK)) {
+                gf_log (THIS->name, GF_LOG_ERROR, "Unsupported operation "
+                        "%d for replicate brick handling", (int)op);
+                ret = -1;
+                goto out;
+        }
+
+        dirty[2] = hton32(1);
+
+        ret = sys_lsetxattr (brickinfo->path, GF_AFR_DIRTY, dirty,
+                             sizeof (dirty), 0);
+        if (ret == -1) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SETXATTR_FAIL, "Failed to set extended"
+                        " attribute %s : %s.", GF_AFR_DIRTY, strerror (errno));
+                goto out;
+        }
+
+        if (mkdtemp (tmpmount) == NULL) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "failed to create a temporary mount directory.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_asprintf (&pid, "%d", GF_CLIENT_PID_SELF_HEALD);
+        if (ret < 0)
+                goto out;
+
+        switch (op) {
+        case GD_OP_REPLACE_BRICK:
+                if (dict_get_str (THIS->options,
+                                  "transport.socket.bind-address",
+                                  &volfileserver) != 0)
+                        volfileserver = "localhost";
+
+                snprintf (logfile, sizeof (logfile),
+                          DEFAULT_LOG_FILE_DIRECTORY"/%s-replace-brick-mount.log",
+                          volinfo->volname);
+                if (!*logfile) {
+                        ret = -1;
+                        goto out;
+                }
+                runinit (&runner);
+                runner_add_args (&runner, SBIN_DIR"/glusterfs",
+                                 "-s", volfileserver,
+                                 "--volfile-id", volinfo->volname,
+                                 "--client-pid", pid,
+                                 "-l", logfile, tmpmount, NULL);
+                break;
+
+        case GD_OP_ADD_BRICK:
+                snprintf (logfile, sizeof (logfile),
+                          DEFAULT_LOG_FILE_DIRECTORY"/%s-add-brick-mount.log",
+                          volinfo->volname);
+                if (!*logfile) {
+                        ret = -1;
+                        goto out;
+                }
+                ret = glusterd_get_dummy_client_filepath (vpath, volinfo,
+                                                    volinfo->transport_type);
+                if (ret) {
+                        gf_log ("", GF_LOG_ERROR, "Failed to get "
+                                "volfile path");
+                        goto out;
+                }
+                runinit (&runner);
+                runner_add_args (&runner, SBIN_DIR"/glusterfs",
+                                 "--volfile", vpath,
+                                 "--client-pid", pid,
+                                 "-l", logfile, tmpmount, NULL);
+                break;
+        default:
+                /* Not reachable: other ops are rejected above. */
+                break;
+        }
+        /* big_lock is dropped while the external command runs and is taken
+         * again at the "lock" label on every path from here on. */
+        synclock_unlock (&priv->big_lock);
+        ret = runner_run (&runner);
+
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "mount command"
+                        " failed.");
+                goto lock;
+        }
+        ret = sys_lsetxattr (tmpmount, (op == GD_OP_REPLACE_BRICK) ?
+                             GF_AFR_REPLACE_BRICK : GF_AFR_ADD_BRICK,
+                             brickinfo->brick_id, sizeof (brickinfo->brick_id),
+                             0);
+        if (ret == -1)
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SETXATTR_FAIL, "Failed to set extended"
+                        " attribute %s : %s", (op == GD_OP_REPLACE_BRICK) ?
+                        GF_AFR_REPLACE_BRICK : GF_AFR_ADD_BRICK,
+                        strerror (errno));
+        gf_umount_lazy (THIS->name, tmpmount, 1);
+lock:
+        synclock_lock (&priv->big_lock);
+out:
+        GF_FREE (pid);
+        gf_msg_debug ("glusterd", 0, "Returning with ret");
+        return ret;
+}
+
+/* Number the bricks of @volinfo in list order: consecutive runs of
+ * replica_count bricks receive the same brickinfo->group value, starting
+ * at 0 and increasing by one per run.
+ */
+void
+assign_brick_groups (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t *brick  = NULL;
+        uint16_t              group  = 0;
+        int                   filled = 0;
+
+        list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                brick->group = group;
+                filled++;
+                if (filled >= volinfo->replica_count) {
+                        /* current group is complete, open the next one */
+                        filled = 0;
+                        group++;
+                }
+        }
+}
+
+/* Starting at @brickinfo, walk forward through the brick list of @volinfo
+ * and return the last consecutive brick whose group equals
+ * brickinfo->group. Returns @brickinfo itself when the following brick
+ * belongs to another group or list_next yields NULL.
+ */
+glusterd_brickinfo_t*
+get_last_brick_of_brick_group (glusterd_volinfo_t *volinfo,
+                               glusterd_brickinfo_t *brickinfo)
+{
+        glusterd_brickinfo_t *next = NULL;
+        glusterd_brickinfo_t *last = NULL;
+
+        last = brickinfo;
+        for (;;) {
+                next = list_next (last, &volinfo->bricks,
+                                  glusterd_brickinfo_t, brick_list);
+                if (!next || (next->group != brickinfo->group)) {
+                        break;
+                }
+                last = next;
+        }
+
+        return last;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
new file mode 100644
index 00000000000..f4c4138829f
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -0,0 +1,718 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_UTILS_H
+#define _GLUSTERD_UTILS_H
+
+#include <pthread.h>
+#include "compat-uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+
+#include "glusterfs3-xdr.h"
+#include "glusterd-peer-utils.h"
+
+#define GLUSTERD_SOCK_DIR "/var/run/gluster"
+/* Store "<volname>-client-<brickid>" in brickinfo->brick_id.
+ * Arguments are parenthesized so that arbitrary expressions may be passed,
+ * and the write is bounded by the size of the brick_id array. */
+#define GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid) do {\
+        snprintf ((brickinfo)->brick_id, sizeof ((brickinfo)->brick_id),\
+                  "%s-client-%d", (volinfo)->volname, (brickid));\
+} while (0)
+
+/* Lock record: the UUID of the lock owner together with a timestamp. */
+struct glusterd_lock_ {
+ /* UUID identifying the owner of the lock */
+ uuid_t owner;
+ /* NOTE(review): presumably the time the lock was acquired; confirm
+ * against the code that fills this structure */
+ time_t timestamp;
+};
+
+/* Context bundle holding a dictionary, a counter and three strings.
+ * NOTE(review): the users of this type are outside this header; from the
+ * field names it looks like a context for copying options into `dict` under
+ * generated key/value names, but confirm against the callers. */
+typedef struct glusterd_dict_ctx_ {
+ /* dictionary the context refers to */
+ dict_t *dict;
+ /* option counter */
+ int opt_count;
+ /* presumably the name/format used for keys; verify in callers */
+ char *key_name;
+ /* presumably the name/format used for values; verify in callers */
+ char *val_name;
+ /* presumably a prefix applied to generated names; verify in callers */
+ char *prefix;
+} glusterd_dict_ctx_t;
+
+int
+glusterd_compare_lines (const void *a, const void *b);
+
+typedef int (*glusterd_condition_func) (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ void *ctx);
+typedef struct glusterd_lock_ glusterd_lock_t;
+
+int32_t
+glusterd_get_lock_owner (uuid_t *cur_owner);
+
+int32_t
+glusterd_lock (uuid_t new_owner);
+
+int32_t
+glusterd_unlock (uuid_t owner);
+
+int32_t
+glusterd_get_uuid (uuid_t *uuid);
+
+char*
+gd_get_shd_key (int type);
+
+int
+glusterd_submit_reply (rpcsvc_request_t *req, void *arg,
+ struct iovec *payload, int payloadcount,
+ struct iobref *iobref, xdrproc_t xdrproc);
+
+int
+glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref, xdrproc_t xdrproc,
+ dict_t *dict);
+
+int
+glusterd_submit_request (struct rpc_clnt *rpc, void *req,
+ call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+int32_t
+glusterd_volinfo_new (glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t **dup_volinfo,
+ gf_boolean_t set_userauth);
+
+char *
+glusterd_auth_get_username (glusterd_volinfo_t *volinfo);
+
+char *
+glusterd_auth_get_password (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username);
+
+int32_t
+glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password);
+
+void
+glusterd_auth_cleanup (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_check_volume_exists (char *volname);
+
+int32_t
+glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo);
+
+int32_t
+glusterd_brickinfo_new_from_brick (char *brick,
+ glusterd_brickinfo_t **brickinfo,
+ gf_boolean_t construct_real_path,
+ char **op_errstr);
+
+int32_t
+glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo);
+
+int
+glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_service_stop(const char *service, char *pidfile, int sig,
+ gf_boolean_t force_kill);
+
+int32_t
+glusterd_service_stop_nolock (const char *service, char *pidfile, int sig,
+ gf_boolean_t force_kill);
+
+int
+glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo);
+
+int32_t
+glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t wait);
+
+int32_t
+glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t del_brick);
+
+glusterd_volinfo_t *
+glusterd_volinfo_ref (glusterd_volinfo_t *volinfo);
+
+glusterd_volinfo_t *
+glusterd_volinfo_unref (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_volinfo_delete (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo);
+
+gf_boolean_t
+glusterd_is_cli_op_req (int32_t op);
+
+int32_t
+glusterd_volume_brickinfo_get_by_brick (char *brick,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo,
+ gf_boolean_t construct_real_path);
+
+int32_t
+glusterd_add_volumes_to_export_dict (dict_t **peer_data);
+
+int32_t
+glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
+ char *hostname);
+
+int
+glusterd_compute_cksum (glusterd_volinfo_t *volinfo,
+ gf_boolean_t is_quota_conf);
+
+void
+glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len);
+
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node);
+
+void
+glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
+
+int
+glusterd_remote_hostname_get (rpcsvc_request_t *req,
+ char *remote_host, int len);
+int32_t
+glusterd_import_friend_volumes (dict_t *peer_data);
+void
+glusterd_set_volume_status (glusterd_volinfo_t *volinfo,
+ glusterd_volume_status status);
+
+int32_t
+glusterd_volume_count_get (void);
+int32_t
+glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *dict, int32_t count,
+ char *prefix);
+int
+glusterd_get_brickinfo (xlator_t *this, const char *brickname,
+ int port, glusterd_brickinfo_t **brickinfo);
+
+void
+glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo,
+ gf_brick_status_t status);
+
+gf_boolean_t
+glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_friend_brick_belongs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, void *uuid);
+int
+glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
+ void *ctx);
+int
+glusterd_brick_start (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t wait);
+int
+glusterd_brick_stop (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t del_brick);
+
+gf_boolean_t
+glusterd_is_tier_daemon_running (glusterd_volinfo_t *volinfo);
+
+
+int
+glusterd_is_defrag_on (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo,
+ char *op_errstr, size_t len);
+int32_t
+glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo);
+
+int
+glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+ glusterd_brickinfo_t **brickinfo);
+
+
+int
+glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brick,
+ glusterd_brickinfo_t *dst_brick);
+
+int
+glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid,
+ char **op_errstr, gf_boolean_t is_force);
+
+int
+glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo,
+ uuid_t volume_id, char **op_errstr,
+ gf_boolean_t is_force);
+int
+glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log,
+ int old_state, int new_state,
+ int event);
+int
+glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log,
+ char * (*state_name_get) (int),
+ char * (*event_name_get) (int),
+ size_t size);
+void
+glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log);
+
+int
+glusterd_sm_tr_log_add_to_dict (dict_t *dict,
+ glusterd_sm_tr_log_t *circular_log);
+int
+glusterd_remove_pending_entry (struct cds_list_head *list, void *elem);
+int
+glusterd_clear_pending_nodes (struct cds_list_head *list);
+int32_t
+glusterd_brick_connect (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, char *socketpath);
+int32_t
+glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo);
+int32_t
+glusterd_delete_volume (glusterd_volinfo_t *volinfo);
+int32_t
+glusterd_delete_brick (glusterd_volinfo_t* volinfo,
+ glusterd_brickinfo_t *brickinfo);
+
+int32_t
+glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo);
+
+int
+glusterd_spawn_daemons (void *opaque);
+
+int
+glusterd_restart_gsyncds (glusterd_conf_t *conf);
+
+int
+glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
+ char *path_list, char *conf_path,
+ char *glusterd_uuid_str,
+ char **op_errstr, gf_boolean_t is_pause);
+int
+glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo,
+ char **pathlist);
+
+int32_t
+glusterd_recreate_bricks (glusterd_conf_t *conf);
+int32_t
+glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf,
+ gf_boolean_t upgrade,
+ gf_boolean_t downgrade);
+
+int
+glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int32_t count);
+
+int32_t
+glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int32_t count);
+
+int32_t
+glusterd_get_all_volnames (dict_t *dict);
+
+gf_boolean_t
+glusterd_is_fuse_available ();
+
+int
+glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr);
+int
+glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr);
+
+int
+glusterd_quotad_statedump (char *options, int option_cnt, char **op_errstr);
+
+gf_boolean_t
+glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname,
+ char *path);
+int
+glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo,
+ uuid_t friend_uuid);
+int
+glusterd_friend_remove_cleanup_vols (uuid_t uuid);
+
+int
+glusterd_get_client_filepath (char *filepath,
+ glusterd_volinfo_t *volinfo,
+ gf_transport_type type);
+int
+glusterd_get_trusted_client_filepath (char *filepath,
+ glusterd_volinfo_t *volinfo,
+ gf_transport_type type);
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf);
+
+int32_t
+glusterd_create_sub_tier_volinfo (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t **dup_volinfo,
+ gf_boolean_t is_hot_tier,
+ const char *new_name);
+int
+glusterd_restart_rebalance_for_volume (glusterd_volinfo_t *volinfo);
+
+void
+glusterd_defrag_info_set (glusterd_volinfo_t *volinfo, dict_t *dict, int cmd,
+ int status, int op);
+
+int32_t
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict,
+ glusterd_volinfo_t *volinfo);
+
+int
+glusterd_add_node_to_dict (char *server, dict_t *dict, int count,
+ dict_t *vol_opts);
+
+int
+glusterd_calc_dist_leaf_count (int rcount, int scount);
+
+int
+glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo);
+
+glusterd_brickinfo_t*
+glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos);
+
+gf_boolean_t
+glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+int
+glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo);
+
+int
+glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
+ dict_t *rsp_dict);
+
+int
+glusterd_check_files_identical (char *filename1, char *filename2,
+ gf_boolean_t *identical);
+
+int
+glusterd_check_topology_identical (const char *filename1,
+ const char *filename2,
+ gf_boolean_t *identical);
+
+void
+glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo);
+int
+glusterd_volset_help (dict_t *dict, char **op_errstr);
+
+int32_t
+glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int32_t
+glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr);
+int32_t
+glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_sys_exec_output_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int32_t
+glusterd_handle_node_rsp (dict_t *req_ctx, void *pending_entry,
+ glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr, gd_node_type type);
+int
+glusterd_volume_bitrot_scrub_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+
+int32_t
+glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_volume_quota_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp);
+int
+_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
+ void *data);
+int
+glusterd_profile_volume_brick_rsp (void *pending_entry,
+ dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr, gd_node_type type);
+
+int32_t
+glusterd_set_originator_uuid (dict_t *dict);
+
+/* Should be used only when an operation is in progress, as that is the only
+ * time a lock_owner is set
+ */
+gf_boolean_t
+is_origin_glusterd (dict_t *dict);
+
+int
+glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str);
+
+int
+glusterd_generate_and_set_task_id (dict_t *dict, char *key);
+
+int
+glusterd_validate_and_set_gfid (dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr);
+
+int
+glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key);
+
+gf_boolean_t
+glusterd_is_same_address (char *name1, char *name2);
+
+void
+gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
+
+int
+op_version_check (xlator_t *this, int min_op_version, char *msg, int msglen);
+
+gf_boolean_t
+gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo,
+ dict_t *dict, char **slave_url,
+ char **slave_host, char **slave_vol,
+ char **conf_path, char **op_errstr);
+
+int
+glusterd_get_slave_info (char *slave,
+ char **slave_url, char **hostname,
+ char **slave_vol, char **op_errstr);
+
+int
+glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **statefile,
+ gf_boolean_t *is_template_in_use);
+
+int
+glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen);
+
+int
+glusterd_create_status_file (char *master, char *slave, char *slave_url,
+ char *slave_vol, char *status);
+
+int
+glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
+ dict_t *resp_dict, char *path_list,
+ char *conf_path, gf_boolean_t is_force);
+
+int
+glusterd_check_gsync_running_local (char *master, char *slave,
+ char *conf_path,
+ gf_boolean_t *is_run);
+
+gf_boolean_t
+glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict);
+
+gf_boolean_t
+gd_should_i_start_rebalance (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_is_volume_quota_enabled (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_is_volume_inode_quota_enabled (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_is_bitrot_enabled (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_all_volumes_with_quota_stopped ();
+
+void
+glusterd_clean_up_quota_store (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_remove_auxiliary_mount (char *volname);
+
+gf_boolean_t
+glusterd_status_has_tasks (int cmd);
+
+int
+gd_stop_rebalance_process (glusterd_volinfo_t *volinfo);
+
+rpc_clnt_t *
+glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc);
+
+int32_t
+glusterd_compare_volume_name(struct cds_list_head *, struct cds_list_head *);
+
+char*
+glusterd_get_brick_mount_device (char *brick_path);
+
+struct mntent *
+glusterd_get_mnt_entry_info (char *mnt_pt, char *buff, int buflen,
+ struct mntent *entry_ptr);
+
+int
+glusterd_get_brick_root (char *path, char **mount_point);
+
+int32_t
+glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol);
+
+gf_boolean_t
+gd_vol_is_geo_rep_active (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_get_brick_mount_dir (char *brickpath, char *hostname, char *mount_dir);
+
+int32_t
+glusterd_aggr_brick_mount_dirs (dict_t *aggr, dict_t *rsp_dict);
+
+int32_t
+glusterd_take_lvm_snapshot (glusterd_brickinfo_t *brickinfo,
+ char *origin_brick_path);
+
+void
+glusterd_launch_synctask (synctask_fn_t fn, void *opaque);
+
+int
+glusterd_enable_default_options (glusterd_volinfo_t *volinfo, char *option);
+
+int
+glusterd_unlink_file (char *sock_file_path);
+
+int32_t
+glusterd_find_brick_mount_path (char *brick_path, char **brick_mount_path);
+
+/*
+ * Function to retrieve list of snap volnames and their uuids
+ */
+int glusterd_snapshot_get_volnames_uuids (dict_t *dict,
+ char *volname, gf_getsnap_name_uuid_rsp *snap_info_rsp);
+
+int
+glusterd_update_mntopts (char *brick_path, glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_update_fs_label (glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_get_volopt_content (dict_t *dict, gf_boolean_t xml_out);
+
+int
+glusterd_get_default_val_for_volopt (dict_t *dict, gf_boolean_t all_opts,
+ char *key, char *orig_key,
+ dict_t *vol_dict, char **err_str);
+
+int
+glusterd_check_client_op_version_support (char *volname, uint32_t op_version,
+ char **op_errstr);
+
+gf_boolean_t
+glusterd_have_peers ();
+
+gf_boolean_t
+glusterd_have_volumes ();
+
+void
+glusterd_get_rebalance_volfile (glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+int32_t
+glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo,
+ glusterd_brickinfo_t *dup_brickinfo);
+
+int
+glusterd_vol_add_quota_conf_to_dict (glusterd_volinfo_t *volinfo, dict_t *load,
+ int vol_idx, char *prefix);
+
+int32_t
+glusterd_import_volinfo (dict_t *peer_data, int count,
+ glusterd_volinfo_t **volinfo,
+ char *prefix);
+
+int
+glusterd_import_quota_conf (dict_t *peer_data, int vol_idx,
+ glusterd_volinfo_t *new_volinfo,
+ char *prefix);
+
+gf_boolean_t
+glusterd_is_shd_compatible_volume (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_is_shd_compatible_type (int type);
+
+gf_boolean_t
+glusterd_are_all_volumes_stopped ();
+
+gf_boolean_t
+glusterd_all_shd_compatible_volumes_stopped ();
+
+void
+glusterd_nfs_pmap_deregister ();
+
+gf_boolean_t
+glusterd_is_volume_started (glusterd_volinfo_t *volinfo);
+
+void
+glusterd_list_add_order (struct cds_list_head *new, struct cds_list_head *head,
+ int (*compare)(struct cds_list_head *,
+ struct cds_list_head *));
+int
+glusterd_disallow_op_for_tier (glusterd_volinfo_t *volinfo, glusterd_op_t op,
+ int cmd);
+
+struct rpc_clnt*
+glusterd_defrag_rpc_get (glusterd_defrag_info_t *defrag);
+
+struct rpc_clnt*
+glusterd_defrag_rpc_put (glusterd_defrag_info_t *defrag);
+
+int32_t
+glusterd_count_connected_peers (int32_t *count);
+
+int
+glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data,
+ int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *,
+ dict_t *mod_dict, void *));
+
+int
+glusterd_get_dummy_client_filepath (char *filepath,
+ glusterd_volinfo_t *volinfo,
+ gf_transport_type type);
+
+int
+glusterd_handle_replicate_brick_ops (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ glusterd_op_t op);
+void
+assign_brick_groups (glusterd_volinfo_t *volinfo);
+
+glusterd_brickinfo_t*
+get_last_brick_of_brick_group (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
new file mode 100644
index 00000000000..6a755486d7d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -0,0 +1,6587 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+#include <sys/wait.h>
+#include <dlfcn.h>
+#include <utime.h>
+
+#include "xlator.h"
+#include "glusterd.h"
+#include "defaults.h"
+#include "syscall.h"
+#include "logging.h"
+#include "dict.h"
+#include "graph-utils.h"
+#include "common-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "trie.h"
+#include "glusterd-mem-types.h"
+#include "cli1-xdr.h"
+#include "glusterd-volgen.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "run.h"
+#include "options.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-snapd-svc-helper.h"
+
+struct gd_validate_reconf_opts {
+ dict_t *options;
+ char **op_errstr;
+};
+
+extern struct volopt_map_entry glusterd_volopt_map[];
+
+/*
+ * RPC_SET_OPT: look up CLI_OPT in the dict named `set_dict` at the expansion
+ * site and, when present, set it on XL as "transport.socket." XLATOR_OPT.
+ * XLATOR_OPT must be a string literal (it is pasted into other literals).
+ * ERROR_CMD is executed after logging when setting the xlator option fails;
+ * a missing CLI_OPT is silently ignored.
+ */
+#define RPC_SET_OPT(XL, CLI_OPT, XLATOR_OPT, ERROR_CMD) do { \
+ char *_value = NULL; \
+ \
+ if (dict_get_str (set_dict, CLI_OPT, &_value) == 0) { \
+ if (xlator_set_option (XL, \
+ "transport.socket." XLATOR_OPT, _value) != 0) { \
+ gf_msg ("glusterd", GF_LOG_WARNING, errno, \
+ GD_MSG_XLATOR_SET_OPT_FAIL, \
+ "failed to set " XLATOR_OPT); \
+ ERROR_CMD; \
+ } \
+ } \
+} while (0 /* CONSTCOND */)
+
+/*********************************************
+ *
+ * xlator generation / graph manipulation API
+ *
+ *********************************************/
+
+/* Store a heap copy of 'str' in the graph's error slot, if the graph has one. */
+static void
+set_graph_errstr (volgen_graph_t *graph, const char *str)
+{
+        if (graph->errstr)
+                *graph->errstr = gf_strdup (str);
+}
+
+static xlator_t *
+xlator_instantiate_va (const char *type, const char *format, va_list arg)
+{
+ xlator_t *xl = NULL;
+ char *volname = NULL;
+ int ret = 0;
+
+ ret = gf_vasprintf (&volname, format, arg);
+ if (ret < 0) {
+ volname = NULL;
+
+ goto error;
+ }
+
+ xl = GF_CALLOC (1, sizeof (*xl), gf_common_mt_xlator_t);
+ if (!xl)
+ goto error;
+ ret = xlator_set_type_virtual (xl, type);
+ if (ret)
+ goto error;
+ xl->options = get_new_dict();
+ if (!xl->options)
+ goto error;
+ xl->name = volname;
+ CDS_INIT_LIST_HEAD (&xl->volume_options);
+
+ xl->ctx = THIS->ctx;
+
+ return xl;
+
+ error:
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_XLATOR_CREATE_FAIL,
+ "creating xlator of type %s failed",
+ type);
+ GF_FREE (volname);
+ if (xl)
+ xlator_destroy (xl);
+
+ return NULL;
+}
+
+/* Variadic front end for xlator_instantiate_va(). */
+static xlator_t *
+xlator_instantiate (const char *type, const char *format, ...)
+{
+        xlator_t *created = NULL;
+        va_list   ap;
+
+        va_start (ap, format);
+        created = xlator_instantiate_va (type, format, ap);
+        va_end (ap);
+
+        return created;
+}
+
+/*
+ * Link 'cxl' as a child of 'pxl'. The result of glusterfs_xlator_link() is
+ * returned unchanged; a result of -1 is additionally logged.
+ */
+static int
+volgen_xlator_link (xlator_t *pxl, xlator_t *cxl)
+{
+        int rc = glusterfs_xlator_link (pxl, cxl);
+
+        if (rc != -1)
+                return rc;
+
+        gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                GD_MSG_NO_MEMORY,
+                "Out of memory, cannot link xlators %s <- %s",
+                pxl->name, cxl->name);
+
+        return rc;
+}
+
+/*
+ * Put 'xl' on top of the current first xlator of 'graph' (if there is one).
+ * Returns -1 only when the link call reports -1, otherwise 0.
+ */
+static int
+volgen_graph_link (volgen_graph_t *graph, xlator_t *xl)
+{
+        /* no need to care about graph->top here */
+        if (!graph->graph.first)
+                return 0;
+
+        if (volgen_xlator_link (xl, graph->graph.first) != -1)
+                return 0;
+
+        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                GD_MSG_GRAPH_ENTRY_ADD_FAIL,
+                "failed to add graph entry %s",
+                xl->name);
+
+        return -1;
+}
+
+/*
+ * Create an xlator of 'type' named by the printf-style 'format', link it
+ * above the graph's current first xlator and make it the new first one.
+ * Returns the new xlator, or NULL on failure (the xlator is destroyed).
+ */
+static xlator_t *
+volgen_graph_add_as (volgen_graph_t *graph, const char *type,
+                     const char *format, ...)
+{
+        xlator_t *created = NULL;
+        va_list   ap;
+
+        va_start (ap, format);
+        created = xlator_instantiate_va (type, format, ap);
+        va_end (ap);
+
+        if (!created)
+                return NULL;
+
+        if (volgen_graph_link (graph, created) != 0) {
+                xlator_destroy (created);
+                return NULL;
+        }
+
+        glusterfs_graph_set_first (&graph->graph, created);
+
+        return created;
+}
+
+/*
+ * Same as volgen_graph_add_as() but without linking the new xlator to the
+ * previous first xlator: it is only installed as the graph's first entry.
+ */
+static xlator_t *
+volgen_graph_add_nolink (volgen_graph_t *graph, const char *type,
+                         const char *format, ...)
+{
+        xlator_t *created = NULL;
+        va_list   ap;
+
+        va_start (ap, format);
+        created = xlator_instantiate_va (type, format, ap);
+        va_end (ap);
+
+        if (created)
+                glusterfs_graph_set_first (&graph->graph, created);
+
+        return created;
+}
+
+/*
+ * Add an xlator of 'type' (expected to look like "category/name") to the
+ * graph, naming it "<volname>-<name>".
+ */
+static xlator_t *
+volgen_graph_add (volgen_graph_t *graph, char *type, char *volname)
+{
+        char *slash = strrchr (type, '/');
+        char *leaf  = NULL;
+
+        GF_ASSERT (slash);
+        leaf = slash + 1;
+        GF_ASSERT (*leaf);
+
+        return volgen_graph_add_as (graph, type, "%s-%s", volname, leaf);
+}
+
+/* XXX Seems there is no such generic routine?
+ * Maybe should put to xlator.c ??
+ */
+static int
+xlator_set_option (xlator_t *xl, char *key, char *value)
+{
+        /* the dict takes a heap copy of the value via dict_set_dynstr() */
+        char *copy = gf_strdup (value);
+
+        if (copy)
+                return dict_set_dynstr (xl->options, key, copy);
+
+        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                GD_MSG_NO_MEMORY,
+                "failed to set xlator opt: %s[%s] = %s",
+                xl->name, key, value);
+
+        return -1;
+}
+
+static int
+xlator_get_option (xlator_t *xl, char *key, char **value)
+{
+ GF_ASSERT (xl);
+ return dict_get_str (xl->options, key, value);
+}
+
+static xlator_t *
+first_of (volgen_graph_t *graph)
+{
+ return (xlator_t *)graph->graph.first;
+}
+
+
+
+
+/**************************
+ *
+ * Trie glue
+ *
+ *************************/
+
+
+static int
+volopt_selector (int lvl, char **patt, void *param,
+ int (*optcbk)(char *word, void *param))
+{
+ struct volopt_map_entry *vme = NULL;
+ char *w = NULL;
+ int i = 0;
+ int len = 0;
+ int ret = 0;
+ char *dot = NULL;
+
+ for (vme = glusterd_volopt_map; vme->key; vme++) {
+ w = vme->key;
+
+ for (i = 0; i < lvl; i++) {
+ if (patt[i]) {
+ w = strtail (w, patt[i]);
+ GF_ASSERT (!w || *w);
+ if (!w || *w != '.')
+ goto next;
+ } else {
+ w = strchr (w, '.');
+ GF_ASSERT (w);
+ }
+ w++;
+ }
+
+ dot = strchr (w, '.');
+ if (dot) {
+ len = dot - w;
+ w = gf_strdup (w);
+ if (!w)
+ return -1;
+ w[len] = '\0';
+ }
+ ret = optcbk (w, param);
+ if (dot)
+ GF_FREE (w);
+ if (ret)
+ return -1;
+ next:
+ continue;
+ }
+
+ return 0;
+}
+
+static int
+volopt_trie_cbk (char *word, void *param)
+{
+ return trie_add ((trie_t *)param, word);
+}
+
+/*
+ * Build a hint string from the (up to two) closest trie matches in 'nodevec'.
+ *
+ * On entry *hint may hold an infix string that is inserted before the second
+ * suggestion; on success *hint is replaced by either NULL (no match), the
+ * single matching word, or "<word1> or <infix><word2>". The resulting string
+ * is heap-allocated and owned by the caller.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int
+process_nodevec (struct trienodevec *nodevec, char **hint)
+{
+        int          ret      = 0;
+        char        *hint1    = NULL;
+        char        *hint2    = NULL;
+        char        *hintinfx = "";
+        trienode_t **nodes    = nodevec->nodes;
+
+        if (!nodes[0]) {
+                *hint = NULL;
+                return 0;
+        }
+
+#if 0
+        /* Limit as in git */
+        if (trienode_get_dist (nodes[0]) >= 6) {
+                *hint = NULL;
+                return 0;
+        }
+#endif
+
+        if (trienode_get_word (nodes[0], &hint1))
+                return -1;
+
+        if (nodevec->cnt < 2 || !nodes[1]) {
+                /* single suggestion: ownership of hint1 moves to the caller */
+                *hint = hint1;
+                return 0;
+        }
+
+        if (trienode_get_word (nodes[1], &hint2)) {
+                /* do not leak the first word when the second lookup fails */
+                GF_FREE (hint1);
+                return -1;
+        }
+
+        if (*hint)
+                hintinfx = *hint;
+        ret = gf_asprintf (hint, "%s or %s%s", hint1, hintinfx, hint2);
+        if (ret > 0)
+                ret = 0;
+
+        /* both words have been copied into the combined hint (or we failed) */
+        GF_FREE (hint1);
+        GF_FREE (hint2);
+
+        return ret;
+}
+
+/*
+ * Build a trie from the option-name components selected by (lvl, patt),
+ * look up 'word' in it and turn the closest matches (at most 'hints', which
+ * must not exceed 2) into a hint string via process_nodevec().
+ */
+static int
+volopt_trie_section (int lvl, char **patt, char *word, char **hint, int hints)
+{
+        trienode_t         *closest[] = { NULL, NULL };
+        struct trienodevec  vec       = { closest, 2};
+        trie_t             *trie      = NULL;
+        int                 ret       = -1;
+
+        trie = trie_new ();
+        if (!trie)
+                return -1;
+
+        if (volopt_selector (lvl, patt, trie, &volopt_trie_cbk) == 0) {
+                GF_ASSERT (hints <= 2);
+                vec.cnt = hints;
+                ret = trie_measure_vec (trie, word, &vec);
+                if (!ret && vec.nodes[0])
+                        ret = process_nodevec (&vec, hint);
+        }
+
+        trie_destroy (trie);
+
+        return ret;
+}
+
+/*
+ * Compute a "did you mean" hint for the option name 'key'.
+ *
+ * For a key without a dot, the hint is searched among the second-level name
+ * components. For "domain.option", the closest domain is determined first
+ * and then the closest option(s) within that domain.
+ *
+ * On success *hint is either NULL (nothing to suggest) or a heap-allocated
+ * string owned by the caller. On failure *hint is NULL and non-zero is
+ * returned.
+ */
+static int
+volopt_trie (char *key, char **hint)
+{
+        char *patt[]      = { NULL };
+        char *fullhint    = NULL;
+        char *placeholder = "...";
+        char *dot         = NULL;
+        char *dom         = NULL;
+        int   len         = 0;
+        int   ret         = 0;
+
+        *hint = NULL;
+
+        dot = strchr (key, '.');
+        if (!dot)
+                return volopt_trie_section (1, patt, key, hint, 2);
+
+        len = dot - key;
+        dom = gf_strdup (key);
+        if (!dom)
+                return -1;
+        dom[len] = '\0';
+
+        ret = volopt_trie_section (0, NULL, dom, patt, 1);
+        GF_FREE (dom);
+        if (ret) {
+                patt[0] = NULL;
+                goto out;
+        }
+        if (!patt[0])
+                goto out;
+
+        /* passed in as the infix used when two suggestions are combined */
+        *hint = placeholder;
+        ret = volopt_trie_section (1, patt, dot + 1, hint, 2);
+
+        /*
+         * If the section search produced no suggestion, *hint still points
+         * at the string literal; it must never reach GF_FREE() or the caller.
+         */
+        if (*hint == placeholder)
+                *hint = NULL;
+
+        if (ret)
+                goto out;
+        if (*hint) {
+                ret = gf_asprintf (&fullhint, "%s.%s", patt[0], *hint);
+                GF_FREE (*hint);
+                if (ret >= 0) {
+                        ret = 0;
+                        *hint = fullhint;
+                }
+        }
+
+ out:
+        GF_FREE (patt[0]);
+        if (ret)
+                *hint = NULL;
+
+        return ret;
+}
+
+
+
+
+/**************************
+ *
+ * Volume generation engine
+ *
+ **************************/
+
+
+typedef int (*volgen_opthandler_t) (volgen_graph_t *graph,
+ struct volopt_map_entry *vme,
+ void *param);
+
+struct opthandler_data {
+ volgen_graph_t *graph;
+ volgen_opthandler_t handler;
+ struct volopt_map_entry *vme;
+ gf_boolean_t found;
+ gf_boolean_t data_t_fake;
+ int rv;
+ char *volname;
+ void *param;
+};
+
+static int
+process_option (char *key, data_t *value, void *param)
+{
+ struct opthandler_data *odt = param;
+ struct volopt_map_entry vme = {0,};
+
+ if (odt->rv)
+ return 0;
+ odt->found = _gf_true;
+
+ vme.key = key;
+ vme.voltype = odt->vme->voltype;
+ vme.option = odt->vme->option;
+ vme.op_version = odt->vme->op_version;
+
+ if (!vme.option) {
+ vme.option = strrchr (key, '.');
+ if (vme.option)
+ vme.option++;
+ else
+ vme.option = key;
+ }
+ if (odt->data_t_fake)
+ vme.value = (char *)value;
+ else
+ vme.value = value->data;
+
+ odt->rv = odt->handler (odt->graph, &vme, odt->param);
+ return 0;
+}
+
+static int
+volgen_graph_set_options_generic (volgen_graph_t *graph, dict_t *dict,
+ void *param, volgen_opthandler_t handler)
+{
+ struct volopt_map_entry *vme = NULL;
+ struct opthandler_data odt = {0,};
+ data_t *data = NULL;
+
+ odt.graph = graph;
+ odt.handler = handler;
+ odt.param = param;
+ (void)data;
+
+ for (vme = glusterd_volopt_map; vme->key; vme++) {
+ odt.vme = vme;
+ odt.found = _gf_false;
+ odt.data_t_fake = _gf_false;
+
+ data = dict_get (dict, vme->key);
+
+ if (data)
+ process_option (vme->key, data, &odt);
+ if (odt.rv)
+ return odt.rv;
+
+ if (odt.found)
+ continue;
+
+ /* check for default value */
+
+ if (vme->value) {
+ /* stupid hack to be able to reuse dict iterator
+ * in this context
+ */
+ odt.data_t_fake = _gf_true;
+ process_option (vme->key, (data_t *)vme->value, &odt);
+ if (odt.rv)
+ return odt.rv;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Set vme->option = vme->value on every xlator of the graph whose type
+ * equals vme->voltype. Stops at, and returns, the first failure.
+ */
+static int
+no_filter_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                          void *param)
+{
+        xlator_t *xl = NULL;
+        int       rc = 0;
+
+        for (xl = first_of (graph); xl; xl = xl->next) {
+                if (strcmp (xl->type, vme->voltype) == 0) {
+                        rc = xlator_set_option (xl, vme->option, vme->value);
+                        if (rc)
+                                return rc;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Like no_filter_option_handler(), except that options whose name starts
+ * with '!' are skipped (they are left to dedicated handlers).
+ */
+static int
+basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                      void *param)
+{
+        if (vme->option[0] == '!')
+                return 0;
+
+        return no_filter_option_handler (graph, vme, param);
+}
+
+static int
+volgen_graph_set_options (volgen_graph_t *graph, dict_t *dict)
+{
+ return volgen_graph_set_options_generic (graph, dict, NULL,
+ &basic_option_handler);
+}
+
+/*
+ * Option handler used for lookups: 'param' is a volopt_map_entry whose key
+ * names the wanted option; its value is filled in when 'vme' matches.
+ */
+static int
+optget_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                       void *param)
+{
+        struct volopt_map_entry *wanted = param;
+
+        if (strcmp (vme->key, wanted->key) != 0)
+                return 0;
+
+        wanted->value = vme->value;
+
+        return 0;
+}
+
+/*
+ * Look up 'key' in 'dict', falling back to the default from the option map.
+ * *value is set to the value found (NULL if there is none). Returns 0 on
+ * success, -1 if the option walk fails.
+ */
+static int
+volgen_dict_get (dict_t *dict, char *key, char **value)
+{
+        struct volopt_map_entry probe = {0,};
+
+        probe.key = key;
+
+        if (volgen_graph_set_options_generic (NULL, dict, &probe,
+                                              &optget_option_handler)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Out of memory");
+
+                return -1;
+        }
+
+        *value = probe.value;
+
+        return 0;
+}
+
+/*
+ * Complete a domain-less option name: find the option map key whose part
+ * after the first '.' equals 'key'.
+ *
+ * *completion is set to a heap copy of the full key when exactly one key
+ * matches, and to NULL when none or more than one does. Returns -1 only if
+ * the copy cannot be allocated, 0 otherwise.
+ */
+static int
+option_complete (char *key, char **completion)
+{
+        struct volopt_map_entry *vme   = NULL;
+        char                    *match = NULL;
+
+        *completion = NULL;
+
+        for (vme = glusterd_volopt_map; vme->key; vme++) {
+                if (strcmp (strchr (vme->key, '.') + 1, key) != 0)
+                        continue;
+
+                /* cancel on non-unique match */
+                if (match && strcmp (match, vme->key) != 0)
+                        return 0;
+
+                match = vme->key;
+        }
+
+        if (!match)
+                return 0;
+
+        /* For sake of unified API the completion is a to-be-freed string. */
+        *completion = gf_strdup (match);
+
+        return *completion ? 0 : -1;
+}
+
+int
+glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value)
+{
+ return volgen_dict_get (volinfo->dict, key, value);
+}
+
+/*
+ * Read option 'key' of the volume as a boolean. Returns 1/0 for
+ * enabled/disabled (0 when the option has no value), -1 on lookup failure
+ * or when the value is not a valid boolean string.
+ */
+int
+glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key)
+{
+        gf_boolean_t  enabled = _gf_false;
+        char         *val     = NULL;
+
+        if (glusterd_volinfo_get (volinfo, key, &val))
+                return -1;
+
+        if (!val)
+                return enabled;
+
+        if (gf_string2boolean (val, &enabled)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY,
+                        "value for %s option is not valid", key);
+
+                return -1;
+        }
+
+        return enabled;
+}
+
+/*
+ * Returns _gf_true when the option map entry named 'key' has any of the
+ * bits in 'flags' set, _gf_false otherwise (including unknown keys).
+ */
+gf_boolean_t
+glusterd_check_voloption_flags (char *key, int32_t flags)
+{
+        char                    *completion = NULL;
+        struct volopt_map_entry *vmep       = NULL;
+        int                      ret        = 0;
+
+        COMPLETE_OPTION(key, completion, ret);
+        for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+                if (strcmp (vmep->key, key) != 0)
+                        continue;
+
+                return (vmep->flags & flags) ? _gf_true : _gf_false;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Returns _gf_true when 'key' names an option map entry of type GLOBAL_DOC
+ * or GLOBAL_NO_DOC, _gf_false otherwise (including unknown keys).
+ */
+gf_boolean_t
+glusterd_check_globaloption (char *key)
+{
+        char                    *completion = NULL;
+        struct volopt_map_entry *vmep       = NULL;
+        int                      ret        = 0;
+
+        COMPLETE_OPTION(key, completion, ret);
+        for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+                if (strcmp (vmep->key, key) != 0)
+                        continue;
+
+                if (vmep->type == GLOBAL_DOC || vmep->type == GLOBAL_NO_DOC)
+                        return _gf_true;
+
+                return _gf_false;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Returns _gf_true when 'key' names an option map entry of type DOC or
+ * NO_DOC, _gf_false otherwise (including unknown keys).
+ */
+gf_boolean_t
+glusterd_check_localoption (char *key)
+{
+        char                    *completion = NULL;
+        struct volopt_map_entry *vmep       = NULL;
+        int                      ret        = 0;
+
+        COMPLETE_OPTION(key, completion, ret);
+        for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+                if (strcmp (vmep->key, key) != 0)
+                        continue;
+
+                if (vmep->type == DOC || vmep->type == NO_DOC)
+                        return _gf_true;
+
+                return _gf_false;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Returns _gf_true when 'key' names an option map entry of type DOC,
+ * _gf_false otherwise (including unknown keys).
+ *
+ * NOTE(review): the original condition compared the type against DOC twice
+ * ("type == DOC || type == DOC"). The duplicate has been dropped without
+ * changing behavior; a second, different type may have been intended --
+ * confirm against the callers.
+ */
+int
+glusterd_check_voloption (char *key)
+{
+        char                    *completion = NULL;
+        struct volopt_map_entry *vmep       = NULL;
+        int                      ret        = 0;
+
+        COMPLETE_OPTION(key, completion, ret);
+        for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+                if (strcmp (vmep->key, key) == 0) {
+                        if (vmep->type == DOC)
+                                return _gf_true;
+                        else
+                                return _gf_false;
+                }
+        }
+
+        return _gf_false;
+
+}
+
+/*
+ * Check whether 'key' names a known volume option.
+ *
+ * Returns 1 if the option exists, 0 if it does not, and a negative value
+ * on error. When 'completion' is non-NULL:
+ *   - for a key without a domain ("option" rather than "domain.option"),
+ *     *completion receives the unique full key, if there is one;
+ *   - when no option matches, *completion receives a "did you mean" hint
+ *     computed by volopt_trie(), or NULL.
+ * A non-NULL *completion is heap-allocated and owned by the caller.
+ */
+int
+glusterd_check_option_exists (char *key, char **completion)
+{
+        struct volopt_map_entry *vmep = NULL;
+        int                      ret  = 0;
+        xlator_t                *this = THIS;
+
+        if (!strchr (key, '.')) {
+                /* a domain-less key can only be resolved via completion */
+                if (!completion)
+                        return 0;
+
+                ret = option_complete (key, completion);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Out of memory");
+                        return -1;
+                }
+
+                ret = !!*completion;
+                if (ret)
+                        return ret;
+
+                goto trie;
+        }
+
+        for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+                if (strcmp (vmep->key, key) == 0) {
+                        ret = 1;
+                        break;
+                }
+        }
+
+        if (ret || !completion)
+                return ret;
+
+ trie:
+        ret = volopt_trie (key, completion);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_ERROR_ENCOUNTERED,
+                        "Some error occurred during keyword hinting");
+        }
+
+        return ret;
+}
+
+/*
+ * Run the validation callback registered in the option map for 'key'.
+ *
+ * 'key' may be given either as the full map key or as the part after the
+ * first '.'. Only the first matching entry that has a validate_fn is used.
+ *
+ * Returns -1 on invalid arguments, otherwise the callback's result (0 when
+ * no callback applies).
+ */
+int
+glusterd_volopt_validate (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                          char *value, char **op_errstr)
+{
+        struct volopt_map_entry *vme    = NULL;
+        const char              *suffix = NULL;
+        xlator_t                *this   = THIS;
+        int                      ret    = 0;
+
+        if (!dict || !key || !value) {
+                /* never hand a NULL pointer to a %s conversion */
+                gf_msg_callingfn (this->name, GF_LOG_WARNING, EINVAL,
+                                  GD_MSG_INVALID_ENTRY, "Invalid "
+                                  "Arguments (dict=%p, key=%s, value=%s)", dict,
+                                  key ? key : "(null)",
+                                  value ? value : "(null)");
+                return -1;
+        }
+
+        for (vme = &glusterd_volopt_map[0]; vme->key; vme++) {
+                if (!vme->validate_fn)
+                        continue;
+
+                /* tolerate a map key without a domain prefix */
+                suffix = strchr (vme->key, '.');
+                if (strcmp (key, vme->key) != 0 &&
+                    (!suffix || strcmp (key, suffix + 1) != 0))
+                        continue;
+
+                ret = vme->validate_fn (volinfo, dict, key, value,
+                                        op_errstr);
+                break;
+        }
+
+        return ret;
+}
+
+/*
+ * Return a heap-allocated transport name for 'ttype': "rdma" for RDMA,
+ * "tcp" for TCP and for TCP+RDMA. Returns NULL for an unknown type (after
+ * logging) or when the string cannot be allocated. The caller owns the
+ * returned string.
+ */
+char*
+glusterd_get_trans_type_rb (gf_transport_type ttype)
+{
+        char       *trans_type = NULL;
+        const char *name       = NULL;
+
+        switch (ttype) {
+        case GF_TRANSPORT_RDMA:
+                name = "rdma";
+                break;
+        case GF_TRANSPORT_TCP:
+        case GF_TRANSPORT_BOTH_TCP_RDMA:
+                name = "tcp";
+                break;
+        default:
+                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Unknown "
+                        "transport type");
+                return NULL;
+        }
+
+        /* do not return whatever the failed call left in trans_type */
+        if (gf_asprintf (&trans_type, "%s", name) < 0)
+                trans_type = NULL;
+
+        return trans_type;
+}
+
+/*
+ * Link 'child_count' xlators, starting at 'children' and following ->next,
+ * as children of 'parent'. Linking walks backwards from the last of them
+ * via ->prev. Returns 0 on success, -1 if child_count is 0, or the failing
+ * link result.
+ */
+static int
+_xl_link_children (xlator_t *parent, xlator_t *children, size_t child_count)
+{
+        xlator_t *cur   = children;
+        size_t    steps = 0;
+        int       ret   = -1;
+
+        if (child_count == 0)
+                return ret;
+
+        /* move to the last of the child_count xlators */
+        for (steps = child_count - 1; steps > 0; steps--)
+                cur = cur->next;
+
+        while (child_count-- > 0) {
+                ret = volgen_xlator_link (parent, cur);
+                gf_msg_debug (THIS->name, 0, "%s:%s", parent->name,
+                              cur->name);
+                if (ret)
+                        return ret;
+                cur = cur->prev;
+        }
+
+        return 0;
+}
+
+/*
+ * Attach the first 'child_count' xlators of 'sgraph' as children of the
+ * first xlator of 'dgraph', then append sgraph's xlator list to dgraph's
+ * and add up the xlator counts.
+ */
+static int
+volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph,
+                        size_t child_count)
+{
+        xlator_t *tail = NULL;
+        int       ret  = 0;
+
+        GF_ASSERT (dgraph->graph.first);
+
+        ret = _xl_link_children (first_of (dgraph), first_of (sgraph),
+                                 child_count);
+        if (ret)
+                return ret;
+
+        tail = first_of (dgraph);
+        while (tail->next)
+                tail = tail->next;
+
+        tail->next = first_of (sgraph);
+        tail->next->prev = tail;
+        dgraph->graph.xl_count += sgraph->graph.xl_count;
+
+        return ret;
+}
+
+static void
+volgen_apply_filters (char *orig_volfile)
+{
+ DIR *filterdir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+ struct stat statbuf = {0,};
+ char filterpath[PATH_MAX] = {0,};
+
+ filterdir = sys_opendir (FILTERDIR);
+
+ if (!filterdir)
+ return;
+
+ for (;;) {
+
+ errno = 0;
+
+ entry = sys_readdir (filterdir, scratch);
+
+ if (!entry || errno != 0)
+ break;
+
+ if (strcmp (entry->d_name, ".") == 0 ||
+ strcmp (entry->d_name, "..") == 0)
+ continue;
+ /*
+ * d_type isn't guaranteed to be present/valid on all systems,
+ * so do an explicit stat instead.
+ */
+ (void) snprintf (filterpath, sizeof(filterpath), "%s/%s",
+ FILTERDIR, entry->d_name);
+
+ /* Deliberately use stat instead of lstat to allow symlinks. */
+ if (sys_stat (filterpath, &statbuf) == -1)
+ continue;
+
+ if (!S_ISREG (statbuf.st_mode))
+ continue;
+ /*
+ * We could check the mode in statbuf directly, or just skip
+ * this entirely and check for EPERM after exec fails, but this
+ * is cleaner.
+ */
+ if (sys_access (filterpath, X_OK) != 0)
+ continue;
+
+ if (runcmd (filterpath, orig_volfile, NULL)) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_FILTER_RUN_FAILED,
+ "failed to run filter %s",
+ entry->d_name);
+ }
+ }
+
+ (void) sys_closedir (filterdir);
+}
+
+static int
+volgen_write_volfile (volgen_graph_t *graph, char *filename)
+{
+ char *ftmp = NULL;
+ FILE *f = NULL;
+ int fd = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if (gf_asprintf (&ftmp, "%s.tmp", filename) == -1) {
+ ftmp = NULL;
+ goto error;
+ }
+
+ fd = sys_creat (ftmp, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED, "file creation failed");
+ goto error;
+ }
+
+ sys_close (fd);
+
+ f = fopen (ftmp, "w");
+ if (!f)
+ goto error;
+
+ if (glusterfs_graph_print_file (f, &graph->graph) == -1)
+ goto error;
+
+ if (fclose (f) != 0) {
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED, "fclose on the file %s "
+ "failed", ftmp);
+ /*
+ * Even though fclose has failed here, we have to set f to NULL.
+ * Otherwise when the code path goes to error, there again we
+ * try to close it which might cause undefined behavior such as
+ * process crash.
+ */
+ f = NULL;
+ goto error;
+ }
+
+ f = NULL;
+
+ if (sys_rename (ftmp, filename) == -1)
+ goto error;
+
+ GF_FREE (ftmp);
+
+ volgen_apply_filters(filename);
+
+ return 0;
+
+ error:
+
+ GF_FREE (ftmp);
+ if (f)
+ fclose (f);
+
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "failed to create volfile %s", filename);
+
+ return -1;
+}
+
+/* Destroy every xlator on the graph's ->next chain. */
+static void
+volgen_graph_free (volgen_graph_t *graph)
+{
+        xlator_t *cur  = first_of (graph);
+        xlator_t *next = NULL;
+
+        while (cur) {
+                /* fetch the successor before the node goes away */
+                next = cur->next;
+                xlator_destroy (cur);
+                cur = next;
+        }
+}
+
+static int
+build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *mod_dict, void *param,
+ int (*builder) (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, void *param))
+{
+ dict_t *set_dict = NULL;
+ int ret = 0;
+
+ if (mod_dict) {
+ set_dict = dict_copy (volinfo->dict, NULL);
+ if (!set_dict)
+ return -1;
+ dict_copy (mod_dict, set_dict);
+ /* XXX dict_copy swallows errors */
+ } else {
+ set_dict = volinfo->dict;
+ }
+
+ ret = builder (graph, volinfo, set_dict, param);
+ if (!ret)
+ ret = volgen_graph_set_options (graph, set_dict);
+
+ if (mod_dict)
+ dict_destroy (set_dict);
+
+ return ret;
+}
+
+/*
+ * Map "tcp", "rdma" or "tcp,rdma" to the transport enum; any other string
+ * yields GF_TRANSPORT_TCP.
+ */
+static gf_transport_type
+transport_str_to_type (char *tt)
+{
+        if (!strcmp ("rdma", tt))
+                return GF_TRANSPORT_RDMA;
+
+        if (!strcmp ("tcp,rdma", tt))
+                return GF_TRANSPORT_BOTH_TCP_RDMA;
+
+        return GF_TRANSPORT_TCP;
+}
+
+/*
+ * Copy the textual name of 'type' into 'tt'. 'tt' is left untouched for a
+ * type that is none of the three known transports; the caller must provide
+ * room for the longest name ("tcp,rdma").
+ */
+static void
+transport_type_to_str (gf_transport_type type, char *tt)
+{
+        const char *name = NULL;
+
+        switch (type) {
+        case GF_TRANSPORT_RDMA:
+                name = "rdma";
+                break;
+        case GF_TRANSPORT_TCP:
+                name = "tcp";
+                break;
+        case GF_TRANSPORT_BOTH_TCP_RDMA:
+                name = "tcp,rdma";
+                break;
+        }
+
+        if (name)
+                strcpy (tt, name);
+}
+
+static void
+get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt)
+{
+ transport_type_to_str (volinfo->transport_type, tt);
+}
+
+/* If no value has been specified from the CLI for a tcp,rdma volume,
+ * use tcp as the default value. Otherwise, use the transport type
+ * mentioned in volinfo.
+ */
+static void
+get_vol_nfs_transport_type (glusterd_volinfo_t *volinfo, char *tt)
+{
+        if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
+                transport_type_to_str (volinfo->transport_type, tt);
+                return;
+        }
+
+        /* tcp,rdma volume: NFS falls back to tcp */
+        strcpy (tt, "tcp");
+        gf_msg ("glusterd", GF_LOG_INFO, 0,
+                GD_MSG_DEFAULT_OPT_INFO,
+                "The default transport type for tcp,rdma volume "
+                "is tcp if option is not defined by the user ");
+}
+
+/* gets the volinfo, dict, a character array for filling in
+ * the transport type and a boolean option which says whether
+ * the transport type is required for nfs or not. If its not
+ * for nfs, then it is considered as the client transport
+ * and client transport type is filled in the character array
+ */
+static void
+get_transport_type (glusterd_volinfo_t *volinfo, dict_t *set_dict,
+                    char *transt, gf_boolean_t is_nfs)
+{
+        char  *configured = NULL;
+        char  *key        = NULL;
+        void (*fallback) (glusterd_volinfo_t *, char *) = NULL;
+
+        if (is_nfs != _gf_false) {
+                key = "nfs.transport-type";
+                fallback = get_vol_nfs_transport_type;
+        } else {
+                key = "client-transport-type";
+                fallback = get_vol_transport_type;
+        }
+
+        /* an explicit setting in the dict wins over the volume's type */
+        if (dict_get_str (set_dict, key, &configured) == 0)
+                strcpy (transt, configured);
+        else
+                fallback (volinfo, transt);
+}
+
+/*
+ * Handle options mapped to "!server-auth" (keys such as 'auth.allow' and
+ * 'auth.reject'): set "auth.addr.<auth-path>.<allow|reject>" to the option
+ * value on the first xlator of the graph, where <auth-path> is that
+ * xlator's "auth-path" option.
+ *
+ * Returns 0 for unrelated options and on success, -1 on failure.
+ */
+static int
+server_auth_option_handler (volgen_graph_t *graph,
+                            struct volopt_map_entry *vme, void *param)
+{
+        xlator_t *xl        = NULL;
+        char     *aa        = NULL;
+        int       ret       = 0;
+        char     *key       = NULL;
+        char     *auth_path = NULL;
+
+        if (strcmp (vme->option, "!server-auth") != 0)
+                return 0;
+
+        xl = first_of (graph);
+
+        /* from 'auth.allow' -> 'allow', and 'auth.reject' -> 'reject' */
+        key = strchr (vme->key, '.') + 1;
+
+        ret = xlator_get_option (xl, "auth-path", &auth_path);
+        if (ret) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DEFAULT_OPT_INFO,
+                        "Failed to get auth-path from server graph");
+                return -1;
+        }
+        ret = gf_asprintf (&aa, "auth.addr.%s.%s", auth_path,
+                           key);
+        if (ret != -1) {
+                ret = xlator_set_option (xl, aa, vme->value);
+                GF_FREE (aa);
+        }
+        if (ret)
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Apply "!client-log-level" / "!brick-log-level" options as "log-level",
+ * but only when the option key contains the role string passed in 'param'.
+ */
+static int
+loglevel_option_handler (volgen_graph_t *graph,
+                         struct volopt_map_entry *vme, void *param)
+{
+        const char              *role    = param;
+        struct volopt_map_entry  renamed = {0,};
+
+        if (strcmp (vme->option, "!client-log-level") != 0 &&
+            strcmp (vme->option, "!brick-log-level") != 0)
+                return 0;
+
+        if (!strstr (vme->key, role))
+                return 0;
+
+        renamed = *vme;
+        renamed.option = "log-level";
+
+        return basic_option_handler (graph, &renamed, NULL);
+}
+
+/*
+ * Guard for the "!xtime" option: refuse to turn the marker off while
+ * geo-replication is configured for the volume.
+ *
+ * Returns 0 when the option is unrelated, is being enabled, or may be
+ * disabled; non-zero when the value is not a valid boolean, the current
+ * marker status cannot be read, or geo-rep sessions exist (in which case
+ * the graph's error string is set as well).
+ */
+static int
+server_check_marker_off (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                         glusterd_volinfo_t *volinfo)
+{
+        gf_boolean_t enabled = _gf_false;
+        int          ret     = 0;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (vme);
+
+        if (strcmp (vme->option, "!xtime") != 0)
+                return 0;
+
+        ret = gf_string2boolean (vme->value, &enabled);
+        if (ret || enabled)
+                goto out;
+
+        /* the option is being switched off: check the stored setting */
+        ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME);
+        if (ret < 0) {
+                gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                        GD_MSG_MARKER_STATUS_GET_FAIL,
+                        "failed to get the marker status");
+                ret = -1;
+                goto out;
+        }
+
+        if (ret) {
+                enabled = _gf_false;
+                glusterd_check_geo_rep_configured (volinfo, &enabled);
+
+                if (enabled) {
+                        gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                                GD_MSG_MARKER_DISABLE_FAIL,
+                                GEOREP" sessions active "
+                                "for the volume %s, cannot disable marker "
+                                ,volinfo->volname);
+                        set_graph_errstr (graph,
+                                          VKEY_MARKER_XTIME" cannot be disabled "
+                                          "while "GEOREP" sessions exist");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+ out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+
+}
+
+/*
+ * Apply "!sys-log-level" options as "sys-log-level" when the option key
+ * contains the role string passed in 'param'.
+ */
+static int
+sys_loglevel_option_handler (volgen_graph_t *graph,
+                             struct volopt_map_entry *vme,
+                             void *param)
+{
+        const char              *role    = param;
+        struct volopt_map_entry  renamed = {0,};
+
+        if (strcmp (vme->option, "!sys-log-level") != 0)
+                return 0;
+        if (!strstr (vme->key, role))
+                return 0;
+
+        renamed = *vme;
+        renamed.option = "sys-log-level";
+
+        return basic_option_handler (graph, &renamed, NULL);
+}
+
+/*
+ * Apply "!logger" options as "logger" when the option key contains the
+ * role string passed in 'param'.
+ */
+static int
+logger_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                       void *param)
+{
+        const char              *role    = param;
+        struct volopt_map_entry  renamed = {0,};
+
+        if (strcmp (vme->option, "!logger") != 0)
+                return 0;
+        if (!strstr (vme->key, role))
+                return 0;
+
+        renamed = *vme;
+        renamed.option = "logger";
+
+        return basic_option_handler (graph, &renamed, NULL);
+}
+
+/*
+ * Apply "!log-format" options as "log-format" when the option key contains
+ * the role string passed in 'param'.
+ */
+static int
+log_format_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                           void *param)
+{
+        const char              *role    = param;
+        struct volopt_map_entry  renamed = {0,};
+
+        if (strcmp (vme->option, "!log-format") != 0)
+                return 0;
+        if (!strstr (vme->key, role))
+                return 0;
+
+        renamed = *vme;
+        renamed.option = "log-format";
+
+        return basic_option_handler (graph, &renamed, NULL);
+}
+
+/*
+ * Apply "!log-buf-size" options as "log-buf-size" when the option key
+ * contains the role string passed in 'param'.
+ */
+static int
+log_buf_size_option_handler (volgen_graph_t *graph,
+                             struct volopt_map_entry *vme,
+                             void *param)
+{
+        const char              *role    = param;
+        struct volopt_map_entry  renamed = {0,};
+
+        if (strcmp (vme->option, "!log-buf-size") != 0)
+                return 0;
+        if (!strstr (vme->key, role))
+                return 0;
+
+        renamed = *vme;
+        renamed.option = "log-buf-size";
+
+        return basic_option_handler (graph, &renamed, NULL);
+}
+
+/*
+ * Apply "!log-flush-timeout" options as "log-flush-timeout" when the option
+ * key contains the role string passed in 'param'.
+ */
+static int
+log_flush_timeout_option_handler (volgen_graph_t *graph,
+                                  struct volopt_map_entry *vme,
+                                  void *param)
+{
+        const char              *role    = param;
+        struct volopt_map_entry  renamed = {0,};
+
+        if (strcmp (vme->option, "!log-flush-timeout") != 0)
+                return 0;
+        if (!strstr (vme->key, role))
+                return 0;
+
+        renamed = *vme;
+        renamed.option = "log-flush-timeout";
+
+        return basic_option_handler (graph, &renamed, NULL);
+}
+
+/*
+ * Set "log-level" to dict["loglevel"] on every xlator of the graph whose
+ * type matches the glob "*<dict["xlator"]>". Returns the dict lookup error
+ * if either key is missing, otherwise the result of the last option set
+ * (0 when nothing matched).
+ */
+static int
+volgen_graph_set_xl_options (volgen_graph_t *graph, dict_t *dict)
+{
+        char      pattern[1024] = {0,};  /* for posix* -> *posix* */
+        char     *xlator        = NULL;
+        char     *loglevel      = NULL;
+        xlator_t *xl            = NULL;
+        int32_t   ret           = -1;
+
+        ret = dict_get_str (dict, "xlator", &xlator);
+        if (ret)
+                return ret;
+
+        ret = dict_get_str (dict, "loglevel", &loglevel);
+        if (ret)
+                return ret;
+
+        snprintf (pattern, sizeof (pattern), "*%s", xlator);
+
+        for (xl = first_of (graph); xl; xl = xl->next) {
+                if (fnmatch (pattern, xl->type, FNM_NOESCAPE) != 0)
+                        continue;
+
+                gf_msg_debug ("glusterd", 0,
+                              "Setting log level for xlator: %s",
+                              xl->type);
+                ret = xlator_set_option (xl, "log-level", loglevel);
+                if (ret)
+                        break;
+        }
+
+        return ret;
+}
+
+/*
+ * Option handler for brick (server) graphs: runs the auth, marker and the
+ * brick-role logging handlers in order, stopping at the first failure.
+ * 'param' is the volume's glusterd_volinfo_t.
+ */
+static int
+server_spec_option_handler (volgen_graph_t *graph,
+                            struct volopt_map_entry *vme, void *param)
+{
+        glusterd_volinfo_t *volinfo = param;
+        int                 ret     = 0;
+
+        ret = server_auth_option_handler (graph, vme, NULL);
+        if (ret)
+                return ret;
+
+        ret = server_check_marker_off (graph, vme, volinfo);
+        if (ret)
+                return ret;
+
+        ret = loglevel_option_handler (graph, vme, "brick");
+        if (ret)
+                return ret;
+
+        ret = sys_loglevel_option_handler (graph, vme, "brick");
+        if (ret)
+                return ret;
+
+        ret = logger_option_handler (graph, vme, "brick");
+        if (ret)
+                return ret;
+
+        ret = log_format_option_handler (graph, vme, "brick");
+        if (ret)
+                return ret;
+
+        ret = log_buf_size_option_handler (graph, vme, "brick");
+        if (ret)
+                return ret;
+
+        return log_flush_timeout_option_handler (graph, vme, "brick");
+}
+
+/*
+ * Option handler that applies the auth handling and then the per-xlator
+ * log level described by the dict passed in 'param'.
+ */
+static int
+server_spec_extended_option_handler (volgen_graph_t *graph,
+                                     struct volopt_map_entry *vme, void *param)
+{
+        dict_t *xl_opts = NULL;
+        int     ret     = 0;
+
+        GF_ASSERT (param);
+        xl_opts = (dict_t *)param;
+
+        ret = server_auth_option_handler (graph, vme, NULL);
+        if (ret)
+                return ret;
+
+        return volgen_graph_set_xl_options (graph, xl_opts);
+}
+
+static void get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo);
+
+/*
+ * Add the storage/posix xlator for 'brickinfo' to the brick graph and set
+ * its "directory" and "volume-id" options. "update-link-count-parent" is
+ * switched on when quota, trash or the option itself is enabled.
+ *
+ * Returns 0 on success, non-zero on failure.
+ *
+ * NOTE(review): quota_enabled defaults to true, so when no quota value is
+ * found "update-link-count-parent" is always turned on -- confirm that this
+ * is intended.
+ */
+static int
+brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        int           ret           = -1;
+        gf_boolean_t  quota_enabled = _gf_true;
+        gf_boolean_t  trash_enabled = _gf_false;
+        gf_boolean_t  pgfid_feat    = _gf_false;
+        char         *value         = NULL;
+        xlator_t     *xl            = NULL;
+
+        if (!graph || !volinfo || !set_dict || !brickinfo)
+                goto out;
+
+        /*
+         * 'value' is reset before every lookup: a failed lookup leaves it
+         * untouched and must not make us parse the previous option's value.
+         */
+        value = NULL;
+        ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_QUOTA, &value);
+        if (value) {
+                ret = gf_string2boolean (value, &quota_enabled);
+                if (ret)
+                        goto out;
+        }
+
+        value = NULL;
+        ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_TRASH, &value);
+        if (value) {
+                ret = gf_string2boolean (value, &trash_enabled);
+                if (ret)
+                        goto out;
+        }
+
+        value = NULL;
+        ret = glusterd_volinfo_get (volinfo,
+                                    "update-link-count-parent",
+                                    &value);
+        if (value) {
+                ret = gf_string2boolean (value, &pgfid_feat);
+                if (ret)
+                        goto out;
+        }
+
+        ret = -1;
+
+        xl = volgen_graph_add (graph, "storage/posix", volinfo->volname);
+        if (!xl)
+                goto out;
+
+        ret = xlator_set_option (xl, "directory", brickinfo->path);
+        if (ret)
+                goto out;
+
+        ret = xlator_set_option (xl, "volume-id",
+                                 uuid_utoa (volinfo->volume_id));
+        if (ret)
+                goto out;
+
+        /* report a failure to set the option instead of dropping it */
+        if (quota_enabled || pgfid_feat || trash_enabled)
+                ret = xlator_set_option (xl, "update-link-count-parent",
+                                         "on");
+out:
+        return ret;
+}
+/*
+ * Add the features/trash xlator to the brick graph with its fixed trash
+ * directory, the brick path and internal operations switched off.
+ */
+static int
+brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *trash = NULL;
+        int       ret   = -1;
+
+        trash = volgen_graph_add (graph, "features/trash", volinfo->volname);
+        if (!trash)
+                return ret;
+
+        ret = xlator_set_option (trash, "trash-dir", ".trashcan");
+        if (ret)
+                return ret;
+
+        ret = xlator_set_option (trash, "brick-path", brickinfo->path);
+        if (ret)
+                return ret;
+
+        return xlator_set_option (trash, "trash-internal-op", "off");
+}
+
+/*
+ * Add the performance/decompounder xlator to the brick graph. Fails (-1)
+ * when THIS or its private configuration is missing, or when the xlator
+ * cannot be added.
+ */
+static int
+brick_graph_add_decompounder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                              dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        glusterd_conf_t *conf = NULL;
+        xlator_t        *this = NULL;
+        xlator_t        *xl   = NULL;
+        int              ret  = -1;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        xl = volgen_graph_add (graph, "performance/decompounder",
+                               volinfo->volname);
+        ret = xl ? 0 : -1;
+out:
+        return ret;
+}
+
+/*
+ * Add the features/arbiter translator, but only for volumes whose
+ * arbiter_count is exactly 1 and only on the brick that
+ * get_last_brick_of_brick_group() reports as the last of its group.
+ *
+ * Returns 0 when nothing needs to be added or the translator was added,
+ * -1 if adding the translator failed.
+ */
+static int
+brick_graph_add_arbiter (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                         dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        glusterd_brickinfo_t *group_tail = NULL;
+
+        if (volinfo->arbiter_count != 1)
+                return 0;
+
+        /* Add arbiter only if it is the last (i.e. 3rd) brick. */
+        group_tail = get_last_brick_of_brick_group (volinfo, brickinfo);
+        if (group_tail != brickinfo)
+                return 0;
+
+        if (volgen_graph_add (graph, "features/arbiter",
+                              volinfo->volname) == NULL)
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Add the storage/bd translator when the build has HAVE_BD_XLATOR and the
+ * brick carries a non-empty volume-group name. Without HAVE_BD_XLATOR the
+ * function only validates its arguments.
+ *
+ * Returns 0 on success (including "nothing to add"), -1 on a NULL argument
+ * or when the translator cannot be added, or the non-zero result of a
+ * failed option set.
+ */
+static int
+brick_graph_add_bd (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                    dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        int rc = 0;
+
+        if (!graph || !volinfo || !set_dict || !brickinfo)
+                return -1;
+
+#ifdef HAVE_BD_XLATOR
+        if (brickinfo->vg[0] != '\0') {
+                xlator_t *bd = NULL;
+
+                /* Now add BD v2 xlator if volume is BD type */
+                bd = volgen_graph_add (graph, "storage/bd", volinfo->volname);
+                if (bd == NULL)
+                        return -1;
+
+                rc = xlator_set_option (bd, "device", "vg");
+                if (rc == 0)
+                        rc = xlator_set_option (bd, "export", brickinfo->vg);
+        }
+#endif
+
+        return rc;
+}
+
+/*
+ * Add the features/bitrot-stub translator and point its "export" option
+ * at the brick path.
+ *
+ * Returns -1 on a NULL argument or if the translator cannot be added,
+ * otherwise the result of setting the "export" option.
+ */
+static int
+brick_graph_add_bitrot_stub (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                             dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *stub = NULL;
+
+        if (!graph || !volinfo || !set_dict || !brickinfo)
+                return -1;
+
+        stub = volgen_graph_add (graph, "features/bitrot-stub",
+                                 volinfo->volname);
+        if (stub == NULL)
+                return -1;
+
+        return xlator_set_option (stub, "export", brickinfo->path);
+}
+
+/*
+ * Add the features/changelog translator. "changelog-brick" is set to the
+ * brick path and "changelog-dir" to "<brick path>/.glusterfs/changelogs".
+ *
+ * Returns -1 on a NULL argument or if the translator cannot be added,
+ * otherwise the result of the last option that was set.
+ */
+static int
+brick_graph_add_changelog (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                           dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        char      changelog_dir[PATH_MAX] = {0,};
+        xlator_t *changelog               = NULL;
+        int       rc                      = -1;
+
+        if (!graph || !volinfo || !set_dict || !brickinfo)
+                return -1;
+
+        changelog = volgen_graph_add (graph, "features/changelog",
+                                      volinfo->volname);
+        if (changelog == NULL)
+                return -1;
+
+        rc = xlator_set_option (changelog, "changelog-brick", brickinfo->path);
+        if (rc != 0)
+                return rc;
+
+        snprintf (changelog_dir, sizeof (changelog_dir),
+                  "%s/%s", brickinfo->path, ".glusterfs/changelogs");
+
+        return xlator_set_option (changelog, "changelog-dir", changelog_dir);
+}
+
+#if USE_GFDB /* only add changetimerecorder when GFDB is enabled */
+/*
+ * Add the features/changetimerecorder translator to the brick graph and
+ * configure it:
+ *   - "db-type" is fixed to sqlite3;
+ *   - "hot-brick" is taken from set_dict, defaulting to "off";
+ *   - "db-name" is "<last component of the brick path>.db";
+ *   - "db-path" is "<brick path>/.glusterfs/";
+ *   - the remaining recorder/consistency options get fixed defaults.
+ *
+ * Returns 0 on success, -1 on a NULL argument or if the translator cannot
+ * be added, or the non-zero result of the first option that fails.
+ */
+static int
+brick_graph_add_changetimerecorder (volgen_graph_t *graph,
+                                    glusterd_volinfo_t *volinfo,
+                                    dict_t *set_dict,
+                                    glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *xl                        = NULL;
+        int       ret                       = -1;
+        char     *brickname                 = NULL;
+        char     *path                      = NULL;
+        char      index_basepath[PATH_MAX]  = {0};
+        char     *hotbrick                  = NULL;
+
+        if (!graph || !volinfo || !set_dict || !brickinfo)
+                goto out;
+
+        path = brickinfo->path;
+
+        xl = volgen_graph_add (graph, "features/changetimerecorder",
+                               volinfo->volname);
+        /* Do not hand a NULL translator to xlator_set_option(). */
+        if (!xl)
+                goto out;
+
+        ret = xlator_set_option (xl, "db-type", "sqlite3");
+        if (ret)
+                goto out;
+
+        if (dict_get_str (set_dict, "hot-brick", &hotbrick))
+                hotbrick = "off";
+
+        ret = xlator_set_option (xl, "hot-brick", hotbrick);
+        if (ret)
+                goto out;
+
+        /* Use the last path component; fall back to the whole path when
+         * it contains no '/' (strrchr() returns NULL in that case). */
+        brickname = strrchr (path, '/');
+        brickname = brickname ? brickname + 1 : path;
+        snprintf (index_basepath, sizeof (index_basepath), "%s.db",
+                  brickname);
+        ret = xlator_set_option (xl, "db-name", index_basepath);
+        if (ret)
+                goto out;
+
+        snprintf (index_basepath, sizeof (index_basepath), "%s/%s",
+                  path, ".glusterfs/");
+        ret = xlator_set_option (xl, "db-path", index_basepath);
+        if (ret)
+                goto out;
+
+        ret = xlator_set_option (xl, "record-exit", "off");
+        if (ret)
+                goto out;
+
+        ret = xlator_set_option (xl, "ctr_link_consistency", "off");
+        if (ret)
+                goto out;
+
+        ret = xlator_set_option (xl, "ctr_lookupheal_link_timeout", "300");
+        if (ret)
+                goto out;
+
+        ret = xlator_set_option (xl, "ctr_lookupheal_inode_timeout", "300");
+        if (ret)
+                goto out;
+
+        ret = xlator_set_option (xl, "record-entry", "on");
+        if (ret)
+                goto out;
+
+out:
+        return ret;
+}
+#endif /* USE_GFDB */
+
+/*
+ * Add the features/access-control translator to the brick graph.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict or if the
+ * translator cannot be added.
+ */
+static int
+brick_graph_add_acl (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        if (volgen_graph_add (graph, "features/access-control",
+                              volinfo->volname) == NULL)
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Add the features/locks translator to the brick graph.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict or if the
+ * translator cannot be added.
+ */
+static int
+brick_graph_add_locks (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *locks = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        locks = volgen_graph_add (graph, "features/locks", volinfo->volname);
+
+        return (locks != NULL) ? 0 : -1;
+}
+
+/*
+ * Add the experimental/fdl translator when "features.fdl" evaluates to a
+ * non-zero value in set_dict.
+ *
+ * Add this before (above) io-threads because it's not thread-safe yet.
+ *
+ * Returns 0 when the option is off or the translator was added, -1 on a
+ * NULL graph/volinfo/set_dict or if adding the translator failed.
+ */
+static int
+brick_graph_add_fdl (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        if (!dict_get_str_boolean (set_dict, "features.fdl", 0))
+                return 0;
+
+        if (volgen_graph_add (graph, "experimental/fdl",
+                              volinfo->volname) == NULL)
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Add the performance/io-threads translator to the brick graph.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict or if the
+ * translator cannot be added.
+ */
+static int
+brick_graph_add_iot (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *iot = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        iot = volgen_graph_add (graph, "performance/io-threads",
+                                volinfo->volname);
+        if (iot == NULL)
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Add the features/barrier translator to the brick graph.
+ *
+ * Only graph and volinfo are validated here; set_dict is not checked.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo or if the translator
+ * cannot be added.
+ */
+static int
+brick_graph_add_barrier (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                         dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        if (!graph || !volinfo)
+                return -1;
+
+        return volgen_graph_add (graph, "features/barrier",
+                                 volinfo->volname) ? 0 : -1;
+}
+
+/*
+ * Create an unlinked protocol/client translator named
+ * "<volname>-client-<index>" that points at the given peer brick
+ * (remote-host = peer hostname, remote-subvolume = peer path) over the
+ * "socket" transport.
+ *
+ * Returns the new translator, or NULL if it could not be created or any
+ * option could not be set.
+ */
+xlator_t *
+add_one_peer (volgen_graph_t *graph, glusterd_brickinfo_t *peer,
+              char *volname, uint16_t index)
+{
+        xlator_t *client = NULL;
+
+        client = volgen_graph_add_nolink (graph, "protocol/client",
+                                          "%s-client-%u", volname,
+                                          index);
+        if (client == NULL)
+                return NULL;
+
+        /* TBD: figure out where to get the proper transport list */
+        /* TBD: deal with RDMA, SSL */
+        if (xlator_set_option (client, "transport-type", "socket") ||
+            xlator_set_option (client, "remote-host", peer->hostname) ||
+            xlator_set_option (client, "remote-subvolume", peer->path))
+                return NULL;
+
+        return client;
+}
+
+int /*
+ * Put an experimental/jbr translator ("<volname>-jbr") on top of the
+ * current brick graph and give it one protocol/client child for every
+ * other brick that shares brickinfo's group.
+ *
+ * Steps, in order:
+ *   1. instantiate the JBR xlator and link the current first xlator of
+ *      the graph under it;
+ *   2. set its "leader" option to "yes" when there is no previous brick
+ *      or the previous brick belongs to another group, "no" otherwise;
+ *   3. walk backwards to the first brick of this group so every brick
+ *      enumerates its peers in the same order;
+ *   4. add and link a client for each brick of the group except
+ *      brickinfo itself;
+ *   5. make the JBR xlator the first xlator of the graph.
+ *
+ * Returns 0 on success and -1 on any failure. Nothing allocated here is
+ * released on the failure paths (see the TBD note below).
+ *
+ * NOTE(review): the loops rely on list_prev()/list_next() yielding NULL at
+ * the ends of volinfo->bricks - confirm against their definitions.
+ */
+add_jbr_stuff (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ xlator_t *me;
+ glusterd_brickinfo_t *peer;
+ glusterd_brickinfo_t *prev_peer;
+ char *leader_opt;
+ uint16_t index = 0; /* numbers the "<volname>-client-<n>" children */
+ xlator_t *kid;
+
+ /* Create the JBR xlator, but defer linkage for now. */
+ me = xlator_instantiate ("experimental/jbr", "%s-jbr",
+ volinfo->volname);
+ if (!me || volgen_xlator_link(me, first_of(graph))) {
+ return -1;
+ }
+
+ /* Figure out if we should start as leader, mark appropriately. */
+ peer = list_prev (brickinfo, &volinfo->bricks,
+ glusterd_brickinfo_t, brick_list);
+ leader_opt = (!peer || (peer->group != brickinfo->group)) ? "yes"
+ : "no";
+ if (xlator_set_option(me, "leader", leader_opt)) {
+ /*
+ * TBD: fix memory leak ("me" and associated dictionary)
+ * There seems to be no function already to clean up a
+ * just-allocated translator object if something else fails.
+ * Apparently the convention elsewhere in this file is to return
+ * without freeing anything, but we can't keep being that sloppy
+ * forever.
+ */
+ return -1;
+ }
+
+ /*
+ * Make sure we're at the beginning of the list of bricks in this
+ * replica set. This way all bricks' volfiles have peers in a
+ * consistent order.
+ */
+ peer = brickinfo;
+ for (;;) {
+ prev_peer = list_prev (peer, &volinfo->bricks,
+ glusterd_brickinfo_t, brick_list);
+ if (!prev_peer || (prev_peer->group != brickinfo->group)) {
+ break;
+ }
+ peer = prev_peer;
+ }
+
+ /* Actually add the peers. */
+ do {
+ if (peer != brickinfo) { /* a brick needs no client for itself */
+ gf_log ("glusterd", GF_LOG_INFO,
+ "%s:%s needs client for %s:%s",
+ brickinfo->hostname, brickinfo->path,
+ peer->hostname, peer->path);
+ kid = add_one_peer (graph, peer,
+ volinfo->volname, index++);
+ if (!kid || volgen_xlator_link(me, kid)) {
+ return -1;
+ }
+ }
+ peer = list_next (peer, &volinfo->bricks,
+ glusterd_brickinfo_t, brick_list);
+ } while (peer && (peer->group == brickinfo->group));
+
+ /* Finish linkage to client file. */
+ glusterfs_graph_set_first(&graph->graph, me);
+
+ return 0;
+}
+
+/*
+ * Add the features/index translator, or hand over to add_jbr_stuff() when
+ * the volume has "cluster.jbr" enabled.
+ *
+ * For the index translator:
+ *   - "index-base" is "<brick path>/.glusterfs/indices";
+ *   - disperse volumes watch "trusted.ec.dirty";
+ *   - replicate and stripe-replicate volumes watch "trusted.afr.dirty"
+ *     and the pending prefix "trusted.afr.<volname>-".
+ *
+ * Returns 0 on success, -1 on a NULL argument or if the translator cannot
+ * be added, or the non-zero/negative result of the step that failed.
+ */
+static int
+brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        char      indices_dir[PATH_MAX] = {0};
+        char     *afr_pending           = NULL;
+        xlator_t *index_xl              = NULL;
+        int       ret                   = -1;
+
+        if (!graph || !volinfo || !brickinfo || !set_dict)
+                goto out;
+
+        /* For JBR we don't need/want index. */
+        if (glusterd_volinfo_get_boolean (volinfo, "cluster.jbr") > 0)
+                return add_jbr_stuff (graph, volinfo, brickinfo);
+
+        index_xl = volgen_graph_add (graph, "features/index",
+                                     volinfo->volname);
+        if (index_xl == NULL)
+                goto out;
+
+        snprintf (indices_dir, sizeof (indices_dir), "%s/%s",
+                  brickinfo->path, ".glusterfs/indices");
+        ret = xlator_set_option (index_xl, "index-base", indices_dir);
+        if (ret != 0)
+                goto out;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+                ret = xlator_set_option (index_xl, "xattrop64-watchlist",
+                                         "trusted.ec.dirty");
+                if (ret != 0)
+                        goto out;
+        }
+
+        if (volinfo->type != GF_CLUSTER_TYPE_STRIPE_REPLICATE &&
+            volinfo->type != GF_CLUSTER_TYPE_REPLICATE)
+                goto out;
+
+        ret = xlator_set_option (index_xl, "xattrop-dirty-watchlist",
+                                 "trusted.afr.dirty");
+        if (ret != 0)
+                goto out;
+
+        ret = gf_asprintf (&afr_pending, "trusted.afr.%s-",
+                           volinfo->volname);
+        if (ret < 0)
+                goto out;
+
+        ret = xlator_set_option (index_xl, "xattrop-pending-watchlist",
+                                 afr_pending);
+out:
+        GF_FREE (afr_pending);
+        return ret;
+}
+
+/*
+ * Add the features/marker translator and set its "volume-uuid" (textual
+ * volume id), "timestamp-file" (from get_vol_tstamp_file()) and
+ * "quota-version" (volinfo->quota_xattr_version) options.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict or if the
+ * translator cannot be added, or the non-zero result of the option that
+ * failed.
+ */
+static int
+brick_graph_add_marker (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                        dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        char      tstamp_path[PATH_MAX] = {0,};
+        char      uuid_str[64]          = {0,};
+        char      version_str[32]       = {0,};
+        xlator_t *marker                = NULL;
+        int       rc                    = -1;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        marker = volgen_graph_add (graph, "features/marker",
+                                   volinfo->volname);
+        if (marker == NULL)
+                return -1;
+
+        gf_uuid_unparse (volinfo->volume_id, uuid_str);
+        rc = xlator_set_option (marker, "volume-uuid", uuid_str);
+        if (rc != 0)
+                return rc;
+
+        get_vol_tstamp_file (tstamp_path, volinfo);
+        rc = xlator_set_option (marker, "timestamp-file", tstamp_path);
+        if (rc != 0)
+                return rc;
+
+        snprintf (version_str, sizeof (version_str), "%d",
+                  volinfo->quota_xattr_version);
+
+        return xlator_set_option (marker, "quota-version", version_str);
+}
+
+/*
+ * Add the features/quota translator. Its "volume-uuid" option is set to
+ * the volume name, and "server-quota" is set to the volume's
+ * VKEY_FEATURES_QUOTA value when one is available.
+ *
+ * Returns -1 on a NULL graph/volinfo/set_dict or if the translator cannot
+ * be added; otherwise the result of the last call made (the option set,
+ * or the glusterd_volinfo_get() lookup when it yielded no value).
+ */
+static int
+brick_graph_add_quota (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *quota       = NULL;
+        char     *quota_state = NULL;
+        int       rc          = -1;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        quota = volgen_graph_add (graph, "features/quota", volinfo->volname);
+        if (quota == NULL)
+                return -1;
+
+        rc = xlator_set_option (quota, "volume-uuid", volinfo->volname);
+        if (rc != 0)
+                return rc;
+
+        rc = glusterd_volinfo_get (volinfo, VKEY_FEATURES_QUOTA,
+                                   &quota_state);
+        if (quota_state != NULL)
+                rc = xlator_set_option (quota, "server-quota", quota_state);
+
+        return rc;
+}
+
+/*
+ * Add the features/read-only translator with its "read-only" option
+ * initialised to "off".
+ *
+ * Refuses to build the graph when "features.read-only" is enabled together
+ * with "features.worm" or "features.worm-file-level" in set_dict.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict, on the
+ * conflicting-options case, or when the translator or its option cannot
+ * be set up.
+ */
+static int
+brick_graph_add_ro (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                    dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        int       ret = -1;
+        xlator_t *xl  = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                goto out;
+
+        if (dict_get_str_boolean (set_dict, "features.read-only", 0) &&
+            (dict_get_str_boolean (set_dict, "features.worm", 0) ||
+             dict_get_str_boolean (set_dict, "features.worm-file-level", 0))) {
+                /* This is an option conflict, not a failed dict lookup,
+                 * and no system call has set errno: report it the same
+                 * way brick_graph_add_worm() does. */
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INCOMPATIBLE_VALUE,
+                        "read-only and worm cannot be set together");
+                ret = -1;
+                goto out;
+        }
+
+        xl = volgen_graph_add (graph, "features/read-only", volinfo->volname);
+        if (!xl)
+                return -1;
+        ret = xlator_set_option (xl, "read-only", "off");
+        if (ret)
+                return -1;
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Add the features/worm translator to the brick graph.
+ *
+ * Refuses to build the graph when "features.read-only" is enabled together
+ * with "features.worm" or "features.worm-file-level" in set_dict.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict, on the
+ * conflicting-options case, or if the translator cannot be added.
+ */
+static int
+brick_graph_add_worm (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                      dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *worm = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        if (dict_get_str_boolean (set_dict, "features.read-only", 0) &&
+            (dict_get_str_boolean (set_dict, "features.worm", 0) ||
+             dict_get_str_boolean (set_dict, "features.worm-file-level", 0))) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INCOMPATIBLE_VALUE,
+                        "read-only and worm cannot be set together");
+                return -1;
+        }
+
+        worm = volgen_graph_add (graph, "features/worm", volinfo->volname);
+
+        return (worm == NULL) ? -1 : 0;
+}
+
+/*
+ * Add the features/cdc translator in "server" mode when the
+ * "network.compression" option evaluates to true in set_dict.
+ *
+ * Returns 0 when compression is off or the translator was set up, -1 on a
+ * NULL graph/volinfo/set_dict, when the option lookup reports -1 or when
+ * the translator cannot be added, or the non-zero result of setting
+ * "mode".
+ */
+static int
+brick_graph_add_cdc (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *cdc      = NULL;
+        int       compress = -1;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        /* Check for compress volume option, and add it to the graph on
+         * server side */
+        compress = dict_get_str_boolean (set_dict, "network.compression", 0);
+        if (compress == -1 || compress == 0)
+                return compress;
+
+        cdc = volgen_graph_add (graph, "features/cdc", volinfo->volname);
+        if (cdc == NULL)
+                return -1;
+
+        return xlator_set_option (cdc, "mode", "server");
+}
+
+/*
+ * Add the debug/io-stats translator to the brick graph, named after the
+ * brick path (via volgen_graph_add_as()).
+ *
+ * Returns 0 on success, -1 on a NULL argument or if the translator cannot
+ * be added.
+ */
+static int
+brick_graph_add_io_stats (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                          dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        if (!graph || !volinfo || !set_dict || !brickinfo)
+                return -1;
+
+        if (volgen_graph_add_as (graph, "debug/io-stats",
+                                 brickinfo->path) == NULL)
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Add the features/upcall translator to the brick graph, logging a
+ * warning if that fails.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict or if the
+ * translator cannot be added.
+ */
+static int
+brick_graph_add_upcall (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                        dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *upcall = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        upcall = volgen_graph_add (graph, "features/upcall",
+                                   volinfo->volname);
+        if (upcall != NULL)
+                return 0;
+
+        gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                GD_MSG_GRAPH_FEATURE_ADD_FAIL,
+                "failed to add features/upcall to graph");
+        return -1;
+}
+
+/*
+ * Add the features/leases translator to the brick graph, logging a
+ * warning if that fails.
+ *
+ * Returns 0 on success, -1 on a NULL graph/volinfo/set_dict or if the
+ * translator cannot be added.
+ */
+static int
+brick_graph_add_leases (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                        dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *leases = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        leases = volgen_graph_add (graph, "features/leases",
+                                   volinfo->volname);
+        if (leases != NULL)
+                return 0;
+
+        gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                GD_MSG_GRAPH_FEATURE_ADD_FAIL,
+                "failed to add features/leases to graph");
+        return -1;
+}
+
+static int /*
+ * Add the protocol/server translator to the brick graph and configure it:
+ *   - "transport-type" from get_vol_transport_type();
+ *   - "transport.socket.bind-address" = brick hostname, only when glusterd
+ *     itself has that option set;
+ *   - the SSL options handled by RPC_SET_OPT;
+ *   - "transport.address-family" copied from the volume dict, if present;
+ *   - "auth.login.<brick path>.allow" = volume username, if any;
+ *   - "auth.login.<username>.password" = volume password, if any;
+ *   - "auth-path" = brick path;
+ *   - "auth.login.<brick path>.ssl-allow" from "auth.ssl-allow", if present.
+ *
+ * Returns 0 on success; -1 on a NULL argument, when the translator cannot
+ * be added or when an option cannot be set (a failing "transport-type"
+ * returns xlator_set_option()'s own non-zero value).
+ *
+ * NOTE(review): RPC_SET_OPT is defined outside this view; it presumably
+ * copies the named option from set_dict onto xl and runs its last argument
+ * on failure - confirm before touching the unused-looking `value` local.
+ */
+brick_graph_add_server (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ xlator_t *xl = NULL;
+ char transt[16] = {0,};
+ char *username = NULL;
+ char *password = NULL;
+ char key[1024] = {0};
+ char *ssl_user = NULL;
+ char *value = NULL;
+ char *address_family_data = NULL;
+
+ if (!graph || !volinfo || !set_dict || !brickinfo)
+ goto out;
+
+ get_vol_transport_type (volinfo, transt);
+
+ username = glusterd_auth_get_username (volinfo);
+ password = glusterd_auth_get_password (volinfo);
+
+ xl = volgen_graph_add (graph, "protocol/server", volinfo->volname);
+ if (!xl)
+ goto out;
+
+ ret = xlator_set_option (xl, "transport-type", transt);
+ if (ret)
+ goto out;
+
+ /*In the case of running multiple glusterds on a single machine,
+ * we should ensure that bricks don't listen on all IPs on that
+ * machine and break the IP based separation being brought about.*/
+ if (dict_get (THIS->options, "transport.socket.bind-address")) {
+ ret = xlator_set_option (xl, "transport.socket.bind-address",
+ brickinfo->hostname);
+ if (ret)
+ return -1;
+ }
+
+ RPC_SET_OPT(xl, SSL_OWN_CERT_OPT, "ssl-own-cert", return -1);
+ RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT,"ssl-private-key", return -1);
+ RPC_SET_OPT(xl, SSL_CA_LIST_OPT, "ssl-ca-list", return -1);
+ RPC_SET_OPT(xl, SSL_CRL_PATH_OPT, "ssl-crl-path", return -1);
+ RPC_SET_OPT(xl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth", return -1);
+ RPC_SET_OPT(xl, SSL_CIPHER_LIST_OPT,"ssl-cipher-list", return -1);
+ RPC_SET_OPT(xl, SSL_DH_PARAM_OPT, "ssl-dh-param", return -1);
+ RPC_SET_OPT(xl, SSL_EC_CURVE_OPT, "ssl-ec-curve", return -1);
+
+ if (dict_get_str (volinfo->dict, "transport.address-family",
+ &address_family_data) == 0) {
+ ret = xlator_set_option (xl, "transport.address-family",
+ address_family_data);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_WARNING,
+ "failed to set transport.address-family");
+ return -1;
+ }
+ }
+
+ if (username) { /* the allow list is keyed by brick path */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "auth.login.%s.allow",
+ brickinfo->path);
+
+ ret = xlator_set_option (xl, key, username);
+ if (ret)
+ return -1;
+ }
+
+ if (password) { /* the password is keyed by user name, not brick path */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "auth.login.%s.password",
+ username); /* NOTE(review): username is not re-checked for NULL here */
+
+ ret = xlator_set_option (xl, key, password);
+ if (ret)
+ return -1;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "auth-path");
+
+ ret = xlator_set_option (xl, key, brickinfo->path);
+ if (ret)
+ return -1;
+
+ if (dict_get_str (volinfo->dict, "auth.ssl-allow", &ssl_user) == 0) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "auth.login.%s.ssl-allow",
+ brickinfo->path);
+
+ ret = xlator_set_option (xl, key, ssl_user);
+ if (ret)
+ return -1;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * When the volume dict has "enable-pump" set to a non-zero value, build
+ * the replace-brick plumbing on top of the current graph:
+ *   - an unlinked protocol/client translator "<volname>-replace-brick"
+ *     carrying the SSL options, volume credentials, transport type and
+ *     address family;
+ *   - a cluster/pump translator "<volname>-pump" linked to both the
+ *     previous top of the graph and the replace-brick client.
+ *
+ * A missing "enable-pump" key (-ENOENT) is treated as "pump disabled".
+ *
+ * Returns 0 on success or when pump is disabled, -1 on a NULL
+ * graph/volinfo/set_dict or on any failure.
+ *
+ * The string from glusterd_get_trans_type_rb() is owned by this function;
+ * every exit taken while it is live goes through the `err` label so that
+ * it is released.
+ *
+ * NOTE(review): RPC_SET_OPT is defined outside this view and may refer to
+ * the `value` local - it is kept for that reason.
+ */
+static int
+brick_graph_add_pump (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                      dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        int       ret                 = -1;
+        int       pump                = 0;
+        xlator_t *xl                  = NULL;
+        xlator_t *txl                 = NULL;
+        xlator_t *rbxl                = NULL;
+        char     *username            = NULL;
+        char     *password            = NULL;
+        char     *ptranst             = NULL;
+        char     *value               = NULL;
+        char     *address_family_data = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                goto out;
+
+        ret = dict_get_int32 (volinfo->dict, "enable-pump", &pump);
+        if (ret == -ENOENT)
+                ret = pump = 0;
+        if (ret)
+                return -1;
+
+        username = glusterd_auth_get_username (volinfo);
+        password = glusterd_auth_get_password (volinfo);
+
+        if (pump) {
+                txl = first_of (graph);
+
+                rbxl = volgen_graph_add_nolink (graph, "protocol/client",
+                                                "%s-replace-brick",
+                                                volinfo->volname);
+                if (!rbxl)
+                        return -1;
+
+                ptranst = glusterd_get_trans_type_rb (volinfo->transport_type);
+                if (NULL == ptranst)
+                        return -1;
+
+                /* From here until ptranst is freed, fail via `err`. */
+                RPC_SET_OPT(rbxl, SSL_OWN_CERT_OPT,   "ssl-own-cert",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_PRIVATE_KEY_OPT,"ssl-private-key",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_CA_LIST_OPT,    "ssl-ca-list",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_CRL_PATH_OPT,   "ssl-crl-path",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_CIPHER_LIST_OPT,"ssl-cipher-list",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_DH_PARAM_OPT,   "ssl-dh-param",
+                            goto err);
+                RPC_SET_OPT(rbxl, SSL_EC_CURVE_OPT,   "ssl-ec-curve",
+                            goto err);
+
+                if (username) {
+                        ret = xlator_set_option (rbxl, "username", username);
+                        if (ret)
+                                goto err;
+                }
+
+                if (password) {
+                        ret = xlator_set_option (rbxl, "password", password);
+                        if (ret)
+                                goto err;
+                }
+
+                ret = xlator_set_option (rbxl, "transport-type", ptranst);
+                GF_FREE (ptranst);
+                ptranst = NULL;
+                if (ret)
+                        return -1;
+
+                if (dict_get_str (volinfo->dict, "transport.address-family",
+                                  &address_family_data) == 0) {
+                        ret = xlator_set_option (rbxl,
+                                                 "transport.address-family",
+                                                 address_family_data);
+                        if (ret) {
+                                gf_log ("glusterd", GF_LOG_WARNING,
+                                        "failed to set transport.address-family");
+                                return -1;
+                        }
+                }
+
+                xl = volgen_graph_add_nolink (graph, "cluster/pump", "%s-pump",
+                                              volinfo->volname);
+                if (!xl)
+                        return -1;
+                ret = volgen_xlator_link (xl, txl);
+                if (ret)
+                        return -1;
+                ret = volgen_xlator_link (xl, rbxl);
+                if (ret)
+                        return -1;
+        }
+
+out:
+        return ret;
+
+err:
+        GF_FREE (ptranst);
+        return -1;
+}
+
+
+/* The order of xlator definition here determines
+ * the topology of the brick graph.
+ *
+ * Each entry pairs a builder with the debug key under which the xlator
+ * can be selected by the "debug.trace" / "debug.error-gen" options; a
+ * NULL key means the xlator cannot be selected that way. server_graph_builder
+ * walks this table from the last entry to the first. */
+static volgen_brick_xlator_t server_graph_table[] = {
+        {brick_graph_add_server, NULL},
+        {brick_graph_add_decompounder, "decompounder"},
+        /* io-stats has no debug key: use a NULL pointer like the other
+         * keyless entries, not the string "NULL". */
+        {brick_graph_add_io_stats, NULL},
+        {brick_graph_add_cdc, NULL},
+        {brick_graph_add_quota, "quota"},
+        {brick_graph_add_index, "index"},
+        {brick_graph_add_barrier, NULL},
+        {brick_graph_add_marker, "marker"},
+        {brick_graph_add_fdl, "fdl"},
+        {brick_graph_add_iot, "io-threads"},
+        {brick_graph_add_upcall, "upcall"},
+        {brick_graph_add_leases, "leases"},
+        {brick_graph_add_pump, NULL},
+        {brick_graph_add_ro, NULL},
+        {brick_graph_add_worm, NULL},
+        {brick_graph_add_locks, "locks"},
+        {brick_graph_add_acl, "acl"},
+        {brick_graph_add_bitrot_stub, "bitrot-stub"},
+        {brick_graph_add_changelog, "changelog"},
+#if USE_GFDB /* changetimerecorder depends on gfdb */
+        {brick_graph_add_changetimerecorder, "changetimerecorder"},
+#endif
+        {brick_graph_add_bd, "bd"},
+        {brick_graph_add_trash, "trash"},
+        {brick_graph_add_arbiter, "arbiter"},
+        {brick_graph_add_posix, "posix"},
+};
+
+/*
+ * Tell whether `xlator` names a brick-side translator, i.e. whether it
+ * equals the debug key of some server_graph_table entry. Entries without
+ * a debug key are skipped.
+ *
+ * Returns GF_XLATOR_SERVER on a match, GF_XLATOR_NONE otherwise.
+ */
+static glusterd_server_xlator_t
+get_server_xlator (char *xlator)
+{
+        int i    = 0;
+        int size = sizeof (server_graph_table)/sizeof (server_graph_table[0]);
+
+        for (i = 0; i < size; i++) {
+                if (!server_graph_table[i].dbg_key)
+                        continue;
+                /* strcmp() returns 0 on equality; the previous test was
+                 * inverted and matched on the first key that differed. */
+                if (strcmp (xlator, server_graph_table[i].dbg_key) == 0)
+                        return GF_XLATOR_SERVER;
+        }
+
+        return GF_XLATOR_NONE;
+}
+
+/*
+ * Map a debug-target name to a client translator kind: only the literal
+ * name "client" is recognised (as GF_CLNT_XLATOR_FUSE); anything else
+ * yields GF_CLNT_XLATOR_NONE.
+ */
+static glusterd_client_xlator_t
+get_client_xlator (char *xlator)
+{
+        if (strcmp (xlator, "client") != 0)
+                return GF_CLNT_XLATOR_NONE;
+
+        return GF_CLNT_XLATOR_FUSE;
+}
+
+/*
+ * Option handler for "!debug" entries: adds the entry's translator type
+ * (vme->voltype) to the graph under the volume name passed in `param`.
+ *
+ * For "debug.trace" and "debug.error-gen" the value is a translator name
+ * and the translator is added when get_server_xlator() or
+ * get_client_xlator() recognises it. For any other "!debug" key the value
+ * is parsed as a boolean and the translator is added only when it is true.
+ *
+ * Returns 0 when nothing has to be done or the translator was added,
+ * -1 when the boolean cannot be parsed or adding the translator fails.
+ */
+static int
+debugxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                        void *param)
+{
+        char         *volname = param;
+        gf_boolean_t  enabled = _gf_false;
+
+        if (strcmp (vme->option, "!debug") != 0)
+                return 0;
+
+        if (strcmp (vme->key, "debug.trace") == 0 ||
+            strcmp (vme->key, "debug.error-gen") == 0) {
+                /* Value names the translator to attach the debug xl to. */
+                if (get_server_xlator (vme->value) == GF_XLATOR_NONE &&
+                    get_client_xlator (vme->value) == GF_CLNT_XLATOR_NONE)
+                        return 0;
+        } else {
+                if (gf_string2boolean (vme->value, &enabled) == -1)
+                        return -1;
+                if (!enabled)
+                        return 0;
+        }
+
+        return volgen_graph_add (graph, vme->voltype, volname) ? 0 : -1;
+}
+
+/*
+ * For each of the "debug.trace" and "debug.error-gen" options present in
+ * set_dict, run the debug option handler over the graph when the option's
+ * value equals `xlname`.
+ *
+ * A NULL xlname means the translator has no debug key; nothing is done.
+ *
+ * Returns 0 on success (including "nothing to add"), or the non-zero
+ * result of volgen_graph_set_options_generic() on failure.
+ */
+int
+check_and_add_debug_xl (volgen_graph_t *graph, dict_t *set_dict, char *volname,
+                        char *xlname)
+{
+        char   *debug_keys[] = {"debug.trace", "debug.error-gen"};
+        char   *target       = NULL;
+        size_t  k            = 0;
+        int     rc           = 0;
+
+        if (xlname == NULL)
+                return 0;
+
+        for (k = 0; k < sizeof (debug_keys) / sizeof (debug_keys[0]); k++) {
+                if (dict_get_str (set_dict, debug_keys[k], &target) != 0)
+                        continue;
+                if (strcmp (xlname, target) != 0)
+                        continue;
+
+                rc = volgen_graph_set_options_generic (graph,
+                                                set_dict, volname,
+                                                &debugxl_option_handler);
+                if (rc != 0)
+                        return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * Build the brick (server) graph: run every builder of server_graph_table
+ * from the last entry to the first, adding the matching debug translator
+ * after each one, then apply the volume options.
+ *
+ * When set_dict carries an "xlator" key it must also carry "loglevel";
+ * in that case the extended option handler is used with set_dict as its
+ * parameter, otherwise the regular handler is used with volinfo.
+ *
+ * Returns 0 on success, or the non-zero result of the step that failed.
+ */
+static int
+server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                      dict_t *set_dict, void *param)
+{
+        char *xlator   = NULL;
+        char *loglevel = NULL;
+        int   ret      = 0;
+        int   i        = 0;
+
+        for (i = sizeof (server_graph_table)/sizeof (server_graph_table[0]) - 1;
+             i >= 0; i--) {
+                ret = server_graph_table[i].builder (graph, volinfo, set_dict,
+                                                     param);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_BUILD_GRAPH_FAILED, "Builing graph "
+                                "failed for server graph table entry: %d", i);
+                        return ret;
+                }
+
+                ret = check_and_add_debug_xl (graph, set_dict,
+                                              volinfo->volname,
+                                              server_graph_table[i].dbg_key);
+                if (ret)
+                        return ret;
+        }
+
+        /* got a cli log level request */
+        if (dict_get_str (set_dict, "xlator", &xlator) == 0) {
+                ret = dict_get_str (set_dict, "loglevel", &loglevel);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED, "could not get both"
+                                " translator name and loglevel for log level request");
+                        return ret;
+                }
+        }
+
+        if (xlator && loglevel)
+                return volgen_graph_set_options_generic (graph, set_dict,
+                                        (void *)set_dict,
+                                        &server_spec_extended_option_handler);
+
+        return volgen_graph_set_options_generic (graph, set_dict, volinfo,
+                                                 &server_spec_option_handler);
+}
+
+
+/*
+ * Build the graph for the server (brick) role, with the option overrides
+ * given in mod_dict, by running server_graph_builder through
+ * build_graph_generic(). brickinfo is forwarded as the builder parameter.
+ */
+static int
+build_server_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                    dict_t *mod_dict, glusterd_brickinfo_t *brickinfo)
+{
+        int ret = 0;
+
+        ret = build_graph_generic (graph, volinfo, mod_dict, brickinfo,
+                                   &server_graph_builder);
+
+        return ret;
+}
+
+/*
+ * Option handler for "!perf" entries: adds the entry's translator type to
+ * the graph when its value parses as boolean true. `param` is the volinfo.
+ *
+ * "performance.open-behind" is skipped when the entry's op_version is
+ * newer than the volume's client_op_version.
+ *
+ * Returns 0 when nothing has to be done or the translator was added,
+ * -1 when the value cannot be parsed or adding the translator fails.
+ */
+static int
+perfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                       void *param)
+{
+        glusterd_volinfo_t *volinfo = NULL;
+        gf_boolean_t        enabled = _gf_false;
+
+        GF_ASSERT (param);
+        volinfo = param;
+
+        if (strcmp (vme->option, "!perf") != 0)
+                return 0;
+
+        if (gf_string2boolean (vme->value, &enabled) == -1)
+                return -1;
+        if (!enabled)
+                return 0;
+
+        /* Check op-version before adding the 'open-behind' xlator in the
+         * graph */
+        if (strcmp (vme->key, "performance.open-behind") == 0 &&
+            vme->op_version > volinfo->client_op_version)
+                return 0;
+
+        return volgen_graph_add (graph, vme->voltype,
+                                 volinfo->volname) ? 0 : -1;
+}
+
+/*
+ * Option handler for "!nfsperf" entries: adds the entry's translator type
+ * to the graph, under the volume name passed in `param`, when its value
+ * parses as boolean true.
+ *
+ * Returns 0 when nothing has to be done or the translator was added,
+ * -1 when the value cannot be parsed or adding the translator fails.
+ */
+static int
+nfsperfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                          void *param)
+{
+        char         *volname = param;
+        gf_boolean_t  enabled = _gf_false;
+
+        if (strcmp (vme->option, "!nfsperf") != 0)
+                return 0;
+
+        if (gf_string2boolean (vme->value, &enabled) == -1)
+                return -1;
+        if (!enabled)
+                return 0;
+
+        if (volgen_graph_add (graph, vme->voltype, volname) == NULL)
+                return -1;
+
+        return 0;
+}
+
+#if (HAVE_LIB_XML)
+/*
+ * Close the currently open XML element and then the XML document on
+ * `writer`.
+ *
+ * Returns 0 on success, -1 if either libxml2 call reports an error; the
+ * outcome is also written to the debug log.
+ */
+int
+end_sethelp_xml_doc (xmlTextWriterPtr writer)
+{
+        int ret = -1;
+
+        if (xmlTextWriterEndElement (writer) < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_TEXT_WRITE_FAIL, "Could not end an "
+                        "xmlElement");
+        } else if (xmlTextWriterEndDocument (writer) < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_TEXT_WRITE_FAIL, "Could not end an "
+                        "xmlDocument");
+        } else {
+                ret = 0;
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Create the XML buffer and in-memory text writer used for the
+ * "volume set help" XML output, start the document and open its
+ * top-level "options" element.
+ *
+ * On return *buf and *writer hold whatever was created up to the point of
+ * success or failure.
+ *
+ * Returns 0 on success; -1 if the buffer, the writer or the "options"
+ * element cannot be created; the negative libxml2 result if the document
+ * cannot be started. The outcome is also written to the debug log.
+ */
+int
+init_sethelp_xml_doc (xmlTextWriterPtr *writer, xmlBufferPtr *buf)
+{
+        int ret = -1;
+
+        *buf = xmlBufferCreateSize (8192);
+        /* Test the created buffer, not the caller's out-parameter. */
+        if (*buf == NULL) {
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "Error creating the xml "
+                        "buffer");
+                ret = -1;
+                goto out;
+        }
+
+        xmlBufferSetAllocationScheme (*buf,XML_BUFFER_ALLOC_DOUBLEIT);
+
+        *writer = xmlNewTextWriterMemory(*buf, 0);
+        /* Likewise, test the created writer itself. */
+        if (*writer == NULL) {
+                gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, " Error creating the xml "
+                        "writer");
+                ret = -1;
+                goto out;
+        }
+
+        ret = xmlTextWriterStartDocument(*writer, "1.0", "UTF-8", "yes");
+        if (ret < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_DOC_START_FAIL, "Error While starting the "
+                        "xmlDoc");
+                goto out;
+        }
+
+        ret = xmlTextWriterStartElement(*writer, (xmlChar *)"options");
+        if (ret < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_ELE_CREATE_FAIL, "Could not create an "
+                        "xmlElement");
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+ out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+
+}
+
+/*
+ * Write one <option> element describing a volume-set option to `writer`,
+ * with <defaultValue>, <description> and <name> children, in that order.
+ *
+ * Returns 0 on success, -1 as soon as any libxml2 write reports an error;
+ * the outcome is also written to the debug log.
+ */
+int
+xml_add_volset_element (xmlTextWriterPtr writer, const char *name,
+                        const char *def_val, const char *dscrpt)
+{
+        int ret = -1;
+
+        GF_ASSERT (name);
+
+        if (xmlTextWriterStartElement (writer, (xmlChar *) "option") < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_ELE_CREATE_FAIL, "Could not create an "
+                        "xmlElemetnt");
+                goto out;
+        }
+
+        if (xmlTextWriterWriteFormatElement (writer,
+                                             (xmlChar*)"defaultValue",
+                                             "%s", def_val) < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_ELE_CREATE_FAIL, "Could not create an "
+                        "xmlElemetnt");
+                goto out;
+        }
+
+        if (xmlTextWriterWriteFormatElement (writer,
+                                             (xmlChar *)"description",
+                                             "%s", dscrpt ) < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_ELE_CREATE_FAIL, "Could not create an "
+                        "xmlElemetnt");
+                goto out;
+        }
+
+        if (xmlTextWriterWriteFormatElement (writer, (xmlChar *) "name",
+                                             "%s", name) < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_ELE_CREATE_FAIL, "Could not create an "
+                        "xmlElemetnt");
+                goto out;
+        }
+
+        if (xmlTextWriterEndElement (writer) < 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_XML_ELE_CREATE_FAIL, "Could not end an "
+                        "xmlElemetnt");
+                goto out;
+        }
+
+        ret = 0;
+ out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+
+}
+
+#endif
+
+/*
+ * Derive the translator option name for a volopt map entry and store it
+ * in *key.
+ *
+ *   - The auth-allow, auth-reject and nfs-disable map keys yield a freshly
+ *     duplicated copy of the matching option key (release it with
+ *     _free_xlator_opt_key()).
+ *   - Otherwise, if the entry has an `option`, *key points into it,
+ *     skipping a leading '!'.
+ *   - Otherwise *key points just past the first '.' of the entry's key.
+ *
+ * Returns 0 on success, -1 (after logging) when the derived name is empty
+ * or the entry's key contains no '.'.
+ */
+int
+_get_xlator_opt_key_from_vme ( struct volopt_map_entry *vme, char **key)
+{
+        char *dot = NULL;
+        int   ret = 0;
+
+        GF_ASSERT (vme);
+        GF_ASSERT (key);
+
+        if (strcmp (vme->key, AUTH_ALLOW_MAP_KEY) == 0) {
+                *key = gf_strdup (AUTH_ALLOW_OPT_KEY);
+        } else if (strcmp (vme->key, AUTH_REJECT_MAP_KEY) == 0) {
+                *key = gf_strdup (AUTH_REJECT_OPT_KEY);
+        } else if (strcmp (vme->key, NFS_DISABLE_MAP_KEY) == 0) {
+                *key = gf_strdup (NFS_DISABLE_OPT_KEY);
+        } else if (vme->option != NULL) {
+                if (vme->option[0] != '!') {
+                        *key = vme->option;
+                } else {
+                        /* A bare "!" leaves no option name behind. */
+                        *key = vme->option + 1;
+                        if ((*key)[0] == '\0')
+                                ret = -1;
+                }
+        } else {
+                dot = strchr (vme->key, '.');
+                if (dot == NULL) {
+                        *key = NULL;
+                        ret = -1;
+                } else {
+                        *key = dot + 1;
+                        if ((*key)[0] == '\0')
+                                ret = -1;
+                }
+        }
+
+        if (ret != 0)
+                gf_msg ("glusterd", GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "Wrong entry found in  "
+                        "glusterd_volopt_map entry %s", vme->key);
+        else
+                gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Release a key obtained from _get_xlator_opt_key_from_vme(). Only keys
+ * whose text equals one of the auth-allow, auth-reject or nfs-disable
+ * option keys are freed; any other key is left untouched.
+ */
+void
+_free_xlator_opt_key (char *key)
+{
+        int duplicated = 0;
+
+        GF_ASSERT (key);
+
+        duplicated = (strcmp (key, AUTH_ALLOW_OPT_KEY) == 0) ||
+                     (strcmp (key, AUTH_REJECT_OPT_KEY) == 0) ||
+                     (strcmp (key, NFS_DISABLE_OPT_KEY) == 0);
+        if (duplicated)
+                GF_FREE (key);
+}
+
+static xlator_t * /*
+ * Create an unlinked protocol/client translator named `xl_id` and
+ * configure it to reach `subvol` on `hostname` over transport `transt`:
+ *   - "ping-timeout" is fixed to 42;
+ *   - "remote-host" is set only when hostname is non-NULL;
+ *   - "transport.address-family" is copied from the volume dict, if set;
+ *   - the volume username/password are added only when set_dict marks the
+ *     client as trusted ("trusted-client" == GF_CLIENT_TRUSTED);
+ *   - "transport.socket.ssl-enabled" is set to "true" when "client.ssl"
+ *     in set_dict parses as boolean true;
+ *   - the SSL options handled by RPC_SET_OPT.
+ *
+ * Returns the new translator, or NULL on any failure.
+ *
+ * NOTE(review): volinfo is dereferenced but, unlike the other pointer
+ * arguments, not asserted. RPC_SET_OPT is defined outside this view and
+ * may refer to the `value` local - confirm before removing it.
+ */
+volgen_graph_build_client (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ char *hostname, char *subvol, char *xl_id,
+ char *transt, dict_t *set_dict)
+{
+ xlator_t *xl = NULL;
+ int ret = -2;
+ uint32_t client_type = GF_CLIENT_OTHER;
+ char *str = NULL;
+ char *ssl_str = NULL;
+ gf_boolean_t ssl_bool = _gf_false;
+ char *value = NULL;
+ char *address_family_data = NULL;
+
+ GF_ASSERT (graph);
+ GF_ASSERT (subvol);
+ GF_ASSERT (xl_id);
+ GF_ASSERT (transt);
+
+ xl = volgen_graph_add_nolink (graph, "protocol/client",
+ "%s", xl_id);
+ if (!xl)
+ goto err;
+
+ ret = xlator_set_option (xl, "ping-timeout", "42");
+ if (ret)
+ goto err;
+
+ if (hostname) {
+ ret = xlator_set_option (xl, "remote-host", hostname);
+ if (ret)
+ goto err;
+ }
+
+ ret = xlator_set_option (xl, "remote-subvolume", subvol);
+ if (ret)
+ goto err;
+
+ ret = xlator_set_option (xl, "transport-type", transt);
+ if (ret)
+ goto err;
+
+ if (dict_get_str (volinfo->dict, "transport.address-family",
+ &address_family_data) == 0) {
+ ret = xlator_set_option (xl,
+ "transport.address-family",
+ address_family_data);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_WARNING,
+ "failed to set transport.address-family");
+ goto err;
+ }
+ }
+
+ ret = dict_get_uint32 (set_dict, "trusted-client",
+ &client_type);
+
+ if (!ret && client_type == GF_CLIENT_TRUSTED) { /* credentials only for trusted clients */
+ str = NULL;
+ str = glusterd_auth_get_username (volinfo);
+ if (str) {
+ ret = xlator_set_option (xl, "username",
+ str);
+ if (ret)
+ goto err;
+ }
+
+ str = glusterd_auth_get_password (volinfo);
+ if (str) {
+ ret = xlator_set_option (xl, "password",
+ str);
+ if (ret)
+ goto err;
+ }
+ }
+
+ if (dict_get_str(set_dict,"client.ssl",&ssl_str) == 0) {
+ if (gf_string2boolean(ssl_str,&ssl_bool) == 0) { /* an unparsable value is ignored */
+ if (ssl_bool) {
+ ret = xlator_set_option(xl,
+ "transport.socket.ssl-enabled",
+ "true");
+ if (ret) {
+ goto err;
+ }
+ }
+ }
+ }
+
+ RPC_SET_OPT(xl, SSL_OWN_CERT_OPT, "ssl-own-cert", goto err);
+ RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT,"ssl-private-key", goto err);
+ RPC_SET_OPT(xl, SSL_CA_LIST_OPT, "ssl-ca-list", goto err);
+ RPC_SET_OPT(xl, SSL_CRL_PATH_OPT, "ssl-crl-path", goto err);
+ RPC_SET_OPT(xl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth", goto err);
+ RPC_SET_OPT(xl, SSL_CIPHER_LIST_OPT,"ssl-cipher-list", goto err);
+ RPC_SET_OPT(xl, SSL_DH_PARAM_OPT, "ssl-dh-param", goto err);
+ RPC_SET_OPT(xl, SSL_EC_CURVE_OPT, "ssl-ec-curve", goto err);
+
+ return xl;
+err:
+ return NULL;
+}
+
+/*
+ * Add one protocol/client xlator to @graph for every brick of @volinfo.
+ *
+ * The brick layout is sanity-checked first: the brick count must be
+ * non-zero and, for non-tier volumes, must be a multiple of
+ * dist_leaf_count whenever dist_leaf_count is smaller than the brick
+ * count.  A zero dist_leaf_count on a non-tier volume is rejected up front
+ * so the modulo below can never divide by zero.
+ *
+ * @param is unused here.
+ *
+ * Returns 0 on success, -1 on any inconsistency or failure.
+ */
+static int
+volgen_graph_build_clients (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                            dict_t *set_dict, void *param)
+{
+        int                      i          = 0;
+        int                      ret        = -1;
+        char                     transt[16] = {0,};
+        glusterd_brickinfo_t    *brick      = NULL;
+        xlator_t                *xl         = NULL;
+
+        if (volinfo->brick_count == 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOLUME_INCONSISTENCY,
+                        "volume inconsistency: brick count is 0");
+                goto out;
+        }
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                if (volinfo->dist_leaf_count == 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_VOLUME_INCONSISTENCY,
+                                "volume inconsistency: "
+                                "number of bricks per cluster is 0");
+                        goto out;
+                }
+
+                if ((volinfo->dist_leaf_count < volinfo->brick_count) &&
+                    ((volinfo->brick_count %
+                      volinfo->dist_leaf_count) != 0)) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_VOLUME_INCONSISTENCY,
+                                "volume inconsistency: "
+                                "total number of bricks (%d) is not divisible with "
+                                "number of bricks per cluster (%d) in a multi-cluster "
+                                "setup",
+                                volinfo->brick_count,
+                                volinfo->dist_leaf_count);
+                        goto out;
+                }
+        }
+
+        get_transport_type (volinfo, set_dict, transt, _gf_false);
+
+        /* A dual-transport volume gets plain tcp clients here */
+        if (!strcmp (transt, "tcp,rdma"))
+                strcpy (transt, "tcp");
+
+        i = 0;
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                xl = volgen_graph_build_client (graph, volinfo,
+                                                brick->hostname, brick->path,
+                                                brick->brick_id,
+                                                transt, set_dict);
+                if (!xl) {
+                        ret = -1;
+                        goto out;
+                }
+
+                i++;
+        }
+
+        /* The list walked must agree with the recorded brick count */
+        if (i != volinfo->brick_count) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOLUME_INCONSISTENCY,
+                        "volume inconsistency: actual number of bricks (%d) "
+                        "differs from brick count (%d)", i,
+                        volinfo->brick_count);
+
+                ret = -1;
+                goto out;
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Build the client xlators the self-heal daemon needs for a tiered volume.
+ *
+ * If both the cold and the hot tier are of a self-heal compatible type,
+ * clients are built for the whole volume.  If exactly one tier is
+ * compatible, a temporary volinfo for just that tier is created, clients
+ * are built from it, and it is deleted again.  If neither is compatible
+ * nothing is built and 0 is returned.
+ */
+static int
+volgen_graph_build_clients_for_tier_shd (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         dict_t *set_dict)
+{
+        int                  ret         = 0;
+        glusterd_volinfo_t  *tier_vol    = NULL;
+        gf_boolean_t         want_hot    = _gf_false;
+        gf_boolean_t         cold_heals  = _gf_false;
+        gf_boolean_t         hot_heals   = _gf_false;
+
+        cold_heals = glusterd_is_shd_compatible_type
+                                (volinfo->tier_info.cold_type);
+        hot_heals  = glusterd_is_shd_compatible_type
+                                (volinfo->tier_info.hot_type);
+
+        if (cold_heals && hot_heals)
+                return volgen_graph_build_clients (graph, volinfo,
+                                                   set_dict, NULL);
+
+        if (!cold_heals && !hot_heals)
+                return 0;
+
+        /* Exactly one tier is heal-capable from here on */
+        if (hot_heals)
+                want_hot = _gf_true;
+
+        ret = glusterd_create_sub_tier_volinfo (volinfo, &tier_vol,
+                                                want_hot,
+                                                volinfo->volname);
+        if (ret == 0)
+                ret = volgen_graph_build_clients (graph, tier_vol,
+                                                  set_dict, NULL);
+
+        if (tier_vol)
+                glusterd_volinfo_delete (tier_vol);
+        return ret;
+}
+
+/*
+ * Group @child_count xlators, walking backwards (via ->prev) from @trav,
+ * under newly created parent xlators of type @xl_type.
+ *
+ * A new parent is created for every @sub_count children; it is named with
+ * @xl_namefmt, which receives the volume name and a running index that
+ * starts at @start_count.
+ *
+ * Returns the number of parents created, or -1 on failure.  A zero
+ * @child_count or @sub_count, or running off the xlator list before
+ * @child_count children were linked, is treated as failure (the latter two
+ * used to be a division by zero and a NULL dereference respectively).
+ */
+static int
+volgen_link_bricks (volgen_graph_t *graph,
+                    glusterd_volinfo_t *volinfo, char *xl_type,
+                    char *xl_namefmt, size_t child_count,
+                    size_t sub_count, size_t start_count,
+                    xlator_t *trav)
+{
+        size_t           i       = 0;
+        int              j       = (int) start_count;
+        xlator_t        *xl      = NULL;
+        char            *volname = NULL;
+        int              ret     = -1;
+
+        if (child_count == 0 || sub_count == 0)
+                goto out;
+        volname = volinfo->volname;
+
+        for (;; trav = trav->prev) {
+                if (!trav) {
+                        /* fewer xlators in the list than requested */
+                        ret = -1;
+                        goto out;
+                }
+
+                /* start a new parent every sub_count children */
+                if ((i % sub_count) == 0) {
+                        xl = volgen_graph_add_nolink (graph, xl_type,
+                                                      xl_namefmt, volname, j);
+                        if (!xl) {
+                                ret = -1;
+                                goto out;
+                        }
+                        j++;
+                }
+
+                ret = volgen_xlator_link (xl, trav);
+                if (ret)
+                        goto out;
+
+                i++;
+                if (i == child_count)
+                        break;
+        }
+
+        ret = j - (int) start_count;
+out:
+        return ret;
+}
+
+/*
+ * Step (child_count - 1) entries forward from the first xlator of @graph
+ * and link bricks backwards from there, numbering the new parents from
+ * @start_count.  Returns -1 when @child_count is 0, otherwise whatever
+ * volgen_link_bricks() returns.
+ */
+static int
+volgen_link_bricks_from_list_tail_start (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         char *xl_type,
+                                         char *xl_namefmt, size_t child_count,
+                                         size_t sub_count, size_t start_count)
+{
+        xlator_t        *start     = NULL;
+        size_t           remaining = child_count;
+
+        if (remaining == 0)
+                return -1;
+
+        start = first_of(graph);
+        while (--remaining)
+                start = start->next;
+
+        return volgen_link_bricks (graph, volinfo, xl_type, xl_namefmt,
+                                   child_count, sub_count, start_count,
+                                   start);
+}
+
+/*
+ * Walk to the last xlator reachable through ->next from the first xlator
+ * of @graph and link bricks backwards from there, numbering the new
+ * parents from @start_count.  Returns whatever volgen_link_bricks()
+ * returns.
+ */
+static int
+volgen_link_bricks_from_list_head_start (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         char *xl_type,
+                                         char *xl_namefmt, size_t child_count,
+                                         size_t sub_count, size_t start_count)
+{
+        xlator_t        *last = first_of(graph);
+
+        while (last->next)
+                last = last->next;
+
+        return volgen_link_bricks (graph, volinfo, xl_type, xl_namefmt,
+                                   child_count, sub_count, start_count,
+                                   last);
+}
+
+/*
+ * Same as volgen_link_bricks_from_list_tail_start() with the parent
+ * numbering starting at 0.  Delegates instead of repeating the traversal
+ * so the two cannot drift apart.  Returns -1 when @child_count is 0,
+ * otherwise the number of parents created (or -1 on failure).
+ */
+static int
+volgen_link_bricks_from_list_tail (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo,
+                                   char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        return volgen_link_bricks_from_list_tail_start (graph, volinfo,
+                                                        xl_type,
+                                                        xl_namefmt,
+                                                        child_count,
+                                                        sub_count, 0);
+}
+
+/*
+ * Same as volgen_link_bricks_from_list_head_start() with the parent
+ * numbering starting at 0.  Delegates instead of repeating the traversal
+ * so the two cannot drift apart.
+ */
+static int
+volgen_link_bricks_from_list_head (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo, char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        return volgen_link_bricks_from_list_head_start (graph, volinfo,
+                                                        xl_type,
+                                                        xl_namefmt,
+                                                        child_count,
+                                                        sub_count, 0);
+}
+
+/**
+ * Graph builder for user-serviceable snapshots: adds a protocol/client
+ * that talks to the volume's snapd and a features/snapview-client xlator
+ * on top of it.
+ *
+ * Returns 0 on success, -1 (or the link error) on failure.
+ */
+static int
+volgen_graph_build_snapview_client (volgen_graph_t *graph,
+                                    glusterd_volinfo_t *volinfo,
+                                    char *volname, dict_t *set_dict)
+{
+        int              ret            = 0;
+        xlator_t        *old_top        = NULL;
+        xlator_t        *snapd_client   = NULL;
+        xlator_t        *svc            = NULL;
+        char             transt [16]    = {0,};
+        char            *svc_args[]     = {"features/snapview-client",
+                                           "%s-snapview-client"};
+        char             subvol [1024]  = {0,};
+        char             xl_id [1024]   = {0,};
+
+        /* Remember the current top before anything new is added */
+        old_top = (xlator_t *)(graph->graph.first);
+
+        snprintf (subvol, sizeof (subvol), "snapd-%s", volinfo->volname);
+        snprintf (xl_id, sizeof (xl_id), "%s-snapd-client", volinfo->volname);
+
+        get_transport_type (volinfo, set_dict, transt, _gf_false);
+
+        snapd_client = volgen_graph_build_client (graph, volinfo, NULL,
+                                                  subvol, xl_id, transt,
+                                                  set_dict);
+        if (snapd_client == NULL)
+                return -1;
+
+        svc = volgen_graph_add_nolink (graph, svc_args[0], svc_args[1],
+                                       volname);
+        if (svc == NULL)
+                return -1;
+
+        /**
+         * The order in which the two translators (old_top & snapd_client)
+         * are linked is important: the snapview client implementation is
+         * built on the policy that the normal volume path goes to
+         * FIRST_CHILD and snap world operations go to SECOND_CHILD.
+         **/
+        ret = volgen_xlator_link (graph->graph.first, old_top);
+        if (ret != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_XLATOR_LINK_FAIL, "failed to link the "
+                        "snapview-client to distribute");
+                return ret;
+        }
+
+        ret = volgen_xlator_link (graph->graph.first, snapd_client);
+        if (ret != 0) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_XLATOR_LINK_FAIL, "failed to link the "
+                        "snapview-client to snapview-server");
+        }
+
+        return ret;
+}
+
+/*
+ * Tell whether the brick behind protocol/client xlator @xl is
+ * decommissioned in @volinfo.
+ *
+ * The brick is identified by the client's "remote-host" and
+ * "remote-subvolume" options.  If either option cannot be read, an error
+ * is logged and _gf_false is returned.
+ */
+gf_boolean_t
+_xl_is_client_decommissioned (xlator_t *xl, glusterd_volinfo_t *volinfo)
+{
+        int             ret            = 0;
+        gf_boolean_t    decommissioned = _gf_false;
+        char           *hostname       = NULL;
+        char           *path           = NULL;
+
+        GF_ASSERT (!strcmp (xl->type, "protocol/client"));
+        ret = xlator_get_option (xl, "remote-host", &hostname);
+        if (ret) {
+                GF_ASSERT (0);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REMOTE_HOST_GET_FAIL,
+                        "Failed to get remote-host "
+                        "from client %s", xl->name);
+                goto out;
+        }
+        ret = xlator_get_option (xl, "remote-subvolume", &path);
+        if (ret) {
+                GF_ASSERT (0);
+                /* Name the option that actually failed; the text used to
+                 * be a copy of the remote-host message above. */
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_REMOTE_HOST_GET_FAIL,
+                        "Failed to get remote-subvolume "
+                        "from client %s", xl->name);
+                goto out;
+        }
+
+        decommissioned = glusterd_is_brick_decommissioned (volinfo, hostname,
+                                                           path);
+out:
+        return decommissioned;
+}
+
+/*
+ * Depth-first search below @xl for a protocol/client xlator whose brick is
+ * decommissioned.  Returns _gf_true as soon as one is found, _gf_false if
+ * there is none or @xl is NULL.
+ */
+gf_boolean_t
+_xl_has_decommissioned_clients (xlator_t *xl, glusterd_volinfo_t *volinfo)
+{
+        xlator_list_t   *child = NULL;
+
+        if (xl == NULL)
+                return _gf_false;
+
+        /* Leaf: a client is judged directly */
+        if (strcmp (xl->type, "protocol/client") == 0)
+                return _xl_is_client_decommissioned (xl, volinfo);
+
+        /* this can go into 2 depths if the volume type
+           is stripe-replicate */
+        for (child = xl->children; child != NULL; child = child->next) {
+                if (_xl_has_decommissioned_clients (child->xlator, volinfo))
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Collect the names of those children of @dht that have at least one
+ * decommissioned client beneath them, as a comma separated list.
+ *
+ * On success returns 0 and stores in *@children either NULL (no such
+ * child) or a heap buffer that the caller must release with GF_FREE().
+ * On failure (allocation failure, or the list would not fit in the 16 KB
+ * buffer) returns -1 with *@children set to NULL.
+ *
+ * The list is appended with explicit length tracking; the previous
+ * unbounded strcat() could write past the end of the buffer.
+ */
+static int
+_graph_get_decommissioned_children (xlator_t *dht, glusterd_volinfo_t *volinfo,
+                                    char **children)
+{
+        int              ret      = -1;
+        xlator_list_t   *xl_child = NULL;
+        xlator_t        *cxl      = NULL;
+        gf_boolean_t     comma    = _gf_false;
+        const size_t     bufsize  = 16 * GF_UNIT_KB;
+        size_t           used     = 0;
+        size_t           name_len = 0;
+
+        *children = NULL;
+        xl_child = dht->children;
+        while (xl_child) {
+                cxl = xl_child->xlator;
+                if (_xl_has_decommissioned_clients (cxl, volinfo)) {
+                        if (!*children) {
+                                *children = GF_CALLOC (16 * GF_UNIT_KB, 1,
+                                                       gf_common_mt_char);
+                                if (!*children)
+                                        goto out;
+                        }
+
+                        name_len = strlen (cxl->name);
+                        /* separator (if any) + name + terminating NUL */
+                        if (used + (comma ? 1 : 0) + name_len + 1 > bufsize) {
+                                gf_log ("glusterd", GF_LOG_ERROR,
+                                        "decommissioned-bricks list is "
+                                        "too long");
+                                goto out;
+                        }
+
+                        if (comma)
+                                (*children)[used++] = ',';
+                        memcpy (*children + used, cxl->name, name_len + 1);
+                        used += name_len;
+                        comma = _gf_true;
+                }
+
+                xl_child = xl_child->next;
+        }
+        ret = 0;
+out:
+        if (ret) {
+                /* never hand a half-built list back to the caller */
+                GF_FREE (*children);
+                *children = NULL;
+        }
+        return ret;
+}
+
+/*
+ * Put a single distribute-style xlator on top of the first @child_count
+ * xlators of @graph.
+ *
+ * The xlator type is cluster/distribute unless the volume option
+ * "cluster.nufa" or "cluster.switch" selects cluster/nufa or
+ * cluster/switch; setting both is an error.  The xlator is named "%s" for
+ * quotad and "%s-dht" otherwise.  For non-tier volumes the
+ * "decommissioned-bricks" option is set when any child has decommissioned
+ * clients.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+volgen_graph_build_dht_cluster (volgen_graph_t *graph,
+                                glusterd_volinfo_t *volinfo, size_t child_count,
+                                gf_boolean_t is_quotad)
+{
+        int32_t          nr_clusters = 0;
+        int              ret         = -1;
+        char            *decom_list  = NULL;
+        xlator_t        *dht         = NULL;
+        char            *xl_type     = "cluster/distribute";
+        char            *name_fmt    = is_quotad ? "%s" : "%s-dht";
+
+        /* NUFA and Switch section */
+        if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0) &&
+            dict_get_str_boolean (volinfo->dict, "cluster.switch", 0)) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_GET_FAILED,
+                        "nufa and switch cannot be set together");
+                ret = -1;
+                goto out;
+        }
+
+        /* NUFA overrides the default type ... */
+        if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0))
+                xl_type = "cluster/nufa";
+
+        /* ... and so does switch */
+        if (dict_get_str_boolean (volinfo->dict, "cluster.switch", 0))
+                xl_type = "cluster/switch";
+
+        nr_clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
+                                                         xl_type,
+                                                         name_fmt,
+                                                         child_count,
+                                                         child_count);
+        if (nr_clusters < 0)
+                goto out;
+
+        /* Tier volumes skip the decommissioned-bricks handling here */
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                ret = 0;
+                goto out;
+        }
+
+        dht = first_of (graph);
+        ret = _graph_get_decommissioned_children (dht, volinfo,
+                                                  &decom_list);
+        if (ret == 0 && decom_list != NULL)
+                ret = xlator_set_option (dht, "decommissioned-bricks",
+                                         decom_list);
+out:
+        GF_FREE (decom_list);
+        return ret;
+}
+
+/*
+ * Group the bricks of @volinfo into cluster/disperse xlators of
+ * disperse_count bricks each and set the "redundancy" option on every one
+ * of them.
+ *
+ * When building the hot tier on top of a disperse cold tier, the subvolume
+ * numbering starts after the cold tier's subvolumes.
+ *
+ * Returns the number of disperse xlators created, or a negative value on
+ * failure.
+ */
+static int
+volgen_graph_build_ec_clusters (volgen_graph_t *graph,
+                                glusterd_volinfo_t *volinfo)
+{
+        int              idx            = 0;
+        int              nr_clusters    = 0;
+        int              first_index    = 0;
+        xlator_t        *ec             = NULL;
+        char             redundancy[32] = {0};
+        char            *disperse_args[] = {"cluster/disperse",
+                                            "%s-disperse-%d"};
+
+        if (volinfo->tier_info.cur_tier_hot &&
+            volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_DISPERSE) {
+                first_index = volinfo->tier_info.cold_brick_count /
+                              volinfo->tier_info.cold_disperse_count;
+        }
+
+        nr_clusters = volgen_link_bricks_from_list_tail_start
+                                        (graph, volinfo,
+                                         disperse_args[0],
+                                         disperse_args[1],
+                                         volinfo->brick_count,
+                                         volinfo->disperse_count,
+                                         first_index);
+        if (nr_clusters < 0)
+                return nr_clusters;
+
+        snprintf (redundancy, sizeof (redundancy), "%d",
+                  volinfo->redundancy_count);
+
+        /* The new disperse xlators sit at the front of the graph */
+        for (idx = 0, ec = first_of (graph); idx < nr_clusters;
+             idx++, ec = ec->next) {
+                if (xlator_set_option (ec, "redundancy", redundancy))
+                        return -1;
+        }
+
+        return nr_clusters;
+}
+
+/*
+ * Set the "afr-pending-xattr" option on each of the first @clusters
+ * xlators of @graph: a comma separated list of the brick_ids of the
+ * replica_count bricks that make up that replica set.
+ *
+ * Nothing is done (and 0 is returned) when the cluster op-version is below
+ * GD_OP_VERSION_3_9_0.
+ *
+ * Returns 0 on success and non-zero on failure.  Both scratch buffers are
+ * released on every path, and an allocation failure is reported as -1;
+ * previously a failing xlator_set_option() leaked both buffers and an
+ * allocation failure was reported as success.
+ */
+static int
+set_afr_pending_xattrs_option (volgen_graph_t *graph,
+                               glusterd_volinfo_t *volinfo,
+                               int clusters)
+{
+        xlator_t                *xlator           = NULL;
+        xlator_t               **afr_xlators_list = NULL;
+        xlator_t                *this             = NULL;
+        glusterd_conf_t         *conf             = NULL;
+        glusterd_brickinfo_t    *brick            = NULL;
+        char                    *ptr              = NULL;
+        int                      i                = 0;
+        int                      index            = -1;
+        int                      ret              = 0;
+        char                    *afr_xattrs_list  = NULL;
+        int                      list_size        = -1;
+        size_t                   id_len           = 0;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        if (conf->op_version < GD_OP_VERSION_3_9_0)
+                return ret;
+
+        /* (brick_id x rep.count) + (rep.count-1 commas) + NUL */
+        list_size = (1024 * volinfo->replica_count) +
+                    (volinfo->replica_count - 1) + 1;
+        afr_xattrs_list = GF_CALLOC (1, list_size, gf_common_mt_char);
+        if (!afr_xattrs_list) {
+                ret = -1;
+                goto out;
+        }
+
+        ptr = afr_xattrs_list;
+        afr_xlators_list = GF_CALLOC (clusters, sizeof (xlator_t *),
+                                      gf_common_mt_xlator_t);
+        if (!afr_xlators_list) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Store the first @clusters xlators in reverse graph order so
+         * that index 0 matches the first replica set of the brick list. */
+        xlator = first_of (graph);
+        for (i = 0, index = clusters - 1; i < clusters; i++) {
+                afr_xlators_list[index--] = xlator;
+                xlator = xlator->next;
+        }
+
+        i = 1;
+        index = 0;
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                if (index == clusters)
+                        break;
+
+                /* ptr always points at the (zeroed) end of the list */
+                id_len = strlen (brick->brick_id);
+                memcpy (ptr, brick->brick_id, id_len);
+                ptr[id_len] = '\0';
+
+                if (i == volinfo->replica_count) {
+                        /* replica set complete: publish it and start over */
+                        ret = xlator_set_option (afr_xlators_list[index++],
+                                                 "afr-pending-xattr",
+                                                 afr_xattrs_list);
+                        if (ret)
+                                goto out;
+                        memset (afr_xattrs_list, 0, list_size);
+                        ptr = afr_xattrs_list;
+                        i = 1;
+                        continue;
+                }
+                ptr[id_len] = ',';
+                ptr += id_len + 1;
+                i++;
+        }
+
+out:
+        GF_FREE (afr_xattrs_list);
+        GF_FREE (afr_xlators_list);
+        return ret;
+}
+
+/*
+ * Group the bricks of @volinfo into replicate xlators of replica_count
+ * bricks each.
+ *
+ * The xlator type is experimental/jbrc when the "cluster.jbr" option is
+ * on and cluster/replicate otherwise.  The pending-xattr option is set on
+ * every new xlator, and "arbiter-count" as well when the volume has
+ * arbiters.
+ *
+ * Returns the number of replicate xlators created, or a negative value on
+ * failure.
+ */
+static int
+volgen_graph_build_afr_clusters (volgen_graph_t *graph,
+                                 glusterd_volinfo_t *volinfo)
+{
+        int              idx          = 0;
+        int              nr_clusters  = 0;
+        int              first_index  = 0;
+        char            *xl_type      = NULL;
+        char            *xl_name_fmt  = "%s-replicate-%d";
+        xlator_t        *afr          = NULL;
+        char             arbiters[32] = {0};
+
+        xl_type = (glusterd_volinfo_get_boolean (volinfo, "cluster.jbr") > 0)
+                        ? "experimental/jbrc"
+                        : "cluster/replicate";
+
+        if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE) {
+                first_index = volinfo->tier_info.cold_brick_count /
+                              volinfo->tier_info.cold_replica_count;
+        }
+
+        if (volinfo->tier_info.cur_tier_hot) {
+                nr_clusters = volgen_link_bricks_from_list_head_start
+                                        (graph, volinfo,
+                                         xl_type, xl_name_fmt,
+                                         volinfo->brick_count,
+                                         volinfo->replica_count,
+                                         first_index);
+        } else {
+                nr_clusters = volgen_link_bricks_from_list_tail
+                                        (graph, volinfo,
+                                         xl_type, xl_name_fmt,
+                                         volinfo->brick_count,
+                                         volinfo->replica_count);
+        }
+        if (nr_clusters < 0)
+                return nr_clusters;
+
+        if (set_afr_pending_xattrs_option (graph, volinfo, nr_clusters))
+                return -1;
+
+        /* Without arbiters there is nothing more to configure */
+        if (!volinfo->arbiter_count)
+                return nr_clusters;
+
+        snprintf (arbiters, sizeof (arbiters), "%d", volinfo->arbiter_count);
+        for (idx = 0, afr = first_of (graph); idx < nr_clusters;
+             idx++, afr = afr->next) {
+                if (xlator_set_option (afr, "arbiter-count", arbiters))
+                        return -1;
+        }
+
+        return nr_clusters;
+}
+
+/*
+ * Build the cluster layer(s) of the graph for @volinfo on top of the
+ * client xlators already present: first the per-type layer (replicate,
+ * stripe, stripe-replicate, disperse or tier), then a distribute layer
+ * over the resulting subvolumes.
+ *
+ * While the distribute layer of a tiered volume is built, "-hot" or
+ * "-cold" is temporarily appended to volinfo->volname so the distribute
+ * xlator gets a tier specific name; the name is restored afterwards.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+volume_volgen_graph_build_clusters (volgen_graph_t *graph,
+                                    glusterd_volinfo_t *volinfo,
+                                    gf_boolean_t is_quotad)
+{
+        char            *tier_args[]    = {"cluster/tier",
+                                           "%s-tier-%d"};
+        char            *stripe_args[]  = {"cluster/stripe",
+                                           "%s-stripe-%d"};
+        int              rclusters      = 0;
+        int              clusters       = 0;
+        int              dist_count     = 0;
+        int              ret            = -1;
+        char             tmp_volname[GD_VOLUME_NAME_MAX] = {0, };
+
+        if (!volinfo->dist_leaf_count)
+                goto out;
+
+        /* One brick per distribute leaf: no intermediate cluster layer */
+        if (volinfo->dist_leaf_count == 1)
+                goto build_distribute;
+
+        /* All other cases, it will have one or the other cluster type */
+        switch (volinfo->type) {
+        case GF_CLUSTER_TYPE_REPLICATE:
+                clusters = volgen_graph_build_afr_clusters (graph, volinfo);
+                if (clusters < 0)
+                        goto out;
+                break;
+        case GF_CLUSTER_TYPE_STRIPE:
+                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
+                                                        stripe_args[0],
+                                                        stripe_args[1],
+                                                        volinfo->brick_count,
+                                                        volinfo->stripe_count);
+                if (clusters < 0)
+                        goto out;
+                break;
+        case GF_CLUSTER_TYPE_TIER:
+                /* NOTE(review): the result is not checked here and is
+                 * overwritten below -- confirm whether a failure should
+                 * abort. */
+                ret = volgen_link_bricks_from_list_head (graph, volinfo,
+                                                        tier_args[0],
+                                                        tier_args[1],
+                                                        volinfo->brick_count,
+                                                        volinfo->replica_count);
+                break;
+        case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                /* Replicate after the clients, then stripe */
+                if (volinfo->replica_count == 0)
+                        goto out;
+                clusters = volgen_graph_build_afr_clusters (graph, volinfo);
+                if (clusters < 0)
+                        goto out;
+
+                rclusters = volinfo->brick_count / volinfo->replica_count;
+                GF_ASSERT (rclusters == clusters);
+                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
+                                                        stripe_args[0],
+                                                        stripe_args[1],
+                                                        rclusters,
+                                                        volinfo->stripe_count);
+                if (clusters < 0)
+                        goto out;
+                break;
+
+        case GF_CLUSTER_TYPE_DISPERSE:
+                clusters = volgen_graph_build_ec_clusters (graph, volinfo);
+                if (clusters < 0)
+                        goto out;
+
+                break;
+        default:
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOLUME_INCONSISTENCY,
+                        "volume inconsistency: "
+                        "unrecognized clustering type");
+                goto out;
+        }
+
+build_distribute:
+        dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+        if (!dist_count) {
+                ret = -1;
+                goto out;
+        }
+        if (volinfo->tier_info.hot_brick_count) {
+                /* Bounded, always NUL-terminated backup of the name; the
+                 * old strncpy() was bounded by the source length and so
+                 * could overrun tmp_volname. */
+                snprintf (tmp_volname, sizeof (tmp_volname), "%s",
+                          volinfo->volname);
+                /* NOTE(review): strcat() assumes volinfo->volname has room
+                 * for the suffix -- confirm against its declared size. */
+                if (volinfo->tier_info.cur_tier_hot)
+                        strcat (volinfo->volname, "-hot");
+                else
+                        strcat (volinfo->volname, "-cold");
+        }
+        ret = volgen_graph_build_dht_cluster (graph, volinfo,
+                                              dist_count, is_quotad);
+        if (volinfo->tier_info.hot_brick_count)
+                strcpy (volinfo->volname, tmp_volname);
+        if (ret)
+                goto out;
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Apply the performance-xlator options to a client graph.
+ *
+ * When @set_dict carries the "nfs-volume-file" key the NFS specific
+ * handler is used (and handed the volume name); otherwise the regular
+ * handler is used (and handed the volinfo).  This keeps NFS from getting
+ * performance translators by default.
+ */
+static int client_graph_set_perf_options(volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         dict_t *set_dict)
+{
+        if (dict_get (set_dict, "nfs-volume-file") != NULL) {
+                return volgen_graph_set_options_generic(graph, set_dict,
+                                                 volinfo->volname,
+                                                 &nfsperfxl_option_handler);
+        }
+
+        return volgen_graph_set_options_generic(graph, set_dict,
+                                                volinfo,
+                                                &perfxl_option_handler);
+}
+
+/*
+ * Apply the logging related options (log level, syslog level, logger, log
+ * format, log buffer size, log flush timeout) to @graph.
+ *
+ * Every step is best effort: a failure is logged as a warning, using
+ * @identifier in the message where the message has a placeholder, and the
+ * function always returns 0.
+ */
+static int
+graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
+                           dict_t *set_dict, char *identifier)
+{
+        if (volgen_graph_set_options_generic (graph, set_dict, "client",
+                                              &loglevel_option_handler)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_GRAPH_SET_OPT_FAIL,
+                        "changing %s log level"
+                        " failed", identifier);
+        }
+
+        if (volgen_graph_set_options_generic (graph, set_dict, "client",
+                                              &sys_loglevel_option_handler)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_GRAPH_SET_OPT_FAIL,
+                        "changing %s syslog "
+                        "level failed", identifier);
+        }
+
+        if (volgen_graph_set_options_generic (graph, set_dict, "client",
+                                              &logger_option_handler)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_GRAPH_SET_OPT_FAIL,
+                        "changing %s logger"
+                        " failed", identifier);
+        }
+
+        if (volgen_graph_set_options_generic (graph, set_dict, "client",
+                                              &log_format_option_handler)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_GRAPH_SET_OPT_FAIL,
+                        "changing %s log format"
+                        " failed", identifier);
+        }
+
+        if (volgen_graph_set_options_generic (graph, set_dict, "client",
+                                              &log_buf_size_option_handler)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_GRAPH_SET_OPT_FAIL,
+                        "Failed to change "
+                        "log-buf-size option");
+        }
+
+        if (volgen_graph_set_options_generic (graph, set_dict, "client",
+                                         &log_flush_timeout_option_handler)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_GRAPH_SET_OPT_FAIL,
+                        "Failed to change "
+                        "log-flush-timeout option");
+        }
+
+        return 0;
+}
+
+/*
+ * Build the cluster layers of a tiered volume: the cold tier's clusters,
+ * the hot tier's clusters, and a cluster/tier xlator linking both (cold
+ * first, hot second).
+ *
+ * @volinfo is temporarily rewritten to describe first the cold and then
+ * the hot tier; brick_count, replica_count, disperse_count, type and
+ * dist_leaf_count are restored before returning.
+ * NOTE(review): redundancy_count is overwritten with the cold tier's value
+ * and not restored -- confirm this is intended.
+ *
+ * Compared with the earlier version this one
+ *   - frees the decommissioned-bricks list (it used to leak),
+ *   - fails when the tier xlator cannot be added or the rule string
+ *     cannot be allocated (it used to return a non-error code or pass
+ *     NULL on),
+ *   - checks the results of both links into the tier xlator and of the
+ *     hot distribute link.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         gf_boolean_t is_quotad)
+{
+        int                      ret                     = -1;
+        xlator_t                *xl                      = NULL;
+        xlator_t                *hxl                     = NULL;
+        xlator_t                *cxl                     = NULL;
+        char                    *rule                    = NULL;
+        int                      st_brick_count          = 0;
+        int                      st_replica_count        = 0;
+        int                      st_disperse_count       = 0;
+        int                      st_dist_leaf_count      = 0;
+        int                      st_type                 = 0;
+        int                      dist_count              = 0;
+        int                      start_count             = 0;
+        char                    *decommissioned_children = NULL;
+        glusterd_volinfo_t      *dup_volinfo             = NULL;
+        gf_boolean_t             is_hot_tier             = _gf_false;
+
+        /* Save what is going to be overwritten */
+        st_brick_count     = volinfo->brick_count;
+        st_replica_count   = volinfo->replica_count;
+        st_disperse_count  = volinfo->disperse_count;
+        st_type            = volinfo->type;
+        st_dist_leaf_count = volinfo->dist_leaf_count;
+
+        /* --- cold tier --- */
+        volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
+        volinfo->brick_count = volinfo->tier_info.cold_brick_count;
+        volinfo->replica_count = volinfo->tier_info.cold_replica_count;
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->redundancy_count = volinfo->tier_info.cold_redundancy_count;
+        volinfo->type = volinfo->tier_info.cold_type;
+        volinfo->tier_info.cur_tier_hot = 0;
+        ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo,
+                                                is_hot_tier, volinfo->volname);
+        if (ret)
+                goto out;
+
+        ret = volume_volgen_graph_build_clusters (graph, dup_volinfo,
+                                                  is_quotad);
+        if (ret)
+                goto out;
+        cxl = first_of(graph);
+
+        /* --- hot tier --- */
+        volinfo->type = volinfo->tier_info.hot_type;
+        volinfo->brick_count = volinfo->tier_info.hot_brick_count;
+        volinfo->replica_count = volinfo->tier_info.hot_replica_count;
+        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+        volinfo->disperse_count = 0;
+        volinfo->tier_info.cur_tier_hot = 1;
+
+        dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+
+        /* Hot replicate sets are numbered after the cold ones */
+        if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE) {
+                start_count = volinfo->tier_info.cold_brick_count /
+                              volinfo->tier_info.cold_replica_count;
+        }
+
+        if (volinfo->dist_leaf_count != 1) {
+                ret = volgen_link_bricks_from_list_head_start
+                        (graph, volinfo,
+                         "cluster/replicate",
+                         "%s-replicate-%d",
+                         volinfo->brick_count,
+                         volinfo->replica_count,
+                         start_count);
+                if (ret != -1) {
+                        ret = set_afr_pending_xattrs_option (graph, volinfo,
+                                                             ret);
+                        if (ret)
+                                goto out;
+                        ret = volgen_link_bricks_from_list_tail
+                                (graph, volinfo,
+                                 "cluster/distribute",
+                                 "%s-hot-dht",
+                                 dist_count,
+                                 dist_count);
+                }
+        } else {
+                ret = volgen_link_bricks_from_list_head (graph, volinfo,
+                                                  "cluster/distribute",
+                                                  "%s-hot-dht",
+                                                  dist_count,
+                                                  dist_count);
+        }
+        if (ret == -1)
+                goto out;
+
+        hxl = first_of(graph);
+
+        /* --- tier xlator on top of both --- */
+        volinfo->type = GF_CLUSTER_TYPE_TIER;
+        if (!is_quotad) {
+
+                xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s-%s",
+                                              volinfo->volname, "tier-dht");
+        } else {
+                xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",
+                                              volinfo->volname);
+        }
+        if (!xl) {
+                /* ret still holds a cluster count here, not an error */
+                ret = -1;
+                goto out;
+        }
+
+        if (gf_asprintf(&rule, "%s-hot-dht", volinfo->volname) < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = xlator_set_option(xl, "rule", rule);
+        if (ret)
+                goto out;
+
+        /*Each dht/tier layer must have a different xattr name*/
+        ret = xlator_set_option(xl, "xattr-name", "trusted.tier.tier-dht");
+        if (ret)
+                goto out;
+
+        /* cold must be the first child, hot the second */
+        ret = volgen_xlator_link (xl, cxl);
+        if (ret)
+                goto out;
+        ret = volgen_xlator_link (xl, hxl);
+        if (ret)
+                goto out;
+
+        st_type = GF_CLUSTER_TYPE_TIER;
+
+        ret = _graph_get_decommissioned_children (xl, volinfo,
+                                                  &decommissioned_children);
+        if (ret)
+                goto out;
+        if (decommissioned_children) {
+                ret = xlator_set_option (xl, "decommissioned-bricks",
+                                         decommissioned_children);
+                if (ret)
+                        goto out;
+        }
+
+ out:
+        volinfo->brick_count     = st_brick_count;
+        volinfo->replica_count   = st_replica_count;
+        volinfo->disperse_count  = st_disperse_count;
+        volinfo->type            = st_type;
+        volinfo->dist_leaf_count = st_dist_leaf_count;
+        volinfo->tier_info.cur_tier_hot = 0;
+
+        if (dup_volinfo)
+                glusterd_volinfo_delete (dup_volinfo);
+        GF_FREE (decommissioned_children);
+        GF_FREE (rule);
+        return ret;
+}
+
+/*
+ * Build the complete client-side graph for @volinfo.
+ *
+ * Order of construction: protocol/client xlators, cluster layers, then
+ * the optional feature xlators selected through @set_dict / the volume
+ * options (shard, read-only for snapshot volumes, cdc, crypt, quota at the
+ * minimum op-version, snapview-client, ganesha), the performance xlators,
+ * the debug xlators and finally debug/io-stats plus the logging options.
+ *
+ * @param is passed through to volgen_graph_build_clients().
+ *
+ * Returns 0 on success and -1 on failure.  Changes from the earlier
+ * version: a failed "features.uss" lookup is now reported as failure
+ * instead of success, two log messages whose concatenated literals ran
+ * words together ("ganesha.enableoption", "addadd") are fixed, and every
+ * error leaves through the common exit.
+ */
+static int
+client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                      dict_t *set_dict, void *param)
+{
+        int              ret         = 0;
+        xlator_t        *xl          = NULL;
+        char            *volname     = NULL;
+        glusterd_conf_t *conf        = THIS->private;
+        char            *tmp         = NULL;
+        gf_boolean_t     var         = _gf_false;
+        gf_boolean_t     ob          = _gf_false;
+        int              uss_enabled = -1;
+        xlator_t        *this        = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (conf);
+
+        volname = volinfo->volname;
+        ret = volgen_graph_build_clients (graph, volinfo, set_dict,
+                                          param);
+        if (ret)
+                goto out;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                ret = volume_volgen_graph_build_clusters_tier
+                                        (graph, volinfo, _gf_false);
+        else
+                ret = volume_volgen_graph_build_clusters
+                                        (graph, volinfo, _gf_false);
+
+        if (ret == -1)
+                goto out;
+
+        ret = dict_get_str_boolean (set_dict, "features.shard", _gf_false);
+        if (ret == -1)
+                goto out;
+
+        if (ret) {
+                xl = volgen_graph_add (graph, "features/shard", volname);
+                if (!xl) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* As of now snapshot volume is read-only. Read-only xlator is loaded
+         * in client graph so that AFR & DHT healing can be done in server.
+         */
+        if (volinfo->is_snap_volume) {
+                xl = volgen_graph_add (graph, "features/read-only", volname);
+                if (!xl) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GRAPH_FEATURE_ADD_FAIL,
+                                "Failed to add "
+                                "read-only feature to the graph of %s "
+                                "snapshot with %s origin volume",
+                                volname, volinfo->parent_volname);
+                        ret = -1;
+                        goto out;
+                }
+                ret = xlator_set_option (xl, "read-only", "on");
+                if (ret)
+                        goto out;
+
+        }
+
+        /* Check for compress volume option, and add it to the graph on
+         * client side */
+        ret = dict_get_str_boolean (set_dict, "network.compression", 0);
+        if (ret == -1)
+                goto out;
+        if (ret) {
+                xl = volgen_graph_add (graph, "features/cdc", volname);
+                if (!xl) {
+                        ret = -1;
+                        goto out;
+                }
+                ret = xlator_set_option (xl, "mode", "client");
+                if (ret)
+                        goto out;
+        }
+
+        ret = dict_get_str_boolean (set_dict, "features.encryption", _gf_false);
+        if (ret == -1)
+                goto out;
+        if (ret) {
+                xl = volgen_graph_add (graph, "encryption/crypt", volname);
+                if (!xl) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* The quota xlator is only added here at the minimum op-version */
+        if (conf->op_version == GD_OP_VERSION_MIN) {
+                ret = glusterd_volinfo_get_boolean (volinfo,
+                                                    VKEY_FEATURES_QUOTA);
+                if (ret == -1)
+                        goto out;
+                if (ret) {
+                        xl = volgen_graph_add (graph, "features/quota",
+                                               volname);
+                        if (!xl) {
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        /* Do not allow changing read-after-open option if root-squash is
+           enabled.
+        */
+        ret = dict_get_str (set_dict, "performance.read-after-open", &tmp);
+        if (!ret) {
+                ret = dict_get_str (volinfo->dict, "server.root-squash", &tmp);
+                if (!ret) {
+                        ob = _gf_false;
+                        ret = gf_string2boolean (tmp, &ob);
+                        if (!ret && ob) {
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        GD_MSG_ROOT_SQUASH_ENABLED,
+                                        "root-squash is enabled. Please turn it"
+                                        " off to change read-after-open "
+                                        "option");
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        /* open behind causes problems when root-squash is enabled
+           (by allowing reads to happen even though the squashed user
+           does not have permissions to do so) as it fakes open to be
+           successful and later sends reads on anonymous fds. So when
+           root-squash is enabled, open-behind's option to read after
+           open is done is also enabled.
+        */
+        ret = dict_get_str (set_dict, "server.root-squash", &tmp);
+        if (!ret) {
+                ret = gf_string2boolean (tmp, &var);
+                if (ret)
+                        goto out;
+
+                if (var) {
+                        ret = dict_get_str (volinfo->dict,
+                                            "performance.read-after-open",
+                                            &tmp);
+                        if (!ret) {
+                                ret = gf_string2boolean (tmp, &ob);
+                                /* go ahead with turning read-after-open on
+                                   even if string2boolean conversion fails,
+                                   OR if read-after-open option is turned off
+                                */
+                                if (ret || !ob)
+                                        ret = dict_set_str (set_dict,
+                                                "performance.read-after-open",
+                                                            "yes");
+                        } else {
+                                ret = dict_set_str (set_dict,
+                                                "performance.read-after-open",
+                                                    "yes");
+                        }
+                } else {
+                        /* When root-squash has to be turned off, open-behind's
+                           read-after-open option should be reset to what was
+                           there before root-squash was turned on. If the option
+                           cannot be found in volinfo's dict, it means that
+                           option was not set before turning on root-squash.
+                        */
+                        ob = _gf_false;
+                        ret = dict_get_str (volinfo->dict,
+                                            "performance.read-after-open",
+                                            &tmp);
+                        if (!ret) {
+                                ret = gf_string2boolean (tmp, &ob);
+
+                                if (!ret && ob) {
+                                        ret = dict_set_str (set_dict,
+                                                "performance.read-after-open",
+                                                            "yes");
+                                }
+                        }
+                        /* consider operation is failure only if read-after-open
+                           option is enabled and could not set into set_dict
+                        */
+                        if (!ob)
+                                ret = 0;
+                }
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_ROOT_SQUASH_FAILED,
+                                "setting "
+                                "open behind option as part of root "
+                                "squash failed");
+                        goto out;
+                }
+        }
+
+        /* client.send-gids is the inverse of server.manage-gids; failing
+         * to set it is only worth a warning */
+        ret = dict_get_str_boolean (set_dict, "server.manage-gids", _gf_false);
+        if (ret != -1) {
+                ret = dict_set_str (set_dict, "client.send-gids",
+                                    ret ? "false" : "true");
+                if (ret)
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                GD_MSG_DICT_SET_FAILED, "changing client"
+                                " protocol option failed");
+        }
+
+        ret = client_graph_set_perf_options(graph, volinfo, set_dict);
+        if (ret)
+                goto out;
+
+        uss_enabled = dict_get_str_boolean (set_dict, "features.uss",
+                                            _gf_false);
+        if (uss_enabled == -1) {
+                /* ret is 0 at this point; do not report success */
+                ret = -1;
+                goto out;
+        }
+        if (uss_enabled && !volinfo->is_snap_volume) {
+
+                ret = volgen_graph_build_snapview_client
+                                           (graph, volinfo,
+                                            volname, set_dict);
+                if (ret == -1)
+                        goto out;
+        }
+
+        ret = dict_get_str_boolean (set_dict, "ganesha.enable", _gf_false);
+
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        GD_MSG_DICT_GET_FAILED, "setting ganesha.enable "
+                        "option failed.");
+                goto out;
+        }
+
+        if (ret) {
+                xl = volgen_graph_add (graph, "features/ganesha", volname);
+
+                if (!xl) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_GRAPH_FEATURE_ADD_FAIL,
+                                "failed to add "
+                                "features/ganesha to graph");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* add debug translators depending on the options */
+        ret = check_and_add_debug_xl (graph, set_dict, volname,
+                                      "client");
+        if (ret) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = -1;
+        xl = volgen_graph_add_as (graph, "debug/io-stats", volname);
+        if (!xl)
+                goto out;
+
+        ret = graph_set_generic_options (this, graph, set_dict, "client");
+out:
+        return ret;
+}
+
+
+/*
+ * Build a graph for the client role, with the option overrides given in
+ * @mod_dict, by running client_graph_builder through the generic graph
+ * builder.
+ */
+static int
+build_client_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                    dict_t *mod_dict)
+{
+        int rc = 0;
+
+        rc = build_graph_generic (graph, volinfo, mod_dict, NULL,
+                                  &client_graph_builder);
+        return rc;
+}
+
+char *gd_shd_options[] = {     /* option names looked up by shd_option_handler(); each carries a leading '!' */
+        "!self-heal-daemon",
+        "!heal-timeout",
+        NULL                    /* end-of-list marker for gd_get_matching_option() */
+};
+
+/*
+ * Look @option up in the NULL-terminated string array @options.
+ * Returns the matching array entry (not a copy), or NULL if there is none.
+ */
+char*
+gd_get_matching_option (char **options, char *option)
+{
+        char **cursor = NULL;
+
+        for (cursor = options; *cursor != NULL; cursor++) {
+                if (strcmp (*cursor, option) == 0)
+                        return *cursor;
+        }
+
+        return NULL;
+}
+
+/*
+ * Option handler for the bitrot graph: forwards the "expiry-time" option
+ * to the first xlator of @graph.  All other options are ignored.
+ *
+ * @param is unused.
+ *
+ * The option key is passed as a literal; the earlier version made a heap
+ * copy of the constant string just to free it again right after the call.
+ *
+ * Returns 0 on success (or when the option is not handled), -1 on failure.
+ */
+static int
+bitrot_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                       void *param)
+{
+        xlator_t        *xl  = NULL;
+        int              ret = 0;
+
+        xl = first_of (graph);
+
+        if (!strcmp (vme->option, "expiry-time")) {
+                ret = xlator_set_option (xl, "expiry-time", vme->value);
+                if (ret)
+                        return -1;
+        }
+
+        return ret;
+}
+
+/*
+ * Scrubber option handler.
+ *
+ * Translates the volume option named by @vme into the key understood by
+ * the first xlator of @graph and sets it to vme->value:
+ *
+ *   "scrub-throttle"              -> "scrub-throttle"
+ *   "scrub-frequency"             -> "scrub-freq"
+ *   "scrubber" with value "pause" -> "scrub-state"
+ *
+ * Any other option (or any other "scrubber" value) is ignored.
+ * @param is accepted for the handler signature but is not used.
+ *
+ * Returns 0 when nothing had to be done or the option was set, -1 when
+ * setting the option failed.
+ */
+static int
+scrubber_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                         void *param)
+{
+        xlator_t        *xl = NULL;
+        char            *scrub_key = NULL;
+
+        if (!strcmp (vme->option, "scrub-throttle"))
+                scrub_key = "scrub-throttle";
+        else if (!strcmp (vme->option, "scrub-frequency"))
+                scrub_key = "scrub-freq";
+        else if (!strcmp (vme->option, "scrubber") &&
+                 !strcmp (vme->value, "pause"))
+                scrub_key = "scrub-state";
+
+        if (!scrub_key)
+                return 0;
+
+        xl = first_of (graph);
+
+        /* The keys are constants, so no temporary heap copy is needed. */
+        if (xlator_set_option (xl, scrub_key, vme->value))
+                return -1;
+
+        return 0;
+}
+
+/*
+ * Option handler for the self-heal daemon graph.
+ *
+ * Options whose name starts with '!' are dropped unless they are listed
+ * in gd_shd_options; listed ones are forwarded with the '!' removed.
+ * All remaining options are forwarded unchanged to
+ * no_filter_option_handler().
+ */
+static int
+shd_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+                    void *param)
+{
+        struct volopt_map_entry  shd_vme = {0};
+        char                    *match   = NULL;
+
+        match = gd_get_matching_option (gd_shd_options, vme->option);
+        if (!match && (vme->option[0] == '!'))
+                return 0;
+
+        shd_vme = *vme;
+        if (match)
+                shd_vme.option = match + 1; /* skip the leading '!' */
+
+        return no_filter_option_handler (graph, &shd_vme, param);
+}
+
+/*
+ * Build the per-volume key "<prefix><volname><suffix>" and set it to
+ * @value on @xl.  When @canonicalize is true, @value is first passed
+ * through gf_canonicalize_path().
+ *
+ * Returns 0 on success and -1 on any failure.  The temporary key is
+ * released on every path.
+ */
+static int
+nfs_set_per_volume_xl_option (xlator_t *xl, const char *prefix,
+                              const char *volname, const char *suffix,
+                              char *value, gf_boolean_t canonicalize)
+{
+        char    *key = NULL;
+        int      ret = -1;
+
+        ret = gf_asprintf (&key, "%s%s%s", prefix, volname, suffix);
+        if (ret == -1)
+                return -1;
+
+        ret = 0;
+        if (canonicalize)
+                ret = gf_canonicalize_path (value);
+        if (!ret)
+                ret = xlator_set_option (xl, key, value);
+
+        GF_FREE (key);
+
+        return ret ? -1 : 0;
+}
+
+/*
+ * Option handler for the nfs/server xlator (the first xlator of @graph).
+ *
+ * @param is the glusterd_volinfo_t of the volume being processed; when it
+ * is NULL or has an empty name nothing is done.
+ *
+ * Wildcard options (those starting with '!') are expanded into their
+ * per-volume key using the volume name, e.g. "!nfs3.*.export-dir"
+ * becomes "nfs3.<volname>.export-dir".  The export-dir value is
+ * canonicalized before it is set.  Options whose voltype is "nfs/server"
+ * and that do not start with '!' are set verbatim.
+ *
+ * Returns 0 on success and -1 if an option could not be set.
+ */
+static int
+nfs_option_handler (volgen_graph_t *graph,
+                    struct volopt_map_entry *vme, void *param)
+{
+        static const struct {
+                const char      *option;
+                const char      *prefix;
+                const char      *suffix;
+                gf_boolean_t     canonicalize;
+        } per_volume_opts[] = {
+                {"!rpc-auth.addr.*.allow", "rpc-auth.addr.", ".allow",
+                 _gf_false},
+                {"!rpc-auth.addr.*.reject", "rpc-auth.addr.", ".reject",
+                 _gf_false},
+                {"!rpc-auth.auth-unix.*", "rpc-auth.auth-unix.", "",
+                 _gf_false},
+                {"!rpc-auth.auth-null.*", "rpc-auth.auth-null.", "",
+                 _gf_false},
+                {"!nfs3.*.trusted-sync", "nfs3.", ".trusted-sync",
+                 _gf_false},
+                {"!nfs3.*.trusted-write", "nfs3.", ".trusted-write",
+                 _gf_false},
+                {"!nfs3.*.volume-access", "nfs3.", ".volume-access",
+                 _gf_false},
+                {"!nfs3.*.export-dir", "nfs3.", ".export-dir",
+                 _gf_true},
+                {"!rpc-auth.ports.*.insecure", "rpc-auth.ports.", ".insecure",
+                 _gf_false},
+                {"!nfs-disable", "nfs.", ".disable",
+                 _gf_false},
+        };
+        xlator_t                *xl = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        size_t                   i = 0;
+        size_t                   count = 0;
+
+        volinfo = param;
+        if (!volinfo || (volinfo->volname[0] == '\0'))
+                return 0;
+
+        xl = first_of (graph);
+
+        /* The table entries are distinct, so at most one can match. */
+        count = sizeof (per_volume_opts) / sizeof (per_volume_opts[0]);
+        for (i = 0; i < count; i++) {
+                if (strcmp (vme->option, per_volume_opts[i].option))
+                        continue;
+
+                if (nfs_set_per_volume_xl_option
+                                (xl, per_volume_opts[i].prefix,
+                                 volinfo->volname,
+                                 per_volume_opts[i].suffix, vme->value,
+                                 per_volume_opts[i].canonicalize))
+                        return -1;
+                break;
+        }
+
+        if ((strcmp (vme->voltype, "nfs/server") == 0) &&
+            (vme->option[0] != '!')) {
+                if (xlator_set_option (xl, vme->option, vme->value))
+                        return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Map a cluster type to the volume option key that controls its
+ * self-heal daemon.
+ *
+ * Returns "cluster.self-heal-daemon" for replicate and stripe-replicate,
+ * "cluster.disperse-self-heal-daemon" for disperse, and NULL for every
+ * other type.
+ */
+char*
+volgen_get_shd_key (int type)
+{
+        if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+            (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))
+                return "cluster.self-heal-daemon";
+
+        if (type == GF_CLUSTER_TYPE_DISPERSE)
+                return "cluster.disperse-self-heal-daemon";
+
+        return NULL;
+}
+
+/*
+ * Tell whether @xl_type is one of the xlator types handled by the
+ * self-heal daemon ("cluster/replicate" or "cluster/disperse").
+ */
+static gf_boolean_t
+volgen_is_shd_compatible_xl (char *xl_type)
+{
+        char    *shd_xls[] = {"cluster/replicate", "cluster/disperse",
+                              NULL};
+        int      idx = gf_get_index_by_elem (shd_xls, xl_type);
+
+        return (idx == -1) ? _gf_false : _gf_true;
+}
+
+/*
+ * Set "iam-self-heal-daemon" to "yes" on every shd-compatible xlator
+ * of @graph.
+ *
+ * Stops at the first failure and returns that error; returns 0 when
+ * every applicable xlator was updated (or none was found).
+ */
+static int
+volgen_graph_set_iam_shd (volgen_graph_t *graph)
+{
+        xlator_t        *xl = NULL;
+        int              rc = 0;
+
+        for (xl = first_of (graph); xl != NULL; xl = xl->next) {
+                if (volgen_is_shd_compatible_xl (xl->type)) {
+                        rc = xlator_set_option (xl, "iam-self-heal-daemon",
+                                                "yes");
+                        if (rc != 0)
+                                return rc;
+                }
+        }
+
+        return rc;
+}
+
+/*
+ * Enable the self-heal daemon option(s) of a tiered volume in @set_dict.
+ *
+ * The cold tier type is handled first, then the hot tier type; a type
+ * without a shd key is skipped.  Returns the result of the last
+ * dict_set_str() call, stopping at the first failure.
+ *
+ * NOTE(review): when neither tier type has a shd key the initial -1 is
+ * returned - confirm callers only get here for shd-compatible tiers.
+ */
+static int
+glusterd_prepare_shd_volume_options_for_tier (glusterd_volinfo_t *volinfo,
+                                              dict_t *set_dict)
+{
+        int      tier_types[2] = {0, 0};
+        char    *shd_key = NULL;
+        int      rc = -1;
+        int      idx = 0;
+
+        tier_types[0] = volinfo->tier_info.cold_type;
+        tier_types[1] = volinfo->tier_info.hot_type;
+
+        for (idx = 0; idx < 2; idx++) {
+                shd_key = volgen_get_shd_key (tier_types[idx]);
+                if (!shd_key)
+                        continue;
+
+                rc = dict_set_str (set_dict, shd_key, "enable");
+                if (rc)
+                        break;
+        }
+
+        return rc;
+}
+
+/*
+ * Fill @set_dict with the options used to build the shd graph of
+ * @volinfo: the shd key(s) set to "enable", "trusted-client" set to
+ * GF_CLIENT_TRUSTED, then the volume's own options and finally the
+ * options of @mod_dict (when given) copied on top.
+ *
+ * Returns 0 on success, non-zero on failure (-1 when the volume type
+ * has no shd key).
+ */
+static int
+prepare_shd_volume_options (glusterd_volinfo_t *volinfo,
+                            dict_t *mod_dict, dict_t *set_dict)
+{
+        char    *shd_key = NULL;
+        int      rc = 0;
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                shd_key = volgen_get_shd_key (volinfo->type);
+                if (!shd_key)
+                        return -1;
+
+                rc = dict_set_str (set_dict, shd_key, "enable");
+        } else {
+                rc = glusterd_prepare_shd_volume_options_for_tier (volinfo,
+                                                                   set_dict);
+        }
+        if (rc)
+                return rc;
+
+        rc = dict_set_uint32 (set_dict, "trusted-client", GF_CLIENT_TRUSTED);
+        if (rc)
+                return rc;
+
+        dict_copy (volinfo->dict, set_dict);
+        if (mod_dict != NULL)
+                dict_copy (mod_dict, set_dict);
+
+        return rc;
+}
+
+/*
+ * Build the replicate (afr) or disperse (ec) clusters of @volinfo into
+ * @graph, depending on the volume type.
+ *
+ * Returns whatever the type-specific builder returns, or -1 when the
+ * volume type is neither replicate, stripe-replicate nor disperse.
+ */
+static int
+build_afr_ec_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo)
+{
+        if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ||
+            (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))
+                return volgen_graph_build_afr_clusters (graph, volinfo);
+
+        if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+                return volgen_graph_build_ec_clusters (graph, volinfo);
+
+        return -1;
+}
+
+/*
+ * Build the afr/ec clusters of a tiered volume into @graph.
+ *
+ * A temporary sub-volinfo is created for the cold tier and for the hot
+ * tier, each only when its type is shd compatible; the clusters of every
+ * created sub-volinfo are then built.  The temporary volinfos are
+ * deleted before returning.  @set_dict is not used.
+ *
+ * Returns the total number of clusters built, or -1 on failure.
+ */
+static int
+build_afr_ec_clusters_for_tier (volgen_graph_t *graph,
+                                glusterd_volinfo_t *volinfo,
+                                dict_t *set_dict)
+{
+        int                      ret = 0;
+        glusterd_volinfo_t      *dup_volinfo[2] = {NULL, NULL};
+        int                      clusters = 0;
+        int                      i = 0;
+        gf_boolean_t             is_hot_tier = _gf_false;
+
+        /* Slot 0 holds the cold tier, slot 1 the hot tier. */
+        if (glusterd_is_shd_compatible_type (volinfo->tier_info.cold_type)) {
+                ret = glusterd_create_sub_tier_volinfo (volinfo,
+                                                        &dup_volinfo[0],
+                                                        is_hot_tier,
+                                                        volinfo->volname);
+                if (ret)
+                        goto out;
+        }
+        if (glusterd_is_shd_compatible_type (volinfo->tier_info.hot_type)) {
+                is_hot_tier = _gf_true;
+                ret = glusterd_create_sub_tier_volinfo (volinfo,
+                                                        &dup_volinfo[1],
+                                                        is_hot_tier,
+                                                        volinfo->volname);
+                if (ret)
+                        goto out;
+                dup_volinfo[1]->tier_info.cur_tier_hot = 1;
+        }
+
+        for (i = 0; i < 2; i++) {
+                if (!dup_volinfo[i])
+                        continue;
+                ret = build_afr_ec_clusters (graph, dup_volinfo[i]);
+                if (ret < 0)
+                        goto out;
+                clusters += ret;
+        }
+        ret = 0;
+out:
+        for (i = 0; i < 2; i++) {
+                if (dup_volinfo[i])
+                        glusterd_volinfo_delete (dup_volinfo[i]);
+        }
+
+        if (ret)
+                clusters = -1;
+
+        return clusters;
+}
+
+
+
+/*
+ * Build the client xlators and then the afr/ec clusters of @volinfo
+ * into @graph, using the tier-specific builders for tiered volumes.
+ *
+ * Returns the cluster count reported by the cluster builder, or -1 when
+ * building the clients failed.
+ */
+static int
+build_shd_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                    dict_t *set_dict)
+{
+        int     nr_clusters = -1;
+        int     rc = 0;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                rc = volgen_graph_build_clients_for_tier_shd (graph, volinfo,
+                                                              set_dict);
+                if (!rc)
+                        nr_clusters = build_afr_ec_clusters_for_tier
+                                                (graph, volinfo, set_dict);
+        } else {
+                rc = volgen_graph_build_clients (graph, volinfo,
+                                                 set_dict, NULL);
+                if (!rc)
+                        nr_clusters = build_afr_ec_clusters (graph, volinfo);
+        }
+
+        return nr_clusters;
+}
+
+/*
+ * Report whether the self-heal daemon is enabled for @volinfo according
+ * to the options in @dict.
+ *
+ * For replicate, stripe-replicate and disperse volumes the volume's shd
+ * key is read (default: enabled).  For tiered volumes the result is
+ * enabled when either the cold or the hot tier's key reads as enabled.
+ * Any other volume type, or a NULL @volinfo, yields _gf_false.
+ */
+gf_boolean_t
+gd_is_self_heal_enabled (glusterd_volinfo_t *volinfo, dict_t *dict)
+{
+        gf_boolean_t     enabled = _gf_false;
+        char            *key = NULL;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                key = volgen_get_shd_key (volinfo->tier_info.cold_type);
+                if (key)
+                        enabled = dict_get_str_boolean (dict, key,
+                                                        _gf_true);
+
+                key = volgen_get_shd_key (volinfo->tier_info.hot_type);
+                if (key)
+                        enabled |= dict_get_str_boolean (dict, key,
+                                                         _gf_true);
+        } else if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ||
+                   (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) ||
+                   (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) {
+                key = volgen_get_shd_key (volinfo->type);
+                enabled = dict_get_str_boolean (dict, key, _gf_true);
+        }
+out:
+        return enabled;
+}
+
+/*
+ * Generate the rebalance-daemon volfile of @volinfo at @filepath.
+ *
+ * Nothing is written (and 0 is returned) when the volume has no more
+ * bricks than one distribute leaf, i.e. it is not distributed.
+ * Otherwise a graph of clients, clusters and a top-level io-stats xlator
+ * is built from the volume options overlaid with @mod_dict and written
+ * out.
+ *
+ * Returns 0 on success and non-zero on failure.  The temporary option
+ * dictionary and the graph are released on every path past their
+ * creation.
+ */
+static int
+build_rebalance_volfile (glusterd_volinfo_t *volinfo, char *filepath,
+                         dict_t *mod_dict)
+{
+        volgen_graph_t   graph = {0,};
+        xlator_t        *xl = NULL;
+        int              ret = -1;
+        xlator_t        *this = NULL;
+        dict_t          *set_dict = NULL;
+
+        this = THIS;
+
+        if (volinfo->brick_count <= volinfo->dist_leaf_count) {
+                /*
+                 * Volume is not a distribute volume or
+                 * contains only 1 brick, no need to create
+                 * the volfiles.
+                 */
+                return 0;
+        }
+
+        set_dict = dict_copy (volinfo->dict, NULL);
+        if (!set_dict)
+                return -1;
+
+        if (mod_dict) {
+                dict_copy (mod_dict, set_dict);
+                /* XXX dict_copy swallows errors */
+        }
+
+        /* Rebalance is always a trusted client*/
+        ret = dict_set_uint32 (set_dict, "trusted-client", GF_CLIENT_TRUSTED);
+        if (ret) {
+                /* Go through 'out' so that set_dict is not leaked. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = volgen_graph_build_clients (&graph, volinfo, set_dict, NULL);
+        if (ret)
+                goto out;
+
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                ret = volume_volgen_graph_build_clusters_tier
+                                        (&graph, volinfo, _gf_false);
+        else
+                ret = volume_volgen_graph_build_clusters
+                                        (&graph, volinfo, _gf_false);
+        if (ret) {
+                /* Do not write a volfile from a half-built graph. */
+                ret = -1;
+                goto out;
+        }
+
+        xl = volgen_graph_add_as (&graph, "debug/io-stats", volinfo->volname);
+        if (!xl) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = graph_set_generic_options (this, &graph, set_dict,
+                                         "rebalance-daemon");
+        if (ret)
+                goto out;
+
+        ret = volgen_graph_set_options_generic (&graph, set_dict, volinfo,
+                                                basic_option_handler);
+
+        if (!ret)
+                ret = volgen_write_volfile (&graph, filepath);
+
+out:
+        volgen_graph_free (&graph);
+
+        dict_destroy (set_dict);
+
+        return ret;
+}
+
+
+/*
+ * Add the self-heal daemon sub-graph of one volume to @graph.
+ *
+ * The volume is skipped (0 is returned and *@valid_config is left alone)
+ * when it is not started and @graph_check is false, or when
+ * glusterd_is_shd_compatible_volume() rejects it.  Otherwise
+ * *@valid_config is set to _gf_true, the shd options are prepared in
+ * @set_dict, the volume's clusters are built into a local graph, and
+ * that graph is merged into @graph.
+ *
+ * Returns 0 on success or skip, non-zero on failure.
+ */
+static int
+build_shd_volume_graph (xlator_t *this, volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ dict_t *mod_dict, dict_t *set_dict,
+ gf_boolean_t graph_check, gf_boolean_t *valid_config)
+{
+ volgen_graph_t cgraph = {0};
+ int ret = 0;
+ int clusters = -1;
+
+ /* Without graph_check only started volumes contribute to the graph. */
+ if (!graph_check && (volinfo->status != GLUSTERD_STATUS_STARTED))
+ goto out;
+
+ if (!glusterd_is_shd_compatible_volume (volinfo))
+ goto out;
+
+ /* Shd graph is valid only when there is at least one
+ * replica/disperse volume is present
+ */
+ *valid_config = _gf_true;
+
+ ret = prepare_shd_volume_options (volinfo, mod_dict, set_dict);
+ if (ret)
+ goto out;
+
+ /* Build this volume's clients and clusters in a scratch graph. */
+ clusters = build_shd_clusters (&cgraph, volinfo, set_dict);
+ if (clusters < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = volgen_graph_set_options_generic (&cgraph, set_dict,
+ volinfo, shd_option_handler);
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_set_iam_shd (&cgraph);
+ if (ret)
+ goto out;
+
+ /* Hand the scratch graph over to the daemon-wide graph. */
+ ret = volgen_graph_merge_sub (graph, &cgraph, clusters);
+ if (ret)
+ goto out;
+
+ ret = graph_set_generic_options (this, graph, set_dict,
+ "self-heal daemon");
+out:
+ return ret;
+}
+
+/*
+ * Build the self-heal daemon graph covering every volume known to
+ * glusterd.
+ *
+ * A "debug/io-stats" xlator named "glustershd" is added first, then the
+ * sub-graph of each volume is added through build_shd_volume_graph().
+ * "graph-check" in @mod_dict is forwarded to that function.
+ *
+ * Returns 0 on success, -EINVAL when no volume contributed a valid shd
+ * configuration, or the error of the failing step otherwise.
+ */
+int
+build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+        glusterd_volinfo_t      *voliter = NULL;
+        xlator_t                *this = NULL;
+        glusterd_conf_t         *priv = NULL;
+        dict_t                  *set_dict = NULL;
+        int                      ret = 0;
+        gf_boolean_t             valid_config = _gf_false;
+        xlator_t                *iostxl = NULL;
+        gf_boolean_t             graph_check = _gf_false;
+
+        this = THIS;
+        priv = this->private;
+
+        set_dict = dict_new ();
+        if (!set_dict) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        graph_check = dict_get_str_boolean (mod_dict, "graph-check", 0);
+        iostxl = volgen_graph_add_as (graph, "debug/io-stats", "glustershd");
+        if (!iostxl) {
+                ret = -1;
+                goto out;
+        }
+
+        cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+                ret = build_shd_volume_graph (this, graph, voliter,
+                                              mod_dict, set_dict,
+                                              graph_check, &valid_config);
+                /* A failing volume does not abort the loop (the result
+                 * is overwritten below), so at least leave a trace.
+                 * NOTE(review): confirm that continuing is intended. */
+                if (ret)
+                        gf_msg_debug (this->name, 0, "building the shd "
+                                      "graph of volume %s returned %d",
+                                      voliter->volname, ret);
+
+                /* set_dict is reused for the next volume. */
+                ret = dict_reset (set_dict);
+                if (ret)
+                        goto out;
+        }
+
+out:
+        if (set_dict)
+                dict_unref (set_dict);
+        if (!valid_config)
+                ret = -EINVAL;
+        return ret;
+}
+
+/*
+ * Builds a graph for the nfs server role, with option overrides in
+ * mod_dict.
+ *
+ * An "nfs/server" xlator named "nfs-server" is added to @graph first.
+ * Then, for every started volume that does not have NFS disabled, a
+ * client graph is built and merged below it.  Finally
+ * nfs_option_handler is run over the options of every volume.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+ volgen_graph_t cgraph = {0,};
+ glusterd_volinfo_t *voliter = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *set_dict = NULL;
+ xlator_t *nfsxl = NULL;
+ char *skey = NULL;
+ int ret = 0;
+ char nfs_xprt[16] = {0,};
+ char *volname = NULL;
+ data_t *data = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ set_dict = dict_new ();
+ if (!set_dict) {
+ gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+ GD_MSG_NO_MEMORY,
+ "Out of memory");
+ return -1;
+ }
+
+ /* Top of the graph: the nfs/server xlator and its fixed options. */
+ nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server");
+ if (!nfsxl) {
+ ret = -1;
+ goto out;
+ }
+ ret = xlator_set_option (nfsxl, "nfs.dynamic-volumes", "on");
+ if (ret)
+ goto out;
+
+ ret = xlator_set_option (nfsxl, "nfs.nlm", "on");
+ if (ret)
+ goto out;
+
+ ret = xlator_set_option (nfsxl, "nfs.drc", "off");
+ if (ret)
+ goto out;
+
+ /* First pass: one client sub-graph per started, NFS-enabled volume. */
+ cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (voliter->status != GLUSTERD_STATUS_STARTED)
+ continue;
+
+ if (dict_get_str_boolean (voliter->dict, NFS_DISABLE_MAP_KEY, 0))
+ continue;
+
+ /* Default: allow every address for this volume. */
+ ret = gf_asprintf (&skey, "rpc-auth.addr.%s.allow",
+ voliter->volname);
+ if (ret == -1) {
+ gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+ GD_MSG_NO_MEMORY, "Out of memory");
+ goto out;
+ }
+ ret = xlator_set_option (nfsxl, skey, "*");
+ GF_FREE (skey);
+ if (ret)
+ goto out;
+
+ /* Publish the volume's UUID as nfs3.<volname>.volume-id. */
+ ret = gf_asprintf (&skey, "nfs3.%s.volume-id",
+ voliter->volname);
+ if (ret == -1) {
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_NO_MEMORY, "Out of memory");
+ goto out;
+ }
+ ret = xlator_set_option (nfsxl, skey, uuid_utoa (voliter->volume_id));
+ GF_FREE (skey);
+ if (ret)
+ goto out;
+
+ /* If both RDMA and TCP are the transport_type, use TCP for NFS
+ * client protocols, because tcp,rdma volume can be created in
+ * servers which does not have rdma supported hardware
+ * The transport type specified here is client transport type
+ * which is used for communication between gluster-nfs and brick
+ * processes.
+ * User can specify client transport for tcp,rdma volume using
+ * nfs.transport-type, if it is not set by user default
+ * one will be tcp.
+ */
+ memset (&cgraph, 0, sizeof (cgraph));
+ if (mod_dict)
+ get_transport_type (voliter, mod_dict, nfs_xprt, _gf_true);
+ else
+ get_transport_type (voliter, voliter->dict, nfs_xprt, _gf_true);
+
+ /* Options forced for the client graph used by the nfs server. */
+ ret = dict_set_str (set_dict, "performance.stat-prefetch", "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (set_dict, "performance.client-io-threads",
+ "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (set_dict, "client-transport-type",
+ nfs_xprt);
+ if (ret)
+ goto out;
+
+ ret = dict_set_uint32 (set_dict, "trusted-client",
+ GF_CLIENT_TRUSTED);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (set_dict, "nfs-volume-file", "yes");
+ if (ret)
+ goto out;
+
+ /* Apply mod_dict before building the client graph only when it
+ * names this very volume. */
+ if (mod_dict && (data = dict_get (mod_dict, "volume-name"))) {
+ volname = data->data;
+ if (strcmp (volname, voliter->volname) == 0)
+ dict_copy (mod_dict, set_dict);
+ }
+
+ ret = build_client_graph (&cgraph, voliter, set_dict);
+ if (ret)
+ goto out;
+
+ if (mod_dict) {
+ dict_copy (mod_dict, set_dict);
+ ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter,
+ basic_option_handler);
+ } else {
+ ret = volgen_graph_set_options_generic (&cgraph, voliter->dict, voliter,
+ basic_option_handler);
+ }
+
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_merge_sub (graph, &cgraph, 1);
+ if (ret)
+ goto out;
+ /* set_dict is reused for the next volume. */
+ ret = dict_reset (set_dict);
+ if (ret)
+ goto out;
+ }
+
+ /* Second pass: nfs options of every volume.  A failure is only
+ * logged and the loop goes on; ret is not reset afterwards, so the
+ * value returned is the one of the last volume processed. */
+ cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+
+ if (mod_dict) {
+ ret = volgen_graph_set_options_generic (graph, mod_dict, voliter,
+ nfs_option_handler);
+ } else {
+ ret = volgen_graph_set_options_generic (graph, voliter->dict, voliter,
+ nfs_option_handler);
+ }
+
+ if (ret)
+ gf_msg ("glusterd", GF_LOG_WARNING, 0,
+ GD_MSG_GRAPH_SET_OPT_FAIL, "Could not set "
+ "vol-options for the volume %s", voliter->volname);
+ }
+
+ out:
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ dict_destroy (set_dict);
+
+ return ret;
+}
+
+/****************************
+ *
+ * Volume generation interface
+ *
+ ****************************/
+
+
+/*
+ * Compose the path of a brick volfile into @filename (a buffer of
+ * PATH_MAX bytes):
+ *
+ *   <voldir>/<volname>.<hostname>.<brick>.vol            without @prefix
+ *   <voldir>/<volname>.<prefix>.<hostname>.<brick>.vol   with @prefix
+ *
+ * where <brick> is the brick path processed by
+ * GLUSTERD_REMOVE_SLASH_FROM_PATH.
+ */
+static void
+get_brick_filepath (char *filename, glusterd_volinfo_t *volinfo,
+                    glusterd_brickinfo_t *brickinfo, char *prefix)
+{
+        glusterd_conf_t *conf = NULL;
+        char             voldir[PATH_MAX] = {0,};
+        char             brickname[PATH_MAX] = {0,};
+
+        conf = THIS->private;
+
+        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, brickname);
+        GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, conf);
+
+        if (!prefix) {
+                snprintf (filename, PATH_MAX, "%s/%s.%s.%s.vol",
+                          voldir, volinfo->volname,
+                          brickinfo->hostname, brickname);
+                return;
+        }
+
+        snprintf (filename, PATH_MAX, "%s/%s.%s.%s.%s.vol",
+                  voldir, volinfo->volname, prefix,
+                  brickinfo->hostname, brickname);
+}
+
+/*
+ * Check whether the brick volfile path that would be generated for
+ * volume @volname and brick @brick is of acceptable length.
+ *
+ * A temporary brickinfo and volinfo are created only to compose the
+ * path; both are released before returning.
+ *
+ * Returns non-zero when the full path is shorter than PATH_MAX and its
+ * last component (including the leading '/') is shorter than
+ * _POSIX_PATH_MAX; returns 0 otherwise or when the temporary objects
+ * cannot be created.
+ */
+gf_boolean_t
+glusterd_is_valid_volfpath (char *volname, char *brick)
+{
+        char                     volfpath[PATH_MAX] = {0,};
+        glusterd_brickinfo_t    *brickinfo = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        int32_t                  ret = 0;
+        xlator_t                *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo, _gf_false,
+                                                 NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRICKINFO_CREATE_FAIL,
+                        "Failed to create brickinfo"
+                        " for brick %s", brick );
+                ret = 0;
+                goto out;
+        }
+        ret = glusterd_volinfo_new (&volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOLINFO_STORE_FAIL,
+                        "Failed to create volinfo");
+                ret = 0;
+                goto out;
+        }
+
+        /* Bound the copy by the destination, not by the length of the
+         * caller-supplied name, and always NUL-terminate. */
+        snprintf (volinfo->volname, sizeof (volinfo->volname), "%s",
+                  volname);
+        get_brick_filepath (volfpath, volinfo, brickinfo, NULL);
+
+        ret = ((strlen(volfpath) < PATH_MAX) &&
+                strlen (strrchr(volfpath, '/')) < _POSIX_PATH_MAX);
+
+out:
+        if (brickinfo)
+                glusterd_brickinfo_delete (brickinfo);
+        if (volinfo)
+                glusterd_volinfo_unref (volinfo);
+        return ret;
+}
+
+/*
+ * Build the server graph of one brick and write it to the brick's
+ * volfile.  @data is not used.
+ *
+ * Returns 0 on success, otherwise the error of the build or write step.
+ */
+static int
+glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo,
+                                 glusterd_brickinfo_t *brickinfo,
+                                 dict_t *mod_dict, void *data)
+{
+        char             volfile[PATH_MAX] = {0,};
+        volgen_graph_t   brick_graph = {0,};
+        int              rc = -1;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        get_brick_filepath (volfile, volinfo, brickinfo, NULL);
+
+        rc = build_server_graph (&brick_graph, volinfo, mod_dict, brickinfo);
+        if (rc == 0)
+                rc = volgen_write_volfile (&brick_graph, volfile);
+
+        volgen_graph_free (&brick_graph);
+
+        return rc;
+}
+
+/*
+ * Build the quota daemon graph.
+ *
+ * A "features/quotad" xlator named "quotad" is added to @graph; below
+ * it, one client/cluster sub-graph is merged for every started volume
+ * that has quota enabled.  Options in @mod_dict, when given, are laid
+ * over each volume's own options.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+build_quotad_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+ volgen_graph_t cgraph = {0};
+ glusterd_volinfo_t *voliter = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *set_dict = NULL;
+ int ret = 0;
+ xlator_t *quotad_xl = NULL;
+ char *skey = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ set_dict = dict_new ();
+ if (!set_dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ quotad_xl = volgen_graph_add_as (graph, "features/quotad", "quotad");
+ if (!quotad_xl) {
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ /* Only started volumes with quota enabled take part. */
+ if (voliter->status != GLUSTERD_STATUS_STARTED)
+ continue;
+
+ if (1 != glusterd_is_volume_quota_enabled (voliter))
+ continue;
+
+ ret = dict_set_uint32 (set_dict, "trusted-client",
+ GF_CLIENT_TRUSTED);
+ if (ret)
+ goto out;
+
+ dict_copy (voliter->dict, set_dict);
+ if (mod_dict)
+ dict_copy (mod_dict, set_dict);
+
+ /* The "<volname>.volume-id" option is set to the volume name. */
+ ret = gf_asprintf(&skey, "%s.volume-id", voliter->volname);
+ if (ret == -1) {
+ gf_msg ("glusterd", GF_LOG_ERROR, ENOMEM,
+ GD_MSG_NO_MEMORY, "Out of memory");
+ goto out;
+ }
+ ret = xlator_set_option(quotad_xl, skey, voliter->volname);
+ GF_FREE(skey);
+ if (ret)
+ goto out;
+
+ /* Start from an empty scratch graph for this volume. */
+ memset (&cgraph, 0, sizeof (cgraph));
+ ret = volgen_graph_build_clients (&cgraph, voliter, set_dict,
+ NULL);
+ if (ret)
+ goto out;
+
+ if (voliter->type == GF_CLUSTER_TYPE_TIER)
+ ret = volume_volgen_graph_build_clusters_tier
+ (&cgraph, voliter, _gf_true);
+ else
+ ret = volume_volgen_graph_build_clusters
+ (&cgraph, voliter, _gf_true);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+
+ if (mod_dict) {
+ dict_copy (mod_dict, set_dict);
+ ret = volgen_graph_set_options_generic (&cgraph, set_dict,
+ voliter,
+ basic_option_handler);
+ } else {
+ ret = volgen_graph_set_options_generic (&cgraph,
+ voliter->dict,
+ voliter,
+ basic_option_handler);
+ }
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_merge_sub (graph, &cgraph, 1);
+ if (ret)
+ goto out;
+
+ /* set_dict is reused for the next volume. */
+ ret = dict_reset (set_dict);
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (set_dict)
+ dict_unref (set_dict);
+ return ret;
+}
+
+/*
+ * Store the path of the volume's "marker.tstamp" file, located in the
+ * volume directory, into @filename (a buffer of PATH_MAX bytes).
+ */
+static void
+get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo)
+{
+        glusterd_conf_t *conf = NULL;
+        size_t           room = 0;
+
+        conf = THIS->private;
+
+        GLUSTERD_GET_VOLUME_DIR (filename, volinfo, conf);
+
+        /* Space left in the buffer, keeping one byte for the NUL. */
+        room = PATH_MAX - strlen(filename) - 1;
+        strncat (filename, "/marker.tstamp", room);
+}
+
+/*
+ * Store the path of the parent volume's "marker.tstamp" file,
+ * "<workdir>/vols/<parent_volname>/marker.tstamp", into @filename
+ * (a buffer of PATH_MAX bytes).
+ */
+static void
+get_parent_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo)
+{
+        xlator_t        *this = NULL;
+        glusterd_conf_t *conf = NULL;
+        size_t           room = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        snprintf (filename, PATH_MAX, "%s/vols/%s", conf->workdir,
+                  volinfo->parent_volname);
+
+        /* Space left in the buffer, keeping one byte for the NUL. */
+        room = PATH_MAX - strlen(filename) - 1;
+        strncat (filename, "/marker.tstamp", room);
+}
+
+/*
+ * Assign a jbr_uuid to every brick of @volinfo.  Bricks are taken in
+ * list order in groups of replica_count; a fresh UUID is generated at
+ * the start of each group and shared by all bricks of that group.
+ */
+void
+assign_jbr_uuids (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t    *brick = NULL;
+        uuid_t                   group_uuid;
+        int                      slot = 0;
+
+        list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                /* First brick of a group: draw the group's UUID. */
+                if (slot == 0)
+                        gf_uuid_generate(group_uuid);
+
+                gf_uuid_copy(brick->jbr_uuid, group_uuid);
+
+                slot++;
+                if (slot >= volinfo->replica_count)
+                        slot = 0;
+        }
+}
+
+/*
+ * Generate the volfile of every brick of @volinfo.
+ *
+ * Before doing so the volume's "marker.tstamp" file is brought in line
+ * with the VKEY_MARKER_XTIME setting: it is created when the setting is
+ * on (an already existing file is left alone) and removed when it is
+ * off.  For snapshot volumes a freshly created file gets the timestamps
+ * of the parent volume's file.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+generate_brick_volfiles (glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char tstamp_file[PATH_MAX] = {0,};
+ char parent_tstamp_file[PATH_MAX] = {0,};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME);
+ if (ret == -1)
+ return -1;
+
+ assign_brick_groups (volinfo);
+ get_vol_tstamp_file (tstamp_file, volinfo);
+
+ /* ret still holds the VKEY_MARKER_XTIME boolean read above. */
+ if (ret) {
+ ret = open (tstamp_file, O_WRONLY|O_CREAT|O_EXCL, 0600);
+ /* An existing file is not an error: -2 makes both the failure
+ * branch (-1) and the "just created" branch (>= 0) below be
+ * skipped. */
+ if (ret == -1 && errno == EEXIST) {
+ gf_msg_debug (this->name, 0,
+ "timestamp file exist");
+ ret = -2;
+ }
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED,
+ "failed to create "
+ "%s", tstamp_file);
+ return -1;
+ }
+ if (ret >= 0) {
+ sys_close (ret);
+ /* If snap_volume, retain timestamp for marker.tstamp
+ * from parent. Geo-replication depends on mtime of
+ * 'marker.tstamp' to decide the volume-mark, i.e.,
+ * geo-rep start time just after session is created.
+ */
+ if (volinfo->is_snap_volume) {
+ get_parent_vol_tstamp_file (parent_tstamp_file,
+ volinfo);
+ ret = gf_set_timestamp (parent_tstamp_file,
+ tstamp_file);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TSTAMP_SET_FAIL,
+ "Unable to set atime and mtime"
+ " of %s as of %s", tstamp_file,
+ parent_tstamp_file);
+ goto out;
+ }
+ }
+ }
+ } else {
+ /* Setting is off: remove the file; a missing file is fine. */
+ ret = sys_unlink (tstamp_file);
+ if (ret == -1 && errno == ENOENT)
+ ret = 0;
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED,
+ "failed to unlink "
+ "%s", tstamp_file);
+ return -1;
+ }
+ }
+
+ if (glusterd_volinfo_get_boolean(volinfo, "cluster.jbr") > 0) {
+ assign_jbr_uuids(volinfo);
+ }
+
+ /* Write one volfile per brick. */
+ ret = glusterd_volume_brick_for_each (volinfo, NULL,
+ glusterd_generate_brick_volfile);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ gf_msg_debug (this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Build the client graph of @volinfo with the options in @dict and
+ * write it to @filepath.
+ *
+ * Returns 0 on success, otherwise the error of the build or write step.
+ */
+static int
+generate_single_transport_client_volfile (glusterd_volinfo_t *volinfo,
+                                          char *filepath, dict_t *dict)
+{
+        volgen_graph_t   client_graph = {0,};
+        int              rc = -1;
+
+        rc = build_client_graph (&client_graph, volinfo, dict);
+        if (rc == 0)
+                rc = volgen_write_volfile (&client_graph, filepath);
+
+        volgen_graph_free (&client_graph);
+
+        return rc;
+}
+
+/*
+ * Write, for every brick of @volinfo, a volfile that contains a single
+ * "tcp" client xlator pointing at that brick.  The file name is the
+ * brick volfile name with the "client" prefix.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_generate_client_per_brick_volfile (glusterd_volinfo_t *volinfo)
+{
+ char filepath[PATH_MAX] = {0, };
+ glusterd_brickinfo_t *brick = NULL;
+ volgen_graph_t graph = {0, };
+ dict_t *dict = NULL;
+ xlator_t *xl = NULL;
+ int ret = -1;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_uint32 (dict, "trusted-client", GF_CLIENT_TRUSTED);
+ if (ret)
+ goto out;
+
+ cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ xl = volgen_graph_build_client (&graph, volinfo,
+ brick->hostname, brick->path,
+ brick->brick_id,
+ "tcp", dict);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+
+ get_brick_filepath (filepath, volinfo, brick, "client");
+ ret = volgen_write_volfile (&graph, filepath);
+ if (ret < 0)
+ goto out;
+
+ /* Release this brick's graph and start the next one empty. */
+ volgen_graph_free (&graph);
+ memset (&graph, 0, sizeof (graph));
+ }
+
+
+ ret = 0;
+out:
+ /* On success the graph was already freed inside the loop. */
+ if (ret)
+ volgen_graph_free (&graph);
+
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
+/*
+ * Translate a transport type into the list of transport names it
+ * requires, written to @types starting at index 0: "tcp", "rdma", or
+ * both (in that order).  Entries that are not needed, and @types as a
+ * whole for any other @type, are left untouched.
+ */
+static void
+enumerate_transport_reqs (gf_transport_type type, char **types)
+{
+        if (type == GF_TRANSPORT_TCP) {
+                types[0] = "tcp";
+        } else if (type == GF_TRANSPORT_RDMA) {
+                types[0] = "rdma";
+        } else if (type == GF_TRANSPORT_BOTH_TCP_RDMA) {
+                types[0] = "tcp";
+                types[1] = "rdma";
+        }
+}
+
+/*
+ * Generate one "dummy" client volfile per transport of @volinfo.  Each
+ * is built with "trusted-client" set to GF_CLIENT_OTHER and written to
+ * the path given by glusterd_get_dummy_client_filepath().
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+generate_dummy_client_volfiles (glusterd_volinfo_t *volinfo)
+{
+ int i = 0;
+ int ret = -1;
+ char filepath[PATH_MAX] = {0,};
+ /* Up to two transport names followed by a NULL terminator. */
+ char *types[] = {NULL, NULL, NULL};
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ gf_transport_type type = GF_TRANSPORT_TCP;
+
+ this = THIS;
+
+ enumerate_transport_reqs (volinfo->transport_type, types);
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ for (i = 0; types[i]; i++) {
+ memset (filepath, 0, sizeof (filepath));
+ ret = dict_set_str (dict, "client-transport-type", types[i]);
+ if (ret)
+ goto out;
+ type = transport_str_to_type (types[i]);
+
+ ret = dict_set_uint32 (dict, "trusted-client", GF_CLIENT_OTHER);
+ if (ret)
+ goto out;
+
+ ret = glusterd_get_dummy_client_filepath (filepath,
+ volinfo, type);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY,
+ "Received invalid transport-type.");
+ goto out;
+ }
+
+ ret = generate_single_transport_client_volfile (volinfo,
+ filepath,
+ dict);
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ gf_msg_trace ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Generate the client volfiles of @volinfo, one per transport, for the
+ * given @client_type, followed by the rebalance volfile.
+ *
+ * Trusted clients get the path from
+ * glusterd_get_trusted_client_filepath(), all others from
+ * glusterd_get_client_filepath().
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+generate_client_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_client_type_t client_type)
+{
+ int i = 0;
+ int ret = -1;
+ char filepath[PATH_MAX] = {0,};
+ /* Up to two transport names followed by a NULL terminator. */
+ char *types[] = {NULL, NULL, NULL};
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ gf_transport_type type = GF_TRANSPORT_TCP;
+
+ this = THIS;
+
+ enumerate_transport_reqs (volinfo->transport_type, types);
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ for (i = 0; types[i]; i++) {
+ memset (filepath, 0, sizeof (filepath));
+ ret = dict_set_str (dict, "client-transport-type", types[i]);
+ if (ret)
+ goto out;
+ type = transport_str_to_type (types[i]);
+
+ ret = dict_set_uint32 (dict, "trusted-client", client_type);
+ if (ret)
+ goto out;
+
+ if (client_type == GF_CLIENT_TRUSTED) {
+ ret = glusterd_get_trusted_client_filepath (filepath,
+ volinfo,
+ type);
+ } else {
+ ret = glusterd_get_client_filepath (filepath,
+ volinfo,
+ type);
+ }
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY,
+ "Received invalid transport-type");
+ goto out;
+ }
+
+ ret = generate_single_transport_client_volfile (volinfo,
+ filepath,
+ dict);
+ if (ret)
+ goto out;
+ }
+
+ /* Generate volfile for rebalance process */
+ /* dict still carries the options set for the last transport. */
+ glusterd_get_rebalance_volfile (volinfo, filepath, PATH_MAX);
+ ret = build_rebalance_volfile (volinfo, filepath, dict);
+
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create rebalance volfile for %s",
+ volinfo->volname);
+ goto out;
+ }
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ gf_msg_trace ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Build the snapd (snapshot daemon) graph of @volinfo into @graph.
+ *
+ * The graph consists of, bottom to top: features/snapview-server,
+ * performance/io-threads, debug/io-stats (named "snapd-<volname>") and
+ * protocol/server over tcp, with the SSL options and login credentials
+ * of the volume.  The volume options are applied last.
+ *
+ * Returns 0 on success, non-zero on failure.  The temporary copy of the
+ * volume options is released on every path.
+ */
+int
+glusterd_snapdsvc_generate_volfile (volgen_graph_t *graph,
+                                    glusterd_volinfo_t *volinfo)
+{
+        xlator_t        *xl = NULL;
+        char            *username = NULL;
+        char            *passwd = NULL;
+        int              ret = 0;
+        char             key [PATH_MAX] = {0, };
+        dict_t          *set_dict = NULL;
+        char            *loglevel = NULL;
+        char            *xlator = NULL;
+        char             auth_path[] = "auth-path";
+
+        set_dict = dict_copy (volinfo->dict, NULL);
+        if (!set_dict)
+                return -1;
+
+        /* A log-level request needs both "xlator" and "loglevel". */
+        ret = dict_get_str (set_dict, "xlator", &xlator);
+        if (!ret) {
+                ret = dict_get_str (set_dict, "loglevel", &loglevel);
+                if (ret) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_DICT_GET_FAILED, "could not get both"
+                                " translator name and loglevel for log level "
+                                "request");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Every failure from here up to the final call reports -1. */
+        ret = -1;
+
+        xl = volgen_graph_add (graph, "features/snapview-server",
+                               volinfo->volname);
+        if (!xl)
+                goto out;
+
+        if (xlator_set_option (xl, "volname", volinfo->volname))
+                goto out;
+
+        xl = volgen_graph_add (graph, "performance/io-threads",
+                               volinfo->volname);
+        if (!xl)
+                goto out;
+
+        snprintf (key, sizeof (key), "snapd-%s", volinfo->volname);
+        xl = volgen_graph_add_as (graph, "debug/io-stats", key);
+        if (!xl)
+                goto out;
+
+        xl = volgen_graph_add (graph, "protocol/server", volinfo->volname);
+        if (!xl)
+                goto out;
+
+        if (xlator_set_option (xl, "transport-type", "tcp"))
+                goto out;
+
+        RPC_SET_OPT(xl, SSL_OWN_CERT_OPT,   "ssl-own-cert",    goto out);
+        RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT,"ssl-private-key", goto out);
+        RPC_SET_OPT(xl, SSL_CA_LIST_OPT,    "ssl-ca-list",     goto out);
+        RPC_SET_OPT(xl, SSL_CRL_PATH_OPT,   "ssl-crl-path",    goto out);
+        RPC_SET_OPT(xl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth",  goto out);
+        RPC_SET_OPT(xl, SSL_CIPHER_LIST_OPT,"ssl-cipher-list", goto out);
+        RPC_SET_OPT(xl, SSL_DH_PARAM_OPT,   "ssl-dh-param",    goto out);
+        RPC_SET_OPT(xl, SSL_EC_CURVE_OPT,   "ssl-ec-curve",    goto out);
+
+        username = glusterd_auth_get_username (volinfo);
+        passwd   = glusterd_auth_get_password (volinfo);
+
+        snprintf (key, sizeof (key), "auth.login.snapd-%s.allow",
+                  volinfo->volname);
+        if (xlator_set_option (xl, key, username))
+                goto out;
+
+        snprintf (key, sizeof (key), "auth.login.%s.password", username);
+        if (xlator_set_option (xl, key, passwd))
+                goto out;
+
+        snprintf (key, sizeof (key), "snapd-%s", volinfo->volname);
+        if (xlator_set_option (xl, auth_path, key))
+                goto out;
+
+        ret = volgen_graph_set_options_generic
+                (graph, set_dict,
+                 (xlator && loglevel)? (void *)set_dict: volinfo,
+                 (xlator && loglevel) ?
+                 &server_spec_extended_option_handler:
+                 &server_spec_option_handler);
+
+out:
+        /* Same release call as the other dict_copy (..., NULL) result
+         * in this file (build_rebalance_volfile). */
+        dict_destroy (set_dict);
+
+        return ret;
+}
+
+/*
+ * Fill @set_dict with the options used when building the bitrot and
+ * scrubber graphs: the "trusted-client" marker is set first, then the
+ * entries of volinfo->dict and (when supplied) of @mod_dict are copied in,
+ * in that order.
+ *
+ * Returns the result of dict_set_uint32(); nothing is copied when that
+ * call fails, and the dict_copy() results are not inspected.
+ */
+static int
+prepare_bitrot_scrub_volume_options (glusterd_volinfo_t *volinfo,
+                                     dict_t *mod_dict, dict_t *set_dict)
+{
+        int rc = dict_set_uint32 (set_dict, "trusted-client",
+                                  GF_CLIENT_TRUSTED);
+
+        if (rc == 0) {
+                dict_copy (volinfo->dict, set_dict);
+                if (mod_dict != NULL)
+                        dict_copy (mod_dict, set_dict);
+        }
+
+        return rc;
+}
+
+/*
+ * Link the client xlators already present in @graph under a
+ * "features/bit-rot" xlator and set its "brick-count" option to
+ * @numbricks.
+ *
+ * Returns the value produced by volgen_link_bricks_from_list_tail() (the
+ * caller treats it as the cluster count and as an error when negative),
+ * or the failing result of gf_asprintf()/xlator_set_option().
+ */
+static int
+build_bitd_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, int brick_count, unsigned int numbricks)
+{
+        int       ret           = -1;
+        int       clusters      = 0;
+        xlator_t *xl            = NULL;
+        char     *brick_hint    = NULL;
+        char     *bitrot_args[] = {"features/bit-rot",
+                                   "%s-bit-rot-%d"};
+
+        ret = volgen_link_bricks_from_list_tail (graph, volinfo, bitrot_args[0],
+                                                 bitrot_args[1], brick_count,
+                                                 brick_count);
+        clusters = ret;
+
+        xl = first_of (graph);
+
+        /* numbricks is unsigned, so it is formatted with %u, not %d. */
+        ret = gf_asprintf (&brick_hint, "%u", numbricks);
+        if (ret < 0)
+                goto out;
+
+        ret = xlator_set_option (xl, "brick-count", brick_hint);
+        if (ret)
+                goto out;
+
+        ret = clusters;
+
+out:
+        /*
+         * Release the formatted string on every path; it used to leak.
+         * NOTE(review): this relies on xlator_set_option() keeping its own
+         * copy of the value, which is how other callers in this file use it
+         * (they hand it stack buffers) -- confirm against its definition.
+         */
+        if (brick_hint)
+                GF_FREE (brick_hint);
+
+        return ret;
+}
+
+/*
+ * Build the bitrot sub-graph of one volume and merge it into @graph.
+ *
+ * A client xlator is created for every brick of @volinfo that is local to
+ * this node; build_bitd_clusters() then places the bit-rot xlator on top
+ * and the result is merged with volgen_graph_merge_sub().
+ *
+ * @mod_dict  - optional option overrides, folded into the working dict
+ * @numbricks - brick count handed on to build_bitd_clusters()
+ *
+ * Returns 0 on success (including the case where the volume has no local
+ * brick and nothing is added), non-zero on failure.
+ */
+static int
+build_bitd_volume_graph (volgen_graph_t *graph,
+                         glusterd_volinfo_t *volinfo, dict_t *mod_dict,
+                         unsigned int numbricks)
+{
+        volgen_graph_t        cgraph      = {0};
+        xlator_t             *this        = NULL;
+        xlator_t             *xl          = NULL;
+        dict_t               *set_dict    = NULL;
+        glusterd_conf_t      *priv        = NULL;
+        int                   ret         = 0;
+        int                   clusters    = -1;
+        glusterd_brickinfo_t *brickinfo   = NULL;
+        int                   brick_count = 0;
+        char                  transt[16]  = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        set_dict = dict_new ();
+        if (!set_dict) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = prepare_bitrot_scrub_volume_options (volinfo, mod_dict, set_dict);
+        if (ret)
+                goto out;
+
+        get_transport_type (volinfo, set_dict, transt, _gf_false);
+        /*
+         * For a "tcp,rdma" volume use plain tcp. The previous
+         * strncpy (transt, "tcp", strlen ("tcp")) copied three bytes and no
+         * terminator, so the buffer still read "tcp,rdma"; snprintf() writes
+         * the terminating NUL as well.
+         */
+        if (!strncmp (transt, "tcp,rdma", strlen ("tcp,rdma")))
+                snprintf (transt, sizeof (transt), "%s", "tcp");
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (!glusterd_is_local_brick (this, volinfo, brickinfo))
+                        continue;
+
+                xl = volgen_graph_build_client (&cgraph, volinfo,
+                                                brickinfo->hostname,
+                                                brickinfo->path,
+                                                brickinfo->brick_id,
+                                                transt, set_dict);
+                if (!xl) {
+                        ret = -1;
+                        goto out;
+                }
+                brick_count++;
+        }
+
+        /* No local brick: nothing to add for this volume, not an error. */
+        if (brick_count == 0) {
+                ret = 0;
+                goto out;
+        }
+
+        clusters = build_bitd_clusters (&cgraph, volinfo, set_dict, brick_count,
+                                        numbricks);
+        if (clusters < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = volgen_graph_set_options_generic (&cgraph, set_dict,
+                                                volinfo,
+                                                bitrot_option_handler);
+        if (ret)
+                goto out;
+
+        ret = volgen_graph_merge_sub (graph, &cgraph, clusters);
+        if (ret)
+                goto out;
+
+        ret = graph_set_generic_options (this, graph, set_dict, "Bitrot");
+
+out:
+        /* NOTE(review): cgraph is not released on the error paths above --
+         * confirm whether volgen_graph_free (&cgraph) is needed here. */
+        if (set_dict)
+                dict_unref (set_dict);
+
+        return ret;
+}
+
+/*
+ * Build the graph of the bitrot daemon: a "debug/io-stats" xlator named
+ * "bitd" plus one sub-graph per started, bitrot-enabled volume.
+ *
+ * Returns -1 when the io-stats xlator cannot be added; otherwise the
+ * result of the last build_bitd_volume_graph() call (0 when no volume
+ * qualifies). Results of earlier volumes are overwritten by later ones.
+ */
+int
+build_bitd_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+        xlator_t             *this      = THIS;
+        glusterd_conf_t      *priv      = NULL;
+        glusterd_volinfo_t   *voliter   = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        unsigned int          numbricks = 0;
+        int                   ret       = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!volgen_graph_add_as (graph, "debug/io-stats", "bitd"))
+                return -1;
+
+        /* TODO: do away with this extra loop _if possible_.
+         * First pass: count the local bricks of all qualifying volumes. */
+        cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+                if (voliter->status != GLUSTERD_STATUS_STARTED ||
+                    !glusterd_is_bitrot_enabled (voliter))
+                        continue;
+
+                cds_list_for_each_entry (brickinfo,
+                                         &voliter->bricks, brick_list) {
+                        if (glusterd_is_local_brick (this, voliter, brickinfo))
+                                numbricks++;
+                }
+        }
+
+        /* Second pass: build the per-volume sub-graphs. */
+        cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+                if (voliter->status != GLUSTERD_STATUS_STARTED ||
+                    !glusterd_is_bitrot_enabled (voliter))
+                        continue;
+
+                ret = build_bitd_volume_graph (graph, voliter,
+                                               mod_dict, numbricks);
+        }
+
+        return ret;
+}
+
+/*
+ * Link the client xlators in @graph under a "features/bit-rot" xlator and
+ * switch it to scrubber mode by setting "scrubber" to "true".
+ *
+ * Returns the value of volgen_link_bricks_from_list_tail() when the option
+ * was set, otherwise the non-zero result of xlator_set_option().
+ */
+static int
+build_scrub_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                      dict_t *set_dict, int brick_count)
+{
+        char *scrub_type    = "features/bit-rot";
+        char *scrub_namefmt = "%s-bit-rot-%d";
+        int   linked        = 0;
+        int   rc            = -1;
+
+        linked = volgen_link_bricks_from_list_tail (graph, volinfo, scrub_type,
+                                                    scrub_namefmt, brick_count,
+                                                    brick_count);
+
+        rc = xlator_set_option (first_of (graph), "scrubber", "true");
+
+        return rc ? rc : linked;
+}
+
+/*
+ * Build the scrubber sub-graph of one volume and merge it into @graph.
+ *
+ * A client xlator is created for every brick of @volinfo that is local to
+ * this node; build_scrub_clusters() then places the bit-rot xlator (in
+ * scrubber mode) on top and the result is merged with
+ * volgen_graph_merge_sub().
+ *
+ * @mod_dict - optional option overrides, folded into the working dict
+ *
+ * Returns 0 on success (including the case where the volume has no local
+ * brick and nothing is added), non-zero on failure.
+ */
+static int
+build_scrub_volume_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                          dict_t *mod_dict)
+{
+        volgen_graph_t        cgraph      = {0};
+        dict_t               *set_dict    = NULL;
+        xlator_t             *this        = NULL;
+        xlator_t             *xl          = NULL;
+        glusterd_conf_t      *priv        = NULL;
+        int                   ret         = 0;
+        int                   clusters    = -1;
+        int                   brick_count = 0;
+        char                  transt[16]  = {0,};
+        glusterd_brickinfo_t *brickinfo   = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        set_dict = dict_new ();
+        if (!set_dict) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = prepare_bitrot_scrub_volume_options (volinfo, mod_dict, set_dict);
+        if (ret)
+                goto out;
+
+        get_transport_type (volinfo, set_dict, transt, _gf_false);
+        /*
+         * For a "tcp,rdma" volume use plain tcp. The previous
+         * strncpy (transt, "tcp", strlen ("tcp")) copied three bytes and no
+         * terminator, so the buffer still read "tcp,rdma"; snprintf() writes
+         * the terminating NUL as well.
+         */
+        if (!strncmp (transt, "tcp,rdma", strlen ("tcp,rdma")))
+                snprintf (transt, sizeof (transt), "%s", "tcp");
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (!glusterd_is_local_brick (this, volinfo, brickinfo))
+                        continue;
+
+                xl = volgen_graph_build_client (&cgraph, volinfo,
+                                                brickinfo->hostname,
+                                                brickinfo->path,
+                                                brickinfo->brick_id,
+                                                transt, set_dict);
+                if (!xl) {
+                        ret = -1;
+                        goto out;
+                }
+                brick_count++;
+        }
+
+        /* No local brick: nothing to add for this volume, not an error. */
+        if (brick_count == 0) {
+                ret = 0;
+                goto out;
+        }
+
+        clusters = build_scrub_clusters (&cgraph, volinfo, set_dict,
+                                         brick_count);
+        if (clusters < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = volgen_graph_set_options_generic (&cgraph, set_dict,
+                                                volinfo,
+                                                scrubber_option_handler);
+        if (ret)
+                goto out;
+
+        ret = volgen_graph_merge_sub (graph, &cgraph, clusters);
+        if (ret)
+                goto out;
+
+        ret = graph_set_generic_options (this, graph, set_dict, "Scrubber");
+out:
+        /* NOTE(review): cgraph is not released on the error paths above --
+         * confirm whether volgen_graph_free (&cgraph) is needed here. */
+        if (set_dict)
+                dict_unref (set_dict);
+
+        return ret;
+}
+
+/*
+ * Build the graph of the scrubber daemon: a "debug/io-stats" xlator named
+ * "scrub" plus one sub-graph per started, bitrot-enabled volume.
+ *
+ * Returns -1 when the io-stats xlator cannot be added; otherwise the
+ * result of the last build_scrub_volume_graph() call (0 when no volume
+ * qualifies). Results of earlier volumes are overwritten by later ones.
+ */
+int
+build_scrub_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+        xlator_t           *this    = THIS;
+        glusterd_conf_t    *priv    = NULL;
+        glusterd_volinfo_t *voliter = NULL;
+        int                 ret     = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (!volgen_graph_add_as (graph, "debug/io-stats", "scrub"))
+                return -1;
+
+        cds_list_for_each_entry (voliter, &priv->volumes, vol_list) {
+                if (voliter->status != GLUSTERD_STATUS_STARTED ||
+                    !glusterd_is_bitrot_enabled (voliter))
+                        continue;
+
+                ret = build_scrub_volume_graph (graph, voliter, mod_dict);
+        }
+
+        return ret;
+}
+
+/*
+ * Generate the snapd graph of @volinfo and write it to the snapd volfile
+ * path obtained from glusterd_svc_build_snapd_volfile().
+ *
+ * Returns 0 on success, otherwise the failing step's result. The graph is
+ * always released before returning.
+ */
+int
+glusterd_snapdsvc_create_volfile (glusterd_volinfo_t *volinfo)
+{
+        char           volfile_path [PATH_MAX] = {0,};
+        volgen_graph_t snapd_graph             = {0,};
+        int            rc                      = -1;
+
+        glusterd_svc_build_snapd_volfile (volinfo, volfile_path, PATH_MAX);
+
+        rc = glusterd_snapdsvc_generate_volfile (&snapd_graph, volinfo);
+        if (rc == 0)
+                rc = volgen_write_volfile (&snapd_graph, volfile_path);
+
+        volgen_graph_free (&snapd_graph);
+
+        return rc;
+}
+
+/*
+ * Regenerate the volfile of @brickinfo and the trusted client volfiles of
+ * @volinfo, then call glusterd_fetchspec_notify().
+ *
+ * The steps run in that order and stop at the first failure, whose result
+ * is returned; 0 means every step succeeded.
+ */
+int
+glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo,
+                             glusterd_brickinfo_t *brickinfo)
+{
+        int rc = glusterd_generate_brick_volfile (volinfo, brickinfo,
+                                                  NULL, NULL);
+        if (rc)
+                return rc;
+
+        rc = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED);
+        if (rc)
+                return rc;
+
+        return glusterd_fetchspec_notify (THIS);
+}
+
+/*
+ * Generate all volfiles of @volinfo: brick volfiles first, then the
+ * trusted client volfiles, then the remaining client volfiles.
+ *
+ * Stops at the first failing step, logs which one failed and returns its
+ * result; returns 0 when all three succeed.
+ */
+int
+glusterd_create_volfiles (glusterd_volinfo_t *volinfo)
+{
+        xlator_t *this = THIS;
+        int       rc   = -1;
+
+        rc = generate_brick_volfiles (volinfo);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Could not generate volfiles for bricks");
+                return rc;
+        }
+
+        rc = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Could not generate trusted client volfiles");
+                return rc;
+        }
+
+        rc = generate_client_volfiles (volinfo, GF_CLIENT_OTHER);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLFILE_CREATE_FAIL,
+                        "Could not generate client volfiles");
+        }
+
+        return rc;
+}
+
+/*
+ * Create every volfile of @volinfo and, only when that succeeds, call
+ * glusterd_fetchspec_notify().
+ *
+ * Returns the result of the first step that fails, or 0.
+ */
+int
+glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo)
+{
+        xlator_t *this = THIS;
+        int       rc   = glusterd_create_volfiles (volinfo);
+
+        if (rc == 0)
+                rc = glusterd_fetchspec_notify (this);
+
+        return rc;
+}
+
+/*
+ * Run @builder to populate a fresh graph and write that graph to
+ * @filepath.
+ *
+ * @mod_dict is passed through to @builder untouched. Returns 0 on
+ * success, otherwise the result of the failing step; the graph is released
+ * in both cases.
+ */
+int
+glusterd_create_global_volfile (glusterd_graph_builder_t builder,
+                                char *filepath, dict_t *mod_dict)
+{
+        volgen_graph_t built = {0,};
+        int            rc    = -1;
+
+        rc = builder (&built, mod_dict);
+        if (rc == 0)
+                rc = volgen_write_volfile (&built, filepath);
+
+        volgen_graph_free (&built);
+
+        return rc;
+}
+
+/*
+ * Remove the volfile of @brickinfo (path computed by
+ * get_brick_filepath()).
+ *
+ * Returns the result of sys_unlink(); a failure is logged together with
+ * errno.
+ */
+int
+glusterd_delete_volfile (glusterd_volinfo_t *volinfo,
+                         glusterd_brickinfo_t *brickinfo)
+{
+        char volfile_path[PATH_MAX] = {0,};
+        int  rc                     = 0;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (brickinfo);
+
+        get_brick_filepath (volfile_path, volinfo, brickinfo, NULL);
+
+        rc = sys_unlink (volfile_path);
+        if (rc != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "failed to delete file: %s",
+                        volfile_path);
+        }
+
+        return rc;
+}
+
+/*
+ * Validate @val_dict against the self-heal daemon graph.
+ *
+ * Volumes for which glusterd_is_shd_compatible_volume() is false are
+ * accepted without further checks. Otherwise "graph-check" is set in
+ * @val_dict while the shd graph is built and validated; the key is removed
+ * again before returning, on every path.
+ *
+ * Returns 0 when the options are acceptable, non-zero otherwise (details
+ * may be reported through @op_errstr).
+ */
+int
+validate_shdopts (glusterd_volinfo_t *volinfo,
+                  dict_t *val_dict,
+                  char **op_errstr)
+{
+        volgen_graph_t graph = {0,};
+        int            ret   = -1;
+
+        graph.errstr = op_errstr;
+
+        if (!glusterd_is_shd_compatible_volume (volinfo)) {
+                ret = 0;
+        } else {
+                ret = dict_set_str (val_dict, "graph-check", "on");
+                if (ret == 0) {
+                        ret = build_shd_graph (&graph, val_dict);
+                        if (ret == 0)
+                                ret = graph_reconf_validateopt (&graph.graph,
+                                                                op_errstr);
+
+                        volgen_graph_free (&graph);
+
+                        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+                }
+        }
+
+        dict_del (val_dict, "graph-check");
+        return ret;
+}
+
+/*
+ * Validate @val_dict against the NFS server graph.
+ *
+ * When "nfs.transport-type" is present it is only accepted for volumes
+ * whose transport type is GF_TRANSPORT_BOTH_TCP_RDMA, and only with the
+ * values "tcp" or "rdma"; a rejection stores a duplicated message in
+ * *@op_errstr. "volume-name" is set in @val_dict while the graph is built
+ * and removed again before returning.
+ *
+ * Returns 0 when the options are acceptable, non-zero otherwise.
+ */
+int
+validate_nfsopts (glusterd_volinfo_t *volinfo,
+                  dict_t *val_dict,
+                  char **op_errstr)
+{
+        xlator_t       *this               = THIS;
+        volgen_graph_t  graph              = {0,};
+        char            transport_type[16] = {0,};
+        char            err_str[4096]      = {0,};
+        char           *tt                 = NULL;
+        int             ret                = -1;
+
+        GF_ASSERT (this);
+
+        graph.errstr = op_errstr;
+
+        get_vol_transport_type (volinfo, transport_type);
+
+        if (dict_get_str (val_dict, "nfs.transport-type", &tt) == 0) {
+                if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
+                        snprintf (err_str, sizeof (err_str), "Changing nfs "
+                                  "transport type is allowed only for volumes "
+                                  "of transport type tcp,rdma");
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OP_UNSUPPORTED, "%s", err_str);
+                        *op_errstr = gf_strdup (err_str);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (strcmp (tt, "tcp") != 0 && strcmp (tt, "rdma") != 0) {
+                        snprintf (err_str, sizeof (err_str), "wrong transport "
+                                  "type %s", tt);
+                        *op_errstr = gf_strdup (err_str);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = dict_set_str (val_dict, "volume-name", volinfo->volname);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set volume name");
+                goto out;
+        }
+
+        ret = build_nfs_graph (&graph, val_dict);
+        if (ret == 0)
+                ret = graph_reconf_validateopt (&graph.graph, op_errstr);
+
+        volgen_graph_free (&graph);
+
+out:
+        /* Drop the temporary key so the caller's dict is left unchanged. */
+        if (dict_get (val_dict, "volume-name"))
+                dict_del (val_dict, "volume-name");
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+
+/*
+ * Validate @val_dict by building the client graph of @volinfo with it and
+ * running graph_reconf_validateopt() on the result.
+ *
+ * Returns 0 when the options are acceptable, non-zero otherwise (details
+ * may be reported through @op_errstr).
+ */
+int
+validate_clientopts (glusterd_volinfo_t *volinfo,
+                     dict_t *val_dict,
+                     char **op_errstr)
+{
+        volgen_graph_t client_graph = {0,};
+        int            rc           = -1;
+
+        GF_ASSERT (volinfo);
+
+        client_graph.errstr = op_errstr;
+
+        rc = build_client_graph (&client_graph, volinfo, val_dict);
+        if (rc == 0)
+                rc = graph_reconf_validateopt (&client_graph.graph,
+                                               op_errstr);
+
+        volgen_graph_free (&client_graph);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Per-brick validation callback (see glusterd_validate_brickreconf()).
+ *
+ * @reconf points to a struct gd_validate_reconf_opts carrying the options
+ * under validation and the error-string slot. The brick's server graph is
+ * built from a scratch dict holding @mod_dict followed by those options
+ * (both optional) and then checked with graph_reconf_validateopt().
+ *
+ * Returns 0 when the options are acceptable, -1 when the scratch dict
+ * cannot be allocated, otherwise the failing step's result.
+ */
+int
+validate_brickopts (glusterd_volinfo_t *volinfo,
+                    glusterd_brickinfo_t *brickinfo, dict_t *mod_dict,
+                    void *reconf)
+{
+        struct gd_validate_reconf_opts *brickreconf = reconf;
+        dict_t                         *val_dict    = brickreconf->options;
+        char                          **op_errstr   = brickreconf->op_errstr;
+        dict_t                         *full_dict   = NULL;
+        volgen_graph_t                  graph       = {0,};
+        int                             ret         = -1;
+
+        GF_ASSERT (volinfo);
+
+        graph.errstr = op_errstr;
+
+        full_dict = dict_new ();
+        if (full_dict != NULL) {
+                if (mod_dict != NULL)
+                        dict_copy (mod_dict, full_dict);
+
+                if (val_dict != NULL)
+                        dict_copy (val_dict, full_dict);
+
+                ret = build_server_graph (&graph, volinfo, full_dict,
+                                          brickinfo);
+                if (ret == 0)
+                        ret = graph_reconf_validateopt (&graph.graph,
+                                                        op_errstr);
+
+                volgen_graph_free (&graph);
+                dict_unref (full_dict);
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Validate @val_dict against the bricks of @volinfo by running
+ * validate_brickopts() through glusterd_volume_brick_for_each().
+ *
+ * Returns whatever glusterd_volume_brick_for_each() returns.
+ */
+int
+glusterd_validate_brickreconf (glusterd_volinfo_t *volinfo,
+                               dict_t *val_dict,
+                               char **op_errstr)
+{
+        struct gd_validate_reconf_opts reconf_args = {0};
+
+        reconf_args.options   = val_dict;
+        reconf_args.op_errstr = op_errstr;
+
+        return glusterd_volume_brick_for_each (volinfo, &reconf_args,
+                                               validate_brickopts);
+}
+
+/*
+ * dict_foreach() callback: sets the int behind @ret_val to 1 as soon as a
+ * key is seen that glusterd_check_globaloption() rejects. Once the flag is
+ * set no further keys are checked. Always returns 0.
+ */
+static int
+_check_globalopt (dict_t *this, char *key, data_t *value, void *ret_val)
+{
+        int *not_global = ret_val;
+
+        if (*not_global == 0 && !glusterd_check_globaloption (key))
+                *not_global = 1;
+
+        return 0;
+}
+
+/*
+ * Validate a set of global options.
+ *
+ * Every key of @val_dict must pass glusterd_check_globaloption();
+ * otherwise *@op_errstr receives a duplicated message and -1 is returned.
+ * The options are then validated against the brick, client, NFS and
+ * self-heal daemon graphs, in that order, stopping at the first failure.
+ *
+ * Returns 0 when every check passes, non-zero otherwise.
+ */
+int
+glusterd_validate_globalopts (glusterd_volinfo_t *volinfo,
+                              dict_t *val_dict, char **op_errstr)
+{
+        int ret = 0;
+
+        dict_foreach (val_dict, _check_globalopt, &ret);
+        if (ret) {
+                *op_errstr = gf_strdup ( "option specified is not a global option");
+                return -1;
+        }
+
+        if ((ret = glusterd_validate_brickreconf (volinfo, val_dict,
+                                                  op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0,
+                              "Could not Validate bricks");
+        } else if ((ret = validate_clientopts (volinfo, val_dict,
+                                               op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0,
+                              "Could not Validate client");
+        } else if ((ret = validate_nfsopts (volinfo, val_dict,
+                                            op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0, "Could not Validate nfs");
+        } else if ((ret = validate_shdopts (volinfo, val_dict,
+                                            op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0, "Could not Validate self-heald");
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * dict_foreach() callback: sets the int behind @ret_val to 1 as soon as a
+ * key is seen that glusterd_check_localoption() rejects. Once the flag is
+ * set no further keys are checked. Always returns 0.
+ */
+static int
+_check_localopt (dict_t *this, char *key, data_t *value, void *ret_val)
+{
+        int *not_local = ret_val;
+
+        if (*not_local == 0 && !glusterd_check_localoption (key))
+                *not_local = 1;
+
+        return 0;
+}
+
+/*
+ * Validate a set of per-volume (local) options.
+ *
+ * Every key of @val_dict must pass glusterd_check_localoption(); otherwise
+ * *@op_errstr receives a duplicated message and -1 is returned. The
+ * options are then validated against the brick, client, NFS and self-heal
+ * daemon graphs, in that order, stopping at the first failure.
+ *
+ * Returns 0 when every check passes, non-zero otherwise.
+ */
+int
+glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict,
+                              char **op_errstr)
+{
+        int ret = 0;
+
+        dict_foreach (val_dict, _check_localopt, &ret);
+        if (ret) {
+                *op_errstr = gf_strdup ( "option specified is not a local option");
+                return -1;
+        }
+
+        if ((ret = glusterd_validate_brickreconf (volinfo, val_dict,
+                                                  op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0,
+                              "Could not Validate bricks");
+        } else if ((ret = validate_clientopts (volinfo, val_dict,
+                                               op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0,
+                              "Could not Validate client");
+        } else if ((ret = validate_nfsopts (volinfo, val_dict,
+                                            op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0, "Could not Validate nfs");
+        } else if ((ret = validate_shdopts (volinfo, val_dict,
+                                            op_errstr)) != 0) {
+                gf_msg_debug ("glusterd", 0, "Could not Validate self-heald");
+        }
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Look up @key in glusterd_volopt_map by exact string comparison.
+ *
+ * COMPLETE_OPTION() runs first; it contains `return _gf_false` statements,
+ * so a failed completion of a key without a '.' makes this function return
+ * early with that value.
+ *
+ * Returns the matching map entry, or NULL when the table (terminated by
+ * an entry with a NULL key) holds no such key.
+ */
+static struct volopt_map_entry *
+_gd_get_vmep (char *key)
+{
+        struct volopt_map_entry *entry      = glusterd_volopt_map;
+        char                    *completion = NULL;
+        int                      ret        = 0;
+
+        COMPLETE_OPTION ((char *)key, completion, ret);
+
+        while (entry->key != NULL) {
+                if (strcmp (entry->key, key) == 0)
+                        return entry;
+                entry++;
+        }
+
+        return NULL;
+}
+
+/*
+ * Return the op_version recorded for @key in the volume option map, or 0
+ * when _gd_get_vmep() finds no entry for it.
+ */
+uint32_t
+glusterd_get_op_version_for_key (char *key)
+{
+        struct volopt_map_entry *entry = NULL;
+
+        GF_ASSERT (key);
+
+        entry = _gd_get_vmep (key);
+
+        return (entry != NULL) ? entry->op_version : 0;
+}
+
+/*
+ * Return _gf_true when @key has a volume option map entry whose flags
+ * contain OPT_FLAG_CLIENT_OPT, _gf_false otherwise (including unknown
+ * keys).
+ */
+gf_boolean_t
+gd_is_client_option (char *key)
+{
+        struct volopt_map_entry *entry = NULL;
+
+        GF_ASSERT (key);
+
+        entry = _gd_get_vmep (key);
+        if (entry == NULL)
+                return _gf_false;
+
+        return (entry->flags & OPT_FLAG_CLIENT_OPT) ? _gf_true : _gf_false;
+}
+
+/*
+ * Return _gf_true when @key has a volume option map entry whose flags
+ * contain OPT_FLAG_XLATOR_OPT, _gf_false otherwise (including unknown
+ * keys).
+ */
+gf_boolean_t
+gd_is_xlator_option (char *key)
+{
+        struct volopt_map_entry *entry = NULL;
+
+        GF_ASSERT (key);
+
+        entry = _gd_get_vmep (key);
+        if (entry == NULL)
+                return _gf_false;
+
+        return (entry->flags & OPT_FLAG_XLATOR_OPT) ? _gf_true : _gf_false;
+}
+
+/*
+ * Determine the option type an xlator declares for @key.
+ *
+ * The map entry of @key names the xlator (voltype); its option list is
+ * loaded with xlator_volopt_dynload() and searched for the xlator-side
+ * option key derived from the map entry.
+ *
+ * Returns the option's type, or GF_OPTION_TYPE_MAX when the key is
+ * unknown, loading fails, or the xlator has no such option. A handle
+ * returned by the loader is closed with dlclose() before returning.
+ */
+volume_option_type_t
+_gd_get_option_type (char *key)
+{
+        volume_option_type_t     opt_type     = GF_OPTION_TYPE_MAX;
+        volume_opt_list_t        vol_opt_list = {{0},};
+        struct volopt_map_entry *vmep         = NULL;
+        volume_option_t         *opt          = NULL;
+        void                    *dl_handle    = NULL;
+        char                    *xlopt_key    = NULL;
+
+        GF_ASSERT (key);
+
+        vmep = _gd_get_vmep (key);
+        if (vmep == NULL)
+                return opt_type;
+
+        CDS_INIT_LIST_HEAD (&vol_opt_list.list);
+
+        if (xlator_volopt_dynload (vmep->voltype, &dl_handle,
+                                   &vol_opt_list) == 0 &&
+            _get_xlator_opt_key_from_vme (vmep, &xlopt_key) == 0) {
+                opt = xlator_volume_option_get_list (&vol_opt_list, xlopt_key);
+                _free_xlator_opt_key (xlopt_key);
+
+                if (opt != NULL)
+                        opt_type = opt->type;
+        }
+
+        if (dl_handle != NULL)
+                dlclose (dl_handle);
+
+        return opt_type;
+}
+
+/*
+ * Return _gf_true when _gd_get_option_type() reports GF_OPTION_TYPE_BOOL
+ * for @key, _gf_false otherwise.
+ */
+gf_boolean_t
+gd_is_boolean_option (char *key)
+{
+        GF_ASSERT (key);
+
+        return (_gd_get_option_type (key) == GF_OPTION_TYPE_BOOL)
+                ? _gf_true : _gf_false;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
new file mode 100644
index 00000000000..f90177372dc
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
@@ -0,0 +1,295 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_VOLGEN_H_
+#define _GLUSTERD_VOLGEN_H_
+
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+#endif
+
+#include "glusterd.h"
+#include "glusterd-messages.h"
+
+/* volopt map key name definitions */
+
+#define VKEY_DIAG_CNT_FOP_HITS "diagnostics.count-fop-hits"
+#define VKEY_DIAG_LAT_MEASUREMENT "diagnostics.latency-measurement"
+#define VKEY_FEATURES_LIMIT_USAGE "features.limit-usage"
+#define VKEY_FEATURES_SOFT_LIMIT "features.soft-limit"
+#define VKEY_MARKER_XTIME GEOREP".indexing"
+#define VKEY_MARKER_XTIME_FORCE GEOREP".ignore-pid-check"
+#define VKEY_CHANGELOG "changelog.changelog"
+#define VKEY_FEATURES_QUOTA "features.quota"
+#define VKEY_FEATURES_INODE_QUOTA "features.inode-quota"
+#define VKEY_FEATURES_TRASH "features.trash"
+#define VKEY_FEATURES_BITROT "features.bitrot"
+#define VKEY_FEATURES_SCRUB "features.scrub"
+
+#define AUTH_ALLOW_MAP_KEY "auth.allow"
+#define AUTH_REJECT_MAP_KEY "auth.reject"
+#define NFS_DISABLE_MAP_KEY "nfs.disable"
+#define AUTH_ALLOW_OPT_KEY "auth.addr.*.allow"
+#define AUTH_REJECT_OPT_KEY "auth.addr.*.reject"
+#define NFS_DISABLE_OPT_KEY "nfs.*.disable"
+
+#define SSL_OWN_CERT_OPT "ssl.own-cert"
+#define SSL_PRIVATE_KEY_OPT "ssl.private-key"
+#define SSL_CA_LIST_OPT "ssl.ca-list"
+#define SSL_CRL_PATH_OPT "ssl.crl-path"
+#define SSL_CERT_DEPTH_OPT "ssl.certificate-depth"
+#define SSL_CIPHER_LIST_OPT "ssl.cipher-list"
+#define SSL_DH_PARAM_OPT "ssl.dh-param"
+#define SSL_EC_CURVE_OPT "ssl.ec-curve"
+
+
+/* Kind of client a volfile is generated for; this is the second argument
+ * of generate_client_volfiles(). */
+typedef enum {
+ GF_CLIENT_TRUSTED,
+ GF_CLIENT_OTHER
+} glusterd_client_type_t;
+
+/* A graph under construction together with the slot where an error string
+ * may be reported. */
+struct volgen_graph {
+ char **errstr;
+ glusterfs_graph_t graph;
+};
+typedef struct volgen_graph volgen_graph_t;
+
+/* Callback that populates @graph; @mod_dict carries optional option
+ * overrides. Matches build_shd_graph(), build_nfs_graph() and the other
+ * build_*_graph() functions declared in this header. */
+typedef int (*glusterd_graph_builder_t) (volgen_graph_t *graph,
+ dict_t *mod_dict);
+
+/*
+ * Check that an option name given without a domain can be completed.
+ *
+ * When @key contains no '.', option_complete() is asked for its completion
+ * (stored in @completion, status in @ret). Any completion obtained is
+ * released with GF_FREE() before the macro ends; the macro itself never
+ * reassigns @key.
+ *
+ * WARNING: on failure the macro executes `return _gf_false` in the
+ * *calling* function, so it may only be used where that is a valid return
+ * value. @completion must be initialised (NULL) by the caller.
+ *
+ * The log text previously read "does notexist" because the two adjacent
+ * string literals were joined without a space.
+ */
+#define COMPLETE_OPTION(key, completion, ret)                           \
+        do {                                                            \
+                if (!strchr (key, '.')) {                               \
+                        ret = option_complete (key, &completion);       \
+                        if (ret) {                                      \
+                                gf_msg ("", GF_LOG_ERROR, ENOMEM,       \
+                                        GD_MSG_NO_MEMORY, "Out of memory"); \
+                                return _gf_false;                       \
+                        }                                               \
+                                                                        \
+                        if (!completion) {                              \
+                                gf_msg ("", GF_LOG_ERROR, 0,            \
+                                        GD_MSG_INVALID_ENTRY,           \
+                                        "option %s does not "           \
+                                        "exist", key);                  \
+                                return _gf_false;                       \
+                        }                                               \
+                }                                                       \
+                                                                        \
+                if (completion)                                         \
+                        GF_FREE (completion);                           \
+        } while (0);
+
+/* Bit flags stored in volopt_map_entry.flags. */
+typedef enum gd_volopt_flags_ {
+ OPT_FLAG_NONE,
+ OPT_FLAG_FORCE = 0x01, /* option needs force to be reset */
+ OPT_FLAG_XLATOR_OPT = 0x02, /* option enables/disables xlators */
+ OPT_FLAG_CLIENT_OPT = 0x04, /* option affects clients */
+ OPT_FLAG_NEVER_RESET = 0x08, /* option which should not be reset */
+} gd_volopt_flags_t;
+
+/* Identifiers for xlators of the server (brick) graph; GF_XLATOR_NONE is
+ * the last value. NOTE(review): presumably listed in stacking order --
+ * confirm against the users of this enum. */
+typedef enum {
+ GF_XLATOR_POSIX = 0,
+ GF_XLATOR_ACL,
+ GF_XLATOR_LOCKS,
+ GF_XLATOR_LEASES,
+ GF_XLATOR_UPCALL,
+ GF_XLATOR_IOT,
+ GF_XLATOR_INDEX,
+ GF_XLATOR_MARKER,
+ GF_XLATOR_IO_STATS,
+ GF_XLATOR_BD,
+ GF_XLATOR_SERVER,
+ GF_XLATOR_NONE,
+} glusterd_server_xlator_t;
+
+/* As of now, debug xlators can be loaded via the cli only below fuse in
+ * the client graph. More xlators can be added to this enum once a cli
+ * option for adding debug xlators anywhere in the client graph is made
+ * available.
+ */
+typedef enum {
+ GF_CLNT_XLATOR_FUSE = 0,
+ GF_CLNT_XLATOR_NONE,
+} glusterd_client_xlator_t;
+
+/* Documentation class of a volume option. NOTE(review): meaning of the
+ * individual values is inferred from their names only -- confirm. */
+typedef enum { DOC, NO_DOC, GLOBAL_DOC, GLOBAL_NO_DOC } option_type_t;
+
+/* Per-option validation hook; failures may be reported via @op_errstr. */
+typedef int (*vme_option_validation) (glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char *value, char **op_errstr);
+
+/* One row of the volume option map (glusterd_volopt_map); the table ends
+ * with an entry whose key is NULL. */
+struct volopt_map_entry {
+ char *key; /* option name the map is searched by */
+ char *voltype; /* xlator type; handed to xlator_volopt_dynload() */
+ char *option;
+ char *value;
+ option_type_t type;
+ uint32_t flags; /* OPT_FLAG_* bits */
+ uint32_t op_version; /* see glusterd_get_op_version_for_key() */
+ char *description;
+ vme_option_validation validate_fn;
+ /* If client_option is true, the option affects clients.
+ * this is used to calculate client-op-version of volumes
+ */
+ //gf_boolean_t client_option;
+};
+
+/* Signature of a function that adds one xlator to a brick graph. */
+typedef
+int (*brick_xlator_builder) (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, dict_t *set_dict,
+ glusterd_brickinfo_t *brickinfo);
+
+/* Pairs a brick xlator builder with the key used to request debug
+ * xlators in front of it. */
+struct volgen_brick_xlator {
+ /* function that builds a xlator */
+ brick_xlator_builder builder;
+ /* debug key for a xlator that
+ * gets used for adding debug translators like trace, error-gen
+ * before this xlator */
+ char *dbg_key;
+};
+typedef struct volgen_brick_xlator volgen_brick_xlator_t;
+
+int
+glusterd_snapdsvc_create_volfile (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_snapdsvc_generate_volfile (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo);
+
+int
+glusterd_create_global_volfile (glusterd_graph_builder_t builder,
+ char *filepath, dict_t *mod_dict);
+
+int
+glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_create_volfiles (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_generate_client_per_brick_volfile (glusterd_volinfo_t *volinfo);
+
+void
+glusterd_get_nfs_filepath (char *filename);
+
+void
+glusterd_get_shd_filepath (char *filename);
+
+int
+build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict);
+
+int
+build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict);
+
+int
+build_quotad_graph (volgen_graph_t *graph, dict_t *mod_dict);
+
+int
+build_bitd_graph (volgen_graph_t *graph, dict_t *mod_dict);
+
+int
+build_scrub_graph (volgen_graph_t *graph, dict_t *mod_dict);
+
+int
+glusterd_delete_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+int
+glusterd_delete_snap_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value);
+
+int
+glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key);
+
+int
+glusterd_validate_globalopts (glusterd_volinfo_t *volinfo, dict_t *val_dict,
+ char **op_errstr);
+
+int
+glusterd_validate_localopts (dict_t *val_dict, char **op_errstr);
+
+gf_boolean_t
+glusterd_check_globaloption (char *key);
+
+gf_boolean_t
+glusterd_check_voloption_flags (char *key, int32_t flags);
+
+gf_boolean_t
+glusterd_is_valid_volfpath (char *volname, char *brick);
+
+int
+generate_brick_volfiles (glusterd_volinfo_t *volinfo);
+
+int
+generate_snap_brick_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t *snap_volinfo);
+int
+generate_client_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_client_type_t client_type);
+int
+generate_snap_client_volfiles (glusterd_volinfo_t *actual_volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ glusterd_client_type_t client_type,
+ gf_boolean_t vol_restore);
+
+int
+_get_xlator_opt_key_from_vme ( struct volopt_map_entry *vme, char **key);
+
+void
+_free_xlator_opt_key (char *key);
+
+
+#if (HAVE_LIB_XML)
+int
+init_sethelp_xml_doc (xmlTextWriterPtr *writer, xmlBufferPtr *buf);
+
+int
+xml_add_volset_element (xmlTextWriterPtr writer, const char *name,
+ const char *def_val, const char *dscrpt);
+int
+end_sethelp_xml_doc (xmlTextWriterPtr writer);
+#endif /* HAVE_LIB_XML */
+
+char*
+glusterd_get_trans_type_rb (gf_transport_type ttype);
+
+uint32_t
+glusterd_get_op_version_for_key (char *key);
+
+gf_boolean_t
+gd_is_client_option (char *key);
+
+gf_boolean_t
+gd_is_xlator_option (char *key);
+
+gf_boolean_t
+gd_is_boolean_option (char *key);
+
+
+char*
+volgen_get_shd_key (int type);
+
+int
+glusterd_volopt_validate (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+ char *value, char **op_errstr);
+gf_boolean_t
+gd_is_self_heal_enabled (glusterd_volinfo_t *volinfo, dict_t *dict);
+
+int
+generate_dummy_client_volfiles (glusterd_volinfo_t *volinfo);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
new file mode 100644
index 00000000000..72e14b0429d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -0,0 +1,3200 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
+#include "common-utils.h"
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "run.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-server-quorum.h"
+
+#include <stdint.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <stdlib.h>
+
+/* Argument extraction for a start-volume request: forwards its three
+ * arguments unchanged to glusterd_op_stop_volume_args_get(). */
+#define glusterd_op_start_volume_args_get(dict, volname, flags) \
+ glusterd_op_stop_volume_args_get (dict, volname, flags)
+
+/*
+ * Compare two addrinfo chains by their numeric host representation.
+ *
+ * Each entry of @first is rendered with getnameinfo (NI_NUMERICHOST) and
+ * compared, as a string, against each entry of @next rendered the same way.
+ *
+ * Returns:
+ *   GF_AI_COMPARE_MATCH    - some entry of @first and some entry of @next
+ *                            produce the same numeric host string;
+ *   GF_AI_COMPARE_ERROR    - getnameinfo failed for an entry that was visited;
+ *   GF_AI_COMPARE_NO_MATCH - no pair matched (also when @first is NULL).
+ */
+gf_ai_compare_t
+glusterd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next)
+{
+        int              ret                 = -1;
+        struct addrinfo *tmp1                = NULL;
+        struct addrinfo *tmp2                = NULL;
+        /* Both buffers are zero-filled; the original "{0.}" initialiser was
+         * a typo for "{0,}". */
+        char             firstip[NI_MAXHOST] = {0,};
+        char             nextip[NI_MAXHOST]  = {0,};
+
+        for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
+                ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip,
+                                   NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+                if (ret)
+                        return GF_AI_COMPARE_ERROR;
+                for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
+                        ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen,
+                                           nextip, NI_MAXHOST, NULL, 0,
+                                           NI_NUMERICHOST);
+                        if (ret)
+                                return GF_AI_COMPARE_ERROR;
+                        if (!strcmp (firstip, nextip)) {
+                                return GF_AI_COMPARE_MATCH;
+                        }
+                }
+        }
+        return GF_AI_COMPARE_NO_MATCH;
+}
+
+/* Check for non optimal brick order for replicate/disperse volumes:
+ * checks whether bricks that belong to the same replica (or disperse) set
+ * are present on the same server.
+ *
+ * @dict    - request dictionary; "volname", "type", "bricks", "count" and
+ *            "replica-count" ("disperse-count" for disperse volumes) are
+ *            read from it.
+ * @err_str - caller-supplied buffer that receives a user-facing message
+ *            when the check cannot be completed or a bad order is found.
+ *            Messages are written with a bound of up to 2048 bytes, so the
+ *            buffer must be at least that large.
+ *
+ * Returns 0 when the order is acceptable, and also when the check had to be
+ * skipped because a host name could not be resolved or memory could not be
+ * allocated (existing best-effort policy of this function). Returns non-zero
+ * when a dictionary lookup fails, when the check itself fails, or when two
+ * bricks of one set resolve to the same address.
+ */
+int32_t
+glusterd_check_brick_order(dict_t *dict, char *err_str)
+{
+        int              ret            = -1;
+        int              i              = 0;
+        int              j              = 0;
+        int              k              = 0;
+        xlator_t        *this           = NULL;
+        addrinfo_list_t *ai_list        = NULL;
+        addrinfo_list_t *ai_list_tmp1   = NULL;
+        addrinfo_list_t *ai_list_tmp2   = NULL;
+        char            *brick          = NULL;
+        char            *brick_list     = NULL;
+        char            *brick_list_dup = NULL;
+        char            *brick_list_ptr = NULL;
+        char            *tmpptr         = NULL;
+        char            *volname        = NULL;
+        int32_t          brick_count    = 0;
+        int32_t          type           = GF_CLUSTER_TYPE_NONE;
+        int32_t          sub_count      = 0;
+        struct addrinfo *ai_info        = NULL;
+
+        const char       failed_string[2048] = "Failed to perform brick order "
+                                "check. Use 'force' at the end of the command"
+                                " if you want to override this behavior. ";
+        const char       found_string[2048]  = "Multiple bricks of a %s "
+                                "volume are present on the same server. This "
+                                "setup is not optimal. Use 'force' at the "
+                                "end of the command if you want to override "
+                                "this behavior. ";
+
+        this = THIS;
+
+        GF_ASSERT(this);
+
+        /* ai_list is a sentinel head; real entries hang off ai_list->list. */
+        ai_list = malloc (sizeof (addrinfo_list_t));
+        if (ai_list == NULL) {
+                /* Nothing has been acquired yet, so return directly. The
+                 * return value follows the ENOMEM policy used further down. */
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "failed to allocate "
+                        "memory");
+                return 0;
+        }
+        ai_list->info = NULL;
+        CDS_INIT_LIST_HEAD (&ai_list->list);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                snprintf (err_str, 512, "Unable to get type of volume %s",
+                          volname);
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "bricks", &brick_list);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Bricks check : Could not "
+                        "retrieve bricks list");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Bricks check : Could not "
+                        "retrieve brick count");
+                goto out;
+        }
+
+        if (type != GF_CLUSTER_TYPE_DISPERSE) {
+                ret = dict_get_int32 (dict, "replica-count", &sub_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Bricks check : Could"
+                                " not retrieve replica count");
+                        goto out;
+                }
+                gf_msg_debug (this->name, 0, "Replicate cluster type "
+                              "found. Checking brick order.");
+        } else {
+                ret = dict_get_int32 (dict, "disperse-count", &sub_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Bricks check : Could"
+                                " not retrieve disperse count");
+                        goto out;
+                }
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_DISPERSE_CLUSTER_FOUND, "Disperse cluster type"
+                        " found. Checking brick order.");
+        }
+
+        /* sub_count is used as a modulus below; a zero or negative value
+         * would divide by zero or make the grouping meaningless. */
+        if (sub_count <= 0)
+                goto check_failed;
+
+        brick_list_dup = brick_list_ptr = gf_strdup(brick_list);
+        if (brick_list_ptr == NULL) {
+                ret = 0;
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY, "failed to allocate "
+                        "memory");
+                goto out;
+        }
+
+        /* Resolve hostnames and get addrinfo */
+        while (i < brick_count) {
+                ++i;
+                brick = strtok_r (brick_list_dup, " \n", &tmpptr);
+                /* Remember where the brick list continues before tmpptr is
+                 * reused for splitting "host:path". */
+                brick_list_dup = tmpptr;
+                if (brick == NULL)
+                        goto check_failed;
+                brick = strtok_r (brick, ":", &tmpptr);
+                if (brick == NULL)
+                        goto check_failed;
+                ret = getaddrinfo (brick, NULL, NULL, &ai_info);
+                if (ret != 0) {
+                        ret = 0;
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_HOSTNAME_RESOLVE_FAIL,
+                                "unable to resolve "
+                                "host name");
+                        goto out;
+                }
+                ai_list_tmp1 = malloc (sizeof (addrinfo_list_t));
+                if (ai_list_tmp1 == NULL) {
+                        ret = 0;
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "failed to allocate "
+                                "memory");
+                        /* Not yet linked into the list, so the cleanup loop
+                         * would never see this result. */
+                        freeaddrinfo (ai_info);
+                        ai_info = NULL;
+                        goto out;
+                }
+                ai_list_tmp1->info = ai_info;
+                cds_list_add_tail (&ai_list_tmp1->list, &ai_list->list);
+                ai_list_tmp1 = NULL;
+        }
+
+        i = 0;
+        ai_list_tmp1 = cds_list_entry (ai_list->list.next,
+                                       addrinfo_list_t, list);
+
+        /* Check for bad brick order: within each group of sub_count
+         * consecutive bricks, compare every brick with the ones after it. */
+        while (i < brick_count) {
+                ++i;
+                ai_info = ai_list_tmp1->info;
+                ai_list_tmp1 = cds_list_entry (ai_list_tmp1->list.next,
+                                               addrinfo_list_t, list);
+                if (0 == i % sub_count) {
+                        /* Last brick of a group: nothing left to compare. */
+                        j = 0;
+                        continue;
+                }
+                ai_list_tmp2 = ai_list_tmp1;
+                k = j;
+                while (k < sub_count - 1) {
+                        ++k;
+                        ret = glusterd_compare_addrinfo (ai_info,
+                                                         ai_list_tmp2->info);
+                        if (GF_AI_COMPARE_ERROR == ret)
+                                goto check_failed;
+                        if (GF_AI_COMPARE_MATCH == ret)
+                                goto found_bad_brick_order;
+                        ai_list_tmp2 = cds_list_entry (ai_list_tmp2->list.next,
+                                                       addrinfo_list_t, list);
+                }
+                ++j;
+        }
+        gf_msg_debug (this->name, 0, "Brick order okay");
+        ret = 0;
+        goto out;
+
+check_failed:
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_BAD_BRKORDER_CHECK_FAIL, "Failed bad brick order check");
+        snprintf (err_str, sizeof (failed_string), "%s", failed_string);
+        ret = -1;
+        goto out;
+
+found_bad_brick_order:
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_BAD_BRKORDER, "Bad brick order found");
+        if (type == GF_CLUSTER_TYPE_DISPERSE) {
+                snprintf (err_str, sizeof (found_string), found_string, "disperse");
+        } else {
+                snprintf (err_str, sizeof (found_string), found_string, "replicate");
+        }
+
+        ret = -1;
+out:
+        ai_list_tmp2 = NULL;
+        GF_FREE (brick_list_ptr);
+        /* Each node is freed one step behind the cursor so the iterator
+         * never reads a node that has already been released. */
+        cds_list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
+                if (ai_list_tmp1->info)
+                        freeaddrinfo (ai_list_tmp1->info);
+                free (ai_list_tmp2);
+                ai_list_tmp2 = ai_list_tmp1;
+        }
+        free (ai_list_tmp2);
+        /* The sentinel head is a separate allocation; release it as well. */
+        free (ai_list);
+        return ret;
+}
+
+/*
+ * CLI handler for "volume create".
+ *
+ * Decodes the request, unserializes its dictionary, verifies that the keys
+ * needed later ("volname", "count", "type", "transport", "bricks", "force")
+ * are present, adds "transport.address-family", a freshly generated
+ * "volume-id" and internal username/password to the dictionary, and then
+ * starts the GD_OP_CREATE_VOLUME synctask.
+ *
+ * On any failure an error response is sent to the CLI from this function
+ * and 0 is returned so that no second response is generated.
+ */
+int
+__glusterd_handle_create_volume (rpcsvc_request_t *req)
+{
+        int32_t     ret                = -1;
+        gf_cli_req  cli_req            = {{0,}};
+        dict_t     *dict               = NULL;
+        char       *bricks             = NULL;
+        char       *volname            = NULL;
+        int         brick_count        = 0;
+        void       *cli_rsp            = NULL;
+        char        err_str[2048]      = {0,};
+        gf_cli_rsp  rsp                = {0,};
+        xlator_t   *this               = NULL;
+        char       *free_ptr           = NULL;
+        char       *trans_type         = NULL;
+        char       *address_family_str = NULL;
+        uuid_t      volume_id          = {0,};
+        uuid_t      tmp_uuid           = {0};
+        int32_t     type               = 0;
+        char       *username           = NULL;
+        char       *password           = NULL;
+
+        GF_ASSERT (req);
+
+        this = THIS;
+        GF_ASSERT(this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                snprintf (err_str, sizeof (err_str), "Failed to decode request "
+                          "received from cli");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_str);
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "Received create volume req");
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                } else {
+                        /* Hand the XDR buffer over to the dictionary. */
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get volume "
+                          "name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        if ((ret = glusterd_check_volume_exists (volname))) {
+                snprintf (err_str, sizeof (err_str), "Volume %s already exists",
+                          volname);
+                gf_msg (this->name, GF_LOG_ERROR, EEXIST,
+                        GD_MSG_VOL_ALREADY_EXIST, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &brick_count);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get brick count"
+                          " for volume %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get type of "
+                          "volume %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "transport", &trans_type);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get "
+                          "transport-type of volume %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        /* Prefer the address family configured on glusterd itself; fall
+         * back to "inet" for plain tcp transports. */
+        ret = dict_get_str (this->options, "transport.address-family",
+                            &address_family_str);
+
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc (dict,
+                                "transport.address-family",
+                                address_family_str);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to set transport.address-family");
+                        goto out;
+                }
+        } else if (!strcmp(trans_type, "tcp")) {
+                /* Setting default as inet for trans_type tcp */
+                ret = dict_set_dynstr_with_alloc (dict,
+                                "transport.address-family",
+                                "inet");
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to set transport.address-family");
+                        goto out;
+                }
+        }
+        ret = dict_get_str (dict, "bricks", &bricks);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to get bricks for "
+                          "volume %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        if (!dict_get (dict, "force")) {
+                /* ret is still 0 from the successful lookup above; it must
+                 * be set to a failure value or the error path at "out" is
+                 * skipped and the CLI never receives a response. */
+                ret = -1;
+                snprintf (err_str, sizeof (err_str),
+                          "Failed to get 'force' flag");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                goto out;
+        }
+
+        gf_uuid_generate (volume_id);
+        free_ptr = gf_strdup (uuid_utoa (volume_id));
+        ret = dict_set_dynstr (dict, "volume-id", free_ptr);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Unable to set volume "
+                          "id of volume %s", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "%s", err_str);
+                goto out;
+        }
+        /* The string has been handed to the dictionary; do not free it
+         * again at "out". */
+        free_ptr = NULL;
+
+        /* generate internal username and password */
+
+        gf_uuid_generate (tmp_uuid);
+        username = gf_strdup (uuid_utoa (tmp_uuid));
+        ret = dict_set_dynstr (dict, "internal-username", username);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set username for "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        gf_uuid_generate (tmp_uuid);
+        password = gf_strdup (uuid_utoa (tmp_uuid));
+        ret = dict_set_dynstr (dict, "internal-password", password);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "Failed to set password for "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_CREATE_VOLUME, dict);
+
+out:
+        if (ret) {
+                rsp.op_ret = -1;
+                rsp.op_errno = 0;
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                rsp.op_errstr = err_str;
+                cli_rsp = &rsp;
+                glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+                                 (xdrproc_t)xdr_gf_cli_rsp, dict);
+                ret = 0; //Client response sent, prevent second response
+        }
+
+        GF_FREE(free_ptr);
+
+        return ret;
+}
+
+/* RPC entry point for volume create: hands the request and the real
+ * handler to glusterd_big_locked_handler(). */
+int
+glusterd_handle_create_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_create_volume);
+
+        return ret;
+}
+
+/*
+ * CLI handler for "volume start".
+ *
+ * Decodes the request, unserializes its dictionary and reads "volname".
+ * The operation is then started through the synctask framework when the
+ * cluster op-version is at most GD_OP_VERSION_3_7_6, and through the
+ * mgmt_v3 phases otherwise. On failure an error response is sent to the
+ * CLI and the result of sending it is returned.
+ */
+int
+__glusterd_handle_cli_start_volume (rpcsvc_request_t *req)
+{
+        xlator_t        *this          = THIS;
+        glusterd_conf_t *conf          = NULL;
+        dict_t          *dict          = NULL;
+        char            *vol           = NULL;
+        gf_cli_req       cli_req       = {{0,}};
+        char             err_msg[2048] = {0,};
+        int32_t          rc            = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        rc = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (rc < 0) {
+                snprintf (err_msg, sizeof (err_msg),
+                          "Failed to decode message received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_msg);
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len != 0) {
+                /* Rebuild the request dictionary from the wire buffer. */
+                dict = dict_new ();
+
+                rc = dict_unserialize (cli_req.dict.dict_val,
+                                       cli_req.dict.dict_len, &dict);
+                if (rc < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to unserialize req-buffer to dictionary");
+                        snprintf (err_msg, sizeof (err_msg),
+                                  "Unable to decode the command");
+                        goto out;
+                }
+        }
+
+        rc = dict_get_str (dict, "volname", &vol);
+        if (rc != 0) {
+                snprintf (err_msg, sizeof (err_msg), "Unable to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_msg);
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "Received start vol req for volume %s",
+                      vol);
+
+        if (conf->op_version > GD_OP_VERSION_3_7_6) {
+                rc = glusterd_mgmt_v3_initiate_all_phases (req,
+                                                           GD_OP_START_VOLUME,
+                                                           dict);
+        } else {
+                gf_msg_debug (this->name, 0, "The cluster is operating at "
+                              "version less than or equal to %d. Volume start "
+                              "falling back to syncop framework.",
+                              GD_OP_VERSION_3_7_6);
+                rc = glusterd_op_begin_synctask (req, GD_OP_START_VOLUME,
+                                                 dict);
+        }
+
+out:
+        /* The request buffer was malloc'ed by the XDR decoder. */
+        free (cli_req.dict.dict_val);
+
+        if (rc == 0)
+                return rc;
+
+        if (err_msg[0] == '\0')
+                snprintf (err_msg, sizeof (err_msg), "Operation failed");
+
+        return glusterd_op_send_cli_response (GD_OP_START_VOLUME, rc, 0, req,
+                                              dict, err_msg);
+}
+
+/* RPC entry point for volume start: hands the request and the real
+ * handler to glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_start_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_start_volume);
+
+        return ret;
+}
+
+/*
+ * CLI handler for "volume stop".
+ *
+ * Decodes the request, unserializes its dictionary, reads "volname" and
+ * starts the GD_OP_STOP_VOLUME synctask. On failure an error response is
+ * sent to the CLI and the result of sending it is returned.
+ */
+int
+__glusterd_handle_cli_stop_volume (rpcsvc_request_t *req)
+{
+        xlator_t   *this          = THIS;
+        dict_t     *dict          = NULL;
+        char       *vol           = NULL;
+        gf_cli_req  cli_req       = {{0,}};
+        char        err_msg[2048] = {0,};
+        int32_t     rc            = -1;
+
+        GF_ASSERT (this);
+        GF_ASSERT (req);
+
+        rc = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (rc < 0) {
+                snprintf (err_msg, sizeof (err_msg),
+                          "Failed to decode message received from cli");
+                req->rpc_err = GARBAGE_ARGS;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_msg);
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len != 0) {
+                /* Rebuild the request dictionary from the wire buffer. */
+                dict = dict_new ();
+
+                rc = dict_unserialize (cli_req.dict.dict_val,
+                                       cli_req.dict.dict_len, &dict);
+                if (rc < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to unserialize req-buffer to dictionary");
+                        snprintf (err_msg, sizeof (err_msg),
+                                  "Unable to decode the command");
+                        goto out;
+                }
+        }
+
+        rc = dict_get_str (dict, "volname", &vol);
+        if (rc != 0) {
+                snprintf (err_msg, sizeof (err_msg), "Failed to get volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_msg);
+                goto out;
+        }
+
+        gf_msg_debug (this->name, 0, "Received stop vol req for volume %s",
+                      vol);
+
+        rc = glusterd_op_begin_synctask (req, GD_OP_STOP_VOLUME, dict);
+
+out:
+        /* The request buffer was malloc'ed by the XDR decoder. */
+        free (cli_req.dict.dict_val);
+
+        if (rc == 0)
+                return rc;
+
+        if (err_msg[0] == '\0')
+                snprintf (err_msg, sizeof (err_msg), "Operation failed");
+
+        return glusterd_op_send_cli_response (GD_OP_STOP_VOLUME, rc, 0, req,
+                                              dict, err_msg);
+}
+
+/* RPC entry point for volume stop: hands the request and the real
+ * handler to glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_stop_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_stop_volume);
+
+        return ret;
+}
+
+/*
+ * CLI handler for "volume delete".
+ *
+ * Decodes the request, unserializes its dictionary, reads "volname" and
+ * starts the GD_OP_DELETE_VOLUME synctask. On failure an error response is
+ * sent to the CLI and the result of sending it is returned.
+ */
+int
+__glusterd_handle_cli_delete_volume (rpcsvc_request_t *req)
+{
+        int32_t        ret           = -1;
+        gf_cli_req     cli_req       = {{0,},};
+        glusterd_op_t  cli_op        = GD_OP_DELETE_VOLUME;
+        dict_t        *dict          = NULL;
+        char          *volname       = NULL;
+        char           err_str[2048] = {0,};
+        xlator_t      *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        GF_ASSERT (req);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                snprintf (err_str, sizeof (err_str), "Failed to decode request "
+                          "received from cli");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_REQ_DECODE_FAIL, "%s", err_str);
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (err_str, sizeof (err_str), "Unable to decode "
+                                  "the command");
+                        goto out;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (err_str, sizeof (err_str), "Failed to get volume "
+                          "name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_str);
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        /* The two literals used to concatenate to "reqfor volume"; the
+         * separating space is restored here. */
+        gf_msg_debug (this->name, 0, "Received delete vol req "
+                      "for volume %s", volname);
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_DELETE_VOLUME, dict);
+
+out:
+        free (cli_req.dict.dict_val); //its malloced by xdr
+
+        if (ret) {
+                if (err_str[0] == '\0')
+                        snprintf (err_str, sizeof (err_str),
+                                  "Operation failed");
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, err_str);
+        }
+
+        return ret;
+}
+/* RPC entry point for volume delete: hands the request and the real
+ * handler to glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_delete_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_delete_volume);
+
+        return ret;
+}
+/*
+ * Fill @dict with "keyN"/"valueN" pairs for a tiered volume.
+ *
+ * One pair is added for the cold tier type and one for the hot tier type,
+ * each only if gd_get_shd_key() returns a key for that type; every pair
+ * carries the same @value. "count" is finally set to the number of pairs
+ * added.
+ *
+ * Returns 0 on success or the first non-zero dict_set_* result.
+ */
+int
+glusterd_handle_shd_option_for_tier (glusterd_volinfo_t *volinfo,
+                                     char *value, dict_t *dict)
+{
+        char  name_buf[1024] = {0, };
+        char *shd_key        = NULL;
+        int   npairs         = 0;
+        int   rc             = 0;
+
+        /* Cold tier */
+        shd_key = gd_get_shd_key (volinfo->tier_info.cold_type);
+        if (shd_key != NULL) {
+                npairs += 1;
+
+                snprintf (name_buf, sizeof (name_buf), "key%d", npairs);
+                rc = dict_set_str (dict, name_buf, shd_key);
+                if (rc != 0)
+                        return rc;
+
+                snprintf (name_buf, sizeof (name_buf), "value%d", npairs);
+                rc = dict_set_str (dict, name_buf, value);
+                if (rc != 0)
+                        return rc;
+        }
+
+        /* Hot tier */
+        shd_key = gd_get_shd_key (volinfo->tier_info.hot_type);
+        if (shd_key != NULL) {
+                npairs += 1;
+
+                snprintf (name_buf, sizeof (name_buf), "key%d", npairs);
+                rc = dict_set_str (dict, name_buf, shd_key);
+                if (rc != 0)
+                        return rc;
+
+                snprintf (name_buf, sizeof (name_buf), "value%d", npairs);
+                rc = dict_set_str (dict, name_buf, value);
+                if (rc != 0)
+                        return rc;
+        }
+
+        rc = dict_set_int32 (dict, "count", npairs);
+
+        return rc;
+}
+/*
+ * Turn a heal enable/disable request into a volume-set operation.
+ *
+ * Reads "heal-op" from @dict. For GF_SHD_OP_HEAL_ENABLE/DISABLE the
+ * matching key/value/count entries are stored in @dict (through
+ * glusterd_handle_shd_option_for_tier() for tiered volumes) and the
+ * GD_OP_SET_VOLUME synctask is started.
+ *
+ * Returns -EINVAL when the heal-op is a valid one but is neither enable
+ * nor disable, -1 when the heal-op is missing/invalid or no key exists for
+ * the volume type, otherwise the result of the failing dict call or of
+ * glusterd_op_begin_synctask().
+ */
+static int
+glusterd_handle_heal_enable_disable (rpcsvc_request_t *req, dict_t *dict,
+                                     glusterd_volinfo_t *volinfo)
+{
+        gf_xl_afr_op_t  op      = GF_SHD_OP_INVALID;
+        char           *opt_key = NULL;
+        char           *opt_val = NULL;
+        int             rc      = 0;
+
+        rc = dict_get_int32 (dict, "heal-op", (int32_t *)&op);
+        if (rc || (op == GF_SHD_OP_INVALID))
+                return -1;
+
+        if (op == GF_SHD_OP_HEAL_ENABLE)
+                opt_val = "enable";
+        else if (op == GF_SHD_OP_HEAL_DISABLE)
+                opt_val = "disable";
+        else
+                return -EINVAL;
+
+        /* Convert this command to volume-set command based on volume type */
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                rc = glusterd_handle_shd_option_for_tier (volinfo, opt_val,
+                                                          dict);
+                if (rc)
+                        return rc;
+        } else {
+                opt_key = volgen_get_shd_key (volinfo->type);
+                if (opt_key == NULL)
+                        return -1;
+
+                rc = dict_set_str (dict, "key1", opt_key);
+                if (rc)
+                        return rc;
+
+                rc = dict_set_str (dict, "value1", opt_val);
+                if (rc)
+                        return rc;
+
+                rc = dict_set_int32 (dict, "count", 1);
+                if (rc)
+                        return rc;
+        }
+
+        return glusterd_op_begin_synctask (req, GD_OP_SET_VOLUME, dict);
+}
+
+/*
+ * CLI handler for "volume heal".
+ *
+ * Decodes the request, unserializes its dictionary and looks up the volume
+ * named by "volname". Heal enable/disable requests are converted to a
+ * volume-set operation by glusterd_handle_heal_enable_disable(); any other
+ * heal-op (signalled by -EINVAL from that helper) gets the bricks'
+ * hostname/path and the brick count added to the dictionary and is run as
+ * the GD_OP_HEAL_VOLUME synctask.
+ *
+ * On failure an error response is sent to the CLI and the result of
+ * sending it is returned.
+ */
+int
+__glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
+{
+        int32_t             ret             = -1;
+        gf_cli_req          cli_req         = {{0,}};
+        dict_t             *dict            = NULL;
+        glusterd_op_t       cli_op          = GD_OP_HEAL_VOLUME;
+        char               *volname         = NULL;
+        glusterd_volinfo_t *volinfo         = NULL;
+        xlator_t           *this            = NULL;
+        char                op_errstr[2048] = {0,};
+
+        GF_ASSERT (req);
+
+        /* Fetch the xlator before the first "goto out": the error path
+         * below logs through this->name. */
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                /* Unserialize the dictionary */
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to "
+                                "unserialize req-buffer to dictionary");
+                        snprintf (op_errstr, sizeof (op_errstr),
+                                  "Unable to decode the command");
+                        goto out;
+                } else {
+                        /* Hand the XDR buffer over to the dictionary. */
+                        dict->extra_stdfree = cli_req.dict.dict_val;
+                }
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (op_errstr, sizeof (op_errstr), "Unable to find "
+                          "volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", op_errstr);
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_HEAL_VOL_REQ_RCVD, "Received heal vol req "
+                "for volume %s", volname);
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (op_errstr, sizeof (op_errstr),
+                          "Volume %s does not exist", volname);
+                goto out;
+        }
+
+        ret = glusterd_handle_heal_enable_disable (req, dict, volinfo);
+        if (ret == -EINVAL) {
+                /* Not an enable/disable request: continue as a heal op. */
+                ret = 0;
+        } else {
+                /*
+                 * If the return value is -ve but not -EINVAL then the command
+                 * failed. If the return value is 0 then the synctask for the
+                 * op has begun, so in both cases just 'goto out'. If there was
+                 * a failure it will respond with an error, otherwise the
+                 * synctask will take the responsibility of sending the
+                 * response.
+                 */
+                goto out;
+        }
+
+        ret = glusterd_add_bricks_hname_path_to_dict (dict, volinfo);
+        if (ret)
+                goto out;
+
+        ret = dict_set_int32 (dict, "count", volinfo->brick_count);
+        if (ret)
+                goto out;
+
+        ret = glusterd_op_begin_synctask (req, GD_OP_HEAL_VOLUME, dict);
+
+out:
+        if (ret) {
+                if (op_errstr[0] == '\0')
+                        snprintf (op_errstr, sizeof (op_errstr),
+                                  "operation failed");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_OP_FAILED, "%s", op_errstr);
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, op_errstr);
+        }
+
+        return ret;
+}
+
+/* RPC entry point for volume heal: hands the request and the real
+ * handler to glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_heal_volume);
+
+        return ret;
+}
+
+/*
+ * CLI handler for "volume statedump".
+ *
+ * Decodes the request, unserializes its dictionary and reads "volname",
+ * "options" and "option_cnt". A quotad statedump is refused while the
+ * cluster op-version equals GD_OP_VERSION_MIN; otherwise the
+ * GD_OP_STATEDUMP_VOLUME synctask is started. On failure an error response
+ * is sent to the CLI and the result of sending it is returned.
+ */
+int
+__glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req)
+{
+        xlator_t        *this          = THIS;
+        glusterd_conf_t *priv          = NULL;
+        dict_t          *dict          = NULL;
+        char            *vol           = NULL;
+        char            *opts          = NULL;
+        int32_t          nopts         = 0;
+        gf_cli_req       cli_req       = {{0,}};
+        char             err_msg[2048] = {0,};
+        int32_t          rc            = -1;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (req);
+
+        rc = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (rc < 0) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len != 0) {
+                /* Rebuild the request dictionary from the wire buffer. */
+                dict = dict_new ();
+
+                rc = dict_unserialize (cli_req.dict.dict_val,
+                                       cli_req.dict.dict_len, &dict);
+                if (rc < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_UNSERIALIZE_FAIL,
+                                "failed to unserialize req-buffer to dictionary");
+                        snprintf (err_msg, sizeof (err_msg),
+                                  "Unable to decode the command");
+                        goto out;
+                }
+        }
+
+        rc = dict_get_str (dict, "volname", &vol);
+        if (rc != 0) {
+                snprintf (err_msg, sizeof (err_msg),
+                          "Unable to get the volume name");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_msg);
+                goto out;
+        }
+
+        rc = dict_get_str (dict, "options", &opts);
+        if (rc != 0) {
+                snprintf (err_msg, sizeof (err_msg), "Unable to get options");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_msg);
+                goto out;
+        }
+
+        rc = dict_get_int32 (dict, "option_cnt", &nopts);
+        if (rc != 0) {
+                snprintf (err_msg, sizeof (err_msg),
+                          "Unable to get option count");
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", err_msg);
+                goto out;
+        }
+
+        if ((priv->op_version == GD_OP_VERSION_MIN) &&
+            (strstr (opts, "quotad") != NULL)) {
+                snprintf (err_msg, sizeof (err_msg), "The cluster is operating "
+                          "at op-version 1. Taking quotad's statedump is "
+                          "disallowed in this state");
+                rc = -1;
+                goto out;
+        }
+
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_STATEDUMP_VOL_REQ_RCVD, "Received statedump request for "
+                "volume %s with options %s", vol, opts);
+
+        rc = glusterd_op_begin_synctask (req, GD_OP_STATEDUMP_VOLUME, dict);
+
+out:
+        if (rc != 0) {
+                if (err_msg[0] == '\0')
+                        snprintf (err_msg, sizeof (err_msg),
+                                  "Operation failed");
+                rc = glusterd_op_send_cli_response (GD_OP_STATEDUMP_VOLUME,
+                                                    rc, 0, req, dict, err_msg);
+        }
+        /* Released only after the response has been sent. */
+        free (cli_req.dict.dict_val);
+
+        return rc;
+}
+
+/* RPC entry point for volume statedump: hands the request and the real
+ * handler to glusterd_big_locked_handler(). */
+int
+glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req)
+{
+        int ret = glusterd_big_locked_handler (req,
+                                               __glusterd_handle_cli_statedump_volume);
+
+        return ret;
+}
+
+#ifdef HAVE_BD_XLATOR
+/*
+ * Validates if given VG in the brick exists or not. Also checks if VG has
+ * GF_XATTR_VOL_ID_KEY tag set to avoid using same VG for multiple bricks.
+ * Tag is checked only during glusterd_op_stage_create_volume. Tag is set during
+ * glusterd_validate_and_create_brickpath().
+ * @brick - brick info, @check_tag - check for VG tag or not
+ * @msg - Error message to return to caller
+ *
+ * On success brick->caps is set to the BD capabilities, with CAPS_THIN
+ * added when a "thin-pool" segment is found in the VG.
+ *
+ * Returns 0 on success and -1 when LVM cannot be initialised, the VG name
+ * cannot be determined, the VG cannot be opened, or (with @check_tag) the
+ * VG already carries a GF_XATTR_VOL_ID_KEY tag.
+ */
+int
+glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg)
+{
+        lvm_t                      handle     = NULL;
+        vg_t                       vg         = NULL;
+        char                      *vg_name    = NULL;
+        int                        retval     = 0;
+        char                      *p          = NULL;
+        char                      *ptr        = NULL;
+        struct dm_list            *dm_lvlist  = NULL;
+        struct dm_list            *dm_seglist = NULL;
+        struct lvm_lv_list        *lv_list    = NULL;
+        struct lvm_property_value  prop       = {0, };
+        struct lvm_lvseg_list     *seglist    = NULL;
+        struct dm_list            *taglist    = NULL;
+        struct lvm_str_list       *strl       = NULL;
+
+        handle = lvm_init (NULL);
+        if (!handle) {
+                sprintf (msg, "lvm_init failed, could not validate vg");
+                return -1;
+        }
+        if (*brick->vg == '\0') { /* BD xlator has vg in brick->path */
+                p = gf_strdup (brick->path);
+                /* strtok_r must not be handed a NULL string together with
+                 * a NULL save pointer. */
+                if (p)
+                        vg_name = strtok_r (p, "/", &ptr);
+        } else
+                vg_name = brick->vg;
+
+        if (!vg_name) {
+                /* Allocation failed or the path held no VG component. */
+                sprintf (msg, "could not determine vg name from brick path");
+                retval = -1;
+                goto out;
+        }
+
+        vg = lvm_vg_open (handle, vg_name, "r", 0);
+        if (!vg) {
+                sprintf (msg, "no such vg: %s", vg_name);
+                retval = -1;
+                goto out;
+        }
+        if (!check_tag)
+                goto next;
+
+        taglist = lvm_vg_get_tags (vg);
+        if (!taglist)
+                goto next;
+
+        dm_list_iterate_items (strl, taglist) {
+                if (!strncmp(strl->str, GF_XATTR_VOL_ID_KEY,
+                             strlen (GF_XATTR_VOL_ID_KEY))) {
+                        sprintf (msg, "VG %s is already part of"
+                                 " a brick", vg_name);
+                        retval = -1;
+                        goto out;
+                }
+        }
+next:
+
+        brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+
+        dm_lvlist = lvm_vg_list_lvs (vg);
+        if (!dm_lvlist)
+                goto out;
+
+        dm_list_iterate_items (lv_list, dm_lvlist) {
+                dm_seglist = lvm_lv_list_lvsegs (lv_list->lv);
+                /* NOTE(review): a NULL segment list is skipped rather than
+                 * iterated - confirm against lvm2app that NULL is a possible
+                 * return value here. */
+                if (!dm_seglist)
+                        continue;
+                dm_list_iterate_items (seglist, dm_seglist) {
+                        prop = lvm_lvseg_get_property (seglist->lvseg,
+                                                       "segtype");
+                        if (!prop.is_valid || !prop.value.string)
+                                continue;
+                        if (!strcmp (prop.value.string, "thin-pool")) {
+                                brick->caps |= CAPS_THIN;
+                                gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                        GD_MSG_THINPOOLS_FOR_THINLVS,
+                                        "Thin Pool "
+                                        "\"%s\" will be used for thin LVs",
+                                        lvm_lv_get_name (lv_list->lv));
+                                break;
+                        }
+                }
+        }
+
+        retval = 0;
+out:
+        if (vg)
+                lvm_vg_close (vg);
+        lvm_quit (handle);
+        if (p)
+                GF_FREE (p);
+        return retval;
+}
+#endif
+
+/* op-sm */
+/*
+ * Stage (validate) a volume-create request.
+ *
+ * Verifies that the volume does not exist yet, parses "volume-id", and for
+ * each of the "count" bricks listed in "bricks" validates the brick path
+ * and volfile path lengths, builds and resolves a brickinfo and, for bricks
+ * hosted on this node, validates/creates the brick path and (from
+ * op-version 3.6.0 on) records the brick mount directory in @rsp_dict as
+ * "brick<N>.mount_dir". On the originator glusterd the brick order of
+ * replicate/disperse volumes is checked unless "force" was given.
+ * "brick_count" in @rsp_dict is set to the index of the last local brick.
+ *
+ * @dict      - request dictionary
+ * @op_errstr - receives an allocated error message on failure
+ * @rsp_dict  - response dictionary filled as described above
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr,
+                                 dict_t *rsp_dict)
+{
+        int                   ret               = 0;
+        char                 *volname           = NULL;
+        gf_boolean_t          exists            = _gf_false;
+        char                 *bricks            = NULL;
+        char                 *brick_list        = NULL;
+        char                 *free_ptr          = NULL;
+        char                  key[PATH_MAX]     = "";
+        glusterd_brickinfo_t *brick_info        = NULL;
+        int32_t               brick_count       = 0;
+        int32_t               local_brick_count = 0;
+        int32_t               i                 = 0;
+        int32_t               type              = 0;
+        char                 *brick             = NULL;
+        char                 *tmpptr            = NULL;
+        xlator_t             *this              = NULL;
+        glusterd_conf_t      *priv              = NULL;
+        char                  msg[2048]         = {0};
+        uuid_t                volume_uuid;
+        char                 *volume_uuid_str   = NULL;
+        gf_boolean_t          is_force          = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (rsp_dict);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        if (exists) {
+                snprintf (msg, sizeof (msg), "Volume %s already exists",
+                          volname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get brick count "
+                        "for volume %s", volname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volume-id", &volume_uuid_str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume id of "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        ret = gf_uuid_parse (volume_uuid_str, volume_uuid);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUID_PARSE_FAIL,
+                        "Unable to parse volume id of"
+                        " volume %s", volname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "bricks", &bricks);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get bricks for "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        if (bricks) {
+                /* strtok_r modifies its input, so work on a private copy. */
+                brick_list = gf_strdup (bricks);
+                if (!brick_list) {
+                        ret = -1;
+                        goto out;
+                } else {
+                        free_ptr = brick_list;
+                }
+        }
+
+        while ( i < brick_count) {
+                i++;
+                brick = brick_list ? strtok_r (brick_list, " \n", &tmpptr)
+                                   : NULL;
+                if (!brick) {
+                        /* "count" promised more bricks than the list holds;
+                         * do not pass a NULL brick to the validators. */
+                        snprintf (msg, sizeof (msg), "Brick list of volume "
+                                  "%s has fewer than %d bricks", volname,
+                                  brick_count);
+                        ret = -1;
+                        goto out;
+                }
+                brick_list = tmpptr;
+
+                if (!glusterd_store_is_valid_brickpath (volname, brick)) {
+                        snprintf (msg, sizeof (msg), "brick path %s is too "
+                                  "long.", brick);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (!glusterd_is_valid_volfpath (volname, brick)) {
+                        snprintf (msg, sizeof (msg), "Volume file path for "
+                                  "volume %s and brick path %s is too long.",
+                                  volname, brick);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_brickinfo_new_from_brick (brick, &brick_info,
+                                                         _gf_true, op_errstr);
+                if (ret)
+                        goto out;
+
+                ret = glusterd_new_brick_validate (brick, brick_info, msg,
+                                                   sizeof (msg));
+                if (ret)
+                        goto out;
+
+                ret = glusterd_resolve_brick (brick_info);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL,
+                                FMTSTR_RESOLVE_BRICK,
+                                brick_info->hostname, brick_info->path);
+                        goto out;
+                }
+
+                /* The remaining checks apply only to bricks on this node. */
+                if (!gf_uuid_compare (brick_info->uuid, MY_UUID)) {
+#ifdef HAVE_BD_XLATOR
+                        if (brick_info->vg[0]) {
+                                ret = glusterd_is_valid_vg (brick_info, 1, msg);
+                                if (ret)
+                                        goto out;
+                        }
+#endif
+                        ret = glusterd_validate_and_create_brickpath (brick_info,
+                                                        volume_uuid, op_errstr,
+                                                        is_force);
+                        if (ret)
+                                goto out;
+
+                        /* A bricks mount dir is required only by snapshots which were
+                         * introduced in gluster-3.6.0
+                         */
+                        if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+                                ret = glusterd_get_brick_mount_dir
+                                        (brick_info->path, brick_info->hostname,
+                                         brick_info->mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+                                                "Failed to get brick mount_dir");
+                                        goto out;
+                                }
+
+                                snprintf (key, sizeof(key), "brick%d.mount_dir",
+                                          i);
+                                ret = dict_set_dynstr_with_alloc
+                                        (rsp_dict, key, brick_info->mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "Failed to set %s", key);
+                                        goto out;
+                                }
+                        }
+                        local_brick_count = i;
+                }
+                glusterd_brickinfo_delete (brick_info);
+                brick_info = NULL;
+        }
+
+        /*Check brick order if the volume type is replicate or disperse. If
+         * force at the end of command not given then check brick order.
+         */
+        if (is_origin_glusterd (dict)) {
+                ret = dict_get_int32 (dict, "type", &type);
+                if (ret) {
+                        snprintf (msg, sizeof (msg), "Unable to get type of "
+                                  "volume %s", volname);
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                GD_MSG_DICT_GET_FAILED, "%s", msg);
+                        goto out;
+                }
+
+                if (!is_force) {
+                        if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+                            (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) ||
+                            (type == GF_CLUSTER_TYPE_DISPERSE)) {
+                                ret = glusterd_check_brick_order(dict, msg);
+                                if (ret) {
+                                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                                               GD_MSG_BAD_BRKORDER, "Not "
+                                               "creating volume because of bad "
+                                               "brick order");
+                                        goto out;
+                                }
+                        }
+                }
+        }
+
+        ret = dict_set_int32 (rsp_dict, "brick_count", local_brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set local_brick_count");
+                goto out;
+        }
+out:
+        GF_FREE (free_ptr);
+        if (brick_info)
+                glusterd_brickinfo_delete (brick_info);
+
+        if (msg[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_STAGE_CREATE_VOL_FAIL, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Read the arguments of a "volume stop" request out of @dict.
+ *
+ * dict_get_str() is asked to fill *volname from the "volname" key and
+ * dict_get_int32() to fill *flags from the "flags" key.
+ *
+ * Returns 0 when both lookups succeed, -1 when any argument is NULL, and
+ * otherwise the non-zero result of the lookup that failed.
+ */
+int
+glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags)
+{
+        xlator_t *this = THIS;
+        int       rc   = -1;
+
+        GF_ASSERT (this);
+
+        /* Nothing can be fetched without a dict and both out-pointers. */
+        if ((dict == NULL) || (volname == NULL) || (flags == NULL))
+                return rc;
+
+        rc = dict_get_str (dict, "volname", volname);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                return rc;
+        }
+
+        rc = dict_get_int32 (dict, "flags", flags);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get flags");
+        }
+
+        return rc;
+}
+
+/*
+ * Read the arguments of a "volume statedump" request out of @dict.
+ *
+ * Looks up, in this order, the keys "volname" (into *volname), "options"
+ * (into *options) and "option_cnt" (into *option_cnt). The first lookup that
+ * fails is logged and stops the sequence.
+ *
+ * Returns 0 when all three lookups succeed, -1 when any argument is NULL,
+ * and otherwise the non-zero result of the lookup that failed.
+ */
+int
+glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname,
+                                       char **options, int *option_cnt)
+{
+        int rc = -1;
+
+        if ((dict == NULL) || (volname == NULL) ||
+            (options == NULL) || (option_cnt == NULL))
+                return rc;
+
+        rc = dict_get_str (dict, "volname", volname);
+        if (rc != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volname");
+                return rc;
+        }
+
+        rc = dict_get_str (dict, "options", options);
+        if (rc != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get options");
+                return rc;
+        }
+
+        rc = dict_get_int32 (dict, "option_cnt", option_cnt);
+        if (rc != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get option count");
+        }
+
+        return rc;
+}
+
+/*
+ * Staging (validation) step of "volume start".
+ *
+ * Checks that the volume named in @dict exists, that server quorum is met
+ * (for op-versions above 3.7.5), that the volume id in @dict matches, and -
+ * unless the force flag is set - that the volume is not already started.
+ * Every brick is then resolved; for bricks owned by this node (and whose
+ * snap_status is not -1) the brick directory and its volume-id xattr are
+ * verified. With the force flag a missing directory is skipped and a missing
+ * xattr is (re)created from the volinfo.
+ *
+ * For op-versions >= 3.6.0 the mount directory of each local brick that does
+ * not have one yet is looked up and published in @rsp_dict under
+ * "brick<N>.mount_dir"; "brick_count" in @rsp_dict is set to the index of the
+ * last brick published that way.
+ *
+ * On failure *op_errstr is set to a newly allocated message whenever a
+ * message was composed locally (callees may set it themselves).
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr,
+                                dict_t *rsp_dict)
+{
+        int                    ret               = 0;
+        char                  *volname           = NULL;
+        char                   key[PATH_MAX]     = "";
+        int                    flags             = 0;
+        int32_t                brick_count       = 0;
+        int32_t                local_brick_count = 0;
+        gf_boolean_t           exists            = _gf_false;
+        glusterd_volinfo_t    *volinfo           = NULL;
+        glusterd_brickinfo_t  *brickinfo         = NULL;
+        char                   msg[2048]         = {0,};
+        glusterd_conf_t       *priv              = NULL;
+        xlator_t              *this              = NULL;
+        uuid_t                 volume_id         = {0,};
+        char                   volid[50]         = {0,};
+        char                   xattr_volid[50]   = {0,};
+        int                    caps              = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (rsp_dict);
+
+        ret = glusterd_op_start_volume_args_get (dict, &volname, &flags);
+        if (ret)
+                goto out;
+
+        exists = glusterd_check_volume_exists (volname);
+
+        if (!exists) {
+                snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                goto out;
+        }
+
+        /* This is an incremental approach to have all the volinfo objects ref
+         * count. The first attempt is made in volume start transaction to
+         * ensure it doesn't race with import volume where stale volume is
+         * deleted. There are multiple instances of GlusterD crashing in
+         * bug-948686.t because of this. Once this approach is full proof, all
+         * other volinfo objects will be refcounted.
+         *
+         * The reference is taken right after the lookup so that every
+         * 'goto out' below, which unrefs a non-NULL volinfo, is balanced.
+         * Taking it after the quorum check would make a quorum failure drop
+         * a reference that was never acquired.
+         */
+        glusterd_volinfo_ref (volinfo);
+
+        if (priv->op_version > GD_OP_VERSION_3_7_5) {
+                ret = glusterd_validate_quorum (this, GD_OP_START_VOLUME, dict,
+                                                op_errstr);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                                GD_MSG_SERVER_QUORUM_NOT_MET,
+                                "Server quorum not met. Rejecting operation.");
+                        goto out;
+                }
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+                if (glusterd_is_volume_started (volinfo)) {
+                        snprintf (msg, sizeof (msg), "Volume %s already "
+                                  "started", volname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* brick_count is the 1-based position of the brick in the
+                 * volume; it is used to build the rsp_dict keys below. */
+                brick_count++;
+                ret = glusterd_resolve_brick (brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL, FMTSTR_RESOLVE_BRICK,
+                                brickinfo->hostname, brickinfo->path);
+                        goto out;
+                }
+
+                /* Only bricks hosted on this node are inspected further. */
+                if ((gf_uuid_compare (brickinfo->uuid, MY_UUID)) ||
+                    (brickinfo->snap_status == -1))
+                        continue;
+
+                ret = gf_lstat_dir (brickinfo->path, NULL);
+                if (ret && (flags & GF_CLI_FLAG_OP_FORCE)) {
+                        continue;
+                } else if (ret) {
+                        snprintf (msg, sizeof (msg), "Failed to find "
+                                  "brick directory %s for volume %s. "
+                                  "Reason : %s", brickinfo->path,
+                                  volname, strerror (errno));
+                        goto out;
+                }
+                ret = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY,
+                                     volume_id, 16);
+                if (ret < 0 && (!(flags & GF_CLI_FLAG_OP_FORCE))) {
+                        snprintf (msg, sizeof (msg), "Failed to get "
+                                  "extended attribute %s for brick dir %s. "
+                                  "Reason : %s", GF_XATTR_VOL_ID_KEY,
+                                  brickinfo->path, strerror (errno));
+                        ret = -1;
+                        goto out;
+                } else if (ret < 0) {
+                        /* Forced start: stamp the brick with the volume id
+                         * instead of failing. */
+                        ret = sys_lsetxattr (brickinfo->path,
+                                             GF_XATTR_VOL_ID_KEY,
+                                             volinfo->volume_id, 16,
+                                             XATTR_CREATE);
+                        if (ret == -1) {
+                                snprintf (msg, sizeof (msg), "Failed to set "
+                                          "extended attribute %s on %s. Reason: "
+                                          "%s", GF_XATTR_VOL_ID_KEY,
+                                          brickinfo->path, strerror (errno));
+                                goto out;
+                        } else {
+                                continue;
+                        }
+                }
+                if (gf_uuid_compare (volinfo->volume_id, volume_id)) {
+                        snprintf (msg, sizeof (msg), "Volume id mismatch for "
+                                  "brick %s:%s. Expected volume id %s, "
+                                  "volume id %s found", brickinfo->hostname,
+                                  brickinfo->path,
+                                  uuid_utoa_r (volinfo->volume_id, volid),
+                                  uuid_utoa_r (volume_id, xattr_volid));
+                        ret = -1;
+                        goto out;
+                }
+
+                /* A bricks mount dir is required only by snapshots which were
+                 * introduced in gluster-3.6.0
+                 */
+                if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+                        if (strlen(brickinfo->mount_dir) < 1) {
+                                ret = glusterd_get_brick_mount_dir
+                                        (brickinfo->path, brickinfo->hostname,
+                                         brickinfo->mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+                                                "Failed to get brick mount_dir");
+                                        goto out;
+                                }
+
+                                snprintf (key, sizeof(key), "brick%d.mount_dir",
+                                          brick_count);
+                                ret = dict_set_dynstr_with_alloc
+                                        (rsp_dict, key, brickinfo->mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_SET_FAILED,
+                                                "Failed to set %s", key);
+                                        goto out;
+                                }
+                                local_brick_count = brick_count;
+                        }
+                }
+
+#ifdef HAVE_BD_XLATOR
+                if (brickinfo->vg[0])
+                        caps = CAPS_BD | CAPS_THIN |
+                                CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+                /* Check for VG/thin pool if its BD volume */
+                if (brickinfo->vg[0]) {
+                        ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+                        if (ret)
+                                goto out;
+                        /* if anyone of the brick does not have thin support,
+                           disable it for entire volume */
+                        caps &= brickinfo->caps;
+                } else
+                        caps = 0;
+#endif
+        }
+
+        ret = dict_set_int32 (rsp_dict, "brick_count", local_brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set local_brick_count");
+                goto out;
+        }
+
+        volinfo->caps = caps;
+        ret = 0;
+out:
+        /* volinfo is non-NULL only after a successful lookup, which is
+         * immediately followed by the matching glusterd_volinfo_ref(). */
+        if (volinfo)
+                glusterd_volinfo_unref (volinfo);
+
+        if (ret && (msg[0] != '\0')) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_STAGE_START_VOL_FAIL, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+        return ret;
+}
+
+/*
+ * Staging (validation) step of "volume stop".
+ *
+ * Verifies that the volume named in @dict exists and that the volume id in
+ * @dict matches. With the force flag no further checks are made. Otherwise
+ * the volume must be started, geo-replication must not be active on it, and
+ * no rebalance may be in progress; if glusterd_check_ganesha_export()
+ * returns non-zero for the volume, an unexport is attempted and its failure
+ * is only logged as a warning.
+ *
+ * Whenever a message was composed locally, *op_errstr is set to a newly
+ * allocated copy of it.
+ *
+ * Returns 0 when the stop may proceed, non-zero otherwise.
+ */
+int
+glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr)
+{
+        xlator_t             *this         = THIS;
+        glusterd_volinfo_t   *vol          = NULL;
+        char                 *volname      = NULL;
+        char                  errbuf[2048] = {0};
+        gsync_status_param_t  geo_rep      = {0,};
+        int                   flags        = 0;
+        int                   ret          = -1;
+
+        GF_ASSERT (this);
+
+        ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+        if (ret != 0)
+                goto out;
+
+        if (!glusterd_check_volume_exists (volname)) {
+                snprintf (errbuf, sizeof (errbuf), FMTSTR_CHECK_VOL_EXISTS,
+                          volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", errbuf);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret != 0) {
+                snprintf (errbuf, sizeof (errbuf), FMTSTR_CHECK_VOL_EXISTS,
+                          volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_GET_FAIL, "%s", errbuf);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, vol);
+        if (ret != 0)
+                goto out;
+
+        /* A forced stop skips every state check below (ret is 0 here). */
+        if ((flags & GF_CLI_FLAG_OP_FORCE) != 0)
+                goto out;
+
+        if (glusterd_is_volume_started (vol) == _gf_false) {
+                snprintf (errbuf, sizeof (errbuf), "Volume %s "
+                          "is not in the started state", volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_STARTED, "%s", errbuf);
+                ret = -1;
+                goto out;
+        }
+
+        /* Geo-replication, if configured for this volume, must be stopped
+         * first; the callee fills *op_errstr itself. */
+        geo_rep.volinfo = vol;
+        ret = glusterd_check_geo_rep_running (&geo_rep, op_errstr);
+        if ((ret != 0) || geo_rep.is_active) {
+                ret = -1;
+                goto out;
+        }
+
+        /* ret is 0 at this point; an unexport failure is tolerated and must
+         * not fail the stage, so ret is forced back to 0. */
+        if (glusterd_check_ganesha_export (vol)) {
+                ret = ganesha_manage_export (volname, "off", op_errstr,
+                                             _gf_false);
+                if (ret != 0) {
+                        gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                                GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL, "Could not "
+                                "unexport volume via NFS-Ganesha");
+                        ret = 0;
+                }
+        }
+
+        if (glusterd_is_defrag_on (vol)) {
+                snprintf (errbuf, sizeof (errbuf), "rebalance session is "
+                          "in progress for the volume '%s'", volname);
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_OIP, "%s", errbuf);
+                ret = -1;
+        }
+
+out:
+        if (errbuf[0] != '\0')
+                *op_errstr = gf_strdup (errbuf);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Staging (validation) step of "volume delete".
+ *
+ * The delete is refused when the volume named in @dict does not exist, when
+ * the volume id in @dict does not match, when the volume is started, when it
+ * still has snapshots, or when glusterd_are_all_peers_up() reports false.
+ *
+ * Whenever a message was composed locally it is logged and *op_errstr is set
+ * to a newly allocated copy of it.
+ *
+ * Returns 0 when the delete may proceed, non-zero otherwise.
+ */
+int
+glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr)
+{
+        xlator_t           *this         = THIS;
+        glusterd_volinfo_t *vol          = NULL;
+        char               *volname      = NULL;
+        char                errbuf[2048] = {0};
+        int                 ret          = 0;
+
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        if (!glusterd_check_volume_exists (volname)) {
+                snprintf (errbuf, sizeof (errbuf), FMTSTR_CHECK_VOL_EXISTS,
+                          volname);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret != 0) {
+                snprintf (errbuf, sizeof (errbuf), FMTSTR_CHECK_VOL_EXISTS,
+                          volname);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, vol);
+        if (ret != 0)
+                goto out;
+
+        /* Every remaining check is a refusal, so assume failure until all
+         * of them have passed. */
+        ret = -1;
+
+        if (glusterd_is_volume_started (vol)) {
+                snprintf (errbuf, sizeof (errbuf), "Volume %s has been started."
+                          "Volume needs to be stopped before deletion.",
+                          volname);
+                goto out;
+        }
+
+        if ((vol->snap_count > 0) ||
+            !cds_list_empty (&vol->snap_volumes)) {
+                snprintf (errbuf, sizeof (errbuf), "Cannot delete Volume %s ,"
+                          "as it has %"PRIu64" snapshots. "
+                          "To delete the volume, "
+                          "first delete all the snapshots under it.",
+                          volname, vol->snap_count);
+                goto out;
+        }
+
+        if (!glusterd_are_all_peers_up ()) {
+                snprintf (errbuf, sizeof (errbuf), "Some of the peers are down");
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+        if (errbuf[0] != '\0') {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_STAGE_DELETE_VOL_FAIL, "%s", errbuf);
+                *op_errstr = gf_strdup (errbuf);
+        }
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validate the "heal-op" requested in @dict against the state of @volinfo.
+ *
+ * Each rejected request yields exactly one reason string; it is duplicated
+ * into *op_errstr and logged as a warning. Accepted requests leave
+ * *op_errstr untouched.
+ *
+ * Returns 0 when the heal operation is acceptable, -1 otherwise.
+ */
+static int
+glusterd_handle_heal_cmd (xlator_t *this, glusterd_volinfo_t *volinfo,
+                          dict_t *dict, char **op_errstr)
+{
+        glusterd_conf_t *conf        = this->private;
+        gf_xl_afr_op_t   heal_op     = GF_SHD_OP_INVALID;
+        char             errbuf[2408] = {0,};
+        char            *shd_down    = "Self-heal daemon is not running. "
+                                       "Check self-heal daemon log file.";
+        const char      *reason      = NULL;
+
+        if (dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op) != 0) {
+                reason = "Heal operation not specified";
+                goto done;
+        }
+
+        switch (heal_op) {
+        case GF_SHD_OP_INVALID:
+        case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/
+        case GF_SHD_OP_HEAL_DISABLE:/* This op should be handled in volume-set*/
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:/*glfsheal cmd*/
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:/*glfsheal cmd*/
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:/*glfsheal cmd*/
+                reason = "Invalid heal-op";
+                break;
+
+        case GF_SHD_OP_HEAL_INDEX:
+        case GF_SHD_OP_HEAL_FULL:
+                /* The volume-type check takes precedence over the daemon
+                 * state check. */
+                if (!glusterd_is_shd_compatible_volume (volinfo)) {
+                        snprintf (errbuf, sizeof (errbuf), "Volume %s is not of type "
+                                  "replicate or disperse", volinfo->volname);
+                        reason = errbuf;
+                } else if (!conf->shd_svc.online) {
+                        reason = shd_down;
+                }
+                break;
+
+        case GF_SHD_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_SPLIT_BRAIN_FILES:
+        case GF_SHD_OP_STATISTICS:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                if (!glusterd_is_volume_replicate (volinfo)) {
+                        snprintf (errbuf, sizeof (errbuf), "Volume %s is not of type "
+                                  "replicate", volinfo->volname);
+                        reason = errbuf;
+                } else if (!conf->shd_svc.online) {
+                        reason = shd_down;
+                }
+                break;
+
+        case GF_SHD_OP_HEALED_FILES:
+        case GF_SHD_OP_HEAL_FAILED_FILES:
+                snprintf (errbuf, sizeof (errbuf), "Command not supported. "
+                          "Please use \"gluster volume heal %s info\" "
+                          "and logs to find the heal information.",
+                          volinfo->volname);
+                reason = errbuf;
+                break;
+
+        }
+
+done:
+        if (reason == NULL)
+                return 0;
+
+        *op_errstr = gf_strdup (reason);
+        gf_msg (this->name, GF_LOG_WARNING, 0,
+                GD_MSG_HANDLE_HEAL_CMD_FAIL, "%s", *op_errstr);
+        return -1;
+}
+
+/*
+ * Staging (validation) step of "volume heal".
+ *
+ * The volume named in @dict must exist, match the volume id in @dict and be
+ * started. If the volume has no option dict the request is accepted as is;
+ * otherwise self-heal must be enabled for it and the requested heal
+ * operation must pass glusterd_handle_heal_cmd().
+ *
+ * For the failures detected here *op_errstr is set to a newly allocated
+ * message.
+ *
+ * Returns 0 when the heal may proceed, non-zero otherwise.
+ */
+int
+glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
+{
+        xlator_t           *this     = THIS;
+        glusterd_conf_t    *conf     = NULL;
+        glusterd_volinfo_t *vol      = NULL;
+        dict_t             *vol_opts = NULL;
+        char               *volname  = NULL;
+        char                errbuf[2048];
+        int                 ret      = 0;
+
+        GF_ASSERT (this);
+
+        conf = this->private;
+        if (conf == NULL) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_PRIV_NULL,
+                        "priv is NULL");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        if (glusterd_volinfo_find (volname, &vol) != 0) {
+                ret = -1;
+                snprintf (errbuf, sizeof (errbuf), "Volume %s does not exist",
+                          volname);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", errbuf);
+                *op_errstr = gf_strdup (errbuf);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, vol);
+        if (ret != 0)
+                goto out;
+
+        if (!glusterd_is_volume_started (vol)) {
+                ret = -1;
+                snprintf (errbuf, sizeof (errbuf), "Volume %s is not started.",
+                          volname);
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_VOL_NOT_STARTED, "%s", errbuf);
+                *op_errstr = gf_strdup (errbuf);
+                goto out;
+        }
+
+        /* Without an option dict there is nothing more to validate. */
+        vol_opts = vol->dict;
+        if (vol_opts == NULL) {
+                ret = 0;
+                goto out;
+        }
+
+        if (!gd_is_self_heal_enabled (vol, vol_opts)) {
+                ret = -1;
+                snprintf (errbuf, sizeof (errbuf), "Self-heal-daemon is "
+                          "disabled. Heal will not be triggered on volume %s",
+                          volname);
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        GD_MSG_SELF_HEALD_DISABLED, "%s", errbuf);
+                *op_errstr = gf_strdup (errbuf);
+                goto out;
+        }
+
+        /* The heal-op specific verdict is the final result. */
+        ret = glusterd_handle_heal_cmd (this, vol, dict, op_errstr);
+
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Staging (validation) step of "volume statedump".
+ *
+ * The volume named in @dict must exist, match the volume id in @dict and be
+ * started. A statedump whose options mention "quotad" is additionally
+ * refused while the cluster runs at GD_OP_VERSION_MIN, or when quota is not
+ * enabled on the volume.
+ *
+ * On failure, if a message was composed locally, *op_errstr is set to a
+ * newly allocated copy of it.
+ *
+ * Returns 0 when the statedump may proceed, non-zero otherwise.
+ */
+int
+glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr)
+{
+        xlator_t           *this         = THIS;
+        glusterd_conf_t    *conf         = NULL;
+        glusterd_volinfo_t *vol          = NULL;
+        char               *volname      = NULL;
+        char               *options      = NULL;
+        char                errbuf[2408] = {0,};
+        int                 option_cnt   = 0;
+        int                 wants_quotad = 0;
+        int                 ret          = -1;
+
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = glusterd_op_statedump_volume_args_get (dict, &volname, &options,
+                                                     &option_cnt);
+        if (ret != 0)
+                goto out;
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret != 0) {
+                snprintf (errbuf, sizeof (errbuf), FMTSTR_CHECK_VOL_EXISTS,
+                          volname);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, vol);
+        if (ret != 0)
+                goto out;
+
+        if (!glusterd_is_volume_started (vol)) {
+                snprintf (errbuf, sizeof (errbuf), "Volume %s is not in the started"
+                          " state", volname);
+                ret = -1;
+                goto out;
+        }
+
+        /* Both quotad restrictions depend on the same substring test. */
+        wants_quotad = (strstr (options, "quotad") != NULL);
+
+        if (wants_quotad && (conf->op_version == GD_OP_VERSION_MIN)) {
+                snprintf (errbuf, sizeof (errbuf), "The cluster is operating "
+                          "at op-version 1. Taking quotad's statedump is "
+                          "disallowed in this state");
+                ret = -1;
+                goto out;
+        }
+
+        if (wants_quotad && !glusterd_is_volume_quota_enabled (vol)) {
+                snprintf (errbuf, sizeof (errbuf), "Quota is not enabled on "
+                          "volume %s", volname);
+                ret = -1;
+        }
+
+out:
+        if ((ret != 0) && (errbuf[0] != '\0'))
+                *op_errstr = gf_strdup (errbuf);
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Staging (validation) step of "volume clear-locks".
+ *
+ * Requires the keys "volname", "path", "kind" and "type" to be present in
+ * @dict, the named volume to exist and match the volume id in @dict, and the
+ * volume to be started.
+ *
+ * For every failure detected here a message is logged and *op_errstr is set
+ * to a newly allocated copy of it.
+ *
+ * Returns 0 when the operation may proceed, non-zero otherwise.
+ */
+int
+glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr)
+{
+        int                  ret       = -1;
+        char                *volname   = NULL;
+        char                *path      = NULL;
+        char                *type      = NULL;
+        char                *kind      = NULL;
+        glusterd_volinfo_t  *volinfo   = NULL;
+        char                 msg[2048] = {0,};
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                snprintf (msg, sizeof(msg), "Failed to get volume name");
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "path", &path);
+        if (ret) {
+                snprintf (msg, sizeof(msg), "Failed to get path");
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "kind", &kind);
+        if (ret) {
+                snprintf (msg, sizeof(msg), "Failed to get kind");
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "type", &type);
+        if (ret) {
+                snprintf (msg, sizeof(msg), "Failed to get type");
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof(msg), "Volume %s does not exist",
+                          volname);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        if (!glusterd_is_volume_started (volinfo)) {
+                snprintf (msg, sizeof(msg), "Volume %s is not started",
+                          volname);
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_STARTED, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                /* ret is still 0 from the volume-id validation above; without
+                 * this the stage would report success while carrying an
+                 * error string. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit step of "volume create".
+ *
+ * Builds a new volinfo from the request in @dict (name, type, brick count,
+ * port, replica/stripe/arbiter/disperse counts, transport, volume id and
+ * internal credentials), creates one brickinfo per entry of the
+ * whitespace-separated "bricks" list, applies default options, stores the
+ * volinfo, generates the volfiles and finally links the volume into the
+ * list of known volumes.
+ *
+ * On failure the partially built volinfo is released and any brickinfo that
+ * was allocated but not yet linked into the volume is deleted. *op_errstr is
+ * set for some of the failures (callees may also set it).
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_op_create_volume (dict_t *dict, char **op_errstr)
+{
+        int                   ret                = 0;
+        int                   len                = 0;
+        char                 *volname            = NULL;
+        glusterd_conf_t      *priv               = NULL;
+        glusterd_volinfo_t   *volinfo            = NULL;
+        gf_boolean_t          vol_added          = _gf_false;
+        glusterd_brickinfo_t *brickinfo          = NULL;
+        xlator_t             *this               = NULL;
+        char                 *brick              = NULL;
+        int32_t               count              = 0;
+        int32_t               i                  = 1;
+        char                 *bricks             = NULL;
+        char                 *brick_list         = NULL;
+        char                 *free_ptr           = NULL;
+        char                 *saveptr            = NULL;
+        char                 *trans_type         = NULL;
+        char                 *str                = NULL;
+        char                 *username           = NULL;
+        char                 *password           = NULL;
+        int                   caps               = 0;
+        int                   brickid            = 0;
+        char                  msg[1024] __attribute__((unused)) = {0, };
+        char                 *brick_mount_dir    = NULL;
+        char                  key[PATH_MAX]      = "";
+        char                 *address_family_str = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = glusterd_volinfo_new (&volinfo);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        GD_MSG_NO_MEMORY,
+                        "Unable to allocate memory for volinfo");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto out;
+        }
+
+        /* Bounded, always NUL-terminated copy: the destination is a fixed
+         * buffer inside volinfo, so a name that does not fit is rejected
+         * instead of being written past the end of it. */
+        len = snprintf (volinfo->volname, sizeof (volinfo->volname), "%s",
+                        volname);
+        if ((len < 0) || ((size_t)len >= sizeof (volinfo->volname))) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Volume name %s is too long", volname);
+                *op_errstr = gf_strdup ("Volume name is too long");
+                ret = -1;
+                goto out;
+        }
+        GF_ASSERT (volinfo->volname);
+
+        ret = dict_get_int32 (dict, "type", &volinfo->type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get type of volume"
+                        " %s", volname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &volinfo->brick_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get brick count of"
+                        " volume %s", volname);
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "port", &volinfo->port);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get port");
+                goto out;
+        }
+
+        count = volinfo->brick_count;
+
+        ret = dict_get_str (dict, "bricks", &bricks);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get bricks for "
+                        "volume %s", volname);
+                goto out;
+        }
+
+        /* replica-count 1 means, no replication, file is in one brick only */
+        volinfo->replica_count = 1;
+        /* stripe-count 1 means, no striping, file is present as a whole */
+        volinfo->stripe_count = 1;
+
+        if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) {
+                ret = dict_get_int32 (dict, "replica-count",
+                                      &volinfo->replica_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get "
+                                "replica count for volume %s", volname);
+                        goto out;
+                }
+                /* The result of this lookup is not checked; ret is
+                 * overwritten by the next lookup below. */
+                ret = dict_get_int32 (dict, "arbiter-count",
+                                      &volinfo->arbiter_count);
+        } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) {
+                ret = dict_get_int32 (dict, "stripe-count",
+                                      &volinfo->stripe_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get stripe"
+                                " count for volume %s", volname);
+                        goto out;
+                }
+        } else if (GF_CLUSTER_TYPE_STRIPE_REPLICATE == volinfo->type) {
+                ret = dict_get_int32 (dict, "stripe-count",
+                                      &volinfo->stripe_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get stripe"
+                                " count for volume %s", volname);
+                        goto out;
+                }
+                ret = dict_get_int32 (dict, "replica-count",
+                                      &volinfo->replica_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get "
+                                "replica count for volume %s", volname);
+                        goto out;
+                }
+                /* Unchecked, as in the replicate case above. */
+                ret = dict_get_int32 (dict, "arbiter-count",
+                                      &volinfo->arbiter_count);
+        } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+                ret = dict_get_int32 (dict, "disperse-count",
+                                      &volinfo->disperse_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get "
+                                "disperse count for volume %s", volname);
+                        goto out;
+                }
+                ret = dict_get_int32 (dict, "redundancy-count",
+                                      &volinfo->redundancy_count);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED, "Failed to get "
+                                "redundancy count for volume %s", volname);
+                        goto out;
+                }
+                if (priv->op_version < GD_OP_VERSION_3_6_0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_UNSUPPORTED_VERSION, "Disperse volume "
+                                "needs op-version 3.6.0 or higher");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* dist-leaf-count is the count of brick nodes for a given
+           subvolume of distribute */
+        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+        /* subvol_count is the count of number of subvolumes present
+           for a given distribute volume */
+        volinfo->subvol_count = (volinfo->brick_count /
+                                 volinfo->dist_leaf_count);
+
+        /* Keep sub-count same as earlier, for the sake of backward
+           compatibility */
+        if (volinfo->dist_leaf_count > 1)
+                volinfo->sub_count = volinfo->dist_leaf_count;
+
+        ret = dict_get_str (dict, "transport", &trans_type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get transport type of volume %s", volname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volume-id", &str);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume-id of volume %s", volname);
+                goto out;
+        }
+        ret = gf_uuid_parse (str, volinfo->volume_id);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUID_PARSE_FAIL,
+                        "unable to parse uuid %s of volume %s", str, volname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "internal-username", &username);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "unable to get internal username of volume %s",
+                        volname);
+                goto out;
+        }
+        glusterd_auth_set_username (volinfo, username);
+
+        ret = dict_get_str (dict, "internal-password", &password);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "unable to get internal password of volume %s",
+                        volname);
+                goto out;
+        }
+        glusterd_auth_set_password (volinfo, password);
+
+        if (strcasecmp (trans_type, "rdma") == 0) {
+                volinfo->transport_type = GF_TRANSPORT_RDMA;
+        } else if (strcasecmp (trans_type, "tcp") == 0) {
+                volinfo->transport_type = GF_TRANSPORT_TCP;
+        } else {
+                volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA;
+        }
+
+        if (bricks) {
+                /* strtok_r() modifies its input, so tokenize a private copy;
+                 * free_ptr keeps the allocation for the cleanup path. */
+                brick_list = gf_strdup (bricks);
+                if (!brick_list) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY,
+                                "Unable to allocate memory for brick list");
+                        ret = -1;
+                        goto out;
+                }
+                free_ptr = brick_list;
+        }
+
+        /* Tokenize from the start of the list: leading delimiters are
+         * skipped by strtok_r() itself, so no manual offset is needed and
+         * a list without a leading delimiter keeps its first character. */
+        if (count && brick_list)
+                brick = strtok_r (brick_list, " \n", &saveptr);
+        caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+
+        brickid = glusterd_get_next_available_brickid (volinfo);
+        if (brickid < 0) {
+                /* ret is 0 from the preceding lookups; report the failure. */
+                ret = -1;
+                goto out;
+        }
+        while ( i <= count) {
+                ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo,
+                                                         _gf_true, op_errstr);
+                if (ret)
+                        goto out;
+
+                GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (brickinfo, volinfo,
+                                                      brickid++);
+
+                ret = glusterd_resolve_brick (brickinfo);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_RESOLVE_BRICK_FAIL, FMTSTR_RESOLVE_BRICK,
+                                brickinfo->hostname, brickinfo->path);
+                        goto out;
+                }
+
+                /* A bricks mount dir is required only by snapshots which were
+                 * introduced in gluster-3.6.0
+                 */
+                if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+                        brick_mount_dir = NULL;
+                        snprintf (key, sizeof(key), "brick%d.mount_dir", i);
+                        ret = dict_get_str (dict, key, &brick_mount_dir);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_GET_FAILED,
+                                        "%s not present", key);
+                                goto out;
+                        }
+                        /* snprintf() always terminates the destination,
+                         * unlike strncpy() with a full-size bound. */
+                        snprintf (brickinfo->mount_dir,
+                                  sizeof(brickinfo->mount_dir), "%s",
+                                  brick_mount_dir);
+                }
+
+#ifdef HAVE_BD_XLATOR
+                if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)
+                    && brickinfo->vg[0]) {
+                        ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_INVALID_VG, "%s", msg);
+                                goto out;
+                        }
+
+                        /* if anyone of the brick does not have thin
+                           support, disable it for entire volume */
+                        caps &= brickinfo->caps;
+                } else {
+                        caps = 0;
+                }
+
+#endif
+
+                cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
+                /* The brick is now reachable through volinfo->bricks; forget
+                 * the local pointer so the cleanup path only deletes a
+                 * brickinfo that was never linked. */
+                brickinfo = NULL;
+                brick = strtok_r (NULL, " \n", &saveptr);
+                i++;
+        }
+
+        ret = glusterd_enable_default_options (volinfo, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_FAIL_DEFAULT_OPT_SET, "Failed to set default "
+                        "options on create for volume %s", volinfo->volname);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "transport.address-family",
+                            &address_family_str);
+
+        if (!ret) {
+                ret = dict_set_dynstr_with_alloc(volinfo->dict,
+                                "transport.address-family", address_family_str);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to set transport.address-family for %s",
+                                volinfo->volname);
+                        goto out;
+                }
+        }
+
+        gd_update_volume_op_versions (volinfo);
+
+        volinfo->caps = caps;
+
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                glusterd_store_delete_volume (volinfo);
+                *op_errstr = gf_strdup ("Failed to store the "
+                                        "Volume information");
+                goto out;
+        }
+
+        ret = glusterd_create_volfiles_and_notify_services (volinfo);
+        if (ret) {
+                *op_errstr = gf_strdup ("Failed to create volume files");
+                goto out;
+        }
+
+        volinfo->rebal.defrag_status = 0;
+        glusterd_list_add_order (&volinfo->vol_list, &priv->volumes,
+                                 glusterd_compare_volume_name);
+        vol_added = _gf_true;
+
+out:
+        GF_FREE(free_ptr);
+        /* Non-NULL only when a brick failed validation before it was linked
+         * into the volume. */
+        if (brickinfo)
+                glusterd_brickinfo_delete (brickinfo);
+        if (!vol_added && volinfo)
+                glusterd_volinfo_unref (volinfo);
+        return ret;
+}
+
+/*
+ * Start every brick of @volinfo, mark the volume as started and persist the
+ * volinfo.
+ *
+ * @flags: when GF_CLI_FLAG_OP_FORCE is set, a brick that fails to start does
+ *         not abort the loop; the remaining bricks are still attempted.
+ * @wait:  passed through to glusterd_brick_start().
+ *
+ * Returns 0 on success; otherwise the non-zero result of the brick start
+ * that aborted the loop or of storing the volinfo.
+ */
+int
+glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
+                       gf_boolean_t wait)
+
+{
+        xlator_t                  *this       = THIS;
+        glusterd_brickinfo_t      *brick      = NULL;
+        glusterd_volinfo_ver_ac_t  ver_action = 0;
+        int                        ret        = 0;
+
+        GF_ASSERT (this);
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                ret = glusterd_brick_start (volinfo, brick, wait);
+                /* Only a non-forced start gives up on the first failure. */
+                if ((ret != 0) && ((flags & GF_CLI_FLAG_OP_FORCE) == 0))
+                        goto out;
+        }
+
+        /* The volinfo version is bumped only when the status really
+         * changes. A forced start of dead bricks on an already started
+         * volume leaves the status at GLUSTERD_STATUS_STARTED, so the
+         * version must stay as it is in that case. */
+        ver_action = (volinfo->status != GLUSTERD_STATUS_STARTED)
+                     ? GLUSTERD_VOLINFO_VER_AC_INCREMENT
+                     : GLUSTERD_VOLINFO_VER_AC_NONE;
+
+        glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED);
+
+        ret = glusterd_store_volinfo (volinfo, ver_action);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_SET_FAIL,
+                        "Failed to store volinfo of "
+                        "%s volume", volinfo->volname);
+        }
+
+out:
+        gf_msg_trace (this->name, 0, "returning %d ", ret);
+        return ret;
+}
+
+/*
+ * Commit handler for "volume start".
+ *
+ * Reads the volume name and flags from @dict, looks the volume up and takes
+ * a reference on it for the duration of the call.  For every local brick
+ * whose mount_dir is still empty the value is copied from the
+ * "brick<N>.mount_dir" key of @dict.  If the global NFS-Ganesha option
+ * parses as true, nfs.disable is forced to "on" for the volume.  The volume
+ * is then started, followed by the snapd service manager (non-snapshot
+ * volumes only) and glusterd_svcs_manager().
+ *
+ * Returns 0 on success and non-zero on failure.  @op_errstr is not written
+ * by this function.
+ */
+int
+glusterd_op_start_volume (dict_t *dict, char **op_errstr)
+{
+        int                   ret             = 0;
+        int32_t               brick_count     = 0;
+        char                 *brick_mount_dir = NULL;
+        char                  key[PATH_MAX]   = "";
+        char                 *volname         = NULL;
+        char                 *str             = NULL;
+        gf_boolean_t          option          = _gf_false;
+        gf_boolean_t          volinfo_refd    = _gf_false;
+        int                   flags           = 0;
+        glusterd_volinfo_t   *volinfo         = NULL;
+        glusterd_brickinfo_t *brickinfo       = NULL;
+        xlator_t             *this            = NULL;
+        glusterd_conf_t      *conf            = NULL;
+        glusterd_svc_t       *svc             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        ret = glusterd_op_start_volume_args_get (dict, &volname, &flags);
+        if (ret)
+                goto out;
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                goto out;
+        }
+
+        /* This is an incremental approach to have all the volinfo objects ref
+         * count. The first attempt is made in volume start transaction to
+         * ensure it doesn't race with import volume where stale volume is
+         * deleted. There are multiple instances of GlusterD crashing in
+         * bug-948686.t because of this. Once this approach is full proof, all
+         * other volinfo objects will be refcounted.
+         */
+        glusterd_volinfo_ref (volinfo);
+        volinfo_refd = _gf_true;
+
+        /* A bricks mount dir is required only by snapshots which were
+         * introduced in gluster-3.6.0
+         */
+        if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+                cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                        brick_count++;
+                        /* Don't check bricks that are not owned by you
+                         */
+                        if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+                                continue;
+                        if (strlen (brickinfo->mount_dir) < 1) {
+                                brick_mount_dir = NULL;
+                                snprintf (key, sizeof (key),
+                                          "brick%d.mount_dir", brick_count);
+                                ret = dict_get_str (dict, key,
+                                                    &brick_mount_dir);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                GD_MSG_DICT_GET_FAILED,
+                                                "%s not present", key);
+                                        goto out;
+                                }
+                                /* strncpy() does not terminate the
+                                 * destination when the source fills it, so
+                                 * copy one byte less and terminate
+                                 * explicitly. */
+                                strncpy (brickinfo->mount_dir, brick_mount_dir,
+                                         sizeof (brickinfo->mount_dir) - 1);
+                                brickinfo->mount_dir
+                                        [sizeof (brickinfo->mount_dir) - 1] = '\0';
+                        }
+                }
+        }
+
+        ret = dict_get_str (conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
+        if (ret) {
+                /* Any lookup failure leaves 'str' unset, so treat every
+                 * non-zero return (not only -1) as "option absent" instead
+                 * of handing a NULL string to gf_string2boolean(). */
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_DICT_GET_FAILED, "Global dict not present.");
+                ret = 0;
+
+        } else {
+                ret = gf_string2boolean (str, &option);
+                /* Check if the feature is enabled and set nfs-disable to true */
+                if (option) {
+                        gf_msg_debug (this->name, 0, "NFS-Ganesha is enabled");
+                        /* Gluster-nfs should not start when NFS-Ganesha is enabled*/
+                        ret = dict_set_str (volinfo->dict, NFS_DISABLE_MAP_KEY, "on");
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_SET_FAILED,
+                                        "Failed to set nfs.disable for "
+                                        "volume %s", volname);
+                                goto out;
+                        }
+                }
+        }
+
+        ret = glusterd_start_volume (volinfo, flags, _gf_true);
+        if (ret)
+                goto out;
+
+        if (!volinfo->is_snap_volume) {
+                svc = &(volinfo->snapd.svc);
+                ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT);
+                if (ret)
+                        goto out;
+        }
+        if (conf->op_version <= GD_OP_VERSION_3_7_6) {
+                /*
+                 * Starting tier daemon on originator node will fail if
+                 * atleast one of the peer host brick for the volume.
+                 * Because The bricks in the peer haven't started when you
+                 * commit on originator node.
+                 * Please upgrade to version greater than GD_OP_VERSION_3_7_6
+                 */
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                        if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+                                glusterd_defrag_info_set (volinfo, dict,
+                                                          GF_DEFRAG_CMD_START_TIER,
+                                                          GF_DEFRAG_CMD_START,
+                                                          GD_OP_REBALANCE);
+                        }
+                        glusterd_restart_rebalance_for_volume (volinfo);
+                }
+        } else {
+                /* Starting tier daemon is moved into post validate phase */
+        }
+
+
+        ret = glusterd_svcs_manager (volinfo);
+
+out:
+        /* Drop the reference taken above.  The original test was inverted
+         * ("!volinfo"), which never released the reference on a valid
+         * volinfo and passed NULL to the unref call otherwise. */
+        if (volinfo_refd)
+                glusterd_volinfo_unref (volinfo);
+
+        gf_msg_trace (this->name, 0, "returning %d ", ret);
+        return ret;
+}
+
+/*
+ * Stop all bricks of @volinfo and record the volume as stopped.
+ *
+ * Steps, in order: stop each brick (abort on the first failure), set the
+ * status to GLUSTERD_STATUS_STOPPED, store the volinfo with a version
+ * increment, lazily unmount the quota auxiliary mount when its pidfile
+ * shows a running service, invoke the snapd service manager for
+ * non-snapshot volumes, and finally call glusterd_svcs_manager().
+ *
+ * A failed lazy unmount is only logged; it does not abort the sequence.
+ * Returns 0 on success, non-zero otherwise (-1 when @volinfo is NULL).
+ */
+int
+glusterd_stop_volume (glusterd_volinfo_t *volinfo)
+{
+        xlator_t             *this                  = THIS;
+        glusterd_brickinfo_t *brick                 = NULL;
+        glusterd_svc_t       *snapd_svc             = NULL;
+        char                  aux_pidfile[PATH_MAX] = {0,};
+        char                  aux_mount[PATH_MAX]   = {0,};
+        int                   ret                   = -1;
+
+        GF_ASSERT (this);
+
+        GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                ret = glusterd_brick_stop (volinfo, brick, _gf_false);
+                if (!ret)
+                        continue;
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRICK_STOP_FAIL,
+                        "Failed to stop brick (%s)", brick->path);
+                goto out;
+        }
+
+        glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED);
+
+        ret = glusterd_store_volinfo (volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_SET_FAIL,
+                        "Failed to store volinfo of %s volume",
+                        volinfo->volname);
+                goto out;
+        }
+
+        /* If quota auxiliary mount is present, unmount it */
+        GLUSTERFS_GET_AUX_MOUNT_PIDFILE (aux_pidfile, volinfo->volname);
+
+        if (gf_is_service_running (aux_pidfile, NULL)) {
+                GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (aux_mount,
+                                                   volinfo->volname, "/");
+
+                ret = gf_umount_lazy (this->name, aux_mount, 0);
+                if (ret)
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_UNOUNT_FAILED,
+                                "umount on %s failed", aux_mount);
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "Aux mount of volume %s absent",
+                              volinfo->volname);
+        }
+
+        if (!volinfo->is_snap_volume) {
+                snapd_svc = &(volinfo->snapd.svc);
+                ret = snapd_svc->manager (snapd_svc, volinfo,
+                                          PROC_START_NO_WAIT);
+                if (ret)
+                        goto out;
+        }
+
+        ret = glusterd_svcs_manager (volinfo);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_GRAPH_CHANGE_NOTIFY_FAIL,
+                        "Failed to notify graph change for %s volume",
+                        volinfo->volname);
+
+out:
+        return ret;
+}
+
+/*
+ * Commit handler for "volume stop": extract the volume name (and flags)
+ * from @dict, find the volume and hand it to glusterd_stop_volume().
+ * Returns 0 on success, the failing callee's return value otherwise.
+ */
+int
+glusterd_op_stop_volume (dict_t *dict)
+{
+        xlator_t           *this    = THIS;
+        glusterd_volinfo_t *vol     = NULL;
+        char               *volname = NULL;
+        int                 flags   = 0;
+        int                 ret     = 0;
+
+        GF_ASSERT (this);
+
+        ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+        if (ret)
+                return ret;
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                return ret;
+        }
+
+        ret = glusterd_stop_volume (vol);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_STOP_FAILED, "Failed to stop %s volume",
+                        volname);
+
+        return ret;
+}
+
+/*
+ * Commit handler for "volume delete": read "volname" from @dict, find the
+ * volume, remove its auxiliary mount and delete it.  The return value of
+ * the first failing step (or of glusterd_delete_volume()) is logged at
+ * debug level and returned.
+ */
+int
+glusterd_op_delete_volume (dict_t *dict)
+{
+        xlator_t           *this    = THIS;
+        glusterd_conf_t    *priv    = NULL;
+        glusterd_volinfo_t *vol     = NULL;
+        char               *volname = NULL;
+        int                 ret     = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+                goto done;
+        }
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, FMTSTR_CHECK_VOL_EXISTS,
+                        volname);
+                goto done;
+        }
+
+        ret = glusterd_remove_auxiliary_mount (volname);
+        if (!ret)
+                ret = glusterd_delete_volume (vol);
+
+done:
+        gf_msg_debug (this->name, 0, "returning %d", ret);
+        return ret;
+}
+
+/*
+ * Commit handler for "volume heal".  Nothing is done here: the necessary
+ * subtasks of heal are completed in the brick op, so this always
+ * returns 0 and touches neither argument.
+ */
+int
+glusterd_op_heal_volume (dict_t *dict, char **op_errstr)
+{
+        return 0;
+}
+
+/*
+ * Commit handler for "volume statedump".
+ *
+ * The option string selects the target: if it contains "nfs" the NFS
+ * statedump is taken, else if it contains "quotad" the quotad statedump is
+ * taken, otherwise every brick of the volume is dumped in turn.  A brick
+ * failure is logged as a warning and the loop continues; the value
+ * returned is that of the last call made.
+ */
+int
+glusterd_op_statedump_volume (dict_t *dict, char **op_errstr)
+{
+        glusterd_volinfo_t   *vol        = NULL;
+        glusterd_brickinfo_t *brick      = NULL;
+        char                 *volname    = NULL;
+        char                 *options    = NULL;
+        int                   option_cnt = 0;
+        int                   ret        = 0;
+
+        ret = glusterd_op_statedump_volume_args_get (dict, &volname, &options,
+                                                     &option_cnt);
+        if (ret)
+                return ret;
+
+        ret = glusterd_volinfo_find (volname, &vol);
+        if (ret)
+                return ret;
+
+        gf_msg_debug ("glusterd", 0, "Performing statedump on volume %s", volname);
+
+        if (strstr (options, "nfs"))
+                return glusterd_nfs_statedump (options, option_cnt, op_errstr);
+
+        if (strstr (options, "quotad"))
+                return glusterd_quotad_statedump (options, option_cnt,
+                                                  op_errstr);
+
+        cds_list_for_each_entry (brick, &vol->bricks, brick_list) {
+                ret = glusterd_brick_statedump (vol, brick, options,
+                                                option_cnt, op_errstr);
+                if (!ret)
+                        continue;
+
+                /* Let us take the statedump of other bricks instead of
+                 * exiting, if statedump of this brick fails.
+                 */
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_BRK_STATEDUMP_FAIL,
+                        "could not take the statedump of the brick %s:%s."
+                        " Proceeding to other bricks",
+                        brick->hostname, brick->path);
+        }
+
+        return ret;
+}
+
+/*
+ * Issue the clear-locks command @cmd as an lgetxattr on "@mntpt/@path" and
+ * store the reply in @result.
+ *
+ * @result is treated as a PATH_MAX byte buffer (this is what the original
+ * code passed as the size).  At most PATH_MAX - 1 bytes are requested and
+ * the reply is explicitly NUL terminated, so the caller can use it as a C
+ * string.
+ *
+ * On failure a description is written to @errstr (at most @err_len bytes)
+ * and a negative value is returned; 0 is returned on success.
+ * @volinfo is not used.
+ */
+int
+glusterd_clearlocks_send_cmd (glusterd_volinfo_t *volinfo, char *cmd,
+                              char *path, char *result, char *errstr,
+                              int err_len, char *mntpt)
+{
+        int  ret               = -1;
+        char abspath[PATH_MAX] = {0, };
+
+        snprintf (abspath, sizeof (abspath), "%s/%s", mntpt, path);
+        ret = sys_lgetxattr (abspath, cmd, result, PATH_MAX - 1);
+        if (ret < 0) {
+                snprintf (errstr, err_len, "clear-locks getxattr command "
+                          "failed. Reason: %s", strerror (errno));
+                gf_msg_debug (THIS->name, 0, "%s", errstr);
+                goto out;
+        }
+
+        /* ret <= PATH_MAX - 1 here, so this index is inside the buffer */
+        result[ret] = '\0';
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Remove the directory @mntpt with sys_rmdir().  Returns 0 on success;
+ * on failure a debug message is logged and sys_rmdir()'s return value is
+ * passed back.  @volinfo is not used.
+ */
+int
+glusterd_clearlocks_rmdir_mount (glusterd_volinfo_t *volinfo, char *mntpt)
+{
+        int rc = sys_rmdir (mntpt);
+
+        if (rc)
+                gf_msg_debug (THIS->name, 0, "rmdir failed");
+
+        return rc;
+}
+
+/*
+ * Run "umount -f @mntpt" through the runner framework.  The glusterd
+ * big_lock is released while the command runs and re-acquired afterwards.
+ * A failure is only logged at debug level.  @volinfo is not used.
+ */
+void
+glusterd_clearlocks_unmount (glusterd_volinfo_t *volinfo, char *mntpt)
+{
+        glusterd_conf_t *conf   = THIS->private;
+        runner_t         umount = {0,};
+        int              status = 0;
+
+        /*umount failures are ignored. Using stat we could have avoided
+         * attempting to unmount a non-existent filesystem. But a failure of
+         * stat() on mount can be due to network failures.*/
+
+        runinit (&umount);
+        runner_add_args (&umount, _PATH_UMOUNT, "-f", NULL);
+        runner_argprintf (&umount, "%s", mntpt);
+
+        synclock_unlock (&conf->big_lock);
+        status = runner_run (&umount);
+        synclock_lock (&conf->big_lock);
+
+        if (status != 0)
+                gf_msg_debug ("glusterd", 0,
+                              "umount failed on maintenance client");
+}
+
+/*
+ * Create a temporary directory "/tmp/<volname>.XXXXXX" with mkdtemp() and
+ * return a heap copy of its path in *@mntpt (to be released by the caller).
+ *
+ * Returns 0 on success and -1 on failure.  *@mntpt is written only on
+ * success; if the path cannot be duplicated the freshly created directory
+ * is removed again so that nothing is left behind.
+ */
+int
+glusterd_clearlocks_create_mount (glusterd_volinfo_t *volinfo, char **mntpt)
+{
+        int   ret                = -1;
+        char  template[PATH_MAX] = {0,};
+        char *tmpl               = NULL;
+        char *copy               = NULL;
+
+        snprintf (template, sizeof (template), "/tmp/%s.XXXXXX",
+                  volinfo->volname);
+        tmpl = mkdtemp (template);
+        if (!tmpl) {
+                gf_msg_debug (THIS->name, 0, "Couldn't create temporary "
+                              "mount directory. Reason %s", strerror (errno));
+                goto out;
+        }
+
+        copy = gf_strdup (tmpl);
+        if (!copy) {
+                /* Previously this case reported success with a NULL path. */
+                gf_msg_debug (THIS->name, 0, "Couldn't duplicate temporary "
+                              "mount directory path");
+                sys_rmdir (tmpl);
+                goto out;
+        }
+
+        *mntpt = copy;
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Start a glusterfs client for @volinfo on @mntpt using the trusted client
+ * volfile.  The command line carries a dedicated log file, optional
+ * memory accounting, one "--xlator-option" per entry of @xl_opts (up to
+ * brick_count entries, stopping at the first NULL) and three options that
+ * switch replicate self-heal off.  The big_lock is dropped while the
+ * process is launched.  Returns runner_run()'s result.
+ */
+int
+glusterd_clearlocks_mount (glusterd_volinfo_t *volinfo, char **xl_opts,
+                           char *mntpt)
+{
+        glusterd_conf_t *conf               = THIS->private;
+        runner_t         client             = {0,};
+        char             volfile[PATH_MAX]  = {0,};
+        char             heal_off[3][1024]  = {
+                "*replicate*.data-self-heal=off",
+                "*replicate*.metadata-self-heal=off",
+                "*replicate*.entry-self-heal=off"};
+        int              n                  = 0;
+        int              rc                 = -1;
+
+        runinit (&client);
+        glusterd_get_trusted_client_filepath (volfile, volinfo,
+                                              volinfo->transport_type);
+        runner_add_args (&client, SBIN_DIR"/glusterfs", "-f", NULL);
+        runner_argprintf (&client, "%s", volfile);
+        runner_add_arg (&client, "-l");
+        runner_argprintf (&client, DEFAULT_LOG_FILE_DIRECTORY
+                          "/%s-clearlocks-mnt.log", volinfo->volname);
+        if (volinfo->memory_accounting)
+                runner_add_arg (&client, "--mem-accounting");
+
+        n = 0;
+        while (n < volinfo->brick_count && xl_opts[n]) {
+                runner_add_arg (&client, "--xlator-option");
+                runner_argprintf (&client, "%s", xl_opts[n]);
+                n++;
+        }
+
+        for (n = 0; n < 3; n++)
+                runner_add_args (&client, "--xlator-option",
+                                 heal_off[n], NULL);
+
+        runner_argprintf (&client, "%s", mntpt);
+
+        synclock_unlock (&conf->big_lock);
+        rc = runner_run (&client);
+        synclock_lock (&conf->big_lock);
+
+        if (rc)
+                gf_msg_debug (THIS->name, 0,
+                              "Could not start glusterfs");
+        else
+                gf_msg_debug (THIS->name, 0,
+                              "Started glusterfs successfully");
+
+        return rc;
+}
+
+/*
+ * For each brick of @volinfo that belongs to this node, look up the brick's
+ * port in the pmap registry and store an allocated string of the form
+ * "<volname>-client-<brick index>.remote-port=<port>" in the next free
+ * slot of @xl_opts.  The brick index counts all bricks of the volume,
+ * starting at 0; the slot index counts only local bricks.
+ *
+ * Returns 0 on success and -1 when @xl_opts is NULL, a port cannot be
+ * found or a string cannot be allocated.  Strings stored before a failure
+ * stay in @xl_opts.
+ */
+int
+glusterd_clearlocks_get_local_client_ports (glusterd_volinfo_t *volinfo,
+                                            char **xl_opts)
+{
+        glusterd_brickinfo_t *brick               = NULL;
+        char                  pmap_name[PATH_MAX] = {0,};
+        int                   brick_idx           = -1;
+        int                   slot                = 0;
+        int                   port                = 0;
+        int                   ret                 = -1;
+
+        GF_ASSERT (xl_opts);
+        if (!xl_opts) {
+                gf_msg_debug (THIS->name, 0, "Should pass non-NULL xl_opts");
+                return ret;
+        }
+
+        cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                brick_idx++;
+                if (gf_uuid_compare (brick->uuid, MY_UUID))
+                        continue;
+
+                /* rdma bricks are registered with a ".rdma" suffix */
+                snprintf (pmap_name, sizeof (pmap_name),
+                          (volinfo->transport_type == GF_TRANSPORT_RDMA) ?
+                          "%s.rdma" : "%s",
+                          brick->path);
+
+                port = pmap_registry_search (THIS, pmap_name,
+                                             GF_PMAP_PORT_BRICKSERVER);
+                if (!port) {
+                        gf_msg_debug (THIS->name, 0,
+                                      "Couldn't get port  for brick %s:%s",
+                                      brick->hostname, brick->path);
+                        return -1;
+                }
+
+                ret = gf_asprintf (&xl_opts[slot],
+                                   "%s-client-%d.remote-port=%d",
+                                   volinfo->volname, brick_idx, port);
+                if (ret == -1) {
+                        xl_opts[slot] = NULL;
+                        return ret;
+                }
+                slot++;
+        }
+
+        return 0;
+}
+
+/*
+ * Commit handler for "volume clear-locks".
+ *
+ * Reads "volname", "path", "kind", "type" and the optional "opts" from
+ * @dict, builds the clear-locks xattr command, mounts a maintenance client
+ * of the volume on a temporary directory, sends the command with
+ * lgetxattr and stores the reply under "lk-summary" in @rsp_dict.  The
+ * client is unmounted and the directory removed before returning.
+ *
+ * Returns 0 on success.  On failure a non-zero value is returned and a
+ * copy of the error message is stored in *@op_errstr.
+ *
+ * Fixes over the previous version:
+ *  - a failed xl_opts allocation no longer returns 0;
+ *  - the temporary directory is removed when the mount itself fails;
+ *  - a missing "opts" value is not passed as NULL to a "%s" conversion;
+ *  - a failure to build the command string now produces a message.
+ */
+int
+glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+        int32_t             ret              = -1;
+        int                 i                = 0;
+        char               *volname          = NULL;
+        char               *path             = NULL;
+        char               *kind             = NULL;
+        char               *type             = NULL;
+        char               *opts             = NULL;
+        char               *cmd_str          = NULL;
+        char               *free_ptr         = NULL;
+        char                msg[PATH_MAX]    = {0,};
+        char                result[PATH_MAX] = {0,};
+        char               *mntpt            = NULL;
+        char              **xl_opts          = NULL;
+        glusterd_volinfo_t *volinfo          = NULL;
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get volume name");
+                goto out;
+        }
+        gf_msg_debug ("glusterd", 0, "Performing clearlocks on volume %s", volname);
+
+        ret = dict_get_str (dict, "path", &path);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get path");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "kind", &kind);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get kind");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "type", &type);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get type");
+                goto out;
+        }
+
+        /* "opts" is optional: a failed lookup just leaves it NULL */
+        ret = dict_get_str (dict, "opts", &opts);
+        if (ret)
+                ret = 0;
+
+        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                GD_MSG_CLRCLK_VOL_REQ_RCVD,
+                "Received clear-locks request for "
+                "volume %s with kind %s type %s and options %s", volname,
+                kind, type, opts ? opts : "(none)");
+
+        if (opts)
+                ret = gf_asprintf (&cmd_str, GF_XATTR_CLRLK_CMD".t%s.k%s.%s",
+                                   type, kind, opts);
+        else
+                ret = gf_asprintf (&cmd_str, GF_XATTR_CLRLK_CMD".t%s.k%s",
+                                   type, kind);
+        if (ret == -1) {
+                snprintf (msg, sizeof (msg), "Failed to build clear-locks "
+                          "command");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Volume %s doesn't exist.",
+                          volname);
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_FOUND, "%s", msg);
+                goto out;
+        }
+
+        /* One slot per brick plus a NULL terminator */
+        xl_opts = GF_CALLOC (volinfo->brick_count+1, sizeof (char*),
+                             gf_gld_mt_charptr);
+        if (!xl_opts) {
+                /* ret is 0 here from the successful lookup above */
+                ret = -1;
+                snprintf (msg, sizeof (msg), "Failed to allocate memory "
+                          "for client options");
+                goto out;
+        }
+
+        ret = glusterd_clearlocks_get_local_client_ports (volinfo, xl_opts);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Couldn't get port numbers of "
+                          "local bricks");
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_BRK_PORT_NUM_GET_FAIL, "%s", msg);
+                goto out;
+        }
+
+        ret = glusterd_clearlocks_create_mount (volinfo, &mntpt);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Creating mount directory "
+                          "for clear-locks failed.");
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, "%s", msg);
+                goto out;
+        }
+
+        ret = glusterd_clearlocks_mount (volinfo, xl_opts, mntpt);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Failed to mount clear-locks "
+                          "maintenance client.");
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL, "%s", msg);
+                /* nothing is mounted, but the directory still exists */
+                goto remove_mntpt;
+        }
+
+        ret = glusterd_clearlocks_send_cmd (volinfo, cmd_str, path, result,
+                                            msg, sizeof (msg), mntpt);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CLRCLK_SND_CMD_FAIL, "%s", msg);
+                goto umount;
+        }
+
+        /* A failure to record the summary is logged but, as before, does
+         * not change the return value. */
+        free_ptr = gf_strdup(result);
+        if (dict_set_dynstr (rsp_dict, "lk-summary", free_ptr)) {
+                GF_FREE (free_ptr);
+                snprintf (msg, sizeof (msg), "Failed to set clear-locks "
+                          "result");
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED, "%s", msg);
+        }
+
+umount:
+        glusterd_clearlocks_unmount (volinfo, mntpt);
+
+remove_mntpt:
+        if (glusterd_clearlocks_rmdir_mount (volinfo, mntpt))
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL, "Couldn't unmount "
+                        "clear-locks mount point");
+
+out:
+        if (ret)
+                *op_errstr = gf_strdup (msg);
+
+        /* xl_opts is only non-NULL after volinfo has been found */
+        if (xl_opts) {
+                for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++)
+                        GF_FREE (xl_opts[i]);
+                GF_FREE (xl_opts);
+        }
+
+        GF_FREE (cmd_str);
+
+        GF_FREE (mntpt);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
new file mode 100644
index 00000000000..66e9327e030
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -0,0 +1,3026 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd-volgen.h"
+#include "glusterd-utils.h"
+
+/*
+ * Fetch the option @threshold_key from @volinfo and convert it to an int.
+ *
+ * Returns 0 when the lookup yields no string, -1 when gf_string2int()
+ * reports -1 (an error is logged), and the converted value otherwise.
+ */
+static int
+get_tier_freq_threshold (glusterd_volinfo_t *volinfo, char *threshold_key) {
+        xlator_t *this      = THIS;
+        char     *raw_value = NULL;
+        int       parsed    = 0;
+
+        GF_ASSERT (this);
+
+        glusterd_volinfo_get (volinfo, threshold_key, &raw_value);
+        if (!raw_value)
+                return 0;
+
+        if (gf_string2int (raw_value, &parsed) == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INCOMPATIBLE_VALUE,
+                        "Failed to convert string to integer");
+                return -1;
+        }
+
+        return parsed;
+}
+
+/*
+ * Validation function for record-counters
+ * if write-freq-threshold and read-freq-threshold both have non-zero values
+ * record-counters cannot be set to off
+ * if record-counters is set to on
+ * check if both the frequency thresholds are zero, then pop
+ * a note, but volume set is not failed.
+ *
+ * Returns 0 when @value is acceptable.  Otherwise a non-zero value is
+ * returned, the reason is logged and a copy of it is stored in
+ * *@op_errstr.  @dict is not used.
+ * */
+static int
+validate_tier_counters (glusterd_volinfo_t      *volinfo,
+                        dict_t                  *dict,
+                        char                    *key,
+                        char                    *value,
+                        char                    **op_errstr) {
+
+        char          errstr[2048] = "";
+        int           ret          = -1;
+        xlator_t     *this         = NULL;
+        gf_boolean_t  origin_val   = -1;
+        int           current_wt   = 0;
+        int           current_rt   = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                snprintf (errstr, sizeof (errstr), "Volume %s is not a tier "
+                          "volume. Option %s is only valid for tier volume.",
+                          volinfo->volname, key);
+                goto out;
+        }
+
+        ret = gf_string2boolean (value, &origin_val);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr), "%s is not a compatible "
+                          "value. %s expects an boolean value", value, key);
+                goto out;
+        }
+
+        /* ret is 0 at this point; the two lookups below must set it to -1
+         * themselves before bailing out, otherwise the failure would be
+         * reported as success. */
+        current_rt = get_tier_freq_threshold (volinfo,
+                                              "cluster.read-freq-threshold");
+        if (current_rt == -1) {
+                snprintf (errstr, sizeof (errstr), " Failed to retrieve value"
+                          " of cluster.read-freq-threshold");
+                ret = -1;
+                goto out;
+        }
+        current_wt = get_tier_freq_threshold (volinfo,
+                                              "cluster.write-freq-threshold");
+        if (current_wt == -1) {
+                snprintf (errstr, sizeof (errstr), " Failed to retrieve value "
+                          "of cluster.write-freq-threshold");
+                ret = -1;
+                goto out;
+        }
+        /* If record-counters is set to off */
+        if (!origin_val) {
+
+                /* Both the thresholds should be zero to set
+                 * record-counters to off*/
+                if (current_rt || current_wt) {
+                        snprintf (errstr, sizeof (errstr),
+                                "Cannot set features.record-counters to \"%s\""
+                                " as cluster.write-freq-threshold is %d"
+                                " and cluster.read-freq-threshold is %d. Please"
+                                " set both cluster.write-freq-threshold and "
+                                " cluster.read-freq-threshold to 0, to set "
+                                " features.record-counters to \"%s\".",
+                                value, current_wt, current_rt, value);
+                        ret = -1;
+                        goto out;
+                }
+        }
+        /* TODO give a warning message to the user. errstr without ret = -1 will
+         * not result in a warning on cli for now.
+        else {
+                if (!current_rt && !current_wt) {
+                        snprintf (errstr, sizeof (errstr),
+                                " Note : cluster.write-freq-threshold is %d"
+                                " and cluster.read-freq-threshold is %d. Please"
+                                " set both cluster.write-freq-threshold and "
+                                " cluster.read-freq-threshold to"
+                                " appropriate positive values.",
+                                current_wt, current_rt);
+                }
+        }*/
+
+        ret = 0;
+out:
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        return ret;
+
+}
+
+
+/*
+ * Validation function for ctr sql params
+ *      features.ctr-sql-db-cachesize           (Range: 1000 to 262144 pages)
+ *      features.ctr-sql-db-wal-autocheckpoint  (Range: 1000 to 262144 pages)
+ *
+ * @value must convert to a non-negative integer; for keys containing
+ * "sql-db-cachesize" or "sql-db-wal-autocheckpoint" it must also lie in
+ * the range above.  Returns 0 when valid.  Otherwise -1 is returned, the
+ * reason is logged and a copy of it is stored in *@op_errstr.
+ * @volinfo and @dict are not used.
+ * */
+static int
+validate_ctr_sql_params (glusterd_volinfo_t      *volinfo,
+                        dict_t                  *dict,
+                        char                    *key,
+                        char                    *value,
+                        char                    **op_errstr)
+{
+        int       ret          = -1;
+        xlator_t *this         = NULL;
+        char      errstr[2048] = "";
+        int       origin_val   = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+
+        ret = gf_string2int (value, &origin_val);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr), "%s is not a compatible "
+                          "value. %s expects an integer value.", value, key);
+                ret = -1;
+                goto out;
+        }
+
+        if (origin_val < 0) {
+                /* the literals previously joined into "positiveinteger" */
+                snprintf (errstr, sizeof (errstr), "%s is not a "
+                          "compatible value. %s expects a positive "
+                          "integer value.", value, key);
+                ret = -1;
+                goto out;
+        }
+
+        if (strstr (key, "sql-db-cachesize") ||
+                strstr (key, "sql-db-wal-autocheckpoint")) {
+                if ((origin_val < 1000) || (origin_val > 262144)) {
+                        snprintf (errstr, sizeof (errstr), "%s is not a "
+                                  "compatible value. %s "
+                                  "expects a value between : "
+                                  "1000 to 262144.",
+                                  value, key);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+
+        ret = 0;
+out:
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+        return ret;
+}
+
+
+/* Validation for tiering frequency thresholds
+ * If any of the frequency thresholds are set to a non-zero value,
+ * switch record-counters on, if not already on
+ * If both the frequency thresholds are set to zero,
+ * switch record-counters off, if not already off
+ *
+ * The switch is performed by setting "features.record-counters" directly
+ * in volinfo->dict.  Returns 0 when @value is acceptable.  Otherwise a
+ * non-zero value is returned, the reason is logged and a copy of it is
+ * stored in *@op_errstr.  @dict is not used.
+ * */
+static int
+validate_tier_thresholds (glusterd_volinfo_t      *volinfo,
+                          dict_t                  *dict,
+                          char                    *key,
+                          char                    *value,
+                          char                    **op_errstr)
+{
+        char          errstr[2048] = "";
+        int           ret          = -1;
+        xlator_t     *this         = NULL;
+        int           origin_val   = -1;
+        gf_boolean_t  current_rc   = _gf_false;
+        int           current_wt   = 0;
+        int           current_rt   = 0;
+        gf_boolean_t  is_set_rc    = _gf_false;
+        char         *proposed_rc  = NULL;
+        const char   *rc_name      = NULL;
+
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                snprintf (errstr, sizeof (errstr), "Volume %s is not a tier "
+                          "volume. Option %s is only valid for tier volume.",
+                          volinfo->volname, key);
+                goto out;
+        }
+
+
+        ret = gf_string2int (value, &origin_val);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr), "%s is not a compatible "
+                          "value. %s expects an integer value.", value, key);
+                ret = -1;
+                goto out;
+        }
+
+        if (origin_val < 0) {
+                snprintf (errstr, sizeof (errstr), "%s is not a "
+                          "compatible value. %s expects a positive "
+                          "integer value.", value, key);
+                ret = -1;
+                goto out;
+        }
+
+        /* Get the record-counters value */
+        ret = glusterd_volinfo_get_boolean (volinfo,
+                                            "features.record-counters");
+        if (ret == -1) {
+                snprintf (errstr, sizeof (errstr), "Failed to retrieve value "
+                          "of features.record-counters from volume info");
+                goto out;
+        }
+        current_rc = ret;
+
+        /* if any of the thresholds are set to a non-zero value
+         * switch record-counters on, if not already on*/
+        if (origin_val > 0) {
+                if (!current_rc) {
+                        is_set_rc = _gf_true;
+                        current_rc = _gf_true;
+                }
+        } else {
+                /* ret still holds the boolean read above (0 or 1), so the
+                 * failure paths below set it to -1 explicitly; otherwise a
+                 * lookup failure could be returned as success. */
+
+                /* if the set is for write-freq-threshold */
+                if (strstr (key, "write-freq-threshold")) {
+                        current_rt = get_tier_freq_threshold (volinfo,
+                                        "cluster.read-freq-threshold");
+                        if (current_rt == -1) {
+                                snprintf (errstr, sizeof (errstr),
+                                          " Failed to retrieve value of "
+                                          "cluster.read-freq-threshold");
+                                ret = -1;
+                                goto out;
+                        }
+                        current_wt = origin_val;
+                }
+                /* else it should be read-freq-threshold */
+                else {
+                        current_wt = get_tier_freq_threshold  (volinfo,
+                                        "cluster.write-freq-threshold");
+                        if (current_wt == -1) {
+                                snprintf (errstr, sizeof (errstr),
+                                          " Failed to retrieve value of "
+                                          "cluster.write-freq-threshold");
+                                ret = -1;
+                                goto out;
+                        }
+                        current_rt = origin_val;
+                }
+
+                /* Since both the thresholds are zero, set record-counters
+                 * to off, if not already off */
+                if (current_rt == 0 && current_wt == 0) {
+                        if (current_rc) {
+                                is_set_rc = _gf_true;
+                                current_rc = _gf_false;
+                        }
+                }
+        }
+
+        /* if record-counter has to be set to proposed value */
+        if (is_set_rc) {
+                /* rc_name is used in the error text so that a failed
+                 * allocation never hands a NULL pointer to "%s" */
+                rc_name = current_rc ? "on" : "off";
+
+                ret = gf_asprintf (&proposed_rc, "%s", rc_name);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INCOMPATIBLE_VALUE,
+                                "Failed to allocate memory to dict_value");
+                        proposed_rc = NULL;
+                } else {
+                        ret = dict_set_str (volinfo->dict,
+                                            "features.record-counters",
+                                            proposed_rc);
+                }
+
+                if (ret) {
+                        snprintf (errstr, sizeof (errstr),
+                                "Failed to set features.record-counters "
+                                "to \"%s\" automatically. "
+                                "Please try to set features.record-counters "
+                                "\"%s\" manually. The options "
+                                "cluster.write-freq-threshold and "
+                                "cluster.read-freq-threshold can only "
+                                "be set to a non zero value, if "
+                                "features.record-counters is "
+                                "set to \"on\".", rc_name, rc_name);
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+                if (proposed_rc)
+                        GF_FREE (proposed_rc);
+        }
+        return ret;
+}
+
+
+
+/*
+ * Validation for the tier volume options.
+ *
+ *  - the volume must be of type tier;
+ *  - "cluster.tier-mode" accepts only "test" or "cache";
+ *  - "tier-pause" accepts only "off" or "on";
+ *  - every other option must be an integer: watermark-hi/low must lie in
+ *    1-99 with low not above hi (the option not being set is read from
+ *    @volinfo), and the promote/demote frequency, tier-max-mb,
+ *    tier-max-promote-file-size and tier-max-files options must be >= 1.
+ *
+ * Returns 0 when @value is acceptable.  Otherwise -1 is returned, the
+ * reason is logged and a copy of it is stored in *@op_errstr; this now
+ * also holds for a bad tier-mode / tier-pause value, which used to fail
+ * without any message.  @dict is not used.
+ */
+static int
+validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+               char *value, char **op_errstr)
+{
+        char      errstr[2048]   = "";
+        int       ret            = -1;
+        xlator_t *this           = NULL;
+        int       origin_val     = -1;
+        char     *current_wm_hi  = NULL;
+        char     *current_wm_low = NULL;
+        uint64_t  wm_hi          = 0;
+        uint64_t  wm_low         = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+                snprintf (errstr, sizeof (errstr), "Volume %s is not a tier "
+                          "volume. Option %s is only valid for tier volume.",
+                          volinfo->volname, key);
+                goto out;
+        }
+
+        if (strstr (key, "cluster.tier-mode")) {
+                if (strcmp(value, "test") &&
+                    strcmp(value, "cache")) {
+                        snprintf (errstr, sizeof (errstr), "%s is not a "
+                                  "compatible value. %s expects either "
+                                  "\"test\" or \"cache\".", value, key);
+                        goto out;
+                }
+                ret = 0;
+                goto out;
+        } else if (strstr (key, "tier-pause")) {
+                if (strcmp(value, "off") &&
+                    strcmp(value, "on")) {
+                        snprintf (errstr, sizeof (errstr), "%s is not a "
+                                  "compatible value. %s expects either "
+                                  "\"on\" or \"off\".", value, key);
+                        goto out;
+                }
+                ret = 0;
+                goto out;
+        }
+
+        /*
+         * Rest of the volume set options for tier are expecting a positive
+         * Integer. Change the function accordingly if this constraint is
+         * changed.
+         */
+        if (gf_string2int (value, &origin_val)) {
+                snprintf (errstr, sizeof (errstr), "%s is not a compatible "
+                          "value. %s expects an integer value.",
+                          value, key);
+                goto out;
+        }
+
+        if (strstr (key, "watermark-hi") ||
+            strstr (key, "watermark-low")) {
+                if ((origin_val < 1) || (origin_val > 99)) {
+                        snprintf (errstr, sizeof (errstr), "%s is not a "
+                                  "compatible value. %s expects a "
+                                  "percentage from 1-99.",
+                                  value, key);
+                        goto out;
+                }
+
+                /* Take the watermark being set from @value and the other
+                 * one from the volume's current options. */
+                if (strstr (key, "watermark-hi")) {
+                        wm_hi = origin_val;
+                } else {
+                        glusterd_volinfo_get (volinfo,
+                                              "cluster.watermark-hi",
+                                              &current_wm_hi);
+                        gf_string2bytesize_uint64 (current_wm_hi,
+                                                   &wm_hi);
+                }
+
+                if (strstr (key, "watermark-low")) {
+                        wm_low = origin_val;
+                } else {
+                        glusterd_volinfo_get (volinfo,
+                                              "cluster.watermark-low",
+                                              &current_wm_low);
+                        gf_string2bytesize_uint64 (current_wm_low,
+                                                   &wm_low);
+                }
+                if (wm_low > wm_hi) {
+                        snprintf (errstr, sizeof (errstr), "lower watermark"
+                                  " cannot exceed upper watermark.");
+                        goto out;
+                }
+        } else if (strstr (key, "tier-promote-frequency") ||
+                   strstr (key, "tier-max-mb") ||
+                   strstr (key, "tier-max-promote-file-size") ||
+                   strstr (key, "tier-max-files") ||
+                   strstr (key, "tier-demote-frequency")) {
+                if (origin_val < 1) {
+                        snprintf (errstr, sizeof (errstr), "%s is not a "
+                                  " compatible value. %s expects a positive "
+                                  "integer value greater than 0.",
+                                  value, key);
+                        goto out;
+                }
+
+        }
+
+        ret = 0;
+out:
+        /* Single reporting point, as in the sibling validators */
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation for performance.cache-min-file-size and
+ * performance.cache-max-file-size (also accepted without the
+ * "performance." prefix).
+ *
+ * When the counterpart option is currently set on the volume, both sizes
+ * are converted to bytes and the minimum must not exceed the maximum.
+ * Returns 0 when acceptable; otherwise -1 is returned, the reason is
+ * logged and a copy of it is stored in *@op_errstr.  @dict is not used.
+ */
+static int
+validate_cache_max_min_size (glusterd_volinfo_t *volinfo, dict_t *dict,
+                             char *key, char *value, char **op_errstr)
+{
+        xlator_t        *this         = THIS;
+        glusterd_conf_t *priv         = NULL;
+        char            *max_str      = NULL;
+        char            *min_str      = NULL;
+        uint64_t         max_bytes    = 0;
+        uint64_t         min_bytes    = 0;
+        char             errstr[2048] = "";
+        int              ret          = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (strcmp (key, "performance.cache-min-file-size") == 0 ||
+            strcmp (key, "cache-min-file-size") == 0) {
+                /* new minimum against the stored maximum */
+                glusterd_volinfo_get (volinfo,
+                                      "performance.cache-max-file-size",
+                                      &max_str);
+                if (max_str) {
+                        gf_string2bytesize_uint64 (max_str, &max_bytes);
+                        gf_string2bytesize_uint64 (value, &min_bytes);
+                        min_str = value;
+                }
+        } else if (strcmp (key, "performance.cache-max-file-size") == 0 ||
+                   strcmp (key, "cache-max-file-size") == 0) {
+                /* new maximum against the stored minimum */
+                glusterd_volinfo_get (volinfo,
+                                      "performance.cache-min-file-size",
+                                      &min_str);
+                if (min_str) {
+                        gf_string2bytesize_uint64 (min_str, &min_bytes);
+                        gf_string2bytesize_uint64 (value, &max_bytes);
+                        max_str = value;
+                }
+        }
+
+        if (min_bytes > max_bytes) {
+                snprintf (errstr, sizeof (errstr),
+                          "cache-min-file-size (%s) is greater than "
+                          "cache-max-file-size (%s)",
+                          min_str, max_str);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_CACHE_MINMAX_SIZE_INVALID, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+                ret = -1;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation for the rebalance throttle option: @value must be one of
+ * "lazy", "normal" or "aggressive", compared without regard to case.
+ * Returns 0 when acceptable; otherwise -1 is returned, the reason is
+ * logged and a copy of it is stored in *@op_errstr.
+ * @volinfo and @dict are not used.
+ */
+static int
+validate_defrag_throttle_option (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                 char *key, char *value, char **op_errstr)
+{
+        xlator_t *this         = THIS;
+        char      errstr[2048] = "";
+        int       ret          = 0;
+
+        GF_ASSERT (this);
+
+        if (strcasecmp (value, "lazy") &&
+            strcasecmp (value, "normal") &&
+            strcasecmp (value, "aggressive")) {
+                ret = -1;
+                snprintf (errstr, sizeof (errstr),
+                          "%s should be {lazy|normal|aggressive}", key);
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation for options that require quota: the option may only be set
+ * when VKEY_FEATURES_QUOTA reads as true on the volume.
+ *
+ * Returns 0 when quota is enabled.  Returns -1 when the quota state cannot
+ * be read (logged only) or when quota is disabled (logged, and a copy of
+ * the message is stored in *@op_errstr).  @dict and @value are not used.
+ */
+static int
+validate_quota (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                char *value, char **op_errstr)
+{
+        xlator_t        *this         = THIS;
+        glusterd_conf_t *priv         = NULL;
+        char             errstr[2048] = "";
+        int              quota_state  = 0;
+        int              ret          = 0;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        quota_state = glusterd_volinfo_get_boolean (volinfo,
+                                                    VKEY_FEATURES_QUOTA);
+        if (quota_state == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_QUOTA_GET_STAT_FAIL,
+                        "failed to get the quota status");
+                ret = -1;
+        } else if (quota_state == _gf_false) {
+                snprintf (errstr, sizeof (errstr),
+                          "Cannot set %s. Enable quota first.", key);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_QUOTA_DISABLED, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+                ret = -1;
+        } else {
+                ret = 0;
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation for the USS option: @value must be accepted by
+ * gf_string2boolean().  Returns 0 when it is; otherwise that function's
+ * return value is passed back, the reason is logged and a copy of it is
+ * stored in *@op_errstr.  @volinfo and @dict are not used.
+ */
+static int
+validate_uss (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+              char *value, char **op_errstr)
+{
+        xlator_t     *this         = THIS;
+        gf_boolean_t  parsed       = _gf_false;
+        char          errstr[2048] = "";
+        int           ret          = 0;
+
+        GF_ASSERT (this);
+
+        ret = gf_string2boolean (value, &parsed);
+        if (ret != 0) {
+                snprintf (errstr, sizeof (errstr),
+                          "%s is not a valid boolean value. %s expects a "
+                          "valid boolean value.", value, key);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation hook for an option whose value is a hidden directory name.
+ *
+ * @value is accepted when all of the following hold:
+ *   - its length is between 2 and NAME_MAX characters (inclusive);
+ *   - its first character is '.';
+ *   - every following character satisfies isalnum() or is '_' or '-'.
+ *
+ * On rejection the reason is logged with EINVAL and a gf_strdup() copy of
+ * the message is stored in *op_errstr. @volinfo and @dict are not examined.
+ *
+ * Returns 0 when @value is acceptable, -1 otherwise.
+ */
+static int
+validate_uss_dir (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                  char *value, char **op_errstr)
+{
+        char      errstr[2048] = "";
+        int       ret          = -1;
+        size_t    len          = 0;
+        size_t    i            = 0;
+        xlator_t *this         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Keep the length in a size_t: narrowing strlen() to int could wrap
+         * for very long inputs and misreport them as "too short". */
+        len = strlen (value);
+        if (len > NAME_MAX) {
+                snprintf (errstr, sizeof (errstr), "value of %s exceedes %d "
+                          "characters", key, NAME_MAX);
+                goto out;
+        } else if (len < 2) {
+                snprintf (errstr, sizeof (errstr), "value of %s too short, "
+                          "expects atleast two characters", key);
+                goto out;
+        }
+
+        if (value[0] != '.') {
+                snprintf (errstr, sizeof (errstr), "%s expects value starting "
+                          "with '.' ", key);
+                goto out;
+        }
+
+        for (i = 1; value[i]; i++) {
+                /* <ctype.h> functions require an argument representable as
+                 * unsigned char (or EOF); a plain char holding a byte >= 0x80
+                 * may be negative, which is undefined behaviour. */
+                if (isalnum ((unsigned char) value[i]) ||
+                    value[i] == '_' || value[i] == '-')
+                        continue;
+
+                /* NOTE(review): the message lists 'a-z' only, although
+                 * isalnum() also accepts upper-case letters. */
+                snprintf (errstr, sizeof (errstr), "%s expects value to"
+                          " contain only '0-9a-z-_'", key);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation hook for options that apply only to striped volumes.
+ *
+ * When volinfo->stripe_count is 1 the message "Cannot set <key> for a
+ * non-stripe volume." is logged and a gf_strdup() copy of it is stored in
+ * *op_errstr. @dict and @value are not examined.
+ *
+ * Returns 0 when the stripe count is anything other than 1, -1 otherwise.
+ */
+static int
+validate_stripe (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                 char *value, char **op_errstr)
+{
+        char             msg[2048] = "";
+        glusterd_conf_t *priv      = NULL;
+        int              rc        = 0;
+        xlator_t        *this      = THIS;
+
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        if (volinfo->stripe_count == 1) {
+                rc = -1;
+                snprintf (msg, sizeof (msg),
+                          "Cannot set %s for a non-stripe volume.", key);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_NON_STRIPE_VOL, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/*
+ * Validation hook for options that apply only to replicated volumes.
+ *
+ * When volinfo->replica_count is 1 the message "Cannot set <key> for a
+ * non-replicate volume." is logged and a gf_strdup() copy of it is stored in
+ * *op_errstr. @dict and @value are not examined.
+ *
+ * Returns 0 when the replica count is anything other than 1, -1 otherwise.
+ */
+static int
+validate_replica (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                  char *value, char **op_errstr)
+{
+        char      msg[2048] = "";
+        int       rc        = 0;
+        xlator_t *this      = THIS;
+
+        GF_ASSERT (this);
+
+        if (volinfo->replica_count == 1) {
+                rc = -1;
+                snprintf (msg, sizeof (msg),
+                          "Cannot set %s for a non-replicate volume.", key);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOL_NOT_REPLICA, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/*
+ * Validation hook for "subvols-per-directory": rejects a requested count
+ * that is larger than volinfo->subvol_count.
+ *
+ * @value is parsed as a base-10 integer with strtol(). Unlike atoi(), whose
+ * behaviour is undefined when the value does not fit in an int, strtol()
+ * saturates at LONG_MAX/LONG_MIN, so an oversized input is reliably reported
+ * as exceeding the subvolume count rather than possibly wrapping to a small
+ * number. Text that does not start with a number still parses as 0, exactly
+ * as it did with atoi().
+ *
+ * On rejection the message is logged and a gf_strdup() copy of it is stored
+ * in *op_errstr. @dict and @key are not examined.
+ *
+ * Returns 0 when the requested count is acceptable, -1 otherwise.
+ */
+static int
+validate_subvols_per_directory (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                char *key, char *value, char **op_errstr)
+{
+        char             errstr[2048] = "";
+        glusterd_conf_t *priv         = NULL;
+        int              ret          = 0;
+        long             subvols      = 0;
+        xlator_t        *this         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        subvols = strtol (value, NULL, 10);
+
+        /* Checking if the subvols-per-directory exceed the total
+           number of subvolumes. The comparison is done in long so that
+           a saturated strtol() result is never narrowed. */
+        if (subvols > (long) volinfo->subvol_count) {
+                snprintf (errstr, sizeof(errstr),
+                          "subvols-per-directory(%ld) is greater "
+                          "than the number of subvolumes(%d).",
+                          subvols, volinfo->subvol_count);
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SUBVOLUMES_EXCEED,
+                        "%s.", errstr);
+                *op_errstr = gf_strdup (errstr);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+/*
+ * Validation hook that only lets the option be set on volumes for which
+ * glusterd_is_volume_replicate() reports true.
+ *
+ * Otherwise "Volume <volname> is not of replicate type" is written to
+ * *op_errstr via gf_asprintf(). @dict, @key and @value are not examined.
+ *
+ * Returns 0 for a replicate volume, -1 otherwise.
+ */
+static int
+validate_replica_heal_enable_disable (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                      char *key, char *value, char **op_errstr)
+{
+        if (glusterd_is_volume_replicate (volinfo))
+                return 0;
+
+        gf_asprintf (op_errstr, "Volume %s is not of replicate type",
+                     volinfo->volname);
+
+        return -1;
+}
+
+/*
+ * Validation hook for the mandatory-locking mode.
+ *
+ * @value must compare equal (case-sensitively, via strcmp) to one of
+ * "off", "file", "forced" or "optimal". Any other value is logged and a
+ * gf_strdup() copy of the message is stored in *op_errstr. @volinfo, @dict
+ * and @key are not examined.
+ *
+ * Returns 0 for an accepted value, -1 otherwise.
+ */
+static int
+validate_mandatory_locking (glusterd_volinfo_t *volinfo, dict_t *dict,
+                            char *key, char *value, char **op_errstr)
+{
+        static const char *const modes[] = {
+                "off", "file", "forced", "optimal"
+        };
+        char      msg[2048] = "";
+        int       rc        = -1;
+        size_t    idx       = 0;
+        xlator_t *this      = THIS;
+
+        GF_ASSERT (this);
+
+        /* Accept as soon as the value matches any known mode. */
+        for (idx = 0; idx < sizeof (modes) / sizeof (modes[0]); idx++) {
+                if (strcmp (value, modes[idx]) == 0) {
+                        rc = 0;
+                        break;
+                }
+        }
+
+        if (rc != 0) {
+                snprintf (msg, sizeof(msg), "Invalid option value '%s':"
+                          " Available options are 'off', 'file', "
+                          "'forced' or 'optimal'", value);
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+                        "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+
+        gf_msg_debug (this->name, 0, "Returning %d", rc);
+
+        return rc;
+}
+
+/*
+ * Validation hook that only lets the option be set on volumes involving
+ * the disperse type.
+ *
+ *   - For a GF_CLUSTER_TYPE_TIER volume, it passes when either
+ *     tier_info.cold_type or tier_info.hot_type is GF_CLUSTER_TYPE_DISPERSE;
+ *     otherwise *op_errstr gets "Volume <volname> is not containing
+ *     disperse type".
+ *   - For any other volume, it passes only when volinfo->type is
+ *     GF_CLUSTER_TYPE_DISPERSE; otherwise *op_errstr gets "Volume <volname>
+ *     is not of disperse type".
+ *
+ * @dict, @key and @value are not examined.
+ *
+ * Returns 0 on success, -1 on rejection.
+ */
+static int
+validate_disperse_heal_enable_disable (glusterd_volinfo_t *volinfo,
+                                       dict_t *dict, char *key, char *value,
+                                       char **op_errstr)
+{
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_DISPERSE ||
+                    volinfo->tier_info.hot_type == GF_CLUSTER_TYPE_DISPERSE)
+                        return 0;
+
+                gf_asprintf (op_errstr, "Volume %s is not containing "
+                             "disperse type", volinfo->volname);
+                return -1;
+        }
+
+        if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+                return 0;
+
+        gf_asprintf (op_errstr, "Volume %s is not of disperse type",
+                     volinfo->volname);
+
+        return -1;
+}
+
+/*
+ * Validation hook for the lock-migration option.
+ *
+ * The option is rejected when the volume has replica_count > 1, a non-zero
+ * disperse_count, or is of type GF_CLUSTER_TYPE_TIER. Otherwise @value must
+ * be parseable by gf_string2boolean().
+ *
+ * On rejection the reason is logged with EINVAL and a gf_strdup() copy of
+ * the message is stored in *op_errstr. @dict and @key are not examined.
+ *
+ * Returns 0 when the option may be set, -1 otherwise. The "Returning"
+ * debug trace is emitted on every path, matching the other validators in
+ * this file.
+ */
+static int
+validate_lock_migration_option (glusterd_volinfo_t *volinfo, dict_t *dict,
+                                char *key, char *value, char **op_errstr)
+{
+        char          errstr[2048] = "";
+        int           ret          = 0;
+        xlator_t     *this         = NULL;
+        gf_boolean_t  b            = _gf_false;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (volinfo->replica_count > 1 || volinfo->disperse_count ||
+            volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                snprintf (errstr, sizeof (errstr), "Lock migration is "
+                          "a experimental feature. Currently works with"
+                          " pure distribute volume only");
+                ret = -1;
+
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+
+                *op_errstr = gf_strdup (errstr);
+                goto out;
+        }
+
+        ret = gf_string2boolean (value, &b);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr), "Invalid value"
+                          " for volume set command. Use on/off only.");
+                /* Normalise whatever non-zero code the parser returned. */
+                ret = -1;
+
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INVALID_ENTRY, "%s", errstr);
+
+                *op_errstr = gf_strdup (errstr);
+
+                goto out;
+        }
+
+out:
+        /* The label sits before the trace so failures are traced too. */
+        gf_msg_debug (this->name, 0, "Returning %d", ret);
+
+        return ret;
+}
+
+
+/*
+ * Validation hook that accepts @value only if gf_string2boolean() can parse
+ * it.
+ *
+ * On a parse failure the message "<value> is not a valid boolean value.
+ * <key> expects a valid boolean value." is written to *op_errstr via
+ * gf_asprintf() and then logged. @volinfo and @dict are not examined.
+ *
+ * Returns the result of gf_string2boolean() (0 on success), or -1 if THIS
+ * fails the GF_VALIDATE_OR_GOTO check.
+ *
+ * NOTE(review): the gf_asprintf() result is not checked before *op_errstr
+ * is passed to the logger - confirm what *op_errstr holds if it fails.
+ */
+static int
+validate_worm (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+               char *value, char **op_errstr)
+{
+        int           rc     = -1;
+        gf_boolean_t  parsed = _gf_false;
+        xlator_t     *this   = THIS;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        rc = gf_string2boolean (value, &parsed);
+        if (rc == 0)
+                goto out;
+
+        gf_asprintf (op_errstr, "%s is not a valid boolean value. %s "
+                     "expects a valid boolean value.", value, key);
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+
+        return rc;
+}
+
+
+/*
+ * Validation hook that accepts @value only if gf_string2uint64() can parse
+ * it.
+ *
+ * On a parse failure the message "<value> is not a valid uint64_t value.
+ * <key> expects a valid uint64_t value." is written to *op_errstr via
+ * gf_asprintf() and then logged. The parsed number itself is discarded.
+ * @volinfo and @dict are not examined.
+ *
+ * Returns the result of gf_string2uint64() (0 on success), or -1 if THIS
+ * fails the GF_VALIDATE_OR_GOTO check.
+ *
+ * NOTE(review): the gf_asprintf() result is not checked before *op_errstr
+ * is passed to the logger - confirm what *op_errstr holds if it fails.
+ */
+static int
+validate_worm_period (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                      char *value, char **op_errstr)
+{
+        int       rc     = -1;
+        uint64_t  parsed = -1;
+        xlator_t *this   = THIS;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        rc = gf_string2uint64 (value, &parsed);
+        if (rc == 0)
+                goto out;
+
+        gf_asprintf (op_errstr, "%s is not a valid uint64_t value."
+                     " %s expects a valid uint64_t value.", value, key);
+        gf_msg (this->name, GF_LOG_ERROR, 0,
+                GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+
+        return rc;
+}
+
+
+/*
+ * Validation hook for the retention mode.
+ *
+ * @value must compare equal (case-sensitively, via strcmp) to "relax" or
+ * "enterprise". Any other value produces an explanatory message in
+ * *op_errstr via gf_asprintf(), which is then logged. @volinfo and @dict
+ * are not examined.
+ *
+ * Returns 0 for an accepted value; -1 for a rejected one or if THIS fails
+ * the GF_VALIDATE_OR_GOTO check.
+ *
+ * NOTE(review): the gf_asprintf() result is not checked before *op_errstr
+ * is passed to the logger - confirm what *op_errstr holds if it fails.
+ */
+static int
+validate_reten_mode (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                     char *value, char **op_errstr)
+{
+        int       rc   = -1;
+        xlator_t *this = THIS;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+
+        if (strcmp (value, "relax") == 0 ||
+            strcmp (value, "enterprise") == 0) {
+                rc = 0;
+                goto out;
+        }
+
+        /* rc is still -1 here: the value matched neither mode. */
+        gf_asprintf (op_errstr, "The value of retention mode should be "
+                     "either relax or enterprise. But the value"
+                     " of %s is %s", key, value);
+        gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+                "%s", *op_errstr);
+out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+
+        return rc;
+}
+
+
+/* dispatch table for VOLUME SET
+ * -----------------------------
+ *
+ * Format of entries:
+ *
+ * First field is the <key>, for the purpose of looking it up
+ * in volume dictionary. Each <key> is of the format "<domain>.<specifier>".
+ *
+ * Second field is <voltype>.
+ *
+ * Third field is <option>; if it's unset, it's assumed to be
+ * the same as <specifier>.
+ *
+ * Fourth field is <value>. In this context it is used to specify
+ * a default. That is, even if the volume dict doesn't have a value,
+ * we proceed as if the default value were set for it.
+ *
+ * Fifth field is <doctype>, which decides if the option is public and available
+ * in "set help" or not. "NO_DOC" entries are not part of the public interface
+ * and are subject to change at any time. This also decides if an option is
+ * global (applies to all volumes) or normal (applies to only specified volume).
+ *
+ * Sixth field is <flags>.
+ *
+ * Seventh field is <op-version>.
+ *
+ * Eighth field is the description of the option: if NULL, it is fetched from
+ * the translator code's xlator_options table.
+ *
+ * Ninth field is the validation function: if NULL, the xlator's option-specific
+ * validation will be tried; otherwise validation is tried in glusterd code itself.
+ *
+ * There are two types of entries: basic and special.
+ *
+ * - Basic entries are the ones where the <option> does _not_ start with
+ * the bang! character ('!').
+ *
+ * In their case, <option> is understood as an option for an xlator of
+ * type <voltype>. Their effect is to copy over the volinfo->dict[<key>]
+ * value to all graph nodes of type <voltype> (if such a value is set).
+ *
+ * You are free to add entries of this type, they will become functional
+ * just by being present in the table.
+ *
+ * - Special entries where the <option> starts with the bang!.
+ *
+ * They are not applied to all graphs during generation, and you cannot
+ * extend them in a trivial way which could be just picked up. Better
+ * not touch them unless you know what you do.
+ *
+ *
+ * Another kind of grouping for options, according to visibility:
+ *
+ * - Exported: ones which are used in the code. These are characterized by
+ * using a macro as <key> (of the format VKEY_..., defined in
+ * glusterd-volgen.h).
+ *
+ * - Non-exported: the rest; these have string literal <keys>.
+ *
+ * Adhering to this policy, option name changes shall be one-liners.
+ *
+ */
+
+struct volopt_map_entry glusterd_volopt_map[] = {
+ /* DHT xlator options */
+ { .key = "cluster.lookup-unhashed",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.lookup-optimize",
+ .voltype = "cluster/distribute",
+ .op_version = GD_OP_VERSION_3_7_2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.min-free-disk",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.min-free-inodes",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.rebalance-stats",
+ .voltype = "cluster/distribute",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.subvols-per-directory",
+ .voltype = "cluster/distribute",
+ .option = "directory-layout-spread",
+ .op_version = 2,
+ .validate_fn = validate_subvols_per_directory,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.readdir-optimize",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.rsync-hash-regex",
+ .voltype = "cluster/distribute",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.extra-hash-regex",
+ .voltype = "cluster/distribute",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.dht-xattr-name",
+ .voltype = "cluster/distribute",
+ .option = "xattr-name",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.randomize-hash-range-by-gfid",
+ .voltype = "cluster/distribute",
+ .option = "randomize-hash-range-by-gfid",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_6_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ },
+ { .key = "cluster.rebal-throttle",
+ .voltype = "cluster/distribute",
+ .option = "rebal-throttle",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .validate_fn = validate_defrag_throttle_option,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ },
+
+ { .key = "cluster.lock-migration",
+ .voltype = "cluster/distribute",
+ .option = "lock-migration",
+ .value = "off",
+ .op_version = GD_OP_VERSION_3_8_0,
+ .validate_fn = validate_lock_migration_option,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ },
+
+ /* NUFA xlator options (Distribute special case) */
+ { .key = "cluster.nufa",
+ .voltype = "cluster/distribute",
+ .option = "!nufa",
+ .type = NO_DOC,
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.local-volume-name",
+ .voltype = "cluster/nufa",
+ .option = "local-volume-name",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.weighted-rebalance",
+ .voltype = "cluster/distribute",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+
+ /* Switch xlator options (Distribute special case) */
+ { .key = "cluster.switch",
+ .voltype = "cluster/distribute",
+ .option = "!switch",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.switch-pattern",
+ .voltype = "cluster/switch",
+ .option = "pattern.switch.case",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* AFR xlator options */
+ { .key = "cluster.entry-change-log",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-subvolume",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-subvolume-index",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-hash-mode",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.background-self-heal-count",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.metadata-self-heal",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .validate_fn = validate_replica,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.data-self-heal",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .validate_fn = validate_replica,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.entry-self-heal",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .validate_fn = validate_replica,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.self-heal-daemon",
+ .voltype = "cluster/replicate",
+ .option = "!self-heal-daemon",
+ .op_version = 1,
+ .validate_fn = validate_replica_heal_enable_disable
+ },
+ { .key = "cluster.heal-timeout",
+ .voltype = "cluster/replicate",
+ .option = "!heal-timeout",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.strict-readdir",
+ .voltype = "cluster/replicate",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.self-heal-window-size",
+ .voltype = "cluster/replicate",
+ .option = "data-self-heal-window-size",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.data-change-log",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.metadata-change-log",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.data-self-heal-algorithm",
+ .voltype = "cluster/replicate",
+ .option = "data-self-heal-algorithm",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.eager-lock",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "disperse.eager-lock",
+ .voltype = "cluster/disperse",
+ .op_version = GD_OP_VERSION_3_7_10,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.quorum-type",
+ .voltype = "cluster/replicate",
+ .option = "quorum-type",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.quorum-count",
+ .voltype = "cluster/replicate",
+ .option = "quorum-count",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.choose-local",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.self-heal-readdir-size",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.post-op-delay-secs",
+ .voltype = "cluster/replicate",
+ .type = NO_DOC,
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.readdir-failover",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.ensure-durability",
+ .voltype = "cluster/replicate",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.consistent-metadata",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.heal-wait-queue-length",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_10,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.favorite-child-policy",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* stripe xlator options */
+ { .key = "cluster.stripe-block-size",
+ .voltype = "cluster/stripe",
+ .option = "block-size",
+ .op_version = 1,
+ .validate_fn = validate_stripe,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.stripe-coalesce",
+ .voltype = "cluster/stripe",
+ .option = "coalesce",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* IO-stats xlator options */
+ { .key = VKEY_DIAG_LAT_MEASUREMENT,
+ .voltype = "debug/io-stats",
+ .option = "latency-measurement",
+ .value = "off",
+ .op_version = 1
+ },
+ { .key = "diagnostics.dump-fd-stats",
+ .voltype = "debug/io-stats",
+ .op_version = 1
+ },
+ { .key = VKEY_DIAG_CNT_FOP_HITS,
+ .voltype = "debug/io-stats",
+ .option = "count-fop-hits",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "diagnostics.brick-log-level",
+ .voltype = "debug/io-stats",
+ .value = "INFO",
+ .option = "!brick-log-level",
+ .op_version = 1
+ },
+ { .key = "diagnostics.client-log-level",
+ .voltype = "debug/io-stats",
+ .value = "INFO",
+ .option = "!client-log-level",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.brick-sys-log-level",
+ .voltype = "debug/io-stats",
+ .option = "!sys-log-level",
+ .op_version = 1
+ },
+ { .key = "diagnostics.client-sys-log-level",
+ .voltype = "debug/io-stats",
+ .option = "!sys-log-level",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.brick-logger",
+ .voltype = "debug/io-stats",
+ .option = "!logger",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "diagnostics.client-logger",
+ .voltype = "debug/io-stats",
+ .option = "!logger",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.brick-log-format",
+ .voltype = "debug/io-stats",
+ .option = "!log-format",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "diagnostics.client-log-format",
+ .voltype = "debug/io-stats",
+ .option = "!log-format",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.brick-log-buf-size",
+ .voltype = "debug/io-stats",
+ .option = "!log-buf-size",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "diagnostics.client-log-buf-size",
+ .voltype = "debug/io-stats",
+ .option = "!log-buf-size",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.brick-log-flush-timeout",
+ .voltype = "debug/io-stats",
+ .option = "!log-flush-timeout",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "diagnostics.client-log-flush-timeout",
+ .voltype = "debug/io-stats",
+ .option = "!log-flush-timeout",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.stats-dump-interval",
+ .voltype = "debug/io-stats",
+ .option = "ios-dump-interval",
+ .op_version = 1
+ },
+ { .key = "diagnostics.fop-sample-interval",
+ .voltype = "debug/io-stats",
+ .option = "ios-sample-interval",
+ .op_version = 1
+ },
+ { .key = "diagnostics.fop-sample-buf-size",
+ .voltype = "debug/io-stats",
+ .option = "ios-sample-buf-size",
+ .op_version = 1
+ },
+ { .key = "diagnostics.stats-dnscache-ttl-sec",
+ .voltype = "debug/io-stats",
+ .option = "ios-dnscache-ttl-sec",
+ .op_version = 1
+ },
+
+ /* IO-cache xlator options */
+ { .key = "performance.cache-max-file-size",
+ .voltype = "performance/io-cache",
+ .option = "max-file-size",
+ .op_version = 1,
+ .validate_fn = validate_cache_max_min_size,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-min-file-size",
+ .voltype = "performance/io-cache",
+ .option = "min-file-size",
+ .op_version = 1,
+ .validate_fn = validate_cache_max_min_size,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-refresh-timeout",
+ .voltype = "performance/io-cache",
+ .option = "cache-timeout",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-priority",
+ .voltype = "performance/io-cache",
+ .option = "priority",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-size",
+ .voltype = "performance/io-cache",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* IO-threads xlator options */
+ { .key = "performance.io-thread-count",
+ .voltype = "performance/io-threads",
+ .option = "thread-count",
+ .op_version = 1
+ },
+ { .key = "performance.high-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.normal-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.low-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.least-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.enable-least-priority",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+
+ /* Other perf xlators' options */
+ { .key = "performance.cache-size",
+ .voltype = "performance/quick-read",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.flush-behind",
+ .voltype = "performance/write-behind",
+ .option = "flush-behind",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.nfs.flush-behind",
+ .voltype = "performance/write-behind",
+ .option = "flush-behind",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.write-behind-window-size",
+ .voltype = "performance/write-behind",
+ .option = "cache-size",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.resync-failed-syncs-after-fsync",
+ .voltype = "performance/write-behind",
+ .option = "resync-failed-syncs-after-fsync",
+ .op_version = GD_OP_VERSION_3_7_7,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .description = "If sync of \"cached-writes issued before fsync\" "
+ "(to backend) fails, this option configures whether "
+ "to retry syncing them after fsync or forget them. "
+ "If set to on, cached-writes are retried "
+ "till a \"flush\" fop (or a successful sync) on sync "
+ "failures. "
+ "fsync itself is failed irrespective of the value of "
+ "this option. ",
+ },
+ { .key = "performance.nfs.write-behind-window-size",
+ .voltype = "performance/write-behind",
+ .option = "cache-size",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.strict-o-direct",
+ .voltype = "performance/write-behind",
+ .option = "strict-O_DIRECT",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.nfs.strict-o-direct",
+ .voltype = "performance/write-behind",
+ .option = "strict-O_DIRECT",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.strict-write-ordering",
+ .voltype = "performance/write-behind",
+ .option = "strict-write-ordering",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.nfs.strict-write-ordering",
+ .voltype = "performance/write-behind",
+ .option = "strict-write-ordering",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.lazy-open",
+ .voltype = "performance/open-behind",
+ .option = "lazy-open",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.read-after-open",
+ .voltype = "performance/open-behind",
+ .option = "read-after-open",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.read-ahead-page-count",
+ .voltype = "performance/read-ahead",
+ .option = "page-count",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.md-cache-timeout",
+ .voltype = "performance/md-cache",
+ .option = "md-cache-timeout",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-swift-metadata",
+ .voltype = "performance/md-cache",
+ .option = "cache-swift-metadata",
+ .op_version = GD_OP_VERSION_3_7_10,
+ .description = "Cache swift metadata (user.swift.metadata xattr)",
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-samba-metadata",
+ .voltype = "performance/md-cache",
+ .option = "cache-samba-metadata",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL"
+ " xattr)",
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Crypt xlator options */
+
+ { .key = "features.encryption",
+ .voltype = "encryption/crypt",
+ .option = "!feat",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable client-side encryption for "
+ "the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+
+ { .key = "encryption.master-key",
+ .voltype = "encryption/crypt",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "encryption.data-key-size",
+ .voltype = "encryption/crypt",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "encryption.block-size",
+ .voltype = "encryption/crypt",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Client xlator options */
+ { .key = "network.frame-timeout",
+ .voltype = "protocol/client",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "network.ping-timeout",
+ .voltype = "protocol/client",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "network.tcp-window-size",
+ .voltype = "protocol/client",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.lock-heal",
+ .voltype = "protocol/client",
+ .option = "lk-heal",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.grace-timeout",
+ .voltype = "protocol/client",
+ .option = "grace-timeout",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "client.ssl",
+ .voltype = "protocol/client",
+ .option = "transport.socket.ssl-enabled",
+ .op_version = 2,
+ .description = "enable/disable client.ssl flag in the "
+ "volume.",
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "network.remote-dio",
+ .voltype = "protocol/client",
+ .option = "filter-O_DIRECT",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "client.own-thread",
+ .voltype = "protocol/client",
+ .option = "transport.socket.own-thread",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "client.event-threads",
+ .voltype = "protocol/client",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+
+ /* Server xlator options */
+ { .key = "network.ping-timeout",
+ .voltype = "protocol/server",
+ .option = "transport.tcp-user-timeout",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "network.tcp-window-size",
+ .voltype = "protocol/server",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "network.inode-lru-limit",
+ .voltype = "protocol/server",
+ .op_version = 1
+ },
+ { .key = AUTH_ALLOW_MAP_KEY,
+ .voltype = "protocol/server",
+ .option = "!server-auth",
+ .value = "*",
+ .op_version = 1
+ },
+ { .key = AUTH_REJECT_MAP_KEY,
+ .voltype = "protocol/server",
+ .option = "!server-auth",
+ .op_version = 1
+ },
+ { .key = "transport.keepalive",
+ .voltype = "protocol/server",
+ .option = "transport.socket.keepalive",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "server.allow-insecure",
+ .voltype = "protocol/server",
+ .option = "rpc-auth-allow-insecure",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "server.root-squash",
+ .voltype = "protocol/server",
+ .option = "root-squash",
+ .op_version = 2
+ },
+ { .key = "server.anonuid",
+ .voltype = "protocol/server",
+ .option = "anonuid",
+ .op_version = 3
+ },
+ { .key = "server.anongid",
+ .voltype = "protocol/server",
+ .option = "anongid",
+ .op_version = 3
+ },
+ { .key = "server.statedump-path",
+ .voltype = "protocol/server",
+ .option = "statedump-path",
+ .op_version = 1
+ },
+ { .key = "server.outstanding-rpc-limit",
+ .voltype = "protocol/server",
+ .option = "rpc.outstanding-rpc-limit",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "features.lock-heal",
+ .voltype = "protocol/server",
+ .option = "lk-heal",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "features.grace-timeout",
+ .voltype = "protocol/server",
+ .option = "grace-timeout",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "server.ssl",
+ .voltype = "protocol/server",
+ .option = "transport.socket.ssl-enabled",
+ .description = "enable/disable server.ssl flag in the "
+ "volume.",
+ .op_version = 2
+ },
+ { .key = "auth.ssl-allow",
+ .voltype = "protocol/server",
+ .option = "!ssl-allow",
+ .value = "*",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "server.manage-gids",
+ .voltype = "protocol/server",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "server.dynamic-auth",
+ .voltype = "protocol/server",
+ .op_version = GD_OP_VERSION_3_7_5,
+ },
+ { .key = "client.send-gids",
+ .voltype = "protocol/client",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "server.gid-timeout",
+ .voltype = "protocol/server",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "server.own-thread",
+ .voltype = "protocol/server",
+ .option = "transport.socket.own-thread",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "server.event-threads",
+ .voltype = "protocol/server",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+
+ /* Generic transport options */
+ { .key = SSL_OWN_CERT_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-own-cert",
+ .op_version = GD_OP_VERSION_3_7_4,
+ },
+ { .key = SSL_PRIVATE_KEY_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-private-key",
+ .op_version = GD_OP_VERSION_3_7_4,
+ },
+ { .key = SSL_CA_LIST_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-ca-list",
+ .op_version = GD_OP_VERSION_3_7_4,
+ },
+ { .key = SSL_CRL_PATH_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-crl-path",
+ .op_version = GD_OP_VERSION_3_7_4,
+ },
+ { .key = SSL_CERT_DEPTH_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-cert-depth",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = SSL_CIPHER_LIST_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-cipher-list",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = SSL_DH_PARAM_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-dh-param",
+ .op_version = GD_OP_VERSION_3_7_4,
+ },
+ { .key = SSL_EC_CURVE_OPT,
+ .voltype = "rpc-transport/socket",
+ .option = "!ssl-ec-curve",
+ .op_version = GD_OP_VERSION_3_7_4,
+ },
+ { .key = "transport.address-family",
+ .voltype = "protocol/server",
+ .option = "!address-family",
+ .op_version = GD_OP_VERSION_3_7_4,
+ .type = NO_DOC,
+ },
+
+ /* Performance xlators enable/disable options */
+ { .key = "performance.write-behind",
+ .voltype = "performance/write-behind",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable write-behind translator in the "
+ "volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.read-ahead",
+ .voltype = "performance/read-ahead",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable read-ahead translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.readdir-ahead",
+ .voltype = "performance/readdir-ahead",
+ .option = "!perf",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable readdir-ahead translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+
+ { .key = "performance.io-cache",
+ .voltype = "performance/io-cache",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable io-cache translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.quick-read",
+ .voltype = "performance/quick-read",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable quick-read translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+
+ },
+ { .key = "performance.open-behind",
+ .voltype = "performance/open-behind",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 2,
+ .description = "enable/disable open-behind translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+
+ },
+ { .key = "performance.stat-prefetch",
+ .voltype = "performance/md-cache",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable meta-data caching translator in the "
+ "volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.client-io-threads",
+ .voltype = "performance/io-threads",
+ .option = "!perf",
+ .value = "off",
+ .op_version = 1,
+ .description = "enable/disable io-threads translator in the client "
+ "graph of volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.write-behind",
+ .voltype = "performance/write-behind",
+ .option = "!nfsperf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable write-behind translator in the volume",
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.read-ahead",
+ .voltype = "performance/read-ahead",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.io-cache",
+ .voltype = "performance/io-cache",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.quick-read",
+ .voltype = "performance/quick-read",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.stat-prefetch",
+ .voltype = "performance/md-cache",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.io-threads",
+ .voltype = "performance/io-threads",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.force-readdirp",
+ .voltype = "performance/md-cache",
+ .option = "force-readdirp",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.cache-invalidation",
+ .voltype = "performance/md-cache",
+ .option = "cache-invalidation",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Feature translators */
+ { .key = "features.uss",
+ .voltype = "features/snapview-server",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .value = "off",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT,
+ .validate_fn = validate_uss,
+ .description = "enable/disable User Serviceable Snapshots on the "
+ "volume."
+ },
+
+ { .key = "features.snapshot-directory",
+ .voltype = "features/snapview-client",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .value = ".snaps",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT,
+ .validate_fn = validate_uss_dir,
+ .description = "Entry point directory for entering snapshot world. "
+ "Value can have only [0-9a-z-_] and starts with "
+ "dot (.) and cannot exceed 255 character"
+ },
+
+ { .key = "features.show-snapshot-directory",
+ .voltype = "features/snapview-client",
+ .op_version = GD_OP_VERSION_3_6_0,
+ .value = "off",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT,
+ .description = "show entry point in readdir output of "
+ "snapdir-entry-path which is set by samba"
+ },
+
+#ifdef HAVE_LIB_Z
+ /* Compressor-decompressor xlator options
+ * defaults used from xlator/features/compress/src/cdc.h
+ */
+ { .key = "network.compression",
+ .voltype = "features/cdc",
+ .option = "!feat",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable network compression translator",
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "network.compression.window-size",
+ .voltype = "features/cdc",
+ .option = "window-size",
+ .op_version = 3
+ },
+ { .key = "network.compression.mem-level",
+ .voltype = "features/cdc",
+ .option = "mem-level",
+ .op_version = 3
+ },
+ { .key = "network.compression.min-size",
+ .voltype = "features/cdc",
+ .option = "min-size",
+ .op_version = 3
+ },
+ { .key = "network.compression.compression-level",
+ .voltype = "features/cdc",
+ .option = "compression-level",
+ .op_version = 3
+ },
+ { .key = "network.compression.debug",
+ .voltype = "features/cdc",
+ .option = "debug",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+#endif
+
+ /* Quota xlator options */
+ { .key = VKEY_FEATURES_LIMIT_USAGE,
+ .voltype = "features/quota",
+ .option = "limit-set",
+ .type = NO_DOC,
+ .op_version = 1,
+ },
+ {
+ .key = "features.quota-timeout",
+ .voltype = "features/quota",
+ .option = "timeout",
+ .value = "0",
+ .op_version = 1,
+ .validate_fn = validate_quota,
+ },
+ { .key = "features.default-soft-limit",
+ .voltype = "features/quota",
+ .option = "default-soft-limit",
+ .type = NO_DOC,
+ .op_version = 3,
+ },
+ { .key = "features.soft-timeout",
+ .voltype = "features/quota",
+ .option = "soft-timeout",
+ .type = NO_DOC,
+ .op_version = 3,
+ },
+ { .key = "features.hard-timeout",
+ .voltype = "features/quota",
+ .option = "hard-timeout",
+ .type = NO_DOC,
+ .op_version = 3,
+ },
+ { .key = "features.alert-time",
+ .voltype = "features/quota",
+ .option = "alert-time",
+ .type = NO_DOC,
+ .op_version = 3,
+ },
+ { .key = "features.quota-deem-statfs",
+ .voltype = "features/quota",
+ .option = "deem-statfs",
+ .value = "off",
+ .type = DOC,
+ .op_version = 2,
+ .validate_fn = validate_quota,
+ },
+
+ /* Marker xlator options */
+ { .key = VKEY_MARKER_XTIME,
+ .voltype = "features/marker",
+ .option = "xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 1
+ },
+ { .key = VKEY_MARKER_XTIME,
+ .voltype = "features/marker",
+ .option = "!xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 1
+ },
+ { .key = VKEY_MARKER_XTIME_FORCE,
+ .voltype = "features/marker",
+ .option = "gsync-force-xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 2
+ },
+ { .key = VKEY_MARKER_XTIME_FORCE,
+ .voltype = "features/marker",
+ .option = "!gsync-force-xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 2
+ },
+ { .key = VKEY_FEATURES_QUOTA,
+ .voltype = "features/marker",
+ .option = "quota",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_NEVER_RESET,
+ .op_version = 1
+ },
+ { .key = VKEY_FEATURES_INODE_QUOTA,
+ .voltype = "features/marker",
+ .option = "inode-quota",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_NEVER_RESET,
+ .op_version = 1
+ },
+
+ { .key = VKEY_FEATURES_BITROT,
+ .voltype = "features/bit-rot",
+ .option = "bitrot",
+ .value = "disable",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = GD_OP_VERSION_3_7_0
+ },
+
+ /* Debug xlators options */
+ { .key = "debug.trace",
+ .voltype = "debug/trace",
+ .option = "!debug",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "debug.log-history",
+ .voltype = "debug/trace",
+ .option = "log-history",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.log-file",
+ .voltype = "debug/trace",
+ .option = "log-file",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.exclude-ops",
+ .voltype = "debug/trace",
+ .option = "exclude-ops",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.include-ops",
+ .voltype = "debug/trace",
+ .option = "include-ops",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.error-gen",
+ .voltype = "debug/error-gen",
+ .option = "!debug",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "debug.error-failure",
+ .voltype = "debug/error-gen",
+ .option = "failure",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "debug.error-number",
+ .voltype = "debug/error-gen",
+ .option = "error-no",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "debug.random-failure",
+ .voltype = "debug/error-gen",
+ .option = "random-failure",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "debug.error-fops",
+ .voltype = "debug/error-gen",
+ .option = "enable",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+
+
+ /* NFS xlator options */
+ { .key = "nfs.enable-ino32",
+ .voltype = "nfs/server",
+ .option = "nfs.enable-ino32",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.mem-factor",
+ .voltype = "nfs/server",
+ .option = "nfs.mem-factor",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.export-dirs",
+ .voltype = "nfs/server",
+ .option = "nfs3.export-dirs",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.export-volumes",
+ .voltype = "nfs/server",
+ .option = "nfs3.export-volumes",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.addr-namelookup",
+ .voltype = "nfs/server",
+ .option = "rpc-auth.addr.namelookup",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.dynamic-volumes",
+ .voltype = "nfs/server",
+ .option = "nfs.dynamic-volumes",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.register-with-portmap",
+ .voltype = "nfs/server",
+ .option = "rpc.register-with-portmap",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.outstanding-rpc-limit",
+ .voltype = "nfs/server",
+ .option = "rpc.outstanding-rpc-limit",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.port",
+ .voltype = "nfs/server",
+ .option = "nfs.port",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-unix",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.auth-unix.*",
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-null",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.auth-null.*",
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-allow",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.addr.*.allow",
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-reject",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.addr.*.reject",
+ .op_version = 1
+ },
+ { .key = "nfs.ports-insecure",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.ports.*.insecure",
+ .op_version = 1
+ },
+ { .key = "nfs.transport-type",
+ .voltype = "nfs/server",
+ .option = "!nfs.transport-type",
+ .op_version = 1,
+ .description = "Specifies the nfs transport type. Valid "
+ "transport types are 'tcp' and 'rdma'."
+ },
+ { .key = "nfs.trusted-sync",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.trusted-sync",
+ .op_version = 1
+ },
+ { .key = "nfs.trusted-write",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.trusted-write",
+ .op_version = 1
+ },
+ { .key = "nfs.volume-access",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.volume-access",
+ .op_version = 1
+ },
+ { .key = "nfs.export-dir",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.export-dir",
+ .op_version = 1
+ },
+ { .key = NFS_DISABLE_MAP_KEY,
+ .voltype = "nfs/server",
+ .option = "!nfs-disable",
+ .value = "on",
+ .op_version = 1
+ },
+ { .key = "nfs.nlm",
+ .voltype = "nfs/server",
+ .option = "nfs.nlm",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.acl",
+ .voltype = "nfs/server",
+ .option = "nfs.acl",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.mount-udp",
+ .voltype = "nfs/server",
+ .option = "nfs.mount-udp",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.mount-rmtab",
+ .voltype = "nfs/server",
+ .option = "nfs.mount-rmtab",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-statd",
+ .voltype = "nfs/server",
+ .option = "nfs.rpc-statd",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "nfs.log-level",
+ .voltype = "nfs/server",
+ .option = "nfs.log-level",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "nfs.server-aux-gids",
+ .voltype = "nfs/server",
+ .option = "nfs.server-aux-gids",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "nfs.drc",
+ .voltype = "nfs/server",
+ .option = "nfs.drc",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.drc-size",
+ .voltype = "nfs/server",
+ .option = "nfs.drc-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.read-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.read-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.write-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.write-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.readdir-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.readdir-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.rdirplus",
+ .voltype = "nfs/server",
+ .option = "nfs.rdirplus",
+ .type = GLOBAL_DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .description = "When this option is set to off NFS falls back to "
+ "standard readdir instead of readdirp"
+ },
+
+ /* Cli options for Export authentication on nfs mount */
+ { .key = "nfs.exports-auth-enable",
+ .voltype = "nfs/server",
+ .option = "nfs.exports-auth-enable",
+ .type = GLOBAL_DOC,
+ .op_version = GD_OP_VERSION_3_7_0
+ },
+ { .key = "nfs.auth-refresh-interval-sec",
+ .voltype = "nfs/server",
+ .option = "nfs.auth-refresh-interval-sec",
+ .type = GLOBAL_DOC,
+ .op_version = GD_OP_VERSION_3_7_0
+ },
+ { .key = "nfs.auth-cache-ttl-sec",
+ .voltype = "nfs/server",
+ .option = "nfs.auth-cache-ttl-sec",
+ .type = GLOBAL_DOC,
+ .op_version = GD_OP_VERSION_3_7_0
+ },
+
+ /* Other options which don't fit any place above */
+ { .key = "features.read-only",
+ .voltype = "features/read-only",
+ .option = "read-only",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "features.worm",
+ .voltype = "features/worm",
+ .option = "worm",
+ .value = "off",
+ .validate_fn = validate_worm,
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "features.worm-file-level",
+ .voltype = "features/worm",
+ .option = "worm-file-level",
+ .value = "off",
+ .validate_fn = validate_worm,
+ .op_version = GD_OP_VERSION_3_8_0,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "features.default-retention-period",
+ .voltype = "features/worm",
+ .option = "default-retention-period",
+ .validate_fn = validate_worm_period,
+ .op_version = GD_OP_VERSION_3_8_0,
+ },
+ { .key = "features.retention-mode",
+ .voltype = "features/worm",
+ .option = "retention-mode",
+ .validate_fn = validate_reten_mode,
+ .op_version = GD_OP_VERSION_3_8_0,
+ },
+ { .key = "features.auto-commit-period",
+ .voltype = "features/worm",
+ .option = "auto-commit-period",
+ .validate_fn = validate_worm_period,
+ .op_version = GD_OP_VERSION_3_8_0,
+ },
+ { .key = "storage.linux-aio",
+ .voltype = "storage/posix",
+ .op_version = 1
+ },
+ { .key = "storage.batch-fsync-mode",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.batch-fsync-delay-usec",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.xattr-user-namespace-mode",
+ .voltype = "storage/posix",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "storage.owner-uid",
+ .voltype = "storage/posix",
+ .option = "brick-uid",
+ .op_version = 1
+ },
+ { .key = "storage.owner-gid",
+ .voltype = "storage/posix",
+ .option = "brick-gid",
+ .op_version = 1
+ },
+ { .key = "storage.node-uuid-pathinfo",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.health-check-interval",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .option = "update-link-count-parent",
+ .key = "storage.build-pgfid",
+ .voltype = "storage/posix",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "storage.bd-aio",
+ .voltype = "storage/bd",
+ .op_version = 3
+ },
+ { .key = "config.memory-accounting",
+ .voltype = "mgmt/glusterd",
+ .option = "!config",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "config.transport",
+ .voltype = "mgmt/glusterd",
+ .option = "!config",
+ .op_version = 2
+ },
+ { .key = GLUSTERD_QUORUM_TYPE_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "off",
+ .op_version = 2
+ },
+ { .key = GLUSTERD_QUORUM_RATIO_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "0",
+ .op_version = 2
+ },
+ /* changelog translator - global tunables */
+ { .key = "changelog.changelog",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "changelog.changelog-dir",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "changelog.encoding",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "changelog.rollover-time",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "changelog.fsync-interval",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "changelog.changelog-barrier-timeout",
+ .voltype = "features/changelog",
+ .value = BARRIER_TIMEOUT,
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "changelog.capture-del-path",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "features.barrier",
+ .voltype = "features/barrier",
+ .value = "disable",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.barrier-timeout",
+ .voltype = "features/barrier",
+ .value = BARRIER_TIMEOUT,
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ { .key = "cluster.op-version",
+ .voltype = "mgmt/glusterd",
+ .op_version = GD_OP_VERSION_3_6_0,
+ },
+ /*Trash translator options */
+ { .key = "features.trash",
+ .voltype = "features/trash",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.trash-dir",
+ .voltype = "features/trash",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.trash-eliminate-path",
+ .voltype = "features/trash",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.trash-max-filesize",
+ .voltype = "features/trash",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.trash-internal-op",
+ .voltype = "features/trash",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = GLUSTERD_SHARED_STORAGE_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "disable",
+ .type = GLOBAL_DOC,
+ .op_version = GD_OP_VERSION_3_7_1,
+ .description = "Create and mount the shared storage volume"
+ "(gluster_shared_storage) at "
+ "/var/run/gluster/shared_storage on enabling this "
+ "option. Unmount and delete the shared storage volume "
+ " on disabling this option."
+ },
+
+#if USE_GFDB /* no GFDB means tiering is disabled */
+ /* tier translator - global tunables */
+ { .key = "cluster.write-freq-threshold",
+ .voltype = "cluster/tier",
+ .value = "0",
+ .option = "write-freq-threshold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier_thresholds,
+ .description = "Defines the number of writes, in a promotion/demotion"
+ " cycle, that would mark a file HOT for promotion. Any"
+ " file that has write hits less than this value will "
+ "be considered as COLD and will be demoted."
+ },
+ { .key = "cluster.read-freq-threshold",
+ .voltype = "cluster/tier",
+ .value = "0",
+ .option = "read-freq-threshold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier_thresholds,
+ .description = "Defines the number of reads, in a promotion/demotion "
+ "cycle, that would mark a file HOT for promotion. Any "
+ "file that has read hits less than this value will be "
+ "considered as COLD and will be demoted."
+ },
+ { .key = "cluster.tier-pause",
+ .voltype = "cluster/tier",
+ .option = "tier-pause",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ },
+ { .key = "cluster.tier-promote-frequency",
+ .voltype = "cluster/tier",
+ .value = "120",
+ .option = "tier-promote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ },
+ { .key = "cluster.tier-demote-frequency",
+ .voltype = "cluster/tier",
+ .value = "3600",
+ .option = "tier-demote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ },
+ { .key = "cluster.watermark-hi",
+ .voltype = "cluster/tier",
+ .value = "90",
+ .option = "watermark-hi",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Upper % watermark for promotion. If hot tier fills"
+ " above this percentage, no promotion will happen and demotion will "
+ "happen with high probability."
+ },
+ { .key = "cluster.watermark-low",
+ .voltype = "cluster/tier",
+ .value = "75",
+ .option = "watermark-low",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Lower % watermark. If hot tier is less "
+ "full than this, promotion will happen and demotion will not happen. "
+ "If greater than this, promotion/demotion will happen at a probability "
+ "relative to how full the hot tier is."
+ },
+ { .key = "cluster.tier-mode",
+ .voltype = "cluster/tier",
+ .option = "tier-mode",
+ .value = "cache",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Either 'test' or 'cache'. Test mode periodically"
+ " demotes or promotes files automatically based on access."
+ " Cache mode does so based on whether the cache is full or not,"
+ " as specified with watermarks."
+ },
+ { .key = "cluster.tier-max-promote-file-size",
+ .voltype = "cluster/tier",
+ .option = "tier-max-promote-file-size",
+ .value = "0",
+ .op_version = GD_OP_VERSION_3_7_10,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "The maximum file size in bytes that is promoted. If 0, there"
+ " is no maximum size (default)."
+ },
+ { .key = "cluster.tier-max-mb",
+ .voltype = "cluster/tier",
+ .option = "tier-max-mb",
+ .value = "4000",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "The maximum number of MB that may be migrated"
+ " in any direction in a given cycle by a single node."
+ },
+ { .key = "cluster.tier-max-files",
+ .voltype = "cluster/tier",
+ .option = "tier-max-files",
+ .value = "10000",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "The maximum number of files that may be migrated"
+ " in any direction in a given cycle by a single node."
+ },
+ { .key = "features.ctr-enabled",
+ .voltype = "features/changetimerecorder",
+ .value = "off",
+ .option = "ctr-enabled",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .description = "Enable CTR xlator"
+ },
+ { .key = "features.record-counters",
+ .voltype = "features/changetimerecorder",
+ .value = "off",
+ .option = "record-counters",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .validate_fn = validate_tier_counters,
+ .description = "Its a Change Time Recorder Xlator option to "
+ "enable recording write "
+ "and read heat counters. The default is disabled. "
+ "If enabled, \"cluster.write-freq-threshold\" and "
+ "\"cluster.read-freq-threshold\" defined the number "
+ "of writes (or reads) to a given file are needed "
+ "before triggering migration."
+ },
+ { .key = "features.ctr-record-metadata-heat",
+ .voltype = "features/changetimerecorder",
+ .value = "off",
+ .option = "ctr-record-metadata-heat",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .type = NO_DOC,
+ .description = "Its a Change Time Recorder Xlator option to "
+ "enable recording write heat on metadata of the file. "
+ "The default is disabled. "
+ "Metadata is inode attributes like atime, mtime,"
+ " permissions etc and "
+ "extended attributes of a file ."
+ },
+ { .key = "features.ctr_link_consistency",
+ .voltype = "features/changetimerecorder",
+ .value = "off",
+ .option = "ctr_link_consistency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .type = NO_DOC,
+ .description = "Enable a crash consistent way of recording hardlink "
+ "updates by Change Time Recorder Xlator. "
+ "When recording in a crash "
+ "consistent way the data operations will "
+ "experience more latency."
+ },
+ { .key = "features.ctr_lookupheal_link_timeout",
+ .voltype = "features/changetimerecorder",
+ .value = "300",
+ .option = "ctr_lookupheal_link_timeout",
+ .op_version = GD_OP_VERSION_3_7_2,
+ .type = NO_DOC,
+ .description = "Defines the expiry period of in-memory "
+ "hardlink of an inode,"
+ "used by lookup heal in Change Time Recorder."
+ "Once the expiry period"
+ "hits an attempt to heal the database per "
+ "hardlink is done and the "
+ "in-memory hardlink period is reset"
+ },
+ { .key = "features.ctr_lookupheal_inode_timeout",
+ .voltype = "features/changetimerecorder",
+ .value = "300",
+ .option = "ctr_lookupheal_inode_timeout",
+ .op_version = GD_OP_VERSION_3_7_2,
+ .type = NO_DOC,
+ .description = "Defines the expiry period of in-memory inode,"
+ "used by lookup heal in Change Time Recorder. "
+ "Once the expiry period"
+ "hits an attempt to heal the database per "
+ "inode is done"
+ },
+ { .key = "features.ctr-sql-db-cachesize",
+ .voltype = "features/changetimerecorder",
+ .value = "1000",
+ .option = "sql-db-cachesize",
+ .validate_fn = validate_ctr_sql_params,
+ .op_version = GD_OP_VERSION_3_7_7,
+ .description = "Defines the cache size of the sqlite database of "
+ "changetimerecorder xlator."
+ "The input to this option is in pages."
+ "Each page is 4096 bytes. Default value is 1000 "
+ "pages i.e ~ 4 MB. "
+ "The max value is 262144 pages i.e 1 GB and "
+ "the min value is 1000 pages i.e ~ 4 MB. "
+ },
+ { .key = "features.ctr-sql-db-wal-autocheckpoint",
+ .voltype = "features/changetimerecorder",
+ .value = "1000",
+ .option = "sql-db-wal-autocheckpoint",
+ .validate_fn = validate_ctr_sql_params,
+ .op_version = GD_OP_VERSION_3_7_7,
+ .description = "Defines the autocheckpoint of the sqlite database of "
+ " changetimerecorder. "
+ "The input to this option is in pages. "
+ "Each page is 4096 bytes. Default value is 1000 "
+ "pages i.e ~ 4 MB."
+ "The max value is 262144 pages i.e 1 GB and "
+ "the min value is 1000 pages i.e ~4 MB."
+ },
+#endif /* USE_GFDB */
+ { .key = "locks.trace",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "locks.mandatory-locking",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_8_0,
+ .validate_fn = validate_mandatory_locking,
+ },
+ { .key = "cluster.disperse-self-heal-daemon",
+ .voltype = "cluster/disperse",
+ .type = NO_DOC,
+ .option = "self-heal-daemon",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .validate_fn = validate_disperse_heal_enable_disable
+ },
+ { .key = "cluster.quorum-reads",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "client.bind-insecure",
+ .voltype = "protocol/client",
+ .option = "client-bind-insecure",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "ganesha.enable",
+ .voltype = "features/ganesha",
+ .value = "off",
+ .option = "ganesha.enable",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.shard",
+ .voltype = "features/shard",
+ .value = "off",
+ .option = "!shard",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .description = "enable/disable sharding translator on the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "features.shard-block-size",
+ .voltype = "features/shard",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.scrub-throttle",
+ .voltype = "features/bit-rot",
+ .value = "lazy",
+ .option = "scrub-throttle",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .type = NO_DOC,
+ },
+ { .key = "features.scrub-freq",
+ .voltype = "features/bit-rot",
+ .value = "biweekly",
+ .option = "scrub-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .type = NO_DOC,
+ },
+ { .key = "features.scrub",
+ .voltype = "features/bit-rot",
+ .option = "scrubber",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_FORCE,
+ .type = NO_DOC,
+ },
+ { .key = "features.expiry-time",
+ .voltype = "features/bit-rot",
+ .value = SIGNING_TIMEOUT,
+ .option = "expiry-time",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .type = NO_DOC,
+ },
+ /* Upcall translator options */
+ { .key = "features.cache-invalidation",
+ .voltype = "features/upcall",
+ .value = "off",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ { .key = "features.cache-invalidation-timeout",
+ .voltype = "features/upcall",
+ .op_version = GD_OP_VERSION_3_7_0,
+ },
+ /* Lease translator options */
+ { .key = "features.leases",
+ .voltype = "features/leases",
+ .value = "off",
+ .op_version = GD_OP_VERSION_3_8_0,
+ },
+ { .key = "features.lease-lock-recall-timeout",
+ .voltype = "features/leases",
+ .op_version = GD_OP_VERSION_3_8_0,
+ },
+ { .key = "disperse.background-heals",
+ .voltype = "cluster/disperse",
+ .op_version = GD_OP_VERSION_3_7_3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "disperse.heal-wait-qlength",
+ .voltype = "cluster/disperse",
+ .op_version = GD_OP_VERSION_3_7_3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.heal-timeout",
+ .voltype = "cluster/disperse",
+ .option = "!heal-timeout",
+ .op_version = GD_OP_VERSION_3_7_3,
+ .type = NO_DOC,
+ },
+ {
+ .key = "dht.force-readdirp",
+ .voltype = "cluster/distribute",
+ .option = "use-readdirp",
+ .op_version = GD_OP_VERSION_3_7_5,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "disperse.read-policy",
+ .voltype = "cluster/disperse",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.jbr",
+ .voltype = "experimental/jbr",
+ .option = "!jbr",
+ .op_version = GD_OP_VERSION_4_0_0,
+ .description = "enable JBR instead of AFR for replication",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "cluster.jbr.quorum-percent",
+ .voltype = "experimental/jbr",
+ .option = "quorum-percent",
+ .op_version = GD_OP_VERSION_4_0_0,
+ .description = "percent of rep_count-1 bricks that must be up"
+ },
+ /* Full Data Logging */
+ {
+ .key = "features.fdl",
+ .voltype = "features/fdl",
+ .option = "!fdl",
+ .op_version = GD_OP_VERSION_4_0_0,
+ .flags = OPT_FLAG_XLATOR_OPT,
+ .type = NO_DOC,
+ },
+ { .key = "cluster.shd-max-threads",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.shd-wait-qlength",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.locking-scheme",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.granular-entry-heal",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_8_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .option = "revocation-secs",
+ .key = "features.locks-revocation-secs",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "revocation-clear-all",
+ .key = "features.locks-revocation-clear-all",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "revocation-max-blocked",
+ .key = "features.locks-revocation-max-blocked",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "monkey-unlocking",
+ .key = "features.locks-monkey-unlocking",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .type = NO_DOC,
+ },
+ { .key = NULL
+ }
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
new file mode 100644
index 00000000000..0f7bb05bde1
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -0,0 +1,2019 @@
+/*
+ Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <time.h>
+#include <grp.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+
+#include <libgen.h>
+#include "compat-uuid.h"
+
+#include "glusterd.h"
+#include "rpcsvc.h"
+#include "fnmatch.h"
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "dict.h"
+#include "options.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "syscall.h"
+#include "glusterd-statedump.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-bitd-svc.h"
+#include "glusterd-scrub-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-messages.h"
+#include "common-utils.h"
+#include "glusterd-geo-rep.h"
+#include "run.h"
+#include "rpc-clnt-ping.h"
+
+#include "syncop.h"
+
+#include "glusterd-mountbroker.h"
+
+/* RPC program descriptors and shared state that this file references but
+ * does not define (all declared extern). */
+extern struct rpcsvc_program gluster_handshake_prog;
+extern struct rpcsvc_program gluster_cli_getspec_prog;
+extern struct rpcsvc_program gluster_pmap_prog;
+extern glusterd_op_info_t opinfo;
+extern struct rpcsvc_program gd_svc_mgmt_prog;
+extern struct rpcsvc_program gd_svc_mgmt_v3_prog;
+extern struct rpcsvc_program gd_svc_peer_prog;
+extern struct rpcsvc_program gd_svc_cli_prog;
+extern struct rpcsvc_program gd_svc_cli_trusted_progs;
+extern struct rpc_clnt_program gd_brick_prog;
+extern struct rpcsvc_program glusterd_mgmt_hndsk_prog;
+
+extern char snap_mount_dir[PATH_MAX];
+
+/* Callback program descriptor handed to rpcsvc_callback_submit() by
+ * glusterd_fetchspec_notify() and glusterd_fetchsnap_notify(). */
+rpcsvc_cbk_program_t glusterd_cbk_prog = {
+ .progname = "Gluster Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+};
+
+/* Table of RPC programs that glusterd_stop_listener() unregisters from
+ * conf->rpc. The order matters: see the note on index 1 below. */
+struct rpcsvc_program *gd_inet_programs[] = {
+ &gd_svc_peer_prog,
+ &gd_svc_cli_trusted_progs, /* Must be index 1 for secure_mgmt! */
+ &gd_svc_mgmt_prog,
+ &gd_svc_mgmt_v3_prog,
+ &gluster_pmap_prog,
+ &gluster_handshake_prog,
+ &glusterd_mgmt_hndsk_prog,
+};
+/* Number of entries in gd_inet_programs, computed at compile time. */
+int gd_inet_programs_count = (sizeof (gd_inet_programs) /
+ sizeof (gd_inet_programs[0]));
+
+/* Table of RPC programs that glusterd_init_uds_listener() registers on the
+ * unix-domain-socket rpcsvc. */
+struct rpcsvc_program *gd_uds_programs[] = {
+ &gd_svc_cli_prog,
+ &gluster_cli_getspec_prog,
+};
+/* Number of entries in gd_uds_programs, computed at compile time. */
+int gd_uds_programs_count = (sizeof (gd_uds_programs) /
+ sizeof (gd_uds_programs[0]));
+
+/* Display strings indexed by GD_OP_* value. Both sentinels (GD_OP_NONE and
+ * GD_OP_MAX) map to "Invalid op"; any index without a designated
+ * initializer below is left NULL by the language rules. */
+const char *gd_op_list[GD_OP_MAX + 1] = {
+ [GD_OP_NONE] = "Invalid op",
+ [GD_OP_CREATE_VOLUME] = "Create",
+ [GD_OP_START_BRICK] = "Start Brick",
+ [GD_OP_STOP_BRICK] = "Stop Brick",
+ [GD_OP_DELETE_VOLUME] = "Delete",
+ [GD_OP_START_VOLUME] = "Start",
+ [GD_OP_STOP_VOLUME] = "Stop",
+ [GD_OP_DEFRAG_VOLUME] = "Rebalance",
+ [GD_OP_ADD_BRICK] = "Add brick",
+ [GD_OP_DETACH_TIER] = "Detach tier",
+ [GD_OP_TIER_MIGRATE] = "Tier migration",
+ [GD_OP_REMOVE_BRICK] = "Remove brick",
+ [GD_OP_REPLACE_BRICK] = "Replace brick",
+ [GD_OP_SET_VOLUME] = "Set",
+ [GD_OP_RESET_VOLUME] = "Reset",
+ [GD_OP_SYNC_VOLUME] = "Sync",
+ [GD_OP_LOG_ROTATE] = "Log rotate",
+ [GD_OP_GSYNC_SET] = "Geo-replication",
+ [GD_OP_PROFILE_VOLUME] = "Profile",
+ [GD_OP_QUOTA] = "Quota",
+ [GD_OP_STATUS_VOLUME] = "Status",
+ [GD_OP_REBALANCE] = "Rebalance",
+ [GD_OP_HEAL_VOLUME] = "Heal",
+ [GD_OP_STATEDUMP_VOLUME] = "Statedump",
+ [GD_OP_LIST_VOLUME] = "Lists",
+ [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks",
+ [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance",
+ [GD_OP_COPY_FILE] = "Copy File",
+ [GD_OP_SYS_EXEC] = "Execute system commands",
+ [GD_OP_GSYNC_CREATE] = "Geo-replication Create",
+ [GD_OP_SNAP] = "Snapshot",
+ [GD_OP_MAX] = "Invalid op"
+};
+
+/*
+ * Reset the global opinfo's op field to GD_OP_NONE.
+ *
+ * NOTE(review): the function has always returned -1 and that is preserved
+ * here; confirm whether any caller inspects the result before changing it.
+ */
+static int
+glusterd_opinfo_init ()
+{
+        opinfo.op = GD_OP_NONE;
+
+        return -1;
+}
+
+
+/*
+ * Obtain this node's UUID: try glusterd_retrieve_uuid() first and fall back
+ * to glusterd_uuid_generate_save() when retrieval fails.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the generate/save
+ * step.
+ */
+int
+glusterd_uuid_init ()
+{
+        xlator_t        *xl   = NULL;
+        glusterd_conf_t *conf = NULL;
+        int              rc   = -1;
+
+        xl = THIS;
+        GF_ASSERT (xl);
+        conf = xl->private;
+
+        rc = glusterd_retrieve_uuid ();
+        if (rc != 0) {
+                /* Nothing usable was retrieved: create a new identity. */
+                rc = glusterd_uuid_generate_save ();
+                if (rc != 0) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_UUID_GEN_STORE_FAIL,
+                                "Unable to generate and save new UUID");
+                        return rc;
+                }
+                return 0;
+        }
+
+        gf_msg (xl->name, GF_LOG_INFO, 0,
+                GD_MSG_RETRIEVED_UUID,
+                "retrieved UUID: %s", uuid_utoa (conf->uuid));
+        return 0;
+}
+
+/*
+ * Generate a new UUID into priv->uuid and persist it with
+ * glusterd_store_global_info().
+ *
+ * Returns the result of glusterd_store_global_info(); a failure is logged.
+ */
+int
+glusterd_uuid_generate_save ()
+{
+        xlator_t        *xl   = NULL;
+        glusterd_conf_t *conf = NULL;
+        int              rc   = -1;
+
+        xl = THIS;
+        GF_ASSERT (xl);
+        conf = xl->private;
+        GF_ASSERT (conf);
+
+        gf_uuid_generate (conf->uuid);
+        gf_msg (xl->name, GF_LOG_INFO, 0,
+                GD_MSG_GENERATED_UUID, "generated UUID: %s",
+                uuid_utoa (conf->uuid));
+
+        rc = glusterd_store_global_info (xl);
+        if (rc != 0) {
+                gf_msg (xl->name, GF_LOG_ERROR, 0,
+                        GD_MSG_UUID_STORE_FAIL,
+                        "Unable to store the generated uuid %s",
+                        uuid_utoa (conf->uuid));
+        }
+
+        return rc;
+}
+
+/*
+ * Create priv->opts and populate it, either from the store or, when nothing
+ * can be retrieved, with an initial global option version of "0" which is
+ * then written back to the store.
+ *
+ * Returns 0 on success and non-zero on failure. Unlike the previous code,
+ * a dict_new() or dict_set_str() failure is now reported to the caller
+ * instead of being turned into 0 at the out label, which used to let
+ * initialization continue with a NULL or incomplete priv->opts.
+ */
+int
+glusterd_options_init (xlator_t *this)
+{
+        int              ret             = -1;
+        glusterd_conf_t *priv            = NULL;
+        char            *initial_version = "0";
+
+        priv = this->private;
+
+        priv->opts = dict_new ();
+        if (!priv->opts)
+                goto out;       /* ret is still -1 */
+
+        ret = glusterd_store_retrieve_options (this);
+        if (ret == 0)
+                goto out;       /* options already present in the store */
+
+        ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+                            initial_version);
+        if (ret)
+                goto out;
+
+        ret = glusterd_store_options (this, priv->opts);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VERS_STORE_FAIL, "Unable to store version");
+        }
+out:
+        return ret;
+}
+
+/*
+ * Submit a GF_CBK_FETCHSPEC callback to every transport on priv->xprt_list.
+ * The list is walked with priv->xprt_lock held. Always returns 0; the
+ * result of each rpcsvc_callback_submit() call is not examined.
+ */
+int
+glusterd_fetchspec_notify (xlator_t *this)
+{
+        glusterd_conf_t *conf = this->private;
+        rpc_transport_t *xprt = NULL;
+
+        pthread_mutex_lock (&conf->xprt_lock);
+        list_for_each_entry (xprt, &conf->xprt_list, list) {
+                rpcsvc_callback_submit (conf->rpc, xprt,
+                                        &glusterd_cbk_prog,
+                                        GF_CBK_FETCHSPEC, NULL, 0);
+        }
+        pthread_mutex_unlock (&conf->xprt_lock);
+
+        return 0;
+}
+
+/*
+ * Submit a GF_CBK_GET_SNAPS callback to every transport on priv->xprt_list.
+ * The list is walked with priv->xprt_lock held. Always returns 0.
+ *
+ * TODO: the handshake protocol does not yet identify rpc clients, so the
+ * snap daemon cannot be singled out among the glusterfs processes registered
+ * with glusterd. The notification is therefore sent to all transports; only
+ * processes that registered an rpc client for this callback will respond.
+ * Once clients can be identified, restrict the notification to those that
+ * registered.
+ */
+int
+glusterd_fetchsnap_notify (xlator_t *this)
+{
+        glusterd_conf_t *conf = this->private;
+        rpc_transport_t *xprt = NULL;
+
+        pthread_mutex_lock (&conf->xprt_lock);
+        list_for_each_entry (xprt, &conf->xprt_list, list) {
+                rpcsvc_callback_submit (conf->rpc, xprt,
+                                        &glusterd_cbk_prog,
+                                        GF_CBK_GET_SNAPS, NULL, 0);
+        }
+        pthread_mutex_unlock (&conf->xprt_lock);
+
+        return 0;
+}
+
+
+/*
+ * Initialise memory accounting for this xlator with gf_gld_mt_end + 1
+ * types. Returns -1 when this is NULL, otherwise the result of
+ * xlator_mem_acct_init(); a non-zero result is logged.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int rc = -1;
+
+        if (this != NULL) {
+                rc = xlator_mem_acct_init (this, gf_gld_mt_end + 1);
+                if (rc != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                GD_MSG_NO_MEMORY, "Memory accounting init"
+                                " failed");
+                }
+        }
+
+        return rc;
+}
+
+/*
+ * rpcsvc event callback: keeps priv->xprt_list in step with accepted and
+ * disconnected transports.
+ *
+ * @rpc:   unused here
+ * @xl:    the glusterd xlator (xlator_t *)
+ * @event: rpcsvc event being reported
+ * @data:  the affected transport (rpc_transport_t *)
+ *
+ * Always returns 0. A NULL xl or data is logged and ignored.
+ */
+int
+glusterd_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+                        void *data)
+{
+        xlator_t        *this = NULL;
+        rpc_transport_t *xprt = NULL;
+        glusterd_conf_t *priv = NULL;
+
+        if (!xl || !data) {
+                gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                        GD_MSG_NO_INIT,
+                        "Calling rpc_notify without initializing");
+                return 0;
+        }
+
+        this = xl;
+        xprt = data;
+        priv = this->private;
+
+        if (event == RPCSVC_EVENT_ACCEPT) {
+                pthread_mutex_lock (&priv->xprt_lock);
+                list_add_tail (&xprt->list, &priv->xprt_list);
+                pthread_mutex_unlock (&priv->xprt_lock);
+        } else if (event == RPCSVC_EVENT_DISCONNECT) {
+                /* A DISCONNECT event can arrive without a preceding ACCEPT
+                 * for this transport. This happens when the server expects
+                 * encrypted connections but the client tries to connect
+                 * unencrypted. Such a transport was never put on the list.
+                 */
+                if (!list_empty (&xprt->list)) {
+                        pthread_mutex_lock (&priv->xprt_lock);
+                        list_del (&xprt->list);
+                        pthread_mutex_unlock (&priv->xprt_lock);
+                        pmap_registry_remove (this, 0, NULL,
+                                              GF_PMAP_PORT_NONE, xprt);
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Register prog with svc through rpcsvc_program_register() and emit a debug
+ * message naming the program when that fails. Returns the registration
+ * result unchanged.
+ */
+static int32_t
+glusterd_program_register (xlator_t *this, rpcsvc_t *svc,
+                           rpcsvc_program_t *prog)
+{
+        int32_t rc = rpcsvc_program_register (svc, prog);
+
+        if (rc != 0) {
+                gf_msg_debug (this->name, 0,
+                              "cannot register program (name: %s, prognum:%d, "
+                              "progver:%d)", prog->progname, prog->prognum,
+                              prog->progver);
+        }
+
+        return rc;
+}
+
+/*
+ * Ensure options carries a "transport.socket.listen-backlog" value. When
+ * the key cannot be read, GLUSTERD_SOCKET_LISTEN_BACKLOG is stored as the
+ * default.
+ *
+ * Returns 0 on success, or the dict_set_uint32() error when the default
+ * cannot be stored.
+ */
+int
+glusterd_rpcsvc_options_build (dict_t *options)
+{
+        int      ret     = 0;
+        uint32_t backlog = 0;
+
+        ret = dict_get_uint32 (options, "transport.socket.listen-backlog",
+                               &backlog);
+
+        if (ret) {
+                backlog = GLUSTERD_SOCKET_LISTEN_BACKLOG;
+                ret = dict_set_uint32 (options,
+                                       "transport.socket.listen-backlog",
+                                       backlog);
+                if (ret)
+                        goto out;
+        }
+
+        /* backlog is unsigned: print it with %u, not %d. */
+        gf_msg_debug ("glusterd", 0, "listen-backlog value: %u", backlog);
+
+out:
+        return ret;
+}
+
+#if SYNCDAEMON_COMPILE
+/*
+ * Run "gsyncd --version" and check that the first line of its output
+ * contains "gsyncd".
+ *
+ * @valid_state: written only on failure paths: 0 when the binary could not
+ *               be started because of ENOENT, -1 for any other failure
+ *               that sets it.
+ *
+ * Returns 0 when the check passes and -1 otherwise.
+ */
+static int
+glusterd_check_gsync_present (int *valid_state)
+{
+        char      line[PATH_MAX] = {0, };
+        runner_t  runner         = {0,};
+        char     *got            = NULL;
+        int       rc             = 0;
+
+        runinit (&runner);
+        runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+
+        rc = runner_start (&runner);
+        if (rc == -1) {
+                if (errno == ENOENT) {
+                        gf_msg ("glusterd", GF_LOG_INFO, errno,
+                                GD_MSG_MODULE_NOT_INSTALLED, GEOREP
+                                " module not installed in the system");
+                        *valid_state = 0;
+                } else {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_MODULE_NOT_WORKING, GEOREP
+                                " module not working as desired");
+                        *valid_state = -1;
+                }
+                goto out;
+        }
+
+        /* No output at all and output that does not mention gsyncd are
+         * reported identically. */
+        got = fgets(line, sizeof(line), runner_chio (&runner, STDOUT_FILENO));
+        if (!got || !strstr (line, "gsyncd")) {
+                rc = -1;
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_MODULE_NOT_WORKING, GEOREP" module not "
+                        "working as desired");
+                *valid_state = -1;
+                goto out;
+        }
+
+        rc = 0;
+ out:
+        runner_end (&runner);
+
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Give group gid write access to directory path: change the group owner,
+ * then add group write/execute and the sticky bit to the existing
+ * permission bits.
+ *
+ * Returns 0 on success, -1 (after logging) when stat, chown or chmod fails.
+ */
+static int
+group_write_allow (char *path, gid_t gid)
+{
+        struct stat sb = {0,};
+        int         rc = 0;
+
+        rc = sys_stat (path, &sb);
+        if (rc != -1) {
+                GF_ASSERT (S_ISDIR (sb.st_mode));
+                rc = sys_chown (path, -1, gid);
+        }
+        if (rc != -1) {
+                rc = sys_chmod (path,
+                                (sb.st_mode & ~S_IFMT) | S_IWGRP|S_IXGRP|S_ISVTX);
+        }
+
+        if (rc == -1) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, errno,
+                        GD_MSG_WRITE_ACCESS_GRANT_FAIL,
+                        "failed to set up write access to %s for group %d (%s)",
+                        path, gid, strerror (errno));
+        }
+        return rc;
+}
+
+/*
+ * Create the geo-replication working directory under conf->workdir and the
+ * three geo-replication log directories under DEFAULT_LOG_FILE_DIRECTORY.
+ * When the GEOREP"-log-group" option is set, also grant that group write
+ * access to the log directories.
+ *
+ * @georepdir: output buffer, written with snprintf (..., PATH_MAX, ...);
+ *             receives "<workdir>/"GEOREP
+ * @conf:      glusterd configuration supplying workdir
+ *
+ * Returns 0 on success and -1 on failure (each failure is logged).
+ */
+static int
+glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf)
+{
+        char         *log_group = NULL;
+        struct group *grp       = NULL;
+        int           rc        = 0;
+
+        GF_ASSERT (georepdir);
+        GF_ASSERT (conf);
+
+        /* Working directory: <workdir>/GEOREP */
+        if (strlen (conf->workdir)+2 > PATH_MAX-strlen(GEOREP)) {
+                rc = -1;
+                gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+                        GD_MSG_DIRPATH_TOO_LONG,
+                        "directory path %s/"GEOREP" is longer than PATH_MAX",
+                        conf->workdir);
+                goto out;
+        }
+
+        snprintf (georepdir, PATH_MAX, "%s/"GEOREP, conf->workdir);
+        rc = mkdir_p (georepdir, 0777, _gf_true);
+        if (rc == -1) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create "GEOREP" directory %s",
+                        georepdir);
+                goto out;
+        }
+
+        /* Master log file directory */
+        if (strlen (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP) >= PATH_MAX) {
+                rc = -1;
+                gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+                        GD_MSG_DIRPATH_TOO_LONG,
+                        "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"
+                        GEOREP" is longer than PATH_MAX");
+                goto out;
+        }
+        rc = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP, 0777, _gf_true);
+        if (rc == -1) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create "GEOREP" log directory");
+                goto out;
+        }
+
+        /* Slave log file directory */
+        if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves") >= PATH_MAX) {
+                rc = -1;
+                gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+                        GD_MSG_DIRPATH_TOO_LONG,
+                        "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"
+                        GEOREP"-slaves"" is longer than PATH_MAX");
+                goto out;
+        }
+        rc = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves", 0777,
+                      _gf_true);
+        if (rc == -1) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create "GEOREP" slave log directory");
+                goto out;
+        }
+
+        /* MountBroker log file directory */
+        if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr") >= PATH_MAX) {
+                rc = -1;
+                gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+                        GD_MSG_DIRPATH_TOO_LONG,
+                        "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP
+                        "-slaves/mbr"" is longer than PATH_MAX");
+                goto out;
+        }
+        rc = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr", 0777,
+                      _gf_true);
+        if (rc == -1) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create "GEOREP" mountbroker slave log directory");
+                goto out;
+        }
+
+        /* The log group is optional: without it there is nothing more to do. */
+        if (dict_get_str (THIS->options, GEOREP"-log-group", &log_group) != 0) {
+                rc = 0;
+                goto out;
+        }
+
+        grp = getgrnam (log_group);
+        if (!grp) {
+                gf_msg ("glusterd", GF_LOG_CRITICAL, 0,
+                        GD_MSG_LOGGROUP_INVALID,
+                        "group "GEOREP"-log-group %s does not exist", log_group);
+                rc = -1;
+                goto out;
+        }
+
+        /* Stop at the first directory whose permissions cannot be set. */
+        rc = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP,
+                                grp->gr_gid);
+        if (rc == 0)
+                rc = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"
+                                        GEOREP"-slaves", grp->gr_gid);
+        if (rc == 0)
+                rc = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"
+                                        GEOREP"-slaves/mbr", grp->gr_gid);
+
+ out:
+        gf_msg_debug ("glusterd", 0, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Initialise runner with the leading arguments shared by every gsyncd
+ * configuration command issued from this file:
+ *   gsyncd -c <workdir>/GSYNC_CONF_TEMPLATE --config-set-rx
+ * The caller appends the option-specific arguments.
+ */
+static void
+runinit_gsyncd_setrx (runner_t *runner, glusterd_conf_t *conf)
+{
+        runinit (runner);
+
+        runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+        runner_argprintf (runner, "%s/"GSYNC_CONF_TEMPLATE, conf->workdir);
+
+        runner_add_arg (runner, "--config-set-rx");
+}
+
+/* Run the command prepared in the local `runner`; on failure log it, release
+ * the runner and leave the function through its out label with ret == -1.
+ * Expects `ret`, `runner` and an `out` label in the enclosing scope. */
+#define RUN_GSYNCD_CMD do {                                             \
+        ret = runner_run_reuse (&runner);                               \
+        if (ret == -1) {                                                \
+                runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \
+                runner_end (&runner);                                   \
+                goto out;                                               \
+        }                                                               \
+        runner_end (&runner);                                           \
+} while (0)
+
+/*
+ * Write the default geo-replication (gsyncd) configuration by invoking
+ * gsyncd --config-set-rx once per option, first for the master side and
+ * then for the slave side.
+ *
+ * Returns 0 on success or when configuration is skipped (setenv failure,
+ * gsyncd not installed, geo-rep directories could not be created), and -1
+ * when gsyncd is present but broken or one of the commands fails.
+ *
+ * Changes from the previous version:
+ *  - the result of glusterd_crt_georep_folders() is now stored in ret, so
+ *    the check that follows it is no longer dead code and configuration is
+ *    not attempted with an unusable georepdir;
+ *  - the "state-detail-file" command, which was issued twice with identical
+ *    arguments, is issued once.
+ */
+static int
+configure_syncdaemon (glusterd_conf_t *conf)
+{
+        int       ret                 = 0;
+        runner_t  runner              = {0,};
+        char      georepdir[PATH_MAX] = {0,};
+        int       valid_state         = 0;
+
+        ret = setenv ("_GLUSTERD_CALLED_", "1", 1);
+        if (ret < 0) {
+                ret = 0;
+                goto out;
+        }
+        valid_state = -1;
+        ret = glusterd_check_gsync_present (&valid_state);
+        if (-1 == ret) {
+                /* 0 when gsyncd is simply not installed, -1 when broken */
+                ret = valid_state;
+                goto out;
+        }
+
+        ret = glusterd_crt_georep_folders (georepdir, conf);
+        if (ret) {
+                /* Not fatal for glusterd, but nothing can be configured. */
+                ret = 0;
+                goto out;
+        }
+
+        /************
+         * master pre-configuration
+         ************/
+
+        /* remote-gsyncd */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd",
+                         ".", "^ssh:", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* gluster-command-dir */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+                         ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* gluster-params */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "gluster-params",
+                         "aux-gfid-mount acl",
+                         ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* ssh-command */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "ssh-command");
+        runner_argprintf (&runner,
+                          "ssh -oPasswordAuthentication=no "
+                          "-oStrictHostKeyChecking=no "
+                          "-i %s/secret.pem", georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* ssh-command tar */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "ssh-command-tar");
+        runner_argprintf (&runner,
+                          "ssh -oPasswordAuthentication=no "
+                          "-oStrictHostKeyChecking=no "
+                          "-i %s/tar_ssh.pem", georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* pid-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "pid-file");
+        runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/monitor.pid", georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* geo-rep working dir */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "georep-session-working-dir");
+        runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* state-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "state-file");
+        runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/monitor.status", georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* state-detail-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "state-detail-file");
+        runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status",
+                          georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* state-socket */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg (&runner, "state-socket-unencoded");
+        runner_argprintf (&runner, "%s/${mastervol}/${eSlave}.socket", georepdir);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* socketdir */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* log-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner,
+                         "log-file",
+                         DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log",
+                         ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* gluster-log-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner,
+                         "gluster-log-file",
+                         DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log",
+                         ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* ignore-deletes */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* special-sync-mode */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* change-detector == changelog */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* working-dir */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_arg(&runner, "working-dir");
+        runner_argprintf(&runner, "%s/${mastervol}/${eSlave}",
+                         DEFAULT_VAR_RUN_DIRECTORY);
+        runner_add_args (&runner, ".", ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /************
+         * slave pre-configuration
+         ************/
+
+        /* gluster-command-dir */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+                         ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* gluster-params */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner, "gluster-params",
+                         "aux-gfid-mount acl",
+                         ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* log-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner,
+                         "log-file",
+                         DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log",
+                         ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* MountBroker log-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner,
+                         "log-file-mbr",
+                         DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log",
+                         ".", NULL);
+        RUN_GSYNCD_CMD;
+
+        /* gluster-log-file */
+        runinit_gsyncd_setrx (&runner, conf);
+        runner_add_args (&runner,
+                         "gluster-log-file",
+                         DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log",
+                         ".", NULL);
+        RUN_GSYNCD_CMD;
+
+ out:
+        return ret ? -1 : 0;
+}
+#undef RUN_GSYNCD_CMD
+#else /* SYNCDAEMON_COMPILE */
+/* Variant compiled when SYNCDAEMON_COMPILE is false: there is nothing to
+ * configure, so it reports success without looking at conf. */
+static int
+configure_syncdaemon (glusterd_conf_t *conf)
+{
+ return 0;
+}
+#endif /* !SYNCDAEMON_COMPILE */
+
+
+/*
+ * Validate the mountbroker root directory and create the MB_HIVE
+ * subdirectory inside it.
+ *
+ * Checks performed:
+ *  - mountbroker_root is a directory owned by uid 0 and not writable by
+ *    group or others (a warning is logged if it is not searchable by them);
+ *  - every ancestor up to the filesystem root is owned by uid 0 and is not
+ *    group/other writable unless the sticky bit is set;
+ *  - MB_HIVE exists (or can be created) below the root with mode 0711.
+ *
+ * Returns 0 on success and -1 on any failure (each failure is logged).
+ *
+ * Fixes relative to the previous version:
+ *  - after handing dfd2 over to dfd the former is reset to -1, so a later
+ *    openat failure no longer closes the same descriptor twice at cleanup;
+ *  - the "reached the root" test compares st_dev as well as st_ino, since
+ *    inode numbers are only unique within one filesystem;
+ *  - the ancestor "too strict" warning examines the ancestor (st2), not
+ *    the directory below it;
+ *  - a dup() failure is detected immediately.
+ */
+static int
+check_prepare_mountbroker_root (char *mountbroker_root)
+{
+        int         dfd0 = -1;  /* stays on mountbroker_root itself */
+        int         dfd  = -1;  /* directory currently being walked up from */
+        int         dfd2 = -1;  /* parent of dfd while it is being examined */
+        struct stat st   = {0,};
+        struct stat st2  = {0,};
+        int         ret  = 0;
+
+        ret = open (mountbroker_root, O_RDONLY);
+        if (ret != -1) {
+                dfd = ret;
+                ret = sys_fstat (dfd, &st);
+        }
+        if (ret == -1 || !S_ISDIR (st.st_mode)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "cannot access mountbroker-root directory %s",
+                        mountbroker_root);
+                ret = -1;
+                goto out;
+        }
+        if (st.st_uid != 0 ||
+            (st.st_mode & (S_IWGRP|S_IWOTH))) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_DIR_PERM_LIBERAL,
+                        "permissions on mountbroker-root directory %s are "
+                        "too liberal", mountbroker_root);
+                ret = -1;
+                goto out;
+        }
+        if (!(st.st_mode & (S_IXGRP|S_IXOTH))) {
+                gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                        GD_MSG_DIR_PERM_STRICT,
+                        "permissions on mountbroker-root directory %s are "
+                        "probably too strict", mountbroker_root);
+        }
+
+        /* Keep a handle on the root itself; dfd moves upward below. */
+        dfd0 = dup (dfd);
+        if (dfd0 == -1) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "cannot access mountbroker-root directory %s",
+                        mountbroker_root);
+                ret = -1;
+                goto out;
+        }
+
+        for (;;) {
+                ret = sys_openat (dfd, "..", O_RDONLY);
+                if (ret != -1) {
+                        dfd2 = ret;
+                        ret = sys_fstat (dfd2, &st2);
+                }
+                if (ret == -1) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                                GD_MSG_DIR_OP_FAILED,
+                                "error while checking mountbroker-root ancestors "
+                                "%d (%s)", errno, strerror (errno));
+                        goto out;
+                }
+
+                /* ".." of the root is the root itself: same device+inode */
+                if (st2.st_dev == st.st_dev && st2.st_ino == st.st_ino)
+                        break; /* arrived to root */
+
+                if (st2.st_uid != 0 ||
+                    ((st2.st_mode & (S_IWGRP|S_IWOTH)) &&
+                     !(st2.st_mode & S_ISVTX))) {
+                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                GD_MSG_DIR_PERM_LIBERAL,
+                                "permissions on ancestors of mountbroker-root "
+                                "directory are too liberal");
+                        ret = -1;
+                        goto out;
+                }
+                if (!(st2.st_mode & (S_IXGRP|S_IXOTH))) {
+                        gf_msg ("glusterd", GF_LOG_WARNING, 0,
+                                GD_MSG_DIR_PERM_STRICT,
+                                "permissions on ancestors of mountbroker-root "
+                                "directory are probably too strict");
+                }
+
+                /* Step up one level; dfd now owns what dfd2 referred to. */
+                sys_close (dfd);
+                dfd = dfd2;
+                dfd2 = -1;
+                st = st2;
+        }
+
+        ret = sys_mkdirat (dfd0, MB_HIVE, 0711);
+        if (ret == -1 && errno == EEXIST)
+                ret = 0;
+        if (ret != -1)
+                ret = sys_fstatat (dfd0, MB_HIVE, &st, AT_SYMLINK_NOFOLLOW);
+        if (ret == -1 || st.st_mode != (S_IFDIR|0711)) {
+                gf_msg ("glusterd", GF_LOG_ERROR, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "failed to set up mountbroker-root directory %s",
+                        mountbroker_root);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+
+ out:
+        if (dfd0 != -1)
+                sys_close (dfd0);
+        if (dfd != -1)
+                sys_close (dfd);
+        if (dfd2 != -1)
+                sys_close (dfd2);
+
+        return ret;
+}
+
+/*
+ * Build a mount spec from one option and append it to priv->mount_specs.
+ *
+ * Only keys for which strtail() finds one of the prefixes "mountbroker.",
+ * "mountbroker-"GEOREP"." or "mountbroker-"GHADOOP"." are handled; any other
+ * key is ignored and 0 is returned. For the geo-rep and hadoop forms the
+ * value is split at ':' into volume name, user and (hadoop only) volfile
+ * server; for the plain form the value is passed to
+ * parse_mount_pattern_desc().
+ *
+ * Returns 0 on success or when the key is ignored, -1 on failure.
+ *
+ * NOTE(review): the (dict, key, value, data) signature suggests this is a
+ * dict iteration callback; opts and data are not used here. Confirm at the
+ * call site.
+ */
+static int
+_install_mount_spec (dict_t *opts, char *key, data_t *value, void *data)
+{
+ glusterd_conf_t *priv = THIS->private;
+ char *label = NULL;
+ gf_boolean_t georep = _gf_false;
+ gf_boolean_t ghadoop = _gf_false;
+ char *pdesc = value->data;
+ char *volname = NULL;
+ int rv = 0;
+ gf_mount_spec_t *mspec = NULL;
+ char *user = NULL;
+ char *volfile_server = NULL;
+
+ /* presumably strtail() yields the part of key after the prefix, or NULL
+ * when key does not start with it - TODO confirm */
+ label = strtail (key, "mountbroker.");
+
+ /* check for presence of geo-rep/hadoop label */
+ if (!label) {
+ label = strtail (key, "mountbroker-"GEOREP".");
+ if (label)
+ georep = _gf_true;
+ else {
+ label = strtail (key, "mountbroker-"GHADOOP".");
+ if (label)
+ ghadoop = _gf_true;
+ }
+ }
+
+ if (!label)
+ return 0;
+
+ mspec = GF_CALLOC (1, sizeof (*mspec), gf_gld_mt_mount_spec);
+ if (!mspec)
+ goto err;
+ mspec->label = label;
+
+ if (georep || ghadoop) {
+ /* Work on a private copy: it is split in place at ':' below. */
+ volname = gf_strdup (pdesc);
+ if (!volname)
+ goto err;
+ user = strchr (volname, ':');
+ if (user) {
+ *user = '\0';
+ user++;
+ } else
+ /* no explicit user given: fall back to the label */
+ user = label;
+
+ if (georep)
+ rv = make_georep_mountspec (mspec, volname, user);
+
+ if (ghadoop) {
+ /* NOTE(review): when user aliases label, a ':' found here is
+ * overwritten inside the caller's key string - confirm that
+ * is acceptable. */
+ volfile_server = strchr (user, ':');
+ if (volfile_server)
+ *volfile_server++ = '\0';
+ else
+ volfile_server = "localhost";
+
+ rv = make_ghadoop_mountspec (mspec, volname, user, volfile_server);
+ }
+
+ /* volname (and user, when it points into it) is not used past here */
+ GF_FREE (volname);
+ if (rv != 0)
+ goto err;
+ } else if (parse_mount_pattern_desc (mspec, pdesc) != 0)
+ goto err;
+
+ cds_list_add_tail (&mspec->speclist, &priv->mount_specs);
+
+ return 0;
+ err:
+
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_MOUNT_SPEC_INSTALL_FAIL,
+ "adding %smount spec failed: label: %s desc: %s",
+ georep ? GEOREP" " : "", label, pdesc);
+
+ /* Release whatever was built before the failure. */
+ if (mspec) {
+ if (mspec->patterns) {
+ GF_FREE (mspec->patterns->components);
+ GF_FREE (mspec->patterns);
+ }
+ GF_FREE (mspec);
+ }
+
+ return -1;
+}
+
+
+/*
+ * Create the rpcsvc that serves the glusterd unix domain socket; this
+ * listener only serves the cli programs listed in gd_uds_programs.
+ *
+ * The socket path is taken from the "glusterd-sockfile" option, falling
+ * back to DEFAULT_GLUSTERD_SOCKFILE.
+ *
+ * Returns the new rpcsvc on success and NULL on failure.
+ *
+ * Fix: rpcsvc_create_listeners() is expected to yield exactly one listener.
+ * Previously any other result was carried to the cleanup code unchanged,
+ * so a result of 0 listeners was mistaken for success and an rpcsvc
+ * without a listener was returned. It is now turned into an error.
+ */
+rpcsvc_t *
+glusterd_init_uds_listener (xlator_t *this)
+{
+        int       ret                         = -1;
+        dict_t   *options                     = NULL;
+        rpcsvc_t *rpc                         = NULL;
+        data_t   *sock_data                   = NULL;
+        char      sockfile[UNIX_PATH_MAX+1]   = {0,};
+        int       i                           = 0;
+
+        GF_ASSERT (this);
+
+        /* sockfile is zero-filled and one byte larger than the copy limit,
+         * so it is always NUL-terminated. */
+        sock_data = dict_get (this->options, "glusterd-sockfile");
+        if (!sock_data) {
+                strncpy (sockfile, DEFAULT_GLUSTERD_SOCKFILE, UNIX_PATH_MAX);
+        } else {
+                strncpy (sockfile, sock_data->data, UNIX_PATH_MAX);
+        }
+
+        options = dict_new ();
+        if (!options)
+                goto out;
+
+        ret = rpcsvc_transport_unix_options_build (&options, sockfile);
+        if (ret)
+                goto out;
+
+        rpc = rpcsvc_init (this, this->ctx, options, 8);
+        if (rpc == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = rpcsvc_register_notify (rpc, glusterd_rpcsvc_notify, this);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "Failed to register notify function");
+                goto out;
+        }
+
+        ret = rpcsvc_create_listeners (rpc, options, this->name);
+        if (ret != 1) {
+                gf_msg_debug (this->name, 0, "Failed to create listener");
+                ret = -1;       /* a count of 0 must not read as success */
+                goto out;
+        }
+        ret = 0;
+
+        for (i = 0; i < gd_uds_programs_count; i++) {
+                ret = glusterd_program_register (this, rpc, gd_uds_programs[i]);
+                if (ret) {
+                        /* roll back the programs registered so far */
+                        i--;
+                        for (; i >= 0; i--)
+                                rpcsvc_program_unregister (rpc,
+                                                           gd_uds_programs[i]);
+
+                        goto out;
+                }
+        }
+
+out:
+        if (options)
+                dict_unref (options);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_GLUSTERD_SOCK_LISTENER_START_FAIL,
+                        "Failed to start glusterd "
+                        "unix domain socket listener.");
+                if (rpc) {
+                        GF_FREE (rpc);
+                        rpc = NULL;
+                }
+        }
+        return rpc;
+}
+
+/*
+ * Tear down the unix domain socket listener: unregister its programs,
+ * destroy its listeners, drop the notify callback and unlink the socket
+ * file (from "glusterd-sockfile", or DEFAULT_GLUSTERD_SOCKFILE).
+ *
+ * Fix: the programs unregistered are now exactly those in gd_uds_programs,
+ * the table glusterd_init_uds_listener() registers from. Previously
+ * gluster_handshake_prog (never registered on this service) was
+ * unregistered and gluster_cli_getspec_prog was left registered.
+ */
+void
+glusterd_stop_uds_listener (xlator_t *this)
+{
+        glusterd_conf_t   *conf                       = NULL;
+        rpcsvc_listener_t *listener                   = NULL;
+        rpcsvc_listener_t *next                       = NULL;
+        data_t            *sock_data                  = NULL;
+        char               sockfile[UNIX_PATH_MAX+1]  = {0,};
+        int                i                          = 0;
+
+        GF_ASSERT (this);
+        conf = this->private;
+
+        for (i = 0; i < gd_uds_programs_count; i++) {
+                (void) rpcsvc_program_unregister (conf->uds_rpc,
+                                                  gd_uds_programs[i]);
+        }
+
+        list_for_each_entry_safe (listener, next, &conf->uds_rpc->listeners,
+                                  list) {
+                rpcsvc_listener_destroy (listener);
+        }
+
+        (void) rpcsvc_unregister_notify (conf->uds_rpc,
+                                         glusterd_rpcsvc_notify, this);
+
+        /* sockfile is zero-filled and one byte larger than the copy limit,
+         * so it is always NUL-terminated. */
+        sock_data = dict_get (this->options, "glusterd-sockfile");
+        if (!sock_data) {
+                strncpy (sockfile, DEFAULT_GLUSTERD_SOCKFILE, UNIX_PATH_MAX);
+        } else {
+                strncpy (sockfile, sock_data->data, UNIX_PATH_MAX);
+        }
+        sys_unlink (sockfile);
+
+        return;
+}
+
+
+/*
+ * Tear down the rpcsvc held in conf->rpc: unregister every program in
+ * gd_inet_programs, destroy all listeners and drop the notify callback.
+ * Does nothing when this or this->private is NULL.
+ */
+void
+glusterd_stop_listener (xlator_t *this)
+{
+        glusterd_conf_t   *priv = NULL;
+        rpcsvc_listener_t *cur  = NULL;
+        rpcsvc_listener_t *nxt  = NULL;
+        int                idx  = 0;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        gf_msg_debug (this->name, 0,
+                      "%s function called ", __func__);
+
+        for (idx = 0; idx < gd_inet_programs_count; idx++)
+                rpcsvc_program_unregister (priv->rpc, gd_inet_programs[idx]);
+
+        list_for_each_entry_safe (cur, nxt, &priv->rpc->listeners, list)
+                rpcsvc_listener_destroy (cur);
+
+        (void) rpcsvc_unregister_notify (priv->rpc,
+                                         glusterd_rpcsvc_notify,
+                                         this);
+
+out:
+        return;
+}
+
+/*
+ * Decide which run directory to use and copy it into var_run_dir.
+ *
+ * /var/run is normally a symbolic link to /run. Using the link would make
+ * the mount-point entry in mtab differ from the one glusterd maintains, so
+ * GLUSTERD_RUN_DIR is chosen when GLUSTERD_VAR_RUN_DIR is a symlink and
+ * GLUSTERD_VAR_RUN_DIR itself otherwise.
+ *
+ * @var_run_dir: output buffer, written with strcpy(); the caller must
+ *               provide enough room for either path.
+ *
+ * Returns 0 on success, -1 on a NULL argument, or the lstat failure result.
+ */
+static int
+glusterd_find_correct_var_run_dir (xlator_t *this, char *var_run_dir)
+{
+        struct stat sb = {0,};
+        int         rc = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, var_run_dir, out);
+
+        rc = sys_lstat (GLUSTERD_VAR_RUN_DIR, &sb);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "stat fails on %s, exiting. (errno = %d)",
+                        GLUSTERD_VAR_RUN_DIR, errno);
+                goto out;
+        }
+
+        /* If /var/run is symlink then use /run dir */
+        strcpy (var_run_dir,
+                S_ISLNK (sb.st_mode) ? GLUSTERD_RUN_DIR : GLUSTERD_VAR_RUN_DIR);
+
+        rc = 0;
+out:
+        return rc;
+}
+
+/*
+ * Make sure the directory <var_run_dir><dir_to_be_created> exists, creating
+ * it (and missing parents) with mkdir_p() when stat reports ENOENT.
+ *
+ * Returns 0 when the directory exists or was created, -1 when an argument
+ * is NULL, stat fails for another reason, the path exists but is not a
+ * directory, or creation fails.
+ */
+static int
+glusterd_init_var_run_dirs (xlator_t *this, char *var_run_dir,
+                            char *dir_to_be_created)
+{
+        struct stat sb                 = {0,};
+        char        fullpath[PATH_MAX] = {0, };
+        int         rc                 = -1;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, var_run_dir, out);
+        GF_VALIDATE_OR_GOTO (this->name, dir_to_be_created, out);
+
+        snprintf (fullpath, sizeof(fullpath), "%s%s",
+                  var_run_dir, dir_to_be_created);
+
+        rc = sys_stat (fullpath, &sb);
+
+        /* Any stat failure other than "does not exist" is fatal. */
+        if ((rc != 0) && (errno != ENOENT)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "stat fails on %s, exiting. (errno = %d)",
+                        fullpath, errno);
+                rc = -1;
+                goto out;
+        }
+
+        /* Something exists at the path but it is not a directory. */
+        if ((rc == 0) && (!S_ISDIR(sb.st_mode))) {
+                gf_msg (this->name, GF_LOG_CRITICAL, ENOENT,
+                        GD_MSG_DIR_NOT_FOUND,
+                        "Provided snap path %s is not a directory,"
+                        "exiting", fullpath);
+                rc = -1;
+                goto out;
+        }
+
+        if ((rc == -1) && (errno == ENOENT)) {
+                /* Create missing dirs */
+                rc = mkdir_p (fullpath, 0777, _gf_true);
+                if (rc == -1) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                GD_MSG_CREATE_DIR_FAILED,
+                                "Unable to create directory %s"
+                                " ,errno = %d", fullpath, errno);
+                }
+        }
+
+out:
+        return rc;
+}
+
+/*
+ * Install the build callback of each daemon service object held in the
+ * glusterd configuration (shd, nfs, quotad, bitd, scrub) and invoke it on
+ * that object.
+ */
+static void
+glusterd_svcs_build ()
+{
+        xlator_t        *xl   = THIS;
+        glusterd_conf_t *conf = NULL;
+
+        GF_ASSERT (xl);
+        conf = xl->private;
+        GF_ASSERT (conf);
+
+        conf->shd_svc.build = glusterd_shdsvc_build;
+        glusterd_shdsvc_build (&conf->shd_svc);
+
+        conf->nfs_svc.build = glusterd_nfssvc_build;
+        glusterd_nfssvc_build (&conf->nfs_svc);
+
+        conf->quotad_svc.build = glusterd_quotadsvc_build;
+        glusterd_quotadsvc_build (&conf->quotad_svc);
+
+        conf->bitd_svc.build = glusterd_bitdsvc_build;
+        glusterd_bitdsvc_build (&conf->bitd_svc);
+
+        conf->scrub_svc.build = glusterd_scrubsvc_build;
+        glusterd_scrubsvc_build (&conf->scrub_svc);
+}
+
+/*
+ * Read the "upgrade" option from options into *upgrade.
+ *
+ * When the option is absent *upgrade is left untouched and 0 is returned.
+ * Returns -1 (after logging) when the option is present but is not a valid
+ * boolean string, 0 otherwise.
+ */
+static int
+is_upgrade (dict_t *options, gf_boolean_t *upgrade)
+{
+        char *value = NULL;
+
+        if (dict_get_str (options, "upgrade", &value) != 0)
+                return 0;
+
+        if (gf_string2boolean (value, upgrade) != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_STR_TO_BOOL_FAIL, "upgrade option "
+                        "%s is not a valid boolean type", value);
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Read the "downgrade" option from options into *downgrade.
+ *
+ * When the option is absent *downgrade is left untouched and 0 is returned.
+ * Returns -1 (after logging) when the option is present but is not a valid
+ * boolean string, 0 otherwise.
+ */
+static int
+is_downgrade (dict_t *options, gf_boolean_t *downgrade)
+{
+        char *value = NULL;
+
+        if (dict_get_str (options, "downgrade", &value) != 0)
+                return 0;
+
+        if (gf_string2boolean (value, downgrade) != 0) {
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                        GD_MSG_STR_TO_BOOL_FAIL, "downgrade option "
+                        "%s is not a valid boolean type", value);
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * init - called during glusterd initialization
+ *
+ * @this: the glusterd xlator instance being initialised
+ *
+ * Steps, in order:
+ *   1. raise the open-file limit (skipped on Darwin);
+ *   2. resolve/create the working directory and the run-time, snapshot,
+ *      shared-storage, log and per-service sub-directories;
+ *   3. create the RPC service, its listeners and the CLI unix-socket
+ *      listener, and register the inet programs;
+ *   4. allocate the glusterd_conf_t, publish it as this->private and
+ *      initialise state machines, locks and service objects;
+ *   5. process mountbroker options, upgrade/downgrade flags, restore the
+ *      persisted op-version and state, spawn daemons and the hooks worker,
+ *      and apply the "event-threads" option.
+ *
+ * Unrecoverable directory/log-file errors terminate the process with
+ * exit (1).  All other failures return a non-zero value; when the value is
+ * negative the configuration object is freed before returning.
+ *
+ * Returns 0 on success.
+ */
+int
+init (xlator_t *this)
+{
+        int32_t          ret                         = -1;
+        rpcsvc_t        *rpc                         = NULL;
+        rpcsvc_t        *uds_rpc                     = NULL;
+        glusterd_conf_t *conf                        = NULL;
+        data_t          *dir_data                    = NULL;
+        struct stat      buf                         = {0,};
+        char             storedir[PATH_MAX]          = {0,};
+        char             workdir[PATH_MAX]           = {0,};
+        char             cmd_log_filename[PATH_MAX]  = {0,};
+        int              first_time                  = 0;
+        char            *mountbroker_root            = NULL;
+        int              i                           = 0;
+        int              total_transport             = 0;
+        char            *valgrind_str                = NULL;
+        char            *transport_type              = NULL;
+        char             var_run_dir[PATH_MAX]       = {0,};
+        int32_t          workers                     = 0;
+        gf_boolean_t     upgrade                     = _gf_false;
+        gf_boolean_t     downgrade                   = _gf_false;
+
+#ifndef GF_DARWIN_HOST_OS
+        {
+                struct rlimit lim;
+                lim.rlim_cur = 65536;
+                lim.rlim_max = 65536;
+
+                /* Failure here is logged but is not fatal. */
+                if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                GD_MSG_SETXATTR_FAIL,
+                                "Failed to set 'ulimit -n "
+                                " 65536'");
+                } else {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                GD_MSG_FILE_DESC_LIMIT_SET,
+                                "Maximum allowed open file descriptors "
+                                "set to 65536");
+                }
+        }
+#endif
+
+        dir_data = dict_get (this->options, "working-directory");
+
+        /* workdir is zero-initialised; copying at most sizeof - 1 bytes
+         * guarantees it stays NUL-terminated even for over-long input. */
+        if (!dir_data) {
+                //Use default working dir
+                strncpy (workdir, GLUSTERD_DEFAULT_WORKDIR,
+                         sizeof (workdir) - 1);
+        } else {
+                strncpy (workdir, dir_data->data, sizeof (workdir) - 1);
+        }
+
+        ret = sys_stat (workdir, &buf);
+        if ((ret != 0) && (ENOENT != errno)) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "stat fails on %s, exiting. (errno = %d)",
+                        workdir, errno);
+                exit (1);
+        }
+
+        if ((!ret) && (!S_ISDIR(buf.st_mode))) {
+                gf_msg (this->name, GF_LOG_CRITICAL, ENOENT,
+                        GD_MSG_DIR_NOT_FOUND,
+                        "Provided working area %s is not a directory,"
+                        "exiting", workdir);
+                exit (1);
+        }
+
+
+        /* Working directory does not exist yet: create it. */
+        if ((-1 == ret) && (ENOENT == errno)) {
+                ret = mkdir_p (workdir, 0777, _gf_true);
+
+                if (-1 == ret) {
+                        gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                                GD_MSG_CREATE_DIR_FAILED,
+                                "Unable to create directory %s"
+                                " ,errno = %d", workdir, errno);
+                        exit (1);
+                }
+
+                first_time = 1;
+        }
+
+        setenv ("GLUSTERD_WORKDIR", workdir, 1);
+        gf_msg (this->name, GF_LOG_INFO, 0,
+                GD_MSG_CURR_WORK_DIR_INFO, "Using %s as working directory",
+                workdir);
+
+        ret = glusterd_find_correct_var_run_dir (this, var_run_dir);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_VAR_RUN_DIR_FIND_FAIL, "Unable to find "
+                        "the correct var run dir");
+                exit (1);
+        }
+
+        ret = glusterd_init_var_run_dirs (this, var_run_dir,
+                                          GLUSTERD_DEFAULT_SNAPS_BRICK_DIR);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_CREATE_DIR_FAILED, "Unable to create "
+                        "snap backend folder");
+                exit (1);
+        }
+
+        snprintf (snap_mount_dir, sizeof(snap_mount_dir), "%s%s",
+                  var_run_dir, GLUSTERD_DEFAULT_SNAPS_BRICK_DIR);
+
+        ret = mkdir_p (GLUSTER_SHARED_STORAGE_BRICK_DIR, 0777,
+                       _gf_true);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, 0,
+                        GD_MSG_DIR_OP_FAILED, "Unable to create "
+                        "shared storage brick");
+                exit (1);
+        }
+
+        snprintf (cmd_log_filename, PATH_MAX, "%s/cmd_history.log",
+                  DEFAULT_LOG_FILE_DIRECTORY);
+        ret = gf_cmd_log_init (cmd_log_filename);
+
+        if (ret == -1) {
+                /* Log under the xlator name, not the literal text
+                 * "this->name". */
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_FILE_OP_FAILED,
+                        "Unable to create cmd log file %s", cmd_log_filename);
+                exit (1);
+        }
+
+        /* Per-purpose sub-directories.  An already existing directory
+         * (EEXIST) is fine; any other mkdir failure is fatal. */
+        snprintf (storedir, PATH_MAX, "%s/vols", workdir);
+
+        ret = sys_mkdir (storedir, 0777);
+
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create volume directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/snaps", workdir);
+
+        ret = sys_mkdir (storedir, 0777);
+
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create snaps directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/peers", workdir);
+
+        ret = sys_mkdir (storedir, 0777);
+
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create peers directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/bricks", DEFAULT_LOG_FILE_DIRECTORY);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create logs directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/nfs", workdir);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create nfs directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/bitd", workdir);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create bitrot directory %s",
+                        storedir);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/scrub", workdir);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create scrub directory %s",
+                        storedir);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/glustershd", workdir);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create glustershd directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/quotad", workdir);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create quotad directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        snprintf (storedir, PATH_MAX, "%s/groups", workdir);
+        ret = sys_mkdir (storedir, 0777);
+        if ((-1 == ret) && (errno != EEXIST)) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_CREATE_DIR_FAILED,
+                        "Unable to create groups directory %s"
+                        " ,errno = %d", storedir, errno);
+                exit (1);
+        }
+
+        ret = glusterd_rpcsvc_options_build (this->options);
+        if (ret)
+                goto out;
+        rpc = rpcsvc_init (this, this->ctx, this->options, 64);
+        if (rpc == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_INIT_FAIL,
+                        "failed to init rpc");
+                /* ret is still 0 from the call above; without this the
+                 * failure would be reported to the caller as success. */
+                ret = -1;
+                goto out;
+        }
+
+        ret = rpcsvc_register_notify (rpc, glusterd_rpcsvc_notify, this);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPCSVC_REG_NOTIFY_RETURNED,
+                        "rpcsvc_register_notify returned %d", ret);
+                goto out;
+        }
+
+        /* Enable encryption for the TCP listener if management encryption is
+         * enabled
+         */
+        if (this->ctx->secure_mgmt) {
+                ret = dict_set_str (this->options,
+                                    "transport.socket.ssl-enabled", "on");
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_SET_FAILED,
+                                "failed to set ssl-enabled in dict");
+                        goto out;
+                }
+                /*
+                 * With strong authentication, we can afford to allow
+                 * privileged operations over TCP.
+                 */
+                gd_inet_programs[1] = &gd_svc_cli_prog;
+                /*
+                 * This is the only place where we want secure_srvr to reflect
+                 * the management-plane setting.
+                 */
+                this->ctx->secure_srvr = MGMT_SSL_ALWAYS;
+        }
+
+        /*
+         * only one (at most a pair - rdma and socket) listener for
+         * glusterd1_mop_prog, gluster_pmap_prog and gluster_handshake_prog.
+         */
+
+        ret = dict_get_str (this->options, "transport-type", &transport_type);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED, "Failed to get transport type");
+                ret = -1;
+                goto out;
+        }
+
+        total_transport = rpc_transport_count (transport_type);
+        if (total_transport <= 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_TRANSPORT_COUNT_GET_FAIL,
+                        "failed to get total number of available tranpsorts");
+                ret = -1;
+                goto out;
+        }
+
+        /* At least one listener is required; a partial set is tolerated. */
+        ret = rpcsvc_create_listeners (rpc, this->options, this->name);
+        if (ret < 1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_LISTENER_CREATE_FAIL,
+                        "creation of listener failed");
+                ret = -1;
+                goto out;
+        } else if (ret < total_transport) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_RPC_LISTENER_CREATE_FAIL,
+                        "creation of %d listeners failed, continuing with "
+                        "succeeded transport", (total_transport - ret));
+        }
+
+        for (i = 0; i < gd_inet_programs_count; i++) {
+                ret = glusterd_program_register (this, rpc,
+                                                 gd_inet_programs[i]);
+                if (ret) {
+                        /* Roll back the programs registered so far. */
+                        i--;
+                        for (; i >= 0; i--)
+                                rpcsvc_program_unregister (rpc,
+                                                           gd_inet_programs[i]);
+
+                        goto out;
+                }
+        }
+
+        /*
+         * Start a unix domain socket listener just for cli commands This
+         * should prevent ports from being wasted by being in TIMED_WAIT when
+         * cli commands are done continuously
+         */
+        uds_rpc = glusterd_init_uds_listener (this);
+        if (uds_rpc == NULL) {
+                ret = -1;
+                goto out;
+        }
+
+        conf = GF_CALLOC (1, sizeof (glusterd_conf_t),
+                          gf_gld_mt_glusterd_conf_t);
+        /* ret is 0 at this point; make an allocation failure visible to the
+         * caller.  On success ret is overwritten further down. */
+        ret = -1;
+        GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+        CDS_INIT_LIST_HEAD (&conf->peers);
+        CDS_INIT_LIST_HEAD (&conf->volumes);
+        CDS_INIT_LIST_HEAD (&conf->snapshots);
+        CDS_INIT_LIST_HEAD (&conf->missed_snaps_list);
+
+        pthread_mutex_init (&conf->mutex, NULL);
+        conf->rpc = rpc;
+        conf->uds_rpc = uds_rpc;
+        conf->gfs_mgmt = &gd_brick_prog;
+        /* Bounded copy with explicit termination. */
+        strncpy (conf->workdir, workdir, sizeof (conf->workdir) - 1);
+        conf->workdir[sizeof (conf->workdir) - 1] = '\0';
+
+        synclock_init (&conf->big_lock, SYNC_LOCK_RECURSIVE);
+        pthread_mutex_init (&conf->xprt_lock, NULL);
+        INIT_LIST_HEAD (&conf->xprt_list);
+
+        glusterd_friend_sm_init ();
+        glusterd_op_sm_init ();
+        glusterd_opinfo_init ();
+        ret = glusterd_sm_tr_log_init (&conf->op_sm_log,
+                                       glusterd_op_sm_state_name_get,
+                                       glusterd_op_sm_event_name_get,
+                                       GLUSTERD_TR_LOG_SIZE);
+        if (ret)
+                goto out;
+
+        conf->base_port = GF_IANA_PRIV_PORTS_START;
+        if (dict_get_uint32(this->options, "base-port", &conf->base_port) == 0) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "base-port override: %d", conf->base_port);
+        }
+
+        /* Set option to run bricks on valgrind if enabled in glusterd.vol */
+        conf->valgrind = _gf_false;
+        ret = dict_get_str (this->options, "run-with-valgrind", &valgrind_str);
+        if (ret < 0) {
+                gf_msg_debug (this->name, 0,
+                              "cannot get run-with-valgrind value");
+        }
+        if (valgrind_str) {
+                if (gf_string2boolean (valgrind_str, &(conf->valgrind))) {
+                        gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                                GD_MSG_INVALID_ENTRY,
+                                "run-with-valgrind value not a boolean string");
+                }
+        }
+
+        /* Store ping-timeout in conf */
+        ret = dict_get_int32 (this->options, "ping-timeout",
+                              &conf->ping_timeout);
+        /* Not failing here since ping-timeout can be optional as well */
+
+        /* From here on the configuration is reachable via this->private. */
+        this->private = conf;
+        glusterd_mgmt_v3_lock_init ();
+        glusterd_txn_opinfo_dict_init ();
+        glusterd_svcs_build ();
+
+        /* Make install copies few of the hook-scripts by creating hooks
+         * directory. Hence purposefully not doing the check for the presence of
+         * hooks directory. Doing so avoids creation of complete hooks directory
+         * tree.
+         */
+        ret = glusterd_hooks_create_hooks_directory (conf->workdir);
+        if (-1 == ret) {
+                gf_msg (this->name, GF_LOG_CRITICAL, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "Unable to create hooks directory ");
+                exit (1);
+        }
+
+        CDS_INIT_LIST_HEAD (&conf->mount_specs);
+
+        ret = dict_foreach (this->options, _install_mount_spec, NULL);
+        if (ret)
+                goto out;
+        /* "mountbroker-root" is optional; validate it only when present. */
+        ret = dict_get_str (this->options, "mountbroker-root",
+                            &mountbroker_root);
+        if (ret)
+                ret = 0;
+        else
+                ret = check_prepare_mountbroker_root (mountbroker_root);
+        if (ret)
+                goto out;
+
+        ret = is_upgrade (this->options, &upgrade);
+        if (ret)
+                goto out;
+
+        ret = is_downgrade (this->options, &downgrade);
+        if (ret)
+                goto out;
+
+        if (!upgrade && !downgrade) {
+                ret = configure_syncdaemon (conf);
+                if (ret)
+                        goto out;
+        }
+
+        /* Restoring op-version needs to be done before initializing the
+         * services as glusterd_svc_init_common () invokes
+         * glusterd_conn_build_socket_filepath () which uses MY_UUID macro.
+         * MY_UUID generates a new uuid if its not been generated and writes it
+         * in the info file, Since the op-version is not read yet
+         * the default value i.e. 0 will be written for op-version and restore
+         * will fail. This is why restoring op-version needs to happen before
+         * service initialization
+         * */
+        ret = glusterd_restore_op_version (this);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OP_VERS_RESTORE_FAIL,
+                        "Failed to restore op_version");
+                goto out;
+        }
+
+        ret = glusterd_restore ();
+        if (ret < 0)
+                goto out;
+
+        /* If the peer count is less than 2 then this would be the best time to
+         * spawn process/bricks that may need (re)starting since last time
+         * (this) glusterd was up. */
+        if (glusterd_get_peers_count () < 2)
+                glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
+
+        ret = glusterd_options_init (this);
+        if (ret < 0)
+                goto out;
+
+        ret = glusterd_handle_upgrade_downgrade (this->options, conf, upgrade,
+                                                 downgrade);
+        if (ret)
+                goto out;
+
+        ret = glusterd_hooks_spawn_worker (this);
+        if (ret)
+                goto out;
+
+        GF_OPTION_INIT ("event-threads", workers, int32, out);
+        if (workers > 0 && workers != conf->workers) {
+                conf->workers = workers;
+                ret = event_reconfigure_threads (this->ctx->event_pool,
+                                                 workers);
+                if (ret)
+                        goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret < 0) {
+                if (this->private != NULL) {
+                        GF_FREE (this->private);
+                        this->private = NULL;
+                } else if (conf != NULL) {
+                        /* Allocated but never published in this->private:
+                         * release it here so it is not leaked. */
+                        GF_FREE (conf);
+                }
+
+        }
+
+        return ret;
+}
+
+
+
+
+/*
+ * fini - finish function for glusterd, called before
+ * unloading gluster.
+ *
+ * @this: the glusterd xlator; nothing is done when it or its private
+ * configuration is NULL.
+ *
+ * Only the two RPC listeners are stopped. The code that would release the
+ * configuration is compiled out (#if 0) for the reason given in the comment
+ * inside that region.
+ */
+void
+fini (xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ if (!this || !this->private)
+ goto out;
+
+ /* conf is only referenced by the compiled-out cleanup below. */
+ conf = this->private;
+
+ glusterd_stop_uds_listener (this); /*stop unix socket rpc*/
+ glusterd_stop_listener (this); /*stop tcp/ip socket rpc*/
+
+#if 0
+ /* Running threads might be using these resources, we have to cancel/stop
+ * running threads before deallocating the memory, but we don't have
+ * control over the running threads to do pthread_cancel().
+ * So freeing the memory is left to the kernel.
+ */
+ /*TODO: cancel/stop the running threads*/
+
+ GF_FREE (conf->uds_rpc);
+ GF_FREE (conf->rpc);
+ FREE (conf->pmap);
+ if (conf->handle)
+ gf_store_handle_destroy (conf->handle);
+ glusterd_sm_tr_log_delete (&conf->op_sm_log);
+ glusterd_mgmt_v3_lock_fini ();
+ glusterd_txn_opinfo_dict_fini ();
+ GF_FREE (conf);
+
+ this->private = NULL;
+#endif
+out:
+ return;
+}
+
+/*
+ * notify - event notification entry point for glusterd
+ * @this:  the glusterd xlator
+ * @event: event code
+ * @data:  event payload, handed to default_notify () unchanged
+ *
+ * POLLIN, POLLERR and TRANSPORT_CLEANUP are deliberately ignored; every
+ * other event is passed to default_notify ().  Always returns 0.
+ */
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        switch (event) {
+        case GF_EVENT_POLLIN:
+        case GF_EVENT_POLLERR:
+        case GF_EVENT_TRANSPORT_CLEANUP:
+                /* nothing to do for these events */
+                break;
+
+        default:
+                default_notify (this, event, data);
+                break;
+        }
+
+        return 0;
+}
+
+
+/* File-operation table of the xlator; defined here without an initialiser. */
+struct xlator_fops fops;
+
+/* Callback table of the xlator; defined here without an initialiser. */
+struct xlator_cbks cbks;
+
+/* State-dump hooks: only the private-data dumper is provided. */
+struct xlator_dumpops dumpops = {
+ .priv = glusterd_dump_priv,
+};
+
+
+/*
+ * Options accepted by the glusterd xlator. Each entry gives the key (keys
+ * ending in ".*" are patterns), its value type and, where set, the allowed
+ * range/values, the default and a description. The table is terminated by
+ * an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"working-directory"},
+ .type = GF_OPTION_TYPE_PATH,
+ },
+ { .key = {"transport-type"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"transport.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"rpc-auth.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"rpc-auth-allow-insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"upgrade"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"downgrade"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"bind-insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"mountbroker-root"},
+ .type = GF_OPTION_TYPE_PATH,
+ },
+ { .key = {"mountbroker.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"mountbroker-"GEOREP".*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"mountbroker-"GHADOOP".*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {GEOREP"-log-group"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"run-with-valgrind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"server-quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "server"},
+ .description = "This feature is on the server-side i.e. in glusterd."
+ " Whenever the glusterd on a machine observes that "
+ "the quorum is not met, it brings down the bricks to "
+ "prevent data split-brains. When the network "
+ "connections are brought back up and the quorum is "
+ "restored the bricks in the volume are brought back "
+ "up."
+ },
+ { .key = {"server-quorum-ratio"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .description = "Sets the quorum percentage for the trusted "
+ "storage pool."
+ },
+ { .key = {"glusterd-sockfile"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "The socket file on which glusterd should listen for "
+ "cli requests. Default is "DEFAULT_GLUSTERD_SOCKFILE "."
+ },
+ { .key = {"base-port"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Sets the base port for portmap query"
+ },
+ { .key = {"snap-brick-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "directory where the bricks for the snapshots will be created"
+ },
+ { .key = {"ping-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 300,
+ .default_value = TOSTRING(RPC_DEFAULT_PING_TIMEOUT),
+ },
+ { .key = {"event-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 32,
+ .default_value = "2",
+ .description = "Specifies the number of event threads to execute "
+ "in parallel. Larger values would help process"
+ " responses faster, depending on available processing"
+ " power. Range 1-32 threads."
+ },
+ /* sentinel: marks the end of the table */
+ { .key = {NULL} },
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
new file mode 100644
index 00000000000..909471401bb
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -0,0 +1,1193 @@
+/*
+ Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_H_
+#define _GLUSTERD_H_
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <libgen.h>
+
+#include "compat-uuid.h"
+
+#include "rpc-clnt.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd-mem-types.h"
+#include "rpcsvc.h"
+#include "glusterd-sm.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-bitd-svc.h"
+#include "glusterd1-xdr.h"
+#include "protocol-common.h"
+#include "glusterd-pmap.h"
+#include "cli1-xdr.h"
+#include "syncop.h"
+#include "store.h"
+#include "glusterd-rcu.h"
+
+/* Sizes, option keys and well-known relative paths used across glusterd. */
+#define GLUSTERD_TR_LOG_SIZE 50
+#define GLUSTERD_SOCKET_LISTEN_BACKLOG 128
+#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type"
+#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio"
+#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version"
+#define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub"
+#define GEO_CONF_MAX_OPT_VALS 6
+#define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \
+ "S56glusterd-geo-rep-create-post.sh"
+#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
+#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
+
+/* Paths below are built relative to CONFDIR. */
+#define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf"
+#define GANESHA_EXPORT_DIRECTORY CONFDIR"/exports"
+/* Snapshot limits and the quorum type name. */
+#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256
+#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90
+#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100
+#define GLUSTERD_SERVER_QUORUM "server"
+
+/* printf-style format strings; the %s conversions must be matched by the
+ * caller's arguments. */
+#define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist"
+#define FMTSTR_RESOLVE_BRICK "Could not find peer on which brick %s:%s resides"
+
+/* printf-style format strings for log messages. */
+#define LOGSTR_FOUND_BRICK "Found brick %s:%s in volume %s"
+#define LOGSTR_BUILD_PAYLOAD "Failed to build payload for operation 'Volume %s'"
+#define LOGSTR_STAGE_FAIL "Staging of operation 'Volume %s' failed on %s %s %s"
+#define LOGSTR_COMMIT_FAIL "Commit of operation 'Volume %s' failed on %s %s %s"
+
+/* Operation error strings; the STAGE/COMMIT variants take one %s argument. */
+#define OPERRSTR_BUILD_PAYLOAD "Failed to build payload. Please check the log "\
+ "file for more details."
+#define OPERRSTR_STAGE_FAIL "Staging failed on %s. Please check the log file " \
+ "for more details."
+#define OPERRSTR_COMMIT_FAIL "Commit failed on %s. Please check the log file "\
+ "for more details."
+
+/* Forward declarations: these types are referenced (by pointer) further
+ * down in this header before their full definitions appear. */
+struct glusterd_volinfo_;
+typedef struct glusterd_volinfo_ glusterd_volinfo_t;
+
+struct glusterd_snap_;
+typedef struct glusterd_snap_ glusterd_snap_t;
+
+/* Identifiers of the operations glusterd can perform.
+ *
+ * For every new feature please add respective enum of new feature
+ * at the end of latest enum (just before the GD_OP_MAX enum), so that
+ * GD_OP_MAX stays the last enumerator and existing values do not change.
+ */
+typedef enum glusterd_op_ {
+ GD_OP_NONE = 0,
+ GD_OP_CREATE_VOLUME,
+ GD_OP_START_BRICK,
+ GD_OP_STOP_BRICK,
+ GD_OP_DELETE_VOLUME,
+ GD_OP_START_VOLUME,
+ GD_OP_STOP_VOLUME,
+ GD_OP_DEFRAG_VOLUME,
+ GD_OP_ADD_BRICK,
+ GD_OP_REMOVE_BRICK,
+ GD_OP_REPLACE_BRICK,
+ GD_OP_SET_VOLUME,
+ GD_OP_RESET_VOLUME,
+ GD_OP_SYNC_VOLUME,
+ GD_OP_LOG_ROTATE,
+ GD_OP_GSYNC_SET,
+ GD_OP_PROFILE_VOLUME,
+ GD_OP_QUOTA,
+ GD_OP_STATUS_VOLUME,
+ GD_OP_REBALANCE,
+ GD_OP_HEAL_VOLUME,
+ GD_OP_STATEDUMP_VOLUME,
+ GD_OP_LIST_VOLUME,
+ GD_OP_CLEARLOCKS_VOLUME,
+ GD_OP_DEFRAG_BRICK_VOLUME,
+ GD_OP_COPY_FILE,
+ GD_OP_SYS_EXEC,
+ GD_OP_GSYNC_CREATE,
+ GD_OP_SNAP,
+ GD_OP_BARRIER,
+ GD_OP_GANESHA,
+ GD_OP_BITROT,
+ GD_OP_DETACH_TIER,
+ GD_OP_TIER_MIGRATE,
+ GD_OP_SCRUB_STATUS,
+ GD_OP_MAX,
+} glusterd_op_t;
+
+/* Table of strings defined elsewhere.
+ * NOTE(review): presumably indexed by glusterd_op_t - confirm at the
+ * definition. */
+extern const char * gd_op_list[];
+
+/* Wrapper around a single dictionary handle. */
+struct glusterd_volgen {
+ dict_t *dict;
+};
+
+/*
+ * Process-wide glusterd configuration and state. One instance is allocated
+ * by the xlator's init () and stored in this->private.
+ */
+typedef struct {
+ struct _volfile_ctx *volfile;
+ pthread_mutex_t mutex;
+ struct cds_list_head peers;
+ gf_boolean_t verify_volfile_checksum;
+ gf_boolean_t trace;
+ uuid_t uuid;
+ char workdir[PATH_MAX];
+ rpcsvc_t *rpc;
+ /* One service object per auxiliary daemon. */
+ glusterd_svc_t shd_svc;
+ glusterd_svc_t nfs_svc;
+ glusterd_svc_t bitd_svc;
+ glusterd_svc_t scrub_svc;
+ glusterd_svc_t quotad_svc;
+ struct pmap_registry *pmap;
+ struct cds_list_head volumes;
+ struct cds_list_head snapshots; /*List of snap volumes */
+ pthread_mutex_t xprt_lock;
+ struct list_head xprt_list;
+ gf_store_handle_t *handle;
+ gf_timer_t *timer;
+ glusterd_sm_tr_log_t op_sm_log;
+ struct rpc_clnt_program *gfs_mgmt;
+ dict_t *mgmt_v3_lock; /* Dict for saving
+ * mgmt_v3 locks */
+ dict_t *glusterd_txn_opinfo; /* Dict for saving
+ * transaction opinfos */
+ uuid_t global_txn_id; /* To be used in
+ * heterogeneous
+ * cluster with no
+ * transaction ids */
+
+ struct cds_list_head mount_specs;
+ gf_boolean_t valgrind;
+ pthread_t brick_thread;
+ void *hooks_priv;
+
+ /* need for proper handshake_t */
+ int op_version; /* Starts with 1 for 3.3.0 */
+ xlator_t *xl; /* Should be set to 'THIS' before creating thread */
+ gf_boolean_t pending_quorum_action;
+ dict_t *opts;
+ synclock_t big_lock;
+ gf_boolean_t restart_done;
+ rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */
+ uint32_t base_port;
+ char *snap_bricks_directory;
+ gf_store_handle_t *missed_snaps_list_shandle;
+ struct cds_list_head missed_snaps_list;
+ int ping_timeout;
+ uint32_t generation;
+ int32_t workers;
+} glusterd_conf_t;
+
+
+/* Run state recorded for a brick. */
+typedef enum gf_brick_status {
+ GF_BRICK_STOPPED,
+ GF_BRICK_STARTED,
+} gf_brick_status_t;
+
+/* Description of one brick; instances are chained through brick_list. */
+struct glusterd_brickinfo {
+ char hostname[1024];
+ char path[PATH_MAX];
+ char real_path[PATH_MAX];
+ char device_path[PATH_MAX];
+ char mount_dir[PATH_MAX];
+ char brick_id[1024];/*Client xlator name, AFR changelog name*/
+ char fstype [NAME_MAX]; /* Brick file-system type */
+ char mnt_opts [1024]; /* Brick mount options */
+ struct cds_list_head brick_list;
+ uuid_t uuid;
+ int port;
+ int rdma_port;
+ char *logfile;
+ gf_boolean_t signed_in;
+ gf_store_handle_t *shandle;
+ gf_brick_status_t status;
+ struct rpc_clnt *rpc;
+ int decommissioned;
+ char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */
+ int caps; /* Capability */
+ int32_t snap_status;
+ /*
+ * The group is used to identify which bricks are part of the same
+ * replica set during brick-volfile generation, so that JBR volfiles
+ * can "cross-connect" the bricks to one another. It is also used by
+ * AFR to load the arbiter xlator in the appropriate brick in case of
+ * a replica 3 volume with arbiter enabled.
+ */
+ uint16_t group;
+ uuid_t jbr_uuid;
+};
+
+typedef struct glusterd_brickinfo glusterd_brickinfo_t;
+
+/* Per-brick counters kept during defrag: a name plus file and size totals. */
+struct gf_defrag_brickinfo_ {
+ char *name;
+ int files;
+ int size;
+};
+
+/* Callback type taking the volume and a defrag status; returns int. */
+typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status);
+
+/* State of a defrag (rebalance) run: counters, status, the RPC client used
+ * and the callback stored in cbk_fn. */
+struct glusterd_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ char mount[1024];
+ char databuf[131072];
+ struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */
+
+ defrag_cbk_fn_t cbk_fn;
+};
+
+
+typedef struct glusterd_defrag_info_ glusterd_defrag_info_t;
+
+/* Transport used by a volume: TCP, RDMA, or both. */
+typedef enum gf_transport_type_ {
+ GF_TRANSPORT_TCP, //DEFAULT
+ GF_TRANSPORT_RDMA,
+ GF_TRANSPORT_BOTH_TCP_RDMA,
+} gf_transport_type;
+
+
+/* Status values for "rb" - presumably replace-brick; confirm with users of
+ * this type. */
+typedef enum gf_rb_status_ {
+ GF_RB_STATUS_NONE,
+ GF_RB_STATUS_STARTED,
+ GF_RB_STATUS_PAUSED,
+} gf_rb_status_t;
+
+/* Username/password credential pair. */
+struct _auth {
+ char *username;
+ char *password;
+};
+
+typedef struct _auth auth_t;
+
+/* Capabilities of xlator: single-bit flags that can be OR-ed together.
+ * NOTE(review): 0x00000010 is not assigned here - confirm whether the gap
+ * before CAPS_OFFLOAD_ZERO is intentional. */
+#define CAPS_BD 0x00000001
+#define CAPS_THIN 0x00000002
+#define CAPS_OFFLOAD_COPY 0x00000004
+#define CAPS_OFFLOAD_SNAPSHOT 0x00000008
+#define CAPS_OFFLOAD_ZERO 0x00000020
+
+/* Bitrot scrub settings (state, impact, frequency strings) and counters. */
+struct glusterd_bitrot_scrub_ {
+ char *scrub_state;
+ char *scrub_impact;
+ char *scrub_freq;
+ uint64_t scrubbed_files;
+ uint64_t unsigned_files;
+ uint64_t last_scrub_time;
+ uint64_t scrub_duration;
+ uint64_t error_count;
+};
+
+typedef struct glusterd_bitrot_scrub_ glusterd_bitrot_scrub_t;
+
+
+/* Rebalance bookkeeping for a volume: status, counters, the id of the
+ * rebalance and the operation it belongs to. */
+struct glusterd_rebalance_ {
+ gf_defrag_status_t defrag_status;
+ uint64_t rebalance_files;
+ uint64_t rebalance_data;
+ uint64_t lookedup_files;
+ uint64_t skipped_files;
+ glusterd_defrag_info_t *defrag;
+ gf_cli_defrag_type defrag_cmd;
+ uint64_t rebalance_failures;
+ uuid_t rebalance_id;
+ double rebalance_time;
+ glusterd_op_t op;
+ dict_t *dict; /* Dict to store misc information
+ * like list of bricks being removed */
+ uint32_t commit_hash;
+};
+
+typedef struct glusterd_rebalance_ glusterd_rebalance_t;
+
+/* Source and destination brick of a replace-brick operation. */
+struct glusterd_replace_brick_ {
+ glusterd_brickinfo_t *src_brick;
+ glusterd_brickinfo_t *dst_brick;
+};
+
+typedef struct glusterd_replace_brick_ glusterd_replace_brick_t;
+
+/* Whether quorum applies and, if it does, whether it is currently met. */
+typedef enum gd_quorum_status_ {
+ NOT_APPLICABLE_QUORUM, //Does not follow quorum
+ MEETS_QUORUM, //Follows quorum and meets.
+ DOESNT_MEET_QUORUM, //Follows quorum and does not meet.
+} gd_quorum_status_t;
+
+/* Layout figures for the cold and hot tiers plus promoted/demoted
+ * counters. */
+typedef struct tier_info_ {
+ int cold_type;
+ int cold_brick_count;
+ int cold_replica_count;
+ int cold_disperse_count;
+ int cold_dist_leaf_count;
+ int cold_redundancy_count;
+ int hot_type;
+ int hot_brick_count;
+ int hot_replica_count;
+ int promoted;
+ int demoted;
+ uint16_t cur_tier_hot;
+} gd_tier_info_t;
+
+/*
+ * Everything glusterd tracks for one volume; the same structure is used for
+ * regular volumes and snapshot volumes (see is_snap_volume).
+ */
+struct glusterd_volinfo_ {
+ gf_lock_t lock;
+ gf_boolean_t is_snap_volume;
+ glusterd_snap_t *snapshot;
+ uuid_t restored_from_snap;
+ gd_tier_info_t tier_info;
+ char parent_volname[GD_VOLUME_NAME_MAX];
+ /* In case of a snap volume
+ i.e (is_snap_volume == TRUE) this
+ field will contain the name of
+ the volume which is snapped. In
+ case of a non-snap volume, this
+ field will be initialized as N/A */
+ char volname[GD_VOLUME_NAME_MAX + 5];
+ /* An extra 5 bytes are allocated.
+ * Reason is, As part of the tiering
+ * volfile generation code, we are
+ * temporarily appending either hot
+ * or cold */
+ int type;
+ int brick_count;
+ uint64_t snap_count;
+ uint64_t snap_max_hard_limit;
+ struct cds_list_head vol_list;
+ /* In case of a snap volume
+ i.e (is_snap_volume == TRUE) this
+ is linked to glusterd_snap_t->volumes.
+ In case of a non-snap volume, this is
+ linked to glusterd_conf_t->volumes */
+ struct cds_list_head snapvol_list;
+ /* This is a current pointer for
+ glusterd_volinfo_t->snap_volumes */
+ struct cds_list_head bricks;
+ struct cds_list_head snap_volumes;
+ /* TODO : Need to remove this, as this
+ * is already part of snapshot object.
+ */
+ glusterd_volume_status status;
+ int sub_count; /* backward compatibility */
+ int stripe_count;
+ int replica_count;
+ int arbiter_count;
+ int disperse_count;
+ int redundancy_count;
+ int subvol_count; /* Number of subvolumes in a
+ distribute volume */
+ int dist_leaf_count; /* Number of bricks in one
+ distribute subvolume */
+ int port;
+ gf_store_handle_t *shandle;
+ gf_store_handle_t *node_state_shandle;
+ gf_store_handle_t *quota_conf_shandle;
+
+ /* Defrag/rebalance related */
+ glusterd_rebalance_t rebal;
+
+ /* Replace brick status */
+ glusterd_replace_brick_t rep_brick;
+
+ /* Bitrot scrub status*/
+ glusterd_bitrot_scrub_t bitrot_scrub;
+
+ int version;
+ uint32_t quota_conf_version;
+ uint32_t cksum;
+ uint32_t quota_conf_cksum;
+ gf_transport_type transport_type;
+
+ dict_t *dict;
+
+ uuid_t volume_id;
+ auth_t auth;
+ char *logdir;
+
+ dict_t *gsync_slaves;
+ dict_t *gsync_active_slaves;
+
+ int decommission_in_progress;
+ xlator_t *xl;
+
+ gf_boolean_t memory_accounting;
+ int caps; /* Capability */
+
+ int op_version;
+ int client_op_version;
+ /* NOTE(review): reflock presumably guards refcnt - confirm. */
+ pthread_mutex_t reflock;
+ int refcnt;
+ gd_quorum_status_t quorum_status;
+
+ glusterd_snapdsvc_t snapd;
+ int32_t quota_xattr_version;
+};
+
+/* Status values recorded for a snapshot. */
+typedef enum gd_snap_status_ {
+ GD_SNAP_STATUS_NONE,
+ GD_SNAP_STATUS_INIT,
+ GD_SNAP_STATUS_IN_USE,
+ GD_SNAP_STATUS_DECOMMISSION,
+ GD_SNAP_STATUS_UNDER_RESTORE,
+ GD_SNAP_STATUS_RESTORED,
+} gd_snap_status_t;
+
+/* One snapshot: its name, id, description, time stamp and status, plus the
+ * list of volumes that belong to it. */
+struct glusterd_snap_ {
+ gf_lock_t lock;
+ struct cds_list_head volumes;
+ struct cds_list_head snap_list;
+ char snapname[GLUSTERD_MAX_SNAP_NAME];
+ uuid_t snap_id;
+ char *description;
+ time_t time_stamp;
+ gf_boolean_t snap_restored;
+ gd_snap_status_t snap_status;
+ gf_store_handle_t *shandle;
+};
+
+/* One snapshot operation on a brick (volume id, brick number and path,
+ * operation and status); linked through snap_ops_list. */
+typedef struct glusterd_snap_op_ {
+ char *snap_vol_id;
+ int32_t brick_num;
+ char *brick_path;
+ int32_t op;
+ int32_t status;
+ struct cds_list_head snap_ops_list;
+} glusterd_snap_op_t;
+
+/* A missed-snapshot record identified by node and snapshot uuid strings;
+ * snap_ops heads the list of operations attached to it. */
+typedef struct glusterd_missed_snap_ {
+ char *node_uuid;
+ char *snap_uuid;
+ struct cds_list_head missed_snaps;
+ struct cds_list_head snap_ops;
+} glusterd_missed_snap_info;
+
+/* Kind of process a pending node refers to (see glusterd_pending_node_t). */
+typedef enum gd_node_type_ {
+ GD_NODE_NONE,
+ GD_NODE_BRICK,
+ GD_NODE_SHD,
+ GD_NODE_REBALANCE,
+ GD_NODE_NFS,
+ GD_NODE_QUOTAD,
+ GD_NODE_SNAPD,
+ GD_NODE_BITD,
+ GD_NODE_SCRUB,
+} gd_node_type;
+
+/* Progress of a missed snapshot operation. */
+typedef enum missed_snap_stat {
+ GD_MISSED_SNAP_NONE,
+ GD_MISSED_SNAP_PENDING,
+ GD_MISSED_SNAP_DONE,
+} missed_snap_stat;
+
+/* List entry pairing an untyped node pointer with its gd_node_type and an
+ * index. */
+typedef struct glusterd_pending_node_ {
+ struct cds_list_head list;
+ void *node;
+ gd_node_type type;
+ int32_t index;
+} glusterd_pending_node_t;
+
+/* An option name with up to GEO_CONF_MAX_OPT_VALS values.
+ * NOTE(review): no_of_pos_vals presumably counts the used entries of
+ * values[] - confirm. */
+struct gsync_config_opt_vals_ {
+ char *op_name;
+ int no_of_pos_vals;
+ gf_boolean_t case_sensitive;
+ char *values[GEO_CONF_MAX_OPT_VALS];
+};
+
+/* Special operation return code (value 100) meaning a connection is still
+ * awaited. */
+enum glusterd_op_ret {
+ GLUSTERD_CONNECTION_AWAITED = 100,
+};
+
+/* Outcome of a volume comparison.
+ * NOTE(review): SCS/RJT look like "success"/"reject" - confirm. */
+enum glusterd_vol_comp_status_ {
+ GLUSTERD_VOL_COMP_NONE = 0,
+ GLUSTERD_VOL_COMP_SCS = 1,
+ GLUSTERD_VOL_COMP_UPDATE_REQ,
+ GLUSTERD_VOL_COMP_RJT,
+};
+
+/* List node carrying a struct addrinfo pointer. */
+typedef struct addrinfo_list {
+ struct cds_list_head list;
+ struct addrinfo *info;
+} addrinfo_list_t;
+
+/* Three-way result of an addrinfo comparison: no match, match, or error. */
+typedef enum {
+ GF_AI_COMPARE_NO_MATCH = 0,
+ GF_AI_COMPARE_MATCH = 1,
+ GF_AI_COMPARE_ERROR = 2
+} gf_ai_compare_t;
+
+/* Default port and the file/directory names used by glusterd. */
+#define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT
+#define GLUSTERD_INFO_FILE "glusterd.info"
+#define GLUSTERD_VOLUME_QUOTA_CONFIG "quota.conf"
+#define GLUSTERD_VOLUME_DIR_PREFIX "vols"
+#define GLUSTERD_PEER_DIR_PREFIX "peers"
+#define GLUSTERD_VOLUME_INFO_FILE "info"
+#define GLUSTERD_VOLUME_SNAPD_INFO_FILE "snapd.info"
+#define GLUSTERD_SNAP_INFO_FILE "info"
+#define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate"
+#define GLUSTERD_BRICK_INFO_DIR "bricks"
+#define GLUSTERD_CKSUM_FILE "cksum"
+#define GLUSTERD_VOL_QUOTA_CKSUM_FILE "quota.cksum"
+#define GLUSTERD_TRASH "trash"
+#define GLUSTERD_NODE_STATE_FILE "node_state.info"
+#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list"
+#define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps"
+
+/* Snapshot brick directory suffix, shared-storage brick directory (under
+ * the default working directory) and the candidate run directories. */
+#define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps"
+#define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR"/ss_brick"
+#define GLUSTERD_VAR_RUN_DIR "/var/run"
+#define GLUSTERD_RUN_DIR "/run"
+
+/* definitions related to replace brick */
+#define RB_CLIENT_MOUNTPOINT "rb_mount"
+#define RB_CLIENTVOL_FILENAME "rb_client.vol"
+#define RB_DSTBRICK_PIDFILE "rb_dst_brick.pid"
+#define RB_DSTBRICKVOL_FILENAME "rb_dst_brick.vol"
+#define RB_PUMP_DEF_ARG "default"
+
+/* Buffer length to use for a uuid rendered as a string. */
+#define GLUSTERD_UUID_LEN 50
+
+typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); /* callback type; note the iovec is passed by value */
+
+#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \
+ if (volinfo->is_snap_volume) { \
+ snprintf (path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \
+ volinfo->snapshot->snapname, volinfo->volname); \
+ } else { \
+ snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \
+ volinfo->volname); \
+ }
+
+#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \
+ snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, \
+ snap->snapname);
+
+#define GLUSTERD_GET_SNAP_GEO_REP_DIR(path, snap, priv) \
+ snprintf (path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \
+ snap->snapname, GEOREP);
+
+#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \
+ if (volinfo->is_snap_volume) { \
+ snprintf (path, PATH_MAX, "%s/snaps/%s/%s/%s", priv->workdir, \
+ volinfo->snapshot->snapname, volinfo->volname, \
+ GLUSTERD_BRICK_INFO_DIR); \
+ } else { \
+ snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir, \
+ GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \
+ GLUSTERD_BRICK_INFO_DIR); \
+ }
+
+#define GLUSTERD_GET_NFS_DIR(path, priv) \
+ snprintf (path, PATH_MAX, "%s/nfs", priv->workdir);
+
+#define GLUSTERD_GET_QUOTAD_DIR(path, priv) \
+ snprintf (path, PATH_MAX, "%s/quotad", priv->workdir);
+
+#define GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH(abspath, volname, path) \
+ snprintf (abspath, sizeof (abspath)-1, \
+ DEFAULT_VAR_RUN_DIRECTORY"/%s%s", volname, path);
+
+#define GLUSTERD_GET_TMP_PATH(abspath, path) do { /* abspath = DEFAULT_VAR_RUN_DIRECTORY "/tmp" followed by path */ \
+ snprintf (abspath, sizeof (abspath)-1, /* sizeof requires abspath to be an array, not a pointer */ \
+ DEFAULT_VAR_RUN_DIRECTORY"/tmp%s", path); \
+ } while (0)
+
+/*
+ * Copy `path` into `string` without its first character, replacing every
+ * '/' by '-' except a '/' that is the very last character of `path`
+ * (e.g. "/a/b/" becomes "a-b/").
+ *
+ * The length is computed once instead of on every iteration, the index is an
+ * unsigned size_t (no signed/unsigned comparison), and the locals carry a
+ * prefix so they cannot capture a caller expression that mentions `i`.
+ * The result is always NUL-terminated, so `string` must have room for
+ * strlen (path) bytes; callers no longer need a zero-filled destination.
+ */
+#define GLUSTERD_REMOVE_SLASH_FROM_PATH(path,string) do { \
+        size_t _rs_len = strlen (path); \
+        size_t _rs_i = 0; \
+        for (_rs_i = 1; _rs_i < _rs_len; _rs_i++) { \
+                (string)[_rs_i - 1] = (path)[_rs_i]; \
+                if ((string)[_rs_i - 1] == '/' && \
+                    (_rs_i != _rs_len - 1)) \
+                        (string)[_rs_i - 1] = '-'; \
+        } \
+        /* _rs_len == 0 leaves nothing copied: terminate at index 0 */ \
+        (string)[_rs_len ? _rs_len - 1 : 0] = '\0'; \
+        } while (0)
+
+#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volinfo,brickinfo, priv) do { /* pidfile = "<volume dir>/run/<hostname>-<flattened brick path>.pid" */ \
+ char exp_path[PATH_MAX] = {0,}; /* zero-filled, so the flattened path is NUL-terminated */ \
+ char volpath[PATH_MAX] = {0,}; \
+ GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); /* drops the first character and turns inner '/' into '-' */ \
+ snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \
+ volpath, brickinfo->hostname, exp_path); \
+ } while (0)
+
+#define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \
+ snprintf (pidfile, PATH_MAX, "%s/run/nfs.pid", \
+ nfspath); \
+ }
+
+#define GLUSTERD_GET_QUOTAD_PIDFILE(pidfile,quotadpath) { \
+ snprintf (pidfile, PATH_MAX, "%s/run/quotad.pid", \
+ quotadpath); \
+ }
+
+/*
+ * Format the quota-crawl pid directory of `volinfo` into `piddir`
+ * (snprintf-bounded by PATH_MAX):
+ *   "<volume dir>/run/quota/enable"  when `type` is
+ *       GF_QUOTA_OPTION_TYPE_ENABLE or GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS,
+ *   "<volume dir>/run/quota/disable" otherwise.
+ *
+ * NOTE: `priv` is not a macro parameter; a variable named `priv` must be in
+ * scope at the point of use. `type` is parenthesized because it is compared
+ * with ==, which binds tighter than many operators a caller might pass.
+ */
+#define GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(piddir, volinfo, type) do { \
+        char _volpath[PATH_MAX] = {0,}; \
+        GLUSTERD_GET_VOLUME_DIR (_volpath, volinfo, priv); \
+        if ((type) == GF_QUOTA_OPTION_TYPE_ENABLE || \
+            (type) == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) \
+                snprintf ((piddir), PATH_MAX, "%s/run/quota/enable", \
+                          _volpath); \
+        else \
+                snprintf ((piddir), PATH_MAX, "%s/run/quota/disable", \
+                          _volpath); \
+        } while (0)
+
+#define GLUSTERD_STACK_DESTROY(frame) do { /* clears frame->local, then destroys the frame's root stack */ \
+ frame->local = NULL; /* presumably so STACK_DESTROY does not touch the local -- confirm */ \
+ STACK_DESTROY (frame->root); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_PROCESS(path, volinfo) do { /* path = "tier" or "rebalance", snprintf-bounded by NAME_MAX */ \
+ if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_START_TIER) \
+ snprintf (path, NAME_MAX, "tier"); \
+ else \
+ snprintf (path, NAME_MAX, "rebalance"); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do { /* path = "<volume dir>/<tier|rebalance>" */ \
+ char vol_path[PATH_MAX]; /* NOTE(review): an argument expression named vol_path or operation would be captured by these locals */ \
+ char operation[NAME_MAX]; \
+ GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv); \
+ GLUSTERD_GET_DEFRAG_PROCESS(operation, volinfo); \
+ snprintf (path, PATH_MAX, "%s/%s", vol_path, operation);\
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(path, volinfo, priv) do { /* path = "<defrag dir>/<uuid_utoa (MY_UUID)>.sock" */ \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \
+ uuid_utoa(MY_UUID)); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) do { /* path = DEFAULT_VAR_RUN_DIRECTORY "/gluster-<tier|rebalance>-<uuid_utoa (volume_id)>.sock" */ \
+ char operation[NAME_MAX]; \
+ GLUSTERD_GET_DEFRAG_PROCESS(operation, volinfo); \
+ snprintf (path, UNIX_PATH_MAX, DEFAULT_VAR_RUN_DIRECTORY /* bounded by UNIX_PATH_MAX, unlike the PATH_MAX-bounded macros around it */ \
+ "/gluster-%s-%s.sock", operation, \
+ uuid_utoa(volinfo->volume_id)); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { /* path = "<defrag dir>/<uuid_utoa (MY_UUID)>.pid" */ \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.pid", defrag_path, \
+ uuid_utoa(MY_UUID)); \
+ } while (0)
+
+#define GLUSTERFS_GET_AUX_MOUNT_PIDFILE(pidfile, volname) { \
+ snprintf (pidfile, PATH_MAX-1, \
+ DEFAULT_VAR_RUN_DIRECTORY"/%s.pid", volname); \
+ }
+
+#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) do { /* copies uuid_utoa (uuid) into ret_string with every '-' removed */ \
+ char *snap_volname_ptr = ret_string; /* write cursor */ \
+ char *snap_volid_ptr = uuid_utoa(uuid); /* read cursor */ \
+ while (*snap_volid_ptr) { \
+ if (*snap_volid_ptr == '-') { \
+ snap_volid_ptr++; /* skip the hyphen without writing */ \
+ } else { \
+ (*snap_volname_ptr++) = \
+ (*snap_volid_ptr++); \
+ } \
+ } \
+ *snap_volname_ptr = '\0'; /* no length is checked: ret_string must be large enough for the result */ \
+ } while (0)
+
+#define GLUSTERD_DUMP_PEERS(head, member, xpeers) do { /* walks the peer list under rcu_read_lock and dumps each entry */ \
+ glusterd_peerinfo_t *_peerinfo = NULL; \
+ char subkey[50] = {0,}; /* NOTE(review): not referenced anywhere in this macro body -- confirm it can go */ \
+ int index = 1; /* 1-based position passed to the dump helpers */ \
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,}; \
+ \
+ if (!xpeers) \
+ snprintf (key, sizeof (key), "glusterd.peer"); \
+ else \
+ snprintf (key, sizeof (key), \
+ "glusterd.xaction_peer"); \
+ \
+ rcu_read_lock (); \
+ cds_list_for_each_entry_rcu (_peerinfo, head, member) { \
+ glusterd_dump_peer (_peerinfo, key, index, xpeers); \
+ if (!xpeers) /* rpc statistics are dumped only when xpeers is false */ \
+ glusterd_dump_peer_rpcstat (_peerinfo, key, \
+ index); \
+ index++; \
+ } \
+ rcu_read_unlock (); \
+ \
+ } while (0)
+
+int glusterd_uuid_init(); /* empty parentheses: declared without a prototype, so argument types are not checked */
+
+int glusterd_uuid_generate_save (); /* likewise declared without a prototype */
+
+#define MY_UUID (__glusterd_uuid()) /* expands to a call of the inline helper defined right after this macro */
+
+/*
+ * Return a pointer to the first byte of the uuid stored in THIS->private.
+ * When that uuid is null, glusterd_uuid_init() is called first.
+ */
+static inline unsigned char *
+__glusterd_uuid()
+{
+        glusterd_conf_t *conf = THIS->private;
+
+        if (!gf_uuid_is_null (conf->uuid))
+                return &conf->uuid[0];
+
+        glusterd_uuid_init();
+        return &conf->uuid[0];
+}
+
+int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event,
+ void *data, rpc_clnt_notify_t notify_fn);
+
+int
+glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe, fop_cbk_fn_t fn);
+
+int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn);
+
+int32_t
+glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo,
+ char **new_brick);
+int
+glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ dict_t *dict, int *op_errno);
+
+int
+glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname,
+ char *remote_hostname, int port, int32_t op_ret,
+ int32_t op_errno);
+
+int
+glusterd_friend_add (const char *hoststr, int port,
+ glusterd_friend_sm_state_t state,
+ uuid_t *uuid, glusterd_peerinfo_t **friend,
+ gf_boolean_t restore, glusterd_peerctx_args_t *args);
+
+int
+glusterd_friend_add_from_peerinfo (glusterd_peerinfo_t *friend,
+ gf_boolean_t restore,
+ glusterd_peerctx_args_t *args);
+int
+glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ glusterd_peerctx_args_t *args);
+int
+glusterd_friend_remove (uuid_t uuid, char *hostname);
+
+int
+glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status);
+
+int
+glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status);
+
+int
+glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req,
+ uuid_t *txn_id, int32_t status);
+
+int
+glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req,
+ uuid_t *txn_id, int32_t status);
+
+int
+glusterd_op_stage_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_op_commmit_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status); /* NOTE(review): name has three 'm's; glusterd_op_commit_send_resp (five parameters) is declared separately in this header -- confirm this declaration is still needed */
+
+int32_t
+glusterd_create_volume (rpcsvc_request_t *req, dict_t *dict);
+
+int
+glusterd_handle_incoming_friend_req (rpcsvc_request_t *req);
+
+int
+glusterd_handle_probe_query (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cluster_lock (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cluster_unlock (rpcsvc_request_t *req);
+
+int
+glusterd_handle_stage_op (rpcsvc_request_t *req);
+
+int
+glusterd_handle_commit_op (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_probe (rpcsvc_request_t *req);
+
+int
+glusterd_handle_create_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_defrag_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req);
+
+int
+glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr, char *hostname,
+ int port, dict_t *dict);
+
+int
+glusterd_op_commit_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status, char *op_errstr,
+ dict_t *rsp_dict);
+
+int
+glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port);
+
+int
+glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ uuid_t uuid, dict_t *dict, int *op_errno);
+
+int
+glusterd_handle_cli_deprobe (rpcsvc_request_t *req);
+
+int
+glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req);
+
+int32_t
+glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags);
+
+int
+glusterd_handle_cli_list_friends (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_start_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_friend_update (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_stop_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_delete_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_get_volume (rpcsvc_request_t *req);
+
+int32_t
+glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags);
+
+int
+glusterd_handle_add_brick (rpcsvc_request_t *req);
+
+int
+glusterd_handle_attach_tier (rpcsvc_request_t *req);
+
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req);
+
+int
+glusterd_handle_replace_brick (rpcsvc_request_t *req);
+
+int
+glusterd_handle_remove_brick (rpcsvc_request_t *req);
+
+int
+glusterd_handle_log_rotate (rpcsvc_request_t *req);
+
+int
+glusterd_handle_sync_volume (rpcsvc_request_t *req);
+
+int32_t
+glusterd_log_filename (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_log_rotate (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_remove_brick (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_reset_volume (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_gsync_set (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_quota (rpcsvc_request_t *req, dict_t *dict);
+
+int
+glusterd_handle_set_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_reset_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_copy_file (rpcsvc_request_t *req);
+
+int
+glusterd_handle_sys_exec (rpcsvc_request_t *req);
+
+int
+glusterd_handle_gsync_set (rpcsvc_request_t *req);
+
+int
+glusterd_handle_quota (rpcsvc_request_t *req);
+
+int
+glusterd_handle_bitrot (rpcsvc_request_t *req);
+
+int
+glusterd_handle_fsm_log (rpcsvc_request_t *req);
+
+int
+glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr,
+ char *hostname, dict_t *dict);
+
+int
+glusterd_fetchspec_notify (xlator_t *this);
+
+int
+glusterd_fetchsnap_notify (xlator_t *this);
+
+int
+glusterd_add_tier_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *volumes, int count);
+
+int
+glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *volumes, int count);
+
+int
+glusterd_restart_bricks (glusterd_conf_t *conf);
+
+int32_t
+glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
+ glusterd_op_t op);
+
+int
+glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc,
+ glusterd_peerctx_t *peerctx);
+
+int
+glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr);
+int
+glusterd_handle_cli_profile_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_getwd (rpcsvc_request_t *req);
+
+/*
+ * rpc_clnt notification callback declaration for peer connections.
+ * (glusterd_set_volume is declared once, with the other request helpers
+ * earlier in this header; it is not repeated here.)
+ */
+int
+glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+                          rpc_clnt_event_t event,
+                          void *data);
+int
+glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+
+int
+glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options,
+ rpc_clnt_notify_t notify_fn, void *notify_data);
+
+
+/* handler functions */
+int32_t glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t size);
+
+/* Other declarations were removed from here because they are already declared elsewhere in this file. */
+
+int glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req);
+int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req);
+
+int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk,
+ glusterd_op_t op);
+int
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
+ gf_boolean_t reconnect);
+
+int glusterd_rebalance_defrag_init (glusterd_volinfo_t *volinfo,
+ defrag_cbk_fn_t cbk);
+
+int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req);
+
+int glusterd_handle_cli_list_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_snapshot (rpcsvc_request_t *req);
+
+/* op-sm functions */
+int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_heal_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr);
+int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr);
+int glusterd_op_copy_file (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr);
+int glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr);
+int glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int glusterd_op_bitrot (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int glusterd_op_stage_bitrot (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict);
+int glusterd_op_log_rotate (dict_t *dict);
+int glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_create_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_start_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stop_volume (dict_t *dict);
+int glusterd_op_delete_volume (dict_t *dict);
+int glusterd_handle_ganesha_op (dict_t *dict, char **op_errstr,
+ char *key, char *value);
+int glusterd_check_ganesha_cmd (char *key, char *value,
+ char **errstr, dict_t *dict);
+int glusterd_op_stage_set_ganesha (dict_t *dict, char **op_errstr);
+int glusterd_op_set_ganesha (dict_t *dict, char **errstr);
+int ganesha_manage_export (char *volname, char *value, char **op_errstr,
+ gf_boolean_t reboot);
+gf_boolean_t glusterd_check_ganesha_export (glusterd_volinfo_t *volinfo);
+int stop_ganesha (char **op_errstr);
+int tear_down_cluster (void);
+int glusterd_op_add_brick (dict_t *dict, char **op_errstr);
+int glusterd_op_remove_brick (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr);
+
+int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr);
+int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_statedump_volume (dict_t *dict, char **op_errstr);
+
+int glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+
+
+int glusterd_op_stage_barrier (dict_t *dict, char **op_errstr);
+int glusterd_op_barrier (dict_t *dict, char **op_errstr);
+
+/* misc */
+int glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate);
+int glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags);
+int glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname,
+ char **options, int *option_cnt);
+
+int glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr,
+ char **master, char **slave, char **host_uuid);
+
+int glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
+ gf_boolean_t wait);
+
+int glusterd_stop_volume (glusterd_volinfo_t *volinfo);
+
+/* Synctask part */
+int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op,
+ void *dict);
+int32_t
+glusterd_defrag_event_notify_handle (dict_t *dict);
+
+int32_t
+glusterd_txn_opinfo_dict_init ();
+
+void
+glusterd_txn_opinfo_dict_fini ();
+
+void
+glusterd_txn_opinfo_init ();
+
+/* snapshot */
+glusterd_snap_t*
+glusterd_new_snap_object();
+
+int32_t
+glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol,
+ glusterd_volinfo_t *snap_vol);
+
+glusterd_snap_t*
+glusterd_remove_snap_by_id (uuid_t snap_id);
+
+glusterd_snap_t*
+glusterd_remove_snap_by_name (char *snap_name);
+
+glusterd_snap_t*
+glusterd_find_snap_by_name (char *snap_name);
+
+glusterd_snap_t*
+glusterd_find_snap_by_id (uuid_t snap_id);
+
+int
+glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict, uint32_t *op_errno);
+int
+glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int
+glusterd_snapshot (dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict);
+int
+glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,
+ dict_t *rsp_dict);
+char *
+glusterd_build_snap_device_path (char *device, char *snapname,
+ int32_t brick_count);
+
+int32_t
+glusterd_snap_remove (dict_t *rsp_dict, glusterd_snap_t *snap,
+ gf_boolean_t remove_lvm, gf_boolean_t force,
+ gf_boolean_t is_clone);
+int32_t
+glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count);
+
+int32_t
+glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id,
+ int32_t brick_num, char *brick_path,
+ int32_t snap_op, int32_t snap_status);
+
+int
+glusterd_snapshot_revert_restore_from_snap (glusterd_snap_t *snap);
+
+
+int
+glusterd_add_brick_status_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *key_prefix);
+
+int32_t
+glusterd_handle_snap_limit (dict_t *dict, dict_t *rsp_dict);
+
+gf_boolean_t
+glusterd_should_i_stop_bitd ();
+
+int
+glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status);
+
+#endif
diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am
index 93ee00b81c8..2c0235e0927 100644
--- a/xlators/mount/fuse/src/Makefile.am
+++ b/xlators/mount/fuse/src/Makefile.am
@@ -1,6 +1,18 @@
-noinst_HEADERS = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\
- $(CONTRIBDIR)/fuse-include/fuse-mount.h\
- $(CONTRIBDIR)/fuse-include/fuse-misc.h fuse-mem-types.h
+noinst_HEADERS_linux = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\
+ $(CONTRIBDIR)/fuse-include/mount_util.h\
+ $(CONTRIBDIR)/fuse-lib/mount-gluster-compat.h
+noinst_HEADERS_darwin = $(CONTRIBDIR)/fuse-include/fuse_kernel_macfuse.h\
+ $(CONTRIBDIR)/macfuse/fuse_param.h\
+ $(CONTRIBDIR)/macfuse/fuse_ioctl.h
+noinst_HEADERS_common = $(CONTRIBDIR)/fuse-include/fuse-mount.h\
+ $(CONTRIBDIR)/fuse-include/fuse-misc.h fuse-mem-types.h \
+ fuse-bridge.h
+
+if GF_DARWIN_HOST_OS
+ noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_darwin)
+else
+ noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_linux)
+endif
xlator_LTLIBRARIES = fuse.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
@@ -8,17 +20,19 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
if GF_DARWIN_HOST_OS
mount_source=$(CONTRIBDIR)/macfuse/mount_darwin.c
else
- mount_source=$(CONTRIBDIR)/fuse-lib/mount.c
+ mount_source=$(CONTRIBDIR)/fuse-lib/mount.c $(CONTRIBDIR)/fuse-lib/mount-common.c
endif
-fuse_la_SOURCES = fuse-bridge.c $(CONTRIBDIR)/fuse-lib/misc.c \
- $(mount_source)
-fuse_la_LDFLAGS = -module -avoidversion -shared -nostartfiles
+fuse_la_SOURCES = fuse-helpers.c fuse-resolve.c fuse-bridge.c \
+ $(CONTRIBDIR)/fuse-lib/misc.c $(mount_source)
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
- -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/fuse-include \
- $(GF_CFLAGS) $(GF_FUSE_CFLAGS)
+fuse_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+fuse_la_LIBADD = $(GF_LDADD) @GF_FUSE_LDADD@
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/fuse-include \
+ -I$(CONTRIBDIR)/fuse-lib $(GF_FUSE_CFLAGS)
-CLEANFILES =
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+CLEANFILES =
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 096ca93df58..eead33fbd55 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -1,313 +1,136 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * TODO:
- * Need to free_state() when fuse_reply_err() + return.
- * Check loc->path for "" after fuse_loc_fill in all fops
- * (now being done in getattr, lookup) or better - make
- * fuse_loc_fill() and inode_path() return success/failure.
- */
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#include <stdint.h>
-#include <signal.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <dirent.h>
-#include <sys/mount.h>
-#include <sys/time.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-
+#include <sys/wait.h>
+#include "fuse-bridge.h"
+#include "mount-gluster-compat.h"
#include "glusterfs.h"
-#include "logging.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "statedump.h"
-
-#ifdef GF_DARWIN_HOST_OS
-/* This is MacFUSE's marker for MacFUSE-specific code */
-#define __FreeBSD__ 10
-#include "fuse_kernel_macfuse.h"
-#else
-#include "fuse_kernel.h"
-#endif
-#include "fuse-misc.h"
-#include "fuse-mount.h"
-#include "fuse-mem-types.h"
-
-#include "list.h"
-#include "dict.h"
-
-/* TODO: when supporting posix acl, remove this definition */
-#define DISABLE_POSIX_ACL
-
-#define ZR_MOUNTPOINT_OPT "mountpoint"
-#define ZR_DIRECT_IO_OPT "direct-io-mode"
-#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
+#include "byte-order.h"
+#include "compat-errno.h"
+#include "glusterfs-acl.h"
+#include "syscall.h"
-#ifdef GF_LINUX_HOST_OS
-#define FUSE_OP_HIGH (FUSE_POLL + 1)
+#ifdef __NetBSD__
+#undef open /* in perfuse.h, pulled from mount-gluster-compat.h */
#endif
-#ifdef GF_DARWIN_HOST_OS
-#define FUSE_OP_HIGH (FUSE_DESTROY + 1)
-#endif
-#define GLUSTERFS_XATTR_LEN_MAX 65536
-
-#define MAX_FUSE_PROC_DELAY 1
static int gf_fuse_conn_err_log;
static int gf_fuse_xattr_enotsup_log;
-typedef struct fuse_in_header fuse_in_header_t;
-typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh,
- void *msg);
-
-struct fuse_private {
- int fd;
- uint32_t proto_minor;
- char *volfile;
- size_t volfile_size;
- char *mount_point;
- struct iobuf *iobuf;
-
- pthread_t fuse_thread;
- char fuse_thread_started;
-
- uint32_t direct_io_mode;
- size_t *msg0_len_p;
-
- double entry_timeout;
- double attribute_timeout;
+void fini (xlator_t *this_xl);
- pthread_cond_t sync_cond;
- pthread_mutex_t sync_mutex;
- char child_up;
-
- char init_recvd;
-
- gf_boolean_t strict_volfile_check;
-
- fuse_handler_t **fuse_ops;
- fuse_handler_t **fuse_ops0;
- pthread_mutex_t fuse_dump_mutex;
- int fuse_dump_fd;
-
- glusterfs_graph_t *next_graph;
- xlator_t *active_subvol;
-};
-typedef struct fuse_private fuse_private_t;
-
-#define _FH_TO_FD(fh) ((fd_t *)(uintptr_t)(fh))
-
-#define FH_TO_FD(fh) ((_FH_TO_FD (fh))?(fd_ref (_FH_TO_FD (fh))):((fd_t *) 0))
-
-#define FUSE_FOP(state, ret, op_num, fop, args ...) \
- do { \
- call_frame_t *frame = NULL; \
- xlator_t *xl = NULL; \
- \
- frame = get_call_frame_for_req (state); \
- if (!frame) { \
- /* This is not completely clean, as some \
- * earlier allocations might remain unfreed \
- * if we return at this point, but still \
- * better than trying to go on with a NULL \
- * frame ... \
- */ \
- gf_log ("glusterfs-fuse", \
- GF_LOG_ERROR, \
- "FUSE message" \
- " unique %"PRIu64" opcode %d:" \
- " frame allocation failed", \
- state->finh->unique, \
- state->finh->opcode); \
- free_state (state); \
- return; \
- } \
- xl = fuse_state_subvol (state); \
- \
- frame->root->state = state; \
- frame->root->op = op_num; \
- frame->op = op_num; \
- STACK_WIND (frame, ret, xl, xl->fops->fop, args); \
- } while (0)
-
-#define GF_SELECT_LOG_LEVEL(_errno) \
- (((_errno == ENOENT) || (_errno == ESTALE))? \
- GF_LOG_DEBUG)
-
-#define GET_STATE(this, finh, state) \
- do { \
- state = get_state (this, finh); \
- if (!state) { \
- gf_log ("glusterfs-fuse", \
- GF_LOG_ERROR, \
- "FUSE message unique %"PRIu64" opcode %d:" \
- " state allocation failed", \
- finh->unique, finh->opcode); \
- \
- send_fuse_err (this, finh, ENOMEM); \
- GF_FREE (finh); \
- \
- return; \
- } \
- } while (0)
-
-
-typedef struct {
- void *pool;
- xlator_t *this;
- inode_table_t *itable;
- loc_t loc;
- loc_t loc2;
- fuse_in_header_t *finh;
- int32_t flags;
- off_t off;
- size_t size;
- unsigned long nlookup;
- fd_t *fd;
- dict_t *dict;
- char *name;
- char is_revalidate;
- gf_boolean_t truncate_needed;
- gf_lock_t lock;
- uint64_t lk_owner;
-} fuse_state_t;
+static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
+/*
+ * Send an invalidate notification up to fuse to purge the file from local
+ * page cache.
+ */
-xlator_t *
-fuse_state_subvol (fuse_state_t *state)
+static int32_t
+fuse_invalidate(xlator_t *this, inode_t *inode)
{
- xlator_t *subvol = NULL;
+ fuse_private_t *priv = this->private;
+ uint64_t nodeid;
- if (!state)
- return NULL;
-
- if (state->loc.inode)
- subvol = state->loc.inode->table->xl;
+ /*
+ * NOTE: We only invalidate at the moment if fopen_keep_cache is
+ * enabled because otherwise this is a departure from default
+ * behavior. Specifically, the performance/write-behind xlator
+ * causes unconditional invalidations on write requests.
+ */
+ if (!priv->fopen_keep_cache)
+ return 0;
- if (state->fd)
- subvol = state->fd->inode->table->xl;
+ nodeid = inode_to_fuse_nodeid(inode);
+ gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %"GF_PRI_INODE"." , nodeid);
+ fuse_log_eh (this, "Sending invalidate inode id: %"GF_PRI_INODE" gfid: %s", nodeid,
+ uuid_utoa (inode->gfid));
+ fuse_invalidate_inode(this, nodeid);
- return subvol;
+ return 0;
}
-
-xlator_t *
-fuse_active_subvol (xlator_t *fuse)
+static int32_t
+fuse_forget_cbk (xlator_t *this, inode_t *inode)
{
- fuse_private_t *priv = NULL;
-
- priv = fuse->private;
-
- return priv->active_subvol;
+ //Nothing to free in inode ctx, hence return.
+ return 0;
}
-
-static void
-free_state (fuse_state_t *state)
+fuse_fd_ctx_t *
+__fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)
{
- loc_wipe (&state->loc);
+ uint64_t val = 0;
+ int32_t ret = 0;
+ fuse_fd_ctx_t *fd_ctx = NULL;
- loc_wipe (&state->loc2);
+ ret = __fd_ctx_get (fd, this, &val);
- if (state->dict) {
- dict_unref (state->dict);
- state->dict = (void *)0xaaaaeeee;
- }
- if (state->name) {
- GF_FREE (state->name);
- state->name = NULL;
- }
- if (state->fd) {
- fd_unref (state->fd);
- state->fd = (void *)0xfdfdfdfd;
- }
- if (state->finh) {
- GF_FREE (state->finh);
- state->finh = NULL;
+ fd_ctx = (fuse_fd_ctx_t *)(unsigned long) val;
+
+ if (fd_ctx == NULL) {
+ fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx),
+ gf_fuse_mt_fd_ctx_t);
+ if (!fd_ctx) {
+ goto out;
+ }
+ ret = __fd_ctx_set (fd, this,
+ (uint64_t)(unsigned long)fd_ctx);
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "fd-ctx-set failed");
+ GF_FREE (fd_ctx);
+ fd_ctx = NULL;
+ }
}
-#ifdef DEBUG
- memset (state, 0x90, sizeof (*state));
-#endif
- GF_FREE (state);
- state = NULL;
+out:
+ return fd_ctx;
}
-
-fuse_state_t *
-get_state (xlator_t *this, fuse_in_header_t *finh)
+fuse_fd_ctx_t *
+fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)
{
- fuse_state_t *state = NULL;
+ fuse_fd_ctx_t *fd_ctx = NULL;
- state = (void *)GF_CALLOC (1, sizeof (*state),
- gf_fuse_mt_fuse_state_t);
- if (!state)
- return NULL;
- state->pool = this->ctx->pool;
- state->finh = finh;
- state->this = this;
+ if ((fd == NULL) || (this == NULL)) {
+ goto out;
+ }
- LOCK_INIT (&state->lock);
+ LOCK (&fd->lock);
+ {
+ fd_ctx = __fuse_fd_ctx_check_n_create (this, fd);
+ }
+ UNLOCK (&fd->lock);
- return state;
+out:
+ return fd_ctx;
}
-
-static call_frame_t *
-get_call_frame_for_req (fuse_state_t *state)
+fuse_fd_ctx_t *
+fuse_fd_ctx_get (xlator_t *this, fd_t *fd)
{
- call_pool_t *pool = NULL;
- fuse_in_header_t *finh = NULL;
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+ uint64_t value = 0;
+ int ret = 0;
- pool = state->pool;
- finh = state->finh;
- this = state->this;
- priv = this->private;
-
- frame = create_frame (this, pool);
- if (!frame)
- return NULL;
-
- if (finh) {
- frame->root->uid = finh->uid;
- frame->root->gid = finh->gid;
- frame->root->pid = finh->pid;
- frame->root->lk_owner = state->lk_owner;
- frame->root->unique = finh->unique;
+ ret = fd_ctx_get (fd, this, &value);
+ if (ret < 0) {
+ goto out;
}
- frame->root->type = GF_OP_TYPE_FOP;
+ fdctx = (fuse_fd_ctx_t *) (unsigned long)value;
- return frame;
+out:
+ return fdctx;
}
-
/*
* iov_out should contain a fuse_out_header at zeroth position.
* The error value of this header is sent to kernel.
@@ -320,6 +143,10 @@ send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
struct fuse_out_header *fouh = NULL;
int res, i;
+ if (!this || !finh || !iov_out) {
+ gf_log ("send_fuse_iov", GF_LOG_ERROR,"Invalid arguments");
+ return EINVAL;
+ }
priv = this->private;
fouh = iov_out[0].iov_base;
@@ -329,7 +156,9 @@ send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
fouh->len += iov_out[i].iov_len;
fouh->unique = finh->unique;
- res = writev (priv->fd, iov_out, count);
+ res = sys_writev (priv->fd, iov_out, count);
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s",
+ res, fouh->len, res == -1 ? strerror (errno) : "");
if (res == -1)
return errno;
@@ -340,9 +169,9 @@ send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
char w = 'W';
pthread_mutex_lock (&priv->fuse_dump_mutex);
- res = write (priv->fuse_dump_fd, &w, 1);
+ res = sys_write (priv->fuse_dump_fd, &w, 1);
if (res != -1)
- res = writev (priv->fuse_dump_fd, iov_out, count);
+ res = sys_writev (priv->fuse_dump_fd, iov_out, count);
pthread_mutex_unlock (&priv->fuse_dump_mutex);
if (res == -1)
@@ -359,195 +188,243 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
{
struct fuse_out_header fouh = {0, };
struct iovec iov_out[2];
+ int ret = 0;
fouh.error = 0;
iov_out[0].iov_base = &fouh;
iov_out[1].iov_base = data;
iov_out[1].iov_len = size;
- return send_fuse_iov (this, finh, iov_out, 2);
+ ret = send_fuse_iov (this, finh, iov_out, 2);
+ if (ret != 0)
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, "send_fuse_iov() "
+ "failed: %s", strerror (ret));
+
+ return ret;
}
#define send_fuse_obj(this, finh, obj) \
send_fuse_data (this, finh, obj, sizeof (*(obj)))
-static int
-send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
+
+#if FUSE_KERNEL_MINOR_VERSION >= 11
+static void
+fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
{
- struct fuse_out_header fouh = {0, };
- struct iovec iov_out;
+ struct fuse_out_header *fouh = NULL;
+ struct fuse_notify_inval_entry_out *fnieo = NULL;
+ fuse_private_t *priv = NULL;
+ dentry_t *dentry = NULL;
+ inode_t *inode = NULL;
+ size_t nlen = 0;
+ fuse_invalidate_node_t *node = NULL;
- fouh.error = -error;
- iov_out.iov_base = &fouh;
+ priv = this->private;
- return send_fuse_iov (this, finh, &iov_out, 1);
-}
+ if (!priv->reverse_fuse_thread_started)
+ return;
-static inode_t *
-fuse_ino_to_inode (uint64_t ino, xlator_t *fuse)
-{
- inode_t *inode = NULL;
- xlator_t *active_subvol = NULL;
+ inode = fuse_ino_to_inode(fuse_ino, this);
+ if (inode == NULL) {
+ return;
+ }
- if (ino == 1) {
- active_subvol = fuse_active_subvol (fuse);
- inode = active_subvol->itable->root;
- } else {
- inode = (inode_t *) (unsigned long) ino;
- inode_ref (inode);
+ list_for_each_entry (dentry, &inode->dentry_list, inode_list) {
+ node = GF_CALLOC (1, sizeof (*node),
+ gf_fuse_mt_invalidate_node_t);
+ if (node == NULL)
+ break;
+
+ INIT_LIST_HEAD (&node->next);
+
+ fouh = (struct fuse_out_header *)node->inval_buf;
+ fnieo = (struct fuse_notify_inval_entry_out *)(fouh + 1);
+
+ fouh->unique = 0;
+ fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
+
+ nlen = strlen (dentry->name);
+ fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1;
+ fnieo->parent = inode_to_fuse_nodeid (dentry->parent);
+
+ fnieo->namelen = nlen;
+ strcpy (node->inval_buf + sizeof (*fouh) + sizeof (*fnieo),
+ dentry->name);
+
+ pthread_mutex_lock (&priv->invalidate_mutex);
+ {
+ list_add_tail (&node->next, &priv->invalidate_list);
+ pthread_cond_signal (&priv->invalidate_cond);
+ }
+ pthread_mutex_unlock (&priv->invalidate_mutex);
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: "
+ "%"PRIu64"/%s", fnieo->parent, dentry->name);
+
+ if (dentry->parent) {
+ fuse_log_eh (this, "Invalidated entry %s (parent: %s)",
+ dentry->name,
+ uuid_utoa (dentry->parent->gfid));
+ } else {
+ fuse_log_eh (this, "Invalidated entry %s(nodeid: %" PRIu64 ")",
+ dentry->name, fnieo->parent);
+ }
}
- return inode;
+ if (inode)
+ inode_unref (inode);
}
+#endif
-static uint64_t
-inode_to_nodeid (inode_t *inode)
+/*
+ * Send an inval inode notification to fuse. This causes an invalidation of the
+ * entire page cache mapping on the inode.
+ */
+static void
+fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
{
- if (!inode || inode->ino == 1)
- return 1;
+#if FUSE_KERNEL_MINOR_VERSION >= 11
+ struct fuse_out_header *fouh = NULL;
+ struct fuse_notify_inval_inode_out *fniio = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_invalidate_node_t *node = NULL;
+ inode_t *inode = NULL;
- return (unsigned long) inode;
-}
+ priv = this->private;
+ if (!priv->reverse_fuse_thread_started)
+ return;
-GF_MUST_CHECK static int32_t
-fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
- ino_t par, const char *name)
-{
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t ret = -1;
- char *path = NULL;
+ node = GF_CALLOC (1, sizeof (*node), gf_fuse_mt_invalidate_node_t);
+ if (node == NULL)
+ return;
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
+ INIT_LIST_HEAD (&node->next);
- inode = loc->inode;
+ fouh = (struct fuse_out_header *) node->inval_buf;
+ fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1);
- if (name) {
- parent = loc->parent;
- if (!parent) {
- parent = fuse_ino_to_inode (par, state->this);
- loc->parent = parent;
- }
+ fouh->unique = 0;
+ fouh->error = FUSE_NOTIFY_INVAL_INODE;
+ fouh->len = sizeof(struct fuse_out_header) +
+ sizeof(struct fuse_notify_inval_inode_out);
- inode = loc->inode;
- if (!inode) {
- inode = inode_grep (parent->table, parent, name);
- loc->inode = inode;
- }
+ /* inval the entire mapping until we learn how to be more granular */
+ fniio->ino = fuse_ino;
+ fniio->off = 0;
+ fniio->len = -1;
- ret = inode_path (parent, name, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "inode_path failed for %"PRId64"/%s",
- parent->ino, name);
- goto fail;
- }
- loc->path = path;
- } else {
- inode = loc->inode;
- if (!inode) {
- inode = fuse_ino_to_inode (ino, state->this);
- loc->inode = inode;
- }
+ inode = fuse_ino_to_inode (fuse_ino, this);
- parent = loc->parent;
- if (!parent) {
- parent = inode_parent (inode, par, name);
- loc->parent = parent;
- }
+ pthread_mutex_lock (&priv->invalidate_mutex);
+ {
+ list_add_tail (&node->next, &priv->invalidate_list);
+ pthread_cond_signal (&priv->invalidate_cond);
+ }
+ pthread_mutex_unlock (&priv->invalidate_mutex);
- ret = inode_path (inode, NULL, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "inode_path failed for %"PRId64,
- inode->ino);
- goto fail;
- }
- loc->path = path;
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64,
+ fuse_ino);
+
+ if (inode) {
+ fuse_log_eh (this, "Invalidated inode %" PRIu64 " (gfid: %s)",
+ fuse_ino, uuid_utoa (inode->gfid));
+ } else {
+ fuse_log_eh (this, "Invalidated inode %" PRIu64, fuse_ino);
}
if (inode)
- loc->ino = inode->ino;
+ inode_unref (inode);
+#else
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "fuse_invalidate_inode not implemented on OS X due to missing FUSE notification");
+#endif
+}
- if (loc->path) {
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else
- loc->name = "";
- }
- if ((ino != 1) && (parent == NULL)) {
- gf_log ("fuse-bridge", GF_LOG_DEBUG,
- "failed to search parent for %"PRId64"/%s (%"PRId64")",
- (ino_t)par, name, (ino_t)ino);
- ret = -1;
- goto fail;
+int
+send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
+{
+ struct fuse_out_header fouh = {0, };
+ struct iovec iov_out;
+ inode_t *inode = NULL;
+
+ if (error == ESTALE)
+ error = ENOENT;
+
+ fouh.error = -error;
+ iov_out.iov_base = &fouh;
+
+ inode = fuse_ino_to_inode (finh->nodeid, this);
+
+ // filter out ENOENT
+ if (error != ENOENT) {
+ if (inode) {
+ fuse_log_eh (this,"Sending %s for operation %d on "
+ "inode %s", strerror (error), finh->opcode,
+ uuid_utoa (inode->gfid));
+ } else {
+ fuse_log_eh (this, "Sending %s for operation %d on "
+ "inode %" GF_PRI_INODE, strerror (error),
+ finh->opcode, finh->nodeid);
+ }
}
- ret = 0;
-fail:
- return ret;
-}
+ if (inode)
+ inode_unref (inode);
-/* courtesy of folly */
-static void
-stat2attr (struct iatt *st, struct fuse_attr *fa)
-{
- fa->ino = st->ia_ino;
- fa->size = st->ia_size;
- fa->blocks = st->ia_blocks;
- fa->atime = st->ia_atime;
- fa->mtime = st->ia_mtime;
- fa->ctime = st->ia_ctime;
- fa->atimensec = st->ia_atime_nsec;
- fa->mtimensec = st->ia_mtime_nsec;
- fa->ctimensec = st->ia_ctime_nsec;
- fa->mode = st_mode_from_ia (st->ia_prot, st->ia_type);
- fa->nlink = st->ia_nlink;
- fa->uid = st->ia_uid;
- fa->gid = st->ia_gid;
- fa->rdev = st->ia_rdev;
-#if FUSE_KERNEL_MINOR_VERSION >= 9
- fa->blksize = st->ia_blksize;
-#endif
-#ifdef GF_DARWIN_HOST_OS
- fa->crtime = (uint64_t)-1;
- fa->crtimensec = (uint32_t)-1;
- fa->flags = 0;
-#endif
+ return send_fuse_iov (this, finh, &iov_out, 1);
}
-
static int
fuse_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf)
+ inode_t *inode, struct iatt *buf, dict_t *xdata)
{
- fuse_state_t *state = NULL;
- fuse_in_header_t *finh = NULL;
- struct fuse_entry_out feo = {0, };
- fuse_private_t *priv = NULL;
- inode_t *linked_inode = NULL;
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ struct fuse_entry_out feo = {0, };
+ fuse_private_t *priv = NULL;
+ inode_t *linked_inode = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
priv = this->private;
state = frame->root->state;
finh = state->finh;
- if (!op_ret && state->loc.ino == 1) {
- buf->ia_ino = 1;
+ if (op_ret == 0) {
+ if (__is_root_gfid (state->loc.inode->gfid))
+ buf->ia_ino = 1;
+ if (gf_uuid_is_null (buf->ia_gfid)) {
+ /* With a NULL gfid inode linking is
+ not possible. Let's not pretend this
+ call was a "success".
+ */
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "Received NULL gfid for %s. Forcing EIO",
+ state->loc.path);
+ op_ret = -1;
+ op_errno = EIO;
+ }
}
+ /* log into the event-history after the null uuid check is done, since
+ * the op_ret and op_errno are being changed if the gfid is NULL.
+ */
+ fuse_log_eh (this, "op_ret: %d op_errno: %d "
+ "%"PRIu64": %s() %s => %s", op_ret, op_errno,
+ frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.path, (op_ret == 0)?
+ uuid_utoa(buf->ia_gfid):uuid_utoa(state->loc.gfid));
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %"PRId64" (%"PRId64")",
+ "%"PRIu64": %s() %s => %"PRIu64,
frame->root->unique, gf_fop_list[frame->root->op],
- state->loc.path, buf->ia_ino, state->loc.ino);
+ state->loc.path, buf->ia_ino);
buf->ia_blksize = this->ctx->page_size;
- stat2attr (buf, &feo.attr);
+ gf_fuse_stat2attr (buf, &feo.attr, priv->enable_ino32);
if (!buf->ia_ino) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
@@ -559,23 +436,13 @@ fuse_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
linked_inode = inode_link (inode, state->loc.parent,
state->loc.name, buf);
- if (linked_inode != inode) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%s(%s) inode (ptr=%p, ino=%"PRId64", "
- "gen=%"PRId64") found conflict (ptr=%p, "
- "ino=%"PRId64", gen=%"PRId64")",
- gf_fop_list[frame->root->op],
- state->loc.path, inode, inode->ino,
- inode->generation, linked_inode,
- linked_inode->ino, linked_inode->generation);
+ if (linked_inode == inode) {
+ inode_ctx_set (linked_inode, this, &ctx_value);
}
inode_lookup (linked_inode);
- /* TODO: make these timeouts configurable (via meta?) */
- feo.nodeid = inode_to_nodeid (linked_inode);
-
- feo.generation = linked_inode->generation;
+ feo.nodeid = inode_to_fuse_nodeid (linked_inode);
inode_unref (linked_inode);
@@ -602,26 +469,34 @@ fuse_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path,
strerror (op_errno));
- send_fuse_err (this, state->finh, op_errno);
+
+ if ((op_errno == ENOENT) && (priv->negative_timeout != 0)) {
+ feo.entry_valid =
+ calc_timeout_sec (priv->negative_timeout);
+ feo.entry_valid_nsec =
+ calc_timeout_nsec (priv->negative_timeout);
+ send_fuse_obj (this, finh, &feo);
+ } else {
+ send_fuse_err (this, state->finh, op_errno);
+ }
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static int
fuse_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, buf);
+ fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata);
return 0;
}
-
static int
fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@@ -636,89 +511,163 @@ fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (op_ret == -1 && state->is_revalidate == 1) {
- itable = state->loc.inode->table;
+ itable = state->itable;
+ /*
+ * A stale mapping might exist for a dentry/inode that has been
+ * removed from another client.
+ */
+ if (op_errno == ENOENT)
+ inode_unlink(state->loc.inode, state->loc.parent,
+ state->loc.name);
inode_unref (state->loc.inode);
state->loc.inode = inode_new (itable);
state->is_revalidate = 2;
+ if (gf_uuid_is_null (state->gfid))
+ gf_uuid_generate (state->gfid);
+ fuse_gfid_set (state);
STACK_WIND (frame, fuse_lookup_cbk,
prev->this, prev->this->fops->lookup,
- &state->loc, state->dict);
+ &state->loc, state->xdata);
return 0;
}
- fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat);
+ fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat,
+ dict);
return 0;
}
-
-static void
-fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg)
+void
+fuse_fop_resume (fuse_state_t *state)
{
- char *name = msg;
-
- fuse_state_t *state = NULL;
- int32_t ret = -1;
+ fuse_resume_fn_t fn = NULL;
- GET_STATE (this, finh, state);
+ /*
+ * Fail fd resolution failures right away.
+ */
+ if (state->resolve.fd && state->resolve.op_ret < 0) {
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
+ fn = state->resume_fn;
+ fn (state);
+}
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": LOOKUP %"PRIu64"/%s (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid, name);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+void
+fuse_lookup_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent && !state->loc.inode) {
+ gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s",
+ state->loc.path);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
- if (!state->loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LOOKUP %s", finh->unique,
- state->loc.path);
+ /* parent was resolved, entry could not, may be a missing gfid?
+ * Hence try to do a regular lookup
+ */
+ if ((state->resolve.op_ret == -1)
+ && (state->resolve.op_errno == ENODATA)) {
+ state->resolve.op_ret = 0;
+ }
- state->loc.inode = inode_new (state->loc.parent->table);
- } else {
+ if (state->loc.inode) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LOOKUP %s(%"PRId64")", finh->unique,
- state->loc.path, state->loc.inode->ino);
+ "%"PRIu64": LOOKUP %s(%s)", state->finh->unique,
+ state->loc.path, uuid_utoa (state->loc.inode->gfid));
state->is_revalidate = 1;
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": LOOKUP %s", state->finh->unique,
+ state->loc.path);
+ state->loc.inode = inode_new (state->loc.parent->table);
+ if (gf_uuid_is_null (state->gfid))
+ gf_uuid_generate (state->gfid);
+ fuse_gfid_set (state);
}
- state->dict = dict_new ();
-
FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP,
- lookup, &state->loc, state->dict);
+ lookup, &state->loc, state->xdata);
}
+/*
+ * LOOKUP request handler.
+ *
+ * @msg is used as the entry name to look up under the directory identified
+ * by finh->nodeid.  The handler only seeds the resolver with that
+ * (parent nodeid, name) pair; the lookup fop itself is issued from
+ * fuse_lookup_resume() once resolution has run.
+ */
+static void
+fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+        fuse_state_t *state      = NULL;
+        char         *entry_name = NULL;
+
+        /* NOTE(review): GET_STATE is a macro defined elsewhere; it fills in
+         * 'state' and presumably bails out of this function on failure --
+         * confirm against its definition. */
+        GET_STATE (this, finh, state);
+
+        entry_name = msg;
+
+        /* The return value of the init call is deliberately ignored. */
+        (void) fuse_resolve_entry_init (state, &state->resolve,
+                                        finh->nodeid, entry_name);
+
+        fuse_resolve_and_resume (state, fuse_lookup_resume);
+}
+
+
+/*
+ * Drop @nlookup lookup counts from the inode identified by @nodeid.
+ *
+ * @this:    fuse xlator instance, passed through to fuse_ino_to_inode().
+ * @unique:  request id, used only for the event-history entry.
+ * @nodeid:  FUSE node id of the inode to forget.
+ * @nlookup: number of lookups to forget.
+ *
+ * The inode obtained from fuse_ino_to_inode() is released again with
+ * inode_unref() before returning.
+ */
+static void
+do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup)
+{
+        inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this);
+
+        /* The other call sites of fuse_ino_to_inode() (send_fuse_err,
+         * fuse_invalidate_entry) treat a NULL result as possible; without
+         * this guard the gfid read below would dereference NULL. */
+        if (fuse_inode == NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRIu64": FORGET %"PRIu64"/%"PRIu64
+                        " (inode not found)", unique, nodeid, nlookup);
+                return;
+        }
+
+        fuse_log_eh(this, "%"PRIu64": FORGET %"PRIu64"/%"PRIu64" gfid: (%s)",
+                    unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid));
+
+        inode_forget(fuse_inode, nlookup);
+        inode_unref(fuse_inode);
+}
static void
fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_forget_in *ffi = msg;
-
- inode_t *fuse_inode;
+ struct fuse_forget_in *ffi = msg;
if (finh->nodeid == 1) {
GF_FREE (finh);
return;
}
- fuse_inode = fuse_ino_to_inode (finh->nodeid, this);
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FORGET %"PRIu64"/%"PRIu64,
+ finh->unique, finh->nodeid, ffi->nlookup);
- inode_forget (fuse_inode, ffi->nlookup);
- inode_unref (fuse_inode);
+ do_forget(this, finh->unique, finh->nodeid, ffi->nlookup);
GF_FREE (finh);
}
+#if FUSE_KERNEL_MINOR_VERSION >= 16
+/*
+ * BATCH_FORGET request handler.
+ *
+ * @msg begins with a struct fuse_batch_forget_in header; the
+ * fuse_forget_one records are read from the memory immediately following
+ * that header (fbfi + 1), fbfi->count of them.  Each record is handed to
+ * do_forget(), except records for node id 1, which are skipped just as
+ * fuse_forget() skips it.  No reply is sent from here; @finh is freed.
+ */
+static void
+fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+        struct fuse_batch_forget_in *fbfi = msg;
+        struct fuse_forget_one      *ffo  = (struct fuse_forget_one *) (fbfi + 1);
+        /* fbfi->count is logged with PRIu32, i.e. it is an unsigned 32-bit
+         * value; an index of the same type avoids the signed/unsigned
+         * comparison and the signed overflow an 'int' would hit for counts
+         * above INT_MAX. */
+        uint32_t                     i    = 0;
+
+        gf_log("glusterfs-fuse", GF_LOG_TRACE,
+               "%"PRIu64": BATCH_FORGET %"PRIu64"/%"PRIu32,
+               finh->unique, finh->nodeid, fbfi->count);
+
+        for (i = 0; i < fbfi->count; i++) {
+                if (ffo[i].nodeid == 1)
+                        continue;
+                do_forget(this, finh->unique, ffo[i].nodeid, ffo[i].nlookup);
+        }
+        GF_FREE(finh);
+}
+#endif
static int
fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
fuse_state_t *state;
fuse_in_header_t *finh;
@@ -729,17 +678,17 @@ fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %"PRId64, frame->root->unique,
+ "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique,
gf_fop_list[frame->root->op],
state->loc.path ? state->loc.path : "ERR",
prebuf->ia_ino);
- /* TODO: make these timeouts configurable via meta */
- /* TODO: what if the inode number has changed by now */
postbuf->ia_blksize = this->ctx->page_size;
- stat2attr (postbuf, &fao.attr);
+ gf_fuse_stat2attr (postbuf, &fao.attr, priv->enable_ino32);
fao.attr_valid = calc_timeout_sec (priv->attribute_timeout);
fao.attr_valid_nsec =
@@ -763,16 +712,15 @@ fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static int
fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
fuse_state_t *state;
fuse_in_header_t *finh;
@@ -783,17 +731,19 @@ fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() %s => "
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : "");
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %"PRId64, frame->root->unique,
+ "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique,
gf_fop_list[frame->root->op],
state->loc.path ? state->loc.path : "ERR",
buf->ia_ino);
- /* TODO: make these timeouts configurable via meta */
- /* TODO: what if the inode number has changed by now */
buf->ia_blksize = this->ctx->page_size;
- stat2attr (buf, &fao.attr);
+ gf_fuse_stat2attr (buf, &fao.attr, priv->enable_ino32);
fao.attr_valid = calc_timeout_sec (priv->attribute_timeout);
fao.attr_valid_nsec =
@@ -808,9 +758,9 @@ fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_obj (this, finh, &fao);
#endif
} else {
- GF_LOG_OCCASIONALLY ( gf_fuse_conn_err_log, "glusterfs-fuse",
- GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
+ GF_LOG_OCCASIONALLY ( gf_fuse_conn_err_log, "glusterfs-fuse",
+ GF_LOG_WARNING,
+ "%"PRIu64": %s() %s => -1 (%s)",
frame->root->unique,
gf_fop_list[frame->root->op],
state->loc.path ? state->loc.path : "ERR",
@@ -819,129 +769,193 @@ fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static int
fuse_root_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stat, dict_t *dict,
struct iatt *postparent)
{
- fuse_attr_cbk (frame, cookie, this, op_ret, op_errno, stat);
+ fuse_attr_cbk (frame, cookie, this, op_ret, op_errno, stat, dict);
return 0;
}
+/*
+ * Resume step of GETATTR, run after inode resolution.
+ *
+ * If resolution produced no inode, the request is failed with
+ * state->resolve.op_errno and the state is freed.  Otherwise, for
+ * non-directories an fd is looked up on the inode for the requesting pid;
+ * when an fd is available an FSTAT is issued on it, else a STAT on the loc.
+ */
+void
+fuse_getattr_resume (fuse_state_t *state)
+{
+        fuse_in_header_t *in_hdr = state->finh;
+
+        if (state->loc.inode == NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRIu64": GETATTR %"PRIu64" (%s) resolution failed",
+                        in_hdr->unique, in_hdr->nodeid,
+                        uuid_utoa (state->resolve.gfid));
+                send_fuse_err (state->this, in_hdr, state->resolve.op_errno);
+                free_fuse_state (state);
+                return;
+        }
+
+        /* Directories never take the fd-based path. */
+        if (!IA_ISDIR (state->loc.inode->ia_type))
+                state->fd = fd_lookup (state->loc.inode, in_hdr->pid);
+
+        if (state->fd) {
+                gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+                        "%"PRIu64": FGETATTR %"PRIu64" (%s/%p)",
+                        in_hdr->unique, in_hdr->nodeid,
+                        state->loc.path, state->fd);
+
+                FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FSTAT,
+                          fstat, state->fd, state->xdata);
+        } else {
+                gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+                        "%"PRIu64": GETATTR %"PRIu64" (%s)",
+                        in_hdr->unique, in_hdr->nodeid,
+                        state->loc.path);
+
+                FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT,
+                          stat, &state->loc, state->xdata);
+        }
+}
static void
fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
fuse_state_t *state;
- fd_t *fd = NULL;
int32_t ret = -1;
GET_STATE (this, finh, state);
if (finh->nodeid == 1) {
+ state->gfid[15] = 1;
+
ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
if (ret < 0) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETATTR %"PRIu64" (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid);
+ "%"PRIu64": GETATTR on / (fuse_loc_fill() failed)",
+ finh->unique);
send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ free_fuse_state (state);
return;
}
- state->dict = dict_new ();
+ fuse_gfid_set (state);
FUSE_FOP (state, fuse_root_lookup_cbk, GF_FOP_LOOKUP,
- lookup, &state->loc, state->dict);
+ lookup, &state->loc, state->xdata);
return;
}
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
+ fuse_resolve_inode_init (state, &state->resolve, state->finh->nodeid);
- if (!state->loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETATTR %"PRIu64" (%s) (fuse_loc_fill() returned NULL inode)",
- finh->unique, finh->nodeid, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
+ fuse_resolve_and_resume (state, fuse_getattr_resume);
+}
+
+static int32_t
+fuse_fd_inherit_directio (xlator_t *this, fd_t *fd, struct fuse_open_out *foo)
+{
+ int32_t ret = 0;
+ fuse_fd_ctx_t *fdctx = NULL, *tmp_fdctx = NULL;
+ fd_t *tmp_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", this, out, ret,
+ -EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", fd, out, ret,
+ -EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", foo, out, ret,
+ -EINVAL);
+
+ fdctx = fuse_fd_ctx_get (this, fd);
+ if (!fdctx) {
+ ret = -ENOMEM;
+ goto out;
}
- fd = fd_lookup (state->loc.inode, finh->pid);
- state->fd = fd;
- if (!fd || IA_ISDIR (state->loc.inode->ia_type)) {
- /* this is the @ret of fuse_loc_fill, checked here
- to permit fstat() to happen even when fuse_loc_fill fails
- */
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETATTR %"PRIu64" (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
+ tmp_fd = fd_lookup (fd->inode, 0);
+ if (tmp_fd) {
+ tmp_fdctx = fuse_fd_ctx_get (this, tmp_fd);
+ if (tmp_fdctx) {
+ foo->open_flags &= ~FOPEN_DIRECT_IO;
+ foo->open_flags |= (tmp_fdctx->open_flags
+ & FOPEN_DIRECT_IO);
}
+ }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": GETATTR %"PRIu64" (%s)",
- finh->unique, finh->nodeid, state->loc.path);
+ fdctx->open_flags |= (foo->open_flags & FOPEN_DIRECT_IO);
+ if (tmp_fd != NULL) {
+ fd_unref (tmp_fd);
+ }
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT,
- stat, &state->loc);
- } else {
+ ret = 0;
+out:
+ return ret;
+}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FGETATTR %"PRIu64" (%s/%p)",
- finh->unique, finh->nodeid, state->loc.path, fd);
- FUSE_FOP (state,fuse_attr_cbk, GF_FOP_FSTAT,
- fstat, fd);
- }
+/*
+ * Report whether @xdata carries a "direct-io-mode" key.
+ *
+ * Returns _gf_true when @xdata is non-NULL and dict_get() finds the key,
+ * _gf_false otherwise.  Only the key's presence is tested; its value is
+ * not inspected.
+ */
+gf_boolean_t
+direct_io_mode (dict_t *xdata)
+{
+        if (xdata == NULL)
+                return _gf_false;
+
+        return dict_get (xdata, "direct-io-mode") ? _gf_true : _gf_false;
}
static int
fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- fuse_state_t *state;
- fuse_in_header_t *finh;
- fuse_private_t *priv = NULL;
- struct fuse_open_out foo = {0, };
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ fuse_private_t *priv = NULL;
+ int32_t ret = 0;
+ struct fuse_open_out foo = {0, };
priv = this->private;
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
foo.fh = (uintptr_t) fd;
foo.open_flags = 0;
if (!IA_ISDIR (fd->inode->ia_type)) {
- if (priv->direct_io_mode)
+ if (((priv->direct_io_mode == 2)
+ && ((state->flags & O_ACCMODE) != O_RDONLY))
+ || (priv->direct_io_mode == 1)
+ || (direct_io_mode (xdata)))
foo.open_flags |= FOPEN_DIRECT_IO;
#ifdef GF_DARWIN_HOST_OS
- /* In Linux: by default, buffer cache
- * is purged upon open, setting
- * FOPEN_KEEP_CACHE implies no-purge
- *
- * In MacFUSE: by default, buffer cache
- * is left intact upon open, setting
- * FOPEN_PURGE_UBC implies purge
- *
- * [[Innnnteresting...]]
- */
+ /* In Linux: by default, buffer cache
+ * is purged upon open, setting
+ * FOPEN_KEEP_CACHE implies no-purge
+ *
+ * In MacFUSE: by default, buffer cache
+ * is left intact upon open, setting
+ * FOPEN_PURGE_UBC implies purge
+ *
+ * [[Interesting...]]
+ */
+ if (!priv->fopen_keep_cache)
foo.open_flags |= FOPEN_PURGE_UBC;
+#else
+ /*
+ * If fopen-keep-cache is enabled, we set the associated
+ * flag here such that files are not invalidated on open.
+ * File invalidations occur either in fuse or explicitly
+ * when the cache is set invalid on the inode.
+ */
+ if (priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_KEEP_CACHE;
#endif
}
@@ -949,49 +963,58 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"%"PRIu64": %s() %s => %p", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path, fd);
- fd_ref (fd);
+ ret = fuse_fd_inherit_directio (this, fd, &foo);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "cannot inherit direct-io values for fd "
+ "(ptr:%p inode-gfid:%s) from fds already "
+ "opened", fd, uuid_utoa (fd->inode->gfid));
+ goto err;
+ }
+
if (send_fuse_obj (this, finh, &foo) == ENOENT) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"open(%s) got EINTR", state->loc.path);
- fd_unref (fd);
- goto out;
+ gf_fd_put (priv->fdtable, state->fd_no);
+ goto out;
}
fd_bind (fd);
} else {
+ err:
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path,
strerror (op_errno));
send_fuse_err (this, finh, op_errno);
+ gf_fd_put (priv->fdtable, state->fd_no);
}
out:
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static void
-fuse_do_truncate (fuse_state_t *state, size_t size)
+fuse_do_truncate (fuse_state_t *state)
{
if (state->fd) {
FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_FTRUNCATE,
- ftruncate, state->fd, size);
+ ftruncate, state->fd, state->off, state->xdata);
} else {
FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_TRUNCATE,
- truncate, &state->loc, size);
+ truncate, &state->loc, state->off, state->xdata);
}
return;
}
-
static int
fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
fuse_state_t *state;
fuse_in_header_t *finh;
@@ -1004,26 +1027,27 @@ fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh(this, "op_ret: %d, op_errno: %d, %"PRIu64", %s() %s => "
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : "");
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %"PRId64, frame->root->unique,
+ "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique,
gf_fop_list[frame->root->op],
state->loc.path ? state->loc.path : "ERR",
statpost->ia_ino);
- /* TODO: make these timeouts configurable via meta */
- /* TODO: what if the inode number has changed by now */
-
statpost->ia_blksize = this->ctx->page_size;
-
- stat2attr (statpost, &fao.attr);
+ gf_fuse_stat2attr (statpost, &fao.attr, priv->enable_ino32);
fao.attr_valid = calc_timeout_sec (priv->attribute_timeout);
fao.attr_valid_nsec =
calc_timeout_nsec (priv->attribute_timeout);
if (state->truncate_needed) {
- fuse_do_truncate (state, state->size);
+ fuse_do_truncate (state);
} else {
#if FUSE_KERNEL_MINOR_VERSION >= 9
priv->proto_minor >= 9 ?
@@ -1047,7 +1071,7 @@ fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_done) {
- free_state (state);
+ free_fuse_state (state);
}
STACK_DESTROY (frame->root);
@@ -1055,7 +1079,6 @@ fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
static int32_t
fattr_to_gf_set_attr (int32_t valid)
{
@@ -1082,35 +1105,81 @@ fattr_to_gf_set_attr (int32_t valid)
return gf_valid;
}
-
#define FATTR_MASK (FATTR_SIZE \
| FATTR_UID | FATTR_GID \
| FATTR_ATIME | FATTR_MTIME \
| FATTR_MODE)
+/*
+ * Resume step of SETATTR, run after fd/inode resolution.
+ *
+ * Fails the request with state->resolve.op_errno when neither an fd nor an
+ * inode is available.  A request whose only FATTR_MASK bit is FATTR_SIZE
+ * is a plain truncate and goes to fuse_do_truncate(); anything else is
+ * sent as FSETATTR (fd present and no time change requested) or SETATTR.
+ */
+void
+fuse_setattr_resume (fuse_state_t *state)
+{
+        if (!(state->fd || state->loc.inode)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRIu64": SETATTR %"PRIu64" (%s) resolution failed",
+                        state->finh->unique, state->finh->nodeid,
+                        uuid_utoa (state->resolve.gfid));
+                send_fuse_err (state->this, state->finh,
+                               state->resolve.op_errno);
+                free_fuse_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+                "%"PRIu64": SETATTR (%"PRIu64")%s", state->finh->unique,
+                state->finh->nodeid, state->loc.path);
+
+#ifdef GF_TEST_FFOP
+        /* this is for calls like 'fchmod()' */
+        if (state->fd == NULL)
+                state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
+
+        /* Size is the only attribute being changed: plain truncate. */
+        if ((state->valid & (FATTR_MASK)) == FATTR_SIZE) {
+                fuse_do_truncate (state);
+                return;
+        }
+
+        /*
+          there is no "futimes" call, so don't send
+          fsetattr if ATIME or MTIME is set
+        */
+        if (state->fd && !(state->valid & (FATTR_ATIME | FATTR_MTIME))) {
+                FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_FSETATTR,
+                          fsetattr, state->fd, &state->attr,
+                          fattr_to_gf_set_attr (state->valid),
+                          state->xdata);
+        } else {
+                FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_SETATTR,
+                          setattr, &state->loc, &state->attr,
+                          fattr_to_gf_set_attr (state->valid),
+                          state->xdata);
+        }
+}
static void
fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_setattr_in *fsi = msg;
- struct iatt attr = {0, };
-
+#if FUSE_KERNEL_MINOR_VERSION >= 9
fuse_private_t *priv = NULL;
+#endif
fuse_state_t *state = NULL;
- int32_t ret = -1;
- int32_t valid = 0;
GET_STATE (this, finh, state);
if (fsi->valid & FATTR_FH &&
- !(fsi->valid & (FATTR_ATIME|FATTR_MTIME)))
+ !(fsi->valid & (FATTR_ATIME|FATTR_MTIME))) {
/* We need no loc if kernel sent us an fd and
* we are not fiddling with times */
- ret = 1;
- else
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0,
- NULL);
+ state->fd = FH_TO_FD (fsi->fh);
+ fuse_resolve_fd_init (state, &state->resolve, state->fd);
+ } else {
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+ }
/*
* This is just stub code demonstrating how to retrieve
@@ -1125,81 +1194,86 @@ fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
* http://git.kernel.org/?p=linux/kernel/git/torvalds/
* linux-2.6.git;a=commit;h=v2.6.23-5896-gf333211
*/
- priv = this->private;
#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv = this->private;
if (priv->proto_minor >= 9 && fsi->valid & FATTR_LOCKOWNER)
state->lk_owner = fsi->lock_owner;
#endif
- if ((state->loc.inode == NULL && ret == 0) ||
- (ret < 0)) {
+ state->valid = fsi->valid;
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": SETATTR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
+ if ((fsi->valid & (FATTR_MASK)) != FATTR_SIZE) {
+ if (fsi->valid & FATTR_SIZE) {
+ state->off = fsi->size;
+ state->truncate_needed = _gf_true;
+ }
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ state->attr.ia_size = fsi->size;
+ state->attr.ia_atime = fsi->atime;
+ state->attr.ia_mtime = fsi->mtime;
+ state->attr.ia_atime_nsec = fsi->atimensec;
+ state->attr.ia_mtime_nsec = fsi->mtimensec;
- return;
+ state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode);
+ state->attr.ia_uid = fsi->uid;
+ state->attr.ia_gid = fsi->gid;
+ } else {
+ state->off = fsi->size;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SETATTR (%"PRIu64")%s", finh->unique,
- finh->nodeid, state->loc.path);
-
- valid = fsi->valid;
-
- if (fsi->valid & FATTR_FH) {
- state->fd = FH_TO_FD (fsi->fh);
- }
+ fuse_resolve_and_resume (state, fuse_setattr_resume);
+}
- if ((valid & (FATTR_MASK)) != FATTR_SIZE) {
- if (valid & FATTR_SIZE) {
- state->size = fsi->size;
- state->truncate_needed = _gf_true;
- }
+static int
+fuse_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
- attr.ia_size = fsi->size;
- attr.ia_atime = fsi->atime;
- attr.ia_mtime = fsi->mtime;
- attr.ia_atime_nsec = fsi->atimensec;
- attr.ia_mtime_nsec = fsi->mtimensec;
+ GF_ASSERT (frame);
+ GF_ASSERT (frame->root);
- attr.ia_prot = ia_prot_from_st_mode (fsi->mode);
- attr.ia_uid = fsi->uid;
- attr.ia_gid = fsi->gid;
+ state = frame->root->state;
+ finh = state->finh;
- if (state->fd &&
- !((fsi->valid & FATTR_ATIME) || (fsi->valid & FATTR_MTIME))) {
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
- /*
- there is no "futimes" call, so don't send
- fsetattr if ATIME or MTIME is set
- */
+ if (op_ret == 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": %s() %s => 0", frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR");
- FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_FSETATTR,
- fsetattr, state->fd, &attr,
- fattr_to_gf_set_attr (fsi->valid));
- } else {
- FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_SETATTR,
- setattr, &state->loc, &attr,
- fattr_to_gf_set_attr (fsi->valid));
- }
+ send_fuse_err (this, finh, 0);
} else {
- fuse_do_truncate (state, fsi->size);
+ gf_log ("glusterfs-fuse",
+ (ENODATA == op_errno) ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "%"PRIu64": %s() of %s on %s => -1 (%s)",
+ frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->name ? state->name : "",
+ state->loc.path ? state->loc.path : "ERR",
+ strerror (op_errno));
+
+ send_fuse_err (this, finh, op_errno);
}
-}
+ free_fuse_state (state);
+ STACK_DESTROY (frame->root);
+ return 0;
+}
static int
fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
fuse_state_t *state = frame->root->state;
fuse_in_header_t *finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => 0", frame->root->unique,
@@ -1208,35 +1282,35 @@ fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, 0);
} else {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
- frame->root->unique,
- gf_fop_list[frame->root->op],
- state->loc.path ? state->loc.path : "ERR",
- strerror (op_errno));
+ if (GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, op_errno)) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": %s() %s => -1 (%s)",
+ frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR",
+ strerror (op_errno));
+ }
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static int
fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- return fuse_err_cbk (frame, cookie, this, op_ret, op_errno);
+ return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata);
}
-
static int
fuse_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
if (op_ret == -1 && op_errno == ENOTSUP)
GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log,
@@ -1244,14 +1318,13 @@ fuse_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"extended attribute not supported "
"by the backend storage");
- return fuse_err_cbk (frame, cookie, this, op_ret, op_errno);
+ return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata);
}
-
static int
fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1259,70 +1332,81 @@ fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
- if (op_ret == 0)
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() %s => "
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : "");
if (op_ret == 0) {
+ inode_unlink (state->loc.inode, state->loc.parent,
+ state->loc.name);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => 0", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path);
send_fuse_err (this, finh, 0);
} else {
- gf_log ("glusterfs-fuse",
- op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
- gf_fop_list[frame->root->op], state->loc.path,
- strerror (op_errno));
-
+ if (GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, op_errno)) {
+ gf_log ("glusterfs-fuse",
+ op_errno == ENOTEMPTY ? GF_LOG_DEBUG :
+ GF_LOG_WARNING, "%"PRIu64": %s() %s => -1 (%s)",
+ frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ strerror (op_errno));
+ }
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_access_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": ACCESS %"PRIu64" (%s) resolution failed",
+ state->finh->unique, state->finh->nodeid,
+ uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64" ACCESS %s/%"PRIu64" mask=%d",
+ state->finh->unique, state->loc.path,
+ state->finh->nodeid, state->mask);
+
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_ACCESS, access,
+ &state->loc, state->mask, state->xdata);
+}
static void
fuse_access (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_access_in *fai = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": ACCESS %"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64" ACCESS %s/%"PRIu64" mask=%d", finh->unique,
- state->loc.path, finh->nodeid, fai->mask);
+ state->mask = fai->mask;
- FUSE_FOP (state, fuse_err_cbk,
- GF_FOP_ACCESS, access,
- &state->loc, fai->mask);
+ fuse_resolve_and_resume (state, fuse_access_resume);
return;
}
-
static int
fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *linkname,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1330,14 +1414,17 @@ fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
- if (op_ret > 0) {
- ((char *)linkname)[op_ret] = '\0';
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d %"PRIu64": %s() => %s"
+ " linkname: %s, gfid: %s", op_ret, op_errno,
+ frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.gfid, linkname,
+ uuid_utoa (state->loc.gfid));
+ if (op_ret > 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s => %s", frame->root->unique,
- state->loc.path, linkname);
-
- send_fuse_data (this, finh, (void *)linkname, op_ret + 1);
+ "%"PRIu64": %s => %s (size:%d)", frame->root->unique,
+ state->loc.path, linkname, op_ret);
+ send_fuse_data (this, finh, (void *)linkname, op_ret);
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": %s => -1 (%s)", frame->root->unique,
@@ -1346,41 +1433,82 @@ fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_readlink_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "READLINK %"PRIu64" (%s) resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64" READLINK %s/%s", state->finh->unique,
+ state->loc.path, uuid_utoa (state->loc.inode->gfid));
+
+ FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK,
+ readlink, &state->loc, 4096, state->xdata);
+}
static void
fuse_readlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" READLINK %s (fuse_loc_fill() returned NULL inode)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64" READLINK %s/%"PRId64, finh->unique,
- state->loc.path, state->loc.inode->ino);
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
- FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK,
- readlink, &state->loc, 4096);
+ fuse_resolve_and_resume (state, fuse_readlink_resume);
return;
}
+void
+fuse_mknod_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "MKNOD %"PRIu64"/%s (%s/%s) resolution failed",
+ state->finh->nodeid, state->resolve.bname,
+ uuid_utoa (state->resolve.gfid), state->resolve.bname);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG, "inode already present");
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+
+ state->loc.inode = inode_new (state->loc.parent->table);
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": MKNOD %s", state->finh->unique,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKNOD,
+ mknod, &state->loc, state->mode, state->rdev, state->umask,
+ state->xdata);
+}
static void
fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1389,174 +1517,234 @@ fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg)
char *name = (char *)(fmi + 1);
fuse_state_t *state = NULL;
+#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
+ int32_t ret = -1;
priv = this->private;
-#if FUSE_KERNEL_MINOR_VERSION >= 12
if (priv->proto_minor < 12)
name = (char *)msg + FUSE_COMPAT_MKNOD_IN_SIZE;
#endif
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" MKNOD %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+
+ gf_uuid_generate (state->gfid);
+
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
+
+ state->mode = fmi->mode;
+ state->rdev = fmi->rdev;
+
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ priv = this->private;
+ FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKNOD");
+#endif
+
+ fuse_resolve_and_resume (state, fuse_mknod_resume);
+
+ return;
+}
+
+void
+fuse_mkdir_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "MKDIR %"PRIu64" (%s/%s) resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG, "inode already present");
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+
state->loc.inode = inode_new (state->loc.parent->table);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": MKNOD %s", finh->unique,
+ "%"PRIu64": MKDIR %s", state->finh->unique,
state->loc.path);
- FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKNOD,
- mknod, &state->loc, fmi->mode, fmi->rdev);
-
- return;
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKDIR,
+ mkdir, &state->loc, state->mode, state->umask, state->xdata);
}
-
static void
fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_mkdir_in *fmi = msg;
char *name = (char *)(fmi + 1);
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ fuse_private_t *priv = NULL;
+ int32_t ret = -1;
+#endif
fuse_state_t *state;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" MKDIR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- state->loc.inode = inode_new (state->loc.parent->table);
+ gf_uuid_generate (state->gfid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": MKDIR %s", finh->unique,
- state->loc.path);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKDIR,
- mkdir, &state->loc, fmi->mode);
+ state->mode = fmi->mode;
+
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ priv = this->private;
+ FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKDIR");
+#endif
+
+ fuse_resolve_and_resume (state, fuse_mkdir_resume);
return;
}
+void
+fuse_unlink_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent || !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "UNLINK %"PRIu64" (%s/%s) resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": UNLINK %s", state->finh->unique,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK,
+ unlink, &state->loc, 0, state->xdata);
+}
static void
fuse_unlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
char *name = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": UNLINK %s (fuse_loc_fill() returned NULL inode)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_and_resume (state, fuse_unlink_resume);
+
+ return;
+}
+
+void
+fuse_rmdir_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent || !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "RMDIR %"PRIu64" (%s/%s) resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": UNLINK %s", finh->unique,
+ "%"PRIu64": RMDIR %s", state->finh->unique,
state->loc.path);
- FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK,
- unlink, &state->loc);
-
- return;
+ FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR,
+ rmdir, &state->loc, 0, state->xdata);
}
-
static void
fuse_rmdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
char *name = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": RMDIR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": RMDIR %s", finh->unique,
- state->loc.path);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR,
- rmdir, &state->loc);
+ fuse_resolve_and_resume (state, fuse_rmdir_resume);
return;
}
-
-static void
-fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
+void
+fuse_symlink_resume (fuse_state_t *state)
{
- char *name = msg;
- char *linkname = name + strlen (name) + 1;
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "SYMLINK %"PRIu64" (%s/%s) -> %s resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname, state->name);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
- fuse_state_t *state = NULL;
- int32_t ret = -1;
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
- GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" SYMLINK %s -> %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, linkname);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG, "inode already present");
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
}
state->loc.inode = inode_new (state->loc.parent->table);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SYMLINK %s -> %s", finh->unique,
- state->loc.path, linkname);
+ "%"PRIu64": SYMLINK %s -> %s", state->finh->unique,
+ state->loc.path, state->name);
FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_SYMLINK,
- symlink, linkname, &state->loc);
+ symlink, state->name, &state->loc, state->umask, state->xdata);
+}
+
+static void
+fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ char *name = msg;
+ char *linkname = name + strlen (name) + 1;
+ fuse_state_t *state = NULL;
+
+ GET_STATE (this, finh, state);
+
+ gf_uuid_generate (state->gfid);
+
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
+
+ state->name = gf_strdup (linkname);
+
+ fuse_resolve_and_resume (state, fuse_symlink_resume);
return;
}
-
int
fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1564,17 +1752,25 @@ fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() "
+ "path: %s parent: %s ==> path: %s parent: %s"
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.parent?uuid_utoa (state->loc.parent->gfid):"",
+ state->loc2.path,
+ state->loc2.parent?uuid_utoa (state->loc2.parent->gfid):"",
+ state->loc.inode?uuid_utoa (state->loc.inode->gfid):"");
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s -> %s => 0 (buf->ia_ino=%"PRId64" , loc->ino=%"PRId64")",
+ "%"PRIu64": %s -> %s => 0 (buf->ia_ino=%"PRIu64")",
frame->root->unique, state->loc.path, state->loc2.path,
- buf->ia_ino, state->loc.ino);
+ buf->ia_ino);
{
/* ugly ugly - to stay blind to situation where
rename happens on a new inode
*/
- buf->ia_ino = state->loc.ino;
buf->ia_type = state->loc.inode->ia_type;
}
buf->ia_blksize = this->ctx->page_size;
@@ -1593,11 +1789,57 @@ fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_rename_resume (fuse_state_t *state)
+{
+ char loc_uuid[64] = {0,};
+ char loc2_uuid[64] = {0,};
+
+ if (!state->loc.parent || !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "RENAME %"PRIu64" %s/%s -> %s/%s src resolution failed",
+ state->finh->unique,
+ uuid_utoa_r (state->resolve.gfid, loc_uuid),
+ state->resolve.bname,
+ uuid_utoa_r (state->resolve2.gfid, loc2_uuid),
+ state->resolve2.bname);
+
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (!state->loc2.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "RENAME %"PRIu64" %s/%s -> %s/%s dst resolution failed",
+ state->finh->unique,
+ uuid_utoa_r (state->resolve.gfid, loc_uuid),
+ state->resolve.bname,
+ uuid_utoa_r (state->resolve2.gfid, loc2_uuid),
+ state->resolve2.bname);
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ state->resolve.op_ret = 0;
+ state->resolve2.op_ret = 0;
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": RENAME `%s (%s)' -> `%s (%s)'",
+ state->finh->unique, state->loc.path, loc_uuid,
+ state->loc2.path, loc2_uuid);
+
+ FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME,
+ rename, &state->loc, &state->loc2, state->xdata);
+}
static void
fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1605,153 +1847,127 @@ fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg)
struct fuse_rename_in *fri = msg;
char *oldname = (char *)(fri + 1);
char *newname = oldname + strlen (oldname) + 1;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, oldname);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "for %s %"PRIu64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)",
- state->loc.path, finh->unique, state->loc.path,
- state->loc2.path);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, oldname);
+
+ fuse_resolve_entry_init (state, &state->resolve2, fri->newdir, newname);
+
+ fuse_resolve_and_resume (state, fuse_rename_resume);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ return;
+}
+
+void
+fuse_link_resume (fuse_state_t *state)
+{
+ if (!state->loc2.inode || !state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "fuse_loc_fill() failed %"PRIu64": LINK %s %s",
+ state->finh->unique, state->loc2.path, state->loc.path);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
- ret = fuse_loc_fill (&state->loc2, state, 0, fri->newdir, newname);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "for %s %"PRIu64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)",
- state->loc.path, finh->unique, state->loc.path,
- state->loc2.path);
+ state->resolve.op_ret = 0;
+ state->resolve2.op_ret = 0;
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ if (state->loc.inode) {
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+ state->loc.inode = inode_ref (state->loc2.inode);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": RENAME `%s (%"PRId64")' -> `%s (%"PRId64")'",
- finh->unique, state->loc.path, state->loc.ino,
- state->loc2.path, state->loc2.ino);
-
- FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME,
- rename, &state->loc, &state->loc2);
+ "%"PRIu64": LINK() %s -> %s",
+ state->finh->unique, state->loc2.path,
+ state->loc.path);
- return;
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_LINK,
+ link, &state->loc2, &state->loc, state->xdata);
}
-
static void
fuse_link (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_link_in *fli = msg;
char *name = (char *)(fli + 1);
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret == 0)
- ret = fuse_loc_fill (&state->loc2, state, fli->oldnodeid, 0,
- NULL);
+ fuse_resolve_inode_init (state, &state->resolve2, fli->oldnodeid);
- if ((state->loc2.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "fuse_loc_fill() failed for %s %"PRIu64": LINK %s %s",
- state->loc2.path, finh->unique,
- state->loc2.path, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
-
- state->loc.inode = inode_ref (state->loc2.inode);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LINK() %s (%"PRId64") -> %s (%"PRId64")",
- finh->unique, state->loc2.path, state->loc2.ino,
- state->loc.path, state->loc.ino);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_LINK,
- link, &state->loc2, &state->loc);
+ fuse_resolve_and_resume (state, fuse_link_resume);
return;
}
-
static int
fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- fuse_state_t *state = NULL;
- fuse_in_header_t *finh = NULL;
- fuse_private_t *priv = NULL;
- struct fuse_out_header fouh = {0, };
- struct fuse_entry_out feo = {0, };
- struct fuse_open_out foo = {0, };
- struct iovec iov_out[3];
- inode_t *linked_inode = NULL;
-
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ fuse_private_t *priv = NULL;
+ struct fuse_out_header fouh = {0, };
+ struct fuse_entry_out feo = {0, };
+ struct fuse_open_out foo = {0, };
+ struct iovec iov_out[3];
+ inode_t *linked_inode = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
state = frame->root->state;
priv = this->private;
finh = state->finh;
foo.open_flags = 0;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
foo.fh = (uintptr_t) fd;
- if (priv->direct_io_mode)
+ if (((priv->direct_io_mode == 2)
+ && ((state->flags & O_ACCMODE) != O_RDONLY))
+ || (priv->direct_io_mode == 1)
+ || direct_io_mode (xdata))
foo.open_flags |= FOPEN_DIRECT_IO;
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %p (ino=%"PRId64")",
+ "%"PRIu64": %s() %s => %p (ino=%"PRIu64")",
frame->root->unique, gf_fop_list[frame->root->op],
state->loc.path, fd, buf->ia_ino);
buf->ia_blksize = this->ctx->page_size;
- stat2attr (buf, &feo.attr);
+ gf_fuse_stat2attr (buf, &feo.attr, priv->enable_ino32);
linked_inode = inode_link (inode, state->loc.parent,
state->loc.name, buf);
if (linked_inode != inode) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "create(%s) inode (ptr=%p, ino=%"PRId64", "
- "gen=%"PRId64") found conflict (ptr=%p, "
- "ino=%"PRId64", gen=%"PRId64")",
- state->loc.path, inode, inode->ino,
- inode->generation, linked_inode,
- linked_inode->ino, linked_inode->generation);
-
/*
VERY racy code (if used anywhere else)
-- don't do this without understanding
*/
inode_unref (fd->inode);
fd->inode = inode_ref (linked_inode);
+ } else {
+ inode_ctx_set (linked_inode, this, &ctx_value);
}
inode_lookup (linked_inode);
inode_unref (linked_inode);
- fd_ref (fd);
-
- feo.nodeid = inode_to_nodeid (linked_inode);
-
- feo.generation = linked_inode->generation;
+ feo.nodeid = inode_to_fuse_nodeid (linked_inode);
feo.entry_valid = calc_timeout_sec (priv->entry_timeout);
feo.entry_valid_nsec = calc_timeout_nsec (priv->entry_timeout);
@@ -1771,11 +1987,12 @@ fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
#endif
iov_out[2].iov_base = &foo;
iov_out[2].iov_len = sizeof (foo);
+
if (send_fuse_iov (this, finh, iov_out, 3) == ENOENT) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"create(%s) got EINTR", state->loc.path);
inode_forget (inode, 1);
- fd_unref (fd);
+ gf_fd_put (priv->fdtable, state->fd_no);
goto out;
}
@@ -1785,110 +2002,196 @@ fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"%"PRIu64": %s => -1 (%s)", finh->unique,
state->loc.path, strerror (op_errno));
send_fuse_err (this, finh, op_errno);
+ gf_fd_put (priv->fdtable, state->fd_no);
}
out:
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_create_resume (fuse_state_t *state)
+{
+ fd_t *fd = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64" CREATE %s/%s resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG,
+ "inode already present");
+ inode_unref (state->loc.inode);
+ }
+
+ state->loc.inode = inode_new (state->loc.parent->table);
+
+ fd = fd_create (state->loc.inode, state->finh->pid);
+ if (fd == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64" CREATE cannot create a new fd",
+ state->finh->unique);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ fdctx = fuse_fd_ctx_check_n_create (state->this, fd);
+ if (fdctx == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64" CREATE creation of fdctx failed",
+ state->finh->unique);
+ fd_unref (fd);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ priv = state->this->private;
+
+ state->fd_no = gf_fd_unused_get (priv->fdtable, fd);
+
+ state->fd = fd_ref (fd);
+ fd->flags = state->flags;
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": CREATE %s", state->finh->unique,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE,
+ create, &state->loc, state->flags, state->mode,
+ state->umask, fd, state->xdata);
+
+}
static void
fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
#if FUSE_KERNEL_MINOR_VERSION >= 12
struct fuse_create_in *fci = msg;
+ fuse_private_t *priv = NULL;
+ int32_t ret = -1;
#else
struct fuse_open_in *fci = msg;
#endif
char *name = (char *)(fci + 1);
- fuse_private_t *priv = NULL;
fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
- priv = this->private;
#if FUSE_KERNEL_MINOR_VERSION >= 12
+ priv = this->private;
if (priv->proto_minor < 12)
name = (char *)((struct fuse_open_in *)msg + 1);
#endif
GET_STATE (this, finh, state);
+
+ gf_uuid_generate (state->gfid);
+
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
+
+ state->mode = fci->mode;
state->flags = fci->flags;
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ priv = this->private;
+ FUSE_ENTRY_CREATE(this, priv, finh, state, fci, "CREATE");
+#endif
+ fuse_resolve_and_resume (state, fuse_create_resume);
+
+ return;
+}
+
+void
+fuse_open_resume (fuse_state_t *state)
+{
+ fd_t *fd = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": OPEN %s resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ fd = fd_create (state->loc.inode, state->finh->pid);
+ if (!fd) {
+ gf_log ("fuse", GF_LOG_ERROR,
+ "fd is NULL");
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ fdctx = fuse_fd_ctx_check_n_create (state->this, fd);
+ if (fdctx == NULL) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" CREATE %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ "%"PRIu64": OPEN creation of fdctx failed",
+ state->finh->unique);
+ fd_unref (fd);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- state->loc.inode = inode_new (state->loc.parent->table);
+ priv = state->this->private;
- fd = fd_create (state->loc.inode, finh->pid);
- state->fd = fd;
+ state->fd_no = gf_fd_unused_get (priv->fdtable, fd);
+ state->fd = fd_ref (fd);
fd->flags = state->flags;
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": CREATE %s", finh->unique,
+ "%"PRIu64": OPEN %s", state->finh->unique,
state->loc.path);
- FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE,
- create, &state->loc, state->flags, fci->mode, fd);
-
- return;
+ FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN,
+ open, &state->loc, state->flags, fd, state->xdata);
}
-
static void
fuse_open (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_open_in *foi = msg;
-
fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- state->flags = foi->flags;
-
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": OPEN %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
-
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- fd = fd_create (state->loc.inode, finh->pid);
- state->fd = fd;
- fd->flags = foi->flags;
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": OPEN %s", finh->unique,
- state->loc.path);
+ state->flags = foi->flags;
- FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN,
- open, &state->loc, foi->flags, fd, 0);
+ fuse_resolve_and_resume (state, fuse_open_resume);
return;
}
-
static int
fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1898,9 +2201,11 @@ fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64,
+ "%"PRIu64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRIu64,
frame->root->unique,
op_ret, state->size, state->off, stbuf->ia_size);
@@ -1916,57 +2221,70 @@ fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, ENOMEM);
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": READ => %d (%s)", frame->root->unique,
- op_ret, strerror (op_errno));
+ "%"PRIu64": READ => %d gfid=%s fd=%p (%s)",
+ frame->root->unique, op_ret,
+ (state->fd && state->fd->inode) ?
+ uuid_utoa (state->fd->inode->gfid) : "nil",
+ state->fd, strerror (op_errno));
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_readv_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READ (%p, size=%zu, offset=%"PRIu64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ, readv, state->fd,
+ state->size, state->off, state->io_flags, state->xdata);
+}
static void
fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_read_in *fri = msg;
+#if FUSE_KERNEL_MINOR_VERSION >= 9
fuse_private_t *priv = NULL;
+#endif
fuse_state_t *state = NULL;
fd_t *fd = NULL;
GET_STATE (this, finh, state);
- state->size = fri->size;
- state->off = fri->offset;
-
fd = FH_TO_FD (fri->fh);
state->fd = fd;
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
/* See comment by similar code in fuse_settatr */
- priv = this->private;
#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv = this->private;
if (priv->proto_minor >= 9 && fri->read_flags & FUSE_READ_LOCKOWNER)
state->lk_owner = fri->lock_owner;
#endif
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": READ (%p, size=%"PRIu32", offset=%"PRIu64")",
- finh->unique, fd, fri->size, fri->offset);
-
- FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ,
- readv, fd, fri->size, fri->offset);
-
+ state->size = fri->size;
+ state->off = fri->offset;
+ /* lets ignore 'fri->read_flags', but just consider 'fri->flags' */
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ state->io_flags = fri->flags;
+#endif
+ fuse_resolve_and_resume (state, fuse_readv_resume);
}
-
static int
fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *stbuf, struct iatt *postbuf)
+ struct iatt *stbuf, struct iatt *postbuf, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1975,9 +2293,11 @@ fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64,
+ "%"PRIu64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRIu64,
frame->root->unique,
op_ret, state->size, state->off, stbuf->ia_size);
@@ -1985,18 +2305,52 @@ fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_obj (this, finh, &fwo);
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": WRITE => -1 (%s)", frame->root->unique,
+ "%"PRIu64": WRITE => -1 gfid=%s fd=%p (%s)",
+ frame->root->unique,
+ (state->fd && state->fd->inode) ?
+ uuid_utoa (state->fd->inode->gfid) : "nil", state->fd,
strerror (op_errno));
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_write_resume (fuse_state_t *state)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": WRITE iobref allocation failed",
+ state->finh->unique);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+
+ free_fuse_state (state);
+ return;
+ }
+
+ iobuf = ((fuse_private_t *) (state->this->private))->iobuf;
+ iobref_add (iobref, iobuf);
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE, writev, state->fd,
+ &state->vector, 1, state->off, state->io_flags, iobref,
+ state->xdata);
+
+ iobref_unref (iobref);
+}
static void
fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2007,53 +2361,113 @@ fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg)
struct fuse_write_in *fwi = (struct fuse_write_in *)
(finh + 1);
- fuse_private_t *priv = NULL;
fuse_state_t *state = NULL;
- struct iovec vector;
fd_t *fd = NULL;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
-
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ fuse_private_t *priv = NULL;
priv = this->private;
+#endif
GET_STATE (this, finh, state);
- state->size = fwi->size;
- state->off = fwi->offset;
fd = FH_TO_FD (fwi->fh);
state->fd = fd;
- vector.iov_base = msg;
- vector.iov_len = fwi->size;
+ state->size = fwi->size;
+ state->off = fwi->offset;
+
+ /* lets ignore 'fwi->write_flags', but just consider 'fwi->flags' */
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ state->io_flags = fwi->flags;
+#else
+ state->io_flags = fwi->write_flags;
+#endif
+ /* TODO: may need to handle below flag
+ (fwi->write_flags & FUSE_WRITE_CACHE);
+ */
+
+
+ fuse_resolve_fd_init (state, &state->resolve, fd);
/* See comment by similar code in fuse_settatr */
- priv = this->private;
#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv = this->private;
if (priv->proto_minor >= 9 && fwi->write_flags & FUSE_WRITE_LOCKOWNER)
state->lk_owner = fwi->lock_owner;
#endif
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": WRITE (%p, size=%"PRIu32", offset=%"PRId64")",
- finh->unique, fd, fwi->size, fwi->offset);
+ state->vector.iov_base = msg;
+ state->vector.iov_len = fwi->size;
- iobref = iobref_new ();
- if (!iobref) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRIu64": WRITE iobref allocation failed",
- finh->unique);
+ fuse_resolve_and_resume (state, fuse_write_resume);
- free_state (state);
- return;
+ return;
+}
+
+#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
+static int
+fuse_lseek_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ fuse_state_t *state = frame->root->state;
+ fuse_in_header_t *finh = state->finh;
+ struct fuse_lseek_out flo = {0, };
+
+ fuse_log_eh_fop (this, state, frame, op_ret, op_errno);
+
+ if (op_ret >= 0) {
+ flo.offset = offset;
+ send_fuse_obj (this, finh, &flo);
+ } else {
+ send_fuse_err (this, finh, op_errno);
}
- iobuf = ((fuse_private_t *) (state->this->private))->iobuf;
- iobref_add (iobref, iobuf);
- FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE,
- writev, fd, &vector, 1, fwi->offset, iobref);
+ free_fuse_state (state);
+ STACK_DESTROY (frame->root);
- iobref_unref (iobref);
- return;
+ return 0;
+}
+
+static void
+fuse_lseek_resume (fuse_state_t *state)
+{
+ FUSE_FOP (state, fuse_lseek_cbk, GF_FOP_SEEK, seek, state->fd,
+ state->off, state->whence, state->xdata);
+}
+
+static void
+fuse_lseek (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ struct fuse_lseek_in *ffi = msg;
+ fuse_state_t *state = NULL;
+
+ GET_STATE (this, finh, state);
+ state->fd = FH_TO_FD (ffi->fh);
+ state->off = ffi->offset;
+
+ switch (ffi->whence) {
+ case SEEK_DATA:
+ state->whence = GF_SEEK_DATA;
+ break;
+ case SEEK_HOLE:
+ state->whence = GF_SEEK_HOLE;
+ break;
+ default:
+ /* fuse should handle other whence internally */
+ send_fuse_err (this, finh, EINVAL);
+ free_fuse_state (state);
+ return;
+ }
+
+ fuse_resolve_fd_init (state, &state->resolve, state->fd);
+ fuse_resolve_and_resume (state, fuse_lseek_resume);
}
+#endif /* FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE */
+void
+fuse_flush_resume (fuse_state_t *state)
+{
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH,
+ flush, state->fd, state->xdata);
+}
static void
fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2066,88 +2480,88 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg)
GET_STATE (this, finh, state);
fd = FH_TO_FD (ffi->fh);
state->fd = fd;
- if (fd)
- fd->flush_unique = finh->unique;
+
+ fuse_resolve_fd_init (state, &state->resolve, fd);
state->lk_owner = ffi->lock_owner;
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": FLUSH %p", finh->unique, fd);
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH,
- flush, fd);
+ fuse_resolve_and_resume (state, fuse_flush_resume);
return;
}
+int
+fuse_internal_release (xlator_t *this, fd_t *fd)
+{
+ //This is a place holder function to prevent "xlator does not implement
+ //release_cbk" Warning log.
+ //Actual release happens as part of fuse_release which gets executed
+ //when kernel fuse sends it.
+ return 0;
+}
static void
fuse_release (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_release_in *fri = msg;
-
- fd_t *fd = NULL;
- int do_flush = 0;
-
- fuse_state_t *state = NULL;
+ struct fuse_release_in *fri = msg;
+ fd_t *activefd = NULL;
+ fd_t *fd = NULL;
+ uint64_t val = 0;
+ int ret = 0;
+ fuse_state_t *state = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+ fuse_private_t *priv = NULL;
GET_STATE (this, finh, state);
fd = FH_TO_FD (fri->fh);
state->fd = fd;
-#ifdef GF_LINUX_HOST_OS
- /* This is an ugly Linux specific hack, relying on subtle
- * implementation details.
- *
- * The self-heal algorithm of replicate relies on being
- * notified by means of a flush fop whenever a consumer
- * of a file is done with that file. If this happens
- * from userspace by means of close(2) or process termination,
- * the kernel sends us a FLUSH message which we can handle with
- * the flush fop (nb. this mechanism itself is Linux specific!!).
- *
- * However, if it happens from a kernel context, we get no FLUSH,
- * just the final RELEASE when all references to the file are gone.
- * We try to guess that this is the case by checking if the last FLUSH
- * on the file was just the previous message. If not, we conjecture
- * that this release is from a kernel context and call the flush fop
- * here.
- *
- * Note #1: we check the above condition by means of looking at
- * the "unique" values of the FUSE messages, relying on which is
- * a big fat NO NO NO in any sane code.
- *
- * Note #2: there is no guarantee against false positives (in theory
- * it's possible that the scheduler arranges an unrelated FUSE message
- * in between FLUSH and RELEASE, although it seems to be unlikely), but
- * extra flushes are not a problem.
- *
- * Note #3: cf. Bug #223.
- */
+ priv = this->private;
- if (fd && fd->flush_unique + 1 != finh->unique)
- do_flush = 1;
-#endif
+ fuse_log_eh (this, "RELEASE(): %"PRIu64":, fd: %p, gfid: %s",
+ finh->unique, fd, uuid_utoa (fd->inode->gfid));
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": RELEASE %p%s", finh->unique, fd,
- do_flush ? " (FLUSH implied)" : "");
+ "%"PRIu64": RELEASE %p", finh->unique, state->fd);
+
+ ret = fd_ctx_del (fd, this, &val);
+ if (!ret) {
+ fdctx = (fuse_fd_ctx_t *)(unsigned long)val;
+ if (fdctx) {
+ activefd = fdctx->activefd;
+ if (activefd) {
+ fd_unref (activefd);
+ }
- if (do_flush) {
- state->lk_owner = (uint64_t)-1;
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, flush, fd);
- fd_unref (fd);
- } else {
- fd_unref (fd);
+ GF_FREE (fdctx);
+ }
+ }
+ fd_unref (fd);
- send_fuse_err (this, finh, 0);
+ state->fd = NULL;
- free_state (state);
- }
+ gf_fdptr_put (priv->fdtable, fd);
+ send_fuse_err (this, finh, 0);
+
+ free_fuse_state (state);
return;
}
+void
+fuse_fsync_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FSYNC %p", state->finh->unique,
+ state->fd);
+
+ /* fsync_flags: 1 means "datasync" (no defines for this) */
+ FUSE_FOP (state, fuse_fsync_cbk, GF_FOP_FSYNC,
+ fsync, state->fd, (state->flags & 1), state->xdata);
+}
static void
fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2161,52 +2575,79 @@ fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg)
fd = FH_TO_FD (fsi->fh);
state->fd = fd;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FSYNC %p", finh->unique, fd);
-
- /* fsync_flags: 1 means "datasync" (no defines for this) */
- FUSE_FOP (state, fuse_fsync_cbk, GF_FOP_FSYNC,
- fsync, fd, fsi->fsync_flags & 1);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+ state->flags = fsi->fsync_flags;
+ fuse_resolve_and_resume (state, fuse_fsync_resume);
return;
}
-
-static void
-fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+void
+fuse_opendir_resume (fuse_state_t *state)
{
- /*
- struct fuse_open_in *foi = msg;
- */
+ fd_t *fd = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
- fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
+ priv = state->this->private;
- GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
+ if (!state->loc.inode) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": OPENDIR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
+ "%"PRIu64": OPENDIR (%s) resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fd = fd_create (state->loc.inode, state->finh->pid);
+ if (fd == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": OPENDIR fd creation failed",
+ state->finh->unique);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- fd = fd_create (state->loc.inode, finh->pid);
- state->fd = fd;
+ fdctx = fuse_fd_ctx_check_n_create (state->this, fd);
+ if (fdctx == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": OPENDIR creation of fdctx failed",
+ state->finh->unique);
+ fd_unref (fd);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ state->fd = fd_ref (fd);
+ state->fd_no = gf_fd_unused_get (priv->fdtable, fd);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": OPENDIR %s", finh->unique,
+ "%"PRIu64": OPENDIR %s", state->finh->unique,
state->loc.path);
FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPENDIR,
- opendir, &state->loc, fd);
+ opendir, &state->loc, fd, state->xdata);
}
+static void
+fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ /*
+ struct fuse_open_in *foi = msg;
+ */
+
+ fuse_state_t *state = NULL;
+
+ GET_STATE (this, finh, state);
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ fuse_resolve_and_resume (state, fuse_opendir_resume);
+}
unsigned char
d_type_from_stat (struct iatt *buf)
@@ -2241,20 +2682,25 @@ d_type_from_stat (struct iatt *buf)
return d_type;
}
-
static int
fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
- int size = 0;
+ size_t size = 0;
+ size_t max_size = 0;
char *buf = NULL;
gf_dirent_t *entry = NULL;
struct fuse_dirent *fde = NULL;
+ fuse_private_t *priv = NULL;
state = frame->root->state;
finh = state->finh;
+ priv = state->this->private;
+
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
if (op_ret < 0) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
@@ -2270,11 +2716,23 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->root->unique, op_ret, state->size, state->off);
list_for_each_entry (entry, &entries->list, list) {
- size += FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET +
- strlen (entry->d_name));
+ size_t fde_size = FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET +
+ strlen (entry->d_name));
+ max_size += fde_size;
+
+ if (max_size > state->size) {
+ /* we received too many entries to fit in the reply */
+ max_size -= fde_size;
+ break;
+ }
}
- buf = GF_CALLOC (1, size, gf_fuse_mt_char);
+ if (max_size == 0) {
+ send_fuse_data (this, finh, 0, 0);
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, max_size, gf_fuse_mt_char);
if (!buf) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"%"PRIu64": READDIR => -1 (%s)", frame->root->unique,
@@ -2286,24 +2744,36 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
size = 0;
list_for_each_entry (entry, &entries->list, list) {
fde = (struct fuse_dirent *)(buf + size);
- fde->ino = entry->d_ino;
- fde->off = entry->d_off;
- fde->namelen = strlen (entry->d_name);
- strncpy (fde->name, entry->d_name, fde->namelen);
+ gf_fuse_fill_dirent (entry, fde, priv->enable_ino32);
size += FUSE_DIRENT_SIZE (fde);
+
+ if (size == max_size)
+ break;
}
send_fuse_data (this, finh, buf, size);
+ /* TODO: */
+ /* gf_link_inodes_from_dirent (this, state->fd->inode, entries); */
+
out:
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
- if (buf)
- GF_FREE (buf);
+ GF_FREE (buf);
return 0;
}
+void
+fuse_readdir_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READDIR (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR,
+ readdir, state->fd, state->size, state->off, state->xdata);
+}
static void
fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2319,37 +2789,266 @@ fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
fd = FH_TO_FD (fri->fh);
state->fd = fd;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": READDIR (%p, size=%"PRIu32", offset=%"PRId64")",
- finh->unique, fd, fri->size, fri->offset);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
- FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR,
- readdir, fd, fri->size, fri->offset);
+ fuse_resolve_and_resume (state, fuse_readdir_resume);
+}
+
+#if FUSE_KERNEL_MINOR_VERSION >= 20
+static int
+fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ size_t max_size = 0;
+ size_t size = 0;
+ char *buf = NULL;
+ gf_dirent_t *entry = NULL;
+ struct fuse_direntplus *fde = NULL;
+ struct fuse_entry_out *feo = NULL;
+ fuse_private_t *priv = NULL;
+
+ state = frame->root->state;
+ finh = state->finh;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": READDIRP => -1 (%s)", frame->root->unique,
+ strerror (op_errno));
+
+ send_fuse_err (this, finh, op_errno);
+ goto out;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READDIRP => %d/%"GF_PRI_SIZET",%"PRId64,
+ frame->root->unique, op_ret, state->size, state->off);
+
+ list_for_each_entry (entry, &entries->list, list) {
+ size_t fdes = FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET_DIRENTPLUS +
+ strlen (entry->d_name));
+ max_size += fdes;
+
+ if (max_size > state->size) {
+ /* we received too many entries to fit in the reply */
+ max_size -= fdes;
+ break;
+ }
+ }
+
+ if (max_size == 0) {
+ send_fuse_data (this, finh, 0, 0);
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, max_size, gf_fuse_mt_char);
+ if (!buf) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": READDIRP => -1 (%s)", frame->root->unique,
+ strerror (ENOMEM));
+ send_fuse_err (this, finh, ENOMEM);
+ goto out;
+ }
+
+ size = 0;
+ list_for_each_entry (entry, &entries->list, list) {
+ inode_t *linked_inode;
+
+ fde = (struct fuse_direntplus *)(buf + size);
+ feo = &fde->entry_out;
+
+ if (priv->enable_ino32)
+ fde->dirent.ino = GF_FUSE_SQUASH_INO(entry->d_ino);
+ else
+ fde->dirent.ino = entry->d_ino;
+
+ fde->dirent.off = entry->d_off;
+ fde->dirent.type = entry->d_type;
+ fde->dirent.namelen = strlen (entry->d_name);
+ strncpy (fde->dirent.name, entry->d_name, fde->dirent.namelen);
+ size += FUSE_DIRENTPLUS_SIZE (fde);
+
+ if (!entry->inode)
+ goto next_entry;
+
+ entry->d_stat.ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (&entry->d_stat, &feo->attr, priv->enable_ino32);
+
+ linked_inode = inode_link (entry->inode, state->fd->inode,
+ entry->d_name, &entry->d_stat);
+ if (!linked_inode)
+ goto next_entry;
+
+ feo->nodeid = inode_to_fuse_nodeid (linked_inode);
+
+ if (!((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0))) {
+ inode_lookup (linked_inode);
+ inode_set_need_lookup (linked_inode, this);
+ }
+
+ inode_unref (linked_inode);
+
+ feo->entry_valid =
+ calc_timeout_sec (priv->entry_timeout);
+ feo->entry_valid_nsec =
+ calc_timeout_nsec (priv->entry_timeout);
+ feo->attr_valid =
+ calc_timeout_sec (priv->attribute_timeout);
+ feo->attr_valid_nsec =
+ calc_timeout_nsec (priv->attribute_timeout);
+
+next_entry:
+ if (size == max_size)
+ break;
+ }
+
+ send_fuse_data (this, finh, buf, size);
+out:
+ free_fuse_state (state);
+ STACK_DESTROY (frame->root);
+ GF_FREE (buf);
+ return 0;
+
+}
+
+void
+fuse_readdirp_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READDIRP (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_readdirp_cbk, GF_FOP_READDIRP,
+ readdirp, state->fd, state->size, state->off, state->xdata);
}
static void
-fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_release_in *fri = msg;
+ struct fuse_read_in *fri = msg;
- fuse_state_t *state = NULL;
+ fuse_state_t *state = NULL;
+ fd_t *fd = NULL;
+
+ GET_STATE (this, finh, state);
+ state->size = fri->size;
+ state->off = fri->offset;
+ fd = FH_TO_FD (fri->fh);
+ state->fd = fd;
+
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ fuse_resolve_and_resume (state, fuse_readdirp_resume);
+}
+#endif
+
+#if FUSE_KERNEL_MINOR_VERSION >= 19
+#ifdef FALLOC_FL_KEEP_SIZE
+static int
+fuse_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+}
+
+static void
+fuse_fallocate_resume(fuse_state_t *state)
+{
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FALLOCATE (%p, flags=%d, size=%zu, offset=%"PRId64")",
+ state->finh->unique, state->fd, state->flags, state->size,
+ state->off);
+
+ if (state->flags & FALLOC_FL_PUNCH_HOLE)
+ FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_DISCARD, discard,
+ state->fd, state->off, state->size, state->xdata);
+ else
+ FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_FALLOCATE, fallocate,
+ state->fd, (state->flags & FALLOC_FL_KEEP_SIZE),
+ state->off, state->size, state->xdata);
+}
+
+static void
+fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ struct fuse_fallocate_in *ffi = msg;
+ fuse_state_t *state = NULL;
+
+ GET_STATE(this, finh, state);
+ state->off = ffi->offset;
+ state->size = ffi->length;
+ state->flags = ffi->mode;
+ state->fd = FH_TO_FD(ffi->fh);
+
+ fuse_resolve_fd_init(state, &state->resolve, state->fd);
+ fuse_resolve_and_resume(state, fuse_fallocate_resume);
+}
+#endif /* FALLOC_FL_KEEP_SIZE */
+#endif /* FUSE minor version >= 19 */
+
+static void
+fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ struct fuse_release_in *fri = msg;
+ fd_t *activefd = NULL;
+ uint64_t val = 0;
+ int ret = 0;
+ fuse_state_t *state = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+ fuse_private_t *priv = NULL;
GET_STATE (this, finh, state);
state->fd = FH_TO_FD (fri->fh);
+ priv = this->private;
+
+ fuse_log_eh (this, "RELEASEDIR (): %"PRIu64": fd: %p, gfid: %s",
+ finh->unique, state->fd,
+ uuid_utoa (state->fd->inode->gfid));
+
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": RELEASEDIR %p", finh->unique, state->fd);
+ ret = fd_ctx_del (state->fd, this, &val);
+
+ if (!ret) {
+ fdctx = (fuse_fd_ctx_t *)(unsigned long)val;
+ if (fdctx) {
+ activefd = fdctx->activefd;
+ if (activefd) {
+ fd_unref (activefd);
+ }
+
+ GF_FREE (fdctx);
+ }
+ }
+
fd_unref (state->fd);
+ gf_fdptr_put (priv->fdtable, state->fd);
+
+ state->fd = NULL;
+
send_fuse_err (this, finh, 0);
- free_state (state);
+ free_fuse_state (state);
return;
}
+void
+fuse_fsyncdir_resume (fuse_state_t *state)
+{
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR,
+ fsyncdir, state->fd, (state->flags & 1), state->xdata);
+
+}
static void
fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2364,16 +3063,18 @@ fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
GET_STATE (this, finh, state);
state->fd = fd;
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR,
- fsyncdir, fd, fsi->fsync_flags & 1);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ state->flags = fsi->fsync_flags;
+ fuse_resolve_and_resume (state, fuse_fsyncdir_resume);
return;
}
-
static int
fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -2383,19 +3084,10 @@ fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
priv = this->private;
finh = state->finh;
- /*
- Filesystems (like ZFS on solaris) reports
- different ->f_frsize and ->f_bsize. Old coreutils
- df tools use statfs() and do not see ->f_frsize.
- the ->f_blocks, ->f_bavail and ->f_bfree are
- w.r.t ->f_frsize and not ->f_bsize which makes the
- df tools report wrong values.
-
- Scale the block counts to match ->f_bsize.
- */
- /* TODO: with old coreutils, f_bsize is taken from stat()'s ia_blksize
- * so the df with old coreutils this wont work :(
- */
+
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s()",
+ op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op]);
if (op_ret == 0) {
#ifndef GF_DARWIN_HOST_OS
@@ -2430,37 +3122,83 @@ fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_statfs_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": STATFS (%s) resolution fail",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": STATFS", state->finh->unique);
+
+ FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS,
+ statfs, &state->loc, state->xdata);
+}
+
static void
fuse_statfs (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 1, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": STATFS (fuse_loc_fill() fail)",
- finh->unique);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ fuse_resolve_and_resume (state, fuse_statfs_resume);
+}
+
+
+void
+fuse_setxattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": SETXATTR %s/%"PRIu64" (%s) "
+ "resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid),
+ state->finh->nodeid, state->name);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": STATFS", finh->unique);
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
- FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS,
- statfs, &state->loc);
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SETXATTR %p/%"PRIu64" (%s)", state->finh->unique,
+ state->fd, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_FSETXATTR,
+ fsetxattr, state->fd, state->xattr, state->flags,
+ state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SETXATTR %s/%"PRIu64" (%s)", state->finh->unique,
+ state->loc.path, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_SETXATTR,
+ setxattr, &state->loc, state->xattr, state->flags,
+ state->xdata);
+ }
}
@@ -2470,10 +3208,16 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
struct fuse_setxattr_in *fsi = msg;
char *name = (char *)(fsi + 1);
char *value = name + strlen (name) + 1;
+ struct fuse_private *priv = NULL;
fuse_state_t *state = NULL;
char *dict_value = NULL;
int32_t ret = -1;
+ char *newkey = NULL;
+
+ priv = this->private;
+
+ GET_STATE (this, finh, state);
#ifdef GF_DARWIN_HOST_OS
if (fsi->position) {
@@ -2487,54 +3231,98 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
}
#endif
-#ifdef DISABLE_POSIX_ACL
- if (!strncmp (name, "system.", 7)) {
+ if (fuse_ignore_xattr_set (priv, name)) {
+ (void) send_fuse_err (this, finh, 0);
+ return;
+ }
+
+ if (!priv->acl) {
+ if ((strcmp (name, POSIX_ACL_ACCESS_XATTR) == 0) ||
+ (strcmp (name, POSIX_ACL_DEFAULT_XATTR) == 0)) {
+ send_fuse_err (this, finh, EOPNOTSUPP);
+ GF_FREE (finh);
+ return;
+ }
+ }
+
+ ret = fuse_check_selinux_cap_xattr (priv, name);
+ if (ret) {
send_fuse_err (this, finh, EOPNOTSUPP);
GF_FREE (finh);
return;
}
-#endif
- GET_STATE (this, finh, state);
- state->size = fsi->size;
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": SETXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique,
- state->loc.path, finh->nodeid, name);
+ /* Check if the command is for changing the log
+ level of process or specific xlator */
+ ret = is_gf_log_command (this, name, value);
+ if (ret >= 0) {
+ send_fuse_err (this, finh, ret);
+ GF_FREE (finh);
+ return;
+ }
+
+ if (!strcmp ("inode-invalidate", name)) {
+ gf_log ("fuse", GF_LOG_TRACE,
+ "got request to invalidate %"PRIu64, finh->nodeid);
+ send_fuse_err (this, finh, 0);
+#if FUSE_KERNEL_MINOR_VERSION >= 11
+ fuse_invalidate_entry (this, finh->nodeid);
+#endif
+ GF_FREE (finh);
+ return;
+ }
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ if (!strcmp (GFID_XATTR_KEY, name) || !strcmp (GF_XATTR_VOL_ID_KEY, name)) {
+ send_fuse_err (this, finh, EPERM);
+ GF_FREE (finh);
return;
}
- state->dict = get_new_dict ();
- if (!state->dict) {
+ state->size = fsi->size;
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ state->xattr = get_new_dict ();
+ if (!state->xattr) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"%"PRIu64": SETXATTR dict allocation failed",
finh->unique);
- free_state (state);
+ send_fuse_err (this, finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ ret = fuse_flip_xattr_ns (priv, name, &newkey);
+ if (ret) {
+ send_fuse_err (this, finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- dict_value = memdup (value, fsi->size);
- dict_set (state->dict, (char *)name,
+ if (fsi->size > 0) {
+ /*
+ * Many translators expect setxattr values to be strings, but
+ * neither dict_get_str nor data_to_str do any checking or
+ * fixups to make sure that's the case. To avoid nasty
+ * surprises, allocate an extra byte and add a NUL here.
+ */
+ dict_value = memdup (value, fsi->size+1);
+ dict_value[fsi->size] = '\0';
+ }
+ dict_set (state->xattr, newkey,
data_from_dynptr ((void *)dict_value, fsi->size));
- dict_ref (state->dict);
+ dict_ref (state->xattr);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SETXATTR %s/%"PRIu64" (%s)", finh->unique,
- state->loc.path, finh->nodeid, name);
+ state->flags = fsi->flags;
+ state->name = newkey;
- FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_SETXATTR,
- setxattr, &state->loc, state->dict, fsi->flags);
+ fuse_resolve_and_resume (state, fuse_setxattr_resume);
return;
}
+
static void
send_fuse_xattr (xlator_t *this, fuse_in_header_t *finh, const char *value,
size_t size, size_t expected)
@@ -2558,24 +3346,42 @@ send_fuse_xattr (xlator_t *this, fuse_in_header_t *finh, const char *value,
}
}
+/*
+ * Decide whether an xattr key should be hidden from the mount point.
+ *
+ * Returns 1 when this mount's client_pid is GF_CLIENT_PID_GSYNCD and the
+ * key matches the glob "*.selinux*" (fnmatch with FNM_PERIOD); returns 0
+ * in every other case.
+ *
+ * This is _specifically_ for geo-rep as of now: it keeps rsync from
+ * complaining when it tries to setxattr() the selinux xattrs.
+ */
+static int
+fuse_filter_xattr(char *key)
+{
+        struct fuse_private *conf = THIS->private;
+
+        /* Only the gsyncd client gets any filtering at all. */
+        if (conf->client_pid != GF_CLIENT_PID_GSYNCD)
+                return 0;
+
+        if (fnmatch ("*.selinux*", key, FNM_PERIOD) != 0)
+                return 0;
+
+        return 1;
+}
+
+
static int
fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- int need_to_free_dict = 0;
char *value = "";
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
data_t *value_data = NULL;
- fuse_private_t *priv = NULL;
int ret = -1;
int32_t len = 0;
- data_pair_t *trav = NULL;
+ int32_t len_next = 0;
- priv = this->private;
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => %d", frame->root->unique,
@@ -2597,26 +3403,29 @@ fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} /* if(value_data)...else */
} else {
/* if callback for listxattr */
- trav = dict->members_list;
- while (trav) {
- len += strlen (trav->key) + 1;
- trav = trav->next;
- } /* while(trav) */
+ /* we need to invoke fuse_filter_xattr() twice. Once
+ * while counting size and then while filling buffer
+ */
+ len = dict_keys_join (NULL, 0, dict, fuse_filter_xattr);
+ if (len < 0)
+ goto out;
+
value = alloca (len + 1);
- ERR_ABORT (value);
- len = 0;
- trav = dict->members_list;
- while (trav) {
- strcpy (value + len, trav->key);
- value[len + strlen (trav->key)] = '\0';
- len += strlen (trav->key) + 1;
- trav = trav->next;
- } /* while(trav) */
+ if (!value)
+ goto out;
+
+ len_next = dict_keys_join (value, len, dict,
+ fuse_filter_xattr);
+ if (len_next != len)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "sizes not equal %d != %d",
+ len, len_next);
+
send_fuse_xattr (this, finh, value, len, state->size);
} /* if(state->name)...else */
} else {
/* if failure - no need to check if listxattr or getxattr */
- if (op_errno != ENODATA) {
+ if (op_errno != ENODATA && op_errno != ENOATTR) {
if (op_errno == ENOTSUP) {
GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log,
"glusterfs-fuse",
@@ -2626,40 +3435,125 @@ fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"storage");
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
+ "%"PRIu64": %s(%s) %s => -1 (%s)",
frame->root->unique,
- gf_fop_list[frame->root->op],
+ gf_fop_list[frame->root->op], state->name,
state->loc.path, strerror (op_errno));
}
} else {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": %s(%s) %s => -1 (%s)",
frame->root->unique,
- gf_fop_list[frame->root->op], state->loc.path,
- strerror (op_errno));
+ gf_fop_list[frame->root->op], state->name,
+ state->loc.path, strerror (op_errno));
} /* if(op_errno!= ENODATA)...else */
send_fuse_err (this, finh, op_errno);
} /* if(op_ret>=0)...else */
- if (need_to_free_dict)
- dict_unref (dict);
-
- free_state (state);
+out:
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+/*
+ * Resume handler for GETXATTR, handed to fuse_resolve_and_resume() by
+ * fuse_getxattr().
+ *
+ * - If state->loc.inode is not set, the failure is logged and the request
+ *   is answered with state->resolve.op_errno.
+ * - If state->name is VIRTUAL_GFID_XATTR_KEY or VIRTUAL_GFID_XATTR_KEY_STR,
+ *   the reply is built here from state->loc.inode->gfid (raw 16 bytes, or
+ *   the string produced by uuid_utoa_r) and no fop is issued.
+ * - Otherwise fgetxattr is issued on state->fd when it is set, else
+ *   getxattr on state->loc; fuse_xattr_cbk handles the reply.
+ *
+ * The paths that reply here also call free_fuse_state (state).
+ */
+void
+fuse_getxattr_resume (fuse_state_t *state)
+{
+ char *value = NULL;
+
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": GETXATTR %s/%"PRIu64" (%s) "
+ "resolution failed",
+ state->finh->unique,
+ uuid_utoa (state->resolve.gfid),
+ state->finh->nodeid, state->name);
+
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
+ return;
+ }
+
+ /* Only when built with GF_TEST_FFOP: look up an fd for the inode, which
+  * selects the fgetxattr branch at the bottom of this function. */
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
+
+ if (state->name &&
+ (strcmp (state->name, VIRTUAL_GFID_XATTR_KEY) == 0)) {
+ /* send glusterfs gfid in binary form */
+
+ /* 16 bytes of gfid are copied; the buffer is allocated one byte
+  * larger and zero-filled by GF_CALLOC. */
+ value = GF_CALLOC (16 + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!value) {
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ goto internal_out;
+ }
+ memcpy (value, state->loc.inode->gfid, 16);
+
+ send_fuse_xattr (THIS, state->finh, value, 16, state->size);
+ GF_FREE (value);
+ /* Reached both by fall-through after a successful reply and by the
+  * goto on allocation failure; state is released either way. */
+ internal_out:
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->name &&
+ (strcmp (state->name, VIRTUAL_GFID_XATTR_KEY_STR) == 0)) {
+ /* transform binary gfid to canonical form */
+
+ value = GF_CALLOC (UUID_CANONICAL_FORM_LEN + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!value) {
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ goto internal_out1;
+ }
+ uuid_utoa_r (state->loc.inode->gfid, value);
+
+ send_fuse_xattr (THIS, state->finh, value,
+ UUID_CANONICAL_FORM_LEN, state->size);
+ GF_FREE (value);
+ /* Same shared exit as above: success and ENOMEM both end here. */
+ internal_out1:
+ free_fuse_state (state);
+ return;
+ }
+
+
+ /* Not a virtual key: forward the request, preferring the fd-based fop
+  * when an fd is attached to the state. */
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETXATTR %p/%"PRIu64" (%s)", state->finh->unique,
+ state->fd, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_FGETXATTR,
+ fgetxattr, state->fd, state->name, state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETXATTR %s/%"PRIu64" (%s)", state->finh->unique,
+ state->loc.path, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
+ getxattr, &state->loc, state->name, state->xdata);
+ }
+}
+
+
static void
fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_getxattr_in *fgxi = msg;
- char *name = (char *)(fgxi + 1);
+ struct fuse_getxattr_in *fgxi = msg;
+ char *name = (char *)(fgxi + 1);
+ fuse_state_t *state = NULL;
+ struct fuse_private *priv = NULL;
+ int rv = 0;
+ int op_errno = EINVAL;
+ char *newkey = NULL;
+ int ret = 0;
- fuse_state_t *state = NULL;
- int32_t ret = -1;
+ priv = this->private;
+ GET_STATE (this, finh, state);
#ifdef GF_DARWIN_HOST_OS
if (fgxi->position) {
@@ -2675,44 +3569,80 @@ fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
"%"PRIu64": GETXATTR %s/%"PRIu64" (%s):"
"refusing positioned getxattr",
finh->unique, state->loc.path, finh->nodeid, name);
- send_fuse_err (this, finh, EINVAL);
- FREE (finh);
- return;
+ op_errno = EINVAL;
+ goto err;
}
#endif
-#ifdef DISABLE_POSIX_ACL
- if (!strncmp (name, "system.", 7)) {
- send_fuse_err (this, finh, ENODATA);
- GF_FREE (finh);
- return;
+ if (!priv->acl) {
+ if ((strcmp (name, POSIX_ACL_ACCESS_XATTR) == 0) ||
+ (strcmp (name, POSIX_ACL_DEFAULT_XATTR) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+ }
+
+ ret = fuse_check_selinux_cap_xattr (priv, name);
+ if (ret) {
+ op_errno = ENODATA;
+ goto err;
+ }
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ rv = fuse_flip_xattr_ns (priv, name, &newkey);
+ if (rv) {
+ op_errno = ENOMEM;
+ goto err;
}
-#endif
- GET_STATE (this, finh, state);
state->size = fgxi->size;
- state->name = gf_strdup (name);
+ state->name = newkey;
+
+ fuse_resolve_and_resume (state, fuse_getxattr_resume);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
+ return;
+ err:
+ send_fuse_err (this, finh, op_errno);
+ free_fuse_state (state);
+ return;
+}
+
+
+void
+fuse_listxattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, finh->nodeid, name);
+ "%"PRIu64": LISTXATTR %s/%"PRIu64
+ "resolution failed", state->finh->unique,
+ uuid_utoa (state->resolve.gfid), state->finh->nodeid);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": GETXATTR %s/%"PRIu64" (%s)", finh->unique,
- state->loc.path, finh->nodeid, name);
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
- FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
- getxattr, &state->loc, name);
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": LISTXATTR %p/%"PRIu64, state->finh->unique,
+ state->fd, state->finh->nodeid);
- return;
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_FGETXATTR,
+ fgetxattr, state->fd, NULL, state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": LISTXATTR %s/%"PRIu64, state->finh->unique,
+ state->loc.path, state->finh->nodeid);
+
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
+ getxattr, &state->loc, NULL, state->xdata);
+ }
}
@@ -2720,64 +3650,90 @@ static void
fuse_listxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_getxattr_in *fgxi = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
state->size = fgxi->size;
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": LISTXATTR %s/%"PRIu64" (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, finh->nodeid);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_and_resume (state, fuse_listxattr_resume);
+
+ return;
+}
+
+
+void
+fuse_removexattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s) "
+ "resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid),
+ state->finh->nodeid, state->name);
+
+ send_fuse_err (state->this, state->finh,
+ state->resolve.op_errno);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LISTXATTR %s/%"PRIu64, finh->unique,
- state->loc.path, finh->nodeid);
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
- FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
- getxattr, &state->loc, NULL);
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": REMOVEXATTR %p/%"PRIu64" (%s)", state->finh->unique,
+ state->fd, state->finh->nodeid, state->name);
- return;
+ FUSE_FOP (state, fuse_removexattr_cbk, GF_FOP_FREMOVEXATTR,
+ fremovexattr, state->fd, state->name, state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s)", state->finh->unique,
+ state->loc.path, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_removexattr_cbk, GF_FOP_REMOVEXATTR,
+ removexattr, &state->loc, state->name, state->xdata);
+ }
}
static void
fuse_removexattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
-
{
char *name = msg;
fuse_state_t *state = NULL;
+ fuse_private_t *priv = NULL;
int32_t ret = -1;
+ char *newkey = NULL;
+
+ if (!strcmp (GFID_XATTR_KEY, name) || !strcmp (GF_XATTR_VOL_ID_KEY, name)) {
+ send_fuse_err (this, finh, EPERM);
+ GF_FREE (finh);
+ return;
+ }
+
+ priv = this->private;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, finh->nodeid, name);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ ret = fuse_flip_xattr_ns (priv, name, &newkey);
+ if (ret) {
+ send_fuse_err (this, finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s)", finh->unique,
- state->loc.path, finh->nodeid, name);
-
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR,
- removexattr, &state->loc, name);
+ state->name = newkey;
+ fuse_resolve_and_resume (state, fuse_removexattr_resume);
return;
}
@@ -2786,13 +3742,16 @@ static int gf_fuse_lk_enosys_log;
static int
fuse_getlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
state = frame->root->state;
struct fuse_lk_out flo = {{0, }, };
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": ERR => 0", frame->root->unique);
@@ -2824,13 +3783,24 @@ fuse_getlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, state->finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+/*
+ * Resume handler for GETLK, handed to fuse_resolve_and_resume() by
+ * fuse_getlk().
+ *
+ * Logs the request at TRACE level and issues the lk fop with F_GETLK on
+ * state->fd, passing state->lk_lock and state->xdata. The reply is
+ * handled by fuse_getlk_cbk.
+ */
+void
+fuse_getlk_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETLK %p", state->finh->unique, state->fd);
+
+ FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK,
+ lk, state->fd, F_GETLK, &state->lk_lock, state->xdata);
+}
+
+
static void
fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
@@ -2838,20 +3808,19 @@ fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
fuse_state_t *state = NULL;
fd_t *fd = NULL;
- struct flock lock = {0, };
fd = FH_TO_FD (fli->fh);
GET_STATE (this, finh, state);
state->fd = fd;
- convert_fuse_file_lock (&fli->lk, &lock);
- state->lk_owner = fli->owner;
+ fuse_resolve_fd_init (state, &state->resolve, fd);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": GETLK %p", finh->unique, fd);
+ convert_fuse_file_lock (&fli->lk, &state->lk_lock,
+ fli->owner);
- FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK,
- lk, fd, F_GETLK, &lock);
+ state->lk_owner = fli->owner;
+
+ fuse_resolve_and_resume (state, fuse_getlk_resume);
return;
}
@@ -2859,15 +3828,24 @@ fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
static int
fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
+ uint32_t op = 0;
fuse_state_t *state = NULL;
state = frame->root->state;
+ op = state->finh->opcode;
+
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": ERR => 0", frame->root->unique);
+ fd_lk_insert_and_merge (state->fd,
+ (op == FUSE_SETLK) ? F_SETLK : F_SETLKW,
+ &state->lk_lock);
+
send_fuse_err (this, state->finh, 0);
} else {
if (op_errno == ENOSYS) {
@@ -2881,12 +3859,11 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} else if (op_errno == EAGAIN) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"Returning EAGAIN Flock: "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
- (unsigned long long) lock->l_start,
- (unsigned long long) lock->l_len,
- (unsigned long long) lock->l_pid,
- (unsigned long long) frame->root->lk_owner);
-
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ (unsigned long long) state->lk_lock.l_start,
+ (unsigned long long) state->lk_lock.l_len,
+ (unsigned long long) state->lk_lock.l_pid,
+ lkowner_utoa (&frame->root->lk_owner));
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": ERR => -1 (%s)",
@@ -2896,13 +3873,26 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, state->finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+/*
+ * Resume handler for SETLK / SETLKW, handed to fuse_resolve_and_resume()
+ * by fuse_setlk().
+ *
+ * The lock command is chosen from the request opcode: FUSE_SETLK maps to
+ * F_SETLK, any other opcode reaching here maps to F_SETLKW. The lk fop is
+ * issued on state->fd with state->lk_lock and state->xdata; the reply is
+ * handled by fuse_setlk_cbk.
+ */
+void
+fuse_setlk_resume (fuse_state_t *state)
+{
+ /* The "W" suffix in the trace line mirrors the F_SETLKW choice below. */
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SETLK%s %p", state->finh->unique,
+ state->finh->opcode == FUSE_SETLK ? "" : "W", state->fd);
+
+ FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK, lk, state->fd,
+ state->finh->opcode == FUSE_SETLK ? F_SETLK : F_SETLKW,
+ &state->lk_lock, state->xdata);
+}
+
+
static void
fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
@@ -2910,36 +3900,109 @@ fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
fuse_state_t *state = NULL;
fd_t *fd = NULL;
- struct flock lock = {0, };
fd = FH_TO_FD (fli->fh);
GET_STATE (this, finh, state);
state->finh = finh;
state->fd = fd;
- convert_fuse_file_lock (&fli->lk, &lock);
- state->lk_owner = fli->owner;
+ fuse_resolve_fd_init (state, &state->resolve, fd);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SETLK%s %p", finh->unique,
- finh->opcode == FUSE_SETLK ? "" : "W", fd);
+ convert_fuse_file_lock (&fli->lk, &state->lk_lock,
+ fli->owner);
+
+ state->lk_owner = fli->owner;
- FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK,
- lk, fd, finh->opcode == FUSE_SETLK ? F_SETLK : F_SETLKW,
- &lock);
+ fuse_resolve_and_resume (state, fuse_setlk_resume);
return;
}
+#if FUSE_KERNEL_MINOR_VERSION >= 11
+/*
+ * Thread body that delivers queued invalidation messages to the kernel.
+ *
+ * data is the fuse xlator (xlator_t *); its private struct supplies the
+ * invalidate list, its mutex and condition variable, and priv->fd.
+ *
+ * Each iteration blocks until priv->invalidate_list is non-empty, detaches
+ * the first node and writes node->inval_buf to priv->fd. The number of
+ * bytes written is taken from the len field of the fuse_out_header at the
+ * start of that buffer. The node is freed after the write.
+ *
+ * The loop ends when the write fails with EBADF. Any nodes still queued
+ * are then freed and priv->reverse_fuse_thread_started is cleared, both
+ * under the mutex. Always returns NULL.
+ */
+static void *
+notify_kernel_loop (void *data)
+{
+        uint32_t len = 0;
+        ssize_t rv = 0;
+        int write_errno = 0;
+        xlator_t *this = NULL;
+        fuse_private_t *priv = NULL;
+        fuse_invalidate_node_t *node = NULL;
+        fuse_invalidate_node_t *tmp = NULL;
+        struct fuse_out_header *pfoh = NULL;
+
+        this = data;
+        priv = this->private;
+
+        for (;;) {
+                pthread_mutex_lock (&priv->invalidate_mutex);
+                {
+                        while (list_empty (&priv->invalidate_list))
+                                pthread_cond_wait (&priv->invalidate_cond,
+                                                   &priv->invalidate_mutex);
+
+                        node = list_entry (priv->invalidate_list.next,
+                                           fuse_invalidate_node_t, next);
+
+                        list_del_init (&node->next);
+                }
+                pthread_mutex_unlock (&priv->invalidate_mutex);
+
+                pfoh = (struct fuse_out_header *)node->inval_buf;
+                memcpy (&len, &pfoh->len, sizeof(len));
+                /*
+                 * a simple
+                 *         len = pfoh->len;
+                 * works on x86, but takes a multiple insn cycle hit
+                 * when pfoh->len is not correctly aligned, possibly
+                 * even stalling the insn pipeline.
+                 * Other architectures will not be so forgiving. If
+                 * we're lucky the memcpy will be inlined by the
+                 * compiler, and might be as fast or faster without
+                 * the risk of stalling the insn pipeline.
+                 */
+
+                rv = sys_write (priv->fd, node->inval_buf, len);
+                /*
+                 * Capture errno before freeing the node: the free path is
+                 * not guaranteed to leave errno untouched, and the checks
+                 * and the log line below must see the value set by the
+                 * write itself.
+                 */
+                write_errno = errno;
+
+                GF_FREE (node);
+
+                /* The fd is gone; nothing more can be delivered. */
+                if (rv == -1 && write_errno == EBADF)
+                        break;
+
+                /* Short or failed writes are reported, except ENOENT. */
+                if (rv != len && !(rv == -1 && write_errno == ENOENT)) {
+                        gf_log ("glusterfs-fuse", GF_LOG_INFO,
+                                "len: %u, rv: %zd, errno: %d", len, rv,
+                                write_errno);
+                }
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                "kernel notifier loop terminated");
+
+        /* Drain whatever is still queued so the nodes are not leaked. */
+        pthread_mutex_lock (&priv->invalidate_mutex);
+        {
+                priv->reverse_fuse_thread_started = _gf_false;
+                list_for_each_entry_safe (node, tmp, &priv->invalidate_list,
+                                          next) {
+                        list_del_init (&node->next);
+                        GF_FREE (node);
+                }
+        }
+        pthread_mutex_unlock (&priv->invalidate_mutex);
+
+        return NULL;
+}
+#endif
static void
fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_init_in *fini = msg;
-
- struct fuse_init_out fino;
- fuse_private_t *priv = NULL;
- int ret;
+ struct fuse_init_in *fini = msg;
+ struct fuse_init_out fino = {0,};
+ fuse_private_t *priv = NULL;
+ size_t size = 0;
+ int ret = 0;
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ pthread_t messenger;
+#endif
priv = this->private;
@@ -2947,7 +4010,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"got INIT after first message");
- close (priv->fd);
+ sys_close (priv->fd);
goto out;
}
@@ -2958,7 +4021,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
"unsupported FUSE protocol version %d.%d",
fini->major, fini->minor);
- close (priv->fd);
+ sys_close (priv->fd);
goto out;
}
priv->proto_minor = fini->minor;
@@ -2968,6 +4031,17 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
fino.max_readahead = 1 << 17;
fino.max_write = 1 << 17;
fino.flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS;
+#if FUSE_KERNEL_MINOR_VERSION >= 17
+ if (fini->minor >= 17)
+ fino.flags |= FUSE_FLOCK_LOCKS;
+#endif
+#if FUSE_KERNEL_MINOR_VERSION >= 12
+ if (fini->minor >= 12) {
+ /* let fuse leave the umask processing to us, so that it does not
+ * break extended POSIX ACL defaults on server */
+ fino.flags |= FUSE_DONT_MASK;
+ }
+#endif
#if FUSE_KERNEL_MINOR_VERSION >= 9
if (fini->minor >= 6 /* fuse_init_in has flags */ &&
fini->flags & FUSE_BIG_WRITES) {
@@ -2976,16 +4050,101 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
priv->direct_io_mode = 0;
fino.flags |= FUSE_BIG_WRITES;
}
- if (fini->minor >= 13) {
- /* these values seemed to work fine during testing */
- fino.max_background = 64;
- fino.congestion_threshold = 48;
+ /* Used for 'reverse invalidation of inode' */
+ if (fini->minor >= 12) {
+ ret = gf_thread_create (&messenger, NULL, notify_kernel_loop,
+ this);
+ if (ret != 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "failed to start messenger daemon (%s)",
+ strerror(errno));
+
+ sys_close (priv->fd);
+ goto out;
+ }
+ priv->reverse_fuse_thread_started = _gf_true;
+ } else {
+ /*
+ * FUSE minor < 12 does not implement invalidate notifications.
+ * This mechanism is required for fopen-keep-cache to operate
+ * correctly. Disable and warn the user.
+ */
+ if (priv->fopen_keep_cache) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING, "FUSE version "
+ "%d.%d does not support inval notifications. "
+ "fopen-keep-cache disabled.", fini->major,
+ fini->minor);
+ priv->fopen_keep_cache = 0;
+ }
+ }
+
+ if (fini->minor >= 13) {
+ fino.max_background = priv->background_qlen;
+ fino.congestion_threshold = priv->congestion_threshold;
}
if (fini->minor < 9)
*priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE;
+
+ if (priv->use_readdirp) {
+ if (fini->flags & FUSE_DO_READDIRPLUS)
+ fino.flags |= FUSE_DO_READDIRPLUS;
+ }
+#endif
+ if (priv->fopen_keep_cache == 2) {
+ /* If user did not explicitly set --fopen-keep-cache[=off],
+ then check if kernel support FUSE_AUTO_INVAL_DATA and ...
+ */
+#if FUSE_KERNEL_MINOR_VERSION >= 20
+ if (fini->flags & FUSE_AUTO_INVAL_DATA) {
+ /* ... enable fopen_keep_cache mode if supported.
+ */
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "Detected "
+ "support for FUSE_AUTO_INVAL_DATA. Enabling "
+ "fopen_keep_cache automatically.");
+ fino.flags |= FUSE_AUTO_INVAL_DATA;
+ priv->fopen_keep_cache = 1;
+ } else
+#endif
+ {
+
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "No support "
+ "for FUSE_AUTO_INVAL_DATA. Disabling "
+ "fopen_keep_cache.");
+ /* ... else disable. */
+ priv->fopen_keep_cache = 0;
+ }
+ } else if (priv->fopen_keep_cache == 1) {
+ /* If user explicitly set --fopen-keep-cache[=on],
+ then enable FUSE_AUTO_INVAL_DATA if possible.
+ */
+#if FUSE_KERNEL_MINOR_VERSION >= 20
+ if (fini->flags & FUSE_AUTO_INVAL_DATA) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "fopen_keep_cache "
+ "is explicitly set. Enabling FUSE_AUTO_INVAL_DATA");
+ fino.flags |= FUSE_AUTO_INVAL_DATA;
+ } else
#endif
- ret = send_fuse_obj (this, finh, &fino);
+ {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING, "fopen_keep_cache "
+ "is explicitly set. Support for "
+ "FUSE_AUTO_INVAL_DATA is missing");
+ }
+ }
+
+#if FUSE_KERNEL_MINOR_VERSION >= 22
+ if (fini->flags & FUSE_ASYNC_DIO)
+ fino.flags |= FUSE_ASYNC_DIO;
+#endif
+ /* FUSE 7.23 and newer added attributes to the fuse_init_out struct */
+ if (fini->minor > 22) {
+ size = sizeof (fino);
+ } else {
+ /* reduce the size, chop off unused attributes from &fino */
+ size = FUSE_COMPAT_22_INIT_OUT_SIZE;
+ }
+
+ ret = send_fuse_data (this, finh, &fino, size);
if (ret == 0)
gf_log ("glusterfs-fuse", GF_LOG_INFO,
"FUSE inited with protocol versions:"
@@ -2996,7 +4155,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"FUSE init failed (%s)", strerror (ret));
- close (priv->fd);
+ sys_close (priv->fd);
}
out:
@@ -3035,10 +4194,8 @@ fuse_first_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
inode_t *inode, struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
- fuse_private_t *priv = NULL;
struct fuse_first_lookup *stub = NULL;
- priv = this->private;
stub = frame->local;
if (op_ret == 0) {
@@ -3069,17 +4226,24 @@ fuse_first_lookup (xlator_t *this)
xlator_t *xl = NULL;
dict_t *dict = NULL;
struct fuse_first_lookup stub;
+ uuid_t gfid;
+ int ret = -1;
priv = this->private;
loc.path = "/";
loc.name = "";
- loc.ino = 1;
loc.inode = fuse_ino_to_inode (1, this);
+ gf_uuid_copy (loc.gfid, loc.inode->gfid);
loc.parent = NULL;
dict = dict_new ();
frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log ("fuse", GF_LOG_ERROR, "failed to create frame");
+ goto out;
+ }
+
frame->root->type = GF_OP_TYPE_FOP;
xl = priv->active_subvol;
@@ -3090,17 +4254,23 @@ fuse_first_lookup (xlator_t *this)
frame->local = &stub;
- STACK_WIND (frame, fuse_first_lookup_cbk, xl, xl->fops->lookup,
- &loc, dict);
- dict_unref (dict);
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+ ret = dict_set_static_bin (dict, "gfid-req", gfid, 16);
+ if (ret) {
+ gf_log (xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'");
+ } else {
+ STACK_WIND (frame, fuse_first_lookup_cbk, xl, xl->fops->lookup,
+ &loc, dict);
- pthread_mutex_lock (&stub.mutex);
- {
- while (!stub.fin) {
- pthread_cond_wait (&stub.cond, &stub.mutex);
+ pthread_mutex_lock (&stub.mutex);
+ {
+ while (!stub.fin) {
+ pthread_cond_wait (&stub.cond, &stub.mutex);
+ }
}
+ pthread_mutex_unlock (&stub.mutex);
}
- pthread_mutex_unlock (&stub.mutex);
pthread_mutex_destroy (&stub.mutex);
pthread_cond_destroy (&stub.cond);
@@ -3108,18 +4278,532 @@ fuse_first_lookup (xlator_t *this)
frame->local = NULL;
STACK_DESTROY (frame->root);
+out:
+ dict_unref (dict);
+ inode_unref(loc.inode);
+
+ return ret;
+}
+
+
+/*
+ * Look up an inode on subvolume xl using only its gfid (no parent, no name).
+ *
+ * If the caller did not supply loc->inode, one is allocated from xl->itable.
+ * gfid is copied into loc->gfid, syncop_lookup() is issued on xl, and the
+ * result is passed to inode_link(). On success loc->inode is replaced by
+ * whatever inode_link() returned.
+ *
+ * Returns 0 on success, -EINVAL when loc or xl is NULL, -ENOMEM when an
+ * allocation fails, or the negative value returned by syncop_lookup().
+ */
+int
+fuse_nameless_lookup (xlator_t *this, xlator_t *xl, uuid_t gfid, loc_t *loc)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+ struct iatt iatt = {0, };
+ inode_t *linked_inode = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ if ((loc == NULL) || (xl == NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* no inode supplied by the caller: allocate one from xl's inode table */
+ if (loc->inode == NULL) {
+ loc->inode = inode_new (xl->itable);
+ if (loc->inode == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ gf_uuid_copy (loc->gfid, gfid);
+
+ /* an empty request dict is passed along with the lookup */
+ xattr_req = dict_new ();
+ if (xattr_req == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_lookup (xl, loc, &iatt, NULL, xattr_req, NULL);
+ if (ret < 0)
+ goto out;
+
+ /* the fuse inode context is set to LOOKUP_NOT_NEEDED only when
+ * inode_link() handed back the very inode we passed in */
+ linked_inode = inode_link (loc->inode, NULL, NULL, &iatt);
+ if (linked_inode == loc->inode)
+ inode_ctx_set (linked_inode, this, &ctx_value);
+
+ /* drop the reference on the inode used for the lookup and hand the
+ * linked inode to the caller instead.
+ * NOTE(review): linked_inode is not checked for NULL here - confirm
+ * whether inode_link() can fail in this path. */
+ inode_unref (loc->inode);
+ loc->inode = linked_inode;
+
+ ret = 0;
+out:
+ if (xattr_req != NULL) {
+ dict_unref (xattr_req);
+ }
+
+ return ret;
+}
+
+
+/*
+ * Open, on new_subvol, an fd equivalent to basefd and install it as the
+ * active fd of basefd.
+ *
+ * The inode is located in new_subvol's inode table by gfid, falling back to
+ * fuse_nameless_lookup() when it is not there yet. A new fd is created with
+ * basefd's pid and flags, takes a reference on oldfd's lock context, and is
+ * opened with syncop_opendir()/syncop_open() (O_CREAT, O_EXCL and O_TRUNC
+ * are masked out). On success it replaces basefd_ctx->activefd and the
+ * previous activefd, if any, is unref'd.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int
+fuse_migrate_fd_open (xlator_t *this, fd_t *basefd, fd_t *oldfd,
+                      xlator_t *old_subvol, xlator_t *new_subvol)
+{
+        loc_t          loc          = {0, };
+        fd_t          *newfd        = NULL, *old_activefd = NULL;
+        fuse_fd_ctx_t *basefd_ctx   = NULL;
+        fuse_fd_ctx_t *newfd_ctx    = NULL;
+        int            ret          = 0, flags = 0;
+
+        ret = inode_path (basefd->inode, NULL, (char **)&loc.path);
+        if (ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                        "cannot construct path of gfid (%s) failed"
+                        "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+                        uuid_utoa (basefd->inode->gfid),
+                        old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id);
+                goto out;
+        }
+
+        gf_uuid_copy (loc.gfid, basefd->inode->gfid);
+
+        loc.inode = inode_find (new_subvol->itable, basefd->inode->gfid);
+
+        if (loc.inode == NULL) {
+                ret = fuse_nameless_lookup (this, new_subvol,
+                                            basefd->inode->gfid, &loc);
+                if (ret < 0) {
+                        gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                                "name-less lookup of gfid (%s) failed (%s)"
+                                "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+                                uuid_utoa (basefd->inode->gfid),
+                                strerror (-ret),
+                                old_subvol->name, old_subvol->graph->id,
+                                new_subvol->name, new_subvol->graph->id);
+                        ret = -1;
+                        goto out;
+                }
+
+        }
+
+        /* ret is >= 0 at this point (inode_path()/lookup succeeded). The
+         * GF_VALIDATE_OR_GOTO bail-outs below do not touch ret, and the
+         * caller tests "ret < 0", so make failure the default until the
+         * open below assigns ret again. */
+        ret = -1;
+
+        basefd_ctx = fuse_fd_ctx_get (this, basefd);
+        GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out);
+
+        newfd = fd_create (loc.inode, basefd->pid);
+        if (newfd == NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                        "cannot create new fd, hence not migrating basefd "
+                        "(ptr:%p inode-gfid:%s) "
+                        "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+                        uuid_utoa (loc.inode->gfid),
+                        old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id);
+                ret = -1;
+                goto out;
+        }
+
+        /* the new fd inherits the flags and shares the lock context of the
+         * fd it replaces */
+        newfd->flags = basefd->flags;
+        if (newfd->lk_ctx)
+                fd_lk_ctx_unref (newfd->lk_ctx);
+
+        newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx);
+
+        newfd_ctx = fuse_fd_ctx_check_n_create (this, newfd);
+        GF_VALIDATE_OR_GOTO ("glusterfs-fuse", newfd_ctx, out);
+
+        if (IA_ISDIR (basefd->inode->ia_type)) {
+                ret = syncop_opendir (new_subvol, &loc, newfd, NULL, NULL);
+        } else {
+                /* re-opening an existing file: creation/truncation flags
+                 * must not be replayed */
+                flags = basefd->flags & ~(O_CREAT | O_EXCL | O_TRUNC);
+                ret = syncop_open (new_subvol, &loc, flags, newfd, NULL, NULL);
+        }
+
+        if (ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                        "open on basefd (ptr:%p inode-gfid:%s) failed (%s)"
+                        "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+                        uuid_utoa (basefd->inode->gfid), strerror (-ret),
+                        old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id);
+                ret = -1;
+                goto out;
+        }
+
+        fd_bind (newfd);
+
+        /* swap the active fd under basefd's lock; the displaced fd is
+         * released outside the lock */
+        LOCK (&basefd->lock);
+        {
+                if (basefd_ctx->activefd != NULL) {
+                        old_activefd = basefd_ctx->activefd;
+                }
+
+                basefd_ctx->activefd = newfd;
+        }
+        UNLOCK (&basefd->lock);
+
+        if (old_activefd != NULL) {
+                fd_unref (old_activefd);
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_INFO,
+                "migrated basefd (%p) to newfd (%p) (inode-gfid:%s)"
+                "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, newfd,
+                uuid_utoa (basefd->inode->gfid),
+                old_subvol->name, old_subvol->graph->id,
+                new_subvol->name, new_subvol->graph->id);
+
+        ret = 0;
+
+out:
+        /* NOTE(review): on the failure paths after fd_create() nothing
+         * releases newfd - confirm whether fd_create() returns a reference
+         * that should be dropped here. */
+        loc_wipe (&loc);
+
+        return ret;
+}
+
+/*
+ * Carry the locks held through oldfd on old_subvol over to the fd that is
+ * currently active for basefd on new_subvol.
+ *
+ * The lock information is fetched from old_subvol with syncop_fgetxattr()
+ * under GF_XATTR_LOCKINFO_KEY and pushed to new_subvol with
+ * syncop_fsetxattr().
+ *
+ * Returns 0 when there is nothing to migrate or the migration succeeded,
+ * -1 on failure.
+ */
+int
+fuse_migrate_locks (xlator_t *this, fd_t *basefd, fd_t *oldfd,
+ xlator_t *old_subvol, xlator_t *new_subvol)
+{
+ int ret = -1;
+ dict_t *lockinfo = NULL;
+ void *ptr = NULL;
+ fd_t *newfd = NULL;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+
+
+ /* no lock context, or an empty one: nothing to migrate */
+ if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx))
+ return 0;
+
+ basefd_ctx = fuse_fd_ctx_get (this, basefd);
+ GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out);
+
+ /* take our own reference on the active fd while holding basefd's lock.
+ * NOTE(review): newfd is dereferenced in the log calls below without a
+ * NULL check - this presumably relies on activefd having been set by a
+ * preceding successful fuse_migrate_fd_open(); confirm. */
+ LOCK (&basefd->lock);
+ {
+ newfd = fd_ref (basefd_ctx->activefd);
+ }
+ UNLOCK (&basefd->lock);
+
+ ret = syncop_fgetxattr (old_subvol, oldfd, &lockinfo,
+ GF_XATTR_LOCKINFO_KEY, NULL, NULL);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo failed while migrating locks"
+ "(oldfd:%p newfd:%p inode-gfid:%s)"
+ "(old-subvol:%s-%d new-subvol:%s-%d)",
+ oldfd, newfd, uuid_utoa (newfd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ ret = -1;
+ goto out;
+ }
+
+ /* the return value of dict_get_ptr() is not inspected; only a NULL
+ * ptr is treated as "no lockinfo", which is reported as success */
+ ret = dict_get_ptr (lockinfo, GF_XATTR_LOCKINFO_KEY, &ptr);
+ if (ptr == NULL) {
+ ret = 0;
+ gf_log (this->name, GF_LOG_INFO,
+ "No lockinfo present on any of the bricks "
+ "(oldfd: %p newfd:%p inode-gfid:%s) "
+ "(old-subvol:%s-%d new-subvol:%s-%d)",
+ oldfd, newfd, uuid_utoa (newfd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+
+ goto out;
+ }
+
+ /* hand the whole lockinfo dict to the new subvolume */
+ ret = syncop_fsetxattr (new_subvol, newfd, lockinfo, 0, NULL, NULL);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "migrating locks failed (oldfd:%p newfd:%p "
+ "inode-gfid:%s) (old-subvol:%s-%d new-subvol:%s-%d)",
+ oldfd, newfd, uuid_utoa (newfd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ /* release the reference taken above and the fetched dict, if any */
+ if (newfd)
+ fd_unref (newfd);
+
+ if (lockinfo != NULL) {
+ dict_unref (lockinfo);
+ }
+
+ return ret;
+}
+
+
+/*
+ * Migrate one fuse fd (basefd) from old_subvol to new_subvol.
+ *
+ * The fd currently in use (basefd's activefd, or basefd itself when none is
+ * set) is picked under basefd's lock. If its inode still has a null gfid a
+ * create is considered in progress and migration is deferred (returns 0).
+ * Otherwise the fd is fsync'ed on old_subvol when it belongs to it (a
+ * failure there is only logged), re-opened on new_subvol through
+ * fuse_migrate_fd_open(), and its locks are carried over with
+ * fuse_migrate_locks().
+ *
+ * Returns 0 on success or deferral, a negative value on failure.
+ */
+int
+fuse_migrate_fd (xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
+                 xlator_t *new_subvol)
+{
+        int            ret                = -1;
+        char           create_in_progress = 0;
+        fuse_fd_ctx_t *basefd_ctx         = NULL;
+        fd_t          *oldfd              = NULL;
+
+        basefd_ctx = fuse_fd_ctx_get (this, basefd);
+        GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out);
+
+        LOCK (&basefd->lock);
+        {
+                oldfd = basefd_ctx->activefd ? basefd_ctx->activefd
+                        : basefd;
+                fd_ref (oldfd);
+        }
+        UNLOCK (&basefd->lock);
+
+        /* a null gfid means the create on this fd has not completed yet */
+        LOCK (&oldfd->inode->lock);
+        {
+                if (gf_uuid_is_null (oldfd->inode->gfid)) {
+                        create_in_progress = 1;
+                } else {
+                        create_in_progress = 0;
+                }
+        }
+        UNLOCK (&oldfd->inode->lock);
+
+        if (create_in_progress) {
+                gf_log ("glusterfs-fuse", GF_LOG_INFO,
+                        "create call on fd (%p) is in progress "
+                        "(basefd-ptr:%p basefd-inode.gfid:%s), "
+                        "hence deferring migration till application does an "
+                        "fd based operation on this fd"
+                        "(old-subvolume:%s-%d, new-subvolume:%s-%d)",
+                        oldfd, basefd, uuid_utoa (basefd->inode->gfid),
+                        old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id);
+
+                ret = 0;
+                goto out;
+        }
+
+        if (oldfd->inode->table->xl == old_subvol) {
+                /* flush pending data on the old graph before switching; a
+                 * failure is logged but does not stop the migration */
+                if (IA_ISDIR (oldfd->inode->ia_type))
+                        ret = syncop_fsyncdir (old_subvol, oldfd, 0, NULL,
+                                               NULL);
+                else
+                        ret = syncop_fsync (old_subvol, oldfd, 0, NULL, NULL);
+
+                if (ret < 0) {
+                        gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                                "syncop_fsync(dir) failed (%s) on fd (%p)"
+                                "(basefd:%p basefd-inode.gfid:%s) "
+                                "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+                                strerror (-ret), oldfd, basefd,
+                                uuid_utoa (basefd->inode->gfid),
+                                old_subvol->name, old_subvol->graph->id,
+                                new_subvol->name, new_subvol->graph->id);
+                }
+        } else {
+                /* the gfid is a raw uuid_t, not a C string: it has to go
+                 * through uuid_utoa() before being printed with %s */
+                gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                        "basefd (ptr:%p inode-gfid:%s) was not "
+                        "migrated during previous graph switch"
+                        "(old-subvolume:%s-%d new-subvolume: %s-%d)", basefd,
+                        uuid_utoa (basefd->inode->gfid),
+                        old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id);
+        }
+
+        ret = fuse_migrate_fd_open (this, basefd, oldfd, old_subvol,
+                                    new_subvol);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "open corresponding to "
+                        "basefd (ptr:%p inode-gfid:%s) in new graph failed "
+                        "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+                        uuid_utoa (basefd->inode->gfid), old_subvol->name,
+                        old_subvol->graph->id, new_subvol->name,
+                        new_subvol->graph->id);
+                goto out;
+        }
+
+        ret = fuse_migrate_locks (this, basefd, oldfd, old_subvol,
+                                  new_subvol);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "migrating locks from old-subvolume (%s-%d) to "
+                        "new-subvolume (%s-%d) failed (inode-gfid:%s oldfd:%p "
+                        "basefd:%p)", old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id,
+                        uuid_utoa (basefd->inode->gfid), oldfd, basefd);
+
+        }
+out:
+        if (ret < 0) {
+                /* oldfd is still NULL when the fd-context validation at the
+                 * top bailed out; never pass a NULL pointer to %s */
+                gf_log (this->name, GF_LOG_WARNING, "migration of basefd "
+                        "(ptr:%p inode-gfid:%s) failed"
+                        "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+                        oldfd ? uuid_utoa (oldfd->inode->gfid) : "(null)",
+                        old_subvol->name, old_subvol->graph->id,
+                        new_subvol->name, new_subvol->graph->id);
+        }
+
+        /* only drop the reference if one was actually taken above */
+        if (oldfd != NULL) {
+                fd_unref (oldfd);
+        }
+
+        return ret;
+}
+
+
+/*
+ * Migrate every fd currently present in the fuse fd table from old_subvol
+ * to new_subvol, recording the per-fd outcome in its fuse fd context
+ * (migration_failed). Always returns 0.
+ */
+int
+fuse_handle_opened_fds (xlator_t *this, xlator_t *old_subvol,
+                        xlator_t *new_subvol)
+{
+        fuse_private_t *priv    = this->private;
+        fdtable_t      *table   = priv->fdtable;
+        fdentry_t      *entries = NULL;
+        fuse_fd_ctx_t  *ctx     = NULL;
+        fd_t           *cur     = NULL;
+        uint32_t        nr      = 0;
+        int32_t         status  = 0;
+        int             idx     = 0;
+
+        /* work on a snapshot of the table */
+        entries = gf_fd_fdtable_copy_all_fds (table, &nr);
+        if (entries == NULL)
+                return 0;
+
+        /* first pass: migrate each fd and remember whether it worked */
+        for (idx = 0; idx < nr; idx++) {
+                cur = entries[idx].fd;
+                if (cur == NULL)
+                        continue;
+
+                status = fuse_migrate_fd (this, cur, old_subvol, new_subvol);
+
+                ctx = fuse_fd_ctx_get (this, cur);
+                if (ctx == NULL)
+                        continue;
+
+                LOCK (&cur->lock);
+                {
+                        ctx->migration_failed = (status < 0) ? 1 : 0;
+                }
+                UNLOCK (&cur->lock);
+        }
+
+        /* second pass: release every fd held by the snapshot, only after
+         * all migrations have been attempted */
+        for (idx = 0; idx < nr; idx++) {
+                if (entries[idx].fd)
+                        fd_unref (entries[idx].fd);
+        }
+
+        GF_FREE (entries);
+
+        return 0;
+}
+
+
+static int
+fuse_handle_blocked_locks (xlator_t *this, xlator_t *old_subvol,
+ xlator_t *new_subvol)
+{
return 0;
}
+/*
+ * Synctask body for a graph switch: data is a fuse_graph_switch_args_t
+ * naming the fuse xlator and the old and new subvolumes. Always returns 0;
+ * a NULL data pointer makes it a no-op.
+ */
+static int
+fuse_graph_switch_task (void *data)
+{
+        fuse_graph_switch_args_t *switch_args = data;
+
+        if (switch_args != NULL) {
+                /* don't change the order of handling open fds and blocked
+                 * locks, since the act of opening files also reacquires
+                 * granted locks in new graph.
+                 */
+                fuse_handle_opened_fds (switch_args->this,
+                                        switch_args->old_subvol,
+                                        switch_args->new_subvol);
+
+                fuse_handle_blocked_locks (switch_args->this,
+                                           switch_args->old_subvol,
+                                           switch_args->new_subvol);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Allocate a zero-filled fuse_graph_switch_args_t.
+ * Returns the new object, or NULL when the allocation fails.
+ */
+fuse_graph_switch_args_t *
+fuse_graph_switch_args_alloc (void)
+{
+        return GF_CALLOC (1, sizeof (fuse_graph_switch_args_t),
+                          gf_fuse_mt_graph_switch_args_t);
+}
+
+
+/*
+ * Free a fuse_graph_switch_args_t obtained from
+ * fuse_graph_switch_args_alloc(). A NULL argument is ignored.
+ */
+void
+fuse_graph_switch_args_destroy (fuse_graph_switch_args_t *args)
+{
+        if (args != NULL) {
+                GF_FREE (args);
+        }
+}
+
+
+/*
+ * Run the graph-switch work (fuse_graph_switch_task) for a switch from
+ * old_subvol to new_subvol through synctask_new().
+ *
+ * Returns 0 when the task was started successfully, -1 when the frame or
+ * the argument block could not be allocated or synctask_new() failed.
+ */
+int
+fuse_handle_graph_switch (xlator_t *this, xlator_t *old_subvol,
+ xlator_t *new_subvol)
+{
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+ fuse_graph_switch_args_t *args = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (frame == NULL) {
+ goto out;
+ }
+
+ args = fuse_graph_switch_args_alloc ();
+ if (args == NULL) {
+ goto out;
+ }
+
+ args->this = this;
+ args->old_subvol = old_subvol;
+ args->new_subvol = new_subvol;
+
+ /* no completion callback is supplied (third argument is NULL).
+ * NOTE(review): args and frame are released right below, so this
+ * presumably relies on synctask_new() not returning before the task
+ * has finished when no callback is given - confirm. */
+ ret = synctask_new (this->ctx->env, fuse_graph_switch_task, NULL, frame,
+ args);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "starting sync-task to "
+ "handle graph switch failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /* common cleanup for both the success and the failure paths */
+ if (args != NULL) {
+ fuse_graph_switch_args_destroy (args);
+ }
+
+ if (frame != NULL) {
+ STACK_DESTROY (frame->root);
+ }
+
+ return ret;
+}
+
+
int
fuse_graph_sync (xlator_t *this)
{
- fuse_private_t *priv = NULL;
- int need_first_lookup = 0;
- struct timeval now = {0, };
- struct timespec timeout = {0, };
- int ret = 0;
+ fuse_private_t *priv = NULL;
+ int need_first_lookup = 0;
+ int ret = 0;
+ xlator_t *old_subvol = NULL, *new_subvol = NULL;
+ uint64_t winds_on_old_subvol = 0;
priv = this->private;
@@ -3128,49 +4812,80 @@ fuse_graph_sync (xlator_t *this)
if (!priv->next_graph)
goto unlock;
- priv->active_subvol = priv->next_graph->top;
+ old_subvol = priv->active_subvol;
+ new_subvol = priv->active_subvol = priv->next_graph->top;
priv->next_graph = NULL;
need_first_lookup = 1;
- gettimeofday (&now, NULL);
- timeout.tv_sec = now.tv_sec + MAX_FUSE_PROC_DELAY;
- timeout.tv_nsec = now.tv_usec * 1000;
-
- while (!priv->child_up) {
- ret = pthread_cond_timedwait (&priv->sync_cond,
- &priv->sync_mutex,
- &timeout);
+ while (!priv->event_recvd) {
+ ret = pthread_cond_wait (&priv->sync_cond,
+ &priv->sync_mutex);
if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "timedwait returned non zero value "
- "ret: %d errno: %d", ret, errno);
- break;
- }
+ gf_log (this->name, GF_LOG_DEBUG,
+ "timedwait returned non zero value "
+ "ret: %d errno: %d", ret, errno);
+ break;
+ }
}
}
unlock:
pthread_mutex_unlock (&priv->sync_mutex);
- if (need_first_lookup)
+ if (need_first_lookup) {
fuse_first_lookup (this);
+ }
+
+ if ((old_subvol != NULL) && (new_subvol != NULL)) {
+ fuse_handle_graph_switch (this, old_subvol, new_subvol);
+
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ old_subvol->switched = 1;
+ winds_on_old_subvol = old_subvol->winds;
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ if (winds_on_old_subvol == 0) {
+ xlator_notify (old_subvol, GF_EVENT_PARENT_DOWN,
+ old_subvol, NULL);
+ }
+ }
return 0;
}
+/*
+ * Read the mount status (an int) from the read end of priv->status_pipe,
+ * then close both ends of the pipe.
+ *
+ * Returns the value read, or -1 when the read fails.
+ */
+int
+fuse_get_mount_status (xlator_t *this)
+{
+        fuse_private_t *priv        = this->private;
+        int             status      = -1;
+        int             read_failed = 0;
+
+        read_failed = (sys_read (priv->status_pipe[0], &status,
+                                 sizeof(status)) < 0);
+        if (read_failed) {
+                gf_log (this->name, GF_LOG_ERROR, "could not get mount status");
+                status = -1;
+        }
+        gf_log (this->name, GF_LOG_DEBUG, "mount status is %d", status);
+
+        /* the pipe is single-use: release both descriptors */
+        sys_close(priv->status_pipe[0]);
+        sys_close(priv->status_pipe[1]);
+
+        return status;
+}
static void *
fuse_thread_proc (void *data)
{
- char *mount_point = NULL;
- xlator_t *this = NULL;
- fuse_private_t *priv = NULL;
- ssize_t res = 0;
- struct iobuf *iobuf = NULL;
- fuse_in_header_t *finh;
- struct iovec iov_in[2];
- void *msg = NULL;
- const size_t msg0_size = sizeof (*finh) + 128;
- fuse_handler_t **fuse_ops = NULL;
+ char *mount_point = NULL;
+ xlator_t *this = NULL;
+ fuse_private_t *priv = NULL;
+ ssize_t res = 0;
+ struct iobuf *iobuf = NULL;
+ fuse_in_header_t *finh = NULL;
+ struct iovec iov_in[2];
+ void *msg = NULL;
+ const size_t msg0_size = sizeof (*finh) + 128;
+ fuse_handler_t **fuse_ops = NULL;
+ struct pollfd pfd[2] = {{0,}};
+ gf_boolean_t mount_finished = _gf_false;
this = data;
priv = this->private;
@@ -3180,16 +4895,59 @@ fuse_thread_proc (void *data)
iov_in[0].iov_len = sizeof (*finh) + sizeof (struct fuse_write_in);
iov_in[1].iov_len = ((struct iobuf_pool *)this->ctx->iobuf_pool)
- ->page_size;
+ ->default_page_size;
priv->msg0_len_p = &iov_in[0].iov_len;
for (;;) {
+ /* THIS has to be reset here */
+ THIS = this;
+
+ if (!mount_finished) {
+ memset(pfd,0,sizeof(pfd));
+ pfd[0].fd = priv->status_pipe[0];
+ pfd[0].events = POLLIN | POLLHUP | POLLERR;
+ pfd[1].fd = priv->fd;
+ pfd[1].events = POLLIN | POLLHUP | POLLERR;
+ if (poll(pfd,2,-1) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "poll error %s", strerror(errno));
+ break;
+ }
+ if (pfd[0].revents & POLLIN) {
+ if (fuse_get_mount_status(this) != 0) {
+ break;
+ }
+ mount_finished = _gf_true;
+ }
+ else if (pfd[0].revents) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mount pipe closed without status");
+ break;
+ }
+ if (!pfd[1].revents) {
+ continue;
+ }
+ }
+
+ /*
+ * We don't want to block on readv while we're still waiting
+ * for mount status. That means we only want to get here if
+ * mount_finished is true (meaning that our wait completed
+ * already) or if we already called poll(2) on priv->fd to
+ * make sure it's ready.
+ */
+
if (priv->init_recvd)
fuse_graph_sync (this);
+ /* TODO: This place should always get maximum supported buffer
+ size from 'fuse', which is as of today 128KB. If we bring in
+ support for higher block sizes support, then we should be
+ changing this one too */
iobuf = iobuf_get (this->ctx->iobuf_pool);
+
/* Add extra 128 byte to the first iov so that it can
- * accomodate "ordinary" non-write requests. It's not
+ * accommodate "ordinary" non-write requests. It's not
* guaranteed to be big enough, as SETXATTR and namespace
* operations with very long names may grow behind it,
* but it's good enough in most cases (and we can handle
@@ -3210,21 +4968,28 @@ fuse_thread_proc (void *data)
iov_in[1].iov_base = iobuf->ptr;
- res = readv (priv->fd, iov_in, 2);
+ res = sys_readv (priv->fd, iov_in, 2);
if (res == -1) {
if (errno == ENODEV || errno == EBADF) {
- gf_log ("glusterfs-fuse", GF_LOG_NORMAL,
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"terminating upon getting %s when "
"reading /dev/fuse",
errno == ENODEV ? "ENODEV" : "EBADF");
-
+ fuse_log_eh (this, "glusterfs-fuse: terminating"
+ " upon getting %s when "
+ "reading /dev/fuse",
+ errno == ENODEV ? "ENODEV":
+ "EBADF");
break;
}
if (errno != EINTR) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"read from /dev/fuse returned -1 (%s)",
strerror (errno));
+ fuse_log_eh (this, "glusterfs-fuse: read from "
+ "/dev/fuse returned -1 (%s)",
+ strerror (errno));
}
goto cont_err;
@@ -3232,6 +4997,8 @@ fuse_thread_proc (void *data)
if (res < sizeof (finh)) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"short read on /dev/fuse");
+ fuse_log_eh (this, "glusterfs-fuse: short read on "
+ "/dev/fuse");
break;
}
@@ -3250,6 +5017,8 @@ fuse_thread_proc (void *data)
) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"inconsistent read on /dev/fuse");
+ fuse_log_eh (this, "glusterfs-fuse: inconsistent read "
+ "on /dev/fuse");
break;
}
@@ -3259,11 +5028,12 @@ fuse_thread_proc (void *data)
msg = iov_in[1].iov_base;
else {
if (res > msg0_size) {
- iov_in[0].iov_base =
- GF_REALLOC (iov_in[0].iov_base, res);
- if (iov_in[0].iov_base)
+ void *b = GF_REALLOC (iov_in[0].iov_base, res);
+ if (b) {
+ iov_in[0].iov_base = b;
finh = (fuse_in_header_t *)
iov_in[0].iov_base;
+ }
else {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"Out of memory");
@@ -3280,13 +5050,15 @@ fuse_thread_proc (void *data)
msg = finh + 1;
}
-#ifdef GF_DARWIN_HOST_OS
+ if (priv->uid_map_root &&
+ finh->uid == priv->uid_map_root)
+ finh->uid = 0;
+
if (finh->opcode >= FUSE_OP_HIGH)
/* turn down MacFUSE specific messages */
fuse_enosys (this, finh, msg);
else
-#endif
- fuse_ops[finh->opcode] (this, finh, msg);
+ fuse_ops[finh->opcode] (this, finh, msg);
iobuf_unref (iobuf);
continue;
@@ -3296,33 +5068,42 @@ fuse_thread_proc (void *data)
GF_FREE (iov_in[0].iov_base);
}
- iobuf_unref (iobuf);
- GF_FREE (iov_in[0].iov_base);
+ /*
+ * We could be in all sorts of states with respect to iobuf and iov_in
+ * by the time we get here, and it's just not worth untangling them if
+ * we're about to kill ourselves anyway.
+ */
if (dict_get (this->options, ZR_MOUNTPOINT_OPT))
mount_point = data_to_str (dict_get (this->options,
ZR_MOUNTPOINT_OPT));
if (mount_point) {
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"unmounting %s", mount_point);
- dict_del (this->options, ZR_MOUNTPOINT_OPT);
}
- raise (SIGTERM);
-
+ /* Kill the whole process, not just this thread. */
+ kill (getpid(), SIGTERM);
return NULL;
}
+
int32_t
fuse_itable_dump (xlator_t *this)
{
+ fuse_private_t *priv = NULL;
+
if (!this)
return -1;
- gf_proc_dump_add_section("xlator.mount.fuse.itable");
- inode_table_dump(this->itable, "xlator.mount.fuse.itable");
+ priv = this->private;
- return 0;
+ if (priv && priv->active_subvol) {
+ gf_proc_dump_add_section("xlator.mount.fuse.itable");
+ inode_table_dump(priv->active_subvol->itable,
+ "xlator.mount.fuse.itable");
+ }
+ return 0;
}
int32_t
@@ -3340,58 +5121,129 @@ fuse_priv_dump (xlator_t *this)
gf_proc_dump_add_section("xlator.mount.fuse.priv");
- gf_proc_dump_write("xlator.mount.fuse.priv.fd", "%d", private->fd);
- gf_proc_dump_write("xlator.mount.fuse.priv.proto_minor", "%u",
+ gf_proc_dump_write("fd", "%d", private->fd);
+ gf_proc_dump_write("proto_minor", "%u",
private->proto_minor);
- gf_proc_dump_write("xlator.mount.fuse.priv.volfile", "%s",
+ gf_proc_dump_write("volfile", "%s",
private->volfile?private->volfile:"None");
- gf_proc_dump_write("xlator.mount.fuse.volfile_size", "%d",
+ gf_proc_dump_write("volfile_size", "%d",
private->volfile_size);
- gf_proc_dump_write("xlator.mount.fuse.mount_point", "%s",
+ gf_proc_dump_write("mount_point", "%s",
private->mount_point);
- gf_proc_dump_write("xlator.mount.fuse.iobuf", "%u",
+ gf_proc_dump_write("iobuf", "%u",
private->iobuf);
- gf_proc_dump_write("xlator.mount.fuse.fuse_thread_started", "%d",
+ gf_proc_dump_write("fuse_thread_started", "%d",
(int)private->fuse_thread_started);
- gf_proc_dump_write("xlator.mount.fuse.direct_io_mode", "%d",
+ gf_proc_dump_write("direct_io_mode", "%d",
private->direct_io_mode);
- gf_proc_dump_write("xlator.mount.fuse.entry_timeout", "%lf",
+ gf_proc_dump_write("entry_timeout", "%lf",
private->entry_timeout);
- gf_proc_dump_write("xlator.mount.fuse.attribute_timeout", "%lf",
+ gf_proc_dump_write("attribute_timeout", "%lf",
private->attribute_timeout);
- gf_proc_dump_write("xlator.mount.fuse.init_recvd", "%d",
+ gf_proc_dump_write("init_recvd", "%d",
(int)private->init_recvd);
- gf_proc_dump_write("xlator.mount.fuse.strict_volfile_check", "%d",
+ gf_proc_dump_write("strict_volfile_check", "%d",
(int)private->strict_volfile_check);
+ gf_proc_dump_write("reverse_thread_started", "%d",
+ (int)private->reverse_fuse_thread_started);
+ gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
return 0;
}
+/*
+ * Dump the fuse xlator's event history (this->history) into the process
+ * state dump, under a section key built from "xlator.mount.fuse" and
+ * "history". Each entry is written by dump_history_fuse().
+ *
+ * Returns 0 on success, -1 when this or this->history is NULL.
+ */
+int
+fuse_history_dump (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ /* both macros jump to out (leaving ret at -1) on a NULL argument */
+ GF_VALIDATE_OR_GOTO ("fuse", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ gf_proc_dump_build_key (key_prefix, "xlator.mount.fuse",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_fuse);
+
+ ret = 0;
+out:
+ return ret;
+}
int
-fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph)
+dump_history_fuse (circular_buffer_t *cb, void *data)
{
- inode_table_t *itable = NULL;
- int ret = 0;
- fuse_private_t *priv = NULL;
+ char timestr[256] = {0,};
- priv = this->private;
+ gf_time_fmt (timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_F_HMS);
- itable = inode_table_new (0, graph->top);
- if (!itable)
- return -1;
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
- ((xlator_t *)graph->top)->itable = itable;
+ gf_proc_dump_write ("message", "%s\n", cb->data);
+
+ return 0;
+}
+
+int
+fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph)
+{
+ inode_table_t *itable = NULL;
+ int ret = 0, winds = 0;
+ fuse_private_t *priv = NULL;
+ glusterfs_graph_t *prev_graph = NULL;
+
+ priv = this->private;
pthread_mutex_lock (&priv->sync_mutex);
{
- priv->next_graph = graph;
- priv->child_up = 0;
+ /* handle the case of more than one CHILD_UP on same graph */
+ if ((priv->active_subvol == graph->top) || graph->used) {
+ goto unlock;
+ }
+
+ itable = inode_table_new (0, graph->top);
+ if (!itable) {
+ ret = -1;
+ goto unlock;
+ }
+
+ ((xlator_t *)graph->top)->itable = itable;
+
+ prev_graph = priv->next_graph;
- pthread_cond_signal (&priv->sync_cond);
+ if ((prev_graph != NULL) && (prev_graph->id > graph->id)) {
+ /* there was a race and an old graph was initialised
+ * before new one.
+ */
+ prev_graph = graph;
+ } else {
+ priv->next_graph = graph;
+ priv->event_recvd = 0;
+ }
+
+ if (prev_graph != NULL)
+ winds = ((xlator_t *)prev_graph->top)->winds;
+
+ /* mark the graph as used only after next_graph has been
+ * updated, so the error bail-outs above leave it unmarked */
+ graph->used = 1;
}
pthread_mutex_unlock (&priv->sync_mutex);
+ if ((prev_graph != NULL) && (winds == 0)) {
+ xlator_notify (prev_graph->top, GF_EVENT_PARENT_DOWN,
+ prev_graph->top, NULL);
+ }
+
+ gf_log ("fuse", GF_LOG_INFO, "switched to graph %d",
+ ((graph) ? graph->id : 0));
+
+ return ret;
+unlock:
+ pthread_mutex_unlock (&priv->sync_mutex);
+
return ret;
}
@@ -3401,24 +5253,54 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
int32_t ret = 0;
fuse_private_t *private = NULL;
+ gf_boolean_t start_thread = _gf_false;
glusterfs_graph_t *graph = NULL;
private = this->private;
+ graph = data;
+
+ gf_log ("fuse", GF_LOG_DEBUG, "got event %d on graph %d",
+ event, ((graph) ? graph->id : 0));
+
switch (event)
{
case GF_EVENT_GRAPH_NEW:
- graph = data;
+ break;
- ret = fuse_graph_setup (this, graph);
- if (ret)
- break;
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ if (graph) {
+ ret = fuse_graph_setup (this, graph);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to setup the graph");
+ }
+
+ if ((event == GF_EVENT_CHILD_UP)
+ || (event == GF_EVENT_CHILD_DOWN)) {
+ pthread_mutex_lock (&private->sync_mutex);
+ {
+ private->event_recvd = 1;
+ pthread_cond_broadcast (&private->sync_cond);
+ }
+ pthread_mutex_unlock (&private->sync_mutex);
+ }
- if (!private->fuse_thread_started) {
- private->fuse_thread_started = 1;
+ pthread_mutex_lock (&private->sync_mutex);
+ {
+ if (!private->fuse_thread_started) {
+ private->fuse_thread_started = 1;
+ start_thread = _gf_true;
+ }
+ }
+ pthread_mutex_unlock (&private->sync_mutex);
- ret = pthread_create (&private->fuse_thread, NULL,
- fuse_thread_proc, this);
+ if (start_thread) {
+ ret = gf_thread_create (&private->fuse_thread, NULL,
+ fuse_thread_proc, this);
if (ret != 0) {
gf_log (this->name, GF_LOG_DEBUG,
"pthread_create() failed (%s)",
@@ -3428,38 +5310,16 @@ notify (xlator_t *this, int32_t event, void *data, ...)
}
break;
-
-// case GF_EVENT_CHILD_CONNECTING:
-
- case GF_EVENT_CHILD_UP:
- {
- /* set priv->active_subvol */
- /* set priv->first_lookup = 1 */
-
- pthread_mutex_lock (&private->sync_mutex);
- {
- private->child_up = 1;
- pthread_cond_broadcast (&private->sync_cond);
- }
- pthread_mutex_unlock (&private->sync_mutex);
-
- break;
}
- case GF_EVENT_VOLFILE_MODIFIED:
+ case GF_EVENT_AUTH_FAILED:
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "Remote volume file changed, try re-mounting.");
- if (private->strict_volfile_check) {
- //fuse_session_remove_chan (private->ch);
- //fuse_session_destroy (private->se);
- //fuse_unmount (private->mount_point, private->ch);
- /* TODO: Above code if works, will be a cleaner way,
- but for now, lets just achieve what we want */
- raise (SIGTERM);
- }
- break;
+ /* Authentication failure is an error and glusterfs should stop */
+ gf_log (this->name, GF_LOG_ERROR, "Server authenication failed. Shutting down.");
+ fini (this);
+ break;
}
+
default:
break;
}
@@ -3488,47 +5348,69 @@ mem_acct_init (xlator_t *this)
static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = {
- [FUSE_INIT] = fuse_init,
- [FUSE_DESTROY] = fuse_destroy,
[FUSE_LOOKUP] = fuse_lookup,
[FUSE_FORGET] = fuse_forget,
[FUSE_GETATTR] = fuse_getattr,
[FUSE_SETATTR] = fuse_setattr,
- [FUSE_OPENDIR] = fuse_opendir,
- [FUSE_READDIR] = fuse_readdir,
- [FUSE_RELEASEDIR] = fuse_releasedir,
- [FUSE_ACCESS] = fuse_access,
[FUSE_READLINK] = fuse_readlink,
+ [FUSE_SYMLINK] = fuse_symlink,
[FUSE_MKNOD] = fuse_mknod,
[FUSE_MKDIR] = fuse_mkdir,
[FUSE_UNLINK] = fuse_unlink,
[FUSE_RMDIR] = fuse_rmdir,
- [FUSE_SYMLINK] = fuse_symlink,
[FUSE_RENAME] = fuse_rename,
[FUSE_LINK] = fuse_link,
- [FUSE_CREATE] = fuse_create,
[FUSE_OPEN] = fuse_open,
[FUSE_READ] = fuse_readv,
[FUSE_WRITE] = fuse_write,
- [FUSE_FLUSH] = fuse_flush,
+ [FUSE_STATFS] = fuse_statfs,
[FUSE_RELEASE] = fuse_release,
[FUSE_FSYNC] = fuse_fsync,
- [FUSE_FSYNCDIR] = fuse_fsyncdir,
- [FUSE_STATFS] = fuse_statfs,
[FUSE_SETXATTR] = fuse_setxattr,
[FUSE_GETXATTR] = fuse_getxattr,
[FUSE_LISTXATTR] = fuse_listxattr,
[FUSE_REMOVEXATTR] = fuse_removexattr,
+ [FUSE_FLUSH] = fuse_flush,
+ [FUSE_INIT] = fuse_init,
+ [FUSE_OPENDIR] = fuse_opendir,
+ [FUSE_READDIR] = fuse_readdir,
+ [FUSE_RELEASEDIR] = fuse_releasedir,
+ [FUSE_FSYNCDIR] = fuse_fsyncdir,
[FUSE_GETLK] = fuse_getlk,
[FUSE_SETLK] = fuse_setlk,
[FUSE_SETLKW] = fuse_setlk,
-};
+ [FUSE_ACCESS] = fuse_access,
+ [FUSE_CREATE] = fuse_create,
+ /* [FUSE_INTERRUPT] */
+ /* [FUSE_BMAP] */
+ [FUSE_DESTROY] = fuse_destroy,
+ /* [FUSE_IOCTL] */
+ /* [FUSE_POLL] */
+ /* [FUSE_NOTIFY_REPLY] */
+#if FUSE_KERNEL_MINOR_VERSION >= 16
+ [FUSE_BATCH_FORGET]= fuse_batch_forget,
+#endif
-static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH] = {
+#if FUSE_KERNEL_MINOR_VERSION >= 19
+#ifdef FALLOC_FL_KEEP_SIZE
+ [FUSE_FALLOCATE] = fuse_fallocate,
+#endif /* FALLOC_FL_KEEP_SIZE */
+#endif
+
+#if FUSE_KERNEL_MINOR_VERSION >= 21
+ [FUSE_READDIRPLUS] = fuse_readdirp,
+#endif
+
+#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
+ [FUSE_LSEEK] = fuse_lseek,
+#endif
};
+static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH];
+
+
static void
fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
@@ -3547,14 +5429,14 @@ fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg)
diov[2].iov_len = finh->len - sizeof (*finh);
pthread_mutex_lock (&priv->fuse_dump_mutex);
- ret = writev (priv->fuse_dump_fd, diov, 3);
+ ret = sys_writev (priv->fuse_dump_fd, diov, 3);
pthread_mutex_unlock (&priv->fuse_dump_mutex);
if (ret == -1)
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"failed to dump fuse message (R): %s",
strerror (errno));
- return priv->fuse_ops0[finh->opcode] (this, finh, msg);
+ priv->fuse_ops0[finh->opcode] (this, finh, msg);
}
@@ -3564,11 +5446,19 @@ init (xlator_t *this_xl)
int ret = 0;
dict_t *options = NULL;
char *value_string = NULL;
+ cmd_args_t *cmd_args = NULL;
char *fsname = NULL;
fuse_private_t *priv = NULL;
struct stat stbuf = {0,};
int i = 0;
int xl_name_allocated = 0;
+ int fsname_allocated = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_boolean_t sync_to_mount = _gf_false;
+ gf_boolean_t fopen_keep_cache = _gf_false;
+ unsigned long mntflags = 0;
+ char *mnt_args = NULL;
+ eh_t *event = NULL;
if (this_xl == NULL)
return -1;
@@ -3576,6 +5466,10 @@ init (xlator_t *this_xl)
if (this_xl->options == NULL)
return -1;
+ ctx = this_xl->ctx;
+ if (!ctx)
+ return -1;
+
options = this_xl->options;
if (this_xl->name == NULL) {
@@ -3600,15 +5494,19 @@ init (xlator_t *this_xl)
priv->mount_point = NULL;
priv->fd = -1;
+ INIT_LIST_HEAD (&priv->invalidate_list);
+ pthread_cond_init (&priv->invalidate_cond, NULL);
+ pthread_mutex_init (&priv->invalidate_mutex, NULL);
+
/* get options from option dictionary */
ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string);
- if (value_string == NULL) {
+ if (ret == -1 || value_string == NULL) {
gf_log ("fuse", GF_LOG_ERROR,
"Mandatory option 'mountpoint' is not specified.");
goto cleanup_exit;
}
- if (stat (value_string, &stbuf) != 0) {
+ if (sys_stat (value_string, &stbuf) != 0) {
if (errno == ENOENT) {
gf_log (this_xl->name, GF_LOG_ERROR,
"%s %s does not exist",
@@ -3641,34 +5539,51 @@ init (xlator_t *this_xl)
goto cleanup_exit;
}
- ret = dict_get_double (options, "attribute-timeout",
- &priv->attribute_timeout);
- if (ret != 0)
- priv->attribute_timeout = 1.0; /* default */
+ GF_OPTION_INIT (ZR_ATTR_TIMEOUT_OPT, priv->attribute_timeout, double,
+ cleanup_exit);
+
+ GF_OPTION_INIT (ZR_ENTRY_TIMEOUT_OPT, priv->entry_timeout, double,
+ cleanup_exit);
+
+ GF_OPTION_INIT (ZR_NEGATIVE_TIMEOUT_OPT, priv->negative_timeout, double,
+ cleanup_exit);
- ret = dict_get_double (options, "entry-timeout",
- &priv->entry_timeout);
- if (!priv->entry_timeout)
- priv->entry_timeout = 1.0; /* default */
+ GF_OPTION_INIT ("client-pid", priv->client_pid, int32, cleanup_exit);
+ /* have to check & register the presence of client-pid manually */
+ priv->client_pid_set = !!dict_get (this_xl->options, "client-pid");
+ GF_OPTION_INIT ("uid-map-root", priv->uid_map_root, uint32,
+ cleanup_exit);
priv->direct_io_mode = 2;
ret = dict_get_str (options, ZR_DIRECT_IO_OPT, &value_string);
if (ret == 0) {
ret = gf_string2boolean (value_string, &priv->direct_io_mode);
+ GF_ASSERT (ret == 0);
}
- priv->strict_volfile_check = 0;
- ret = dict_get_str (options, ZR_STRICT_VOLFILE_CHECK, &value_string);
- if (ret == 0) {
- ret = gf_string2boolean (value_string,
- &priv->strict_volfile_check);
- }
+ GF_OPTION_INIT (ZR_STRICT_VOLFILE_CHECK, priv->strict_volfile_check,
+ bool, cleanup_exit);
+
+ GF_OPTION_INIT ("acl", priv->acl, bool, cleanup_exit);
+
+ if (priv->uid_map_root)
+ priv->acl = 1;
+
+ GF_OPTION_INIT ("selinux", priv->selinux, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("capability", priv->capability, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("read-only", priv->read_only, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("enable-ino32", priv->enable_ino32, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("use-readdirp", priv->use_readdirp, bool, cleanup_exit);
priv->fuse_dump_fd = -1;
ret = dict_get_str (options, "dump-fuse", &value_string);
if (ret == 0) {
- ret = unlink (value_string);
+ ret = sys_unlink (value_string);
if (ret != -1 || errno == ENOENT)
ret = open (value_string, O_RDWR|O_CREAT|O_EXCL,
S_IRUSR|S_IWUSR);
@@ -3682,21 +5597,135 @@ init (xlator_t *this_xl)
priv->fuse_dump_fd = ret;
}
- fsname = this_xl->ctx->cmd_args.volfile;
- fsname = (fsname ? fsname : this_xl->ctx->cmd_args.volfile_server);
- fsname = (fsname ? fsname : "glusterfs");
+ sync_to_mount = _gf_false;
+ ret = dict_get_str (options, "sync-to-mount", &value_string);
+ if (ret == 0) {
+ ret = gf_string2boolean (value_string,
+ &sync_to_mount);
+ GF_ASSERT (ret == 0);
+ }
+
+ priv->fopen_keep_cache = 2;
+ if (dict_get (options, "fopen-keep-cache")) {
+ GF_OPTION_INIT("fopen-keep-cache", fopen_keep_cache, bool,
+ cleanup_exit);
+ priv->fopen_keep_cache = fopen_keep_cache;
+ }
+
+ GF_OPTION_INIT("gid-timeout", priv->gid_cache_timeout, int32,
+ cleanup_exit);
+
+ GF_OPTION_INIT ("fuse-mountopts", priv->fuse_mountopts, str, cleanup_exit);
+
+ if (gid_cache_init(&priv->gid_cache, priv->gid_cache_timeout) < 0) {
+ gf_log("glusterfs-fuse", GF_LOG_ERROR, "Failed to initialize "
+ "group cache.");
+ goto cleanup_exit;
+ }
+
+ GF_OPTION_INIT("resolve-gids", priv->resolve_gids, bool, cleanup_exit);
+
+ /* default values seemed to work fine during testing */
+ GF_OPTION_INIT ("background-qlen", priv->background_qlen, int32,
+ cleanup_exit);
+ GF_OPTION_INIT ("congestion-threshold", priv->congestion_threshold,
+ int32, cleanup_exit);
+
+ GF_OPTION_INIT("no-root-squash", priv->no_root_squash, bool,
+ cleanup_exit);
+ /* change the client_pid to no-root-squash pid only if the
+ client is none of defrag process, hadoop access and gsyncd process.
+ */
+ if (!priv->client_pid_set) {
+ if (priv->no_root_squash == _gf_true) {
+ priv->client_pid_set = _gf_true;
+ priv->client_pid = GF_CLIENT_PID_NO_ROOT_SQUASH;
+ }
+ }
+
+ /* user has set only background-qlen, not congestion-threshold,
+ use the fuse kernel driver formula to set congestion. ie, 75% */
+ if (dict_get (this_xl->options, "background-qlen") &&
+ !dict_get (this_xl->options, "congestion-threshold")) {
+ priv->congestion_threshold = (priv->background_qlen * 3) / 4;
+ gf_log (this_xl->name, GF_LOG_INFO,
+ "setting congestion control as 75%% of "
+ "background-queue length (ie, (.75 * %d) = %d",
+ priv->background_qlen, priv->congestion_threshold);
+ }
+
+ /* congestion should not be higher than background queue length */
+ if (priv->congestion_threshold > priv->background_qlen) {
+ gf_log (this_xl->name, GF_LOG_INFO,
+ "setting congestion control same as "
+ "background-queue length (%d)",
+ priv->background_qlen);
+ priv->congestion_threshold = priv->background_qlen;
+ }
+
+ cmd_args = &this_xl->ctx->cmd_args;
+ fsname = cmd_args->volfile;
+ if (!fsname && cmd_args->volfile_server) {
+ if (cmd_args->volfile_id) {
+ fsname = GF_MALLOC (
+ strlen (cmd_args->volfile_server) + 1 +
+ strlen (cmd_args->volfile_id) + 1,
+ gf_fuse_mt_fuse_private_t);
+ if (!fsname) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "Out of memory");
+ goto cleanup_exit;
+ }
+ fsname_allocated = 1;
+ strcpy (fsname, cmd_args->volfile_server);
+ strcat (fsname, ":");
+ strcat (fsname, cmd_args->volfile_id);
+ } else
+ fsname = cmd_args->volfile_server;
+ }
+ if (!fsname)
+ fsname = "glusterfs";
+ priv->fdtable = gf_fd_fdtable_alloc ();
+ if (priv->fdtable == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, "Out of memory");
+ goto cleanup_exit;
+ }
- priv->fd = gf_fuse_mount (priv->mount_point, fsname,
- "allow_other,default_permissions,"
- "max_read=131072");
+ if (priv->read_only)
+ mntflags |= MS_RDONLY;
+ gf_asprintf (&mnt_args, "%s%s%sallow_other,max_read=131072",
+ priv->acl ? "" : "default_permissions,",
+ priv->fuse_mountopts ? priv->fuse_mountopts : "",
+ priv->fuse_mountopts ? "," : "");
+ if (!mnt_args)
+ goto cleanup_exit;
+
+ if (pipe(priv->status_pipe) < 0) {
+ gf_log (this_xl->name, GF_LOG_ERROR,
+ "could not create pipe to separate mount process");
+ goto cleanup_exit;
+ }
+
+ priv->fd = gf_fuse_mount (priv->mount_point, fsname, mntflags, mnt_args,
+ sync_to_mount ? &ctx->mnt_pid : NULL,
+ priv->status_pipe[1]);
if (priv->fd == -1)
goto cleanup_exit;
+ event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false, NULL);
+ if (!event) {
+ gf_log (this_xl->name, GF_LOG_ERROR,
+ "could not create a new event history");
+ goto cleanup_exit;
+ }
+
+ this_xl->history = event;
+
pthread_mutex_init (&priv->fuse_dump_mutex, NULL);
pthread_cond_init (&priv->sync_cond, NULL);
pthread_mutex_init (&priv->sync_mutex, NULL);
- priv->child_up = 0;
+ priv->event_recvd = 0;
for (i = 0; i < FUSE_OP_HIGH; i++) {
if (!fuse_std_ops[i])
@@ -3710,17 +5739,25 @@ init (xlator_t *this_xl)
priv->fuse_ops = fuse_dump_ops;
}
+ if (fsname_allocated)
+ GF_FREE (fsname);
+ GF_FREE (mnt_args);
return 0;
cleanup_exit:
if (xl_name_allocated)
GF_FREE (this_xl->name);
+ if (fsname_allocated)
+ GF_FREE (fsname);
if (priv) {
GF_FREE (priv->mount_point);
- close (priv->fd);
- close (priv->fuse_dump_fd);
+ if (priv->fd != -1)
+ sys_close (priv->fd);
+ if (priv->fuse_dump_fd != -1)
+ sys_close (priv->fuse_dump_fd);
GF_FREE (priv);
}
+ GF_FREE (mnt_args);
return -1;
}
@@ -3737,49 +5774,143 @@ fini (xlator_t *this_xl)
if ((priv = this_xl->private) == NULL)
return;
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ if (!(priv->fini_invoked)) {
+ priv->fini_invoked = _gf_true;
+ } else {
+ pthread_mutex_unlock (&priv->sync_mutex);
+ return;
+ }
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
if (dict_get (this_xl->options, ZR_MOUNTPOINT_OPT))
mount_point = data_to_str (dict_get (this_xl->options,
ZR_MOUNTPOINT_OPT));
if (mount_point != NULL) {
- gf_log (this_xl->name, GF_LOG_NORMAL,
+ gf_log (this_xl->name, GF_LOG_INFO,
"Unmounting '%s'.", mount_point);
- dict_del (this_xl->options, ZR_MOUNTPOINT_OPT);
gf_fuse_unmount (mount_point, priv->fd);
- close (priv->fuse_dump_fd);
+ sys_close (priv->fuse_dump_fd);
+ dict_del (this_xl->options, ZR_MOUNTPOINT_OPT);
}
+ /* Process should terminate once fuse xlator is finished.
+ * Required for AUTH_FAILED event.
+ */
+ kill (getpid (), SIGTERM);
}
-struct xlator_fops fops = {
-};
+struct xlator_fops fops;
struct xlator_cbks cbks = {
+ .invalidate = fuse_invalidate,
+ .forget = fuse_forget_cbk,
+ .release = fuse_internal_release
};
struct xlator_dumpops dumpops = {
.priv = fuse_priv_dump,
.inode = fuse_itable_dump,
+ .history = fuse_history_dump,
};
struct volume_options options[] = {
{ .key = {"direct-io-mode"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"mountpoint", "mount-point"},
+ { .key = {ZR_MOUNTPOINT_OPT, "mount-point"},
.type = GF_OPTION_TYPE_PATH
},
- { .key = {"dump-fuse", "fuse-dumpfile"},
+ { .key = {ZR_DUMP_FUSE, "fuse-dumpfile"},
.type = GF_OPTION_TYPE_PATH
},
- { .key = {"attribute-timeout"},
- .type = GF_OPTION_TYPE_DOUBLE
+ { .key = {ZR_ATTR_TIMEOUT_OPT},
+ .type = GF_OPTION_TYPE_DOUBLE,
+ .default_value = "1.0"
+ },
+ { .key = {ZR_ENTRY_TIMEOUT_OPT},
+ .type = GF_OPTION_TYPE_DOUBLE,
+ .default_value = "1.0"
+ },
+ { .key = {ZR_NEGATIVE_TIMEOUT_OPT},
+ .type = GF_OPTION_TYPE_DOUBLE,
+ .default_value = "0.0"
},
- { .key = {"entry-timeout"},
- .type = GF_OPTION_TYPE_DOUBLE
+ { .key = {ZR_STRICT_VOLFILE_CHECK},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
},
- { .key = {"strict-volfile-check"},
+ { .key = {"client-pid"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"uid-map-root"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"sync-to-mount"},
.type = GF_OPTION_TYPE_BOOL
},
+ { .key = {"read-only"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"fopen-keep-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"gid-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "300"
+ },
+ { .key = {"resolve-gids"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"enable-ino32"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"background-qlen"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "64",
+ .min = 16,
+ .max = (64 * GF_UNIT_KB),
+ },
+ { .key = {"congestion-threshold"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "48",
+ .min = 12,
+ .max = (64 * GF_UNIT_KB),
+ },
+ { .key = {"fuse-mountopts"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes"
+ },
+ { .key = {"no-root-squash"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "This is the mount option for disabling the "
+ "root squash for the client irrespective of whether the root-squash "
+ "option for the volume is set or not. But this option is honoured "
+ "only for the trusted clients. For non trusted clients this value "
+ "does not have any affect and the volume option for root-squash is "
+ "honoured.",
+ },
+ { .key = {"capability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
new file mode 100644
index 00000000000..40bd17ba6e6
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -0,0 +1,428 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GF_FUSE_BRIDGE_H_
+#define _GF_FUSE_BRIDGE_H_
+
+#include <stdint.h>
+#include <signal.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <fnmatch.h>
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "statedump.h"
+
+#ifdef GF_DARWIN_HOST_OS
+#include "fuse_kernel_macfuse.h"
+#else
+#include "fuse_kernel.h"
+#endif
+#include "fuse-misc.h"
+#include "fuse-mount.h"
+#include "fuse-mem-types.h"
+
+#include "list.h"
+#include "dict.h"
+#include "syncop.h"
+#include "gidcache.h"
+
+#if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__)
+#define FUSE_OP_HIGH (FUSE_LSEEK + 1)
+#endif
+#ifdef GF_DARWIN_HOST_OS
+#define FUSE_OP_HIGH (FUSE_DESTROY + 1)
+#endif
+#define GLUSTERFS_XATTR_LEN_MAX 65536
+
+#define MAX_FUSE_PROC_DELAY 1
+
+typedef struct fuse_in_header fuse_in_header_t;
+typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh,
+ void *msg);
+
+/*
+ * Private state of the fuse xlator, reached through xlator_t->private.
+ * init() fills most fields from the xlator options before mounting.
+ */
+struct fuse_private {
+ /* set from gf_fuse_mount() in init(); -1 until then */
+ int fd;
+ uint32_t proto_minor;
+ char *volfile;
+ size_t volfile_size;
+ char *mount_point;
+ struct iobuf *iobuf;
+
+ pthread_t fuse_thread;
+ char fuse_thread_started;
+
+ uint32_t direct_io_mode;
+ size_t *msg0_len_p;
+
+ double entry_timeout;
+ double negative_timeout;
+ double attribute_timeout;
+
+ pthread_cond_t sync_cond;
+ /* also taken around fini_invoked and the subvolume winds counter */
+ pthread_mutex_t sync_mutex;
+ char event_recvd;
+
+ char init_recvd;
+
+ gf_boolean_t strict_volfile_check;
+
+ fuse_handler_t **fuse_ops;
+ fuse_handler_t **fuse_ops0;
+ pthread_mutex_t fuse_dump_mutex;
+ /* -1 unless the dump-fuse option names a file */
+ int fuse_dump_fd;
+
+ glusterfs_graph_t *next_graph;
+ xlator_t *active_subvol;
+
+ pid_t client_pid;
+ /* true when client_pid must override the pid of each request */
+ gf_boolean_t client_pid_set;
+ unsigned uid_map_root;
+ gf_boolean_t acl;
+ gf_boolean_t selinux;
+ gf_boolean_t read_only;
+ /* init() leaves this at 2 when the fopen-keep-cache option is absent */
+ int32_t fopen_keep_cache;
+ int32_t gid_cache_timeout;
+ gf_boolean_t enable_ino32;
+ /* This is the mount option for disabling the root-squash for the
+ mount irrespective of whether the root-squash option for the
+ volume is set or not. But this option is honoured only for
+ the trusted clients. For non trusted clients this value does
+ not have any effect and the volume option for root-squash is
+ honoured.
+ */
+ gf_boolean_t no_root_squash;
+ fdtable_t *fdtable;
+ gid_cache_t gid_cache;
+ char *fuse_mountopts;
+
+ /* For fuse-reverse-validation */
+ struct list_head invalidate_list;
+ pthread_cond_t invalidate_cond;
+ pthread_mutex_t invalidate_mutex;
+ gf_boolean_t reverse_fuse_thread_started;
+
+ /* For communicating with separate mount thread. */
+ int status_pipe[2];
+
+ /* for fuse queue length and congestion threshold */
+ int background_qlen;
+ int congestion_threshold;
+
+ /* for using fuse-kernel readdirp*/
+ gf_boolean_t use_readdirp;
+
+ /* fini started, helps prevent multiple epoll worker threads
+ * firing up the fini routine */
+ gf_boolean_t fini_invoked;
+
+ /* resolve gid with getgrouplist() instead of /proc/%d/status */
+ gf_boolean_t resolve_gids;
+
+ /* Enable or disable capability support */
+ gf_boolean_t capability;
+};
+typedef struct fuse_private fuse_private_t;
+
+#define INVAL_BUF_SIZE (sizeof (struct fuse_out_header) + \
+ max (sizeof (struct fuse_notify_inval_inode_out), \
+ sizeof (struct fuse_notify_inval_entry_out) + \
+ NAME_MAX + 1))
+
+
+/*
+ * One invalidation message buffer of INVAL_BUF_SIZE bytes plus its list
+ * linkage. Presumably queued on fuse_private.invalidate_list -- confirm
+ * against the code that enqueues it.
+ */
+struct fuse_invalidate_node {
+ char inval_buf[INVAL_BUF_SIZE];
+ struct list_head next;
+};
+typedef struct fuse_invalidate_node fuse_invalidate_node_t;
+
+/*
+ * Argument bundle naming the fuse xlator and an old/new subvolume pair.
+ * NOTE(review): the consumer is not visible in this header; presumably the
+ * graph-switch code -- confirm.
+ */
+struct fuse_graph_switch_args {
+ xlator_t *this;
+ xlator_t *old_subvol;
+ xlator_t *new_subvol;
+};
+typedef struct fuse_graph_switch_args fuse_graph_switch_args_t;
+
+#define FUSE_EVENT_HISTORY_SIZE 1024
+
+#define _FH_TO_FD(fh) ((fd_t *)(uintptr_t)(fh))
+
+#define FH_TO_FD(fh) ((_FH_TO_FD (fh))?(fd_ref (_FH_TO_FD (fh))):((fd_t *) 0))
+
+/* Use the same logic as the Linux NFS-client: fold the upper 32 bits of a
+ * 64-bit inode number into the lower 32 bits with XOR. The argument is
+ * parenthesized so that an expression argument is folded as a whole. */
+#define GF_FUSE_SQUASH_INO(ino) (((uint32_t) (ino)) ^ ((ino) >> 32))
+
+/*
+ * FUSE_FOP - wind file operation @fop to the active subvolume of @state.
+ *
+ * @state:  fuse_state_t of the request; frame->root->state is set to it
+ * @ret:    callback handed to STACK_WIND
+ * @op_num: operation number stored in frame->root->op and frame->op
+ * @fop:    member of xl->fops to call
+ * @args:   remaining arguments forwarded to STACK_WIND
+ *
+ * If there is no active subvolume (ENOENT) or no frame can be created
+ * (ENOMEM), the error is sent, @state is freed and the macro is left with
+ * 'break'. Control then continues in the caller after the macro, so the
+ * caller must not touch @state afterwards.
+ */
+#define FUSE_FOP(state, ret, op_num, fop, args ...) \
+ do { \
+ xlator_t *xl = NULL; \
+ call_frame_t *frame = NULL; \
+ \
+ xl = state->active_subvol; \
+ if (!xl) { \
+ gf_log_callingfn (state->this->name, GF_LOG_ERROR, \
+ "No active subvolume"); \
+ send_fuse_err (state->this, state->finh, ENOENT); \
+ free_fuse_state (state); \
+ break; \
+ } \
+ \
+ frame = get_call_frame_for_req (state); \
+ if (!frame) { \
+ /* This is not completely clean, as some \
+ * earlier allocations might remain unfreed \
+ * if we return at this point, but still \
+ * better than trying to go on with a NULL \
+ * frame ... \
+ */ \
+ send_fuse_err (state->this, state->finh, ENOMEM); \
+ free_fuse_state (state); \
+ /* ideally, need to 'return', but let the */ \
+ /* calling function take care of it */ \
+ break; \
+ } \
+ \
+ frame->root->state = state; \
+ frame->root->op = op_num; \
+ frame->op = op_num; \
+ \
+ if (state->this->history) \
+ gf_log_eh ("%"PRIu64", %s, path: (%s), gfid: " \
+ "(%s)", frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->loc.path, \
+ (state->fd == NULL)? \
+ uuid_utoa (state->loc.gfid): \
+ uuid_utoa (state->fd->inode->gfid)); \
+ STACK_WIND (frame, ret, xl, xl->fops->fop, args); \
+ } while (0)
+
+/*
+ * Pick the log level for a failed operation: ENOENT and ESTALE are logged
+ * at DEBUG, every other errno at WARNING.
+ */
+#define GF_SELECT_LOG_LEVEL(_errno)                                            \
+        ((((_errno) == ENOENT) || ((_errno) == ESTALE))?                       \
+         GF_LOG_DEBUG: GF_LOG_WARNING)
+
+/*
+ * GET_STATE - allocate the fuse_state_t for a request into @state.
+ *
+ * On allocation failure the macro logs, replies ENOMEM, frees @finh and
+ * executes a bare 'return', so it can only be used inside functions
+ * returning void.
+ */
+#define GET_STATE(this, finh, state) \
+ do { \
+ state = get_fuse_state (this, finh); \
+ if (!state) { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_ERROR, \
+ "FUSE message unique %"PRIu64" opcode %d:" \
+ " state allocation failed", \
+ finh->unique, finh->opcode); \
+ \
+ send_fuse_err (this, finh, ENOMEM); \
+ GF_FREE (finh); \
+ \
+ return; \
+ } \
+ } while (0)
+
+/*
+ * FUSE_ENTRY_CREATE - prepare @state for an entry-creating request.
+ *
+ * @this:  fuse xlator, used for error replies
+ * @priv:  fuse_private_t (proto_minor and acl are read)
+ * @finh:  request header, used for error replies
+ * @state: fuse_state_t being set up (mode, umask and xdata are written)
+ * @fci:   request payload providing umask and mode
+ * @op:    operation name used in log messages
+ *
+ * With protocol minor >= 12 the umask is removed from state->mode; when
+ * ACLs are also enabled, umask and mode are stored in a new state->xdata
+ * dictionary. The macro assigns to a variable named 'ret' that must exist
+ * in the caller, and executes a bare 'return' on failure, so it can only
+ * be used inside functions returning void.
+ *
+ * On the dict_set failure paths the dictionary is destroyed here, so
+ * state->xdata is cleared before free_fuse_state(): that function unrefs
+ * any non-NULL state->xdata and would otherwise touch freed memory.
+ */
+#define FUSE_ENTRY_CREATE(this, priv, finh, state, fci, op)                    \
+        do {                                                                   \
+                if (priv->proto_minor >= 12)                                   \
+                        state->mode &= ~fci->umask;                            \
+                if (priv->proto_minor >= 12 && priv->acl) {                    \
+                        state->xdata = dict_new ();                            \
+                        if (!state->xdata) {                                   \
+                                gf_log ("glusterfs-fuse",                      \
+                                        GF_LOG_WARNING,                        \
+                                        "%s failed to allocate "               \
+                                        "a param dictionary", op);             \
+                                send_fuse_err (this, finh, ENOMEM);            \
+                                free_fuse_state (state);                       \
+                                return;                                        \
+                        }                                                      \
+                        state->umask = fci->umask;                             \
+                                                                               \
+/* TODO: remove this after 3.4.0 release. keeping it for the                  \
+   sake of backward compatibility with old (3.3.[01])                         \
+   releases till then. */                                                     \
+                        ret = dict_set_int16 (state->xdata, "umask",           \
+                                              fci->umask);                     \
+                        if (ret < 0) {                                         \
+                                gf_log ("glusterfs-fuse",                      \
+                                        GF_LOG_WARNING,                        \
+                                        "%s Failed adding umask"               \
+                                        " to request", op);                    \
+                                dict_destroy (state->xdata);                   \
+                                state->xdata = NULL;                           \
+                                send_fuse_err (this, finh, ENOMEM);            \
+                                free_fuse_state (state);                       \
+                                return;                                        \
+                        }                                                      \
+                        ret = dict_set_int16 (state->xdata, "mode",            \
+                                              fci->mode);                      \
+                        if (ret < 0) {                                         \
+                                gf_log ("glusterfs-fuse",                      \
+                                        GF_LOG_WARNING,                        \
+                                        "%s Failed adding mode "               \
+                                        "to request", op);                     \
+                                dict_destroy (state->xdata);                   \
+                                state->xdata = NULL;                           \
+                                send_fuse_err (this, finh, ENOMEM);            \
+                                free_fuse_state (state);                       \
+                                return;                                        \
+                        }                                                      \
+                }                                                              \
+        } while (0)
+
+/*
+ * fuse_log_eh_fop - record the outcome of a fop in the event history.
+ *
+ * Does nothing unless this->history is set. When the state carries an fd,
+ * the fd pointer and the gfid of its inode are logged; otherwise the path
+ * and gfid of state->loc are logged.
+ */
+#define fuse_log_eh_fop(this, state, frame, op_ret, op_errno) \
+ do { \
+ if (this->history) { \
+ if (state->fd) \
+ gf_log_eh ("op_ret: %d, op_errno: %d, " \
+ "%"PRIu64", %s () => %p, gfid: %s", \
+ op_ret, op_errno, \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->fd, \
+ uuid_utoa (state->fd->inode->gfid)); \
+ else \
+ gf_log_eh ("op_ret: %d, op_errno: %d, " \
+ "%"PRIu64", %s () => %s, gfid: %s", \
+ op_ret, op_errno, \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->loc.path, \
+ uuid_utoa (state->loc.gfid)); \
+ } \
+ } while(0)
+
+/* Forward @args to gf_log_eh(), but only when this->history is set. */
+#define fuse_log_eh(this, args...) \
+ do { \
+ if (this->history) \
+ gf_log_eh(args); \
+ } while (0)
+
+/*
+ * Return the subvolume recorded as active in the private data of the fuse
+ * xlator @fuse. @fuse->private must be a valid fuse_private_t.
+ */
+static inline xlator_t *
+fuse_active_subvol (xlator_t *fuse)
+{
+        fuse_private_t *fuse_priv = fuse->private;
+
+        return fuse_priv->active_subvol;
+}
+
+
+/*
+ * Kind of resolution requested, stored in fuse_resolve_t.type.
+ * NOTE(review): the meaning of each value is defined by the resolver code,
+ * which is not part of this header.
+ */
+typedef enum {
+ RESOLVE_MUST = 1,
+ RESOLVE_NOT,
+ RESOLVE_MAY,
+ RESOLVE_DONTCARE,
+ RESOLVE_EXACT
+} fuse_resolve_type_t;
+
+
+/*
+ * Resolution request/result for one location of a fuse request.
+ * fuse_resolve_wipe() frees path and bname, drops the references held in
+ * fd, hint and parhint, and wipes resolve_loc.
+ */
+typedef struct {
+ fuse_resolve_type_t type;
+ fd_t *fd;
+ char *path;
+ char *bname;
+ u_char gfid[16];
+ inode_t *hint;
+ u_char pargfid[16];
+ inode_t *parhint;
+ int op_ret;
+ int op_errno;
+ loc_t resolve_loc;
+} fuse_resolve_t;
+
+
+/*
+ * Per-request state of the fuse bridge. get_fuse_state() allocates it and
+ * fills pool, this, active_subvol, itable and finh; free_fuse_state()
+ * releases loc, loc2, xdata, xattr, name, fd, finh and both resolve
+ * members before freeing the structure.
+ */
+typedef struct {
+ void *pool;
+ xlator_t *this;
+ /* subvolume whose winds counter this state is accounted on */
+ xlator_t *active_subvol;
+ inode_table_t *itable;
+ loc_t loc;
+ loc_t loc2;
+ /* request header; owned by the state and freed with it */
+ fuse_in_header_t *finh;
+ int32_t flags;
+ off_t off;
+ size_t size;
+ unsigned long nlookup;
+ fd_t *fd;
+ dict_t *xattr;
+ dict_t *xdata;
+ char *name;
+ char is_revalidate;
+ gf_boolean_t truncate_needed;
+ gf_lock_t lock;
+ uint64_t lk_owner;
+
+ /* used within resolve_and_resume */
+ /* */
+ fuse_resolve_t resolve;
+ fuse_resolve_t resolve2;
+
+ loc_t *loc_now;
+ fuse_resolve_t *resolve_now;
+
+ void *resume_fn;
+
+ int valid;
+ int mask;
+ dev_t rdev;
+ mode_t mode;
+ mode_t umask;
+ struct iatt attr;
+ struct gf_flock lk_lock;
+ struct iovec vector;
+
+ uuid_t gfid;
+ uint32_t io_flags;
+ int32_t fd_no;
+
+ gf_seek_what_t whence;
+} fuse_state_t;
+
+/*
+ * Per-fd context of the fuse bridge.
+ * NOTE(review): presumably what fuse_fd_ctx_check_n_create() returns and
+ * attaches to an fd_t; field semantics are not visible here -- confirm.
+ */
+typedef struct {
+ uint32_t open_flags;
+ char migration_failed;
+ fd_t *activefd;
+} fuse_fd_ctx_t;
+
+typedef void (*fuse_resume_fn_t) (fuse_state_t *state);
+
+GF_MUST_CHECK int32_t
+fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
+ ino_t par, const char *name);
+call_frame_t *get_call_frame_for_req (fuse_state_t *state);
+fuse_state_t *get_fuse_state (xlator_t *this, fuse_in_header_t *finh);
+void free_fuse_state (fuse_state_t *state);
+void gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa,
+ gf_boolean_t enable_ino32);
+void gf_fuse_fill_dirent (gf_dirent_t *entry, struct fuse_dirent *fde,
+ gf_boolean_t enable_ino32);
+uint64_t inode_to_fuse_nodeid (inode_t *inode);
+xlator_t *fuse_active_subvol (xlator_t *fuse);
+inode_t *fuse_ino_to_inode (uint64_t ino, xlator_t *fuse);
+int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error);
+int fuse_gfid_set (fuse_state_t *state);
+int fuse_flip_xattr_ns (struct fuse_private *priv, char *okey, char **nkey);
+fuse_fd_ctx_t * __fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd);
+fuse_fd_ctx_t * fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd);
+
+int fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn);
+int fuse_resolve_inode_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ ino_t ino);
+int fuse_resolve_entry_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ ino_t par, char *name);
+int fuse_resolve_fd_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ fd_t *fd);
+int fuse_ignore_xattr_set (fuse_private_t *priv, char *key);
+void fuse_fop_resume (fuse_state_t *state);
+int dump_history_fuse (circular_buffer_t *cb, void *data);
+int fuse_check_selinux_cap_xattr (fuse_private_t *priv, char *name);
+#endif /* _GF_FUSE_BRIDGE_H_ */
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
new file mode 100644
index 00000000000..3e541979dc7
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -0,0 +1,679 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifdef __NetBSD__
+#define _KMEMUSER
+#endif
+
+#if defined(GF_SOLARIS_HOST_OS)
+#include <sys/procfs.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#include <libutil.h>
+#elif defined(CTL_KERN)
+#include <sys/sysctl.h>
+#endif
+#include <pwd.h>
+#include <grp.h>
+
+#include "fuse-bridge.h"
+
+/*
+ * Release everything a fuse_resolve_t holds: the path and bname strings,
+ * the fd reference, the resolve_loc and both inode hints. The hint
+ * pointers are reset after their reference is dropped.
+ */
+static void
+fuse_resolve_wipe (fuse_resolve_t *resolve)
+{
+        GF_FREE ((void *)resolve->path);
+        GF_FREE ((void *)resolve->bname);
+
+        if (resolve->fd != NULL)
+                fd_unref (resolve->fd);
+
+        loc_wipe (&resolve->resolve_loc);
+
+        if (resolve->hint != NULL) {
+                inode_unref (resolve->hint);
+                resolve->hint = NULL;
+        }
+
+        if (resolve->parhint != NULL) {
+                inode_unref (resolve->parhint);
+                resolve->parhint = NULL;
+        }
+}
+
+
+/*
+ * Tear down a fuse_state_t: wipe both locs, drop the dict and fd
+ * references, free the name and the request header, wipe both resolve
+ * members, and finally free the state itself.
+ *
+ * The winds counter of the state's subvolume is decremented under
+ * priv->sync_mutex; if it reaches zero on a subvolume marked as switched,
+ * GF_EVENT_PARENT_DOWN is sent to that subvolume.
+ */
+void
+free_fuse_state (fuse_state_t *state)
+{
+        xlator_t       *this      = state->this;
+        fuse_private_t *priv      = this->private;
+        uint64_t        remaining = 0;
+        char            is_old    = 0;
+
+        loc_wipe (&state->loc);
+        loc_wipe (&state->loc2);
+
+        if (state->xdata != NULL) {
+                dict_unref (state->xdata);
+                /* recognisable non-pointer marker left in the field */
+                state->xdata = (void *)0xaaaaeeee;
+        }
+
+        if (state->xattr != NULL)
+                dict_unref (state->xattr);
+
+        if (state->name != NULL) {
+                GF_FREE (state->name);
+                state->name = NULL;
+        }
+
+        if (state->fd != NULL) {
+                fd_unref (state->fd);
+                /* recognisable non-pointer marker left in the field */
+                state->fd = (void *)0xfdfdfdfd;
+        }
+
+        if (state->finh != NULL) {
+                GF_FREE (state->finh);
+                state->finh = NULL;
+        }
+
+        fuse_resolve_wipe (&state->resolve);
+        fuse_resolve_wipe (&state->resolve2);
+
+        pthread_mutex_lock (&priv->sync_mutex);
+        {
+                state->active_subvol->winds -= 1;
+                remaining = state->active_subvol->winds;
+                is_old = state->active_subvol->switched;
+        }
+        pthread_mutex_unlock (&priv->sync_mutex);
+
+        if (remaining == 0 && is_old) {
+                xlator_notify (state->active_subvol, GF_EVENT_PARENT_DOWN,
+                               state->active_subvol, NULL);
+        }
+
+#ifdef DEBUG
+        memset (state, 0x90, sizeof (*state));
+#endif
+        GF_FREE (state);
+        state = NULL;
+}
+
+
+/*
+ * Allocate and initialise the per-request state for @finh.
+ *
+ * The active subvolume is looked up and its winds counter incremented
+ * under priv->sync_mutex; free_fuse_state() performs the matching
+ * decrement. Returns NULL if the allocation fails.
+ */
+fuse_state_t *
+get_fuse_state (xlator_t *this, fuse_in_header_t *finh)
+{
+ fuse_state_t *state = NULL;
+ xlator_t *active_subvol = NULL;
+ fuse_private_t *priv = NULL;
+
+ state = (void *)GF_CALLOC (1, sizeof (*state),
+ gf_fuse_mt_fuse_state_t);
+ if (!state)
+ return NULL;
+
+ /* NOTE(review): the subvolume lookup below goes through THIS while priv
+ * comes from the 'this' argument; state->this is overwritten with
+ * 'this' further down. Presumably both name the fuse xlator -- confirm. */
+ state->this = THIS;
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ active_subvol = fuse_active_subvol (state->this);
+ active_subvol->winds++;
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ state->active_subvol = active_subvol;
+ state->itable = active_subvol->itable;
+
+ state->pool = this->ctx->pool;
+ state->finh = finh;
+ state->this = this;
+
+ LOCK_INIT (&state->lock);
+
+ return state;
+}
+
+
+#define FUSE_MAX_AUX_GROUPS 32 /* We can get only up to 32 aux groups from /proc */
+/*
+ * Fill frame->root->groups and frame->root->ngrps with the auxiliary groups
+ * of the process that issued the request (frame->root->pid / uid / gid).
+ *
+ * Linux:   with priv->resolve_gids the list comes from getpwuid_r() and
+ *          getgrouplist(); otherwise the "Groups:" line of
+ *          /proc/<pid>/status is parsed, up to FUSE_MAX_AUX_GROUPS entries.
+ * Solaris: /proc/<pid>/cred is read and the group array is allocated.
+ *          NOTE(review): this branch never copies groups or sets ngrps.
+ * BSD/Mac: the credentials are fetched with sysctl(KERN_PROC_PID).
+ * Other:   ngrps is set to 0.
+ *
+ * Failures are not reported to the caller; the frame is left with whatever
+ * group information had been filled in up to that point.
+ */
+void
+frame_fill_groups (call_frame_t *frame)
+{
+#if defined(GF_LINUX_HOST_OS)
+        xlator_t       *this = frame->this;
+        fuse_private_t *priv = this->private;
+        char            filename[32];
+        char            line[4096];
+        char           *ptr = NULL;
+        FILE           *fp = NULL;
+        int             idx = 0;
+        long int        id = 0;
+        char           *saveptr = NULL;
+        char           *endptr = NULL;
+        int             ret = 0;
+        int             ngroups = FUSE_MAX_AUX_GROUPS;
+        gid_t           mygroups[GF_MAX_AUX_GROUPS];
+
+        if (priv->resolve_gids) {
+                struct passwd    pwent;
+                char             mystrs[1024];
+                struct passwd   *result;
+
+                /* getpwuid_r() returns 0 and sets result to NULL when the
+                 * uid has no passwd entry; treat that as a failure rather
+                 * than dereferencing result below. */
+                if (getpwuid_r (frame->root->uid, &pwent, mystrs,
+                                sizeof(mystrs), &result) != 0 ||
+                    result == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR, "getpwuid_r(%u) "
+                                "failed", frame->root->uid);
+                        return;
+                }
+
+                ngroups = GF_MAX_AUX_GROUPS;
+                if (getgrouplist (result->pw_name, frame->root->gid, mygroups,
+                                  &ngroups) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR, "could not map %s to "
+                                "group list (ngroups %d, max %d)",
+                                result->pw_name, ngroups, GF_MAX_AUX_GROUPS);
+                        return;
+                }
+
+                if (call_stack_alloc_groups (frame->root, ngroups) != 0)
+                        goto out;
+
+                /* Copy data to the frame. */
+                for (idx = 0; idx < ngroups; ++idx) {
+                        frame->root->groups[idx] = mygroups[idx];
+                }
+                frame->root->ngrps = ngroups;
+        } else {
+                ret = snprintf (filename, sizeof filename, "/proc/%d/status",
+                                frame->root->pid);
+                if (ret >= sizeof filename)
+                        goto out;
+
+                fp = fopen (filename, "r");
+                if (!fp)
+                        goto out;
+
+                if (call_stack_alloc_groups (frame->root, ngroups) != 0)
+                        goto out;
+
+                while ((ptr = fgets (line, sizeof line, fp))) {
+                        if (strncmp (ptr, "Groups:", 7) != 0)
+                                continue;
+
+                        /* skip "Groups:" and the separator after it */
+                        ptr = line + 8;
+
+                        for (ptr = strtok_r (ptr, " \t\r\n", &saveptr);
+                             ptr;
+                             ptr = strtok_r (NULL, " \t\r\n", &saveptr)) {
+                                errno = 0;
+                                id = strtol (ptr, &endptr, 0);
+                                if (errno == ERANGE)
+                                        break;
+                                if (!endptr || *endptr)
+                                        break;
+                                frame->root->groups[idx++] = id;
+                                if (idx == FUSE_MAX_AUX_GROUPS)
+                                        break;
+                        }
+
+                        frame->root->ngrps = idx;
+                        break;
+                }
+        }
+
+out:
+        if (fp)
+                fclose (fp);
+#elif defined(GF_SOLARIS_HOST_OS)
+        char         filename[32];
+        char         scratch[128];
+        prcred_t    *prcred = (prcred_t *) scratch;
+        FILE        *fp = NULL;
+        int          ret = 0;
+        int          ngrps;
+
+        ret = snprintf (filename, sizeof filename,
+                        "/proc/%d/cred", frame->root->pid);
+
+        if (ret < sizeof filename) {
+                fp = fopen (filename, "r");
+                if (fp != NULL) {
+                        if (fgets (scratch, sizeof scratch, fp) != NULL) {
+                                ngrps = MIN(prcred->pr_ngroups,
+                                            FUSE_MAX_AUX_GROUPS);
+                                if (call_stack_alloc_groups (frame->root,
+                                                             ngrps) != 0) {
+                                        /* do not leak the stream on the
+                                         * early exit */
+                                        fclose (fp);
+                                        return;
+                                }
+                        }
+                        fclose (fp);
+                }
+        }
+#elif defined(CTL_KERN) /* DARWIN and *BSD */
+        /*
+           N.B. CTL_KERN is an enum on Linux. (Meaning, if it's not
+           obvious, that it's not subject to preprocessor directives
+           like '#if defined'.)
+           Unlike Linux, on Mac OS and the BSDs it is a #define. We
+           could test to see that KERN_PROC is defined, but, barring any
+           evidence to the contrary, I think that's overkill.
+           We might also test that GF_DARWIN_HOST_OS is defined, why
+           limit this to just Mac OS. It's equally valid for the BSDs
+           and we do have people building on NetBSD and FreeBSD.
+        */
+        int name[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, frame->root->pid };
+        size_t namelen = sizeof name / sizeof name[0];
+        struct kinfo_proc kp;
+        size_t kplen = sizeof(kp);
+        int i, ngroups;
+
+        if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0)
+                return;
+        ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, NGROUPS_MAX);
+        if (call_stack_alloc_groups (frame->root, ngroups) != 0)
+                return;
+        for (i = 0; i < ngroups; i++)
+                frame->root->groups[i] = kp.kp_eproc.e_ucred.cr_groups[i];
+        frame->root->ngrps = ngroups;
+#else
+        frame->root->ngrps = 0;
+#endif /* GF_LINUX_HOST_OS */
+}
+
+/*
+ * Get the groups for the PID associated with this frame. If enabled,
+ * use the gid cache to reduce group list collection.
+ *
+ * - priv == NULL or gid_cache_timeout == 0: the cache is bypassed and the
+ *   groups are collected with frame_fill_groups().
+ * - gid_cache_timeout == -1: the frame is left with no auxiliary groups.
+ * - otherwise: a cache hit is copied into the frame; on a miss the groups
+ *   are collected and a copy is offered to the cache.
+ */
+static void get_groups(fuse_private_t *priv, call_frame_t *frame)
+{
+        int i;
+        const gid_list_t *gl;
+        gid_list_t agl;
+
+        if (!priv || !priv->gid_cache_timeout) {
+                frame_fill_groups(frame);
+                return;
+        }
+
+        if (-1 == priv->gid_cache_timeout) {
+                frame->root->ngrps = 0;
+                return;
+        }
+
+        gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid,
+                              frame->root->uid, frame->root->gid);
+        if (gl) {
+                /* A successful lookup is always paired with a release,
+                 * also when the group array cannot be allocated; the
+                 * frame is then left without the cached groups. */
+                if (call_stack_alloc_groups (frame->root, gl->gl_count) == 0) {
+                        frame->root->ngrps = gl->gl_count;
+                        for (i = 0; i < gl->gl_count; i++)
+                                frame->root->groups[i] = gl->gl_list[i];
+                }
+                gid_cache_release(&priv->gid_cache, gl);
+                return;
+        }
+
+        frame_fill_groups (frame);
+
+        agl.gl_id = frame->root->pid;
+        agl.gl_uid = frame->root->uid;
+        agl.gl_gid = frame->root->gid;
+        agl.gl_count = frame->root->ngrps;
+        agl.gl_list = GF_CALLOC(frame->root->ngrps, sizeof(gid_t),
+                        gf_fuse_mt_gids_t);
+        if (!agl.gl_list)
+                return;
+
+        for (i = 0; i < frame->root->ngrps; i++)
+                agl.gl_list[i] = frame->root->groups[i];
+
+        /* the list stays ours unless gid_cache_add() reports 1 */
+        if (gid_cache_add(&priv->gid_cache, &agl) != 1)
+                GF_FREE(agl.gl_list);
+}
+
+/*
+ * Create the call frame for the request described by @state.
+ *
+ * uid, gid, pid, unique and the lock owner are copied from the request
+ * header when there is one, the auxiliary groups are filled in, and the
+ * pid is replaced by priv->client_pid when client_pid_set is true.
+ * Returns NULL if the frame cannot be created.
+ */
+call_frame_t *
+get_call_frame_for_req (fuse_state_t *state)
+{
+        call_pool_t      *pool  = state->pool;
+        fuse_in_header_t *finh  = state->finh;
+        xlator_t         *this  = state->this;
+        fuse_private_t   *priv  = this->private;
+        call_frame_t     *frame = create_frame (this, pool);
+
+        if (frame == NULL)
+                return NULL;
+
+        if (finh != NULL) {
+                frame->root->uid    = finh->uid;
+                frame->root->gid    = finh->gid;
+                frame->root->pid    = finh->pid;
+                frame->root->unique = finh->unique;
+                set_lk_owner_from_uint64 (&frame->root->lk_owner,
+                                          state->lk_owner);
+        }
+
+        /* must run before the pid override: groups belong to the caller */
+        get_groups(priv, frame);
+
+        if (priv != NULL && priv->client_pid_set)
+                frame->root->pid = priv->client_pid;
+
+        frame->root->type = GF_OP_TYPE_FOP;
+
+        return frame;
+}
+
+
+/*
+ * Translate a fuse node id into an inode_t.
+ *
+ * Node id 1 maps to the root inode of the active subvolume's inode table
+ * (NULL when there is no active subvolume); no reference is taken on it
+ * here. Any other id is the inode pointer itself, and a reference is taken
+ * before it is returned.
+ */
+inode_t *
+fuse_ino_to_inode (uint64_t ino, xlator_t *fuse)
+{
+        xlator_t *subvol = NULL;
+        inode_t  *found  = NULL;
+
+        if (ino != 1) {
+                found = (inode_t *) (unsigned long) ino;
+                inode_ref (found);
+                return found;
+        }
+
+        subvol = fuse_active_subvol (fuse);
+        if (subvol != NULL)
+                found = subvol->itable->root;
+
+        return found;
+}
+
+/*
+ * Translate an inode_t into the node id handed to fuse: 0 for a NULL
+ * inode, 1 for the inode carrying the root gfid, and the inode's address
+ * for everything else.
+ */
+uint64_t
+inode_to_fuse_nodeid (inode_t *inode)
+{
+        if (inode == NULL)
+                return 0;
+
+        return __is_root_gfid (inode->gfid) ? 1 : (unsigned long) inode;
+}
+
+
+/*
+ * Fill @loc for a request, either by entry (@par + @name) or by node id
+ * (@ino, with @name == NULL).
+ *
+ * Members of @loc that are already set (parent, inode) are kept, so the
+ * function can be called more than once on the same loc. loc->path is
+ * built with inode_path() and loc->name is pointed at its last component.
+ *
+ * Returns 0 on success. If inode_path() fails its non-positive result is
+ * returned; -1 is returned when @ino is not 1 and no parent was found.
+ */
+GF_MUST_CHECK int32_t
+fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
+ ino_t par, const char *name)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+ uuid_t null_gfid = {0,};
+
+ /* resistance against multiple invocation of loc_fill not to get
+ reference leaks via inode_search() */
+
+ if (name) {
+ /* entry form: resolve the parent first, then look the name up in it */
+ parent = loc->parent;
+ if (!parent) {
+ parent = fuse_ino_to_inode (par, state->this);
+ loc->parent = parent;
+ if (parent)
+ gf_uuid_copy (loc->pargfid, parent->gfid);
+ }
+
+ inode = loc->inode;
+ if (!inode && parent) {
+ inode = inode_grep (parent->table, parent, name);
+ loc->inode = inode;
+ }
+
+ ret = inode_path (parent, name, &path);
+ if (ret <= 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "inode_path failed for %s/%s",
+ (parent)?uuid_utoa (parent->gfid):"0", name);
+ goto fail;
+ }
+ loc->path = path;
+ } else {
+ /* node-id form: resolve the inode, then derive its parent */
+ inode = loc->inode;
+ if (!inode) {
+ inode = fuse_ino_to_inode (ino, state->this);
+ loc->inode = inode;
+ if (inode)
+ gf_uuid_copy (loc->gfid, inode->gfid);
+ }
+
+ parent = loc->parent;
+ if (!parent) {
+ parent = inode_parent (inode, null_gfid, NULL);
+ loc->parent = parent;
+ if (parent)
+ gf_uuid_copy (loc->pargfid, parent->gfid);
+
+ }
+
+ ret = inode_path (inode, NULL, &path);
+ if (ret <= 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "inode_path failed for %s",
+ (inode) ? uuid_utoa (inode->gfid) : "0");
+ goto fail;
+ }
+ loc->path = path;
+ }
+
+ if (loc->path) {
+ /* name is the component after the last '/', or "" if there is none */
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ else
+ loc->name = "";
+ }
+
+ /* only the root (node id 1) is allowed to have no parent */
+ if ((ino != 1) && (parent == NULL)) {
+ gf_log ("fuse-bridge", GF_LOG_DEBUG,
+ "failed to search parent for %"PRId64"/%s (%"PRId64")",
+ (ino_t)par, name, (ino_t)ino);
+ ret = -1;
+ goto fail;
+ }
+ ret = 0;
+fail:
+ /* this should not happen as inode_path returns -1 when buf is NULL
+ for sure */
+ if (path && !loc->path)
+ GF_FREE (path);
+ return ret;
+}
+
+/*
+ * Convert a GlusterFS iatt (@st) into the fuse_attr (@fa) sent to the
+ * kernel.  With @enable_ino32 set, the inode number is passed through
+ * GF_FUSE_SQUASH_INO() first; otherwise it is copied unchanged.
+ *
+ * courtesy of folly
+ */
+void
+gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa, gf_boolean_t enable_ino32)
+{
+        fa->ino = enable_ino32 ? GF_FUSE_SQUASH_INO(st->ia_ino)
+                               : st->ia_ino;
+
+        /* size and link information */
+        fa->size   = st->ia_size;
+        fa->blocks = st->ia_blocks;
+        fa->nlink  = st->ia_nlink;
+
+        /* timestamps: seconds first, then the nanosecond parts */
+        fa->atime     = st->ia_atime;
+        fa->mtime     = st->ia_mtime;
+        fa->ctime     = st->ia_ctime;
+        fa->atimensec = st->ia_atime_nsec;
+        fa->mtimensec = st->ia_mtime_nsec;
+        fa->ctimensec = st->ia_ctime_nsec;
+
+        /* ownership, permissions and device number */
+        fa->uid  = st->ia_uid;
+        fa->gid  = st->ia_gid;
+        fa->mode = st_mode_from_ia (st->ia_prot, st->ia_type);
+        fa->rdev = makedev (ia_major (st->ia_rdev),
+                            ia_minor (st->ia_rdev));
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+        fa->blksize = st->ia_blksize;
+#endif
+#ifdef GF_DARWIN_HOST_OS
+        fa->crtime     = (uint64_t)-1;
+        fa->crtimensec = (uint32_t)-1;
+        fa->flags      = 0;
+#endif
+}
+
+/*
+ * Fill the fuse_dirent @fde from the GlusterFS directory entry @entry.
+ * With @enable_ino32 set, the inode number is passed through
+ * GF_FUSE_SQUASH_INO(); otherwise it is copied unchanged.
+ */
+void
+gf_fuse_fill_dirent (gf_dirent_t *entry, struct fuse_dirent *fde, gf_boolean_t enable_ino32)
+{
+        fde->ino = enable_ino32 ? GF_FUSE_SQUASH_INO(entry->d_ino)
+                                : entry->d_ino;
+
+        fde->type = entry->d_type;
+        fde->off  = entry->d_off;
+
+        /* exactly namelen bytes are copied, so no terminating NUL is
+           written into fde->name */
+        fde->namelen = strlen (entry->d_name);
+        strncpy (fde->name, entry->d_name, fde->namelen);
+}
+
+/*
+ * Build a copy of the xattr key @okey with its namespace (everything up
+ * to, but not including, the first '.') replaced by @nns.
+ *
+ * On success the newly allocated key is stored in *@nkey and 0 is
+ * returned; the caller owns the allocation.  Returns -1, leaving *@nkey
+ * untouched, when @okey contains no '.' or the allocation fails.
+ */
+static int
+fuse_do_flip_xattr_ns (char *okey, const char *nns, char **nkey)
+{
+        char *suffix = NULL;
+        char *key    = NULL;
+
+        suffix = strchr (okey, '.');
+        GF_ASSERT (suffix);
+        /* do not rely on the assertion aborting: without a '.' there is
+           no namespace to replace and strlen (NULL) below would crash */
+        if (!suffix)
+                return -1;
+
+        /* the buffer is sized for exactly nns + suffix + NUL, so the
+           strcpy/strcat pair below cannot overrun it */
+        key = GF_CALLOC (1, strlen (nns) + strlen (suffix) + 1,
+                         gf_common_mt_char);
+        if (!key)
+                return -1;
+
+        strcpy (key, nns);
+        strcat (key, suffix);
+
+        *nkey = key;
+
+        return 0;
+}
+
+/*
+ * Store a gf_strdup() copy of @okey in *@nkey.
+ * Returns 0 on success and -1 when the duplication fails.
+ */
+static int
+fuse_xattr_alloc_default (char *okey, char **nkey)
+{
+        *nkey = gf_strdup (okey);
+
+        return (*nkey != NULL) ? 0 : -1;
+}
+
+#define PRIV_XA_NS "trusted"
+#define UNPRIV_XA_NS "system"
+
+/*
+ * Produce in *@nkey the xattr key to use in place of @okey.
+ *
+ * For a gsyncd client (volume-mark and xtime keys) and for a hadoop
+ * client (pathinfo key), a key in the UNPRIV_XA_NS namespace is rewritten
+ * into the PRIV_XA_NS namespace.  Every other key is duplicated as is.
+ *
+ * Returns the result of the helper that allocated *@nkey (0 on success,
+ * -1 on failure).
+ */
+int
+fuse_flip_xattr_ns (fuse_private_t *priv, char *okey, char **nkey)
+{
+        gf_boolean_t flip = _gf_false;
+
+        if (priv->client_pid == GF_CLIENT_PID_GSYNCD) {
+                /* valid xattr(s): *xtime, volume-mark* */
+                gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): "
+                       "volume-mark*, *xtime", priv->client_pid);
+                if (strcmp (okey, UNPRIV_XA_NS".glusterfs.volume-mark") == 0) {
+                        flip = _gf_true;
+                } else if (fnmatch (UNPRIV_XA_NS".glusterfs.volume-mark.*",
+                                    okey, FNM_PERIOD) == 0) {
+                        flip = _gf_true;
+                } else if (fnmatch (UNPRIV_XA_NS".glusterfs.*.xtime",
+                                    okey, FNM_PERIOD) == 0) {
+                        flip = _gf_true;
+                }
+        } else if (priv->client_pid == GF_CLIENT_PID_HADOOP) {
+                /* valid xattr(s): pathinfo */
+                gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): "
+                       "pathinfo", priv->client_pid);
+                if (strcmp (okey, UNPRIV_XA_NS".glusterfs.pathinfo") == 0)
+                        flip = _gf_true;
+        }
+
+        if (!flip) {
+                /* if we cannot match, continue with what we got */
+                return fuse_xattr_alloc_default (okey, nkey);
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "flipping %s to "PRIV_XA_NS" equivalent",
+                okey);
+
+        return fuse_do_flip_xattr_ns (okey, PRIV_XA_NS, nkey);
+}
+
+/*
+ * Decide whether a setxattr of @key must be refused.
+ *
+ * Keys matching "user.*" are always allowed, as is every key for a client
+ * whose pid is not GF_CLIENT_PID_GSYNCD.  For a gsyncd client only keys
+ * matching one of the patterns listed below are allowed.
+ *
+ * Returns 0 when the key is allowed and -1 when it is disallowed; the
+ * verdict is logged at debug level either way.
+ */
+int
+fuse_ignore_xattr_set (fuse_private_t *priv, char *key)
+{
+        /* trusted NS check: patterns a gsyncd client may set */
+        static const char *const gsyncd_allowed[] = {
+                "*.glusterfs.*.xtime",
+                "*.glusterfs.volume-mark",
+                "*.glusterfs.volume-mark.*",
+                "system.posix_acl_access",
+                "glusterfs.gfid.newfile",
+                "*.glusterfs.shard.block-size",
+                "*.glusterfs.shard.file-size",
+        };
+        size_t idx     = 0;
+        int    verdict = 0;
+
+        /* don't mess with user namespace */
+        if ((fnmatch ("user.*", key, FNM_PERIOD) != 0)
+            && (priv->client_pid == GF_CLIENT_PID_GSYNCD)) {
+                verdict = -1;
+                for (idx = 0;
+                     idx < sizeof (gsyncd_allowed) / sizeof (gsyncd_allowed[0]);
+                     idx++) {
+                        if (fnmatch (gsyncd_allowed[idx], key,
+                                     FNM_PERIOD) == 0) {
+                                verdict = 0;
+                                break;
+                        }
+                }
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "%s setxattr: key [%s], "
+                " client pid [%d]", (verdict ? "disallowing" : "allowing"), key,
+                priv->client_pid);
+
+        return verdict;
+}
+
+/*
+ * Validate access to the "security.selinux" and "security.capability"
+ * xattrs against the mount configuration in @priv.
+ *
+ * Returns 0 when @name is neither of the two, when it is
+ * "security.selinux" and priv->selinux is set, or when it is
+ * "security.capability" and either priv->capability or priv->selinux is
+ * set.  Returns -1 otherwise.
+ */
+int
+fuse_check_selinux_cap_xattr (fuse_private_t *priv, char *name)
+{
+        if (strcmp (name, "security.selinux") == 0)
+                return (priv->selinux) ? 0 : -1;
+
+        if (strcmp (name, "security.capability") == 0)
+                return ((priv->capability) || (priv->selinux)) ? 0 : -1;
+
+        /* if xattr name is not of interest, no validations needed */
+        return 0;
+}
diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h
index b0eb816cd14..2b4b473813d 100644
--- a/xlators/mount/fuse/src/fuse-mem-types.h
+++ b/xlators/mount/fuse/src/fuse-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __FUSE_MEM_TYPES_H__
#define __FUSE_MEM_TYPES_H__
@@ -29,6 +19,10 @@ enum gf_fuse_mem_types_ {
gf_fuse_mt_char,
gf_fuse_mt_iov_base,
gf_fuse_mt_fuse_state_t,
+ gf_fuse_mt_fd_ctx_t,
+ gf_fuse_mt_graph_switch_args_t,
+ gf_fuse_mt_gids_t,
+ gf_fuse_mt_invalidate_node_t,
gf_fuse_mt_end
};
#endif
diff --git a/xlators/mount/fuse/src/fuse-resolve.c b/xlators/mount/fuse/src/fuse-resolve.c
new file mode 100644
index 00000000000..7d3494f5419
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-resolve.c
@@ -0,0 +1,721 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "fuse-bridge.h"
+
+static int
+fuse_resolve_all (fuse_state_t *state);
+
+int fuse_resolve_continue (fuse_state_t *state);
+int fuse_resolve_entry_simple (fuse_state_t *state);
+int fuse_resolve_inode_simple (fuse_state_t *state);
+int fuse_migrate_fd (xlator_t *this, fd_t *fd, xlator_t *old_subvol,
+ xlator_t *new_subvol);
+
+fuse_fd_ctx_t *
+fuse_fd_ctx_get (xlator_t *this, fd_t *fd);
+
+/*
+ * Run loc_touchup() on the loc currently being resolved, using the
+ * basename recorded in the current resolve.  Always returns 0.
+ */
+static int
+fuse_resolve_loc_touchup (fuse_state_t *state)
+{
+        loc_touchup (state->loc_now, state->resolve_now->bname);
+
+        return 0;
+}
+
+
+/*
+ * Lookup callback for the by-name resolution started in
+ * fuse_resolve_entry().
+ *
+ * On failure (@op_ret == -1) the error is logged and recorded in the
+ * current resolve.  On success the looked-up inode is linked under the
+ * resolve loc's parent/name, marked LOOKUP_NOT_NEEDED when inode_link()
+ * returned the very same inode, and stored in state->loc_now->inode.
+ * In both cases the temporary resolve loc is wiped and resolution
+ * continues.  Always returns 0.
+ */
+int
+fuse_resolve_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, inode_t *inode,
+                        struct iatt *buf, dict_t *xattr,
+                        struct iatt *postparent)
+{
+        /* everything needed from the frame is fetched before the stack
+           is destroyed */
+        fuse_state_t   *state      = frame->root->state;
+        fuse_resolve_t *resolve    = state->resolve_now;
+        loc_t          *lookup_loc = &resolve->resolve_loc;
+        inode_t        *linked     = NULL;
+        uint64_t        no_lookup  = LOOKUP_NOT_NEEDED;
+
+        STACK_DESTROY (frame->root);
+
+        if (op_ret != -1) {
+                linked = inode_link (inode, lookup_loc->parent,
+                                     lookup_loc->name, buf);
+                if (linked == inode)
+                        inode_ctx_set (linked, this, &no_lookup);
+                state->loc_now->inode = linked;
+        } else {
+                gf_log (this->name, (op_errno == ENOENT)
+                        ? GF_LOG_DEBUG : GF_LOG_WARNING,
+                        "%s/%s: failed to resolve (%s)",
+                        uuid_utoa (lookup_loc->pargfid), lookup_loc->name,
+                        strerror (op_errno));
+                resolve->op_ret   = -1;
+                resolve->op_errno = op_errno;
+        }
+
+        loc_wipe (lookup_loc);
+
+        fuse_resolve_continue (state);
+        return 0;
+}
+
+
+/*
+ * Start a by-name lookup for the entry currently being resolved.
+ *
+ * The temporary resolve loc is filled with a reference to the parent
+ * already stored in state->loc_now, the parent gfid, the basename, and an
+ * inode: the one inode_grep() finds under the parent hint, or a fresh
+ * inode_new() one.  A LOOKUP is then wound with fuse_resolve_entry_cbk()
+ * as its callback.  Always returns 0.
+ */
+int
+fuse_resolve_entry (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve     = state->resolve_now;
+        loc_t          *resolve_loc = &resolve->resolve_loc;
+
+        resolve_loc->name   = resolve->bname;
+        resolve_loc->parent = inode_ref (state->loc_now->parent);
+        gf_uuid_copy (resolve_loc->pargfid, state->loc_now->pargfid);
+
+        resolve_loc->inode = inode_grep (state->itable, resolve->parhint,
+                                         resolve->bname);
+        if (resolve_loc->inode == NULL)
+                resolve_loc->inode = inode_new (state->itable);
+
+        /* the path is best effort; the result of inode_path() is not
+           checked */
+        inode_path (resolve_loc->parent, resolve_loc->name,
+                    (char **) &resolve_loc->path);
+
+        FUSE_FOP (state, fuse_resolve_entry_cbk, GF_FOP_LOOKUP,
+                  lookup, resolve_loc, NULL);
+
+        return 0;
+}
+
+
+ /*
+ * Lookup callback for the by-gfid resolution started in
+ * fuse_resolve_gfid().
+ *
+ * On failure the error is recorded in the current resolve and resolution
+ * continues.  On success the inode is linked; when the resolve carries a
+ * gfid the linked inode becomes loc_now->inode, otherwise it becomes
+ * loc_now->parent and the entry named resolve->bname is searched under
+ * it, falling back to fuse_resolve_entry() when it is not usable yet.
+ * Always returns 0.
+ */
+int
+fuse_resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr, struct iatt *postparent)
+{
+ fuse_state_t *state = NULL;
+ fuse_resolve_t *resolve = NULL;
+ inode_t *link_inode = NULL;
+ loc_t *loc_now = NULL;
+ inode_t *tmp_inode = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ /* fetch what is needed from the frame before the stack is destroyed */
+ state = frame->root->state;
+ resolve = state->resolve_now;
+ loc_now = state->loc_now;
+
+ STACK_DESTROY (frame->root);
+
+ if (op_ret == -1) {
+ gf_log (this->name, (op_errno == ENOENT)
+ ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "%s: failed to resolve (%s)",
+ uuid_utoa (resolve->resolve_loc.gfid),
+ strerror (op_errno));
+ loc_wipe (&resolve->resolve_loc);
+
+ /* resolve->op_ret can have 3 values: 0, -1, -2.
+ * 0 : resolution was successful.
+ * -1: parent inode could not be resolved.
+ * -2: entry (inode corresponding to path) could not be resolved
+ */
+
+ if (gf_uuid_is_null (resolve->gfid)) {
+ resolve->op_ret = -1;
+ } else {
+ resolve->op_ret = -2;
+ }
+
+ resolve->op_errno = op_errno;
+ goto out;
+ }
+
+ /* link by gfid only (no parent, no name); the inode is marked
+ LOOKUP_NOT_NEEDED only when inode_link() returned this very inode */
+ link_inode = inode_link (inode, NULL, NULL, buf);
+ if (link_inode == inode)
+ inode_ctx_set (link_inode, this, &ctx_value);
+
+ loc_wipe (&resolve->resolve_loc);
+
+ if (!link_inode)
+ goto out;
+
+ /* a gfid in the resolve means the inode itself was being resolved */
+ if (!gf_uuid_is_null (resolve->gfid)) {
+ loc_now->inode = link_inode;
+ goto out;
+ }
+
+ /* otherwise the inode just linked is the parent of the entry */
+ loc_now->parent = link_inode;
+ gf_uuid_copy (loc_now->pargfid, link_inode->gfid);
+
+ tmp_inode = inode_grep (state->itable, link_inode, resolve->bname);
+ if (tmp_inode && (!inode_needs_lookup (tmp_inode, THIS))) {
+ loc_now->inode = tmp_inode;
+ goto out;
+ }
+
+ /* entry unknown or still needing a lookup: release whatever the grep
+ returned (tmp_inode may be NULL here) and look the entry up by name;
+ that path continues the resolution from its own callback */
+ inode_unref (tmp_inode);
+ fuse_resolve_entry (state);
+
+ return 0;
+out:
+ fuse_resolve_continue (state);
+ return 0;
+}
+
+
+/*
+ * Start a by-gfid lookup for the current resolve.
+ *
+ * The gfid looked up is resolve->pargfid when that is set, otherwise
+ * resolve->gfid.  The temporary resolve loc receives that gfid and an
+ * inode: the one inode_find() returns for it, or a fresh inode_new() one.
+ * A LOOKUP is then wound with fuse_resolve_gfid_cbk() as its callback.
+ * Always returns 0.
+ */
+int
+fuse_resolve_gfid (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve     = NULL;
+        loc_t          *resolve_loc = NULL;
+        int             ret         = 0;
+
+        resolve = state->resolve_now;
+        resolve_loc = &resolve->resolve_loc;
+
+        if (!gf_uuid_is_null (resolve->pargfid)) {
+                gf_uuid_copy (resolve_loc->gfid, resolve->pargfid);
+        } else if (!gf_uuid_is_null (resolve->gfid)) {
+                gf_uuid_copy (resolve_loc->gfid, resolve->gfid);
+        }
+
+        /* inode may already exist in case we are looking up an inode which was
+           linked through readdirplus */
+        resolve_loc->inode = inode_find (state->itable, resolve_loc->gfid);
+        if (!resolve_loc->inode)
+                resolve_loc->inode = inode_new (state->itable);
+        ret = loc_path (resolve_loc, NULL);
+
+        if (ret <= 0) {
+                /* report the gfid actually being looked up: when a parent
+                   is resolved that is pargfid, not resolve->gfid */
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "failed to get the path for inode %s",
+                        uuid_utoa (resolve_loc->gfid));
+        }
+
+        FUSE_FOP (state, fuse_resolve_gfid_cbk, GF_FOP_LOOKUP,
+                  lookup, resolve_loc, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Try to resolve the parent and the entry of the current resolve from
+ * the inode table alone, without winding a lookup.
+ *
+ * On success loc->parent, loc->pargfid and (when found) loc->inode of
+ * state->loc_now hold references obtained here.
+ *
+ * Return value:
+ *  0 - resolved parent and entry (as necessary)
+ * -1 - resolved parent but not entry (though necessary)
+ *  1 - resolved neither parent nor entry
+ */
+
+int
+fuse_resolve_parent_simple (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve = NULL;
+        loc_t          *loc     = NULL;
+        inode_t        *parent  = NULL;
+        inode_t        *inode   = NULL;
+        xlator_t       *this    = NULL;
+
+        resolve = state->resolve_now;
+        loc     = state->loc_now;
+        this    = state->this;
+
+        loc->name = resolve->bname;
+
+        parent = resolve->parhint;
+        if (parent->table == state->itable) {
+                if (inode_needs_lookup (parent, THIS))
+                        return 1;
+
+                /* no graph switches since */
+                loc->parent = inode_ref (parent);
+                gf_uuid_copy (loc->pargfid, parent->gfid);
+                loc->inode = inode_grep (state->itable, parent, loc->name);
+
+                /* nodeid for root is 1 and we blindly take the latest graph's
+                 * table->root as the parhint and because of this there is
+                 * ambiguity whether the entry should have existed or not, and
+                 * we took the conservative approach of assuming entry should
+                 * have been there even though it need not have (bug #804592).
+                 */
+
+                if (loc->inode && inode_needs_lookup (loc->inode, THIS)) {
+                        inode_unref (loc->inode);
+                        loc->inode = NULL;
+                        return -1;
+                }
+
+                if ((loc->inode == NULL)
+                    && __is_root_gfid (parent->gfid)) {
+                        /* non decisive result - entry missing */
+                        return -1;
+                }
+
+                /* decisive result - resolution success */
+                return 0;
+        }
+
+        /* the hint belongs to another inode table: find the parent by
+           gfid in the current one */
+        parent = inode_find (state->itable, resolve->pargfid);
+        if (!parent) {
+                /* non decisive result - parent missing */
+                return 1;
+        }
+        if (inode_needs_lookup (parent, THIS)) {
+                inode_unref (parent);
+                return 1;
+        }
+
+        loc->parent = parent;
+        gf_uuid_copy (loc->pargfid, resolve->pargfid);
+
+        inode = inode_grep (state->itable, parent, loc->name);
+        if (inode && !inode_needs_lookup (inode, this)) {
+                loc->inode = inode;
+                /* decisive result - resolution success */
+                return 0;
+        }
+
+        /* the entry exists but still needs a lookup: it is not stored in
+           the loc, so release the reference obtained from inode_grep(),
+           mirroring the same-table branch above */
+        if (inode)
+                inode_unref (inode);
+
+        /* non decisive result - entry missing */
+        return -1;
+}
+
+
+/*
+ * Resolve the parent/entry pair of the current resolve.
+ *
+ * The inode table is consulted first.  A positive result (parent not
+ * resolved) starts a by-gfid lookup, a negative one (entry not resolved)
+ * starts a by-name lookup, and zero continues with the next resolution
+ * step right away.  Always returns 0.
+ */
+int
+fuse_resolve_parent (fuse_state_t *state)
+{
+        int outcome = fuse_resolve_parent_simple (state);
+
+        if (outcome > 0)
+                fuse_resolve_gfid (state);
+        else if (outcome < 0)
+                fuse_resolve_entry (state);
+        else
+                fuse_resolve_continue (state);
+
+        return 0;
+}
+
+
+/*
+ * Try to resolve the inode of the current resolve from the inode table
+ * alone.
+ *
+ * The hint inode is used when it belongs to state->itable; otherwise the
+ * inode is searched by gfid.  Returns 0 with a referenced inode stored in
+ * state->loc_now->inode when one is found that needs no lookup, and 1
+ * otherwise.
+ */
+int
+fuse_resolve_inode_simple (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve   = state->resolve_now;
+        inode_t        *candidate = resolve->hint;
+
+        if (candidate->table == state->itable)
+                inode_ref (candidate);
+        else
+                candidate = inode_find (state->itable, resolve->gfid);
+
+        if (candidate == NULL)
+                return 1;
+
+        if (inode_needs_lookup (candidate, THIS)) {
+                /* inode was linked through readdirplus */
+                inode_unref (candidate);
+                return 1;
+        }
+
+        state->loc_now->inode = candidate;
+        return 0;
+}
+
+
+/*
+ * Resolve the inode of the current resolve: continue immediately when
+ * the inode table already provides it, otherwise start a by-gfid lookup.
+ * Always returns 0.
+ */
+int
+fuse_resolve_inode (fuse_state_t *state)
+{
+        if (fuse_resolve_inode_simple (state) > 0)
+                fuse_resolve_gfid (state);
+        else
+                fuse_resolve_continue (state);
+
+        return 0;
+}
+
+
+/*
+ * Task body that migrates the fd of the fuse_state_t passed in @data to
+ * state->active_subvol.
+ *
+ * The fd currently in use (the context's activefd, or the base fd when
+ * none is set) determines the old subvolume.  The outcome of
+ * fuse_migrate_fd() is recorded in the fd context's migration_failed
+ * flag under the base fd's lock.
+ *
+ * Returns -1 when @data is NULL or the base fd has no fuse context, and
+ * 0 otherwise (also when the migration itself failed).
+ */
+int
+fuse_migrate_fd_task (void *data)
+{
+        fuse_state_t  *state       = data;
+        fuse_fd_ctx_t *ctx         = NULL;
+        fd_t          *basefd      = NULL;
+        fd_t          *oldfd       = NULL;
+        int            migrate_ret = -1;
+
+        if (state == NULL)
+                return -1;
+
+        basefd = state->fd;
+
+        ctx = fuse_fd_ctx_get (state->this, basefd);
+        if (ctx == NULL)
+                return -1;
+
+        LOCK (&basefd->lock);
+        {
+                oldfd = ctx->activefd ? ctx->activefd : basefd;
+                fd_ref (oldfd);
+        }
+        UNLOCK (&basefd->lock);
+
+        migrate_ret = fuse_migrate_fd (state->this, basefd,
+                                       oldfd->inode->table->xl,
+                                       state->active_subvol);
+
+        LOCK (&basefd->lock);
+        {
+                ctx->migration_failed = (migrate_ret < 0) ? 1 : 0;
+        }
+        UNLOCK (&basefd->lock);
+
+        fd_unref (oldfd);
+
+        return 0;
+}
+
+
+/*
+ * Report whether a migration failure is recorded for @fd.
+ * Returns 1 when @fd has a fuse context whose migration_failed flag is
+ * set, and 0 otherwise (including when there is no context).
+ */
+static int
+fuse_migrate_fd_error (xlator_t *this, fd_t *fd)
+{
+        fuse_fd_ctx_t *ctx    = fuse_fd_ctx_get (this, fd);
+        char           failed = 0;
+
+        if ((ctx != NULL) && ctx->migration_failed)
+                failed = 1;
+
+        return failed;
+}
+
+/*
+ * Set @activefd to the fd currently in use for @basefd and take a
+ * reference on it.
+ *
+ * The active fd is basefd_ctx->activefd when set, otherwise @basefd
+ * itself.  A migrated active fd is referenced while @basefd's lock is
+ * held; @basefd is referenced after the lock has been released.
+ *
+ * NOTE: the macro reads a variable named `basefd_ctx` from the scope it
+ * is expanded in.  It expands to a single statement without a trailing
+ * semicolon, so it must be followed by ';' at the call site.
+ */
+#define FUSE_FD_GET_ACTIVE_FD(activefd, basefd)                         \
+        do {                                                            \
+                LOCK (&(basefd)->lock);                                 \
+                {                                                       \
+                        (activefd) = basefd_ctx->activefd ?             \
+                                basefd_ctx->activefd : (basefd);        \
+                        if ((activefd) != (basefd)) {                   \
+                                fd_ref (activefd);                      \
+                        }                                               \
+                }                                                       \
+                UNLOCK (&(basefd)->lock);                               \
+                                                                        \
+                if ((activefd) == (basefd)) {                           \
+                        fd_ref (activefd);                              \
+                }                                                       \
+        } while (0)
+
+
+ /*
+ * Resolve the fd of the current resolve.
+ *
+ * The fd actually in use (the base fd's activefd, or the base fd itself)
+ * is determined; when it does not belong to state->active_subvol a
+ * migration task is started for it.  A recorded or fresh migration
+ * failure makes the resolve fail with EBADF, a base fd without fuse
+ * context with EINVAL.  When the active fd differs from the base fd,
+ * state->fd is switched to it.  Resolution then continues.
+ * Always returns 0.
+ */
+static int
+fuse_resolve_fd (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ fd_t *basefd = NULL, *activefd = NULL;
+ xlator_t *active_subvol = NULL, *this = NULL;
+ int ret = 0;
+ char fd_migration_error = 0;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+
+ resolve = state->resolve_now;
+
+ this = state->this;
+
+ basefd = resolve->fd;
+ basefd_ctx = fuse_fd_ctx_get (this, basefd);
+ if (basefd_ctx == NULL) {
+ gf_log (state->this->name, GF_LOG_WARNING,
+ "fdctx is NULL for basefd (ptr:%p inode-gfid:%s), "
+ "resolver erroring out with errno EINVAL",
+ basefd, uuid_utoa (basefd->inode->gfid));
+ resolve->op_ret = -1;
+ resolve->op_errno = EINVAL;
+ goto resolve_continue;
+ }
+
+ /* takes a reference on activefd; the macro reads basefd_ctx implicitly */
+ FUSE_FD_GET_ACTIVE_FD (activefd, basefd);
+
+ active_subvol = activefd->inode->table->xl;
+
+ fd_migration_error = fuse_migrate_fd_error (state->this, basefd);
+ if (fd_migration_error) {
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ } else if (state->active_subvol != active_subvol) {
+ /* NOTE(review): the outcome is inspected right after this call, so
+ synctask_new() with no callback is presumably expected to have run
+ the task to completion by the time it returns - confirm */
+ ret = synctask_new (state->this->ctx->env, fuse_migrate_fd_task,
+ NULL, NULL, state);
+
+ fd_migration_error = fuse_migrate_fd_error (state->this,
+ basefd);
+ /* drop the pre-migration reference and re-read the active fd */
+ fd_unref (activefd);
+
+ FUSE_FD_GET_ACTIVE_FD (activefd, basefd);
+ active_subvol = activefd->inode->table->xl;
+
+ if ((ret == -1) || fd_migration_error
+ || (state->active_subvol != active_subvol)) {
+ if (ret == -1) {
+ gf_log (state->this->name, GF_LOG_WARNING,
+ "starting sync-task to migrate "
+ "basefd (ptr:%p inode-gfid:%s) failed "
+ "(old-subvolume:%s-%d "
+ "new-subvolume:%s-%d)",
+ basefd,
+ uuid_utoa (basefd->inode->gfid),
+ active_subvol->name,
+ active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ } else {
+ gf_log (state->this->name, GF_LOG_WARNING,
+ "fd migration of basefd "
+ "(ptr:%p inode-gfid:%s) failed "
+ "(old-subvolume:%s-%d "
+ "new-subvolume:%s-%d)",
+ basefd,
+ uuid_utoa (basefd->inode->gfid),
+ active_subvol->name,
+ active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ }
+
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ } else {
+ gf_log (state->this->name, GF_LOG_DEBUG,
+ "basefd (ptr:%p inode-gfid:%s) migrated "
+ "successfully in resolver "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ basefd, uuid_utoa (basefd->inode->gfid),
+ active_subvol->name, active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ }
+ }
+
+ if ((resolve->op_ret == -1) && (resolve->op_errno == EBADF)) {
+ gf_log ("fuse-resolve", GF_LOG_WARNING,
+ "migration of basefd (ptr:%p inode-gfid:%s) "
+ "did not complete, failing fop with EBADF "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+ uuid_utoa (basefd->inode->gfid),
+ active_subvol->name, active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ }
+
+ /* hand the fop the fd in use: state->fd gets its own reference to
+ activefd and a reference on basefd is released */
+ if (activefd != basefd) {
+ state->fd = fd_ref (activefd);
+ fd_unref (basefd);
+ }
+
+ /* state->active_subvol = active_subvol; */
+
+resolve_continue:
+ /* activefd is still NULL when the EINVAL path jumped here */
+ if (activefd != NULL) {
+ fd_unref (activefd);
+ }
+
+ fuse_resolve_continue (state);
+
+ return 0;
+}
+
+
+/*
+ * Publish state->gfid as the "gfid-req" key of state->xdata, creating
+ * the dictionary when it does not exist yet.
+ *
+ * Returns 0 when state->gfid is null (nothing to do), -1 when the
+ * dictionary cannot be created, and the dict_set_static_bin() result
+ * otherwise.
+ */
+int
+fuse_gfid_set (fuse_state_t *state)
+{
+        if (gf_uuid_is_null (state->gfid))
+                return 0;
+
+        if (state->xdata == NULL) {
+                state->xdata = dict_new ();
+                if (state->xdata == NULL)
+                        return -1;
+        }
+
+        return dict_set_static_bin (state->xdata, "gfid-req",
+                                    state->gfid, sizeof (state->gfid));
+}
+
+
+/*
+ * Prepare @resolve for resolving the entry @name under the directory
+ * nodeid @par: the parent inode becomes the hint, its gfid the parent
+ * gfid, and a copy of @name the basename.
+ *
+ * Returns 0 on success.  Returns -1 when @par does not map to an inode
+ * (fuse_ino_to_inode() yields NULL for the root nodeid while there is no
+ * active subvolume), in which case @resolve is left untouched, or when
+ * the basename cannot be duplicated.
+ */
+int
+fuse_resolve_entry_init (fuse_state_t *state, fuse_resolve_t *resolve,
+                         ino_t par, char *name)
+{
+        inode_t *parent = NULL;
+
+        parent = fuse_ino_to_inode (par, state->this);
+        if (!parent) {
+                /* nothing to take the gfid from; dereferencing would
+                   crash */
+                gf_log ("fuse-resolve", GF_LOG_WARNING,
+                        "no inode found for parent nodeid %"PRIu64,
+                        (uint64_t) par);
+                return -1;
+        }
+
+        gf_uuid_copy (resolve->pargfid, parent->gfid);
+        resolve->parhint = parent;
+        resolve->bname = gf_strdup (name);
+        if (!resolve->bname)
+                return -1;
+
+        return 0;
+}
+
+
+/*
+ * Prepare @resolve for resolving the nodeid @ino: the inode becomes the
+ * hint and its gfid the gfid to resolve.
+ *
+ * Returns 0 on success.  Returns -1, leaving @resolve untouched, when
+ * @ino does not map to an inode (fuse_ino_to_inode() yields NULL for the
+ * root nodeid while there is no active subvolume).
+ */
+int
+fuse_resolve_inode_init (fuse_state_t *state, fuse_resolve_t *resolve,
+                         ino_t ino)
+{
+        inode_t *inode = NULL;
+
+        inode = fuse_ino_to_inode (ino, state->this);
+        if (!inode) {
+                /* nothing to take the gfid from; dereferencing would
+                   crash */
+                gf_log ("fuse-resolve", GF_LOG_WARNING,
+                        "no inode found for nodeid %"PRIu64,
+                        (uint64_t) ino);
+                return -1;
+        }
+
+        gf_uuid_copy (resolve->gfid, inode->gfid);
+        resolve->hint = inode;
+
+        return 0;
+}
+
+
+/*
+ * Prepare @resolve for resolving @fd by storing a new reference to it.
+ * Always returns 0.
+ */
+int
+fuse_resolve_fd_init (fuse_state_t *state, fuse_resolve_t *resolve,
+                      fd_t *fd)
+{
+        fd_t *held = fd_ref (fd);
+
+        resolve->fd = held;
+
+        return 0;
+}
+
+
+/*
+ * Dispatch the current resolve to the matching resolver: by fd when one
+ * is set, by parent/entry when a parent gfid is set, by inode when a gfid
+ * is set.  A resolve with none of these needs no work and the next
+ * resolution step is taken directly.  Always returns 0.
+ */
+static int
+fuse_resolve (fuse_state_t *state)
+{
+        fuse_resolve_t *current = state->resolve_now;
+
+        if (current->fd) {
+                fuse_resolve_fd (state);
+                return 0;
+        }
+
+        if (!gf_uuid_is_null (current->pargfid)) {
+                fuse_resolve_parent (state);
+                return 0;
+        }
+
+        if (!gf_uuid_is_null (current->gfid)) {
+                fuse_resolve_inode (state);
+                return 0;
+        }
+
+        fuse_resolve_all (state);
+
+        return 0;
+}
+
+/*
+ * Called once every resolve of the request has been processed; resumes
+ * the fop via fuse_fop_resume().  Always returns 0.
+ */
+static int
+fuse_resolve_done (fuse_state_t *state)
+{
+        fuse_fop_resume (state);
+
+        return 0;
+}
+
+/*
+ * Advance the resolution state machine by one step.
+ *
+ * This function is called multiple times, once per location/fd to be
+ * resolved; state->resolve_now records which one is being worked on:
+ * NULL selects the first resolve/loc pair, the first pair is followed by
+ * the second, and after the second pair resolution is complete.  Any
+ * other value is logged as an error.  Always returns 0.
+ */
+static int
+fuse_resolve_all (fuse_state_t *state)
+{
+        fuse_resolve_t *current = state->resolve_now;
+
+        if (current == NULL) {
+                state->resolve_now = &state->resolve;
+                state->loc_now     = &state->loc;
+        } else if (current == &state->resolve) {
+                state->resolve_now = &state->resolve2;
+                state->loc_now     = &state->loc2;
+        } else if (current == &state->resolve2) {
+                fuse_resolve_done (state);
+                return 0;
+        } else {
+                gf_log ("fuse-resolve", GF_LOG_ERROR,
+                        "Invalid pointer for state->resolve_now");
+                return 0;
+        }
+
+        fuse_resolve (state);
+
+        return 0;
+}
+
+
+/*
+ * Finish the resolve that was just processed (loc touch-up) and move on
+ * to the next resolution step.  Always returns 0.
+ */
+int
+fuse_resolve_continue (fuse_state_t *state)
+{
+        (void) fuse_resolve_loc_touchup (state);
+        (void) fuse_resolve_all (state);
+
+        return 0;
+}
+
+/*
+ * Entry point of the resolver: record @fn as the function to resume the
+ * fop with and start resolving the locations/fds of @state.  The gfid
+ * request is set up first; its result is not checked.  Always returns 0.
+ */
+int
+fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn)
+{
+        (void) fuse_gfid_set (state);
+
+        state->resume_fn = fn;
+
+        (void) fuse_resolve_all (state);
+
+        return 0;
+}
diff --git a/xlators/mount/fuse/utils/Makefile.am b/xlators/mount/fuse/utils/Makefile.am
index c626e2769fe..fdad27ad103 100644
--- a/xlators/mount/fuse/utils/Makefile.am
+++ b/xlators/mount/fuse/utils/Makefile.am
@@ -1,10 +1,9 @@
utildir = @mountutildir@
-if GF_DARWIN_HOST_OS
-util_SCRIPTS = mount_glusterfs
-else
+if GF_LINUX_HOST_OS
util_SCRIPTS = mount.glusterfs
+else
+util_SCRIPTS = mount_glusterfs
endif
-CLEANFILES =
-
+CLEANFILES =
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 0ea0bdd8c6e..6c4cdfed062 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -1,20 +1,19 @@
-#!/bin/bash
-# (C) 2006, 2007, 2008 Gluster Inc. <http://www.gluster.com>
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public
-# License along with this program; if not, write to the Free
-# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA 02110-1301 USA
+#!/bin/sh
+#
+# Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (c) 2015 ungleich GmbH <http://www.ungleich.ch>
+#
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# Print all arguments on standard error.
+warn ()
+{
+ echo "$@" >&2
+}
_init ()
{
@@ -23,223 +22,669 @@ _init ()
LOG_CRITICAL=CRITICAL;
LOG_ERROR=ERROR;
LOG_WARNING=WARNING;
- LOG_NORMAL=NORMAL
+ LOG_INFO=INFO
LOG_DEBUG=DEBUG;
LOG_TRACE=TRACE;
- # set default log level to NORMAL
- log_level=$LOG_NORMAL;
+ HOST_NAME_MAX=64;
+
prefix="@prefix@";
exec_prefix=@exec_prefix@;
cmd_line=$(echo "@sbindir@/glusterfs");
+
+ # check whether getfattr exists
+ export PATH
+ getfattr=$(which getfattr 2>/dev/null);
+ if [ $? -ne 0 ]; then
+ warn "WARNING: getfattr not found, certain checks will be skipped.."
+ fi
+
+ mounttab=/proc/mounts
+ uname_s=`uname -s`
+ case ${uname_s} in
+ NetBSD)
+ getinode="stat -f %i"
+ getdev="stat -f %d"
+ lgetinode="${getinode} -L"
+ lgetdev="${getdev} -L"
+ ;;
+ Linux)
+ getinode="stat -c %i"
+ getdev="stat -c %d"
+ lgetinode="${getinode} -L"
+ lgetdev="${getdev} -L"
+ ;;
+ esac
+
+ UPDATEDBCONF=/etc/updatedb.conf
+}
+
+# Check that the host name given as $1 is not longer than HOST_NAME_MAX
+# characters.
+# Returns 0 when the length is acceptable and 1 when the name is too long.
+# Like before, the measured length is left in the global variable `length`.
+is_valid_hostname ()
+{
+    local server=$1
+
+    # ${#server} counts only the characters of the name.  Measuring with
+    # `echo | wc -c` also counted the newline appended by echo, so a name
+    # of exactly HOST_NAME_MAX characters was rejected.
+    length=${#server}
+    if [ ${length} -gt ${HOST_NAME_MAX} ]; then
+        return 1
+    fi
+
+    return 0
+}
+
+# Split the colon-separated server list given as $1, drop every entry
+# rejected by is_valid_hostname, and print the remaining servers on
+# standard output separated by spaces.
+parse_backup_volfile_servers ()
+{
+ local server_list=$1
+ local servers=""
+ local new_servers=""
+
+ # turn every ':' into a space so the list can be word-split by the loop
+ servers=$(echo ${server_list} | sed 's/\:/ /g')
+ for server in ${servers}; do
+ is_valid_hostname ${server}
+ # status 1 means the name was rejected: skip it
+ if [ $? -eq 1 ]; then
+ continue
+ fi
+ new_servers=$(echo "${new_servers} ${server}")
+ done
+
+ # unquoted on purpose: word splitting drops the leading space
+ echo ${new_servers}
+}
+
+# Split the comma-separated server list given as $1, drop every entry
+# rejected by is_valid_hostname, and print the remaining servers on
+# standard output separated by spaces.
+parse_volfile_servers ()
+{
+ local server_list=$1
+ local servers=""
+ local new_servers=""
+
+ # turn every ',' into a space so the list can be word-split by the loop
+ servers=$(echo ${server_list} | sed 's/,/ /g')
+ for server in ${servers}; do
+ is_valid_hostname ${server}
+ # status 1 means the name was rejected: skip it
+ if [ $? -eq 1 ]; then
+ continue
+ fi
+ new_servers=$(echo "${new_servers} ${server}")
+ done
+
+ # unquoted on purpose: word splitting drops the leading space
+ echo ${new_servers}
 }
start_glusterfs ()
{
if [ -n "$log_level_str" ]; then
- case "$log_level_str" in
- "ERROR")
- log_level=$LOG_ERROR;
- ;;
- "NORMAL")
- log_level=$LOG_NORMAL
+ case "$( echo $log_level_str | awk '{print toupper($0)}')" in
+ "ERROR")
+ log_level=$LOG_ERROR;
+ ;;
+ "INFO")
+ log_level=$LOG_INFO;
+ ;;
+ "DEBUG")
+ log_level=$LOG_DEBUG;
+ ;;
+ "CRITICAL")
+ log_level=$LOG_CRITICAL;
+ ;;
+ "WARNING")
+ log_level=$LOG_WARNING;
;;
- "DEBUG")
- log_level=$LOG_DEBUG;
- ;;
- "CRITICAL")
- log_level=$LOG_CRITICAL;
- ;;
- "WARNING")
- log_level=$LOG_WARNING;
- ;;
- "TRACE")
- log_level=$LOG_TRACE;
- ;;
- "NONE")
- log_level=$LOG_NONE;
- ;;
- *)
- echo "invalid log level $log_level_str, using NORMAL";
- log_level=$LOG_NORMAL;
- ;;
- esac
- fi
- cmd_line=$(echo "$cmd_line --log-level=$log_level");
-
+ "TRACE")
+ log_level=$LOG_TRACE;
+ ;;
+ "NONE")
+ log_level=$LOG_NONE;
+ ;;
+ *)
+ warn "invalid log level $log_level_str, using INFO";
+ log_level=$LOG_INFO;
+ ;;
+ esac
+ fi
+
+ # options without values start here
if [ -n "$read_only" ]; then
- cmd_line=$(echo "$cmd_line --read-only");
+ cmd_line=$(echo "$cmd_line --read-only");
fi
- if [ -n "$log_file" ]; then
- cmd_line=$(echo "$cmd_line --log-file=$log_file");
+ if [ -n "$acl" ]; then
+ cmd_line=$(echo "$cmd_line --acl");
+ fi
+
+ if [ -n "$selinux" ]; then
+ cmd_line=$(echo "$cmd_line --selinux");
+ fi
+
+ if [ -n "$enable_ino32" ]; then
+ cmd_line=$(echo "$cmd_line --enable-ino32");
+ fi
+
+ if [ -n "$worm" ]; then
+ cmd_line=$(echo "$cmd_line --worm");
+ fi
+ if [ -n "$volfile_max_fetch_attempts" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts")
+ fi
+
+ if [ -n "$fopen_keep_cache" ]; then
+ cmd_line=$(echo "$cmd_line --fopen-keep-cache");
fi
if [ -n "$volfile_check" ]; then
- cmd_line=$(echo "$cmd_line --volfile-check");
+ cmd_line=$(echo "$cmd_line --volfile-check");
+ fi
+
+ if [ -n "$mem_accounting" ]; then
+ cmd_line=$(echo "$cmd_line --mem-accounting");
+ fi
+
+ if [ -n "$aux_gfid_mount" ]; then
+ cmd_line=$(echo "$cmd_line --aux-gfid-mount");
+ fi
+
+ if [ -n "$resolve_gids" ]; then
+ cmd_line=$(echo "$cmd_line --resolve-gids");
+ fi
+
+ if [ -n "$no_root_squash" ]; then
+ cmd_line=$(echo "$cmd_line --no-root-squash");
+ fi
+
+#options with values start here
+ if [ -n "$log_level" ]; then
+ cmd_line=$(echo "$cmd_line --log-level=$log_level");
+ fi
+
+ if [ -n "$log_file" ]; then
+ cmd_line=$(echo "$cmd_line --log-file=$log_file");
fi
if [ -n "$direct_io_mode" ]; then
- cmd_line=$(echo "$cmd_line --disable-direct-io-mode");
+ cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode");
+ fi
+
+ if [ -n "$use_readdirp" ]; then
+ cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp");
fi
if [ -n "$volume_name" ]; then
cmd_line=$(echo "$cmd_line --volume-name=$volume_name");
fi
-
- if [ -n "$log_server" ]; then
- if [ -n "$log_server_port" ]; then
- cmd_line=$(echo "$cmd_line \
---log-server=$log_server \
---log-server-port=$log_server_port");
- fi
+
+ if [ -n "$attribute_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout");
+ fi
+
+ if [ -n "$entry_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");
+ fi
+
+ if [ -n "$negative_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout");
+ fi
+
+ if [ -n "$gid_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");
+ fi
+
+ if [ -n "$bg_qlen" ]; then
+ cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
+ fi
+
+ if [ -n "$cong_threshold" ]; then
+ cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold");
+ fi
+
+ if [ -n "$oom_score_adj" ]; then
+ cmd_line=$(echo "$cmd_line --oom-score-adj=$oom_score_adj");
+ fi
+
+ if [ -n "$fuse_mountopts" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts");
+ fi
+
+ if [ -n "$xlator_option" ]; then
+ cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option");
fi
+ # if a transport type is specified, we have to append it to the
+ # volume name, so that the right client vol file is fetched
+
if [ -z "$volfile_loc" ]; then
if [ -n "$server_ip" ]; then
- cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port");
- if [ -n "$transport" ]; then
- cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport");
- fi
- if [ -n "$volume_id" ]; then
- cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+
+ servers=$(parse_volfile_servers ${server_ip});
+ if [ -n "$servers" ]; then
+ for i in $(echo ${servers}); do
+ cmd_line=$(echo "$cmd_line --volfile-server=$i");
+ done
+ else
+ warn "ERROR: No valid servers found on command line.. exiting"
+ print_usage
+ exit 1
fi
if [ -n "$backupvolfile_server" ]; then
- cmd_line1=$(echo "$cmd_line --volfile-server=$backupvolfile_server");
+ if [ -z "$backup_volfile_servers" ]; then
+ is_valid_hostname ${backupvolfile_server};
+ if [ $? -eq 1 ]; then
+ warn "ERROR: Invalid backup server specified.. exiting"
+ exit 1
+ fi
+ cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server");
+ fi
+ fi
+
+ if [ -n "$backup_volfile_servers" ]; then
+ backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers})
+ for i in $(echo ${backup_servers}); do
+ cmd_line=$(echo "$cmd_line --volfile-server=$i");
+ done
+ fi
+
+ if [ -n "$server_port" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port");
fi
- cmd_line=$(echo "$cmd_line --volfile-server=$server_ip");
+ if [ -n "$volume_id" ]; then
+ if [ -n "$transport" ]; then
+ volume_id="$volume_id.$transport";
+ cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport");
+ fi
+ cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+ fi
fi
else
cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
fi
-
+
+ if [ -n "$fuse_mountopts" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts");
+ fi
+
cmd_line=$(echo "$cmd_line $mount_point");
$cmd_line;
-
- # retry the failover
- if [ $? != "0" ]; then
- if [ -n "$cmd_line1" ]; then
- cmd_line1=$(echo "$cmd_line1 $mount_point");
- $cmd_line1
- fi
+ if [ $? -ne 0 ]; then
+ warn "Mount failed. Please check the log file for more details."
+ exit 1;
fi
+
+ inode=$( ${getinode} $mount_point 2>/dev/null);
+ # this is required if the stat returns error
+ if [ $? -ne 0 ]; then
+ warn "Mount failed. Please check the log file for more details."
+ umount $mount_point > /dev/null 2>&1;
+ exit 1;
+ fi
}
-usage ()
+print_usage ()
{
+cat << EOF
+Usage: $0 <volumeserver>:<volumeid/volumeport> -o<options> <mountpoint>
+Options:
+man 8 $0
+To display the version number of the mount helper: $0 -V
+EOF
+}
-echo "Usage: mount.glusterfs <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint>
-Options:
-man 8 mount.glusterfs
+# check for recursive mounts, i.e., mounting over an existing brick
+check_recursive_mount ()
+{
+ if [ $1 = "/" ]; then
+ warn "Cannot mount over root";
+ exit 2;
+ fi
-To display the version number of the mount helper:
-mount.glusterfs --version"
+ # GFID check first
+ # remove trailing / from mount point
+ mnt_dir=${1%/};
-}
+ if [ -n "${getfattr}" ]; then
+ ${getfattr} -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid=";
+ if [ $? -eq 0 ]; then
+ warn "ERROR: $mnt_dir is in use as a brick of a gluster volume";
+ exit 2;
+ fi
+ fi
-main ()
-{
- helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p');
+ # check if the mount point is a brick's parent directory
+ GLUSTERD_WORKDIR="@GLUSTERD_WORKDIR@";
- options=$(echo "$@" | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p');
+ ls -L "${GLUSTERD_WORKDIR}"/vols/*/bricks/* > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ return;
+ fi
- new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p');
-
- [ -n "$new_log_level" ] && {
- log_level_str="$new_log_level";
- }
+ brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* 2>/dev/null | cut -d "=" -f 2`;
+ root_inode=`${lgetinode} /`;
+ root_dev=`${lgetdev} /`;
+ mnt_inode=`${lgetinode} $mnt_dir`;
+ mnt_dev=`${lgetdev} $mnt_dir`;
+ for brick in "$brick_path"; do
+ # evaluate brick path to see if this is local, if non-local, skip iteration
+ ls $brick > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ continue;
+ fi
- log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p');
+ if [ -n "${getfattr}" ]; then
+ ${getfattr} -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid=";
+ if [ $? -eq 0 ]; then
+ # brick is local
+ while [ 1 ]; do
+ tmp_brick="$brick";
+ brick="$brick"/..;
+ brick_dev=`${lgetdev} $brick`;
+ brick_inode=`${lgetinode} $brick`;
+ if [ "$mnt_inode" -eq "$brick_inode" \
+ -a "$mnt_dev" -eq "$brick_dev" ]; then
+ warn "ERROR: ${mnt_dir} is a parent of the brick ${tmp_brick}";
+ exit 2;
+ fi
+ [ "$root_inode" -ne "$brick_inode" \
+ -o "$root_dev" -ne "$brick_dev" ] || break;
+ done;
+ else
+ continue;
+ fi
+ else
+ continue;
+ fi
+ done;
+}
- read_only=$(echo "$options" | sed -n 's/.*\(ro\)[^,]*.*/\1/p');
+with_options()
+{
+ local key=$1
+ local value=$2
+
+ # Handle options with values.
+ case "$key" in
+ "log-level")
+ log_level_str=$value
+ ;;
+ "log-file")
+ log_file=$value
+ ;;
+ "transport")
+ transport=$value
+ ;;
+ "direct-io-mode")
+ direct_io_mode=$value
+ ;;
+ "volume-name")
+ volume_name=$value
+ ;;
+ "volume-id")
+ volume_id=$value
+ ;;
+ "volfile-check")
+ volfile_check=$value
+ ;;
+ "server-port")
+ server_port=$value
+ ;;
+ "attribute-timeout")
+ attribute_timeout=$value
+ ;;
+ "entry-timeout")
+ entry_timeout=$value
+ ;;
+ "negative-timeout")
+ negative_timeout=$value
+ ;;
+ "gid-timeout")
+ gid_timeout=$value
+ ;;
+ "background-qlen")
+ bg_qlen=$value
+ ;;
+ "backup-volfile-servers")
+ backup_volfile_servers=$value
+ ;;
+ "backupvolfile-server")
+ backupvolfile_server=$value
+ ;;
+ "fetch-attempts")
+ volfile_max_fetch_attempts=$value
+ ;;
+ "congestion-threshold")
+ cong_threshold=$value
+ ;;
+ "oom-score-adj")
+ oom_score_adj=$value
+ ;;
+ "xlator-option")
+ xlator_option=$value
+ ;;
+ "fuse-mountopts")
+ fuse_mountopts=$value
+ ;;
+ "use-readdirp")
+ use_readdirp=$value
+ ;;
+ "no-root-squash")
+ if [ $value = "yes" ] ||
+ [ $value = "on" ] ||
+ [ $value = "enable" ] ||
+ [ $value = "true" ] ; then
+ no_root_squash=1;
+ fi ;;
+ "root-squash")
+ if [ $value = "no" ] ||
+ [ $value = "off" ] ||
+ [ $value = "disable" ] ||
+ [ $value = "false" ] ; then
+ no_root_squash=1;
+ fi ;;
+ "context"|"fscontext"|"defcontext"|"rootcontext")
+ # standard SElinux mount options to pass to the kernel
+ [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
+ fuse_mountopts="${fuse_mountopts}$key=\"$value\""
+ ;;
+ x-*)
+ # comments or userspace application-specific options, drop them
+ ;;
+ *)
+ warn "Invalid option: $key"
+ exit 1
+ ;;
+ esac
+}
- transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p');
+without_options()
+{
+ local option=$1
+ # Handle options without values.
+ case "$option" in
+ "ro")
+ read_only=1
+ ;;
+ "acl")
+ acl=1
+ ;;
+ "selinux")
+ selinux=1
+ ;;
+ "worm")
+ worm=1
+ ;;
+ "fopen-keep-cache")
+ fopen_keep_cache=1
+ ;;
+ "enable-ino32")
+ enable_ino32=1
+ ;;
+ "mem-accounting")
+ mem_accounting=1
+ ;;
+ "aux-gfid-mount")
+ if [ ${uname_s} = "Linux" ]; then
+ aux_gfid_mount=1
+ fi
+ ;;
+ "resolve-gids")
+ resolve_gids=1
+ ;;
+ # "mount -t glusterfs" sends this, but it's useless.
+ "rw")
+ ;;
+ # TODO: not sure how to handle this yet
+ "async"|"sync"|"dirsync"|\
+ "mand"|"nomand"|\
+ "silent"|"loud"|\
+ "iversion"|"noiversion"|\
+ "nofail")
+ warn "mount option '${option}' is not handled (yet?)"
+ ;;
+ # standard mount options to pass to the kernel
+ "atime"|"noatime"|"diratime"|"nodiratime"|\
+ "relatime"|"norelatime"|\
+ "strictatime"|"nostrictatime"|"lazyatime"|"nolazyatime"|\
+ "dev"|"nodev"|"exec"|"noexec"|"suid"|"nosuid")
+ [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
+ fuse_mountopts="${fuse_mountopts}${option}"
+ ;;
+ # these ones are interpreted during system initialization
+ "auto"|"noauto")
+ ;;
+ "_netdev")
+ ;;
+ x-*)
+ # comments or userspace application-specific options, drop them
+ ;;
+ *)
+ warn "Invalid option $option";
+ exit 1
+ ;;
+ esac
+}
- direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p');
+parse_options()
+{
+ local optarg=${1}
+ for pair in $(echo ${optarg}|sed 's/,/ /g'); do
+ key=$(echo "$pair" | cut -f1 -d'=');
+ value=$(echo "$pair" | cut -f2- -d'=');
+ if [ "$key" = "$value" ]; then
+ without_options $pair;
+ else
+ with_options $key $value;
+ fi
+ done
+}
- volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p');
+update_updatedb()
+{
+ # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5).
+ # updatedb(8) should not index files under GlusterFS, indexing
+ # GlusterFS is not necessary and should be avoided.
+ # Following code disables updatedb crawl on 'glusterfs'
+ test -f $UPDATEDBCONF && {
+ if ! grep -q 'glusterfs' $UPDATEDBCONF; then
+ sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \
+ > ${UPDATEDBCONF}.bak
+ mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF
+ fi
+ }
+}
- volume_id=$(echo "$options" | sed -n 's/.*volume_id=\([^,]*\).*/\1/p');
+main ()
+{
+ if [ "x${uname_s}" = "xLinux" ] ; then
+ volfile_loc=$1
+ mount_point=$2
- volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p');
+ ## `mount` specifies options as a last argument
+ shift 2;
+ fi
+ while getopts "Vo:hns" opt; do
+ case "${opt}" in
+ o)
+ parse_options ${OPTARG};
+ shift 2;
+ ;;
+ n)
+ ;;
+ s)
+ # accept+ignore sloppy mount, passed by autofs
+ ;;
+ V)
+ ${cmd_line} -V;
+ exit 0;
+ ;;
+ h)
+ print_usage;
+ exit 0;
+ ;;
+ ?)
+ print_usage;
+ exit 0;
+ ;;
+ esac
+ done
- server_port=$(echo "$options" | sed -n 's/.*server-port=\([^,]*\).*/\1/p');
- backupvolfile_server=$(echo "$options" | sed -n 's/.*backupvolfile-server=\([^,]*\).*/\1/p');
+ if [ "x${uname_s}" = "xNetBSD" ] ; then
+ volfile_loc=$1
+ mount_point=$2
+ fi
- log_server=$(echo "$options" | sed -n 's/.*log-server=\([^,]*\).*/\1/p');
-
- log_server_port=$(echo "$options" | sed -n 's/.*log-server-port=\([^,]*\).*/\1/p');
-
- volfile_loc="$1";
-
[ -r "$volfile_loc" ] || {
- server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p');
- test_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
- [ -n "$test_str" ] && {
- # Backward compatibility
- test_str1=$(echo "$test_str" | sed -e 's/[0-9]//g');
- [ -n "$test_str1" ] && {
- volume_id="$test_str";
- } || {
- server_port=$test_str;
- }
- }
- volfile_loc="";
- }
-
- [ -n "$server_port" ] || {
- server_port="6996";
+ # '%' included to support ipv6 link local addresses
+ server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p');
+ volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
+ [ -n "$volume_str" ] && {
+ volume_id="$volume_str";
+ }
+ volfile_loc="";
+ [ -z "$volume_id" -o -z "$server_ip" ] && {
+ cat <<EOF >&2
+ERROR: Server name/volume name unspecified cannot proceed further..
+Please specify correct format
+Usage:
+man 8 $0
+EOF
+ exit 1;
+ }
}
- new_fs_options=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \
- -e 's/[,]*log-level=[^,]*//' \
- -e 's/[,]*volume-name=[^,]*//' \
- -e 's/[,]*direct-io-mode=[^,]*//' \
- -e 's/[,]*volfile-check=[^,]*//' \
- -e 's/[,]*transport=[^,]*//' \
- -e 's/[,]*backupvolfile-server=[^,]*//' \
- -e 's/[,]*server-port=[^,]*//' \
- -e 's/[,]*volume-id=[^,]*//' \
- -e 's/[,]*log-server=[^,]*//' \
- -e 's/[,]*ro[^,]*//' \
- -e 's/[,]*log-server-port=[^,]*//');
-
- #
- [ -n "$helper" ] && {
- cmd_line=$(echo "$cmd_line --$helper");
- exec $cmd_line;
- exit 0;
+ grep_ret=$(echo ${mount_point} | grep '^\-o');
+ [ "x" != "x${grep_ret}" ] && {
+ cat <<EOF >&2
+ERROR: -o options cannot be specified in either first two arguments..
+Please specify correct style
+Usage:
+man 8 $0
+EOF
+ exit 1;
}
- mount_point=""
- for arg in "$@"; do
- [ -d "$arg" ] && {
- mount_point=$arg
- }
- done
-
- [ -z "$mount_point" ] && {
- usage;
- exit 0;
+ # Reject an empty mount point argument or one that is not an existing
+ # directory
+ [ -z "$mount_point" -o ! -d "$mount_point" ] && {
+ cat <<EOF >&2
+ERROR: Mount point does not exist
+Please specify a mount point
+Usage:
+man 8 $0
+EOF
+ exit 1;
}
# Simple check to avoid multiple identical mounts
- if grep -q " $mount_point fuse" /etc/mtab; then
- echo -n "$0: according to mtab, GlusterFS is already mounted on "
- echo "$mount_point"
- sleep 1;
- exit 0;
+ if grep -q "[[:space:]+]${mount_point}[[:space:]+]fuse" $mounttab; then
+ warn "$0: according to mtab, GlusterFS is already mounted on" \
+ "$mount_point"
+ exit 32;
fi
- fs_options=$(echo "$fs_options,$new_fs_options");
-
- start_glusterfs;
+ #Snapshot volumes are mounted read only
+ case $volume_id in
+ /snaps/* ) read_only=1
+ esac
- sleep 3;
+ check_recursive_mount "$mount_point";
+
+ update_updatedb;
+
+ start_glusterfs;
}
_init "$@" && main "$@";
-
diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in
index 0f808c14bbd..eca84557e87 100755
--- a/xlators/mount/fuse/utils/mount_glusterfs.in
+++ b/xlators/mount/fuse/utils/mount_glusterfs.in
@@ -1,194 +1,548 @@
#!/bin/sh
-# (C) 2008 Gluster Inc. <http://www.gluster.com>
-#
+# (C) 2014 Red Hat Inc. <http://www.redhat.com>
+# (C) 2015 ungleich GmbH <http://www.ungleich.ch>
+#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free
# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA
+warn ()
+{
+ echo "$@" >&2
+}
_init ()
{
+
# log level definitions
LOG_NONE=NONE;
LOG_CRITICAL=CRITICAL;
LOG_ERROR=ERROR;
LOG_WARNING=WARNING;
- LOG_NORMAL=NORMAL;
+ LOG_INFO=INFO
LOG_DEBUG=DEBUG;
+ LOG_TRACE=TRACE;
- # set default log level to ERROR
- log_level=$LOG_NORMAL;
-}
+ HOST_NAME_MAX=64;
-start_glusterfs ()
-{
prefix="@prefix@";
exec_prefix=@exec_prefix@;
cmd_line=$(echo "@sbindir@/glusterfs");
-
+
+ alias lsL='ls -L'
+ uname_s=`uname -s`
+ case ${uname_s} in
+ Darwin)
+ getinode="stat -f %i"
+ getdev="stat -f %d"
+ ;;
+ esac
+}
+
+is_valid_hostname ()
+{
+ local server=$1
+
+ length=$(echo $server | wc -c)
+ if [ ${length} -gt ${HOST_NAME_MAX} ]; then
+ return 1
+ fi
+}
+
+parse_backup_volfile_servers ()
+{
+ local server_list=$1
+ local servers=""
+ local new_servers=""
+
+ servers=$(echo ${server_list} | sed 's/\:/ /g')
+ for server in ${servers}; do
+ is_valid_hostname ${server}
+ if [ $? -eq 1 ]; then
+ continue
+ fi
+ new_servers=$(echo "${new_servers} ${server}")
+ done
+
+ echo ${new_servers}
+}
+
+parse_volfile_servers ()
+{
+ local server_list=$1
+ local servers=""
+ local new_servers=""
+
+ servers=$(echo ${server_list} | sed 's/,/ /g')
+ for server in ${servers}; do
+ is_valid_hostname ${server}
+ if [ $? -eq 1 ]; then
+ continue
+ fi
+ new_servers=$(echo "${new_servers} ${server}")
+ done
+
+ echo ${new_servers}
+}
+
+start_glusterfs ()
+{
if [ -n "$log_level_str" ]; then
- case "$log_level_str" in
- "ERROR")
- log_level=$LOG_ERROR;
- ;;
- "NORMAL")
- log_level=$LOG_NORMAL;
+ case "$( echo $log_level_str | awk '{print toupper($0)}')" in
+ "ERROR")
+ log_level=$LOG_ERROR;
;;
- "DEBUG")
- log_level=$LOG_DEBUG;
- ;;
- "CRITICAL")
- log_level=$LOG_CRITICAL;
- ;;
- "WARNING")
- log_level=$LOG_WARNING;
- ;;
- "NONE")
- log_level=$LOG_NONE;
- ;;
- *)
- echo "invalid log level $log_level_str, using NORMAL";
- log_level=$LOG_NORMAL;
- ;;
- esac
- fi
- cmd_line=$(echo "$cmd_line --log-level=$log_level");
-
- if [ -n "$log_file" ]; then
- cmd_line=$(echo "$cmd_line --log-file=$log_file");
+ "INFO")
+ log_level=$LOG_INFO;
+ ;;
+ "DEBUG")
+ log_level=$LOG_DEBUG;
+ ;;
+ "CRITICAL")
+ log_level=$LOG_CRITICAL;
+ ;;
+ "WARNING")
+ log_level=$LOG_WARNING;
+ ;;
+ "TRACE")
+ log_level=$LOG_TRACE;
+ ;;
+ "NONE")
+ log_level=$LOG_NONE;
+ ;;
+ *)
+ warn "invalid log level $log_level_str, using INFO";
+ log_level=$LOG_INFO;
+ ;;
+ esac
+ fi
+
+ # options without values start here
+ if [ -n "$read_only" ]; then
+ cmd_line=$(echo "$cmd_line --read-only");
+ fi
+
+ if [ -n "$acl" ]; then
+ cmd_line=$(echo "$cmd_line --acl");
+ fi
+
+ if [ -n "$selinux" ]; then
+ cmd_line=$(echo "$cmd_line --selinux");
+ fi
+
+ if [ -n "$enable_ino32" ]; then
+ cmd_line=$(echo "$cmd_line --enable-ino32");
+ fi
+
+ if [ -n "$worm" ]; then
+ cmd_line=$(echo "$cmd_line --worm");
+ fi
+ if [ -n "$volfile_max_fetch_attempts" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts")
+ fi
+
+ if [ -n "$fopen_keep_cache" ]; then
+ cmd_line=$(echo "$cmd_line --fopen-keep-cache");
fi
if [ -n "$volfile_check" ]; then
- cmd_line=$(echo "$cmd_line --volfile-check");
+ cmd_line=$(echo "$cmd_line --volfile-check");
+ fi
+
+ if [ -n "$mem_accounting" ]; then
+ cmd_line=$(echo "$cmd_line --mem-accounting");
+ fi
+
+ if [ -n "$aux_gfid_mount" ]; then
+ cmd_line=$(echo "$cmd_line --aux-gfid-mount");
+ fi
+
+ if [ -n "$no_root_squash" ]; then
+ cmd_line=$(echo "$cmd_line --no-root-squash");
+ fi
+
+ if [ -n "$capability" ]; then
+ cmd_line=$(echo "$cmd_line --capability");
+ fi
+
+#options with values start here
+ if [ -n "$log_level" ]; then
+ cmd_line=$(echo "$cmd_line --log-level=$log_level");
+ fi
+
+ if [ -n "$log_file" ]; then
+ cmd_line=$(echo "$cmd_line --log-file=$log_file");
fi
if [ -n "$direct_io_mode" ]; then
- cmd_line=$(echo "$cmd_line --disable-direct-io-mode");
+ cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode");
fi
-
- if [ -z "$volfile_loc" ]; then
- if [ -n "$transport" ]; then
- cmd_line=$(echo "$cmd_line \
---volfile-server=$server_ip \
---volfile-server-port=$server_port \
---volfile-server-transport=$transport");
- else
- cmd_line=$(echo "$cmd_line \
---volfile-server=$server_ip \
---volfile-server-port=$server_port");
- fi
- else
- cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
+
+ if [ -n "$mac_compat" ]; then
+ cmd_line=$(echo "$cmd_line --mac-compat=$mac_compat");
+ fi
+
+ if [ -n "$use_readdirp" ]; then
+ cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp");
fi
if [ -n "$volume_name" ]; then
cmd_line=$(echo "$cmd_line --volume-name=$volume_name");
fi
-
- if [ -n "$volume_id" ]; then
- cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+
+ if [ -n "$attribute_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout");
+ fi
+
+ if [ -n "$entry_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");
+ fi
+
+ if [ -n "$negative_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout");
+ fi
+
+ if [ -n "$gid_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");
+ fi
+
+ if [ -n "$bg_qlen" ]; then
+ cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
+ fi
+
+ if [ -n "$cong_threshold" ]; then
+ cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold");
+ fi
+
+ if [ -n "$fuse_mountopts" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts");
+ fi
+
+ if [ -n "$xlator_option" ]; then
+ cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option");
+ fi
+
+ # for rdma volume, we have to fetch volfile with '.rdma' added
+ # to volume name, so that it fetches the right client vol file
+ volume_id_rdma="";
+
+ if [ -z "$volfile_loc" ]; then
+ if [ -n "$server_ip" ]; then
+
+ servers=$(parse_volfile_servers ${server_ip});
+ if [ -n "$servers" ]; then
+ for i in $(echo ${servers}); do
+ cmd_line=$(echo "$cmd_line --volfile-server=$i");
+ done
+ else
+ warn "ERROR: No valid servers found on command line.. exiting"
+ print_usage
+ exit 1
+ fi
+
+ if [ -n "$backupvolfile_server" ]; then
+ if [ -z "$backup_volfile_servers" ]; then
+ is_valid_hostname ${backupvolfile_server};
+ if [ $? -eq 1 ]; then
+ warn "ERROR: Invalid backup server specified.. exiting"
+ exit 1
+ fi
+ cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server");
+ fi
+ fi
+
+ if [ -n "$backup_volfile_servers" ]; then
+ backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers})
+ for i in $(echo ${backup_servers}); do
+ cmd_line=$(echo "$cmd_line --volfile-server=$i");
+ done
+ fi
+
+ if [ -n "$server_port" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port");
+ fi
+
+ if [ -n "$transport" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport");
+ if [ "$transport" = "rdma" ]; then
+ volume_id_rdma=".rdma";
+ fi
+ fi
+
+ if [ -n "$volume_id" ]; then
+ if [ -n "$volume_id_rdma" ]; then
+ volume_id="$volume_id$volume_id_rdma";
+ fi
+ cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+ fi
+ fi
+ else
+ cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
+ fi
+
+ if [ -n "$fuse_mountopts" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts");
fi
cmd_line=$(echo "$cmd_line $mount_point");
- exec $cmd_line;
+ $cmd_line;
+
+ if [ $? -ne 0 ]; then
+ exit 1;
+ fi
}
+print_usage ()
+{
+cat << EOF >&2
+Usage: $0 <volumeserver>:<volumeid/volumeport> -o<options> <mountpoint>
+Options:
+man 8 $0
+To display the version number of the mount helper: $0 -V
+EOF
+}
-main ()
+with_options()
{
-
- new_log_level=""
- log_file=""
- transport=""
- direct_io_mode=""
- volume_name=""
- new_fs_options=""
- volfile_check=""
-
- while getopts o: opt; do
- case "$opt" in
- o)
- options=$(echo $OPTARG | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p');
- [ -z $new_log_level ] && {
- new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p');
- }
-
- [ -z $log_file ] && {
- log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p');
- }
-
- [ -z $transport ] && {
- transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p');
- }
-
- [ -z $direct_io_mode ] && {
- direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p');
- }
-
- [ -z $volfile_check ] && {
- volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p');
- }
-
- [ -z $volume_name ] && {
- volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p');
- }
-
- [ -z $volume_id ] && {
- volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p');
- }
-
- this_option=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \
- -e 's/[,]*log-level=[^,]*//' \
- -e 's/[,]*volume-name=[^,]*//' \
- -e 's/[,]*volfile-check=[^,]*//' \
- -e 's/[,]*direct-io-mode=[^,]*//' \
- -e 's/[,]*transport=[^,]*//' \
- -e 's/[,]*volume-id=[^,]*//');
- new_fs_options="$new_fs_options $this_option";
- ;;
- esac
+ local key=$1
+ local value=$2
+
+ # Handle options with values.
+ case "$key" in
+ "log-level")
+ log_level_str=$value
+ ;;
+ "log-file")
+ log_file=$value
+ ;;
+ "transport")
+ transport=$value
+ ;;
+ "direct-io-mode")
+ direct_io_mode=$value
+ ;;
+ "mac-compat")
+ mac_compat=$value
+ ;;
+ "volume-name")
+ volume_name=$value
+ ;;
+ "volume-id")
+ volume_id=$value
+ ;;
+ "volfile-check")
+ volfile_check=$value
+ ;;
+ "server-port")
+ server_port=$value
+ ;;
+ "attribute-timeout")
+ attribute_timeout=$value
+ ;;
+ "entry-timeout")
+ entry_timeout=$value
+ ;;
+ "negative-timeout")
+ negative_timeout=$value
+ ;;
+ "gid-timeout")
+ gid_timeout=$value
+ ;;
+ "background-qlen")
+ bg_qlen=$value
+ ;;
+ "backup-volfile-servers")
+ backup_volfile_servers=$value
+ ;;
+ "backupvolfile-server")
+ backupvolfile_server=$value
+ ;;
+ "fetch-attempts")
+ volfile_max_fetch_attempts=$value
+ ;;
+ "congestion-threshold")
+ cong_threshold=$value
+ ;;
+ "xlator-option")
+ xlator_option=$value
+ ;;
+ "fuse-mountopts")
+ fuse_mountopts=$value
+ ;;
+ "use-readdirp")
+ use_readdirp=$value
+ ;;
+ "no-root-squash")
+ if [ $value = "yes" ] ||
+ [ $value = "on" ] ||
+ [ $value = "enable" ] ||
+ [ $value = "true" ] ; then
+ no_root_squash=1;
+ fi ;;
+ "root-squash")
+ if [ $value = "no" ] ||
+ [ $value = "off" ] ||
+ [ $value = "disable" ] ||
+ [ $value = "false" ] ; then
+ no_root_squash=1;
+ fi ;;
+ *)
+ warn "Invalid option: $key"
+ exit 1
+ ;;
+ esac
+}
+
+without_options()
+{
+ local option=$1
+ # Handle options without values.
+ case "$option" in
+ "ro")
+ read_only=1
+ ;;
+ "acl")
+ acl=1
+ ;;
+ "selinux")
+ selinux=1
+ ;;
+ "worm")
+ worm=1
+ ;;
+ "fopen-keep-cache")
+ fopen_keep_cache=1
+ ;;
+ "enable-ino32")
+ enable_ino32=1
+ ;;
+ "mem-accounting")
+ mem_accounting=1
+ ;;
+ "aux-gfid-mount")
+ if [ ${uname_s} = "Linux" ]; then
+ aux_gfid_mount=1
+ fi
+ ;;
+ # "mount -t glusterfs" sends this, but it's useless.
+ "rw")
+ ;;
+ # these ones are interpreted during system initialization
+ "noauto")
+ ;;
+ "_netdev")
+ ;;
+ "capability")
+ capability=1
+ ;;
+ *)
+ warn "Invalid option $option";
+ exit 1
+ ;;
+ esac
+}
+
+parse_options()
+{
+ local optarg=${1}
+ for pair in $(echo $optarg | sed 's/,/ /g'); do
+ key=$(echo "$pair" | cut -f1 -d'=');
+ value=$(echo "$pair" | cut -f2- -d'=');
+ if [ "$key" = "$value" ]; then
+ without_options $pair;
+ else
+ with_options $key $value;
+ fi
done
+}
- [ -n "$new_log_level" ] && {
- log_level_str="$new_log_level";
- }
+main ()
+{
+ ## `mount` on OSX specifies options as first argument
+ echo $1|grep -q -- "-o"
+ if [ $? -eq 0 ]; then
+ volfile_loc=$3
+ mount_point=$4
+ else
+ volfile_loc=$1
+ mount_point=$2
+ fi
- # TODO: use getopt. This is very much darwin specific
- volfile_loc="$1";
- while [ "$volfile_loc" == "-o" ] ; do
- shift ;
- shift ;
- volfile_loc="$1";
+ while getopts "Vo:h" opt; do
+ case "${opt}" in
+ o)
+ parse_options ${OPTARG};
+ ;;
+ V)
+ ${cmd_line} -V;
+ exit 0;
+ ;;
+ h)
+ print_usage;
+ exit 0;
+ ;;
+ ?)
+ print_usage;
+ exit 0;
+ ;;
+ esac
done
-
+
[ -r "$volfile_loc" ] || {
- server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p');
- server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
- [ -n "$server_port" ] || {
- server_port="6996";
- }
+ # '%' included to support ipv6 link local addresses
+ server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p');
+ volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
+ [ -n "$volume_str" ] && {
+ volume_id="$volume_str";
+ }
+ volfile_loc="";
+ }
+
+ [ -z "$volume_id" -o -z "$server_ip" ] && {
+ cat <<EOF >&2
+ERROR: Server name/volume name unspecified cannot proceed further..
+Please specify correct format
+Usage:
+man 8 $0
+EOF
+ exit 1;
+ }
+
+ grep_ret=$(echo ${mount_point} | grep '^\-o');
+ [ "x" != "x${grep_ret}" ] && {
+ cat <<EOF >&2
+ERROR: -o options cannot be specified in either first two arguments..
+Please specify correct style
+Usage:
+man 8 $0
+EOF
+ exit 1;
+ }
- volfile_loc="";
+ # Reject an empty mount point argument or one that is not an existing
+ # directory
+ [ -z "$mount_point" -o ! -d "$mount_point" ] && {
+ cat <<EOF >&2
+ERROR: Mount point does not exist
+Please specify a mount point
+Usage:
+man 8 $0
+EOF
+ exit 1;
}
- # following line is product of love towards sed
- # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p');
-
- mount_point="$2";
- fs_options=$(echo "$fs_options,$new_fs_options");
-
start_glusterfs;
}
diff --git a/xlators/nfs/Makefile.am b/xlators/nfs/Makefile.am
index de3c08cbada..8771032f6c6 100644
--- a/xlators/nfs/Makefile.am
+++ b/xlators/nfs/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = lib server
+SUBDIRS = server
CLEANFILES =
diff --git a/xlators/nfs/lib/src/Makefile.am b/xlators/nfs/lib/src/Makefile.am
deleted file mode 100644
index 4eb9b61ed41..00000000000
--- a/xlators/nfs/lib/src/Makefile.am
+++ /dev/null
@@ -1,11 +0,0 @@
-lib_LTLIBRARIES = libglrpcsvc.la
-libglrpcsvc_la_LDFLAGS = -avoidversion
-
-libglrpcsvc_la_SOURCES = msg-nfs3.c xdr-nfs3.c xdr-rpc.c auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c rpc-socket.c
-libglrpcsvc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = xdr-rpc.h msg-nfs3.h xdr-common.h xdr-nfs3.h rpc-socket.h rpcsvc.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/xlators/nfs/lib/src/auth-null.c b/xlators/nfs/lib/src/auth-null.c
deleted file mode 100644
index b162db11247..00000000000
--- a/xlators/nfs/lib/src/auth-null.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
-
-
-int
-auth_null_request_init (rpcsvc_request_t *req, void *priv)
-{
- if (!req)
- return -1;
-
- memset (req->cred.authdata, 0, RPCSVC_MAX_AUTH_BYTES);
- req->cred.datalen = 0;
-
- memset (req->verf.authdata, 0, RPCSVC_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
-
- return 0;
-}
-
-int auth_null_authenticate (rpcsvc_request_t *req, void *priv)
-{
- /* Always succeed. */
- return RPCSVC_AUTH_ACCEPT;
-}
-
-rpcsvc_auth_ops_t auth_null_ops = {
- .conn_init = NULL,
- .request_init = auth_null_request_init,
- .authenticate = auth_null_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_null = {
- .authname = "AUTH_NULL",
- .authnum = AUTH_NULL,
- .authops = &auth_null_ops,
- .authprivate = NULL
-};
-
-
-rpcsvc_auth_t *
-rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options)
-{
- return &rpcsvc_auth_null;
-}
-
diff --git a/xlators/nfs/lib/src/auth-unix.c b/xlators/nfs/lib/src/auth-unix.c
deleted file mode 100644
index 0eaf0686654..00000000000
--- a/xlators/nfs/lib/src/auth-unix.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
-#include "xdr-rpc.h"
-
-
-int
-auth_unix_request_init (rpcsvc_request_t *req, void *priv)
-{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, RPCSVC_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
-}
-
-int auth_unix_authenticate (rpcsvc_request_t *req, void *priv)
-{
- int ret = RPCSVC_AUTH_REJECT;
- struct authunix_parms aup;
- char machname[MAX_MACHINE_NAME];
-
- if (!req)
- return ret;
-
- ret = xdr_to_auth_unix_cred (req->cred.authdata, req->cred.datalen,
- &aup, machname, req->auxgids);
- if (ret == -1) {
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->uid = aup.aup_uid;
- req->gid = aup.aup_gid;
- req->auxgidcount = aup.aup_len;
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: machine name: %s, uid: %d"
- ", gid: %d", machname, req->uid, req->gid);
- ret = RPCSVC_AUTH_ACCEPT;
-err:
- return ret;
-}
-
-rpcsvc_auth_ops_t auth_unix_ops = {
- .conn_init = NULL,
- .request_init = auth_unix_request_init,
- .authenticate = auth_unix_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_unix = {
- .authname = "AUTH_UNIX",
- .authnum = AUTH_UNIX,
- .authops = &auth_unix_ops,
- .authprivate = NULL
-};
-
-
-rpcsvc_auth_t *
-rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options)
-{
- return &rpcsvc_auth_unix;
-}
-
diff --git a/xlators/nfs/lib/src/msg-nfs3.c b/xlators/nfs/lib/src/msg-nfs3.c
deleted file mode 100644
index 3eefd4c984d..00000000000
--- a/xlators/nfs/lib/src/msg-nfs3.c
+++ /dev/null
@@ -1,536 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/uio.h>
-#include <rpc/rpc.h>
-#include <rpc/xdr.h>
-#include <sys/types.h>
-
-#include "xdr-nfs3.h"
-#include "msg-nfs3.h"
-#include "xdr-common.h"
-
-
-/* Decode the mount path from the network message in inmsg
- * into the memory referenced by outpath.iov_base.
- * The size allocated for outpath.iov_base is outpath.iov_len.
- * The size of the path extracted from the message is returned.
- */
-ssize_t
-xdr_to_mountpath (struct iovec outpath, struct iovec inmsg)
-{
- XDR xdr;
- ssize_t ret = -1;
- char *mntpath = NULL;
-
- if ((!outpath.iov_base) || (!inmsg.iov_base))
- return -1;
-
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
-
- mntpath = outpath.iov_base;
- if (!xdr_dirpath (&xdr, (dirpath *)&mntpath)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_decoded_length (xdr);
-
-ret:
- return ret;
-}
-
-
-ssize_t
-xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc)
-{
- ssize_t ret = -1;
- XDR xdr;
-
- if ((!outmsg.iov_base) || (!res) || (!proc))
- return -1;
-
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
-
- if (!proc (&xdr, res)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_encoded_length (xdr);
-
-ret:
- return ret;
-}
-
-
-ssize_t
-xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc)
-{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
-
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
-
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_decoded_length (xdr);
-ret:
- return ret;
-}
-
-
-ssize_t
-xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
- struct iovec *pendingpayload)
-{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
-
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
-
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_decoded_length (xdr);
-
- if (pendingpayload) {
- pendingpayload->iov_base = xdr_decoded_remaining_addr (xdr);
- pendingpayload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
-ret:
- return ret;
-}
-
-
-/* Translate the mountres3 structure in res into XDR format into memory
- * referenced by outmsg.iov_base.
- * Returns the number of bytes used in encoding into XDR format.
- */
-ssize_t
-xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mountres3);
-}
-
-
-ssize_t
-xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb)
-{
- return xdr_serialize_generic (outmsg, (void *)mb,
- (xdrproc_t)xdr_mountbody);
-}
-
-ssize_t
-xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml)
-{
- return xdr_serialize_generic (outmsg, (void *)ml,
- (xdrproc_t)xdr_mountlist);
-}
-
-
-ssize_t
-xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m)
-{
- return xdr_serialize_generic (outmsg, (void *)m,
- (xdrproc_t)xdr_mountstat3);
-}
-
-
-ssize_t
-xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga)
-{
- return xdr_to_generic (inmsg, (void *)ga,(xdrproc_t)xdr_getattr3args);
-}
-
-
-ssize_t
-xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_getattr3res);
-}
-
-
-ssize_t
-xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_setattr3res);
-}
-
-
-ssize_t
-xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa)
-{
- return xdr_to_generic (inmsg, (void *)sa, (xdrproc_t)xdr_setattr3args);
-}
-
-
-ssize_t
-xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_lookup3res);
-}
-
-
-ssize_t
-xdr_to_lookup3args (struct iovec inmsg, lookup3args *la)
-{
- return xdr_to_generic (inmsg, (void *)la, (xdrproc_t)xdr_lookup3args);
-}
-
-
-ssize_t
-xdr_to_access3args (struct iovec inmsg, access3args *ac)
-{
- return xdr_to_generic (inmsg,(void *)ac, (xdrproc_t)xdr_access3args);
-}
-
-
-ssize_t
-xdr_serialize_access3res (struct iovec outmsg, access3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_access3res);
-}
-
-
-ssize_t
-xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra)
-{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_readlink3args);
-}
-
-
-ssize_t
-xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readlink3res);
-}
-
-
-ssize_t
-xdr_to_read3args (struct iovec inmsg, read3args *ra)
-{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_read3args);
-}
-
-
-ssize_t
-xdr_serialize_read3res (struct iovec outmsg, read3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_read3res);
-}
-
-ssize_t
-xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_read3res_nocopy);
-}
-
-
-ssize_t
-xdr_to_write3args (struct iovec inmsg, write3args *wa)
-{
- return xdr_to_generic (inmsg, (void *)wa,(xdrproc_t)xdr_write3args);
-}
-
-
-ssize_t
-xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
- struct iovec *payload)
-{
- return xdr_to_generic_payload (inmsg, (void *)wa,
- (xdrproc_t)xdr_write3args, payload);
-}
-
-
-ssize_t
-xdr_serialize_write3res (struct iovec outmsg, write3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_write3res);
-}
-
-
-ssize_t
-xdr_to_create3args (struct iovec inmsg, create3args *ca)
-{
- return xdr_to_generic (inmsg, (void *)ca, (xdrproc_t)xdr_create3args);
-}
-
-
-ssize_t
-xdr_serialize_create3res (struct iovec outmsg, create3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_create3res);
-}
-
-
-ssize_t
-xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mkdir3res);
-}
-
-
-ssize_t
-xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma)
-{
- return xdr_to_generic (inmsg, (void *)ma, (xdrproc_t)xdr_mkdir3args);
-}
-
-
-ssize_t
-xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa)
-{
- return xdr_to_generic (inmsg, (void *)sa, (xdrproc_t)xdr_symlink3args);
-}
-
-
-ssize_t
-xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_symlink3res);
-}
-
-
-ssize_t
-xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma)
-{
- return xdr_to_generic (inmsg, (void *)ma, (xdrproc_t)xdr_mknod3args);
-}
-
-
-ssize_t
-xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mknod3res);
-}
-
-
-ssize_t
-xdr_to_remove3args (struct iovec inmsg, remove3args *ra)
-{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_remove3args);
-}
-
-
-ssize_t
-xdr_serialize_remove3res (struct iovec outmsg, remove3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_remove3res);
-}
-
-
-ssize_t
-xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra)
-{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_rmdir3args);
-}
-
-
-ssize_t
-xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_rmdir3res);
-}
-
-
-ssize_t
-xdr_serialize_rename3res (struct iovec outmsg, rename3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_rename3res);
-}
-
-
-ssize_t
-xdr_to_rename3args (struct iovec inmsg, rename3args *ra)
-{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_rename3args);
-}
-
-
-ssize_t
-xdr_serialize_link3res (struct iovec outmsg, link3res *li)
-{
- return xdr_serialize_generic (outmsg, (void *)li,
- (xdrproc_t)xdr_link3res);
-}
-
-
-ssize_t
-xdr_to_link3args (struct iovec inmsg, link3args *la)
-{
- return xdr_to_generic (inmsg, (void *)la, (xdrproc_t)xdr_link3args);
-}
-
-
-ssize_t
-xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd)
-{
- return xdr_to_generic (inmsg, (void *)rd, (xdrproc_t)xdr_readdir3args);
-}
-
-
-ssize_t
-xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readdir3res);
-}
-
-
-ssize_t
-xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp)
-{
- return xdr_to_generic (inmsg, (void *)rp, (xdrproc_t)xdr_readdirp3args);
-}
-
-
-ssize_t
-xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readdirp3res);
-}
-
-
-ssize_t
-xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa)
-{
- return xdr_to_generic (inmsg, (void *)fa, (xdrproc_t)xdr_fsstat3args);
-}
-
-
-ssize_t
-xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_fsstat3res);
-}
-
-ssize_t
-xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi)
-{
- return xdr_to_generic (inmsg, (void *)fi, (xdrproc_t)xdr_fsinfo3args);
-}
-
-
-ssize_t
-xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_fsinfo3res);
-}
-
-
-ssize_t
-xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc)
-{
- return xdr_to_generic (inmsg, (void *)pc, (xdrproc_t)xdr_pathconf3args);}
-
-
-ssize_t
-xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_pathconf3res);
-}
-
-
-ssize_t
-xdr_to_commit3args (struct iovec inmsg, commit3args *ca)
-{
- return xdr_to_generic (inmsg, (void *)ca, (xdrproc_t)xdr_commit3args);
-}
-
-
-ssize_t
-xdr_serialize_commit3res (struct iovec outmsg, commit3res *res)
-{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_commit3res);
-}
-
-
-ssize_t
-xdr_serialize_exports (struct iovec outmsg, exports *elist)
-{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!outmsg.iov_base) || (!elist))
- return -1;
-
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
-
- if (!xdr_exports (&xdr, elist))
- goto ret;
-
- ret = xdr_decoded_length (xdr);
-
-ret:
- return ret;
-}
-
-
-ssize_t
-xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s)
-{
- return xdr_serialize_generic (outmsg, (void *)s,
- (xdrproc_t)xdr_nfsstat3);
-}
-
-
diff --git a/xlators/nfs/lib/src/msg-nfs3.h b/xlators/nfs/lib/src/msg-nfs3.h
deleted file mode 100644
index 047e8dfc81e..00000000000
--- a/xlators/nfs/lib/src/msg-nfs3.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MSG_NFS3_H_
-#define _MSG_NFS3_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xdr-nfs3.h"
-
-#include <sys/types.h>
-#include <sys/uio.h>
-
-extern ssize_t
-xdr_to_mountpath (struct iovec outpath, struct iovec inmsg);
-
-extern ssize_t
-xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res);
-
-extern ssize_t
-xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb);
-
-extern ssize_t
-xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga);
-
-extern ssize_t
-xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res);
-
-extern ssize_t
-xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res);
-
-extern ssize_t
-xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa);
-
-extern ssize_t
-xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res);
-
-extern ssize_t
-xdr_to_lookup3args (struct iovec inmsg, lookup3args *la);
-
-extern ssize_t
-xdr_to_access3args (struct iovec inmsg, access3args *ac);
-
-extern ssize_t
-xdr_serialize_access3res (struct iovec outmsg, access3res *res);
-
-extern ssize_t
-xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra);
-
-extern ssize_t
-xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res);
-
-extern ssize_t
-xdr_to_read3args (struct iovec inmsg, read3args *ra);
-
-extern ssize_t
-xdr_serialize_read3res (struct iovec outmsg, read3res *res);
-
-extern ssize_t
-xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res);
-
-extern ssize_t
-xdr_to_write3args (struct iovec inmsg, write3args *wa);
-
-extern ssize_t
-xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
- struct iovec *payload);
-
-extern ssize_t
-xdr_serialize_write3res (struct iovec outmsg, write3res *res);
-
-extern ssize_t
-xdr_to_create3args (struct iovec inmsg, create3args *ca);
-
-extern ssize_t
-xdr_serialize_create3res (struct iovec outmsg, create3res *res);
-
-extern ssize_t
-xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res);
-
-extern ssize_t
-xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma);
-
-extern ssize_t
-xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa);
-
-extern ssize_t
-xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res);
-
-extern ssize_t
-xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma);
-
-extern ssize_t
-xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res);
-
-extern ssize_t
-xdr_to_remove3args (struct iovec inmsg, remove3args *ra);
-
-extern ssize_t
-xdr_serialize_remove3res (struct iovec outmsg, remove3res *res);
-
-extern ssize_t
-xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra);
-
-extern ssize_t
-xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res);
-
-extern ssize_t
-xdr_serialize_rename3res (struct iovec outmsg, rename3res *res);
-
-extern ssize_t
-xdr_to_rename3args (struct iovec inmsg, rename3args *ra);
-
-extern ssize_t
-xdr_serialize_link3res (struct iovec outmsg, link3res *li);
-
-extern ssize_t
-xdr_to_link3args (struct iovec inmsg, link3args *la);
-
-extern ssize_t
-xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd);
-
-extern ssize_t
-xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res);
-
-extern ssize_t
-xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp);
-
-extern ssize_t
-xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res);
-
-extern ssize_t
-xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa);
-
-extern ssize_t
-xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res);
-
-extern ssize_t
-xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi);
-
-extern ssize_t
-xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res);
-
-extern ssize_t
-xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc);
-
-extern ssize_t
-xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res);
-
-extern ssize_t
-xdr_to_commit3args (struct iovec inmsg, commit3args *ca);
-
-extern ssize_t
-xdr_serialize_commit3res (struct iovec outmsg, commit3res *res);
-
-extern ssize_t
-xdr_serialize_exports (struct iovec outmsg, exports *elist);
-
-extern ssize_t
-xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml);
-
-extern ssize_t
-xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m);
-
-extern ssize_t
-xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s);
-#endif
diff --git a/xlators/nfs/lib/src/rpc-socket.c b/xlators/nfs/lib/src/rpc-socket.c
deleted file mode 100644
index 01f114a8530..00000000000
--- a/xlators/nfs/lib/src/rpc-socket.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "rpc-socket.h"
-#include "rpcsvc.h"
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-
-static int
-rpcsvc_socket_server_get_local_socket (int addrfam, char *listenhost,
- uint16_t listenport,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- struct addrinfo hints, *res = 0;
- char service[NI_MAXSERV];
- int ret = -1;
-
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listenport);
-
- memset (&hints, 0, sizeof (hints));
- addr->sa_family = hints.ai_family = addrfam;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
-
- ret = getaddrinfo(listenhost, service, &hints, &res);
- if (ret != 0) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listenhost, service, gai_strerror (ret));
- ret = -1;
- goto err;
- }
-
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
-
- freeaddrinfo (res);
- ret = 0;
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_socket_listen (int addrfam, char *listenhost, uint16_t listenport)
-{
- int sock = -1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- int flags = 0;
- int ret = -1;
- int opt = 1;
-
- ret = rpcsvc_socket_server_get_local_socket (addrfam, listenhost,
- listenport,SA (&sockaddr),
- &sockaddr_len);
-
- if (ret == -1)
- return ret;
-
- sock = socket (SA (&sockaddr)->sa_family, SOCK_STREAM, 0);
- if (sock == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "socket creation failed"
- " (%s)", strerror (errno));
- goto err;
- }
-
- flags = fcntl (sock, F_GETFL);
- if (flags == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot get socket flags"
- " (%s)", strerror(errno));
- goto close_err;
- }
-
- ret = fcntl (sock, F_SETFL, flags | O_NONBLOCK);
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot set socket "
- "non-blocking (%s)", strerror (errno));
- goto close_err;
- }
-
- ret = setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt));
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "setsockopt() for "
- "SO_REUSEADDR failed (%s)", strerror (errno));
- goto close_err;
- }
-
- ret = bind (sock, (struct sockaddr *)&sockaddr, sockaddr_len);
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "binding socket failed:"
- " %s", strerror (errno));
- if (errno == EADDRINUSE)
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "Port is already"
- " in use");
- goto close_err;
- }
-
- ret = listen (sock, 10);
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "could not listen on"
- " socket (%s)", strerror (errno));
- goto close_err;
- }
-
- return sock;
-
-close_err:
- close (sock);
- sock = -1;
-
-err:
- return sock;
-}
-
-
-int
-rpcsvc_socket_accept (int listenfd)
-{
- int new_sock = -1;
- struct sockaddr_storage new_sockaddr = {0, };
- socklen_t addrlen = sizeof (new_sockaddr);
- int flags = 0;
- int ret = -1;
- int on = 1;
-
- new_sock = accept (listenfd, SA (&new_sockaddr), &addrlen);
- if (new_sock == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR,"accept on socket failed");
- goto err;
- }
-
- flags = fcntl (new_sock, F_GETFL);
- if (flags == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot get socket flags"
- " (%s)", strerror(errno));
- goto close_err;
- }
-
- ret = fcntl (new_sock, F_SETFL, flags | O_NONBLOCK);
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot set socket "
- "non-blocking (%s)", strerror (errno));
- goto close_err;
- }
-
-#ifdef TCP_NODELAY
- ret = setsockopt(new_sock, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot set no-delay "
- " socket option");
- }
-#endif
-
- return new_sock;
-
-close_err:
- close (new_sock);
- new_sock = -1;
-
-err:
- return new_sock;
-}
-
-ssize_t
-rpcsvc_socket_read (int sockfd, char *readaddr, size_t readsize)
-{
- ssize_t dataread = 0;
- ssize_t readlen = -1;
-
- if (!readaddr)
- return -1;
-
- while (readsize > 0) {
- readlen = read (sockfd, readaddr, readsize);
- if (readlen == -1) {
- if (errno != EAGAIN) {
- dataread = -1;
- break;
- } else
- break;
- } else if (readlen == 0)
- break;
-
- dataread += readlen;
- readaddr += readlen;
- readsize -= readlen;
- }
-
- return dataread;
-}
-
-
-ssize_t
-rpcsvc_socket_write (int sockfd, char *buffer, size_t size)
-{
- size_t writelen = -1;
- ssize_t written = 0;
-
- if (!buffer)
- return -1;
-
- while (size > 0) {
- writelen = write (sockfd, buffer, size);
- if (writelen == -1) {
- if (errno != EAGAIN) {
- written = -1;
- break;
- } else
- break;
- } else if (writelen == 0)
- break;
-
- written += writelen;
- size -= writelen;
- buffer += writelen;
- }
-
- return written;
-}
-
-
-int
-rpcsvc_socket_peername (int sockfd, char *hostname, int hostlen)
-{
- struct sockaddr sa;
- socklen_t sl = sizeof (sa);
- int ret = EAI_FAIL;
-
- if (!hostname)
- return ret;
-
- ret = getpeername (sockfd, &sa, &sl);
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "Failed to get peer name:"
- " %s", strerror (errno));
- ret = EAI_FAIL;
- goto err;
- }
-
- ret = getnameinfo (&sa, sl, hostname, hostlen, NULL, 0, 0);
- if (ret != 0)
- goto err;
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_socket_peeraddr (int sockfd, char *addrstr, int addrlen,
- struct sockaddr *returnsa, socklen_t sasize)
-{
- struct sockaddr sa;
- int ret = EAI_FAIL;
-
- if (returnsa)
- ret = getpeername (sockfd, returnsa, &sasize);
- else {
- sasize = sizeof (sa);
- ret = getpeername (sockfd, &sa, &sasize);
- }
-
- if (ret == -1) {
- gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "Failed to get peer addr:"
- " %s", strerror (errno));
- ret = EAI_FAIL;
- goto err;
- }
-
- /* If caller did not specify a string into which the address can be
- * stored, dont bother getting it.
- */
- if (!addrstr) {
- ret = 0;
- goto err;
- }
-
- if (returnsa)
- ret = getnameinfo (returnsa, sasize, addrstr, addrlen, NULL, 0,
- NI_NUMERICHOST);
- else
- ret = getnameinfo (&sa, sasize, addrstr, addrlen, NULL, 0,
- NI_NUMERICHOST);
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_socket_block_tx (int sockfd)
-{
- int ret = -1;
- int on = 1;
-
-#ifdef TCP_CORK
- ret = setsockopt(sockfd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
-#endif
-
-#ifdef TCP_NOPUSH
- ret = setsockopt(sockfd, IPPROTO_TCP, TCP_NOPUSH, &on, sizeof(on));
-#endif
-
- return ret;
-}
-
-
-int
-rpcsvc_socket_unblock_tx (int sockfd)
-{
- int ret = -1;
- int off = 0;
-
-#ifdef TCP_CORK
- ret = setsockopt(sockfd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
-#endif
-
-#ifdef TCP_NOPUSH
- ret = setsockopt(sockfd, IPPROTO_TCP, TCP_NOPUSH, &off, sizeof(off));
-#endif
- return ret;
-}
-
diff --git a/xlators/nfs/lib/src/rpc-socket.h b/xlators/nfs/lib/src/rpc-socket.h
deleted file mode 100644
index 3a50c97a98d..00000000000
--- a/xlators/nfs/lib/src/rpc-socket.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RPCSVC_SOCKET_H_
-#define _RPCSVC_SOCKET_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "rpcsvc.h"
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/socket.h>
-#include <netdb.h>
-
-#define SA(ptr) ((struct sockaddr *)ptr)
-#define GF_RPCSVC_SOCK "rpc-socket"
-extern int
-rpcsvc_socket_listen (int addrfam, char *listenhost, uint16_t listenport);
-
-extern int
-rpcsvc_socket_accept (int listenfd);
-
-extern ssize_t
-rpcsvc_socket_read (int sockfd, char *readaddr, size_t readsize);
-
-extern ssize_t
-rpcsvc_socket_write (int sockfd, char *buffer, size_t size);
-
-extern int
-rpcsvc_socket_peername (int sockfd, char *hostname, int hostlen);
-
-extern int
-rpcsvc_socket_peeraddr (int sockfd, char *addrstr, int addrlen,
- struct sockaddr *returnsa, socklen_t sasize);
-extern int
-rpcsvc_socket_block_tx (int sockfd);
-
-extern int
-rpcsvc_socket_unblock_tx (int sockfd);
-#endif
diff --git a/xlators/nfs/lib/src/rpcsvc-auth.c b/xlators/nfs/lib/src/rpcsvc-auth.c
deleted file mode 100644
index f61fe91d0b2..00000000000
--- a/xlators/nfs/lib/src/rpcsvc-auth.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "rpcsvc.h"
-#include "logging.h"
-#include "dict.h"
-
-extern rpcsvc_auth_t *
-rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options);
-
-extern rpcsvc_auth_t *
-rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options);
-
-int
-rpcsvc_auth_add_initer (struct list_head *list, char *idfier,
- rpcsvc_auth_initer_t init)
-{
- struct rpcsvc_auth_list *new = NULL;
-
- if ((!list) || (!init) || (!idfier))
- return -1;
-
- new = GF_CALLOC (1, sizeof (*new), gf_common_mt_rpcsvc_auth_list);
- if (!new) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Memory allocation failed");
- return -1;
- }
-
- new->init = init;
- strcpy (new->name, idfier);
- INIT_LIST_HEAD (&new->authlist);
- list_add_tail (&new->authlist, list);
- return 0;
-}
-
-
-
-int
-rpcsvc_auth_add_initers (rpcsvc_t *svc)
-{
- int ret = -1;
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-unix",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_unix_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
- goto err;
- }
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-null",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_null_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
- goto err;
- }
-
- ret = 0;
-err:
- return 0;
-}
-
-
-int
-rpcsvc_auth_init_auth (rpcsvc_t *svc, dict_t *options,
- struct rpcsvc_auth_list *authitem)
-{
- int ret = -1;
-
- if ((!svc) || (!options) || (!authitem))
- return -1;
-
- if (!authitem->init) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
- ret = -1;
- goto err;
- }
-
- authitem->auth = authitem->init (svc, options);
- if (!authitem->auth) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Registration of auth failed:"
- " %s", authitem->name);
- ret = -1;
- goto err;
- }
-
- authitem->enable = 1;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
- authitem->auth->authname);
-
- ret = 0;
-err:
- return ret;
-}
-
-
-int
-rpcsvc_auth_init_auths (rpcsvc_t *svc, dict_t *options)
-{
- int ret = -1;
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
-
- if (!svc)
- return -1;
-
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
- ret = 0;
- goto err;
- }
-
- /* If auth null and sys are not disabled by the user, we must enable
- * it by default. This is a globally default rule, the user is still
- * allowed to disable the two for particular subvolumes.
- */
- if (!dict_get (options, "rpc-auth.auth-null"))
- ret = dict_set_dynstr (options, "rpc-auth.auth-null", "on");
-
- if (!dict_get (options, "rpc-auth.auth-unix"))
- ret = dict_set_dynstr (options, "rpc-auth.auth-unix", "on");
-
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- ret = rpcsvc_auth_init_auth (svc, options, auth);
- if (ret == -1)
- goto err;
- }
-
- ret = 0;
-err:
- return ret;
-
-}
-
-int
-rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
-{
- int ret = -1;
-
- if ((!svc) || (!options))
- return -1;
-
- ret = rpcsvc_auth_add_initers (svc);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
- goto out;
- }
-
- ret = rpcsvc_auth_init_auths (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-rpcsvc_auth_t *
-__rpcsvc_auth_get_handler (rpcsvc_request_t *req)
-{
- int ret = -1;
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
- rpcsvc_t *svc = NULL;
-
- if (!req)
- return NULL;
-
- svc = rpcsvc_request_service (req);
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
- ret = 0;
- goto err;
- }
-
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- if (!auth->enable)
- continue;
- if (auth->auth->authnum == req->cred.flavour)
- goto err;
-
- }
-
- auth = NULL;
-err:
- if (auth)
- return auth->auth;
- else
- return NULL;
-}
-
-rpcsvc_auth_t *
-rpcsvc_auth_get_handler (rpcsvc_request_t *req)
-{
- rpcsvc_auth_t *auth = NULL;
-
- auth = __rpcsvc_auth_get_handler (req);
- if (auth)
- goto ret;
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d",
- req->cred.flavour);
-
- /* The requested scheme was not available so fall back the to one
- * scheme that will always be present.
- */
- req->cred.flavour = AUTH_NULL;
- req->verf.flavour = AUTH_NULL;
- auth = __rpcsvc_auth_get_handler (req);
-ret:
- return auth;
-}
-
-
-int
-rpcsvc_auth_request_init (rpcsvc_request_t *req)
-{
- int ret = -1;
- rpcsvc_auth_t *auth = NULL;
-
- if (!req)
- return -1;
-
- auth = rpcsvc_auth_get_handler (req);
- if (!auth)
- goto err;
- ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
- if (!auth->authops->request_init)
- ret = auth->authops->request_init (req, auth->authprivate);
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_authenticate (rpcsvc_request_t *req)
-{
- int ret = RPCSVC_AUTH_REJECT;
- rpcsvc_auth_t *auth = NULL;
- int minauth = 0;
-
- if (!req)
- return ret;
-
- minauth = rpcsvc_request_prog_minauth (req);
- if (minauth > rpcsvc_request_cred_flavour (req)) {
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Auth too weak");
- rpcsvc_request_set_autherr (req, AUTH_TOOWEAK);
- goto err;
- }
-
- auth = rpcsvc_auth_get_handler (req);
- if (!auth) {
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "No auth handler found");
- goto err;
- }
-
- if (auth->authops->authenticate)
- ret = auth->authops->authenticate (req, auth->authprivate);
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
-{
- int count = 0;
- int gen = RPCSVC_AUTH_REJECT;
- int spec = RPCSVC_AUTH_REJECT;
- int final = RPCSVC_AUTH_REJECT;
- char *srchstr = NULL;
- char *valstr = NULL;
- gf_boolean_t boolval = _gf_false;
- int ret = 0;
-
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
-
- if ((!svc) || (!autharr) || (!volname))
- return -1;
-
- memset (autharr, 0, arrlen * sizeof(int));
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
- goto err;
- }
-
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- if (count >= arrlen)
- break;
-
- gen = gf_asprintf (&srchstr, "rpc-auth.%s", auth->name);
- if (gen == -1) {
- count = -1;
- goto err;
- }
-
- gen = RPCSVC_AUTH_REJECT;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- gen = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- spec = gf_asprintf (&srchstr, "rpc-auth.%s.%s", auth->name,
- volname);
- if (spec == -1) {
- count = -1;
- goto err;
- }
-
- spec = RPCSVC_AUTH_DONTCARE;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- spec = RPCSVC_AUTH_ACCEPT;
- else
- spec = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- final = rpcsvc_combine_gen_spec_volume_checks (gen, spec);
- if (final == RPCSVC_AUTH_ACCEPT) {
- autharr[count] = auth->auth->authnum;
- ++count;
- }
- }
-
-err:
- return count;
-}
-
-
-gid_t *
-rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen)
-{
- if ((!req) || (!arrlen))
- return NULL;
-
- if (req->cred.flavour != AUTH_UNIX)
- return NULL;
-
- *arrlen = req->auxgidcount;
- if (*arrlen == 0)
- return NULL;
-
- return &req->auxgids[0];
-}
-
diff --git a/xlators/nfs/lib/src/rpcsvc.c b/xlators/nfs/lib/src/rpcsvc.c
deleted file mode 100644
index 4ea008b882c..00000000000
--- a/xlators/nfs/lib/src/rpcsvc.c
+++ /dev/null
@@ -1,2770 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "rpcsvc.h"
-#include "rpc-socket.h"
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "list.h"
-#include "xdr-rpc.h"
-#include "iobuf.h"
-#include "globals.h"
-
-#include <errno.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
-#include <rpc/xdr.h>
-#include <fnmatch.h>
-#include <stdarg.h>
-#include <stdio.h>
-
-
-#define rpcsvc_alloc_request(con, request) \
- do { \
- request = (rpcsvc_request_t *) mem_get ((con)->rxpool); \
- memset (request, 0, sizeof (rpcsvc_request_t)); \
- } while (0) \
-
-/* The generic event handler for every stage */
-void *
-rpcsvc_stage_proc (void *arg)
-{
- rpcsvc_stage_t *stg = (rpcsvc_stage_t *)arg;
-
- if (!stg)
- return NULL;
-
- event_dispatch (stg->eventpool);
- return NULL;
-}
-
-
-rpcsvc_stage_t *
-rpcsvc_stage_init (rpcsvc_t *svc)
-{
- rpcsvc_stage_t *stg = NULL;
- int ret = -1;
- size_t stacksize = RPCSVC_THREAD_STACK_SIZE;
- pthread_attr_t stgattr;
- unsigned int eventpoolsize = 0;
-
- if (!svc)
- return NULL;
-
- stg = GF_CALLOC (1, sizeof(*stg), gf_common_mt_rpcsvc_stage_t);
- if (!stg)
- return NULL;
-
- eventpoolsize = svc->memfactor * RPCSVC_EVENTPOOL_SIZE_MULT;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "event pool size: %d", eventpoolsize);
- stg->eventpool = event_pool_new (eventpoolsize);
- if (!stg->eventpool)
- goto free_stg;
-
- pthread_attr_init (&stgattr);
- ret = pthread_attr_setstacksize (&stgattr, stacksize);
- if (ret == EINVAL)
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "Using default thread stack size");
-
- ret = pthread_create (&stg->tid, &stgattr, rpcsvc_stage_proc,
- (void *)stg);
- if (ret != 0) {
- ret = -1;
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Stage creation failed");
- goto free_stg;
- }
-
- stg->svc = svc;
- ret = 0;
-free_stg:
- if (ret == -1) {
- GF_FREE (stg);
- stg = NULL;
- }
-
- return stg;
-}
-
-
-int
-rpcsvc_init_options (rpcsvc_t *svc, dict_t *options)
-{
- svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
- return 0;
-}
-
-
-/* The global RPC service initializer.
- * Starts up the stages and then waits for RPC program registrations
- * to come in.
- */
-rpcsvc_t *
-rpcsvc_init (glusterfs_ctx_t *ctx, dict_t *options)
-{
- rpcsvc_t *svc = NULL;
- int ret = -1;
-
- if ((!ctx) || (!options))
- return NULL;
-
- svc = GF_CALLOC (1, sizeof (*svc), gf_common_mt_rpcsvc_t);
- if (!svc)
- return NULL;
-
- pthread_mutex_init (&svc->rpclock, NULL);
- INIT_LIST_HEAD (&svc->stages);
- INIT_LIST_HEAD (&svc->authschemes);
-
- ret = rpcsvc_init_options (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
- goto free_svc;
- }
-
- ret = rpcsvc_auth_init (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init "
- "authentication");
- goto free_svc;
- }
-
- ret = -1;
- svc->defaultstage = rpcsvc_stage_init (svc);
- if (!svc->defaultstage) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,"RPC service init failed.");
- goto free_svc;
- }
- svc->options = options;
- svc->ctx = ctx;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
-
- ret = 0;
-free_svc:
- if (ret == -1) {
- GF_FREE (svc);
- svc = NULL;
- }
-
- return svc;
-}
-
-
-/* Once multi-threaded support is complete, we'll be able to round-robin
- * the various incoming connections over the many available stages. This
- * function selects one from among all the stages.
- */
-rpcsvc_stage_t *
-rpcsvc_select_stage (rpcsvc_t *rpcservice)
-{
- if (!rpcservice)
- return NULL;
-
- return rpcservice->defaultstage;
-}
-
-
-int
-rpcsvc_conn_peer_check_search (dict_t *options, char *pattern, char *clstr)
-{
- int ret = -1;
- char *addrtok = NULL;
- char *addrstr = NULL;
- char *svptr = NULL;
-
- if ((!options) || (!clstr))
- return -1;
-
- if (!dict_get (options, pattern))
- return -1;
-
- ret = dict_get_str (options, pattern, &addrstr);
- if (ret < 0) {
- ret = -1;
- goto err;
- }
-
- if (!addrstr) {
- ret = -1;
- goto err;
- }
-
- addrtok = strtok_r (addrstr, ",", &svptr);
- while (addrtok) {
-
- ret = fnmatch (addrtok, clstr, FNM_CASEFOLD);
- if (ret == 0)
- goto err;
-
- addrtok = strtok_r (NULL, ",", &svptr);
- }
-
- ret = -1;
-err:
-
- return ret;
-}
-
-
-int
-rpcsvc_conn_peer_check_allow (dict_t *options, char *volname, char *clstr)
-{
- int ret = RPCSVC_AUTH_DONTCARE;
- char *srchstr = NULL;
- char globalrule[] = "rpc-auth.addr.allow";
-
- if ((!options) || (!clstr))
- return ret;
-
- /* If volname is NULL, then we're searching for the general rule to
- * determine the current address in clstr is allowed or not for all
- * subvolumes.
- */
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_DONTCARE;
- goto out;
- }
- } else
- srchstr = globalrule;
-
- ret = rpcsvc_conn_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
-
- if (ret == 0)
- ret = RPCSVC_AUTH_ACCEPT;
- else
- ret = RPCSVC_AUTH_DONTCARE;
-out:
- return ret;
-}
-
-int
-rpcsvc_conn_peer_check_reject (dict_t *options, char *volname, char *clstr)
-{
- int ret = RPCSVC_AUTH_DONTCARE;
- char *srchstr = NULL;
- char generalrule[] = "rpc-auth.addr.reject";
-
- if ((!options) || (!clstr))
- return ret;
-
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto out;
- }
- } else
- srchstr = generalrule;
-
- ret = rpcsvc_conn_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
-
- if (ret == 0)
- ret = RPCSVC_AUTH_REJECT;
- else
- ret = RPCSVC_AUTH_DONTCARE;
-out:
- return ret;
-}
-
-
-/* This function tests the results of the allow rule and the reject rule to
- * combine them into a single result that can be used to determine if the
- * connection should be allowed to proceed.
- * Heres the test matrix we need to follow in this function.
- *
- * A - Allow, the result of the allow test. Never returns R.
- * R - Reject, result of the reject test. Never returns A.
- * Both can return D or dont care if no rule was given.
- *
- * | @allow | @reject | Result |
- * | A | R | R |
- * | D | D | D |
- * | A | D | A |
- * | D | R | R |
- */
-int
-rpcsvc_combine_allow_reject_volume_check (int allow, int reject)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- /* If allowed rule allows but reject rule rejects, we stay cautious
- * and reject. */
- if ((allow == RPCSVC_AUTH_ACCEPT) && (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* if both are dont care, that is user did not specify for either allow
- * or reject, we leave it up to the general rule to apply, in the hope
- * that there is one.
- */
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- /* If one is dont care, the other one applies. */
- else if ((allow == RPCSVC_AUTH_ACCEPT) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
-
-/* Combines the result of the general rule test against, the specific rule
- * to determine final permission for the client's address.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | D |
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
- */
-int
-rpcsvc_combine_gen_spec_addr_checks (int gen, int spec)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
-
-
-/* Combines the result of the general rule test against, the specific rule
- * to determine final test for the connection coming in for a given volume.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | R |, special case, we intentionally disallow this.
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
- */
-int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* On no rule, we reject. */
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
-
-int
-rpcsvc_conn_peer_check_name (dict_t *options, char *volname,
- rpcsvc_conn_t *conn)
-{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_REJECT;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
-
- if (!conn)
- return ret;
-
- ret = rpcsvc_conn_peername (conn, clstr, RPCSVC_PEER_STRLEN);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- aret = rpcsvc_conn_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_conn_peer_check_reject (options, volname, clstr);
-
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_conn_peer_check_addr (dict_t *options, char *volname,rpcsvc_conn_t *conn)
-{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_DONTCARE;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
-
- if (!conn)
- return ret;
-
- ret = rpcsvc_conn_peeraddr (conn, clstr, RPCSVC_PEER_STRLEN, NULL, 0);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- aret = rpcsvc_conn_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_conn_peer_check_reject (options, volname, clstr);
-
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-err:
- return ret;
-}
-
-
-int
-rpcsvc_conn_check_volume_specific (dict_t *options, char *volname,
- rpcsvc_conn_t *conn)
-{
- int namechk = RPCSVC_AUTH_REJECT;
- int addrchk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_true;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!volname) || (!conn))
- return RPCSVC_AUTH_REJECT;
-
- /* Enabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
- }
-
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_conn_peer_check_name (options, volname, conn);
- addrchk = rpcsvc_conn_peer_check_addr (options, volname, conn);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk, namechk);
- else
- ret = addrchk;
-
- return ret;
-}
-
-
-int
-rpcsvc_conn_check_volume_general (dict_t *options, rpcsvc_conn_t *conn)
-{
- int addrchk = RPCSVC_AUTH_REJECT;
- int namechk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_true;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!conn))
- return RPCSVC_AUTH_REJECT;
-
- /* Enabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
- }
-
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_conn_peer_check_name (options, NULL, conn);
- addrchk = rpcsvc_conn_peer_check_addr (options, NULL, conn);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk, namechk);
- else
- ret = addrchk;
-
- return ret;
-}
-
-int
-rpcsvc_conn_peer_check (dict_t *options, char *volname, rpcsvc_conn_t *conn)
-{
- int general_chk = RPCSVC_AUTH_REJECT;
- int specific_chk = RPCSVC_AUTH_REJECT;
-
- if ((!options) || (!volname) || (!conn))
- return RPCSVC_AUTH_REJECT;
-
- general_chk = rpcsvc_conn_check_volume_general (options, conn);
- specific_chk = rpcsvc_conn_check_volume_specific (options, volname,
- conn);
-
- return rpcsvc_combine_gen_spec_volume_checks (general_chk,specific_chk);
-}
-
-
-char *
-rpcsvc_volume_allowed (dict_t *options, char *volname)
-{
- char globalrule[] = "rpc-auth.addr.allow";
- char *srchstr = NULL;
- char *addrstr = NULL;
- int ret = -1;
-
- if ((!options) || (!volname))
- return NULL;
-
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- goto out;
- }
-
- if (!dict_get (options, srchstr)) {
- GF_FREE (srchstr);
- srchstr = globalrule;
- ret = dict_get_str (options, srchstr, &addrstr);
- } else
- ret = dict_get_str (options, srchstr, &addrstr);
-
-out:
- return addrstr;
-}
-
-
-/* Initialize the core of a connection */
-rpcsvc_conn_t *
-rpcsvc_conn_init (rpcsvc_t *svc, rpcsvc_program_t *prog, int sockfd)
-{
- rpcsvc_conn_t *conn = NULL;
- int ret = -1;
- unsigned int poolcount = 0;
-
- conn = GF_CALLOC (1, sizeof(*conn), gf_common_mt_rpcsvc_conn_t);
- if (!conn) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "memory allocation failed");
- return NULL;
- }
-
- conn->sockfd = sockfd;
- conn->program = (void *)prog;
- INIT_LIST_HEAD (&conn->txbufs);
- poolcount = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "tx pool: %d", poolcount);
- conn->txpool = mem_pool_new (rpcsvc_txbuf_t, poolcount);
- if (!conn->txpool) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
- goto free_conn;
- }
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", poolcount);
- conn->rxpool = mem_pool_new (rpcsvc_request_t, poolcount);
- if (!conn->rxpool) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
- goto free_txp;
- }
-
- /* Cannot consider a connection connected unless the user of this
- * connection decides it is ready to use. It is possible that we have
- * to free this connection soon after. That free will not happpen
- * unless the state is disconnected.
- */
- conn->connstate = RPCSVC_CONNSTATE_DISCONNECTED;
- pthread_mutex_init (&conn->connlock, NULL);
- conn->connref = 0;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New connection inited: sockfd: %d",
- sockfd);
-
- ret = 0;
-free_txp:
- if (ret == -1)
- mem_pool_destroy (conn->txpool);
-
-free_conn:
- if (ret == -1) {
- GF_FREE (conn);
- conn = NULL;
- }
-
- return conn;
-}
-
-
-void
-rpcsvc_conn_destroy (rpcsvc_conn_t *conn)
-{
- mem_pool_destroy (conn->txpool);
- mem_pool_destroy (conn->rxpool);
-
- if (conn->program->conn_destroy)
- conn->program->conn_destroy (conn->program->private, conn);
-
- /* Need to destory record state, txlists etc. */
- GF_FREE (conn);
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Connection destroyed");
-}
-
-
-int
-__rpcsvc_conn_unref (rpcsvc_conn_t *conn)
-{
- --conn->connref;
- return conn->connref;
-}
-
-
-void
-__rpcsvc_conn_deinit (rpcsvc_conn_t *conn)
-{
- if (!conn)
- return;
-
- if ((conn->stage) && (conn->stage->eventpool)) {
- event_unregister (conn->stage->eventpool, conn->sockfd,
- conn->eventidx);
- }
-
- if (rpcsvc_conn_check_active (conn)) {
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Connection de-activated:"
- " sockfd: %d", conn->sockfd);
- conn->connstate = RPCSVC_CONNSTATE_DISCONNECTED;
- }
-
- if (conn->sockfd != -1) {
- close (conn->sockfd);
- conn->sockfd = -1;
- }
-}
-
-
-void
-rpcsvc_conn_deinit (rpcsvc_conn_t *conn)
-{
- int ref = 0;
-
- if (!conn)
- return;
-
- pthread_mutex_lock (&conn->connlock);
- {
- __rpcsvc_conn_deinit (conn);
- ref = __rpcsvc_conn_unref (conn);
- }
- pthread_mutex_unlock (&conn->connlock);
-
- if (ref == 0)
- rpcsvc_conn_destroy (conn);
-
- return;
-}
-
-
-void
-rpcsvc_conn_unref (rpcsvc_conn_t *conn)
-{
- int ref = 0;
- if (!conn)
- return;
-
- pthread_mutex_lock (&conn->connlock);
- {
- ref = __rpcsvc_conn_unref (conn);
- }
- pthread_mutex_unlock (&conn->connlock);
-
- if (ref == 0)
- rpcsvc_conn_destroy (conn);
-}
-
-
-int
-rpcsvc_conn_active (rpcsvc_conn_t *conn)
-{
- int status = 0;
-
- if (!conn)
- return 0;
-
- pthread_mutex_lock (&conn->connlock);
- {
- status = rpcsvc_conn_check_active (conn);
- }
- pthread_mutex_unlock (&conn->connlock);
-
- return status;
-}
-
-
-
-void
-rpcsvc_conn_ref (rpcsvc_conn_t *conn)
-{
- if (!conn)
- return;
-
- pthread_mutex_lock (&conn->connlock);
- {
- ++conn->connref;
- }
- pthread_mutex_unlock (&conn->connlock);
-
- return;
-}
-
-
-void
-rpcsvc_conn_state_init (rpcsvc_conn_t *conn)
-{
- if (!conn)
- return;
-
- ++conn->connref;
- conn->connstate = RPCSVC_CONNSTATE_CONNECTED;
-}
-
-/* Builds a rpcsvc_conn_t with the aim of listening on it.
- */
-rpcsvc_conn_t *
-rpcsvc_conn_listen_init (rpcsvc_t *svc, rpcsvc_program_t *newprog)
-{
- rpcsvc_conn_t *conn = NULL;
- int sock = -1;
-
- if (!newprog)
- return NULL;
-
- sock = rpcsvc_socket_listen (newprog->progaddrfamily, newprog->proghost,
- newprog->progport);
- if (sock == -1)
- goto err;
-
- conn = rpcsvc_conn_init (svc, newprog, sock);
- if (!conn)
- goto sock_close_err;
-
- rpcsvc_conn_state_init (conn);
-sock_close_err:
- if (!conn)
- close (sock);
-
-err:
- return conn;
-}
-
-void
-rpcsvc_record_init (rpcsvc_record_state_t *rs, struct iobuf_pool *pool)
-{
- if (!rs)
- return;
-
- rs->state = RPCSVC_READ_FRAGHDR;
- rs->vecstate = 0;
- rs->remainingfraghdr = RPCSVC_FRAGHDR_SIZE;
- rs->remainingfrag = 0;
- rs->fragsize = 0;
- rs->recordsize = 0;
- rs->islastfrag = 0;
-
- /* If the rs preserves a ref to the iob used by the previous request,
- * we must unref it here to prevent memory leak.
- * If program actor wanted to keep that memory around, it should've
- * refd it on entry into the actor.
- */
- if (rs->activeiob)
- iobuf_unref (rs->activeiob);
-
- if (rs->vectoriob) {
- iobuf_unref (rs->vectoriob);
- rs->vectoriob = NULL;
- }
-
- rs->activeiob = iobuf_get (pool);
- rs->fragcurrent = iobuf_ptr (rs->activeiob);
-
- memset (rs->fragheader, 0, RPCSVC_FRAGHDR_SIZE);
- rs->hdrcurrent = &rs->fragheader[0];
-
-}
-
-
-int
-rpcsvc_conn_privport_check (rpcsvc_t *svc, char *volname, rpcsvc_conn_t *conn)
-{
- struct sockaddr_in sa;
- int ret = RPCSVC_AUTH_REJECT;
- socklen_t sasize = sizeof (sa);
- char *srchstr = NULL;
- char *valstr = NULL;
- int globalinsecure = RPCSVC_AUTH_REJECT;
- int exportinsecure = RPCSVC_AUTH_DONTCARE;
- uint16_t port = 0;
- gf_boolean_t insecure = _gf_false;
-
- if ((!svc) || (!volname) || (!conn))
- return ret;
-
- ret = rpcsvc_conn_peeraddr (conn, NULL, 0, (struct sockaddr *)&sa,
- sasize);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get peer addr: %s",
- gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- port = ntohs (sa.sin_port);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
- /* If the port is already a privileged one, dont bother with checking
- * options.
- */
- if (port <= 1024) {
- ret = RPCSVC_AUTH_ACCEPT;
- goto err;
- }
-
- /* Disabled by default */
- if ((dict_get (svc->options, "rpc-auth.ports.insecure"))) {
- ret = dict_get_str (svc->options, "rpc-auth.ports.insecure"
- , &srchstr);
- if (ret == 0) {
- ret = gf_string2boolean (srchstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- globalinsecure = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
-
- /* Disabled by default */
- ret = gf_asprintf (&srchstr, "rpc-auth.ports.%s.insecure", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (srchstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- exportinsecure = RPCSVC_AUTH_ACCEPT;
- else
- exportinsecure = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
-
- ret = rpcsvc_combine_gen_spec_volume_checks (globalinsecure,
- exportinsecure);
- if (ret == RPCSVC_AUTH_ACCEPT)
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
- else
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port not"
- " allowed");
-
-err:
- return ret;
-}
-
-/* Inits a rpcsvc_conn_t after accepting the connection.
- */
-rpcsvc_conn_t *
-rpcsvc_conn_accept_init (rpcsvc_t *svc, int listenfd,
- rpcsvc_program_t *destprog)
-{
- rpcsvc_conn_t *newconn = NULL;
- int sock = -1;
- int ret = -1;
-
- sock = rpcsvc_socket_accept (listenfd);
- if (sock == -1)
- goto err;
-
- newconn = rpcsvc_conn_init (svc, destprog, sock);
- if (!newconn) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init conn object");
- ret = -1;
- goto err;
- }
-
- rpcsvc_record_init (&newconn->rstate, svc->ctx->iobuf_pool);
- rpcsvc_conn_state_init (newconn);
- if (destprog->conn_init)
- destprog->conn_init (destprog->private, newconn);
- ret = 0;
-
-err:
- if (ret == -1)
- close (sock);
-
- return newconn;
-}
-
-
-/* Once the connection has been created, we need to associate it with
- * a stage so that the selected stage will handle the event on this connection.
- * This function also allows the caller to decide which handler should
- * be executed in the context of the stage, and also which specific events
- * should be handed to the handler when running in this particular stage.
- */
-int
-rpcsvc_stage_conn_associate (rpcsvc_stage_t *stg, rpcsvc_conn_t *conn,
- event_handler_t handler, void *data)
-{
- int ret = -1;
-
- if ((!stg) || (!conn))
- return -1;
-
- conn->stage = stg;
- conn->eventidx = event_register (stg->eventpool, conn->sockfd, handler,
- data, 1, 0);
- if (conn->eventidx == -1)
- goto err;
-
- ret = 0;
-err:
- return ret;
-}
-
-
-/* Depending on the state we're in, return the size of the next read request. */
-size_t
-rpcsvc_record_read_size (rpcsvc_record_state_t *rs)
-{
- size_t toread = -1;
-
- if (!rs)
- return -1;
-
- if (rpcsvc_record_readfraghdr (rs))
- toread = rs->remainingfraghdr;
- else if (rpcsvc_record_readfrag (rs))
- toread = rs->remainingfrag;
- else
- toread = RPCSVC_CONN_READ;
-
- return toread;
-}
-
-
-uint32_t
-rpcsvc_record_extract_fraghdr (char *fraghdr)
-{
- uint32_t hdr = 0;
- if (!fraghdr)
- return 0;
-
- memcpy ((void *)&hdr, fraghdr, sizeof (hdr));
-
- hdr = ntohl (hdr);
- return hdr;
-}
-
-
-ssize_t
-rpcsvc_record_read_complete_fraghdr (rpcsvc_record_state_t *rs,ssize_t dataread)
-{
- uint32_t remhdr = 0;
- char *fraghdrstart = NULL;
- uint32_t fraghdr = 0;
-
- fraghdrstart = &rs->fragheader[0];
- remhdr = rs->remainingfraghdr;
- fraghdr = rpcsvc_record_extract_fraghdr (fraghdrstart);
- rs->fragsize = RPCSVC_FRAGSIZE (fraghdr);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Received fragment size: %d",
- rs->fragsize);
- if (rpcsvc_record_vectored (rs)) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC header,"
- " remaining: %d", RPCSVC_BARERPC_MSGSZ);
- rs->remainingfrag = RPCSVC_BARERPC_MSGSZ;
- } else {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Regular RPC header,"
- " remaining: %d", rs->fragsize);
- rs->remainingfrag = rs->fragsize;
- }
-
- rs->state = RPCSVC_READ_FRAG;
- dataread -= remhdr;
- rs->remainingfraghdr -= remhdr;
- rs->islastfrag = RPCSVC_LASTFRAG (fraghdr);
-
- return dataread;
-}
-
-
-ssize_t
-rpcsvc_record_read_partial_fraghdr (rpcsvc_record_state_t *rs, ssize_t dataread)
-{
-
- /* In case we got less than even the remaining header size,
- * we need to consume it all and wait for remaining frag hdr
- * bytes to come in.
- */
- rs->remainingfraghdr -= dataread;
- rpcsvc_record_update_currenthdr (rs, dataread);
- dataread = 0;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Fragment header remaining: %d",
- rs->remainingfraghdr);
-
- return dataread;
-}
-
-
-ssize_t
-rpcsvc_record_update_fraghdr (rpcsvc_record_state_t *rs, ssize_t dataread)
-{
- if ((!rs) || (dataread <= 0))
- return -1;
-
- /* Why are we even here, we're not supposed to be in the fragment
- * header processing state.
- */
- if (!rpcsvc_record_readfraghdr(rs)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
- ": request to update frag header when state is not"
- "RPCSVC_READ_FRAGHDR");
- return -1;
- }
-
- /* Again, if header has been read then the state member above should've
- * been different, this is crazy. We should not be here.
- */
- if (rs->remainingfraghdr == 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
- ": request to update frag header when frag header"
- "remaining is 0.");
- return -1;
- }
-
- /* We've definitely got the full header now and may be even more. */
- if (dataread >= rs->remainingfraghdr)
- dataread = rpcsvc_record_read_complete_fraghdr (rs, dataread);
- else
- dataread = rpcsvc_record_read_partial_fraghdr (rs, dataread);
-
- return dataread;
-}
-
-ssize_t
-rpcsvc_record_read_complete_frag (rpcsvc_record_state_t *rs, ssize_t dataread)
-{
- uint32_t remfrag;
-
- /* Since the frag is now complete, change the state to the next
- * one, i.e. to read the header of the next fragment.
- */
- remfrag = rs->remainingfrag;
- rs->state = RPCSVC_READ_FRAGHDR;
- dataread -= remfrag;
-
- /* This will be 0 now. */
- rs->remainingfrag -= remfrag;
-
- /* Now that the fragment is complete, we must update the
- * record size. Recall that fragsize was got from the frag
- * header.
- */
- rs->recordsize += rs->fragsize;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Fragment remaining: %d",
- rs->remainingfrag);
-
- return dataread;
-}
-
-
-ssize_t
-rpcsvc_record_read_partial_frag (rpcsvc_record_state_t *rs, ssize_t dataread)
-{
- /* Just take whatever has come through the current network buffer. */
- rs->remainingfrag -= dataread;
-
- rpcsvc_record_update_currentfrag (rs, dataread);
- /* Since we know we're consuming the whole buffer from dataread
- * simply setting to 0 zero is fine.
- */
- dataread = 0;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Fragment remaining: %d",
- rs->remainingfrag);
- return dataread;
-}
-
-
-ssize_t
-rpcsvc_record_update_frag (rpcsvc_record_state_t *rs, ssize_t dataread)
-{
- if ((!rs) || (dataread <= 0))
- return -1;
-
- if (!rpcsvc_record_readfrag (rs)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
- ": request to update fragment when record state is not"
- "RPCSVC_READ_FRAG.");
- return -1;
- }
-
- if (rs->remainingfrag == 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
- ": request to update fragment when there is no fragment"
- " data remaining to be read.");
- return -1;
- }
-
- /* We've read in more data than the current fragment requires. */
- if (dataread >= rs->remainingfrag)
- dataread = rpcsvc_record_read_complete_frag (rs, dataread);
- else
- dataread = rpcsvc_record_read_partial_frag (rs, dataread);
-
- return dataread;
-}
-
-
-/* This needs to change to returning errors, since
- * we need to return RPC specific error messages when some
- * of the pointers below are NULL.
- */
-rpcsvc_actor_t *
-rpcsvc_program_actor (rpcsvc_conn_t *conn, rpcsvc_request_t *req)
-{
- rpcsvc_program_t *program = NULL;
- int err = SYSTEM_ERR;
- rpcsvc_actor_t *actor = NULL;
-
- if ((!conn) || (!req))
- goto err;
-
- program = (rpcsvc_program_t *)conn->program;
- if (!program)
- goto err;
-
- if (req->prognum != program->prognum) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC program not available");
- err = PROG_UNAVAIL;
- goto err;
- }
-
- if (!program->actors) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC System error");
- err = SYSTEM_ERR;
- goto err;
- }
-
- if (req->progver != program->progver) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC program version not"
- " available");
- err = PROG_MISMATCH;
- goto err;
- }
-
- if ((req->procnum < 0) || (req->procnum >= program->numactors)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
- " available");
- err = PROC_UNAVAIL;
- goto err;
- }
-
- actor = &program->actors[req->procnum];
- if (!actor->actor) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
- " available");
- err = PROC_UNAVAIL;
- actor = NULL;
- goto err;
- }
-
- err = SUCCESS;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Actor found: %s - %s",
- program->progname, actor->procname);
-err:
- if (req)
- req->rpc_err = err;
-
- return actor;
-}
-
-
-rpcsvc_txbuf_t *
-rpcsvc_init_txbuf (rpcsvc_conn_t *conn, struct iovec msg, struct iobuf *iob,
- struct iobref *iobref, int txflags)
-{
- rpcsvc_txbuf_t *txbuf = NULL;
-
- txbuf = (rpcsvc_txbuf_t *) mem_get(conn->txpool);
- if (!txbuf) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get txbuf");
- return NULL;
- }
-
- memset (txbuf, 0, sizeof (*txbuf));
- INIT_LIST_HEAD (&txbuf->txlist);
- txbuf->buf = msg;
-
- /* If it was required, this iob must've been ref'd already
- * so I dont have to bother here.
- */
- txbuf->iob = iob;
- txbuf->iobref = iobref;
- txbuf->offset = 0;
- txbuf->txbehave = txflags;
-
- return txbuf;
-}
-
-
-int
-rpcsvc_conn_append_txlist (rpcsvc_conn_t *conn, struct iovec msg,
- struct iobuf *iob, int txflags)
-{
- rpcsvc_txbuf_t *txbuf = NULL;
-
- if ((!conn) || (!msg.iov_base) || (!iob))
- return -1;
-
- txbuf = rpcsvc_init_txbuf (conn, msg, iob, NULL, txflags);
- if (!txbuf)
- return -1;
-
- list_add_tail (&txbuf->txlist, &conn->txbufs);
- return 0;
-}
-
-
-void
-rpcsvc_set_lastfrag (uint32_t *fragsize) {
- (*fragsize) |= 0x80000000U;
-}
-
-void
-rpcsvc_set_frag_header_size (uint32_t size, char *haddr)
-{
- size = htonl (size);
- memcpy (haddr, &size, sizeof (size));
-}
-
-void
-rpcsvc_set_last_frag_header_size (uint32_t size, char *haddr)
-{
- rpcsvc_set_lastfrag (&size);
- rpcsvc_set_frag_header_size (size, haddr);
-}
-
-
-/* Given the RPC reply structure and the payload handed by the RPC program,
- * encode the RPC record header into the buffer pointed by recordstart.
- */
-struct iovec
-rpcsvc_record_build_header (char *recordstart, size_t rlen,
- struct rpc_msg reply, size_t payload)
-{
- struct iovec replyhdr;
- struct iovec txrecord = {0, 0};
- size_t fraglen = 0;
- int ret = -1;
-
- /* After leaving aside the 4 bytes for the fragment header, lets
- * encode the RPC reply structure into the buffer given to us.
- */
- ret = rpc_reply_to_xdr (&reply,(recordstart + RPCSVC_FRAGHDR_SIZE),
- rlen, &replyhdr);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to create RPC reply");
- goto err;
- }
-
- fraglen = payload + replyhdr.iov_len;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Reply fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, replyhdr.iov_len);
-
- /* Since we're not spreading RPC records over mutiple fragments
- * we just set this fragment as the first and last fragment for this
- * record.
- */
- rpcsvc_set_last_frag_header_size (fraglen, recordstart);
-
- /* Even though the RPC record starts at recordstart+RPCSVC_FRAGHDR_SIZE
- * we need to transmit the record with the fragment header, which starts
- * at recordstart.
- */
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = RPCSVC_FRAGHDR_SIZE + replyhdr.iov_len;
-
-err:
- return txrecord;
-}
-
-
-int
-rpcsvc_conn_submit (rpcsvc_conn_t *conn, struct iovec hdr,
- struct iobuf *hdriob, struct iovec msgvec,
- struct iobuf *msgiob)
-{
- int ret = -1;
-
- if ((!conn) || (!hdr.iov_base) || (!hdriob))
- return -1;
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx Header: %zu, payload: %zu",
- hdr.iov_len, msgvec.iov_len);
- /* Now that we have both the RPC and Program buffers in xdr format
- * lets hand it to the transmission layer.
- */
- pthread_mutex_lock (&conn->connlock);
- {
- if (!rpcsvc_conn_check_active (conn)) {
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Connection inactive");
- goto unlock_err;
- }
-
- ret = rpcsvc_conn_append_txlist (conn, hdr, hdriob,
- RPCSVC_TXB_FIRST);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to append "
- "header to transmission list");
- goto unlock_err;
- }
-
- /* It is possible that this RPC reply is an error reply. In that
- * case we might not have been handed a payload.
- */
- ret = 0;
- if (msgiob)
- ret = rpcsvc_conn_append_txlist (conn, msgvec, msgiob,
- RPCSVC_TXB_LAST);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to append"
- " payload to transmission list");
- goto unlock_err;
- }
- }
-unlock_err:
- pthread_mutex_unlock (&conn->connlock);
-
- if (ret == -1)
- goto err;
-
- /* Tell event pool, we're interested in poll_out to trigger flush
- * of our tx buffers.
- */
- conn->eventidx = event_select_on (conn->stage->eventpool, conn->sockfd,
- conn->eventidx, -1, 1);
- ret = 0;
-err:
-
- return ret;
-}
-
-
-int
-rpcsvc_fill_reply (rpcsvc_request_t *req, struct rpc_msg *reply)
-{
- rpcsvc_program_t *prog = NULL;
- if ((!req) || (!reply))
- return -1;
-
- prog = rpcsvc_request_program (req);
- rpc_fill_empty_reply (reply, req->xid);
-
- if (req->rpc_stat == MSG_DENIED)
- rpc_fill_denied_reply (reply, req->rpc_err, req->auth_err);
- else if (req->rpc_stat == MSG_ACCEPTED)
- rpc_fill_accepted_reply (reply, req->rpc_err, prog->proglowvers,
- prog->proghighvers, req->verf.flavour,
- req->verf.datalen,
- req->verf.authdata);
- else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_stat value");
-
- return 0;
-}
-
-
-/* Given a request and the reply payload, build a reply and encodes the reply
- * into a record header. This record header is encoded into the vector pointed
- * to be recbuf.
- * msgvec is the buffer that points to the payload of the RPC program.
- * This buffer can be NULL, if an RPC error reply is being constructed.
- * The only reason it is needed here is that in case the buffer is provided,
- * we should account for the length of that buffer in the RPC fragment header.
- */
-struct iobuf *
-rpcsvc_record_build_record (rpcsvc_request_t *req, size_t payload,
- struct iovec *recbuf)
-{
- struct rpc_msg reply;
- struct iobuf *replyiob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- rpcsvc_conn_t *conn = NULL;
- rpcsvc_t *svc = NULL;
-
- if ((!req) || (!req->conn) || (!recbuf))
- return NULL;
-
- /* First, try to get a pointer into the buffer which the RPC
- * layer can use.
- */
- conn = req->conn;
- svc = rpcsvc_conn_rpcsvc (conn);
- replyiob = iobuf_get (svc->ctx->iobuf_pool);
- pagesize = iobpool_pagesize ((struct iobuf_pool *)svc->ctx->iobuf_pool);
- if (!replyiob) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get iobuf");
- goto err_exit;
- }
-
- record = iobuf_ptr (replyiob); /* Now we have it. */
-
- /* Fill the rpc structure and XDR it into the buffer got above. */
- rpcsvc_fill_reply (req, &reply);
- recordhdr = rpcsvc_record_build_header (record, pagesize, reply,
- payload);
- if (!recordhdr.iov_base) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to build record "
- " header");
- iobuf_unref (replyiob);
- replyiob = NULL;
- recbuf->iov_base = NULL;
- goto err_exit;
- }
-
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
-err_exit:
- return replyiob;
-}
-
-
-/*
- * The function to submit a program message to the RPC service.
- * This message is added to the transmission queue of the
- * conn.
- *
- * Program callers are not expected to use the msgvec->iov_base
- * address for anything else.
- * Nor are they expected to free it once this function returns.
- * Once the transmission of the buffer is completed by the RPC service,
- * the memory area as referenced through @msg will be unrefed.
- * If a higher layer does not want anything to do with this iobuf
- * after this function returns, it should call unref on it. For keeping
- * it around till the transmission is actually complete, rpcsvc also refs it.
- * *
- * If this function returns an error by returning -1, the
- * higher layer programs should assume that a disconnection happened
- * and should know that the conn memory area as well as the req structure
- * has been freed internally.
- *
- * For now, this function assumes that a submit is always called
- * to send a new record. Later, if there is a situation where different
- * buffers for the same record come from different sources, then we'll
- * need to change this code to account for multiple submit calls adding
- * the buffers into a single record.
- */
-
-int
-rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec msgvec,
- struct iobuf *msg)
-{
- int ret = -1;
- struct iobuf *replyiob = NULL;
- struct iovec recordhdr = {0, };
- rpcsvc_conn_t *conn = NULL;
-
- if ((!req) || (!req->conn))
- return -1;
-
- conn = req->conn;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msgvec.iov_len);
- /* Build the buffer containing the encoded RPC reply. */
- replyiob = rpcsvc_record_build_record (req, msgvec.iov_len, &recordhdr);
- if (!replyiob) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
- goto disconnect_exit;
- }
-
- /* Must ref the iobuf got from higher layer so that the higher layer
- * can rest assured that it can unref it and leave the final freeing
- * of the buffer to us. Note msg can be NULL if an RPC-only message
- * was being sent. Happens when an RPC error reply is being sent.
- */
- if (msg)
- iobuf_ref (msg);
- ret = rpcsvc_conn_submit (conn, recordhdr, replyiob, msgvec, msg);
- mem_put (conn->rxpool, req);
-
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to submit message");
- iobuf_unref (replyiob);
- }
-
-disconnect_exit:
- /* Note that a unref is called everytime a reply is sent. This is in
- * response to the ref that is performed on the conn when a request is
- * handed to the RPC program.
- *
- * The catch, however, is that if the reply is an rpc error, we must
- * not unref. This is because the ref only contains
- * references for the actors to which the request was handed plus one
- * reference maintained by the RPC layer. By unrefing for a case where
- * no actor was called, we will be losing the ref held for the RPC
- * layer.
- */
- if ((rpcsvc_request_accepted (req)) &&
- (rpcsvc_request_accepted_success (req)))
- rpcsvc_conn_unref (conn);
-
- return ret;
-}
-
-
-int
-rpcsvc_request_attach_vector (rpcsvc_request_t *req, struct iovec msgvec,
- struct iobuf *iob, struct iobref *iobref,
- int finalvector)
-{
- rpcsvc_txbuf_t *txb = NULL;
- int txflags = 0;
-
- if ((!req) || (!msgvec.iov_base))
- return -1;
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx Vector: %zu", msgvec.iov_len);
- if (finalvector)
- txflags |= RPCSVC_TXB_LAST;
- /* We only let the user decide whether this is the last vector for the
- * record, since the first vector is always the RPC header.
- */
- txb = rpcsvc_init_txbuf (req->conn, msgvec, iob, iobref, txflags);
- if (!txb) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not init tx buf");
- return -1;
- }
-
- req->payloadsize += msgvec.iov_len;
- if (iob)
- iobuf_ref (iob);
- if (iobref)
- iobref_ref (iobref);
- list_add_tail (&txb->txlist, &req->txlist);
-
- return 0;
-}
-
-
-int
-rpcsvc_request_attach_vectors (rpcsvc_request_t *req, struct iovec *payload,
- int vcount, struct iobref *piobref)
-{
- int c = 0;
- int ret = -1;
-
- for (;c < (vcount-1); c++) {
- ret = rpcsvc_request_attach_vector (req, payload[c], NULL,
- piobref, 0);
- if (ret < 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to attach "
- "vector");
- goto out;
- }
- }
-
- ret = rpcsvc_request_attach_vector (req, payload[vcount-1], NULL,
- piobref, 1);
- if (ret < 0)
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to attach final vec");
-
-out:
- return ret;
-}
-
-
-int
-rpcsvc_submit_vectors (rpcsvc_request_t *req)
-{
- int ret = -1;
- struct iobuf *replyiob = NULL;
- struct iovec recordhdr = {0, };
- rpcsvc_txbuf_t *rpctxb = NULL;
-
- if ((!req) || (!req->conn))
- return -1;
-
- /* Build the buffer containing the encoded RPC reply. */
- replyiob = rpcsvc_record_build_record (req, req->payloadsize,
- &recordhdr);
- if (!replyiob) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
- goto disconnect_exit;
- }
-
- rpctxb = rpcsvc_init_txbuf (req->conn, recordhdr, replyiob, NULL,
- RPCSVC_TXB_FIRST);
- if (!rpctxb) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to create tx buf");
- goto disconnect_exit;
- }
-
- pthread_mutex_lock (&req->conn->connlock);
- {
- list_splice_init (&req->txlist, &req->conn->txbufs);
- list_add (&rpctxb->txlist, &req->conn->txbufs);
- }
- pthread_mutex_unlock (&req->conn->connlock);
-
- ret = 0;
- req->conn->eventidx = event_select_on (req->conn->stage->eventpool,
- req->conn->sockfd,
- req->conn->eventidx, -1, 1);
-disconnect_exit:
- /* Note that a unref is called everytime a reply is sent. This is in
- * response to the ref that is performed on the conn when a request is
- * handed to the RPC program.
- */
- rpcsvc_conn_unref (req->conn);
- if (ret == -1)
- iobuf_unref (replyiob);
-
- mem_put (req->conn->rxpool, req);
- return ret;
-}
-
-
-int
-rpcsvc_error_reply (rpcsvc_request_t *req)
-{
- struct iovec dummyvec = {0, };
-
- if (!req)
- return -1;
-
- /* At this point the req should already have been filled with the
- * appropriate RPC error numbers.
- */
- return rpcsvc_submit_generic (req, dummyvec, NULL);
-}
-
-
-rpcsvc_request_t *
-rpcsvc_request_init (rpcsvc_conn_t *conn, struct rpc_msg *callmsg,
- struct iovec progmsg, rpcsvc_request_t *req)
-{
- if ((!conn) || (!callmsg)|| (!req))
- return NULL;
-
-
- /* We start a RPC request as always denied. */
- req->rpc_stat = MSG_DENIED;
- req->xid = rpc_call_xid (callmsg);
- req->prognum = rpc_call_program (callmsg);
- req->progver = rpc_call_progver (callmsg);
- req->procnum = rpc_call_progproc (callmsg);
- req->conn = conn;
- req->msg = progmsg;
- req->recordiob = conn->rstate.activeiob;
- INIT_LIST_HEAD (&req->txlist);
- req->payloadsize = 0;
-
- /* By this time, the data bytes for the auth scheme would have already
- * been copied into the required sections of the req structure,
- * we just need to fill in the meta-data about it now.
- */
- req->cred.flavour = rpc_call_cred_flavour (callmsg);
- req->cred.datalen = rpc_call_cred_len (callmsg);
- req->verf.flavour = rpc_call_verf_flavour (callmsg);
- req->verf.datalen = rpc_call_verf_len (callmsg);
-
- /* AUTH */
- rpcsvc_auth_request_init (req);
- return req;
-}
-
-
-rpcsvc_request_t *
-rpcsvc_request_create (rpcsvc_conn_t *conn)
-{
- char *msgbuf = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- rpcsvc_request_t *req = NULL;
- int ret = -1;
-
- if (!conn)
- return NULL;
-
- /* We need to allocate the request before actually calling
- * rpcsvc_request_init on the request so that we, can fill the auth
- * data directly into the request structure from the message iobuf.
- * This avoids a need to keep a temp buffer into which the auth data
- * would've been copied otherwise.
- */
- rpcsvc_alloc_request (conn, req);
- if (!req) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to alloc request");
- goto err;
- }
-
- msgbuf = iobuf_ptr (conn->rstate.activeiob);
- ret = xdr_to_rpc_call (msgbuf, conn->rstate.recordsize, &rpcmsg,
- &progmsg, req->cred.authdata,req->verf.authdata);
-
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC call decoding failed");
- rpcsvc_request_seterr (req, GARBAGE_ARGS);
- goto err;
- }
-
- ret = -1;
- rpcsvc_request_init (conn, &rpcmsg, progmsg, req);
-
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC XID: %lx, Ver: %ld, Program: %ld,"
- " ProgVers: %ld, Proc: %ld", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg));
-
- if (rpc_call_rpcvers (&rpcmsg) != 2) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC version not supported");
- rpcsvc_request_seterr (req, RPC_MISMATCH);
- goto err;
- }
-
- ret = rpcsvc_authenticate (req);
- if (ret == RPCSVC_AUTH_REJECT) {
- /* No need to set auth_err, that is the responsibility of
- * the authentication handler since only that know what exact
- * error happened.
- */
- rpcsvc_request_seterr (req, AUTH_ERROR);
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed authentication");
- ret = -1;
- goto err;
- }
-
-
- /* If the error is not RPC_MISMATCH, we consider the call as accepted
- * since we are not handling authentication failures for now.
- */
- req->rpc_stat = MSG_ACCEPTED;
- ret = 0;
-err:
- if (ret == -1) {
- ret = rpcsvc_error_reply (req);
- req = NULL;
- }
-
- return req;
-}
-
-
-int
-rpcsvc_handle_rpc_call (rpcsvc_conn_t *conn)
-{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- int ret = -1;
-
- if (!conn)
- return -1;
-
- req = rpcsvc_request_create (conn);
- if (!req)
- goto err;
-
- if (!rpcsvc_request_accepted (req))
- goto err_reply;
-
- actor = rpcsvc_program_actor (conn, req);
- if (!actor)
- goto err_reply;
-
- if ((actor) && (actor->actor)) {
- rpcsvc_conn_ref (conn);
- ret = actor->actor (req);
- }
-
-err_reply:
- if (ret == RPCSVC_ACTOR_ERROR)
- ret = rpcsvc_error_reply (req);
-
- /* No need to propagate error beyond this function since the reply
- * has now been queued. */
- ret = 0;
-err:
- return ret;
-}
-
-#define rpc_call_cred_addr(rs) (iobuf_ptr ((rs)->activeiob) + RPCSVC_BARERPC_MSGSZ - 4)
-
-uint32_t
-rpcsvc_call_credlen (rpcsvc_record_state_t *rs)
-{
- char *credaddr = NULL;
- uint32_t credlen_nw = 0;
- uint32_t credlen_host = 0;
-
- /* Position to the start of the credential length field. */
- credaddr = rpc_call_cred_addr (rs);
- credlen_nw = *(uint32_t *)credaddr;
- credlen_host = ntohl (credlen_nw);
-
- return credlen_host;
-}
-
-uint32_t
-rpcsvc_call_verflen (rpcsvc_record_state_t *rs)
-{
- char *verfaddr = NULL;
- uint32_t verflen_nw = 0;
- uint32_t verflen_host = 0;
- uint32_t credlen = 0;
-
- /* Position to the start of the verifier length field. */
- credlen = rpcsvc_call_credlen (rs);
- verfaddr = (rpc_call_cred_addr (rs) + 4 + credlen);
- verflen_nw = *(uint32_t *)verfaddr;
- verflen_host = ntohl (verflen_nw);
-
- return verflen_host;
-}
-
-
-void
-rpcsvc_update_vectored_verf (rpcsvc_record_state_t *rs)
-{
- if (!rs)
- return;
-
- rs->recordsize += rpcsvc_call_verflen (rs);
- return;
-}
-
-
-void
-rpcsvc_handle_vectored_prep_rpc_call (rpcsvc_conn_t *conn)
-{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- rpcsvc_record_state_t *rs = NULL;
- rpcsvc_t *svc = NULL;
- int ret = -1;
- ssize_t remfrag = RPCSVC_ACTOR_ERROR;
- int newbuf = 0;
-
- if (!conn)
- return;
-
- rs = &conn->rstate;
-
- /* In case one of the steps below fails, we need to make sure that the
- * remaining frag in the kernel's buffers are read-out so that the
- * requests that follow can be served.
- */
- rs->remainingfrag = rs->fragsize - rs->recordsize;
- rs->vecstate = RPCSVC_VECTOR_IGNORE;
- req = rpcsvc_request_create (conn);
- svc = rpcsvc_conn_rpcsvc (conn);
- if (!req)
- goto err;
-
- if (!rpcsvc_request_accepted (req))
- goto err_reply;
-
- actor = rpcsvc_program_actor (conn, req);
- if (!actor)
- goto err_reply;
-
- if (!actor->vector_sizer) {
- ret = -1;
- rpcsvc_request_seterr (req, PROC_UNAVAIL);
- goto err_reply;
- }
-
- rpcsvc_conn_ref (conn);
- ret = actor->vector_sizer (req, &remfrag, &newbuf);
- rpcsvc_conn_unref (conn);
-
- if (ret == RPCSVC_ACTOR_ERROR) {
- ret = -1;
- rpcsvc_request_seterr (req, SYSTEM_ERR);
- goto err_reply;
- }
-
- rs->remainingfrag = remfrag;
- rs->vecstate = RPCSVC_VECTOR_READPROCHDR;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC proc header remaining:"
- " %d", rs->remainingfrag);
- conn->vectoredreq = req;
-
- /* Store the reference to the current frag pointer. This is where the
- * proc header will be read into.
- */
- req->msg.iov_base = rs->fragcurrent;
- req->msg.iov_len = rs->remainingfrag;
- ret = 0;
-
-err_reply:
- if (ret == -1)
- ret = rpcsvc_error_reply (req);
-
- /* No need to propagate error beyond this function since the reply
- * has now been queued. */
- ret = 0;
-err:
- return;
-}
-
-
-void
-rpcsvc_update_vectored_verfsz (rpcsvc_conn_t *conn)
-{
- rpcsvc_record_state_t *rs = NULL;
- uint32_t verflen = 0;
-
- if (!conn)
- return;
-
- rs = &conn->rstate;
-
- verflen = rpcsvc_call_verflen (rs);
- rs->recordsize += 8;
- if (verflen > 0) {
- rs->remainingfrag = verflen;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC verf remaining: "
- " %d", rs->remainingfrag);
- rs->vecstate = RPCSVC_VECTOR_READVERF;
- } else {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC preparing call");
- rpcsvc_handle_vectored_prep_rpc_call (conn);
- }
-
- return;
-}
-
-
-void
-rpcsvc_update_vectored_cred (rpcsvc_record_state_t *rs)
-{
- uint32_t credlen = 0;
-
- if (!rs)
- return;
-
- credlen = rpcsvc_call_credlen (rs);
- /* Update remainingfrag to read the 8 bytes needed for
- * reading verf flavour and verf len.
- */
- rs->remainingfrag = (2 * sizeof (uint32_t));
- rs->vecstate = RPCSVC_VECTOR_READVERFSZ;
- rs->recordsize += credlen;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC verfsz remaining: %d",
- rs->remainingfrag);
-
- return;
-}
-
-void
-rpcsvc_update_vectored_barerpc (rpcsvc_record_state_t *rs)
-{
- uint32_t credlen = 0;
-
- if (!rs)
- return;
-
- credlen = rpcsvc_call_credlen (rs);
- rs->recordsize = RPCSVC_BARERPC_MSGSZ;
- if (credlen == 0) {
- rs->remainingfrag = 8;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC verfsz remaining"
- ": %d", rs->remainingfrag);
- rs->vecstate = RPCSVC_VECTOR_READVERFSZ;
- } else {
- rs->remainingfrag = credlen;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC cred remaining: "
- "%d", rs->remainingfrag);
- rs->vecstate = RPCSVC_VECTOR_READCRED;
- }
-
- return;
-}
-
-
-void
-rpcsvc_handle_vectored_rpc_call (rpcsvc_conn_t *conn)
-{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- rpcsvc_record_state_t *rs = NULL;
- rpcsvc_t *svc = NULL;
- int ret = -1;
- ssize_t remfrag = -1;
- int newbuf = 0;
-
- if (!conn)
- return;
-
- rs = &conn->rstate;
-
- req = conn->vectoredreq;
- svc = rpcsvc_conn_rpcsvc (conn);
-
- if (!req)
- goto err;
-
- actor = rpcsvc_program_actor (conn, req);
- if (!actor)
- goto err_reply;
-
- if (!actor->vector_sizer) {
- ret = -1;
- rpcsvc_request_seterr (req, PROC_UNAVAIL);
- goto err_reply;
- }
-
- req->msg.iov_len = (unsigned long)((long)rs->fragcurrent - (long)req->msg.iov_base);
- rpcsvc_conn_ref (conn);
- ret = actor->vector_sizer (req, &remfrag, &newbuf);
- rpcsvc_conn_unref (conn);
- if (ret == RPCSVC_ACTOR_ERROR) {
- ret = -1;
- rpcsvc_request_seterr (req, SYSTEM_ERR);
- goto err_reply;
- }
-
- if (newbuf) {
- rs->vectoriob = iobuf_get (svc->ctx->iobuf_pool);
- rs->fragcurrent = iobuf_ptr (rs->vectoriob);
- rs->vecstate = RPCSVC_VECTOR_READVEC;
- rs->remainingfrag = remfrag;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC buf remaining:"
- " %d", rs->remainingfrag);
- } else {
- rs->remainingfrag = remfrag;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC proc remaining:"
- " %d", rs->remainingfrag);
- }
-
- ret = 0;
-err_reply:
- if (ret == -1)
- ret = rpcsvc_error_reply (req);
-
- /* No need to propagate error beyond this function since the reply
- * has now been queued. */
- ret = 0;
-err:
- return;
-}
-
-
-
-void
-rpcsvc_record_vectored_call_actor (rpcsvc_conn_t *conn)
-{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- rpcsvc_record_state_t *rs = NULL;
- rpcsvc_t *svc = NULL;
- int ret = -1;
-
- if (!conn)
- return;
-
- rs = &conn->rstate;
- req = conn->vectoredreq;
- svc = rpcsvc_conn_rpcsvc (conn);
-
- if (!req)
- goto err;
-
- actor = rpcsvc_program_actor (conn, req);
- if (!actor)
- goto err_reply;
-
- if (actor->vector_actor) {
- rpcsvc_conn_ref (conn);
- ret = actor->vector_actor (req, rs->vectoriob);
- } else {
- rpcsvc_request_seterr (req, PROC_UNAVAIL);
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No vectored handler present");
- ret = RPCSVC_ACTOR_ERROR;
- }
-
-err_reply:
- if (ret == RPCSVC_ACTOR_ERROR)
- ret = rpcsvc_error_reply (req);
-
- /* No need to propagate error beyond this function since the reply
- * has now been queued. */
- ret = 0;
-err:
- return;
-}
-
-
-
-ssize_t
-rpcsvc_update_vectored_state (rpcsvc_conn_t *conn)
-{
- rpcsvc_record_state_t *rs = NULL;
- rpcsvc_t *svc = NULL;
-
- if (!conn)
- return 0;
-
- /* At this point, we can be confident that the activeiob contains
- * exactly the first RPCSVC_BARERPC_MSGSZ bytes needed in order to
- * determine the program and actor. So the next state will be
- * to read the credentials.
- *
- * But first, try to determine how many more bytes do we need from the
- * network to complete the RPC message including the credentials.
- */
-
- rs = &conn->rstate;
- if (rpcsvc_record_vectored_baremsg (rs))
- rpcsvc_update_vectored_barerpc (rs);
- else if (rpcsvc_record_vectored_cred (rs))
- rpcsvc_update_vectored_cred (rs);
- else if (rpcsvc_record_vectored_verfsz (rs))
- rpcsvc_update_vectored_verfsz (conn);
- else if (rpcsvc_record_vectored_verfread (rs)) {
- rpcsvc_update_vectored_verf (rs);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC preparing call");
- rpcsvc_handle_vectored_prep_rpc_call (conn);
- } else if (rpcsvc_record_vectored_readprochdr (rs))
- rpcsvc_handle_vectored_rpc_call (conn);
- else if (rpcsvc_record_vectored_ignore (rs)) {
- svc = rpcsvc_conn_rpcsvc (conn);
- rpcsvc_record_init (rs, svc->ctx->iobuf_pool);
- } else if (rpcsvc_record_vectored_readvec (rs)) {
- svc = rpcsvc_conn_rpcsvc (conn);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC vector read");
- rpcsvc_record_vectored_call_actor (conn);
- rpcsvc_record_init (rs, svc->ctx->iobuf_pool);
- }
-
- return 0;
-}
-
-
-ssize_t
-rpcsvc_record_read_partial_frag (rpcsvc_record_state_t *rs, ssize_t dataread);
-
-ssize_t
-rpcsvc_update_vectored_msg (rpcsvc_conn_t *conn, ssize_t dataread)
-{
-
- if (!conn)
- return dataread;
-
- /* find out how much of the bare msg is pending and set that up to be
- * read into the updated fragcurrent along with the updated size into
- * remainingfrag.
- */
-
-
- /* Incidently, the logic needed here is similar to a regular partial
- * fragment read since we've already set the remainingfrag member in
- * rstate to be RPCSVC_BARERPC_MSGSZ for the purpose of a vectored
- * fragment.
- */
- return rpcsvc_record_read_partial_frag (&conn->rstate, dataread);
-}
-
-/* FIX: As a first version of vectored reading, I am assuming dataread will
- * always be equal to RPCSVC_BARERPC_MSGSZ for the sake of simplicity on the
- * belief that we're never actually reading more bytes than needed in each
- * poll_in.
- */
-ssize_t
-rpcsvc_handle_vectored_frag (rpcsvc_conn_t *conn, ssize_t dataread)
-{
- if (!conn)
- return dataread;
-
- /* At this point we can be confident that only the frag size has been
- * read from the network. Now it is up to us to have the remaining RPC
- * fields given to us here.
- */
-
- /* Since the poll_in handler uses the remainingfrag field to determine
- * how much to read from the network, we'll hack this scheme to tell
- * the poll_in to read at most RPCSVC_BARERPC_MSGSZ bytes. This is done
- * to, as a first step, identify which (program, actor) we need to call.
- */
-
- dataread = rpcsvc_update_vectored_msg (conn, dataread);
-
- if (conn->rstate.remainingfrag == 0) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored frag complete");
- dataread = rpcsvc_update_vectored_state (conn);
- }
-
- return dataread;
-}
-
-
-int
-rpcsvc_record_update_state (rpcsvc_conn_t *conn, ssize_t dataread)
-{
- rpcsvc_record_state_t *rs = NULL;
- rpcsvc_t *svc = NULL;
-
- if (!conn)
- return -1;
-
- rs = &conn->rstate;
- /* At entry into this function, fragcurrent will be pointing to the\
- * start of the area into which dataread number of bytes were read.
- */
-
- if (rpcsvc_record_readfraghdr(rs))
- dataread = rpcsvc_record_update_fraghdr (rs, dataread);
-
- if (rpcsvc_record_readfrag(rs)) {
- /* The minimum needed for triggering the vectored handler is
- * the frag size field. The fragsize member remains set to this
- * size till this request is completely extracted from the
- * network. Once all the data has been read from the network,
- * the request structure would've been created. The point being
- * that even if it takes multiple calls to network IO for
- * getting the vectored fragment, we can continue to use this
- * condition as the flag to tell us that this is a vectored
- * fragment.
- */
- if ((dataread > 0) && (rpcsvc_record_vectored (rs))) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored frag");
- dataread = rpcsvc_handle_vectored_frag (conn, dataread);
- } else if (dataread > 0) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Regular frag");
- dataread = rpcsvc_record_update_frag (rs, dataread);
- }
- }
-
- /* This should not happen. We are never reading more than the current
- * fragment needs. Something is seriously wrong.
- */
- if (dataread > 0) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Data Left: %zd", dataread);
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Unwanted data read from "
- " connection.");
- }
-
- /* If we're now supposed to wait for a new fragment header and if the
- * fragment that we just completed in the previous call to
- * rpcsvc_record_update_frag was the last fragment for the current
- * RPC record, then, it is time to perform the translation from
- * XDR formatted buffer in activeiob followed by the upcall to the
- * protocol actor.
- */
- if ((rpcsvc_record_readfraghdr(rs)) && (rs->islastfrag)) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Full Record Received.");
- rpcsvc_handle_rpc_call (conn);
- svc = rpcsvc_conn_rpcsvc (conn);
- rpcsvc_record_init (rs, svc->ctx->iobuf_pool);
- }
-
- return 0;
-}
-
-
-char *
-rpcsvc_record_read_addr (rpcsvc_record_state_t *rs)
-{
-
- if (rpcsvc_record_readfraghdr (rs))
- return rpcsvc_record_currenthdr_addr (rs);
- else if (rpcsvc_record_readfrag (rs))
- return rpcsvc_record_currentfrag_addr (rs);
-
- return NULL;
-}
-
-
-int
-rpcsvc_conn_data_poll_in (rpcsvc_conn_t *conn)
-{
- ssize_t dataread = -1;
- size_t readsize = 0;
- char *readaddr = NULL;
- int ret = -1;
-
- readaddr = rpcsvc_record_read_addr (&conn->rstate);
- if (!readaddr)
- goto err;
-
- readsize = rpcsvc_record_read_size (&conn->rstate);
- if (readsize == -1)
- goto err;
-
- dataread = rpcsvc_socket_read (conn->sockfd, readaddr, readsize);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "conn: 0x%lx, readsize: %zu, dataread: %zd",
- (long)conn, readsize, dataread);
-
- if (dataread > 0)
- ret = rpcsvc_record_update_state (conn, dataread);
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_conn_data_poll_err (rpcsvc_conn_t *conn)
-{
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Received error event");
- rpcsvc_conn_deinit (conn);
- return 0;
-}
-
-
-int
-__rpcsvc_conn_data_poll_out (rpcsvc_conn_t *conn)
-{
- rpcsvc_txbuf_t *txbuf = NULL;
- rpcsvc_txbuf_t *tmp = NULL;
- ssize_t written = -1;
- char *writeaddr = NULL;
- size_t writesize = -1;
-
- if (!conn)
- return -1;
-
- /* Attempt transmission of each of the pending buffers */
- list_for_each_entry_safe (txbuf, tmp, &conn->txbufs, txlist) {
-tx_remaining:
- writeaddr = (char *)(txbuf->buf.iov_base + txbuf->offset);
- writesize = (txbuf->buf.iov_len - txbuf->offset);
-
- if (txbuf->txbehave & RPCSVC_TXB_FIRST) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "First Tx Buf");
- rpcsvc_socket_block_tx (conn->sockfd);
- }
-
- written = rpcsvc_socket_write (conn->sockfd, writeaddr,
- writesize);
- if (txbuf->txbehave & RPCSVC_TXB_LAST) {
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Last Tx Buf");
- rpcsvc_socket_unblock_tx (conn->sockfd);
- }
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "conn: 0x%lx, Tx request: %zu,"
- " Tx sent: %zd", (long)conn, writesize, written);
-
- /* There was an error transmitting this buffer */
- if (written == -1)
- break;
-
- if (written >= 0)
- txbuf->offset += written;
-
- /* If the current buffer has been completely transmitted,
- * delete it from the list and move on to the next buffer.
- */
- if (txbuf->offset == txbuf->buf.iov_len) {
- /* It doesnt matter who ref'ed this iobuf, rpcsvc for
- * its own header or a RPC program.
- */
- if (txbuf->iob)
- iobuf_unref (txbuf->iob);
- if (txbuf->iobref)
- iobref_unref (txbuf->iobref);
-
- list_del (&txbuf->txlist);
- mem_put (conn->txpool, txbuf);
- } else
- /* If the current buffer is incompletely tx'd, do not
- * go to the head of the loop, since that moves us to
- * the next buffer.
- */
- goto tx_remaining;
- }
-
- /* If we've broken out of the loop above then we must unblock
- * the transmission now.
- */
- rpcsvc_socket_unblock_tx (conn->sockfd);
- if (list_empty (&conn->txbufs))
- conn->eventidx = event_select_on (conn->stage->eventpool,
- conn->sockfd, conn->eventidx,
- -1, 0);
-
- return 0;
-}
-
-
-int
-rpcsvc_conn_data_poll_out (rpcsvc_conn_t *conn)
-{
- if (!conn)
- return -1;
-
-
- pthread_mutex_lock (&conn->connlock);
- {
- __rpcsvc_conn_data_poll_out (conn);
- }
- pthread_mutex_unlock (&conn->connlock);
-
- return 0;
-}
-
-
-int
-rpcsvc_conn_data_handler (int fd, int idx, void *data, int poll_in, int poll_out
- , int poll_err)
-{
- rpcsvc_conn_t *conn = NULL;
- int ret = 0;
-
- if (!data)
- return 0;
-
- conn = (rpcsvc_conn_t *)data;
-
- if (poll_out)
- ret = rpcsvc_conn_data_poll_out (conn);
-
- if (poll_err) {
- ret = rpcsvc_conn_data_poll_err (conn);
- return 0;
- }
-
- if (poll_in) {
- ret = 0;
- ret = rpcsvc_conn_data_poll_in (conn);
- }
-
- if (ret == -1)
- rpcsvc_conn_data_poll_err (conn);
-
- return 0;
-}
-
-
-int
-rpcsvc_conn_listening_handler (int fd, int idx, void *data, int poll_in,
- int poll_out, int poll_err)
-{
- rpcsvc_conn_t *newconn = NULL;
- rpcsvc_stage_t *selectedstage = NULL;
- int ret = -1;
- rpcsvc_conn_t *conn = NULL;
- rpcsvc_program_t *prog = NULL;
- rpcsvc_t *svc = NULL;
-
- if (!poll_in)
- return 0;
-
- conn = (rpcsvc_conn_t *)data;
- prog = (rpcsvc_program_t *)conn->program;
- svc = rpcsvc_conn_rpcsvc (conn);
- newconn = rpcsvc_conn_accept_init (svc, fd, prog);
- if (!newconn) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "failed to accept connection");
- goto err;
- }
-
- selectedstage = rpcsvc_select_stage (svc);
- if (!selectedstage)
- goto close_err;
-
- /* Now that we've accepted the connection, we need to associate
- * its events to a stage.
- */
- ret = rpcsvc_stage_conn_associate (selectedstage, newconn,
- rpcsvc_conn_data_handler, newconn);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "could not associated stage "
- " with new connection");
- goto close_err;
- }
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New Connection: Program %s, Num: %d,"
- " Ver: %d, Port: %d", prog->progname, prog->prognum,
- prog->progver, prog->progport);
- ret = 0;
-close_err:
- if (ret == -1)
- rpcsvc_conn_unref (newconn);
-
-err:
- return ret;
-}
-
-
-/* Register the program with the local portmapper service. */
-int
-rpcsvc_program_register_portmap (rpcsvc_program_t *newprog)
-{
- if (!newprog)
- return -1;
-
- if (!(pmap_set(newprog->prognum, newprog->progver, IPPROTO_TCP,
- newprog->progport))) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not register with"
- " portmap");
- return -1;
- }
-
- return 0;
-}
-
-
-int
-rpcsvc_program_unregister_portmap (rpcsvc_program_t *prog)
-{
- if (!prog)
- return -1;
-
- if (!(pmap_unset(prog->prognum, prog->progver))) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not unregister with"
- " portmap");
- return -1;
- }
-
- return 0;
-}
-
-
-int
-rpcsvc_stage_program_register (rpcsvc_stage_t *stg, rpcsvc_program_t *newprog)
-{
- rpcsvc_conn_t *newconn = NULL;
- rpcsvc_t *svc = NULL;
-
- if ((!stg) || (!newprog))
- return -1;
-
- svc = rpcsvc_stage_service (stg);
- /* Create a listening socket */
- newconn = rpcsvc_conn_listen_init (svc, newprog);
- if (!newconn) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "could not create listening"
- " connection");
- return -1;
- }
-
- if ((rpcsvc_stage_conn_associate (stg, newconn,
- rpcsvc_conn_listening_handler,
- newconn)) == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,"could not associate stage with"
- " listening connection");
- return -1;
- }
-
- return 0;
-}
-
-
-int
-rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t program)
-{
- rpcsvc_program_t *newprog = NULL;
- rpcsvc_stage_t *selectedstage = NULL;
- int ret = -1;
-
- if (!svc)
- return -1;
-
- newprog = GF_CALLOC (1, sizeof(*newprog), gf_common_mt_rpcsvc_program_t);
- if (!newprog)
- return -1;
-
- if (!program.actors)
- goto free_prog;
-
- memcpy (newprog, &program, sizeof (program));
- selectedstage = rpcsvc_select_stage (svc);
-
- ret = rpcsvc_stage_program_register (selectedstage, newprog);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "stage registration of program"
- " failed");
- goto free_prog;
- }
-
- ret = rpcsvc_program_register_portmap (newprog);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap registration of"
- " program failed");
- goto free_prog;
- }
-
- ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New program registered: %s, Num: %d,"
- " Ver: %d, Port: %d", newprog->progname, newprog->prognum,
- newprog->progver, newprog->progport);
-
-free_prog:
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program registration failed:"
- " %s, Num: %d, Ver: %d, Port: %d", newprog->progname,
- newprog->prognum, newprog->progver, newprog->progport);
- GF_FREE (newprog);
- }
-
- return ret;
-}
-
-/* The only difference between the generic submit and this one is that the
- * generic submit is also used for submitting RPC error replies in where there
- * are no payloads so the msgvec and msgbuf can be NULL.
- * Since RPC programs should be using this function along with their payloads
- * we must perform NULL checks before calling the generic submit.
- */
-int
-rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec msgvec,
- struct iobuf *msg)
-{
- if ((!req) || (!req->conn) || (!msg) || (!msgvec.iov_base))
- return -1;
-
- return rpcsvc_submit_generic (req, msgvec, msg);
-}
-
-
-int
-rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t prog)
-{
- int ret = -1;
-
- if (!svc)
- return -1;
-
- /* TODO: De-init the listening connection for this program. */
- ret = rpcsvc_program_unregister_portmap (&prog);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap unregistration of"
- " program failed");
- goto err;
- }
-
- ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Program unregistered: %s, Num: %d,"
- " Ver: %d, Port: %d", prog.progname, prog.prognum,
- prog.progver, prog.progport);
-
-err:
- if (ret == -1)
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program unregistration failed"
- ": %s, Num: %d, Ver: %d, Port: %d", prog.progname,
- prog.prognum, prog.progver, prog.progport);
-
- return ret;
-}
-
-
-int
-rpcsvc_conn_peername (rpcsvc_conn_t *conn, char *hostname, int hostlen)
-{
- if (!conn)
- return -1;
-
- return rpcsvc_socket_peername (conn->sockfd, hostname, hostlen);
-}
-
-
-int
-rpcsvc_conn_peeraddr (rpcsvc_conn_t *conn, char *addrstr, int addrlen,
- struct sockaddr *sa, socklen_t sasize)
-{
- if (!conn)
- return -1;
-
- return rpcsvc_socket_peeraddr (conn->sockfd, addrstr, addrlen, sa,
- sasize);
-}
-
diff --git a/xlators/nfs/lib/src/rpcsvc.h b/xlators/nfs/lib/src/rpcsvc.h
deleted file mode 100644
index a77021ac47e..00000000000
--- a/xlators/nfs/lib/src/rpcsvc.h
+++ /dev/null
@@ -1,721 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RPCSVC_H
-#define _RPCSVC_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "event.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "list.h"
-#include "iobuf.h"
-#include "xdr-rpc.h"
-#include "glusterfs.h"
-
-#include <pthread.h>
-#include <sys/uio.h>
-
-#ifdef GF_DARWIN_HOST_OS
-#include <nfs/rpcv2.h>
-#define NGRPS RPCAUTH_UNIXGIDS
-#endif
-
-#define GF_RPCSVC "rpc-service"
-#define RPCSVC_THREAD_STACK_SIZE ((size_t)(1024 * GF_UNIT_KB))
-
-#define RPCSVC_DEFAULT_MEMFACTOR 15
-#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
-#define RPCSVC_POOLCOUNT_MULT 35
-#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
-#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
-
-/* Defines for RPC record and fragment assembly */
-
-#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
-
-/* Given the 4-byte fragment header, returns non-zero if this fragment
- * is the last fragment for the RPC record being assemebled.
- * RPC Record marking standard defines a 32 bit value as the fragment
- * header with the MSB signifying whether the fragment is the last
- * fragment for the record being asembled.
- */
-#define RPCSVC_LASTFRAG(fraghdr) ((uint32_t)(fraghdr & 0x80000000U))
-
-/* Given the 4-byte fragment header, extracts the bits that contain
- * the fragment size.
- */
-#define RPCSVC_FRAGSIZE(fraghdr) ((uint32_t)(fraghdr & 0x7fffffffU))
-
-/* RPC Record States */
-#define RPCSVC_READ_FRAGHDR 1
-#define RPCSVC_READ_FRAG 2
-/* The size in bytes, if crossed by a fragment will be handed over to the
- * vectored actor so that it can allocate its buffers the way it wants.
- * In our RPC layer, we assume that vectored RPC requests/records are never
- * spread over multiple RPC fragments since that prevents us from determining
- * whether the record should be handled in RPC layer completely or handed to
- * the vectored handler.
- */
-#define RPCSVC_VECTORED_FRAGSZ 4096
-#define RPCSVC_VECTOR_READCRED 1003
-#define RPCSVC_VECTOR_READVERFSZ 1004
-#define RPCSVC_VECTOR_READVERF 1005
-#define RPCSVC_VECTOR_IGNORE 1006
-#define RPCSVC_VECTOR_READVEC 1007
-#define RPCSVC_VECTOR_READPROCHDR 1008
-
-#define rpcsvc_record_vectored_baremsg(rs) (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
-#define rpcsvc_record_vectored_cred(rs) ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
-#define rpcsvc_record_vectored_verfsz(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
-#define rpcsvc_record_vectored_verfread(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
-#define rpcsvc_record_vectored_ignore(rs) ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
-#define rpcsvc_record_vectored_readvec(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
-#define rpcsvc_record_vectored_readprochdr(rs) ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
-#define rpcsvc_record_vectored(rs) ((rs)->fragsize > RPCSVC_VECTORED_FRAGSZ)
-/* Includes bytes up to and including the credential length field. The credlen
- * will be followed by @credlen bytes of credential data which will have to be
- * read separately by the vectored reader. After the credentials comes the
- * verifier which will also have to be read separately including the 8 bytes of
- * verf flavour and verflen.
- */
-#define RPCSVC_BARERPC_MSGSZ 32
-#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
-#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
-
-#define rpcsvc_conn_rpcsvc(conn) ((conn)->stage->svc)
-#define RPCSVC_LOWVERS 2
-#define RPCSVC_HIGHVERS 2
-
-typedef struct rpc_svc_program rpcsvc_program_t;
-/* A Stage is the event handler thread together with
- * the connections being served by this thread.
- * It is called a stage because all the actors, i.e, protocol actors,
- * defined by higher level users of the RPC layer, are executed here.
- */
-typedef struct rpc_svc_stage_context {
- pthread_t tid;
- struct event_pool *eventpool; /* Per-stage event-pool */
- void *svc; /* Ref to the rpcsvc_t */
-} rpcsvc_stage_t;
-
-
-/* RPC Records and Fragments assembly state.
- * This is per-connection state that is used to determine
- * how much data has come in, how much more needs to be read
- * and where it needs to be read.
- *
- * All this state is then used to re-assemble network buffers into
- * RPC fragments, which are then re-assembled into RPC records.
- *
- * See RFC 1831: "RPC: Remote Procedure Call Protocol Specification Version 2",
- * particularly the section on Record Marking Standard.
- */
-typedef struct rpcsvc_record_state {
-
- /* Pending messages storage
- * This memory area is currently being used to assemble
- * the latest RPC record.
- *
- * Note that this buffer contains the data other than the
- * fragment headers received from the network. This is so that we can
- * directly pass this buffer to higher layers without requiring to
- * perform memory copies and marshalling of data.
- */
- struct iobuf *activeiob;
-
- struct iobuf *vectoriob;
- /* The pointer into activeiob memory, into which will go the
- * contents from the next read from the network.
- */
- char *fragcurrent;
-
- /* Size of the currently incomplete RPC fragment.
- * This is filled in when the fragment header comes in.
- * Even though only the 31 least significant bits are used from the
- * fragment header, we use a 32 bit variable to store the size.
- */
- uint32_t fragsize;
-
- /* The fragment header is always read in here so that
- * the RPC messages contained in a RPC records can be processed
- * separately without copying them out of the activeiob above.
- */
- char fragheader[RPCSVC_FRAGHDR_SIZE];
- char *hdrcurrent;
-
- /* Bytes remaining to come in for the current fragment. */
- uint32_t remainingfrag;
-
- /* It is possible for the frag header to be split over separate
- * read calls, so we need to keep track of how much is left.
- */
- uint32_t remainingfraghdr;
-
- /* Record size, the total size of the RPC record, i.e. the total
- * of all fragment sizes received till now. Does not include the size
- * of a partial fragment which is continuing to be assembled right now.
- */
- int recordsize;
-
- /* Current state of the record */
- int state;
-
- /* Current state of the vectored reading process. */
- int vecstate;
-
- /* Set to non-zero when the currently partial or complete fragment is
- * the last fragment being received for the current RPC record.
- */
- uint32_t islastfrag;
-
-} rpcsvc_record_state_t;
-
-
-#define RPCSVC_CONNSTATE_CONNECTED 1
-#define RPCSVC_CONNSTATE_DISCONNECTED 2
-
-#define rpcsvc_conn_check_active(conn) ((conn)->connstate==RPCSVC_CONNSTATE_CONNECTED)
-
-typedef struct rpcsvc_request rpcsvc_request_t;
-/* Contains the state for each connection that is used for transmitting and
- * receiving RPC messages.
- *
- * There is also an eventidx because each connection's fd is added to the event
- * pool of the stage to which a connection belongs.
- * Anything that can be accessed by a RPC program must be synced through
- * connlock.
- */
-typedef struct rpc_conn_state {
-
- /* Transport or connection state */
-
- /* Once we start working on RDMA support, this TCP specific state will
- * have to be abstracted away.
- */
- int sockfd;
- int eventidx;
- int windowsize;
-
- /* Reference to the stage which is handling this
- * connection.
- */
- rpcsvc_stage_t *stage;
-
- /* RPC Records and Fragments assembly state.
- * All incoming data is staged here before being
- * called a full RPC message.
- */
- rpcsvc_record_state_t rstate;
-
- /* It is possible that a client disconnects while
- * the higher layer RPC service is busy in a call.
- * In this case, we cannot just free the conn
- * structure, since the higher layer service could
- * still have a reference to it.
- * The refcount avoids freeing until all references
- * have been given up, although the connection is clos()ed at the first
- * call to unref.
- */
- int connref;
- pthread_mutex_t connlock;
- int connstate;
-
- /* The program that is listening for requests on this connection. */
- rpcsvc_program_t *program;
-
- /* List of buffers awaiting transmission */
- /* Accesses to txbufs between multiple threads calling
- * rpcsvc_submit is synced through connlock. Prefer spinlock over
- * mutex because this is a low overhead op that needs simple
- * appending to the tx list.
- */
- struct list_head txbufs;
-
- /* Mem pool for the txbufs above. */
- struct mem_pool *txpool;
-
- /* Memory pool for rpcsvc_request_t */
- struct mem_pool *rxpool;
-
- /* The request which hasnt yet been handed to the RPC program because
- * this request is being treated as a vector request and so needs some
- * more data to be got from the network.
- */
- rpcsvc_request_t *vectoredreq;
-} rpcsvc_conn_t;
-
-
-#define RPCSVC_MAX_AUTH_BYTES 400
-typedef struct rpcsvc_auth_data {
- int flavour;
- int datalen;
- char authdata[RPCSVC_MAX_AUTH_BYTES];
-} rpcsvc_auth_data_t;
-
-#define rpcsvc_auth_flavour(au) ((au).flavour)
-
-/* The container for the RPC call handed up to an actor.
- * Dynamically allocated. Lives till the call reply is completely
- * transmitted.
- * */
-struct rpcsvc_request {
- /* Connection over which this request came. */
- rpcsvc_conn_t *conn;
-
- /* The identifier for the call from client.
- * Needed to pair the reply with the call.
- */
- uint32_t xid;
-
- int prognum;
-
- int progver;
-
- int procnum;
- /* Uid and gid filled by the rpc-auth module during the authentication
- * phase.
- */
- uid_t uid;
- gid_t gid;
-
- /* Might want to move this to AUTH_UNIX specifix state since this array
- * is not available for every authenticatino scheme.
- */
- gid_t auxgids[NGRPS];
- int auxgidcount;
-
-
- /* The RPC message payload, contains the data required
- * by the program actors. This is the buffer that will need to
- * be de-xdred by the actor.
- */
- struct iovec msg;
-
- /* The full message buffer allocated to store the RPC headers.
- * This buffer is ref'd when allocated why RPC svc and unref'd after
- * the buffer is handed to the actor. That means if the actor or any
- * higher layer wants to keep this buffer around, they too must ref it
- * right after entering the program actor.
- */
- struct iobuf *recordiob;
-
- /* Status of the RPC call, whether it was accepted or denied. */
- int rpc_stat;
-
- /* In case, the call was denied, the RPC error is stored here
- * till the reply is sent.
- */
- int rpc_err;
-
- /* In case the failure happened because of an authentication problem
- * , this value needs to be assigned the correct auth error number.
- */
- int auth_err;
-
- /* There can be cases of RPC requests where the reply needs to
- * be built from multiple sources. For eg. where even the NFS reply can
- * contain a payload, as in the NFSv3 read reply. Here the RPC header
- * ,NFS header and the read data are brought together separately from
- * different buffers, so we need to stage the buffers temporarily here
- * before all of them get added to the connection's transmission list.
- */
- struct list_head txlist;
-
- /* While the reply record is being built, this variable keeps track
- * of how many bytes have been added to the record.
- */
- size_t payloadsize;
-
- /* The credentials extracted from the rpc request */
- rpcsvc_auth_data_t cred;
-
- /* The verified extracted from the rpc request. In request side
- * processing this contains the verifier sent by the client, on reply
- * side processing, it is filled with the verified that will be
- * sent to the client.
- */
- rpcsvc_auth_data_t verf;
-
- /* Container for a RPC program wanting to store a temp
- * request-specific item.
- */
- void *private;
-
-};
-
-#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->conn->program))
-#define rpcsvc_request_program_private(req) (((rpcsvc_program_t *)((req)->conn->program))->private)
-#define rpcsvc_request_conn(req) (req)->conn
-#define rpcsvc_request_accepted(req) ((req)->rpc_stat == MSG_ACCEPTED)
-#define rpcsvc_request_accepted_success(req) ((req)->rpc_err == SUCCESS)
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
-#define rpcsvc_stage_service(stg) ((rpcsvc_t *)((stg)->svc))
-#define rpcsvc_conn_stage(conn) ((conn)->stage)
-#define rpcsvc_request_service(req) (rpcsvc_stage_service(rpcsvc_conn_stage(rpcsvc_request_conn(req))))
-#define rpcsvc_request_prog_minauth(req) (rpcsvc_request_program(req)->min_auth)
-#define rpcsvc_request_cred_flavour(req) (rpcsvc_auth_flavour(req->cred))
-#define rpcsvc_request_verf_flavour(req) (rpcsvc_auth_flavour(req->verf))
-
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
-#define rpcsvc_request_private(req) ((req)->private)
-#define rpcsvc_request_xid(req) ((req)->xid)
-#define rpcsvc_request_set_private(req,prv) (req)->private = (void *)(prv)
-#define rpcsvc_request_record_iob(rq) ((rq)->recordiob)
-#define rpcsvc_request_record_ref(req) (iobuf_ref ((req)->recordiob))
-#define rpcsvc_request_record_unref(req) (iobuf_unref ((req)->recordiob))
-
-
-#define RPCSVC_ACTOR_SUCCESS 0
-#define RPCSVC_ACTOR_ERROR (-1)
-
-/* Functor for every type of protocol actor
- * must be defined like this.
- *
- * See the request structure for info on how to handle the request
- * in the program actor.
- *
- * On successful santify checks inside the actor, it should return
- * RPCSVC_ACTOR_SUCCESS.
- * On an error, on which the RPC layer is expected to return a reply, the actor
- * should return RPCSVC_ACTOR_ERROR.
- *
- */
-typedef int (*rpcsvc_actor) (rpcsvc_request_t *req);
-typedef int (*rpcsvc_vector_actor) (rpcsvc_request_t *req, struct iobuf *iob);
-typedef int (*rpcsvc_vector_sizer) (rpcsvc_request_t *req, ssize_t *readsize,
- int *newiob);
-
-/* Every protocol actor will also need to specify the function the RPC layer
- * will use to serialize or encode the message into XDR format just before
- * transmitting on the connection.
- */
-typedef void *(*rpcsvc_encode_reply) (void *msg);
-
-/* Once the reply has been transmitted, the message will have to be de-allocated
- * , so every actor will need to provide a function that deallocates the message
- * it had allocated as a response.
- */
-typedef void (*rpcsvc_deallocate_reply) (void *msg);
-
-
-#define RPCSVC_NAME_MAX 32
-/* The descriptor for each procedure/actor that runs
- * over the RPC service.
- */
-typedef struct rpc_svc_actor_desc {
- char procname[RPCSVC_NAME_MAX];
- int procnum;
- rpcsvc_actor actor;
-
- /* Handler for cases where the RPC requests fragments are large enough
- * to benefit from being decoded into aligned memory addresses. While
- * decoding the request in a non-vectored manner, due to the nature of
- * the XDR scheme, RPC cannot guarantee memory aligned addresses for
- * the resulting message-specific structures. Allowing a specialized
- * handler for letting the RPC program read the data from the network
- * directly into its alligned buffers.
- */
- rpcsvc_vector_actor vector_actor;
- rpcsvc_vector_sizer vector_sizer;
-
-} rpcsvc_actor_t;
-
-typedef int (*rpcsvc_conn_notify_fn) (void *progpriv, rpcsvc_conn_t *conn);
-
-/* Describes a program and its version along with the function pointers
- * required to handle the procedures/actors of each program/version.
- * Never changed ever by any thread so no need for a lock.
- */
-struct rpc_svc_program {
- char progname[RPCSVC_NAME_MAX];
- int prognum;
- int progver;
- uint16_t progport; /* Registered with portmap */
- int progaddrfamily; /* AF_INET or AF_INET6 */
- char *proghost; /* Bind host, can be NULL */
- rpcsvc_actor_t *actors; /* All procedure handlers */
- int numactors; /* Num actors in actor array */
- int proghighvers; /* Highest ver for program
- supported by the system. */
- int proglowvers; /* Lowest ver */
-
- /* Program specific state handed to actors */
- void *private;
-
- /* This upcall is made when a connection's refcount reaches 0 and the
- * connection is about to be destroyed. We want to let the RPC program
- * know that it should also now free any state it is maintaining
- * for this connection.
- */
- rpcsvc_conn_notify_fn conn_destroy;
-
- /* Used to tell RPC program to init the state it needs to associate
- * with the new connection.
- */
- rpcsvc_conn_notify_fn conn_init;
-
- /* An integer that identifies the min auth strength that is required
- * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
- * See RFC 1813, Section 5.2.1.
- */
- int min_auth;
-};
-
-
-/* Contains global state required for all the RPC services.
- */
-typedef struct rpc_svc_state {
-
- /* Contains the list of rpcsvc_stage_t
- * list of (program, version) handlers.
- * other options.
- */
-
- /* At this point, lock is not used to protect anything. Later, it'll
- * be used for protecting stages.
- */
- pthread_mutex_t rpclock;
-
- /* This is the first stage that is inited, so that any RPC based
- * services that do not need multi-threaded support can just use the
- * service right away. This is not added to the stages list
- * declared later.
- * This is also the stage over which all service listeners are run.
- */
- rpcsvc_stage_t *defaultstage;
-
- /* When we have multi-threaded RPC support, we'll use this to link
- * to the multiple Stages.
- */
- struct list_head stages; /* All stages */
-
- unsigned int memfactor;
-
- /* List of the authentication schemes available. */
- struct list_head authschemes;
-
- /* Reference to the options */
- dict_t *options;
-
- /* Allow insecure ports. */
- int allow_insecure;
-
- glusterfs_ctx_t *ctx;
-} rpcsvc_t;
-
-
-/* All users of RPC services should use this API to register their
- * procedure handlers.
- */
-extern int
-rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t program);
-
-extern int
-rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t program);
-
-/* Inits the global RPC service data structures.
- * Called in main.
- */
-extern rpcsvc_t *
-rpcsvc_init (glusterfs_ctx_t *ctx, dict_t *options);
-
-
-extern int
-rpcsvc_submit_message (rpcsvc_request_t * req, struct iovec msg,
- struct iobuf *iob);
-
-int
-rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec msgvec,
- struct iobuf *msg);
-#define rpcsvc_record_currentfrag_addr(rs) ((rs)->fragcurrent)
-#define rpcsvc_record_currenthdr_addr(rs) ((rs)->hdrcurrent)
-
-#define rpcsvc_record_update_currentfrag(rs, size) \
- do { \
- (rs)->fragcurrent += size; \
- } while (0) \
-
-#define rpcsvc_record_update_currenthdr(rs, size) \
- do { \
- (rs)->hdrcurrent += size; \
- } while (0) \
-
-
-/* These are used to differentiate between multiple txbufs which form
- * a single RPC record. For eg, one purpose we use these for is to
- * prevent dividing a RPC record over multiple TCP segments. Multiple
- * TCP segments are possible for a single RPC record because we generally do not
- * have control over how the kernel's TCP segments the buffers when putting
- * them on the wire. So, on Linux, we use these to set TCP_CORK to create
- * a single TCP segment from multiple txbufs that are part of the same RPC
- * record. This improves network performance by reducing tiny message
- * transmissions.
- */
-#define RPCSVC_TXB_FIRST 0x1
-#define RPCSVC_TXB_LAST 0x2
-
-/* The list of buffers appended to a connection's pending
- * transmission list.
- */
-typedef struct rpcsvc_txbuf {
- struct list_head txlist;
- /* The iobuf which contains the full message to be transmitted */
- struct iobuf *iob;
-
- /* For vectored messages from an RPC program, we need to be able
- * maintain a ref to an iobuf which we do not have access to directly
- * except through the iobref which in turn could've been passed to
- * the RPC program by a higher layer.
- *
- * So either the iob is defined or iobref is defined for a reply,
- * never both.
- */
- struct iobref *iobref;
- /* In order to handle non-blocking writes, we'll need to keep track of
- * how much data from an iobuf has been written and where the next
- * transmission needs to start from. This iov.base points to the base of
- * the iobuf, iov.len is the size of iobuf being used for the message
- * from the total size in the iobuf.
- */
- struct iovec buf;
- /* offset is the point from where the next transmission for this buffer
- * should start.
- */
- size_t offset;
-
- /* This is a special field that tells us what kind of transmission
- * behaviour to provide to a particular buffer.
- * See the RPCSVC_TXB_* defines for more info.
- */
- int txbehave;
-} rpcsvc_txbuf_t;
-
-extern int
-rpcsvc_error_reply (rpcsvc_request_t *req);
-
-#define RPCSVC_PEER_STRLEN 1024
-#define RPCSVC_AUTH_ACCEPT 1
-#define RPCSVC_AUTH_REJECT 2
-#define RPCSVC_AUTH_DONTCARE 3
-
-extern int
-rpcsvc_conn_peername (rpcsvc_conn_t *conn, char *hostname, int hostlen);
-
-extern int
-rpcsvc_conn_peeraddr (rpcsvc_conn_t *conn, char *addrstr, int addrlen,
- struct sockaddr *returnsa, socklen_t sasize);
-
-extern int
-rpcsvc_conn_peer_check (dict_t *options, char *volname, rpcsvc_conn_t *conn);
-
-extern int
-rpcsvc_conn_privport_check (rpcsvc_t *svc, char *volname, rpcsvc_conn_t *conn);
-#define rpcsvc_request_seterr(req, err) (req)->rpc_err = err
-#define rpcsvc_request_set_autherr(req, err) (req)->auth_err = err
-
-extern void
-rpcsvc_conn_deinit (rpcsvc_conn_t *conn);
-extern void rpcsvc_conn_ref (rpcsvc_conn_t *conn);
-extern void rpcsvc_conn_unref (rpcsvc_conn_t *conn);
-
-extern int rpcsvc_submit_vectors (rpcsvc_request_t *req);
-
-extern int rpcsvc_request_attach_vector (rpcsvc_request_t *req,
- struct iovec msgvec, struct iobuf *iob,
- struct iobref *ioref, int finalvector);
-extern int
-rpcsvc_request_attach_vectors (rpcsvc_request_t *req, struct iovec *payload,
- int vcount, struct iobref *piobref);
-
-typedef int (*auth_init_conn) (rpcsvc_conn_t *conn, void *priv);
-typedef int (*auth_init_request) (rpcsvc_request_t *req, void *priv);
-typedef int (*auth_request_authenticate) (rpcsvc_request_t *req, void *priv);
-
-/* This structure needs to be registered by every authentication scheme.
- * Our authentication schemes are stored per connection because
- * each connection will end up using a different authentication scheme.
- */
-typedef struct rpcsvc_auth_ops {
- auth_init_conn conn_init;
- auth_init_request request_init;
- auth_request_authenticate authenticate;
-} rpcsvc_auth_ops_t;
-
-typedef struct rpcsvc_auth_flavour_desc {
- char authname[RPCSVC_NAME_MAX];
- int authnum;
- rpcsvc_auth_ops_t *authops;
- void *authprivate;
-} rpcsvc_auth_t;
-
-typedef void * (*rpcsvc_auth_initer_t) (rpcsvc_t *svc, dict_t *options);
-
-struct rpcsvc_auth_list {
- struct list_head authlist;
- rpcsvc_auth_initer_t init;
- /* Should be the name with which we identify the auth scheme given
- * in the volfile options.
- * This should be different from the authname in rpc_auth_t
- * in way that makes it easier to specify this scheme in the volfile.
- * This is because the technical names of the schemes can be a bit
- * arcane.
- */
- char name[RPCSVC_NAME_MAX];
- rpcsvc_auth_t *auth;
- int enable;
-};
-
-extern int
-rpcsvc_auth_request_init (rpcsvc_request_t *req);
-
-extern int
-rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
-
-extern int
-rpcsvc_auth_conn_init (rpcsvc_conn_t *conn);
-
-extern int
-rpcsvc_authenticate (rpcsvc_request_t *req);
-
-extern int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
-
-/* If the request has been sent using AUTH_UNIX, this function returns the
- * auxiliary gids as an array, otherwise, it returns NULL.
- * Move to auth-unix specific source file when we need to modularize the
- * authentication code even further to support mode auth schemes.
- */
-extern gid_t *
-rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen);
-
-extern int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec);
-
-extern char *
-rpcsvc_volume_allowed (dict_t *options, char *volname);
-#endif
diff --git a/xlators/nfs/lib/src/xdr-common.h b/xlators/nfs/lib/src/xdr-common.h
deleted file mode 100644
index 50a57ade932..00000000000
--- a/xlators/nfs/lib/src/xdr-common.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _XDR_COMMON_H_
-#define _XDR_COMMON_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <rpc/rpc.h>
-#define XDR_BYTES_PER_UNIT 4
-
-/* Returns the address of the byte that follows the
- * last byte used for decoding the previous xdr component.
- * For eg, once the RPC call for NFS has been decoded, thie macro will return
- * the address from which the NFS header starts.
- */
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
-
-/* Returns the length of the remaining record after the previous decode
- * operation completed.
- */
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
-
-/* Returns the number of bytes used by the last encode operation. */
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-
-#endif
diff --git a/xlators/nfs/lib/src/xdr-nfs3.c b/xlators/nfs/lib/src/xdr-nfs3.c
deleted file mode 100644
index 0360203961c..00000000000
--- a/xlators/nfs/lib/src/xdr-nfs3.c
+++ /dev/null
@@ -1,1898 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-nfs3.h"
-#include "mem-pool.h"
-
-#if GF_DARWIN_HOST_OS
-#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_u_int32_t
-#endif
-
-bool_t
-xdr_uint64 (XDR *xdrs, uint64 *objp)
-{
- if (!xdr_u_quad_t (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_int64 (XDR *xdrs, int64 *objp)
-{
- if (!xdr_quad_t (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_uint32 (XDR *xdrs, uint32 *objp)
-{
- if (!xdr_uint32_t (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_int32 (XDR *xdrs, int32 *objp)
-{
- if (!xdr_int32_t (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_filename3 (XDR *xdrs, filename3 *objp)
-{
- if (!xdr_string (xdrs, objp, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nfspath3 (XDR *xdrs, nfspath3 *objp)
-{
- if (!xdr_string (xdrs, objp, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fileid3 (XDR *xdrs, fileid3 *objp)
-{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_cookie3 (XDR *xdrs, cookie3 *objp)
-{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_cookieverf3 (XDR *xdrs, cookieverf3 objp)
-{
- if (!xdr_opaque (xdrs, objp, NFS3_COOKIEVERFSIZE))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_createverf3 (XDR *xdrs, createverf3 objp)
-{
- if (!xdr_opaque (xdrs, objp, NFS3_CREATEVERFSIZE))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_writeverf3 (XDR *xdrs, writeverf3 objp)
-{
- if (!xdr_opaque (xdrs, objp, NFS3_WRITEVERFSIZE))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_uid3 (XDR *xdrs, uid3 *objp)
-{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gid3 (XDR *xdrs, gid3 *objp)
-{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_size3 (XDR *xdrs, size3 *objp)
-{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_offset3 (XDR *xdrs, offset3 *objp)
-{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mode3 (XDR *xdrs, mode3 *objp)
-{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_count3 (XDR *xdrs, count3 *objp)
-{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nfsstat3 (XDR *xdrs, nfsstat3 *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_ftype3 (XDR *xdrs, ftype3 *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_specdata3 (XDR *xdrs, specdata3 *objp)
-{
- if (!xdr_uint32 (xdrs, &objp->specdata1))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->specdata2))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nfs_fh3 (XDR *xdrs, nfs_fh3 *objp)
-{
- if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, NFS3_FHSIZE))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nfstime3 (XDR *xdrs, nfstime3 *objp)
-{
- if (!xdr_uint32 (xdrs, &objp->seconds))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->nseconds))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fattr3 (XDR *xdrs, fattr3 *objp)
-{
- if (!xdr_ftype3 (xdrs, &objp->type))
- return FALSE;
- if (!xdr_mode3 (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->nlink))
- return FALSE;
- if (!xdr_uid3 (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_gid3 (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->used))
- return FALSE;
- if (!xdr_specdata3 (xdrs, &objp->rdev))
- return FALSE;
- if (!xdr_uint64 (xdrs, &objp->fsid))
- return FALSE;
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->atime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->mtime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->ctime))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_post_op_attr (XDR *xdrs, post_op_attr *objp)
-{
- if (!xdr_bool (xdrs, &objp->attributes_follow))
- return FALSE;
- switch (objp->attributes_follow) {
- case TRUE:
- if (!xdr_fattr3 (xdrs, &objp->post_op_attr_u.attributes))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_wcc_attr (XDR *xdrs, wcc_attr *objp)
-{
- if (!xdr_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->mtime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->ctime))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pre_op_attr (XDR *xdrs, pre_op_attr *objp)
-{
- if (!xdr_bool (xdrs, &objp->attributes_follow))
- return FALSE;
- switch (objp->attributes_follow) {
- case TRUE:
- if (!xdr_wcc_attr (xdrs, &objp->pre_op_attr_u.attributes))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_wcc_data (XDR *xdrs, wcc_data *objp)
-{
- if (!xdr_pre_op_attr (xdrs, &objp->before))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->after))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_post_op_fh3 (XDR *xdrs, post_op_fh3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->handle_follows))
- return FALSE;
- switch (objp->handle_follows) {
- case TRUE:
- if (!xdr_nfs_fh3 (xdrs, &objp->post_op_fh3_u.handle))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_time_how (XDR *xdrs, time_how *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_set_mode3 (XDR *xdrs, set_mode3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_mode3 (xdrs, &objp->set_mode3_u.mode))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_set_uid3 (XDR *xdrs, set_uid3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_uid3 (xdrs, &objp->set_uid3_u.uid))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_set_gid3 (XDR *xdrs, set_gid3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_gid3 (xdrs, &objp->set_gid3_u.gid))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_set_size3 (XDR *xdrs, set_size3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_size3 (xdrs, &objp->set_size3_u.size))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_set_atime (XDR *xdrs, set_atime *objp)
-{
- if (!xdr_time_how (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case SET_TO_CLIENT_TIME:
- if (!xdr_nfstime3 (xdrs, &objp->set_atime_u.atime))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_set_mtime (XDR *xdrs, set_mtime *objp)
-{
- if (!xdr_time_how (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case SET_TO_CLIENT_TIME:
- if (!xdr_nfstime3 (xdrs, &objp->set_mtime_u.mtime))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_sattr3 (XDR *xdrs, sattr3 *objp)
-{
- if (!xdr_set_mode3 (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_set_uid3 (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_set_gid3 (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_set_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_set_atime (xdrs, &objp->atime))
- return FALSE;
- if (!xdr_set_mtime (xdrs, &objp->mtime))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_diropargs3 (XDR *xdrs, diropargs3 *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_getattr3args (XDR *xdrs, getattr3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_getattr3resok (XDR *xdrs, getattr3resok *objp)
-{
- if (!xdr_fattr3 (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_getattr3res (XDR *xdrs, getattr3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_getattr3resok (xdrs, &objp->getattr3res_u.resok))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_sattrguard3 (XDR *xdrs, sattrguard3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->check))
- return FALSE;
- switch (objp->check) {
- case TRUE:
- if (!xdr_nfstime3 (xdrs, &objp->sattrguard3_u.obj_ctime))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_setattr3args (XDR *xdrs, setattr3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_sattr3 (xdrs, &objp->new_attributes))
- return FALSE;
- if (!xdr_sattrguard3 (xdrs, &objp->guard))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_setattr3resok (XDR *xdrs, setattr3resok *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_setattr3resfail (XDR *xdrs, setattr3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_setattr3res (XDR *xdrs, setattr3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_setattr3resok (xdrs, &objp->setattr3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_setattr3resfail (xdrs, &objp->setattr3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_lookup3args (XDR *xdrs, lookup3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->what))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_lookup3resok (XDR *xdrs, lookup3resok *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_lookup3resfail (XDR *xdrs, lookup3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_lookup3res (XDR *xdrs, lookup3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_lookup3resok (xdrs, &objp->lookup3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_lookup3resfail (xdrs, &objp->lookup3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_access3args (XDR *xdrs, access3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->access))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_access3resok (XDR *xdrs, access3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->access))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_access3resfail (XDR *xdrs, access3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_access3res (XDR *xdrs, access3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_access3resok (xdrs, &objp->access3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_access3resfail (xdrs, &objp->access3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_readlink3args (XDR *xdrs, readlink3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->symlink))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readlink3resok (XDR *xdrs, readlink3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
- return FALSE;
- if (!xdr_nfspath3 (xdrs, &objp->data))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readlink3resfail (XDR *xdrs, readlink3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readlink3res (XDR *xdrs, readlink3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readlink3resok (xdrs, &objp->readlink3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readlink3resfail (xdrs, &objp->readlink3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_read3args (XDR *xdrs, read3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_read3resok_nocopy (XDR *xdrs, read3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
- return FALSE;
- return TRUE;
-}
-
-
-bool_t
-xdr_read3resok (XDR *xdrs, read3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_read3resfail (XDR *xdrs, read3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- return TRUE;
-}
-
-
-bool_t
-xdr_read3res_nocopy (XDR *xdrs, read3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_read3resok_nocopy (xdrs, &objp->read3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-
-bool_t
-xdr_read3res (XDR *xdrs, read3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_read3resok (xdrs, &objp->read3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_stable_how (XDR *xdrs, stable_how *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_write3args (XDR *xdrs, write3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_stable_how (xdrs, &objp->stable))
- return FALSE;
-
- /* Added specifically to avoid copies from the xdr buffer into
- * the write3args structure, which will also require an already
- * allocated buffer. That is not optimal.
- */
- if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
- return FALSE;
-
- /* The remaining bytes in the xdr buffer are the bytes that need to be
- * written. See how these bytes are extracted in the xdr_to_write3args
- * code path. Be careful, while using the write3args structure, since
- * only the data.data_len has been filled. The actual data is
- * extracted in xdr_to_write3args path.
- */
-
- /* if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
- return FALSE;
- */
- return TRUE;
-}
-
-bool_t
-xdr_write3resok (XDR *xdrs, write3resok *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_stable_how (xdrs, &objp->committed))
- return FALSE;
- if (!xdr_writeverf3 (xdrs, objp->verf))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_write3resfail (XDR *xdrs, write3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_write3res (XDR *xdrs, write3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_write3resok (xdrs, &objp->write3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_write3resfail (xdrs, &objp->write3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_createmode3 (XDR *xdrs, createmode3 *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_createhow3 (XDR *xdrs, createhow3 *objp)
-{
- if (!xdr_createmode3 (xdrs, &objp->mode))
- return FALSE;
- switch (objp->mode) {
- case UNCHECKED:
- case GUARDED:
- if (!xdr_sattr3 (xdrs, &objp->createhow3_u.obj_attributes))
- return FALSE;
- break;
- case EXCLUSIVE:
- if (!xdr_createverf3 (xdrs, objp->createhow3_u.verf))
- return FALSE;
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_create3args (XDR *xdrs, create3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_createhow3 (xdrs, &objp->how))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_create3resok (XDR *xdrs, create3resok *objp)
-{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_create3resfail (XDR *xdrs, create3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_create3res (XDR *xdrs, create3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_create3resok (xdrs, &objp->create3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_create3resfail (xdrs, &objp->create3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_mkdir3args (XDR *xdrs, mkdir3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_sattr3 (xdrs, &objp->attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mkdir3resok (XDR *xdrs, mkdir3resok *objp)
-{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mkdir3resfail (XDR *xdrs, mkdir3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mkdir3res (XDR *xdrs, mkdir3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_mkdir3resok (xdrs, &objp->mkdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_mkdir3resfail (xdrs, &objp->mkdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_symlinkdata3 (XDR *xdrs, symlinkdata3 *objp)
-{
- if (!xdr_sattr3 (xdrs, &objp->symlink_attributes))
- return FALSE;
- if (!xdr_nfspath3 (xdrs, &objp->symlink_data))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_symlink3args (XDR *xdrs, symlink3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_symlinkdata3 (xdrs, &objp->symlink))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_symlink3resok (XDR *xdrs, symlink3resok *objp)
-{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_symlink3resfail (XDR *xdrs, symlink3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_symlink3res (XDR *xdrs, symlink3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_symlink3resok (xdrs, &objp->symlink3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_symlink3resfail (xdrs, &objp->symlink3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_devicedata3 (XDR *xdrs, devicedata3 *objp)
-{
- if (!xdr_sattr3 (xdrs, &objp->dev_attributes))
- return FALSE;
- if (!xdr_specdata3 (xdrs, &objp->spec))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mknoddata3 (XDR *xdrs, mknoddata3 *objp)
-{
- if (!xdr_ftype3 (xdrs, &objp->type))
- return FALSE;
- switch (objp->type) {
- case NF3CHR:
- case NF3BLK:
- if (!xdr_devicedata3 (xdrs, &objp->mknoddata3_u.device))
- return FALSE;
- break;
- case NF3SOCK:
- case NF3FIFO:
- if (!xdr_sattr3 (xdrs, &objp->mknoddata3_u.pipe_attributes))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_mknod3args (XDR *xdrs, mknod3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_mknoddata3 (xdrs, &objp->what))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mknod3resok (XDR *xdrs, mknod3resok *objp)
-{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mknod3resfail (XDR *xdrs, mknod3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mknod3res (XDR *xdrs, mknod3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_mknod3resok (xdrs, &objp->mknod3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_mknod3resfail (xdrs, &objp->mknod3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_remove3args (XDR *xdrs, remove3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_remove3resok (XDR *xdrs, remove3resok *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_remove3resfail (XDR *xdrs, remove3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_remove3res (XDR *xdrs, remove3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_remove3resok (xdrs, &objp->remove3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_remove3resfail (xdrs, &objp->remove3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_rmdir3args (XDR *xdrs, rmdir3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_rmdir3resok (XDR *xdrs, rmdir3resok *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_rmdir3resfail (XDR *xdrs, rmdir3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_rmdir3res (XDR *xdrs, rmdir3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_rmdir3resok (xdrs, &objp->rmdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_rmdir3resfail (xdrs, &objp->rmdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_rename3args (XDR *xdrs, rename3args *objp)
-{
- if (!xdr_diropargs3 (xdrs, &objp->from))
- return FALSE;
- if (!xdr_diropargs3 (xdrs, &objp->to))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_rename3resok (XDR *xdrs, rename3resok *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_rename3resfail (XDR *xdrs, rename3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_rename3res (XDR *xdrs, rename3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_rename3resok (xdrs, &objp->rename3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_rename3resfail (xdrs, &objp->rename3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_link3args (XDR *xdrs, link3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_diropargs3 (xdrs, &objp->link))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_link3resok (XDR *xdrs, link3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_link3resfail (XDR *xdrs, link3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_link3res (XDR *xdrs, link3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_link3resok (xdrs, &objp->link3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_link3resfail (xdrs, &objp->link3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_readdir3args (XDR *xdrs, readdir3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_entry3 (XDR *xdrs, entry3 *objp)
-{
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entry3), (xdrproc_t) xdr_entry3))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_dirlist3 (XDR *xdrs, dirlist3 *objp)
-{
- if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entry3), (xdrproc_t) xdr_entry3))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readdir3resok (XDR *xdrs, readdir3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_dirlist3 (xdrs, &objp->reply))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readdir3resfail (XDR *xdrs, readdir3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readdir3res (XDR *xdrs, readdir3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readdir3resok (xdrs, &objp->readdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readdir3resfail (xdrs, &objp->readdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_readdirp3args (XDR *xdrs, readdirp3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->dircount))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->maxcount))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_entryp3 (XDR *xdrs, entryp3 *objp)
-{
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->name_attributes))
- return FALSE;
- if (!xdr_post_op_fh3 (xdrs, &objp->name_handle))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_dirlistp3 (XDR *xdrs, dirlistp3 *objp)
-{
- if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readdirp3resok (XDR *xdrs, readdirp3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_dirlistp3 (xdrs, &objp->reply))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readdirp3resfail (XDR *xdrs, readdirp3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_readdirp3res (XDR *xdrs, readdirp3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readdirp3resok (xdrs, &objp->readdirp3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readdirp3resfail (xdrs, &objp->readdirp3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_fsstat3args (XDR *xdrs, fsstat3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsstat3resok (XDR *xdrs, fsstat3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->tbytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->fbytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->abytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->tfiles))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->ffiles))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->afiles))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->invarsec))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsstat3resfail (XDR *xdrs, fsstat3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsstat3res (XDR *xdrs, fsstat3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_fsstat3resok (xdrs, &objp->fsstat3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_fsstat3resfail (xdrs, &objp->fsstat3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3args (XDR *xdrs, fsinfo3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3resok (XDR *xdrs, fsinfo3resok *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtpref))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtmult))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtpref))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtmult))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->dtpref))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->maxfilesize))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->time_delta))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->properties))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3resfail (XDR *xdrs, fsinfo3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3res (XDR *xdrs, fsinfo3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_fsinfo3resok (xdrs, &objp->fsinfo3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_fsinfo3resfail (xdrs, &objp->fsinfo3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3args (XDR *xdrs, pathconf3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3resok (XDR *xdrs, pathconf3resok *objp)
-{
- register int32_t *buf;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- } else {
- IXDR_PUT_BOOL(buf, objp->no_trunc);
- IXDR_PUT_BOOL(buf, objp->chown_restricted);
- IXDR_PUT_BOOL(buf, objp->case_insensitive);
- IXDR_PUT_BOOL(buf, objp->case_preserving);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- } else {
- objp->no_trunc = IXDR_GET_BOOL(buf);
- objp->chown_restricted = IXDR_GET_BOOL(buf);
- objp->case_insensitive = IXDR_GET_BOOL(buf);
- objp->case_preserving = IXDR_GET_BOOL(buf);
- }
- return TRUE;
- }
-
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3resfail (XDR *xdrs, pathconf3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3res (XDR *xdrs, pathconf3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_pathconf3resok (xdrs, &objp->pathconf3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_pathconf3resfail (xdrs, &objp->pathconf3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_commit3args (XDR *xdrs, commit3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_commit3resok (XDR *xdrs, commit3resok *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- if (!xdr_writeverf3 (xdrs, objp->verf))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_commit3resfail (XDR *xdrs, commit3resfail *objp)
-{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_commit3res (XDR *xdrs, commit3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_commit3resok (xdrs, &objp->commit3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_commit3resfail (xdrs, &objp->commit3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_fhandle3 (XDR *xdrs, fhandle3 *objp)
-{
- if (!xdr_bytes (xdrs, (char **)&objp->fhandle3_val, (u_int *) &objp->fhandle3_len, FHSIZE3))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_dirpath (XDR *xdrs, dirpath *objp)
-{
- if (!xdr_string (xdrs, objp, MNTPATHLEN))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_name (XDR *xdrs, name *objp)
-{
- if (!xdr_string (xdrs, objp, MNTNAMLEN))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mountstat3 (XDR *xdrs, mountstat3 *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mountres3_ok (XDR *xdrs, mountres3_ok *objp)
-{
- if (!xdr_fhandle3 (xdrs, &objp->fhandle))
- return FALSE;
- if (!xdr_array (xdrs, (char **)&objp->auth_flavors.auth_flavors_val, (u_int *) &objp->auth_flavors.auth_flavors_len, ~0,
- sizeof (int), (xdrproc_t) xdr_int))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mountres3 (XDR *xdrs, mountres3 *objp)
-{
- if (!xdr_mountstat3 (xdrs, &objp->fhs_status))
- return FALSE;
- switch (objp->fhs_status) {
- case MNT3_OK:
- if (!xdr_mountres3_ok (xdrs, &objp->mountres3_u.mountinfo))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_mountlist (XDR *xdrs, mountlist *objp)
-{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct mountbody), (xdrproc_t) xdr_mountbody))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mountbody (XDR *xdrs, mountbody *objp)
-{
- if (!xdr_name (xdrs, &objp->ml_hostname))
- return FALSE;
- if (!xdr_dirpath (xdrs, &objp->ml_directory))
- return FALSE;
- if (!xdr_mountlist (xdrs, &objp->ml_next))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_groups (XDR *xdrs, groups *objp)
-{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct groupnode), (xdrproc_t) xdr_groupnode))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_groupnode (XDR *xdrs, groupnode *objp)
-{
- if (!xdr_name (xdrs, &objp->gr_name))
- return FALSE;
- if (!xdr_groups (xdrs, &objp->gr_next))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_exports (XDR *xdrs, exports *objp)
-{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct exportnode), (xdrproc_t) xdr_exportnode))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_exportnode (XDR *xdrs, exportnode *objp)
-{
- if (!xdr_dirpath (xdrs, &objp->ex_dir))
- return FALSE;
- if (!xdr_groups (xdrs, &objp->ex_groups))
- return FALSE;
- if (!xdr_exports (xdrs, &objp->ex_next))
- return FALSE;
- return TRUE;
-}
-
-void
-xdr_free_exports_list (struct exportnode *first)
-{
- struct exportnode *elist = NULL;
-
- if (!first)
- return;
-
- while (first) {
- elist = first->ex_next;
- if (first->ex_dir)
- GF_FREE (first->ex_dir);
-
- if (first->ex_groups) {
- if (first->ex_groups->gr_name)
- GF_FREE (first->ex_groups->gr_name);
- GF_FREE (first->ex_groups);
- }
-
- GF_FREE (first);
- first = elist;
- }
-
-}
-
-
-void
-xdr_free_mountlist (mountlist ml)
-{
- struct mountbody *next = NULL;
-
- if (!ml)
- return;
-
- while (ml) {
- GF_FREE (ml->ml_hostname);
- GF_FREE (ml->ml_directory);
- next = ml->ml_next;
- GF_FREE (ml);
- ml = next;
- }
-
- return;
-}
-
-
-/* Free statements are based on the way sunrpc xdr decoding
- * code performs memory allocations.
- */
-void
-xdr_free_write3args_nocopy (write3args *wa)
-{
- if (!wa)
- return;
-
- GF_FREE (wa->file.data.data_val);
- GF_FREE (wa);
-}
-
-
diff --git a/xlators/nfs/lib/src/xdr-nfs3.h b/xlators/nfs/lib/src/xdr-nfs3.h
deleted file mode 100644
index fe4046584ff..00000000000
--- a/xlators/nfs/lib/src/xdr-nfs3.h
+++ /dev/null
@@ -1,1205 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _XDR_NFS3_H
-#define _XDR_NFS3_H
-
-#include <rpc/rpc.h>
-
-#define NFS3_FHSIZE 64
-#define NFS3_COOKIEVERFSIZE 8
-#define NFS3_CREATEVERFSIZE 8
-#define NFS3_WRITEVERFSIZE 8
-
-#define NFS3_ENTRY3_FIXED_SIZE 24
-#define NFS3_POSTOPATTR_SIZE 88
-#define NFS3_READDIR_RESOK_SIZE (NFS3_POSTOPATTR_SIZE + sizeof (bool_t) + NFS3_COOKIEVERFSIZE)
-
-/* In size of post_op_fh3, the length of the file handle will have to be
- * included separately since we have variable length fh. Here we only account
- * for the field for handle_follows and for the file handle length field.
- */
-#define NFS3_POSTOPFH3_FIXED_SIZE (sizeof (bool_t) + sizeof (uint32_t))
-
-/* Similarly, the size of the entry will have to include the variable length
- * file handle and the length of the entry name.
- */
-#define NFS3_ENTRYP3_FIXED_SIZE (NFS3_ENTRY3_FIXED_SIZE + NFS3_POSTOPATTR_SIZE + NFS3_POSTOPFH3_FIXED_SIZE)
-
-typedef u_quad_t uint64;
-typedef quad_t int64;
-typedef uint32_t uint32;
-typedef int32_t int32;
-typedef char *filename3;
-typedef char *nfspath3;
-typedef uint64 fileid3;
-typedef uint64 cookie3;
-typedef char cookieverf3[NFS3_COOKIEVERFSIZE];
-typedef char createverf3[NFS3_CREATEVERFSIZE];
-typedef char writeverf3[NFS3_WRITEVERFSIZE];
-typedef uint32 uid3;
-typedef uint32 gid3;
-typedef uint64 size3;
-typedef uint64 offset3;
-typedef uint32 mode3;
-typedef uint32 count3;
-
-#define NFS3MODE_SETXUID 0x00800
-#define NFS3MODE_SETXGID 0x00400
-#define NFS3MODE_SAVESWAPTXT 0x00200
-#define NFS3MODE_ROWNER 0x00100
-#define NFS3MODE_WOWNER 0x00080
-#define NFS3MODE_XOWNER 0x00040
-#define NFS3MODE_RGROUP 0x00020
-#define NFS3MODE_WGROUP 0x00010
-#define NFS3MODE_XGROUP 0x00008
-#define NFS3MODE_ROTHER 0x00004
-#define NFS3MODE_WOTHER 0x00002
-#define NFS3MODE_XOTHER 0x00001
-
-enum nfsstat3 {
- NFS3_OK = 0,
- NFS3ERR_PERM = 1,
- NFS3ERR_NOENT = 2,
- NFS3ERR_IO = 5,
- NFS3ERR_NXIO = 6,
- NFS3ERR_ACCES = 13,
- NFS3ERR_EXIST = 17,
- NFS3ERR_XDEV = 18,
- NFS3ERR_NODEV = 19,
- NFS3ERR_NOTDIR = 20,
- NFS3ERR_ISDIR = 21,
- NFS3ERR_INVAL = 22,
- NFS3ERR_FBIG = 27,
- NFS3ERR_NOSPC = 28,
- NFS3ERR_ROFS = 30,
- NFS3ERR_MLINK = 31,
- NFS3ERR_NAMETOOLONG = 63,
- NFS3ERR_NOTEMPTY = 66,
- NFS3ERR_DQUOT = 69,
- NFS3ERR_STALE = 70,
- NFS3ERR_REMOTE = 71,
- NFS3ERR_BADHANDLE = 10001,
- NFS3ERR_NOT_SYNC = 10002,
- NFS3ERR_BAD_COOKIE = 10003,
- NFS3ERR_NOTSUPP = 10004,
- NFS3ERR_TOOSMALL = 10005,
- NFS3ERR_SERVERFAULT = 10006,
- NFS3ERR_BADTYPE = 10007,
- NFS3ERR_JUKEBOX = 10008,
-};
-typedef enum nfsstat3 nfsstat3;
-
-enum ftype3 {
- NF3REG = 1,
- NF3DIR = 2,
- NF3BLK = 3,
- NF3CHR = 4,
- NF3LNK = 5,
- NF3SOCK = 6,
- NF3FIFO = 7,
-};
-typedef enum ftype3 ftype3;
-
-struct specdata3 {
- uint32 specdata1;
- uint32 specdata2;
-};
-typedef struct specdata3 specdata3;
-
-struct nfs_fh3 {
- struct {
- u_int data_len;
- char *data_val;
- } data;
-};
-typedef struct nfs_fh3 nfs_fh3;
-
-struct nfstime3 {
- uint32 seconds;
- uint32 nseconds;
-};
-typedef struct nfstime3 nfstime3;
-
-struct fattr3 {
- ftype3 type;
- mode3 mode;
- uint32 nlink;
- uid3 uid;
- gid3 gid;
- size3 size;
- size3 used;
- specdata3 rdev;
- uint64 fsid;
- fileid3 fileid;
- nfstime3 atime;
- nfstime3 mtime;
- nfstime3 ctime;
-};
-typedef struct fattr3 fattr3;
-
-struct post_op_attr {
- bool_t attributes_follow;
- union {
- fattr3 attributes;
- } post_op_attr_u;
-};
-typedef struct post_op_attr post_op_attr;
-
-struct wcc_attr {
- size3 size;
- nfstime3 mtime;
- nfstime3 ctime;
-};
-typedef struct wcc_attr wcc_attr;
-
-struct pre_op_attr {
- bool_t attributes_follow;
- union {
- wcc_attr attributes;
- } pre_op_attr_u;
-};
-typedef struct pre_op_attr pre_op_attr;
-
-struct wcc_data {
- pre_op_attr before;
- post_op_attr after;
-};
-typedef struct wcc_data wcc_data;
-
-struct post_op_fh3 {
- bool_t handle_follows;
- union {
- nfs_fh3 handle;
- } post_op_fh3_u;
-};
-typedef struct post_op_fh3 post_op_fh3;
-
-enum time_how {
- DONT_CHANGE = 0,
- SET_TO_SERVER_TIME = 1,
- SET_TO_CLIENT_TIME = 2,
-};
-typedef enum time_how time_how;
-
-struct set_mode3 {
- bool_t set_it;
- union {
- mode3 mode;
- } set_mode3_u;
-};
-typedef struct set_mode3 set_mode3;
-
-struct set_uid3 {
- bool_t set_it;
- union {
- uid3 uid;
- } set_uid3_u;
-};
-typedef struct set_uid3 set_uid3;
-
-struct set_gid3 {
- bool_t set_it;
- union {
- gid3 gid;
- } set_gid3_u;
-};
-typedef struct set_gid3 set_gid3;
-
-struct set_size3 {
- bool_t set_it;
- union {
- size3 size;
- } set_size3_u;
-};
-typedef struct set_size3 set_size3;
-
-struct set_atime {
- time_how set_it;
- union {
- nfstime3 atime;
- } set_atime_u;
-};
-typedef struct set_atime set_atime;
-
-struct set_mtime {
- time_how set_it;
- union {
- nfstime3 mtime;
- } set_mtime_u;
-};
-typedef struct set_mtime set_mtime;
-
-struct sattr3 {
- set_mode3 mode;
- set_uid3 uid;
- set_gid3 gid;
- set_size3 size;
- set_atime atime;
- set_mtime mtime;
-};
-typedef struct sattr3 sattr3;
-
-struct diropargs3 {
- nfs_fh3 dir;
- filename3 name;
-};
-typedef struct diropargs3 diropargs3;
-
-struct getattr3args {
- nfs_fh3 object;
-};
-typedef struct getattr3args getattr3args;
-
-struct getattr3resok {
- fattr3 obj_attributes;
-};
-typedef struct getattr3resok getattr3resok;
-
-struct getattr3res {
- nfsstat3 status;
- union {
- getattr3resok resok;
- } getattr3res_u;
-};
-typedef struct getattr3res getattr3res;
-
-struct sattrguard3 {
- bool_t check;
- union {
- nfstime3 obj_ctime;
- } sattrguard3_u;
-};
-typedef struct sattrguard3 sattrguard3;
-
-struct setattr3args {
- nfs_fh3 object;
- sattr3 new_attributes;
- sattrguard3 guard;
-};
-typedef struct setattr3args setattr3args;
-
-struct setattr3resok {
- wcc_data obj_wcc;
-};
-typedef struct setattr3resok setattr3resok;
-
-struct setattr3resfail {
- wcc_data obj_wcc;
-};
-typedef struct setattr3resfail setattr3resfail;
-
-struct setattr3res {
- nfsstat3 status;
- union {
- setattr3resok resok;
- setattr3resfail resfail;
- } setattr3res_u;
-};
-typedef struct setattr3res setattr3res;
-
-struct lookup3args {
- diropargs3 what;
-};
-typedef struct lookup3args lookup3args;
-
-struct lookup3resok {
- nfs_fh3 object;
- post_op_attr obj_attributes;
- post_op_attr dir_attributes;
-};
-typedef struct lookup3resok lookup3resok;
-
-struct lookup3resfail {
- post_op_attr dir_attributes;
-};
-typedef struct lookup3resfail lookup3resfail;
-
-struct lookup3res {
- nfsstat3 status;
- union {
- lookup3resok resok;
- lookup3resfail resfail;
- } lookup3res_u;
-};
-typedef struct lookup3res lookup3res;
-#define ACCESS3_READ 0x0001
-#define ACCESS3_LOOKUP 0x0002
-#define ACCESS3_MODIFY 0x0004
-#define ACCESS3_EXTEND 0x0008
-#define ACCESS3_DELETE 0x0010
-#define ACCESS3_EXECUTE 0x0020
-
-struct access3args {
- nfs_fh3 object;
- uint32 access;
-};
-typedef struct access3args access3args;
-
-struct access3resok {
- post_op_attr obj_attributes;
- uint32 access;
-};
-typedef struct access3resok access3resok;
-
-struct access3resfail {
- post_op_attr obj_attributes;
-};
-typedef struct access3resfail access3resfail;
-
-struct access3res {
- nfsstat3 status;
- union {
- access3resok resok;
- access3resfail resfail;
- } access3res_u;
-};
-typedef struct access3res access3res;
-
-struct readlink3args {
- nfs_fh3 symlink;
-};
-typedef struct readlink3args readlink3args;
-
-struct readlink3resok {
- post_op_attr symlink_attributes;
- nfspath3 data;
-};
-typedef struct readlink3resok readlink3resok;
-
-struct readlink3resfail {
- post_op_attr symlink_attributes;
-};
-typedef struct readlink3resfail readlink3resfail;
-
-struct readlink3res {
- nfsstat3 status;
- union {
- readlink3resok resok;
- readlink3resfail resfail;
- } readlink3res_u;
-};
-typedef struct readlink3res readlink3res;
-
-struct read3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
-};
-typedef struct read3args read3args;
-
-struct read3resok {
- post_op_attr file_attributes;
- count3 count;
- bool_t eof;
- struct {
- u_int data_len;
- char *data_val;
- } data;
-};
-typedef struct read3resok read3resok;
-
-struct read3resfail {
- post_op_attr file_attributes;
-};
-typedef struct read3resfail read3resfail;
-
-struct read3res {
- nfsstat3 status;
- union {
- read3resok resok;
- read3resfail resfail;
- } read3res_u;
-};
-typedef struct read3res read3res;
-
-enum stable_how {
- UNSTABLE = 0,
- DATA_SYNC = 1,
- FILE_SYNC = 2,
-};
-typedef enum stable_how stable_how;
-
-struct write3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
- stable_how stable;
- struct {
- u_int data_len;
- char *data_val;
- } data;
-};
-typedef struct write3args write3args;
-
-/* Generally, the protocol allows the file handle to be less than 64 bytes but
- * our server does not return file handles less than 64b so we can safely say
- * sizeof (nfs_fh3) rather than first trying to extract the fh size of the
- * network followed by a sized-read of the file handle.
- */
-#define NFS3_WRITE3ARGS_SIZE (sizeof (uint32_t) + NFS3_FHSIZE + sizeof (offset3) + sizeof (count3) + sizeof (uint32_t))
-struct write3resok {
- wcc_data file_wcc;
- count3 count;
- stable_how committed;
- writeverf3 verf;
-};
-typedef struct write3resok write3resok;
-
-struct write3resfail {
- wcc_data file_wcc;
-};
-typedef struct write3resfail write3resfail;
-
-struct write3res {
- nfsstat3 status;
- union {
- write3resok resok;
- write3resfail resfail;
- } write3res_u;
-};
-typedef struct write3res write3res;
-
-enum createmode3 {
- UNCHECKED = 0,
- GUARDED = 1,
- EXCLUSIVE = 2,
-};
-typedef enum createmode3 createmode3;
-
-struct createhow3 {
- createmode3 mode;
- union {
- sattr3 obj_attributes;
- createverf3 verf;
- } createhow3_u;
-};
-typedef struct createhow3 createhow3;
-
-struct create3args {
- diropargs3 where;
- createhow3 how;
-};
-typedef struct create3args create3args;
-
-struct create3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
-};
-typedef struct create3resok create3resok;
-
-struct create3resfail {
- wcc_data dir_wcc;
-};
-typedef struct create3resfail create3resfail;
-
-struct create3res {
- nfsstat3 status;
- union {
- create3resok resok;
- create3resfail resfail;
- } create3res_u;
-};
-typedef struct create3res create3res;
-
-struct mkdir3args {
- diropargs3 where;
- sattr3 attributes;
-};
-typedef struct mkdir3args mkdir3args;
-
-struct mkdir3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
-};
-typedef struct mkdir3resok mkdir3resok;
-
-struct mkdir3resfail {
- wcc_data dir_wcc;
-};
-typedef struct mkdir3resfail mkdir3resfail;
-
-struct mkdir3res {
- nfsstat3 status;
- union {
- mkdir3resok resok;
- mkdir3resfail resfail;
- } mkdir3res_u;
-};
-typedef struct mkdir3res mkdir3res;
-
-struct symlinkdata3 {
- sattr3 symlink_attributes;
- nfspath3 symlink_data;
-};
-typedef struct symlinkdata3 symlinkdata3;
-
-struct symlink3args {
- diropargs3 where;
- symlinkdata3 symlink;
-};
-typedef struct symlink3args symlink3args;
-
-struct symlink3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
-};
-typedef struct symlink3resok symlink3resok;
-
-struct symlink3resfail {
- wcc_data dir_wcc;
-};
-typedef struct symlink3resfail symlink3resfail;
-
-struct symlink3res {
- nfsstat3 status;
- union {
- symlink3resok resok;
- symlink3resfail resfail;
- } symlink3res_u;
-};
-typedef struct symlink3res symlink3res;
-
-struct devicedata3 {
- sattr3 dev_attributes;
- specdata3 spec;
-};
-typedef struct devicedata3 devicedata3;
-
-struct mknoddata3 {
- ftype3 type;
- union {
- devicedata3 device;
- sattr3 pipe_attributes;
- } mknoddata3_u;
-};
-typedef struct mknoddata3 mknoddata3;
-
-struct mknod3args {
- diropargs3 where;
- mknoddata3 what;
-};
-typedef struct mknod3args mknod3args;
-
-struct mknod3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
-};
-typedef struct mknod3resok mknod3resok;
-
-struct mknod3resfail {
- wcc_data dir_wcc;
-};
-typedef struct mknod3resfail mknod3resfail;
-
-struct mknod3res {
- nfsstat3 status;
- union {
- mknod3resok resok;
- mknod3resfail resfail;
- } mknod3res_u;
-};
-typedef struct mknod3res mknod3res;
-
-struct remove3args {
- diropargs3 object;
-};
-typedef struct remove3args remove3args;
-
-struct remove3resok {
- wcc_data dir_wcc;
-};
-typedef struct remove3resok remove3resok;
-
-struct remove3resfail {
- wcc_data dir_wcc;
-};
-typedef struct remove3resfail remove3resfail;
-
-struct remove3res {
- nfsstat3 status;
- union {
- remove3resok resok;
- remove3resfail resfail;
- } remove3res_u;
-};
-typedef struct remove3res remove3res;
-
-struct rmdir3args {
- diropargs3 object;
-};
-typedef struct rmdir3args rmdir3args;
-
-struct rmdir3resok {
- wcc_data dir_wcc;
-};
-typedef struct rmdir3resok rmdir3resok;
-
-struct rmdir3resfail {
- wcc_data dir_wcc;
-};
-typedef struct rmdir3resfail rmdir3resfail;
-
-struct rmdir3res {
- nfsstat3 status;
- union {
- rmdir3resok resok;
- rmdir3resfail resfail;
- } rmdir3res_u;
-};
-typedef struct rmdir3res rmdir3res;
-
-struct rename3args {
- diropargs3 from;
- diropargs3 to;
-};
-typedef struct rename3args rename3args;
-
-struct rename3resok {
- wcc_data fromdir_wcc;
- wcc_data todir_wcc;
-};
-typedef struct rename3resok rename3resok;
-
-struct rename3resfail {
- wcc_data fromdir_wcc;
- wcc_data todir_wcc;
-};
-typedef struct rename3resfail rename3resfail;
-
-struct rename3res {
- nfsstat3 status;
- union {
- rename3resok resok;
- rename3resfail resfail;
- } rename3res_u;
-};
-typedef struct rename3res rename3res;
-
-struct link3args {
- nfs_fh3 file;
- diropargs3 link;
-};
-typedef struct link3args link3args;
-
-struct link3resok {
- post_op_attr file_attributes;
- wcc_data linkdir_wcc;
-};
-typedef struct link3resok link3resok;
-
-struct link3resfail {
- post_op_attr file_attributes;
- wcc_data linkdir_wcc;
-};
-typedef struct link3resfail link3resfail;
-
-struct link3res {
- nfsstat3 status;
- union {
- link3resok resok;
- link3resfail resfail;
- } link3res_u;
-};
-typedef struct link3res link3res;
-
-struct readdir3args {
- nfs_fh3 dir;
- cookie3 cookie;
- cookieverf3 cookieverf;
- count3 count;
-};
-typedef struct readdir3args readdir3args;
-
-struct entry3 {
- fileid3 fileid;
- filename3 name;
- cookie3 cookie;
- struct entry3 *nextentry;
-};
-typedef struct entry3 entry3;
-
-struct dirlist3 {
- entry3 *entries;
- bool_t eof;
-};
-typedef struct dirlist3 dirlist3;
-
-struct readdir3resok {
- post_op_attr dir_attributes;
- cookieverf3 cookieverf;
- dirlist3 reply;
-};
-typedef struct readdir3resok readdir3resok;
-
-struct readdir3resfail {
- post_op_attr dir_attributes;
-};
-typedef struct readdir3resfail readdir3resfail;
-
-struct readdir3res {
- nfsstat3 status;
- union {
- readdir3resok resok;
- readdir3resfail resfail;
- } readdir3res_u;
-};
-typedef struct readdir3res readdir3res;
-
-struct readdirp3args {
- nfs_fh3 dir;
- cookie3 cookie;
- cookieverf3 cookieverf;
- count3 dircount;
- count3 maxcount;
-};
-typedef struct readdirp3args readdirp3args;
-
-struct entryp3 {
- fileid3 fileid;
- filename3 name;
- cookie3 cookie;
- post_op_attr name_attributes;
- post_op_fh3 name_handle;
- struct entryp3 *nextentry;
-};
-typedef struct entryp3 entryp3;
-
-struct dirlistp3 {
- entryp3 *entries;
- bool_t eof;
-};
-typedef struct dirlistp3 dirlistp3;
-
-struct readdirp3resok {
- post_op_attr dir_attributes;
- cookieverf3 cookieverf;
- dirlistp3 reply;
-};
-typedef struct readdirp3resok readdirp3resok;
-
-struct readdirp3resfail {
- post_op_attr dir_attributes;
-};
-typedef struct readdirp3resfail readdirp3resfail;
-
-struct readdirp3res {
- nfsstat3 status;
- union {
- readdirp3resok resok;
- readdirp3resfail resfail;
- } readdirp3res_u;
-};
-typedef struct readdirp3res readdirp3res;
-
-struct fsstat3args {
- nfs_fh3 fsroot;
-};
-typedef struct fsstat3args fsstat3args;
-
-struct fsstat3resok {
- post_op_attr obj_attributes;
- size3 tbytes;
- size3 fbytes;
- size3 abytes;
- size3 tfiles;
- size3 ffiles;
- size3 afiles;
- uint32 invarsec;
-};
-typedef struct fsstat3resok fsstat3resok;
-
-struct fsstat3resfail {
- post_op_attr obj_attributes;
-};
-typedef struct fsstat3resfail fsstat3resfail;
-
-struct fsstat3res {
- nfsstat3 status;
- union {
- fsstat3resok resok;
- fsstat3resfail resfail;
- } fsstat3res_u;
-};
-typedef struct fsstat3res fsstat3res;
-#define FSF3_LINK 0x0001
-#define FSF3_SYMLINK 0x0002
-#define FSF3_HOMOGENEOUS 0x0008
-#define FSF3_CANSETTIME 0x0010
-
-struct fsinfo3args {
- nfs_fh3 fsroot;
-};
-typedef struct fsinfo3args fsinfo3args;
-
-struct fsinfo3resok {
- post_op_attr obj_attributes;
- uint32 rtmax;
- uint32 rtpref;
- uint32 rtmult;
- uint32 wtmax;
- uint32 wtpref;
- uint32 wtmult;
- uint32 dtpref;
- size3 maxfilesize;
- nfstime3 time_delta;
- uint32 properties;
-};
-typedef struct fsinfo3resok fsinfo3resok;
-
-struct fsinfo3resfail {
- post_op_attr obj_attributes;
-};
-typedef struct fsinfo3resfail fsinfo3resfail;
-
-struct fsinfo3res {
- nfsstat3 status;
- union {
- fsinfo3resok resok;
- fsinfo3resfail resfail;
- } fsinfo3res_u;
-};
-typedef struct fsinfo3res fsinfo3res;
-
-struct pathconf3args {
- nfs_fh3 object;
-};
-typedef struct pathconf3args pathconf3args;
-
-struct pathconf3resok {
- post_op_attr obj_attributes;
- uint32 linkmax;
- uint32 name_max;
- bool_t no_trunc;
- bool_t chown_restricted;
- bool_t case_insensitive;
- bool_t case_preserving;
-};
-typedef struct pathconf3resok pathconf3resok;
-
-struct pathconf3resfail {
- post_op_attr obj_attributes;
-};
-typedef struct pathconf3resfail pathconf3resfail;
-
-struct pathconf3res {
- nfsstat3 status;
- union {
- pathconf3resok resok;
- pathconf3resfail resfail;
- } pathconf3res_u;
-};
-typedef struct pathconf3res pathconf3res;
-
-struct commit3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
-};
-typedef struct commit3args commit3args;
-
-struct commit3resok {
- wcc_data file_wcc;
- writeverf3 verf;
-};
-typedef struct commit3resok commit3resok;
-
-struct commit3resfail {
- wcc_data file_wcc;
-};
-typedef struct commit3resfail commit3resfail;
-
-struct commit3res {
- nfsstat3 status;
- union {
- commit3resok resok;
- commit3resfail resfail;
- } commit3res_u;
-};
-typedef struct commit3res commit3res;
-#define MNTPATHLEN 1024
-#define MNTNAMLEN 255
-#define FHSIZE3 NFS3_FHSIZE
-
-typedef struct {
- u_int fhandle3_len;
- char *fhandle3_val;
-} fhandle3;
-
-typedef char *dirpath;
-
-typedef char *name;
-
-enum mountstat3 {
- MNT3_OK = 0,
- MNT3ERR_PERM = 1,
- MNT3ERR_NOENT = 2,
- MNT3ERR_IO = 5,
- MNT3ERR_ACCES = 13,
- MNT3ERR_NOTDIR = 20,
- MNT3ERR_INVAL = 22,
- MNT3ERR_NAMETOOLONG = 63,
- MNT3ERR_NOTSUPP = 10004,
- MNT3ERR_SERVERFAULT = 10006,
-};
-typedef enum mountstat3 mountstat3;
-
-struct mountres3_ok {
- fhandle3 fhandle;
- struct {
- u_int auth_flavors_len;
- int *auth_flavors_val;
- } auth_flavors;
-};
-typedef struct mountres3_ok mountres3_ok;
-
-struct mountres3 {
- mountstat3 fhs_status;
- union {
- mountres3_ok mountinfo;
- } mountres3_u;
-};
-typedef struct mountres3 mountres3;
-
-typedef struct mountbody *mountlist;
-
-struct mountbody {
- name ml_hostname;
- dirpath ml_directory;
- mountlist ml_next;
-};
-typedef struct mountbody mountbody;
-
-typedef struct groupnode *groups;
-
-struct groupnode {
- name gr_name;
- groups gr_next;
-};
-typedef struct groupnode groupnode;
-
-typedef struct exportnode *exports;
-
-struct exportnode {
- dirpath ex_dir;
- groups ex_groups;
- exports ex_next;
-};
-typedef struct exportnode exportnode;
-
-#define NFS_PROGRAM 100003
-#define NFS_V3 3
-
-#define NFS3_NULL 0
-#define NFS3_GETATTR 1
-#define NFS3_SETATTR 2
-#define NFS3_LOOKUP 3
-#define NFS3_ACCESS 4
-#define NFS3_READLINK 5
-#define NFS3_READ 6
-#define NFS3_WRITE 7
-#define NFS3_CREATE 8
-#define NFS3_MKDIR 9
-#define NFS3_SYMLINK 10
-#define NFS3_MKNOD 11
-#define NFS3_REMOVE 12
-#define NFS3_RMDIR 13
-#define NFS3_RENAME 14
-#define NFS3_LINK 15
-#define NFS3_READDIR 16
-#define NFS3_READDIRP 17
-#define NFS3_FSSTAT 18
-#define NFS3_FSINFO 19
-#define NFS3_PATHCONF 20
-#define NFS3_COMMIT 21
-#define NFS3_PROC_COUNT 22
-
-#define MOUNT_PROGRAM 100005
-#define MOUNT_V3 3
-#define MOUNT_V1 1
-
-#define MOUNT3_NULL 0
-#define MOUNT3_MNT 1
-#define MOUNT3_DUMP 2
-#define MOUNT3_UMNT 3
-#define MOUNT3_UMNTALL 4
-#define MOUNT3_EXPORT 5
-#define MOUNT3_PROC_COUNT 6
-
-#define MOUNT1_NULL 0
-#define MOUNT1_DUMP 2
-#define MOUNT1_UMNT 3
-#define MOUNT1_EXPORT 5
-#define MOUNT1_PROC_COUNT 6
-/* the xdr functions */
-
-extern bool_t xdr_uint64 (XDR *, uint64*);
-extern bool_t xdr_int64 (XDR *, int64*);
-extern bool_t xdr_uint32 (XDR *, uint32*);
-extern bool_t xdr_int32 (XDR *, int32*);
-extern bool_t xdr_filename3 (XDR *, filename3*);
-extern bool_t xdr_nfspath3 (XDR *, nfspath3*);
-extern bool_t xdr_fileid3 (XDR *, fileid3*);
-extern bool_t xdr_cookie3 (XDR *, cookie3*);
-extern bool_t xdr_cookieverf3 (XDR *, cookieverf3);
-extern bool_t xdr_createverf3 (XDR *, createverf3);
-extern bool_t xdr_writeverf3 (XDR *, writeverf3);
-extern bool_t xdr_uid3 (XDR *, uid3*);
-extern bool_t xdr_gid3 (XDR *, gid3*);
-extern bool_t xdr_size3 (XDR *, size3*);
-extern bool_t xdr_offset3 (XDR *, offset3*);
-extern bool_t xdr_mode3 (XDR *, mode3*);
-extern bool_t xdr_count3 (XDR *, count3*);
-extern bool_t xdr_nfsstat3 (XDR *, nfsstat3*);
-extern bool_t xdr_ftype3 (XDR *, ftype3*);
-extern bool_t xdr_specdata3 (XDR *, specdata3*);
-extern bool_t xdr_nfs_fh3 (XDR *, nfs_fh3*);
-extern bool_t xdr_nfstime3 (XDR *, nfstime3*);
-extern bool_t xdr_fattr3 (XDR *, fattr3*);
-extern bool_t xdr_post_op_attr (XDR *, post_op_attr*);
-extern bool_t xdr_wcc_attr (XDR *, wcc_attr*);
-extern bool_t xdr_pre_op_attr (XDR *, pre_op_attr*);
-extern bool_t xdr_wcc_data (XDR *, wcc_data*);
-extern bool_t xdr_post_op_fh3 (XDR *, post_op_fh3*);
-extern bool_t xdr_time_how (XDR *, time_how*);
-extern bool_t xdr_set_mode3 (XDR *, set_mode3*);
-extern bool_t xdr_set_uid3 (XDR *, set_uid3*);
-extern bool_t xdr_set_gid3 (XDR *, set_gid3*);
-extern bool_t xdr_set_size3 (XDR *, set_size3*);
-extern bool_t xdr_set_atime (XDR *, set_atime*);
-extern bool_t xdr_set_mtime (XDR *, set_mtime*);
-extern bool_t xdr_sattr3 (XDR *, sattr3*);
-extern bool_t xdr_diropargs3 (XDR *, diropargs3*);
-extern bool_t xdr_getattr3args (XDR *, getattr3args*);
-extern bool_t xdr_getattr3resok (XDR *, getattr3resok*);
-extern bool_t xdr_getattr3res (XDR *, getattr3res*);
-extern bool_t xdr_sattrguard3 (XDR *, sattrguard3*);
-extern bool_t xdr_setattr3args (XDR *, setattr3args*);
-extern bool_t xdr_setattr3resok (XDR *, setattr3resok*);
-extern bool_t xdr_setattr3resfail (XDR *, setattr3resfail*);
-extern bool_t xdr_setattr3res (XDR *, setattr3res*);
-extern bool_t xdr_lookup3args (XDR *, lookup3args*);
-extern bool_t xdr_lookup3resok (XDR *, lookup3resok*);
-extern bool_t xdr_lookup3resfail (XDR *, lookup3resfail*);
-extern bool_t xdr_lookup3res (XDR *, lookup3res*);
-extern bool_t xdr_access3args (XDR *, access3args*);
-extern bool_t xdr_access3resok (XDR *, access3resok*);
-extern bool_t xdr_access3resfail (XDR *, access3resfail*);
-extern bool_t xdr_access3res (XDR *, access3res*);
-extern bool_t xdr_readlink3args (XDR *, readlink3args*);
-extern bool_t xdr_readlink3resok (XDR *, readlink3resok*);
-extern bool_t xdr_readlink3resfail (XDR *, readlink3resfail*);
-extern bool_t xdr_readlink3res (XDR *, readlink3res*);
-extern bool_t xdr_read3args (XDR *, read3args*);
-extern bool_t xdr_read3resok (XDR *, read3resok*);
-extern bool_t xdr_read3resfail (XDR *, read3resfail*);
-extern bool_t xdr_read3res (XDR *, read3res*);
-extern bool_t xdr_read3res_nocopy (XDR *xdrs, read3res *objp);
-extern bool_t xdr_stable_how (XDR *, stable_how*);
-extern bool_t xdr_write3args (XDR *, write3args*);
-extern bool_t xdr_write3resok (XDR *, write3resok*);
-extern bool_t xdr_write3resfail (XDR *, write3resfail*);
-extern bool_t xdr_write3res (XDR *, write3res*);
-extern bool_t xdr_createmode3 (XDR *, createmode3*);
-extern bool_t xdr_createhow3 (XDR *, createhow3*);
-extern bool_t xdr_create3args (XDR *, create3args*);
-extern bool_t xdr_create3resok (XDR *, create3resok*);
-extern bool_t xdr_create3resfail (XDR *, create3resfail*);
-extern bool_t xdr_create3res (XDR *, create3res*);
-extern bool_t xdr_mkdir3args (XDR *, mkdir3args*);
-extern bool_t xdr_mkdir3resok (XDR *, mkdir3resok*);
-extern bool_t xdr_mkdir3resfail (XDR *, mkdir3resfail*);
-extern bool_t xdr_mkdir3res (XDR *, mkdir3res*);
-extern bool_t xdr_symlinkdata3 (XDR *, symlinkdata3*);
-extern bool_t xdr_symlink3args (XDR *, symlink3args*);
-extern bool_t xdr_symlink3resok (XDR *, symlink3resok*);
-extern bool_t xdr_symlink3resfail (XDR *, symlink3resfail*);
-extern bool_t xdr_symlink3res (XDR *, symlink3res*);
-extern bool_t xdr_devicedata3 (XDR *, devicedata3*);
-extern bool_t xdr_mknoddata3 (XDR *, mknoddata3*);
-extern bool_t xdr_mknod3args (XDR *, mknod3args*);
-extern bool_t xdr_mknod3resok (XDR *, mknod3resok*);
-extern bool_t xdr_mknod3resfail (XDR *, mknod3resfail*);
-extern bool_t xdr_mknod3res (XDR *, mknod3res*);
-extern bool_t xdr_remove3args (XDR *, remove3args*);
-extern bool_t xdr_remove3resok (XDR *, remove3resok*);
-extern bool_t xdr_remove3resfail (XDR *, remove3resfail*);
-extern bool_t xdr_remove3res (XDR *, remove3res*);
-extern bool_t xdr_rmdir3args (XDR *, rmdir3args*);
-extern bool_t xdr_rmdir3resok (XDR *, rmdir3resok*);
-extern bool_t xdr_rmdir3resfail (XDR *, rmdir3resfail*);
-extern bool_t xdr_rmdir3res (XDR *, rmdir3res*);
-extern bool_t xdr_rename3args (XDR *, rename3args*);
-extern bool_t xdr_rename3resok (XDR *, rename3resok*);
-extern bool_t xdr_rename3resfail (XDR *, rename3resfail*);
-extern bool_t xdr_rename3res (XDR *, rename3res*);
-extern bool_t xdr_link3args (XDR *, link3args*);
-extern bool_t xdr_link3resok (XDR *, link3resok*);
-extern bool_t xdr_link3resfail (XDR *, link3resfail*);
-extern bool_t xdr_link3res (XDR *, link3res*);
-extern bool_t xdr_readdir3args (XDR *, readdir3args*);
-extern bool_t xdr_entry3 (XDR *, entry3*);
-extern bool_t xdr_dirlist3 (XDR *, dirlist3*);
-extern bool_t xdr_readdir3resok (XDR *, readdir3resok*);
-extern bool_t xdr_readdir3resfail (XDR *, readdir3resfail*);
-extern bool_t xdr_readdir3res (XDR *, readdir3res*);
-extern bool_t xdr_readdirp3args (XDR *, readdirp3args*);
-extern bool_t xdr_entryp3 (XDR *, entryp3*);
-extern bool_t xdr_dirlistp3 (XDR *, dirlistp3*);
-extern bool_t xdr_readdirp3resok (XDR *, readdirp3resok*);
-extern bool_t xdr_readdirp3resfail (XDR *, readdirp3resfail*);
-extern bool_t xdr_readdirp3res (XDR *, readdirp3res*);
-extern bool_t xdr_fsstat3args (XDR *, fsstat3args*);
-extern bool_t xdr_fsstat3resok (XDR *, fsstat3resok*);
-extern bool_t xdr_fsstat3resfail (XDR *, fsstat3resfail*);
-extern bool_t xdr_fsstat3res (XDR *, fsstat3res*);
-extern bool_t xdr_fsinfo3args (XDR *, fsinfo3args*);
-extern bool_t xdr_fsinfo3resok (XDR *, fsinfo3resok*);
-extern bool_t xdr_fsinfo3resfail (XDR *, fsinfo3resfail*);
-extern bool_t xdr_fsinfo3res (XDR *, fsinfo3res*);
-extern bool_t xdr_pathconf3args (XDR *, pathconf3args*);
-extern bool_t xdr_pathconf3resok (XDR *, pathconf3resok*);
-extern bool_t xdr_pathconf3resfail (XDR *, pathconf3resfail*);
-extern bool_t xdr_pathconf3res (XDR *, pathconf3res*);
-extern bool_t xdr_commit3args (XDR *, commit3args*);
-extern bool_t xdr_commit3resok (XDR *, commit3resok*);
-extern bool_t xdr_commit3resfail (XDR *, commit3resfail*);
-extern bool_t xdr_commit3res (XDR *, commit3res*);
-extern bool_t xdr_fhandle3 (XDR *, fhandle3*);
-extern bool_t xdr_dirpath (XDR *, dirpath*);
-extern bool_t xdr_name (XDR *, name*);
-extern bool_t xdr_mountstat3 (XDR *, mountstat3*);
-extern bool_t xdr_mountres3_ok (XDR *, mountres3_ok*);
-extern bool_t xdr_mountres3 (XDR *, mountres3*);
-extern bool_t xdr_mountlist (XDR *, mountlist*);
-extern bool_t xdr_mountbody (XDR *, mountbody*);
-extern bool_t xdr_groups (XDR *, groups*);
-extern bool_t xdr_groupnode (XDR *, groupnode*);
-extern bool_t xdr_exports (XDR *, exports*);
-extern bool_t xdr_exportnode (XDR *, exportnode*);
-
-extern void xdr_free_exports_list (struct exportnode *first);
-extern void xdr_free_mountlist (mountlist ml);
-
-extern void xdr_free_write3args_nocopy (write3args *wa);
-#endif
diff --git a/xlators/nfs/lib/src/xdr-rpc.c b/xlators/nfs/lib/src/xdr-rpc.c
deleted file mode 100644
index 071462242b1..00000000000
--- a/xlators/nfs/lib/src/xdr-rpc.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <string.h>
-#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
-#include <rpc/xdr.h>
-#include <sys/uio.h>
-#include <rpc/auth_unix.h>
-
-#include "mem-pool.h"
-#include "xdr-rpc.h"
-#include "xdr-common.h"
-#include "logging.h"
-
-/* Decodes the XDR format in msgbuf into rpc_msg.
- * The remaining payload is returned into payload.
- */
-int
-xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
- struct iovec *payload, char *credbytes, char *verfbytes)
-{
- XDR xdr;
- char opaquebytes[MAX_AUTH_BYTES];
- struct opaque_auth *oa = NULL;
-
- if ((!msgbuf) || (!call))
- return -1;
-
- memset (call, 0, sizeof (*call));
-
- oa = &call->rm_call.cb_cred;
- if (!credbytes)
- oa->oa_base = opaquebytes;
- else
- oa->oa_base = credbytes;
-
- oa = &call->rm_call.cb_verf;
- if (!verfbytes)
- oa->oa_base = opaquebytes;
- else
- oa->oa_base = verfbytes;
-
- xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
- if (!xdr_callmsg (&xdr, call))
- return -1;
-
- if (payload) {
- payload->iov_base = xdr_decoded_remaining_addr (xdr);
- payload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
- return 0;
-}
-
-
-bool_t
-true_func (XDR *s, caddr_t *a)
-{
- return TRUE;
-}
-
-
-int
-rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid)
-{
- if (!reply)
- return -1;
-
- /* Setting to 0 also results in reply verifier flavor to be
- * set to AUTH_NULL which is what we want right now.
- */
- memset (reply, 0, sizeof (*reply));
- reply->rm_xid = xid;
- reply->rm_direction = REPLY;
-
- return 0;
-}
-
-int
-rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err)
-{
- if (!reply)
- return -1;
-
- reply->rm_reply.rp_stat = MSG_DENIED;
- reply->rjcted_rply.rj_stat = rjstat;
- if (rjstat == RPC_MISMATCH) {
- /* No problem with hardocoding
- * RPC version numbers. We only support
- * v2 anyway.
- */
- reply->rjcted_rply.rj_vers.low = 2;
- reply->rjcted_rply.rj_vers.high = 2;
- } else if (rjstat == AUTH_ERROR)
- reply->rjcted_rply.rj_why = auth_err;
-
- return 0;
-}
-
-
-int
-rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
- int proghigh, int verf, int len, char *vdata)
-{
- if (!reply)
- return -1;
-
- reply->rm_reply.rp_stat = MSG_ACCEPTED;
- reply->acpted_rply.ar_stat = arstat;
-
- reply->acpted_rply.ar_verf.oa_flavor = verf;
- reply->acpted_rply.ar_verf.oa_length = len;
- reply->acpted_rply.ar_verf.oa_base = vdata;
- if (arstat == PROG_MISMATCH) {
- reply->acpted_rply.ar_vers.low = proglow;
- reply->acpted_rply.ar_vers.high = proghigh;
- } else if (arstat == SUCCESS) {
-
- /* This is a hack. I'd really like to build a custom
- * XDR library because Sun RPC interface is not very flexible.
- */
- reply->acpted_rply.ar_results.proc = (xdrproc_t)true_func;
- reply->acpted_rply.ar_results.where = NULL;
- }
-
- return 0;
-}
-
-int
-rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
- struct iovec *dst)
-{
- XDR xdr;
-
- if ((!dest) || (!reply) || (!dst))
- return -1;
-
- xdrmem_create (&xdr, dest, len, XDR_ENCODE);
- if (!xdr_replymsg(&xdr, reply))
- return -1;
-
- dst->iov_base = dest;
- dst->iov_len = xdr_encoded_length (xdr);
-
- return 0;
-}
-
-
-int
-xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
- char *machname, gid_t *gids)
-{
- XDR xdr;
-
- if ((!msgbuf) || (!machname) || (!gids) || (!au))
- return -1;
-
- au->aup_machname = machname;
-#ifdef GF_DARWIN_HOST_OS
- au->aup_gids = (int *)gids;
-#else
- au->aup_gids = gids;
-#endif
-
- xdrmem_create (&xdr, msgbuf, msglen, XDR_DECODE);
-
- if (!xdr_authunix_parms (&xdr, au))
- return -1;
-
- return 0;
-}
-
-ssize_t
-xdr_length_round_up (size_t len, size_t bufsize)
-{
- int roundup = 0;
-
- roundup = len % XDR_BYTES_PER_UNIT;
- if (roundup > 0)
- roundup = XDR_BYTES_PER_UNIT - roundup;
-
- if ((roundup > 0) && ((roundup + len) <= bufsize))
- len += roundup;
-
- return len;
-}
-
-int
-xdr_bytes_round_up (struct iovec *vec, size_t bufsize)
-{
- vec->iov_len = xdr_length_round_up (vec->iov_len, bufsize);
- return 0;
-}
-
-void
-xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count)
-{
- uint32_t round_count = 0;
-
- round_count = xdr_length_round_up (count, 1048576);
- round_count -= count;
- if (round_count == 0)
- return;
-
- vec[vcount-1].iov_len += round_count;
-}
diff --git a/xlators/nfs/lib/src/xdr-rpc.h b/xlators/nfs/lib/src/xdr-rpc.h
deleted file mode 100644
index 48acdaa4399..00000000000
--- a/xlators/nfs/lib/src/xdr-rpc.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _XDR_RPC_H
-#define _XDR_RPC_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
-#include <rpc/xdr.h>
-#include <sys/uio.h>
-
-/* Converts a given network buffer from its XDR format to a structure
- * that contains everything an RPC call needs to work.
- */
-extern int
-xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
- struct iovec *payload, char *credbytes, char *verfbytes);
-
-extern int
-rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid);
-
-extern int
-rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err);
-
-extern int
-rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
- int proghigh, int verf, int len, char *vdata);
-extern int
-rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
- struct iovec *dst);
-
-extern int
-xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
- char *machname, gid_t *gids);
-/* Macros that simplify accesing the members of an RPC call structure. */
-#define rpc_call_xid(call) ((call)->rm_xid)
-#define rpc_call_direction(call) ((call)->rm_direction)
-#define rpc_call_rpcvers(call) ((call)->ru.RM_cmb.cb_rpcvers)
-#define rpc_call_program(call) ((call)->ru.RM_cmb.cb_prog)
-#define rpc_call_progver(call) ((call)->ru.RM_cmb.cb_vers)
-#define rpc_call_progproc(call) ((call)->ru.RM_cmb.cb_proc)
-#define rpc_opaque_auth_flavour(oa) ((oa)->oa_flavor)
-#define rpc_opaque_auth_len(oa) ((oa)->oa_length)
-
-#define rpc_call_cred_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_cred)))
-#define rpc_call_cred_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_cred)))
-
-
-#define rpc_call_verf_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_verf)))
-#define rpc_call_verf_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_verf)))
-
-extern int
-xdr_bytes_round_up (struct iovec *vec, size_t bufsize);
-
-extern ssize_t
-xdr_length_round_up (size_t len, size_t bufsize);
-
-void
-xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count);
-#endif
diff --git a/xlators/nfs/server/src/Makefile.am b/xlators/nfs/server/src/Makefile.am
index 5e1c9baa4b8..b2831e6990e 100644
--- a/xlators/nfs/server/src/Makefile.am
+++ b/xlators/nfs/server/src/Makefile.am
@@ -1,14 +1,29 @@
xlator_LTLIBRARIES = server.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/nfs
-rpclibdir = $(top_srcdir)/xlators/nfs/lib/src/
-server_la_LDFLAGS = -module -avoidversion
-server_la_SOURCES = nfs.c nfs-common.c nfs-fops.c nfs-inodes.c nfs-generics.c mount3.c nfs3-fh.c nfs3.c nfs3-helpers.c
-server_la_LIBADD = $(top_builddir)/xlators/nfs/lib/src/libglrpcsvc.la $(top_builddir)/libglusterfs/src/libglusterfs.la
+nfsrpclibdir = $(top_srcdir)/rpc/rpc-lib/src
+server_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/nfs/server/src/nfsserver.sym
+server_la_SOURCES = nfs.c nfs-common.c nfs-fops.c nfs-inodes.c \
+ nfs-generics.c mount3.c nfs3-fh.c nfs3.c nfs3-helpers.c nlm4.c \
+ nlmcbk_svc.c mount3udp_svc.c acl3.c netgroups.c exports.c \
+ mount3-auth.c auth-cache.c
+server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/api/src/libgfapi.la
-noinst_HEADERS = nfs.h nfs-common.h nfs-fops.h nfs-inodes.h nfs-generics.h mount3.h nfs3-fh.h nfs3.h nfs3-helpers.h nfs-mem-types.h
+noinst_HEADERS = nfs.h nfs-common.h nfs-fops.h nfs-inodes.h nfs-generics.h \
+ mount3.h nfs3-fh.h nfs3.h nfs3-helpers.h nfs-mem-types.h nlm4.h \
+ acl3.h netgroups.h exports.h mount3-auth.h auth-cache.h nfs-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\
- -I$(rpclibdir) -L$(xlatordir)/ -I$(CONTRIBDIR)/rbtree
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/api/src \
+ -I$(nfsrpclibdir) -I$(CONTRIBDIR)/rbtree \
+ -I$(top_srcdir)/rpc/xdr/src/ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir)
CLEANFILES =
+
+EXTRA_DIST = nfsserver.sym
diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
new file mode 100644
index 00000000000..26fd7986534
--- /dev/null
+++ b/xlators/nfs/server/src/acl3.c
@@ -0,0 +1,954 @@
+/*
+ * Copyright (c) 2012-2013 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "defaults.h"
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "nfs.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "nfs-fops.h"
+#include "inode.h"
+#include "nfs3.h"
+#include "nfs-mem-types.h"
+#include "nfs3-helpers.h"
+#include "nfs3-fh.h"
+#include "nfs-generics.h"
+#include "acl3.h"
+#include "byte-order.h"
+#include "compat-errno.h"
+#include "nfs-messages.h"
+
+static int
+acl3_nfs_acl_to_xattr (aclentry *ace, void *xattrbuf,
+ int aclcount, int defacl);
+
+static int
+acl3_nfs_acl_from_xattr (aclentry *ace, void *xattrbuf,
+ int bufsize, int defacl);
+
+typedef ssize_t (*acl3_serializer) (struct iovec outmsg, void *args);
+
+extern void nfs3_call_state_wipe (nfs3_call_state_t *cs);
+
+extern nfs3_call_state_t *
+nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v);
+
+extern int
+nfs3_fh_validate (struct nfs3_fh *fh);
+
+extern void
+nfs3_stat_to_fattr3 (struct iatt *buf, fattr3 *fa);
+
+/* acl3_validate_nfs3_state: fetch the program-private NFSv3 state attached
+ * to an RPC request.
+ *
+ * request - rpcsvc request the private state is read from.
+ * state   - lvalue that receives the state pointer.
+ * status  - lvalue set to NFS3ERR_SERVERFAULT when no state is attached.
+ * label   - label jumped to (goto) when no state is attached.
+ * retval  - unused; kept so existing call sites keep compiling.
+ *
+ * When the state is missing the request is also marked with SYSTEM_ERR.
+ * The macro only touches its own arguments (it no longer relies on the
+ * caller having a variable named 'req') and expands to a single statement
+ * without a trailing semicolon, so it is safe inside unbraced if/else
+ * bodies; the call site supplies the ';'.
+ */
+#define acl3_validate_nfs3_state(request, state, status, label, retval) \
+        do { \
+                (state) = rpcsvc_request_program_private (request); \
+                if (!(state)) { \
+                        gf_msg (GF_ACL, GF_LOG_ERROR, errno, \
+                                NFS_MSG_STATE_MISSING, \
+                                "NFSv3 state " \
+                                "missing from RPC request"); \
+                        rpcsvc_request_seterr ((request), SYSTEM_ERR); \
+                        (status) = NFS3ERR_SERVERFAULT; \
+                        goto label; \
+                } \
+        } while (0)
+
+/* acl3_validate_gluster_fh: run nfs3_fh_validate() on a file handle.
+ *
+ * handle - pointer passed straight to nfs3_fh_validate().
+ * status - lvalue set to NFS3ERR_BADHANDLE when validation fails.
+ * errlabel - label jumped to (goto) when validation fails.
+ *
+ * A failed validation is also logged as "Bad Handle" with errno EINVAL.
+ */
+#define acl3_validate_gluster_fh(handle, status, errlabel) \
+ do { \
+ if (!nfs3_fh_validate (handle)) { \
+ gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL, \
+ NFS_MSG_BAD_HANDLE, \
+ "Bad Handle"); \
+ status = NFS3ERR_BADHANDLE; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+
+extern xlator_t *
+nfs3_fh_to_xlator (struct nfs3_state *nfs3, struct nfs3_fh *fh);
+
+/* acl3_map_fh_to_volume: look up the xlator (volume) a file handle refers to.
+ *
+ * nfs3state - state passed to nfs3_fh_to_xlator().
+ * handle - pointer to the file handle; its exportid and gfid are logged on
+ * failure.
+ * req - RPC request; supplies the transport for the log messages and gets the
+ * volume stored via rpcsvc_request_set_private() on success.
+ * volume - lvalue that receives the xlator pointer.
+ * status - lvalue set to NFS3ERR_STALE when no volume is found.
+ * label - label jumped to (goto) when no volume is found.
+ *
+ * NOTE(review): the expansion ends in "while (0);", i.e. it already carries a
+ * semicolon, so it must not be used as the body of an unbraced if/else.
+ */
+#define acl3_map_fh_to_volume(nfs3state, handle, req, volume, status, label) \
+ do { \
+ char exportid[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ volume = nfs3_fh_to_xlator ((nfs3state), handle); \
+ if (!volume) { \
+ gf_uuid_unparse (handle->exportid, exportid); \
+ gf_uuid_unparse (handle->gfid, gfid); \
+ trans = rpcsvc_request_transport (req); \
+ gf_msg (GF_ACL, GF_LOG_ERROR, 0, \
+ NFS_MSG_FH_TO_VOL_FAIL, "Failed to map " \
+ "FH to vol: client=%s, exportid=%s, gfid=%s",\
+ trans->peerinfo.identifier, exportid, \
+ gfid); \
+ gf_msg (GF_ACL, GF_LOG_ERROR, ESTALE, \
+ NFS_MSG_VOLUME_ERROR, \
+ "Stale nfs client %s must be trying to "\
+ "connect to a deleted volume, please " \
+ "unmount it.", trans->peerinfo.identifier);\
+ status = NFS3ERR_STALE; \
+ goto label; \
+ } else { \
+ gf_msg_trace (GF_ACL, 0, "FH to Volume: %s", \
+ volume->name); \
+ rpcsvc_request_set_private (req, volume); \
+ } \
+ } while (0); \
+
+/* acl3_volume_started_check: bail out when the volume is not started.
+ *
+ * nfs3state - NFSv3 state; its nfsx member is handed to nfs_state().
+ * vlm - volume (xlator) checked with nfs_subvolume_started().
+ * rtval - lvalue set to RPCSVC_ACTOR_IGNORE when the check fails.
+ * erlbl - label jumped to (goto) when the check fails.
+ *
+ * A failed check is logged as "Volume is disabled" with the volume name.
+ */
+#define acl3_volume_started_check(nfs3state, vlm, rtval, erlbl) \
+ do { \
+ if ((!nfs_subvolume_started (nfs_state (nfs3state->nfsx), vlm))){\
+ gf_msg (GF_ACL, GF_LOG_ERROR, 0, NFS_MSG_VOL_DISABLE, \
+ "Volume is disabled: %s", \
+ vlm->name); \
+ rtval = RPCSVC_ACTOR_IGNORE; \
+ goto erlbl; \
+ } \
+ } while (0) \
+
+/* acl3_check_fh_resolve_status: check the outcome of a file handle resolution
+ * recorded in a call state.
+ *
+ * cst - call state; resolve_ret, resolve_errno, resolvefh, req and nfs3state
+ * are read from it.
+ * nfstat - lvalue set to nfs3_errno_to_nfsstat3 (cst->resolve_errno) when
+ * cst->resolve_ret is negative. Note that this is already an NFS status, not
+ * an errno.
+ * erlabl - label jumped to (goto) when cst->resolve_ret is negative.
+ *
+ * On failure the client identifier, the volume name ("ERR" if the handle maps
+ * to no xlator) and the gfid are logged.
+ */
+#define acl3_check_fh_resolve_status(cst, nfstat, erlabl) \
+ do { \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if ((cst)->resolve_ret < 0) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ gf_uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid); \
+ gf_msg (GF_ACL, GF_LOG_ERROR, cst->resolve_errno, \
+ NFS_MSG_RESOLVE_FH_FAIL, "Unable to resolve "\
+ "FH: %s", buf); \
+ nfstat = nfs3_errno_to_nfsstat3 (cst->resolve_errno);\
+ goto erlabl; \
+ } \
+ } while (0) \
+
+/* acl3_handle_call_state_init: allocate the per-request call state.
+ *
+ * nfs3state - state passed to nfs3_call_state_init().
+ * calls     - lvalue that receives the new call state.
+ * rq        - RPC request the call state is created for.
+ * v         - volume (xlator) passed to nfs3_call_state_init().
+ * opstat    - lvalue set to NFS3ERR_SERVERFAULT when initialisation fails.
+ * errlabel  - label jumped to (goto) when initialisation fails.
+ *
+ * On failure the request named by 'rq' is marked with SYSTEM_ERR. The macro
+ * uses its own 'rq' argument for that rather than depending on the caller
+ * having a variable called 'req'.
+ */
+#define acl3_handle_call_state_init(nfs3state, calls, rq, v, opstat, errlabel)\
+        do { \
+                (calls) = nfs3_call_state_init ((nfs3state), (rq), (v)); \
+                if (!(calls)) { \
+                        gf_msg (GF_ACL, GF_LOG_ERROR, 0, \
+                                NFS_MSG_INIT_CALL_STAT_FAIL, "Failed to " \
+                                "init call state"); \
+                        (opstat) = NFS3ERR_SERVERFAULT; \
+                        rpcsvc_request_seterr ((rq), SYSTEM_ERR); \
+                        goto errlabel; \
+                } \
+        } while (0)
+
+
+/* acl3svc_submit_reply: encode an ACL3 reply structure and submit it on the
+ * request's transport.
+ *
+ * req   - RPC request being answered; -1 is returned straight away if NULL.
+ * arg   - reply structure handed to the serializer.
+ * sfunc - serializer that encodes 'arg' into the supplied iovec and returns
+ *         the encoded length, or a negative value on failure.
+ *
+ * Returns 0 once the message has been submitted. Returns -1 when the state,
+ * the iobuf, the iobref or the encoding is unavailable or when submission
+ * reports -1, and the iobref_add() result when that call fails. The iobuf
+ * and iobref references taken here are dropped before returning.
+ */
+int
+acl3svc_submit_reply (rpcsvc_request_t *req, void *arg, acl3_serializer sfunc)
+{
+        struct nfs3_state       *state = NULL;
+        struct iobuf            *buf = NULL;
+        struct iobref           *bufref = NULL;
+        struct iovec            reply = {0, };
+        ssize_t                 encoded = 0;
+        int                     rc = -1;
+
+        if (req == NULL)
+                return -1;
+
+        state = (struct nfs3_state *)rpcsvc_request_program_private (req);
+        if (state == NULL) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL,
+                        NFS_MSG_MNT_STATE_NOT_FOUND, "mount state not found");
+                goto done;
+        }
+
+        /* Buffer the serialized reply is written into. */
+        buf = iobuf_get (state->iobpool);
+        if (buf == NULL) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to get iobuf");
+                goto done;
+        }
+        iobuf_to_iovec (buf, &reply);
+
+        /* Let the caller-supplied serializer turn the C structure into its
+         * XDR form inside the buffer described by 'reply'.
+         */
+        encoded = sfunc (reply, arg);
+        if (encoded < 0) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_ENCODE_MSG_FAIL,
+                        "Failed to encode message");
+                goto done;
+        }
+        reply.iov_len = encoded;
+
+        bufref = iobref_new ();
+        if (bufref == NULL) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to get iobref");
+                goto done;
+        }
+
+        rc = iobref_add (bufref, buf);
+        if (rc != 0) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to add iob to iobref");
+                goto done;
+        }
+
+        /* Hand the encoded message over for transmission. */
+        rc = rpcsvc_submit_message (req, &reply, 1, NULL, 0, bufref);
+        if (rc == -1) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_REP_SUBMIT_FAIL,
+                        "Reply submission failed");
+                goto done;
+        }
+
+        rc = 0;
+done:
+        if (buf != NULL)
+                iobuf_unref (buf);
+        if (bufref != NULL)
+                iobref_unref (bufref);
+
+        return rc;
+}
+
+
+/* acl3svc_null: actor for the ACL3 NULL procedure.
+ *
+ * Submits a reply built from a single zeroed iovec. A NULL request is only
+ * logged. Always returns 0.
+ */
+int
+acl3svc_null (rpcsvc_request_t *req)
+{
+        struct iovec    empty = {0, };
+
+        if (req != NULL) {
+                rpcsvc_submit_generic (req, &empty, 1, NULL, 0, NULL);
+                return 0;
+        }
+
+        gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                "Got NULL request!");
+        return 0;
+}
+
+/* acl3_getacl_reply: submit a GETACL reply using the getaclreply serializer.
+ *
+ * The result of acl3svc_submit_reply() is not propagated; always returns 0.
+ */
+int
+acl3_getacl_reply (rpcsvc_request_t *req, getaclreply *reply)
+{
+        acl3_serializer encode = (acl3_serializer)xdr_serialize_getaclreply;
+
+        acl3svc_submit_reply (req, (void *)reply, encode);
+
+        return 0;
+}
+
+/* acl3_setacl_reply: submit a SETACL reply using the setaclreply serializer.
+ *
+ * The result of acl3svc_submit_reply() is not propagated; always returns 0.
+ */
+int
+acl3_setacl_reply (rpcsvc_request_t *req, setaclreply *reply)
+{
+        acl3_serializer encode = (acl3_serializer)xdr_serialize_setaclreply;
+
+        acl3svc_submit_reply (req, (void *)reply, encode);
+
+        return 0;
+}
+
+/* acl3_getacl_cbk: getxattr callback that decodes the ACL stored in the
+ * POSIX_ACL_ACCESS_XATTR xattr and sends the GETACL reply.
+ *
+ * The POSIX_ACL_ACCESS_XATTR can be set on files and directories.
+ *
+ * frame->local must hold the nfs3_call_state_t of the request. The reply is
+ * built in cs->args.getaclreply and the entries are decoded into
+ * cs->aclentry. A failed getxattr is tolerated when op_errno is ENODATA or
+ * ENOATTR; a NULL dict is answered with status NFS3_OK and no entries.
+ *
+ * Returns -EINVAL when frame->local is NULL, 0 otherwise. In every other
+ * case a reply is submitted and the call state is wiped.
+ */
+int
+acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *dict,
+                 dict_t *xdata)
+{
+        nfsstat3                stat = NFS3ERR_SERVERFAULT;
+        nfs3_call_state_t       *cs = NULL;
+        data_t                  *data = NULL;
+        getaclreply             *getaclreply = NULL;
+        int                     aclcount = 0;
+        int                     defacl = 1; /* DEFAULT ACL */
+
+        if (!frame->local) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Invalid argument, frame->local NULL");
+                return -EINVAL;
+        }
+        cs = frame->local;
+        getaclreply = &cs->args.getaclreply;
+        if ((op_ret < 0) && (op_errno != ENODATA && op_errno != ENOATTR)) {
+                stat = nfs3_cbk_errno_status (op_ret, op_errno);
+                goto err;
+        } else if (!dict) {
+                /* no ACL has been set */
+                stat = NFS3_OK;
+                goto err;
+        }
+
+        getaclreply->aclentry.aclentry_val = cs->aclentry;
+
+        /* getfacl: NFS USER ACL */
+        data = dict_get (dict, POSIX_ACL_ACCESS_XATTR);
+        if (data && data->data) {
+                aclcount = acl3_nfs_acl_from_xattr (cs->aclentry,
+                                                    data->data,
+                                                    data->len,
+                                                    !defacl);
+                if (aclcount < 0) {
+                        /* A negative count is a negated errno (see the
+                         * conversion below); log it as a positive errno.
+                         */
+                        gf_msg (GF_ACL, GF_LOG_ERROR, -aclcount,
+                                NFS_MSG_GET_USER_ACL_FAIL,
+                                "Failed to get USER ACL");
+                        stat = nfs3_errno_to_nfsstat3 (-aclcount);
+                        goto err;
+                }
+                getaclreply->aclcount = aclcount;
+                getaclreply->aclentry.aclentry_len = aclcount;
+        }
+
+        acl3_getacl_reply (cs->req, getaclreply);
+        nfs3_call_state_wipe (cs);
+        return 0;
+
+err:
+        /* getaclreply points into cs and cannot be NULL here. */
+        getaclreply->status = stat;
+        acl3_getacl_reply (cs->req, getaclreply);
+        nfs3_call_state_wipe (cs);
+        return 0;
+}
+
+/* acl3_default_getacl_cbk: fetch and decode the ACL set in the
+ * POSIX_ACL_DEFAULT_XATTR xattr.
+ *
+ * The POSIX_ACL_DEFAULT_XATTR xattr is only set on directories, not on files.
+ *
+ * When done with POSIX_ACL_DEFAULT_XATTR, we also need to get and decode the
+ * ACL that can be set in POSIX_ACL_ACCESS_XATTR; that is done by issuing a
+ * second getxattr whose callback is acl3_getacl_cbk.
+ *
+ * frame->local must hold the nfs3_call_state_t of the request. The default
+ * entries are decoded into cs->daclentry. A failed getxattr is tolerated when
+ * op_errno is ENODATA or ENOATTR; a NULL dict is answered immediately with
+ * status NFS3_OK.
+ *
+ * Returns -EINVAL when frame->local is NULL, 0 otherwise. On the error path a
+ * reply is submitted and the call state is wiped here; on success that is
+ * left to acl3_getacl_cbk.
+ */
+int
+acl3_default_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *dict,
+                         dict_t *xdata)
+{
+        nfsstat3                stat = NFS3ERR_SERVERFAULT;
+        nfs3_call_state_t       *cs = NULL;
+        data_t                  *data = NULL;
+        getaclreply             *getaclreply = NULL;
+        int                     aclcount = 0;
+        int                     defacl = 1; /* DEFAULT ACL */
+        nfs_user_t              nfu = {0, };
+        int                     ret = -1;
+
+        if (!frame->local) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Invalid argument, frame->local NULL");
+                return -EINVAL;
+        }
+        cs = frame->local;
+        getaclreply = &cs->args.getaclreply;
+        if ((op_ret < 0) && (op_errno != ENODATA && op_errno != ENOATTR)) {
+                stat = nfs3_cbk_errno_status (op_ret, op_errno);
+                goto err;
+        } else if (!dict) {
+                /* no ACL has been set */
+                stat = NFS3_OK;
+                goto err;
+        }
+
+        getaclreply->daclentry.daclentry_val = cs->daclentry;
+
+        /* getfacl: NFS DEFAULT ACL */
+        data = dict_get (dict, POSIX_ACL_DEFAULT_XATTR);
+        if (data && data->data) {
+                aclcount = acl3_nfs_acl_from_xattr (cs->daclentry,
+                                                    data->data,
+                                                    data->len,
+                                                    defacl);
+                if (aclcount < 0) {
+                        /* A negative count is a negated errno (see the
+                         * conversion below); log it as a positive errno.
+                         */
+                        gf_msg (GF_ACL, GF_LOG_ERROR, -aclcount,
+                                NFS_MSG_GET_DEF_ACL_FAIL,
+                                "Failed to get DEFAULT ACL");
+                        stat = nfs3_errno_to_nfsstat3 (-aclcount);
+                        goto err;
+                }
+
+                getaclreply->daclcount = aclcount;
+                getaclreply->daclentry.daclentry_len = aclcount;
+        }
+
+        /* Chain to the access ACL; acl3_getacl_cbk sends the reply. */
+        getaclreply->attr_follows = TRUE;
+        nfs_request_user_init (&nfu, cs->req);
+        ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+                            POSIX_ACL_ACCESS_XATTR, NULL, acl3_getacl_cbk, cs);
+        if (ret < 0) {
+                stat = nfs3_errno_to_nfsstat3 (-ret);
+                goto err;
+        }
+
+        return 0;
+
+err:
+        /* getaclreply points into cs and cannot be NULL here. */
+        getaclreply->status = stat;
+        acl3_getacl_reply (cs->req, getaclreply);
+        nfs3_call_state_wipe (cs);
+        return 0;
+}
+
+
+/* acl3_stat_cbk: stat callback of the GETACL path.
+ *
+ * Fills the attributes of the GETACL reply from 'buf' and then requests the
+ * ACL xattr: POSIX_ACL_DEFAULT_XATTR (callback acl3_default_getacl_cbk) when
+ * the object is a directory, POSIX_ACL_ACCESS_XATTR (callback
+ * acl3_getacl_cbk) otherwise.
+ *
+ * frame->local must hold the nfs3_call_state_t of the request.
+ *
+ * Returns -EINVAL when frame->local is NULL (same sign convention as the
+ * other ACL3 callbacks), 0 otherwise. When the stat failed or the getxattr
+ * could not be issued, an error reply is submitted and the call state is
+ * wiped here.
+ */
+int
+acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *buf,
+               dict_t *xdata)
+{
+        nfsstat3                stat = NFS3ERR_SERVERFAULT;
+        nfs3_call_state_t       *cs = NULL;
+        getaclreply             *getaclreply = NULL;
+        int                     ret = -1;
+        nfs_user_t              nfu = {0, };
+        uint64_t                deviceid = 0;
+
+        if (!frame->local) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Invalid argument, frame->local NULL");
+                return -EINVAL;
+        }
+
+        cs = frame->local;
+        getaclreply = &cs->args.getaclreply;
+
+        if (op_ret == -1) {
+                stat = nfs3_cbk_errno_status (op_ret, op_errno);
+                goto err;
+        }
+
+        /* Fill the attrs before xattrs */
+        getaclreply->attr_follows = TRUE;
+        deviceid = nfs3_request_xlator_deviceid (cs->req);
+        nfs3_map_deviceid_to_statdev (buf, deviceid);
+        nfs3_stat_to_fattr3 (buf, &(getaclreply->attr));
+
+        nfs_request_user_init (&nfu, cs->req);
+        if (buf->ia_type == IA_IFDIR) {
+                ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+                                    POSIX_ACL_DEFAULT_XATTR, NULL,
+                                    acl3_default_getacl_cbk, cs);
+        } else {
+                ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+                                    POSIX_ACL_ACCESS_XATTR, NULL,
+                                    acl3_getacl_cbk, cs);
+        }
+
+        if (ret < 0) {
+                stat = nfs3_errno_to_nfsstat3 (-ret);
+                goto err;
+        }
+
+        return 0;
+err:
+        getaclreply->status = stat;
+        acl3_getacl_reply (cs->req, getaclreply);
+        nfs3_call_state_wipe (cs);
+        return 0;
+}
+
+
+/* acl3_getacl_resume: continuation of GETACL once the file handle has been
+ * resolved.
+ *
+ * carg - the nfs3_call_state_t of the request; -1 is returned if it is NULL.
+ *
+ * Issues a stat on the resolved location with acl3_stat_cbk as callback.
+ * If the handle failed to resolve, or the stat could not be issued, an error
+ * reply is submitted and the call state is wiped.
+ *
+ * 'stat' always holds an NFS status and 'op_errno' always holds an errno, so
+ * the status produced by acl3_check_fh_resolve_status is sent as-is instead
+ * of being passed through nfs3_errno_to_nfsstat3() a second time.
+ *
+ * Returns the nfs_stat() result, or -1 when the handle did not resolve.
+ */
+int
+acl3_getacl_resume (void *carg)
+{
+        int                     ret = -1;
+        int                     op_errno = 0;
+        nfs3_call_state_t       *cs = NULL;
+        nfsstat3                stat = NFS3ERR_SERVERFAULT;
+        nfs_user_t              nfu = {0, };
+
+        if (!carg)
+                return ret;
+
+        cs = (nfs3_call_state_t *)carg;
+        /* errno to log if the resolve check below jumps to acl3err */
+        op_errno = cs->resolve_errno;
+        acl3_check_fh_resolve_status (cs, stat, acl3err);
+        nfs_request_user_init (&nfu, cs->req);
+
+        ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+                        acl3_stat_cbk, cs);
+        if (ret < 0) {
+                op_errno = -ret;
+                stat = nfs3_errno_to_nfsstat3 (op_errno);
+        }
+acl3err:
+        if (ret < 0) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, op_errno, NFS_MSG_OPEN_FAIL,
+                        "unable to open_and_resume");
+                cs->args.getaclreply.status = stat;
+                acl3_getacl_reply (cs->req, &cs->args.getaclreply);
+                nfs3_call_state_wipe (cs);
+        }
+
+        return ret;
+}
+
+
+/* acl3svc_getacl: actor for the ACL3 GETACL procedure.
+ *
+ * Decodes the arguments, rejects masks with bits outside
+ * NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT, validates the file handle, maps
+ * it to a volume, creates the call state and starts handle resolution with
+ * acl3_getacl_resume as continuation.
+ *
+ * The file handle buffer is zeroed before decoding (as acl3svc_setacl does)
+ * so that validation never looks at uninitialised stack bytes.
+ *
+ * Returns RPCSVC_ACTOR_ERROR when req is NULL, the NFSv3 state is missing or
+ * the arguments cannot be decoded; RPCSVC_ACTOR_IGNORE when the volume is not
+ * started; 0 after an error reply has been sent from here; otherwise the
+ * result of nfs3_fh_resolve_and_resume().
+ */
+int
+acl3svc_getacl (rpcsvc_request_t *req)
+{
+        xlator_t                *vol = NULL;
+        struct nfs_state        *nfs = NULL;
+        nfs3_state_t            *nfs3 = NULL;
+        nfs3_call_state_t       *cs = NULL;
+        int                     ret = RPCSVC_ACTOR_ERROR;
+        nfsstat3                stat = NFS3ERR_SERVERFAULT;
+        struct nfs3_fh          fh, *fhp = NULL;
+        getaclargs              getaclargs;
+        getaclreply             getaclreply;
+
+        if (!req)
+                return ret;
+
+        acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+        nfs = nfs_state (nfs3->nfsx);
+        memset (&getaclargs, 0, sizeof (getaclargs));
+        memset (&getaclreply, 0, sizeof (getaclreply));
+        memset (&fh, 0, sizeof (fh));
+        getaclargs.fh.n_bytes = (char *)&fh;
+        if (xdr_to_getaclargs(req->msg[0], &getaclargs) <= 0) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+                        "Error decoding args");
+                rpcsvc_request_seterr (req, GARBAGE_ARGS);
+                goto rpcerr;
+        }
+
+        /* Validate ACL mask */
+        if (getaclargs.mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) {
+                stat = NFS3ERR_INVAL;
+                goto acl3err;
+        }
+
+        fhp = &fh;
+        acl3_validate_gluster_fh (fhp, stat, acl3err);
+        acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, vol, stat, acl3err);
+        acl3_volume_started_check (nfs3, vol, ret, rpcerr);
+        acl3_handle_call_state_init (nfs->nfs3state, cs, req,
+                                     vol, stat, acl3err);
+
+        cs->vol = vol;
+        cs->args.getaclreply.mask = getaclargs.mask;
+
+        ret = nfs3_fh_resolve_and_resume (cs, fhp, NULL, acl3_getacl_resume);
+        stat = nfs3_errno_to_nfsstat3 (-ret);
+
+acl3err:
+        /* ret is still RPCSVC_ACTOR_ERROR when one of the checks above jumped
+         * here, so those failures are answered with 'stat' as well.
+         */
+        if (ret < 0) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, -ret, NFS_MSG_RESOLVE_ERROR,
+                        "unable to resolve and resume");
+                getaclreply.status = stat;
+                acl3_getacl_reply (req, &getaclreply);
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+rpcerr:
+        return ret;
+}
+
+/* acl3_setacl_cbk: setxattr callback of the SETACL path.
+ *
+ * Records the failure status in the SETACL reply when op_ret is negative,
+ * submits the reply and wipes the call state taken from frame->local.
+ * Always returns 0.
+ */
+int
+acl3_setacl_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno,
+                 dict_t *xdata)
+{
+        nfs3_call_state_t       *state = frame->local;
+
+        if (op_ret < 0)
+                state->args.setaclreply.status =
+                        nfs3_cbk_errno_status (op_ret, op_errno);
+
+        acl3_setacl_reply (state->req, &state->args.setaclreply);
+        nfs3_call_state_wipe (state);
+
+        return 0;
+}
+
+/* acl3_setacl_resume: continuation of SETACL once the file handle has been
+ * resolved.
+ *
+ * carg - the nfs3_call_state_t of the request; -1 is returned if it is NULL.
+ *
+ * Builds a dict holding cs->aclxattr under POSIX_ACL_ACCESS_XATTR (when
+ * cs->aclcount is non-zero) and cs->daclxattr under POSIX_ACL_DEFAULT_XATTR
+ * (when cs->daclcount is non-zero) and issues a setxattr with
+ * acl3_setacl_cbk as callback.
+ *
+ * Compared with the earlier version:
+ *  - a failed dict_new() or dict_set_static_bin() now aborts the request
+ *    instead of being ignored;
+ *  - 'stat' always holds an NFS status and 'op_errno' an errno, so the
+ *    status produced by acl3_check_fh_resolve_status is sent as-is rather
+ *    than being replaced by the conversion of -ret.
+ *
+ * On any failure an error reply is submitted and the call state is wiped.
+ * Returns the nfs_setxattr() result, -ENOMEM when the dict could not be
+ * built, or -1 when the handle did not resolve.
+ */
+int
+acl3_setacl_resume (void *carg)
+{
+        int                     ret = -1;
+        int                     op_errno = 0;
+        nfs3_call_state_t       *cs = NULL;
+        nfsstat3                stat = NFS3ERR_SERVERFAULT;
+        nfs_user_t              nfu = {0, };
+        dict_t                  *xattr = NULL;
+
+        if (!carg)
+                return ret;
+        cs = (nfs3_call_state_t *)carg;
+        /* errno to log if the resolve check below jumps to acl3err */
+        op_errno = cs->resolve_errno;
+        acl3_check_fh_resolve_status (cs, stat, acl3err);
+        nfs_request_user_init (&nfu, cs->req);
+
+        xattr = dict_new();
+        if (!xattr) {
+                ret = -ENOMEM;
+                goto xattrerr;
+        }
+
+        /* NOTE(review): any dict_set_static_bin() failure is reported as
+         * ENOMEM here; confirm against its documented failure modes.
+         */
+        if (cs->aclcount) {
+                ret = dict_set_static_bin (xattr, POSIX_ACL_ACCESS_XATTR,
+                                           cs->aclxattr,
+                                           posix_acl_xattr_size (cs->aclcount));
+                if (ret) {
+                        ret = -ENOMEM;
+                        goto xattrerr;
+                }
+        }
+        if (cs->daclcount) {
+                ret = dict_set_static_bin (xattr, POSIX_ACL_DEFAULT_XATTR,
+                                           cs->daclxattr,
+                                           posix_acl_xattr_size (cs->daclcount));
+                if (ret) {
+                        ret = -ENOMEM;
+                        goto xattrerr;
+                }
+        }
+
+        ret = nfs_setxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, xattr,
+                            0, NULL, acl3_setacl_cbk, cs);
+
+xattrerr:
+        if (ret < 0) {
+                op_errno = -ret;
+                stat = nfs3_errno_to_nfsstat3 (op_errno);
+        }
+        if (xattr)
+                dict_unref (xattr);
+
+acl3err:
+        if (ret < 0) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, op_errno, NFS_MSG_OPEN_FAIL,
+                        "unable to open_and_resume");
+                cs->args.setaclreply.status = stat;
+                acl3_setacl_reply (cs->req, &cs->args.setaclreply);
+                nfs3_call_state_wipe (cs);
+        }
+
+        return ret;
+}
+
+
+/* acl3svc_setacl: actor for the ACL3 SETACL procedure.
+ *
+ * Allocates two NFS_ACL_MAX_ENTRIES-sized aclentry arrays for the decoder,
+ * decodes the arguments, rejects masks with bits outside
+ * NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT, validates the file handle, maps
+ * it to a volume, creates the call state, converts both ACLs into
+ * cs->aclxattr / cs->daclxattr and starts handle resolution with
+ * acl3_setacl_resume as continuation.
+ *
+ * Both aclentry arrays are freed on every exit path of this function.
+ *
+ * Returns RPCSVC_ACTOR_ERROR when req is NULL, an allocation fails, the NFSv3
+ * state is missing or the arguments cannot be decoded; RPCSVC_ACTOR_IGNORE
+ * when the volume is not started; 0 after an error reply has been sent from
+ * here; otherwise the result of nfs3_fh_resolve_and_resume().
+ */
+int
+acl3svc_setacl (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ struct nfs3_fh fh;
+ struct nfs3_fh *fhp = NULL;
+ setaclargs setaclargs;
+ setaclreply setaclreply;
+ aclentry *daclentry = NULL;
+ aclentry *aclentry = NULL;
+ int aclerrno = 0;
+ int defacl = 1;
+
+ if (!req)
+ return ret;
+ /* Scratch arrays the XDR decoder fills with the access and default ACL. */
+ aclentry = GF_CALLOC (NFS_ACL_MAX_ENTRIES, sizeof(*aclentry),
+ gf_nfs_mt_arr);
+ if (!aclentry) {
+ goto rpcerr;
+ }
+ daclentry = GF_CALLOC (NFS_ACL_MAX_ENTRIES, sizeof(*daclentry),
+ gf_nfs_mt_arr);
+ if (!daclentry) {
+ goto rpcerr;
+ }
+
+ acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ memset (&setaclargs, 0, sizeof (setaclargs));
+ memset (&setaclreply, 0, sizeof (setaclreply));
+ memset (&fh, 0, sizeof (fh));
+ /* Point the decoder at the local handle and the scratch arrays. */
+ setaclargs.fh.n_bytes = (char *)&fh;
+ setaclargs.aclentry.aclentry_val = aclentry;
+ setaclargs.daclentry.daclentry_val = daclentry;
+ if (xdr_to_setaclargs(req->msg[0], &setaclargs) <= 0) {
+ gf_msg (GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ /* Validate ACL mask */
+ if (setaclargs.mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) {
+ stat = NFS3ERR_INVAL;
+ goto acl3err;
+ }
+
+ fhp = &fh;
+ acl3_validate_gluster_fh (fhp, stat, acl3err);
+ acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, vol, stat, acl3err);
+ acl3_volume_started_check (nfs3, vol, ret, rpcerr);
+ acl3_handle_call_state_init (nfs->nfs3state, cs, req,
+ vol, stat, acl3err);
+
+ cs->vol = vol;
+ /* NOTE(review): the counts come straight from the decoded request;
+ * presumably acl3_nfs_acl_to_xattr bounds them - confirm. */
+ cs->aclcount = setaclargs.aclcount;
+ cs->daclcount = setaclargs.daclcount;
+
+ /* setfacl: NFS USER ACL */
+ aclerrno = acl3_nfs_acl_to_xattr (aclentry,
+ cs->aclxattr,
+ cs->aclcount,
+ !defacl);
+ if (aclerrno < 0) {
+ gf_msg (GF_ACL, GF_LOG_ERROR, -aclerrno,
+ NFS_MSG_SET_USER_ACL_FAIL,
+ "Failed to set USER ACL");
+ stat = nfs3_errno_to_nfsstat3 (-aclerrno);
+ goto acl3err;
+ }
+
+ /* setfacl: NFS DEFAULT ACL */
+ aclerrno = acl3_nfs_acl_to_xattr (daclentry,
+ cs->daclxattr,
+ cs->daclcount,
+ defacl);
+ if (aclerrno < 0) {
+ gf_msg (GF_ACL, GF_LOG_ERROR, -aclerrno,
+ NFS_MSG_SET_DEF_ACL_FAIL,
+ "Failed to set DEFAULT ACL");
+ stat = nfs3_errno_to_nfsstat3 (-aclerrno);
+ goto acl3err;
+ }
+
+ ret = nfs3_fh_resolve_and_resume (cs, fhp, NULL, acl3_setacl_resume);
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+acl3err:
+ /* ret is still RPCSVC_ACTOR_ERROR when one of the checks above jumped
+ * here, so those failures are answered with 'stat' as well. */
+ if (ret < 0) {
+ gf_msg (GF_ACL, GF_LOG_ERROR, -ret, NFS_MSG_RESOLVE_ERROR,
+ "unable to resolve and resume");
+ setaclreply.status = stat;
+ acl3_setacl_reply (req, &setaclreply);
+ nfs3_call_state_wipe (cs);
+ GF_FREE(aclentry);
+ GF_FREE(daclentry);
+ return 0;
+ }
+
+rpcerr:
+ /* Reached on success (fall-through) and on failures that send no ACL
+ * reply from this function. */
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+ if (aclentry)
+ GF_FREE (aclentry);
+ if (daclentry)
+ GF_FREE (daclentry);
+ return ret;
+}
+
+
+
+/* Actor table of the ACL3 program: one entry per ACL3_* procedure number,
+ * pairing the procedure name with the function that services it. */
+rpcsvc_actor_t acl3svc_actors[ACL3_PROC_COUNT] = {
+ {"NULL", ACL3_NULL, acl3svc_null, NULL, 0, DRC_NA},
+ {"GETACL", ACL3_GETACL, acl3svc_getacl, NULL, 0, DRC_NA},
+ {"SETACL", ACL3_SETACL, acl3svc_setacl, NULL, 0, DRC_NA},
+};
+
+/* Program descriptor for ACL3; acl3svc_init() fills in .private and returns
+ * a pointer to this object.
+ * NOTE(review): .progport is GF_NFS3_PORT while acl3svc_init() creates its
+ * listener on GF_ACL3_PORT -- confirm this is intended. */
+rpcsvc_program_t acl3prog = {
+ .progname = "ACL3",
+ .prognum = ACL_PROGRAM,
+ .progver = ACLV3_VERSION,
+ .progport = GF_NFS3_PORT,
+ .actors = acl3svc_actors,
+ .numactors = ACL3_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+};
+
+/*
+ * acl3svc_init -- Prepare the ACL3 program and create its listener.
+ *
+ * The first successful call stores the NFSv3 state in acl3prog.private and
+ * creates a socket listener on GF_ACL3_PORT; later calls return the already
+ * initialised program without doing any work.
+ *
+ * @nfsx: the NFS xlator; its private data must be a struct nfs_state.
+ *
+ * @return: &acl3prog on success, NULL on failure. The options dict built
+ *          here is released on every failure path.
+ */
+rpcsvc_program_t *
+acl3svc_init(xlator_t *nfsx)
+{
+        struct nfs3_state *ns = NULL;
+        struct nfs_state *nfs = NULL;
+        dict_t *options = NULL;
+        int ret = -1;
+        char *portstr = NULL;
+        static gf_boolean_t acl3_inited = _gf_false;
+
+        /* Already inited */
+        if (acl3_inited)
+                return &acl3prog;
+
+        nfs = (struct nfs_state*)nfsx->private;
+
+        ns = nfs->nfs3state;
+        if (!ns) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, EINVAL, NFS_MSG_ACL_INIT_FAIL,
+                        "ACL3 init failed");
+                goto err;
+        }
+        acl3prog.private = ns;
+
+        options = dict_new ();
+        if (!options) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, ENOMEM, NFS_MSG_ACL_INIT_FAIL,
+                        "ACL3 init failed: dict allocation failed");
+                goto err;
+        }
+
+        ret = gf_asprintf (&portstr, "%d", GF_ACL3_PORT);
+        if (ret == -1)
+                goto err;
+
+        ret = dict_set_dynstr (options, "transport.socket.listen-port",
+                               portstr);
+        if (ret == -1)
+                goto err;
+        ret = dict_set_str (options, "transport-type", "socket");
+        if (ret == -1) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_str error");
+                goto err;
+        }
+
+        if (nfs->allow_insecure) {
+                ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+                if (ret == -1) {
+                        gf_msg (GF_ACL, GF_LOG_ERROR, errno,
+                                NFS_MSG_DICT_SET_FAILED,
+                                "dict_set_str error");
+                        goto err;
+                }
+                ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+                if (ret == -1) {
+                        gf_msg (GF_ACL, GF_LOG_ERROR, errno,
+                                NFS_MSG_DICT_SET_FAILED,
+                                "dict_set_str error");
+                        goto err;
+                }
+        }
+
+        ret = dict_set_str (options, "transport.address-family", "inet");
+        if (ret == -1) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, errno,
+                        NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_str error");
+                goto err;
+        }
+
+        ret = rpcsvc_create_listeners (nfs->rpcsvc, options, "ACL");
+        if (ret == -1) {
+                gf_msg (GF_ACL, GF_LOG_ERROR, errno,
+                        NFS_MSG_LISTENERS_CREATE_FAIL,
+                        "Unable to create listeners");
+                goto err;
+        }
+
+        /* On success the options dict is deliberately left alone, exactly
+         * as before: whether the listener keeps using it is not visible
+         * from this function. */
+        acl3_inited = _gf_true;
+        return &acl3prog;
+err:
+        /* Single release point: previously only the listener failure
+         * dropped the dict, every earlier failure leaked it. */
+        if (options)
+                dict_unref (options);
+        return NULL;
+}
+
+/*
+ * acl3_nfs_acl_to_xattr -- Convert NFSv3 ACL entries to a POSIX ACL xattr.
+ *
+ * @ace      : ACL entries to be read
+ * @xattrbuf : xattr buffer to be populated (header followed by entries)
+ * @aclcount : number of entries in @ace
+ * @defacl   : non-zero when @ace describes a DEFAULT ACL
+ *
+ * For a default ACL, NFSv3 ORs NFS_ACL_DEFAULT into the entry type; that
+ * flag is masked off before the tag is stored.
+ *
+ * @return: 0 on success (a zero @aclcount leaves the buffer untouched),
+ *          -EINVAL for NULL arguments, an out-of-range count, an unknown
+ *          tag, or permission bits outside S_IRWXO on a non-mask entry.
+ */
+static int
+acl3_nfs_acl_to_xattr (aclentry *ace,
+                       void *xattrbuf,
+                       int aclcount,
+                       int defacl)
+{
+        posix_acl_xattr_header *hdr = NULL;
+        posix_acl_xattr_entry  *dst = NULL;
+        int                     i   = 0;
+
+        if (!ace || !xattrbuf)
+                return -EINVAL;
+
+        /* nothing to convert */
+        if (aclcount == 0)
+                return 0;
+
+        if (aclcount < 0 || aclcount > NFS_ACL_MAX_ENTRIES)
+                return -EINVAL;
+
+        hdr = (posix_acl_xattr_header *) xattrbuf;
+        hdr->version = POSIX_ACL_XATTR_VERSION;
+
+        /* entries start right behind the header */
+        dst = (posix_acl_xattr_entry *) (hdr + 1);
+
+        for (i = 0; i < aclcount; i++, ace++, dst++) {
+                dst->tag = ace->type;
+                if (defacl)
+                        dst->tag &= ~NFS_ACL_DEFAULT;
+                dst->perm = ace->perm;
+
+                switch (dst->tag) {
+                case POSIX_ACL_MASK:
+                        /* Solaris sometimes sets additional bits in
+                         * the mask, so trim instead of rejecting.
+                         */
+                        dst->perm &= S_IRWXO;
+                        dst->id = POSIX_ACL_UNDEFINED_ID;
+                        continue;
+                case POSIX_ACL_USER:
+                case POSIX_ACL_GROUP:
+                case POSIX_ACL_USER_OBJ:
+                case POSIX_ACL_GROUP_OBJ:
+                case POSIX_ACL_OTHER:
+                        break;
+                default:
+                        return -EINVAL;
+                }
+
+                if (dst->perm & ~S_IRWXO)
+                        return -EINVAL;
+
+                /* only named user/group entries carry an id */
+                if (dst->tag == POSIX_ACL_USER || dst->tag == POSIX_ACL_GROUP)
+                        dst->id = ace->uid;
+                else
+                        dst->id = POSIX_ACL_UNDEFINED_ID;
+        }
+
+        return 0;
+}
+
+/*
+ * acl3_nfs_acl_from_xattr -- Convert a POSIX ACL xattr to NFSv3 ACL entries.
+ *
+ * @ace      : ACL entries to be filled
+ * @xattrbuf : xattr buffer to be read (header followed by entries)
+ * @bufsize  : size of @xattrbuf, used to derive the entry count
+ * @defacl   : non-zero when the xattr holds a DEFAULT ACL
+ *
+ * @return: the number of entries on success, -EINVAL for NULL arguments,
+ *          an out-of-range entry count or an unknown tag, -ENOSYS when the
+ *          xattr header carries an unsupported version.
+ */
+static int
+acl3_nfs_acl_from_xattr (aclentry *ace,
+                         void *xattrbuf,
+                         int bufsize,
+                         int defacl)
+{
+        posix_acl_xattr_header *hdr   = NULL;
+        posix_acl_xattr_entry  *src   = NULL;
+        ssize_t                 count = 0;
+        int                     i     = 0;
+
+        if (!xattrbuf || !ace)
+                return -EINVAL;
+
+        count = posix_acl_xattr_count (bufsize);
+        if (count < 0 || count > NFS_ACL_MAX_ENTRIES)
+                return -EINVAL;
+
+        hdr = (posix_acl_xattr_header *) xattrbuf;
+
+        /* only the known POSIX ACL xattr version is supported */
+        if (hdr->version != POSIX_ACL_XATTR_VERSION)
+                return -ENOSYS;
+
+        src = (posix_acl_xattr_entry *) (hdr + 1);
+
+        for (i = 0; i < (int)count; i++, src++, ace++) {
+                ace->type = src->tag;
+                /* put back the NFS_ACL_DEFAULT flag that was masked off
+                 * when the default ACL was stored */
+                if (defacl)
+                        ace->type |= NFS_ACL_DEFAULT;
+                ace->perm = (src->perm & S_IRWXO);
+
+                switch (src->tag) {
+                case POSIX_ACL_USER:
+                case POSIX_ACL_GROUP:
+                        ace->uid = src->id;
+                        break;
+                case POSIX_ACL_USER_OBJ:
+                case POSIX_ACL_GROUP_OBJ:
+                case POSIX_ACL_MASK:
+                case POSIX_ACL_OTHER:
+                        ace->uid = POSIX_ACL_UNDEFINED_ID;
+                        break;
+                default:
+                        return -EINVAL;
+                }
+        }
+
+        /* SUCCESS: ACL count */
+        return count;
+}
diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h
new file mode 100644
index 00000000000..3ccc587bd59
--- /dev/null
+++ b/xlators/nfs/server/src/acl3.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef _ACL3_H
+#define _ACL3_H
+
+#include "glusterfs-acl.h"
+
+/* ACL3 procedure numbers, used as indices/ids in the acl3svc_actors table;
+ * ACL3_PROC_COUNT is the size of that table. */
+#define ACL3_NULL 0
+#define ACL3_GETACL 1
+#define ACL3_SETACL 2
+#define ACL3_PROC_COUNT 3
+
+/* Port the ACL3 listener is created on, and the log domain used by the
+ * ACL3 code. */
+#define GF_ACL3_PORT 38469
+#define GF_ACL GF_NFS"-ACL"
+
+/* Flags accepted in the getacl/setacl mask; acl3svc_setacl() rejects a
+ * mask with any other bit set. */
+#define NFS_ACL 0x0001
+#define NFS_ACLCNT 0x0002
+#define NFS_DFACL 0x0004
+#define NFS_DFACLCNT 0x0008
+
+/*
+ * NFSv3 identifies a default ACL entry by OR'ing NFS_ACL_DEFAULT into its
+ * type. Gluster NFS needs to mask the flag OFF before sending the entry to
+ * the POSIX layer or file system layer.
+ */
+#define NFS_ACL_DEFAULT 0x1000
+
+/* Upper bound on the number of entries in one ACL; also the size of the
+ * entry arrays allocated by acl3svc_setacl(). */
+#define NFS_ACL_MAX_ENTRIES 1024
+
+rpcsvc_program_t *
+acl3svc_init(xlator_t *nfsx);
+
+#endif
diff --git a/xlators/nfs/server/src/auth-cache.c b/xlators/nfs/server/src/auth-cache.c
new file mode 100644
index 00000000000..730e0a97d20
--- /dev/null
+++ b/xlators/nfs/server/src/auth-cache.c
@@ -0,0 +1,489 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "refcount.h"
+#include "auth-cache.h"
+#include "nfs3.h"
+#include "exports.h"
+#include "nfs-messages.h"
+
+/* Outcome of a lookup in the auth cache: ENTRY_FOUND when a live entry was
+ * found, ENTRY_NOT_FOUND when the key is not in the dict, ENTRY_EXPIRED
+ * when the entry was found but is older than the cache TTL. */
+enum auth_cache_lookup_results {
+ ENTRY_FOUND = 0,
+ ENTRY_NOT_FOUND = -1,
+ ENTRY_EXPIRED = -2,
+};
+
+/* One cached authorization; auth_cache_add() wraps it in a data_t and
+ * stores it in auth_cache->cache_dict. */
+struct auth_cache_entry {
+ GF_REF_DECL; /* refcounting */
+ data_t *data; /* data_unref() on refcount == 0 */
+
+ time_t timestamp; /* creation time, compared against the cache TTL */
+ struct export_item *item; /* export item passed to cache_nfs_fh() */
+};
+
+/* Given a filehandle and an ip, creates a colon delimited hashkey of the
+ * form "exportid:mountid:host".
+ *
+ * The gfid of the filehandle is not part of the key, so it is no longer
+ * converted to a string here.
+ *
+ * @return: newly allocated key that the caller releases with GF_FREE(),
+ *          NULL when the allocation failed.
+ */
+static char*
+make_hashkey(struct nfs3_fh *fh, const char *host)
+{
+        char    *hashkey       = NULL;
+        char     exportid[256] = {0, };
+        char     mountid[256]  = {0, };
+        size_t   nbytes        = 0;
+
+        gf_uuid_unparse (fh->exportid, exportid);
+        gf_uuid_unparse (fh->mountid, mountid);
+
+        /* +3: two ':' separators and the terminating NUL */
+        nbytes = strlen (exportid) + strlen (host)
+                 + strlen (mountid) + 3;
+        hashkey = GF_MALLOC (nbytes, gf_common_mt_char);
+        if (!hashkey)
+                return NULL;
+
+        snprintf (hashkey, nbytes, "%s:%s:%s", exportid,
+                  mountid, host);
+
+        return hashkey;
+}
+
+/**
+ * auth_cache_init -- Allocate an auth cache with an empty dict and the
+ *                    given time-to-live.
+ *
+ * @ttl_sec : lifetime of a cache entry, in seconds
+ *
+ * @return  : the new auth cache, or NULL when either the cache or its dict
+ *            could not be allocated.
+ */
+struct auth_cache *
+auth_cache_init (time_t ttl_sec)
+{
+        struct auth_cache *cache = NULL;
+
+        cache = GF_CALLOC (1, sizeof (*cache), gf_nfs_mt_auth_cache);
+        GF_VALIDATE_OR_GOTO ("auth-cache", cache, out);
+
+        cache->cache_dict = dict_new ();
+        if (cache->cache_dict) {
+                LOCK_INIT (&cache->lock);
+                cache->ttl_sec = ttl_sec;
+        } else {
+                /* no dict, no cache */
+                GF_FREE (cache);
+                cache = NULL;
+        }
+out:
+        return cache;
+}
+
+/* auth_cache_entry_free -- called by refcounting subsystem on refcount == 0
+ *
+ * Releases the data_t reference held in entry->data (if there is one) and
+ * frees the entry itself. An entry that was never wrapped in a data_t --
+ * e.g. because auth_cache_add() failed before setting entry->data -- is
+ * freed as well; it used to be leaked.
+ *
+ * @to_free: auth_cache_entry that has refcount == 0 and needs to get free'd
+ */
+void
+auth_cache_entry_free (void *to_free)
+{
+        struct auth_cache_entry *entry      = to_free;
+        data_t                  *entry_data = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NFS, entry, out);
+
+        entry_data = entry->data;
+        if (entry_data) {
+                /* set data_t->data to NULL, otherwise data_unref() tries
+                 * to free it */
+                entry_data->data = NULL;
+                data_unref (entry_data);
+        }
+
+        GF_FREE (entry);
+out:
+        return;
+}
+
+/**
+ * auth_cache_entry_init -- Allocate a zeroed auth cache entry and set up
+ *                          its refcounting with auth_cache_entry_free() as
+ *                          the release function.
+ *
+ * @return: the new entry, or NULL (after logging a warning) when the
+ *          allocation failed.
+ */
+static struct auth_cache_entry *
+auth_cache_entry_init ()
+{
+        struct auth_cache_entry *fresh = GF_CALLOC (1, sizeof (*fresh),
+                                                    gf_nfs_mt_auth_cache_entry);
+
+        if (fresh) {
+                GF_REF_INIT (fresh, auth_cache_entry_free);
+                return fresh;
+        }
+
+        gf_msg (GF_NFS, GF_LOG_WARNING, ENOMEM, NFS_MSG_NO_MEMORY,
+                "failed to allocate entry");
+        return NULL;
+}
+
+/**
+ * auth_cache_add -- Store @entry in cache->cache_dict under @hashkey.
+ *
+ * A reference on @entry is taken on behalf of the cache and dropped again
+ * if the entry could not be stored. The entry is wrapped in a data_t on
+ * which the entry keeps an extra reference of its own.
+ *
+ * @return: 0 on success, non-zero otherwise.
+ */
+static int
+auth_cache_add (struct auth_cache *cache, char *hashkey,
+                struct auth_cache_entry *entry)
+{
+        int     ret     = -1;
+        data_t *wrapped = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NFS, cache, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, cache->cache_dict, out);
+
+        /* entry does not have any references: give up, ret is still -1 */
+        if (GF_REF_GET (entry) == 0)
+                goto out;
+
+        wrapped = bin_to_data (entry, sizeof (*entry));
+        if (!wrapped)
+                goto drop_ref;
+
+        /* we'll take an extra ref on the data_t, it gets unref'd when the
+         * auth_cache_entry is released */
+        entry->data = data_ref (wrapped);
+
+        LOCK (&cache->lock);
+        {
+                ret = dict_set (cache->cache_dict, hashkey, wrapped);
+        }
+        UNLOCK (&cache->lock);
+
+        if (ret == 0)
+                goto out;
+
+        /* wrapping or adding to the dict failed */
+drop_ref:
+        GF_REF_PUT (entry);
+out:
+        return ret;
+}
+
+/**
+ * _auth_cache_expired -- Tell whether @entry is older than the cache TTL.
+ *
+ * The auth_cache->lock should have been taken when this function is called.
+ *
+ * @return: non-zero when the age of @entry exceeds cache->ttl_sec,
+ *          0 otherwise.
+ */
+static int
+_auth_cache_expired (struct auth_cache *cache, struct auth_cache_entry *entry)
+{
+        time_t age = time (NULL) - entry->timestamp;
+
+        return (age > cache->ttl_sec);
+}
+
+/**
+ * auth_cache_get -- Get the @hashkey entry from the cache->cache_dict
+ *
+ * @cache: The auth_cache that should contain the @entry.
+ * @hashkey: The key associated with the auth_cache_entry.
+ * @entry: The found auth_cache_entry, unmodified if not found/expired.
+ *
+ * Using the cache->cache_dict requires locking, this function takes care of
+ * that. When the entry is found, a reference is taken on it; the caller
+ * releases it with GF_REF_PUT(). When the entry is found but has expired,
+ * it is removed from the cache_dict and the references held by this lookup
+ * and by the cache are dropped, so the entry is released through
+ * auth_cache_entry_free() once nobody uses it any more.
+ *
+ * @return: 0 when found, ENTRY_NOT_FOUND or ENTRY_EXPIRED otherwise.
+ */
+static enum auth_cache_lookup_results
+auth_cache_get (struct auth_cache *cache, char *hashkey,
+                struct auth_cache_entry **entry)
+{
+        enum auth_cache_lookup_results  ret        = ENTRY_NOT_FOUND;
+        data_t                         *entry_data = NULL;
+        struct auth_cache_entry        *lookup_res = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NFS, cache, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, cache->cache_dict, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, hashkey, out);
+
+        LOCK (&cache->lock);
+        {
+                entry_data = dict_get (cache->cache_dict, hashkey);
+                if (!entry_data)
+                        goto unlock;
+
+                lookup_res = (struct auth_cache_entry *)(entry_data->data);
+                if (GF_REF_GET (lookup_res) == 0) {
+                        /* entry has been free'd */
+                        ret = ENTRY_EXPIRED;
+                        goto unlock;
+                }
+
+                if (_auth_cache_expired (cache, lookup_res)) {
+                        ret = ENTRY_EXPIRED;
+
+                        /* Remove the entry from the cache; this drops the
+                         * dict's reference on the data_t. */
+                        dict_del (cache->cache_dict, hashkey);
+
+                        /* Drop the reference taken above and the one the
+                         * cache took in auth_cache_add(). Going through
+                         * the refcount (instead of GF_FREE()) releases the
+                         * data_t reference held by the entry and keeps the
+                         * entry alive for any other holder. */
+                        GF_REF_PUT (lookup_res);
+                        GF_REF_PUT (lookup_res);
+
+                        goto unlock;
+                }
+
+                *entry = lookup_res;
+                ret = ENTRY_FOUND;
+        }
+unlock:
+        UNLOCK (&cache->lock);
+
+out:
+        return ret;
+}
+
+/**
+ * auth_cache_lookup -- Look up the entry cached for a filehandle and host
+ *
+ * @cache: cache to lookup from
+ * @fh   : FH to use in lookup
+ * @host_addr: Address to use in lookup
+ * @timestamp: Receives the timestamp of the entry when lookup succeeds
+ * @can_write: Receives whether the host is authorized to write
+ *
+ * An entry whose age exceeds the TTL of the cache is removed from the dict
+ * by auth_cache_get() and reported as ENTRY_EXPIRED.
+ *
+ * @return: ENTRY_EXPIRED if entry expired
+ *          ENTRY_NOT_FOUND if entry not found in dict (or an argument was
+ *          invalid)
+ *          -ENOMEM if the hashkey could not be allocated
+ *          0 if found
+ */
+enum auth_cache_lookup_results
+auth_cache_lookup (struct auth_cache *cache, struct nfs3_fh *fh,
+                   const char *host_addr, time_t *timestamp,
+                   gf_boolean_t *can_write)
+{
+        enum auth_cache_lookup_results  ret = ENTRY_NOT_FOUND;
+        struct auth_cache_entry        *hit = NULL;
+        char                           *key = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NFS, cache, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, fh, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, host_addr, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, timestamp, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, can_write, out);
+
+        key = make_hashkey (fh, host_addr);
+        if (!key) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        ret = auth_cache_get (cache, key, &hit);
+        if (ret == ENTRY_FOUND) {
+                *timestamp = hit->timestamp;
+                *can_write = hit->item->opts->rw;
+                /* done with the reference auth_cache_get() took for us */
+                GF_REF_PUT (hit);
+        } else if (ret == ENTRY_NOT_FOUND) {
+                gf_msg_debug (GF_NFS, 0, "could not find entry for %s",
+                              host_addr);
+        } else if (ret == ENTRY_EXPIRED) {
+                gf_msg_debug (GF_NFS, 0, "entry for host %s has expired",
+                              host_addr);
+        }
+
+out:
+        GF_FREE (key);
+
+        return ret;
+}
+
+/* auth_cache_entry_purge -- drop one reference on a cached entry
+ *
+ * Callback for dict_foreach(), used by auth_cache_purge(). The refcount of
+ * the auth_cache_entry stored in @v is decremented; once it reaches 0,
+ * auth_cache_entry_free() calls data_unref() to free the associated data_t.
+ *
+ * @d: dict that gets purged by auth_cache_purge()
+ * @k: hashkey of the current entry
+ * @v: data_t of the current entry
+ *
+ * @return: always 0
+ */
+int
+auth_cache_entry_purge (dict_t *d, char *k, data_t *v, void *_unused)
+{
+        struct auth_cache_entry *cached = NULL;
+
+        cached = (struct auth_cache_entry *) v->data;
+        if (!cached)
+                return 0;
+
+        GF_REF_PUT (cached);
+        return 0;
+}
+
+/**
+ * auth_cache_purge -- Purge the dict in the cache and create a new empty one.
+ *
+ * The replacement dict is allocated only after @cache has been checked, so
+ * nothing is leaked when @cache is NULL. When the replacement dict cannot
+ * be allocated the cache is left untouched.
+ *
+ * @cache: Cache to purge
+ *
+ */
+void
+auth_cache_purge (struct auth_cache *cache)
+{
+        dict_t *new_cache_dict = NULL;
+        dict_t *old_cache_dict = NULL;
+
+        if (!cache)
+                goto out;
+
+        new_cache_dict = dict_new ();
+        if (!new_cache_dict)
+                goto out;
+
+        /* swap the dicts under the lock, clean up the old one outside it */
+        LOCK (&cache->lock);
+        {
+                old_cache_dict = cache->cache_dict;
+                cache->cache_dict = new_cache_dict;
+        }
+        UNLOCK (&cache->lock);
+
+        /* walk all entries and refcount-- with GF_REF_PUT() */
+        dict_foreach (old_cache_dict, auth_cache_entry_purge, NULL);
+        dict_unref (old_cache_dict);
+out:
+        return;
+}
+
+/**
+ * is_nfs_fh_cached -- Checks if an NFS FH is cached for the given host
+ *
+ * @cache: The fh cache
+ * @fh: The fh to use in lookup
+ * @host_addr: Address to use in lookup
+ *
+ * @return: TRUE if cached, FALSE otherwise (including a NULL @fh)
+ *
+ */
+gf_boolean_t
+is_nfs_fh_cached (struct auth_cache *cache, struct nfs3_fh *fh,
+                  const char *host_addr)
+{
+        time_t       when     = 0;
+        gf_boolean_t writable = _gf_false;
+        int          lookup   = 0;
+
+        if (!fh)
+                return _gf_false;
+
+        /* timestamp and write permission are required by the lookup but
+         * not of interest here */
+        lookup = auth_cache_lookup (cache, fh, host_addr, &when, &writable);
+
+        return (lookup == ENTRY_FOUND);
+}
+
+
+/**
+ * is_nfs_fh_cached_and_writeable -- Checks if an NFS FH is cached for the
+ *                                   given host and the host may write
+ *
+ * @cache: The fh cache
+ * @fh: The fh to use in lookup
+ * @host_addr: Address to use in lookup
+ *
+ * @return: TRUE if cached & writable, FALSE otherwise (including a NULL
+ *          @fh)
+ *
+ */
+gf_boolean_t
+is_nfs_fh_cached_and_writeable (struct auth_cache *cache, struct nfs3_fh *fh,
+                                const char *host_addr)
+{
+        time_t       when      = 0;
+        gf_boolean_t may_write = _gf_false;
+        int          lookup    = 0;
+
+        if (!fh)
+                return _gf_false;
+
+        lookup = auth_cache_lookup (cache, fh, host_addr, &when, &may_write);
+
+        return ((lookup == ENTRY_FOUND) && may_write);
+}
+
+/**
+ * cache_nfs_fh -- Places the nfs file handle in the underlying dict that we
+ *                 are using as our cache. The key is
+ *                 "exportid:mountid:host_addr" (see make_hashkey()), the
+ *                 value is an entry struct containing the export item that
+ *                 was authorized for the operation and the time the entry
+ *                 was created.
+ *
+ * @cache: The cache to place fh's in
+ * @fh   : The fh to cache
+ * @host_addr: The address of the host
+ * @export_item: The export item that was authorized
+ *
+ * @return: 0 when the fh is cached (newly, or already present), -EINVAL on
+ *          a NULL argument, -ENOMEM on allocation failure, otherwise the
+ *          non-zero result of auth_cache_add().
+ */
+int
+cache_nfs_fh (struct auth_cache *cache, struct nfs3_fh *fh,
+              const char *host_addr, struct export_item *export_item)
+{
+        int                      ret       = -EINVAL;
+        char                    *hashkey   = NULL;
+        time_t                   timestamp = 0;
+        gf_boolean_t             can_write = _gf_false;
+        struct auth_cache_entry *entry     = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NFS, host_addr, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, cache, out);
+        GF_VALIDATE_OR_GOTO (GF_NFS, fh, out);
+
+        /* If we could already find it in the cache, just return */
+        ret = auth_cache_lookup (cache, fh, host_addr, &timestamp, &can_write);
+        if (ret == ENTRY_FOUND) {
+                gf_msg_trace (GF_NFS, 0, "found cached auth/fh for host "
+                              "%s", host_addr);
+                goto out;
+        }
+
+        hashkey = make_hashkey (fh, host_addr);
+        if (!hashkey) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        entry = auth_cache_entry_init ();
+        if (!entry) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        entry->timestamp = time (NULL);
+        entry->item = export_item;
+
+        ret = auth_cache_add (cache, hashkey, entry);
+        /* drop the creator's reference; on success the cache holds its own */
+        GF_REF_PUT (entry);
+        if (ret)
+                goto out;
+
+        gf_msg_trace (GF_NFS, 0, "Caching file-handle (%s)", host_addr);
+        ret = 0;
+
+out:
+        GF_FREE (hashkey);
+
+        return ret;
+}
diff --git a/xlators/nfs/server/src/auth-cache.h b/xlators/nfs/server/src/auth-cache.h
new file mode 100644
index 00000000000..a3ea5a43ded
--- /dev/null
+++ b/xlators/nfs/server/src/auth-cache.h
@@ -0,0 +1,54 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _AUTH_CACHE_H_
+#define _AUTH_CACHE_H_
+
+#include "nfs-mem-types.h"
+#include "mount3.h"
+#include "exports.h"
+#include "dict.h"
+#include "nfs3.h"
+
+/* Cache of authorized filehandles, created by auth_cache_init() and
+ * emptied by auth_cache_purge(). */
+struct auth_cache {
+ gf_lock_t lock; /* locking for the dict (and entries) */
+ dict_t *cache_dict; /* Dict holding fh -> authcache_entry */
+ time_t ttl_sec; /* TTL of the auth cache in seconds */
+};
+
+
+/* Initializes the cache */
+struct auth_cache *
+auth_cache_init (time_t ttl_sec);
+
+/* Inserts FH into cache */
+int
+cache_nfs_fh (struct auth_cache *cache, struct nfs3_fh *fh,
+ const char *host_addr, struct export_item *export_item);
+
+/* Checks if the filehandle is cached & writable */
+gf_boolean_t
+is_nfs_fh_cached_and_writeable (struct auth_cache *cache, struct nfs3_fh *fh,
+ const char *host_addr);
+
+/* Checks if the filehandle is cached */
+gf_boolean_t
+is_nfs_fh_cached (struct auth_cache *cache, struct nfs3_fh *fh,
+ const char *host_addr);
+
+/* Purge the cache */
+void
+auth_cache_purge (struct auth_cache *cache);
+
+#endif /* _AUTH_CACHE_H_ */
diff --git a/xlators/nfs/server/src/exports.c b/xlators/nfs/server/src/exports.c
new file mode 100644
index 00000000000..83aec254040
--- /dev/null
+++ b/xlators/nfs/server/src/exports.c
@@ -0,0 +1,1472 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "exports.h"
+#include "hashfn.h"
+#include "parse-utils.h"
+#include "nfs-messages.h"
+
+static void _exp_dict_destroy (dict_t *ng_dict);
+static void _export_options_print (const struct export_options *opts);
+static void _export_options_deinit (struct export_options *opts);
+static void _export_dir_deinit (struct export_dir *dir);
+
+/* Parsers shared by the functions in this file; created by
+ * _exp_init_parsers() and released by _exp_deinit_parsers(). */
+static struct parser *netgroup_parser;
+static struct parser *hostname_parser;
+static struct parser *options_parser;
+
+/**
+ * _exp_init_parsers -- Create the netgroup, hostname and options parsers
+ *                      used in this file, in that order, stopping at the
+ *                      first one that cannot be created.
+ *
+ * @return: success: 0
+ *          failure: -1
+ */
+static int
+_exp_init_parsers ()
+{
+        netgroup_parser = parser_init (NETGROUP_REGEX_PATTERN);
+        if (!netgroup_parser)
+                return -1;
+
+        hostname_parser = parser_init (HOSTNAME_REGEX_PATTERN);
+        if (!hostname_parser)
+                return -1;
+
+        options_parser = parser_init (OPTIONS_REGEX_PATTERN);
+
+        return options_parser ? 0 : -1;
+}
+
+/**
+ * _exp_deinit_parsers -- Release the netgroup, hostname and options
+ *                        parsers, in that order.
+ */
+static void
+_exp_deinit_parsers ()
+{
+        struct parser *all[] = {
+                netgroup_parser,
+                hostname_parser,
+                options_parser,
+        };
+        size_t         i     = 0;
+
+        for (i = 0; i < sizeof (all) / sizeof (all[0]); i++)
+                parser_deinit (all[i]);
+}
+
+/**
+ * _exports_file_init -- Initialize an exports file structure.
+ *
+ * Allocates the structure together with its two dicts (exports_dict and
+ * exports_map). When either dict cannot be allocated, whichever dict was
+ * created is released again along with the structure.
+ *
+ * @return : success: Pointer to an allocated exports file struct
+ *           failure: NULL
+ *
+ * Not for external use.
+ */
+struct exports_file *
+_exports_file_init ()
+{
+        struct exports_file *file = NULL;
+
+        file = GF_CALLOC (1, sizeof (*file), gf_common_mt_nfs_exports);
+        if (!file) {
+                gf_msg (GF_EXP, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to allocate file struct!");
+                goto out;
+        }
+
+        file->exports_dict = dict_new ();
+        file->exports_map = dict_new ();
+        if (file->exports_dict && file->exports_map)
+                goto out;
+
+        gf_msg (GF_EXP, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+                "Failed to allocate dict!");
+
+        if (file->exports_dict)
+                dict_unref (file->exports_dict);
+
+        /* previously leaked when only exports_dict failed to allocate */
+        if (file->exports_map)
+                dict_unref (file->exports_map);
+
+        GF_FREE (file);
+        file = NULL;
+out:
+        return file;
+}
+
+/**
+ * _exp_file_dict_destroy -- Free the export dir stored under @key and
+ *                           delete the key from the dict.
+ *
+ * @dict : Dict to free elements from
+ * @key  : Key in the dict we are on
+ * @val  : Value associated with that key
+ * @tmp  : Not used
+ *
+ * The data pointer of @val is reset after the export dir has been freed,
+ * so a value whose data pointer is already NULL is only deleted.
+ *
+ * Passed as a function pointer to dict_foreach(); always returns 0.
+ *
+ * Not for external use.
+ */
+static int
+_exp_file_dict_destroy (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        struct export_dir *expdir = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, dict, out);
+
+        if (!val)
+                goto out;
+
+        expdir = (struct export_dir *)val->data;
+        if (expdir) {
+                _export_dir_deinit (expdir);
+                val->data = NULL;
+        }
+
+        dict_del (dict, key);
+out:
+        return 0;
+}
+
+/**
+ * exp_file_deinit -- Free memory used by an export file
+ *
+ * Empties and releases exports_dict first, then exports_map, and finally
+ * frees the filename and the structure itself. A NULL @expfile is ignored.
+ *
+ * @expfile : Pointer to the exports file to free
+ *
+ * Externally usable.
+ */
+void
+exp_file_deinit (struct exports_file *expfile)
+{
+        dict_t *dicts[2] = {NULL, NULL};
+        int     i        = 0;
+
+        if (!expfile)
+                return;
+
+        dicts[0] = expfile->exports_dict;
+        dicts[1] = expfile->exports_map;
+
+        for (i = 0; i < 2; i++) {
+                if (!dicts[i])
+                        continue;
+
+                dict_foreach (dicts[i], _exp_file_dict_destroy, NULL);
+                dict_unref (dicts[i]);
+        }
+
+        GF_FREE (expfile->filename);
+        GF_FREE (expfile);
+}
+
+/**
+ * _export_dir_init -- Allocate a zeroed export directory structure.
+ *
+ * @return : success: Pointer to an allocated exports directory struct
+ *           failure: NULL (a critical message is logged)
+ *
+ * Not for external use.
+ */
+static struct export_dir *
+_export_dir_init ()
+{
+        struct export_dir *expdir = NULL;
+
+        expdir = GF_CALLOC (1, sizeof (*expdir), gf_common_mt_nfs_exports);
+        if (expdir)
+                return expdir;
+
+        gf_msg (GF_EXP, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+                "Failed to allocate export dir structure!");
+        return NULL;
+}
+
+/**
+ * _export_dir_deinit -- Free memory used by an export dir
+ *
+ * Frees the items held in the netgroups and hosts dicts, the directory
+ * name and the structure itself.
+ *
+ * @dir : Pointer to the export directory to free
+ *
+ * Not for external use.
+ */
+static void
+_export_dir_deinit (struct export_dir *dir)
+{
+        GF_VALIDATE_OR_GOTO (GF_EXP, dir, out);
+
+        /* items first, then the members owned directly by the struct */
+        _exp_dict_destroy (dir->netgroups);
+        _exp_dict_destroy (dir->hosts);
+
+        GF_FREE (dir->dir_name);
+        GF_FREE (dir);
+out:
+        return;
+}
+
+
+/**
+ * _export_item_print -- Print the name of an export item to stdout,
+ *                       followed by its options.
+ *
+ * @item : Pointer to the item struct to print out.
+ *
+ * Not for external use.
+ */
+static void
+_export_item_print (const struct export_item *item)
+{
+        GF_VALIDATE_OR_GOTO (GF_EXP, item, out);
+
+        printf ("%s", item->name);
+        _export_options_print (item->opts);
+
+out:
+        return;
+}
+
+/**
+ * _export_item_init -- Allocate a zeroed export item structure
+ *
+ * @return : success: Pointer to an allocated exports item struct
+ *           failure: NULL (a critical message is logged)
+ *
+ * Not for external use.
+ */
+static struct export_item *
+_export_item_init ()
+{
+        struct export_item *item = NULL;
+
+        item = GF_CALLOC (1, sizeof (*item), gf_common_mt_nfs_exports);
+        if (item)
+                return item;
+
+        gf_msg (GF_EXP, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+                "Failed to allocate export item!");
+        return NULL;
+}
+
+/**
+ * _export_item_deinit -- Free memory used by an export item: its options,
+ *                        its name and the item itself. NULL is ignored.
+ *
+ * @item : Pointer to the export item to free
+ *
+ * Not for external use.
+ */
+static void
+_export_item_deinit (struct export_item *item)
+{
+        if (item) {
+                _export_options_deinit (item->opts);
+                GF_FREE (item->name);
+                GF_FREE (item);
+        }
+}
+
+/**
+ * _export_options_init -- Allocate a zeroed export options struct
+ *
+ * @return : success: Pointer to an allocated options struct
+ *           failure: NULL (a critical message is logged)
+ *
+ * Not for external use.
+ */
+static struct export_options *
+_export_options_init ()
+{
+        struct export_options *opts = NULL;
+
+        opts = GF_CALLOC (1, sizeof (*opts), gf_common_mt_nfs_exports);
+        if (opts)
+                return opts;
+
+        gf_msg (GF_EXP, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+                "Failed to allocate options structure!");
+        return NULL;
+}
+
+/**
+ * _export_options_deinit -- Free memory used by an options struct: the
+ *                           anon_uid and sec_type strings and the struct
+ *                           itself. NULL is ignored.
+ *
+ * @opts : Pointer to the options struct to free
+ *
+ * Not for external use.
+ */
+static void
+_export_options_deinit (struct export_options *opts)
+{
+        if (opts) {
+                GF_FREE (opts->anon_uid);
+                GF_FREE (opts->sec_type);
+                GF_FREE (opts);
+        }
+}
+
+/**
+ * _export_options_print -- Print the options to stdout as a parenthesized,
+ *                          comma-terminated list, e.g. "(rw,nosuid,) ".
+ *
+ * @opts : Pointer to the options struct to print out.
+ *
+ * Not for external use.
+ */
+static void
+_export_options_print (const struct export_options *opts)
+{
+        GF_VALIDATE_OR_GOTO (GF_EXP, opts, out);
+
+        printf ("(");
+
+        /* exactly one of rw/ro is always printed */
+        printf ("%s", opts->rw ? "rw," : "ro,");
+
+        if (opts->nosuid)
+                printf ("nosuid,");
+        if (opts->root)
+                printf ("root,");
+        if (opts->anon_uid)
+                printf ("anonuid=%s,", opts->anon_uid);
+        if (opts->sec_type)
+                printf ("sec=%s,", opts->sec_type);
+
+        printf (") ");
+out:
+        return;
+}
+
+/**
+ * __exp_dict_free_walk -- Free the export item stored under @key and
+ *                         delete the key from the dict
+ *
+ * @dict : Dict to free elements from
+ * @key  : Key in the dict we are on
+ * @val  : Value associated with that key
+ * @tmp  : Not used
+ *
+ * Passed as a function pointer to dict_foreach(); always returns 0.
+ *
+ * Not for external use.
+ */
+static int
+__exp_dict_free_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _export_item_deinit ((struct export_item *)val->data);
+        /* the item is gone, make sure the data_t no longer points at it */
+        val->data = NULL;
+        dict_del (dict, key);
+
+        return 0;
+}
+
+/**
+ * _exp_dict_destroy -- Delete all the items from this dict by walking it
+ *                      with __exp_dict_free_walk(). A NULL dict is
+ *                      ignored.
+ *
+ * @ng_dict : Dict to empty
+ *
+ * Not for external use.
+ */
+static void
+_exp_dict_destroy (dict_t *ng_dict)
+{
+        if (ng_dict)
+                dict_foreach (ng_dict, __exp_dict_free_walk, NULL);
+}
+
+/**
+ * exp_file_dir_from_uuid -- Using a uuid as the key, retrieve an exports
+ *                           directory from the exports map of the file.
+ *
+ * @file: File to retrieve data from
+ * @export_uuid: UUID of the export (mountid in the NFS xlator)
+ *
+ * @return : success: Pointer to an export dir struct
+ *           failure: NULL
+ */
+struct export_dir *
+exp_file_dir_from_uuid (const struct exports_file *file,
+                        const uuid_t export_uuid)
+{
+        char    uuid_key[512] = {0, };
+        data_t *found         = NULL;
+
+        /* the map is keyed by the textual form of the uuid */
+        gf_uuid_unparse (export_uuid, uuid_key);
+
+        found = dict_get (file->exports_map, uuid_key);
+        if (!found)
+                return NULL;
+
+        return (struct export_dir *)found->data;
+}
+
+/**
+ * _exp_file_insert -- Insert the exports directory into the file structure.
+ *
+ * The directory is stored twice, sharing one data_t:
+ *  - in exports_dict, keyed by the directory name;
+ *  - in exports_map, keyed by an export "uuid" string. That uuid is built
+ *    by hashing the directory name without its leading slashes, copying
+ *    the hash into an otherwise zeroed uuid and converting the uuid to a
+ *    string.
+ *
+ * @file : Exports file struct to insert into
+ * @dir  : Export directory to insert
+ *
+ * Not for external use.
+ */
+static void
+_exp_file_insert (struct exports_file *file, struct export_dir *dir)
+{
+        data_t   *shared        = NULL;
+        char     *stripped      = NULL;
+        uint32_t  hash          = 0;
+        uuid_t    uuid;
+        char      uuid_key[512] = {0, };
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, file, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, dir, out);
+
+        shared = bin_to_data (dir, sizeof (*dir));
+        dict_set (file->exports_dict, dir->dir_name, shared);
+
+        /* skip every leading '/' of a stack copy of the name */
+        for (stripped = strdupa (dir->dir_name); *stripped == '/'; stripped++)
+                ;
+
+        hash = SuperFastHash (stripped, strlen (stripped));
+
+        memset (uuid, 0, sizeof (uuid));
+        memcpy (uuid, &hash, sizeof (hash));
+        gf_uuid_unparse (uuid, uuid_key);
+
+        dict_set (file->exports_map, uuid_key, shared);
+out:
+        return;
+}
+
+/**
+ * __exp_item_print_walk -- dict_foreach () callback that prints one export
+ *                          item (a netgroup or host entry).
+ *
+ * @dict : the dict being walked (not used)
+ * @key  : the key currently visited (not used)
+ * @val  : the value associated with the key; its data is handed to
+ *         _export_item_print () as a struct export_item
+ * @tmp  : Additional parameter data (not used)
+ *
+ * @return: always 0
+ *
+ * Not for external use.
+ */
+static int
+__exp_item_print_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _export_item_print ((struct export_item *)val->data);
+        return 0;
+}
+
+/**
+ * __exp_file_print_walk -- dict_foreach () callback that prints one export
+ *                          directory on a single line of stdout.
+ *
+ * The key is printed first, followed by every netgroup item and then every
+ * host item of the directory, and finally a newline.
+ *
+ * @dict : the dict being walked (not used)
+ * @key  : the key currently visited; printed as the line prefix
+ * @val  : the value associated with the key; its data is treated as a
+ *         struct export_dir
+ * @tmp  : Additional parameter data (not used)
+ *
+ * @return: always 0
+ *
+ * Not for external use.
+ */
+static int
+__exp_file_print_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        struct export_dir *expdir = NULL;
+
+        if (!val)
+                return 0;
+
+        expdir = (struct export_dir *)val->data;
+
+        printf ("%s ", key);
+
+        if (expdir->netgroups) {
+                dict_foreach (expdir->netgroups, __exp_item_print_walk,
+                              NULL);
+        }
+
+        if (expdir->hosts) {
+                dict_foreach (expdir->hosts, __exp_item_print_walk, NULL);
+        }
+
+        printf ("\n");
+
+        return 0;
+}
+
+/**
+ * exp_file_print -- Dump the contents of a parsed exports file to stdout.
+ *
+ * Walks file->exports_dict and prints one line per export directory through
+ * __exp_file_print_walk (). A NULL @file is rejected by the validation
+ * macro and nothing is printed.
+ *
+ * @file : Exports file to print
+ *
+ * Not for external use.
+ */
+void
+exp_file_print (const struct exports_file *file)
+{
+        GF_VALIDATE_OR_GOTO (GF_EXP, file, out);
+
+        dict_foreach (file->exports_dict, __exp_file_print_walk, NULL);
+
+out:
+        return;
+}
+
+/* __exp_line_get_opt_val -- point `val` at the text following an '='.
+ *
+ * @val      : char * lvalue that receives the address just past `equals`
+ * @equals   : pointer to the '=' character inside the option string
+ * @ret      : int lvalue; set to 1 when nothing follows the '='
+ *             (1 is numerically equal to GF_EXP_PARSE_ITEM_NOT_FOUND)
+ * @errlabel : label jumped to when nothing follows the '='
+ *
+ * The definition deliberately ends without a line continuation so that the
+ * source line following the macro can never be spliced into it.
+ */
+#define __exp_line_get_opt_val(val, equals, ret, errlabel)      \
+        do {                                                    \
+                (val) = (equals) + 1;                           \
+                if (!(*(val))) {                                \
+                        (ret) = 1;                              \
+                        goto errlabel;                          \
+                }                                               \
+        } while (0)
+
+/* Non-negative status codes shared by the exports parsing helpers.
+ * Negative errno values (-EINVAL, -ENOMEM) are used next to these for
+ * critical failures.
+ */
+enum gf_exp_parse_status {
+        /* Parsing completed */
+        GF_EXP_PARSE_SUCCESS                 = 0,
+        /* No netgroup/host matched on the line */
+        GF_EXP_PARSE_ITEM_NOT_FOUND          = 1,
+        /* The text was found but is malformed */
+        GF_EXP_PARSE_ITEM_FAILURE            = 2,
+        /* The directory is not an export known to the mount state */
+        GF_EXP_PARSE_ITEM_NOT_IN_MOUNT_STATE = 3,
+        /* Empty line, comment, or line starting with whitespace */
+        GF_EXP_PARSE_LINE_IGNORING           = 4,
+};
+
+/**
+ * __exp_line_opt_key_value_parse -- Parse a single key=value option.
+ *
+ * Given an options string like (sec=sys,anonuid=0,rw), this function is
+ * called once with 'sec=sys' and once with 'anonuid=0'. It locates the '=',
+ * matches the key on its left against the supported keys ("anonuid" and
+ * "sec") and stores a copy of the text on its right in the matching field
+ * of the options struct. The option string is left unmodified on return.
+ *
+ * @option : An option string like sec=sys or anonuid=0
+ * @opts   : Pointer to the struct export_options receiving the value
+ *
+ * @return: success: GF_EXP_PARSE_SUCCESS
+ *          failure: GF_EXP_PARSE_ITEM_FAILURE when there is no '=' or the
+ *                   key is not supported,
+ *                   1 when nothing follows the '=',
+ *                   -EINVAL on bad args, -ENOMEM on allocation errors.
+ *
+ * Not for external use.
+ */
+static int
+__exp_line_opt_key_value_parse (char *option, struct export_options *opts)
+{
+        char  *eq    = NULL;
+        char  *value = NULL;
+        char **slot  = NULL;
+        int    ret   = -EINVAL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, option, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, opts, out);
+
+        eq = strchr (option, '=');
+        if (eq == NULL) {
+                ret = GF_EXP_PARSE_ITEM_FAILURE;
+                goto out;
+        }
+
+        /* Cut the string at the '=' just long enough to compare the key,
+         * remember which field the key selects, then restore the '='.
+         */
+        *eq = '\0';
+        if (strcmp (option, "anonuid") == 0)
+                slot = &opts->anon_uid;
+        else if (strcmp (option, "sec") == 0)
+                slot = &opts->sec_type;
+        *eq = '=';
+
+        if (slot == NULL) {
+                /* Unsupported key */
+                ret = GF_EXP_PARSE_ITEM_FAILURE;
+                goto out;
+        }
+
+        /* Bails out through 'out' when there is nothing after the '=' */
+        __exp_line_get_opt_val (value, eq, ret, out);
+
+        *slot = gf_strdup (value);
+        GF_CHECK_ALLOC (*slot, ret, out);
+
+        ret = GF_EXP_PARSE_SUCCESS;
+out:
+        return ret;
+}
+
+/**
+ * __exp_line_opt_parse -- Parse the options part of an
+ *                         exports or netgroups string.
+ *
+ * Every token produced by the options parser is either one of the boolean
+ * options (root, ro, rw, nosuid) or a key=value pair handled by
+ * __exp_line_opt_key_value_parse (). Tokens that are neither are logged
+ * and skipped. Only negative results of the key=value parser abort the
+ * parse; its non-negative failure codes do not.
+ *
+ * @opt_str  : The option string to parse
+ * @exp_opts : Double pointer to the options we are going
+ *             to allocate and setup. Only written on success.
+ *
+ * @return: success: GF_EXP_PARSE_SUCCESS
+ *          failure: GF_EXP_PARSE_ITEM_FAILURE when the string yields no
+ *                   option token at all,
+ *                   -EINVAL on bad args, -ENOMEM on allocation errors.
+ *
+ * Not for external use.
+ */
+static int
+__exp_line_opt_parse (const char *opt_str, struct export_options **exp_opts)
+{
+        struct export_options *opts     = NULL;
+        char                  *strmatch = NULL;
+        int                    ret      = -EINVAL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, opt_str, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, exp_opts, out);
+
+        ret = parser_set_string (options_parser, opt_str);
+        if (ret < 0)
+                goto out;
+
+        while ((strmatch = parser_get_next_match (options_parser))) {
+                if (!opts) {
+                        /* Allocate the options lazily, on the first match */
+                        opts = _export_options_init ();
+                        if (!opts) {
+                                /* strmatch is released at unset_and_out */
+                                ret = -ENOMEM;
+                                goto unset_and_out;
+                        }
+                }
+
+                /* First, check for all the boolean options. Second, check
+                 * for an '=', and check the available options there.
+                 */
+                if (strcmp (strmatch, "root") == 0) {
+                        opts->root = _gf_true;
+                } else if (strcmp (strmatch, "ro") == 0) {
+                        opts->rw = _gf_false;
+                } else if (strcmp (strmatch, "rw") == 0) {
+                        opts->rw = _gf_true;
+                } else if (strcmp (strmatch, "nosuid") == 0) {
+                        opts->nosuid = _gf_true;
+                } else if (strchr (strmatch, '=')) {
+                        ret = __exp_line_opt_key_value_parse (strmatch, opts);
+                        if (ret < 0) {
+                                /* Bad arguments or memory allocation
+                                 * failure. The ret value gets bubbled up
+                                 * to the caller.
+                                 */
+                                _export_options_deinit (opts);
+                                goto unset_and_out;
+                        }
+                } else {
+                        /* Cannot change to gf_msg.
+                         * gf_msg not giving output to STDOUT
+                         * Bug id : BZ1215017
+                         */
+                        gf_log (GF_EXP, GF_LOG_WARNING,
+                                "Could not find any valid options for "
+                                "string: %s", strmatch);
+                }
+
+                GF_FREE (strmatch);
+                strmatch = NULL;
+        }
+
+        if (!opts) {
+                /* If opts is not allocated that means no matches were
+                 * found, which is a parse error. Not marking it as
+                 * "not found" because it is a parse error to not have
+                 * options.
+                 */
+                ret = GF_EXP_PARSE_ITEM_FAILURE;
+                goto unset_and_out;
+        }
+
+        *exp_opts = opts;
+        ret = GF_EXP_PARSE_SUCCESS;
+
+unset_and_out:
+        /* strmatch is NULL here unless the loop was left early */
+        GF_FREE (strmatch);
+        parser_unset_string (options_parser);
+out:
+        return ret;
+}
+
+
+/**
+ * __exp_line_ng_host_str_parse -- Parse one netgroup or host token.
+ *
+ * e.g. @mygroup(<options>), parsing @mygroup and (<options>)
+ *   or myhost001.dom(<options>), parsing myhost001.dom and (<options>)
+ *
+ * The text before the first '(' (with leading blanks and tabs skipped)
+ * becomes the item name; the text from the '(' onwards is handed to
+ * __exp_line_opt_parse (). The '(' is overwritten with a NUL while the name
+ * is copied and put back before the options are parsed.
+ *
+ * @str      : The token to parse
+ * @exp_item : Double pointer to a struct export_item; only written on
+ *             success
+ *
+ * @return: success: GF_EXP_PARSE_SUCCESS
+ *          failure: GF_EXP_PARSE_ITEM_FAILURE on parse failure,
+ *                   -EINVAL on bad args, -ENOMEM on allocation errors.
+ *
+ * Not for external use.
+ */
+static int
+__exp_line_ng_host_str_parse (char *str, struct export_item **exp_item)
+{
+        struct export_options *options = NULL;
+        struct export_item    *entry   = NULL;
+        char                  *paren   = NULL;
+        char                  *name    = NULL;
+        int                    ret     = -EINVAL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, str, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, exp_item, out);
+
+        /* A netgroup/host string looks like this:
+         * @test(sec=sys,rw,anonuid=0) or host(sec=sys,rw,anonuid=0)
+         * The name is everything up to the '('; a token without one is a
+         * parse error.
+         */
+        paren = strchr (str, '(');
+        if (paren == NULL) {
+                ret = GF_EXP_PARSE_ITEM_FAILURE;
+                goto out;
+        }
+
+        /* Terminate at the '(' for the duration of the name copy */
+        *paren = '\0';
+
+        if (strlen (str) > FQDN_MAX_LEN) {
+                ret = GF_EXP_PARSE_ITEM_FAILURE;
+                goto out;
+        }
+
+        /* Skip leading blanks and tabs */
+        str += strspn (str, " \t");
+
+        name = gf_strdup (str);
+        if (name == NULL) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        gf_msg_trace (GF_EXP, 0, "found hostname/netgroup: %s", name);
+
+        entry = _export_item_init ();
+        if (entry == NULL) {
+                ret = -ENOMEM;
+                GF_FREE (name);
+                goto out;
+        }
+        entry->name = name;
+
+        /* Put the '(' back; the options start right there */
+        *paren = '(';
+
+        ret = __exp_line_opt_parse (paren, &options);
+        if (ret != 0) {
+                /* Bubble up the error to the caller */
+                _export_item_deinit (entry);
+                goto out;
+        }
+
+        entry->opts = options;
+        *exp_item = entry;
+        ret = GF_EXP_PARSE_SUCCESS;
+out:
+        return ret;
+}
+
+/**
+ * __exp_line_ng_parse -- Extract the netgroups in the line
+ *                        and call helper functions to parse
+ *                        the string.
+ *
+ * The call chain goes like this:
+ *
+ * 1) __exp_line_ng_parse ("/test @test(sec=sys,rw,anonuid=0)")
+ * 2) __exp_line_ng_host_str_parse ("@test(sec=sys,rw,anonuid=0)");
+ * 3) __exp_line_opt_parse("(sec=sys,rw,anonuid=0)");
+ *
+ *
+ * @line    : The line to parse
+ * @ng_dict : Double pointer to the dict we want to
+ *            insert netgroups into. Set to NULL unless parsing succeeds.
+ *
+ * Allocates the dict, extracts netgroup strings from the line,
+ * parses them into a struct export_item structure and inserts
+ * them in the dict, keyed by the item name.
+ *
+ * @return: success: GF_EXP_PARSE_SUCCESS
+ *          failure: GF_EXP_PARSE_ITEM_FAILURE on parse failure,
+ *                   GF_EXP_PARSE_ITEM_NOT_FOUND if the netgroup was not found
+ *                   -EINVAL on bad args, -ENOMEM on allocation errors.
+ *
+ * Not for external use.
+ */
+static int
+__exp_line_ng_parse (const char *line, dict_t **ng_dict)
+{
+        dict_t             *netgroups = NULL;
+        char               *strmatch  = NULL;
+        int                 ret       = -EINVAL;
+        struct export_item *exp_ng    = NULL;
+        data_t             *ngdata    = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, line, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, ng_dict, out);
+
+        *ng_dict = NULL; /* Will be set if parse is successful */
+
+        /* Initialize a parser with the line to parse
+         * and the regex used to parse it.
+         */
+        ret = parser_set_string (netgroup_parser, line);
+        if (ret < 0) {
+                goto out;
+        }
+
+        gf_msg_trace (GF_EXP, 0, "parsing line: %s", line);
+
+        while ((strmatch = parser_get_next_match (netgroup_parser))) {
+                if (!netgroups) {
+                        /* Allocate a new dict to store the netgroups. */
+                        netgroups = dict_new ();
+                        if (!netgroups) {
+                                ret = -ENOMEM;
+                                goto free_and_out;
+                        }
+                }
+
+                gf_msg_trace (GF_EXP, 0, "parsing netgroup: %s", strmatch);
+
+                ret = __exp_line_ng_host_str_parse (strmatch, &exp_ng);
+
+                if (ret != 0) {
+                        /* Parsing or other critical errors.
+                         * caller will handle return value.
+                         */
+                        _exp_dict_destroy (netgroups);
+                        goto free_and_out;
+                }
+
+                ngdata = bin_to_data (exp_ng, sizeof (*exp_ng));
+                if (!ngdata) {
+                        /* The item could not be wrapped, so it would never
+                         * reach the dict: release it here and report the
+                         * allocation failure instead of silently dropping
+                         * the netgroup.
+                         */
+                        _export_item_deinit (exp_ng);
+                        _exp_dict_destroy (netgroups);
+                        ret = -ENOMEM;
+                        goto free_and_out;
+                }
+                dict_set (netgroups, exp_ng->name, ngdata);
+
+                /* Free this matched string and continue parsing. */
+                GF_FREE (strmatch);
+        }
+
+        /* If the netgroups dict was not allocated, then we know that
+         * no matches were found. strmatch is NULL at this point, so the
+         * shared cleanup below only resets the parser.
+         */
+        if (!netgroups) {
+                ret = GF_EXP_PARSE_ITEM_NOT_FOUND;
+                goto free_and_out;
+        }
+
+        ret = GF_EXP_PARSE_SUCCESS;
+        *ng_dict = netgroups;
+
+free_and_out:
+        parser_unset_string (netgroup_parser);
+        GF_FREE (strmatch);
+out:
+        return ret;
+}
+
+/**
+ * __exp_line_host_parse -- Extract the hosts in the line
+ *                          and call helper functions to parse
+ *                          the string.
+ *
+ * The call chain goes like this:
+ *
+ * 1) __exp_line_host_parse ("/test hostip(sec=sys,rw,anonuid=0)")
+ * 2) __exp_line_ng_host_str_parse ("hostip(sec=sys,rw,anonuid=0)");
+ * 3) __exp_line_opt_parse("(sec=sys,rw,anonuid=0)");
+ *
+ *
+ * @line      : The line to parse
+ * @host_dict : Double pointer to the dict we want to
+ *              insert hosts into. Set to NULL unless parsing succeeds.
+ *
+ * Allocates the dict, extracts host strings from the line,
+ * parses them into a struct export_item structure and inserts
+ * them in the dict, keyed by the item name.
+ *
+ * @return: success: GF_EXP_PARSE_SUCCESS
+ *          failure: GF_EXP_PARSE_ITEM_FAILURE on parse failure,
+ *                   GF_EXP_PARSE_ITEM_NOT_FOUND if the host was not found,
+ *                   -EINVAL on bad args, -ENOMEM on allocation errors.
+ *
+ * Not for external use.
+ */
+static int
+__exp_line_host_parse (const char *line, dict_t **host_dict)
+{
+        dict_t             *hosts    = NULL;
+        char               *strmatch = NULL;
+        int                 ret      = -EINVAL;
+        struct export_item *exp_host = NULL;
+        data_t             *hostdata = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, line, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, host_dict, out);
+
+        *host_dict = NULL; /* Will be set if parse is successful */
+
+        /* Initialize a parser with the line to parse and the regex used to
+         * parse it.
+         */
+        ret = parser_set_string (hostname_parser, line);
+        if (ret < 0) {
+                goto out;
+        }
+
+        gf_msg_trace (GF_EXP, 0, "parsing line: %s", line);
+
+        while ((strmatch = parser_get_next_match (hostname_parser))) {
+                if (!hosts) {
+                        /* Allocate a new dict to store the hosts. */
+                        hosts = dict_new ();
+                        GF_CHECK_ALLOC (hosts, ret, free_and_out);
+                }
+
+                gf_msg_trace (GF_EXP, 0, "parsing hostname: %s", strmatch);
+
+                ret = __exp_line_ng_host_str_parse (strmatch, &exp_host);
+
+                if (ret != 0) {
+                        /* Parsing or other critical error, free allocated
+                         * memory and exit. The caller will handle the errors.
+                         */
+                        _exp_dict_destroy (hosts);
+                        goto free_and_out;
+                }
+
+                /* Insert export item structure into the hosts dict. */
+                hostdata = bin_to_data (exp_host, sizeof (*exp_host));
+                if (!hostdata) {
+                        /* The item could not be wrapped, so it would never
+                         * reach the dict: release it here and report the
+                         * allocation failure instead of silently dropping
+                         * the host.
+                         */
+                        _export_item_deinit (exp_host);
+                        _exp_dict_destroy (hosts);
+                        ret = -ENOMEM;
+                        goto free_and_out;
+                }
+                dict_set (hosts, exp_host->name, hostdata);
+
+                /* Free this matched string and continue parsing.*/
+                GF_FREE (strmatch);
+        }
+
+        /* If the hosts dict was not allocated, then we know that
+         * no matches were found. The dict itself is tested, as in the
+         * netgroup parser. strmatch is NULL at this point, so the shared
+         * cleanup below only resets the parser.
+         */
+        if (!hosts) {
+                ret = GF_EXP_PARSE_ITEM_NOT_FOUND;
+                goto free_and_out;
+        }
+
+        ret = GF_EXP_PARSE_SUCCESS;
+        *host_dict = hosts;
+
+free_and_out:
+        parser_unset_string (hostname_parser);
+        GF_FREE (strmatch);
+out:
+        return ret;
+}
+
+
+/**
+ * __exp_line_dir_parse -- Extract directory name from a line in the exports
+ *                         file.
+ *
+ * The directory is the text up to the first space or tab of the line. A
+ * single trailing '/' is removed from the stored name.
+ *
+ * @line    : The line to parse
+ * @dirname : Double pointer to the string we need to hold the directory name.
+ *            If the parsing failed, the string will point to NULL, otherwise
+ *            it will point to a valid memory region that is allocated by
+ *            this function.
+ * @ms      : If non-NULL, the directory is looked up with
+ *            mnt3_mntpath_to_export () and rejected when no export is
+ *            returned for it.
+ *
+ * @return : success: GF_EXP_PARSE_SUCCESS
+ *           failure: -EINVAL on bad arguments or when the directory is
+ *                    longer than DIR_MAX_LEN, -ENOMEM on allocation failures,
+ *                    GF_EXP_PARSE_ITEM_NOT_IN_MOUNT_STATE if we failed to match
+ *                    with gluster's mountstate.
+ *
+ * The caller is responsible for freeing memory allocated by this function
+ *
+ * Not for external use.
+ */
+static int
+__exp_line_dir_parse (const char *line, char **dirname, struct mount3_state *ms)
+{
+        char   *dir    = NULL;
+        int     ret    = -EINVAL;
+        size_t  dirlen = 0;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, line, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, dirname, out);
+
+        /* Honour the documented contract: NULL unless parsing succeeds */
+        *dirname = NULL;
+
+        /* The directory is the first space/tab separated field. Measure it
+         * in place and enforce the length limit before copying anything, so
+         * an arbitrarily long input line is never duplicated.
+         */
+        dirlen = strcspn (line, " \t");
+        if (dirlen > DIR_MAX_LEN) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        /* Copy just the directory field; the original line is untouched */
+        dir = gf_strndup (line, dirlen);
+        GF_CHECK_ALLOC (dir, ret, out);
+
+        if (ms) {
+                /* Match the directory name with an existing
+                 * export in the mount state.
+                 */
+                if (!mnt3_mntpath_to_export (ms, dir, _gf_true)) {
+                        gf_msg_debug (GF_EXP, 0, "%s not in mount state, "
+                                      "ignoring!", dir);
+                        GF_FREE (dir);
+                        ret = GF_EXP_PARSE_ITEM_NOT_IN_MOUNT_STATE;
+                        goto out;
+                }
+        }
+
+        /* Ensure that a trailing slash is stripped before storing the name */
+        if (dirlen > 0 && dir[dirlen - 1] == '/')
+                dir[dirlen - 1] = '\0';
+
+        /* Set the argument to point to the allocated string */
+        *dirname = dir;
+        ret = GF_EXP_PARSE_SUCCESS;
+out:
+        return ret;
+}
+
+/**
+ * _exp_line_parse -- Parse a line in an exports file into a structure
+ *                    that holds all the parts of the line. An exports
+ *                    structure has a dict of netgroups and a dict of hosts.
+ *
+ * An export line looks something like this /test @test(sec=sys,rw,anonuid=0)
+ * or /test @test(sec=sys,rw,anonuid=0) hostA(sec=sys,rw,anonuid=0), etc.
+ *
+ * The line is parsed into three separate pieces:
+ *      1) The directory
+ *      2) The netgroup if it exists (exports.h -- NETGROUP_REGEX_PATTERN)
+ *      3) The host if it exists (exports.h -- HOSTNAME_REGEX_PATTERN)
+ *
+ * In this case, the netgroup would be @test(sec=sys,rw,anonuid=0)
+ * and the host would be hostA(sec=sys,rw,anonuid=0).
+ *
+ * A line is accepted when at least one of the netgroup and host parts
+ * parses successfully.
+ *
+ * @line       : The line to parse
+ * @dir        : Double pointer to the struct we need to parse the line into.
+ *               Set to NULL unless GF_EXP_PARSE_SUCCESS is returned, in
+ *               which case it points to memory allocated by this function.
+ * @parse_full : Not consulted by this function; filtering against gluster's
+ *               exports is driven by @ms.
+ * @ms         : The mount state that holds the list of volumes that gluster
+ *               currently exports. When non-NULL, lines whose directory is
+ *               not part of it are ignored.
+ *
+ * @return : GF_EXP_PARSE_SUCCESS on success,
+ *           GF_EXP_PARSE_LINE_IGNORING if we ignored the line (comment,
+ *           empty line, leading whitespace, or directory not in the mount
+ *           state),
+ *           GF_EXP_PARSE_ITEM_FAILURE if neither netgroups nor hosts could
+ *           be parsed from the line,
+ *           -EINVAL on bad arguments, -ENOMEM on memory allocation errors.
+ *
+ * The caller is responsible for freeing memory allocated by this function
+ * The caller should free this memory using the _export_dir_deinit () function.
+ *
+ * Not for external use.
+ */
+static int
+_exp_line_parse (const char *line, struct export_dir **dir,
+                 gf_boolean_t parse_full, struct mount3_state *ms)
+{
+        struct export_dir *expdir           = NULL;
+        char              *dirstr           = NULL;
+        dict_t            *netgroups        = NULL;
+        dict_t            *hosts            = NULL;
+        int                ret              = -EINVAL;
+        gf_boolean_t       netgroups_failed = _gf_false;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, line, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, dir, out);
+
+        /* Never leave the caller with a pointer from an earlier line */
+        *dir = NULL;
+
+        if (*line == '#' || *line == ' ' || *line == '\t'
+            || *line == '\0' || *line == '\n') {
+                ret = GF_EXP_PARSE_LINE_IGNORING;
+                goto out;
+        }
+
+        expdir = _export_dir_init ();
+        if (!expdir) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        /* Get the directory string from the line */
+        ret = __exp_line_dir_parse (line, &dirstr, ms);
+        if (ret < 0) {
+                gf_msg (GF_EXP, GF_LOG_ERROR, 0, NFS_MSG_PARSE_DIR_FAIL,
+                        "Parsing directory failed: %s", strerror (-ret));
+                /* If parsing the directory failed,
+                 * we should simply fail because there's
+                 * nothing else we can extract from the string to make
+                 * the data valuable.
+                 */
+                goto free_and_out;
+        }
+        if (ret != GF_EXP_PARSE_SUCCESS) {
+                /* The directory is not in the mount state. No name was
+                 * produced for it, so the line must be skipped rather than
+                 * turned into an export without a directory name.
+                 */
+                ret = GF_EXP_PARSE_LINE_IGNORING;
+                goto free_and_out;
+        }
+
+        /* Set the dir str */
+        expdir->dir_name = dirstr;
+
+        /* Parse the netgroup part of the string */
+        ret = __exp_line_ng_parse (line, &netgroups);
+        if (ret < 0) {
+                gf_msg (GF_EXP, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+                        "Critical error: %s", strerror (-ret));
+                /* Return values less than 0
+                 * indicate critical failures (null parameters,
+                 * failure to allocate memory, etc).
+                 */
+                goto free_and_out;
+        }
+        if (ret != 0) {
+                if (ret == GF_EXP_PARSE_ITEM_FAILURE) {
+                        /* Cannot change to gf_msg.
+                         * gf_msg not giving output to STDOUT
+                         * Bug id : BZ1215017
+                         */
+                        gf_log (GF_EXP, GF_LOG_WARNING,
+                                "Error parsing netgroups for: %s", line);
+                }
+                /* Even though parsing failed for the netgroups we should let
+                 * host parsing proceed.
+                 */
+                netgroups_failed = _gf_true;
+        }
+
+        /* Hand the dict (NULL when netgroup parsing failed) to expdir right
+         * away so that the error paths below, which tear expdir down with
+         * _export_dir_deinit (), do not lose track of it.
+         */
+        expdir->netgroups = netgroups;
+
+        /* Parse the host part of the string */
+        ret = __exp_line_host_parse (line, &hosts);
+        if (ret < 0) {
+                gf_msg (GF_EXP, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+                        "Critical error: %s", strerror (-ret));
+                goto free_and_out;
+        }
+        if (ret != 0) {
+                if (ret == GF_EXP_PARSE_ITEM_FAILURE) {
+                        gf_msg (GF_EXP, GF_LOG_WARNING, 0, NFS_MSG_PARSE_FAIL,
+                                "Error parsing hosts for: %s", line);
+                }
+                /* If netgroups parsing failed, AND
+                 * host parsing failed, then theres something really
+                 * wrong with this line, so we're just going to
+                 * log it and fail out.
+                 */
+                if (netgroups_failed) {
+                        ret = GF_EXP_PARSE_ITEM_FAILURE;
+                        goto free_and_out;
+                }
+        }
+
+        expdir->hosts = hosts;
+        *dir = expdir;
+
+        /* One of the two parts may legitimately be missing; the line as a
+         * whole was still parsed successfully.
+         */
+        ret = GF_EXP_PARSE_SUCCESS;
+        goto out;
+
+free_and_out:
+        _export_dir_deinit (expdir);
+out:
+        return ret;
+}
+
+/**
+ * exp_dir_get_netgroup -- Given a netgroup string and an exports directory
+ *                         structure, find and return the struct export_item
+ *                         stored for that netgroup.
+ *
+ * @expdir  : Export directory to lookup from
+ * @netgroup: Netgroup string to lookup
+ *
+ * @return: success: Pointer to an export item structure
+ *          failure: NULL (bad arguments, no netgroups dict, or no entry)
+ */
+struct export_item *
+exp_dir_get_netgroup (const struct export_dir *expdir, const char *netgroup)
+{
+        struct export_item *found = NULL;
+        data_t             *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, expdir, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, netgroup, out);
+
+        if (expdir->netgroups == NULL)
+                goto out;
+
+        entry = dict_get (expdir->netgroups, (char *)netgroup);
+        if (entry != NULL) {
+                found = (struct export_item *)entry->data;
+        } else {
+                gf_msg_debug (GF_EXP, 0, "%s not found for %s",
+                              netgroup, expdir->dir_name);
+        }
+out:
+        return found;
+}
+/**
+ * exp_dir_get_host -- Given a host string and an exports directory structure,
+ *                     find and return the struct export_item that represents
+ *                     the requested host.
+ *
+ * When there is no entry for the exact host string, the wildcard entry "*"
+ * is tried before giving up.
+ *
+ * @expdir: Export directory to lookup from
+ * @host  : Host string to lookup
+ *
+ * @return: success: Pointer to a export item structure
+ *          failure: NULL
+ */
+struct export_item *
+exp_dir_get_host (const struct export_dir *expdir, const char *host)
+{
+        struct export_item *found = NULL;
+        data_t             *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, expdir, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, host, out);
+
+        if (expdir->hosts == NULL)
+                goto out;
+
+        entry = dict_get (expdir->hosts, (char *)host);
+        if (entry == NULL) {
+                gf_msg_debug (GF_EXP, 0, "%s not found for %s",
+                              host, expdir->dir_name);
+
+                /* Fall back to the wildcard entry, if one was exported */
+                entry = dict_get (expdir->hosts, "*");
+        }
+
+        if (entry != NULL)
+                found = (struct export_item *)entry->data;
+out:
+        return found;
+}
+
+
+/**
+ * exp_file_get_dir -- Return an export dir given a directory name.
+ *                     Does a lookup in the exports_dict of the file
+ *                     structure. A name without a leading slash is looked
+ *                     up with one prepended.
+ *
+ * @file : Exports file structure to lookup from
+ * @dir  : Directory name to lookup
+ *
+ * @return : success: Pointer to an export directory structure
+ *           failure: NULL (bad arguments, empty name, or no entry)
+ */
+struct export_dir *
+exp_file_get_dir (const struct exports_file *file, const char *dir)
+{
+        struct export_dir *found  = NULL;
+        data_t            *entry  = NULL;
+        char              *key    = NULL;
+        size_t             dirlen = 0;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, file, out);
+        GF_VALIDATE_OR_GOTO (GF_EXP, dir, out);
+
+        dirlen = strlen (dir);
+        if (dirlen == 0)
+                goto out;
+
+        if (*dir == '/') {
+                /* Already in the form used for the dict keys */
+                key = (char *)dir;
+        } else {
+                /* Room for the leading slash and the null byte */
+                key = alloca (dirlen + 2);
+                snprintf (key, dirlen + 2, "/%s", dir);
+        }
+
+        entry = dict_get (file->exports_dict, key);
+        if (entry != NULL) {
+                found = (struct export_dir *)entry->data;
+        } else {
+                gf_msg_debug (GF_EXP, 0, "%s not found in %s", key,
+                              file->filename);
+        }
+out:
+        return found;
+}
+
+/**
+ * exp_file_parse -- Parse an exports file into a structure
+ *                   that can be looked up through simple
+ *                   function calls.
+ *
+ * The file is read line by line. Lines that are ignored or fail to parse
+ * are skipped (failures are logged with their line number); only a memory
+ * allocation failure aborts the whole parse.
+ *
+ * @filepath: Path to the exports file
+ * @expfile : Receives the parsed file on success. It receives NULL when the
+ *            file contained no usable export line.
+ * @ms      : Current mount state (useful to match with gluster vol files)
+ *
+ * @return  : success: 0
+ *            failure: -errno if the file cannot be opened, -EINVAL on bad
+ *                     arguments, -ENOMEM on allocation failures, or the
+ *                     negative value reported by _exp_init_parsers ().
+ *
+ * The caller is responsible for freeing memory allocated by this function.
+ * The caller should free this memory using the exp_file_deinit () function.
+ * Calling GF_FREE ( ) on the pointer will NOT free all the allocated memory.
+ *
+ * Externally usable.
+ */
+int
+exp_file_parse (const char *filepath, struct exports_file **expfile,
+                struct mount3_state *ms)
+{
+        FILE                *fp                  = NULL;
+        struct exports_file *file                = NULL;
+        size_t               len                 = 0;
+        int                  ret                 = -EINVAL;
+        unsigned long        line_number         = 0;
+        char                *line                = NULL;
+        struct export_dir   *expdir              = NULL;
+        gf_boolean_t         parsers_inited      = _gf_false;
+
+        /* Sets whether we we should parse the entire file or just that which
+         * is present in the mount state */
+        gf_boolean_t         parse_complete_file = _gf_false;
+
+        GF_VALIDATE_OR_GOTO (GF_EXP, expfile, parse_done);
+        GF_VALIDATE_OR_GOTO (GF_EXP, filepath, parse_done);
+
+        if (!ms) {
+                /* If mount state is null that means that we
+                 * should go through and parse the whole file
+                 * since we don't have anything to compare against.
+                 */
+                parse_complete_file = _gf_true;
+        }
+
+        fp = fopen (filepath, "r");
+        if (!fp) {
+                ret = -errno;
+                goto parse_done;
+        }
+
+        /* From here on the parsers have to be torn down on exit, even if
+         * their initialisation fails part-way.
+         */
+        parsers_inited = _gf_true;
+        ret = _exp_init_parsers ();
+        if (ret < 0)
+                goto parse_done;
+
+        /* Process the file line by line, with each line being parsed into
+         * a struct export_dir.
+         */
+        while (getline (&line, &len, fp) != -1) {
+                line_number++;  /* Keeping track of line number allows us
+                                 * to log which line numbers were wrong
+                                 */
+
+                /* Cut the line at its newline, if any. Unlike strtok ()
+                 * this keeps no hidden state between calls.
+                 */
+                line[strcspn (line, "\n")] = '\0';
+
+                /* Make sure a directory from an earlier iteration can never
+                 * be mistaken for the result of this line.
+                 */
+                expdir = NULL;
+
+                /* Parse the line from the file into a struct export_dir
+                 * structure. The process is as follows:
+                 * Given a line like :
+                 * "/vol @test(sec=sys,rw,anonuid=0) 10.35.11.31(sec=sys,rw)"
+                 *
+                 * An export dir is allocated and its name set to '/vol'.
+                 *
+                 * Then the netgroups are extracted from the line, in this
+                 * case '@test(sec=sys,rw,anonuid=0)', and the item
+                 * structure's name is set to '@test'. The options of that
+                 * string are parsed into a struct export_options which is
+                 * pointed to by the item structure. The item is put into a
+                 * dict which is pointed to by the export directory
+                 * structure.
+                 *
+                 * The same process happens for the host string
+                 * '10.35.11.31(sec=sys,rw)'
+                 */
+                ret = _exp_line_parse (line, &expdir, parse_complete_file, ms);
+                if (ret == -ENOMEM) {
+                        /* If we get memory allocation errors, we really should
+                         * not continue parsing, so just free the allocated
+                         * memory and exit.
+                         */
+                        goto free_and_done;
+                }
+
+                if (ret < 0) {
+                        gf_msg (GF_EXP, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+                                "Failed to parse line #%lu", line_number);
+                        continue; /* Skip entering this line and continue */
+                }
+
+                if (ret == GF_EXP_PARSE_LINE_IGNORING) {
+                        /* This just means the line was empty or started with a
+                         * '#' or a ' ' and we are ignoring it.
+                         */
+                        gf_msg_debug (GF_EXP, 0,
+                                      "Ignoring line #%lu because it started "
+                                      "with a %c", line_number, *line);
+                        continue;
+                }
+
+                if (!expdir) {
+                        /* The line did not produce an export directory */
+                        continue;
+                }
+
+                if (!expdir->dir_name) {
+                        /* Defensive: the name is used as the dict key and
+                         * is hashed on insertion, so an entry without one
+                         * cannot be stored.
+                         */
+                        _export_dir_deinit (expdir);
+                        expdir = NULL;
+                        continue;
+                }
+
+                if (!file) {
+                        file = _exports_file_init ();
+                        GF_CHECK_ALLOC_AND_LOG (GF_EXP, file, ret,
+                                                "Allocation error while "
+                                                "allocating file struct",
+                                                free_dir_and_done);
+
+                        file->filename = gf_strdup (filepath);
+                        GF_CHECK_ALLOC_AND_LOG (GF_EXP, file->filename, ret,
+                                                "Allocation error while "
+                                                "duping filepath",
+                                                free_dir_and_done);
+                }
+
+                /* If the parsing is successful store the export directory
+                 * in the file structure.
+                 */
+                _exp_file_insert (file, expdir);
+        }
+
+        *expfile = file;
+
+        /* Individual bad or ignored lines were already skipped and logged;
+         * they must not decide the result of the whole parse.
+         */
+        ret = 0;
+        goto parse_done;
+
+free_dir_and_done:
+        /* The directory parsed last was never stored in the file */
+        _export_dir_deinit (expdir);
+free_and_done:
+        exp_file_deinit (file);
+
+parse_done:
+        /* line got allocated through getline(), don't use GF_FREE() for it */
+        free (line);
+        if (fp)
+                fclose (fp);
+        if (parsers_inited)
+                _exp_deinit_parsers ();
+        return ret;
+}
diff --git a/xlators/nfs/server/src/exports.h b/xlators/nfs/server/src/exports.h
new file mode 100644
index 00000000000..bc9af2f0b8b
--- /dev/null
+++ b/xlators/nfs/server/src/exports.h
@@ -0,0 +1,92 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EXPORTS_H_
+#define _EXPORTS_H_
+
+#include "nfs-mem-types.h"
+#include "dict.h"
+#include "nfs.h"
+
+/* Log domain used by the exports code: the NFS domain plus "-exports" */
+#define GF_EXP GF_NFS"-exports"
+
+/* Regular expressions for the netgroup, host and option parts of an
+ * exports line. NOTE(review): presumably these are compiled by the parser
+ * setup in exports.c -- confirm there before changing them.
+ */
+#define NETGROUP_REGEX_PATTERN "(@([a-zA-Z0-9\\(=, .])+)())"
+#define HOSTNAME_REGEX_PATTERN "[[:space:]]([a-zA-Z0-9.\\(=,*/)-]+)"
+#define OPTIONS_REGEX_PATTERN "([a-zA-Z0-9=\\.]+)"
+
+/* Length limits for names. FQDN_MAX_LEN bounds the netgroup/host name
+ * accepted while parsing an exports line.
+ */
+#define NETGROUP_MAX_LEN 128
+#define FQDN_MAX_LEN 256
+
+/* Length limits for option values */
+#define SEC_OPTION_MAX 10
+#define UID_MAX_LEN 6
+
+/* Longest directory name accepted on an exports line */
+#define DIR_MAX_LEN 1024
+
+/* The following 2 declarations (the mount3_state struct and the
+ * mnt3_mntpath_to_export () function) belong to mount3.h,
+ * but we don't want to include it because mount3.h
+ * depends on structs in this file so we get a cross
+ * dependency.
+ */
+struct mount3_state;
+
+/* Used by the exports parser to check whether a directory from the exports
+ * file corresponds to an export in the mount state; a NULL result is
+ * treated as "not exported".
+ */
+extern struct mnt3_export *
+mnt3_mntpath_to_export (struct mount3_state *ms, const char *dirpath,
+ gf_boolean_t export_parsing_match);
+
+/* Options attached to a single netgroup or host entry of an export */
+struct export_options {
+        /* Read-write option: true for "rw", false for "ro" */
+        gf_boolean_t rw;
+        /* nosuid option */
+        gf_boolean_t nosuid;
+        /* root option */
+        gf_boolean_t root;
+        /* anonuid option: the text given after "anonuid=" */
+        char *anon_uid;
+        /* X, for sec=X */
+        char *sec_type;
+};
+
+/* One netgroup or host entry of an export, together with its options */
+struct export_item {
+        /* Name of the export item */
+        char *name;
+        /* NFS Options */
+        struct export_options *opts;
+};
+
+/* One exported directory: a line of the exports file */
+struct export_dir {
+        /* Directory */
+        char *dir_name;
+        /* Dict of netgroups, keyed by item name; may be NULL */
+        dict_t *netgroups;
+        /* Dict of hosts, keyed by item name; may be NULL */
+        dict_t *hosts;
+};
+
+/* A parsed exports file */
+struct exports_file {
+        /* Filename */
+        char *filename;
+        /* Dict of struct export_dir, keyed by directory name */
+        dict_t *exports_dict;
+        /* Map of SuperFastHash(<export>) -> expdir */
+        dict_t *exports_map;
+};
+
+/* Release an exports file obtained from exp_file_parse (). */
+void
+exp_file_deinit (struct exports_file *expfile);
+
+/* Parse the exports file at filepath into *expfile. ms may be NULL, in
+ * which case directories are not checked against a mount state.
+ */
+int
+exp_file_parse (const char *filepath, struct exports_file **expfile,
+ struct mount3_state *ms);
+
+/* Look up an export directory by name; returns NULL when not found. */
+struct export_dir *
+exp_file_get_dir (const struct exports_file *file, const char *dir);
+
+/* Look up a host entry of an export directory; returns NULL when neither
+ * the host nor the wildcard entry "*" is present.
+ */
+struct export_item *
+exp_dir_get_host (const struct export_dir *expdir, const char *host);
+
+/* Look up a netgroup entry of an export directory; returns NULL when not
+ * found.
+ */
+struct export_item *
+exp_dir_get_netgroup (const struct export_dir *expdir, const char *netgroup);
+
+/* Look up an export directory by the uuid string key used in
+ * exports_map; returns NULL when not found.
+ */
+struct export_dir *
+exp_file_dir_from_uuid (const struct exports_file *file,
+ const uuid_t export_uuid);
+
+#endif /* _EXPORTS_H_ */
diff --git a/xlators/nfs/server/src/mount3-auth.c b/xlators/nfs/server/src/mount3-auth.c
new file mode 100644
index 00000000000..97c95cbfd23
--- /dev/null
+++ b/xlators/nfs/server/src/mount3-auth.c
@@ -0,0 +1,644 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* This file contains code for handling mount authentication.
+ * The primary structure here is 'mnt3_auth_params' which contains
+ * 3 important fields: 1) Pointer to a netgroups file struct, 2) Pointer to an
+ * exports file struct. 3) Pointer to a mount state struct.
+ *
+ * - The auth parameter struct belongs to a mount state so the mount state
+ * pointer represents the mount state that this auth parameter struct belongs
+ * to.
+ *
+ * - Currently, the only supported mount auth parameters are an exports file
+ * and a netgroups file. The two pointers in the struct represent the files
+ * we are to authenticate against.
+ *
+ * - To initialize a struct, make a call to mnt3_auth_params_init () with a mnt
+ * state as a parameter.
+ *
+ * - To set an exports file authentication parameter, call
+ * mnt3_auth_set_exports_auth () with an exports file as a parameter.
+ *
+ * - Same goes for the netgroups file parameter, except use the netgroups file
+ * as the parameter.
+ */
+
+#include "mount3-auth.h"
+#include "exports.h"
+#include "netgroups.h"
+#include "mem-pool.h"
+#include "nfs-messages.h"
+
+/**
+ * mnt3_auth_params_init -- Initialize the mount3 authorization parameters
+ * and return the allocated struct. The mount3_state
+ * parameter is pointed to by a field in the struct.
+ *
+ * @ms: Mount state that is needed for auth.
+ *
+ * @return: success: Pointer to the allocated struct
+ * failure: NULL
+ *
+ * For external use.
+ */
+struct mnt3_auth_params *
+mnt3_auth_params_init (struct mount3_state *ms)
+{
+        struct mnt3_auth_params *params = NULL;
+
+        params = GF_MALLOC (sizeof (*params), gf_nfs_mt_mnt3_auth_params);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, params, done);
+
+        /* Remember the owning mount state; no exports or netgroups file
+         * is attached until one of the mnt3_auth_set_*_auth () calls.
+         */
+        params->ms = ms;
+        params->expfile = NULL;
+        params->ngfile = NULL;
+done:
+        return params;
+}
+
+/**
+ * mnt3_auth_params_deinit -- Free the memory used by the struct.
+ *
+ * @auth_params: Pointer to the struct we want to free
+ *
+ * For external use.
+ */
+void
+mnt3_auth_params_deinit (struct mnt3_auth_params *auth_params)
+{
+        if (!auth_params)
+                goto out;
+
+        /* Atomically set the auth params in the mount state to NULL
+         * so subsequent fops will be denied while the auth params
+         * are being cleaned up.
+         *
+         * mnt3_auth_params_init () stores whatever mount state it was
+         * handed, including NULL, so only detach when there is an owner.
+         */
+        if (auth_params->ms)
+                (void)__sync_lock_test_and_set (&auth_params->ms->auth_params,
+                                                NULL);
+
+        /* Release both parsed files, then the container itself. */
+        ng_file_deinit (auth_params->ngfile);
+        exp_file_deinit (auth_params->expfile);
+        auth_params->ms = NULL;
+        GF_FREE (auth_params);
+out:
+        return;
+}
+
+/**
+ * mnt3_auth_set_exports_auth -- Set the exports auth file
+ *
+ * @auth_params : Pointer to the auth params struct
+ * @filename : File name to load from disk and parse
+ *
+ * @return : success: 0
+ * failure: negative value (-EINVAL for a NULL argument, otherwise
+ * the error returned by exp_file_parse ())
+ *
+ * For external use.
+ */
+int
+mnt3_auth_set_exports_auth (struct mnt3_auth_params *auth_params,
+                            const char *filename)
+{
+        struct exports_file *parsed = NULL;
+        struct exports_file *previous = NULL;
+        int ret = -EINVAL;
+
+        /* Both arguments are mandatory */
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, auth_params, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, filename, out);
+
+        /* Load the new file first; the current one stays in place
+         * if parsing fails.
+         */
+        ret = exp_file_parse (filename, &parsed, auth_params->ms);
+        if (ret >= 0) {
+                /* Swap the pointer atomically, then drop the old file */
+                previous = __sync_lock_test_and_set (&auth_params->expfile,
+                                                     parsed);
+                exp_file_deinit (previous);
+                ret = 0;
+        } else {
+                gf_msg (GF_MNT_AUTH, GF_LOG_ERROR, 0, NFS_MSG_LOAD_PARSE_ERROR,
+                        "Failed to load & parse file"
+                        " %s, see logs for more information", filename);
+        }
+out:
+        return ret;
+}
+
+/**
+ * mnt3_auth_set_netgroups_auth -- Set netgroups auth file
+ *
+ * @auth_params : Pointer to the auth params struct.
+ * @filename : File name to load from disk and parse
+ *
+ * @return : success: 0
+ * failure: -EINVAL for a NULL argument, -1 if the file could not
+ * be loaded
+ *
+ * For external use.
+ */
+int
+mnt3_auth_set_netgroups_auth (struct mnt3_auth_params *auth_params,
+                              const char *filename)
+{
+        struct netgroups_file *parsed = NULL;
+        struct netgroups_file *previous = NULL;
+        int ret = -EINVAL;
+
+        /* Both arguments are mandatory */
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, auth_params, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, filename, out);
+
+        parsed = ng_file_parse (filename);
+        if (parsed) {
+                /* Swap the pointer atomically, then drop the old file */
+                previous = __sync_lock_test_and_set (&auth_params->ngfile,
+                                                     parsed);
+                ng_file_deinit (previous);
+                ret = 0;
+        } else {
+                gf_msg (GF_MNT_AUTH, GF_LOG_ERROR, 0, NFS_MSG_LOAD_PARSE_ERROR,
+                        "Failed to load file %s, see logs for more "
+                        "information", filename);
+                ret = -1;
+        }
+out:
+        return ret;
+}
+
+/* Struct used to pass parameters to
+ * _mnt3_auth_subnet_match () which
+ * checks if an IP matches a subnet
+ */
+struct _mnt3_subnet_match_s {
+ char *ip; /* IP address to match */
+ struct export_item **host; /* Host structure to set; written only while *host is still NULL */
+};
+
+/**
+ * _mnt3_auth_subnet_match -- Check if an ip (specified in the parameter tmp)
+ * is in the subnet specified by key.
+ *
+ * @dict: The dict to walk
+ * @key : The key we are on
+ * @val : The value we are on
+ * @tmp : Parameter that points to the above struct
+ *
+ */
+static int
+_mnt3_auth_subnet_match (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        struct _mnt3_subnet_match_s *params = tmp;
+        char *network = NULL;
+
+        /* Nothing to do without complete search parameters */
+        if (!params || !params->host || !params->ip)
+                return 0;
+
+        /* A previous key already matched; keep that result */
+        if (*(params->host))
+                return 0;
+
+        /* Only keys written in CIDR notation describe a network */
+        if (!strchr (key, '/'))
+                return 0;
+
+        /* Skip leading blanks before handing the key to the matcher */
+        network = key;
+        while (*network == ' ')
+                network++;
+
+        /* On a hit, hand back the export item stored under this key */
+        if (gf_is_ip_in_net (network, params->ip))
+                *(params->host) = (struct export_item *)val->data;
+
+        return 0;
+}
+
+/**
+ * _mnt3_auth_check_host_in_export -- Find a host in the exports file.
+ *
+ * Case 1: FH is non-null
+ * -----------------------
+ * The lookup process is two-step: The FH has a mountid which represents the
+ * export that was mounted by the client. The export is defined as an entry in
+ * the exports file. The FH's 'mountid' is hashed in the exports file to lookup
+ * an export directory.
+ *
+ * Case 2: FH is null
+ * -------------------
+ * The lookup process is two-step: You need a directory and a hostname
+ * to do the lookup. We first lookup the export directory in the file
+ * and then do a lookup on the directory to find the host. If the host
+ * is not found, we must finally check for subnets and then do a match.
+ *
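+ * @file: Exports file to lookup in
+ * @dir : Directory to do the lookup (only used when @fh is NULL)
+ * @host: Host to lookup in the directory
+ * @fh  : Filehandle whose mountid selects the export directory; may be NULL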
+ *
+ * Not for external use.
+ */
+static struct export_item *
+_mnt3_auth_check_host_in_export (const struct exports_file *file,
+                                 const char *dir, const char *host,
+                                 struct nfs3_fh *fh)
+{
+        struct export_dir           *expdir      = NULL;
+        struct export_item          *matched     = NULL;
+        struct _mnt3_subnet_match_s  subnet_args = {0, };
+
+        /* Validate args */
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, file, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, host, out);
+
+        /* A filehandle identifies the export through its mountid (the
+         * path taken by file operations); without one the export is
+         * looked up by directory name.
+         */
+        expdir = fh ? exp_file_dir_from_uuid (file, fh->mountid)
+                    : exp_file_get_dir (file, dir);
+        if (!expdir)
+                goto out;
+
+        /* Exact host entry first */
+        matched = exp_dir_get_host (expdir, host);
+        if (matched || !expdir->hosts)
+                goto out;
+
+        /* No exact entry: some "hosts" may really be networks
+         * (e.g. 10.5.153.0/24), so walk them and test the address
+         * against each one.
+         */
+        subnet_args.ip = (char *)host;
+        subnet_args.host = &matched;
+        dict_foreach (expdir->hosts, _mnt3_auth_subnet_match, &subnet_args);
+out:
+        return matched;
+}
+
+/* This struct represents all the parameters necessary to search through a
+ * netgroups file to find a host.
+ */
+struct ng_auth_search {
+ const char *search_for; /* strings to search for */
+ gf_boolean_t found; /* mark true once found */
+ const struct netgroups_file *file; /* netgroups file to search */
+ const char *expdir; /* export directory name, looked up in expfile */
+ struct export_item *expitem; /* pointer to the export */
+ const struct exports_file *expfile; /* exports file that expdir is looked up in */
+ gf_boolean_t _is_host_dict; /* searching a host dict? */
+ struct netgroup_entry *found_entry; /* the entry we found! */
+};
+
+/**
+ * __netgroup_dict_search -- Function to search the netgroups dict.
+ *
+ * @dict: The dict we are walking
+ * @key : The key we are on
+ * @val : The value associated with that key
+ * @data: Additional parameters. We pass a pointer to ng_auth_search_s
+ *
+ * This is passed as a function pointer to dict_foreach ().
+ *
+ * Not for external use.
+ */
+static int
+__netgroup_dict_search (dict_t *dict, char *key, data_t *val, void *data)
+{
+ struct ng_auth_search *ngsa = NULL;
+ struct netgroup_entry *ngentry = NULL;
+ data_t *hdata = NULL;
+
+ /* 'ngsa' is the search params */
+ ngsa = (struct ng_auth_search *)data;
+ ngentry = (struct netgroup_entry *)val->data;
+
+ if (ngsa->_is_host_dict) {
+ /* If are on a host dict, we can simply hash the search key
+ * against the host dict and see if we find anything.
+ */
+ hdata = dict_get (dict, (char *)ngsa->search_for);
+ if (hdata) {
+ /* If it was found, log the message, mark the search
+ * params dict as found and return.
+ */
+ /* NOTE(review): 'key' is the pair currently being visited,
+ * not necessarily ngsa->search_for, and errno is not set by
+ * any call in this function -- confirm both are intended.
+ */
+ gf_msg_debug (GF_MNT_AUTH, errno, "key %s was hashed "
+ "and found", key);
+ ngsa->found = _gf_true;
+ ngsa->found_entry = (struct netgroup_entry *)hdata->data;
+ goto out;
+ }
+ }
+
+ /* If the key is what we are searching for, mark the item as
+ * found and return.
+ */
+ if (strcmp (key, ngsa->search_for) == 0) {
+ ngsa->found = _gf_true;
+ ngsa->found_entry = ngentry;
+ goto out;
+ }
+
+ /* If we have a netgroup hosts dict, then search the dict using this
+ * same function.
+ */
+ /* _is_host_dict lives in the shared search params, so nested calls
+ * see (and may change) the value set here.
+ */
+ if (ngentry->netgroup_hosts) {
+ ngsa->_is_host_dict = _gf_true;
+ dict_foreach (ngentry->netgroup_hosts, __netgroup_dict_search,
+ ngsa);
+ }
+
+ /* If that search was successful, just return */
+ if (ngsa->found)
+ goto out;
+
+ /* If we have a netgroup dict, then search the dict using this same
+ * function.
+ */
+ if (ngentry->netgroup_ngs) {
+ ngsa->_is_host_dict = _gf_false;
+ dict_foreach (ngentry->netgroup_ngs, __netgroup_dict_search,
+ ngsa);
+ }
+out:
+ /* Always 0: the outcome is reported only through ngsa->found and
+ * ngsa->found_entry, never through the return value.
+ */
+ return 0;
+}
+
+/**
+ * __export_dir_lookup_netgroup -- Function to search an exports directory
+ * for a host name.
+ *
+ * This function walks all the netgroups & hosts in an export directory
+ * and tries to match it with the search key. This function calls the above
+ * netgroup search function to search through the netgroups.
+ *
+ * This function is very similar to the above function, but both are necessary
+ * since we are walking two different dicts. For each netgroup in _this_ dict
+ * (the exports dict) we are going to find the corresponding netgroups dict
+ * and walk that (nested) structure until we find the host we are looking for.
+ *
+ * @dict: The dict we are walking
+ * @key : The key we are on
+ * @val : The value associated with that key
+ * @data: Additional parameters. We pass a pointer to ng_auth_search_s
+ *
+ * This is passed as a function pointer to dict_foreach ().
+ *
+ * Not for external use.
+ */
+static int
+__export_dir_lookup_netgroup (dict_t *dict, char *key, data_t *val,
+                              void *data)
+{
+        struct ng_auth_search *ngsa    = NULL; /* Search params */
+        struct netgroups_file *nfile   = NULL; /* Netgroups file to search */
+        struct netgroup_entry *ngentry = NULL; /* Entry in the netgroups file */
+        struct export_dir     *tmpdir  = NULL;
+
+        ngsa = (struct ng_auth_search *)data;
+        nfile = (struct netgroups_file *)ngsa->file;
+
+        /* This callback always returns 0, so it keeps being invoked for
+         * the remaining netgroups of the export after a match. Leave the
+         * search params untouched from then on; otherwise ngsa->expitem
+         * (and with it the options handed back to the caller) would be
+         * replaced by the item of whichever netgroup is visited last.
+         */
+        if (ngsa->found)
+                goto out;
+
+        GF_ASSERT ((*key == '@'));
+
+        /* We use (key + 1) here because keys start with '@' for ngs */
+        ngentry = ng_file_get_netgroup (nfile, (key + 1));
+        if (!ngentry) {
+                gf_msg_debug (GF_MNT_AUTH, 0, "%s not found in %s",
+                              key, nfile->filename);
+                goto out;
+        }
+
+        tmpdir = exp_file_get_dir (ngsa->expfile, ngsa->expdir);
+        if (!tmpdir)
+                goto out;
+
+        /* The export item carries the options that apply to this netgroup */
+        ngsa->expitem = exp_dir_get_netgroup (tmpdir, key);
+        if (!ngsa->expitem)
+                goto out;
+
+        /* Run through the host dict */
+        if (ngentry->netgroup_hosts) {
+                ngsa->_is_host_dict = _gf_true;
+                dict_foreach (ngentry->netgroup_hosts, __netgroup_dict_search,
+                              ngsa);
+        }
+
+        /* If the above search was successful, just return */
+        if (ngsa->found)
+                goto out;
+
+        /* Run through the netgroups dict */
+        if (ngentry->netgroup_ngs) {
+                ngsa->_is_host_dict = _gf_false;
+                dict_foreach (ngentry->netgroup_ngs, __netgroup_dict_search,
+                              ngsa);
+        }
+out:
+        return 0;
+}
+
+/**
+ * _mnt3_auth_setup_search_params -- This function sets up an ng_auth_search
+ * struct with host and file as the parameters.
+ * Host is what we are searching for and file
+ * is what we are searching in.
+ * @params : Search params to setup
+ * @host   : The host to set
+ * @dir    : The export directory name to set
+ * @nfile  : The netgroups file to set
+ * @expfile: The exports file to set
+ *
+ */
+void _mnt3_auth_setup_search_params (struct ng_auth_search *params,
+                                     const char *host, const char *dir,
+                                     const struct netgroups_file *nfile,
+                                     const struct exports_file *expfile)
+{
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, params, done);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, host, done);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, nfile, done);
+
+        /* What to look for, and where */
+        params->search_for = host;
+        params->file = nfile;
+        params->expfile = expfile;
+        params->expdir = dir;
+
+        /* Nothing has been found yet */
+        params->found = _gf_false;
+        params->found_entry = NULL;
+        params->expitem = NULL;
+        params->_is_host_dict = _gf_false;
+done:
+        return;
+}
+
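+/**
+ * _mnt3_auth_check_host_in_netgroup -- Given a host name for a directory
+ * find if that hostname is in the
+ * directory's dict of netgroups.
+ * @auth_params: Auth params holding the netgroups and exports files to search
+ * @fh  : Filehandle whose mountid selects the export directory; may be NULL
+ * @host: The host we are searching for
+ * @dir : The exports directory name (used to lookup in exports file when
+ * @fh is NULL)
+ * @item: Set to the export item of the netgroup that was searched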
+ *
+ * Search procedure:
+ *
+ * - Lookup directory string against exports file structure,
+ * get an exports directory structure.
+ * - Walk the export file structure's netgroup dict. This dict
+ * holds each netgroup that is authorized to mount that directory.
+ * - We then have to walk the netgroup structure, which is a set of
+ * nested dicts until we find the host we are looking for.
+ *
+ * @return: success: Pointer to the netgroup entry found
+ * failure: NULL
+ *
+ * Not for external use.
+ */
+static struct netgroup_entry *
+_mnt3_auth_check_host_in_netgroup (const struct mnt3_auth_params *auth_params,
+                                   struct nfs3_fh *fh, const char *host,
+                                   const char *dir, struct export_item **item)
+{
+        struct export_dir     *expdir = NULL;
+        struct ng_auth_search  search = {0, };
+        struct netgroup_entry *match  = NULL;
+        struct exports_file   *efile  = auth_params->expfile;
+        struct netgroups_file *nfile  = auth_params->ngfile;
+
+        /* Validate args */
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, nfile, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, efile, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, host, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, item, out);
+
+        /* Pick the export by mountid when a filehandle is given,
+         * by directory name otherwise.
+         */
+        expdir = fh ? exp_file_dir_from_uuid (efile, fh->mountid)
+                    : exp_file_get_dir (efile, dir);
+        if (!expdir)
+                goto out;
+
+        /* Walk every netgroup listed for the export, looking for the host */
+        _mnt3_auth_setup_search_params (&search, host, expdir->dir_name,
+                                        nfile, efile);
+        dict_foreach (expdir->netgroups, __export_dir_lookup_netgroup,
+                      &search);
+
+        /* Hand back both the netgroup entry and the export item */
+        *item = search.expitem;
+        match = search.found_entry;
+out:
+        return match;
+}
+
+/**
+ * check_rw_access -- Checks if the export item
+ * has read-write access.
+ *
+ * @host_item : The export item to check
+ *
+ * @return -EROFS if it does not have rw access, 0 otherwise
+ *
+ */
+int
+check_rw_access (struct export_item *item)
+{
+        /* Write access needs an item that carries options with 'rw' set;
+         * anything less is treated as read-only.
+         */
+        if (item && item->opts && item->opts->rw)
+                return 0;
+
+        return -EROFS;
+}
+
+/**
+ * mnt3_auth_host -- Check if a host is authorized for a directory
+ *
+ * @auth_params : Auth parameters to authenticate against
+ * @host: Host requesting the directory
+ * @dir : Directory that the host requests
+ * @fh : The filehandle passed from an fop to authenticate
+ *
+ * 'fh' is null on mount requests and 'dir' is null on fops
+ *
+ * Procedure:
+ *
+ * - Check if the host is in the exports directory.
+ * - If not, check if the host is in the netgroups file for the
+ * netgroups authorized for the exports.
+ *
+ * @return: 0 if authorized
+ * -EACCES for completely unauthorized fop
+ * -EROFS for unauthorized write operations (rm, mkdir, write) *
+ */
+int
+mnt3_auth_host (const struct mnt3_auth_params *auth_params, const char *host,
+                struct nfs3_fh *fh, const char *dir, gf_boolean_t is_write_op,
+                struct export_item **save_item)
+{
+        int                 status  = -EACCES;
+        struct export_item *matched = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, auth_params, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT_AUTH, host, out);
+
+        /* Try the host entries of the export first; fall back to the
+         * netgroups authorized for the export only when that fails.
+         */
+        matched = _mnt3_auth_check_host_in_export (auth_params->expfile, dir,
+                                                   host, fh);
+        if (!matched &&
+            !_mnt3_auth_check_host_in_netgroup (auth_params, fh, host, dir,
+                                                &matched))
+                goto out;
+
+        /* The host is known: reads are always fine, writes need 'rw' */
+        if (is_write_op)
+                status = check_rw_access (matched);
+        else
+                status = 0;
+out:
+        if (save_item)
+                *save_item = matched;
+
+        return status;
+}
diff --git a/xlators/nfs/server/src/mount3-auth.h b/xlators/nfs/server/src/mount3-auth.h
new file mode 100644
index 00000000000..b25d4724fac
--- /dev/null
+++ b/xlators/nfs/server/src/mount3-auth.h
@@ -0,0 +1,59 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MOUNT3_AUTH
+#define _MOUNT3_AUTH
+
+#include "nfs-mem-types.h"
+#include "netgroups.h"
+#include "exports.h"
+#include "mount3.h"
+#include "nfs.h"
+
+#define GF_MNT_AUTH GF_NFS"-mount3-auth"
+
+/* Authentication inputs for one mount state. The two file pointers are
+ * replaced with __sync_lock_test_and_set () by the mnt3_auth_set_*_auth ()
+ * functions.
+ */
+struct mnt3_auth_params {
+ struct netgroups_file *ngfile; /* The netgroup file to auth against */
+ struct exports_file *expfile; /* The export file to auth against */
+ struct mount3_state *ms; /* The mount state that owns this */
+};
+
+/* Initialize auth params struct */
+struct mnt3_auth_params *
+mnt3_auth_params_init (struct mount3_state *ms);
+
+/* Set the netgroups file to use in the auth */
+int
+mnt3_auth_set_netgroups_auth (struct mnt3_auth_params *aps,
+ const char *filename);
+
+/* Set the exports file to use in the auth */
+int
+mnt3_auth_set_exports_auth (struct mnt3_auth_params *aps, const char *filename);
+
+/* Check if a host is authorized to perform a mount / nfs-fop */
+int
+mnt3_auth_host (const struct mnt3_auth_params *aps, const char *host,
+ struct nfs3_fh *fh, const char *dir, gf_boolean_t is_write_op,
+ struct export_item **save_item);
+
+/* Free resources used by the auth params struct */
+void
+mnt3_auth_params_deinit (struct mnt3_auth_params *aps);
+
+int
+mnt3_auth_fop_options_verify (const struct mnt3_auth_params *auth_params,
+ const char *host, const char *dir);
+
+#endif /* _MOUNT3_AUTH */
diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
index 9b287cf907e..2647e384a94 100644
--- a/xlators/nfs/server/src/mount3.c
+++ b/xlators/nfs/server/src/mount3.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
@@ -37,39 +23,87 @@
#include "locking.h"
#include "iatt.h"
#include "nfs-mem-types.h"
-
+#include "nfs.h"
+#include "common-utils.h"
+#include "store.h"
+#include "glfs-internal.h"
+#include "glfs.h"
+#include "mount3-auth.h"
+#include "hashfn.h"
+#include "nfs-messages.h"
#include <errno.h>
#include <sys/socket.h>
#include <sys/uio.h>
+
+/* This macro will assist in freeing up entire link list
+ * of host_auth_spec structure.
+ *
+ * 'exp' is a pointer to the export owning the list. It is parenthesized
+ * at each use so that any pointer expression can be passed, and it is
+ * evaluated twice, so it must not have side effects. On return
+ * exp->hostspec is NULL.
+ */
+#define FREE_HOSTSPEC(exp)  do {                                        \
+                struct host_auth_spec *host = (exp)->hostspec;          \
+                while (NULL != host) {                                  \
+                        struct host_auth_spec *temp = host;             \
+                        host = host->next;                              \
+                        if (NULL != temp->host_addr) {                  \
+                                GF_FREE (temp->host_addr);              \
+                        }                                               \
+                        GF_FREE (temp);                                 \
+                }                                                       \
+                (exp)->hostspec = NULL;                                 \
+        } while (0)
+
+/* Paths for export and netgroup files */
+const char *exports_file_path = GLUSTERD_DEFAULT_WORKDIR "/nfs/exports";
+const char *netgroups_file_path = GLUSTERD_DEFAULT_WORKDIR "/nfs/netgroups";
+
typedef ssize_t (*mnt3_serializer) (struct iovec outmsg, void *args);
+extern void *
+mount3udp_thread (void *argv);
+
+/* Release an export together with everything it owns; NULL is accepted. */
+static void
+mnt3_export_free (struct mnt3_export *exp)
+{
+        if (exp) {
+                /* Only directory exports carry a host spec list */
+                if (exp->exptype == MNT3_EXPTYPE_DIR)
+                        FREE_HOSTSPEC (exp);
+
+                GF_FREE (exp->expname);
+                GF_FREE (exp->fullpath);
+                GF_FREE (exp);
+        }
+}
/* Generic reply function for MOUNTv3 specific replies. */
int
mnt3svc_submit_reply (rpcsvc_request_t *req, void *arg, mnt3_serializer sfunc)
{
- struct iovec outmsg = {0, };
- struct iobuf *iob = NULL;
- struct mount3_state *ms = NULL;
- int ret = -1;
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ struct mount3_state *ms = NULL;
+ int ret = -1;
+ ssize_t msglen = 0;
+ struct iobref *iobref = NULL;
if (!req)
return -1;
ms = (struct mount3_state *)rpcsvc_request_program_private (req);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "mount state not found");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND, "mount state not found");
goto ret;
}
/* First, get the io buffer into which the reply in arg will
* be serialized.
*/
+ /* TODO: use 'xdrproc_t' instead of 'sfunc' to get the xdr-size */
iob = iobuf_get (ms->iobpool);
if (!iob) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get iobuf");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobuf");
goto ret;
}
@@ -77,21 +111,102 @@ mnt3svc_submit_reply (rpcsvc_request_t *req, void *arg, mnt3_serializer sfunc)
/* Use the given serializer to translate the give C structure in arg
* to XDR format which will be written into the buffer in outmsg.
*/
- outmsg.iov_len = sfunc (outmsg, arg);
+ msglen = sfunc (outmsg, arg);
+ if (msglen < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_ENCODE_MSG_FAIL,
+ "Failed to encode message");
+ goto ret;
+ }
+ outmsg.iov_len = msglen;
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to add iob to iobref");
+ goto ret;
+ }
/* Then, submit the message for transmission. */
- ret = rpcsvc_submit_message (req, outmsg, iob);
- iobuf_unref (iob);
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
if (ret == -1) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Reply submission failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, errno, NFS_MSG_REP_SUBMIT_FAIL,
+ "Reply submission failed");
goto ret;
}
ret = 0;
ret:
+ if (NULL != iob)
+ iobuf_unref (iob);
+ if (NULL != iobref)
+ iobref_unref (iobref);
+
return ret;
}
+/**
+ * __mountdict_insert -- Insert a mount entry into the mount state
+ *
+ * @ms: The mount state holding the entries
+ * @me: The mount entry to insert
+ *
+ * Not for external use.
+ */
+void
+__mountdict_insert (struct mount3_state *ms, struct mountentry *me)
+{
+        char   *keyname = NULL;
+        data_t *entry   = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_MNT, ms, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT, me, out);
+
+        /* Export names can either be just volumes or paths inside that
+         * volume: prefer the full path when the entry has one.
+         */
+        keyname = me->has_full_path ? me->fullpath : me->exname;
+
+        /* We don't want names with leading slashes in the key */
+        while (keyname[0] == '/')
+                keyname++;
+
+        /* The key is "<export>:<hostname>" */
+        snprintf (me->hashkey, sizeof (me->hashkey), "%s:%s", keyname,
+                  me->hostname);
+
+        entry = bin_to_data (me, sizeof (*me));
+        dict_set (ms->mountdict, me->hashkey, entry);
+        gf_msg_trace (GF_MNT, 0, "Inserted into mountdict: %s", me->hashkey);
+out:
+        return;
+}
+
+/**
+ * __mountdict_remove -- Remove a mount entry from the mountstate.
+ *
+ * @ms: The mount state holding the entries
+ * @me: The mount entry to remove
+ *
+ * Not for external use.
+ */
+void
+__mountdict_remove (struct mount3_state *ms, struct mountentry *me)
+{
+        /* Entries are stored under the key kept in me->hashkey */
+        char *stored_key = me->hashkey;
+
+        dict_del (ms->mountdict, stored_key);
+}
/* Generic error reply function, just pass the err status
* and it will do the rest, including transmission.
@@ -163,7 +278,10 @@ mnt3svc_set_mountres3 (mountstat3 stat, struct nfs3_fh *fh, int *authflavor,
uint32_t fhlen = 0;
res.fhs_status = stat;
- fhlen = nfs3_fh_compute_size (fh);
+
+ if (fh)
+ fhlen = nfs3_fh_compute_size ();
+
res.mountres3_u.mountinfo.fhandle.fhandle3_len = fhlen;
res.mountres3_u.mountinfo.fhandle.fhandle3_val = (char *)fh;
res.mountres3_u.mountinfo.auth_flavors.auth_flavors_val = authflavor;
@@ -172,37 +290,375 @@ mnt3svc_set_mountres3 (mountstat3 stat, struct nfs3_fh *fh, int *authflavor,
return res;
}
+/* Read the rmtab from the store_handle and append (or not) the entries to the
+ * mountlist.
+ *
+ * Requires the store_handle to be locked.
+ */
+static int
+__mount_read_rmtab (gf_store_handle_t *sh, struct list_head *mountlist,
+                    gf_boolean_t append)
+{
+        int                ret = 0;
+        unsigned int       idx = 0;
+        struct mountentry *me = NULL, *tmp = NULL;
+        /* me->hostname is a char[MNTPATHLEN] */
+        char               key[MNTPATHLEN + 11];
+
+        GF_ASSERT (sh && mountlist);
+
+        if (!gf_store_locked_local (sh)) {
+                gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_READ_LOCKED,
+                        "Not reading unlocked %s", sh->path);
+                return -1;
+        }
+
+        /* Without 'append' the list is rebuilt from scratch */
+        if (!append) {
+                list_for_each_entry_safe (me, tmp, mountlist, mlist) {
+                        list_del (&me->mlist);
+                        GF_FREE (me);
+                }
+                me = NULL;
+        }
+
+        /* Each pass first files the entry completed by the previous pass
+         * (if any), then tries to read entry number 'idx'. The loop ends
+         * when a key is missing, so the value returned at the end of a
+         * normal read is the non-zero result of that last lookup.
+         */
+        for (;;) {
+                char *value = NULL;
+
+                if (me && append) {
+                        /* do not add duplicates */
+                        list_for_each_entry (tmp, mountlist, mlist) {
+                                if (!strcmp(tmp->hostname, me->hostname) &&
+                                    !strcmp(tmp->exname, me->exname)) {
+                                        GF_FREE (me);
+                                        goto dont_add;
+                                }
+                        }
+                        list_add_tail (&me->mlist, mountlist);
+                } else if (me) {
+                        list_add_tail (&me->mlist, mountlist);
+                }
+
+dont_add:
+                me = GF_CALLOC (1, sizeof (*me), gf_nfs_mt_mountentry);
+                if (!me) {
+                        gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+                                NFS_MSG_NO_MEMORY, "Out of memory");
+                        ret = -1;
+                        goto out;
+                }
+
+                INIT_LIST_HEAD (&me->mlist);
+
+                /* idx is unsigned, hence %u */
+                snprintf (key, sizeof (key), "hostname-%u", idx);
+                ret = gf_store_retrieve_value (sh, key, &value);
+                if (ret)
+                        break;
+                /* snprintf () always NUL-terminates; strncpy () with the
+                 * full buffer length would not when the stored value is
+                 * MNTPATHLEN bytes or longer.
+                 */
+                snprintf (me->hostname, MNTPATHLEN, "%s", value);
+                GF_FREE (value);
+
+                snprintf (key, sizeof (key), "mountpoint-%u", idx);
+                ret = gf_store_retrieve_value (sh, key, &value);
+                if (ret)
+                        break;
+                snprintf (me->exname, MNTPATHLEN, "%s", value);
+                GF_FREE (value);
+
+                idx++;
+                gf_msg_trace (GF_MNT, 0, "Read entries %s:%s",
+                              me->hostname, me->exname);
+        }
+        gf_msg_debug (GF_MNT, 0, "Read %u entries from '%s'", idx, sh->path);
+        /* The entry being filled when the loop stopped is incomplete */
+        GF_FREE (me);
+out:
+        return ret;
+}
+
+/* Overwrite the contents of the rmtab with the in-memory client list.
+ * Fail gracefully if the store_handle is not locked.
+ *
+ * The entries are written to a temporary file which replaces the rmtab
+ * only once every entry has been saved; on a write error the temporary
+ * file is removed and the rmtab is left as it was.
+ */
+static void
+__mount_rewrite_rmtab(struct mount3_state *ms, gf_store_handle_t *sh)
+{
+        struct mountentry *me = NULL;
+        /* Room for "mountpoint-" plus any unsigned int index. A 16 byte
+         * buffer silently truncated the index from 10000 upwards, which
+         * made distinct entries share one key.
+         */
+        char               key[32];
+        int                fd, ret;
+        unsigned int       idx = 0;
+
+        if (!gf_store_locked_local (sh)) {
+                gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_MODIFY_LOCKED,
+                        "Not modifying unlocked %s", sh->path);
+                return;
+        }
+
+        fd = gf_store_mkstemp (sh);
+        if (fd == -1) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Failed to open %s", sh->path);
+                return;
+        }
+
+        /* One "hostname-<idx>" / "mountpoint-<idx>" pair per client */
+        list_for_each_entry (me, &ms->mountlist, mlist) {
+                snprintf (key, sizeof (key), "hostname-%u", idx);
+                ret = gf_store_save_value (fd, key, me->hostname);
+                if (ret)
+                        goto fail;
+
+                snprintf (key, sizeof (key), "mountpoint-%u", idx);
+                ret = gf_store_save_value (fd, key, me->exname);
+                if (ret)
+                        goto fail;
+
+                idx++;
+        }
+
+        gf_msg_debug (GF_MNT, 0, "Updated rmtab with %u entries", idx);
+
+        if (gf_store_rename_tmppath (sh))
+                gf_msg (GF_MNT, GF_LOG_ERROR, errno,
+                        NFS_MSG_RWTAB_OVERWRITE_FAIL,
+                        "Failed to overwrite rwtab %s", sh->path);
+
+        return;
+
+fail:
+        gf_msg (GF_MNT, GF_LOG_ERROR, errno, NFS_MSG_UPDATE_FAIL,
+                "Failed to update %s", sh->path);
+        gf_store_unlink_tmppath (sh);
+}
+/* Create a store handle for the rmtab at 'rmtab' and return it in '*sh'.
+ * Returns _gf_true when the handle was created, _gf_false otherwise.
+ */
+static gf_boolean_t
+mount_open_rmtab (const char *rmtab, gf_store_handle_t **sh)
+{
+        /* No path, or an empty one, means updating the rmtab is
+         * disabled and only the in-memory list is used.
+         */
+        if (rmtab && rmtab[0] != '\0') {
+                if (gf_store_handle_new (rmtab, sh) == 0)
+                        return _gf_true;
+
+                gf_log (GF_MNT, GF_LOG_WARNING, "Failed to open '%s'", rmtab);
+        }
+
+        return _gf_false;
+}
+
+
+/* Read the rmtab into a clean ms->mountlist.
+ */
+static void
+mount_read_rmtab (struct mount3_state *ms)
+{
+        gf_store_handle_t *handle = NULL;
+        struct nfs_state  *nfs    = NULL;
+
+        nfs = (struct nfs_state *)ms->nfsx->private;
+
+        /* Nothing to read when the rmtab is disabled or cannot be opened */
+        if (!mount_open_rmtab (nfs->rmtab, &handle))
+                return;
+
+        if (gf_store_lock (handle) == 0) {
+                /* Replace the in-memory list with the stored entries */
+                __mount_read_rmtab (handle, &ms->mountlist, _gf_false);
+                gf_store_unlock (handle);
+        } else {
+                gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_LOCK_FAIL,
+                        "Failed to lock '%s'", nfs->rmtab);
+        }
+
+        gf_store_handle_destroy (handle);
+}
+
+/* Write the ms->mountlist to the rmtab.
+ *
+ * The rmtab could be empty, or it can exist and have been updated by a
+ * different storage server without our knowing.
+ *
+ * 0. if opening the nfs->rmtab fails, return gracefully
+ * 1. takes the store_handle lock on the current rmtab
+ * - blocks if another storage server rewrites the rmtab at the same time
+ * 2. [if new_rmtab] takes the store_handle lock on the new rmtab
+ * 3. reads/merges the entries from the current rmtab
+ * 4. [if new_rmtab] reads/merges the entries from the new rmtab
+ * 5. [if new_rmtab] writes the new rmtab
+ * 6. [if not new_rmtab] writes the current rmtab
+ * 7. [if new_rmtab] replaces nfs->rmtab to point to the new location
+ * 8. [if new_rmtab] releases the store_handle lock of the new rmtab
+ * 9. releases the store_handle lock of the old rmtab
+ */
+void
+mount_rewrite_rmtab (struct mount3_state *ms, char *new_rmtab)
+{
+ gf_store_handle_t *sh = NULL, *nsh = NULL;
+ struct nfs_state *nfs = NULL;
+ int ret;
+ char *rmtab = NULL;
+ gf_boolean_t got_old_rmtab = _gf_false;
+
+ nfs = (struct nfs_state *)ms->nfsx->private;
+
+ /* Without a usable current rmtab and without a new location there
+ * is nothing to read and nowhere to write.
+ */
+ got_old_rmtab = mount_open_rmtab (nfs->rmtab, &sh);
+ if (!got_old_rmtab && !new_rmtab)
+ return;
+
+ if (got_old_rmtab && gf_store_lock (sh)) {
+ gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_REWRITE_ERROR,
+ "Not rewriting '%s'", nfs->rmtab);
+ goto free_sh;
+ }
+
+ if (new_rmtab) {
+ ret = gf_store_handle_new (new_rmtab, &nsh);
+ if (ret) {
+ gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_OPEN_FAIL,
+ "Failed to open '%s'", new_rmtab);
+ goto unlock_sh;
+ }
+
+ if (gf_store_lock (nsh)) {
+ gf_msg (GF_MNT, GF_LOG_WARNING, 0,
+ NFS_MSG_REWRITE_ERROR,
+ "Not rewriting '%s'", new_rmtab);
+ goto free_nsh;
+ }
+ }
+
+ /* always read the currently used rmtab */
+ if (got_old_rmtab)
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_true);
+
+ if (new_rmtab) {
+ /* read the new rmtab and write changes to the new location */
+ __mount_read_rmtab (nsh, &ms->mountlist, _gf_true);
+ __mount_rewrite_rmtab (ms, nsh);
+
+ /* replace the nfs->rmtab reference to the new rmtab */
+ rmtab = gf_strdup(new_rmtab);
+ if (rmtab == NULL) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, errno, NFS_MSG_NO_MEMORY,
+ "Out of memory, keeping %s as rmtab",
+ nfs->rmtab);
+ } else {
+ GF_FREE (nfs->rmtab);
+ nfs->rmtab = rmtab;
+ }
+
+ gf_store_unlock (nsh);
+ } else {
+ /* rewrite the current (unchanged location) rmtab */
+ /* got_old_rmtab is true here: the early return above covers
+ * the case where both it and new_rmtab are unset.
+ */
+ __mount_rewrite_rmtab (ms, sh);
+ }
+
+ /* Cleanup ladder: each label undoes one acquisition, in reverse
+ * order, and the error paths above jump to the label matching what
+ * they had acquired so far.
+ */
+free_nsh:
+ if (new_rmtab)
+ gf_store_handle_destroy (nsh);
+unlock_sh:
+ if (got_old_rmtab)
+ gf_store_unlock (sh);
+free_sh:
+ if (got_old_rmtab)
+ gf_store_handle_destroy (sh);
+}
+
+/* Add a new NFS-client to the ms->mountlist and update the rmtab if we can.
+ *
+ * A NFS-client will only be removed from the ms->mountlist in case the
+ * NFS-client sends a unmount request. It is possible that a NFS-client
+ * crashed/rebooted had network loss or something else prevented the NFS-client
+ * to unmount cleanly. In this case, a duplicate entry would be added to the
+ * ms->mountlist, which is wrong and we should prevent.
+ *
+ * It is fully acceptable that the ms->mountlist is not 100% correct, this is a
+ * common issue for all(?) NFS-servers.
+ */
int
mnt3svc_update_mountlist (struct mount3_state *ms, rpcsvc_request_t *req,
- xlator_t *exportxl)
+ const char *expname, const char *fullpath)
{
struct mountentry *me = NULL;
+ struct mountentry *cur = NULL;
int ret = -1;
+ char *colon = NULL;
+ struct nfs_state *nfs = NULL;
+ gf_store_handle_t *sh = NULL;
+ gf_boolean_t update_rmtab = _gf_false;
- if ((!ms) || (!req) || (!exportxl))
+ if ((!ms) || (!req) || (!expname))
return -1;
- me = (struct mountentry *)GF_CALLOC (1, sizeof (*me), gf_nfs_mt_mountentry);
+ me = (struct mountentry *)GF_CALLOC (1, sizeof (*me),
+ gf_nfs_mt_mountentry);
if (!me)
return -1;
- strcpy (me->exname, exportxl->name);
+ nfs = (struct nfs_state *)ms->nfsx->private;
+
+ update_rmtab = mount_open_rmtab (nfs->rmtab, &sh);
+
+ strncpy (me->exname, expname, MNTPATHLEN);
+ /* Sometimes we don't care about the full path
+ * so a NULL value for fullpath is valid.
+ */
+ if (fullpath) {
+ if (strlen (fullpath) < MNTPATHLEN) {
+ strcpy (me->fullpath, fullpath);
+ me->has_full_path = _gf_true;
+ }
+ }
+
+
INIT_LIST_HEAD (&me->mlist);
/* Must get the IP or hostname of the client so we
* can map it into the mount entry.
*/
- ret = rpcsvc_conn_peername (req->conn, me->hostname, MNTPATHLEN);
+ ret = rpcsvc_transport_peername (req->trans, me->hostname, MNTPATHLEN);
if (ret == -1)
goto free_err;
+ colon = strrchr (me->hostname, ':');
+ if (colon) {
+ *colon = '\0';
+ }
LOCK (&ms->mountlock);
{
+ /* in case locking fails, we just don't write the rmtab */
+ if (update_rmtab && gf_store_lock (sh)) {
+ gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_LOCK_FAIL,
+ "Failed to lock '%s', changes will not be "
+ "written", nfs->rmtab);
+ } else if (update_rmtab) {
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_false);
+ }
+
+ /* do not add duplicates */
+ list_for_each_entry (cur, &ms->mountlist, mlist) {
+ if (!strcmp(cur->hostname, me->hostname) &&
+ !strcmp(cur->exname, me->exname)) {
+ GF_FREE (me);
+ goto dont_add;
+ }
+ }
list_add_tail (&me->mlist, &ms->mountlist);
+ __mountdict_insert (ms, me);
+
+ /* only write the rmtab in case it was locked */
+ if (update_rmtab && gf_store_locked_local (sh))
+ __mount_rewrite_rmtab (ms, sh);
}
+dont_add:
+ if (update_rmtab && gf_store_locked_local (sh))
+ gf_store_unlock (sh);
+
UNLOCK (&ms->mountlock);
free_err:
+ if (update_rmtab)
+ gf_store_handle_destroy (sh);
+
if (ret == -1)
GF_FREE (me);
@@ -210,6 +666,79 @@ free_err:
}
+/* Copy into volumeid the volume id of the export whose xlator is mntxl.
+ *
+ * The export list is walked with ms->mountlock held.
+ *
+ * Returns 0 when a matching export was found, -1 when ms or mntxl is NULL or
+ * no export refers to mntxl (volumeid is left untouched in that case).
+ */
+int
+__mnt3_get_volume_id (struct mount3_state *ms, xlator_t *mntxl,
+                      uuid_t volumeid)
+{
+        struct mnt3_export *entry = NULL;
+        int                 found = -1;
+
+        if (!ms || !mntxl)
+                return found;
+
+        LOCK (&ms->mountlock);
+        list_for_each_entry (entry, &ms->exportlist, explist) {
+                if (entry->vol != mntxl)
+                        continue;
+
+                gf_uuid_copy (volumeid, entry->volumeid);
+                found = 0;
+                break;
+        }
+        UNLOCK (&ms->mountlock);
+
+        return found;
+}
+
+/* Derive a mount id from an export path.
+ *
+ * Leading slashes are skipped, mountid is cleared, and the 32-bit
+ * SuperFastHash of the remaining path is copied into the first bytes of
+ * mountid.
+ *
+ * Returns 0 on success, -1 when the hash call reports failure (value 1).
+ */
+int
+__mnt3_build_mountid_from_path (const char *path, uuid_t mountid)
+{
+        uint32_t digest = 0;
+
+        /* An empty string stops the loop because its first byte is '\0'. */
+        while (path[0] == '/')
+                path++;
+
+        /* Clear the mountid */
+        gf_uuid_clear (mountid);
+
+        digest = SuperFastHash (path, strlen (path));
+        if (digest == 1) {
+                gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_HASH_PATH_FAIL,
+                        "failed to hash path: %s", path);
+                return -1;
+        }
+
+        memcpy (mountid, &digest, sizeof (digest));
+        return 0;
+}
+
+/* Derive a mount id from the name of the xlator being mounted.
+ *
+ * mountid is cleared and the 32-bit SuperFastHash of mntxl->name is copied
+ * into its first bytes.
+ *
+ * Returns 0 on success, -1 when the hash call reports failure (value 1).
+ */
+int
+__mnt3_get_mount_id (xlator_t *mntxl, uuid_t mountid)
+{
+        uint32_t digest = 0;
+
+        /* first clear the mountid */
+        gf_uuid_clear (mountid);
+
+        digest = SuperFastHash (mntxl->name, strlen (mntxl->name));
+        if (digest == 1) {
+                gf_msg (GF_MNT, GF_LOG_WARNING, 0, NFS_MSG_HASH_XLATOR_FAIL,
+                        "failed to hash xlator name: %s", mntxl->name);
+                return -1;
+        }
+
+        memcpy (mountid, &digest, sizeof (digest));
+        return 0;
+}
+
+
int32_t
mnt3svc_lookup_mount_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
@@ -224,32 +753,63 @@ mnt3svc_lookup_mount_cbk (call_frame_t *frame, void *cookie,
int autharr[10];
int autharrlen = 0;
rpcsvc_t *svc = NULL;
+ xlator_t *mntxl = NULL;
+ uuid_t volumeid = {0, };
+ char *path = NULL;
+ uuid_t mountid = {1, };
+ char fhstr[1536];
req = (rpcsvc_request_t *)frame->local;
if (!req)
return -1;
+ mntxl = (xlator_t *)cookie;
ms = (struct mount3_state *)rpcsvc_request_program_private (req);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "mount state not found");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND,
+ "mount state not found");
op_ret = -1;
op_errno = EINVAL;
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, op_errno,
+ NFS_MSG_LOOKUP_MNT_ERROR, "error=%s",
+ strerror (op_errno));
status = mnt3svc_errno_to_mnterr (op_errno);
-
+ }
if (status != MNT3_OK)
goto xmit_res;
- fh = nfs3_fh_build_root_fh (ms->nfsx->children, this, *buf);
- mnt3svc_update_mountlist (ms, req, this);
+ path = GF_CALLOC (PATH_MAX, sizeof (char), gf_nfs_mt_char);
+ if (!path) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Out of memory");
+ goto xmit_res;
+ }
+
+ snprintf (path, PATH_MAX, "/%s", mntxl->name);
+ mnt3svc_update_mountlist (ms, req, path, NULL);
+ GF_FREE (path);
+ if (gf_nfs_dvm_off (nfs_state (ms->nfsx))) {
+ fh = nfs3_fh_build_indexed_root_fh (ms->nfsx->children, mntxl);
+ goto xmit_res;
+ }
+
+ __mnt3_get_mount_id (mntxl, mountid);
+ __mnt3_get_volume_id (ms, mntxl, volumeid);
+ fh = nfs3_fh_build_uuid_root_fh (volumeid, mountid);
+
xmit_res:
- gf_log (GF_MNT, GF_LOG_DEBUG, "Mount reply status: %d", status);
+ nfs3_fh_to_str (&fh, fhstr, sizeof (fhstr));
+ gf_msg_debug (GF_MNT, 0, "MNT reply: fh %s, status: %d", fhstr,
+ status);
if (op_ret == 0) {
svc = rpcsvc_request_service (req);
- autharrlen = rpcsvc_auth_array (svc, this->name, autharr, 10);
+ autharrlen = rpcsvc_auth_array (svc, mntxl->name, autharr,
+ 10);
}
res = mnt3svc_set_mountres3 (status, &fh, autharr, autharrlen);
@@ -261,29 +821,1321 @@ xmit_res:
+/* Tell whether the directory path of a mount request names an export.
+ *
+ * @expname: export name to compare against; it begins with a slash.
+ * @dirpath: path sent by the client; it is copied, never modified.
+ * @export_parsing_match: when true, only the first component of dirpath
+ *                        (after an optional leading slash) is compared.
+ *
+ * Returns 1 when the names match, 0 otherwise or when either string is NULL.
+ */
int
-mnt3svc_mount (rpcsvc_request_t *req, xlator_t *nfsx, xlator_t * xl)
+mnt3_match_dirpath_export (const char *expname, const char *dirpath,
+ gf_boolean_t export_parsing_match)
{
- loc_t oploc = {0, };
- int ret = -1;
+ int ret = 0;
+ size_t dlen;
+ char *fullpath = NULL;
+ char *second_slash = NULL;
+ char *dirdup = NULL;
+
+ if ((!expname) || (!dirpath))
+ return 0;
+
+ /* Work on a stack copy so the caller's string stays intact. */
+ dirdup = strdupa (dirpath);
+
+ /* Some clients send a dirpath for mount that includes the slash at the
+ * end. String compare for searching the export will fail because our
+ * exports list does not include that slash. Remove the slash to
+ * compare.
+ */
+ dlen = strlen (dirdup);
+ if (dlen && dirdup[dlen - 1] == '/')
+ dirdup[dlen - 1] = '\0';
+
+ /* Here we try to match fullpaths with export names */
+ fullpath = dirdup;
+
+ if (export_parsing_match) {
+ if (dirdup[0] == '/')
+ fullpath = dirdup + 1;
+
+ /* Cut the copy after its first component. */
+ second_slash = strchr (fullpath, '/');
+ if (second_slash)
+ *second_slash = '\0';
+ }
+
+ /* The export name begins with a slash so move it forward by one
+ * to ignore the slash when we want to compare the fullpath and
+ * export.
+ */
+ if (fullpath[0] != '/')
+ expname++;
+
+ if (strcmp (expname, fullpath) == 0)
+ ret = 1;
+
+ return ret;
+}
+
+
+/* Start the lookup that answers a mount request for exportinode.
+ *
+ * A loc is filled for exportinode and a lookup is issued on xl with
+ * mnt3svc_lookup_mount_cbk as callback and req as its last argument.
+ *
+ * Returns -EFAULT when an argument is NULL, the negative value from
+ * nfs_inode_loc_fill when filling the loc fails, otherwise the return value
+ * of nfs_lookup.
+ */
+int
+mnt3svc_mount_inode (rpcsvc_request_t *req, struct mount3_state *ms,
+ xlator_t * xl, inode_t *exportinode)
+{
+ int ret = -EFAULT;
nfs_user_t nfu = {0, };
+ loc_t exportloc = {0, };
- if ((!req) || (!xl))
+ if ((!req) || (!xl) || (!ms) || (!exportinode))
return ret;
- ret = nfs_ino_loc_fill (xl->itable, 1, 0, &oploc);
+ ret = nfs_inode_loc_fill (exportinode, &exportloc, NFS_RESOLVE_EXIST);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_INODE_LOC_FILL_ERROR,
+ "Loc fill failed for export inode"
+ ": gfid %s, volume: %s",
+ uuid_utoa (exportinode->gfid), xl->name);
+ goto err;
+ }
+
/* To service the mount request, all we need to do
* is to send a lookup fop that returns the stat
* for the root of the child volume. This is
* used to build the root fh sent to the client.
*/
nfs_request_user_init (&nfu, req);
- ret = nfs_lookup (nfsx, xl, &nfu, &oploc, mnt3svc_lookup_mount_cbk,
- (void *)req);
- nfs_loc_wipe (&oploc);
+ ret = nfs_lookup (ms->nfsx, xl, &nfu, &exportloc,
+ mnt3svc_lookup_mount_cbk, (void *)req);
+
+ /* The loc is wiped whether or not the lookup call succeeded. */
+ nfs_loc_wipe (&exportloc);
+err:
+ return ret;
+}
+
+
+/* Serve a mount request for a whole volume.
+ *
+ * The root inode of the exported volume is looked up in its inode table and
+ * handed to mnt3svc_mount_inode, which sends the lookup; the mount reply and
+ * the file handle are sent from the lookup callback.
+ *
+ * Returns -EFAULT when an argument is NULL, -ENOENT when the root inode is
+ * not found, otherwise the return value of mnt3svc_mount_inode.
+ */
+int
+mnt3svc_volume_mount (rpcsvc_request_t *req, struct mount3_state *ms,
+                      struct mnt3_export *exp)
+{
+        uuid_t   root_id = {0, };
+        inode_t *root    = NULL;
+        int      status  = -EFAULT;
+
+        if (!req || !exp || !ms)
+                return status;
+
+        /* The gfid searched for is all zeroes except for a final 1. */
+        root_id[15] = 1;
+        root = inode_find (exp->vol->itable, root_id);
+        if (root == NULL) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ENOENT,
+                        NFS_MSG_GET_ROOT_INODE_FAIL,
+                        "Failed to get root inode");
+                return -ENOENT;
+        }
+
+        status = mnt3svc_mount_inode (req, ms, exp->vol, root);
+        inode_unref (root);
+
+        return status;
+}
+
+
+/* Split "[/]volume/sub/dir" into the volume name and the sub-directory.
+ *
+ * The first component of a directory export is the volume name, while any
+ * lookup needed to build the directory's file handle has to start from the
+ * root of the volume, so the volume name is stripped here.
+ *
+ * @dirpath: export path, with or without a leading slash.
+ * @volname: optional; when both volname and *volname are non-NULL the volume
+ *           name is copied into *volname and NUL-terminated. The caller's
+ *           buffer must be large enough; no size is checked here.
+ *
+ * Returns a pointer into dirpath at the first slash after the volume name,
+ * or NULL when dirpath is NULL or has no such slash.
+ */
+char *
+__volume_subdir (char *dirpath, char **volname)
+{
+        char *slash    = NULL;
+        int   name_len = 0;
+
+        if (dirpath == NULL)
+                return NULL;
+
+        if (*dirpath == '/')
+                dirpath++;
+
+        slash = index (dirpath, (int)'/');
+
+        if (slash && volname && *volname) {
+                /* slash is the first / after the volume name and dirpath
+                 * is the first character of the volume name.
+                 */
+                name_len = slash - dirpath;
+                strncpy (*volname, dirpath, name_len);
+                (*volname)[name_len] = '\0';
+        }
+
+        return slash;
+}
+
+
+/* Release a resolve state: wipe its loc, then free the structure itself.
+ * A NULL mres is ignored.
+ */
+void
+mnt3_resolve_state_wipe (mnt3_resolve_t *mres)
+{
+        if (mres != NULL) {
+                nfs_loc_wipe (&mres->resolveloc);
+                GF_FREE (mres);
+        }
+}
+
+
+/* Sets up the component argument to contain the next component in the path and
+ * sets up path as an absolute path starting from the component after it.
+ *
+ * @path:      in/out, absolute path ("/a/b/c"); rewritten to "/b/c", or to
+ *             the empty string when "a" was the last component.
+ * @plen:      size of the path buffer in bytes.
+ * @component: scratch buffer that receives a copy of path.
+ * @clen:      size of the component buffer in bytes.
+ *
+ * Returns a pointer into component at the NUL-terminated next component
+ * ("a"), or NULL when an argument is NULL, a size is 0, or path contains no
+ * slash.
+ */
+static char *
+setup_next_component (char *path, size_t plen, char *component, size_t clen)
+{
+        char *comp     = NULL;
+        char *nextcomp = NULL;
+
+        if ((!path) || (!component) || (plen == 0) || (clen == 0))
+                return NULL;
+
+        /* strncpy() does not terminate the destination when the source is
+         * clen bytes or longer; terminate explicitly so the searches below
+         * never run past the end of component.
+         */
+        strncpy (component, path, clen - 1);
+        component[clen - 1] = '\0';
+
+        comp = strchr (component, '/');
+        if (!comp)
+                goto err;
+
+        comp++;
+        nextcomp = strchr (comp, '/');
+        if (nextcomp) {
+                /* Same termination guarantee for the rewritten path. */
+                strncpy (path, nextcomp, plen - 1);
+                path[plen - 1] = '\0';
+                *nextcomp = '\0';
+        } else
+                path[0] = '\0';
+
+err:
+        return comp;
+}
+
+int32_t
+mnt3_resolve_subdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent);
+
+int
+mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms,
+ char *subdir);
+
+int32_t
+mnt3_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+/* There are multiple components in the directory export path and each one
+ * needs to be looked up one after the other.
+ *
+ * This resolves the next component of mres->remainingdir below the inode
+ * currently held in mres->resolveloc. A symlink is handed to nfs_readlink
+ * (callback mnt3_readlink_cbk); anything else is handed to nfs_lookup
+ * (callback mnt3_resolve_subdir_cbk).
+ *
+ * Returns -EFAULT when mres is NULL, no further component can be extracted or
+ * the loc cannot be filled; otherwise the return value of the readlink or
+ * lookup call.
+ */
+int
+__mnt3_resolve_export_subdir_comp (mnt3_resolve_t *mres)
+{
+ char dupsubdir[MNTPATHLEN];
+ char *nextcomp = NULL;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ uuid_t gfid = {0, };
+
+ if (!mres)
+ return ret;
+
+ nextcomp = setup_next_component (mres->remainingdir,
+ sizeof (mres->remainingdir),
+ dupsubdir, sizeof (dupsubdir));
+ if (!nextcomp)
+ goto err;
+
+ /* Wipe the contents of the previous component */
+ /* The gfid is saved first: it is the parent of the next component. */
+ gf_uuid_copy (gfid, mres->resolveloc.inode->gfid);
+ nfs_loc_wipe (&mres->resolveloc);
+ ret = nfs_entry_loc_fill (mres->mstate->nfsx, mres->exp->vol->itable,
+ gfid, nextcomp, &mres->resolveloc,
+ NFS_RESOLVE_CREATE);
+ /* NOTE(review): a return of -2 is tolerated here; presumably it means
+ * the entry is not cached yet - confirm against nfs_entry_loc_fill.
+ */
+ if ((ret < 0) && (ret != -2)) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, EFAULT,
+ NFS_MSG_RESOLVE_INODE_FAIL, "Failed to resolve and "
+ "create inode: parent gfid %s, entry %s",
+ uuid_utoa (gfid), nextcomp);
+ ret = -EFAULT;
+ goto err;
+ }
+
+ nfs_request_user_init (&nfu, mres->req);
+ if (IA_ISLNK (mres->resolveloc.inode->ia_type)) {
+ ret = nfs_readlink (mres->mstate->nfsx, mres->exp->vol, &nfu,
+ &mres->resolveloc, mnt3_readlink_cbk, mres);
+ gf_msg_debug (GF_MNT, 0, "Symlink found , need to resolve"
+ " into directory handle");
+ /* Not an error: the jump only skips the lookup below. */
+ goto err;
+ }
+ ret = nfs_lookup (mres->mstate->nfsx, mres->exp->vol, &nfu,
+ &mres->resolveloc, mnt3_resolve_subdir_cbk, mres);
+
+err:
+ return ret;
+}
+
+int __mnt3_resolve_subdir (mnt3_resolve_t *mres);
+
+/*
+ * Per the AFR2 comments, this function performs the "fresh" lookup
+ * by deleting the inode from cache and calling __mnt3_resolve_subdir
+ * again.
+ *
+ * The path of the loc that failed is copied into mres->remainingdir, the loc
+ * is wiped, and resolution restarts from the volume root.
+ *
+ * Returns the return value of __mnt3_resolve_subdir.
+ */
+int __mnt3_fresh_lookup (mnt3_resolve_t *mres) {
+        inode_unlink (mres->resolveloc.inode,
+                      mres->resolveloc.parent, mres->resolveloc.name);
+
+        /* Bounded by the destination and always NUL-terminated. Copying
+         * exactly strlen (path) bytes with strncpy() wrote no terminator
+         * and left the tail of an older, longer string in place.
+         *
+         * NOTE(review): components that were still unresolved in
+         * remainingdir are not re-appended here - confirm that a restart
+         * is only expected for the path resolved so far.
+         */
+        snprintf (mres->remainingdir, sizeof (mres->remainingdir), "%s",
+                  mres->resolveloc.path);
+
+        nfs_loc_wipe (&mres->resolveloc);
+        return __mnt3_resolve_subdir (mres);
+}
+
+/* Lookup callback for one component of a directory export.
+ *
+ * frame->local carries the mnt3_resolve_t of the request and cookie the
+ * xlator whose name is used for the auth array of the reply.
+ *
+ * - ESTALE: the inode is dropped from the cache and the lookup is retried.
+ * - Other lookup failure: an error reply is sent.
+ * - No component left: the full path is recorded on the export, the request
+ *   is authenticated, the mount id is placed in the file handle, the mount
+ *   list is updated and the reply is sent.
+ * - Otherwise the next component is resolved with the new file handle as
+ *   parent.
+ *
+ * A reply is sent, and mres released, whenever op_ret is negative at the
+ * end; the final-component branch forces that by setting op_ret to -1.
+ *
+ * Always returns 0, except on ESTALE where the result of the retry is
+ * returned.
+ */
+int32_t
+mnt3_resolve_subdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, inode_t *inode,
+                         struct iatt *buf, dict_t *xattr,
+                         struct iatt *postparent)
+{
+        mnt3_resolve_t      *mres = NULL;
+        mountstat3           mntstat = MNT3ERR_SERVERFAULT;
+        struct nfs3_fh       fh = {{0}, };
+        int                  autharr[10];
+        int                  autharrlen = 0;
+        rpcsvc_t            *svc = NULL;
+        mountres3            res = {0, };
+        xlator_t            *mntxl = NULL;
+        char                *path = NULL;
+        struct mount3_state *ms = NULL;
+        int                  authcode = 0;
+        char                *authorized_host = NULL;
+        char                *authorized_path = NULL;
+        inode_t             *linked_inode = NULL;
+
+        mres = frame->local;
+        ms = mres->mstate;
+        mntxl = (xlator_t *)cookie;
+        if (op_ret == -1 && op_errno == ESTALE) {
+                /* Nuke inode from cache and try the LOOKUP
+                 * request again. */
+                return __mnt3_fresh_lookup (mres);
+        } else if (op_ret == -1) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, op_errno,
+                        NFS_MSG_RESOLVE_SUBDIR_FAIL, "path=%s (%s)",
+                        mres->resolveloc.path, strerror (op_errno));
+                mntstat = mnt3svc_errno_to_mnterr (op_errno);
+                goto err;
+        }
+
+        linked_inode = inode_link (mres->resolveloc.inode,
+                                   mres->resolveloc.parent,
+                                   mres->resolveloc.name, buf);
+
+        if (linked_inode)
+                nfs_fix_generation (this, linked_inode);
+
+        nfs3_fh_build_child_fh (&mres->parentfh, buf, &fh);
+        if (strlen (mres->remainingdir) <= 0) {
+                size_t alloclen;
+
+                /* Last component: a reply is sent from here on, whatever
+                 * happens. mntstat stays MNT3ERR_SERVERFAULT until every
+                 * step below has succeeded, so an allocation failure can
+                 * no longer be answered with MNT3_OK before the request
+                 * has been authenticated.
+                 */
+                op_ret = -1;
+
+                /* Construct the full path */
+                /* NOTE(review): a previous mres->exp->fullpath is
+                 * overwritten without being freed - looks like a leak on
+                 * repeated mounts; confirm ownership before changing.
+                 */
+                alloclen = strlen (mres->exp->expname) +
+                           strlen (mres->resolveloc.path) + 1;
+                mres->exp->fullpath = GF_CALLOC (alloclen, sizeof (char),
+                                                 gf_nfs_mt_char);
+                if (!mres->exp->fullpath) {
+                        gf_msg (GF_MNT, GF_LOG_CRITICAL, ENOMEM,
+                                NFS_MSG_NO_MEMORY, "Allocation failed.");
+                        goto err;
+                }
+                snprintf (mres->exp->fullpath, alloclen, "%s%s",
+                          mres->exp->expname, mres->resolveloc.path);
+
+                /* Check if this path is authorized to be mounted */
+                authcode = mnt3_authenticate_request (ms, mres->req, NULL, NULL,
+                                                      mres->exp->fullpath,
+                                                      &authorized_path,
+                                                      &authorized_host,
+                                                      FALSE);
+                if (authcode != 0) {
+                        mntstat = MNT3ERR_ACCES;
+                        gf_msg_debug (GF_MNT, 0, "Client mount not allowed");
+                        op_ret = -1;
+                        goto err;
+                }
+
+                path = GF_CALLOC (PATH_MAX, sizeof (char), gf_nfs_mt_char);
+                if (!path) {
+                        gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+                                NFS_MSG_NO_MEMORY,
+                                "Memory allocation failed");
+                        goto err;
+                }
+                /* Build mountid from the authorized path and stick it in the
+                 * filehandle that will get passed back to the client
+                 */
+                __mnt3_build_mountid_from_path (authorized_path, fh.mountid);
+
+                snprintf (path, PATH_MAX, "/%s%s", mres->exp->vol->name,
+                          mres->resolveloc.path);
+
+                mnt3svc_update_mountlist (mres->mstate, mres->req,
+                                          path, mres->exp->fullpath);
+                GF_FREE (path);
+
+                /* Everything succeeded: only now is the mount granted. */
+                mntstat = MNT3_OK;
+        } else {
+                mres->parentfh = fh;
+                op_ret = __mnt3_resolve_export_subdir_comp (mres);
+                if (op_ret < 0)
+                        mntstat = mnt3svc_errno_to_mnterr (-op_ret);
+        }
+err:
+        /* __mnt3_resolve_export_subdir_comp reports failure as a negative
+         * errno such as -EFAULT, not only -1; testing for -1 alone left the
+         * client without a reply and leaked mres.
+         */
+        if (op_ret < 0) {
+                gf_msg_debug (GF_MNT, 0, "Mount reply status: %d", mntstat);
+                svc = rpcsvc_request_service (mres->req);
+                autharrlen = rpcsvc_auth_array (svc, mntxl->name, autharr,
+                                                10);
+
+                res = mnt3svc_set_mountres3 (mntstat, &fh, autharr, autharrlen);
+                mnt3svc_submit_reply (mres->req, (void *)&res,
+                                      (mnt3_serializer)xdr_serialize_mountres3);
+                mnt3_resolve_state_wipe (mres);
+        }
+
+        GF_FREE (authorized_path);
+        GF_FREE (authorized_host);
+
+        return 0;
+}
+
+/* Readlink callback: resolve a symbolic link found in the mount path into a
+ * directory path and restart export parsing from the beginning with it.
+ *
+ * Note: the path stored in the symlink must be relative to the symlink,
+ * because that is the only form that is consistent throughout the file
+ * system; a target starting with '/' is rejected.
+ * If the symlink resolves into another symlink, the same process is
+ * repeated.
+ * Symbolic links that point outside the file system are not handled here.
+ *
+ * TODO: 1.) This function cannot handle symlinks whose target leaves the
+ *           file system and comes back into it.
+ *           For example, consider vol as the exported volume. It contains
+ *           dir,
+ *           symlink1 which points to ../vol/dir,
+ *           symlink2 which points to ../mnt/../vol/dir;
+ *           symlink1 and symlink2 are not handled right now.
+ *
+ *       2.) The udp mount routine is much simpler than the tcp routine and
+ *           resolves the symlink directly. It may be better to change this
+ *           routine to work like the udp one.
+ *
+ * Returns the return value of mnt3_parse_dir_exports on the normal path; on
+ * the error path an error reply is sent (when mres is available) and a
+ * negative value is returned.
+ */
+int32_t
+mnt3_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ mnt3_resolve_t *mres = NULL;
+ int ret = -EFAULT;
+ char *real_loc = NULL;
+ size_t path_len = 0;
+ size_t parent_path_len = 0;
+ char *parent_path = NULL;
+ char *absolute_path = NULL;
+ char *relative_path = NULL;
+ int mntstat = 0;
+
+ GF_ASSERT (frame);
+
+ mres = frame->local;
+ /* ret is still -EFAULT here, which is what the error reply is built from. */
+ if (!mres || !path || (path[0] == '/') || (op_ret < 0))
+ goto mnterr;
+
+ /* Finding current location of symlink */
+ /* i.e. the loc path with the trailing entry name cut off. */
+ parent_path_len = strlen (mres->resolveloc.path) - strlen (mres->resolveloc.name);
+ parent_path = gf_strndup (mres->resolveloc.path, parent_path_len);
+ if (!parent_path) {
+ ret = -ENOMEM;
+ goto mnterr;
+ }
+
+ relative_path = gf_strdup (path);
+ if (!relative_path) {
+ ret = -ENOMEM;
+ goto mnterr;
+ }
+ /* Resolving into absolute path */
+ ret = gf_build_absolute_path (parent_path, relative_path, &absolute_path);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_RESOLVE_SYMLINK_ERROR,
+ "Cannot resolve symlink, path is out of boundary "
+ "from current location %s and with relative path "
+ "%s pointed by symlink", parent_path, relative_path);
+
+ goto mnterr;
+ }
+
+ /* Building the actual mount path to be mounted */
+ /* Room for volume name + resolved path + remaining components + NUL. */
+ path_len = strlen (mres->exp->vol->name) + strlen (absolute_path)
+ + strlen (mres->remainingdir) + 1;
+ real_loc = GF_CALLOC (1, path_len, gf_nfs_mt_char);
+ if (!real_loc) {
+ ret = -ENOMEM;
+ goto mnterr;
+ }
+ sprintf (real_loc , "%s%s", mres->exp->vol->name, absolute_path);
+ gf_path_strip_trailing_slashes (real_loc);
+
+ /* There may be entries after the symlink in the mount path;
+ * the remaining entries must be included too */
+ if (strlen (mres->remainingdir) > 0)
+ strcat (real_loc, mres->remainingdir);
+
+ gf_msg_debug (GF_MNT, 0, "Resolved path is : %s%s "
+ "and actual mount path is %s",
+ absolute_path, mres->remainingdir, real_loc);
+
+ /* After resolving the symlink, parsing should be done
+ * for the populated mount path
+ */
+ ret = mnt3_parse_dir_exports (mres->req, mres->mstate, real_loc);
+
+ /* NOTE(review): on failure only a message is logged here; no error
+ * reply is sent from this function and mres is not released - confirm
+ * whether the callee already covers both.
+ */
+ if (ret) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_RESOLVE_ERROR,
+ "Resolved into an unknown path %s%s "
+ "from the current location of symlink %s",
+ absolute_path, mres->remainingdir, parent_path);
+ }
+
+ GF_FREE (real_loc);
+ GF_FREE (absolute_path);
+ GF_FREE (parent_path);
+ GF_FREE (relative_path);
+
+ return ret;
+
+mnterr:
+ if (mres) {
+ mntstat = mnt3svc_errno_to_mnterr (-ret);
+ mnt3svc_mnt_error_reply (mres->req, mntstat);
+ } else
+ gf_msg (GF_MNT, GF_LOG_CRITICAL, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "mres == NULL, this should *never* happen");
+ /* real_loc is never allocated on a path that reaches this label. */
+ if (absolute_path)
+ GF_FREE (absolute_path);
+ if (parent_path)
+ GF_FREE (parent_path);
+ if (relative_path)
+ GF_FREE (relative_path);
+ return ret;
+}
+
+/* We will always have to perform a hard lookup on all the components of a
+ * directory export for a mount request because in the mount reply we need the
+ * file handle of the directory. Our file handle creation code is designed with
+ * the assumption that to build a child file/dir fh, we'll always have the
+ * parent dir's fh available so that we may copy the hash array of the previous
+ * dir levels.
+ *
+ * Since we do not store the file handles anywhere, for every mount request we
+ * must resolve the file handles of every component so that the parent dir file
+ * of the exported directory can be built.
+ *
+ * This function starts that walk: it resolves the first component of
+ * mres->remainingdir below the volume root. A symlink is handed to
+ * nfs_readlink, anything else to nfs_lookup.
+ *
+ * Returns -EFAULT when mres is NULL, no component can be extracted or the loc
+ * cannot be filled; otherwise the return value of the readlink or lookup call.
+ */
+int
+__mnt3_resolve_subdir (mnt3_resolve_t *mres)
+{
+ char dupsubdir[MNTPATHLEN];
+ char *firstcomp = NULL;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ uuid_t rootgfid = {0, };
+
+ if (!mres)
+ return ret;
+
+ firstcomp = setup_next_component (mres->remainingdir,
+ sizeof (mres->remainingdir),
+ dupsubdir, sizeof (dupsubdir));
+ if (!firstcomp)
+ goto err;
+
+ /* The parent gfid used is all zeroes except for a final 1. */
+ rootgfid[15] = 1;
+ ret = nfs_entry_loc_fill (mres->mstate->nfsx, mres->exp->vol->itable,
+ rootgfid, firstcomp, &mres->resolveloc,
+ NFS_RESOLVE_CREATE);
+ /* NOTE(review): a return of -2 is tolerated here; presumably it means
+ * the entry is not cached yet - confirm against nfs_entry_loc_fill.
+ */
+ if ((ret < 0) && (ret != -2)) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, EFAULT,
+ NFS_MSG_RESOLVE_INODE_FAIL, "Failed to resolve and "
+ "create inode for volume root: %s",
+ mres->exp->vol->name);
+ ret = -EFAULT;
+ goto err;
+ }
+
+ nfs_request_user_init (&nfu, mres->req);
+ if (IA_ISLNK (mres->resolveloc.inode->ia_type)) {
+ ret = nfs_readlink (mres->mstate->nfsx, mres->exp->vol, &nfu,
+ &mres->resolveloc, mnt3_readlink_cbk, mres);
+ gf_msg_debug (GF_MNT, 0, "Symlink found , need to resolve "
+ "into directory handle");
+ /* Not an error: the jump only skips the lookup below. */
+ goto err;
+ }
+ ret = nfs_lookup (mres->mstate->nfsx, mres->exp->vol, &nfu,
+ &mres->resolveloc, mnt3_resolve_subdir_cbk, mres);
+
+err:
+ return ret;
+}
+
+
+/* Walk an addrinfo list and report whether any AF_INET address in it matches
+ * saddr under mask (as decided by mask_match). Entries of other address
+ * families are skipped.
+ */
+static gf_boolean_t
+mnt3_match_subnet_v4 (struct addrinfo *ai, uint32_t saddr, uint32_t mask)
+{
+        struct sockaddr_in *candidate = NULL;
+
+        while (ai != NULL) {
+                candidate = (struct sockaddr_in *)ai->ai_addr;
+
+                if ((candidate->sin_family == AF_INET) &&
+                    mask_match (saddr, candidate->sin_addr.s_addr, mask))
+                        return _gf_true;
+
+                ai = ai->ai_next;
+        }
+
+        return _gf_false;
+}
+
+
+/**
+ * Verify whether a client is allowed to mount a directory export. The
+ * client's IP address is compared with every allowed host or range in the
+ * hostspec list of the mnt3_export structure.
+ *
+ * @param client_addr - client's socket address; only AF_INET is accepted.
+ * @param export - mnt3_export structure holding the allowed host list.
+ *
+ * @return 0 when the client matches an allowed entry, -EACCES otherwise
+ *         (including invalid arguments and non-IPv4 clients).
+ *
+ * TODO: Support IPv6 subnetwork
+ */
+int
+mnt3_verify_auth (struct sockaddr_in *client_addr, struct mnt3_export *export)
+{
+        int                    verdict  = -EACCES;
+        int                    gai_ret  = 0;
+        struct host_auth_spec *host     = NULL;
+        struct addrinfo       *resolved = NULL;
+        struct addrinfo        hint     = {
+                .ai_family   = AF_INET,
+                .ai_protocol = (int)IPPROTO_TCP,
+                .ai_flags    = AI_CANONNAME,
+        };
+
+        /* Sanity check */
+        if (!client_addr || !export || !export->hostspec) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Invalid argument");
+                return verdict;
+        }
+
+        /*
+         * Currently IPv4 subnetwork is supported i.e. AF_INET.
+         * TODO: IPv6 subnetwork i.e. AF_INET6.
+         */
+        if (client_addr->sin_family != AF_INET) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, EAFNOSUPPORT,
+                        NFS_MSG_UNSUPPORTED_VERSION,
+                        "Only IPv4 is supported for subdir-auth");
+                return verdict;
+        }
+
+        /* Try each allowed host in turn until one matches. */
+        for (host = export->hostspec; host != NULL; host = host->next) {
+                GF_ASSERT (host->host_addr);
+
+                /* Drop the result of the previous iteration, if any. */
+                if (resolved != NULL) {
+                        freeaddrinfo (resolved);
+                        resolved = NULL;
+                }
+
+                gai_ret = getaddrinfo (host->host_addr, NULL, &hint,
+                                       &resolved);
+                if (gai_ret != 0) {
+                        /* Resolution failed for this entry; keep searching
+                         * the remaining allowed hosts.
+                         */
+                        gf_msg_debug (GF_MNT, 0, "getaddrinfo: %s\n",
+                                      gai_strerror (gai_ret));
+                        continue;
+                }
+
+                if (resolved->ai_addr == NULL) {
+                        gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+                                NFS_MSG_INVALID_ENTRY, "Invalid structure");
+                        break;
+                }
+
+                /* Check if the network addr of both IPv4 socket match */
+                if (mnt3_match_subnet_v4 (resolved,
+                                          client_addr->sin_addr.s_addr,
+                                          host->netmask)) {
+                        verdict = 0;
+                        break;
+                }
+        }
+
+        /* FREE the dynamic memory allocated by getaddrinfo() */
+        if (resolved != NULL)
+                freeaddrinfo (resolved);
+
+        return verdict;
+}
+
+/* Begin resolving a sub-directory of an export for a mount request.
+ *
+ * When the export carries a host specification, the peer address of the
+ * request is checked against it first. A resolve state is then allocated, the
+ * root file handle of the export is stored in it as parent handle, and the
+ * first component of subdir is resolved.
+ *
+ * @req:    mount request.
+ * @ms:     mount state.
+ * @exp:    export the directory belongs to.
+ * @subdir: path below the volume root, starting with a slash.
+ *
+ * Returns -EFAULT when an argument is NULL, the value of mnt3_verify_auth
+ * when the peer is not allowed, -ENOMEM when the resolve state cannot be
+ * allocated, otherwise the return value of __mnt3_resolve_subdir.
+ */
+int
+mnt3_resolve_subdir (rpcsvc_request_t *req, struct mount3_state *ms,
+                     struct mnt3_export *exp, char *subdir)
+{
+        mnt3_resolve_t     *mres = NULL;
+        int                 ret = -EFAULT;
+        struct nfs3_fh      pfh = GF_NFS3FH_STATIC_INITIALIZER;
+        struct sockaddr_in *sin = NULL;
+
+        if ((!req) || (!ms) || (!exp) || (!subdir))
+                return ret;
+
+        sin = (struct sockaddr_in *)(&(req->trans->peerinfo.sockaddr));
+
+        /* Need to check AUTH */
+        if (NULL != exp->hostspec) {
+                ret = mnt3_verify_auth (sin, exp);
+                if (0 != ret) {
+                        gf_msg (GF_MNT, GF_LOG_ERROR, EACCES,
+                                NFS_MSG_AUTH_VERIFY_FAILED,
+                                "AUTH verification failed");
+                        return ret;
+                }
+        }
+
+        mres = GF_CALLOC (1, sizeof (mnt3_resolve_t), gf_nfs_mt_mnt3_resolve);
+        if (!mres) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Memory allocation failed");
+                /* ret may be 0 from a successful auth check; without this
+                 * the caller would take the failure for success.
+                 */
+                ret = -ENOMEM;
+                goto err;
+        }
+
+        mres->exp = exp;
+        mres->mstate = ms;
+        mres->req = req;
+
+        /* Bounded and always NUL-terminated, unlike strncpy() with the
+         * full buffer length.
+         */
+        snprintf (mres->remainingdir, sizeof (mres->remainingdir), "%s",
+                  subdir);
+        gf_path_strip_trailing_slashes (mres->remainingdir);
+
+        if (gf_nfs_dvm_off (nfs_state (ms->nfsx)))
+                pfh = nfs3_fh_build_indexed_root_fh (
+                                        mres->mstate->nfsx->children,
+                                        mres->exp->vol);
+        else
+                pfh = nfs3_fh_build_uuid_root_fh (exp->volumeid, exp->mountid);
+
+        mres->parentfh = pfh;
+        ret = __mnt3_resolve_subdir (mres);
+        if (ret < 0) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_RESOLVE_SUBDIR_FAIL,
+                        "Failed to resolve export dir: %s", mres->exp->expname);
+                /* Also wipes the loc, which may have been filled before
+                 * the failure.
+                 */
+                mnt3_resolve_state_wipe (mres);
+        }
+
+err:
+        return ret;
+}
+
+
+/* Resolve the directory of a directory export: strip the volume name from
+ * the export name and resolve what is left below the volume root.
+ *
+ * Returns -EFAULT when an argument is NULL or the export name has no
+ * sub-directory part, otherwise the return value of mnt3_resolve_subdir.
+ */
+int
+mnt3_resolve_export_subdir (rpcsvc_request_t *req, struct mount3_state *ms,
+                            struct mnt3_export *exp)
+{
+        char *below_volume = NULL;
+        int   status       = -EFAULT;
+
+        if (!req || !ms || !exp)
+                return status;
+
+        below_volume = __volume_subdir (exp->expname, NULL);
+        if (below_volume == NULL)
+                return status;
+
+        status = mnt3_resolve_subdir (req, ms, exp, below_volume);
+        if (status < 0)
+                gf_msg (GF_MNT, GF_LOG_ERROR, status,
+                        NFS_MSG_RESOLVE_SUBDIR_FAIL,
+                        "Failed to resolve export dir: %s", exp->expname);
+
+        return status;
+}
+
+
+/* Dispatch a mount request according to the type of the export: a whole
+ * volume or a directory inside a volume.
+ *
+ * Returns -EFAULT when an argument is NULL or the export type is neither of
+ * the two, otherwise the return value of the selected handler.
+ */
+int
+mnt3svc_mount (rpcsvc_request_t *req, struct mount3_state *ms,
+               struct mnt3_export *exp)
+{
+        int status = -EFAULT;
+
+        if (!req || !ms || !exp)
+                return status;
+
+        switch (exp->exptype) {
+        case MNT3_EXPTYPE_VOLUME:
+                status = mnt3svc_volume_mount (req, ms, exp);
+                break;
+        case MNT3_EXPTYPE_DIR:
+                status = mnt3_resolve_export_subdir (req, ms, exp);
+                break;
+        default:
+                break;
+        }
+
+        return status;
+}
+
+
+/* Find the export that a mount path refers to.
+ *
+ * The export list is searched, with ms->mountlock held, for the first entry
+ * whose name matches dirpath according to mnt3_match_dirpath_export.
+ *
+ * The parameter 'export_parsing_match' indicates whether this function
+ * is being called by an exports parser or whether it is being called
+ * during mount. The behavior is different since we don't have to resolve
+ * the path when doing the parse; the flag is passed on unchanged to
+ * mnt3_match_dirpath_export.
+ *
+ * Returns the matching export, or NULL when ms or dirpath is NULL or nothing
+ * matches.
+ */
+struct mnt3_export *
+mnt3_mntpath_to_export (struct mount3_state *ms, const char *dirpath,
+                        gf_boolean_t export_parsing_match)
+{
+        struct mnt3_export *cur   = NULL;
+        struct mnt3_export *match = NULL;
+
+        if (!ms || !dirpath)
+                return NULL;
+
+        LOCK (&ms->mountlock);
+        list_for_each_entry (cur, &ms->exportlist, explist) {
+                /* Search for an exact match with the volume */
+                if (!mnt3_match_dirpath_export (cur->expname, dirpath,
+                                                export_parsing_match))
+                        continue;
+
+                match = cur;
+                gf_msg_debug (GF_MNT, 0, "Found export volume: %s",
+                              cur->vol->name);
+                break;
+        }
+
+        if (match == NULL)
+                gf_msg_debug (GF_MNT, 0, "Export not found");
+        UNLOCK (&ms->mountlock);
+
+        return match;
+}
+
+
+/* Run the two network checks for a client that wants to mount expvol: the
+ * address check (rpcsvc_auth_check) and the privileged-port check
+ * (rpcsvc_transport_privport_check).
+ *
+ * Returns RPCSVC_AUTH_ACCEPT when neither check rejects the client, and
+ * RPCSVC_AUTH_REJECT when svc, expvol or ipaddr is NULL or either check
+ * rejects it.
+ */
+static int
+mnt3_check_client_net_check (rpcsvc_t *svc, char *expvol,
+ char *ipaddr, uint16_t port)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+
+ if ((!svc) || (!expvol) || (!ipaddr))
+ goto err;
+
+ ret = rpcsvc_auth_check (svc, expvol, ipaddr);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ gf_msg (GF_MNT, GF_LOG_INFO, 0, NFS_MSG_PEER_NOT_ALLOWED,
+ "Peer %s not allowed", ipaddr);
+ goto err;
+ }
+
+ ret = rpcsvc_transport_privport_check (svc, expvol, port);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ gf_msg (GF_MNT, GF_LOG_INFO, errno, NFS_MSG_PEER_NOT_ALLOWED,
+ "Peer %s rejected. Unprivileged "
+ "port %d not allowed", ipaddr, port);
+ goto err;
+ }
+
+ /* Any result other than a rejection is reported as acceptance. */
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
return ret;
}
+/* Network check for a mount request that arrived over an rpcsvc transport.
+ *
+ * The peer address and port are taken from the transport of req and passed
+ * to mnt3_check_client_net_check for volname.
+ *
+ * Returns RPCSVC_AUTH_REJECT when req or volname is NULL, the service or
+ * transport cannot be obtained, or the peer address cannot be read;
+ * otherwise the result of mnt3_check_client_net_check.
+ */
+static int
+mnt3_check_client_net_tcp (rpcsvc_request_t *req, char *volname)
+{
+ rpcsvc_t *svc = NULL;
+ rpc_transport_t *trans = NULL;
+ union gf_sock_union sock_union;
+ /* NOTE(review): the size passed below is that of an IPv4 sockaddr
+ * although the buffer is the union's storage member - confirm this is
+ * intended for non-IPv4 peers.
+ */
+ socklen_t socksize = sizeof (struct sockaddr_in);
+ char peer[RPCSVC_PEER_STRLEN] = {0,};
+ char *ipaddr = NULL;
+ uint16_t port = 0;
+ int ret = RPCSVC_AUTH_REJECT;
+
+ if ((!req) || (!volname))
+ goto err;
+
+ svc = rpcsvc_request_service (req);
+ trans = rpcsvc_request_transport (req);
+ if ((!svc) || (!trans))
+ goto err;
+
+ ret = rpcsvc_transport_peeraddr (trans, peer, RPCSVC_PEER_STRLEN,
+ &sock_union.storage, socksize);
+ if (ret != 0) {
+ gf_msg (GF_MNT, GF_LOG_WARNING, ENOENT,
+ NFS_MSG_GET_PEER_ADDR_FAIL, "Failed to get peer "
+ "addr: %s", gai_strerror (ret));
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ /* peer[] is in IP:PORT format; strip the port. When get_host_name
+ * returns a false value, the whole peer string is used instead. */
+ if (!get_host_name ((char *)peer, &ipaddr))
+ ipaddr = peer;
+
+ port = ntohs (sock_union.sin.sin_port);
+
+ ret = mnt3_check_client_net_check (svc, volname, ipaddr, port);
+err:
+ return ret;
+}
+
+/* Network check for a mount request that arrived through the svc_req (UDP)
+ * path.
+ *
+ * The caller's IPv4 address and port are read from req->rq_xprt and passed,
+ * together with the rpcsvc of the NFS xlator, to
+ * mnt3_check_client_net_check.
+ *
+ * Returns RPCSVC_AUTH_REJECT when an argument is NULL or the caller address
+ * is unavailable; otherwise the result of mnt3_check_client_net_check.
+ */
+static int
+mnt3_check_client_net_udp (struct svc_req *req, char *volname, xlator_t *nfsx)
+{
+ rpcsvc_t *svc = NULL;
+ struct sockaddr_in *sin = NULL;
+ char ipaddr[INET_ADDRSTRLEN + 1] = {0, };
+ uint16_t port = 0;
+ int ret = RPCSVC_AUTH_REJECT;
+ struct nfs_state *nfs = NULL;
+
+ if ((!req) || (!volname) || (!nfsx))
+ goto err;
+
+ sin = svc_getcaller (req->rq_xprt);
+ if (!sin)
+ goto err;
+
+ /* The result is deliberately ignored; ipaddr is zero-initialized. */
+ (void) inet_ntop (AF_INET, &sin->sin_addr, ipaddr, INET_ADDRSTRLEN);
+
+ port = ntohs (sin->sin_port);
+
+ /* svc stays NULL when the xlator has no private state; the check
+ * below then rejects the client.
+ */
+ nfs = (struct nfs_state *)nfsx->private;
+ if (nfs != NULL)
+ svc = nfs->rpcsvc;
+
+ ret = mnt3_check_client_net_check (svc, volname, ipaddr, port);
+err:
+ return ret;
+}
+
+
+/* Handle a mount path that names a directory inside an exported volume.
+ *
+ * The volume name is split off subdir, the export of that volume is looked
+ * up, the volume must be started and the client must pass the network check;
+ * then the directory is resolved below the volume root.
+ *
+ * Returns -1 when ms or subdir is NULL, -ENOENT when the path has no
+ * directory part, no export matches, the NFS state is missing or the volume
+ * is not started, -EACCES when the client is rejected, otherwise the return
+ * value of mnt3_resolve_subdir.
+ */
+int
+mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms,
+                        char *subdir)
+{
+        char                volname[1024] = {0, };
+        char               *volname_ptr   = volname;
+        struct mnt3_export *exp           = NULL;
+        struct nfs_state   *nfs           = NULL;
+        int                 ret           = -ENOENT;
+
+        if (!ms || !subdir)
+                return -1;
+
+        subdir = __volume_subdir (subdir, &volname_ptr);
+        if (subdir == NULL)
+                return ret;
+
+        exp = mnt3_mntpath_to_export (ms, volname, _gf_false);
+        if (exp == NULL)
+                return ret;
+
+        nfs = (struct nfs_state *)ms->nfsx->private;
+        if (nfs == NULL)
+                return ret;
+
+        if (!nfs_subvolume_started (nfs, exp->vol)) {
+                gf_msg_debug (GF_MNT, 0, "Volume %s not started",
+                              exp->vol->name);
+                return ret;
+        }
+
+        ret = mnt3_check_client_net_tcp (req, exp->vol->name);
+        if (ret == RPCSVC_AUTH_REJECT) {
+                gf_msg_debug (GF_MNT, 0, "Client mount not allowed");
+                return -EACCES;
+        }
+
+        ret = mnt3_resolve_subdir (req, ms, exp, subdir);
+        if (ret < 0)
+                gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_RESOLVE_SUBDIR_FAIL,
+                        "Failed to resolve export dir: %s", subdir);
+
+        return ret;
+}
+
+
+/*
+ * mnt3_find_export -- locate the export that serves @path.
+ *
+ * A direct match in the export list is stored in *@e and 0 is returned.
+ * Without a direct match the path is treated as a sub-directory mount and
+ * passed to mnt3_parse_dir_exports(), provided gf_mnt3_export_dirs() reports
+ * that directory exports are enabled; *@e is left untouched in that case.
+ *
+ * Returns -1 on a NULL argument or when directory exports are disabled,
+ * -EFAULT when the mount state is missing (the request is also marked
+ * SYSTEM_ERR), 0 on a direct match, otherwise the result of
+ * mnt3_parse_dir_exports().
+ */
+int
+mnt3_find_export (rpcsvc_request_t *req, char *path, struct mnt3_export **e)
+{
+        struct mount3_state *mstate = NULL;
+        struct mnt3_export  *found  = NULL;
+
+        if (!req || !path || !e)
+                return -1;
+
+        mstate = (struct mount3_state *) rpcsvc_request_program_private (req);
+        if (mstate == NULL) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+                        NFS_MSG_MNT_STATE_NOT_FOUND, "Mount state not present");
+                rpcsvc_request_seterr (req, SYSTEM_ERR);
+                return -EFAULT;
+        }
+
+        gf_msg_debug (GF_MNT, 0, "dirpath: %s", path);
+        found = mnt3_mntpath_to_export (mstate, path, _gf_false);
+        if (found != NULL) {
+                *e = found;
+                return 0;
+        }
+
+        if (!gf_mnt3_export_dirs(mstate))
+                return -1;
+
+        return mnt3_parse_dir_exports (req, mstate, path);
+}
+
+/**
+ * _mnt3_get_peer_addr -- Return a heap copy of the peer address of an rpc
+ *                        request. A peer address is host:port.
+ *
+ * @req: The rpc svc request whose transport is queried for the peer address
+ *
+ * @return: success: Newly allocated string holding the peer address
+ *          failure: NULL (NULL @req, peer address lookup failed, or the
+ *                   copy could not be allocated)
+ */
+char *
+_mnt3_get_peer_addr (const rpcsvc_request_t *req)
+{
+        char                     addrbuf[RPCSVC_PEER_STRLEN] = {0, };
+        struct sockaddr_storage  sockstore = {0, };
+        rpc_transport_t         *xprt      = NULL;
+        char                    *copy      = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NFS, req, out);
+
+        xprt = rpcsvc_request_transport (req);
+        if (rpcsvc_transport_peeraddr (xprt, addrbuf, RPCSVC_PEER_STRLEN,
+                                       &sockstore, sizeof (sockstore)) == 0)
+                copy = gf_strdup (addrbuf);
+out:
+        return copy;
+}
+
+/**
+ * _mnt3_get_host_from_peer -- Take a peer address and get an allocated
+ *                             hostname. The hostname is the string on the
+ *                             left side of the colon.
+ *
+ * @peer_addr: The peer address (host:port) to get a hostname from; may be
+ *             NULL, in which case NULL is returned
+ *
+ * @return: success: Allocated string containing the hostname
+ *          failure: NULL (NULL input, no ':' in the address, host part not
+ *                   shorter than RPCSVC_PEER_STRLEN, or allocation failure)
+ *
+ */
+char *
+_mnt3_get_host_from_peer (const char *peer_addr)
+{
+        char   *part     = NULL;
+        size_t  host_len = 0;
+        char   *colon    = NULL;
+
+        /* The peer address comes from _mnt3_get_peer_addr(), which returns
+         * NULL on failure; strchr() on NULL would crash.
+         */
+        GF_VALIDATE_OR_GOTO (GF_MNT, peer_addr, out);
+
+        colon = strchr (peer_addr, ':');
+        if (!colon) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_BAD_PEER,
+                        "Bad peer %s", peer_addr);
+                goto out;
+        }
+
+        host_len = colon - peer_addr;
+        if (host_len < RPCSVC_PEER_STRLEN)
+                part = gf_strndup (peer_addr, host_len);
+        else
+                gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_PEER_TOO_LONG,
+                        "Peer too long %s", peer_addr);
+out:
+        return part;
+}
+
+/**
+ * mnt3_check_cached_fh -- Look up a filehandle in the auth cache.
+ *
+ * For a write operation the entry must be cached and writeable
+ * (is_nfs_fh_cached_and_writeable); for any other operation being cached is
+ * enough (is_nfs_fh_cached). The result of the chosen helper is returned.
+ *
+ */
+int
+mnt3_check_cached_fh (struct mount3_state *ms, struct nfs3_fh *fh,
+                      const char *host_addr, gf_boolean_t is_write_op)
+{
+        if (is_write_op)
+                return is_nfs_fh_cached_and_writeable (ms->authcache, fh,
+                                                       host_addr);
+
+        return is_nfs_fh_cached (ms->authcache, fh, host_addr);
+}
+
+/**
+ * _mnt3_authenticate_req -- Given an RPC request and a path OR a filehandle
+ *                           check if the host is authorized to make the
+ *                           request. Uses exports/netgroups auth model to
+ *                           do this check.
+ *
+ * The peer's IP is tried first; only when that is not authorized is a
+ * reverse DNS lookup done and the FQDN tried.
+ *
+ * @ms  : The mount state
+ * @req : The RPC request
+ * @fh  : The NFS FH to authenticate (set when authenticating an FOP)
+ * @path: The path to authenticate (set when authenticating a mount req)
+ * @authorized_export: Allocate and fill this value when an export is authorized
+ * @authorized_host: Allocate and fill this value when a host is authorized
+ * @is_write_op: Is this a write op that we are authenticating?
+ *
+ * @return: 0 if authorized
+ *          -EACCES for completely unauthorized fop (also returned when @ms or
+ *                  @req is NULL or the peer address cannot be determined)
+ *          -EROFS  for unauthorized write operations (rm, mkdir, write)
+ */
+int
+_mnt3_authenticate_req (struct mount3_state *ms, rpcsvc_request_t *req,
+                        struct nfs3_fh *fh, const char *path,
+                        char **authorized_export, char **authorized_host,
+                        gf_boolean_t is_write_op)
+{
+        char                *peer_addr        = NULL;
+        char                *host_addr_ip     = NULL;
+        char                *host_addr_fqdn   = NULL;
+        int                  auth_status_code = -EACCES;
+        char                *pathdup          = NULL;
+        size_t               dlen             = 0;
+        char                *auth_host        = NULL;
+        gf_boolean_t         fh_cached        = _gf_false;
+        struct export_item  *expitem          = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_MNT, ms, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT, req, out);
+
+        /* Check the peer address before deriving the host from it: the
+         * lookup can fail and return NULL, and the host extraction must not
+         * be handed a NULL string.
+         */
+        peer_addr = _mnt3_get_peer_addr (req);
+        if (!peer_addr)
+                goto free_and_out;
+
+        host_addr_ip = _mnt3_get_host_from_peer (peer_addr);
+        if (!host_addr_ip)
+                goto free_and_out;
+
+        if (path) {
+                /* Need to strip out trailing '/' */
+                pathdup = strdupa (path);
+                dlen = strlen (pathdup);
+                if (dlen > 0 && pathdup[dlen-1] == '/')
+                        pathdup[dlen-1] = '\0';
+        }
+
+        /* Check if the filehandle is cached */
+        fh_cached = mnt3_check_cached_fh (ms, fh, host_addr_ip, is_write_op);
+        if (fh_cached) {
+                gf_msg_trace (GF_MNT, 0, "Found cached FH for %s",
+                              host_addr_ip);
+                auth_status_code = 0;
+                goto free_and_out;
+        }
+
+        /* Check if the IP is authorized */
+        auth_status_code = mnt3_auth_host (ms->auth_params, host_addr_ip,
+                                           fh, pathdup, is_write_op, &expitem);
+
+        gf_msg_debug (GF_MNT, 0, "access from IP %s is %s", host_addr_ip,
+                      auth_status_code ? "denied" : "allowed");
+
+        if (auth_status_code != 0) {
+                /* If not, check if the FQDN is authorized.
+                 * NOTE(review): if gf_rev_dns_lookup() can return NULL, that
+                 * NULL is passed on below -- confirm mnt3_auth_host()
+                 * tolerates it.
+                 */
+                host_addr_fqdn = gf_rev_dns_lookup (host_addr_ip);
+                auth_status_code = mnt3_auth_host (ms->auth_params,
+                                                   host_addr_fqdn,
+                                                   fh, pathdup, is_write_op,
+                                                   &expitem);
+
+                gf_msg_debug (GF_MNT, 0, "access from FQDN %s is %s",
+                              host_addr_fqdn, auth_status_code ? "denied" :
+                              "allowed");
+
+                if (auth_status_code == 0)
+                        auth_host = host_addr_fqdn;
+        } else
+                auth_host = host_addr_ip;
+
+        /* Skip the lines that set authorized export &
+         * host if they are null.
+         */
+        if (!authorized_export || !authorized_host) {
+                /* Cache the file handle if it was authorized */
+                if (fh && auth_status_code == 0)
+                        cache_nfs_fh (ms->authcache, fh, host_addr_ip, expitem);
+
+                goto free_and_out;
+        }
+
+        if (!fh && auth_status_code == 0) {
+                *authorized_export = gf_strdup (pathdup);
+                if (!*authorized_export)
+                        gf_msg (GF_MNT, GF_LOG_CRITICAL, ENOMEM,
+                                NFS_MSG_NO_MEMORY,
+                                "Allocation error when copying "
+                                "authorized path");
+
+                *authorized_host = gf_strdup (auth_host);
+                if (!*authorized_host)
+                        gf_msg (GF_MNT, GF_LOG_CRITICAL, ENOMEM,
+                                NFS_MSG_NO_MEMORY,
+                                "Allocation error when copying "
+                                "authorized host");
+        }
+
+free_and_out:
+        /* Free allocated strings after doing the auth */
+        GF_FREE (peer_addr);
+        GF_FREE (host_addr_fqdn);
+        GF_FREE (host_addr_ip);
+out:
+        return auth_status_code;
+}
+
+/**
+ * mnt3_authenticate_request -- Given an RPC request and a path, check if the
+ *                              host is authorized to make the request. This
+ *                              function calls _mnt3_authenticate_req ()
+ *                              in a loop for the parent of each path while
+ *                              the authentication check for that path is
+ *                              failing.
+ *
+ * E.g. If the requested path is /patchy/L1, and /patchy is authorized, but
+ * /patchy/L1 is not, it follows this code path :
+ *
+ *      _mnt3_authenticate_req ("/patchy/L1") -> F
+ *      _mnt3_authenticate_req ("/patchy");   -> T
+ *      return T;
+ *
+ * @ms  : The mount state
+ * @req : The RPC request
+ * @fh  : The NFS FH to authenticate; when set, exactly one check is made and
+ *        no parent paths are tried
+ * @volname: Not used by this function
+ * @path: The requested path
+ * @authorized_path: This gets allocated and populated with the authorized path
+ * @authorized_host: This gets allocated and populated with the authorized host
+ * @is_write_op: Is this a write op that we are authenticating?
+ * @return: 0 if authorized (always, when exports auth is disabled)
+ *          -EACCES for completely unauthorized fop
+ *          -EROFS  for unauthorized write operations (rm, mkdir, write)
+ */
+int
+mnt3_authenticate_request (struct mount3_state *ms, rpcsvc_request_t *req,
+                           struct nfs3_fh *fh, const char *volname,
+                           const char *path, char **authorized_path,
+                           char **authorized_host, gf_boolean_t is_write_op)
+{
+        int          auth_status_code = -EACCES;
+        char        *parent_path      = NULL;
+        char        *parent_prev      = NULL; /* heap copy parent_old points at */
+        const char  *parent_old       = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_MNT, ms, out);
+        GF_VALIDATE_OR_GOTO (GF_MNT, req, out);
+
+        /* If this option is not set, just allow it through */
+        if (!ms->nfs->exports_auth) {
+                /* This function is called in a variety of use-cases (mount
+                 * + each fop) so path/authorized_path are not always present.
+                 * For the cases which it _is_ present we need to populate the
+                 * authorized_path. */
+                if (path && authorized_path)
+                        *authorized_path = gf_strdup (path);
+
+                auth_status_code = 0;
+                goto out;
+        }
+
+        /* First check if the path is allowed */
+        auth_status_code = _mnt3_authenticate_req (ms, req, fh, path,
+                                                   authorized_path,
+                                                   authorized_host,
+                                                   is_write_op);
+
+        /* If the filehandle is set, just exit since we have to make only
+         * one call to the function above
+         */
+        if (fh)
+                goto out;
+
+        parent_old = path;
+        while (auth_status_code != 0) {
+                /* Get the path's parent */
+                parent_path = gf_resolve_path_parent (parent_old);
+                if (!parent_path) /* Nothing left in the path to resolve */
+                        break;
+
+                /* Authenticate it */
+                auth_status_code = _mnt3_authenticate_req (ms, req, fh,
+                                                           parent_path,
+                                                           authorized_path,
+                                                           authorized_host,
+                                                           is_write_op);
+
+                /* Keep the parent on the heap for the next iteration rather
+                 * than copying it onto the stack every time round the loop;
+                 * the previous parent is no longer referenced, so drop it.
+                 */
+                GF_FREE (parent_prev);
+                parent_prev = parent_path;
+                parent_old = parent_prev;
+        }
+
+out:
+        GF_FREE (parent_prev);
+        return auth_status_code;
+}
int
mnt3svc_mnt (rpcsvc_request_t *req)
@@ -291,62 +2143,91 @@ mnt3svc_mnt (rpcsvc_request_t *req)
struct iovec pvec = {0, };
char path[MNTPATHLEN];
int ret = -1;
- xlator_t *targetxl = NULL;
struct mount3_state *ms = NULL;
- rpcsvc_t *svc = NULL;
mountstat3 mntstat = MNT3ERR_SERVERFAULT;
+ struct mnt3_export *exp = NULL;
+ struct nfs_state *nfs = NULL;
+ int authcode = 0;
if (!req)
return -1;
pvec.iov_base = path;
pvec.iov_len = MNTPATHLEN;
- ret = xdr_to_mountpath (pvec, req->msg);
+ ret = xdr_to_mountpath (pvec, req->msg[0]);
if (ret == -1) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Failed to decode args");
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Failed to decode args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ms = (struct mount3_state *)rpcsvc_request_program_private (req);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND,
+ "Mount state not present");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = -1;
goto rpcerr;
}
- ret = 0;
- gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s", path);
- targetxl = nfs_mntpath_to_xlator (ms->nfsx->children, path);
- if (!targetxl) {
+ nfs = (struct nfs_state *)ms->nfsx->private;
+ gf_msg_debug (GF_MNT, 0, "dirpath: %s", path);
+ ret = mnt3_find_export (req, path, &exp);
+ if (ret < 0) {
+ mntstat = mnt3svc_errno_to_mnterr (-ret);
+ goto mnterr;
+ } else if (!exp) {
+ /*
+ * SPECIAL CASE: exp is NULL if "path" is subdir in
+ * call to mnt3_find_export().
+ *
+ * This is subdir mount, we are already DONE!
+ * nfs_subvolume_started() and mnt3_check_client_net_tcp()
+ * validation are done in mnt3_parse_dir_exports()
+ * which is invoked through mnt3_find_export().
+ *
+ * TODO: All mounts should happen through mnt3svc_mount()
+ * It needs more clean up.
+ */
+ return (0);
+ }
+
+ if (!nfs_subvolume_started (nfs, exp->vol)) {
+ gf_msg_debug (GF_MNT, 0, "Volume %s not started",
+ exp->vol->name);
ret = -1;
mntstat = MNT3ERR_NOENT;
goto mnterr;
}
- svc = rpcsvc_request_service (req);
- ret = rpcsvc_conn_peer_check (svc->options, targetxl->name,
- rpcsvc_request_conn (req));
+ ret = mnt3_check_client_net_tcp (req, exp->vol->name);
if (ret == RPCSVC_AUTH_REJECT) {
mntstat = MNT3ERR_ACCES;
+ gf_msg_debug (GF_MNT, 0, "Client mount not allowed");
ret = -1;
- gf_log (GF_MNT, GF_LOG_TRACE, "Peer not allowed");
goto mnterr;
}
- ret = rpcsvc_conn_privport_check (svc, targetxl->name,
- rpcsvc_request_conn (req));
- if (ret == RPCSVC_AUTH_REJECT) {
+ /* The second authentication check is the exports/netgroups
+ * check.
+ */
+ authcode = mnt3_authenticate_request (ms, req, NULL, NULL, path, NULL,
+ NULL, _gf_false);
+ if (authcode != 0) {
mntstat = MNT3ERR_ACCES;
+ gf_msg_debug (GF_MNT, 0, "Client mount not allowed");
ret = -1;
- gf_log (GF_MNT, GF_LOG_TRACE, "Unprivileged port not allowed");
- goto rpcerr;
+ goto mnterr;
}
- mnt3svc_mount (req, ms->nfsx, targetxl);
+ ret = mnt3svc_mount (req, ms, exp);
+
+ if (ret < 0)
+ mntstat = mnt3svc_errno_to_mnterr (-ret);
mnterr:
- if (ret == -1) {
+ if (ret < 0) {
mnt3svc_mnt_error_reply (req, mntstat);
ret = 0;
}
@@ -362,11 +2243,11 @@ mnt3svc_null (rpcsvc_request_t *req)
struct iovec dummyvec = {0, };
if (!req) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Got NULL request!");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Got NULL request!");
return 0;
}
-
- rpcsvc_submit_generic (req, dummyvec, NULL);
+ rpcsvc_submit_generic (req, &dummyvec, 1, NULL, 0, NULL);
return 0;
}
@@ -384,50 +2265,51 @@ __build_mountlist (struct mount3_state *ms, int *count)
if ((!ms) || (!count))
return NULL;
+ /* read rmtab, other peers might have updated it */
+ mount_read_rmtab(ms);
+
*count = 0;
- gf_log (GF_MNT, GF_LOG_DEBUG, "Building mount list:");
+ gf_msg_debug (GF_MNT, 0, "Building mount list:");
list_for_each_entry (me, &ms->mountlist, mlist) {
namelen = strlen (me->exname);
mlist = GF_CALLOC (1, sizeof (*mlist), gf_nfs_mt_mountbody);
if (!mlist) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
- " failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory allocation failed");
goto free_list;
}
+ if (!first)
+ first = mlist;
mlist->ml_directory = GF_CALLOC (namelen + 2, sizeof (char),
gf_nfs_mt_char);
if (!mlist->ml_directory) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
- " failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory allocation failed");
goto free_list;
}
- strcpy (mlist->ml_directory, "/");
- strcat (mlist->ml_directory, me->exname);
+ strcpy (mlist->ml_directory, me->exname);
namelen = strlen (me->hostname);
mlist->ml_hostname = GF_CALLOC (namelen + 2, sizeof (char),
gf_nfs_mt_char);
if (!mlist->ml_hostname) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
- " failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory allocation failed");
goto free_list;
}
strcat (mlist->ml_hostname, me->hostname);
- gf_log (GF_MNT, GF_LOG_DEBUG, "mount entry: dir: %s, host: %s",
- mlist->ml_directory, mlist->ml_hostname);
+ gf_msg_debug (GF_MNT, 0, "mount entry: dir: %s, host: %s",
+ mlist->ml_directory, mlist->ml_hostname);
if (prev) {
prev->ml_next = mlist;
prev = mlist;
} else
prev = mlist;
- if (!first)
- first = mlist;
-
(*count)++;
}
@@ -480,8 +2362,8 @@ mnt3svc_dump (rpcsvc_request_t *req)
sfunc = (mnt3_serializer)xdr_serialize_mountlist;
mlist = mnt3svc_build_mountlist (ms, &ret);
- arg = mlist;
- sfunc = (mnt3_serializer)xdr_serialize_mountlist;
+ arg = &mlist;
+
if (!mlist) {
if (ret != 0) {
rpcsvc_request_seterr (req, SYSTEM_ERR);
@@ -504,67 +2386,76 @@ rpcerr:
int
-__mnt3svc_umount (struct mount3_state *ms, char *dirpath, char *hostname)
+mnt3svc_umount (struct mount3_state *ms, char *dirpath, char *hostname)
{
struct mountentry *me = NULL;
- char *exname = NULL;
int ret = -1;
+ gf_store_handle_t *sh = NULL;
+ struct nfs_state *nfs = NULL;
+ gf_boolean_t update_rmtab = _gf_false;
if ((!ms) || (!dirpath) || (!hostname))
return -1;
- if (list_empty (&ms->mountlist))
- return 0;
+ nfs = (struct nfs_state *)ms->nfsx->private;
- if (dirpath[0] == '/')
- exname = dirpath+1;
- else
- exname = dirpath;
-
- list_for_each_entry (me, &ms->mountlist, mlist) {
- if ((strcmp (me->exname, exname) == 0) &&
- (strcmp (me->hostname, hostname) == 0)) {
- ret = 0;
- break;
- }
+ update_rmtab = mount_open_rmtab (nfs->rmtab, &sh);
+ if (update_rmtab) {
+ ret = gf_store_lock (sh);
+ if (ret)
+ goto out_free;
}
- /* Need this check here because at the end of the search me might still
- * be pointing to the last entry, which may not be the one we're
- * looking for.
- */
- if (ret == -1) {/* Not found in list. */
- gf_log (GF_MNT, GF_LOG_DEBUG, "Export not found");
- goto ret;
- }
+ LOCK (&ms->mountlock);
+ {
+ if (update_rmtab)
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_false);
- if (!me)
- goto ret;
+ if (list_empty (&ms->mountlist)) {
+ ret = 0;
+ goto out_unlock;
+ }
- gf_log (GF_MNT, GF_LOG_DEBUG, "Unmounting: dir %s, host: %s",
- me->exname, me->hostname);
- list_del (&me->mlist);
- GF_FREE (me);
- ret = 0;
-ret:
- return ret;
-}
+ ret = -1;
+ list_for_each_entry (me, &ms->mountlist, mlist) {
+ if ((strcmp (me->exname, dirpath) == 0) &&
+ (strcmp (me->hostname, hostname) == 0)) {
+ ret = 0;
+ break;
+ }
+ }
+ /* Need this check here because at the end of the search me
+ * might still be pointing to the last entry, which may not be
+ * the one we're looking for.
+ */
+ if (ret == -1) {/* Not found in list. */
+ gf_msg_trace (GF_MNT, 0, "Export not found");
+ goto out_unlock;
+ }
+ if (!me)
+ goto out_unlock;
-int
-mnt3svc_umount (struct mount3_state *ms, char *dirpath, char *hostname)
-{
- int ret = -1;
- if ((!ms) || (!dirpath) || (!hostname))
- return -1;
+ gf_msg_debug (GF_MNT, 0, "Unmounting: dir %s, host: %s",
+ me->exname, me->hostname);
- LOCK (&ms->mountlock);
- {
- ret = __mnt3svc_umount (ms, dirpath, hostname);
+ list_del (&me->mlist);
+ GF_FREE (me);
+
+ if (update_rmtab)
+ __mount_rewrite_rmtab (ms, sh);
}
+out_unlock:
UNLOCK (&ms->mountlock);
+ if (update_rmtab)
+ gf_store_unlock (sh);
+
+out_free:
+ if (update_rmtab)
+ gf_store_handle_destroy (sh);
+
return ret;
}
@@ -578,6 +2469,7 @@ mnt3svc_umnt (rpcsvc_request_t *req)
int ret = -1;
struct mount3_state *ms = NULL;
mountstat3 mstat = MNT3_OK;
+ char *colon = NULL;
if (!req)
return -1;
@@ -585,56 +2477,47 @@ mnt3svc_umnt (rpcsvc_request_t *req)
/* Remove the mount point from the exports list. */
pvec.iov_base = dirpath;
pvec.iov_len = MNTPATHLEN;
- ret = xdr_to_mountpath (pvec, req->msg);;
+ ret = xdr_to_mountpath (pvec, req->msg[0]);
if (ret == -1) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Failed decode args");
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Failed decode args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ms = (struct mount3_state *)rpcsvc_request_program_private (req);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND, "Mount state not present");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = -1;
goto rpcerr;
}
- ret = rpcsvc_conn_peername (req->conn, hostname, MNTPATHLEN);
+ ret = rpcsvc_transport_peername (req->trans, hostname, MNTPATHLEN);
if (ret != 0) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get remote name: %s",
- gai_strerror (ret));
- goto try_umount_with_addr;
- }
-
- gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s, hostname: %s", dirpath,
- hostname);
- ret = mnt3svc_umount (ms, dirpath, hostname);
-
- /* Unmount succeeded with the given hostname. */
- if (ret == 0)
- goto snd_reply;
-
-try_umount_with_addr:
- if (ret != 0)
- ret = rpcsvc_conn_peeraddr (req->conn, hostname, MNTPATHLEN,
- NULL, 0);
-
- if (ret != 0) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get remote addr: %s",
- gai_strerror (ret));
- rpcsvc_request_seterr (req, SYSTEM_ERR);
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOENT,
+ NFS_MSG_GET_REMOTE_NAME_FAIL,
+ "Failed to get remote name: %s", gai_strerror (ret));
goto rpcerr;
}
- gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s, hostname: %s", dirpath,
- hostname);
+ colon = strrchr (hostname, ':');
+ if (colon) {
+ *colon= '\0';
+ }
+ gf_path_strip_trailing_slashes (dirpath);
+ gf_msg_debug (GF_MNT, 0, "dirpath: %s, hostname: %s", dirpath,
+ hostname);
ret = mnt3svc_umount (ms, dirpath, hostname);
- if (ret == -1)
- mstat = MNT3ERR_INVAL;
- ret = 0;
-snd_reply:
+ if (ret == -1) {
+ ret = 0;
+ mstat = MNT3ERR_NOENT;
+ }
+ /* FIXME: also take care of the corner case where the
+ * client was resolvable at mount but not at the umount - vice-versa.
+ */
mnt3svc_submit_reply (req, &mstat,
(mnt3_serializer)xdr_serialize_mountstat3);
@@ -647,15 +2530,22 @@ int
__mnt3svc_umountall (struct mount3_state *ms)
{
struct mountentry *me = NULL;
+ struct mountentry *tmp = NULL;
if (!ms)
return -1;
- list_for_each_entry (me, &ms->mountlist, mlist) {
- list_del (&me->mlist);
+ if (list_empty (&ms->mountlist))
+ return 0;
+
+ list_for_each_entry_safe (me, tmp, &ms->mountlist, mlist) {
+ list_del (&me->mlist); /* Remove from the mount list */
+ __mountdict_remove (ms, me); /* Remove from the mount dict */
GF_FREE (me);
}
+ dict_unref (ms->mountdict);
+
return 0;
}
@@ -680,18 +2570,18 @@ mnt3svc_umountall (struct mount3_state *ms)
int
mnt3svc_umntall (rpcsvc_request_t *req)
{
- int ret = -1;
+ int ret = RPCSVC_ACTOR_ERROR;
struct mount3_state *ms = NULL;
mountstat3 mstat = MNT3_OK;
if (!req)
- return -1;
+ return ret;
ms = (struct mount3_state *)rpcsvc_request_program_private (req);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND, "Mount state not present");
rpcsvc_request_seterr (req, SYSTEM_ERR);
- ret = -1;
goto rpcerr;
}
@@ -699,13 +2589,14 @@ mnt3svc_umntall (rpcsvc_request_t *req)
mnt3svc_submit_reply (req, &mstat,
(mnt3_serializer)xdr_serialize_mountstat3);
+ ret = RPCSVC_ACTOR_SUCCESS;
rpcerr:
return ret;
}
exports
-mnt3_xlchildren_to_exports (rpcsvc_t *svc, xlator_list_t *cl)
+mnt3_xlchildren_to_exports (rpcsvc_t *svc, struct mount3_state *ms)
{
struct exportnode *elist = NULL;
struct exportnode *prev = NULL;
@@ -713,60 +2604,123 @@ mnt3_xlchildren_to_exports (rpcsvc_t *svc, xlator_list_t *cl)
size_t namelen = 0;
int ret = -1;
char *addrstr = NULL;
+ struct mnt3_export *ent = NULL;
+ struct nfs_state *nfs = NULL;
- if ((!cl) || (!svc))
+ if ((!ms) || (!svc))
return NULL;
- while (cl) {
- namelen = strlen (cl->xlator->name);
+ nfs = (struct nfs_state *)ms->nfsx->private;
+ if (!nfs)
+ return NULL;
+
+ LOCK (&ms->mountlock);
+ list_for_each_entry(ent, &ms->exportlist, explist) {
+
+ /* If volume is not started yet, do not list it for tools like
+ * showmount.
+ */
+ if (!nfs_subvolume_started (nfs, ent->vol))
+ continue;
+
+ namelen = strlen (ent->expname) + 1;
elist = GF_CALLOC (1, sizeof (*elist), gf_nfs_mt_exportnode);
if (!elist) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
- " failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory allocation failed");
goto free_list;
}
-
+ if (!first)
+ first = elist;
elist->ex_dir = GF_CALLOC (namelen + 2, sizeof (char),
gf_nfs_mt_char);
if (!elist->ex_dir) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
- " failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory allocation failed");
goto free_list;
}
+ strcpy (elist->ex_dir, ent->expname);
+
+ addrstr = rpcsvc_volume_allowed (svc->options, ent->vol->name);
+ if (addrstr) {
+ /* create a groupnode per allowed client */
+ char *pos = NULL;
+ char *addr = NULL;
+ char *addrs = NULL;
+ struct groupnode *group = NULL;
+ struct groupnode *prev_group = NULL;
+
+ /* strtok_r() modifies the string, dup it */
+ addrs = gf_strdup (addrstr);
+ if (!addrs)
+ goto free_list;
+
+ while (1) {
+ /* only pass addrs on the 1st call */
+ addr = strtok_r (group ? NULL : addrs, ",",
+ &pos);
+ if (addr == NULL)
+ /* no more clients */
+ break;
+
+ group = GF_CALLOC (1, sizeof (struct groupnode),
+ gf_nfs_mt_groupnode);
+ if (!group) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory "
+ "allocation failed");
+ GF_FREE (addrs);
+ goto free_list;
+ }
+
+ group->gr_name = gf_strdup (addr);
+ if (!group->gr_name) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory "
+ "allocation failed");
+ GF_FREE (group);
+ GF_FREE (addrs);
+ goto free_list;
+ }
+
+ /* chain the groups together */
+ if (!elist->ex_groups)
+ elist->ex_groups = group;
+ else
+ prev_group->gr_next = group;
+ prev_group = group;
+ }
+
+ GF_FREE (addrs);
+ } else {
+ elist->ex_groups = GF_CALLOC (1,
+ sizeof (struct groupnode),
+ gf_nfs_mt_groupnode);
+ if (!elist->ex_groups) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Memory allocation "
+ "failed");
+ goto free_list;
+ }
- strcpy (elist->ex_dir, "/");
- strcat (elist->ex_dir, cl->xlator->name);
-
- addrstr = rpcsvc_volume_allowed (svc->options,cl->xlator->name);
- if (addrstr)
- addrstr = gf_strdup (addrstr);
- else
addrstr = gf_strdup ("No Access");
+ if (!addrstr)
+ goto free_list;
- elist->ex_groups = GF_CALLOC (1, sizeof (struct groupnode),
- gf_nfs_mt_groupnode);
- if (!elist->ex_groups) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
- " failed");
- goto free_list;
+ elist->ex_groups->gr_name = addrstr;
}
- elist->ex_groups->gr_name = addrstr;
if (prev) {
prev->ex_next = elist;
prev = elist;
} else
prev = elist;
-
- if (!first)
- first = elist;
-
- cl = cl->next;
}
ret = 0;
free_list:
+ UNLOCK (&ms->mountlock);
if (ret == -1) {
xdr_free_exports_list (first);
first = NULL;
@@ -788,19 +2742,24 @@ mnt3svc_export (rpcsvc_request_t *req)
ms = (struct mount3_state *)rpcsvc_request_program_private (req);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "mount state not found");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND, "mount state not found");
rpcsvc_request_seterr (req, SYSTEM_ERR);
goto err;
}
/* Using the children translator names, build the export list */
elist = mnt3_xlchildren_to_exports (rpcsvc_request_service (req),
- ms->nfsx->children);
+ ms);
+ /* Do not return error when exports list is empty. An exports list can
+ * be empty when no subvolumes have come up. No point returning error
+ * and confusing the user.
if (!elist) {
gf_log (GF_MNT, GF_LOG_ERROR, "Failed to build exports list");
- rpcsvc_request_seterr (req, SYSTEM_ERR);
+ nfs_rpcsvc_request_seterr (req, SYSTEM_ERR);
goto err;
}
+ */
/* Note how the serializer is passed to the generic reply function. */
mnt3svc_submit_reply (req, &elist,
@@ -813,72 +2772,1272 @@ err:
}
+/*
+ * __mnt3udp_get_mstate() -- return the mount3_state reachable from the NFS
+ *                           xlator via its private nfs_state (nfs->mstate).
+ * Returns NULL when @nfsx or its private data is NULL.
+ * Linkage: Static
+ * Usage: Used only for UDP MOUNT codepath
+ */
+static struct mount3_state *
+__mnt3udp_get_mstate (xlator_t *nfsx)
+{
+        struct nfs_state *nfs_priv = NULL;
+
+        if (nfsx != NULL)
+                nfs_priv = (struct nfs_state *)nfsx->private;
+
+        if (nfs_priv == NULL)
+                return NULL;
+
+        return (struct mount3_state *)nfs_priv->mstate;
+}
+
+/*
+ * Local extern declarations for the glfs_* routines called by
+ * __mnt3udp_get_export_subdir_inode() below.
+ * NOTE(review): presumably provided by libgfapi -- confirm.
+ */
+extern int
+glfs_resolve_at (struct glfs *, xlator_t *, inode_t *,
+ const char *, loc_t *, struct iatt *, int, int);
+
+extern struct glfs *
+glfs_new_from_ctx (glusterfs_ctx_t *);
+
+extern void
+glfs_free_from_ctx (struct glfs *);
+
+/*
+ * __mnt3udp_get_export_subdir_inode() -- resolve a sub-directory of an
+ * exported volume to an inode for the UDP MOUNT path.
+ *
+ * @req     : request whose transport supplies the caller address for the
+ *            nfs.export-dir AUTH check (only done when exp->hostspec is set)
+ * @subdir  : path handed to glfs_resolve_at() for resolution
+ * @expname : OUT, receives "/<volume name><resolved path>", written with a
+ *            PATH_MAX bound
+ * @exp     : export the sub-directory belongs to
+ *
+ * Returns a referenced inode on success. Returns NULL on a NULL argument,
+ * on AUTH failure (errno is set to EACCES), when the glfs object cannot be
+ * created, or when resolution fails.
+ */
+static inode_t *
+__mnt3udp_get_export_subdir_inode (struct svc_req *req, char *subdir,
+ char *expname, /* OUT */
+ struct mnt3_export *exp)
+{
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt buf = {0, };
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ if ((!req) || (!subdir) || (!expname) || (!exp))
+ return NULL;
+
+ /* AUTH check for subdir i.e. nfs.export-dir */
+ if (exp->hostspec) {
+ struct sockaddr_in *sin = svc_getcaller (req->rq_xprt);
+ ret = mnt3_verify_auth (sin, exp);
+ if (ret) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, EACCES,
+ NFS_MSG_AUTH_VERIFY_FAILED,
+ "AUTH(nfs.export-dir) verification failed");
+ errno = EACCES;
+ return NULL;
+ }
+ }
+
+ /*
+ * IMP: glfs_t fs object is not used by glfs_resolve_at (). The main
+ * purpose is to not change the ABI of glfs_resolve_at () and not to
+ * pass a NULL object.
+ *
+ * TODO: Instead of linking against libgfapi.so, just for one API
+ * i.e. glfs_resolve_at(), It would be cleaner if PATH name to
+ * inode resolution code can be moved to libglusterfs.so or so.
+ * refer bugzilla for more details :
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1161573
+ */
+ fs = glfs_new_from_ctx (exp->vol->ctx);
+ if (!fs)
+ return NULL;
+
+ ret = glfs_resolve_at (fs, exp->vol, NULL, subdir,
+ &loc, &buf, 1 /* Follow link */,
+ 0 /* Hard lookup */);
+
+ /* The glfs object is only needed for the call above */
+ glfs_free_from_ctx (fs);
+
+ if (ret != 0) {
+ loc_wipe (&loc);
+ return NULL;
+ }
+
+ /* Take our own reference before loc_wipe() is called on loc below */
+ inode = inode_ref (loc.inode);
+ snprintf (expname, PATH_MAX, "/%s%s", exp->vol->name, loc.path);
+
+ loc_wipe (&loc);
+
+ return inode;
+}
+
+/*
+ * __mnt3udp_get_export_volume_inode() -- inode lookup for a whole-volume
+ * export on the UDP MOUNT path.
+ *
+ * The part of @volpath starting at its first '/' ("/" when it contains none)
+ * is looked up in the volume's inode table, and "/<volume name>" is written
+ * to @expname with a PATH_MAX bound.
+ *
+ * Returns the result of inode_from_path(), or NULL on a NULL argument.
+ */
+static inode_t *
+__mnt3udp_get_export_volume_inode (struct svc_req *req, char *volpath,
+                                   char *expname, /* OUT */
+                                   struct mnt3_export *exp)
+{
+        char    *slash = NULL;
+        inode_t *found = NULL;
+
+        if (!req || !volpath || !expname || !exp)
+                return NULL;
+
+        slash = strchr (volpath, '/');
+        found = inode_from_path (exp->vol->itable, slash ? slash : "/");
+
+        snprintf (expname, PATH_MAX, "/%s", exp->vol->name);
+
+        return found;
+}
+
+/*
+ * nfs3_rootfh() is used for NFS MOUNT over UDP i.e. mountudpproc3_mnt_3_svc().
+ * Especially in mount3udp_thread() THREAD. Gluster NFS starts this thread
+ * when nfs.mount-udp is ENABLED (set to TRUE/ON).
+ *
+ * @req     : the UDP request; used for the client address checks
+ * @nfsx    : the NFS xlator, source of the mount and nfs state
+ * @path    : the path the client asked to mount
+ * @expname : OUT, filled with the export name by the inode helpers
+ *
+ * Returns a filehandle allocated with GF_CALLOC on success (presumably to be
+ * released by the caller -- confirm), or NULL with errno set: EFAULT for bad
+ * arguments or missing mount state, EACCES when sub-directory exports are
+ * disabled or the client is rejected, ENOENT when no export matches, the
+ * volume is not started or no inode is found, ENOMEM on allocation failure.
+ */
+struct nfs3_fh *
+nfs3_rootfh (struct svc_req *req, xlator_t *nfsx,
+ char *path, char *expname /* OUT */)
+{
+ struct nfs3_fh *fh = NULL;
+ inode_t *inode = NULL;
+ struct mnt3_export *exp = NULL;
+ struct mount3_state *ms = NULL;
+ struct nfs_state *nfs = NULL;
+ int mnt3type = MNT3_EXPTYPE_DIR;
+ int ret = RPCSVC_AUTH_REJECT;
+
+ if ((!req) || (!nfsx) || (!path) || (!expname)) {
+ errno = EFAULT;
+ return NULL;
+ }
+
+ /*
+ * 1. First check if the MOUNT is for whole volume.
+ * i.e. __mnt3udp_get_export_volume_inode ()
+ * 2. If NOT, then TRY for SUBDIR MOUNT.
+ * i.e. __mnt3udp_get_export_subdir_inode ()
+ * 3. If a subdir is exported using nfs.export-dir,
+ * then the mount type would be MNT3_EXPTYPE_DIR,
+ * so make sure to find the proper path to be
+ * resolved using __volume_subdir()
+ * 4. Make sure subdir export is allowed.
+ */
+ ms = __mnt3udp_get_mstate(nfsx);
+ if (!ms) {
+ errno = EFAULT;
+ return NULL;
+ }
+
+ /* No direct match leaves mnt3type at its MNT3_EXPTYPE_DIR default */
+ exp = mnt3_mntpath_to_export (ms, path , _gf_false);
+ if (exp != NULL)
+ mnt3type = exp->exptype;
+
+ if (mnt3type == MNT3_EXPTYPE_DIR) {
+ char volname [MNTPATHLEN] = {0, };
+ char *volptr = volname;
+
+ /* Subdir export (nfs3.export-dirs) check */
+ if (!gf_mnt3_export_dirs(ms)) {
+ errno = EACCES;
+ return NULL;
+ }
+
+ /* Split off the volume name; path is replaced by the result */
+ path = __volume_subdir (path, &volptr);
+ if (exp == NULL)
+ exp = mnt3_mntpath_to_export (ms, volname , _gf_false);
+ }
+
+ if (exp == NULL) {
+ errno = ENOENT;
+ return NULL;
+ }
+
+ nfs = (struct nfs_state *)nfsx->private;
+ if (!nfs_subvolume_started (nfs, exp->vol)) {
+ errno = ENOENT;
+ return NULL;
+ }
+
+ /* AUTH check: respect nfs.rpc-auth-allow/reject */
+ ret = mnt3_check_client_net_udp (req, exp->vol->name, nfsx);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ errno = EACCES;
+ return NULL;
+ }
+
+ switch (mnt3type) {
+
+ case MNT3_EXPTYPE_VOLUME:
+ inode = __mnt3udp_get_export_volume_inode (req, path,
+ expname, exp);
+ break;
+
+ case MNT3_EXPTYPE_DIR:
+ inode = __mnt3udp_get_export_subdir_inode (req, path,
+ expname, exp);
+ break;
+
+ default:
+ /* Never reachable */
+ gf_msg (GF_MNT, GF_LOG_ERROR, EFAULT, NFS_MSG_UNKNOWN_MNT_TYPE,
+ "Unknown MOUNT3 type");
+ errno = EFAULT;
+ goto err;
+ }
+
+ if (inode == NULL) {
+ /* Don't over-write errno */
+ if (!errno)
+ errno = ENOENT;
+ goto err;
+ }
+
+ /* Build the FH from the inode */
+ fh = GF_CALLOC (1, sizeof(*fh), gf_nfs_mt_nfs3_fh);
+ if (fh == NULL) {
+ errno = ENOMEM;
+ goto err;
+ }
+
+ (void) nfs3_build_fh (inode, exp->volumeid, fh);
+
+err:
+ /* Drop the inode reference held by this function, if one was obtained */
+ if (inode)
+ inode_unref (inode);
+
+ return fh;
+}
+
+/*
+ * mount3udp_add_mountlist() -- record a UDP mount of @export by @host.
+ *
+ * A new mountentry is appended to the mount list of the mount state reached
+ * through @nfsx, and mount_rewrite_rmtab() is called, all under
+ * ms->mountlock.
+ *
+ * Returns 0 on success, -1 on a NULL argument, missing mount state or
+ * allocation failure.
+ */
+int
+mount3udp_add_mountlist (xlator_t *nfsx, char *host, char *export)
+{
+        struct mountentry       *me = NULL;
+        struct mount3_state     *ms = NULL;
+
+        if ((!host) || (!export) || (!nfsx))
+                return -1;
+
+        ms = __mnt3udp_get_mstate (nfsx);
+        if (!ms)
+                return -1;
+
+        me = GF_CALLOC (1, sizeof (*me), gf_nfs_mt_mountentry);
+        if (!me)
+                return -1;
+
+        /* snprintf() always NUL-terminates within the given size, whereas
+         * strncpy() would leave the field unterminated for a source of
+         * MNTPATHLEN bytes or more. Over-long names are truncated.
+         */
+        snprintf (me->exname, MNTPATHLEN, "%s", export);
+        snprintf (me->hostname, MNTPATHLEN, "%s", host);
+        INIT_LIST_HEAD (&me->mlist);
+        LOCK (&ms->mountlock);
+        {
+                list_add_tail (&me->mlist, &ms->mountlist);
+                mount_rewrite_rmtab(ms, NULL);
+        }
+        UNLOCK (&ms->mountlock);
+        return 0;
+}
+
+/*
+ * mount3udp_delete_mountlist() -- drop the mount entry for @export/@hostname
+ * by calling mnt3svc_umount() on the mount state reached through @nfsx.
+ *
+ * Returns -1 on a NULL argument or missing mount state, otherwise 0; the
+ * result of mnt3svc_umount() is not propagated.
+ */
+int
+mount3udp_delete_mountlist (xlator_t *nfsx, char *hostname, char *export)
+{
+        struct mount3_state *mstate = NULL;
+
+        if (!hostname || !export || !nfsx)
+                return -1;
+
+        mstate = __mnt3udp_get_mstate (nfsx);
+        if (mstate == NULL)
+                return -1;
+
+        mnt3svc_umount (mstate, export, hostname);
+
+        return 0;
+}
+
+/**
+ * This function will parse the hostip (IP address, IP range, or hostname)
+ * and fill the host_auth_spec structure.
+ *
+ * @param hostspec - struct host_auth_spec
+ * @param hostip - IP address, IP range (CIDR format) or hostname
+ *
+ * @return 0 - on success and -1 on failure
+ *
+ * NB: This does not support IPv6 currently.
+ */
+int
+mnt3_export_fill_hostspec (struct host_auth_spec* hostspec, const char* hostip)
+{
+ char *ipdupstr = NULL;
+ char *savptr = NULL;
+ char *endptr = NULL;
+ char *ip = NULL;
+ char *token = NULL;
+ int ret = -1;
+ long prefixlen = IPv4_ADDR_SIZE; /* default */
+ uint32_t shiftbits = 0;
+ size_t length = 0;
+
+ /* Tokenize a private copy: strtok_r () writes into its input. */
+ ipdupstr = gf_strdup (hostip);
+ if (NULL == ipdupstr) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ goto err;
+ }
+
+ ip = strtok_r (ipdupstr, "/", &savptr);
+ if (NULL == ip) /* empty or "/"-only spec: no host token */
+ goto err;
+ /* Validate the Hostname or IPv4 address.
+ * TODO: IPv6 support for subdir auth. */
+ length = strlen (ip);
+ if ((!valid_ipv4_address (ip, (int)length, _gf_false)) &&
+ (!valid_host_name (ip, (int)length))) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Invalid hostname or IPv4 address: %s", ip);
+ goto err;
+ }
+
+ hostspec->host_addr = gf_strdup (ip);
+ if (NULL == hostspec->host_addr) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ goto err;
+ }
+
+ /**
+ * User provided CIDR address (xx.xx.xx.xx/n format) is split
+ * into HOST (IP addr or hostname) and network prefix(n) from
+ * which netmask would be calculated. This CIDR address may
+ * denote a single, distinct interface address or the beginning
+ * address of an entire network.
+ *
+ * e.g. the IPv4 block 192.168.100.0/24 represents the 256
+ * IPv4 addresses from 192.168.100.0 to 192.168.100.255.
+ * Therefore to check if an IP matches 192.168.100.0/24
+ * we should mask the IP with FFFFFF00 and compare it with
+ * host address part of CIDR.
+ * Refer: mask_match() in common-utils.c.
+ */
+ token = strtok_r (NULL, "/", &savptr);
+ if (token != NULL) {
+ errno = 0; /* strtol () flags failure only by setting errno */
+ prefixlen = strtol (token, &endptr, 10);
+ if ((errno != 0) || (*endptr != '\0') ||
+ (prefixlen < 0) || (prefixlen > IPv4_ADDR_SIZE)) {
+ gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+ NFS_MSG_INVALID_ENTRY,
+ "Invalid IPv4 subnetwork mask");
+ goto err;
+ }
+ }
+
+ /*
+ * Store the netmask in Big-Endian format. A zero prefix gets an
+ * all-zero mask: shifting a uint32_t by 32 bits is undefined in C.
+ */
+ shiftbits = IPv4_ADDR_SIZE - prefixlen;
+ hostspec->netmask = (prefixlen > 0) ?
+ htonl ((uint32_t)~0 << shiftbits) : 0;
+
+ ret = 0; /* SUCCESS */
+err:
+ if (NULL != ipdupstr) {
+ GF_FREE (ipdupstr);
+ }
+ return ret;
+}
+
+
+/**
+ * This function will parse the AUTH parameter passed along with
+ * "export-dir" option. If AUTH parameter is present then it will be
+ * stripped from exportpath and stored in mnt3_export (exp) structure.
+ *
+ * @param exp - mnt3_export structure. Holds information needed for mount.
+ * @param exportpath - Value of "export-dir" key. Holds both export path
+ * and AUTH parameter for the path.
+ * exportpath format: <abspath>[(hostspec[|hostspec|...])]
+ *
+ * @return This function will return 0 on success and -1 on failure.
+ */
+int
+mnt3_export_parse_auth_param (struct mnt3_export* exp, char* exportpath)
+{
+ char *token = NULL;
+ char *savPtr = NULL;
+ char *hostip = NULL;
+ struct host_auth_spec *host = NULL;
+ int ret = 0;
+
+ /* Using exportpath directly in strtok_r because we want
+ * to strip off AUTH parameter from exportpath. */
+ token = strtok_r (exportpath, "(", &savPtr);
+
+ /* Get the next token, which will be the AUTH parameter. */
+ token = strtok_r (NULL, ")", &savPtr);
+
+ if (NULL == token) {
+ /* If AUTH is not present then we should return success. */
+ return 0;
+ }
+
+ /* Free any previously allocated hostspec structure. */
+ if (NULL != exp->hostspec) {
+ GF_FREE (exp->hostspec);
+ exp->hostspec = NULL;
+ }
+
+ exp->hostspec = GF_CALLOC (1,
+ sizeof (*(exp->hostspec)),
+ gf_nfs_mt_auth_spec);
+ if (NULL == exp->hostspec){
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ return -1;
+ }
+
+ /* AUTH parameter can have multiple entries. For each entry
+ * a host_auth_spec structure is created. */
+ host = exp->hostspec;
+
+ hostip = strtok_r (token, "|", &savPtr);
+
+ /* Parse all AUTH parameters separated by '|' */
+ while (NULL != hostip){
+ ret = mnt3_export_fill_hostspec (host, hostip);
+ if (0 != ret) {
+ gf_msg (GF_MNT, GF_LOG_WARNING, 0,
+ NFS_MSG_PARSE_HOSTSPEC_FAIL,
+ "Failed to parse hostspec: %s", hostip);
+ goto err;
+ }
+
+ hostip = strtok_r (NULL, "|", &savPtr);
+ if (NULL == hostip) {
+ break;
+ }
+
+ host->next = GF_CALLOC (1, sizeof (*(host)),
+ gf_nfs_mt_auth_spec);
+ if (NULL == host->next){
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ goto err;
+ }
+ host = host->next;
+ }
+
+ /* In case of success return from here */
+ return 0;
+err:
+ /* In case of failure free up hostspec structure. */
+ FREE_HOSTSPEC (exp);
+
+ return -1;
+}
+
+/**
+ * exportpath will also have AUTH options (ip address, subnet address or
+ * hostname) mentioned.
+ * exportpath format: <abspath>[(hostspec[|hostspec|...])]
+ */
+struct mnt3_export *
+mnt3_init_export_ent (struct mount3_state *ms, xlator_t *xl, char *exportpath,
+ uuid_t volumeid)
+{
+ struct mnt3_export *exp = NULL;
+ int alloclen = 0;
+ int ret = -1;
+
+ if ((!ms) || (!xl))
+ return NULL;
+
+ exp = GF_CALLOC (1, sizeof (*exp), gf_nfs_mt_mnt3_export);
+ if (!exp) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ return NULL;
+ }
+
+ if (NULL != exportpath) {
+ /* If exportpath is not NULL then we should check if AUTH
+ * parameter is present or not. If AUTH parameter is present
+ * then it will be stripped and stored in mnt3_export (exp)
+ * structure.
+ */
+ if (0 != mnt3_export_parse_auth_param (exp, exportpath)){
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0,
+ NFS_MSG_PARSE_AUTH_PARAM_FAIL,
+ "Failed to parse auth param");
+ goto err;
+ }
+ }
+
+
+ INIT_LIST_HEAD (&exp->explist);
+ if (exportpath)
+ alloclen = strlen (xl->name) + 2 + strlen (exportpath);
+ else
+ alloclen = strlen (xl->name) + 2;
+
+ exp->expname = GF_CALLOC (alloclen, sizeof (char), gf_nfs_mt_char);
+ if (!exp->expname) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ goto err;
+ }
+
+ if (exportpath) {
+ gf_msg_trace (GF_MNT, 0, "Initing dir export: %s:%s",
+ xl->name, exportpath);
+ exp->exptype = MNT3_EXPTYPE_DIR;
+ ret = snprintf (exp->expname, alloclen, "/%s%s", xl->name,
+ exportpath);
+ } else {
+ gf_msg_trace (GF_MNT, 0, "Initing volume export: %s",
+ xl->name);
+ exp->exptype = MNT3_EXPTYPE_VOLUME;
+ ret = snprintf (exp->expname, alloclen, "/%s", xl->name);
+ }
+ if (ret < 0) {
+ gf_msg (xl->name, GF_LOG_ERROR, ret, NFS_MSG_SET_EXP_FAIL,
+ "Failed to set the export name");
+ goto err;
+ }
+ /* Just copy without discrimination, we'll determine whether to
+ * actually use it when a mount request comes in and a file handle
+ * needs to be built.
+ */
+ gf_uuid_copy (exp->volumeid, volumeid);
+ exp->vol = xl;
+
+ /* On success we should return from here*/
+ return exp;
+err:
+ /* On failure free exp and its members. */
+ if (NULL != exp) {
+ mnt3_export_free (exp);
+ exp = NULL;
+ }
+
+ return exp;
+}
+
+
+int
+__mnt3_init_volume_direxports (struct mount3_state *ms, xlator_t *xlator,
+ char *optstr, uuid_t volumeid)
+{
+ struct mnt3_export *newexp = NULL;
+ int ret = -1;
+ char *savptr = NULL;
+ char *dupopt = NULL;
+ char *token = NULL;
+
+ if ((!ms) || (!xlator) || (!optstr))
+ return -1;
+
+ dupopt = strdupa (optstr);
+
+ token = strtok_r (dupopt, ",", &savptr);
+ while (token) {
+ newexp = mnt3_init_export_ent (ms, xlator, token, volumeid);
+ if (!newexp) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0,
+ NFS_MSG_INIT_DIR_EXP_FAIL, "Failed to init dir "
+ "export: %s", token);
+ ret = -1;
+ goto err;
+ }
+
+ list_add_tail (&newexp->explist, &ms->exportlist);
+ token = strtok_r (NULL, ",", &savptr);
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+__mnt3_init_volume (struct mount3_state *ms, dict_t *opts, xlator_t *xlator)
+{
+ struct mnt3_export *newexp = NULL;
+ int ret = -1;
+ char searchstr[1024];
+ char *optstr = NULL;
+ uuid_t volumeid = {0, };
+
+ if ((!ms) || (!xlator) || (!opts))
+ return -1;
+
+ gf_uuid_clear (volumeid);
+ if (gf_nfs_dvm_off (nfs_state (ms->nfsx)))
+ goto no_dvm;
+
+ ret = snprintf (searchstr, 1024, "nfs3.%s.volume-id", xlator->name);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (opts, searchstr)) {
+ ret = dict_get_str (opts, searchstr, &optstr);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_DICT_GET_FAILED, "Failed to read "
+ "option: %s", searchstr);
+ ret = -1;
+ goto err;
+ }
+ } else {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_VOLID_MISSING,
+ "DVM is on but volume-id not "
+ "given for volume: %s", xlator->name);
+ ret = -1;
+ goto err;
+ }
+
+ if (optstr) {
+ ret = gf_uuid_parse (optstr, volumeid);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_PARSE_VOL_UUID_FAIL, "Failed to parse "
+ "volume UUID");
+ ret = -1;
+ goto err;
+ }
+ }
+
+no_dvm:
+ ret = snprintf (searchstr, 1024, "nfs3.%s.export-dir", xlator->name);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (opts, searchstr)) {
+ ret = dict_get_str (opts, searchstr, &optstr);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_DICT_GET_FAILED, "Failed to read "
+ "option: %s", searchstr);
+ ret = -1;
+ goto err;
+ }
+
+ ret = __mnt3_init_volume_direxports (ms, xlator, optstr,
+ volumeid);
+ if (ret == -1) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0,
+ NFS_MSG_DIR_EXP_SETUP_FAIL, "Dir export "
+ "setup failed for volume: %s", xlator->name);
+ goto err;
+ }
+ }
+
+ if (ms->export_volumes) {
+ newexp = mnt3_init_export_ent (ms, xlator, NULL, volumeid);
+ if (!newexp) {
+ ret = -1;
+ goto err;
+ }
+
+ list_add_tail (&newexp->explist, &ms->exportlist);
+ }
+ ret = 0;
+
+
+err:
+ return ret;
+}
+
+
+int
+__mnt3_init_volume_export (struct mount3_state *ms, dict_t *opts)
+{
+ int ret = -1;
+ char *optstr = NULL;
+ /* On by default. */
+ gf_boolean_t boolt = _gf_true;
+
+ if ((!ms) || (!opts))
+ return -1;
+
+ if (!dict_get (opts, "nfs3.export-volumes")) {
+ ret = 0;
+ goto err;
+ }
+
+ ret = dict_get_str (opts, "nfs3.export-volumes", &optstr);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_DICT_GET_FAILED,
+ "Failed to read option: nfs3.export-volumes");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_STR2BOOL_FAIL,
+ "Failed to convert string to boolean");
+ }
+
+err:
+ if (boolt == _gf_false) {
+ gf_msg_trace (GF_MNT, 0, "Volume exports disabled");
+ ms->export_volumes = 0;
+ } else {
+ gf_msg_trace (GF_MNT, 0, "Volume exports enabled");
+ ms->export_volumes = 1;
+ }
+
+ return ret;
+}
+
+
+int
+__mnt3_init_dir_export (struct mount3_state *ms, dict_t *opts)
+{
+ int ret = -1;
+ char *optstr = NULL;
+ /* On by default. */
+ gf_boolean_t boolt = _gf_true;
+
+ if ((!ms) || (!opts))
+ return -1;
+
+ if (!dict_get (opts, "nfs3.export-dirs")) {
+ ret = 0;
+ goto err;
+ }
+
+ ret = dict_get_str (opts, "nfs3.export-dirs", &optstr);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_DICT_GET_FAILED,
+ "Failed to read option: nfs3.export-dirs");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_STR2BOOL_FAIL,
+ "Failed to convert string to boolean");
+ }
+
+err:
+ if (boolt == _gf_false) {
+ gf_msg_trace (GF_MNT, 0, "Dir exports disabled");
+ ms->export_dirs = 0;
+ } else {
+ gf_msg_trace (GF_MNT, 0, "Dir exports enabled");
+ ms->export_dirs = 1;
+ }
+
+ return ret;
+}
+
+
+int
+mnt3_init_options (struct mount3_state *ms, dict_t *options)
+{
+ xlator_list_t *volentry = NULL;
+ int ret = -1;
+
+ if ((!ms) || (!options))
+ return -1;
+
+ __mnt3_init_volume_export (ms, options);
+ __mnt3_init_dir_export (ms, options);
+ volentry = ms->nfsx->children;
+ while (volentry) {
+ gf_msg_trace (GF_MNT, 0, "Initing options for: %s",
+ volentry->xlator->name);
+ ret = __mnt3_init_volume (ms, options, volentry->xlator);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_VOL_INIT_FAIL,
+ "Volume init failed");
+ goto err;
+ }
+
+ volentry = volentry->next;
+ }
+
+
+ ret = 0;
+err:
+ return ret;
+}
+
struct mount3_state *
mnt3_init_state (xlator_t *nfsx)
{
struct mount3_state *ms = NULL;
+ int ret = -1;
if (!nfsx)
return NULL;
ms = GF_CALLOC (1, sizeof (*ms), gf_nfs_mt_mount3_state);
if (!ms) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
return NULL;
}
ms->iobpool = nfsx->ctx->iobuf_pool;
ms->nfsx = nfsx;
- ms->exports = nfsx->children;
+ INIT_LIST_HEAD (&ms->exportlist);
+ ret = mnt3_init_options (ms, nfsx->options);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_OPT_INIT_FAIL,
+ "Options init failed");
+ return NULL;
+ }
+
INIT_LIST_HEAD (&ms->mountlist);
LOCK_INIT (&ms->mountlock);
return ms;
}
+int
+mount_init_state (xlator_t *nfsx)
+{
+ int ret = -1;
+ struct nfs_state *nfs = NULL;
+
+ if (!nfsx)
+ goto out;
+
+ nfs = (struct nfs_state *)nfs_state (nfsx);
+ /*Maintaining global state for MOUNT1 and MOUNT3*/
+ nfs->mstate = mnt3_init_state (nfsx);
+ if (!nfs->mstate) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to allocate mount state");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
rpcsvc_actor_t mnt3svc_actors[MOUNT3_PROC_COUNT] = {
- {"NULL", MOUNT3_NULL, mnt3svc_null, NULL, NULL},
- {"MNT", MOUNT3_MNT, mnt3svc_mnt, NULL, NULL},
- {"DUMP", MOUNT3_DUMP, mnt3svc_dump, NULL, NULL},
- {"UMNT", MOUNT3_UMNT, mnt3svc_umnt, NULL, NULL},
- {"UMNTALL", MOUNT3_UMNTALL, mnt3svc_umntall, NULL, NULL},
- {"EXPORT", MOUNT3_EXPORT, mnt3svc_export, NULL, NULL}
+ {"NULL", MOUNT3_NULL, mnt3svc_null, NULL, 0, DRC_NA},
+ {"MNT", MOUNT3_MNT, mnt3svc_mnt, NULL, 0, DRC_NA},
+ {"DUMP", MOUNT3_DUMP, mnt3svc_dump, NULL, 0, DRC_NA},
+ {"UMNT", MOUNT3_UMNT, mnt3svc_umnt, NULL, 0, DRC_NA},
+ {"UMNTALL", MOUNT3_UMNTALL, mnt3svc_umntall, NULL, 0, DRC_NA},
+ {"EXPORT", MOUNT3_EXPORT, mnt3svc_export, NULL, 0, DRC_NA}
};
/* Static init parts are assigned here, dynamic ones are done in
* mnt3svc_init and mnt3_init_state.
+ * Making MOUNT3 a synctask so that the blocking DNS calls during rpc auth
+ * get offloaded to syncenv, keeping the main/poll thread unblocked
*/
rpcsvc_program_t mnt3prog = {
.progname = "MOUNT3",
.prognum = MOUNT_PROGRAM,
.progver = MOUNT_V3,
.progport = GF_MOUNTV3_PORT,
- .progaddrfamily = AF_INET,
- .proghost = NULL,
.actors = mnt3svc_actors,
.numactors = MOUNT3_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+ .synctask = _gf_true,
};
+/**
+ * __mnt3_mounted_exports_walk -- Walk through the mounted export directories
+ * and unmount the directories that are no
+ * longer authorized to be mounted.
+ * @dict: The dict to walk
+ * @key : The key we are on
+ * @val : The value associated with that key
+ * @tmp : Additional params (pointer to an auth params struct passed here)
+ *
+ */
+int
+__mnt3_mounted_exports_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+ char *path = NULL;
+ char *host_addr_ip = NULL;
+ char *keydup = NULL;
+ char *colon = NULL;
+ struct mnt3_auth_params *auth_params = NULL;
+ int auth_status_code = 0;
+
+ gf_msg_trace (GF_MNT, 0, "Checking if key %s is authorized.", key);
+
+ auth_params = (struct mnt3_auth_params *)tmp;
+
+ /* Since we haven't obtained a lock around the mount dict
+ * here, we want to duplicate the key and then process it.
+ * Otherwise we would potentially have a race condition
+ * by modifying the key in the dict when other threads
+ * are accessing it.
+ */
+ keydup = strdupa (key);
+
+ colon = strchr (keydup, ':');
+ if (!colon)
+ return 0;
+
+ *colon = '\0';
+
+ path = alloca (strlen (keydup) + 2);
+ snprintf (path, strlen (keydup) + 2, "/%s", keydup);
+
+ /* Host is one character after ':' */
+ host_addr_ip = colon + 1;
+ auth_status_code = mnt3_auth_host (auth_params, host_addr_ip, NULL,
+ path, _gf_false, NULL);
+ if (auth_status_code != 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_AUTH_ERROR,
+ "%s is no longer authorized for %s",
+ host_addr_ip, path);
+ mnt3svc_umount (auth_params->ms, path, host_addr_ip);
+ }
+ return 0;
+}
+
+/**
+ * _mnt3_invalidate_old_mounts -- Calls __mnt3_mounted_exports_walk which
+ * checks if hosts are still authorized to be mounted
+ * and umounts those that are not.
+ *
+ * @ms: The mountstate for this service that holds all the information we need
+ *
+ */
+void
+_mnt3_invalidate_old_mounts (struct mount3_state *ms)
+{
+ gf_msg_debug (GF_MNT, 0, "Invalidating old mounts ...");
+ dict_foreach (ms->mountdict, __mnt3_mounted_exports_walk,
+ ms->auth_params);
+}
+
+
+/**
+ * _mnt3_has_file_changed -- Checks if a file has changed on disk
+ *
+ * @path: The path of the file on disk
+ * @oldmtime: The previous mtime of the file
+ *
+ * @return: file changed: TRUE
+ * otherwise : FALSE
+ *
+ * Uses get_file_mtime () in common-utils.c
+ */
+gf_boolean_t
+_mnt3_has_file_changed (const char *path, time_t *oldmtime)
+{
+ gf_boolean_t changed = _gf_false;
+ time_t mtime = {0};
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_MNT, path, out);
+ GF_VALIDATE_OR_GOTO (GF_MNT, oldmtime, out);
+
+ ret = get_file_mtime (path, &mtime);
+ if (ret < 0)
+ goto out;
+
+ if (mtime != *oldmtime) {
+ changed = _gf_true;
+ *oldmtime = mtime;
+ }
+out:
+ return changed;
+}
+
+/**
+ * _mnt3_auth_param_refresh_thread - Started using pthread_create () in
+ * mnt3svc_init (). Reloads exports/netgroups
+ * files from disk and sets the auth params
+ * structure in the mount state to reflect
+ * any changes from disk.
+ * @argv: The mount state (struct mount3_state *) to refresh
+ * @return: Always returns NULL
+ */
+void *
+_mnt3_auth_param_refresh_thread (void *argv)
+{
+ struct mount3_state *mstate = (struct mount3_state *)argv;
+ char *exp_file_path = NULL;
+ char *ng_file_path = NULL;
+ size_t nbytes = 0;
+ time_t exp_time = 0;
+ time_t ng_time = 0;
+ gf_boolean_t any_file_changed = _gf_false;
+ int ret = 0;
+
+ nbytes = strlen (exports_file_path) + 1;
+ exp_file_path = alloca (nbytes);
+ snprintf (exp_file_path, nbytes, "%s", exports_file_path);
+
+ nbytes = strlen (netgroups_file_path) + 1;
+ ng_file_path = alloca (nbytes);
+ snprintf (ng_file_path, nbytes, "%s", netgroups_file_path);
+
+ /* Set the initial timestamps to avoid reloading right after
+ * mnt3svc_init () spawns this thread */
+ get_file_mtime (exp_file_path, &exp_time);
+ get_file_mtime (ng_file_path, &ng_time);
+
+ while (_gf_true) {
+ if (mstate->stop_refresh)
+ break;
+ any_file_changed = _gf_false;
+
+ /* Sleep before checking the file again */
+ sleep (mstate->nfs->auth_refresh_time_secs);
+
+ if (_mnt3_has_file_changed (exp_file_path, &exp_time)) {
+ gf_msg (GF_MNT, GF_LOG_INFO, 0, NFS_MSG_UPDATING_EXP,
+ "File %s changed, updating exports,",
+ exp_file_path);
+
+ ret = mnt3_auth_set_exports_auth (mstate->auth_params,
+ exp_file_path);
+ if (ret)
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0,
+ NFS_MSG_SET_EXP_AUTH_PARAM_FAIL,
+ "Failed to set export auth params.");
+ else
+ any_file_changed = _gf_true;
+ }
+
+ if (_mnt3_has_file_changed (ng_file_path, &ng_time)) {
+ gf_msg (GF_MNT, GF_LOG_INFO, 0,
+ NFS_MSG_UPDATING_NET_GRP, "File %s changed,"
+ "updating netgroups", ng_file_path);
+
+ ret = mnt3_auth_set_netgroups_auth (mstate->auth_params,
+ ng_file_path);
+ if (ret)
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0,
+ NFS_MSG_SET_NET_GRP_FAIL,
+ "Failed to set netgroup auth params.");
+ else
+ any_file_changed = _gf_true;
+ }
+
+ /* If no files changed, go back to sleep */
+ if (!any_file_changed)
+ continue;
+
+ gf_msg (GF_MNT, GF_LOG_INFO, 0, NFS_MSG_PURGING_AUTH_CACHE,
+ "Purging auth cache.");
+ auth_cache_purge (mstate->authcache);
+
+ /* Walk through mounts that are no longer authorized
+ * and unmount them on the server side. This will
+ * cause subsequent file ops to fail with access denied.
+ */
+ _mnt3_invalidate_old_mounts (mstate);
+ }
+
+ return NULL;
+}
+
+/**
+ * _mnt3_init_auth_params -- Initialize authentication parameters by allocating
+ * the struct and setting the exports & netgroups
+ * files as parameters.
+ *
+ * @mstate : The mount state we are going to set the auth parameters in it.
+ *
+ * @return : success: 0 for success
+ * failure: -EINVAL for bad args, -ENOMEM for allocation errors, < 0
+ * for other errors (parsing the files, etc.) These are
+ * bubbled up from the functions we call to set the params.
+ */
+int
+_mnt3_init_auth_params (struct mount3_state *mstate)
+{
+ int ret = -EINVAL;
+ char *exp_file_path = NULL;
+ char *ng_file_path = NULL;
+ size_t nbytes = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_MNT, mstate, out);
+
+ mstate->auth_params = mnt3_auth_params_init (mstate);
+ if (!mstate->auth_params) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to init mount auth params.");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ nbytes = strlen (exports_file_path) + 1;
+ exp_file_path = alloca (nbytes);
+ snprintf (exp_file_path, nbytes, "%s", exports_file_path);
+
+ ret = mnt3_auth_set_exports_auth (mstate->auth_params, exp_file_path);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_SET_EXP_AUTH_PARAM_FAIL,
+ "Failed to set export auth params.");
+ goto out;
+ }
+
+ nbytes = strlen (netgroups_file_path) + 1;
+ ng_file_path = alloca (nbytes);
+ snprintf (ng_file_path, nbytes, "%s", netgroups_file_path);
+
+ ret = mnt3_auth_set_netgroups_auth (mstate->auth_params, ng_file_path);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret,
+ NFS_MSG_SET_EXP_AUTH_PARAM_FAIL,
+ "Failed to set netgroup auth params.");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+/**
+ * mnt3svc_deinit -- Function called by the nfs translator to cleanup all state
+ *
+ * @nfsx : The NFS translator used to perform the cleanup
+ * This structure holds all the pointers to memory that we need to free
+ * as well as the threads that have been started.
+ */
+void
+mnt3svc_deinit (xlator_t *nfsx)
+{
+ struct mount3_state *mstate = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if (!nfsx || !nfsx->private)
+ return;
+
+ nfs = (struct nfs_state *)nfsx->private;
+ mstate = (struct mount3_state *)nfs->mstate;
+
+ if (nfs->refresh_auth) {
+ /* Mark as true and wait for thread to exit */
+ mstate->stop_refresh = _gf_true;
+ pthread_join (mstate->auth_refresh_thread, NULL);
+ }
+
+ if (nfs->exports_auth)
+ mnt3_auth_params_deinit (mstate->auth_params);
+
+ /* Unmount everything and clear mountdict */
+ mnt3svc_umountall (mstate);
+}
rpcsvc_program_t *
mnt3svc_init (xlator_t *nfsx)
{
struct mount3_state *mstate = NULL;
+ struct nfs_state *nfs = NULL;
+ dict_t *options = NULL;
+ char *portstr = NULL;
+ int ret = -1;
+ pthread_t udp_thread;
- if (!nfsx)
+ if (!nfsx || !nfsx->private)
return NULL;
- gf_log (GF_MNT, GF_LOG_DEBUG, "Initing Mount v3 state");
- mstate = mnt3_init_state (nfsx);
+ nfs = (struct nfs_state *)nfsx->private;
+
+ gf_msg_debug (GF_MNT, 0, "Initing Mount v3 state");
+ mstate = (struct mount3_state *)nfs->mstate;
if (!mstate) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Mount v3 state init failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_MNT_STATE_INIT_FAIL,
+ "Mount v3 state init failed");
goto err;
}
+ mstate->nfs = nfs;
+
+ mstate->mountdict = dict_new ();
+ if (!mstate->mountdict) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to setup mount dict. Allocation error.");
+ goto err;
+ }
+
+ if (nfs->exports_auth) {
+ ret = _mnt3_init_auth_params (mstate);
+ if (ret < 0)
+ goto err;
+
+ mstate->authcache = auth_cache_init (nfs->auth_cache_ttl_sec);
+ if (!mstate->authcache) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ mstate->stop_refresh = _gf_false; /* Allow thread to run */
+ pthread_create (&mstate->auth_refresh_thread, NULL,
+ _mnt3_auth_param_refresh_thread, mstate);
+ } else
+ gf_msg (GF_MNT, GF_LOG_INFO, 0, NFS_MSG_EXP_AUTH_DISABLED,
+ "Exports auth has been disabled!");
+
mnt3prog.private = mstate;
+ options = dict_new ();
+
+ ret = gf_asprintf (&portstr, "%d", GF_MOUNTV3_PORT);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "transport.socket.listen-port",
+ portstr);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+ "dict_set_str error");
+ goto err;
+ }
+
+ if (nfs->allow_insecure) {
+ ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+ NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+ goto err;
+ }
+ ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+ NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+ goto err;
+ }
+ }
+
+ ret= rpcsvc_create_listeners (nfs->rpcsvc, options, nfsx->name);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+ NFS_MSG_LISTENERS_CREATE_FAIL,
+ "Unable to create listeners");
+ dict_unref (options);
+ goto err;
+ }
+ if (nfs->mount_udp) {
+ pthread_create (&udp_thread, NULL, mount3udp_thread, nfsx);
+ }
return &mnt3prog;
err:
return NULL;
@@ -886,12 +4045,12 @@ err:
rpcsvc_actor_t mnt1svc_actors[MOUNT1_PROC_COUNT] = {
- {"NULL", MOUNT1_NULL, mnt3svc_null, NULL, NULL},
- {{0}, },
- {"DUMP", MOUNT1_DUMP, mnt3svc_dump, NULL, NULL},
- {"UMNT", MOUNT1_UMNT, mnt3svc_umnt, NULL, NULL},
- {{0}, },
- {"EXPORT", MOUNT1_EXPORT, mnt3svc_export, NULL, NULL}
+ {"NULL", MOUNT1_NULL, mnt3svc_null, NULL, 0, DRC_NA},
+ {"MNT", MOUNT1_MNT, NULL, NULL, 0, DRC_NA },
+ {"DUMP", MOUNT1_DUMP, mnt3svc_dump, NULL, 0, DRC_NA},
+ {"UMNT", MOUNT1_UMNT, mnt3svc_umnt, NULL, 0, DRC_NA},
+ {"UMNTALL", MOUNT1_UMNTALL, NULL, NULL, 0, DRC_NA},
+ {"EXPORT", MOUNT1_EXPORT, mnt3svc_export, NULL, 0, DRC_NA}
};
rpcsvc_program_t mnt1prog = {
@@ -899,10 +4058,10 @@ rpcsvc_program_t mnt1prog = {
.prognum = MOUNT_PROGRAM,
.progver = MOUNT_V1,
.progport = GF_MOUNTV1_PORT,
- .progaddrfamily = AF_INET,
- .proghost = NULL,
.actors = mnt1svc_actors,
.numactors = MOUNT1_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+ .synctask = _gf_true,
};
@@ -910,22 +4069,112 @@ rpcsvc_program_t *
mnt1svc_init (xlator_t *nfsx)
{
struct mount3_state *mstate = NULL;
+ struct nfs_state *nfs = NULL;
+ dict_t *options = NULL;
+ char *portstr = NULL;
+ int ret = -1;
- if (!nfsx)
+ if (!nfsx || !nfsx->private)
return NULL;
- gf_log (GF_MNT, GF_LOG_DEBUG, "Initing Mount v1 state");
- mstate = mnt3_init_state (nfsx);
+ nfs = (struct nfs_state *)nfsx->private;
+ /* gf_msg_debug () takes an errnum, not a log level, as 2nd argument */
+ gf_msg_debug (GF_MNT, 0, "Initing Mount v1 state");
+ mstate = (struct mount3_state *)nfs->mstate;
if (!mstate) {
- gf_log (GF_MNT, GF_LOG_ERROR, "Mount v3 state init failed");
+ gf_msg (GF_MNT, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_INIT_FAIL,
+ "Mount v3 state init failed");
goto err;
}
mnt1prog.private = mstate;
+ options = dict_new ();
+
+ ret = gf_asprintf (&portstr, "%d", GF_MOUNTV1_PORT);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "transport.socket.listen-port", portstr);
+ if (ret == -1)
+ goto err;
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+ "dict_set_str error");
+ goto err;
+ }
+
+ if (nfs->allow_insecure) {
+ ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+ NFS_MSG_DICT_SET_FAILED,
+ "dict_set_str error");
+ goto err;
+ }
+ ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+ NFS_MSG_DICT_SET_FAILED,
+ "dict_set_str error");
+ goto err;
+ }
+ }
+
+ ret = rpcsvc_create_listeners (nfs->rpcsvc, options, nfsx->name);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+ NFS_MSG_LISTENERS_CREATE_FAIL,
+ "Unable to create listeners");
+ dict_unref (options);
+ goto err;
+ }
+
return &mnt1prog;
err:
return NULL;
}
+int
+mount_reconfigure_state (xlator_t *nfsx, dict_t *options)
+{
+ int ret = -1;
+ struct nfs_state *nfs = NULL;
+ struct mount3_state *ms = NULL;
+ struct mnt3_export *exp = NULL;
+ struct mnt3_export *texp = NULL;
+
+ if ((!nfsx) || (!options))
+ return (-1);
+
+ nfs = (struct nfs_state *)nfs_state (nfsx);
+ if (!nfs)
+ return (-1);
+
+ ms = nfs->mstate;
+ if (!ms)
+ return (-1);
+ /*
+ * Free() up the old export list. mnt3_init_options() will
+ * rebuild the export list from scratch. Do it with locking
+ * to avoid unnecessary race conditions.
+ */
+ LOCK (&ms->mountlock);
+ list_for_each_entry_safe (exp, texp, &ms->exportlist, explist) {
+ list_del (&exp->explist);
+ mnt3_export_free (exp);
+ }
+ ret = mnt3_init_options (ms, options);
+ UNLOCK (&ms->mountlock);
+
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, ret, NFS_MSG_RECONF_FAIL,
+ "Options reconfigure failed");
+ return (-1);
+ }
+
+ return (0);
+}
diff --git a/xlators/nfs/server/src/mount3.h b/xlators/nfs/server/src/mount3.h
index 7cfd2b0cb84..ce01a6c543d 100644
--- a/xlators/nfs/server/src/mount3.h
+++ b/xlators/nfs/server/src/mount3.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _MOUNT3_H_
#define _MOUNT3_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
@@ -33,6 +19,11 @@
#include "list.h"
#include "xdr-nfs3.h"
#include "locking.h"
+#include "nfs3-fh.h"
+#include "compat-uuid.h"
+#include "exports.h"
+#include "mount3-auth.h"
+#include "auth-cache.h"
/* Registered with portmap */
#define GF_MOUNTV3_PORT 38465
@@ -48,7 +39,33 @@ mnt3svc_init (xlator_t *nfsx);
extern rpcsvc_program_t *
mnt1svc_init (xlator_t *nfsx);
-/* Data structureused to store the list of mounts points currently
+extern void
+mnt3svc_deinit (xlator_t *nfsx);
+
+extern int
+mount_init_state (xlator_t *nfsx);
+
+extern int
+mount_reconfigure_state (xlator_t *nfsx, dict_t *options);
+
+void
+mount_rewrite_rmtab (struct mount3_state *ms, char *new_rmtab);
+
+struct mnt3_export *
+mnt3_mntpath_to_export (struct mount3_state *ms, const char *dirpath,
+ gf_boolean_t export_parsing_match);
+
+extern int
+mnt3svc_update_mountlist (struct mount3_state *ms, rpcsvc_request_t *req,
+ const char *expname, const char *fullpath);
+
+int
+mnt3_authenticate_request (struct mount3_state *ms, rpcsvc_request_t *req,
+ struct nfs3_fh *fh, const char *volname,
+ const char *path, char **authorized_path,
+ char **authorized_host, gf_boolean_t is_write_op);
+
+/* Data structure used to store the list of mounts points currently
* in use by NFS clients.
*/
struct mountentry {
@@ -58,21 +75,107 @@ struct mountentry {
/* The export name */
char exname[MNTPATHLEN];
char hostname[MNTPATHLEN];
+ char fullpath[MNTPATHLEN];
+
+ gf_boolean_t has_full_path;
+
+ /* Since this is stored in a dict, we want to be able
+ * to easily get the key we used to store
+ * the struct in our dict
+ */
+ char hashkey[MNTPATHLEN*2+2];
+};
+
+#define MNT3_EXPTYPE_VOLUME 1
+#define MNT3_EXPTYPE_DIR 2
+
+/* Structure to hold one export-dir AUTH parameter. Entries are chained
+ * through the 'next' pointer.
+ */
+struct host_auth_spec {
+ char *host_addr; /* Allowed IP or host name */
+ uint32_t netmask; /* Network mask (Big-Endian) */
+ struct host_auth_spec *next; /* Pointer to next AUTH struct */
+};
+
+struct mnt3_export {
+ struct list_head explist; /* list linkage; presumably chained on mount3_state.exportlist - TODO confirm */
+
+ /* The string that may contain either the volume name if the full volume
+ * is exported or the subdirectory in the volume.
+ */
+ char *expname;
+ /*
+ * IP addresses, hostnames or subnets that are allowed to connect to the
+ * expname subvolume or subdirectory
+ */
+ struct host_auth_spec* hostspec;
+ xlator_t *vol;
+ int exptype; /* presumably MNT3_EXPTYPE_VOLUME or MNT3_EXPTYPE_DIR - TODO confirm */
+
+ /* This holds the full path that the client requested including
+ * the volume name AND the subdirectory in the volume.
+ */
+ char *fullpath;
+
+ /* Extracted from nfs volume options if nfs.dynamicvolumes is on.
+ */
+ uuid_t volumeid;
+ uuid_t mountid;
};
struct mount3_state {
xlator_t *nfsx;
+ /* The NFS state that this belongs to */
+ struct nfs_state *nfs;
+
/* The buffers for all network IO are got from this pool. */
struct iobuf_pool *iobpool;
- xlator_list_t *exports;
+
+ /* List of exports, can be volumes or directories in those volumes. */
+ struct list_head exportlist;
/* List of current mount points over all the exports from this
* server.
*/
struct list_head mountlist;
- /* Used to protect the mountlist. */
+ /* Dict of current mount points over all the exports from this
+ * server. Mirrors the mountlist above, but can be used for
+ * faster lookup in the event that there are several mounts.
+ * Currently, each NFSOP is validated against this dict: each
+ * op is checked to see if the host that operates on the path
+ * does in fact have an entry in the mount dict.
+ */
+ dict_t *mountdict;
+
+ /* Used to protect the mountlist & the mount dict */
gf_lock_t mountlock;
+
+ /* Used to insert additional authentication parameters */
+ struct mnt3_auth_params *auth_params;
+
+ /* Set to 0 if exporting full volumes is disabled. On by default. */
+ gf_boolean_t export_volumes;
+ gf_boolean_t export_dirs;
+
+ pthread_t auth_refresh_thread;
+ gf_boolean_t stop_refresh;
+
+ struct auth_cache *authcache;
+};
+
+#define gf_mnt3_export_dirs(mst) ((mst)->export_dirs)
+
+struct mount3_resolve_state {
+ struct mnt3_export *exp;
+ struct mount3_state *mstate;
+ rpcsvc_request_t *req;
+
+ char remainingdir[MNTPATHLEN]; /* NOTE(review): presumably the part of the path still to be resolved - confirm against the users of this struct */
+ loc_t resolveloc;
+ struct nfs3_fh parentfh;
};
+
+typedef struct mount3_resolve_state mnt3_resolve_t;
+
#endif
diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c
new file mode 100644
index 00000000000..e8e226e953e
--- /dev/null
+++ b/xlators/nfs/server/src/mount3udp_svc.c
@@ -0,0 +1,234 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "xdr-nfs3.h"
+#include "logging.h"
+#include "mem-pool.h"
+#include "nfs-mem-types.h"
+#include "nfs-messages.h"
+#include "mount3.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <rpc/pmap_clnt.h>
+#include <string.h>
+#include <memory.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+
+extern struct nfs3_fh*
+nfs3_rootfh (struct svc_req *req, xlator_t *nfsx, char *dp, char *expname);
+
+extern mountres3
+mnt3svc_set_mountres3 (mountstat3 stat, struct nfs3_fh *fh,
+ int *authflavor, u_int aflen);
+extern int
+mount3udp_add_mountlist (xlator_t *nfsx, char *host, char *expname);
+
+extern int
+mount3udp_delete_mountlist (xlator_t *nfsx, char *host, char *expname);
+
+extern mountstat3
+mnt3svc_errno_to_mnterr (int32_t errnum);
+
+
+/* only this thread will use this, no locking needed */
+char mnthost[INET_ADDRSTRLEN+1];
+
+#define MNT3UDP_AUTH_LEN 1 /* Only AUTH_UNIX for now */
+
+/*
+ * MOUNT3 MNT procedure for the UDP transport.
+ *
+ * @dpp : XDR-decoded directory path requested by the client
+ * @req : RPC request handle, forwarded to nfs3_rootfh ()
+ *
+ * Leading '/' characters are skipped before the root file handle is looked
+ * up. On success the caller's address (mnthost) is added to the mount list
+ * under the export name filled in by nfs3_rootfh ().
+ *
+ * @return: heap-allocated mountres3 carrying either MNT3_OK plus the file
+ *          handle, or an error status; NULL if memory allocation failed.
+ *          mountudp_program_3 () frees the result after replying.
+ */
+mountres3 *
+mountudpproc3_mnt_3_svc(dirpath **dpp, struct svc_req *req)
+{
+        struct mountres3        *res = NULL;
+        int                     *autharr = NULL;
+        struct nfs3_fh          *fh = NULL;
+        char                    *mpath = NULL;
+        xlator_t                *nfsx = THIS;
+        char                    expname[PATH_MAX] = {0, };
+        mountstat3              stat = MNT3ERR_SERVERFAULT;
+        int                     op_errno = 0;
+
+        errno = 0; /* RESET errno */
+
+        mpath = (char *)*dpp;
+        while (*mpath == '/')
+                mpath++;
+
+        res = GF_CALLOC (1, sizeof(*res), gf_nfs_mt_mountres3);
+        if (res == NULL) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Unable to allocate memory");
+                goto err;
+        }
+        autharr = GF_CALLOC (MNT3UDP_AUTH_LEN, sizeof(int), gf_nfs_mt_int);
+        if (autharr == NULL) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Unable to allocate memory");
+                goto err;
+        }
+
+        autharr[0] = AUTH_UNIX;
+
+        fh = nfs3_rootfh (req, nfsx, mpath, (char *)expname);
+        /* Save errno immediately: the logging call below may overwrite it
+         * before it is translated into a mount status.
+         */
+        op_errno = errno;
+
+        /* FAILURE: No FH */
+        if (fh == NULL) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, op_errno, NFS_MSG_GET_FH_FAIL,
+                        "Unable to get fh for %s", mpath);
+                if (op_errno)
+                        stat = mnt3svc_errno_to_mnterr (op_errno);
+                *res = mnt3svc_set_mountres3 (stat, NULL /* fh */,
+                                              autharr, MNT3UDP_AUTH_LEN);
+                return res;
+        }
+
+        /* SUCCESS */
+        stat = MNT3_OK;
+        *res = mnt3svc_set_mountres3 (stat, fh, autharr, MNT3UDP_AUTH_LEN);
+        (void) mount3udp_add_mountlist (nfsx, mnthost, (char *) expname);
+        return res;
+
+ err:
+        /* Only allocation failures land here; no file handle exists yet */
+        GF_FREE (res);
+        GF_FREE (autharr);
+        return NULL;
+}
+
+/*
+ * MOUNT3 UMNT procedure for the UDP transport: asks for the (mnthost, path)
+ * entry to be removed from the mount list and reports MNT3_OK regardless of
+ * the outcome of that removal.
+ *
+ * @return: heap-allocated status, or NULL if the allocation failed.
+ */
+mountstat3 *
+mountudpproc3_umnt_3_svc(dirpath **dp, struct svc_req *req)
+{
+        char            *path = (char *) *dp;
+        xlator_t        *nfsx = THIS;
+        mountstat3      *status = NULL;
+
+        status = GF_CALLOC (1, sizeof(mountstat3), gf_nfs_mt_mountstat3);
+        if (!status) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Unable to allocate memory");
+                return NULL;
+        }
+
+        *status = MNT3_OK;
+        (void) mount3udp_delete_mountlist (nfsx, mnthost, path);
+
+        return status;
+}
+
+static void
+mountudp_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
+{
+ union {
+ dirpath mountudpproc3_mnt_3_arg;
+ } argument;
+ char *result = NULL;
+ xdrproc_t _xdr_argument = NULL, _xdr_result = NULL;
+ char *(*local)(char *, struct svc_req *) = NULL; /* handler chosen from rq_proc below */
+ mountres3 *res = NULL;
+ struct sockaddr_in *sin = NULL;
+
+ sin = svc_getcaller (transp);
+ inet_ntop (AF_INET, &sin->sin_addr, mnthost, INET_ADDRSTRLEN+1); /* caller address as text; the MNT/UMNT handlers read it from mnthost */
+
+ switch (rqstp->rq_proc) {
+ case NULLPROC:
+ (void) svc_sendreply (transp, (xdrproc_t) xdr_void,
+ (char *)NULL);
+ return;
+
+ case MOUNT3_MNT:
+ _xdr_argument = (xdrproc_t) xdr_dirpath;
+ _xdr_result = (xdrproc_t) xdr_mountres3;
+ local = (char *(*)(char *,
+ struct svc_req *)) mountudpproc3_mnt_3_svc;
+ break;
+
+ case MOUNT3_UMNT:
+ _xdr_argument = (xdrproc_t) xdr_dirpath;
+ _xdr_result = (xdrproc_t) xdr_mountstat3;
+ local = (char *(*)(char *,
+ struct svc_req *)) mountudpproc3_umnt_3_svc;
+ break;
+
+ default:
+ svcerr_noproc (transp);
+ return;
+ }
+ memset ((char *)&argument, 0, sizeof (argument));
+ if (!svc_getargs (transp, (xdrproc_t) _xdr_argument,
+ (caddr_t) &argument)) {
+ svcerr_decode (transp);
+ return;
+ }
+ result = (*local)((char *)&argument, rqstp);
+ if (result == NULL) { /* both handlers return NULL when an allocation fails */
+ gf_msg_debug (GF_MNT, 0, "PROC returned error");
+ svcerr_systemerr (transp);
+ }
+ if (result != NULL && !svc_sendreply(transp, (xdrproc_t) _xdr_result,
+ result)) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_SVC_ERROR,
+ "svc_sendreply returned error");
+ svcerr_systemerr (transp);
+ }
+ if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument,
+ (caddr_t) &argument)) { /* decoded arguments are released even when the handler failed */
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_ARG_FREE_FAIL,
+ "Unable to free arguments");
+ }
+ if (result == NULL)
+ return;
+ /* free the result: the handlers heap-allocate their reply; for MNT the
+ * file handle and auth flavor buffers referenced by the reply are
+ * released as well
+ */
+ switch (rqstp->rq_proc) {
+ case MOUNT3_MNT:
+ res = (mountres3 *) result;
+ GF_FREE (res->mountres3_u.mountinfo.fhandle.fhandle3_val);
+ GF_FREE (res->mountres3_u.mountinfo.auth_flavors.auth_flavors_val);
+ GF_FREE (res);
+ break;
+
+ case MOUNT3_UMNT:
+ GF_FREE (result);
+ break;
+ }
+ return;
+}
+
+/*
+ * Thread entry point serving the MOUNT v3 protocol over UDP.
+ *
+ * @argv : the NFS xlator (xlator_t *) this thread serves
+ *
+ * Creates a UDP RPC transport, registers mountudp_program_3 () for
+ * MOUNT_PROGRAM/MOUNT_V3 and then blocks in svc_run ().
+ *
+ * @return: always NULL; reached only on a setup failure or if svc_run ()
+ *          returns.
+ */
+void *
+mount3udp_thread (void *argv)
+{
+        xlator_t         *nfsx   = argv;
+        register SVCXPRT *transp = NULL;
+
+        GF_ASSERT (nfsx);
+
+        if (glusterfs_this_set(nfsx)) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_XLATOR_SET_FAIL,
+                        "Failed to set xlator, nfs.mount-udp will not work");
+                return NULL;
+        }
+
+        transp = svcudp_create(RPC_ANYSOCK);
+        if (transp == NULL) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_SVC_ERROR,
+                        "svcudp_create error");
+                return NULL;
+        }
+        if (!svc_register(transp, MOUNT_PROGRAM, MOUNT_V3,
+                          mountudp_program_3, IPPROTO_UDP)) {
+                gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_SVC_ERROR,
+                        "svc_register error");
+                /* Release the transport (and its socket) instead of leaking
+                 * it for the lifetime of the process.
+                 */
+                svc_destroy (transp);
+                return NULL;
+        }
+
+        svc_run ();
+        gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_SVC_RUN_RETURNED,
+                "svc_run returned");
+        return NULL;
+}
diff --git a/xlators/nfs/server/src/netgroups.c b/xlators/nfs/server/src/netgroups.c
new file mode 100644
index 00000000000..1003b72ef8c
--- /dev/null
+++ b/xlators/nfs/server/src/netgroups.c
@@ -0,0 +1,1160 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2),in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "netgroups.h"
+#include "parse-utils.h"
+#include "nfs-messages.h"
+
+static void _nge_print (const struct netgroup_entry *nge);
+static void _netgroup_entry_deinit (struct netgroup_entry *ptr);
+static void _netgroup_host_deinit (struct netgroup_host *host);
+
+static dict_t *__deleted_entries;
+static struct parser *ng_file_parser;
+static struct parser *ng_host_parser;
+
+/**
+ * _ng_init_parsers -- Initialize the parsers used in this file
+ *
+ * Either both ng_file_parser and ng_host_parser are initialized or neither
+ * is: if the host parser cannot be created, the file parser created just
+ * before it is released again so that it does not leak.
+ *
+ * @return: success: 0 (on success the parsers are initialized)
+ *          failure: -1
+ */
+static int
+_ng_init_parsers ()
+{
+        int ret = -1;
+
+        /* Initialize the parsers. The only reason this should
+         * ever fail is because of 1) memory allocation errors
+         * 2) the regex in netgroups.h has been changed and no
+         * longer compiles.
+         */
+        ng_file_parser = parser_init (NG_FILE_PARSE_REGEX);
+        if (!ng_file_parser)
+                goto out;
+
+        ng_host_parser = parser_init (NG_HOST_PARSE_REGEX);
+        if (!ng_host_parser) {
+                /* Undo the first half of the initialization */
+                parser_deinit (ng_file_parser);
+                ng_file_parser = NULL;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/**
+ * _ng_deinit_parsers - Free the parsers used in this file
+ *
+ * The file-scope parser pointers are reset to NULL afterwards so that no
+ * dangling pointer to a released parser is left behind.
+ */
+static void
+_ng_deinit_parsers ()
+{
+        parser_deinit (ng_file_parser);
+        ng_file_parser = NULL;
+
+        parser_deinit (ng_host_parser);
+        ng_host_parser = NULL;
+}
+
+/**
+ * _netgroups_file_init - allocate a netgroup file struct
+ *
+ * The filename and ng_file_dict members of the new struct are set to NULL.
+ *
+ * @return: success: Pointer to an allocated netgroup file struct
+ *          failure: NULL
+ *
+ * Not for external use.
+ */
+static struct netgroups_file *
+_netgroups_file_init ()
+{
+        struct netgroups_file *ngf = NULL;
+
+        ngf = GF_MALLOC (sizeof (*ngf), gf_common_mt_nfs_netgroups);
+        if (ngf) {
+                ngf->filename = NULL;
+                ngf->ng_file_dict = NULL;
+        }
+
+        return ngf;
+}
+
+/**
+ * __ngf_free_walk - dict_foreach () callback that releases one entry of the
+ *                   netgroup file dict
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict; its data is treated as a
+ *        struct netgroup_entry
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__ngf_free_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _netgroup_entry_deinit ((struct netgroup_entry *)val->data);
+        val->data = NULL;
+
+        /* Remove the key from this dict */
+        dict_del (dict, key);
+
+        return 0;
+}
+
+/**
+ * __deleted_entries_free_walk - dict_foreach () callback that removes one
+ *                               key from the temporary dict of freed names
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict (not used)
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__deleted_entries_free_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        (void) dict_del (dict, key);
+
+        return 0;
+}
+
+/**
+ * ng_file_deinit - Release a netgroup file struct together with everything
+ *                  allocated for its members.
+ *
+ * A temporary dict (__deleted_entries) is created for the duration of the
+ * call; it records the names of entries that were already freed. If that
+ * dict cannot be created the function returns without freeing anything.
+ *
+ * @ngfile : Pointer to the netgroup file structure that needs to be freed
+ * @return : Nothing
+ *
+ * External facing function.
+ *
+ * Should be called by the caller of ng_file_parse () in order to free
+ * the memory allocated when parsing the file.
+ */
+void
+ng_file_deinit (struct netgroups_file *ngfile)
+{
+        GF_VALIDATE_OR_GOTO (GF_NG, ngfile, done);
+
+        __deleted_entries = dict_new ();
+        GF_VALIDATE_OR_GOTO (GF_NG, __deleted_entries, done);
+
+        /* Tear down every entry of the file dict, then the dict itself */
+        dict_foreach (ngfile->ng_file_dict, __ngf_free_walk, NULL);
+        dict_unref (ngfile->ng_file_dict);
+
+        GF_FREE (ngfile->filename);
+        GF_FREE (ngfile);
+
+        /* Clean up temporary dict we used to store "freed" names */
+        dict_foreach (__deleted_entries, __deleted_entries_free_walk, NULL);
+        dict_unref (__deleted_entries);
+        __deleted_entries = NULL;
+done:
+        return;
+}
+
+/**
+ * _netgroup_entry_init - Allocate a zero-filled netgroup entry struct.
+ * A netgroup entry struct represents a single line in a netgroups file.
+ *
+ * @return : success: Pointer to a netgroup entry struct
+ *         : failure: NULL
+ *
+ * Not for external use.
+ */
+static struct netgroup_entry *
+_netgroup_entry_init ()
+{
+        struct netgroup_entry *nge = NULL;
+
+        nge = GF_CALLOC (1, sizeof (*nge), gf_common_mt_nfs_netgroups);
+
+        return nge;
+}
+
+/**
+ * __ngh_free_walk - dict_foreach () callback that frees the host structure
+ *                   stored under a key of a netgroup host dict.
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict; its data is treated as a
+ *        struct netgroup_host
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__ngh_free_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _netgroup_host_deinit ((struct netgroup_host *)val->data);
+        val->data = NULL;
+        dict_del (dict, key);
+
+        return 0;
+}
+
+/**
+ * __nge_free_walk - dict_foreach () callback that frees the netgroup entry
+ *                   stored under a key of a netgroup entry dict, unless the
+ *                   key is already recorded in __deleted_entries.
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict; its data is treated as a
+ *        struct netgroup_entry
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__nge_free_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        struct netgroup_entry *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, dict, done);
+
+        if (!val)
+                goto done;
+
+        entry = (struct netgroup_entry *)val->data;
+
+        /* Only free entries whose name has not been seen (freed) before */
+        if (dict_get (__deleted_entries, key) == NULL) {
+                _netgroup_entry_deinit (entry);
+                val->data = NULL;
+        }
+
+        /* The key is dropped from this dict in either case */
+        dict_del (dict, key);
+
+done:
+        return 0;
+}
+
+/**
+ * _netgroup_entry_deinit - Free memory pointed to by the parameter
+ * and any memory allocated for members
+ * in the struct. This function walks the
+ * netgroups and hosts dicts if they
+ * are allocated and frees them.
+ *
+ * The walk over the netgroups dict uses __nge_free_walk (), which calls
+ * back into this function for child entries, so the teardown is recursive.
+ * The name of each freed entry is recorded in the file-scope
+ * __deleted_entries dict, which ng_file_deinit () creates before the
+ * teardown starts.
+ *
+ * @ngentry: Pointer to a netgroup entry struct that needs to be freed;
+ * NULL is accepted and ignored
+ *
+ * @return : Nothing
+ *
+ * Not for external use.
+ */
+static void
+_netgroup_entry_deinit (struct netgroup_entry *ngentry)
+{
+ dict_t *ng_dict = NULL;
+ dict_t *host_dict = NULL;
+ char *name = NULL;
+ data_t *dint = NULL;
+
+ if (!ngentry)
+ return;
+
+ ng_dict = ngentry->netgroup_ngs;
+ host_dict = ngentry->netgroup_hosts;
+
+ if (ng_dict) {
+ /* Free the dict of netgroup entries */
+ dict_foreach (ng_dict, __nge_free_walk, NULL);
+ dict_unref (ng_dict);
+ ngentry->netgroup_ngs = NULL;
+ }
+
+ if (host_dict) {
+ /* Free the dict of host entries */
+ dict_foreach (host_dict, __ngh_free_walk, NULL);
+ dict_unref (host_dict);
+ ngentry->netgroup_hosts = NULL;
+ }
+
+ if (ngentry->netgroup_name) {
+ /* Keep track of the netgroup names we've deallocated.
+ * We need to do this because of the nature of this data
+ * structure. This data structure may hold multiple
+ * pointers to an already freed object, but these are
+ * uniquely identifiable by the name. We keep track
+ * of these names so when we encounter a key which has
+ * an association to an already freed object, we don't
+ * free it twice.
+ */
+ name = strdupa (ngentry->netgroup_name); /* copy used as the dict key; the original string is freed below */
+
+ dint = int_to_data (1); /* the key's presence is what __nge_free_walk () tests for */
+ dict_set (__deleted_entries, name, dint);
+
+ GF_FREE (ngentry->netgroup_name);
+ ngentry->netgroup_name = NULL;
+ }
+
+ GF_FREE (ngentry);
+}
+
+/**
+ * _netgroup_host_init - Allocate a zero-filled netgroup host structure.
+ * A netgroup host struct represents an item in a line of a netgroups file that
+ * looks like this : (hostname,user,domain)
+ *
+ * @return : success: Pointer to a netgroup host struct
+ *         : failure: NULL
+ *
+ * Not for external use.
+ */
+static struct netgroup_host *
+_netgroup_host_init ()
+{
+        struct netgroup_host *ngh = NULL;
+
+        ngh = GF_CALLOC (1, sizeof (*ngh), gf_common_mt_nfs_netgroups);
+
+        return ngh;
+}
+
+/**
+ * _netgroup_host_deinit - Release a netgroup host struct and the hostname,
+ *                         user and domain strings it owns.
+ *
+ * @host : Pointer to a netgroup host struct that needs to be freed
+ *
+ * @return : Nothing
+ *
+ * Not for external use.
+ */
+static void
+_netgroup_host_deinit (struct netgroup_host *host)
+{
+        GF_VALIDATE_OR_GOTO (GF_NG, host, done);
+
+        /* Members first ... */
+        GF_FREE (host->hostname);
+        host->hostname = NULL;
+        GF_FREE (host->user);
+        host->user = NULL;
+        GF_FREE (host->domain);
+        host->domain = NULL;
+
+        /* ... then the struct itself */
+        GF_FREE (host);
+done:
+        return;
+}
+
+/**
+ * _nge_dict_get - Fetch the netgroup entry stored in a dict under a
+ *                 netgroup name.
+ *
+ * @dict : The dict we are looking up from. This function makes the
+ *         assumption that the type of underlying data in the dict is of type
+ *         struct netgroup_entry. The behavior is not defined otherwise.
+ *
+ * @ngname : Key used to lookup in the dict.
+ *
+ * @return : success: Pointer to a netgroup entry
+ *           failure: NULL (bad arguments or no such key in the dict)
+ *
+ * Not for external use.
+ */
+static struct netgroup_entry *
+_nge_dict_get (dict_t *dict, const char *ngname)
+{
+        data_t                *found = NULL;
+        struct netgroup_entry *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, dict, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, ngname, done);
+
+        found = dict_get (dict, (char *)ngname);
+        if (found != NULL)
+                entry = (struct netgroup_entry *)found->data;
+done:
+        return entry;
+}
+
+/**
+ * _nge_dict_insert - Store a netgroup entry in the dict, keyed by the
+ *                    entry's netgroup name.
+ *
+ * @dict : The dict we are inserting into.
+ *
+ * @nge  : The data to insert into the dict.
+ *
+ * @return : nothing (the result of dict_set () is not reported)
+ *
+ * Not for external use.
+ */
+static void
+_nge_dict_insert (dict_t *dict, struct netgroup_entry *nge)
+{
+        GF_VALIDATE_OR_GOTO (GF_NG, dict, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, nge, done);
+
+        dict_set (dict, nge->netgroup_name, bin_to_data (nge, sizeof (*nge)));
+done:
+        return;
+}
+
+/**
+ * ngh_dict_get - Fetch the netgroup host entry stored in a dict under a
+ *                hostname.
+ *
+ * @dict : The dict we are looking up from. This function makes the
+ *         assumption that the type of underlying data in the dict is of type
+ *         struct netgroup_host. The behavior is not defined otherwise.
+ *
+ * @hostname : Key used to lookup in the dict.
+ *
+ * @return : success: Pointer to a netgroup host entry
+ *           failure: NULL (bad arguments or no such key in the dict)
+ *
+ * Externally usable.
+ */
+struct netgroup_host *
+ngh_dict_get (dict_t *dict, const char *hostname)
+{
+        data_t               *found = NULL;
+        struct netgroup_host *ngh   = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, dict, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, hostname, done);
+
+        found = dict_get (dict, (char *)hostname);
+        if (found)
+                ngh = (struct netgroup_host *)found->data;
+
+done:
+        return ngh;
+}
+
+/**
+ * _ngh_dict_insert - Store a netgroup host entry in the dict, keyed by the
+ *                    entry's hostname.
+ *
+ * @dict : The dict we are inserting into.
+ *
+ * @ngh  : The data to insert into the dict.
+ *
+ * @return : nothing (the result of dict_set () is not reported)
+ *
+ * Not for external use.
+ */
+static void
+_ngh_dict_insert (dict_t *dict, struct netgroup_host *ngh)
+{
+        GF_VALIDATE_OR_GOTO (GF_NG, dict, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, ngh, done);
+
+        dict_set (dict, ngh->hostname, bin_to_data (ngh, sizeof (*ngh)));
+done:
+        return;
+}
+
+/**
+ * _ngh_print - Prints the netgroup host in the
+ *              format '(hostname,user,domain)'
+ *
+ * Any of the three members that is NULL is printed as an empty string, so
+ * a NULL pointer is never handed to the '%s' conversion.
+ *
+ * @ngh : The netgroup host to print out
+ *
+ * @return : nothing
+ *
+ * Not for external use.
+ */
+static void
+_ngh_print (const struct netgroup_host *ngh)
+{
+        /* Validate args */
+        GF_VALIDATE_OR_GOTO (GF_NG, ngh, err);
+
+        printf ("(%s,%s,%s)", ngh->hostname ? ngh->hostname : "",
+                ngh->user ? ngh->user : "",
+                ngh->domain ? ngh->domain : "");
+err:
+        return;
+}
+
+/**
+ * __nge_print_walk - dict_foreach () callback that prints the value stored
+ *                    under a key as a 'struct netgroup_entry'.
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__nge_print_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _nge_print ((struct netgroup_entry *)val->data);
+        return 0;
+}
+
+/**
+ * __ngh_print_walk - dict_foreach () callback that prints the value stored
+ *                    under a key as a 'struct netgroup_host'.
+ *
+ * Used by _nge_print () when walking an entry's host dict.
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__ngh_print_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _ngh_print ((struct netgroup_host *)val->data);
+        return 0;
+}
+
+/**
+ * _nge_print - Prints the netgroup entry in the
+ *              format '<netgroup name> <following entries>'
+ *
+ * The name is printed first, followed by the child netgroups and then the
+ * hosts, each only if the corresponding dict exists.
+ *
+ * @nge : The netgroup entry to print out
+ *
+ * @return : nothing
+ *
+ * Not for external use.
+ */
+static void
+_nge_print (const struct netgroup_entry *nge)
+{
+        GF_VALIDATE_OR_GOTO (GF_NG, nge, done);
+
+        printf ("%s ", nge->netgroup_name);
+
+        if (nge->netgroup_ngs != NULL)
+                dict_foreach (nge->netgroup_ngs, __nge_print_walk, NULL);
+
+        if (nge->netgroup_hosts != NULL)
+                dict_foreach (nge->netgroup_hosts, __ngh_print_walk, NULL);
+
+done:
+        return;
+}
+
+/**
+ * __ngf_print_walk - dict_foreach () callback that prints one entry of the
+ *                    netgroups file via _nge_print (), followed by a
+ *                    newline.
+ *
+ * Used by ng_file_print ().
+ *
+ * @dict: the dict we are walking
+ * @key : the key we are processing in the dict
+ * @val : the corresponding value in the dict
+ * @tmp : Pointer to additional data that may be passed in (not used)
+ *
+ * @return : always 0
+ *
+ * Not for external use.
+ */
+static int
+__ngf_print_walk (dict_t *dict, char *key, data_t *val, void *tmp)
+{
+        if (!val)
+                return 0;
+
+        _nge_print ((struct netgroup_entry *)val->data);
+        printf ("\n");
+
+        return 0;
+}
+
+/**
+ * ng_file_print - Prints the netgroup file in the
+ *                 format '<netgroup name> <following entries>', etc.
+ *                 The netgroup file is a dict of netgroup entries
+ *                 which, in turn, is a combination of other 'sub' netgroup
+ *                 entries and host entries. This function prints
+ *                 all of that out by calling the corresponding print functions
+ *
+ * @ngfile : The netgroup file to print out; a NULL pointer is rejected
+ *           instead of being dereferenced.
+ *
+ * @return : nothing
+ *
+ * External facing function.
+ *
+ * Can be called on any valid 'struct netgroups_file *' type.
+ */
+void
+ng_file_print (const struct netgroups_file *ngfile)
+{
+        /* Validate args, like the other external functions in this file */
+        GF_VALIDATE_OR_GOTO (GF_NG, ngfile, out);
+
+        dict_foreach (ngfile->ng_file_dict, __ngf_print_walk, NULL);
+out:
+        return;
+}
+
+/**
+ * ng_file_get_netgroup - Look up a netgroup entry from the netgroups file
+ *                        based on the netgroup name and return a pointer
+ *                        to the netgroup entry.
+ *
+ * @ngfile   : The netgroup file to lookup from.
+ * @netgroup : The netgroup name used to lookup from the netgroup file.
+ *
+ * @return : success: Pointer to the netgroup entry
+ *           failure: NULL (bad arguments or no such netgroup in the file)
+ *
+ * External facing function.
+ *
+ * Can be called on any valid 'struct netgroups_file *' type with a valid 'char
+ * *' as the lookup key.
+ */
+struct netgroup_entry *
+ng_file_get_netgroup (const struct netgroups_file *ngfile, const char *netgroup)
+{
+        data_t                *found = NULL;
+        struct netgroup_entry *entry = NULL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, ngfile, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, netgroup, done);
+
+        found = dict_get (ngfile->ng_file_dict, (char *)netgroup);
+        if (found)
+                entry = (struct netgroup_entry *)found->data;
+
+done:
+        return entry;
+}
+
+/**
+ * __check_host_entry_str - Check if the host string which should be
+ *                          in the format '(host,user,domain)' is
+ *                          valid to be parsed. Currently checks
+ *                          if the # of commas is correct and there
+ *                          are no spaces in the string, but more
+ *                          checks can be added.
+ *
+ * @host_str : String to check
+ * @return   : TRUE if the string contains exactly two commas and no spaces
+ *             FALSE otherwise, including when host_str is NULL
+ *
+ * Not for external use.
+ */
+static gf_boolean_t
+__check_host_entry_str (const char *host_str)
+{
+        unsigned int  comma_count = 0;
+        const char   *cursor      = NULL;
+        /* Start out invalid so that a NULL string is never reported valid */
+        gf_boolean_t  str_valid   = _gf_false;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, host_str, out);
+
+        /* Single pass over the string; no strlen () per iteration */
+        for (cursor = host_str; *cursor != '\0'; cursor++) {
+                /* Spaces are not allowed in this string. e.g, (a,b,c) is valid
+                 * but (a, b,c) is not.
+                 */
+                if (*cursor == ' ')
+                        goto out;
+
+                if (*cursor == ',')
+                        comma_count++;
+        }
+
+        str_valid = (comma_count == 2);
+out:
+        return str_valid;
+}
+
+/**
+ * _parse_ng_host - Parse the netgroup host string into a netgroup host struct.
+ *                  The netgroup host string is structured as follows:
+ *                  (host,user,domain)
+ *                  Spaces are not accepted anywhere in the string.
+ *
+ * The matches returned by the host parser are assigned, in order, to the
+ * hostname, user and domain members of a newly allocated struct; at most
+ * three matches are consumed.
+ *
+ * @ng_str : String to parse
+ * @ngh    : Out parameter; set to the newly allocated host struct on success
+ * @return : success: 0 if the parsing succeeded
+ *           failure: -EINVAL for bad args, -ENOMEM for allocation errors,
+ *                    1 for parsing errors.
+ *
+ * Not for external use.
+ */
+static int
+_parse_ng_host (char *ng_str, struct netgroup_host **ngh)
+{
+        struct netgroup_host *ng_host = NULL;
+        unsigned int          parts   = 0;
+        char                 *match   = NULL;
+        int                   ret     = -EINVAL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, ng_str, out);
+        GF_VALIDATE_OR_GOTO (GF_NG, ngh, out);
+
+        if (!__check_host_entry_str (ng_str)) {
+                ret = 1; /* Parse failed */
+                goto out;
+        }
+
+        ret = parser_set_string (ng_host_parser, ng_str);
+        if (ret < 0)
+                goto out;
+
+        gf_msg_trace (GF_NG, 0, "parsing host string: %s", ng_str);
+
+        ng_host = _netgroup_host_init ();
+        GF_CHECK_ALLOC (ng_host, ret, free_and_out); /* Sets ret to -ENOMEM on
+                                                      * failure.
+                                                      */
+        while ((match = parser_get_next_match (ng_host_parser)) != NULL) {
+                /* 'parts' is unsigned, so it is printed with %u */
+                gf_msg_trace (GF_NG, 0, "found match: %s (parts=%u)", match,
+                              parts);
+
+                switch (parts) {
+                case 0:
+                        ng_host->hostname = match;
+                        break;
+                case 1:
+                        ng_host->user = match;
+                        break;
+                case 2:
+                        ng_host->domain = match;
+                        break;
+                default:
+                        GF_FREE (match);
+                        break;
+                };
+
+                /* We only allow three parts in the host string;
+                 * The format for the string is (a,b,c)
+                 */
+                parts++;
+                if (parts > 2)
+                        break;
+        }
+
+        /* Set the parameter */
+        *ngh = ng_host;
+        ret = 0;
+
+free_and_out:
+        parser_unset_string (ng_host_parser);
+out:
+        return ret;
+}
+
+/**
+ * _ng_handle_host_part - Parse the host string that looks like this :
+ *                        '(dev1763.prn2.facebook.com,,)' into a host
+ *                        struct and insert it into the parent netgroup's
+ *                        host dict.
+ * @match : The host string
+ * @ngp   : The parent netgroup; it must already have a name
+ *
+ * The parent's host dict is created on first use. If that allocation fails,
+ * the host struct that was just parsed is released before returning.
+ *
+ * @return: success: 0 if parsing succeeded
+ *          failure: -EINVAL for bad args, -ENOMEM if the host dict cannot
+ *                   be allocated, other errors bubbled up from
+ *                   _parse_ng_host (including 1 for a parse error).
+ *
+ *
+ * Not for external use.
+ */
+static int
+_ng_handle_host_part (char *match, struct netgroup_entry *ngp)
+{
+        struct netgroup_host *ngh = NULL;
+        int                   ret = -EINVAL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, match, out);
+        GF_VALIDATE_OR_GOTO (GF_NG, ngp, out);
+
+        if (!ngp->netgroup_name) {
+                gf_msg (GF_NG, GF_LOG_WARNING, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Invalid: Line starts with hostname!");
+                goto out;
+        }
+
+        /* Parse the host string and get a struct for it */
+        ret = _parse_ng_host (match, &ngh);
+        if (ret < 0) {
+                gf_msg (GF_NG, GF_LOG_CRITICAL, -ret, NFS_MSG_PARSE_FAIL,
+                        "Critical error : %s", strerror (-ret));
+                goto out;
+        }
+        if (ret != 0) {
+                /* Cannot change to gf_msg
+                 * gf_msg not giving output to STDOUT
+                 * Bug id : BZ1215017
+                 */
+                gf_log (GF_NG, GF_LOG_WARNING,
+                        "Parse error for: %s", match);
+                goto out;
+        }
+
+
+        /* Make dict for the parent entry's netgroup hosts */
+        if (!ngp->netgroup_hosts) {
+                ngp->netgroup_hosts = dict_new ();
+                if (!ngp->netgroup_hosts) {
+                        /* Nothing owns ngh yet, so free it here */
+                        _netgroup_host_deinit (ngh);
+                        ret = -ENOMEM;
+                        goto out;
+                }
+        }
+
+        /* Insert this entry into the parent netgroup dict */
+        _ngh_dict_insert (ngp->netgroup_hosts, ngh);
+
+out:
+        return ret;
+}
+
+/**
+ * _ng_setup_netgroup_entry - Create a netgroup entry named after the given
+ *                            string and insert it into the file's dict.
+ *
+ * The new entry stores the 'match' pointer itself as its name (the string
+ * is not copied).
+ *
+ * @match    : The netgroup string
+ * @file     : The netgroup file we insert the entry into
+ * @ng_entry : Double pointer to the netgroup entry we want to allocate and set.
+ *
+ * @return: success: 0
+ *          failure: -EINVAL for bad args, -ENOMEM if the entry cannot be
+ *                   allocated.
+ *
+ *
+ * Not for external use.
+ */
+static int
+_ng_setup_netgroup_entry (char *match, struct netgroups_file *file,
+                          struct netgroup_entry **ng_entry)
+{
+        struct netgroup_entry *entry = NULL;
+        int                    ret   = -EINVAL;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, match, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, file, done);
+        GF_VALIDATE_OR_GOTO (GF_NG, ng_entry, done);
+
+        entry = _netgroup_entry_init ();
+        GF_CHECK_ALLOC (entry, ret, done);
+
+        entry->netgroup_name = match;
+
+        /* Register the new entry in the file dict and hand it back */
+        _nge_dict_insert (file->ng_file_dict, entry);
+        *ng_entry = entry;
+
+        ret = 0;
+done:
+        return ret;
+}
+
+/**
+ * _parse_ng_line - Parse a line in the netgroups file into a netgroup entry
+ * struct. The netgroup line is structured as follows:
+ * 'netgroupx netgroupy (hosta,usera,domaina)...' OR
+ * 'netgroupx netgroupy netgroupz...' OR
+ * 'netgroupx (hosta,usera,domaina) (hostb,userb,domainb)'
+ * This function parses this into a netgroup entry
+ * which will hold either a dict of netgroups and/or
+ * a dict of hosts that make up this netgroup.
+ *
+ * In general terms, the data structure to represent a netgroups file
+ * is a set of nested dictionaries. Each line in the netgroups file
+ * is compiled into a struct netgroup_entry structure that holds a dict
+ * of netgroups and a dict of hostnames. The first string in the netgroups
+ * line is the parent netgroup entry and the rest of the items in the line
+ * are the children of that parent netgroup entry. (Hence variables ngp
+ * and nge).
+ *
+ * A sample netgroup file may look like this:
+ *
+ * async async.ash3 async.ash4
+ * async.ash3 async.04.ash3
+ * async04.ash3 (async001.ash3.facebook.com,,) (async002.ash3.facebook.com,,)
+ *
+ * _parse_ng_line will get called on each line, so on the first call to this
+ * function, our data structure looks like this:
+ *
+ *
+ * dict [
+ * 'async' --> dict [
+ * 'async.ash3'
+ * 'async.ash4'
+ * ]
+ * ]
+ *
+ * On the second call to the function with the second line, our data structure
+ * looks like this:
+ *
+ * dict [
+ * 'async' --> dict [
+ * 'async.ash3' -> dict [ 'async.04.ash3' ]
+ * 'async.ash4' ^
+ * ] |
+ * |
+ * 'async.ash3' ------------------------------
+ * ]
+ *
+ * And so on.
+ *
+ * The obvious answer to storing this file in a data structure may be a tree
+ * but lookups from a tree are expensive and since we may be looking up stuff
+ * in this file in the I/O path, we can't afford expensive lookups.
+ *
+ * @ng_str : String to parse
+ * @file : Netgroup file to put the parsed line into
+ * @ng_entry : Double pointer to struct that we are going to allocate and fill
+ *
+ * The string gets parsed into a structure pointed to by
+ * the parameter 'ng_entry'
+ *
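+ * @return : success: 0 if parsing succeeded (or the line was skipped)
+ *           failure: non-zero if not (a negative error code, or 1 if no
+ *                    first match could be found on the line)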
+ *
+ * Not for external use.
+ */
+static int
+_parse_ng_line (char *ng_str, struct netgroups_file *file,
+                struct netgroup_entry **ng_entry)
+{
+        struct netgroup_entry *ngp         = NULL; /* Parent netgroup entry */
+        struct netgroup_entry *nge         = NULL; /* Generic netgroup entry */
+        char                  *match       = NULL;
+        int                    ret         = -EINVAL;
+        unsigned int           num_entries = 0;
+
+        /* Validate arguments. ng_entry is written on success, so it must
+         * not be NULL either.
+         */
+        GF_VALIDATE_OR_GOTO (GF_NG, ng_str, out);
+        GF_VALIDATE_OR_GOTO (GF_NG, file, out);
+        GF_VALIDATE_OR_GOTO (GF_NG, ng_entry, out);
+
+        /* Empty lines and lines starting with a space are skipped and
+         * reported as success; *ng_entry is left untouched in that case.
+         */
+        if (*ng_str == ' ' || *ng_str == '\0' || *ng_str == '\n') {
+                ret = 0;
+                goto out;
+        }
+
+        ret = parser_set_string (ng_file_parser, ng_str);
+        if (ret < 0)
+                goto out;
+
+        /* This is the first name in the line, and should be the
+         * parent netgroup entry.
+         */
+        match = parser_get_next_match (ng_file_parser);
+        if (!match) {
+                ret = 1;
+                gf_msg (GF_NG, GF_LOG_WARNING, 0,
+                        NFS_MSG_FIND_FIRST_MATCH_FAIL, "Unable to find "
+                        "first match.");
+                gf_msg (GF_NG, GF_LOG_WARNING, 0, NFS_MSG_PARSE_FAIL,
+                        "Error parsing str: %s", ng_str);
+                goto out;
+        }
+
+        /* Lookup to see if the match already exists,
+         * if not, set the parent.
+         */
+        ngp = _nge_dict_get (file->ng_file_dict, match);
+        if (!ngp) {
+                ret = _ng_setup_netgroup_entry (match, file, &ngp);
+                if (ret < 0) {
+                        /* Setup only fails before the entry is created,
+                         * so nothing took ownership of 'match': release
+                         * it here, then bubble the error up.
+                         */
+                        GF_FREE (match);
+                        goto out;
+                }
+        } else {
+                /* The existing entry already has its own name string */
+                GF_FREE (match);
+        }
+
+        if (!ngp->netgroup_ngs) {
+                /* If a netgroup dict has not been allocated
+                 * for this parent, allocate it.
+                 */
+                ngp->netgroup_ngs = dict_new ();
+                GF_CHECK_ALLOC (ngp->netgroup_ngs, ret, out);
+                /* No need to free anything here since ngp is already
+                 * a part of the file. When the file gets
+                 * deallocated, we will free ngp.
+                 */
+        }
+
+        while ((match = parser_get_next_match (ng_file_parser)) != NULL) {
+                num_entries++;
+
+                /* This means that we hit a host entry in the line */
+                if (*match == '(') {
+                        ret = _ng_handle_host_part (match, ngp);
+                        GF_FREE (match);
+                        if (ret != 0) {
+                                /* If parsing the host fails, bubble the
+                                 * error code up to the caller.
+                                 */
+                                goto out;
+                        }
+                        continue;
+                }
+
+                /* Otherwise the token names a child netgroup */
+                nge = _nge_dict_get (file->ng_file_dict, match);
+                if (!nge) {
+                        ret = _ng_setup_netgroup_entry (match, file, &nge);
+                        if (ret < 0) {
+                                /* As above: the failed setup did not
+                                 * keep 'match', so free it before
+                                 * bubbling the error up.
+                                 */
+                                GF_FREE (match);
+                                goto out;
+                        }
+                } else {
+                        GF_FREE (match);
+                }
+
+                /* Insert the netgroup into the parent's dict */
+                _nge_dict_insert (ngp->netgroup_ngs, nge);
+        }
+
+        /* If there are no entries on the RHS, log an error, but continue */
+        if (!num_entries) {
+                /* Cannot change to gf_msg
+                 * gf_msg not giving output to STDOUT
+                 * Bug id : BZ1215017
+                 */
+                gf_log (GF_NG, GF_LOG_WARNING,
+                        "No netgroups were specified except for the parent.");
+        }
+
+        *ng_entry = ngp;
+        ret = 0;
+
+out:
+        parser_unset_string (ng_file_parser);
+        return ret;
+}
+
+/**
+ * ng_file_parse - Parse a netgroups file into the netgroups file struct.
+ * This is the external facing function that must be called
+ * to parse a netgroups file. This function returns a netgroup
+ * file struct that is allocated and must be freed using
+ * ng_file_deinit.
+ *
+ * @filepath : Path to the netgroups file we need to parse
+ *
+ * @return : success: Pointer to a netgroup file struct if parsing succeeded
+ * failure: NULL if not
+ *
+ * Externally facing function
+ */
+struct netgroups_file *
+ng_file_parse (const char *filepath)
+{
+        FILE                  *fp    = NULL;
+        size_t                 len   = 0;
+        ssize_t                nread = 0;  /* getline() returns ssize_t */
+        char                  *line  = NULL;
+        struct netgroups_file *file  = NULL;
+        struct netgroup_entry *nge   = NULL;
+        int                    ret   = 0;
+
+        GF_VALIDATE_OR_GOTO (GF_NG, filepath, err);
+
+        fp = fopen (filepath, "r");
+        if (!fp)
+                goto err;
+
+        file = _netgroups_file_init ();
+        if (!file)
+                goto err;
+
+        file->ng_file_dict = dict_new ();
+        if (!file->ng_file_dict) {
+                gf_msg (GF_NG, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to allocate netgroup file dict");
+                goto err;
+        }
+
+        file->filename = gf_strdup (filepath);
+        if (!file->filename) {
+                gf_msg (GF_NG, GF_LOG_CRITICAL, errno, NFS_MSG_FILE_OP_FAILED,
+                        "Failed to duplicate filename");
+                goto err;
+        }
+
+        ret = _ng_init_parsers ();
+        if (ret < 0)
+                goto err;
+
+        /* Read the file line-by-line and parse it */
+        while ((nread = getline (&line, &len, fp)) != -1) {
+                if (*line == '#') /* Lines starting with # are comments */
+                        continue;
+
+                /* Parse the line into a netgroup entry */
+                ret = _parse_ng_line (line, file, &nge);
+                if (ret == -ENOMEM) {
+                        gf_msg (GF_NG, GF_LOG_CRITICAL, ENOMEM,
+                                NFS_MSG_NO_MEMORY, "Allocation error "
+                                "while parsing line!");
+                        /* 'file' and 'line' are released exactly once on
+                         * the err path below; releasing them here as well
+                         * would tear 'file' down twice.
+                         */
+                        goto err;
+                }
+                if (ret != 0) {
+                        /* Any other failure only skips this line */
+                        gf_msg_debug (GF_NG, 0, "Failed to parse line %s",
+                                      line);
+                        continue;
+                }
+        }
+
+        /* line got allocated through getline(), don't use GF_FREE() for it */
+        free (line);
+
+        /* NOTE(review): the parsers set up by _ng_init_parsers() are only
+         * torn down on the error path - confirm whether the success path
+         * should release them as well.
+         */
+        fclose (fp);
+
+        return file;
+
+err:
+        /* getline() buffer: plain free(), and free (NULL) is a no-op */
+        free (line);
+
+        if (file)
+                ng_file_deinit (file);
+
+        _ng_deinit_parsers ();
+
+        if (fp)
+                fclose (fp);
+        return NULL;
+}
diff --git a/xlators/nfs/server/src/netgroups.h b/xlators/nfs/server/src/netgroups.h
new file mode 100644
index 00000000000..6044abfabb3
--- /dev/null
+++ b/xlators/nfs/server/src/netgroups.h
@@ -0,0 +1,54 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NETGROUPS_H
+#define _NETGROUPS_H
+
+#include "nfs-mem-types.h"
+#include "dict.h"
+#include "nfs.h"
+
+#define GF_NG GF_NFS"-netgroup"
+
+#define NG_FILE_PARSE_REGEX "([a-zA-Z0-9.(,)-]+)"
+#define NG_HOST_PARSE_REGEX "([a-zA-Z0-9.-]+)"
+
+struct netgroup_host {
+ char *hostname; /* Hostname of entry */
+ char *user; /* User field in the entry */
+ char *domain; /* Domain field in the entry */
+};
+
+struct netgroup_entry {
+ char *netgroup_name; /* Name of the netgroup */
+ dict_t *netgroup_ngs; /* Dict of netgroups in this netgroup */
+ dict_t *netgroup_hosts; /* Dict of hosts in this netgroup. */
+};
+
+struct netgroups_file {
+ char *filename; /* Filename on disk */
+ dict_t *ng_file_dict; /* Dict of netgroup entries */
+};
+
+struct netgroups_file *
+ng_file_parse (const char *filepath);
+
+struct netgroup_entry *
+ng_file_get_netgroup (const struct netgroups_file *ngfile,
+ const char *netgroup);
+
+void
+ng_file_deinit (struct netgroups_file *ngfile);
+
+#endif /* _NETGROUPS_H */
diff --git a/xlators/nfs/server/src/nfs-common.c b/xlators/nfs/server/src/nfs-common.c
index 9f68f714649..d9ea1e1ac47 100644
--- a/xlators/nfs/server/src/nfs-common.c
+++ b/xlators/nfs/server/src/nfs-common.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
@@ -33,6 +19,7 @@
#include "nfs-mem-types.h"
#include "rpcsvc.h"
#include "iatt.h"
+#include "nfs-messages.h"
#include <libgen.h>
@@ -86,23 +73,23 @@ nfs_xlator_to_xlid (xlator_list_t *cl, xlator_t *xl)
xlator_t *
nfs_mntpath_to_xlator (xlator_list_t *cl, char *path)
{
- char volname[MNTPATHLEN];
+ char *volname = NULL;
char *volptr = NULL;
- int pathlen = 0;
+ size_t pathlen;
xlator_t *targetxl = NULL;
if ((!cl) || (!path))
return NULL;
- strcpy (volname, path);
+ volname = strdupa (path);
pathlen = strlen (volname);
- gf_log (GF_NFS, GF_LOG_TRACE, "Subvolume search: %s", path);
+ gf_msg_trace (GF_NFS, 0, "Subvolume search: %s", path);
if (volname[0] == '/')
volptr = &volname[1];
else
volptr = &volname[0];
- if (volname[pathlen - 1] == '/')
+ if (pathlen && volname[pathlen - 1] == '/')
volname[pathlen - 1] = '\0';
while (cl) {
@@ -119,75 +106,21 @@ nfs_mntpath_to_xlator (xlator_list_t *cl, char *path)
}
-/* Returns 1 if the stat seems to be filled with zeroes. */
-int
-nfs_zero_filled_stat (struct iatt *buf)
-{
- if (!buf)
- return 1;
-
- /* Do not use st_dev because it is transformed to store the xlator id
- * in place of the device number. Do not use st_ino because by this time
- * we've already mapped the root ino to 1 so it is not guaranteed to be
- * 0.
- */
- if ((buf->ia_nlink == 0) && (buf->ia_type == 0))
- return 1;
-
- return 0;
-}
-
-
void
nfs_loc_wipe (loc_t *loc)
{
- if (!loc)
- return;
-
- if (loc->path) {
- GF_FREE ((char *)loc->path);
- loc->path = NULL;
- }
-
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
-
- loc->ino = 0;
+ loc_wipe (loc);
}
int
nfs_loc_copy (loc_t *dst, loc_t *src)
{
- int ret = -1;
-
- dst->ino = src->ino;
-
- if (src->inode)
- dst->inode = inode_ref (src->inode);
+ int ret = -1;
- if (src->parent)
- dst->parent = inode_ref (src->parent);
+ ret = loc_copy (dst, src);
- dst->path = gf_strdup (src->path);
-
- if (!dst->path)
- goto out;
-
- dst->name = strrchr (dst->path, '/');
- if (dst->name)
- dst->name++;
-
- ret = 0;
-out:
- return ret;
+ return ret;
}
@@ -201,24 +134,25 @@ nfs_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
if (inode) {
loc->inode = inode_ref (inode);
- loc->ino = inode->ino;
+ if (!gf_uuid_is_null (inode->gfid))
+ gf_uuid_copy (loc->gfid, inode->gfid);
}
if (parent)
loc->parent = inode_ref (parent);
- loc->path = gf_strdup (path);
- if (!loc->path) {
- gf_log (GF_NFS, GF_LOG_ERROR, "strdup failed");
- goto loc_wipe;
+ if (path) {
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "strdup failed");
+ goto loc_wipe;
+ }
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
}
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else
- goto loc_wipe;
-
ret = 0;
loc_wipe:
if (ret < 0)
@@ -229,7 +163,7 @@ loc_wipe:
int
-nfs_inode_loc_fill (inode_t *inode, loc_t *loc)
+nfs_inode_loc_fill (inode_t *inode, loc_t *loc, int how)
{
char *resolvedpath = NULL;
inode_t *parent = NULL;
@@ -238,35 +172,49 @@ nfs_inode_loc_fill (inode_t *inode, loc_t *loc)
if ((!inode) || (!loc))
return ret;
- if ((inode) && (inode->ino == 1))
- goto ignore_parent;
-
- parent = inode_parent (inode, 0, NULL);
- if (!parent)
- goto err;
+ /* If gfid is not null, then the inode is already linked to
+ * the inode table, and not a newly created one. For newly
+ * created inode, inode_path returns null gfid as the path.
+ */
+ if (!gf_uuid_is_null (inode->gfid)) {
+ ret = inode_path (inode, NULL, &resolvedpath);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_PATH_RESOLVE_FAIL, "path resolution "
+ "failed %s", resolvedpath);
+ goto err;
+ }
+ }
-ignore_parent:
- ret = inode_path (inode, NULL, &resolvedpath);
- if (ret < 0)
- goto err;
+ if (resolvedpath == NULL) {
+ char tmp_path[GFID_STR_PFX_LEN + 1] = {0,};
+ snprintf (tmp_path, sizeof (tmp_path), "<gfid:%s>",
+ uuid_utoa (loc->gfid));
+ resolvedpath = gf_strdup (tmp_path);
+ } else {
+ parent = inode_parent (inode, loc->pargfid, NULL);
+ }
ret = nfs_loc_fill (loc, inode, parent, resolvedpath);
- if (ret < 0)
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret,
+ NFS_MSG_LOC_FILL_RESOLVE_FAIL,
+ "loc fill resolution failed %s", resolvedpath);
goto err;
+ }
+ ret = 0;
err:
if (parent)
inode_unref (parent);
- if (resolvedpath)
- GF_FREE (resolvedpath);
+ GF_FREE (resolvedpath);
return ret;
}
-
int
-nfs_ino_loc_fill (inode_table_t *itable, uint64_t ino, uint64_t gen, loc_t *loc)
+nfs_gfid_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *loc, int how)
{
int ret = -EFAULT;
inode_t *inode = NULL;
@@ -274,13 +222,41 @@ nfs_ino_loc_fill (inode_table_t *itable, uint64_t ino, uint64_t gen, loc_t *loc)
if (!loc)
return ret;
- inode = inode_get (itable, ino, gen);
+ inode = inode_find (itable, gfid);
if (!inode) {
- ret = -ENOENT;
- goto err;
- }
+ gf_msg_trace (GF_NFS, 0, "Inode not found in itable, will "
+ "try to create one.");
+ if (how == NFS_RESOLVE_CREATE) {
+ gf_msg_trace (GF_NFS, 0, "Inode needs to be created.");
+ inode = inode_new (itable);
+ if (!inode) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "Failed to "
+ "allocate memory");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ } else {
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOENT,
+ NFS_MSG_INODE_NOT_FOUND, "Inode not found in "
+ "itable and no creation was requested.");
+ ret = -ENOENT;
+ goto err;
+ }
+ } else {
+ gf_msg_trace (GF_NFS, 0, "Inode was found in the itable.");
+ }
+
+ gf_uuid_copy (loc->gfid, gfid);
- ret = nfs_inode_loc_fill (inode, loc);
+ ret = nfs_inode_loc_fill (inode, loc, how);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret,
+ NFS_MSG_INODE_LOC_FILL_ERROR,
+ "Inode loc filling failed.: %s", strerror (-ret));
+ goto err;
+ }
err:
if (inode)
@@ -290,6 +266,17 @@ err:
int
+nfs_root_loc_fill (inode_table_t *itable, loc_t *loc)
+{
+ uuid_t rootgfid = {0, };
+
+ rootgfid[15] = 1;
+ return nfs_gfid_loc_fill (itable, rootgfid, loc, NFS_RESOLVE_EXIST);
+}
+
+
+
+int
nfs_parent_inode_loc_fill (inode_t *parent, inode_t *entryinode, char *entry,
loc_t *loc)
{
@@ -300,11 +287,14 @@ nfs_parent_inode_loc_fill (inode_t *parent, inode_t *entryinode, char *entry,
return ret;
ret = inode_path (parent, entry, &path);
- if (ret < 0)
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PATH_RESOLVE_FAIL,
+ "path resolution failed %s", path);
goto err;
+ }
ret = nfs_loc_fill (loc, entryinode, parent, path);
-
+ GF_FREE (path);
err:
return ret;
}
@@ -317,8 +307,8 @@ err:
* On other errors, return -3. 0 on success.
*/
int
-nfs_entry_loc_fill (inode_table_t *itable, ino_t ino, uint64_t gen, char *entry,
- loc_t *loc, int how)
+nfs_entry_loc_fill (xlator_t *this, inode_table_t *itable, uuid_t pargfid,
+ char *entry, loc_t *loc, int how)
{
inode_t *parent = NULL;
inode_t *entryinode = NULL;
@@ -329,23 +319,26 @@ nfs_entry_loc_fill (inode_table_t *itable, ino_t ino, uint64_t gen, char *entry,
if ((!itable) || (!entry) || (!loc))
return ret;
- parent = inode_get (itable, ino, gen);
+ parent = inode_find (itable, pargfid);
ret = -1;
/* Will need hard resolution now */
- if (!parent)
+ if (!parent || inode_ctx_get (parent, this, NULL))
goto err;
+ gf_uuid_copy (loc->pargfid, pargfid);
+
ret = -2;
entryinode = inode_grep (itable, parent, entry);
- if (!entryinode) {
+ if (!entryinode || inode_ctx_get (entryinode, this, NULL)) {
if (how == NFS_RESOLVE_CREATE) {
/* Even though we'll create the inode and the loc for
* a missing inode, we still need to return -2 so
* that the caller can use the filled loc to call
* lookup.
*/
- entryinode = inode_new (itable);
+ if (!entryinode)
+ entryinode = inode_new (itable);
/* Cannot change ret because that must
* continue to have -2.
*/
@@ -363,13 +356,18 @@ nfs_entry_loc_fill (inode_table_t *itable, ino_t ino, uint64_t gen, char *entry,
ret = inode_path (parent, entry, &resolvedpath);
if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PATH_RESOLVE_FAIL,
+ "path resolution failed %s", resolvedpath);
ret = -3;
goto err;
}
ret = nfs_loc_fill (loc, entryinode, parent, resolvedpath);
- if (ret < 0)
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_INODE_LOC_FILL_ERROR,
+ "loc_fill failed %s", resolvedpath);
ret = -3;
+ }
err:
if (parent)
@@ -378,11 +376,79 @@ err:
if (entryinode)
inode_unref (entryinode);
- if (resolvedpath)
- GF_FREE (resolvedpath);
+ GF_FREE (resolvedpath);
return ret;
}
+uint32_t
+nfs_hash_gfid (uuid_t gfid)
+{
+        uint64_t upper = 0;  /* copy of gfid bytes 8..15 */
+        uint64_t lower = 0;  /* copy of gfid bytes 0..7 */
+        uint32_t a1    = 0;
+        uint32_t a2    = 0;
+        uint32_t a3    = 0;
+        uint32_t a4    = 0;
+
+        /* The root gfid always hashes to 1 */
+        if (__is_root_gfid (gfid))
+                return 0x1;
+
+        memcpy (&upper, &gfid[8], 8);
+        memcpy (&lower, &gfid[0], 8);
+
+        /* NOTE(review): a 64-bit value shifted left by 32 has all-zero low
+         * 32 bits, so a1 and a3 are always 0 once narrowed to uint32_t and
+         * only the upper halves of the two words reach the hash. Confirm
+         * whether that is intended before changing it - callers may rely
+         * on the current hash values.
+         */
+        a1 = (upper << 32);
+        a2 = (upper >> 32);
+        a3 = (lower << 32);
+        a4 = (lower >> 32);
+
+        /* Fold the four pieces together with XOR */
+        return (a1 ^ a4) ^ (a2 ^ a3);
+}
+
+
+/* Stamp the nfs inode ctx of 'inode' with the current generation taken from
+ * the xlator's private state. If the inode has no nfs ctx yet, one is
+ * allocated and stored on the inode. Failures are logged and otherwise
+ * ignored; nothing is returned to the caller.
+ */
+void
+nfs_fix_generation (xlator_t *this, inode_t *inode)
+{
+        uint64_t              raw_ctx = 0;
+        struct nfs_inode_ctx *ictx    = NULL;
+        struct nfs_state     *priv    = NULL;
+        int                   ret     = -1;
+
+        /* 'this' is dereferenced below, so guard it along with the inode */
+        if (!this || !inode) {
+                return;
+        }
+        priv = this->private;
+
+        /* Existing ctx: only the generation needs refreshing */
+        if (inode_ctx_get (inode, this, &raw_ctx) == 0) {
+                ictx = (struct nfs_inode_ctx *)raw_ctx;
+                ictx->generation = priv->generation;
+                return;
+        }
+
+        /* No ctx yet: build one and attach it to the inode */
+        ictx = GF_CALLOC (1, sizeof (struct nfs_inode_ctx),
+                          gf_nfs_mt_inode_ctx);
+        if (!ictx) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        NFS_MSG_NO_MEMORY,
+                        "could not allocate nfs inode ctx");
+                return;
+        }
+        INIT_LIST_HEAD (&ictx->shares);
+        ictx->generation = priv->generation;
+
+        ret = inode_ctx_put (inode, this, (uint64_t)ictx);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        NFS_MSG_INODE_CTX_STORE_FAIL,
+                        "could not store nfs inode ctx");
+                /* The ctx was not attached to the inode, so nobody else
+                 * will release it.
+                 */
+                GF_FREE (ictx);
+        }
+}
diff --git a/xlators/nfs/server/src/nfs-common.h b/xlators/nfs/server/src/nfs-common.h
index 20003aa7130..77bdfb0bbf0 100644
--- a/xlators/nfs/server/src/nfs-common.h
+++ b/xlators/nfs/server/src/nfs-common.h
@@ -1,40 +1,29 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS_COMMON_H_
#define _NFS_COMMON_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <unistd.h>
#include "xlator.h"
#include "rpcsvc.h"
#include "iatt.h"
+#include "compat-uuid.h"
-#define NFS_PATH_MAX PATH_MAX
+//NFS_PATH_MAX hard-coded to 4096 as a work around for bug 2476.
+//nfs server crashes when path received is longer than PATH_MAX
+#define NFS_PATH_MAX 4096
#define NFS_NAME_MAX NAME_MAX
-#define NFS_DEFAULT_CREATE_MODE 0644
+#define NFS_DEFAULT_CREATE_MODE 0600
extern xlator_t *
nfs_xlid_to_xlator (xlator_list_t *cl, uint8_t xlid);
@@ -48,9 +37,6 @@ nfs_path_to_xlator (xlator_list_t *cl, char *path);
extern xlator_t *
nfs_mntpath_to_xlator (xlator_list_t *cl, char *path);
-extern int
-nfs_zero_filled_stat (struct iatt *buf);
-
extern void
nfs_loc_wipe (loc_t *loc);
@@ -64,13 +50,24 @@ nfs_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path);
#define NFS_RESOLVE_CREATE 2
extern int
-nfs_inode_loc_fill (inode_t *inode, loc_t *loc);
+nfs_inode_loc_fill (inode_t *inode, loc_t *loc, int how);
extern int
-nfs_ino_loc_fill (inode_table_t *itable, uint64_t ino, uint64_t gen, loc_t *l);
+nfs_ino_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *l);
+
+extern int
+nfs_entry_loc_fill (xlator_t *this, inode_table_t *itable, uuid_t pargfid,
+ char *entry, loc_t *loc, int how);
+
+extern int
+nfs_root_loc_fill (inode_table_t *itable, loc_t *loc);
+
+extern uint32_t
+nfs_hash_gfid (uuid_t gfid);
extern int
-nfs_entry_loc_fill (inode_table_t *itable, ino_t ino, uint64_t gen, char *entry,
- loc_t *loc, int how);
+nfs_gfid_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *loc, int how);
+void
+nfs_fix_generation (xlator_t *this, inode_t *inode);
#endif
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
index e7505b5dcdd..f6361f02161 100644
--- a/xlators/nfs/server/src/nfs-fops.c
+++ b/xlators/nfs/server/src/nfs-fops.c
@@ -1,26 +1,15 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <grp.h>
+#include <pwd.h>
#include "dict.h"
#include "xlator.h"
@@ -31,16 +20,112 @@
#include "nfs-fops.h"
#include "inode.h"
#include "nfs-common.h"
-
+#include "nfs3-helpers.h"
+#include "nfs-mem-types.h"
+#include "nfs-messages.h"
#include <libgen.h>
#include <semaphore.h>
-#define nfs_stack_destroy(fram) \
- do { \
- (fram)->local = NULL; \
- STACK_DESTROY ((fram)->root); \
- } while (0) \
+static int gf_auth_max_groups_nfs_log = 0;
+
+/* Replace the auxiliary group list of 'root' with the groups of root->uid
+ * as known on this host. Does nothing unless priv->server_aux_gids is set.
+ * The list is taken from the gid cache when present; otherwise it is
+ * resolved with getpwuid_r()/getgrouplist() and added to the cache. In both
+ * cases at most max_groups entries are copied to the call stack. On any
+ * lookup failure 'root' is left unchanged.
+ */
+void
+nfs_fix_groups (xlator_t *this, call_stack_t *root)
+{
+        struct passwd     mypw;
+        char              mystrs[1024];
+        struct passwd    *result;
+#ifdef GF_DARWIN_HOST_OS
+        /* BSD/DARWIN does not correctly use gid_t in getgrouplist */
+        int               mygroups[GF_MAX_AUX_GROUPS];
+#else
+        gid_t             mygroups[GF_MAX_AUX_GROUPS];
+#endif
+        int               ngroups;
+        int               i;
+        int               max_groups;
+        struct nfs_state *priv = this->private;
+        const gid_list_t *agl;
+        gid_list_t        gl;
+
+        if (!priv->server_aux_gids) {
+                return;
+        }
+        /* RPC enforces the GF_AUTH_GLUSTERFS_MAX_GROUPS limit */
+        max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(root->lk_owner.len);
+
+        /* NOTE(review): root->groups is written with up to max_groups
+         * entries below - confirm the call stack's group storage is large
+         * enough for that.
+         */
+        agl = gid_cache_lookup(&priv->gid_cache, root->uid, 0, 0);
+        if (agl) {
+                if (agl->gl_count > max_groups) {
+                        GF_LOG_OCCASIONALLY (gf_auth_max_groups_nfs_log,
+                                        this->name, GF_LOG_WARNING,
+                                        "too many groups, reducing %d -> %d",
+                                        agl->gl_count, max_groups);
+                }
+
+                /* Copy at most max_groups entries: the bound is exclusive,
+                 * matching the warning above and the clamp applied on the
+                 * uncached path below.
+                 */
+                for (ngroups = 0; ngroups < agl->gl_count
+                                && ngroups < max_groups; ngroups++) {
+                        root->groups[ngroups] = agl->gl_list[ngroups];
+                }
+                root->ngrps = ngroups;
+                gid_cache_release(&priv->gid_cache, agl);
+                return;
+        }
+
+        /* No cached list found. */
+        if (getpwuid_r(root->uid,&mypw,mystrs,sizeof(mystrs),&result) != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, NFS_MSG_GETPWUID_FAIL,
+                        "getpwuid_r(%u) failed", root->uid);
+                return;
+        }
+
+        /* A zero return with a NULL result means no matching entry */
+        if (!result) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, NFS_MSG_GETPWUID_FAIL,
+                        "getpwuid_r(%u) found nothing", root->uid);
+                return;
+        }
+
+        gf_msg_trace (this->name, 0, "mapped %u => %s",
+                      root->uid, result->pw_name);
+
+        ngroups = GF_MAX_AUX_GROUPS;
+        if (getgrouplist(result->pw_name,root->gid,mygroups,&ngroups) == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, NFS_MSG_MAP_GRP_LIST_FAIL,
+                        "could not map %s to group list", result->pw_name);
+                return;
+        }
+
+        /* Add the group data to the cache. */
+        gl.gl_list = GF_CALLOC(ngroups, sizeof(gid_t), gf_nfs_mt_aux_gids);
+        if (gl.gl_list) {
+                /* It's not fatal if the alloc failed. */
+                gl.gl_id = root->uid;
+                gl.gl_uid = 0;
+                gl.gl_gid = 0;
+                gl.gl_count = ngroups;
+                memcpy(gl.gl_list, mygroups, sizeof(gid_t) * ngroups);
+                /* Free the list again unless gid_cache_add() returned 1 */
+                if (gid_cache_add(&priv->gid_cache, &gl) != 1)
+                        GF_FREE(gl.gl_list);
+        }
+
+        /* RPC enforces the GF_AUTH_GLUSTERFS_MAX_GROUPS limit */
+        if (ngroups > max_groups) {
+                GF_LOG_OCCASIONALLY (gf_auth_max_groups_nfs_log,
+                                this->name, GF_LOG_WARNING,
+                                "too many groups, reducing %d -> %d",
+                                ngroups, max_groups);
+
+                ngroups = max_groups;
+        }
+
+        /* Copy data to the frame. */
+        for (i = 0; i < ngroups; ++i) {
+                gf_msg_trace (this->name, 0, "%s is in group %u",
+                              result->pw_name, mygroups[i]);
+                root->groups[i] = mygroups[i];
+        }
+        root->ngrps = ngroups;
+}
struct nfs_fop_local *
nfs_fop_local_init (xlator_t *nfsx)
@@ -59,13 +144,9 @@ nfs_fop_local_init (xlator_t *nfsx)
void
nfs_fop_local_wipe (xlator_t *nfsx, struct nfs_fop_local *l)
{
- struct nfs_state *nfs = NULL;
-
if ((!nfsx) || (!l))
return;
- nfs = nfsx->private;
-
if (l->iobref)
iobref_unref (l->iobref);
@@ -78,11 +159,44 @@ nfs_fop_local_wipe (xlator_t *nfsx, struct nfs_fop_local *l)
if (l->newparent)
inode_unref (l->newparent);
- mem_put (nfs->foppool, l);
+ if (l->dictgfid)
+ dict_unref (l->dictgfid);
+
+ mem_put (l);
return;
}
+#define nfs_stack_destroy(nfl, fram) \
+ do { \
+ nfs_fop_local_wipe ((nfl)->nfsx, nfl); \
+ (fram)->local = NULL; \
+ STACK_DESTROY ((fram)->root); \
+ } while (0) \
+
+
+pthread_mutex_t ctr = PTHREAD_MUTEX_INITIALIZER;
+unsigned int cval = 1;
+
+
+/* Hand out the next value of the file-level counter 'cval', serialised by
+ * the 'ctr' mutex. The value 0 is never returned: a counter that has
+ * wrapped to 0 restarts at 1.
+ */
+int
+nfs_frame_getctr ()
+{
+        uint64_t next = 0;
+
+        pthread_mutex_lock (&ctr);
+        {
+                if (!cval)
+                        cval = 1;
+                next = cval++;
+        }
+        pthread_mutex_unlock (&ctr);
+
+        return next;
+}
+
+
call_frame_t *
nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)
{
@@ -96,30 +210,47 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)
frame = create_frame (xl, (call_pool_t *)xl->ctx->pool);
if (!frame)
goto err;
+ if (call_stack_alloc_groups (frame->root, nfu->ngrps) != 0) {
+ STACK_DESTROY (frame->root);
+ frame = NULL;
+ goto err;
+ }
+
+ frame->root->pid = NFS_PID;
frame->root->uid = nfu->uid;
frame->root->gid = nfu->gids[NFS_PRIMGID_IDX];
- if (nfu->ngrps == 1)
- goto err; /* Done, we only got primary gid */
-
- frame->root->ngrps = nfu->ngrps - 1;
-
- gf_log (GF_NFS, GF_LOG_TRACE,"uid: %d, gid %d, gids: %d",
- frame->root->uid, frame->root->gid, frame->root->ngrps);
- for(y = 0, x = 1; y < frame->root->ngrps; x++,y++) {
- gf_log (GF_NFS, GF_LOG_TRACE, "gid: %d", nfu->gids[x]);
- frame->root->groups[y] = nfu->gids[x];
+ memcpy (&frame->root->identifier, &nfu->identifier, UNIX_PATH_MAX);
+ frame->root->lk_owner = nfu->lk_owner;
+
+ if (nfu->ngrps != 1) {
+ frame->root->ngrps = nfu->ngrps - 1;
+
+ gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d",
+ frame->root->uid, frame->root->gid,
+ frame->root->ngrps);
+ for(y = 0, x = 1; y < frame->root->ngrps; x++,y++) {
+ gf_msg_trace (GF_NFS, 0, "gid: %d", nfu->gids[x]);
+ frame->root->groups[y] = nfu->gids[x];
+ }
}
+ /*
+ * It's tempting to do this *instead* of using nfu above, but we need
+ * to have those values in case nfs_fix_groups doesn't do anything.
+ */
+ nfs_fix_groups(xl,frame->root);
+
err:
return frame;
}
#define nfs_fop_handle_frame_create(fram, xla, nfuser, retval, errlabel) \
do { \
- fram = nfs_create_frame (xla, (nfuser)); \
+ fram = nfs_create_frame (xla, (nfuser)); \
if (!fram) { \
retval = (-ENOMEM); \
- gf_log (GF_NFS, GF_LOG_ERROR,"Frame creation failed");\
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, \
+ NFS_MSG_NO_MEMORY, "Frame creation failed");\
goto errlabel; \
} \
} while (0) \
@@ -128,28 +259,31 @@ err:
* for us to determine in the callback whether to funge the ino in the stat buf
* with 1 for the parent.
*/
-#define nfs_fop_save_root_ino(locl, loc) \
- do { \
- if ((loc)->ino == 1) \
- (locl)->rootinode = 1; \
- else if (((loc)->parent) && ((loc)->parent->ino == 1)) \
- (locl)->rootparentinode = 1; \
- } while (0) \
+#define nfs_fop_save_root_ino(locl, loc) \
+ do { \
+ if (((loc)->inode) && \
+ __is_root_gfid ((loc)->inode->gfid)) \
+ (locl)->rootinode = 1; \
+ else if (((loc)->parent) && \
+ __is_root_gfid ((loc)->parent->gfid)) \
+ (locl)->rootparentinode = 1; \
+ } while (0)
/* Do the same for an fd */
-#define nfs_fop_save_root_fd_ino(locl, fdesc) \
- do { \
- if ((fdesc)->inode->ino == 1) \
- (locl)->rootinode = 1; \
- } while (0) \
-
+#define nfs_fop_save_root_fd_ino(locl, fdesc) \
+ do { \
+ if (__is_root_gfid ((fdesc)->inode->gfid)) \
+ (locl)->rootinode = 1; \
+ } while (0)
/* Use the state saved by the previous macro to funge the ino in the appropriate
* structure.
*/
-#define nfs_fop_restore_root_ino(locl, preattr, postattr, prepar, postpar) \
+#define nfs_fop_restore_root_ino(locl, fopret, preattr, postattr, prepar, postpar) \
do { \
+ if (fopret == -1) \
+ break; \
if ((locl)->rootinode) { \
if ((preattr)) { \
((struct iatt *)(preattr))->ia_ino = 1; \
@@ -174,17 +308,21 @@ err:
/* If the newly created, inode's parent is root, we'll need to funge the ino
* in the parent attr when we receive them in the callback.
*/
-#define nfs_fop_newloc_save_root_ino(locl, newloc) \
- do { \
- if ((newloc)->ino == 1) \
- (locl)->newrootinode = 1; \
- else if (((newloc)->parent) && ((newloc)->parent->ino == 1)) \
- (locl)->newrootparentinode = 1; \
- } while (0) \
-
-
-#define nfs_fop_newloc_restore_root_ino(locl, preattr, postattr, prepar, postpar) \
+#define nfs_fop_newloc_save_root_ino(locl, newloc) \
+ do { \
+ if (((newloc)->inode) && \
+ __is_root_gfid ((newloc)->inode->gfid)) \
+ (locl)->newrootinode = 1; \
+ else if (((newloc)->parent) && \
+ __is_root_gfid ((newloc)->parent->gfid)) \
+ (locl)->newrootparentinode = 1; \
+ } while (0)
+
+#define nfs_fop_newloc_restore_root_ino(locl, fopret, preattr, postattr, prepar, postpar) \
do { \
+ if (fopret == -1) \
+ break; \
+ \
if ((locl)->newrootinode) { \
if ((preattr)) \
((struct iatt *)(preattr))->ia_ino = 1; \
@@ -198,6 +336,57 @@ err:
} \
} while (0) \
+/* Build a dict holding the "gfid-req" key for a fop on @inode.
+ *
+ * When @inode carries the root gfid (all zero bytes except a trailing 1), the
+ * root gfid itself is stored; otherwise a freshly generated uuid is stored.
+ *
+ * Returns the new dict, or NULL when @inode is NULL, when allocation fails,
+ * or when the key cannot be set. On the failure paths everything allocated
+ * here is released before returning. On success the gfid buffer is not freed
+ * here (presumably it is owned by the dict from then on -- confirm against
+ * dict_set_bin).
+ */
+dict_t *
+nfs_gfid_dict (inode_t *inode)
+{
+        uuid_t  newgfid = {0, };
+        char    *dyngfid = NULL;
+        dict_t  *dictgfid = NULL;
+        int     ret = -1;
+        uuid_t  rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+        /* inode->gfid is read below; callers pass loc->inode, which their
+         * own root-inode macros already treat as possibly NULL. Bail out
+         * before allocating anything so there is nothing to clean up.
+         */
+        if (inode == NULL)
+                return (NULL);
+
+        dyngfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+        if (dyngfid == NULL)
+                return (NULL);
+
+        if (gf_uuid_compare (inode->gfid, rootgfid) == 0) {
+                memcpy (dyngfid, rootgfid, sizeof (uuid_t));
+        } else {
+                /* Only non-root inodes need a new uuid. */
+                gf_uuid_generate (newgfid);
+                memcpy (dyngfid, newgfid, sizeof (uuid_t));
+        }
+
+        dictgfid = dict_new ();
+        if (!dictgfid) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, errno,
+                        NFS_MSG_GFID_DICT_CREATE_FAIL,
+                        "Failed to create gfid dict");
+                GF_FREE (dyngfid);
+                return (NULL);
+        }
+
+        ret = dict_set_bin (dictgfid, "gfid-req", dyngfid, sizeof (uuid_t));
+        if (ret < 0) {
+                /* The buffer is still ours on failure: free it, then drop
+                 * the dict reference taken by dict_new.
+                 */
+                GF_FREE (dyngfid);
+                dict_unref (dictgfid);
+                return (NULL);
+        }
+
+        return dictgfid;
+}
+
+/* Attach a "gfid-req" dict, built by nfs_gfid_dict (inode), to the fop local
+ * @nflcl. When the dict cannot be built, @retval is set to -EFAULT and
+ * control jumps to the label @erlbl. A NULL @nflcl makes this a no-op.
+ *
+ * The macro ends at "while (0)" with no trailing line continuation, so the
+ * line following it is never absorbed into the definition.
+ */
+#define nfs_fop_gfid_setup(nflcl, inode, retval, erlbl)                 \
+        do {                                                            \
+                if (nflcl) {                                            \
+                        (nflcl)->dictgfid = nfs_gfid_dict (inode);      \
+                                                                        \
+                        if (!((nflcl)->dictgfid)) {                     \
+                                (retval) = -EFAULT;                     \
+                                goto erlbl;                             \
+                        }                                               \
+                }                                                       \
+        } while (0)
+
/* Fops Layer Explained
* The fops layer has three types of functions. They can all be identified by
* their names. Here are the three patterns:
@@ -212,7 +401,7 @@ err:
*
* nfs_<fopname>
* Unlike the nfs_fop_<fopname> variety, this is the stateful type of fop, in
- * that it silently performs all the relevant GlusterFS state maintainence
+ * that it silently performs all the relevant GlusterFS state maintenance
* operations on the data returned to the callbacks, leaving the caller's
* callback to just use the data returned for whatever it needs to do with that
* data, for eg. the nfs_lookup, will take care of looking up the inodes,
@@ -231,13 +420,17 @@ nfs_fop_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct nfs_fop_local *local = NULL;
fop_lookup_cbk_t progcbk;
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
nfl_to_prog_data (local, progcbk, frame);
- nfs_fop_restore_root_ino (local, buf, NULL, NULL, postparent);
+ nfs_fop_restore_root_ino (local, op_ret, buf, NULL, NULL, postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
xattr, postparent);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (local, frame);
return 0;
}
@@ -253,38 +446,84 @@ nfs_fop_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
if ((!xl) || (!loc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Lookup: %s", loc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Lookup: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, loc);
+ nfs_fop_gfid_setup (nfl, loc->inode, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_lookup_cbk, nfsx, xl,
- xl->fops->lookup, loc, NULL);
+ STACK_WIND_COOKIE (frame, nfs_fop_lookup_cbk, xl, xl,
+ xl->fops->lookup, loc, nfl->dictgfid);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
}
+/* Callback for the access fop wound by nfs_fop_access.
+ *
+ * Forwards the result (@op_ret, @op_errno, @xdata) to the program callback
+ * when one is set, then destroys the frame together with its fop local.
+ * Always returns 0.
+ */
+int32_t
+nfs_fop_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        fop_access_cbk_t        progcbk = NULL;
+        struct nfs_fop_local    *nfl = NULL;
+
+        /* nfl_to_prog_data (defined elsewhere) is expected to fill in nfl
+         * and progcbk from the frame; both start out NULL.
+         */
+        nfl_to_prog_data (nfl, progcbk, frame);
+
+        if (progcbk != NULL) {
+                progcbk (frame, cookie, this, op_ret, op_errno, xdata);
+        }
+
+        nfs_stack_destroy (nfl, frame);
+
+        return 0;
+}
+
+/* Wind an access fop for @loc to the subvolume @xl on behalf of @nfu.
+ *
+ * @accesstest is converted with nfs3_request_to_accessbits () before being
+ * passed down. @cbk and @local are handed to nfs_fop_handle_local_init so
+ * that nfs_fop_access_cbk can reach them later.
+ *
+ * Returns 0 once the fop has been wound. Returns -EFAULT when a required
+ * argument is NULL. The frame/local setup macros are given ret and the err
+ * label, so a failure there also ends in a negative return, after the frame
+ * (if one was created) has been destroyed.
+ */
+int
+nfs_fop_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+                int32_t accesstest, fop_access_cbk_t cbk, void *local)
+{
+        call_frame_t            *frame = NULL;
+        int                     ret = -EFAULT;
+        struct nfs_fop_local    *nfl = NULL;
+        uint32_t                accessbits = 0;
+
+        /* nfsx is used for frame creation below, so validate it alongside
+         * the other arguments, as the sibling fops do.
+         */
+        if ((!nfsx) || (!xl) || (!loc) || (!nfu))
+                return ret;
+
+        gf_msg_trace (GF_NFS, 0, "Access: %s", loc->path);
+        nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+        nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+        nfs_fop_save_root_ino (nfl, loc);
+
+        accessbits = nfs3_request_to_accessbits (accesstest);
+        STACK_WIND_COOKIE (frame, nfs_fop_access_cbk, xl, xl, xl->fops->access,
+                           loc, accessbits, NULL);
+        ret = 0;
+err:
+        /* On failure the callback will never run, so tear the frame down
+         * here.
+         */
+        if ((ret < 0) && frame)
+                nfs_stack_destroy (nfl, frame);
+
+        return ret;
+}
int32_t
nfs_fop_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_stat_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, buf);
+ progcbk (frame, cookie, this, op_ret, op_errno, buf, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -300,18 +539,18 @@ nfs_fop_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
if ((!xl) || (!loc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Stat: %s", loc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Stat: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, loc);
- STACK_WIND_COOKIE (frame, nfs_fop_stat_cbk, nfsx, xl, xl->fops->stat,
- loc);
+ STACK_WIND_COOKIE (frame, nfs_fop_stat_cbk, xl, xl, xl->fops->stat,
+ loc, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -320,17 +559,18 @@ err:
int32_t
nfs_fop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_fstat_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, buf);
+ progcbk (frame, cookie, this, op_ret, op_errno, buf, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -346,19 +586,19 @@ nfs_fop_fstat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
if ((!nfsx) || (!xl) || (!fd) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "FStat");
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "FStat");
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_fd_ino (nfl, fd);
- STACK_WIND_COOKIE (frame, nfs_fop_fstat_cbk, nfsx, xl, xl->fops->fstat,
- fd);
+ STACK_WIND_COOKIE (frame, nfs_fop_fstat_cbk, xl, xl, xl->fops->fstat,
+ fd, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -367,15 +607,15 @@ err:
int32_t
nfs_fop_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_opendir_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, fd);
- nfs_stack_destroy (frame);
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -391,18 +631,18 @@ nfs_fop_opendir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!dirfd) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Opendir: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Opendir: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_opendir_cbk, nfsx, xl,
- xl->fops->opendir, pathloc, dirfd);
+ STACK_WIND_COOKIE (frame, nfs_fop_opendir_cbk, xl, xl,
+ xl->fops->opendir, pathloc, dirfd, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -410,16 +650,16 @@ err:
int
nfs_fop_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_flush_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno);
+ progcbk (frame, cookie, this, op_ret, op_errno, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -435,16 +675,16 @@ nfs_fop_flush (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
if ((!nfsx) || (!xl) || (!fd) || (!nfu))
return ret;
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_flush_cbk, nfsx, xl, xl->fops->flush,
- fd);
+ STACK_WIND_COOKIE (frame, nfs_fop_flush_cbk, xl, xl, xl->fops->flush,
+ fd, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -453,16 +693,17 @@ err:
int32_t
nfs_fop_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_readdirp_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, entries);
+ progcbk (frame, cookie, this, op_ret, op_errno, entries, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -480,18 +721,18 @@ nfs_fop_readdirp (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *dirfd,
if ((!nfsx) || (!xl) || (!dirfd) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "readdir");
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "readdir");
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_readdirp_cbk, nfsx, xl,
- xl->fops->readdirp, dirfd, bufsize, offset);
+ STACK_WIND_COOKIE (frame, nfs_fop_readdirp_cbk, xl, xl,
+ xl->fops->readdirp, dirfd, bufsize, offset, 0);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -500,7 +741,8 @@ err:
int32_t
nfs_fop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
@@ -508,9 +750,9 @@ nfs_fop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, buf);
+ progcbk (frame, cookie, this, op_ret, op_errno, buf, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -526,17 +768,17 @@ nfs_fop_statfs (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Statfs: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Statfs: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_statfs_cbk, nfsx, xl,
- xl->fops->statfs, pathloc);
+ STACK_WIND_COOKIE (frame, nfs_fop_statfs_cbk, xl, xl,
+ xl->fops->statfs, pathloc, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -547,18 +789,23 @@ int32_t
nfs_fop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_create_cbk_t progcbk = NULL;
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, preparent,
+ postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, NULL);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -575,18 +822,20 @@ nfs_fop_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Create: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Create: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_create_cbk, xl, xl, xl->fops->create,
+ pathloc, flags, mode, 0, fd, nfl->dictgfid);
- STACK_WIND_COOKIE (frame, nfs_fop_create_cbk, nfsx, xl,xl->fops->create
- , pathloc, flags, mode, fd);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -596,16 +845,17 @@ err:
int32_t
nfs_fop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *pre,
- struct iatt *post)
+ struct iatt *post, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_setattr_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, pre, post, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, pre, post, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, pre, post);
- nfs_stack_destroy (frame);
+ progcbk (frame, cookie, this, op_ret, op_errno, pre, post,
+ xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -623,18 +873,18 @@ nfs_fop_setattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Setattr: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Setattr: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
- STACK_WIND_COOKIE (frame, nfs_fop_setattr_cbk, nfsx, xl,
- xl->fops->setattr, pathloc, buf, valid);
+ STACK_WIND_COOKIE (frame, nfs_fop_setattr_cbk, xl, xl,
+ xl->fops->setattr, pathloc, buf, valid, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -645,17 +895,21 @@ int32_t
nfs_fop_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_mkdir_cbk_t progcbk = NULL;
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL,preparent, postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
- nfs_stack_destroy (frame);
+ preparent, postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -671,18 +925,19 @@ nfs_fop_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Mkdir: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Mkdir: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_mkdir_cbk, nfsx, xl, xl->fops->mkdir,
- pathloc, mode);
+ STACK_WIND_COOKIE (frame, nfs_fop_mkdir_cbk, xl, xl, xl->fops->mkdir,
+ pathloc, mode, 0, nfl->dictgfid);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -693,17 +948,21 @@ int32_t
nfs_fop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_symlink_cbk_t progcbk = NULL;
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret,buf, NULL, preparent, postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
- nfs_stack_destroy (frame);
+ preparent, postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -718,18 +977,20 @@ nfs_fop_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
if ((!nfsx) || (!xl) || (!pathloc) || (!target) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Symlink: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Symlink: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_symlink_cbk, nfsx, xl,
- xl->fops->symlink, target, pathloc);
+ STACK_WIND_COOKIE (frame, nfs_fop_symlink_cbk, xl, xl,
+ xl->fops->symlink, target, pathloc,
+ 0, nfl->dictgfid);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -739,16 +1000,17 @@ err:
int32_t
nfs_fop_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_readlink_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, path, buf);
- nfs_stack_destroy (frame);
+ progcbk (frame, cookie, this, op_ret, op_errno, path, buf,
+ xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -764,18 +1026,18 @@ nfs_fop_readlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Readlink: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Readlink: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
- STACK_WIND_COOKIE (frame, nfs_fop_readlink_cbk, nfsx, xl,
- xl->fops->readlink, pathloc, size);
+ STACK_WIND_COOKIE (frame, nfs_fop_readlink_cbk, xl, xl,
+ xl->fops->readlink, pathloc, size, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -786,17 +1048,21 @@ int32_t
nfs_fop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_mknod_cbk_t progcbk = NULL;
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret,buf, NULL, preparent, postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
- nfs_stack_destroy (frame);
+ preparent, postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -812,18 +1078,19 @@ nfs_fop_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Mknod: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Mknod: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_mknod_cbk, nfsx, xl, xl->fops->mknod,
- pathloc, mode, dev);
+ STACK_WIND_COOKIE (frame, nfs_fop_mknod_cbk, xl, xl, xl->fops->mknod,
+ pathloc, mode, dev, 0, nfl->dictgfid);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -832,17 +1099,18 @@ err:
int32_t
nfs_fop_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = frame->local;
fop_rmdir_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, NULL, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret, NULL, NULL, preparent,
+ postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, preparent,
- postparent);
- nfs_stack_destroy (frame);
+ postparent, NULL);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -859,18 +1127,18 @@ nfs_fop_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Rmdir: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Rmdir: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
- STACK_WIND_COOKIE (frame, nfs_fop_rmdir_cbk, nfsx, xl, xl->fops->rmdir,
- pathloc);
+ STACK_WIND_COOKIE (frame, nfs_fop_rmdir_cbk, xl, xl, xl->fops->rmdir,
+ pathloc, 0, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -881,17 +1149,18 @@ err:
int32_t
nfs_fop_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = frame->local;
fop_unlink_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, NULL, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret, NULL, NULL, preparent,
+ postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, preparent,
- postparent);
- nfs_stack_destroy (frame);
+ postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -907,18 +1176,18 @@ nfs_fop_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Unlink: %s", pathloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Unlink: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, pathloc);
- STACK_WIND_COOKIE (frame, nfs_fop_unlink_cbk, nfsx, xl,
- xl->fops->unlink, pathloc);
+ STACK_WIND_COOKIE (frame, nfs_fop_unlink_cbk, xl, xl,
+ xl->fops->unlink, pathloc, 0, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -930,18 +1199,23 @@ int32_t
nfs_fop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_link_cbk_t progcbk = NULL;
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, buf, NULL, preparent, postparent);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, preparent,
+ postparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -957,19 +1231,19 @@ nfs_fop_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Link: %s -> %s", newloc->path,
- oldloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Link: %s -> %s", newloc->path,
+ oldloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, newloc);
- STACK_WIND_COOKIE (frame, nfs_fop_link_cbk, nfsx, xl, xl->fops->link,
- oldloc, newloc);
+ STACK_WIND_COOKIE (frame, nfs_fop_link_cbk, xl, xl, xl->fops->link,
+ oldloc, newloc, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -980,7 +1254,8 @@ int32_t
nfs_fop_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
@@ -989,16 +1264,17 @@ nfs_fop_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfl_to_prog_data (nfl, progcbk, frame);
/* The preattr arg needs to be NULL instead of @buf because it is
* possible that the new parent is not root whereas the source dir
- * could've been. That is handled in the next macro.
+ * could have been. That is handled in the next macro.
*/
- nfs_fop_restore_root_ino (nfl, NULL, NULL, preoldparent, postoldparent);
- nfs_fop_newloc_restore_root_ino (nfl, buf, NULL, prenewparent,
+ nfs_fop_restore_root_ino (nfl, op_ret, NULL, NULL, preoldparent,
+ postoldparent);
+ nfs_fop_newloc_restore_root_ino (nfl, op_ret, buf, NULL, prenewparent,
postnewparent);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, buf,
preoldparent, postoldparent, prenewparent,
- postnewparent);
- nfs_stack_destroy (frame);
+ postnewparent, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -1014,20 +1290,20 @@ nfs_fop_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Rename: %s -> %s", oldloc->path,
- newloc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Rename: %s -> %s", oldloc->path,
+ newloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, oldloc);
nfs_fop_newloc_save_root_ino (nfl, newloc);
- STACK_WIND_COOKIE (frame, nfs_fop_rename_cbk, nfsx, xl,
- xl->fops->rename, oldloc, newloc);
+ STACK_WIND_COOKIE (frame, nfs_fop_rename_cbk, xl, xl,
+ xl->fops->rename, oldloc, newloc, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -1036,22 +1312,22 @@ err:
int32_t
nfs_fop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_open_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, fd);
- nfs_stack_destroy (frame);
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
int
nfs_fop_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags, fop_open_cbk_t cbk,
+ int32_t flags, fd_t *fd, fop_open_cbk_t cbk,
void *local)
{
call_frame_t *frame = NULL;
@@ -1061,17 +1337,17 @@ nfs_fop_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
if ((!nfsx) || (!xl) || (!loc) || (!fd) || (!nfu))
return ret;
- gf_log (GF_NFS, GF_LOG_TRACE, "Open: %s", loc->path);
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ gf_msg_trace (GF_NFS, 0, "Open: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
- STACK_WIND_COOKIE (frame, nfs_fop_open_cbk, nfsx, xl, xl->fops->open,
- loc, flags, fd, wbflags);
+ STACK_WIND_COOKIE (frame, nfs_fop_open_cbk, xl, xl, xl->fops->open,
+ loc, flags, fd, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -1081,17 +1357,18 @@ err:
int32_t
nfs_fop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_writev_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, prebuf, postbuf, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, prebuf, postbuf, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, prebuf,postbuf);
+ progcbk (frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -1099,20 +1376,22 @@ nfs_fop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
- struct iobuf *srciob, struct iovec *vector, int32_t count,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
off_t offset, fop_writev_cbk_t cbk, void *local)
{
call_frame_t *frame = NULL;
int ret = -EFAULT;
struct nfs_fop_local *nfl = NULL;
+ int flags = 0;
+ nfs3_call_state_t *cs = local;
- if ((!nfsx) || (!xl) || (!fd) || (!vector) || (!nfu) || (!srciob))
+ if ((!nfsx) || (!xl) || (!fd) || (!vector) || (!nfu) || (!srciobref))
return ret;
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_fd_ino (nfl, fd);
-
+/*
nfl->iobref = iobref_new ();
if (!nfl->iobref) {
gf_log (GF_NFS, GF_LOG_ERROR, "iobref creation failed");
@@ -1121,14 +1400,26 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
}
iobref_add (nfl->iobref, srciob);
- STACK_WIND_COOKIE (frame, nfs_fop_writev_cbk, nfsx, xl,xl->fops->writev
- , fd, vector, count, offset, nfl->iobref);
+*/
+
+ switch (cs->writetype) {
+ case UNSTABLE:
+ break;
+ case DATA_SYNC:
+ flags |= O_DSYNC;
+ break;
+ case FILE_SYNC:
+ flags |= O_SYNC;
+ break;
+ }
+
+ STACK_WIND_COOKIE (frame, nfs_fop_writev_cbk, xl, xl,xl->fops->writev,
+ fd, vector, count, offset, flags, srciobref, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
- nfs_fop_local_wipe (nfsx, nfl);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -1138,16 +1429,18 @@ err:
int32_t
nfs_fop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_fsync_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, prebuf, postbuf, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, prebuf, postbuf, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, prebuf,postbuf);
- nfs_stack_destroy (frame);
+ progcbk (frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -1164,17 +1457,17 @@ nfs_fop_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
if ((!nfsx) || (!xl) || (!fd))
return ret;
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_fd_ino (nfl, fd);
- STACK_WIND_COOKIE (frame, nfs_fop_fsync_cbk, nfsx, xl,
- xl->fops->fsync, fd, datasync);
+ STACK_WIND_COOKIE (frame, nfs_fop_fsync_cbk, xl, xl,
+ xl->fops->fsync, fd, datasync, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -1184,18 +1477,19 @@ err:
int32_t
nfs_fop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_readv_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, stbuf, NULL, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, stbuf, NULL, NULL, NULL);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, vector, count,
- stbuf, iobref);
+ stbuf, iobref, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -1211,17 +1505,159 @@ nfs_fop_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
if ((!xl) || (!fd) || (!nfu))
return ret;
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_fd_ino (nfl, fd);
- STACK_WIND_COOKIE (frame, nfs_fop_readv_cbk, nfsx, xl, xl->fops->readv,
- fd, size, offset);
+ STACK_WIND_COOKIE (frame, nfs_fop_readv_cbk, xl, xl, xl->fops->readv,
+ fd, size, offset, 0, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_lk_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (!op_ret)
+ fd_lk_insert_and_merge (nfl->fd, nfl->cmd, &nfl->flock);
+
+ fd_unref (nfl->fd);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, flock, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!fd) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ nfl->cmd = cmd;
+ nfl->fd = fd_ref (fd);
+ nfl->flock = *flock;
+
+ STACK_WIND_COOKIE (frame, nfs_fop_lk_cbk, xl, xl, xl->fops->lk,
+ fd, cmd, flock, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_getxattr_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, dict, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_getxattr_cbk, xl, xl, xl->fops->getxattr,
+ loc, name, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_setxattr_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_setxattr_cbk, xl, xl, xl->fops->setxattr,
+ loc, dict, flags, xdata);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
@@ -1231,17 +1667,18 @@ err:
int32_t
nfs_fop_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_truncate_cbk_t progcbk = NULL;
nfl_to_prog_data (nfl, progcbk, frame);
- nfs_fop_restore_root_ino (nfl, prebuf, postbuf, NULL, NULL);
+ nfs_fop_restore_root_ino (nfl, op_ret, prebuf, postbuf, NULL, NULL);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, prebuf,postbuf);
+ progcbk (frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
return 0;
}
@@ -1257,18 +1694,18 @@ nfs_fop_truncate (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
if ((!nfsx) || (!xl) || (!loc) || (!nfu))
return ret;
- nfs_fop_handle_frame_create (frame, xl, nfu, ret, err);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
nfs_fop_save_root_ino (nfl, loc);
- STACK_WIND_COOKIE (frame, nfs_fop_truncate_cbk, nfsx, xl,
- xl->fops->truncate, loc, offset);
+ STACK_WIND_COOKIE (frame, nfs_fop_truncate_cbk, xl, xl,
+ xl->fops->truncate, loc, offset, NULL);
ret = 0;
err:
if (ret < 0) {
if (frame)
- nfs_stack_destroy (frame);
+ nfs_stack_destroy (nfl, frame);
}
return ret;
diff --git a/xlators/nfs/server/src/nfs-fops.h b/xlators/nfs/server/src/nfs-fops.h
index 24fa0b99b2d..e511e8ac1de 100644
--- a/xlators/nfs/server/src/nfs-fops.h
+++ b/xlators/nfs/server/src/nfs-fops.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS_FOPS_H_
#define _NFS_FOPS_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dict.h"
#include "xlator.h"
#include "iobuf.h"
@@ -32,6 +18,7 @@
#include "stack.h"
#include "nfs.h"
#include "nfs-common.h"
+#include "nfs-messages.h"
#include <semaphore.h>
/* This structure used to communicate state between a fop and its callback.
@@ -96,9 +83,14 @@ struct nfs_fop_local {
*/
int rootparentinode;
- char path[NFS_NAME_MAX];
- char newpath[NFS_NAME_MAX];
+ char path[NFS_NAME_MAX + 1];
+ char newpath[NFS_NAME_MAX + 1];
xlator_t *nfsx;
+ dict_t *dictgfid;
+
+ fd_t *fd;
+ int cmd;
+ struct gf_flock flock;
};
extern struct nfs_fop_local *
@@ -115,7 +107,7 @@ nfs_fop_local_wipe (xlator_t *xl, struct nfs_fop_local *l);
nflocal = nfs_fop_local_init (nf); \
if (nflocal) { \
nflocal->proglocal = plocal; \
- nflocal->progcbk = pcbk; \
+ nflocal->progcbk = *VOID(&pcbk); \
nflocal->nfsx = nf; \
if (fram) \
((call_frame_t *)fram)->local = nflocal;\
@@ -129,14 +121,14 @@ nfs_fop_local_wipe (xlator_t *xl, struct nfs_fop_local *l);
nflocal = fram->local; \
fram->local = nflocal->proglocal; \
pcbk = nflocal->progcbk; \
- nfs_fop_local_wipe (nflocal->nfsx, nflocal); \
} while (0) \
#define nfs_fop_handle_local_init(fram,nfx, nfloc, cbck,prgloc,retval,lab) \
do { \
prog_data_to_nfl (nfx, nfloc, fram, cbck, prgloc); \
if (!nfloc) { \
- gf_log (GF_NFS,GF_LOG_ERROR,"Failed to init local");\
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, \
+ NFS_MSG_NO_MEMORY, "Failed to init local"); \
retval = -ENOMEM; \
goto lab; \
} \
@@ -180,12 +172,12 @@ nfs_fop_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
extern int
nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
- struct iobuf *srciob, struct iovec *vector, int32_t count,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
off_t offset, fop_writev_cbk_t cbk, void *local);
extern int
nfs_fop_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags, fop_open_cbk_t cbk,
+ int32_t flags, fd_t *fd, fop_open_cbk_t cbk,
void *local);
extern int
@@ -233,4 +225,21 @@ extern int
nfs_fop_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
fop_stat_cbk_t cbk, void *local);
+extern int
+nfs_fop_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local);
+
#endif
diff --git a/xlators/nfs/server/src/nfs-generics.c b/xlators/nfs/server/src/nfs-generics.c
index b248c28755b..ccfc0ae87c5 100644
--- a/xlators/nfs/server/src/nfs-generics.c
+++ b/xlators/nfs/server/src/nfs-generics.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "string.h"
@@ -46,6 +32,19 @@ nfs_fstat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
return ret;
}
+int
+nfs_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_access (nfsx, xl, nfu, pathloc, accesstest, cbk, local);
+
+ return ret;
+}
int
nfs_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
@@ -138,7 +137,6 @@ nfs_truncate (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
return ret;
}
-
int
nfs_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd, size_t size,
off_t offset, fop_readv_cbk_t cbk, void *local)
@@ -146,6 +144,28 @@ nfs_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd, size_t size,
return nfs_fop_read (nfsx, xl, nfu, fd, size, offset, cbk, local);
}
+int
+nfs_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local)
+{
+ return nfs_fop_lk ( nfsx, xl, nfu, fd, cmd, flock, cbk, local);
+}
+
+int
+nfs_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local)
+{
+ return nfs_fop_getxattr (nfsx, xl, nfu, loc, name, xdata, cbk, local);
+}
+
+int
+nfs_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local)
+{
+ return nfs_fop_setxattr (nfsx, xl, nfu, loc, dict, flags, xdata, cbk,
+ local);
+}
int
nfs_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
@@ -157,11 +177,11 @@ nfs_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
int
nfs_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
- struct iobuf *srciob, struct iovec *vector, int32_t count,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
off_t offset, fop_writev_cbk_t cbk, void *local)
{
- return nfs_fop_write (nfsx, xl, nfu, fd, srciob, vector, count, offset,
- cbk, local);
+ return nfs_fop_write (nfsx, xl, nfu, fd, srciobref, vector, count,
+ offset, cbk, local);
}
@@ -174,7 +194,7 @@ nfs_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
return ret;
- ret = nfs_inode_open (nfsx, xl, nfu, pathloc, flags, GF_OPEN_NOWB, cbk,
+ ret = nfs_inode_open (nfsx, xl, nfu, pathloc, flags, cbk,
local);
return ret;
}
@@ -317,3 +337,4 @@ nfs_opendir (xlator_t *nfsx, xlator_t *fopxl, nfs_user_t *nfu, loc_t *pathloc,
return nfs_inode_opendir (nfsx, fopxl, nfu, pathloc, cbk, local);
}
+
diff --git a/xlators/nfs/server/src/nfs-generics.h b/xlators/nfs/server/src/nfs-generics.h
index 55740f7b2f4..c3fb4fca339 100644
--- a/xlators/nfs/server/src/nfs-generics.h
+++ b/xlators/nfs/server/src/nfs-generics.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS_GENERICS_H_
#define _NFS_GENERICS_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "nfs.h"
#include "xlator.h"
#include "nfs-fops.h"
@@ -43,7 +29,7 @@ struct nfs_direntcache {
* different NFS versions can simply call a standard interface and have fop
* interface dependent functions be handled internally.
* This structure is part of such an abstraction. The fops layer stores any
- * state is requires in the fd. For eg, the dirent cache for a directory fd_t.
+ * state is requires in the fd. E.g. the dirent cache for a directory fd_t.
*/
typedef struct nfs_fop_fdcontext {
pthread_mutex_t lock;
@@ -92,7 +78,7 @@ nfs_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
extern int
nfs_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
- struct iobuf *srciob, struct iovec *vector, int32_t count,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
off_t offset, fop_writev_cbk_t cbk, void *local);
extern int
@@ -157,4 +143,21 @@ nfs_read_sync (xlator_t *xl, nfs_user_t *nfu, fd_t *fd, size_t size,
extern int
nfs_opendir (xlator_t *nfsx, xlator_t *fopxl, nfs_user_t *nfu, loc_t *pathloc,
fop_opendir_cbk_t cbk, void *local);
+
+extern int
+nfs_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local);
+extern int
+nfs_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local);
+
+extern int
+nfs_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local);
+
+extern int
+nfs_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local);
+
#endif
diff --git a/xlators/nfs/server/src/nfs-inodes.c b/xlators/nfs/server/src/nfs-inodes.c
index 4b8bad71789..0dad30ba10e 100644
--- a/xlators/nfs/server/src/nfs-inodes.c
+++ b/xlators/nfs/server/src/nfs-inodes.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "string.h"
#include "inode.h"
@@ -29,9 +15,18 @@
#include "nfs-inodes.h"
#include "nfs-fops.h"
#include "xlator.h"
+#include "nfs-messages.h"
#include <libgen.h>
+#define inodes_nfl_to_prog_data(nflocal, pcbk, fram) \
+ do { \
+ nflocal = fram->local; \
+ fram->local = nflocal->proglocal; \
+ *VOID(&pcbk) = nflocal->progcbk; \
+ nfs_fop_local_wipe (nflocal->nfsx, nflocal); \
+ } while (0) \
+
void
nfl_inodes_init (struct nfs_fop_local *nfl, inode_t *inode, inode_t *parent,
inode_t *newparent, const char *name, const char *newname)
@@ -49,10 +44,10 @@ nfl_inodes_init (struct nfs_fop_local *nfl, inode_t *inode, inode_t *parent,
nfl->newparent = inode_ref (newparent);
if (name)
- strcpy (nfl->path, name);
+ strncpy (nfl->path, name, NFS_NAME_MAX);
if (newname)
- strcpy (nfl->newpath, newname);
+ strncpy (nfl->newpath, newname, NFS_NAME_MAX);
return;
}
@@ -62,15 +57,16 @@ int32_t
nfs_inode_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode
, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = frame->local;
fop_create_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
if (op_ret == -1)
goto do_not_link;
- inode_link (inode, nfl->parent, nfl->path, buf);
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
do_not_link:
/* NFS does not need it, upper layers should not expect the pointer to
@@ -78,10 +74,16 @@ do_not_link:
*/
fd_unref (fd);
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
return 0;
}
@@ -102,7 +104,8 @@ nfs_inode_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
newfd = fd_create (pathloc->inode, 0);
if (!newfd) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create new fd");
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to create new fd");
ret = -ENOMEM;
goto wipe_nfl;
}
@@ -127,21 +130,27 @@ int32_t
nfs_inode_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = frame->local;
fop_mkdir_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
if (op_ret == -1)
goto do_not_link;
- inode_link (inode, nfl->parent, nfl->path, buf);
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
do_not_link:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
return 0;
}
@@ -172,7 +181,7 @@ err:
int32_t
nfs_inode_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
@@ -186,16 +195,16 @@ nfs_inode_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* else
fd_bind (fd);
*/
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, fd);
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
return 0;
}
int
nfs_inode_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
- int32_t flags, int32_t wbflags, fop_open_cbk_t cbk, void *local)
+ int32_t flags, fop_open_cbk_t cbk, void *local)
{
struct nfs_fop_local *nfl = NULL;
fd_t *newfd = NULL;
@@ -206,13 +215,14 @@ nfs_inode_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
newfd = fd_create (loc->inode, 0);
if (!newfd) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create fd");
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to create fd");
ret = -ENOMEM;
goto err;
}
nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, fd_err);
- ret = nfs_fop_open (nfsx, xl, nfu, loc, flags, newfd, wbflags,
+ ret = nfs_fop_open (nfsx, xl, nfu, loc, flags, newfd,
nfs_inode_open_cbk, nfl);
if (ret < 0)
@@ -234,7 +244,8 @@ int32_t
nfs_inode_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_rename_cbk_t progcbk = NULL;
@@ -247,11 +258,11 @@ nfs_inode_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfl->newpath, nfl->inode, buf);
do_not_link:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, buf,
preoldparent, postoldparent, prenewparent,
- postnewparent);
+ postnewparent, xdata);
return 0;
}
@@ -284,22 +295,29 @@ int32_t
nfs_inode_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_link_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
if (op_ret == -1)
goto do_not_link;
nfl = frame->local;
- inode_link (inode, nfl->newparent, nfl->path, buf);
+ linked_inode = inode_link (inode, nfl->newparent, nfl->path, buf);
do_not_link:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
return 0;
}
@@ -330,7 +348,7 @@ err:
int32_t
nfs_inode_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_unlink_cbk_t progcbk = NULL;
@@ -341,12 +359,13 @@ nfs_inode_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto do_not_unlink;
inode_unlink (nfl->inode, nfl->parent, nfl->path);
+ inode_forget (nfl->inode, 0);
do_not_unlink:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
return 0;
}
@@ -377,7 +396,7 @@ err:
int32_t
nfs_inode_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_rmdir_cbk_t progcbk = NULL;
@@ -388,12 +407,13 @@ nfs_inode_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto do_not_unlink;
inode_unlink (nfl->inode, nfl->parent, nfl->path);
+ inode_forget (nfl->inode, 0);
do_not_unlink:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
return 0;
}
@@ -426,23 +446,30 @@ int32_t
nfs_inode_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_mknod_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
nfl = frame->local;
if (op_ret == -1)
goto do_not_link;
- inode_link (inode, nfl->parent, nfl->path, buf);
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
do_not_link:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
return 0;
}
@@ -476,22 +503,28 @@ int32_t
nfs_inode_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_symlink_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
nfl = frame->local;
if (op_ret == -1)
goto do_not_link;
- inode_link (inode, nfl->parent, nfl->path, buf);
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
do_not_link:
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
return 0;
}
@@ -522,21 +555,20 @@ err:
int32_t
nfs_inode_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct nfs_fop_local *nfl = NULL;
fop_open_cbk_t progcbk = NULL;
- if ((op_ret == -1) && (fd))
- fd_unref (fd);
- else
+ if (op_ret != -1)
fd_bind (fd);
- nfl_to_prog_data (nfl, progcbk, frame);
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
if (progcbk)
- progcbk (frame, cookie, this, op_ret, op_errno, fd);
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+
return 0;
}
@@ -554,7 +586,8 @@ nfs_inode_opendir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
newfd = fd_create (loc->inode, 0);
if (!newfd) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create fd");
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to create fd");
ret = -ENOMEM;
goto err;
}
diff --git a/xlators/nfs/server/src/nfs-inodes.h b/xlators/nfs/server/src/nfs-inodes.h
index 51a23faef27..9ec94f2befe 100644
--- a/xlators/nfs/server/src/nfs-inodes.h
+++ b/xlators/nfs/server/src/nfs-inodes.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS_INODES_H_
#define _NFS_INODES_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dict.h"
#include "xlator.h"
#include "iobuf.h"
@@ -48,7 +34,7 @@ nfs_inode_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
extern int
nfs_inode_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
- int32_t flags, int32_t wbflags, fop_open_cbk_t cbk,
+ int32_t flags, fop_open_cbk_t cbk,
void *local);
extern int
@@ -80,6 +66,6 @@ nfs_inode_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local);
extern int
-nfs_inode_lookup (xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+nfs_inode_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
fop_lookup_cbk_t cbk, void *local);
#endif
diff --git a/xlators/nfs/server/src/nfs-mem-types.h b/xlators/nfs/server/src/nfs-mem-types.h
index 118ee2d23b0..88c688f74f3 100644
--- a/xlators/nfs/server/src/nfs-mem-types.h
+++ b/xlators/nfs/server/src/nfs-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -37,9 +28,28 @@ enum gf_nfs_mem_types_ {
gf_nfs_mt_entry3,
gf_nfs_mt_entryp3,
gf_nfs_mt_nfs3_fd_entry,
+ gf_nfs_mt_nfs3_fh,
gf_nfs_mt_nfs_initer_list,
gf_nfs_mt_xlator_t,
gf_nfs_mt_list_head,
+ gf_nfs_mt_mnt3_resolve,
+ gf_nfs_mt_mnt3_export,
+ gf_nfs_mt_mnt3_auth_params,
+ gf_nfs_mt_int,
+ gf_nfs_mt_mountres3,
+ gf_nfs_mt_mountstat3,
+ gf_nfs_mt_inode_q,
+ gf_nfs_mt_nlm4_state,
+ gf_nfs_mt_nlm4_cm,
+ gf_nfs_mt_nlm4_fde,
+ gf_nfs_mt_nlm4_nlmclnt,
+ gf_nfs_mt_nlm4_share,
+ gf_nfs_mt_aux_gids,
+ gf_nfs_mt_inode_ctx,
+ gf_nfs_mt_auth_spec,
+ gf_nfs_mt_arr,
+ gf_nfs_mt_auth_cache,
+ gf_nfs_mt_auth_cache_entry,
gf_nfs_mt_end
};
#endif
diff --git a/xlators/nfs/server/src/nfs-messages.h b/xlators/nfs/server/src/nfs-messages.h
new file mode 100644
index 00000000000..b3d134d11be
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-messages.h
@@ -0,0 +1,1669 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS_MESSAGES_H_
+#define _NFS_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file nfs-messages.h
+ * \brief NFS log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_NFS_BASE GLFS_MSGID_COMP_NFS
+#define GLFS_NFS_NUM_MESSAGES 202
+#define GLFS_MSGID_END (GLFS_NFS_BASE + GLFS_NFS_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_NFS_BASE, "Invalid: Start of messages"
+
+/*------------*/
+
+#define NFS_MSG_UNUSED_1 (GLFS_NFS_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_2 (GLFS_NFS_BASE + 2)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INVALID_ENTRY (GLFS_NFS_BASE + 3)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INODE_LOC_FILL_ERROR (GLFS_NFS_BASE + 4)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_HARD_RESOLVE_FAIL (GLFS_NFS_BASE + 5)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ARGS_DECODE_ERROR (GLFS_NFS_BASE + 6)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOOKUP_PROC_FAIL (GLFS_NFS_BASE + 7)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_8 (GLFS_NFS_BASE + 8)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_9 (GLFS_NFS_BASE + 9)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_READLINK_PROC_FAIL (GLFS_NFS_BASE + 10)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_11 (GLFS_NFS_BASE + 11)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ANONYMOUS_FD_FAIL (GLFS_NFS_BASE + 12)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_READ_FAIL (GLFS_NFS_BASE + 13)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_14 (GLFS_NFS_BASE + 14)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_15 (GLFS_NFS_BASE + 15)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STATE_WRONG (GLFS_NFS_BASE + 16)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_WRITE_FAIL (GLFS_NFS_BASE + 17)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_18 (GLFS_NFS_BASE + 18)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_19 (GLFS_NFS_BASE + 19)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_20 (GLFS_NFS_BASE + 20)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+
+#define NFS_MSG_CREATE_FAIL (GLFS_NFS_BASE + 21)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_22 (GLFS_NFS_BASE + 22)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_23 (GLFS_NFS_BASE + 23)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_DIR_OP_FAIL (GLFS_NFS_BASE + 24)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_25 (GLFS_NFS_BASE + 25)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SYMLINK_FAIL (GLFS_NFS_BASE + 26)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_27 (GLFS_NFS_BASE + 27)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_MKNOD_FAIL (GLFS_NFS_BASE + 28)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_OPT_INIT_FAIL (GLFS_NFS_BASE + 29)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+
+#define NFS_MSG_UNUSED_30 (GLFS_NFS_BASE + 30)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_REMOVE_FAIL (GLFS_NFS_BASE + 31)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RMDIR_CBK (GLFS_NFS_BASE + 32)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_33 (GLFS_NFS_BASE + 33)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RENAME_FAIL (GLFS_NFS_BASE + 34)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_35 (GLFS_NFS_BASE + 35)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LINK_FAIL (GLFS_NFS_BASE + 36)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_37 (GLFS_NFS_BASE + 37)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_38 (GLFS_NFS_BASE + 38)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_READDIR_FAIL (GLFS_NFS_BASE + 39)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_READDIRP_FAIL (GLFS_NFS_BASE + 40)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_41 (GLFS_NFS_BASE + 41)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_42 (GLFS_NFS_BASE + 42)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_FSTAT_FAIL (GLFS_NFS_BASE + 43)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_44 (GLFS_NFS_BASE + 44)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_FSINFO_FAIL (GLFS_NFS_BASE + 45)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_46 (GLFS_NFS_BASE + 46)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PATHCONF_FAIL (GLFS_NFS_BASE + 47)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_48 (GLFS_NFS_BASE + 48)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_COMMIT_FAIL (GLFS_NFS_BASE + 49)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PROT_INIT_ADD_FAIL (GLFS_NFS_BASE + 50)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_FORMAT_FAIL (GLFS_NFS_BASE + 51)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SNPRINTF_FAIL (GLFS_NFS_BASE + 52)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_VOLID_MISSING (GLFS_NFS_BASE + 53)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PARSE_VOL_UUID_FAIL (GLFS_NFS_BASE + 54)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_STR2BOOL_FAIL (GLFS_NFS_BASE + 55)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SUBVOL_INIT_FAIL (GLFS_NFS_BASE + 56)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NO_MEMORY (GLFS_NFS_BASE + 57)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_LISTENERS_CREATE_FAIL (GLFS_NFS_BASE + 58)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STATE_INIT_FAIL (GLFS_NFS_BASE + 59)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RECONF_FAIL (GLFS_NFS_BASE + 60)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_RECONF_SUBVOL_FAIL (GLFS_NFS_BASE + 61)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STR_TOO_LONG (GLFS_NFS_BASE + 62)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STATE_MISSING (GLFS_NFS_BASE + 63)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INDEX_NOT_FOUND (GLFS_NFS_BASE + 64)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_EXPORT_ID_FAIL (GLFS_NFS_BASE + 65)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NO_RW_ACCESS (GLFS_NFS_BASE + 66)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_BAD_HANDLE (GLFS_NFS_BASE + 67)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_RESOLVE_FH_FAIL (GLFS_NFS_BASE + 68)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_RESOLVE_STAT (GLFS_NFS_BASE + 69)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define NFS_MSG_VOL_DISABLE (GLFS_NFS_BASE + 70)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INIT_CALL_STAT_FAIL (GLFS_NFS_BASE + 71)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ENCODE_FAIL (GLFS_NFS_BASE + 72)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SERIALIZE_REPLY_FAIL (GLFS_NFS_BASE + 73)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SUBMIT_REPLY_FAIL (GLFS_NFS_BASE + 74)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_75 (GLFS_NFS_BASE + 75)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_76 (GLFS_NFS_BASE + 76)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STAT_FOP_FAIL (GLFS_NFS_BASE + 77)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GETATTR_FAIL (GLFS_NFS_BASE + 78)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_79 (GLFS_NFS_BASE + 79)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_80 (GLFS_NFS_BASE + 80)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_TIMESTAMP_NO_SYNC (GLFS_NFS_BASE + 81)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SETATTR_INVALID (GLFS_NFS_BASE + 82)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SETATTR_FAIL (GLFS_NFS_BASE + 83)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNUSED_84 (GLFS_NFS_BASE + 84)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ACCESS_PROC_FAIL (GLFS_NFS_BASE + 85)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PGM_NOT_FOUND (GLFS_NFS_BASE + 86)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PGM_INIT_FAIL (GLFS_NFS_BASE + 87)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PGM_REG_FAIL (GLFS_NFS_BASE + 88)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOOKUP_ROOT_FAIL (GLFS_NFS_BASE + 89)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ROOT_LOC_INIT_FAIL (GLFS_NFS_BASE + 90)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STARTUP_FAIL (GLFS_NFS_BASE + 91)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_XLATOR_INIT_FAIL (GLFS_NFS_BASE + 92)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NFS_MAN_DISABLE (GLFS_NFS_BASE + 93)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_DICT_GET_FAILED (GLFS_NFS_BASE + 94)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PARSE_FAIL (GLFS_NFS_BASE + 95)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NLM_MAN_DISABLE (GLFS_NFS_BASE + 96)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ACL_MAN_DISABLE (GLFS_NFS_BASE + 97)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_DICT_SET_FAILED (GLFS_NFS_BASE + 98)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INIT_GRP_CACHE_FAIL (GLFS_NFS_BASE + 99)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NO_PERM (GLFS_NFS_BASE + 100)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_REG_FILE_ERROR (GLFS_NFS_BASE + 101)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RPC_INIT_FAIL (GLFS_NFS_BASE + 102)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RPC_CONFIG_FAIL (GLFS_NFS_BASE + 103)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RECONFIG_PATH (GLFS_NFS_BASE + 104)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RECONFIG_VALUE (GLFS_NFS_BASE + 105)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RECONFIG_VOL (GLFS_NFS_BASE + 106)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NLM_INFO (GLFS_NFS_BASE + 107)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ACL_INFO (GLFS_NFS_BASE + 108)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INIT_FAIL (GLFS_NFS_BASE + 109)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STARTED (GLFS_NFS_BASE + 110)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_VOL_NOT_FOUND (GLFS_NFS_BASE + 111)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RECONFIG_ENABLE (GLFS_NFS_BASE + 112)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RECONFIG_FAIL (GLFS_NFS_BASE + 113)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_MNT_STATE_NOT_FOUND (GLFS_NFS_BASE + 114)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ENCODE_MSG_FAIL (GLFS_NFS_BASE + 115)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_REP_SUBMIT_FAIL (GLFS_NFS_BASE + 116)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_READ_LOCKED (GLFS_NFS_BASE + 117)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_MODIFY_LOCKED (GLFS_NFS_BASE + 118)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RWTAB_OVERWRITE_FAIL (GLFS_NFS_BASE + 119)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UPDATE_FAIL (GLFS_NFS_BASE + 120)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_OPEN_FAIL (GLFS_NFS_BASE + 121)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOCK_FAIL (GLFS_NFS_BASE + 122)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_REWRITE_ERROR (GLFS_NFS_BASE + 123)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_HASH_PATH_FAIL (GLFS_NFS_BASE + 124)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOOKUP_MNT_ERROR (GLFS_NFS_BASE + 125)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_ROOT_INODE_FAIL (GLFS_NFS_BASE + 126)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RESOLVE_INODE_FAIL (GLFS_NFS_BASE + 127)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RESOLVE_SUBDIR_FAIL (GLFS_NFS_BASE + 128)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RESOLVE_SYMLINK_ERROR (GLFS_NFS_BASE + 129)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RESOLVE_ERROR (GLFS_NFS_BASE + 130)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNSUPPORTED_VERSION (GLFS_NFS_BASE + 131)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_AUTH_VERIFY_FAILED (GLFS_NFS_BASE + 132)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PEER_NOT_ALLOWED (GLFS_NFS_BASE + 133)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+
+#define NFS_MSG_GET_PEER_ADDR_FAIL (GLFS_NFS_BASE + 134)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_BAD_PEER (GLFS_NFS_BASE + 135)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PEER_TOO_LONG (GLFS_NFS_BASE + 136)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_CALLER_NOT_FOUND (GLFS_NFS_BASE + 137)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_REMOTE_NAME_FAIL (GLFS_NFS_BASE + 138)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNKNOWN_MNT_TYPE (GLFS_NFS_BASE + 139)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PARSE_HOSTSPEC_FAIL (GLFS_NFS_BASE + 140)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PARSE_AUTH_PARAM_FAIL (GLFS_NFS_BASE + 141)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SET_EXP_FAIL (GLFS_NFS_BASE + 142)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INIT_DIR_EXP_FAIL (GLFS_NFS_BASE + 143)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_DIR_EXP_SETUP_FAIL (GLFS_NFS_BASE + 144)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_VOL_INIT_FAIL (GLFS_NFS_BASE + 145)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_AUTH_ERROR (GLFS_NFS_BASE + 146)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UPDATING_EXP (GLFS_NFS_BASE + 147)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SET_EXP_AUTH_PARAM_FAIL (GLFS_NFS_BASE + 148)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UPDATING_NET_GRP (GLFS_NFS_BASE + 149)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SET_NET_GRP_FAIL (GLFS_NFS_BASE + 150)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PURGING_AUTH_CACHE (GLFS_NFS_BASE + 151)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_MNT_STATE_INIT_FAIL (GLFS_NFS_BASE + 152)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_EXP_AUTH_DISABLED (GLFS_NFS_BASE + 153)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_FH_TO_VOL_FAIL (GLFS_NFS_BASE + 154)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INODE_SHARES_NOT_FOUND (GLFS_NFS_BASE + 155)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_VOLUME_ERROR (GLFS_NFS_BASE + 156)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_USER_ACL_FAIL (GLFS_NFS_BASE + 157)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_DEF_ACL_FAIL (GLFS_NFS_BASE + 158)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SET_USER_ACL_FAIL (GLFS_NFS_BASE + 159)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SET_DEF_ACL_FAIL (GLFS_NFS_BASE + 160)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ACL_INIT_FAIL (GLFS_NFS_BASE + 161)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOAD_PARSE_ERROR (GLFS_NFS_BASE + 162)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_CLNT_CALL_ERROR (GLFS_NFS_BASE + 163)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_CLNT_CREATE_ERROR (GLFS_NFS_BASE + 164)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NLM_GRACE_PERIOD (GLFS_NFS_BASE + 165)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_RPC_CLNT_ERROR (GLFS_NFS_BASE + 166)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_PORT_ERROR (GLFS_NFS_BASE + 167)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NLMCLNT_NOT_FOUND (GLFS_NFS_BASE + 168)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_FD_LOOKUP_NULL (GLFS_NFS_BASE + 169)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SM_NOTIFY (GLFS_NFS_BASE + 170)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NLM_INIT_FAIL (GLFS_NFS_BASE + 171)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_START_ERROR (GLFS_NFS_BASE + 172)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNLINK_ERROR (GLFS_NFS_BASE + 173)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SHARE_LIST_STORE_FAIL (GLFS_NFS_BASE + 174)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_CLIENT_NOT_FOUND (GLFS_NFS_BASE + 175)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SHARE_CALL_FAIL (GLFS_NFS_BASE + 176)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UNSHARE_CALL_FAIL (GLFS_NFS_BASE + 177)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_PID_FAIL (GLFS_NFS_BASE + 178)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ARG_FREE_FAIL (GLFS_NFS_BASE + 179)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PMAP_UNSET_FAIL (GLFS_NFS_BASE + 180)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_UDP_SERV_FAIL (GLFS_NFS_BASE + 181)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_REG_NLMCBK_FAIL (GLFS_NFS_BASE + 182)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_TCP_SERV_FAIL (GLFS_NFS_BASE + 183)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SVC_RUN_RETURNED (GLFS_NFS_BASE + 184)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_XLATOR_SET_FAIL (GLFS_NFS_BASE + 185)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_SVC_ERROR (GLFS_NFS_BASE + 186)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GET_FH_FAIL (GLFS_NFS_BASE + 187)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_FIND_FIRST_MATCH_FAIL (GLFS_NFS_BASE + 188)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_NETGRP_NOT_FOUND (GLFS_NFS_BASE + 189)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_FILE_OP_FAILED (GLFS_NFS_BASE + 190)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PATH_RESOLVE_FAIL (GLFS_NFS_BASE + 191)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOC_FILL_RESOLVE_FAIL (GLFS_NFS_BASE + 192)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INODE_NOT_FOUND (GLFS_NFS_BASE + 193)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_INODE_CTX_STORE_FAIL (GLFS_NFS_BASE + 194)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GETPWUID_FAIL (GLFS_NFS_BASE + 195)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_MAP_GRP_LIST_FAIL (GLFS_NFS_BASE + 196)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_PARSE_DIR_FAIL (GLFS_NFS_BASE + 197)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_LOOKUP_FAIL (GLFS_NFS_BASE + 198)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_STAT_ERROR (GLFS_NFS_BASE + 199)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_GFID_DICT_CREATE_FAIL (GLFS_NFS_BASE + 200)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_HASH_XLATOR_FAIL (GLFS_NFS_BASE + 201)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define NFS_MSG_ENABLE_THROTTLE_FAIL (GLFS_NFS_BASE + 202)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _NFS_MESSAGES_H_ */
+
+
+
+
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index cb5f19ef992..ddfa89dab11 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* This is the primary translator source for NFS.
@@ -22,11 +13,6 @@
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "defaults.h"
#include "rpcsvc.h"
#include "dict.h"
@@ -39,12 +25,218 @@
#include "mount3.h"
#include "nfs3.h"
#include "nfs-mem-types.h"
+#include "nfs3-helpers.h"
+#include "nlm4.h"
+#include "options.h"
+#include "acl3.h"
+#include "rpc-drc.h"
+#include "syscall.h"
+#include "rpcsvc.h"
+#include "nfs-messages.h"
+
+#define OPT_SERVER_AUX_GIDS "nfs.server-aux-gids"
+#define OPT_SERVER_GID_CACHE_TIMEOUT "nfs.server.aux-gid-timeout"
+#define OPT_SERVER_RPC_STATD "nfs.rpc-statd"
+#define OPT_SERVER_RPC_STATD_PIDFILE "nfs.rpc-statd-pidfile"
+#define OPT_SERVER_RPC_STATD_NOTIFY_PIDFILE "nfs.rpc-statd-notify-pidfile"
+
+#define NFS_DATADIR GLUSTERD_DEFAULT_WORKDIR "/nfs"
+
+/* Forward declaration */
+static int nfs_add_initer (struct list_head *list, nfs_version_initer_t init,
+ gf_boolean_t required);
+
+static int
+nfs_init_version (xlator_t *this, nfs_version_initer_t init,
+ gf_boolean_t required)
+{
+ int ret = -1;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+ struct list_head *versions = NULL;
+ struct nfs_state *nfs = NULL;
+ gf_boolean_t found = _gf_false;
+
+ if ((!this) || (!this->private) || (!init))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ ret = nfs_add_initer (&nfs->versions, init, required);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PROT_INIT_ADD_FAIL,
+ "Failed to add protocol initializer");
+ goto err;
+ }
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (version->init == init) {
+ prog = init(this);
+ if (!prog) {
+ ret = -1;
+ goto err;
+ }
+ version->program = prog;
+ found = _gf_true;
+ break;
+ }
+ }
+
+ /* program not added */
+ if (!found) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PGM_NOT_FOUND,
+ "Program: %s NOT found", prog->progname);
+ goto err;
+ }
+
+ /* Check if nfs.port is configured */
+ if (nfs->override_portnum)
+ prog->progport = nfs->override_portnum;
+
+ gf_msg_debug (GF_NFS, 0, "Starting program: %s", prog->progname);
+
+ ret = rpcsvc_program_register (nfs->rpcsvc, prog);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PGM_INIT_FAIL,
+ "Program: %s init failed", prog->progname);
+ goto err;
+ }
+
+ /* Registration with portmapper is disabled, Nothing to do */
+ if (!nfs->register_portmap)
+ goto err;
+
+ ret = rpcsvc_program_register_portmap (prog, prog->progport);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PGM_REG_FAIL,
+ "Program %s registration failed", prog->progname);
+ goto err;
+ }
+ ret = 0; /* All well */
+err:
+ return ret;
+}
+
+static int
+nfs_deinit_version (struct nfs_state *nfs, nfs_version_initer_t init)
+{
+ int ret = -1;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+ struct list_head *versions = NULL;
+
+ if ((!nfs) || (!init))
+ return (-1);
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (version->init == init) {
+ prog = version->program;
+ ret = rpcsvc_program_unregister (nfs->rpcsvc, prog);
+ if (ret != 0)
+ return (-1);
+ list_del (&version->list);
+ GF_FREE (version);
+ return (0);
+ }
+ }
+
+ return (-1);
+}
+
+static int
+nfs_reconfigure_acl3 (xlator_t *this)
+{
+ struct nfs_state *nfs = NULL;
+
+ if ((!this) || (!this->private))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* ACL is enabled */
+ if (nfs->enable_acl)
+ return nfs_init_version (this, acl3svc_init, _gf_false);
+
+ /* ACL is disabled */
+ return nfs_deinit_version (nfs, acl3svc_init);
+}
+
+static int
+nfs_reconfigure_nlm4 (xlator_t *this)
+{
+ struct nfs_state *nfs = NULL;
+
+ if ((!this) || (!this->private))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* NLM is enabled */
+ if (nfs->enable_nlm)
+ return nfs_init_version (this, nlm4svc_init, _gf_false);
+
+ /* NLM is disabled */
+ return nfs_deinit_version (nfs, nlm4svc_init);
+}
+
+static int
+nfs_program_register_portmap_all (struct nfs_state *nfs)
+{
+ struct list_head *versions = NULL;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+
+ if (nfs == NULL)
+ return (-1);
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (prog == NULL)
+ continue;
+ if (nfs->override_portnum)
+ prog->progport = nfs->override_portnum;
+ (void) rpcsvc_program_register_portmap (prog, prog->progport);
+ }
+
+ return (0);
+}
+
+static int
+nfs_program_unregister_portmap_all (struct nfs_state *nfs)
+{
+ struct list_head *versions = NULL;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+
+ if (nfs == NULL)
+ return (-1);
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (prog == NULL)
+ continue;
+ (void) rpcsvc_program_unregister_portmap (prog);
+ }
+
+ return (0);
+}
/* Every NFS version must call this function with the init function
* for its particular version.
*/
-int
-nfs_add_initer (struct list_head *list, nfs_version_initer_t init)
+static int
+nfs_add_initer (struct list_head *list, nfs_version_initer_t init,
+ gf_boolean_t required)
{
struct nfs_initer_list *new = NULL;
if ((!list) || (!init))
@@ -52,11 +244,13 @@ nfs_add_initer (struct list_head *list, nfs_version_initer_t init)
new = GF_CALLOC (1, sizeof (*new), gf_nfs_mt_nfs_initer_list);
if (!new) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Memory allocation failed");
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
return -1;
}
new->init = init;
+ new->required = required;
list_add_tail (&new->list, list);
return 0;
}
@@ -82,7 +276,7 @@ nfs_deinit_versions (struct list_head *versions, xlator_t *this)
*/
if (version->program)
rpcsvc_program_unregister (nfs->rpcsvc,
- *(version->program));
+ (version->program));
list_del (&version->list);
GF_FREE (version);
@@ -91,7 +285,6 @@ nfs_deinit_versions (struct list_head *versions, xlator_t *this)
return 0;
}
-
int
nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
{
@@ -104,7 +297,7 @@ nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
if ((!nfs) || (!this))
return -1;
- gf_log (GF_NFS, GF_LOG_DEBUG, "Initing protocol versions");
+ gf_msg_debug (GF_NFS, 0, "Initing protocol versions");
versions = &nfs->versions;
list_for_each_entry_safe (version, tmp, versions, list) {
if (!version->init) {
@@ -113,19 +306,41 @@ nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
}
prog = version->init (this);
- version->program = prog;
if (!prog) {
ret = -1;
goto err;
}
- gf_log (GF_NFS, GF_LOG_DEBUG, "Starting program: %s",
+ version->program = prog;
+ if (nfs->override_portnum)
+ prog->progport = nfs->override_portnum;
+ gf_msg_debug (GF_NFS, 0, "Starting program: %s",
prog->progname);
- ret = rpcsvc_program_register (nfs->rpcsvc, *prog);
+
+ ret = rpcsvc_program_register (nfs->rpcsvc, prog);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Program init failed");
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_PGM_INIT_FAIL,
+ "Program: %s init failed", prog->progname);
goto err;
}
+ if (nfs->register_portmap) {
+ ret = rpcsvc_program_register_portmap (prog,
+ prog->progport);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_PGM_REG_FAIL,
+ "%s program %s registration failed",
+ version->required ?
+ "Required" : "Optional",
+ prog->progname);
+
+ /* fatal error if the program is required */
+ if (version->required)
+ goto err;
+ }
+ }
+
}
ret = 0;
@@ -140,27 +355,47 @@ nfs_add_all_initiators (struct nfs_state *nfs)
int ret = 0;
/* Add the initializers for all versions. */
- ret = nfs_add_initer (&nfs->versions, mnt3svc_init);
+ ret = nfs_add_initer (&nfs->versions, mnt3svc_init, _gf_true);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add protocol"
- " initializer");
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PROT_INIT_ADD_FAIL,
+ "Failed to add MOUNT3 protocol initializer");
goto ret;
}
- ret = nfs_add_initer (&nfs->versions, mnt1svc_init);
+ ret = nfs_add_initer (&nfs->versions, mnt1svc_init, _gf_true);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add protocol"
- " initializer");
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PROT_INIT_ADD_FAIL,
+ "Failed to add MOUNT1 protocol initializer");
goto ret;
}
- ret = nfs_add_initer (&nfs->versions, nfs3svc_init);
+ ret = nfs_add_initer (&nfs->versions, nfs3svc_init, _gf_true);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add protocol"
- " initializer");
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PROT_INIT_ADD_FAIL,
+ "Failed to add NFS3 protocol initializer");
goto ret;
}
+ if (nfs->enable_nlm == _gf_true) {
+ ret = nfs_add_initer (&nfs->versions, nlm4svc_init, _gf_false);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_PROT_INIT_ADD_FAIL,
+ "Failed to add protocol initializer");
+ goto ret;
+ }
+ }
+
+ if (nfs->enable_acl == _gf_true) {
+ ret = nfs_add_initer (&nfs->versions, acl3svc_init, _gf_false);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_PROT_INIT_ADD_FAIL,
+ "Failed to add ACL protocol initializer");
+ goto ret;
+ }
+ }
+
ret = 0;
ret:
return ret;
@@ -203,10 +438,17 @@ nfs_subvolume_set_started (struct nfs_state *nfs, xlator_t *xl)
LOCK (&nfs->svinitlock);
{
for (;x < nfs->allsubvols; ++x) {
+ if (nfs->initedxl[x] == xl) {
+ gf_msg_debug (GF_NFS, 0,
+ "Volume already started %s",
+ xl->name);
+ break;
+ }
+
if (nfs->initedxl[x] == NULL) {
nfs->initedxl[x] = xl;
++nfs->upsubvols;
- gf_log (GF_NFS, GF_LOG_DEBUG, "Starting up: %s "
+ gf_msg_debug (GF_NFS, 0, "Starting up: %s "
", vols started till now: %d", xl->name,
nfs->upsubvols);
goto unlock;
@@ -227,12 +469,15 @@ nfs_start_subvol_lookup_cbk (call_frame_t *frame, void *cookie,
struct iatt *postparent)
{
if (op_ret == -1) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to lookup root: %s",
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, op_errno,
+ NFS_MSG_LOOKUP_ROOT_FAIL,
+ "Failed to lookup root: %s",
strerror (op_errno));
goto err;
}
- gf_log (GF_NFS, GF_LOG_TRACE, "Started %s", this->name);
+ nfs_subvolume_set_started (this->private, ((xlator_t *)cookie));
+ gf_msg_trace (GF_NFS, 0, "Started %s", ((xlator_t *)cookie)->name);
err:
return 0;
}
@@ -249,16 +494,17 @@ nfs_startup_subvolume (xlator_t *nfsx, xlator_t *xl)
return -1;
if (nfs_subvolume_started (nfsx->private, xl)) {
- gf_log (GF_NFS,GF_LOG_TRACE, "Subvolume already started: %s",
+ gf_msg_trace (GF_NFS, 0, "Subvolume already started: %s",
xl->name);
ret = 0;
goto err;
}
- nfs_subvolume_set_started (nfsx->private, xl);
- ret = nfs_inode_loc_fill (xl->itable->root, &rootloc);
+ ret = nfs_root_loc_fill (xl->itable, &rootloc);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init root loc");
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, 0,
+ NFS_MSG_ROOT_LOC_INIT_FAIL,
+ "Failed to init root loc");
goto err;
}
@@ -267,8 +513,9 @@ nfs_startup_subvolume (xlator_t *nfsx, xlator_t *xl)
nfs_start_subvol_lookup_cbk,
(void *)nfsx->private);
if (ret < 0) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to lookup root: %s",
- strerror (-ret));
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, -ret,
+ NFS_MSG_LOOKUP_ROOT_FAIL,
+ "Failed to lookup root: %s", strerror (-ret));
goto err;
}
@@ -291,12 +538,14 @@ nfs_startup_subvolumes (xlator_t *nfsx)
nfs = nfsx->private;
cl = nfs->subvols;
while (cl) {
- gf_log (GF_NFS, GF_LOG_DEBUG, "Starting subvolume: %s",
+ gf_msg_debug (GF_NFS, 0, "Starting subvolume: %s",
cl->xlator->name);
ret = nfs_startup_subvolume (nfsx, cl->xlator);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to start-up "
- "xlator: %s", cl->xlator->name);
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, 0,
+ NFS_MSG_STARTUP_FAIL,
+ "Failed to start-up xlator: %s",
+ cl->xlator->name);
goto err;
}
cl = cl->next;
@@ -320,8 +569,8 @@ nfs_init_subvolume (struct nfs_state *nfs, xlator_t *xl)
lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
xl->itable = inode_table_new (lrusize, xl);
if (!xl->itable) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to allocate "
- "inode table");
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to allocate inode table");
goto err;
}
ret = 0;
@@ -341,14 +590,15 @@ nfs_init_subvolumes (struct nfs_state *nfs, xlator_list_t *cl)
lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
nfs->subvols = cl;
- gf_log (GF_NFS, GF_LOG_TRACE, "inode table lru: %d", lrusize);
+ gf_msg_trace (GF_NFS, 0, "inode table lru: %d", lrusize);
while (cl) {
- gf_log (GF_NFS, GF_LOG_DEBUG, "Initing subvolume: %s",
+ gf_msg_debug (GF_NFS, 0, "Initing subvolume: %s",
cl->xlator->name);
ret = nfs_init_subvolume (nfs, cl->xlator);
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init "
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, 0,
+ NFS_MSG_XLATOR_INIT_FAIL, "Failed to init "
"xlator: %s", cl->xlator->name);
goto err;
}
@@ -357,15 +607,16 @@ nfs_init_subvolumes (struct nfs_state *nfs, xlator_list_t *cl)
}
LOCK_INIT (&nfs->svinitlock);
- nfs->initedxl = GF_CALLOC (svcount, sizeof (xlator_t *),
+ nfs->initedxl = GF_CALLOC (svcount, sizeof (xlator_t *),
gf_nfs_mt_xlator_t );
if (!nfs->initedxl) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to allocated inited xls");
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to allocated inited xls");
ret = -1;
goto err;
}
- gf_log (GF_NFS, GF_LOG_TRACE, "Inited volumes: %d", svcount);
+ gf_msg_trace (GF_NFS, 0, "Inited volumes: %d", svcount);
nfs->allsubvols = svcount;
ret = 0;
err:
@@ -388,8 +639,8 @@ nfs_user_root_create (nfs_user_t *newnfu)
int
-nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
- int auxcount)
+nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid,
+ rpc_transport_t *trans, gid_t *auxgids, int auxcount)
{
int x = 1;
int y = 0;
@@ -404,8 +655,12 @@ nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
newnfu->uid = uid;
newnfu->gids[0] = gid;
newnfu->ngrps = 1;
+ if (trans) {
+ memcpy (&newnfu->identifier, trans->peerinfo.identifier,
+ UNIX_PATH_MAX);
+ }
- gf_log (GF_NFS, GF_LOG_TRACE, "uid: %d, gid %d, gids: %d", uid, gid,
+ gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d", uid, gid,
auxcount);
if (!auxgids)
@@ -414,7 +669,7 @@ nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
for (; y < auxcount; ++x,++y) {
newnfu->gids[x] = auxgids[y];
++newnfu->ngrps;
- gf_log (GF_NFS, GF_LOG_TRACE, "gid: %d", auxgids[y]);
+ gf_msg_trace (GF_NFS, 0, "gid: %d", auxgids[y]);
}
return 0;
@@ -431,12 +686,32 @@ nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req)
return;
gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
- nfs_user_create (nfu, rpcsvc_request_uid (req), rpcsvc_request_gid (req)
- , gidarr, gids);
+ nfs_user_create (nfu, rpcsvc_request_uid (req),
+ rpcsvc_request_gid (req),
+ rpcsvc_request_transport (req),
+ gidarr, gids);
return;
}
+void
+nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,
+ uid_t uid, gid_t gid)
+{
+ gid_t *gidarr = NULL;
+ int gids = 0;
+
+ if ((!req) || (!nfu))
+ return;
+
+ gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
+ nfs_user_create (nfu, uid, gid, rpcsvc_request_transport (req),
+ gidarr, gids);
+
+ return;
+}
+
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -446,117 +721,835 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_nfs_mt_end + 1);
-
+
if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
}
return ret;
}
-int
-init (xlator_t *this) {
+struct nfs_state *
+nfs_init_state (xlator_t *this)
+{
struct nfs_state *nfs = NULL;
- int ret = -1;
+ int i = 0, ret = -1;
unsigned int fopspoolsize = 0;
+ char *optstr = NULL;
+ gf_boolean_t boolt = _gf_false;
+ struct stat stbuf = {0,};
if (!this)
- return -1;
+ return NULL;
- if ((!this->children) || (!this->children->xlator)) {
- gf_log (GF_NFS, GF_LOG_ERROR, "nfs must have at least one"
- " child subvolume");
- return -1;
+ if (!this->children) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_NFS_MAN_DISABLE,
+ "NFS is manually disabled: Exiting");
+ /* Nothing for nfs process to do, exit cleanly */
+ kill (getpid (), SIGTERM);
}
nfs = GF_CALLOC (1, sizeof (*nfs), gf_nfs_mt_nfs_state);
if (!nfs) {
- gf_log (GF_NFS, GF_LOG_ERROR, "memory allocation failed");
- return -1;
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "memory allocation failed");
+ return NULL;
}
- /* RPC service needs to be started before NFS versions can be inited. */
- nfs->rpcsvc = rpcsvc_init (this->ctx, this->options);
- if (!nfs->rpcsvc) {
- gf_log (GF_NFS, GF_LOG_ERROR, "RPC service init failed");
- goto free_nfs;
+ nfs->memfactor = GF_NFS_DEFAULT_MEMFACTOR;
+ if (dict_get (this->options, "nfs.mem-factor")) {
+ ret = dict_get_str (this->options, "nfs.mem-factor",
+ &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret,
+ NFS_MSG_DICT_GET_FAILED,
+ "Failed to parse dict");
+ goto free_rpcsvc;
+ }
+
+ ret = gf_string2uint (optstr, &nfs->memfactor);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse uint string");
+ goto free_rpcsvc;
+ }
}
- nfs->memfactor = GF_NFS_DEFAULT_MEMFACTOR;
fopspoolsize = nfs->memfactor * GF_NFS_CONCURRENT_OPS_MULT;
/* FIXME: Really saddens me to see this as xlator wide. */
nfs->foppool = mem_pool_new (struct nfs_fop_local, fopspoolsize);
if (!nfs->foppool) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to allocate fops local"
- " pool");
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to allocate fops local pool");
goto free_rpcsvc;
}
- this->private = (void *)nfs;
- INIT_LIST_HEAD (&nfs->versions);
- ret = nfs_add_all_initiators (nfs);
- if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add initiators");
- goto free_nfs;
+ nfs->dynamicvolumes = GF_NFS_DVM_OFF;
+ if (dict_get (this->options, "nfs.dynamic-volumes")) {
+ ret = dict_get_str (this->options, "nfs.dynamic-volumes",
+ &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret,
+ NFS_MSG_DICT_GET_FAILED,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse bool string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->dynamicvolumes = GF_NFS_DVM_ON;
}
- ret = nfs_init_subvolumes (nfs, this->children);
+ nfs->enable_nlm = _gf_true;
+ ret = dict_get_str_boolean (this->options, "nfs.nlm", _gf_true);
+ if (ret == _gf_false) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_NLM_MAN_DISABLE,
+ "NLM is manually disabled");
+ nfs->enable_nlm = _gf_false;
+ }
+
+ nfs->enable_acl = _gf_true;
+ ret = dict_get_str_boolean (this->options, "nfs.acl", _gf_true);
+ if (ret == _gf_false) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_ACL_MAN_DISABLE,
+ "ACL is manually disabled");
+ nfs->enable_acl = _gf_false;
+ }
+
+ nfs->enable_ino32 = 0;
+ if (dict_get (this->options, "nfs.enable-ino32")) {
+ ret = dict_get_str (this->options, "nfs.enable-ino32",
+ &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse bool string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->enable_ino32 = 1;
+ }
+
+ if (dict_get (this->options, "nfs.port")) {
+ ret = dict_get_str (this->options, "nfs.port",
+ &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2uint (optstr, &nfs->override_portnum);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse uint string");
+ goto free_foppool;
+ }
+ }
+
+ if (dict_get (this->options, "transport.socket.bind-address")) {
+ ret = dict_get_str (this->options,
+ "transport.socket.bind-address",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse "
+ "transport.socket.bind-address string");
+ } else {
+ this->instance_name = gf_strdup (optstr);
+ for (i = 0; i < strlen (this->instance_name); i++) {
+ if (this->instance_name[i] == '.' ||
+ this->instance_name[i] == ':')
+ this->instance_name[i] = '_';
+ }
+ }
+ }
+
+ if (dict_get(this->options, "transport.socket.listen-port") == NULL) {
+ if (nfs->override_portnum)
+ ret = gf_asprintf (&optstr, "%d",
+ nfs->override_portnum);
+ else
+ ret = gf_asprintf (&optstr, "%d", GF_NFS3_PORT);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "failed mem-allocation");
+ goto free_foppool;
+ }
+ ret = dict_set_dynstr (this->options,
+ "transport.socket.listen-port", optstr);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_DICT_SET_FAILED,
+ "dict_set_dynstr error");
+ goto free_foppool;
+ }
+ }
+
+ /* Right only socket support exists between nfs client and
+ * gluster nfs, so we can set default value as socket
+ */
+ ret = dict_set_str (this->options, "transport-type", "socket");
if (ret == -1) {
- gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init NFS exports");
- goto free_rpcsvc;
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_DICT_SET_FAILED,
+ "dict_set_str error");
+ goto free_foppool;
+ }
+
+ nfs->mount_udp = 0;
+ if (dict_get(this->options, "nfs.mount-udp")) {
+ ret = dict_get_str (this->options, "nfs.mount-udp", &optstr);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->mount_udp = 1;
+ }
+
+ nfs->exports_auth = GF_NFS_DEFAULT_EXPORT_AUTH;
+ if (dict_get(this->options, "nfs.exports-auth-enable")) {
+ ret = dict_get_str (this->options, "nfs.exports-auth-enable",
+ &optstr);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse bool string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->exports_auth = 1;
+ }
+
+ nfs->auth_refresh_time_secs = GF_NFS_DEFAULT_AUTH_REFRESH_INTERVAL_SEC;
+ if (dict_get (this->options, "nfs.auth-refresh-interval-sec")) {
+ ret = dict_get_str (this->options,
+ "nfs.auth-refresh-interval-sec", &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2uint (optstr, &nfs->auth_refresh_time_secs);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse uint string");
+ goto free_foppool;
+ }
+ }
+
+ nfs->auth_cache_ttl_sec = GF_NFS_DEFAULT_AUTH_CACHE_TTL_SEC;
+ if (dict_get (this->options, "nfs.auth-cache-ttl-sec")) {
+ ret = dict_get_str (this->options,
+ "nfs.auth-cache-ttl-sec", &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2uint (optstr, &nfs->auth_cache_ttl_sec);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse uint string");
+ goto free_foppool;
+ }
+ }
+
+ /* TODO: Make this a configurable option in case we don't want to read
+ * exports/netgroup files off disk when they change. */
+ nfs->refresh_auth = 1;
+
+ nfs->rmtab = gf_strdup (NFS_DATADIR "/rmtab");
+ if (dict_get(this->options, "nfs.mount-rmtab")) {
+ ret = dict_get_str (this->options, "nfs.mount-rmtab", &nfs->rmtab);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ /* check if writing the rmtab is disabled*/
+ if (nfs->rmtab && strcmp ("/-", nfs->rmtab) == 0) {
+ GF_FREE (nfs->rmtab);
+ nfs->rmtab = NULL;
+ }
+ }
+
+ /* support both options rpc-auth.ports.insecure and
+ * rpc-auth-allow-insecure for backward compatibility
+ */
+ nfs->allow_insecure = 1;
+ if (dict_get(this->options, "rpc-auth.ports.insecure")) {
+ ret = dict_get_str (this->options, "rpc-auth.ports.insecure",
+ &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_false)
+ nfs->allow_insecure = 0;
+ }
+
+ if (dict_get(this->options, "rpc-auth-allow-insecure")) {
+ ret = dict_get_str (this->options, "rpc-auth-allow-insecure",
+ &optstr);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret, NFS_MSG_PARSE_FAIL,
+ "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_PARSE_FAIL,
+ "Failed to parse bool string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_false)
+ nfs->allow_insecure = 0;
+ }
+
+ if (nfs->allow_insecure) {
+ /* blindly set both the options */
+ dict_del (this->options, "rpc-auth-allow-insecure");
+ ret = dict_set_str (this->options,
+ "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+ goto free_foppool;
+ }
+ dict_del (this->options, "rpc-auth.ports.insecure");
+ ret = dict_set_str (this->options,
+ "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+ goto free_foppool;
+ }
+ }
+
+ GF_OPTION_INIT ("nfs.rdirplus", nfs->rdirplus, bool, free_foppool);
+
+ GF_OPTION_INIT (OPT_SERVER_RPC_STATD, nfs->rpc_statd, path, free_foppool);
+
+ GF_OPTION_INIT (OPT_SERVER_RPC_STATD_PIDFILE, nfs->rpc_statd_pid_file, path, free_foppool);
+
+ GF_OPTION_INIT (OPT_SERVER_AUX_GIDS, nfs->server_aux_gids,
+ bool, free_foppool);
+ GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT,
+ nfs->server_aux_gids_max_age,
+ uint32, free_foppool);
+
+ if (gid_cache_init(&nfs->gid_cache, nfs->server_aux_gids_max_age) < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_INIT_GRP_CACHE_FAIL,
+ "Failed to initialize group cache.");
+ goto free_foppool;
+ }
+
+ ret = sys_access (nfs->rpc_statd, X_OK);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_WARNING, EPERM, NFS_MSG_NO_PERM,
+ "%s not enough permissions to access. Disabling NLM",
+ nfs->rpc_statd);
+ nfs->enable_nlm = _gf_false;
+ }
+
+ ret = sys_stat (nfs->rpc_statd, &stbuf);
+ if (ret || !S_ISREG (stbuf.st_mode)) {
+ gf_msg (GF_NFS, GF_LOG_WARNING, 0, NFS_MSG_REG_FILE_ERROR,
+ "%s not a regular file. Disabling NLM", nfs->rpc_statd);
+ nfs->enable_nlm = _gf_false;
+ }
+
+ nfs->rpcsvc = rpcsvc_init (this, this->ctx,
+ this->options, fopspoolsize);
+ if (!nfs->rpcsvc) {
+ ret = -1;
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RPC_INIT_FAIL,
+ "RPC service init failed");
+ goto free_foppool;
+ }
+
+ ret = rpcsvc_set_throttle_on (nfs->rpcsvc);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_ENABLE_THROTTLE_FAIL,
+ "Enabling throttle failed");
+ goto free_foppool;
+ }
+
+ ret = rpcsvc_set_outstanding_rpc_limit (nfs->rpcsvc,
+ this->options,
+ RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RPC_CONFIG_FAIL,
+ "Failed to configure outstanding-rpc-limit");
+ goto free_foppool;
}
+ nfs->register_portmap = rpcsvc_register_portmap_enabled (nfs->rpcsvc);
+
+ this->private = (void *)nfs;
+ INIT_LIST_HEAD (&nfs->versions);
+ nfs->generation = 1965;
+
+ ret = 0;
+
+free_foppool:
+ if (ret < 0)
+ mem_pool_destroy (nfs->foppool);
+
free_rpcsvc:
/*
* rpcsvc_deinit */
-free_nfs:
- if (ret == -1)
+ if (ret < 0) {
GF_FREE (nfs);
+ nfs = NULL;
+ }
+
+ return nfs;
+}
+
+int
+nfs_drc_init (xlator_t *this)
+{
+ int ret = -1;
+ rpcsvc_t *svc = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS, this, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, this->private, out);
+
+ svc = ((struct nfs_state *)(this->private))->rpcsvc;
+ if (!svc)
+ goto out;
+
+ ret = rpcsvc_drc_init (svc, this->options);
+
+ out:
+ return ret;
+}
+
+int
+nfs_reconfigure_state (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ int keyindx = 0;
+ char *rmtab = NULL;
+ char *rpc_statd = NULL;
+ gf_boolean_t optbool;
+ uint32_t optuint32;
+ struct nfs_state *nfs = NULL;
+ char *blacklist_keys[] = {
+ "nfs.port",
+ "nfs.transport-type",
+ "nfs.mem-factor",
+ NULL};
+
+ GF_VALIDATE_OR_GOTO (GF_NFS, this, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, this->private, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, options, out);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* Black listed options can't be reconfigured, they need
+ * NFS to be restarted. There are two cases 1. SET 2. UNSET.
+ * 1. SET */
+ while (blacklist_keys[keyindx]) {
+ if (dict_get (options, blacklist_keys[keyindx])) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0,
+ NFS_MSG_RECONFIG_FAIL,
+ "Reconfiguring %s needs NFS restart",
+ blacklist_keys[keyindx]);
+ goto out;
+ }
+ keyindx ++;
+ }
+
+ /* UNSET for nfs.mem-factor */
+ if ((!dict_get (options, "nfs.mem-factor")) &&
+ (nfs->memfactor != GF_NFS_DEFAULT_MEMFACTOR)) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_FAIL,
+ "Reconfiguring nfs.mem-factor needs NFS restart");
+ goto out;
+ }
+
+ /* UNSET for nfs.port */
+ if ((!dict_get (options, "nfs.port")) &&
+ (nfs->override_portnum)) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+ "Reconfiguring nfs.port needs NFS restart");
+ goto out;
+ }
+
+ /* reconfig nfs.rpc-statd... */
+ rpc_statd = GF_RPC_STATD_PROG;
+ if (dict_get (options, OPT_SERVER_RPC_STATD_PIDFILE)) {
+ ret = dict_get_str (options, "nfs.rpc-statd", &rpc_statd);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read reconfigured option: "
+ "nfs.rpc-statd");
+ goto out;
+ }
+ }
+
+ if (strcmp(nfs->rpc_statd, rpc_statd) != 0) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_FAIL,
+ "Reconfiguring nfs.rpc-statd needs NFS restart");
+ goto out;
+ }
+
+ /* reconfig nfs.mount-rmtab */
+ rmtab = NFS_DATADIR "/rmtab";
+ if (dict_get (options, "nfs.mount-rmtab")) {
+ ret = dict_get_str (options, "nfs.mount-rmtab", &rmtab);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read reconfigured option:"
+ " nfs.mount-rmtab");
+ goto out;
+ }
+ gf_path_strip_trailing_slashes (rmtab);
+ }
+ /* check if writing the rmtab is disabled*/
+ if (strcmp ("/-", rmtab) == 0) {
+ GF_FREE (nfs->rmtab);
+ nfs->rmtab = NULL;
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_WRITE_FAIL,
+ "Disabled writing of nfs.mount-rmtab");
+ } else if (!nfs->rmtab || strcmp (nfs->rmtab, rmtab) != 0) {
+ mount_rewrite_rmtab (nfs->mstate, rmtab);
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_PATH,
+ "Reconfigured nfs.mount-rmtab path: %s", nfs->rmtab);
+ }
+
+ GF_OPTION_RECONF (OPT_SERVER_AUX_GIDS, optbool,
+ options, bool, out);
+ if (nfs->server_aux_gids != optbool) {
+ nfs->server_aux_gids = optbool;
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_VALUE,
+ "Reconfigured %s with value %d", OPT_SERVER_AUX_GIDS,
+ optbool);
+ }
+
+ GF_OPTION_RECONF (OPT_SERVER_GID_CACHE_TIMEOUT, optuint32,
+ options, uint32, out);
+ if (nfs->server_aux_gids_max_age != optuint32) {
+ nfs->server_aux_gids_max_age = optuint32;
+ gid_cache_reconf (&nfs->gid_cache, optuint32);
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_VALUE,
+ "Reconfigured %s with value %d",
+ OPT_SERVER_GID_CACHE_TIMEOUT, optuint32);
+ }
+
+ GF_OPTION_RECONF ("nfs.rdirplus", optbool,
+ options, bool, out);
+ if (nfs->rdirplus != optbool) {
+ nfs->rdirplus = optbool;
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_VALUE,
+ "Reconfigured nfs.rdirplus with value %d", optbool);
+ }
+
+ /* reconfig nfs.dynamic-volumes */
+ ret = dict_get_str_boolean (options, "nfs.dynamic-volumes",
+ GF_NFS_DVM_OFF);
+ switch (ret) {
+ case GF_NFS_DVM_ON:
+ case GF_NFS_DVM_OFF:
+ optbool = ret;
+ break;
+ default:
+ optbool = GF_NFS_DVM_OFF;
+ break;
+ }
+ if (nfs->dynamicvolumes != optbool) {
+ nfs->dynamicvolumes = optbool;
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_VOL,
+ "Reconfigured nfs.dynamic-volumes with value %d",
+ optbool);
+ }
+
+ optbool = _gf_false;
+ if (dict_get (options, "nfs.enable-ino32")) {
+ ret = dict_get_str_boolean (options, "nfs.enable-ino32",
+ _gf_false);
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read reconfigured option: "
+ "nfs.enable-ino32");
+ goto out;
+ }
+ optbool = ret;
+ }
+ if (nfs->enable_ino32 != optbool) {
+ nfs->enable_ino32 = optbool;
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_RECONFIG_ENABLE,
+ "Reconfigured nfs.enable-ino32 with value %d", optbool);
+ }
+
+ /* nfs.nlm is enabled by default */
+ ret = dict_get_str_boolean (options, "nfs.nlm", _gf_true);
+ if (ret < 0) {
+ optbool = _gf_true;
+ } else {
+ optbool = ret;
+ }
+ if (nfs->enable_nlm != optbool) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_NLM_INFO, "NLM is"
+ " manually %s", (optbool ? "enabled":"disabled"));
+ nfs->enable_nlm = optbool;
+ nfs_reconfigure_nlm4 (this);
+ }
- gf_log (GF_NFS, GF_LOG_DEBUG, "NFS service started");
+ /* nfs.acl is enabled by default */
+ ret = dict_get_str_boolean (options, "nfs.acl", _gf_true);
+ if (ret < 0) {
+ optbool = _gf_true;
+ } else {
+ optbool = ret;
+ }
+ if (nfs->enable_acl != optbool) {
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_ACL_INFO, "ACL is "
+ "manually %s", (optbool ? "enabled":"disabled"));
+ nfs->enable_acl = optbool;
+ nfs_reconfigure_acl3 (this);
+ }
+
+ ret = 0;
+out:
return ret;
}
+/*
+ * reconfigure() for NFS server xlator.
+ *
+ * Re-applies @options layer by layer: nfs state, nfs3 state, mount state,
+ * rpcsvc options, the outstanding-rpc-limit, portmap registration and
+ * finally the DRC. The first step that fails is logged and ends the call.
+ *
+ * Returns 0 when every step succeeded, -1 on a NULL argument, a NULL
+ * this->private, or a failed step.
+ */
int
-notify (xlator_t *this, int32_t event, void *data, ...)
+reconfigure (xlator_t *this, dict_t *options)
{
+        struct nfs_state *state        = NULL;
+        gf_boolean_t      want_portmap = _gf_true;
+
+        if (!this || !this->private || !options)
+                return -1;
+
+        state = (struct nfs_state *)this->private;
+
+        /* Reconfigure nfs options */
+        if (nfs_reconfigure_state(this, options) != 0) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+                        "nfs reconfigure state failed");
+                return -1;
+        }
+
+        /* Reconfigure nfs3 options */
+        if (nfs3_reconfigure_state(this, options) != 0) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+                        "nfs3 reconfigure state failed");
+                return -1;
+        }
+
+        /* Reconfigure mount options */
+        if (mount_reconfigure_state(this, options) != 0) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+                        "mount reconfigure state failed");
+                return -1;
+        }
+
+        /* Reconfigure rpc layer */
+        if (rpcsvc_reconfigure_options (state->rpcsvc, options) != 0) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+                        "rpcsvc reconfigure options failed");
+                return -1;
+        }
+
+        /* Reconfigure rpc.outstanding-rpc-limit; only a negative result
+         * counts as failure here. */
+        if (rpcsvc_set_outstanding_rpc_limit (state->rpcsvc, options,
+                                RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT) < 0) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+                        "Failed to reconfigure outstanding-rpc-limit");
+                return -1;
+        }
+
+        /* Follow a change of the portmap registration setting; the result
+         * of the (un)registration itself is deliberately ignored. */
+        want_portmap = rpcsvc_register_portmap_enabled(state->rpcsvc);
+        if (state->register_portmap != want_portmap) {
+                state->register_portmap = want_portmap;
+                if (!want_portmap) {
+                        (void) nfs_program_unregister_portmap_all (state);
+                } else {
+                        (void) nfs_program_register_portmap_all (state);
+                }
+        }
+
+        /* Reconfigure drc */
+        if (rpcsvc_drc_reconfigure (state->rpcsvc, options) != 0) {
+                gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL,
+                        "rpcsvc DRC reconfigure failed");
+                return -1;
+        }
+
+        return 0;
+}
+
+/* Main init() routine for NFS server xlator. It inits NFS v3 protocol
+ * and its dependent protocols e.g. ACL, MOUNT v3 (mount3), NLM and
+ * DRC.
+ *
+ * Steps, in the order performed below:
+ * nfs_init_state(), nfs_add_all_initiators(), nfs_init_subvolumes(),
+ * mount_init_state(), nlm4_init_state(), nfs_init_versions() and
+ * nfs_drc_init(). The first step that fails is logged and init()
+ * returns immediately; no cleanup of the earlier steps is done here.
+ *
+ * Usage: glusterfsd:
+ * glusterfs_process_volfp() =>
+ * glusterfs_graph_activate() =>
+ * glusterfs_graph_init() =>
+ * xlator_init () => NFS init() routine
+ *
+ * If init() routine fails, the glusterfsd cleans up the NFS process
+ * by invoking cleanup_and_exit().
+ *
+ * RETURN:
+ * 0 (SUCCESS) if all protocol specific inits PASS.
+ * -1 (FAILURE) if 'this' is NULL or any of them FAILS.
+ */
+int
+init (xlator_t *this) {
+
struct nfs_state *nfs = NULL;
- xlator_t *subvol = NULL;
int ret = -1;
- nfs = (struct nfs_state *)this->private;
+ if (!this)
+ return (-1);
+
+ nfs = nfs_init_state (this); /* NULL result is treated as failure */
+ if (!nfs) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_OPT_INIT_FAIL,
+ "Failed to init nfs option");
+ return (-1);
+ }
+
+ ret = nfs_add_all_initiators (nfs);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_INIT_FAIL,
+ "Failed to add initiators");
+ return (-1);
+ }
+
+ ret = nfs_init_subvolumes (nfs, this->children); /* takes the xlator's child list */
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, 0, NFS_MSG_INIT_FAIL,
+ "Failed to init NFS exports");
+ return (-1);
+ }
+
+ ret = mount_init_state (this);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, 0, NFS_MSG_INIT_FAIL,
+ "Failed to init Mount state");
+ return (-1);
+ }
+
+ ret = nlm4_init_state (this);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_CRITICAL, 0, NFS_MSG_INIT_FAIL,
+ "Failed to init NLM state");
+ return (-1);
+ }
+
+ ret = nfs_init_versions (nfs, this);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_INIT_FAIL,
+ "Failed to initialize protocols");
+ return (-1);
+ }
+
+ ret = nfs_drc_init (this);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_INIT_FAIL,
+ "Failed to initialize DRC");
+ return (-1);
+ }
+
+ gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_STARTED,
+ "NFS service started");
+ return (0); /* SUCCESS */
+}
+
+
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ struct nfs_state *priv = NULL;
+
subvol = (xlator_t *)data;
- gf_log (GF_NFS, GF_LOG_TRACE, "Notification received: %d",
+ gf_msg_trace (GF_NFS, 0, "Notification received: %d",
event);
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- nfs_startup_subvolume (this, subvol);
- if ((nfs->upsubvols == nfs->allsubvols) &&
- (!nfs->subvols_started)) {
- nfs->subvols_started = 1;
- gf_log (GF_NFS, GF_LOG_TRACE, "All children up,"
- " starting RPC");
- ret = nfs_init_versions (nfs, this);
- if (ret == -1)
- gf_log (GF_NFS, GF_LOG_CRITICAL,
- "Failed to initialize "
- "protocols");
- }
- break;
- }
- case GF_EVENT_PARENT_UP:
- {
- default_notify (this, GF_EVENT_PARENT_UP, data);
- break;
- }
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ nfs_startup_subvolume (this, subvol);
+ break;
+
+ case GF_EVENT_CHILD_MODIFIED:
+ priv = this->private;
+ ++(priv->generation);
+ break;
+
+ case GF_EVENT_PARENT_UP:
+ default_notify (this, GF_EVENT_PARENT_UP, data);
+ break;
}
return 0;
@@ -569,40 +1562,209 @@ fini (xlator_t *this)
struct nfs_state *nfs = NULL;
+ mnt3svc_deinit (this);
nfs = (struct nfs_state *)this->private;
- gf_log (GF_NFS, GF_LOG_DEBUG, "NFS service going down");
+ gf_msg_debug (GF_NFS, 0, "NFS service going down");
nfs_deinit_versions (&nfs->versions, this);
+ GF_FREE (this->instance_name);
return 0;
}
-struct xlator_cbks cbks = { };
-struct xlator_fops fops = { };
+/*
+ * forget callback: detach the context this xlator stored on @inode and
+ * release it.
+ *
+ * Returns 0 once the context has been removed and freed, -1 when
+ * inode_ctx_del() reports failure (nothing is freed in that case).
+ */
+int32_t
+nfs_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t              raw_ctx = 0;
+        struct nfs_inode_ctx *nfs_ctx = NULL;
+        int32_t               rc      = -1;
+
+        if (inode_ctx_del (inode, this, &raw_ctx) == 0) {
+                /* The stored 64-bit value is the context pointer. */
+                nfs_ctx = (struct nfs_inode_ctx *)raw_ctx;
+                GF_FREE (nfs_ctx);
+                rc = 0;
+        }
+
+        return rc;
+}
+
+/*
+ * Tell whether export name @exname refers to volume @volname.
+ *
+ * One leading '/' of @exname, if present, is ignored; the remainder must
+ * match @volname exactly. Both arguments must be non-NULL strings.
+ */
+gf_boolean_t
+_nfs_export_is_for_vol (char *exname, char *volname)
+{
+        char *name = (exname[0] == '/') ? (exname + 1) : exname;
+
+        if (strcmp (name, volname) == 0)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Describe the NFS clients of one volume in @dict.
+ *
+ * The volume is named by the "volname" key of @dict. For each entry of the
+ * mount list whose export belongs to that volume, the keys
+ * "client<N>.hostname", "client<N>.bytesread" and "client<N>.byteswrite"
+ * are written (the two byte counters are always 0), and "clientcount"
+ * finally receives the number of clients written.
+ *
+ * Returns 0 on success and a non-zero value otherwise.
+ */
+int
+nfs_priv_to_dict (xlator_t *this, dict_t *dict)
+{
+        struct nfs_state  *priv         = NULL;
+        struct mountentry *entry        = NULL;
+        char              *vol          = NULL;
+        char               keybuf[1024] = {0,};
+        int                nclients     = 0;
+        int                ret          = -1;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+        GF_VALIDATE_OR_GOTO (THIS->name, dict, out);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &vol);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, NFS_MSG_VOL_NOT_FOUND,
+                        "Could not get volname");
+                goto out;
+        }
+
+        list_for_each_entry (entry, &priv->mstate->mountlist, mlist) {
+                if (_nfs_export_is_for_vol (entry->exname, vol)) {
+                        memset (keybuf, 0, sizeof (keybuf));
+                        snprintf (keybuf, sizeof (keybuf),
+                                  "client%d.hostname", nclients);
+                        ret = dict_set_str (dict, keybuf, entry->hostname);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        NFS_MSG_WRITE_FAIL,
+                                        "Error writing hostname to dict");
+                                goto out;
+                        }
+
+                        /* The nfs server has no connection data yet, so
+                         * both counters are reported as 0 to keep the cli
+                         * from failing.
+                         */
+                        memset (keybuf, 0, sizeof (keybuf));
+                        snprintf (keybuf, sizeof (keybuf),
+                                  "client%d.bytesread", nclients);
+                        ret = dict_set_uint64 (dict, keybuf, 0);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        NFS_MSG_WRITE_FAIL,
+                                        "Error writing bytes read to dict");
+                                goto out;
+                        }
+
+                        memset (keybuf, 0, sizeof (keybuf));
+                        snprintf (keybuf, sizeof (keybuf),
+                                  "client%d.byteswrite", nclients);
+                        ret = dict_set_uint64 (dict, keybuf, 0);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        NFS_MSG_WRITE_FAIL,
+                                        "Error writing bytes write to dict");
+                                goto out;
+                        }
+
+                        nclients++;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "clientcount", nclients);
+        if (ret != 0)
+                gf_msg (this->name, GF_LOG_ERROR, 0, NFS_MSG_WRITE_FAIL,
+                        "Error writing client count to dict");
+
+out:
+        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
+        return ret;
+}
+
+extern int32_t
+nlm_priv (xlator_t *this);
+
+/*
+ * Statedump hook: dump the DRC state first, then the NLM state.
+ *
+ * Returns 0 when both dumps succeed, otherwise the non-zero result of the
+ * first dump that failed (the NLM dump is skipped if the DRC dump fails).
+ */
+int32_t
+nfs_priv (xlator_t *this)
+{
+        struct nfs_state *state  = NULL;
+        int32_t           status = -1;
+
+        state = (struct nfs_state *)(this->private);
+
+        /* DRC needs the global drc structure, xl is of no use to it. */
+        status = rpcsvc_drc_priv (state->rpcsvc->drc);
+        if (status != 0) {
+                gf_msg_debug (this->name, 0, "Statedump of DRC failed");
+                return status;
+        }
+
+        status = nlm_priv (this);
+        if (status != 0)
+                gf_msg_debug (this->name, 0, "Statedump of NLM failed");
+
+        return status;
+}
+
+struct xlator_cbks cbks = {
+ .forget = nfs_forget, /* removes and frees the context this xlator stored on the inode */
+};
+
+struct xlator_fops fops; /* defined without an initializer */
+
+struct xlator_dumpops dumpops = {
+ .priv = nfs_priv, /* runs the DRC dump, then the NLM dump */
+ .priv_to_dict = nfs_priv_to_dict, /* writes per-client keys for a volume into a dict */
+};
+
+/* TODO: If needed, per-volume options below can be extended to be export
+ * specific also because after export-dir is introduced, a volume is not
+ * necessarily an export whereas different subdirectories within that volume
+ * can be and may need these options to be specified separately.
+*/
struct volume_options options[] = {
{ .key = {"nfs3.read-size"},
.type = GF_OPTION_TYPE_SIZET,
- .description = "Size in which the client should issue read requests"
- " to the Gluster NFSv3 server. Must be a multiple of"
- " 4KiB."
+ .min = GF_NFS3_RTMIN,
+ .max = GF_NFS3_RTMAX,
+ .default_value = TOSTRING(GF_NFS3_RTPREF),
+ .description = "Size in which the client should issue read requests "
+ "to the Gluster NFSv3 server. Must be a multiple of "
+ "4KB (4096). Min and Max supported values are 4KB "
+ "(4096) and 1MB (1048576) respectively. If the "
+ "specified value is within the supported range but "
+ "not a multiple of 4096, it is rounded up to the "
+ "nearest multiple of 4096."
},
{ .key = {"nfs3.write-size"},
.type = GF_OPTION_TYPE_SIZET,
- .description = "Size in which the client should issue write requests"
- " to the Gluster NFSv3 server. Must be a multiple of"
- " 4KiB."
+ .min = GF_NFS3_WTMIN,
+ .max = GF_NFS3_WTMAX,
+ .default_value = TOSTRING(GF_NFS3_WTPREF),
+ .description = "Size in which the client should issue write requests "
+ "to the Gluster NFSv3 server. Must be a multiple of "
+ "1KB (1024). Min and Max supported values are "
+ "4KB (4096) and 1MB(1048576) respectively. If the "
+ "specified value is within the supported range but "
+ "not a multiple of 4096, it is rounded up to the "
+ "nearest multiple of 4096."
},
{ .key = {"nfs3.readdir-size"},
.type = GF_OPTION_TYPE_SIZET,
+ .min = GF_NFS3_DTMIN,
+ .max = GF_NFS3_DTMAX,
+ .default_value = TOSTRING(GF_NFS3_DTPREF),
.description = "Size in which the client should issue directory "
- " reading requests."
+ "reading requests to the Gluster NFSv3 server. Must "
+ "be a multiple of 1KB (1024). Min and Max supported "
+ "values are 4KB (4096) and 1MB (1048576) respectively."
+ "If the specified value is within the supported range "
+ "but not a multiple of 4096, it is rounded up to the "
+ "nearest multiple of 4096."
},
{ .key = {"nfs3.*.volume-access"},
.type = GF_OPTION_TYPE_STR,
+ .value = {"read-only", "read-write"},
+ .default_value = "read-write",
.description = "Type of access desired for this subvolume: "
" read-only, read-write(default)"
},
{ .key = {"nfs3.*.trusted-write"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
.description = "On an UNSTABLE write from client, return STABLE flag"
" to force client to not send a COMMIT request. In "
"some environments, combined with a replicated "
@@ -617,6 +1779,7 @@ struct volume_options options[] = {
},
{ .key = {"nfs3.*.trusted-sync"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
.description = "All writes and COMMIT requests are treated as async."
" This implies that no write requests are guaranteed"
" to be on server disks when the write reply is "
@@ -624,66 +1787,126 @@ struct volume_options options[] = {
" trusted-write behaviour. Off by default."
},
+ { .key = {"nfs3.*.export-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = "",
+ .description = "By default, all subvolumes of nfs are exported as "
+ "individual exports. There are cases where a "
+ "subdirectory or subdirectories in the volume need to "
+ "be exported separately. This option can also be used "
+ "in conjunction with nfs3.export-volumes option to "
+ "restrict exports only to the subdirectories specified"
+ " through this option. Must be an absolute path. Along"
+ " with path allowed list of IPs/hostname can be "
+ "associated with each subdirectory. If provided "
+ "connection will allowed only from these IPs. By "
+ "default connections from all IPs are allowed. "
+ "Format: <dir>[(hostspec[|hostspec|...])][,...]. Where"
+ " hostspec can be an IP address, hostname or an IP "
+ "range in CIDR notation. "
+ "e.g. /foo(192.168.1.0/24|host1|10.1.1.8),/host2."
+ " NOTE: Care must be taken while configuring this "
+ "option as invalid entries and/or unreachable DNS "
+ "servers can introduce unwanted delay in all the mount"
+ " calls."
+ },
+ { .key = {"nfs3.export-dirs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "By default, all subvolumes of nfs are exported as "
+ "individual exports. There are cases where a "
+ "subdirectory or subdirectories in the volume need to "
+ "be exported separately. Enabling this option allows "
+ "any directory on a volumes to be exported separately."
+ "Directory exports are enabled by default."
+ },
+ { .key = {"nfs3.export-volumes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Enable or disable exporting whole volumes, instead "
+ "if used in conjunction with nfs3.export-dir, can "
+ "allow setting up only subdirectories as exports. On "
+ "by default."
+ },
{ .key = {"rpc-auth.auth-unix"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
.description = "Disable or enable the AUTH_UNIX authentication type."
- "Must always be enabled for better interoperability."
- "However, can be disabled if needed. Enabled by"
+ "Must always be enabled for better interoperability. "
+ "However, can be disabled if needed. Enabled by "
"default"
},
{ .key = {"rpc-auth.auth-null"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
.description = "Disable or enable the AUTH_NULL authentication type."
"Must always be enabled. This option is here only to"
" avoid unrecognized option warnings"
},
{ .key = {"rpc-auth.auth-unix.*"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Disable or enable the AUTH_UNIX authentication type "
+ "for a particular exported volume overriding defaults"
+ " and general setting for AUTH_UNIX scheme. Must "
+ "always be enabled for better interoperability. "
+ "However, can be disabled if needed. Enabled by "
+ "default."
+ },
+ { .key = {"rpc-auth.auth-unix.*.allow"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
.description = "Disable or enable the AUTH_UNIX authentication type "
- "for a particular exported volume over-riding defaults"
+ "for a particular exported volume overriding defaults"
" and general setting for AUTH_UNIX scheme. Must "
- "always be enabled for better interoperability."
- "However, can be disabled if needed. Enabled by"
+ "always be enabled for better interoperability. "
+ "However, can be disabled if needed. Enabled by "
"default."
},
{ .key = {"rpc-auth.auth-null.*"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
.description = "Disable or enable the AUTH_NULL authentication type "
- "for a particular exported volume over-riding defaults"
+ "for a particular exported volume overriding defaults"
" and general setting for AUTH_NULL. Must always be "
"enabled. This option is here only to avoid "
"unrecognized option warnings."
},
{ .key = {"rpc-auth.addr.allow"},
- .type = GF_OPTION_TYPE_STR,
+ .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR,
+ .default_value = "all",
.description = "Allow a comma separated list of addresses and/or"
" hostnames to connect to the server. By default, all"
- " connections are disallowed. This allows users to "
+ " connections are allowed. This allows users to "
"define a general rule for all exported volumes."
},
{ .key = {"rpc-auth.addr.reject"},
- .type = GF_OPTION_TYPE_STR,
+ .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR,
+ .default_value = "none",
.description = "Reject a comma separated list of addresses and/or"
" hostnames from connecting to the server. By default,"
- " all connections are disallowed. This allows users to"
+ " all connections are allowed. This allows users to "
"define a general rule for all exported volumes."
},
{ .key = {"rpc-auth.addr.*.allow"},
- .type = GF_OPTION_TYPE_STR,
+ .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR,
+ .default_value = "all",
.description = "Allow a comma separated list of addresses and/or"
" hostnames to connect to the server. By default, all"
- " connections are disallowed. This allows users to "
+ " connections are allowed. This allows users to "
"define a rule for a specific exported volume."
},
{ .key = {"rpc-auth.addr.*.reject"},
- .type = GF_OPTION_TYPE_STR,
+ .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR,
+ .default_value = "none",
.description = "Reject a comma separated list of addresses and/or"
" hostnames from connecting to the server. By default,"
- " all connections are disallowed. This allows users to"
+ " all connections are allowed. This allows users to "
"define a rule for a specific exported volume."
},
{ .key = {"rpc-auth.ports.insecure"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
.description = "Allow client connections from unprivileged ports. By "
"default only privileged ports are allowed. This is a"
"global setting in case insecure ports are to be "
@@ -691,23 +1914,190 @@ struct volume_options options[] = {
},
{ .key = {"rpc-auth.ports.*.insecure"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
.description = "Allow client connections from unprivileged ports. By "
"default only privileged ports are allowed. Use this"
- " option to set enable or disable insecure ports for "
- "a specific subvolume and to over-ride global setting "
- " set by the previous option."
+ " option to enable or disable insecure ports for "
+ "a specific subvolume and to override the global "
+ "setting set by the previous option."
},
{ .key = {"rpc-auth.addr.namelookup"},
.type = GF_OPTION_TYPE_BOOL,
- .description = "Users have the option of turning off name lookup for"
- " incoming client connections using this option. In some "
+ .default_value = "off",
+ .description = "Users have the option of turning on name lookup for"
+ " incoming client connections using this option. Use this "
+ "option to turn on name lookups during address-based "
+ "authentication. Turning this on will enable you to"
+ " use hostnames in nfs.rpc-auth-* filters. In some "
"setups, the name server can take too long to reply to DNS "
- "queries resulting in timeouts of mount requests. Use this "
- "option to turn off name lookups during address "
- "authentication. Note, turning this off will prevent you from"
- " using hostnames in rpc-auth.addr.* filters. By default, "
- " name lookup is on."
+ "queries resulting in timeouts of mount requests. By "
+ "default, name lookup is off"
+ },
+ { .key = {"nfs.dynamic-volumes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Internal option set to tell gnfs to use a different"
+ " scheme for encoding file handles when DVM is being"
+ " used."
+ },
+ { .key = {"nfs3.*.volume-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "When nfs.dynamic-volumes is set, gnfs expects every "
+ "subvolume to have this option set for it, so that "
+ "gnfs can use this option to identify the volume. "
+ "If all subvolumes do not have this option set, an "
+ "error is reported."
+ },
+ { .key = {"nfs.enable-ino32"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "For nfs clients or apps that do not support 64-bit "
+ "inode numbers, use this option to make NFS return "
+ "32-bit inode numbers instead. Disabled by default, so"
+ " NFS returns 64-bit inode numbers."
+ },
+ { .key = {"rpc.register-with-portmap"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "For systems that need to run multiple nfs servers, "
+ "only one registration is possible with "
+ "portmap service. Use this option to turn off portmap "
+ "registration for Gluster NFS. On by default"
+ },
+ { .key = {"rpc.outstanding-rpc-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = RPCSVC_MIN_OUTSTANDING_RPC_LIMIT,
+ .max = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT,
+ .default_value = TOSTRING(RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT),
+ .description = "Parameter to throttle the number of incoming RPC "
+ "requests from a client. 0 means no limit (can "
+ "potentially run out of memory)"
+ },
+ { .key = {"nfs.port"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 0xffff,
+ .default_value = TOSTRING(GF_NFS3_PORT),
+ .description = "Use this option on systems that need Gluster NFS to "
+ "be associated with a non-default port number."
+ },
+ { .key = {"nfs.mem-factor"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = TOSTRING(GF_NFS_DEFAULT_MEMFACTOR),
+ .description = "Use this option to make NFS be faster on systems by "
+ "using more memory. This option specifies a multiple "
+ "that determines the total amount of memory used. "
+ "Default value is 15. Increase to use more memory in "
+ "order to improve performance for certain use cases."
+ "Please consult gluster-users list before using this "
+ "option."
+ },
+ { .key = {"nfs.*.disable"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "This option is used to start or stop the NFS server "
+ "for individual volumes."
+ },
+ { .key = {"nfs.nlm"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option, if set to 'off', disables NLM server "
+ "by not registering the service with the portmapper."
+ " Set it to 'on' to re-enable it. Default value: 'on'"
},
- { .key = {NULL} },
-};
+ { .key = {"nfs.mount-udp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "set the option to 'on' to enable mountd on UDP. "
+ "Required for some Solaris and AIX NFS clients. "
+ "The need for enabling this option often depends "
+ "on the usage of NLM."
+ },
+ { .key = {"nfs.mount-rmtab"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = NFS_DATADIR "/rmtab",
+ .description = "Set the location of the cache file that is used to "
+ "list all the NFS-clients that have connected "
+ "through the MOUNT protocol. If this is on shared "
+ "storage, all GlusterFS servers will update and "
+ "output (with 'showmount') the same list. Set to "
+ "\"/-\" to disable."
+ },
+ { .key = {OPT_SERVER_RPC_STATD},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = GF_RPC_STATD_PROG,
+ .description = "The executable of RPC statd utility. "
+ "Defaults to " GF_RPC_STATD_PROG
+ },
+ { .key = {OPT_SERVER_RPC_STATD_PIDFILE},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = GF_RPC_STATD_PIDFILE,
+ .description = "The pid file of RPC statd utility. "
+ "Defaults to " GF_RPC_STATD_PIDFILE
+ },
+ { .key = {OPT_SERVER_AUX_GIDS},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Let the server look up which groups a user belongs "
+ "to, overwriting the list passed from the client. "
+ "This enables support for group lists longer than "
+ "can be passed through the NFS protocol, but is not "
+ "secure unless users and groups are well synchronized "
+ "between clients and servers."
+ },
+ { .key = {OPT_SERVER_GID_CACHE_TIMEOUT},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 3600,
+ .default_value = "300",
+ .description = "Number of seconds to cache auxiliary-GID data, when "
+ OPT_SERVER_AUX_GIDS " is set."
+ },
+ { .key = {"nfs.acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option is used to control ACL support for NFS."
+ },
+ { .key = {"nfs.drc"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "off",
+ .description = "Enable Duplicate Request Cache in gNFS server to "
+ "improve correctness of non-idempotent operations like "
+ "write, delete, link, et al"
+ },
+ { .key = {"nfs.drc-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0x20000",
+ .description = "Sets the number of non-idempotent "
+ "requests to cache in drc"
+ },
+ { .key = {"nfs.exports-auth-enable"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Set the option to 'on' to enable exports/netgroup "
+ "authentication in the NFS server and mount daemon."
+ },
+
+ { .key = {"nfs.auth-refresh-interval-sec"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Frequency in seconds that the daemon should check for"
+ " changes in the exports/netgroups file."
+ },
+
+ { .key = {"nfs.auth-cache-ttl-sec"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Sets the TTL of an entry in the auth cache. Value is "
+ "in seconds."
+ },
+ { .key = {"nfs.rdirplus"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "When this option is set to off NFS falls back to "
+ "standard readdir instead of readdirp"
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h
index b4973834558..9bcc88f5548 100644
--- a/xlators/nfs/server/src/nfs.h
+++ b/xlators/nfs/server/src/nfs.h
@@ -1,33 +1,21 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __NFS_H__
#define __NFS_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
+#include "lkowner.h"
+#include "gidcache.h"
#define GF_NFS "nfs"
@@ -42,6 +30,21 @@
#define GF_NFS_MIN_MEMFACTOR 1
#define GF_NFS_MAX_MEMFACTOR 30
+#define GF_NFS_DVM_ON 1
+#define GF_NFS_DVM_OFF 0
+
+/* Disable using the exports file by default */
+#define GF_NFS_DEFAULT_EXPORT_AUTH 0
+
+#define GF_NFS_DEFAULT_AUTH_REFRESH_INTERVAL_SEC 2
+#define GF_NFS_DEFAULT_AUTH_CACHE_TTL_SEC 300 /* 5 min */
+
+/* This corresponds to the maximum of 16 group IDs that are sent through an
+ * RPC request. Since NFS is the only one going to set this, we can be safe
+ * in keeping this size hardcoded.
+ */
+#define GF_REQUEST_MAXGROUPS 16
+
/* Callback into a version-specific NFS protocol.
* The return type is used by the nfs.c code to register the protocol.
* with the RPC service.
@@ -53,12 +56,15 @@ struct nfs_initer_list {
struct list_head list;
nfs_version_initer_t init;
rpcsvc_program_t *program;
+ gf_boolean_t required;
};
-
struct nfs_state {
rpcsvc_t *rpcsvc;
struct list_head versions;
+ struct mount3_state *mstate;
+ struct nfs3_state *nfs3state;
+ struct nlm4_state *nlm4state;
struct mem_pool *foppool;
unsigned int memfactor;
xlator_list_t *subvols;
@@ -68,8 +74,44 @@ struct nfs_state {
int upsubvols;
xlator_t **initedxl;
int subvols_started;
+ int dynamicvolumes;
+ int enable_ino32;
+ unsigned int override_portnum;
+ int allow_insecure;
+ int enable_nlm;
+ int enable_acl;
+ int mount_udp;
+
+ /* Enable exports auth model */
+ int exports_auth;
+ /* Refresh auth params from disk periodically */
+ int refresh_auth;
+
+ unsigned int auth_refresh_time_secs;
+ unsigned int auth_cache_ttl_sec;
+
+ char *rmtab;
+ struct rpc_clnt *rpc_clnt;
+ gf_boolean_t server_aux_gids;
+ uint32_t server_aux_gids_max_age;
+ gid_cache_t gid_cache;
+ uint32_t generation;
+ gf_boolean_t register_portmap;
+ char *rpc_statd;
+ char *rpc_statd_pid_file;
+ gf_boolean_t rdirplus;
+};
+
+struct nfs_inode_ctx { /* per-inode context; nfs_forget() casts the stored inode ctx value to this type and frees it */
+ struct list_head shares; /* list head; what gets linked here is not visible in this header - TODO confirm */
+ uint32_t generation; /* NOTE(review): presumably matched against nfs_state.generation - confirm */
};
+/* Accessors over a struct nfs_state pointer. The macro argument is wrapped
+ * in parentheses before the cast so that any pointer expression (not just a
+ * plain identifier) is cast as a whole.
+ */
+#define gf_nfs_dvm_on(nfsstt) (((struct nfs_state *)(nfsstt))->dynamicvolumes == GF_NFS_DVM_ON)
+#define gf_nfs_dvm_off(nfsstt) (((struct nfs_state *)(nfsstt))->dynamicvolumes == GF_NFS_DVM_OFF)
+#define __gf_nfs_enable_ino32(nfsstt) (((struct nfs_state *)(nfsstt))->enable_ino32)
+/* nfs_state of the xlator currently designated by THIS. */
+#define gf_nfs_this_private ((struct nfs_state *)((xlator_t *)THIS)->private)
+#define gf_nfs_enable_ino32() (__gf_nfs_enable_ino32(gf_nfs_this_private))
/* We have one gid more than the glusterfs maximum since we pass the primary
* gid as the first element of the array.
@@ -83,16 +125,26 @@ typedef struct nfs_user_info {
uid_t uid;
gid_t gids[NFS_NGROUPS];
int ngrps;
+ gf_lkowner_t lk_owner;
+ char identifier[UNIX_PATH_MAX]; /* ip of user */
} nfs_user_t;
extern int
nfs_user_root_create (nfs_user_t *newnfu);
extern int
-nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
- int auxcount);
+nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid,
+ rpc_transport_t *trans, gid_t *auxgids, int auxcount);
extern void
nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req);
+extern void
+nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,
+ uid_t uid, gid_t gid);
+extern int
+nfs_subvolume_started (struct nfs_state *nfs, xlator_t *xl);
+
+extern void
+nfs_fix_groups (xlator_t *this, call_stack_t *root);
#endif
diff --git a/xlators/nfs/server/src/nfs3-fh.c b/xlators/nfs/server/src/nfs3-fh.c
index c7eb78fb378..3feeae39dae 100644
--- a/xlators/nfs/server/src/nfs3-fh.c
+++ b/xlators/nfs/server/src/nfs3-fh.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
@@ -31,6 +17,8 @@
#include "nfs3-fh.h"
#include "nfs-common.h"
#include "iatt.h"
+#include "common-utils.h"
+#include "nfs-messages.h"
int
@@ -45,19 +33,16 @@ nfs3_fh_validate (struct nfs3_fh *fh)
if (fh->ident[1] != GF_NFSFH_IDENT1)
return 0;
- return 1;
-}
+ if (fh->ident[2] != GF_NFSFH_IDENT2)
+ return 0;
+ if (fh->ident[3] != GF_NFSFH_IDENT3)
+ return 0;
-xlator_t *
-nfs3_fh_to_xlator (xlator_list_t *cl, struct nfs3_fh *fh)
-{
- if ((!cl) || (!fh))
- return NULL;
-
- return nfs_xlid_to_xlator (cl, fh->xlatorid);
+ return 1;
}
+
void
nfs3_fh_init (struct nfs3_fh *fh, struct iatt *buf)
{
@@ -66,128 +51,92 @@ nfs3_fh_init (struct nfs3_fh *fh, struct iatt *buf)
fh->ident[0] = GF_NFSFH_IDENT0;
fh->ident[1] = GF_NFSFH_IDENT1;
+ fh->ident[2] = GF_NFSFH_IDENT2;
+ fh->ident[3] = GF_NFSFH_IDENT3;
- fh->hashcount = 0;
- fh->gen = buf->ia_gen;
- fh->ino = buf->ia_ino;
-
+ gf_uuid_copy (fh->gfid, buf->ia_gfid);
}
struct nfs3_fh
-nfs3_fh_build_root_fh (xlator_list_t *cl, xlator_t *xl, struct iatt buf)
+nfs3_fh_build_indexed_root_fh (xlator_list_t *cl, xlator_t *xl)
{
struct nfs3_fh fh = {{0}, };
+ struct iatt buf = {0, };
+ uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
if ((!cl) || (!xl))
return fh;
+ gf_uuid_copy (buf.ia_gfid, root);
nfs3_fh_init (&fh, &buf);
- fh.xlatorid = nfs_xlator_to_xlid (cl, xl);
- fh.ino = 1;
- fh.gen = 0;
+ fh.exportid [15] = nfs_xlator_to_xlid (cl, xl);
+
return fh;
}
-int
-nfs3_fh_is_root_fh (struct nfs3_fh *fh)
+struct nfs3_fh
+nfs3_fh_build_uuid_root_fh (uuid_t volumeid, uuid_t mountid)
{
- if (!fh)
- return 0;
+ struct nfs3_fh fh = {{0}, };
+ struct iatt buf = {0, };
+ uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- if (fh->hashcount == 0)
- return 1;
+ gf_uuid_copy (buf.ia_gfid, root);
+ nfs3_fh_init (&fh, &buf);
+ gf_uuid_copy (fh.exportid, volumeid);
+ gf_uuid_copy (fh.mountid, mountid);
- return 0;
+ return fh;
}
-nfs3_hash_entry_t
-nfs3_fh_hash_entry (ino_t ino, uint64_t gen)
+int
+nfs3_fh_is_root_fh (struct nfs3_fh *fh)
{
- nfs3_hash_entry_t hash = 0;
- int shiftsize = 48;
- nfs3_hash_entry_t inomsb = 0;
- nfs3_hash_entry_t inolsb = 0;
- nfs3_hash_entry_t inols23b = 0;
-
- nfs3_hash_entry_t genmsb = 0;
- nfs3_hash_entry_t genlsb = 0;
- nfs3_hash_entry_t genls23b = 0;
-
- hash = ino;
- while (shiftsize != 0) {
- hash ^= (ino >> shiftsize);
- shiftsize -= 16;
- }
-/*
- gf_log ("FILEHANDLE", GF_LOG_TRACE, "INO %"PRIu64, ino);
- gf_log ("FILEHANDLE",GF_LOG_TRACE, "PRI HASH %d", hash);
-*/
- inomsb = (ino >> 56);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inomsb %d", inomsb);
+ uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- inolsb = ((ino << 56) >> 56);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inolsb %d", inolsb);
-
- inolsb = (inolsb << 8);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inolsb to inomsb %d", inolsb);
- inols23b = ((ino << 40) >> 48);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inols23b %d", inols23b);
-
- inols23b = (inols23b << 8);
-// gf_log ("FILEHDNALE", GF_LOG_TRACE, "inols23b %d", inols23b);
-
- genmsb = (gen >> 56);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inomsb %d", inomsb);
-
- genlsb = ((gen << 56) >> 56);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inolsb %d", inolsb);
-
- genlsb = (genlsb << 8);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inolsb to inomsb %d", inolsb);
-
- genls23b = ((gen << 40) >> 48);
-// gf_log ("FILEHANDLE", GF_LOG_TRACE, "inols23b %d", inols23b);
-
- genls23b = (genls23b << 8);
-// gf_log ("FILEHDNALE", GF_LOG_TRACE, "inols23b %d", inols23b);
+ if (!fh)
+ return 0;
- hash ^= inolsb ^ inomsb ^ inols23b ^ genmsb ^ genlsb ^ genls23b;
- return hash;
+ if (gf_uuid_compare (fh->gfid, rootgfid) == 0)
+ return 1;
+ return 0;
}
void
-nfs3_fh_to_str (struct nfs3_fh *fh, char *str)
+nfs3_fh_to_str (struct nfs3_fh *fh, char *str, size_t len)
{
+ char gfid[GF_UUID_BUF_SIZE];
+ char exportid[GF_UUID_BUF_SIZE];
+ char mountid[GF_UUID_BUF_SIZE];
+
if ((!fh) || (!str))
return;
- sprintf (str, "FH: hashcount %d, xlid %d, gen %"PRIu64", ino %"PRIu64,
- fh->hashcount, fh->xlatorid, fh->gen, fh->ino);
+ snprintf (str, len, "FH: exportid %s, gfid %s, mountid %s",
+ uuid_utoa_r (fh->exportid, exportid),
+ uuid_utoa_r (fh->gfid, gfid),
+ uuid_utoa_r (fh->mountid, mountid));
}
-
void
nfs3_log_fh (struct nfs3_fh *fh)
{
-// int x = 0;
+ char gfidstr[512];
+ char exportidstr[512];
+
if (!fh)
return;
- gf_log ("nfs3-fh", GF_LOG_TRACE, "filehandle: hashcount %d, xlid %d, "
- "gen %"PRIu64", ino %"PRIu64, fh->hashcount, fh->xlatorid,
- fh->gen, fh->ino);
-/*
- for (; x < fh->hashcount; ++x)
- gf_log ("FILEHANDLE", GF_LOG_TRACE, "Hash %d: %d", x,
- fh->entryhash[x]);
-*/
+ gf_msg_trace ("nfs3-fh", 0, "filehandle: exportid 0x%s, gfid 0x%s",
+ uuid_utoa_r (fh->exportid, exportidstr),
+ uuid_utoa_r (fh->gfid, gfidstr));
}
-
int
nfs3_fh_build_parent_fh (struct nfs3_fh *child, struct iatt *newstat,
struct nfs3_fh *newfh)
@@ -196,76 +145,42 @@ nfs3_fh_build_parent_fh (struct nfs3_fh *child, struct iatt *newstat,
return -1;
nfs3_fh_init (newfh, newstat);
- newfh->xlatorid = child->xlatorid;
- if ((newstat->ia_ino == 1) && (newstat->ia_gen == 0)) {
- newfh->ino = 1;
- newfh->gen = 0;
- goto done;
- }
-
- newfh->hashcount = child->hashcount - 1;
- memcpy (newfh->entryhash, child->entryhash,
- newfh->hashcount * GF_NFSFH_ENTRYHASH_SIZE);
-
-done:
-// nfs3_log_fh (newfh);
-
+ gf_uuid_copy (newfh->exportid, child->exportid);
return 0;
}
+int
+nfs3_build_fh (inode_t *inode, uuid_t exportid, struct nfs3_fh *newfh)
+{
+ if (!newfh || !inode)
+ return -1;
+ newfh->ident[0] = GF_NFSFH_IDENT0;
+ newfh->ident[1] = GF_NFSFH_IDENT1;
+ newfh->ident[2] = GF_NFSFH_IDENT2;
+ newfh->ident[3] = GF_NFSFH_IDENT3;
+ gf_uuid_copy (newfh->gfid, inode->gfid);
+ gf_uuid_copy (newfh->exportid, exportid);
+ /*gf_uuid_copy (newfh->mountid, mountid);*/
+ return 0;
+}
int
nfs3_fh_build_child_fh (struct nfs3_fh *parent, struct iatt *newstat,
struct nfs3_fh *newfh)
{
- int entry = 0;
-
if ((!parent) || (!newstat) || (!newfh))
return -1;
nfs3_fh_init (newfh, newstat);
- newfh->xlatorid = parent->xlatorid;
- if ((newstat->ia_ino == 1) && (newstat->ia_gen == 0)) {
- newfh->ino = 1;
- newfh->gen = 0;
- goto done;
- }
-
- newfh->hashcount = parent->hashcount + 1;
- memcpy (newfh->entryhash, parent->entryhash,
- parent->hashcount * GF_NFSFH_ENTRYHASH_SIZE);
- entry = newfh->hashcount - 1;
- newfh->entryhash[entry] = nfs3_fh_hash_entry (parent->ino, parent->gen);
-
-done:
-// nfs3_log_fh (newfh);
-
+ gf_uuid_copy (newfh->exportid, parent->exportid);
+ gf_uuid_copy (newfh->mountid, parent->mountid);
return 0;
}
uint32_t
-nfs3_fh_compute_size (struct nfs3_fh *fh)
+nfs3_fh_compute_size ()
{
- if (!fh)
- return 0;
-
- return (GF_NFSFH_STATIC_SIZE +
- (fh->hashcount * GF_NFSFH_ENTRYHASH_SIZE));
+ return GF_NFSFH_STATIC_SIZE;
}
-
-int
-nfs3_fh_hash_index_is_beyond (struct nfs3_fh *fh, int hashidx)
-{
- if (!fh)
- return 1;
-
- if (fh->hashcount >= hashidx)
- return 0;
- else
- return 1;
-
- return 1;
-}
-
diff --git a/xlators/nfs/server/src/nfs3-fh.h b/xlators/nfs/server/src/nfs3-fh.h
index f526edf43b0..3af36cc98b0 100644
--- a/xlators/nfs/server/src/nfs3-fh.h
+++ b/xlators/nfs/server/src/nfs3-fh.h
@@ -1,90 +1,77 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS_FH_H_
#define _NFS_FH_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "xlator.h"
#include "xdr-nfs3.h"
#include "iatt.h"
#include <sys/types.h>
+#include "compat-uuid.h"
/* BIG FAT WARNING: The file handle code is tightly coupled to NFSv3 file
* handles for now. This will change if and when we need v4. */
#define GF_NFSFH_IDENT0 ':'
#define GF_NFSFH_IDENT1 'O'
-#define GF_NFSFH_IDENT_SIZE (sizeof(char) * 2)
-#define GF_NFSFH_STATIC_SIZE (GF_NFSFH_IDENT_SIZE + sizeof (uint16_t) + sizeof (uint16_t) + sizeof (uint64_t) + sizeof(uint64_t))
-#define GF_NFSFH_MAX_HASH_BYTES (NFS3_FHSIZE - GF_NFSFH_STATIC_SIZE)
-
-/* Each hash element in the file handle is of 2 bytes thus giving
- * us theoretically 65536 unique entries in a directory.
- */
-typedef uint16_t nfs3_hash_entry_t;
-#define GF_NFSFH_ENTRYHASH_SIZE (sizeof (nfs3_hash_entry_t))
-#define GF_NFSFH_MAXHASHES ((int)(GF_NFSFH_MAX_HASH_BYTES / GF_NFSFH_ENTRYHASH_SIZE))
+#define GF_NFSFH_IDENT2 'G'
+#define GF_NFSFH_IDENT3 'L'
+#define GF_NFSFH_IDENT_SIZE (sizeof(char) * 4)
+#define GF_NFSFH_STATIC_SIZE (GF_NFSFH_IDENT_SIZE + (3*sizeof (uuid_t)))
+#define nfs3_fh_exportid_to_index(exprtid) ((uint16_t)exprtid[15])
/* ATTENTION: Change in size of the structure below should be reflected in the
* GF_NFSFH_STATIC_SIZE.
*/
struct nfs3_fh {
/* Used to ensure that a bunch of bytes are actually a GlusterFS NFS
- * file handle. Should contain ":O"
+ * file handle. Should contain ":OGL"
*/
- char ident[2];
-
- /* Number of file/ino hash elements that follow the ino. */
- uint16_t hashcount;
-
- /* Basically, the position/index of an xlator among the children of
- * the NFS xlator.
+ char ident[4];
+
+ /* UUID that identifies an export. The value stored in exportid
+ * depends on the usage of gluster nfs. If the DVM is enabled using
+ * the nfs.dynamic-volumes option then exportid will contain the UUID
+ * of the volume so that gnfs is able to identify volumes uniquely
+ * through volume additions,deletions,migrations, etc.
+ *
+ * When not using dvm, exportid contains the index of the volume
+ * based on the position of the volume in the list of subvolumes
+ * for gnfs.
*/
- uint16_t xlatorid;
- uint64_t gen;
- uint64_t ino;
- nfs3_hash_entry_t entryhash[GF_NFSFH_MAXHASHES];
+ uuid_t exportid;
+
+ /* File/dir gfid. */
+ uuid_t gfid;
+ uuid_t mountid;
+ /* This structure must be exactly NFS3_FHSIZE (64) bytes long.
+ Having the structure shorter results in buffer overflows
+ during XDR decoding.
+ */
+ unsigned char padding[NFS3_FHSIZE - GF_NFSFH_STATIC_SIZE];
} __attribute__((__packed__));
+#define GF_NFS3FH_STATIC_INITIALIZER {{0},}
extern uint32_t
-nfs3_fh_compute_size (struct nfs3_fh *fh);
-
-extern int
-nfs3_fh_hash_index_is_beyond (struct nfs3_fh *fh, int hashidx);
+nfs3_fh_compute_size ();
extern uint16_t
-nfs3_fh_hash_entry (ino_t ino, uint64_t gen);
+nfs3_fh_hash_entry (uuid_t gfid);
extern int
nfs3_fh_validate (struct nfs3_fh *fh);
-extern xlator_t *
-nfs3_fh_to_xlator (xlator_list_t *cl, struct nfs3_fh *fh);
-
extern struct nfs3_fh
-nfs3_fh_build_root_fh (xlator_list_t *cl, xlator_t *xl, struct iatt buf);
+nfs3_fh_build_indexed_root_fh (xlator_list_t *cl, xlator_t *xl);
extern int
nfs3_fh_is_root_fh (struct nfs3_fh *fh);
@@ -97,9 +84,17 @@ extern void
nfs3_log_fh (struct nfs3_fh *fh);
extern void
-nfs3_fh_to_str (struct nfs3_fh *fh, char *str);
+nfs3_fh_to_str (struct nfs3_fh *fh, char *str, size_t len);
extern int
nfs3_fh_build_parent_fh (struct nfs3_fh *child, struct iatt *newstat,
struct nfs3_fh *newfh);
+
+extern struct nfs3_fh
+nfs3_fh_build_uuid_root_fh (uuid_t volumeid, uuid_t mountid);
+
+extern int
+nfs3_build_fh (inode_t *inode, uuid_t exportid,
+ struct nfs3_fh *newfh);
+
#endif
diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
index 615c2166019..5ed57bde0e2 100644
--- a/xlators/nfs/server/src/nfs3-helpers.c
+++ b/xlators/nfs/server/src/nfs3-helpers.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <inttypes.h>
#include "xlator.h"
@@ -35,8 +21,18 @@
#include "nfs3-helpers.h"
#include "nfs-mem-types.h"
#include "iatt.h"
+#include "common-utils.h"
+#include "nfs-messages.h"
+#include "mount3.h"
#include <string.h>
+extern int
+nfs3_set_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh);
+
+extern int
+nfs3_is_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh);
+
+
#define nfs3_call_resume(cst) \
do { \
if (((cst)) && (cst)->resume_fn) \
@@ -86,18 +82,39 @@ struct nfs3stat_strerror nfs3stat_strerror_table[] = {
{ NFS3ERR_SERVERFAULT, "Error occurred on the server or IO Error" },
{ NFS3ERR_BADTYPE, "Type not supported by the server" },
{ NFS3ERR_JUKEBOX, "Cannot complete server initiated request" },
- { -1, "IO Error" },
+ { NFS3ERR_END_OF_LIST, "IO Error" },
};
+uint64_t
+nfs3_iatt_gfid_to_ino (struct iatt *buf)
+{
+ uint64_t ino = 0;
+
+ if (!buf)
+ return 0;
+
+ if (gf_nfs_enable_ino32()) {
+ ino = (uint32_t )nfs_hash_gfid (buf->ia_gfid);
+ goto hashout;
+ }
+
+ /* from posix its guaranteed to send unique ino */
+ ino = buf->ia_ino;
+
+hashout:
+ return ino;
+}
+
+
void
-nfs3_map_xlid_to_statdev (struct iatt *ia, uint16_t xlid)
+nfs3_map_deviceid_to_statdev (struct iatt *ia, uint64_t deviceid)
{
if (!ia)
return;
- ia->ia_dev = xlid;
+ ia->ia_dev = deviceid;
}
@@ -208,6 +225,7 @@ nfs3_errno_to_nfsstat3 (int errnum)
stat = NFS3ERR_SERVERFAULT;
break;
+ case ENOTSUP:
case ENOSYS:
stat = NFS3ERR_NOTSUPP;
break;
@@ -218,6 +236,15 @@ nfs3_errno_to_nfsstat3 (int errnum)
case ESTALE:
stat = NFS3ERR_STALE;
+ break;
+
+ case ENOTCONN:
+ stat = NFS3ERR_IO;
+ break;
+
+ case EDQUOT:
+ stat = NFS3ERR_DQUOT;
+ break;
default:
stat = NFS3ERR_SERVERFAULT;
@@ -227,6 +254,20 @@ nfs3_errno_to_nfsstat3 (int errnum)
return stat;
}
+/*
+ * Special case: If op_ret is -1, it's very unusual op_errno being
+ * 0 which means something came wrong from upper layer(s). If it
+ * happens by any means, then set NFS3 status to NFS3ERR_SERVERFAULT.
+ */
+nfsstat3
+nfs3_cbk_errno_status (int32_t op_ret, int32_t op_errno)
+{
+ if ((op_ret == -1) && (op_errno == 0)) {
+ return NFS3ERR_SERVERFAULT;
+ }
+
+ return nfs3_errno_to_nfsstat3 (op_errno);
+}
void
nfs3_fill_lookup3res_error (lookup3res *res, nfsstat3 stat,
@@ -241,91 +282,82 @@ nfs3_fill_lookup3res_error (lookup3res *res, nfsstat3 stat,
}
-fattr3
-nfs3_stat_to_fattr3 (struct iatt *buf)
+void
+nfs3_stat_to_fattr3 (struct iatt *buf, fattr3 *fa)
{
- fattr3 fa = {0, };
+ if (buf == NULL || fa == NULL) {
+ errno = EINVAL;
+ return;
+ }
if (IA_ISDIR (buf->ia_type))
- fa.type = NF3DIR;
+ fa->type = NF3DIR;
else if (IA_ISREG (buf->ia_type))
- fa.type = NF3REG;
+ fa->type = NF3REG;
else if (IA_ISCHR (buf->ia_type))
- fa.type = NF3CHR;
+ fa->type = NF3CHR;
else if (IA_ISBLK (buf->ia_type))
- fa.type = NF3BLK;
+ fa->type = NF3BLK;
else if (IA_ISFIFO (buf->ia_type))
- fa.type = NF3FIFO;
+ fa->type = NF3FIFO;
else if (IA_ISLNK (buf->ia_type))
- fa.type = NF3LNK;
+ fa->type = NF3LNK;
else if (IA_ISSOCK (buf->ia_type))
- fa.type = NF3SOCK;
+ fa->type = NF3SOCK;
if (IA_PROT_RUSR (buf->ia_prot))
- fa.mode |= NFS3MODE_ROWNER;
+ fa->mode |= NFS3MODE_ROWNER;
if (IA_PROT_WUSR (buf->ia_prot))
- fa.mode |= NFS3MODE_WOWNER;
+ fa->mode |= NFS3MODE_WOWNER;
if (IA_PROT_XUSR (buf->ia_prot))
- fa.mode |= NFS3MODE_XOWNER;
+ fa->mode |= NFS3MODE_XOWNER;
if (IA_PROT_RGRP (buf->ia_prot))
- fa.mode |= NFS3MODE_RGROUP;
+ fa->mode |= NFS3MODE_RGROUP;
if (IA_PROT_WGRP (buf->ia_prot))
- fa.mode |= NFS3MODE_WGROUP;
+ fa->mode |= NFS3MODE_WGROUP;
if (IA_PROT_XGRP (buf->ia_prot))
- fa.mode |= NFS3MODE_XGROUP;
+ fa->mode |= NFS3MODE_XGROUP;
if (IA_PROT_ROTH (buf->ia_prot))
- fa.mode |= NFS3MODE_ROTHER;
+ fa->mode |= NFS3MODE_ROTHER;
if (IA_PROT_WOTH (buf->ia_prot))
- fa.mode |= NFS3MODE_WOTHER;
+ fa->mode |= NFS3MODE_WOTHER;
if (IA_PROT_XOTH (buf->ia_prot))
- fa.mode |= NFS3MODE_XOTHER;
+ fa->mode |= NFS3MODE_XOTHER;
if (IA_PROT_SUID (buf->ia_prot))
- fa.mode |= NFS3MODE_SETXUID;
+ fa->mode |= NFS3MODE_SETXUID;
if (IA_PROT_SGID (buf->ia_prot))
- fa.mode |= NFS3MODE_SETXGID;
+ fa->mode |= NFS3MODE_SETXGID;
if (IA_PROT_STCKY (buf->ia_prot))
- fa.mode |= NFS3MODE_SAVESWAPTXT;
+ fa->mode |= NFS3MODE_SAVESWAPTXT;
- fa.nlink = buf->ia_nlink;
- fa.uid = buf->ia_uid;
- fa.gid = buf->ia_gid;
- fa.size = buf->ia_size;
- fa.used = (buf->ia_blocks * 512);
+ fa->nlink = buf->ia_nlink;
+ fa->uid = buf->ia_uid;
+ fa->gid = buf->ia_gid;
+ fa->size = buf->ia_size;
+ fa->used = (buf->ia_blocks * 512);
if ((IA_ISCHR (buf->ia_type) || IA_ISBLK (buf->ia_type))) {
- fa.rdev.specdata1 = ia_major (buf->ia_rdev);
- fa.rdev.specdata2 = ia_minor (buf->ia_rdev);
+ fa->rdev.specdata1 = ia_major (buf->ia_rdev);
+ fa->rdev.specdata2 = ia_minor (buf->ia_rdev);
} else {
- fa.rdev.specdata1 = 0;
- fa.rdev.specdata2 = 0;
+ fa->rdev.specdata1 = 0;
+ fa->rdev.specdata2 = 0;
}
- fa.fsid = buf->ia_dev;
- fa.fileid = buf->ia_ino;
- /* FIXME: Handle time resolutions for sub-second granularity */
- if (buf->ia_atime == 9669) {
- fa.mtime.seconds = 0;
- fa.mtime.nseconds = 0;
- fa.atime.seconds = 0;
- fa.atime.nseconds = 0;
- } else {
- fa.mtime.seconds = buf->ia_mtime;
- fa.mtime.nseconds = 0;
- fa.atime.seconds = buf->ia_atime;
- fa.atime.seconds = 0;
- fa.atime.nseconds = 0;
- }
+ fa->fsid = buf->ia_dev;
+ fa->fileid = nfs3_iatt_gfid_to_ino (buf);
- fa.atime.seconds = buf->ia_atime;
- fa.atime.nseconds = 0;
+ fa->atime.seconds = buf->ia_atime;
+ fa->atime.nseconds = buf->ia_atime_nsec;
- fa.ctime.seconds = buf->ia_ctime;
- fa.ctime.nseconds = 0;
+ fa->ctime.seconds = buf->ia_ctime;
+ fa->ctime.nseconds = buf->ia_ctime_nsec;
- return fa;
+ fa->mtime.seconds = buf->ia_mtime;
+ fa->mtime.nseconds = buf->ia_mtime_nsec;
}
@@ -341,10 +373,10 @@ nfs3_stat_to_post_op_attr (struct iatt *buf)
* returning these zeroed out attrs.
*/
attr.attributes_follow = FALSE;
- if (nfs_zero_filled_stat (buf))
+ if (gf_is_zero_filled_stat (buf))
goto out;
- attr.post_op_attr_u.attributes = nfs3_stat_to_fattr3 (buf);
+ nfs3_stat_to_fattr3 (buf, &(attr.post_op_attr_u.attributes));
attr.attributes_follow = TRUE;
out:
@@ -362,16 +394,15 @@ nfs3_stat_to_pre_op_attr (struct iatt *pre)
* returning these zeroed out attrs.
*/
poa.attributes_follow = FALSE;
- if (nfs_zero_filled_stat (pre))
+ if (gf_is_zero_filled_stat (pre))
goto out;
poa.attributes_follow = TRUE;
poa.pre_op_attr_u.attributes.size = pre->ia_size;
- if (pre->ia_atime == 9669)
- poa.pre_op_attr_u.attributes.mtime.seconds = 0;
- else
- poa.pre_op_attr_u.attributes.mtime.seconds = pre->ia_mtime;
+ poa.pre_op_attr_u.attributes.mtime.seconds = pre->ia_mtime;
+ poa.pre_op_attr_u.attributes.mtime.nseconds = pre->ia_mtime_nsec;
poa.pre_op_attr_u.attributes.ctime.seconds = pre->ia_ctime;
+ poa.pre_op_attr_u.attributes.ctime.nseconds = pre->ia_ctime_nsec;
out:
return poa;
@@ -388,21 +419,14 @@ nfs3_fill_lookup3res_success (lookup3res *res, nfsstat3 stat,
res->status = stat;
if (fh) {
res->lookup3res_u.resok.object.data.data_val = (void *)fh;
- fhlen = nfs3_fh_compute_size (fh);
+ fhlen = nfs3_fh_compute_size ();
res->lookup3res_u.resok.object.data.data_len = fhlen;
}
obj.attributes_follow = FALSE;
dir.attributes_follow = FALSE;
- if (buf && fh) {
- nfs3_map_xlid_to_statdev (buf, fh->xlatorid);
- obj = nfs3_stat_to_post_op_attr (buf);
- }
-
- if (postparent && fh) {
- nfs3_map_xlid_to_statdev (postparent, fh->xlatorid);
- dir = nfs3_stat_to_post_op_attr (postparent);
- }
+ obj = nfs3_stat_to_post_op_attr (buf);
+ dir = nfs3_stat_to_post_op_attr (postparent);
res->lookup3res_u.resok.obj_attributes = obj;
res->lookup3res_u.resok.dir_attributes = dir;
@@ -411,10 +435,13 @@ nfs3_fill_lookup3res_success (lookup3res *res, nfsstat3 stat,
void
nfs3_fill_lookup3res (lookup3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
- struct iatt *buf, struct iatt *postparent)
+ struct iatt *buf, struct iatt *postparent,
+ uint64_t deviceid)
{
memset (res, 0, sizeof (*res));
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
if (stat != NFS3_OK)
nfs3_fill_lookup3res_error (res, stat, postparent);
else
@@ -431,7 +458,7 @@ nfs3_extract_getattr_fh (getattr3args *args)
void
nfs3_fill_getattr3res (getattr3res *res, nfsstat3 stat, struct iatt *buf,
- uint16_t xlid)
+ uint64_t deviceid)
{
memset (res, 0, sizeof (*res));
@@ -439,9 +466,8 @@ nfs3_fill_getattr3res (getattr3res *res, nfsstat3 stat, struct iatt *buf,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (buf, xlid);
- res->getattr3res_u.resok.obj_attributes = nfs3_stat_to_fattr3 (buf);
-
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ nfs3_stat_to_fattr3 (buf, &(res->getattr3res_u.resok.obj_attributes));
}
@@ -454,7 +480,7 @@ nfs3_extract_fsinfo_fh (fsinfo3args *args)
void
nfs3_fill_fsinfo3res (struct nfs3_state *nfs3, fsinfo3res *res,
- nfsstat3 status, struct iatt *fsroot, uint16_t xlid)
+ nfsstat3 status, struct iatt *fsroot, uint64_t deviceid)
{
fsinfo3resok resok = {{0}, };
nfstime3 tdelta = GF_NFS3_TIMEDELTA_SECS;
@@ -464,7 +490,7 @@ nfs3_fill_fsinfo3res (struct nfs3_state *nfs3, fsinfo3res *res,
if (status != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (fsroot, xlid);
+ nfs3_map_deviceid_to_statdev (fsroot, deviceid);
resok.obj_attributes = nfs3_stat_to_post_op_attr (fsroot);
resok.rtmax = nfs3->readsize;
resok.rtpref = nfs3->readsize;
@@ -473,7 +499,7 @@ nfs3_fill_fsinfo3res (struct nfs3_state *nfs3, fsinfo3res *res,
resok.wtpref = nfs3->writesize;
resok.wtmult = GF_NFS3_WTMULT;
resok.dtpref = nfs3->readdirsize;
- resok.maxfilesize = GF_NFS3_MAXFILE;
+ resok.maxfilesize = GF_NFS3_MAXFILESIZE;
resok.time_delta = tdelta;
resok.properties = GF_NFS3_FS_PROP;
@@ -511,7 +537,7 @@ char *
nfsstat3_strerror(int stat)
{
int i;
- for(i = 0; nfs3stat_strerror_table[i].stat != -1; i++) {
+ for(i = 0; nfs3stat_strerror_table[i].stat != NFS3ERR_END_OF_LIST ; i++) {
if (nfs3stat_strerror_table[i].stat == stat)
return nfs3stat_strerror_table[i].strerror;
}
@@ -528,157 +554,49 @@ nfs3_prep_access3args (access3args *args, struct nfs3_fh *fh)
args->object.data.data_val = (void *)fh;
}
+#define POSIX_READ 4
+#define POSIX_WRITE 2
+#define POSIX_EXEC 1
uint32_t
-nfs3_owner_accessbits (ia_prot_t prot, ia_type_t type, uint32_t request)
+nfs3_accessbits (int32_t accbits)
{
- uint32_t accresult = 0;
-
- if (IA_PROT_RUSR (prot) && (request & ACCESS3_READ))
- accresult |= ACCESS3_READ;
-
- if (request & ACCESS3_LOOKUP)
- if ((IA_ISDIR (type)) && (IA_PROT_XUSR (prot)))
- accresult |= ACCESS3_LOOKUP;
-
- if ((IA_PROT_WUSR (prot) && (request & ACCESS3_MODIFY)))
- accresult |= ACCESS3_MODIFY;
-
- if ((IA_PROT_WUSR (prot) && (request & ACCESS3_EXTEND)))
- accresult |= ACCESS3_EXTEND;
-
- /* ACCESS3_DELETE is ignored for now since that requires
- * knowing the permissions on the parent directory.
- */
-
- if (request & ACCESS3_EXECUTE)
- if (IA_PROT_XUSR (prot) && (!IA_ISDIR (type)))
- accresult |= ACCESS3_EXECUTE;
-
- return accresult;
-}
-
-
-uint32_t
-nfs3_group_accessbits (ia_prot_t prot, ia_type_t type, uint32_t request)
-{
- uint32_t accresult = 0;
-
- if (IA_PROT_RGRP (prot) && (request & ACCESS3_READ))
- accresult |= ACCESS3_READ;
-
- if (request & ACCESS3_LOOKUP)
- if ((IA_ISDIR (type)) && IA_PROT_RGRP (prot))
- accresult |= ACCESS3_LOOKUP;
-
- if (IA_PROT_WGRP (prot) && (request & ACCESS3_MODIFY))
- accresult |= ACCESS3_MODIFY;
-
- if (IA_PROT_WGRP (prot) && (request & ACCESS3_EXTEND))
- accresult |= ACCESS3_EXTEND;
-
- /* ACCESS3_DELETE is ignored for now since that requires
- * knowing the permissions on the parent directory.
- */
-
- if (request & ACCESS3_EXECUTE)
- if (IA_PROT_XGRP (prot) && (!IA_ISDIR (type)))
- accresult |= ACCESS3_EXECUTE;
-
- return accresult;
-}
-
-
-uint32_t
-nfs3_other_accessbits (ia_prot_t prot, ia_type_t type, uint32_t request)
-{
- uint32_t accresult = 0;
+ uint32_t accresult = 0;
- if (IA_PROT_ROTH (prot) && (request & ACCESS3_READ))
+ if (accbits & POSIX_READ)
accresult |= ACCESS3_READ;
- if (request & ACCESS3_LOOKUP)
- if (IA_ISDIR (type) && IA_PROT_ROTH (prot))
- accresult |= ACCESS3_LOOKUP;
-
- if (IA_PROT_WOTH (prot) && (request & ACCESS3_MODIFY))
- accresult |= ACCESS3_MODIFY;
-
- if (IA_PROT_WOTH (prot) && (request & ACCESS3_EXTEND))
- accresult |= ACCESS3_EXTEND;
+ if (accbits & POSIX_WRITE)
+ accresult |= (ACCESS3_MODIFY | ACCESS3_EXTEND | ACCESS3_DELETE);
- /* ACCESS3_DELETE is ignored for now since that requires
- * knowing the permissions on the parent directory.
- */
-
- if (request & ACCESS3_EXECUTE)
- if (IA_PROT_XOTH (prot) && (!IA_ISDIR (type)))
- accresult |= ACCESS3_EXECUTE;
+ /* lookup on directory allowed only in case of execute permission */
+ if (accbits & POSIX_EXEC)
+ accresult |= (ACCESS3_EXECUTE | ACCESS3_LOOKUP);
return accresult;
}
-
uint32_t
-nfs3_superuser_accessbits (ia_prot_t prot, ia_type_t type, uint32_t request)
+nfs3_request_to_accessbits (int32_t accbits)
{
- uint32_t accresult = 0;
-
- if (request & ACCESS3_READ)
- accresult |= ACCESS3_READ;
-
- if (request & ACCESS3_LOOKUP)
- if (IA_ISDIR (type))
- accresult |= ACCESS3_LOOKUP;
-
- if (request & ACCESS3_MODIFY)
- accresult |= ACCESS3_MODIFY;
-
- if (request & ACCESS3_EXTEND)
- accresult |= ACCESS3_EXTEND;
-
- /* ACCESS3_DELETE is ignored for now since that requires
- * knowing the permissions on the parent directory.
- */
+ uint32_t acc_request = 0;
- if (request & ACCESS3_EXECUTE)
- if ((IA_PROT_XOTH (prot) || IA_PROT_XUSR (prot) ||
- IA_PROT_XGRP (prot)) && (!IA_ISDIR (type)))
- accresult |= ACCESS3_EXECUTE;
+ if (accbits & ACCESS3_READ)
+ acc_request |= POSIX_READ;
- return accresult;
-}
+ if (accbits & (ACCESS3_MODIFY | ACCESS3_EXTEND | ACCESS3_DELETE))
+ acc_request |= POSIX_WRITE;
+ /* For lookup on directory check for execute permission */
+ if (accbits & (ACCESS3_EXECUTE | ACCESS3_LOOKUP))
+ acc_request |= POSIX_EXEC;
-uint32_t
-nfs3_stat_to_accessbits (struct iatt *buf, uint32_t request, uid_t uid,
- gid_t gid)
-{
- uint32_t accresult = 0;
- ia_prot_t prot = {0, };
- ia_type_t type = 0;
-
- prot = buf->ia_prot;
- type = buf->ia_type;
-
- if (uid == 0)
- accresult = nfs3_superuser_accessbits (prot, type, request);
- else if (buf->ia_uid == uid)
- accresult = nfs3_owner_accessbits (prot, type, request);
- else if (buf->ia_gid == gid)
- accresult = nfs3_group_accessbits (prot, type, request);
- else
- accresult = nfs3_other_accessbits (prot, type, request);
-
- return accresult;
+ return acc_request;
}
-
-
void
-nfs3_fill_access3res (access3res *res, nfsstat3 status, struct iatt *buf,
- uint32_t accbits, uid_t uid, gid_t gid, uint16_t xlid)
+nfs3_fill_access3res (access3res *res, nfsstat3 status, int32_t accbits,
+ int32_t reqaccbits)
{
- post_op_attr objattr;
uint32_t accres = 0;
memset (res, 0, sizeof (*res));
@@ -686,12 +604,10 @@ nfs3_fill_access3res (access3res *res, nfsstat3 status, struct iatt *buf,
if (status != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (buf, xlid);
- objattr = nfs3_stat_to_post_op_attr (buf);
- accres = nfs3_stat_to_accessbits (buf, accbits, uid, gid);
+ accres = nfs3_accessbits (accbits);
- res->access3res_u.resok.obj_attributes = objattr;
- res->access3res_u.resok.access = accres;
+ /* do not answer what was not asked */
+ res->access3res_u.resok.access = accres & reqaccbits;
}
void
@@ -742,14 +658,12 @@ nfs3_funge_root_dotdot_dirent (gf_dirent_t *ent, struct nfs3_fh *dfh)
nfs3_is_parentdir_entry (ent->d_name)) {
ent->d_ino = 1;
ent->d_stat.ia_ino = 1;
- ent->d_stat.ia_gen = 0;
}
if (nfs3_fh_is_root_fh (dfh) &&
nfs3_is_dot_entry (ent->d_name)) {
ent->d_ino = 1;
ent->d_stat.ia_ino = 1;
- ent->d_stat.ia_gen = 0;
}
}
@@ -766,14 +680,15 @@ nfs3_fill_entry3 (gf_dirent_t *entry, struct nfs3_fh *dfh)
if (!ent)
return NULL;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry: %s", entry->d_name);
+ gf_msg_trace (GF_NFS3, 0, "Entry: %s", entry->d_name);
/* If the entry is . or .., we need to replace the physical ino and gen
* with 1 and 0 respectively if the directory is root. This funging is
* needed because there is no parent directory of the root. In that
- * sense the behavious we provide is similar to the output of the
+ * sense the behavior we provide is similar to the output of the
* command: "stat /.."
*/
+ entry->d_ino = nfs3_iatt_gfid_to_ino (&entry->d_stat);
nfs3_funge_root_dotdot_dirent (entry, dfh);
ent->fileid = entry->d_ino;
ent->cookie = entry->d_off;
@@ -800,7 +715,7 @@ nfs3_fill_post_op_fh3 (struct nfs3_fh *fh, post_op_fh3 *pfh)
return;
pfh->handle_follows = 1;
- fhlen = nfs3_fh_compute_size (fh);
+ fhlen = nfs3_fh_compute_size ();
pfh->post_op_fh3_u.handle.data.data_val = (void *)fh;
pfh->post_op_fh3_u.handle.data.data_len = fhlen;
}
@@ -828,7 +743,7 @@ nfs3_fh_to_post_op_fh3 (struct nfs3_fh *fh)
entryp3 *
-nfs3_fill_entryp3 (gf_dirent_t *entry, struct nfs3_fh *dirfh)
+nfs3_fill_entryp3 (gf_dirent_t *entry, struct nfs3_fh *dirfh, uint64_t devid)
{
entryp3 *ent = NULL;
struct nfs3_fh newfh = {{0}, };
@@ -839,12 +754,13 @@ nfs3_fill_entryp3 (gf_dirent_t *entry, struct nfs3_fh *dirfh)
/* If the entry is . or .., we need to replace the physical ino and gen
* with 1 and 0 respectively if the directory is root. This funging is
* needed because there is no parent directory of the root. In that
- * sense the behavious we provide is similar to the output of the
+ * sense the behavior we provide is similar to the output of the
* command: "stat /.."
*/
+ entry->d_ino = nfs3_iatt_gfid_to_ino (&entry->d_stat);
nfs3_funge_root_dotdot_dirent (entry, dirfh);
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry: %s, ino: %"PRIu64,
- entry->d_name, entry->d_ino);
+ gf_msg_trace (GF_NFS3, 0, "Entry: %s, ino: %"PRIu64,
+ entry->d_name, entry->d_ino);
ent = GF_CALLOC (1, sizeof (*ent), gf_nfs_mt_entryp3);
if (!ent)
return NULL;
@@ -861,8 +777,22 @@ nfs3_fill_entryp3 (gf_dirent_t *entry, struct nfs3_fh *dirfh)
strcpy (ent->name, entry->d_name);
nfs3_fh_build_child_fh (dirfh, &entry->d_stat, &newfh);
- nfs3_map_xlid_to_statdev (&entry->d_stat, dirfh->xlatorid);
- ent->name_attributes = nfs3_stat_to_post_op_attr (&entry->d_stat);
+ nfs3_map_deviceid_to_statdev (&entry->d_stat, devid);
+ /* *
+ * In a tier volume, readdirp is sent only to the cold subvol,
+ * which populates the 'T' file entries in the result.
+ * Such files require an explicit stat call; setting the
+ * following argument makes the client perform it.
+ *
+ * The inode value is NULL for 'T' files and for directories, so
+ * just skip the check if the entry is a directory.
+ */
+ if (!(IA_ISDIR(entry->d_stat.ia_type)) && (entry->inode == NULL))
+ ent->name_attributes.attributes_follow = FALSE;
+ else
+ ent->name_attributes =
+ nfs3_stat_to_post_op_attr (&entry->d_stat);
+
ent->name_handle = nfs3_fh_to_post_op_fh3 (&newfh);
err:
return ent;
@@ -872,7 +802,8 @@ err:
void
nfs3_fill_readdir3res (readdir3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
uint64_t cverf, struct iatt *dirstat,
- gf_dirent_t *entries, count3 count, int is_eof)
+ gf_dirent_t *entries, count3 count, int is_eof,
+ uint64_t deviceid)
{
post_op_attr dirattr;
entry3 *ent = NULL;
@@ -886,7 +817,7 @@ nfs3_fill_readdir3res (readdir3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (dirstat, dirfh->xlatorid);
+ nfs3_map_deviceid_to_statdev (dirstat, deviceid);
dirattr = nfs3_stat_to_post_op_attr (dirstat);
res->readdir3res_u.resok.dir_attributes = dirattr;
res->readdir3res_u.resok.reply.eof = (bool_t)is_eof;
@@ -927,10 +858,11 @@ nfs3_fill_readdir3res (readdir3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
void
-nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
- uint64_t cverf, struct iatt *dirstat,
- gf_dirent_t *entries, count3 dircount, count3 maxcount,
- int is_eof)
+nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat,
+ struct nfs3_fh *dirfh, uint64_t cverf,
+ struct iatt *dirstat, gf_dirent_t *entries,
+ count3 dircount, count3 maxcount, int is_eof,
+ uint64_t deviceid)
{
post_op_attr dirattr;
entryp3 *ent = NULL;
@@ -945,7 +877,7 @@ nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (dirstat, dirfh->xlatorid);
+ nfs3_map_deviceid_to_statdev (dirstat, deviceid);
dirattr = nfs3_stat_to_post_op_attr (dirstat);
res->readdirp3res_u.resok.dir_attributes = dirattr;
res->readdirp3res_u.resok.reply.eof = (bool_t)is_eof;
@@ -963,7 +895,7 @@ nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
(strcmp (entries->d_name, "..") == 0))
goto nextentry;
*/
- ent = nfs3_fill_entryp3 (entries, dirfh);
+ ent = nfs3_fill_entryp3 (entries, dirfh, deviceid);
if (!ent)
break;
@@ -1049,7 +981,7 @@ nfs3_prep_fsstat3args (fsstat3args *args, struct nfs3_fh *fh)
void
nfs3_fill_fsstat3res (fsstat3res *res, nfsstat3 stat, struct statvfs *fsbuf,
- struct iatt *postbuf, uint16_t xlid)
+ struct iatt *postbuf, uint64_t deviceid)
{
post_op_attr poa;
fsstat3resok resok;
@@ -1059,11 +991,11 @@ nfs3_fill_fsstat3res (fsstat3res *res, nfsstat3 stat, struct statvfs *fsbuf,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (postbuf, xlid);
+ nfs3_map_deviceid_to_statdev (postbuf, deviceid);
poa = nfs3_stat_to_post_op_attr (postbuf);
resok.tbytes = (size3)(fsbuf->f_frsize * fsbuf->f_blocks);
- resok.fbytes = (size3)(fsbuf->f_bsize * fsbuf->f_bfree);
- resok.abytes = (size3)(fsbuf->f_bsize * fsbuf->f_bavail);
+ resok.fbytes = (size3)(fsbuf->f_frsize * fsbuf->f_bfree);
+ resok.abytes = (size3)(fsbuf->f_frsize * fsbuf->f_bavail);
resok.tfiles = (size3)(fsbuf->f_files);
resok.ffiles = (size3)(fsbuf->f_ffree);
resok.afiles = (size3)(fsbuf->f_favail);
@@ -1208,7 +1140,7 @@ nfs3_stat_to_wcc_data (struct iatt *pre, struct iatt *post)
void
nfs3_fill_create3res (create3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
struct iatt *newbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, uint64_t deviceid)
{
post_op_attr poa = {0, };
wcc_data dirwcc = {{0}, };
@@ -1219,14 +1151,12 @@ nfs3_fill_create3res (create3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
return;
nfs3_fill_post_op_fh3 (newfh, &res->create3res_u.resok.obj);
- nfs3_map_xlid_to_statdev (newbuf, newfh->xlatorid);
+ nfs3_map_deviceid_to_statdev (newbuf, deviceid);
poa = nfs3_stat_to_post_op_attr (newbuf);
res->create3res_u.resok.obj_attributes = poa;
- if (preparent) {
- nfs3_map_xlid_to_statdev (preparent, newfh->xlatorid);
- nfs3_map_xlid_to_statdev (postparent, newfh->xlatorid);
- dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
- }
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
res->create3res_u.resok.dir_wcc = dirwcc;
}
@@ -1250,7 +1180,7 @@ nfs3_prep_setattr3args (setattr3args *args, struct nfs3_fh *fh)
void
nfs3_fill_setattr3res (setattr3res *res, nfsstat3 stat, struct iatt *preop,
- struct iatt *postop, uint16_t xlid)
+ struct iatt *postop, uint64_t deviceid)
{
wcc_data wcc;
memset (res, 0, sizeof (*res));
@@ -1258,8 +1188,8 @@ nfs3_fill_setattr3res (setattr3res *res, nfsstat3 stat, struct iatt *preop,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (preop, xlid);
- nfs3_map_xlid_to_statdev (postop, xlid);
+ nfs3_map_deviceid_to_statdev (preop, deviceid);
+ nfs3_map_deviceid_to_statdev (postop, deviceid);
wcc = nfs3_stat_to_wcc_data (preop, postop);
res->setattr3res_u.resok.obj_wcc = wcc;
}
@@ -1278,7 +1208,7 @@ nfs3_prep_mkdir3args (mkdir3args *args, struct nfs3_fh *dirfh, char *name)
void
nfs3_fill_mkdir3res (mkdir3res *res, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, uint64_t deviceid)
{
wcc_data dirwcc;
post_op_attr poa;
@@ -1289,10 +1219,10 @@ nfs3_fill_mkdir3res (mkdir3res *res, nfsstat3 stat, struct nfs3_fh *fh,
return;
nfs3_fill_post_op_fh3 (fh, &res->mkdir3res_u.resok.obj);
- nfs3_map_xlid_to_statdev (buf, fh->xlatorid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
poa = nfs3_stat_to_post_op_attr (buf);
- nfs3_map_xlid_to_statdev (preparent, fh->xlatorid);
- nfs3_map_xlid_to_statdev (postparent, fh->xlatorid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
res->mkdir3res_u.resok.obj_attributes = poa;
res->mkdir3res_u.resok.dir_wcc = dirwcc;
@@ -1314,7 +1244,7 @@ nfs3_prep_symlink3args (symlink3args *args, struct nfs3_fh *dirfh, char *name,
void
nfs3_fill_symlink3res (symlink3res *res, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, uint64_t deviceid)
{
wcc_data dirwcc;
post_op_attr poa;
@@ -1325,10 +1255,10 @@ nfs3_fill_symlink3res (symlink3res *res, nfsstat3 stat, struct nfs3_fh *fh,
return;
nfs3_fill_post_op_fh3 (fh, &res->symlink3res_u.resok.obj);
- nfs3_map_xlid_to_statdev (buf, fh->xlatorid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
poa = nfs3_stat_to_post_op_attr (buf);
- nfs3_map_xlid_to_statdev (postparent, fh->xlatorid);
- nfs3_map_xlid_to_statdev (preparent, fh->xlatorid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
res->symlink3res_u.resok.obj_attributes = poa;
res->symlink3res_u.resok.dir_wcc = dirwcc;
@@ -1347,7 +1277,7 @@ nfs3_prep_readlink3args (readlink3args *args, struct nfs3_fh *fh)
void
nfs3_fill_readlink3res (readlink3res *res, nfsstat3 stat, char *path,
- struct iatt *buf, uint16_t xlid)
+ struct iatt *buf, uint64_t deviceid)
{
post_op_attr poa;
@@ -1357,7 +1287,7 @@ nfs3_fill_readlink3res (readlink3res *res, nfsstat3 stat, char *path,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (buf, xlid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
poa = nfs3_stat_to_post_op_attr (buf);
res->readlink3res_u.resok.data = (void *)path;
res->readlink3res_u.resok.symlink_attributes = poa;
@@ -1376,7 +1306,7 @@ nfs3_prep_mknod3args (mknod3args *args, struct nfs3_fh *fh, char *name)
void
nfs3_fill_mknod3res (mknod3res *res, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, uint64_t deviceid)
{
post_op_attr poa;
wcc_data wccdir;
@@ -1387,10 +1317,10 @@ nfs3_fill_mknod3res (mknod3res *res, nfsstat3 stat, struct nfs3_fh *fh,
return;
nfs3_fill_post_op_fh3 (fh, &res->mknod3res_u.resok.obj);
- nfs3_map_xlid_to_statdev (buf, fh->xlatorid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
poa = nfs3_stat_to_post_op_attr (buf);
- nfs3_map_xlid_to_statdev (preparent, fh->xlatorid);
- nfs3_map_xlid_to_statdev (postparent, fh->xlatorid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
wccdir = nfs3_stat_to_wcc_data (preparent, postparent);
res->mknod3res_u.resok.obj_attributes = poa;
res->mknod3res_u.resok.dir_wcc = wccdir;
@@ -1400,7 +1330,7 @@ nfs3_fill_mknod3res (mknod3res *res, nfsstat3 stat, struct nfs3_fh *fh,
void
nfs3_fill_remove3res (remove3res *res, nfsstat3 stat, struct iatt *preparent,
- struct iatt *postparent, uint16_t xlid)
+ struct iatt *postparent, uint64_t deviceid)
{
wcc_data dirwcc;
@@ -1409,8 +1339,8 @@ nfs3_fill_remove3res (remove3res *res, nfsstat3 stat, struct iatt *preparent,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (preparent, xlid);
- nfs3_map_xlid_to_statdev (postparent, xlid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
res->remove3res_u.resok.dir_wcc = dirwcc;
}
@@ -1436,7 +1366,7 @@ nfs3_prep_rmdir3args (rmdir3args *args, struct nfs3_fh *fh, char *name)
void
nfs3_fill_rmdir3res (rmdir3res *res, nfsstat3 stat, struct iatt *preparent,
- struct iatt *postparent, uint16_t xlid)
+ struct iatt *postparent, uint64_t deviceid)
{
wcc_data dirwcc;
memset (res, 0, sizeof (*res));
@@ -1445,8 +1375,8 @@ nfs3_fill_rmdir3res (rmdir3res *res, nfsstat3 stat, struct iatt *preparent,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (postparent, xlid);
- nfs3_map_xlid_to_statdev (preparent, xlid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
res->rmdir3res_u.resok.dir_wcc = dirwcc;
}
@@ -1466,7 +1396,7 @@ nfs3_prep_link3args (link3args *args, struct nfs3_fh *target,
void
nfs3_fill_link3res (link3res *res, nfsstat3 stat, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent,
- uint16_t xlid)
+ uint64_t deviceid)
{
post_op_attr poa;
wcc_data dirwcc;
@@ -1476,9 +1406,9 @@ nfs3_fill_link3res (link3res *res, nfsstat3 stat, struct iatt *buf,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (preparent, xlid);
- nfs3_map_xlid_to_statdev (postparent, xlid);
- nfs3_map_xlid_to_statdev (buf, xlid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ nfs3_map_deviceid_to_statdev (buf,deviceid);
poa = nfs3_stat_to_post_op_attr (buf);
dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
res->link3res_u.resok.file_attributes = poa;
@@ -1505,7 +1435,7 @@ void
nfs3_fill_rename3res (rename3res *res, nfsstat3 stat, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
struct iatt *prenewparent, struct iatt *postnewparent,
- uint16_t xlid)
+ uint64_t deviceid)
{
wcc_data dirwcc;
@@ -1515,11 +1445,11 @@ nfs3_fill_rename3res (rename3res *res, nfsstat3 stat, struct iatt *buf,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (preoldparent, xlid);
- nfs3_map_xlid_to_statdev (postoldparent, xlid);
- nfs3_map_xlid_to_statdev (prenewparent, xlid);
- nfs3_map_xlid_to_statdev (postnewparent, xlid);
- nfs3_map_xlid_to_statdev (buf, xlid);
+ nfs3_map_deviceid_to_statdev (preoldparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postoldparent, deviceid);
+ nfs3_map_deviceid_to_statdev (prenewparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postnewparent, deviceid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
dirwcc = nfs3_stat_to_wcc_data (preoldparent, postoldparent);
res->rename3res_u.resok.fromdir_wcc = dirwcc;
dirwcc = nfs3_stat_to_wcc_data (prenewparent, postnewparent);
@@ -1538,7 +1468,7 @@ nfs3_prep_write3args (write3args *args, struct nfs3_fh *fh)
void
nfs3_fill_write3res (write3res *res, nfsstat3 stat, count3 count,
stable_how stable, uint64_t wverf, struct iatt *prestat,
- struct iatt *poststat, uint16_t xlid)
+ struct iatt *poststat, uint64_t deviceid)
{
write3resok resok;
memset (res, 0, sizeof (*res));
@@ -1546,8 +1476,8 @@ nfs3_fill_write3res (write3res *res, nfsstat3 stat, count3 count,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (prestat, xlid);
- nfs3_map_xlid_to_statdev (poststat, xlid);
+ nfs3_map_deviceid_to_statdev (prestat, deviceid);
+ nfs3_map_deviceid_to_statdev (poststat, deviceid);
resok.file_wcc = nfs3_stat_to_wcc_data (prestat, poststat);
resok.count = count;
resok.committed = stable;
@@ -1567,15 +1497,16 @@ nfs3_prep_commit3args (commit3args *args, struct nfs3_fh *fh)
void
nfs3_fill_commit3res (commit3res *res, nfsstat3 stat, uint64_t wverf,
- struct iatt *prestat, struct iatt *poststat,uint16_t xlid)
+ struct iatt *prestat, struct iatt *poststat,
+ uint64_t deviceid)
{
memset (res, 0, sizeof (*res));
res->status = stat;
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (poststat, xlid);
- nfs3_map_xlid_to_statdev (prestat, xlid);
+ nfs3_map_deviceid_to_statdev (poststat, deviceid);
+ nfs3_map_deviceid_to_statdev (prestat, deviceid);
res->commit3res_u.resok.file_wcc = nfs3_stat_to_wcc_data (prestat,
poststat);
memcpy (res->commit3res_u.resok.verf, &wverf, sizeof (wverf));
@@ -1583,7 +1514,7 @@ nfs3_fill_commit3res (commit3res *res, nfsstat3 stat, uint64_t wverf,
void
nfs3_fill_read3res (read3res *res, nfsstat3 stat, count3 count,
- struct iatt *poststat, int is_eof, uint16_t xlid)
+ struct iatt *poststat, int is_eof, uint64_t deviceid)
{
post_op_attr poa;
@@ -1592,7 +1523,7 @@ nfs3_fill_read3res (read3res *res, nfsstat3 stat, count3 count,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (poststat, xlid);
+ nfs3_map_deviceid_to_statdev (poststat, deviceid);
poa = nfs3_stat_to_post_op_attr (poststat);
res->read3res_u.resok.file_attributes = poa;
res->read3res_u.resok.count = count;
@@ -1611,7 +1542,7 @@ nfs3_prep_read3args (read3args *args, struct nfs3_fh *fh)
void
nfs3_fill_pathconf3res (pathconf3res *res, nfsstat3 stat, struct iatt *buf,
- uint16_t xlid)
+ uint64_t deviceid)
{
pathconf3resok resok;
@@ -1620,7 +1551,7 @@ nfs3_fill_pathconf3res (pathconf3res *res, nfsstat3 stat, struct iatt *buf,
if (stat != NFS3_OK)
return;
- nfs3_map_xlid_to_statdev (buf, xlid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
resok.obj_attributes = nfs3_stat_to_post_op_attr (buf);
resok.linkmax = 256;
resok.name_max = NFS_NAME_MAX;
@@ -1656,11 +1587,11 @@ nfs3_verify_dircookie (struct nfs3_state *nfs3, fd_t *dirfd, cookie3 cookie,
if (cookie == 0)
return 0;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Verifying cookie: cverf: %"PRIu64
- ", cookie: %"PRIu64, cverf, cookie);
+ gf_msg_trace (GF_NFS3, 0, "Verifying cookie: cverf: %"PRIu64
+ ", cookie: %"PRIu64, cverf, cookie);
/* The cookie bad, no way cverf will be zero with a non-zero cookie. */
if ((cverf == 0) && (cookie != 0)) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Bad cookie requested");
+ gf_msg_trace (GF_NFS3, 0, "Bad cookie requested");
if (stat)
*stat = NFS3ERR_BAD_COOKIE;
goto err;
@@ -1682,7 +1613,7 @@ nfs3_verify_dircookie (struct nfs3_state *nfs3, fd_t *dirfd, cookie3 cookie,
goto err;
}
*/
- gf_log (GF_NFS3, GF_LOG_TRACE, "Cookie verified");
+ gf_msg_trace (GF_NFS3, 0, "Cookie verified");
if (stat)
*stat = NFS3_OK;
ret = 0;
@@ -1691,663 +1622,1962 @@ err:
}
-/* When remove a file, we need to unref the cached fd for an inode but this
- * needs to happen only when the file was in fact opened. However, it is
- * possible that fd_lookup on a file returns an fd because the file was in
- * process of being created(which also returns an fd) but since this fd was not
- * opened through this path, in the NFS3 remove path, we'll end up removing the
- * reference that belongs to someone else. That means, nfs3 remove path should
- * not unref unless it is sure that the file was cached open also. If it was,
- * only then perform the fd_unref, else not.
- *
- * We determine that using a flag in the inode context.
- */
-int
-nfs3_set_inode_opened (xlator_t *nfsxl, inode_t *inode)
+void
+nfs3_stat_to_errstr (uint32_t xid, char *op, nfsstat3 stat, int pstat,
+ char *errstr, size_t len)
{
- if ((!nfsxl) || (!inode))
- return -1;
+ if ((!op) || (!errstr))
+ return;
- inode_ctx_put (inode, nfsxl, GF_NFS3_FD_CACHED);
+ snprintf (errstr, len, "XID: %x, %s: NFS: %d(%s), POSIX: %d(%s)",
+ xid, op,stat, nfsstat3_strerror (stat), pstat,
+ strerror (pstat));
+}
- return 0;
+void
+nfs3_log_common_call (uint32_t xid, char *op, struct nfs3_fh *fh)
+{
+ char fhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, %s: args: %s", xid, op, fhstr);
}
-/* Returns 1 if inode was cached open, otherwise 0 */
-int
-nfs3_cached_inode_opened (xlator_t *nfsxl, inode_t *inode)
+void
+nfs3_log_fh_entry_call (uint32_t xid, char *op, struct nfs3_fh *fh,
+ char *name)
{
- int ret = -1;
- uint64_t cflag = 0;
+ char fhstr[1024];
- if ((!nfsxl) || (!inode))
- return -1;
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, %s: args: %s, name: %s", xid,
+ op, fhstr, name);
+}
- ret = inode_ctx_get (inode, nfsxl, &cflag);
- if (ret == -1)
- ret = 0;
- else if (cflag == GF_NFS3_FD_CACHED)
- ret = 1;
- return ret;
+void
+nfs3_log_rename_call (uint32_t xid, struct nfs3_fh *src, char *sname,
+ struct nfs3_fh *dst, char *dname)
+{
+ char sfhstr[1024];
+ char dfhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (src, sfhstr, sizeof (sfhstr));
+ nfs3_fh_to_str (dst, dfhstr, sizeof (dfhstr));
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, RENAME: args: Src: %s, "
+ "name: %s, Dst: %s, name: %s", xid, sfhstr, sname,
+ dfhstr, dname);
}
-int32_t
-nfs3_dir_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+
+void
+nfs3_log_create_call (uint32_t xid, struct nfs3_fh *fh, char *name,
+ createmode3 mode)
{
- nfs3_call_state_t *cs = NULL;
+ char fhstr[1024];
+ char *modestr = NULL;
+ char exclmode[] = "EXCLUSIVE";
+ char unchkd[] = "UNCHECKED";
+ char guarded[] = "GUARDED";
- cs = frame->local;
- if (op_ret == -1) {
- cs->resolve_ret = -1;
- cs->resolve_errno = op_errno;
- nfs3_call_resume (cs);
- goto err;
- }
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ if (mode == EXCLUSIVE)
+ modestr = exclmode;
+ else if (mode == GUARDED)
+ modestr = guarded;
+ else
+ modestr = unchkd;
- cs->fd = fd_ref (fd);
- nfs3_set_inode_opened (cs->nfsx, cs->resolvedloc.inode);
- gf_log (GF_NFS3, GF_LOG_TRACE, "FD_REF: %d", fd->refcount);
- nfs3_call_resume (cs);
-err:
- return 0;
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, CREATE: args: %s, name: %s,"
+ " mode: %s", xid, fhstr, name, modestr);
}
-int
-__nfs3_dir_open_and_resume (nfs3_call_state_t *cs)
+void
+nfs3_log_mknod_call (uint32_t xid, struct nfs3_fh *fh, char *name, int type)
{
- nfs_user_t nfu = {0, };
- int ret = -EFAULT;
+ char fhstr[1024];
+ char *modestr = NULL;
+ char chr[] = "CHAR";
+ char blk[] = "BLK";
+ char sock[] = "SOCK";
+ char fifo[] = "FIFO";
- if (!cs)
- return ret;
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ if (type == NF3CHR)
+ modestr = chr;
+ else if (type == NF3BLK)
+ modestr = blk;
+ else if (type == NF3SOCK)
+ modestr = sock;
+ else
+ modestr = fifo;
- nfs_user_root_create (&nfu);
- ret = nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_dir_open_cbk, cs);
- return ret;
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, MKNOD: args: %s, name: %s,"
+ " type: %s", xid, fhstr, name, modestr);
}
-int
-nfs3_dir_open_and_resume (nfs3_call_state_t *cs, nfs3_resume_fn_t resume)
+
+void
+nfs3_log_symlink_call (uint32_t xid, struct nfs3_fh *fh, char *name, char *tgt)
{
- fd_t *fd = NULL;
- int ret = -EFAULT;
+ char fhstr[1024];
- if ((!cs))
- return ret;
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, SYMLINK: args: %s, name: %s,"
+ " target: %s", xid, fhstr, name, tgt);
+}
- cs->resume_fn = resume;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Opening: %s", cs->resolvedloc.path);
- fd = fd_lookup (cs->resolvedloc.inode, 0);
- if (fd) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "fd found in state: ref: %d", fd->refcount);
- cs->fd = fd_ref (fd); /* Gets unrefd when the call state is wiped. */
- cs->resolve_ret = 0;
- nfs3_call_resume (cs);
- ret = 0;
- goto err;
- }
- ret = __nfs3_dir_open_and_resume (cs);
+void
+nfs3_log_link_call (uint32_t xid, struct nfs3_fh *fh, char *name,
+ struct nfs3_fh *tgt)
+{
+ char dfhstr[1024];
+ char tfhstr[1024];
-err:
- return ret;
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, dfhstr, sizeof (dfhstr));
+ nfs3_fh_to_str (tgt, tfhstr, sizeof (tfhstr));
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, LINK: args: %s, name: %s,"
+ " target: %s", xid, dfhstr, name, tfhstr);
}
-int
-nfs3_flush_open_wait_call_states (nfs3_call_state_t *cs, fd_t *openedfd)
+void
+nfs3_log_rw_call (uint32_t xid, char *op, struct nfs3_fh *fh, offset3 offt,
+ count3 count, int stablewrite)
{
- struct list_head *inode_q = NULL;
- uint64_t ctxaddr = 0;
- int ret = 0;
- nfs3_call_state_t *cstmp = NULL;
+ char fhstr[1024];
- if (!cs)
- return -1;
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ if (stablewrite == -1)
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, %s: args: %s, offset:"
+ " %"PRIu64", count: %"PRIu32, xid, op, fhstr,
+ offt, count);
+ else
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, %s: args: %s, offset:"
+ " %"PRIu64", count: %"PRIu32", %s", xid, op,
+ fhstr, offt, count,
+ (stablewrite == UNSTABLE)?"UNSTABLE":"STABLE");
- gf_log (GF_NFS3, GF_LOG_TRACE, "Flushing call state");
- ret = inode_ctx_get (cs->resolvedloc.inode, cs->nfsx, &ctxaddr);
- if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "No inode queue present");
- goto out;
- }
+}
- inode_q = (struct list_head *)(long)ctxaddr;
- if (!inode_q)
- goto out;
- list_for_each_entry_safe (cs, cstmp, inode_q, openwait_q) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Calling resume");
- cs->resolve_ret = 0;
- if (openedfd) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Opening uncached fd done: %d",
- openedfd->refcount);
- cs->fd = fd_ref (openedfd);
- }
- nfs3_call_resume (cs);
- }
+int
+nfs3_getattr_loglevel (nfsstat3 stat) {
-out:
- return 0;
-}
+ int ll = GF_LOG_DEBUG;
+ switch (stat) {
+ case NFS3ERR_PERM:
+ ll = GF_LOG_WARNING;
+ break;
-int
-__nfs3_fdcache_update_entry (struct nfs3_state *nfs3, fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- struct nfs3_fd_entry *fde = NULL;
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ACCES:
+ ll = GF_LOG_WARNING;
+ break;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Updating fd: 0x%lx", (long int)fd);
- fd_ctx_get (fd, nfs3->nfsx, &ctxaddr);
- fde = (struct nfs3_fd_entry *)(long)ctxaddr;
- list_del (&fde->list);
- list_add_tail (&fde->list, &nfs3->fdlru);
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
- return 0;
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
int
-nfs3_fdcache_update (struct nfs3_state *nfs3, fd_t *fd)
-{
- if ((!nfs3) || (!fd))
- return -1;
+nfs3_setattr_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
- LOCK (&nfs3->fdlrulock);
- {
- __nfs3_fdcache_update_entry (nfs3, fd);
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
}
- UNLOCK (&nfs3->fdlrulock);
- return 0;
+ return ll;
}
int
-__nfs3_fdcache_remove_entry (struct nfs3_state *nfs3, struct nfs3_fd_entry *fde)
-{
- gf_log (GF_NFS3, GF_LOG_TRACE, "Removing fd: 0x%lx: %d",
- (long int)fde->cachedfd, fde->cachedfd->refcount);
- list_del (&fde->list);
- fd_ctx_del (fde->cachedfd, nfs3->nfsx, NULL);
- fd_unref (fde->cachedfd);
- GF_FREE (fde);
- --nfs3->fdcount;
+nfs3_lookup_loglevel (nfsstat3 stat) {
- return 0;
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_PERM:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ACCES:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
int
-nfs3_fdcache_remove (struct nfs3_state *nfs3, fd_t *fd)
-{
- struct nfs3_fd_entry *fde = NULL;
- uint64_t ctxaddr = 0;
+nfs3_access_loglevel (nfsstat3 stat) {
- if ((!nfs3) || (!fd))
- return -1;
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
- LOCK (&nfs3->fdlrulock);
- {
- fd_ctx_get (fd, nfs3->nfsx, &ctxaddr);
- fde = (struct nfs3_fd_entry *)(long)ctxaddr;
- __nfs3_fdcache_remove_entry (nfs3, fde);
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
}
- UNLOCK (&nfs3->fdlrulock);
- return 0;
+ return ll;
}
int
-__nfs3_fdcache_replace (struct nfs3_state *nfs3)
-{
- struct nfs3_fd_entry *fde = NULL;
- struct nfs3_fd_entry *tmp = NULL;
+nfs3_readlink_loglevel (nfsstat3 stat) {
- if (!nfs3)
- return -1;
+ int ll = GF_LOG_DEBUG;
- if (nfs3->fdcount <= GF_NFS3_FDCACHE_SIZE)
- return 0;
+ switch (stat) {
- list_for_each_entry_safe (fde, tmp, &nfs3->fdlru, list)
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
break;
- __nfs3_fdcache_remove_entry (nfs3, fde);
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
- return 0;
-}
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
-int
-nfs3_fdcache_add (struct nfs3_state *nfs3, fd_t *fd)
-{
- struct nfs3_fd_entry *fde = NULL;
- int ret = -1;
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
- if ((!nfs3) || (!fd))
- return -1;
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
- fde = GF_CALLOC (1, sizeof (*fd), gf_nfs_mt_nfs3_fd_entry);
- if (!fde) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "fd entry allocation failed");
- goto out;
- }
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
- /* Already refd by caller. */
- fde->cachedfd = fd;
- INIT_LIST_HEAD (&fde->list);
-
- LOCK (&nfs3->fdlrulock);
- {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Adding fd: 0x%lx",
- (long int) fd);
- fd_ctx_set (fd, nfs3->nfsx, (uintptr_t)fde);
- fd_bind (fd);
- list_add_tail (&fde->list, &nfs3->fdlru);
- ++nfs3->fdcount;
- __nfs3_fdcache_replace (nfs3);
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
}
- UNLOCK (&nfs3->fdlrulock);
-out:
- return ret;
+ return ll;
}
+int
+nfs3_read_loglevel (nfsstat3 stat) {
-int32_t
-nfs3_file_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- nfs3_call_state_t *cs = NULL;
- struct nfs3_state *nfs3 = NULL;
+ int ll = GF_LOG_DEBUG;
- cs = frame->local;
- if (op_ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Opening uncached fd failed");
- cs->resolve_ret = -1;
- cs->resolve_errno = op_errno;
- fd = NULL;
- } else {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Opening uncached fd done: %d",
- fd->refcount);
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
}
- nfs3 = rpcsvc_request_program_private (cs->req);
- nfs3_flush_open_wait_call_states (cs, fd);
- nfs3_fdcache_add (nfs3, fd);
- return 0;
+ return ll;
}
-/* Returns 1 if the current call is the first one to be queued. If so, the
- * caller will need to send the open fop. If this is a non-first call to be
- * queued, it means the fd opening is in progress.
- *
- * Returns 0, if this is a non-first call.
- */
+
int
-__nfs3_queue_call_state (nfs3_call_state_t *cs)
-{
- struct list_head *inode_q = NULL;
- int ret = -1;
- uint64_t ctxaddr = 0;
+nfs3_write_loglevel (nfsstat3 stat) {
- ret = __inode_ctx_get (cs->resolvedloc.inode, cs->nfsx, &ctxaddr);
- if (ret == 0) {
- inode_q = (struct list_head *)(long)ctxaddr;
- goto attach_cs;
- }
+ int ll = GF_LOG_DEBUG;
- inode_q = GF_CALLOC (1, sizeof (*inode_q), gf_nfs_mt_list_head);
- if (!inode_q)
- goto err;
+ switch (stat) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Initing inode queue");
- INIT_LIST_HEAD (inode_q);
- __inode_ctx_put (cs->resolvedloc.inode, cs->nfsx, (uintptr_t)inode_q);
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
-attach_cs:
- if (list_empty (inode_q)) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "First call in queue");
- ret = 1;
- } else
- ret = 0;
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Queueing call state");
- list_add_tail (&cs->openwait_q, inode_q);
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
-err:
- return ret;
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
int
-nfs3_queue_call_state (nfs3_call_state_t *cs)
-{
- int ret = 0;
- if (!cs)
- return -1;
+nfs3_create_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
- LOCK (&cs->resolvedloc.inode->lock);
- {
- ret = __nfs3_queue_call_state (cs);
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
}
- UNLOCK (&cs->resolvedloc.inode->lock);
- return ret;
+ return ll;
}
int
-__nfs3_file_open_and_resume (nfs3_call_state_t *cs)
-{
- nfs_user_t nfu = {0, };
- int ret = -EFAULT;
+nfs3_mkdir_loglevel (nfsstat3 stat) {
- if (!cs)
- return ret;
+ int ll = GF_LOG_DEBUG;
- ret = nfs3_queue_call_state (cs);
- if (ret != 1) {
- ret = 0;
- goto out;
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
}
- nfs_user_root_create (&nfu);
- gf_log (GF_NFS3, GF_LOG_TRACE, "Opening uncached fd");
- ret = nfs_open (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, O_RDWR,
- nfs3_file_open_cbk, cs);
-out:
- return ret;
+ return ll;
}
-
int
-nfs3_file_open_and_resume (nfs3_call_state_t *cs, nfs3_resume_fn_t resume)
-{
- fd_t *fd = NULL;
- int ret = -EFAULT;
- struct nfs3_state *nfs3 = NULL;
+nfs3_symlink_loglevel (nfsstat3 stat) {
- if ((!cs))
- return ret;
+ int ll = GF_LOG_DEBUG;
- cs->resume_fn = resume;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Opening: %s", cs->resolvedloc.path);
- fd = fd_lookup (cs->resolvedloc.inode, 0);
- if (fd) {
- nfs3 = rpcsvc_request_program_private (cs->req);
- /* Already refd by fd_lookup, so no need to ref again. */
- gf_log (GF_NFS3, GF_LOG_TRACE, "fd found in state: %d",
- fd->refcount);
- nfs3_fdcache_update (nfs3, fd);
- cs->fd = fd; /* Gets unrefd when the call state is wiped. */
- cs->resolve_ret = 0;
- nfs3_call_resume (cs);
- ret = 0;
- goto err;
- }
+ switch (stat) {
- ret = __nfs3_file_open_and_resume (cs);
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
-err:
- return ret;
-}
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
-void
-nfs3_stat_to_errstr (uint32_t xid, char *op, nfsstat3 stat, int pstat,
- char *errstr)
-{
- if ((!op) || (!errstr))
- return;
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
- sprintf (errstr, "XID: %x, %s: NFS: %d(%s), POSIX: %d(%s)", xid, op,
- stat, nfsstat3_strerror (stat), pstat, strerror (pstat));
-}
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
-void
-nfs3_log_common_call (uint32_t xid, char *op, struct nfs3_fh *fh)
-{
- char fhstr[1024];
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
- nfs3_fh_to_str (fh, fhstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s", xid, op,
- fhstr);
+ return ll;
}
-void
-nfs3_log_fh_entry_call (uint32_t xid, char *op, struct nfs3_fh *fh,
- char *name)
-{
- char fhstr[1024];
+int
+nfs3_mknod_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
- nfs3_fh_to_str (fh, fhstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s, name: %s", xid,
- op, fhstr, name);
+ return ll;
}
+int
+nfs3_remove_loglevel (nfsstat3 stat) {
-void
-nfs3_log_rename_call (uint32_t xid, struct nfs3_fh *src, char *sname,
- struct nfs3_fh *dst, char *dname)
-{
- char sfhstr[1024];
- char dfhstr[1024];
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
- nfs3_fh_to_str (src, sfhstr);
- nfs3_fh_to_str (dst, dfhstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, RENAME: args: Src: %s, "
- "name: %s, Dst: %s, name: %s", xid, sfhstr, sname, dfhstr,
- dname);
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
+int
+nfs3_rmdir_loglevel (nfsstat3 stat) {
-void
-nfs3_log_create_call (uint32_t xid, struct nfs3_fh *fh, char *name,
- createmode3 mode)
-{
- char fhstr[1024];
- char *modestr = NULL;
- char exclmode[] = "EXCLUSIVE";
- char unchkd[] = "UNCHECKED";
- char guarded[] = "GUARDED";
+ int ll = GF_LOG_DEBUG;
- nfs3_fh_to_str (fh, fhstr);
- if (mode == EXCLUSIVE)
- modestr = exclmode;
- else if (mode == GUARDED)
- modestr = guarded;
- else
- modestr = unchkd;
+ switch (stat) {
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, CREATE: args: %s, name: %s,"
- " mode: %s", xid, fhstr, name, modestr);
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
-void
-nfs3_log_mknod_call (uint32_t xid, struct nfs3_fh *fh, char *name, int type)
-{
- char fhstr[1024];
- char *modestr = NULL;
- char chr[] = "CHAR";
- char blk[] = "BLK";
- char sock[] = "SOCK";
- char fifo[] = "FIFO";
+int
+nfs3_rename_loglevel (nfsstat3 stat) {
- nfs3_fh_to_str (fh, fhstr);
- if (type == NF3CHR)
- modestr = chr;
- else if (type == NF3BLK)
- modestr = blk;
- else if (type == NF3SOCK)
- modestr = sock;
- else
- modestr = fifo;
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, MKNOD: args: %s, name: %s,"
- " type: %s", xid, fhstr, name, modestr);
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
+int
+nfs3_link_loglevel (nfsstat3 stat) {
-void
-nfs3_log_symlink_call (uint32_t xid, struct nfs3_fh *fh, char *name, char *tgt)
-{
- char fhstr[1024];
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
- nfs3_fh_to_str (fh, fhstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, SYMLINK: args: %s, name: %s,"
- " target: %s", xid, fhstr, name, tgt);
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
-void
-nfs3_log_link_call (uint32_t xid, struct nfs3_fh *fh, char *name,
- struct nfs3_fh *tgt)
-{
- char dfhstr[1024];
- char tfhstr[1024];
+int
+nfs3_readdir_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
- nfs3_fh_to_str (fh, dfhstr);
- nfs3_fh_to_str (tgt, tfhstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, LINK: args: %s, name: %s,"
- " target: %s", xid, dfhstr, name, tfhstr);
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
-void
-nfs3_log_rw_call (uint32_t xid, char *op, struct nfs3_fh *fh, offset3 offt,
- count3 count, int stablewrite)
-{
- char fhstr[1024];
+int
+nfs3_fsstat_loglevel (nfsstat3 stat) {
- nfs3_fh_to_str (fh, fhstr);
- if (stablewrite == -1)
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s, offset:"
- " %"PRIu64", count: %"PRIu32, xid, op, fhstr, offt,
- count);
- else
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s, offset:"
- " %"PRIu64", count: %"PRIu32", %s", xid, op, fhstr,
- offt, count,
- (stablewrite == UNSTABLE)?"UNSTABLE":"STABLE");
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+ case NFS3ERR_PERM:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ACCES:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
}
+struct nfs3op_str {
+ int op;
+ char str[100];
+};
+
+struct nfs3op_str nfs3op_strings[] = {
+ { NFS3_NULL, "NULL"},
+ { NFS3_GETATTR, "GETATTR"},
+ { NFS3_SETATTR, "SETATTR"},
+ { NFS3_LOOKUP, "LOOKUP"},
+ { NFS3_ACCESS, "ACCESS"},
+ { NFS3_READLINK, "READLINK"},
+ { NFS3_READ, "READ"},
+ { NFS3_WRITE, "WRITE"},
+ { NFS3_CREATE, "CREATE"},
+ { NFS3_MKDIR, "MKDIR"},
+ { NFS3_SYMLINK, "SYMLINK"},
+ { NFS3_MKNOD, "MKNOD"},
+ { NFS3_REMOVE, "REMOVE"},
+ { NFS3_RMDIR, "RMDIR"},
+ { NFS3_RENAME, "RENAME"},
+ { NFS3_LINK, "LINK"},
+ { NFS3_READDIR, "READDIR"},
+ { NFS3_READDIRP, "READDIRP"},
+ { NFS3_FSSTAT, "FSSTAT"},
+ { NFS3_FSINFO, "FSINFO"},
+ { NFS3_PATHCONF, "PATHCONF"},
+ { NFS3_COMMIT, "COMMIT"},
+};
+
+int
+nfs3_loglevel (int nfs_op, nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (nfs_op) {
+ case NFS3_GETATTR:
+ ll = nfs3_getattr_loglevel (stat);
+ break;
+
+ case NFS3_SETATTR:
+ ll = nfs3_setattr_loglevel (stat);
+ break;
+
+ case NFS3_LOOKUP:
+ ll = nfs3_lookup_loglevel (stat);
+ break;
+
+ case NFS3_ACCESS:
+ ll = nfs3_access_loglevel (stat);
+ break;
+
+ case NFS3_READLINK:
+ ll = nfs3_readlink_loglevel (stat);
+ break;
+
+ case NFS3_READ:
+ ll = nfs3_read_loglevel (stat);
+ break;
+
+ case NFS3_WRITE:
+ ll = nfs3_write_loglevel (stat);
+ break;
+
+ case NFS3_CREATE:
+ ll = nfs3_create_loglevel (stat);
+ break;
+
+ case NFS3_MKDIR:
+ ll = nfs3_mkdir_loglevel (stat);
+ break;
+
+ case NFS3_SYMLINK:
+ ll = nfs3_symlink_loglevel (stat);
+ break;
+
+ case NFS3_MKNOD:
+ ll = nfs3_mknod_loglevel (stat);
+ break;
+
+ case NFS3_REMOVE:
+ ll = nfs3_remove_loglevel (stat);
+ break;
+
+ case NFS3_RMDIR:
+ ll = nfs3_rmdir_loglevel (stat);
+ break;
+
+ case NFS3_RENAME:
+ ll = nfs3_rename_loglevel (stat);
+ break;
+
+ case NFS3_LINK:
+ ll = nfs3_link_loglevel (stat);
+ break;
+
+ case NFS3_READDIR:
+ ll = nfs3_readdir_loglevel (stat);
+ break;
+
+ case NFS3_READDIRP:
+ ll = nfs3_readdir_loglevel (stat);
+ break;
+
+ case NFS3_FSSTAT:
+ ll = nfs3_fsstat_loglevel (stat);
+ break;
+
+ case NFS3_FSINFO:
+ ll = nfs3_fsstat_loglevel (stat);
+ break;
+
+ case NFS3_PATHCONF:
+ ll = nfs3_fsstat_loglevel (stat);
+ break;
+
+ case NFS3_COMMIT:
+ ll = nfs3_write_loglevel (stat);
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
void
-nfs3_log_common_res (uint32_t xid, char *op, nfsstat3 stat, int pstat)
+nfs3_log_common_res (uint32_t xid, int op, nfsstat3 stat, int pstat,
+ const char *path)
{
char errstr[1024];
+ int ll = nfs3_loglevel (op, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, nfs3op_strings[op].str, stat, pstat, errstr, sizeof (errstr));
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0, "%s => (%s)", path,
+ errstr);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s)", path, errstr);
- nfs3_stat_to_errstr (xid, op, stat, pstat, errstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s", errstr);
}
void
-nfs3_log_readlink_res (uint32_t xid, nfsstat3 stat, int pstat, char *linkpath)
+nfs3_log_readlink_res (uint32_t xid, nfsstat3 stat, int pstat, char *linkpath,
+ const char *path)
{
char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_READLINK, stat);
- nfs3_stat_to_errstr (xid, "READLINK", stat, pstat, errstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, target: %s", errstr, linkpath);
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READLINK", stat, pstat, errstr, sizeof (errstr));
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0, "%s => (%s), target: %s", path,
+ errstr, linkpath);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s) target: %s" , path,
+ errstr, linkpath);
}
void
nfs3_log_read_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
- int is_eof, struct iovec *vec, int32_t veccount)
+ int is_eof, struct iovec *vec,
+ int32_t veccount, const char *path)
{
char errstr[1024];
+ int ll = GF_LOG_DEBUG;
- nfs3_stat_to_errstr (xid, "READ", stat, pstat, errstr);
+ ll = nfs3_loglevel (NFS3_READ, stat);
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READ", stat, pstat, errstr, sizeof (errstr));
if (vec)
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, count: %"PRIu32", is_eof:"
- " %d, vector: count: %d, len: %zd", errstr, count,
- is_eof, veccount, vec->iov_len);
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0,
+ "%s => (%s), count: %"PRIu32", is_eof:"
+ " %d, vector: count: %d, len: %zd", path,
+ errstr, count, is_eof, veccount,
+ vec->iov_len);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), count: %"PRIu32", is_eof:"
+ " %d, vector: count: %d, len: %zd", path,
+ errstr, count, is_eof, veccount, vec->iov_len);
else
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, count: %"PRIu32", is_eof:"
- " %d", errstr, count, is_eof);
-}
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0,
+ "%s => (%s), count: %"PRIu32", is_eof:"
+ " %d", path, errstr, count, is_eof);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), count: %"PRIu32", is_eof:"
+ " %d", path, errstr, count, is_eof);
+}
void
nfs3_log_write_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
- int stable, uint64_t wverf)
+ int stable, uint64_t wverf, const char *path)
{
char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_WRITE, stat);
- nfs3_stat_to_errstr (xid, "WRITE", stat, pstat, errstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, count: %"PRIu32", %s,wverf: %"PRIu64
- , errstr, count, (stable == UNSTABLE)?"UNSTABLE":"STABLE",
- wverf);
-}
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "WRITE", stat, pstat, errstr, sizeof (errstr));
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0,
+ "%s => (%s), count: %"PRIu32", %s,wverf: "
+ "%"PRIu64, path, errstr, count,
+ (stable == UNSTABLE)?"UNSTABLE":"STABLE", wverf);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), count: %"PRIu32", %s,wverf: %"PRIu64
+ , path, errstr, count,
+ (stable == UNSTABLE)?"UNSTABLE":"STABLE", wverf);
+}
void
-nfs3_log_newfh_res (uint32_t xid, char *op, nfsstat3 stat, int pstat,
- struct nfs3_fh *newfh)
+nfs3_log_newfh_res (uint32_t xid, int op, nfsstat3 stat, int pstat,
+ struct nfs3_fh *newfh, const char *path)
{
char errstr[1024];
char fhstr[1024];
+ int ll = nfs3_loglevel (op, stat);
- nfs3_stat_to_errstr (xid, op, stat, pstat, errstr);
- nfs3_fh_to_str (newfh, fhstr);
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, nfs3op_strings[op].str, stat, pstat, errstr, sizeof (errstr));
+ nfs3_fh_to_str (newfh, fhstr, sizeof (fhstr));
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, %s", errstr, fhstr);
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0, "%s => (%s), %s", path, errstr,
+ fhstr);
+ else
+ gf_msg (GF_NFS3, nfs3_loglevel (op, stat), errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), %s", path, errstr, fhstr);
}
-
void
nfs3_log_readdir_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
- count3 count, int is_eof)
+ count3 count, int is_eof, const char *path)
{
char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_READDIR, stat);
- nfs3_stat_to_errstr (xid, "READDIR", stat, pstat, errstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, count: %"PRIu32", cverf: %"PRIu64
- ", is_eof: %d", errstr, count, cverf, is_eof);
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READDIR", stat, pstat, errstr, sizeof (errstr));
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0,
+ "%s => (%s), count: %"PRIu32", cverf: %"PRIu64
+ ", is_eof: %d", path, errstr, count, cverf,
+ is_eof);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), count: %"PRIu32", cverf: %"PRIu64
+ ", is_eof: %d", path, errstr, count, cverf, is_eof);
}
-
void
nfs3_log_readdirp_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
- count3 dircount, count3 maxcount, int is_eof)
+ count3 dircount, count3 maxcount, int is_eof,
+ const char *path)
{
char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_READDIRP, stat);
- nfs3_stat_to_errstr (xid, "READDIRPLUS", stat, pstat, errstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, dircount: %"PRIu32", maxcount: %"
- PRIu32", cverf: %"PRIu64", is_eof: %d", errstr, dircount,
- maxcount, cverf, is_eof);
-}
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READDIRPLUS", stat, pstat, errstr, sizeof (errstr));
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0,
+ "%s => (%s), dircount: %"PRIu32", maxcount: %"
+ PRIu32", cverf: %"PRIu64", is_eof: %d", path,
+ errstr, dircount, maxcount, cverf, is_eof);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), dircount: %"PRIu32", maxcount: %"
+ PRIu32", cverf: %"PRIu64", is_eof: %d", path, errstr,
+ dircount, maxcount, cverf, is_eof);
+}
void
-nfs3_log_commit_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t wverf)
+nfs3_log_commit_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t wverf,
+ const char *path)
{
char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_COMMIT, stat);
- nfs3_stat_to_errstr (xid, "COMMIT", stat, pstat, errstr);
- gf_log (GF_NFS3, GF_LOG_DEBUG, "%s, wverf: %"PRIu64, errstr, wverf);
-}
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "COMMIT", stat, pstat, errstr, sizeof (errstr));
+ if (ll == GF_LOG_DEBUG)
+ gf_msg_debug (GF_NFS3, 0, "%s => (%s), wverf: %"PRIu64,
+ path, errstr, wverf);
+ else
+ gf_msg (GF_NFS3, ll, errno, NFS_MSG_STAT_ERROR,
+ "%s => (%s), wverf: %"PRIu64, path, errstr, wverf);
+}
void
nfs3_log_readdir_call (uint32_t xid, struct nfs3_fh *fh, count3 dircount,
@@ -2355,18 +3585,20 @@ nfs3_log_readdir_call (uint32_t xid, struct nfs3_fh *fh, count3 dircount,
{
char fhstr[1024];
- nfs3_fh_to_str (fh, fhstr);
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
if (maxcount == 0)
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, READDIR: args: %s,"
- " count: %d", xid, fhstr, (uint32_t)dircount);
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, READDIR: args: %s,"
+ " count: %d", xid, fhstr, (uint32_t)dircount);
else
- gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, READDIRPLUS: args: %s,"
- " dircount: %d, maxcount: %d", xid, fhstr,
- (uint32_t)dircount, (uint32_t)maxcount);
+ gf_msg_debug (GF_NFS3, 0, "XID: %x, READDIRPLUS: args: %s,"
+ " dircount: %d, maxcount: %d", xid, fhstr,
+ (uint32_t)dircount, (uint32_t)maxcount);
}
-
int
nfs3_fh_resolve_inode_done (nfs3_call_state_t *cs, inode_t *inode)
{
@@ -2375,10 +3607,14 @@ nfs3_fh_resolve_inode_done (nfs3_call_state_t *cs, inode_t *inode)
if ((!cs) || (!inode))
return ret;
- gf_log (GF_NFS3, GF_LOG_TRACE, "FH inode resolved");
- ret = nfs_inode_loc_fill (inode, &cs->resolvedloc);
- if (ret < 0)
+ gf_msg_trace (GF_NFS3, 0, "FH inode resolved");
+ ret = nfs_inode_loc_fill (inode, &cs->resolvedloc, NFS_RESOLVE_EXIST);
+ if (ret < 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_INODE_LOC_FILL_ERROR,
+ "inode loc fill failed");
goto err;
+ }
nfs3_call_resume (cs);
@@ -2386,55 +3622,6 @@ err:
return ret;
}
-#define GF_NFS3_FHRESOLVE_FOUND 1
-#define GF_NFS3_FHRESOLVE_NOTFOUND 2
-#define GF_NFS3_FHRESOLVE_DIRFOUND 3
-
-int
-nfs3_fh_resolve_check_entry (struct nfs3_fh *fh, gf_dirent_t *candidate,
- int hashidx)
-{
- struct iatt *ia = NULL;
- int ret = GF_NFS3_FHRESOLVE_NOTFOUND;
- nfs3_hash_entry_t entryhash = 0;
-
- if ((!fh) || (!candidate))
- return ret;
-
- if ((strcmp (candidate->d_name, ".") == 0) ||
- (strcmp (candidate->d_name, "..") == 0))
- goto found_entry;
-
- ia = &candidate->d_stat;
- if ((ia->ia_gen == fh->gen) && (ia->ia_ino == fh->ino)) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Found entry: gen: %"PRId64
- " ino: %"PRId64", name: %s", ia->ia_gen, ia->ia_ino,
- candidate->d_name);
- ret = GF_NFS3_FHRESOLVE_FOUND;
- goto found_entry;
- }
-
- /* This condition ensures that we never have to be afraid of having
- * a directory hash conflict with a file hash. The consequence of
- * this condition is that we can now have unlimited files in a directory
- * and upto 65536 sub-directories in a directory.
- */
- if (!IA_ISDIR (candidate->d_stat.ia_type))
- goto found_entry;
- entryhash = fh->entryhash[hashidx];
- if (entryhash == nfs3_fh_hash_entry (ia->ia_ino, ia->ia_gen)) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Found hash match: %s: %d",
- candidate->d_name, entryhash);
- ret = GF_NFS3_FHRESOLVE_DIRFOUND;
- goto found_entry;
- }
-
-found_entry:
-
- return ret;
-}
-
-
int32_t
nfs3_fh_resolve_entry_lookup_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
@@ -2443,347 +3630,279 @@ nfs3_fh_resolve_entry_lookup_cbk (call_frame_t *frame, void *cookie,
struct iatt *postparent)
{
nfs3_call_state_t *cs = NULL;
+ inode_t *linked_inode = NULL;
cs = frame->local;
cs->resolve_ret = op_ret;
cs->resolve_errno = op_errno;
if (op_ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Lookup failed: %s: %s",
- cs->resolvedloc.path, strerror (op_errno));
+ if (op_errno == ENOENT) {
+ gf_msg_trace (GF_NFS3, 0, "Lookup failed: %s: %s",
+ cs->resolvedloc.path,
+ strerror (op_errno));
+ } else {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, op_errno,
+ NFS_MSG_LOOKUP_FAIL, "Lookup failed: %s: %s",
+ cs->resolvedloc.path, strerror (op_errno));
+ }
goto err;
} else
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry looked up: %s",
- cs->resolvedloc.path);
-
- inode_link (inode, cs->resolvedloc.parent, cs->resolvedloc.name, buf);
+ gf_msg_trace (GF_NFS3, 0, "Entry looked up: %s",
+ cs->resolvedloc.path);
+
+ memcpy (&cs->stbuf, buf, sizeof (*buf));
+ memcpy (&cs->postparent, postparent, sizeof (*postparent));
+ linked_inode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
+ if (linked_inode) {
+ nfs_fix_generation (this, linked_inode);
+ inode_lookup (linked_inode);
+ inode_unref (cs->resolvedloc.inode);
+ cs->resolvedloc.inode = linked_inode;
+ }
err:
nfs3_call_resume (cs);
return 0;
}
-
-
int32_t
-nfs3_fh_resolve_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries);
-
-int
-nfs3_fh_resolve_found_entry (nfs3_call_state_t *cs, gf_dirent_t *candidate)
-{
- uint64_t dirino = 0;
- uint64_t dirgen = 0;
- int ret = 0;
- nfs_user_t nfu = {0, };
-
- if ((!cs) || (!candidate))
- return -EFAULT;
-
- dirino = cs->resolvedloc.inode->ino;
- dirgen = cs->resolvedloc.inode->generation;
-
- nfs_loc_wipe (&cs->resolvedloc);
- ret = nfs_entry_loc_fill (cs->vol->itable, dirino, dirgen,
- candidate->d_name, &cs->resolvedloc,
- NFS_RESOLVE_CREATE);
- if (ret == -ENOENT) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry not in itable, needs"
- " lookup");
- nfs_user_root_create (&nfu);
- ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_entry_lookup_cbk,
- cs);
- } else {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry got from itable");
- nfs3_call_resume (cs);
- }
-
- return ret;
-}
-
-
-int32_t
-nfs3_fh_resolve_parent_lookup_cbk (call_frame_t *frame, void *cookie,
+nfs3_fh_resolve_inode_lookup_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
nfs3_call_state_t *cs = NULL;
+ inode_t *linked_inode = NULL;
cs = frame->local;
cs->resolve_ret = op_ret;
cs->resolve_errno = op_errno;
if (op_ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Lookup failed: %s: %s",
- cs->resolvedloc.path, strerror (op_errno));
+ if (op_errno == ENOENT) {
+ gf_msg_trace (GF_NFS3, 0, "Lookup failed: %s: %s",
+ cs->resolvedloc.path,
+ strerror (op_errno));
+ } else {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, op_errno,
+ NFS_MSG_LOOKUP_FAIL, "Lookup failed: %s: %s",
+ cs->resolvedloc.path, strerror (op_errno));
+ }
nfs3_call_resume (cs);
goto err;
- } else
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry looked up: %s",
- cs->resolvedloc.path);
+ }
- inode_link (inode, cs->resolvedloc.parent, cs->resolvedloc.name, buf);
- nfs3_fh_resolve_entry_hard (cs);
+ memcpy (&cs->stbuf, buf, sizeof(*buf));
+ memcpy (&cs->postparent, postparent, sizeof(*postparent));
+ linked_inode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
+ if (linked_inode) {
+ nfs_fix_generation (this, linked_inode);
+ inode_lookup (linked_inode);
+ inode_unref (cs->resolvedloc.inode);
+ cs->resolvedloc.inode = linked_inode;
+ }
+ /* If it is an entry lookup and we landed in the callback for hard
+ * inode resolution, it means the parent inode was not available and
+ * had to be resolved first. Now that is done, let's head back into
+ * entry resolution.
+ */
+ if (cs->resolventry)
+ nfs3_fh_resolve_entry_hard (cs);
+ else
+ nfs3_call_resume (cs);
err:
return 0;
}
+/* Needs no extra argument since it knows that the fh to be resolved is in
+ * resolvefh and that it needs to start looking from the root.
+ */
int
-nfs3_fh_resolve_found_parent (nfs3_call_state_t *cs, gf_dirent_t *candidate)
+nfs3_fh_resolve_inode_hard (nfs3_call_state_t *cs)
{
- uint64_t dirino = 0;
- uint64_t dirgen = 0;
- int ret = 0;
+ int ret = -EFAULT;
nfs_user_t nfu = {0, };
- if ((!cs) || (!candidate))
- return -EFAULT;
-
- dirino = cs->resolvedloc.inode->ino;
- dirgen = cs->resolvedloc.inode->generation;
+ if (!cs)
+ return ret;
+ gf_msg_trace (GF_NFS3, 0, "FH hard resolution for: gfid 0x%s",
+ uuid_utoa (cs->resolvefh.gfid));
+ cs->hardresolved = 1;
nfs_loc_wipe (&cs->resolvedloc);
- ret = nfs_entry_loc_fill (cs->vol->itable, dirino, dirgen,
- candidate->d_name, &cs->resolvedloc,
- NFS_RESOLVE_CREATE);
- if (ret == -ENOENT) {
- nfs_user_root_create (&nfu);
- ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_parent_lookup_cbk,
- cs);
- } else
- nfs3_fh_resolve_entry_hard (cs);
+ ret = nfs_gfid_loc_fill (cs->vol->itable, cs->resolvefh.gfid,
+ &cs->resolvedloc, NFS_RESOLVE_CREATE);
+ if (ret < 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_INODE_LOC_FILL_ERROR,
+ "Failed to fill loc using gfid: "
+ "%s", strerror (-ret));
+ goto out;
+ }
+
+ nfs_user_root_create (&nfu);
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fh_resolve_inode_lookup_cbk, cs);
+out:
return ret;
}
int
-nfs3_fh_resolve_found (nfs3_call_state_t *cs, gf_dirent_t *candidate)
+nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs)
{
- int ret = 0;
-
- if ((!cs) || (!candidate))
- return -EFAULT;
-
- if (!cs->resolventry) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Candidate entry was found");
- ret = nfs3_fh_resolve_found_entry (cs, candidate);
- } else {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry's parent was found");
- ret = nfs3_fh_resolve_found_parent (cs, candidate);
- }
-
- return ret;
-}
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ if (!cs)
+ return ret;
-int32_t
-nfs3_fh_resolve_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- nfs3_call_state_t *cs = NULL;
- int ret = -EFAULT;
- nfs_user_t nfu = {0, };
+ nfs_loc_wipe (&cs->resolvedloc);
+ nfs_user_root_create (&nfu);
+ gf_msg_trace (GF_NFS3, 0, "FH hard resolution: gfid: %s "
+ ", entry: %s", uuid_utoa (cs->resolvefh.gfid),
+ cs->resolventry);
- cs = frame->local;
- cs->resolve_ret = op_ret;
- cs->resolve_errno = op_errno;
+ ret = nfs_entry_loc_fill (cs->nfsx, cs->vol->itable, cs->resolvefh.gfid,
+ cs->resolventry, &cs->resolvedloc,
+ NFS_RESOLVE_CREATE);
- if (op_ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Dir open failed: %s: %s",
- cs->resolvedloc.path, strerror (op_errno));
+ if (ret == -2) {
+ gf_msg_trace (GF_NFS3, 0, "Entry needs lookup: %s",
+ cs->resolvedloc.path);
+ /* If the NFS op is lookup, let the resume callback
+ * handle the sending of the lookup fop. Similarly,
+ * if the NFS op is create, let the create call
+ * go ahead in the resume callback so that an EEXIST gets
+ * handled at posix without an extra fop at this point.
+ */
+ if (nfs3_lookup_op (cs) ||
+ (nfs3_create_op (cs) && !nfs3_create_exclusive_op (cs))) {
+ cs->lookuptype = GF_NFS3_FRESH;
+ cs->resolve_ret = 0;
+ cs->hardresolved = 0;
+ nfs3_call_resume (cs);
+ } else {
+ cs->hardresolved = 1;
+ nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fh_resolve_entry_lookup_cbk, cs);
+ }
+ ret = 0;
+ } else if (ret == -1) {
+ gf_msg_trace (GF_NFS3, 0, "Entry needs parent lookup: %s",
+ cs->resolvedloc.path);
+ ret = nfs3_fh_resolve_inode_hard (cs);
+ } else if (ret == 0) {
+ cs->resolve_ret = 0;
nfs3_call_resume (cs);
- goto err;
- } else
- gf_log (GF_NFS3, GF_LOG_TRACE, "Reading directory: %s",
- cs->resolvedloc.path);
-
- nfs_user_root_create (&nfu);
- /* This function can be called in a recursive code path, so if another
- * directory was opened in an earlier call, we must unref through this
- * reference before opening another fd_t.
- */
- if (cs->resolve_dir_fd)
- fd_unref (cs->resolve_dir_fd);
-
- cs->resolve_dir_fd = fd_ref (fd);
- ret = nfs_readdirp (cs->nfsx, cs->vol, &nfu, fd, GF_NFS3_DTPREF, 0,
- nfs3_fh_resolve_readdir_cbk, cs);
+ }
-err:
return ret;
}
-int32_t
-nfs3_fh_resolve_dir_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+int
+nfs3_fh_resolve_inode (nfs3_call_state_t *cs)
{
- nfs3_call_state_t *cs = NULL;
- nfs_user_t nfu = {0, };
-
- cs = frame->local;
- cs->resolve_ret = op_ret;
- cs->resolve_errno = op_errno;
+ inode_t *inode = NULL;
+ int ret = -EFAULT;
+ xlator_t *this = NULL;
- if (op_ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Lookup failed: %s: %s",
- cs->resolvedloc.path, strerror (op_errno));
- nfs3_call_resume (cs);
- goto err;
- } else
- gf_log (GF_NFS3, GF_LOG_TRACE, "Dir will be opened: %s",
- cs->resolvedloc.path);
+ if (!cs)
+ return ret;
- nfs_user_root_create (&nfu);
- inode_link (inode, cs->resolvedloc.parent, cs->resolvedloc.name, buf);
- nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_opendir_cbk, cs);
+ this = cs->nfsx;
+ gf_msg_trace (GF_NFS3, 0, "FH needs inode resolution");
+ gf_uuid_copy (cs->resolvedloc.gfid, cs->resolvefh.gfid);
-err:
- return 0;
-}
+ inode = inode_find (cs->vol->itable, cs->resolvefh.gfid);
+ if (!inode || inode_ctx_get (inode, this, NULL))
+ ret = nfs3_fh_resolve_inode_hard (cs);
+ else
+ ret = nfs3_fh_resolve_inode_done (cs, inode);
+ if (inode)
+ inode_unref (inode);
+ return ret;
+}
int
-nfs3_fh_resolve_dir_hard (nfs3_call_state_t *cs, uint64_t ino, uint64_t gen,
- char *entry)
+nfs3_fh_resolve_entry (nfs3_call_state_t *cs)
{
- int ret = -EFAULT;
- nfs_user_t nfu = {0, };
+ int ret = -EFAULT;
if (!cs)
return ret;
- cs->hashidx++;
- nfs_loc_wipe (&cs->resolvedloc);
- if (nfs3_fh_hash_index_is_beyond (&cs->resolvefh, cs->hashidx)) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Hash index is beyond: idx %d, "
- " fh idx: %d", cs->hashidx, cs->resolvefh.hashcount);
- nfs3_call_resume_estale (cs);
- ret = 0;
- goto out;
- }
-
- nfs_user_root_create (&nfu);
- gf_log (GF_NFS3, GF_LOG_TRACE, "FH hard dir resolution: ino:"
- " %"PRIu64", gen: %"PRIu64", entry: %s, hashidx: %d",
- ino, gen, entry, cs->hashidx);
- ret = nfs_entry_loc_fill (cs->vol->itable, ino, gen, entry,
- &cs->resolvedloc, NFS_RESOLVE_CREATE);
-
- if (ret == 0) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Dir will be opened: %s",
- cs->resolvedloc.path);
- ret = nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_opendir_cbk, cs);
- } else if (ret == -ENOENT) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Dir needs lookup: %s",
- cs->resolvedloc.path);
- ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_dir_lookup_cbk, cs);
- }
-out:
- return ret;
+ return nfs3_fh_resolve_entry_hard (cs);
}
int
-nfs3_fh_resolve_check_response (nfs3_call_state_t *cs, gf_dirent_t *candidate,
- int response, off_t last_offt)
+nfs3_fh_resolve_resume (nfs3_call_state_t *cs)
{
- uint64_t dirino = 0;
- uint64_t dirgen = 0;
- int ret = -EFAULT;
- nfs_user_t nfu = {0, };
+ int ret = -EFAULT;
if (!cs)
return ret;
- dirino = cs->resolvedloc.inode->ino;
- dirgen = cs->resolvedloc.inode->generation;
-
- if (response == GF_NFS3_FHRESOLVE_DIRFOUND)
- ret = nfs3_fh_resolve_dir_hard (cs, dirino, dirgen,
- candidate->d_name);
- else if (response == GF_NFS3_FHRESOLVE_FOUND)
- nfs3_fh_resolve_found (cs, candidate);
- else if (response == GF_NFS3_FHRESOLVE_NOTFOUND) {
- nfs_user_root_create (&nfu);
- ret = nfs_readdirp (cs->nfsx, cs->vol, &nfu, cs->resolve_dir_fd,
- GF_NFS3_DTPREF, last_offt,
- nfs3_fh_resolve_readdir_cbk, cs);
- }
+ if (cs->resolve_ret < 0)
+ goto err_resume_call;
- return 0;
-}
+ if (!cs->resolventry)
+ ret = nfs3_fh_resolve_inode (cs);
+ else
+ ret = nfs3_fh_resolve_entry (cs);
-int
-nfs3_fh_resolve_search_dir (nfs3_call_state_t *cs, gf_dirent_t *entries)
-{
- gf_dirent_t *candidate = NULL;
- int ret = GF_NFS3_FHRESOLVE_NOTFOUND;
- off_t lastoff = 0;
-
- if ((!cs) || (!entries))
- return -EFAULT;
-
- if (list_empty (&entries->list))
- goto not_found;
-
- list_for_each_entry (candidate, &entries->list, list) {
- lastoff = candidate->d_off;
- gf_log (GF_NFS3, GF_LOG_TRACE, "Candidate: %s, ino: %"PRIu64
- ", gen: %"PRIu64, candidate->d_name, candidate->d_ino,
- candidate->d_stat.ia_gen);
- ret = nfs3_fh_resolve_check_entry (&cs->resolvefh, candidate,
- cs->hashidx);
- if (ret != GF_NFS3_FHRESOLVE_NOTFOUND)
- break;
+err_resume_call:
+ if (ret < 0) {
+ cs->resolve_ret = -1;
+ cs->resolve_errno = EFAULT;
+ nfs3_call_resume (cs);
+ ret = 0;
}
-not_found:
- nfs3_fh_resolve_check_response (cs, candidate, ret, lastoff);
return ret;
}
int32_t
-nfs3_fh_resolve_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+nfs3_fh_resolve_root_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- if (op_ret <= 0) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Directory read done: %s: %s",
- cs->resolvedloc.path, strerror (op_ret));
- cs->resolve_ret = -1;
- cs->resolve_errno = ENOENT;
- nfs3_call_resume (cs);
- goto err;
- }
+ cs->resolve_ret = op_ret;
+ cs->resolve_errno = op_errno;
- nfs3_fh_resolve_search_dir (cs, entries);
+ if (op_ret == -1) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, op_errno,
+ NFS_MSG_LOOKUP_ROOT_FAIL, "Root lookup failed: %s",
+ strerror (op_errno));
+ goto err;
+ } else
+ gf_msg_trace (GF_NFS3, 0, "Root looked up: %s",
+ cs->resolvedloc.path);
+ nfs3_set_root_looked_up (cs->nfs3state, &cs->resolvefh);
err:
+ nfs3_fh_resolve_resume (cs);
return 0;
}
-/* Needs no extra argument since it knows that the fh to be resolved is in
- * resolvefh and that it needs to start looking from the root.
- */
+
int
-nfs3_fh_resolve_inode_hard (nfs3_call_state_t *cs)
+nfs3_fh_resolve_root (nfs3_call_state_t *cs)
{
int ret = -EFAULT;
nfs_user_t nfu = {0, };
@@ -2791,113 +3910,75 @@ nfs3_fh_resolve_inode_hard (nfs3_call_state_t *cs)
if (!cs)
return ret;
- cs->hashidx++;
- nfs_loc_wipe (&cs->resolvedloc);
- if (nfs3_fh_hash_index_is_beyond (&cs->resolvefh, cs->hashidx)) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Hash index is beyond: idx %d, "
- " fh idx: %d", cs->hashidx, cs->resolvefh.hashcount);
- nfs3_call_resume_estale (cs);
- ret = 0;
+ if (nfs3_is_root_looked_up (cs->nfs3state, &cs->resolvefh)) {
+ ret = nfs3_fh_resolve_resume (cs);
goto out;
}
nfs_user_root_create (&nfu);
- gf_log (GF_NFS3, GF_LOG_TRACE, "FH hard resolution: ino:"
- " %"PRIu64", gen: %"PRIu64", hashidx: %d", cs->resolvefh.ino,
- cs->resolvefh.gen, cs->hashidx);
- ret = nfs_ino_loc_fill (cs->vol->itable, 1, 0, &cs->resolvedloc);
-
- if (ret == 0) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Dir will be opened: %s",
- cs->resolvedloc.path);
- ret = nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_opendir_cbk, cs);
- } else if (ret == -ENOENT) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Dir needs lookup: %s",
- cs->resolvedloc.path);
- ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_dir_lookup_cbk, cs);
- }
+ gf_msg_trace (GF_NFS3, 0, "Root needs lookup");
+ ret = nfs_root_loc_fill (cs->vol->itable, &cs->resolvedloc);
+ if (ret < 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_LOOKUP_ROOT_FAIL,
+ "Failed to lookup root from itable: %s",
+ strerror (-ret));
+ goto out;
+ }
+
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fh_resolve_root_lookup_cbk, cs);
out:
return ret;
}
-
+/**
+ * __nfs3_fh_auth_get_peer -- Get a peer name from the rpc request object
+ *
+ * @req : The request to get host/peer from
+ * @peer: Char * to write to (size passed as RPCSVC_PEER_STRLEN)
+ */
int
-nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs)
+__nfs3_fh_auth_get_peer (const rpcsvc_request_t *req, char *peer)
{
- int ret = -EFAULT;
- nfs_user_t nfu = {0, };
-
- if (!cs)
- return ret;
-
- nfs_loc_wipe (&cs->resolvedloc);
- nfs_user_root_create (&nfu);
- gf_log (GF_NFS3, GF_LOG_TRACE, "FH hard resolution: ino:"
- " %"PRIu64", gen: %"PRIu64", entry: %s, hashidx: %d",
- cs->resolvefh.ino, cs->resolvefh.gen, cs->resolventry,
- cs->hashidx);
-
- ret = nfs_entry_loc_fill (cs->vol->itable, cs->resolvefh.ino,
- cs->resolvefh.gen, cs->resolventry,
- &cs->resolvedloc, NFS_RESOLVE_CREATE);
+ struct sockaddr_storage sastorage = {0, };
+ rpc_transport_t *trans = NULL;
+ int ret = 0;
- if (ret == -2) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry needs lookup: %s",
- cs->resolvedloc.path);
- ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3_fh_resolve_entry_lookup_cbk, cs);
- ret = 0;
- } else if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Entry needs parent lookup: %s",
- cs->resolvedloc.path);
- ret = nfs3_fh_resolve_inode_hard (cs);
- } else if (ret == 0) {
- cs->resolve_ret = 0;
- nfs3_call_resume (cs);
+ /* Why do we pass in the peer here and then
+ * store it rather than malloc() and return a char * ? We want to avoid
+ * heap allocations in the IO path as much as possible for speed
+ * so we try to keep all allocations on the stack.
+ */
+ trans = rpcsvc_request_transport (req);
+ ret = rpcsvc_transport_peeraddr (trans, peer, RPCSVC_PEER_STRLEN,
+ &sastorage, sizeof (sastorage));
+ if (ret != 0) {
+ gf_msg (GF_NFS3, GF_LOG_WARNING, 0, NFS_MSG_GET_PEER_ADDR_FAIL,
+ "Failed to get peer addr: %s", gai_strerror (ret));
}
-
- return ret;
-}
-
-int
-nfs3_fh_resolve_inode (nfs3_call_state_t *cs)
-{
- inode_t *inode = NULL;
- int ret = -EFAULT;
-
- if (!cs)
- return ret;
-
- gf_log (GF_NFS3, GF_LOG_TRACE, "FH needs inode resolution");
- inode = inode_get (cs->vol->itable, cs->resolvefh.ino,
- cs->resolvefh.gen);
- if (!inode)
- ret = nfs3_fh_resolve_inode_hard (cs);
- else
- ret = nfs3_fh_resolve_inode_done (cs, inode);
-
- if (inode)
- inode_unref (inode);
-
return ret;
}
+/*
+ * nfs3_fh_auth_nfsop () -- Checks if an nfsop is authorized.
+ *
+ * @cs: The NFS call state containing all the relevant information
+ * @is_write_op: Forwarded unchanged to mnt3_authenticate_request ()
+ * @return: 0 if authorized
+ * -EACCES for completely unauthorized fop
+ * -EROFS for unauthorized write operations (rm, mkdir, write)
+ */
int
-nfs3_fh_resolve_entry (nfs3_call_state_t *cs)
+nfs3_fh_auth_nfsop (nfs3_call_state_t *cs, gf_boolean_t is_write_op)
{
- int ret = -EFAULT;
-
- if (!cs)
- return ret;
-
- ret = nfs3_fh_resolve_entry_hard (cs);
- if (ret < 0)
- nfs3_call_resume_estale (cs);
+ struct nfs_state *nfs = NULL;
+ struct mount3_state *ms = NULL;
- return 0;
+ nfs = (struct nfs_state *)cs->nfsx->private;
+ ms = (struct mount3_state *)nfs->mstate;
+ return mnt3_authenticate_request (ms, cs->req, &cs->resolvefh, NULL,
+ NULL, NULL, NULL, is_write_op);
}
int
@@ -2913,18 +3994,20 @@ nfs3_fh_resolve_and_resume (nfs3_call_state_t *cs, struct nfs3_fh *fh,
cs->resolvefh = *fh;
cs->hashidx = 0;
- if (!entry)
- ret = nfs3_fh_resolve_inode (cs);
- else {
+ /* Check if the resolution is:
+ * a. fh resolution
+ *
+ * or
+ *
+ * b. (fh, basename) resolution
+ */
+ if (entry) { /* b */
cs->resolventry = gf_strdup (entry);
if (!cs->resolventry)
goto err;
-
- ret = nfs3_fh_resolve_entry (cs);
}
+ ret = nfs3_fh_resolve_root (cs);
err:
return ret;
}
-
-
diff --git a/xlators/nfs/server/src/nfs3-helpers.h b/xlators/nfs/server/src/nfs3-helpers.h
index db76b5cce77..213639e3806 100644
--- a/xlators/nfs/server/src/nfs3-helpers.h
+++ b/xlators/nfs/server/src/nfs3-helpers.h
@@ -1,29 +1,15 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS3_HELPER_H_
#define _NFS3_HELPER_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "xlator.h"
#include "nfs3.h"
@@ -44,9 +30,13 @@ nfs3_extract_lookup_name (lookup3args *args);
extern nfsstat3
nfs3_errno_to_nfsstat3 (int errnum);
+extern nfsstat3
+nfs3_cbk_errno_status (int32_t, int32_t);
+
extern void
nfs3_fill_lookup3res (lookup3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
- struct iatt *stbuf, struct iatt *postparent);
+ struct iatt *stbuf, struct iatt *postparent,
+ uint64_t deviceid);
extern post_op_attr
nfs3_stat_to_post_op_attr (struct iatt *buf);
@@ -56,14 +46,14 @@ nfs3_extract_getattr_fh (getattr3args *args);
extern void
nfs3_fill_getattr3res (getattr3res *res, nfsstat3 stat, struct iatt *buf,
- uint16_t xlid);
+ uint64_t deviceid);
extern struct nfs3_fh
nfs3_extract_fsinfo_fh (fsinfo3args *args);
extern void
nfs3_fill_fsinfo3res (struct nfs3_state *nfs3, fsinfo3res *res,
- nfsstat3 status, struct iatt *fsroot, uint16_t xlid);
+ nfsstat3 status, struct iatt *fsroot,uint64_t deviceid);
/* Functions containing _prep_ are used specifically to work around
* the memory allocations that happen inside Sun RPC library.
@@ -98,8 +88,8 @@ extern void
nfs3_prep_access3args (access3args *args, struct nfs3_fh *fh);
extern void
-nfs3_fill_access3res (access3res *res, nfsstat3 status, struct iatt *buf,
- uint32_t accbits, uid_t uid, gid_t gid, uint16_t xlid);
+nfs3_fill_access3res (access3res *res, nfsstat3 status, int32_t accbits,
+ int32_t reqaccbits);
extern char *
nfs3_fhcache_getpath (struct nfs3_state *nfs3, struct nfs3_fh *fh);
@@ -113,16 +103,18 @@ nfs3_prep_readdir3args (readdir3args *ra, struct nfs3_fh *fh);
extern void
nfs3_fill_readdir3res (readdir3res *res, nfsstat3 stat, struct nfs3_fh *dfh,
uint64_t cverf, struct iatt *dirstat,
- gf_dirent_t *entries, count3 count, int is_eof);
+ gf_dirent_t *entries, count3 count, int is_eof,
+ uint64_t deviceid);
extern void
nfs3_prep_readdirp3args (readdirp3args *ra, struct nfs3_fh *fh);
extern void
-nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
- uint64_t cverf, struct iatt *dirstat,
- gf_dirent_t *entries, count3 dircount, count3 maxcount,
- int is_eof);
+nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat,
+ struct nfs3_fh *dirfh, uint64_t cverf,
+ struct iatt *dirstat, gf_dirent_t *entries,
+ count3 dircount, count3 maxcount, int is_eof,
+ uint64_t deviceid);
extern void
nfs3_free_readdirp3res (readdirp3res *res);
@@ -135,14 +127,14 @@ nfs3_prep_fsstat3args (fsstat3args *args, struct nfs3_fh *fh);
extern void
nfs3_fill_fsstat3res (fsstat3res *res, nfsstat3 stat, struct statvfs *fsbuf,
- struct iatt *postbuf, uint16_t xlid);
+ struct iatt *postbuf, uint64_t deviceid);
extern int32_t
nfs3_sattr3_to_setattr_valid (sattr3 *sattr, struct iatt *buf, mode_t *omode);
extern void
nfs3_fill_create3res (create3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
struct iatt *newbuf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, uint64_t deviceid);
extern void
nfs3_prep_create3args (create3args *args, struct nfs3_fh *fh, char *name);
@@ -152,7 +144,7 @@ nfs3_prep_setattr3args (setattr3args *args, struct nfs3_fh *fh);
extern void
nfs3_fill_setattr3res (setattr3res *res, nfsstat3 stat, struct iatt *preop,
- struct iatt *postop, uint16_t xlid);
+ struct iatt *postop, uint64_t deviceid);
extern void
nfs3_prep_mkdir3args (mkdir3args *args, struct nfs3_fh *dirfh, char *name);
@@ -160,7 +152,7 @@ nfs3_prep_mkdir3args (mkdir3args *args, struct nfs3_fh *dirfh, char *name);
extern void
nfs3_fill_mkdir3res (mkdir3res *res, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, uint64_t deviceid);
extern void
nfs3_prep_symlink3args (symlink3args *args, struct nfs3_fh *dirfh, char *name,
@@ -169,14 +161,14 @@ nfs3_prep_symlink3args (symlink3args *args, struct nfs3_fh *dirfh, char *name,
extern void
nfs3_fill_symlink3res (symlink3res *res, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, uint64_t deviceid);
extern void
nfs3_prep_readlink3args (readlink3args *args, struct nfs3_fh *fh);
extern void
nfs3_fill_readlink3res (readlink3res *res, nfsstat3 stat, char *path,
- struct iatt *buf, uint16_t xlid);
+ struct iatt *buf, uint64_t deviceid);
extern void
nfs3_prep_mknod3args (mknod3args *args, struct nfs3_fh *fh, char *name);
@@ -184,17 +176,17 @@ nfs3_prep_mknod3args (mknod3args *args, struct nfs3_fh *fh, char *name);
extern void
nfs3_fill_mknod3res (mknod3res *res, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, uint64_t deviceid);
extern void
nfs3_fill_remove3res (remove3res *res, nfsstat3 stat, struct iatt *preparent,
- struct iatt *postparent, uint16_t xlid);
+ struct iatt *postparent, uint64_t deviceid);
extern void
nfs3_prep_remove3args (remove3args *args, struct nfs3_fh *fh, char *name);
extern void
nfs3_fill_rmdir3res (rmdir3res *res, nfsstat3 stat, struct iatt *preparent,
- struct iatt *postparent, uint16_t xlid);
+ struct iatt *postparent, uint64_t deviceid);
extern void
nfs3_prep_rmdir3args (rmdir3args *args, struct nfs3_fh *fh, char *name);
@@ -202,7 +194,7 @@ nfs3_prep_rmdir3args (rmdir3args *args, struct nfs3_fh *fh, char *name);
extern void
nfs3_fill_link3res (link3res *res, nfsstat3 stat, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent,
- uint16_t xlid);
+ uint64_t deviceid);
extern void
nfs3_prep_link3args (link3args *args, struct nfs3_fh *target,
@@ -217,7 +209,7 @@ extern void
nfs3_fill_rename3res (rename3res *res, nfsstat3 stat, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
struct iatt *prenewparent, struct iatt *postnewparent,
- uint16_t xlid);
+ uint64_t deviceid);
extern void
nfs3_prep_write3args (write3args *args, struct nfs3_fh *fh);
@@ -225,7 +217,7 @@ nfs3_prep_write3args (write3args *args, struct nfs3_fh *fh);
extern void
nfs3_fill_write3res (write3res *res, nfsstat3 stat, count3 count,
stable_how stable, uint64_t wverf, struct iatt *prestat,
- struct iatt *poststat, uint16_t xlid);
+ struct iatt *poststat, uint64_t deviceid);
extern void
nfs3_prep_commit3args (commit3args *args, struct nfs3_fh *fh);
@@ -233,11 +225,11 @@ nfs3_prep_commit3args (commit3args *args, struct nfs3_fh *fh);
extern void
nfs3_fill_commit3res (commit3res *res, nfsstat3 stat, uint64_t wverf,
struct iatt *prestat, struct iatt *poststat,
- uint16_t xlid);
+ uint64_t deviceid);
extern void
nfs3_fill_read3res (read3res *res, nfsstat3 stat, count3 count,
- struct iatt *poststat, int is_eof, uint16_t xlid);
+ struct iatt *poststat, int is_eof, uint64_t deviceid);
extern void
nfs3_prep_read3args (read3args *args, struct nfs3_fh *fh);
@@ -247,39 +239,44 @@ nfs3_prep_pathconf3args (pathconf3args *args, struct nfs3_fh *fh);
extern void
nfs3_fill_pathconf3res (pathconf3res *res, nfsstat3 stat, struct iatt *buf,
- uint16_t xlid);
+ uint64_t deviceid);
extern int
nfs3_cached_inode_opened (xlator_t *nfsxl, inode_t *inode);
extern void
-nfs3_log_common_res (uint32_t xid, char *op, nfsstat3 stat, int pstat);
+nfs3_log_common_res (uint32_t xid, int op, nfsstat3 stat, int pstat,
+ const char *path);
extern void
-nfs3_log_readlink_res (uint32_t xid, nfsstat3 stat, int pstat, char *linkpath);
+nfs3_log_readlink_res (uint32_t xid, nfsstat3 stat, int pstat,
+ char *linkpath, const char *path);
extern void
-nfs3_log_read_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
- int is_eof, struct iovec *vec, int32_t vcount);
+nfs3_log_read_res (uint32_t xid, nfsstat3 stat, int pstat,
+ count3 count, int is_eof, struct iovec *vec,
+ int32_t vcount, const char *path);
extern void
nfs3_log_write_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
- int stable, uint64_t wverf);
+ int stable, uint64_t wverf, const char *path);
extern void
-nfs3_log_newfh_res (uint32_t xid, char *op, nfsstat3 stat, int pstat,
- struct nfs3_fh *newfh);
+nfs3_log_newfh_res (uint32_t xid, int op, nfsstat3 stat, int pstat,
+ struct nfs3_fh *newfh, const char *path);
extern void
nfs3_log_readdir_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
- count3 count, int is_eof);
+ count3 count, int is_eof, const char *path);
extern void
nfs3_log_readdirp_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
- count3 dircount, count3 maxcount, int is_eof);
+ count3 dircount, count3 maxcount, int is_eof,
+ const char *path);
extern void
-nfs3_log_commit_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t wverf);
+nfs3_log_commit_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t wverf,
+ const char *path);
extern void
nfs3_log_common_call (uint32_t xid, char *op, struct nfs3_fh *fh);
@@ -328,18 +325,19 @@ nfs3_fh_resolve_and_resume (nfs3_call_state_t *cs, struct nfs3_fh *fh,
char *entry, nfs3_resume_fn_t resum_fn);
extern int
-nfs3_file_open_and_resume (nfs3_call_state_t *cs, nfs3_resume_fn_t resume);
-
-extern int
-nfs3_dir_open_and_resume (nfs3_call_state_t *cs, nfs3_resume_fn_t resume);
-
-extern int
nfs3_verify_dircookie (struct nfs3_state *nfs3, fd_t *dirfd, cookie3 cookie,
uint64_t cverf, nfsstat3 *stat);
extern int
-nfs3_fdcache_remove (struct nfs3_state *nfs3, fd_t *fd);
+nfs3_is_parentdir_entry (char *entry);
+
+uint32_t
+nfs3_request_to_accessbits (int32_t accbits);
extern int
-nfs3_is_parentdir_entry (char *entry);
+nfs3_fh_auth_nfsop (nfs3_call_state_t *cs, gf_boolean_t is_write_op);
+
+void
+nfs3_map_deviceid_to_statdev (struct iatt *ia, uint64_t deviceid);
+
#endif
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index fda6d6413bf..64287c5b1bd 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
@@ -32,12 +18,16 @@
#include "nfs3.h"
#include "mem-pool.h"
#include "logging.h"
+#include "nfs-common.h"
#include "nfs-fops.h"
#include "nfs-inodes.h"
#include "nfs-generics.h"
#include "nfs3-helpers.h"
#include "nfs-mem-types.h"
-
+#include "nfs.h"
+#include "xdr-rpc.h"
+#include "xdr-generic.h"
+#include "nfs-messages.h"
#include <sys/socket.h>
#include <sys/uio.h>
@@ -48,6 +38,10 @@
do { \
if ((str)) { \
if (strlen ((str)) > (len)) { \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, \
+ ENAMETOOLONG, \
+ NFS_MSG_STR_TOO_LONG, \
+ "strlen too long"); \
status = NFS3ERR_NAMETOOLONG; \
retval = -ENAMETOOLONG; \
goto label; \
@@ -58,8 +52,9 @@
#define nfs3_validate_nfs3_state(request, state, status, label, retval) \
do { \
state = rpcsvc_request_program_private (request); \
- if (!nfs3) { \
- gf_log (GF_NFS3, GF_LOG_ERROR, "NFSv3 state " \
+ if (!state) { \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EFAULT, \
+ NFS_MSG_STATE_MISSING, "NFSv3 state " \
"missing from RPC request"); \
status = NFS3ERR_SERVERFAULT; \
ret = -EFAULT; \
@@ -67,12 +62,91 @@
} \
} while (0); \
-#define nfs3_export_access(nfs3state, xlid) ((nfs3state)->exports[xlid]).access
-#define nfs3_check_rw_volaccess(nfs3state, xlid, status, label) \
- do { \
- if (nfs3_export_access (nfs3state,xlid)!=GF_NFS3_VOLACCESS_RW){\
- gf_log (GF_NFS3, GF_LOG_TRACE, "No read-write access");\
+struct nfs3_export *
+__nfs3_get_export_by_index (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+ int index = 0;
+ int searchindex = 0;
+
+ searchindex = nfs3_fh_exportid_to_index (exportid);
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ if (searchindex == index)
+ goto found;
+
+ ++index;
+ }
+
+ exp = NULL;
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_INDEX_NOT_FOUND,
+ "searchindex=%d not found", searchindex);
+found:
+ return exp;
+}
+
+
+struct nfs3_export *
+__nfs3_get_export_by_volumeid (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ if (!gf_uuid_compare (exportid, exp->volumeid))
+ goto found;
+ }
+
+ exp = NULL;
+found:
+ return exp;
+}
+
+
+struct nfs3_export *
+__nfs3_get_export_by_exportid (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+
+ if (!nfs3)
+ return exp;
+
+ if (gf_nfs_dvm_off (nfs_state(nfs3->nfsx)))
+ exp = __nfs3_get_export_by_index (nfs3, exportid);
+ else
+ exp = __nfs3_get_export_by_volumeid (nfs3, exportid);
+
+ return exp;
+}
+
+
+int
+nfs3_export_access (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ int ret = GF_NFS3_VOLACCESS_RO;
+ struct nfs3_export *exp = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, err);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, exportid);
+
+ if (!exp) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_EXPORT_ID_FAIL,
+ "Failed to get export by ID");
+ goto err;
+ }
+
+ ret = exp->access;
+
+err:
+ return ret;
+}
+
+#define nfs3_check_rw_volaccess(nfs3state, exid, status, label) \
+ do { \
+ if (nfs3_export_access (nfs3state,exid)!=GF_NFS3_VOLACCESS_RW){\
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EACCES, \
+ NFS_MSG_NO_RW_ACCESS, \
+ "No read-write access"); \
status = NFS3ERR_ROFS; \
goto label; \
} \
@@ -80,17 +154,87 @@
-#define nfs3_map_fh_to_volume(nfs3state, handle, rqst, volume, status, label) \
+xlator_t *
+nfs3_fh_to_xlator (struct nfs3_state *nfs3, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ struct nfs3_export *exp = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, fh->exportid);
+ if (!exp)
+ goto out;
+
+ vol = exp->subvol;
+out:
+ return vol;
+}
+
+
+int
+nfs3_is_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, rootfh, out);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, rootfh->exportid);
+ if (!exp)
+ goto out;
+
+ ret = exp->rootlookedup;
+out:
+ return ret;
+}
+
+
+int
+nfs3_set_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, rootfh, out);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, rootfh->exportid);
+ if (!exp)
+ goto out;
+
+ exp->rootlookedup = 1;
+out:
+ return ret;
+}
+
+
+#define nfs3_map_fh_to_volume(nfs3state, handle, req, volume, status, label) \
do { \
- volume = nfs3_fh_to_xlator ((nfs3state)->exportslist, handle); \
+ char exportid[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ volume = nfs3_fh_to_xlator ((nfs3state), handle); \
if (!volume) { \
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to map " \
- "FH to vol"); \
+ gf_uuid_unparse (handle->exportid, exportid); \
+ gf_uuid_unparse (handle->gfid, gfid); \
+ trans = rpcsvc_request_transport (req); \
+ GF_LOG_OCCASIONALLY (nfs3state->occ_logger, \
+ GF_NFS3, GF_LOG_ERROR, "Failed to map " \
+ "FH to vol: client=%s, exportid=%s, " \
+ "gfid=%s", trans->peerinfo.identifier, \
+ exportid, gfid); \
+ GF_LOG_OCCASIONALLY (nfs3state->occ_logger, \
+ GF_NFS3, GF_LOG_ERROR, "Stale nfs " \
+ "client %s must be trying to connect to"\
+ " a deleted volume, please unmount it.",\
+ trans->peerinfo.identifier); \
status = NFS3ERR_STALE; \
goto label; \
} else { \
- gf_log (GF_NFS3, GF_LOG_TRACE, "FH to Volume: %s"\
- ,volume->name); \
+ gf_msg_trace (GF_NFS3, 0, "FH to Volume:" \
+ "%s", volume->name); \
rpcsvc_request_set_private (req, volume); \
} \
} while (0); \
@@ -98,17 +242,60 @@
#define nfs3_validate_gluster_fh(handle, status, errlabel) \
do { \
- if ((handle)) { \
- if (!nfs3_fh_validate (handle)) { \
- status = NFS3ERR_BADHANDLE; \
- goto errlabel; \
- } \
+ if (!nfs3_fh_validate (handle)) { \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, \
+ NFS_MSG_BAD_HANDLE, \
+ "Bad Handle"); \
+ status = NFS3ERR_BADHANDLE; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+
+#define nfs3_check_fh_auth_status(cst, nfstat, is_write_op, erlabl) \
+ do { \
+ int auth_ret = 0; \
+ int auth_errno = 0; \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ \
+ auth_ret = auth_errno = \
+ nfs3_fh_auth_nfsop (cst, is_write_op); \
+ if (auth_ret < 0) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ gf_uuid_unparse (cst->resolvefh.gfid, gfid); \
+ sprintf (buf, "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", gfid); \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, \
+ NFS_MSG_RESOLVE_FH_FAIL, "Unable to " \
+ "resolve FH: %s", buf); \
+ nfstat = nfs3_errno_to_nfsstat3 (-auth_errno); \
+ goto erlabl; \
} \
} while (0) \
#define nfs3_check_fh_resolve_status(cst, nfstat, erlabl) \
do { \
- if ((cst)->resolve_ret == -1) { \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if ((cst)->resolve_ret < 0) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ gf_uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid); \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, \
+ NFS_MSG_RESOLVE_STAT, \
+ "%s: %s", strerror(cst->resolve_errno), \
+ buf); \
nfstat = nfs3_errno_to_nfsstat3 (cst->resolve_errno);\
goto erlabl; \
} \
@@ -116,33 +303,122 @@
#define nfs3_check_new_fh_resolve_status(cst, nfstat, erlabl) \
do { \
- if (((cst)->resolve_ret == -1) && \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if (((cst)->resolve_ret < 0) && \
((cst)->resolve_errno != ENOENT)) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ gf_uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid); \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, \
+ NFS_MSG_RESOLVE_STAT, "%s: %s", \
+ strerror(cst->resolve_errno), buf); \
nfstat = nfs3_errno_to_nfsstat3 (cs->resolve_errno);\
goto erlabl; \
} \
} while (0) \
+int
+__nfs3_get_volume_id (struct nfs3_state *nfs3, xlator_t *xl,
+ uuid_t volumeid)
+{
+ int ret = -1;
+ struct nfs3_export *exp = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, xl, out);
+
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ if (exp->subvol == xl) {
+ gf_uuid_copy (volumeid, exp->volumeid);
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
#define nfs3_funge_solaris_zerolen_fh(nfs3st, fhd, enam, nfsst, erl) \
do { \
xlator_t *fungexl = NULL; \
+ uuid_t zero = {0, }; \
fungexl =nfs_mntpath_to_xlator ((nfs3st)->exportslist,enam);\
if (!fungexl) { \
(nfsst) = NFS3ERR_NOENT; \
goto erl; \
} \
\
- (fhd)->xlatorid = nfs_xlator_to_xlid ((nfs3st)->exportslist, \
- fungexl); \
- (fhd)->gen = 0; \
- (fhd)->ino = 1; \
+ gf_uuid_copy ((fhd)->gfid, zero); \
+ (fhd)->gfid[15] = 1; \
(enam) = NULL; \
+ if ((gf_nfs_dvm_off (nfs_state (nfs3st->nfsx)))) \
+ (fhd)->exportid[15] = nfs_xlator_to_xlid ((nfs3st)->exportslist, fungexl); \
+ else { \
+ if(__nfs3_get_volume_id ((nfs3st), fungexl, (fhd)->exportid) < 0) { \
+ (nfsst) = NFS3ERR_STALE; \
+ goto erl; \
+ } \
+ } \
+ } while (0) \
+
+
+#define nfs3_volume_started_check(nf3stt, vlm, rtval, erlbl) \
+ do { \
+ if ((!nfs_subvolume_started (nfs_state (nf3stt->nfsx), vlm))){\
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, \
+ NFS_MSG_VOL_DISABLE, \
+ "Volume is disabled: %s", \
+ vlm->name); \
+ rtval = RPCSVC_ACTOR_IGNORE; \
+ goto erlbl; \
+ } \
} while (0) \
-#define nfs3_export_sync_trusted(nf3stt, xlid) ((nf3stt)->exports[xlid]).trusted_sync
-#define nfs3_export_write_trusted(nf3stt, xlid) ((nf3stt)->exports[xlid]).trusted_write
+int
+nfs3_export_sync_trusted (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, err);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, exportid);
+ if (!exp)
+ goto err;
+
+ ret = exp->trusted_sync;
+err:
+ return ret;
+}
+
+
+int
+nfs3_export_write_trusted (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, err);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, exportid);
+ if (!exp)
+ goto err;
+
+ ret = exp->trusted_write;
+err:
+ return ret;
+}
int
nfs3_solaris_zerolen_fh (struct nfs3_fh *fh, int fhlen)
@@ -171,12 +447,16 @@ nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v)
{
nfs3_call_state_t *cs = NULL;
- if ((!s) || (!req) || (!v))
- return NULL;
+ GF_VALIDATE_OR_GOTO (GF_NFS3, s, err);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, err);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, v, err);
cs = (nfs3_call_state_t *) mem_get (s->localpool);
- if (!cs)
+ if (!cs) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "out of memory");
return NULL;
+ }
memset (cs, 0, sizeof (*cs));
INIT_LIST_HEAD (&cs->entries.list);
@@ -186,42 +466,39 @@ nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v)
cs->vol = v;
cs->nfsx = s->nfsx;
cs->nfs3state = s;
-
+err:
return cs;
}
void
nfs3_call_state_wipe (nfs3_call_state_t *cs)
{
- struct nfs3_state *nfs3 = NULL;
if (!cs)
return;
- nfs3 = cs->nfs3state;
if (cs->fd) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "fd ref: %d", cs->fd->refcount);
+ gf_msg_trace (GF_NFS3, 0, "fd 0x%lx ref: %d",
+ (long)cs->fd, cs->fd->refcount);
fd_unref (cs->fd);
}
- if (cs->resolve_dir_fd)
- fd_unref (cs->resolve_dir_fd);
+ GF_FREE (cs->resolventry);
- if (cs->resolventry)
- GF_FREE (cs->resolventry);
-
- if (cs->pathname)
- GF_FREE (cs->pathname);
+ GF_FREE (cs->pathname);
if (!list_empty (&cs->entries.list))
gf_dirent_free (&cs->entries);
- list_del (&cs->openwait_q);
nfs_loc_wipe (&cs->oploc);
nfs_loc_wipe (&cs->resolvedloc);
if (cs->iob)
iobuf_unref (cs->iob);
+ if (cs->iobref)
+ iobref_unref (cs->iobref);
+ if (cs->trans)
+ rpc_transport_unref (cs->trans);
memset (cs, 0, sizeof (*cs));
- mem_put (nfs3->localpool, cs);
+ mem_put (cs);
/* Already refd by fd_lookup, so no need to ref again. */
}
@@ -230,8 +507,9 @@ nfs3_call_state_wipe (nfs3_call_state_t *cs)
do { \
calls = nfs3_call_state_init ((nfs3state), (rq), (vl)); \
if (!calls) { \
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to " \
- "init call state"); \
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, \
+ NFS_MSG_INIT_CALL_STAT_FAIL, "Failed to"\
+ " init call state"); \
opstat = NFS3ERR_SERVERFAULT; \
goto errlabel; \
} \
@@ -249,17 +527,20 @@ nfs3_serialize_reply (rpcsvc_request_t *req, void *arg, nfs3_serializer sfunc,
nfs3 = (struct nfs3_state *)rpcsvc_request_program_private (req);
if (!nfs3) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "NFSv3 state not found in RPC"
- " request");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_STATE_MISSING,
+ "NFSv3 state not found in RPC request");
goto ret;
}
/* First, get the io buffer into which the reply in arg will
* be serialized.
*/
+ /* TODO: get rid of 'sfunc' and use 'xdrproc_t' so we
+ can have 'xdr_sizeof' */
iob = iobuf_get (nfs3->iobpool);
if (!iob) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to get iobuf");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobuf");
goto ret;
}
@@ -272,7 +553,8 @@ nfs3_serialize_reply (rpcsvc_request_t *req, void *arg, nfs3_serializer sfunc,
*/
retlen = sfunc (*outmsg, arg);
if (retlen == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to encode message");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ENCODE_FAIL,
+ "Failed to encode message");
goto ret;
}
@@ -295,31 +577,50 @@ nfs3svc_submit_reply (rpcsvc_request_t *req, void *arg, nfs3_serializer sfunc)
struct iovec outmsg = {0, };
struct iobuf *iob = NULL;
int ret = -1;
+ struct iobref *iobref = NULL;
if (!req)
return -1;
iob = nfs3_serialize_reply (req, arg, sfunc, &outmsg);
if (!iob) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to serialize reply");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SERIALIZE_REPLY_FAIL,
+ "Failed to serialize reply");
goto ret;
}
- /* Then, submit the message for transmission. */
- ret = rpcsvc_submit_message (req, outmsg, iob);
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "failed on iobref_new()");
+ goto ret;
+ }
- /* Now that we've done our job of handing the message to the RPC layer
- * we can safely unref the iob in the hope that RPC layer must have
- * ref'ed the iob on receiving into the txlist.
- */
- iobuf_unref (iob);
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Reply submission failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SUBMIT_REPLY_FAIL,
+ "Reply submission failed");
goto ret;
}
ret = 0;
ret:
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (NULL != iob)
+ iobuf_unref (iob);
+ if (NULL != iobref)
+ iobref_unref (iobref);
return ret;
}
@@ -327,49 +628,81 @@ ret:
int
nfs3svc_submit_vector_reply (rpcsvc_request_t *req, void *arg,
nfs3_serializer sfunc, struct iovec *payload,
- int vcount, struct iobref *piobref)
+ int vcount, struct iobref *iobref)
{
struct iovec outmsg = {0, };
struct iobuf *iob = NULL;
int ret = -1;
+ int new_iobref = 0;
if (!req)
return -1;
iob = nfs3_serialize_reply (req, arg, sfunc, &outmsg);
if (!iob) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to serialize reply");
- goto err;
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SERIALIZE_REPLY_FAIL,
+ "Failed to serialize reply");
+ goto ret;
+ }
+ if (iobref == NULL) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM,
+ NFS_MSG_NO_MEMORY, "failed on iobref_new");
+ goto ret;
+ }
+ new_iobref = 1;
}
- ret = rpcsvc_request_attach_vector (req, outmsg, iob, NULL, 0);
- iobuf_unref (iob);
-
- if (piobref)
- ret = rpcsvc_request_attach_vectors (req, payload, vcount,
- piobref);
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to add iob to iobref");
+ goto ret;
+ }
- if (ret == -1)
- goto err;
- ret = rpcsvc_submit_vectors (req);
-err:
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, payload, vcount, iobref);
+ if (ret == -1) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SUBMIT_REPLY_FAIL,
+ "Reply submission failed");
+ goto ret;
+ }
+ ret = 0;
+ret:
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (NULL != iob)
+ iobuf_unref (iob);
+ if (new_iobref)
+ iobref_unref (iobref);
return ret;
}
-
-uint16_t
-nfs3_request_xlator_id (rpcsvc_request_t *rq)
+uint64_t
+nfs3_request_xlator_deviceid (rpcsvc_request_t *rq)
{
struct nfs3_state *nfs3 = NULL;
xlator_t *xl = NULL;
+ uint64_t devid = 0;
+ uuid_t volumeid = {0, };
if (!rq)
return 0;
xl = rpcsvc_request_private (rq);
nfs3 = rpcsvc_request_program_private (rq);
- return nfs_xlator_to_xlid (nfs3->exportslist, xl);
+ if (gf_nfs_dvm_off (nfs_state (nfs3->nfsx)))
+ devid = (uint64_t)nfs_xlator_to_xlid (nfs3->exportslist, xl);
+ else {
+ __nfs3_get_volume_id (nfs3, xl, volumeid);
+ memcpy (&devid, &volumeid[8], sizeof (devid));
+ }
+
+ return devid;
}
@@ -379,8 +712,7 @@ nfs3svc_null (rpcsvc_request_t *req)
struct iovec dummyvec = {0, };
if (!req)
return RPCSVC_ACTOR_ERROR;
-
- rpcsvc_submit_generic (req, dummyvec, NULL);
+ rpcsvc_submit_generic (req, &dummyvec, 1, NULL, 0, NULL);
return RPCSVC_ACTOR_SUCCESS;
}
@@ -389,10 +721,10 @@ int
nfs3_getattr_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *buf)
{
getattr3res res;
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_getattr3res (&res, status, buf, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_getattr3res (&res, status, buf, deviceid);
nfs3svc_submit_reply (req, &res,
(nfs3_serializer)xdr_serialize_getattr3res);
@@ -406,18 +738,27 @@ nfs3svc_getattr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
- rpcsvc_request_t *req = NULL;
nfsstat3 status = NFS3_OK;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- req = cs->req;
- if (op_ret == -1)
- status = nfs3_errno_to_nfsstat3 (op_errno);
+ /*
+ * Somewhat counter-intuitively, we don't need to look for sh-failed
+ * here. Failing this getattr will generate a new lookup from the
+ * client, and nfs_fop_lookup_cbk will detect any self-heal failures.
+ */
+
+ if (op_ret == -1) {
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
+ else {
+ nfs_fix_generation(this,inode);
+ }
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "GETATTR", status,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_GETATTR, status, op_errno,
+ cs->resolvedloc.path);
nfs3_getattr_reply (cs->req, status, buf);
nfs3_call_state_wipe (cs);
@@ -428,20 +769,21 @@ nfs3svc_getattr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
nfs3svc_getattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- rpcsvc_request_t *req = NULL;
nfsstat3 status = NFS3_OK;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- req = cs->req;
- if (op_ret == -1)
- status = nfs3_errno_to_nfsstat3 (op_errno);
+ if (op_ret == -1) {
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "GETATTR", status,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_GETATTR, status, op_errno,
+ cs->resolvedloc.path);
nfs3_getattr_reply (cs->req, status, buf);
nfs3_call_state_wipe (cs);
@@ -457,11 +799,15 @@ nfs3_getattr_resume (void *carg)
int ret = -EFAULT;
nfs_user_t nfu = {0, };
nfs3_call_state_t *cs = NULL;
+ uint64_t raw_ctx = 0;
+ struct nfs_inode_ctx *ictx = NULL;
+ struct nfs_state *priv = NULL;
if (!carg)
return ret;
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_false, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
nfs_request_user_init (&nfu, cs->req);
/* If inode which is to be getattr'd is the root, we need to do a
@@ -469,25 +815,55 @@ nfs3_getattr_resume (void *carg)
* for the root to have been looked up when the getattr on the root is
* sent. AND, this causes a problem for stat-prefetch in that it
* expects even the root inode to have been looked up.
- */
- if (cs->resolvedloc.inode->ino == 1)
+
+ if (__is_root_gfid (cs->resolvedloc.inode->gfid))
ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
nfs3svc_getattr_lookup_cbk, cs);
else
ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3svc_getattr_stat_cbk, cs);
+ */
+
+ if (cs->hardresolved) {
+ ret = -EFAULT;
+ stat = NFS3_OK;
+ goto nfs3err;
+ }
+
+ /*
+ * If brick state changed, we need to force a proper lookup cycle (as
+ * would happen in native protocol) to do self-heal checks. We detect
+ * this by comparing the generation number for the last successful
+ * creation/lookup on the inode to the current number, so inodes that
+ * haven't been validated since the state change are affected.
+ */
+ if (inode_ctx_get(cs->resolvedloc.inode,cs->nfsx,&raw_ctx) == 0) {
+ ictx = (struct nfs_inode_ctx *)raw_ctx;
+ priv = cs->nfsx->private;
+ if (ictx->generation != priv->generation) {
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu,
+ &cs->resolvedloc,
+ nfs3svc_getattr_lookup_cbk, cs);
+ goto check_err;
+ }
+ }
+
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_getattr_stat_cbk, cs);
+check_err:
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Stat fop failed: %s: %s",
- cs->oploc.path, strerror (-ret));
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_STAT_FOP_FAIL,
+ "Stat fop failed: %s: %s", cs->oploc.path,
+ strerror (-ret));
stat = nfs3_errno_to_nfsstat3 (-ret);
}
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "GETATTR",
- stat, -ret);
- nfs3_getattr_reply (cs->req, stat, NULL);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_GETATTR, stat, -ret,
+ cs->resolvedloc.path);
+ nfs3_getattr_reply (cs->req, stat, &cs->stbuf);
nfs3_call_state_wipe (cs);
ret = 0;
}
@@ -505,13 +881,14 @@ nfs3_getattr (rpcsvc_request_t *req, struct nfs3_fh *fh)
struct nfs3_state *nfs3 = NULL;
nfs3_call_state_t *cstate = NULL;
- if ((!req) || (!fh))
- return -1;
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
nfs3_log_common_call (rpcsvc_request_xid (req), "GETATTR", fh);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cstate, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cstate, fh, NULL,nfs3_getattr_resume);
@@ -520,13 +897,14 @@ nfs3_getattr (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "GETATTR", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_GETATTR, stat, -ret,
+ NULL);
nfs3_getattr_reply (req, stat, NULL);
ret = 0;
nfs3_call_state_wipe (cstate);
}
-
+out:
return ret;
}
@@ -542,15 +920,17 @@ nfs3svc_getattr (rpcsvc_request_t *req)
return ret;
nfs3_prep_getattr3args (&args, &fh);
- if (xdr_to_getattr3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_getattr3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_getattr (req, &fh);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "GETATTR procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_GETATTR_FAIL,
+ "GETATTR procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -565,10 +945,10 @@ nfs3_setattr_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preop,
struct iatt *postop)
{
setattr3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_setattr3res (&res, stat, preop, postop, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_setattr3res (&res, stat, preop, postop, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer) xdr_serialize_setattr3res);
return 0;
@@ -578,7 +958,7 @@ nfs3_setattr_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preop,
int32_t
nfs3svc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
struct iatt *prestat = NULL;
@@ -586,7 +966,7 @@ nfs3svc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
@@ -601,8 +981,9 @@ nfs3svc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stat = NFS3_OK;
nfs3err:
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "SETATTR", stat,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_setattr_reply (cs->req, stat, prestat, postbuf);
nfs3_call_state_wipe (cs);
@@ -613,7 +994,7 @@ nfs3err:
int32_t
nfs3svc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop)
+ struct iatt *postop, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -1;
@@ -623,32 +1004,26 @@ nfs3svc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
- /* If the first stat was got from the guarded setattr callback, then
- * we'll need to use that stat instead of the preop returned here.
+ prebuf = preop;
+ /* Store the current preop in case we need to send a truncate,
+ * in which case the preop to be returned will be this one.
*/
- if (cs->preparent.ia_ino != 0)
- prebuf = &cs->preparent;
- else {
- prebuf = preop;
- /* Store the current preop in case we need to send a truncate,
- * in which case the preop to be returned will be this one.
- */
- cs->preparent = *preop;
- }
+ cs->preparent = *preop;
- ret = 0;
/* Only truncate if the size is not already same as the requested
* truncation and also only if this is not a directory.
*/
if ((gf_attr_size_set (cs->setattr_valid)) &&
- (!IA_ISDIR (postop->ia_type))) {
+ (!IA_ISDIR (postop->ia_type)) &&
+ (preop->ia_size != cs->attr_in.ia_size)) {
nfs_request_user_init (&nfu, cs->req);
ret = nfs_truncate (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- cs->stbuf.ia_size, nfs3svc_truncate_cbk,cs);
+ cs->attr_in.ia_size, nfs3svc_truncate_cbk,
+ cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
@@ -659,8 +1034,9 @@ nfs3svc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "SETATTR",
- stat, op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_setattr_reply (cs->req, stat, prebuf, postop);
nfs3_call_state_wipe (cs);
}
@@ -672,7 +1048,8 @@ nfs3err:
int32_t
nfs3svc_setattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -EFAULT;
@@ -682,12 +1059,13 @@ nfs3svc_setattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
if (buf->ia_ctime != cs->timestamp.seconds) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Timestamps not in sync");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_TIMESTAMP_NO_SYNC,
+ "Timestamps not in sync");
stat = NFS3ERR_NOT_SYNC;
goto nfs3err;
}
@@ -702,8 +1080,9 @@ nfs3svc_setattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "SETATTR",
- stat, op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_setattr_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -726,22 +1105,18 @@ nfs3_setattr_resume (void *carg)
cs = (nfs3_call_state_t *)carg;
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
nfs_request_user_init (&nfu, cs->req);
- /* If no ctime check is required, head straight to setting the attrs. */
- if (cs->sattrguardcheck)
- ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3svc_setattr_stat_cbk, cs);
- else
- ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- &cs->stbuf, cs->setattr_valid,
- nfs3svc_setattr_cbk, cs);
+ ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ &cs->attr_in, cs->setattr_valid,
+ nfs3svc_setattr_cbk, cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "SETATTR",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, -ret,
+ cs->resolvedloc.path);
nfs3_setattr_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -760,32 +1135,35 @@ nfs3_setattr (rpcsvc_request_t *req, struct nfs3_fh *fh, sattr3 *sattr,
struct nfs3_state *nfs3 = NULL;
nfs3_call_state_t *cs = NULL;
- if ((!req) || (!fh) || (!sattr) || (!guard)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
- return -1;
- }
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, sattr, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, guard, out);
nfs3_log_common_call (rpcsvc_request_xid (req), "SETATTR", fh);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, fh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
- cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, &cs->stbuf,
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, &cs->attr_in,
NULL);
if (guard->check) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Guard check required");
+ gf_msg_trace (GF_NFS3, 0, "Guard check required");
cs->timestamp = guard->sattrguard3_u.obj_ctime;
cs->sattrguardcheck = 1;
} else {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Guard check not required");
+ gf_msg_trace (GF_NFS3, 0, "Guard check not required");
cs->sattrguardcheck = 0;
}
if (!cs->setattr_valid) {
ret = -EINVAL; /* Force a reply */
stat = NFS3_OK;
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_SETATTR_INVALID,
+ "cs->setattr_valid is invalid");
goto nfs3err;
}
@@ -795,8 +1173,9 @@ nfs3_setattr (rpcsvc_request_t *req, struct nfs3_fh *fh, sattr3 *sattr,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "SETATTR", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_SETATTR, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_setattr_reply (req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -804,7 +1183,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -817,19 +1196,21 @@ nfs3svc_setattr (rpcsvc_request_t *req)
setattr3args args;
int ret = RPCSVC_ACTOR_ERROR;
- if (!req)
- return ret;
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, rpcerr);
nfs3_prep_setattr3args (&args, &fh);
- if (xdr_to_setattr3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_setattr3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0,
+ NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_setattr (req, &fh, &args.new_attributes, &args.guard);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "SETATTR procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_SETATTR_FAIL,
+ "SETATTR procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -845,12 +1226,44 @@ nfs3_lookup_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *newfh,
struct iatt *stbuf, struct iatt *postparent)
{
lookup3res res = {0, };
+ uint64_t deviceid = 0;
- nfs3_fill_lookup3res (&res, stat, newfh, stbuf, postparent);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_lookup3res (&res, stat, newfh, stbuf, postparent, deviceid);
return nfs3svc_submit_reply (req, &res,
(nfs3_serializer)xdr_serialize_lookup3res);
}
+int
+nfs3_lookup_resume (void *carg);
+
+/* Restart name resolution for a LOOKUP on an existing call state, marking
+ * the call state as a fresh (GF_NFS3_FRESH) lookup. In this file it is
+ * invoked from nfs3svc_lookup_cbk when a revalidate lookup has failed.
+ *
+ * cs: call state of the lookup being retried. Its resolvedloc is wiped,
+ * its lookuptype is set to GF_NFS3_FRESH, and the resolve is restarted
+ * with nfs3_lookup_resume as the resume callback.
+ *
+ * Returns the value of nfs3_fh_resolve_and_resume. If the
+ * GF_VALIDATE_OR_GOTO check on cs jumps to err (presumably when cs is
+ * NULL - confirm against the macro), ret is still its initial -EFAULT.
+ */
+int
+nfs3_fresh_lookup (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ char *oldresolventry = NULL;
+ /* Validate cs, then unlink the previously resolved entry (inode,
+ * parent, resolventry) and wipe resolvedloc before the resolve is
+ * restarted below.
+ */
+ GF_VALIDATE_OR_GOTO (GF_NFS3, cs, err);
+ gf_msg_debug (GF_NFS3, 0, "inode needs fresh lookup");
+ inode_unlink (cs->resolvedloc.inode, cs->resolvedloc.parent,
+ cs->resolventry);
+ nfs_loc_wipe (&cs->resolvedloc);
+
+ /* Store the pointer to the currently allocated resolventry because it
+ * gets overwritten in fh_resolve_and_resume.
+ *
+ * NOTE(review): the same string is also passed as the entry argument
+ * of the call below; presumably the callee copies it before replacing
+ * cs->resolventry - confirm.
+ */
+ oldresolventry = cs->resolventry;
+ cs->lookuptype = GF_NFS3_FRESH;
+ ret = nfs3_fh_resolve_and_resume (cs, &cs->resolvefh, cs->resolventry,
+ nfs3_lookup_resume);
+ /* The old entry was allocated in the previous call to
+ * fh_resolve_and_resume using the same call_state, so it is released
+ * here once the new resolve has been started.
+ */
+ GF_FREE (oldresolventry);
+err:
+ return ret;
+}
int
nfs3svc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -860,21 +1273,34 @@ nfs3svc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct nfs3_fh newfh = {{0}, };
nfsstat3 status = NFS3_OK;
nfs3_call_state_t *cs = NULL;
+ inode_t *oldinode = NULL;
cs = frame->local;
if (op_ret == -1) {
- status = nfs3_errno_to_nfsstat3 (op_errno);
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
goto xmit_res;
}
nfs3_fh_build_child_fh (&cs->parent, buf, &newfh);
-
+ oldinode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
xmit_res:
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "LOOKUP", status,
- op_errno, &newfh);
+ /* Only send fresh lookup if it was a revalidate that failed. */
+ if ((op_ret == -1) && (nfs3_is_revalidate_lookup (cs))) {
+ op_ret = nfs3_fresh_lookup (cs);
+ goto out;
+ }
+
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_LOOKUP,
+ status, op_errno, &newfh,
+ cs->resolvedloc.path);
nfs3_lookup_reply (cs->req, status, &newfh, buf, postparent);
nfs3_call_state_wipe (cs);
-
+out:
+ if (oldinode) {
+ inode_lookup (oldinode);
+ inode_unref (oldinode);
+ }
return 0;
}
@@ -888,26 +1314,38 @@ nfs3svc_lookup_parentdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct nfs3_fh newfh = {{0}, };
nfsstat3 status = NFS3_OK;
nfs3_call_state_t *cs = NULL;
+ uuid_t volumeid = {0, };
+ uuid_t mountid = {1, };
+ struct nfs3_state *nfs3 = NULL;
cs = frame->local;
if (op_ret == -1) {
- status = nfs3_errno_to_nfsstat3 (op_errno);
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
goto xmit_res;
}
+ nfs3 = cs->nfs3state;
/* If the buf inode shows that this is a root dir's buf, then the file
* handle needs to be specially crafted, in all other cases, we'll just
* create the handle normally using the buffer of the parent dir.
*/
- if (buf->ia_ino != 1)
+ if (buf->ia_ino != 1) {
nfs3_fh_build_parent_fh (&cs->fh, buf, &newfh);
- else
- newfh = nfs3_fh_build_root_fh (cs->nfs3state->exportslist,
- cs->vol, *buf);
+ goto xmit_res;
+ }
+
+ if (gf_nfs_dvm_off (nfs_state (nfs3->nfsx)))
+ newfh = nfs3_fh_build_indexed_root_fh (nfs3->exportslist,
+ cs->vol);
+ else {
+ __nfs3_get_volume_id (nfs3, cs->vol, volumeid);
+ newfh = nfs3_fh_build_uuid_root_fh (volumeid, mountid);
+ }
xmit_res:
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "LOOKUP", status,
- op_errno, &newfh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_LOOKUP,
+ status, op_errno, &newfh,
+ cs->resolvedloc.path);
nfs3_lookup_reply (cs->req, status, &newfh, buf, postparent);
nfs3_call_state_wipe (cs);
@@ -925,10 +1363,14 @@ nfs3_lookup_parentdir_resume (void *carg)
nfs3_call_state_t *cs = NULL;
inode_t *parent = NULL;
- if (!carg)
- return ret;
+ if (!carg) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Invalid argument, carg value NULL");
+ return EINVAL;
+ }
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_false, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
/* At this point now, the loc in cs is for the directory file handle
@@ -957,10 +1399,16 @@ nfs3_lookup_parentdir_resume (void *carg)
if (!nfs3_fh_is_root_fh (&cs->fh)) {
parent = inode_ref (cs->resolvedloc.parent);
nfs_loc_wipe (&cs->resolvedloc);
- ret = nfs_inode_loc_fill (parent, &cs->resolvedloc);
+ ret = nfs_inode_loc_fill (parent, &cs->resolvedloc,
+ NFS_RESOLVE_CREATE);
- if (ret < 0)
+ if (ret < 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_INODE_LOC_FILL_ERROR,
+ "nfs_inode_loc_fill"
+ " error");
goto errtostat;
+ }
}
ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
@@ -971,8 +1419,9 @@ errtostat:
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "LOOKUP",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_LOOKUP, stat, -ret,
+ cs->resolvedloc.path);
nfs3_lookup_reply (cs->req, stat, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -991,14 +1440,26 @@ nfs3_lookup_resume (void *carg)
int ret = -EFAULT;
nfs_user_t nfu = {0, };
nfs3_call_state_t *cs = NULL;
+ struct nfs3_fh newfh = {{0},};
- if (!carg)
- return ret;
+ if (!carg) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Invalid argument, carg value NULL");
+ return EINVAL;
+ }
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_false, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
- nfs_request_user_init (&nfu, cs->req);
cs->parent = cs->resolvefh;
+
+ if (cs->hardresolved) {
+ stat = NFS3_OK;
+ nfs3_fh_build_child_fh (&cs->parent, &cs->stbuf, &newfh);
+ goto nfs3err;
+ }
+
+ nfs_request_user_init (&nfu, cs->req);
ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
nfs3svc_lookup_cbk, cs);
if (ret < 0)
@@ -1006,9 +1467,11 @@ nfs3_lookup_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "LOOKUP",
- stat, -ret);
- nfs3_lookup_reply (cs->req, stat, NULL, NULL, NULL);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_LOOKUP, stat, -ret,
+ cs->resolvedloc.path);
+ nfs3_lookup_reply (cs->req, stat, &newfh, &cs->stbuf,
+ &cs->postparent);
nfs3_call_state_wipe (cs);
}
@@ -1025,12 +1488,12 @@ nfs3_lookup (rpcsvc_request_t *req, struct nfs3_fh *fh, int fhlen, char *name)
struct nfs3_state *nfs3 = NULL;
nfs3_call_state_t *cs = NULL;
- if ((!req) || (!fh) || (!name)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
- return -1;
- }
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, name, out);
- nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "LOOKUP", fh, name);
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "LOOKUP", fh,
+ name);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
if (nfs3_solaris_zerolen_fh (fh, fhlen))
nfs3_funge_solaris_zerolen_fh (nfs3, fh, name, stat, nfs3err);
@@ -1038,22 +1501,25 @@ nfs3_lookup (rpcsvc_request_t *req, struct nfs3_fh *fh, int fhlen, char *name)
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
- if (!nfs3_is_parentdir_entry (name))
- ret = nfs3_fh_resolve_and_resume (cs, fh, name,
- nfs3_lookup_resume);
- else
- ret = nfs3_fh_resolve_and_resume (cs, fh, NULL,
- nfs3_lookup_parentdir_resume);
+ cs->lookuptype = GF_NFS3_REVALIDATE;
+ ret = nfs3_fh_resolve_and_resume (cs, fh, name,
+ nfs3_lookup_resume);
- if (ret < 0)
+ if (ret < 0) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, -ret,
+ NFS_MSG_HARD_RESOLVE_FAIL,
+ "failed to start hard reslove");
stat = nfs3_errno_to_nfsstat3 (-ret);
+ }
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "LOOKUP", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_LOOKUP, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_lookup_reply (req, stat, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -1061,7 +1527,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -1074,19 +1540,21 @@ nfs3svc_lookup (rpcsvc_request_t *req)
lookup3args args;
int ret = RPCSVC_ACTOR_ERROR;
- if (!req)
- return ret;
+ GF_VALIDATE_OR_GOTO (GF_NFS, req, rpcerr);
nfs3_prep_lookup3args (&args, &fh, name);
- if (xdr_to_lookup3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_lookup3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_lookup (req, &fh, args.what.dir.data.data_len, name);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "LOOKUP procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_LOOKUP_PROC_FAIL,
+ "LOOKUP procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -1097,16 +1565,12 @@ rpcerr:
int
-nfs3_access_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *buf,
- uint32_t accbits)
+nfs3_access_reply (rpcsvc_request_t *req, nfsstat3 status, int32_t accbits,
+ int32_t reqaccbits)
{
access3res res;
- uint16_t xlid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_access3res (&res, status, buf, accbits,
- rpcsvc_request_uid (req), rpcsvc_request_gid (req)
- , xlid);
+ nfs3_fill_access3res (&res, status, accbits, reqaccbits);
nfs3svc_submit_reply (req, &res,
(nfs3_serializer)xdr_serialize_access3res);
return 0;
@@ -1115,19 +1579,21 @@ nfs3_access_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *buf,
int32_t
nfs3svc_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
nfsstat3 status = NFS3_OK;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- if (op_ret == -1)
- status = nfs3_errno_to_nfsstat3 (op_errno);
+ if (op_ret == -1) {
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "ACCESS", status,
- op_errno);
- nfs3_access_reply (cs->req, status, buf, cs->accessbits);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_ACCESS, status, op_errno,
+ cs->resolvedloc.path);
+ nfs3_access_reply (cs->req, status, op_errno, cs->accessbits);
nfs3_call_state_wipe (cs);
return 0;
@@ -1141,23 +1607,39 @@ nfs3_access_resume (void *carg)
nfs_user_t nfu = {0, };
nfs3_call_state_t *cs = NULL;
- if (!carg)
- return ret;
+ if (!carg) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Invalid argument, carg value NULL");
+ return EINVAL;
+ }
cs = (nfs3_call_state_t *)carg;
+
+ /* Additional checks on the NFS file handle
+ * go here. The path for an NFS ACCESS call
+ * goes like this:
+ * nfs3_access -> nfs3_fh_resolve_and_resume -> nfs3_resolve_resume ->
+ * nfs3_access_resume -> <macro/function performs check on FH> ->
+ * <continue or return from function based on check.> ('goto nfs3err'
+ * terminates this function and writes the appropriate response to the
+ * client). It is important that you do NOT stick any sort of check
+ * on the file handle outside of the nfs3_##OP_resume functions.
+ */
+ nfs3_check_fh_auth_status (cs, stat, _gf_false, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
cs->fh = cs->resolvefh;
nfs_request_user_init (&nfu, cs->req);
- ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- nfs3svc_access_cbk, cs);
+ ret = nfs_access (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ cs->accessbits, nfs3svc_access_cbk, cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "ACCESS",
- stat, -ret);
- nfs3_access_reply (cs->req, stat, NULL, 0);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_ACCESS, stat, -ret,
+ cs->resolvedloc.path);
+ nfs3_access_reply (cs->req, stat, 0, 0);
nfs3_call_state_wipe (cs);
ret = 0;
}
@@ -1175,13 +1657,13 @@ nfs3_access (rpcsvc_request_t *req, struct nfs3_fh *fh, uint32_t accbits)
int ret = -EFAULT;
nfs3_call_state_t *cs = NULL;
- if ((!req) || (!fh))
- return -1;
-
+ GF_VALIDATE_OR_GOTO (GF_NFS, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, fh, out);
nfs3_log_common_call (rpcsvc_request_xid (req), "ACCESS", fh);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->accessbits = accbits;
@@ -1191,13 +1673,14 @@ nfs3_access (rpcsvc_request_t *req, struct nfs3_fh *fh, uint32_t accbits)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "ACCESS", stat,
- -ret);
- nfs3_access_reply (req, stat, NULL, 0);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_ACCESS, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
+ nfs3_access_reply (req, stat, 0, 0);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -1213,15 +1696,18 @@ nfs3svc_access (rpcsvc_request_t *req)
return ret;
nfs3_prep_access3args (&args, &fh);
- if (xdr_to_access3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_access3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_access (req, &fh, args.access);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "ACCESS procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_ACCESS_PROC_FAIL,
+ "ACCESS procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -1236,10 +1722,10 @@ nfs3_readlink_reply (rpcsvc_request_t *req, nfsstat3 stat, char *path,
struct iatt *buf)
{
readlink3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_readlink3res (&res, stat, path, buf, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_readlink3res (&res, stat, path, buf, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_readlink3res);
@@ -1250,22 +1736,23 @@ nfs3_readlink_reply (rpcsvc_request_t *req, nfsstat3 stat, char *path,
int32_t
nfs3svc_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
stat = NFS3_OK;
nfs3err:
- nfs3_log_readlink_res (rpcsvc_request_xid (cs->req), stat, op_errno,
- (char *)path);
+ nfs3_log_readlink_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno, (char *)path,
+ cs->resolvedloc.path);
nfs3_readlink_reply (cs->req, stat, (char *)path, buf);
nfs3_call_state_wipe (cs);
@@ -1285,6 +1772,7 @@ nfs3_readlink_resume (void *carg)
return ret;
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_false, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
nfs_request_user_init (&nfu, cs->req);
ret = nfs_readlink (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
@@ -1294,8 +1782,9 @@ nfs3_readlink_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "READLINK",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READLINK, stat, -ret,
+ cs->resolvedloc.path);
nfs3_readlink_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -1314,7 +1803,8 @@ nfs3_readlink (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
@@ -1322,6 +1812,7 @@ nfs3_readlink (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_readlink_resume);
@@ -1330,8 +1821,9 @@ nfs3_readlink (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "READLINK", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_READLINK, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_readlink_reply (req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -1339,7 +1831,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -1355,15 +1847,18 @@ nfs3svc_readlink (rpcsvc_request_t *req)
return ret;
nfs3_prep_readlink3args (&args, &fh);
- if (xdr_to_readlink3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_readlink3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_readlink (req, &fh);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "READLINK procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_READLINK_PROC_FAIL,
+ "READLINK procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -1379,22 +1874,24 @@ nfs3_read_reply (rpcsvc_request_t *req, nfsstat3 stat, count3 count,
struct iatt *poststat, int is_eof)
{
read3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_read3res (&res, stat, count, poststat, is_eof, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_read3res (&res, stat, count, poststat, is_eof, deviceid);
if (stat == NFS3_OK) {
xdr_vector_round_up (vec, vcount, count);
/* iob can be zero if the file size was zero. If so, op_ret
* would be 0 and count = 0.
*/
+
if (count != 0) {
nfs3svc_submit_vector_reply (req, (void *)&res,
(nfs3_serializer)
xdr_serialize_read3res_nocopy,
vec, vcount, iobref);
} else
- nfs3svc_submit_reply (req, (void *)&res,
+
+ nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)
xdr_serialize_read3res_nocopy);
} else
@@ -1409,7 +1906,8 @@ nfs3_read_reply (rpcsvc_request_t *req, nfsstat3 stat, count3 count,
int32_t
nfs3svc_read_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int is_eof = 0;
@@ -1417,7 +1915,7 @@ nfs3svc_read_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto err;
} else
stat = NFS3_OK;
@@ -1426,8 +1924,10 @@ nfs3svc_read_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
is_eof = 1;
err:
- nfs3_log_read_res (rpcsvc_request_xid (cs->req), stat, op_errno,
- op_ret, is_eof, vector, count);
+ nfs3_log_read_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno,
+ op_ret, is_eof, vector, count,
+ cs->resolvedloc.path);
nfs3_read_reply (cs->req, stat, op_ret, vector, count, iobref, stbuf,
is_eof);
nfs3_call_state_wipe (cs);
@@ -1456,8 +1956,9 @@ nfs3_read_fd_resume (void *carg)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "READ", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READ, stat, -ret,
+ cs->resolvedloc.path);
nfs3_read_reply (cs->req, stat, 0, NULL, 0, NULL, NULL, 0);
nfs3_call_state_wipe (cs);
}
@@ -1472,20 +1973,29 @@ nfs3_read_resume (void *carg)
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -EFAULT;
nfs3_call_state_t *cs = NULL;
+ fd_t *fd = NULL;
if (!carg)
return ret;
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_false, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
- ret = nfs3_file_open_and_resume (cs, nfs3_read_fd_resume);
- if (ret < 0)
- stat = nfs3_errno_to_nfsstat3 (-ret);
+ fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!fd) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ANONYMOUS_FD_FAIL,
+ "Failed to create anonymous fd");
+ goto nfs3err;
+ }
+ cs->fd = fd;
+ nfs3_read_fd_resume (cs);
+ ret = 0;
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "READ", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READ, stat, -ret,
+ cs->resolvedloc.path);
nfs3_read_reply (cs->req, stat, 0, NULL,0, NULL, NULL, 0);
nfs3_call_state_wipe (cs);
}
@@ -1504,15 +2014,17 @@ nfs3_read (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_rw_call (rpcsvc_request_xid (req), "READ", fh, offset, count,
- -1);
+ nfs3_log_rw_call (rpcsvc_request_xid (req), "READ", fh, offset,
+ count, -1);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->datacount = count;
@@ -1523,13 +2035,14 @@ nfs3_read (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "READ", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_READ, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_read_reply (req, stat, 0, NULL,0, NULL, NULL, 0);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -1545,15 +2058,18 @@ nfs3svc_read (rpcsvc_request_t *req)
return ret;
nfs3_prep_read3args (&args, &fh);
- if (xdr_to_read3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_read3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_read (req, &fh, args.offset, args.count);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "READ procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_READ_FAIL,
+ "READ procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -1569,11 +2085,11 @@ nfs3_write_reply (rpcsvc_request_t *req, nfsstat3 stat, count3 count,
struct iatt *poststat)
{
write3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
+ deviceid = nfs3_request_xlator_deviceid (req);
nfs3_fill_write3res (&res, stat, count, stable, wverf, prestat,
- poststat, xlid);
+ poststat, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_write3res);
@@ -1583,7 +2099,7 @@ nfs3_write_reply (rpcsvc_request_t *req, nfsstat3 stat, count3 count,
int32_t
nfs3svc_write_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
struct nfs3_state *nfs3 = NULL;
nfsstat3 stat = NFS3ERR_SERVERFAULT;
@@ -1592,13 +2108,15 @@ nfs3svc_write_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
nfs3 = rpcsvc_request_program_private (cs->req);
- if (op_ret == -1)
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- else
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
stat = NFS3_OK;
- nfs3_log_write_res (rpcsvc_request_xid (cs->req), stat, op_errno,
- cs->maxcount, cs->writetype, nfs3->serverstart);
+ nfs3_log_write_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno,
+ cs->maxcount, cs->writetype, nfs3->serverstart,
+ cs->resolvedloc.path);
nfs3_write_reply (cs->req, stat, cs->maxcount, cs->writetype,
nfs3->serverstart, &cs->stbuf, postbuf);
nfs3_call_state_wipe (cs);
@@ -1606,41 +2124,6 @@ nfs3svc_write_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
-/*
- * If this logic determines that the write should return a reply to the client
- * after this function, the return value is -1 and the writetype is reset to
- * the type of write we want to signify to the client.
- *
- * In case the write should continue to serve the request according to the type
- * of stable write, a 0 is returned and writetype is left as it is.
- */
-int
-nfs3_write_how (int *writetype, int write_trusted, int sync_trusted)
-{
- int ret = -1;
-
- if (*writetype == UNSTABLE) {
- /* On an UNSTABLE write, only return STABLE when trusted-write
- * is set. TW is also set when trusted-sync is set.
- */
- if (write_trusted)
- *writetype = FILE_SYNC;
-
- goto err;
- } else if ((*writetype == DATA_SYNC) || (*writetype == FILE_SYNC)) {
-
- /* On a STABLE write, if sync-trusted is on, only then, return
- * without syncing.
- */
- if (sync_trusted)
- goto err;
- }
-
- ret = 0;
-err:
- return ret;
-}
-
/*
* Before going into the write reply logic, here is a matrix that shows the
@@ -1676,55 +2159,30 @@ err:
int32_t
nfs3svc_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
- int ret = -EFAULT;
- nfs_user_t nfu = {0, };
nfs3_call_state_t *cs = NULL;
struct nfs3_state *nfs3 = NULL;
- int write_trusted = 0;
- int sync_trusted = 0;
cs = frame->local;
nfs3 = rpcsvc_request_program_private (cs->req);
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto err;
}
stat = NFS3_OK;
cs->maxcount = op_ret;
- write_trusted = nfs3_export_write_trusted (cs->nfs3state,
- cs->resolvefh.xlatorid);
- sync_trusted = nfs3_export_sync_trusted (cs->nfs3state,
- cs->resolvefh.xlatorid);
- ret = nfs3_write_how (&cs->writetype, write_trusted, sync_trusted);
- if (ret == -1)
- goto err;
-
- nfs_request_user_init (&nfu, cs->req);
- /* Store the current preattr so that this can be used as the pre attr
- * when fsync returns. We dont want to use the preattr in fsync because
- * the write fop happened before the fsync.
- */
- cs->stbuf = *prebuf;
- ret = nfs_fsync (cs->nfsx, cs->vol, &nfu, cs->fd, 0,
- nfs3svc_write_fsync_cbk, cs);
- if (ret < 0)
- stat = nfs3_errno_to_nfsstat3 (-ret);
-
err:
- if (ret < 0) {
- nfs3_log_write_res (rpcsvc_request_xid (cs->req), stat,
- op_errno, cs->maxcount, cs->writetype,
- nfs3->serverstart);
- nfs3_write_reply (cs->req, stat, cs->maxcount,
- cs->writetype, nfs3->serverstart, prebuf,
- postbuf);
- nfs3_call_state_wipe (cs);
- }
+ nfs3_log_write_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno, cs->maxcount, cs->writetype,
+ nfs3->serverstart, cs->resolvedloc.path);
+ nfs3_write_reply (cs->req, stat, cs->maxcount,
+ cs->writetype, nfs3->serverstart, prebuf,
+ postbuf);
+ nfs3_call_state_wipe (cs);
return 0;
}
@@ -1752,8 +2210,9 @@ __nfs3_write_resume (nfs3_call_state_t *cs)
* opaque data buffers to multiples of 4 bytes.
*/
cs->datavec.iov_len = cs->datacount;
- ret = nfs_write (cs->nfsx, cs->vol, &nfu, cs->fd, cs->iob, &cs->datavec,
- 1, cs->dataoffset, nfs3svc_write_cbk, cs);
+ ret = nfs_write (cs->nfsx, cs->vol, &nfu, cs->fd, cs->iobref,
+ &cs->datavec, 1, cs->dataoffset, nfs3svc_write_cbk,
+ cs);
return ret;
}
@@ -1765,47 +2224,31 @@ nfs3_write_resume (void *carg)
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -EFAULT;
nfs3_call_state_t *cs = NULL;
+ fd_t *fd = NULL;
if (!carg)
return ret;
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_true, nfs3err);
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
-
- ret = __nfs3_write_resume (cs);
- if (ret < 0)
- stat = nfs3_errno_to_nfsstat3 (-ret);
-nfs3err:
- if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "WRITE",
- stat, -ret);
- nfs3_write_reply (cs->req, stat, 0, cs->writetype, 0, NULL,
- NULL);
- nfs3_call_state_wipe (cs);
+ fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!fd) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ANONYMOUS_FD_FAIL,
+ "Failed to create anonymous fd");
+ goto nfs3err;
}
- return ret;
-}
-
-
-int
-nfs3_write_open_resume (void *carg)
-{
- nfsstat3 stat = NFS3ERR_SERVERFAULT;
- int ret = -EFAULT;
- nfs3_call_state_t *cs = NULL;
- if (!carg)
- return ret;
+ cs->fd = fd; /* Gets unrefd when the call state is wiped. */
- cs = (nfs3_call_state_t *)carg;
- nfs3_check_fh_resolve_status (cs, stat, nfs3err);
- ret = nfs3_file_open_and_resume (cs, nfs3_write_resume);
+ ret = __nfs3_write_resume (cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "WRITE",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_WRITE, stat, -ret,
+ cs->resolvedloc.path);
nfs3_write_reply (cs->req, stat, 0, cs->writetype, 0, NULL,
NULL);
nfs3_call_state_wipe (cs);
@@ -1814,11 +2257,10 @@ nfs3err:
}
-
int
nfs3_write (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
count3 count, stable_how stable, struct iovec payload,
- struct iobuf *iob)
+ struct iobref *iobref)
{
xlator_t *vol = NULL;
nfsstat3 stat = NFS3ERR_SERVERFAULT;
@@ -1827,37 +2269,39 @@ nfs3_write (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh) || (!payload.iov_base)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_rw_call (rpcsvc_request_xid (req), "WRITE", fh, offset, count,
- stable);
+ nfs3_log_rw_call (rpcsvc_request_xid (req), "WRITE", fh, offset,
+ count, stable);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, fh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->datacount = count;
cs->dataoffset = offset;
cs->writetype = stable;
- cs->iob = iob;
+ cs->iobref = iobref;
cs->datavec = payload;
-
- ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_write_open_resume);
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_write_resume);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "WRITE", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_WRITE, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_write_reply (req, stat, 0, stable, 0, NULL, NULL);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -1869,96 +2313,48 @@ nfs3err:
int
-nfs3svc_write_vecsizer (rpcsvc_request_t *req, ssize_t *readsize, int *newbuf)
+nfs3svc_write_vecsizer (int state, ssize_t *readsize, char *base_addr,
+ char *curr_addr)
{
- ssize_t ret = RPCSVC_ACTOR_ERROR;
- int state = 0;
- uint32_t fhlen = 0;
- uint32_t fhlen_n = 0;
- write3args *args = NULL;
-
- if (!req)
- return ret;
+ int ret = 0;
+ uint32_t fhlen = 0;
+ uint32_t fhlen_n = 0;
- state = (long)rpcsvc_request_private (req);
- *newbuf = 0;
if (state == 0) {
- rpcsvc_request_set_private (req, NFS3_VECWRITE_READFHLEN);
+ ret = NFS3_VECWRITE_READFHLEN;
*readsize = 4;
- ret = 0;
} else if (state == NFS3_VECWRITE_READFHLEN) {
- fhlen_n = *(uint32_t *)req->msg.iov_base;
+ fhlen_n = *(uint32_t *)(curr_addr - 4);
fhlen = ntohl (fhlen_n);
*readsize = xdr_length_round_up (fhlen, NFS3_FHSIZE);
- rpcsvc_request_set_private (req, NFS3_VECWRITE_READFH);
- ret = 0;
+ ret = NFS3_VECWRITE_READFH;
} else if (state == NFS3_VECWRITE_READFH) {
*readsize = NFS3_WRITE_POSTFH_SIZE;
- rpcsvc_request_set_private (req, NFS3_VECWRITE_READREST);
- ret = 0;
+ ret = NFS3_VECWRITE_READREST;
} else if (state == NFS3_VECWRITE_READREST) {
- args = GF_CALLOC (1, sizeof (*args), gf_nfs_mt_write3args);
- if (!args)
- goto rpcerr;
-
- if (xdr_to_write3args_nocopy (req->msg, args, NULL) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
- rpcsvc_request_seterr (req, GARBAGE_ARGS);
- goto rpcerr;
- }
- rpcsvc_request_set_private (req, args);
- ret = xdr_length_round_up (args->data.data_len, 1048576);
- *readsize = ret;
- *newbuf = 1;
ret = 0;
- }
- ret = 0;
-
-rpcerr:
- return ret;
-}
-
-
-int
-nfs3svc_write_vec (rpcsvc_request_t *req, struct iobuf *iob)
-{
- write3args *args = NULL;
- int ret = RPCSVC_ACTOR_ERROR;
- struct iovec payload = {0, };
-
- if ((!req) || (!iob))
- return ret;
-
- args = rpcsvc_request_private (req);
- iobuf_to_iovec (iob, &payload);
- iobuf_ref (iob);
- ret = nfs3_write (req, (struct nfs3_fh *)args->file.data.data_val,
- args->offset, args->count, args->stable, payload,iob);
- xdr_free_write3args_nocopy (args);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "WRITE procedure failed");
- rpcsvc_request_seterr (req, SYSTEM_ERR);
- ret = RPCSVC_ACTOR_ERROR;
- }
+ *readsize = 0;
+ } else
+ gf_msg ("nfs", GF_LOG_ERROR, 0, NFS_MSG_STATE_WRONG,
+ "state wrong");
return ret;
}
-
int
nfs3svc_write (rpcsvc_request_t *req)
{
struct nfs3_fh fh = {{0}, };
write3args args;
int ret = RPCSVC_ACTOR_ERROR;
- struct iovec payload = {0, };
if (!req)
return ret;
nfs3_prep_write3args (&args, &fh);
- if (xdr_to_write3args_nocopy (req->msg, &args, &payload) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_write3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
@@ -1968,11 +2364,13 @@ nfs3svc_write (rpcsvc_request_t *req)
* ourselves because the RPC call handler who called us will unref its
* own ref of the record's iobuf when it is done handling the request.
*/
- rpcsvc_request_record_ref (req);
+
ret = nfs3_write (req, &fh, args.offset, args.count, args.stable,
- payload, rpcsvc_request_record_iob (req));
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "WRITE procedure failed");
+ req->msg[1], rpcsvc_request_iobref_ref (req));
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_WRITE_FAIL,
+ "WRITE procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -1988,8 +2386,11 @@ nfs3_create_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *newfh,
struct iatt *postparent)
{
create3res res = {0, };
+ uint64_t deviceid = 0;
- nfs3_fill_create3res (&res, stat, newfh, newbuf, preparent, postparent);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_create3res (&res, stat, newfh, newbuf, preparent, postparent,
+ deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_create3res);
return 0;
@@ -1999,21 +2400,22 @@ nfs3_create_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *newfh,
int32_t
nfs3svc_create_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
stat = NFS3_OK;
nfs3err:
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "CREATE", stat,
- op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_CREATE, stat, op_errno,
+ &cs->fh, cs->resolvedloc.path);
nfs3_create_reply (cs->req, stat, &cs->fh, postop, &cs->preparent,
&cs->postparent);
nfs3_call_state_wipe (cs);
@@ -2026,20 +2428,23 @@ int32_t
nfs3svc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -EFAULT;
nfs_user_t nfu = {0, };
nfs3_call_state_t *cs = NULL;
+ inode_t *oldinode = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+ oldinode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
/* Means no attributes were required to be set. */
if (!cs->setattr_valid) {
@@ -2051,15 +2456,22 @@ nfs3svc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs->preparent = *preparent;
cs->postparent = *postparent;
nfs_request_user_init (&nfu, cs->req);
+ gf_uuid_copy (cs->resolvedloc.gfid, inode->gfid);
ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,&cs->stbuf,
cs->setattr_valid, nfs3svc_create_setattr_cbk, cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
+ if (oldinode) {
+ inode_lookup (oldinode);
+ inode_unref (oldinode);
+ }
+
if (ret < 0) {
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "CREATE",
- stat, op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_CREATE, stat, op_errno, &cs->fh,
+ cs->resolvedloc.path);
nfs3_create_reply (cs->req, stat, &cs->fh, buf, preparent,
postparent);
nfs3_call_state_wipe (cs);
@@ -2074,16 +2486,30 @@ nfs3_create_common (nfs3_call_state_t *cs)
int ret = -EFAULT;
int flags = 0;
nfs_user_t nfu = {0, };
+ uid_t uid = 0;
+ gid_t gid = 0;
if (!cs)
return ret;
- if ((cs->createmode == UNCHECKED) || (cs->createmode = EXCLUSIVE))
- flags = O_RDWR;
- else if (cs->createmode == GUARDED)
+ if (cs->createmode == GUARDED)
flags = (O_RDWR | O_EXCL);
+ else
+ flags = O_RDWR;
- nfs_request_user_init (&nfu, cs->req);
+ if (gf_attr_uid_set (cs->setattr_valid)) {
+ uid = cs->stbuf.ia_uid;
+ cs->setattr_valid &= ~GF_SET_ATTR_UID;
+ } else
+ uid = rpcsvc_request_uid (cs->req);
+
+ if (gf_attr_gid_set (cs->setattr_valid)) {
+ gid = cs->stbuf.ia_gid;
+ cs->setattr_valid &= ~GF_SET_ATTR_GID;
+ } else
+ gid = rpcsvc_request_gid (cs->req);
+
+ nfs_request_primary_user_init (&nfu, cs->req, uid, gid);
/* We can avoid sending the setattr call later if only the mode is
* required to be set. This is possible because the create fop allows
* us to specify a mode arg.
@@ -2103,7 +2529,8 @@ nfs3_create_common (nfs3_call_state_t *cs)
int32_t
nfs3svc_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -EFAULT;
nfsstat3 stat = NFS3ERR_SERVERFAULT;
@@ -2114,20 +2541,32 @@ nfs3svc_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfs_request_user_init (&nfu, cs->req);
if (op_ret == -1) {
ret = -op_errno;
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
- if (cs->preparent.ia_mtime == buf->ia_mtime)
+ if ((cs->stbuf.ia_mtime == buf->ia_mtime) &&
+ (cs->stbuf.ia_atime == buf->ia_atime)) {
+ gf_msg_debug (GF_NFS3, 0,
+ "Create req retransmitted verf %x %x",
+ cs->stbuf.ia_mtime, cs->stbuf.ia_atime);
stat = NFS3_OK;
- else
+ nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+ } else {
+ gf_msg_debug (GF_NFS3, 0,
+ "File already exist new_verf %x %x"
+ "old_verf %x %x", cs->stbuf.ia_mtime,
+ cs->stbuf.ia_atime,
+ buf->ia_mtime, buf->ia_atime);
stat = NFS3ERR_EXIST;
+ }
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "CREATE",
- stat, op_errno);
- nfs3_create_reply (cs->req, stat, NULL, NULL, NULL, NULL);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_CREATE, stat, op_errno,
+ cs->resolvedloc.path);
+ nfs3_create_reply (cs->req, stat, &cs->fh, buf, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -2144,9 +2583,15 @@ nfs3_create_exclusive (nfs3_call_state_t *cs)
if (!cs)
return ret;
- /* HACK warning. */
- cs->preparent.ia_mtime = cs->cookieverf;
- cs->preparent.ia_atime = 9669;
+ /* Storing verifier as a mtime and atime attribute, to store it
+ * in stable storage */
+ memcpy (&cs->stbuf.ia_atime, &cs->cookieverf,
+ sizeof (cs->stbuf.ia_atime));
+ memcpy (&cs->stbuf.ia_mtime,
+ ((char *) &cs->cookieverf) + sizeof (cs->stbuf.ia_atime),
+ sizeof (cs->stbuf.ia_mtime));
+ cs->setattr_valid |= GF_SET_ATTR_ATIME;
+ cs->setattr_valid |= GF_SET_ATTR_MTIME;
nfs_request_user_init (&nfu, cs->req);
/* If the file already existed we need to get that attributes so we can
@@ -2160,15 +2605,7 @@ nfs3_create_exclusive (nfs3_call_state_t *cs)
goto nfs3err;
}
- if (cs->setattr_valid & GF_SET_ATTR_MODE) {
- cs->setattr_valid &= ~GF_SET_ATTR_MODE;
- ret = nfs_create (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
- O_RDWR, cs->mode, nfs3svc_create_cbk, cs);
- } else
- ret = nfs_create (cs->nfsx, cs->vol, &nfu, &cs->oploc, O_RDWR,
- NFS_DEFAULT_CREATE_MODE, nfs3svc_create_cbk,
- cs);
-
+ ret = nfs3_create_common (cs);
nfs3err:
return ret;
}
@@ -2185,6 +2622,7 @@ nfs3_create_resume (void *carg)
return ret;
cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_auth_status (cs, stat, _gf_true, nfs3err);
nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
if (cs->createmode == EXCLUSIVE)
ret = nfs3_create_exclusive (cs);
@@ -2197,8 +2635,9 @@ nfs3_create_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "CREATE",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_CREATE, stat, -ret,
+ cs->resolvedloc.path);
nfs3_create_reply (cs->req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -2224,12 +2663,17 @@ nfs3_create (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, dirfh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->cookieverf = cverf;
- cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, NULL,
- &cs->mode);
+ /*In Exclusive create client is supposed to send cverf instead of
+ * sattr*/
+ if (mode != EXCLUSIVE)
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr,
+ &cs->stbuf,
+ &cs->mode);
cs->createmode = mode;
cs->parent = *dirfh;
@@ -2239,13 +2683,14 @@ nfs3_create (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "CREATE", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_CREATE, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_create_reply (req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -2253,27 +2698,32 @@ nfs3err:
int
nfs3svc_create (rpcsvc_request_t *req)
{
- char name[NFS_PATH_MAX];
- struct nfs3_fh dirfh = {{0}, };
- create3args args;
- int ret = RPCSVC_ACTOR_ERROR;
- uint64_t cverf = 0;
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh dirfh = {{0}, };
+ create3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+ uint64_t cverf = 0;
+ uint64_t *cval;
if (!req)
return ret;
nfs3_prep_create3args (&args, &dirfh, name);
- if (xdr_to_create3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_create3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
- cverf = *(uint64_t *)args.how.createhow3_u.verf;
+ cval = (uint64_t *)args.how.createhow3_u.verf;
+ cverf = *cval;
+
ret = nfs3_create (req, &dirfh, name, args.how.mode,
&args.how.createhow3_u.obj_attributes, cverf);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "CREATE procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_CREATE_FAIL,
+ "CREATE procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -2289,8 +2739,11 @@ nfs3_mkdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *postparent)
{
mkdir3res res = {0, };
+ uint64_t deviceid = 0;
- nfs3_fill_mkdir3res (&res, stat, fh, buf, preparent, postparent);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_mkdir3res (&res, stat, fh, buf, preparent, postparent,
+ deviceid);
nfs3svc_submit_reply (req, &res,
(nfs3_serializer)xdr_serialize_mkdir3res);
return 0;
@@ -2299,21 +2752,22 @@ nfs3_mkdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
int32_t
nfs3svc_mkdir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
stat = NFS3_OK;
nfs3err:
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "MKDIR", stat,
- op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_MKDIR, stat, op_errno, &cs->fh,
+ cs->resolvedloc.path);
nfs3_mkdir_reply (cs->req, stat, &cs->fh, postop, &cs->preparent,
&cs->postparent);
nfs3_call_state_wipe (cs);
@@ -2326,7 +2780,7 @@ int32_t
nfs3svc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -EFAULT;
@@ -2335,7 +2789,7 @@ nfs3svc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
@@ -2357,8 +2811,9 @@ nfs3svc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
nfs3err:
if (ret < 0) {
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "MKDIR", stat,
- op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_MKDIR, stat, op_errno, &cs->fh,
+ cs->resolvedloc.path);
nfs3_mkdir_reply (cs->req, stat, &cs->fh, buf, preparent,
postparent);
nfs3_call_state_wipe (cs);
@@ -2396,8 +2851,9 @@ nfs3_mkdir_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "MKDIR",stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_MKDIR, stat, -ret,
+ cs->resolvedloc.path);
nfs3_mkdir_reply (cs->req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -2418,20 +2874,23 @@ nfs3_mkdir (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!dirfh) || (!name) || (!sattr)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "MKDIR", dirfh, name);
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "MKDIR", dirfh,
+ name);
nfs3_validate_gluster_fh (dirfh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, dirfh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->parent = *dirfh;
- cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, NULL,
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, &cs->stbuf,
&cs->mode);
ret = nfs3_fh_resolve_and_resume (cs, dirfh, name, nfs3_mkdir_resume);
if (ret < 0)
@@ -2439,13 +2898,14 @@ nfs3_mkdir (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "MKDIR", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_MKDIR, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_mkdir_reply (req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -2461,15 +2921,17 @@ nfs3svc_mkdir (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_mkdir3args (&args, &dirfh, name);
- if (xdr_to_mkdir3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_mkdir3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_mkdir (req, &dirfh, name, &args.attributes);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "MKDIR procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_DIR_OP_FAIL,
+ "MKDIR procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -2485,8 +2947,11 @@ nfs3_symlink_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *postparent)
{
symlink3res res = {0, };
+ uint64_t deviceid = 0;
- nfs3_fill_symlink3res (&res, stat, fh, buf, preparent, postparent);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_symlink3res (&res, stat, fh, buf, preparent, postparent,
+ deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_symlink3res);
@@ -2498,14 +2963,14 @@ int32_t
nfs3svc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
@@ -2513,8 +2978,9 @@ nfs3svc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stat = NFS3_OK;
nfs3err:
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "SYMLINK", stat,
- op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_SYMLINK, stat, op_errno, &cs->fh,
+ cs->resolvedloc.path);
nfs3_symlink_reply (cs->req, stat, &cs->fh, buf, preparent,
postparent);
nfs3_call_state_wipe (cs);
@@ -2543,8 +3009,9 @@ nfs3_symlink_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "SYMLINK",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SYMLINK, stat, -ret,
+ cs->resolvedloc.path);
nfs3_symlink_reply (cs->req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -2564,16 +3031,19 @@ nfs3_symlink (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!dirfh) || (!name) || (!target) || (!sattr)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_symlink_call (rpcsvc_request_xid (req), dirfh, name, target);
+ nfs3_log_symlink_call (rpcsvc_request_xid (req), dirfh, name,
+ target);
nfs3_validate_gluster_fh (dirfh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, dirfh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->parent = *dirfh;
@@ -2590,8 +3060,9 @@ nfs3_symlink (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "SYMLINK", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_SYMLINK, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_symlink_reply (req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -2599,7 +3070,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -2616,16 +3087,18 @@ nfs3svc_symlink (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_symlink3args (&args, &dirfh, name, target);
- if (xdr_to_symlink3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_symlink3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_symlink (req, &dirfh, name, target,
&args.symlink.symlink_attributes);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "SYMLINK procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EXDEV, NFS_MSG_SYMLINK_FAIL,
+ "SYMLINK procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -2635,13 +3108,17 @@ rpcerr:
}
-int
+static int
nfs3_mknod_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent)
{
mknod3res res = {0, };
- nfs3_fill_mknod3res (&res, stat, fh, buf, preparent, postparent);
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_mknod3res (&res, stat, fh, buf, preparent, postparent,
+ deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_mknod3res);
@@ -2651,21 +3128,22 @@ nfs3_mknod_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
int32_t
nfs3svc_mknod_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
stat = NFS3_OK;
nfs3err:
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "MKNOD", stat,
- op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_MKNOD, stat, op_errno, &cs->fh,
+ cs->resolvedloc.path);
nfs3_mknod_reply (cs->req, stat, &cs->fh, postop, &cs->preparent,
&cs->postparent);
nfs3_call_state_wipe (cs);
@@ -2678,7 +3156,7 @@ int32_t
nfs3svc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -1;
@@ -2687,7 +3165,7 @@ nfs3svc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
@@ -2709,8 +3187,9 @@ nfs3svc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), "MKNOD", stat,
- op_errno, &cs->fh);
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req),
+ NFS3_MKNOD, stat, op_errno, &cs->fh,
+ cs->resolvedloc.path);
nfs3_mknod_reply (cs->req, stat, &cs->fh, buf, preparent,
postparent);
nfs3_call_state_wipe (cs);
@@ -2720,7 +3199,7 @@ nfs3err:
}
-int
+static int
nfs3_mknod_device (nfs3_call_state_t *cs)
{
int ret = -EFAULT;
@@ -2751,12 +3230,11 @@ nfs3_mknod_device (nfs3_call_state_t *cs)
}
-int
-nfs3_mknod_fifo (nfs3_call_state_t *cs)
+static int
+nfs3_mknod_fifo (nfs3_call_state_t *cs, mode_t mode)
{
int ret = -EFAULT;
nfs_user_t nfu = {0, };
- mode_t mode = S_IFIFO;
if (!cs)
return ret;
@@ -2775,7 +3253,7 @@ nfs3_mknod_fifo (nfs3_call_state_t *cs)
}
-int
+static int
nfs3_mknod_resume (void *carg)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
@@ -2794,8 +3272,10 @@ nfs3_mknod_resume (void *carg)
ret = nfs3_mknod_device (cs);
break;
case NF3SOCK:
+ ret = nfs3_mknod_fifo (cs, S_IFSOCK);
+ break;
case NF3FIFO:
- ret = nfs3_mknod_fifo (cs);
+ ret = nfs3_mknod_fifo (cs, S_IFIFO);
break;
default:
ret = -EBADF;
@@ -2807,8 +3287,9 @@ nfs3_mknod_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "MKNOD",stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_MKNOD, stat, -ret,
+ cs->resolvedloc.path);
nfs3_mknod_reply (cs->req, stat, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -2830,16 +3311,19 @@ nfs3_mknod (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name,
sattr3 *sattr = NULL;
if ((!req) || (!fh) || (!name) || (!nodedata)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_mknod_call (rpcsvc_request_xid (req), fh, name,nodedata->type);
+ nfs3_log_mknod_call (rpcsvc_request_xid (req), fh, name,
+ nodedata->type);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, fh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->mknodtype = nodedata->type;
@@ -2848,13 +3332,15 @@ nfs3_mknod (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name,
case NF3BLK:
cs->devnums = nodedata->mknoddata3_u.device.spec;
sattr = &nodedata->mknoddata3_u.device.dev_attributes;
- cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, NULL,
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr,
+ &cs->stbuf,
&cs->mode);
break;
case NF3SOCK:
case NF3FIFO:
sattr = &nodedata->mknoddata3_u.pipe_attributes;
- cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, NULL,
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr,
+ &cs->stbuf,
&cs->mode);
break;
default:
@@ -2869,8 +3355,9 @@ nfs3_mknod (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "MKNOD", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_MKNOD, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_mknod_reply (req, stat, NULL, NULL, NULL, NULL);
/* Ret must be 0 after this so that the caller does not
* also send an RPC reply.
@@ -2878,7 +3365,7 @@ nfs3err:
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -2894,15 +3381,17 @@ nfs3svc_mknod (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_mknod3args (&args, &fh, name);
- if (xdr_to_mknod3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_mknod3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_mknod (req, &fh, name, &args.what);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "MKNOD procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_MKNOD_FAIL,
+ "MKNOD procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -2916,10 +3405,10 @@ nfs3_remove_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preparent
, struct iatt *postparent)
{
remove3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_remove3res (&res, stat, preparent, postparent, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_remove3res (&res, stat, preparent, postparent, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_remove3res);
return 0;
@@ -2930,37 +3419,22 @@ nfs3_remove_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preparent
int32_t
nfs3svc_remove_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
- fd_t *openfd = NULL;
nfs3_call_state_t *cs = NULL;
- struct nfs3_state *nfs3 = NULL;
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- goto do_not_unref_cached_fd;
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
}
- stat = NFS3_OK;
- /* Close any cached fds so that when any currently active write
- * finishes, the file is finally removed.
- */
- openfd = fd_lookup (cs->resolvedloc.inode, 0);
- nfs3 = rpcsvc_request_program_private (cs->req);
- if (openfd) {
- fd_unref (openfd);
- nfs3_fdcache_remove (nfs3, openfd);
- }
-
- /* This is the unref equivalent of the ref done when the inode was
- * created on a lookup or a create request.
- * The inode is finally unrefed in call state wipe.
- */
- inode_unref (cs->resolvedloc.inode);
-do_not_unref_cached_fd:
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "REMOVE", stat,
- op_errno);
+
+ if (op_ret == 0)
+ stat = NFS3_OK;
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_REMOVE, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_remove_reply (cs->req, stat, preparent, postparent);
nfs3_call_state_wipe (cs);
@@ -3008,8 +3482,9 @@ nfs3_remove_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "REMOVE",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_REMOVE, stat, -ret,
+ cs->resolvedloc.path);
nfs3_remove_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -3028,16 +3503,19 @@ nfs3_remove (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name)
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh) || (!name)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "REMOVE", fh, name);
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "REMOVE", fh,
+ name);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, fh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cs, fh, name, nfs3_remove_resume);
@@ -3046,8 +3524,9 @@ nfs3_remove (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "REMOVE", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_REMOVE, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_remove_reply (req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -3055,7 +3534,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -3071,15 +3550,17 @@ nfs3svc_remove (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_remove3args (&args, &fh, name);
- if (xdr_to_remove3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_remove3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_remove (req, &fh, name);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "REMOVE procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_REMOVE_FAIL,
+ "REMOVE procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -3094,10 +3575,10 @@ nfs3_rmdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preparent,
struct iatt *postparent)
{
rmdir3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_rmdir3res (&res, stat, preparent, postparent, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_rmdir3res (&res, stat, preparent, postparent, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_rmdir3res);
return 0;
@@ -3107,21 +3588,24 @@ nfs3_rmdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preparent,
int32_t
nfs3svc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- if (op_ret == -1)
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- else {
- inode_unref (cs->resolvedloc.inode);
+ if (op_ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_WARNING, op_errno, NFS_MSG_RMDIR_CBK,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else {
stat = NFS3_OK;
}
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "RMDIR", stat,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_RMDIR, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_rmdir_reply (cs->req, stat, preparent, postparent);
nfs3_call_state_wipe (cs);
@@ -3149,8 +3633,9 @@ nfs3_rmdir_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "RMDIR",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_RMDIR, stat, -ret,
+ cs->resolvedloc.path);
nfs3_rmdir_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -3170,16 +3655,19 @@ nfs3_rmdir (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name)
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh) || (!name)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "RMDIR", fh, name);
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "RMDIR", fh,
+ name);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, fh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cs, fh, name, nfs3_rmdir_resume);
@@ -3188,8 +3676,9 @@ nfs3_rmdir (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "RMDIR", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_RMDIR, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_rmdir_reply (req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -3197,7 +3686,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -3213,15 +3702,17 @@ nfs3svc_rmdir (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_rmdir3args (&args, &fh, name);
- if (xdr_to_rmdir3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_rmdir3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_rmdir (req, &fh, name);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "RMDIR procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret, NFS_MSG_DIR_OP_FAIL,
+ "RMDIR procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -3237,11 +3728,11 @@ nfs3_rename_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf,
struct iatt *prenewparent, struct iatt *postnewparent)
{
rename3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
+ deviceid = nfs3_request_xlator_deviceid (req);
nfs3_fill_rename3res (&res, stat, buf, preoldparent, postoldparent,
- prenewparent, postnewparent, xlid);
+ prenewparent, postnewparent, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer) xdr_serialize_rename3res);
@@ -3255,7 +3746,8 @@ int32_t
nfs3svc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int ret = -EFAULT;
nfsstat3 stat = NFS3ERR_SERVERFAULT;
@@ -3263,13 +3755,15 @@ nfs3svc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
stat = NFS3_OK;
nfs3err:
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "RENAME", stat,-ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_RENAME, stat,
+ -ret, cs->resolvedloc.path);
nfs3_rename_reply (cs->req, stat, buf, preoldparent, postoldparent,
prenewparent, postnewparent);
nfs3_call_state_wipe (cs);
@@ -3299,8 +3793,9 @@ nfs3_rename_resume_dst (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "RENAME",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_RENAME, stat, -ret,
+ cs->resolvedloc.path);
nfs3_rename_reply (cs->req, stat, NULL, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -3327,6 +3822,7 @@ nfs3_rename_resume_src (void *carg)
*/
nfs_loc_copy (&cs->oploc, &cs->resolvedloc);
nfs_loc_wipe (&cs->resolvedloc);
+ GF_FREE (cs->resolventry);
ret = nfs3_fh_resolve_and_resume (cs, &cs->fh, cs->pathname,
nfs3_rename_resume_dst);
@@ -3335,8 +3831,9 @@ nfs3_rename_resume_src (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "RENAME",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_RENAME, stat, -ret,
+ cs->resolvedloc.path);
nfs3_rename_reply (cs->req, stat, NULL, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -3356,7 +3853,8 @@ nfs3_rename (rpcsvc_request_t *req, struct nfs3_fh *olddirfh, char *oldname,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!olddirfh) || (!oldname) || (!newdirfh) || (!newname)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
@@ -3368,7 +3866,8 @@ nfs3_rename (rpcsvc_request_t *req, struct nfs3_fh *olddirfh, char *oldname,
nfs3_validate_strlen_or_goto(oldname, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_validate_strlen_or_goto(newname, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, olddirfh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, olddirfh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, olddirfh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
/* While we resolve the source (fh, name) pair, we need to keep a copy
@@ -3389,8 +3888,9 @@ nfs3_rename (rpcsvc_request_t *req, struct nfs3_fh *olddirfh, char *oldname,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "RENAME", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_RENAME, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_rename_reply (req, stat, NULL, NULL, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -3398,7 +3898,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -3416,15 +3916,17 @@ nfs3svc_rename (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_rename3args (&args, &olddirfh, oldname, &newdirfh, newname);
- if (xdr_to_rename3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_rename3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_rename (req, &olddirfh, oldname, &newdirfh, newname);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "RENAME procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_RENAME_FAIL,
+ "RENAME procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -3439,10 +3941,10 @@ nfs3_link_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent)
{
link3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_link3res (&res, stat, buf, preparent, postparent, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_link3res (&res, stat, buf, preparent, postparent, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_link3res);
@@ -3454,19 +3956,20 @@ int32_t
nfs3svc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- if (op_ret == -1)
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- else
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
stat = NFS3_OK;
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "LINK", stat,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_LINK, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_link_reply (cs->req, stat, buf, preparent, postparent);
nfs3_call_state_wipe (cs);
@@ -3496,8 +3999,9 @@ nfs3_link_resume_lnk (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "LINK", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_LINK, stat, -ret,
+ cs->resolvedloc.path);
nfs3_link_reply (cs->req, stat, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -3527,8 +4031,9 @@ nfs3_link_resume_tgt (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "LINK", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_LINK, stat, -ret,
+ cs->resolvedloc.path);
nfs3_link_reply (cs->req, stat, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -3548,7 +4053,8 @@ nfs3_link (rpcsvc_request_t *req, struct nfs3_fh *targetfh,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!targetfh) || (!dirfh) || (!newname)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
@@ -3557,7 +4063,8 @@ nfs3_link (rpcsvc_request_t *req, struct nfs3_fh *targetfh,
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_validate_strlen_or_goto(newname, NFS_NAME_MAX, nfs3err, stat, ret);
nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, dirfh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->fh = *dirfh;
@@ -3575,8 +4082,9 @@ nfs3_link (rpcsvc_request_t *req, struct nfs3_fh *targetfh,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "LINK", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_LINK, stat,
+ -ret, cs ? cs->pathname : NULL);
nfs3_link_reply (req, stat, NULL, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -3584,7 +4092,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -3600,15 +4108,17 @@ nfs3svc_link (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_link3args (&args, &targetfh, &dirfh, newpath);
- if (xdr_to_link3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_link3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_link (req, &targetfh, &dirfh, newpath);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "LINK procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EXDEV, NFS_MSG_LINK_FAIL,
+ "LINK procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -3623,10 +4133,12 @@ nfs3_readdirp_reply (rpcsvc_request_t *req, nfsstat3 stat,struct nfs3_fh *dirfh,
uint64_t cverf, struct iatt *dirstat, gf_dirent_t *entries,
count3 dircount, count3 maxcount, int is_eof)
{
- readdirp3res res = {0, };
+ readdirp3res res = {0, };
+ uint64_t deviceid = 0;
+ deviceid = nfs3_request_xlator_deviceid (req);
nfs3_fill_readdirp3res (&res, stat, dirfh, cverf, dirstat, entries,
- dircount, maxcount, is_eof);
+ dircount, maxcount, is_eof, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer) xdr_serialize_readdirp3res);
nfs3_free_readdirp3res (&res);
@@ -3641,9 +4153,11 @@ nfs3_readdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *dirfh,
count3 count, int is_eof)
{
readdir3res res = {0, };
+ uint64_t deviceid = 0;
+ deviceid = nfs3_request_xlator_deviceid (req);
nfs3_fill_readdir3res (&res, stat, dirfh, cverf, dirstat, entries, count
- , is_eof);
+ , is_eof, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer) xdr_serialize_readdir3res);
nfs3_free_readdir3res (&res);
@@ -3654,7 +4168,8 @@ nfs3_readdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *dirfh,
int32_t
nfs3svc_readdir_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int is_eof = 0;
@@ -3662,7 +4177,7 @@ nfs3svc_readdir_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto nfs3err;
}
@@ -3670,23 +4185,29 @@ nfs3svc_readdir_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* readdir'ing.
*/
if (cs->operrno == ENOENT) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "Reached end-of-directory");
+ gf_msg_trace (GF_NFS3, 0, "Reached end-of-directory");
is_eof = 1;
}
stat = NFS3_OK;
+
+ /* do inode linking here */
+ gf_link_inodes_from_dirent (this, cs->fd->inode, &cs->entries);
+
nfs3err:
if (cs->maxcount == 0) {
- nfs3_log_readdir_res (rpcsvc_request_xid (cs->req), stat,
- op_errno, (uintptr_t)cs->fd,
- cs->dircount, is_eof);
+ nfs3_log_readdir_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno, (uintptr_t)cs->fd,
+ cs->dircount, is_eof,
+ cs->resolvedloc.path);
nfs3_readdir_reply (cs->req, stat, &cs->parent,
(uintptr_t)cs->fd, buf, &cs->entries,
cs->dircount, is_eof);
} else {
- nfs3_log_readdirp_res (rpcsvc_request_xid (cs->req), stat,
- op_errno, (uintptr_t)cs->fd,
- cs->dircount, cs->maxcount, is_eof);
+ nfs3_log_readdirp_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno, (uintptr_t)cs->fd,
+ cs->dircount, cs->maxcount, is_eof,
+ cs->resolvedloc.path);
nfs3_readdirp_reply (cs->req, stat, &cs->parent,
(uintptr_t)cs->fd, buf,
&cs->entries, cs->dircount,
@@ -3694,11 +4215,9 @@ nfs3err:
}
if (is_eof) {
- gf_log (GF_NFS3, GF_LOG_TRACE, "EOF REF: %d", cs->fd->refcount);
- fd_unref (cs->fd);
+ /* do nothing */
}
- gf_log (GF_NFS3, GF_LOG_TRACE, "CS WIPE REF: %d", cs->fd->refcount);
nfs3_call_state_wipe (cs);
return 0;
}
@@ -3706,7 +4225,8 @@ nfs3err:
int32_t
nfs3svc_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -EFAULT;
@@ -3715,7 +4235,7 @@ nfs3svc_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto err;
}
@@ -3735,12 +4255,14 @@ err:
goto ret;
if (cs->maxcount == 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "READDIR",
- stat, op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIR, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_readdir_reply (cs->req, stat, NULL, 0, NULL, NULL, 0, 0);
} else {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "READDIRP"
- , stat, op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIRP, stat, op_errno,
+ cs->resolvedloc.path);
nfs3_readdirp_reply (cs->req, stat, NULL, 0, NULL, NULL,
0, 0, 0);
}
@@ -3749,7 +4271,6 @@ err:
* so that next time the dir is read, we'll get any changed directory
* entries.
*/
- fd_unref (cs->fd);
nfs3_call_state_wipe (cs);
ret:
return 0;
@@ -3797,12 +4318,14 @@ nfs3err:
if (ret < 0) {
if (cs->maxcount == 0) {
nfs3_log_common_res (rpcsvc_request_xid (cs->req),
- "READDIR", stat, -ret);
+ NFS3_READDIR, stat, -ret,
+ cs->resolvedloc.path);
nfs3_readdir_reply (cs->req, stat, NULL, 0, NULL, NULL,
0, 0);
} else {
nfs3_log_common_res (rpcsvc_request_xid (cs->req),
- "READDIRP", stat, -ret);
+ NFS3_READDIRP, stat, -ret,
+ cs->resolvedloc.path);
nfs3_readdirp_reply (cs->req, stat, NULL, 0, NULL, NULL,
0, 0, 0);
}
@@ -3813,19 +4336,62 @@ nfs3err:
}
+int32_t
+nfs3svc_readdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ /*
+ * We don't really need this, it's just an artifact of forcing the
+ * opendir to happen.
+ */
+ if (fd) {
+ fd_unref(fd);
+ }
+
+ return 0;
+}
+
+
int
nfs3_readdir_open_resume (void *carg)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
int ret = -EFAULT;
nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
if (!carg)
return ret;
cs = (nfs3_call_state_t *)carg;
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
- ret = nfs3_dir_open_and_resume (cs, nfs3_readdir_read_resume);
+ cs->fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!cs->fd) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ANONYMOUS_FD_FAIL,
+ "Fail to create anonymous fd");
+ goto nfs3err;
+ }
+
+ /*
+ * NFS client will usually send us a readdirp without an opendir,
+ * which would cause us to skip our usual self-heal checks which occur
+ * in opendir for native protocol. To make sure those checks do happen,
+ * our most reliable option is to do our own opendir for any readdirp
+ * at the beginning of the directory.
+ */
+ if (cs->cookie == 0) {
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_readdir_opendir_cbk, cs);
+ if (ret < 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_DIR_OP_FAIL,
+ "auto-opendir failed");
+ }
+ }
+
+ ret = nfs3_readdir_read_resume (cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
@@ -3833,12 +4399,14 @@ nfs3err:
if (ret < 0) {
if (cs->maxcount == 0) {
nfs3_log_common_res (rpcsvc_request_xid (cs->req),
- "READDIR", stat, -ret);
+ NFS3_READDIR, stat, -ret,
+ cs->resolvedloc.path);
nfs3_readdir_reply (cs->req, stat, NULL, 0, NULL, NULL,
0, 0);
} else {
nfs3_log_common_res (rpcsvc_request_xid (cs->req),
- "READDIRP", stat, -ret);
+ NFS3_READDIRP, stat, -ret,
+ cs->resolvedloc.path);
nfs3_readdirp_reply (cs->req, stat, NULL, 0, NULL, NULL,
0, 0, 0);
}
@@ -3859,17 +4427,29 @@ nfs3_readdir (rpcsvc_request_t *req, struct nfs3_fh *fh, cookie3 cookie,
int ret = -EFAULT;
struct nfs3_state *nfs3 = NULL;
nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+ gf_boolean_t is_readdirp = !!maxcount;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_readdir_call (rpcsvc_request_xid (req), fh, dircount,maxcount);
+ nfs3_log_readdir_call (rpcsvc_request_xid (req), fh, dircount,
+ maxcount);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+ nfs = nfs_state (nfs3->nfsx);
+
+ if (is_readdirp && !nfs->rdirplus) {
+ ret = -ENOTSUP;
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+ goto nfs3err;
+ }
cs->cookieverf = cverf;
cs->dircount = dircount;
@@ -3883,14 +4463,16 @@ nfs3_readdir (rpcsvc_request_t *req, struct nfs3_fh *fh, cookie3 cookie,
nfs3err:
if (ret < 0) {
- if (maxcount == 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "READDIR"
- , stat, -ret);
+ if (!is_readdirp) {
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_READDIR, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_readdir_reply (req, stat, NULL, 0, NULL, NULL, 0,
0);
} else {
- nfs3_log_common_res (rpcsvc_request_xid (req),"READDIRP"
- , stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_READDIRP, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_readdirp_reply (req, stat, NULL, 0, NULL, NULL, 0,
0, 0);
}
@@ -3900,7 +4482,7 @@ nfs3err:
ret = 0;
nfs3_call_state_wipe (cs);
}
-
+out:
return ret;
}
@@ -3912,20 +4494,25 @@ nfs3svc_readdir (rpcsvc_request_t *req)
struct nfs3_fh fh = {{0},};
int ret = RPCSVC_ACTOR_ERROR;
uint64_t verf = 0;
+ uint64_t *cval;
if (!req)
return ret;
nfs3_prep_readdir3args (&ra, &fh);
- if (xdr_to_readdir3args (req->msg, &ra) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_readdir3args (req->msg[0], &ra) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
+ cval = (uint64_t *) ra.cookieverf;
+ verf = *cval;
- verf = *(uint64_t *)ra.cookieverf;
ret = nfs3_readdir (req, &fh, ra.cookie, verf, ra.count, 0);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "READDIR procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_READDIR_FAIL,
+ "READDIR procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -3942,21 +4529,26 @@ nfs3svc_readdirp (rpcsvc_request_t *req)
struct nfs3_fh fh = {{0},};
int ret = RPCSVC_ACTOR_ERROR;
uint64_t cverf = 0;
+ uint64_t *cval;
if (!req)
return ret;
nfs3_prep_readdirp3args (&ra, &fh);
- if (xdr_to_readdirp3args (req->msg, &ra) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_readdirp3args (req->msg[0], &ra) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
+ cval = (uint64_t *) ra.cookieverf;
+ cverf = *cval;
- cverf = *(uint64_t *)ra.cookieverf;
ret = nfs3_readdir (req, &fh, ra.cookie, cverf, ra.dircount,
ra.maxcount);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "READDIRP procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_READDIRP_FAIL,
+ "READDIRP procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -3971,10 +4563,10 @@ nfs3_fsstat_reply (rpcsvc_request_t *req, nfsstat3 stat, struct statvfs *fsbuf,
struct iatt *postbuf)
{
fsstat3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_fsstat3res (&res, stat, fsbuf, postbuf, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_fsstat3res (&res, stat, fsbuf, postbuf, deviceid);
return nfs3svc_submit_reply (req, &res,
(nfs3_serializer)xdr_serialize_fsstat3res);
@@ -3983,19 +4575,21 @@ nfs3_fsstat_reply (rpcsvc_request_t *req, nfsstat3 stat, struct statvfs *fsbuf,
int32_t
nfs3_fsstat_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- if (op_ret == -1)
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- else
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
stat = NFS3_OK;
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "FSTAT", stat,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_FSSTAT, stat,
+ op_errno, cs->resolvedloc.path);
nfs3_fsstat_reply (cs->req, stat, &cs->fsstat, buf);
nfs3_call_state_wipe (cs);
return 0;
@@ -4004,7 +4598,8 @@ nfs3_fsstat_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
nfs3_fsstat_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
nfs_user_t nfu = {0, };
int ret = -EFAULT;
@@ -4014,7 +4609,7 @@ nfs3_fsstat_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
cs = frame->local;
if (op_ret == -1) {
ret = -op_errno;
- stat = nfs3_errno_to_nfsstat3 (op_errno);
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
goto err;
}
@@ -4030,8 +4625,9 @@ nfs3_fsstat_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "FSTAT",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_FSSTAT, stat, -ret,
+ cs->resolvedloc.path);
nfs3_fsstat_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -4062,8 +4658,9 @@ nfs3_fsstat_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "FSTAT",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_FSSTAT, stat, -ret,
+ cs->resolvedloc.path);
nfs3_fsstat_reply (cs->req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -4083,7 +4680,8 @@ nfs3_fsstat (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
@@ -4091,6 +4689,7 @@ nfs3_fsstat (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_fsstat_resume);
@@ -4099,8 +4698,9 @@ nfs3_fsstat (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "FSTAT", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_FSSTAT, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_fsstat_reply (req, stat, NULL, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -4108,7 +4708,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -4123,15 +4723,17 @@ nfs3svc_fsstat (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_fsstat3args (&args, &fh);
- if (xdr_to_fsstat3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_fsstat3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_fsstat (req, &fh);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "FSTAT procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_FSTAT_FAIL,
+ "FSTAT procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -4146,11 +4748,11 @@ nfs3_fsinfo_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *fsroot)
{
fsinfo3res res;
struct nfs3_state *nfs3 = NULL;
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
+ deviceid = nfs3_request_xlator_deviceid (req);
nfs3 = rpcsvc_request_program_private (req);
- nfs3_fill_fsinfo3res (nfs3, &res, status, fsroot, xlid);
+ nfs3_fill_fsinfo3res (nfs3, &res, status, fsroot, deviceid);
nfs3svc_submit_reply (req, &res,
(nfs3_serializer)xdr_serialize_fsinfo3res);
@@ -4160,20 +4762,22 @@ nfs3_fsinfo_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *fsroot)
int32_t
nfs3svc_fsinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
nfsstat3 status = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
cs = frame->local;
- if (op_ret == -1)
- status = nfs3_errno_to_nfsstat3 (op_errno);
- else
+ if (op_ret == -1) {
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }else
status = NFS3_OK;
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "FSINFO", status,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_FSINFO, status,
+ op_errno, cs->resolvedloc.path);
nfs3_fsinfo_reply (cs->req, status, buf);
nfs3_call_state_wipe (cs);
@@ -4205,8 +4809,9 @@ nfs3_fsinfo_resume (void *carg)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "FSINFO",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_FSINFO, stat, -ret,
+ cs->resolvedloc.path);
nfs3_fsinfo_reply (cs->req, stat, NULL);
nfs3_call_state_wipe (cs);
}
@@ -4225,7 +4830,8 @@ nfs3_fsinfo (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
@@ -4233,6 +4839,7 @@ nfs3_fsinfo (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_fsinfo_resume);
@@ -4241,13 +4848,14 @@ nfs3_fsinfo (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "FSINFO", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_FSINFO, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_fsinfo_reply (req, stat, NULL);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -4263,15 +4871,17 @@ nfs3svc_fsinfo (rpcsvc_request_t *req)
return ret;
nfs3_prep_fsinfo3args (&args, &root);
- if (xdr_to_fsinfo3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding arguments");
+ if (xdr_to_fsinfo3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding arguments");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_fsinfo (req, &root);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "FSINFO procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_FSINFO_FAIL,
+ "FSINFO procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -4285,10 +4895,10 @@ int
nfs3_pathconf_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf)
{
pathconf3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_pathconf3res (&res, stat, buf, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_pathconf3res (&res, stat, buf, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_pathconf3res);
return 0;
@@ -4297,16 +4907,17 @@ nfs3_pathconf_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf)
int32_t
nfs3svc_pathconf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
struct iatt *sbuf = NULL;
nfs3_call_state_t *cs = NULL;
nfsstat3 stat = NFS3ERR_SERVERFAULT;
cs = frame->local;
- if (op_ret == -1)
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- else {
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else {
/* If stat fop failed, we can still send the other components
* in a pathconf reply.
*/
@@ -4314,8 +4925,9 @@ nfs3svc_pathconf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stat = NFS3_OK;
}
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "PATHCONF", stat,
- op_errno);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_PATHCONF, stat,
+ op_errno, cs->resolvedloc.path);
nfs3_pathconf_reply (cs->req, stat, sbuf);
nfs3_call_state_wipe (cs);
@@ -4343,8 +4955,9 @@ nfs3_pathconf_resume (void *carg)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "PATHCONF",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_PATHCONF, stat, -ret,
+ cs->resolvedloc.path);
nfs3_pathconf_reply (cs->req, stat, NULL);
nfs3_call_state_wipe (cs);
}
@@ -4362,7 +4975,8 @@ nfs3_pathconf (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
@@ -4370,6 +4984,7 @@ nfs3_pathconf (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_pathconf_resume);
@@ -4378,8 +4993,9 @@ nfs3_pathconf (rpcsvc_request_t *req, struct nfs3_fh *fh)
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "PATHCONF", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_PATHCONF, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_pathconf_reply (req, stat, NULL);
nfs3_call_state_wipe (cs);
/* Ret must be 0 after this so that the caller does not
@@ -4387,7 +5003,7 @@ nfs3err:
*/
ret = 0;
}
-
+out:
return ret;
}
@@ -4402,15 +5018,18 @@ nfs3svc_pathconf (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_pathconf3args (&args, &fh);
- if (xdr_to_pathconf3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_pathconf3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_pathconf (req, &fh);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "PATHCONF procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, -ret,
+ NFS_MSG_PATHCONF_FAIL,
+ "PATHCONF procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -4424,10 +5043,10 @@ nfs3_commit_reply (rpcsvc_request_t *req, nfsstat3 stat, uint64_t wverf,
struct iatt *prestat, struct iatt *poststat)
{
commit3res res = {0, };
- uint16_t xlid = 0;
+ uint64_t deviceid = 0;
- xlid = nfs3_request_xlator_id (req);
- nfs3_fill_commit3res (&res, stat, wverf, prestat, poststat, xlid);
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_commit3res (&res, stat, wverf, prestat, poststat, deviceid);
nfs3svc_submit_reply (req, (void *)&res,
(nfs3_serializer)xdr_serialize_commit3res);
@@ -4437,23 +5056,23 @@ nfs3_commit_reply (rpcsvc_request_t *req, nfsstat3 stat, uint64_t wverf,
int32_t
nfs3svc_commit_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
nfsstat3 stat = NFS3ERR_SERVERFAULT;
nfs3_call_state_t *cs = NULL;
struct nfs3_state *nfs3 = NULL;
cs = frame->local;
- if (op_ret == -1)
- stat = nfs3_errno_to_nfsstat3 (op_errno);
- else
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
stat = NFS3_OK;
nfs3 = rpcsvc_request_program_private (cs->req);
- nfs3_log_commit_res (rpcsvc_request_xid (cs->req), stat, op_errno,
- nfs3->serverstart);
- nfs3_commit_reply (cs->req, stat, nfs3->serverstart, prebuf, postbuf);
+ nfs3_log_commit_res (rpcsvc_request_xid (cs->req),
+ stat, op_errno, nfs3->serverstart,
+ cs->resolvedloc.path);
+ nfs3_commit_reply (cs->req, stat, nfs3->serverstart, NULL, NULL);
nfs3_call_state_wipe (cs);
return 0;
@@ -4473,29 +5092,30 @@ nfs3_commit_resume (void *carg)
cs = (nfs3_call_state_t *)carg;
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
- if (nfs3_export_sync_trusted (cs->nfs3state, cs->resolvefh.xlatorid)) {
+ if (nfs3_export_sync_trusted (cs->nfs3state, cs->resolvefh.exportid)) {
ret = -1;
stat = NFS3_OK;
goto nfs3err;
}
nfs_request_user_init (&nfu, cs->req);
- ret = nfs_fsync (cs->nfsx, cs->vol, &nfu, cs->fd, 0,
+ ret = nfs_flush (cs->nfsx, cs->vol, &nfu, cs->fd,
nfs3svc_commit_cbk, cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "COMMIT",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_COMMIT, stat, -ret,
+ cs->resolvedloc.path);
nfs3_commit_reply (cs->req, stat, cs->nfs3state->serverstart,
NULL, NULL);
nfs3_call_state_wipe (cs);
ret = 0;
}
- return ret;
+ return 0;
}
@@ -4511,14 +5131,21 @@ nfs3_commit_open_resume (void *carg)
cs = (nfs3_call_state_t *)carg;
nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ cs->fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!cs->fd) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ANONYMOUS_FD_FAIL,
+ "Failed to create anonymous fd.");
+ goto nfs3err;
+ }
- ret = nfs3_file_open_and_resume (cs, nfs3_commit_resume);
+ ret = nfs3_commit_resume (cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (cs->req), "COMMIT",
- stat, -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_COMMIT, stat, -ret,
+ cs->resolvedloc.path);
nfs3_commit_reply (cs->req, stat, 0, NULL, NULL);
nfs3_call_state_wipe (cs);
}
@@ -4539,16 +5166,18 @@ nfs3_commit (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
nfs3_call_state_t *cs = NULL;
if ((!req) || (!fh)) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+ "Bad arguments");
return -1;
}
- nfs3_log_rw_call (rpcsvc_request_xid (req), "COMMIT", fh, offset, count,
- -1);
+ nfs3_log_rw_call (rpcsvc_request_xid (req), "COMMIT", fh, offset,
+ count, -1);
nfs3_validate_gluster_fh (fh, stat, nfs3err);
nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
- nfs3_check_rw_volaccess (nfs3, fh->xlatorid, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
cs->datacount = count;
@@ -4560,13 +5189,14 @@ nfs3_commit (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
nfs3err:
if (ret < 0) {
- nfs3_log_common_res (rpcsvc_request_xid (req), "COMMIT", stat,
- -ret);
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_COMMIT, stat, -ret,
+ cs ? cs->resolvedloc.path : NULL);
nfs3_commit_reply (req, stat, 0, NULL, NULL);
nfs3_call_state_wipe (cs);
ret = 0;
}
-
+out:
return ret;
}
@@ -4582,15 +5212,17 @@ nfs3svc_commit (rpcsvc_request_t *req)
if (!req)
return ret;
nfs3_prep_commit3args (&args, &fh);
- if (xdr_to_commit3args (req->msg, &args) <= 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ if (xdr_to_commit3args (req->msg[0], &args) <= 0) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
rpcsvc_request_seterr (req, GARBAGE_ARGS);
goto rpcerr;
}
ret = nfs3_commit (req, &fh, args.offset, args.count);
- if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "COMMIT procedure failed");
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_COMMIT_FAIL,
+ "COMMIT procedure failed");
rpcsvc_request_seterr (req, SYSTEM_ERR);
ret = RPCSVC_ACTOR_ERROR;
}
@@ -4601,28 +5233,28 @@ rpcerr:
rpcsvc_actor_t nfs3svc_actors[NFS3_PROC_COUNT] = {
- {"NULL", NFS3_NULL, nfs3svc_null, NULL, NULL},
- {"GETATTR", NFS3_GETATTR, nfs3svc_getattr,NULL, NULL},
- {"SETATTR", NFS3_SETATTR, nfs3svc_setattr,NULL, NULL},
- {"LOOKUP", NFS3_LOOKUP, nfs3svc_lookup, NULL, NULL},
- {"ACCESS", NFS3_ACCESS, nfs3svc_access, NULL, NULL},
- {"READLINK", NFS3_READLINK, nfs3svc_readlink,NULL, NULL},
- {"READ", NFS3_READ, nfs3svc_read, NULL, NULL},
- {"WRITE", NFS3_WRITE, nfs3svc_write, nfs3svc_write_vec, nfs3svc_write_vecsizer},
- {"CREATE", NFS3_CREATE, nfs3svc_create, NULL, NULL},
- {"MKDIR", NFS3_MKDIR, nfs3svc_mkdir, NULL, NULL},
- {"SYMLINK", NFS3_SYMLINK, nfs3svc_symlink,NULL, NULL},
- {"MKNOD", NFS3_MKNOD, nfs3svc_mknod, NULL, NULL},
- {"REMOVE", NFS3_REMOVE, nfs3svc_remove, NULL, NULL},
- {"RMDIR", NFS3_RMDIR, nfs3svc_rmdir, NULL, NULL},
- {"RENAME", NFS3_RENAME, nfs3svc_rename, NULL, NULL},
- {"LINK", NFS3_LINK, nfs3svc_link, NULL, NULL},
- {"READDIR", NFS3_READDIR, nfs3svc_readdir,NULL, NULL},
- {"READDIRPLUS", NFS3_READDIRP, nfs3svc_readdirp,NULL, NULL},
- {"FSSTAT", NFS3_FSSTAT, nfs3svc_fsstat, NULL, NULL},
- {"FSINFO", NFS3_FSINFO, nfs3svc_fsinfo, NULL, NULL},
- {"PATHCONF", NFS3_PATHCONF, nfs3svc_pathconf,NULL, NULL},
- {"COMMIT", NFS3_COMMIT, nfs3svc_commit, NULL, NULL}
+ {"NULL", NFS3_NULL, nfs3svc_null, NULL, 0, DRC_IDEMPOTENT},
+ {"GETATTR", NFS3_GETATTR, nfs3svc_getattr, NULL, 0, DRC_IDEMPOTENT},
+ {"SETATTR", NFS3_SETATTR, nfs3svc_setattr, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"LOOKUP", NFS3_LOOKUP, nfs3svc_lookup, NULL, 0, DRC_IDEMPOTENT},
+ {"ACCESS", NFS3_ACCESS, nfs3svc_access, NULL, 0, DRC_IDEMPOTENT},
+ {"READLINK", NFS3_READLINK, nfs3svc_readlink, NULL, 0, DRC_IDEMPOTENT},
+ {"READ", NFS3_READ, nfs3svc_read, NULL, 0, DRC_IDEMPOTENT},
+ {"WRITE", NFS3_WRITE, nfs3svc_write, nfs3svc_write_vecsizer, 0, DRC_NON_IDEMPOTENT},
+ {"CREATE", NFS3_CREATE, nfs3svc_create, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"MKDIR", NFS3_MKDIR, nfs3svc_mkdir, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"SYMLINK", NFS3_SYMLINK, nfs3svc_symlink, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"MKNOD", NFS3_MKNOD, nfs3svc_mknod, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"REMOVE", NFS3_REMOVE, nfs3svc_remove, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"RMDIR", NFS3_RMDIR, nfs3svc_rmdir, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"RENAME", NFS3_RENAME, nfs3svc_rename, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"LINK", NFS3_LINK, nfs3svc_link, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"READDIR", NFS3_READDIR, nfs3svc_readdir, NULL, 0, DRC_IDEMPOTENT},
+ {"READDIRPLUS", NFS3_READDIRP, nfs3svc_readdirp, NULL, 0, DRC_IDEMPOTENT},
+ {"FSSTAT", NFS3_FSSTAT, nfs3svc_fsstat, NULL, 0, DRC_IDEMPOTENT},
+ {"FSINFO", NFS3_FSINFO, nfs3svc_fsinfo, NULL, 0, DRC_IDEMPOTENT},
+ {"PATHCONF", NFS3_PATHCONF, nfs3svc_pathconf, NULL, 0, DRC_IDEMPOTENT},
+ {"COMMIT", NFS3_COMMIT, nfs3svc_commit, NULL, 0, DRC_IDEMPOTENT}
};
@@ -4631,92 +5263,120 @@ rpcsvc_program_t nfs3prog = {
.prognum = NFS_PROGRAM,
.progver = NFS_V3,
.progport = GF_NFS3_PORT,
- .progaddrfamily = AF_INET,
- .proghost = NULL,
.actors = nfs3svc_actors,
.numactors = NFS3_PROC_COUNT,
- .conn_destroy = NULL,
- .conn_init = NULL,
/* Requests like FSINFO are sent before an auth scheme
* is inited by client. See RFC 2623, Section 2.3.2. */
.min_auth = AUTH_NULL,
};
+/*
+ * This function rounds up the input value to multiple of 4096. Min and Max
+ * supported I/O size limits are 4KB (GF_NFS3_FILE_IO_SIZE_MIN) and
+ * 1MB (GF_NFS3_FILE_IO_SIZE_MAX).
+ */
+void
+nfs3_iosize_roundup_4KB (uint64_t *ioszptr)
+{
+ uint64_t iosize;
+ uint64_t iopages;
+
+ if (!ioszptr)
+ return;
+
+ iosize = *ioszptr;
+ iopages = (iosize + GF_NFS3_IO_SIZE -1) >> GF_NFS3_IO_SHIFT;
+ iosize = (iopages * GF_NFS3_IO_SIZE);
+
+ /* Double check - boundary conditions */
+ if (iosize < GF_NFS3_FILE_IO_SIZE_MIN) {
+ iosize = GF_NFS3_FILE_IO_SIZE_MIN;
+ } else if (iosize > GF_NFS3_FILE_IO_SIZE_MAX) {
+ iosize = GF_NFS3_FILE_IO_SIZE_MAX;
+ }
+
+ *ioszptr = iosize;
+}
int
-nfs3_init_options (struct nfs3_state *nfs3, xlator_t *nfsx)
+nfs3_init_options (struct nfs3_state *nfs3, dict_t *options)
{
int ret = -1;
char *optstr = NULL;
uint64_t size64 = 0;
- if ((!nfs3) || (!nfsx))
+ if ((!nfs3) || (!options))
return -1;
/* nfs3.read-size */
nfs3->readsize = GF_NFS3_RTPREF;
- if (dict_get (nfsx->options, "nfs3.read-size")) {
- ret = dict_get_str (nfsx->options, "nfs3.read-size", &optstr);
+ if (dict_get (options, "nfs3.read-size")) {
+ ret = dict_get_str (options, "nfs3.read-size", &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
- " option: nfs3.read-size");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: nfs3.read-size");
ret = -1;
goto err;
}
- ret = gf_string2bytesize (optstr, &size64);
- nfs3->readsize = size64;
+ ret = gf_string2uint64 (optstr, &size64);
if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format"
- " option: nfs3.read-size");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_FORMAT_FAIL,
+ "Failed to format option: nfs3.read-size");
ret = -1;
goto err;
}
+
+ nfs3_iosize_roundup_4KB (&size64);
+ nfs3->readsize = size64;
}
/* nfs3.write-size */
nfs3->writesize = GF_NFS3_WTPREF;
- if (dict_get (nfsx->options, "nfs3.write-size")) {
- ret = dict_get_str (nfsx->options, "nfs3.write-size", &optstr);
+ if (dict_get (options, "nfs3.write-size")) {
+ ret = dict_get_str (options, "nfs3.write-size", &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
- " option: nfs3.write-size");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: nfs3.write-size");
ret = -1;
goto err;
}
- ret = gf_string2bytesize (optstr, &size64);
- nfs3->writesize = size64;
+ ret = gf_string2uint64 (optstr, &size64);
if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format"
- " option: nfs3.write-size");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_FORMAT_FAIL,
+ "Failed to format option: nfs3.write-size");
ret = -1;
goto err;
}
+
+ nfs3_iosize_roundup_4KB (&size64);
+ nfs3->writesize = size64;
}
/* nfs3.readdir.size */
nfs3->readdirsize = GF_NFS3_DTPREF;
- if (dict_get (nfsx->options, "nfs3.readdir-size")) {
- ret = dict_get_str (nfsx->options,"nfs3.readdir-size", &optstr);
+ if (dict_get (options, "nfs3.readdir-size")) {
+ ret = dict_get_str (options,"nfs3.readdir-size", &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read"
- " option: nfs3.readdir-size");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: nfs3.readdir-size");
ret = -1;
goto err;
}
- ret = gf_string2bytesize (optstr, &size64);
- nfs3->readdirsize = size64;
+ ret = gf_string2uint64 (optstr, &size64);
if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format"
- " option: nfs3.readdir-size");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_FORMAT_FAIL,
+ "Failed to format option: nfs3.readdir-size");
ret = -1;
goto err;
}
- }
+ nfs3_iosize_roundup_4KB (&size64);
+ nfs3->readdirsize = size64;
+ }
/* We want to use the size of the biggest param for the io buffer size.
*/
@@ -4727,33 +5387,84 @@ nfs3_init_options (struct nfs3_state *nfs3, xlator_t *nfsx)
nfs3->iobsize = nfs3->readdirsize;
/* But this is the true size of each iobuf. We need this size to
- * accomodate the NFS headers also in the same buffer. */
+ * accommodate the NFS headers also in the same buffer. */
nfs3->iobsize = nfs3->iobsize * 2;
- /* mem-factor */
- nfs3->memfactor = GF_NFS3_DEFAULT_MEMFACTOR;
ret = 0;
err:
return ret;
}
int
-nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
+nfs3_init_subvolume_options (xlator_t *nfsx,
+ struct nfs3_export *exp,
+ dict_t *options)
{
int ret = -1;
char *optstr = NULL;
char searchkey[1024];
char *name = NULL;
gf_boolean_t boolt = _gf_false;
+ uuid_t volumeid = {0, };
- if ((!exp) || (!options))
+ if ((!nfsx) || (!exp))
return -1;
+ /* For init, fetch options from xlator but for
+ * reconfigure, take the parameter */
+ if (!options)
+ options = nfsx->options;
+
+ if (!options)
+ return (-1);
+
+ gf_uuid_clear (volumeid);
+ if (gf_nfs_dvm_off (nfs_state (nfsx)))
+ goto no_dvm;
+
+ ret = snprintf (searchkey, 1024, "nfs3.%s.volume-id",exp->subvol->name);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (options, searchkey)) {
+ ret = dict_get_str (options, searchkey, &optstr);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: %s", searchkey);
+ ret = -1;
+ goto err;
+ }
+ } else {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0, NFS_MSG_VOLID_MISSING, "DVM is"
+ " on but volume-id not given for volume: %s",
+ exp->subvol->name);
+ ret = -1;
+ goto err;
+ }
+
+ if (optstr) {
+ ret = gf_uuid_parse (optstr, volumeid);
+ if (ret < 0) {
+ gf_msg (GF_MNT, GF_LOG_ERROR, 0,
+ NFS_MSG_PARSE_VOL_UUID_FAIL,
+ "Failed to parse volume UUID");
+ ret = -1;
+ goto err;
+ }
+ gf_uuid_copy (exp->volumeid, volumeid);
+ }
+
+no_dvm:
/* Volume Access */
name = exp->subvol->name;
ret = snprintf (searchkey, 1024, "nfs3.%s.volume-access", name);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
ret = -1;
goto err;
}
@@ -4762,8 +5473,8 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
if (dict_get (options, searchkey)) {
ret = dict_get_str (options, searchkey, &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
- " option: %s", searchkey);
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: %s", searchkey);
ret = -1;
goto err;
}
@@ -4774,7 +5485,8 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
ret = snprintf (searchkey, 1024, "rpc-auth.%s.unix", name);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
ret = -1;
goto err;
}
@@ -4782,8 +5494,8 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
if (dict_get (options, searchkey)) {
ret = dict_get_str (options, searchkey, &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
- " option: %s", searchkey);
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: %s", searchkey);
ret = -1;
goto err;
}
@@ -4792,7 +5504,8 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
exp->trusted_sync = 0;
ret = snprintf (searchkey, 1024, "nfs3.%s.trusted-sync", name);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
ret = -1;
goto err;
}
@@ -4800,15 +5513,16 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
if (dict_get (options, searchkey)) {
ret = dict_get_str (options, searchkey, &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
- " option: %s", searchkey);
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: %s", searchkey);
ret = -1;
goto err;
}
ret = gf_string2boolean (optstr, &boolt);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to convert str "
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0,
+ NFS_MSG_STR2BOOL_FAIL, "Failed to convert str "
"to gf_boolean_t");
ret = -1;
goto err;
@@ -4821,7 +5535,8 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
exp->trusted_write = 0;
ret = snprintf (searchkey, 1024, "nfs3.%s.trusted-write", name);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SNPRINTF_FAIL,
+ "snprintf failed");
ret = -1;
goto err;
}
@@ -4829,16 +5544,17 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
if (dict_get (options, searchkey)) {
ret = dict_get_str (options, searchkey, &optstr);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
- " option: %s", searchkey);
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_READ_FAIL,
+ "Failed to read option: %s", searchkey);
ret = -1;
goto err;
}
ret = gf_string2boolean (optstr, &boolt);
if (ret < 0) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to convert str "
- "to gf_boolean_t");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0,
+ NFS_MSG_STR2BOOL_FAIL, "Failed to convert str"
+ " to gf_boolean_t");
ret = -1;
goto err;
}
@@ -4854,7 +5570,7 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options)
if (exp->trusted_sync)
exp->trusted_write = 1;
- gf_log (GF_NFS3, GF_LOG_TRACE, "%s: %s, %s, %s", exp->subvol->name,
+ gf_msg_trace (GF_NFS3, 0, "%s: %s, %s, %s", exp->subvol->name,
(exp->access == GF_NFS3_VOLACCESS_RO)?"read-only":"read-write",
(exp->trusted_sync == 0)?"no trusted_sync":"trusted_sync",
(exp->trusted_write == 0)?"no trusted_write":"trusted_write");
@@ -4864,64 +5580,60 @@ err:
}
-int
-nfs3_init_subvolume (struct nfs3_state *nfs3, xlator_t *nfsx, xlator_t *subvol,
- int xlid)
+struct nfs3_export *
+nfs3_init_subvolume (struct nfs3_state *nfs3, xlator_t *subvol)
{
int ret = -1;
struct nfs3_export *exp = NULL;
- if ((!nfs3) || (!nfsx) || (!subvol))
- return -1;
+ if ((!nfs3) || (!subvol))
+ return NULL;
- exp = &nfs3->exports[xlid];
+ exp = GF_CALLOC (1, sizeof (*exp), gf_nfs_mt_nfs3_export);
exp->subvol = subvol;
+ INIT_LIST_HEAD (&exp->explist);
+ gf_msg_trace (GF_NFS3, 0, "Initing state: %s", exp->subvol->name);
- gf_log (GF_NFS3, GF_LOG_TRACE, "Initing state: %s", exp->subvol->name);
+ ret = nfs3_init_subvolume_options (nfs3->nfsx, exp, NULL);
+ if (ret == -1) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SUBVOL_INIT_FAIL,
+ "Failed to init subvol");
+ goto exp_free;
+ }
- ret = nfs3_init_subvolume_options (exp, nfsx->options);
- if (ret == -1)
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init subvol");
+ ret = 0;
+exp_free:
+ if (ret < 0) {
+ GF_FREE (exp);
+ exp = NULL;
+ }
- return ret;
+ return exp;
}
int
-nfs3_init_subvolumes (struct nfs3_state *nfs3, xlator_t *nfsx)
+nfs3_init_subvolumes (struct nfs3_state *nfs3)
{
- int xl_count = 0;
int ret = -1;
struct xlator_list *xl_list = NULL;
+ struct nfs3_export *exp = NULL;
- if ((!nfs3) || (!nfsx))
+ if (!nfs3)
return -1;
- xl_list = nfsx->children;
- while (xl_list) {
- ++xl_count;
- xl_list = xl_list->next;
- }
+ xl_list = nfs3->nfsx->children;
- nfs3->exports = GF_CALLOC (xl_count, sizeof (struct nfs3_export),
- gf_nfs_mt_nfs3_export);
- if (!nfs3->exports) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Memory allocation failed");
- goto err;
- }
-
- xl_list = nfsx->children;
- xl_count = 0; /* Re-using xl_count. */
while (xl_list) {
- ret = nfs3_init_subvolume (nfs3, nfsx, xl_list->xlator,
- xl_count);
- if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init subvol: "
- "%s", xl_list->xlator->name);
+ exp = nfs3_init_subvolume (nfs3, xl_list->xlator);
+ if (!exp) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0,
+ NFS_MSG_SUBVOL_INIT_FAIL, "Failed to init "
+ "subvol: %s", xl_list->xlator->name);
goto err;
}
+ list_add_tail (&exp->explist, &nfs3->exports);
xl_list = xl_list->next;
- ++xl_count;
}
ret = 0;
@@ -4936,41 +5648,46 @@ nfs3_init_state (xlator_t *nfsx)
struct nfs3_state *nfs3 = NULL;
int ret = -1;
unsigned int localpool = 0;
+ struct nfs_state *nfs = NULL;
-
- if (!nfsx)
+ if ((!nfsx) || (!nfsx->private))
return NULL;
nfs3 = (struct nfs3_state *)GF_CALLOC (1, sizeof (*nfs3),
gf_nfs_mt_nfs3_state);
if (!nfs3) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Memory allocation failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Memory allocation failed");
return NULL;
}
- ret = nfs3_init_options (nfs3, nfsx);
+ nfs = nfsx->private;
+ ret = nfs3_init_options (nfs3, nfsx->options);
if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init options");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_OPT_INIT_FAIL,
+ "Failed to init options");
goto ret;
}
nfs3->iobpool = nfsx->ctx->iobuf_pool;
- localpool = nfs3->memfactor * GF_NFS_CONCURRENT_OPS_MULT;
- gf_log (GF_NFS3, GF_LOG_TRACE, "local pool: %d", localpool);
+ localpool = nfs->memfactor * GF_NFS_CONCURRENT_OPS_MULT;
+ gf_msg_trace (GF_NFS3, 0, "local pool: %d", localpool);
nfs3->localpool = mem_pool_new (nfs3_call_state_t, localpool);
if (!nfs3->localpool) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "local mempool creation failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "local mempool creation failed");
ret = -1;
goto ret;
}
nfs3->nfsx = nfsx;
nfs3->exportslist = nfsx->children;
- ret = nfs3_init_subvolumes (nfs3, nfsx);
+ INIT_LIST_HEAD (&nfs3->exports);
+ ret = nfs3_init_subvolumes (nfs3);
if (ret == -1) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init per-subvolume "
- "state");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_SUBVOL_INIT_FAIL,
+ "Failed to init per-subvolume state");
goto free_localpool;
}
@@ -4979,6 +5696,14 @@ nfs3_init_state (xlator_t *nfsx)
LOCK_INIT (&nfs3->fdlrulock);
nfs3->fdcount = 0;
+ ret = rpcsvc_create_listeners (nfs->rpcsvc, nfsx->options, nfsx->name);
+ if (ret == -1) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_LISTENERS_CREATE_FAIL,
+ "Unable to create listeners");
+ goto free_localpool;
+ }
+
+ nfs->nfs3state = nfs3;
ret = 0;
free_localpool:
@@ -5005,7 +5730,8 @@ nfs3svc_init (xlator_t *nfsx)
nfs3 = nfs3_init_state (nfsx);
if (!nfs3) {
- gf_log (GF_NFS3, GF_LOG_ERROR, "NFSv3 state init failed");
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_STATE_INIT_FAIL,
+ "NFSv3 state init failed");
return NULL;
}
@@ -5014,4 +5740,40 @@ nfs3svc_init (xlator_t *nfsx)
return &nfs3prog;
}
+int
+nfs3_reconfigure_state (xlator_t *nfsx, dict_t *options)
+{
+ int ret = -1;
+ struct nfs3_export *exp = NULL;
+ struct nfs_state *nfs = NULL;
+ struct nfs3_state *nfs3 = NULL;
+
+ if ((!nfsx) || (!nfsx->private) || (!options))
+ goto out;
+
+ nfs = (struct nfs_state *)nfsx->private;
+ nfs3 = nfs->nfs3state;
+ if (!nfs3)
+ goto out;
+
+ ret = nfs3_init_options (nfs3, options);
+ if (ret) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0, NFS_MSG_RECONF_FAIL,
+ "Failed to reconfigure options");
+ goto out;
+ }
+
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ ret = nfs3_init_subvolume_options (nfsx, exp, options);
+ if (ret) {
+ gf_msg (GF_NFS3, GF_LOG_ERROR, 0,
+ NFS_MSG_RECONF_SUBVOL_FAIL,
+ "Failed to reconfigure subvol options");
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/nfs/server/src/nfs3.h b/xlators/nfs/server/src/nfs3.h
index ccdad447735..4cb3e67528d 100644
--- a/xlators/nfs/server/src/nfs3.h
+++ b/xlators/nfs/server/src/nfs3.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _NFS3_H_
#define _NFS3_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "dict.h"
#include "xlator.h"
@@ -34,11 +20,12 @@
#include "nfs-common.h"
#include "xdr-nfs3.h"
#include "mem-pool.h"
-
+#include "nlm4.h"
+#include "acl3-xdr.h"
+#include "acl3.h"
#include <sys/statvfs.h>
#define GF_NFS3 GF_NFS"-nfsv3"
-#define GF_NFS3_PORT 38467
#define GF_NFS3_DEFAULT_MEMFACTOR 15
#define GF_NFS3_IOBPOOL_MULT GF_NFS_CONCURRENT_OPS_MULT
@@ -47,16 +34,39 @@
/* Static values used for FSINFO
-FIXME: This should be configurable */
-#define GF_NFS3_RTMAX (64 * GF_UNIT_KB)
-#define GF_NFS3_RTPREF (64 * GF_UNIT_KB)
-#define GF_NFS3_RTMULT (4 * GF_UNIT_KB)
-#define GF_NFS3_WTMAX (64 * GF_UNIT_KB)
-#define GF_NFS3_WTPREF (64 * GF_UNIT_KB)
-#define GF_NFS3_WTMULT (4 * GF_UNIT_KB)
-#define GF_NFS3_DTMIN (4 * GF_UNIT_KB)
-#define GF_NFS3_DTPREF (64 * GF_UNIT_KB)
-#define GF_NFS3_MAXFILE (1 * GF_UNIT_PB)
+ * To change the maximum rsize and wsize supported by the NFS client, adjust
+ * GF_NFS3_FILE_IO_SIZE_MAX. The Gluster NFS server defaults to 1MB(1048576)
+ * (same as kernel NFS server). For slower network, rsize/wsize can be trimmed
+ * to 16/32/64-KB. rsize and wsize can be tuned through nfs.read-size and
+ * nfs.write-size respectively.
+ *
+ * NB: For Kernel-NFS, NFS_MAX_FILE_IO_SIZE is 1048576U (1MB).
+ */
+#define GF_NFS3_FILE_IO_SIZE_MAX (1 * GF_UNIT_MB) /* 1048576 */
+#define GF_NFS3_FILE_IO_SIZE_MIN (4 * GF_UNIT_KB) /* 4096 */
+
+#define GF_NFS3_FILE_IO_SIZE_DEF GF_NFS3_FILE_IO_SIZE_MAX
+
+#define GF_NFS3_RTMAX GF_NFS3_FILE_IO_SIZE_MAX
+#define GF_NFS3_RTMIN GF_NFS3_FILE_IO_SIZE_MIN
+#define GF_NFS3_RTPREF GF_NFS3_FILE_IO_SIZE_DEF
+#define GF_NFS3_RTMULT GF_NFS3_FILE_IO_SIZE_MIN
+
+#define GF_NFS3_WTMAX GF_NFS3_FILE_IO_SIZE_MAX
+#define GF_NFS3_WTMIN GF_NFS3_FILE_IO_SIZE_MIN
+#define GF_NFS3_WTPREF GF_NFS3_FILE_IO_SIZE_DEF
+#define GF_NFS3_WTMULT GF_NFS3_FILE_IO_SIZE_MIN
+
+/* This can be tuned through nfs.readdir-size */
+#define GF_NFS3_DTMAX GF_NFS3_FILE_IO_SIZE_MAX
+#define GF_NFS3_DTMIN GF_NFS3_FILE_IO_SIZE_MIN
+#define GF_NFS3_DTPREF GF_NFS3_FILE_IO_SIZE_DEF
+
+#define GF_NFS3_MAXFILESIZE (1 * GF_UNIT_PB)
+
+#define GF_NFS3_IO_SIZE 4096 /* 4-KB */
+#define GF_NFS3_IO_SHIFT 12 /* 2^12 = 4KB */
+
/* FIXME: Handle time resolutions */
#define GF_NFS3_TIMEDELTA_SECS {1,0}
#define GF_NFS3_TIMEDELTA_NSECS {0,1}
@@ -82,16 +92,19 @@ struct nfs3_fd_entry {
/* Per subvolume nfs3 specific state */
struct nfs3_export {
+ struct list_head explist;
xlator_t *subvol;
+ uuid_t volumeid;
int access;
int trusted_sync;
int trusted_write;
+ int rootlookedup;
};
#define GF_NFS3_DEFAULT_VOLACCESS (GF_NFS3_VOLACCESS_RW)
/* The NFSv3 protocol state */
-struct nfs3_state {
+typedef struct nfs3_state {
/* The NFS xlator pointer. The NFS xlator can be running
* multiple versions of the NFS protocol.
@@ -108,7 +121,7 @@ struct nfs3_state {
*/
xlator_list_t *exportslist;
- struct nfs3_export *exports;
+ struct list_head exports;
/* Mempool for allocations of struct nfs3_local */
struct mem_pool *localpool;
@@ -116,21 +129,46 @@ struct nfs3_state {
uint64_t serverstart;
/* NFSv3 Protocol configurables */
- size_t readsize;
- size_t writesize;
- size_t readdirsize;
+ uint64_t readsize;
+ uint64_t writesize;
+ uint64_t readdirsize;
/* Size of the iobufs used, depends on the sizes of the three params
* above.
*/
- size_t iobsize;
-
- unsigned int memfactor;
+ uint64_t iobsize;
struct list_head fdlru;
gf_lock_t fdlrulock;
int fdcount;
-};
+ uint32_t occ_logger;
+} nfs3_state_t;
+
+typedef enum nfs3_lookup_type {
+ GF_NFS3_REVALIDATE = 1,
+ GF_NFS3_FRESH,
+} nfs3_lookup_type_t;
+
+typedef union args_ {
+ nlm4_stat nlm4_stat;
+ nlm4_holder nlm4_holder;
+ nlm4_lock nlm4_lock;
+ nlm4_share nlm4_share;
+ nlm4_testrply nlm4_testrply;
+ nlm4_testres nlm4_testres;
+ nlm4_testargs nlm4_testargs;
+ nlm4_res nlm4_res;
+ nlm4_lockargs nlm4_lockargs;
+ nlm4_cancargs nlm4_cancargs;
+ nlm4_unlockargs nlm4_unlockargs;
+ nlm4_shareargs nlm4_shareargs;
+ nlm4_shareres nlm4_shareres;
+ nlm4_freeallargs nlm4_freeallargs;
+ getaclargs getaclargs;
+ setaclargs setaclargs;
+ getaclreply getaclreply;
+ setaclreply setaclreply;
+} args;
typedef int (*nfs3_resume_fn_t) (void *cs);
@@ -177,6 +215,7 @@ struct nfs3_local {
count3 datacount;
offset3 dataoffset;
struct iobuf *iob;
+ struct iobref *iobref;
createmode3 createmode;
uint64_t cookieverf;
int sattrguardcheck;
@@ -186,8 +225,10 @@ struct nfs3_local {
cookie3 cookie;
struct iovec datavec;
mode_t mode;
+ struct iatt attr_in;
/* NFSv3 FH resolver state */
+ int hardresolved;
struct nfs3_fh resolvefh;
loc_t resolvedloc;
int resolve_ret;
@@ -195,11 +236,48 @@ struct nfs3_local {
int hashidx;
fd_t *resolve_dir_fd;
char *resolventry;
+ nfs3_lookup_type_t lookuptype;
+ gf_dirent_t *hashmatch;
+ gf_dirent_t *entrymatch;
+ off_t lastentryoffset;
+ struct flock flock;
+ args args;
+ nlm4_lkowner_t lkowner;
+ char cookiebytes[1024];
+ struct nfs3_fh lockfh;
+ int monitor;
+ rpc_transport_t *trans;
+ call_frame_t *frame;
+
+ /* ACL */
+ aclentry aclentry[NFS_ACL_MAX_ENTRIES];
+ aclentry daclentry[NFS_ACL_MAX_ENTRIES];
+ int aclcount;
+ char aclxattr[NFS_ACL_MAX_ENTRIES*8 + 4];
+ int daclcount;
+ char daclxattr[NFS_ACL_MAX_ENTRIES*8 + 4];
};
+/* Predicates over a call-state pointer expression `cst`.  The argument is
+ * parenthesized at every use so that any pointer expression can be passed. */
+#define nfs3_is_revalidate_lookup(cst) ((cst)->lookuptype == GF_NFS3_REVALIDATE)
+#define nfs3_lookup_op(cst) (rpcsvc_request_procnum((cst)->req) == NFS3_LOOKUP)
+#define nfs3_create_op(cst) (rpcsvc_request_procnum((cst)->req) == NFS3_CREATE)
+#define nfs3_create_exclusive_op(cst) ((cst)->createmode == EXCLUSIVE)
+
typedef struct nfs3_local nfs3_call_state_t;
+/* Queue of ops waiting for open fop to return. */
+struct inode_op_queue {
+ struct list_head opq;
+ pthread_mutex_t qlock;
+};
extern rpcsvc_program_t *
nfs3svc_init (xlator_t *nfsx);
+
+extern int
+nfs3_reconfigure_state (xlator_t *nfsx, dict_t *options);
+
+extern uint64_t
+nfs3_request_xlator_deviceid (rpcsvc_request_t *req);
+
#endif
diff --git a/xlators/nfs/server/src/nfsserver.sym b/xlators/nfs/server/src/nfsserver.sym
new file mode 100644
index 00000000000..2126634962a
--- /dev/null
+++ b/xlators/nfs/server/src/nfsserver.sym
@@ -0,0 +1,20 @@
+init
+fini
+fops
+cbks
+options
+notify
+mem_acct_init
+reconfigure
+dumpops
+exp_file_parse
+exp_file_print
+exp_file_get_dir
+exp_dir_get_host
+exp_dir_get_netgroup
+exp_file_dir_from_uuid
+exp_file_deinit
+ng_file_parse
+ng_file_get_netgroup
+ng_file_print
+ng_file_deinit
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
new file mode 100644
index 00000000000..3da3b2d1c05
--- /dev/null
+++ b/xlators/nfs/server/src/nlm4.c
@@ -0,0 +1,2621 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "defaults.h"
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "nfs.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "syscall.h"
+#include "nfs-fops.h"
+#include "inode.h"
+#include "mount3.h"
+#include "nfs3.h"
+#include "nfs-mem-types.h"
+#include "nfs3-helpers.h"
+#include "nfs3-fh.h"
+#include "nlm4.h"
+#include "nlm4-xdr.h"
+#include "msg-nfs3.h"
+#include "nfs-generics.h"
+#include "rpc-clnt.h"
+#include "nsm-xdr.h"
+#include "run.h"
+#include "nfs-messages.h"
+#include <unistd.h>
+#include <rpc/pmap_clnt.h>
+#include <rpc/rpc.h>
+#include <rpc/xdr.h>
+#include <statedump.h>
+
+#ifdef __NetBSD__
+#define KILLALL_CMD "pkill"
+#else
+#define KILLALL_CMD "killall"
+#endif
+
+/* TODO:
+ * 1) 2 opens racing .. creating an fd leak.
+ * 2) use mempool for nlmclnt - destroy if no fd exists, create during 1st call
+ */
+
+typedef ssize_t (*nlm4_serializer) (struct iovec outmsg, void *args);
+
+extern void nfs3_call_state_wipe (nfs3_call_state_t *cs);
+
+struct list_head nlm_client_list;
+gf_lock_t nlm_client_list_lk;
+
+/* race on this is harmless */
+int nlm_grace_period = 50;
+
+/*
+ * Load the program-private NFSv3 state of `request` into `state`.
+ * When it is missing: log, flag SYSTEM_ERR on `request`, set `status` to
+ * nlm4_failed and jump to `label`.  `retval` is accepted for signature
+ * compatibility but is not used by the expansion.
+ */
+#define nlm4_validate_nfs3_state(request, state, status, label, retval) \
+        do {                                                            \
+                state = rpcsvc_request_program_private (request);       \
+                if (!state) {                                           \
+                        gf_msg (GF_NLM, GF_LOG_ERROR, errno,            \
+                                NFS_MSG_STATE_MISSING, "NFSv3 state "   \
+                                "missing from RPC request");            \
+                        rpcsvc_request_seterr ((request), SYSTEM_ERR);  \
+                        status = nlm4_failed;                           \
+                        goto label;                                     \
+                }                                                       \
+        } while (0)
+
+nfs3_call_state_t *
+nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v);
+
+/*
+ * Allocate a call state for request `rq` via nlm4_call_state_init() and
+ * store it in `calls`.  On failure: log, set `opstat` to nlm4_failed, flag
+ * SYSTEM_ERR on `rq` and jump to `errlabel`.
+ */
+#define nlm4_handle_call_state_init(nfs3state, calls, rq, opstat, errlabel)\
+        do {                                                            \
+                calls = nlm4_call_state_init ((nfs3state), (rq));       \
+                if (!calls) {                                           \
+                        gf_msg (GF_NLM, GF_LOG_ERROR, errno,            \
+                                NFS_MSG_INIT_CALL_STAT_FAIL, "Failed to "\
+                                "init call state");                     \
+                        opstat = nlm4_failed;                           \
+                        rpcsvc_request_seterr ((rq), SYSTEM_ERR);       \
+                        goto errlabel;                                  \
+                }                                                       \
+        } while (0)
+/*
+ * Set `status` to nlm4_stale_fh and jump to `errlabel` when
+ * nfs3_fh_validate() rejects `handle`; otherwise fall through.
+ */
+#define nlm4_validate_gluster_fh(handle, status, errlabel) \
+ do { \
+ if (!nfs3_fh_validate (handle)) { \
+ status = nlm4_stale_fh; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+xlator_t *
+nfs3_fh_to_xlator (struct nfs3_state *nfs3, struct nfs3_fh *fh);
+
+/*
+ * Map file handle `handle` (a struct nfs3_fh lvalue, not a pointer) to its
+ * subvolume with nfs3_fh_to_xlator() and store the result in `volume`.
+ * On failure the peer identifier, export id and gfid are logged, `status`
+ * becomes nlm4_stale_fh and control jumps to `label`.  On success the
+ * volume is attached to `req` as its private data.
+ */
+#define nlm4_map_fh_to_volume(nfs3state, handle, req, volume, status, label) \
+        do {                                                            \
+                char exportid[256], gfid[256];                          \
+                rpc_transport_t *trans = NULL;                          \
+                volume = nfs3_fh_to_xlator ((nfs3state), &(handle));    \
+                if (!volume) {                                          \
+                        gf_uuid_unparse ((handle).exportid, exportid);  \
+                        gf_uuid_unparse ((handle).gfid, gfid);          \
+                        trans = rpcsvc_request_transport (req);         \
+                        gf_msg (GF_NLM, GF_LOG_ERROR, errno,            \
+                                NFS_MSG_FH_TO_VOL_FAIL, "Failed to map " \
+                                "FH to vol: client=%s, exportid=%s, gfid=%s",\
+                                trans->peerinfo.identifier, exportid,   \
+                                gfid);                                  \
+                        gf_msg (GF_NLM, GF_LOG_ERROR, errno,            \
+                                NFS_MSG_VOLUME_ERROR,                   \
+                                "Stale nfs client %s must be trying to "\
+                                "connect to a deleted volume, please "  \
+                                "unmount it.", trans->peerinfo.identifier);\
+                        status = nlm4_stale_fh;                         \
+                        goto label;                                     \
+                } else {                                                \
+                        gf_msg_trace (GF_NLM, 0, "FH to Volume: %s"     \
+                                      , volume->name);                  \
+                        rpcsvc_request_set_private (req, volume);       \
+                }                                                       \
+        } while (0)
+
+/*
+ * Verify that subvolume `vlm` is started according to
+ * nfs_subvolume_started().  When it is not: log, set `rtval` to
+ * RPCSVC_ACTOR_IGNORE and jump to `erlbl`.
+ */
+#define nlm4_volume_started_check(nfs3state, vlm, rtval, erlbl)         \
+        do {                                                            \
+                if ((!nfs_subvolume_started (nfs_state ((nfs3state)->nfsx), \
+                                             (vlm)))) {                 \
+                        gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_VOL_DISABLE, \
+                                "Volume is disabled: %s",               \
+                                (vlm)->name);                           \
+                        rtval = RPCSVC_ACTOR_IGNORE;                    \
+                        goto erlbl;                                     \
+                }                                                       \
+        } while (0)
+
+/*
+ * Check the outcome of file-handle resolution recorded in call state `cst`.
+ * When (cst)->resolve_ret is negative: log the peer, volume name (or "ERR")
+ * and gfid, translate (cst)->resolve_errno into `nfstat` with
+ * nlm4_errno_to_nlm4stat() and jump to `erlabl`.
+ */
+#define nlm4_check_fh_resolve_status(cst, nfstat, erlabl)               \
+        do {                                                            \
+                xlator_t *xlatorp = NULL;                               \
+                char buf[256], gfid[256];                               \
+                rpc_transport_t *trans = NULL;                          \
+                if ((cst)->resolve_ret < 0) {                           \
+                        trans = rpcsvc_request_transport ((cst)->req);  \
+                        xlatorp = nfs3_fh_to_xlator ((cst)->nfs3state,  \
+                                                     &(cst)->resolvefh);\
+                        gf_uuid_unparse ((cst)->resolvefh.gfid, gfid);  \
+                        snprintf (buf, sizeof (buf), "(%s) %s : %s",    \
+                                  trans->peerinfo.identifier,           \
+                                  xlatorp ? xlatorp->name : "ERR",      \
+                                  gfid);                                \
+                        gf_msg (GF_NLM, GF_LOG_ERROR, 0,                \
+                                NFS_MSG_RESOLVE_FH_FAIL, "Unable to resolve FH"\
+                                ": %s", buf);                           \
+                        nfstat = nlm4_errno_to_nlm4stat ((cst)->resolve_errno);\
+                        goto erlabl;                                    \
+                }                                                       \
+        } while (0)
+
+
+/*
+ * Zero `args`, then point the cookie, owner-handle and file-handle netobj
+ * buffers at the caller-provided storage.  Only the buffer pointers are
+ * set; the netobj lengths remain zero.
+ */
+void
+nlm4_prep_nlm4_testargs (nlm4_testargs *args, struct nfs3_fh *fh,
+                         nlm4_lkowner_t *oh, char *cookiebytes)
+{
+        nlm4_lock *lock = NULL;
+
+        memset (args, 0, sizeof (*args));
+
+        lock = &args->alock;
+        args->cookie.nlm4_netobj_val = (void *)cookiebytes;
+        lock->oh.nlm4_netobj_val = (void *)oh;
+        lock->fh.nlm4_netobj_val = (void *)fh;
+}
+
+/*
+ * Zero `args`, then point the cookie, owner-handle and file-handle netobj
+ * buffers at the caller-provided storage.  Only the buffer pointers are
+ * set; the netobj lengths remain zero.
+ */
+void
+nlm4_prep_nlm4_lockargs (nlm4_lockargs *args, struct nfs3_fh *fh,
+                         nlm4_lkowner_t *oh, char *cookiebytes)
+{
+        nlm4_lock *lock = NULL;
+
+        memset (args, 0, sizeof (*args));
+
+        lock = &args->alock;
+        args->cookie.nlm4_netobj_val = (void *)cookiebytes;
+        lock->oh.nlm4_netobj_val = (void *)oh;
+        lock->fh.nlm4_netobj_val = (void *)fh;
+}
+
+/*
+ * Zero `args`, then point the cookie, owner-handle and file-handle netobj
+ * buffers at the caller-provided storage.  Only the buffer pointers are
+ * set; the netobj lengths remain zero.
+ */
+void
+nlm4_prep_nlm4_cancargs (nlm4_cancargs *args, struct nfs3_fh *fh,
+                         nlm4_lkowner_t *oh, char *cookiebytes)
+{
+        memset (args, 0, sizeof (*args));
+
+        args->cookie.nlm4_netobj_val   = (void *)cookiebytes;
+        args->alock.oh.nlm4_netobj_val = (void *)oh;
+        args->alock.fh.nlm4_netobj_val = (void *)fh;
+}
+
+/*
+ * Zero `args`, then point the cookie, owner-handle and file-handle netobj
+ * buffers at the caller-provided storage.  Only the buffer pointers are
+ * set; the netobj lengths remain zero.
+ */
+void
+nlm4_prep_nlm4_unlockargs (nlm4_unlockargs *args, struct nfs3_fh *fh,
+                           nlm4_lkowner_t *oh, char *cookiebytes)
+{
+        memset (args, 0, sizeof (*args));
+
+        args->cookie.nlm4_netobj_val   = (void *)cookiebytes;
+        args->alock.oh.nlm4_netobj_val = (void *)oh;
+        args->alock.fh.nlm4_netobj_val = (void *)fh;
+}
+
+/*
+ * Zero `args`, then point the cookie netobj and the file-handle and
+ * owner-handle netobjs of args->share at the caller-provided storage.
+ * Only the buffer pointers are set; the netobj lengths remain zero.
+ */
+void
+nlm4_prep_shareargs (nlm4_shareargs *args, struct nfs3_fh *fh,
+                     nlm4_lkowner_t *oh, char *cookiebytes)
+{
+        memset (args, 0, sizeof (*args));
+
+        args->cookie.nlm4_netobj_val   = (void *)cookiebytes;
+        args->share.oh.nlm4_netobj_val = (void *)oh;
+        args->share.fh.nlm4_netobj_val = (void *)fh;
+}
+
+/*
+ * Zero `args` and use the caller-provided `oh` storage as the buffer for
+ * its `name` field.
+ */
+void
+nlm4_prep_freeallargs (nlm4_freeallargs *args, nlm4_lkowner_t *oh)
+{
+        void *name_buf = (void *)oh;
+
+        memset (args, 0, sizeof (*args));
+        args->name = name_buf;
+}
+/*
+ * Copy the lock-owner bytes of NLM netobj `src` into `dst`, taking the
+ * length from src->nlm4_netobj_len.
+ *
+ * NOTE(review): the length is not checked here against the capacity of
+ * dst->data; presumably the XDR decoder bounds it - confirm.
+ */
+void
+nlm_copy_lkowner (gf_lkowner_t *dst, nlm4_netobj *src)
+{
+ dst->len = src->nlm4_netobj_len;
+ memcpy (dst->data, src->nlm4_netobj_val, dst->len);
+}
+
+/*
+ * Compare lock owner `a` with NLM owner handle `b`.
+ *
+ * Returns 1 when both have the same length and identical bytes, 0 when
+ * they differ, and -1 (after logging) when either pointer is NULL.  Note
+ * that -1 is non-zero, so callers must not treat the result as a plain
+ * boolean.
+ */
+int
+nlm_is_oh_same_lkowner (gf_lkowner_t *a, nlm4_netobj *b)
+{
+        if (a == NULL || b == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "invalid args");
+                return -1;
+        }
+
+        if (a->len != b->nlm4_netobj_len)
+                return 0;
+
+        return (memcmp (a->data, b->nlm4_netobj_val, a->len) == 0);
+}
+
+/*
+ * Translate an errno value into an NLM4 status code.
+ *
+ * 0 -> nlm4_granted, EROFS -> nlm4_rofs, ESTALE -> nlm4_stale_fh,
+ * ENOLCK -> nlm4_failed; every other value maps to nlm4_denied.
+ */
+nlm4_stats
+nlm4_errno_to_nlm4stat (int errnum)
+{
+        switch (errnum) {
+        case 0:
+                return nlm4_granted;
+        case EROFS:
+                return nlm4_rofs;
+        case ESTALE:
+                return nlm4_stale_fh;
+        case ENOLCK:
+                return nlm4_failed;
+        default:
+                break;
+        }
+
+        return nlm4_denied;
+}
+
+/*
+ * Allocate a zeroed call state from s->localpool and bind it to `req`.
+ *
+ * The new state starts with operrno = EINVAL and monitor = 1, and with its
+ * entries and openwait_q list heads initialised.  Returns NULL when either
+ * argument is NULL or the pool allocation fails.
+ */
+nfs3_call_state_t *
+nlm4_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req)
+{
+        nfs3_call_state_t       *state = NULL;
+
+        if (s == NULL || req == NULL)
+                return NULL;
+
+        state = (nfs3_call_state_t *) mem_get (s->localpool);
+        if (state == NULL)
+                return NULL;
+
+        memset (state, 0, sizeof (*state));
+
+        state->nfs3state = s;
+        state->nfsx      = s->nfsx;
+        state->req       = req;
+        state->operrno   = EINVAL;
+        state->monitor   = 1;
+        INIT_LIST_HEAD (&state->entries.list);
+        INIT_LIST_HEAD (&state->openwait_q);
+
+        return state;
+}
+/*
+ * Look up `caller_name` in nlm_client_list (under nlm_client_list_lk),
+ * set that client's nsm_monitor flag to 1 and return the value the flag
+ * had before the call.  Returns -1, after logging, when no client with
+ * that name is in the list.
+ */
+int
+nlm_monitor (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+ int monitor = -1;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ monitor = nlmclnt->nsm_monitor;
+ nlmclnt->nsm_monitor = 1;
+ break;
+ }
+ }
+ UNLOCK (&nlm_client_list_lk);
+
+ if (monitor == -1)
+ gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_CALLER_NOT_FOUND,
+ "%s was not found in the nlmclnt list", caller_name);
+
+ return monitor;
+}
+/*
+ * Return a new reference (rpc_clnt_ref) to the rpc client saved for
+ * `caller_name`, or NULL when that client is not in nlm_client_list or has
+ * no rpc client attached.  The list is walked under nlm_client_list_lk.
+ */
+rpc_clnt_t *
+nlm_get_rpc_clnt (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ rpc_clnt_t *rpc_clnt = NULL;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+ if (!nlmclnt_found)
+ goto ret;
+ if (nlmclnt->rpc_clnt)
+ rpc_clnt = rpc_clnt_ref (nlmclnt->rpc_clnt);
+ret:
+ UNLOCK (&nlm_client_list_lk);
+ return rpc_clnt;
+}
+
+/*
+ * Attach `rpc_clnt` to the client entry named `caller_name`, creating the
+ * entry when it does not exist yet.
+ *
+ * A reference on `rpc_clnt` is taken only when the entry has no rpc client
+ * saved; an already saved client is left untouched.  A new entry is linked
+ * into nlm_client_list only after its name has been duplicated, so the
+ * list never holds an entry with a NULL caller_name.
+ *
+ * Returns 0 on success, -1 when memory for a new entry cannot be obtained.
+ */
+int
+nlm_set_rpc_clnt (rpc_clnt_t *rpc_clnt, char *caller_name)
+{
+        nlm_client_t    *nlmclnt       = NULL;
+        int              nlmclnt_found = 0;
+        int              ret           = -1;
+
+        LOCK (&nlm_client_list_lk);
+        list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+                if (!strcmp(caller_name, nlmclnt->caller_name)) {
+                        nlmclnt_found = 1;
+                        break;
+                }
+        }
+
+        if (!nlmclnt_found) {
+                nlmclnt = GF_CALLOC (1, sizeof(*nlmclnt),
+                                     gf_nfs_mt_nlm4_nlmclnt);
+                if (nlmclnt == NULL)
+                        goto ret;
+
+                /* Other list walkers strcmp() against caller_name without
+                 * a NULL check, so refuse to publish a nameless entry. */
+                nlmclnt->caller_name = gf_strdup (caller_name);
+                if (nlmclnt->caller_name == NULL) {
+                        GF_FREE (nlmclnt);
+                        goto ret;
+                }
+
+                INIT_LIST_HEAD(&nlmclnt->fdes);
+                INIT_LIST_HEAD(&nlmclnt->nlm_clients);
+                INIT_LIST_HEAD(&nlmclnt->shares);
+
+                list_add (&nlmclnt->nlm_clients, &nlm_client_list);
+        }
+
+        if (nlmclnt->rpc_clnt == NULL) {
+                nlmclnt->rpc_clnt = rpc_clnt_ref (rpc_clnt);
+        }
+        ret = 0;
+ret:
+        UNLOCK (&nlm_client_list_lk);
+        return ret;
+}
+
+/*
+ * Detach rpc client `rpc` from whichever client entry currently holds it.
+ *
+ * The entry's pointer is cleared under nlm_client_list_lk; the connection
+ * cleanup and the unref happen after the lock is released.
+ * Returns 0 when the client was found and released, -1 otherwise.
+ */
+int
+nlm_unset_rpc_clnt (rpc_clnt_t *rpc)
+{
+        nlm_client_t    *entry    = NULL;
+        rpc_clnt_t      *detached = NULL;
+
+        LOCK (&nlm_client_list_lk);
+        list_for_each_entry (entry, &nlm_client_list, nlm_clients) {
+                if (rpc != entry->rpc_clnt)
+                        continue;
+
+                detached = entry->rpc_clnt;
+                entry->rpc_clnt = NULL;
+                break;
+        }
+        UNLOCK (&nlm_client_list_lk);
+
+        if (!detached)
+                return -1;
+
+        /* cleanup the saved-frames before last unref */
+        rpc_clnt_connection_cleanup (&detached->conn);
+        rpc_clnt_unref (detached);
+
+        return 0;
+}
+
+/*
+ * Make sure nlm_client_list contains an entry named `caller_name`.
+ *
+ * An existing entry is left as is.  A new entry is linked into the list
+ * only after its name has been duplicated, so the list never holds an
+ * entry with a NULL caller_name.
+ *
+ * Returns 0 when the entry exists or was created, -1 on allocation failure.
+ */
+int
+nlm_add_nlmclnt (char *caller_name)
+{
+        nlm_client_t    *nlmclnt       = NULL;
+        int              nlmclnt_found = 0;
+        int              ret           = -1;
+
+        LOCK (&nlm_client_list_lk);
+        list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+                if (!strcmp(caller_name, nlmclnt->caller_name)) {
+                        nlmclnt_found = 1;
+                        break;
+                }
+        }
+        if (!nlmclnt_found) {
+                nlmclnt = GF_CALLOC (1, sizeof(*nlmclnt),
+                                     gf_nfs_mt_nlm4_nlmclnt);
+                if (nlmclnt == NULL) {
+                        gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM,
+                                NFS_MSG_NO_MEMORY, "malloc error");
+                        goto ret;
+                }
+
+                /* Other list walkers strcmp() against caller_name without
+                 * a NULL check, so refuse to publish a nameless entry. */
+                nlmclnt->caller_name = gf_strdup (caller_name);
+                if (nlmclnt->caller_name == NULL) {
+                        gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM,
+                                NFS_MSG_NO_MEMORY, "malloc error");
+                        GF_FREE (nlmclnt);
+                        goto ret;
+                }
+
+                INIT_LIST_HEAD(&nlmclnt->fdes);
+                INIT_LIST_HEAD(&nlmclnt->nlm_clients);
+                INIT_LIST_HEAD(&nlmclnt->shares);
+
+                list_add (&nlmclnt->nlm_clients, &nlm_client_list);
+        }
+        ret = 0;
+ret:
+        UNLOCK (&nlm_client_list_lk);
+        return ret;
+}
+/*
+ * Serialize reply structure `arg` with `sfunc` into an iobuf taken from the
+ * NFSv3 state's iobpool and submit it as the reply to `req`.
+ *
+ * Returns 0 on success.  Returns -1 when `req` is NULL, the program state is
+ * missing, a buffer cannot be obtained, encoding fails or submission fails;
+ * a failing iobref_add() has its return value passed through.  The local
+ * iobuf and iobref references are dropped on every path.
+ */
+int
+nlm4svc_submit_reply (rpcsvc_request_t *req, void *arg, nlm4_serializer sfunc)
+{
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ struct nfs3_state *nfs3 = NULL;
+ int ret = -1;
+ ssize_t msglen = 0;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ return -1;
+
+ nfs3 = (struct nfs3_state *)rpcsvc_request_program_private (req);
+ if (!nfs3) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL,
+ NFS_MSG_MNT_STATE_NOT_FOUND, "mount state not found");
+ goto ret;
+ }
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ iob = iobuf_get (nfs3->iobpool);
+ if (!iob) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, &outmsg);
+ /* Use the given serializer to translate the given C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ msglen = sfunc (outmsg, arg);
+ if (msglen < 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ENCODE_MSG_FAIL,
+ "Failed to encode message");
+ goto ret;
+ }
+ outmsg.iov_len = msglen;
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
+ if (ret == -1) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_REP_SUBMIT_FAIL,
+ "Reply submission failed");
+ goto ret;
+ }
+
+ ret = 0;
+ret:
+ if (iob)
+ iobuf_unref (iob);
+ if (iobref)
+ iobref_unref (iobref);
+
+ return ret;
+}
+
+typedef int (*nlm4_resume_fn_t) (void *cs);
+/*
+ * Callback for the open fop wound by nlm4_file_open_and_resume().
+ *
+ * On success the fd stored in the call state (cs->fd, not the `fd`
+ * argument) is bound; op_ret is recorded in cs->resolve_ret and the saved
+ * resume function is run.  The frame is detached from the call state and
+ * destroyed afterwards.  Always returns 0.
+ */
+int32_t
+nlm4_file_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ nfs3_call_state_t *cs = frame->local;
+
+ if (op_ret == 0)
+ fd_bind (cs->fd);
+ cs->resolve_ret = op_ret;
+ cs->resume_fn (cs);
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+/*
+ * Ask the status monitor (NSM) on localhost to monitor the host named by
+ * `arg` (a NUL-terminated string), registering NLMCBK_SM_NOTIFY of
+ * NLMCBK_PROGRAM/NLMCBK_V1 as the callback.
+ *
+ * The request and result structures are zeroed first so that fields that
+ * are never assigned here (the comment below says there is no private data
+ * to send) are not encoded from uninitialised stack memory.
+ *
+ * Failures are only logged; the function always returns NULL.
+ */
+void *
+nsm_monitor(void *arg)
+{
+        CLIENT                  *clnt = NULL;
+        enum clnt_stat           ret;
+        struct mon               nsm_mon;
+        struct sm_stat_res       res;
+        struct timeval           tout = { 5, 0 };
+        char                    *host = NULL;
+
+        host = arg;
+        memset (&nsm_mon, 0, sizeof (nsm_mon));
+        memset (&res, 0, sizeof (res));
+
+        nsm_mon.mon_id.mon_name = gf_strdup(host);
+        nsm_mon.mon_id.my_id.my_name = gf_strdup("localhost");
+        if (!nsm_mon.mon_id.mon_name || !nsm_mon.mon_id.my_id.my_name) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to duplicate host names for SM_MON");
+                goto out;
+        }
+        nsm_mon.mon_id.my_id.my_prog = NLMCBK_PROGRAM;
+        nsm_mon.mon_id.my_id.my_vers = NLMCBK_V1;
+        nsm_mon.mon_id.my_id.my_proc = NLMCBK_SM_NOTIFY;
+        /* nothing to put in the private data */
+#define SM_PROG 100024
+#define SM_VERS 1
+#define SM_MON 2
+
+        /* create a connection to nsm on the localhost */
+        clnt = clnt_create("localhost", SM_PROG, SM_VERS, "tcp");
+        if(!clnt)
+        {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_CLNT_CREATE_ERROR,
+                        "%s", clnt_spcreateerror ("Clnt_create()"));
+                goto out;
+        }
+
+        ret = clnt_call(clnt, SM_MON,
+                        (xdrproc_t) xdr_mon, (caddr_t) & nsm_mon,
+                        (xdrproc_t) xdr_sm_stat_res, (caddr_t) & res, tout);
+        if(ret != RPC_SUCCESS)
+        {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_CLNT_CALL_ERROR,
+                        "clnt_call(): %s", clnt_sperrno(ret));
+                goto out;
+        }
+        if(res.res_stat != STAT_SUCC)
+        {
+                /* The RPC itself succeeded here, so clnt_sperrno(ret) would
+                 * only report success; say what actually went wrong. */
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_CLNT_CALL_ERROR,
+                        "clnt_call(): SM_MON request for %s was refused",
+                        host);
+                goto out;
+        }
+
+out:
+        GF_FREE(nsm_mon.mon_id.mon_name);
+        GF_FREE(nsm_mon.mon_id.my_id.my_name);
+        if (clnt != NULL)
+                clnt_destroy(clnt);
+        return NULL;
+}
+/*
+ * Return the entry of nlm_client_list whose caller_name equals
+ * `caller_name`, or NULL when there is none or `caller_name` is NULL.
+ * Takes no lock itself; nlm_get_uniq() calls it with nlm_client_list_lk
+ * held.
+ */
+nlm_client_t *
+__nlm_get_uniq (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+
+ if (!caller_name)
+ return NULL;
+
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name))
+ return nlmclnt;
+ }
+
+ return NULL;
+}
+/*
+ * Locked wrapper around __nlm_get_uniq(): look up the client entry for
+ * `caller_name` while holding nlm_client_list_lk.
+ *
+ * NOTE(review): the returned pointer is used after the lock is dropped and
+ * no reference is taken here - confirm entries cannot be freed meanwhile.
+ */
+nlm_client_t *
+nlm_get_uniq (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+
+ LOCK (&nlm_client_list_lk);
+ nlmclnt = __nlm_get_uniq (caller_name);
+ UNLOCK (&nlm_client_list_lk);
+
+ return nlmclnt;
+}
+
+/*
+ * Obtain an fd for cs->resolvedloc.inode keyed by the caller's nlm_client_t
+ * and then run `resume` on `cs`.
+ *
+ * An fd already registered for that client (fd_lookup_uint64) is reused and
+ * `resume` runs immediately.  Otherwise a new fd is created and an open fop
+ * is wound to cs->vol; nlm4_file_open_cbk() runs `resume` when it returns.
+ * The open uses O_WRONLY for an exclusive lock request, O_RDONLY otherwise.
+ *
+ * Returns 0 when `resume` was run or the open was wound, -ENOLCK when the
+ * client entry or the fd could not be obtained, and -ENOMEM when no frame
+ * could be created.
+ */
+int
+nlm4_file_open_and_resume(nfs3_call_state_t *cs, nlm4_resume_fn_t resume)
+{
+ fd_t *fd = NULL;
+ int ret = -1;
+ int flags = 0;
+ nlm_client_t *nlmclnt = NULL;
+ call_frame_t *frame = NULL;
+
+ if (cs->args.nlm4_lockargs.exclusive == _gf_false)
+ flags = O_RDONLY;
+ else
+ flags = O_WRONLY;
+
+ nlmclnt = nlm_get_uniq (cs->args.nlm4_lockargs.alock.caller_name);
+ if (nlmclnt == NULL) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOLCK,
+ NFS_MSG_NO_MEMORY, "nlm_get_uniq() "
+ "returned NULL");
+ ret = -ENOLCK;
+ goto err;
+ }
+ cs->resume_fn = resume;
+ fd = fd_lookup_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (fd) {
+ cs->fd = fd;
+ cs->resolve_ret = 0;
+ cs->resume_fn(cs);
+ ret = 0;
+ goto err; /* success: the label is the common exit */
+ }
+
+ fd = fd_create_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (fd == NULL) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOLCK, NFS_MSG_NO_MEMORY,
+ "fd_create_uint64() returned NULL");
+ ret = -ENOLCK;
+ goto err;
+ }
+
+ cs->fd = fd;
+
+ frame = create_frame (cs->nfsx, cs->nfsx->ctx->pool);
+ if (!frame) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "unable to create frame");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ frame->root->pid = NFS_PID;
+ frame->root->uid = rpcsvc_request_uid (cs->req);
+ frame->root->gid = rpcsvc_request_gid (cs->req);
+ frame->local = cs;
+ nfs_fix_groups (cs->nfsx, frame->root);
+
+ STACK_WIND_COOKIE (frame, nlm4_file_open_cbk, cs->vol, cs->vol,
+ cs->vol->fops->open, &cs->resolvedloc, flags,
+ cs->fd, NULL);
+ ret = 0;
+err:
+ return ret;
+}
+
+/*
+ * Send an nlm4_res reply carrying `cookie` and status `stat` for `req`.
+ * The result of the submission is not checked; always returns 0.
+ */
+int
+nlm4_generic_reply (rpcsvc_request_t *req, nlm4_netobj cookie, nlm4_stats stat)
+{
+        nlm4_res        reply;
+
+        memset (&reply, 0, sizeof (reply));
+        reply.stat.stat = stat;
+        reply.cookie = cookie;
+
+        nlm4svc_submit_reply (req, (void *)&reply,
+                              (nlm4_serializer)xdr_serialize_nlm4_res);
+
+        return 0;
+}
+
+/*
+ * RPC actor for the NLM4 NULL procedure: answer with an empty message.
+ * A NULL request is only logged.  Always returns 0.
+ */
+int
+nlm4svc_null (rpcsvc_request_t *req)
+{
+        struct iovec    empty = {0, };
+
+        if (req != NULL) {
+                rpcsvc_submit_generic (req, &empty, 1, NULL, 0, NULL);
+        } else {
+                gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "Got NULL request!");
+        }
+
+        return 0;
+}
+
+/*
+ * Describe the lock in `flock` as an NLM4 lock holder.
+ *
+ * holder->exclusive is set to 1 only for a write lock and is otherwise
+ * left as the caller initialised it; svid, offset and length are copied
+ * from the flock.  Always returns 0.
+ */
+int
+nlm4_gf_flock_to_holder (nlm4_holder *holder, struct gf_flock *flock)
+{
+        if (flock->l_type == GF_LK_F_WRLCK)
+                holder->exclusive = 1;
+
+        holder->l_len    = flock->l_len;
+        holder->l_offset = flock->l_start;
+        holder->svid     = flock->l_pid;
+
+        return 0;
+}
+
+/*
+ * Fill `flock` from NLM4 lock `lock`.
+ *
+ * The lock type is F_WRLCK when `excl` is non-zero and F_RDLCK otherwise,
+ * offsets are relative to SEEK_SET, and the owner handle of `lock` is
+ * copied into flock->l_owner.  Always returns 0.
+ */
+int
+nlm4_lock_to_gf_flock (struct gf_flock *flock, nlm4_lock *lock, int excl)
+{
+        flock->l_type   = excl ? F_WRLCK : F_RDLCK;
+        flock->l_whence = SEEK_SET;
+        flock->l_start  = lock->l_offset;
+        flock->l_len    = lock->l_len;
+        flock->l_pid    = lock->svid;
+
+        nlm_copy_lkowner (&flock->l_owner, &lock->oh);
+
+        return 0;
+}
+/*
+ * Client-side procedure table for NLM callbacks; only NULL and GRANTED are
+ * listed and neither has a handler function.
+ */
+rpc_clnt_procedure_t nlm4_clnt_actors[NLM4_PROC_COUNT] = {
+ [NLM4_NULL] = {"NULL", NULL},
+ [NLM4_GRANTED] = {"GRANTED", NULL},
+};
+/* Procedure names, indexed by procedure number. */
+char *nlm4_clnt_names[NLM4_PROC_COUNT] = {
+ [NLM4_NULL] = "NULL",
+ [NLM4_GRANTED] = "GRANTED",
+};
+/* Program descriptor tying the two tables above to NLM_PROGRAM / NLM_V4. */
+rpc_clnt_prog_t nlm4clntprog = {
+ .progname = "NLMv4",
+ .prognum = NLM_PROGRAM,
+ .progver = NLM_V4,
+ .numproc = NLM4_PROC_COUNT,
+ .proctable = nlm4_clnt_actors,
+ .procnames = nlm4_clnt_names,
+};
+
+/*
+ * Send the reply to an NLM4 TEST request.
+ *
+ * The reply carries the cookie of the decoded test arguments and status
+ * `stat`.  For nlm4_denied the conflicting lock in `flock` is reported as
+ * the holder.  Error paths call this with flock == NULL and a status
+ * derived from an errno, which can also be nlm4_denied; in that case the
+ * holder is left zeroed instead of dereferencing NULL.
+ *
+ * The result of the submission is not checked; always returns 0.
+ */
+int
+nlm4_test_reply (nfs3_call_state_t *cs, nlm4_stats stat, struct gf_flock *flock)
+{
+        nlm4_testres    res;
+
+        memset (&res, 0, sizeof (res));
+        res.cookie = cs->args.nlm4_testargs.cookie;
+        res.stat.stat = stat;
+        if (stat == nlm4_denied && flock != NULL)
+                nlm4_gf_flock_to_holder (&res.stat.nlm4_testrply_u.holder,
+                                         flock);
+
+        nlm4svc_submit_reply (cs->req, (void *)&res,
+                              (nlm4_serializer)xdr_serialize_nlm4_testres);
+        return 0;
+}
+
+/*
+ * Callback for the F_GETLK request issued by nlm4_test_fd_resume().
+ *
+ * A failed request is mapped to a status through nlm4_errno_to_nlm4stat();
+ * a returned lock of type F_UNLCK means the tested lock could be granted;
+ * anything else is reported as nlm4_denied.  The reply is sent and the
+ * call state wiped.  Always returns 0.
+ */
+int
+nlm4svc_test_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+                  dict_t *xdata)
+{
+        nfs3_call_state_t       *cs   = frame->local;
+        nlm4_stats               stat = nlm4_denied;
+
+        if (op_ret == -1)
+                stat = nlm4_errno_to_nlm4stat (op_errno);
+        else if (flock->l_type == F_UNLCK)
+                stat = nlm4_granted;
+
+        nlm4_test_reply (cs, stat, flock);
+        nfs3_call_state_wipe (cs);
+
+        return 0;
+}
+
+/*
+ * Issue the F_GETLK request for an NLM4 TEST call once cs->fd is set.
+ *
+ * The lock description and the lock owner are built from the decoded test
+ * arguments; the answer arrives in nlm4svc_test_cbk().
+ * Returns -EFAULT when `carg` is NULL, otherwise the result of nfs_lk().
+ */
+int
+nlm4_test_fd_resume (void *carg)
+{
+        nfs3_call_state_t       *cs   = (nfs3_call_state_t *)carg;
+        struct gf_flock          lock = {0, };
+        nfs_user_t               user = {0, };
+
+        if (cs == NULL)
+                return -EFAULT;
+
+        nfs_request_user_init (&user, cs->req);
+        nlm4_lock_to_gf_flock (&lock, &cs->args.nlm4_testargs.alock,
+                               cs->args.nlm4_testargs.exclusive);
+        nlm_copy_lkowner (&user.lk_owner, &cs->args.nlm4_testargs.alock.oh);
+
+        return nfs_lk (cs->nfsx, cs->vol, &user, cs->fd, F_GETLK, &lock,
+                       nlm4svc_test_cbk, cs);
+}
+
+
+/*
+ * Continue an NLM4 TEST call after file-handle resolution.
+ *
+ * On a resolve failure the status computed by nlm4_check_fh_resolve_status()
+ * from the resolve errno is the one sent to the client; it is no longer
+ * overwritten by a status derived from the generic -1 return code.  When no
+ * anonymous fd can be obtained the status stays nlm4_failed.  A failure of
+ * nlm4_test_fd_resume() is mapped from its negative errno return.
+ *
+ * Returns -1 when `carg` is NULL, otherwise the (possibly negative) result
+ * described above; on failure the reply is sent and the call state wiped.
+ */
+int
+nlm4_test_resume (void *carg)
+{
+        nlm4_stats               stat = nlm4_failed;
+        int                      ret  = -1;
+        nfs3_call_state_t       *cs   = NULL;
+        fd_t                    *fd   = NULL;
+
+        if (!carg)
+                return ret;
+
+        cs = (nfs3_call_state_t *)carg;
+        nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+        fd = fd_anonymous (cs->resolvedloc.inode);
+        if (!fd)
+                goto nlm4err;
+        cs->fd = fd;
+        ret = nlm4_test_fd_resume (cs);
+        if (ret < 0)
+                stat = nlm4_errno_to_nlm4stat (-ret);
+
+nlm4err:
+        if (ret < 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_OPEN_FAIL,
+                        "unable to open_and_resume");
+                nlm4_test_reply (cs, stat, NULL);
+                nfs3_call_state_wipe (cs);
+        }
+
+        return ret;
+}
+/*
+ * RPC actor for the NLM4 TEST procedure.
+ *
+ * Decodes the test arguments from req->msg[0], validates the file handle,
+ * maps it to a volume, then resolves the handle and continues in
+ * nlm4_test_resume().  While nlm_grace_period is non-zero the request is
+ * answered with nlm4_denied_grace_period instead.
+ *
+ * NOTE(review): the rpcerr path calls nfs3_call_state_wipe (cs) whenever
+ * ret < 0, which includes the cases where cs is still NULL (missing state
+ * or failed call-state init) if RPCSVC_ACTOR_ERROR is negative - confirm
+ * that nfs3_call_state_wipe() tolerates NULL.
+ */
+int
+nlm4svc_test (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ nlm4_stats stat = nlm4_failed;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ struct nfs3_fh fh = {{0}, };
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_nlm4_testargs (&cs->args.nlm4_testargs, &fh, &cs->lkowner,
+ cs->cookiebytes);
+ if (xdr_to_nlm4_testargs(req->msg[0], &cs->args.nlm4_testargs) <= 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+ if (nlm_grace_period) {
+ gf_msg (GF_NLM, GF_LOG_WARNING, 0, NFS_MSG_NLM_GRACE_PERIOD,
+ "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_test_reply (cs, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh,
+ NULL, nlm4_test_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_RESOLVE_ERROR,
+ "unable to resolve and resume");
+ nlm4_test_reply (cs, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+
+ return ret;
+}
+/*
+ * Completion callback for the GRANTED message submitted by
+ * nlm4svc_send_granted(): destroys the call stack of the frame passed as
+ * `myframe`.  The reply itself is not inspected.  Always returns 0.
+ */
+int
+nlm4svc_send_granted_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ STACK_DESTROY (((call_frame_t*)myframe)->root);
+ return 0;
+}
+
+void
+nlm4svc_send_granted (nfs3_call_state_t *cs);
+/*
+ * Event callback registered on the NLM callback rpc client by
+ * nlm4_establish_callback(); `mydata` is the call state of the lock
+ * request that triggered the connection.
+ *
+ * RPC_CLNT_CONNECT: save the client for the caller via nlm_set_rpc_clnt(),
+ * drop one reference on it and send the GRANTED message.
+ * RPC_CLNT_DISCONNECT: detach the client via nlm_unset_rpc_clnt().
+ * All other events are ignored.  Always returns 0.
+ */
+int
+nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
+ rpc_clnt_event_t fn, void *data)
+{
+ int ret = 0;
+ char *caller_name = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = mydata;
+ caller_name = cs->args.nlm4_lockargs.alock.caller_name;
+
+ switch (fn) {
+ case RPC_CLNT_CONNECT:
+ ret = nlm_set_rpc_clnt (rpc_clnt, caller_name);
+ if (ret == -1) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, 0,
+ NFS_MSG_RPC_CLNT_ERROR, "Failed to set "
+ "rpc clnt");
+ goto err;
+ }
+ rpc_clnt_unref (rpc_clnt);
+ nlm4svc_send_granted (cs);
+
+ break;
+
+ case RPC_CLNT_MSG:
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ nlm_unset_rpc_clnt (rpc_clnt);
+ break;
+ default:
+ break;
+ }
+
+ err:
+ return 0;
+}
+
+/*
+ * Create an rpc client for sending NLM callbacks to the peer of cs->trans
+ * and start connecting it; `csarg` is the nfs3_call_state_t of the request.
+ *
+ * The peer's NLM port is looked up with pmap_getport(), which limits this
+ * to IPv4 peers; any other address family is rejected.  The GRANTED
+ * message itself is sent from nlm_rpcclnt_notify() once the connection is
+ * established.
+ *
+ * Returns the new rpc client, or NULL on failure.  On failure the client
+ * is unref'd here, so the stale pointer is never handed back.
+ *
+ * NOTE(review): `options` is not unref'd on any path in this function -
+ * confirm who owns the dict after rpc_clnt_new() and on the early error
+ * paths.
+ */
+void *
+nlm4_establish_callback (void *csarg)
+{
+        int                      ret = -1;
+        nfs3_call_state_t       *cs = NULL;
+        union gf_sock_union      sock_union;
+        dict_t                  *options = NULL;
+        char                     peerip[INET6_ADDRSTRLEN+1] = {0};
+        char                    *portstr = NULL;
+        char                     myip[INET6_ADDRSTRLEN+1] = {0};
+        rpc_clnt_t              *rpc_clnt = NULL;
+        int                      port = -1;
+
+
+        cs = (nfs3_call_state_t *) csarg;
+        glusterfs_this_set (cs->nfsx);
+
+        /* The lookup result is not checked below, so make sure a failed
+         * lookup leaves a well-defined (unsupported) address family. */
+        memset (&sock_union, 0, sizeof (sock_union));
+        rpc_transport_get_peeraddr (cs->trans, NULL, 0, &sock_union.storage,
+                                    sizeof (sock_union.storage));
+
+        switch (sock_union.sa.sa_family) {
+        case AF_INET6:
+        /* can not come here as NLM listens on IPv4 */
+                gf_msg (GF_NLM, GF_LOG_ERROR, EAFNOSUPPORT,
+                        NFS_MSG_UNSUPPORTED_VERSION,
+                        "NLM is not supported on IPv6 in this release");
+                goto err;
+        case AF_INET:
+                inet_ntop (AF_INET, &sock_union.sin.sin_addr, peerip,
+                           INET6_ADDRSTRLEN+1);
+                inet_ntop (AF_INET, &(((struct sockaddr_in *)&cs->trans->myinfo.sockaddr)->sin_addr),
+                           myip, INET6_ADDRSTRLEN + 1);
+
+                break;
+        default:
+                /* pmap_getport() below needs a sockaddr_in */
+                gf_msg (GF_NLM, GF_LOG_ERROR, EAFNOSUPPORT,
+                        NFS_MSG_UNSUPPORTED_VERSION,
+                        "Unsupported address family %d for NLM callback",
+                        (int) sock_union.sa.sa_family);
+                goto err;
+        }
+
+        /* looks like libc rpc supports only ipv4 */
+        port = pmap_getport (&sock_union.sin, NLM_PROGRAM,
+                             NLM_V4, IPPROTO_TCP);
+
+        if (port == 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_GET_PORT_ERROR,
+                        "Unable to get NLM port of the client."
+                        " Is the firewall running on client?"
+                        " OR Are RPC services running (rpcinfo -p)?");
+                goto err;
+        }
+
+        options = dict_new();
+        ret = dict_set_str (options, "transport-type", "socket");
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_str error");
+                goto err;
+        }
+
+        ret = dict_set_dynstr (options, "remote-host", gf_strdup (peerip));
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_str error");
+                goto err;
+        }
+
+        ret = gf_asprintf (&portstr, "%d", port);
+        if (ret == -1)
+                goto err;
+
+        ret = dict_set_dynstr (options, "remote-port",
+                               portstr);
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_dynstr error");
+                goto err;
+        }
+
+        /* needed in case virtual IP is used */
+        ret = dict_set_dynstr (options, "transport.socket.source-addr",
+                               gf_strdup (myip));
+        if (ret == -1)
+                goto err;
+
+        ret = dict_set_str (options, "auth-null", "on");
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_dynstr error");
+                goto err;
+        }
+
+        /* TODO: is 32 frames in transit enough ? */
+        rpc_clnt = rpc_clnt_new (options, cs->nfsx, "NLM-client", 32);
+        if (rpc_clnt == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_INVALID_ENTRY,
+                        "rpc_clnt NULL");
+                goto err;
+        }
+
+        ret = rpc_clnt_register_notify (rpc_clnt, nlm_rpcclnt_notify, cs);
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_RPC_CLNT_ERROR,
+                        "rpc_clnt_register_connect error");
+                goto err;
+        }
+
+        /* After this connect succeeds, granted msg is sent in notify */
+        ret = rpc_transport_connect (rpc_clnt->conn.trans, port);
+
+        if (ret == -1 && EINPROGRESS == errno)
+                ret = 0;
+
+err:
+        if (ret == -1 && rpc_clnt) {
+                rpc_clnt_unref (rpc_clnt);
+                /* do not hand a released client back to the caller */
+                rpc_clnt = NULL;
+        }
+
+        return rpc_clnt;
+}
+
+/*
+ * Send an NLM4_GRANTED callback for the lock request held in @cs.
+ *
+ * When nlm_get_rpc_clnt() has no client for the caller name, @cs is handed
+ * to nlm4_establish_callback() and the function returns immediately.
+ * Otherwise the cookie, exclusive flag and lock description are serialized
+ * into an iobuf and submitted with rpc_clnt_submit(). On every path after
+ * the client lookup succeeded, the iobuf/iobref are unref'd,
+ * rpc_clnt_unref() is called and @cs is wiped before returning.
+ */
+void
+nlm4svc_send_granted (nfs3_call_state_t *cs)
+{
+ int ret = -1;
+ rpc_clnt_t *rpc_clnt = NULL;
+ struct iovec outmsg = {0, };
+ nlm4_testargs testargs;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ char peerip[INET6_ADDRSTRLEN+1];
+ union gf_sock_union sock_union;
+
+ rpc_clnt = nlm_get_rpc_clnt (cs->args.nlm4_lockargs.alock.caller_name);
+ if (rpc_clnt == NULL) {
+ /* No callback client yet: delegate to nlm4_establish_callback(). */
+ nlm4_establish_callback ((void*)cs);
+ return;
+ }
+
+ /* NOTE(review): the peer address is rendered into peerip below, but
+  * peerip is not read again in this function, and the return value of
+  * rpc_transport_get_peeraddr() is not checked. */
+ rpc_transport_get_peeraddr (cs->trans, NULL, 0, &sock_union.storage,
+ sizeof (sock_union.storage));
+
+ switch (sock_union.sa.sa_family) {
+ case AF_INET6:
+ inet_ntop (AF_INET6, &sock_union.sin6.sin6_addr, peerip,
+ INET6_ADDRSTRLEN+1);
+ break;
+ case AF_INET:
+ inet_ntop (AF_INET, &sock_union.sin.sin_addr, peerip,
+ INET6_ADDRSTRLEN+1);
+ break;
+ default:
+ break;
+ }
+
+ /* The GRANTED arguments are copied from the original LOCK request. */
+ testargs.cookie = cs->args.nlm4_lockargs.cookie;
+ testargs.exclusive = cs->args.nlm4_lockargs.exclusive;
+ testargs.alock = cs->args.nlm4_lockargs.alock;
+
+ iobuf = iobuf_get (cs->nfs3state->iobpool);
+ if (!iobuf) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobuf");
+ /* 'ret' is both the status variable and the cleanup label. */
+ goto ret;
+ }
+
+ iobuf_to_iovec (iobuf, &outmsg);
+ /* Use the given serializer to translate the given C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ outmsg.iov_len = xdr_serialize_nlm4_testargs (outmsg, &testargs);
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iobuf);
+ if (ret) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+ "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ ret = rpc_clnt_submit (rpc_clnt, &nlm4clntprog, NLM4_GRANTED,
+ nlm4svc_send_granted_cbk, &outmsg, 1,
+ NULL, 0, iobref, cs->frame, NULL, 0,
+ NULL, 0, NULL);
+
+ if (ret < 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_RPC_CLNT_ERROR,
+ "rpc_clnt_submit error");
+ goto ret;
+ }
+ret:
+ /* Common cleanup for both the success and the failure paths. */
+ if (iobref)
+ iobref_unref (iobref);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ rpc_clnt_unref (rpc_clnt);
+ nfs3_call_state_wipe (cs);
+ return;
+}
+
+/*
+ * Drop every fd entry recorded for the NLM client named @caller_name.
+ *
+ * The client list is searched under nlm_client_list_lk; when a client with
+ * a matching caller name exists, each entry on its fdes list is unref'd,
+ * unlinked and freed. Always returns 0.
+ */
+int
+nlm_cleanup_fds (char *caller_name)
+{
+        nlm_client_t *client  = NULL;
+        nlm_fde_t    *entry   = NULL;
+        nlm_fde_t    *next    = NULL;
+        int           matched = 0;
+
+        LOCK (&nlm_client_list_lk);
+        {
+                list_for_each_entry (client, &nlm_client_list, nlm_clients) {
+                        if (strcmp (caller_name, client->caller_name) == 0) {
+                                matched = 1;
+                                break;
+                        }
+                }
+
+                /* 'client' is only meaningful when the search matched. */
+                if (matched && !list_empty (&client->fdes)) {
+                        list_for_each_entry_safe (entry, next, &client->fdes,
+                                                  fde_list) {
+                                fd_unref (entry->fd);
+                                list_del (&entry->fde_list);
+                                GF_FREE (entry);
+                        }
+                }
+        }
+        UNLOCK (&nlm_client_list_lk);
+
+        return 0;
+}
+
+/*
+ * Remove the fd entry for @fd from the client named @caller_name, provided
+ * the entry's transit count is zero.
+ *
+ * The entry is unlinked while nlm_client_list_lk is held; the fd unref and
+ * the free happen after the lock has been released. Nothing happens when
+ * the client or the entry is missing, or when the transit count is non-zero.
+ */
+void
+nlm_search_and_delete (fd_t *fd, char *caller_name)
+{
+        nlm_client_t *client      = NULL;
+        nlm_fde_t    *entry       = NULL;
+        int           have_client = 0;
+        int           have_entry  = 0;
+        int           in_transit  = 0;
+
+        LOCK (&nlm_client_list_lk);
+        {
+                list_for_each_entry (client, &nlm_client_list, nlm_clients) {
+                        if (strcmp (caller_name, client->caller_name) == 0) {
+                                have_client = 1;
+                                break;
+                        }
+                }
+
+                if (have_client) {
+                        list_for_each_entry (entry, &client->fdes, fde_list) {
+                                if (entry->fd == fd) {
+                                        have_entry = 1;
+                                        break;
+                                }
+                        }
+                }
+
+                /* 'entry' is only valid when the inner search matched. */
+                if (have_entry) {
+                        in_transit = entry->transit_cnt;
+                        if (in_transit == 0)
+                                list_del (&entry->fde_list);
+                }
+        }
+        UNLOCK (&nlm_client_list_lk);
+
+        if (!have_entry || in_transit)
+                return;
+
+        fd_unref (entry->fd);
+        GF_FREE (entry);
+}
+
+/*
+ * Decrement the transit count of the fd entry for @fd belonging to the
+ * client named @caller_name.
+ *
+ * Returns the transit count after the decrement, or -1 when either the
+ * client or the fd entry cannot be found. A missing client is logged.
+ */
+int
+nlm_dec_transit_count (fd_t *fd, char *caller_name)
+{
+        nlm_client_t *client      = NULL;
+        nlm_fde_t    *entry       = NULL;
+        int           have_client = 0;
+        int           remaining   = -1;
+
+        LOCK (&nlm_client_list_lk);
+
+        list_for_each_entry (client, &nlm_client_list, nlm_clients) {
+                if (strcmp (caller_name, client->caller_name) == 0) {
+                        have_client = 1;
+                        break;
+                }
+        }
+
+        if (!have_client) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_NLMCLNT_NOT_FOUND,
+                        "nlmclnt not found");
+                goto unlock;
+        }
+
+        list_for_each_entry (entry, &client->fdes, fde_list) {
+                if (entry->fd == fd) {
+                        remaining = --entry->transit_cnt;
+                        break;
+                }
+        }
+
+unlock:
+        UNLOCK (&nlm_client_list_lk);
+        return remaining;
+}
+
+
+/*
+ * Look up the client named @caller_name and make sure it has an fd entry
+ * for @fd, creating one (holding a new fd reference) when none exists.
+ * The entry's transit count is incremented in both cases.
+ *
+ * Returns the matching client, or NULL when no client with that caller
+ * name is registered. If a new entry is needed but cannot be allocated,
+ * the failure is logged, no entry is added, no transit count is touched,
+ * and the client is still returned.
+ */
+nlm_client_t *
+nlm_search_and_add (fd_t *fd, char *caller_name)
+{
+        nlm_fde_t    *fde           = NULL;
+        nlm_client_t *nlmclnt       = NULL;
+        int           nlmclnt_found = 0;
+        int           fde_found     = 0;
+
+        LOCK (&nlm_client_list_lk);
+        list_for_each_entry (nlmclnt,
+                             &nlm_client_list, nlm_clients) {
+                if (!strcmp(caller_name, nlmclnt->caller_name)) {
+                        nlmclnt_found = 1;
+                        break;
+                }
+        }
+
+        if (!nlmclnt_found) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_NLMCLNT_NOT_FOUND,
+                        "nlmclnt not found");
+                nlmclnt = NULL;
+                goto ret;
+        }
+
+        list_for_each_entry (fde, &nlmclnt->fdes, fde_list) {
+                if (fde->fd == fd) {
+                        fde_found = 1;
+                        break;
+                }
+        }
+
+        if (fde_found)
+                goto ret;
+
+        /* No entry for this fd yet; 'fde' is stale after the failed search
+         * and is overwritten by the allocation result. */
+        fde = GF_CALLOC (1, sizeof (*fde), gf_nfs_mt_nlm4_fde);
+        if (!fde) {
+                /* Previously the NULL result was dereferenced right away. */
+                gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY,
+                        "Failed to allocate fd entry");
+                goto ret;
+        }
+
+        fde->fd = fd_ref (fd);
+        list_add (&fde->fde_list, &nlmclnt->fdes);
+ret:
+        /* fde is NULL here when the client is missing or allocation failed. */
+        if (nlmclnt_found && fde)
+                fde->transit_cnt++;
+        UNLOCK (&nlm_client_list_lk);
+        return nlmclnt;
+}
+
+/*
+ * Callback for the lk() call issued by nlm4_lock_fd_resume().
+ *
+ * Decrements the transit count of the fd entry for this caller. On failure
+ * (op_ret == -1) the fd entry is dropped when nothing else is in transit
+ * and the errno is mapped to an NLM4 status; on success the status is
+ * nlm4_granted and, for monitored requests, an nsm_monitor thread may be
+ * started. Blocking requests are answered via nlm4svc_send_granted(),
+ * non-blocking ones via nlm4_generic_reply(). Always returns 0.
+ */
+int
+nlm4svc_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ nlm4_stats stat = nlm4_denied;
+ int transit_cnt = -1;
+ char *caller_name = NULL;
+ nfs3_call_state_t *cs = NULL;
+ pthread_t thr;
+
+ cs = frame->local;
+ caller_name = cs->args.nlm4_lockargs.alock.caller_name;
+ transit_cnt = nlm_dec_transit_count (cs->fd, caller_name);
+
+ if (op_ret == -1) {
+ if (transit_cnt == 0)
+ nlm_search_and_delete (cs->fd, caller_name);
+ stat = nlm4_errno_to_nlm4stat (op_errno);
+ goto err;
+ } else {
+ stat = nlm4_granted;
+ if (cs->monitor && !nlm_monitor (caller_name)) {
+ /* FIXME: handle nsm_monitor failure */
+ /* NOTE(review): the pthread_create() result is not checked and thr
+  * is neither joined nor detached here. caller_name points into cs,
+  * which is wiped further down this path - confirm nsm_monitor does
+  * not use the pointer after that. */
+ pthread_create (&thr, NULL, nsm_monitor, (void*)caller_name);
+ }
+ }
+
+err:
+ /* NOTE(review): for blocking requests the GRANTED callback path is taken
+  * regardless of 'stat', i.e. also when op_ret was -1 - confirm intended. */
+ if (cs->args.nlm4_lockargs.block) {
+ cs->frame = copy_frame (frame);
+ frame->local = NULL;
+ nlm4svc_send_granted (cs);
+ } else {
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ }
+ return 0;
+}
+
+/*
+ * Resume a LOCK request once the file has been opened: register the fd
+ * with the caller's client entry and issue the lk() call.
+ *
+ * Blocking requests are answered with nlm4_blocked first and then wait on
+ * F_SETLKW; the nfs_lk() result is deliberately discarded in that case
+ * (see FIXME). Non-blocking requests use F_SETLK. On a negative result the
+ * errno is mapped to an NLM4 status, the client is answered and the call
+ * state is wiped.
+ *
+ * Returns -EFAULT when @carg is NULL, otherwise the (possibly zeroed)
+ * result described above.
+ */
+int
+nlm4_lock_fd_resume (void *carg)
+{
+        nlm4_stats         stat  = nlm4_denied;
+        int                ret   = -EFAULT;
+        nfs_user_t         nfu   = {0, };
+        nfs3_call_state_t *cs    = NULL;
+        struct gf_flock    flock = {0, };
+
+        if (!carg)
+                return ret;
+
+        cs = (nfs3_call_state_t *)carg;
+        /* Presumably jumps to nlm4err when the fh resolve failed - the
+         * macro body is not part of this block. */
+        nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+        (void) nlm_search_and_add (cs->fd,
+                                   cs->args.nlm4_lockargs.alock.caller_name);
+        nfs_request_user_init (&nfu, cs->req);
+        nlm4_lock_to_gf_flock (&flock, &cs->args.nlm4_lockargs.alock,
+                               cs->args.nlm4_lockargs.exclusive);
+        nlm_copy_lkowner (&nfu.lk_owner, &cs->args.nlm4_lockargs.alock.oh);
+        if (cs->args.nlm4_lockargs.block) {
+                nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+                                    nlm4_blocked);
+                ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_SETLKW,
+                              &flock, nlm4svc_lock_cbk, cs);
+                /* FIXME: handle error from nfs_lk() specially by just
+                 * cleaning up cs and unblock the client lock request.
+                 */
+                ret = 0;
+        } else
+                ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_SETLK,
+                              &flock, nlm4svc_lock_cbk, cs);
+
+nlm4err:
+        if (ret < 0) {
+                stat = nlm4_errno_to_nlm4stat (-ret);
+                /* Log the errno (-ret), as the sibling resume functions do;
+                 * the NLM status code is not an errno value. */
+                gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_LOCK_FAIL,
+                        "unable to call lk()");
+                nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+                                    stat);
+                nfs3_call_state_wipe (cs);
+        }
+
+        return ret;
+}
+
+
+/*
+ * Resume a LOCK request after file-handle resolution: open the file and
+ * continue in nlm4_lock_fd_resume().
+ *
+ * Returns -1 when @carg is NULL. When the result is negative the client is
+ * answered with the mapped NLM4 status and the call state is wiped.
+ */
+int
+nlm4_lock_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_failed;
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ /* Presumably jumps to nlm4err (with ret still -1) when the resolve
+  * failed - confirm against the macro definition. */
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+ ret = nlm4_file_open_and_resume (cs, nlm4_lock_fd_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_OPEN_FAIL,
+ "unable to open and resume");
+ /* stat is recomputed from -ret here, replacing any earlier value. */
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+/*
+ * Shared implementation of the NLM4 LOCK and NM_LOCK procedures.
+ *
+ * Decodes the lock arguments from @req into a fresh call state, validates
+ * the file handle, rejects non-reclaim requests during the grace period,
+ * registers the caller as an NLM client and starts file-handle resolution,
+ * which continues in nlm4_lock_resume().
+ *
+ * @mon is stored in cs->monitor; nlm4svc_lock_cbk() consults it before
+ * starting monitoring.
+ *
+ * Returns RPCSVC_ACTOR_ERROR when @req is NULL, 0 when a reply has been
+ * sent from here, otherwise the value left in 'ret' by the last step.
+ */
+int
+nlm4svc_lock_common (rpcsvc_request_t *req, int mon)
+{
+        int                ret  = RPCSVC_ACTOR_ERROR;
+        nlm4_stats         stat = nlm4_failed;
+        struct nfs3_fh     fh   = {{0}, };
+        xlator_t          *vol  = NULL;
+        nfs3_state_t      *nfs3 = NULL;
+        nfs3_call_state_t *cs   = NULL;
+        struct nfs_state  *nfs  = NULL;
+
+        if (!req)
+                return ret;
+
+        /* The nlm4_* helpers below receive a label argument; presumably
+         * they jump to it on failure - their bodies are not in this block. */
+        nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+        nfs = nfs_state (nfs3->nfsx);
+        nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+                                     stat, rpcerr);
+
+        nlm4_prep_nlm4_lockargs (&cs->args.nlm4_lockargs, &cs->lockfh,
+                                 &cs->lkowner, cs->cookiebytes);
+        if (xdr_to_nlm4_lockargs(req->msg[0], &cs->args.nlm4_lockargs) <= 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+                        "Error decoding args");
+                rpcsvc_request_seterr (req, GARBAGE_ARGS);
+                goto rpcerr;
+        }
+
+        fh = cs->lockfh;
+        cs->monitor = mon;
+        nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+        nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+        if (nlm_grace_period && !cs->args.nlm4_lockargs.reclaim) {
+                gf_msg (GF_NLM, GF_LOG_WARNING, 0, NFS_MSG_NLM_GRACE_PERIOD,
+                        "NLM in grace period");
+                stat = nlm4_denied_grace_period;
+                /* The request was decoded into nlm4_lockargs, so the cookie
+                 * must be read from that member (was nlm4_unlockargs). */
+                nlm4_generic_reply (req, cs->args.nlm4_lockargs.cookie, stat);
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+        cs->vol = vol;
+        cs->trans = rpcsvc_request_transport_ref(req);
+        nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+        /* NOTE(review): this result is overwritten by the next call
+         * without being inspected. */
+        ret = nlm_add_nlmclnt (cs->args.nlm4_lockargs.alock.caller_name);
+
+        ret = nfs3_fh_resolve_and_resume (cs, &fh,
+                                          NULL, nlm4_lock_resume);
+
+nlm4err:
+        if (ret < 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_RESOLVE_ERROR,
+                        "unable to resolve and resume");
+                nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+                                    stat);
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+rpcerr:
+        if (ret < 0) {
+                nfs3_call_state_wipe (cs);
+        }
+
+        return ret;
+}
+
+/* NLM4 LOCK actor: lock request with the monitor flag set. */
+int
+nlm4svc_lock (rpcsvc_request_t *req)
+{
+        const int monitored = 1;
+
+        return nlm4svc_lock_common (req, monitored);
+}
+
+/* NLM4 NM_LOCK actor: lock request with the monitor flag cleared. */
+int
+nlm4svc_nm_lock (rpcsvc_request_t *req)
+{
+        const int monitored = 0;
+
+        return nlm4svc_lock_common (req, monitored);
+}
+
+/*
+ * Callback for the lk() call issued by nlm4_cancel_fd_resume().
+ *
+ * Replies nlm4_granted on success, or the NLM4 status mapped from
+ * @op_errno when @op_ret is -1, then wipes the call state. Returns 0.
+ */
+int
+nlm4svc_cancel_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+                    dict_t *xdata)
+{
+        nfs3_call_state_t *state = frame->local;
+        nlm4_stats         reply = nlm4_granted;
+
+        if (op_ret == -1)
+                reply = nlm4_errno_to_nlm4stat (op_errno);
+
+        nlm4_generic_reply (state->req, state->args.nlm4_cancargs.cookie,
+                            reply);
+        nfs3_call_state_wipe (state);
+
+        return 0;
+}
+
+/*
+ * Issue the unlock (F_SETLK with l_type F_UNLCK) that implements CANCEL
+ * for the call state passed in @carg.
+ *
+ * Returns -EFAULT when @carg is NULL, otherwise the nfs_lk() result.
+ */
+int
+nlm4_cancel_fd_resume (void *carg)
+{
+        nfs3_call_state_t *state = (nfs3_call_state_t *)carg;
+        nfs_user_t         user  = {0, };
+        struct gf_flock    lock  = {0, };
+
+        if (state == NULL)
+                return -EFAULT;
+
+        nfs_request_user_init (&user, state->req);
+        nlm4_lock_to_gf_flock (&lock, &state->args.nlm4_cancargs.alock,
+                               state->args.nlm4_cancargs.exclusive);
+        nlm_copy_lkowner (&user.lk_owner,
+                          &state->args.nlm4_cancargs.alock.oh);
+        /* Override whatever type the conversion produced. */
+        lock.l_type = F_UNLCK;
+
+        return nfs_lk (state->nfsx, state->vol, &user, state->fd, F_SETLK,
+                       &lock, nlm4svc_cancel_cbk, state);
+}
+
+/*
+ * Resume a CANCEL request after file-handle resolution: find the client
+ * and its fd for the resolved inode, then continue in
+ * nlm4_cancel_fd_resume().
+ *
+ * Returns -EFAULT when @carg is NULL and 0 otherwise; failures are reported
+ * to the client from here and the call state is wiped.
+ */
+int
+nlm4_cancel_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_failed;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nlm_client_t *nlmclnt = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ /* Presumably jumps to nlm4err when the resolve failed - confirm
+  * against the macro definition. */
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+
+ nlmclnt = nlm_get_uniq (cs->args.nlm4_cancargs.alock.caller_name);
+ if (nlmclnt == NULL) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, ENOLCK, NFS_MSG_NO_MEMORY,
+ "nlm_get_uniq() returned NULL");
+ goto nlm4err;
+ }
+ /* The client pointer value is used as the lookup key for the fd. */
+ cs->fd = fd_lookup_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (cs->fd == NULL) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_FD_LOOKUP_NULL,
+ "fd_lookup_uint64 retrned NULL");
+ goto nlm4err;
+ }
+ ret = nlm4_cancel_fd_resume (cs);
+
+nlm4err:
+ /* On the early-exit paths ret still holds its initial -EFAULT. */
+ if (ret < 0) {
+ gf_msg (GF_NLM, GF_LOG_WARNING, -ret, NFS_MSG_LOCK_FAIL,
+ "unable to unlock_fd_resume()");
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ nlm4_generic_reply (cs->req, cs->args.nlm4_cancargs.cookie,
+ stat);
+
+ nfs3_call_state_wipe (cs);
+ }
+ /* clean up is taken care of */
+ return 0;
+}
+
+/*
+ * NLM4 CANCEL actor.
+ *
+ * Decodes the cancel arguments from @req into a fresh call state,
+ * validates the file handle, rejects the request during the grace period
+ * and starts file-handle resolution, which continues in
+ * nlm4_cancel_resume().
+ *
+ * Returns RPCSVC_ACTOR_ERROR when @req is NULL, 0 when a reply has been
+ * sent from here, otherwise the value left in 'ret' by the last step.
+ */
+int
+nlm4svc_cancel (rpcsvc_request_t *req)
+{
+        xlator_t          *vol  = NULL;
+        nlm4_stats         stat = nlm4_failed;
+        struct nfs_state  *nfs  = NULL;
+        nfs3_state_t      *nfs3 = NULL;
+        nfs3_call_state_t *cs   = NULL;
+        int                ret  = RPCSVC_ACTOR_ERROR;
+        struct nfs3_fh     fh   = {{0}, };
+
+        if (!req)
+                return ret;
+
+        /* The nlm4_* helpers below receive a label argument; presumably
+         * they jump to it on failure - their bodies are not in this block. */
+        nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+        nfs = nfs_state (nfs3->nfsx);
+        nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+                                     stat, rpcerr);
+
+        nlm4_prep_nlm4_cancargs (&cs->args.nlm4_cancargs, &fh, &cs->lkowner,
+                                 cs->cookiebytes);
+        if (xdr_to_nlm4_cancelargs(req->msg[0], &cs->args.nlm4_cancargs) <= 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+                        "Error decoding args");
+                rpcsvc_request_seterr (req, GARBAGE_ARGS);
+                goto rpcerr;
+        }
+
+        nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+        nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+        if (nlm_grace_period) {
+                gf_msg (GF_NLM, GF_LOG_WARNING, 0, NFS_MSG_NLM_GRACE_PERIOD,
+                        "NLM in grace period");
+                stat = nlm4_denied_grace_period;
+                /* The request was decoded into nlm4_cancargs, so the cookie
+                 * must be read from that member (was nlm4_unlockargs). */
+                nlm4_generic_reply (req, cs->args.nlm4_cancargs.cookie, stat);
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+        cs->vol = vol;
+        nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+        ret = nfs3_fh_resolve_and_resume (cs, &fh,
+                                          NULL, nlm4_cancel_resume);
+
+nlm4err:
+        if (ret < 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_RESOLVE_ERROR,
+                        "unable to resolve and resume");
+                nlm4_generic_reply (cs->req, cs->args.nlm4_cancargs.cookie,
+                                    stat);
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+rpcerr:
+        if (ret < 0) {
+                nfs3_call_state_wipe (cs);
+        }
+        return ret;
+}
+
+/*
+ * Callback for the lk() call issued by nlm4_unlock_fd_resume().
+ *
+ * On failure the errno is mapped to an NLM4 status. On success the status
+ * is nlm4_granted and, when the returned lock type is F_UNLCK, the fd
+ * entry for this caller is removed via nlm_search_and_delete(). The client
+ * is answered and the call state wiped in both cases. Returns 0.
+ */
+int
+nlm4svc_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+                    dict_t *xdata)
+{
+        nfs3_call_state_t *state = frame->local;
+        nlm4_stats         reply = nlm4_denied;
+
+        if (op_ret == -1) {
+                reply = nlm4_errno_to_nlm4stat (op_errno);
+        } else {
+                reply = nlm4_granted;
+                if (flock->l_type == F_UNLCK) {
+                        char *owner =
+                                state->args.nlm4_unlockargs.alock.caller_name;
+
+                        nlm_search_and_delete (state->fd, owner);
+                }
+        }
+
+        nlm4_generic_reply (state->req, state->args.nlm4_unlockargs.cookie,
+                            reply);
+        nfs3_call_state_wipe (state);
+
+        return 0;
+}
+
+/*
+ * Issue the unlock (F_SETLK with l_type F_UNLCK) for the call state passed
+ * in @carg.
+ *
+ * Returns -EFAULT when @carg is NULL, otherwise the nfs_lk() result.
+ */
+int
+nlm4_unlock_fd_resume (void *carg)
+{
+        nfs3_call_state_t *state = (nfs3_call_state_t *)carg;
+        nfs_user_t         user  = {0, };
+        struct gf_flock    lock  = {0, };
+
+        if (state == NULL)
+                return -EFAULT;
+
+        nfs_request_user_init (&user, state->req);
+        nlm4_lock_to_gf_flock (&lock, &state->args.nlm4_unlockargs.alock, 0);
+        nlm_copy_lkowner (&user.lk_owner,
+                          &state->args.nlm4_unlockargs.alock.oh);
+        /* Override whatever type the conversion produced. */
+        lock.l_type = F_UNLCK;
+
+        return nfs_lk (state->nfsx, state->vol, &user, state->fd, F_SETLK,
+                       &lock, nlm4svc_unlock_cbk, state);
+}
+
+/*
+ * Resume an UNLOCK request after file-handle resolution: find the client
+ * and its fd for the resolved inode, then continue in
+ * nlm4_unlock_fd_resume().
+ *
+ * When the client or the fd is unknown the request is answered with
+ * nlm4_granted, as the assignments on those paths request. Previously that
+ * status (and any status set by the resolve check) was unconditionally
+ * overwritten with one derived from the initial ret value before the reply
+ * was sent. The status is now derived from -ret only when
+ * nlm4_unlock_fd_resume() itself failed.
+ *
+ * Returns -1 when @carg is NULL and 0 otherwise; failures are reported to
+ * the client from here and the call state is wiped.
+ */
+int
+nlm4_unlock_resume (void *carg)
+{
+        nlm4_stats         stat    = nlm4_failed;
+        int                ret     = -1;
+        nfs3_call_state_t *cs      = NULL;
+        nlm_client_t      *nlmclnt = NULL;
+
+        if (!carg)
+                return ret;
+
+        cs = (nfs3_call_state_t *)carg;
+        /* Presumably sets stat and jumps to nlm4err when the resolve
+         * failed - confirm against the macro definition. */
+        nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+
+        nlmclnt = nlm_get_uniq (cs->args.nlm4_unlockargs.alock.caller_name);
+        if (nlmclnt == NULL) {
+                stat = nlm4_granted;
+                gf_msg (GF_NLM, GF_LOG_WARNING, ENOLCK, NFS_MSG_NO_MEMORY,
+                        "nlm_get_uniq() returned NULL");
+                goto nlm4err;
+        }
+        /* The client pointer value is used as the lookup key for the fd. */
+        cs->fd = fd_lookup_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+        if (cs->fd == NULL) {
+                stat = nlm4_granted;
+                gf_msg (GF_NLM, GF_LOG_WARNING, 0, NFS_MSG_FD_LOOKUP_NULL,
+                        "fd_lookup_uint64() returned NULL");
+                goto nlm4err;
+        }
+        ret = nlm4_unlock_fd_resume (cs);
+        if (ret < 0)
+                stat = nlm4_errno_to_nlm4stat (-ret);
+
+nlm4err:
+        /* On the early-exit paths ret still holds its initial -1, so the
+         * reply below is sent with the stat chosen on that path. */
+        if (ret < 0) {
+                gf_msg (GF_NLM, GF_LOG_WARNING, -ret, NFS_MSG_LOCK_FAIL,
+                        "unable to unlock_fd_resume");
+                nlm4_generic_reply (cs->req, cs->args.nlm4_unlockargs.cookie,
+                                    stat);
+
+                nfs3_call_state_wipe (cs);
+        }
+        /* we have already taken care of cleanup */
+        return 0;
+}
+
+/*
+ * NLM4 UNLOCK actor.
+ *
+ * Decodes the unlock arguments from @req into a fresh call state,
+ * validates the file handle, rejects the request during the grace period
+ * and starts file-handle resolution, which continues in
+ * nlm4_unlock_resume().
+ *
+ * Returns RPCSVC_ACTOR_ERROR when @req is NULL, 0 when a reply has been
+ * sent from here, otherwise the value left in 'ret' by the last step.
+ */
+int
+nlm4svc_unlock (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ nlm4_stats stat = nlm4_failed;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ struct nfs3_fh fh = {{0}, };
+
+ if (!req)
+ return ret;
+
+ /* The nlm4_* helpers below receive a label argument; presumably they
+  * jump to it on failure - their bodies are not in this block. */
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_nlm4_unlockargs (&cs->args.nlm4_unlockargs, &fh, &cs->lkowner,
+ cs->cookiebytes);
+ if (xdr_to_nlm4_unlockargs(req->msg[0], &cs->args.nlm4_unlockargs) <= 0)
+ {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+ /* Unlike LOCK, there is no reclaim exception here. */
+ if (nlm_grace_period) {
+ gf_msg (GF_NLM, GF_LOG_WARNING, 0, NFS_MSG_NLM_GRACE_PERIOD,
+ "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_generic_reply (req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ /* FIXME: check if trans is being used at all for unlock */
+ cs->trans = rpcsvc_request_transport_ref(req);
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh,
+ NULL, nlm4_unlock_resume);
+
+nlm4err:
+ /* A reply is sent from here, so 0 is returned to the caller. */
+ if (ret < 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_RESOLVE_ERROR,
+ "unable to resolve and resume");
+ nlm4_generic_reply (req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0) {
+ nfs3_call_state_wipe (cs);
+ }
+ return ret;
+}
+
+/*
+ * Send a SHARE/UNSHARE result carrying @stat for the request in @cs.
+ *
+ * The reply reuses the request cookie and a sequence number of 0.
+ * Returns -1 when @cs is NULL, 0 otherwise.
+ */
+int
+nlm4_share_reply (nfs3_call_state_t *cs, nlm4_stats stat)
+{
+        nlm4_shareres reply = {{0}, 0, 0};
+
+        if (cs == NULL)
+                return -1;
+
+        reply.sequence = 0;
+        reply.stat     = stat;
+        reply.cookie   = cs->args.nlm4_shareargs.cookie;
+
+        nlm4svc_submit_reply (cs->req, (void *)&reply,
+                              (nlm4_serializer)xdr_serialize_nlm4_shareres);
+
+        return 0;
+}
+
+/*
+ * Allocate a zeroed share record with both of its list links initialised.
+ * Returns NULL when the allocation fails.
+ */
+nlm_share_t *
+nlm4_share_new ()
+{
+        nlm_share_t *fresh = GF_CALLOC (1, sizeof (nlm_share_t),
+                                        gf_nfs_mt_nlm4_share);
+
+        if (fresh != NULL) {
+                INIT_LIST_HEAD (&fresh->client_list);
+                INIT_LIST_HEAD (&fresh->inode_list);
+        }
+
+        return fresh;
+}
+
+/*
+ * Link @share onto the share list kept in the inode context of
+ * share->inode, creating and storing that context when the inode has none.
+ *
+ * Returns 0 on success and non-zero when the context cannot be allocated
+ * or stored. A context allocated here is released again on failure; the
+ * previous code freed the address of the embedded list head instead of
+ * the allocation itself.
+ */
+int
+nlm4_add_share_to_inode (nlm_share_t *share)
+{
+        int                   ret      = -1;
+        uint64_t              ctx      = 0;
+        xlator_t             *this     = NULL;
+        inode_t              *inode    = NULL;
+        struct nfs_inode_ctx *ictx     = NULL;
+        struct nfs_inode_ctx *new_ictx = NULL;
+        struct nfs_state     *priv     = NULL;
+
+        this = THIS;
+        priv = this->private;
+        inode = share->inode;
+        ret = inode_ctx_get (inode, this, &ctx);
+
+        if (ret == -1) {
+                new_ictx = GF_CALLOC (1, sizeof (struct nfs_inode_ctx),
+                                      gf_nfs_mt_inode_ctx);
+                if (!new_ictx) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                NFS_MSG_NO_MEMORY,
+                                "could not allocate nfs inode ctx");
+                        ret = -1;
+                        goto out;
+                }
+                new_ictx->generation = priv->generation;
+                INIT_LIST_HEAD (&new_ictx->shares);
+
+                ret = inode_ctx_put (inode, this, (uint64_t)new_ictx);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                NFS_MSG_SHARE_LIST_STORE_FAIL,
+                                "could not store share list");
+                        goto out;
+                }
+
+                /* The context is now stored on the inode; it must not be
+                 * freed below. */
+                ictx = new_ictx;
+                new_ictx = NULL;
+        }
+        else {
+                ictx = (struct nfs_inode_ctx *)ctx;
+        }
+
+        list_add (&share->inode_list, &ictx->shares);
+
+ out:
+        /* Only non-NULL when a context was allocated here but not stored. */
+        if (new_ictx)
+                GF_FREE (new_ictx);
+
+        return ret;
+}
+
+/*
+ * Check whether the share reservation requested in @cs conflicts with the
+ * shares already recorded on the resolved inode.
+ *
+ * Returns 0 when the request may proceed (including when the inode has no
+ * context or no shares) and -1 when @cs is NULL or a recorded share
+ * conflicts: the requested mode overlaps a recorded access, or the
+ * requested access overlaps a recorded mode.
+ */
+int
+nlm4_approve_share_reservation (nfs3_call_state_t *cs)
+{
+        uint64_t              raw_ctx     = 0;
+        struct nfs_inode_ctx *ictx        = NULL;
+        struct list_head     *shares      = NULL;
+        nlm_share_t          *held        = NULL;
+        fsh_mode              want_mode   = 0;
+        fsh_access            want_access = 0;
+
+        if (cs == NULL)
+                return -1;
+
+        /* No inode context means nothing has been reserved yet. */
+        if (inode_ctx_get (cs->resolvedloc.inode, THIS, &raw_ctx))
+                return 0;
+
+        ictx   = (struct nfs_inode_ctx *)raw_ctx;
+        shares = &ictx->shares;
+        if (!shares || list_empty (shares))
+                return 0;
+
+        want_mode   = cs->args.nlm4_shareargs.share.mode;
+        want_access = cs->args.nlm4_shareargs.share.access;
+
+        list_for_each_entry (held, shares, inode_list) {
+                if ((want_mode & held->access) != 0 ||
+                    (want_access & held->mode) != 0)
+                        return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Record the share reservation requested in @cs.
+ *
+ * Under nlm_client_list_lk: takes a reference on the resolved inode, looks
+ * up the (already registered) client, checks the request against existing
+ * shares, then allocates a share record and links it onto both the inode's
+ * and the client's share lists.
+ *
+ * Returns 0 on success. On any failure after the inode reference was
+ * taken, that reference is dropped and the share record (if any) is freed.
+ */
+int
+nlm4_create_share_reservation (nfs3_call_state_t *cs)
+{
+        int           status     = -1;
+        nlm_share_t  *resv       = NULL;
+        nlm_client_t *owner      = NULL;
+        inode_t      *held_inode = NULL;
+
+        LOCK (&nlm_client_list_lk);
+
+        held_inode = inode_ref (cs->resolvedloc.inode);
+        if (held_inode == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_INODE_NOT_FOUND,
+                        "inode not found");
+                goto unlock;
+        }
+
+        owner = __nlm_get_uniq (cs->args.nlm4_shareargs.share.caller_name);
+        if (owner == NULL) {
+                /* DO NOT add client. the client is supposed
+                   to be here, since nlm4svc_share adds it */
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_CLIENT_NOT_FOUND,
+                        "client not found");
+                goto undo;
+        }
+
+        status = nlm4_approve_share_reservation (cs);
+        if (status != 0)
+                goto undo;
+
+        resv = nlm4_share_new ();
+        if (resv == NULL) {
+                status = -1;
+                goto undo;
+        }
+
+        resv->inode  = held_inode;
+        resv->mode   = cs->args.nlm4_shareargs.share.mode;
+        resv->access = cs->args.nlm4_shareargs.share.access;
+        nlm_copy_lkowner (&resv->lkowner,
+                          &cs->args.nlm4_shareargs.share.oh);
+
+        status = nlm4_add_share_to_inode (resv);
+        if (status != 0)
+                goto undo;
+
+        list_add (&resv->client_list, &owner->shares);
+        goto unlock;
+
+ undo:
+        /* Every jump here has status != 0 and a referenced inode. */
+        inode_unref (held_inode);
+        GF_FREE (resv);
+
+ unlock:
+        UNLOCK (&nlm_client_list_lk);
+        return status;
+}
+
+/*
+ SHARE and UNSHARE calls DO NOT perform STACK_WIND,
+ the (non-monitored) share reservations are maintained
+ at *nfs xlator level only*, in memory.
+
+ nlm4_share_resume() is the resume callback for SHARE: it creates the
+ reservation, replies with nlm4_granted on success (otherwise with the
+ status held in 'stat') and wipes the call state. It returns -1 when
+ call_state is NULL and 0 otherwise.
+*/
+int
+nlm4_share_resume (void *call_state)
+{
+ int ret = -1;
+ nlm4_stats stat = nlm4_failed;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!call_state)
+ return ret;
+
+ cs = (nfs3_call_state_t *)call_state;
+ /* Presumably jumps to 'out' when the resolve failed - confirm against
+  * the macro definition. */
+ nlm4_check_fh_resolve_status (cs, stat, out);
+
+ ret = nlm4_create_share_reservation (cs);
+ if (!ret)
+ stat = nlm4_granted;
+
+ out:
+ /* The reply and the cleanup happen on every path. */
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+/*
+ * NLM4 SHARE actor.
+ *
+ * Decodes the share arguments from @req into a fresh call state, validates
+ * the file handle, rejects non-reclaim requests during the grace period,
+ * registers the caller as an NLM client and starts file-handle resolution,
+ * which continues in nlm4_share_resume().
+ *
+ * Returns RPCSVC_ACTOR_ERROR when @req is NULL, 0 when a reply has been
+ * sent from here, otherwise the value left in 'ret' by the last step.
+ */
+int
+nlm4svc_share (rpcsvc_request_t *req)
+{
+ nlm4_stats stat = nlm4_failed;
+ xlator_t *vol = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+ struct nfs3_fh fh = {{0}, };
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ /* The nlm4_* helpers below receive a label argument; presumably they
+  * jump to it on failure - their bodies are not in this block. */
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_shareargs (&cs->args.nlm4_shareargs, &cs->lockfh,
+ &cs->lkowner, cs->cookiebytes);
+
+ if (xdr_to_nlm4_shareargs (req->msg[0],
+ &cs->args.nlm4_shareargs) <= 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding SHARE args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ fh = cs->lockfh;
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req,
+ vol, stat, nlm4err);
+
+ if (nlm_grace_period && !cs->args.nlm4_shareargs.reclaim) {
+ gf_msg_debug (GF_NLM, 0, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ cs->trans = rpcsvc_request_transport_ref(req);
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ /* NOTE(review): this result is overwritten by the next call without
+  * being inspected. */
+ ret = nlm_add_nlmclnt (cs->args.nlm4_shareargs.share.caller_name);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh, NULL, nlm4_share_resume);
+
+ nlm4err:
+ /* A reply is sent from here, so 0 is returned to the caller. */
+ if (ret < 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_SHARE_CALL_FAIL,
+ "SHARE call failed");
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ rpcerr:
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+
+ return ret;
+}
+
+/*
+ * Remove the share reservation described by the UNSHARE request in @cs.
+ *
+ * Under nlm_client_list_lk: the first share on the resolved inode whose
+ * mode, access and owner handle all match the request is unlinked from
+ * both lists, its inode reference dropped and the record freed.
+ *
+ * Returns non-zero when the client is unknown, the inode is missing, the
+ * inode has no context, or its share list is empty; returns 0 otherwise,
+ * whether or not a matching share was found.
+ */
+int
+nlm4_remove_share_reservation (nfs3_call_state_t *cs)
+{
+        int                   status      = -1;
+        uint64_t              raw_ctx     = 0;
+        fsh_mode              want_mode   = 0;
+        fsh_access            want_access = 0;
+        nlm_share_t          *cur         = NULL;
+        nlm_share_t          *nxt         = NULL;
+        char                 *owner_name  = NULL;
+        inode_t              *target      = NULL;
+        struct list_head     *shares      = NULL;
+        nlm4_shareargs       *req_args    = NULL;
+        struct nfs_inode_ctx *ictx        = NULL;
+
+        LOCK (&nlm_client_list_lk);
+
+        req_args   = &cs->args.nlm4_shareargs;
+        owner_name = req_args->share.caller_name;
+
+        /* The client itself is not needed, only proof that it exists. */
+        if (__nlm_get_uniq (owner_name) == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_CLIENT_NOT_FOUND,
+                        "client not found: %s", owner_name);
+                goto unlock;
+        }
+
+        target = cs->resolvedloc.inode;
+        if (target == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_INODE_NOT_FOUND,
+                        "inode not found: client: %s", owner_name);
+                goto unlock;
+        }
+
+        status = inode_ctx_get (target, THIS, &raw_ctx);
+        if (status != 0) {
+                /* NOTE(review): gfid is formatted with %s - confirm it is
+                 * a NUL-terminated string rather than a raw identifier. */
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0,
+                        NFS_MSG_INODE_SHARES_NOT_FOUND,
+                        "no shares found for inode:"
+                        "gfid: %s; client: %s",
+                        target->gfid, owner_name);
+                goto unlock;
+        }
+
+        ictx   = (struct nfs_inode_ctx *)raw_ctx;
+        shares = &ictx->shares;
+        if (list_empty (shares)) {
+                status = -1;
+                goto unlock;
+        }
+
+        want_mode   = req_args->share.mode;
+        want_access = req_args->share.access;
+
+        list_for_each_entry_safe (cur, nxt, shares, inode_list) {
+                if (want_mode != cur->mode)
+                        continue;
+                if (want_access != cur->access)
+                        continue;
+                if (!nlm_is_oh_same_lkowner (&cur->lkowner,
+                                             &req_args->share.oh))
+                        continue;
+
+                /* Only the first matching share is removed. */
+                list_del (&cur->client_list);
+                list_del (&cur->inode_list);
+                inode_unref (cur->inode);
+                GF_FREE (cur);
+                break;
+        }
+
+        status = 0;
+
+ unlock:
+        UNLOCK (&nlm_client_list_lk);
+        return status;
+
+}
+
+/*
+ * Resume callback for UNSHARE: removes the reservation, replies with
+ * nlm4_granted on success (otherwise with the status held in 'stat') and
+ * wipes the call state. Returns -1 when @call_state is NULL, 0 otherwise.
+ */
+int
+nlm4_unshare_resume (void *call_state)
+{
+ int ret = -1;
+ nlm4_stats stat = nlm4_failed;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!call_state)
+ return ret;
+
+ cs = (nfs3_call_state_t *)call_state;
+
+ /* Presumably jumps to 'out' when the resolve failed - confirm against
+  * the macro definition. */
+ nlm4_check_fh_resolve_status (cs, stat, out);
+ ret = nlm4_remove_share_reservation (cs);
+ if (!ret)
+ stat = nlm4_granted;
+
+ out:
+ /* The reply and the cleanup happen on every path. */
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+/*
+ * NLM4 UNSHARE actor.
+ *
+ * Decodes the share arguments from @req into a fresh call state, validates
+ * the file handle, rejects non-reclaim requests during the grace period
+ * and starts file-handle resolution, which continues in
+ * nlm4_unshare_resume().
+ *
+ * Returns RPCSVC_ACTOR_ERROR when @req is NULL, 0 when a reply has been
+ * sent from here, otherwise the value left in 'ret' by the last step.
+ */
+int
+nlm4svc_unshare (rpcsvc_request_t *req)
+{
+        nlm4_stats         stat = nlm4_failed;
+        xlator_t          *vol  = NULL;
+        nfs3_state_t      *nfs3 = NULL;
+        nfs3_call_state_t *cs   = NULL;
+        struct nfs_state  *nfs  = NULL;
+        struct nfs3_fh     fh   = {{0}, };
+        int                ret  = RPCSVC_ACTOR_ERROR;
+
+        if (!req)
+                return ret;
+
+        /* The nlm4_* helpers below receive a label argument; presumably
+         * they jump to it on failure - their bodies are not in this block. */
+        nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+        nfs = nfs_state (nfs3->nfsx);
+        nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+                                     stat, rpcerr);
+
+        nlm4_prep_shareargs (&cs->args.nlm4_shareargs, &cs->lockfh,
+                             &cs->lkowner, cs->cookiebytes);
+
+        if (xdr_to_nlm4_shareargs (req->msg[0],
+                                   &cs->args.nlm4_shareargs) <= 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+                        "Error decoding UNSHARE args");
+                rpcsvc_request_seterr (req, GARBAGE_ARGS);
+                goto rpcerr;
+        }
+
+        fh = cs->lockfh;
+        nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+        nlm4_map_fh_to_volume (cs->nfs3state, fh, req,
+                               vol, stat, nlm4err);
+
+        if (nlm_grace_period && !cs->args.nlm4_shareargs.reclaim) {
+                gf_msg_debug (GF_NLM, 0, "NLM in grace period");
+                stat = nlm4_denied_grace_period;
+                nlm4_share_reply (cs, stat);
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+        cs->vol = vol;
+        cs->trans = rpcsvc_request_transport_ref(req);
+        nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+        ret = nfs3_fh_resolve_and_resume (cs, &fh, NULL,
+                                          nlm4_unshare_resume);
+
+ nlm4err:
+        if (ret < 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, -ret, NFS_MSG_UNSHARE_CALL_FAIL,
+                        "UNSHARE call failed");
+                nlm4_share_reply (cs, stat);
+                /* Release the call state after replying, as the SHARE actor
+                 * does on its equivalent path; it was previously left
+                 * un-wiped here. */
+                nfs3_call_state_wipe (cs);
+                return 0;
+        }
+
+ rpcerr:
+        if (ret < 0)
+                nfs3_call_state_wipe (cs);
+
+        return ret;
+}
+
+/*
+ * Release every share reservation held by the client named @caller_name.
+ *
+ * Under nlm_client_list_lk each share on the client's list is unlinked
+ * from both the inode and the client list, its inode reference is dropped
+ * and the record freed. An unknown client is only noted in the debug log.
+ * Always returns 0.
+ */
+int
+nlm4_free_all_shares (char *caller_name)
+{
+        nlm_client_t *owner = NULL;
+        nlm_share_t  *cur   = NULL;
+        nlm_share_t  *nxt   = NULL;
+
+        LOCK (&nlm_client_list_lk);
+
+        owner = __nlm_get_uniq (caller_name);
+        if (owner == NULL) {
+                gf_msg_debug (GF_NLM, 0, "client not found: %s", caller_name);
+        } else {
+                list_for_each_entry_safe (cur, nxt, &owner->shares,
+                                          client_list) {
+                        list_del (&cur->inode_list);
+                        list_del (&cur->client_list);
+                        inode_unref (cur->inode);
+                        GF_FREE (cur);
+                }
+        }
+
+        UNLOCK (&nlm_client_list_lk);
+        return 0;
+}
+
+/*
+ * NLM4 FREE_ALL actor: drops all share reservations and all fd entries
+ * recorded for the client named in the request.
+ *
+ * The call state is wiped on every path that reaches 'err'. No reply is
+ * submitted from within this function.
+ */
+int
+nlm4svc_free_all (rpcsvc_request_t *req)
+{
+ int ret = RPCSVC_ACTOR_ERROR;
+ nlm4_stats stat = nlm4_failed;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+
+ /* NOTE(review): unlike the other actors there is no NULL check on req
+  * before it is used. The nlm4_* helpers below receive a label argument;
+  * presumably they jump to it on failure - their bodies are not in this
+  * block. */
+ nlm4_validate_nfs3_state (req, nfs3, stat, err, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs,
+ req, stat, err);
+
+ nlm4_prep_freeallargs (&cs->args.nlm4_freeallargs,
+ &cs->lkowner);
+
+ if (xdr_to_nlm4_freeallargs (req->msg[0],
+ &cs->args.nlm4_freeallargs) <= 0) {
+ gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_ARGS_DECODE_ERROR,
+ "Error decoding FREE_ALL args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto err;
+ }
+
+ /* nlm4_free_all_shares() as defined in this file always returns 0. */
+ ret = nlm4_free_all_shares (cs->args.nlm4_freeallargs.name);
+ if (ret)
+ goto err;
+
+ /* nlm_cleanup_fds() as defined in this file always returns 0. */
+ ret = nlm_cleanup_fds (cs->args.nlm4_freeallargs.name);
+ if (ret)
+ goto err;
+
+ err:
+ /* Reached on success as well as on failure. */
+ nfs3_call_state_wipe (cs);
+ if (ret)
+ gf_msg_debug (GF_NLM, 0, "error in free all; stat: %d", stat);
+ return ret;
+
+}
+
+/*
+ * Handle a status-monitor notification: log the monitored name and state,
+ * then drop every fd entry recorded for that name.
+ */
+void
+nlm4svc_sm_notify (struct nlm_sm_status *status)
+{
+        char *peer = status->mon_name;
+
+        gf_msg (GF_NLM, GF_LOG_INFO, 0, NFS_MSG_SM_NOTIFY,
+                "sm_notify: %s, state: %d", peer, status->state);
+
+        nlm_cleanup_fds (peer);
+}
+
+/*
+ * Actor table for the NLM4 program, one entry per procedure number
+ * (NLM4_PROC_COUNT entries). Each entry lists, in order, a procedure
+ * name, the procedure constant, the handler function (NULL where none is
+ * provided here) and, as its last field, a DRC_* constant.
+ * The *_MSG and *_RES variants reuse the name of their base procedure.
+ */
+rpcsvc_actor_t nlm4svc_actors[NLM4_PROC_COUNT] = {
+ /* 0 */
+ {"NULL", NLM4_NULL, nlm4svc_null, NULL, 0, DRC_IDEMPOTENT},
+ {"TEST", NLM4_TEST, nlm4svc_test, NULL, 0, DRC_IDEMPOTENT},
+ {"LOCK", NLM4_LOCK, nlm4svc_lock, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"CANCEL", NLM4_CANCEL, nlm4svc_cancel, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"UNLOCK", NLM4_UNLOCK, nlm4svc_unlock, NULL, 0, DRC_NON_IDEMPOTENT},
+ /* 5 */
+ {"GRANTED", NLM4_GRANTED, NULL, NULL, 0, DRC_NA},
+ {"TEST", NLM4_TEST_MSG, NULL, NULL, 0, DRC_NA},
+ {"LOCK", NLM4_LOCK_MSG, NULL, NULL, 0, DRC_NA},
+ {"CANCEL", NLM4_CANCEL_MSG, NULL, NULL, 0, DRC_NA},
+ {"UNLOCK", NLM4_UNLOCK_MSG, NULL, NULL, 0, DRC_NA},
+ /* 10 */
+ {"GRANTED", NLM4_GRANTED_MSG, NULL, NULL, 0, DRC_NA},
+ {"TEST", NLM4_TEST_RES, NULL, NULL, 0, DRC_NA},
+ {"LOCK", NLM4_LOCK_RES, NULL, NULL, 0, DRC_NA},
+ {"CANCEL", NLM4_CANCEL_RES, NULL, NULL, 0, DRC_NA},
+ {"UNLOCK", NLM4_UNLOCK_RES, NULL, NULL, 0, DRC_NA},
+ /* 15 ; procedures 17,18,19 are not defined by nlm */
+ {"GRANTED", NLM4_GRANTED_RES, NULL, NULL, 0, DRC_NA},
+ {"SM_NOTIFY", NLM4_SM_NOTIFY, NULL, NULL, 0, DRC_NA},
+ {"SEVENTEEN", NLM4_SEVENTEEN, NULL, NULL, 0, DRC_NA},
+ {"EIGHTEEN", NLM4_EIGHTEEN, NULL, NULL, 0, DRC_NA},
+ {"NINETEEN", NLM4_NINETEEN, NULL, NULL, 0, DRC_NA},
+ /* 20 */
+ {"SHARE", NLM4_SHARE, nlm4svc_share, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"UNSHARE", NLM4_UNSHARE, nlm4svc_unshare, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"NM_LOCK", NLM4_NM_LOCK, nlm4svc_nm_lock, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"FREE_ALL", NLM4_FREE_ALL, nlm4svc_free_all, NULL, 0, DRC_IDEMPOTENT},
+};
+
+/*
+ * RPC program descriptor for NLM version 4. It ties the program and
+ * version numbers and the port to the actor table above. The .private
+ * member is filled in later by nlm4svc_init().
+ */
+rpcsvc_program_t nlm4prog = {
+ .progname = "NLM4",
+ .prognum = NLM_PROGRAM,
+ .progver = NLM_V4,
+ .progport = GF_NLM4_PORT,
+ .actors = nlm4svc_actors,
+ .numactors = NLM4_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+};
+
+
+/* Placeholder initialiser: performs no work and always reports success. */
+int
+nlm4_init_state (xlator_t *nfsx)
+{
+        (void) nfsx;
+
+        return 0;
+}
+
+extern void *nsm_thread (void *argv);
+
+/* Clears the global grace-period flag; @arg is not used. */
+void
+nlm_grace_period_over (void *arg)
+{
+        (void) arg;
+
+        nlm_grace_period = 0;
+}
+
+/*
+ * Bring up the NLM4 program for the NFS xlator @nfsx.
+ *
+ * In order: bind nlm4prog to the NFSv3 state, create the "NLM" listeners on
+ * GF_NLM4_PORT, initialise the client list and its lock, remove the
+ * sm-notify pid file, kill and restart rpc.statd, spawn nsm_thread and arm
+ * the grace-period timer.
+ *
+ * Returns &nlm4prog on success (also when a previous call already
+ * succeeded) and NULL on any failure.
+ */
+rpcsvc_program_t *
+nlm4svc_init(xlator_t *nfsx)
+{
+        struct nfs3_state *ns = NULL;
+        struct nfs_state *nfs = NULL;
+        dict_t *options = NULL;
+        int ret = -1;
+        char *portstr = NULL;
+        char *pid_file = GF_RPC_STATD_PIDFILE;
+        pthread_t thr;
+        struct timespec timeout = {0,};
+        FILE *pidfile = NULL;
+        pid_t pid = -1;
+        static gf_boolean_t nlm4_inited = _gf_false;
+
+        /* Already inited */
+        if (nlm4_inited)
+                return &nlm4prog;
+
+        nfs = (struct nfs_state*)nfsx->private;
+
+        ns = nfs->nfs3state;
+        if (!ns) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_NLM_INIT_FAIL,
+                        "NLM4 init failed");
+                goto err;
+        }
+        nlm4prog.private = ns;
+
+        options = dict_new ();
+        if (!options) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_NLM_INIT_FAIL,
+                        "NLM4 init failed");
+                goto err;
+        }
+
+        ret = gf_asprintf (&portstr, "%d", GF_NLM4_PORT);
+        if (ret == -1)
+                goto err;
+
+        ret = dict_set_dynstr (options, "transport.socket.listen-port",
+                               portstr);
+        if (ret == -1)
+                goto err;
+        ret = dict_set_str (options, "transport-type", "socket");
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno,
+                        NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+                goto err;
+        }
+
+        if (nfs->allow_insecure) {
+                ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+                if (ret == -1) {
+                        gf_msg (GF_NLM, GF_LOG_ERROR, errno,
+                                NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+                        goto err;
+                }
+                ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+                if (ret == -1) {
+                        gf_msg (GF_NLM, GF_LOG_ERROR, errno,
+                                NFS_MSG_DICT_SET_FAILED, "dict_set_str error");
+                        goto err;
+                }
+        }
+
+        ret = dict_set_str (options, "transport.address-family", "inet");
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED,
+                        "dict_set_str error");
+                goto err;
+        }
+
+        ret = rpcsvc_create_listeners (nfs->rpcsvc, options, "NLM");
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno,
+                        NFS_MSG_LISTENERS_CREATE_FAIL,
+                        "Unable to create listeners");
+                /* options is released under err: */
+                goto err;
+        }
+        /* The listeners were built from this dict and the code has never
+         * dropped its reference once they exist, so make sure the err: path
+         * leaves it alone from here on.
+         * NOTE(review): if rpcsvc_create_listeners() takes its own
+         * reference, a dict_unref() also belongs on the success path.
+         */
+        options = NULL;
+
+        INIT_LIST_HEAD(&nlm_client_list);
+        LOCK_INIT (&nlm_client_list_lk);
+
+        /* unlink sm-notify.pid so that when we restart rpc.statd/sm-notify
+         * it thinks that the machine has restarted and sends NOTIFY to clients.
+         */
+
+        /* TODO:
+           notify/rpc.statd is done differently on OSX
+
+           On OSX rpc.statd is controlled by rpc.lockd and both are part of
+           launchd (unified service management framework)
+
+           A runcmd() should be invoking "launchctl start com.apple.lockd"
+           instead. This is still a theory but we need to thoroughly test it
+           out. Until then NLM support is non-existent on OSX.
+        */
+        ret = sys_unlink (GF_SM_NOTIFY_PIDFILE);
+        if (ret == -1 && errno != ENOENT) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_UNLINK_ERROR,
+                        "unable to unlink %s: %d",
+                        GF_SM_NOTIFY_PIDFILE, errno);
+                goto err;
+        }
+        /* temporary work around to restart statd, not distro/OS independent.
+         * Need to figure out a more graceful way
+         * killall will cause problems on solaris.
+         */
+
+        if (nfs->rpc_statd_pid_file)
+                pid_file = nfs->rpc_statd_pid_file;
+        pidfile = fopen (pid_file, "r");
+        if (pidfile) {
+                ret = fscanf (pidfile, "%d", &pid);
+                /* A pid of 0 or below must never reach kill(): it would
+                 * address a process group or every process instead of
+                 * rpc.statd, so treat it like an unreadable pid file.
+                 */
+                if (ret <= 0 || pid <= 0) {
+                        gf_msg (GF_NLM, GF_LOG_WARNING, errno,
+                                NFS_MSG_GET_PID_FAIL, "unable to get pid of "
+                                "rpc.statd from %s ", pid_file);
+                        ret = runcmd (KILLALL_CMD, "-9", "rpc.statd", NULL);
+                } else {
+                        kill (pid, SIGKILL);
+                }
+
+                fclose (pidfile);
+        } else {
+                gf_msg (GF_NLM, GF_LOG_WARNING, errno, NFS_MSG_OPEN_FAIL,
+                        "opening %s of rpc.statd failed (%s)",
+                        pid_file, strerror (errno));
+                /* if ret == -1, do nothing - case either statd was not
+                 * running or was running in valgrind mode
+                 */
+                ret = runcmd (KILLALL_CMD, "-9", "rpc.statd", NULL);
+        }
+
+        /* NOTE(review): the default path is unlinked even when a custom
+         * rpc_statd_pid_file was read above; confirm whether pid_file was
+         * meant here. The message reports the path actually unlinked.
+         */
+        ret = sys_unlink (GF_RPC_STATD_PIDFILE);
+        if (ret == -1 && errno != ENOENT) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_UNLINK_ERROR,
+                        "unable to unlink %s", GF_RPC_STATD_PIDFILE);
+                goto err;
+        }
+
+        ret = runcmd (nfs->rpc_statd, NULL);
+        if (ret == -1) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_START_ERROR,
+                        "unable to start %s", nfs->rpc_statd);
+                goto err;
+        }
+
+        /* NOTE(review): the result of pthread_create() is not checked, so a
+         * failure to start nsm_thread goes unnoticed here.
+         */
+        pthread_create (&thr, NULL, nsm_thread, (void*)NULL);
+
+        timeout.tv_sec = nlm_grace_period;
+        timeout.tv_nsec = 0;
+
+        gf_timer_call_after (nfsx->ctx, timeout, nlm_grace_period_over, NULL);
+        nlm4_inited = _gf_true;
+        return &nlm4prog;
+err:
+        /* Non-NULL only for failures up to and including listener creation */
+        if (options)
+                dict_unref (options);
+        return NULL;
+}
+
+/*
+ * Write the NLM state into the process statedump under section "nfs.nlm":
+ * one hostname entry per known client, one gfid entry per file descriptor
+ * entry of that client, the per-client file count and the total client
+ * count.
+ *
+ * The client list is only try-locked; when the lock cannot be taken a
+ * "statedump_error" entry is written instead and -1 is returned.
+ * Returns 0 when the dump was produced.
+ */
+int32_t
+nlm_priv (xlator_t *this)
+{
+        int32_t ret = -1;
+        uint32_t client_count = 0;
+        uint64_t file_count = 0;
+        nlm_client_t *client = NULL;
+        nlm_fde_t *fde = NULL;
+        char key[GF_DUMP_MAX_BUF_LEN] = {0};
+        char gfid_str[64] = {0};
+
+        gf_proc_dump_add_section("nfs.nlm");
+
+        if (TRY_LOCK (&nlm_client_list_lk))
+                goto out;
+
+        list_for_each_entry (client, &nlm_client_list, nlm_clients) {
+
+                /* Counters are cast to the exact type each conversion
+                 * expects; the emitted text is unchanged.
+                 */
+                gf_proc_dump_build_key (key, "client", "%d.hostname",
+                                        (int) client_count);
+                gf_proc_dump_write (key, "%s\n", client->caller_name);
+
+                file_count = 0;
+                list_for_each_entry (fde, &client->fdes, fde_list) {
+                        gf_proc_dump_build_key (key, "file", "%ld.gfid",
+                                                (long) file_count);
+                        memset (gfid_str, 0, sizeof (gfid_str));
+                        uuid_utoa_r (fde->fd->inode->gfid, gfid_str);
+                        gf_proc_dump_write (key, "%s", gfid_str);
+                        file_count++;
+                }
+
+                gf_proc_dump_build_key (key, "client", "files-locked");
+                gf_proc_dump_write (key, "%ld\n", (long) file_count);
+                client_count++;
+        }
+
+        gf_proc_dump_build_key (key, "nlm", "client-count");
+        gf_proc_dump_write (key, "%d", (int) client_count);
+        ret = 0;
+        UNLOCK (&nlm_client_list_lk);
+
+ out:
+        /* ret is still -1 only when the try-lock above failed */
+        if (ret) {
+                gf_proc_dump_build_key (key, "nlm", "statedump_error");
+                gf_proc_dump_write (key, "Unable to dump nlm state because "
+                                    "nlm_client_list_lk lock couldn't be acquired");
+        }
+
+        return ret;
+}
diff --git a/xlators/nfs/server/src/nlm4.h b/xlators/nfs/server/src/nlm4.h
new file mode 100644
index 00000000000..c7da5d62193
--- /dev/null
+++ b/xlators/nfs/server/src/nlm4.h
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NLM4_H_
+#define _NLM4_H_
+
+#include <sys/types.h>
+#include <signal.h>
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "nfs.h"
+#include "list.h"
+#include "xdr-nfs3.h"
+#include "locking.h"
+#include "nfs3-fh.h"
+#include "compat-uuid.h"
+#include "nlm4-xdr.h"
+#include "lkowner.h"
+
+#define NLM4_NULL 0
+#define NLM4_TEST 1
+#define NLM4_LOCK 2
+#define NLM4_CANCEL 3
+#define NLM4_UNLOCK 4
+#define NLM4_GRANTED 5
+#define NLM4_TEST_MSG 6
+#define NLM4_LOCK_MSG 7
+#define NLM4_CANCEL_MSG 8
+#define NLM4_UNLOCK_MSG 9
+#define NLM4_GRANTED_MSG 10
+#define NLM4_TEST_RES 11
+#define NLM4_LOCK_RES 12
+#define NLM4_CANCEL_RES 13
+#define NLM4_UNLOCK_RES 14
+#define NLM4_GRANTED_RES 15
+#define NLM4_SM_NOTIFY 16
+#define NLM4_SEVENTEEN 17
+#define NLM4_EIGHTEEN 18
+#define NLM4_NINETEEN 19
+#define NLM4_SHARE 20
+#define NLM4_UNSHARE 21
+#define NLM4_NM_LOCK 22
+#define NLM4_FREE_ALL 23
+#define NLM4_PROC_COUNT 24
+
+/* Registered with portmap */
+#define GF_NLM4_PORT 38468
+#define GF_NLM GF_NFS"-NLM"
+#if defined(GF_DARWIN_HOST_OS)
+#define GF_RPC_STATD_PROG "/usr/sbin/rpc.statd"
+#define GF_RPC_STATD_PIDFILE "/var/run/statd.pid"
+#define GF_SM_NOTIFY_PIDFILE "/var/run/statd.notify.pid"
+#elif defined(__NetBSD__)
+#define GF_RPC_STATD_PROG "/usr/sbin/rpc.statd"
+#define GF_RPC_STATD_PIDFILE "/var/run/rpc.statd.pid"
+#define GF_SM_NOTIFY_PIDFILE "/var/run/inexistant.pid"
+#else
+#define GF_RPC_STATD_PROG "/sbin/rpc.statd"
+#define GF_RPC_STATD_PIDFILE "/var/run/rpc.statd.pid"
+#define GF_SM_NOTIFY_PIDFILE "/var/run/sm-notify.pid"
+#endif
+
+extern rpcsvc_program_t *
+nlm4svc_init (xlator_t *nfsx);
+
+extern int
+nlm4_init_state (xlator_t *nfsx);
+
+#define NLM_PROGRAM 100021
+#define NLM_V4 4
+
+typedef struct nlm4_lwowner { /* NOTE(review): tag reads "lwowner" while the typedef reads "lkowner"; looks like a typo, but renaming the tag could break users */
+ char temp[1024]; /* fixed 1024-byte buffer; its contents are not described in this header */
+} nlm4_lkowner_t;
+
+typedef struct nlm_client { /* one entry of the global client list walked by nlm_priv() */
+ struct sockaddr_storage sa; /* presumably the client's network address -- confirm */
+ pid_t uniq;
+ struct list_head nlm_clients; /* link used when nlm_client_list is iterated */
+ struct list_head fdes; /* head of this client's nlm_fde_t entries (linked via fde_list) */
+ struct list_head shares; /* presumably head of this client's nlm_share_t entries -- confirm */
+ struct rpc_clnt *rpc_clnt;
+ char *caller_name; /* written as the client's "hostname" in the statedump */
+ int nsm_monitor; /* presumably a flag for statd monitoring -- confirm */
+} nlm_client_t;
+
+typedef struct nlm_share { /* record for one share reservation -- presumably created by the SHARE procedure; confirm */
+ struct list_head client_list; /* presumably links into nlm_client_t.shares -- confirm */
+ struct list_head inode_list; /* presumably links into a per-inode list -- confirm */
+ gf_lkowner_t lkowner;
+ inode_t *inode;
+ fsh_mode mode;
+ fsh_access access;
+} nlm_share_t;
+
+typedef struct nlm_fde { /* one file-descriptor entry on a client's fdes list */
+ struct list_head fde_list; /* link used when nlm_client_t.fdes is iterated */
+ fd_t *fd; /* nlm_priv() dumps fd->inode->gfid for each entry */
+ int transit_cnt; /* presumably counts requests in flight on this fd -- confirm */
+} nlm_fde_t;
+
+#endif
diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c
new file mode 100644
index 00000000000..badbd088f3d
--- /dev/null
+++ b/xlators/nfs/server/src/nlmcbk_svc.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "nlm4.h"
+#include "logging.h"
+#include "nfs-messages.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <rpc/pmap_clnt.h>
+#include <string.h>
+#include <memory.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#ifndef SIG_PF
+#define SIG_PF void(*)(int)
+#endif
+
+void
+nlm4svc_sm_notify (struct nlm_sm_status *status);
+
+void *nlmcbk_sm_notify_0_svc(struct nlm_sm_status *status, struct svc_req *req)
+{ /* adapter selected by nlmcbk_program_0 for NLMCBK_SM_NOTIFY; req is unused */
+ nlm4svc_sm_notify (status); /* hand the status report to the NLM4 code */
+ return NULL; /* no result object; the dispatcher encodes the reply with xdr_void */
+}
+
+static void
+nlmcbk_program_0(struct svc_req *rqstp, register SVCXPRT *transp)
+{ /* request dispatcher that nsm_thread registers for NLMCBK_PROGRAM / NLMCBK_V1 */
+ union {
+ struct nlm_sm_status nlmcbk_sm_notify_0_arg;
+ } argument; /* storage for the decoded call arguments */
+ char *result;
+ xdrproc_t _xdr_argument, _xdr_result;
+ char *(*local)(char *, struct svc_req *); /* handler chosen by the switch below */
+
+ switch (rqstp->rq_proc) {
+ case NULLPROC: /* answered directly with a void reply, no handler involved */
+ (void) svc_sendreply (transp, (xdrproc_t) xdr_void, (char *)NULL);
+ return;
+
+ case NLMCBK_SM_NOTIFY: /* pick decoder, encoder and handler for the notify call */
+ _xdr_argument = (xdrproc_t) xdr_nlm_sm_status;
+ _xdr_result = (xdrproc_t) xdr_void;
+ local = (char *(*)(char *, struct svc_req *)) nlmcbk_sm_notify_0_svc;
+ break;
+
+ default: /* any other procedure number is reported back as unavailable */
+ svcerr_noproc (transp);
+ return;
+ }
+ memset ((char *)&argument, 0, sizeof (argument));
+ if (!svc_getargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
+ svcerr_decode (transp); /* arguments could not be decoded; handler is not run */
+ return;
+ }
+ result = (*local)((char *)&argument, rqstp); /* nlmcbk_sm_notify_0_svc always returns NULL */
+ if (!svc_sendreply(transp, (xdrproc_t) _xdr_result, result)) {
+ svcerr_systemerr (transp);
+ }
+
+ if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) { /* runs even when the reply could not be sent */
+ gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_ARG_FREE_FAIL,
+ "unable to free arguments");
+ return;
+ }
+ return;
+}
+
+/*
+ * Thread entry point that serves the NLM callback program (NLMCBK_PROGRAM,
+ * NLMCBK_V1) over UDP and TCP, dispatching requests to nlmcbk_program_0.
+ *
+ * @argv is unused. The function returns (always NULL) only when a setup
+ * step fails or when svc_run() comes back; each of those exits is logged.
+ */
+void *
+nsm_thread (void *argv)
+{
+        register SVCXPRT *transp;
+        int ret = 0;
+
+        /* Drop any earlier registration of the callback program.
+         * NOTE(review): a zero result is treated as fatal; confirm that
+         * pmap_unset() cannot also return zero when nothing was registered
+         * beforehand, since that would stop this thread on a clean start.
+         */
+        ret = pmap_unset (NLMCBK_PROGRAM, NLMCBK_V1);
+        if (ret == 0) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_PMAP_UNSET_FAIL,
+                        "pmap_unset failed");
+                return NULL;
+        }
+
+        transp = svcudp_create(RPC_ANYSOCK);
+        if (transp == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_UDP_SERV_FAIL,
+                        "cannot create udp service.");
+                return NULL;
+        }
+        if (!svc_register(transp, NLMCBK_PROGRAM, NLMCBK_V1, nlmcbk_program_0, IPPROTO_UDP)) {
+                /* message names the version that is actually registered */
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_REG_NLMCBK_FAIL,
+                        "unable to register (NLMCBK_PROGRAM, "
+                        "NLMCBK_V1, udp).");
+                return NULL;
+        }
+
+        transp = svctcp_create(RPC_ANYSOCK, 0, 0);
+        if (transp == NULL) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_TCP_SERV_FAIL,
+                        "cannot create tcp service.");
+                return NULL;
+        }
+        if (!svc_register(transp, NLMCBK_PROGRAM, NLMCBK_V1, nlmcbk_program_0, IPPROTO_TCP)) {
+                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_REG_NLMCBK_FAIL,
+                        "unable to register (NLMCBK_PROGRAM, "
+                        "NLMCBK_V1, tcp).");
+                return NULL;
+        }
+
+        /* svc_run() is not expected to return; if it does, say so */
+        svc_run ();
+        gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_SVC_RUN_RETURNED,
+                "svc_run returned");
+        return NULL;
+}
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am
index e91d5f6efc8..eb4e32cbb14 100644
--- a/xlators/performance/Makefile.am
+++ b/xlators/performance/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read stat-prefetch
+SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind decompounder
CLEANFILES =
diff --git a/xlators/performance/decompounder/Makefile.am b/xlators/performance/decompounder/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/performance/decompounder/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/decompounder/src/Makefile.am b/xlators/performance/decompounder/src/Makefile.am
new file mode 100644
index 00000000000..693fe0aa5c2
--- /dev/null
+++ b/xlators/performance/decompounder/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = decompounder.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+decompounder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+decompounder_la_SOURCES = decompounder.c
+decompounder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = decompounder-mem-types.h decompounder-messages.h \
+ decompounder.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/decompounder/src/decompounder-mem-types.h b/xlators/performance/decompounder/src/decompounder-mem-types.h
new file mode 100644
index 00000000000..5c211c1a907
--- /dev/null
+++ b/xlators/performance/decompounder/src/decompounder-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __DC_MEM_TYPES_H__
+#define __DC_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_dc_mem_types_ { /* type ids private to the decompounder translator */
+ gf_dc_mt_rsp_t = gf_common_mt_end + 1, /* first id sits just past gf_common_mt_end, presumably so it cannot collide with the common ids */
+ gf_dc_mt_end /* last enumerator; presumably an end marker rather than a usable id -- confirm */
+};
+#endif
diff --git a/xlators/performance/decompounder/src/decompounder-messages.h b/xlators/performance/decompounder/src/decompounder-messages.h
new file mode 100644
index 00000000000..825599e9479
--- /dev/null
+++ b/xlators/performance/decompounder/src/decompounder-messages.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DC_MESSAGES_H_
+#define _DC_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+#define GLFS_COMP_BASE_DC GLFS_MSGID_COMP_DC
+#define GLFS_NUM_MESSAGES 2
+#define GLFS_MSGID_END (GLFS_COMP_BASE_DC + GLFS_NUM_MESSAGES + 1)
+
+#define glfs_msg_start_x GLFS_COMP_BASE_DC, "Invalid: Start of messages"
+
+#define DC_MSG_VOL_MISCONFIGURED (GLFS_COMP_BASE_DC + 1)
+
+#define DC_MSG_ERROR_RECEIVED (GLFS_COMP_BASE_DC + 2)
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_DC_MESSAGES_H_ */
diff --git a/xlators/performance/decompounder/src/decompounder.c b/xlators/performance/decompounder/src/decompounder.c
new file mode 100644
index 00000000000..3009fcdd4b1
--- /dev/null
+++ b/xlators/performance/decompounder/src/decompounder.c
@@ -0,0 +1,952 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "decompounder.h"
+#include "mem-types.h"
+
+/*
+ * Tear down the compound response kept in @local: every one of the
+ * local->length reply slots is wiped with args_cbk_wipe(), after which the
+ * slot array and then the response object itself are freed.
+ */
+void
+dc_local_cleanup (dc_local_t *local)
+{
+        int idx = 0;
+
+        while (idx < local->length) {
+                args_cbk_wipe (&local->compound_rsp->rsp_list[idx]);
+                idx++;
+        }
+
+        /* array first, container second */
+        GF_FREE (local->compound_rsp->rsp_list);
+        GF_FREE (local->compound_rsp);
+}
+
+int32_t
+dc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{ /* stat reply cbk: hands status + (buf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (stat, frame, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+dc_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{ /* readlink reply cbk: hands status + (path, buf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (readlink, frame, op_ret, op_errno,
+ path, buf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{ /* mknod reply cbk: hands status + (inode, buf, preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (mknod, frame, op_ret, op_errno,
+ inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{ /* mkdir reply cbk: hands status + (inode, buf, preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (mkdir, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{ /* unlink reply cbk: hands status + (preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{ /* rmdir reply cbk: hands status + (preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{ /* symlink reply cbk: hands status + (inode, buf, preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (symlink, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent,
+ dict_t *xdata)
+{ /* rename reply cbk: hands status + buf, the four old/new parent iatts and xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (rename, frame, op_ret, op_errno,
+ buf, preoldparent, postoldparent,
+ prenewparent, postnewparent,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{ /* link reply cbk: hands status + (inode, buf, preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (link, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{ /* truncate reply cbk: hands status + (prebuf, postbuf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{ /* open reply cbk: hands status + (fd, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (open, frame, op_ret, op_errno,
+ fd, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{ /* readv reply cbk: hands status + (vector, count, stbuf, iobref, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{ /* writev reply cbk: hands status + (prebuf, postbuf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (writev, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{ /* statfs reply cbk: hands status + (buf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (statfs, frame, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* flush reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (flush, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{ /* fsync reply cbk: hands status + (prebuf, postbuf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fsync, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* setxattr reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (setxattr, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{ /* getxattr reply cbk: hands status + (dict, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* removexattr reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (removexattr, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+int32_t
+dc_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{ /* opendir reply cbk: hands status + (fd, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (opendir, frame, op_ret, op_errno,
+ fd, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* fsyncdir reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fsyncdir, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* access reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (access, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{ /* create reply cbk: hands status + (fd, inode, buf, preparent, postparent, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (create, frame, op_ret, op_errno,
+ fd, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{ /* ftruncate reply cbk: hands status + (prebuf, postbuf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{ /* fstat reply cbk: hands status + (buf, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fstat, frame, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{ /* lk reply cbk: hands status + (lock, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (lk, frame, op_ret, op_errno,
+ lock, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf,
+ dict_t *xdata,
+ struct iatt *postparent)
+{ /* lookup reply cbk: hands status + (inode, buf, xdata, postparent), xdata before postparent, to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (lookup, frame, op_ret, op_errno,
+ inode, buf, xdata, postparent);
+ return 0;
+}
+
+
+int32_t
+dc_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{ /* readdir reply cbk: hands status + (entries, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (readdir, frame, op_ret, op_errno,
+ entries, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* inodelk reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (inodelk, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* finodelk reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (finodelk, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* entrylk reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (entrylk, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* fentrylk reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fentrylk, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{ /* xattrop reply cbk: hands status + (dict, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (xattrop, frame, op_ret, op_errno,
+ dict, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{ /* fxattrop reply cbk: hands status + (dict, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fxattrop, frame, op_ret, op_errno,
+ dict, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{ /* fgetxattr reply cbk: hands status + (dict, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fgetxattr, frame, op_ret, op_errno,
+ dict, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* fsetxattr reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fsetxattr, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_cksum,
+ uint8_t *strong_cksum, dict_t *xdata)
+{ /* rchecksum reply cbk: hands status + (weak_cksum, strong_cksum, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (rchecksum, frame, op_ret, op_errno,
+ weak_cksum, strong_cksum, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{ /* setattr reply cbk: hands status + (statpre, statpost, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (setattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{ /* fsetattr reply cbk: hands status + (statpre, statpost, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{ /* readdirp reply cbk: hands status + (entries, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (readdirp, frame, op_ret, op_errno,
+ entries, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* fremovexattr reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fremovexattr, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{ /* fallocate reply cbk: hands status + (pre, post, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (fallocate, frame, op_ret, op_errno,
+ pre, post, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{ /* discard reply cbk: hands status + (pre, post, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (discard, frame, op_ret, op_errno,
+ pre, post, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{ /* zerofill reply cbk: hands status + (pre, post, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (zerofill, frame, op_ret, op_errno,
+ pre, post, xdata);
+ return 0;
+}
+
+
+int32_t
+dc_ipc_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* ipc reply cbk: hands status + xdata to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (ipc, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+dc_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+{ /* seek reply cbk: hands status + (offset, xdata) to the macro from an included header (name suggests store-reply-then-wind-next; confirm) */
+ DC_FOP_RESPONSE_STORE_AND_WIND_NEXT (seek, frame, op_ret, op_errno,
+ offset, xdata);
+ return 0;
+}
+
+int32_t
+dc_compound_fop_wind (call_frame_t *frame, xlator_t *this)
+{
+ dc_local_t *local = frame->local;
+ compound_args_t *c_req = local->compound_req;
+ compound_args_cbk_t *c_rsp = local->compound_rsp;
+ int counter = local->counter;
+ default_args_t curr_fop = c_req->req_list[counter];
+ int op_ret = 0;
+ int op_errno = ENOMEM;
+
+ if (local->counter == local->length)
+ goto done;
+
+ switch (c_req->enum_list[counter]) {
+ case GF_FOP_STAT:
+ STACK_WIND (frame, dc_stat_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+ &curr_fop.loc, curr_fop.xdata);
+ break;
+ case GF_FOP_READLINK:
+ STACK_WIND (frame, dc_readlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink,
+ &curr_fop.loc, curr_fop.size,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_MKNOD:
+ STACK_WIND (frame, dc_mknod_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ &curr_fop.loc, curr_fop.mode, curr_fop.rdev,
+ curr_fop.umask, curr_fop.xdata);
+ break;
+ case GF_FOP_MKDIR:
+ STACK_WIND (frame, dc_mkdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &curr_fop.loc, curr_fop.mode,
+ curr_fop.umask, curr_fop.xdata);
+ break;
+ case GF_FOP_UNLINK:
+ STACK_WIND (frame, dc_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ &curr_fop.loc, curr_fop.xflag, curr_fop.xdata);
+ break;
+ case GF_FOP_RMDIR:
+ STACK_WIND (frame, dc_rmdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ &curr_fop.loc, curr_fop.flags, curr_fop.xdata);
+ break;
+ case GF_FOP_SYMLINK:
+ STACK_WIND (frame, dc_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ curr_fop.linkname, &curr_fop.loc,
+ curr_fop.umask, curr_fop.xdata);
+ break;
+ case GF_FOP_RENAME:
+ STACK_WIND (frame, dc_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ &curr_fop.loc, &curr_fop.loc2, curr_fop.xdata);
+ break;
+ case GF_FOP_LINK:
+ STACK_WIND (frame, dc_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ &curr_fop.loc, &curr_fop.loc2, curr_fop.xdata);
+ break;
+ case GF_FOP_TRUNCATE:
+ STACK_WIND (frame, dc_truncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ &curr_fop.loc, curr_fop.offset, curr_fop.xdata);
+ break;
+ case GF_FOP_OPEN:
+ STACK_WIND (frame, dc_open_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ &curr_fop.loc, curr_fop.flags, curr_fop.fd,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_READ:
+ STACK_WIND (frame, dc_readv_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ curr_fop.fd, curr_fop.size, curr_fop.offset,
+ curr_fop.flags, curr_fop.xdata);
+ break;
+ case GF_FOP_WRITE:
+ STACK_WIND (frame, dc_writev_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ curr_fop.fd, curr_fop.vector, curr_fop.count,
+ curr_fop.offset, curr_fop.flags, curr_fop.iobref,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_STATFS:
+ STACK_WIND (frame, dc_statfs_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
+ &curr_fop.loc, curr_fop.xdata);
+ break;
+ case GF_FOP_FLUSH:
+ STACK_WIND (frame, dc_flush_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
+ curr_fop.fd, curr_fop.xdata);
+ break;
+ case GF_FOP_FSYNC:
+ STACK_WIND (frame, dc_fsync_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+ curr_fop.fd, curr_fop.datasync, curr_fop.xdata);
+ break;
+ case GF_FOP_SETXATTR:
+ STACK_WIND (frame, dc_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ &curr_fop.loc, curr_fop.xattr, curr_fop.flags,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_GETXATTR:
+ STACK_WIND (frame, dc_getxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ &curr_fop.loc, curr_fop.name, curr_fop.xdata);
+ break;
+ case GF_FOP_REMOVEXATTR:
+ STACK_WIND (frame, dc_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ &curr_fop.loc, curr_fop.name, curr_fop.xdata);
+ break;
+ case GF_FOP_OPENDIR:
+ STACK_WIND (frame, dc_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ &curr_fop.loc, curr_fop.fd, curr_fop.xdata);
+ break;
+ case GF_FOP_FSYNCDIR:
+ STACK_WIND (frame, dc_fsyncdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir,
+ curr_fop.fd, curr_fop.datasync, curr_fop.xdata);
+ break;
+ case GF_FOP_ACCESS:
+ STACK_WIND (frame, dc_access_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->access,
+ &curr_fop.loc, curr_fop.mask, curr_fop.xdata);
+ break;
+ case GF_FOP_CREATE:
+ STACK_WIND (frame, dc_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ &curr_fop.loc, curr_fop.flags, curr_fop.mode,
+ curr_fop.umask, curr_fop.fd, curr_fop.xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND (frame, dc_ftruncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ curr_fop.fd, curr_fop.offset, curr_fop.xdata);
+ break;
+ case GF_FOP_FSTAT:
+ STACK_WIND (frame, dc_fstat_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+ curr_fop.fd, curr_fop.xdata);
+ break;
+ case GF_FOP_LK:
+ STACK_WIND (frame, dc_lk_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+ curr_fop.fd,
+ curr_fop.cmd, &curr_fop.lock, curr_fop.xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ STACK_WIND (frame, dc_lookup_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup,
+ &curr_fop.loc, curr_fop.xdata);
+ break;
+ case GF_FOP_READDIR:
+ STACK_WIND (frame, dc_readdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+ curr_fop.fd, curr_fop.size, curr_fop.offset,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_INODELK:
+ STACK_WIND (frame, dc_inodelk_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk,
+ curr_fop.volume, &curr_fop.loc,
+ curr_fop.cmd, &curr_fop.lock, curr_fop.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ STACK_WIND (frame, dc_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ curr_fop.volume, curr_fop.fd,
+ curr_fop.cmd, &curr_fop.lock, curr_fop.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ STACK_WIND (frame, dc_entrylk_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+ curr_fop.volume, &curr_fop.loc,
+ curr_fop.name, curr_fop.entrylkcmd,
+ curr_fop.entrylktype, curr_fop.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ STACK_WIND (frame, dc_fentrylk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk,
+ curr_fop.volume, curr_fop.fd,
+ curr_fop.name, curr_fop.entrylkcmd,
+ curr_fop.entrylktype, curr_fop.xdata);
+ break;
+ case GF_FOP_XATTROP:
+ STACK_WIND (frame, dc_xattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ &curr_fop.loc, curr_fop.optype, curr_fop.xattr,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_FXATTROP:
+ STACK_WIND (frame, dc_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ curr_fop.fd, curr_fop.optype, curr_fop.xattr,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_FGETXATTR:
+ STACK_WIND (frame, dc_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ curr_fop.fd, curr_fop.name, curr_fop.xdata);
+ break;
+ case GF_FOP_FSETXATTR:
+ STACK_WIND (frame, dc_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ curr_fop.fd, curr_fop.xattr, curr_fop.flags,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_RCHECKSUM:
+ STACK_WIND (frame, dc_rchecksum_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum,
+ curr_fop.fd, curr_fop.offset, curr_fop.size,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_SETATTR:
+ STACK_WIND (frame, dc_setattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
+ &curr_fop.loc, &curr_fop.stat, curr_fop.valid,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_FSETATTR:
+ STACK_WIND (frame, dc_fsetattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr,
+ curr_fop.fd, &curr_fop.stat, curr_fop.valid,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_READDIRP:
+ STACK_WIND (frame, dc_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ curr_fop.fd, curr_fop.size, curr_fop.offset,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ STACK_WIND (frame, dc_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ curr_fop.fd, curr_fop.name, curr_fop.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND (frame, dc_fallocate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate,
+ curr_fop.fd, curr_fop.flags, curr_fop.offset,
+ curr_fop.size, curr_fop.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STACK_WIND (frame, dc_discard_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+ curr_fop.fd, curr_fop.offset, curr_fop.size,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STACK_WIND (frame, dc_zerofill_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+ curr_fop.fd, curr_fop.offset, curr_fop.size,
+ curr_fop.xdata);
+ break;
+ case GF_FOP_IPC:
+ STACK_WIND (frame, dc_ipc_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->ipc,
+ curr_fop.cmd, curr_fop.xdata);
+ break;
+ case GF_FOP_SEEK:
+ STACK_WIND (frame, dc_seek_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->seek,
+ curr_fop.fd, curr_fop.offset, curr_fop.what,
+ curr_fop.xdata);
+ break;
+ default:
+ return -ENOTSUP;
+ }
+ return 0;
+done:
+ DC_STACK_UNWIND (frame, op_ret, op_errno, c_rsp, NULL);
+ return 0;
+}
+
+/*
+ * dc_compound - entry point of the decompounder for a compound fop.
+ *
+ * Allocates the per-call dc_local_t and the response container (one
+ * default_args_cbk_t slot per constituent fop), records the request in
+ * the local and starts winding through dc_compound_fop_wind ().
+ *
+ * @frame: call frame; frame->local is set to the new dc_local_t
+ * @this:  decompounder xlator
+ * @data:  compound request (compound_args_t *)
+ * @xdata: not used by this function
+ *
+ * Always returns 0.  On failure the frame is unwound with op_ret -1 and
+ * op_errno set to ENOMEM (allocation failure), EINVAL (zero-length
+ * request, and the error handed to the NULL-request check) or the
+ * negated return value of dc_compound_fop_wind ().
+ */
+int32_t
+dc_compound (call_frame_t *frame, xlator_t *this, void *data, dict_t *xdata)
+{
+        compound_args_t         *compound_req = NULL;
+        compound_args_cbk_t     *compound_rsp = NULL;
+        int                     ret = 0;
+        int                     op_errno = ENOMEM;
+        dc_local_t              *local = NULL;
+
+        compound_req = data;
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this, compound_req, out, op_errno,
+                                       EINVAL);
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto out;
+
+        /* From here on DC_STACK_UNWIND releases local via frame->local. */
+        frame->local = local;
+
+        /*
+         * Allocate the structure itself, not something the size of the
+         * pointer to it: fop_length and rsp_list are written right below.
+         */
+        local->compound_rsp = GF_CALLOC (1, sizeof (*local->compound_rsp),
+                                         gf_dc_mt_rsp_t);
+        if (!local->compound_rsp)
+                goto out;
+
+        compound_rsp = local->compound_rsp;
+
+        compound_rsp->fop_length = compound_req->fop_length;
+        compound_rsp->rsp_list = GF_CALLOC (compound_rsp->fop_length,
+                                            sizeof (default_args_cbk_t),
+                                            gf_mt_default_args_cbk_t);
+        if (!compound_rsp->rsp_list)
+                goto out;
+
+        local->length = compound_req->fop_length;
+        local->counter = 0;
+        local->compound_req = compound_req;
+
+        /* A compound request without any fop cannot be wound. */
+        if (!local->length) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* A negative return value is a negated errno. */
+        ret = dc_compound_fop_wind (frame, this);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+        return 0;
+out:
+        DC_STACK_UNWIND (frame, -1, op_errno, compound_rsp, NULL);
+        return 0;
+}
+
+/* No xlator callbacks are registered by this translator. */
+struct xlator_cbks cbks = {
+};
+
+/* This translator defines no volume options. */
+struct volume_options options[] = {
+};
+
+/* Only the compound fop is set explicitly; it is handled by dc_compound (). */
+struct xlator_fops fops = {
+ .compound = dc_compound,
+};
+
+/*
+ * mem_acct_init - set up memory accounting for this translator.
+ *
+ * Returns -1 when no xlator is given, otherwise whatever
+ * xlator_mem_acct_init () returns for gf_dc_mt_end + 1 memory types.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        if (this == NULL)
+                return -1;
+
+        return xlator_mem_acct_init (this, gf_dc_mt_end + 1);
+}
+
+/*
+ * init - check that the translator sits properly inside the graph.
+ *
+ * Returns 0 when the xlator has both a child and a parent; otherwise a
+ * warning is logged and -1 is returned.
+ */
+int32_t
+init (xlator_t *this)
+{
+        /* Compound fops are unpacked and wound to the first child. */
+        if (this->children == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DC_MSG_VOL_MISCONFIGURED, "Decompounder must have"
+                        " a subvol.");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DC_MSG_VOL_MISCONFIGURED, "Volume is dangling.");
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * fini - translator teardown hook.
+ *
+ * Does nothing and always returns 0; init () allocates nothing that would
+ * have to be released here.
+ */
+int32_t
+fini (xlator_t *this)
+{
+ return 0;
+}
diff --git a/xlators/performance/decompounder/src/decompounder.h b/xlators/performance/decompounder/src/decompounder.h
new file mode 100644
index 00000000000..1b8c1d6d00f
--- /dev/null
+++ b/xlators/performance/decompounder/src/decompounder.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __DC_H__
+#define __DC_H__
+
+#include "defaults.h"
+#include "xlator.h"
+#include "call-stub.h"
+#include "decompounder-mem-types.h"
+#include "decompounder-messages.h"
+
+/* Per-call state of one compound fop, kept in frame->local. */
+typedef struct {
+ /* The compound request handed to dc_compound (). */
+ compound_args_t *compound_req;
+ /* Response container; rsp_list holds one slot per constituent fop. */
+ compound_args_cbk_t *compound_rsp;
+ /* Index into rsp_list of the fop currently being processed. */
+ int counter;
+ /* Number of fops in the request (copied from fop_length). */
+ int length;
+} dc_local_t;
+
+/*
+ * Unwind a compound fop and release the per-call state.
+ *
+ * frame->local is detached before the unwind; once STACK_UNWIND_STRICT
+ * has run, the detached dc_local_t (if any) is passed to
+ * dc_local_cleanup () and returned to its pool with mem_put ().
+ * rsp is handed to the unwind as a void pointer.
+ */
+#define DC_STACK_UNWIND(frame, op_ret, op_errno, rsp, xdata) do {\
+ dc_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (compound, frame, op_ret, op_errno, \
+ (void *)rsp, xdata); \
+ if (__local) { \
+ dc_local_cleanup (__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+/*
+ * Wind the fop selected by the counter in frame->local to the first child.
+ * Callers treat a negative return value as a negated errno.
+ */
+int32_t
+dc_compound_fop_wind (call_frame_t *frame, xlator_t *this);
+
+/* Called by DC_STACK_UNWIND on the dc_local_t before it is mem_put (). */
+void
+dc_local_cleanup (dc_local_t *local);
+
+/*
+ * Common body of the per-fop callbacks.
+ *
+ * Stores the callback arguments of the current fop in
+ * rsp_list[local->counter] using args_<fop>_cbk_store ().  Then:
+ *  - op_ret < 0: logs the failure and unwinds the compound fop with the
+ *    same op_ret/op_errno; the remaining response slots are left as
+ *    allocated (see the TODO below).
+ *  - otherwise: increments the counter and calls dc_compound_fop_wind ();
+ *    if that returns a negative value the compound fop is unwound with
+ *    op_ret -1 and the negated return value as op_errno.
+ *
+ * frame and frame->local are dereferenced unconditionally, and op_ret and
+ * op_errno are expanded more than once, so pass plain variables.
+ *
+ * @fop:    fop name, pasted into args_<fop>_cbk_store
+ * @params: fop-specific callback arguments forwarded to the store function
+ */
+#define DC_FOP_RESPONSE_STORE_AND_WIND_NEXT(fop, frame, op_ret, op_errno, params ...) do { \
+ dc_local_t *__local = frame->local; \
+ xlator_t *__this = frame->this; \
+ int __ret = 0; \
+ int __counter = __local->counter; \
+ compound_args_cbk_t *__compound_rsp = __local->compound_rsp; \
+ default_args_cbk_t *__fop_rsp = &__local->compound_rsp->rsp_list[__counter]; \
+ \
+ if (op_ret < 0) { \
+ gf_msg (__this->name, GF_LOG_ERROR, op_errno, DC_MSG_ERROR_RECEIVED, \
+ "fop number %d failed. Unwinding.", __counter+1); \
+ args_##fop##_cbk_store (__fop_rsp, \
+ op_ret, op_errno, params); \
+ /*TODO : Fill the rest of the responses to -1 or NULL*/ \
+ DC_STACK_UNWIND (frame, op_ret, op_errno, \
+ (void *)__compound_rsp, NULL); \
+ } else { \
+ args_##fop##_cbk_store (__fop_rsp, \
+ op_ret, op_errno, params); \
+ __local->counter++; \
+ __ret = dc_compound_fop_wind (frame, __this); \
+ if (__ret < 0) { \
+ DC_STACK_UNWIND (frame, -1, -__ret, \
+ (void *)__compound_rsp, NULL); \
+ } \
+ } \
+ } while (0)
+#endif /* __DC_H__ */
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am
index 6dd270e8ffc..e6ce0bcd44d 100644
--- a/xlators/performance/io-cache/src/Makefile.am
+++ b/xlators/performance/io-cache/src/Makefile.am
@@ -1,14 +1,16 @@
xlator_LTLIBRARIES = io-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_cache_la_LDFLAGS = -module -avoidversion
+io_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c
io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-cache.h ioc-mem-types.h
+noinst_HEADERS = io-cache.h ioc-mem-types.h io-cache-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/io-cache/src/io-cache-messages.h b/xlators/performance/io-cache/src/io-cache-messages.h
new file mode 100644
index 00000000000..ba6b55d1299
--- /dev/null
+++ b/xlators/performance/io-cache/src/io-cache-messages.h
@@ -0,0 +1,137 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IO_CACHE_MESSAGES_H_
+#define _IO_CACHE_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file io-cache-messages.h
+ * \brief IO_CACHE log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_IO_CACHE_BASE GLFS_MSGID_COMP_IO_CACHE
+#define GLFS_IO_CACHE_NUM_MESSAGES 9
+#define GLFS_MSGID_END (GLFS_IO_CACHE_BASE + GLFS_IO_CACHE_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_IO_CACHE_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_ENFORCEMENT_FAILED (GLFS_IO_CACHE_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_INVALID_ARGUMENT (GLFS_IO_CACHE_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED (GLFS_IO_CACHE_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_NO_MEMORY (GLFS_IO_CACHE_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_VOL_MISCONFIGURED (GLFS_IO_CACHE_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_INODE_NULL (GLFS_IO_CACHE_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_PAGE_WAIT_VALIDATE (GLFS_IO_CACHE_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_STR_COVERSION_FAILED (GLFS_IO_CACHE_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_CACHE_MSG_WASTED_COPY (GLFS_IO_CACHE_BASE + 9)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _IO_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index e548c965c31..46e26bcdc1a 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -31,17 +17,16 @@
#include "statedump.h"
#include <assert.h>
#include <sys/time.h>
-
+#include "io-cache-messages.h"
int ioc_log2_page_size;
uint32_t
ioc_get_priority (ioc_table_t *table, const char *path);
-uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path);
+struct volume_options options[];
-inline uint32_t
+static uint32_t
ioc_hashfn (void *data, int len)
{
off_t offset;
@@ -51,146 +36,156 @@ ioc_hashfn (void *data, int len)
return (offset >> ioc_log2_page_size);
}
-inline ioc_inode_t *
+/* TODO: This function is not used, uncomment when we find a
+ usage for this function.
+
+static ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = ioc_inode->table;
+ ioc_table_t *table = NULL;
+
+ table = ioc_inode->table;
- list_add_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
-
- return ioc_inode;
+ list_add_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+
+ return ioc_inode;
}
-inline ioc_inode_t *
+
+static ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
- ioc_inode_t *ioc_inode = NULL;
- data_t *ioc_inode_data = dict_get (dict, name);
- ioc_table_t *table = NULL;
-
- if (ioc_inode_data) {
- ioc_inode = data_to_ptr (ioc_inode_data);
- table = ioc_inode->table;
-
- ioc_table_lock (table);
- {
- if (list_empty (&ioc_inode->inode_lru)) {
- ioc_inode = ioc_inode_reupdate (ioc_inode);
- }
- }
- ioc_table_unlock (table);
- }
-
- return ioc_inode;
+ ioc_inode_t *ioc_inode = NULL;
+ data_t *ioc_inode_data = NULL;
+ ioc_table_t *table = NULL;
+
+ ioc_inode_data = dict_get (dict, name);
+ if (ioc_inode_data) {
+ ioc_inode = data_to_ptr (ioc_inode_data);
+ table = ioc_inode->table;
+
+ ioc_table_lock (table);
+ {
+ if (list_empty (&ioc_inode->inode_lru)) {
+ ioc_inode = ioc_inode_reupdate (ioc_inode);
+ }
+ }
+ ioc_table_unlock (table);
+ }
+
+ return ioc_inode;
}
+*/
int32_t
ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {0,};
- int32_t ret = -1;
- ioc_table_t *table = ioc_inode->table;
+ int8_t need_revalidate = 0;
+ struct timeval tv = {0,};
+ ioc_table_t *table = NULL;
- ret = gettimeofday (&tv, NULL);
+ table = ioc_inode->table;
- if (time_elapsed (&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
- need_revalidate = 1;
+ gettimeofday (&tv, NULL);
- return need_revalidate;
+ if (time_elapsed (&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
+ need_revalidate = 1;
+
+ return need_revalidate;
}
/*
* __ioc_inode_flush - flush all the cached pages of the given inode
*
- * @ioc_inode:
+ * @ioc_inode:
*
* assumes lock is held
*/
int64_t
__ioc_inode_flush (ioc_inode_t *ioc_inode)
{
- ioc_page_t *curr = NULL, *next = NULL;
- int64_t destroy_size = 0;
- int64_t ret = 0;
+ ioc_page_t *curr = NULL, *next = NULL;
+ int64_t destroy_size = 0;
+ int64_t ret = 0;
- list_for_each_entry_safe (curr, next, &ioc_inode->cache.page_lru,
+ list_for_each_entry_safe (curr, next, &ioc_inode->cache.page_lru,
page_lru) {
- ret = ioc_page_destroy (curr);
-
- if (ret != -1)
- destroy_size += ret;
- }
-
- return destroy_size;
+ ret = __ioc_page_destroy (curr);
+
+ if (ret != -1)
+ destroy_size += ret;
+ }
+
+ return destroy_size;
}
void
ioc_inode_flush (ioc_inode_t *ioc_inode)
{
- int64_t destroy_size = 0;
-
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
- ioc_inode_unlock (ioc_inode);
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- return;
+ int64_t destroy_size = 0;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ return;
}
int32_t
ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
}
int32_t
ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ inode_ctx_get (loc->inode, this, &ioc_inode);
- if (ioc_inode
+ if (ioc_inode
&& ((valid & GF_SET_ATTR_ATIME)
|| (valid & GF_SET_ATTR_MTIME)))
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ return 0;
}
int32_t
ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
{
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = this->private;
- uint8_t cache_still_valid = 0;
- uint64_t tmp_ioc_inode = 0;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
- ioc_local_t *local = NULL;
-
- if (op_ret != 0)
- goto out;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ uint8_t cache_still_valid = 0;
+ uint64_t tmp_ioc_inode = 0;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ ioc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
local = frame->local;
if (local == NULL) {
@@ -199,20 +194,28 @@ ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+
path = local->file_loc.path;
LOCK (&inode->lock);
{
__inode_ctx_get (inode, this, &tmp_ioc_inode);
ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
+
if (!ioc_inode) {
weight = ioc_get_priority (table, path);
-
+
ioc_inode = ioc_inode_update (table, inode,
weight);
- __inode_ctx_put (inode, this,
+ __inode_ctx_put (inode, this,
(uint64_t)(long)ioc_inode);
}
}
@@ -229,69 +232,70 @@ ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
ioc_inode_unlock (ioc_inode);
- cache_still_valid = ioc_cache_still_valid (ioc_inode,
+ cache_still_valid = ioc_cache_still_valid (ioc_inode,
stbuf);
-
+
if (!cache_still_valid) {
ioc_inode_flush (ioc_inode);
- }
-
+ }
+
ioc_table_lock (ioc_inode->table);
{
list_move_tail (&ioc_inode->inode_lru,
&table->inode_lru[ioc_inode->weight]);
}
ioc_table_unlock (ioc_inode->table);
-
+
out:
if (frame->local != NULL) {
local = frame->local;
loc_wipe (&local->file_loc);
}
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf,
- dict, postparent);
- return 0;
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ return 0;
}
-int32_t
+int32_t
ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
+ ioc_local_t *local = NULL;
int32_t op_errno = -1, ret = -1;
- local = GF_CALLOC (1, sizeof (*local),
- gf_ioc_mt_ioc_local_t);
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NO_MEMORY, "out of memory");
goto unwind;
}
ret = loc_copy (&local->file_loc, loc);
if (ret != 0) {
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NO_MEMORY, "out of memory");
goto unwind;
}
frame->local = local;
- STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
+ STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, loc, xdata);
return 0;
unwind:
- STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL,
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL,
NULL, NULL);
- return 0;
+ return 0;
}
/*
- * ioc_forget -
+ * ioc_forget -
*
* @frame:
* @this:
@@ -301,19 +305,33 @@ unwind:
int32_t
ioc_forget (xlator_t *this, inode_t *inode)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
+
+ return 0;
+}
+
+static int32_t
+ioc_invalidate(xlator_t *this, inode_t *inode)
+{
+ uint64_t ioc_addr = 0;
+ ioc_inode_t *ioc_inode = NULL;
- inode_ctx_get (inode, this, &ioc_inode);
+ inode_ctx_get(inode, this, (uint64_t *) &ioc_addr);
+ ioc_inode = (void *) ioc_addr;
if (ioc_inode)
- ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
-
+ ioc_inode_flush(ioc_inode);
+
return 0;
}
-
-/*
- * ioc_cache_validate_cbk -
+/*
+ * ioc_cache_validate_cbk -
*
* @frame:
* @cookie:
@@ -325,101 +343,104 @@ ioc_forget (xlator_t *this, inode_t *inode)
*/
int32_t
ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_inode_t *ioc_inode = NULL;
- size_t destroy_size = 0;
- struct iatt *local_stbuf = NULL;
+ ioc_local_t *local = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ size_t destroy_size = 0;
+ struct iatt *local_stbuf = NULL;
local = frame->local;
- ioc_inode = local->inode;
+ ioc_inode = local->inode;
local_stbuf = stbuf;
- if ((op_ret == -1) ||
- ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
- "cache for inode(%p) is invalid. flushing all pages",
- ioc_inode);
- /* NOTE: only pages with no waiting frames are flushed by
- * ioc_inode_flush. page_fault will be generated for all
- * the pages which have waiting frames by ioc_inode_wakeup()
- */
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- if (op_ret >= 0) {
- ioc_inode->cache.mtime = stbuf->ia_mtime;
- ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ if ((op_ret == -1) ||
+ ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_msg_debug (ioc_inode->table->xl->name, 0,
+ "cache for inode(%p) is invalid. flushing all pages",
+ ioc_inode);
+ /* NOTE: only pages with no waiting frames are flushed by
+ * ioc_inode_flush. page_fault will be generated for all
+ * the pages which have waiting frames by ioc_inode_wakeup()
+ */
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ if (op_ret >= 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec
+ = stbuf->ia_mtime_nsec;
}
- }
- ioc_inode_unlock (ioc_inode);
- local_stbuf = NULL;
- }
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- if (op_ret < 0)
- local_stbuf = NULL;
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->cache.tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
-
- /* any page-fault initiated by ioc_inode_wakeup() will have its own
- * fd_ref on fd, safe to unref validate frame's private copy
- */
- fd_unref (local->fd);
-
- STACK_DESTROY (frame->root);
+ }
+ ioc_inode_unlock (ioc_inode);
+ local_stbuf = NULL;
+ }
- return 0;
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ if (op_ret < 0)
+ local_stbuf = NULL;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
+
+ /* any page-fault initiated by ioc_inode_wakeup() will have its own
+ * fd_ref on fd, safe to unref validate frame's private copy
+ */
+ fd_unref (local->fd);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
}
int32_t
ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
{
- ioc_waitq_t *waiter = NULL, *trav = NULL;
- uint32_t page_found = 0;
- int32_t ret = 0;
-
- trav = ioc_inode->waitq;
-
- while (trav) {
- if (trav->data == page) {
- page_found = 1;
- break;
- }
- trav = trav->next;
- }
-
- if (!page_found) {
- waiter = GF_CALLOC (1, sizeof (ioc_waitq_t),
+ ioc_waitq_t *waiter = NULL, *trav = NULL;
+ uint32_t page_found = 0;
+ int32_t ret = 0;
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ if (trav->data == page) {
+ page_found = 1;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if (!page_found) {
+ waiter = GF_CALLOC (1, sizeof (ioc_waitq_t),
gf_ioc_mt_ioc_waitq_t);
if (waiter == NULL) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
+ gf_msg (ioc_inode->table->xl->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
"out of memory");
ret = -ENOMEM;
goto out;
}
- waiter->data = page;
- waiter->next = ioc_inode->waitq;
- ioc_inode->waitq = waiter;
- }
+ waiter->data = page;
+ waiter->next = ioc_inode->waitq;
+ ioc_inode->waitq = waiter;
+ }
-out:
- return ret;
+out:
+ return ret;
}
/*
@@ -432,78 +453,76 @@ out:
*/
int32_t
ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- ioc_page_t *page)
+ ioc_page_t *page)
{
- call_frame_t *validate_frame = NULL;
- ioc_local_t *validate_local = NULL;
- ioc_local_t *local = NULL;
- int32_t ret = 0;
+ call_frame_t *validate_frame = NULL;
+ ioc_local_t *validate_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t ret = 0;
local = frame->local;
- validate_local = GF_CALLOC (1, sizeof (ioc_local_t),
- gf_ioc_mt_ioc_local_t);
+ validate_local = mem_get0 (THIS->local_pool);
if (validate_local == NULL) {
ret = -1;
local->op_ret = -1;
local->op_errno = ENOMEM;
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
+ gf_msg (ioc_inode->table->xl->name, GF_LOG_ERROR,
+ 0, IO_CACHE_MSG_NO_MEMORY, "out of memory");
goto out;
}
- validate_frame = copy_frame (frame);
+ validate_frame = copy_frame (frame);
if (validate_frame == NULL) {
ret = -1;
local->op_ret = -1;
local->op_errno = ENOMEM;
- GF_FREE (validate_local);
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
+ mem_put (validate_local);
+ gf_msg (ioc_inode->table->xl->name, GF_LOG_ERROR,
+ 0, IO_CACHE_MSG_NO_MEMORY, "out of memory");
goto out;
}
- validate_local->fd = fd_ref (fd);
- validate_local->inode = ioc_inode;
- validate_frame->local = validate_local;
-
- STACK_WIND (validate_frame, ioc_cache_validate_cbk,
+ validate_local->fd = fd_ref (fd);
+ validate_local->inode = ioc_inode;
+ validate_frame->local = validate_local;
+
+ STACK_WIND (validate_frame, ioc_cache_validate_cbk,
FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->fstat, fd);
+ FIRST_CHILD (frame->this)->fops->fstat, fd, NULL);
out:
- return ret;
+ return ret;
}
-inline uint32_t
+static uint32_t
is_match (const char *path, const char *pattern)
{
- int32_t ret = 0;
+ int32_t ret = 0;
+
+ ret = fnmatch (pattern, path, FNM_NOESCAPE);
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
-
- return (ret == 0);
+ return (ret == 0);
}
uint32_t
ioc_get_priority (ioc_table_t *table, const char *path)
{
- uint32_t priority = 0;
- struct ioc_priority *curr = NULL;
-
- if (list_empty(&table->priority_list)) {
- priority = 1;
- }
- else {
- list_for_each_entry (curr, &table->priority_list, list) {
- if (is_match (path, curr->pattern))
- priority = curr->priority;
- }
- }
-
- return priority;
+ uint32_t priority = 1;
+ struct ioc_priority *curr = NULL;
+
+ if (list_empty(&table->priority_list))
+ return priority;
+
+ priority = 0;
+ list_for_each_entry (curr, &table->priority_list, list) {
+ if (is_match (path, curr->pattern))
+ priority = curr->priority;
+ }
+
+ return priority;
}
-/*
+/*
* ioc_open_cbk - open callback for io cache
*
* @frame: call frame
@@ -516,25 +535,36 @@ ioc_get_priority (ioc_table_t *table, const char *path)
*/
int32_t
ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- inode_t *inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
table = this->private;
- inode = local->file_loc.inode;
- path = local->file_loc.path;
- if (op_ret != -1) {
+ if (op_ret != -1) {
inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
+
+ //TODO: see why inode context is NULL and handle it.
+ if (!ioc_inode) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ EINVAL, IO_CACHE_MSG_ENFORCEMENT_FAILED,
+ "inode context is NULL (%s)",
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
ioc_table_lock (ioc_inode->table);
{
list_move_tail (&ioc_inode->inode_lru,
@@ -545,35 +575,36 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ioc_inode_lock (ioc_inode);
{
if ((table->min_file_size > ioc_inode->ia_size)
- || ((table->max_file_size >= 0)
+ || ((table->max_file_size > 0)
&& (table->max_file_size < ioc_inode->ia_size))) {
fd_ctx_set (fd, this, 1);
}
}
ioc_inode_unlock (ioc_inode);
- /* If O_DIRECT open, we disable caching on it */
- if ((local->flags & O_DIRECT)){
- /* O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
+ /* If O_DIRECT open, we disable caching on it */
+ if ((local->flags & O_DIRECT)){
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set (fd, this, 1);
+ }
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
+ /* weight = 0, we disable caching on it */
+ if (weight == 0) {
+ /* we allow a pattern-matched cache disable this way
+ */
+ fd_ctx_set (fd, this, 1);
+ }
+ }
- GF_FREE (local);
- frame->local = NULL;
+out:
+ mem_put (local);
+ frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
/*
@@ -592,20 +623,27 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ int ret = -1;
local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
table = this->private;
path = local->file_loc.path;
- if (op_ret != -1) {
+ if (op_ret != -1) {
/* assign weight */
weight = ioc_get_priority (table, path);
@@ -618,9 +656,14 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ioc_inode->ia_size = buf->ia_size;
if ((table->min_file_size > ioc_inode->ia_size)
- || ((table->max_file_size >= 0)
+ || ((table->max_file_size > 0)
&& (table->max_file_size < ioc_inode->ia_size))) {
- fd_ctx_set (fd, this, 1);
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
}
}
ioc_inode_unlock (ioc_inode);
@@ -628,32 +671,139 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
inode_ctx_put (fd->inode, this,
(uint64_t)(long)ioc_inode);
- /* If O_DIRECT open, we disable caching on it */
- if (local->flags & O_DIRECT){
- /*
- * O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- frame->local = NULL;
- GF_FREE (local);
+ /* If O_DIRECT open, we disable caching on it */
+ if (local->flags & O_DIRECT) {
+ /*
+ * O_DIRECT is only for one fd, not the inode
+ * as a whole */
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
+
+ /* if weight == 0, we disable caching on it */
+ if (!weight) {
+ /* we allow a pattern-matched cache disable this way */
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
+
+ }
+
+out:
+ frame->local = NULL;
+ mem_put (local);
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
+
+/*
+ * ioc_mknod_cbk - mknod callback for io cache
+ *
+ * @frame: call frame; frame->local holds the ioc_local_t set up by ioc_mknod
+ * @cookie:
+ * @this: io-cache xlator
+ * @op_ret: result of the child's mknod (-1 on failure)
+ * @op_errno: errno of the child's mknod
+ * @inode: inode of the newly created node
+ * @buf: attributes of the new node; mtime and size seed the cache state
+ * @preparent:
+ * @postparent:
+ * @xdata:
+ *
+ * On success an io-cache inode is set up for @inode, weighted by the
+ * priority pattern matching the path saved in frame->local, and stored in
+ * the inode context. frame->local is always released and the call is
+ * always unwound with the (possibly overridden) op_ret/op_errno.
+ */
+int32_t
+ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        ioc_local_t *local     = NULL;
+        ioc_table_t *table     = NULL;
+        ioc_inode_t *ioc_inode = NULL;
+        uint32_t     weight    = 0xffffffff;
+        const char  *path      = NULL;
+
+        local = frame->local;
+        if (!this || !this->private) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        table = this->private;
+        path = local->file_loc.path;
+
+        if (op_ret != -1) {
+                /* assign weight */
+                weight = ioc_get_priority (table, path);
+
+                ioc_inode = ioc_inode_update (table, inode, weight);
+                if (!ioc_inode) {
+                        /* NOTE(review): ioc_inode_update is defined outside
+                         * this view; presumably it returns NULL when it
+                         * cannot allocate. The mknod itself succeeded, so
+                         * the result is passed through unchanged and only
+                         * the cache setup is skipped instead of
+                         * dereferencing a NULL pointer below.
+                         */
+                        gf_msg (this->name, GF_LOG_WARNING,
+                                ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+                                "failed to set up io-cache inode context");
+                        goto out;
+                }
+
+                ioc_inode_lock (ioc_inode);
+                {
+                        ioc_inode->cache.mtime = buf->ia_mtime;
+                        ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+                        ioc_inode->ia_size = buf->ia_size;
+                }
+                ioc_inode_unlock (ioc_inode);
+
+                inode_ctx_put (inode, this,
+                               (uint64_t)(long)ioc_inode);
+        }
+
+out:
+        /* detach local before unwinding so the frame no longer refers to it */
+        frame->local = NULL;
+
+        loc_wipe (&local->file_loc);
+        mem_put (local);
+
+        STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+                             preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * ioc_mknod - mknod fop for io cache
+ *
+ * @frame: call frame
+ * @this: io-cache xlator
+ * @loc: location of the node to create; copied into frame->local so the
+ *       callback can look up the path's cache priority
+ * @mode:
+ * @rdev:
+ * @umask:
+ * @xdata:
+ *
+ * Winds mknod to the first child with ioc_mknod_cbk as the callback. If the
+ * per-call local cannot be allocated or the loc cannot be copied, the call
+ * is unwound with -1/ENOMEM instead.
+ */
+int
+ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+           dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        ioc_local_t *local    = NULL;
+        int32_t      op_errno = -1, ret = -1;
+
+        local = mem_get0 (this->local_pool);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                /* log ENOMEM as errnum, matching the other fops' OOM logs */
+                gf_msg (this->name, GF_LOG_ERROR,
+                        ENOMEM, IO_CACHE_MSG_NO_MEMORY, "out of memory");
+                goto unwind;
+        }
+
+        ret = loc_copy (&local->file_loc, loc);
+        if (ret != 0) {
+                op_errno = ENOMEM;
+                gf_msg (this->name, GF_LOG_ERROR,
+                        ENOMEM, IO_CACHE_MSG_NO_MEMORY, "out of memory");
+                goto unwind;
+        }
+
+        frame->local = local;
+
+        STACK_WIND (frame, ioc_mknod_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mknod,
+                    loc, mode, rdev, umask, xdata);
+        return 0;
+
+unwind:
+        /* local was never attached to the frame on this path; free it here */
+        if (local != NULL) {
+                loc_wipe (&local->file_loc);
+                mem_put (local);
+        }
+
+        STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
+                             NULL, NULL, NULL);
+
+        return 0;
+}
+
+
/*
* ioc_open - open fop for io cache
* @frame:
@@ -664,33 +814,35 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
-
- ioc_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t);
+ ioc_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL);
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY, "out of memory");
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
- local->flags = flags;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
-
- frame->local = local;
-
- STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ local->file_loc.inode = loc->inode;
- return 0;
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd,
+ xdata);
+
+ return 0;
}
/*
* ioc_create - create fop for io cache
- *
+ *
* @frame:
* @this:
* @pathname:
@@ -700,26 +852,28 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
int32_t
ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- ioc_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t);
+ ioc_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY, "out of memory");
STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
- local->flags = flags;
- local->file_loc.path = loc->path;
- frame->local = local;
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ frame->local = local;
- STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd);
+ STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
- return 0;
+ return 0;
}
@@ -727,7 +881,7 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
/*
* ioc_release - release fop for io cache
- *
+ *
* @frame:
* @this:
* @fd:
@@ -736,11 +890,11 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int32_t
ioc_release (xlator_t *this, fd_t *fd)
{
- return 0;
+ return 0;
}
-/*
- * ioc_readv_disabled_cbk
+/*
+ * ioc_readv_disabled_cbk
* @frame:
* @cookie:
* @this:
@@ -749,164 +903,179 @@ ioc_release (xlator_t *this, fd_t *fd)
* @vector:
* @count:
*
- */
+ */
int32_t
ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf,
- struct iobref *iobref)
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+ return 0;
}
int32_t
ioc_need_prune (ioc_table_t *table)
{
- int64_t cache_difference = 0;
-
- ioc_table_lock (table);
- {
- cache_difference = table->cache_used - table->cache_size;
- }
- ioc_table_unlock (table);
-
- if (cache_difference > 0)
- return 1;
- else
- return 0;
+ int64_t cache_difference = 0;
+
+ ioc_table_lock (table);
+ {
+ cache_difference = table->cache_used - table->cache_size;
+ }
+ ioc_table_unlock (table);
+
+ if (cache_difference > 0)
+ return 1;
+ else
+ return 0;
}
/*
* ioc_dispatch_requests -
- *
+ *
* @frame:
* @inode:
*
- *
+ *
*/
void
ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
off_t offset, size_t size)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *trav = NULL;
- ioc_waitq_t *waitq = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- int32_t fault = 0;
- size_t trav_size = 0;
- off_t local_offset = 0;
- int32_t ret = -1;
- int8_t need_validate = 0;
- int8_t might_need_validate = 0; /*
- * if a page exists, do we need
- * to validate it?
- */
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *trav = NULL;
+ ioc_waitq_t *waitq = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ int32_t fault = 0;
+ size_t trav_size = 0;
+ off_t local_offset = 0;
+ int32_t ret = -1;
+ int8_t need_validate = 0;
+ int8_t might_need_validate = 0; /*
+ * if a page exists, do we need
+ * to validate it?
+ */
local = frame->local;
table = ioc_inode->table;
- rounded_offset = floor (offset, table->page_size);
- rounded_end = roof (offset + size, table->page_size);
- trav_offset = rounded_offset;
-
- /* once a frame does read, it should be waiting on something */
- local->wait_count++;
-
- /* Requested region can fall in three different pages,
- * 1. Ready - region is already in cache, we just have to serve it.
- * 2. In-transit - page fault has been generated on this page, we need
- * to wait till the page is ready
- * 3. Fault - page is not in cache, we have to generate a page fault
- */
-
- might_need_validate = ioc_inode_need_revalidate (ioc_inode);
-
- while (trav_offset < rounded_end) {
- ioc_inode_lock (ioc_inode);
- //{
-
- /* look for requested region in the cache */
- trav = ioc_page_get (ioc_inode, trav_offset);
-
- local_offset = max (trav_offset, offset);
- trav_size = min (((offset+size) - local_offset),
- table->page_size);
-
- if (!trav) {
- /* page not in cache, we need to generate page fault */
- trav = ioc_page_create (ioc_inode, trav_offset);
- fault = 1;
- if (!trav) {
- gf_log (frame->this->name, GF_LOG_CRITICAL,
- "out of memory");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
- }
-
- ioc_wait_on_page (trav, frame, local_offset, trav_size);
-
- if (trav->ready) {
- /* page found in cache */
- if (!might_need_validate && !ioc_inode->waitq) {
- /* fresh enough */
- gf_log (frame->this->name, GF_LOG_TRACE,
- "cache hit for trav_offset=%"PRId64""
- "/local_offset=%"PRId64"",
- trav_offset, local_offset);
- waitq = ioc_page_wakeup (trav);
- } else {
- /* if waitq already exists, fstat revalidate is
- already on the way */
- if (!ioc_inode->waitq) {
- need_validate = 1;
- }
-
- ret = ioc_wait_on_inode (ioc_inode, trav);
- if (ret < 0) {
- local->op_ret = -1;
- local->op_errno = -ret;
- need_validate = 0;
+ rounded_offset = floor (offset, table->page_size);
+ rounded_end = roof (offset + size, table->page_size);
+ trav_offset = rounded_offset;
- waitq = ioc_page_wakeup (trav);
- ioc_inode_unlock (ioc_inode);
+ /* once a frame does read, it should be waiting on something */
+ local->wait_count++;
+
+ /* Requested region can fall in three different pages,
+ * 1. Ready - region is already in cache, we just have to serve it.
+ * 2. In-transit - page fault has been generated on this page, we need
+ * to wait till the page is ready
+ * 3. Fault - page is not in cache, we have to generate a page fault
+ */
+
+ might_need_validate = ioc_inode_need_revalidate (ioc_inode);
- ioc_waitq_return (waitq);
- waitq = NULL;
+ while (trav_offset < rounded_end) {
+ ioc_inode_lock (ioc_inode);
+ {
+ /* look for requested region in the cache */
+ trav = __ioc_page_get (ioc_inode, trav_offset);
+
+ local_offset = max (trav_offset, offset);
+ trav_size = min (((offset+size) - local_offset),
+ table->page_size);
+
+ if (!trav) {
+ /* page not in cache, we need to generate page
+ * fault
+ */
+ trav = __ioc_page_create (ioc_inode,
+ trav_offset);
+ fault = 1;
+ if (!trav) {
+ gf_msg (frame->this->name,
+ GF_LOG_CRITICAL,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ ioc_inode_unlock (ioc_inode);
goto out;
}
- }
- }
-
- //}
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
-
- if (fault) {
- fault = 0;
- /* new page created, increase the table->cache_used */
- ioc_page_fault (ioc_inode, frame, fd, trav_offset);
- }
-
- if (need_validate) {
- need_validate = 0;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "sending validate request for "
- "inode(%"PRId64") at offset=%"PRId64"",
- fd->inode->ino, trav_offset);
- ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
+ }
+
+ __ioc_wait_on_page (trav, frame, local_offset,
+ trav_size);
+
+ if (trav->ready) {
+ /* page found in cache */
+ if (!might_need_validate && !ioc_inode->waitq) {
+ /* fresh enough */
+ gf_msg_trace (frame->this->name, 0,
+ "cache hit for "
+ "trav_offset=%"
+ PRId64"/local_"
+ "offset=%"PRId64"",
+ trav_offset,
+ local_offset);
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
+ } else {
+ /* if waitq already exists, fstat
+ * revalidate is
+ * already on the way
+ */
+ if (!ioc_inode->waitq) {
+ need_validate = 1;
+ }
+
+ ret = ioc_wait_on_inode (ioc_inode,
+ trav);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ need_validate = 0;
+
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+ goto out;
+ }
+ }
+ }
+
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+
+ if (fault) {
+ fault = 0;
+ /* new page created, increase the table->cache_used */
+ ioc_page_fault (ioc_inode, frame, fd, trav_offset);
+ }
+
+ if (need_validate) {
+ need_validate = 0;
+ gf_msg_trace (frame->this->name, 0,
+ "sending validate request for "
+ "inode(%s) at offset=%"PRId64"",
+ uuid_utoa (fd->inode->gfid), trav_offset);
+ ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
if (ret == -1) {
ioc_inode_lock (ioc_inode);
{
- waitq = ioc_page_wakeup (trav);
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
}
ioc_inode_unlock (ioc_inode);
@@ -914,25 +1083,25 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
waitq = NULL;
goto out;
}
- }
-
- trav_offset += table->page_size;
- }
+ }
+
+ trav_offset += table->page_size;
+ }
out:
- ioc_frame_return (frame);
+ ioc_frame_return (frame);
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
- return;
+ return;
}
/*
* ioc_readv -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -942,71 +1111,48 @@ out:
*/
int32_t
ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = NULL;
- uint32_t weight = 0;
- ioc_table_t *table = NULL;
- uint32_t num_pages = 0;
- int32_t op_errno = -1;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_local_t *local = NULL;
+ uint32_t weight = 0;
+ ioc_table_t *table = NULL;
+ int32_t op_errno = -1;
if (!this) {
goto out;
}
- inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (!ioc_inode) {
- /* caching disabled, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
+ inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (!ioc_inode) {
+ /* caching disabled, go ahead with normal readv */
+ STACK_WIND (frame, ioc_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv, fd, size,
+ offset, flags, xdata);
+ return 0;
+ }
table = this->private;
if (!table) {
- gf_log (this->name, GF_LOG_ERROR, "table is null");
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ IO_CACHE_MSG_ENFORCEMENT_FAILED, "table is null");
op_errno = EINVAL;
goto out;
}
-
- ioc_table_lock (table);
- {
- if (!table->mem_pool) {
-
- num_pages = (table->cache_size / table->page_size)
- + ((table->cache_size % table->page_size)
- ? 1 : 0);
-
- table->mem_pool
- = mem_pool_new (rbthash_entry_t, num_pages);
-
- if (!table->mem_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to allocate mem_pool");
- op_errno = ENOMEM;
- ioc_table_unlock (table);
- goto out;
- }
- }
- }
- ioc_table_unlock (table);
-
ioc_inode_lock (ioc_inode);
{
if (!ioc_inode->cache.page_table) {
ioc_inode->cache.page_table
- = rbthash_table_init
- (IOC_PAGE_TABLE_BUCKET_COUNT,
- ioc_hashfn, NULL, 0,
- table->mem_pool);
+ = rbthash_table_init
+ (IOC_PAGE_TABLE_BUCKET_COUNT,
+ ioc_hashfn, NULL, 0,
+ table->mem_pool);
if (ioc_inode->cache.page_table == NULL) {
op_errno = ENOMEM;
@@ -1017,56 +1163,58 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
ioc_inode_unlock (ioc_inode);
- if (!fd_ctx_get (fd, this, NULL)) {
- /* disable caching for this fd, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
- local = (ioc_local_t *) GF_CALLOC (1, sizeof (ioc_local_t),
- gf_ioc_mt_ioc_local_t);
+ if (!fd_ctx_get (fd, this, NULL)) {
+ /* disable caching for this fd, go ahead with normal readv */
+ STACK_WIND (frame, ioc_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv, fd, size,
+ offset, flags, xdata);
+ return 0;
+ }
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY, "out of memory");
op_errno = ENOMEM;
goto out;
}
- INIT_LIST_HEAD (&local->fill_list);
+ INIT_LIST_HEAD (&local->fill_list);
- frame->local = local;
- local->pending_offset = offset;
- local->pending_size = size;
- local->offset = offset;
- local->size = size;
- local->inode = ioc_inode;
+ frame->local = local;
+ local->pending_offset = offset;
+ local->pending_size = size;
+ local->offset = offset;
+ local->size = size;
+ local->inode = ioc_inode;
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
- frame, offset, size);
+ gf_msg_trace (this->name, 0,
+ "NEW REQ (%p) offset "
+ "= %"PRId64" && size = %"GF_PRI_SIZET"",
+ frame, offset, size);
- weight = ioc_inode->weight;
+ weight = ioc_inode->weight;
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &ioc_inode->table->inode_lru[weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &ioc_inode->table->inode_lru[weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
- ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
- return 0;
+ ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
+ return 0;
out:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
return 0;
}
/*
* ioc_writev_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1076,25 +1224,26 @@ out:
*/
int32_t
ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
local = frame->local;
- inode_ctx_get (local->fd->inode, this, &ioc_inode);
-
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (local->fd->inode, this, &ioc_inode);
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
/*
* ioc_writev
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1105,38 +1254,39 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
- local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t);
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY, "out of memory");
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
- /* TODO: why is it not fd_ref'ed */
- local->fd = fd;
- frame->local = local;
+ /* TODO: why is it not fd_ref'ed */
+ local->fd = fd;
+ frame->local = local;
- inode_ctx_get (fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- iobref);
+ STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
}
/*
* ioc_truncate_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1145,15 +1295,15 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
* @buf:
*
*/
-int32_t
+int32_t
ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
@@ -1170,42 +1320,44 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
/*
* ioc_truncate -
- *
+ *
* @frame:
* @this:
* @loc:
* @offset:
*
*/
-int32_t
-ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+int32_t
+ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (loc->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
/*
* ioc_ftruncate -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1213,142 +1365,220 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
*
*/
int32_t
-ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (fd->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (fd->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
int32_t
ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
-int32_t
+int32_t
ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
+{
+ ioc_inode_t *ioc_inode = NULL;
+ uint64_t tmp_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &tmp_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_inode;
+ if (!ioc_inode) {
+ gf_msg_debug (this->name, EBADFD,
+ "inode context is NULL: returning EBADFD");
+ STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL);
+ return 0;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata);
+
+ return 0;
+}
+
+/*
+ * ioc_readdirp_cbk - readdirp callback for io cache
+ *
+ * Walks the returned entries when the call produced any (the per-entry
+ * work is still a placeholder) and unwinds with the child's result
+ * unchanged.
+ */
+int
+ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+        gf_dirent_t *dirent = NULL;
+
+        /* only a positive op_ret means there are entries to visit */
+        if (op_ret > 0) {
+                list_for_each_entry (dirent, &entries->list, list) {
+                        /* TODO: fill things */
+                }
+        }
+
+        STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+        return 0;
+}
+int
+ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- ioc_inode_t *ioc_inode = NULL;
- uint64_t tmp_inode = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_inode;
- if (!ioc_inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "inode context is NULL: returning EBADFD");
- STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL);
- return 0;
- }
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->cache.tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lk, fd, cmd, lock);
+ STACK_WIND (frame, ioc_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+ return 0;
+}
+
+static int32_t
+ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
return 0;
}
+/*
+ * ioc_discard - discard fop for io cache
+ *
+ * Flushes the io-cache inode attached to fd->inode, if there is one, and
+ * then winds discard to the first child.
+ */
+static int32_t
+ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+        uint64_t ctx_value = 0;
+
+        inode_ctx_get (fd->inode, this, &ctx_value);
+
+        /* a zero context value means no io-cache inode is attached */
+        if (ctx_value != 0) {
+                ioc_inode_flush ((ioc_inode_t *)(long)ctx_value);
+        }
+
+        STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+        return 0;
+}
+
+/*
+ * ioc_zerofill_cbk - zerofill callback for io cache
+ *
+ * Pass-through: unwinds with exactly the values received from the child.
+ */
+static int32_t
+ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *pre,
+                 struct iatt *post, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT(zerofill, frame,
+                            op_ret, op_errno,
+                            pre, post, xdata);
+
+        return 0;
+}
+
+/*
+ * ioc_zerofill - zerofill fop for io cache
+ *
+ * Flushes the io-cache inode attached to fd->inode, if there is one, and
+ * then winds zerofill to the first child.
+ */
+static int32_t
+ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             off_t len, dict_t *xdata)
+{
+        uint64_t ctx_value = 0;
+
+        inode_ctx_get (fd->inode, this, &ctx_value);
+
+        /* a zero context value means no io-cache inode is attached */
+        if (ctx_value != 0) {
+                ioc_inode_flush ((ioc_inode_t *)(long)ctx_value);
+        }
+
+        STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+        return 0;
+}
+
+
int32_t
ioc_get_priority_list (const char *opt_str, struct list_head *first)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct ioc_priority *curr = NULL, *tmp = NULL;
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
string = gf_strdup (opt_str);
if (string == NULL) {
max_pri = -1;
goto out;
}
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- stripe_str = strtok_r (string, ",", &tmp_str);
- while (stripe_str) {
- curr = GF_CALLOC (1, sizeof (struct ioc_priority),
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ stripe_str = strtok_r (string, ",", &tmp_str);
+ while (stripe_str) {
+ curr = GF_CALLOC (1, sizeof (struct ioc_priority),
gf_ioc_mt_ioc_priority);
if (curr == NULL) {
max_pri = -1;
goto out;
}
- list_add_tail (&curr->list, first);
+ list_add_tail (&curr->list, first);
- dup_str = gf_strdup (stripe_str);
+ dup_str = gf_strdup (stripe_str);
if (dup_str == NULL) {
max_pri = -1;
goto out;
}
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ if (!pattern) {
max_pri = -1;
goto out;
}
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
+ priority = strtok_r (NULL, ":", &tmp_str1);
+ if (!priority) {
max_pri = -1;
goto out;
}
- gf_log ("io-cache", GF_LOG_TRACE,
- "ioc priority : pattern %s : priority %s",
- pattern,
- priority);
+ gf_msg_trace ("io-cache", 0,
+ "ioc priority : pattern %s : priority %s",
+ pattern, priority);
- curr->pattern = gf_strdup (pattern);
+ curr->pattern = gf_strdup (pattern);
if (curr->pattern == NULL) {
max_pri = -1;
goto out;
}
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
+ curr->priority = strtol (priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
max_pri = -1;
goto out;
} else {
- max_pri = max (max_pri, curr->priority);
+ max_pri = max (max_pri, curr->priority);
}
GF_FREE (dup_str);
dup_str = NULL;
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- }
-out:
- if (string != NULL) {
- GF_FREE (string);
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
}
+out:
+ GF_FREE (string);
- if (dup_str != NULL) {
- GF_FREE (dup_str);
- }
+ GF_FREE (dup_str);
if (max_pri == -1) {
list_for_each_entry_safe (curr, tmp, first, list) {
@@ -1358,7 +1588,7 @@ out:
}
}
- return max_pri;
+ return max_pri;
}
int32_t
@@ -1370,151 +1600,245 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_ioc_mt_end + 1);
-
+
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
}
return ret;
}
+
+static gf_boolean_t
+check_cache_size_ok (xlator_t *this, uint64_t cache_size)
+{
+ gf_boolean_t ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ opt = xlator_volume_option_get (this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_msg (this->name, GF_LOG_ERROR,
+ EINVAL, IO_CACHE_MSG_ENFORCEMENT_FAILED,
+ "could not get cache-size option");
+ goto out;
+ }
+
+ total_mem = get_mem_size ();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_msg_debug (this->name, 0, "Max cache size is %"PRIu64,
+ max_cache_size);
+
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_msg (this->name, GF_LOG_ERROR,
+ 0, IO_CACHE_MSG_INVALID_ARGUMENT,
+ "Cache size %"PRIu64
+ " is greater than the max size of %"PRIu64,
+ cache_size, max_cache_size);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ data_t *data = NULL;
+ ioc_table_t *table = NULL;
+ int ret = -1;
+ uint64_t cache_size_new = 0;
+ if (!this || !this->private)
+ goto out;
+
+ table = this->private;
+
+ ioc_table_lock (table);
+ {
+ GF_OPTION_RECONF ("cache-timeout", table->cache_timeout,
+ options, int32, unlock);
+
+ data = dict_get (options, "priority");
+ if (data) {
+ char *option_list = data_to_str (data);
+
+ gf_msg_trace (this->name, 0,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list (option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
+ goto unlock;
+ }
+ table->max_pri ++;
+ }
+
+ GF_OPTION_RECONF ("max-file-size", table->max_file_size,
+ options, size_uint64, unlock);
+
+ GF_OPTION_RECONF ("min-file-size", table->min_file_size,
+ options, size_uint64, unlock);
+
+ if ((table->max_file_size <= UINT64_MAX) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_INVALID_ARGUMENT, "minimum size (%"
+ PRIu64") of a file that can be cached is "
+ "greater than maximum size (%"PRIu64"). "
+ "Hence Defaulting to old value",
+ table->min_file_size, table->max_file_size);
+ goto unlock;
+ }
+
+ GF_OPTION_RECONF ("cache-size", cache_size_new,
+ options, size_uint64, unlock);
+ if (!check_cache_size_ok (this, cache_size_new)) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR,
+ 0, IO_CACHE_MSG_INVALID_ARGUMENT,
+ "Not reconfiguring cache-size");
+ goto unlock;
+ }
+ table->cache_size = cache_size_new;
+
+ ret = 0;
+ }
+unlock:
+ ioc_table_unlock (table);
+out:
+ return ret;
+}
+
+
/*
- * init -
+ * init -
* @this:
*
*/
-int32_t
+int32_t
init (xlator_t *this)
{
- ioc_table_t *table = NULL;
- dict_t *options = this->options;
- uint32_t index = 0;
- char *cache_size_string = NULL, *tmp = NULL;
- int32_t ret = -1;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: io-cache not configured with exactly "
- "one child");
+ ioc_table_t *table = NULL;
+ dict_t *xl_options = NULL;
+ uint32_t index = 0;
+ int32_t ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ data_t *data = 0;
+ uint32_t num_pages = 0;
+
+ xl_options = this->options;
+
+ if (!this->children || this->children->next) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: io-cache not configured with exactly "
+ "one child");
goto out;
- }
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (!this->parents) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
- table = (void *) GF_CALLOC (1, sizeof (*table), gf_ioc_mt_ioc_table_t);
+ table = (void *) GF_CALLOC (1, sizeof (*table), gf_ioc_mt_ioc_table_t);
if (table == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, "out of memory");
goto out;
}
-
- table->xl = this;
- table->page_size = this->ctx->page_size;
- table->cache_size = IOC_CACHE_SIZE;
-
- if (dict_get (options, "cache-size"))
- cache_size_string = data_to_str (dict_get (options,
- "cache-size"));
- if (cache_size_string) {
- if (gf_string2bytesize (cache_size_string,
- &table->cache_size) != 0) {
- gf_log ("io-cache", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option cache-size\"",
- cache_size_string);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "using cache-size %"PRIu64"", table->cache_size);
- }
-
- table->cache_timeout = 1;
-
- if (dict_get (options, "cache-timeout")) {
- table->cache_timeout =
- data_to_uint32 (dict_get (options,
- "cache-timeout"));
- gf_log (this->name, GF_LOG_TRACE,
- "Using %d seconds to revalidate cache",
- table->cache_timeout);
- }
-
- INIT_LIST_HEAD (&table->priority_list);
- table->max_pri = 1;
- if (dict_get (options, "priority")) {
- char *option_list = data_to_str (dict_get (options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- table->max_pri = ioc_get_priority_list (option_list,
- &table->priority_list);
-
- if (table->max_pri == -1) {
- goto out;
- }
- }
- table->max_pri ++;
-
- table->min_file_size = 0;
-
- tmp = data_to_str (dict_get (options, "min-file-size"));
- if (tmp != NULL) {
- if (gf_string2bytesize (tmp,
- (uint64_t *)&table->min_file_size) != 0) {
- gf_log ("io-cache", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option min-file-size\"", tmp);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "using min-file-size %"PRIu64"", table->min_file_size);
- }
-
- table->max_file_size = -1;
- tmp = data_to_str (dict_get (options, "max-file-size"));
- if (tmp != NULL) {
- if (gf_string2bytesize (tmp,
- (uint64_t *)&table->max_file_size) != 0) {
- gf_log ("io-cache", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option max-file-size\"", tmp);
+
+ table->xl = this;
+ table->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT ("cache-size", table->cache_size, size_uint64, out);
+
+ GF_OPTION_INIT ("cache-timeout", table->cache_timeout, int32, out);
+
+ GF_OPTION_INIT ("min-file-size", table->min_file_size, size_uint64, out);
+
+ GF_OPTION_INIT ("max-file-size", table->max_file_size, size_uint64, out);
+
+ if (!check_cache_size_ok (this, table->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&table->priority_list);
+ table->max_pri = 1;
+ data = dict_get (xl_options, "priority");
+ if (data) {
+ char *option_list = data_to_str (data);
+ gf_msg_trace (this->name, 0,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list (option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
goto out;
}
-
- gf_log (this->name, GF_LOG_TRACE,
- "using max-file-size %"PRIu64"", table->max_file_size);
}
- INIT_LIST_HEAD (&table->inodes);
-
- if ((table->max_file_size >= 0)
+ table->max_pri ++;
+
+ INIT_LIST_HEAD (&table->inodes);
+
+ if ((table->max_file_size <= UINT64_MAX)
&& (table->min_file_size > table->max_file_size)) {
- gf_log ("io-cache", GF_LOG_ERROR, "minimum size (%"
- PRIu64") of a file that can be cached is "
- "greater than maximum size (%"PRIu64")",
- table->min_file_size, table->max_file_size);
- goto out;
+ gf_msg ("io-cache", GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_INVALID_ARGUMENT, "minimum size (%"
+ PRIu64") of a file that can be cached is "
+ "greater than maximum size (%"PRIu64")",
+ table->min_file_size, table->max_file_size);
+ goto out;
}
- table->inode_lru = GF_CALLOC (table->max_pri,
+ table->inode_lru = GF_CALLOC (table->max_pri,
sizeof (struct list_head),
gf_ioc_mt_list_head);
if (table->inode_lru == NULL) {
goto out;
}
- for (index = 0; index < (table->max_pri); index++)
- INIT_LIST_HEAD (&table->inode_lru[index]);
+ for (index = 0; index < (table->max_pri); index++)
+ INIT_LIST_HEAD (&table->inode_lru[index]);
+
+ this->local_pool = mem_pool_new (ioc_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ pthread_mutex_init (&table->table_lock, NULL);
+ this->private = table;
+
+ num_pages = (table->cache_size / table->page_size)
+ + ((table->cache_size % table->page_size)
+ ? 1 : 0);
+
+ table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages);
+ if (!table->mem_pool) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, "Unable to allocate mem_pool");
+ goto out;
+ }
- pthread_mutex_init (&table->table_lock, NULL);
- this->private = table;
ret = 0;
ctx = this->ctx;
@@ -1528,110 +1852,341 @@ out:
}
}
- return ret;
+ return ret;
+}
+
+void
+ioc_page_waitq_dump (ioc_page_t *page, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ trav = page->waitq;
+
+ while (trav) {
+ frame = trav->data;
+ sprintf (key, "waitq.frame[%d]", i++);
+ gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ ioc_page_t *page = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ page = trav->data;
+
+ sprintf (key, "cache-validation-waitq.page[%d].offset", i++);
+ gf_proc_dump_write (key, "%"PRId64, page->offset);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_page_dump (ioc_page_t *page, char *prefix)
+{
+
+ int ret = -1;
+
+ if (!page)
+ return;
+ /* ioc_page_lock can be used to hold the mutex. But in statedump
+ * its better to use trylock to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock (&page->page_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+ gf_proc_dump_write ("size", "%"PRId64, page->size);
+ gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+ ioc_page_waitq_dump (page, prefix);
+ }
+ pthread_mutex_unlock (&page->page_lock);
+
+out:
+ if (ret && page)
+ gf_proc_dump_write ("Unable to dump the page information",
+ "(Lock acquisition failed) %p", page);
+
+ return;
+}
+
+void
+__ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix)
+{
+ off_t offset = 0;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char timestr[256] = {0, };
+
+ if ((ioc_inode == NULL) || (prefix == NULL)) {
+ goto out;
+ }
+
+ table = ioc_inode->table;
+
+ if (ioc_inode->cache.tv.tv_sec) {
+ gf_time_fmt (timestr, sizeof timestr,
+ ioc_inode->cache.tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec);
+
+ gf_proc_dump_write ("last-cache-validation-time", "%s",
+ timestr);
+ }
+
+ for (offset = 0; offset < ioc_inode->ia_size;
+ offset += table->page_size) {
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page == NULL) {
+ continue;
+ }
+
+ sprintf (key, "inode.cache.page[%d]", i++);
+ __ioc_page_dump (page, key);
+ }
+out:
+ return;
+}
+
+
+int
+ioc_inode_dump (xlator_t *this, inode_t *inode)
+{
+
+ char *path = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ gf_boolean_t section_added = _gf_false;
+ char uuid_str[64] = {0,};
+
+ if (this == NULL || inode == NULL)
+ goto out;
+
+ gf_proc_dump_build_key (key_prefix, "io-cache", "inode");
+
+ inode_ctx_get (inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (ioc_inode == NULL)
+ goto out;
+
+ /* Similar to ioc_page_dump function its better to use
+ * pthread_mutex_trylock and not to use gf_log in statedump
+ * to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock (&ioc_inode->inode_lock);
+ if (ret)
+ goto out;
+
+ {
+ if (gf_uuid_is_null (ioc_inode->inode->gfid))
+ goto unlock;
+
+ gf_proc_dump_add_section (key_prefix);
+ section_added = _gf_true;
+
+ __inode_path (ioc_inode->inode, NULL, &path);
+
+ gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight);
+
+ if (path) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
+ }
+
+ gf_proc_dump_write ("uuid", "%s", uuid_utoa_r
+ (ioc_inode->inode->gfid, uuid_str));
+ __ioc_cache_dump (ioc_inode, key_prefix);
+ __ioc_inode_waitq_dump (ioc_inode, key_prefix);
+ }
+unlock:
+ pthread_mutex_unlock (&ioc_inode->inode_lock);
+
+out:
+ if (ret && ioc_inode) {
+ if (section_added == _gf_false)
+ gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_write ("Unable to print the status of ioc_inode",
+ "(Lock acquisition failed) %s",
+ uuid_utoa (inode->gfid));
+ }
+ return ret;
}
int
ioc_priv_dump (xlator_t *this)
{
- ioc_table_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
+ ioc_table_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ int ret = -1;
+ gf_boolean_t add_section = _gf_false;
- assert (this);
- priv = this->private;
+ if (!this || !this->private)
+ goto out;
- assert (priv);
+ priv = this->private;
- gf_proc_dump_build_key (key_prefix, "xlator.performance.io-cache",
- "priv");
+ gf_proc_dump_build_key (key_prefix, "io-cache", "priv");
gf_proc_dump_add_section (key_prefix);
+ add_section = _gf_true;
- gf_proc_dump_build_key (key, key_prefix, "page_size");
- gf_proc_dump_write (key, "%ld", priv->page_size);
- gf_proc_dump_build_key (key, key_prefix, "cache_size");
- gf_proc_dump_write (key, "%ld", priv->cache_size);
- gf_proc_dump_build_key (key, key_prefix, "cache_used");
- gf_proc_dump_write (key, "%ld", priv->cache_used);
- gf_proc_dump_build_key (key, key_prefix, "inode_count");
- gf_proc_dump_write (key, "%u", priv->inode_count);
+ ret = pthread_mutex_trylock (&priv->table_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("page_size", "%ld", priv->page_size);
+ gf_proc_dump_write ("cache_size", "%ld", priv->cache_size);
+ gf_proc_dump_write ("cache_used", "%ld", priv->cache_used);
+ gf_proc_dump_write ("inode_count", "%u", priv->inode_count);
+ gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout);
+ gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size);
+ gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size);
+ }
+ pthread_mutex_unlock (&priv->table_lock);
+out:
+ if (ret && priv) {
+ if (!add_section) {
+ gf_proc_dump_build_key (key_prefix, "xlator."
+ "performance.io-cache", "priv");
+ gf_proc_dump_add_section (key_prefix);
+ }
+ gf_proc_dump_write ("Unable to dump the state of private "
+ "structure of io-cache xlator", "(Lock "
+ "acquisition failed) %s", this->name);
+ }
return 0;
}
/*
* fini -
- *
+ *
* @this:
*
*/
void
fini (xlator_t *this)
{
- ioc_table_t *table = NULL;
+ ioc_table_t *table = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
table = this->private;
if (table == NULL)
return;
+ this->private = NULL;
+
if (table->mem_pool != NULL) {
mem_pool_destroy (table->mem_pool);
table->mem_pool = NULL;
}
- pthread_mutex_destroy (&table->table_lock);
- GF_FREE (table);
+ list_for_each_entry_safe (curr, tmp, &table->priority_list, list) {
+ list_del_init (&curr->list);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
+ }
+
+ /* inode_lru and inodes list can be empty in case fini() is
+ * called soon after init()? Hence commenting the below asserts.
+ */
+ /*for (i = 0; i < table->max_pri; i++) {
+ GF_ASSERT (list_empty (&table->inode_lru[i]));
+ }
+
+ GF_ASSERT (list_empty (&table->inodes));
+ */
+ pthread_mutex_destroy (&table->table_lock);
+ GF_FREE (table);
- this->private = NULL;
- return;
+ this->private = NULL;
+ return;
}
struct xlator_fops fops = {
- .open = ioc_open,
- .create = ioc_create,
- .readv = ioc_readv,
- .writev = ioc_writev,
- .truncate = ioc_truncate,
- .ftruncate = ioc_ftruncate,
- .lookup = ioc_lookup,
- .lk = ioc_lk,
- .setattr = ioc_setattr
+ .open = ioc_open,
+ .create = ioc_create,
+ .readv = ioc_readv,
+ .writev = ioc_writev,
+ .truncate = ioc_truncate,
+ .ftruncate = ioc_ftruncate,
+ .lookup = ioc_lookup,
+ .lk = ioc_lk,
+ .setattr = ioc_setattr,
+ .mknod = ioc_mknod,
+
+ .readdirp = ioc_readdirp,
+ .discard = ioc_discard,
+ .zerofill = ioc_zerofill,
};
struct xlator_dumpops dumpops = {
.priv = ioc_priv_dump,
+ .inodectx = ioc_inode_dump,
};
struct xlator_cbks cbks = {
- .forget = ioc_forget,
- .release = ioc_release
+ .forget = ioc_forget,
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"cache-timeout", "force-revalidate-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 60
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 4 * GF_UNIT_MB,
- .max = 6 * GF_UNIT_GB
- },
+ { .key = {"priority"},
+ .type = GF_OPTION_TYPE_PRIORITY_LIST,
+ .default_value = "",
+ .description = "Assigns priority to filenames with specific "
+ "patterns so that when a page needs to be ejected "
+ "out of the cache, the page of a file whose "
+ "priority is the lowest will be ejected earlier"
+ },
+ { .key = {"cache-timeout", "force-revalidate-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 60,
+ .default_value = "1",
+ .description = "The cached data for a file will be retained till "
+ "'cache-refresh-timeout' seconds, after which data "
+ "re-validation is performed."
+ },
+ { .key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4 * GF_UNIT_MB,
+ .max = 32 * GF_UNIT_GB,
+ .default_value = "32MB",
+ .description = "Size of the read cache."
+ },
{ .key = {"min-file-size"},
.type = GF_OPTION_TYPE_SIZET,
- .min = -1,
- .max = -1
+ .default_value = "0",
+ .description = "Minimum file size which would be cached by the "
+ "io-cache translator."
},
{ .key = {"max-file-size"},
.type = GF_OPTION_TYPE_SIZET,
- .min = -1,
- .max = -1
+ .default_value = "0",
+ .description = "Maximum file size which would be cached by the "
+ "io-cache translator."
},
- { .key = {NULL} },
+ { .key = {NULL} },
};
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
index 0b164efe4f4..d7c823fe962 100644
--- a/xlators/performance/io-cache/src/io-cache.h
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IO_CACHE_H
#define __IO_CACHE_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/types.h>
#include "compat-errno.h"
@@ -38,6 +24,7 @@
#include "hashfn.h"
#include <sys/time.h>
#include <fnmatch.h>
+#include "io-cache-messages.h"
#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
#define IOC_CACHE_SIZE (32 * 1024 * 1024)
@@ -49,131 +36,134 @@ struct ioc_page;
struct ioc_inode;
struct ioc_priority {
- struct list_head list;
- char *pattern;
- uint32_t priority;
+ struct list_head list;
+ char *pattern;
+ uint32_t priority;
};
/*
- * ioc_waitq - this structure is used to represents the waiting
+ * ioc_waitq - this structure is used to represents the waiting
* frames on a page
*
* @next: pointer to next object in waitq
* @data: pointer to the frame which is waiting
*/
struct ioc_waitq {
- struct ioc_waitq *next;
- void *data;
- off_t pending_offset;
- size_t pending_size;
+ struct ioc_waitq *next;
+ void *data;
+ off_t pending_offset;
+ size_t pending_size;
};
/*
- * ioc_fill -
+ * ioc_fill -
*
*/
struct ioc_fill {
- struct list_head list; /* list of ioc_fill structures of a frame */
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
- struct iobref *iobref;
+ struct list_head list; /* list of ioc_fill structures of a frame */
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
};
struct ioc_local {
- mode_t mode;
- int32_t flags;
- loc_t file_loc;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- struct list_head fill_list; /* list of ioc_fill structures */
- off_t pending_offset; /*
+ mode_t mode;
+ int32_t flags;
+ loc_t file_loc;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ struct list_head fill_list; /* list of ioc_fill structures */
+ off_t pending_offset; /*
* offset from this frame should
* continue
*/
- size_t pending_size; /*
+ size_t pending_size; /*
* size of data this frame is waiting
* on
*/
- struct ioc_inode *inode;
- int32_t wait_count;
- pthread_mutex_t local_lock;
- struct ioc_waitq *waitq;
- void *stub;
- fd_t *fd;
- int32_t need_xattr;
- dict_t *xattr_req;
+ struct ioc_inode *inode;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
+ struct ioc_waitq *waitq;
+ void *stub;
+ fd_t *fd;
+ int32_t need_xattr;
+ dict_t *xattr_req;
};
/*
- * ioc_page - structure to store page of data from file
+ * ioc_page - structure to store page of data from file
*
*/
struct ioc_page {
- struct list_head page_lru;
- struct ioc_inode *inode; /* inode this page belongs to */
- struct ioc_priority *priority;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ioc_waitq *waitq;
- struct iobref *iobref;
- pthread_mutex_t page_lock;
+ struct list_head page_lru;
+ struct ioc_inode *inode; /* inode this page belongs to */
+ struct ioc_priority *priority;
+ char dirty;
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ioc_waitq *waitq;
+ struct iobref *iobref;
+ pthread_mutex_t page_lock;
+ int32_t op_errno;
+ char stale;
};
struct ioc_cache {
rbthash_table_t *page_table;
struct list_head page_lru;
- time_t mtime; /*
+ time_t mtime; /*
* seconds component of file mtime
*/
time_t mtime_nsec; /*
* nanosecond component of file mtime
*/
- struct timeval tv; /*
+ struct timeval tv; /*
* time-stamp at last re-validate
*/
};
struct ioc_inode {
- struct ioc_table *table;
+ struct ioc_table *table;
off_t ia_size;
- struct ioc_cache cache;
- struct list_head inode_list; /*
+ struct ioc_cache cache;
+ struct list_head inode_list; /*
* list of inodes, maintained by
* io-cache translator
*/
- struct list_head inode_lru;
- struct ioc_waitq *waitq;
- pthread_mutex_t inode_lock;
- uint32_t weight; /*
+ struct list_head inode_lru;
+ struct ioc_waitq *waitq;
+ pthread_mutex_t inode_lock;
+ uint32_t weight; /*
* weight of the inode, increases
* on each read
*/
+ inode_t *inode;
};
struct ioc_table {
- uint64_t page_size;
- uint64_t cache_size;
- uint64_t cache_used;
- int64_t min_file_size;
- int64_t max_file_size;
- struct list_head inodes; /* list of inodes cached */
- struct list_head active;
- struct list_head *inode_lru;
- struct list_head priority_list;
- int32_t readv_count;
- pthread_mutex_t table_lock;
- xlator_t *xl;
- uint32_t inode_count;
- int32_t cache_timeout;
- int32_t max_pri;
+ uint64_t page_size;
+ uint64_t cache_size;
+ uint64_t cache_used;
+ uint64_t min_file_size;
+ uint64_t max_file_size;
+ struct list_head inodes; /* list of inodes cached */
+ struct list_head active;
+ struct list_head *inode_lru;
+ struct list_head priority_list;
+ int32_t readv_count;
+ pthread_mutex_t table_lock;
+ xlator_t *xl;
+ uint32_t inode_count;
+ int32_t cache_timeout;
+ int32_t max_pri;
struct mem_pool *mem_pool;
};
@@ -190,36 +180,33 @@ str_to_ptr (char *string);
char *
ptr_to_str (void *ptr);
-int32_t
+int32_t
ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf,
- struct iobref *iobref);
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
ioc_page_t *
-ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset);
+ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset);
void
-ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
+__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size);
ioc_waitq_t *
-ioc_page_wakeup (ioc_page_t *page);
+__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno);
void
ioc_page_flush (ioc_page_t *page);
ioc_waitq_t *
-ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
-
-void
-ioc_page_purge (ioc_page_t *page);
+__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
void
ioc_frame_return (call_frame_t *frame);
@@ -229,95 +216,95 @@ ioc_waitq_return (ioc_waitq_t *waitq);
int32_t
ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
+ size_t size, int32_t op_errno);
-#define ioc_inode_lock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "locked inode(%p)", ioc_inode); \
- pthread_mutex_lock (&ioc_inode->inode_lock); \
- } while (0)
+#define ioc_inode_lock(ioc_inode) \
+ do { \
+ gf_msg_trace (ioc_inode->table->xl->name, 0, \
+ "locked inode(%p)", ioc_inode); \
+ pthread_mutex_lock (&ioc_inode->inode_lock); \
+ } while (0)
-#define ioc_inode_unlock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked inode(%p)", ioc_inode); \
- pthread_mutex_unlock (&ioc_inode->inode_lock); \
- } while (0)
+#define ioc_inode_unlock(ioc_inode) \
+ do { \
+ gf_msg_trace (ioc_inode->table->xl->name, 0, \
+ "unlocked inode(%p)", ioc_inode); \
+ pthread_mutex_unlock (&ioc_inode->inode_lock); \
+ } while (0)
-#define ioc_table_lock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "locked table(%p)", table); \
- pthread_mutex_lock (&table->table_lock); \
- } while (0)
+#define ioc_table_lock(table) \
+ do { \
+ gf_msg_trace (table->xl->name, 0, \
+ "locked table(%p)", table); \
+ pthread_mutex_lock (&table->table_lock); \
+ } while (0)
-#define ioc_table_unlock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "unlocked table(%p)", table); \
- pthread_mutex_unlock (&table->table_lock); \
- } while (0)
+#define ioc_table_unlock(table) \
+ do { \
+ gf_msg_trace (table->xl->name, 0, \
+ "unlocked table(%p)", table); \
+ pthread_mutex_unlock (&table->table_lock); \
+ } while (0)
-#define ioc_local_lock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "locked local(%p)", local); \
- pthread_mutex_lock (&local->local_lock); \
- } while (0)
+#define ioc_local_lock(local) \
+ do { \
+ gf_msg_trace (local->inode->table->xl->name, 0, \
+ "locked local(%p)", local); \
+ pthread_mutex_lock (&local->local_lock); \
+ } while (0)
-#define ioc_local_unlock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked local(%p)", local); \
- pthread_mutex_unlock (&local->local_lock); \
- } while (0)
+#define ioc_local_unlock(local) \
+ do { \
+ gf_msg_trace (local->inode->table->xl->name, 0, \
+ "unlocked local(%p)", local); \
+ pthread_mutex_unlock (&local->local_lock); \
+ } while (0)
-#define ioc_page_lock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "locked page(%p)", page); \
- pthread_mutex_lock (&page->page_lock); \
- } while (0)
+#define ioc_page_lock(page) \
+ do { \
+ gf_msg_trace (page->inode->table->xl->name, 0, \
+ "locked page(%p)", page); \
+ pthread_mutex_lock (&page->page_lock); \
+ } while (0)
-#define ioc_page_unlock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked page(%p)", page); \
- pthread_mutex_unlock (&page->page_lock); \
- } while (0)
+#define ioc_page_unlock(page) \
+ do { \
+ gf_msg_trace (page->inode->table->xl->name, 0, \
+ "unlocked page(%p)", page); \
+ pthread_mutex_unlock (&page->page_lock); \
+ } while (0)
static inline uint64_t
time_elapsed (struct timeval *now,
- struct timeval *then)
+ struct timeval *then)
{
- uint64_t sec = now->tv_sec - then->tv_sec;
+ uint64_t sec = now->tv_sec - then->tv_sec;
+
+ if (sec)
+ return sec;
- if (sec)
- return sec;
-
- return 0;
+ return 0;
}
ioc_inode_t *
ioc_inode_search (ioc_table_t *table, inode_t *inode);
-void
+void
ioc_inode_destroy (ioc_inode_t *ioc_inode);
ioc_inode_t *
ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight);
-int64_t
-ioc_page_destroy (ioc_page_t *page);
+int64_t
+__ioc_page_destroy (ioc_page_t *page);
int64_t
__ioc_inode_flush (ioc_inode_t *ioc_inode);
@@ -338,6 +325,4 @@ ioc_prune (ioc_table_t *table);
int32_t
ioc_need_prune (ioc_table_t *table);
-inline uint32_t
-ioc_hashfn (void *data, int len);
#endif /* __IO_CACHE_H */
diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c
index 5619134814c..cee3bad8c22 100644
--- a/xlators/performance/io-cache/src/ioc-inode.c
+++ b/xlators/performance/io-cache/src/ioc-inode.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "io-cache.h"
#include "ioc-mem-types.h"
@@ -35,10 +21,14 @@ extern int ioc_log2_page_size;
void *
str_to_ptr (char *string)
{
- void *ptr = NULL;
+ void *ptr = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", string, out);
ptr = (void *)strtoul (string, NULL, 16);
- return ptr;
+
+out:
+ return ptr;
}
@@ -51,102 +41,132 @@ char *
ptr_to_str (void *ptr)
{
int ret = 0;
- char *str = NULL;
- ret = gf_asprintf (&str, "%p", ptr);
+ char *str = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ptr, out);
+
+ ret = gf_asprintf (&str, "%p", ptr);
if (-1 == ret) {
- gf_log ("ioc", GF_LOG_ERROR,
+ gf_msg ("io-cache", GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_STR_COVERSION_FAILED,
"asprintf failed while converting ptr to str");
- return NULL;
+ str = NULL;
+ goto out;
}
- return str;
+
+out:
+ return str;
}
+
void
-ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
+ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
struct iatt *stbuf)
{
- ioc_waitq_t *waiter = NULL, *waited = NULL;
- ioc_waitq_t *page_waitq = NULL;
- int8_t cache_still_valid = 1;
- ioc_local_t *local = NULL;
- int8_t need_fault = 0;
- ioc_page_t *waiter_page = NULL;
+ ioc_waitq_t *waiter = NULL, *waited = NULL;
+ ioc_waitq_t *page_waitq = NULL;
+ int8_t cache_still_valid = 1;
+ ioc_local_t *local = NULL;
+ int8_t need_fault = 0;
+ ioc_page_t *waiter_page = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
local = frame->local;
- ioc_inode_lock (ioc_inode);
- {
- waiter = ioc_inode->waitq;
- ioc_inode->waitq = NULL;
- }
- ioc_inode_unlock (ioc_inode);
-
- if (stbuf)
- cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf);
- else
- cache_still_valid = 0;
-
- if (!waiter) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "cache validate called without any "
- "page waiting to be validated");
- }
-
- while (waiter) {
- waiter_page = waiter->data;
- page_waitq = NULL;
-
- if (waiter_page) {
- if (cache_still_valid) {
- /* cache valid, wake up page */
- ioc_inode_lock (ioc_inode);
- {
- page_waitq =
- ioc_page_wakeup (waiter_page);
- }
- ioc_inode_unlock (ioc_inode);
- if (page_waitq)
- ioc_waitq_return (page_waitq);
- } else {
- /* cache invalid, generate page fault and set
- * page->ready = 0, to avoid double faults
- */
- ioc_inode_lock (ioc_inode);
-
- if (waiter_page->ready) {
- waiter_page->ready = 0;
- need_fault = 1;
- } else {
- gf_log (frame->this->name,
- GF_LOG_TRACE,
- "validate frame(%p) is waiting"
- "for in-transit page = %p",
- frame, waiter_page);
- }
-
- ioc_inode_unlock (ioc_inode);
-
- if (need_fault) {
- need_fault = 0;
- ioc_page_fault (ioc_inode, frame,
- local->fd,
- waiter_page->offset);
- }
- }
- }
-
- waited = waiter;
- waiter = waiter->next;
-
- waited->data = NULL;
- GF_FREE (waited);
- }
+ GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
+
+ if (ioc_inode == NULL) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_INODE_NULL, "ioc_inode is NULL");
+ goto out;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ waiter = ioc_inode->waitq;
+ ioc_inode->waitq = NULL;
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (stbuf)
+ cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf);
+ else
+ cache_still_valid = 0;
+
+ if (!waiter) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_PAGE_WAIT_VALIDATE,
+ "cache validate called without any "
+ "page waiting to be validated");
+ }
+
+ while (waiter) {
+ waiter_page = waiter->data;
+ page_waitq = NULL;
+
+ if (waiter_page) {
+ if (cache_still_valid) {
+ /* cache valid, wake up page */
+ ioc_inode_lock (ioc_inode);
+ {
+ page_waitq =
+ __ioc_page_wakeup (waiter_page,
+ waiter_page->op_errno);
+ }
+ ioc_inode_unlock (ioc_inode);
+ if (page_waitq)
+ ioc_waitq_return (page_waitq);
+ } else {
+ /* cache invalid, generate page fault and set
+ * page->ready = 0, to avoid double faults
+ */
+ ioc_inode_lock (ioc_inode);
+ {
+ if (waiter_page->ready) {
+ waiter_page->ready = 0;
+ need_fault = 1;
+ } else {
+ gf_msg_trace (frame->this->name,
+ 0,
+ "validate "
+ "frame(%p) is "
+ "waiting for "
+ "in-transit"
+ " page = %p",
+ frame,
+ waiter_page);
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (need_fault) {
+ need_fault = 0;
+ ioc_page_fault (ioc_inode, frame,
+ local->fd,
+ waiter_page->offset);
+ }
+ }
+ }
+
+ waited = waiter;
+ waiter = waiter->next;
+
+ waited->data = NULL;
+ GF_FREE (waited);
+ }
+
+out:
+ return;
}
-/*
- * ioc_inode_update - create a new ioc_inode_t structure and add it to
- * the table table. fill in the fields which are derived
+
+/*
+ * ioc_inode_update - create a new ioc_inode_t structure and add it to
+ * the table table. fill in the fields which are derived
* from inode_t corresponding to the file
- *
+ *
* @table: io-table structure
* @inode: inode structure
*
@@ -155,65 +175,67 @@ ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
ioc_inode_t *
ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight)
{
- ioc_inode_t *ioc_inode = NULL;
- unsigned long no_of_pages = 0;
+ ioc_inode_t *ioc_inode = NULL;
- ioc_inode = GF_CALLOC (1, sizeof (ioc_inode_t),
- gf_ioc_mt_ioc_inode_t);
+ GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+
+ ioc_inode = GF_CALLOC (1, sizeof (ioc_inode_t), gf_ioc_mt_ioc_inode_t);
if (ioc_inode == NULL) {
goto out;
}
-
- ioc_inode->table = table;
-
- no_of_pages = (table->cache_size / table->page_size)
- + ((table->cache_size % table->page_size) ? 1 : 0);
- INIT_LIST_HEAD (&ioc_inode->cache.page_lru);
-
- ioc_table_lock (table);
-
- table->inode_count++;
- list_add (&ioc_inode->inode_list, &table->inodes);
- list_add_tail (&ioc_inode->inode_lru, &table->inode_lru[weight]);
-
- gf_log (table->xl->name,
- GF_LOG_TRACE,
- "adding to inode_lru[%d]", weight);
+ ioc_inode->inode = inode;
+ ioc_inode->table = table;
+ INIT_LIST_HEAD (&ioc_inode->cache.page_lru);
+ pthread_mutex_init (&ioc_inode->inode_lock, NULL);
+ ioc_inode->weight = weight;
+
+ ioc_table_lock (table);
+ {
+ table->inode_count++;
+ list_add (&ioc_inode->inode_list, &table->inodes);
+ list_add_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[weight]);
+ }
+ ioc_table_unlock (table);
- ioc_table_unlock (table);
+ gf_msg_trace (table->xl->name, 0,
+ "adding to inode_lru[%d]", weight);
- pthread_mutex_init (&ioc_inode->inode_lock, NULL);
- ioc_inode->weight = weight;
-
out:
- return ioc_inode;
+ return ioc_inode;
}
-/*
+/*
* ioc_inode_destroy - destroy an ioc_inode_t object.
*
* @inode: inode to destroy
*
- * to be called only from ioc_forget.
+ * to be called only from ioc_forget.
*/
void
ioc_inode_destroy (ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = NULL;
+ ioc_table_t *table = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
table = ioc_inode->table;
- ioc_table_lock (table);
- table->inode_count--;
- list_del (&ioc_inode->inode_list);
- list_del (&ioc_inode->inode_lru);
- ioc_table_unlock (table);
-
- ioc_inode_flush (ioc_inode);
+ ioc_table_lock (table);
+ {
+ table->inode_count--;
+ list_del (&ioc_inode->inode_list);
+ list_del (&ioc_inode->inode_lru);
+ }
+ ioc_table_unlock (table);
+
+ ioc_inode_flush (ioc_inode);
rbthash_table_destroy (ioc_inode->cache.page_table);
- pthread_mutex_destroy (&ioc_inode->inode_lock);
- GF_FREE (ioc_inode);
+ pthread_mutex_destroy (&ioc_inode->inode_lock);
+ GF_FREE (ioc_inode);
+out:
+ return;
}
diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h
index d1da65ca1c7..9b68f9fce5f 100644
--- a/xlators/performance/io-cache/src/ioc-mem-types.h
+++ b/xlators/performance/io-cache/src/ioc-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IOC_MT_H__
@@ -26,7 +17,6 @@ enum gf_ioc_mem_types_ {
gf_ioc_mt_iovec = gf_common_mt_end + 1,
gf_ioc_mt_ioc_table_t,
gf_ioc_mt_char,
- gf_ioc_mt_ioc_local_t,
gf_ioc_mt_ioc_waitq_t,
gf_ioc_mt_ioc_priority,
gf_ioc_mt_list_head,
@@ -37,4 +27,3 @@ enum gf_ioc_mem_types_ {
gf_ioc_mt_end
};
#endif
-
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index 7209abb08a3..50f5e190e21 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -30,78 +16,176 @@
#include "ioc-mem-types.h"
#include <assert.h>
#include <sys/time.h>
-
+#include "io-cache-messages.h"
char
ioc_empty (struct ioc_cache *cache)
{
- return list_empty (&cache->page_lru);
+ char is_empty = -1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", cache, out);
+
+ is_empty = list_empty (&cache->page_lru);
+
+out:
+ return is_empty;
}
+
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_page_t *page = NULL;
- ioc_table_t *table = NULL;
- off_t rounded_offset = 0;
+ ioc_page_t *page = NULL;
+ ioc_table_t *table = NULL;
+ off_t rounded_offset = 0;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+
rounded_offset = floor (offset, table->page_size);
-
+
page = rbthash_get (ioc_inode->cache.page_table, &rounded_offset,
sizeof (rounded_offset));
if (page != NULL) {
- /* push the page to the end of the lru list */
- list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
- }
+ /* push the page to the end of the lru list */
+ list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
+ }
- return page;
+out:
+ return page;
+}
+
+
+ioc_page_t *
+ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+{
+ ioc_page_t *page = NULL;
+
+ if (ioc_inode == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ page = __ioc_page_get (ioc_inode, offset);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+out:
+ return page;
}
/*
- * ioc_page_destroy -
+ * __ioc_page_destroy -
*
* @page:
*
*/
int64_t
-ioc_page_destroy (ioc_page_t *page)
+__ioc_page_destroy (ioc_page_t *page)
{
- int64_t page_size = 0;
+ int64_t page_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", page, out);
- page_size = iobref_size (page->iobref);
+ if (page->iobref)
+ page_size = iobref_size (page->iobref);
- if (page->waitq) {
- /* frames waiting on this page, do not destroy this page */
- page_size = -1;
- } else {
+ if (page->waitq) {
+ /* frames waiting on this page, do not destroy this page */
+ page_size = -1;
+ page->stale = 1;
+ } else {
rbthash_remove (page->inode->cache.page_table, &page->offset,
sizeof (page->offset));
- list_del (&page->page_lru);
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "destroying page = %p, offset = %"PRId64" "
- "&& inode = %p",
- page, page->offset, page->inode);
-
- if (page->vector){
- iobref_unref (page->iobref);
- GF_FREE (page->vector);
- page->vector = NULL;
- }
-
- page->inode = NULL;
- }
-
- if (page_size != -1) {
- pthread_mutex_destroy (&page->page_lock);
- GF_FREE (page);
- }
-
- return page_size;
+ list_del (&page->page_lru);
+
+ gf_msg_trace (page->inode->table->xl->name, 0,
+ "destroying page = %p, offset = %"PRId64" "
+ "&& inode = %p",
+ page, page->offset, page->inode);
+
+ if (page->vector){
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ page->vector = NULL;
+ }
+
+ page->inode = NULL;
+ }
+
+ if (page_size != -1) {
+ pthread_mutex_destroy (&page->page_lock);
+ GF_FREE (page);
+ }
+
+out:
+ return page_size;
+}
+
+
+int64_t
+ioc_page_destroy (ioc_page_t *page)
+{
+ int64_t ret = 0;
+ struct ioc_inode *inode = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock (page->inode);
+ {
+ inode = page->inode;
+ ret = __ioc_page_destroy (page);
+ }
+ ioc_inode_unlock (inode);
+
+out:
+ return ret;
}
+int32_t
+__ioc_inode_prune (ioc_inode_t *curr, uint64_t *size_pruned,
+ uint64_t size_to_prune, uint32_t index)
+{
+ ioc_page_t *page = NULL, *next = NULL;
+ int32_t ret = 0;
+ ioc_table_t *table = NULL;
+
+ if (curr == NULL) {
+ goto out;
+ }
+
+ table = curr->table;
+
+ list_for_each_entry_safe (page, next, &curr->cache.page_lru, page_lru) {
+ *size_pruned += page->size;
+ ret = __ioc_page_destroy (page);
+
+ if (ret != -1)
+ table->cache_used -= ret;
+
+ gf_msg_trace (table->xl->name, 0,
+ "index = %d && "
+ "table->cache_used = %"PRIu64" && table->"
+ "cache_size = %"PRIu64, index, table->cache_used,
+ table->cache_size);
+
+ if ((*size_pruned) >= size_to_prune)
+ break;
+ }
+
+ if (ioc_empty (&curr->cache)) {
+ list_del_init (&curr->inode_lru);
+ }
+
+out:
+ return 0;
+}
/*
* ioc_prune - prune the cache. we have a limit to the number of pages we
* can have in-memory.
@@ -112,162 +196,151 @@ ioc_page_destroy (ioc_page_t *page)
int32_t
ioc_prune (ioc_table_t *table)
{
- ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
- ioc_page_t *page = NULL, *next = NULL;
- int32_t ret = -1;
- int32_t index = 0;
- uint64_t size_to_prune = 0;
- uint64_t size_pruned = 0;
-
- ioc_table_lock (table);
- {
- size_to_prune = table->cache_used - table->cache_size;
- /* take out the least recently used inode */
- for (index=0; index < table->max_pri; index++) {
- list_for_each_entry_safe (curr, next_ioc_inode,
- &table->inode_lru[index],
- inode_lru) {
- /* prune page-by-page for this inode, till
- * we reach the equilibrium */
- ioc_inode_lock (curr);
- /* { */
-
- list_for_each_entry_safe (page, next,
- &curr->cache.page_lru,
- page_lru) {
- /* done with all pages, and not
- * reached equilibrium yet??
- * continue with next inode in
- * lru_list */
- size_pruned += page->size;
- ret = ioc_page_destroy (page);
-
- if (ret != -1)
- table->cache_used -= ret;
-
- gf_log (table->xl->name,
- GF_LOG_TRACE,
- "index = %d && table->cache_"
- "used = %"PRIu64" && table->"
- "cache_size = %"PRIu64,
- index, table->cache_used,
- table->cache_size);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe(page...) */
- if (ioc_empty (&curr->cache)) {
- list_del_init (&curr->inode_lru);
- }
-
- /* } */
- ioc_inode_unlock (curr);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe (curr...) */
-
- if (size_pruned >= size_to_prune)
- break;
- } /* for(index=0;...) */
-
- } /* ioc_inode_table locked region end */
- ioc_table_unlock (table);
-
- return 0;
+ ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
+ int32_t index = 0;
+ uint64_t size_to_prune = 0;
+ uint64_t size_pruned = 0;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+
+ ioc_table_lock (table);
+ {
+ size_to_prune = table->cache_used - table->cache_size;
+ /* take out the least recently used inode */
+ for (index=0; index < table->max_pri; index++) {
+ list_for_each_entry_safe (curr, next_ioc_inode,
+ &table->inode_lru[index],
+ inode_lru) {
+ /* prune page-by-page for this inode, till
+ * we reach the equilibrium */
+ ioc_inode_lock (curr);
+ {
+ __ioc_inode_prune (curr, &size_pruned,
+ size_to_prune,
+ index);
+ }
+ ioc_inode_unlock (curr);
+
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* list_for_each_entry_safe (curr...) */
+
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* for(index=0;...) */
+
+ } /* ioc_inode_table locked region end */
+ ioc_table_unlock (table);
+
+out:
+ return 0;
}
/*
- * ioc_page_create - create a new page.
+ * __ioc_page_create - create a new page.
*
- * @ioc_inode:
+ * @ioc_inode:
* @offset:
*
*/
ioc_page_t *
-ioc_page_create (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t rounded_offset = 0;
- ioc_page_t *newpage = NULL;
-
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ioc_page_t *newpage = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+
table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+
rounded_offset = floor (offset, table->page_size);
- newpage = GF_CALLOC (1, sizeof (*newpage),
- gf_ioc_mt_ioc_newpage_t);
+ newpage = GF_CALLOC (1, sizeof (*newpage), gf_ioc_mt_ioc_newpage_t);
if (newpage == NULL) {
goto out;
}
- if (ioc_inode) {
- table = ioc_inode->table;
- } else {
+ if (!ioc_inode) {
GF_FREE (newpage);
newpage = NULL;
goto out;
- }
-
- newpage->offset = rounded_offset;
- newpage->inode = ioc_inode;
- pthread_mutex_init (&newpage->page_lock, NULL);
+ }
+
+ newpage->offset = rounded_offset;
+ newpage->inode = ioc_inode;
+ pthread_mutex_init (&newpage->page_lock, NULL);
rbthash_insert (ioc_inode->cache.page_table, newpage, &rounded_offset,
sizeof (rounded_offset));
-
- list_add_tail (&newpage->page_lru, &ioc_inode->cache.page_lru);
- page = newpage;
+ list_add_tail (&newpage->page_lru, &ioc_inode->cache.page_lru);
+
+ page = newpage;
- gf_log ("io-cache", GF_LOG_TRACE,
- "returning new page %p", page);
+ gf_msg_trace ("io-cache", 0,
+ "returning new page %p", page);
out:
- return page;
+ return page;
}
-/*
- * ioc_wait_on_page - pause a frame to wait till the arrival of a page.
- * here we need to handle the case when the frame who calls wait_on_page
- * himself has caused page_fault
+/*
+ * ioc_wait_on_page - pause a frame to wait till the arrival of a page.
+ * here we need to handle the case when the frame who calls wait_on_page
+ * himself has caused page_fault
*
* @page: page to wait on
* @frame: call frame who is waiting on page
*
*/
void
-ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
+__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size)
{
- ioc_waitq_t *waitq = NULL;
- ioc_local_t *local = frame->local;
+ ioc_waitq_t *waitq = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
+
+ if (page == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_msg (frame->this->name, GF_LOG_WARNING,
+ 0, IO_CACHE_MSG_NO_MEMORY,
+ "asked to wait on a NULL page");
+ goto out;
+ }
- waitq = GF_CALLOC (1, sizeof (*waitq), gf_ioc_mt_ioc_waitq_t);
+ waitq = GF_CALLOC (1, sizeof (*waitq), gf_ioc_mt_ioc_waitq_t);
if (waitq == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
goto out;
- }
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) waiting on page = %p, offset=%"PRId64", "
- "size=%"GF_PRI_SIZET"",
- frame, page, offset, size);
-
- waitq->data = frame;
- waitq->next = page->waitq;
- waitq->pending_offset = offset;
- waitq->pending_size = size;
- page->waitq = waitq;
- /* one frame can wait only once on a given page,
- * local->wait_count is number of pages a frame is waiting on */
- ioc_local_lock (local);
- {
- local->wait_count++;
- }
- ioc_local_unlock (local);
+ }
+
+ gf_msg_trace (frame->this->name, 0,
+ "frame(%p) waiting on page = %p, offset=%"PRId64", "
+ "size=%"GF_PRI_SIZET"",
+ frame, page, offset, size);
+
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ waitq->pending_offset = offset;
+ waitq->pending_size = size;
+ page->waitq = waitq;
+ /* one frame can wait only once on a given page,
+ * local->wait_count is number of pages a frame is waiting on */
+ ioc_local_lock (local);
+ {
+ local->wait_count++;
+ }
+ ioc_local_unlock (local);
out:
return;
@@ -275,7 +348,7 @@ out:
/*
- * ioc_cache_still_valid - see if cached pages ioc_inode are still valid
+ * ioc_cache_still_valid - see if cached pages ioc_inode are still valid
* against given stbuf
*
* @ioc_inode:
@@ -286,202 +359,213 @@ out:
int8_t
ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf)
{
- int8_t cache_still_valid = 1;
+ int8_t cache_still_valid = 1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
#if 0
- if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
- (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
- cache_still_valid = 0;
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
+ (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
+ cache_still_valid = 0;
#else
- if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime)
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime)
|| (stbuf->ia_mtime_nsec != ioc_inode->cache.mtime_nsec))
- cache_still_valid = 0;
+ cache_still_valid = 0;
#endif
#if 0
- /* talk with avati@gluster.com to enable this section */
- if (!ioc_inode->mtime && stbuf) {
- cache_still_valid = 1;
- ioc_inode->mtime = stbuf->ia_mtime;
- }
+ /* talk with avati@gluster.com to enable this section */
+ if (!ioc_inode->mtime && stbuf) {
+ cache_still_valid = 1;
+ ioc_inode->mtime = stbuf->ia_mtime;
+ }
#endif
- return cache_still_valid;
+out:
+ return cache_still_valid;
}
void
ioc_waitq_return (ioc_waitq_t *waitq)
{
- ioc_waitq_t *trav = NULL;
- ioc_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ioc_waitq_t *trav = NULL;
+ ioc_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ioc_frame_return (frame);
- GF_FREE (trav);
- }
+ frame = trav->data;
+ ioc_frame_return (frame);
+ GF_FREE (trav);
+ }
}
int
ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- off_t offset = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- int32_t destroy_size = 0;
- size_t page_size = 0;
- ioc_waitq_t *waitq = NULL;
- size_t iobref_page_size = 0;
- char zero_filled = 0;
+ ioc_local_t *local = NULL;
+ off_t offset = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int32_t destroy_size = 0;
+ size_t page_size = 0;
+ ioc_waitq_t *waitq = NULL;
+ size_t iobref_page_size = 0;
+ char zero_filled = 0;
+
+ GF_ASSERT (frame);
local = frame->local;
+ GF_ASSERT (local);
+
offset = local->pending_offset;
ioc_inode = local->inode;
+ GF_ASSERT (ioc_inode);
+
table = ioc_inode->table;
+ GF_ASSERT (table);
+
+ zero_filled = ((op_ret >=0) && (stbuf->ia_mtime == 0));
+
+ ioc_inode_lock (ioc_inode);
+ {
+ if (op_ret == -1 || !(zero_filled ||
+ ioc_cache_still_valid(ioc_inode,
+ stbuf))) {
+ gf_msg_trace (ioc_inode->table->xl->name, 0,
+ "cache for inode(%p) is invalid. flushing "
+ "all pages", ioc_inode);
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ }
- trav_offset = offset;
- payload_size = op_ret;
-
- zero_filled = ((op_ret >=0)
- && (stbuf->ia_mtime == 0));
-
- ioc_inode_lock (ioc_inode);
- {
- if (op_ret == -1 ||
- !(zero_filled ||
- ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "cache for inode(%p) is invalid. flushing "
- "all pages", ioc_inode);
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
-
- if ((op_ret >= 0) && !zero_filled) {
- ioc_inode->cache.mtime = stbuf->ia_mtime;
+ if ((op_ret >= 0) && !zero_filled) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
}
- gettimeofday (&ioc_inode->cache.tv, NULL);
-
- if (op_ret < 0) {
- /* error, readv returned -1 */
- page = ioc_page_get (ioc_inode, offset);
- if (page)
- waitq = ioc_page_error (page, op_ret,
- op_errno);
- } else {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "op_ret = %d", op_ret);
- page = ioc_page_get (ioc_inode, offset);
- if (!page) {
- /* page was flushed */
- /* some serious bug ? */
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] "
- "ioc_inode=%p", offset,
- table->page_size, ioc_inode);
- } else {
- if (page->vector) {
- iobref_unref (page->iobref);
- GF_FREE (page->vector);
- page->vector = NULL;
- }
-
- /* keep a copy of the page for our cache */
- page->vector = iov_dup (vector, count);
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+
+ if (op_ret < 0) {
+ /* error, readv returned -1 */
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page)
+ waitq = __ioc_page_error (page, op_ret,
+ op_errno);
+ } else {
+ gf_msg_trace (ioc_inode->table->xl->name, 0,
+ "op_ret = %d", op_ret);
+ page = __ioc_page_get (ioc_inode, offset);
+ if (!page) {
+ /* page was flushed */
+ /* some serious bug ? */
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_WASTED_COPY,
+ "wasted copy: %"PRId64"[+%"PRId64"] "
+ "ioc_inode=%p", offset,
+ table->page_size, ioc_inode);
+ } else {
+ if (page->vector) {
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ page->vector = NULL;
+ page->iobref = NULL;
+ }
+
+ /* keep a copy of the page for our cache */
+ page->vector = iov_dup (vector, count);
if (page->vector == NULL) {
- page = ioc_page_get (ioc_inode, offset);
+ page = __ioc_page_get (ioc_inode,
+ offset);
if (page != NULL)
- waitq = ioc_page_error (page,
- -1,
- ENOMEM);
- op_ret = -1;
- op_errno = ENOMEM;
+ waitq = __ioc_page_error (page,
+ -1,
+ ENOMEM);
goto unlock;
}
- page->count = count;
- if (iobref) {
- page->iobref = iobref_ref (iobref);
- } else {
- /* TODO: we have got a response to
- * our request and no data */
- gf_log (this->name, GF_LOG_CRITICAL,
- "frame>root>rsp_refs is null");
- } /* if(frame->root->rsp_refs) */
-
- /* page->size should indicate exactly how
- * much the readv call to the child
- * translator returned. earlier op_ret
- * from child translator was used, which
- * gave rise to a bug where reads from
- * io-cached volume were resulting in 0
- * byte replies */
- page_size = iov_length(vector, count);
-
- page->size = page_size;
+ page->count = count;
+ if (iobref) {
+ page->iobref = iobref_ref (iobref);
+ } else {
+ /* TODO: we have got a response to
+ * our request and no data */
+ gf_msg (frame->this->name,
+ GF_LOG_CRITICAL,
+ ENOMEM, IO_CACHE_MSG_NO_MEMORY,
+ "frame>root>rsp_refs is null");
+ } /* if(frame->root->rsp_refs) */
+
+ /* page->size should indicate exactly how
+ * much the readv call to the child
+ * translator returned. earlier op_ret
+ * from child translator was used, which
+ * gave rise to a bug where reads from
+ * io-cached volume were resulting in 0
+ * byte replies */
+ page_size = iov_length(vector, count);
+ page->size = page_size;
+ page->op_errno = op_errno;
iobref_page_size = iobref_size (page->iobref);
- if (page->waitq) {
- /* wake up all the frames waiting on
- * this page, including
- * the frame which triggered fault */
- waitq = ioc_page_wakeup (page);
- } /* if(page->waitq) */
- } /* if(!page)...else */
- } /* if(op_ret < 0)...else */
- } /* ioc_inode locked region end */
+ if (page->waitq) {
+ /* wake up all the frames waiting on
+ * this page, including
+ * the frame which triggered fault */
+ waitq = __ioc_page_wakeup (page,
+ op_errno);
+ } /* if(page->waitq) */
+ } /* if(!page)...else */
+ } /* if(op_ret < 0)...else */
+ } /* ioc_inode locked region end */
unlock:
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_unlock (ioc_inode);
- ioc_waitq_return (waitq);
+ ioc_waitq_return (waitq);
- if (iobref_page_size) {
- ioc_table_lock (table);
- {
- table->cache_used += iobref_page_size;
- }
- ioc_table_unlock (table);
- }
+ if (iobref_page_size) {
+ ioc_table_lock (table);
+ {
+ table->cache_used += iobref_page_size;
+ }
+ ioc_table_unlock (table);
+ }
- if (destroy_size) {
- ioc_table_lock (table);
- {
- table->cache_used -= destroy_size;
- }
- ioc_table_unlock (table);
- }
+ if (destroy_size) {
+ ioc_table_lock (table);
+ {
+ table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (table);
+ }
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
- gf_log (this->name, GF_LOG_TRACE, "fault frame %p returned", frame);
- pthread_mutex_destroy (&local->local_lock);
+ gf_msg_trace (frame->this->name, 0, "fault frame %p returned",
+ frame);
+ pthread_mutex_destroy (&local->local_lock);
- fd_unref (local->fd);
+ fd_unref (local->fd);
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_DESTROY (frame->root);
+ return 0;
}
+
/*
* ioc_page_fault -
- *
+ *
* @ioc_inode:
* @frame:
* @fd:
@@ -489,145 +573,171 @@ unlock:
*
*/
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset)
+ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset)
{
- ioc_table_t *table = NULL;
- call_frame_t *fault_frame = NULL;
- ioc_local_t *fault_local = NULL;
- int32_t op_ret = -1, op_errno = -1;
- ioc_waitq_t *waitq = NULL;
- ioc_page_t *page = NULL;
+ ioc_table_t *table = NULL;
+ call_frame_t *fault_frame = NULL;
+ ioc_local_t *fault_local = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+ ioc_waitq_t *waitq = NULL;
+ ioc_page_t *page = NULL;
+
+ GF_ASSERT (ioc_inode);
+ if (frame == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg ("io-cache", GF_LOG_WARNING,
+ EINVAL, IO_CACHE_MSG_ENFORCEMENT_FAILED,
+ "page fault on a NULL frame");
+ goto err;
+ }
table = ioc_inode->table;
fault_frame = copy_frame (frame);
if (fault_frame == NULL) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
goto err;
}
- fault_local = GF_CALLOC (1, sizeof (ioc_local_t),
- gf_ioc_mt_ioc_local_t);
+ fault_local = mem_get0 (THIS->local_pool);
if (fault_local == NULL) {
op_ret = -1;
op_errno = ENOMEM;
STACK_DESTROY (fault_frame->root);
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
goto err;
}
- /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
- * are using till now won't be valid till we get reply from server.
- * we unref this fd, in fault_cbk */
- fault_local->fd = fd_ref (fd);
+ /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
+ * are using till now won't be valid till we get reply from server.
+ * we unref this fd, in fault_cbk */
+ fault_local->fd = fd_ref (fd);
+
+ fault_frame->local = fault_local;
+ pthread_mutex_init (&fault_local->local_lock, NULL);
- fault_frame->local = fault_local;
- pthread_mutex_init (&fault_local->local_lock, NULL);
+ INIT_LIST_HEAD (&fault_local->fill_list);
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = table->page_size;
+ fault_local->inode = ioc_inode;
- INIT_LIST_HEAD (&fault_local->fill_list);
- fault_local->pending_offset = offset;
- fault_local->pending_size = table->page_size;
- fault_local->inode = ioc_inode;
+ gf_msg_trace (frame->this->name, 0,
+ "stack winding page fault for offset = %"PRId64" with "
+ "frame %p", offset, fault_frame);
- gf_log (frame->this->name, GF_LOG_TRACE,
- "stack winding page fault for offset = %"PRId64" with "
- "frame %p", offset, fault_frame);
-
- STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
- FIRST_CHILD(fault_frame->this)->fops->readv, fd,
- table->page_size, offset);
- return;
+ STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
+ FIRST_CHILD(fault_frame->this)->fops->readv, fd,
+ table->page_size, offset, 0, NULL);
+ return;
err:
- page = ioc_page_get (ioc_inode, offset);
- if (page != NULL) {
- waitq = ioc_page_error (page, op_ret, op_errno);
- if (waitq != NULL) {
- ioc_waitq_return (waitq);
+ ioc_inode_lock (ioc_inode);
+ {
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page != NULL) {
+ waitq = __ioc_page_error (page, op_ret, op_errno);
}
}
+ ioc_inode_unlock (ioc_inode);
+
+ if (waitq != NULL) {
+ ioc_waitq_return (waitq);
+ }
}
+
int32_t
-ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
+__ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size, int32_t op_errno)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_fill_t *new = NULL;
- int8_t found = 0;
- int32_t ret = 0;
-
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_fill_t *new = NULL;
+ int8_t found = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
+
local = frame->local;
+ GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
+
+ if (page == NULL) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_ENFORCEMENT_FAILED,
+ "NULL page has been provided to serve read request");
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
ioc_inode = page->inode;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
- "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
- frame, offset, size, page->size, local->wait_count);
-
- /* immediately move this page to the end of the page_lru list */
- list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
- /* fill local->pending_size bytes from local->pending_offset */
- if (local->op_ret != -1 && page->size) {
- if (offset > page->offset)
- /* offset is offset in file, convert it to offset in
- * page */
- src_offset = offset - page->offset;
- /*FIXME: since offset is the offset within page is the
- * else case valid? */
- else
- /* local->pending_offset is in previous page. do not
- * fill until we have filled all previous pages */
- dst_offset = page->offset - offset;
-
- /* we have to copy from offset to either end of this page
- * or till the requested size */
- copy_size = min (page->size - src_offset,
- size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "copy_size = %"GF_PRI_SIZET" && src_offset = "
- "%"PRId64" && dst_offset = %"PRId64"",
- copy_size, src_offset, dst_offset);
-
- {
- new = GF_CALLOC (1, sizeof (*new),
+ gf_msg_trace (frame->this->name, 0,
+ "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
+ "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
+ frame, offset, size, page->size, local->wait_count);
+
+ /* immediately move this page to the end of the page_lru list */
+ list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
+ /* fill local->pending_size bytes from local->pending_offset */
+ if (local->op_ret != -1) {
+ local->op_errno = op_errno;
+
+ if (page->size == 0) {
+ goto done;
+ }
+
+ if (offset > page->offset)
+ /* offset is offset in file, convert it to offset in
+ * page */
+ src_offset = offset - page->offset;
+ /*FIXME: since offset is the offset within page is the
+ * else case valid? */
+ else
+ /* local->pending_offset is in previous page. do not
+ * fill until we have filled all previous pages */
+ dst_offset = page->offset - offset;
+
+ /* we have to copy from offset to either end of this page
+ * or till the requested size */
+ copy_size = min (page->size - src_offset,
+ size - dst_offset);
+
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
+
+ gf_msg_trace (page->inode->table->xl->name, 0,
+ "copy_size = %"GF_PRI_SIZET" && src_offset = "
+ "%"PRId64" && dst_offset = %"PRId64"",
+ copy_size, src_offset, dst_offset);
+
+ {
+ new = GF_CALLOC (1, sizeof (*new),
gf_ioc_mt_ioc_fill_t);
if (new == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- ret = -1;
- gf_log (page->inode->table->xl->name,
- GF_LOG_ERROR, "out of memory");
goto out;
}
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector,
- page->count,
- src_offset,
- src_offset + copy_size,
- NULL);
-
- new->vector = GF_CALLOC (new->count,
- sizeof (struct iovec),
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref (page->iobref);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset,
+ src_offset + copy_size,
+ NULL);
+
+ new->vector = GF_CALLOC (new->count,
+ sizeof (struct iovec),
gf_ioc_mt_iovec);
if (new->vector == NULL) {
local->op_ret = -1;
@@ -635,58 +745,53 @@ ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
iobref_unref (new->iobref);
GF_FREE (new);
-
- ret = -1;
- gf_log (page->inode->table->xl->name,
- GF_LOG_ERROR, "out of memory");
goto out;
}
- new->count = iov_subset (page->vector,
- page->count,
- src_offset,
- src_offset + copy_size,
- new->vector);
-
+ new->count = iov_subset (page->vector, page->count,
+ src_offset,
+ src_offset + copy_size,
+ new->vector);
+
+ /* add the ioc_fill to fill_list for this frame */
+ if (list_empty (&local->fill_list)) {
+ /* if list is empty, then this is the first
+ * time we are filling frame, add the
+ * ioc_fill_t to the end of list */
+ list_add_tail (&new->list, &local->fill_list);
+ } else {
+ found = 0;
+ /* list is not empty, we need to look for
+ * where this offset fits in list */
+ list_for_each_entry (fill, &local->fill_list,
+ list) {
+ if (fill->offset > new->offset) {
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ list_add_tail (&new->list,
+ &fill->list);
+ } else {
+ list_add_tail (&new->list,
+ &local->fill_list);
+ }
+ }
+ }
- /* add the ioc_fill to fill_list for this frame */
- if (list_empty (&local->fill_list)) {
- /* if list is empty, then this is the first
- * time we are filling frame, add the
- * ioc_fill_t to the end of list */
- list_add_tail (&new->list, &local->fill_list);
- } else {
- found = 0;
- /* list is not empty, we need to look for
- * where this offset fits in list */
- list_for_each_entry (fill, &local->fill_list,
- list) {
- if (fill->offset > new->offset) {
- found = 1;
- break;
- }
- }
-
- if (found) {
- found = 0;
- list_add_tail (&new->list,
- &fill->list);
- } else {
- list_add_tail (&new->list,
- &local->fill_list);
- }
- }
- }
- local->op_ret += copy_size;
- }
+ local->op_ret += copy_size;
+ }
+done:
+ ret = 0;
out:
return ret;
}
/*
- * ioc_frame_unwind - frame unwinds only from here
+ * ioc_frame_unwind - frame unwinds only from here
*
* @frame: call frame to unwind
*
@@ -697,84 +802,102 @@ out:
static void
ioc_frame_unwind (call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL, *next = NULL;
- int32_t count = 0;
- struct iovec *vector = NULL;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- struct iatt stbuf = {0,};
- int32_t op_ret = 0, op_errno = 0;
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL, *next = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ struct iatt stbuf = {0,};
+ int32_t op_ret = 0, op_errno = 0;
+
+ GF_ASSERT (frame);
local = frame->local;
- // ioc_local_lock (local);
- frame->local = NULL;
- iobref = iobref_new ();
+ if (local == NULL) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, "local is NULL");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0) {
+ op_ret = local->op_ret;
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ // ioc_local_lock (local);
+ iobref = iobref_new ();
if (iobref == NULL) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
}
- if (list_empty (&local->fill_list)) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) has 0 entries in local->fill_list "
- "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
- frame, local->offset, local->size);
- }
+ if (list_empty (&local->fill_list)) {
+ gf_msg_trace (frame->this->name, 0,
+ "frame(%p) has 0 entries in local->fill_list "
+ "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
+ frame, local->offset, local->size);
+ }
- list_for_each_entry (fill, &local->fill_list, list) {
- count += fill->count;
- }
+ list_for_each_entry (fill, &local->fill_list, list) {
+ count += fill->count;
+ }
- vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec);
+ vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec);
if (vector == NULL) {
op_ret = -1;
op_errno = ENOMEM;
-
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
}
-
- list_for_each_entry_safe (fill, next, &local->fill_list, list) {
- if ((vector != NULL) && (iobref != NULL)) {
+
+ list_for_each_entry_safe (fill, next, &local->fill_list, list) {
+ if ((vector != NULL) && (iobref != NULL)) {
memcpy (((char *)vector) + copied,
fill->vector,
fill->count * sizeof (*vector));
-
+
copied += (fill->count * sizeof (*vector));
- iobref_merge (iobref, fill->iobref);
+ if (iobref_merge (iobref, fill->iobref)) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
}
- list_del (&fill->list);
- iobref_unref (fill->iobref);
- GF_FREE (fill->vector);
- GF_FREE (fill);
- }
-
+ list_del (&fill->list);
+ iobref_unref (fill->iobref);
+ GF_FREE (fill->vector);
+ GF_FREE (fill);
+ }
+
if (op_ret != -1) {
op_ret = iov_length (vector, count);
}
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) unwinding with op_ret=%d", frame, op_ret);
+unwind:
+ gf_msg_trace (frame->this->name, 0,
+ "frame(%p) unwinding with op_ret=%d", frame, op_ret);
- // ioc_local_unlock (local);
+ // ioc_local_unlock (local);
- STACK_UNWIND_STRICT (readv, frame, op_ret, local->op_errno, vector,
- count, &stbuf, iobref);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, &stbuf, iobref, NULL);
if (iobref != NULL) {
iobref_unref (iobref);
}
-
+
if (vector != NULL) {
GF_FREE (vector);
vector = NULL;
}
-
- pthread_mutex_destroy (&local->local_lock);
- GF_FREE (local);
+
+ pthread_mutex_destroy (&local->local_lock);
+ if (local)
+ mem_put (local);
return;
}
@@ -788,60 +911,119 @@ ioc_frame_unwind (call_frame_t *frame)
void
ioc_frame_return (call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- int32_t wait_count = 0;
+ ioc_local_t *local = NULL;
+ int32_t wait_count = 0;
+
+ GF_ASSERT (frame);
local = frame->local;
- assert (local->wait_count > 0);
+ GF_ASSERT (local->wait_count > 0);
- ioc_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ioc_local_unlock (local);
+ ioc_local_lock (local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ioc_local_unlock (local);
- if (!wait_count) {
- ioc_frame_unwind (frame);
- }
+ if (!wait_count) {
+ ioc_frame_unwind (frame);
+ }
- return;
+ return;
}
-/*
+/*
* ioc_page_wakeup -
* @page:
*
* to be called only when a frame is waiting on an in-transit page
*/
ioc_waitq_t *
-ioc_page_wakeup (ioc_page_t *page)
+__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int32_t ret = -1;
-
- waitq = page->waitq;
- page->waitq = NULL;
-
- trav = waitq;
- page->ready = 1;
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "page is %p && waitq = %p", page, waitq);
-
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ret = ioc_frame_fill (page, frame, trav->pending_offset,
- trav->pending_size);
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+
+ waitq = page->waitq;
+ page->waitq = NULL;
+
+ page->ready = 1;
+
+ gf_msg_trace (page->inode->table->xl->name, 0,
+ "page is %p && waitq = %p", page, waitq);
+
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ret = __ioc_frame_fill (page, frame, trav->pending_offset,
+ trav->pending_size, op_errno);
if (ret == -1) {
break;
}
- }
-
- return waitq;
+ }
+
+ if (page->stale) {
+ __ioc_page_destroy (page);
+ }
+
+out:
+ return waitq;
}
+
+/*
+ * __ioc_page_error -
+ * @page:
+ * @op_ret:
+ * @op_errno:
+ *
+ */
+ioc_waitq_t *
+__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
+{
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int64_t ret = 0;
+ ioc_table_t *table = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+
+ waitq = page->waitq;
+ page->waitq = NULL;
+
+ gf_msg_debug (page->inode->table->xl->name, 0,
+ "page error for page = %p & waitq = %p", page, waitq);
+
+ for (trav = waitq; trav; trav = trav->next) {
+
+ frame = trav->data;
+
+ local = frame->local;
+ ioc_local_lock (local);
+ {
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ ioc_local_unlock (local);
+ }
+
+ table = page->inode->table;
+ ret = __ioc_page_destroy (page);
+
+ if (ret != -1) {
+ table->cache_used -= ret;
+ }
+
+out:
+ return waitq;
+}
+
/*
* ioc_page_error -
* @page:
@@ -852,39 +1034,20 @@ ioc_page_wakeup (ioc_page_t *page)
ioc_waitq_t *
ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int64_t ret = 0;
- ioc_table_t *table = NULL;
- ioc_local_t *local = NULL;
-
- waitq = page->waitq;
- page->waitq = NULL;
-
- gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
- "page error for page = %p & waitq = %p", page, waitq);
-
- for (trav = waitq; trav; trav = trav->next) {
-
- frame = trav->data;
-
- local = frame->local;
- ioc_local_lock (local);
- {
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- ioc_local_unlock (local);
- }
+ ioc_waitq_t *waitq = NULL;
+ struct ioc_inode *inode = NULL;
- table = page->inode->table;
- ret = ioc_page_destroy (page);
+ if (page == NULL) {
+ goto out;
+ }
- if (ret != -1) {
- table->cache_used -= ret;
- }
+ ioc_inode_lock (page->inode);
+ {
+ inode = page->inode;
+ waitq = __ioc_page_error (page, op_ret, op_errno);
+ }
+ ioc_inode_unlock (inode);
- return waitq;
+out:
+ return waitq;
}
diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am
index 72f9a801287..1d09eace2ed 100644
--- a/xlators/performance/io-threads/src/Makefile.am
+++ b/xlators/performance/io-threads/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = io-threads.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_threads_la_LDFLAGS = -module -avoidversion
+io_threads_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_threads_la_SOURCES = io-threads.c
io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-threads.h iot-mem-types.h
+noinst_HEADERS = io-threads.h iot-mem-types.h io-threads-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/io-threads/src/io-threads-messages.h b/xlators/performance/io-threads/src/io-threads-messages.h
new file mode 100644
index 00000000000..ab1f672756b
--- /dev/null
+++ b/xlators/performance/io-threads/src/io-threads-messages.h
@@ -0,0 +1,103 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IO_THREADS_MESSAGES_H_
+#define _IO_THREADS_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file io-threads-messages.h
+ * \brief IO_THREADS log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_IO_THREADS_BASE GLFS_MSGID_COMP_IO_THREADS
+#define GLFS_IO_THREADS_NUM_MESSAGES 5
+#define GLFS_MSGID_END (GLFS_IO_THREADS_BASE + \
+ GLFS_IO_THREADS_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_IO_THREADS_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_THREADS_MSG_INIT_FAILED (GLFS_IO_THREADS_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED (GLFS_IO_THREADS_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_THREADS_MSG_NO_MEMORY (GLFS_IO_THREADS_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_THREADS_MSG_VOL_MISCONFIGURED (GLFS_IO_THREADS_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define IO_THREADS_MSG_SIZE_NOT_SET (GLFS_IO_THREADS_BASE + 5)
+
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _IO_THREADS_MESSAGES_H_ */
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 7f265d1dd26..c6a18fdc0b3 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -1,28 +1,15 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "call-stub.h"
+#include "defaults.h"
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -32,35 +19,140 @@
#include <sys/time.h>
#include <time.h>
#include "locking.h"
+#include "io-threads-messages.h"
void *iot_worker (void *arg);
int iot_workers_scale (iot_conf_t *conf);
int __iot_workers_scale (iot_conf_t *conf);
+struct volume_options options[];
+
+#define IOT_FOP(name, frame, this, args ...) \
+ do { \
+ call_stub_t *__stub = NULL; \
+ int __ret = -1; \
+ \
+ __stub = fop_##name##_stub(frame, default_##name##_resume, args); \
+ if (!__stub) { \
+ __ret = -ENOMEM; \
+ goto out; \
+ } \
+ \
+ __ret = iot_schedule (frame, this, __stub); \
+ \
+ out: \
+ if (__ret < 0) { \
+ default_##name##_failure_cbk (frame, -__ret); \
+ if (__stub != NULL) { \
+ call_stub_destroy (__stub); \
+ } \
+ } \
+ } while (0)
+
+iot_client_ctx_t *
+iot_get_ctx (xlator_t *this, client_t *client)
+{
+ iot_client_ctx_t *ctx = NULL;
+ int i;
+
+ if (client_ctx_get (client, this, (void **)&ctx) != 0) {
+ ctx = GF_CALLOC (IOT_PRI_MAX, sizeof(*ctx),
+ gf_iot_mt_client_ctx_t);
+ if (ctx) {
+ for (i = 0; i < IOT_PRI_MAX; ++i) {
+ INIT_LIST_HEAD (&ctx[i].clients);
+ INIT_LIST_HEAD (&ctx[i].reqs);
+ }
+ if (client_ctx_set (client, this, ctx) != 0) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+ }
+ }
+ return ctx;
+}
call_stub_t *
-__iot_dequeue (iot_conf_t *conf)
+__iot_dequeue (iot_conf_t *conf, int *pri)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+ int i = 0;
+ iot_client_ctx_t *ctx;
+
+ *pri = -1;
+ for (i = 0; i < IOT_PRI_MAX; i++) {
+
+ if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]) {
+ continue;
+ }
+
+ if (list_empty (&conf->clients[i])) {
+ continue;
+ }
- if (list_empty (&conf->req))
+ /* Get the first per-client queue for this priority. */
+ ctx = list_first_entry (&conf->clients[i],
+ iot_client_ctx_t, clients);
+ if (!ctx) {
+ continue;
+ }
+
+ if (list_empty (&ctx->reqs)) {
+ continue;
+ }
+
+ /* Get the first request on that queue. */
+ stub = list_first_entry (&ctx->reqs, call_stub_t, list);
+ list_del_init (&stub->list);
+ if (list_empty (&ctx->reqs)) {
+ list_del_init (&ctx->clients);
+ } else {
+ list_rotate_left (&conf->clients[i]);
+ }
+
+ conf->ac_iot_count[i]++;
+ *pri = i;
+ break;
+ }
+
+ if (!stub)
return NULL;
- stub = list_entry (conf->req.next, call_stub_t, list);
- list_del_init (&stub->list);
conf->queue_size--;
+ conf->queue_sizes[*pri]--;
return stub;
}
void
-__iot_enqueue (iot_conf_t *conf, call_stub_t *stub)
+__iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri)
{
- list_add_tail (&stub->list, &conf->req);
- conf->queue_size++;
+ client_t *client = stub->frame->root->client;
+ iot_client_ctx_t *ctx;
- return;
+ if (pri < 0 || pri >= IOT_PRI_MAX)
+ pri = IOT_PRI_MAX-1;
+
+ if (client) {
+ ctx = iot_get_ctx (THIS, client);
+ if (ctx) {
+ ctx = &ctx[pri];
+ }
+ } else {
+ ctx = NULL;
+ }
+ if (!ctx) {
+ ctx = &conf->no_client[pri];
+ }
+
+ if (list_empty (&ctx->reqs)) {
+ list_add_tail (&ctx->clients, &conf->clients[pri]);
+ }
+ list_add_tail (&stub->list, &ctx->reqs);
+
+ conf->queue_size++;
+ conf->queue_sizes[pri]++;
}
@@ -72,6 +164,7 @@ iot_worker (void *data)
call_stub_t *stub = NULL;
struct timespec sleep_till = {0, };
int ret = 0;
+ int pri = -1;
char timeout = 0;
char bye = 0;
@@ -84,7 +177,11 @@ iot_worker (void *data)
pthread_mutex_lock (&conf->mutex);
{
- while (list_empty (&conf->req)) {
+ if (pri != -1) {
+ conf->ac_iot_count[pri]--;
+ pri = -1;
+ }
+ while (conf->queue_size == 0) {
conf->sleep_count++;
ret = pthread_cond_timedwait (&conf->cond,
@@ -102,15 +199,15 @@ iot_worker (void *data)
if (conf->curr_count > IOT_MIN_THREADS) {
conf->curr_count--;
bye = 1;
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "timeout, terminated. conf->curr_count=%d",
- conf->curr_count);
+ gf_msg_debug (conf->this->name, 0,
+ "timeout, terminated. conf->curr_count=%d",
+ conf->curr_count);
} else {
timeout = 0;
}
}
- stub = __iot_dequeue (conf);
+ stub = __iot_dequeue (conf, &pri);
}
pthread_mutex_unlock (&conf->mutex);
@@ -121,18 +218,25 @@ iot_worker (void *data)
break;
}
+ if (pri != -1) {
+ pthread_mutex_lock (&conf->mutex);
+ {
+ conf->ac_iot_count[pri]--;
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ }
return NULL;
}
int
-iot_schedule (iot_conf_t *conf, call_stub_t *stub)
+do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri)
{
int ret = 0;
pthread_mutex_lock (&conf->mutex);
{
- __iot_enqueue (conf, stub);
+ __iot_enqueue (conf, stub, pri);
pthread_cond_signal (&conf->cond);
@@ -140,1897 +244,535 @@ iot_schedule (iot_conf_t *conf, call_stub_t *stub)
}
pthread_mutex_unlock (&conf->mutex);
- return 0;
-}
-
-
-int
-iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub)
-{
- return iot_schedule (conf, stub);
-}
-
-
-int
-iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub)
-{
-
- return iot_schedule (conf, stub);
-}
-
-
-int
-iot_lookup_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr,
- postparent);
- return 0;
-}
-
-
-int
-iot_lookup_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
-{
- STACK_WIND (frame, iot_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- loc, xattr_req);
- return 0;
-}
-
-
-int
-iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xattr_req);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create lookup stub (out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- STACK_UNWIND_STRICT (lookup, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
- }
-
- return 0;
-}
-
-
-int
-iot_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
- return 0;
-}
-
-
-int
-iot_setattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- STACK_WIND (frame, iot_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
-}
-
-
-int
-iot_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_setattr_stub (frame, iot_setattr_wrapper, loc, stbuf, valid);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot create setattr stub"
- "(Out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL);
- }
-
- return 0;
-}
-
-
-int
-iot_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, preop, postop);
- return 0;
-}
-
-
-int
-iot_fsetattr_wrapper (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
-{
- STACK_WIND (frame, iot_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid);
- return 0;
+ return ret;
}
-
-int
-iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+char*
+iot_get_pri_meaning (iot_pri_t pri)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsetattr_stub (frame, iot_fsetattr_wrapper, fd, stbuf,
- valid);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsetattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ char *name = NULL;
+ switch (pri) {
+ case IOT_PRI_HI:
+ name = "fast";
+ break;
+ case IOT_PRI_NORMAL:
+ name = "normal";
+ break;
+ case IOT_PRI_LO:
+ name = "slow";
+ break;
+ case IOT_PRI_LEAST:
+ name = "least priority";
+ break;
+ case IOT_PRI_MAX:
+ name = "invalid";
+ break;
}
- return 0;
+ return name;
}
-
int
-iot_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-iot_access_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
-{
- STACK_WIND (frame, iot_access_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->access, loc, mask);
- return 0;
-}
-
-
-int
-iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- call_stub_t *stub = NULL;
int ret = -1;
+ iot_pri_t pri = IOT_PRI_MAX - 1;
+ iot_conf_t *conf = this->private;
- stub = fop_access_stub (frame, iot_access_wrapper, loc, mask);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create access stub"
- "(out of memory)");
- ret = -ENOMEM;
+ if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) {
+ pri = IOT_PRI_LEAST;
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (access, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *stbuf)
-{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, stbuf);
- return 0;
-}
-
-
-int
-iot_readlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
-{
- STACK_WIND (frame, iot_readlink_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readlink,
- loc, size);
- return 0;
-}
-
-
-int
-iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create readlink stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ case GF_FOP_STAT:
+ case GF_FOP_FSTAT:
+ case GF_FOP_LOOKUP:
+ case GF_FOP_ACCESS:
+ case GF_FOP_READLINK:
+ case GF_FOP_OPENDIR:
+ case GF_FOP_STATFS:
+ case GF_FOP_READDIR:
+ case GF_FOP_READDIRP:
+ case GF_FOP_GETACTIVELK:
+ case GF_FOP_SETACTIVELK:
+ pri = IOT_PRI_HI;
+ break;
+
+ case GF_FOP_CREATE:
+ case GF_FOP_FLUSH:
+ case GF_FOP_LK:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_LEASE:
+ case GF_FOP_UNLINK:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_RENAME:
+ case GF_FOP_LINK:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_GETXATTR:
+ case GF_FOP_FGETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ pri = IOT_PRI_NORMAL;
+ break;
+
+ case GF_FOP_READ:
+ case GF_FOP_WRITE:
+ case GF_FOP_FSYNC:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FSYNCDIR:
+ case GF_FOP_XATTROP:
+ case GF_FOP_FXATTROP:
+ case GF_FOP_RCHECKSUM:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_SEEK:
+ pri = IOT_PRI_LO;
+ break;
+
+ case GF_FOP_FORGET:
+ case GF_FOP_RELEASE:
+ case GF_FOP_RELEASEDIR:
+ case GF_FOP_GETSPEC:
+ break;
+ case GF_FOP_IPC:
+ default:
+ return -EINVAL;
}
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ gf_msg_debug (this->name, 0, "%s scheduled as %s fop",
+ gf_fop_list[stub->fop], iot_get_pri_meaning (pri));
+ ret = do_iot_schedule (this->private, stub, pri);
+ return ret;
}
-
int
-iot_mknod_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame, iot_mknod_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mknod, loc, mode, rdev);
+ IOT_FOP (lookup, frame, this, loc, xdata);
return 0;
}
int
-iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_mknod_stub (frame, iot_mknod_wrapper, loc, mode, rdev);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create mknod stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_mkdir_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-
-int
-iot_mkdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, iot_mkdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mkdir, loc, mode);
+ IOT_FOP (setattr, frame, this, loc, stbuf, valid, xdata);
return 0;
}
int
-iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create mkdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (fsetattr, frame, this, fd, stbuf, valid, xdata);
return 0;
}
int
-iot_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent);
+ IOT_FOP (access, frame, this, loc, mask, xdata);
return 0;
}
int
-iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
{
- STACK_WIND (frame, iot_rmdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rmdir, loc);
+ IOT_FOP (readlink, frame, this, loc, size, xdata);
return 0;
}
int
-iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create rmdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (mknod, frame, this, loc, mode, rdev, umask, xdata);
return 0;
}
int
-iot_symlink_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ IOT_FOP (mkdir, frame, this, loc, mode, umask, xdata);
return 0;
}
int
-iot_symlink_wrapper (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
+iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
- STACK_WIND (frame, iot_symlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->symlink, linkname, loc);
+ IOT_FOP (rmdir, frame, this, loc, flags, xdata);
return 0;
}
int
iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_symlink_stub (frame, iot_symlink_wrapper, linkname, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create symlink stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_rename_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ IOT_FOP (symlink, frame, this, linkname, loc, umask, xdata);
return 0;
}
int
-iot_rename_wrapper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_rename_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rename, oldloc, newloc);
- return 0;
-}
-
-
-int
-iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc);
- if (!stub) {
- gf_log (this->name, GF_LOG_DEBUG, "cannot create rename stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- oldloc->inode, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
+ IOT_FOP (rename, frame, this, oldloc, newloc, xdata);
return 0;
}
int
-iot_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
-{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
-}
-
-
-int
-iot_open_wrapper (call_frame_t * frame, xlator_t * this, loc_t *loc,
- int32_t flags, fd_t * fd, int32_t wbflags)
-{
- STACK_WIND (frame, iot_open_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open, loc, flags, fd, wbflags);
- return 0;
-}
-
-
-int
iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd, wbflags);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create open call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent);
- return 0;
-}
-
-
-int
-iot_create_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
-{
- STACK_WIND (frame, iot_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ IOT_FOP (open, frame, this, loc, flags, fd, xdata);
+ return 0;
}
int
iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_create_stub (frame, iot_create_wrapper, loc, flags, mode,
- fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create \"create\" call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
+ IOT_FOP (create, frame, this, loc, flags, mode, umask, fd, xdata);
return 0;
}
int
-iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
-{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
-
- return 0;
-}
-
-
-int
-iot_readv_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- STACK_WIND (frame, iot_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
-}
-
-
-int
iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create readv call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readv, frame, -1, -ret, NULL, -1, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, iot_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
-}
-
-
-int
-iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_flush_stub (frame, iot_flush_wrapper, fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create flush_cbk call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (flush, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-int
-iot_fsync_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- STACK_WIND (frame, iot_fsync_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
- return 0;
-}
-
-
-int
-iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fsync_cbk call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsync, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-int
-iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
-{
- STACK_WIND (frame, iot_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
- return 0;
-}
-
-
-int
-iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_writev_stub (frame, iot_writev_wrapper,
- fd, vector, count, offset, iobref);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create writev call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-iot_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *flock)
-{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock);
- return 0;
-}
-
-
-int
-iot_lk_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct flock *flock)
-{
- STACK_WIND (frame, iot_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, flock);
- return 0;
-}
-
-
-int
-iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_lk call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (lk, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int
-iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_WIND (frame, iot_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
-}
-
-
-int
-iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_stat_stub (frame, iot_stat_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_stat call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- /* File is not open, so we can send it through unordered pool.
- */
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (stat, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int
-iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, iot_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
-}
-
-
-int
-iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_fstat call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fstat, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
-}
-
-
-int
-iot_truncate_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- STACK_WIND (frame, iot_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
-}
-
-
-int
-iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- call_stub_t *stub;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_stat call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
-}
-
-
-int
-iot_ftruncate_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame, iot_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
-}
-
-
-int
-iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_ftruncate call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- STACK_UNWIND_STRICT (checksum, frame, op_ret, op_errno, file_checksum,
- dir_checksum);
- return 0;
-}
-
-
-int
-iot_checksum_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags)
-{
- STACK_WIND (frame, iot_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc, flags);
-
- return 0;
-}
-
-
-int
-iot_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_checksum_stub (frame, iot_checksum_wrapper, loc, flags);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_checksum call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (checksum, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent);
- return 0;
-}
-
-
-int
-iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_WIND (frame, iot_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
-}
-
-
-int
-iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_unlink call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule_unordered((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent, struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ IOT_FOP (readv, frame, this, fd, size, offset, flags, xdata);
return 0;
}
int
-iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new)
+iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, iot_link_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->link, old, new);
-
+ IOT_FOP (flush, frame, this, fd, xdata);
return 0;
}
int
-iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create link stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- oldloc->inode, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (fsync, frame, this, fd, datasync, xdata);
return 0;
}
int
-iot_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ IOT_FOP (writev, frame, this, fd, vector, count, offset,
+ flags, iobref, xdata);
return 0;
}
int
-iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- STACK_WIND (frame, iot_opendir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->opendir, loc, fd);
+ IOT_FOP (lk, frame, this, fd, cmd, flock, xdata);
return 0;
}
int
-iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create opendir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (stat, frame, this, loc, xdata);
return 0;
}
int
-iot_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ IOT_FOP (fstat, frame, this, fd, xdata);
return 0;
}
int
-iot_fsyncdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync)
+iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_fsyncdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsyncdir, fd, datasync);
+ IOT_FOP (truncate, frame, this, loc, offset, xdata);
return 0;
}
int
-iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsyncdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (ftruncate, frame, this, fd, offset, xdata);
return 0;
}
-int
-iot_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
- return 0;
-}
-
int
-iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_statfs_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->statfs, loc);
+ IOT_FOP (unlink, frame, this, loc, xflag, xdata);
return 0;
}
int
-iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create statfs stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (statfs, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (link, frame, this, oldloc, newloc, xdata);
return 0;
}
int
-iot_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ IOT_FOP (opendir, frame, this, loc, fd, xdata);
return 0;
}
int
-iot_setxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
+iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_setxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setxattr, loc, dict, flags);
- return 0;
-}
-
-
-int
-iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_setxattr_stub (frame, iot_setxattr_wrapper, loc, dict,
- flags);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create setxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (setxattr, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (fsyncdir, frame, this, fd, datasync, xdata);
return 0;
}
int
-iot_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ IOT_FOP (statfs, frame, this, loc, xdata);
return 0;
}
int
-iot_getxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, iot_getxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->getxattr, loc, name);
+ IOT_FOP (setxattr, frame, this, loc, dict, flags, xdata);
return 0;
}
int
iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create getxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (getxattr, frame, this, loc, name, xdata);
return 0;
}
int
-iot_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ IOT_FOP (fgetxattr, frame, this, fd, name, xdata);
return 0;
}
int
-iot_fgetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, iot_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
+ IOT_FOP (fsetxattr, frame, this, fd, dict, flags, xdata);
return 0;
}
int
-iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fgetxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (removexattr, frame, this, loc, name, xdata);
return 0;
}
-
int
-iot_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ IOT_FOP (fremovexattr, frame, this, fd, name, xdata);
return 0;
}
int
-iot_fsetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, iot_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags);
+ IOT_FOP (readdirp, frame, this, fd, size, offset, xdata);
return 0;
}
int
-iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsetxattr_stub (frame, iot_fsetxattr_wrapper, fd, dict,
- flags);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsetxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (readdir, frame, this, fd, size, offset, xdata);
return 0;
}
-
int
-iot_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ IOT_FOP (inodelk, frame, this, volume, loc, cmd, lock, xdata);
return 0;
}
-
int
-iot_removexattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+iot_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_removexattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->removexattr, loc, name);
+ IOT_FOP (finodelk, frame, this, volume, fd, cmd, lock, xdata);
return 0;
}
-
int
-iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+iot_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_removexattr_stub (frame, iot_removexattr_wrapper, loc,
- name);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,"cannot get removexattr fop"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (removexattr, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (entrylk, frame, this, volume, loc, basename, cmd, type, xdata);
return 0;
}
-
int
-iot_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+iot_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ IOT_FOP (fentrylk, frame, this, volume, fd, basename, cmd, type, xdata);
return 0;
}
int
-iot_readdirp_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_WIND (frame, iot_readdirp_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdirp, fd, size, offset);
+ IOT_FOP (xattrop, frame, this, loc, optype, xattr, xdata);
return 0;
}
int
-iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readdirp_stub (frame, iot_readdirp_wrapper, fd, size,
- offset);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (fxattrop, frame, this, fd, optype, xattr, xdata);
return 0;
}
-int
-iot_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+int32_t
+iot_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ IOT_FOP (rchecksum, frame, this, fd, offset, len, xdata);
return 0;
}
-
int
-iot_readdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+iot_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_WIND (frame, iot_readdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdir, fd, size, offset);
+ IOT_FOP (fallocate, frame, this, fd, mode, offset, len, xdata);
return 0;
}
-
int
-iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+iot_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readdir, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (discard, frame, this, fd, offset, len, xdata);
return 0;
}
-
int
-iot_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+iot_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr);
+ IOT_FOP (zerofill, frame, this, fd, offset, len, xdata);
return 0;
}
-
int
-iot_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- STACK_WIND (frame, iot_xattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr);
+ IOT_FOP (seek, frame, this, fd, offset, what, xdata);
return 0;
}
-
int
-iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_lease (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_xattrop_stub (frame, iot_xattrop_wrapper, loc, optype,
- xattr);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create xattrop stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (xattrop, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, xattr);
+ IOT_FOP (lease, frame, this, loc, lease, xdata);
return 0;
}
int
-iot_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_getactivelk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame, iot_fxattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr);
+ IOT_FOP (getactivelk, frame, this, loc, xdata);
return 0;
}
int
-iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_setactivelk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fxattrop_stub (frame, iot_fxattrop_wrapper, fd, optype,
- xattr);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fxattrop stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fxattrop, frame, -1, -ret, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ IOT_FOP (setactivelk, frame, this, loc, locklist, xdata);
return 0;
}
@@ -2038,20 +780,19 @@ out:
int
__iot_workers_scale (iot_conf_t *conf)
{
- int log2 = 0;
int scale = 0;
int diff = 0;
pthread_t thread;
int ret = 0;
+ int i = 0;
- log2 = log_base2 (conf->queue_size);
+ for (i = 0; i < IOT_PRI_MAX; i++)
+ scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]);
- scale = log2;
-
- if (log2 < IOT_MIN_THREADS)
+ if (scale < IOT_MIN_THREADS)
scale = IOT_MIN_THREADS;
- if (log2 > conf->max_count)
+ if (scale > conf->max_count)
scale = conf->max_count;
if (conf->curr_count < scale) {
@@ -2061,12 +802,13 @@ __iot_workers_scale (iot_conf_t *conf)
while (diff) {
diff --;
- ret = pthread_create (&thread, &conf->w_attr, iot_worker, conf);
+ ret = gf_thread_create (&thread, &conf->w_attr, iot_worker, conf);
if (ret == 0) {
conf->curr_count++;
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "scaled threads to %d (queue_size=%d/%d)",
- conf->curr_count, conf->queue_size, scale);
+ gf_msg_debug (conf->this->name, 0,
+ "scaled threads to %d (queue_size=%d/%d)",
+ conf->curr_count,
+ conf->queue_size, scale);
} else {
break;
}
@@ -2102,15 +844,29 @@ set_stack_size (iot_conf_t *conf)
{
int err = 0;
size_t stacksize = IOT_THREAD_STACK_SIZE;
+ xlator_t *this = NULL;
+
+ this = THIS;
pthread_attr_init (&conf->w_attr);
err = pthread_attr_setstacksize (&conf->w_attr, stacksize);
if (err == EINVAL) {
- gf_log (conf->this->name, GF_LOG_WARNING,
- "Using default thread stack size");
+ err = pthread_attr_getstacksize (&conf->w_attr, &stacksize);
+ if (!err)
+ gf_msg (this->name, GF_LOG_WARNING,
+ 0, IO_THREADS_MSG_SIZE_NOT_SET,
+ "Using default thread stack size %zd",
+ stacksize);
+ else
+ gf_msg (this->name, GF_LOG_WARNING,
+ 0, IO_THREADS_MSG_SIZE_NOT_SET,
+ "Using default thread stack size");
}
+
+ conf->stack_size = stacksize;
}
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -2120,10 +876,11 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_iot_mt_end + 1);
-
+
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, IO_THREADS_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
}
@@ -2131,72 +888,160 @@ mem_acct_init (xlator_t *this)
}
int
+iot_priv_dump (xlator_t *this)
+{
+ iot_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ if (!this)
+ return 0;
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
+ this->name);
+
+ gf_proc_dump_add_section(key_prefix);
+
+ gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count);
+ gf_proc_dump_write("current_threads_count", "%d", conf->curr_count);
+ gf_proc_dump_write("sleep_count", "%d", conf->sleep_count);
+ gf_proc_dump_write("idle_time", "%d", conf->idle_time);
+ gf_proc_dump_write("stack_size", "%zd", conf->stack_size);
+ gf_proc_dump_write("high_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_HI]);
+ gf_proc_dump_write("normal_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_NORMAL]);
+ gf_proc_dump_write("low_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_LO]);
+ gf_proc_dump_write("least_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_LEAST]);
+
+ return 0;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ iot_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out);
+
+ GF_OPTION_RECONF ("high-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_HI], options, int32, out);
+
+ GF_OPTION_RECONF ("normal-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_NORMAL], options, int32,
+ out);
+
+ GF_OPTION_RECONF ("low-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LO], options, int32, out);
+
+ GF_OPTION_RECONF ("least-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LEAST], options, int32,
+ out);
+ GF_OPTION_RECONF ("enable-least-priority", conf->least_priority,
+ options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
init (xlator_t *this)
{
- iot_conf_t *conf = NULL;
- dict_t *options = this->options;
- int thread_count = IOT_DEFAULT_THREADS;
- int idle_time = IOT_DEFAULT_IDLE;
- int ret = 0;
+ iot_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
if (!this->children || this->children->next) {
- gf_log ("io-threads", GF_LOG_ERROR,
- "FATAL: iot not configured with exactly one child");
+ gf_msg ("io-threads", GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: iot not configured "
+ "with exactly one child");
goto out;
}
if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ IO_THREADS_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
}
conf = (void *) GF_CALLOC (1, sizeof (*conf),
gf_iot_mt_iot_conf_t);
if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ IO_THREADS_MSG_NO_MEMORY, "out of memory");
+ goto out;
+ }
+
+ if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_INIT_FAILED,
+ "pthread_cond_init failed (%d)", ret);
+ goto out;
+ }
+
+ if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_INIT_FAILED,
+ "pthread_mutex_init failed (%d)", ret);
goto out;
}
set_stack_size (conf);
- thread_count = IOT_DEFAULT_THREADS;
+ GF_OPTION_INIT ("thread-count", conf->max_count, int32, out);
- if (dict_get (options, "thread-count")) {
- thread_count = data_to_int32 (dict_get (options,
- "thread-count"));
- if (thread_count < IOT_MIN_THREADS)
- thread_count = IOT_MIN_THREADS;
+ GF_OPTION_INIT ("high-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_HI], int32, out);
- if (thread_count > IOT_MAX_THREADS)
- thread_count = IOT_MAX_THREADS;
- }
- conf->max_count = thread_count;
+ GF_OPTION_INIT ("normal-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_NORMAL], int32, out);
- if (dict_get (options, "idle-time")) {
- idle_time = data_to_int32 (dict_get (options,
- "idle-time"));
- if (idle_time < 0)
- idle_time = 1;
- }
- conf->idle_time = idle_time;
+ GF_OPTION_INIT ("low-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LO], int32, out);
+
+ GF_OPTION_INIT ("least-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LEAST], int32, out);
+
+ GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out);
+ GF_OPTION_INIT ("enable-least-priority", conf->least_priority,
+ bool, out);
conf->this = this;
- INIT_LIST_HEAD (&conf->req);
+ for (i = 0; i < IOT_PRI_MAX; i++) {
+ INIT_LIST_HEAD (&conf->clients[i]);
+ INIT_LIST_HEAD (&conf->no_client[i].clients);
+ INIT_LIST_HEAD (&conf->no_client[i].reqs);
+ }
ret = iot_workers_scale (conf);
if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_INIT_FAILED,
"cannot initialize worker threads, exiting init");
- GF_FREE (conf);
goto out;
}
this->private = conf;
ret = 0;
out:
+ if (ret)
+ GF_FREE (conf);
+
return ret;
}
@@ -2212,65 +1057,133 @@ fini (xlator_t *this)
return;
}
-/*
- * O - Goes to ordered threadpool.
- * U - Goes to un-ordered threadpool.
- * V - Variable, depends on whether the file is open.
- * If it is, then goes to ordered, otherwise to
- * un-ordered.
- */
+int
+iot_client_destroy (xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+
+ if (client_ctx_del (client, this, &tmp) == 0) {
+ GF_FREE (tmp);
+ }
+
+ return 0;
+}
+
+
+struct xlator_dumpops dumpops = {
+ .priv = iot_priv_dump,
+};
+
struct xlator_fops fops = {
- .open = iot_open, /* U */
- .create = iot_create, /* U */
- .readv = iot_readv, /* O */
- .writev = iot_writev, /* O */
- .flush = iot_flush, /* O */
- .fsync = iot_fsync, /* O */
- .lk = iot_lk, /* O */
- .stat = iot_stat, /* V */
- .fstat = iot_fstat, /* O */
- .truncate = iot_truncate, /* V */
- .ftruncate = iot_ftruncate, /* O */
- .checksum = iot_checksum, /* U */
- .unlink = iot_unlink, /* U */
- .lookup = iot_lookup, /* U */
- .setattr = iot_setattr, /* U */
- .fsetattr = iot_fsetattr, /* O */
- .access = iot_access, /* U */
- .readlink = iot_readlink, /* U */
- .mknod = iot_mknod, /* U */
- .mkdir = iot_mkdir, /* U */
- .rmdir = iot_rmdir, /* U */
- .symlink = iot_symlink, /* U */
- .rename = iot_rename, /* U */
- .link = iot_link, /* U */
- .opendir = iot_opendir, /* U */
- .fsyncdir = iot_fsyncdir, /* O */
- .statfs = iot_statfs, /* U */
- .setxattr = iot_setxattr, /* U */
- .getxattr = iot_getxattr, /* U */
- .fgetxattr = iot_fgetxattr, /* O */
- .fsetxattr = iot_fsetxattr, /* O */
- .removexattr = iot_removexattr, /* U */
- .readdir = iot_readdir, /* O */
- .readdirp = iot_readdirp, /* O */
- .xattrop = iot_xattrop, /* U */
- .fxattrop = iot_fxattrop, /* O */
+ .open = iot_open,
+ .create = iot_create,
+ .readv = iot_readv,
+ .writev = iot_writev,
+ .flush = iot_flush,
+ .fsync = iot_fsync,
+ .lk = iot_lk,
+ .stat = iot_stat,
+ .fstat = iot_fstat,
+ .truncate = iot_truncate,
+ .ftruncate = iot_ftruncate,
+ .unlink = iot_unlink,
+ .lookup = iot_lookup,
+ .setattr = iot_setattr,
+ .fsetattr = iot_fsetattr,
+ .access = iot_access,
+ .readlink = iot_readlink,
+ .mknod = iot_mknod,
+ .mkdir = iot_mkdir,
+ .rmdir = iot_rmdir,
+ .symlink = iot_symlink,
+ .rename = iot_rename,
+ .link = iot_link,
+ .opendir = iot_opendir,
+ .fsyncdir = iot_fsyncdir,
+ .statfs = iot_statfs,
+ .setxattr = iot_setxattr,
+ .getxattr = iot_getxattr,
+ .fgetxattr = iot_fgetxattr,
+ .fsetxattr = iot_fsetxattr,
+ .removexattr = iot_removexattr,
+ .fremovexattr = iot_fremovexattr,
+ .readdir = iot_readdir,
+ .readdirp = iot_readdirp,
+ .inodelk = iot_inodelk,
+ .finodelk = iot_finodelk,
+ .entrylk = iot_entrylk,
+ .fentrylk = iot_fentrylk,
+ .xattrop = iot_xattrop,
+ .fxattrop = iot_fxattrop,
+ .rchecksum = iot_rchecksum,
+ .fallocate = iot_fallocate,
+ .discard = iot_discard,
+ .zerofill = iot_zerofill,
+ .seek = iot_seek,
+ .lease = iot_lease,
+ .getactivelk = iot_getactivelk,
+ .setactivelk = iot_setactivelk,
};
struct xlator_cbks cbks = {
+ .client_destroy = iot_client_destroy,
};
struct volume_options options[] = {
{ .key = {"thread-count"},
.type = GF_OPTION_TYPE_INT,
.min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Number of threads in IO threads translator which "
+ "perform concurrent IO operations"
+
+ },
+ { .key = {"high-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Max number of threads in IO threads translator which "
+ "perform high priority IO operations at a given time"
+
},
+ { .key = {"normal-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Max number of threads in IO threads translator which "
+ "perform normal priority IO operations at a given time"
+
+ },
+ { .key = {"low-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Max number of threads in IO threads translator which "
+ "perform low priority IO operations at a given time"
+
+ },
+ { .key = {"least-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "1",
+ .description = "Max number of threads in IO threads translator which "
+ "perform least priority IO operations at a given time"
+ },
+ { .key = {"enable-least-priority"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Enable/Disable least priority"
+ },
{.key = {"idle-time"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
.max = 0x7fffffff,
+ .default_value = "120",
},
{ .key = {NULL},
},
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index 137418034b1..7c4ce7849b4 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IOT_H
#define __IOT_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "compat-errno.h"
#include "glusterfs.h"
@@ -37,6 +23,7 @@
#include "locking.h"
#include "iot-mem-types.h"
#include <semaphore.h>
+#include "statedump.h"
struct iot_conf;
@@ -46,13 +33,26 @@ struct iot_conf;
#define IOT_DEFAULT_IDLE 120 /* In secs. */
#define IOT_MIN_THREADS 1
-#define IOT_DEFAULT_THREADS 8
+#define IOT_DEFAULT_THREADS 16
#define IOT_MAX_THREADS 64
#define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024))
+typedef enum {
+ IOT_PRI_HI = 0, /* low latency */
+ IOT_PRI_NORMAL, /* normal */
+ IOT_PRI_LO, /* bulk */
+ IOT_PRI_LEAST, /* least */
+ IOT_PRI_MAX,
+} iot_pri_t;
+
+typedef struct {
+ struct list_head clients;
+ struct list_head reqs;
+} iot_client_ctx_t;
+
struct iot_conf {
pthread_mutex_t mutex;
pthread_cond_t cond;
@@ -63,11 +63,24 @@ struct iot_conf {
int32_t idle_time; /* in seconds */
- struct list_head req;
+ struct list_head clients[IOT_PRI_MAX];
+ /*
+ * It turns out that there are several ways a frame can get to us
+ * without having an associated client (server_first_lookup was the
+ * first one I hit). Instead of trying to update all such callers,
+ * we use this to queue them.
+ */
+ iot_client_ctx_t no_client[IOT_PRI_MAX];
+
+ int32_t ac_iot_limit[IOT_PRI_MAX];
+ int32_t ac_iot_count[IOT_PRI_MAX];
+ int queue_sizes[IOT_PRI_MAX];
int queue_size;
pthread_attr_t w_attr;
+ gf_boolean_t least_priority; /*Enable/Disable least-priority */
xlator_t *this;
+ size_t stack_size;
};
typedef struct iot_conf iot_conf_t;
diff --git a/xlators/performance/io-threads/src/iot-mem-types.h b/xlators/performance/io-threads/src/iot-mem-types.h
index c083f83b495..fbf9188f9cd 100644
--- a/xlators/performance/io-threads/src/iot-mem-types.h
+++ b/xlators/performance/io-threads/src/iot-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,6 +16,7 @@
enum gf_iot_mem_types_ {
gf_iot_mt_iot_conf_t = gf_common_mt_end + 1,
+ gf_iot_mt_client_ctx_t,
gf_iot_mt_end
};
#endif
diff --git a/xlators/performance/md-cache/Makefile.am b/xlators/performance/md-cache/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/performance/md-cache/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am
new file mode 100644
index 00000000000..95a640ffd21
--- /dev/null
+++ b/xlators/performance/md-cache/src/Makefile.am
@@ -0,0 +1,28 @@
+xlator_LTLIBRARIES = md-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+md_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+md_cache_la_SOURCES = md-cache.c
+md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = md-cache-mem-types.h md-cache-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+
+stat-prefetch-compat:
+ mkdir -p $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+ rm -rf $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+ ln -s ./md-cache.so $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+
+
+install-exec-local: stat-prefetch-compat
+
+uninstall-local:
+ rm -f $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h
new file mode 100644
index 00000000000..6634cf962a5
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __MDC_MEM_TYPES_H__
+#define __MDC_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_mdc_mem_types_ {
+ gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1,
+ gf_mdc_mt_md_cache_t,
+ gf_mdc_mt_mdc_conf_t,
+ gf_mdc_mt_end
+};
+#endif
+
diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h
new file mode 100644
index 00000000000..a4259bacf1b
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-messages.h
@@ -0,0 +1,74 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MD_CACHE_MESSAGES_H_
+#define _MD_CACHE_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file md-cache-messages.h
+ * \brief MD_CACHE log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+/*
+ * md-cache message IDs are offsets from GLFS_MD_CACHE_BASE.
+ * GLFS_MD_CACHE_NUM_MESSAGES is the number of MD_CACHE_MSG_* IDs defined
+ * below (currently two); per the rules above it is incremented whenever a
+ * message is appended.
+ */
+#define GLFS_MD_CACHE_BASE GLFS_MSGID_COMP_MD_CACHE
+#define GLFS_MD_CACHE_NUM_MESSAGES 2
+#define GLFS_MSGID_END (GLFS_MD_CACHE_BASE + GLFS_MD_CACHE_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_MD_CACHE_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis Logged by mdc_inode_prep() with the text "out of memory" when
+ * the per-inode cache entry cannot be allocated or cannot be stored in the
+ * inode context.
+ * @recommendedaction None
+ *
+ */
+
+#define MD_CACHE_MSG_NO_MEMORY (GLFS_MD_CACHE_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis Logged at debug level by mdc_inode_iatt_set_validate() when an
+ * incoming iatt carries a ctime older than the cached one and the update is
+ * therefore discarded.
+ * @recommendedaction None
+ *
+ */
+
+#define MD_CACHE_MSG_DISCARD_UPDATE (GLFS_MD_CACHE_BASE + 2)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _MD_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
new file mode 100644
index 00000000000..be42bf0a885
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -0,0 +1,2665 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs.h"
+#include "defaults.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "md-cache-mem-types.h"
+#include "compat-errno.h"
+#include "glusterfs-acl.h"
+#include "defaults.h"
+#include "upcall-utils.h"
+#include <assert.h>
+#include <sys/time.h>
+#include "md-cache-messages.h"
+
+
+/* TODO:
+ - cache symlink() link names and nuke symlink-cache
+ - send proper postbuf in setattr_cbk even when op_ret = -1
+*/
+
+/*
+ * Translator-wide settings and state, reached through this->private
+ * (see __is_cache_valid).
+ * NOTE(review): the gf_boolean_t switches are not read anywhere in this
+ * part of the file; presumably they are filled from volume options and
+ * drive the .load flags of mdc_keys[] - confirm against init/reconfigure.
+ */
+struct mdc_conf {
+ int timeout; /* seconds added to a cache timestamp to get its expiry */
+ gf_boolean_t cache_posix_acl;
+ gf_boolean_t cache_selinux;
+ gf_boolean_t force_readdirp;
+ gf_boolean_t cache_swift_metadata;
+ gf_boolean_t cache_samba_metadata;
+ gf_boolean_t mdc_invalidation;
+ time_t last_child_down; /* if non-zero, entries stamped before it are invalid */
+ gf_lock_t lock; /* per the note in __is_cache_valid: held by writers of last_child_down */
+};
+
+/*
+ * Table of the xattr names md-cache handles; the entry with a NULL name
+ * terminates it.
+ * load - when non-zero the key is added to outgoing lookup requests
+ * (mdc_load_reqs) and may be answered from the cache
+ * (is_mdc_key_satisfied).
+ * check - when non-zero, values for the key are copied into the cached
+ * dict by updatefn.
+ * Every .load starts as 0 here. NOTE(review): presumably flipped at
+ * init/reconfigure according to mdc_conf - not visible in this chunk.
+ */
+static struct mdc_key {
+ const char *name;
+ int load;
+ int check;
+} mdc_keys[] = {
+ {
+ .name = POSIX_ACL_ACCESS_XATTR,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = POSIX_ACL_DEFAULT_XATTR,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = GF_POSIX_ACL_ACCESS,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = GF_POSIX_ACL_DEFAULT,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = GF_SELINUX_XATTR_KEY,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "user.swift.metadata",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "user.DOSATTRIB",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "security.NTACL",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "security.capability",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "gfid-req",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = NULL,
+ .load = 0,
+ .check = 0,
+ }
+};
+
+struct mdc_local;
+typedef struct mdc_local mdc_local_t;
+/*
+ * Unwind @frame for @fop and release the mdc_local_t attached to it.
+ * frame->local is detached (set to NULL) before STACK_UNWIND_STRICT runs
+ * and is handed to mdc_local_wipe only afterwards; the detach step is
+ * skipped when @frame is NULL.
+ */
+#define MDC_STACK_UNWIND(fop, frame, params ...) do { \
+ mdc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ mdc_local_wipe (__xl, __local); \
+ } while (0)
+
+/*
+ * Per-inode cache entry, kept in the inode context (mdc_inode_prep).
+ * The md_* members hold the struct iatt fields copied in by mdc_from_iatt
+ * and copied back out by mdc_to_iatt.
+ */
+struct md_cache {
+ ia_prot_t md_prot;
+ uint32_t md_nlink;
+ uint32_t md_uid;
+ uint32_t md_gid;
+ uint32_t md_atime;
+ uint32_t md_atime_nsec;
+ uint32_t md_mtime;
+ uint32_t md_mtime_nsec;
+ uint32_t md_ctime;
+ uint32_t md_ctime_nsec;
+ uint64_t md_rdev;
+ uint64_t md_size;
+ uint64_t md_blocks;
+ dict_t *xattr; /* cached xattrs; NULL with a valid xa_time is a negative cache (mdc_inode_xatt_get) */
+ char *linkname; /* freed in mdc_inode_wipe */
+ time_t ia_time; /* time() of the last iatt refresh; 0 means invalid */
+ time_t xa_time; /* time() of the last xattr refresh; 0 means invalid */
+ gf_lock_t lock; /* taken around accesses to the members above */
+};
+
+/*
+ * Per-call state stored in frame->local (mdc_local_get) and released by
+ * mdc_local_wipe, which wipes both locs, unrefs fd and xattr, and frees
+ * linkname and key.
+ */
+struct mdc_local {
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ char *linkname;
+ char *key;
+ dict_t *xattr;
+};
+
+
+/*
+ * Read the md_cache pointer stored for @this in the context of @inode.
+ * This is the unlocked variant: mdc_inode_ctx_get() and mdc_inode_prep()
+ * call it with inode->lock held.
+ * *mdc_p is written only when the lookup succeeds and mdc_p is non-NULL.
+ * Returns whatever __inode_ctx_get() returned.
+ */
+int
+__mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+        uint64_t ctx_val = 0;
+        int      rc      = 0;
+
+        rc = __inode_ctx_get (inode, this, &ctx_val);
+        if (rc != 0 || !mdc_p)
+                return rc;
+
+        *mdc_p = (void *) (long) (ctx_val);
+        return rc;
+}
+
+
+/*
+ * Locked wrapper around __mdc_inode_ctx_get(): holds inode->lock for the
+ * duration of the context lookup and returns its result.
+ */
+int
+mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+        int rc = 0;
+
+        LOCK (&inode->lock);
+        rc = __mdc_inode_ctx_get (this, inode, mdc_p);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+
+/*
+ * Store @mdc as the context value of @this on @inode.
+ * Unlocked variant: mdc_inode_ctx_set() and mdc_inode_prep() call it with
+ * inode->lock held. Returns the result of __inode_ctx_set().
+ */
+int
+__mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+        uint64_t ctx_val = (long) mdc;
+
+        return __inode_ctx_set (inode, this, &ctx_val);
+}
+
+
+/*
+ * Locked wrapper around __mdc_inode_ctx_set(): holds inode->lock while
+ * the context value is stored and returns the helper's result.
+ */
+int
+mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+        int rc = 0;
+
+        LOCK (&inode->lock);
+        rc = __mdc_inode_ctx_set (this, inode, mdc);
+        UNLOCK (&inode->lock);
+
+        return rc;
+}
+
+
+/*
+ * Return the mdc_local_t attached to @frame, allocating a zeroed one and
+ * attaching it when the frame has none yet.
+ * Returns NULL when the allocation fails; frame->local is left untouched
+ * in that case.
+ */
+mdc_local_t *
+mdc_local_get (call_frame_t *frame)
+{
+        mdc_local_t *local = frame->local;
+
+        if (!local) {
+                local = GF_CALLOC (sizeof (*local), 1, gf_mdc_mt_mdc_local_t);
+                if (local)
+                        frame->local = local;
+        }
+
+        return local;
+}
+
+
+/*
+ * Release everything owned by @local and then @local itself.
+ * A NULL @local is accepted and ignored. @this is not used.
+ */
+void
+mdc_local_wipe (xlator_t *this, mdc_local_t *local)
+{
+        if (local == NULL)
+                return;
+
+        /* references held by the local */
+        loc_wipe (&local->loc);
+        loc_wipe (&local->loc2);
+        if (local->fd != NULL)
+                fd_unref (local->fd);
+        if (local->xattr != NULL)
+                dict_unref (local->xattr);
+
+        /* heap strings, then the local itself */
+        GF_FREE (local->linkname);
+        GF_FREE (local->key);
+        GF_FREE (local);
+}
+
+
+/*
+ * Detach the md_cache entry of @this from @inode and free it.
+ *
+ * Returns the non-zero result of inode_ctx_del() when there is no context
+ * to remove, 0 otherwise.
+ *
+ * The entry's lock was set up with LOCK_INIT in mdc_inode_prep(), so it is
+ * torn down with LOCK_DESTROY before the memory is released.
+ */
+int
+mdc_inode_wipe (xlator_t *this, inode_t *inode)
+{
+        int              ret     = 0;
+        uint64_t         mdc_int = 0;
+        struct md_cache *mdc     = NULL;
+
+        ret = inode_ctx_del (inode, this, &mdc_int);
+        if (ret != 0)
+                goto out;
+
+        mdc = (void *) (long) mdc_int;
+        if (mdc) {
+                if (mdc->xattr)
+                        dict_unref (mdc->xattr);
+
+                GF_FREE (mdc->linkname);
+
+                LOCK_DESTROY (&mdc->lock);
+
+                GF_FREE (mdc);
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Return the md_cache entry of @inode, creating and registering a zeroed
+ * one in the inode context when none exists yet. The whole get-or-create
+ * sequence runs under inode->lock.
+ *
+ * Returns NULL when the entry cannot be allocated or cannot be stored in
+ * the inode context; both cases are logged with MD_CACHE_MSG_NO_MEMORY.
+ */
+struct md_cache *
+mdc_inode_prep (xlator_t *this, inode_t *inode)
+{
+        int              ret = 0;
+        struct md_cache *mdc = NULL;
+
+        LOCK (&inode->lock);
+        {
+                ret = __mdc_inode_ctx_get (this, inode, &mdc);
+                if (ret == 0)
+                        goto unlock;
+
+                mdc = GF_CALLOC (sizeof (*mdc), 1, gf_mdc_mt_md_cache_t);
+                if (!mdc) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                MD_CACHE_MSG_NO_MEMORY, "out of memory");
+                        goto unlock;
+                }
+
+                LOCK_INIT (&mdc->lock);
+
+                ret = __mdc_inode_ctx_set (this, inode, mdc);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                MD_CACHE_MSG_NO_MEMORY, "out of memory");
+                        /* undo LOCK_INIT before the entry goes away */
+                        LOCK_DESTROY (&mdc->lock);
+                        GF_FREE (mdc);
+                        mdc = NULL;
+                }
+        }
+unlock:
+        UNLOCK (&inode->lock);
+
+        return mdc;
+}
+
+
+/*
+ * Decide whether a cache timestamp @mdc_time is still usable.
+ *
+ * It is NOT usable when:
+ * - it is 0 (never filled, or explicitly invalidated),
+ * - a brick went down after the entry was cached (last_child_down is
+ * non-zero and newer than @mdc_time), or
+ * - the configured timeout has elapsed since @mdc_time.
+ */
+static gf_boolean_t
+__is_cache_valid (xlator_t *this, time_t mdc_time)
+{
+        struct mdc_conf *conf       = this->private;
+        time_t           child_down = 0;
+        int              ttl        = 0;
+        time_t           now        = 0;
+
+        /* conf->lock is deliberately not taken for these reads so that
+         * multi-threaded I/O does not contend on a global lock. Writers
+         * take the lock, so the stored values stay intact; a reader may
+         * briefly observe a junk last_child_down, which is accepted.
+         */
+        child_down = conf->last_child_down;
+        ttl = conf->timeout;
+
+        time (&now);
+
+        if (mdc_time == 0)
+                return _gf_false;
+
+        if ((child_down != 0) && (mdc_time < child_down))
+                return _gf_false;
+
+        if (now >= (mdc_time + ttl))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+
+/*
+ * Check, under mdc->lock, whether the cached iatt of @mdc is still valid.
+ * When it is not, ia_time is reset to 0 so the entry stays invalid.
+ */
+static gf_boolean_t
+is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
+{
+        gf_boolean_t valid = _gf_true;
+
+        LOCK (&mdc->lock);
+        valid = __is_cache_valid (this, mdc->ia_time);
+        if (!valid)
+                mdc->ia_time = 0;
+        UNLOCK (&mdc->lock);
+
+        return valid;
+}
+
+
+/*
+ * Check, under mdc->lock, whether the cached xattrs of @mdc are still
+ * valid. When they are not, xa_time is reset to 0 so the entry stays
+ * invalid.
+ */
+static gf_boolean_t
+is_md_cache_xatt_valid (xlator_t *this, struct md_cache *mdc)
+{
+        gf_boolean_t valid = _gf_true;
+
+        LOCK (&mdc->lock);
+        valid = __is_cache_valid (this, mdc->xa_time);
+        if (!valid)
+                mdc->xa_time = 0;
+        UNLOCK (&mdc->lock);
+
+        return valid;
+}
+
+/*
+ * Copy the cacheable members of @iatt into @mdc. The time and ownership
+ * fields land in the uint32_t members of struct md_cache.
+ * Does no locking itself; mdc_inode_iatt_set_validate() calls it with
+ * mdc->lock held.
+ */
+void
+mdc_from_iatt (struct md_cache *mdc, struct iatt *iatt)
+{
+ mdc->md_prot = iatt->ia_prot;
+ mdc->md_nlink = iatt->ia_nlink;
+ mdc->md_uid = iatt->ia_uid;
+ mdc->md_gid = iatt->ia_gid;
+ mdc->md_atime = iatt->ia_atime;
+ mdc->md_atime_nsec = iatt->ia_atime_nsec;
+ mdc->md_mtime = iatt->ia_mtime;
+ mdc->md_mtime_nsec = iatt->ia_mtime_nsec;
+ mdc->md_ctime = iatt->ia_ctime;
+ mdc->md_ctime_nsec = iatt->ia_ctime_nsec;
+ mdc->md_rdev = iatt->ia_rdev;
+ mdc->md_size = iatt->ia_size;
+ mdc->md_blocks = iatt->ia_blocks;
+}
+
+/*
+ * Copy the cached attributes of @mdc back into @iatt (the inverse of
+ * mdc_from_iatt). Only the members listed here are written; gfid, ino,
+ * dev and type are filled in by mdc_inode_iatt_get().
+ * Does no locking itself; mdc_inode_iatt_get() calls it with mdc->lock
+ * held.
+ */
+void
+mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt)
+{
+ iatt->ia_prot = mdc->md_prot;
+ iatt->ia_nlink = mdc->md_nlink;
+ iatt->ia_uid = mdc->md_uid;
+ iatt->ia_gid = mdc->md_gid;
+ iatt->ia_atime = mdc->md_atime;
+ iatt->ia_atime_nsec = mdc->md_atime_nsec;
+ iatt->ia_mtime = mdc->md_mtime;
+ iatt->ia_mtime_nsec = mdc->md_mtime_nsec;
+ iatt->ia_ctime = mdc->md_ctime;
+ iatt->ia_ctime_nsec = mdc->md_ctime_nsec;
+ iatt->ia_rdev = mdc->md_rdev;
+ iatt->ia_size = mdc->md_size;
+ iatt->ia_blocks = mdc->md_blocks;
+}
+
+/*
+ * Refresh the cached attributes of @inode from @iatt.
+ *
+ * @prebuf: optional pre-operation attributes; only used to decide whether
+ * the change was caused by the operation that reported it.
+ * @iatt: new attributes. NULL, or a zero ia_ctime, just invalidates the
+ * cached iatt (ia_time = 0) and returns 0.
+ *
+ * Returns -1 when the cache entry cannot be prepared, or when @iatt is
+ * older than what is cached (ctime, then ctime_nsec) and is therefore
+ * discarded; 0 otherwise.
+ *
+ * On an accepted update the cached fields are overwritten and ia_time is
+ * stamped with the current time. For regular files inode_invalidate() is
+ * called first (with mdc->lock held) when mtime/ctime changed and @prebuf
+ * is missing or does not match the cached values.
+ */
+int
+mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+ struct iatt *iatt)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep (this, inode);
+ if (!mdc) {
+ ret = -1;
+ goto out;
+ }
+
+ LOCK (&mdc->lock);
+ {
+ if (!iatt || !iatt->ia_ctime) {
+ mdc->ia_time = 0;
+ goto unlock;
+ }
+
+ /* There could be a race in invalidation, where the
+ * invalidations in order A, B reaches md-cache in the order
+ * B, A. Hence, make sure the invalidation A is discarded if
+ * it comes after B. ctime of a file is always in ascending
+ * order unlike atime and mtime(which can be changed by user
+ * to any date), also ctime gets updates when atime/mtime
+ * changes, hence check for ctime only.
+ */
+ if (mdc->md_ctime > iatt->ia_ctime) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request");
+ ret = -1;
+ goto unlock;
+
+ }
+ if ((mdc->md_ctime == iatt->ia_ctime) &&
+ (mdc->md_ctime_nsec > iatt->ia_ctime_nsec)) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request(ctime_nsec)");
+ ret = -1;
+ goto unlock;
+ }
+
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if (IA_ISREG(inode->ia_type) &&
+ ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (iatt->ia_ctime != mdc->md_ctime) ||
+ (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)))
+ if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_ctime_nsec != mdc->md_ctime_nsec) ||
+ (prebuf->ia_mtime != mdc->md_mtime) ||
+ (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec))
+ inode_invalidate(inode);
+
+ mdc_from_iatt (mdc, iatt);
+
+ time (&mdc->ia_time);
+ }
+unlock:
+ UNLOCK (&mdc->lock);
+
+out:
+ return ret;
+}
+
+/*
+ * Refresh the cached attributes of @inode from @iatt when no
+ * pre-operation attributes are available (prebuf is passed as NULL).
+ */
+int
+mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+        struct iatt *no_prebuf = NULL;
+
+        return mdc_inode_iatt_set_validate (this, inode, no_prebuf, iatt);
+}
+
+/*
+ * Fill @iatt from the cached attributes of @inode.
+ *
+ * Returns -1 when the inode has no cache entry or the cached iatt is no
+ * longer valid, 0 on success. On success gfid, ino and type are taken
+ * from @inode itself and ia_dev is set to the constant 42.
+ */
+int
+mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+        struct md_cache *mdc = NULL;
+
+        if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+                return -1;
+
+        if (!is_md_cache_iatt_valid (this, mdc))
+                return -1;
+
+        LOCK (&mdc->lock);
+        mdc_to_iatt (mdc, iatt);
+        UNLOCK (&mdc->lock);
+
+        /* identity fields are not cached; derive them from the inode */
+        gf_uuid_copy (iatt->ia_gfid, inode->gfid);
+        iatt->ia_ino = gfid_to_ino (inode->gfid);
+        iatt->ia_dev = 42;
+        iatt->ia_type = inode->ia_type;
+
+        return 0;
+}
+/* State passed to updatefn through dict_foreach by mdc_dict_update. */
+struct updatedict {
+ dict_t *dict; /* target dict; created by updatefn on first match when NULL */
+ int ret; /* set to -1 by updatefn when dict_new or dict_set fails */
+};
+
+/*
+ * dict_foreach callback used by mdc_dict_update: copy @key/@value into the
+ * target dict of the struct updatedict passed as @data, provided @key is
+ * one of the mdc_keys[] entries with .check set.
+ *
+ * The target dict is created on the first matching key. Returns 0, or -1
+ * (also recorded in u->ret) when the dict cannot be created or the value
+ * cannot be stored.
+ */
+static int
+updatefn(dict_t *dict, char *key, data_t *value, void *data)
+{
+        struct updatedict *u = data;
+        int                i = 0;
+
+        for (i = 0; mdc_keys[i].name != NULL; i++) {
+                if (!mdc_keys[i].check)
+                        continue;
+                if (strcmp (mdc_keys[i].name, key) != 0)
+                        continue;
+
+                if (!u->dict) {
+                        u->dict = dict_new ();
+                        if (!u->dict) {
+                                u->ret = -1;
+                                return -1;
+                        }
+                }
+
+                /* posix xlator as part of listxattr will send both names
+                 * and values of the xattrs in the dict. But as per man page
+                 * listxattr is mainly supposed to send names of the all the
+                 * xattrs. gfapi, as of now will put all the keys it obtained
+                 * in the dict (sent by posix) into a buffer provided by the
+                 * caller (thus the values of those xattrs are lost). If some
+                 * xlator makes gfapi based calls (ex: snapview-server), then
+                 * it has to unwind the calls by putting those names it got
+                 * in the buffer again into the dict. But now it would not be
+                 * having the values for those xattrs. So it might just put
+                 * a 0 byte value ("") into the dict for each xattr and unwind
+                 * the call. So the xlators which cache the xattrs (as of now
+                 * md-cache caches the acl and selinux related xattrs), should
+                 * not update their cache if the value of a xattr is a 0 byte
+                 * data (i.e. "").
+                 *
+                 * A missing value or data pointer is treated the same way
+                 * instead of being handed to a string function.
+                 */
+                if (!value || !value->data || value->data[0] == '\0')
+                        continue;
+
+                if (dict_set (u->dict, key, value) < 0) {
+                        u->ret = -1;
+                        return -1;
+                }
+
+                break;
+        }
+        return 0;
+}
+
+/*
+ * Merge the cacheable keys of @src into *@tgt (see updatefn).
+ *
+ * When *@tgt is NULL a dict is created on demand and handed back through
+ * @tgt; if the merge failed, that freshly created dict is dropped instead
+ * and *@tgt stays NULL. An existing *@tgt is updated in place.
+ * Returns 0 on success, -1 when updatefn reported a failure.
+ */
+static int
+mdc_dict_update(dict_t **tgt, dict_t *src)
+{
+        struct updatedict state = {
+                .dict = *tgt,
+                .ret  = 0,
+        };
+
+        dict_foreach (src, updatefn, &state);
+
+        if (*tgt == NULL) {
+                if ((state.ret < 0) && state.dict)
+                        dict_unref (state.dict);
+                else
+                        *tgt = state.dict;
+        }
+
+        return state.ret;
+}
+
+/*
+ * Replace the cached xattrs of @inode with the cacheable keys found in
+ * @dict and stamp xa_time with the current time.
+ *
+ * Returns -1 when the cache entry cannot be prepared, when @dict is NULL
+ * (the cache is left untouched), or when building the new dict fails;
+ * 0 on success.
+ *
+ * The previous xattr dict is dropped before the new one is built. If the
+ * build fails, xa_time is reset as well: otherwise a still-fresh xa_time
+ * combined with xattr == NULL would be served by mdc_inode_xatt_get() as
+ * a valid "no xattrs" answer.
+ */
+int
+mdc_inode_xatt_set (xlator_t *this, inode_t *inode, dict_t *dict)
+{
+        int              ret     = -1;
+        struct md_cache *mdc     = NULL;
+        dict_t          *newdict = NULL;
+
+        mdc = mdc_inode_prep (this, inode);
+        if (!mdc)
+                goto out;
+
+        if (!dict)
+                goto out;
+
+        LOCK (&mdc->lock);
+        {
+                if (mdc->xattr) {
+                        dict_unref (mdc->xattr);
+                        mdc->xattr = NULL;
+                }
+
+                ret = mdc_dict_update (&newdict, dict);
+                if (ret < 0) {
+                        /* nothing trustworthy is cached any more */
+                        mdc->xa_time = 0;
+                } else {
+                        if (newdict)
+                                mdc->xattr = newdict;
+
+                        time (&mdc->xa_time);
+                        ret = 0;
+                }
+        }
+        UNLOCK (&mdc->lock);
+out:
+        return ret;
+}
+
+
+/*
+ * Merge the cacheable keys of @dict into the cached xattrs of @inode and
+ * stamp xa_time with the current time.
+ *
+ * Returns -1 when the cache entry cannot be prepared, when @dict is NULL
+ * (the cache is left untouched), or when the merge fails; 0 on success.
+ *
+ * A failed merge may have applied only some of the keys, so the cached
+ * dict can no longer be trusted; xa_time is reset so the next reader
+ * refetches instead of being served possibly stale values.
+ */
+int
+mdc_inode_xatt_update (xlator_t *this, inode_t *inode, dict_t *dict)
+{
+        int              ret = -1;
+        struct md_cache *mdc = NULL;
+
+        mdc = mdc_inode_prep (this, inode);
+        if (!mdc)
+                goto out;
+
+        if (!dict)
+                goto out;
+
+        LOCK (&mdc->lock);
+        {
+                ret = mdc_dict_update (&mdc->xattr, dict);
+                if (ret < 0) {
+                        mdc->xa_time = 0;
+                } else {
+                        time (&mdc->xa_time);
+                        ret = 0;
+                }
+        }
+        UNLOCK (&mdc->lock);
+out:
+        return ret;
+}
+
+
+/*
+ * Remove the xattr @name from the cached xattrs of @inode.
+ *
+ * Returns -1 when the cache entry cannot be prepared, when @name is NULL
+ * or when nothing is cached for the inode; 0 once the key has been
+ * removed from the cached dict.
+ *
+ * mdc->xattr is inspected only while mdc->lock is held, because
+ * mdc_inode_xatt_set() drops and replaces that pointer under the same
+ * lock.
+ */
+int
+mdc_inode_xatt_unset (xlator_t *this, inode_t *inode, char *name)
+{
+        int              ret = -1;
+        struct md_cache *mdc = NULL;
+
+        mdc = mdc_inode_prep (this, inode);
+        if (!mdc)
+                goto out;
+
+        if (!name)
+                goto out;
+
+        LOCK (&mdc->lock);
+        {
+                if (mdc->xattr) {
+                        dict_del (mdc->xattr, name);
+                        ret = 0;
+                }
+        }
+        UNLOCK (&mdc->lock);
+out:
+        return ret;
+}
+
+
+/*
+ * Hand out a reference to the cached xattrs of @inode through *@dict.
+ *
+ * Returns -1 when the inode has no cache entry or the xattr cache is no
+ * longer valid, 0 otherwise. On success *@dict is written only when
+ * xattrs are actually cached and @dict is non-NULL; the caller owns the
+ * reference it receives.
+ */
+int
+mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict)
+{
+        struct md_cache *mdc = NULL;
+
+        if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+                return -1;
+
+        if (!is_md_cache_xatt_valid (this, mdc))
+                return -1;
+
+        LOCK (&mdc->lock);
+        /* Missing xattr only means no keys were there, i.e
+           a negative cache for the "loaded" keys
+        */
+        if (mdc->xattr && dict)
+                *dict = dict_ref (mdc->xattr);
+        UNLOCK (&mdc->lock);
+
+        return 0;
+}
+
+
+/*
+ * Mark the cached iatt of @inode as invalid by resetting ia_time.
+ *
+ * Returns -1 when the inode has no cache entry, 0 once the entry has been
+ * invalidated. (The success path previously fell through with the initial
+ * -1, making success indistinguishable from failure.)
+ */
+int
+mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode)
+{
+        int              ret = -1;
+        struct md_cache *mdc = NULL;
+
+        if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+                goto out;
+
+        LOCK (&mdc->lock);
+        {
+                mdc->ia_time = 0;
+        }
+        UNLOCK (&mdc->lock);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Mark the cached xattrs of @inode as invalid by resetting xa_time.
+ *
+ * Returns -1 when the inode has no cache entry, 0 once the entry has been
+ * invalidated. (The success path previously fell through with the initial
+ * -1, making success indistinguishable from failure.)
+ */
+int
+mdc_inode_xatt_invalidate (xlator_t *this, inode_t *inode)
+{
+        int              ret = -1;
+        struct md_cache *mdc = NULL;
+
+        if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+                goto out;
+
+        LOCK (&mdc->lock);
+        {
+                mdc->xa_time = 0;
+        }
+        UNLOCK (&mdc->lock);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Add every mdc_keys[] entry whose .load flag is set to @dict, with an
+ * int8 value of 0. Stops silently at the first key that cannot be added.
+ */
+void
+mdc_load_reqs (xlator_t *this, dict_t *dict)
+{
+        int idx = 0;
+
+        for (idx = 0; mdc_keys[idx].name != NULL; idx++) {
+                if (mdc_keys[idx].load == 0)
+                        continue;
+
+                if (dict_set_int8 (dict, (char *) mdc_keys[idx].name, 0) != 0)
+                        return;
+        }
+}
+
+
+/*
+ * Build a single space-separated string holding the names of all
+ * mdc_keys[] entries whose .load flag is set.
+ *
+ * Returns a GF_CALLOC'ed string the caller must GF_FREE, or NULL when no
+ * key is loaded or the allocation fails. The buffer is sized for the
+ * names of all table entries, loaded or not.
+ */
+static char*
+mdc_serialize_loaded_key_names (xlator_t *this)
+{
+        struct mdc_key *entry      = NULL;
+        char           *names      = NULL;
+        gf_boolean_t    any_loaded = _gf_false;
+        int             capacity   = 0;
+        int             used       = 0;
+
+        /* pass 1: size the buffer and see whether there is anything to do */
+        for (entry = mdc_keys; entry->name != NULL; entry++) {
+                capacity += (strlen (entry->name) + 1);
+                if (entry->load)
+                        any_loaded = _gf_true;
+        }
+
+        if (!any_loaded)
+                return NULL;
+
+        names = GF_CALLOC (1, capacity + 1, gf_common_mt_char);
+        if (!names)
+                return NULL;
+
+        /* pass 2: "name " for every loaded key */
+        for (entry = mdc_keys; entry->name != NULL; entry++) {
+                if (!entry->load)
+                        continue;
+                strcat (names, entry->name);
+                strcat (names, " ");
+        }
+
+        used = strlen (names);
+        if (used > 0) {
+                /* chop the trailing separator */
+                names[used - 1] = '\0';
+                return names;
+        }
+
+        GF_FREE (names);
+        return NULL;
+}
+
+/* State passed to checkfn through dict_foreach by mdc_xattr_satisfied. */
+struct checkpair {
+ int ret; /* starts at 1; checkfn clears it when a requested key is not a loaded mdc key */
+ dict_t *rsp; /* response dict; stored by mdc_xattr_satisfied but not read by checkfn */
+};
+
+
+/*
+ * Return 1 when @key names an mdc_keys[] entry whose .load flag is set,
+ * 0 otherwise (including for a NULL @key).
+ */
+static int
+is_mdc_key_satisfied (const char *key)
+{
+        int idx = 0;
+
+        if (key == NULL)
+                return 0;
+
+        for (idx = 0; mdc_keys[idx].name != NULL; idx++) {
+                if (mdc_keys[idx].load &&
+                    (strcmp (mdc_keys[idx].name, key) == 0))
+                        return 1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * dict_foreach callback used by mdc_xattr_satisfied: clear the verdict in
+ * the struct checkpair passed as @data when @key is not a loaded mdc key.
+ * Always returns 0.
+ */
+static int
+checkfn (dict_t *this, char *key, data_t *value, void *data)
+{
+        struct checkpair *verdict = data;
+
+        if (is_mdc_key_satisfied (key))
+                return 0;
+
+        verdict->ret = 0;
+        return 0;
+}
+
+
+/*
+ * Return 1 when every key in @req is a loaded mdc key, 0 otherwise.
+ * @rsp is carried along in the iteration state but is not examined by
+ * checkfn.
+ */
+int
+mdc_xattr_satisfied (xlator_t *this, dict_t *req, dict_t *rsp)
+{
+        struct checkpair verdict = {
+                .ret = 1,
+                .rsp = rsp,
+        };
+
+        dict_foreach (req, checkfn, &verdict);
+
+        return verdict.ret;
+}
+
+
+/*
+ * Lookup callback: on success, cache the parent's post-op attributes and
+ * the looked-up inode's attributes and xattrs, then unwind to the caller.
+ * Nothing is cached when the lookup failed or the frame carries no local.
+ */
+int
+mdc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postparent);
+
+                if (local->loc.inode) {
+                        mdc_inode_iatt_set (this, local->loc.inode, stbuf);
+                        mdc_inode_xatt_set (this, local->loc.inode, dict);
+                }
+        }
+
+        MDC_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf,
+                          dict, postparent);
+        return 0;
+}
+
+/*
+ * lookup fop. The request is answered from the cache when all of these
+ * hold: a frame-local could be set up, the lookup is named (loc->name),
+ * the cached iatt of loc->inode is valid and, if the caller passed @xdata,
+ * the xattr cache is valid and every requested key is a loaded mdc key.
+ * The cached reply carries a zero-filled postparent.
+ * Otherwise the lookup is wound to the first child with the loaded mdc
+ * keys added to @xdata (a dict is allocated for the wind when the caller
+ * passed none) and mdc_lookup_cbk refreshes the cache.
+ */
+int
+mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ int ret = 0;
+ struct iatt stbuf = {0, };
+ struct iatt postparent = {0, };
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_alloc = NULL;
+ mdc_local_t *local = NULL;
+
+
+ local = mdc_local_get (frame);
+ if (!local)
+ goto uncached;
+
+ loc_copy (&local->loc, loc);
+
+ if (!loc->name)
+ /* A nameless discovery is dangerous to serve from cache. We
+ perform nameless lookup with the intention of
+ re-establishing an inode "properly"
+ */
+ goto uncached;
+
+ ret = mdc_inode_iatt_get (this, loc->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
+
+ if (xdata) {
+ ret = mdc_inode_xatt_get (this, loc->inode, &xattr_rsp);
+ if (ret != 0)
+ goto uncached;
+
+ if (!mdc_xattr_satisfied (this, xdata, xattr_rsp))
+ goto uncached;
+ }
+
+ MDC_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &stbuf,
+ xattr_rsp, &postparent);
+
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ return 0;
+
+uncached:
+ if (!xdata)
+ xdata = xattr_alloc = dict_new ();
+ if (xdata)
+ mdc_load_reqs (this, xdata);
+
+ STACK_WIND (frame, mdc_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, loc, xdata);
+
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ if (xattr_alloc)
+ dict_unref (xattr_alloc);
+ return 0;
+}
+
+
+/*
+ * stat callback: on success, cache @buf for the inode remembered in the
+ * frame-local, then unwind to the caller.
+ */
+int
+mdc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set (this, local->loc.inode, buf);
+
+        MDC_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * stat fop: answer from the cached iatt of loc->inode when it is valid,
+ * otherwise wind to the first child and let mdc_stat_cbk refresh the
+ * cache.
+ *
+ * stbuf is zero-initialised (as in mdc_lookup): mdc_inode_iatt_get() only
+ * fills the members md-cache tracks, and the whole structure is unwound
+ * to the caller on a cache hit.
+ */
+int
+mdc_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        int          ret   = 0;
+        struct iatt  stbuf = {0, };
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (!local)
+                goto uncached;
+
+        loc_copy (&local->loc, loc);
+
+        ret = mdc_inode_iatt_get (this, loc->inode, &stbuf);
+        if (ret != 0)
+                goto uncached;
+
+        MDC_STACK_UNWIND (stat, frame, 0, 0, &stbuf, xdata);
+
+        return 0;
+
+uncached:
+        STACK_WIND (frame, mdc_stat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                    loc, xdata);
+        return 0;
+}
+
+
+/*
+ * fstat callback: on success, cache @buf for the inode of the fd
+ * remembered in the frame-local, then unwind to the caller.
+ */
+int
+mdc_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *buf,
+               dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set (this, local->fd->inode, buf);
+
+        MDC_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+
+        return 0;
+}
+
+
+/*
+ * fstat fop: answer from the cached iatt of fd->inode when it is valid,
+ * otherwise wind to the first child and let mdc_fstat_cbk refresh the
+ * cache.
+ *
+ * stbuf is zero-initialised (as in mdc_lookup): mdc_inode_iatt_get() only
+ * fills the members md-cache tracks, and the whole structure is unwound
+ * to the caller on a cache hit.
+ */
+int
+mdc_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        int          ret   = 0;
+        struct iatt  stbuf = {0, };
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (!local)
+                goto uncached;
+
+        local->fd = fd_ref (fd);
+
+        ret = mdc_inode_iatt_get (this, fd->inode, &stbuf);
+        if (ret != 0)
+                goto uncached;
+
+        MDC_STACK_UNWIND (fstat, frame, 0, 0, &stbuf, xdata);
+
+        return 0;
+
+uncached:
+        STACK_WIND (frame, mdc_fstat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+                    fd, xdata);
+        return 0;
+}
+
+
+/*
+ * truncate callback: on success, validate and refresh the cached iatt of
+ * the inode remembered in the frame-local using @prebuf/@postbuf, then
+ * unwind to the caller.
+ */
+int
+mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->loc.inode, prebuf,
+                                             postbuf);
+
+        MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+
+/*
+ * truncate fop: remember the inode in the frame-local so that
+ * mdc_truncate_cbk can refresh its cached iatt, then wind to the first
+ * child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              off_t offset, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                local->loc.inode = inode_ref (loc->inode);
+
+        STACK_WIND (frame, mdc_truncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+                    loc, offset, xdata);
+        return 0;
+}
+
+
+/*
+ * ftruncate callback: on success, validate and refresh the cached iatt of
+ * the inode behind the fd remembered in the frame-local using
+ * @prebuf/@postbuf, then unwind to the caller.
+ */
+int
+mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode, prebuf,
+                                             postbuf);
+
+        MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop: remember the fd in the frame-local so that
+ * mdc_ftruncate_cbk can refresh the cached iatt of its inode, then wind
+ * to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               off_t offset, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                local->fd = fd_ref (fd);
+
+        STACK_WIND (frame, mdc_ftruncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+                    fd, offset, xdata);
+        return 0;
+}
+
+
+/*
+ * mknod callback: on success, cache the parent's post-op attributes, the
+ * new inode's attributes and the xattrs that were sent with the request
+ * (local->xattr), then unwind to the caller.
+ */
+int
+mdc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postparent);
+
+                if (local->loc.inode) {
+                        mdc_inode_iatt_set (this, local->loc.inode, buf);
+                        mdc_inode_xatt_set (this, local->loc.inode,
+                                            local->xattr);
+                }
+        }
+
+        MDC_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * mknod fop: remember the loc and the request xattrs in the frame-local
+ * for mdc_mknod_cbk, then wind to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+           mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+                local->xattr = dict_ref (xdata);
+        }
+
+        STACK_WIND (frame, mdc_mknod_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+                    loc, mode, rdev, umask, xdata);
+        return 0;
+}
+
+
+/*
+ * mkdir callback: on success, cache the parent's post-op attributes, the
+ * new directory's attributes and the xattrs that were sent with the
+ * request (local->xattr), then unwind to the caller.
+ */
+int
+mdc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postparent);
+
+                if (local->loc.inode) {
+                        mdc_inode_iatt_set (this, local->loc.inode, buf);
+                        mdc_inode_xatt_set (this, local->loc.inode,
+                                            local->xattr);
+                }
+        }
+
+        MDC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * mkdir fop: remember the loc and the request xattrs in the frame-local
+ * for mdc_mkdir_cbk, then wind to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+           mode_t mode, mode_t umask, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+                local->xattr = dict_ref (xdata);
+        }
+
+        STACK_WIND (frame, mdc_mkdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                    loc, mode, umask, xdata);
+        return 0;
+}
+
+
+/*
+ * unlink callback: on success, cache the parent's post-op attributes and
+ * invalidate the cached iatt of the unlinked inode (a NULL iatt is passed
+ * to mdc_inode_iatt_set), then unwind to the caller.
+ */
+int
+mdc_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postparent);
+
+                if (local->loc.inode)
+                        mdc_inode_iatt_set (this, local->loc.inode, NULL);
+        }
+
+        MDC_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * unlink fop: remember the loc in the frame-local for mdc_unlink_cbk,
+ * then wind to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+            dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                loc_copy (&local->loc, loc);
+
+        STACK_WIND (frame, mdc_unlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                    loc, xflag, xdata);
+        return 0;
+}
+
+
+/*
+ * rmdir callback: on success, cache the parent's post-op attributes, then
+ * unwind to the caller.
+ */
+int
+mdc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local && local->loc.parent)
+                mdc_inode_iatt_set (this, local->loc.parent, postparent);
+
+        MDC_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * rmdir fop: remember the loc in the frame-local for mdc_rmdir_cbk, then
+ * wind to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+           dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                loc_copy (&local->loc, loc);
+
+        STACK_WIND (frame, mdc_rmdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+                    loc, flag, xdata);
+        return 0;
+}
+
+
+/*
+ * symlink callback: on success, cache the parent's post-op attributes and
+ * the new symlink's attributes, then unwind to the caller.
+ */
+int
+mdc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *buf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postparent);
+
+                if (local->loc.inode)
+                        mdc_inode_iatt_set (this, local->loc.inode, buf);
+        }
+
+        MDC_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * symlink fop: remember the loc and a copy of the link target in the
+ * frame-local, then wind to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and mdc_symlink_cbk skips the cache update
+ * when frame->local is NULL.
+ */
+int
+mdc_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+             loc_t *loc, mode_t umask, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+
+                local->linkname = gf_strdup (linkname);
+        }
+
+        STACK_WIND (frame, mdc_symlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                    linkname, loc, umask, xdata);
+        return 0;
+}
+
+
+/*
+ * rename callback: on success, cache the post-op attributes of the old
+ * and new parent directories and invalidate the cached iatt of the
+ * renamed inode, then unwind to the caller.
+ */
+int
+mdc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                struct iatt *preoldparent, struct iatt *postoldparent,
+                struct iatt *prenewparent, struct iatt *postnewparent,
+                dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postoldparent);
+
+                /* TODO: fix dht_rename() not to return linkfile
+                   attributes before setting attributes here
+                */
+                if (local->loc.inode)
+                        mdc_inode_iatt_set (this, local->loc.inode, NULL);
+
+                if (local->loc2.parent)
+                        mdc_inode_iatt_set (this, local->loc2.parent,
+                                            postnewparent);
+        }
+
+        MDC_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+                          preoldparent, postoldparent, prenewparent,
+                          postnewparent, xdata);
+        return 0;
+}
+
+
+/*
+ * rename fop: remember both locs in the frame-local for mdc_rename_cbk,
+ * then wind to the first child.
+ *
+ * mdc_local_get() returns NULL when its allocation fails; the call is
+ * still wound in that case and the callback skips the cache update when
+ * frame->local is NULL.
+ */
+int
+mdc_rename (call_frame_t *frame, xlator_t *this,
+            loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, oldloc);
+                loc_copy (&local->loc2, newloc);
+        }
+
+        STACK_WIND (frame, mdc_rename_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+                    oldloc, newloc, xdata);
+        return 0;
+}
+
+
+/*
+ * link callback: on success, cache the linked inode's attributes and the
+ * post-op attributes of the new name's parent, then unwind to the caller.
+ */
+int
+mdc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf,
+              struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.inode)
+                        mdc_inode_iatt_set (this, local->loc.inode, buf);
+
+                if (local->loc2.parent)
+                        mdc_inode_iatt_set (this, local->loc2.parent,
+                                            postparent);
+        }
+
+        MDC_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * mdc_link - wind a link to the first child.
+ *
+ * Old and new locations are stored in the frame-local state for
+ * mdc_link_cbk.  A NULL return from mdc_local_get() is tolerated: the call
+ * is still wound and the callback, which checks for a missing local, skips
+ * the cache update instead of this function dereferencing NULL.
+ */
+int
+mdc_link (call_frame_t *frame, xlator_t *this,
+          loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, oldloc);
+                loc_copy (&local->loc2, newloc);
+        }
+
+        STACK_WIND (frame, mdc_link_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+                    oldloc, newloc, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for create.  On success (and with frame-local state available)
+ * the parent gets `postparent`, and - when the saved loc carries an inode -
+ * the returned inode gets `buf` while the saved inode gets the xattr dict
+ * stored at wind time.  The reply is unwound unchanged.
+ */
+int
+mdc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (local->loc.parent)
+                        mdc_inode_iatt_set (this, local->loc.parent,
+                                            postparent);
+
+                if (local->loc.inode) {
+                        mdc_inode_iatt_set (this, inode, buf);
+                        mdc_inode_xatt_set (this, local->loc.inode,
+                                            local->xattr);
+                }
+        }
+
+        MDC_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+                          preparent, postparent, xdata);
+        return 0;
+}
+
+
+/*
+ * mdc_create - wind a create to the first child.
+ *
+ * The location and a reference to xdata are kept in the frame-local state
+ * for mdc_create_cbk.  If mdc_local_get() returns NULL nothing is recorded
+ * (avoiding a NULL dereference) and the call is wound regardless; the
+ * callback already copes with a missing local.
+ */
+int
+mdc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+            mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+                local->xattr = dict_ref (xdata);
+        }
+
+        STACK_WIND (frame, mdc_create_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+                    loc, flags, mode, umask, fd, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for readv.  A non-negative op_ret with frame-local state present
+ * hands `stbuf` to mdc_inode_iatt_set() for the fd's inode; the reply is
+ * then unwound unchanged.
+ */
+int
+mdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iovec *vector, int32_t count,
+               struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret >= 0) && local)
+                mdc_inode_iatt_set (this, local->fd->inode, stbuf);
+
+        MDC_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+                          stbuf, iobref, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_readv - wind a readv to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_readv_cbk.  mdc_local_get() may return NULL; then no fd is recorded
+ * and the call is wound anyway (the callback checks for a missing local).
+ */
+int
+mdc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+           off_t offset, uint32_t flags, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                local->fd = fd_ref (fd);
+
+        STACK_WIND (frame, mdc_readv_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset, flags, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for writev.  Unless op_ret is -1 or the frame has no local
+ * state, the pre/post attributes are passed to
+ * mdc_inode_iatt_set_validate() for the fd's inode.  The reply is unwound
+ * unchanged.
+ */
+int
+mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret != -1) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode,
+                                             prebuf, postbuf);
+
+        MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_writev - wind a writev to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_writev_cbk.  A NULL return from mdc_local_get() is tolerated: the
+ * write is still wound and the callback skips the cache update.
+ */
+int
+mdc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+            int count, off_t offset, uint32_t flags, struct iobref *iobref,
+            dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                local->fd = fd_ref (fd);
+
+        STACK_WIND (frame, mdc_writev_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, offset, flags, iobref, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for setattr.
+ *
+ * On failure the inode recorded at wind time is passed to
+ * mdc_inode_iatt_set() with a NULL iatt; on success the pre/post attributes
+ * go through mdc_inode_iatt_set_validate().  The frame-local state is
+ * checked for NULL *before* either path touches it - the failure branch
+ * previously dereferenced it unconditionally.  The reply is unwound
+ * unchanged in every case.
+ */
+int
+mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = frame->local;
+
+        /* nothing was recorded for this call: no inode to update */
+        if (!local)
+                goto out;
+
+        if (op_ret != 0) {
+                mdc_inode_iatt_set (this, local->loc.inode, NULL);
+                goto out;
+        }
+
+        mdc_inode_iatt_set_validate (this, local->loc.inode, prebuf, postbuf);
+
+out:
+        MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_setattr - wind a setattr to the first child.
+ *
+ * The location is copied into the frame-local state for mdc_setattr_cbk.
+ * mdc_local_get() may return NULL; the copy is skipped in that case rather
+ * than dereferencing NULL, and the call is wound regardless.
+ */
+int
+mdc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             struct iatt *stbuf, int valid, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                loc_copy (&local->loc, loc);
+
+        STACK_WIND (frame, mdc_setattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
+                    loc, stbuf, valid, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for fsetattr.  On success with frame-local state present, the
+ * pre/post attributes are passed to mdc_inode_iatt_set_validate() for the
+ * fd's inode; the reply is unwound unchanged.
+ */
+int
+mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode,
+                                             prebuf, postbuf);
+
+        MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_fsetattr - wind an fsetattr to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_fsetattr_cbk.  A NULL return from mdc_local_get() no longer causes a
+ * NULL dereference: the call is wound without recorded state.
+ */
+int
+mdc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iatt *stbuf, int valid, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                local->fd = fd_ref (fd);
+
+        STACK_WIND (frame, mdc_fsetattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
+                    fd, stbuf, valid, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for fsync.  On success with frame-local state present, the
+ * pre/post attributes are passed to mdc_inode_iatt_set_validate() for the
+ * fd's inode; the reply is unwound unchanged.
+ */
+int
+mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode,
+                                             prebuf, postbuf);
+
+        MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_fsync - wind an fsync to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_fsync_cbk.  If mdc_local_get() returns NULL the fd is not recorded
+ * and the call is wound anyway; the callback handles a missing local.
+ */
+int
+mdc_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+           dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local)
+                local->fd = fd_ref (fd);
+
+        STACK_WIND (frame, mdc_fsync_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+                    fd, datasync, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for setxattr.  On success with frame-local state present, the
+ * xattr dict saved at wind time is passed to mdc_inode_xatt_update() and
+ * mdc_inode_iatt_invalidate() is called for the inode.  The reply is
+ * unwound unchanged.
+ */
+int
+mdc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                mdc_inode_xatt_update (this, local->loc.inode, local->xattr);
+                mdc_inode_iatt_invalidate (this, local->loc.inode);
+        }
+
+        MDC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_setxattr - wind a setxattr to the first child.
+ *
+ * The location and a reference to the xattr dict are kept in the
+ * frame-local state for mdc_setxattr_cbk.  A NULL return from
+ * mdc_local_get() is tolerated: nothing is recorded and the call is wound
+ * regardless, instead of dereferencing NULL.
+ */
+int
+mdc_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              dict_t *xattr, int flags, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+                local->xattr = dict_ref (xattr);
+        }
+
+        STACK_WIND (frame, mdc_setxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                    loc, xattr, flags, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for fsetxattr.  On success with frame-local state present, the
+ * saved xattr dict is passed to mdc_inode_xatt_update() and
+ * mdc_inode_iatt_invalidate() is called, both for the fd's inode.  The
+ * reply is unwound unchanged.
+ */
+int
+mdc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                mdc_inode_xatt_update (this, local->fd->inode, local->xattr);
+                mdc_inode_iatt_invalidate (this, local->fd->inode);
+        }
+
+        MDC_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_fsetxattr - wind an fsetxattr to the first child.
+ *
+ * References to the fd and the xattr dict are kept in the frame-local state
+ * for mdc_fsetxattr_cbk.  If mdc_local_get() returns NULL nothing is
+ * recorded (no NULL dereference) and the call is wound anyway.
+ */
+int
+mdc_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               dict_t *xattr, int flags, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                local->fd = fd_ref (fd);
+                local->xattr = dict_ref (xattr);
+        }
+
+        STACK_WIND (frame, mdc_fsetxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
+                    fd, xattr, flags, xdata);
+        return 0;
+}
+
+/*
+ * Callback for an uncached getxattr.  A non-negative op_ret with
+ * frame-local state present passes the returned xattr dict to
+ * mdc_inode_xatt_update() for the saved inode; the reply is unwound
+ * unchanged.
+ */
+int
+mdc_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                  dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+                if (local)
+                        mdc_inode_xatt_update (this, local->loc.inode, xattr);
+        }
+
+        MDC_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_getxattr - answer a getxattr from the cache when possible.
+ *
+ * The request is wound to the first child ("uncached") when no frame-local
+ * state could be obtained, when is_mdc_key_satisfied() rejects the key, or
+ * when mdc_inode_xatt_get() returns non-zero.  Otherwise the cached dict is
+ * unwound directly; if it is missing or lacks the key the reply is -1 with
+ * ENODATA.
+ *
+ * NOTE(review): if mdc_inode_xatt_get() hands back a referenced dict,
+ * nothing here releases it after the unwind - confirm ownership.
+ */
+int
+mdc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+              dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+        dict_t      *xattr = NULL;
+        int          op_errno = ENODATA;
+        int          ret = -1;   /* stays non-zero unless the cache lookup succeeds */
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+
+                if (is_mdc_key_satisfied (key))
+                        ret = mdc_inode_xatt_get (this, loc->inode, &xattr);
+        }
+
+        if (ret != 0) {
+                /* cache cannot answer: ask the child */
+                STACK_WIND (frame, mdc_getxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr,
+                            loc, key, xdata);
+                return 0;
+        }
+
+        if (!xattr || !dict_get (xattr, (char *)key)) {
+                ret = -1;
+                op_errno = ENODATA;
+        }
+
+        MDC_STACK_UNWIND (getxattr, frame, ret, op_errno, xattr, xdata);
+
+        return 0;
+}
+
+
+/*
+ * Callback for an uncached fgetxattr.  A non-negative op_ret with
+ * frame-local state present passes the returned xattr dict to
+ * mdc_inode_xatt_update() for the fd's inode; the reply is unwound
+ * unchanged.
+ */
+int
+mdc_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                   dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        if (op_ret >= 0) {
+                local = frame->local;
+                if (local)
+                        mdc_inode_xatt_update (this, local->fd->inode, xattr);
+        }
+
+        MDC_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_fgetxattr - answer an fgetxattr from the cache when possible.
+ *
+ * The request is wound to the first child when no frame-local state could
+ * be obtained, when is_mdc_key_satisfied() rejects the key, or when
+ * mdc_inode_xatt_get() returns non-zero.  Otherwise the cached dict is
+ * unwound directly; if it is missing or lacks the key the reply is -1 with
+ * ENODATA.
+ *
+ * NOTE(review): if mdc_inode_xatt_get() hands back a referenced dict,
+ * nothing here releases it after the unwind - confirm ownership.
+ */
+int
+mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+               dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+        dict_t      *xattr = NULL;
+        int          op_errno = ENODATA;
+        int          ret = -1;   /* stays non-zero unless the cache lookup succeeds */
+
+        local = mdc_local_get (frame);
+        if (local) {
+                local->fd = fd_ref (fd);
+
+                if (is_mdc_key_satisfied (key))
+                        ret = mdc_inode_xatt_get (this, fd->inode, &xattr);
+        }
+
+        if (ret != 0) {
+                /* cache cannot answer: ask the child */
+                STACK_WIND (frame, mdc_fgetxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fgetxattr,
+                            fd, key, xdata);
+                return 0;
+        }
+
+        if (!xattr || !dict_get (xattr, (char *)key)) {
+                ret = -1;
+                op_errno = ENODATA;
+        }
+
+        MDC_STACK_UNWIND (fgetxattr, frame, ret, op_errno, xattr, xdata);
+
+        return 0;
+}
+
+/*
+ * Callback for removexattr.  On success with frame-local state present:
+ * if a key was saved it is passed to mdc_inode_xatt_unset(), otherwise
+ * mdc_inode_xatt_invalidate() is called; mdc_inode_iatt_invalidate()
+ * follows in both cases.  The reply is unwound unchanged.
+ */
+int
+mdc_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (!local->key)
+                        mdc_inode_xatt_invalidate (this, local->loc.inode);
+                else
+                        mdc_inode_xatt_unset (this, local->loc.inode,
+                                              local->key);
+
+                mdc_inode_iatt_invalidate (this, local->loc.inode);
+        }
+
+        MDC_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_removexattr - wind a removexattr to the first child.
+ *
+ * The location and a duplicate of the xattr name are kept in the
+ * frame-local state for mdc_removexattr_cbk.  A NULL return from
+ * mdc_local_get() is tolerated: nothing is recorded (avoiding a NULL
+ * dereference) and the call is wound regardless.
+ */
+int
+mdc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *name, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                loc_copy (&local->loc, loc);
+
+                local->key = gf_strdup (name);
+        }
+
+        STACK_WIND (frame, mdc_removexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+                    loc, name, xdata);
+        return 0;
+}
+
+
+/*
+ * Callback for fremovexattr.  On success with frame-local state present:
+ * if a key was saved it is passed to mdc_inode_xatt_unset(), otherwise
+ * mdc_inode_xatt_invalidate() is called; mdc_inode_iatt_invalidate()
+ * follows in both cases, all on the fd's inode.  The reply is unwound
+ * unchanged.
+ */
+int
+mdc_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local) {
+                if (!local->key)
+                        mdc_inode_xatt_invalidate (this, local->fd->inode);
+                else
+                        mdc_inode_xatt_unset (this, local->fd->inode,
+                                              local->key);
+
+                mdc_inode_iatt_invalidate (this, local->fd->inode);
+        }
+
+        MDC_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+
+        return 0;
+}
+
+
+/*
+ * mdc_fremovexattr - wind an fremovexattr to the first child.
+ *
+ * A reference to the fd and a duplicate of the xattr name are kept in the
+ * frame-local state for mdc_fremovexattr_cbk.  If mdc_local_get() returns
+ * NULL nothing is recorded (no NULL dereference) and the call is wound
+ * anyway.
+ */
+int
+mdc_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  const char *name, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get (frame);
+        if (local) {
+                local->fd = fd_ref (fd);
+
+                local->key = gf_strdup (name);
+        }
+
+        STACK_WIND (frame, mdc_fremovexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr,
+                    fd, name, xdata);
+        return 0;
+}
+
+
+/*
+ * mdc_opendir - wind an opendir, advertising the loaded xattr key names.
+ *
+ * If the caller supplied no xdata a temporary dict is created and released
+ * after the wind.  The serialized key names are stored under
+ * GF_MDC_LOADED_KEY_NAMES; failure to serialize or to store them does not
+ * stop the opendir from being wound.
+ */
+int
+mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc,
+            fd_t *fd, dict_t *xdata)
+{
+        dict_t *xattr_alloc   = NULL;
+        char   *mdc_key_names = NULL;
+
+        if (!xdata)
+                xdata = xattr_alloc = dict_new ();
+
+        if (xdata) {
+                /* Tell readdir-ahead to include these keys in xdata when it
+                 * internally issues readdirp() in its opendir_cbk */
+                mdc_key_names = mdc_serialize_loaded_key_names(this);
+                if (mdc_key_names) {
+                        /* result deliberately unused: wind either way */
+                        (void) dict_set_dynstr (xdata, GF_MDC_LOADED_KEY_NAMES,
+                                                mdc_key_names);
+                }
+        }
+
+        STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+
+        if (xattr_alloc)
+                dict_unref (xattr_alloc);
+
+        return 0;
+}
+
+
+/*
+ * Callback for readdirp.  When op_ret is positive, every returned entry
+ * that carries an inode has its stat and xattr dict passed to
+ * mdc_inode_iatt_set() / mdc_inode_xatt_set().  The reply is unwound
+ * unchanged.
+ */
+int
+mdc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+        gf_dirent_t *entry = NULL;
+
+        if (op_ret > 0) {
+                list_for_each_entry (entry, &entries->list, list) {
+                        if (entry->inode) {
+                                mdc_inode_iatt_set (this, entry->inode,
+                                                    &entry->d_stat);
+                                mdc_inode_xatt_set (this, entry->inode,
+                                                    entry->dict);
+                        }
+                }
+        }
+
+        STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+        return 0;
+}
+
+
+/*
+ * mdc_readdirp - wind a readdirp after running mdc_load_reqs() on xdata.
+ *
+ * A temporary dict is created when the caller passed none; it is released
+ * once the call has been wound.
+ */
+int
+mdc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              size_t size, off_t offset, dict_t *xdata)
+{
+        dict_t *xattr_alloc = NULL;
+
+        if (!xdata) {
+                xattr_alloc = dict_new ();
+                xdata = xattr_alloc;
+        }
+
+        if (xdata)
+                mdc_load_reqs (this, xdata);
+
+        STACK_WIND (frame, mdc_readdirp_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp,
+                    fd, size, offset, xdata);
+
+        if (xattr_alloc)
+                dict_unref (xattr_alloc);
+
+        return 0;
+}
+
+/* Callback for a plain readdir: unwinds the reply unchanged. */
+int
+mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno,
+                             entries, xdata);
+
+        return 0;
+}
+
+/*
+ * mdc_readdir - wind a readdir, or a readdirp when force_readdirp is set.
+ *
+ * Without force_readdirp the request is forwarded as-is.  With it, xdata
+ * (a temporary dict if the caller passed none) goes through
+ * mdc_load_reqs() and the child's readdirp is wound with
+ * mdc_readdirp_cbk; the temporary dict is released afterwards.
+ */
+int
+mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             size_t size, off_t offset, dict_t *xdata)
+{
+        struct mdc_conf *conf        = this->private;
+        dict_t          *xattr_alloc = NULL;
+
+        if (!conf->force_readdirp) {
+                STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->readdir, fd, size, offset,
+                           xdata);
+                return 0;
+        }
+
+        if (!xdata)
+                xdata = xattr_alloc = dict_new ();
+
+        if (xdata)
+                mdc_load_reqs (this, xdata);
+
+        STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+                   xdata);
+
+        /* only a dict created here is ours to release */
+        if (xattr_alloc)
+                dict_unref (xattr_alloc);
+
+        return 0;
+}
+
+/*
+ * Callback for fallocate.  On success with frame-local state present, the
+ * pre/post attributes are passed to mdc_inode_iatt_set_validate() for the
+ * fd's inode; the reply is unwound unchanged.
+ */
+int
+mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode,
+                                             prebuf, postbuf);
+
+        MDC_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+                          xdata);
+
+        return 0;
+}
+
+/*
+ * mdc_fallocate - wind an fallocate to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_fallocate_cbk.  mdc_local_get() may return NULL; the fd is then not
+ * recorded (no NULL dereference) and the call is wound anyway.
+ */
+int mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+                  off_t offset, size_t len, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get(frame);
+        if (local)
+                local->fd = fd_ref(fd);
+
+        STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+                   xdata);
+
+        return 0;
+}
+
+/*
+ * Callback for discard.  On success with frame-local state present, the
+ * pre/post attributes are passed to mdc_inode_iatt_set_validate() for the
+ * fd's inode; the reply is unwound unchanged.
+ */
+int
+mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode,
+                                             prebuf, postbuf);
+
+        MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf,
+                         xdata);
+
+        return 0;
+}
+
+/*
+ * mdc_discard - wind a discard to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_discard_cbk.  If mdc_local_get() returns NULL the fd is not recorded
+ * (no NULL dereference) and the call is wound anyway.
+ */
+int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                size_t len, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get(frame);
+        if (local)
+                local->fd = fd_ref(fd);
+
+        STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->discard, fd, offset, len,
+                   xdata);
+
+        return 0;
+}
+
+/*
+ * Callback for zerofill.  On success with frame-local state present, the
+ * pre/post attributes are passed to mdc_inode_iatt_set_validate() for the
+ * fd's inode; the reply is unwound unchanged.
+ */
+int
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        mdc_local_t *local = frame->local;
+
+        if ((op_ret == 0) && local)
+                mdc_inode_iatt_set_validate (this, local->fd->inode,
+                                             prebuf, postbuf);
+
+        MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+                         xdata);
+
+        return 0;
+}
+
+/*
+ * mdc_zerofill - wind a zerofill to the first child.
+ *
+ * A reference to the fd is stored in the frame-local state for
+ * mdc_zerofill_cbk.  If mdc_local_get() returns NULL the fd is not
+ * recorded (no NULL dereference) and the call is wound anyway.
+ */
+int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                 off_t len, dict_t *xdata)
+{
+        mdc_local_t *local = NULL;
+
+        local = mdc_local_get(frame);
+        if (local)
+                local->fd = fd_ref(fd);
+
+        STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+                   xdata);
+
+        return 0;
+}
+
+
+/* forget callback: hands the inode to mdc_inode_wipe() and reports 0. */
+int
+mdc_forget (xlator_t *this, inode_t *inode)
+{
+        (void) mdc_inode_wipe (this, inode);
+        return 0;
+}
+
+
+/*
+ * is_strpfx - is one of the two strings a prefix of the other?
+ *
+ * Walks both strings in step while their characters match.  Returns 1 when
+ * the walk stopped because at least one string ended (so the shorter one is
+ * a prefix of the longer, or they are equal), 0 when it stopped at a
+ * genuine mismatch.
+ */
+int
+is_strpfx (const char *str1, const char *str2)
+{
+        const char *a = str1;
+        const char *b = str2;
+
+        while (*a && *b && (*a == *b)) {
+                a++;
+                b++;
+        }
+
+        return (*a == '\0') || (*b == '\0');
+}
+
+
+/*
+ * mdc_key_load_set - set the `load` flag on every matching key.
+ *
+ * Scans `keys` up to the entry whose name is NULL and assigns `val` to the
+ * load flag of each entry for which is_strpfx(name, pattern) holds.
+ * Always returns 0.
+ */
+int
+mdc_key_load_set (struct mdc_key *keys, char *pattern, gf_boolean_t val)
+{
+        struct mdc_key *cur = keys;
+
+        while (cur->name) {
+                if (is_strpfx (cur->name, pattern))
+                        cur->load = val;
+                cur++;
+        }
+
+        return 0;
+}
+
+/* Context handed to mdc_inval_xatt() through dict_foreach(): the inode
+ * whose xattrs are being unset and the translator doing it. */
+struct set {
+ inode_t *inode;
+ xlator_t *this;
+};
+
+/*
+ * dict_foreach() visitor: calls mdc_inode_xatt_unset() for key `k` on the
+ * inode/translator pair carried in `tmp` (a struct set) and returns its
+ * result.  The dict and value arguments are not used.
+ */
+static int
+mdc_inval_xatt (dict_t *d, char *k, data_t *v, void *tmp)
+{
+        struct set *ctx = tmp;
+
+        return mdc_inode_xatt_unset (ctx->this, ctx->inode, k);
+}
+
+/*
+ * mdc_invalidate - apply an upcall cache-invalidation to the cache.
+ *
+ * Events other than GF_UPCALL_CACHE_INVALIDATION return 0 untouched.  The
+ * inode is looked up by gfid in the top translator's inode table; -1 is
+ * returned when it is not found.  Depending on the upcall flags the stat is
+ * validated/updated and the xattrs are updated or unset.  The inode
+ * reference obtained from inode_find() is dropped before returning.
+ */
+static int
+mdc_invalidate (xlator_t *this, void *data)
+{
+        struct gf_upcall                    *up_data = data;
+        struct gf_upcall_cache_invalidation *up_ci   = NULL;
+        struct set                           walk    = {0, };
+        inode_table_t                       *itable  = NULL;
+        inode_t                             *inode   = NULL;
+        int                                  ret     = 0;
+
+        if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+                return 0;
+
+        up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+        itable = ((xlator_t *)this->graph->top)->itable;
+        inode = inode_find (itable, up_data->gfid);
+        if (!inode)
+                return -1;
+
+        if (up_ci->flags & IATT_UPDATE_FLAGS) {
+                ret = mdc_inode_iatt_set_validate (this, inode, NULL,
+                                                   &up_ci->stat);
+                /* one of the scenarios where ret < 0 is when this invalidate
+                 * is older than the current stat, in that case do not
+                 * update the xattrs as well
+                 */
+                if (ret < 0)
+                        goto unref;
+        }
+
+        if (up_ci->flags & UP_XATTR) {
+                ret = mdc_inode_xatt_update (this, inode, up_ci->dict);
+        } else if (up_ci->flags & UP_XATTR_RM) {
+                walk.inode = inode;
+                walk.this = this;
+                ret = dict_foreach (up_ci->dict, mdc_inval_xatt, &walk);
+        }
+
+unref:
+        inode_unref (inode);
+
+        return ret;
+}
+
+
+/*
+ * reconfigure - re-read the translator options into the existing conf.
+ *
+ * Each boolean option is followed by mdc_key_load_set() calls that switch
+ * the matching xattr keys on or off.  Any GF_OPTION_RECONF failure jumps to
+ * `out`, leaving the remaining settings as they were.  Always returns 0.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        struct mdc_conf *conf    = this->private;
+        int              timeout = 0;
+
+        GF_OPTION_RECONF ("md-cache-timeout", timeout, options, int32, out);
+
+        GF_OPTION_RECONF ("cache-selinux", conf->cache_selinux, options,
+                          bool, out);
+        mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
+
+        GF_OPTION_RECONF ("cache-posix-acl", conf->cache_posix_acl, options,
+                          bool, out);
+        mdc_key_load_set (mdc_keys, "system.posix_acl_",
+                          conf->cache_posix_acl);
+        mdc_key_load_set (mdc_keys, "glusterfs.posix_acl.",
+                          conf->cache_posix_acl);
+
+        GF_OPTION_RECONF ("cache-swift-metadata", conf->cache_swift_metadata,
+                          options, bool, out);
+        mdc_key_load_set (mdc_keys, "user.swift.metadata",
+                          conf->cache_swift_metadata);
+
+        GF_OPTION_RECONF ("cache-samba-metadata", conf->cache_samba_metadata,
+                          options, bool, out);
+        mdc_key_load_set (mdc_keys, "user.DOSATTRIB",
+                          conf->cache_samba_metadata);
+        mdc_key_load_set (mdc_keys, "security.NTACL",
+                          conf->cache_samba_metadata);
+
+        GF_OPTION_RECONF ("force-readdirp", conf->force_readdirp, options,
+                          bool, out);
+        GF_OPTION_RECONF ("cache-invalidation", conf->mdc_invalidation,
+                          options, bool, out);
+
+        /* A timeout above 60s (the maximum before cache invalidation
+         * support was added) is only honoured when cache invalidation is
+         * enabled; otherwise it is clamped to that previous maximum.
+         */
+        conf->timeout = ((timeout > 60) && !conf->mdc_invalidation)
+                        ? 60 : timeout;
+out:
+        return 0;
+}
+
+/* Sets up memory accounting for this translator with gf_mdc_mt_end + 1
+ * types and returns the result of xlator_mem_acct_init(). */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        return xlator_mem_acct_init (this, gf_mdc_mt_end + 1);
+}
+
+/*
+ * init - allocate and populate the translator's private configuration.
+ *
+ * Returns -1 only when the configuration cannot be allocated.  Any
+ * GF_OPTION_INIT failure jumps to `out`, which still publishes the conf and
+ * returns 0.  Because of that early exit, the lock and the child-down
+ * timestamp are initialised right after allocation: previously they were
+ * set up after the option block, so an option failure published a conf
+ * whose lock had never been initialised although
+ * mdc_update_child_down_time() takes it later.
+ */
+int
+init (xlator_t *this)
+{
+        struct mdc_conf *conf = NULL;
+        int              timeout = 0;
+
+        conf = GF_CALLOC (sizeof (*conf), 1, gf_mdc_mt_mdc_conf_t);
+        if (!conf) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        MD_CACHE_MSG_NO_MEMORY, "out of memory");
+                return -1;
+        }
+
+        /* must be valid on every path that reaches `out` */
+        LOCK_INIT (&conf->lock);
+        time (&conf->last_child_down);
+
+        GF_OPTION_INIT ("md-cache-timeout", timeout, int32, out);
+
+        GF_OPTION_INIT ("cache-selinux", conf->cache_selinux, bool, out);
+        mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
+
+        GF_OPTION_INIT ("cache-posix-acl", conf->cache_posix_acl, bool, out);
+        mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl);
+        mdc_key_load_set (mdc_keys, "glusterfs.posix_acl.", conf->cache_posix_acl);
+
+        GF_OPTION_INIT ("cache-swift-metadata",
+                        conf->cache_swift_metadata, bool, out);
+        mdc_key_load_set (mdc_keys, "user.swift.metadata",
+                          conf->cache_swift_metadata);
+
+        GF_OPTION_INIT ("cache-samba-metadata", conf->cache_samba_metadata,
+                        bool, out);
+        mdc_key_load_set (mdc_keys, "user.DOSATTRIB",
+                          conf->cache_samba_metadata);
+        mdc_key_load_set (mdc_keys, "security.NTACL",
+                          conf->cache_samba_metadata);
+
+        GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+        GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out);
+
+        /* If timeout is greater than 60s (default before the patch that added
+         * cache invalidation support was added) then, cache invalidation
+         * feature for md-cache needs to be enabled, if not set timeout to the
+         * previous max which is 60s
+         */
+        if ((timeout > 60) && (!conf->mdc_invalidation)) {
+                conf->timeout = 60;
+                goto out;
+        }
+        conf->timeout = timeout;
+
+out:
+        this->private = conf;
+
+        return 0;
+}
+
+
+/* Stores *now as the time of the last child-down event, under the
+ * configuration lock. */
+void
+mdc_update_child_down_time (xlator_t *this, time_t *now)
+{
+        struct mdc_conf *conf = this->private;
+
+        LOCK (&conf->lock);
+        conf->last_child_down = *now;
+        UNLOCK (&conf->lock);
+}
+
+
+/*
+ * notify - translator event hook.
+ *
+ * GF_EVENT_UPCALL: when cache invalidation is enabled the event is applied
+ * via mdc_invalidate(); it is then passed to default_notify(), whose
+ * failure forces the result to -1.
+ * Child-down / some-child-down / child-modified: the current time is
+ * recorded through mdc_update_child_down_time() before default_notify().
+ * Everything else goes straight to default_notify().
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        struct mdc_conf *conf = this->private;
+        time_t           now  = 0;
+        int              ret  = 0;
+
+        if (event == GF_EVENT_UPCALL) {
+                if (conf->mdc_invalidation)
+                        ret = mdc_invalidate (this, data);
+                if (default_notify (this, event, data) != 0)
+                        ret = -1;
+                return ret;
+        }
+
+        if ((event == GF_EVENT_CHILD_DOWN) ||
+            (event == GF_EVENT_SOME_CHILD_DOWN) ||
+            (event == GF_EVENT_CHILD_MODIFIED)) {
+                time (&now);
+                mdc_update_child_down_time (this, &now);
+        }
+
+        return default_notify (this, event, data);
+}
+
+
+/* Translator teardown hook: currently performs no work. */
+void
+fini (xlator_t *this)
+{
+}
+
+
+/* File-operation table: maps each fop this translator handles to its
+ * mdc_* implementation. */
+struct xlator_fops fops = {
+ .lookup = mdc_lookup,
+ .stat = mdc_stat,
+ .fstat = mdc_fstat,
+ .truncate = mdc_truncate,
+ .ftruncate = mdc_ftruncate,
+ .mknod = mdc_mknod,
+ .mkdir = mdc_mkdir,
+ .unlink = mdc_unlink,
+ .rmdir = mdc_rmdir,
+ .symlink = mdc_symlink,
+ .rename = mdc_rename,
+ .link = mdc_link,
+ .create = mdc_create,
+ .readv = mdc_readv,
+ .writev = mdc_writev,
+ .setattr = mdc_setattr,
+ .fsetattr = mdc_fsetattr,
+ .fsync = mdc_fsync,
+ .setxattr = mdc_setxattr,
+ .fsetxattr = mdc_fsetxattr,
+ .getxattr = mdc_getxattr,
+ .fgetxattr = mdc_fgetxattr,
+ .removexattr = mdc_removexattr,
+ .fremovexattr= mdc_fremovexattr,
+ .opendir = mdc_opendir,
+ .readdirp = mdc_readdirp,
+ .readdir = mdc_readdir,
+ .fallocate = mdc_fallocate,
+ .discard = mdc_discard,
+ .zerofill = mdc_zerofill,
+};
+
+
+/* Callback table: only the forget hook is provided (mdc_forget). */
+struct xlator_cbks cbks = {
+ .forget = mdc_forget,
+};
+
+/* Option table for this translator; the keys are the ones read by init()
+ * and reconfigure().  The list ends with an entry whose key is NULL. */
+struct volume_options options[] = {
+ { .key = {"cache-selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ },
+ { .key = {"cache-swift-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Cache swift metadata (user.swift.metadata xattr)",
+ },
+ { .key = {"cache-samba-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL"
+ " xattrs)",
+ },
+ { .key = {"cache-posix-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ },
+ /* init()/reconfigure() cap the effective timeout at 60 unless
+ * cache-invalidation is enabled */
+ { .key = {"md-cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 600,
+ .default_value = "1",
+ .description = "Time period after which cache has to be refreshed",
+ },
+ { .key = {"force-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Convert all readdir requests to readdirplus to "
+ "collect stat info on each entry.",
+ },
+ { .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "When \"on\", invalidates/updates the metadata cache "
+ "on receiving of the cache-invalidation notifications",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/performance/open-behind/Makefile.am b/xlators/performance/open-behind/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/performance/open-behind/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/open-behind/src/Makefile.am b/xlators/performance/open-behind/src/Makefile.am
new file mode 100644
index 00000000000..58c3529bbef
--- /dev/null
+++ b/xlators/performance/open-behind/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = open-behind.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+open_behind_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+open_behind_la_SOURCES = open-behind.c
+open_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = open-behind-mem-types.h open-behind-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h
new file mode 100644
index 00000000000..1e94296f424
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __OB_MEM_TYPES_H__
+#define __OB_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/* Memory-accounting type IDs for open-behind; numbering continues after
+ * the common types (gf_common_mt_end). */
+enum gf_ob_mem_types_ {
+ gf_ob_mt_fd_t = gf_common_mt_end + 1,
+ gf_ob_mt_conf_t,
+ gf_ob_mt_end
+};
+#endif
diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
new file mode 100644
index 00000000000..57e63ea4bbb
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind-messages.h
@@ -0,0 +1,85 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _OPEN_BEHIND_MESSAGES_H_
+#define _OPEN_BEHIND_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file open-behind-messages.h
+ * \brief OPEN_BEHIND log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_OPEN_BEHIND_BASE GLFS_MSGID_COMP_OPEN_BEHIND
+#define GLFS_OPEN_BEHIND_NUM_MESSAGES 3
+#define GLFS_MSGID_END (GLFS_OPEN_BEHIND_BASE + \
+ GLFS_OPEN_BEHIND_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_OPEN_BEHIND_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED (GLFS_OPEN_BEHIND_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define OPEN_BEHIND_MSG_VOL_MISCONFIGURED (GLFS_OPEN_BEHIND_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define OPEN_BEHIND_MSG_NO_MEMORY (GLFS_OPEN_BEHIND_BASE + 3)
+
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _OPEN_BEHIND_MESSAGES_H_ */
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
new file mode 100644
index 00000000000..efab88582ff
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -0,0 +1,1026 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "open-behind-mem-types.h"
+#include "xlator.h"
+#include "statedump.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "open-behind-messages.h"
+
+/* Per-xlator configuration; the three flags are filled from the volume
+ * options in init() and reconfigure(). */
+typedef struct ob_conf {
+ gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
+ e.g - fstat() readv()
+
+ whereas for fops like writev(), lk(),
+ the fd is important for side effects
+ like mandatory locks
+ */
+ gf_boolean_t lazy_open; /* delay backend open as much as possible */
+ gf_boolean_t read_after_open; /* instead of sending readvs on
+ anonymous fds, open the file
+ first and then send readv i.e
+ similar to what writev does
+ */
+} ob_conf_t;
+
+
+/* State kept in the fd context for an fd whose backend open was deferred. */
+typedef struct ob_fd {
+ call_frame_t *open_frame; /* copy of the open() frame made in
+ ob_open_behind(); ob_fd_wake() takes it
+ and sets this to NULL when it winds the
+ real open */
+ loc_t loc; /* copy of the loc passed to open() */
+ dict_t *xdata; /* ref on the xdata passed to open(), may be NULL */
+ int flags; /* flags passed to open() */
+ int op_errno; /* set by ob_wake_cbk() when the backend open fails */
+ struct list_head list; /* stubs queued by open_and_resume() */
+} ob_fd_t;
+
+
+/*
+ * Fetch this xlator's ob_fd_t from the fd context without taking
+ * fd->lock; the callers in this file hold that lock around the call.
+ *
+ * Returns NULL when __fd_ctx_get() reports a failure.
+ */
+ob_fd_t *
+__ob_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        uint64_t ctx_value = 0;
+
+        if (__fd_ctx_get (fd, this, &ctx_value) != 0)
+                return NULL;
+
+        return (void *) ((long) ctx_value);
+}
+
+
+/*
+ * Locked variant of __ob_fd_ctx_get(): performs the lookup while holding
+ * fd->lock and returns whatever the lookup produced (possibly NULL).
+ */
+ob_fd_t *
+ob_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+        ob_fd_t *found = NULL;
+
+        LOCK (&fd->lock);
+        found = __ob_fd_ctx_get (this, fd);
+        UNLOCK (&fd->lock);
+
+        return found;
+}
+
+
+/*
+ * Store ob_fd as this xlator's value in the fd context without taking
+ * fd->lock.  Returns the result of __fd_ctx_set().
+ */
+int
+__ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+{
+        uint64_t ctx_value = (long) ((void *) ob_fd);
+
+        return __fd_ctx_set (fd, this, ctx_value);
+}
+
+
+/*
+ * Locked variant of __ob_fd_ctx_set(): stores ob_fd in the fd context
+ * while holding fd->lock and returns the result of the store.
+ */
+int
+ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+{
+        int status = -1;
+
+        LOCK (&fd->lock);
+        status = __ob_fd_ctx_set (this, fd, ob_fd);
+        UNLOCK (&fd->lock);
+
+        return status;
+}
+
+
+/*
+ * Allocate a zero-filled ob_fd_t and initialise its stub list.
+ *
+ * Returns the new object, or NULL when the allocation fails; the caller
+ * (ob_open_behind) already treats NULL as an out-of-memory condition.
+ */
+ob_fd_t *
+ob_fd_new (void)
+{
+        ob_fd_t *ob_fd = NULL;
+
+        ob_fd = GF_CALLOC (1, sizeof (*ob_fd), gf_ob_mt_fd_t);
+        if (!ob_fd)
+                /* do not touch ->list of a failed allocation */
+                return NULL;
+
+        INIT_LIST_HEAD (&ob_fd->list);
+
+        return ob_fd;
+}
+
+
+/*
+ * Release everything owned by ob_fd and then ob_fd itself: the copied
+ * loc, the xdata reference (if any) and the pending open frame (if any).
+ *
+ * A NULL ob_fd is accepted and ignored: ob_release() hands over the
+ * result of ob_fd_ctx_get(), which is NULL when the fd context lookup
+ * fails.
+ */
+void
+ob_fd_free (ob_fd_t *ob_fd)
+{
+        if (!ob_fd)
+                return;
+
+        loc_wipe (&ob_fd->loc);
+
+        if (ob_fd->xdata)
+                dict_unref (ob_fd->xdata);
+
+        if (ob_fd->open_frame)
+                STACK_DESTROY (ob_fd->open_frame->root);
+
+        GF_FREE (ob_fd);
+}
+
+
+/*
+ * Callback of the backend open wound by ob_fd_wake().
+ *
+ * The fd is taken from frame->local.  Under fd->lock every stub queued on
+ * the fd's ob_fd_t is moved to a private list; on failure the errno is
+ * recorded in the ob_fd_t, on success the context is deleted and freed.
+ * After unlocking, each stub is either failed with op_errno or resumed.
+ */
+int
+ob_wake_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd_ret, dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ struct list_head list;
+ ob_fd_t *ob_fd = NULL;
+ call_stub_t *stub = NULL, *tmp = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ INIT_LIST_HEAD (&list);
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+
+ /* detach the queued stubs so they can be handled unlocked */
+ list_splice_init (&ob_fd->list, &list);
+
+ if (op_ret < 0) {
+ /* mark fd BAD for ever */
+ ob_fd->op_errno = op_errno;
+ } else {
+ /* open succeeded: the fd no longer needs open-behind state */
+ __fd_ctx_del (fd, this, NULL);
+ ob_fd_free (ob_fd);
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ list_for_each_entry_safe (stub, tmp, &list, list) {
+ list_del_init (&stub->list);
+
+ if (op_ret < 0)
+ call_unwind_error (stub, -1, op_errno);
+ else
+ call_resume (stub);
+ }
+
+ /* drops the reference stored in frame->local by ob_fd_wake() */
+ fd_unref (fd);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+
+/*
+ * Wind the deferred open for fd, if it has not been wound yet.
+ *
+ * The saved open frame is taken out of the ob_fd_t under fd->lock and the
+ * field is cleared, so only the caller that obtained a non-NULL frame
+ * winds the open.  Always returns 0.
+ */
+int
+ob_fd_wake (xlator_t *this, fd_t *fd)
+{
+ call_frame_t *frame = NULL;
+ ob_fd_t *ob_fd = NULL;
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ if (!ob_fd)
+ goto unlock;
+
+ /* claim the frame; later callers will see NULL here */
+ frame = ob_fd->open_frame;
+ ob_fd->open_frame = NULL;
+ }
+unlock:
+ UNLOCK (&fd->lock);
+
+ if (frame) {
+ /* reference is released in ob_wake_cbk() */
+ frame->local = fd_ref (fd);
+
+ STACK_WIND (frame, ob_wake_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ &ob_fd->loc, ob_fd->flags, fd, ob_fd->xdata);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Run stub once fd is usable on the backend.
+ *
+ *  - fd is NULL or has no ob_fd_t context: resume stub immediately.
+ *  - the ob_fd_t carries a recorded op_errno: fail stub with that errno.
+ *  - otherwise: queue stub on the ob_fd_t (under fd->lock) and call
+ *    ob_fd_wake(); ob_wake_cbk() later resumes or fails the queued stubs.
+ *
+ * Always returns 0.
+ */
+int
+open_and_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ ob_fd_t *ob_fd = NULL;
+ int op_errno = 0;
+
+ if (!fd)
+ goto nofd;
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ if (!ob_fd)
+ goto unlock;
+
+ if (ob_fd->op_errno) {
+ op_errno = ob_fd->op_errno;
+ goto unlock;
+ }
+
+ list_add_tail (&stub->list, &ob_fd->list);
+ }
+unlock:
+ UNLOCK (&fd->lock);
+
+nofd:
+ if (op_errno)
+ call_unwind_error (stub, -1, op_errno);
+ else if (ob_fd)
+ ob_fd_wake (this, fd);
+ else
+ call_resume (stub);
+
+ return 0;
+}
+
+
+/*
+ * Answer open() to the caller without opening the file on the backend.
+ *
+ * An open with O_TRUNC is wound straight to the child.  Otherwise an
+ * ob_fd_t holding a copy of the frame, the loc, the flags and a ref on
+ * xdata is stored in the fd context, the open is unwound as successful
+ * and, unless lazy_open is set, the backend open is started right away.
+ *
+ * Returns 0 when the fop was wound or unwound, -1 when setting up the
+ * ob_fd_t failed (nothing has been unwound in that case).
+ */
+int
+ob_open_behind (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ ob_fd_t *ob_fd = NULL;
+ int ret = -1;
+ ob_conf_t *conf = NULL;
+
+
+ conf = this->private;
+
+ if (flags & O_TRUNC) {
+ STACK_WIND (frame, default_open_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+ }
+
+ ob_fd = ob_fd_new ();
+ if (!ob_fd)
+ goto enomem;
+
+ ob_fd->open_frame = copy_frame (frame);
+ if (!ob_fd->open_frame)
+ goto enomem;
+ ret = loc_copy (&ob_fd->loc, loc);
+ if (ret)
+ goto enomem;
+
+ ob_fd->flags = flags;
+ if (xdata)
+ ob_fd->xdata = dict_ref (xdata);
+
+ ret = ob_fd_ctx_set (this, fd, ob_fd);
+ if (ret)
+ goto enomem;
+
+ /* hold fd across the unwind and the optional wake below */
+ fd_ref (fd);
+
+ STACK_UNWIND_STRICT (open, frame, 0, 0, fd, xdata);
+
+ if (!conf->lazy_open)
+ ob_fd_wake (this, fd);
+
+ fd_unref (fd);
+
+ return 0;
+enomem:
+ /* undo whatever part of the ob_fd_t was set up */
+ if (ob_fd) {
+ if (ob_fd->open_frame)
+ STACK_DESTROY (ob_fd->open_frame->root);
+ loc_wipe (&ob_fd->loc);
+ if (ob_fd->xdata)
+ dict_unref (ob_fd->xdata);
+ GF_FREE (ob_fd);
+ }
+
+ return -1;
+}
+
+
+/*
+ * open() entry point.
+ *
+ * When fd_lookup() finds an fd on the inode, the open is wrapped in a stub
+ * and passed to open_and_resume() with that fd.  Otherwise the open is
+ * handled by ob_open_behind().  A failure to create the stub or a failure
+ * of ob_open_behind() is logged and unwound as ENOMEM.
+ */
+int
+ob_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+         fd_t *fd, dict_t *xdata)
+{
+        fd_t        *existing_fd = NULL;
+        call_stub_t *open_stub   = NULL;
+
+        existing_fd = fd_lookup (fd->inode, 0);
+        if (existing_fd == NULL) {
+                if (ob_open_behind (frame, this, loc, flags, fd, xdata) == 0)
+                        return 0;
+                goto enomem;
+        }
+
+        /* open-behind only when this is the first FD */
+        open_stub = fop_open_stub (frame, default_open_resume,
+                                   loc, flags, fd, xdata);
+        if (open_stub == NULL) {
+                fd_unref (existing_fd);
+                goto enomem;
+        }
+
+        open_and_resume (this, existing_fd, open_stub);
+
+        fd_unref (existing_fd);
+
+        return 0;
+
+enomem:
+        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, OPEN_BEHIND_MSG_NO_MEMORY,
+                "%s", loc->path);
+
+        STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, 0, 0);
+
+        return 0;
+}
+
+
+/*
+ * Pick the fd a fop should be wound on.
+ *
+ * Returns fd_anonymous (fd->inode) when fd still has an ob_fd_t context
+ * and use_anonymous_fd is enabled; otherwise returns a new reference on
+ * fd itself.
+ */
+fd_t *
+ob_get_wind_fd (xlator_t *this, fd_t *fd)
+{
+        ob_conf_t *conf    = this->private;
+        ob_fd_t   *pending = ob_fd_ctx_get (this, fd);
+
+        if (!pending || !conf->use_anonymous_fd)
+                return fd_ref (fd);
+
+        return fd_anonymous (fd->inode);
+}
+
+
+/*
+ * readv() entry point.
+ *
+ * With read_after_open disabled the read is issued on the fd chosen by
+ * ob_get_wind_fd(); with it enabled the read is issued on fd itself.
+ * The read is wrapped in a stub and passed to open_and_resume().  When
+ * the stub cannot be created the fop is unwound with ENOMEM.
+ *
+ * The reference obtained on wind_fd is kept until open_and_resume() has
+ * returned, so wind_fd is never used after this function's own reference
+ * was dropped.
+ */
+int
+ob_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+          off_t offset, uint32_t flags, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+        fd_t *wind_fd = NULL;
+        ob_conf_t *conf = NULL;
+
+        conf = this->private;
+
+        if (!conf->read_after_open)
+                wind_fd = ob_get_wind_fd (this, fd);
+        else
+                wind_fd = fd_ref (fd);
+
+        stub = fop_readv_stub (frame, default_readv_resume, wind_fd,
+                               size, offset, flags, xdata);
+        if (!stub)
+                goto err;
+
+        open_and_resume (this, wind_fd, stub);
+
+        fd_unref (wind_fd);
+
+        return 0;
+err:
+        fd_unref (wind_fd);
+
+        STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
+
+        return 0;
+}
+
+
+/*
+ * writev() entry point: wrap the write in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+           int count, off_t offset, uint32_t flags, struct iobref *iobref,
+           dict_t *xdata)
+{
+        call_stub_t *writev_stub = NULL;
+
+        writev_stub = fop_writev_stub (frame, default_writev_resume, fd, iov,
+                                       count, offset, flags, iobref, xdata);
+        if (writev_stub == NULL) {
+                STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, writev_stub);
+
+        return 0;
+}
+
+
+/*
+ * fstat() entry point.
+ *
+ * The fstat is issued on the fd chosen by ob_get_wind_fd(), wrapped in a
+ * stub and passed to open_and_resume().  When the stub cannot be created
+ * the fop is unwound with ENOMEM.
+ *
+ * The reference obtained on wind_fd is kept until open_and_resume() has
+ * returned, so wind_fd is never used after this function's own reference
+ * was dropped.
+ */
+int
+ob_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        call_stub_t *stub = NULL;
+        fd_t *wind_fd = NULL;
+
+        wind_fd = ob_get_wind_fd (this, fd);
+
+        stub = fop_fstat_stub (frame, default_fstat_resume, wind_fd, xdata);
+        if (!stub)
+                goto err;
+
+        open_and_resume (this, wind_fd, stub);
+
+        fd_unref (wind_fd);
+
+        return 0;
+err:
+        fd_unref (wind_fd);
+
+        STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, 0, 0);
+
+        return 0;
+}
+
+
+/*
+ * flush() entry point.
+ *
+ * When the fd still has its open frame saved (the open was never wound),
+ * the flush is unwound as successful without reaching the backend.
+ * Otherwise the flush is wrapped in a stub and passed to
+ * open_and_resume(); a stub allocation failure unwinds with ENOMEM.
+ */
+int
+ob_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        call_stub_t  *flush_stub   = NULL;
+        ob_fd_t      *ob_fd        = NULL;
+        gf_boolean_t  open_pending = _gf_false;
+
+        LOCK (&fd->lock);
+        {
+                ob_fd = __ob_fd_ctx_get (this, fd);
+                /* if open() was never wound to backend,
+                   no need to wind flush() either.
+                */
+                if (ob_fd && ob_fd->open_frame)
+                        open_pending = _gf_true;
+        }
+        UNLOCK (&fd->lock);
+
+        if (open_pending) {
+                STACK_UNWIND_STRICT (flush, frame, 0, 0, 0);
+                return 0;
+        }
+
+        flush_stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
+        if (flush_stub == NULL) {
+                STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, flush_stub);
+
+        return 0;
+}
+
+
+/*
+ * fsync() entry point: wrap the fsync in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int flag,
+          dict_t *xdata)
+{
+        call_stub_t *fsync_stub = NULL;
+
+        fsync_stub = fop_fsync_stub (frame, default_fsync_resume, fd, flag,
+                                     xdata);
+        if (fsync_stub == NULL) {
+                STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fsync_stub);
+
+        return 0;
+}
+
+
+/*
+ * lk() entry point: wrap the lock request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+       struct gf_flock *flock, dict_t *xdata)
+{
+        call_stub_t *lk_stub = NULL;
+
+        lk_stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, flock,
+                               xdata);
+        if (lk_stub == NULL) {
+                STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, lk_stub);
+
+        return 0;
+}
+
+/*
+ * ftruncate() entry point: wrap the truncate in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              dict_t *xdata)
+{
+        call_stub_t *ftruncate_stub = NULL;
+
+        ftruncate_stub = fop_ftruncate_stub (frame, default_ftruncate_resume,
+                                             fd, offset, xdata);
+        if (ftruncate_stub == NULL) {
+                STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, ftruncate_stub);
+
+        return 0;
+}
+
+
+/*
+ * fsetxattr() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+              int flags, dict_t *xdata)
+{
+        call_stub_t *fsetxattr_stub = NULL;
+
+        fsetxattr_stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
+                                             fd, xattr, flags, xdata);
+        if (fsetxattr_stub == NULL) {
+                STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fsetxattr_stub);
+
+        return 0;
+}
+
+
+/*
+ * fgetxattr() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+              dict_t *xdata)
+{
+        call_stub_t *fgetxattr_stub = NULL;
+
+        fgetxattr_stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume,
+                                             fd, name, xdata);
+        if (fgetxattr_stub == NULL) {
+                STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fgetxattr_stub);
+
+        return 0;
+}
+
+
+/*
+ * fremovexattr() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+        call_stub_t *fremovexattr_stub = NULL;
+
+        fremovexattr_stub = fop_fremovexattr_stub (frame,
+                                                   default_fremovexattr_resume,
+                                                   fd, name, xdata);
+        if (fremovexattr_stub == NULL) {
+                STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fremovexattr_stub);
+
+        return 0;
+}
+
+
+/*
+ * finodelk() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+             int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+        call_stub_t *finodelk_stub = NULL;
+
+        finodelk_stub = fop_finodelk_stub (frame, default_finodelk_resume,
+                                           volume, fd, cmd, flock, xdata);
+        if (finodelk_stub == NULL) {
+                STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, finodelk_stub);
+
+        return 0;
+}
+
+
+/*
+ * fentrylk() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+             const char *basename, entrylk_cmd cmd, entrylk_type type,
+             dict_t *xdata)
+{
+        call_stub_t *fentrylk_stub = NULL;
+
+        fentrylk_stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
+                                           volume, fd, basename, cmd, type,
+                                           xdata);
+        if (fentrylk_stub == NULL) {
+                STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fentrylk_stub);
+
+        return 0;
+}
+
+
+/*
+ * fxattrop() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+        call_stub_t *fxattrop_stub = NULL;
+
+        fxattrop_stub = fop_fxattrop_stub (frame, default_fxattrop_resume, fd,
+                                           optype, xattr, xdata);
+        if (fxattrop_stub == NULL) {
+                STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fxattrop_stub);
+
+        return 0;
+}
+
+
+/*
+ * fsetattr() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             struct iatt *iatt, int valid, dict_t *xdata)
+{
+        call_stub_t *fsetattr_stub = NULL;
+
+        fsetattr_stub = fop_fsetattr_stub (frame, default_fsetattr_resume, fd,
+                                           iatt, valid, xdata);
+        if (fsetattr_stub == NULL) {
+                STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        open_and_resume (this, fd, fsetattr_stub);
+
+        return 0;
+}
+
+/*
+ * fallocate() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+             off_t offset, size_t len, dict_t *xdata)
+{
+        call_stub_t *fallocate_stub = fop_fallocate_stub(frame,
+                                                         default_fallocate_resume,
+                                                         fd, mode, offset, len,
+                                                         xdata);
+
+        if (fallocate_stub == NULL) {
+                STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+                return 0;
+        }
+
+        open_and_resume(this, fd, fallocate_stub);
+
+        return 0;
+}
+
+/*
+ * discard() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+           size_t len, dict_t *xdata)
+{
+        call_stub_t *discard_stub = fop_discard_stub(frame,
+                                                     default_discard_resume,
+                                                     fd, offset, len, xdata);
+
+        if (discard_stub == NULL) {
+                STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+                return 0;
+        }
+
+        open_and_resume(this, fd, discard_stub);
+
+        return 0;
+}
+
+/*
+ * zerofill() entry point: wrap the request in a stub and pass it to
+ * open_and_resume() with fd.  Unwinds with ENOMEM when the stub cannot
+ * be created.
+ */
+int
+ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            off_t len, dict_t *xdata)
+{
+        call_stub_t *zerofill_stub = fop_zerofill_stub(frame,
+                                                       default_zerofill_resume,
+                                                       fd, offset, len, xdata);
+
+        if (zerofill_stub == NULL) {
+                STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+                return 0;
+        }
+
+        open_and_resume(this, fd, zerofill_stub);
+
+        return 0;
+}
+
+
+/*
+ * unlink() entry point.
+ *
+ * The unlink is wrapped in a stub and passed to open_and_resume() with
+ * the fd that fd_lookup() returns for loc->inode (possibly NULL).  The
+ * looked-up fd is unreferenced afterwards.  Unwinds with ENOMEM when the
+ * stub cannot be created.
+ */
+int
+ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+           dict_t *xdata)
+{
+        fd_t        *open_fd     = NULL;
+        call_stub_t *unlink_stub = NULL;
+
+        unlink_stub = fop_unlink_stub (frame, default_unlink_resume, loc,
+                                       xflags, xdata);
+        if (unlink_stub == NULL) {
+                STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        open_fd = fd_lookup (loc->inode, 0);
+
+        open_and_resume (this, open_fd, unlink_stub);
+
+        if (open_fd != NULL)
+                fd_unref (open_fd);
+
+        return 0;
+}
+
+
+/*
+ * rename() entry point.
+ *
+ * The rename is wrapped in a stub and passed to open_and_resume() with
+ * the fd that fd_lookup() returns for dst->inode; no lookup is done when
+ * dst->inode is NULL.  The looked-up fd is unreferenced afterwards.
+ * Unwinds with ENOMEM when the stub cannot be created.
+ */
+int
+ob_rename (call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
+           dict_t *xdata)
+{
+        fd_t        *dst_fd      = NULL;
+        call_stub_t *rename_stub = NULL;
+
+        rename_stub = fop_rename_stub (frame, default_rename_resume, src, dst,
+                                       xdata);
+        if (rename_stub == NULL) {
+                STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
+                return 0;
+        }
+
+        if (dst->inode != NULL)
+                dst_fd = fd_lookup (dst->inode, 0);
+
+        open_and_resume (this, dst_fd, rename_stub);
+
+        if (dst_fd != NULL)
+                fd_unref (dst_fd);
+
+        return 0;
+}
+
+
+/*
+ * release callback: look up the fd's ob_fd_t and hand it to ob_fd_free().
+ * Always returns 0.
+ */
+int
+ob_release (xlator_t *this, fd_t *fd)
+{
+        ob_fd_t *stale = ob_fd_ctx_get (this, fd);
+
+        ob_fd_free (stale);
+
+        return 0;
+}
+
+
+/*
+ * Statedump hook for the xlator's private data.
+ *
+ * Writes one section keyed "xlator.performance.open-behind" / "priv"
+ * containing every flag of ob_conf_t.  Returns -1 when the xlator has no
+ * private data, 0 otherwise.
+ */
+int
+ob_priv_dump (xlator_t *this)
+{
+        ob_conf_t *conf = NULL;
+        char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+        conf = this->private;
+
+        if (!conf)
+                return -1;
+
+        gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
+                                "priv");
+
+        gf_proc_dump_add_section (key_prefix);
+
+        gf_proc_dump_write ("use_anonymous_fd", "%d", conf->use_anonymous_fd);
+
+        gf_proc_dump_write ("lazy_open", "%d", conf->lazy_open);
+
+        /* third option of ob_conf_t, previously missing from the dump */
+        gf_proc_dump_write ("read_after_open", "%d", conf->read_after_open);
+
+        return 0;
+}
+
+
+/*
+ * Statedump hook for the per-fd context.
+ *
+ * Uses TRY_LOCK on fd->lock and silently skips the fd when the lock is
+ * not obtained or when the fd has no ob_fd_t.  Otherwise writes one
+ * section keyed "xlator.performance.open-behind" / "file" describing the
+ * pending open.  Always returns 0.
+ */
+int
+ob_fdctx_dump (xlator_t *this, fd_t *fd)
+{
+        ob_fd_t *ob_fd = NULL;
+        char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+        int ret = 0;
+
+        ret = TRY_LOCK (&fd->lock);
+        if (ret)
+                return 0;
+
+        ob_fd = __ob_fd_ctx_get (this, fd);
+        if (!ob_fd) {
+                UNLOCK (&fd->lock);
+                return 0;
+        }
+
+        gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
+                                "file");
+        gf_proc_dump_add_section (key_prefix);
+
+        gf_proc_dump_write ("fd", "%p", fd);
+
+        gf_proc_dump_write ("open_frame", "%p", ob_fd->open_frame);
+
+        /* unique is an integer id, not a pointer: print it as a number
+         * (NOTE(review): assumes the field is an unsigned integer type,
+         * as declared in libglusterfs stack.h - confirm) */
+        if (ob_fd->open_frame)
+                gf_proc_dump_write ("open_frame.root.unique", "%llu",
+                                    (unsigned long long)
+                                    ob_fd->open_frame->root->unique);
+
+        gf_proc_dump_write ("loc.path", "%s", ob_fd->loc.path);
+
+        gf_proc_dump_write ("loc.ino", "%s", uuid_utoa (ob_fd->loc.gfid));
+
+        gf_proc_dump_write ("flags", "%d", ob_fd->flags);
+
+        UNLOCK (&fd->lock);
+
+        return 0;
+}
+
+
+/*
+ * Set up memory accounting for this xlator with gf_ob_mt_end + 1 types.
+ * Logs an error when xlator_mem_acct_init() fails and returns its result.
+ */
+int
+mem_acct_init (xlator_t *this)
+{
+        int status = xlator_mem_acct_init (this, gf_ob_mt_end + 1);
+
+        if (status != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                        OPEN_BEHIND_MSG_NO_MEMORY,
+                        "Memory accounting failed");
+        }
+
+        return status;
+}
+
+
+/*
+ * Re-read the three boolean options into the existing ob_conf_t.
+ *
+ * Returns 0 once all three GF_OPTION_RECONF invocations have completed;
+ * each of them is given the "out" label, where the function returns -1.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        ob_conf_t *conf = this->private;
+        int        ret  = -1;
+
+        GF_OPTION_RECONF ("use-anonymous-fd", conf->use_anonymous_fd, options,
+                          bool, out);
+        GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out);
+        GF_OPTION_RECONF ("read-after-open", conf->read_after_open, options,
+                          bool, out);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Xlator initialisation.
+ *
+ * Fails unless the xlator has exactly one child; a missing parent only
+ * produces a warning.  Allocates the ob_conf_t, fills it from the three
+ * boolean options and stores it in this->private.
+ *
+ * Returns 0 on success and -1 on failure; the ob_conf_t is freed when an
+ * option macro jumps to the "err" label.
+ */
+int
+init (xlator_t *this)
+{
+        ob_conf_t *conf = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+                        "FATAL: volume (%s) not configured with exactly one "
+                        "child", this->name);
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        OPEN_BEHIND_MSG_VOL_MISCONFIGURED,
+                        "dangling volume. check volfile ");
+        }
+
+        conf = GF_CALLOC (1, sizeof (*conf), gf_ob_mt_conf_t);
+        if (conf == NULL)
+                return -1;
+
+        GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err);
+        GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err);
+        GF_OPTION_INIT ("read-after-open", conf->read_after_open, bool, err);
+
+        this->private = conf;
+
+        return 0;
+err:
+        /* conf is always allocated when this label is reached */
+        GF_FREE (conf);
+
+        return -1;
+}
+
+
+/*
+ * Xlator teardown: free the ob_conf_t stored in this->private.
+ */
+void
+fini (xlator_t *this)
+{
+        ob_conf_t *conf = this->private;
+
+        GF_FREE (conf);
+}
+
+
+/* File operations implemented by open-behind; each entry points at the
+ * ob_* handler defined above. */
+struct xlator_fops fops = {
+ .open = ob_open,
+ .readv = ob_readv,
+ .writev = ob_writev,
+ .flush = ob_flush,
+ .fsync = ob_fsync,
+ .fstat = ob_fstat,
+ .ftruncate = ob_ftruncate,
+ .fsetxattr = ob_fsetxattr,
+ .fgetxattr = ob_fgetxattr,
+ .fremovexattr = ob_fremovexattr,
+ .finodelk = ob_finodelk,
+ .fentrylk = ob_fentrylk,
+ .fxattrop = ob_fxattrop,
+ .fsetattr = ob_fsetattr,
+ .fallocate = ob_fallocate,
+ .discard = ob_discard,
+ .zerofill = ob_zerofill,
+ .unlink = ob_unlink,
+ .rename = ob_rename,
+ .lk = ob_lk,
+};
+
+/* Callbacks: ob_release frees the per-fd open-behind state. */
+struct xlator_cbks cbks = {
+ .release = ob_release,
+};
+
+/* Statedump hooks for the private configuration and the per-fd context. */
+struct xlator_dumpops dumpops = {
+ .priv = ob_priv_dump,
+ .fdctx = ob_fdctx_dump,
+};
+
+
+/* Volume options read by init() and reconfigure(); the table is
+ * terminated by an entry whose key is NULL. */
+struct volume_options options[] = {
+ { .key = {"use-anonymous-fd"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description = "For read operations, use anonymous FD when "
+ "original FD is open-behind and not yet opened in the backend.",
+ },
+ { .key = {"lazy-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description = "Perform open in the backend only when a necessary "
+ "FOP arrives (e.g writev on the FD, unlink of the file). When option "
+ "is disabled, perform backend open right after unwinding open().",
+ },
+ { .key = {"read-after-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "read is sent only after actual open happens and real "
+ "fd is obtained, instead of doing on anonymous fd (similar to write)",
+ },
+ { .key = {NULL} }
+
+};
diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am
index db917f897c8..af4ae7cbfcf 100644
--- a/xlators/performance/quick-read/src/Makefile.am
+++ b/xlators/performance/quick-read/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = quick-read.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-quick_read_la_LDFLAGS = -module -avoidversion
+quick_read_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quick_read_la_SOURCES = quick-read.c
quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quick-read.h quick-read-mem-types.h
+noinst_HEADERS = quick-read.h quick-read-mem-types.h quick-read-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h
index 22e189286eb..78547f64116 100644
--- a/xlators/performance/quick-read/src/quick-read-mem-types.h
+++ b/xlators/performance/quick-read/src/quick-read-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QR_MEM_TYPES_H__
#define __QR_MEM_TYPES_H__
@@ -25,13 +15,13 @@
enum gf_qr_mem_types_ {
gf_qr_mt_qr_inode_t = gf_common_mt_end + 1,
+ gf_qr_mt_content_t,
gf_qr_mt_qr_fd_ctx_t,
- gf_qr_mt_qr_local_t,
gf_qr_mt_iovec,
gf_qr_mt_qr_conf_t,
gf_qr_mt_qr_priority_t,
gf_qr_mt_qr_private_t,
+ gf_qr_mt_qr_unlink_ctx_t,
gf_qr_mt_end
};
#endif
-
diff --git a/xlators/performance/quick-read/src/quick-read-messages.h b/xlators/performance/quick-read/src/quick-read-messages.h
new file mode 100644
index 00000000000..a3bd594471f
--- /dev/null
+++ b/xlators/performance/quick-read/src/quick-read-messages.h
@@ -0,0 +1,128 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUICK_READ_MESSAGES_H_
+#define _QUICK_READ_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file quick-read-messages.h
+ * \brief QUICK_READ log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_QUICK_READ_BASE GLFS_MSGID_COMP_QUICK_READ
+#define GLFS_QUICK_READ_NUM_MESSAGES 8
+#define GLFS_MSGID_END (GLFS_QUICK_READ_BASE +\
+ GLFS_QUICK_READ_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_QUICK_READ_BASE, "Invalid: Start of messages"
+
+
+#define QUICK_READ_MSG_ENFORCEMENT_FAILED (GLFS_QUICK_READ_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_INVALID_ARGUMENT (GLFS_QUICK_READ_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED\
+ (GLFS_QUICK_READ_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_NO_MEMORY (GLFS_QUICK_READ_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_VOL_MISCONFIGURED (GLFS_QUICK_READ_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_DICT_SET_FAILED (GLFS_QUICK_READ_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_INVALID_CONFIG (GLFS_QUICK_READ_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define QUICK_READ_MSG_LRU_NOT_EMPTY (GLFS_QUICK_READ_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _QUICK_READ_MESSAGES_H_ */
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index 70ef8858f1b..4e9c6dcf091 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -1,2391 +1,878 @@
/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "quick-read.h"
#include "statedump.h"
+#include "quick-read-messages.h"
-#define QR_DEFAULT_CACHE_SIZE 134217728
+qr_inode_t *qr_inode_ctx_get (xlator_t *this, inode_t *inode);
+void __qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode);
-void
-qr_local_free (qr_local_t *local)
-{
- if (local == NULL) {
- goto out;
- }
- if (local->stub != NULL) {
- call_stub_destroy (local->stub);
- }
+int
+__qr_inode_ctx_set (xlator_t *this, inode_t *inode, qr_inode_t *qr_inode)
+{
+ uint64_t value = 0;
+ int ret = -1;
- if (local->path != NULL) {
- GF_FREE (local->path);
- }
+ value = (long) qr_inode;
- GF_FREE (local);
+ ret = __inode_ctx_set (inode, this, &value);
-out:
- return;
+ return ret;
}
-int32_t
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset);
-
-
-static void
-qr_loc_wipe (loc_t *loc)
+qr_inode_t *
+__qr_inode_ctx_get (xlator_t *this, inode_t *inode)
{
- if (loc == NULL) {
- goto out;
- }
-
- if (loc->path) {
- GF_FREE ((char *)loc->path);
- loc->path = NULL;
- }
+ qr_inode_t *qr_inode = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
+ ret = __inode_ctx_get (inode, this, &value);
+ if (ret)
+ return NULL;
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
+ qr_inode = (void *) ((long) value);
-out:
- return;
+ return qr_inode;
}
-static int32_t
-qr_loc_fill (loc_t *loc, inode_t *inode, char *path)
+qr_inode_t *
+qr_inode_ctx_get (xlator_t *this, inode_t *inode)
{
- int32_t ret = -1;
- char *parent = NULL;
-
- if ((loc == NULL) || (inode == NULL) || (path == NULL)) {
- ret = -1;
- errno = EINVAL;
- goto out;
- }
-
- loc->inode = inode_ref (inode);
- loc->path = gf_strdup (path);
- loc->ino = inode->ino;
-
- parent = gf_strdup (path);
- if (parent == NULL) {
- ret = -1;
- goto out;
- }
-
- parent = dirname (parent);
+ qr_inode_t *qr_inode = NULL;
- loc->parent = inode_from_path (inode->table, parent);
- if (loc->parent == NULL) {
- ret = -1;
- errno = EINVAL;
- goto out;
- }
-
- loc->name = strrchr (loc->path, '/');
- ret = 0;
-out:
- if (ret == -1) {
- qr_loc_wipe (loc);
-
- }
+ LOCK (&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get (this, inode);
+ }
+ UNLOCK (&inode->lock);
- if (parent) {
- GF_FREE (parent);
- }
-
- return ret;
+ return qr_inode;
}
-void
-qr_resume_pending_ops (qr_fd_ctx_t *qr_fd_ctx)
+qr_inode_t *
+qr_inode_new (xlator_t *this, inode_t *inode)
{
- struct list_head waiting_ops;
- call_stub_t *stub = NULL, *tmp = NULL;
-
- if (qr_fd_ctx == NULL) {
- goto out;
- }
+ qr_inode_t *qr_inode = NULL;
- INIT_LIST_HEAD (&waiting_ops);
+ qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t);
+ if (!qr_inode)
+ return NULL;
- LOCK (&qr_fd_ctx->lock);
- {
- list_splice_init (&qr_fd_ctx->waiting_ops,
- &waiting_ops);
- }
- UNLOCK (&qr_fd_ctx->lock);
+ INIT_LIST_HEAD (&qr_inode->lru);
- if (!list_empty (&waiting_ops)) {
- list_for_each_entry_safe (stub, tmp, &waiting_ops, list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
- }
+ qr_inode->priority = 0; /* initial priority */
-out:
- return;
+ return qr_inode;
}
-static void
-qr_fd_ctx_free (qr_fd_ctx_t *qr_fd_ctx)
+qr_inode_t *
+qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode)
{
- if (qr_fd_ctx == NULL) {
- goto out;
- }
-
- assert (list_empty (&qr_fd_ctx->waiting_ops));
-
- GF_FREE (qr_fd_ctx->path);
- GF_FREE (qr_fd_ctx);
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ qr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ LOCK (&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get (this, inode);
+ if (qr_inode)
+ goto unlock;
+
+ qr_inode = qr_inode_new (this, inode);
+ if (!qr_inode)
+ goto unlock;
+
+ ret = __qr_inode_ctx_set (this, inode, qr_inode);
+ if (ret) {
+ __qr_inode_prune (&priv->table, qr_inode);
+ GF_FREE (qr_inode);
+ qr_inode = NULL;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
-out:
- return;
+ return qr_inode;
}
-static inline uint32_t
-is_match (const char *path, const char *pattern)
-{
- int32_t ret = 0;
-
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
-
- return (ret == 0);
-}
uint32_t
qr_get_priority (qr_conf_t *conf, const char *path)
{
- uint32_t priority = 0;
- struct qr_priority *curr = NULL;
-
+ uint32_t priority = 0;
+ struct qr_priority *curr = NULL;
+
list_for_each_entry (curr, &conf->priority_list, list) {
- if (is_match (path, curr->pattern))
+ if (fnmatch (curr->pattern, path, FNM_NOESCAPE) == 0)
priority = curr->priority;
}
- return priority;
-}
-
-
-/* To be called with this-priv->table.lock held */
-qr_inode_t *
-__qr_inode_alloc (xlator_t *this, char *path, inode_t *inode)
-{
- qr_inode_t *qr_inode = NULL;
- qr_private_t *priv = NULL;
- int priority = 0;
-
- priv = this->private;
-
- qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t);
- if (qr_inode == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- INIT_LIST_HEAD (&qr_inode->lru);
-
- priority = qr_get_priority (&priv->conf, path);
-
- list_add_tail (&qr_inode->lru, &priv->table.lru[priority]);
-
- qr_inode->inode = inode;
- qr_inode->priority = priority;
-out:
- return qr_inode;
+ return priority;
}
-/* To be called with qr_inode->table->lock held */
void
-__qr_inode_free (qr_inode_t *qr_inode)
+__qr_inode_register (qr_inode_table_t *table, qr_inode_t *qr_inode)
{
- if (qr_inode == NULL) {
- goto out;
- }
-
- if (qr_inode->xattr) {
- dict_unref (qr_inode->xattr);
- }
+ if (!qr_inode->data)
+ return;
- list_del (&qr_inode->lru);
+ if (list_empty (&qr_inode->lru))
+ /* first time addition of this qr_inode into table */
+ table->cache_used += qr_inode->size;
+ else
+ list_del_init (&qr_inode->lru);
- GF_FREE (qr_inode);
-out:
- return;
+ list_add_tail (&qr_inode->lru, &table->lru[qr_inode->priority]);
}
-/* To be called with priv->table.lock held */
+
void
-__qr_cache_prune (xlator_t *this)
+qr_inode_set_priority (xlator_t *this, inode_t *inode, const char *path)
{
+ uint32_t priority = 0;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
- qr_inode_table_t *table = NULL;
- qr_inode_t *curr = NULL, *next = NULL;
- int32_t index = 0;
- uint64_t size_to_prune = 0;
- uint64_t size_pruned = 0;
+ qr_conf_t *conf = NULL;
- priv = this->private;
- table = &priv->table;
- conf = &priv->conf;
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ return;
- size_to_prune = table->cache_used - conf->cache_size;
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
- for (index=0; index < conf->max_pri; index++) {
- list_for_each_entry_safe (curr, next, &table->lru[index], lru) {
- size_pruned += curr->stbuf.ia_size;
- inode_ctx_del (curr->inode, this, NULL);
- __qr_inode_free (curr);
- if (size_pruned >= size_to_prune)
- goto done;
- }
- }
+ if (path)
+ priority = qr_get_priority (conf, path);
+ else
+ /* retain existing priority, just bump LRU */
+ priority = qr_inode->priority;
-done:
- table->cache_used -= size_pruned;
- return;
-}
+ LOCK (&table->lock);
+ {
+ qr_inode->priority = priority;
-/* To be called with table->lock held */
-inline char
-__qr_need_cache_prune (qr_conf_t *conf, qr_inode_table_t *table)
-{
- return (table->cache_used > conf->cache_size);
+ __qr_inode_register (table, qr_inode);
+ }
+ UNLOCK (&table->lock);
}
-int32_t
-qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+/* To be called with priv->table.lock held */
+void
+__qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode)
{
- data_t *content = NULL;
- qr_inode_t *qr_inode = NULL;
- uint64_t value = 0;
- int ret = -1;
- qr_conf_t *conf = NULL;
- qr_inode_table_t *table = NULL;
- qr_private_t *priv = NULL;
- qr_local_t *local = NULL;
-
- if ((op_ret == -1) || (dict == NULL)) {
- goto out;
- }
-
- priv = this->private;
- conf = &priv->conf;
- table = &priv->table;
-
- local = frame->local;
+ GF_FREE (qr_inode->data);
+ qr_inode->data = NULL;
- if (buf->ia_size > conf->max_file_size) {
- goto out;
- }
-
- if (IA_ISDIR (buf->ia_type)) {
- goto out;
- }
-
- if (inode == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- content = dict_get (dict, GLUSTERFS_CONTENT_KEY);
- if (content == NULL) {
- goto out;
- }
-
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (inode, this, &value);
- if (ret == -1) {
- qr_inode = __qr_inode_alloc (this, local->path, inode);
- if (qr_inode == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
-
- ret = inode_ctx_put (inode, this,
- (uint64_t)(long)qr_inode);
- if (ret == -1) {
- __qr_inode_free (qr_inode);
- qr_inode = NULL;
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
- } else {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
- }
-
- if (qr_inode->xattr) {
- dict_unref (qr_inode->xattr);
- qr_inode->xattr = NULL;
- table->cache_used -= qr_inode->stbuf.ia_size;
- }
-
- qr_inode->xattr = dict_ref (dict);
- qr_inode->stbuf = *buf;
- table->cache_used += buf->ia_size;
-
- gettimeofday (&qr_inode->tv, NULL);
- if (__qr_need_cache_prune (conf, table)) {
- __qr_cache_prune (this);
- }
- }
-unlock:
- UNLOCK (&table->lock);
+ if (!list_empty (&qr_inode->lru)) {
+ table->cache_used -= qr_inode->size;
+ qr_inode->size = 0;
+ list_del_init (&qr_inode->lru);
+ }
-out:
- /*
- * FIXME: content size in dict can be greater than the size application
- * requested for. Applications need to be careful till this is fixed.
- */
- QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, dict,
- postparent);
-
- return 0;
+ memset (&qr_inode->buf, 0, sizeof (qr_inode->buf));
}
-int32_t
-qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+void
+qr_inode_prune (xlator_t *this, inode_t *inode)
{
- qr_conf_t *conf = NULL;
- dict_t *new_req_dict = NULL;
- int32_t op_ret = -1, op_errno = -1;
- data_t *content = NULL;
- uint64_t requested_size = 0, size = 0, value = 0;
- char cached = 0;
- qr_inode_t *qr_inode = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- qr_local_t *local = NULL;
-
- priv = this->private;
- conf = &priv->conf;
- if (conf == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
- table = &priv->table;
- local = GF_CALLOC (1, sizeof (*local), gf_qr_mt_qr_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- local->path = gf_strdup (loc->path);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno,
- ENOMEM);
- LOCK (&table->lock);
- {
- op_ret = inode_ctx_get (loc->inode, this, &value);
- if (op_ret == 0) {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode != NULL) {
- if (qr_inode->xattr) {
- cached = 1;
- }
- }
- }
- }
- UNLOCK (&table->lock);
-
- if ((xattr_req == NULL) && (conf->max_file_size > 0)) {
- new_req_dict = xattr_req = dict_new ();
- if (xattr_req == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
- }
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ return;
- if (!cached) {
- if (xattr_req) {
- content = dict_get (xattr_req, GLUSTERFS_CONTENT_KEY);
- if (content) {
- requested_size = data_to_uint64 (content);
- }
- }
-
- if ((conf->max_file_size > 0)
- && (conf->max_file_size != requested_size)) {
- size = (conf->max_file_size > requested_size) ?
- conf->max_file_size : requested_size;
-
- op_ret = dict_set (xattr_req, GLUSTERFS_CONTENT_KEY,
- data_from_uint64 (size));
- if (op_ret < 0) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- }
- }
+ priv = this->private;
+ table = &priv->table;
- STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
-
- if (new_req_dict) {
- dict_unref (new_req_dict);
- }
-
- return 0;
-
-unwind:
- QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, NULL,
- NULL);
-
- if (new_req_dict) {
- dict_unref (new_req_dict);
- }
-
- return 0;
+ LOCK (&table->lock);
+ {
+ __qr_inode_prune (table, qr_inode);
+ }
+ UNLOCK (&table->lock);
}
-int32_t
-qr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+/* To be called with priv->table.lock held */
+void
+__qr_cache_prune (qr_inode_table_t *table, qr_conf_t *conf)
{
- uint64_t value = 0;
- int32_t ret = -1;
- struct list_head waiting_ops;
- qr_local_t *local = NULL;
- qr_inode_t *qr_inode = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *stub = NULL, *tmp = NULL;
- char is_open = 0;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- priv = this->private;
- table = &priv->table;
-
- local = frame->local;
- if (local != NULL) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- is_open = local->is_open;
- }
-
- INIT_LIST_HEAD (&waiting_ops);
-
- ret = fd_ctx_get (fd, this, &value);
- if ((ret == -1) && (op_ret != -1)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ qr_inode_t *curr = NULL;
+ qr_inode_t *next = NULL;
+ int index = 0;
+ size_t size_pruned = 0;
- if (value) {
- qr_fd_ctx = (qr_fd_ctx_t *) (long)value;
- }
+ for (index = 0; index < conf->max_pri; index++) {
+ list_for_each_entry_safe (curr, next, &table->lru[index], lru) {
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- qr_fd_ctx->open_in_transit = 0;
+ size_pruned += curr->size;
- if (op_ret == 0) {
- qr_fd_ctx->opened = 1;
- }
- list_splice_init (&qr_fd_ctx->waiting_ops,
- &waiting_ops);
- }
- UNLOCK (&qr_fd_ctx->lock);
-
- if (local && local->is_open
- && ((local->open_flags & O_TRUNC) == O_TRUNC)) {
- LOCK (&table->lock);
- {
- ret = inode_ctx_del (fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
-
- if (qr_inode != NULL) {
- __qr_inode_free (qr_inode);
- }
- }
- }
- UNLOCK (&table->lock);
- }
+ __qr_inode_prune (table, curr);
- if (!list_empty (&waiting_ops)) {
- list_for_each_entry_safe (stub, tmp, &waiting_ops,
- list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
+ if (table->cache_used < conf->cache_size)
+ return;
}
}
-out:
- if (is_open) {
- QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
- return 0;
+ return;
}
-int32_t
-qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+void
+qr_cache_prune (xlator_t *this)
{
- qr_inode_t *qr_inode = NULL;
- int32_t ret = -1;
- uint64_t filep = 0;
- char content_cached = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL, *tmp_fd_ctx = NULL;
- int32_t op_ret = -1, op_errno = -1;
- qr_local_t *local = NULL;
- qr_conf_t *conf = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_table_t *table = NULL;
priv = this->private;
- conf = &priv->conf;
table = &priv->table;
+ conf = &priv->conf;
- tmp_fd_ctx = qr_fd_ctx = GF_CALLOC (1, sizeof (*qr_fd_ctx),
- gf_qr_mt_qr_fd_ctx_t);
- if (qr_fd_ctx == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- LOCK_INIT (&qr_fd_ctx->lock);
- INIT_LIST_HEAD (&qr_fd_ctx->waiting_ops);
-
- qr_fd_ctx->path = gf_strdup (loc->path);
- qr_fd_ctx->flags = flags;
- qr_fd_ctx->wbflags = wbflags;
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)qr_fd_ctx);
- if (ret == -1) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
- tmp_fd_ctx = NULL;
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_qr_mt_qr_local_t);
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- local->is_open = 1;
- local->open_flags = flags;
- frame->local = local;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (fd->inode, this, &filep);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) filep;
- if (qr_inode) {
- if (qr_inode->xattr) {
- content_cached = 1;
- }
- }
- }
- }
- UNLOCK (&table->lock);
-
- if (content_cached && ((flags & O_DIRECTORY) == O_DIRECTORY)) {
- op_ret = -1;
- op_errno = ENOTDIR;
- goto unwind;
- }
-
- if (!content_cached || ((flags & O_ACCMODE) == O_WRONLY)
- || ((flags & O_TRUNC) == O_TRUNC)
- || ((flags & O_DIRECT) == O_DIRECT)) {
- LOCK (&qr_fd_ctx->lock);
- {
- /*
- * we really need not set this flag, since open is
- * not yet unwounded.
- */
-
- qr_fd_ctx->open_in_transit = 1;
- if ((flags & O_DIRECT) == O_DIRECT) {
- qr_fd_ctx->disabled = 1;
- }
- }
- UNLOCK (&qr_fd_ctx->lock);
- goto wind;
- } else {
- op_ret = 0;
- op_errno = 0;
- goto unwind;
- }
-
-unwind:
- if (tmp_fd_ctx != NULL) {
- qr_fd_ctx_free (tmp_fd_ctx);
- }
-
- QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- return 0;
-
-wind:
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
- return 0;
+ LOCK (&table->lock);
+ {
+ if (table->cache_used > conf->cache_size)
+ __qr_cache_prune (table, conf);
+ }
+ UNLOCK (&table->lock);
}
-static inline char
-qr_time_elapsed (struct timeval *now, struct timeval *then)
+void *
+qr_content_extract (dict_t *xdata)
{
- return now->tv_sec - then->tv_sec;
-}
+ data_t *data = NULL;
+ void *content = NULL;
+ data = dict_get (xdata, GF_CONTENT_KEY);
+ if (!data)
+ return NULL;
-static inline char
-qr_need_validation (qr_conf_t *conf, qr_inode_t *qr_inode)
-{
- struct timeval now = {0, };
- char need_validation = 0;
-
- gettimeofday (&now, NULL);
+ content = GF_CALLOC (1, data->len, gf_qr_mt_content_t);
+ if (!content)
+ return NULL;
- if (qr_time_elapsed (&now, &qr_inode->tv) >= conf->cache_timeout)
- need_validation = 1;
+ memcpy (content, data->data, data->len);
- return need_validation;
+ return content;
}
-static int32_t
-qr_validate_cache_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+void
+qr_content_update (xlator_t *this, qr_inode_t *qr_inode, void *data,
+ struct iatt *buf)
{
- qr_inode_t *qr_inode = NULL;
- qr_local_t *local = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- call_stub_t *stub = NULL;
-
- local = frame->local;
- if ((local == NULL) || ((local->fd) == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
-
- local->just_validated = 1;
-
- if (op_ret == -1) {
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
priv = this->private;
table = &priv->table;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (local->fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
- }
-
- if (qr_inode != NULL) {
- gettimeofday (&qr_inode->tv, NULL);
-
- if ((qr_inode->stbuf.ia_mtime != buf->ia_mtime)
- && (qr_inode->stbuf.ia_mtime_nsec
- != buf->ia_mtime_nsec)) {
- inode_ctx_del (local->fd->inode, this, NULL);
- __qr_inode_free (qr_inode);
- }
- }
- }
- UNLOCK (&table->lock);
-
- stub = local->stub;
- local->stub = NULL;
-
- call_resume (stub);
-
- return 0;
-
-unwind:
- /* this is actually unwind of readv */
- QR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-qr_validate_cache_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- qr_local_t *local = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- } else {
- op_ret = local->op_ret;
- op_errno = local->op_errno;
- }
-
- if (op_ret == -1) {
- qr_validate_cache_cbk (frame, NULL, this, op_ret, op_errno,
- NULL);
- } else {
- STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-
-int
-qr_validate_cache (call_frame_t *frame, xlator_t *this, fd_t *fd,
- call_stub_t *stub)
-{
- int ret = -1;
- int flags = 0;
- uint64_t value = 0;
- loc_t loc = {0, };
- char *path = NULL;
- qr_local_t *local = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *validate_stub = NULL;
- char need_open = 0, can_wind = 0;
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_qr_mt_qr_local_t);
- if (local == NULL) {
- goto out;
- }
+ LOCK (&table->lock);
+ {
+ __qr_inode_prune (table, qr_inode);
- local->fd = fd;
- local->stub = stub;
- frame->local = local;
+ qr_inode->data = data;
+ qr_inode->size = buf->ia_size;
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- validate_stub = fop_fstat_stub (frame,
- qr_validate_cache_helper,
- fd);
- if (validate_stub == NULL) {
- ret = -1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&validate_stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
+ qr_inode->ia_mtime = buf->ia_mtime;
+ qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec;
- if (ret == -1) {
- goto out;
- }
- } else {
- can_wind = 1;
- }
+ qr_inode->buf = *buf;
- if (need_open) {
- ret = qr_loc_fill (&loc, fd->inode, path);
- if (ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ gettimeofday (&qr_inode->last_refresh, NULL);
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- &loc, flags, fd, qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- } else if (can_wind) {
- STACK_WIND (frame, qr_validate_cache_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- }
+ __qr_inode_register (table, qr_inode);
+ }
+ UNLOCK (&table->lock);
- ret = 0;
-out:
- return ret;
+ qr_cache_prune (this);
}
-int32_t
-qr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
+gf_boolean_t
+qr_size_fits (qr_conf_t *conf, struct iatt *buf)
{
- QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
- return 0;
+ return (buf->ia_size <= conf->max_file_size);
}
-int32_t
-qr_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+gf_boolean_t
+qr_mtime_equal (qr_inode_t *qr_inode, struct iatt *buf)
{
- STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size, offset);
- return 0;
+ return (qr_inode->ia_mtime == buf->ia_mtime &&
+ qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec);
}
-int32_t
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+void
+__qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf)
{
- qr_inode_t *qr_inode = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- uint64_t value = 0;
- int count = -1, flags = 0, i = 0;
- char content_cached = 0, need_validation = 0;
- char need_open = 0, can_wind = 0, need_unwind = 0;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct iatt stbuf = {0, };
- data_t *content = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *stub = NULL;
- loc_t loc = {0, };
- qr_conf_t *conf = NULL;
- struct iovec *vector = NULL;
- char *path = NULL;
- glusterfs_ctx_t *ctx = NULL;
- off_t start = 0, end = 0;
- size_t len = 0;
- struct iobuf_pool *iobuf_pool = NULL;
- qr_local_t *local = NULL;
- char just_validated = 0;
qr_private_t *priv = NULL;
qr_inode_table_t *table = NULL;
-
- op_ret = 0;
+ qr_conf_t *conf = NULL;
priv = this->private;
- conf = &priv->conf;
table = &priv->table;
+ conf = &priv->conf;
- local = frame->local;
-
- if (local != NULL) {
- just_validated = local->just_validated;
- }
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- if (qr_fd_ctx != NULL) {
- if (qr_fd_ctx->disabled) {
- goto out;
- }
- }
- }
-
- iobuf_pool = this->ctx->iobuf_pool;
-
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode) {
- if (qr_inode->xattr){
- if (!just_validated
- && qr_need_validation (conf,
- qr_inode)) {
- need_validation = 1;
- goto unlock;
- }
-
- content = dict_get (qr_inode->xattr,
- GLUSTERFS_CONTENT_KEY);
-
-
- stbuf = qr_inode->stbuf;
- content_cached = 1;
- list_move_tail (&qr_inode->lru,
- &table->lru[qr_inode->priority]);
-
- if (offset > content->len) {
- op_ret = 0;
- end = content->len;
- } else {
- if ((offset + size)
- > content->len) {
- op_ret = content->len - offset;
- end = content->len;
- } else {
- op_ret = size;
- end = offset + size;
- }
- }
-
- ctx = this->ctx;
- count = (op_ret / iobuf_pool->page_size);
- if ((op_ret % iobuf_pool->page_size)
- != 0) {
- count++;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto unlock;
- }
-
- vector = GF_CALLOC (count,
- sizeof (*vector),
- gf_qr_mt_iovec);
- if (vector == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- for (i = 0; i < count; i++) {
- iobuf = iobuf_get (iobuf_pool);
- if (iobuf == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- start = offset + (iobuf_pool->page_size * i);
- if (start > end) {
- len = 0;
- } else {
- len = (iobuf_pool->page_size
- > (end - start))
- ? (end - start)
- : iobuf_pool->page_size;
-
- memcpy (iobuf->ptr,
- content->data + start,
- len);
- }
-
- iobref_add (iobref, iobuf);
- iobuf_unref (iobuf);
-
- vector[i].iov_base = iobuf->ptr;
- vector[i].iov_len = len;
- }
- }
- }
- }
- }
-unlock:
- UNLOCK (&table->lock);
-
-out:
- if (content_cached || need_unwind) {
- QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector,
- count, &stbuf, iobref);
-
- } else if (need_validation) {
- stub = fop_readv_stub (frame, qr_readv, fd, size, offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = qr_validate_cache (frame, this, fd, stub);
- if (op_ret == -1) {
- need_unwind = 1;
- op_errno = errno;
- call_stub_destroy (stub);
- goto out;
- }
- } else {
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_readv_stub (frame,
- qr_readv_helper,
- fd, size,
- offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto fdctx_unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- fdctx_unlock:
- UNLOCK (&qr_fd_ctx->lock);
-
- if (op_ret == -1) {
- need_unwind = 1;
- goto out;
- }
- } else {
- can_wind = 1;
- }
-
- if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- &loc, flags, fd, qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- } else if (can_wind) {
- STACK_WIND (frame, qr_readv_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size,
- offset);
- }
-
- }
+ if (qr_size_fits (conf, buf) && qr_mtime_equal (qr_inode, buf)) {
+ qr_inode->buf = *buf;
- if (vector) {
- GF_FREE (vector);
- }
+ gettimeofday (&qr_inode->last_refresh, NULL);
- if (iobref) {
- iobref_unref (iobref);
- }
+ __qr_inode_register (table, qr_inode);
+ } else {
+ __qr_inode_prune (table, qr_inode);
+ }
- return 0;
+ return;
}
-int32_t
-qr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+void
+qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf)
{
- QR_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ priv = this->private;
+ table = &priv->table;
-int32_t
-qr_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
-{
- STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count, off,
- iobref);
- return 0;
+ LOCK (&table->lock);
+ {
+ __qr_content_refresh (this, qr_inode, buf);
+ }
+ UNLOCK (&table->lock);
}
-int32_t
-qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
+gf_boolean_t
+__qr_cache_is_fresh (xlator_t *this, qr_inode_t *qr_inode)
{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_inode_t *qr_inode = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t op_ret = -1, op_errno = -1, ret = -1;
- char can_wind = 0, need_unwind = 0, need_open = 0;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
+ struct timeval now;
+ struct timeval diff;
- priv = this->private;
- table = &priv->table;
-
- ret = fd_ctx_get (fd, this, &value);
+ priv = this->private;
+ conf = &priv->conf;
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ gettimeofday (&now, NULL);
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode != NULL) {
- inode_ctx_del (fd->inode, this, NULL);
- __qr_inode_free (qr_inode);
- }
- }
- }
- UNLOCK (&table->lock);
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ timersub (&now, &qr_inode->last_refresh, &diff);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_writev_stub (frame, qr_writev_helper,
- fd, vector, count, off,
- iobref);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count,
- off, iobref);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
+ if (diff.tv_sec >= conf->cache_timeout)
+ return _gf_false;
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ return _gf_true;
}
-int32_t
-qr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf)
+int
+qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode_ret,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
- return 0;
-}
+ void *content = NULL;
+ qr_inode_t *qr_inode = NULL;
+ inode_t *inode = NULL;
+ inode = frame->local;
+ frame->local = NULL;
-int32_t
-qr_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- return 0;
-}
-
+ if (op_ret == -1) {
+ qr_inode_prune (this, inode);
+ goto out;
+ }
-int32_t
-qr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- char need_open = 0, can_wind = 0, need_unwind = 0;
- uint64_t value = 0;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- loc_t loc = {0, };
- char *path = NULL;
- int flags = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
+ if (dict_get (xdata, GLUSTERFS_BAD_INODE)) {
+ qr_inode_prune (this, inode);
+ goto out;
}
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fstat_stub (frame, qr_fstat_helper,
- fd);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ if (dict_get (xdata, "sh-failed")) {
+ qr_inode_prune (this, inode);
+ goto out;
+ }
+ content = qr_content_extract (xdata);
+
+ if (content) {
+ /* new content came along, always replace old content */
+ qr_inode = qr_inode_ctx_get_or_new (this, inode);
+ if (!qr_inode) {
+ /* no harm done */
+ GF_FREE (content);
+ goto out;
+ }
+ qr_content_update (this, qr_inode, content, buf);
+ } else {
+ /* purge old content if necessary */
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ /* usual path for large files */
+ goto out;
+
+ qr_content_refresh (this, qr_inode, buf);
+ }
out:
- if (need_unwind) {
- QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
+ if (inode)
+ inode_unref (inode);
- qr_loc_wipe (&loc);
- }
-
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode_ret,
+ buf, xdata, postparent);
return 0;
}
+int
+qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ dict_t *new_xdata = NULL;
+ priv = this->private;
+ conf = &priv->conf;
-int32_t
-qr_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, preop, postop);
- return 0;
-}
+ qr_inode = qr_inode_ctx_get (this, loc->inode);
+ if (qr_inode && qr_inode->data)
+ /* cached. only validate in qr_lookup_cbk */
+ goto wind;
+
+ if (!xdata)
+ xdata = new_xdata = dict_new ();
+
+ if (!xdata)
+ goto wind;
+
+ ret = 0;
+ if (conf->max_file_size)
+ ret = dict_set (xdata, GF_CONTENT_KEY,
+ data_from_uint64 (conf->max_file_size));
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ QUICK_READ_MSG_DICT_SET_FAILED,
+ "cannot set key in request dict (%s)",
+ loc->path);
+wind:
+ frame->local = inode_ref (loc->inode);
+ STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ if (new_xdata)
+ dict_unref (new_xdata);
-int32_t
-qr_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- STACK_WIND(frame, qr_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf,
- valid);
return 0;
}
-int32_t
-qr_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+int
+qr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ gf_dirent_t *entry = NULL;
+ qr_inode_t *qr_inode = NULL;
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsetattr_stub (frame,
- qr_fsetattr_helper,
- fd, stbuf, valid);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ if (op_ret <= 0)
+ goto unwind;
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf,
- valid);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ list_for_each_entry (entry, &entries->list, list) {
+ if (!entry->inode)
+ continue;
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
+ qr_inode = qr_inode_ctx_get (this, entry->inode);
+ if (!qr_inode)
+ /* no harm */
+ continue;
- qr_loc_wipe (&loc);
+ qr_content_refresh (this, qr_inode, &entry->d_stat);
}
- return 0;
-}
-
-
-int32_t
-qr_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno);
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
-int32_t
-qr_fsetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+int
+qr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags);
- return 0;
+ STACK_WIND (frame, qr_readdirp_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp,
+ fd, size, offset, xdata);
+ return 0;
}
-int32_t
-qr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+int
+qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- int open_flags = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- open_flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsetxattr_stub (frame,
- qr_fsetxattr_helper,
- fd, dict, flags);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict,
- flags);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ xlator_t *this = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ int op_ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {0, };
+ struct iatt buf = {0, };
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, open_flags,
- fd, qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
-
-
-int32_t
-qr_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- QR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict);
- return 0;
-}
+ this = frame->this;
+ priv = this->private;
+ table = &priv->table;
+ LOCK (&table->lock);
+ {
+ if (!qr_inode->data)
+ goto unlock;
-int32_t
-qr_fgetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
-{
- STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- return 0;
-}
+ if (offset >= qr_inode->size)
+ goto unlock;
+ if (!__qr_cache_is_fresh (this, qr_inode))
+ goto unlock;
-int32_t
-qr_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name)
-{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- /*
- * FIXME: Can quick-read use the extended attributes stored in the
- * cache? this needs to be discussed.
- */
+ op_ret = min (size, (qr_inode->size - offset));
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, op_ret);
+ if (!iobuf) {
+ op_ret = -1;
+ goto unlock;
+ }
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
+ iobref = iobref_new ();
+ if (!iobref) {
+ op_ret = -1;
+ goto unlock;
+ }
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ iobref_add (iobref, iobuf);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fgetxattr_stub (frame,
- qr_fgetxattr_helper,
- fd, name);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ memcpy (iobuf->ptr, qr_inode->data + offset, op_ret);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (open, frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
+ buf = qr_inode->buf;
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
+ /* bump LRU */
+ __qr_inode_register (table, qr_inode);
+ }
+unlock:
+ UNLOCK (&table->lock);
+ if (op_ret >= 0) {
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = op_ret;
-int32_t
-qr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- QR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- return 0;
-}
+ STACK_UNWIND_STRICT (readv, frame, op_ret, 0, &iov, 1,
+ &buf, iobref, xdata);
+ }
+ if (iobuf)
+ iobuf_unref (iobuf);
+ if (iobref)
+ iobref_unref (iobref);
-int32_t
-qr_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd);
- return 0;
+ return op_ret;
}
-int32_t
-qr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+int
+qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ qr_inode_t *qr_inode = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else if (qr_fd_ctx->open_in_transit) {
- stub = fop_flush_stub (frame, qr_flush_helper,
- fd);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- } else {
- op_ret = 0;
- need_unwind = 1;
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ qr_inode = qr_inode_ctx_get (this, fd->inode);
+ if (!qr_inode)
+ goto wind;
- if (need_unwind) {
- QR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd);
- }
+ if (qr_readv_cached (frame, qr_inode, size, offset, flags, xdata) < 0)
+ goto wind;
- return 0;
+ return 0;
+wind:
+ STACK_WIND (frame, default_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
}
-int32_t
-qr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int
+qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- return 0;
-}
+ qr_inode_prune (this, fd->inode);
-int32_t
-qr_fentrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
-{
- STACK_WIND(frame, qr_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename,
- cmd, type);
- return 0;
+ STACK_WIND (frame, default_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, iov, count, offset, flags, iobref, xdata);
+ return 0;
}
-int32_t
-qr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+int
+qr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fentrylk_stub (frame,
- qr_fentrylk_helper,
- volume, fd, basename,
- cmd, type);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk, volume, fd,
- basename, cmd, type);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ qr_inode_prune (this, loc->inode);
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ STACK_WIND (frame, default_truncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
}
-int32_t
-qr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-
+int
+qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
+ qr_inode_prune (this, fd->inode);
+
+ STACK_WIND (frame, default_ftruncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
return 0;
}
-int32_t
-qr_finodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *lock)
+int
+qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock);
- return 0;
-}
+ qr_inode_set_priority (this, fd->inode, loc->path);
+ STACK_WIND (frame, default_open_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+}
-int32_t
-qr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- int32_t cmd, struct flock *lock)
+int
+qr_forget (xlator_t *this, inode_t *inode)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ qr_inode_t *qr_inode = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
+ qr_inode = qr_inode_ctx_get (this, inode);
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_finodelk_stub (frame,
- qr_finodelk_helper,
- volume, fd, cmd,
- lock);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ if (!qr_inode)
+ return 0;
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, volume, fd,
- cmd, lock);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ qr_inode_prune (this, inode);
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
+ GF_FREE (qr_inode);
-
-int32_t
-qr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf)
-{
- QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
int32_t
-qr_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
- return 0;
-}
-
-int32_t
-qr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+qr_inodectx_dump (xlator_t *this, inode_t *inode)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- int open_flags = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ qr_inode_t *qr_inode = NULL;
+ int32_t ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char buf[256] = {0, };
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- open_flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ goto out;
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsync_stub (frame, qr_fsync_helper,
- fd, flags);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read",
+ "inodectx");
+ gf_proc_dump_add_section (key_prefix);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync, fd, flags);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->data ? "yes" : "no");
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, open_flags,
- fd, qr_fd_ctx->wbflags);
+ if (qr_inode->last_refresh.tv_sec) {
+ gf_time_fmt (buf, sizeof buf, qr_inode->last_refresh.tv_sec,
+ gf_timefmt_FT);
+ snprintf (buf + strlen (buf), sizeof buf - strlen (buf),
+ ".%"GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec);
- qr_loc_wipe (&loc);
+ gf_proc_dump_write ("last-cache-validation-time", "%s", buf);
}
- return 0;
+ ret = 0;
+out:
+ return ret;
}
-int32_t
-qr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+int
+qr_priv_dump (xlator_t *this)
{
- int32_t ret = 0;
- uint64_t value = 0;
- qr_inode_t *qr_inode = NULL;
- qr_local_t *local = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- if (op_ret == -1) {
- goto out;
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ uint32_t file_count = 0;
+ uint32_t i = 0;
+ qr_inode_t *curr = NULL;
+ uint64_t total_size = 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ if (!this) {
+ return -1;
}
priv = this->private;
- table = &priv->table;
-
- local = frame->local;
- if ((local == NULL) || (local->fd == NULL)
- || (local->fd->inode == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ conf = &priv->conf;
- frame->local = NULL;
-
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (local->fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
-
- if (qr_inode) {
- if (qr_inode->stbuf.ia_size != postbuf->ia_size)
- {
- inode_ctx_del (local->fd->inode, this,
- NULL);
- __qr_inode_free (qr_inode);
- }
- }
- }
- }
- UNLOCK (&table->lock);
+ if (!conf)
+ return -1;
-out:
- QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
-}
+ table = &priv->table;
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read",
+ "priv");
-int32_t
-qr_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
-}
+ gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_write ("max_file_size", "%d", conf->max_file_size);
+ gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout);
-int32_t
-qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_local_t *local = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_qr_mt_qr_local_t);
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
+ if (!table) {
goto out;
- }
-
- local->fd = fd;
- frame->local = local;
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_ftruncate_stub (frame,
- qr_ftruncate_helper,
- fd, offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
+ } else {
+ for (i = 0; i < conf->max_pri; i++) {
+ list_for_each_entry (curr, &table->lru[i], lru) {
+ file_count++;
+ total_size += curr->size;
}
}
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
}
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- }
+ gf_proc_dump_write ("total_files_cached", "%d", file_count);
+ gf_proc_dump_write ("total_cache_used", "%d", total_size);
+out:
return 0;
}
int32_t
-qr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
-{
- QR_STACK_UNWIND (lk, frame, op_ret, op_errno, lock);
- return 0;
-}
-
-
-int32_t
-qr_lk_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+mem_acct_init (xlator_t *this)
{
- STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock);
-
- return 0;
-}
+ int ret = -1;
+ if (!this)
+ return ret;
-int32_t
-qr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
-{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ ret = xlator_mem_acct_init (this, gf_qr_mt_end + 1);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_lk_stub (frame, qr_lk_helper, fd,
- cmd, lock);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ QUICK_READ_MSG_NO_MEMORY,
+ "Memory accounting init failed");
+ return ret;
}
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd,
- qr_fd_ctx->wbflags);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ return ret;
}
-int32_t
-qr_release (xlator_t *this, fd_t *fd)
+static gf_boolean_t
+check_cache_size_ok (xlator_t *this, int64_t cache_size)
{
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = 0;
- uint64_t value = 0;
-
- ret = fd_ctx_del (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- if (qr_fd_ctx) {
- qr_fd_ctx_free (qr_fd_ctx);
- }
+ int ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ opt = xlator_volume_option_get (this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ QUICK_READ_MSG_INVALID_ARGUMENT,
+ "could not get cache-size option");
+ goto out;
}
- return 0;
-}
-
-
-int32_t
-qr_forget (xlator_t *this, inode_t *inode)
-{
- qr_inode_t *qr_inode = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
- qr_private_t *priv = NULL;
-
- priv = this->private;
-
- LOCK (&priv->table.lock);
- {
- ret = inode_ctx_del (inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
- __qr_inode_free (qr_inode);
- }
+ total_mem = get_mem_size ();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_msg_debug (this->name, 0, "Max cache size is %"PRIu64,
+ max_cache_size);
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ QUICK_READ_MSG_INVALID_ARGUMENT, "Cache size %"PRIu64
+ " is greater than the max size of %"PRIu64,
+ cache_size, max_cache_size);
+ goto out;
}
- UNLOCK (&priv->table.lock);
-
- return 0;
+out:
+ return ret;
}
int
-qr_priv_dump (xlator_t *this)
+reconfigure (xlator_t *this, dict_t *options)
{
- qr_conf_t *conf = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- qr_private_t *priv = NULL;
+ int32_t ret = -1;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ uint64_t cache_size_new = 0;
- if (!this)
- return -1;
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
priv = this->private;
- conf = &priv->conf;
+ conf = &priv->conf;
if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
+ goto out;
}
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.quick-read",
- "priv");
-
- gf_proc_dump_add_section (key_prefix);
-
- gf_proc_dump_build_key (key, key_prefix, "max_file_size");
- gf_proc_dump_write (key, "%d", conf->max_file_size);
- gf_proc_dump_build_key (key, key_prefix, "cache_timeout");
- gf_proc_dump_write (key, "%d", conf->cache_timeout);
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
+ GF_OPTION_RECONF ("cache-timeout", conf->cache_timeout, options, int32,
+ out);
- ret = xlator_mem_acct_init (this, gf_qr_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
+ GF_OPTION_RECONF ("cache-size", cache_size_new, options, size_uint64, out);
+ if (!check_cache_size_ok (this, cache_size_new)) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ QUICK_READ_MSG_INVALID_CONFIG,
+ "Not reconfiguring cache-size");
+ goto out;
}
+ conf->cache_size = cache_size_new;
+ ret = 0;
+out:
return ret;
}
@@ -2393,89 +880,87 @@ mem_acct_init (xlator_t *this)
int32_t
qr_get_priority_list (const char *opt_str, struct list_head *first)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *priority_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct qr_priority *curr = NULL, *tmp = NULL;
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *priority_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", opt_str, out);
+ GF_VALIDATE_OR_GOTO ("quick-read", first, out);
string = gf_strdup (opt_str);
if (string == NULL) {
max_pri = -1;
goto out;
}
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- priority_str = strtok_r (string, ",", &tmp_str);
- while (priority_str) {
- curr = GF_CALLOC (1, sizeof (*curr), gf_qr_mt_qr_priority_t);
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ priority_str = strtok_r (string, ",", &tmp_str);
+ while (priority_str) {
+ curr = GF_CALLOC (1, sizeof (*curr), gf_qr_mt_qr_priority_t);
if (curr == NULL) {
max_pri = -1;
goto out;
}
- list_add_tail (&curr->list, first);
+ list_add_tail (&curr->list, first);
- dup_str = gf_strdup (priority_str);
+ dup_str = gf_strdup (priority_str);
if (dup_str == NULL) {
max_pri = -1;
goto out;
}
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ if (!pattern) {
max_pri = -1;
goto out;
}
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
+ priority = strtok_r (NULL, ":", &tmp_str1);
+ if (!priority) {
max_pri = -1;
goto out;
}
- gf_log ("quick-read", GF_LOG_TRACE,
- "quick-read priority : pattern %s : priority %s",
- pattern,
- priority);
+ gf_msg_trace ("quick-read", 0,
+ "quick-read priority : pattern %s : priority %s",
+ pattern, priority);
- curr->pattern = gf_strdup (pattern);
+ curr->pattern = gf_strdup (pattern);
if (curr->pattern == NULL) {
max_pri = -1;
goto out;
}
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
+ curr->priority = strtol (priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
max_pri = -1;
goto out;
} else {
- max_pri = max (max_pri, curr->priority);
+ max_pri = max (max_pri, curr->priority);
}
GF_FREE (dup_str);
dup_str = NULL;
- priority_str = strtok_r (NULL, ",", &tmp_str);
- }
-out:
- if (string != NULL) {
- GF_FREE (string);
+ priority_str = strtok_r (NULL, ",", &tmp_str);
}
+out:
+ GF_FREE (string);
- if (dup_str != NULL) {
- GF_FREE (dup_str);
- }
+ GF_FREE (dup_str);
if (max_pri == -1) {
list_for_each_entry_safe (curr, tmp, first, list) {
@@ -2485,104 +970,71 @@ out:
}
}
- return max_pri;
+ return max_pri;
}
-int32_t
+int32_t
init (xlator_t *this)
{
- char *str = NULL;
int32_t ret = -1, i = 0;
qr_private_t *priv = NULL;
qr_conf_t *conf = NULL;
-
+
if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED,
"FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
+ "child", this->name);
return -1;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (!this->parents) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ QUICK_READ_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
- priv = GF_CALLOC (1, sizeof (*priv),
- gf_qr_mt_qr_private_t);
+ priv = GF_CALLOC (1, sizeof (*priv), gf_qr_mt_qr_private_t);
if (priv == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
ret = -1;
goto out;
}
LOCK_INIT (&priv->table.lock);
conf = &priv->conf;
- conf->max_file_size = 65536;
- ret = dict_get_str (this->options, "max-file-size",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str, &conf->max_file_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "max-file-size\"",
- str);
- ret = -1;
- goto out;
- }
- }
- conf->cache_timeout = 1;
- ret = dict_get_str (this->options, "cache-timeout", &str);
- if (ret == 0) {
- ret = gf_string2uint_base10 (str,
- (unsigned int *)&conf->cache_timeout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cache-timeout value %s", str);
- ret = -1;
- goto out;
- }
- }
+ GF_OPTION_INIT ("max-file-size", conf->max_file_size, size_uint64, out);
- conf->cache_size = QR_DEFAULT_CACHE_SIZE;
- ret = dict_get_str (this->options, "cache-size", &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str, &conf->cache_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cache-size value %s", str);
- ret = -1;
- goto out;
- }
+ GF_OPTION_INIT ("cache-timeout", conf->cache_timeout, int32, out);
+
+ GF_OPTION_INIT ("cache-size", conf->cache_size, size_uint64, out);
+ if (!check_cache_size_ok (this, conf->cache_size)) {
+ ret = -1;
+ goto out;
}
- INIT_LIST_HEAD (&conf->priority_list);
- conf->max_pri = 1;
- if (dict_get (this->options, "priority")) {
- char *option_list = data_to_str (dict_get (this->options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- conf->max_pri = qr_get_priority_list (option_list,
- &conf->priority_list);
-
- if (conf->max_pri == -1) {
+ INIT_LIST_HEAD (&conf->priority_list);
+ conf->max_pri = 1;
+ if (dict_get (this->options, "priority")) {
+ char *option_list = data_to_str (dict_get (this->options,
+ "priority"));
+ gf_msg_trace (this->name, 0,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ conf->max_pri = qr_get_priority_list (option_list,
+ &conf->priority_list);
+
+ if (conf->max_pri == -1) {
goto out;
}
conf->max_pri ++;
- }
+ }
- priv->table.lru = GF_CALLOC (conf->max_pri,
- sizeof (*priv->table.lru),
+ priv->table.lru = GF_CALLOC (conf->max_pri, sizeof (*priv->table.lru),
gf_common_mt_list_head);
if (priv->table.lru == NULL) {
ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
goto out;
}
@@ -2603,57 +1055,110 @@ out:
void
-fini (xlator_t *this)
+qr_inode_table_destroy (qr_private_t *priv)
{
+ int i = 0;
+ qr_conf_t *conf = NULL;
+
+ conf = &priv->conf;
+
+ for (i = 0; i < conf->max_pri; i++) {
+ /* There is a known leak of inodes, hence until
+ * that is fixed, log the assert as warning.
+ GF_ASSERT (list_empty (&priv->table.lru[i]));*/
+ if (!list_empty (&priv->table.lru[i])) {
+ gf_msg ("quick-read", GF_LOG_INFO, 0,
+ QUICK_READ_MSG_LRU_NOT_EMPTY,
+ "quick read inode table lru not empty");
+ }
+ }
+
+ LOCK_DESTROY (&priv->table.lock);
+
return;
}
+void
+qr_conf_destroy (qr_conf_t *conf)
+{
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ list_for_each_entry_safe (curr, tmp, &conf->priority_list, list) {
+ list_del (&curr->list);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
+ }
+
+ return;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ qr_private_t *priv = NULL;
+
+ if (this == NULL) {
+ goto out;
+ }
+
+ priv = this->private;
+ if (priv == NULL) {
+ goto out;
+ }
+
+ qr_inode_table_destroy (priv);
+ qr_conf_destroy (&priv->conf);
+
+ this->private = NULL;
+
+ GF_FREE (priv);
+out:
+ return;
+}
+
struct xlator_fops fops = {
- .lookup = qr_lookup,
+ .lookup = qr_lookup,
+ .readdirp = qr_readdirp,
.open = qr_open,
.readv = qr_readv,
- .writev = qr_writev,
- .fstat = qr_fstat,
- .fsetxattr = qr_fsetxattr,
- .fgetxattr = qr_fgetxattr,
- .flush = qr_flush,
- .fentrylk = qr_fentrylk,
- .finodelk = qr_finodelk,
- .fsync = qr_fsync,
- .ftruncate = qr_ftruncate,
- .lk = qr_lk,
- .fsetattr = qr_fsetattr,
+ .writev = qr_writev,
+ .truncate = qr_truncate,
+ .ftruncate = qr_ftruncate
};
-
-
struct xlator_cbks cbks = {
.forget = qr_forget,
- .release = qr_release,
};
struct xlator_dumpops dumpops = {
.priv = qr_priv_dump,
+ .inodectx = qr_inodectx_dump,
};
struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
+ { .key = {"priority"},
+ .type = GF_OPTION_TYPE_ANY
+ },
{ .key = {"cache-size"},
.type = GF_OPTION_TYPE_SIZET,
.min = 0,
- .max = 6 * GF_UNIT_GB,
+ .max = 32 * GF_UNIT_GB,
+ .default_value = "128MB",
+ .description = "Size of the read cache."
},
{ .key = {"cache-timeout"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
- .max = 60
+ .max = 60,
+ .default_value = "1",
},
{ .key = {"max-file-size"},
.type = GF_OPTION_TYPE_SIZET,
.min = 0,
.max = 1 * GF_UNIT_KB * 1000,
+ .default_value = "64KB",
},
+ { .key = {NULL} }
};
diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h
index 7207a33a554..0697bf5fb14 100644
--- a/xlators/performance/quick-read/src/quick-read.h
+++ b/xlators/performance/quick-read/src/quick-read.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __QUICK_READ_H
#define __QUICK_READ_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -43,42 +29,20 @@
#include <fnmatch.h>
#include "quick-read-mem-types.h"
-#define GLUSTERFS_CONTENT_KEY "glusterfs.content"
-
-struct qr_fd_ctx {
- char opened;
- char disabled;
- char open_in_transit;
- char *path;
- int flags;
- int wbflags;
- struct list_head waiting_ops;
- gf_lock_t lock;
-};
-typedef struct qr_fd_ctx qr_fd_ctx_t;
-
-struct qr_local {
- char is_open;
- char *path;
- char just_validated;
- fd_t *fd;
- int open_flags;
- int32_t op_ret;
- int32_t op_errno;
- call_stub_t *stub;
-};
-typedef struct qr_local qr_local_t;
struct qr_inode {
- dict_t *xattr;
- inode_t *inode;
+ void *data;
+ size_t size;
int priority;
- struct iatt stbuf;
- struct timeval tv;
+ uint32_t ia_mtime;
+ uint32_t ia_mtime_nsec;
+ struct iatt buf;
+ struct timeval last_refresh;
struct list_head lru;
};
typedef struct qr_inode qr_inode_t;
+
struct qr_priority {
char *pattern;
int32_t priority;
@@ -108,13 +72,5 @@ struct qr_private {
};
typedef struct qr_private qr_private_t;
-void qr_local_free (qr_local_t *local);
-
-#define QR_STACK_UNWIND(op, frame, params ...) do { \
- qr_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND_STRICT (op, frame, params); \
- qr_local_free (__local); \
-} while (0)
#endif /* #ifndef __QUICK_READ_H */
diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am
index b46020aacee..0b1878707f7 100644
--- a/xlators/performance/read-ahead/src/Makefile.am
+++ b/xlators/performance/read-ahead/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = read-ahead.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-read_ahead_la_LDFLAGS = -module -avoidversion
+read_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
read_ahead_la_SOURCES = read-ahead.c page.c
read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = read-ahead.h read-ahead-mem-types.h
+noinst_HEADERS = read-ahead.h read-ahead-mem-types.h read-ahead-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c
index 0e271a9ac70..216e327af74 100644
--- a/xlators/performance/read-ahead/src/page.c
+++ b/xlators/performance/read-ahead/src/page.c
@@ -1,112 +1,106 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
#include "xlator.h"
#include "read-ahead.h"
#include <assert.h>
+#include "read-ahead-messages.h"
ra_page_t *
ra_page_get (ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ page = file->pages.next;
+ rounded_offset = floor (offset, file->page_size);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- if (page == &file->pages || page->offset != rounded_offset)
- page = NULL;
+ if (page == &file->pages || page->offset != rounded_offset)
+ page = NULL;
- return page;
+out:
+ return page;
}
ra_page_t *
ra_page_create (ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
- ra_page_t *newpage = NULL;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ra_page_t *newpage = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ page = file->pages.next;
+ rounded_offset = floor (offset, file->page_size);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- if (page == &file->pages || page->offset != rounded_offset) {
- newpage = GF_CALLOC (1, sizeof (*newpage),
- gf_ra_mt_ra_page_t);
- if (!newpage)
- return NULL;
+ if (page == &file->pages || page->offset != rounded_offset) {
+ newpage = GF_CALLOC (1, sizeof (*newpage), gf_ra_mt_ra_page_t);
+ if (!newpage) {
+ goto out;
+ }
- newpage->offset = rounded_offset;
- newpage->prev = page->prev;
- newpage->next = page;
- newpage->file = file;
- page->prev->next = newpage;
- page->prev = newpage;
+ newpage->offset = rounded_offset;
+ newpage->prev = page->prev;
+ newpage->next = page;
+ newpage->file = file;
+ page->prev->next = newpage;
+ page->prev = newpage;
- page = newpage;
- }
+ page = newpage;
+ }
- return page;
+out:
+ return page;
}
void
ra_wait_on_page (ra_page_t *page, call_frame_t *frame)
{
- ra_waitq_t *waitq = NULL;
- ra_local_t *local = NULL;
-
- local = frame->local;
- waitq = GF_CALLOC (1, sizeof (*waitq),
- gf_ra_mt_ra_waitq_t);
- if (!waitq) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "out of memory");
+ ra_waitq_t *waitq = NULL;
+ ra_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+
+ local = frame->local;
+
+ waitq = GF_CALLOC (1, sizeof (*waitq), gf_ra_mt_ra_waitq_t);
+ if (!waitq) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto out;
- }
+ }
- waitq->data = frame;
- waitq->next = page->waitq;
- page->waitq = waitq;
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ page->waitq = waitq;
- ra_local_lock (local);
- {
- local->wait_count++;
- }
- ra_local_unlock (local);
+ ra_local_lock (local);
+ {
+ local->wait_count++;
+ }
+ ra_local_unlock (local);
out:
return;
@@ -116,119 +110,148 @@ out:
void
ra_waitq_return (ra_waitq_t *waitq)
{
- ra_waitq_t *trav = NULL;
- ra_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ra_waitq_t *trav = NULL;
+ ra_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ra_frame_return (frame);
- GF_FREE (trav);
- }
+ frame = trav->data;
+ ra_frame_return (frame);
+ GF_FREE (trav);
+ }
+
+ return;
}
int
ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_local_t *local = NULL;
- off_t pending_offset = 0;
- ra_file_t *file = NULL;
- ra_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- ra_waitq_t *waitq = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fd = local->fd;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
-
- file = (ra_file_t *)(long)tmp_file;
- pending_offset = local->pending_offset;
- trav_offset = pending_offset;
- payload_size = op_ret;
-
- ra_file_lock (file);
- {
- if (op_ret >= 0)
- file->stbuf = *stbuf;
-
- if (op_ret < 0) {
- page = ra_page_get (file, pending_offset);
- if (page)
- waitq = ra_page_error (page, op_ret, op_errno);
- goto unlock;
- }
-
- page = ra_page_get (file, pending_offset);
- if (!page) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
- pending_offset, file->page_size, file);
- goto unlock;
- }
-
- if (page->vector) {
- iobref_unref (page->iobref);
- GF_FREE (page->vector);
- }
-
- page->vector = iov_dup (vector, count);
+ ra_local_t *local = NULL;
+ off_t pending_offset = 0;
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ fd_t *fd = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_ASSERT (frame);
+
+ local = frame->local;
+ fd = local->fd;
+
+ fd_ctx_get (fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ pending_offset = local->pending_offset;
+
+ if (file == NULL) {
+ gf_msg (this->name, GF_LOG_WARNING, EBADF,
+ READ_AHEAD_MSG_FD_CONTEXT_NOT_SET,
+ "read-ahead context not set in fd (%p)", fd);
+ op_ret = -1;
+ op_errno = EBADF;
+ goto out;
+ }
+
+ ra_file_lock (file);
+ {
+ if (op_ret >= 0)
+ file->stbuf = *stbuf;
+
+ page = ra_page_get (file, pending_offset);
+
+ if (!page) {
+ gf_msg_trace (this->name, 0,
+ "wasted copy: "
+ "%"PRId64"[+%"PRId64"] file=%p",
+ pending_offset, file->page_size, file);
+ goto unlock;
+ }
+
+ /*
+ * "Dirty" means that the request was a pure read-ahead; it's
+ * set for requests we issue ourselves, and cleared when user
+ * requests are issued or put on the waitq. "Poisoned" means
+ * that we got a write while a read was still in flight, and we
+ * couldn't stop it so we marked it instead. If it's both
+ * dirty and poisoned by the time we get here, we cancel its
+ * effect so that a subsequent user read doesn't get data that
+ * we know is stale (because we made it stale ourselves). We
+ * can't use ESTALE because that has special significance.
+ * ECANCELED has no such special meaning, and is close to what
+ * we're trying to indicate.
+ */
+ if (page->dirty && page->poisoned) {
+ op_ret = -1;
+ op_errno = ECANCELED;
+ }
+
+ if (op_ret < 0) {
+ waitq = ra_page_error (page, op_ret, op_errno);
+ goto unlock;
+ }
+
+ if (page->vector) {
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ }
+
+ page->vector = iov_dup (vector, count);
if (page->vector == NULL) {
waitq = ra_page_error (page, -1, ENOMEM);
goto unlock;
}
- page->count = count;
- page->iobref = iobref_ref (iobref);
- page->ready = 1;
+ page->count = count;
+ page->iobref = iobref_ref (iobref);
+ page->ready = 1;
- page->size = iov_length (vector, count);
+ page->size = iov_length (vector, count);
- waitq = ra_page_wakeup (page);
- }
+ waitq = ra_page_wakeup (page);
+ }
unlock:
- ra_file_unlock (file);
+ ra_file_unlock (file);
- ra_waitq_return (waitq);
+ ra_waitq_return (waitq);
- fd_unref (local->fd);
+ fd_unref (local->fd);
- GF_FREE (frame->local);
- frame->local = NULL;
+ mem_put (frame->local);
+ frame->local = NULL;
- STACK_DESTROY (frame->root);
- return 0;
+out:
+ STACK_DESTROY (frame->root);
+ return 0;
}
void
ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
{
- call_frame_t *fault_frame = NULL;
- ra_local_t *fault_local = NULL, *local = NULL;
- ra_page_t *page = NULL;
- ra_waitq_t *waitq = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- local = frame->local;
- fault_frame = copy_frame (frame);
+ call_frame_t *fault_frame = NULL;
+ ra_local_t *fault_local = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ fault_frame = copy_frame (frame);
if (fault_frame == NULL) {
op_ret = -1;
op_errno = ENOMEM;
goto err;
}
- fault_local = GF_CALLOC (1, sizeof (ra_local_t),
- gf_ra_mt_ra_local_t);
+ fault_local = mem_get0 (THIS->local_pool);
if (fault_local == NULL) {
STACK_DESTROY (fault_frame->root);
op_ret = -1;
@@ -236,18 +259,18 @@ ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
goto err;
}
- fault_frame->local = fault_local;
- fault_local->pending_offset = offset;
- fault_local->pending_size = file->page_size;
+ fault_frame->local = fault_local;
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = file->page_size;
- fault_local->fd = fd_ref (file->fd);
+ fault_local->fd = fd_ref (file->fd);
- STACK_WIND (fault_frame, ra_fault_cbk,
- FIRST_CHILD (fault_frame->this),
- FIRST_CHILD (fault_frame->this)->fops->readv,
- file->fd, file->page_size, offset);
+ STACK_WIND (fault_frame, ra_fault_cbk,
+ FIRST_CHILD (fault_frame->this),
+ FIRST_CHILD (fault_frame->this)->fops->readv,
+ file->fd, file->page_size, offset, 0, NULL);
- return;
+ return;
err:
ra_file_lock (file);
@@ -258,63 +281,69 @@ err:
op_errno);
}
ra_file_unlock (file);
-
+
if (waitq != NULL) {
ra_waitq_return (waitq);
}
+
+out:
+ return;
}
+
void
ra_frame_fill (ra_page_t *page, call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ra_fill_t *new = NULL;
-
- local = frame->local;
- fill = &local->fill;
-
- if (local->op_ret != -1 && page->size) {
- if (local->offset > page->offset)
- src_offset = local->offset - page->offset;
- else
- dst_offset = page->offset - local->offset;
-
- copy_size = min (page->size - src_offset,
- local->size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- fill = fill->next;
- while (fill != &local->fill) {
- if (fill->offset > page->offset) {
- break;
- }
- fill = fill->next;
- }
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ra_fill_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+
+ local = frame->local;
+ fill = &local->fill;
+
+ if (local->op_ret != -1 && page->size) {
+ if (local->offset > page->offset)
+ src_offset = local->offset - page->offset;
+ else
+ dst_offset = page->offset - local->offset;
+
+ copy_size = min (page->size - src_offset,
+ local->size - dst_offset);
- new = GF_CALLOC (1, sizeof (*new),
- gf_ra_mt_ra_fill_t);
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
+
+ fill = fill->next;
+ while (fill != &local->fill) {
+ if (fill->offset > page->offset) {
+ break;
+ }
+ fill = fill->next;
+ }
+
+ new = GF_CALLOC (1, sizeof (*new), gf_ra_mt_ra_fill_t);
if (new == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto out;
}
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- NULL);
- new->vector = GF_CALLOC (new->count, sizeof (struct iovec),
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref (page->iobref);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset, src_offset+copy_size,
+ NULL);
+ new->vector = GF_CALLOC (new->count, sizeof (struct iovec),
gf_ra_mt_iovec);
if (new->vector == NULL) {
local->op_ret = -1;
@@ -323,17 +352,17 @@ ra_frame_fill (ra_page_t *page, call_frame_t *frame)
goto out;
}
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- new->vector);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset, src_offset+copy_size,
+ new->vector);
- new->next = fill;
- new->prev = new->next->prev;
- new->next->prev = new;
- new->prev->next = new;
+ new->next = fill;
+ new->prev = new->next->prev;
+ new->next->prev = new;
+ new->prev->next = new;
- local->op_ret += copy_size;
- }
+ local->op_ret += copy_size;
+ }
out:
return;
@@ -343,36 +372,36 @@ out:
void
ra_frame_unwind (call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- int32_t count = 0;
- struct iovec *vector;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- ra_fill_t *next = NULL;
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fill = local->fill.next;
-
- iobref = iobref_new ();
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ ra_fill_t *next = NULL;
+ fd_t *fd = NULL;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+
+ local = frame->local;
+ fill = local->fill.next;
+
+ iobref = iobref_new ();
if (iobref == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
}
- frame->local = NULL;
+ frame->local = NULL;
- while (fill != &local->fill) {
- count += fill->count;
- fill = fill->next;
- }
+ while (fill != &local->fill) {
+ count += fill->count;
+ fill = fill->next;
+ }
- vector = GF_CALLOC (count, sizeof (*vector),
- gf_ra_mt_iovec);
+ vector = GF_CALLOC (count, sizeof (*vector), gf_ra_mt_iovec);
if (vector == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
@@ -380,42 +409,48 @@ ra_frame_unwind (call_frame_t *frame)
iobref = NULL;
}
- fill = local->fill.next;
+ fill = local->fill.next;
- while (fill != &local->fill) {
- next = fill->next;
+ while (fill != &local->fill) {
+ next = fill->next;
if ((vector != NULL) && (iobref != NULL)) {
memcpy (((char *)vector) + copied, fill->vector,
fill->count * sizeof (*vector));
-
+
copied += (fill->count * sizeof (*vector));
- iobref_merge (iobref, fill->iobref);
+ if (iobref_merge (iobref, fill->iobref)) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref (iobref);
+ iobref = NULL;
+ }
}
- fill->next->prev = fill->prev;
- fill->prev->next = fill->prev;
+ fill->next->prev = fill->prev;
+ fill->prev->next = fill->prev;
- iobref_unref (fill->iobref);
- GF_FREE (fill->vector);
- GF_FREE (fill);
+ iobref_unref (fill->iobref);
+ GF_FREE (fill->vector);
+ GF_FREE (fill);
- fill = next;
- }
+ fill = next;
+ }
- fd = local->fd;
- ret = fd_ctx_get (fd, frame->this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ fd = local->fd;
+ fd_ctx_get (fd, frame->this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
- vector, count, &file->stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
+ vector, count, &file->stbuf, iobref, NULL);
- iobref_unref (iobref);
- pthread_mutex_destroy (&local->local_lock);
- GF_FREE (local);
- GF_FREE (vector);
+ iobref_unref (iobref);
+ pthread_mutex_destroy (&local->local_lock);
+ mem_put (local);
+ GF_FREE (vector);
- return;
+out:
+ return;
}
/*
@@ -426,25 +461,28 @@ ra_frame_unwind (call_frame_t *frame)
void
ra_frame_return (call_frame_t *frame)
{
- ra_local_t *local = NULL;
- int32_t wait_count = 0;
+ ra_local_t *local = NULL;
+ int32_t wait_count = 0;
- local = frame->local;
- assert (local->wait_count > 0);
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- ra_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ra_local_unlock (local);
+ local = frame->local;
+ GF_ASSERT (local->wait_count > 0);
+
+ ra_local_lock (local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ra_local_unlock (local);
- if (!wait_count)
- ra_frame_unwind (frame);
+ if (!wait_count)
+ ra_frame_unwind (frame);
- return;
+out:
+ return;
}
-/*
+/*
* ra_page_wakeup -
* @page:
*
@@ -452,19 +490,24 @@ ra_frame_return (call_frame_t *frame)
ra_waitq_t *
ra_page_wakeup (ra_page_t *page)
{
- ra_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame;
+ ra_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
- waitq = page->waitq;
- page->waitq = NULL;
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ra_frame_fill (page, frame);
- }
+ waitq = page->waitq;
+ page->waitq = NULL;
- return waitq;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ra_frame_fill (page, frame);
+ }
+
+ if (page->stale) {
+ ra_page_purge (page);
+ }
+out:
+ return waitq;
}
/*
@@ -475,14 +518,20 @@ ra_page_wakeup (ra_page_t *page)
void
ra_page_purge (ra_page_t *page)
{
- page->prev->next = page->next;
- page->next->prev = page->prev;
-
- if (page->iobref) {
- iobref_unref (page->iobref);
- }
- GF_FREE (page->vector);
- GF_FREE (page);
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
+
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ if (page->iobref) {
+ iobref_unref (page->iobref);
+ }
+
+ GF_FREE (page->vector);
+ GF_FREE (page);
+
+out:
+ return;
}
/*
@@ -495,32 +544,33 @@ ra_page_purge (ra_page_t *page)
ra_waitq_t *
ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
{
+ ra_waitq_t *waitq = NULL;
+ ra_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ ra_local_t *local = NULL;
- ra_waitq_t *waitq = NULL;
- ra_waitq_t *trav = NULL;
- call_frame_t *frame = NULL;
- ra_local_t *local = NULL;
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
- local = frame->local;
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
+ local = frame->local;
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
- ra_page_purge (page);
+ ra_page_purge (page);
- return waitq;
+out:
+ return waitq;
}
-/*
+/*
* ra_file_destroy -
* @file:
*
@@ -528,24 +578,29 @@ ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
void
ra_file_destroy (ra_file_t *file)
{
- ra_conf_t *conf = NULL;
- ra_page_t *trav = NULL;
-
- conf = file->conf;
-
- ra_conf_lock (conf);
- {
- file->prev->next = file->next;
- file->next->prev = file->prev;
- }
- ra_conf_unlock (conf);
-
- trav = file->pages.next;
- while (trav != &file->pages) {
- ra_page_error (trav, -1, EINVAL);
- trav = file->pages.next;
- }
-
- pthread_mutex_destroy (&file->file_lock);
- GF_FREE (file);
+ ra_conf_t *conf = NULL;
+ ra_page_t *trav = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
+
+ conf = file->conf;
+
+ ra_conf_lock (conf);
+ {
+ file->prev->next = file->next;
+ file->next->prev = file->prev;
+ }
+ ra_conf_unlock (conf);
+
+ trav = file->pages.next;
+ while (trav != &file->pages) {
+ ra_page_error (trav, -1, EINVAL);
+ trav = file->pages.next;
+ }
+
+ pthread_mutex_destroy (&file->file_lock);
+ GF_FREE (file);
+
+out:
+ return;
}
diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
index b21d0595a2d..219e2928919 100644
--- a/xlators/performance/read-ahead/src/read-ahead-mem-types.h
+++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,7 +16,6 @@
enum gf_ra_mem_types_ {
gf_ra_mt_ra_file_t = gf_common_mt_end + 1,
- gf_ra_mt_ra_local_t,
gf_ra_mt_ra_conf_t,
gf_ra_mt_ra_page_t,
gf_ra_mt_ra_waitq_t,
@@ -34,4 +24,3 @@ enum gf_ra_mem_types_ {
gf_ra_mt_end
};
#endif
-
diff --git a/xlators/performance/read-ahead/src/read-ahead-messages.h b/xlators/performance/read-ahead/src/read-ahead-messages.h
new file mode 100644
index 00000000000..e6eaab10777
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead-messages.h
@@ -0,0 +1,111 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _READ_AHEAD_MESSAGES_H_
+#define _READ_AHEAD_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file read-ahead-messages.h
+ * \brief READ_AHEAD log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_READ_AHEAD_BASE GLFS_MSGID_COMP_READ_AHEAD
+#define GLFS_READ_AHEAD_NUM_MESSAGES 6
+#define GLFS_MSGID_END (GLFS_READ_AHEAD_BASE +\
+ GLFS_READ_AHEAD_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_READ_AHEAD_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED (GLFS_READ_AHEAD_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READ_AHEAD_MSG_VOL_MISCONFIGURED (GLFS_READ_AHEAD_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READ_AHEAD_MSG_NO_MEMORY (GLFS_READ_AHEAD_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READ_AHEAD_MSG_FD_CONTEXT_NOT_SET (GLFS_READ_AHEAD_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND (GLFS_READ_AHEAD_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READ_AHEAD_MSG_XLATOR_CONF_NULL (GLFS_READ_AHEAD_BASE + 6)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _READ_AHEAD_MESSAGES_H_ */
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
index 7250a9d5996..242b57971a0 100644
--- a/xlators/performance/read-ahead/src/read-ahead.c
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -1,34 +1,20 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*
- TODO:
- - handle O_DIRECT
- - maintain offset, flush on lseek
- - ensure efficient memory managment in case of random seek
+/*
+ TODO:
+ - handle O_DIRECT
+ - maintain offset, flush on lseek
+ - ensure efficient memory management in case of random seek
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -37,6 +23,7 @@
#include "statedump.h"
#include <assert.h>
#include <sys/time.h>
+#include "read-ahead-messages.h"
static void
read_ahead (call_frame_t *frame, ra_file_t *file);
@@ -44,77 +31,76 @@ read_ahead (call_frame_t *frame, ra_file_t *file);
int
ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- long wbflags = 0;
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- if (op_ret == -1) {
- goto unwind;
- }
+ conf = this->private;
- wbflags = (long)frame->local;
+ if (op_ret == -1) {
+ goto unwind;
+ }
- file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
- if (!file) {
+ file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
+ if (!file) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- /* If O_DIRECT open, we disable caching on it */
+ goto unwind;
+ }
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
+ /* If O_DIRECT open, we disable caching on it */
- if (wbflags & GF_OPEN_NOWB) {
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
file->disabled = 1;
- }
-
- file->offset = (unsigned long long) 0;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
+ file->offset = (unsigned long long) 0;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long) 0;
+ file->pages.file = file;
+
+ ra_conf_lock (conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock (conf);
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init (&file->file_lock, NULL);
- if (!file->disabled) {
- file->page_count = 1;
- }
+ if (!file->disabled) {
+ file->page_count = 1;
+ }
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+ ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
if (ret == -1) {
+ gf_msg (frame->this->name, GF_LOG_WARNING,
+ 0, READ_AHEAD_MSG_NO_MEMORY,
+ "cannot set read-ahead context"
+ "information in fd (%p)",
+ fd);
ra_file_destroy (file);
op_ret = -1;
op_errno = ENOMEM;
}
-
+
unwind:
frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
@@ -122,93 +108,104 @@ int
ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- if (op_ret == -1) {
- goto unwind;
- }
+ conf = this->private;
- file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
- if (!file) {
+ if (op_ret == -1) {
+ goto unwind;
+ }
+
+ file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
+ if (!file) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
+ goto unwind;
+ }
- /* If O_DIRECT open, we disable caching on it */
-
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
-
- file->offset = (unsigned long long) 0;
- //file->size = fd->inode->buf.ia_size;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
-
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
+ /* If O_DIRECT open, we disable caching on it */
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+ file->offset = (unsigned long long) 0;
+ //file->size = fd->inode->buf.ia_size;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long) 0;
+ file->pages.file = file;
+
+ ra_conf_lock (conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock (conf);
+
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init (&file->file_lock, NULL);
+
+ ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
if (ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING,
+ 0, READ_AHEAD_MSG_NO_MEMORY,
+ "cannot set read ahead context"
+ "information in fd (%p)",
+ fd);
ra_file_destroy (file);
op_ret = -1;
op_errno = ENOMEM;
}
unwind:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
int
ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
- frame->local = (void *)(long)wbflags;
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
- STACK_WIND (frame, ra_open_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open,
- loc, flags, fd, wbflags);
+ STACK_WIND (frame, ra_open_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
- return 0;
+ return 0;
}
+
int
ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, ra_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
- return 0;
+ STACK_WIND (frame, ra_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+
+ return 0;
}
/* free cache pages between offset and offset+size,
@@ -216,399 +213,426 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
static void
-flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size)
-{
- ra_page_t *trav = NULL;
- ra_page_t *next = NULL;
-
- ra_file_lock (file);
- {
- trav = file->pages.next;
- while (trav != &file->pages
- && trav->offset < (offset + size)) {
-
- next = trav->next;
- if (trav->offset >= offset && !trav->waitq) {
- ra_page_purge (trav);
- }
- trav = next;
- }
- }
- ra_file_unlock (file);
+flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size,
+ int for_write)
+{
+ ra_page_t *trav = NULL;
+ ra_page_t *next = NULL;
+
+ ra_file_lock (file);
+ {
+ trav = file->pages.next;
+ while (trav != &file->pages
+ && trav->offset < (offset + size)) {
+
+ next = trav->next;
+ if (trav->offset >= offset) {
+ if (!trav->waitq) {
+ ra_page_purge (trav);
+ }
+ else {
+ trav->stale = 1;
+
+ if (for_write) {
+ trav->poisoned = 1;
+ }
+ }
+ }
+ trav = next;
+ }
+ }
+ ra_file_unlock (file);
}
int
ra_release (xlator_t *this, fd_t *fd)
{
- uint64_t tmp_file = 0;
- int ret = 0;
+ uint64_t tmp_file = 0;
+ int ret = 0;
- ret = fd_ctx_del (fd, this, &tmp_file);
-
- if (!ret) {
- ra_file_destroy ((ra_file_t *)(long)tmp_file);
- }
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+ ret = fd_ctx_del (fd, this, &tmp_file);
- return 0;
+ if (!ret) {
+ ra_file_destroy ((ra_file_t *)(long)tmp_file);
+ }
+
+out:
+ return 0;
}
void
read_ahead (call_frame_t *frame, ra_file_t *file)
{
- off_t ra_offset = 0;
- size_t ra_size = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- off_t cap = 0;
- char fault = 0;
+ off_t ra_offset = 0;
+ size_t ra_size = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ off_t cap = 0;
+ char fault = 0;
- if (!file->page_count)
- return;
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
- ra_size = file->page_size * file->page_count;
- ra_offset = floor (file->offset, file->page_size);
- cap = file->size ? file->size : file->offset + ra_size;
+ if (!file->page_count) {
+ goto out;
+ }
- while (ra_offset < min (file->offset + ra_size, cap)) {
+ ra_size = file->page_size * file->page_count;
+ ra_offset = floor (file->offset, file->page_size);
+ cap = file->size ? file->size : file->offset + ra_size;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, ra_offset);
- }
- ra_file_unlock (file);
+ while (ra_offset < min (file->offset + ra_size, cap)) {
- if (!trav)
- break;
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, ra_offset);
+ }
+ ra_file_unlock (file);
- ra_offset += file->page_size;
- }
+ if (!trav)
+ break;
- if (trav)
- /* comfortable enough */
- return;
-
- trav_offset = ra_offset;
-
- trav = file->pages.next;
- cap = file->size ? file->size : ra_offset + ra_size;
-
- while (trav_offset < min(ra_offset + ra_size, cap)) {
- fault = 0;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- fault = 1;
- trav = ra_page_create (file, trav_offset);
- if (trav)
- trav->dirty = 1;
- }
- }
- ra_file_unlock (file);
-
- if (!trav) {
- /* OUT OF MEMORY */
- break;
- }
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "RA at offset=%"PRId64, trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
- trav_offset += file->page_size;
- }
+ ra_offset += file->page_size;
+ }
+
+ if (trav) {
+ /* comfortable enough */
+ goto out;
+ }
+
+ trav_offset = ra_offset;
- return;
+ cap = file->size ? file->size : ra_offset + ra_size;
+
+ while (trav_offset < min(ra_offset + ra_size, cap)) {
+ fault = 0;
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ fault = 1;
+ trav = ra_page_create (file, trav_offset);
+ if (trav)
+ trav->dirty = 1;
+ }
+ }
+ ra_file_unlock (file);
+
+ if (!trav) {
+ /* OUT OF MEMORY */
+ break;
+ }
+
+ if (fault) {
+ gf_msg_trace (frame->this->name, 0,
+ "RA at offset=%"PRId64, trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
+ trav_offset += file->page_size;
+ }
+
+out:
+ return;
}
int
ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_DESTROY (frame->root);
- return 0;
+ GF_ASSERT (frame);
+ STACK_DESTROY (frame->root);
+ return 0;
}
static void
dispatch_requests (call_frame_t *frame, ra_file_t *file)
{
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- call_frame_t *ra_frame = NULL;
- char need_atime_update = 1;
- char fault = 0;
-
- local = frame->local;
- conf = file->conf;
-
- rounded_offset = floor (local->offset, file->page_size);
- rounded_end = roof (local->offset + local->size, file->page_size);
-
- trav_offset = rounded_offset;
- trav = file->pages.next;
-
- while (trav_offset < rounded_end) {
- fault = 0;
-
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- trav = ra_page_create (file, trav_offset);
- fault = 1;
- need_atime_update = 0;
- }
-
- if (!trav) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ call_frame_t *ra_frame = NULL;
+ char need_atime_update = 1;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ local = frame->local;
+ conf = file->conf;
+
+ rounded_offset = floor (local->offset, file->page_size);
+ rounded_end = roof (local->offset + local->size, file->page_size);
+
+ trav_offset = rounded_offset;
+
+ while (trav_offset < rounded_end) {
+ fault = 0;
+
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ trav = ra_page_create (file, trav_offset);
+ if (!trav) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ fault = 1;
+ need_atime_update = 0;
+ }
+ trav->dirty = 0;
+
+ if (trav->ready) {
+ gf_msg_trace (frame->this->name, 0,
+ "HIT at offset=%"PRId64".",
+ trav_offset);
+ ra_frame_fill (trav, frame);
+ } else {
+ gf_msg_trace (frame->this->name, 0,
+ "IN-TRANSIT at "
+ "offset=%"PRId64".",
+ trav_offset);
+ ra_wait_on_page (trav, frame);
+ need_atime_update = 0;
}
+ }
+ unlock:
+ ra_file_unlock (file);
- if (trav->ready) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "HIT at offset=%"PRId64".",
- trav_offset);
- ra_frame_fill (trav, frame);
- } else {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "IN-TRANSIT at offset=%"PRId64".",
- trav_offset);
- ra_wait_on_page (trav, frame);
- need_atime_update = 0;
- }
- }
- unlock:
- ra_file_unlock (file);
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "MISS at offset=%"PRId64".",
- trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
-
- trav_offset += file->page_size;
- }
+ if (local->op_ret == -1) {
+ goto out;
+ }
+
+ if (fault) {
+ gf_msg_trace (frame->this->name, 0,
+ "MISS at offset=%"PRId64".",
+ trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
+
+ trav_offset += file->page_size;
+ }
- if (need_atime_update && conf->force_atime_update) {
- /* TODO: use untimens() since readv() can confuse underlying
- io-cache and others */
- ra_frame = copy_frame (frame);
+ if (need_atime_update && conf->force_atime_update) {
+ /* TODO: use untimens() since readv() can confuse underlying
+ io-cache and others */
+ ra_frame = copy_frame (frame);
if (ra_frame == NULL) {
goto out;
}
- STACK_WIND (ra_frame, ra_need_atime_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, 1, 1);
- }
+ STACK_WIND (ra_frame, ra_need_atime_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv,
+ file->fd, 1, 1, 0, NULL);
+ }
out:
- return ;
+ return ;
}
int
ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
+ GF_ASSERT (frame);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+
+ return 0;
}
int
ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- ra_file_t *file = NULL;
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- int op_errno = 0;
- int ret = 0;
- char expected_offset = 1;
- uint64_t tmp_file = 0;
+ ra_file_t *file = NULL;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ int op_errno = EINVAL;
+ char expected_offset = 1;
+ uint64_t tmp_file = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
- offset, size);
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "readv received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ conf = this->private;
- if (file->offset != offset) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unexpected offset (%"PRId64" != %"PRId64") resetting",
- file->offset, offset);
+ gf_msg_trace (this->name, 0,
+ "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
+ offset, size);
- expected_offset = file->expected = file->page_count = 0;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "expected offset (%"PRId64") when page_count=%d",
- offset, file->page_count);
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- if (file->expected < (conf->page_size * conf->page_count)) {
- file->expected += size;
- file->page_count = min ((file->expected / file->page_size),
- conf->page_count);
- }
- }
+ if (!file || file->disabled) {
+ goto disabled;
+ }
- if (!expected_offset) {
- flush_region (frame, file, 0, file->pages.prev->offset + 1);
- }
+ if (file->offset != offset) {
+ gf_msg_trace (this->name, 0,
+ "unexpected offset (%"PRId64" != %"PRId64") "
+ "resetting",
+ file->offset, offset);
+
+ expected_offset = file->expected = file->page_count = 0;
+ } else {
+ gf_msg_trace (this->name, 0,
+ "expected offset (%"PRId64") when page_count=%d",
+ offset, file->page_count);
+
+ if (file->expected < (file->page_size * conf->page_count)) {
+ file->expected += size;
+ file->page_count = min ((file->expected
+ / file->page_size),
+ conf->page_count);
+ }
+ }
- if (file->disabled) {
- STACK_WIND (frame, ra_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, size, offset);
- return 0;
- }
+ if (!expected_offset) {
+ flush_region (frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
- local = (void *) GF_CALLOC (1, sizeof (*local),
- gf_ra_mt_ra_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- local->fd = fd;
- local->offset = offset;
- local->size = size;
- local->wait_count = 1;
+ local->fd = fd;
+ local->offset = offset;
+ local->size = size;
+ local->wait_count = 1;
- local->fill.next = &local->fill;
- local->fill.prev = &local->fill;
+ local->fill.next = &local->fill;
+ local->fill.prev = &local->fill;
- pthread_mutex_init (&local->local_lock, NULL);
+ pthread_mutex_init (&local->local_lock, NULL);
- frame->local = local;
+ frame->local = local;
- dispatch_requests (frame, file);
+ dispatch_requests (frame, file);
- flush_region (frame, file, 0, floor (offset, file->page_size));
+ flush_region (frame, file, 0, floor (offset, file->page_size), 0);
read_ahead (frame, file);
- ra_frame_return (frame);
+ ra_frame_return (frame);
- file->offset = offset + size;
+ file->offset = offset + size;
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
+
+disabled:
+ STACK_WIND (frame, ra_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
ra_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ GF_ASSERT (frame);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
ra_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf)
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ GF_ASSERT (frame);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
int
-ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "flush received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ fd_ctx_get (fd, this, &tmp_file);
- STACK_WIND (frame, ra_flush_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush,
- fd);
- return 0;
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
+ }
+
+ STACK_WIND (frame, ra_flush_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->flush, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
return 0;
}
int
-ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "fsync received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1);
- }
+ fd_ctx_get (fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
+ }
- STACK_WIND (frame, ra_fsync_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
- return 0;
+ STACK_WIND (frame, ra_fsync_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsync, fd, datasync, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -616,60 +640,56 @@ unwind:
int
ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
+ ra_file_t *file = NULL;
- fd = frame->local;
+ GF_ASSERT (frame);
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ file = frame->local;
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
int
ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "writev received on fd with"
- "no file set in its context");
- goto unwind;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ frame->local = file;
+ /* reset the read-ahead counters too */
+ file->expected = file->page_count = 0;
}
- flush_region (frame, file, 0, file->pages.prev->offset+1);
-
- /* reset the read-ahead counters too */
- file->expected = file->page_count = 0;
+ STACK_WIND (frame, ra_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
- frame->local = fd;
-
- STACK_WIND (frame, ra_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
-
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -677,307 +697,581 @@ unwind:
int
ra_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
int
ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
- return 0;
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = loc->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
+ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
+
+ inode = loc->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_ftruncate.
+ */
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_truncate_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+void
+ra_page_dump (struct ra_page *page)
+{
+ int i = 0;
+ call_frame_t *frame = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ ra_waitq_t *trav = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+
+ gf_proc_dump_write ("size", "%"PRId64, page->size);
+
+ gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+
+ gf_proc_dump_write ("poisoned", "%s", page->poisoned ? "yes" : "no");
+
+ gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+
+ for (trav = page->waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ sprintf (key, "waiting-frame[%d]", i++);
+ gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
}
- UNLOCK (&inode->lock);
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- loc, offset);
- return 0;
+out:
+ return;
}
+/*
+ * Statedump hook for per-fd state (registered as dumpops.fdctx).
+ *
+ * Looks up the read-ahead context stored on @fd for @this and, if one
+ * exists, writes a "file" section containing the path, the fd pointer and
+ * the disabled flag.  When read-ahead is not disabled for the file it also
+ * writes the page size, the page count, the next expected offset and one
+ * entry per page on the file's page list (each followed by ra_page_dump).
+ *
+ * Returns 0 on every path; an fd with no read-ahead context is skipped.
+ */
+int32_t
+ra_fdctx_dump (xlator_t *this, fd_t *fd)
+{
+        ra_file_t    *file     = NULL;
+        ra_page_t    *page     = NULL;
+        int32_t       ret      = 0, i = 0;
+        uint64_t      tmp_file = 0;
+        char         *path     = NULL;
+        char          key[GF_DUMP_MAX_BUF_LEN]        = {0, };
+        char          key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+        fd_ctx_get (fd, this, &tmp_file);
+        file = (ra_file_t *)(long)tmp_file;
+
+        if (file == NULL) {
+                /* Nothing stored on this fd by read-ahead: nothing to dump. */
+                ret = 0;
+                goto out;
+        }
+
+        gf_proc_dump_build_key (key_prefix,
+                                "xlator.performance.read-ahead",
+                                "file");
+
+        gf_proc_dump_add_section (key_prefix);
+
+        /* NOTE(review): __inode_path is the double-underscore variant;
+         * presumably the statedump caller already holds whatever lock it
+         * requires -- confirm. */
+        ret = __inode_path (fd->inode, NULL, &path);
+        if (path != NULL) {
+                gf_proc_dump_write ("path", "%s", path);
+                GF_FREE (path);
+        }
+
+        gf_proc_dump_write ("fd", "%p", fd);
+
+        gf_proc_dump_write ("disabled", "%s", file->disabled ? "yes" : "no");
+
+        if (file->disabled) {
+                ret = 0;
+                goto out;
+        }
+
+        /* page_size is a uint64_t, so it is printed unsigned. */
+        gf_proc_dump_write ("page-size", "%"PRIu64, file->page_size);
+
+        gf_proc_dump_write ("page-count", "%u", file->page_count);
+
+        gf_proc_dump_write ("next-expected-offset-for-sequential-reads",
+                            "%"PRId64, file->offset);
+
+        for (page = file->pages.next; page != &file->pages;
+             page = page->next) {
+                /* Bounded formatting; the index advances once per page. */
+                snprintf (key, sizeof (key), "page[%d]", i++);
+                /* Dump the address of the list node itself.  Indexing the
+                 * node as an array (page[i]) would hand a struct by value
+                 * to "%p" and read past the node. */
+                gf_proc_dump_write (key, "%p", page);
+                ra_page_dump (page);
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
int
-ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 0);
+ }
+ }
+ UNLOCK (&inode->lock);
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat,
- fd);
- return 0;
+ STACK_WIND (frame, ra_attr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fstat, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_truncate.
+ */
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- fd, offset);
- return 0;
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+/*
+ * discard fop.
+ *
+ * Under the inode lock, walks every fd on fd->inode's fd_list and, for each
+ * one that carries a read-ahead context, calls flush_region() with the
+ * discarded offset/len before winding the discard to the first child.
+ * flush_region() is defined outside this hunk; presumably it drops the
+ * cached pages covering that range -- confirm.
+ *
+ * If @this or @fd is NULL the fop is unwound with -1/EINVAL instead.
+ * Always returns 0.
+ */
+static int
+ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+           size_t len, dict_t *xdata)
+{
+        int32_t    op_errno = EINVAL;
+        uint64_t   ctx_val  = 0;
+        inode_t   *inode    = NULL;
+        fd_t      *cur_fd   = NULL;
+        ra_file_t *ra_file  = NULL;
+
+        GF_ASSERT (frame);
+        GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+        GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+        inode = fd->inode;
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (cur_fd, &inode->fd_list, inode_list) {
+                        fd_ctx_get (cur_fd, this, &ctx_val);
+                        ra_file = (ra_file_t *)(long)ctx_val;
+
+                        /* Only fds with a read-ahead context are flushed. */
+                        if (ra_file != NULL)
+                                flush_region(frame, ra_file, offset, len, 1);
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        STACK_WIND (frame, ra_discard_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata);
+        return 0;
+
+unwind:
+        STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Callback for the zerofill wound by ra_zerofill: passes the child's
+ * result (op_ret, op_errno, prebuf, postbuf, xdata) straight back up.
+ * Always returns 0.
+ */
+int
+ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iatt *prebuf, struct iatt *postbuf,
+                dict_t *xdata)
+{
+        GF_ASSERT (frame);
+
+        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno,
+                             prebuf, postbuf, xdata);
+        return 0;
+}
+
+/*
+ * zerofill fop.
+ *
+ * Under the inode lock, walks every fd on fd->inode's fd_list and, for each
+ * one that carries a read-ahead context, calls flush_region() with the
+ * zero-filled offset/len before winding the zerofill to the first child.
+ * flush_region() is defined outside this hunk; presumably it drops the
+ * cached pages covering that range -- confirm.
+ *
+ * If @this or @fd is NULL the fop is unwound with -1/EINVAL instead.
+ * Always returns 0.
+ */
+static int
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            off_t len, dict_t *xdata)
+{
+        int32_t    op_errno = EINVAL;
+        uint64_t   ctx_val  = 0;
+        inode_t   *inode    = NULL;
+        fd_t      *cur_fd   = NULL;
+        ra_file_t *ra_file  = NULL;
+
+        GF_ASSERT (frame);
+        GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+        GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+        inode = fd->inode;
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (cur_fd, &inode->fd_list, inode_list) {
+                        fd_ctx_get (cur_fd, this, &ctx_val);
+                        ra_file = (ra_file_t *)(long)ctx_val;
+
+                        /* Only fds with a read-ahead context are flushed. */
+                        if (ra_file != NULL)
+                                flush_region(frame, ra_file, offset, len, 1);
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->zerofill, fd,
+                    offset, len, xdata);
+        return 0;
+
+unwind:
+        STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
}
int
ra_priv_dump (xlator_t *this)
{
- ra_conf_t *conf = NULL;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ gf_boolean_t add_section = _gf_false;
- if (!this)
- return -1;
+ if (!this) {
+ goto out;
+ }
conf = this->private;
if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ READ_AHEAD_MSG_XLATOR_CONF_NULL,
"conf null in xlator");
- return -1;
- }
-
- ret = pthread_mutex_trylock (&conf->conf_lock);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Unable to lock client %s"
- " errno: %d", this->name, errno);
- return -1;
+ goto out;
}
-
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.read-ahead",
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.read-ahead",
"priv");
gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_build_key (key, key_prefix, "page_size");
- gf_proc_dump_write (key, "%d", conf->page_size);
- gf_proc_dump_build_key (key, key_prefix, "page_count");
- gf_proc_dump_write (key, "%d", conf->page_count);
- gf_proc_dump_build_key (key, key_prefix, "force_atime_update");
- gf_proc_dump_write (key, "%d", conf->force_atime_update);
+ add_section = _gf_true;
+ ret = pthread_mutex_trylock (&conf->conf_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("page_size", "%d", conf->page_size);
+ gf_proc_dump_write ("page_count", "%d", conf->page_count);
+ gf_proc_dump_write ("force_atime_update", "%d",
+ conf->force_atime_update);
+ }
pthread_mutex_unlock (&conf->conf_lock);
- return 0;
+ ret = 0;
+out:
+ if (ret && conf) {
+ if (add_section == _gf_false)
+ gf_proc_dump_add_section (key_prefix);
+
+ gf_proc_dump_write ("Unable to dump priv",
+ "(Lock acquisition failed) %s", this->name);
+ }
+ return ret;
}
+
int32_t
mem_acct_init (xlator_t *this)
{
int ret = -1;
- if (!this)
- return ret;
+ if (!this) {
+ goto out;
+ }
ret = xlator_mem_acct_init (this, gf_ra_mt_end + 1);
-
+
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ READ_AHEAD_MSG_NO_MEMORY, "Memory accounting init"
+ "failed");
}
+out:
+ return ret;
+}
+
+/*
+ * Re-reads the "page-count" and "page-size" options from @options into the
+ * translator's private configuration.
+ *
+ * Returns 0 when both options were processed, -1 when @this or
+ * this->private is NULL or when one of the GF_OPTION_RECONF invocations
+ * jumps to the out label.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        int        ret     = -1;
+        ra_conf_t *ra_conf = NULL;
+
+        GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+        GF_VALIDATE_OR_GOTO ("read-ahead", this->private, out);
+
+        ra_conf = this->private;
+
+        GF_OPTION_RECONF ("page-count", ra_conf->page_count, options, uint32,
+                          out);
+        GF_OPTION_RECONF ("page-size", ra_conf->page_size, options,
+                          size_uint64, out);
+
+        ret = 0;
+out:
return ret;
}
int
init (xlator_t *this)
{
- ra_conf_t *conf = NULL;
- dict_t *options = this->options;
- char *page_count_string = NULL;
- int32_t ret = -1;
+ ra_conf_t *conf = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: read-ahead not configured with exactly one"
+ if (!this->children || this->children->next) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: read-ahead not configured with exactly one"
" child");
goto out;
- }
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = (void *) GF_CALLOC (1, sizeof (*conf),
- gf_ra_mt_ra_conf_t);
+ if (!this->parents) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ READ_AHEAD_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
+
+ conf = (void *) GF_CALLOC (1, sizeof (*conf), gf_ra_mt_ra_conf_t);
if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
goto out;
}
- conf->page_size = this->ctx->page_size;
- conf->page_count = 4;
-
- if (dict_get (options, "page-count"))
- page_count_string = data_to_str (dict_get (options,
- "page-count"));
- if (page_count_string)
- {
- if (gf_string2uint_base10 (page_count_string, &conf->page_count)
- != 0)
- {
- gf_log ("read-ahead",
- GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "page-count\"",
- page_count_string);
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_count = %u",
- conf->page_count);
- }
-
- if (dict_get (options, "force-atime-update")) {
- char *force_atime_update_str = data_to_str (dict_get (options,
- "force-atime-update"));
- if (gf_string2boolean (force_atime_update_str,
- &conf->force_atime_update) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'force-atime-update' takes only boolean "
- "options");
- goto out;
- }
- if (conf->force_atime_update)
- gf_log (this->name, GF_LOG_DEBUG, "Forcing atime "
- "updates on cache hit");
- }
+ conf->page_size = this->ctx->page_size;
- conf->files.next = &conf->files;
- conf->files.prev = &conf->files;
+ GF_OPTION_INIT ("page-size", conf->page_size, size_uint64, out);
- pthread_mutex_init (&conf->conf_lock, NULL);
- this->private = conf;
+ GF_OPTION_INIT ("page-count", conf->page_count, uint32, out);
+
+ GF_OPTION_INIT ("force-atime-update", conf->force_atime_update, bool, out);
+
+ conf->files.next = &conf->files;
+ conf->files.prev = &conf->files;
+
+ pthread_mutex_init (&conf->conf_lock, NULL);
+
+ this->local_pool = mem_pool_new (ra_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM, READ_AHEAD_MSG_NO_MEMORY,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = conf;
ret = 0;
out:
if (ret == -1) {
- if (conf != NULL) {
- GF_FREE (conf);
- }
+ GF_FREE (conf);
}
return ret;
}
+
void
fini (xlator_t *this)
{
- ra_conf_t *conf = this->private;
+ ra_conf_t *conf = NULL;
- if (conf == NULL)
- return;
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
- pthread_mutex_destroy (&conf->conf_lock);
- GF_FREE (conf);
+ conf = this->private;
+ if (conf == NULL) {
+ goto out;
+ }
- this->private = NULL;
- return;
+ this->private = NULL;
+
+ /* The file structures allocated in open and create are not deleted yet.
+ * Until they are freed, the assert below is replaced by a log message.
+ GF_ASSERT ((conf->files.next == &conf->files)
+ && (conf->files.prev == &conf->files));
+ */
+ if (!((conf->files.next == &conf->files)
+ && (conf->files.prev == &conf->files))) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND,
+ "undestroyed read ahead file structures found");
+ }
+
+ pthread_mutex_destroy (&conf->conf_lock);
+ GF_FREE (conf);
+
+out:
+ return;
}
struct xlator_fops fops = {
- .open = ra_open,
- .create = ra_create,
- .readv = ra_readv,
- .writev = ra_writev,
- .flush = ra_flush,
- .fsync = ra_fsync,
- .truncate = ra_truncate,
- .ftruncate = ra_ftruncate,
- .fstat = ra_fstat,
+ .open = ra_open,
+ .create = ra_create,
+ .readv = ra_readv,
+ .writev = ra_writev,
+ .flush = ra_flush,
+ .fsync = ra_fsync,
+ .truncate = ra_truncate,
+ .ftruncate = ra_ftruncate,
+ .fstat = ra_fstat,
+ .discard = ra_discard,
+ .zerofill = ra_zerofill,
};
struct xlator_cbks cbks = {
- .release = ra_release,
+ .release = ra_release,
};
struct xlator_dumpops dumpops = {
.priv = ra_priv_dump,
+ .fdctx = ra_fdctx_dump,
};
struct volume_options options[] = {
- { .key = {"force-atime-update"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"page-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 16
+ { .key = {"force-atime-update"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"page-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 16,
+ .default_value = "4",
+ .description = "Number of pages that will be pre-fetched"
+ },
+ { .key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 1048576 * 64,
+ .default_value = "131072",
+ .description = "Page size with which read-ahead performs server I/O"
},
- { .key = {NULL} },
+ { .key = {NULL} },
};
diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h
index 1f56e85d2ea..debcd8fdeb4 100644
--- a/xlators/performance/read-ahead/src/read-ahead.h
+++ b/xlators/performance/read-ahead/src/read-ahead.h
@@ -1,30 +1,16 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __READ_AHEAD_H
#define __READ_AHEAD_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
@@ -41,77 +27,79 @@ struct ra_waitq;
struct ra_waitq {
- struct ra_waitq *next;
- void *data;
+ struct ra_waitq *next;
+ void *data;
};
struct ra_fill {
- struct ra_fill *next;
- struct ra_fill *prev;
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
+ struct ra_fill *next;
+ struct ra_fill *prev;
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
struct iobref *iobref;
};
struct ra_local {
- mode_t mode;
- struct ra_fill fill;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- off_t pending_offset;
- size_t pending_size;
- fd_t *fd;
- int32_t wait_count;
- pthread_mutex_t local_lock;
+ mode_t mode;
+ struct ra_fill fill;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ off_t pending_offset;
+ size_t pending_size;
+ fd_t *fd;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
};
struct ra_page {
- struct ra_page *next;
- struct ra_page *prev;
- struct ra_file *file;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ra_waitq *waitq;
+ struct ra_page *next;
+ struct ra_page *prev;
+ struct ra_file *file;
+ char dirty; /* Internal request, not from user. */
+ char poisoned; /* Pending read invalidated by write. */
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ra_waitq *waitq;
struct iobref *iobref;
+ char stale;
};
struct ra_file {
- struct ra_file *next;
- struct ra_file *prev;
- struct ra_conf *conf;
- fd_t *fd;
- int disabled;
- size_t expected;
- struct ra_page pages;
- off_t offset;
- size_t size;
- int32_t refcount;
- pthread_mutex_t file_lock;
- struct iatt stbuf;
- uint64_t page_size;
- uint32_t page_count;
+ struct ra_file *next;
+ struct ra_file *prev;
+ struct ra_conf *conf;
+ fd_t *fd;
+ int disabled;
+ size_t expected;
+ struct ra_page pages;
+ off_t offset;
+ size_t size;
+ int32_t refcount;
+ pthread_mutex_t file_lock;
+ struct iatt stbuf;
+ uint64_t page_size;
+ uint32_t page_count;
};
struct ra_conf {
- uint64_t page_size;
- uint32_t page_count;
- void *cache_block;
- struct ra_file files;
- gf_boolean_t force_atime_update;
- pthread_mutex_t conf_lock;
+ uint64_t page_size;
+ uint32_t page_count;
+ void *cache_block;
+ struct ra_file files;
+ gf_boolean_t force_atime_update;
+ pthread_mutex_t conf_lock;
};
@@ -124,19 +112,19 @@ typedef struct ra_fill ra_fill_t;
ra_page_t *
ra_page_get (ra_file_t *file,
- off_t offset);
+ off_t offset);
ra_page_t *
ra_page_create (ra_file_t *file,
- off_t offset);
+ off_t offset);
void
ra_page_fault (ra_file_t *file,
- call_frame_t *frame,
- off_t offset);
+ call_frame_t *frame,
+ off_t offset);
void
ra_wait_on_page (ra_page_t *page,
- call_frame_t *frame);
+ call_frame_t *frame);
ra_waitq_t *
ra_page_wakeup (ra_page_t *page);
@@ -146,8 +134,8 @@ ra_page_flush (ra_page_t *page);
ra_waitq_t *
ra_page_error (ra_page_t *page,
- int32_t op_ret,
- int32_t op_errno);
+ int32_t op_ret,
+ int32_t op_errno);
void
ra_page_purge (ra_page_t *page);
@@ -156,7 +144,7 @@ ra_frame_return (call_frame_t *frame);
void
ra_frame_fill (ra_page_t *page,
- call_frame_t *frame);
+ call_frame_t *frame);
void
ra_file_destroy (ra_file_t *file);
@@ -164,36 +152,36 @@ ra_file_destroy (ra_file_t *file);
static inline void
ra_file_lock (ra_file_t *file)
{
- pthread_mutex_lock (&file->file_lock);
+ pthread_mutex_lock (&file->file_lock);
}
static inline void
ra_file_unlock (ra_file_t *file)
{
- pthread_mutex_unlock (&file->file_lock);
+ pthread_mutex_unlock (&file->file_lock);
}
static inline void
ra_conf_lock (ra_conf_t *conf)
{
- pthread_mutex_lock (&conf->conf_lock);
+ pthread_mutex_lock (&conf->conf_lock);
}
static inline void
ra_conf_unlock (ra_conf_t *conf)
{
- pthread_mutex_unlock (&conf->conf_lock);
+ pthread_mutex_unlock (&conf->conf_lock);
}
static inline void
ra_local_lock (ra_local_t *local)
{
- pthread_mutex_lock (&local->local_lock);
+ pthread_mutex_lock (&local->local_lock);
}
static inline void
ra_local_unlock (ra_local_t *local)
{
- pthread_mutex_unlock (&local->local_lock);
+ pthread_mutex_unlock (&local->local_lock);
}
#endif /* __READ_AHEAD_H */
diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/performance/readdir-ahead/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am
new file mode 100644
index 00000000000..e54ab168a09
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = readdir-ahead.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+readdir_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+readdir_ahead_la_SOURCES = readdir-ahead.c
+readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h readdir-ahead-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
new file mode 100644
index 00000000000..39e2c536975
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __RDA_MEM_TYPES_H__
+#define __RDA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/*
+ * Allocation type tags for the readdir-ahead translator.  readdir-ahead.c
+ * passes gf_rda_mt_rda_fd_ctx as the type argument of GF_CALLOC when it
+ * allocates an fd context.  Numbering starts right after gf_common_mt_end.
+ */
+enum gf_rda_mem_types_ {
+ gf_rda_mt_rda_local = gf_common_mt_end + 1,
+ gf_rda_mt_rda_fd_ctx,
+ gf_rda_mt_rda_priv,
+ /* Last entry; presumably the sentinel used to size accounting -- confirm. */
+ gf_rda_mt_end
+};
+
+#endif
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
new file mode 100644
index 00000000000..0e19348b954
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
@@ -0,0 +1,105 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _READDIR_AHEAD_MESSAGES_H_
+#define _READDIR_AHEAD_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file readdir-ahead-messages.h
+ * \brief READDIR_AHEAD log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_READDIR_AHEAD_BASE GLFS_MSGID_COMP_READDIR_AHEAD
+#define GLFS_READDIR_AHEAD_NUM_MESSAGES 5
+#define GLFS_MSGID_END (GLFS_READDIR_AHEAD_BASE +\
+ GLFS_READDIR_AHEAD_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_READDIR_AHEAD_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED\
+ (GLFS_READDIR_AHEAD_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READDIR_AHEAD_MSG_VOL_MISCONFIGURED (GLFS_READDIR_AHEAD_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READDIR_AHEAD_MSG_NO_MEMORY (GLFS_READDIR_AHEAD_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB \
+ (GLFS_READDIR_AHEAD_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define READDIR_AHEAD_MSG_OUT_OF_SEQUENCE (GLFS_READDIR_AHEAD_BASE + 5)
+
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _READDIR_AHEAD_MESSAGES_H_ */
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
new file mode 100644
index 00000000000..c3daf916e97
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
@@ -0,0 +1,682 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * performance/readdir-ahead preloads a local buffer with directory entries
+ * on opendir. The optimization involves using maximum sized gluster rpc
+ * requests (128k) to minimize overhead of smaller client requests.
+ *
+ * For example, fuse currently supports a maximum readdir buffer of 4k
+ * (regardless of the filesystem client's buffer size). readdir-ahead should
+ * effectively convert these smaller requests into fewer, larger sized requests
+ * for simple, sequential workloads (i.e., ls).
+ *
+ * The translator is currently designed to handle the simple, sequential case
+ * only. If a non-sequential directory read occurs, readdir-ahead disables
+ * preloads on the directory.
+ */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "call-stub.h"
+#include "readdir-ahead.h"
+#include "readdir-ahead-mem-types.h"
+#include "defaults.h"
+#include "readdir-ahead-messages.h"
+static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+
+/*
+ * Return the readdir-ahead context attached to an fd. If the fd has none
+ * yet, allocate one in the RDA_FD_NEW state and attach it. The lookup and
+ * the attach both happen under fd->lock.
+ *
+ * Returns NULL if the context could not be allocated or attached.
+ */
+static struct
+rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this)
+{
+        uint64_t raw = 0;
+        struct rda_fd_ctx *rda_ctx = NULL;
+
+        LOCK(&fd->lock);
+
+        if (__fd_ctx_get(fd, this, &raw) >= 0) {
+                /* already attached: hand back the stored pointer */
+                rda_ctx = (struct rda_fd_ctx *) raw;
+                goto unlock;
+        }
+
+        rda_ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx),
+                            gf_rda_mt_rda_fd_ctx);
+        if (!rda_ctx)
+                goto unlock;
+
+        /* ctx offset values are left at the 0 provided by GF_CALLOC */
+        LOCK_INIT(&rda_ctx->lock);
+        INIT_LIST_HEAD(&rda_ctx->entries.list);
+        rda_ctx->state = RDA_FD_NEW;
+        rda_ctx->xattrs = NULL;
+
+        if (__fd_ctx_set(fd, this, (uint64_t) rda_ctx) < 0) {
+                GF_FREE(rda_ctx);
+                rda_ctx = NULL;
+        }
+
+unlock:
+        UNLOCK(&fd->lock);
+        return rda_ctx;
+}
+
+/*
+ * Put a context back into its pristine RDA_FD_NEW state: buffered entries
+ * are freed, the stored xattr request dict is unreferenced, and the offsets,
+ * size and pending errno are zeroed.
+ */
+static void
+rda_reset_ctx(struct rda_fd_ctx *ctx)
+{
+        /* release what the context owns ... */
+        gf_dirent_free(&ctx->entries);
+        if (ctx->xattrs != NULL) {
+                dict_unref (ctx->xattrs);
+                ctx->xattrs = NULL;
+        }
+
+        /* ... then rewind the bookkeeping */
+        ctx->op_errno = 0;
+        ctx->next_offset = 0;
+        ctx->cur_size = 0;
+        ctx->cur_offset = 0;
+        ctx->state = RDA_FD_NEW;
+}
+
+/*
+ * Decide whether a request can be answered from the preload buffer right
+ * now. The caller has already verified the offset. A request is servable
+ * when the preload has finished (end of directory or error), or when the
+ * buffer is unplugged and holds some data. request_size is not consulted.
+ */
+static gf_boolean_t
+rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
+{
+        /* a completed preload (EOD or error) can always be answered */
+        if (ctx->state & (RDA_FD_EOD | RDA_FD_ERROR))
+                return _gf_true;
+
+        /* a plugged buffer is still filling up; make the caller wait */
+        if (ctx->state & RDA_FD_PLUGGED)
+                return _gf_false;
+
+        return (ctx->cur_size > 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * Move entries from the head of the fd's preload list onto the reply list
+ * until the next entry would push the reply past request_size. cur_size and
+ * cur_offset are updated for every entry moved, and the context is plugged
+ * once the buffer is at or below the low watermark. ctx must be locked.
+ *
+ * Returns the number of entries moved.
+ */
+static int32_t
+__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
+                     struct rda_fd_ctx *ctx)
+{
+        struct rda_priv *priv = this->private;
+        gf_dirent_t *cur = NULL;
+        gf_dirent_t *nxt = NULL;
+        size_t entry_len = 0;
+        size_t served = 0;
+        int32_t nr_entries = 0;
+
+        list_for_each_entry_safe(cur, nxt, &ctx->entries.list, list) {
+                entry_len = gf_dirent_size(cur->d_name);
+                if (served + entry_len > request_size)
+                        break;
+
+                /* hand the entry over, keeping the original order */
+                list_del_init(&cur->list);
+                list_add_tail(&cur->list, &entries->list);
+
+                served += entry_len;
+                ctx->cur_size -= entry_len;
+                ctx->cur_offset = cur->d_off;
+                nr_entries++;
+        }
+
+        if (ctx->cur_size <= priv->rda_low_wmark)
+                ctx->state |= RDA_FD_PLUGGED;
+
+        return nr_entries;
+}
+
+/*
+ * Resume function for a stubbed readdirp request: serve up to 'size' bytes
+ * of entries from the fd's preload buffer and unwind the frame.
+ *
+ * This function does not take ctx->lock itself; both call_resume() call
+ * sites in this file run with ctx->lock held.
+ *
+ * If nothing could be served and an error is pending, the request fails
+ * with -1, the error flag is consumed and the context switches to bypass.
+ * Always returns 0; the result is delivered through the unwind.
+ */
+static int32_t
+rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ gf_dirent_t entries;
+ int32_t ret;
+ struct rda_fd_ctx *ctx;
+ int op_errno = 0;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ INIT_LIST_HEAD(&entries.list);
+ ret = __rda_serve_readdirp(this, &entries, size, ctx);
+
+ if (!ret && (ctx->state & RDA_FD_ERROR)) {
+ ret = -1;
+ ctx->state &= ~RDA_FD_ERROR;
+
+ /*
+ * the preload has stopped running in the event of an error, so
+ * pass all future requests along
+ */
+ ctx->state |= RDA_FD_BYPASS;
+ }
+
+ /*
+ * Use the op_errno sent by lower layers as xlators above will check
+ * the op_errno for identifying whether readdir is completed or not.
+ */
+ op_errno = ctx->op_errno;
+
+ STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
+ /* the entries moved onto the local list are released after the unwind */
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+/*
+ * readdirp fop entry point.
+ *
+ * A request is handled in one of three ways:
+ * - bypass: the context is (or becomes) marked RDA_FD_BYPASS and the
+ *   request is wound unchanged to the first child;
+ * - serve: the request is wrapped in a stub that is resumed at once when
+ *   the preload buffer can answer it, or parked in ctx->stub otherwise;
+ * - err: the context or the stub could not be allocated and the request is
+ *   unwound with ENOMEM.
+ *
+ * A read at offset 0 on a fully drained, completed buffer resets the
+ * context and restarts the preload. Always returns 0.
+ */
+static int32_t
+rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ struct rda_fd_ctx *ctx;
+ call_stub_t *stub;
+ int fill = 0;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ /* unlocked fast-path check; repeated below under the lock */
+ if (ctx->state & RDA_FD_BYPASS)
+ goto bypass;
+
+ LOCK(&ctx->lock);
+
+ /* recheck now that we have the lock */
+ if (ctx->state & RDA_FD_BYPASS) {
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If a new read comes in at offset 0 and the buffer has been
+ * completed, reset the context and kickstart the filler again.
+ */
+ if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
+ rda_reset_ctx(ctx);
+ /*
+ * Unref and discard the 'list of xattrs to be fetched'
+ * stored during opendir call. This is done above - inside
+ * rda_reset_ctx().
+ * Now, ref the xdata passed by md-cache in actual readdirp()
+ * call and use that for all subsequent internal readdirp()
+ * requests issued by this xlator.
+ */
+ ctx->xattrs = dict_ref (xdata);
+ fill = 1;
+ }
+
+ /*
+ * If a readdir occurs at an unexpected offset or we already have a
+ * request pending, admit defeat and just get out of the way.
+ */
+ if (off != ctx->cur_offset || ctx->stub) {
+ ctx->state |= RDA_FD_BYPASS;
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata);
+ if (!stub) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ /*
+ * If we haven't bypassed the preload, this means we can either serve
+ * the request out of the preload or the request that enables us to do
+ * so is in flight...
+ */
+ if (rda_can_serve_readdirp(ctx, size))
+ call_resume(stub);
+ else
+ ctx->stub = stub;
+
+ UNLOCK(&ctx->lock);
+
+ /* restart the preload only after ctx->lock has been dropped */
+ if (fill)
+ rda_fill_fd(frame, this, fd);
+
+ return 0;
+
+bypass:
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+/*
+ * Callback for the internal readdirp issued by rda_fill_fd().
+ *
+ * Under ctx->lock it appends the returned entries to the preload buffer,
+ * updates the plug/EOD/error state, and resumes a parked request if it
+ * has become servable. The preload is re-armed while the context is still
+ * RDA_FD_RUNNING; otherwise the stored xattrs dict and the fill frame are
+ * released. Always returns 0.
+ */
+static int32_t
+rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *dirent, *tmp;
+ struct rda_local *local = frame->local;
+ struct rda_fd_ctx *ctx = local->ctx;
+ struct rda_priv *priv = this->private;
+ int fill = 1;
+
+ LOCK(&ctx->lock);
+
+ /* Verify that the preload buffer is still pending on this data. */
+ if (ctx->next_offset != local->offset) {
+ gf_msg(this->name, GF_LOG_ERROR,
+ 0, READDIR_AHEAD_MSG_OUT_OF_SEQUENCE,
+ "Out of sequence directory preload.");
+ ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR);
+ ctx->op_errno = EUCLEAN;
+
+ goto out;
+ }
+
+ if (entries) {
+ list_for_each_entry_safe(dirent, tmp, &entries->list, list) {
+ list_del_init(&dirent->list);
+ /* must preserve entry order */
+ list_add_tail(&dirent->list, &ctx->entries.list);
+
+ ctx->cur_size += gf_dirent_size(dirent->d_name);
+ ctx->next_offset = dirent->d_off;
+ }
+ }
+
+ /* enough data buffered: allow requests to be served again */
+ if (ctx->cur_size >= priv->rda_high_wmark)
+ ctx->state &= ~RDA_FD_PLUGGED;
+
+ if (!op_ret) {
+ /* we've hit eod */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_EOD;
+ ctx->op_errno = op_errno;
+ } else if (op_ret == -1) {
+ /* kill the preload and pend the error */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_ERROR;
+ ctx->op_errno = op_errno;
+ }
+
+ /*
+ * NOTE: The strict bypass logic in readdirp() means a pending request
+ * is always based on ctx->cur_offset.
+ */
+ if (ctx->stub &&
+ rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
+ call_resume(ctx->stub);
+ ctx->stub = NULL;
+ }
+
+out:
+ /*
+ * If we have been marked for bypass and have no pending stub, clear the
+ * run state so we stop preloading the context with entries.
+ */
+ if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub)
+ ctx->state &= ~RDA_FD_RUNNING;
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 0;
+ if (ctx->xattrs) {
+ /*
+ * fill = 0 and hence rda_fill_fd() won't be invoked.
+ * unref for ref taken in rda_fill_fd()
+ */
+ dict_unref (ctx->xattrs);
+ ctx->xattrs = NULL;
+ }
+ STACK_DESTROY(ctx->fill_frame->root);
+ ctx->fill_frame = NULL;
+ }
+
+ UNLOCK(&ctx->lock);
+
+ /* issue the next preload request outside ctx->lock */
+ if (fill)
+ rda_fill_fd(frame, this, local->fd);
+
+ return 0;
+}
+
+/*
+ * Start (or continue) prepopulating the fd context with directory entries.
+ *
+ * On the first call for a context a dedicated fill frame is created with
+ * copy_frame() and remembered in ctx->fill_frame; later calls reuse it.
+ * A readdirp of priv->rda_req_size bytes is then wound to the first child
+ * at ctx->next_offset, with rda_fill_fd_cbk() as the callback.
+ *
+ * Returns 0 once the request has been wound, -1 if the context, the frame
+ * copy or its local could not be obtained.
+ */
+static int
+rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ call_frame_t *nframe = NULL;
+ struct rda_local *local = NULL;
+ struct rda_local *orig_local = frame->local;
+ struct rda_fd_ctx *ctx;
+ off_t offset;
+ struct rda_priv *priv = this->private;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ LOCK(&ctx->lock);
+
+ /* first fill of a fresh/reset context: mark it running, and start */
+ /* plugged when a low watermark is configured */
+ if (ctx->state & RDA_FD_NEW) {
+ ctx->state &= ~RDA_FD_NEW;
+ ctx->state |= RDA_FD_RUNNING;
+ if (priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+ }
+
+ offset = ctx->next_offset;
+
+ if (!ctx->fill_frame) {
+ nframe = copy_frame(frame);
+ if (!nframe) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local->ctx = ctx;
+ local->fd = fd;
+ nframe->local = local;
+
+ ctx->fill_frame = nframe;
+
+ if (!ctx->xattrs && orig_local && orig_local->xattrs) {
+ /* when this function is invoked by rda_opendir_cbk */
+ ctx->xattrs = dict_ref(orig_local->xattrs);
+ }
+ } else {
+ nframe = ctx->fill_frame;
+ local = nframe->local;
+ }
+
+ /* remembered so the callback can detect an out-of-sequence reply */
+ local->offset = offset;
+
+ UNLOCK(&ctx->lock);
+
+ STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd,
+ priv->rda_req_size, offset, ctx->xattrs);
+
+ return 0;
+
+err:
+ /* only reached with nframe set when the local allocation failed */
+ if (nframe)
+ FRAME_DESTROY(nframe);
+
+ return -1;
+}
+
+
+/*
+ * Split the space-separated key list in 'payload' and add every key to
+ * 'dict' with an int8 value of 0. 'payload' is tokenized in place, i.e. it
+ * is modified.
+ *
+ * Returns 0 once all keys were added, the dict_set_int8() result if a key
+ * could not be added, and -1 if either argument is NULL or the payload
+ * contains no key at all.
+ */
+static int
+rda_unpack_mdc_loaded_keys_to_dict(char *payload, dict_t *dict)
+{
+        int ret = -1;
+        char *mdc_key = NULL;
+        char *saveptr = NULL;
+
+        if (!payload || !dict) {
+                goto out;
+        }
+
+        /*
+         * strtok_r() instead of strtok(): strtok() keeps its scan position
+         * in hidden static state shared by all threads, so concurrent
+         * opendir calls could corrupt each other's tokenization.
+         */
+        mdc_key = strtok_r(payload, " ", &saveptr);
+        while (mdc_key != NULL) {
+                ret = dict_set_int8 (dict, mdc_key, 0);
+                if (ret) {
+                        goto out;
+                }
+                mdc_key = strtok_r(NULL, " ", &saveptr);
+        }
+
+out:
+        return ret;
+}
+
+
+/*
+ * opendir callback: on success start preloading the directory, then unwind
+ * to the caller and release the local (and the xattrs dict it carries)
+ * that rda_opendir attached to the frame.
+ */
+static int32_t
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        struct rda_local *local = frame->local;
+
+        if (op_ret == 0)
+                rda_fill_fd(frame, this, fd);
+
+        /* detach the local before the unwind; it is released below */
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
+
+        if (local == NULL)
+                return 0;
+
+        if (local->xattrs != NULL) {
+                /* unref for dict_new() done in rda_opendir */
+                dict_unref (local->xattrs);
+                local->xattrs = NULL;
+        }
+
+        mem_put (local);
+
+        return 0;
+}
+
+/*
+ * opendir fop entry point.
+ *
+ * If xdata carries GF_MDC_LOADED_KEY_NAMES, the key list is unpacked into a
+ * new dict that is stored in a frame local for rda_opendir_cbk. The key is
+ * then removed from xdata and the opendir is wound to the first child.
+ *
+ * A missing key or a failed unpack is not an error: the call is wound
+ * without a local. Only allocation failures (dict or local) unwind the
+ * call with ENOMEM. Always returns 0.
+ */
+static int32_t
+rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = 0;
+ char *payload = NULL;
+ struct rda_local *local = NULL;
+ dict_t *xdata_from_req = NULL;
+
+ if (xdata) {
+ /*
+ * Retrieve list of keys set by md-cache xlator and store it
+ * in local to be consumed in rda_opendir_cbk
+ */
+ ret = dict_get_str (xdata, GF_MDC_LOADED_KEY_NAMES, &payload);
+ if (ret)
+ goto wind;
+
+ xdata_from_req = dict_new();
+ if (!xdata_from_req) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* note: the unpack helper tokenizes payload in place */
+ ret = rda_unpack_mdc_loaded_keys_to_dict((char *) payload,
+ xdata_from_req);
+ if (ret) {
+ dict_unref(xdata_from_req);
+ goto wind;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ dict_unref(xdata_from_req);
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* the local now owns the reference taken by dict_new() */
+ local->xattrs = xdata_from_req;
+ frame->local = local;
+ }
+
+wind:
+ if (xdata)
+ /* Remove the key after consumption. */
+ dict_del (xdata, GF_MDC_LOADED_KEY_NAMES);
+
+ STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, fd, xdata);
+ return 0;
+}
+
+/*
+ * releasedir callback: detach the readdir-ahead context from the fd and
+ * release everything it owns (buffered entries, xattrs dict, fill frame,
+ * lock, and the context itself). A still-pending stub is only reported.
+ *
+ * Returns -1 if the context could not be removed from the fd, 0 otherwise.
+ */
+static int32_t
+rda_releasedir(xlator_t *this, fd_t *fd)
+{
+        uint64_t val;
+        struct rda_fd_ctx *ctx;
+
+        if (fd_ctx_del(fd, this, &val) < 0)
+                return -1;
+
+        ctx = (struct rda_fd_ctx *) val;
+        if (!ctx)
+                return 0;
+
+        rda_reset_ctx(ctx);
+
+        if (ctx->fill_frame)
+                STACK_DESTROY(ctx->fill_frame->root);
+
+        if (ctx->stub)
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+                       "released a directory with a pending stub");
+
+        /* pairs with the LOCK_INIT() done when the context was created */
+        LOCK_DESTROY(&ctx->lock);
+        GF_FREE(ctx);
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this translator's allocation types.
+ *
+ * Returns the xlator_mem_acct_init() result (0 on success), or -1 when
+ * 'this' is NULL.
+ */
+int32_t
+mem_acct_init(xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                goto out;
+
+        ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
+
+        /* the two literals used to concatenate into "...initfailed" */
+        if (ret != 0)
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+                       READDIR_AHEAD_MSG_NO_MEMORY, "Memory accounting init "
+                       "failed");
+
+out:
+        return ret;
+}
+
+/*
+ * Re-read the three tunables (request size, low and high watermark) from
+ * 'options' into the private structure.
+ *
+ * Returns 0 on success, -1 as soon as one option fails to reconfigure.
+ */
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+        int ret = -1;
+        struct rda_priv *priv = this->private;
+
+        GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
+                         uint32, err);
+        GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options,
+                         size_uint64, err);
+        GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options,
+                         size_uint64, err);
+
+        ret = 0;
+err:
+        return ret;
+}
+
+/*
+ * Translator constructor: validate the graph position (exactly one child),
+ * allocate the private structure and the pool for frame locals, and load
+ * the three tunables.
+ *
+ * Returns 0 on success. On failure everything allocated here is released,
+ * the corresponding xlator fields are cleared, and -1 is returned.
+ */
+int
+init(xlator_t *this)
+{
+        struct rda_priv *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
+
+        if (!this->children || this->children->next) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+                       "FATAL: readdir-ahead not configured with exactly one"
+                       " child");
+                goto err;
+        }
+
+        if (!this->parents) {
+                gf_msg(this->name, GF_LOG_WARNING, 0,
+                       READDIR_AHEAD_MSG_VOL_MISCONFIGURED,
+                       "dangling volume. check volfile ");
+        }
+
+        priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
+        if (!priv)
+                goto err;
+        this->private = priv;
+
+        this->local_pool = mem_pool_new(struct rda_local, 32);
+        if (!this->local_pool)
+                goto err;
+
+        GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err);
+        GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
+        GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64,
+                       err);
+
+        return 0;
+
+err:
+        /* the validation at the top jumps here with this == NULL */
+        if (this) {
+                if (this->local_pool) {
+                        mem_pool_destroy(this->local_pool);
+                        this->local_pool = NULL;
+                }
+                if (priv) {
+                        GF_FREE(priv);
+                        /* do not leave a pointer to freed memory behind */
+                        this->private = NULL;
+                }
+        }
+
+        return -1;
+}
+
+
+/*
+ * Translator destructor: release what init() allocated (the pool for frame
+ * locals and the private structure) and clear the xlator fields so no
+ * stale pointers remain.
+ */
+void
+fini(xlator_t *this)
+{
+        GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out);
+
+        if (this->local_pool) {
+                mem_pool_destroy(this->local_pool);
+                this->local_pool = NULL;
+        }
+
+        GF_FREE(this->private);
+        this->private = NULL;
+
+out:
+        return;
+}
+
+/* File operations implemented by this translator: opendir and readdirp. */
+struct xlator_fops fops = {
+ .opendir = rda_opendir,
+ .readdirp = rda_readdirp,
+};
+
+/* Callbacks: releasedir frees the per-fd preload context. */
+struct xlator_cbks cbks = {
+ .releasedir = rda_releasedir,
+};
+
+/*
+ * Tunables read by init() and reconfigure():
+ * - rda-request-size: size passed to each internal preload readdirp;
+ * - rda-low-wmark: buffered size at or below which the context is plugged;
+ * - rda-high-wmark: buffered size at or above which it is unplugged.
+ * The list is terminated by an entry with a NULL key.
+ */
+struct volume_options options[] = {
+ { .key = {"rda-request-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 4096,
+ .max = 131072,
+ .default_value = "131072",
+ .description = "readdir-ahead request size",
+ },
+ { .key = {"rda-low-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 10 * GF_UNIT_MB,
+ .default_value = "4096",
+ .description = "the value under which we plug",
+ },
+ { .key = {"rda-high-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 100 * GF_UNIT_MB,
+ .default_value = "131072",
+ .description = "the value over which we unplug",
+ },
+ { .key = {NULL} },
+};
+
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
new file mode 100644
index 00000000000..f030f10a0af
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READDIR_AHEAD_H
+#define __READDIR_AHEAD_H
+
+/* state flags (bitmask stored in rda_fd_ctx.state) */
+/* fresh or reset context; the preload has not been started yet */
+#define RDA_FD_NEW (1 << 0)
+/* the preload is active and keeps re-issuing readdirp requests */
+#define RDA_FD_RUNNING (1 << 1)
+/* the preload received op_ret == 0, i.e. end of directory */
+#define RDA_FD_EOD (1 << 2)
+/* the preload failed or ran out of sequence; error pending for the reader */
+#define RDA_FD_ERROR (1 << 3)
+/* readdirp requests are wound straight to the child */
+#define RDA_FD_BYPASS (1 << 4)
+/* buffer is at/below the low watermark; hold requests until it refills */
+#define RDA_FD_PLUGGED (1 << 5)
+
+/* Per-fd preload state, stored in the fd context of a directory fd. */
+struct rda_fd_ctx {
+ off_t cur_offset; /* current head of the ctx */
+ size_t cur_size; /* current size of the preload */
+ off_t next_offset; /* tail of the ctx */
+ uint32_t state; /* bitmask of RDA_FD_* flags */
+ gf_lock_t lock; /* protects the fields of this structure */
+ gf_dirent_t entries; /* preloaded entries, in directory order */
+ call_frame_t *fill_frame; /* frame used for the internal readdirp */
+ call_stub_t *stub; /* request parked until data is available */
+ int op_errno; /* errno reported by the last preload reply */
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+};
+
+/*
+ * Frame-local data. The fill frame uses ctx, fd and offset; the opendir
+ * frame only uses xattrs.
+ */
+struct rda_local {
+ struct rda_fd_ctx *ctx;
+ fd_t *fd;
+ off_t offset; /* offset the in-flight preload request was issued at */
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+};
+
+/* Translator-wide configuration, filled from the volume options. */
+struct rda_priv {
+ uint32_t rda_req_size; /* "rda-request-size" */
+ uint64_t rda_low_wmark; /* "rda-low-wmark" */
+ uint64_t rda_high_wmark; /* "rda-high-wmark" */
+};
+
+#endif /* __READDIR_AHEAD_H */
diff --git a/xlators/performance/stat-prefetch/src/Makefile.am b/xlators/performance/stat-prefetch/src/Makefile.am
deleted file mode 100644
index cfb13071486..00000000000
--- a/xlators/performance/stat-prefetch/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-xlator_LTLIBRARIES = stat-prefetch.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-
-stat_prefetch_la_LDFLAGS = -module -avoidversion
-stat_prefetch_la_SOURCES = stat-prefetch.c
-noinst_HEADERS = stat-prefetch.h stat-prefetch-mem-types.h
-
-stat_prefetch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h b/xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h
deleted file mode 100644
index f3d25a8af71..00000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __SP_MEM_TYPES_H__
-#define __SP_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_sp_mem_types_ {
- gf_sp_mt_sp_cache_t = gf_common_mt_end + 1,
- gf_sp_mt_sp_fd_ctx_t,
- gf_sp_mt_stat,
- gf_sp_mt_sp_local_t,
- gf_sp_mt_sp_inode_ctx_t,
- gf_sp_mt_sp_private_t,
- gf_sp_mt_end
-};
-#endif
-
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.c b/xlators/performance/stat-prefetch/src/stat-prefetch.c
deleted file mode 100644
index fb338491e0c..00000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch.c
+++ /dev/null
@@ -1,3779 +0,0 @@
-/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "stat-prefetch.h"
-
-#define GF_SP_CACHE_BUCKETS 1
-#define GF_SP_CACHE_ENTRIES_EXPECTED 1048576
-
-typedef enum {
- SP_EXPECT,
- SP_DONT_EXPECT,
- SP_DONT_CARE
-}sp_expect_t;
-
-
-void
-sp_inode_ctx_free (xlator_t *this, sp_inode_ctx_t *ctx)
-{
- call_stub_t *stub = NULL, *tmp = NULL;
-
- if (ctx == NULL) {
- goto out;
- }
-
- LOCK (&ctx->lock);
- {
- if (!list_empty (&ctx->waiting_ops)) {
- gf_log (this->name, GF_LOG_CRITICAL, "inode ctx is "
- "being freed even when there are file "
- "operations waiting for lookup-behind to "
- "complete. The operations in the waiting list "
- "are:");
- list_for_each_entry_safe (stub, tmp, &ctx->waiting_ops,
- list) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "OP (%d)", stub->fop);
-
- list_del_init (&stub->list);
- call_stub_destroy (stub);
- }
- }
- }
- UNLOCK (&ctx->lock);
-
- LOCK_DESTROY (&ctx->lock);
- GF_FREE (ctx);
-
-out:
- return;
-}
-
-
-sp_inode_ctx_t *
-sp_inode_ctx_init ()
-{
- sp_inode_ctx_t *inode_ctx = NULL;
-
- inode_ctx = GF_CALLOC (1, sizeof (*inode_ctx),
- gf_sp_mt_sp_inode_ctx_t);
- if (inode_ctx == NULL) {
- goto out;
- }
-
- LOCK_INIT (&inode_ctx->lock);
- INIT_LIST_HEAD (&inode_ctx->waiting_ops);
-
-out:
- return inode_ctx;
-}
-
-
-int
-sp_update_inode_ctx (xlator_t *this, inode_t *inode, int32_t *op_ret,
- int32_t *op_errno, char *lookup_in_progress,
- char *looked_up, struct iatt *stbuf,
- struct list_head *waiting_ops, int32_t *error)
-{
- int32_t ret = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- uint64_t value = 0;
-
- ret = inode_ctx_get (inode, this, &value);
- if (ret == 0) {
- inode_ctx = (sp_inode_ctx_t *)(long)value;
- }
-
- if (inode_ctx == NULL) {
- ret = -1;
- if (error != NULL) {
- *error = EINVAL;
- }
-
- goto out;
- }
-
- LOCK (&inode_ctx->lock);
- {
- if (op_ret != NULL) {
- inode_ctx->op_ret = *op_ret;
- }
-
- if (op_errno != NULL) {
- inode_ctx->op_errno = *op_errno;
- }
-
- if (looked_up != NULL) {
- inode_ctx->looked_up = *looked_up;
- }
-
- if (lookup_in_progress != NULL) {
- inode_ctx->lookup_in_progress = *lookup_in_progress;
- }
-
- if ((op_ret == 0) && (stbuf != NULL)
- && IA_ISDIR (stbuf->ia_type)) {
- memcpy (&inode_ctx->stbuf, stbuf,
- sizeof (*stbuf));
- }
-
- if (waiting_ops != NULL) {
- list_splice_init (&inode_ctx->waiting_ops,
- waiting_ops);
- }
- }
- UNLOCK (&inode_ctx->lock);
-
-out:
- return ret;
-}
-
-
-sp_inode_ctx_t *
-sp_check_and_create_inode_ctx (xlator_t *this, inode_t *inode,
- sp_expect_t expect, glusterfs_fop_t caller)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0;
-
- if ((this == NULL) || (inode == NULL)) {
- goto out;
- }
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &value);
- if (ret == 0) {
- if (expect == SP_DONT_EXPECT) {
- gf_log (this->name, GF_LOG_DEBUG, "inode_ctx "
- "is not NULL (caller %d)", caller);
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long)value;
- } else {
- if (expect == SP_EXPECT) {
- gf_log (this->name, GF_LOG_DEBUG, "inode_ctx is"
- " NULL (caller %d)", caller);
- }
-
- inode_ctx = sp_inode_ctx_init ();
- if (inode_ctx != NULL) {
- ret = __inode_ctx_put (inode, this,
- (long)inode_ctx);
- if (ret == -1) {
- sp_inode_ctx_free (this, inode_ctx);
- inode_ctx = NULL;
- }
- }
- }
- }
- UNLOCK (&inode->lock);
-
-out:
- return inode_ctx;
-}
-
-
-sp_cache_t *
-sp_cache_ref (sp_cache_t *cache)
-{
- if (cache == NULL) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- cache->ref++;
- }
- UNLOCK (&cache->lock);
-
-out:
- return cache;;
-}
-
-
-void
-sp_cache_unref (sp_cache_t *cache)
-{
- int refcount = 0;
- if (cache == NULL) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- refcount = --cache->ref;
- }
- UNLOCK (&cache->lock);
-
- if (refcount == 0) {
- rbthash_table_destroy (cache->table);
- GF_FREE (cache);
- }
-
-out:
- return;
-}
-
-
-int32_t
-sp_process_inode_ctx (call_frame_t *frame, xlator_t *this, loc_t *loc,
- call_stub_t *stub, char *need_unwind, char *need_lookup,
- char *can_wind, int32_t *error, glusterfs_fop_t caller)
-{
- int32_t ret = -1, op_errno = -1;
- sp_local_t *local = NULL;
- sp_inode_ctx_t *inode_ctx = NULL;
- uint64_t value = 0;
-
- if (need_unwind != NULL) {
- *need_unwind = 1;
- }
-
- if ((this == NULL) || (loc == NULL) || (loc->inode == NULL)
- || (need_unwind == NULL) || (need_lookup == NULL)
- || (can_wind == NULL)) {
- op_errno = EINVAL;
- goto out;
- }
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p) (caller %d)", loc->inode, caller);
- *can_wind = 1;
- *need_unwind = 0;
- op_errno = 0;
- ret = 0;
- goto out;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, out, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- if (!(inode_ctx->looked_up || inode_ctx->lookup_in_progress)) {
- if (frame->local == NULL) {
- local = GF_CALLOC (1, sizeof (*local),
- gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name,
- local,
- unlock,
- op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s",
- strerror (op_errno));
- goto unlock;
- }
- }
-
- *need_lookup = 1;
- inode_ctx->lookup_in_progress = 1;
- }
-
- if (inode_ctx->looked_up) {
- *can_wind = 1;
- } else {
- list_add_tail (&stub->list, &inode_ctx->waiting_ops);
- stub = NULL;
- }
-
- *need_unwind = 0;
- ret = 0;
- }
-unlock:
- UNLOCK (&inode_ctx->lock);
-
-out:
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- if (error != NULL) {
- *error = op_errno;
- }
-
- return ret;
-}
-
-
-inline uint32_t
-sp_hashfn (void *data, int len)
-{
- return gf_dm_hashfn ((const char *)data, len);
-}
-
-sp_cache_t *
-sp_cache_init (xlator_t *this)
-{
- sp_cache_t *cache = NULL;
- sp_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!priv)
- goto out;
-
- if (!priv->mem_pool)
- goto out;
-
- cache = GF_CALLOC (1, sizeof (*cache), gf_sp_mt_sp_cache_t);
- if (cache) {
- cache->table =
- rbthash_table_init (GF_SP_CACHE_BUCKETS,
- sp_hashfn, __gf_free,
- 0, priv->mem_pool);
- if (cache->table == NULL) {
- GF_FREE (cache);
- cache = NULL;
- goto out;
- }
-
- LOCK_INIT (&cache->lock);
- cache->this = this;
- }
-
-out:
- return cache;
-}
-
-
-void
-sp_local_free (sp_local_t *local)
-{
- if (local) {
- loc_wipe (&local->loc);
- GF_FREE (local);
- }
-}
-
-
-int32_t
-sp_cache_remove_entry (sp_cache_t *cache, char *name, char remove_all)
-{
- int32_t ret = -1;
- rbthash_table_t *table = NULL;
- xlator_t *this;
- sp_private_t *priv = NULL;
- void *data = NULL;
-
- if ((cache == NULL) || ((name == NULL) && !remove_all)) {
- goto out;
- }
-
- this = cache->this;
-
- if (this == NULL)
- goto out;
-
- if (this->private == NULL)
- goto out;
-
- priv = this->private;
-
- LOCK (&cache->lock);
- {
- if (remove_all) {
- table = cache->table;
- cache->table = rbthash_table_init (GF_SP_CACHE_BUCKETS,
- sp_hashfn,
- __gf_free,
- 0,
- priv->mem_pool);
- if (cache->table == NULL) {
- cache->table = table;
- } else {
- rbthash_table_destroy (table);
- ret = 0;
- }
- } else {
- data = rbthash_remove (cache->table, name,
- strlen (name));
- GF_FREE (data);
- ret = 0;
- }
- }
- UNLOCK (&cache->lock);
-
-out:
- return ret;
-}
-
-
-int32_t
-sp_cache_get_entry (sp_cache_t *cache, char *name, gf_dirent_t **entry)
-{
- int32_t ret = -1;
- gf_dirent_t *tmp = NULL, *new = NULL;
-
- if ((cache == NULL) || (name == NULL) || (entry == NULL)) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- tmp = rbthash_get (cache->table, name, strlen (name));
- if (tmp != NULL) {
- new = gf_dirent_for_name (tmp->d_name);
- if (new == NULL) {
- goto unlock;
- }
-
- new->d_ino = tmp->d_ino;
- new->d_off = tmp->d_off;
- new->d_len = tmp->d_len;
- new->d_type = tmp->d_type;
- new->d_stat = tmp->d_stat;
-
- *entry = new;
- ret = 0;
- }
- }
-unlock:
- UNLOCK (&cache->lock);
-
-out:
- return ret;
-}
-
-
-void
-sp_cache_free (sp_cache_t *cache)
-{
- sp_cache_remove_entry (cache, NULL, 1);
- sp_cache_unref (cache);
-}
-
-
-sp_cache_t *
-__sp_get_cache_fd (xlator_t *this, fd_t *fd)
-{
- int32_t ret = -1;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- ret = __fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- goto out;
- }
-
- fd_ctx = (void *)(long) value;
-
- cache = fd_ctx->cache;
-
-out:
- return cache;
-}
-
-
-sp_cache_t *
-sp_get_cache_fd (xlator_t *this, fd_t *fd)
-{
- sp_cache_t *cache = NULL;
-
- if (fd == NULL) {
- goto out;
- }
-
- LOCK (&fd->lock);
- {
- cache = __sp_get_cache_fd (this, fd);
- if (cache != NULL) {
- sp_cache_ref (cache);
- }
- }
- UNLOCK (&fd->lock);
-
-out:
- return cache;
-}
-
-
-void
-sp_fd_ctx_free (sp_fd_ctx_t *fd_ctx)
-{
- if (fd_ctx == NULL) {
- goto out;
- }
-
- if (fd_ctx->parent_inode) {
- inode_unref (fd_ctx->parent_inode);
- fd_ctx->parent_inode = NULL;
- }
-
- if (fd_ctx->name) {
- GF_FREE (fd_ctx->name);
- fd_ctx->name = NULL;
- }
-
- if (fd_ctx->cache) {
- sp_cache_free (fd_ctx->cache);
- }
-
- GF_FREE (fd_ctx);
-out:
- return;
-}
-
-
-inline sp_fd_ctx_t *
-sp_fd_ctx_init (void)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx), gf_sp_mt_sp_fd_ctx_t);
-
- return fd_ctx;
-}
-
-
-sp_fd_ctx_t *
-sp_fd_ctx_new (xlator_t *this, inode_t *parent, char *name, sp_cache_t *cache)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = sp_fd_ctx_init ();
- if (fd_ctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (parent) {
- fd_ctx->parent_inode = inode_ref (parent);
- }
-
- if (name) {
- fd_ctx->name = gf_strdup (name);
- if (fd_ctx->name == NULL) {
- sp_fd_ctx_free (fd_ctx);
- fd_ctx = NULL;
- goto out;
- }
- }
-
- fd_ctx->cache = cache;
-
-out:
- return fd_ctx;
-}
-
-
-sp_cache_t *
-sp_del_cache_fd (xlator_t *this, fd_t *fd)
-{
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (fd == NULL) {
- goto out;
- }
-
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- fd_ctx = (void *)(long) value;
- cache = fd_ctx->cache;
- fd_ctx->cache = NULL;
- }
- }
- UNLOCK (&fd->lock);
-
-out:
- return cache;
-}
-
-
-sp_cache_t *
-sp_get_cache_inode (xlator_t *this, inode_t *inode, int32_t pid)
-{
- fd_t *fd = NULL;
- sp_cache_t *cache = NULL;
-
- if (inode == NULL) {
- goto out;
- }
-
- fd = fd_lookup (inode, pid);
- if (fd == NULL) {
- goto out;
- }
-
- cache = sp_get_cache_fd (this, fd);
-
- fd_unref (fd);
-out:
- return cache;
-}
-
-
-inline int32_t
-__sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- int32_t ret = -1;
- uint64_t value = 0;
-
- ret = __fd_ctx_get (fd, this, &value);
- if (!ret) {
- fd_ctx = (void *)(long)value;
- } else {
- fd_ctx = sp_fd_ctx_init ();
- if (fd_ctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- ret = -1;
- goto out;
- }
-
- ret = __fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- goto out;
- }
- }
-
- if (fd_ctx->cache) {
- sp_cache_free (fd_ctx->cache);
- }
-
- fd_ctx->cache = cache;
-
-out:
- return ret;
-}
-
-
-inline int32_t
-sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache)
-{
- int32_t ret = -1;
-
- if (fd != NULL) {
- LOCK (&fd->lock);
- {
- ret = __sp_put_cache (this, fd, cache);
- }
- UNLOCK (&fd->lock);
- }
-
- return ret;
-}
-
-
-int32_t
-sp_cache_add_entries (sp_cache_t *cache, gf_dirent_t *entries)
-{
- gf_dirent_t *entry = NULL, *new = NULL;
- int32_t ret = -1;
- uint64_t expected_offset = 0;
-
- LOCK (&cache->lock);
- {
- list_for_each_entry (entry, &entries->list, list) {
- if (IA_ISDIR (entry->d_stat.ia_type)) {
- continue;
- }
-
- new = gf_dirent_for_name (entry->d_name);
- if (new == NULL) {
- goto unlock;
- }
-
- new->d_ino = entry->d_ino;
- new->d_off = entry->d_off;
- new->d_len = entry->d_len;
- new->d_type = entry->d_type;
- new->d_stat = entry->d_stat;
-
- ret = rbthash_insert (cache->table, new, new->d_name,
- strlen (new->d_name));
- if (ret == -1) {
- GF_FREE (new);
- continue;
- }
-
- expected_offset = new->d_off;
- }
-
- cache->expected_offset = expected_offset;
-
- ret = 0;
- }
-unlock:
- UNLOCK (&cache->lock);
-
- return ret;
-}
-
-
-int32_t
-sp_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
-{
- int ret = 0;
- struct list_head waiting_ops = {0, };
- call_stub_t *stub = NULL, *tmp = NULL;
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
- int need_unwind = 0;
- char looked_up = 0, lookup_in_progress = 0;
-
- INIT_LIST_HEAD (&waiting_ops);
-
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG, "local is NULL, but it is "
- "needed to find and resume operations waiting on "
- "lookup");
- goto out;
- }
- if (op_ret == -1) {
- cache = sp_get_cache_inode (this, local->loc.parent,
- frame->root->pid);
-
- if (cache) {
- sp_cache_remove_entry (cache, (char *)local->loc.name,
- 0);
- sp_cache_unref (cache);
- }
- }
-
- if (local->is_lookup)
- need_unwind = 1;
-
- lookup_in_progress = 0;
- looked_up = 1;
- ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret, &op_errno,
- &lookup_in_progress, &looked_up, buf,
- &waiting_ops, &op_errno);
-
- list_for_each_entry_safe (stub, tmp, &waiting_ops, list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
-
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
- dict, postparent);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_get_ancestors (char *path, char **parent, char **grand_parent)
-{
- int32_t ret = -1, i = 0;
- char *cpy = NULL;
-
- if (!path || !parent || !grand_parent) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; i < 2; i++) {
- if (!strcmp (path, "/")) {
- break;
- }
-
- cpy = gf_strdup (path);
- if (cpy == NULL) {
- goto out;
- }
-
- path = dirname (cpy);
- switch (i)
- {
- case 0:
- *parent = path;
- break;
- case 1:
- *grand_parent = path;
- break;
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-sp_cache_remove_parent_entry (call_frame_t *frame, xlator_t *this,
- inode_table_t *itable, char *path)
-{
- char *parent = NULL, *grand_parent = NULL, *cpy = NULL;
- inode_t *inode_gp = NULL;
- sp_cache_t *cache_gp = NULL;
- int32_t ret = -1;
-
- ret = sp_get_ancestors (path, &parent, &grand_parent);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (grand_parent && strcmp (grand_parent, "/")) {
- inode_gp = inode_from_path (itable, grand_parent);
- if (inode_gp) {
- cache_gp = sp_get_cache_inode (this, inode_gp,
- frame->root->pid);
- if (cache_gp) {
- cpy = gf_strdup (parent);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name,
- cpy, out, errno,
- ENOMEM);
- path = basename (cpy);
- sp_cache_remove_entry (cache_gp, path, 0);
- GF_FREE (cpy);
-
- sp_cache_unref (cache_gp);
- }
- inode_unref (inode_gp);
- }
- }
-
- ret = 0;
-out:
- if (parent) {
- GF_FREE (parent);
- }
-
- if (grand_parent) {
- GF_FREE (grand_parent);
- }
-
- return ret;
-}
-
-
-void
-sp_is_empty (dict_t *this, char *key, data_t *value, void *data)
-{
- char *ptr = data;
-
- if (ptr && *ptr) {
- *ptr = 0;
- }
-}
-
-
-int32_t
-sp_lookup_helper (call_frame_t *frame,xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- stub = fop_lookup_stub (frame, sp_lookup_helper, loc,
- xattr_req);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, stub, unwind,
- op_errno, ENOMEM);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- if (op_ret == 0) {
- if (!inode_ctx->lookup_in_progress) {
- inode_ctx->lookup_in_progress = 1;
- can_wind = 1;
- } else {
- list_add_tail (&stub->list,
- &inode_ctx->waiting_ops);
- stub = NULL;
- }
- }
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- if (can_wind) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc,
- xattr_req);
- }
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- return 0;
-}
-
-
-/*
- * TODO: implement sending lookups for every fop done on this path. As of now
- * lookup on the path is sent only for the first fop on this path.
- */
-int32_t
-sp_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- gf_dirent_t *dirent = NULL;
- char entry_cached = 0;
- uint64_t value = 0;
- char xattr_req_empty = 1, can_wind = 0;
- sp_cache_t *cache = NULL;
- struct iatt postparent = {0, }, buf = {0, };
- int32_t ret = -1, op_ret = -1, op_errno = EINVAL;
- sp_inode_ctx_t *inode_ctx = NULL, *parent_inode_ctx = NULL;
- sp_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- if (loc == NULL || loc->inode == NULL) {
- goto unwind;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_CARE, GF_FOP_LOOKUP);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if ((loc->parent == NULL) || (loc->name == NULL)) {
- goto wind;
- }
-
- if (xattr_req != NULL) {
- dict_foreach (xattr_req, sp_is_empty, &xattr_req_empty);
- }
-
- if (!xattr_req_empty) {
- goto wind;
- }
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- ret = sp_cache_get_entry (cache, (char *)loc->name, &dirent);
- if (ret == 0) {
- ret = inode_ctx_get (loc->parent, this, &value);
- if ((ret == 0) && (value != 0)) {
- parent_inode_ctx = (void *)(long)value;
- postparent = parent_inode_ctx->stbuf;
- buf = dirent->d_stat;
- op_ret = 0;
- op_errno = 0;
- entry_cached = 1;
- }
-
- GF_FREE (dirent);
- }
- } else if (IA_ISDIR (loc->inode->ia_type)) {
- cache = sp_get_cache_inode (this, loc->inode, frame->root->pid);
- if (cache) {
- ret = sp_cache_get_entry (cache, ".", &dirent);
- if (ret == 0) {
- ret = inode_ctx_get (loc->parent, this, &value);
- if ((ret == 0) && (value != 0)) {
- parent_inode_ctx = (void *)(long)value;
- postparent = parent_inode_ctx->stbuf;
- buf = dirent->d_stat;
- op_ret = 0;
- op_errno = 0;
- entry_cached = 1;
- }
-
- GF_FREE (dirent);
- }
- }
- }
-
-wind:
- if (entry_cached) {
- if (cache) {
- cache->hits++;
- sp_cache_unref (cache);
- }
- } else {
- if (cache) {
- cache->miss++;
- sp_cache_unref (cache);
- }
-
- stub = fop_lookup_stub (frame, sp_lookup_helper, loc,
- xattr_req);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, stub, unwind,
- op_errno, ENOMEM);
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind,
- op_errno, ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s",
- strerror (op_errno));
- goto unwind;
- }
-
- local->is_lookup = 1;
-
- LOCK (&inode_ctx->lock);
- {
- if (inode_ctx->lookup_in_progress) {
- list_add_tail (&stub->list,
- &inode_ctx->waiting_ops);
- stub = NULL;
- } else {
- can_wind = 1;
- inode_ctx->lookup_in_progress = 1;
- }
- }
- UNLOCK (&inode_ctx->lock);
-
- if (can_wind) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc,
- xattr_req);
- }
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- return 0;
- }
-
-unwind:
- SP_STACK_UNWIND (lookup, frame, op_ret, op_errno, (loc)?loc->inode:NULL,
- &buf, NULL, &postparent);
-
- return 0;
-}
-
-
-int32_t
-sp_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
- fd_t *fd = NULL;
- int32_t ret = 0;
- char was_present = 1;
- sp_private_t *priv = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- if (!this->private) {
- goto out;
- }
-
- local = frame->local;
- if (local == NULL) {
- goto out;
- }
-
- fd = local->fd;
-
- priv = this->private;
-
- LOCK (&priv->lock);
- {
- if (!priv->mem_pool)
- priv->mem_pool = mem_pool_new (rbthash_entry_t,
- GF_SP_CACHE_ENTRIES_EXPECTED);
- }
- UNLOCK (&priv->lock);
-
- if (!priv->mem_pool)
- goto out;
-
- LOCK (&fd->lock);
- {
- cache = __sp_get_cache_fd (this, fd);
- if (cache == NULL) {
- was_present = 0;
- cache = sp_cache_init (this);
- if (cache == NULL) {
- goto unlock;
- }
-
- ret = __sp_put_cache (this, fd, cache);
- if (ret == -1) {
- sp_cache_free (cache);
- goto unlock;
- }
- }
-
- sp_cache_ref (cache);
- }
-unlock:
- UNLOCK (&fd->lock);
-
- if (cache != NULL) {
- sp_cache_add_entries (cache, entries);
- if (was_present) {
- sp_cache_unref (cache);
- }
- }
-
-out:
- SP_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
- return 0;
-}
-
-
-int32_t
-sp_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- sp_cache_t *cache = NULL;
- sp_local_t *local = NULL;
- char *path = NULL;
- int32_t ret = -1;
-
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- if (off != cache->expected_offset) {
- sp_cache_remove_entry (cache, NULL, 1);
- }
-
- sp_cache_unref (cache);
- }
-
- ret = inode_path (fd->inode, NULL, &path);
- if (ret == -1) {
- goto unwind;
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, fd->inode->table,
- path);
-
- GF_FREE (path);
-
- if (ret < 0) {
- errno = -ret;
- goto unwind;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- if (local) {
- local->fd = fd;
- frame->local = local;
- }
-
- STACK_WIND (frame, sp_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (readdir, frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- SP_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-
-int32_t
-sp_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- SP_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- return 0;
-}
-
-
-int32_t
-sp_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
-{
- sp_local_t *local = NULL;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- EINVAL);
-
- fd_ctx = sp_fd_ctx_new (this, local->loc.parent,
- (char *)local->loc.name, NULL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd_ctx, out, op_errno,
- ENOMEM);
-
- op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (op_ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- op_errno = ENOMEM;
- }
-
-out:
- SP_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_open_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if ((op_ret == -1) && ((op_errno != ENOENT)
- || !((op_errno == ENOENT)
- && (flags & O_CREAT)))) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (open, frame, -1, op_errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int wbflags)
-{
- call_stub_t *stub = NULL;
- sp_local_t *local = NULL;
- int32_t op_errno = -1, ret = -1;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (errno));
- goto out;
- }
-
- stub = fop_open_stub (frame, sp_open_helper, loc, flags, fd, wbflags);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno, GF_FOP_OPEN);
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (open, frame, -1, op_errno, fd);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd,
- wbflags);
- }
-
- return 0;
-
-}
-
-static int32_t
-sp_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- sp_local_t *local = NULL;
- sp_fd_ctx_t *fd_ctx = NULL;
- char lookup_in_progress = 0, looked_up = 0;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- EINVAL);
-
- looked_up = 1;
- op_ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret,
- &op_errno, &lookup_in_progress,
- &looked_up, buf, NULL, &op_errno);
- if (op_ret == -1) {
- goto out;
- }
-
- sp_update_inode_ctx (this, local->loc.parent, NULL, NULL, NULL,
- NULL, postparent, NULL, NULL);
-
- fd_ctx = sp_fd_ctx_new (this, local->loc.parent,
- (char *)local->loc.name, NULL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd_ctx, out, op_errno,
- ENOMEM);
-
- op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (op_ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- op_errno = ENOMEM;
- }
-
-out:
- SP_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-
-int32_t
-sp_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- sp_local_t *local = NULL;
- int32_t op_errno = -1, ret = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_CREATE);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- } else {
- STACK_WIND (frame, sp_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags,
- mode, fd);
- }
- return 0;
-}
-
-
-int32_t
-sp_opendir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- sp_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int32_t op_errno = -1, ret = -1;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- goto out;
- }
-
- stub = fop_opendir_stub (frame, sp_opendir_helper, loc, fd);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_OPENDIR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_new_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- sp_local_t *local = NULL;
- char lookup_in_progress = 0, looked_up = 0;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- if (local == NULL) {
- op_errno = EINVAL;
- goto out;
- }
-
- looked_up = 1;
- op_ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret,
- &op_errno, &lookup_in_progress,
- &looked_up, buf, NULL, &op_errno);
- if (op_ret == -1) {
- goto out;
- }
-
- sp_update_inode_ctx (this, local->loc.parent, NULL, NULL, NULL,
- NULL, postparent, NULL, NULL);
-
-out:
- SP_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf, preparent,
- postparent);
- return 0;
-}
-
-
-int32_t
-sp_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- int32_t ret = -1, op_errno = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
- sp_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_MKDIR);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else {
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t op_errno = -1, ret = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
- sp_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_MKNOD);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else {
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
-{
- int32_t ret = -1, op_errno = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
- sp_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_SYMLINK);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else {
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- SP_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, preparent,
- postparent);
- return 0;
-}
-
-int32_t
-sp_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (oldloc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", oldloc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- call_stub_t *stub = NULL;
- sp_cache_t *cache = NULL;
- int32_t ret = 0, op_errno = -1;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->path, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->name, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->inode, out,
- op_errno, EINVAL);
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->name, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, newloc->parent->table,
- (char *)newloc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- cache = sp_get_cache_inode (this, oldloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)oldloc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_link_stub (frame, sp_link_helper, oldloc, newloc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, oldloc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno, GF_FOP_LINK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, oldloc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_truncate_stub (frame, sp_truncate_helper, loc, offset);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_TRUNCATE);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (ftruncate, frame, -1, errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prestat, struct iatt *poststat)
-{
- SP_STACK_UNWIND (setattr, frame, op_ret, op_errno, prestat, poststat);
- return 0;
-}
-
-
-int
-sp_setattr_helper (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, buf, valid);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int
-sp_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_setattr_stub (frame, sp_setattr_helper, loc, buf, valid);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_SETATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, buf, valid);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
-{
- SP_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf);
- return 0;
-}
-
-
-int32_t
-sp_readlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out,
- op_errno, EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_readlink_stub (frame, sp_readlink_helper, loc, size);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_READLINK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- SP_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
- postparent);
- return 0;
-}
-
-
-
-int32_t
-sp_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- SP_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_unlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->parent->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- stub = fop_unlink_stub (frame, sp_unlink_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_UNLINK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
- }
-
- return 0;
-}
-
-
-void
-sp_remove_caches_from_all_fds_opened (xlator_t *this, inode_t *inode)
-{
- fd_t *fd = NULL;
- sp_cache_t *cache = NULL;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (fd, &inode->fd_list, inode_list) {
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- sp_cache_remove_entry (cache, NULL, 1);
- sp_cache_unref (cache);
- }
- }
- }
- UNLOCK (&inode->lock);
-}
-
-
-int32_t
-sp_rmdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- sp_remove_caches_from_all_fds_opened (this, loc->inode);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- stub = fop_rmdir_stub (frame, sp_rmdir_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_RMDIR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
-{
- SP_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
- return 0;
-}
-
-
-int32_t
-sp_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (readv, frame, -1, errno, NULL, -1, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- iobref);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (writev, frame, -1, errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (fsync, frame, -1, errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- uint64_t value = 0;
- char need_unwind = 0;
- char can_wind = 0;
- int32_t ret = 0, op_errno = -1;
- int32_t old_op_ret = -1, old_op_errno = -1;
- int32_t new_op_ret = -1, new_op_errno = -1;
- char old_inode_looked_up = 0, new_inode_looked_up = 0;
- sp_inode_ctx_t *old_inode_ctx = NULL, *new_inode_ctx = NULL;
-
- ret = inode_ctx_get (oldloc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", oldloc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- old_inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, old_inode_ctx, unwind,
- op_errno, EINVAL);
-
- LOCK (&old_inode_ctx->lock);
- {
- old_inode_looked_up = old_inode_ctx->looked_up;
- old_op_ret = old_inode_ctx->op_ret;
- old_op_errno = old_inode_ctx->op_errno;
- need_unwind = old_inode_ctx->need_unwind;
- }
- UNLOCK (&old_inode_ctx->lock);
-
- if (need_unwind) {
- /* there was an error while queuing up lookup stub for newloc */
- goto unwind;
- }
-
- if (newloc->inode != NULL) {
- ret = inode_ctx_get (newloc->inode, this, &value);
- if (ret == 0) {
- new_inode_ctx = (sp_inode_ctx_t *)(long)value;
- if (new_inode_ctx != NULL) {
- LOCK (&new_inode_ctx->lock);
- {
- new_inode_looked_up = new_inode_ctx->looked_up;
- new_op_ret = new_inode_ctx->op_ret;
- new_op_errno = new_inode_ctx->op_errno;
- }
- UNLOCK (&new_inode_ctx->lock);
- }
- }
- }
-
- if (new_inode_ctx == NULL) {
- if (old_op_ret == -1) {
- op_errno = old_op_errno;
- goto unwind;
- } else {
- can_wind = 1;
- }
- } else {
- if (new_inode_looked_up && old_inode_looked_up) {
- if ((old_op_ret == -1)
- || ((new_op_ret == -1)
- && (new_op_errno != ENOENT))) {
- if (old_op_ret == -1) {
- op_errno = old_op_errno;
- } else {
- op_errno = new_op_errno;
- }
-
- goto unwind;
- } else {
- can_wind = 1;
- }
- }
- }
-
- if (can_wind) {
- STACK_WIND (frame, sp_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
- }
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-
-int32_t
-sp_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,loc_t *newloc)
-{
- char need_unwind = 1;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- sp_cache_t *cache = NULL;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = -1, op_errno = -1;
- char old_inode_can_wind = 0, new_inode_can_wind = 0;
- char old_inode_need_lookup = 0, new_inode_need_lookup = 0;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->path, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->name, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->inode, out,
- op_errno, EINVAL);
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->path, out,
- op_errno, EINVAL);
-
- cache = sp_get_cache_inode (this, oldloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)oldloc->name, 0);
- sp_cache_unref (cache);
- }
-
- cache = sp_get_cache_inode (this, newloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)newloc->name, 0);
- sp_cache_unref (cache);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, oldloc->parent->table,
- (char *)oldloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, newloc->parent->table,
- (char *)newloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (IA_ISDIR (oldloc->inode->ia_type)) {
- sp_remove_caches_from_all_fds_opened (this, oldloc->inode);
- }
-
- stub = fop_rename_stub (frame, sp_rename_helper, oldloc, newloc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- ret = sp_process_inode_ctx (frame, this, oldloc, stub, &need_unwind,
- &old_inode_need_lookup, &old_inode_can_wind,
- &op_errno, GF_FOP_RENAME);
- if (ret == -1) {
- goto out;
- }
-
- if (newloc->inode != NULL) {
- stub = fop_rename_stub (frame, sp_rename_helper, oldloc,
- newloc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- ret = sp_process_inode_ctx (frame, this, newloc, stub,
- &need_unwind,
- &new_inode_need_lookup,
- &new_inode_can_wind, &op_errno,
- GF_FOP_RENAME);
- if (ret == -1) {
- ret = inode_ctx_get (oldloc->inode, this, &value);
- if (ret == -1) {
- goto out;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long)value;
- if (inode_ctx == NULL) {
- goto out;
- }
-
- LOCK (&inode_ctx->lock);
- {
- if (!inode_ctx->looked_up) {
- /* unwind in sp_rename_helper */
- need_unwind = 0;
- inode_ctx->need_unwind = 1;
- }
- }
- UNLOCK (&inode_ctx->lock);
- }
-
- } else {
- new_inode_can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- } else if (old_inode_need_lookup || new_inode_need_lookup) {
- if (old_inode_need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, oldloc,
- NULL);
- }
-
- if (new_inode_need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, newloc,
- NULL);
- }
- } else if (old_inode_can_wind && new_inode_can_wind) {
- STACK_WIND (frame, sp_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_setxattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict,
- flags);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (setxattr, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_setxattr_stub (frame, sp_setxattr_helper, loc, dict, flags);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_SETXATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (setxattr, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict,
- flags);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_removexattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (removexattr, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_removexattr_stub (frame, sp_removexattr_helper, loc, name);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_REMOVEXATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (removexattr, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int32_t
-sp_getxattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_getxattr_stub (frame, sp_getxattr_helper, loc, name);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_GETXATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
- }
-
- return 0;
-}
-
-int32_t
-sp_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- SP_STACK_UNWIND (checksum, frame, op_ret, op_errno, file_checksum,
- dir_checksum);
- return 0;
-}
-
-
-int32_t
-sp_checksum_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_checksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum, loc, flag);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (checksum, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out,
- op_errno, EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_checksum_stub (frame, sp_checksum_helper, loc, flag);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_CHECKSUM);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (checksum, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_checksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum, loc, flag);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- SP_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int32_t
-sp_xattrop_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_xattrop_stub (frame, sp_xattrop_helper, loc, flags, dict);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_XATTROP);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (xattrop, frame, -1, errno, NULL);
- return 0;
-}
-
-int32_t
-sp_stbuf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int32_t
-sp_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_stat_stub (frame, sp_stat_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_STAT);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_access_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (access, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_access_stub (frame, sp_access_helper, loc, mask);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_ACCESS);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (access, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_inodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *lock)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, lock);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (inodelk, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
- int32_t cmd, struct flock *lock)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_inodelk_stub (frame, sp_inodelk_helper, volume, loc, cmd,
- lock);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_INODELK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (inodelk, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd,
- lock);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_entrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk, volume, loc, basename,
- cmd, type);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (entrylk, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_entrylk_stub (frame, sp_entrylk_helper, volume, loc,
- basename, cmd, type);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_ENTRYLK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (entrylk, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk, volume, loc,
- basename, cmd, type);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_forget (xlator_t *this, inode_t *inode)
-{
- struct iatt *buf = NULL;
- uint64_t value = 0;
-
- inode_ctx_del (inode, this, &value);
-
- if (value) {
- buf = (void *)(long)value;
- GF_FREE (buf);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_release (xlator_t *this, fd_t *fd)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- sp_cache_t *cache = NULL;
-
- ret = fd_ctx_del (fd, this, &value);
- if (!ret) {
- fd_ctx = (void *)(long) value;
- cache = fd_ctx->cache;
- if (cache) {
- gf_log (this->name, GF_LOG_DEBUG, "cache hits: %lu, "
- "cache miss: %lu", cache->hits, cache->miss);
- }
-
- sp_fd_ctx_free (fd_ctx);
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_sp_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = -1;
- sp_private_t *priv = NULL;
-
- if (!this->children || this->children->next) {
- gf_log ("stat-prefetch",
- GF_LOG_ERROR,
- "FATAL: translator %s does not have exactly one child "
- "node", this->name);
- goto out;
- }
-
- priv = GF_CALLOC (1, sizeof(sp_private_t),
- gf_sp_mt_sp_private_t);
- LOCK_INIT (&priv->lock);
-
- this->private = priv;
-
- ret = 0;
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = sp_lookup,
- .readdir = sp_readdir,
- .readdirp = sp_readdir,
- .open = sp_open,
- .create = sp_create,
- .opendir = sp_opendir,
- .mkdir = sp_mkdir,
- .mknod = sp_mknod,
- .symlink = sp_symlink,
- .link = sp_link,
- .truncate = sp_truncate,
- .ftruncate = sp_ftruncate,
- .readlink = sp_readlink,
- .unlink = sp_unlink,
- .rmdir = sp_rmdir,
- .readv = sp_readv,
- .writev = sp_writev,
- .fsync = sp_fsync,
- .rename = sp_rename,
- .setxattr = sp_setxattr,
- .removexattr = sp_removexattr,
- .checksum = sp_checksum,
- .xattrop = sp_xattrop,
- .fxattrop = sp_fxattrop,
- .setattr = sp_setattr,
- .stat = sp_stat,
- .access = sp_access,
- .getxattr = sp_getxattr,
- .inodelk = sp_inodelk,
- .entrylk = sp_entrylk,
-};
-
-struct xlator_cbks cbks = {
- .forget = sp_forget,
- .release = sp_release,
- .releasedir = sp_release
-};
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.h b/xlators/performance/stat-prefetch/src/stat-prefetch.h
deleted file mode 100644
index 16edf76aa71..00000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _STAT_PREFETCH_H
-#define _STAT_PREFETCH_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "locking.h"
-#include "inode.h"
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "rbthash.h"
-#include "hashfn.h"
-#include "call-stub.h"
-#include "stat-prefetch-mem-types.h"
-#include <libgen.h>
-
-struct sp_cache {
- rbthash_table_t *table;
- xlator_t *this;
- uint64_t expected_offset; /* Offset where the next read will
- * happen.
- */
- gf_lock_t lock;
- unsigned long miss;
- unsigned long hits;
- uint32_t ref;
-};
-typedef struct sp_cache sp_cache_t;
-
-struct sp_fd_ctx {
- sp_cache_t *cache;
- inode_t *parent_inode; /*
- * inode corresponding to dirname (path)
- */
- char *name; /*
- * basename of path on which this fd is
- * opened
- */
-};
-typedef struct sp_fd_ctx sp_fd_ctx_t;
-
-struct sp_local {
- loc_t loc;
- fd_t *fd;
- char is_lookup;
-};
-typedef struct sp_local sp_local_t;
-
-struct sp_inode_ctx {
- char looked_up;
- char lookup_in_progress;
- char need_unwind;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt stbuf;
- gf_lock_t lock;
- struct list_head waiting_ops;
-};
-typedef struct sp_inode_ctx sp_inode_ctx_t;
-
-struct sp_private {
- struct mem_pool *mem_pool;
- gf_lock_t lock;
-};
-typedef struct sp_private sp_private_t;
-
-void sp_local_free (sp_local_t *local);
-
-#define SP_STACK_UNWIND(op, frame, params ...) do { \
- sp_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND_STRICT (op, frame, params); \
- sp_local_free (__local); \
-} while (0)
-
-#define SP_STACK_DESTROY(frame) do { \
- sp_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- sp_local_free (__local); \
-} while (0)
-
-#endif /* #ifndef _STAT_PREFETCH_H */
diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am
index 06e85fc9216..cc36ea99b6a 100644
--- a/xlators/performance/symlink-cache/src/Makefile.am
+++ b/xlators/performance/symlink-cache/src/Makefile.am
@@ -1,12 +1,15 @@
xlator_LTLIBRARIES = symlink-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance
-symlink_cache_la_LDFLAGS = -module -avoidversion
+symlink_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
symlink_cache_la_SOURCES = symlink-cache.c
symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = symlink-cache-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/symlink-cache/src/symlink-cache-messages.h b/xlators/performance/symlink-cache/src/symlink-cache-messages.h
new file mode 100644
index 00000000000..89ea118d6b3
--- /dev/null
+++ b/xlators/performance/symlink-cache/src/symlink-cache-messages.h
@@ -0,0 +1,93 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SYMLINK_CACHE_MESSAGES_H_
+#define _SYMLINK_CACHE_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file symlink-cache-messages.h
+ * \brief SYMLINK_CACHE log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_SYMLINK_CACHE_BASE GLFS_MSGID_COMP_SYMLINK_CACHE
+#define GLFS_SYMLINK_CACHE_NUM_MESSAGES 5
+#define GLFS_MSGID_END (GLFS_SYMLINK_CACHE_BASE +\
+ GLFS_SYMLINK_CACHE_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_SYMLINK_CACHE_BASE, "Invalid: Start of messages"
+
+#define SYMLINK_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED\
+ (GLFS_SYMLINK_CACHE_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define SYMLINK_CACHE_MSG_VOL_MISCONFIGURED (GLFS_SYMLINK_CACHE_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define SYMLINK_CACHE_MSG_NO_MEMORY (GLFS_SYMLINK_CACHE_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define SYMLINK_CACHE_MSG_DICT_GET_FAILED (GLFS_SYMLINK_CACHE_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define SYMLINK_CACHE_MSG_DICT_SET_FAILED (GLFS_SYMLINK_CACHE_BASE + 5)
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _SYMLINK_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c
index 5aaa0a8a7d6..596b4c53b7b 100644
--- a/xlators/performance/symlink-cache/src/symlink-cache.c
+++ b/xlators/performance/symlink-cache/src/symlink-cache.c
@@ -1,28 +1,14 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
@@ -31,6 +17,7 @@
#include "compat.h"
#include "compat-errno.h"
#include "common-utils.h"
+#include "symlink-cache-messages.h"
struct symlink_cache {
time_t ctime;
@@ -45,7 +32,8 @@ symlink_inode_ctx_get (inode_t *inode, xlator_t *this, void **ctx)
uint64_t tmp_ctx = 0;
ret = inode_ctx_get (inode, this, &tmp_ctx);
if (-1 == ret)
- gf_log (this->name, GF_LOG_ERROR, "dict get failed");
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ SYMLINK_CACHE_MSG_DICT_GET_FAILED, "dict get failed");
else
*ctx = (void *)(long)tmp_ctx;
@@ -59,7 +47,8 @@ symlink_inode_ctx_set (inode_t *inode, xlator_t *this, void *ctx)
int ret = 0;
ret = inode_ctx_put (inode, this, (uint64_t)(long) ctx);
if (-1 == ret)
- gf_log (this->name, GF_LOG_ERROR, "dict set failed");
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ SYMLINK_CACHE_MSG_DICT_SET_FAILED, "dict set failed");
return 0;
}
@@ -75,15 +64,15 @@ sc_cache_update (xlator_t *this, inode_t *inode, const char *link)
return 0;
if (!sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "updating cache: %s", link);
+ gf_msg_debug (this->name, 0,
+ "updating cache: %s", link);
sc->readlink = strdup (link);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "not updating existing cache: %s with %s",
- sc->readlink, link);
- }
+ } else
+ gf_msg_debug (this->name, 0,
+ "not updating existing cache: %s with %s",
+ sc->readlink, link);
+
return 0;
}
@@ -103,16 +92,17 @@ sc_cache_set (xlator_t *this, inode_t *inode, struct iatt *buf,
need_set = 1;
sc = CALLOC (1, sizeof (*sc));
if (!sc) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ SYMLINK_CACHE_MSG_NO_MEMORY,
+ "out of memory :(");
goto err;
}
}
if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "replacing old cache: %s with new cache: %s",
- sc->readlink, link);
+ gf_msg_debug (this->name, 0,
+ "replacing old cache: %s with new cache: %s",
+ sc->readlink, link);
FREE (sc->readlink);
sc->readlink = NULL;
}
@@ -120,24 +110,25 @@ sc_cache_set (xlator_t *this, inode_t *inode, struct iatt *buf,
if (link) {
sc->readlink = strdup (link);
if (!sc->readlink) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ SYMLINK_CACHE_MSG_NO_MEMORY,
+ "out of memory :(");
goto err;
}
}
sc->ctime = buf->ia_ctime;
- gf_log (this->name, GF_LOG_DEBUG,
- "setting symlink cache: %s", link);
+ gf_msg_debug (this->name, 0,
+ "setting symlink cache: %s", link);
if (need_set) {
ret = symlink_inode_ctx_set (inode, this, sc);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set inode context (%s)",
- strerror (-ret));
+ gf_msg (this->name, GF_LOG_ERROR,
+ -ret, SYMLINK_CACHE_MSG_NO_MEMORY,
+ "could not set inode context ");
goto err;
}
}
@@ -146,8 +137,7 @@ sc_cache_set (xlator_t *this, inode_t *inode, struct iatt *buf,
err:
if (sc) {
- if (sc->readlink)
- FREE (sc->readlink);
+ FREE (sc->readlink);
sc->readlink = NULL;
FREE (sc);
}
@@ -166,8 +156,8 @@ sc_cache_flush (xlator_t *this, inode_t *inode)
return 0;
if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flushing cache: %s", sc->readlink);
+ gf_msg_debug (this->name, 0,
+ "flushing cache: %s", sc->readlink);
FREE (sc->readlink);
sc->readlink = NULL;
@@ -196,8 +186,9 @@ sc_cache_validate (xlator_t *this, inode_t *inode, struct iatt *buf)
sc_cache_set (this, inode, buf, NULL);
inode_ctx_get (inode, this, &tmp_sc);
- if (!sc) {
- gf_log (this->name, GF_LOG_ERROR,
+ if (!tmp_sc) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SYMLINK_CACHE_MSG_NO_MEMORY,
"out of memory :(");
return 0;
}
@@ -209,8 +200,8 @@ sc_cache_validate (xlator_t *this, inode_t *inode, struct iatt *buf)
/* STALE */
if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flushing cache: %s", sc->readlink);
+ gf_msg_debug (this->name, 0,
+ "flushing cache: %s", sc->readlink);
FREE (sc->readlink);
sc->readlink = NULL;
@@ -242,7 +233,7 @@ sc_cache_get (xlator_t *this, inode_t *inode, char **link)
int
sc_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- const char *link, struct iatt *sbuf)
+ const char *link, struct iatt *sbuf, dict_t *xdata)
{
if (op_ret > 0)
sc_cache_update (this, frame->local, link);
@@ -250,14 +241,15 @@ sc_readlink_cbk (call_frame_t *frame, void *cookie,
inode_unref (frame->local);
frame->local = NULL;
- STACK_UNWIND (frame, op_ret, op_errno, link, sbuf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, link, sbuf,
+ xdata);
return 0;
}
int
sc_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
char *link = NULL;
struct iatt buf = {0, };
@@ -266,16 +258,17 @@ sc_readlink (call_frame_t *frame, xlator_t *this,
if (link) {
/* cache hit */
- gf_log (this->name, GF_LOG_DEBUG,
- "cache hit %s -> %s",
- loc->path, link);
+ gf_msg_debug (this->name, 0,
+ "cache hit %s -> %s",
+ loc->path, link);
/*
libglusterfsclient, nfs or any other translators
using buf in readlink_cbk should be aware that @buf
is 0 filled
*/
- STACK_UNWIND (frame, strlen (link), 0, link, &buf);
+ STACK_UNWIND_STRICT (readlink, frame, strlen (link), 0, link,
+ &buf, NULL);
FREE (link);
return 0;
}
@@ -285,7 +278,7 @@ sc_readlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, sc_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
@@ -295,7 +288,7 @@ int
sc_symlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
if (op_ret == 0) {
if (frame->local) {
@@ -303,22 +296,22 @@ sc_symlink_cbk (call_frame_t *frame, void *cookie,
}
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, preparent,
- postparent);
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
int
sc_symlink (call_frame_t *frame, xlator_t *this,
- const char *dst, loc_t *src)
+ const char *dst, loc_t *src, mode_t umask, dict_t *xdata)
{
frame->local = strdup (dst);
STACK_WIND (frame, sc_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- dst, src);
+ dst, src, umask, xdata);
return 0;
}
@@ -327,7 +320,7 @@ sc_symlink (call_frame_t *frame, xlator_t *this,
int
sc_lookup_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
if (op_ret == 0)
@@ -335,19 +328,20 @@ sc_lookup_cbk (call_frame_t *frame, void *cookie,
else
sc_cache_flush (this, inode);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr, postparent);
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
int
sc_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
STACK_WIND (frame, sc_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
@@ -363,21 +357,22 @@ sc_forget (xlator_t *this,
}
-int32_t
+int32_t
init (xlator_t *this)
{
-
if (!this->children || this->children->next)
{
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SYMLINK_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED,
"FATAL: volume (%s) not configured with exactly one "
"child", this->name);
return -1;
}
if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ SYMLINK_CACHE_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
}
return 0;
diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am
index a5ebc90bdca..4de88eff90a 100644
--- a/xlators/performance/write-behind/src/Makefile.am
+++ b/xlators/performance/write-behind/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = write-behind.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-write_behind_la_LDFLAGS = -module -avoidversion
+write_behind_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
write_behind_la_SOURCES = write-behind.c
write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = write-behind-mem-types.h
+noinst_HEADERS = write-behind-mem-types.h write-behind-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h
index 6184615195b..f64f429ce22 100644
--- a/xlators/performance/write-behind/src/write-behind-mem-types.h
+++ b/xlators/performance/write-behind/src/write-behind-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,10 +16,10 @@
enum gf_wb_mem_types_ {
gf_wb_mt_wb_file_t = gf_common_mt_end + 1,
- gf_wb_mt_wb_local_t,
gf_wb_mt_wb_request_t,
gf_wb_mt_iovec,
gf_wb_mt_wb_conf_t,
+ gf_wb_mt_wb_inode_t,
gf_wb_mt_end
};
#endif
diff --git a/xlators/performance/write-behind/src/write-behind-messages.h b/xlators/performance/write-behind/src/write-behind-messages.h
new file mode 100644
index 00000000000..d0934cff5a4
--- /dev/null
+++ b/xlators/performance/write-behind/src/write-behind-messages.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _WRITE_BEHIND_MESSAGES_H_
+#define _WRITE_BEHIND_MESSAGES_H_
+
+#include "glfs-message-id.h"
+
+/*! \file write-behind-messages.h
+ * \brief WRITE_BEHIND log-message IDs and their descriptions
+ *
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_WRITE_BEHIND_BASE GLFS_MSGID_COMP_WRITE_BEHIND
+#define GLFS_WRITE_BEHIND_NUM_MESSAGES 7
+#define GLFS_MSGID_END (GLFS_WRITE_BEHIND_BASE +\
+ GLFS_WRITE_BEHIND_NUM_MESSAGES + 1)
+
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_WRITE_BEHIND_BASE, "Invalid: Start of messages"
+
+
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_EXCEEDED_MAX_SIZE (GLFS_WRITE_BEHIND_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_INIT_FAILED (GLFS_WRITE_BEHIND_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_INVALID_ARGUMENT (GLFS_WRITE_BEHIND_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_NO_MEMORY (GLFS_WRITE_BEHIND_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_SIZE_NOT_SET (GLFS_WRITE_BEHIND_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_VOL_MISCONFIGURED (GLFS_WRITE_BEHIND_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define WRITE_BEHIND_MSG_RES_UNAVAILABLE (GLFS_WRITE_BEHIND_BASE + 7)
+
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+
+#endif /* _WRITE_BEHIND_MESSAGES_H_ */
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 4095527d828..98b448be332 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -1,29 +1,13 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*TODO: check for non null wb_file_data before getting wb_file */
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "glusterfs.h"
#include "logging.h"
@@ -35,2638 +19,2503 @@
#include "common-utils.h"
#include "call-stub.h"
#include "statedump.h"
+#include "defaults.h"
#include "write-behind-mem-types.h"
+#include "write-behind-messages.h"
+
+#define MAX_VECTOR_COUNT 8
+#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
+#define WB_WINDOW_SIZE 1048576 /* 1MB */
-#define MAX_VECTOR_COUNT 8
-#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
-#define WB_WINDOW_SIZE 1048576 /* 1MB */
-
typedef struct list_head list_head_t;
struct wb_conf;
-struct wb_page;
-struct wb_file;
-
-
-typedef struct wb_file {
- int disabled;
- uint64_t disable_till;
- size_t window_conf;
- size_t window_current;
- int32_t flags;
- size_t aggregate_current;
- int32_t refcount;
- int32_t op_ret;
- int32_t op_errno;
- list_head_t request;
- list_head_t passive_requests;
- fd_t *fd;
+struct wb_inode;
+
+typedef struct wb_inode {
+ ssize_t window_conf;
+ ssize_t window_current;
+ ssize_t transit; /* size of data stack_wound, and yet
+ to be fulfilled (wb_fulfill_cbk).
+ used for trickling_writes
+ */
+
+ list_head_t all; /* All requests, from enqueue() till destroy().
+ Used only for resetting generation
+ number when empty.
+ */
+ list_head_t todo; /* Work to do (i.e, STACK_WIND to server).
+ Once we STACK_WIND, the entry is taken
+ off the list. If it is non-sync write,
+ then we continue to track it via @liability
+ or @temptation depending on the status
+ of its writeback.
+ */
+ list_head_t liability; /* Non-sync writes which are lied
+ (STACK_UNWIND'ed to caller) but ack
+ from server not yet complete. This
+ is the "liability" which we hold, and
+ must guarantee that dependent operations
+ which arrive later (which overlap, etc.)
+ are issued only after their dependencies
+ in this list are "fulfilled".
+
+ Server acks for entries in this list
+ shrinks the window.
+
+ The sum total of all req->write_size
+ of entries in this list must be kept less
+ than the permitted window size.
+ */
+ list_head_t temptation; /* Operations for which we are tempted
+ to 'lie' (write-behind), but temporarily
+ holding off (because of insufficient
+ window capacity, etc.)
+
+ This is the list to look at to grow
+ the window (in __wb_pick_unwinds()).
+
+ Entries typically get chosen from
+ write-behind from this list, and therefore
+ get "upgraded" to the "liability" list.
+ */
+ list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC
+ which are currently STACK_WIND'ed towards the server.
+ This is for guaranteeing that no two overlapping
+ writes are in progress at the same time. Modules
+ like eager-lock in AFR depend on this behavior.
+ */
+ uint64_t gen; /* Liability generation number. Represents
+ the current 'state' of liability. Every
+ new addition to the liability list bumps
+ the generation number.
+
+ a newly arrived request is only required
+ to perform causal checks against the entries
+ in the liability list which were present
+ at the time of its addition. the generation
+ number at the time of its addition is stored
+ in the request and used during checks.
+
+ the liability list can grow while the request
+ waits in the todo list waiting for its
+ dependent operations to complete. however
+ it is not of the request's concern to depend
+ itself on those new entries which arrived
+ after it arrived (i.e, those that have a
+ liability generation higher than itself)
+ */
+ size_t size; /* Size of the file to catch write after EOF. */
gf_lock_t lock;
xlator_t *this;
-}wb_file_t;
+ int dontsync; /* If positive, dont pick lies for
+ * winding. This is needed to break infinite
+ * recursion during invocation of
+ * wb_process_queue from
+ * wb_fulfill_cbk in case of an
+ * error during fulfill.
+ */
+
+} wb_inode_t;
typedef struct wb_request {
- list_head_t list;
- list_head_t winds;
- list_head_t unwinds;
- list_head_t other_requests;
- call_stub_t *stub;
- size_t write_size;
- int32_t refcount;
- wb_file_t *file;
- union {
- struct {
- char write_behind;
- char stack_wound;
- char got_reply;
- char virgin;
- char flush_all; /* while trying to sync to back-end,
- * don't wait till a data of size
- * equal to configured aggregate-size
- * is accumulated, instead sync
- * whatever data currently present in
- * request queue.
- */
-
- }write_request;
-
- struct {
- char marked_for_resume;
- }other_requests;
- }flags;
+ list_head_t all;
+ list_head_t todo;
+ list_head_t lie; /* either in @liability or @temptation */
+ list_head_t winds;
+ list_head_t unwinds;
+ list_head_t wip;
+
+ call_stub_t *stub;
+
+ ssize_t write_size; /* currently held size
+ (after collapsing) */
+ size_t orig_size; /* size which arrived with the request.
+ This is the size by which we grow
+ the window when unwinding the frame.
+ */
+ size_t total_size; /* valid only in @head in wb_fulfill().
+ This is the size with which we perform
+ STACK_WIND to server and therefore the
+ amount by which we shrink the window.
+ */
+
+ int op_ret;
+ int op_errno;
+
+ int32_t refcount;
+ wb_inode_t *wb_inode;
+ glusterfs_fop_t fop;
+ gf_lkowner_t lk_owner;
+ struct iobref *iobref;
+ uint64_t gen; /* inode liability state at the time of
+ request arrival */
+
+ fd_t *fd;
+ int wind_count; /* number of sync-attempts. Only
+ for debug purposes */
+ struct {
+ size_t size; /* 0 size == till infinity */
+ off_t off;
+ int append:1; /* offset is invalid. only one
+ outstanding append at a time */
+ int tempted:1; /* true only for non-sync writes */
+ int lied:1; /* sin committed */
+ int fulfilled:1; /* got server acknowledgement */
+ int go:1; /* enough aggregating, good to go */
+ } ordering;
} wb_request_t;
-struct wb_conf {
- uint64_t aggregate_size;
- uint64_t window_size;
- uint64_t disable_till;
- gf_boolean_t enable_O_SYNC;
- gf_boolean_t flush_behind;
- gf_boolean_t enable_trickling_writes;
-};
-
+typedef struct wb_conf {
+ uint64_t aggregate_size;
+ uint64_t window_size;
+ gf_boolean_t flush_behind;
+ gf_boolean_t trickling_writes;
+ gf_boolean_t strict_write_ordering;
+ gf_boolean_t strict_O_DIRECT;
+ gf_boolean_t resync_after_fsync;
+} wb_conf_t;
-typedef struct wb_local {
- list_head_t winds;
- int32_t flags;
- int32_t wbflags;
- struct wb_file *file;
- wb_request_t *request;
- int op_ret;
- int op_errno;
- call_frame_t *frame;
- int32_t reply_count;
-} wb_local_t;
-
-typedef struct wb_conf wb_conf_t;
-typedef struct wb_page wb_page_t;
+void
+wb_process_queue (wb_inode_t *wb_inode);
-int32_t
-wb_process_queue (call_frame_t *frame, wb_file_t *file);
+wb_inode_t *
+__wb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ wb_inode_t *wb_inode = NULL;
-ssize_t
-wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds);
+ __inode_ctx_get (inode, this, &value);
+ wb_inode = (wb_inode_t *)(unsigned long) value;
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size,
- char enable_trickling_writes);
+ return wb_inode;
+}
-static int
-__wb_request_unref (wb_request_t *this)
+wb_inode_t *
+wb_inode_ctx_get (xlator_t *this, inode_t *inode)
{
- int ret = -1;
+ wb_inode_t *wb_inode = NULL;
- if (this->refcount <= 0) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "refcount(%d) is <= 0", this->refcount);
- goto out;
+ GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK (&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get (this, inode);
}
+ UNLOCK (&inode->lock);
+out:
+ return wb_inode;
+}
- ret = --this->refcount;
- if (this->refcount == 0) {
- list_del_init (&this->list);
- if (this->stub && this->stub->fop == GF_FOP_WRITE) {
- call_stub_destroy (this->stub);
- }
- GF_FREE (this);
- }
+/*
+ Below is a succinct explanation of the code deciding whether two regions
+ overlap, from Pavan <tcp@gluster.com>.
-out:
- return ret;
+ For any two ranges to be non-overlapping, either the end of the first
+ range is lesser than the start of the second, or vice versa. Example -
+
+ <---------> <-------------->
+ p q x y
+
+ ( q < x ) or (y < p) = > No overlap.
+
+ To check for *overlap*, we can negate this (using de morgan's laws), and
+ it becomes -
+
+ (q >= x ) and (y >= p)
+
+ Either that, or you write the negation using -
+
+ if (! ((q < x) or (y < p)) ) {
+ "Overlap"
+ }
+*/
+
+gf_boolean_t
+wb_requests_overlap (wb_request_t *req1, wb_request_t *req2)
+{
+ uint64_t r1_start = 0;
+ uint64_t r1_end = 0;
+ uint64_t r2_start = 0;
+ uint64_t r2_end = 0;
+ enum _gf_boolean do_overlap = 0;
+
+ r1_start = req1->ordering.off;
+ if (req1->ordering.size)
+ r1_end = r1_start + req1->ordering.size - 1;
+ else
+ r1_end = ULLONG_MAX;
+
+ r2_start = req2->ordering.off;
+ if (req2->ordering.size)
+ r2_end = r2_start + req2->ordering.size - 1;
+ else
+ r2_end = ULLONG_MAX;
+
+ do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start));
+
+ return do_overlap;
}
-static int
-wb_request_unref (wb_request_t *this)
+gf_boolean_t
+wb_requests_conflict (wb_request_t *lie, wb_request_t *req)
{
- wb_file_t *file = NULL;
- int ret = 0;
+ wb_conf_t *conf = NULL;
- if (this == NULL) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "request is NULL");
- goto out;
- }
-
- file = this->file;
- LOCK (&file->lock);
- {
- ret = __wb_request_unref (this);
- }
- UNLOCK (&file->lock);
+ conf = req->wb_inode->this->private;
-out:
- return ret;
+ if (lie == req)
+ /* request cannot conflict with itself */
+ return _gf_false;
+
+ if (lie->gen >= req->gen)
+ /* this liability entry was behind
+ us in the todo list */
+ return _gf_false;
+
+ if (lie->ordering.append)
+ /* all modifications wait for the completion
+ of outstanding append */
+ return _gf_true;
+
+ if (conf->strict_write_ordering)
+ /* We are sure (lie->gen < req->gen) by now. So
+ skip overlap check if strict write ordering is
+ requested and always return "conflict" against a
+ lower generation lie. */
+ return _gf_true;
+
+ return wb_requests_overlap (lie, req);
}
-static wb_request_t *
-__wb_request_ref (wb_request_t *this)
+wb_request_t *
+wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
{
- if (this->refcount < 0) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "refcount(%d) is < 0", this->refcount);
- return NULL;
+ wb_request_t *each = NULL;
+
+ list_for_each_entry (each, &wb_inode->liability, lie) {
+ if (wb_requests_conflict (each, req))
+ return each;
}
- this->refcount++;
- return this;
+ return NULL;
}
-wb_request_t *
-wb_request_ref (wb_request_t *this)
+gf_boolean_t
+wb_wip_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
{
- wb_file_t *file = NULL;
- if (this == NULL) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "request is NULL");
- return NULL;
- }
+ wb_request_t *each = NULL;
- file = this->file;
- LOCK (&file->lock);
- {
- this = __wb_request_ref (this);
+ if (req->stub->fop != GF_FOP_WRITE)
+ /* non-writes fundamentally never conflict with WIP requests */
+ return _gf_false;
+
+ list_for_each_entry (each, &wb_inode->wip, wip) {
+ if (each == req)
+ /* request never conflicts with itself,
+ though this condition should never occur.
+ */
+ continue;
+
+ if (wb_requests_overlap (each, req))
+ return _gf_true;
}
- UNLOCK (&file->lock);
- return this;
+ return _gf_false;
}
-wb_request_t *
-wb_enqueue (wb_file_t *file, call_stub_t *stub)
-{
- wb_request_t *request = NULL, *tmp = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- int32_t count = 0;
-
- request = GF_CALLOC (1, sizeof (*request), gf_wb_mt_wb_request_t);
- if (request == NULL) {
+static int
+__wb_request_unref (wb_request_t *req)
+{
+ int ret = -1;
+ wb_inode_t *wb_inode = NULL;
+
+ wb_inode = req->wb_inode;
+
+ if (req->refcount <= 0) {
+ gf_msg ("wb-request", GF_LOG_WARNING,
+ 0, WRITE_BEHIND_MSG_RES_UNAVAILABLE,
+ "refcount(%d) is <= 0", req->refcount);
goto out;
}
- INIT_LIST_HEAD (&request->list);
- INIT_LIST_HEAD (&request->winds);
- INIT_LIST_HEAD (&request->unwinds);
- INIT_LIST_HEAD (&request->other_requests);
+ ret = --req->refcount;
+ if (req->refcount == 0) {
+ list_del_init (&req->todo);
+ list_del_init (&req->lie);
+ list_del_init (&req->wip);
- request->stub = stub;
- request->file = file;
+ list_del_init (&req->all);
+ if (list_empty (&wb_inode->all)) {
+ wb_inode->gen = 0;
+ /* in case of accounting errors? */
+ wb_inode->window_current = 0;
+ }
- frame = stub->frame;
- local = frame->local;
- if (local) {
- local->request = request;
- }
+ list_del_init (&req->winds);
+ list_del_init (&req->unwinds);
- if (stub->fop == GF_FOP_WRITE) {
- vector = stub->args.writev.vector;
- count = stub->args.writev.count;
+ if (req->stub && req->ordering.tempted) {
+ call_stub_destroy (req->stub);
+ req->stub = NULL;
+ } /* else we would have call_resume()'ed */
- request->write_size = iov_length (vector, count);
- if (local) {
- local->op_ret = request->write_size;
- local->op_errno = 0;
- }
+ if (req->iobref)
+ iobref_unref (req->iobref);
+
+ if (req->fd)
+ fd_unref (req->fd);
- request->flags.write_request.virgin = 1;
+ GF_FREE (req);
}
+out:
+ return ret;
+}
- LOCK (&file->lock);
- {
- list_add_tail (&request->list, &file->request);
- if (stub->fop == GF_FOP_WRITE) {
- /* reference for stack winding */
- __wb_request_ref (request);
- /* reference for stack unwinding */
- __wb_request_ref (request);
+static int
+wb_request_unref (wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
+ int ret = -1;
- file->aggregate_current += request->write_size;
- } else {
- list_for_each_entry (tmp, &file->request, list) {
- if (tmp->stub && tmp->stub->fop
- == GF_FOP_WRITE) {
- tmp->flags.write_request.flush_all = 1;
- }
- }
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- /*reference for resuming */
- __wb_request_ref (request);
- }
+ wb_inode = req->wb_inode;
+
+ LOCK (&wb_inode->lock);
+ {
+ ret = __wb_request_unref (req);
}
- UNLOCK (&file->lock);
+ UNLOCK (&wb_inode->lock);
out:
- return request;
+ return ret;
}
-wb_file_t *
-wb_file_create (xlator_t *this, fd_t *fd, int32_t flags)
+static wb_request_t *
+__wb_request_ref (wb_request_t *req)
{
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- file = GF_CALLOC (1, sizeof (*file), gf_wb_mt_wb_file_t);
- if (file == NULL) {
+ if (req->refcount < 0) {
+ gf_msg ("wb-request", GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_RES_UNAVAILABLE,
+ "refcount(%d) is < 0", req->refcount);
+ req = NULL;
goto out;
}
- INIT_LIST_HEAD (&file->request);
- INIT_LIST_HEAD (&file->passive_requests);
-
- /*
- fd_ref() not required, file should never decide the existance of
- an fd
- */
- file->fd= fd;
- file->disable_till = conf->disable_till;
- file->this = this;
- file->refcount = 1;
- file->window_conf = conf->window_size;
- file->flags = flags;
-
- fd_ctx_set (fd, this, (uint64_t)(long)file);
+ req->refcount++;
out:
- return file;
+ return req;
}
-void
-wb_file_destroy (wb_file_t *file)
+
+wb_request_t *
+wb_request_ref (wb_request_t *req)
{
- int32_t refcount = 0;
+ wb_inode_t *wb_inode = NULL;
- LOCK (&file->lock);
- {
- refcount = --file->refcount;
- }
- UNLOCK (&file->lock);
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- if (!refcount){
- LOCK_DESTROY (&file->lock);
- GF_FREE (file);
+ wb_inode = req->wb_inode;
+ LOCK (&wb_inode->lock);
+ {
+ req = __wb_request_ref (req);
}
+ UNLOCK (&wb_inode->lock);
- return;
+out:
+ return req;
}
-int32_t
-wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf)
+gf_boolean_t
+wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
{
- wb_local_t *local = NULL;
- list_head_t *winds = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL, *dummy = NULL;
- wb_local_t *per_request_local = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
+ wb_request_t *req = NULL;
+ GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
+ GF_VALIDATE_OR_GOTO (wb_inode->this->name, stub, out);
+
+ req = GF_CALLOC (1, sizeof (*req), gf_wb_mt_wb_request_t);
+ if (!req)
+ goto out;
- local = frame->local;
- winds = &local->winds;
- file = local->file;
+ INIT_LIST_HEAD (&req->all);
+ INIT_LIST_HEAD (&req->todo);
+ INIT_LIST_HEAD (&req->lie);
+ INIT_LIST_HEAD (&req->winds);
+ INIT_LIST_HEAD (&req->unwinds);
+ INIT_LIST_HEAD (&req->wip);
- LOCK (&file->lock);
+ req->stub = stub;
+ req->wb_inode = wb_inode;
+ req->fop = stub->fop;
+ req->ordering.tempted = tempted;
+
+ if (stub->fop == GF_FOP_WRITE) {
+ req->write_size = iov_length (stub->args.vector,
+ stub->args.count);
+
+ /* req->write_size can change as we collapse
+ small writes. But the window needs to grow
+ only by how much we acknowledge the app. so
+ copy the original size in orig_size for the
+ purpose of accounting.
+ */
+ req->orig_size = req->write_size;
+
+ /* Let's be optimistic that we can
+ lie about it
+ */
+ req->op_ret = req->write_size;
+ req->op_errno = 0;
+
+ if (stub->args.fd->flags & O_APPEND)
+ req->ordering.append = 1;
+ }
+
+ req->lk_owner = stub->frame->root->lk_owner;
+
+ switch (stub->fop) {
+ case GF_FOP_WRITE:
+ LOCK (&wb_inode->lock);
+ {
+ if (wb_inode->size < stub->args.offset) {
+ req->ordering.off = wb_inode->size;
+ req->ordering.size = stub->args.offset
+ + req->write_size
+ - wb_inode->size;
+ } else {
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = req->write_size;
+ }
+
+ if (wb_inode->size < stub->args.offset + req->write_size)
+ wb_inode->size = stub->args.offset
+ + req->write_size;
+ }
+ UNLOCK (&wb_inode->lock);
+
+ req->fd = fd_ref (stub->args.fd);
+
+ break;
+ case GF_FOP_READ:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = stub->args.size;
+
+ req->fd = fd_ref (stub->args.fd);
+
+ break;
+ case GF_FOP_TRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->size = req->ordering.off;
+ }
+ UNLOCK (&wb_inode->lock);
+ break;
+ case GF_FOP_FTRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->size = req->ordering.off;
+ }
+ UNLOCK (&wb_inode->lock);
+
+ req->fd = fd_ref (stub->args.fd);
+
+ break;
+ default:
+ if (stub && stub->args.fd)
+ req->fd = fd_ref (stub->args.fd);
+
+ break;
+ }
+
+ LOCK (&wb_inode->lock);
{
- list_for_each_entry_safe (request, dummy, winds, winds) {
- request->flags.write_request.got_reply = 1;
-
- if (!request->flags.write_request.write_behind
- && (op_ret == -1)) {
- per_request_local = request->stub->frame->local;
- per_request_local->op_ret = op_ret;
- per_request_local->op_errno = op_errno;
- }
+ list_add_tail (&req->all, &wb_inode->all);
- if (request->flags.write_request.write_behind) {
- file->window_current -= request->write_size;
- }
+ req->gen = wb_inode->gen;
- __wb_request_unref (request);
- }
-
- if (op_ret == -1) {
- file->op_ret = op_ret;
- file->op_errno = op_errno;
- }
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ list_add_tail (&req->todo, &wb_inode->todo);
+ __wb_request_ref (req); /* for wind */
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
+ if (req->ordering.tempted) {
+ list_add_tail (&req->lie, &wb_inode->temptation);
+ __wb_request_ref (req); /* for unwind */
+ }
}
+ UNLOCK (&wb_inode->lock);
- /* safe place to do fd_unref */
- fd_unref (fd);
-
- STACK_DESTROY (frame->root);
+out:
+ if (!req)
+ return _gf_false;
- return 0;
+ return _gf_true;
}
-ssize_t
-wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
+gf_boolean_t
+wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub)
{
- wb_request_t *dummy = NULL, *request = NULL;
- wb_request_t *first_request = NULL, *next = NULL;
- size_t total_count = 0, count = 0;
- size_t copied = 0;
- call_frame_t *sync_frame = NULL;
- struct iobref *iobref = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- ssize_t current_size = 0, bytes = 0;
- size_t bytecount = 0;
- wb_conf_t *conf = NULL;
- fd_t *fd = NULL;
- int32_t op_errno = -1;
+ return wb_enqueue_common (wb_inode, stub, 0);
+}
- if (frame == NULL) {
- op_errno = EINVAL;
- goto out;
- }
- conf = file->this->private;
- list_for_each_entry (request, winds, winds) {
- total_count += request->stub->args.writev.count;
- if (total_count > 0) {
- break;
- }
- }
+gf_boolean_t
+wb_enqueue_tempted (wb_inode_t *wb_inode, call_stub_t *stub)
+{
+ return wb_enqueue_common (wb_inode, stub, 1);
+}
- if (total_count == 0) {
- gf_log (file->this->name, GF_LOG_DEBUG, "no vectors are to be"
- "synced");
- goto out;
- }
-
- list_for_each_entry_safe (request, dummy, winds, winds) {
- if (!vector) {
- vector = GF_MALLOC (VECTORSIZE (MAX_VECTOR_COUNT),
- gf_wb_mt_iovec);
- if (vector == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- gf_log (file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- iobref = iobref_new ();
- if (iobref == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- gf_log (file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
+wb_inode_t *
+__wb_inode_create (xlator_t *this, inode_t *inode)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- gf_log (file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
- INIT_LIST_HEAD (&local->winds);
-
- first_request = request;
- current_size = 0;
- }
+ conf = this->private;
- count += request->stub->args.writev.count;
- bytecount = VECTORSIZE (request->stub->args.writev.count);
- memcpy (((char *)vector)+copied,
- request->stub->args.writev.vector,
- bytecount);
- copied += bytecount;
-
- current_size += request->write_size;
-
- if (request->stub->args.writev.iobref) {
- iobref_merge (iobref,
- request->stub->args.writev.iobref);
- }
+ wb_inode = GF_CALLOC (1, sizeof (*wb_inode), gf_wb_mt_wb_inode_t);
+ if (!wb_inode)
+ goto out;
- next = NULL;
- if (request->winds.next != winds) {
- next = list_entry (request->winds.next,
- wb_request_t, winds);
- }
+ INIT_LIST_HEAD (&wb_inode->all);
+ INIT_LIST_HEAD (&wb_inode->todo);
+ INIT_LIST_HEAD (&wb_inode->liability);
+ INIT_LIST_HEAD (&wb_inode->temptation);
+ INIT_LIST_HEAD (&wb_inode->wip);
- list_del_init (&request->winds);
- list_add_tail (&request->winds, &local->winds);
+ wb_inode->this = this;
- if ((!next)
- || ((count + next->stub->args.writev.count)
- > MAX_VECTOR_COUNT)
- || ((current_size + next->write_size)
- > conf->aggregate_size))
- {
- sync_frame = copy_frame (frame);
- if (sync_frame == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- gf_log (file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
+ wb_inode->window_conf = conf->window_size;
- sync_frame->local = local;
- local->file = file;
+ LOCK_INIT (&wb_inode->lock);
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
-
- fd_ref (fd);
-
- bytes += current_size;
- STACK_WIND (sync_frame,
- wb_sync_cbk,
- FIRST_CHILD(sync_frame->this),
- FIRST_CHILD(sync_frame->this)->fops->writev,
- fd, vector,
- count,
- first_request->stub->args.writev.off,
- iobref);
-
- iobref_unref (iobref);
- GF_FREE (vector);
- first_request = NULL;
- iobref = NULL;
- vector = NULL;
- sync_frame = NULL;
- local = NULL;
- copied = count = 0;
- }
+ ret = __inode_ctx_put (inode, this, (uint64_t)(unsigned long)wb_inode);
+ if (ret) {
+ GF_FREE (wb_inode);
+ wb_inode = NULL;
}
out:
- if (sync_frame != NULL) {
- sync_frame->local = NULL;
- STACK_DESTROY (sync_frame->root);
- }
+ return wb_inode;
+}
- if (local != NULL) {
- /* had we winded these requests, we would have unrefed
- * in wb_sync_cbk.
- */
- list_for_each_entry_safe (request, dummy, &local->winds,
- winds) {
- wb_request_unref (request);
- }
- GF_FREE (local);
- }
+wb_inode_t *
+wb_inode_create (xlator_t *this, inode_t *inode)
+{
+ wb_inode_t *wb_inode = NULL;
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
- if (vector != NULL) {
- GF_FREE (vector);
+ LOCK (&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get (this, inode);
+ if (!wb_inode)
+ wb_inode = __wb_inode_create (this, inode);
}
+ UNLOCK (&inode->lock);
- if (bytes == -1) {
- /*
- * had we winded these requests, we would have unrefed
- * in wb_sync_cbk.
- */
+out:
+ return wb_inode;
+}
- list_for_each_entry_safe (request, dummy, &local->winds,
- winds) {
- wb_request_unref (request);
- }
- if (file != NULL) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = op_errno;
- }
- UNLOCK (&file->lock);
- }
- }
+void
+wb_inode_destroy (wb_inode_t *wb_inode)
+{
+ GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
- return bytes;
+ LOCK_DESTROY (&wb_inode->lock);
+ GF_FREE (wb_inode);
+out:
+ return;
}
-int32_t
-wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf)
+void
+__wb_fulfill_request (wb_request_t *req)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
+ wb_inode_t *wb_inode = NULL;
- request = local->request;
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ wb_inode = req->wb_inode;
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ req->ordering.fulfilled = 1;
+ wb_inode->window_current -= req->total_size;
+ wb_inode->transit -= req->total_size;
- if (request != NULL) {
- wb_request_unref (request);
- }
+ if (!req->ordering.lied) {
+ /* TODO: fail the req->frame with error if
+ necessary
+ */
+ }
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ __wb_request_unref (req);
+}
- STACK_DESTROY (process_frame->root);
- }
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+/* get a flush/fsync waiting on req */
+wb_request_t *
+__wb_request_waiting_on (wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *trav = NULL;
+
+ wb_inode = req->wb_inode;
- fd_unref (fd);
+ list_for_each_entry (trav, &wb_inode->todo, todo) {
+ if ((trav->fd == req->fd)
+ && ((trav->stub->fop == GF_FOP_FLUSH)
+ || (trav->stub->fop == GF_FOP_FSYNC))
+ && (trav->gen >= req->gen))
+ return trav;
}
- return 0;
+ return NULL;
}
-static int32_t
-wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+void
+__wb_add_request_for_retry (wb_request_t *req)
{
- STACK_WIND (frame, wb_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
-}
+ wb_inode_t *wb_inode = NULL;
+ if (!req)
+ goto out;
-int32_t
-wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
-
- if (loc->inode) {
- /* FIXME: fd_lookup extends life of fd till stat returns */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- iter_fd = NULL;
- }
- }
- }
+ wb_inode = req->wb_inode;
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ /* response was unwound and no waiter waiting on this request, retry
+ till a flush or fsync (subject to conf->resync_after_fsync).
+ */
+ wb_inode->transit -= req->total_size;
- local->file = file;
+ req->total_size = 0;
- frame->local = local;
+ list_del_init (&req->winds);
+ list_del_init (&req->todo);
+ list_del_init (&req->wip);
- if (file) {
- stub = fop_stat_stub (frame, wb_stat_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ /* sanitize ordering flags to retry */
+ req->ordering.go = 0;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ /* Add back to todo list to retry */
+ list_add (&req->todo, &wb_inode->todo);
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
+out:
+ return;
+}
- } else {
- STACK_WIND (frame, wb_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- }
- return 0;
+void
+__wb_add_head_for_retry (wb_request_t *head)
+{
+ wb_request_t *req = NULL, *tmp = NULL;
-unwind:
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
+ if (!head)
+ goto out;
- if (stub) {
- call_stub_destroy (stub);
+ list_for_each_entry_safe_reverse (req, tmp, &head->winds,
+ winds) {
+ __wb_add_request_for_retry (req);
}
-
- if (iter_fd != NULL) {
- fd_unref (iter_fd);
+
+ __wb_add_request_for_retry (head);
+
+out:
+ return;
+}
+
+
+void
+wb_add_head_for_retry (wb_request_t *head)
+{
+ if (!head)
+ goto out;
+
+ LOCK (&head->wb_inode->lock);
+ {
+ __wb_add_head_for_retry (head);
}
+ UNLOCK (&head->wb_inode->lock);
- return 0;
+out:
+ return;
}
-int32_t
-wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf)
+void
+__wb_fulfill_request_err (wb_request_t *req, int32_t op_errno)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
-
- local = frame->local;
- file = local->file;
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *waiter = NULL;
+ wb_conf_t *conf = NULL;
+
+ wb_inode = req->wb_inode;
+
+ conf = wb_inode->this->private;
+
+ req->op_ret = -1;
+ req->op_errno = op_errno;
+
+ if (req->ordering.lied)
+ waiter = __wb_request_waiting_on (req);
+
+ if (!req->ordering.lied || waiter) {
+ if (!req->ordering.lied) {
+ /* response to app is still pending, send failure in
+ * response.
+ */
+ } else {
+ /* response was sent, store the error in a
+ * waiter (either an fsync or flush).
+ */
+ waiter->op_ret = -1;
+ waiter->op_errno = op_errno;
+ }
- request = local->request;
- if ((file != NULL) && (request != NULL)) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
+ if (!req->ordering.lied
+ || (waiter->stub->fop == GF_FOP_FLUSH)
+ || ((waiter->stub->fop == GF_FOP_FSYNC)
+ && !conf->resync_after_fsync)) {
+ /* No retry needed, forget the request */
+ __wb_fulfill_request (req);
+ return;
}
}
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ __wb_add_request_for_retry (req);
- return 0;
+ return;
}
-int32_t
-wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+void
+wb_head_done (wb_request_t *head)
{
- STACK_WIND (frame,
- wb_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_inode_t *wb_inode = NULL;
+
+ wb_inode = head->wb_inode;
+
+ LOCK (&wb_inode->lock);
+ {
+ list_for_each_entry_safe (req, tmp, &head->winds, winds) {
+ __wb_fulfill_request (req);
+ }
+
+ __wb_fulfill_request (head);
+ }
+ UNLOCK (&wb_inode->lock);
}
-int32_t
-wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+void
+__wb_fulfill_err (wb_request_t *head, int op_errno)
{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int op_errno = EINVAL;
+ wb_request_t *req = NULL, *tmp = NULL;
- if ((!IA_ISDIR (fd->inode->ia_type))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
+ if (!head)
+ goto out;
- STACK_UNWIND_STRICT (fstat, frame, -1, EBADFD, NULL);
- return 0;
- }
+ head->wb_inode->dontsync++;
- file = (wb_file_t *)(long)tmp_file;
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL);
- return 0;
+ list_for_each_entry_safe_reverse (req, tmp, &head->winds,
+ winds) {
+ __wb_fulfill_request_err (req, op_errno);
}
- local->file = file;
+ __wb_fulfill_request_err (head, op_errno);
- frame->local = local;
+out:
+ return;
+}
- if (file) {
- stub = fop_fstat_stub (frame, wb_fstat_helper, fd);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
- /*
- FIXME:should the request queue be emptied in case of error?
- */
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
- } else {
- STACK_WIND (frame,
- wb_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- }
+void
+wb_fulfill_err (wb_request_t *head, int op_errno)
+{
+ wb_inode_t *wb_inode = NULL;
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
+ wb_inode = head->wb_inode;
- if (stub) {
- call_stub_destroy (stub);
- }
+ LOCK (&wb_inode->lock);
+ {
+ __wb_fulfill_err (head, op_errno);
- return 0;
+ }
+ UNLOCK (&wb_inode->lock);
}
-int32_t
-wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- call_frame_t *process_frame = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if ((request != NULL) && (file != NULL)) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+void
+__wb_modify_write_request (wb_request_t *req, int synced_size)
+{
+ struct iovec *vector = NULL;
+ int count = 0;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+ if (!req || synced_size == 0)
+ goto out;
- if (request) {
- wb_request_unref (request);
- }
+ req->write_size -= synced_size;
+ req->stub->args.offset += synced_size;
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ vector = req->stub->args.vector;
+ count = req->stub->args.count;
- STACK_DESTROY (process_frame->root);
- }
+ req->stub->args.count = iov_subset (vector, count, synced_size,
+ iov_length (vector, count), vector);
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+out:
+ return;
+}
- fd_unref (fd);
+int
+__wb_fulfill_short_write (wb_request_t *req, int size, gf_boolean_t *fulfilled)
+{
+ int accounted_size = 0;
+
+ if (req == NULL)
+ goto out;
+
+ if (req->write_size <= size) {
+ accounted_size = req->write_size;
+ __wb_fulfill_request (req);
+ *fulfilled = 1;
+ } else {
+ accounted_size = size;
+ __wb_modify_write_request (req, size);
}
- return 0;
+out:
+ return accounted_size;
}
-
-static int32_t
-wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+void
+wb_fulfill_short_write (wb_request_t *head, int size)
{
- STACK_WIND (frame,
- wb_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *req = NULL, *next = NULL;
+ int accounted_size = 0;
+ gf_boolean_t fulfilled = _gf_false;
- return 0;
-}
+ if (!head)
+ goto out;
+ wb_inode = head->wb_inode;
-int32_t
-wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = ENOMEM;
+ req = head;
- if (loc->inode)
+ LOCK (&wb_inode->lock);
{
- /*
- FIXME: fd_lookup extends life of fd till the execution of
- truncate_cbk
- */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)){
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- }
- }
- }
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ /* hold a reference to head so that __wb_fulfill_short_write
+ * won't free it. We need head for a cleaner list traversal as
+ * list_for_each_entry_safe doesn't iterate over "head" member.
+ * So, if we pass "next->winds" as head to list_for_each_entry,
+ * "next" is skipped. For a simpler logic we need to traverse
+ * the list in order. So, we start traversal from
+ * "head->winds" and hence we want head to be alive.
+ */
+ __wb_request_ref (head);
- local->file = file;
-
- frame->local = local;
- if (file) {
- stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
- offset);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ next = list_entry (head->winds.next, wb_request_t, winds);
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
+ accounted_size = __wb_fulfill_short_write (head, size,
+ &fulfilled);
+
+ size -= accounted_size;
+
+ if (size == 0) {
+ if (fulfilled)
+ req = next;
+
+ goto done;
}
- } else {
- STACK_WIND (frame,
- wb_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
- }
- return 0;
+ list_for_each_entry_safe (req, next, &head->winds, winds) {
+ accounted_size = __wb_fulfill_short_write (req, size,
+ &fulfilled);
+ size -= accounted_size;
-unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL);
+ if (size == 0) {
+ if (fulfilled)
+ req = next;
+ break;
+ }
- if (stub) {
- call_stub_destroy (stub);
+ }
}
+done:
+ UNLOCK (&wb_inode->lock);
- return 0;
+ __wb_request_unref (head);
+
+ wb_add_head_for_retry (req);
+out:
+ return;
}
+int
+wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *head = NULL;
-int32_t
-wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if ((request != NULL) && (file != NULL)) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ head = frame->local;
+ frame->local = NULL;
+
+ wb_inode = head->wb_inode;
+
+ if (op_ret == -1) {
+ wb_fulfill_err (head, op_errno);
+ } else if (op_ret < head->total_size) {
+ wb_fulfill_short_write (head, op_ret);
+ } else {
+ wb_head_done (head);
}
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf);
+ wb_process_queue (wb_inode);
+
+ STACK_DESTROY (frame->root);
return 0;
}
-static int32_t
-wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame,
- wb_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
-}
+#define WB_IOV_LOAD(vec, cnt, req, head) do { \
+ memcpy (&vec[cnt], req->stub->args.vector, \
+ (req->stub->args.count * sizeof(vec[0]))); \
+ cnt += req->stub->args.count; \
+ head->total_size += req->write_size; \
+ } while (0)
-
-int32_t
-wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int op_errno = EINVAL;
-
- if ((!IA_ISDIR (fd->inode->ia_type))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (ftruncate, frame, -1, EBADFD,
- NULL, NULL);
- return 0;
- }
- file = (wb_file_t *)(long)tmp_file;
+int
+wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
+{
+ struct iovec vector[MAX_VECTOR_COUNT];
+ int count = 0;
+ wb_request_t *req = NULL;
+ call_frame_t *frame = NULL;
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM,
- NULL, NULL);
- return 0;
- }
+ /* make sure head->total_size is updated before we run into any
+ * errors
+ */
- local->file = file;
+ WB_IOV_LOAD (vector, count, head, head);
- frame->local = local;
+ list_for_each_entry (req, &head->winds, winds) {
+ WB_IOV_LOAD (vector, count, req, head);
- if (file) {
- stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
- offset);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (iobref_merge (head->stub->args.iobref,
+ req->stub->args.iobref))
+ goto err;
+ }
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
+ if (!frame)
+ goto err;
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
- } else {
- STACK_WIND (frame,
- wb_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- }
+ frame->root->lk_owner = head->lk_owner;
+ frame->local = head;
- return 0;
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->transit += head->total_size;
+ }
+ UNLOCK (&wb_inode->lock);
-unwind:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL);
+ STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->writev,
+ head->fd, vector, count,
+ head->stub->args.offset,
+ head->stub->args.flags,
+ head->stub->args.iobref, NULL);
- if (stub) {
- call_stub_destroy (stub);
- }
+ return 0;
+err:
+ /* iobref merge or frame creation failure */
+ wb_fulfill_err (head, ENOMEM);
- return 0;
+ return ENOMEM;
}
-int32_t
-wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+#define NEXT_HEAD(head, req) do { \
+ if (head) \
+ ret |= wb_fulfill_head (wb_inode, head); \
+ head = req; \
+ expected_offset = req->stub->args.offset + \
+ req->write_size; \
+ curr_aggregate = 0; \
+ vector_count = 0; \
+ } while (0)
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost);
- if (request) {
- wb_request_unref (request);
- }
+int
+wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *head = NULL;
+ wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
+ off_t expected_offset = 0;
+ size_t curr_aggregate = 0;
+ size_t vector_count = 0;
+ int ret = 0;
+
+ conf = wb_inode->this->private;
+
+ list_for_each_entry_safe (req, tmp, liabilities, winds) {
+ list_del_init (&req->winds);
+
+ if (!head) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (req->fd != head->fd) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (!is_same_lkowner (&req->lk_owner, &head->lk_owner)) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (expected_offset != req->stub->args.offset) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if ((curr_aggregate + req->write_size) > conf->aggregate_size) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (vector_count + req->stub->args.count >
+ MAX_VECTOR_COUNT) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ list_add_tail (&req->winds, &head->winds);
+ curr_aggregate += req->write_size;
+ vector_count += req->stub->args.count;
+ }
+
+ if (head)
+ ret |= wb_fulfill_head (wb_inode, head);
+
+ return ret;
+}
- if (request && (process_frame != NULL)) {
- ret = wb_process_queue (process_frame, file);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
- STACK_DESTROY (process_frame->root);
- }
+void
+wb_do_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ call_frame_t *frame = NULL;
+ struct iatt buf = {0, };
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ list_for_each_entry_safe (req, tmp, lies, unwinds) {
+ frame = req->stub->frame;
- fd_unref (fd);
+ STACK_UNWIND_STRICT (writev, frame, req->op_ret, req->op_errno,
+ &buf, &buf, NULL); /* :O */
+ req->stub->frame = NULL;
+
+ list_del_init (&req->unwinds);
+ wb_request_unref (req);
}
- return 0;
+ return;
}
-static int32_t
-wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+void
+__wb_pick_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
{
- STACK_WIND (frame,
- wb_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc,
- stbuf,
- valid);
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- return 0;
-}
+ list_for_each_entry_safe (req, tmp, &wb_inode->temptation, lie) {
+ if (!req->ordering.fulfilled &&
+ wb_inode->window_current > wb_inode->window_conf)
+ continue;
+ list_del_init (&req->lie);
+ list_move_tail (&req->unwinds, lies);
-int32_t
-wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode->window_current += req->orig_size;
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail (&req->lie, &wb_inode->liability);
- frame->local = local;
-
- if (!(valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME))) {
- STACK_WIND (frame,
- wb_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
- goto out;
- }
+ req->ordering.lied = 1;
- if (loc->inode) {
- /*
- FIXME: fd_lookup extends life of fd till the execution
- of wb_utimens_cbk
- */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- }
- }
+ wb_inode->gen++;
+ }
+ }
- }
+ return;
+}
- local->file = file;
- if (file) {
- stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, valid);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
+int
+__wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req)
+{
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int ret = -1;
+ ssize_t required_size = 0;
+ size_t holder_len = 0;
+ size_t req_len = 0;
+
+ if (!holder->iobref) {
+ holder_len = iov_length (holder->stub->args.vector,
+ holder->stub->args.count);
+ req_len = iov_length (req->stub->args.vector,
+ req->stub->args.count);
+
+ required_size = max ((THIS->ctx->page_size),
+ (holder_len + req_len));
+ iobuf = iobuf_get2 (req->wb_inode->this->ctx->iobuf_pool,
+ required_size);
+ if (iobuf == NULL) {
+ goto out;
}
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ iobuf_unref (iobuf);
+ goto out;
}
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
+ ret = iobref_add (iobref, iobuf);
+ if (ret != 0) {
+ gf_msg (req->wb_inode->this->name, GF_LOG_WARNING,
+ -ret, WRITE_BEHIND_MSG_INVALID_ARGUMENT,
+ "cannot add iobuf (%p) into iobref (%p)",
+ iobuf, iobref);
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ goto out;
}
- } else {
- STACK_WIND (frame,
- wb_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc,
- stbuf, valid);
- }
- return 0;
-unwind:
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno,
- NULL, NULL);
+ iov_unload (iobuf->ptr, holder->stub->args.vector,
+ holder->stub->args.count);
+ holder->stub->args.vector[0].iov_base = iobuf->ptr;
+ holder->stub->args.count = 1;
- if (stub) {
- call_stub_destroy (stub);
+ iobref_unref (holder->stub->args.iobref);
+ holder->stub->args.iobref = iobref;
+
+ iobuf_unref (iobuf);
+
+ holder->iobref = iobref_ref (iobref);
}
+
+ ptr = holder->stub->args.vector[0].iov_base + holder->write_size;
+
+ iov_unload (ptr, req->stub->args.vector,
+ req->stub->args.count);
+
+ holder->stub->args.vector[0].iov_len += req->write_size;
+ holder->write_size += req->write_size;
+ holder->ordering.size += req->write_size;
+
+ ret = 0;
out:
- return 0;
+ return ret;
}
-int32_t
-wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+
+void
+__wb_preprocess_winds (wb_inode_t *wb_inode)
{
- int32_t wbflags = 0, flags = 0;
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
- wb_local_t *local = NULL;
+ off_t offset_expected = 0;
+ ssize_t space_left = 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *holder = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
+ ssize_t page_size = 0;
+
+ /* With asynchronous IO from a VM guest (as a file), there
+ can be two sequential writes happening in two regions
+ of the file. But individual (broken down) IO requests
+ can arrive interleaved.
+
+ TODO: cycle for each such sequence sifting
+ through the interleaved ops
+ */
+
+ page_size = wb_inode->this->ctx->page_size;
+ conf = wb_inode->this->private;
+
+ list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+ if (wb_inode->dontsync && req->ordering.lied) {
+ /* sync has failed. Don't pick lies _again_ for winding
+ * as winding these lies again will trigger an infinite
+ * recursion of wb_process_queue being called from a
+ * failed fulfill. However, pick non-lied requests for
+ * winding so that application wont block indefinitely
+ * waiting for write result.
+ */
+ continue;
+ }
- conf = this->private;
+ if (!req->ordering.tempted) {
+ if (holder) {
+ if (wb_requests_conflict (holder, req))
+ /* do not hold on write if a
+ dependent write is in queue */
+ holder->ordering.go = 1;
+ }
+ /* collapse only non-sync writes */
+ continue;
+ } else if (!holder) {
+ /* holder is always a non-sync write */
+ holder = req;
+ continue;
+ }
+
+ offset_expected = holder->stub->args.offset
+ + holder->write_size;
+
+ if (req->stub->args.offset != offset_expected) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ if (!is_same_lkowner (&req->lk_owner, &holder->lk_owner)) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ if (req->fd != holder->fd) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ space_left = page_size - holder->write_size;
+
+ if (space_left < req->write_size) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ ret = __wb_collapse_small_writes (holder, req);
+ if (ret)
+ continue;
+
+ /* collapsed request is as good as wound
+ (from its p.o.v)
+ */
+ list_del_init (&req->todo);
+ __wb_fulfill_request (req);
+
+ /* Only the last @holder in queue which
+
+ - does not have any non-buffered-writes following it
+ - has not yet filled its capacity
+
+ does not get its 'go' set, in anticipation of the arrival
+ of consecutive smaller writes.
+ */
}
- flags = local->flags;
- wbflags = local->wbflags;
+ /* but if trickling writes are enabled, then do not hold back
+ writes if there are no outstanding requests
+ */
- if (op_ret != -1) {
- file = wb_file_create (this, fd, flags);
- if (file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ if (conf->trickling_writes && !wb_inode->transit && holder)
+ holder->ordering.go = 1;
- /* If O_DIRECT then, we disable chaching */
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && conf->enable_O_SYNC == _gf_true)) {
- file->window_conf = 0;
- }
+ if (wb_inode->dontsync > 0)
+ wb_inode->dontsync--;
+
+ return;
+}
+
+int
+__wb_handle_failed_conflict (wb_request_t *req, wb_request_t *conflict,
+ list_head_t *tasks)
+{
+ wb_conf_t *conf = NULL;
+
+ conf = req->wb_inode->this->private;
+
+ if ((req->stub->fop != GF_FOP_FLUSH)
+ && ((req->stub->fop != GF_FOP_FSYNC) || conf->resync_after_fsync)) {
+ if (!req->ordering.lied && list_empty (&conflict->wip)) {
+ /* If request itself is in liability queue,
+ * 1. We cannot unwind as the response has already been
+ * sent.
+ * 2. We cannot wind till conflict clears up.
+ * 3. So, skip the request for now.
+ * 4. Otherwise, resume (unwind) it with error.
+ */
+ req->op_ret = -1;
+ req->op_errno = conflict->op_errno;
+
+ list_del_init (&req->todo);
+ list_add_tail (&req->winds, tasks);
+
+ if (req->ordering.tempted) {
+ /* make sure that it won't be unwound in
+ * wb_do_unwinds too. Otherwise there'll be
+ * a double unwind.
+ */
+ list_del_init (&req->lie);
+ __wb_fulfill_request (req);
+ }
- if (wbflags & GF_OPEN_NOWB) {
- file->disabled = 1;
}
-
- LOCK_INIT (&file->lock);
+ } else {
+ /* flush and fsync (without conf->resync_after_fsync) act as
+ barriers. We cannot unwind them out of
+ order, when there are earlier generation writes just because
+ there is a conflicting liability with an error. So, wait for
+ our turn till there are no conflicting liabilities.
+
+ This situation can arise when there are liabilities spread across
+ multiple generations. For example, consider two writes with the
+ following characteristics:
+
+ 1. they belong to different generations gen1, gen2 and
+ (gen1 > gen2).
+ 2. they overlap.
+ 3. both are liabilities.
+ 4. gen1 write was attempted to sync, but the attempt failed.
+ 5. there was no attempt to sync gen2 write yet.
+ 6. A flush (as part of close) is issued and gets a gen no
+ gen3.
+
+ In the above scenario, if flush is unwound without waiting
+ for gen1 and gen2 writes either to be successfully synced or
+ purged, we end up with these two writes in wb_inode->todo
+ list forever as there will be no attempt to process the queue
+ as flush is the last operation.
+ */
}
-
-out:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+
return 0;
}
-int32_t
-wb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+int
+__wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
+ list_head_t *liabilities)
{
- wb_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *conflict = NULL;
+
+ list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+ conflict = wb_liability_has_conflict (wb_inode, req);
+ if (conflict) {
+ if (conflict->op_ret == -1) {
+ /* There is a conflicting liability which failed
+ * to sync in previous attempts, resume the req
+ * and fail, unless it's an fsync/flush.
+ */
+
+ __wb_handle_failed_conflict (req, conflict,
+ tasks);
+ } else {
+ /* There is a conflicting liability which was
+ * not attempted to sync even once. Wait till
+ * at least one attempt to sync is made.
+ */
+ }
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ continue;
+ }
- local->flags = flags;
- local->wbflags = wbflags;
-
- frame->local = local;
+ if (req->ordering.tempted && !req->ordering.go)
+ /* wait some more */
+ continue;
- STACK_WIND (frame,
- wb_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ if (req->stub->fop == GF_FOP_WRITE) {
+ if (wb_wip_has_conflict (wb_inode, req))
+ continue;
+
+ list_add_tail (&req->wip, &wb_inode->wip);
+ req->wind_count++;
+
+ if (!req->ordering.tempted)
+ /* unrefed in wb_writev_cbk */
+ req->stub->frame->local =
+ __wb_request_ref (req);
+ }
+
+ list_del_init (&req->todo);
+
+ if (req->ordering.tempted)
+ list_add_tail (&req->winds, liabilities);
+ else
+ list_add_tail (&req->winds, tasks);
+ }
-unwind:
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
return 0;
}
-int32_t
-wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- long flags = 0;
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
-
- if (op_ret != -1) {
- if (frame->local) {
- flags = (long) frame->local;
- }
+void
+wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- file = wb_file_create (this, fd, flags);
- if (file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ list_for_each_entry_safe (req, tmp, tasks, winds) {
+ list_del_init (&req->winds);
- /* If O_DIRECT then, we disable chaching */
- if (frame->local) {
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && (conf->enable_O_SYNC == _gf_true))) {
- file->window_conf = 0;
- }
+ if (req->op_ret == -1) {
+ call_unwind_error (req->stub, req->op_ret,
+ req->op_errno);
+ } else {
+ call_resume (req->stub);
}
- LOCK_INIT (&file->lock);
- }
-
- frame->local = NULL;
-
-out:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
+ req->stub = NULL;
+ wb_request_unref (req);
+ }
}
-int32_t
-wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+void
+wb_process_queue (wb_inode_t *wb_inode)
{
- frame->local = (void *)(long)flags;
+ list_head_t tasks = {0, };
+ list_head_t lies = {0, };
+ list_head_t liabilities = {0, };
+ int wind_failure = 0;
- STACK_WIND (frame,
- wb_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
-}
+ INIT_LIST_HEAD (&tasks);
+ INIT_LIST_HEAD (&lies);
+ INIT_LIST_HEAD (&liabilities);
-/* Mark all the contiguous write requests for winding starting from head of
- * request list. Stops marking at the first non-write request found. If
- * file is opened with O_APPEND, make sure all the writes marked for winding
- * will fit into a single write call to server.
- */
-size_t
-__wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
-{
- wb_request_t *request = NULL;
- size_t size = 0;
- char first_request = 1;
- off_t offset_expected = 0;
- wb_conf_t *conf = NULL;
- int count = 0;
+ do {
+ LOCK (&wb_inode->lock);
+ {
+ __wb_preprocess_winds (wb_inode);
- conf = file->this->private;
+ __wb_pick_winds (wb_inode, &tasks, &liabilities);
+
+ __wb_pick_unwinds (wb_inode, &lies);
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- break;
}
+ UNLOCK (&wb_inode->lock);
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- offset_expected = request->stub->args.writev.off;
- }
-
- if (request->stub->args.writev.off != offset_expected) {
- break;
- }
+ wb_do_unwinds (wb_inode, &lies);
- if ((file->flags & O_APPEND)
- && (((size + request->write_size)
- > conf->aggregate_size)
- || ((count + request->stub->args.writev.count)
- > MAX_VECTOR_COUNT))) {
- break;
- }
+ wb_do_winds (wb_inode, &tasks);
- size += request->write_size;
- offset_expected += request->write_size;
- file->aggregate_current -= request->write_size;
- count += request->stub->args.writev.count;
+ /* If there is an error in wb_fulfill before winding write
+ * requests, we would miss invocation of wb_process_queue
+ * from wb_fulfill_cbk. So, retry processing again.
+ */
+ wind_failure = wb_fulfill (wb_inode, &liabilities);
+ } while (wind_failure);
- request->flags.write_request.stack_wound = 1;
- list_add_tail (&request->winds, winds);
- }
- }
-
- return size;
+ return;
}
void
-__wb_can_wind (list_head_t *list, char *other_fop_in_queue,
- char *non_contiguous_writes, char *incomplete_writes,
- char *wind_all)
+wb_set_inode_size(wb_inode_t *wb_inode, struct iatt *postbuf)
{
- wb_request_t *request = NULL;
- char first_request = 1;
- off_t offset_expected = 0;
+ GF_ASSERT (wb_inode);
+ GF_ASSERT (postbuf);
+
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->size = postbuf->ia_size;
+ }
+ UNLOCK (&wb_inode->lock);
+}
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- if (request->stub && other_fop_in_queue) {
- *other_fop_in_queue = 1;
- }
- break;
- }
- if (request->flags.write_request.stack_wound
- && !request->flags.write_request.got_reply
- && (incomplete_writes != NULL)) {
- *incomplete_writes = 1;
- break;
- }
+int
+wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ wb_request_t *req = NULL;
+ wb_inode_t *wb_inode;
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- offset_expected
- = request->stub->args.writev.off;
- if (wind_all != NULL) {
- *wind_all = request->flags.write_request.flush_all;
- }
- }
-
- if (offset_expected != request->stub->args.writev.off) {
- if (non_contiguous_writes) {
- *non_contiguous_writes = 1;
- }
- break;
- }
+ req = frame->local;
+ frame->local = NULL;
+ wb_inode = req->wb_inode;
- offset_expected += request->write_size;
- }
- }
+ wb_request_unref (req);
- return;
+ /* requests could be pending while this was in progress */
+ wb_process_queue(wb_inode);
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
- char enable_trickling_writes)
+int
+wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- size_t size = 0;
- char other_fop_in_queue = 0;
- char incomplete_writes = 0;
- char non_contiguous_writes = 0;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- char wind_all = 0;
+ STACK_WIND (frame, wb_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
+}
- if (list_empty (list)) {
- goto out;
+
+int
+wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ gf_boolean_t wb_disabled = 0;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int32_t op_errno = EINVAL;
+ int o_direct = O_DIRECT;
+
+ conf = this->private;
+
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!conf->strict_O_DIRECT)
+ o_direct = 0;
+
+ if (fd->flags & (O_SYNC|O_DSYNC|o_direct))
+ wb_disabled = 1;
+
+ if (flags & (O_SYNC|O_DSYNC|o_direct))
+ wb_disabled = 1;
+
+ if (wb_disabled)
+ stub = fop_writev_stub (frame, wb_writev_helper, fd, vector,
+ count, offset, flags, iobref, xdata);
+ else
+ stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
+ flags, iobref, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- request = list_entry (list->next, typeof (*request), list);
- file = request->file;
+ if (wb_disabled)
+ ret = wb_enqueue (wb_inode, stub);
+ else
+ ret = wb_enqueue_tempted (wb_inode, stub);
- __wb_can_wind (list, &other_fop_in_queue,
- &non_contiguous_writes, &incomplete_writes, &wind_all);
+ if (!ret) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (!incomplete_writes && ((enable_trickling_writes)
- || (wind_all) || (non_contiguous_writes)
- || (other_fop_in_queue)
- || (file->aggregate_current
- >= aggregate_conf))) {
- size = __wb_mark_wind_all (file, list, winds);
- }
+ wb_process_queue (wb_inode);
-out:
- return size;
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy (stub);
+
+ return 0;
}
-size_t
-__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size)
+int
+wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- size_t written_behind = 0;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+}
- if (list_empty (list)) {
- goto out;
- }
- request = list_entry (list->next, typeof (*request), list);
- file = request->file;
+int
+wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- continue;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (written_behind <= size) {
- if (!request->flags.write_request.write_behind) {
- written_behind += request->write_size;
- request->flags.write_request.write_behind = 1;
- list_add_tail (&request->unwinds, unwinds);
-
- if (!request->flags.write_request.got_reply) {
- file->window_current += request->write_size;
- }
- }
- } else {
- break;
- }
- }
+ stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
+ offset, flags, xdata);
+ if (!stub)
+ goto unwind;
-out:
- return written_behind;
-}
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
+ wb_process_queue (wb_inode);
-void
-__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds)
-{
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
+ return 0;
- if (list_empty (list)) {
- goto out;
- }
+unwind:
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
- request = list_entry (list->next, typeof (*request), list);
- file = request->file;
+noqueue:
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+}
- if (file->window_current <= file->window_conf) {
- __wb_mark_unwind_till (list, unwinds,
- file->window_conf - file->window_current);
- }
-out:
- return;
+int
+wb_flush_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_DESTROY (frame->root);
+ return 0;
}
-uint32_t
-__wb_get_other_requests (list_head_t *list, list_head_t *other_requests)
+int
+wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_request_t *request = NULL;
- uint32_t count = 0;
- list_for_each_entry (request, list, list) {
- if ((request->stub == NULL)
- || (request->stub->fop == GF_FOP_WRITE)) {
- break;
- }
-
- if (!request->flags.other_requests.marked_for_resume) {
- request->flags.other_requests.marked_for_resume = 1;
- list_add_tail (&request->other_requests,
- other_requests);
- count++;
- }
- }
+ wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_frame_t *bg_frame = NULL;
+ int32_t op_errno = 0;
+ int op_ret = 0;
- return count;
-}
+ conf = this->private;
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
-int32_t
-wb_stack_unwind (list_head_t *unwinds)
-{
- struct iatt buf = {0,};
- wb_request_t *request = NULL, *dummy = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
- int ret = 0, write_requests_removed = 0;
- list_for_each_entry_safe (request, dummy, unwinds, unwinds)
- {
- frame = request->stub->frame;
- local = frame->local;
+ if (conf->flush_behind)
+ goto flushbehind;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &buf,
- &buf);
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
- ret = wb_request_unref (request);
- if (ret == 0) {
- write_requests_removed++;
- }
- }
+flushbehind:
+ bg_frame = copy_frame (frame);
+ if (!bg_frame) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- return write_requests_removed;
+ STACK_WIND (bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ /* fall through */
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
+
+ return 0;
}
-int32_t
-wb_resume_other_requests (call_frame_t *frame, wb_file_t *file,
- list_head_t *other_requests)
+int
+wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int32_t ret = 0;
- wb_request_t *request = NULL, *dummy = NULL;
- int32_t fops_removed = 0;
- char wind = 0;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (list_empty (other_requests)) {
- goto out;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- list_for_each_entry_safe (request, dummy, other_requests,
- other_requests) {
- wind = request->stub->wind;
- stub = request->stub;
-
- LOCK (&file->lock);
- {
- request->stub = NULL;
- }
- UNLOCK (&file->lock);
-
- if (!wind) {
- wb_request_unref (request);
- fops_removed++;
- }
-
- call_resume (stub);
- }
+ stub = fop_flush_stub (frame, wb_flush_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- if (fops_removed > 0) {
- ret = wb_process_queue (frame, file);
- }
-
-out:
- return ret;
-}
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
+ wb_process_queue (wb_inode);
-int32_t
-wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds,
- list_head_t *unwinds, list_head_t *other_requests)
-{
- int32_t ret = -1, write_requests_removed = 0;
+ return 0;
- ret = wb_stack_unwind (unwinds);
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
- write_requests_removed = ret;
+ return 0;
- ret = wb_sync (frame, file, winds);
- if (ret == -1) {
- goto out;
- }
+noqueue:
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
- wb_resume_other_requests (frame, file, other_requests);
- /* wb_stack_unwind does wb_request_unref after unwinding a write
- * request. Hence if a write-request was just freed in wb_stack_unwind,
- * we have to process request queue once again to unblock requests
- * blocked on the writes just unwound.
- */
- if (write_requests_removed > 0) {
- ret = wb_process_queue (frame, file);
- }
-out:
- return ret;
+int
+wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-inline int
-__wb_copy_into_holder (wb_request_t *holder, wb_request_t *request)
+int
+wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = EINVAL;
- if (holder->flags.write_request.virgin) {
- iobuf = iobuf_get (request->file->this->ctx->iobuf_pool);
- if (iobuf == NULL) {
- gf_log (request->file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- iobref = iobref_new ();
- if (iobref == NULL) {
- iobuf_unref (iobuf);
- gf_log (request->file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- ret = iobref_add (iobref, iobuf);
- if (ret != 0) {
- iobuf_unref (iobuf);
- iobref_unref (iobref);
- gf_log (request->file->this->name, GF_LOG_DEBUG,
- "cannot add iobuf (%p) into iobref (%p)",
- iobuf, iobref);
- goto out;
- }
-
- iov_unload (iobuf->ptr, holder->stub->args.writev.vector,
- holder->stub->args.writev.count);
- holder->stub->args.writev.vector[0].iov_base = iobuf->ptr;
-
- iobref_unref (holder->stub->args.writev.iobref);
- holder->stub->args.writev.iobref = iobref;
-
- iobuf_unref (iobuf);
+ stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync, xdata);
+ if (!stub)
+ goto unwind;
- holder->flags.write_request.virgin = 0;
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size;
+ wb_process_queue (wb_inode);
- iov_unload (ptr,
- request->stub->args.writev.vector,
- request->stub->args.writev.count);
+ return 0;
- holder->stub->args.writev.vector[0].iov_len += request->write_size;
- holder->write_size += request->write_size;
+unwind:
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- request->flags.write_request.stack_wound = 1;
- list_move_tail (&request->list, &request->file->passive_requests);
+ return 0;
- ret = 0;
-out:
- return ret;
+noqueue:
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-/* this procedure assumes that write requests have only one vector to write */
-void
-__wb_collapse_write_bufs (list_head_t *requests, size_t page_size)
+int
+wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- off_t offset_expected = 0;
- size_t space_left = 0;
- wb_request_t *request = NULL, *tmp = NULL, *holder = NULL;
- int ret = 0;
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
- list_for_each_entry_safe (request, tmp, requests, list) {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)
- || (request->flags.write_request.stack_wound)) {
- holder = NULL;
- continue;
- }
- if (request->flags.write_request.write_behind) {
- if (holder == NULL) {
- holder = request;
- continue;
- }
+int
+wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- offset_expected = holder->stub->args.writev.off
- + holder->write_size;
- if (request->stub->args.writev.off != offset_expected) {
- holder = request;
- continue;
- }
+ wb_inode = wb_inode_ctx_get (this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- space_left = page_size - holder->write_size;
+ stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
- if (space_left >= request->write_size) {
- ret = __wb_copy_into_holder (holder, request);
- if (ret != 0) {
- break;
- }
-
- __wb_request_unref (request);
- } else {
- holder = request;
- }
- } else {
- break;
- }
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- return;
+ wb_process_queue (wb_inode);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
+
+noqueue:
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
-int32_t
-wb_process_queue (call_frame_t *frame, wb_file_t *file)
+int
+wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- list_head_t winds, unwinds, other_requests;
- size_t size = 0;
- wb_conf_t *conf = NULL;
- uint32_t count = 0;
- int32_t ret = -1;
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
- INIT_LIST_HEAD (&winds);
- INIT_LIST_HEAD (&unwinds);
- INIT_LIST_HEAD (&other_requests);
-
- if (file == NULL) {
- errno = EINVAL;
- goto out;
- }
- conf = file->this->private;
- size = conf->aggregate_size;
- LOCK (&file->lock);
- {
- /*
- * make sure requests are marked for unwinding and adjacent
- * continguous write buffers (each of size less than that of
- * an iobuf) are packed properly so that iobufs are filled to
- * their maximum capacity, before calling __wb_mark_winds.
- */
- __wb_mark_unwinds (&file->request, &unwinds);
+int
+wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- __wb_collapse_write_bufs (&file->request,
- file->this->ctx->page_size);
- count = __wb_get_other_requests (&file->request,
- &other_requests);
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (count == 0) {
- __wb_mark_winds (&file->request, &winds, size,
- conf->enable_trickling_writes);
- }
+ stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- }
- UNLOCK (&file->lock);
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- ret = wb_do_ops (frame, file, &winds, &unwinds, &other_requests);
+ wb_process_queue (wb_inode);
-out:
- return ret;
-}
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
-int32_t
-wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
+
+noqueue:
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
}
int32_t
-wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
-{
- wb_file_t *file = NULL;
- char wb_disabled = 0;
- call_frame_t *process_frame = NULL;
- size_t size = 0;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1, op_errno = EINVAL;
-
- if (vector != NULL)
- size = iov_length (vector, count);
-
- if ((!IA_ISDIR (fd->inode->ia_type))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- op_errno = EBADFD;
- goto unwind;
- }
+wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame->local);
- file = (wb_file_t *)(long)tmp_file;
- if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wb_file not found for fd %p", fd);
- op_errno = EBADFD;
- goto unwind;
- }
+ if (op_ret == 0)
+ wb_set_inode_size (frame->local, postbuf);
- if (file != NULL) {
- LOCK (&file->lock);
- {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
-
- file->op_ret = 0;
-
- if ((op_ret == 0)
- && (file->disabled || file->disable_till)) {
- if (size > file->disable_till) {
- file->disable_till = 0;
- } else {
- file->disable_till -= size;
- }
- wb_disabled = 1;
- }
- }
- UNLOCK (&file->lock);
- } else {
- wb_disabled = 1;
- }
+ frame->local = NULL;
- if (op_ret == -1) {
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
- NULL, NULL);
- return 0;
- }
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
- if (wb_disabled) {
- STACK_WIND (frame, wb_writev_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->writev,
- fd, vector, count, offset, iobref);
- return 0;
- }
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+int
+wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND (frame, wb_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- return 0;
- }
- frame->local = local;
- local->file = file;
+int
+wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
- iobref);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode = wb_inode_create (this, loc->inode);
+ if (!wb_inode)
+ goto unwind;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (process_frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ frame->local = wb_inode;
- STACK_DESTROY (process_frame->root);
+ stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
+ offset, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue (wb_inode);
return 0;
unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
-
- if (process_frame) {
- STACK_DESTROY (process_frame->root);
- }
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- if (stub) {
+ if (stub)
call_stub_destroy (stub);
- }
return 0;
}
int32_t
-wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
-{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if ((request != NULL) && (file != NULL)) {
- wb_request_unref (request);
-
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame->local);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ if (op_ret == 0)
+ wb_set_inode_size (frame->local, postbuf);
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-static int32_t
-wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- STACK_WIND (frame,
- wb_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
-
+int
+wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND (frame, wb_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
}
-int32_t
-wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- int32_t ret = -1;
- wb_request_t *request = NULL;
-
- if ((!IA_ISDIR (fd->inode->ia_type))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (readv, frame, -1, EBADFD,
- NULL, 0, NULL, NULL);
- return 0;
+int
+wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = 0;
+
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- file = (wb_file_t *)(long)tmp_file;
+ frame->local = wb_inode;
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- return 0;
+ stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
+ offset, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- local->file = file;
+ if (!wb_enqueue (wb_inode, stub)) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- frame->local = local;
- if (file) {
- stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
- offset);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- return 0;
- }
+ wb_process_queue (wb_inode);
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ return 0;
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+unwind:
+ frame->local = NULL;
- } else {
- STACK_WIND (frame,
- wb_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- }
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ if (stub)
+ call_stub_destroy (stub);
return 0;
}
-int32_t
-wb_ffr_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int
+wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_DESTROY (frame->root);
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+int
+wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- conf = this->private;
- local = frame->local;
- file = local->file;
+ wb_inode = wb_inode_ctx_get (this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (file != NULL) {
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
+ stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf,
+ valid, xdata);
+ if (!stub)
+ goto unwind;
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
- }
-
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- return 0;
-}
+ wb_process_queue (wb_inode);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
-int32_t
-wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- wb_conf_t *conf = NULL;
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- call_frame_t *flush_frame = NULL, *process_frame = NULL;
- int32_t op_ret = -1, op_errno = -1, ret = -1;
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
- conf = this->private;
+noqueue:
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
- local = frame->local;
- file = local->file;
- LOCK (&file->lock);
- {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
- }
- UNLOCK (&file->lock);
+int
+wb_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
- if (local && local->request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
- wb_request_unref (local->request);
- }
-
- if (conf->flush_behind) {
- flush_frame = copy_frame (frame);
- if (flush_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+int
+wb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- STACK_WIND (flush_frame,
- wb_ffr_bg_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- } else {
- STACK_WIND (frame,
- wb_ffr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_DESTROY (process_frame->root);
- goto unwind;
- }
+ stub = fop_fsetattr_stub (frame, wb_fsetattr_helper, fd, stbuf,
+ valid, xdata);
+ if (!stub)
+ goto unwind;
- STACK_DESTROY (process_frame->root);
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- if (conf->flush_behind) {
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- }
+ wb_process_queue (wb_inode);
return 0;
-
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
+
+noqueue:
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
return 0;
}
int32_t
-wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- wb_conf_t *conf = NULL;
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- call_frame_t *flush_frame = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
+ wb_inode_t *wb_inode = NULL;
- conf = this->private;
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
- if ((!IA_ISDIR (fd->inode->ia_type))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
+ if (((flags & O_RDWR) || (flags & O_WRONLY)) && (flags & O_TRUNC))
+ wb_inode->size = 0;
- STACK_UNWIND_STRICT (flush, frame, -1, EBADFD);
- return 0;
- }
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
- file = (wb_file_t *)(long)tmp_file;
+unwind:
+ STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
- if (file != NULL) {
- local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t);
- if (local == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- return 0;
- }
- local->file = file;
+int32_t
+wb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
- frame->local = local;
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
- stub = fop_flush_stub (frame, wb_flush_helper, fd);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- return 0;
- }
+ if (((flags & O_RDWR) || (flags & O_WRONLY)) && (flags & O_TRUNC))
+ wb_inode->size = 0;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- call_stub_destroy (stub);
- return 0;
- }
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- call_stub_destroy (stub);
- return 0;
- }
- } else {
- if (conf->flush_behind) {
- flush_frame = copy_frame (frame);
- if (flush_frame == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- return 0;
- }
+unwind:
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
- STACK_UNWIND_STRICT (flush, frame, 0, 0);
- STACK_WIND (flush_frame,
- wb_ffr_bg_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- } else {
- STACK_WIND (frame,
- wb_ffr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- }
+int32_t
+wb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ if (op_ret == 0) {
+ wb_inode_t *wb_inode = wb_inode_ctx_get (this, inode);
+ if (wb_inode)
+ wb_set_inode_size (wb_inode, buf);
}
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
-static int32_t
-wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf)
+int32_t
+wb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
+ STACK_WIND (frame, wb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
- local = frame->local;
- file = local->file;
- request = local->request;
- if (file != NULL) {
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
+int
+wb_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp = 0;
+ wb_inode_t *wb_inode = NULL;
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
-
- if (request) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ inode_ctx_del (inode, this, &tmp);
- }
+ wb_inode = (wb_inode_t *)(long)tmp;
+
+ if (!wb_inode)
+ return 0;
+
+ GF_ASSERT (list_empty (&wb_inode->todo));
+ GF_ASSERT (list_empty (&wb_inode->liability));
+ GF_ASSERT (list_empty (&wb_inode->temptation));
+
+ GF_FREE (wb_inode);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
-
return 0;
}
-static int32_t
-wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+int
+wb_release (xlator_t *this, fd_t *fd)
{
- STACK_WIND (frame,
- wb_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, datasync);
+ uint64_t tmp = 0;
+
+ fd_ctx_del (fd, this, &tmp);
+
return 0;
}
-int32_t
-wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
-{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if ((!IA_ISDIR (fd->inode->ia_type))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (fsync, frame, -1, EBADFD, NULL, NULL);
- return 0;
- }
-
- file = (wb_file_t *)(long)tmp_file;
+int
+wb_priv_dump (xlator_t *this)
+{
+ wb_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ int ret = -1;
- local = GF_CALLOC (1, sizeof (*local),
- gf_wb_mt_wb_local_t);
- if (local == NULL) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ GF_VALIDATE_OR_GOTO ("write-behind", this, out);
- local->file = file;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
- frame->local = local;
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
+ "priv");
- if (file) {
- stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
- return 0;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ gf_proc_dump_add_section (key_prefix);
- ret = wb_process_queue (frame, file);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
-
- } else {
- STACK_WIND (frame,
- wb_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, datasync);
- }
+ gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size);
+ gf_proc_dump_write ("window_size", "%d", conf->window_size);
+ gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind);
+ gf_proc_dump_write ("trickling_writes", "%d", conf->trickling_writes);
- return 0;
+ ret = 0;
+out:
+ return ret;
}
-int32_t
-wb_release (xlator_t *this, fd_t *fd)
+void
+__wb_dump_requests (struct list_head *head, char *prefix)
{
- uint64_t file_ptr = 0;
- wb_file_t *file = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }, flag = 0;
+ wb_request_t *req = NULL;
- fd_ctx_get (fd, this, &file_ptr);
- file = (wb_file_t *) (long) file_ptr;
+ list_for_each_entry (req, head, all) {
+ gf_proc_dump_build_key (key_prefix, key,
+ (char *)gf_fop_list[req->fop]);
- if (file != NULL) {
- LOCK (&file->lock);
- {
- assert (list_empty (&file->request));
- }
- UNLOCK (&file->lock);
+ gf_proc_dump_add_section(key_prefix);
- wb_file_destroy (file);
- }
+ gf_proc_dump_write ("request-ptr", "%p", req);
- return 0;
+ gf_proc_dump_write ("refcount", "%d", req->refcount);
+
+ if (list_empty (&req->todo))
+ gf_proc_dump_write ("wound", "yes");
+ else
+ gf_proc_dump_write ("wound", "no");
+
+ gf_proc_dump_write ("generation-number", "%d", req->gen);
+
+ gf_proc_dump_write ("req->op_ret", "%d", req->op_ret);
+ gf_proc_dump_write ("req->op_errno", "%d", req->op_errno);
+ gf_proc_dump_write ("sync-attempts", "%d", req->wind_count);
+
+ if (req->fop == GF_FOP_WRITE) {
+ if (list_empty (&req->wip))
+ gf_proc_dump_write ("sync-in-progress", "no");
+ else
+ gf_proc_dump_write ("sync-in-progress", "yes");
+
+ gf_proc_dump_write ("size", "%"GF_PRI_SIZET,
+ req->write_size);
+
+ gf_proc_dump_write ("offset", "%"PRId64,
+ req->stub->args.offset);
+
+ flag = req->ordering.lied;
+ gf_proc_dump_write ("lied", "%d", flag);
+
+ flag = req->ordering.append;
+ gf_proc_dump_write ("append", "%d", flag);
+
+ flag = req->ordering.fulfilled;
+ gf_proc_dump_write ("fulfilled", "%d", flag);
+
+ flag = req->ordering.go;
+ gf_proc_dump_write ("go", "%d", flag);
+
+ }
+ }
}
+
int
-wb_priv_dump (xlator_t *this)
+wb_inode_dump (xlator_t *this, inode_t *inode)
{
- wb_conf_t *conf = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
-
- if (!this)
- return -1;
+ wb_inode_t *wb_inode = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char uuid_str[64] = {0,};
+
+ if ((inode == NULL) || (this == NULL)) {
+ ret = 0;
+ goto out;
+ }
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
+ wb_inode = wb_inode_ctx_get (this, inode);
+ if (wb_inode == NULL) {
+ ret = 0;
+ goto out;
}
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.write-behind",
- "priv");
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
+ "wb_inode");
gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_build_key (key, key_prefix, "aggregate_size");
- gf_proc_dump_write (key, "%d", conf->aggregate_size);
- gf_proc_dump_build_key (key, key_prefix, "window_size");
- gf_proc_dump_write (key, "%d", conf->window_size);
- gf_proc_dump_build_key (key, key_prefix, "disable_till");
- gf_proc_dump_write (key, "%d", conf->disable_till);
- gf_proc_dump_build_key (key, key_prefix, "enable_O_SYNC");
- gf_proc_dump_write (key, "%d", conf->enable_O_SYNC);
- gf_proc_dump_build_key (key, key_prefix, "flush_behind");
- gf_proc_dump_write (key, "%d", conf->flush_behind);
- gf_proc_dump_build_key (key, key_prefix, "enable_trickling_writes");
- gf_proc_dump_write (key, "%d", conf->enable_trickling_writes);
+ __inode_path (inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
+ }
+
+ gf_proc_dump_write ("inode", "%p", inode);
- return 0;
+ gf_proc_dump_write ("window_conf", "%"GF_PRI_SIZET,
+ wb_inode->window_conf);
+
+ gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET,
+ wb_inode->window_current);
+
+
+ gf_proc_dump_write ("transit-size", "%"GF_PRI_SIZET,
+ wb_inode->transit);
+
+ gf_proc_dump_write ("dontsync", "%d", wb_inode->dontsync);
+
+ ret = TRY_LOCK (&wb_inode->lock);
+ if (!ret)
+ {
+ if (!list_empty (&wb_inode->all)) {
+ __wb_dump_requests (&wb_inode->all, key_prefix);
+ }
+ UNLOCK (&wb_inode->lock);
+ }
+
+ if (ret && wb_inode)
+ gf_proc_dump_write ("Unable to dump the inode information",
+ "(Lock acquisition failed) %p (gfid: %s)",
+ wb_inode,
+ uuid_utoa_r (inode->gfid, uuid_str));
+ ret = 0;
+out:
+ return ret;
}
-int32_t
+
+int
mem_acct_init (xlator_t *this)
{
int ret = -1;
- if (!this)
- return ret;
+ if (!this) {
+ goto out;
+ }
ret = xlator_mem_acct_init (this, gf_wb_mt_end + 1);
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ WRITE_BEHIND_MSG_NO_MEMORY,
+ "Memory accounting init"
"failed");
- return ret;
}
+out:
return ret;
}
-int32_t
-init (xlator_t *this)
+
+int
+reconfigure (xlator_t *this, dict_t *options)
{
- dict_t *options = NULL;
wb_conf_t *conf = NULL;
- char *str = NULL;
- int32_t ret = -1;
+ int ret = -1;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("cache-size", conf->window_size, options, size_uint64,
+ out);
+
+ GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("trickling-writes", conf->trickling_writes, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("strict-O_DIRECT", conf->strict_O_DIRECT, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("strict-write-ordering", conf->strict_write_ordering,
+ options, bool, out);
+ GF_OPTION_RECONF ("resync-failed-syncs-after-fsync",
+ conf->resync_after_fsync, options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+init (xlator_t *this)
+{
+ wb_conf_t *conf = NULL;
+ int32_t ret = -1;
if ((this->children == NULL)
|| this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ WRITE_BEHIND_MSG_INIT_FAILED,
"FATAL: write-behind (%s) not configured with exactly "
- "one child",
- this->name);
- return -1;
+ "one child", this->name);
+ goto out;
}
if (this->parents == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfilex");
}
-
- options = this->options;
conf = GF_CALLOC (1, sizeof (*conf), gf_wb_mt_wb_conf_t);
if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
- return -1;
- }
-
- conf->enable_O_SYNC = _gf_false;
- ret = dict_get_str (options, "enable-O_SYNC",
- &str);
- if (ret == 0) {
- ret = gf_string2boolean (str,
- &conf->enable_O_SYNC);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'enable-O_SYNC' takes only boolean arguments");
- return -1;
- }
+ goto out;
}
/* configure 'options aggregate-size <size>' */
conf->aggregate_size = WB_AGGREGATE_SIZE;
- conf->disable_till = 0;
- ret = dict_get_str (options, "disable-for-first-nbytes",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str,
- &conf->disable_till);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "disable-for-first-nbytes\"",
- str);
- return -1;
- }
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "disabling write-behind for first %"PRIu64" bytes",
- conf->disable_till);
-
/* configure 'option window-size <size>' */
- conf->window_size = WB_WINDOW_SIZE;
- ret = dict_get_str (options, "cache-size",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str,
- &conf->window_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "window-size\"",
- str);
- GF_FREE (conf);
- return -1;
- }
- }
+ GF_OPTION_INIT ("cache-size", conf->window_size, size_uint64, out);
if (!conf->window_size && conf->aggregate_size) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_SIZE_NOT_SET,
"setting window-size to be equal to "
"aggregate-size(%"PRIu64")",
conf->aggregate_size);
@@ -2674,66 +2523,60 @@ init (xlator_t *this)
}
if (conf->window_size < conf->aggregate_size) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ WRITE_BEHIND_MSG_EXCEEDED_MAX_SIZE,
"aggregate-size(%"PRIu64") cannot be more than "
- "window-size"
- "(%"PRIu64")", conf->aggregate_size, conf->window_size);
- GF_FREE (conf);
- return -1;
+ "window-size(%"PRIu64")", conf->aggregate_size,
+ conf->window_size);
+ goto out;
}
/* configure 'option flush-behind <on/off>' */
- conf->flush_behind = 1;
- ret = dict_get_str (options, "flush-behind",
- &str);
- if (ret == 0) {
- ret = gf_string2boolean (str,
- &conf->flush_behind);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'flush-behind' takes only boolean arguments");
- return -1;
- }
+ GF_OPTION_INIT ("flush-behind", conf->flush_behind, bool, out);
- if (conf->flush_behind) {
- gf_log (this->name, GF_LOG_DEBUG,
- "enabling flush-behind");
- }
- }
+ GF_OPTION_INIT ("trickling-writes", conf->trickling_writes, bool, out);
- conf->enable_trickling_writes = _gf_true;
- ret = dict_get_str (options, "enable-trickling-writes",
- &str);
- if (ret == 0) {
- ret = gf_string2boolean (str,
- &conf->enable_trickling_writes);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'enable-trickling_writes' takes only boolean"
- " arguments");
- return -1;
- }
- }
+ GF_OPTION_INIT ("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out);
+
+ GF_OPTION_INIT ("strict-write-ordering", conf->strict_write_ordering,
+ bool, out);
+
+ GF_OPTION_INIT ("resync-failed-syncs-after-fsync",
+ conf->resync_after_fsync, bool, out);
this->private = conf;
- return 0;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE (conf);
+ }
+ return ret;
}
void
fini (xlator_t *this)
{
- wb_conf_t *conf = this->private;
+ wb_conf_t *conf = NULL;
+ GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+
+ conf = this->private;
+ if (!conf) {
+ goto out;
+ }
+
+ this->private = NULL;
GF_FREE (conf);
+
+out:
return;
}
struct xlator_fops fops = {
.writev = wb_writev,
- .open = wb_open,
- .create = wb_create,
.readv = wb_readv,
.flush = wb_flush,
.fsync = wb_fsync,
@@ -2742,36 +2585,67 @@ struct xlator_fops fops = {
.truncate = wb_truncate,
.ftruncate = wb_ftruncate,
.setattr = wb_setattr,
+ .fsetattr = wb_fsetattr,
};
struct xlator_cbks cbks = {
+ .forget = wb_forget,
.release = wb_release
};
+
struct xlator_dumpops dumpops = {
.priv = wb_priv_dump,
+ .inodectx = wb_inode_dump,
};
+
struct volume_options options[] = {
- { .key = {"flush-behind"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"cache-size", "window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 512 * GF_UNIT_KB,
- .max = 1 * GF_UNIT_GB
+ { .key = {"flush-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "If this option is set ON, instructs write-behind "
+ "translator to perform flush in background, by "
+ "returning success (or any errors, if any of "
+ "previous writes were failed) to application even "
+ "before flush FOP is sent to backend filesystem. "
},
- { .key = {"disable-for-first-nbytes"},
+ { .key = {"cache-size", "window-size"},
.type = GF_OPTION_TYPE_SIZET,
- .min = 1,
- .max = 1 * GF_UNIT_MB,
+ .min = 512 * GF_UNIT_KB,
+ .max = 1 * GF_UNIT_GB,
+ .default_value = "1MB",
+ .description = "Size of the write-behind buffer for a single file "
+ "(inode)."
+ },
+ { .key = {"trickling-writes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
},
- { .key = {"enable-O_SYNC"},
+ { .key = {"strict-O_DIRECT"},
.type = GF_OPTION_TYPE_BOOL,
- },
- { .key = {"enable-trickling-writes"},
+ .default_value = "off",
+ .description = "This option when set to off, ignores the "
+ "O_DIRECT flag."
+ },
+ { .key = {"strict-write-ordering"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Do not let later writes overtake earlier writes even "
+ "if they do not overlap",
+ },
+ { .key = {"resync-failed-syncs-after-fsync"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If sync of \"cached-writes issued before fsync\" "
+ "(to backend) fails, this option configures whether "
+ "to retry syncing them after fsync or forget them. "
+ "If set to on, cached-writes are retried "
+ "till a \"flush\" fop (or a successful sync) on sync "
+ "failures. "
+ "fsync itself is failed irrespective of the value of "
+ "this option. ",
},
{ .key = {NULL} },
};
diff --git a/xlators/playground/Makefile.am b/xlators/playground/Makefile.am
new file mode 100644
index 00000000000..e7de6b31aff
--- /dev/null
+++ b/xlators/playground/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = template
+CLEANFILES =
diff --git a/xlators/playground/template/Makefile.am b/xlators/playground/template/Makefile.am
new file mode 100644
index 00000000000..f2689244371
--- /dev/null
+++ b/xlators/playground/template/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = src
+
diff --git a/xlators/playground/template/src/Makefile.am b/xlators/playground/template/src/Makefile.am
new file mode 100644
index 00000000000..ef88d4d48bb
--- /dev/null
+++ b/xlators/playground/template/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = template.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
+
+template_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+template_la_SOURCES = template.c
+template_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = template.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/playground/template/src/template.c b/xlators/playground/template/src/template.c
new file mode 100644
index 00000000000..c4db42debd0
--- /dev/null
+++ b/xlators/playground/template/src/template.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "template.h"
+
+int32_t
+init (xlator_t *this)
+{
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/playground/template/src/template.h b/xlators/playground/template/src/template.h
new file mode 100644
index 00000000000..2e9752cac09
--- /dev/null
+++ b/xlators/playground/template/src/template.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __TEMPLATE_H__
+#define __TEMPLATE_H__
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+
+#endif /* __TEMPLATE_H__ */
diff --git a/xlators/protocol/Makefile.am b/xlators/protocol/Makefile.am
index 17c9a8313f8..91b03b1416a 100644
--- a/xlators/protocol/Makefile.am
+++ b/xlators/protocol/Makefile.am
@@ -1 +1 @@
-SUBDIRS = lib auth legacy client server
+SUBDIRS = auth client server
diff --git a/xlators/protocol/auth/addr/src/Makefile.am b/xlators/protocol/auth/addr/src/Makefile.am
index ebf20b38a84..426e7c2fb36 100644
--- a/xlators/protocol/auth/addr/src/Makefile.am
+++ b/xlators/protocol/auth/addr/src/Makefile.am
@@ -1,12 +1,14 @@
auth_LTLIBRARIES = addr.la
authdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/auth
-addr_la_LDFLAGS = -module -avoidversion
+addr_la_LDFLAGS = -module -avoid-version
addr_la_SOURCES = addr.c
-addr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/xlators/protocol/lib/src/libgfproto1.la
+addr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/protocol/lib/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/protocol/server/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/protocol/auth/addr/src/addr.c b/xlators/protocol/auth/addr/src/addr.c
index 729233fa182..6965da01b7a 100644
--- a/xlators/protocol/auth/addr/src/addr.c
+++ b/xlators/protocol/auth/addr/src/addr.c
@@ -1,33 +1,20 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include <fnmatch.h>
#include <sys/socket.h>
#include <netdb.h>
#include "authenticate.h"
#include "dict.h"
+#include "rpc-transport.h"
#define ADDR_DELIMITER " ,"
#define PRIVILEGED_PORT_CEILING 1024
@@ -36,196 +23,203 @@
#define AF_INET_SDP 27
#endif
-/* TODO: duplicate declaration */
-typedef struct peer_info {
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- char identifier[UNIX_PATH_MAX];
-}peer_info_t;
-
-auth_result_t
+auth_result_t
gf_auth (dict_t *input_params, dict_t *config_params)
{
- int ret = 0;
- char *name = NULL;
- char *searchstr = NULL;
- char peer_addr[UNIX_PATH_MAX];
- data_t *peer_info_data = NULL;
- peer_info_t *peer_info = NULL;
- data_t *allow_addr = NULL, *reject_addr = NULL;
- char is_inet_sdp = 0;
-
- name = data_to_str (dict_get (input_params, "remote-subvolume"));
- if (!name) {
- gf_log ("authenticate/addr",
- GF_LOG_ERROR,
- "remote-subvolume not specified");
- return AUTH_DONT_CARE;
- }
-
- ret = asprintf (&searchstr, "auth.addr.%s.allow", name);
- if (-1 == ret) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- allow_addr = dict_get (config_params,
- searchstr);
- free (searchstr);
-
- ret = asprintf (&searchstr, "auth.addr.%s.reject", name);
- if (-1 == ret) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- reject_addr = dict_get (config_params,
- searchstr);
- free (searchstr);
-
- if (!allow_addr) {
- /* TODO: backword compatibility */
- ret = asprintf (&searchstr, "auth.ip.%s.allow", name);
- if (-1 == ret) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- allow_addr = dict_get (config_params, searchstr);
- free (searchstr);
- }
-
- if (!(allow_addr || reject_addr)) {
- gf_log ("auth/addr", GF_LOG_DEBUG,
- "none of the options auth.addr.%s.allow or "
- "auth.addr.%s.reject specified, returning auth_dont_care",
- name, name);
- return AUTH_DONT_CARE;
- }
-
- peer_info_data = dict_get (input_params, "peer-info");
- if (!peer_info_data) {
- gf_log ("authenticate/addr",
- GF_LOG_ERROR,
- "peer-info not present");
- return AUTH_DONT_CARE;
- }
-
- peer_info = data_to_ptr (peer_info_data);
-
- switch (((struct sockaddr *) &peer_info->sockaddr)->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- {
- char *service;
- uint16_t peer_port;
- strcpy (peer_addr, peer_info->identifier);
- service = strrchr (peer_addr, ':');
- *service = '\0';
- service ++;
-
- if (is_inet_sdp) {
- ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET_SDP;
- }
-
- peer_port = atoi (service);
- if (peer_port >= PRIVILEGED_PORT_CEILING) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "client is bound to port %d which is not privileged",
- peer_port);
- return AUTH_DONT_CARE;
- }
- break;
-
- case AF_UNIX:
- strcpy (peer_addr, peer_info->identifier);
- break;
-
- default:
- gf_log ("authenticate/addr", GF_LOG_ERROR,
- "unknown address family %d",
- ((struct sockaddr *) &peer_info->sockaddr)->sa_family);
- return AUTH_DONT_CARE;
- }
- }
-
- if (reject_addr) {
- char *addr_str = NULL;
- char *tmp;
- char *addr_cpy = strdup (reject_addr->data);
-
- addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
-
- while (addr_str) {
- char negate = 0, match =0;
- gf_log (name, GF_LOG_DEBUG,
- "rejected = \"%s\", received addr = \"%s\"",
- addr_str, peer_addr);
- if (addr_str[0] == '!') {
- negate = 1;
- addr_str++;
- }
-
- match = fnmatch (addr_str,
- peer_addr,
- 0);
- if (negate ? match : !match) {
- free (addr_cpy);
- return AUTH_REJECT;
- }
- addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
- }
- free (addr_cpy);
- }
-
- if (allow_addr) {
- char *addr_str = NULL;
- char *tmp;
- char *addr_cpy = strdup (allow_addr->data);
-
- addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
-
- while (addr_str) {
- char negate = 0, match = 0;
- gf_log (name, GF_LOG_DEBUG,
- "allowed = \"%s\", received addr = \"%s\"",
- addr_str, peer_addr);
- if (addr_str[0] == '!') {
- negate = 1;
- addr_str++;
- }
-
- match = fnmatch (addr_str,
- peer_addr,
- 0);
-
- if (negate ? match : !match) {
- free (addr_cpy);
- return AUTH_ACCEPT;
- }
- addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
- }
- free (addr_cpy);
- }
-
- return AUTH_DONT_CARE;
+ auth_result_t result = AUTH_DONT_CARE;
+ int ret = 0;
+ char *name = NULL;
+ char *searchstr = NULL;
+ peer_info_t *peer_info = NULL;
+ data_t *peer_info_data = NULL;
+ data_t *allow_addr = NULL;
+ data_t *reject_addr = NULL;
+ char *addr_str = NULL;
+ char *tmp = NULL;
+ char *addr_cpy = NULL;
+ char *service = NULL;
+ uint16_t peer_port = 0;
+ char is_inet_sdp = 0;
+ char negate = 0;
+ char match = 0;
+ char peer_addr[UNIX_PATH_MAX];
+ char *type = NULL;
+ gf_boolean_t allow_insecure = _gf_false;
+
+ name = data_to_str (dict_get (input_params, "remote-subvolume"));
+ if (!name) {
+ gf_log ("authenticate/addr", GF_LOG_DEBUG,
+ "remote-subvolume not specified");
+ goto out;
+ }
+
+ ret = gf_asprintf (&searchstr, "auth.addr.%s.allow", name);
+ if (-1 == ret) {
+ gf_log ("auth/addr", GF_LOG_DEBUG,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+
+ allow_addr = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ ret = gf_asprintf (&searchstr, "auth.addr.%s.reject", name);
+ if (-1 == ret) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+ reject_addr = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ if (!allow_addr) {
+ /* TODO: backword compatibility */
+ ret = gf_asprintf (&searchstr, "auth.ip.%s.allow", name);
+ if (-1 == ret) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+ allow_addr = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+ }
+
+ if (!(allow_addr || reject_addr)) {
+ gf_log ("auth/addr", GF_LOG_DEBUG,
+ "none of the options auth.addr.%s.allow or "
+ "auth.addr.%s.reject specified, returning auth_dont_care",
+ name, name);
+ goto out;
+ }
+
+ peer_info_data = dict_get (input_params, "peer-info");
+ if (!peer_info_data) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "peer-info not present");
+ goto out;
+ }
+
+ peer_info = data_to_ptr (peer_info_data);
+
+ switch (((struct sockaddr *) &peer_info->sockaddr)->sa_family)
+ {
+ case AF_INET_SDP:
+ is_inet_sdp = 1;
+ ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET;
+
+ case AF_INET:
+ case AF_INET6:
+ {
+ strcpy (peer_addr, peer_info->identifier);
+ service = strrchr (peer_addr, ':');
+ *service = '\0';
+ service ++;
+
+ if (is_inet_sdp) {
+ ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET_SDP;
+ }
+
+ ret = dict_get_str (config_params, "rpc-auth-allow-insecure",
+ &type);
+ if (ret == 0) {
+ ret = gf_string2boolean (type, &allow_insecure);
+ if (ret < 0) {
+ gf_log ("auth/addr", GF_LOG_WARNING,
+ "rpc-auth-allow-insecure option %s "
+ "is not a valid bool option", type);
+ goto out;
+ }
+ }
+
+ peer_port = atoi (service);
+ if (peer_port >= PRIVILEGED_PORT_CEILING && !allow_insecure) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "client is bound to port %d which is not privileged",
+ peer_port);
+ goto out;
+ }
+ break;
+
+ case AF_UNIX:
+ strcpy (peer_addr, peer_info->identifier);
+ break;
+
+ default:
+ gf_log ("authenticate/addr", GF_LOG_ERROR,
+ "unknown address family %d",
+ ((struct sockaddr *) &peer_info->sockaddr)->sa_family);
+ goto out;
+ }
+ }
+
+ if (reject_addr) {
+ addr_cpy = gf_strdup (reject_addr->data);
+ if (!addr_cpy)
+ goto out;
+
+ addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
+
+ while (addr_str) {
+ gf_log (name, GF_LOG_DEBUG,
+ "rejected = \"%s\", received addr = \"%s\"",
+ addr_str, peer_addr);
+ if (addr_str[0] == '!') {
+ negate = 1;
+ addr_str++;
+ }
+
+ match = fnmatch (addr_str, peer_addr, 0);
+ if (negate ? match : !match) {
+ result = AUTH_REJECT;
+ goto out;
+ }
+ addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
+ }
+ GF_FREE (addr_cpy);
+ addr_cpy = NULL;
+ }
+
+ if (allow_addr) {
+ addr_cpy = gf_strdup (allow_addr->data);
+ if (!addr_cpy)
+ goto out;
+
+ addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
+
+ while (addr_str) {
+ gf_log (name, GF_LOG_DEBUG,
+ "allowed = \"%s\", received addr = \"%s\"",
+ addr_str, peer_addr);
+ if (addr_str[0] == '!') {
+ negate = 1;
+ addr_str++;
+ }
+
+ match = fnmatch (addr_str, peer_addr, 0);
+ if (negate ? match : !match) {
+ result = AUTH_ACCEPT;
+ goto out;
+ }
+ addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
+ }
+ }
+
+out:
+ GF_FREE (addr_cpy);
+
+ return result;
}
struct volume_options options[] = {
- { .key = {"auth.addr.*.allow"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"auth.addr.*.reject"},
- .type = GF_OPTION_TYPE_ANY
- },
- /* Backword compatibility */
- { .key = {"auth.ip.*.allow"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
+ { .key = {"auth.addr.*.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST
+ },
+ { .key = {"auth.addr.*.reject"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST
+ },
+ /* Backword compatibility */
+ { .key = {"auth.ip.*.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST
+ },
+ { .key = {NULL} }
};
diff --git a/xlators/protocol/auth/login/src/Makefile.am b/xlators/protocol/auth/login/src/Makefile.am
index b3b625b6e52..d84db91c4e1 100644
--- a/xlators/protocol/auth/login/src/Makefile.am
+++ b/xlators/protocol/auth/login/src/Makefile.am
@@ -1,13 +1,12 @@
auth_LTLIBRARIES = login.la
authdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/auth
-login_la_LDFLAGS = -module -avoidversion
+login_la_LDFLAGS = -module -avoid-version
login_la_SOURCES = login.c
-login_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/xlators/protocol/lib/src/libgfproto1.la
+login_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/protocol/server/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/protocol/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/protocol/auth/login/src/login.c b/xlators/protocol/auth/login/src/login.c
index 0c85292f717..e799dd22c1f 100644
--- a/xlators/protocol/auth/login/src/login.c
+++ b/xlators/protocol/auth/login/src/login.c
@@ -1,114 +1,164 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <fnmatch.h>
#include "authenticate.h"
auth_result_t gf_auth (dict_t *input_params, dict_t *config_params)
{
- int ret = 0;
- char *username = NULL, *password = NULL;
- data_t *allow_user = NULL, *username_data = NULL, *password_data = NULL;
- int32_t result = AUTH_DONT_CARE;
- char *brick_name = NULL, *searchstr = NULL;
-
- username_data = dict_get (input_params, "username");
- if (!username_data)
- return AUTH_DONT_CARE;
-
- username = data_to_str (username_data);
-
- password_data = dict_get (input_params, "password");
- if (!password_data)
- return AUTH_DONT_CARE;
-
- password = data_to_str (password_data);
-
- brick_name = data_to_str (dict_get (input_params, "remote-subvolume"));
- if (!brick_name) {
- gf_log ("auth/login",
- GF_LOG_ERROR,
- "remote-subvolume not specified");
- return AUTH_REJECT;
- }
-
- ret = asprintf (&searchstr, "auth.login.%s.allow", brick_name);
- if (-1 == ret) {
- gf_log ("auth/login", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
-
- allow_user = dict_get (config_params,
- searchstr);
- free (searchstr);
-
- if (allow_user) {
- char *username_str = NULL;
- char *tmp;
- char *username_cpy = strdup (allow_user->data);
-
- username_str = strtok_r (username_cpy, " ,", &tmp);
-
- while (username_str) {
- data_t *passwd_data = NULL;
- if (!fnmatch (username_str,
- username,
- 0)) {
- ret = asprintf (&searchstr, "auth.login.%s.password", username);
+ auth_result_t result = AUTH_DONT_CARE;
+ int ret = 0;
+ data_t *allow_user = NULL;
+ data_t *username_data = NULL;
+ data_t *passwd_data = NULL;
+ data_t *password_data = NULL;
+ char *username = NULL;
+ char *password = NULL;
+ char *brick_name = NULL;
+ char *searchstr = NULL;
+ char *username_str = NULL;
+ char *tmp = NULL;
+ char *username_cpy = NULL;
+ gf_boolean_t using_ssl = _gf_false;
+
+ username_data = dict_get (input_params, "ssl-name");
+ if (username_data) {
+ gf_log ("auth/login", GF_LOG_INFO,
+ "connecting user name: %s", username_data->data);
+ using_ssl = _gf_true;
+ }
+ else {
+ username_data = dict_get (input_params, "username");
+ if (!username_data) {
+ gf_log ("auth/login", GF_LOG_DEBUG,
+ "username not found, returning DONT-CARE");
+ goto out;
+ }
+ password_data = dict_get (input_params, "password");
+ if (!password_data) {
+ gf_log ("auth/login", GF_LOG_WARNING,
+ "password not found, returning DONT-CARE");
+ goto out;
+ }
+ password = data_to_str (password_data);
+ }
+ username = data_to_str (username_data);
+
+ brick_name = data_to_str (dict_get (input_params, "remote-subvolume"));
+ if (!brick_name) {
+ gf_log ("auth/login", GF_LOG_ERROR,
+ "remote-subvolume not specified");
+ result = AUTH_REJECT;
+ goto out;
+ }
+
+ ret = gf_asprintf (&searchstr, "auth.login.%s.%s", brick_name,
+ using_ssl ? "ssl-allow" : "allow");
if (-1 == ret) {
- gf_log ("auth/login", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
+ gf_log ("auth/login", GF_LOG_WARNING,
+ "asprintf failed while setting search string, "
+ "returning DONT-CARE");
+ goto out;
+ }
+
+ allow_user = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ if (allow_user) {
+ gf_log ("auth/login", GF_LOG_INFO,
+ "allowed user names: %s", allow_user->data);
+ /*
+ * There's a subtle difference between SSL and non-SSL behavior
+ * if we can't match anything in the "while" loop below.
+ * Intuitively, we should AUTH_REJECT if there's no match.
+ * However, existing code depends on allowing untrusted users
+ * to connect with *no credentials at all* by falling through
+ * the loop. They're still distinguished from trusted users
+ * who do provide a valid username and password (in fact that's
+ * pretty much the only thing we use non-SSL login auth for),
+ * but they are allowed to connect. It's wrong, but it's not
+ * worth changing elsewhere. Therefore, we do the sane thing
+ * only for SSL here.
+ *
+ * For SSL, if there's a list *you must be on it*. Note that
+ * if there's no list we don't care. In that case (and the
+ * ssl-allow=* case as well) authorization is effectively
+ * disabled, though authentication and encryption are still
+ * active.
+ */
+ if (using_ssl) {
+ result = AUTH_REJECT;
+ }
+ username_cpy = gf_strdup (allow_user->data);
+ if (!username_cpy)
+ goto out;
+
+ username_str = strtok_r (username_cpy, " ,", &tmp);
+
+ /*
+ * We have to match a user's *authenticated* name to one in the
+ * list. If we're using SSL, they're already authenticated.
+ * Otherwise, they need a matching password to complete the
+ * process.
+ */
+ while (username_str) {
+ if (!fnmatch (username_str, username, 0)) {
+ if (using_ssl) {
+ result = AUTH_ACCEPT;
+ break;
+ }
+ ret = gf_asprintf (&searchstr,
+ "auth.login.%s.password",
+ username);
+ if (-1 == ret) {
+ gf_log ("auth/login", GF_LOG_WARNING,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+ passwd_data = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ if (!passwd_data) {
+ gf_log ("auth/login", GF_LOG_ERROR,
+ "wrong username/password combination");
+ result = AUTH_REJECT;
+ goto out;
+ }
+
+ result = !((strcmp (data_to_str (passwd_data),
+ password)) ?
+ AUTH_ACCEPT :
+ AUTH_REJECT);
+ if (result == AUTH_REJECT)
+ gf_log ("auth/login", GF_LOG_ERROR,
+ "wrong password for user %s",
+ username);
+
+ break;
+ }
+ username_str = strtok_r (NULL, " ,", &tmp);
+ }
}
- passwd_data = dict_get (config_params, searchstr);
- FREE (searchstr);
-
- if (!passwd_data) {
- gf_log ("auth/login",
- GF_LOG_DEBUG,
- "wrong username/password combination");
- result = AUTH_REJECT;
- }
- else
- result = !strcmp (data_to_str (passwd_data), password) ? AUTH_ACCEPT : AUTH_REJECT;
- break;
- }
- username_str = strtok_r (NULL, " ,", &tmp);
- }
- free (username_cpy);
- }
-
- return result;
+
+out:
+ GF_FREE (username_cpy);
+
+ return result;
}
struct volume_options options[] = {
- { .key = {"auth.login.*.allow"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"auth.login.*.password"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
+ { .key = {"auth.login.*.allow"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"auth.login.*.password"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {NULL} }
};
diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am
index 7811e088a7e..75acbc9e877 100644
--- a/xlators/protocol/client/src/Makefile.am
+++ b/xlators/protocol/client/src/Makefile.am
@@ -2,15 +2,19 @@
xlator_LTLIBRARIES = client.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
-client_la_LDFLAGS = -module -avoidversion
+client_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(top_builddir)/xlators/protocol/lib/src/libgfproto1.la
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
-client_la_SOURCES = client.c client-helpers.c client3_1-fops.c client-handshake.c
-noinst_HEADERS = client.h client-mem-types.h
+client_la_SOURCES = client.c client-helpers.c client-rpc-fops.c \
+ client-handshake.c client-callback.c client-lk.c client-common.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/protocol/lib/src -I$(top_srcdir)/rpc/rpc-lib/src/
+noinst_HEADERS = client.h client-mem-types.h client-messages.h client-common.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/protocol/client/src/client-callback.c b/xlators/protocol/client/src/client-callback.c
new file mode 100644
index 00000000000..134044015e4
--- /dev/null
+++ b/xlators/protocol/client/src/client-callback.c
@@ -0,0 +1,195 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "client.h"
+#include "rpc-clnt.h"
+#include "defaults.h"
+#include "client-messages.h"
+
+/*
+ * Placeholder handler registered for GF_CBK_NULL.
+ *
+ * It is not expected to be invoked: it ignores all of its arguments, logs a
+ * warning against the current translator and returns 0.
+ */
+int
+client_cbk_null (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t *this = THIS;
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FUNCTION_CALL_ERROR,
+                "this function should not be called");
+
+        return 0;
+}
+
+/*
+ * Placeholder handler registered for GF_CBK_FETCHSPEC.
+ *
+ * It is not expected to be invoked: it ignores all of its arguments, logs a
+ * warning against the current translator and returns 0.
+ */
+int
+client_cbk_fetchspec (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t *this = THIS;
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FUNCTION_CALL_ERROR,
+                "this function should not be called");
+
+        return 0;
+}
+
+/*
+ * Placeholder handler registered for GF_CBK_INO_FLUSH.
+ *
+ * It is not expected to be invoked: it ignores all of its arguments, logs a
+ * warning against the current translator and returns 0.
+ */
+int
+client_cbk_ino_flush (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t *this = THIS;
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FUNCTION_CALL_ERROR,
+                "this function should not be called");
+
+        return 0;
+}
+
+/*
+ * Handle a RECALL_LEASE callback.
+ *
+ * @data is treated as a struct iovec holding an XDR-encoded
+ * gfs3_recall_lease_req.  The request is decoded, converted into a
+ * struct gf_upcall (whose payload is a local gf_upcall_recall_lease) and
+ * delivered to the translator graph through default_notify() as a
+ * GF_EVENT_UPCALL event.
+ *
+ * Returns -1 when any argument is NULL, the negative value reported by the
+ * decode/convert step on failure, or the (non-negative) result of the
+ * conversion on success.
+ */
+int
+client_cbk_recall_lease (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        int                            ret          = -1;
+        struct iovec                  *iov          = NULL;
+        struct gf_upcall               upcall_data  = {0,};
+        struct gf_upcall_recall_lease  rl_data      = {0,};
+        gfs3_recall_lease_req          recall_lease = {{0,},};
+
+        GF_VALIDATE_OR_GOTO ("client-callback", rpc, out);
+        GF_VALIDATE_OR_GOTO ("client-callback", mydata, out);
+        GF_VALIDATE_OR_GOTO ("client-callback", data, out);
+
+        iov = (struct iovec *)data;
+        ret = xdr_to_generic (*iov, &recall_lease,
+                              (xdrproc_t)xdr_gfs3_recall_lease_req);
+
+        if (ret < 0) {
+                gf_msg (THIS->name, GF_LOG_WARNING, -ret,
+                        PC_MSG_RECALL_LEASE_FAIL,
+                        "XDR decode of recall lease failed.");
+                goto out;
+        }
+
+        upcall_data.data = &rl_data;
+        ret = gf_proto_recall_lease_to_upcall (&recall_lease, &upcall_data);
+        if (ret < 0)
+                goto out;
+
+        upcall_data.event_type = GF_UPCALL_RECALL_LEASE;
+
+        /*
+         * recall_lease.gfid is an embedded byte array (see its initializer
+         * and the absence of a free() for it), i.e. a raw, non NUL-terminated
+         * uuid.  Format it with uuid_utoa() instead of handing the raw bytes
+         * to "%s".
+         * NOTE(review): assumes uuid_utoa() is reachable through client.h --
+         * confirm.
+         */
+        gf_msg_trace (THIS->name, 0, "Upcall gfid = %s, ret = %d",
+                      uuid_utoa ((unsigned char *)recall_lease.gfid), ret);
+
+        default_notify (THIS, GF_EVENT_UPCALL, &upcall_data);
+
+out:
+        /* free(NULL) is a no-op, so no guard is needed. */
+        free (recall_lease.xdata.xdata_val);
+
+        if (rl_data.dict)
+                dict_unref (rl_data.dict);
+
+        return ret;
+}
+
+
+/*
+ * Handle a CACHE_INVALIDATION callback.
+ *
+ * @data is treated as a struct iovec holding an XDR-encoded
+ * gfs3_cbk_cache_invalidation_req.  The request is decoded, converted into
+ * a struct gf_upcall (whose payload is a local gf_upcall_cache_invalidation)
+ * and delivered through default_notify() as a GF_EVENT_UPCALL event.
+ *
+ * Always returns 0: NULL arguments, decode failures and conversion failures
+ * all skip the notification but are not reported to the caller.
+ */
+int
+client_cbk_cache_invalidation (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        int                                  ret         = -1;
+        struct iovec                        *iov         = NULL;
+        struct gf_upcall                     upcall_data = {0,};
+        struct gf_upcall_cache_invalidation  ca_data     = {0,};
+        gfs3_cbk_cache_invalidation_req      ca_req      = {0,};
+
+        gf_msg_trace (THIS->name, 0, "Upcall callback is called");
+
+        if (!rpc || !mydata || !data)
+                goto out;
+
+        iov = (struct iovec *)data;
+        ret = xdr_to_generic (*iov, &ca_req,
+                              (xdrproc_t)xdr_gfs3_cbk_cache_invalidation_req);
+
+        if (ret < 0) {
+                gf_msg (THIS->name, GF_LOG_WARNING, -ret,
+                        PC_MSG_CACHE_INVALIDATION_FAIL,
+                        "XDR decode of cache_invalidation failed.");
+                goto out;
+        }
+
+        upcall_data.data = &ca_data;
+        ret = gf_proto_cache_invalidation_to_upcall (THIS, &ca_req,
+                                                     &upcall_data);
+        if (ret < 0)
+                goto out;
+
+        gf_msg_trace (THIS->name, 0, "Cache invalidation cbk recieved for gfid:"
+                      " %s, ret = %d", ca_req.gfid, ret);
+
+        default_notify (THIS, GF_EVENT_UPCALL, &upcall_data);
+
+out:
+        /*
+         * Both buffers are released with plain free() (presumably allocated
+         * by the XDR decoder).  free(NULL) is a no-op, so no guards are
+         * needed.
+         */
+        free (ca_req.gfid);
+        free (ca_req.xdata.xdata_val);
+
+        if (ca_data.dict)
+                dict_unref (ca_data.dict);
+
+        return 0;
+}
+
+/*
+ * Handle a CHILD_UP callback: mark the client's private configuration as
+ * "child up" and raise GF_EVENT_CHILD_UP on this translator.
+ *
+ * Nothing is done when the current translator, @rpc or the translator's
+ * private data is NULL.  Always returns 0.
+ */
+int
+client_cbk_child_up (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t    *this = THIS;
+        clnt_conf_t *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, rpc, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        gf_msg_debug (this->name, 0, "Received CHILD_UP");
+        conf->child_up = _gf_true;
+
+        this->notify (this, GF_EVENT_CHILD_UP, NULL);
+
+out:
+        return 0;
+}
+
+/*
+ * Handle a CHILD_DOWN callback: mark the client's private configuration as
+ * "child down" and raise GF_EVENT_CHILD_DOWN on this translator.
+ *
+ * Nothing is done when the current translator, @rpc or the translator's
+ * private data is NULL.  Always returns 0.
+ */
+int
+client_cbk_child_down (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t    *this = THIS;
+        clnt_conf_t *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, rpc, out);
+
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        gf_msg_debug (this->name, 0, "Received CHILD_DOWN");
+        conf->child_up = _gf_false;
+
+        this->notify (this, GF_EVENT_CHILD_DOWN, NULL);
+
+out:
+        return 0;
+}
+
+/*
+ * Callback dispatch table, indexed by GF_CBK_* procedure number.  Each entry
+ * holds the procedure's printable name, its number and the handler function.
+ * Slots without an explicit initializer are zero-filled.
+ */
+rpcclnt_cb_actor_t gluster_cbk_actors[GF_CBK_MAXVALUE] = {
+        [GF_CBK_NULL]               = {"NULL",               GF_CBK_NULL,               client_cbk_null },
+        [GF_CBK_FETCHSPEC]          = {"FETCHSPEC",          GF_CBK_FETCHSPEC,          client_cbk_fetchspec },
+        [GF_CBK_INO_FLUSH]          = {"INO_FLUSH",          GF_CBK_INO_FLUSH,          client_cbk_ino_flush },
+        [GF_CBK_CACHE_INVALIDATION] = {"CACHE_INVALIDATION", GF_CBK_CACHE_INVALIDATION, client_cbk_cache_invalidation },
+        [GF_CBK_CHILD_UP]           = {"CHILD_UP",           GF_CBK_CHILD_UP,           client_cbk_child_up },
+        [GF_CBK_CHILD_DOWN]         = {"CHILD_DOWN",         GF_CBK_CHILD_DOWN,         client_cbk_child_down },
+        [GF_CBK_RECALL_LEASE]       = {"RECALL_LEASE",       GF_CBK_RECALL_LEASE,       client_cbk_recall_lease },
+};
+
+
+/*
+ * Descriptor of the "GlusterFS Callback" program: its program number and
+ * version plus the gluster_cbk_actors dispatch table and that table's size.
+ */
+struct rpcclnt_cb_program gluster_cbk_prog = {
+        .progname  = "GlusterFS Callback",
+        .prognum   = GLUSTER_CBK_PROGRAM,
+        .progver   = GLUSTER_CBK_VERSION,
+        .actors    = gluster_cbk_actors,
+        .numactors = GF_CBK_MAXVALUE,
+};
diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
new file mode 100644
index 00000000000..51c2d95ea2b
--- /dev/null
+++ b/xlators/protocol/client/src/client-common.c
@@ -0,0 +1,2162 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dict.h"
+#include "xlator.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "client.h"
+
+/* processing to be done before fops are wound down */
+/*
+ * Fill a STAT request from @loc.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid; @xdata is handed to GF_PROTOCOL_DICT_SERIALIZE to populate
+ * req->xdata.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros (EINVAL is
+ * requested for a null gfid).
+ */
+int
+client_pre_stat (xlator_t *this, gfs3_stat_req *req, loc_t *loc,
+                 dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a READLINK request from @loc and the caller's buffer @size.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_readlink (xlator_t *this, gfs3_readlink_req *req, loc_t *loc,
+                     size_t size, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->size = size;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a MKNOD request: parent gfid and basename from @loc plus @mode,
+ * @rdev and @umask.
+ *
+ * req->pargfid is copied from loc->parent->gfid when that is non-null, else
+ * from loc->pargfid.  req->bname aliases loc->name (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its parent
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_mknod (xlator_t *this, gfs3_mknod_req *req, loc_t *loc,
+                  mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->parent == NULL)
+                goto out;
+
+        memcpy (req->pargfid,
+                gf_uuid_is_null (loc->parent->gfid) ? loc->pargfid
+                                                    : loc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->pargfid)),
+                                      out, op_errno, EINVAL);
+
+        req->bname = (char *)loc->name;
+        req->mode  = mode;
+        req->dev   = rdev;
+        req->umask = umask;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a MKDIR request: parent gfid and basename from @loc plus @mode and
+ * @umask.
+ *
+ * req->pargfid is copied from loc->parent->gfid when that is non-null, else
+ * from loc->pargfid.  req->bname aliases loc->name (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its parent
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_mkdir (xlator_t *this, gfs3_mkdir_req *req, loc_t *loc,
+                  mode_t mode, mode_t umask, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->parent == NULL)
+                goto out;
+
+        memcpy (req->pargfid,
+                gf_uuid_is_null (loc->parent->gfid) ? loc->pargfid
+                                                    : loc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->pargfid)),
+                                      out, op_errno, EINVAL);
+
+        req->bname = (char *)loc->name;
+        req->mode  = mode;
+        req->umask = umask;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an UNLINK request: parent gfid and basename from @loc plus @flags.
+ *
+ * req->pargfid is copied from loc->parent->gfid when that is non-null, else
+ * from loc->pargfid.  req->bname aliases loc->name (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its parent
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_unlink (xlator_t *this, gfs3_unlink_req *req, loc_t *loc,
+                   int32_t flags, dict_t *xdata)
+{
+        /*
+         * Must not start at 0: the guard below jumps to "out" without
+         * assigning op_errno, and "return -0" would report success for a
+         * request that was never filled in.  ESTALE matches the sibling
+         * client_pre_* helpers.
+         */
+        int op_errno = ESTALE;
+
+        if (!(loc && loc->parent))
+                goto out;
+
+        if (!gf_uuid_is_null (loc->parent->gfid))
+                memcpy (req->pargfid, loc->parent->gfid, 16);
+        else
+                memcpy (req->pargfid, loc->pargfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->pargfid)),
+                                      out, op_errno, EINVAL);
+        req->bname = (char *)loc->name;
+        req->xflags = flags;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an RMDIR request: parent gfid and basename from @loc plus @flags.
+ *
+ * req->pargfid is copied from loc->parent->gfid when that is non-null, else
+ * from loc->pargfid.  req->bname aliases loc->name (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its parent
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_rmdir (xlator_t *this, gfs3_rmdir_req *req, loc_t *loc,
+                  int32_t flags, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->parent == NULL)
+                goto out;
+
+        memcpy (req->pargfid,
+                gf_uuid_is_null (loc->parent->gfid) ? loc->pargfid
+                                                    : loc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->pargfid)),
+                                      out, op_errno, EINVAL);
+
+        req->bname  = (char *)loc->name;
+        req->xflags = flags;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a SYMLINK request: parent gfid and basename from @loc, the link
+ * target @linkname and @umask.
+ *
+ * req->pargfid is copied from loc->parent->gfid when that is non-null, else
+ * from loc->pargfid.  req->linkname and req->bname alias the caller's
+ * strings (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its parent
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_symlink (xlator_t *this, gfs3_symlink_req *req, loc_t *loc,
+                    const char *linkname, mode_t umask, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->parent == NULL)
+                goto out;
+
+        memcpy (req->pargfid,
+                gf_uuid_is_null (loc->parent->gfid) ? loc->pargfid
+                                                    : loc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->pargfid)),
+                                      out, op_errno, EINVAL);
+
+        req->linkname = (char *)linkname;
+        req->bname    = (char *)loc->name;
+        req->umask    = umask;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a RENAME request from the source (@oldloc) and destination (@newloc)
+ * locations.
+ *
+ * req->oldgfid / req->newgfid receive the gfid of the respective *parent*
+ * directory: the parent inode's gfid when non-null, else loc->pargfid.
+ * req->oldbname / req->newbname alias the loc names (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when either loc or
+ * either parent is missing, otherwise the op_errno left by the helper
+ * macros.
+ */
+int
+client_pre_rename (xlator_t *this, gfs3_rename_req *req, loc_t *oldloc,
+                   loc_t *newloc, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (oldloc == NULL || newloc == NULL ||
+            oldloc->parent == NULL || newloc->parent == NULL)
+                goto out;
+
+        memcpy (req->oldgfid,
+                gf_uuid_is_null (oldloc->parent->gfid) ? oldloc->pargfid
+                                                       : oldloc->parent->gfid,
+                16);
+
+        memcpy (req->newgfid,
+                gf_uuid_is_null (newloc->parent->gfid) ? newloc->pargfid
+                                                       : newloc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->oldgfid)),
+                                      out, op_errno, EINVAL);
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->newgfid)),
+                                      out, op_errno, EINVAL);
+
+        req->oldbname = (char *)oldloc->name;
+        req->newbname = (char *)newloc->name;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a LINK request.
+ *
+ * req->oldgfid identifies the existing file: oldloc->inode->gfid when
+ * non-null, else oldloc->gfid.  req->newgfid identifies the directory that
+ * receives the new name: newloc->parent->gfid when non-null, else
+ * newloc->pargfid.  req->newbname aliases newloc->name (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @oldloc, its
+ * inode, @newloc or its parent is missing, otherwise the op_errno left by
+ * the helper macros.
+ */
+int
+client_pre_link (xlator_t *this,
+                 gfs3_link_req *req, loc_t *oldloc, loc_t *newloc,
+                 dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (oldloc == NULL || oldloc->inode == NULL ||
+            newloc == NULL || newloc->parent == NULL)
+                goto out;
+
+        memcpy (req->oldgfid,
+                gf_uuid_is_null (oldloc->inode->gfid) ? oldloc->gfid
+                                                      : oldloc->inode->gfid,
+                16);
+
+        memcpy (req->newgfid,
+                gf_uuid_is_null (newloc->parent->gfid) ? newloc->pargfid
+                                                       : newloc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->oldgfid)),
+                                      out, op_errno, EINVAL);
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->newgfid)),
+                                      out, op_errno, EINVAL);
+
+        req->newbname = (char *)newloc->name;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a TRUNCATE request from @loc and the new length @offset.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_truncate (xlator_t *this, gfs3_truncate_req *req,
+                     loc_t *loc, off_t offset, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->offset = offset;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an OPEN request from @loc and the open @flags.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.  @flags is passed through gf_flags_from_flags() before
+ * being stored.  @fd is not used here.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_open (xlator_t *this, gfs3_open_req *req, loc_t *loc, fd_t *fd,
+                 int32_t flags, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->flags = gf_flags_from_flags (flags);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a READ request for @size bytes at @offset on @fd.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * FALLBACK_TO_ANON_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_readv (xlator_t *this, gfs3_read_req *req, fd_t *fd, size_t size,
+                  off_t offset, int32_t flags, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, FALLBACK_TO_ANON_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd     = remote_fd;
+        req->offset = offset;
+        req->size   = size;
+        req->flag   = flags;
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a WRITE request for @size bytes at @offset on @fd.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * FALLBACK_TO_ANON_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_writev (xlator_t *this, gfs3_write_req *req,
+                   fd_t *fd, size_t size, off_t offset, int32_t flags,
+                   dict_t *xdata)
+{
+        int64_t         remote_fd = -1;
+        int             op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, FALLBACK_TO_ANON_FD,
+                              remote_fd, op_errno, out);
+
+        req->size   = size;
+        req->offset = offset;
+        req->fd     = remote_fd;
+        req->flag   = flags;
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+#ifdef GF_TESTING_IO_XDATA
+        /*
+         * Test-only hook that injects a marker key into xdata.  It used to
+         * assign to an undeclared "ret", which broke the build whenever the
+         * macro was defined; the result is deliberately ignored because the
+         * hook is best-effort.
+         * NOTE(review): a dict created here is never unref'd in this
+         * function -- confirm whether that matters for test builds.
+         */
+        if (!xdata)
+                xdata = dict_new ();
+
+        (void) dict_set_str (xdata, "testing-the-xdata-key",
+                             "testing-the-xdata-value");
+#endif
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a STATFS request from @loc.
+ *
+ * With an inode, req->gfid is copied from loc->inode->gfid when non-null,
+ * else from loc->gfid.  Without an inode only the last gfid byte is set
+ * to 1 (presumably selecting the root gfid, assuming req was zeroed by the
+ * caller -- TODO confirm).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc is NULL,
+ * otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_statfs (xlator_t *this, gfs3_statfs_req *req, loc_t *loc,
+                   dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL)
+                goto out;
+
+        if (loc->inode == NULL) {
+                req->gfid[15] = 1;
+        } else {
+                memcpy (req->gfid,
+                        gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                           : loc->inode->gfid,
+                        16);
+        }
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a FLUSH request for @fd.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * DEFAULT_REMOTE_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_flush (xlator_t *this, gfs3_flush_req *req, fd_t *fd, dict_t *xdata)
+{
+        int64_t         remote_fd = -1;
+        int             op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd = remote_fd;
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an FSYNC request for @fd; @flags is stored in req->data.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * DEFAULT_REMOTE_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_fsync (xlator_t *this, gfs3_fsync_req *req, fd_t *fd,
+                  int32_t flags, dict_t *xdata)
+{
+        int64_t         remote_fd = -1;
+        /*
+         * Start from ESTALE like the other fd-based helpers rather than 0,
+         * so that a jump to "out" which leaves op_errno untouched can never
+         * be reported as success ("return -0").
+         */
+        int             op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd   = remote_fd;
+        req->data = flags;
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a SETXATTR request from @loc, the attribute dictionary @xattr and
+ * @flags.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.  @xattr, when given, is serialized into req->dict;
+ * @xdata is serialized into req->xdata.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_setxattr (xlator_t *this, gfs3_setxattr_req *req, loc_t *loc,
+                      dict_t *xattr, int32_t flags, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        if (xattr != NULL) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, xattr,
+                                            (&req->dict.dict_val),
+                                            req->dict.dict_len,
+                                            op_errno, out);
+        }
+
+        req->flags = flags;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a GETXATTR request from @loc and the attribute @name.
+ *
+ * req->gfid is copied from loc->inode->gfid when an inode with a non-null
+ * gfid is present, else from loc->gfid.  req->namelen is used as a flag:
+ * 1 when @name was supplied, 0 (with an empty req->name) when it was not.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL when @loc is NULL,
+ * otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_getxattr (xlator_t *this, gfs3_getxattr_req *req, loc_t *loc,
+                     const char *name, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        memcpy (req->gfid,
+                (loc->inode && !gf_uuid_is_null (loc->inode->gfid))
+                        ? loc->inode->gfid
+                        : loc->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        if (name != NULL) {
+                req->name    = (char *)name;
+                req->namelen = 1; /* Use it as a flag */
+        } else {
+                req->name    = "";
+                req->namelen = 0;
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a REMOVEXATTR request from @loc and the attribute @name.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.  req->name aliases @name (no copy is made).
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_removexattr (xlator_t *this, gfs3_removexattr_req *req,
+                         loc_t *loc, const char *name, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->name = (char *)name;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an OPENDIR request from @loc.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.  @fd is not used here.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_opendir (xlator_t *this,
+                    gfs3_opendir_req *req, loc_t *loc,
+                    fd_t *fd, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an FSYNCDIR request for @fd; @flags is stored in req->data.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * DEFAULT_REMOTE_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_fsyncdir (xlator_t *this, gfs3_fsyncdir_req *req, fd_t *fd,
+                     int32_t flags, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int32_t op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->data = flags;
+        req->fd   = remote_fd;
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an ACCESS request from @loc and the permission @mask.
+ *
+ * req->gfid is copied from loc->inode->gfid when that is non-null, else
+ * from loc->gfid.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_access (xlator_t *this, gfs3_access_req *req, loc_t *loc,
+                   int32_t mask, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->mask = mask;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a CREATE request: parent gfid and basename from @loc plus @mode,
+ * @flags and @umask.
+ *
+ * req->pargfid is copied from loc->parent->gfid when that is non-null, else
+ * from loc->pargfid.  @flags is passed through gf_flags_from_flags() before
+ * being stored.  req->bname aliases loc->name; @fd is not used here.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its parent
+ * is missing, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_create (xlator_t *this, gfs3_create_req *req,
+                   loc_t *loc, fd_t *fd, mode_t mode,
+                   int32_t flags, mode_t umask, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->parent == NULL)
+                goto out;
+
+        memcpy (req->pargfid,
+                gf_uuid_is_null (loc->parent->gfid) ? loc->pargfid
+                                                    : loc->parent->gfid,
+                16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->pargfid)),
+                                      out, op_errno, EINVAL);
+
+        req->bname = (char *)loc->name;
+        req->mode  = mode;
+        req->flags = gf_flags_from_flags (flags);
+        req->umask = umask;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an FTRUNCATE request for @fd with the new length @offset.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * DEFAULT_REMOTE_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as EINVAL and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_ftruncate (xlator_t *this, gfs3_ftruncate_req *req, fd_t *fd,
+                      off_t offset, dict_t *xdata)
+{
+        int     op_errno  = EINVAL;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd     = remote_fd;
+        req->offset = offset;
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an FSTAT request for @fd.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * DEFAULT_REMOTE_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_fstat (xlator_t *this, gfs3_fstat_req *req, fd_t *fd,
+                  dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->fd = remote_fd;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill an LK (posix lock) request for @fd.
+ *
+ * @cmd is translated by client_cmd_to_gf_cmd() and flock->l_type is mapped
+ * to the matching GF_LK_F_* value; an l_type other than F_RDLCK, F_WRLCK or
+ * F_UNLCK leaves the wire type at 0.  The remote fd is obtained through
+ * CLIENT_GET_REMOTE_FD with DEFAULT_REMOTE_FD and the gfid is taken from
+ * fd->inode.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL when @cmd cannot be
+ * translated, otherwise the op_errno left by the helper macros.
+ */
+int
+client_pre_lk (xlator_t *this, gfs3_lk_req *req,
+               int32_t cmd, struct gf_flock *flock, fd_t *fd, dict_t *xdata)
+{
+        int64_t         remote_fd = -1;
+        int             op_errno  = ESTALE;
+        int32_t         gf_cmd    = 0;
+        int32_t         gf_type   = 0;
+        int             ret       = 0;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        ret = client_cmd_to_gf_cmd (cmd, &gf_cmd);
+        if (ret) {
+                op_errno = EINVAL;
+                /* Report the caller's command, not the untranslated gf_cmd. */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_INVALID_ENTRY, "Unknown cmd (%d)!", cmd);
+                goto out;
+        }
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                gf_type = GF_LK_F_RDLCK;
+                break;
+        case F_WRLCK:
+                gf_type = GF_LK_F_WRLCK;
+                break;
+        case F_UNLCK:
+                gf_type = GF_LK_F_UNLCK;
+                break;
+        default:
+                /* Unrecognised lock type: gf_type stays 0, as before. */
+                break;
+        }
+
+        req->fd    = remote_fd;
+        req->cmd   = gf_cmd;
+        req->type  = gf_type;
+        gf_proto_flock_from_flock (&req->flock, flock);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a LOOKUP request from @loc.
+ *
+ * req->pargfid comes from loc->parent->gfid when a parent with a non-null
+ * gfid exists, else from loc->pargfid.  req->gfid comes from
+ * loc->inode->gfid when non-null, else from loc->gfid.  req->bname aliases
+ * loc->name, or is the empty string when no name is set.  @xdata is only
+ * serialized when it is non-NULL.  Unlike the other helpers, no check for a
+ * null gfid is made here.
+ *
+ * Returns 0 on success or a negative errno: -ESTALE when @loc or its inode
+ * is missing, otherwise the op_errno left by the serialize macro.
+ */
+int
+client_pre_lookup (xlator_t *this, gfs3_lookup_req *req, loc_t *loc,
+                   dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (loc == NULL || loc->inode == NULL)
+                goto out;
+
+        memcpy (req->pargfid,
+                (loc->parent && !gf_uuid_is_null (loc->parent->gfid))
+                        ? loc->parent->gfid
+                        : loc->pargfid,
+                16);
+
+        /* loc->inode is known to be non-NULL past the guard above. */
+        memcpy (req->gfid,
+                gf_uuid_is_null (loc->inode->gfid) ? loc->gfid
+                                                   : loc->inode->gfid,
+                16);
+
+        req->bname = loc->name ? (char *)loc->name : "";
+
+        if (xdata != NULL) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, xdata,
+                                            (&req->xdata.xdata_val),
+                                            req->xdata.xdata_len,
+                                            op_errno, out);
+        }
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a READDIR request for up to @size bytes starting at directory
+ * @offset on @fd.
+ *
+ * The remote fd is obtained through CLIENT_GET_REMOTE_FD with
+ * DEFAULT_REMOTE_FD; the gfid is taken from fd->inode.
+ *
+ * Returns 0 on success or -op_errno, where op_errno starts as ESTALE and
+ * may be overwritten by the helper macros before they jump to "out".
+ */
+int
+client_pre_readdir (xlator_t *this, gfs3_readdir_req *req, fd_t *fd,
+                    size_t size, off_t offset, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd     = remote_fd;
+        req->offset = offset;
+        req->size   = size;
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_inodelk_req from @loc.
+ *
+ * The gfid is taken from loc->gfid when non-null, else from
+ * loc->inode->gfid, and must end up non-null.  @cmd (F_GETLK / F_SETLK /
+ * F_SETLKW or their *64 variants) is mapped to GF_LK_GETLK / GF_LK_SETLK /
+ * GF_LK_SETLKW; flock->l_type is mapped to GF_LK_F_*, with any other type
+ * left as 0.
+ *
+ * Returns 0 on success or a negated errno: -ESTALE for a NULL loc or
+ * loc->inode, -EINVAL for a null gfid or an unknown @cmd, or whatever
+ * GF_PROTOCOL_DICT_SERIALIZE left in op_errno.
+ */
+int
+client_pre_inodelk (xlator_t *this, gfs3_inodelk_req *req, loc_t *loc,
+                    int cmd, struct gf_flock *flock, const char *volume,
+                    dict_t *xdata)
+{
+        int     op_errno = ESTALE;
+        int32_t gf_cmd   = 0;
+        int32_t gf_type  = 0;
+
+        if (!(loc && loc->inode))
+                goto out;
+
+        if (!gf_uuid_is_null (loc->gfid))
+                memcpy (req->gfid, loc->gfid, 16);
+        else
+                memcpy (req->gfid, loc->inode->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+        if (cmd == F_GETLK || cmd == F_GETLK64)
+                gf_cmd = GF_LK_GETLK;
+        else if (cmd == F_SETLK || cmd == F_SETLK64)
+                gf_cmd = GF_LK_SETLK;
+        else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+                gf_cmd = GF_LK_SETLKW;
+        else {
+                /* gf_cmd is still 0 here; report the caller's cmd. */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_INVALID_ENTRY, "Unknown cmd (%d)!", cmd);
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                gf_type = GF_LK_F_RDLCK;
+                break;
+        case F_WRLCK:
+                gf_type = GF_LK_F_WRLCK;
+                break;
+        case F_UNLCK:
+                gf_type = GF_LK_F_UNLCK;
+                break;
+        }
+
+        req->volume = (char *)volume;
+        req->cmd    = gf_cmd;
+        req->type   = gf_type;
+        gf_proto_flock_from_flock (&req->flock, flock);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_finodelk_req for an inode lock taken through @fd.
+ *
+ * The remote fd is looked up with FALLBACK_TO_ANON_FD.  @cmd and
+ * flock->l_type are mapped exactly as in client_pre_inodelk; an l_type
+ * other than F_RDLCK / F_WRLCK / F_UNLCK leaves the request type at 0.
+ *
+ * Returns 0 on success or a negated errno: -EINVAL for an unknown @cmd,
+ * otherwise -op_errno (initially ESTALE) as left by the macros, which are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_finodelk (xlator_t *this, gfs3_finodelk_req *req, fd_t *fd,
+                     int cmd, struct gf_flock *flock, const char *volume,
+                     dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+        int32_t gf_type   = 0;
+        int32_t gf_cmd    = 0;
+
+        CLIENT_GET_REMOTE_FD (this, fd, FALLBACK_TO_ANON_FD,
+                              remote_fd, op_errno, out);
+
+        if (cmd == F_GETLK || cmd == F_GETLK64)
+                gf_cmd = GF_LK_GETLK;
+        else if (cmd == F_SETLK || cmd == F_SETLK64)
+                gf_cmd = GF_LK_SETLK;
+        else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+                gf_cmd = GF_LK_SETLKW;
+        else {
+                /* gf_cmd is still 0 here; report the caller's cmd. */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_INVALID_ENTRY, "Unknown cmd (%d)!", cmd);
+                /* Match client_pre_inodelk: a bad cmd is EINVAL, not ESTALE. */
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                gf_type = GF_LK_F_RDLCK;
+                break;
+        case F_WRLCK:
+                gf_type = GF_LK_F_WRLCK;
+                break;
+        case F_UNLCK:
+                gf_type = GF_LK_F_UNLCK;
+                break;
+        }
+
+        req->volume = (char *)volume;
+        req->fd     = remote_fd;
+        req->cmd    = gf_cmd;
+        req->type   = gf_type;
+        gf_proto_flock_from_flock (&req->flock, flock);
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_entrylk_req from @loc.
+ *
+ * The gfid is loc->gfid when non-null, else loc->inode->gfid, and must end
+ * up non-null.  When @basename is given it becomes req->name and
+ * req->namelen is set to 1; otherwise req->name is "" and namelen is left
+ * untouched.
+ *
+ * Returns 0 on success or a negated errno: -ESTALE for a NULL loc or
+ * loc->inode, -EINVAL for a null gfid, or whatever
+ * GF_PROTOCOL_DICT_SERIALIZE left in op_errno.
+ */
+int
+client_pre_entrylk (xlator_t *this, gfs3_entrylk_req *req, loc_t *loc,
+                    entrylk_cmd cmd_entrylk, entrylk_type type,
+                    const char *volume, const char *basename, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (!loc || !loc->inode)
+                goto out;
+
+        if (gf_uuid_is_null (loc->gfid))
+                memcpy (req->gfid, loc->inode->gfid, 16);
+        else
+                memcpy (req->gfid, loc->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->volume = (char *)volume;
+        req->type   = type;
+        req->cmd    = cmd_entrylk;
+
+        if (basename) {
+                req->name    = (char *)basename;
+                req->namelen = 1;
+        } else {
+                req->name = "";
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fentrylk_req for an entry lock taken through @fd.
+ *
+ * When @basename is given it becomes req->name and req->namelen is set to
+ * 1; otherwise req->name is "" and namelen is left untouched.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the two macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_fentrylk (xlator_t *this, gfs3_fentrylk_req *req, fd_t *fd,
+                     entrylk_cmd cmd_entrylk, entrylk_type type,
+                     const char *volume, const char *basename, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd = remote_fd;
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        req->volume = (char *)volume;
+        req->type   = type;
+        req->cmd    = cmd_entrylk;
+
+        if (basename) {
+                req->name    = (char *)basename;
+                req->namelen = 1;
+        } else {
+                req->name = "";
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_xattrop_req from @loc.
+ *
+ * The gfid is loc->inode->gfid when non-null, else loc->gfid, and must end
+ * up non-null.  @xattr, when non-NULL, is serialized into req->dict;
+ * @flags is stored after that, then @xdata is serialized into req->xdata.
+ *
+ * Returns 0 on success or a negated errno: -ESTALE for a NULL loc or
+ * loc->inode, -EINVAL for a null gfid, or whatever
+ * GF_PROTOCOL_DICT_SERIALIZE left in op_errno.
+ */
+int
+client_pre_xattrop (xlator_t *this, gfs3_xattrop_req *req, loc_t *loc,
+                    dict_t *xattr, int32_t flags, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (!loc || !loc->inode)
+                goto out;
+
+        if (gf_uuid_is_null (loc->inode->gfid))
+                memcpy (req->gfid, loc->gfid, 16);
+        else
+                memcpy (req->gfid, loc->inode->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        if (xattr) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, xattr,
+                                            (&req->dict.dict_val),
+                                            req->dict.dict_len,
+                                            op_errno, out);
+        }
+
+        /* Kept after the xattr step so a failure there leaves flags unset. */
+        req->flags = flags;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fxattrop_req for an xattrop through @fd.
+ *
+ * The remote fd is looked up with FALLBACK_TO_ANON_FD.  @xattr, when
+ * non-NULL, is serialized into req->dict; @xdata into req->xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_fxattrop (xlator_t *this, gfs3_fxattrop_req *req, fd_t *fd,
+                     dict_t *xattr, int32_t flags, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int     op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, FALLBACK_TO_ANON_FD,
+                              remote_fd, op_errno, out);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->flags = flags;
+        req->fd    = remote_fd;
+
+        if (xattr) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, xattr,
+                                            (&req->dict.dict_val),
+                                            req->dict.dict_len,
+                                            op_errno, out);
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fgetxattr_req for a getxattr through @fd.
+ *
+ * req->namelen is used as a flag: 1 when @name is given, 0 when it is
+ * NULL (in which case req->name is the empty string).
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_fgetxattr (xlator_t *this, gfs3_fgetxattr_req *req, fd_t *fd,
+                      const char *name, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd = remote_fd;
+
+        if (name) {
+                req->name    = (char *)name;
+                req->namelen = 1; /* Use it as a flag */
+        } else {
+                req->name    = "";
+                req->namelen = 0;
+        }
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fsetxattr_req for a setxattr through @fd.
+ *
+ * @xattr, when non-NULL, is serialized into req->dict; @xdata into
+ * req->xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_fsetxattr (xlator_t *this, gfs3_fsetxattr_req *req, fd_t *fd,
+                      int32_t flags, dict_t *xattr, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->flags = flags;
+        req->fd    = remote_fd;
+
+        if (xattr) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, xattr,
+                                            (&req->dict.dict_val),
+                                            req->dict.dict_len,
+                                            op_errno, out);
+        }
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_rchecksum_req: remote fd for @fd, @offset, @len and the
+ * serialized @xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_rchecksum (xlator_t *this, gfs3_rchecksum_req *req, fd_t *fd,
+                      int32_t len, off_t offset, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd     = remote_fd;
+        req->offset = offset;
+        req->len    = len;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_setattr_req from @loc.
+ *
+ * The gfid is loc->inode->gfid when non-null, else loc->gfid, and must end
+ * up non-null.  @stbuf is converted with gf_stat_from_iatt().
+ *
+ * Returns 0 on success or a negated errno: -ESTALE for a NULL loc or
+ * loc->inode, -EINVAL for a null gfid, or whatever
+ * GF_PROTOCOL_DICT_SERIALIZE left in op_errno.
+ */
+int
+client_pre_setattr (xlator_t *this, gfs3_setattr_req *req, loc_t *loc,
+                    int32_t valid, struct iatt *stbuf, dict_t *xdata)
+{
+        int op_errno = ESTALE;
+
+        if (!loc || !loc->inode)
+                goto out;
+
+        if (gf_uuid_is_null (loc->inode->gfid))
+                memcpy (req->gfid, loc->gfid, 16);
+        else
+                memcpy (req->gfid, loc->inode->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        req->valid = valid;
+        gf_stat_from_iatt (&req->stbuf, stbuf);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fsetattr_req: remote fd for @fd, @valid, the converted
+ * @stbuf and the serialized @xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_fsetattr (xlator_t *this, gfs3_fsetattr_req *req, fd_t *fd,
+                     int32_t valid, struct iatt *stbuf, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int     op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->valid = valid;
+        req->fd    = remote_fd;
+        gf_stat_from_iatt (&req->stbuf, stbuf);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_readdirp_req for a readdirp on @fd.
+ *
+ * Unlike the other requests here, @xdata is serialized into req->dict
+ * rather than a req->xdata member.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_readdirp (xlator_t *this, gfs3_readdirp_req *req, fd_t *fd,
+                     size_t size, off_t offset, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int     op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd = remote_fd;
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->offset = offset;
+        req->size   = size;
+
+        /* dict itself is 'xdata' here */
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->dict.dict_val),
+                                    req->dict.dict_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fremovexattr_req for removing xattr @name through @fd.
+ *
+ * Returns 0 on success or -op_errno: ESTALE for a NULL fd or fd->inode,
+ * otherwise whatever the macros (handed op_errno and the out label) left
+ * there.
+ */
+int
+client_pre_fremovexattr (xlator_t *this, gfs3_fremovexattr_req *req, fd_t *fd,
+                         const char *name, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        if (!fd || !fd->inode)
+                goto out;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd   = remote_fd;
+        req->name = (char *)name;
+        memcpy (req->gfid, fd->inode->gfid, 16);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_fallocate_req: remote fd and gfid for @fd, @flags, @offset,
+ * @size and the serialized @xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_fallocate (xlator_t *this, gfs3_fallocate_req *req, fd_t *fd,
+                      int32_t flags, off_t offset, size_t size, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int     op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->fd     = remote_fd;
+        req->flags  = flags;
+        req->size   = size;
+        req->offset = offset;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_discard_req: remote fd and gfid for @fd, @offset, @size and
+ * the serialized @xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_discard (xlator_t *this, gfs3_discard_req *req, fd_t *fd,
+                    off_t offset, size_t size, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int     op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->fd     = remote_fd;
+        req->size   = size;
+        req->offset = offset;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_zerofill_req: remote fd and gfid for @fd, @offset, @size and
+ * the serialized @xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_zerofill (xlator_t *this, gfs3_zerofill_req *req, fd_t *fd,
+                     off_t offset, size_t size, dict_t *xdata)
+{
+        int64_t remote_fd = -1;
+        int     op_errno  = ESTALE;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->fd     = remote_fd;
+        req->size   = size;
+        req->offset = offset;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_ipc_req: stores @cmd in req->op and hands @xdata to
+ * GF_PROTOCOL_DICT_SERIALIZE for req->xdata.
+ *
+ * Returns 0 on success, or -op_errno (initially ESTALE) on the out path;
+ * the macro is given op_errno and the out label.
+ */
+int
+client_pre_ipc (xlator_t *this, gfs3_ipc_req *req, int32_t cmd,
+ dict_t *xdata)
+{
+ int op_errno = ESTALE;
+
+ req->op = cmd;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+ req->xdata.xdata_len, op_errno, out);
+ return 0;
+out:
+ return -op_errno;
+}
+
+/*
+ * Fill a gfs3_seek_req: remote fd and gfid for @fd, @offset, @what and the
+ * serialized @xdata.
+ *
+ * Returns 0 on success or -op_errno (initially ESTALE); the macros are
+ * handed op_errno and the out label.
+ */
+int
+client_pre_seek (xlator_t *this, gfs3_seek_req *req, fd_t *fd,
+                 off_t offset, gf_seek_what_t what, dict_t *xdata)
+{
+        int     op_errno  = ESTALE;
+        int64_t remote_fd = -1;
+
+        CLIENT_GET_REMOTE_FD (this, fd, DEFAULT_REMOTE_FD,
+                              remote_fd, op_errno, out);
+
+        req->fd = remote_fd;
+        memcpy (req->gfid, fd->inode->gfid, 16);
+        req->what   = what;
+        req->offset = offset;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Fill a gfs3_lease_req from @loc and @lease.
+ *
+ * The gfid is loc->inode->gfid when non-null, else loc->gfid, and must end
+ * up non-null.  @lease is converted with gf_proto_lease_from_lease().
+ *
+ * Returns 0 on success or a negated errno: -ESTALE for a NULL loc or
+ * loc->inode (previously this path returned 0, i.e. success, because
+ * op_errno started at 0), -EINVAL for a null gfid, or whatever
+ * GF_PROTOCOL_DICT_SERIALIZE left in op_errno.
+ */
+int
+client_pre_lease (xlator_t *this, gfs3_lease_req *req, loc_t *loc,
+                  struct gf_lease *lease, dict_t *xdata)
+{
+        /* Same default as the sibling client_pre_* helpers. */
+        int op_errno = ESTALE;
+
+        if (!(loc && loc->inode))
+                goto out;
+
+        if (!gf_uuid_is_null (loc->inode->gfid))
+                memcpy (req->gfid, loc->inode->gfid, 16);
+        else
+                memcpy (req->gfid, loc->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                      !gf_uuid_is_null (*((uuid_t *)req->gfid)),
+                                      out, op_errno, EINVAL);
+
+        gf_proto_lease_from_lease (&req->lease, lease);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&req->xdata.xdata_val),
+                                    req->xdata.xdata_len, op_errno, out);
+
+        /* Explicit success return: op_errno is no longer 0 by default. */
+        return 0;
+out:
+        return -op_errno;
+}
+
+/* processing done after fop responses are obtained */
+/*
+ * Post-process a stat reply: unless rsp->op_ret is -1, convert rsp->stat
+ * into @iatt; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_stat (xlator_t *this, gfs3_stat_rsp *rsp, struct iatt *iatt,
+                  dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, iatt);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a readlink reply: unless rsp->op_ret is -1, convert
+ * rsp->buf into @iatt; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_readlink (xlator_t *this, gfs3_readlink_rsp *rsp,
+                      struct iatt *iatt, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->buf, iatt);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a mknod reply: unless rsp->op_ret is -1, convert the
+ * entry, pre-parent and post-parent stats; then hand the reply's xdata
+ * blob to GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_mknod (xlator_t *this, gfs3_mknod_rsp *rsp, struct iatt *stbuf,
+                   struct iatt *preparent, struct iatt *postparent,
+                   dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a mkdir reply: unless rsp->op_ret is -1, convert the
+ * entry, pre-parent and post-parent stats; then hand the reply's xdata
+ * blob to GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_mkdir (xlator_t *this, gfs3_mkdir_rsp *rsp, struct iatt *stbuf,
+                   struct iatt *preparent, struct iatt *postparent,
+                   dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an unlink reply: unless rsp->op_ret is -1, convert the
+ * pre-parent and post-parent stats; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_unlink (xlator_t *this, gfs3_unlink_rsp *rsp,
+                    struct iatt *preparent, struct iatt *postparent,
+                    dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an rmdir reply: unless rsp->op_ret is -1, convert the
+ * pre-parent and post-parent stats; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_rmdir (xlator_t *this, gfs3_rmdir_rsp *rsp,
+                   struct iatt *preparent, struct iatt *postparent,
+                   dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a symlink reply: unless rsp->op_ret is -1, convert the
+ * entry, pre-parent and post-parent stats; then hand the reply's xdata
+ * blob to GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_symlink (xlator_t *this, gfs3_symlink_rsp *rsp, struct iatt *stbuf,
+                     struct iatt *preparent, struct iatt *postparent,
+                     dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a rename reply: unless rsp->op_ret is -1, convert the
+ * entry stat plus the pre/post stats of both the old and the new parent;
+ * then hand the reply's xdata blob to GF_PROTOCOL_DICT_UNSERIALIZE with
+ * *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_rename (xlator_t *this, gfs3_rename_rsp *rsp, struct iatt *stbuf,
+                    struct iatt *preoldparent, struct iatt *postoldparent,
+                    struct iatt *prenewparent, struct iatt *postnewparent,
+                    dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+
+                /* old parent directory */
+                gf_stat_to_iatt (&rsp->preoldparent, preoldparent);
+                gf_stat_to_iatt (&rsp->postoldparent, postoldparent);
+
+                /* new parent directory */
+                gf_stat_to_iatt (&rsp->prenewparent, prenewparent);
+                gf_stat_to_iatt (&rsp->postnewparent, postnewparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a link reply: unless rsp->op_ret is -1, convert the entry,
+ * pre-parent and post-parent stats; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_link (xlator_t *this, gfs3_link_rsp *rsp, struct iatt *stbuf,
+                  struct iatt *preparent, struct iatt *postparent,
+                  dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a truncate reply: unless rsp->op_ret is -1, convert
+ * rsp->prestat and rsp->poststat; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_truncate (xlator_t *this, gfs3_truncate_rsp *rsp,
+                      struct iatt *prestat, struct iatt *poststat,
+                      dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->prestat, prestat);
+                gf_stat_to_iatt (&rsp->poststat, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an open reply: hands the reply's xdata blob
+ * (rsp->xdata.xdata_val / xdata_len) to GF_PROTOCOL_DICT_UNSERIALIZE with
+ * *xdata as destination, plus ret, rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_open (xlator_t *this, gfs3_open_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process a read reply.
+ *
+ * Unless rsp->op_ret is -1: *iobref is pointed at @rsp_iobref, rsp->stat
+ * is converted into @stat, vector[0].iov_len is set to rsp->op_ret,
+ * vector[0].iov_base is taken from @rsp_vector only when op_ret > 0, and
+ * *rspcount is set to 1.  The reply's xdata blob is then handed to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ *
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_readv (xlator_t *this, gfs3_read_rsp *rsp, struct iobref **iobref,
+                   struct iobref *rsp_iobref, struct iatt *stat,
+                   struct iovec *vector, struct iovec *rsp_vector,
+                   int *rspcount, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                *iobref = rsp_iobref;
+                gf_stat_to_iatt (&rsp->stat, stat);
+
+                vector[0].iov_len = rsp->op_ret;
+                if (rsp->op_ret > 0)
+                        vector[0].iov_base = rsp_vector->iov_base;
+                *rspcount = 1;
+        }
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+
+#ifdef GF_TESTING_IO_XDATA
+        /*
+         * xdata is a dict_t ** here; pass the dict itself.
+         * NOTE(review): assumes dict_dump_to_log() takes a dict_t * - confirm
+         * against dict.h.
+         */
+        dict_dump_to_log (*xdata);
+#endif
+out:
+        return ret;
+}
+
+/*
+ * Post-process a write reply: unless rsp->op_ret is -1, convert
+ * rsp->prestat and rsp->poststat; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_writev (xlator_t *this, gfs3_write_rsp *rsp, struct iatt *prestat,
+                    struct iatt *poststat, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->prestat, prestat);
+                gf_stat_to_iatt (&rsp->poststat, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a statfs reply: unless rsp->op_ret is -1, convert
+ * rsp->statfs into @statfs; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_statfs (xlator_t *this, gfs3_statfs_rsp *rsp,
+                    struct statvfs *statfs, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_statfs_to_statfs (&rsp->statfs, statfs);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a flush reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_flush (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+
+out:
+ return ret;
+}
+
+/*
+ * Post-process an fsync reply: unless rsp->op_ret is -1, convert
+ * rsp->prestat and rsp->poststat; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fsync (xlator_t *this, gfs3_fsync_rsp *rsp,
+                   struct iatt *prestat, struct iatt *poststat,
+                   dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->prestat, prestat);
+                gf_stat_to_iatt (&rsp->poststat, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a setxattr reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_setxattr (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process a getxattr reply.
+ *
+ * Unless rsp->op_ret is -1, the reply's dict blob is handed to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *dict as destination; the reply's
+ * xdata blob is then handed over with *xdata as destination.
+ *
+ * Unlike most client_post_* helpers this returns -op_errno, using a local
+ * op_errno that starts at 0 and is passed to both macro invocations;
+ * the local ret is only a macro argument and is never returned.
+ */
+int
+client_post_getxattr (xlator_t *this, gfs3_getxattr_rsp *rsp, dict_t **dict,
+ dict_t **xdata)
+{
+ int op_errno = 0;
+ int ret = 0;
+
+ if (-1 != rsp->op_ret) {
+ /* rsp->op_ret itself is passed in the macro's result slot here. */
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *dict,
+ (rsp->dict.dict_val),
+ (rsp->dict.dict_len), rsp->op_ret,
+ op_errno, out);
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ return -op_errno;
+}
+
+/*
+ * Post-process a removexattr reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_removexattr (xlator_t *this, gf_common_rsp *rsp,
+ dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process an opendir reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_opendir (xlator_t *this, gfs3_opendir_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process an fsyncdir reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fsyncdir (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process an access reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_access (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process a create reply: unless rsp->op_ret is -1, convert the
+ * entry, pre-parent and post-parent stats and copy the new entry's gfid
+ * (stbuf->ia_gfid) into local->loc.gfid; then hand the reply's xdata blob
+ * to GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_create (xlator_t *this, gfs3_create_rsp *rsp,
+                    struct iatt *stbuf, struct iatt *preparent,
+                    struct iatt *postparent,
+                    clnt_local_t *local, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+
+                gf_stat_to_iatt (&rsp->preparent, preparent);
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+
+                /* Needs the converted stbuf, so it stays after the above. */
+                gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an ftruncate reply: unless rsp->op_ret is -1, convert
+ * rsp->prestat and rsp->poststat; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_ftruncate (xlator_t *this, gfs3_ftruncate_rsp *rsp,
+                       struct iatt *prestat, struct iatt *poststat,
+                       dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->prestat, prestat);
+                gf_stat_to_iatt (&rsp->poststat, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an fstat reply: unless rsp->op_ret is -1, convert
+ * rsp->stat into @stat; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ *
+ * Returns ret unchanged, like every other client_post_* helper that
+ * returns the macro's result (this one used to return -ret, flipping the
+ * sign of any value the macro stored).
+ */
+int
+client_post_fstat (xlator_t *this, gfs3_fstat_rsp *rsp, struct iatt *stat,
+                   dict_t **xdata)
+{
+        int ret = 0;
+
+        if (-1 != rsp->op_ret) {
+                gf_stat_to_iatt (&rsp->stat, stat);
+        }
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an lk reply: when rsp->op_ret is >= 0, convert rsp->flock
+ * into @lock; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_lk (xlator_t *this, gfs3_lk_rsp *rsp, struct gf_flock *lock,
+                dict_t **xdata)
+{
+        int ret = 0;
+
+        if (!(rsp->op_ret < 0)) {
+                gf_proto_flock_to_flock (&rsp->flock, lock);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a lookup reply: unless rsp->op_ret is -1, convert
+ * rsp->postparent and rsp->stat; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ *
+ * rsp->op_ret itself is passed in the macro's result slot, and the
+ * function returns rsp->op_ret.
+ */
+int
+client_post_lookup (xlator_t *this, gfs3_lookup_rsp *rsp, struct iatt *stbuf,
+                    struct iatt *postparent, dict_t **xdata)
+{
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->postparent, postparent);
+                gf_stat_to_iatt (&rsp->stat, stbuf);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), rsp->op_ret,
+                                      rsp->op_errno, out);
+out:
+        return rsp->op_ret;
+}
+
+/*
+ * Post-process a readdir reply: when rsp->op_ret is positive, pass the
+ * reply to unserialize_rsp_dirent() to fill @entries; then hand the
+ * reply's xdata blob to GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as
+ * destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_readdir (xlator_t *this, gfs3_readdir_rsp *rsp,
+                     gf_dirent_t *entries, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (0 < rsp->op_ret) {
+                unserialize_rsp_dirent (this, rsp, entries);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an inodelk reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_inodelk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+
+out:
+ return ret;
+}
+
+/*
+ * Post-process a finodelk reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_finodelk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+
+out:
+ return ret;
+}
+
+/*
+ * Post-process an entrylk reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_entrylk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+
+out:
+ return ret;
+}
+
+/*
+ * Post-process an fentrylk reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fentrylk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+
+out:
+ return ret;
+}
+
+/*
+ * Post-process an xattrop reply.
+ *
+ * Unless rsp->op_ret is -1, the reply's dict blob is handed to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *dict as destination; the reply's
+ * xdata blob is then handed over with *xdata as destination.
+ *
+ * Returns -op_errno, using a local op_errno that starts at 0 and is
+ * passed to both macro invocations; the local ret is only a macro
+ * argument and is never returned.
+ */
+int
+client_post_xattrop (xlator_t *this, gfs3_xattrop_rsp *rsp, dict_t **dict,
+ dict_t **xdata)
+{
+ int op_errno = 0;
+ int ret = 0;
+
+ if (-1 != rsp->op_ret) {
+ /* rsp->op_ret itself is passed in the macro's result slot here. */
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *dict,
+ (rsp->dict.dict_val),
+ (rsp->dict.dict_len), rsp->op_ret,
+ op_errno, out);
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ return -op_errno;
+}
+
+/*
+ * Post-process an fxattrop reply.
+ *
+ * Unless rsp->op_ret is -1, the reply's dict blob is handed to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *dict as destination; the reply's
+ * xdata blob is then handed over with *xdata as destination.
+ *
+ * Returns -op_errno, using a local op_errno that starts at 0 and is
+ * passed to both macro invocations; the local ret is only a macro
+ * argument and is never returned.
+ */
+int
+client_post_fxattrop (xlator_t *this, gfs3_fxattrop_rsp *rsp, dict_t **dict,
+ dict_t **xdata)
+{
+ int op_errno = 0;
+ int ret = 0;
+
+ if (-1 != rsp->op_ret) {
+ /* rsp->op_ret itself is passed in the macro's result slot here. */
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *dict,
+ (rsp->dict.dict_val),
+ (rsp->dict.dict_len), rsp->op_ret,
+ op_errno, out);
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ return -op_errno;
+}
+
+/*
+ * Post-process an fgetxattr reply.
+ *
+ * Unless rsp->op_ret is -1, the reply's dict blob is handed to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *dict as destination; the reply's
+ * xdata blob is then handed over with *xdata as destination.
+ *
+ * Returns -op_errno, using a local op_errno that starts at 0 and is
+ * passed to both macro invocations; the local ret is only a macro
+ * argument and is never returned.
+ */
+int
+client_post_fgetxattr (xlator_t *this, gfs3_fgetxattr_rsp *rsp, dict_t **dict,
+ dict_t **xdata)
+{
+ int op_errno = 0;
+ int ret = 0;
+
+ if (-1 != rsp->op_ret) {
+ /* rsp->op_ret itself is passed in the macro's result slot here. */
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *dict,
+ (rsp->dict.dict_val),
+ (rsp->dict.dict_len), rsp->op_ret,
+ op_errno, out);
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ return -op_errno;
+}
+
+/*
+ * Post-process an fsetxattr reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fsetxattr (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process an rchecksum reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.  The checksum fields of the reply are
+ * not touched here.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_rchecksum (xlator_t *this, gfs3_rchecksum_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process a setattr reply: unless rsp->op_ret is -1, convert
+ * rsp->statpre and rsp->statpost; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_setattr (xlator_t *this, gfs3_setattr_rsp *rsp,
+                     struct iatt *prestat, struct iatt *poststat,
+                     dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->statpre, prestat);
+                gf_stat_to_iatt (&rsp->statpost, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an fsetattr reply: unless rsp->op_ret is -1, convert
+ * rsp->statpre and rsp->statpost; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fsetattr (xlator_t *this, gfs3_fsetattr_rsp *rsp,
+                      struct iatt *prestat, struct iatt *poststat,
+                      dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->statpre, prestat);
+                gf_stat_to_iatt (&rsp->statpost, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a readdirp reply: when rsp->op_ret is positive, pass the
+ * reply and @fd to unserialize_rsp_direntp() to fill @entries; then hand
+ * the reply's xdata blob to GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as
+ * destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_readdirp (xlator_t *this, gfs3_readdirp_rsp *rsp,
+                      fd_t *fd, gf_dirent_t *entries,
+                      dict_t **xdata)
+{
+        int ret = 0;
+
+        if (0 < rsp->op_ret) {
+                unserialize_rsp_direntp (this, fd, rsp, entries);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an fremovexattr reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fremovexattr (xlator_t *this, gf_common_rsp *rsp,
+ dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process an fallocate reply: unless rsp->op_ret is -1, convert
+ * rsp->statpre and rsp->statpost; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_fallocate (xlator_t *this, gfs3_fallocate_rsp *rsp,
+                       struct iatt *prestat, struct iatt *poststat,
+                       dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->statpre, prestat);
+                gf_stat_to_iatt (&rsp->statpost, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a discard reply: unless rsp->op_ret is -1, convert
+ * rsp->statpre and rsp->statpost; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_discard (xlator_t *this, gfs3_discard_rsp *rsp,
+                     struct iatt *prestat,
+                     struct iatt *poststat, dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->statpre, prestat);
+                gf_stat_to_iatt (&rsp->statpost, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process a zerofill reply: unless rsp->op_ret is -1, convert
+ * rsp->statpre and rsp->statpost; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_zerofill (xlator_t *this, gfs3_zerofill_rsp *rsp,
+                      struct iatt *prestat, struct iatt *poststat,
+                      dict_t **xdata)
+{
+        int ret = 0;
+
+        if (rsp->op_ret != -1) {
+                gf_stat_to_iatt (&rsp->statpre, prestat);
+                gf_stat_to_iatt (&rsp->statpost, poststat);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
+
+/*
+ * Post-process an ipc reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_ipc (xlator_t *this, gfs3_ipc_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process a seek reply: hands the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination, plus ret,
+ * rsp->op_errno and the out label.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_seek (xlator_t *this, gfs3_seek_rsp *rsp, dict_t **xdata)
+{
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), ret,
+ rsp->op_errno, out);
+out:
+ return ret;
+}
+
+/*
+ * Post-process a lease reply: when rsp->op_ret is >= 0, convert
+ * rsp->lease into @lease; then hand the reply's xdata blob to
+ * GF_PROTOCOL_DICT_UNSERIALIZE with *xdata as destination.
+ * Returns ret, which is 0 unless the macro changed it.
+ */
+int
+client_post_lease (xlator_t *this, gfs3_lease_rsp *rsp, struct gf_lease *lease,
+                   dict_t **xdata)
+{
+        int ret = 0;
+
+        if (!(rsp->op_ret < 0)) {
+                gf_proto_lease_to_lease (&rsp->lease, lease);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (this, *xdata, (rsp->xdata.xdata_val),
+                                      (rsp->xdata.xdata_len), ret,
+                                      rsp->op_errno, out);
+out:
+        return ret;
+}
diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h
new file mode 100644
index 00000000000..2298fa4e16d
--- /dev/null
+++ b/xlators/protocol/client/src/client-common.h
@@ -0,0 +1,403 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CLIENT_COMMON_H__
+#define __CLIENT_COMMON_H__
+
+#include "dict.h"
+#include "xlator.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "client.h"
+
+int
+client_pre_stat (xlator_t *this, gfs3_stat_req *req, loc_t *loc,
+ dict_t *xdata);
+
+int
+client_pre_readlink (xlator_t *this, gfs3_readlink_req *req, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int
+client_pre_mknod (xlator_t *this, gfs3_mknod_req *req, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int
+client_pre_mkdir (xlator_t *this, gfs3_mkdir_req *req, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+int
+client_pre_unlink (xlator_t *this, gfs3_unlink_req *req, loc_t *loc,
+ int32_t flags, dict_t *xdata);
+
+int
+client_pre_rmdir (xlator_t *this, gfs3_rmdir_req *req, loc_t *loc,
+ int32_t flags, dict_t *xdata);
+
+int
+client_pre_symlink (xlator_t *this, gfs3_symlink_req *req, loc_t *loc,
+ const char *linkname, mode_t umask, dict_t *xdata);
+
+int
+client_pre_rename (xlator_t *this, gfs3_rename_req *req, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int
+client_pre_link (xlator_t *this,
+ gfs3_link_req *req, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+client_pre_truncate (xlator_t *this, gfs3_truncate_req *req,
+ loc_t *loc, off_t offset, dict_t *xdata);
+
+int
+client_pre_open (xlator_t *this, gfs3_open_req *req, loc_t *loc, fd_t *fd,
+ int32_t flags, dict_t *xdata);
+
+int
+client_pre_readv (xlator_t *this, gfs3_read_req *req, fd_t *fd, size_t size,
+ off_t offset, int32_t flags, dict_t *xdata);
+
+int
+client_pre_writev (xlator_t *this, gfs3_write_req *req,
+ fd_t *fd, size_t size, off_t offset, int32_t flags,
+ dict_t *xdata);
+
+int
+client_pre_statfs (xlator_t *this, gfs3_statfs_req *req, loc_t *loc,
+ dict_t *xdata);
+
+int
+client_pre_flush (xlator_t *this, gfs3_flush_req *req, fd_t *fd, dict_t *xdata);
+
+int
+client_pre_fsync (xlator_t *this, gfs3_fsync_req *req, fd_t *fd,
+ int32_t flags, dict_t *xdata);
+
+int
+client_pre_setxattr (xlator_t *this, gfs3_setxattr_req *req, loc_t *loc,
+ dict_t *xattr, int32_t flags, dict_t *xdata);
+
+int
+client_pre_getxattr (xlator_t *this, gfs3_getxattr_req *req, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int
+client_pre_removexattr (xlator_t *this, gfs3_removexattr_req *req,
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int
+client_pre_opendir (xlator_t *this,
+ gfs3_opendir_req *req, loc_t *loc,
+ fd_t *fd, dict_t *xdata);
+
+int
+client_pre_fsyncdir (xlator_t *this, gfs3_fsyncdir_req *req, fd_t *fd,
+ int32_t flags, dict_t *xdata);
+
+int
+client_pre_access (xlator_t *this, gfs3_access_req *req, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int
+client_pre_create (xlator_t *this, gfs3_create_req *req,
+ loc_t *loc, fd_t *fd, mode_t mode,
+ int32_t flags, mode_t umask, dict_t *xdata);
+
+int
+client_pre_ftruncate (xlator_t *this, gfs3_ftruncate_req *req, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int
+client_pre_fstat (xlator_t *this, gfs3_fstat_req *req, fd_t *fd,
+ dict_t *xdata);
+
+int
+client_pre_lk (xlator_t *this, gfs3_lk_req *req,
+ int32_t cmd, struct gf_flock *flock, fd_t *fd, dict_t *xdata);
+
+int
+client_pre_lookup (xlator_t *this, gfs3_lookup_req *req, loc_t *loc,
+ dict_t *xdata);
+
+int
+client_pre_readdir (xlator_t *this, gfs3_readdir_req *req, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+int
+client_pre_inodelk (xlator_t *this, gfs3_inodelk_req *req, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *volume,
+ dict_t *xdata);
+
+int
+client_pre_finodelk (xlator_t *this, gfs3_finodelk_req *req, fd_t *fd,
+ int cmd, struct gf_flock *flock, const char *volume,
+ dict_t *xdata);
+
+int
+client_pre_entrylk (xlator_t *this, gfs3_entrylk_req *req, loc_t *loc,
+ entrylk_cmd cmd_entrylk, entrylk_type type,
+ const char *volume, const char *basename, dict_t *xdata);
+
+int
+client_pre_fentrylk (xlator_t *this, gfs3_fentrylk_req *req, fd_t *fd,
+ entrylk_cmd cmd_entrylk, entrylk_type type,
+ const char *volume, const char *basename, dict_t *xdata);
+
+int
+client_pre_xattrop (xlator_t *this, gfs3_xattrop_req *req, loc_t *loc,
+ dict_t *xattr, int32_t flags, dict_t *xdata);
+
+int
+client_pre_fxattrop (xlator_t *this, gfs3_fxattrop_req *req, fd_t *fd,
+ dict_t *xattr, int32_t flags, dict_t *xdata);
+
+int
+client_pre_fgetxattr (xlator_t *this, gfs3_fgetxattr_req *req, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int
+client_pre_fsetxattr (xlator_t *this, gfs3_fsetxattr_req *req, fd_t *fd,
+ int32_t flags, dict_t *xattr, dict_t *xdata);
+int
+client_pre_seek (xlator_t *this, gfs3_seek_req *req, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+int
+client_pre_rchecksum (xlator_t *this, gfs3_rchecksum_req *req, fd_t *fd,
+ int32_t len, off_t offset, dict_t *xdata);
+
+int
+client_pre_setattr (xlator_t *this, gfs3_setattr_req *req, loc_t *loc,
+ int32_t valid, struct iatt *stbuf, dict_t *xdata);
+int
+client_pre_fsetattr (xlator_t *this, gfs3_fsetattr_req *req, fd_t *fd,
+ int32_t valid, struct iatt *stbuf, dict_t *xdata);
+
+int
+client_pre_readdirp (xlator_t *this, gfs3_readdirp_req *req, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+int
+client_pre_fremovexattr (xlator_t *this, gfs3_fremovexattr_req *req, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int
+client_pre_fallocate (xlator_t *this, gfs3_fallocate_req *req, fd_t *fd,
+ int32_t flags, off_t offset, size_t size, dict_t *xdata);
+int
+client_pre_discard (xlator_t *this, gfs3_discard_req *req, fd_t *fd,
+ off_t offset, size_t size, dict_t *xdata);
+int
+client_pre_zerofill (xlator_t *this, gfs3_zerofill_req *req, fd_t *fd,
+ off_t offset, size_t size, dict_t *xdata);
+int
+client_pre_ipc (xlator_t *this, gfs3_ipc_req *req, int32_t cmd,
+ dict_t *xdata);
+
+int
+client_pre_lease (xlator_t *this, gfs3_lease_req *req, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int
+client_post_stat (xlator_t *this, gfs3_stat_rsp *rsp, struct iatt *iatt,
+ dict_t **xdata);
+
+int
+client_post_readlink (xlator_t *this, gfs3_readlink_rsp *rsp,
+ struct iatt *iatt, dict_t **xdata);
+
+int
+client_post_mknod (xlator_t *this, gfs3_mknod_rsp *rsp, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t **xdata);
+
+int
+client_post_mkdir (xlator_t *this, gfs3_mkdir_rsp *rsp, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t **xdata);
+
+int
+client_post_unlink (xlator_t *this, gfs3_unlink_rsp *rsp,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t **xdata);
+
+int
+client_post_rmdir (xlator_t *this, gfs3_rmdir_rsp *rsp,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t **xdata);
+
+int
+client_post_symlink (xlator_t *this, gfs3_symlink_rsp *rsp, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t **xdata);
+
+int
+client_post_rename (xlator_t *this, gfs3_rename_rsp *rsp, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t **xdata);
+int
+client_post_link (xlator_t *this, gfs3_link_rsp *rsp, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t **xdata);
+
+int
+client_post_truncate (xlator_t *this, gfs3_truncate_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+
+int
+client_post_open (xlator_t *this, gfs3_open_rsp *rsp, dict_t **xdata);
+
+int
+client_post_readv (xlator_t *this, gfs3_read_rsp *rsp, struct iobref **iobref,
+ struct iobref *rsp_iobref, struct iatt *stat,
+ struct iovec *vector, struct iovec *rsp_vector,
+ int *rspcount, dict_t **xdata);
+
+int
+client_post_writev (xlator_t *this, gfs3_write_rsp *rsp, struct iatt *prestat,
+ struct iatt *poststat, dict_t **xdata);
+
+int
+client_post_statfs (xlator_t *this, gfs3_statfs_rsp *rsp,
+ struct statvfs *statfs, dict_t **xdata);
+
+int
+client_post_flush (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_fsync (xlator_t *this, gfs3_fsync_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+int
+client_post_setxattr (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_getxattr (xlator_t *this, gfs3_getxattr_rsp *rsp, dict_t **dict,
+ dict_t **xdata);
+
+int
+client_post_removexattr (xlator_t *this, gf_common_rsp *rsp,
+ dict_t **xdata);
+
+int
+client_post_opendir (xlator_t *this, gfs3_opendir_rsp *rsp, dict_t **xdata);
+
+int
+client_post_fsyncdir (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_access (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_create (xlator_t *this, gfs3_create_rsp *rsp,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent,
+ clnt_local_t *local, dict_t **xdata);
+
+int
+client_post_ftruncate (xlator_t *this, gfs3_ftruncate_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+
+int
+client_post_fstat (xlator_t *this, gfs3_fstat_rsp *rsp, struct iatt *stat,
+ dict_t **xdata);
+
+int
+client_post_lk (xlator_t *this, gfs3_lk_rsp *rsp, struct gf_flock *lock,
+ dict_t **xdata);
+
+int
+client_post_lookup (xlator_t *this, gfs3_lookup_rsp *rsp, struct iatt *stbuf,
+ struct iatt *postparent, dict_t **xdata);
+
+int
+client_post_readdir (xlator_t *this, gfs3_readdir_rsp *rsp,
+ gf_dirent_t *entries, dict_t **xdata);
+
+int
+client_post_inodelk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_finodelk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_entrylk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_fentrylk (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_xattrop (xlator_t *this, gfs3_xattrop_rsp *rsp, dict_t **dict,
+ dict_t **xdata);
+
+int
+client_post_fxattrop (xlator_t *this, gfs3_fxattrop_rsp *rsp, dict_t **dict,
+ dict_t **xdata);
+
+int
+client_post_fgetxattr (xlator_t *this, gfs3_fgetxattr_rsp *rsp, dict_t **dict,
+ dict_t **xdata);
+
+int
+client_post_fsetxattr (xlator_t *this, gf_common_rsp *rsp, dict_t **xdata);
+
+int
+client_post_rchecksum (xlator_t *this, gfs3_rchecksum_rsp *rsp, dict_t **xdata);
+
+int
+client_post_setattr (xlator_t *this, gfs3_setattr_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+
+int
+client_post_fsetattr (xlator_t *this, gfs3_fsetattr_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+
+int
+client_post_readdirp (xlator_t *this, gfs3_readdirp_rsp *rsp,
+ fd_t *fd, gf_dirent_t *entries,
+ dict_t **xdata);
+
+int
+client_post_fremovexattr (xlator_t *this, gf_common_rsp *rsp,
+ dict_t **xdata);
+
+int
+client_post_fallocate (xlator_t *this, gfs3_fallocate_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+
+int
+client_post_discard (xlator_t *this, gfs3_discard_rsp *rsp,
+ struct iatt *prestat,
+ struct iatt *poststat, dict_t **xdata);
+
+int
+client_post_zerofill (xlator_t *this, gfs3_zerofill_rsp *rsp,
+ struct iatt *prestat, struct iatt *poststat,
+ dict_t **xdata);
+
+int
+client_post_ipc (xlator_t *this, gfs3_ipc_rsp *rsp, dict_t **xdata);
+
+int
+client_post_seek (xlator_t *this, gfs3_seek_rsp *rsp, dict_t **xdata);
+
+int
+client_post_lease (xlator_t *this, gfs3_lease_rsp *rsp, struct gf_lease *lease,
+ dict_t **xdata);
+#endif /* __CLIENT_COMMON_H__ */
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 1c239d0cb0f..3284facb893 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -1,325 +1,1029 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
+#include "fd-lk.h"
#include "client.h"
#include "xlator.h"
#include "defaults.h"
#include "glusterfs.h"
-#include "msg-xdr.h"
#include "statedump.h"
#include "compat-errno.h"
-extern rpc_clnt_prog_t clnt3_1_fop_prog;
-extern rpc_clnt_prog_t clnt3_1_mgmt_prog;
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+#include "rpc-common-xdr.h"
+#include "client-messages.h"
-int client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe);
+#define CLIENT_REOPEN_MAX_ATTEMPTS 1024
+extern rpc_clnt_prog_t clnt3_3_fop_prog;
+extern rpc_clnt_prog_t clnt_pmap_prog;
-/* Handshake */
+int client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
-void
-rpc_client_ping_timer_expired (void *data)
+int client_set_lk_version (xlator_t *this);
+
+/*
+ * State shared by the lock re-acquire requests issued for one fd context.
+ * Created by clnt_fd_lk_local_create with ref == 1 and released by
+ * clnt_fd_lk_local_unref when the count drops to 0.
+ */
+typedef struct client_fd_lk_local {
+ int ref; /* reference count, modified under 'lock' */
+ gf_boolean_t error; /* set once by clnt_fd_lk_local_mark_error */
+ gf_lock_t lock; /* guards 'ref' and 'error' */
+ clnt_fd_ctx_t *fdctx; /* fd context whose locks are being reacquired */
+}clnt_fd_lk_local_t;
+
+/*
+ * RPC callback for the GETSPEC handshake request sent by client3_getspec.
+ * Decodes the reply into a gf_getspec_rsp and unwinds the frame with
+ * op_ret, op_errno and the returned spec buffer. Local failures are
+ * reported through the same unwind: EINVAL for a missing frame or an XDR
+ * decode error, ENOTCONN when req->rpc_status is -1.
+ * Always returns 0.
+ */
+int
+client3_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- rpc_transport_t *trans = NULL;
- rpc_clnt_connection_t *conn = NULL;
- int disconnect = 0;
- int transport_activity = 0;
- struct timeval timeout = {0, };
- struct timeval current = {0, };
- struct rpc_clnt *clnt = NULL;
- xlator_t *this = NULL;
- clnt_conf_t *conf = NULL;
+ gf_getspec_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ frame = myframe;
+
+ if (!frame || !frame->this) {
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, PC_MSG_INVALID_ENTRY,
+ "frame not found with the request, returning EINVAL");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+ if (-1 == req->rpc_status) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN,
+ PC_MSG_RPC_STATUS_ERROR, "received RPC status error, "
+ "returning ENOTCONN");
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_msg (frame->this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED,
+ "XDR decoding failed, returning EINVAL");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
- if (!data) {
+ if (-1 == rsp.op_ret) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ PC_MSG_VOL_FILE_NOT_FOUND, "failed to get the 'volume "
+ "file' from server");
goto out;
}
- this = data;
+out:
+ CLIENT_STACK_UNWIND (getspec, frame, rsp.op_ret, rsp.op_errno,
+ rsp.spec);
+
+ /* Don't use 'GF_FREE', this is allocated by libc */
+ free (rsp.spec);
+
+ return 0;
+}
+
+/*
+ * Send a GETSPEC handshake request. 'data' is a clnt_args_t whose flags
+ * and name become the request's flags and key; the reply is handled by
+ * client3_getspec_cbk. A submit failure is only logged. If frame, this or
+ * data is NULL nothing is sent and the frame is unwound with -1/ESTALE.
+ * Returns 0 on every path.
+ */
+int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gf_getspec_req req = {0,};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
conf = this->private;
+ req.flags = args->flags;
+ req.key = (char *)args->name;
- conn = &conf->rpc->conn;
- trans = conn->trans;
+ ret = client_submit_request (this, &req, frame, conf->handshake,
+ GF_HNDSK_GETSPEC, client3_getspec_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_getspec_req);
- if (!clnt || !trans) {
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_SEND_REQ_FAIL,
+ "failed to send the request");
+ }
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (getspec, frame, -1, op_errno, NULL);
+ return 0;
+
+}
+
+/*
+ * Tell the parent translators that this client is up, but only once
+ * conf->child_up is set; otherwise log that the notification is postponed.
+ * A failed dispatch is logged and otherwise ignored.
+ * Returns 0 on every path, including failed validation of this/conf.
+ */
+int
+client_notify_parents_child_up (xlator_t *this)
+{
+        clnt_conf_t *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO("client", this, out);
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+        if (!conf->child_up) {
+                gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_STATUS,
+                        "Defering sending CHILD_UP message as the client "
+                        "translators are not yet ready to serve.");
+                goto out;
+        }
+
+        if (client_notify_dispatch_uniq (this, GF_EVENT_CHILD_UP, NULL)) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        PC_MSG_CHILD_UP_NOTIFY_FAILED,
+                        "notify of CHILD_UP failed");
+        }
+
+out:
+        return 0;
+}
+
+/*
+ * Record that lock re-acquisition for fdctx is over without a usable
+ * remote fd: under conf->lock, reset remote_fd to -1 and mark the heal
+ * state GF_LK_HEAL_DONE.
+ * Returns 0 on success, -1 if this, conf or fdctx is NULL.
+ */
+int
+clnt_fd_lk_reacquire_failed (xlator_t *this, clnt_fd_ctx_t *fdctx,
+                             clnt_conf_t *conf)
+{
+        int status = -1;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+        GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+
+        pthread_mutex_lock (&conf->lock);
+        fdctx->remote_fd     = -1;
+        fdctx->lk_heal_state = GF_LK_HEAL_DONE;
+        pthread_mutex_unlock (&conf->lock);
+
+        status = 0;
+out:
+        return status;
+}
+
+/*
+ * RPC callback for the SET_LK_VERSION request. Logs the lk version the
+ * server reported (or a warning if the reply cannot be decoded) and
+ * destroys the frame.
+ * Returns 0 once a reply was examined, -1 when the frame is missing or
+ * req->rpc_status is -1.
+ */
+int
+client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov,
+                           int count, void *myframe)
+{
+        gf_set_lk_ver_rsp  reply  = {0,};
+        call_frame_t      *frame  = (call_frame_t *) myframe;
+        int32_t            status = -1;
+
+        GF_VALIDATE_OR_GOTO ("client", frame, out);
+
+        if (req->rpc_status == -1) {
+                gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN,
+                        PC_MSG_RPC_STATUS_ERROR, "received RPC status error");
+                goto out;
+        }
+
+        status = xdr_to_generic (*iov, &reply,
+                                 (xdrproc_t)xdr_gf_set_lk_ver_rsp);
+        if (status >= 0)
+                gf_msg (frame->this->name, GF_LOG_INFO, 0,
+                        PC_MSG_LOCK_VERSION_SERVER,
+                        "Server lk version = %d", reply.lk_ver);
+        else
+                gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+                        PC_MSG_XDR_DECODING_FAILED, "xdr decoding failed");
+
+        /* A decode failure is only logged; it does not fail the callback. */
+        status = 0;
+out:
+        if (frame)
+                STACK_DESTROY (frame->root);
+
+        return status;
+}
+
+//TODO: Check for all released fdctx and destroy them
+/*
+ * Send a SET_LK_VERSION handshake request carrying the current lk version
+ * (client_get_lk_ver) and a copy of the "process-uuid" option as uid.
+ * Returns the client_submit_request result, or -1 when the option is
+ * missing, the uid copy fails, or no frame can be created. The 'err' path
+ * logs a warning; a frame-creation failure goes to 'out', which frees
+ * req.uid without logging.
+ * NOTE(review): 'err' is also the target when 'this' is NULL, yet it
+ * dereferences this->name -- confirm callers never pass NULL.
+ */
+int
+client_set_lk_version (xlator_t *this)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ call_frame_t *frame = NULL;
+ gf_set_lk_ver_req req = {0, };
+ char *process_uuid = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", this, err);
+
+ conf = (clnt_conf_t *) this->private;
+
+ req.lk_ver = client_get_lk_ver (conf);
+ ret = dict_get_str (this->options, "process-uuid", &process_uuid);
+ if (!process_uuid) {
+ ret = -1;
+ goto err;
+ }
+ req.uid = gf_strdup (process_uuid);
+ if (!req.uid) {
+ ret = -1;
+ goto err;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
goto out;
}
- pthread_mutex_lock (&conn->lock);
+ gf_msg_debug (this->name, 0, "Sending SET_LK_VERSION");
+
+ ret = client_submit_request (this, &req, frame,
+ conf->handshake,
+ GF_HNDSK_SET_LK_VER,
+ client_set_lk_version_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_set_lk_ver_req);
+out:
+ GF_FREE (req.uid);
+ return ret;
+err:
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_SET_LK_VERSION_ERROR,
+ "Failed to send SET_LK_VERSION to server");
+
+ return ret;
+}
+
+/*
+ * Count the entries on lk_ctx->lk_list while holding lk_ctx->lock.
+ * Returns the number of entries, or -1 if lk_ctx is NULL.
+ */
+int
+client_fd_lk_count (fd_lk_ctx_t *lk_ctx)
+{
+ int count = 0;
+ fd_lk_ctx_node_t *fd_lk = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", lk_ctx, err);
+
+ LOCK (&lk_ctx->lock);
{
- if (conn->ping_timer)
- gf_timer_call_cancel (this->ctx,
- conn->ping_timer);
- gettimeofday (&current, NULL);
-
- if (((current.tv_sec - conn->last_received.tv_sec) <
- conf->opt.ping_timeout)
- || ((current.tv_sec - conn->last_sent.tv_sec) <
- conf->opt.ping_timeout)) {
- transport_activity = 1;
- }
+ list_for_each_entry (fd_lk, &lk_ctx->lk_list, next)
+ count++;
+ }
+ UNLOCK (&lk_ctx->lock);
- if (transport_activity) {
- gf_log (trans->name, GF_LOG_TRACE,
- "ping timer expired but transport activity "
- "detected - not bailing transport");
- timeout.tv_sec = conf->opt.ping_timeout;
- timeout.tv_usec = 0;
-
- conn->ping_timer =
- gf_timer_call_after (this->ctx, timeout,
- rpc_client_ping_timer_expired,
- (void *) this);
- if (conn->ping_timer == NULL)
- gf_log (trans->name, GF_LOG_DEBUG,
- "unable to setup timer");
+ return count;
+err:
+ return -1;
+}
- } else {
- conn->ping_started = 0;
- conn->ping_timer = NULL;
- disconnect = 1;
- }
+/*
+ * Take one reference on 'local' (incremented under local->lock) and return
+ * it. A NULL 'local' is returned unchanged. 'this' is only used for its
+ * name in the validation message.
+ */
+clnt_fd_lk_local_t *
+clnt_fd_lk_local_ref (xlator_t *this, clnt_fd_lk_local_t *local)
+{
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ LOCK (&local->lock);
+ {
+ local->ref++;
}
- pthread_mutex_unlock (&conn->lock);
+ UNLOCK (&local->lock);
+out:
+ return local;
+}
- if (disconnect) {
- gf_log (trans->name, GF_LOG_ERROR,
- "Server %s has not responded in the last %d "
- "seconds, disconnecting.",
- conn->trans->peerinfo.identifier,
- conf->opt.ping_timeout);
+/*
+ * Drop one reference on 'local' under local->lock; when the count reaches
+ * 0 the lock is destroyed and 'local' is freed.
+ * Returns the reference count after the decrement, or -1 if 'local' is
+ * NULL.
+ */
+int
+clnt_fd_lk_local_unref (xlator_t *this, clnt_fd_lk_local_t *local)
+{
+ int ref = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
- rpc_transport_disconnect (conn->trans);
+ LOCK (&local->lock);
+ {
+ ref = --local->ref;
}
+ UNLOCK (&local->lock);
+ if (ref == 0) {
+ LOCK_DESTROY (&local->lock);
+ GF_FREE (local);
+ }
out:
- return;
+ return ref;
}
-void
-client_start_ping (void *data)
-{
- xlator_t *this = NULL;
- clnt_conf_t *conf = NULL;
- rpc_clnt_connection_t *conn = NULL;
- int32_t ret = -1;
- struct timeval timeout = {0, };
- call_frame_t *frame = NULL;
- int frame_count = 0;
- rpc_transport_t *trans = NULL;
-
- this = data;
- conf = this->private;
+/*
+ * Allocate a zeroed clnt_fd_lk_local_t for fdctx with one reference held
+ * by the caller, the error flag cleared and its lock initialised.
+ * Returns the new object, or NULL if the allocation fails.
+ */
+clnt_fd_lk_local_t *
+clnt_fd_lk_local_create (clnt_fd_ctx_t *fdctx)
+{
+ clnt_fd_lk_local_t *local = NULL;
- conn = &conf->rpc->conn;
- trans = conn->trans;
+ local = GF_CALLOC (1, sizeof (clnt_fd_lk_local_t),
+ gf_client_mt_clnt_fd_lk_local_t);
+ if (!local)
+ goto out;
+
+ local->ref = 1;
+ local->error = _gf_false;
+ local->fdctx = fdctx;
+
+ LOCK_INIT (&local->lock);
+out:
+ return local;
+}
+
+/*
+ * RPC callback for the RELEASE request sent by clnt_release_reopen_fd.
+ * The reply itself is not inspected: the fd context stored in frame->local
+ * is marked as failed (clnt_fd_lk_reacquire_failed), its reopen_done hook
+ * is invoked, and the frame is destroyed. Always returns 0.
+ */
+int
+clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov,
+                            int count, void *myframe)
+{
+        call_frame_t  *frame = myframe;
+        xlator_t      *this  = frame->this;
+        clnt_fd_ctx_t *fdctx = (clnt_fd_ctx_t *) frame->local;
+        clnt_conf_t   *conf  = (clnt_conf_t *) this->private;
+
+        clnt_fd_lk_reacquire_failed (this, fdctx, conf);
+        fdctx->reopen_done (fdctx, this);
+
+        /* fdctx is not owned by the frame; detach it before teardown. */
+        frame->local = NULL;
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+/*
+ * Ask the server to release the remote fd held in fdctx; called from
+ * clnt_reacquire_lock_error when lock re-acquisition is abandoned.
+ * On a successful submit, clnt_release_reopen_fd_cbk finishes the job. If
+ * the frame cannot be created or the submit fails, the same cleanup is done
+ * here: clnt_fd_lk_reacquire_failed followed by fdctx->reopen_done.
+ * Always returns 0.
+ */
+int
+clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+        int               ret   = -1;
+        clnt_conf_t      *conf  = NULL;
+        call_frame_t     *frame = NULL;
+        gfs3_release_req  req   = {{0,},};
+
+        conf = (clnt_conf_t *) this->private;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        frame->local = (void *) fdctx;
+        req.fd       = fdctx->remote_fd;
+
+        /* req is a gfs3_release_req sent as GFS3_OP_RELEASE, so encode it
+         * with the matching XDR routine instead of the releasedir one. */
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_RELEASE,
+                                     clnt_release_reopen_fd_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_release_req);
+ out:
+        if (ret) {
+                clnt_fd_lk_reacquire_failed (this, fdctx, conf);
+                fdctx->reopen_done (fdctx, this);
+        }
+        return 0;
+}
+
+/*
+ * Abandon lock re-acquisition for fdctx by releasing its remote fd through
+ * clnt_release_reopen_fd. 'conf' is only validated here.
+ * Returns 0 once the release has been requested, -1 if any argument is
+ * NULL.
+ */
+int
+clnt_reacquire_lock_error (xlator_t *this, clnt_fd_ctx_t *fdctx,
+ clnt_conf_t *conf)
+{
+ int32_t ret = -1;
- if (conf->opt.ping_timeout == 0)
- return;
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
- pthread_mutex_lock (&conn->lock);
+ clnt_release_reopen_fd (this, fdctx);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Return the current value of local->error, read under local->lock.
+ * 'this' is unused.
+ */
+gf_boolean_t
+clnt_fd_lk_local_error_status (xlator_t *this,
+                               clnt_fd_lk_local_t *local)
+{
+        gf_boolean_t failed = _gf_false;
+
+        LOCK (&local->lock);
+        failed = local->error;
+        UNLOCK (&local->lock);
+
+        return failed;
+}
+
+/*
+ * Set local->error under local->lock. Only the caller that flips the flag
+ * from false to true goes on to call clnt_reacquire_lock_error, so the
+ * remote fd release is requested once per 'local'.
+ * Returns 0 on success, -1 if 'this' or 'local' is NULL.
+ */
+int
+clnt_fd_lk_local_mark_error (xlator_t *this,
+ clnt_fd_lk_local_t *local)
+{
+ int32_t ret = -1;
+ clnt_conf_t *conf = NULL;
+ gf_boolean_t error = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ conf = (clnt_conf_t *) this->private;
+
+ LOCK (&local->lock);
{
- if (conn->ping_timer)
- gf_timer_call_cancel (this->ctx, conn->ping_timer);
-
- conn->ping_timer = NULL;
- conn->ping_started = 0;
-
- if (conn->saved_frames)
- /* treat the case where conn->saved_frames is NULL
- as no pending frames */
- frame_count = conn->saved_frames->count;
-
- if ((frame_count == 0) || !conn->connected) {
- /* using goto looked ugly here,
- * hence getting out this way */
- /* unlock */
- pthread_mutex_unlock (&conn->lock);
- return;
+ error = local->error;
+ local->error = _gf_true;
+ }
+ UNLOCK (&local->lock);
+
+ if (!error)
+ clnt_reacquire_lock_error (this, local->fdctx, conf);
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * RPC callback for one lock re-acquire request sent by
+ * _client_reacquire_lock; frame->local is the shared clnt_fd_lk_local_t.
+ *
+ * Success: if no error has been recorded on 'local' and this callback
+ * drops the last reference, the fd's heal state is set to GF_LK_HEAL_DONE
+ * and fdctx->reopen_done is called.
+ * Failure (rpc_status == -1, XDR decode error, or op_ret == -1): 'local'
+ * is marked as failed and this callback's reference is dropped.
+ *
+ * The frame is always detached from 'local' and destroyed.
+ * Returns 0 on success, a negative value on failure.
+ *
+ * NOTE(review): on the success path, when an error was already recorded
+ * the '&&' short-circuits and clnt_fd_lk_local_unref is not called --
+ * confirm the reference is dropped elsewhere.
+ */
+int
+client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ gfs3_lk_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_fd_lk_local_t *local = NULL;
+ struct gf_flock lock = {0,};
+
+ frame = (call_frame_t *) myframe;
+ this = frame->this;
+ local = (clnt_fd_lk_local_t *) frame->local;
+ conf = (clnt_conf_t *) this->private;
+
+ if (req->rpc_status == -1) {
+ gf_msg ("client", GF_LOG_WARNING, 0, PC_MSG_CLIENT_REQ_FAIL,
+ "request failed at rpc");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lk_rsp);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_LOCK_REQ_FAIL,
+ "lock request failed");
+ ret = -1;
+ goto out;
+ }
+
+ fdctx = local->fdctx;
+
+ gf_proto_flock_to_flock (&rsp.flock, &lock);
+
+ gf_msg_debug (this->name, 0, "%s type lock reacquired on file "
+ "with gfid %s from %"PRIu64 " to %"PRIu64,
+ get_lk_type (lock.l_type), uuid_utoa (fdctx->gfid),
+ lock.l_start, lock.l_start + lock.l_len);
+
+ if (!clnt_fd_lk_local_error_status (this, local) &&
+ clnt_fd_lk_local_unref (this, local) == 0) {
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->lk_heal_state = GF_LK_HEAL_DONE;
}
+ pthread_mutex_unlock (&conf->lock);
+
+ fdctx->reopen_done (fdctx, this);
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ clnt_fd_lk_local_mark_error (this, local);
+
+ clnt_fd_lk_local_unref (this, local);
+ }
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ return ret;
+}
- if (frame_count < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "saved_frames->count is %"PRId64,
- conn->saved_frames->count);
- conn->saved_frames->count = 0;
+/*
+ * Send one lock request per entry on fdctx->lk_ctx->lk_list so the locks
+ * are re-established on the re-opened remote fd. The list is walked
+ * without taking lk_ctx->lock here; client_reacquire_lock calls this with
+ * that lock held.
+ *
+ * All requests share one clnt_fd_lk_local_t: each frame takes its own
+ * reference and the creation reference is dropped after the loop. The
+ * loop stops at the first failure (command conversion, frame creation or
+ * submit).
+ *
+ * Returns 0 if every request was submitted, non-zero otherwise; if the
+ * shared local cannot be allocated, clnt_reacquire_lock_error is invoked
+ * and -1 is returned.
+ *
+ * NOTE(review): when client_submit_request fails the frame is not
+ * destroyed here -- presumably the submit path owns it; confirm.
+ */
+int
+_client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+ int32_t ret = -1;
+ int32_t gf_cmd = 0;
+ int32_t gf_type = 0;
+ gfs3_lk_req req = {{0,},};
+ struct gf_flock flock = {0,};
+ fd_lk_ctx_t *lk_ctx = NULL;
+ clnt_fd_lk_local_t *local = NULL;
+ fd_lk_ctx_node_t *fd_lk = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+
+ conf = (clnt_conf_t *) this->private;
+ lk_ctx = fdctx->lk_ctx;
+
+ local = clnt_fd_lk_local_create (fdctx);
+ if (!local) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_LOCK_ERROR,
+ "clnt_fd_lk_local_create failed, aborting reacquring "
+ "of locks on %s.", uuid_utoa (fdctx->gfid));
+ clnt_reacquire_lock_error (this, fdctx, conf);
+ goto out;
+ }
+
+ list_for_each_entry (fd_lk, &lk_ctx->lk_list, next) {
+ memcpy (&flock, &fd_lk->user_flock,
+ sizeof (struct gf_flock));
+
+ /* Always send F_SETLK even if the cmd was F_SETLKW */
+ /* to avoid frame being blocked if lock cannot be granted. */
+ ret = client_cmd_to_gf_cmd (F_SETLK, &gf_cmd);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_LOCK_ERROR, "client_cmd_to_gf_cmd "
+ "failed, aborting reacquiring of locks");
+ break;
}
- timeout.tv_sec = conf->opt.ping_timeout;
- timeout.tv_usec = 0;
+ gf_type = client_type_to_gf_type (flock.l_type);
+ req.fd = fdctx->remote_fd;
+ req.cmd = gf_cmd;
+ req.type = gf_type;
+ (void) gf_proto_flock_from_flock (&req.flock,
+ &flock);
- conn->ping_timer =
- gf_timer_call_after (this->ctx, timeout,
- rpc_client_ping_timer_expired,
- (void *) this);
+ memcpy (req.gfid, fdctx->gfid, 16);
- if (conn->ping_timer == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to setup timer");
- } else {
- conn->ping_started = 1;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ break;
+ }
+
+ frame->local = clnt_fd_lk_local_ref (this, local);
+ frame->root->lk_owner = fd_lk->user_flock.l_owner;
+
+ ret = client_submit_request (this, &req, frame,
+ conf->fops, GFS3_OP_LK,
+ client_reacquire_lock_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lk_req);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_LOCK_REACQUIRE, "reacquiring locks "
+ "failed on file with gfid %s",
+ uuid_utoa (fdctx->gfid));
+ break;
}
+
+ ret = 0;
+ frame = NULL;
}
- pthread_mutex_unlock (&conn->lock);
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto fail;
+ if (local)
+ (void) clnt_fd_lk_local_unref (this, local);
+out:
+ return ret;
+}
+
+/*
+ * Entry point for lock recovery on a re-opened fd. With no recorded locks
+ * the fd is finished immediately through fdctx->reopen_done; otherwise the
+ * lock requests are issued by _client_reacquire_lock while lk_ctx->lock is
+ * held (its result is deliberately ignored).
+ * Returns 0, or -1 if 'this' or 'fdctx' is NULL.
+ */
+int
+client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+        int32_t      status = -1;
+        fd_lk_ctx_t *locks  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+
+        if (!client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) {
+                locks = fdctx->lk_ctx;
+
+                LOCK (&locks->lock);
+                (void) _client_reacquire_lock (this, fdctx);
+                UNLOCK (&locks->lock);
+        } else {
+                gf_msg_debug (this->name, 0,
+                              "fd lock list is empty");
+                fdctx->reopen_done (fdctx, this);
+        }
+
+        status = 0;
+out:
+        return status;
+}
+
+/*
+ * Placeholder reopen_done hook: client_reopen_done installs it on an fd
+ * context once the real hook has run. It only logs a warning with the
+ * calling function, since it is not expected to be reached.
+ */
+void
+client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+        gf_log_callingfn (this->name, GF_LOG_WARNING,
+                          "This function should never be called");
+}
- ret = client_submit_request (this, NULL, frame, conf->handshake,
- GF_HNDSK_PING, client_ping_cbk, NULL, NULL);
+/*
+ * Finish the reopen of fdctx. Under conf->lock: reset reopen_attempts,
+ * put the context on conf->saved_fds unless it was released in the
+ * meantime, and replace the reopen_done hook with
+ * client_default_reopen_done. A released context is destroyed after the
+ * lock is dropped.
+ */
+void
+client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ gf_boolean_t destroy = _gf_false;
- return;
-fail:
+ conf = this->private;
- if (frame) {
- STACK_DESTROY (frame->root);
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->reopen_attempts = 0;
+ if (!fdctx->released)
+ list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
+ else
+ destroy = _gf_true;
+ fdctx->reopen_done = client_default_reopen_done;
}
+ pthread_mutex_unlock (&conf->lock);
- return;
+ if (destroy)
+ client_fdctx_destroy (this, fdctx);
}
+/*
+ * reopen_done hook used while fds are being re-opened after a reconnect.
+ * Decrements conf->reopen_fd_count under conf->rec_lock, completes this fd
+ * via client_reopen_done and, when the counter reaches 0, sends the lk
+ * version to the server and notifies parents with CHILD_UP.
+ */
+void
+client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+        clnt_conf_t *conf      = this->private;
+        uint64_t     remaining = 0;
+
+        LOCK (&conf->rec_lock);
+        remaining = --(conf->reopen_fd_count);
+        UNLOCK (&conf->rec_lock);
+
+        client_reopen_done (fdctx, this);
+
+        if (remaining != 0)
+                return;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_UP_NOTIFY,
+                "last fd open'd/lock-self-heal'd - notifying CHILD-UP");
+        client_set_lk_version (this);
+        client_notify_parents_child_up (this);
+}
int
-client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
+client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- xlator_t *this = NULL;
- rpc_clnt_connection_t *conn = NULL;
- struct timeval timeout = {0, };
- call_frame_t *frame = NULL;
- clnt_conf_t *conf = NULL;
+ int32_t ret = -1;
+ gfs3_open_rsp rsp = {0,};
+ gf_boolean_t attempt_lock_recovery = _gf_false;
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
frame = myframe;
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+ fdctx = local->fdctx;
- this = frame->this;
- conf = this->private;
- conn = &conf->rpc->conn;
+ if (-1 == req->rpc_status) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN,
+ PC_MSG_RPC_STATUS_ERROR, "received RPC status error, "
+ "returning ENOTCONN");
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
- if (req->rpc_status == -1) {
- /* timer expired and transport bailed out */
- gf_log (this->name, GF_LOG_DEBUG, "timer must have expired");
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_open_rsp);
+ if (ret < 0) {
+ gf_msg (frame->this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
goto out;
}
- pthread_mutex_lock (&conn->lock);
- {
- timeout.tv_sec = conf->opt.ping_timeout;
- timeout.tv_usec = 0;
+ if (rsp.op_ret < 0) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, rsp.op_errno,
+ PC_MSG_DIR_OP_SUCCESS, "reopen on %s failed.",
+ local->loc.path);
+ } else {
+ gf_msg_debug (frame->this->name, 0,
+ "reopen on %s succeeded (remote-fd = %"PRId64")",
+ local->loc.path, rsp.fd);
+ }
+
+ if (rsp.op_ret == -1) {
+ ret = -1;
+ goto out;
+ }
- gf_timer_call_cancel (this->ctx,
- conn->ping_timer);
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->remote_fd = rsp.fd;
+ if (!fdctx->released) {
+ if (conf->lk_heal &&
+ !client_fd_lk_list_empty (fdctx->lk_ctx,
+ _gf_false)) {
+ attempt_lock_recovery = _gf_true;
+ fdctx->lk_heal_state = GF_LK_HEAL_IN_PROGRESS;
+ }
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
- conn->ping_timer =
- gf_timer_call_after (this->ctx, timeout,
- client_start_ping, (void *)this);
+ ret = 0;
- if (conn->ping_timer == NULL)
- gf_log (this->name, GF_LOG_DEBUG,
- "gf_timer_call_after() returned NULL");
+ if (attempt_lock_recovery) {
+ /* Delay decrementing the reopen fd count until all the
+ locks corresponding to this fd are acquired.*/
+ gf_msg_debug (this->name, 0, "acquiring locks "
+ "on %s", local->loc.path);
+ ret = client_reacquire_lock (frame->this, local->fdctx);
+ if (ret) {
+ clnt_reacquire_lock_error (this, local->fdctx, conf);
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_LOCK_ERROR, "acquiring locks failed "
+ "on %s", local->loc.path);
+ }
}
- pthread_mutex_unlock (&conn->lock);
+
out:
+ if (!attempt_lock_recovery)
+ fdctx->reopen_done (fdctx, this);
+
+ frame->local = NULL;
STACK_DESTROY (frame->root);
+
+ client_local_wipe (local);
+
return 0;
}
-
int
-client3_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe)
+client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_getspec_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- clnt_conf_t *conf = NULL;
- int ret = 0;
+ int32_t ret = -1;
+ gfs3_open_rsp rsp = {0,};
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ call_frame_t *frame = NULL;
frame = myframe;
+ local = frame->local;
+ fdctx = local->fdctx;
conf = frame->this->private;
+
if (-1 == req->rpc_status) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN,
+ PC_MSG_RPC_STATUS_ERROR, "received RPC status error, "
+ "returning ENOTCONN");
rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
+ rsp.op_errno = ENOTCONN;
goto out;
}
- ret = xdr_to_getspec_rsp (*iov, &rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_opendir_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_msg (frame->this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
rsp.op_ret = -1;
rsp.op_errno = EINVAL;
goto out;
}
+ if (rsp.op_ret < 0) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, rsp.op_errno,
+ PC_MSG_DIR_OP_FAILED, "reopendir on %s failed",
+ local->loc.path);
+ } else {
+ gf_msg (frame->this->name, GF_LOG_INFO, 0,
+ PC_MSG_DIR_OP_SUCCESS, "reopendir on %s succeeded "
+ "(fd = %"PRId64")", local->loc.path, rsp.fd);
+ }
+
if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the 'volume file' from server");
+ ret = -1;
goto out;
}
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->remote_fd = rsp.fd;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
out:
- STACK_UNWIND_STRICT (getspec, frame, rsp.op_ret, rsp.op_errno, rsp.spec);
+ fdctx->reopen_done (fdctx, frame->this);
- /* Don't use 'GF_FREE', this is allocated by libc */
- if (rsp.spec)
- free (rsp.spec);
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ client_local_wipe (local);
return 0;
}
-int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data)
+static int
+protocol_client_reopendir (clnt_fd_ctx_t *fdctx, xlator_t *this)
{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gf_getspec_req req = {0,};
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
+ int ret = -1;
+ gfs3_opendir_req req = {{0,},};
+ clnt_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
- args = data;
conf = this->private;
- req.flags = args->flags;
- req.key = (char *)args->name;
- client_submit_request (this, &req, frame, conf->handshake, GF_HNDSK_GETSPEC,
- client3_getspec_cbk, NULL, xdr_from_getspec_req);
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ ret = -1;
+ goto out;
+ }
+ local->fdctx = fdctx;
+
+ gf_uuid_copy (local->loc.gfid, fdctx->gfid);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ memcpy (req.gfid, fdctx->gfid, 16);
+
+ gf_msg_debug (frame->this->name, 0,
+ "attempting reopen on %s", local->loc.path);
+
+ frame->local = local;
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_OPENDIR,
+ client3_3_reopendir_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_opendir_req);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED,
+ "failed to send the re-opendir request");
+ }
return 0;
-unwind:
- STACK_UNWIND_STRICT (getspec, frame, -1, op_errno, NULL);
+
+out:
+ if (local)
+ client_local_wipe (local);
+
+ fdctx->reopen_done (fdctx, this);
+
+ return 0;
+
+}
+
+static int
+protocol_client_reopenfile (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ int ret = -1;
+ gfs3_open_req req = {{0,},};
+ clnt_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ ret = -1;
+ goto out;
+ }
+
+ local->fdctx = fdctx;
+ gf_uuid_copy (local->loc.gfid, fdctx->gfid);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ frame->local = local;
+
+ memcpy (req.gfid, fdctx->gfid, 16);
+ req.flags = gf_flags_from_flags (fdctx->flags);
+ req.flags = req.flags & (~(O_TRUNC|O_CREAT|O_EXCL));
+
+ gf_msg_debug (frame->this->name, 0,
+ "attempting reopen on %s", local->loc.path);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_OPEN, client3_3_reopen_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_open_req);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED,
+ "failed to send the re-open request");
+ }
+
+ return 0;
+
+out:
+ if (frame) {
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+
+ if (local)
+ client_local_wipe (local);
+
+ fdctx->reopen_done (fdctx, this);
+
return 0;
}
+static void
+protocol_client_reopen (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ if (fdctx->is_dir)
+ protocol_client_reopendir (fdctx, this);
+ else
+ protocol_client_reopenfile (fdctx, this);
+}
+
+gf_boolean_t
+__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx)
+{
+ if (fdctx->reopen_done == client_default_reopen_done)
+ return _gf_false;
+ return _gf_true;
+}
+
+void
+client_attempt_reopen (fd_t *fd, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t reopen = _gf_false;
+
+ if (!fd || !this)
+ goto out;
+
+ conf = this->private;
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ if (!fdctx)
+ goto unlock;
+ if (__is_fd_reopen_in_progress (fdctx))
+ goto unlock;
+ if (fdctx->remote_fd != -1)
+ goto unlock;
+
+ if (fdctx->reopen_attempts == CLIENT_REOPEN_MAX_ATTEMPTS) {
+ reopen = _gf_true;
+ fdctx->reopen_done = client_reopen_done;
+ list_del_init (&fdctx->sfd_pos);
+ } else {
+ fdctx->reopen_attempts++;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&conf->lock);
+ if (reopen)
+ protocol_client_reopen (fdctx, this);
+out:
+ return;
+}
+
int
client_post_handshake (call_frame_t *frame, xlator_t *this)
{
clnt_conf_t *conf = NULL;
clnt_fd_ctx_t *tmp = NULL;
clnt_fd_ctx_t *fdctx = NULL;
- xlator_list_t *parent = NULL;
struct list_head reopen_head;
+ int count = 0;
+
if (!this || !this->private)
goto out;
@@ -333,29 +1037,34 @@ client_post_handshake (call_frame_t *frame, xlator_t *this)
if (fdctx->remote_fd != -1)
continue;
+ fdctx->reopen_done = client_child_up_reopen_done;
list_del_init (&fdctx->sfd_pos);
list_add_tail (&fdctx->sfd_pos, &reopen_head);
+ count++;
}
}
pthread_mutex_unlock (&conf->lock);
- list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
- list_del_init (&fdctx->sfd_pos);
+ /* Delay notifying CHILD_UP to parents
+ until all locks are recovered */
+ if (count > 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_UP_NOTIFY_DELAY, "%d fds open - Delaying "
+ "child_up until they are re-opened", count);
+ client_save_number_fds (conf, count);
- if (fdctx->is_dir)
- protocol_client_reopendir (this, fdctx);
- else
- protocol_client_reopen (this, fdctx);
- }
-
- parent = this->parents;
+ list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
+ list_del_init (&fdctx->sfd_pos);
- while (parent) {
- xlator_notify (parent->xlator, GF_EVENT_CHILD_UP,
- this);
- parent = parent->next;
+ protocol_client_reopen (fdctx, this);
+ }
+ } else {
+ gf_msg_debug (this->name, 0,
+ "No fds to open - notifying all parents child "
+ "up");
+ client_set_lk_version (this);
+ client_notify_parents_child_up (this);
}
-
out:
return 0;
}
@@ -367,39 +1076,39 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
clnt_conf_t *conf = NULL;
xlator_t *this = NULL;
dict_t *reply = NULL;
- xlator_list_t *parent = NULL;
char *process_uuid = NULL;
char *remote_error = NULL;
char *remote_subvol = NULL;
- rpc_transport_t *peer_trans = NULL;
gf_setvolume_rsp rsp = {0,};
- uint64_t peertrans_int = 0;
int ret = 0;
- int op_ret = 0;
- int op_errno = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ gf_boolean_t auth_fail = _gf_false;
+ uint32_t lk_ver = 0;
frame = myframe;
this = frame->this;
conf = this->private;
if (-1 == req->rpc_status) {
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN,
+ PC_MSG_RPC_STATUS_ERROR, "received RPC status error");
op_ret = -1;
- op_errno = EINVAL;
goto out;
}
- ret = xdr_to_setvolume_rsp (*iov, &rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_setvolume_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- op_errno = EINVAL;
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
op_ret = -1;
goto out;
}
op_ret = rsp.op_ret;
op_errno = gf_error_to_errno (rsp.op_errno);
if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "failed to set the volume");
+ gf_msg (frame->this->name, GF_LOG_WARNING, op_errno,
+ PC_MSG_VOL_SET_FAIL, "failed to set the volume");
}
reply = dict_new ();
@@ -410,112 +1119,181 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
ret = dict_unserialize (rsp.dict.dict_val,
rsp.dict.dict_len, &reply);
if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize buffer to dict");
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ PC_MSG_DICT_UNSERIALIZE_FAIL, "failed to "
+ "unserialize buffer to dict");
goto out;
}
}
ret = dict_get_str (reply, "ERROR", &remote_error);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get ERROR string from reply dict");
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PC_MSG_DICT_GET_FAILED, "failed to get ERROR "
+ "string from reply dict");
}
ret = dict_get_str (reply, "process-uuid", &process_uuid);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get 'process-uuid' from reply dict");
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PC_MSG_DICT_GET_FAILED, "failed to get "
+ "'process-uuid' from reply dict");
}
if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "SETVOLUME on remote-host failed: %s",
- remote_error ? remote_error : strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ PC_MSG_SETVOLUME_FAIL,
+ "SETVOLUME on remote-host failed");
errno = op_errno;
+ if (remote_error &&
+ (strcmp ("Authentication failed", remote_error) == 0)) {
+ auth_fail = _gf_true;
+ op_ret = 0;
+ }
if (op_errno == ESTALE) {
- parent = this->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_VOLFILE_MODIFIED,
- this);
- parent = parent->next;
- }
+ ret = client_notify_dispatch (this,
+ GF_EVENT_VOLFILE_MODIFIED,
+ NULL);
+ if (ret)
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_VOLFILE_NOTIFY_FAILED,
+ "notify of VOLFILE_MODIFIED failed");
}
goto out;
}
+
ret = dict_get_str (this->options, "remote-subvolume",
&remote_subvol);
- if (!remote_subvol)
+ if (ret || !remote_subvol) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED,
+ "failed to find key 'remote-subvolume' in the options");
goto out;
+ }
+
+ ret = dict_get_uint32 (reply, "child_up", &conf->child_up);
+ if (ret) {
+ /*
+ * This would happen in cases where the server trying to *
+ * connect to this client is running an older version. Hence *
+ * setting the child_up to _gf_true in this case. *
+ */
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED,
+ "failed to find key 'child_up' in the options");
+ conf->child_up = _gf_true;
+ }
+
+ ret = dict_get_uint32 (reply, "clnt-lk-version", &lk_ver);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED,
+ "failed to find key 'clnt-lk-version' in the options");
+ goto out;
+ }
- if (process_uuid &&
+ gf_msg_debug (this->name, 0, "clnt-lk-version = %d, "
+ "server-lk-version = %d", client_get_lk_ver (conf),
+ lk_ver);
+ /* TODO: currently setpeer path is broken */
+ /*
+ if (process_uuid && req->conn &&
!strcmp (this->ctx->process_uuid, process_uuid)) {
+ rpc_transport_t *peer_trans = NULL;
+ uint64_t peertrans_int = 0;
+
ret = dict_get_uint64 (reply, "transport-ptr",
&peertrans_int);
-
- peer_trans = (void *) (long) (peertrans_int);
+ if (ret)
+ goto out;
gf_log (this->name, GF_LOG_WARNING,
"attaching to the local volume '%s'",
remote_subvol);
- if (req->conn) {
- /* TODO: Some issues with this logic at present */
- //rpc_transport_setpeer (req->conn->trans, peer_trans);
- }
+ peer_trans = (void *) (long) (peertrans_int);
+
+ rpc_transport_setpeer (req->conn->trans, peer_trans);
}
+ */
- gf_log (this->name, GF_LOG_NORMAL,
+ conf->client_id = glusterfs_leaf_position(this);
+
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_REMOTE_VOL_CONNECTED,
"Connected to %s, attached to remote volume '%s'.",
- conf->rpc->conn.trans->peerinfo.identifier,
+ conf->rpc->conn.name,
remote_subvol);
rpc_clnt_set_connected (&conf->rpc->conn);
op_ret = 0;
conf->connecting = 0;
-
- /* TODO: more to test */
- client_post_handshake (frame, frame->this);
+ conf->connected = 1;
+
+ if (lk_ver != client_get_lk_ver (conf)) {
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_LOCK_MISMATCH,
+ "Server and Client lk-version numbers are not same, "
+ "reopening the fds");
+ client_mark_fd_bad (this);
+ client_post_handshake (frame, frame->this);
+ } else {
+ /*TODO: Traverse the saved fd list, and send
+ release to the server on fd's that were closed
+ during grace period */
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_LOCK_MATCH,
+ "Server and Client lk-version numbers are same, no "
+ "need to reopen the fds");
+ client_notify_parents_child_up (frame->this);
+ }
out:
-
+ if (auth_fail) {
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_AUTH_FAILED,
+ "sending AUTH_FAILED event");
+ ret = client_notify_dispatch (this, GF_EVENT_AUTH_FAILED, NULL);
+ if (ret)
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_AUTH_FAILED_NOTIFY_FAILED, "notify of "
+ "AUTH_FAILED failed");
+ conf->connecting = 0;
+ conf->connected = 0;
+ ret = -1;
+ }
if (-1 == op_ret) {
/* Let the connection/re-connection happen in
* background, for now, don't hang here,
* tell the parents that i am all ok..
*/
- parent = this->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_CHILD_CONNECTING, this);
- parent = parent->next;
- }
-
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_CONNECTING_EVENT, "sending "
+ "CHILD_CONNECTING event");
+ ret = client_notify_dispatch (this, GF_EVENT_CHILD_CONNECTING,
+ NULL);
+ if (ret)
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_CONNECTING_NOTIFY_FAILED,
+ "notify of CHILD_CONNECTING failed");
conf->connecting= 1;
+ ret = 0;
}
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
STACK_DESTROY (frame->root);
if (reply)
dict_unref (reply);
- return 0;
+ return ret;
}
int
client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
{
int ret = 0;
- gf_setvolume_req req = {0,};
+ gf_setvolume_req req = {{0,},};
call_frame_t *fr = NULL;
char *process_uuid_xl = NULL;
clnt_conf_t *conf = NULL;
dict_t *options = NULL;
+ char counter_str[32] = {0};
options = this->options;
conf = this->private;
@@ -524,8 +1302,9 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
ret = dict_set_int32 (options, "fops-version",
conf->fops->prognum);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set version-fops(%d) in handshake msg",
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_DICT_SET_FAILED, "failed to set "
+ "version-fops(%d) in handshake msg",
conf->fops->prognum);
goto fail;
}
@@ -534,49 +1313,94 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
if (conf->mgmt) {
ret = dict_set_int32 (options, "mgmt-version", conf->mgmt->prognum);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set version-mgmt(%d) in handshake msg",
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_DICT_SET_FAILED, "failed to set "
+ "version-mgmt(%d) in handshake msg",
conf->mgmt->prognum);
goto fail;
}
}
- ret = gf_asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid,
- this->name);
+ /* When lock-heal is enabled:
+ * With multiple graphs possible in the same process, we need a
+ field to bring the uniqueness. Graph-ID should be enough to get the
+ job done.
+ * When lock-heal is disabled, connection-id should always be unique so
+ * that server never gets to reuse the previous connection resources
+ * so it cleans up the resources on every disconnect. Otherwise
+ * it may lead to stale resources, i.e. leaked file descriptors,
+ * inode/entry locks
+ */
+ if (!conf->lk_heal) {
+ snprintf (counter_str, sizeof (counter_str),
+ "-%"PRIu64, conf->setvol_count);
+ conf->setvol_count++;
+ }
+ ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d%s",
+ this->ctx->process_uuid, this->name,
+ this->graph->id, counter_str);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting process_uuid");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_PROCESS_UUID_SET_FAIL, "asprintf failed while "
+ "setting process_uuid");
goto fail;
}
+
ret = dict_set_dynstr (options, "process-uuid", process_uuid_xl);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_SET_FAILED,
"failed to set process-uuid(%s) in handshake msg",
process_uuid_xl);
goto fail;
}
+ ret = dict_set_str (options, "client-version", PACKAGE_VERSION);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED,
+ "failed to set client-version(%s) in handshake msg",
+ PACKAGE_VERSION);
+ }
+
if (this->ctx->cmd_args.volfile_server) {
- if (this->ctx->cmd_args.volfile_id)
+ if (this->ctx->cmd_args.volfile_id) {
ret = dict_set_str (options, "volfile-key",
this->ctx->cmd_args.volfile_id);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_DICT_SET_FAILED, "failed to "
+ "set 'volfile-key'");
+ }
ret = dict_set_uint32 (options, "volfile-checksum",
this->graph->volfile_checksum);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_DICT_SET_FAILED, "failed to set "
+ "'volfile-checksum'");
+ }
+
+ ret = dict_set_int16 (options, "clnt-lk-version",
+ client_get_lk_ver (conf));
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED,
+ "failed to set clnt-lk-version(%"PRIu32") in handshake "
+ "msg", client_get_lk_ver (conf));
}
- req.dict.dict_len = dict_serialized_length (options);
- if (req.dict.dict_len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
+ ret = dict_serialized_length (options);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_ERROR,
"failed to get serialized length of dict");
ret = -1;
goto fail;
}
+ req.dict.dict_len = ret;
req.dict.dict_val = GF_CALLOC (1, req.dict.dict_len,
gf_client_mt_clnt_req_buf_t);
ret = dict_serialize (options, req.dict.dict_val);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize dictionary");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_DICT_SERIALIZE_FAIL, "failed to serialize "
+ "dictionary");
goto fail;
}
@@ -586,126 +1410,254 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
ret = client_submit_request (this, &req, fr, conf->handshake,
GF_HNDSK_SETVOLUME, client_setvolume_cbk,
- NULL, xdr_from_setvolume_req);
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_setvolume_req);
fail:
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int
-select_server_supported_programs (xlator_t *this, char *msg)
+select_server_supported_programs (xlator_t *this, gf_prog_detail *prog)
{
- clnt_conf_t *conf = NULL;
- char *tmp_str = NULL;
- char *prog_str = NULL;
- char *dup_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_msg = NULL;
- char *progname = NULL;
- char *progver_str = NULL;
- char *prognum_str = NULL;
- int ret = -1;
- int progver = 0;
- int prognum = 0;
+ gf_prog_detail *trav = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
- if (!this || !msg)
+ if (!this || !prog) {
+ gf_msg (THIS->name, GF_LOG_WARNING, 0, PC_MSG_PGM_NOT_FOUND,
+ "xlator not found OR RPC program not found");
goto out;
+ }
conf = this->private;
+ trav = prog;
- /* Reply in "Name:Program-Number:Program-Version,..." format */
- tmp_msg = gf_strdup (msg);
- prog_str = strtok_r (tmp_msg, ",", &tmp_str);
- while (prog_str) {
- dup_str = gf_strdup (prog_str);
-
- progname = strtok_r (dup_str, ":", &tmp_str1);
- prognum_str = strtok_r (NULL, ":", &tmp_str1);
- if (!prognum_str) {
- gf_log (this->name, GF_LOG_WARNING,
- "Supported versions not formatted");
- goto out;
- }
- sscanf (prognum_str, "%d", &prognum);
- progver_str = strtok_r (NULL, ":", &tmp_str1);
- if (!progver_str) {
- gf_log (this->name, GF_LOG_WARNING,
- "Supported versions not formatted");
- goto out;
- }
- sscanf (progver_str, "%d", &progver);
-
+ while (trav) {
/* Select 'programs' */
- if ((clnt3_1_fop_prog.prognum == prognum) &&
- (clnt3_1_fop_prog.progver == progver)) {
- conf->fops = &clnt3_1_fop_prog;
- gf_log (this->name, GF_LOG_INFO,
- "Using Program %s, Num (%s), Version (%s)",
- progname, prognum_str, progver_str);
+ if ((clnt3_3_fop_prog.prognum == trav->prognum) &&
+ (clnt3_3_fop_prog.progver == trav->progver)) {
+ conf->fops = &clnt3_3_fop_prog;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_VERSION_INFO, "Using Program %s, "
+ "Num (%"PRId64"), Version (%"PRId64")",
+ trav->progname, trav->prognum, trav->progver);
ret = 0;
}
- if ((clnt3_1_mgmt_prog.prognum == prognum) &&
- (clnt3_1_mgmt_prog.progver == progver)) {
- conf->mgmt = &clnt3_1_mgmt_prog;
- gf_log (this->name, GF_LOG_INFO,
- "Using Program %s, Num (%s), Version (%s)",
- progname, prognum_str, progver_str);
+ if (ret) {
+ gf_msg_trace (this->name, 0,
+ "%s (%"PRId64") not supported",
+ trav->progname, trav->progver);
+ }
+ trav = trav->next;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+server_has_portmap (xlator_t *this, gf_prog_detail *prog)
+{
+ gf_prog_detail *trav = NULL;
+ int ret = -1;
+
+ if (!this || !prog) {
+ gf_msg (THIS->name, GF_LOG_WARNING, 0, PC_MSG_PGM_NOT_FOUND,
+ "xlator not found OR RPC program not found");
+ goto out;
+ }
+
+ trav = prog;
+
+ while (trav) {
+ if ((trav->prognum == GLUSTER_PMAP_PROGRAM) &&
+ (trav->progver == GLUSTER_PMAP_VERSION)) {
+ gf_msg_debug (this->name, 0,
+ "detected portmapper on server");
ret = 0;
+ break;
}
+ trav = trav->next;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+client_query_portmap_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe)
+{
+ struct pmap_port_by_brick_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+ struct rpc_clnt_config config = {0, };
+ xlator_t *this = NULL;
+
+ frame = myframe;
+ if (!frame || !frame->this || !frame->this->private) {
+ gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+ PC_MSG_INVALID_ENTRY, "frame not found with rpc "
+ "request");
+ goto out;
+ }
+ this = frame->this;
+ conf = frame->this->private;
- prog_str = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
+ if (-1 == req->rpc_status) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOTCONN,
+ PC_MSG_RPC_STATUS_ERROR, "received RPC status error, "
+ "try again later");
+ goto out;
}
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "none of the server versions are supported by client");
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+ goto out;
}
- ret = 0;
+
+ if (-1 == rsp.op_ret) {
+ ret = -1;
+ if (!conf->portmap_err_logged) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PC_MSG_PORT_NUM_ERROR, "failed to get the "
+ "port number for remote subvolume. Please run "
+ "'gluster volume status' on server to see if "
+ "brick process is running.");
+ } else {
+ gf_msg_debug (this->name, 0,
+ "failed to get the port number for "
+ "remote subvolume. Please run 'gluster "
+ "volume status' on server to see "
+ "if brick process is running.");
+ }
+ conf->portmap_err_logged = 1;
+ goto out;
+ }
+
+ conf->portmap_err_logged = 0;
+ conf->disconnect_err_logged = 0;
+ config.remote_port = rsp.port;
+ rpc_clnt_reconfig (conf->rpc, &config);
+
+ conf->skip_notify = 1;
+ conf->quick_reconnect = 1;
+
out:
- if (tmp_msg)
- GF_FREE (tmp_msg);
+ if (frame)
+ STACK_DESTROY (frame->root);
+
+ if (conf) {
+ /* Need this to connect the same transport on a different port */
+ /* i.e., glusterd to glusterfsd */
+ rpc_transport_disconnect (conf->rpc->conn.trans);
+ }
+
return ret;
}
+
int
-client_dump_version_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe)
+client_query_portmap (xlator_t *this, struct rpc_clnt *rpc)
{
- gf_dump_version_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- clnt_conf_t *conf = NULL;
- int ret = 0;
+ int ret = -1;
+ pmap_port_by_brick_req req = {0,};
+ call_frame_t *fr = NULL;
+ clnt_conf_t *conf = NULL;
+ dict_t *options = NULL;
+ char *remote_subvol = NULL;
+ char *xprt = NULL;
+ char brick_name[PATH_MAX] = {0,};
+
+ options = this->options;
+ conf = this->private;
+
+ ret = dict_get_str (options, "remote-subvolume", &remote_subvol);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_VOL_SET_FAIL,
+ "remote-subvolume not set in volfile");
+ goto fail;
+ }
+
+ req.brick = remote_subvol;
+
+ if (!dict_get_str (options, "transport-type", &xprt)) {
+ if (!strcmp (xprt, "rdma")) {
+ snprintf (brick_name, sizeof(brick_name), "%s.rdma",
+ remote_subvol);
+ req.brick = brick_name;
+ }
+ }
+
+ fr = create_frame (this, this->ctx->pool);
+ if (!fr) {
+ ret = -1;
+ goto fail;
+ }
+
+ ret = client_submit_request (this, &req, fr, &clnt_pmap_prog,
+ GF_PMAP_PORTBYBRICK,
+ client_query_portmap_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
+
+fail:
+ return ret;
+}
+
+
+int
+client_dump_version_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_dump_rsp rsp = {0,};
+ gf_prog_detail *trav = NULL;
+ gf_prog_detail *next = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = 0;
frame = myframe;
conf = frame->this->private;
if (-1 == req->rpc_status) {
- gf_log ("", 1, "some error, retry again later");
+ gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN,
+ PC_MSG_RPC_STATUS_ERROR, "received RPC status error");
goto out;
}
- ret = xdr_to_dump_version_rsp (*iov, &rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_msg (frame->this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
goto out;
}
if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the 'versions' from server");
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ PC_MSG_VERSION_ERROR, "failed to get the 'versions' "
+ "from server");
+ goto out;
+ }
+
+ if (server_has_portmap (frame->this, rsp.prog) == 0) {
+ ret = client_query_portmap (frame->this, conf->rpc);
goto out;
}
/* Check for the proper version string */
/* Reply in "Name:Program-Number:Program-Version,..." format */
- ret = select_server_supported_programs (frame->this,
- rsp.msg.msg_val);
+ ret = select_server_supported_programs (frame->this, rsp.prog);
if (ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Server versions are not present in this "
- "release (%s)", rsp.msg.msg_val);
+ gf_msg (frame->this->name, GF_LOG_ERROR, 0,
+ PC_MSG_VERSION_ERROR, "server doesn't support the "
+ "version");
goto out;
}
@@ -713,51 +1665,59 @@ client_dump_version_cbk (struct rpc_req *req, struct iovec *iov, int count, void
out:
/* don't use GF_FREE, buffer was allocated by libc */
- if (rsp.msg.msg_val) {
- free (rsp.msg.msg_val);
+ if (rsp.prog) {
+ trav = rsp.prog;
+ while (trav) {
+ next = trav->next;
+ free (trav->progname);
+ free (trav);
+ trav = next;
+ }
}
STACK_DESTROY (frame->root);
+
+ if (ret != 0)
+ rpc_transport_disconnect (conf->rpc->conn.trans);
+
return ret;
}
int
client_handshake (xlator_t *this, struct rpc_clnt *rpc)
{
- call_frame_t *frame = NULL;
- clnt_conf_t *conf = NULL;
- gf_dump_version_req req = {0,};
- int ret = 0;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ gf_dump_req req = {0,};
+ int ret = 0;
conf = this->private;
- if (!conf->handshake)
+ if (!conf->handshake) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_PGM_NOT_FOUND,
+ "handshake program not found");
goto out;
+ }
frame = create_frame (this, this->ctx->pool);
if (!frame)
goto out;
- req.key = "fop-handshake";
- req.gfs_id = 123456;
- ret = client_submit_request (this, &req, frame, conf->handshake,
- GF_HNDSK_DUMP_VERSION,
- client_dump_version_cbk,
- NULL, xdr_from_dump_version_req);
+ req.gfs_id = 0xbabe;
+ ret = client_submit_request (this, &req, frame, conf->dump,
+ GF_DUMP_DUMP, client_dump_version_cbk,
+ NULL, NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gf_dump_req);
out:
return ret;
}
-
-/* */
-/* This table should ideally remain same irrespective of versions */
-
char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_NULL] = "NULL",
- [GF_HNDSK_DUMP_VERSION] = "VERSION",
[GF_HNDSK_SETVOLUME] = "SETVOLUME",
[GF_HNDSK_GETSPEC] = "GETSPEC",
[GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_SET_LK_VER] = "SET_LK_VER"
};
rpc_clnt_prog_t clnt_handshake_prog = {
@@ -766,3 +1726,26 @@ rpc_clnt_prog_t clnt_handshake_prog = {
.progver = GLUSTER_HNDSK_VERSION,
.procnames = clnt_handshake_procs,
};
+
+char *clnt_dump_proc[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = "NULL",
+ [GF_DUMP_DUMP] = "DUMP",
+};
+
+rpc_clnt_prog_t clnt_dump_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .procnames = clnt_dump_proc,
+};
+
+char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
+};
+
+rpc_clnt_prog_t clnt_pmap_prog = {
+ .progname = "PORTMAP",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_procs,
+};
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 6c028d4ebb7..4bdb364a9a3 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -1,29 +1,45 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "client.h"
#include "fd.h"
+#include "client-messages.h"
+#include "client-common.h"
+#include "compat-errno.h"
+#include "common-utils.h"
+
+/*
+ * Report whether lk_ctx->lk_list is empty, reading it under lk_ctx->lock.
+ *
+ * @lk_ctx:   lock context to inspect
+ * @try_lock: when true the lock is taken with TRY_LOCK and a failed attempt
+ *            is reported as an error; otherwise LOCK is used
+ *
+ * Returns the result of list_empty() on success, -1 when @lk_ctx is NULL or
+ * TRY_LOCK did not return 0.
+ */
+int
+client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
+{
+        int is_empty = 0;
+
+        if (lk_ctx == NULL)
+                return -1;
+
+        if (!try_lock) {
+                LOCK (&lk_ctx->lock);
+        } else if (TRY_LOCK (&lk_ctx->lock) != 0) {
+                return -1;
+        }
+
+        is_empty = list_empty (&lk_ctx->lk_list);
+        UNLOCK (&lk_ctx->lock);
+
+        return is_empty;
+}
clnt_fd_ctx_t *
this_fd_del_ctx (fd_t *file, xlator_t *this)
@@ -76,16 +92,28 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, clnt_fd_ctx_t *ctx)
ret = fd_ctx_get (file, this, &oldaddr);
if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): trying duplicate remote fd set. ",
- loc->path, loc->inode->ino);
+ if (loc)
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_FD_DUPLICATE_TRY,
+ "%s (%s): trying duplicate remote fd set. ",
+ loc->path, uuid_utoa (loc->inode->gfid));
+ else
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_FD_DUPLICATE_TRY,
+ "%p: trying duplicate remote fd set. ", file);
}
ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to set remote fd",
- loc->path, loc->inode->ino);
+ if (loc)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_FD_SET_FAIL,
+ "%s (%s): failed to set remote fd",
+ loc->path, uuid_utoa (loc->inode->gfid));
+ else
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_FD_SET_FAIL,
+ "%p: failed to set remote fd", file);
}
out:
return;
@@ -97,23 +125,41 @@ client_local_wipe (clnt_local_t *local)
{
if (local) {
loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
- if (local->fd)
+ if (local->fd) {
fd_unref (local->fd);
+ }
+
+ if (local->iobref) {
+ iobref_unref (local->iobref);
+ }
+
+ if (local->iobref2) {
+ iobref_unref (local->iobref2);
+ }
+
+ GF_FREE (local->name);
- GF_FREE (local);
+ local->compound_args = NULL;
+
+ mem_put (local);
}
return 0;
}
int
-unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
+unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp,
+ gf_dirent_t *entries)
{
struct gfs3_dirlist *trav = NULL;
gf_dirent_t *entry = NULL;
int entry_len = 0;
int ret = -1;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
trav = rsp->reply;
while (trav) {
@@ -123,7 +169,8 @@ unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
goto out;
entry->d_ino = trav->d_ino;
- entry->d_off = trav->d_off;
+ gf_itransform (this, trav->d_off, &entry->d_off,
+ conf->client_id);
entry->d_len = trav->d_len;
entry->d_type = trav->d_type;
@@ -140,15 +187,26 @@ out:
}
int
-unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries)
+unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
+ struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries)
{
struct gfs3_dirplist *trav = NULL;
+ char *buf = NULL;
gf_dirent_t *entry = NULL;
+ inode_table_t *itable = NULL;
int entry_len = 0;
int ret = -1;
+ clnt_conf_t *conf = NULL;
trav = rsp->reply;
+ if (fd)
+ itable = fd->inode->table;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
while (trav) {
entry_len = gf_dirent_size (trav->name);
entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t);
@@ -156,7 +214,8 @@ unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries)
goto out;
entry->d_ino = trav->d_ino;
- entry->d_off = trav->d_off;
+ gf_itransform (this, trav->d_off, &entry->d_off,
+ conf->client_id);
entry->d_len = trav->d_len;
entry->d_type = trav->d_type;
@@ -164,6 +223,30 @@ unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries)
strcpy (entry->d_name, trav->name);
+ if (trav->dict.dict_val) {
+ /* Dictionary is sent along with response */
+ buf = memdup (trav->dict.dict_val, trav->dict.dict_len);
+ if (!buf)
+ goto out;
+
+ entry->dict = dict_new ();
+
+ ret = dict_unserialize (buf, trav->dict.dict_len,
+ &entry->dict);
+ if (ret < 0) {
+ gf_msg (THIS->name, GF_LOG_WARNING, EINVAL,
+ PC_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to unserialize xattr dict");
+ goto out;
+ }
+ entry->dict->extra_free = buf;
+ buf = NULL;
+ }
+
+ entry->inode = inode_find (itable, entry->d_stat.ia_gfid);
+ if (!entry->inode)
+ entry->inode = inode_new (itable);
+
list_add_tail (&entry->list, &entries->list);
trav = trav->nextentry;
@@ -185,6 +268,7 @@ clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp)
while (trav) {
trav = trav->nextentry;
/* on client, the rpc lib allocates this */
+ free (prev->dict.dict_val);
free (prev->name);
free (prev);
prev = trav;
@@ -211,3 +295,1596 @@ clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp)
return 0;
}
+
+/*
+ * Fetch the remote fd number recorded for @fd into *@remote_fd.
+ *
+ * The fd context is read while holding conf->lock:
+ *   - no context                -> GF_ANON_FD_NO
+ *   - reopen still in progress  -> -1
+ *   - otherwise                 -> the context's remote_fd
+ * A resulting -1 is replaced by GF_ANON_FD_NO when @flags contains
+ * FALLBACK_TO_ANON_FD.
+ *
+ * Returns 0 on success, -1 when @fd or @remote_fd fails validation.
+ */
+int
+client_get_remote_fd (xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+{
+        clnt_conf_t   *priv = NULL;
+        clnt_fd_ctx_t *ctx  = NULL;
+
+        GF_VALIDATE_OR_GOTO (this->name, fd, err);
+        GF_VALIDATE_OR_GOTO (this->name, remote_fd, err);
+
+        priv = this->private;
+
+        pthread_mutex_lock (&priv->lock);
+        {
+                ctx = this_fd_get_ctx (fd, this);
+                if (ctx == NULL) {
+                        *remote_fd = GF_ANON_FD_NO;
+                } else if (__is_fd_reopen_in_progress (ctx)) {
+                        *remote_fd = -1;
+                } else {
+                        *remote_fd = ctx->remote_fd;
+                }
+        }
+        pthread_mutex_unlock (&priv->lock);
+
+        if ((*remote_fd == -1) && (flags & FALLBACK_TO_ANON_FD))
+                *remote_fd = GF_ANON_FD_NO;
+
+        return 0;
+err:
+        return -1;
+}
+
+/*
+ * Decide whether a reopen should be attempted for @fd.
+ *
+ * True only when @fd has a client fd context whose remote_fd is -1 and the
+ * caller's @remote_fd is GF_ANON_FD_NO; false in every other case.
+ */
+gf_boolean_t
+client_is_reopen_needed (fd_t *fd, xlator_t *this, int64_t remote_fd)
+{
+        clnt_fd_ctx_t *ctx = this_fd_get_ctx (fd, this);
+
+        if (ctx == NULL)
+                return _gf_false;
+
+        if (ctx->remote_fd != -1)
+                return _gf_false;
+
+        if (remote_fd != GF_ANON_FD_NO)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * Allocate frame->local for an fd-based fop and prime it.
+ *
+ * @frame:     call frame that receives the new local
+ * @fd:        fd the fop operates on; a reference is stored in local->fd
+ * @remote_fd: remote fd already resolved by the caller, used to decide
+ *             whether a reopen should be attempted afterwards
+ *
+ * Returns 0 on success, -EINVAL when @frame or @fd is NULL, -ENOMEM when the
+ * local cannot be obtained from this->local_pool.
+ */
+int
+client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd, int64_t remote_fd)
+{
+        xlator_t     *this  = NULL;
+        clnt_local_t *local = NULL;
+        int           ret   = 0;
+
+        /* Validate before touching frame: it is dereferenced just below. */
+        if (!frame || !fd) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        this = frame->this;
+
+        frame->local = mem_get0 (this->local_pool);
+        if (frame->local == NULL) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        local = frame->local;
+        local->fd = fd_ref (fd);
+        local->attempt_reopen = client_is_reopen_needed (fd, this, remote_fd);
+
+        return 0;
+out:
+        return ret;
+}
+
+/*
+ * client_process_response - unpack one per-fop reply of a compound response.
+ *
+ * @frame:    call frame; frame->local (clnt_local_t) supplies the fd, loc,
+ *            compound_args, read_length, attempt_reopen and owner used below
+ * @this:     client xlator
+ * @req:      rpc request; its rsp_iobref and rsp[1] are used by the READ case
+ * @rsp:      compound response that holds the per-fop reply array
+ * @args_cbk: destination; rsp_list[index] and enum_list[index] are filled in
+ * @index:    position of the reply to process
+ *
+ * Each case runs the matching client_post_<fop>() helper and then stores the
+ * result in args_cbk->rsp_list[index].
+ *
+ * Returns -ENOTSUP when the reply's fop_enum has no case here, 0 otherwise.
+ */
+int
+client_process_response (call_frame_t *frame, xlator_t *this,
+                         struct rpc_req *req, gfs3_compound_rsp *rsp,
+                         compound_args_cbk_t *args_cbk,
+                         int index)
+{
+        int                      ret            = 0;
+        default_args_cbk_t      *this_args_cbk  = &args_cbk->rsp_list[index];
+        clnt_local_t            *local          = frame->local;
+        compound_rsp            *this_rsp       = NULL;
+        compound_args_t         *args           = local->compound_args;
+
+        this_rsp = &rsp->compound_rsp_array.compound_rsp_array_val[index];
+        args_cbk->enum_list[index] = this_rsp->fop_enum;
+
+        switch (args_cbk->enum_list[index]) {
+
+        case GF_FOP_STAT:
+        {
+                gfs3_stat_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_stat_rsp;
+
+                client_post_stat (this, tmp_rsp,
+                                  &this_args_cbk->stat, &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (stat, this_rsp, this_args_cbk,
+                                      &this_args_cbk->stat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_READLINK:
+        {
+                gfs3_readlink_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_readlink_rsp;
+
+                client_post_readlink (this, tmp_rsp,
+                                      &this_args_cbk->stat, &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (readlink, this_rsp, this_args_cbk,
+                                      tmp_rsp->path,
+                                      &this_args_cbk->stat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_MKNOD:
+        {
+                gfs3_mknod_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_mknod_rsp;
+
+                client_post_mknod (this, tmp_rsp,
+                                   &this_args_cbk->stat,
+                                   &this_args_cbk->preparent,
+                                   &this_args_cbk->postparent,
+                                   &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (mknod, this_rsp, this_args_cbk,
+                                      local->loc.inode,
+                                      &this_args_cbk->stat,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_MKDIR:
+        {
+                gfs3_mkdir_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_mkdir_rsp;
+
+                client_post_mkdir (this,
+                                   tmp_rsp,
+                                   &this_args_cbk->stat,
+                                   &this_args_cbk->preparent,
+                                   &this_args_cbk->postparent,
+                                   &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (mkdir, this_rsp, this_args_cbk,
+                                      local->loc.inode,
+                                      &this_args_cbk->stat,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_UNLINK:
+        {
+                gfs3_unlink_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_unlink_rsp;
+
+                client_post_unlink (this,
+                                    tmp_rsp,
+                                    &this_args_cbk->preparent,
+                                    &this_args_cbk->postparent,
+                                    &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (unlink, this_rsp, this_args_cbk,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_RMDIR:
+        {
+                gfs3_rmdir_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_rmdir_rsp;
+
+                client_post_rmdir (this, tmp_rsp,
+                                   &this_args_cbk->preparent,
+                                   &this_args_cbk->postparent,
+                                   &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (rmdir, this_rsp, this_args_cbk,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_SYMLINK:
+        {
+                gfs3_symlink_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_symlink_rsp;
+
+                client_post_symlink (this, tmp_rsp,
+                                     &this_args_cbk->stat,
+                                     &this_args_cbk->preparent,
+                                     &this_args_cbk->postparent,
+                                     &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (symlink, this_rsp, this_args_cbk, NULL,
+                                      &this_args_cbk->stat,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_RENAME:
+        {
+                gfs3_rename_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_rename_rsp;
+
+                client_post_rename (this, tmp_rsp,
+                                    &this_args_cbk->stat,
+                                    &this_args_cbk->preparent,
+                                    &this_args_cbk->postparent,
+                                    &this_args_cbk->preparent2,
+                                    &this_args_cbk->postparent2,
+                                    &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (rename, this_rsp, this_args_cbk,
+                                      &this_args_cbk->stat,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      &this_args_cbk->preparent2,
+                                      &this_args_cbk->postparent2,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_LINK:
+        {
+                gfs3_link_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_link_rsp;
+
+                client_post_link (this, tmp_rsp,
+                                  &this_args_cbk->stat,
+                                  &this_args_cbk->preparent,
+                                  &this_args_cbk->postparent,
+                                  &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (link, this_rsp, this_args_cbk, NULL,
+                                      &this_args_cbk->stat,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_TRUNCATE:
+        {
+                gfs3_truncate_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_truncate_rsp;
+
+                client_post_truncate (this, tmp_rsp,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (truncate, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_OPEN:
+        {
+                gfs3_open_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_open_rsp;
+
+                client_post_open (this, tmp_rsp,
+                                  &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (open, this_rsp, this_args_cbk,
+                                      local->fd,
+                                      this_args_cbk->xdata);
+                if (-1 != this_args_cbk->op_ret)
+                        ret = client_add_fd_to_saved_fds (this, local->fd,
+                                                          &local->loc,
+                                                          args->req_list[index].flags,
+                                                          tmp_rsp->fd,
+                                                          0);
+                break;
+        }
+        case GF_FOP_READ:
+        {
+                gfs3_read_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_read_rsp;
+
+                client_post_readv (this, tmp_rsp, &this_args_cbk->iobref,
+                                   req->rsp_iobref,
+                                   &this_args_cbk->stat,
+                                   this_args_cbk->vector, &req->rsp[1],
+                                   &this_args_cbk->count,
+                                   &this_args_cbk->xdata);
+
+                /* Each read should be given read response that only
+                 * corresponds to its request.
+                 * Modify the iovecs accordingly.
+                 * After each read, store the length of data already read
+                 * so that the next ones can continue from there.
+                 */
+                if (local->read_length) {
+                        this_args_cbk->vector[0].iov_base += local->read_length;
+                        local->read_length += tmp_rsp->op_ret;
+                } else {
+                        local->read_length = tmp_rsp->op_ret;
+                }
+
+                args_readv_cbk_store (this_args_cbk, tmp_rsp->op_ret,
+                                      gf_error_to_errno (tmp_rsp->op_errno),
+                                      this_args_cbk->vector,
+                                      this_args_cbk->count,
+                                      &this_args_cbk->stat,
+                                      this_args_cbk->iobref,
+                                      this_args_cbk->xdata);
+
+                if (tmp_rsp->op_ret >= 0)
+                        if (local->attempt_reopen)
+                                client_attempt_reopen (local->fd, this);
+
+                break;
+        }
+        case GF_FOP_WRITE:
+        {
+                gfs3_write_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_write_rsp;
+
+                client_post_writev (this, tmp_rsp, &this_args_cbk->prestat,
+                                    &this_args_cbk->poststat,
+                                    &this_args_cbk->xdata);
+                args_writev_cbk_store (this_args_cbk, tmp_rsp->op_ret,
+                                       gf_error_to_errno (tmp_rsp->op_errno),
+                                       &this_args_cbk->prestat,
+                                       &this_args_cbk->poststat,
+                                       this_args_cbk->xdata);
+
+                if (tmp_rsp->op_ret == 0)
+                        if (local->attempt_reopen)
+                                client_attempt_reopen (local->fd, this);
+                break;
+        }
+        case GF_FOP_STATFS:
+        {
+                gfs3_statfs_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_statfs_rsp;
+
+                client_post_statfs (this, tmp_rsp,
+                                    &this_args_cbk->statvfs,
+                                    &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (statfs, this_rsp, this_args_cbk,
+                                      &this_args_cbk->statvfs,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FLUSH:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_flush_rsp;
+
+                client_post_flush (this, tmp_rsp,
+                                   &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (flush, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                if (this_args_cbk->op_ret >= 0 && !fd_is_anonymous (local->fd)) {
+                        /* Delete all saved locks of the owner issuing flush */
+                        ret = delete_granted_locks_owner (local->fd, &local->owner);
+                        gf_msg_trace (this->name, 0,
+                                      "deleting locks of owner (%s) returned %d",
+                                      lkowner_utoa (&local->owner), ret);
+                }
+                break;
+        }
+        case GF_FOP_FSYNC:
+        {
+                gfs3_fsync_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fsync_rsp;
+
+                client_post_fsync (this, tmp_rsp,
+                                   &this_args_cbk->prestat,
+                                   &this_args_cbk->poststat,
+                                   &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (fsync, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_SETXATTR:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_setxattr_rsp;
+
+                client_post_setxattr (this, tmp_rsp,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (setxattr, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_GETXATTR:
+        {
+                gfs3_getxattr_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_getxattr_rsp;
+
+                client_post_getxattr (this, tmp_rsp,
+                                      &this_args_cbk->xattr,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (getxattr, this_rsp, this_args_cbk,
+                                      this_args_cbk->xattr,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_REMOVEXATTR:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_removexattr_rsp;
+
+                client_post_removexattr (this, tmp_rsp,
+                                         &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (removexattr, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_OPENDIR:
+        {
+                gfs3_opendir_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_opendir_rsp;
+
+                client_post_opendir (this, tmp_rsp,
+                                     &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (opendir, this_rsp, this_args_cbk,
+                                      local->fd,
+                                      this_args_cbk->xdata);
+                if (-1 != this_args_cbk->op_ret)
+                        ret = client_add_fd_to_saved_fds (this, local->fd,
+                                                          &local->loc,
+                                                          args->req_list[index].flags,
+                                                          tmp_rsp->fd, 0);
+                break;
+        }
+        case GF_FOP_FSYNCDIR:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fsyncdir_rsp;
+
+                client_post_fsyncdir (this, tmp_rsp,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (fsyncdir, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_ACCESS:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_access_rsp;
+
+                client_post_access (this, tmp_rsp,
+                                    &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (access, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_CREATE:
+        {
+                gfs3_create_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_create_rsp;
+
+                client_post_create (this, tmp_rsp,
+                                    &this_args_cbk->stat,
+                                    &this_args_cbk->preparent,
+                                    &this_args_cbk->postparent,
+                                    local,
+                                    &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (create, this_rsp, this_args_cbk,
+                                      local->fd,
+                                      local->loc.inode,
+                                      &this_args_cbk->stat,
+                                      &this_args_cbk->preparent,
+                                      &this_args_cbk->postparent,
+                                      this_args_cbk->xdata);
+                if (-1 != this_args_cbk->op_ret)
+                        ret = client_add_fd_to_saved_fds (this, local->fd,
+                                                          &local->loc,
+                                                          args->req_list[index].flags,
+                                                          tmp_rsp->fd, 0);
+                break;
+        }
+        case GF_FOP_FTRUNCATE:
+        {
+                gfs3_ftruncate_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_ftruncate_rsp;
+
+                client_post_ftruncate (this, tmp_rsp,
+                                       &this_args_cbk->prestat,
+                                       &this_args_cbk->poststat,
+                                       &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (ftruncate, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FSTAT:
+        {
+                gfs3_fstat_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fstat_rsp;
+
+                client_post_fstat (this, tmp_rsp,
+                                   &this_args_cbk->stat, &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (fstat, this_rsp, this_args_cbk,
+                                      &this_args_cbk->stat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_LK:
+        {
+                gfs3_lk_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_lk_rsp;
+
+                client_post_lk (this, tmp_rsp,
+                                &this_args_cbk->lock,
+                                &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (lk, this_rsp, this_args_cbk,
+                                      &this_args_cbk->lock,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_LOOKUP:
+        {
+                gfs3_lookup_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_lookup_rsp;
+
+                client_post_lookup (this, tmp_rsp,
+                                    &this_args_cbk->stat,
+                                    &this_args_cbk->postparent,
+                                    &this_args_cbk->xdata);
+                CLIENT_POST_FOP_TYPE (lookup, this_rsp, this_args_cbk,
+                                      local->loc.inode,
+                                      &this_args_cbk->stat,
+                                      this_args_cbk->xdata,
+                                      &this_args_cbk->postparent);
+                break;
+        }
+        case GF_FOP_READDIR:
+        {
+                gfs3_readdir_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_readdir_rsp;
+
+                client_post_readdir (this, tmp_rsp,
+                                     &this_args_cbk->entries, &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (readdir, this_rsp, this_args_cbk,
+                                      &this_args_cbk->entries, this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_INODELK:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_inodelk_rsp;
+
+                client_post_inodelk (this, tmp_rsp,
+                                     &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (inodelk, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FINODELK:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_finodelk_rsp;
+
+                client_post_finodelk (this, tmp_rsp,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (finodelk, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                if (tmp_rsp->op_ret == 0)
+                        if (local->attempt_reopen)
+                                client_attempt_reopen (local->fd, this);
+                break;
+        }
+        case GF_FOP_ENTRYLK:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_entrylk_rsp;
+
+                client_post_entrylk (this, tmp_rsp,
+                                     &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (entrylk, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FENTRYLK:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fentrylk_rsp;
+
+                client_post_fentrylk (this, tmp_rsp,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (fentrylk, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_XATTROP:
+        {
+                gfs3_xattrop_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_xattrop_rsp;
+
+                client_post_xattrop (this, tmp_rsp,
+                                     &this_args_cbk->xattr,
+                                     &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (xattrop, this_rsp, this_args_cbk,
+                                      this_args_cbk->xattr,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FXATTROP:
+        {
+                gfs3_fxattrop_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fxattrop_rsp;
+
+                client_post_fxattrop (this, tmp_rsp,
+                                      &this_args_cbk->xattr,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (fxattrop, this_rsp, this_args_cbk,
+                                      this_args_cbk->xattr,
+                                      this_args_cbk->xdata);
+                /* Test this fop's own result, as the other fd-based cases
+                 * do, not the op_ret of the whole compound response. */
+                if (tmp_rsp->op_ret == 0)
+                        if (local->attempt_reopen)
+                                client_attempt_reopen (local->fd, this);
+                break;
+        }
+        case GF_FOP_FGETXATTR:
+        {
+                gfs3_fgetxattr_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fgetxattr_rsp;
+
+                client_post_fgetxattr (this, tmp_rsp,
+                                       &this_args_cbk->xattr,
+                                       &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (fgetxattr, this_rsp, this_args_cbk,
+                                      this_args_cbk->xattr,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FSETXATTR:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fsetxattr_rsp;
+
+                client_post_fsetxattr (this, tmp_rsp,
+                                       &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (fsetxattr, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_RCHECKSUM:
+        {
+                gfs3_rchecksum_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_rchecksum_rsp;
+
+                client_post_rchecksum (this, tmp_rsp,
+                                       &this_args_cbk->xdata);
+
+                /* A stray break used to sit here and made the store below
+                 * unreachable, so the rchecksum reply was never recorded. */
+                CLIENT_POST_FOP_TYPE (rchecksum, this_rsp, this_args_cbk,
+                                      tmp_rsp->weak_checksum,
+                                      (uint8_t*)tmp_rsp->strong_checksum.strong_checksum_val,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_SETATTR:
+        {
+                gfs3_setattr_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_setattr_rsp;
+
+                client_post_setattr (this, tmp_rsp,
+                                     &this_args_cbk->prestat,
+                                     &this_args_cbk->poststat,
+                                     &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (setattr, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FSETATTR:
+        {
+                gfs3_fsetattr_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fsetattr_rsp;
+
+                client_post_fsetattr (this, tmp_rsp,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (fsetattr, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_READDIRP:
+        {
+                gfs3_readdirp_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_readdirp_rsp;
+
+                client_post_readdirp (this, tmp_rsp, local->fd,
+                                      &this_args_cbk->entries,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (readdirp, this_rsp, this_args_cbk,
+                                      &this_args_cbk->entries,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FREMOVEXATTR:
+        {
+                gf_common_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fremovexattr_rsp;
+
+                client_post_fremovexattr (this, tmp_rsp,
+                                          &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP (fremovexattr, this_rsp, this_args_cbk,
+                                 this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_FALLOCATE:
+        {
+                gfs3_fallocate_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_fallocate_rsp;
+
+                client_post_fallocate (this, tmp_rsp,
+                                       &this_args_cbk->prestat,
+                                       &this_args_cbk->poststat,
+                                       &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (fallocate, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_DISCARD:
+        {
+                gfs3_discard_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_discard_rsp;
+
+                client_post_discard (this, tmp_rsp,
+                                     &this_args_cbk->prestat,
+                                     &this_args_cbk->poststat,
+                                     &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (discard, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_ZEROFILL:
+        {
+                gfs3_zerofill_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_zerofill_rsp;
+
+                client_post_zerofill (this, tmp_rsp,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (zerofill, this_rsp, this_args_cbk,
+                                      &this_args_cbk->prestat,
+                                      &this_args_cbk->poststat,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_IPC:
+        {
+                gfs3_ipc_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_ipc_rsp;
+
+                client_post_ipc (this, tmp_rsp, &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (ipc, this_rsp, this_args_cbk,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_SEEK:
+        {
+                gfs3_seek_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_seek_rsp;
+
+                client_post_seek (this, tmp_rsp, &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (seek, this_rsp, this_args_cbk,
+                                      tmp_rsp->offset,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        case GF_FOP_LEASE:
+        {
+                gfs3_lease_rsp *tmp_rsp = NULL;
+                tmp_rsp = &this_rsp->compound_rsp_u.compound_lease_rsp;
+
+                client_post_lease (this, tmp_rsp, &this_args_cbk->lease,
+                                   &this_args_cbk->xdata);
+
+                CLIENT_POST_FOP_TYPE (lease, this_rsp, this_args_cbk,
+                                      &this_args_cbk->lease,
+                                      this_args_cbk->xdata);
+                break;
+        }
+        default:
+                return -ENOTSUP;
+        }
+
+        /* NOTE(review): ret (set by client_add_fd_to_saved_fds and
+         * delete_granted_locks_owner above) is not propagated; confirm
+         * against callers whether that is intended. */
+        return 0;
+}
+
+int
+client_handle_fop_requirements (xlator_t *this, call_frame_t *frame,
+ gfs3_compound_req *req,
+ clnt_local_t *local,
+ struct iobref *req_iobref,
+ struct iobref *rsp_iobref,
+ struct iovec *req_vector,
+ struct iovec *rsp_vector, int *req_count,
+ int *rsp_count, default_args_t *args,
+ int fop_enum, int index)
+{
+ int ret = 0;
+ int op_errno = ENOMEM;
+ struct iobuf *rsp_iobuf = NULL;
+ int64_t remote_fd = -1;
+ compound_req *this_req = &req->compound_req_array.compound_req_array_val[index];
+
+ this_req->fop_enum = fop_enum;
+
+ switch (fop_enum) {
+ case GF_FOP_STAT:
+ CLIENT_PRE_FOP (stat, this,
+ &this_req->compound_req_u.compound_stat_req,
+ op_errno, out,
+ &args->loc, args->xdata);
+ break;
+ case GF_FOP_READLINK:
+ CLIENT_PRE_FOP (readlink, this,
+ &this_req->compound_req_u.compound_readlink_req,
+ op_errno, out,
+ &args->loc, args->size, args->xdata);
+ break;
+ case GF_FOP_MKNOD:
+ CLIENT_PRE_FOP (mknod, this,
+ &this_req->compound_req_u.compound_mknod_req,
+ op_errno, out,
+ &args->loc, args->mode, args->rdev,
+ args->umask, args->xdata);
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_MKDIR:
+ CLIENT_PRE_FOP (mkdir, this,
+ &this_req->compound_req_u.compound_mkdir_req,
+ op_errno, out,
+ &args->loc, args->mode,
+ args->umask, args->xdata);
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_UNLINK:
+ CLIENT_PRE_FOP (unlink, this,
+ &this_req->compound_req_u.compound_unlink_req,
+ op_errno, out,
+ &args->loc, args->xflag, args->xdata);
+ break;
+ case GF_FOP_RMDIR:
+ CLIENT_PRE_FOP (rmdir, this,
+ &this_req->compound_req_u.compound_rmdir_req,
+ op_errno, out,
+ &args->loc, args->flags, args->xdata);
+ break;
+ case GF_FOP_SYMLINK:
+ CLIENT_PRE_FOP (symlink, this,
+ &this_req->compound_req_u.compound_symlink_req,
+ op_errno, out,
+ &args->loc, args->linkname, args->umask,
+ args->xdata);
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_RENAME:
+ CLIENT_PRE_FOP (rename, this,
+ &this_req->compound_req_u.compound_rename_req,
+ op_errno, out,
+ &args->loc, &args->loc2, args->xdata);
+ break;
+ case GF_FOP_LINK:
+ CLIENT_PRE_FOP (link, this,
+ &this_req->compound_req_u.compound_link_req,
+ op_errno, out,
+ &args->loc, &args->loc2, args->xdata);
+ break;
+ case GF_FOP_TRUNCATE:
+ CLIENT_PRE_FOP (truncate, this,
+ &this_req->compound_req_u.compound_truncate_req,
+ op_errno, out,
+ &args->loc, args->offset, args->xdata);
+ break;
+ case GF_FOP_OPEN:
+ CLIENT_PRE_FOP (open, this,
+ &this_req->compound_req_u.compound_open_req,
+ op_errno, out,
+ &args->loc, args->fd, args->flags,
+ args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ break;
+ case GF_FOP_READ:
+ op_errno = client_pre_readv (this,
+ &this_req->compound_req_u.compound_read_req,
+ args->fd, args->size, args->offset,
+ args->flags, args->xdata);
+
+ if (op_errno) {
+ op_errno = -op_errno;
+ goto out;
+ }
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ local->attempt_reopen = client_is_reopen_needed
+ (args->fd, this, remote_fd);
+ rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, args->size);
+ if (rsp_iobuf == NULL) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (!rsp_iobref) {
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ if (*rsp_count + 1 >= MAX_IOVEC) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ rsp_vector[*rsp_count].iov_base = iobuf_ptr (rsp_iobuf);
+ rsp_vector[*rsp_count].iov_len = iobuf_pagesize (rsp_iobuf);
+ rsp_iobuf = NULL;
+ if (args->size > rsp_vector[*rsp_count].iov_len) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ PC_MSG_NO_MEMORY,
+ "read-size (%lu) is bigger than iobuf size "
+ "(%lu)",
+ (unsigned long)args->size,
+ (unsigned long)rsp_vector[*rsp_count].iov_len);
+ op_errno = EINVAL;
+ goto out;
+ }
+ *rsp_count += 1;
+
+ break;
+ case GF_FOP_WRITE:
+ op_errno = client_pre_writev (this,
+ &this_req->compound_req_u.compound_write_req,
+ args->fd, args->count, args->offset,
+ args->flags, args->xdata);
+
+ if (op_errno) {
+ op_errno = -op_errno;
+ goto out;
+ }
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ local->attempt_reopen = client_is_reopen_needed
+ (args->fd, this, remote_fd);
+
+ if (*req_count + 1 >= MAX_IOVEC) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ memcpy (&req_vector[*req_count], args->vector,
+ (args->count * sizeof(req_vector[0])));
+ *req_count += args->count;
+
+ if (!req_iobref)
+ req_iobref = args->iobref;
+ else
+ if (iobref_merge (req_iobref, args->iobref))
+ goto out;
+ break;
+ case GF_FOP_STATFS:
+ CLIENT_PRE_FOP (statfs, this,
+ &this_req->compound_req_u.compound_statfs_req,
+ op_errno, out,
+ &args->loc, args->xdata);
+ break;
+ case GF_FOP_FLUSH:
+ CLIENT_PRE_FOP (flush, this,
+ &this_req->compound_req_u.compound_flush_req,
+ op_errno, out,
+ args->fd, args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ local->owner = frame->root->lk_owner;
+ break;
+ case GF_FOP_FSYNC:
+ CLIENT_PRE_FOP (fsync, this,
+ &this_req->compound_req_u.compound_fsync_req,
+ op_errno, out,
+ args->fd, args->datasync, args->xdata);
+ break;
+ case GF_FOP_SETXATTR:
+ CLIENT_PRE_FOP (setxattr, this,
+ &this_req->compound_req_u.compound_setxattr_req,
+ op_errno, out,
+ &args->loc, args->xattr, args->flags,
+ args->xdata);
+ break;
+ case GF_FOP_GETXATTR:
+ CLIENT_PRE_FOP (getxattr, this,
+ &this_req->compound_req_u.compound_getxattr_req,
+ op_errno, out,
+ &args->loc, args->name, args->xdata);
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_REMOVEXATTR:
+ CLIENT_PRE_FOP (removexattr, this,
+ &this_req->compound_req_u.compound_removexattr_req,
+ op_errno, out,
+ &args->loc, args->name, args->xdata);
+ break;
+ case GF_FOP_OPENDIR:
+ CLIENT_PRE_FOP (opendir, this,
+ &this_req->compound_req_u.compound_opendir_req,
+ op_errno, out,
+ &args->loc, args->fd, args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_FSYNCDIR:
+ CLIENT_PRE_FOP (fsyncdir, this,
+ &this_req->compound_req_u.compound_fsyncdir_req,
+ op_errno, out,
+ args->fd, args->datasync, args->xdata);
+ break;
+ case GF_FOP_ACCESS:
+ CLIENT_PRE_FOP (access, this,
+ &this_req->compound_req_u.compound_access_req,
+ op_errno, out,
+ &args->loc, args->mask, args->xdata);
+ break;
+ case GF_FOP_CREATE:
+ CLIENT_PRE_FOP (create, this,
+ &this_req->compound_req_u.compound_create_req,
+ op_errno, out,
+ &args->loc, args->fd, args->mode, args->flags,
+ args->umask, args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_FTRUNCATE:
+ CLIENT_PRE_FOP (ftruncate, this,
+ &this_req->compound_req_u.compound_ftruncate_req,
+ op_errno, out,
+ args->fd, args->offset, args->xdata);
+ break;
+ case GF_FOP_FSTAT:
+ CLIENT_PRE_FOP (fstat, this,
+ &this_req->compound_req_u.compound_fstat_req,
+ op_errno, out,
+ args->fd, args->xdata);
+ break;
+ case GF_FOP_LK:
+ CLIENT_PRE_FOP (lk, this,
+ &this_req->compound_req_u.compound_lk_req,
+ op_errno, out,
+ args->cmd, &args->lock, args->fd, args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ local->owner = frame->root->lk_owner;
+ break;
+ case GF_FOP_LOOKUP:
+ CLIENT_PRE_FOP (lookup, this,
+ &this_req->compound_req_u.compound_lookup_req,
+ op_errno, out,
+ &args->loc, args->xdata);
+ if (!&local->loc) {
+ loc_copy (&local->loc, &args->loc);
+ loc_path (&local->loc, NULL);
+ }
+ break;
+ case GF_FOP_READDIR:
+ CLIENT_PRE_FOP (readdir, this,
+ &this_req->compound_req_u.compound_readdir_req,
+ op_errno, out,
+ args->fd, args->size, args->offset,
+ args->xdata);
+ break;
+ case GF_FOP_INODELK:
+ CLIENT_PRE_FOP (inodelk, this,
+ &this_req->compound_req_u.compound_inodelk_req,
+ op_errno, out,
+ &args->loc, args->cmd, &args->lock,
+ args->volume, args->xdata);
+ break;
+ case GF_FOP_FINODELK:
+ CLIENT_PRE_FOP (finodelk, this,
+ &this_req->compound_req_u.compound_finodelk_req,
+ op_errno, out,
+ args->fd, args->cmd, &args->lock,
+ args->volume, args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ local->attempt_reopen = client_is_reopen_needed
+ (args->fd, this, remote_fd);
+ break;
+ case GF_FOP_ENTRYLK:
+ CLIENT_PRE_FOP (entrylk, this,
+ &this_req->compound_req_u.compound_entrylk_req,
+ op_errno, out,
+ &args->loc, args->entrylkcmd,
+ args->entrylktype, args->volume,
+ args->name, args->xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ CLIENT_PRE_FOP (fentrylk, this,
+ &this_req->compound_req_u.compound_fentrylk_req,
+ op_errno, out,
+ args->fd, args->entrylkcmd,
+ args->entrylktype, args->volume,
+ args->name, args->xdata);
+ break;
+ case GF_FOP_XATTROP:
+ CLIENT_PRE_FOP (xattrop, this,
+ &this_req->compound_req_u.compound_xattrop_req,
+ op_errno, out,
+ &args->loc, args->xattr, args->optype,
+ args->xdata);
+ break;
+ case GF_FOP_FXATTROP:
+ CLIENT_PRE_FOP (fxattrop, this,
+ &this_req->compound_req_u.compound_fxattrop_req,
+ op_errno, out,
+ args->fd, args->xattr, args->optype,
+ args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ local->attempt_reopen = client_is_reopen_needed
+ (args->fd, this, remote_fd);
+ break;
+ case GF_FOP_FGETXATTR:
+ CLIENT_PRE_FOP (fgetxattr, this,
+ &this_req->compound_req_u.compound_fgetxattr_req,
+ op_errno, out,
+ args->fd, args->name, args->xdata);
+ break;
+ case GF_FOP_FSETXATTR:
+ CLIENT_PRE_FOP (fsetxattr, this,
+ &this_req->compound_req_u.compound_fsetxattr_req,
+ op_errno, out,
+ args->fd, args->flags, args->xattr,
+ args->xdata);
+ break;
+ case GF_FOP_RCHECKSUM:
+ CLIENT_PRE_FOP (rchecksum, this,
+ &this_req->compound_req_u.compound_rchecksum_req,
+ op_errno, out,
+ args->fd, args->size, args->offset,
+ args->xdata);
+ break;
+ case GF_FOP_SETATTR:
+ CLIENT_PRE_FOP (setattr, this,
+ &this_req->compound_req_u.compound_setattr_req,
+ op_errno, out,
+ &args->loc, args->valid, &args->stat,
+ args->xdata);
+ break;
+ case GF_FOP_FSETATTR:
+ CLIENT_PRE_FOP (fsetattr, this,
+ &this_req->compound_req_u.compound_fsetattr_req,
+ op_errno, out,
+ args->fd, args->valid, &args->stat,
+ args->xdata);
+ break;
+ case GF_FOP_READDIRP:
+ CLIENT_PRE_FOP (readdirp, this,
+ &this_req->compound_req_u.compound_readdirp_req,
+ op_errno, out,
+ args->fd, args->size, args->offset,
+ args->xdata);
+ if (!local->fd)
+ local->fd = fd_ref (args->fd);
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ CLIENT_PRE_FOP (fremovexattr, this,
+ &this_req->compound_req_u.compound_fremovexattr_req,
+ op_errno, out,
+ args->fd, args->name, args->xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ CLIENT_PRE_FOP (fallocate, this,
+ &this_req->compound_req_u.compound_fallocate_req,
+ op_errno, out,
+ args->fd, args->flags, args->offset,
+ args->size, args->xdata);
+ break;
+ case GF_FOP_DISCARD:
+ CLIENT_PRE_FOP (discard, this,
+ &this_req->compound_req_u.compound_discard_req,
+ op_errno, out,
+ args->fd, args->offset, args->size,
+ args->xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ CLIENT_PRE_FOP (zerofill, this,
+ &this_req->compound_req_u.compound_zerofill_req,
+ op_errno, out,
+ args->fd, args->offset, args->size,
+ args->xdata);
+ break;
+ case GF_FOP_IPC:
+ CLIENT_PRE_FOP (ipc, this,
+ &this_req->compound_req_u.compound_ipc_req,
+ op_errno, out,
+ args->cmd, args->xdata);
+ break;
+ case GF_FOP_SEEK:
+ CLIENT_PRE_FOP (seek, this,
+ &this_req->compound_req_u.compound_seek_req,
+ op_errno, out,
+ args->fd, args->offset, args->what,
+ args->xdata);
+ break;
+ case GF_FOP_LEASE:
+ CLIENT_PRE_FOP (lease, this,
+ &this_req->compound_req_u.compound_lease_req,
+ op_errno, out, &args->loc, &args->lease,
+ args->xdata);
+ default:
+ return ENOTSUP;
+ }
+ return 0;
+out:
+ return op_errno;
+}
+
+/*
+ * Release per-fop memory held by each entry of a compound request.
+ *
+ * Iterates over req->compound_req_array and, based on each entry's
+ * fop_enum, invokes CLIENT_COMPOUND_FOP_CLEANUP (a macro defined outside
+ * this chunk) on the matching request member. The setxattr, xattrop,
+ * fxattrop and fsetxattr cases first GF_FREE the request's dict.dict_val.
+ * Entries with an unrecognised fop_enum are left untouched. The request
+ * array itself is not freed by this function.
+ */
+void
+compound_request_cleanup (gfs3_compound_req *req)
+{
+ int i = 0;
+ int length = req->compound_req_array.compound_req_array_len;
+ compound_req *curr_req = NULL;
+
+
+ for (i = 0; i < length; i++) {
+ curr_req = &req->compound_req_array.compound_req_array_val[i];
+
+ switch (curr_req->fop_enum) {
+ case GF_FOP_STAT:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, stat);
+ break;
+ case GF_FOP_READLINK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, readlink);
+ break;
+ case GF_FOP_MKNOD:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, mknod);
+ break;
+ case GF_FOP_MKDIR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, mkdir);
+ break;
+ case GF_FOP_UNLINK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, unlink);
+ break;
+ case GF_FOP_RMDIR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, rmdir);
+ break;
+ case GF_FOP_SYMLINK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, symlink);
+ break;
+ case GF_FOP_RENAME:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, rename);
+ break;
+ case GF_FOP_LINK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, link);
+ break;
+ case GF_FOP_TRUNCATE:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, truncate);
+ break;
+ case GF_FOP_OPEN:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, open);
+ break;
+ case GF_FOP_READ:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, read);
+ break;
+ case GF_FOP_WRITE:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, write);
+ break;
+ case GF_FOP_STATFS:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, statfs);
+ break;
+ case GF_FOP_FLUSH:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, flush);
+ break;
+ case GF_FOP_FSYNC:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fsync);
+ break;
+ case GF_FOP_SETXATTR:
+ {
+ /* args is a by-value copy of the request; GF_FREE releases the
+ * buffer the original's dict.dict_val also points at, but does
+ * not reset the pointer stored in curr_req. */
+ gfs3_setxattr_req args = curr_req->compound_req_u.compound_setxattr_req;
+ GF_FREE (args.dict.dict_val);
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, setxattr);
+ break;
+ }
+ case GF_FOP_GETXATTR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, getxattr);
+ break;
+ case GF_FOP_REMOVEXATTR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, removexattr);
+ break;
+ case GF_FOP_OPENDIR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, opendir);
+ break;
+ case GF_FOP_FSYNCDIR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fsyncdir);
+ break;
+ case GF_FOP_ACCESS:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, access);
+ break;
+ case GF_FOP_CREATE:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, create);
+ break;
+ case GF_FOP_FTRUNCATE:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, ftruncate);
+ break;
+ case GF_FOP_FSTAT:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fstat);
+ break;
+ case GF_FOP_LK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, lk);
+ break;
+ case GF_FOP_LOOKUP:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, lookup);
+ break;
+ case GF_FOP_READDIR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, readdir);
+ break;
+ case GF_FOP_INODELK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, inodelk);
+ break;
+ case GF_FOP_FINODELK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, finodelk);
+ break;
+ case GF_FOP_ENTRYLK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, entrylk);
+ break;
+ case GF_FOP_FENTRYLK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fentrylk);
+ break;
+ case GF_FOP_XATTROP:
+ {
+ /* Same by-value copy pattern as the setxattr case. */
+ gfs3_xattrop_req args = curr_req->compound_req_u.compound_xattrop_req;
+ GF_FREE (args.dict.dict_val);
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, xattrop);
+ break;
+ }
+ case GF_FOP_FXATTROP:
+ {
+ /* Same by-value copy pattern as the setxattr case. */
+ gfs3_fxattrop_req args = curr_req->compound_req_u.compound_fxattrop_req;
+ GF_FREE (args.dict.dict_val);
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fxattrop);
+ break;
+ }
+ case GF_FOP_FGETXATTR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fgetxattr);
+ break;
+ case GF_FOP_FSETXATTR:
+ {
+ /* Same by-value copy pattern as the setxattr case. */
+ gfs3_fsetxattr_req args = curr_req->compound_req_u.compound_fsetxattr_req;
+ GF_FREE (args.dict.dict_val);
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fsetxattr);
+ break;
+ }
+ case GF_FOP_RCHECKSUM:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, rchecksum);
+ break;
+ case GF_FOP_SETATTR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, setattr);
+ break;
+ case GF_FOP_FSETATTR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fsetattr);
+ break;
+ case GF_FOP_READDIRP:
+ {
+ /* NOTE(review): unlike every other case, the cleanup macro is
+ * not invoked here; only dict.dict_val is freed. Presumably the
+ * readdirp request has no other member to release - confirm
+ * against the gfs3_readdirp_req definition. */
+ gfs3_readdirp_req args = curr_req->compound_req_u.compound_readdirp_req;
+ GF_FREE (args.dict.dict_val);
+ break;
+ }
+ case GF_FOP_FREMOVEXATTR:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fremovexattr);
+ break;
+ case GF_FOP_FALLOCATE:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, fallocate);
+ break;
+ case GF_FOP_DISCARD:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, discard);
+ break;
+ case GF_FOP_ZEROFILL:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, zerofill);
+ break;
+ case GF_FOP_IPC:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, ipc);
+ break;
+ case GF_FOP_SEEK:
+ CLIENT_COMPOUND_FOP_CLEANUP (curr_req, seek);
+ break;
+ /* NOTE(review): there is no case for GF_FOP_LEASE, so such an
+ * entry reaches default and is not cleaned up - confirm whether
+ * that is intended. */
+ default:
+ break;
+ }
+ }
+
+ return;
+}
+
+/*
+ * Free every node of the lock list carried in a getactivelk response.
+ *
+ * Each node and its client_uid string are released with libc free()
+ * (not GF_FREE); presumably they were allocated by the XDR decoder -
+ * TODO confirm. rsp->reply is left pointing at the freed head.
+ */
+void
+clnt_getactivelk_rsp_cleanup (gfs3_getactivelk_rsp *rsp)
+{
+        gfs3_locklist *entry     = rsp->reply;
+        gfs3_locklist *following = NULL;
+
+        for (; entry != NULL; entry = following) {
+                /* remember the successor before the node goes away */
+                following = entry->nextentry;
+                free (entry->client_uid);
+                free (entry);
+        }
+}
+
+/*
+ * Convert the wire-format lock list of a getactivelk response into
+ * lock_migration_info_t entries appended to lmi->list.
+ *
+ * @this: client xlator; its private data must be set.
+ * @rsp:  response whose reply list is walked (not modified).
+ * @lmi:  list head receiving one newly allocated entry per wire entry.
+ *
+ * Returns 0 on success, -1 if the xlator has no private data or an
+ * allocation fails. On failure, entries already appended to lmi->list
+ * stay there; this function does not unlink or free them.
+ */
+int
+clnt_unserialize_rsp_locklist (xlator_t *this, struct gfs3_getactivelk_rsp *rsp,
+                               lock_migration_info_t *lmi)
+{
+        struct gfs3_locklist  *trav = NULL;
+        lock_migration_info_t *temp = NULL;
+        int                    ret  = -1;
+        clnt_conf_t           *conf = NULL;
+
+        trav = rsp->reply;
+
+        conf = this->private;
+        if (!conf)
+                goto out;
+
+        while (trav) {
+                temp = GF_CALLOC (1, sizeof (*temp), gf_common_mt_lock_mig);
+                if (temp == NULL) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "No memory");
+                        goto out;
+                }
+
+                INIT_LIST_HEAD (&temp->list);
+
+                gf_proto_flock_to_flock (&trav->flock, &temp->flock);
+
+                temp->lk_flags = trav->lk_flags;
+
+                temp->client_uid = gf_strdup (trav->client_uid);
+                if (temp->client_uid == NULL) {
+                        /* Do not queue an entry with a missing client_uid;
+                         * it is not on any list yet, so free it here. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "No memory");
+                        GF_FREE (temp);
+                        goto out;
+                }
+
+                list_add_tail (&temp->list, &lmi->list);
+
+                trav = trav->nextentry;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Free the lock list attached to a setactivelk request.
+ *
+ * Nodes and their client_uid strings are released with GF_FREE, matching
+ * the GF_CALLOC/gf_strdup allocations made in serialize_req_locklist.
+ * req->request is left pointing at the freed head.
+ */
+void
+clnt_setactivelk_req_cleanup (gfs3_setactivelk_req *req)
+{
+        gfs3_locklist *entry     = req->request;
+        gfs3_locklist *following = NULL;
+
+        for (; entry != NULL; entry = following) {
+                /* remember the successor before the node goes away */
+                following = entry->nextentry;
+                GF_FREE (entry->client_uid);
+                GF_FREE (entry);
+        }
+}
+
+/*
+ * Build the wire-format lock list of a setactivelk request from a list of
+ * lock_migration_info_t entries.
+ *
+ * @locklist: list head whose entries are serialized. Note that each
+ *            entry's flock.l_type is rewritten in place to the GF_LK_F_*
+ *            value before conversion.
+ * @req:      request whose ->request chain receives the new nodes.
+ *
+ * Returns 0 on success and -1 on a NULL argument or allocation failure.
+ * On failure, nodes already linked into req->request stay linked; only the
+ * node under construction is freed here.
+ */
+int
+serialize_req_locklist (lock_migration_info_t *locklist,
+                        gfs3_setactivelk_req *req)
+{
+        lock_migration_info_t *src  = NULL;
+        gfs3_locklist         *node = NULL;
+        gfs3_locklist         *tail = NULL;
+        int                    ret  = -1;
+
+        GF_VALIDATE_OR_GOTO ("server", locklist, out);
+        GF_VALIDATE_OR_GOTO ("server", req, out);
+
+        list_for_each_entry (src, &locklist->list, list) {
+                node = GF_CALLOC (1, sizeof (*node),
+                                  gf_client_mt_clnt_lock_request_t);
+                if (node == NULL)
+                        goto out;
+
+                /* translate the lock type to its protocol constant */
+                if (src->flock.l_type == F_RDLCK) {
+                        src->flock.l_type = GF_LK_F_RDLCK;
+                } else if (src->flock.l_type == F_WRLCK) {
+                        src->flock.l_type = GF_LK_F_WRLCK;
+                } else if (src->flock.l_type == F_UNLCK) {
+                        src->flock.l_type = GF_LK_F_UNLCK;
+                } else {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+                                "Unknown lock type: %"PRId32"!",
+                                src->flock.l_type);
+                }
+
+                gf_proto_flock_from_flock (&node->flock, &src->flock);
+                node->lk_flags = src->lk_flags;
+
+                node->client_uid = gf_strdup (src->client_uid);
+                if (node->client_uid == NULL) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+                                "client_uid could not be allocated");
+                        goto out;
+                }
+
+                /* append to the chain; the first node becomes its head */
+                if (tail == NULL)
+                        req->request = node;
+                else
+                        tail->nextentry = node;
+
+                tail = node;
+                node = NULL;    /* now owned by the chain */
+        }
+
+        ret = 0;
+out:
+        GF_FREE (node);
+
+        return ret;
+}
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
new file mode 100644
index 00000000000..0cf2be3c562
--- /dev/null
+++ b/xlators/protocol/client/src/client-lk.c
@@ -0,0 +1,576 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "common-utils.h"
+#include "xlator.h"
+#include "client.h"
+#include "lkowner.h"
+#include "client-messages.h"
+
+static void
+__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock);
+
+/*
+ * Log one client-side lock record at INFO level: its fd, type (anything
+ * other than F_WRLCK is reported as "Read-Lock"), owner, the user-supplied
+ * start/length and the internal start/end range.
+ */
+static void
+__dump_client_lock (client_posix_lock_t *lock)
+{
+        xlator_t   *this      = THIS;
+        const char *type_name = "Read-Lock";
+
+        if (lock->fl_type == F_WRLCK)
+                type_name = "Write-Lock";
+
+        gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CLIENT_LOCK_INFO,
+                "{fd=%p}"
+                "{%s lk-owner:%s %"PRId64" - %"PRId64"}"
+                "{start=%"PRId64" end=%"PRId64"}",
+                lock->fd,
+                type_name,
+                lkowner_utoa (&lock->owner),
+                lock->user_flock.l_start,
+                lock->user_flock.l_len,
+                lock->fl_start,
+                lock->fl_end);
+}
+
+/*
+ * Log every lock recorded on one fd context while holding fdctx->mutex.
+ * Returns the number of locks logged.
+ */
+static int
+dump_client_locks_fd (clnt_fd_ctx_t *fdctx)
+{
+        client_posix_lock_t *entry  = NULL;
+        int                  dumped = 0;
+
+        pthread_mutex_lock (&fdctx->mutex);
+        list_for_each_entry (entry, &fdctx->lock_list, list) {
+                __dump_client_lock (entry);
+                ++dumped;
+        }
+        pthread_mutex_unlock (&fdctx->mutex);
+
+        return dumped;
+}
+
+/*
+ * Log the client-side lock records of every fd open on @inode.
+ *
+ * Walks inode->fd_list under inode->lock; for each fd the client fd
+ * context is looked up under conf->lock and, when present, its locks are
+ * dumped. Returns the total number of locks logged.
+ */
+int
+dump_client_locks (inode_t *inode)
+{
+        xlator_t      *this  = THIS;
+        clnt_conf_t   *conf  = this->private;
+        fd_t          *fd    = NULL;
+        clnt_fd_ctx_t *fdctx = NULL;
+        int            total = 0;
+
+        LOCK (&inode->lock);
+        list_for_each_entry (fd, &inode->fd_list, inode_list) {
+                pthread_mutex_lock (&conf->lock);
+                fdctx = this_fd_get_ctx (fd, this);
+                pthread_mutex_unlock (&conf->lock);
+
+                /* fds without a client context contribute nothing */
+                if (fdctx != NULL)
+                        total += dump_client_locks_fd (fdctx);
+        }
+        UNLOCK (&inode->lock);
+
+        return total;
+}
+
+/*
+ * Length of the inclusive byte range [start, end]. An end of LLONG_MAX
+ * denotes "to end of file" and yields 0, mirroring the l_len == 0
+ * handling in new_client_lock.
+ */
+static off_t
+__get_lock_length (off_t start, off_t end)
+{
+        return (end == LLONG_MAX) ? 0 : (end - start + 1);
+}
+
+/*
+ * Allocate a lock record spanning the union of two lock ranges.
+ *
+ * Only fl_start, fl_end and user_flock.l_start/l_len are filled in here;
+ * every other field (fd, owner, fl_type, ...) is left as GF_CALLOC
+ * returned it and must be set by the caller. Returns NULL when the
+ * allocation fails.
+ */
+static client_posix_lock_t *
+add_locks (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+        client_posix_lock_t *merged = GF_CALLOC (1, sizeof (*merged),
+                                                 gf_client_mt_clnt_lock_t);
+
+        if (merged == NULL)
+                return NULL;
+
+        merged->fl_start = min (l1->fl_start, l2->fl_start);
+        merged->fl_end   = max (l1->fl_end, l2->fl_end);
+
+        merged->user_flock.l_start = merged->fl_start;
+        merged->user_flock.l_len   = __get_lock_length (merged->fl_start,
+                                                        merged->fl_end);
+
+        return merged;
+}
+
+
+/*
+ * Return 1 when the inclusive ranges of l1 and l2 share at least one
+ * byte, 0 otherwise.
+ *
+ * Note: FUSE always gives us absolute offsets, so no need to worry
+ * about SEEK_CUR or SEEK_END.
+ */
+static int
+locks_overlap (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+        /* l1 ends before l2 begins */
+        if (l1->fl_end < l2->fl_start)
+                return 0;
+
+        /* l2 ends before l1 begins */
+        if (l2->fl_end < l1->fl_start)
+                return 0;
+
+        return 1;
+}
+
+/* Unlink a lock record from whatever list it is on and re-initialise its
+ * list node; the record itself is not freed. */
+static void
+__delete_client_lock (client_posix_lock_t *lock)
+{
+ list_del_init (&lock->list);
+}
+
+/* Free a client lock record. The record is not unlinked first, so callers
+ * use __delete_client_lock beforehand when it may still be on a list. */
+static void
+__destroy_client_lock (client_posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+/* Result of subtract_locks: up to three newly allocated lock records;
+ * unused slots are NULL. */
+struct _values {
+ client_posix_lock_t *locks[3];
+};
+
+/*
+ * Split {big} around {small}. {small} must always be contained inside {big}.
+ *
+ * Returns up to three newly allocated records in v.locks[]:
+ *  - identical ranges: one copy of big carrying small's fl_type;
+ *  - small strictly inside big: left remainder of big, copy of small,
+ *    right remainder of big;
+ *  - shared start or shared end: one remainder of big plus a copy of small.
+ * If none of these cases match, a critical message is logged and all slots
+ * stay NULL. Neither input is modified or freed.
+ *
+ * NOTE(review): allocation results are only checked with GF_ASSERT; if
+ * that macro does not abort, the following memcpy writes through NULL.
+ */
+static struct _values
+subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small)
+{
+ struct _values v = { .locks = {0, 0, 0} };
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t );
+ GF_ASSERT (v.locks[0]);
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_type = small->fl_type;
+ }
+ else if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+ v.locks[2] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[2]);
+
+ /* left remainder: big's start up to just before small */
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start,
+ v.locks[0]->fl_end);
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ /* right remainder: just after small up to big's end */
+ memcpy (v.locks[2], big, sizeof (client_posix_lock_t));
+ v.locks[2]->fl_start = small->fl_end + 1;
+ v.locks[2]->user_flock.l_start = small->fl_end + 1;
+ /* NOTE(review): user_flock.l_len of the right remainder is still
+ * big's length and is not recomputed - confirm whether intended. */
+ }
+ /* one edge coincides with big */
+ else if (small->fl_start == big->fl_start) {
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+
+ /* remainder of big lies to the right of small */
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_start = small->fl_end + 1;
+ v.locks[0]->user_flock.l_start = small->fl_end + 1;
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ }
+ else if (small->fl_end == big->fl_end) {
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+
+ /* remainder of big lies to the left of small */
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start,
+ v.locks[0]->fl_end);
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ }
+ else {
+ /* LOG-TODO : decide what more info is required here*/
+ gf_msg ("client-protocol", GF_LOG_CRITICAL, 0,
+ PC_MSG_LOCK_ERROR,
+ "Unexpected case in subtract_locks. Please send "
+ "a bug report to gluster-devel@gluster.org");
+ }
+
+ return v;
+}
+
+/* Remove and free every F_UNLCK record on the fd context's lock list. */
+static void
+__delete_unlck_locks (clnt_fd_ctx_t *fdctx)
+{
+        client_posix_lock_t *cur = NULL;
+        client_posix_lock_t *nxt = NULL;
+
+        list_for_each_entry_safe (cur, nxt, &fdctx->lock_list, list) {
+                if (cur->fl_type != F_UNLCK)
+                        continue;
+
+                __delete_client_lock (cur);
+                __destroy_client_lock (cur);
+        }
+}
+
+/* Append a lock record to the tail of the fd context's lock list. */
+static void
+__insert_lock (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+        list_add_tail (&lock->list, &fdctx->lock_list);
+}
+
+/*
+ * Record @lock on fdctx->lock_list, merging with or splitting against
+ * overlapping records held by the same lk-owner.
+ *
+ *  - Same owner, same type: the two ranges are replaced by their union.
+ *  - Same owner, different type: the union is split around @lock with
+ *    subtract_locks, the pieces are re-inserted, and any F_UNLCK pieces
+ *    are purged afterwards.
+ *  - Different owner: two overlapping read locks coexist, so @lock is
+ *    inserted as is; an F_UNLCK request skips foreign records.
+ *
+ * Takes ownership of @lock: it is inserted, replaced by a merged record,
+ * or freed. client_setlk calls this with fdctx->mutex held.
+ *
+ * The merged record from add_locks carries only the range, so its type
+ * and owner are filled in here; otherwise merged write locks would end up
+ * with type 0 and an empty owner. If the merged record cannot be
+ * allocated, merging is abandoned and @lock is handled by the
+ * no-conflict path at the bottom.
+ */
+static void
+__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+        client_posix_lock_t *conf = NULL;
+        client_posix_lock_t *t = NULL;
+        client_posix_lock_t *sum = NULL;
+        int i = 0;
+        struct _values v = { .locks = {0, 0, 0} };
+
+        list_for_each_entry_safe (conf, t, &fdctx->lock_list, list) {
+                if (!locks_overlap (conf, lock))
+                        continue;
+
+                if (is_same_lkowner (&conf->owner, &lock->owner)) {
+                        sum = add_locks (lock, conf);
+                        if (!sum) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM,
+                                        PC_MSG_NO_MEMORY,
+                                        "failed to allocate merged lock");
+                                break;
+                        }
+
+                        if (conf->fl_type == lock->fl_type) {
+                                sum->fd = lock->fd;
+                                sum->fl_type = lock->fl_type;
+                                sum->owner = lock->owner;
+                                sum->cmd = lock->cmd;
+
+                                __delete_client_lock (conf);
+                                __destroy_client_lock (conf);
+
+                                __destroy_client_lock (lock);
+                                /* the union may now touch further records */
+                                __insert_and_merge (fdctx, sum);
+
+                                return;
+                        } else {
+                                sum->fd = conf->fd;
+                                sum->owner = conf->owner;
+                                /* remainders cut from sum keep conf's type */
+                                sum->fl_type = conf->fl_type;
+
+                                v = subtract_locks (sum, lock);
+
+                                __delete_client_lock (conf);
+                                __destroy_client_lock (conf);
+
+                                __delete_client_lock (lock);
+                                __destroy_client_lock (lock);
+
+                                __destroy_client_lock (sum);
+
+                                for (i = 0; i < 3; i++) {
+                                        if (!v.locks[i])
+                                                continue;
+
+                                        /* pieces were memcpy'd, so their
+                                         * list nodes hold stale pointers */
+                                        INIT_LIST_HEAD (&v.locks[i]->list);
+                                        __insert_and_merge (fdctx,
+                                                            v.locks[i]);
+                                }
+
+                                __delete_unlck_locks (fdctx);
+                                return;
+                        }
+                }
+
+                if (lock->fl_type == F_UNLCK) {
+                        continue;
+                }
+
+                if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+                        __insert_lock (fdctx, lock);
+                        return;
+                }
+        }
+
+        /* no conflicts, so just insert */
+        if (lock->fl_type != F_UNLCK) {
+                __insert_lock (fdctx, lock);
+        } else {
+                __destroy_client_lock (lock);
+        }
+}
+
+/* Record @lock on the fd context, holding fdctx->mutex for the duration
+ * of the insert/merge. */
+static void
+client_setlk (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+        pthread_mutex_lock (&fdctx->mutex);
+        __insert_and_merge (fdctx, lock);
+        pthread_mutex_unlock (&fdctx->mutex);
+}
+
+/* Free a client lock record; the caller must already have unlinked it
+ * from any list. */
+static void
+destroy_client_lock (client_posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+/*
+ * Drop every client-side lock record on @fd that belongs to @owner.
+ *
+ * Matching records are moved to a private list under fdctx->mutex and
+ * freed after the mutex is released. Returns 0 on success and -1 when the
+ * fd has no client context.
+ *
+ * The owner test selects records whose owner equals @owner; the previous
+ * negated test removed every other owner's records instead.
+ */
+int32_t
+delete_granted_locks_owner (fd_t *fd, gf_lkowner_t *owner)
+{
+        clnt_fd_ctx_t       *fdctx = NULL;
+        client_posix_lock_t *lock  = NULL;
+        client_posix_lock_t *tmp   = NULL;
+        xlator_t            *this  = NULL;
+
+        struct list_head delete_list;
+        int              ret   = 0;
+        int              count = 0;
+
+        INIT_LIST_HEAD (&delete_list);
+        this = THIS;
+        fdctx = this_fd_get_ctx (fd, this);
+        if (!fdctx) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_FD_CTX_INVALID, "fdctx not valid");
+                ret = -1;
+                goto out;
+        }
+
+        pthread_mutex_lock (&fdctx->mutex);
+        {
+                list_for_each_entry_safe (lock, tmp, &fdctx->lock_list, list) {
+                        if (is_same_lkowner (&lock->owner, owner)) {
+                                list_del_init (&lock->list);
+                                list_add_tail (&lock->list, &delete_list);
+                                count++;
+                        }
+                }
+        }
+        pthread_mutex_unlock (&fdctx->mutex);
+
+        /* free outside the mutex; the records are private to us now */
+        list_for_each_entry_safe (lock, tmp, &delete_list, list) {
+                list_del_init (&lock->list);
+                destroy_client_lock (lock);
+        }
+
+        /* FIXME: Need to actually print the locks instead of count */
+        gf_msg_trace (this->name, 0,
+                      "Number of locks cleared=%d", count);
+
+out:
+        return ret;
+}
+
+/*
+ * Drop every client-side lock record held on an fd context.
+ *
+ * The whole lock list is spliced onto a private list under fdctx->mutex
+ * and the records are freed after the mutex is released. Always returns 0.
+ */
+int32_t
+delete_granted_locks_fd (clnt_fd_ctx_t *fdctx)
+{
+        struct list_head     doomed;
+        client_posix_lock_t *cur     = NULL;
+        client_posix_lock_t *nxt     = NULL;
+        xlator_t            *this    = NULL;
+        int                  cleared = 0;
+
+        INIT_LIST_HEAD (&doomed);
+        this = THIS;
+
+        pthread_mutex_lock (&fdctx->mutex);
+        list_splice_init (&fdctx->lock_list, &doomed);
+        pthread_mutex_unlock (&fdctx->mutex);
+
+        list_for_each_entry_safe (cur, nxt, &doomed, list) {
+                list_del_init (&cur->list);
+                ++cleared;
+                destroy_client_lock (cur);
+        }
+
+        /* FIXME: Need to actually print the locks instead of count */
+        gf_msg_trace (this->name, 0,
+                      "Number of locks cleared=%d", cleared);
+
+        return 0;
+}
+
+/*
+ * Translate an fcntl-style lock command into its GF_LK_* protocol value.
+ *
+ * On a recognised command *gf_cmd is set and 0 is returned; otherwise
+ * *gf_cmd is left untouched and -1 is returned. An if-chain is used
+ * rather than a switch because the F_*LK and F_*LK64 constants may share
+ * a value on some platforms.
+ */
+int32_t
+client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd)
+{
+        if (cmd == F_GETLK || cmd == F_GETLK64) {
+                *gf_cmd = GF_LK_GETLK;
+                return 0;
+        }
+
+        if (cmd == F_SETLK || cmd == F_SETLK64) {
+                *gf_cmd = GF_LK_SETLK;
+                return 0;
+        }
+
+        if (cmd == F_SETLKW || cmd == F_SETLKW64) {
+                *gf_cmd = GF_LK_SETLKW;
+                return 0;
+        }
+
+        if (cmd == F_RESLK_LCK) {
+                *gf_cmd = GF_LK_RESLK_LCK;
+                return 0;
+        }
+
+        if (cmd == F_RESLK_LCKW) {
+                *gf_cmd = GF_LK_RESLK_LCKW;
+                return 0;
+        }
+
+        if (cmd == F_RESLK_UNLCK) {
+                *gf_cmd = GF_LK_RESLK_UNLCK;
+                return 0;
+        }
+
+        if (cmd == F_GETLK_FD) {
+                *gf_cmd = GF_LK_GETLK_FD;
+                return 0;
+        }
+
+        return -1;
+}
+
+/*
+ * Allocate a client lock record describing @flock held by @owner on @fd.
+ *
+ * The user's flock is copied verbatim into user_flock; the internal range
+ * is [l_start, l_start + l_len - 1], with l_len == 0 mapped to an end of
+ * LLONG_MAX. Returns NULL when the allocation fails.
+ */
+static client_posix_lock_t *
+new_client_lock (struct gf_flock *flock, gf_lkowner_t *owner,
+                 int32_t cmd, fd_t *fd)
+{
+        client_posix_lock_t *fresh = GF_CALLOC (1, sizeof (*fresh),
+                                                gf_client_mt_clnt_lock_t);
+
+        if (fresh == NULL)
+                return NULL;
+
+        INIT_LIST_HEAD (&fresh->list);
+        fresh->fd = fd;
+        memcpy (&fresh->user_flock, flock, sizeof (struct gf_flock));
+
+        fresh->fl_type  = flock->l_type;
+        fresh->fl_start = flock->l_start;
+        fresh->fl_end   = (flock->l_len == 0)
+                          ? LLONG_MAX
+                          : flock->l_start + flock->l_len - 1;
+
+        fresh->owner = *owner;
+        fresh->cmd   = cmd; /* Not really useful */
+
+        return fresh;
+}
+
+/* Store @count in conf->reopen_fd_count while holding conf->rec_lock. */
+void
+client_save_number_fds (clnt_conf_t *conf, int count)
+{
+ LOCK (&conf->rec_lock);
+ {
+ conf->reopen_fd_count = count;
+ }
+ UNLOCK (&conf->rec_lock);
+}
+
+/*
+ * Remember a lock on the client side by recording it on the fd's client
+ * context.
+ *
+ * The fd context is looked up under conf->lock. Returns 0 on success,
+ * -EBADFD when the fd has no client context and -ENOMEM when the lock
+ * record cannot be allocated.
+ */
+int
+client_add_lock_for_recovery (fd_t *fd, struct gf_flock *flock,
+                              gf_lkowner_t *owner, int32_t cmd)
+{
+        xlator_t            *this  = THIS;
+        clnt_conf_t         *conf  = this->private;
+        clnt_fd_ctx_t       *fdctx = NULL;
+        client_posix_lock_t *lock  = NULL;
+
+        pthread_mutex_lock (&conf->lock);
+        fdctx = this_fd_get_ctx (fd, this);
+        pthread_mutex_unlock (&conf->lock);
+
+        if (fdctx == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FD_GET_FAIL,
+                        "failed to get fd context. sending EBADFD");
+                return -EBADFD;
+        }
+
+        lock = new_client_lock (flock, owner, cmd, fd);
+        if (lock == NULL)
+                return -ENOMEM;
+
+        /* ownership of the record passes to the fd context */
+        client_setlk (fdctx, lock);
+
+        return 0;
+}
+
+/*
+ * Dump the client-side lock records of @inode to the log and report the
+ * count to the caller through @dict under the CLIENT_DUMP_LOCKS key.
+ *
+ * @name:  unused.
+ * @inode: inode whose fds are inspected.
+ * @dict:  dictionary receiving the summary string; must not be NULL.
+ *
+ * Returns 0 on success, non-zero if the summary could not be stored.
+ *
+ * The summary is formatted in a stack buffer, so it is stored with
+ * dict_set_dynstr_with_alloc, which gives the dictionary its own heap
+ * copy. Passing the stack buffer to dict_set_dynstr would hand the
+ * dictionary a pointer it later frees and that dangles on return.
+ */
+int32_t
+client_dump_locks (char *name, inode_t *inode,
+                   dict_t *dict)
+{
+        int  ret = 0;
+        char dict_string[256] = {0,};
+
+        GF_ASSERT (dict);
+
+        ret = dump_client_locks (inode);
+        snprintf (dict_string, sizeof (dict_string),
+                  "%d locks dumped in log file", ret);
+
+        ret = dict_set_dynstr_with_alloc (dict, CLIENT_DUMP_LOCKS,
+                                          dict_string);
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_WARNING, 0,
+                        PC_MSG_DICT_SET_FAILED,
+                        "could not set dict with %s", CLIENT_DUMP_LOCKS);
+        }
+
+        return ret;
+}
+
+/* Return 1 when @name is exactly the CLIENT_DUMP_LOCKS key, 0 otherwise. */
+int32_t
+is_client_dump_locks_cmd (char *name)
+{
+        return (strcmp (name, CLIENT_DUMP_LOCKS) == 0) ? 1 : 0;
+}
diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h
index 638e537d116..1e995414ed8 100644
--- a/xlators/protocol/client/src/client-mem-types.h
+++ b/xlators/protocol/client/src/client-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,9 +16,13 @@
enum gf_client_mem_types_ {
gf_client_mt_clnt_conf_t = gf_common_mt_end + 1,
- gf_client_mt_clnt_local_t,
gf_client_mt_clnt_req_buf_t,
gf_client_mt_clnt_fdctx_t,
+ gf_client_mt_clnt_lock_t,
+ gf_client_mt_clnt_fd_lk_local_t,
+ gf_client_mt_clnt_args_t,
+ gf_client_mt_compound_req_t,
+ gf_client_mt_clnt_lock_request_t,
gf_client_mt_end,
};
#endif /* __CLIENT_MEM_TYPES_H__ */
diff --git a/xlators/protocol/client/src/client-messages.h b/xlators/protocol/client/src/client-messages.h
new file mode 100644
index 00000000000..cf28c582872
--- /dev/null
+++ b/xlators/protocol/client/src/client-messages.h
@@ -0,0 +1,651 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PC_MESSAGES_H__
+#define _PC_MESSAGES_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file client-messages.h
+ * \brief Protocol client log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused, then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_PC_BASE GLFS_MSGID_COMP_PC
+#define GLFS_PC_NUM_MESSAGES 66
+/* One past the last protocol/client message ID.  It has to be derived from
+ * this component's own message count, GLFS_PC_NUM_MESSAGES. */
+#define GLFS_PC_MSGID_END (GLFS_PC_BASE + GLFS_PC_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_PC_BASE, "Invalid: Start of messages"
+/*------------*/
+
+#define PC_MSG_TIMER_EXPIRED (GLFS_PC_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DIR_OP_FAILED (GLFS_PC_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FILE_OP_FAILED (GLFS_PC_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_TIMER_REG (GLFS_PC_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_GRACE_TIMER_CANCELLED (GLFS_PC_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DICT_SET_FAILED (GLFS_PC_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DICT_GET_FAILED (GLFS_PC_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_NO_MEMORY (GLFS_PC_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_CBK_FAILED (GLFS_PC_BASE + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FUNCTION_CALL_ERROR (GLFS_PC_BASE + 10)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_INITED_ALREADY (GLFS_PC_BASE + 11)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_INIT (GLFS_PC_BASE + 12)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_DESTROY (GLFS_PC_BASE + 13)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_INVALID_CALL (GLFS_PC_BASE + 14)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_INVALID_ENTRY (GLFS_PC_BASE + 15)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_HANDSHAKE_RETURN (GLFS_PC_BASE + 16)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_UP_NOTIFY_FAILED (GLFS_PC_BASE + 17)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CLIENT_DISCONNECTED (GLFS_PC_BASE + 18)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_DOWN_NOTIFY_FAILED (GLFS_PC_BASE + 19)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_PARENT_UP (GLFS_PC_BASE + 20)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_PARENT_DOWN (GLFS_PC_BASE + 21)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_INIT_FAILED (GLFS_PC_BASE + 22)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_NOTIFY_FAILED (GLFS_PC_BASE + 23)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FD_DUPLICATE_TRY (GLFS_PC_BASE + 24)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FD_SET_FAIL (GLFS_PC_BASE + 25)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DICT_UNSERIALIZE_FAIL (GLFS_PC_BASE + 26)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FD_GET_FAIL (GLFS_PC_BASE + 27)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FD_CTX_INVALID (GLFS_PC_BASE + 28)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_FOP_SEND_FAILED (GLFS_PC_BASE + 29)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_XDR_DECODING_FAILED (GLFS_PC_BASE + 30)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_REMOTE_OP_FAILED (GLFS_PC_BASE + 31)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RPC_STATUS_ERROR (GLFS_PC_BASE + 32)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_VOL_FILE_NOT_FOUND (GLFS_PC_BASE + 33)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_SEND_REQ_FAIL (GLFS_PC_BASE + 34)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_LOCK_VERSION_SERVER (GLFS_PC_BASE + 35)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_SET_LK_VERSION_ERROR (GLFS_PC_BASE + 36)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_LOCK_REQ_FAIL (GLFS_PC_BASE + 37)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CLIENT_REQ_FAIL (GLFS_PC_BASE + 38)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_LOCK_ERROR (GLFS_PC_BASE + 39)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_LOCK_REACQUIRE (GLFS_PC_BASE + 40)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_UP_NOTIFY (GLFS_PC_BASE + 41)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_UP_NOTIFY_DELAY (GLFS_PC_BASE + 42)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_VOL_SET_FAIL (GLFS_PC_BASE + 43)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_SETVOLUME_FAIL (GLFS_PC_BASE + 44)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_VOLFILE_NOTIFY_FAILED (GLFS_PC_BASE + 45)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_REMOTE_VOL_CONNECTED (GLFS_PC_BASE + 46)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_LOCK_MISMATCH (GLFS_PC_BASE + 47)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_LOCK_MATCH (GLFS_PC_BASE + 48)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_AUTH_FAILED (GLFS_PC_BASE + 49)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_AUTH_FAILED_NOTIFY_FAILED (GLFS_PC_BASE + 50)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_CONNECTING_EVENT (GLFS_PC_BASE + 51)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_CONNECTING_NOTIFY_FAILED (GLFS_PC_BASE + 52)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_PROCESS_UUID_SET_FAIL (GLFS_PC_BASE + 53)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DICT_ERROR (GLFS_PC_BASE + 54)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DICT_SERIALIZE_FAIL (GLFS_PC_BASE + 55)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_PGM_NOT_FOUND (GLFS_PC_BASE + 56)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_VERSION_INFO (GLFS_PC_BASE + 57)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_PORT_NUM_ERROR (GLFS_PC_BASE + 58)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_VERSION_ERROR (GLFS_PC_BASE + 59)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_DIR_OP_SUCCESS (GLFS_PC_BASE + 60)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_BAD_FD (GLFS_PC_BASE + 61)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CLIENT_LOCK_INFO (GLFS_PC_BASE + 62)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CACHE_INVALIDATION_FAIL (GLFS_PC_BASE + 63)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_CHILD_STATUS (GLFS_PC_BASE + 64)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_GFID_NULL (GLFS_PC_BASE + 65)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PC_MSG_RECALL_LEASE_FAIL (GLFS_PC_BASE + 66)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+
+/* End marker; uses the end ID that this header defines for the component. */
+#define glfs_msg_end_x GLFS_PC_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_PC_MESSAGES_H__ */
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
new file mode 100644
index 00000000000..d35d0e04861
--- /dev/null
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -0,0 +1,6473 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "client.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "compat-errno.h"
+#include "client-messages.h"
+#include "defaults.h"
+#include "client-common.h"
+
+int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data);
+rpc_clnt_prog_t clnt3_3_fop_prog;
+
+
+/*
+ * Serialize @req with @xdrproc into a newly obtained iobuf and submit it,
+ * together with the caller's @payload vectors, as procedure @procnum of
+ * @prog on the rpc object kept in this->private.
+ *
+ * When @req or @xdrproc is NULL nothing is serialized: the header vector
+ * count stays 0 and no new iobref is created.  Otherwise the caller's
+ * @iobref (if any) is merged into a new iobref that also holds the header
+ * buffer; a failed merge is only logged.
+ *
+ * If the request cannot be built, @cbkfn is called from here with
+ * rpc_status set to -1 and a non-zero value is returned.
+ *
+ * Returns the result of rpc_clnt_submit(), or non-zero on a local failure.
+ */
+int
+client_submit_vec_request (xlator_t *this, void *req, call_frame_t *frame,
+                           rpc_clnt_prog_t *prog, int procnum,
+                           fop_cbk_fn_t cbkfn,
+                           struct iovec *payload, int payloadcnt,
+                           struct iobref *iobref, xdrproc_t xdrproc)
+{
+        int             ret        = 0;
+        clnt_conf_t    *conf       = NULL;
+        struct iovec    iov        = {0, };
+        struct iobuf   *iobuf      = NULL;
+        int             count      = 0;
+        struct iobref  *new_iobref = NULL;
+        ssize_t         xdr_size   = 0;
+        struct rpc_req  rpcreq     = {0, };
+
+        conf = this->private;
+
+        if (req && xdrproc) {
+                xdr_size = xdr_sizeof (xdrproc, req);
+                iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+                if (!iobuf) {
+                        /* ret is still 0 at this point; flag the failure so
+                         * the caller does not take it for a submitted
+                         * request */
+                        ret = -1;
+                        goto unwind;
+                }
+
+                new_iobref = iobref_new ();
+                if (!new_iobref) {
+                        ret = -1;
+                        goto unwind;
+                }
+
+                if (iobref != NULL) {
+                        ret = iobref_merge (new_iobref, iobref);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                                        PC_MSG_NO_MEMORY, "cannot merge "
+                                        "iobref passed from caller into "
+                                        "new_iobref");
+                        }
+                }
+
+                ret = iobref_add (new_iobref, iobuf);
+                if (ret != 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                                PC_MSG_NO_MEMORY, "cannot add iobuf into "
+                                "iobref");
+                        goto unwind;
+                }
+
+                iov.iov_base = iobuf->ptr;
+                iov.iov_len = iobuf_size (iobuf);
+
+                /* Create the xdr payload */
+                ret = xdr_serialize_generic (iov, req, xdrproc);
+                if (ret == -1) {
+                        gf_log_callingfn ("", GF_LOG_WARNING,
+                                          "XDR function failed");
+                        goto unwind;
+                }
+
+                /* on success ret is used as the serialized length */
+                iov.iov_len = ret;
+                count = 1;
+        }
+
+        /* Send the msg */
+        ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbkfn, &iov, count,
+                               payload, payloadcnt, new_iobref, frame, NULL, 0,
+                               NULL, 0, NULL);
+        if (ret < 0) {
+                gf_msg_debug (this->name, 0, "rpc_clnt_submit failed");
+        }
+
+        if (new_iobref)
+                iobref_unref (new_iobref);
+
+        if (iobuf)
+                iobuf_unref (iobuf);
+
+        return ret;
+
+unwind:
+        /* the request never left this function: hand the callback a failed
+         * rpc_req so it still runs for this frame */
+        rpcreq.rpc_status = -1;
+        cbkfn (&rpcreq, NULL, 0, frame);
+
+        if (new_iobref)
+                iobref_unref (new_iobref);
+
+        if (iobuf)
+                iobuf_unref (iobuf);
+
+        return ret;
+}
+
+/* CBK */
+
+/*
+ * RPC reply handler for SYMLINK.
+ *
+ * A request whose rpc_status is -1 is reported as ENOTCONN and a reply that
+ * cannot be XDR-decoded as EINVAL; otherwise the decoded reply goes through
+ * client_post_symlink().  A failure is logged unless
+ * GF_IGNORE_IF_GSYNCD_SAFE_ERROR() filters it out, then the frame is unwound
+ * with the inode taken from the saved loc.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_symlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+{
+        xlator_t         *this       = THIS;
+        call_frame_t     *frame      = myframe;
+        clnt_local_t     *local      = frame->local;
+        inode_t          *inode      = local->loc.inode;
+        gfs3_symlink_rsp  reply      = {0,};
+        struct iatt       stbuf      = {0,};
+        struct iatt       preparent  = {0,};
+        struct iatt       postparent = {0,};
+        dict_t           *xdata      = NULL;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_symlink_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_symlink (this, &reply, &stbuf, &preparent,
+                                     &postparent, &xdata);
+        }
+
+        if (reply.op_ret == -1 &&
+            GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, reply.op_errno)) {
+                /* no need to print the gfid, because it will be null,
+                 * since symlink operation failed.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed. Path: (%s to %s)",
+                        local->loc.path, local->loc2.path);
+        }
+
+        CLIENT_STACK_UNWIND (symlink, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), inode, &stbuf,
+                             &preparent, &postparent, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * RPC reply handler for MKNOD.
+ *
+ * rpc_status == -1 is reported as ENOTCONN, an undecodable reply as EINVAL;
+ * a decoded reply is passed to client_post_mknod().  A failure is logged at
+ * the level chosen by fop_log_level() unless
+ * GF_IGNORE_IF_GSYNCD_SAFE_ERROR() filters it out.  The frame is then
+ * unwound with the inode from the saved loc.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_mknod_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        xlator_t       *this       = THIS;
+        call_frame_t   *frame      = myframe;
+        clnt_local_t   *local      = frame->local;
+        inode_t        *inode      = local->loc.inode;
+        gfs3_mknod_rsp  reply      = {0,};
+        struct iatt     stbuf      = {0,};
+        struct iatt     preparent  = {0,};
+        struct iatt     postparent = {0,};
+        dict_t         *xdata      = NULL;
+        int             op_errno   = 0;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_mknod_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_mknod (this, &reply, &stbuf, &preparent,
+                                   &postparent, &xdata);
+        }
+
+        /* converted once, used for both the log and the unwind */
+        op_errno = gf_error_to_errno (reply.op_errno);
+
+        if (reply.op_ret == -1 &&
+            GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, reply.op_errno)) {
+                gf_msg (this->name, fop_log_level (GF_FOP_MKNOD, op_errno),
+                        op_errno, PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed. Path: %s",
+                        local->loc.path);
+        }
+
+        CLIENT_STACK_UNWIND (mknod, frame, reply.op_ret, op_errno, inode,
+                             &stbuf, &preparent, &postparent, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for MKDIR.
+ *
+ * rpc_status == -1 is reported as ENOTCONN, an undecodable reply as EINVAL;
+ * a decoded reply is passed to client_post_mkdir().  A failure is logged at
+ * the level chosen by fop_log_level() unless
+ * GF_IGNORE_IF_GSYNCD_SAFE_ERROR() filters it out.  The frame is then
+ * unwound with the inode from the saved loc.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_mkdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        xlator_t       *this       = THIS;
+        call_frame_t   *frame      = myframe;
+        clnt_local_t   *local      = frame->local;
+        inode_t        *inode      = local->loc.inode;
+        gfs3_mkdir_rsp  reply      = {0,};
+        struct iatt     stbuf      = {0,};
+        struct iatt     preparent  = {0,};
+        struct iatt     postparent = {0,};
+        dict_t         *xdata      = NULL;
+        int             op_errno   = 0;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_mkdir_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_mkdir (this, &reply, &stbuf, &preparent,
+                                   &postparent, &xdata);
+        }
+
+        /* converted once, used for both the log and the unwind */
+        op_errno = gf_error_to_errno (reply.op_errno);
+
+        if (reply.op_ret == -1 &&
+            GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, reply.op_errno)) {
+                gf_msg (this->name, fop_log_level (GF_FOP_MKDIR, op_errno),
+                        op_errno, PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed. Path: %s",
+                        local->loc.path);
+        }
+
+        CLIENT_STACK_UNWIND (mkdir, frame, reply.op_ret, op_errno, inode,
+                             &stbuf, &preparent, &postparent, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Copy into @gfid the first non-null gfid found, trying in order:
+ * fd->inode->gfid, loc->inode->gfid, loc->gfid.
+ *
+ * @fd may be NULL.  @loc is only required when @fd does not supply a gfid; a
+ * NULL @loc in that case, or a @loc without any gfid, trips GF_ASSERT(0).
+ *
+ * Returns 0 when a gfid was copied, -1 otherwise.
+ */
+int
+_copy_gfid_from_inode_holders (uuid_t gfid, loc_t *loc, fd_t *fd)
+{
+        if (fd && fd->inode && !gf_uuid_is_null (fd->inode->gfid)) {
+                gf_uuid_copy (gfid, fd->inode->gfid);
+                return 0;
+        }
+
+        if (!loc) {
+                GF_ASSERT (0);
+                return -1;
+        }
+
+        if (loc->inode && !gf_uuid_is_null (loc->inode->gfid)) {
+                gf_uuid_copy (gfid, loc->inode->gfid);
+                return 0;
+        }
+
+        if (!gf_uuid_is_null (loc->gfid)) {
+                gf_uuid_copy (gfid, loc->gfid);
+                return 0;
+        }
+
+        /* neither the inode nor the loc itself carries a gfid */
+        GF_ASSERT (0);
+        return -1;
+}
+
+/*
+ * Allocate a clnt_fd_ctx_t for @fd, fill it from the arguments, store it in
+ * the fd through this_fd_set_ctx() and append it to conf->saved_fds while
+ * holding conf->lock.
+ *
+ * The gfid recorded in the context comes from
+ * _copy_gfid_from_inode_holders (gfid, loc, fd).
+ *
+ * Returns 0 on success, -EINVAL when no gfid could be determined and
+ * -ENOMEM when the context could not be allocated.
+ */
+int
+client_add_fd_to_saved_fds (xlator_t *this, fd_t *fd, loc_t *loc, int32_t flags,
+                            int64_t remote_fd, int is_dir)
+{
+        uuid_t         gfid  = {0};
+        clnt_conf_t   *conf  = this->private;
+        clnt_fd_ctx_t *fdctx = NULL;
+
+        if (_copy_gfid_from_inode_holders (gfid, loc, fd))
+                return -EINVAL;
+
+        fdctx = GF_CALLOC (1, sizeof (*fdctx),
+                           gf_client_mt_clnt_fdctx_t);
+        if (!fdctx)
+                return -ENOMEM;
+
+        gf_uuid_copy (fdctx->gfid, gfid);
+        fdctx->remote_fd     = remote_fd;
+        fdctx->flags         = flags;
+        fdctx->is_dir        = is_dir;
+        fdctx->lk_ctx        = fd_lk_ctx_ref (fd->lk_ctx);
+        fdctx->lk_heal_state = GF_LK_HEAL_DONE;
+        fdctx->reopen_done   = client_default_reopen_done;
+
+        INIT_LIST_HEAD (&fdctx->sfd_pos);
+        INIT_LIST_HEAD (&fdctx->lock_list);
+
+        this_fd_set_ctx (fd, this, loc, fdctx);
+
+        pthread_mutex_lock (&conf->lock);
+        {
+                list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
+        }
+        pthread_mutex_unlock (&conf->lock);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for OPEN.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL.  When the reply's op_ret is not -1, the remote fd from the reply
+ * is recorded through client_add_fd_to_saved_fds(); if that fails the fop is
+ * failed with the negated return value as errno and client_post_open() is
+ * skipped.  A failure is logged at the level chosen by fop_log_level()
+ * before the frame is unwound with the fd from the saved local.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_open_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                    void *myframe)
+{
+        clnt_local_t  *local = NULL;
+        call_frame_t  *frame = NULL;
+        fd_t          *fd    = NULL;
+        int            ret   = 0;
+        gfs3_open_rsp  rsp   = {0,};
+        xlator_t      *this  = NULL;
+        dict_t        *xdata = NULL;
+
+        this = THIS;
+
+        frame = myframe;
+        local = frame->local;
+
+        fd = local->fd;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = ENOTCONN;
+                goto out;
+        }
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_open_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        if (-1 != rsp.op_ret) {
+                ret = client_add_fd_to_saved_fds (frame->this, fd, &local->loc,
+                                                  local->flags, rsp.fd, 0);
+                if (ret) {
+                        /* ret is a negative errno here */
+                        rsp.op_ret = -1;
+                        rsp.op_errno = -ret;
+                        goto out;
+                }
+        }
+
+        ret = client_post_open (this, &rsp, &xdata);
+        if (ret < 0)
+                goto out;
+out:
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, fop_log_level (GF_FOP_OPEN,
+                        gf_error_to_errno (rsp.op_errno)),
+                        gf_error_to_errno (rsp.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed. Path: %s (%s)",
+                        local->loc.path, loc_gfid_utoa (&local->loc));
+        }
+
+        CLIENT_STACK_UNWIND (open, frame, rsp.op_ret,
+                             gf_error_to_errno (rsp.op_errno), fd, xdata);
+
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * RPC reply handler for STAT.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_stat().  ESTALE failures
+ * are logged at debug level only, every other failure as a warning.  The
+ * frame is then unwound with the iatt and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_stat_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                    void *myframe)
+{
+        gfs3_stat_rsp  rsp   = {0,};
+        call_frame_t  *frame = NULL;
+        struct iatt    iatt  = {0,};
+        int            ret   = 0;
+        xlator_t      *this  = NULL;
+        dict_t        *xdata = NULL;
+
+        this = THIS;
+
+        frame = myframe;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = ENOTCONN;
+                goto out;
+        }
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_stat_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        ret = client_post_stat (this, &rsp, &iatt, &xdata);
+        if (ret < 0)
+                goto out;
+out:
+        if (rsp.op_ret == -1) {
+                /* stale filehandles are possible during normal operations, no
+                 * need to spam the logs with these.  The comparison is done
+                 * on the converted errno, like every other use of
+                 * rsp.op_errno below. */
+                if (gf_error_to_errno (rsp.op_errno) == ESTALE) {
+                        gf_msg_debug (this->name, 0,
+                                      "remote operation failed: %s",
+                                      strerror (gf_error_to_errno
+                                                (rsp.op_errno)));
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING,
+                                gf_error_to_errno (rsp.op_errno),
+                                PC_MSG_REMOTE_OP_FAILED,
+                                "remote operation failed");
+                }
+        }
+
+        CLIENT_STACK_UNWIND (stat, frame, rsp.op_ret,
+                             gf_error_to_errno (rsp.op_errno), &iatt, xdata);
+
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for READLINK.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_readlink().  ENOENT
+ * failures are logged at debug level only, other failures as a warning.
+ * The frame is unwound with the path from the reply, which is released
+ * afterwards together with the raw xdata buffer.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_readlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        xlator_t          *this     = THIS;
+        call_frame_t      *frame    = myframe;
+        gfs3_readlink_rsp  reply    = {0,};
+        struct iatt        iatt     = {0,};
+        dict_t            *xdata    = NULL;
+        int                op_errno = 0;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_readlink_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_readlink (this, &reply, &iatt, &xdata);
+        }
+
+        /* converted once, used for the check, the log and the unwind */
+        op_errno = gf_error_to_errno (reply.op_errno);
+
+        if (reply.op_ret == -1) {
+                if (op_errno == ENOENT) {
+                        gf_msg_debug (this->name, 0,
+                                      "remote operation failed: %s",
+                                      strerror (op_errno));
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                PC_MSG_REMOTE_OP_FAILED,
+                                "remote operation failed");
+                }
+        }
+
+        CLIENT_STACK_UNWIND (readlink, frame, reply.op_ret, op_errno,
+                             reply.path, &iatt, xdata);
+
+        /* This is allocated by the libc while decoding RPC msg */
+        /* Hence no 'GF_FREE', but just 'free' */
+        free (reply.path);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for UNLINK.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_unlink().  ENOENT
+ * failures are logged at debug level only, other failures as a warning.
+ * The frame is then unwound with the parent iatts and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_unlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe)
+{
+        xlator_t        *this       = THIS;
+        call_frame_t    *frame      = myframe;
+        gfs3_unlink_rsp  reply      = {0,};
+        struct iatt      preparent  = {0,};
+        struct iatt      postparent = {0,};
+        dict_t          *xdata      = NULL;
+        int              op_errno   = 0;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_unlink_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_unlink (this, &reply, &preparent, &postparent,
+                                    &xdata);
+        }
+
+        /* converted once, used for the check, the log and the unwind */
+        op_errno = gf_error_to_errno (reply.op_errno);
+
+        if (reply.op_ret == -1) {
+                if (op_errno == ENOENT) {
+                        gf_msg_debug (this->name, 0,
+                                      "remote operation failed: %s",
+                                      strerror (op_errno));
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                PC_MSG_REMOTE_OP_FAILED,
+                                "remote operation failed");
+                }
+        }
+
+        CLIENT_STACK_UNWIND (unlink, frame, reply.op_ret, op_errno,
+                             &preparent, &postparent, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for RMDIR.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_rmdir().  A failure is
+ * logged as a warning unless GF_IGNORE_IF_GSYNCD_SAFE_ERROR() filters it
+ * out.  The frame is then unwound with the parent iatts and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_rmdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        xlator_t       *this       = THIS;
+        call_frame_t   *frame      = myframe;
+        gfs3_rmdir_rsp  reply      = {0,};
+        struct iatt     preparent  = {0,};
+        struct iatt     postparent = {0,};
+        dict_t         *xdata      = NULL;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_rmdir_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_rmdir (this, &reply, &preparent, &postparent,
+                                   &xdata);
+        }
+
+        if (reply.op_ret == -1 &&
+            GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, reply.op_errno)) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (rmdir, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), &preparent,
+                             &postparent, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * RPC reply handler for TRUNCATE.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_truncate().  Any failure
+ * is logged as a warning before the frame is unwound with the pre/post
+ * iatts and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_truncate_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        xlator_t          *this     = THIS;
+        call_frame_t      *frame    = myframe;
+        gfs3_truncate_rsp  reply    = {0,};
+        struct iatt        prestat  = {0,};
+        struct iatt        poststat = {0,};
+        dict_t            *xdata    = NULL;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_truncate_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_truncate (this, &reply, &prestat, &poststat,
+                                      &xdata);
+        }
+
+        if (reply.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (truncate, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), &prestat,
+                             &poststat, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * RPC reply handler for STATFS.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_statfs().  Any failure is
+ * logged as a warning before the frame is unwound with the statvfs buffer
+ * and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_statfs_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe)
+{
+        xlator_t        *this   = THIS;
+        call_frame_t    *frame  = myframe;
+        gfs3_statfs_rsp  reply  = {0,};
+        struct statvfs   statfs = {0,};
+        dict_t          *xdata  = NULL;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_statfs_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_statfs (this, &reply, &statfs, &xdata);
+        }
+
+        if (reply.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (statfs, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), &statfs,
+                             xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * RPC reply handler for WRITE.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_writev().  A failure is
+ * logged as a warning.  On success, when the saved local has attempt_reopen
+ * set, client_attempt_reopen() is called for the local's fd.  The frame is
+ * then unwound with the pre/post iatts and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_writev_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe)
+{
+        xlator_t       *this     = THIS;
+        call_frame_t   *frame    = myframe;
+        clnt_local_t   *local    = frame->local;
+        gfs3_write_rsp  reply    = {0,};
+        struct iatt     prestat  = {0,};
+        struct iatt     poststat = {0,};
+        dict_t         *xdata    = NULL;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_write_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_writev (this, &reply, &prestat, &poststat,
+                                    &xdata);
+        }
+
+        if (reply.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        } else if (reply.op_ret >= 0 && local->attempt_reopen) {
+                client_attempt_reopen (local->fd, this);
+        }
+
+        CLIENT_STACK_UNWIND (writev, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), &prestat,
+                             &poststat, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for FLUSH.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL.  For a decoded, successful reply on a non-anonymous fd the saved
+ * locks of the flushing owner are dropped via delete_granted_locks_owner();
+ * the reply is then passed to client_post_flush().  A failure is logged at
+ * the level chosen by fop_log_level() before the frame is unwound.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        call_frame_t  *frame   = myframe;
+        xlator_t      *this    = THIS;
+        clnt_local_t  *local   = frame->local;
+        gf_common_rsp  reply   = {0,};
+        dict_t        *xdata   = NULL;
+        int            deleted = 0;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gf_common_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                if (reply.op_ret >= 0 && !fd_is_anonymous (local->fd)) {
+                        /* Delete all saved locks of the owner issuing flush */
+                        deleted = delete_granted_locks_owner (local->fd,
+                                                              &local->owner);
+                        gf_msg_trace (this->name, 0,
+                                      "deleting locks of owner (%s) returned %d",
+                                      lkowner_utoa (&local->owner), deleted);
+                }
+
+                /* the return value does not alter the unwind below */
+                client_post_flush (this, &reply, &xdata);
+        }
+
+        if (reply.op_ret == -1) {
+                gf_msg (this->name, fop_log_level (GF_FOP_FLUSH,
+                        gf_error_to_errno (reply.op_errno)),
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (flush, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * RPC reply handler for FSYNC.
+ *
+ * rpc_status == -1 is reported as ENOTCONN and an undecodable reply as
+ * EINVAL; a decoded reply is passed to client_post_fsync().  Any failure is
+ * logged as a warning before the frame is unwound with the pre/post iatts
+ * and xdata.
+ *
+ * @count is unused.  Always returns 0.
+ */
+int
+client3_3_fsync_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        xlator_t       *this     = THIS;
+        call_frame_t   *frame    = myframe;
+        gfs3_fsync_rsp  reply    = {0,};
+        struct iatt     prestat  = {0,};
+        struct iatt     poststat = {0,};
+        dict_t         *xdata    = NULL;
+
+        if (req->rpc_status == -1) {
+                reply.op_ret = -1;
+                reply.op_errno = ENOTCONN;
+        } else if (xdr_to_generic (*iov, &reply,
+                                   (xdrproc_t)xdr_gfs3_fsync_rsp) < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                reply.op_ret = -1;
+                reply.op_errno = EINVAL;
+        } else {
+                /* the return value does not alter the unwind below */
+                client_post_fsync (this, &reply, &prestat, &poststat,
+                                   &xdata);
+        }
+
+        if (reply.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (reply.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (fsync, frame, reply.op_ret,
+                             gf_error_to_errno (reply.op_errno), &prestat,
+                             &poststat, xdata);
+
+        free (reply.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the SETXATTR fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_setxattr() on it and unwinds
+ * the frame.  ENOTSUP failures are logged at debug level only; every other
+ * failure is logged as a warning.  Always returns 0.
+ */
+int
+client3_3_setxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = THIS;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = EINVAL;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_setxattr (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+
+        if (rsp.op_ret == -1 && op_errno == ENOTSUP) {
+                gf_msg_debug (this->name, 0, "remote operation failed: %s",
+                              strerror (op_errno));
+        } else if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (setxattr, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the GETXATTR fop.
+ *
+ * Decodes a gfs3_getxattr_rsp, lets client_post_getxattr() build the
+ * result dict and xdata, and unwinds the frame with them.  If the helper
+ * returns non-zero, its negated value becomes the errno handed upward.
+ * ENOTSUP, ENODATA, ESTALE and ENOENT failures are logged at debug level;
+ * anything else is a warning.  Always returns 0.
+ */
+int
+client3_3_getxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t      *frame    = myframe;
+        clnt_local_t      *local    = frame->local;
+        xlator_t          *this     = THIS;
+        gfs3_getxattr_rsp  rsp      = {0,};
+        dict_t            *dict     = NULL;
+        dict_t            *xdata    = NULL;
+        int                op_errno = EINVAL;
+        int                ret      = 0;
+        int                quiet    = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret = -1;
+                op_errno   = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_getxattr_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret = -1;
+                        op_errno   = EINVAL;
+                } else {
+                        op_errno = gf_error_to_errno (rsp.op_errno);
+                        ret = client_post_getxattr (this, &rsp, &dict,
+                                                    &xdata);
+                        if (ret)
+                                op_errno = -ret;
+                }
+        }
+
+        if (rsp.op_ret == -1) {
+                /* these errnos are only worth a debug line */
+                quiet = (op_errno == ENOTSUP) || (op_errno == ENODATA) ||
+                        (op_errno == ESTALE) || (op_errno == ENOENT);
+
+                if (quiet) {
+                        gf_msg_debug (this->name, 0,
+                                      "remote operation failed: %s. Path: %s (%s). Key: %s",
+                                      strerror (op_errno),
+                                      local->loc.path,
+                                      loc_gfid_utoa (&local->loc),
+                                      (local->name) ? local->name : "(null)");
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                PC_MSG_REMOTE_OP_FAILED,
+                                "remote operation failed. Path: %s (%s). Key: %s",
+                                local->loc.path,
+                                loc_gfid_utoa (&local->loc),
+                                (local->name) ? local->name : "(null)");
+                }
+        }
+
+        CLIENT_STACK_UNWIND (getxattr, frame, rsp.op_ret, op_errno, dict,
+                             xdata);
+
+        /* don't use GF_FREE, this memory was allocated by libc */
+        free (rsp.dict.dict_val);
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FGETXATTR fop.
+ *
+ * Decodes a gfs3_fgetxattr_rsp, lets client_post_fgetxattr() build the
+ * result dict and xdata, and unwinds the frame.  If the helper returns
+ * non-zero, its negated value becomes the errno handed upward.  ENOTSUP,
+ * ERANGE, ENODATA and ENOENT failures are logged at debug level; anything
+ * else is a warning.  Always returns 0.
+ */
+int
+client3_3_fgetxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        call_frame_t       *frame    = myframe;
+        clnt_local_t       *local    = frame->local;
+        xlator_t           *this     = THIS;
+        gfs3_fgetxattr_rsp  rsp      = {0,};
+        dict_t             *dict     = NULL;
+        dict_t             *xdata    = NULL;
+        int                 op_errno = EINVAL;
+        int                 ret      = 0;
+        int                 quiet    = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret = -1;
+                op_errno   = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_fgetxattr_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret = -1;
+                        op_errno   = EINVAL;
+                } else {
+                        op_errno = gf_error_to_errno (rsp.op_errno);
+                        ret = client_post_fgetxattr (this, &rsp, &dict,
+                                                     &xdata);
+                        if (ret)
+                                op_errno = -ret;
+                }
+        }
+
+        if (rsp.op_ret == -1) {
+                /* these errnos are only worth a debug line */
+                quiet = (op_errno == ENOTSUP) || (op_errno == ERANGE) ||
+                        (op_errno == ENODATA) || (op_errno == ENOENT);
+
+                if (quiet) {
+                        gf_msg_debug (this->name, 0,
+                                      "remote operation failed: %s",
+                                      strerror (op_errno));
+                } else {
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                PC_MSG_REMOTE_OP_FAILED,
+                                "remote operation failed");
+                }
+        }
+
+        CLIENT_STACK_UNWIND (fgetxattr, frame, rsp.op_ret, op_errno, dict,
+                             xdata);
+
+        /* both buffers are released with libc free(), not GF_FREE */
+        free (rsp.dict.dict_val);
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the REMOVEXATTR fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_removexattr() on it and
+ * unwinds the frame.  A failure with ENODATA/ENOATTR is logged at debug
+ * level; every other failure is a warning.  Always returns 0.
+ */
+int
+client3_3_removexattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                           void *myframe)
+{
+        call_frame_t  *frame    = NULL;
+        gf_common_rsp  rsp      = {0,};
+        int            ret      = 0;
+        int            op_errno = 0;
+        xlator_t      *this     = NULL;
+        dict_t        *xdata    = NULL;
+        gf_loglevel_t  loglevel = GF_LOG_NONE;
+
+        this = THIS;
+
+        frame = myframe;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = ENOTCONN;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        ret = client_post_removexattr (this, &rsp, &xdata);
+        if (ret < 0)
+                goto out;
+out:
+        /* Convert once, then compare against the host's errno constants.
+         * NOTE(review): presumes gf_error_to_errno() maps the wire value to
+         * the local errno space, as its use on every unwind suggests. */
+        op_errno = gf_error_to_errno (rsp.op_errno);
+
+        if (rsp.op_ret == -1) {
+                if ((ENODATA == op_errno) || (ENOATTR == op_errno))
+                        loglevel = GF_LOG_DEBUG;
+                else
+                        loglevel = GF_LOG_WARNING;
+
+                gf_msg (this->name, loglevel, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (removexattr, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FREMOVEXATTR fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_fremovexattr() on it and
+ * unwinds the frame.  Failures are logged as warnings.  Always returns 0.
+ */
+int
+client3_3_fremovexattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                            void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = THIS;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = 0;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_fremovexattr (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (fremovexattr, frame, rsp.op_ret, op_errno,
+                             xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FSYNCDIR fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_fsyncdir() on it and unwinds
+ * the frame.  Failures are logged as warnings.  Always returns 0.
+ */
+int
+client3_3_fsyncdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = THIS;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = 0;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_fsyncdir (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (fsyncdir, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the ACCESS fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_access() on it and unwinds
+ * the frame.  Failures are logged as warnings.  Always returns 0.
+ */
+int
+client3_3_access_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = THIS;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = 0;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_access (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (access, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * Reply callback for the FTRUNCATE fop.
+ *
+ * Decodes a gfs3_ftruncate_rsp, passes it to client_post_ftruncate() with
+ * the pre/post iatts and xdata, and unwinds the frame.  Failures are
+ * logged as warnings.  Always returns 0.
+ */
+int
+client3_3_ftruncate_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        call_frame_t       *frame    = myframe;
+        xlator_t           *this     = THIS;
+        gfs3_ftruncate_rsp  rsp      = {0,};
+        struct iatt         prestat  = {0,};
+        struct iatt         poststat = {0,};
+        dict_t             *xdata    = NULL;
+        int                 op_errno = 0;
+        int                 ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_ftruncate_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_ftruncate (this, &rsp, &prestat,
+                                               &poststat, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (ftruncate, frame, rsp.op_ret, op_errno,
+                             &prestat, &poststat, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FSTAT fop.
+ *
+ * Decodes a gfs3_fstat_rsp, passes it to client_post_fstat() with the
+ * iatt and xdata to fill, and unwinds the frame.  Failures are logged as
+ * warnings.  Always returns 0.
+ */
+int
+client3_3_fstat_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        call_frame_t   *frame    = myframe;
+        xlator_t       *this     = THIS;
+        gfs3_fstat_rsp  rsp      = {0,};
+        struct iatt     stat     = {0,};
+        dict_t         *xdata    = NULL;
+        int             op_errno = 0;
+        int             ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_fstat_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_fstat (this, &rsp, &stat, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (fstat, frame, rsp.op_ret, op_errno, &stat,
+                             xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * Reply callback for the INODELK fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_inodelk() on it and unwinds
+ * the frame.  On failure the log level is chosen by fop_log_level() for
+ * GF_FOP_INODELK.  Always returns 0.
+ */
+int
+client3_3_inodelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+{
+        call_frame_t  *frame = NULL;
+        gf_common_rsp  rsp   = {0,};
+        int            ret   = 0;
+        xlator_t      *this  = NULL;
+        dict_t        *xdata = NULL;
+
+        this = THIS;
+
+        frame = myframe;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = ENOTCONN;
+                goto out;
+        }
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        /* xdata is filled by the post helper only, matching the finodelk and
+         * entrylk callbacks.  Unserializing rsp.xdata here as well would put
+         * two dicts into one pointer and orphan one of them.
+         * NOTE(review): assumes client_post_inodelk() unserializes
+         * rsp.xdata into *xdata like its siblings - confirm. */
+        ret = client_post_inodelk (this, &rsp, &xdata);
+        if (ret < 0)
+                goto out;
+out:
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, fop_log_level (GF_FOP_INODELK,
+                        gf_error_to_errno (rsp.op_errno)),
+                        gf_error_to_errno (rsp.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+        CLIENT_STACK_UNWIND (inodelk, frame, rsp.op_ret,
+                             gf_error_to_errno (rsp.op_errno), xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FINODELK fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_finodelk() on it and unwinds
+ * the frame.  On failure the log level is chosen by fop_log_level() for
+ * GF_FOP_FINODELK.  When op_ret is 0 and the request was flagged with
+ * attempt_reopen, client_attempt_reopen() is called on the fd before the
+ * unwind.  Always returns 0.
+ */
+int
+client3_3_finodelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = frame->this;
+        clnt_local_t  *local    = frame->local;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = 0;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_finodelk (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name,
+                        fop_log_level (GF_FOP_FINODELK, op_errno), op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        } else if ((rsp.op_ret == 0) && local->attempt_reopen) {
+                client_attempt_reopen (local->fd, this);
+        }
+
+        CLIENT_STACK_UNWIND (finodelk, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the ENTRYLK fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_entrylk() on it and unwinds
+ * the frame.  On failure the log level is chosen by fop_log_level() for
+ * GF_FOP_ENTRYLK.  Always returns 0.
+ */
+int
+client3_3_entrylk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = THIS;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = 0;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_entrylk (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name,
+                        fop_log_level (GF_FOP_ENTRYLK, op_errno), op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (entrylk, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FENTRYLK fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_fentrylk() on it and unwinds
+ * the frame.  Failures are logged as warnings, except EAGAIN which is not
+ * logged at all.  Always returns 0.
+ */
+int
+client3_3_fentrylk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t  *frame    = myframe;
+        xlator_t      *this     = THIS;
+        gf_common_rsp  rsp      = {0,};
+        dict_t        *xdata    = NULL;
+        int            op_errno = 0;
+        int            ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gf_common_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_fentrylk (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if ((rsp.op_ret == -1) && (op_errno != EAGAIN)) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (fentrylk, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the XATTROP fop.
+ *
+ * Decodes a gfs3_xattrop_rsp, lets client_post_xattrop() build the result
+ * dict and xdata, and unwinds the frame.  The error is tracked in the
+ * local op_errno: ENOTCONN for a failed RPC, EINVAL for an undecodable
+ * reply, the reply's own op_errno otherwise, or the negated helper return
+ * if post-processing fails.  Always returns 0.
+ */
+int
+client3_3_xattrop_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+{
+        call_frame_t     *frame      = NULL;
+        dict_t           *dict       = NULL;
+        gfs3_xattrop_rsp  rsp        = {0,};
+        int               ret        = 0;
+        int               op_errno   = EINVAL;
+        int               host_errno = 0;
+        clnt_local_t     *local      = NULL;
+        xlator_t         *this       = NULL;
+        dict_t           *xdata      = NULL;
+
+        this = THIS;
+
+        frame = myframe;
+        local = frame->local;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                op_errno = ENOTCONN;
+                goto out;
+        }
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_xattrop_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        op_errno = rsp.op_errno;
+        ret = client_post_xattrop (this, &rsp, &dict, &xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto out;
+        }
+out:
+        /* Log the same errno that is unwound.  rsp.op_errno is still 0 on
+         * the ENOTCONN, EINVAL and post-processing failure paths, so it
+         * must not be used for the message. */
+        host_errno = gf_error_to_errno (op_errno);
+
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name,
+                        fop_log_level (GF_FOP_XATTROP, host_errno),
+                        host_errno,
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed. Path: %s (%s)",
+                        local->loc.path, loc_gfid_utoa (&local->loc));
+        }
+
+        CLIENT_STACK_UNWIND (xattrop, frame, rsp.op_ret, host_errno, dict,
+                             xdata);
+
+        /* both buffers are released with libc free(), not GF_FREE */
+        free (rsp.dict.dict_val);
+
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FXATTROP fop.
+ *
+ * Decodes a gfs3_fxattrop_rsp, lets client_post_fxattrop() build the
+ * result dict and xdata, and unwinds the frame.  The error is tracked in
+ * the local op_errno: ENOTCONN for a failed RPC, EINVAL for an undecodable
+ * reply, the reply's own op_errno otherwise, or the negated helper return
+ * if post-processing fails (which also forces op_ret to -1).  When op_ret
+ * is 0 and the request was flagged with attempt_reopen,
+ * client_attempt_reopen() is called on the fd.  Always returns 0.
+ */
+int
+client3_3_fxattrop_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t      *frame      = NULL;
+        dict_t            *dict       = NULL;
+        dict_t            *xdata      = NULL;
+        gfs3_fxattrop_rsp  rsp        = {0,};
+        int                ret        = 0;
+        int                op_errno   = 0;
+        int                host_errno = 0;
+        clnt_local_t      *local      = NULL;
+        xlator_t          *this       = NULL;
+
+        this = THIS;
+
+        frame = myframe;
+        local = frame->local;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                op_errno = ENOTCONN;
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fxattrop_rsp);
+        if (ret < 0) {
+                rsp.op_ret = -1;
+                op_errno = EINVAL;
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                goto out;
+        }
+        op_errno = rsp.op_errno;
+        ret = client_post_fxattrop (this, &rsp, &dict, &xdata);
+        if (ret) {
+                rsp.op_ret = -1;
+                op_errno = -ret;
+                goto out;
+        }
+out:
+        /* Log the same errno that is unwound.  rsp.op_errno is still 0 on
+         * the ENOTCONN, EINVAL and post-processing failure paths, so it
+         * must not be used for the message. */
+        host_errno = gf_error_to_errno (op_errno);
+
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, host_errno,
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        } else if (rsp.op_ret == 0) {
+                if (local->attempt_reopen)
+                        client_attempt_reopen (local->fd, this);
+        }
+        CLIENT_STACK_UNWIND (fxattrop, frame, rsp.op_ret, host_errno, dict,
+                             xdata);
+
+        /* both buffers are released with libc free(), not GF_FREE */
+        free (rsp.dict.dict_val);
+
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FSETXATTR fop.
+ *
+ * Decodes a gf_common_rsp, runs client_post_setxattr() on it (the same
+ * helper the SETXATTR callback uses) and unwinds the frame.  ENOTSUP
+ * failures are logged at debug level only; every other failure is logged
+ * as a warning.  Always returns 0.
+ */
+int
+client3_3_fsetxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        call_frame_t  *frame    = NULL;
+        gf_common_rsp  rsp      = {0,};
+        int            ret      = 0;
+        xlator_t      *this     = NULL;
+        dict_t        *xdata    = NULL;
+        int            op_errno = EINVAL;
+
+        this = THIS;
+
+        frame = myframe;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = ENOTCONN;
+                goto out;
+        }
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        ret = client_post_setxattr (this, &rsp, &xdata);
+        if (ret < 0)
+                goto out;
+
+out:
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                if (op_errno == ENOTSUP) {
+                        gf_msg_debug (this->name, 0, "remote operation failed:"
+                                      " %s", strerror (op_errno));
+                } else {
+                        /* log the converted errno, the same value that is
+                         * unwound, as the SETXATTR callback does */
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                PC_MSG_REMOTE_OP_FAILED, "remote operation "
+                                "failed");
+                }
+        }
+
+        CLIENT_STACK_UNWIND (fsetxattr, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FSETATTR fop.
+ *
+ * Decodes a gfs3_fsetattr_rsp, passes it to client_post_fsetattr() with
+ * the pre/post iatts and xdata, and unwinds the frame.  Failures are
+ * logged as warnings.  Always returns 0.
+ */
+int
+client3_3_fsetattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+{
+        call_frame_t      *frame    = myframe;
+        xlator_t          *this     = THIS;
+        gfs3_fsetattr_rsp  rsp      = {0,};
+        struct iatt        prestat  = {0,};
+        struct iatt        poststat = {0,};
+        dict_t            *xdata    = NULL;
+        int                op_errno = 0;
+        int                ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_fsetattr_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_fsetattr (this, &rsp, &prestat,
+                                              &poststat, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (fsetattr, frame, rsp.op_ret, op_errno,
+                             &prestat, &poststat, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the FALLOCATE fop.
+ *
+ * Decodes a gfs3_fallocate_rsp, passes it to client_post_fallocate() with
+ * the pre/post iatts and xdata, and unwinds the frame.  Failures are
+ * logged as warnings.  Always returns 0.
+ */
+int
+client3_3_fallocate_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        call_frame_t       *frame    = NULL;
+        gfs3_fallocate_rsp  rsp      = {0,};
+        struct iatt         prestat  = {0,};
+        struct iatt         poststat = {0,};
+        int                 ret      = 0;
+        xlator_t           *this     = NULL;
+        dict_t             *xdata    = NULL;
+
+        this = THIS;
+
+        frame = myframe;
+
+        if (-1 == req->rpc_status) {
+                rsp.op_ret = -1;
+                rsp.op_errno = ENOTCONN;
+                goto out;
+        }
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fallocate_rsp);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                rsp.op_ret = -1;
+                rsp.op_errno = EINVAL;
+                goto out;
+        }
+
+        /* xdata is filled by the post helper only, matching the discard and
+         * zerofill callbacks.  Unserializing rsp.xdata a second time after
+         * it would overwrite the pointer and orphan the first dict.
+         * NOTE(review): assumes client_post_fallocate() unserializes
+         * rsp.xdata into *xdata like its siblings - confirm. */
+        ret = client_post_fallocate (this, &rsp, &prestat, &poststat, &xdata);
+        if (ret < 0)
+                goto out;
+
+out:
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING,
+                        gf_error_to_errno (rsp.op_errno),
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed");
+        }
+        CLIENT_STACK_UNWIND (fallocate, frame, rsp.op_ret,
+                             gf_error_to_errno (rsp.op_errno), &prestat,
+                             &poststat, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the DISCARD fop.
+ *
+ * Decodes a gfs3_discard_rsp, passes it to client_post_discard() with the
+ * pre/post iatts and xdata, and unwinds the frame.  Failures are logged
+ * as warnings.  Always returns 0.
+ */
+int
+client3_3_discard_cbk(struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe)
+{
+        call_frame_t     *frame    = myframe;
+        xlator_t         *this     = THIS;
+        gfs3_discard_rsp  rsp      = {0,};
+        struct iatt       prestat  = {0,};
+        struct iatt       poststat = {0,};
+        dict_t           *xdata    = NULL;
+        int               op_errno = 0;
+        int               ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t) xdr_gfs3_discard_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_discard (this, &rsp, &prestat, &poststat,
+                                             &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (discard, frame, rsp.op_ret, op_errno, &prestat,
+                             &poststat, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the ZEROFILL fop.
+ *
+ * Decodes a gfs3_zerofill_rsp, passes it to client_post_zerofill() with
+ * the pre/post iatts and xdata, and unwinds the frame.  Failures are
+ * logged as warnings.  Always returns 0.
+ */
+int
+client3_3_zerofill_cbk(struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+{
+        call_frame_t      *frame    = myframe;
+        xlator_t          *this     = THIS;
+        gfs3_zerofill_rsp  rsp      = {0,};
+        struct iatt        prestat  = {0,};
+        struct iatt        poststat = {0,};
+        dict_t            *xdata    = NULL;
+        int                op_errno = 0;
+        int                ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t) xdr_gfs3_zerofill_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_zerofill (this, &rsp, &prestat,
+                                              &poststat, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (zerofill, frame, rsp.op_ret, op_errno,
+                             &prestat, &poststat, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the IPC fop.
+ *
+ * Decodes a gfs3_ipc_rsp, runs client_post_ipc() on it and unwinds the
+ * frame.  Failures are logged as warnings.  Always returns 0.
+ */
+int
+client3_3_ipc_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                   void *myframe)
+{
+        call_frame_t *frame    = myframe;
+        xlator_t     *this     = THIS;
+        gfs3_ipc_rsp  rsp      = {0,};
+        dict_t       *xdata    = NULL;
+        int           op_errno = 0;
+        int           ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t) xdr_gfs3_ipc_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_ipc (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (ipc, frame, rsp.op_ret, op_errno, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+
+/*
+ * Reply callback for the SEEK fop.
+ *
+ * Decodes a gfs3_seek_rsp, runs client_post_seek() on it and unwinds the
+ * frame with the offset carried in the reply.  Failures are logged as
+ * warnings.  Always returns 0.
+ */
+int
+client3_3_seek_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                    void *myframe)
+{
+        call_frame_t         *frame    = myframe;
+        xlator_t             *this     = THIS;
+        struct gfs3_seek_rsp  rsp      = {0,};
+        dict_t               *xdata    = NULL;
+        int                   op_errno = 0;
+        int                   ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t) xdr_gfs3_seek_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_seek (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (seek, frame, rsp.op_ret, op_errno, rsp.offset,
+                             xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the SETATTR fop.
+ *
+ * Decodes a gfs3_setattr_rsp, passes it to client_post_setattr() with the
+ * pre/post iatts and xdata, and unwinds the frame.  Failures are logged
+ * as warnings.  Always returns 0.
+ */
+int
+client3_3_setattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+{
+        call_frame_t     *frame    = myframe;
+        xlator_t         *this     = THIS;
+        gfs3_setattr_rsp  rsp      = {0,};
+        struct iatt       prestat  = {0,};
+        struct iatt       poststat = {0,};
+        dict_t           *xdata    = NULL;
+        int               op_errno = 0;
+        int               ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_setattr_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_setattr (this, &rsp, &prestat, &poststat,
+                                             &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (setattr, frame, rsp.op_ret, op_errno, &prestat,
+                             &poststat, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the CREATE fop.
+ *
+ * Decodes a gfs3_create_rsp.  When the reply's op_ret is not -1 it runs
+ * client_post_create() and then records the new remote fd through
+ * client_add_fd_to_saved_fds(); if that registration fails, the fop is
+ * turned into a failure carrying the negated return value as errno.
+ * The frame is unwound with the fd and inode taken from the request's
+ * local.  Always returns 0.
+ */
+int
+client3_3_create_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe)
+{
+        call_frame_t    *frame      = myframe;
+        clnt_local_t    *local      = frame->local;
+        fd_t            *fd         = local->fd;
+        inode_t         *inode      = local->loc.inode;
+        xlator_t        *this       = THIS;
+        gfs3_create_rsp  rsp        = {0,};
+        struct iatt      stbuf      = {0, };
+        struct iatt      preparent  = {0, };
+        struct iatt      postparent = {0, };
+        dict_t          *xdata      = NULL;
+        int32_t          ret        = -1;
+        int              op_errno   = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_create_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else if (rsp.op_ret != -1) {
+                        ret = client_post_create (this, &rsp, &stbuf,
+                                                  &preparent, &postparent,
+                                                  local, &xdata);
+                        if (ret >= 0) {
+                                /* fd is registered only after successful
+                                 * post-processing */
+                                ret = client_add_fd_to_saved_fds (frame->this,
+                                                                  fd,
+                                                                  &local->loc,
+                                                                  local->flags,
+                                                                  rsp.fd, 0);
+                                if (ret) {
+                                        rsp.op_ret   = -1;
+                                        rsp.op_errno = -ret;
+                                }
+                        }
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED,
+                        "remote operation failed. Path: %s",
+                        local->loc.path);
+        }
+
+        CLIENT_STACK_UNWIND (create, frame, rsp.op_ret, op_errno, fd, inode,
+                             &stbuf, &preparent, &postparent, xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the RCHECKSUM fop.
+ *
+ * Decodes a gfs3_rchecksum_rsp, runs client_post_rchecksum() on it and
+ * unwinds the frame with the weak checksum and the strong checksum buffer
+ * taken straight from the reply.  Failures are logged as warnings.
+ * Always returns 0.
+ */
+int
+client3_3_rchecksum_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        call_frame_t       *frame    = myframe;
+        xlator_t           *this     = THIS;
+        gfs3_rchecksum_rsp  rsp      = {0,};
+        dict_t             *xdata    = NULL;
+        int                 op_errno = 0;
+        int                 ret      = 0;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_rchecksum_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_rchecksum (this, &rsp, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (rchecksum, frame, rsp.op_ret, op_errno,
+                             rsp.weak_checksum,
+                             (uint8_t *)rsp.strong_checksum.strong_checksum_val,
+                             xdata);
+
+        /* Allocated by libc while decoding the RPC message, so plain
+         * free() rather than GF_FREE; free (NULL) is a no-op. */
+        free (rsp.strong_checksum.strong_checksum_val);
+
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the LEASE fop.
+ *
+ * Decodes a gfs3_lease_rsp, passes it to client_post_lease() with the
+ * gf_lease and xdata to fill, and unwinds the frame.  A failed RPC is
+ * logged once as an error here and again by the generic failure warning
+ * below.  Always returns 0.
+ */
+int
+client3_3_lease_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+{
+        call_frame_t    *frame    = myframe;
+        xlator_t        *this     = THIS;
+        struct gf_lease  lease    = {0,};
+        gfs3_lease_rsp   rsp      = {0,};
+        dict_t          *xdata    = NULL;
+        int              op_errno = 0;
+        int              ret      = 0;
+
+        if (req->rpc_status == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, ENOTCONN,
+                        PC_MSG_REMOTE_OP_FAILED, "Lease fop failed");
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_lease_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_lease (this, &rsp, &lease, &xdata);
+                }
+        }
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if (rsp.op_ret == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (lease, frame, rsp.op_ret, op_errno, &lease,
+                             xdata);
+
+        /* released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+/*
+ * Reply callback for the LK fop.
+ *
+ * Decodes a gfs3_lk_rsp and, when the reply's op_ret is non-negative,
+ * passes it to client_post_lk() with the gf_flock and xdata to fill.
+ * The frame is then unwound.  Failures are logged as warnings, except
+ * EAGAIN which is not logged.  Always returns 0.
+ */
+int
+client3_3_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                  void *myframe)
+{
+        call_frame_t    *frame    = myframe;
+        clnt_local_t    *local    = NULL;
+        xlator_t        *this     = THIS;
+        struct gf_flock  lock     = {0,};
+        gfs3_lk_rsp      rsp      = {0,};
+        dict_t          *xdata    = NULL;
+        int              op_errno = 0;
+        int              ret      = 0;
+
+        /* only referenced by the disabled recovery code further down */
+        local = frame->local;
+
+        if (req->rpc_status == -1) {
+                rsp.op_ret   = -1;
+                rsp.op_errno = ENOTCONN;
+        } else {
+                ret = xdr_to_generic (*iov, &rsp,
+                                      (xdrproc_t)xdr_gfs3_lk_rsp);
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                PC_MSG_XDR_DECODING_FAILED,
+                                "XDR decoding failed");
+                        rsp.op_ret   = -1;
+                        rsp.op_errno = EINVAL;
+                } else if (rsp.op_ret >= 0) {
+                        /* status unused: we unwind with rsp either way */
+                        client_post_lk (this, &rsp, &lock, &xdata);
+                }
+        }
+
+        /* Save the lock to the client lock cache to be able
+           to recover in the case of server reboot.*/
+        /*
+          temporarily
+        if (local->cmd == F_SETLK || local->cmd == F_SETLKW) {
+                ret = client_add_lock_for_recovery (local->fd, &lock,
+                                                    local->owner, local->cmd);
+                if (ret < 0) {
+                        rsp.op_ret = -1;
+                        rsp.op_errno = -ret;
+                }
+        }
+        */
+
+        op_errno = gf_error_to_errno (rsp.op_errno);
+        if ((rsp.op_ret == -1) && (op_errno != EAGAIN)) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+        }
+
+        CLIENT_STACK_UNWIND (lk, frame, rsp.op_ret, op_errno, &lock, xdata);
+
+        /* both buffers are released with libc free(), not GF_FREE */
+        free (rsp.xdata.xdata_val);
+
+        free (rsp.flock.lk_owner.lk_owner_val);
+
+        if (xdata)
+                dict_unref (xdata);
+
+        return 0;
+}
+
+ /*
+  * Reply handler for the READDIR fop.
+  *
+  * Decodes the gfs3_readdir_rsp from @iov, lets client_post_readdir()
+  * build the directory entry list and xdata, and unwinds the frame passed
+  * as @myframe with the entries.  A transport failure is reported as
+  * ENOTCONN and a decode failure as EINVAL.  Always returns 0.
+  */
+ int
+ client3_3_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+ {
+         call_frame_t     *frame   = NULL;
+         gfs3_readdir_rsp  rsp     = {0,};
+         int32_t           ret     = 0;
+         clnt_local_t     *local   = NULL;
+         gf_dirent_t       entries;
+         xlator_t         *this    = NULL;
+         dict_t           *xdata   = NULL;
+
+         this = THIS;
+
+         frame = myframe;
+         local = frame->local;
+
+         /*
+          * Initialise the list head before any early exit: &entries is
+          * handed to the unwind on every path, including the failures
+          * below, so it must never be left uninitialised.
+          */
+         INIT_LIST_HEAD (&entries.list);
+
+         if (-1 == req->rpc_status) {
+                 rsp.op_ret = -1;
+                 rsp.op_errno = ENOTCONN;
+                 goto out;
+         }
+
+         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_readdir_rsp);
+         if (ret < 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                         PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                 rsp.op_ret = -1;
+                 rsp.op_errno = EINVAL;
+                 goto out;
+         }
+
+         ret = client_post_readdir (this, &rsp, &entries, &xdata);
+         if (ret < 0)
+                 goto out;
+
+ out:
+         if (rsp.op_ret == -1) {
+                 gf_msg (this->name, GF_LOG_WARNING,
+                         gf_error_to_errno (rsp.op_errno),
+                         PC_MSG_REMOTE_OP_FAILED,
+                         "remote operation failed: remote_fd = %d",
+                         local->cmd);
+         }
+         CLIENT_STACK_UNWIND (readdir, frame, rsp.op_ret,
+                              gf_error_to_errno (rsp.op_errno), &entries, xdata);
+
+         if (rsp.op_ret != -1) {
+                 gf_dirent_free (&entries);
+         }
+
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata)
+                 dict_unref (xdata);
+
+         clnt_readdir_rsp_cleanup (&rsp);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the READDIRP fop.
+  *
+  * Decodes the gfs3_readdirp_rsp from @iov, lets client_post_readdirp()
+  * build the directory entry list (using the fd stored in the frame's
+  * local) and xdata, and unwinds the frame passed as @myframe.
+  * A transport failure is reported as ENOTCONN and a decode failure as
+  * EINVAL.  Always returns 0.
+  */
+ int
+ client3_3_readdirp_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+ {
+         call_frame_t      *frame   = NULL;
+         gfs3_readdirp_rsp  rsp     = {0,};
+         int32_t            ret     = 0;
+         clnt_local_t      *local   = NULL;
+         gf_dirent_t        entries;
+         xlator_t          *this    = NULL;
+         dict_t            *xdata   = NULL;
+
+         this = THIS;
+
+         frame = myframe;
+         local = frame->local;
+
+         /*
+          * Initialise the list head before any early exit: &entries is
+          * handed to the unwind on every path, including the failures
+          * below, so it must never be left uninitialised.
+          */
+         INIT_LIST_HEAD (&entries.list);
+
+         if (-1 == req->rpc_status) {
+                 rsp.op_ret = -1;
+                 rsp.op_errno = ENOTCONN;
+                 goto out;
+         }
+
+         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_readdirp_rsp);
+         if (ret < 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                         PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                 rsp.op_ret = -1;
+                 rsp.op_errno = EINVAL;
+                 goto out;
+         }
+
+         ret = client_post_readdirp (this, &rsp, local->fd, &entries, &xdata);
+         if (ret < 0)
+                 goto out;
+ out:
+         if (rsp.op_ret == -1) {
+                 gf_msg (this->name, GF_LOG_WARNING,
+                         gf_error_to_errno (rsp.op_errno),
+                         PC_MSG_REMOTE_OP_FAILED,
+                         "remote operation failed");
+         }
+         CLIENT_STACK_UNWIND (readdirp, frame, rsp.op_ret,
+                              gf_error_to_errno (rsp.op_errno), &entries, xdata);
+
+         if (rsp.op_ret != -1) {
+                 gf_dirent_free (&entries);
+         }
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata)
+                 dict_unref (xdata);
+
+         clnt_readdirp_rsp_cleanup (&rsp);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the RENAME fop.
+  *
+  * Decodes the gfs3_rename_rsp from @iov, lets client_post_rename() fill
+  * in the file attributes plus the pre/post attributes of the old and new
+  * parent directories, and unwinds the frame passed as @myframe.
+  * A transport failure is reported as ENOTCONN and a decode failure as
+  * EINVAL.  Always returns 0.
+  */
+ int
+ client3_3_rename_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+ {
+         xlator_t        *this         = THIS;
+         call_frame_t    *frame        = myframe;
+         gfs3_rename_rsp  rsp          = {0,};
+         struct iatt      file_attr    = {0,};
+         struct iatt      old_dir_pre  = {0,};
+         struct iatt      old_dir_post = {0,};
+         struct iatt      new_dir_pre  = {0,};
+         struct iatt      new_dir_post = {0,};
+         dict_t          *xdata        = NULL;
+         int              ret          = 0;
+
+         if (req->rpc_status == -1) {
+                 /* the request never got a reply from the server */
+                 rsp.op_ret   = -1;
+                 rsp.op_errno = ENOTCONN;
+         } else {
+                 ret = xdr_to_generic (*iov, &rsp,
+                                       (xdrproc_t)xdr_gfs3_rename_rsp);
+                 if (ret >= 0) {
+                         ret = client_post_rename (this, &rsp, &file_attr,
+                                                   &old_dir_pre,
+                                                   &old_dir_post,
+                                                   &new_dir_pre,
+                                                   &new_dir_post, &xdata);
+                 } else {
+                         gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                 PC_MSG_XDR_DECODING_FAILED,
+                                 "XDR decoding failed");
+                         rsp.op_ret   = -1;
+                         rsp.op_errno = EINVAL;
+                 }
+         }
+
+         if (rsp.op_ret == -1) {
+                 gf_msg (this->name, GF_LOG_WARNING,
+                         gf_error_to_errno (rsp.op_errno),
+                         PC_MSG_REMOTE_OP_FAILED,
+                         "remote operation failed");
+         }
+
+         CLIENT_STACK_UNWIND (rename, frame, rsp.op_ret,
+                              gf_error_to_errno (rsp.op_errno),
+                              &file_attr, &old_dir_pre, &old_dir_post,
+                              &new_dir_pre, &new_dir_post, xdata);
+
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata != NULL)
+                 dict_unref (xdata);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the LINK fop.
+  *
+  * Decodes the gfs3_link_rsp from @iov, lets client_post_link() fill in
+  * the file and parent attributes, and unwinds the frame passed as
+  * @myframe together with the inode saved in the frame's local.
+  * A transport failure is reported as ENOTCONN and a decode failure as
+  * EINVAL.  Always returns 0.
+  */
+ int
+ client3_3_link_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                     void *myframe)
+ {
+         xlator_t      *this        = THIS;
+         call_frame_t  *frame       = myframe;
+         clnt_local_t  *local       = frame->local;
+         /* read before the unwind, which is the last user of 'local' here */
+         inode_t       *inode       = local->loc.inode;
+         gfs3_link_rsp  rsp         = {0,};
+         struct iatt    file_attr   = {0,};
+         struct iatt    dir_pre     = {0,};
+         struct iatt    dir_post    = {0,};
+         dict_t        *xdata       = NULL;
+         int            ret         = 0;
+
+         if (req->rpc_status == -1) {
+                 /* the request never got a reply from the server */
+                 rsp.op_ret   = -1;
+                 rsp.op_errno = ENOTCONN;
+         } else {
+                 ret = xdr_to_generic (*iov, &rsp,
+                                       (xdrproc_t)xdr_gfs3_link_rsp);
+                 if (ret >= 0) {
+                         ret = client_post_link (this, &rsp, &file_attr,
+                                                 &dir_pre, &dir_post, &xdata);
+                 } else {
+                         gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                 PC_MSG_XDR_DECODING_FAILED,
+                                 "XDR decoding failed");
+                         rsp.op_ret   = -1;
+                         rsp.op_errno = EINVAL;
+                 }
+         }
+
+         if (rsp.op_ret == -1) {
+                 /* the macro decides whether this failure is worth a log */
+                 if (GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, rsp.op_errno)) {
+                         gf_msg (this->name, GF_LOG_WARNING,
+                                 gf_error_to_errno (rsp.op_errno),
+                                 PC_MSG_REMOTE_OP_FAILED,
+                                 "remote operation failed: (%s -> %s)",
+                                 local->loc.path, local->loc2.path);
+                 }
+         }
+
+         CLIENT_STACK_UNWIND (link, frame, rsp.op_ret,
+                              gf_error_to_errno (rsp.op_errno), inode,
+                              &file_attr, &dir_pre, &dir_post, xdata);
+
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata != NULL)
+                 dict_unref (xdata);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the OPENDIR fop.
+  *
+  * Decodes the gfs3_opendir_rsp from @iov.  When the server reported
+  * success, the fd stored in the frame's local is registered through
+  * client_add_fd_to_saved_fds() together with the remote fd from the
+  * response; a failure there turns the fop into an error.  The frame
+  * passed as @myframe is then unwound with the fd and xdata.
+  * Always returns 0.
+  */
+ int
+ client3_3_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+ {
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ call_frame_t *frame = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ gfs3_opendir_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ /* NOTE(review): 'conf' is assigned but not used in this function */
+ conf = frame->this->private;
+ fd = local->fd;
+
+ /* transport-level failure: there is no reply to decode */
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_opendir_rsp);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ /* on success, remember the remote fd for this local fd */
+ if (-1 != rsp.op_ret) {
+ ret = client_add_fd_to_saved_fds (frame->this, fd, &local->loc,
+ 0, rsp.fd, 1);
+ if (ret) {
+ /* ret is negated to form the errno reported upwards */
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ goto out;
+ }
+ }
+
+ ret = client_post_opendir (this, &rsp, &xdata);
+ if (ret < 0)
+ goto out;
+ out:
+ if (rsp.op_ret == -1) {
+ /* log level is chosen per fop and errno by fop_log_level() */
+ gf_msg (this->name, fop_log_level (GF_FOP_OPENDIR,
+ gf_error_to_errno (rsp.op_errno)),
+ gf_error_to_errno (rsp.op_errno),
+ PC_MSG_REMOTE_OP_FAILED, "remote operation failed."
+ " Path: %s (%s)",
+ local->loc.path, loc_gfid_utoa (&local->loc));
+ }
+ CLIENT_STACK_UNWIND (opendir, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), fd, xdata);
+
+ /* plain free() is used here, as for the other decoded responses */
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+ }
+
+
+ /*
+  * Reply handler for the LOOKUP fop.
+  *
+  * Decodes the gfs3_lookup_rsp from @iov and lets client_post_lookup()
+  * fill in the file attributes, parent attributes and xdata.  If the
+  * inode saved in the frame's local already has a non-null gfid that
+  * differs from the gfid in the reply, the lookup is failed with ESTALE
+  * and "gfid-changed" is set in xdata (when xdata exists).
+  * The errno is tracked in the local 'op_errno' and copied into
+  * rsp.op_errno at 'out', so the unwind uses it without a further
+  * gf_error_to_errno() conversion.  Always returns 0.
+  */
+ int
+ client3_3_lookup_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+ {
+ clnt_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ gfs3_lookup_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt postparent = {0,};
+ int op_errno = EINVAL;
+ dict_t *xdata = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+ inode = local->loc.inode;
+
+ /* transport-level failure: there is no reply to decode */
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* convert the errno from the reply once, up front */
+ op_errno = gf_error_to_errno (rsp.op_errno);
+
+ if (rsp.op_ret == -1)
+ goto out;
+
+ ret = client_post_lookup (this, &rsp, &stbuf, &postparent, &xdata);
+ if (ret < 0)
+ goto out;
+
+ /* a known gfid that no longer matches the reply is reported as ESTALE */
+ if ((!gf_uuid_is_null (inode->gfid))
+ && (gf_uuid_compare (stbuf.ia_gfid, inode->gfid) != 0)) {
+ gf_msg_debug (frame->this->name, 0,
+ "gfid changed for %s", local->loc.path);
+
+ rsp.op_ret = -1;
+ op_errno = ESTALE;
+ if (xdata)
+ ret = dict_set_int32 (xdata, "gfid-changed", 1);
+
+ goto out;
+ }
+
+ rsp.op_ret = 0;
+
+ out:
+ /* from here on rsp.op_errno holds the already-converted errno */
+ rsp.op_errno = op_errno;
+ if (rsp.op_ret == -1) {
+ /* any error other than ENOENT */
+ /* ENOENT on a named lookup and ESTALE only get a trace message */
+ if (!(local->loc.name && rsp.op_errno == ENOENT) &&
+ !(rsp.op_errno == ESTALE))
+ gf_msg (this->name, GF_LOG_WARNING, rsp.op_errno,
+ PC_MSG_REMOTE_OP_FAILED, "remote operation "
+ "failed. Path: %s (%s)",
+ local->loc.path,
+ loc_gfid_utoa (&local->loc));
+ else
+ gf_msg_trace (this->name, 0, "not found on remote "
+ "node");
+
+ }
+
+ CLIENT_STACK_UNWIND (lookup, frame, rsp.op_ret, rsp.op_errno, inode,
+ &stbuf, xdata, &postparent);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ /* plain free() is used here, as for the other decoded responses */
+ free (rsp.xdata.xdata_val);
+
+ return 0;
+ }
+
+ /*
+  * Reply handler for the READV fop.
+  *
+  * Decodes the gfs3_read_rsp from @iov and hands it, together with the
+  * request's response iobref and req->rsp[1], to client_post_readv(),
+  * which fills in the iobref, stat, vector, vector count and xdata.
+  * When the read did not fail and the frame's local has attempt_reopen
+  * set, client_attempt_reopen() is called on the local's fd before the
+  * frame passed as @myframe is unwound.  Always returns 0.
+  */
+ int
+ client3_3_readv_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+ {
+ call_frame_t *frame = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+ struct iatt stat = {0,};
+ gfs3_read_rsp rsp = {0,};
+ int ret = 0, rspcount = 0;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ /* NOTE(review): 'vector' is already zero-initialised at declaration */
+ memset (vector, 0, sizeof (vector));
+
+ frame = myframe;
+ local = frame->local;
+
+ /* transport-level failure: there is no reply to decode */
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_read_rsp);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = client_post_readv (this, &rsp, &iobref, req->rsp_iobref,
+ &stat, vector, &req->rsp[1],
+ &rspcount, &xdata);
+ if (ret < 0)
+ goto out;
+ out:
+ if (rsp.op_ret == -1) {
+ gf_msg (this->name, GF_LOG_WARNING,
+ gf_error_to_errno (rsp.op_errno),
+ PC_MSG_REMOTE_OP_FAILED,
+ "remote operation failed");
+ } else if (rsp.op_ret >= 0) {
+ /* a reopen is attempted only when the local asks for it */
+ if (local->attempt_reopen)
+ client_attempt_reopen (local->fd, this);
+ }
+ CLIENT_STACK_UNWIND (readv, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), vector, rspcount,
+ &stat, iobref, xdata);
+
+ /* plain free() is used here, as for the other decoded responses */
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+ }
+
+ /*
+  * Reply handler for RELEASE.  The reply itself is not inspected; the
+  * call stack that owns the frame passed as @myframe is destroyed.
+  * Always returns 0.
+  */
+ int
+ client3_3_release_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                        void *myframe)
+ {
+         call_frame_t *release_frame = myframe;
+
+         STACK_DESTROY (release_frame->root);
+
+         return 0;
+ }
+ /*
+  * Reply handler for RELEASEDIR.  The reply itself is not inspected; the
+  * call stack that owns the frame passed as @myframe is destroyed.
+  * Always returns 0.
+  */
+ int
+ client3_3_releasedir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                           void *myframe)
+ {
+         call_frame_t *releasedir_frame = myframe;
+
+         STACK_DESTROY (releasedir_frame->root);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the GETACTIVELK fop.
+  *
+  * Decodes the gfs3_getactivelk_rsp from @iov, rebuilds the lock list
+  * through clnt_unserialize_rsp_locklist() when op_ret is positive,
+  * unserializes xdata and unwinds the frame passed as @myframe with the
+  * lock list.  A transport failure is reported as ENOTCONN and a decode
+  * failure as EINVAL.  Always returns 0.
+  */
+ static int
+ client3_3_getactivelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                            void *myframe)
+ {
+         call_frame_t          *frame    = NULL;
+         gfs3_getactivelk_rsp   rsp      = {0,};
+         int32_t                ret      = 0;
+         lock_migration_info_t  locklist;
+         xlator_t              *this     = NULL;
+         dict_t                *xdata    = NULL;
+
+         this = THIS;
+
+         frame = myframe;
+
+         /*
+          * Initialise the list head before any early exit: &locklist is
+          * handed to the unwind on every path, including the failures
+          * below, so it must never be left uninitialised.
+          */
+         INIT_LIST_HEAD (&locklist.list);
+
+         if (-1 == req->rpc_status) {
+                 rsp.op_ret = -1;
+                 rsp.op_errno = ENOTCONN;
+                 goto out;
+         }
+
+         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_getactivelk_rsp);
+         if (ret < 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                         PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                 rsp.op_ret = -1;
+                 rsp.op_errno = EINVAL;
+                 goto out;
+         }
+
+         /* the list is only rebuilt for a positive op_ret */
+         if (rsp.op_ret > 0) {
+                 clnt_unserialize_rsp_locklist (this, &rsp, &locklist);
+         }
+
+         GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+                                       (rsp.xdata.xdata_len), ret,
+                                       rsp.op_errno, out);
+
+ out:
+         if (rsp.op_ret == -1) {
+                 gf_msg (this->name, GF_LOG_WARNING,
+                         gf_error_to_errno (rsp.op_errno),
+                         PC_MSG_REMOTE_OP_FAILED,
+                         "remote operation failed");
+         }
+
+         CLIENT_STACK_UNWIND (getactivelk, frame, rsp.op_ret,
+                              gf_error_to_errno (rsp.op_errno), &locklist,
+                              xdata);
+
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata)
+                 dict_unref (xdata);
+
+         clnt_getactivelk_rsp_cleanup (&rsp);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the SETACTIVELK fop.
+  *
+  * Decodes the gfs3_setactivelk_rsp from @iov, unserializes xdata and
+  * unwinds the frame passed as @myframe.  A transport failure is reported
+  * as ENOTCONN and a decode failure as EINVAL.  Always returns 0.
+  */
+ static int
+ client3_3_setactivelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                            void *myframe)
+ {
+         call_frame_t          *frame   = NULL;
+         /*
+          * The response object must have the type the XDR routine below
+          * decodes into; decoding a setactivelk reply into a
+          * gfs3_getactivelk_rsp leaves the xdata fields at the wrong
+          * offsets.
+          */
+         gfs3_setactivelk_rsp   rsp     = {0,};
+         int32_t                ret     = 0;
+         xlator_t              *this    = NULL;
+         dict_t                *xdata   = NULL;
+
+         this = THIS;
+
+         frame = myframe;
+
+         if (-1 == req->rpc_status) {
+                 rsp.op_ret = -1;
+                 rsp.op_errno = ENOTCONN;
+                 goto out;
+         }
+
+         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_setactivelk_rsp);
+         if (ret < 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                         PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                 rsp.op_ret = -1;
+                 rsp.op_errno = EINVAL;
+                 goto out;
+         }
+
+         GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+                                       (rsp.xdata.xdata_len), ret,
+                                       rsp.op_errno, out);
+
+ out:
+         if (rsp.op_ret == -1) {
+                 gf_msg (this->name, GF_LOG_WARNING,
+                         gf_error_to_errno (rsp.op_errno),
+                         PC_MSG_REMOTE_OP_FAILED,
+                         "remote operation failed");
+         }
+
+         CLIENT_STACK_UNWIND (setactivelk, frame, rsp.op_ret,
+                              gf_error_to_errno (rsp.op_errno), xdata);
+
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata)
+                 dict_unref (xdata);
+
+         return 0;
+ }
+
+ /*
+  * Reply handler for the COMPOUND fop.
+  *
+  * Decodes the gfs3_compound_rsp from @iov, allocates a
+  * compound_args_cbk_t with one response slot per fop (the count comes
+  * from the frame's local), lets client_process_response() fill each
+  * slot, unserializes xdata and unwinds the frame passed as @myframe.
+  * The unwind carries -1 on failure; op_errno is ENOTCONN for a transport
+  * failure, EINVAL for a decode failure and ENOMEM for an allocation
+  * failure.  Always returns 0.
+  */
+ int
+ client3_3_compound_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+ {
+         gfs3_compound_rsp    rsp      = {0,};
+         compound_args_cbk_t *args_cbk = NULL;
+         call_frame_t        *frame    = NULL;
+         int                  ret      = -1;
+         xlator_t            *this     = NULL;
+         dict_t              *xdata    = NULL;
+         clnt_local_t        *local    = NULL;
+         int                  op_errno = 0;
+         int                  i        = 0;
+         int                  length   = 0;
+
+         this = THIS;
+
+         frame = myframe;
+         local = frame->local;
+
+         if (-1 == req->rpc_status) {
+                 op_errno = ENOTCONN;
+                 goto out;
+         }
+
+         ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_compound_rsp);
+         if (ret < 0) {
+                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                         PC_MSG_XDR_DECODING_FAILED, "XDR decoding failed");
+                 op_errno = EINVAL;
+                 goto out;
+         }
+
+         args_cbk = GF_CALLOC (1, sizeof (compound_args_cbk_t), gf_mt_compound_rsp_t);
+         if (!args_cbk) {
+                 /* 'ret' still holds the decode result; force a failure */
+                 op_errno = ENOMEM;
+                 ret = -1;
+                 goto out;
+         }
+
+         length = args_cbk->fop_length = local->length;
+
+         args_cbk->rsp_list = GF_CALLOC (length, sizeof (default_args_cbk_t),
+                                         gf_mt_default_args_cbk_t);
+         if (!args_cbk->rsp_list) {
+                 op_errno = ENOMEM;
+                 ret = -1;
+                 goto out;
+         }
+
+         op_errno = rsp.op_errno;
+
+         for (i = 0; i < args_cbk->fop_length; i++) {
+                 ret = client_process_response (frame, this, req, &rsp,
+                                                args_cbk, i);
+                 if (ret) {
+                         /* ret is negated to form the errno reported upwards */
+                         op_errno = -ret;
+                         ret = -1;
+                         goto out;
+                 }
+
+         }
+
+         GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+                                       (rsp.xdata.xdata_len), ret,
+                                       rsp.op_errno, out);
+
+         ret = 0;
+ out:
+         CLIENT_STACK_UNWIND (compound, frame, ret,
+                              gf_error_to_errno (op_errno), args_cbk, xdata);
+
+         /* released with plain free(), not GF_FREE */
+         free (rsp.xdata.xdata_val);
+
+         if (xdata)
+                 dict_unref (xdata);
+
+         /*
+          * args_cbk is still NULL when we arrive here from a transport,
+          * decode or allocation failure, so it must be checked before its
+          * members are touched.
+          */
+         if (args_cbk) {
+                 if (args_cbk->rsp_list) {
+                         for (i = 0; i < length; i++) {
+                                 args_cbk_wipe (&args_cbk->rsp_list[i]);
+                         }
+                 }
+                 GF_FREE (args_cbk->rsp_list);
+                 GF_FREE (args_cbk);
+         }
+         return 0;
+ }
+
+ /*
+  * Tear down a client fd context.
+  *
+  * Detaches and unrefs the context's lock context, then (unless the
+  * parent is marked down or the context has no valid remote fd) sends a
+  * RELEASEDIR or RELEASE request for the remote fd on a fresh frame.
+  * @fdctx is freed on every path where it is non-NULL.
+  *
+  * Returns 0 when the release request was submitted, -1 otherwise.
+  */
+ int
+ client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx)
+ {
+         clnt_conf_t  *conf        = NULL;
+         call_frame_t *fr          = NULL;
+         int32_t       ret         = -1;
+         char          parent_down = 0;
+         fd_lk_ctx_t  *lk_ctx      = NULL;
+
+         GF_VALIDATE_OR_GOTO ("client", this, out);
+         GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+
+         conf = (clnt_conf_t *) this->private;
+
+         if (fdctx->remote_fd == -1) {
+                 gf_msg_debug (this->name, 0, "not a valid fd");
+                 goto out;
+         }
+
+         /* snapshot parent_down and detach the lock context under the lock */
+         pthread_mutex_lock (&conf->lock);
+         {
+                 parent_down = conf->parent_down;
+                 lk_ctx = fdctx->lk_ctx;
+                 fdctx->lk_ctx = NULL;
+         }
+         pthread_mutex_unlock (&conf->lock);
+
+         if (lk_ctx)
+                 fd_lk_ctx_unref (lk_ctx);
+
+         if (!parent_down)
+                 rpc_clnt_ref (conf->rpc);
+         else
+                 goto out;
+
+         fr = create_frame (this, this->ctx->pool);
+         if (fr == NULL) {
+                 /* drop the reference taken above before bailing out */
+                 rpc_clnt_unref (conf->rpc);
+                 goto out;
+         }
+
+         ret = 0;
+
+         if (fdctx->is_dir) {
+                 gfs3_releasedir_req  req = {{0,},};
+                 req.fd = fdctx->remote_fd;
+                 gf_msg_trace (this->name, 0, "sending releasedir on fd");
+                 client_submit_request (this, &req, fr, &clnt3_3_fop_prog,
+                                        GFS3_OP_RELEASEDIR,
+                                        client3_3_releasedir_cbk,
+                                        NULL, NULL, 0, NULL, 0, NULL,
+                                        (xdrproc_t)xdr_gfs3_releasedir_req);
+         } else {
+                 gfs3_release_req  req = {{0,},};
+                 req.fd = fdctx->remote_fd;
+                 gf_msg_trace (this->name, 0, "sending release on fd");
+                 client_submit_request (this, &req, fr, &clnt3_3_fop_prog,
+                                        GFS3_OP_RELEASE,
+                                        client3_3_release_cbk, NULL,
+                                        NULL, 0, NULL, 0, NULL,
+                                        (xdrproc_t)xdr_gfs3_release_req);
+         }
+
+         rpc_clnt_unref (conf->rpc);
+ out:
+         if (fdctx) {
+                 fdctx->remote_fd = -1;
+                 GF_FREE (fdctx);
+         }
+
+         return ret;
+ }
+
+ /*
+  * RELEASEDIR entry point.
+  *
+  * Removes the client fd context attached to args->fd and marks it
+  * released, all under conf->lock.  When the context has a valid remote
+  * fd it is unlinked from its list and destroyed through
+  * client_fdctx_destroy().  @frame is not used.  Always returns 0.
+  */
+ int32_t
+ client3_3_releasedir (call_frame_t *frame, xlator_t *this,
+ void *data)
+ {
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+
+ if (!this || !data)
+ goto out;
+
+ args = data;
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_del_ctx (args->fd, this);
+ if (fdctx != NULL) {
+ remote_fd = fdctx->remote_fd;
+
+ /* fdctx->remote_fd == -1 indicates a reopen attempt
+ in progress. Just mark ->released = 1 and let
+ reopen_cbk handle releasing
+ */
+
+ if (remote_fd != -1)
+ list_del_init (&fdctx->sfd_pos);
+
+ fdctx->released = 1;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ /* remote_fd stays -1 when no context was found, so fdctx is non-NULL here */
+ if (remote_fd != -1)
+ client_fdctx_destroy (this, fdctx);
+ out:
+
+ return 0;
+ }
+
+ /*
+  * RELEASE entry point.
+  *
+  * Removes the client fd context attached to args->fd and marks it
+  * released, all under conf->lock.  The context is unlinked and destroyed
+  * through client_fdctx_destroy() only when it has a valid remote fd and
+  * its lk_heal_state is GF_LK_HEAL_DONE.  @frame is not used.
+  * Always returns 0.
+  */
+ int32_t
+ client3_3_release (call_frame_t *frame, xlator_t *this,
+ void *data)
+ {
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_args_t *args = NULL;
+ lk_heal_state_t lk_heal_state = GF_LK_HEAL_DONE;
+
+ if (!this || !data)
+ goto out;
+
+ args = data;
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_del_ctx (args->fd, this);
+ if (fdctx != NULL) {
+ remote_fd = fdctx->remote_fd;
+ lk_heal_state = fdctx->lk_heal_state;
+
+ /* fdctx->remote_fd == -1 indicates a reopen attempt
+ in progress. Just mark ->released = 1 and let
+ reopen_cbk handle releasing
+ */
+
+ if (remote_fd != -1 &&
+ lk_heal_state == GF_LK_HEAL_DONE)
+ list_del_init (&fdctx->sfd_pos);
+
+ fdctx->released = 1;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ /* same condition as the unlink above, evaluated on the saved copies */
+ if (remote_fd != -1 && lk_heal_state == GF_LK_HEAL_DONE)
+ client_fdctx_destroy (this, fdctx);
+ out:
+ return 0;
+ }
+
+
+ /*
+  * LOOKUP entry point.
+  *
+  * Allocates the frame's local, copies args->loc into it and, when the
+  * request xdata carries GF_CONTENT_KEY, prepares a response buffer
+  * (one iobuf wrapped in an iobref) that is passed to the request as the
+  * response header vector.  The request is then built by
+  * client_pre_lookup() and submitted with client3_3_lookup_cbk as the
+  * reply handler.
+  *
+  * On any failure before submission the frame is unwound with -1 and
+  * op_errno (ESTALE by default, ENOMEM when the local cannot be
+  * allocated, or the negated client_pre_lookup() result).
+  * Always returns 0.
+  */
+ int32_t
+ client3_3_lookup (call_frame_t *frame, xlator_t *this,
+ void *data)
+ {
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_lookup_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+ data_t *content = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+ int count = 0;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ /* NOTE(review): 'vector' is already zero-initialised at declaration */
+ memset (vector, 0, sizeof (vector));
+
+ conf = this->private;
+ args = data;
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ frame->local = local;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+
+ if (args->xdata) {
+ content = dict_get (args->xdata, GF_CONTENT_KEY);
+ if (content != NULL) {
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ /* This change very much depends on quick-read
+ changes */
+ rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+ if (rsp_iobuf == NULL) {
+ goto unwind;
+ }
+
+ /* the iobuf is added to the iobref and our own ref is dropped;
+ the pointer is still used below to fill in rsphdr */
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ /* the iobref is stored in the local; clearing both pointers keeps
+ the unwind path below from unreffing them */
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+ }
+
+ }
+
+ ret = client_pre_lookup (this, &req, args->loc, args->xdata);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_LOOKUP, client3_3_lookup_cbk,
+ NULL, rsphdr, count,
+ NULL, 0, local->iobref,
+ (xdrproc_t)xdr_gfs3_lookup_req);
+
+ /* a submission failure is only logged; no unwind happens here */
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+ unwind:
+ CLIENT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ return 0;
+ }
+
+ /*
+  * STAT entry point.
+  *
+  * Builds the request with client_pre_stat() and submits it with
+  * client3_3_stat_cbk as the reply handler.  If an argument is missing or
+  * the request cannot be built, the frame is unwound with -1 and op_errno
+  * (ESTALE by default, otherwise the negated client_pre_stat() result).
+  * A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_stat (call_frame_t *frame, xlator_t *this,
+                 void *data)
+ {
+         gfs3_stat_req  stat_req = {{0,},};
+         clnt_args_t   *fop_args = data;
+         clnt_conf_t   *priv     = NULL;
+         int            err      = ESTALE;
+         int            rc       = 0;
+
+         if (frame && this && data) {
+                 priv = this->private;
+
+                 rc = client_pre_stat (this, &stat_req, fop_args->loc,
+                                       fop_args->xdata);
+                 if (rc == 0) {
+                         rc = client_submit_request (this, &stat_req, frame,
+                                                     priv->fops, GFS3_OP_STAT,
+                                                     client3_3_stat_cbk, NULL,
+                                                     NULL, 0, NULL, 0, NULL,
+                                                     (xdrproc_t)xdr_gfs3_stat_req);
+                         if (rc != 0) {
+                                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                                         PC_MSG_FOP_SEND_FAILED,
+                                         "failed to send the fop");
+                         }
+                         goto done;
+                 }
+                 err = -rc;
+         }
+
+         CLIENT_STACK_UNWIND (stat, frame, -1, err, NULL, NULL);
+ done:
+         GF_FREE (stat_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+ /*
+  * TRUNCATE entry point.
+  *
+  * Builds the request with client_pre_truncate() and submits it with
+  * client3_3_truncate_cbk as the reply handler.  If an argument is missing
+  * or the request cannot be built, the frame is unwound with -1 and
+  * op_errno (ESTALE by default, otherwise the negated
+  * client_pre_truncate() result).  A submission failure is only logged.
+  * Always returns 0.
+  */
+ int32_t
+ client3_3_truncate (call_frame_t *frame, xlator_t *this,
+                     void *data)
+ {
+         gfs3_truncate_req  trunc_req = {{0,},};
+         clnt_args_t       *fop_args  = data;
+         clnt_conf_t       *priv      = NULL;
+         int                err       = ESTALE;
+         int                rc        = 0;
+
+         if (frame && this && data) {
+                 priv = this->private;
+
+                 rc = client_pre_truncate (this, &trunc_req, fop_args->loc,
+                                           fop_args->offset, fop_args->xdata);
+                 if (rc == 0) {
+                         rc = client_submit_request (this, &trunc_req, frame,
+                                                     priv->fops,
+                                                     GFS3_OP_TRUNCATE,
+                                                     client3_3_truncate_cbk,
+                                                     NULL, NULL, 0, NULL, 0,
+                                                     NULL,
+                                                     (xdrproc_t)xdr_gfs3_truncate_req);
+                         if (rc != 0) {
+                                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                                         PC_MSG_FOP_SEND_FAILED,
+                                         "failed to send the fop");
+                         }
+                         goto done;
+                 }
+                 err = -rc;
+         }
+
+         CLIENT_STACK_UNWIND (truncate, frame, -1, err, NULL, NULL, NULL);
+ done:
+         GF_FREE (trunc_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+ /*
+  * FTRUNCATE entry point.
+  *
+  * Builds the request with client_pre_ftruncate() and submits it with
+  * client3_3_ftruncate_cbk as the reply handler.  If an argument is
+  * missing or the request cannot be built, the frame is unwound with -1
+  * and op_errno (EINVAL by default, otherwise the negated
+  * client_pre_ftruncate() result).  A submission failure is only logged.
+  * Always returns 0.
+  */
+ int32_t
+ client3_3_ftruncate (call_frame_t *frame, xlator_t *this,
+                      void *data)
+ {
+         gfs3_ftruncate_req  ftrunc_req = {{0,},};
+         clnt_args_t        *fop_args   = data;
+         clnt_conf_t        *priv       = NULL;
+         int                 err        = EINVAL;
+         int                 rc         = 0;
+
+         if (frame && this && data) {
+                 priv = this->private;
+
+                 rc = client_pre_ftruncate (this, &ftrunc_req, fop_args->fd,
+                                            fop_args->offset, fop_args->xdata);
+                 if (rc == 0) {
+                         rc = client_submit_request (this, &ftrunc_req, frame,
+                                                     priv->fops,
+                                                     GFS3_OP_FTRUNCATE,
+                                                     client3_3_ftruncate_cbk,
+                                                     NULL, NULL, 0, NULL, 0,
+                                                     NULL,
+                                                     (xdrproc_t)xdr_gfs3_ftruncate_req);
+                         if (rc != 0) {
+                                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                                         PC_MSG_FOP_SEND_FAILED,
+                                         "failed to send the fop");
+                         }
+                         goto done;
+                 }
+                 err = -rc;
+         }
+
+         CLIENT_STACK_UNWIND (ftruncate, frame, -1, err, NULL, NULL, NULL);
+ done:
+         GF_FREE (ftrunc_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+ /*
+  * ACCESS entry point.
+  *
+  * Builds the request with client_pre_access() and submits it with
+  * client3_3_access_cbk as the reply handler.  If an argument is missing
+  * or the request cannot be built, the frame is unwound with -1 and
+  * op_errno (ESTALE by default, otherwise the negated client_pre_access()
+  * result).  A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_access (call_frame_t *frame, xlator_t *this,
+                   void *data)
+ {
+         gfs3_access_req  access_req = {{0,},};
+         clnt_args_t     *fop_args   = data;
+         clnt_conf_t     *priv       = NULL;
+         int              err        = ESTALE;
+         int              rc         = 0;
+
+         if (frame && this && data) {
+                 priv = this->private;
+
+                 rc = client_pre_access (this, &access_req, fop_args->loc,
+                                         fop_args->mask, fop_args->xdata);
+                 if (rc == 0) {
+                         rc = client_submit_request (this, &access_req, frame,
+                                                     priv->fops,
+                                                     GFS3_OP_ACCESS,
+                                                     client3_3_access_cbk,
+                                                     NULL, NULL, 0, NULL, 0,
+                                                     NULL,
+                                                     (xdrproc_t)xdr_gfs3_access_req);
+                         if (rc != 0) {
+                                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                                         PC_MSG_FOP_SEND_FAILED,
+                                         "failed to send the fop");
+                         }
+                         goto done;
+                 }
+                 err = -rc;
+         }
+
+         CLIENT_STACK_UNWIND (access, frame, -1, err, NULL);
+ done:
+         GF_FREE (access_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+ /*
+  * READLINK entry point.
+  *
+  * Allocates the frame's local, builds the request with
+  * client_pre_readlink(), prepares a response buffer (one iobuf wrapped
+  * in an iobref, kept in the local) and submits the request with
+  * client3_3_readlink_cbk as the reply handler.
+  *
+  * On any failure before submission the frame is unwound with -1 and
+  * op_errno (ESTALE by default, ENOMEM when the local cannot be
+  * allocated, or the negated client_pre_readlink() result).
+  * Always returns 0.
+  */
+ int32_t
+ client3_3_readlink (call_frame_t *frame, xlator_t *this,
+ void *data)
+ {
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_readlink_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+ clnt_local_t *local = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iobref *rsp_iobref = NULL;
+ struct iovec *rsphdr = NULL;
+ int count = 0;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ conf = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ ret = client_pre_readlink (this, &req, args->loc, args->size,
+ args->xdata);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ /* NOTE(review): the two allocation failures below unwind with the
+ default ESTALE rather than ENOMEM */
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ goto unwind;
+ }
+
+ rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+ if (rsp_iobuf == NULL) {
+ goto unwind;
+ }
+
+ /* the iobuf is added to the iobref and our own ref is dropped;
+ the pointer is still used below to fill in rsphdr */
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ /* the iobref is stored in the local; clearing the pointers keeps the
+ unwind path below from unreffing it */
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_READLINK,
+ client3_3_readlink_cbk, NULL,
+ rsphdr, count, NULL, 0,
+ local->iobref,
+ (xdrproc_t)xdr_gfs3_readlink_req);
+ /* a submission failure is only logged; no unwind happens here */
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+ unwind:
+ if (rsp_iobref != NULL) {
+ iobref_unref (rsp_iobref);
+ }
+
+ CLIENT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+ }
+
+ /*
+  * UNLINK entry point.
+  *
+  * Builds the request with client_pre_unlink() and submits it with
+  * client3_3_unlink_cbk as the reply handler.  If an argument is missing
+  * or the request cannot be built, the frame is unwound with -1 and
+  * op_errno (ESTALE by default, otherwise the negated client_pre_unlink()
+  * result).  A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_unlink (call_frame_t *frame, xlator_t *this,
+                   void *data)
+ {
+         clnt_conf_t     *conf     = NULL;
+         clnt_args_t     *args     = NULL;
+         gfs3_unlink_req  req      = {{0,},};
+         int              ret      = 0;
+         /*
+          * Default to ESTALE like the sibling fops, so that the
+          * missing-argument path never unwinds a failure with errno 0.
+          */
+         int              op_errno = ESTALE;
+
+         if (!frame || !this || !data)
+                 goto unwind;
+
+         args = data;
+         conf = this->private;
+
+         ret = client_pre_unlink (this, &req, args->loc, args->flags,
+                                  args->xdata);
+         if (ret) {
+                 /* ret is negated to form the errno reported upwards */
+                 op_errno = -ret;
+                 goto unwind;
+         }
+         ret = client_submit_request (this, &req, frame, conf->fops,
+                                      GFS3_OP_UNLINK,
+                                      client3_3_unlink_cbk, NULL,
+                                      NULL, 0, NULL, 0,
+                                      NULL, (xdrproc_t)xdr_gfs3_unlink_req);
+         if (ret) {
+                 gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                         "failed to send the fop");
+         }
+
+         GF_FREE (req.xdata.xdata_val);
+
+         return 0;
+ unwind:
+         CLIENT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+         GF_FREE (req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+
+ /*
+  * RMDIR entry point.
+  *
+  * Builds the request with client_pre_rmdir() and submits it with
+  * client3_3_rmdir_cbk as the reply handler.  If an argument is missing
+  * or the request cannot be built, the frame is unwound with -1 and
+  * op_errno (ESTALE by default, otherwise the negated client_pre_rmdir()
+  * result).  A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_rmdir (call_frame_t *frame, xlator_t *this,
+                  void *data)
+ {
+         gfs3_rmdir_req  rmdir_req = {{0,},};
+         clnt_args_t    *fop_args  = data;
+         clnt_conf_t    *priv      = NULL;
+         int             err       = ESTALE;
+         int             rc        = 0;
+
+         if (frame && this && data) {
+                 priv = this->private;
+
+                 rc = client_pre_rmdir (this, &rmdir_req, fop_args->loc,
+                                        fop_args->flags, fop_args->xdata);
+                 if (rc == 0) {
+                         rc = client_submit_request (this, &rmdir_req, frame,
+                                                     priv->fops, GFS3_OP_RMDIR,
+                                                     client3_3_rmdir_cbk, NULL,
+                                                     NULL, 0, NULL, 0, NULL,
+                                                     (xdrproc_t)xdr_gfs3_rmdir_req);
+                         if (rc != 0) {
+                                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                                         PC_MSG_FOP_SEND_FAILED,
+                                         "failed to send the fop");
+                         }
+                         goto done;
+                 }
+                 err = -rc;
+         }
+
+         CLIENT_STACK_UNWIND (rmdir, frame, -1, err, NULL, NULL, NULL);
+ done:
+         GF_FREE (rmdir_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+
+ /*
+  * SYMLINK entry point.
+  *
+  * Allocates the frame's local, records the target location and a copy
+  * of the link name in it, builds the request with client_pre_symlink()
+  * and submits it with client3_3_symlink_cbk as the reply handler.
+  *
+  * If an argument is missing, the location has no parent, or the request
+  * cannot be built, the frame is unwound with -1 and op_errno (ESTALE by
+  * default, ENOMEM when the local cannot be allocated, or the negated
+  * client_pre_symlink() result).  A submission failure is only logged.
+  * Always returns 0.
+  */
+ int32_t
+ client3_3_symlink (call_frame_t *frame, xlator_t *this,
+                    void *data)
+ {
+         gfs3_symlink_req  symlink_req = {{0,},};
+         clnt_args_t      *fop_args    = data;
+         clnt_conf_t      *priv        = NULL;
+         clnt_local_t     *fop_local   = NULL;
+         int               err         = ESTALE;
+         int               rc          = 0;
+
+         if (!frame || !this || !data)
+                 goto fail;
+
+         priv = this->private;
+
+         fop_local = mem_get0 (this->local_pool);
+         if (fop_local == NULL) {
+                 err = ENOMEM;
+                 goto fail;
+         }
+         frame->local = fop_local;
+
+         if (!fop_args->loc || !fop_args->loc->parent)
+                 goto fail;
+
+         loc_copy (&fop_local->loc, fop_args->loc);
+         loc_path (&fop_local->loc, NULL);
+
+         /* the link name is kept in loc2.path of the local */
+         fop_local->loc2.path = gf_strdup (fop_args->linkname);
+
+         rc = client_pre_symlink (this, &symlink_req, fop_args->loc,
+                                  fop_args->linkname, fop_args->umask,
+                                  fop_args->xdata);
+         if (rc != 0) {
+                 err = -rc;
+                 goto fail;
+         }
+
+         rc = client_submit_request (this, &symlink_req, frame, priv->fops,
+                                     GFS3_OP_SYMLINK, client3_3_symlink_cbk,
+                                     NULL, NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_symlink_req);
+         if (rc != 0) {
+                 gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                         "failed to send the fop");
+         }
+         goto done;
+
+ fail:
+         CLIENT_STACK_UNWIND (symlink, frame, -1, err, NULL, NULL, NULL,
+                              NULL, NULL);
+ done:
+         GF_FREE (symlink_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+ /*
+  * RENAME entry point.
+  *
+  * Builds the request with client_pre_rename() and submits it with
+  * client3_3_rename_cbk as the reply handler.  If an argument is missing
+  * or the request cannot be built, the frame is unwound with -1 and
+  * op_errno (ESTALE by default, otherwise the negated client_pre_rename()
+  * result).  A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_rename (call_frame_t *frame, xlator_t *this,
+                   void *data)
+ {
+         gfs3_rename_req  rename_req = {{0,},};
+         clnt_args_t     *fop_args   = data;
+         clnt_conf_t     *priv       = NULL;
+         int              err        = ESTALE;
+         int              rc         = 0;
+
+         if (frame && this && data) {
+                 priv = this->private;
+
+                 rc = client_pre_rename (this, &rename_req, fop_args->oldloc,
+                                         fop_args->newloc, fop_args->xdata);
+                 if (rc == 0) {
+                         rc = client_submit_request (this, &rename_req, frame,
+                                                     priv->fops,
+                                                     GFS3_OP_RENAME,
+                                                     client3_3_rename_cbk,
+                                                     NULL, NULL, 0, NULL, 0,
+                                                     NULL,
+                                                     (xdrproc_t)xdr_gfs3_rename_req);
+                         if (rc != 0) {
+                                 gf_msg (this->name, GF_LOG_WARNING, 0,
+                                         PC_MSG_FOP_SEND_FAILED,
+                                         "failed to send the fop");
+                         }
+                         goto done;
+                 }
+                 err = -rc;
+         }
+
+         CLIENT_STACK_UNWIND (rename, frame, -1, err, NULL, NULL, NULL,
+                              NULL, NULL, NULL);
+ done:
+         GF_FREE (rename_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+ /*
+  * LINK entry point.
+  *
+  * Allocates the frame's local, builds the request with
+  * client_pre_link(), records the old and new locations in the local and
+  * submits the request with client3_3_link_cbk as the reply handler.
+  *
+  * If an argument is missing or the request cannot be built, the frame
+  * is unwound with -1 and op_errno (ESTALE by default, ENOMEM when the
+  * local cannot be allocated, or the negated client_pre_link() result).
+  * A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_link (call_frame_t *frame, xlator_t *this,
+                 void *data)
+ {
+         gfs3_link_req  link_req  = {{0,},};
+         clnt_args_t   *fop_args  = data;
+         clnt_conf_t   *priv      = NULL;
+         clnt_local_t  *fop_local = NULL;
+         int            err       = ESTALE;
+         int            rc        = 0;
+
+         if (!frame || !this || !data)
+                 goto fail;
+
+         priv = this->private;
+
+         fop_local = mem_get0 (this->local_pool);
+         if (fop_local == NULL) {
+                 err = ENOMEM;
+                 goto fail;
+         }
+         frame->local = fop_local;
+
+         rc = client_pre_link (this, &link_req, fop_args->oldloc,
+                               fop_args->newloc, fop_args->xdata);
+         if (rc != 0) {
+                 err = -rc;
+                 goto fail;
+         }
+
+         /* both locations are kept in the local for the reply handler */
+         loc_copy (&fop_local->loc, fop_args->oldloc);
+         loc_path (&fop_local->loc, NULL);
+         loc_copy (&fop_local->loc2, fop_args->newloc);
+         loc_path (&fop_local->loc2, NULL);
+
+         rc = client_submit_request (this, &link_req, frame, priv->fops,
+                                     GFS3_OP_LINK, client3_3_link_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_link_req);
+         if (rc != 0) {
+                 gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                         "failed to send the fop");
+         }
+         goto done;
+
+ fail:
+         CLIENT_STACK_UNWIND (link, frame, -1, err, NULL, NULL, NULL, NULL, NULL);
+ done:
+         GF_FREE (link_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+
+ /*
+  * MKNOD entry point.
+  *
+  * Allocates the frame's local, records the target location in it,
+  * builds the request with client_pre_mknod() and submits it with
+  * client3_3_mknod_cbk as the reply handler.
+  *
+  * If an argument is missing or the request cannot be built, the frame
+  * is unwound with -1 and op_errno (ESTALE by default, ENOMEM when the
+  * local cannot be allocated, or the negated client_pre_mknod() result).
+  * A submission failure is only logged.  Always returns 0.
+  */
+ int32_t
+ client3_3_mknod (call_frame_t *frame, xlator_t *this,
+                  void *data)
+ {
+         gfs3_mknod_req  mknod_req = {{0,},};
+         clnt_args_t    *fop_args  = data;
+         clnt_conf_t    *priv      = NULL;
+         clnt_local_t   *fop_local = NULL;
+         int             err       = ESTALE;
+         int             rc        = 0;
+
+         if (!frame || !this || !data)
+                 goto fail;
+
+         priv = this->private;
+
+         fop_local = mem_get0 (this->local_pool);
+         if (fop_local == NULL) {
+                 err = ENOMEM;
+                 goto fail;
+         }
+         frame->local = fop_local;
+
+         loc_copy (&fop_local->loc, fop_args->loc);
+         loc_path (&fop_local->loc, NULL);
+
+         rc = client_pre_mknod (this, &mknod_req, fop_args->loc,
+                                fop_args->mode, fop_args->rdev,
+                                fop_args->umask, fop_args->xdata);
+         if (rc != 0) {
+                 err = -rc;
+                 goto fail;
+         }
+
+         rc = client_submit_request (this, &mknod_req, frame, priv->fops,
+                                     GFS3_OP_MKNOD, client3_3_mknod_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_mknod_req);
+         if (rc != 0) {
+                 gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                         "failed to send the fop");
+         }
+         goto done;
+
+ fail:
+         CLIENT_STACK_UNWIND (mknod, frame, -1, err, NULL, NULL, NULL,
+                              NULL, NULL);
+ done:
+         GF_FREE (mknod_req.xdata.xdata_val);
+
+         return 0;
+ }
+
+
+
+/*
+ * MKDIR fop, protocol version 3.3.
+ *
+ * Rejects the call with EPERM when xdata carries no "gfid-req" key, then
+ * allocates the frame-local, copies args->loc into it, encodes the request
+ * through client_pre_mkdir() and submits it as GFS3_OP_MKDIR with
+ * client3_3_mkdir_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_mkdir (call_frame_t *frame, xlator_t *this,
+                 void *data)
+{
+        clnt_local_t    *local    = NULL;
+        clnt_conf_t     *conf     = NULL;
+        clnt_args_t     *args     = NULL;
+        gfs3_mkdir_req   req      = {{0,},};
+        const char      *path     = "(null)";
+        int              ret      = 0;
+        int              op_errno = ESTALE;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        if (!args->xdata || !dict_get (args->xdata, "gfid-req")) {
+                op_errno = EPERM;
+                /*
+                 * args->loc is only validated further down, so it must not
+                 * be dereferenced blindly just to build the log message.
+                 */
+                if (args->loc && args->loc->path)
+                        path = args->loc->path;
+                gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno,
+                                  PC_MSG_GFID_NULL, "mkdir: %s is received "
+                                  "without gfid-req %p", path,
+                                  args->xdata);
+                goto unwind;
+        }
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        frame->local = local;
+
+        /* a missing loc or parent is reported with the default ESTALE */
+        if (!(args->loc && args->loc->parent))
+                goto unwind;
+
+        loc_copy (&local->loc, args->loc);
+        loc_path (&local->loc, NULL);
+
+        ret = client_pre_mkdir (this, &req, args->loc, args->mode,
+                                args->umask, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_MKDIR, client3_3_mkdir_cbk, NULL,
+                                     NULL, 0, NULL, 0,
+                                     NULL, (xdrproc_t)xdr_gfs3_mkdir_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+                             NULL, NULL);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * CREATE fop, protocol version 3.3.
+ *
+ * Allocates the frame-local and records a reference on args->fd, the open
+ * flags and a copy of args->loc in it.  The request is encoded by
+ * client_pre_create() and submitted as GFS3_OP_CREATE with
+ * client3_3_create_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_create (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_create_req  req      = {{0,},};
+        clnt_args_t     *args     = data;
+        clnt_conf_t     *conf     = NULL;
+        clnt_local_t    *local    = NULL;
+        int              op_errno = ESTALE;
+        int              rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+        frame->local = local;
+
+        local->fd    = fd_ref (args->fd);
+        local->flags = args->flags;
+
+        loc_copy (&local->loc, args->loc);
+        loc_path (&local->loc, NULL);
+
+        rc = client_pre_create (this, &req, args->loc, args->fd, args->mode,
+                                args->flags, args->umask, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_CREATE, client3_3_create_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_create_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+                             NULL, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * OPEN fop, protocol version 3.3.
+ *
+ * Allocates the frame-local and records the open flags, a reference on
+ * args->fd and a copy of args->loc in it.  The request is encoded by
+ * client_pre_open() and submitted as GFS3_OP_OPEN with client3_3_open_cbk
+ * registered for the reply.  A failure before submission unwinds the frame
+ * with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_open (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_open_req  req      = {{0,},};
+        clnt_args_t   *args     = data;
+        clnt_conf_t   *conf     = NULL;
+        clnt_local_t  *local    = NULL;
+        int            op_errno = ESTALE;
+        int            rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+        frame->local = local;
+
+        local->flags = args->flags;
+        local->fd    = fd_ref (args->fd);
+
+        loc_copy (&local->loc, args->loc);
+        loc_path (&local->loc, NULL);
+
+        rc = client_pre_open (this, &req, args->loc, args->fd, args->flags,
+                              args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_OPEN, client3_3_open_cbk, NULL,
+                                    NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_open_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * READV fop, protocol version 3.3.
+ *
+ * Encodes the read through client_pre_readv(), prepares the frame-local
+ * with client_fd_fop_prepare_local() and obtains an iobuf sized for
+ * args->size to receive the payload.  The iobuf is owned through an iobref
+ * that is parked in local->iobref, and the request goes out as
+ * GFS3_OP_READ with client3_3_readv_cbk registered for the reply.  A
+ * failure before submission drops any buffers still held here and unwinds
+ * the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_readv (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_read_req   req        = {{0,},};
+        struct iovec    rsp_vec    = {0, };
+        struct iobref  *rsp_iobref = NULL;
+        struct iobuf   *rsp_iobuf  = NULL;
+        clnt_local_t   *local      = NULL;
+        clnt_conf_t    *conf       = NULL;
+        clnt_args_t    *args       = data;
+        int             op_errno   = ESTALE;
+        int             rc         = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_readv (this, &req, args->fd, args->size,
+                               args->offset, args->flags, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_fd_fop_prepare_local (frame, args->fd, req.fd);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+        local = frame->local;
+
+        rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, args->size);
+        if (rsp_iobuf == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+
+        rsp_iobref = iobref_new ();
+        if (rsp_iobref == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+
+        /* from here on the iobref holds the buffer; drop our own ref */
+        iobref_add (rsp_iobref, rsp_iobuf);
+        iobuf_unref (rsp_iobuf);
+
+        rsp_vec.iov_base = iobuf_ptr (rsp_iobuf);
+        rsp_vec.iov_len  = iobuf_pagesize (rsp_iobuf);
+
+        rsp_iobuf = NULL;
+
+        if (rsp_vec.iov_len < args->size) {
+                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_NO_MEMORY,
+                        "read-size (%lu) is bigger than iobuf size (%lu)",
+                        (unsigned long)args->size,
+                        (unsigned long)rsp_vec.iov_len);
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        local->iobref = rsp_iobref;
+        rsp_iobref    = NULL;
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_READ, client3_3_readv_cbk, NULL,
+                                    NULL, 0, &rsp_vec, 1,
+                                    local->iobref,
+                                    (xdrproc_t)xdr_gfs3_read_req);
+        if (rc != 0) {
+                //unwind is done in the cbk
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        if (rsp_iobuf != NULL)
+                iobuf_unref (rsp_iobuf);
+
+        if (rsp_iobref != NULL)
+                iobref_unref (rsp_iobref);
+
+        CLIENT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * WRITEV fop, protocol version 3.3.
+ *
+ * Encodes the write through client_pre_writev(), prepares the frame-local
+ * with client_fd_fop_prepare_local() and sends the caller's vector/iobref
+ * via client_submit_vec_request() as GFS3_OP_WRITE with
+ * client3_3_writev_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_writev (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_write_req  req      = {{0,},};
+        clnt_args_t    *args     = data;
+        clnt_conf_t    *conf     = NULL;
+        int             op_errno = ESTALE;
+        int             rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_writev (this, &req, args->fd, args->size,
+                                args->offset, args->flags, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_fd_fop_prepare_local (frame, args->fd, req.fd);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_vec_request (this, &req, frame, conf->fops,
+                                        GFS3_OP_WRITE, client3_3_writev_cbk,
+                                        args->vector, args->count,
+                                        args->iobref,
+                                        (xdrproc_t)xdr_gfs3_write_req);
+        if (rc != 0) {
+                /*
+                 * If the lower layers fail to submit a request, they'll also
+                 * do the unwind for us (see rpc_clnt_submit), so don't unwind
+                 * here in such cases.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * FLUSH fop, protocol version 3.3.
+ *
+ * Allocates the frame-local, records a reference on args->fd and the
+ * frame's lk_owner in it, encodes the request through client_pre_flush()
+ * and submits it as GFS3_OP_FLUSH with client3_3_flush_cbk registered for
+ * the reply.  A failure before submission unwinds the frame with
+ * -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_flush (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_flush_req  req      = {{0,},};
+        clnt_args_t    *args     = data;
+        clnt_conf_t    *conf     = NULL;
+        clnt_local_t   *local    = NULL;
+        int             op_errno = ESTALE;
+        int             rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+        frame->local = local;
+
+        local->fd    = fd_ref (args->fd);
+        local->owner = frame->root->lk_owner;
+
+        rc = client_pre_flush (this, &req, args->fd, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FLUSH, client3_3_flush_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_flush_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FSYNC fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_fsync() and submits it as
+ * GFS3_OP_FSYNC with client3_3_fsync_cbk registered for the reply.  A
+ * failure before submission unwinds the frame with -1/op_errno.  Always
+ * returns 0.
+ */
+int32_t
+client3_3_fsync (call_frame_t *frame, xlator_t *this,
+                 void *data)
+{
+        clnt_args_t    *args     = NULL;
+        gfs3_fsync_req  req      = {{0,},};
+        clnt_conf_t    *conf     = NULL;
+        /*
+         * Start from ESTALE like the sibling fops: with 0 here an
+         * invalid-argument unwind would report failure (-1) with no errno.
+         */
+        int             op_errno = ESTALE;
+        int             ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_fsync (this, &req, args->fd, args->flags,
+                                args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_FSYNC, client3_3_fsync_cbk, NULL,
+                                     NULL, 0, NULL, 0,
+                                     NULL, (xdrproc_t)xdr_gfs3_fsync_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+
+unwind:
+        CLIENT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FSTAT fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_fstat() and submits it as
+ * GFS3_OP_FSTAT with client3_3_fstat_cbk registered for the reply.  A
+ * failure before submission unwinds the frame with -1/op_errno.  Always
+ * returns 0.
+ */
+int32_t
+client3_3_fstat (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_fstat_req  req      = {{0,},};
+        clnt_args_t    *args     = data;
+        clnt_conf_t    *conf     = NULL;
+        int             op_errno = ESTALE;
+        int             rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_fstat (this, &req, args->fd, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FSTAT, client3_3_fstat_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_fstat_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * OPENDIR fop, protocol version 3.3.
+ *
+ * Allocates the frame-local, records a reference on args->fd and a copy of
+ * args->loc in it, encodes the request through client_pre_opendir() and
+ * submits it as GFS3_OP_OPENDIR with client3_3_opendir_cbk registered for
+ * the reply.  A failure before submission unwinds the frame with
+ * -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_opendir (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_opendir_req  req      = {{0,},};
+        clnt_args_t      *args     = data;
+        clnt_conf_t      *conf     = NULL;
+        clnt_local_t     *local    = NULL;
+        int               op_errno = ESTALE;
+        int               rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+        frame->local = local;
+
+        local->fd = fd_ref (args->fd);
+        loc_copy (&local->loc, args->loc);
+        loc_path (&local->loc, NULL);
+
+        rc = client_pre_opendir (this, &req, args->loc, args->fd,
+                                 args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_OPENDIR, client3_3_opendir_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_opendir_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FSYNCDIR fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_fsyncdir() and submits it as
+ * GFS3_OP_FSYNCDIR with client3_3_fsyncdir_cbk registered for the reply.
+ * A failure before submission unwinds the frame with -1/op_errno.  Always
+ * returns 0.
+ */
+int32_t
+client3_3_fsyncdir (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_fsyncdir_req  req      = {{0,},};
+        clnt_args_t       *args     = data;
+        clnt_conf_t       *conf     = NULL;
+        int32_t            op_errno = ESTALE;
+        int                rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_fsyncdir (this, &req, args->fd, args->flags,
+                                  args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FSYNCDIR, client3_3_fsyncdir_cbk,
+                                    NULL, NULL, 0,
+                                    NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_fsyncdir_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * STATFS fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_statfs() and submits it as
+ * GFS3_OP_STATFS with client3_3_statfs_cbk registered for the reply.  A
+ * failure before submission unwinds the frame with -1/op_errno.  Always
+ * returns 0.
+ */
+int32_t
+client3_3_statfs (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_statfs_req  req      = {{0,},};
+        clnt_args_t     *args     = data;
+        clnt_conf_t     *conf     = NULL;
+        int              op_errno = ESTALE;
+        int              rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_statfs (this, &req, args->loc, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_STATFS, client3_3_statfs_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_statfs_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * SETXATTR fop, protocol version 3.3.
+ *
+ * Encodes loc/xattr/flags/xdata through client_pre_setxattr() and submits
+ * the request as GFS3_OP_SETXATTR with client3_3_setxattr_cbk registered
+ * for the reply.  A failure before submission unwinds the frame with
+ * -1/op_errno.  Both serialized buffers (dict and xdata) are freed on
+ * every path.  Always returns 0.
+ */
+int32_t
+client3_3_setxattr (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_setxattr_req  req      = {{0,},};
+        clnt_args_t       *args     = data;
+        clnt_conf_t       *conf     = NULL;
+        int                op_errno = ESTALE;
+        int                rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_setxattr (this, &req, args->loc, args->xattr,
+                                  args->flags, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_SETXATTR, client3_3_setxattr_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_setxattr_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+out:
+        GF_FREE (req.dict.dict_val);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FSETXATTR fop, protocol version 3.3.
+ *
+ * Encodes fd/flags/xattr/xdata through client_pre_fsetxattr() and submits
+ * the request as GFS3_OP_FSETXATTR with client3_3_fsetxattr_cbk registered
+ * for the reply.  A failure before submission unwinds the frame with
+ * -1/op_errno.  Both serialized buffers (dict and xdata) are freed on
+ * every path.  Always returns 0.
+ */
+int32_t
+client3_3_fsetxattr (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_fsetxattr_req  req      = {{0,},};
+        clnt_args_t        *args     = data;
+        clnt_conf_t        *conf     = NULL;
+        int                 op_errno = ESTALE;
+        int                 rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_fsetxattr (this, &req, args->fd, args->flags,
+                                   args->xattr, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FSETXATTR, client3_3_fsetxattr_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_fsetxattr_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+out:
+        GF_FREE (req.dict.dict_val);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FGETXATTR fop, protocol version 3.3.
+ *
+ * Allocates the frame-local and an 8 KB iobuf (held through an iobref
+ * parked in local->iobref) that is passed to client_submit_request() as
+ * the response header vector.  The request is encoded through
+ * client_pre_fgetxattr() and submitted as GFS3_OP_FGETXATTR with
+ * client3_3_fgetxattr_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno and drops any buffers
+ * still held here.  Always returns 0.
+ */
+int32_t
+client3_3_fgetxattr (call_frame_t *frame, xlator_t *this,
+                     void *data)
+{
+        clnt_args_t        *args       = NULL;
+        clnt_conf_t        *conf       = NULL;
+        gfs3_fgetxattr_req  req        = {{0,},};
+        int                 op_errno   = ESTALE;
+        int                 ret        = 0;
+        int                 count      = 0;
+        clnt_local_t       *local      = NULL;
+        struct iobref      *rsp_iobref = NULL;
+        struct iobuf       *rsp_iobuf  = NULL;
+        struct iovec       *rsphdr     = NULL;
+        struct iovec        vector[MAX_IOVEC] = {{0}, };
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        frame->local = local;
+
+        rsp_iobref = iobref_new ();
+        if (rsp_iobref == NULL) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* TODO: what is the size we should send ? */
+        rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+        if (rsp_iobuf == NULL) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* the iobref now holds the buffer; drop our own reference */
+        iobref_add (rsp_iobref, rsp_iobuf);
+        iobuf_unref (rsp_iobuf);
+
+        rsphdr = &vector[0];
+        rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+        rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+        count = 1;
+        local->iobref = rsp_iobref;
+        /* ownership moved to local; keep the unwind path from unreffing */
+        rsp_iobuf = NULL;
+        rsp_iobref = NULL;
+
+        ret = client_pre_fgetxattr (this, &req, args->fd, args->name,
+                                    args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_FGETXATTR,
+                                     client3_3_fgetxattr_cbk, NULL,
+                                     rsphdr, count,
+                                     NULL, 0, local->iobref,
+                                     (xdrproc_t)xdr_gfs3_fgetxattr_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
+        if (rsp_iobuf)
+                iobuf_unref (rsp_iobuf);
+
+        if (rsp_iobref)
+                iobref_unref (rsp_iobref);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * GETXATTR fop, protocol version 3.3.
+ *
+ * Allocates the frame-local (copy of args->loc, duplicate of args->name)
+ * and an 8 KB iobuf, held through local->iobref, that is passed to
+ * client_submit_request() as the response header vector.
+ *
+ * When args->name is recognised by is_client_dump_locks_cmd() the call is
+ * answered here: client_dump_locks() fills a fresh dict and the frame is
+ * unwound with it, without sending anything.  Otherwise the request is
+ * encoded through client_pre_getxattr() and submitted as GFS3_OP_GETXATTR
+ * with client3_3_getxattr_cbk registered for the reply.
+ *
+ * Any failure before submission unwinds the frame with -1/op_errno.
+ * Always returns 0.
+ */
+int32_t
+client3_3_getxattr (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        clnt_conf_t       *conf       = NULL;
+        clnt_args_t       *args       = NULL;
+        gfs3_getxattr_req  req        = {{0,},};
+        dict_t            *dict       = NULL;
+        int                ret        = 0;
+        int32_t            op_ret     = -1;
+        int                op_errno   = ESTALE;
+        int                count      = 0;
+        clnt_local_t      *local      = NULL;
+        struct iobref     *rsp_iobref = NULL;
+        struct iobuf      *rsp_iobuf  = NULL;
+        struct iovec      *rsphdr     = NULL;
+        struct iovec       vector[MAX_IOVEC] = {{0}, };
+
+        if (!frame || !this || !data) {
+                /* NOTE(review): sibling fops report ESTALE here - confirm */
+                op_errno = 0;
+                goto unwind;
+        }
+        args = data;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        frame->local = local;
+
+        loc_copy (&local->loc, args->loc);
+        loc_path (&local->loc, NULL);
+
+        if (args->name)
+                local->name = gf_strdup (args->name);
+
+        rsp_iobref = iobref_new ();
+        if (rsp_iobref == NULL) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* TODO: what is the size we should send ? */
+        rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+        if (rsp_iobuf == NULL) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* the iobref now holds the buffer; drop our own reference */
+        iobref_add (rsp_iobref, rsp_iobuf);
+        iobuf_unref (rsp_iobuf);
+
+        rsphdr = &vector[0];
+        rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+        rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+        count = 1;
+        local->iobref = rsp_iobref;
+        rsp_iobuf = NULL;
+        rsp_iobref = NULL;
+
+        conf = this->private;
+
+        if (args->name && is_client_dump_locks_cmd ((char *)args->name)) {
+                dict = dict_new ();
+                if (!dict) {
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                ret = client_dump_locks ((char *)args->name,
+                                         args->loc->inode,
+                                         dict);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                                PC_MSG_INVALID_ENTRY, "Client dump "
+                                "locks failed");
+                        /* report the failure instead of masking it */
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+
+                /* answered locally: unwind with the dict, nothing is sent */
+                op_ret = 0;
+                op_errno = 0;
+                goto unwind;
+        }
+
+        ret = client_pre_getxattr (this, &req, args->loc, args->name,
+                                   args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_GETXATTR,
+                                     client3_3_getxattr_cbk, NULL,
+                                     rsphdr, count,
+                                     NULL, 0, local->iobref,
+                                     (xdrproc_t)xdr_gfs3_getxattr_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        if (rsp_iobuf)
+                iobuf_unref (rsp_iobuf);
+
+        if (rsp_iobref)
+                iobref_unref (rsp_iobref);
+
+        CLIENT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+
+        /* drop the reference taken by dict_new() once the unwind is done */
+        if (dict)
+                dict_unref (dict);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * XATTROP fop, protocol version 3.3.
+ *
+ * Requires args->loc with an inode.  Allocates the frame-local and an 8 KB
+ * iobuf (held through an iobref parked in local->iobref) that is passed to
+ * client_submit_request() as the response header vector, stores a copy of
+ * args->loc in the local, encodes the request through client_pre_xattrop()
+ * and submits it as GFS3_OP_XATTROP with client3_3_xattrop_cbk registered
+ * for the reply.  A failure before submission unwinds the frame with
+ * -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_xattrop (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_xattrop_req  req        = {{0,},};
+        struct iovec      vector[MAX_IOVEC] = {{0}, };
+        struct iovec     *rsphdr     = vector;
+        struct iobref    *rsp_iobref = NULL;
+        struct iobuf     *rsp_iobuf  = NULL;
+        clnt_local_t     *local      = NULL;
+        clnt_conf_t      *conf       = NULL;
+        clnt_args_t      *args       = data;
+        int               op_errno   = ESTALE;
+        int               count      = 0;
+        int               rc         = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        if (args->loc == NULL || args->loc->inode == NULL)
+                goto fail;
+
+        local = mem_get0 (this->local_pool);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+        frame->local = local;
+
+        rsp_iobref = iobref_new ();
+        if (rsp_iobref == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+
+        /* TODO: what is the size we should send ? */
+        rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+        if (rsp_iobuf == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+
+        /* the iobref now holds the buffer; drop our own reference */
+        iobref_add (rsp_iobref, rsp_iobuf);
+        iobuf_unref (rsp_iobuf);
+
+        rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+        rsphdr->iov_len  = iobuf_pagesize (rsp_iobuf);
+        count            = 1;
+        local->iobref    = rsp_iobref;
+        rsp_iobuf        = NULL;
+        rsp_iobref       = NULL;
+
+        loc_copy (&local->loc, args->loc);
+        loc_path (&local->loc, NULL);
+        conf = this->private;
+
+        rc = client_pre_xattrop (this, &req, args->loc, args->xattr,
+                                 args->flags, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_XATTROP,
+                                    client3_3_xattrop_cbk, NULL,
+                                    rsphdr, count,
+                                    NULL, 0, local->iobref,
+                                    (xdrproc_t)xdr_gfs3_xattrop_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+out:
+        GF_FREE (req.dict.dict_val);
+
+        /* both pointers are NULL once ownership moved to local->iobref */
+        if (rsp_iobuf != NULL)
+                iobuf_unref (rsp_iobuf);
+
+        if (rsp_iobref != NULL)
+                iobref_unref (rsp_iobref);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FXATTROP fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_fxattrop(), prepares the
+ * frame-local with client_fd_fop_prepare_local() and allocates an 8 KB
+ * iobuf (held through an iobref parked in local->iobref) that is passed to
+ * client_submit_request() as the response header vector.  The request goes
+ * out as GFS3_OP_FXATTROP with client3_3_fxattrop_cbk registered for the
+ * reply.  A failure before submission unwinds the frame with -1/op_errno.
+ * Always returns 0.
+ */
+int32_t
+client3_3_fxattrop (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_fxattrop_req  req        = {{0,},};
+        struct iovec       vector[MAX_IOVEC] = {{0}, };
+        struct iovec      *rsphdr     = vector;
+        struct iobref     *rsp_iobref = NULL;
+        struct iobuf      *rsp_iobuf  = NULL;
+        clnt_local_t      *local      = NULL;
+        clnt_conf_t       *conf       = NULL;
+        clnt_args_t       *args       = data;
+        int                op_errno   = ESTALE;
+        int                count      = 0;
+        int                rc         = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_fxattrop (this, &req, args->fd, args->xattr,
+                                  args->flags, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_fd_fop_prepare_local (frame, args->fd, req.fd);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+        local = frame->local;
+
+        rsp_iobref = iobref_new ();
+        if (rsp_iobref == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+
+        /* TODO: what is the size we should send ? */
+        rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+        if (rsp_iobuf == NULL) {
+                op_errno = ENOMEM;
+                goto fail;
+        }
+
+        /* the iobref now holds the buffer; drop our own reference */
+        iobref_add (rsp_iobref, rsp_iobuf);
+        iobuf_unref (rsp_iobuf);
+
+        rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+        rsphdr->iov_len  = iobuf_pagesize (rsp_iobuf);
+        count            = 1;
+        local->iobref    = rsp_iobref;
+        rsp_iobuf        = NULL;
+        rsp_iobref       = NULL;
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FXATTROP,
+                                    client3_3_fxattrop_cbk, NULL,
+                                    rsphdr, count,
+                                    NULL, 0, local->iobref,
+                                    (xdrproc_t)xdr_gfs3_fxattrop_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+out:
+        GF_FREE (req.dict.dict_val);
+
+        /* both pointers are NULL once ownership moved to local->iobref */
+        if (rsp_iobref != NULL)
+                iobref_unref (rsp_iobref);
+
+        if (rsp_iobuf != NULL)
+                iobuf_unref (rsp_iobuf);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * REMOVEXATTR fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_removexattr() and submits it as
+ * GFS3_OP_REMOVEXATTR with client3_3_removexattr_cbk registered for the
+ * reply.  A failure before submission unwinds the frame with -1/op_errno.
+ * Always returns 0.
+ */
+int32_t
+client3_3_removexattr (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_removexattr_req  req      = {{0,},};
+        clnt_args_t          *args     = data;
+        clnt_conf_t          *conf     = NULL;
+        int                   op_errno = ESTALE;
+        int                   rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_removexattr (this, &req, args->loc, args->name,
+                                     args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_REMOVEXATTR,
+                                    client3_3_removexattr_cbk, NULL,
+                                    NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_removexattr_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FREMOVEXATTR fop, protocol version 3.3.
+ *
+ * Encodes the request through client_pre_fremovexattr() and submits it as
+ * GFS3_OP_FREMOVEXATTR with client3_3_fremovexattr_cbk registered for the
+ * reply.  A failure before submission unwinds the frame with -1/op_errno.
+ * Always returns 0.
+ */
+int32_t
+client3_3_fremovexattr (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_fremovexattr_req  req      = {{0,},};
+        clnt_args_t           *args     = data;
+        clnt_conf_t           *conf     = NULL;
+        int                    op_errno = ESTALE;
+        int                    rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_fremovexattr (this, &req, args->fd, args->name,
+                                      args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FREMOVEXATTR,
+                                    client3_3_fremovexattr_cbk, NULL,
+                                    NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t)xdr_gfs3_fremovexattr_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * LEASE fop, protocol version 3.3.
+ *
+ * Validates this/frame/data with GF_VALIDATE_OR_GOTO, encodes the request
+ * through client_pre_lease() and submits it as GFS3_OP_LEASE with
+ * client3_3_lease_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_lease (call_frame_t *frame, xlator_t *this,
+                 void *data)
+{
+        clnt_args_t     *args     = NULL;
+        gfs3_lease_req   req      = {{0,},};
+        clnt_conf_t     *conf     = NULL;
+        int              op_errno = ESTALE;
+        int              ret      = 0;
+
+        GF_VALIDATE_OR_GOTO ("client", this, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, data, unwind);
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_lease (this, &req, args->loc, args->lease,
+                                args->xdata);
+        /* only a negative return is treated as failure here */
+        if (ret < 0) {
+                op_errno = -ret;
+                goto unwind;
+        }
+        ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LEASE,
+                                     client3_3_lease_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_lease_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (lease, frame, -1, op_errno, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * LK fop, protocol version 3.3.
+ *
+ * Allocates the frame-local, checks that args->cmd can be translated by
+ * client_cmd_to_gf_cmd() (EINVAL otherwise), records the lock owner, the
+ * command and a reference on args->fd in the local, encodes the request
+ * through client_pre_lk() and submits it as GFS3_OP_LK with
+ * client3_3_lk_cbk registered for the reply.  A failure before submission
+ * unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_lk (call_frame_t *frame, xlator_t *this,
+              void *data)
+{
+        clnt_args_t     *args     = NULL;
+        gfs3_lk_req      req      = {{0,},};
+        int32_t          gf_cmd   = 0;
+        clnt_local_t    *local    = NULL;
+        clnt_conf_t     *conf     = NULL;
+        int              op_errno = ESTALE;
+        int              ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        frame->local = local;
+
+        /* used purely as a validity check; gf_cmd itself is not used below */
+        ret = client_cmd_to_gf_cmd (args->cmd, &gf_cmd);
+        if (ret) {
+                op_errno = EINVAL;
+                /*
+                 * Log the command that was rejected: gf_cmd presumably
+                 * stays at its initial 0 when the translation fails.
+                 */
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_INVALID_ENTRY, "Unknown cmd (%d)!", args->cmd);
+                goto unwind;
+        }
+
+        local->owner = frame->root->lk_owner;
+        local->cmd   = args->cmd;
+        local->fd    = fd_ref (args->fd);
+
+        ret = client_pre_lk (this, &req, args->cmd, args->flock,
+                             args->fd, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+        ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LK,
+                                     client3_3_lk_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_lk_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * INODELK fop, protocol version 3.3.
+ *
+ * Encodes loc/cmd/flock/volume/xdata through client_pre_inodelk() and
+ * submits the request as GFS3_OP_INODELK with client3_3_inodelk_cbk
+ * registered for the reply.  A failure before submission unwinds the frame
+ * with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_inodelk (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_inodelk_req  req      = {{0,},};
+        clnt_args_t      *args     = data;
+        clnt_conf_t      *conf     = NULL;
+        int               op_errno = ESTALE;
+        int               rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_inodelk (this, &req, args->loc, args->cmd,
+                                 args->flock, args->volume, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_INODELK,
+                                    client3_3_inodelk_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_inodelk_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FINODELK fop, protocol version 3.3.
+ *
+ * Encodes fd/cmd/flock/volume/xdata through client_pre_finodelk(),
+ * prepares the frame-local with client_fd_fop_prepare_local() and submits
+ * the request as GFS3_OP_FINODELK with client3_3_finodelk_cbk registered
+ * for the reply.  A failure before submission unwinds the frame with
+ * -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_finodelk (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_finodelk_req  req      = {{0,},};
+        clnt_args_t       *args     = data;
+        clnt_conf_t       *conf     = NULL;
+        int                op_errno = ESTALE;
+        int                rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_finodelk (this, &req, args->fd, args->cmd,
+                                  args->flock, args->volume, args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_fd_fop_prepare_local (frame, args->fd, req.fd);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FINODELK,
+                                    client3_3_finodelk_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_finodelk_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * ENTRYLK fop, protocol version 3.3.
+ *
+ * Encodes loc/cmd_entrylk/type/volume/basename/xdata through
+ * client_pre_entrylk() and submits the request as GFS3_OP_ENTRYLK with
+ * client3_3_entrylk_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_entrylk (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_entrylk_req  req      = {{0,},};
+        clnt_args_t      *args     = data;
+        clnt_conf_t      *conf     = NULL;
+        int               op_errno = ESTALE;
+        int               rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_entrylk (this, &req, args->loc, args->cmd_entrylk,
+                                 args->type, args->volume, args->basename,
+                                 args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_ENTRYLK,
+                                    client3_3_entrylk_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_entrylk_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+
+/*
+ * FENTRYLK fop, protocol version 3.3.
+ *
+ * Encodes fd/cmd_entrylk/type/volume/basename/xdata through
+ * client_pre_fentrylk() and submits the request as GFS3_OP_FENTRYLK with
+ * client3_3_fentrylk_cbk registered for the reply.  A failure before
+ * submission unwinds the frame with -1/op_errno.  Always returns 0.
+ */
+int32_t
+client3_3_fentrylk (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_fentrylk_req  req      = {{0,},};
+        clnt_args_t       *args     = data;
+        clnt_conf_t       *conf     = NULL;
+        int                op_errno = ESTALE;
+        int                rc       = 0;
+
+        if (frame == NULL || this == NULL || data == NULL)
+                goto fail;
+
+        conf = this->private;
+
+        rc = client_pre_fentrylk (this, &req, args->fd, args->cmd_entrylk,
+                                  args->type, args->volume, args->basename,
+                                  args->xdata);
+        if (rc != 0) {
+                op_errno = -rc;
+                goto fail;
+        }
+
+        rc = client_submit_request (this, &req, frame, conf->fops,
+                                    GFS3_OP_FENTRYLK,
+                                    client3_3_fentrylk_cbk, NULL,
+                                    NULL, 0, NULL, 0,
+                                    NULL, (xdrproc_t)xdr_gfs3_fentrylk_req);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+        goto out;
+
+fail:
+        CLIENT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+out:
+        /* the serialized xdata is released on every path */
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * Send an RCHECKSUM request for the fd, length and offset carried in 'data'
+ * (a clnt_args_t).
+ *
+ * On a NULL argument or a non-zero return from client_pre_rchecksum()
+ * (negated into op_errno) the frame is unwound with -1, a zero checksum
+ * value and NULL pointers. The serialized xdata buffer is freed on every
+ * path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_rchecksum (call_frame_t *frame, xlator_t *this,
+                     void *data)
+{
+        gfs3_rchecksum_req  req      = {0,};
+        clnt_args_t        *args     = data;
+        clnt_conf_t        *conf     = NULL;
+        int                 op_errno = ESTALE;
+        int                 ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        conf = this->private;
+
+        ret = client_pre_rchecksum (this, &req, args->fd, args->len,
+                                    args->offset, args->xdata);
+        if (ret != 0) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_RCHECKSUM,
+                                     client3_3_rchecksum_cbk, NULL,
+                                     NULL, 0, NULL,
+                                     0, NULL,
+                                     (xdrproc_t)xdr_gfs3_rchecksum_req);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        goto out;
+
+unwind:
+        CLIENT_STACK_UNWIND (rchecksum, frame, -1, op_errno, 0, NULL, NULL);
+out:
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send a READDIR request for the fd, size and offset carried in 'data'
+ * (a clnt_args_t).
+ *
+ * A clnt_local_t is allocated and attached to the frame. When the expected
+ * reply size (XDR size of an empty response plus args->size, plus the RPC
+ * reply and RDMA header allowances) exceeds GLUSTERFS_RDMA_INLINE_THRESHOLD,
+ * a response-header iobuf is allocated up front and passed to
+ * client_submit_request() as 'rsphdr'.
+ *
+ * Failures unwind the frame with -1: ESTALE for NULL arguments, ENOMEM for
+ * any allocation failure, and the negated return of client_pre_readdir()
+ * otherwise.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_readdir (call_frame_t *frame, xlator_t *this,
+                   void *data)
+{
+        clnt_args_t      *args              = NULL;
+        int64_t           remote_fd         = -1;
+        clnt_conf_t      *conf              = NULL;
+        gfs3_readdir_req  req               = {{0,},};
+        gfs3_readdir_rsp  rsp               = {0, };
+        clnt_local_t     *local             = NULL;
+        int               op_errno          = ESTALE;
+        int               ret               = 0;
+        int               count             = 0;
+        struct iobref    *rsp_iobref        = NULL;
+        struct iobuf     *rsp_iobuf         = NULL;
+        struct iovec     *rsphdr            = NULL;
+        struct iovec      vector[MAX_IOVEC] = {{0}, };
+        int               readdir_rsp_size  = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        readdir_rsp_size = xdr_sizeof ((xdrproc_t) xdr_gfs3_readdir_rsp, &rsp)
+                + args->size;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        frame->local = local;
+
+        local->cmd = remote_fd;
+
+        if ((readdir_rsp_size + GLUSTERFS_RPC_REPLY_SIZE + GLUSTERFS_RDMA_MAX_HEADER_SIZE)
+            > (GLUSTERFS_RDMA_INLINE_THRESHOLD)) {
+                rsp_iobref = iobref_new ();
+                if (rsp_iobref == NULL) {
+                        /* Allocation failure: report it as such, not ESTALE. */
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* TODO: what is the size we should send ? */
+                /* This iobuf will live for only receiving the response,
+                   so not harmful */
+                rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+                if (rsp_iobuf == NULL) {
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                iobref_add (rsp_iobref, rsp_iobuf);
+                iobuf_unref (rsp_iobuf);
+
+                rsphdr = &vector[0];
+                rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+                rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+                count = 1;
+
+                /* local->iobref now holds the reference; clear the locals
+                 * so the unwind path below does not unref them again. As a
+                 * consequence NULL is what gets passed as rsp_iobref to
+                 * client_submit_request(). */
+                local->iobref = rsp_iobref;
+                rsp_iobuf = NULL;
+                rsp_iobref = NULL;
+        }
+
+        ret = client_pre_readdir (this, &req, args->fd, args->size,
+                                  args->offset, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_READDIR,
+                                     client3_3_readdir_cbk, NULL,
+                                     rsphdr, count,
+                                     NULL, 0, rsp_iobref,
+                                     (xdrproc_t)xdr_gfs3_readdir_req);
+
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+
+unwind:
+        if (rsp_iobref)
+                iobref_unref (rsp_iobref);
+
+        if (rsp_iobuf)
+                iobuf_unref (rsp_iobuf);
+
+        CLIENT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * Send a READDIRP request for the fd, size and offset carried in 'data'
+ * (a clnt_args_t).
+ *
+ * A clnt_local_t is allocated and attached to the frame, and a reference on
+ * args->fd is stored in it before submission. When the expected reply size
+ * exceeds GLUSTERFS_RDMA_INLINE_THRESHOLD, a response-header iobuf is
+ * allocated up front and passed to client_submit_request() as 'rsphdr'.
+ *
+ * Failures unwind the frame with -1: ESTALE for NULL arguments, ENOMEM for
+ * any allocation failure, and the negated return of client_pre_readdirp()
+ * otherwise.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_readdirp (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        clnt_args_t       *args              = NULL;
+        gfs3_readdirp_req  req               = {{0,},};
+        gfs3_readdirp_rsp  rsp               = {0,};
+        clnt_conf_t       *conf              = NULL;
+        int                op_errno          = ESTALE;
+        int                ret               = 0;
+        int                count             = 0;
+        int                readdirp_rsp_size = 0;
+        struct iobref     *rsp_iobref        = NULL;
+        struct iobuf      *rsp_iobuf         = NULL;
+        struct iovec      *rsphdr            = NULL;
+        struct iovec       vector[MAX_IOVEC] = {{0}, };
+        clnt_local_t      *local             = NULL;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        frame->local = local;
+
+        ret = client_pre_readdirp (this, &req, args->fd, args->size,
+                                   args->offset, args->xdata);
+
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        readdirp_rsp_size = xdr_sizeof ((xdrproc_t) xdr_gfs3_readdirp_rsp, &rsp)
+                + args->size;
+
+        if ((readdirp_rsp_size + GLUSTERFS_RPC_REPLY_SIZE
+             + GLUSTERFS_RDMA_MAX_HEADER_SIZE)
+            > (GLUSTERFS_RDMA_INLINE_THRESHOLD)) {
+                rsp_iobref = iobref_new ();
+                if (rsp_iobref == NULL) {
+                        /* Allocation failure: report it as such, not ESTALE. */
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* TODO: what is the size we should send ? */
+                /* This iobuf will live for only receiving the response,
+                   so not harmful */
+                rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+                if (rsp_iobuf == NULL) {
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                iobref_add (rsp_iobref, rsp_iobuf);
+                iobuf_unref (rsp_iobuf);
+
+                rsphdr = &vector[0];
+                rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+                rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+                count = 1;
+
+                /* local->iobref now holds the reference; clear the locals
+                 * so the unwind path below does not unref them again. */
+                local->iobref = rsp_iobref;
+                rsp_iobuf = NULL;
+                rsp_iobref = NULL;
+        }
+
+        local->fd = fd_ref (args->fd);
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_READDIRP,
+                                     client3_3_readdirp_cbk, NULL,
+                                     rsphdr, count, NULL,
+                                     0, rsp_iobref,
+                                     (xdrproc_t)xdr_gfs3_readdirp_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.dict.dict_val);
+
+        return 0;
+unwind:
+        if (rsp_iobref)
+                iobref_unref (rsp_iobref);
+
+        if (rsp_iobuf)
+                iobuf_unref (rsp_iobuf);
+
+        GF_FREE (req.dict.dict_val);
+
+        CLIENT_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Send a SETATTR request for the loc, attribute mask and stat buffer carried
+ * in 'data' (a clnt_args_t).
+ *
+ * On a NULL argument or a non-zero return from client_pre_setattr()
+ * (negated into op_errno) the frame is unwound with -1. The serialized
+ * xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_setattr (call_frame_t *frame, xlator_t *this,
+                   void *data)
+{
+        gfs3_setattr_req  req      = {{0,},};
+        clnt_args_t      *args     = data;
+        clnt_conf_t      *conf     = NULL;
+        int               op_errno = ESTALE;
+        int               ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        conf = this->private;
+
+        ret = client_pre_setattr (this, &req, args->loc, args->valid,
+                                  args->stbuf, args->xdata);
+        if (ret != 0) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_SETATTR,
+                                     client3_3_setattr_cbk, NULL,
+                                     NULL, 0, NULL, 0,
+                                     NULL, (xdrproc_t)xdr_gfs3_setattr_req);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        goto out;
+
+unwind:
+        CLIENT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+out:
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send an FSETATTR request for the fd, attribute mask and stat buffer
+ * carried in 'data' (a clnt_args_t).
+ *
+ * On a NULL argument or a non-zero return from client_pre_fsetattr()
+ * (negated into op_errno) the frame is unwound with -1. The serialized
+ * xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_fsetattr (call_frame_t *frame, xlator_t *this, void *data)
+{
+        clnt_args_t       *args     = NULL;
+        clnt_conf_t       *conf     = NULL;
+        gfs3_fsetattr_req  req      = {0,};
+        int                op_errno = ESTALE;
+        int                ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_fsetattr (this, &req, args->fd, args->valid,
+                                   args->stbuf, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_FSETATTR,
+                                     client3_3_fsetattr_cbk, NULL,
+                                     NULL, 0, NULL, 0,
+                                     NULL, (xdrproc_t)xdr_gfs3_fsetattr_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send a FALLOCATE request for the fd, flags, offset and size carried in
+ * 'data' (a clnt_args_t).
+ *
+ * On a NULL argument or a non-zero return from client_pre_fallocate()
+ * (negated into op_errno) the frame is unwound with -1. The serialized
+ * xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_fallocate(call_frame_t *frame, xlator_t *this, void *data)
+{
+        clnt_args_t        *args     = NULL;
+        clnt_conf_t        *conf     = NULL;
+        gfs3_fallocate_req  req      = {{0},};
+        int                 op_errno = ESTALE;
+        int                 ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_fallocate (this, &req, args->fd, args->flags,
+                                    args->offset, args->size, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_FALLOCATE,
+                                     client3_3_fallocate_cbk, NULL,
+                                     NULL, 0, NULL, 0,
+                                     NULL, (xdrproc_t)xdr_gfs3_fallocate_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send a DISCARD request for the fd, offset and size carried in 'data'
+ * (a clnt_args_t).
+ *
+ * On a NULL argument or a non-zero return from client_pre_discard()
+ * (negated into op_errno) the frame is unwound with -1. The serialized
+ * xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_discard(call_frame_t *frame, xlator_t *this, void *data)
+{
+        clnt_args_t      *args     = NULL;
+        clnt_conf_t      *conf     = NULL;
+        gfs3_discard_req  req      = {{0},};
+        int               op_errno = ESTALE;
+        int               ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_discard (this, &req, args->fd, args->offset,
+                                  args->size, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request(this, &req, frame, conf->fops,
+                                    GFS3_OP_DISCARD, client3_3_discard_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t) xdr_gfs3_discard_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send a ZEROFILL request for the fd, offset and size carried in 'data'
+ * (a clnt_args_t).
+ *
+ * 'frame' is asserted rather than checked. On a NULL 'this'/'data' or a
+ * non-zero return from client_pre_zerofill() (negated into op_errno) the
+ * frame is unwound with -1. The serialized xdata buffer is freed on every
+ * path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data)
+{
+        clnt_args_t       *args     = NULL;
+        clnt_conf_t       *conf     = NULL;
+        gfs3_zerofill_req  req      = {{0},};
+        int                op_errno = ESTALE;
+        int                ret      = 0;
+
+        GF_ASSERT (frame);
+
+        if (!this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_zerofill (this, &req, args->fd, args->offset,
+                                   args->size, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request(this, &req, frame, conf->fops,
+                                    GFS3_OP_ZEROFILL, client3_3_zerofill_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t) xdr_gfs3_zerofill_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send an IPC request carrying args->cmd and args->xdata from 'data'
+ * (a clnt_args_t).
+ *
+ * 'frame' is asserted rather than checked. On a NULL 'this'/'data' or a
+ * non-zero return from client_pre_ipc() (negated into op_errno) the frame
+ * is unwound with -1. The serialized xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_ipc (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gfs3_ipc_req  req      = {0,};
+        clnt_args_t  *args     = data;
+        clnt_conf_t  *conf     = NULL;
+        int           op_errno = ESTALE;
+        int           ret      = 0;
+
+        GF_ASSERT (frame);
+
+        if (!this || !data)
+                goto unwind;
+
+        conf = this->private;
+
+        ret = client_pre_ipc (this, &req, args->cmd, args->xdata);
+        if (ret != 0) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request(this, &req, frame, conf->fops,
+                                    GFS3_OP_IPC, client3_3_ipc_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t) xdr_gfs3_ipc_req);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        goto out;
+
+unwind:
+        CLIENT_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
+out:
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send a SEEK request for the fd, offset and 'what' selector carried in
+ * 'data' (a clnt_args_t).
+ *
+ * 'frame' is asserted rather than checked. On a NULL 'this'/'data' or a
+ * non-zero return from client_pre_seek() (negated into op_errno) the frame
+ * is unwound with -1 using the seek callback signature (offset 0, NULL
+ * xdata). The serialized xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_seek (call_frame_t *frame, xlator_t *this, void *data)
+{
+        clnt_args_t          *args     = NULL;
+        clnt_conf_t          *conf     = NULL;
+        struct gfs3_seek_req  req      = {{0,},};
+        int                   op_errno = ESTALE;
+        int                   ret      = 0;
+
+        GF_ASSERT (frame);
+
+        if (!this || !data)
+                goto unwind;
+
+        args = data;
+        conf = this->private;
+
+        ret = client_pre_seek (this, &req, args->fd,
+                               args->offset, args->what, args->xdata);
+        if (ret) {
+                op_errno = -ret;
+                goto unwind;
+        }
+
+        ret = client_submit_request(this, &req, frame, conf->fops,
+                                    GFS3_OP_SEEK, client3_3_seek_cbk,
+                                    NULL, NULL, 0, NULL, 0, NULL,
+                                    (xdrproc_t) xdr_gfs3_seek_req);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+unwind:
+        /* Unwind as 'seek', not 'ipc': the parent's seek callback expects an
+         * offset followed by xdata, so both must be supplied here. */
+        CLIENT_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/* Brief explanation of gfs3_compound_req structure :
+ * 1) It consists of version of compounding.
+ * 2) A compound-fop enum, new enum for compound fops
+ * 3) A 'compound_req_array' structure that has
+ * a) array len - based on the number of fops compounded
+ * b) compound_req_array_val - pointer to an array of compound_req's
+ * 4) compound_req - structure that contains:
+ * a) fop enum of type glusterfs_fop_t
+ * b) union of structures of xdr requests of all fops.
+ */
+
+/*
+ * Build and send a COMPOUND request from the compound_args_t in 'data'.
+ *
+ * Steps, in order:
+ *   1. allocate a clnt_local_t, attach it to the frame and record the fop
+ *      count and the compound args in it;
+ *   2. allocate a response-header iobuf and point 'rsphdr' at it;
+ *   3. serialize c_args->xdata (if any) into the request;
+ *   4. allocate one compound_req slot per fop and let
+ *      client_handle_fop_requirements() fill each slot;
+ *   5. submit through client_submit_compound_request().
+ *
+ * 'frame' is asserted rather than checked. Any failure unwinds the frame
+ * with -1; op_errno defaults to ENOMEM and is overwritten with the helper's
+ * return value when client_handle_fop_requirements() fails. The request's
+ * xdata buffer and compound array are released on every path.
+ *
+ * Always returns 0.
+ */
+int32_t
+client3_3_compound (call_frame_t *frame, xlator_t *this, void *data)
+{
+        clnt_conf_t       *conf                  = NULL;
+        compound_args_t   *c_args                = data;
+        gfs3_compound_req  req                   = {0,};
+        clnt_local_t      *local                 = NULL;
+        int                op_errno              = ENOMEM;
+        int                ret                   = 0;
+        int                i                     = 0;
+        int                rsp_count             = 0;
+        struct iovec       rsp_vector[MAX_IOVEC] = {{0}, };
+        struct iovec       req_vector[MAX_IOVEC] = {{0}, };
+        struct iovec       vector[MAX_IOVEC]     = {{0}, };
+        struct iovec      *rsphdr                = NULL;
+        struct iobref     *req_iobref            = NULL;
+        struct iobref     *rsp_iobref            = NULL;
+        struct iobref     *rsphdr_iobref         = NULL;
+        struct iobuf      *rsphdr_iobuf          = NULL;
+        int                rsphdr_count          = 0;
+        int                req_count             = 0;
+        int                index                 = 0;
+        dict_t            *xdata                 = NULL;
+
+        GF_ASSERT (frame);
+
+        if (!this || !data)
+                goto unwind;
+
+        /* Only touch c_args once 'data' is known to be non-NULL. */
+        xdata = c_args->xdata;
+
+        memset (req_vector, 0, sizeof (req_vector));
+        memset (rsp_vector, 0, sizeof (rsp_vector));
+
+        conf = this->private;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        frame->local = local;
+
+        local->length = c_args->fop_length;
+        local->compound_args = c_args;
+
+        rsphdr_iobref = iobref_new ();
+        if (rsphdr_iobref == NULL) {
+                goto unwind;
+        }
+
+        /* TODO: what is the size we should send ? */
+        rsphdr_iobuf = iobuf_get (this->ctx->iobuf_pool);
+        if (rsphdr_iobuf == NULL) {
+                goto unwind;
+        }
+
+        iobref_add (rsphdr_iobref, rsphdr_iobuf);
+        iobuf_unref (rsphdr_iobuf);
+        rsphdr = &vector[0];
+        rsphdr->iov_base = iobuf_ptr (rsphdr_iobuf);
+        rsphdr->iov_len = iobuf_pagesize (rsphdr_iobuf);
+        rsphdr_count = 1;
+        /* NOTE(review): rsp_iobref is still NULL at this point, so the
+         * response-header iobref allocated above is not stored anywhere
+         * before its local pointer is cleared below. It looks as if
+         * rsphdr_iobref was meant here - confirm against
+         * client_handle_fop_requirements() and the callback before
+         * changing it. */
+        local->iobref = rsp_iobref;
+        rsphdr_iobuf = NULL;
+        rsphdr_iobref = NULL;
+
+        req.compound_fop_enum = c_args->fop_enum;
+        req.compound_req_array.compound_req_array_len = c_args->fop_length;
+        /*TODO : Talk to Sowmya about this */
+        req.compound_version = 0;
+        if (xdata) {
+                GF_PROTOCOL_DICT_SERIALIZE (this, xdata,
+                                            (&req.xdata.xdata_val),
+                                            req.xdata.xdata_len,
+                                            op_errno, unwind);
+        }
+
+        req.compound_req_array.compound_req_array_val = GF_CALLOC (local->length,
+                                                        sizeof (compound_req),
+                                                        gf_client_mt_compound_req_t);
+
+        if (!req.compound_req_array.compound_req_array_val) {
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        for (i = 0; i < local->length; i++) {
+                ret = client_handle_fop_requirements (this, frame,
+                                                      &req, local,
+                                                      req_iobref, rsp_iobref,
+                                                      req_vector,
+                                                      rsp_vector, &req_count,
+                                                      &rsp_count,
+                                                      &c_args->req_list[i],
+                                                      c_args->enum_list[i],
+                                                      index);
+                if (ret) {
+                        op_errno = ret;
+                        goto unwind;
+                }
+                index++;
+        }
+
+        local->iobref2 = rsp_iobref;
+        rsp_iobref = NULL;
+
+        ret = client_submit_compound_request (this, &req, frame, conf->fops,
+                                              GFS3_OP_COMPOUND, client3_3_compound_cbk,
+                                              req_vector, req_count, local->iobref,
+                                              rsphdr, rsphdr_count,
+                                              rsp_vector, rsp_count,
+                                              local->iobref2,
+                                              (xdrproc_t) xdr_gfs3_compound_req);
+        if (ret) {
+                /* Log a failed submit the same way the other fops here do. */
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        GF_FREE (req.xdata.xdata_val);
+
+        compound_request_cleanup (&req);
+        return 0;
+unwind:
+        CLIENT_STACK_UNWIND (compound, frame, -1, op_errno, NULL, NULL);
+
+        if (rsp_iobref)
+                iobref_unref (rsp_iobref);
+
+        if (rsphdr_iobref)
+                iobref_unref (rsphdr_iobref);
+
+        GF_FREE (req.xdata.xdata_val);
+
+        compound_request_cleanup (&req);
+        return 0;
+}
+
+/*
+ * Send a GETACTIVELK request for the loc carried in 'data' (a clnt_args_t).
+ *
+ * The request gfid is copied from loc->inode->gfid when that is non-null,
+ * otherwise from loc->gfid; a gfid that is still null afterwards unwinds
+ * with EINVAL. Missing arguments or a missing loc/inode unwind with the
+ * default ESTALE. The serialized xdata buffer is freed on every path.
+ *
+ * Always returns 0.
+ */
+static int32_t
+client3_3_getactivelk (call_frame_t *frame, xlator_t *this,
+                       void *data)
+{
+        gfs3_getactivelk_req  req      = {{0,},};
+        clnt_args_t          *args     = data;
+        clnt_conf_t          *conf     = NULL;
+        int                   op_errno = ESTALE;
+        int                   ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        if (!args->loc || !args->loc->inode)
+                goto unwind;
+
+        if (gf_uuid_is_null (args->loc->inode->gfid))
+                memcpy (req.gfid, args->loc->gfid, 16);
+        else
+                memcpy (req.gfid, args->loc->inode->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                       !gf_uuid_is_null (*((uuid_t *)req.gfid)),
+                                       unwind, op_errno, EINVAL);
+        conf = this->private;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+                                    req.xdata.xdata_len, op_errno, unwind);
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_GETACTIVELK,
+                                     client3_3_getactivelk_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_getactivelk_req);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        goto out;
+
+unwind:
+        CLIENT_STACK_UNWIND (getactivelk, frame, -1, op_errno, NULL, NULL);
+out:
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Send a SETACTIVELK request for the loc and lock list carried in 'data'
+ * (a clnt_args_t).
+ *
+ * The request gfid is copied from loc->inode->gfid when that is non-null,
+ * otherwise from loc->gfid; a gfid that is still null afterwards unwinds
+ * with EINVAL. The lock list is serialized into the request by
+ * serialize_req_locklist(); if that returns non-zero the frame is unwound
+ * with whatever op_errno holds at that point. Both the xdata buffer and
+ * the request's lock list are released on every path.
+ *
+ * Always returns 0.
+ */
+static int32_t
+client3_3_setactivelk (call_frame_t *frame, xlator_t *this,
+                       void *data)
+{
+        gfs3_setactivelk_req  req      = {{0,},};
+        clnt_args_t          *args     = data;
+        clnt_conf_t          *conf     = NULL;
+        int                   op_errno = ESTALE;
+        int                   ret      = 0;
+
+        if (!frame || !this || !data)
+                goto unwind;
+
+        if (!args->loc || !args->loc->inode || !args->locklist)
+                goto unwind;
+
+        if (gf_uuid_is_null (args->loc->inode->gfid))
+                memcpy (req.gfid, args->loc->gfid, 16);
+        else
+                memcpy (req.gfid, args->loc->inode->gfid, 16);
+
+        GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+                                       !gf_uuid_is_null (*((uuid_t *)req.gfid)),
+                                       unwind, op_errno, EINVAL);
+        conf = this->private;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+                                    req.xdata.xdata_len, op_errno, unwind);
+
+        ret = serialize_req_locklist (args->locklist, &req);
+        if (ret != 0)
+                goto unwind;
+
+        ret = client_submit_request (this, &req, frame, conf->fops,
+                                     GFS3_OP_SETACTIVELK, client3_3_setactivelk_cbk, NULL,
+                                     NULL, 0, NULL, 0, NULL,
+                                     (xdrproc_t)xdr_gfs3_setactivelk_req);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+                        "failed to send the fop");
+        }
+
+        /* Success path: lock list first, then the xdata buffer. */
+        clnt_setactivelk_req_cleanup (&req);
+        GF_FREE (req.xdata.xdata_val);
+
+        return 0;
+
+unwind:
+        CLIENT_STACK_UNWIND (setactivelk, frame, -1, op_errno, NULL);
+
+        /* Failure path: xdata buffer first, then the lock list. */
+        GF_FREE (req.xdata.xdata_val);
+        clnt_setactivelk_req_cleanup (&req);
+
+        return 0;
+}
+
+/*
+ * Procedure table for the 3.3 fop program: indexed by GF_FOP_* value, each
+ * entry pairs the procedure's printable name with the function that sends
+ * it. Entries not listed here are left zero-initialized. Installed as
+ * clnt3_3_fop_prog.proctable below.
+ */
+rpc_clnt_procedure_t clnt3_3_fop_actors[GF_FOP_MAXVALUE] = {
+ [GF_FOP_NULL] = { "NULL", NULL},
+ [GF_FOP_STAT] = { "STAT", client3_3_stat },
+ [GF_FOP_READLINK] = { "READLINK", client3_3_readlink },
+ [GF_FOP_MKNOD] = { "MKNOD", client3_3_mknod },
+ [GF_FOP_MKDIR] = { "MKDIR", client3_3_mkdir },
+ [GF_FOP_UNLINK] = { "UNLINK", client3_3_unlink },
+ [GF_FOP_RMDIR] = { "RMDIR", client3_3_rmdir },
+ [GF_FOP_SYMLINK] = { "SYMLINK", client3_3_symlink },
+ [GF_FOP_RENAME] = { "RENAME", client3_3_rename },
+ [GF_FOP_LINK] = { "LINK", client3_3_link },
+ [GF_FOP_TRUNCATE] = { "TRUNCATE", client3_3_truncate },
+ [GF_FOP_OPEN] = { "OPEN", client3_3_open },
+ [GF_FOP_READ] = { "READ", client3_3_readv },
+ [GF_FOP_WRITE] = { "WRITE", client3_3_writev },
+ [GF_FOP_STATFS] = { "STATFS", client3_3_statfs },
+ [GF_FOP_FLUSH] = { "FLUSH", client3_3_flush },
+ [GF_FOP_FSYNC] = { "FSYNC", client3_3_fsync },
+ [GF_FOP_SETXATTR] = { "SETXATTR", client3_3_setxattr },
+ [GF_FOP_GETXATTR] = { "GETXATTR", client3_3_getxattr },
+ [GF_FOP_REMOVEXATTR] = { "REMOVEXATTR", client3_3_removexattr },
+ [GF_FOP_OPENDIR] = { "OPENDIR", client3_3_opendir },
+ [GF_FOP_FSYNCDIR] = { "FSYNCDIR", client3_3_fsyncdir },
+ [GF_FOP_ACCESS] = { "ACCESS", client3_3_access },
+ [GF_FOP_CREATE] = { "CREATE", client3_3_create },
+ [GF_FOP_FTRUNCATE] = { "FTRUNCATE", client3_3_ftruncate },
+ [GF_FOP_FSTAT] = { "FSTAT", client3_3_fstat },
+ [GF_FOP_LK] = { "LK", client3_3_lk },
+ [GF_FOP_LOOKUP] = { "LOOKUP", client3_3_lookup },
+ [GF_FOP_READDIR] = { "READDIR", client3_3_readdir },
+ [GF_FOP_INODELK] = { "INODELK", client3_3_inodelk },
+ [GF_FOP_FINODELK] = { "FINODELK", client3_3_finodelk },
+ [GF_FOP_ENTRYLK] = { "ENTRYLK", client3_3_entrylk },
+ [GF_FOP_FENTRYLK] = { "FENTRYLK", client3_3_fentrylk },
+ [GF_FOP_XATTROP] = { "XATTROP", client3_3_xattrop },
+ [GF_FOP_FXATTROP] = { "FXATTROP", client3_3_fxattrop },
+ [GF_FOP_FGETXATTR] = { "FGETXATTR", client3_3_fgetxattr },
+ [GF_FOP_FSETXATTR] = { "FSETXATTR", client3_3_fsetxattr },
+ [GF_FOP_RCHECKSUM] = { "RCHECKSUM", client3_3_rchecksum },
+ [GF_FOP_SETATTR] = { "SETATTR", client3_3_setattr },
+ [GF_FOP_FSETATTR] = { "FSETATTR", client3_3_fsetattr },
+ [GF_FOP_READDIRP] = { "READDIRP", client3_3_readdirp },
+ [GF_FOP_FALLOCATE] = { "FALLOCATE", client3_3_fallocate },
+ [GF_FOP_DISCARD] = { "DISCARD", client3_3_discard },
+ [GF_FOP_ZEROFILL] = { "ZEROFILL", client3_3_zerofill},
+ [GF_FOP_RELEASE] = { "RELEASE", client3_3_release },
+ [GF_FOP_RELEASEDIR] = { "RELEASEDIR", client3_3_releasedir },
+ [GF_FOP_GETSPEC] = { "GETSPEC", client3_getspec },
+ [GF_FOP_FREMOVEXATTR] = { "FREMOVEXATTR", client3_3_fremovexattr },
+ [GF_FOP_IPC] = { "IPC", client3_3_ipc },
+ [GF_FOP_SEEK] = { "SEEK", client3_3_seek },
+ [GF_FOP_LEASE] = { "LEASE", client3_3_lease },
+ [GF_FOP_GETACTIVELK] = { "GETACTIVELK", client3_3_getactivelk},
+ [GF_FOP_SETACTIVELK] = { "SETACTIVELK", client3_3_setactivelk},
+ [GF_FOP_COMPOUND] = { "COMPOUND", client3_3_compound },
+};
+
+/*
+ * Used by the RPC-CLNT library to log the proper name of a procedure based
+ * on its number. Indexed by GFS3_OP_* value (the on-wire procedure number),
+ * unlike clnt3_3_fop_actors above, which is indexed by GF_FOP_*. Installed
+ * as clnt3_3_fop_prog.procnames below.
+ */
+char *clnt3_3_fop_names[GFS3_OP_MAXVALUE] = {
+ [GFS3_OP_NULL] = "NULL",
+ [GFS3_OP_STAT] = "STAT",
+ [GFS3_OP_READLINK] = "READLINK",
+ [GFS3_OP_MKNOD] = "MKNOD",
+ [GFS3_OP_MKDIR] = "MKDIR",
+ [GFS3_OP_UNLINK] = "UNLINK",
+ [GFS3_OP_RMDIR] = "RMDIR",
+ [GFS3_OP_SYMLINK] = "SYMLINK",
+ [GFS3_OP_RENAME] = "RENAME",
+ [GFS3_OP_LINK] = "LINK",
+ [GFS3_OP_TRUNCATE] = "TRUNCATE",
+ [GFS3_OP_OPEN] = "OPEN",
+ [GFS3_OP_READ] = "READ",
+ [GFS3_OP_WRITE] = "WRITE",
+ [GFS3_OP_STATFS] = "STATFS",
+ [GFS3_OP_FLUSH] = "FLUSH",
+ [GFS3_OP_FSYNC] = "FSYNC",
+ [GFS3_OP_SETXATTR] = "SETXATTR",
+ [GFS3_OP_GETXATTR] = "GETXATTR",
+ [GFS3_OP_REMOVEXATTR] = "REMOVEXATTR",
+ [GFS3_OP_OPENDIR] = "OPENDIR",
+ [GFS3_OP_FSYNCDIR] = "FSYNCDIR",
+ [GFS3_OP_ACCESS] = "ACCESS",
+ [GFS3_OP_CREATE] = "CREATE",
+ [GFS3_OP_FTRUNCATE] = "FTRUNCATE",
+ [GFS3_OP_FSTAT] = "FSTAT",
+ [GFS3_OP_LK] = "LK",
+ [GFS3_OP_LOOKUP] = "LOOKUP",
+ [GFS3_OP_READDIR] = "READDIR",
+ [GFS3_OP_INODELK] = "INODELK",
+ [GFS3_OP_FINODELK] = "FINODELK",
+ [GFS3_OP_ENTRYLK] = "ENTRYLK",
+ [GFS3_OP_FENTRYLK] = "FENTRYLK",
+ [GFS3_OP_XATTROP] = "XATTROP",
+ [GFS3_OP_FXATTROP] = "FXATTROP",
+ [GFS3_OP_FGETXATTR] = "FGETXATTR",
+ [GFS3_OP_FSETXATTR] = "FSETXATTR",
+ [GFS3_OP_RCHECKSUM] = "RCHECKSUM",
+ [GFS3_OP_SETATTR] = "SETATTR",
+ [GFS3_OP_FSETATTR] = "FSETATTR",
+ [GFS3_OP_READDIRP] = "READDIRP",
+ [GFS3_OP_RELEASE] = "RELEASE",
+ [GFS3_OP_RELEASEDIR] = "RELEASEDIR",
+ [GFS3_OP_FREMOVEXATTR] = "FREMOVEXATTR",
+ [GFS3_OP_FALLOCATE] = "FALLOCATE",
+ [GFS3_OP_DISCARD] = "DISCARD",
+ [GFS3_OP_ZEROFILL] = "ZEROFILL",
+ [GFS3_OP_IPC] = "IPC",
+ [GFS3_OP_SEEK] = "SEEK",
+ [GFS3_OP_LEASE] = "LEASE",
+ [GFS3_OP_GETACTIVELK] = "GETACTIVELK",
+ [GFS3_OP_SETACTIVELK] = "SETACTIVELK",
+ [GFS3_OP_COMPOUND] = "COMPOUND",
+};
+
+/*
+ * Program descriptor for the "GlusterFS 3.3" fop program: bundles the
+ * program number, version and procedure count with the actor table and the
+ * name table defined above.
+ */
+rpc_clnt_prog_t clnt3_3_fop_prog = {
+ .progname = "GlusterFS 3.3",
+ .prognum = GLUSTER_FOP_PROGRAM,
+ .progver = GLUSTER_FOP_VERSION,
+ .numproc = GLUSTER_FOP_PROCCNT,
+ .proctable = clnt3_3_fop_actors,
+ .procnames = clnt3_3_fop_names,
+};
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index bad814a8198..a48104fd467 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -1,113 +1,467 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "client.h"
#include "xlator.h"
#include "defaults.h"
#include "glusterfs.h"
-#include "msg-xdr.h"
#include "statedump.h"
#include "compat-errno.h"
+#include "event.h"
+
+#include "xdr-rpc.h"
+#include "glusterfs3.h"
+#include "gf-dirent.h"
+#include "client-messages.h"
extern rpc_clnt_prog_t clnt_handshake_prog;
+extern rpc_clnt_prog_t clnt_dump_prog;
+extern struct rpcclnt_cb_program gluster_cbk_prog;
+
+int client_handshake (xlator_t *this, struct rpc_clnt *rpc);
+int client_init_rpc (xlator_t *this);
+int client_destroy_rpc (xlator_t *this);
+int client_mark_fd_bad (xlator_t *this);
+
+/*
+ * Clear O_DIRECT from *flags when the translator is configured with
+ * filter_o_direct; otherwise leave *flags untouched.
+ */
+static void
+client_filter_o_direct (clnt_conf_t *conf, int32_t *flags)
+{
+        if (!conf->filter_o_direct)
+                return;
+
+        *flags &= ~O_DIRECT;
+}
+
+/*
+ * Final teardown of the translator's private state.
+ *
+ * Does nothing when this->private is NULL or when conf->destroy is not set.
+ * Otherwise detaches the conf from the xlator, destroys its mutex and frees
+ * it.
+ *
+ * Always returns 0.
+ */
+static int
+client_fini_complete (xlator_t *this)
+{
+        clnt_conf_t *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        conf = this->private;
+        if (conf->destroy) {
+                this->private = NULL;
+
+                pthread_mutex_destroy (&conf->lock);
+                GF_FREE (conf);
+        }
+
+out:
+        return 0;
+}
+
+/*
+ * Dispatch 'event' through client_notify_dispatch() unless it is the same
+ * event that was sent last (conf->last_sent_event), in which case nothing
+ * is sent and 0 is returned. The variadic tail is not consumed.
+ */
+int
+client_notify_dispatch_uniq (xlator_t *this, int32_t event, void *data, ...)
+{
+        clnt_conf_t *conf = this->private;
+
+        if (conf->last_sent_event != event)
+                return client_notify_dispatch (this, event, data);
+
+        return 0;
+}
+
+/*
+ * Deliver 'event' via default_notify() while holding the context-wide
+ * "notifying" flag, then remember it in conf->last_sent_event.
+ *
+ * The flag is taken by waiting on ctx->notify_cond until ctx->notifying is
+ * clear, and is released (with a signal on the condition) after the
+ * notification has been delivered. The variadic tail is not consumed.
+ *
+ * Returns whatever default_notify() returned.
+ */
int
-client_handshake (xlator_t *this, struct rpc_clnt *rpc);
+client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = -1;
+ glusterfs_ctx_t *ctx = this->ctx;
+ clnt_conf_t *conf = this->private;
+
+ /* Wait for any notification already in flight, then claim the flag. */
+ pthread_mutex_lock (&ctx->notify_lock);
+ {
+ while (ctx->notifying)
+ pthread_cond_wait (&ctx->notify_cond,
+ &ctx->notify_lock);
+ ctx->notifying = 1;
+ }
+ pthread_mutex_unlock (&ctx->notify_lock);
+
+ /* We assume that all translators in the graph handle notification
+ * events in sequence.
+ * */
+ ret = default_notify (this, event, data);
+
+ /* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed
+ * that there would be atmost one poller thread executing this
+ * notification function. This allows us to update last_sent_event
+ * without explicit synchronization. See epoll(7).
+ */
+ conf->last_sent_event = event;
+
+ /* Release the flag and signal the condition for a waiter, if any. */
+ pthread_mutex_lock (&ctx->notify_lock);
+ {
+ ctx->notifying = 0;
+ pthread_cond_signal (&ctx->notify_cond);
+ }
+ pthread_mutex_unlock (&ctx->notify_lock);
+
+ return ret;
+}
+
+/*
+ * Translate a POSIX lock type (F_RDLCK / F_WRLCK / F_UNLCK) into the
+ * corresponding GF_LK_F_* value. Any other input yields GF_LK_EOL.
+ */
+int32_t
+client_type_to_gf_type (short l_type)
+{
+        if (l_type == F_RDLCK)
+                return GF_LK_F_RDLCK;
+
+        if (l_type == F_WRLCK)
+                return GF_LK_F_WRLCK;
+
+        if (l_type == F_UNLCK)
+                return GF_LK_F_UNLCK;
+
+        return GF_LK_EOL;
+}
+
+/*
+ * Return conf->lk_version, read under conf->lock.
+ * Returns 0 when 'conf' is NULL.
+ */
+uint32_t
+client_get_lk_ver (clnt_conf_t *conf)
+{
+        uint32_t version = 0;
+
+        GF_VALIDATE_OR_GOTO ("client", conf, done);
+
+        pthread_mutex_lock (&conf->lock);
+        version = conf->lk_version;
+        pthread_mutex_unlock (&conf->lock);
+
+done:
+        return version;
+}
+/*
+ * Grace-timer callback (registered by client_register_grace_timer()).
+ *
+ * Under conf->lock it bumps conf->lk_version (skipping the value 0),
+ * cancels the grace timer and clears conf->grace_timer. It then logs the
+ * new version and calls client_mark_fd_bad(). Does nothing when 'data' is
+ * NULL. 'data' is cast to a struct rpc_clnt pointer but that pointer is not
+ * otherwise used in this function.
+ */
void
-client_start_ping (void *data);
+client_grace_timeout (void *data)
+{
+ int ver = 0;
+ xlator_t *this = NULL;
+ struct clnt_conf *conf = NULL;
+ struct rpc_clnt *rpc = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", data, out);
+
+ this = THIS;
+
+ rpc = (struct rpc_clnt *) data;
+
+ conf = (struct clnt_conf *) this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ ver = ++conf->lk_version;
+ /* ver == 0 is a special value used by server
+ to notify client that this is a fresh connect.*/
+ if (ver == 0)
+ ver = ++conf->lk_version;
+
+ gf_timer_call_cancel (this->ctx, conf->grace_timer);
+ conf->grace_timer = NULL;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_TIMER_EXPIRED,
+ "client grace timer expired, updating "
+ "the lk-version to %d", ver);
+
+ client_mark_fd_bad (this);
+out:
+ return;
+}
+
+/*
+ * Arm the grace timer for this client, at most once per need.
+ *
+ * Under conf->lock: if no timer is pending and conf->grace_timer_needed is
+ * set, the flag is cleared and a timer firing client_grace_timeout() after
+ * conf->grace_timeout seconds is registered with conf->rpc as its argument.
+ * Otherwise only a trace message is emitted.
+ *
+ * Returns 0 on success (whether or not a timer was armed) and -1 when
+ * 'this' or 'conf' is NULL.
+ */
+int32_t
+client_register_grace_timer (xlator_t *this, clnt_conf_t *conf)
+{
+        struct timespec grace_ts = {0, };
+        int32_t         ret      = -1;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+        grace_ts.tv_nsec = 0;
+        grace_ts.tv_sec = conf->grace_timeout;
+
+        pthread_mutex_lock (&conf->lock);
+        {
+                if (!conf->grace_timer && conf->grace_timer_needed) {
+                        gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_TIMER_REG,
+                                "Registering a grace timer");
+
+                        conf->grace_timer_needed = _gf_false;
+
+                        conf->grace_timer =
+                                gf_timer_call_after (this->ctx,
+                                                     grace_ts,
+                                                     client_grace_timeout,
+                                                     conf->rpc);
+                } else {
+                        gf_msg_trace (this->name, 0,
+                                      "Client grace timer is already set "
+                                      "or a grace-timer has already time "
+                                      "out, not registering a new timer");
+                }
+        }
+        pthread_mutex_unlock (&conf->lock);
+
+        ret = 0;
+out:
+        return ret;
+}
+/*
+ * Serialize 'req' with 'xdrproc' (when both are non-NULL) into a freshly
+ * obtained iobuf and submit it on conf->rpc via rpc_clnt_submit().
+ *
+ * Nothing is sent while conf->connected is false, except for the DUMP and
+ * PMAP programs and the handshake program's SETVOLUME procedure.
+ *
+ * On any early failure (the 'out' label) 'cbkfn' is invoked directly with
+ * an rpc_req whose rpc_status is -1. Both the normal and the failure path
+ * return 0.
+ */
int
client_submit_request (xlator_t *this, void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbk,
- struct iobref *iobref, gfs_serialize_t sfunc)
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ struct iobref *iobref, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref,
+ xdrproc_t xdrproc)
{
- int ret = -1;
- clnt_conf_t *conf = NULL;
- struct iovec iov = {0, };
- struct iobuf *iobuf = NULL;
- int count = 0;
- char new_iobref = 0, start_ping = 0;
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ int count = 0;
+ struct iobref *new_iobref = NULL;
+ ssize_t xdr_size = 0;
+ struct rpc_req rpcreq = {0, };
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, prog, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
conf = this->private;
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- if (!iobuf) {
+ /* If 'setvolume' is not successful, we should not send frames to
+ server, mean time we should be able to send 'DUMP' and 'SETVOLUME'
+ call itself even if its not connected */
+ if (!(conf->connected ||
+ ((prog->prognum == GLUSTER_DUMP_PROGRAM) ||
+ (prog->prognum == GLUSTER_PMAP_PROGRAM) ||
+ ((prog->prognum == GLUSTER_HNDSK_PROGRAM) &&
+ (procnum == GF_HNDSK_SETVOLUME))))) {
+ /* This particular error captured/logged in
+ functions calling this */
+ gf_msg_debug (this->name, 0,
+ "connection in disconnected state");
goto out;
- };
+ }
- if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
+ if (req && xdrproc) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ new_iobref = iobref_new ();
+ if (!new_iobref) {
goto out;
}
- new_iobref = 1;
- }
+ if (iobref != NULL) {
+ /* A failed merge is only logged; submission continues. */
+ ret = iobref_merge (new_iobref, iobref);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ PC_MSG_NO_MEMORY, "cannot merge "
+ "iobref passed from caller into "
+ "new_iobref");
+ }
+ }
- iobref_add (iobref, iobuf);
+ ret = iobref_add (new_iobref, iobuf);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ PC_MSG_NO_MEMORY, "cannot add iobuf into "
+ "iobref");
+ goto out;
+ }
- iov.iov_base = iobuf->ptr;
- iov.iov_len = 128 * GF_UNIT_KB;
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
- /* Create the xdr payload */
- if (req && sfunc) {
- ret = sfunc (iov, req);
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
if (ret == -1) {
+ /* callingfn so that, we can get to know which xdr
+ function was called */
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "XDR payload creation failed");
goto out;
}
iov.iov_len = ret;
count = 1;
}
- /* Send the msg */
- ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbk, &iov, count, NULL, 0,
- iobref, frame);
- if (ret == 0) {
- pthread_mutex_lock (&conf->rpc->conn.lock);
- {
- if (!conf->rpc->conn.ping_started) {
- start_ping = 1;
- }
+ /* do not send all groups if they are resolved server-side */
+ if (!conf->send_gids) {
+ if (frame->root->ngrps <= SMALL_GROUP_COUNT) {
+ frame->root->groups_small[0] = frame->root->gid;
+ frame->root->groups = frame->root->groups_small;
}
- pthread_mutex_unlock (&conf->rpc->conn.lock);
+ frame->root->ngrps = 1;
}
- if (start_ping)
- client_start_ping ((void *) this);
+ /* Send the msg */
+ ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbkfn, &iov, count,
+ NULL, 0, new_iobref, frame, rsphdr, rsphdr_count,
+ rsp_payload, rsp_payload_count, rsp_iobref);
+
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0, "rpc_clnt_submit failed");
+ }
+ /* A failed rpc_clnt_submit() is only logged; 0 is returned regardless. */
ret = 0;
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return ret;
+
out:
- if (new_iobref) {
- iobref_unref (iobref);
+ rpcreq.rpc_status = -1;
+
+ /* Nothing was submitted: invoke the callback directly with the failed
+ * rpc_req. */
+ cbkfn (&rpcreq, NULL, 0, frame);
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return 0;
+}
+
+
+/*
+ * Submit a compound request to the server over conf->rpc.
+ *
+ * When both @req and @xdrproc are given, @req is XDR-encoded into a freshly
+ * obtained iobuf and sent as the request header; @req_payload/@req_count are
+ * passed through to rpc_clnt_submit unchanged, as are the rsp* arguments.
+ *
+ * Always returns 0. On the 'out' path (validation failure, disconnected
+ * state, allocation or encoding failure) @cbkfn is invoked directly with an
+ * rpc_req whose rpc_status is -1.
+ */
+int
+client_submit_compound_request (xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ struct iovec *req_payload, int req_count,
+ struct iobref *iobref, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ int count = 0;
+ struct iobref *new_iobref = NULL;
+ ssize_t xdr_size = 0;
+ struct rpc_req rpcreq = {0, };
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, prog, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+
+ conf = this->private;
+
+ /* If 'setvolume' is not successful, we should not send frames to
+ * server
+ */
+
+ if (!conf->connected) {
+ gf_msg_debug (this->name, 0,
+ "connection in disconnected state");
+ goto out;
}
- iobuf_unref (iobuf);
+ /* Encode the request header: the iobuf is sized from xdr_sizeof and
+ tracked in new_iobref together with any iobufs the caller passed
+ in through iobref */
+ if (req && xdrproc) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ new_iobref = iobref_new ();
+ if (!new_iobref) {
+ goto out;
+ }
+
+ if (iobref != NULL) {
+ ret = iobref_merge (new_iobref, iobref);
+ if (ret != 0) {
+ goto out;
+ }
+ }
+
+ ret = iobref_add (new_iobref, iobuf);
+ if (ret != 0) {
+ goto out;
+ }
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ /* callingfn so that, we can get to know which xdr
+ function was called */
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "XDR payload creation failed");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* do not send all groups if they are resolved server-side */
+ if (!conf->send_gids) {
+ if (frame->root->ngrps <= SMALL_GROUP_COUNT) {
+ frame->root->groups_small[0] = frame->root->gid;
+ frame->root->groups = frame->root->groups_small;
+ }
+ frame->root->ngrps = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbkfn, &iov, count,
+ req_payload, req_count, new_iobref, frame,
+ rsphdr, rsphdr_count,
+ rsp_payload, rsp_payload_count, rsp_iobref);
+
+ if (ret < 0) {
+ gf_msg_debug (this->name, 0, "rpc_clnt_submit failed");
+ }
+
+ /* a submit failure is only logged; the caller is still told 0.
+ NOTE(review): presumably rpc_clnt_submit reports the failure
+ through cbkfn itself - confirm */
+ ret = 0;
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
return ret;
-}
+out:
+ /* Nothing was submitted: report the failure through the callback.
+ NOTE(review): this label is also the target of the 'frame'
+ validation above, so frame may be NULL here - confirm that the
+ callbacks tolerate that */
+ rpcreq.rpc_status = -1;
+
+ cbkfn (&rpcreq, NULL, 0, frame);
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return 0;
+}
int32_t
client_forget (xlator_t *this, inode_t *inode)
@@ -123,23 +477,21 @@ client_releasedir (xlator_t *this, fd_t *fd)
clnt_conf_t *conf = NULL;
rpc_clnt_procedure_t *proc = NULL;
clnt_args_t args = {0,};
- call_frame_t *frame = NULL;
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
proc = &conf->fops->proctable[GF_FOP_RELEASEDIR];
if (proc->fn) {
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
- ret = proc->fn (frame, this, &args);
+ ret = proc->fn (NULL, this, &args);
}
out:
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_DIR_OP_FAILED, "releasedir fop failed");
return 0;
}
@@ -150,29 +502,27 @@ client_release (xlator_t *this, fd_t *fd)
clnt_conf_t *conf = NULL;
rpc_clnt_procedure_t *proc = NULL;
clnt_args_t args = {0,};
- call_frame_t *frame = NULL;
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
proc = &conf->fops->proctable[GF_FOP_RELEASE];
if (proc->fn) {
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
- ret = proc->fn (frame, this, &args);
+ ret = proc->fn (NULL, this, &args);
}
out:
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_FILE_OP_FAILED,
+ "release fop failed");
return 0;
}
int32_t
client_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -180,11 +530,11 @@ client_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
- args.dict = xattr_req;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_LOOKUP];
if (proc->fn)
@@ -200,7 +550,7 @@ out:
int32_t
-client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -208,25 +558,26 @@ client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_STAT];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (stat, frame, -1, ENOTCONN, NULL);
-
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
int32_t
-client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -234,18 +585,19 @@ client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.offset = offset;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_TRUNCATE];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
@@ -253,7 +605,8 @@ out:
int32_t
-client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -261,18 +614,19 @@ client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.offset = offset;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FTRUNCATE];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
}
@@ -280,7 +634,8 @@ out:
int32_t
-client_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+client_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -288,18 +643,19 @@ client_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.mask = mask;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_ACCESS];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (access, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (access, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -308,7 +664,8 @@ out:
int32_t
-client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -316,27 +673,27 @@ client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.size = size;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_READLINK];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (readlink, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (readlink, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
}
-
-int32_t
+int
client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -344,12 +701,14 @@ client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.mode = mode;
args.rdev = rdev;
+ args.umask = umask;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_MKNOD];
if (proc->fn)
@@ -357,16 +716,15 @@ client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
out:
if (ret)
STACK_UNWIND_STRICT (mknod, frame, -1, ENOTCONN,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-
-int32_t
+int
client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode)
+ mode_t mode, mode_t umask, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -374,11 +732,13 @@ client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
- args.loc = loc;
+ args.loc = loc;
args.mode = mode;
+ args.umask = umask;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_MKDIR];
if (proc->fn)
@@ -386,7 +746,7 @@ client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
out:
if (ret)
STACK_UNWIND_STRICT (mkdir, frame, -1, ENOTCONN,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -394,7 +754,8 @@ out:
int32_t
-client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -402,10 +763,12 @@ client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
+ args.xdata = xdata;
+ args.flags = xflag;
proc = &conf->fops->proctable[GF_FOP_UNLINK];
if (proc->fn)
@@ -413,13 +776,14 @@ client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
out:
if (ret)
STACK_UNWIND_STRICT (unlink, frame, -1, ENOTCONN,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
-client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -427,10 +791,12 @@ client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
+ args.flags = flags;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_RMDIR];
if (proc->fn)
@@ -439,16 +805,15 @@ out:
/* think of avoiding a missing frame */
if (ret)
STACK_UNWIND_STRICT (rmdir, frame, -1, ENOTCONN,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
-
-int32_t
+int
client_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -456,11 +821,13 @@ client_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.linkname = linkpath;
args.loc = loc;
+ args.umask = umask;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_SYMLINK];
if (proc->fn)
@@ -468,7 +835,7 @@ client_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
out:
if (ret)
STACK_UNWIND_STRICT (symlink, frame, -1, ENOTCONN,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -477,7 +844,7 @@ out:
int32_t
client_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -485,18 +852,20 @@ client_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.oldloc = oldloc;
args.newloc = newloc;
+ args.xdata = xdata;
+
proc = &conf->fops->proctable[GF_FOP_RENAME];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
STACK_UNWIND_STRICT (rename, frame, -1, ENOTCONN,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -505,7 +874,7 @@ out:
int32_t
client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -513,11 +882,12 @@ client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.oldloc = oldloc;
args.newloc = newloc;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_LINK];
if (proc->fn)
@@ -525,7 +895,7 @@ client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
out:
if (ret)
STACK_UNWIND_STRICT (link, frame, -1, ENOTCONN,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -533,8 +903,8 @@ out:
int32_t
-client_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+client_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -542,13 +912,17 @@ client_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
- args.flags = flags;
args.mode = mode;
args.fd = fd;
+ args.umask = umask;
+ args.xdata = xdata;
+ args.flags = flags;
+
+ client_filter_o_direct (conf, &args.flags);
proc = &conf->fops->proctable[GF_FOP_CREATE];
if (proc->fn)
@@ -556,7 +930,7 @@ client_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
out:
if (ret)
STACK_UNWIND_STRICT (create, frame, -1, ENOTCONN,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -565,7 +939,7 @@ out:
int32_t
client_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -573,13 +947,15 @@ client_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
- args.flags = flags;
args.fd = fd;
- args.wbflags = wbflags;
+ args.xdata = xdata;
+ args.flags = flags;
+
+ client_filter_o_direct (conf, &args.flags);
proc = &conf->fops->proctable[GF_FOP_OPEN];
if (proc->fn)
@@ -587,7 +963,7 @@ client_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
out:
if (ret)
- STACK_UNWIND_STRICT (open, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -596,7 +972,7 @@ out:
int32_t
client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -604,12 +980,16 @@ client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.size = size;
args.offset = offset;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ client_filter_o_direct (conf, &args.flags);
proc = &conf->fops->proctable[GF_FOP_READ];
if (proc->fn)
@@ -618,7 +998,7 @@ client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
out:
if (ret)
STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -629,7 +1009,7 @@ out:
int32_t
client_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -637,29 +1017,33 @@ client_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.vector = vector;
args.count = count;
args.offset = off;
+ args.size = iov_length (vector, count);
+ args.flags = flags;
args.iobref = iobref;
+ args.xdata = xdata;
+
+ client_filter_o_direct (conf, &args.flags);
proc = &conf->fops->proctable[GF_FOP_WRITE];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (writev, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
}
-
int32_t
-client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -667,17 +1051,18 @@ client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FLUSH];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (flush, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -686,7 +1071,7 @@ out:
int32_t
client_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -694,18 +1079,19 @@ client_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.flags = flags;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FSYNC];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
}
@@ -713,7 +1099,7 @@ out:
int32_t
-client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -721,17 +1107,18 @@ client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FSTAT];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -739,7 +1126,8 @@ out:
int32_t
-client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -747,18 +1135,19 @@ client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.fd = fd;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_OPENDIR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (opendir, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (opendir, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -766,7 +1155,7 @@ out:
int32_t
-client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -774,18 +1163,19 @@ client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.flags = flags;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FSYNCDIR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -793,7 +1183,7 @@ out:
int32_t
-client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -801,46 +1191,207 @@ client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_STATFS];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (statfs, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (statfs, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
+/*
+ * Tell whether a setxattr dictionary is a "connect" command for this client.
+ *
+ * The answer is _gf_true only when the xlator name contains "replace-brick"
+ * and @dict has the CLIENT_CMD_CONNECT key; in that case dict_get_str has
+ * been given @value to fill in. Every other case yields _gf_false.
+ */
+static gf_boolean_t
+is_client_rpc_init_command (dict_t *dict, xlator_t *this,
+                            char **value)
+{
+        /* substring match on the xlator name */
+        if (strstr (this->name, "replace-brick") == NULL) {
+                gf_msg_trace (this->name, 0, "name is !replace-brick");
+                return _gf_false;
+        }
+
+        if (dict_get_str (dict, CLIENT_CMD_CONNECT, value) != 0) {
+                gf_msg_trace (this->name, 0, "key %s not present",
+                              CLIENT_CMD_CONNECT);
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Tell whether a setxattr dictionary is a "disconnect" command for this
+ * client.
+ *
+ * The answer is _gf_true only when the first 13 characters of the xlator
+ * name are "replace-brick" and @dict has the CLIENT_CMD_DISCONNECT key.
+ * The value stored under the key is fetched but not used.
+ */
+static gf_boolean_t
+is_client_rpc_destroy_command (dict_t *dict, xlator_t *this)
+{
+        char *unused = NULL;
+
+        /* prefix match: 13 is the length of "replace-brick" */
+        if (strncmp (this->name, "replace-brick", 13) != 0) {
+                gf_msg_trace (this->name, 0, "name is !replace-brick");
+                return _gf_false;
+        }
+
+        if (dict_get_str (dict, CLIENT_CMD_DISCONNECT, &unused) != 0) {
+                gf_msg_trace (this->name, 0, "key %s not present",
+                              CLIENT_CMD_DISCONNECT);
+                return _gf_false;
+        }
+
+        return _gf_true;
+}
+
+/*
+ * Split a "host:subvolume:port" string and store the three parts in
+ * this->options under "remote-host", "remote-subvolume" and "remote-port".
+ *
+ * @value: colon-separated connect string. A private copy is tokenised, so
+ *         the caller's buffer is not modified.
+ * @this:  client xlator whose options dictionary is updated.
+ *
+ * Returns _gf_true only when all three options were stored. A malformed
+ * string, an allocation failure or a dictionary failure yields _gf_false;
+ * the dictionary helpers' int status is kept in a separate variable so a
+ * negative error code can never be mistaken for _gf_true by the caller.
+ */
+static gf_boolean_t
+client_set_remote_options (char *value, xlator_t *this)
+{
+        char           *dup_value       = NULL;
+        char           *host            = NULL;
+        char           *subvol          = NULL;
+        char           *host_dup        = NULL;
+        char           *subvol_dup      = NULL;
+        char           *remote_port_str = NULL;
+        char           *tmp             = NULL;
+        int             remote_port     = 0;
+        int             dict_ret        = -1;
+        gf_boolean_t    ret             = _gf_false;
+
+        dup_value = gf_strdup (value);
+        if (!dup_value)
+                return _gf_false;
+
+        host = strtok_r (dup_value, ":", &tmp);
+        subvol = strtok_r (NULL, ":", &tmp);
+        remote_port_str = strtok_r (NULL, ":", &tmp);
+
+        /* strtok_r keeps returning NULL once the input is exhausted, so a
+           missing host is also caught by this check */
+        if (!subvol) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_INVALID_ENTRY, "proper value not passed as "
+                        "subvolume");
+                goto out;
+        }
+
+        /* reject a missing port before touching the options, instead of
+           handing NULL to atoi() further down */
+        if (!remote_port_str) {
+                gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+                        PC_MSG_INVALID_ENTRY, "proper value not passed as "
+                        "remote-port");
+                goto out;
+        }
+
+        host_dup = gf_strdup (host);
+        if (!host_dup) {
+                goto out;
+        }
+
+        dict_ret = dict_set_dynstr (this->options, "remote-host", host_dup);
+        if (dict_ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED,
+                        "failed to set remote-host with %s", host);
+                goto out;
+        }
+
+        subvol_dup = gf_strdup (subvol);
+        if (!subvol_dup) {
+                goto out;
+        }
+
+        dict_ret = dict_set_dynstr (this->options, "remote-subvolume",
+                                    subvol_dup);
+        if (dict_ret) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED,
+                        "failed to set remote-subvolume with %s", subvol);
+                goto out;
+        }
+
+        remote_port = atoi (remote_port_str);
+        GF_ASSERT (remote_port);
+
+        dict_ret = dict_set_int32 (this->options, "remote-port",
+                                   remote_port);
+        if (dict_ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_SET_FAILED,
+                        "failed to set remote-port to %d", remote_port);
+                goto out;
+        }
+
+        ret = _gf_true;
+out:
+        GF_FREE (dup_value);
+
+        return ret;
+}
+/*
+ * setxattr fop entry point.
+ *
+ * Besides forwarding a regular setxattr to the GF_FOP_SETXATTR handler in
+ * conf->fops, this also intercepts the two replace-brick control commands
+ * recognised by is_client_rpc_init_command and
+ * is_client_rpc_destroy_command; those are answered locally by unwinding
+ * the frame with op_ret/op_errno. Always returns 0.
+ */
int32_t
client_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
- int ret = -1;
- clnt_conf_t *conf = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- clnt_args_t args = {0,};
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = ENOTCONN;
+ int need_unwind = 0;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+ char *value = NULL;
+
+
+ /* "connect" command: store the new remote options, then tear down
+ and re-create the rpc object */
+ if (is_client_rpc_init_command (dict, this, &value) == _gf_true) {
+ GF_ASSERT (value);
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_RPC_INIT,
+ "client rpc init command");
+ ret = client_set_remote_options (value, this);
+ if (ret) {
+ (void) client_destroy_rpc (this);
+ ret = client_init_rpc (this);
+ }
+
+ /* NOTE(review): if client_set_remote_options returned 0 the
+ block above is skipped and this branch still reports
+ success - confirm that is intended */
+ if (!ret) {
+ op_ret = 0;
+ op_errno = 0;
+ }
+ need_unwind = 1;
+ goto out;
+ }
+
+ /* "disconnect" command: drop the rpc object */
+ if (is_client_rpc_destroy_command (dict, this) == _gf_true) {
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_RPC_DESTROY,
+ "client rpc destroy command");
+ ret = client_destroy_rpc (this);
+ /* NOTE(review): success is reported when client_destroy_rpc
+ returns non-zero; verify against that function's return
+ convention */
+ if (ret) {
+ op_ret = 0;
+ op_errno = 0;
+ }
+ need_unwind = 1;
+ goto out;
+ }
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops) {
+ op_errno = ENOTCONN;
+ need_unwind = 1;
goto out;
+ }
args.loc = loc;
- args.dict = dict;
+ args.xattr = dict;
args.flags = flags;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_SETXATTR];
- if (proc->fn)
+ /* NOTE(review): when proc->fn is NULL need_unwind stays 0, so the
+ frame is not unwound by this function - confirm */
+ if (proc->fn) {
ret = proc->fn (frame, this, &args);
+ if (ret) {
+ need_unwind = 1;
+ }
+ }
out:
- if (ret)
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOTCONN);
+ if (need_unwind)
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
return 0;
}
@@ -849,7 +1400,7 @@ out:
int32_t
client_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -857,19 +1408,20 @@ client_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
- args.fd = fd;
- args.dict = dict;
+ args.fd = fd;
+ args.xattr = dict;
args.flags = flags;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FSETXATTR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -879,7 +1431,7 @@ out:
int32_t
client_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -887,18 +1439,19 @@ client_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.name = name;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FGETXATTR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -907,7 +1460,7 @@ out:
int32_t
client_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -915,18 +1468,19 @@ client_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.name = name;
args.loc = loc;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_GETXATTR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -935,7 +1489,7 @@ out:
int32_t
client_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -943,19 +1497,20 @@ client_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.flags = flags;
- args.dict = dict;
+ args.xattr = dict;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_XATTROP];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (xattrop, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (xattrop, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -964,7 +1519,7 @@ out:
int32_t
client_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -972,19 +1527,20 @@ client_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.flags = flags;
- args.dict = dict;
+ args.xattr = dict;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FXATTROP];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -993,7 +1549,7 @@ out:
int32_t
client_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1001,26 +1557,81 @@ client_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.name = name;
args.loc = loc;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_REMOVEXATTR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOTCONN, NULL);
return 0;
}
+/*
+ * fremovexattr entry point of the client xlator.
+ *
+ * Packs name, fd and xdata into a clnt_args_t and hands it to the handler
+ * stored in the GF_FOP_FREMOVEXATTR slot of conf->fops->proctable.  If the
+ * private conf or its fops table is missing, the slot has no handler, or
+ * the handler returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     const char *name, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_FREMOVEXATTR];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .name  = name,
+                                .fd    = fd,
+                                .xdata = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOTCONN, NULL);
+
+        return 0;
+}
+
+/*
+ * lease entry point of the client xlator.
+ *
+ * Packs loc, lease and xdata into a clnt_args_t and hands it to the handler
+ * stored in the GF_FOP_LEASE slot of conf->fops->proctable.  If the private
+ * conf or its fops table is missing, the slot has no handler, or the
+ * handler returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_lease (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              struct gf_lease *lease, dict_t *xdata)
+{
+        int                   ret  = -1;
+        clnt_conf_t          *conf = NULL;
+        rpc_clnt_procedure_t *proc = NULL;
+        clnt_args_t           args = {0,};
+
+        conf = this->private;
+        if (!conf || !conf->fops)
+                goto out;
+
+        args.loc   = loc;
+        args.lease = lease;
+        args.xdata = xdata;
+
+        proc = &conf->fops->proctable[GF_FOP_LEASE];
+        if (proc->fn)
+                ret = proc->fn (frame, this, &args);
+out:
+        /* Unwind under this fop's own name, as every other entry point in
+         * this file does; the previous text unwound as "lk". */
+        if (ret)
+                STACK_UNWIND_STRICT (lease, frame, -1, ENOTCONN, NULL, NULL);
+
+        return 0;
+}
+
int32_t
client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1028,19 +1639,20 @@ client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.cmd = cmd;
args.flock = lock;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_LK];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (lk, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -1048,7 +1660,7 @@ out:
int32_t
client_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *lock)
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1056,20 +1668,21 @@ client_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.cmd = cmd;
args.flock = lock;
args.volume = volume;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_INODELK];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -1078,7 +1691,7 @@ out:
int32_t
client_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1086,20 +1699,21 @@ client_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.cmd = cmd;
args.flock = lock;
args.volume = volume;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FINODELK];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -1108,7 +1722,7 @@ out:
int32_t
client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1116,7 +1730,7 @@ client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
@@ -1124,13 +1738,14 @@ client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
args.type = type;
args.volume = volume;
args.cmd_entrylk = cmd;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_ENTRYLK];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOTCONN, NULL);
return 0;
}
@@ -1140,7 +1755,7 @@ out:
int32_t
client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1148,7 +1763,7 @@ client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
@@ -1156,49 +1771,22 @@ client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
args.type = type;
args.volume = volume;
args.cmd_entrylk = cmd;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FENTRYLK];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOTCONN);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOTCONN, NULL);
return 0;
}
int32_t
-client_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- int ret = -1;
- clnt_conf_t *conf = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- clnt_args_t args = {0,};
-
- conf = this->private;
- if (!conf->fops)
- goto out;
-
- args.loc = loc;
- args.flags = flag;
-
- proc = &conf->fops->proctable[GF_FOP_CHECKSUM];
- if (proc->fn)
- ret = proc->fn (frame, this, &args);
-out:
- if (ret)
- STACK_UNWIND_STRICT (checksum, frame, -1, ENOTCONN, NULL, NULL);
-
- return 0;
-}
-
-
-
-int32_t
client_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len)
+ int32_t len, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1206,26 +1794,27 @@ client_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.offset = offset;
args.len = len;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_RCHECKSUM];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOTCONN, 0, NULL);
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOTCONN, 0, NULL, NULL);
return 0;
}
int32_t
client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off)
+ size_t size, off_t off, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1233,19 +1822,23 @@ client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
+ if (off != 0)
+ off = gf_dirent_orig_offset(this, off);
+
args.fd = fd;
args.size = size;
args.offset = off;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_READDIR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (readdir, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -1253,7 +1846,7 @@ out:
int32_t
client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off)
+ size_t size, off_t off, dict_t *dict)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1261,19 +1854,23 @@ client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
+ if (off != 0)
+ off = gf_dirent_orig_offset(this, off);
+
args.fd = fd;
args.size = size;
args.offset = off;
+ args.xdata = dict;
proc = &conf->fops->proctable[GF_FOP_READDIRP];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (readdirp, frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOTCONN, NULL, NULL);
return 0;
}
@@ -1281,7 +1878,7 @@ out:
int32_t
client_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1289,26 +1886,27 @@ client_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.loc = loc;
args.stbuf = stbuf;
args.valid = valid;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_SETATTR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
}
int32_t
client_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int ret = -1;
clnt_conf_t *conf = NULL;
@@ -1316,23 +1914,220 @@ client_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops)
+ if (!conf || !conf->fops)
goto out;
args.fd = fd;
args.stbuf = stbuf;
args.valid = valid;
+ args.xdata = xdata;
proc = &conf->fops->proctable[GF_FOP_FSETATTR];
if (proc->fn)
ret = proc->fn (frame, this, &args);
out:
if (ret)
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOTCONN, NULL, NULL, NULL);
return 0;
}
+/*
+ * fallocate entry point of the client xlator.
+ *
+ * Packs fd, mode (stored in the flags field), offset, len (stored in the
+ * size field) and xdata into a clnt_args_t and hands it to the handler in
+ * the GF_FOP_FALLOCATE slot of conf->fops->proctable.  If the private conf
+ * or its fops table is missing, the slot has no handler, or the handler
+ * returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_FALLOCATE];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .fd     = fd,
+                                .flags  = mode,
+                                .offset = offset,
+                                .size   = len,
+                                .xdata  = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT (fallocate, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * discard entry point of the client xlator.
+ *
+ * Packs fd, offset, len (stored in the size field) and xdata into a
+ * clnt_args_t and hands it to the handler in the GF_FOP_DISCARD slot of
+ * conf->fops->proctable.  If the private conf or its fops table is missing,
+ * the slot has no handler, or the handler returns non-zero, the frame is
+ * unwound with -1 / ENOTCONN.  Always returns 0.
+ */
+int32_t
+client_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               size_t len, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_DISCARD];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .fd     = fd,
+                                .offset = offset,
+                                .size   = len,
+                                .xdata  = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT(discard, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * zerofill entry point of the client xlator.
+ *
+ * Packs fd, offset, len (stored in the size field) and xdata into a
+ * clnt_args_t and hands it to the handler in the GF_FOP_ZEROFILL slot of
+ * conf->fops->proctable.  If the private conf or its fops table is missing,
+ * the slot has no handler, or the handler returns non-zero, the frame is
+ * unwound with -1 / ENOTCONN.  Always returns 0.
+ */
+int32_t
+client_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                off_t len, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_ZEROFILL];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .fd     = fd,
+                                .offset = offset,
+                                .size   = len,
+                                .xdata  = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT(zerofill, frame, -1, ENOTCONN,
+                                    NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * ipc entry point of the client xlator.
+ *
+ * Packs op (stored in the cmd field) and xdata into a clnt_args_t and hands
+ * it to the handler in the GF_FOP_IPC slot of conf->fops->proctable.  If the
+ * private conf or its fops table is missing, the slot has no handler, or
+ * the handler returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_IPC];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .cmd   = op,
+                                .xdata = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT(ipc, frame, -1, ENOTCONN, NULL);
+
+        return 0;
+}
+
+
+/*
+ * seek entry point of the client xlator.
+ *
+ * Packs fd, offset, what and xdata into a clnt_args_t and hands it to the
+ * handler in the GF_FOP_SEEK slot of conf->fops->proctable.  If the private
+ * conf or its fops table is missing, the slot has no handler, or the
+ * handler returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             gf_seek_what_t what, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_SEEK];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .fd     = fd,
+                                .offset = offset,
+                                .what   = what,
+                                .xdata  = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT(seek, frame, -1, ENOTCONN, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * getactivelk entry point of the client xlator.
+ *
+ * Packs loc and xdata into a clnt_args_t and hands it to the handler in the
+ * GF_FOP_GETACTIVELK slot of conf->fops->proctable.  If the private conf or
+ * its fops table is missing, the slot has no handler, or the handler
+ * returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_getactivelk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_GETACTIVELK];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .loc   = loc,
+                                .xdata = xdata,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT (getactivelk, frame, -1, ENOTCONN, NULL,
+                                     NULL);
+
+        return 0;
+}
+
+/*
+ * setactivelk entry point of the client xlator.
+ *
+ * Packs loc, xdata and locklist into a clnt_args_t and hands it to the
+ * handler in the GF_FOP_SETACTIVELK slot of conf->fops->proctable.  If the
+ * private conf or its fops table is missing, the slot has no handler, or
+ * the handler returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_setactivelk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    lock_migration_info_t *locklist, dict_t *xdata)
+{
+        clnt_conf_t *priv   = this->private;
+        int          status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry =
+                        &priv->fops->proctable[GF_FOP_SETACTIVELK];
+
+                if (entry->fn) {
+                        clnt_args_t req = {
+                                .loc      = loc,
+                                .xdata    = xdata,
+                                .locklist = locklist,
+                        };
+
+                        status = entry->fn (frame, this, &req);
+                }
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT (setactivelk, frame, -1, ENOTCONN, NULL);
+
+        return 0;
+}
int32_t
client_getspec (call_frame_t *frame, xlator_t *this, const char *key,
@@ -1344,7 +2139,7 @@ client_getspec (call_frame_t *frame, xlator_t *this, const char *key,
clnt_args_t args = {0,};
conf = this->private;
- if (!conf->fops || !conf->handshake)
+ if (!conf || !conf->fops || !conf->handshake)
goto out;
args.name = key;
@@ -1364,7 +2159,33 @@ out:
}
- int
+/*
+ * compound entry point of the client xlator.
+ *
+ * `data` is used as a compound_args_t; its xdata member is overwritten with
+ * the xdata argument and the structure is handed to the handler in the
+ * GF_FOP_COMPOUND slot of conf->fops->proctable.  If the private conf or
+ * its fops table is missing, the slot has no handler, or the handler
+ * returns non-zero, the frame is unwound with -1 / ENOTCONN.
+ * Always returns 0.
+ */
+int32_t
+client_compound (call_frame_t *frame, xlator_t *this,
+                 void *data, dict_t *xdata)
+{
+        compound_args_t *req    = data;
+        clnt_conf_t     *priv   = this->private;
+        int              status = -1;
+
+        if (priv && priv->fops) {
+                rpc_clnt_procedure_t *entry = NULL;
+
+                /* xdata is attached even when the slot turns out empty */
+                req->xdata = xdata;
+
+                entry = &priv->fops->proctable[GF_FOP_COMPOUND];
+                if (entry->fn)
+                        status = entry->fn (frame, this, req);
+        }
+
+        if (status)
+                STACK_UNWIND_STRICT (compound, frame, -1, ENOTCONN,
+                                     NULL, NULL);
+
+        return 0;
+}
+
+int
client_mark_fd_bad (xlator_t *this)
{
clnt_conf_t *conf = NULL;
@@ -1395,42 +2216,125 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
int ret = 0;
this = mydata;
+ if (!this || !this->private) {
+ gf_msg ("client", GF_LOG_ERROR, EINVAL, PC_MSG_INVALID_ENTRY,
+ (this != NULL) ?
+ "private structure of the xlator is NULL":
+ "xlator is NULL");
+ goto out;
+ }
+
conf = this->private;
switch (event) {
case RPC_CLNT_CONNECT:
{
+ conf->connected = 1;
// connect happened, send 'get_supported_versions' mop
- ret = dict_get_str (this->options, "disable-handshake",
- &handshake);
- gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ gf_msg_debug (this->name, 0, "got RPC_CLNT_CONNECT");
- if ((ret < 0) || (strcasecmp (handshake, "on"))) {
- ret = client_handshake (this, conf->rpc);
+ ret = client_handshake (this, rpc);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_HANDSHAKE_RETURN, "handshake "
+ "msg returned %d", ret);
- } else {
- //conf->rpc->connected = 1;
- ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+ /* Cancel grace timer if set */
+ pthread_mutex_lock (&conf->lock);
+ {
+ conf->grace_timer_needed = _gf_true;
+
+ if (conf->grace_timer) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PC_MSG_GRACE_TIMER_CANCELLED,
+ "Cancelling the grace timer");
+
+ gf_timer_call_cancel (this->ctx,
+ conf->grace_timer);
+
+ conf->grace_timer = NULL;
+ }
}
+ pthread_mutex_unlock (&conf->lock);
+
break;
}
case RPC_CLNT_DISCONNECT:
+ gf_msg_debug (this->name, 0, "got RPC_CLNT_DISCONNECT");
+
+ if (!conf->lk_heal)
+ client_mark_fd_bad (this);
+ else
+ client_register_grace_timer (this, conf);
+
+ if (!conf->skip_notify) {
+ if (conf->connected) {
+ if (!conf->disconnect_err_logged) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_CLIENT_DISCONNECTED,
+ "disconnected from %s. Client "
+ "process will keep trying to "
+ "connect to glusterd until "
+ "brick's port is available",
+ conf->rpc->conn.name);
+ } else {
+ gf_msg_debug (this->name, 0,
+ "disconnected from %s. "
+ "Client process will keep"
+ " trying to connect to "
+ "glusterd until brick's "
+ "port is available",
+ conf->rpc->conn.name);
+ }
+ if (conf->portmap_err_logged)
+ conf->disconnect_err_logged = 1;
+ }
+ /* If the CHILD_DOWN event goes to parent xlator
+ multiple times, the logic of parent xlator notify
+ may get screwed up.. (eg. CHILD_MODIFIED event in
+ replicate), hence make sure events which are passed
+ to parent are genuine */
+ ret = client_notify_dispatch_uniq (this,
+ GF_EVENT_CHILD_DOWN,
+ NULL);
+ if (ret)
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_DOWN_NOTIFY_FAILED,
+ "CHILD_DOWN notify failed");
+
+ } else {
+ if (conf->connected)
+ gf_msg_debug (this->name, 0,
+ "disconnected (skipped notify)");
+ }
+
+ conf->connected = 0;
+ conf->skip_notify = 0;
- client_mark_fd_bad (this);
+ if (conf->quick_reconnect) {
+ conf->quick_reconnect = 0;
+ rpc_clnt_start (rpc);
- gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ } else {
+ rpc->conn.config.remote_port = 0;
+
+ }
+
+ break;
- default_notify (this, GF_EVENT_CHILD_DOWN, NULL);
+ case RPC_CLNT_DESTROY:
+ ret = client_fini_complete (this);
break;
default:
- gf_log (this->name, GF_LOG_TRACE,
- "got some other RPC event %d", event);
+ gf_msg_trace (this->name, 0,
+ "got some other RPC event %d", event);
break;
}
+out:
return 0;
}
@@ -1438,36 +2342,43 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
int
notify (xlator_t *this, int32_t event, void *data, ...)
{
- clnt_conf_t *conf = NULL;
- void *trans = NULL;
+ clnt_conf_t *conf = NULL;
conf = this->private;
+ if (!conf)
+ return 0;
switch (event) {
case GF_EVENT_PARENT_UP:
{
- if (conf->rpc)
- trans = conf->rpc->conn.trans;
-
- if (!trans) {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport init failed");
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_PARENT_UP, attempting connect "
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_PARENT_UP,
+ "parent translators are ready, attempting connect "
"on transport");
- rpc_clnt_reconnect (trans);
+ rpc_clnt_start (conf->rpc);
+ break;
}
- break;
+
+ case GF_EVENT_PARENT_DOWN:
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_PARENT_DOWN,
+ "current graph is no longer active, destroying "
+ "rpc_client ");
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ conf->parent_down = 1;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ rpc_clnt_disable (conf->rpc);
+ break;
default:
- gf_log (this->name, GF_LOG_DEBUG,
- "got %d, calling default_notify ()", event);
+ gf_msg_debug (this->name, 0,
+ "got %d, calling default_notify ()", event);
default_notify (this, event, data);
+ conf->last_sent_event = event;
break;
}
@@ -1475,50 +2386,73 @@ notify (xlator_t *this, int32_t event, void *data, ...)
}
+/*
+ * Make sure `options` carries a "remote-host" entry.
+ *
+ * When the key cannot be read, the volfile server from the process command
+ * line arguments (this->ctx->cmd_args.volfile_server) is stored under
+ * "remote-host" instead.
+ *
+ * Returns 0 when the key is present or the fallback was stored, the negative
+ * dict_get_str() result when there is no volfile server to fall back on,
+ * and -1 when dict_set_str() reports -1.
+ */
int
+client_check_remote_host (xlator_t *this, dict_t *options)
+{
+        char *host = NULL;
+        int   rc   = dict_get_str (options, "remote-host", &host);
+
+        if (rc >= 0)
+                return 0;
+
+        gf_msg (this->name, GF_LOG_INFO, EINVAL,
+                PC_MSG_DICT_GET_FAILED, "Remote host is not set. "
+                "Assuming the volfile server as remote host");
+
+        if (!this->ctx->cmd_args.volfile_server) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        PC_MSG_DICT_GET_FAILED, "No remote host to "
+                        "connect.");
+                return rc;
+        }
+
+        rc = dict_set_str (options, "remote-host",
+                           this->ctx->cmd_args.volfile_server);
+        if (rc == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        PC_MSG_DICT_GET_FAILED, "Failed to set the "
+                        "remote host");
+                return rc;
+        }
+
+        return 0;
+}
+
+int
build_client_config (xlator_t *this, clnt_conf_t *conf)
{
- int ret = 0;
+ int ret = -1;
- ret = dict_get_str (this->options, "remote-subvolume",
- &conf->opt.remote_subvolume);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "option 'remote-subvolume' not given");
+ if (!conf)
goto out;
- }
- ret = dict_get_int32 (this->options, "frame-timeout",
- &conf->rpc_conf.rpc_timeout);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting frame-timeout to %d",
- conf->rpc_conf.rpc_timeout);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting frame-timeout to 30mins");
- conf->rpc_conf.rpc_timeout = 1800;
- }
+ GF_OPTION_INIT ("frame-timeout", conf->rpc_conf.rpc_timeout,
+ int32, out);
- ret = dict_get_int32 (this->options, "remote-port",
- &conf->rpc_conf.remote_port);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "remote-port is %d", conf->rpc_conf.remote_port);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting remote-port to 'auto'");
- }
+ GF_OPTION_INIT ("remote-port", conf->rpc_conf.remote_port,
+ int32, out);
- ret = dict_get_int32 (this->options, "ping-timeout",
- &conf->opt.ping_timeout);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ping-timeout to %d", conf->opt.ping_timeout);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting ping-timeout to 42");
- conf->opt.ping_timeout = GF_UNIVERSAL_ANSWER;
- }
+ GF_OPTION_INIT ("ping-timeout", conf->opt.ping_timeout,
+ int32, out);
+
+ GF_OPTION_INIT ("remote-subvolume", conf->opt.remote_subvolume,
+ path, out);
+ if (!conf->opt.remote_subvolume)
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PC_MSG_INVALID_ENTRY,
+ "option 'remote-subvolume' not given");
+
+ GF_OPTION_INIT ("filter-O_DIRECT", conf->filter_o_direct,
+ bool, out);
+
+ GF_OPTION_INIT ("send-gids", conf->send_gids, bool, out);
+
+ conf->client_id = glusterfs_leaf_position(this);
+
+ ret = client_check_remote_host (this, this->options);
+ if (ret)
+ goto out;
ret = 0;
out:
@@ -1537,14 +2471,209 @@ mem_acct_init (xlator_t *this)
ret = xlator_mem_acct_init (this, gf_client_mt_end + 1);
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, PC_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
}
return ret;
}
+/*
+ * Drop the rpc client object held in the xlator's private conf.
+ *
+ * The connection is cleaned up first, then conf->rpc is replaced by the
+ * value rpc_clnt_unref() returns.
+ *
+ * Returns 0 when an rpc object was present; -1 when the private conf is
+ * missing or conf->rpc is already NULL (the latter is logged as a warning).
+ */
+int
+client_destroy_rpc (xlator_t *this)
+{
+        clnt_conf_t *priv = this->private;
+
+        if (!priv)
+                return -1;
+
+        if (!priv->rpc) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_RPC_INVALID_CALL,
+                        "RPC destroy called on already destroyed "
+                        "connection");
+                return -1;
+        }
+
+        /* cleanup the saved-frames before last unref */
+        rpc_clnt_connection_cleanup (&priv->rpc->conn);
+        priv->rpc = rpc_clnt_unref (priv->rpc);
+
+        gf_msg_debug (this->name, 0,
+                      "Client rpc conn destroyed");
+
+        return 0;
+}
+
+/*
+ * Create the rpc client object for this xlator and wire it up.
+ *
+ * Steps, in order: rpc_clnt_new() on this->options, registration of
+ * client_rpc_notify as notify callback, assignment of the handshake and
+ * dump programs, registration of gluster_cbk_prog as callback program.
+ *
+ * Returns 0 on success.  Returns -1 when conf->rpc is already set or
+ * rpc_clnt_new() yields NULL; otherwise the non-zero value returned by the
+ * failing registration call.  this->private is dereferenced unchecked.
+ */
+int
+client_init_rpc (xlator_t *this)
+{
+        clnt_conf_t *priv = this->private;
+        int          rc   = -1;
+
+        if (priv->rpc) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        PC_MSG_RPC_INITED_ALREADY, "client rpc already "
+                        "init'ed");
+                return -1;
+        }
+
+        priv->rpc = rpc_clnt_new (this->options, this, this->name, 0);
+        if (!priv->rpc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_RPC_INIT_FAILED,
+                        "failed to initialize RPC");
+                return -1;
+        }
+
+        rc = rpc_clnt_register_notify (priv->rpc, client_rpc_notify, this);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_RPC_NOTIFY_FAILED,
+                        "failed to register notify");
+                return rc;
+        }
+
+        priv->handshake = &clnt_handshake_prog;
+        priv->dump      = &clnt_dump_prog;
+
+        rc = rpcclnt_cbk_program_register (priv->rpc, &gluster_cbk_prog,
+                                           this);
+        if (rc) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, PC_MSG_RPC_CBK_FAILED,
+                        "failed to register callback program");
+                return rc;
+        }
+
+        gf_msg_debug (this->name, 0, "client init successful");
+
+        return 0;
+}
+
+
+/*
+ * Read the "lk-heal" and "grace-timeout" options from `options` into
+ * conf->lk_heal and conf->grace_timeout and log both at debug level.
+ *
+ * Returns 0 when the end of the function is reached, -1 otherwise.
+ * NOTE(review): GF_VALIDATE_OR_GOTO and GF_OPTION_RECONF are defined
+ * elsewhere; presumably they jump to `out` on a NULL argument or a bad
+ * option value - confirm against their definitions.
+ */
+int
+client_init_grace_timer (xlator_t *this, dict_t *options,
+ clnt_conf_t *conf)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ GF_OPTION_RECONF ("lk-heal", conf->lk_heal, options, bool, out);
+
+ gf_msg_debug (this->name, 0, "lk-heal = %s",
+ (conf->lk_heal) ? "on" : "off");
+
+ GF_OPTION_RECONF ("grace-timeout", conf->grace_timeout,
+ options, uint32, out);
+
+ /* NOTE(review): the option is read as uint32 but printed with %d */
+ gf_msg_debug (this->name, 0, "Client grace timeout value = %d",
+ conf->grace_timeout);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Apply a new event-thread count.
+ *
+ * When `new` equals `old` nothing happens and 0 is returned.  Otherwise the
+ * count is stored in conf->event_threads and the result of
+ * event_reconfigure_threads() on the context's event pool is returned.
+ */
+int
+client_check_event_threads (xlator_t *this, clnt_conf_t *conf, int32_t old,
+                            int32_t new)
+{
+        if (old != new) {
+                conf->event_threads = new;
+                return event_reconfigure_threads (this->ctx->event_pool,
+                                                  conf->event_threads);
+        }
+
+        return 0;
+}
+
+/*
+ * Apply a changed option dictionary to an already initialised client xlator.
+ *
+ * Return value as visible here: 0 when the end of the function is reached;
+ * 1 when "remote-host" or "remote-subvolume" in `options` differs from the
+ * value in this->options; the non-zero result of
+ * client_check_event_threads(), client_check_remote_host() or
+ * client_init_grace_timer() when one of them fails.
+ * NOTE(review): GF_OPTION_RECONF is defined elsewhere; presumably it jumps
+ * to `out` on a bad option value - confirm what `ret` holds in that case.
+ * this->private is dereferenced without a NULL check.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+ int subvol_ret = 0;
+ char *old_remote_subvol = NULL;
+ char *new_remote_subvol = NULL;
+ char *old_remote_host = NULL;
+ char *new_remote_host = NULL;
+ int32_t new_nthread = 0;
+ struct rpc_clnt_config rpc_config = {0,};
+
+ conf = this->private;
+
+ /* frame-timeout is stored in the conf, ping-timeout in the local
+ * rpc_config that is passed to rpc_clnt_reconfig() further down */
+ GF_OPTION_RECONF ("frame-timeout", conf->rpc_conf.rpc_timeout,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("ping-timeout", rpc_config.ping_timeout,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("event-threads", new_nthread, options,
+ int32, out);
+ ret = client_check_event_threads (this, conf, conf->event_threads,
+ new_nthread);
+ if (ret)
+ goto out;
+
+ ret = client_check_remote_host (this, options);
+ if (ret)
+ goto out;
+
+ /* Compare the current and the new remote-host; the comparison is
+ * skipped when either dictionary lacks the key */
+ subvol_ret = dict_get_str (this->options, "remote-host",
+ &old_remote_host);
+
+ if (subvol_ret == 0) {
+ subvol_ret = dict_get_str (options, "remote-host",
+ &new_remote_host);
+ if (subvol_ret == 0) {
+ if (strcmp (old_remote_host, new_remote_host)) {
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+
+ /* Same comparison for remote-subvolume */
+ subvol_ret = dict_get_str (this->options, "remote-subvolume",
+ &old_remote_subvol);
+
+ if (subvol_ret == 0) {
+ subvol_ret = dict_get_str (options, "remote-subvolume",
+ &new_remote_subvol);
+ if (subvol_ret == 0) {
+ if (strcmp (old_remote_subvol, new_remote_subvol)) {
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+
+ /* Reconfiguring client xlator's @rpc with new frame-timeout
+ * and ping-timeout */
+ /* NOTE(review): only rpc_config.ping_timeout is written in this
+ * function; rpc_config.rpc_timeout stays at its zero initialiser.
+ * Confirm that the new frame-timeout really reaches the rpc layer. */
+ rpc_clnt_reconfig (conf->rpc, &rpc_config);
+
+ GF_OPTION_RECONF ("filter-O_DIRECT", conf->filter_o_direct,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("send-gids", conf->send_gids, options, bool, out);
+
+ ret = client_init_grace_timer (this, options, conf);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
int
init (xlator_t *this)
@@ -1552,17 +2681,16 @@ init (xlator_t *this)
int ret = -1;
clnt_conf_t *conf = NULL;
- /* */
if (this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: client protocol translator cannot have any "
- "subvolumes");
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PC_MSG_INVALID_ENTRY, "FATAL: client protocol "
+ "translator cannot have any subvolumes");
goto out;
}
if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. ");
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PC_MSG_INVALID_ENTRY, "Volume is dangling. ");
}
conf = GF_CALLOC (1, sizeof (*conf), gf_client_mt_clnt_conf_t);
@@ -1572,23 +2700,54 @@ init (xlator_t *this)
pthread_mutex_init (&conf->lock, NULL);
INIT_LIST_HEAD (&conf->saved_fds);
- ret = build_client_config (this, conf);
+ conf->child_up = _gf_false;
+
+ /* Initialize parameters for lock self healing*/
+ conf->lk_version = 1;
+ conf->grace_timer = NULL;
+ conf->grace_timer_needed = _gf_true;
+
+ /* Set event threads to the configured default */
+ GF_OPTION_INIT("event-threads", conf->event_threads, int32, out);
+ ret = client_check_event_threads (this, conf, STARTING_EVENT_THREADS,
+ conf->event_threads);
if (ret)
goto out;
- conf->rpc = rpc_clnt_init (&conf->rpc_conf, this->options, this->ctx,
- this->name);
- if (!conf->rpc)
- goto out;
- conf->rpc->xid = 42; /* It should be enough random everytime :O */
- ret = rpc_clnt_register_notify (conf->rpc, client_rpc_notify, this);
+ ret = client_init_grace_timer (this, this->options, conf);
if (ret)
goto out;
- conf->handshake = &clnt_handshake_prog;
+ LOCK_INIT (&conf->rec_lock);
+
+ conf->last_sent_event = -1; /* To start with we don't have any events */
+
this->private = conf;
- ret = 0;
+ /* If it returns -1, it is a failure. If it returns +1, 'this' is
+ the subvolume of an xlator which will set the remote host and
+ remote subvolume later in a setxattr call, so there is nothing
+ more to initialise here.
+ */
+
+ ret = build_client_config (this, conf);
+ if (ret == -1)
+ goto out;
+
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ this->local_pool = mem_pool_new (clnt_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, PC_MSG_NO_MEMORY,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ ret = client_init_rpc (this);
out:
if (ret)
this->fini (this);
@@ -1602,48 +2761,91 @@ fini (xlator_t *this)
clnt_conf_t *conf = NULL;
conf = this->private;
- this->private = NULL;
+ if (!conf)
+ return;
+
+ conf->destroy = 1;
+ if (conf->rpc) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup (&conf->rpc->conn);
+ rpc_clnt_unref (conf->rpc);
+ }
- if (conf) {
- if (conf->rpc)
- rpc_clnt_destroy (conf->rpc);
+ /* Saved Fds */
+ /* TODO: */
- /* Saved Fds */
- /* TODO: */
+ return;
+}
- pthread_mutex_destroy (&conf->lock);
+/*
+ * Write the granted posix locks recorded in one fd's lock context to the
+ * process state dump.
+ *
+ * A reference on lk_ctx is taken with fd_lk_ctx_try_ref(); nothing is
+ * dumped when that fails, when client_fd_lk_list_empty() returns non-zero,
+ * or when the context lock cannot be taken with TRY_LOCK.  The reference
+ * is released on every path once it has been obtained (the early exits
+ * used to return without dropping it).
+ *
+ * `this` and `nth_fd` are not used by the body.
+ */
+static void
+client_fd_lk_ctx_dump (xlator_t *this, fd_lk_ctx_t *lk_ctx, int nth_fd)
+{
+        gf_boolean_t      use_try_lock             = _gf_true;
+        int               ret                      = -1;
+        int               lock_no                  = 0;
+        fd_lk_ctx_t      *lk_ctx_ref               = NULL;
+        fd_lk_ctx_node_t *plock                    = NULL;
+        char              key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+        lk_ctx_ref = fd_lk_ctx_try_ref (lk_ctx);
+        if (!lk_ctx_ref)
+                return;
+
+        ret = client_fd_lk_list_empty (lk_ctx_ref, (use_try_lock = _gf_true));
+        if (ret != 0)
+                goto unref;
- GF_FREE (conf);
+        ret = TRY_LOCK (&lk_ctx_ref->lock);
+        if (ret)
+                goto unref;
+
+        gf_proc_dump_write ("------","------");
+
+        lock_no = 0;
+        list_for_each_entry (plock, &lk_ctx_ref->lk_list, next) {
+                snprintf (key, sizeof (key), "granted-posix-lock[%d]",
+                          lock_no++);
+                gf_proc_dump_write (key, "owner = %s, cmd = %s "
+                                    "fl_type = %s, fl_start = %"
+                                    PRId64", fl_end = %"PRId64
+                                    ", user_flock: l_type = %s, "
+                                    "l_start = %"PRId64", l_len = %"PRId64,
+                                    lkowner_utoa (&plock->user_flock.l_owner),
+                                    get_lk_cmd (plock->cmd),
+                                    get_lk_type (plock->fl_type),
+                                    plock->fl_start, plock->fl_end,
+                                    get_lk_type (plock->user_flock.l_type),
+                                    plock->user_flock.l_start,
+                                    plock->user_flock.l_len);
}
- return;
+        gf_proc_dump_write ("------","------");
+
+        UNLOCK (&lk_ctx_ref->lock);
+unref:
+        /* drop the reference taken by fd_lk_ctx_try_ref() above */
+        fd_lk_ctx_unref (lk_ctx_ref);
+
}
int
client_priv_dump (xlator_t *this)
{
- clnt_conf_t *conf = NULL;
- int ret = -1;
- clnt_fd_ctx_t *tmp = NULL;
- int i = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+ clnt_fd_ctx_t *tmp = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ rpc_clnt_connection_t *conn = NULL;
if (!this)
return -1;
conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
+ if (!conf)
return -1;
- }
ret = pthread_mutex_trylock(&conf->lock);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to lock client %s"
- " errno: %d", this->name, errno);
+ if (ret)
return -1;
- }
gf_proc_dump_build_key(key_prefix, "xlator.protocol.client",
"%s.priv", this->name);
@@ -1651,18 +2853,29 @@ client_priv_dump (xlator_t *this)
gf_proc_dump_add_section(key_prefix);
list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) {
- gf_proc_dump_build_key(key, key_prefix,
- "fd.%d.remote_fd", ++i);
+ sprintf (key, "fd.%d.remote_fd", i);
gf_proc_dump_write(key, "%d", tmp->remote_fd);
+ client_fd_lk_ctx_dump (this, tmp->lk_ctx, i);
+ i++;
}
- gf_proc_dump_build_key(key, key_prefix, "connecting");
- gf_proc_dump_write(key, "%d", conf->connecting);
- gf_proc_dump_build_key(key, key_prefix, "last_sent");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_sent.tv_sec));
- gf_proc_dump_build_key(key, key_prefix, "last_received");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_received.tv_sec));
-
+ gf_proc_dump_write("connecting", "%d", conf->connecting);
+
+ gf_proc_dump_write ("connected", "%d", conf->connected);
+
+ if (conf->rpc) {
+ conn = &conf->rpc->conn;
+ gf_proc_dump_write("total_bytes_read", "%"PRIu64,
+ conn->trans->total_bytes_read);
+ gf_proc_dump_write("ping_timeout", "%"PRIu32,
+ conn->ping_timeout);
+ gf_proc_dump_write("total_bytes_written", "%"PRIu64,
+ conn->trans->total_bytes_write);
+ gf_proc_dump_write("ping_msgs_sent", "%"PRIu64,
+ conn->pingcnt);
+ gf_proc_dump_write("msgs_sent", "%"PRIu64,
+ conn->msgcnt);
+ }
pthread_mutex_unlock(&conf->lock);
return 0;
@@ -1672,26 +2885,13 @@ client_priv_dump (xlator_t *this)
int32_t
client_inodectx_dump (xlator_t *this, inode_t *inode)
{
- ino_t par = 0;
- uint64_t gen = 0;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN];
-
if (!inode)
return -1;
if (!this)
return -1;
- ret = inode_ctx_get2 (inode, this, &par, &gen);
-
- if (ret != 0)
- return ret;
-
- gf_proc_dump_build_key(key, "xlator.protocol.client",
- "%s.inode.%ld.par",
- this->name,inode->ino);
- gf_proc_dump_write(key, "%ld, %ld", par, gen);
+ /*TODO*/
return 0;
}
@@ -1727,6 +2927,7 @@ struct xlator_fops fops = {
.fsetxattr = client_fsetxattr,
.fgetxattr = client_fgetxattr,
.removexattr = client_removexattr,
+ .fremovexattr = client_fremovexattr,
.opendir = client_opendir,
.readdir = client_readdir,
.readdirp = client_readdirp,
@@ -1741,13 +2942,21 @@ struct xlator_fops fops = {
.entrylk = client_entrylk,
.fentrylk = client_fentrylk,
.lookup = client_lookup,
- .checksum = client_checksum,
.rchecksum = client_rchecksum,
.xattrop = client_xattrop,
.fxattrop = client_fxattrop,
.setattr = client_setattr,
.fsetattr = client_fsetattr,
+ .fallocate = client_fallocate,
+ .discard = client_discard,
+ .zerofill = client_zerofill,
.getspec = client_getspec,
+ .ipc = client_ipc,
+ .seek = client_seek,
+ .lease = client_lease,
+ .compound = client_compound,
+ .getactivelk = client_getactivelk,
+ .setactivelk = client_setactivelk,
};
@@ -1766,12 +2975,15 @@ struct volume_options options[] = {
},
{ .key = {"transport-type"},
.value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
- "tcp/client", "ib-verbs/client"},
+ "tcp/client", "ib-verbs/client", "rdma"},
.type = GF_OPTION_TYPE_STR
},
{ .key = {"remote-host"},
.type = GF_OPTION_TYPE_INTERNET_ADDRESS
},
+ { .key = {"remote-port"},
+ .type = GF_OPTION_TYPE_INT,
+ },
{ .key = {"remote-subvolume"},
.type = GF_OPTION_TYPE_ANY
},
@@ -1780,11 +2992,68 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_TIME,
.min = 0,
.max = 86400,
+ .default_value = "1800",
+ .description = "Time frame after which the (file) operation would be "
+ "declared as dead, if the server does not respond for "
+ "a particular (file) operation."
},
{ .key = {"ping-timeout"},
.type = GF_OPTION_TYPE_TIME,
- .min = 1,
+ .min = 0,
.max = 1013,
+ .default_value = "42",
+ .description = "Time duration for which the client waits to "
+ "check if the server is responsive."
+ },
+ { .key = {"client-bind-insecure"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"lk-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When the connection to client is lost, server "
+ "cleans up all the locks held by the client. After "
+ "the connection is restored, the client reacquires "
+ "(heals) the fcntl locks released by the server."
+ },
+ { .key = {"grace-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 10,
+ .max = 1800,
+ .default_value = "10",
+ .description = "Specifies the duration for the lock state to be "
+ "maintained on the client after a network "
+ "disconnection. Range 10-1800 seconds."
+ },
+ {.key = {"tcp-window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_MIN_SOCKET_WINDOW_SIZE,
+ .max = GF_MAX_SOCKET_WINDOW_SIZE,
+ .description = "Specifies the window size for tcp socket."
+ },
+ { .key = {"filter-O_DIRECT"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "disable",
+ .description = "If enabled, in open/creat/readv/writev fops, "
+ "O_DIRECT flag will be filtered at the client protocol level so "
+ "server will still continue to cache the file. This works similar to "
+ "NFS's behavior of O_DIRECT. Anon-fds can choose to readv/writev "
+ "using O_DIRECT",
+ },
+ { .key = {"send-gids"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ },
+ { .key = {"event-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 32,
+ .default_value = "2",
+ .description = "Specifies the number of event threads to execute "
+ "in parallel. Larger values would help process"
+ " responses faster, depending on available processing"
+ " power. Range 1-32 threads."
},
{ .key = {NULL} },
};
+
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 1422c7abeda..61409d1fc79 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CLIENT_H
@@ -28,7 +19,103 @@
#include "inode.h"
#include "client-mem-types.h"
#include "protocol-common.h"
-#include "glusterfs-xdr.h"
+#include "glusterfs3.h"
+#include "glusterfs3-xdr.h"
+#include "fd-lk.h"
+#include "defaults.h"
+#include "default-args.h"
+#include "client-messages.h"
+
+/* FIXME: Needs to be defined in a common file */
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+#define CLIENT_DUMP_LOCKS "trusted.glusterfs.clientlk-dump"
+#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
+#define GF_MIN_SOCKET_WINDOW_SIZE (0)
+
+typedef enum { /* lock-heal progress; this is the type of clnt_fd_ctx_t.lk_heal_state */
+ GF_LK_HEAL_IN_PROGRESS,
+ GF_LK_HEAL_DONE,
+} lk_heal_state_t;
+
+typedef enum { /* NOTE(review): presumably the `flags` argument of client_get_remote_fd() -- confirm */
+ DEFAULT_REMOTE_FD = 0,
+ FALLBACK_TO_ANON_FD = 1
+} clnt_remote_fd_flags_t;
+
+#define CPD_REQ_FIELD(v,f) (v)->compound_req_u.compound_##f##_req
+#define CPD_RSP_FIELD(v,f) (v)->compound_rsp_u.compound_##f##_rsp
+
+#define CLIENT_POST_FOP(fop, this_rsp_u, this_args_cbk, params ...) \
+ do { \
+ gf_common_rsp *_this_rsp = &CPD_RSP_FIELD(this_rsp_u,fop); \
+ int _op_ret = 0; \
+ int _op_errno = 0; \
+ /* Take op_ret and the gf_error_to_errno()-converted op_errno from the <fop> member of this_rsp_u (read as a gf_common_rsp) and hand them, with params, to args_<fop>_cbk_store. */ \
+ _op_ret = _this_rsp->op_ret; \
+ _op_errno = gf_error_to_errno (_this_rsp->op_errno); \
+ args_##fop##_cbk_store (this_args_cbk, _op_ret, _op_errno, \
+ params); \
+ } while (0)
+
+#define CLIENT_POST_FOP_TYPE(fop, this_rsp_u, this_args_cbk, params ...) \
+ do { \
+ gfs3_##fop##_rsp *_this_rsp = &CPD_RSP_FIELD(this_rsp_u,fop);\
+ int _op_ret = 0; \
+ int _op_errno = 0; \
+ /* Same flow as CLIENT_POST_FOP, but the <fop> member of this_rsp_u is read through its own gfs3_<fop>_rsp type instead of gf_common_rsp. */ \
+ _op_ret = _this_rsp->op_ret; \
+ _op_errno = gf_error_to_errno (_this_rsp->op_errno); \
+ args_##fop##_cbk_store (this_args_cbk, _op_ret, _op_errno, \
+ params); \
+ } while (0)
+
+#define CLIENT_PRE_FOP(fop, xl, compound_req, op_errno, label, params ...) \
+ do { \
+ gfs3_##fop##_req *_req = (gfs3_##fop##_req *) compound_req; \
+ int _ret = 0; \
+ /* Run client_pre_<fop> on compound_req; a negative return is negated into op_errno and control jumps to label. */ \
+ _ret = client_pre_##fop (xl, _req, params); \
+ if (_ret < 0) { \
+ op_errno = -_ret; /* use the macro-local _ret, not a caller-scope `ret` */ \
+ goto label; \
+ } \
+ } while (0)
+
+#define CLIENT_COMPOUND_FOP_CLEANUP(curr_req, fop) \
+ do { \
+ gfs3_##fop##_req *_req = &CPD_REQ_FIELD(curr_req,fop); \
+ /* GF_FREE the xdata buffer held by the <fop> member of curr_req. */ \
+ GF_FREE (_req->xdata.xdata_val); \
+ } while (0)
+
+#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, label) \
+ do { \
+ int _ret = 0; /* Look up fd's remote fd into remote_fd; on a negative return or a -1 remote fd, set op_errno and jump to label. */ \
+ _ret = client_get_remote_fd (xl, fd, flags, &remote_fd);\
+ if (_ret < 0) { \
+ op_errno = errno; /* lookup failed: report the current errno */ \
+ goto label; \
+ } \
+ if (remote_fd == -1) { /* lookup succeeded but gave -1: log and fail with EBADFD */ \
+ gf_msg (xl->name, GF_LOG_WARNING, EBADFD, \
+ PC_MSG_BAD_FD, " (%s) " \
+ "remote_fd is -1. EBADFD", \
+ uuid_utoa (fd->inode->gfid)); \
+ op_errno = EBADFD; \
+ goto label; \
+ } \
+ } while (0)
+
+#define CLIENT_STACK_UNWIND(op, frame, params ...) do { /* unwind frame for fop `op`, then wipe its clnt_local_t */ \
+ if (!frame) /* nothing to unwind */ \
+ break; \
+ clnt_local_t *__local = frame->local; \
+ frame->local = NULL; /* detach local first; it is wiped only after the unwind below */ \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ client_local_wipe (__local); \
+ } while (0)
+
struct clnt_options {
char *remote_subvolume;
@@ -42,12 +129,63 @@ typedef struct clnt_conf {
struct list_head saved_fds;
pthread_mutex_t lock;
int connecting;
- struct timeval last_sent;
- struct timeval last_received;
+ int connected;
rpc_clnt_prog_t *fops;
rpc_clnt_prog_t *mgmt;
rpc_clnt_prog_t *handshake;
+ rpc_clnt_prog_t *dump;
+
+ int client_id;
+ uint64_t reopen_fd_count; /* Count of fds reopened after a
+ connection is established */
+ gf_lock_t rec_lock;
+ int skip_notify;
+
+ int last_sent_event; /* Flag used to make sure we are
+ not repeating the same event
+ which was sent earlier */
+ char portmap_err_logged; /* flag used to prevent
+ excessive logging */
+ char disconnect_err_logged; /* flag used to prevent
+ excessive disconnect
+ logging */
+ gf_boolean_t lk_heal;
+ uint16_t lk_version; /* this variable is used to distinguish
+ client-server transaction while
+ performing lock healing */
+ uint32_t grace_timeout;
+ gf_timer_t *grace_timer;
+ gf_boolean_t grace_timer_needed; /* The state of this flag will
+ be used to decide whether
+ a new grace-timer must be
+ registered or not. False
+ means don't register, true
+ means register */
+ char parent_down;
+ gf_boolean_t quick_reconnect; /* When reconnecting after
+ portmap query, do not let
+ the reconnection happen after
+ the usual 3-second wait
+ */
+ gf_boolean_t filter_o_direct; /* if set, filter O_DIRECT from
+ the flags list of open() */
+ /* set volume is the op which results in creating/re-using
+ * the conn-id and is called once per connection, this remembers
+ * how many times set_volume is called
+ */
+ uint64_t setvol_count;
+
+ gf_boolean_t send_gids; /* let the server resolve gids */
+
+ int event_threads; /* # of event threads
+ * configured */
+
+ gf_boolean_t destroy; /* if enabled implies fini was called
+ * on @this xlator instance */
+
+ gf_boolean_t child_up; /* Set to true, when child is up, and
+ * false, when child is down */
} clnt_conf_t;
typedef struct _client_fd_ctx {
@@ -55,39 +193,64 @@ typedef struct _client_fd_ctx {
fd's position in the saved_fds list.
*/
int64_t remote_fd;
- inode_t *inode;
- uint64_t ino;
- uint64_t gen;
char is_dir;
char released;
int32_t flags;
- int32_t wbflags;
+ fd_lk_ctx_t *lk_ctx;
+ pthread_mutex_t mutex;
+ lk_heal_state_t lk_heal_state;
+ uuid_t gfid;
+ void (*reopen_done) (struct _client_fd_ctx*, xlator_t *);
+ struct list_head lock_list; /* List of all granted locks on this fd */
+ int32_t reopen_attempts;
} clnt_fd_ctx_t;
+typedef struct _client_posix_lock { /* one lock recorded for an fd; linked into the fd context's lock list through `list` */
+ fd_t *fd; /* The fd on which the lk operation was made */
+
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ off_t fl_start;
+ off_t fl_end;
+ short fl_type;
+ int32_t cmd; /* the cmd for the lock call */
+ gf_lkowner_t owner; /* lock owner from fuse */
+ struct list_head list; /* reference used to add to the fdctx list of locks */
+} client_posix_lock_t;
+
typedef struct client_local {
- loc_t loc;
- loc_t loc2;
- fd_t *fd;
- clnt_fd_ctx_t *fdctx;
- uint32_t flags;
- uint32_t wbflags;
- fop_cbk_fn_t op;
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ clnt_fd_ctx_t *fdctx;
+ uint32_t flags;
+ struct iobref *iobref;
+
+ client_posix_lock_t *client_lock;
+ gf_lkowner_t owner;
+ int32_t cmd;
+ struct list_head lock_list;
+ pthread_mutex_t mutex;
+ char *name;
+ gf_boolean_t attempt_reopen;
+ /* required for compound fops */
+ struct iobref *iobref2;
+ compound_args_t *compound_args;
+ unsigned int length; /* length of a compound fop */
+ unsigned int read_length; /* defines the last processed length for a compound read */
} clnt_local_t;
typedef struct client_args {
loc_t *loc;
fd_t *fd;
- dict_t *xattr_req;
const char *linkname;
struct iobref *iobref;
struct iovec *vector;
dict_t *xattr;
struct iatt *stbuf;
- dict_t *dict;
loc_t *oldloc;
loc_t *newloc;
const char *name;
- struct flock *flock;
+ struct gf_flock *flock;
const char *volume;
const char *basename;
off_t offset;
@@ -97,7 +260,6 @@ typedef struct client_args {
mode_t mode;
dev_t rdev;
int32_t flags;
- int32_t wbflags;
int32_t count;
int32_t datasync;
entrylk_cmd cmd_entrylk;
@@ -105,6 +267,12 @@ typedef struct client_args {
gf_xattrop_flags_t optype;
int32_t valid;
int32_t len;
+ gf_seek_what_t what;
+ struct gf_lease *lease;
+
+ mode_t umask;
+ dict_t *xdata;
+ lock_migration_info_t *locklist;
} clnt_args_t;
typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *args);
@@ -118,16 +286,98 @@ int client_local_wipe (clnt_local_t *local);
int client_submit_request (xlator_t *this, void *req,
call_frame_t *frame, rpc_clnt_prog_t *prog,
int procnum, fop_cbk_fn_t cbk,
- struct iobref *iobref, gfs_serialize_t sfunc);
+ struct iobref *iobref,
+ struct iovec *rsphdr, int rsphdr_count,
+ struct iovec *rsp_payload, int rsp_count,
+ struct iobref *rsp_iobref, xdrproc_t xdrproc);
-int protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx);
-int protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx);
+int
+client_submit_compound_request (xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ struct iovec *req_vector, int req_count,
+ struct iobref *iobref, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref,
+ xdrproc_t xdrproc);
-int unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries);
-int unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);
+int unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp,
+ gf_dirent_t *entries);
+int unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
+ struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);
int clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp);
int clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp);
+int client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx);
+int32_t delete_granted_locks_owner (fd_t *fd, gf_lkowner_t *owner);
+int client_add_lock_for_recovery (fd_t *fd, struct gf_flock *flock,
+ gf_lkowner_t *owner, int32_t cmd);
+int32_t delete_granted_locks_fd (clnt_fd_ctx_t *fdctx);
+int32_t client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd);
+void client_save_number_fds (clnt_conf_t *conf, int count);
+int dump_client_locks (inode_t *inode);
+int client_notify_parents_child_up (xlator_t *this);
+int32_t is_client_dump_locks_cmd (char *name);
+int32_t client_dump_locks (char *name, inode_t *inode,
+ dict_t *dict);
+int client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx);
+
+uint32_t client_get_lk_ver (clnt_conf_t *conf);
+
+int32_t client_type_to_gf_type (short l_type);
+
+int client_mark_fd_bad (xlator_t *this);
+
+int client_set_lk_version (xlator_t *this);
+
+int client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, gf_boolean_t use_try_lock);
+void client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this);
+void client_attempt_reopen (fd_t *fd, xlator_t *this);
+int client_get_remote_fd (xlator_t *this, fd_t *fd, int flags,
+ int64_t *remote_fd);
+int client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd,
+ int64_t remote_fd);
+gf_boolean_t
+__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx);
+int
+client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...);
+int
+client_notify_dispatch_uniq (xlator_t *this, int32_t event, void *data, ...);
+
+gf_boolean_t
+client_is_reopen_needed (fd_t *fd, xlator_t *this, int64_t remote_fd);
+
+int
+client_add_fd_to_saved_fds (xlator_t *this, fd_t *fd, loc_t *loc, int32_t flags,
+ int64_t remote_fd, int is_dir);
+int
+client_handle_fop_requirements (xlator_t *this, call_frame_t *frame,
+ gfs3_compound_req *req,
+ clnt_local_t *local,
+ struct iobref *req_iobref,
+ struct iobref *rsp_iobref,
+ struct iovec *req_vector,
+ struct iovec *rsp_vector, int *req_count,
+ int *rsp_count, default_args_t *args,
+ int fop_enum, int index);
+int
+client_process_response (call_frame_t *frame, xlator_t *this,
+ struct rpc_req *req,
+ gfs3_compound_rsp *rsp, compound_args_cbk_t *args_cbk,
+ int index);
+void
+compound_request_cleanup (gfs3_compound_req *req);
+
+int
+clnt_unserialize_rsp_locklist (xlator_t *this, struct gfs3_getactivelk_rsp *rsp,
+ lock_migration_info_t *lmi);
+void
+clnt_getactivelk_rsp_cleanup (gfs3_getactivelk_rsp *rsp);
+
+void
+clnt_setactivelk_req_cleanup (gfs3_setactivelk_req *req);
+int
+serialize_req_locklist (lock_migration_info_t *locklist,
+ gfs3_setactivelk_req *req);
#endif /* !_CLIENT_H */
diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c
deleted file mode 100644
index 5204ef032da..00000000000
--- a/xlators/protocol/client/src/client3_1-fops.c
+++ /dev/null
@@ -1,4826 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "client.h"
-#include "glusterfs-xdr.h"
-#include "msg-xdr.h"
-#include "compat-errno.h"
-
-int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data);
-void client_start_ping (void *data);
-rpc_clnt_prog_t clnt3_1_fop_prog;
-
-int
-client_submit_vec_request (xlator_t *this, void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbk,
- struct iovec *payload, int payloadcnt,
- struct iobref *iobref, gfs_serialize_t sfunc)
-{
- int ret = 0;
- clnt_conf_t *conf = NULL;
- struct iovec iov = {0, };
- struct iobuf *iobuf = NULL;
- int count = 0;
- char new_iobref = 0;
- int start_ping = 0;
-
- start_ping = 0;
-
- conf = this->private;
-
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- if (!iobuf) {
- goto out;
- };
-
- if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- new_iobref = 1;
- }
-
- iobref_add (iobref, iobuf);
-
- iov.iov_base = iobuf->ptr;
- iov.iov_len = 128 * GF_UNIT_KB;
-
- /* Create the xdr payload */
- if (req && sfunc) {
- ret = sfunc (iov, req);
- if (ret == -1) {
- goto out;
- }
- iov.iov_len = ret;
- count = 1;
- }
- /* Send the msg */
- ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbk, &iov, count,
- payload, payloadcnt, iobref, frame);
-
- if (ret == 0) {
- pthread_mutex_lock (&conf->rpc->conn.lock);
- {
- if (!conf->rpc->conn.ping_started) {
- start_ping = 1;
- }
- }
- pthread_mutex_unlock (&conf->rpc->conn.lock);
- }
-
- if (start_ping)
- client_start_ping ((void *) this);
-
-out:
- if (new_iobref) {
- iobref_unref (iobref);
- }
-
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-/* CBK */
-
-int
-client3_1_symlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_symlink_rsp rsp = {0,};
- struct iatt stbuf = {0,};
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int ret = 0;
- clnt_local_t *local = NULL;
- inode_t *inode = NULL;
-
- frame = myframe;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_symlink_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "SYMLINK %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
-
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (symlink, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), inode, &stbuf,
- &preparent, &postparent);
-
- if (local)
- client_local_wipe (local);
-
- return 0;
-}
-
-
-int
-client3_1_mknod_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_mknod_rsp rsp = {0,};
- struct iatt stbuf = {0,};
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int ret = 0;
- clnt_local_t *local = NULL;
- inode_t *inode = NULL;
-
- frame = myframe;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_mknod_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
-
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (mknod, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), inode,
- &stbuf, &preparent, &postparent);
-
- if (local)
- client_local_wipe (local);
-
- return 0;
-}
-
-int
-client3_1_mkdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_mkdir_rsp rsp = {0,};
- struct iatt stbuf = {0,};
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int ret = 0;
- clnt_local_t *local = NULL;
- inode_t *inode = NULL;
-
- frame = myframe;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_mkdir_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "MKDIR %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
-
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
- }
-
-out:
- STACK_UNWIND_STRICT (mkdir, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), inode,
- &stbuf, &preparent, &postparent);
-
- if (local)
- client_local_wipe (local);
-
- return 0;
-}
-
-int
-client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- clnt_local_t *local = NULL;
- clnt_conf_t *conf = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- call_frame_t *frame = NULL;
- fd_t *fd = NULL;
- ino_t ino = 0;
- uint64_t gen = 0;
- int ret = 0;
- gfs3_open_rsp rsp = {0,};
-
- frame = myframe;
- local = frame->local;
-
- if (local->op) {
- local->op (req, iov, 1, myframe);
- return 0;
- }
-
- frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_open_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_client_mt_clnt_fdctx_t);
- if (!fdctx) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOMEM;
- goto out;
- }
-
- inode_ctx_get2 (fd->inode, frame->this, &ino, &gen);
-
- fdctx->remote_fd = rsp.fd;
- fdctx->inode = inode_ref (fd->inode);
- fdctx->ino = ino;
- fdctx->gen = gen;
- fdctx->flags = local->flags;
- fdctx->wbflags = local->wbflags;
-
- INIT_LIST_HEAD (&fdctx->sfd_pos);
-
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->lock);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->lock);
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), fd);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-
-int
-client3_1_stat_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_stat_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt iatt = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_stat_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &iatt);
- }
-
-out:
- STACK_UNWIND_STRICT (stat, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &iatt);
-
- return 0;
-}
-
-int
-client3_1_readlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_readlink_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt iatt = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_readlink_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.buf, &iatt);
- }
-
-out:
- STACK_UNWIND_STRICT (readlink, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), rsp.path, &iatt);
-
- /* This is allocated by the libc while decoding RPC msg */
- /* Hence no 'GF_FREE', but just 'free' */
- if (rsp.path)
- free (rsp.path);
-
- return 0;
-}
-
-int
-client3_1_unlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_unlink_rsp rsp = {0,};
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_unlink_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
- }
-
-out:
- STACK_UNWIND_STRICT (unlink, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &preparent,
- &postparent);
-
- return 0;
-}
-
-int
-client3_1_rmdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_rmdir_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_rmdir_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
- }
-
-out:
- STACK_UNWIND_STRICT (rmdir, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &preparent,
- &postparent);
-
- return 0;
-}
-
-
-int
-client3_1_truncate_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_truncate_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt prestat = {0,};
- struct iatt poststat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_truncate_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.prestat, &prestat);
- gf_stat_to_iatt (&rsp.poststat, &poststat);
- }
-
-out:
- STACK_UNWIND_STRICT (truncate, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &prestat,
- &poststat);
-
- return 0;
-}
-
-
-int
-client3_1_statfs_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_statfs_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct statvfs statfs = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_statfs_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_statfs_to_statfs (&rsp.statfs, &statfs);
- }
-
-out:
- STACK_UNWIND_STRICT (statfs, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &statfs);
-
- return 0;
-}
-
-
-int
-client3_1_writev_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_write_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt prestat = {0,};
- struct iatt poststat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_truncate_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.prestat, &prestat);
- gf_stat_to_iatt (&rsp.poststat, &poststat);
- }
-
-out:
- STACK_UNWIND_STRICT (writev, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &prestat,
- &poststat);
-
- return 0;
-}
-
-int
-client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (flush, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_fsync_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_fsync_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt prestat = {0,};
- struct iatt poststat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_truncate_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.prestat, &prestat);
- gf_stat_to_iatt (&rsp.poststat, &poststat);
- }
-
-out:
- STACK_UNWIND_STRICT (fsync, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &prestat,
- &poststat);
-
- return 0;
-}
-
-int
-client3_1_setxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (setxattr, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_getxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- char *buf = NULL;
- int dict_len = 0;
- int op_ret = 0;
- int op_errno = 0;
- gfs3_getxattr_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_getxattr_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- op_errno = gf_error_to_errno (rsp.op_errno);
- op_ret = rsp.op_ret;
- if (-1 != op_ret) {
- op_ret = -1;
- dict_len = rsp.dict.dict_len;
-
- if (dict_len > 0) {
- dict = dict_new();
- buf = memdup (rsp.dict.dict_val, rsp.dict.dict_len);
-
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, buf, out);
-
- ret = dict_unserialize (buf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize xattr dict");
- op_errno = EINVAL;
- goto out;
- }
- dict->extra_free = buf;
- buf = NULL;
- }
- op_ret = 0;
- }
-
-out:
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
-
- if (rsp.dict.dict_val) {
- /* don't use GF_FREE, this memory was allocated by libc
- */
- free (rsp.dict.dict_val);
- rsp.dict.dict_val = NULL;
- }
-
- if (buf)
- GF_FREE (buf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-int
-client3_1_fgetxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- char *buf = NULL;
- dict_t *dict = NULL;
- gfs3_fgetxattr_rsp rsp = {0,};
- int ret = 0;
- int dict_len = 0;
- int op_ret = 0;
- int op_errno = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_fgetxattr_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- op_errno = gf_error_to_errno (rsp.op_errno);
- op_ret = rsp.op_ret;
- if (-1 != op_ret) {
- op_ret = -1;
- dict_len = rsp.dict.dict_len;
-
- if (dict_len > 0) {
- dict = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, out);
- buf = memdup (rsp.dict.dict_val, rsp.dict.dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, buf, out);
-
- ret = dict_unserialize (buf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize xattr dict");
- op_errno = EINVAL;
- goto out;
- }
- dict->extra_free = buf;
- buf = NULL;
- }
- op_ret = 0;
- }
-out:
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
- if (rsp.dict.dict_val) {
- /* don't use GF_FREE, this memory was allocated by libc
- */
- free (rsp.dict.dict_val);
- rsp.dict.dict_val = NULL;
- }
-
- if (buf)
- GF_FREE (buf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-int
-client3_1_removexattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (removexattr, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_fsyncdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (fsyncdir, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_access_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (access, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-
-int
-client3_1_ftruncate_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_ftruncate_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt prestat = {0,};
- struct iatt poststat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_ftruncate_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.prestat, &prestat);
- gf_stat_to_iatt (&rsp.poststat, &poststat);
- }
-
-out:
- STACK_UNWIND_STRICT (ftruncate, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &prestat,
- &poststat);
-
- return 0;
-}
-
-int
-client3_1_fstat_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gfs3_fstat_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- struct iatt stat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_fstat_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stat);
- }
-
-out:
- STACK_UNWIND_STRICT (fstat, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &stat);
-
- return 0;
-}
-
-
-int
-client3_1_inodelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (inodelk, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_finodelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (finodelk, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_entrylk_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
-
- STACK_UNWIND_STRICT (entrylk, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_fentrylk_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (fentrylk, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_xattrop_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- char *buf = NULL;
- gfs3_xattrop_rsp rsp = {0,};
- int ret = 0;
- int op_ret = 0;
- int dict_len = 0;
- int op_errno = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_xattrop_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- op_ret = rsp.op_ret;
- if (-1 != op_ret) {
- op_ret = -1;
- dict_len = rsp.dict.dict_len;
-
- if (dict_len > 0) {
- dict = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, out);
-
- buf = memdup (rsp.dict.dict_val, rsp.dict.dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, buf, out);
- op_ret = dict_unserialize (buf, dict_len, &dict);
- if (op_ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize xattr dict");
- op_errno = EINVAL;
- goto out;
- }
- dict->extra_free = buf;
- buf = NULL;
- }
- op_ret = 0;
- }
-
-out:
-
- STACK_UNWIND_STRICT (xattrop, frame, op_ret,
- gf_error_to_errno (rsp.op_errno), dict);
-
- if (rsp.dict.dict_val) {
- /* don't use GF_FREE, this memory was allocated by libc
- */
- free (rsp.dict.dict_val);
- rsp.dict.dict_val = NULL;
- }
-
- if (buf)
- GF_FREE (buf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-int
-client3_1_fxattrop_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- char *buf = NULL;
- gfs3_fxattrop_rsp rsp = {0,};
- int ret = 0;
- int op_ret = 0;
- int dict_len = 0;
- int op_errno = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_fxattrop_rsp (*iov, &rsp);
- if (ret < 0) {
- op_ret = -1;
- op_errno = EINVAL;
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
-
- op_ret = rsp.op_ret;
- if (-1 != op_ret) {
- op_ret = -1;
- dict_len = rsp.dict.dict_len;
-
- if (dict_len > 0) {
- dict = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, out);
-
- buf = memdup (rsp.dict.dict_val, rsp.dict.dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, buf, out);
- op_ret = dict_unserialize (buf, dict_len, &dict);
- if (op_ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize xattr dict");
- op_errno = EINVAL;
- goto out;
- }
- dict->extra_free = buf;
- buf = NULL;
- }
- op_ret = 0;
- }
-
-out:
-
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret,
- gf_error_to_errno (rsp.op_errno), dict);
-
- if (rsp.dict.dict_val) {
- /* don't use GF_FREE, this memory was allocated by libc
- */
- free (rsp.dict.dict_val);
- rsp.dict.dict_val = NULL;
- }
-
- if (buf)
- GF_FREE (buf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-int
-client3_1_fsetxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gf_common_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_common_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (fsetxattr, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno));
-
- return 0;
-}
-
-int
-client3_1_fsetattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_fsetattr_rsp rsp = {0,};
- struct iatt prestat = {0,};
- struct iatt poststat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
- ret = xdr_to_fsetattr_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.statpre, &prestat);
- gf_stat_to_iatt (&rsp.statpost, &poststat);
- }
-
-out:
- STACK_UNWIND_STRICT (fsetattr, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &prestat,
- &poststat);
-
- return 0;
-}
-
-
-int
-client3_1_setattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_setattr_rsp rsp = {0,};
- struct iatt prestat = {0,};
- struct iatt poststat = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_setattr_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.statpre, &prestat);
- gf_stat_to_iatt (&rsp.statpost, &poststat);
- }
-
-out:
- STACK_UNWIND_STRICT (setattr, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &prestat,
- &poststat);
-
- return 0;
-}
-
-int
-client3_1_create_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- fd_t *fd = NULL;
- inode_t *inode = NULL;
- struct iatt stbuf = {0, };
- struct iatt preparent = {0, };
- struct iatt postparent = {0, };
- int32_t ret = -1;
- clnt_local_t *local = NULL;
- clnt_conf_t *conf = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- gfs3_create_rsp rsp = {0,};
-
- frame = myframe;
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
- inode = local->loc.inode;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_create_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
-
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
-
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_client_mt_clnt_fdctx_t);
- if (!fdctx) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOMEM;
- goto out;
- }
-
- fdctx->remote_fd = rsp.fd;
- fdctx->inode = inode_ref (inode);
- fdctx->ino = stbuf.ia_ino;
- fdctx->gen = stbuf.ia_gen;
- fdctx->flags = local->flags;
-
- INIT_LIST_HEAD (&fdctx->sfd_pos);
-
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->lock);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->lock);
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (create, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), fd, inode,
- &stbuf, &preparent, &postparent);
-
- client_local_wipe (local);
- return 0;
-}
-
-
-int
-client3_1_rchecksum_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_rchecksum_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_rchecksum_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (rchecksum, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno),
- rsp.weak_checksum,
- (uint8_t *)rsp.strong_checksum.strong_checksum_val);
-
- if (rsp.strong_checksum.strong_checksum_val) {
- /* This is allocated by the libc while decoding RPC msg */
- /* Hence no 'GF_FREE', but just 'free' */
- free (rsp.strong_checksum.strong_checksum_val);
- }
-
- return 0;
-}
-
-int
-client3_1_checksum_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_checksum_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_checksum_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- STACK_UNWIND_STRICT (checksum, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno),
- (uint8_t *)rsp.fchecksum.fchecksum_val,
- (uint8_t *)rsp.dchecksum.dchecksum_val);
-
- /* This is allocated by the libc while decoding RPC msg */
- /* Hence no 'GF_FREE', but just 'free' */
- if (rsp.fchecksum.fchecksum_val) {
- free (rsp.fchecksum.fchecksum_val);
- }
- if (rsp.dchecksum.dchecksum_val) {
- free (rsp.dchecksum.dchecksum_val);
- }
- return 0;
-}
-
-int
-client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- struct flock lock = {0,};
- gfs3_lk_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_lk_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (rsp.op_ret >= 0) {
- gf_flock_to_flock (&rsp.flock, &lock);
- }
-
-out:
- STACK_UNWIND_STRICT (lk, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &lock);
-
- return 0;
-}
-
-int
-client3_1_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_readdir_rsp rsp = {0,};
- int32_t ret = 0;
- gf_dirent_t entries;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_readdir_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- INIT_LIST_HEAD (&entries.list);
- if (rsp.op_ret > 0) {
- unserialize_rsp_dirent (&rsp, &entries);
- }
-
-out:
- STACK_UNWIND_STRICT (readdir, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &entries);
-
- if (rsp.op_ret != -1) {
- gf_dirent_free (&entries);
- }
-
- clnt_readdir_rsp_cleanup (&rsp);
-
- return 0;
-}
-
-
-int
-client3_1_readdirp_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_readdirp_rsp rsp = {0,};
- int32_t ret = 0;
- gf_dirent_t entries;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_readdirp_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- INIT_LIST_HEAD (&entries.list);
- if (rsp.op_ret > 0) {
- unserialize_rsp_direntp (&rsp, &entries);
- }
-
-out:
- STACK_UNWIND_STRICT (readdirp, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &entries);
-
- if (rsp.op_ret != -1) {
- gf_dirent_free (&entries);
- }
-
- clnt_readdirp_rsp_cleanup (&rsp);
-
- return 0;
-}
-
-
-int
-client3_1_rename_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_rename_rsp rsp = {0,};
- struct iatt stbuf = {0,};
- struct iatt preoldparent = {0,};
- struct iatt postoldparent = {0,};
- struct iatt prenewparent = {0,};
- struct iatt postnewparent = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_rename_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- gf_stat_to_iatt (&rsp.preoldparent, &preoldparent);
- gf_stat_to_iatt (&rsp.postoldparent, &postoldparent);
-
- gf_stat_to_iatt (&rsp.prenewparent, &prenewparent);
- gf_stat_to_iatt (&rsp.postnewparent, &postnewparent);
- }
-
-out:
- STACK_UNWIND_STRICT (rename, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno),
- &stbuf, &preoldparent, &postoldparent,
- &preoldparent, &postoldparent);
-
- return 0;
-}
-
-int
-client3_1_link_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- gfs3_link_rsp rsp = {0,};
- struct iatt stbuf = {0,};
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int ret = 0;
- clnt_local_t *local = NULL;
- inode_t *inode = NULL;
-
- frame = myframe;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_link_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- gf_stat_to_iatt (&rsp.preparent, &preparent);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (link, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), inode,
- &stbuf, &preparent, &postparent);
-
- client_local_wipe (local);
- return 0;
-}
-
-
-int
-client3_1_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- clnt_local_t *local = NULL;
- clnt_conf_t *conf = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- ino_t ino = 0;
- uint64_t gen = 0;
- call_frame_t *frame = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- gfs3_opendir_rsp rsp = {0,};
-
- frame = myframe;
- local = frame->local;
-
- if (local->op) {
- local->op (req, iov, 1, myframe);
- return 0;
- }
-
- frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_opendir_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 != rsp.op_ret) {
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_client_mt_clnt_fdctx_t);
- if (!fdctx) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOMEM;
- goto out;
- }
-
- inode_ctx_get2 (fd->inode, frame->this, &ino, &gen);
-
- fdctx->remote_fd = rsp.fd;
- fdctx->inode = inode_ref (fd->inode);
- fdctx->ino = ino;
- fdctx->gen = gen;
-
- fdctx->is_dir = 1;
-
- INIT_LIST_HEAD (&fdctx->sfd_pos);
-
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->lock);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->lock);
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (opendir, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), fd);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-
-int
-client3_1_lookup_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- clnt_local_t *local = NULL;
- call_frame_t *frame = NULL;
- int ret = 0;
- gfs3_lookup_rsp rsp = {0,};
- struct iatt stbuf = {0,};
- struct iatt postparent = {0,};
- int op_errno = 0;
- ino_t oldino = 0;
- uint64_t oldgen = 0;
- dict_t *xattr = NULL;
- inode_t *inode = NULL;
- char *buf = NULL;
-
- frame = myframe;
- local = frame->local;
- inode = local->loc.inode;
- frame->local = NULL;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_lookup_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- op_errno = gf_error_to_errno (rsp.op_errno);
- gf_stat_to_iatt (&rsp.postparent, &postparent);
-
- if (rsp.op_ret == 0) {
- rsp.op_ret = -1;
- gf_stat_to_iatt (&rsp.stat, &stbuf);
-
- ret = inode_ctx_get2 (inode, frame->this, &oldino, &oldgen);
- if (oldino != stbuf.ia_ino || oldgen != stbuf.ia_gen) {
- if (oldino) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): "
- "inode number changed from "
- "{%"PRId64",%"PRId64"} to {%"PRId64",%"PRId64"}",
- local->loc.parent ?
- local->loc.parent->ino : (uint64_t) 0,
- local->loc.name,
- local->loc.path,
- oldgen, oldino, stbuf.ia_gen, stbuf.ia_ino);
- op_errno = ESTALE;
- goto out;
- }
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s) : "
- "failed to set remote inode "
- "number to inode ctx",
- local->loc.parent ?
- local->loc.parent->ino : (uint64_t) 0,
- local->loc.name,
- local->loc.path);
- }
- }
-
- if (rsp.dict.dict_len > 0) {
- xattr = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, xattr, out);
-
- buf = memdup (rsp.dict.dict_val, rsp.dict.dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, buf, out);
-
- ret = dict_unserialize (buf, rsp.dict.dict_len, &xattr);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to "
- "unserialize dictionary",
- local->loc.path, inode->ino);
- op_errno = EINVAL;
- goto out;
- }
-
- xattr->extra_free = buf;
- buf = NULL;
- }
-
- rsp.op_ret = 0;
- }
-
-out:
- frame->local = NULL;
- STACK_UNWIND_STRICT (lookup, frame, rsp.op_ret, rsp.op_errno, inode,
- &stbuf, xattr, &postparent);
-
- client_local_wipe (local);
-
- if (xattr)
- dict_unref (xattr);
-
- if (rsp.dict.dict_val) {
- /* don't use GF_FREE, this memory was allocated by libc
- */
- free (rsp.dict.dict_val);
- rsp.dict.dict_val = NULL;
- }
-
- if (buf) {
- GF_FREE (buf);
- }
-
- return 0;
-}
-
-int
-client3_1_readv_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
- struct iobref *iobref = NULL;
- struct iovec vector = {0,};
- struct iatt stat = {0,};
- gfs3_read_rsp rsp = {0,};
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_readv_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (rsp.op_ret != -1) {
- iobref = iobref_new ();
- gf_stat_to_iatt (&rsp.stat, &stat);
- vector.iov_len = rsp.op_ret;
-
- if (rsp.op_ret > 0) {
- vector.iov_base = req->rsp_procpayload->ptr;
- iobref_add (iobref, req->rsp_procpayload);
- }
- }
-out:
- STACK_UNWIND_STRICT (readv, frame, rsp.op_ret,
- gf_error_to_errno (rsp.op_errno), &vector, 1,
- &stat, iobref);
-
- if (iobref) {
- iobref_unref (iobref);
- }
-
- return 0;
-}
-
-int
-client3_1_release_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
-
- frame = myframe;
- STACK_DESTROY (frame->root);
- return 0;
-}
-int
-client3_1_releasedir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- call_frame_t *frame = NULL;
-
- frame = myframe;
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int
-client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx)
-{
- call_frame_t *fr = NULL;
- int32_t ret = -1;
-
- if (!fdctx)
- goto out;
-
- if (fdctx->remote_fd == -1)
- goto out;
-
- fr = create_frame (this, this->ctx->pool);
-
- if (fdctx->is_dir) {
- gfs3_releasedir_req req = {0,};
- req.fd = fdctx->remote_fd;
- req.gfs_id = GFS3_OP_RELEASEDIR;
- client_submit_request (this, &req, fr, &clnt3_1_fop_prog,
- GFS3_OP_RELEASEDIR, client3_1_releasedir_cbk,
- NULL, xdr_from_releasedir_req);
- } else {
- gfs3_release_req req = {0,};
- req.fd = fdctx->remote_fd;
- req.gfs_id = GFS3_OP_RELEASE;
- client_submit_request (this, &req, fr, &clnt3_1_fop_prog,
- GFS3_OP_RELEASE, client3_1_release_cbk, NULL,
- xdr_from_release_req);
- }
-
-out:
- if (fdctx) {
- fdctx->remote_fd = -1;
- inode_unref (fdctx->inode);
- GF_FREE (fdctx);
- }
-
- return ret;
-}
-
-int
-client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- int32_t ret = -1;
- gfs3_open_rsp rsp = {0,};
- clnt_local_t *local = NULL;
- clnt_conf_t *conf = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- call_frame_t *frame = NULL;
-
- frame = myframe;
-
- local = frame->local;
- conf = frame->this->private;
- fdctx = local->fdctx;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_open_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "reopen on %s returned %d (%"PRId64")",
- local->loc.path, rsp.op_ret, rsp.fd);
-
- if (-1 != rsp.op_ret) {
- pthread_mutex_lock (&conf->lock);
- {
- fdctx->remote_fd = rsp.fd;
-
- if (!fdctx->released) {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- fdctx = NULL;
- }
- }
- pthread_mutex_unlock (&conf->lock);
- }
-
-out:
- if (fdctx)
- client_fdctx_destroy (frame->this, fdctx);
-
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-int
-client3_1_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- int32_t ret = -1;
- gfs3_open_rsp rsp = {0,};
- clnt_local_t *local = NULL;
- clnt_conf_t *conf = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- call_frame_t *frame = NULL;
-
- frame = myframe;
- if (!frame || !frame->this)
- goto out;
-
- local = frame->local;
- frame->local = NULL;
- conf = frame->this->private;
- fdctx = local->fdctx;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = ENOTCONN;
- goto out;
- }
-
- ret = xdr_to_opendir_rsp (*iov, &rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "reopendir on %s returned %d (%"PRId64")",
- local->loc.path, rsp.op_ret, rsp.fd);
-
- if (fdctx) {
- pthread_mutex_lock (&conf->lock);
- {
- fdctx->remote_fd = rsp.fd;
-
- if (!fdctx->released) {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- fdctx = NULL;
- }
- }
- pthread_mutex_unlock (&conf->lock);
- }
-
-out:
- if (fdctx)
- client_fdctx_destroy (frame->this, fdctx);
-
- if (frame) {
- frame->local = NULL;
- STACK_DESTROY (frame->root);
- }
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * Build and submit an OPENDIR request that re-opens the directory tracked
- * by @fdctx.
- *
- * Returns the (non-negative) inode_path() result once the request has been
- * handed to client_submit_request(); returns 0 after releasing whatever was
- * set up when an argument is NULL or a setup step fails.
- *
- * NOTE(review): local->op names client3_1_reopendir_cbk while the request
- * is submitted with client3_1_opendir_cbk -- confirm how the reply reaches
- * the reopendir handler.
- */
-int
-protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx)
-{
-        call_frame_t     *frame = NULL;
-        clnt_local_t     *local = NULL;
-        clnt_conf_t      *conf  = NULL;
-        char             *path  = NULL;
-        gfs3_opendir_req  req   = {0,};
-        int               ret   = -1;
-
-        if (!this || !fdctx)
-                goto out;
-
-        conf = this->private;
-
-        ret = inode_path (fdctx->inode, NULL, &path);
-        if (ret < 0)
-                goto out;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local)
-                goto out;
-
-        /* Hand the path to the local and clear ours so that the out: path
-           does not GF_FREE it. */
-        local->loc.path = path;
-        path            = NULL;
-        local->fdctx    = fdctx;
-        local->op       = client3_1_reopendir_cbk;
-
-        frame = create_frame (this, this->ctx->pool);
-        if (!frame)
-                goto out;
-
-        req.path   = (char *)local->loc.path;
-        req.ino    = fdctx->ino;
-        req.gen    = fdctx->gen;
-        req.gfs_id = GFS3_OP_OPENDIR;
-
-        gf_log (frame->this->name, GF_LOG_DEBUG,
-                "attempting reopen on %s", local->loc.path);
-
-        frame->local = local;
-        local        = NULL;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPENDIR,
-                               client3_1_opendir_cbk, NULL,
-                               xdr_from_opendir_req);
-
-        return ret;
-
-out:
-        if (frame) {
-                frame->local = NULL;
-                STACK_DESTROY (frame->root);
-        }
-
-        if (local)
-                client_local_wipe (local);
-
-        if (path)
-                GF_FREE (path);
-
-        return 0;
-
-}
-
-/*
- * Build and submit an OPEN request that re-opens the file tracked by
- * @fdctx, reusing the flags and wbflags stored in the fd context.
- *
- * Returns the (non-negative) inode_path() result once the request has been
- * handed to client_submit_request(); returns 0 after releasing whatever was
- * set up when an argument is NULL or a setup step fails.
- *
- * NOTE(review): local->op names client3_1_reopen_cbk while the request is
- * submitted with client3_1_open_cbk -- confirm how the reply reaches the
- * reopen handler.
- */
-int
-protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx)
-{
-        call_frame_t  *frame = NULL;
-        clnt_local_t  *local = NULL;
-        clnt_conf_t   *conf  = NULL;
-        char          *path  = NULL;
-        gfs3_open_req  req   = {0,};
-        int            ret   = -1;
-
-        if (!this || !fdctx)
-                goto out;
-
-        conf = this->private;
-
-        ret = inode_path (fdctx->inode, NULL, &path);
-        if (ret < 0)
-                goto out;
-
-        frame = create_frame (this, this->ctx->pool);
-        if (!frame)
-                goto out;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local)
-                goto out;
-
-        /* Hand the path to the local and clear ours so that the out: path
-           does not GF_FREE it. */
-        local->loc.path = path;
-        path            = NULL;
-        local->fdctx    = fdctx;
-        local->op       = client3_1_reopen_cbk;
-        frame->local    = local;
-
-        req.path    = (char *)local->loc.path;
-        req.ino     = fdctx->ino;
-        req.gen     = fdctx->gen;
-        req.flags   = gf_flags_from_flags (fdctx->flags);
-        req.wbflags = fdctx->wbflags;
-        req.gfs_id  = GFS3_OP_OPEN;
-
-        gf_log (frame->this->name, GF_LOG_DEBUG,
-                "attempting reopen on %s", local->loc.path);
-
-        /* The frame holds the local from here on. */
-        local = NULL;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPEN,
-                               client3_1_open_cbk, NULL, xdr_from_open_req);
-
-        return ret;
-
-out:
-        if (frame) {
-                frame->local = NULL;
-                STACK_DESTROY (frame->root);
-        }
-
-        if (local)
-                client_local_wipe (local);
-
-        if (path)
-                GF_FREE (path);
-
-        return 0;
-
-}
-
-
-
-/*
- * RELEASEDIR: detach the fd context from args->fd and, if it carries a
- * valid remote fd, send GFS3_OP_RELEASEDIR and free the context.
- *
- * @data is a clnt_args_t.  Nothing is done when any argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_releasedir (call_frame_t *frame, xlator_t *this,
-                      void *data)
-{
-        gfs3_releasedir_req  req       = {0,};
-        clnt_args_t         *args      = NULL;
-        clnt_fd_ctx_t       *fdctx     = NULL;
-        clnt_conf_t         *conf      = NULL;
-        int64_t              remote_fd = -1;
-
-        if (!frame || !this || !data)
-                return 0;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_del_ctx (args->fd, this);
-                if (fdctx) {
-                        remote_fd       = fdctx->remote_fd;
-                        fdctx->released = 1;
-
-                        /* fdctx->remote_fd == -1 indicates a reopen attempt
-                           in progress. Only ->released is marked then, and
-                           reopen_cbk is left to handle the releasing.
-                        */
-                        if (-1 != remote_fd)
-                                list_del_init (&fdctx->sfd_pos);
-                }
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (-1 == remote_fd)
-                return 0;
-
-        req.fd     = remote_fd;
-        req.gfs_id = GFS3_OP_RELEASEDIR;
-
-        client_submit_request (this, &req, frame, conf->fops,
-                               GFS3_OP_RELEASEDIR, client3_1_releasedir_cbk,
-                               NULL, xdr_from_releasedir_req);
-
-        inode_unref (fdctx->inode);
-        GF_FREE (fdctx);
-
-        return 0;
-}
-
-/*
- * RELEASE: detach the fd context from args->fd and, if it carries a valid
- * remote fd, send GFS3_OP_RELEASE and free the context.
- *
- * @data is a clnt_args_t.  Nothing is done when any argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_release (call_frame_t *frame, xlator_t *this,
-                   void *data)
-{
-        gfs3_release_req  req       = {0,};
-        clnt_args_t      *args      = NULL;
-        clnt_fd_ctx_t    *fdctx     = NULL;
-        clnt_conf_t      *conf      = NULL;
-        int64_t           remote_fd = -1;
-
-        if (!frame || !this || !data)
-                return 0;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_del_ctx (args->fd, this);
-                if (fdctx) {
-                        remote_fd       = fdctx->remote_fd;
-                        fdctx->released = 1;
-
-                        /* fdctx->remote_fd == -1 indicates a reopen attempt
-                           in progress. Only ->released is marked then, and
-                           reopen_cbk is left to handle the releasing.
-                        */
-                        if (-1 != remote_fd)
-                                list_del_init (&fdctx->sfd_pos);
-                }
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (-1 == remote_fd)
-                return 0;
-
-        req.fd     = remote_fd;
-        req.gfs_id = GFS3_OP_RELEASE;
-
-        client_submit_request (this, &req, frame, conf->fops,
-                               GFS3_OP_RELEASE, client3_1_release_cbk, NULL,
-                               xdr_from_release_req);
-
-        inode_unref (fdctx->inode);
-        GF_FREE (fdctx);
-
-        return 0;
-}
-
-
-/*
- * LOOKUP: fill a gfs3_lookup_req from args->loc (and the optional xattr
- * request dict) and submit it.
- *
- * @data is a clnt_args_t.  A copy of the loc is kept in a freshly allocated
- * local hung off the frame.  On any failure the frame is unwound with -1
- * and op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_lookup (call_frame_t *frame, xlator_t *this,
-                  void *data)
-{
-        gfs3_lookup_req  req      = {0,};
-        clnt_local_t    *local    = NULL;
-        clnt_args_t     *args     = NULL;
-        clnt_conf_t     *conf     = NULL;
-        size_t           dict_len = 0;
-        int              op_errno = ESTALE;
-        int              ret      = 0;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-
-        loc_copy (&local->loc, args->loc);
-        frame->local = local;
-
-        if (args->loc->ino == 1 || !args->loc->parent) {
-                /* ino 1, or no parent: request inode 1 directly */
-                req.ino = 1;
-        } else {
-                ret = inode_ctx_get2 (args->loc->parent, this,
-                                      &req.par, &req.gen);
-                /* the failure only counts when the parent has an ino */
-                if (args->loc->parent->ino && ret < 0) {
-                        gf_log (this->name, GF_LOG_TRACE,
-                                "LOOKUP %"PRId64"/%s (%s): failed to get "
-                                "remote inode number for parent",
-                                args->loc->parent->ino, args->loc->name,
-                                args->loc->path);
-                        goto unwind;
-                }
-                GF_VALIDATE_OR_GOTO (this->name, args->loc->name, unwind);
-        }
-
-        if (args->dict) {
-                ret = dict_allocate_and_serialize (args->dict,
-                                                   &req.dict.dict_val,
-                                                   &dict_len);
-                if (ret < 0) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "failed to get serialized length of dict");
-                        op_errno = EINVAL;
-                        goto unwind;
-                }
-        }
-
-        req.dict.dict_len = dict_len;
-        req.path          = (char *)args->loc->path;
-        req.bname         = (char *)args->loc->name;
-        req.gfs_id        = GFS3_OP_LOOKUP;
-
-        client_submit_request (this, &req, frame, conf->fops,
-                               GFS3_OP_LOOKUP, client3_1_lookup_cbk,
-                               NULL, xdr_from_lookup_req);
-
-        /* the serialized dict buffer is released once the call returns */
-        if (req.dict.dict_val) {
-                GF_FREE (req.dict.dict_val);
-        }
-
-        return 0;
-
-unwind:
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
-        if (local)
-                client_local_wipe (local);
-
-        if (req.dict.dict_val)
-                GF_FREE (req.dict.dict_val);
-
-        return 0;
-}
-
-
-
-/*
- * STAT: fetch the remote inode number/generation for args->loc->inode and
- * submit a GFS3_OP_STAT request for args->loc->path.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_stat (call_frame_t *frame, xlator_t *this,
-                void *data)
-{
-        gfs3_stat_req  req      = {0,};
-        clnt_args_t   *args     = NULL;
-        clnt_conf_t   *conf     = NULL;
-        int            op_errno = ESTALE;
-        int            ret      = 0;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_TRACE,
-                        "STAT %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_STAT;
-        req.path   = (char *)args->loc->path;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_STAT,
-                               client3_1_stat_cbk, NULL, xdr_from_stat_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
-        return 0;
-}
-
-
-/*
- * TRUNCATE: fetch the remote inode number/generation for args->loc->inode
- * and submit a GFS3_OP_TRUNCATE request for args->loc->path at
- * args->offset.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_truncate (call_frame_t *frame, xlator_t *this,
-                    void *data)
-{
-        clnt_conf_t       *conf     = NULL;
-        clnt_args_t       *args     = NULL;
-        gfs3_truncate_req  req      = {0,};
-        int                ret      = 0;
-        int                op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "STAT") */
-                gf_log (this->name, GF_LOG_TRACE,
-                        "TRUNCATE %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-        req.path   = (char *)args->loc->path;
-        req.offset = args->offset;
-        req.gfs_id = GFS3_OP_TRUNCATE;
-
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_TRUNCATE,
-                               client3_1_truncate_cbk, NULL,
-                               xdr_from_truncate_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-/*
- * FTRUNCATE: look up the fd context of args->fd and submit a
- * GFS3_OP_FTRUNCATE request on its remote fd at args->offset.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with EINVAL when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_ftruncate (call_frame_t *frame, xlator_t *this,
-                     void *data)
-{
-        gfs3_ftruncate_req  req      = {0,};
-        clnt_fd_ctx_t      *fdctx    = NULL;
-        clnt_args_t        *args     = NULL;
-        clnt_conf_t        *conf     = NULL;
-        int                 op_errno = EINVAL;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (!fdctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (-1 == fdctx->remote_fd) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_FTRUNCATE;
-        req.fd     = fdctx->remote_fd;
-        req.offset = args->offset;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FTRUNCATE,
-                               client3_1_ftruncate_cbk, NULL,
-                               xdr_from_ftruncate_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-
-/*
- * ACCESS: fetch the remote inode number/generation for args->loc->inode
- * and submit a GFS3_OP_ACCESS request for args->loc->path with args->mask.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_access (call_frame_t *frame, xlator_t *this,
-                  void *data)
-{
-        clnt_conf_t     *conf     = NULL;
-        clnt_args_t     *args     = NULL;
-        gfs3_access_req  req      = {0,};
-        int              ret      = 0;
-        int              op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "STAT") */
-                gf_log (this->name, GF_LOG_TRACE,
-                        "ACCESS %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-        req.path   = (char *)args->loc->path;
-        req.mask   = args->mask;
-        req.gfs_id = GFS3_OP_ACCESS;
-
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_ACCESS,
-                               client3_1_access_cbk, NULL, xdr_from_access_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (access, frame, -1, op_errno);
-        return 0;
-}
-
-/*
- * READLINK: fetch the remote inode number/generation for args->loc->inode
- * and submit a GFS3_OP_READLINK request for args->loc->path with
- * args->size.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_readlink (call_frame_t *frame, xlator_t *this,
-                    void *data)
-{
-        clnt_conf_t       *conf     = NULL;
-        clnt_args_t       *args     = NULL;
-        gfs3_readlink_req  req      = {0,};
-        int                ret      = 0;
-        int                op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "STAT") */
-                gf_log (this->name, GF_LOG_TRACE,
-                        "READLINK %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-        req.path   = (char *)args->loc->path;
-        req.size   = args->size;
-        req.gfs_id = GFS3_OP_READLINK;
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_READLINK,
-                               client3_1_readlink_cbk, NULL,
-                               xdr_from_readlink_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-
-
-/*
- * UNLINK: fetch the remote inode number/generation of args->loc->parent
- * and submit a GFS3_OP_UNLINK request for args->loc->name under it.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_unlink (call_frame_t *frame, xlator_t *this,
-                  void *data)
-{
-        clnt_conf_t     *conf     = NULL;
-        clnt_args_t     *args     = NULL;
-        gfs3_unlink_req  req      = {0,};
-        int              ret      = 0;
-        /* Start from ESTALE like the sibling fops: with 0 here every failure
-           path unwound -1 together with an errno of 0. */
-        int              op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        ret = inode_ctx_get2 (args->loc->parent, this, &req.par, &req.gen);
-        /* the failure only counts when the parent has an ino */
-        if (args->loc->parent->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "STAT") */
-                gf_log (this->name, GF_LOG_TRACE,
-                        "UNLINK %"PRId64"/%s (%s): "
-                        "failed to get remote inode number for parent",
-                        args->loc->parent->ino, args->loc->name,
-                        args->loc->path);
-                goto unwind;
-        }
-        req.path   = (char *)args->loc->path;
-        req.bname  = (char *)args->loc->name;
-        req.gfs_id = GFS3_OP_UNLINK;
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_UNLINK,
-                               client3_1_unlink_cbk, NULL, xdr_from_unlink_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-
-/*
- * RMDIR: fetch the remote inode number/generation of args->loc->parent and
- * submit a GFS3_OP_RMDIR request for args->loc->name under it.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_rmdir (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        clnt_conf_t    *conf     = NULL;
-        clnt_args_t    *args     = NULL;
-        gfs3_rmdir_req  req      = {0,};
-        int             ret      = 0;
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        ret = inode_ctx_get2 (args->loc->parent, this, &req.par, &req.gen);
-        /* The context was fetched for the parent, so it is the parent's ino
-           (not loc->inode's) that decides whether the failure counts. */
-        if (args->loc->parent->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "STAT") */
-                gf_log (this->name, GF_LOG_TRACE,
-                        "RMDIR %"PRId64"/%s (%s): "
-                        "failed to get remote inode number for parent",
-                        args->loc->parent->ino, args->loc->name,
-                        args->loc->path);
-                goto unwind;
-        }
-        req.path   = (char *)args->loc->path;
-        req.bname  = (char *)args->loc->name;
-        req.gfs_id = GFS3_OP_RMDIR;
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_RMDIR,
-                               client3_1_rmdir_cbk, NULL, xdr_from_rmdir_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-
-/*
- * SYMLINK: fetch the remote inode number/generation of args->loc->parent
- * and submit a GFS3_OP_SYMLINK request creating args->loc->name with target
- * args->linkname.
- *
- * @data is a clnt_args_t.  A copy of the loc is kept in a freshly allocated
- * local hung off the frame.  On failure the frame is unwound with -1 and
- * op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_symlink (call_frame_t *frame, xlator_t *this,
-                   void *data)
-{
-        gfs3_symlink_req  req      = {0,};
-        clnt_local_t     *local    = NULL;
-        clnt_args_t      *args     = NULL;
-        clnt_conf_t      *conf     = NULL;
-        int               op_errno = ESTALE;
-        int               ret      = 0;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-
-        loc_copy (&local->loc, args->loc);
-        frame->local = local;
-
-        ret = inode_ctx_get2 (args->loc->parent, this, &req.par, &req.gen);
-        /* the failure only counts when the parent has an ino */
-        if (args->loc->parent->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "SYMLINK %"PRId64"/%s (%s): failed to get remote inode"
-                        " number parent",
-                        args->loc->parent->ino, args->loc->name,
-                        args->loc->path);
-                goto unwind;
-        }
-
-        req.gfs_id   = GFS3_OP_SYMLINK;
-        req.path     = (char *)args->loc->path;
-        req.bname    = (char *)args->loc->name;
-        req.linkname = (char *)args->linkname;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_SYMLINK,
-                               client3_1_symlink_cbk, NULL,
-                               xdr_from_symlink_req);
-
-        return 0;
-
-unwind:
-        /* detach the local before unwinding; it is wiped just below */
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-
-/*
- * RENAME: fetch the remote inode number/generation of both the source and
- * the destination parent and submit a GFS3_OP_RENAME request.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_rename (call_frame_t *frame, xlator_t *this,
-                  void *data)
-{
-        clnt_conf_t     *conf     = NULL;
-        clnt_args_t     *args     = NULL;
-        gfs3_rename_req  req      = {0,};
-        int              ret      = 0;
-        int              op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        ret = inode_ctx_get2 (args->oldloc->parent, this,
-                              &req.oldpar, &req.oldgen);
-        /* the failure only counts when the parent has an ino */
-        if (args->oldloc->parent->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RENAME %"PRId64"/%s (%s): failed to get remote inode "
-                        "number for source parent", args->oldloc->parent->ino,
-                        args->oldloc->name, args->oldloc->path);
-                goto unwind;
-        }
-
-        ret = inode_ctx_get2 (args->newloc->parent, this, &req.newpar,
-                              &req.newgen);
-        if (args->newloc->parent->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "CREATE") */
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RENAME %"PRId64"/%s (%s): failed to get remote inode "
-                        "number for destination parent",
-                        args->newloc->parent->ino, args->newloc->name,
-                        args->newloc->path);
-                goto unwind;
-        }
-
-        req.oldpath  = (char *)args->oldloc->path;
-        req.oldbname = (char *)args->oldloc->name;
-        req.newpath  = (char *)args->newloc->path;
-        req.newbname = (char *)args->newloc->name;
-        req.gfs_id   = GFS3_OP_RENAME;
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_RENAME,
-                               client3_1_rename_cbk, NULL, xdr_from_rename_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
-        return 0;
-}
-
-
-
-/*
- * LINK: fetch the remote inode number/generation of the source inode and
- * of the destination parent and submit a GFS3_OP_LINK request.
- *
- * @data is a clnt_args_t.  A copy of the old loc is kept in a freshly
- * allocated local hung off the frame.  On failure the frame is unwound with
- * -1 and op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_link (call_frame_t *frame, xlator_t *this,
-                void *data)
-{
-        clnt_local_t  *local    = NULL;
-        clnt_conf_t   *conf     = NULL;
-        clnt_args_t   *args     = NULL;
-        gfs3_link_req  req      = {0,};
-        int            ret      = 0;
-        int            op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-
-        loc_copy (&local->loc, args->oldloc);
-        frame->local = local;
-
-        ret = inode_ctx_get2 (args->oldloc->inode, this,
-                              &req.oldino, &req.oldgen);
-        /* The context was fetched for the source inode itself, so test and
-           report that inode rather than its parent. */
-        if (args->oldloc->inode->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "LINK %"PRId64" (%s): failed to get remote inode "
-                        "number for source inode",
-                        args->oldloc->inode->ino, args->oldloc->path);
-                goto unwind;
-        }
-
-        ret = inode_ctx_get2 (args->newloc->parent, this, &req.newpar,
-                              &req.newgen);
-        if (args->newloc->parent->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "LINK %"PRId64"/%s (%s): failed to get remote inode "
-                        "number for destination parent",
-                        args->newloc->parent->ino, args->newloc->name,
-                        args->newloc->path);
-                goto unwind;
-        }
-
-        req.oldpath  = (char *)args->oldloc->path;
-        req.newpath  = (char *)args->newloc->path;
-        req.newbname = (char *)args->newloc->name;
-        req.gfs_id   = GFS3_OP_LINK;
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LINK,
-                               client3_1_link_cbk, NULL, xdr_from_link_req);
-
-        return 0;
-unwind:
-        /* Detach and wipe the local on failure, as the other fops that
-           allocate one (symlink, mknod, mkdir, create, open) do. */
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-
-/*
- * MKNOD: fetch the remote inode number/generation of args->loc->parent and
- * submit a GFS3_OP_MKNOD request for args->loc->name with args->mode and
- * args->rdev.
- *
- * @data is a clnt_args_t.  A copy of the loc is kept in a freshly allocated
- * local hung off the frame.  On failure the frame is unwound with -1 and
- * op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_mknod (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        clnt_local_t   *local    = NULL;
-        clnt_conf_t    *conf     = NULL;
-        clnt_args_t    *args     = NULL;
-        gfs3_mknod_req  req      = {0,};
-        int             ret      = 0;
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-
-        loc_copy (&local->loc, args->loc);
-        frame->local = local;
-
-        ret = inode_ctx_get2 (args->loc->parent, this, &req.par, &req.gen);
-        /* the failure only counts when the parent has an ino */
-        if (args->loc->parent->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted
-                   "SYMLINK") */
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKNOD %"PRId64"/%s (%s): failed to get remote inode"
-                        " number parent",
-                        args->loc->parent->ino, args->loc->name,
-                        args->loc->path);
-                goto unwind;
-        }
-
-        req.path   = (char *)args->loc->path;
-        req.bname  = (char *)args->loc->name;
-        req.mode   = args->mode;
-        req.dev    = args->rdev;
-        req.gfs_id = GFS3_OP_MKNOD;
-
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_MKNOD,
-                               client3_1_mknod_cbk, NULL, xdr_from_mknod_req);
-
-        return 0;
-unwind:
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-
-/*
- * MKDIR: fetch the remote inode number/generation of args->loc->parent and
- * submit a GFS3_OP_MKDIR request for args->loc->name with args->mode.
- *
- * @data is a clnt_args_t.  A copy of the loc is kept in a freshly allocated
- * local hung off the frame.  On failure the frame is unwound with -1 and
- * op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_mkdir (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        clnt_local_t   *local    = NULL;
-        clnt_conf_t    *conf     = NULL;
-        clnt_args_t    *args     = NULL;
-        gfs3_mkdir_req  req      = {0,};
-        int             ret      = 0;
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-
-        loc_copy (&local->loc, args->loc);
-        frame->local = local;
-
-        ret = inode_ctx_get2 (args->loc->parent, this, &req.par, &req.gen);
-        /* the failure only counts when the parent has an ino */
-        if (args->loc->parent->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted
-                   "SYMLINK") */
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKDIR %"PRId64"/%s (%s): failed to get remote inode"
-                        " number parent",
-                        args->loc->parent->ino, args->loc->name,
-                        args->loc->path);
-                goto unwind;
-        }
-
-        req.path   = (char *)args->loc->path;
-        req.bname  = (char *)args->loc->name;
-        req.mode   = args->mode;
-        req.gfs_id = GFS3_OP_MKDIR;
-
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_MKDIR,
-                               client3_1_mkdir_cbk, NULL, xdr_from_mkdir_req);
-
-        return 0;
-unwind:
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-/*
- * CREATE: fetch the remote inode number/generation of args->loc->parent
- * and submit a GFS3_OP_CREATE request for args->loc->name with args->mode
- * and the converted args->flags.
- *
- * @data is a clnt_args_t.  The local hung off the frame keeps a reference
- * on args->fd, the flags and a copy of the loc.  On failure the frame is
- * unwound with -1 and op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_create (call_frame_t *frame, xlator_t *this,
-                  void *data)
-{
-        clnt_local_t    *local    = NULL;
-        clnt_conf_t     *conf     = NULL;
-        clnt_args_t     *args     = NULL;
-        gfs3_create_req  req      = {0,};
-        int              ret      = 0;
-        int              op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-        local->fd    = fd_ref (args->fd);
-        local->flags = args->flags;
-        loc_copy (&local->loc, args->loc);
-        frame->local = local;
-
-        ret = inode_ctx_get2 (args->loc->parent, this, &req.par, &req.gen);
-        /* the failure only counts when the parent has an ino */
-        if (args->loc->parent->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted
-                   "SYMLINK") */
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "CREATE %"PRId64"/%s (%s): failed to get remote inode"
-                        " number parent",
-                        args->loc->parent->ino, args->loc->name,
-                        args->loc->path);
-                goto unwind;
-        }
-
-        req.path   = (char *)args->loc->path;
-        req.bname  = (char *)args->loc->name;
-        req.mode   = args->mode;
-        req.flags  = gf_flags_from_flags (args->flags);
-        req.gfs_id = GFS3_OP_CREATE;
-
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_CREATE,
-                               client3_1_create_cbk, NULL, xdr_from_create_req);
-
-        return 0;
-unwind:
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-
-/*
- * OPEN: fetch the remote inode number/generation for args->loc->inode and
- * submit a GFS3_OP_OPEN request with the converted args->flags and
- * args->wbflags.
- *
- * @data is a clnt_args_t.  The local hung off the frame keeps a reference
- * on args->fd, the flags, the wbflags and a copy of the loc.  On failure
- * the frame is unwound with -1 and op_errno and the local is wiped.
- * Always returns 0.
- */
-int32_t
-client3_1_open (call_frame_t *frame, xlator_t *this,
-                void *data)
-{
-        gfs3_open_req  req      = {0,};
-        clnt_local_t  *local    = NULL;
-        clnt_args_t   *args     = NULL;
-        clnt_conf_t   *conf     = NULL;
-        int            op_errno = ESTALE;
-        int            ret      = 0;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-
-        local->fd      = fd_ref (args->fd);
-        local->flags   = args->flags;
-        local->wbflags = args->wbflags;
-        loc_copy (&local->loc, args->loc);
-        frame->local   = local;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "OPEN %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-
-        req.gfs_id  = GFS3_OP_OPEN;
-        req.path    = (char *)args->loc->path;
-        req.flags   = gf_flags_from_flags (args->flags);
-        req.wbflags = args->wbflags;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPEN,
-                               client3_1_open_cbk, NULL, xdr_from_open_req);
-
-        return 0;
-
-unwind:
-        /* detach the local before unwinding; it is wiped just below */
-        if (frame)
-                frame->local = NULL;
-
-        STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
-
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-
-/*
- * READ: look up the fd context of args->fd and submit a GFS3_OP_READ
- * request for args->size bytes at args->offset on its remote fd.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with ESTALE when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_readv (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        gfs3_read_req  req      = {0,};
-        clnt_fd_ctx_t *fdctx    = NULL;
-        clnt_args_t   *args     = NULL;
-        clnt_conf_t   *conf     = NULL;
-        int            op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (!fdctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (-1 == fdctx->remote_fd) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_READ;
-        req.fd     = fdctx->remote_fd;
-        req.offset = args->offset;
-        req.size   = args->size;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_READ,
-                               client3_1_readv_cbk, NULL, xdr_from_readv_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
-        return 0;
-}
-
-
-/*
- * WRITE: look up the fd context of args->fd and submit a GFS3_OP_WRITE
- * request on its remote fd, passing args->vector/count/iobref as payload.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with ESTALE when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_writev (call_frame_t *frame, xlator_t *this, void *data)
-{
-        gfs3_write_req  req      = {0,};
-        clnt_fd_ctx_t  *fdctx    = NULL;
-        clnt_args_t    *args     = NULL;
-        clnt_conf_t    *conf     = NULL;
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (!fdctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (-1 == fdctx->remote_fd) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_WRITE;
-        req.fd     = fdctx->remote_fd;
-        req.offset = args->offset;
-        req.size   = args->size;
-
-        client_submit_vec_request (this, &req, frame, conf->fops, GFS3_OP_WRITE,
-                                   client3_1_writev_cbk,
-                                   args->vector, args->count,
-                                   args->iobref, xdr_from_writev_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-
-/*
- * FLUSH: look up the fd context of args->fd and submit a GFS3_OP_FLUSH
- * request on its remote fd.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with ESTALE when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_flush (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        gfs3_flush_req  req      = {0,};
-        clnt_fd_ctx_t  *fdctx    = NULL;
-        clnt_args_t    *args     = NULL;
-        clnt_conf_t    *conf     = NULL;
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (!fdctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (-1 == fdctx->remote_fd) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_FLUSH;
-        req.fd     = fdctx->remote_fd;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FLUSH,
-                               client3_1_flush_cbk, NULL, xdr_from_flush_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
-        return 0;
-}
-
-
-
-/*
- * FSYNC: look up the fd context of args->fd and submit a GFS3_OP_FSYNC
- * request on its remote fd, carrying args->flags in req.data.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with ESTALE when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_fsync (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        clnt_args_t    *args     = NULL;
-        gfs3_fsync_req  req      = {0,};
-        clnt_fd_ctx_t  *fdctx    = NULL;
-        clnt_conf_t    *conf     = NULL;
-        /* Start from ESTALE like flush/fstat/fsyncdir: with 0 here the
-           bad-argument path unwound -1 together with an errno of 0. */
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (fdctx == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (fdctx->remote_fd == -1) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.fd     = fdctx->remote_fd;
-        req.data   = args->flags;
-        req.gfs_id = GFS3_OP_FSYNC;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FSYNC,
-                               client3_1_fsync_cbk, NULL, xdr_from_fsync_req);
-
-        return 0;
-unwind:
-        STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
-        return 0;
-}
-
-
-
-/*
- * FSTAT: look up the fd context of args->fd and submit a GFS3_OP_FSTAT
- * request on its remote fd.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with ESTALE when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_fstat (call_frame_t *frame, xlator_t *this,
-                 void *data)
-{
-        gfs3_fstat_req  req      = {0,};
-        clnt_fd_ctx_t  *fdctx    = NULL;
-        clnt_args_t    *args     = NULL;
-        clnt_conf_t    *conf     = NULL;
-        int             op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (!fdctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (-1 == fdctx->remote_fd) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_FSTAT;
-        req.fd     = fdctx->remote_fd;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FSTAT,
-                               client3_1_fstat_cbk, NULL, xdr_from_fstat_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
-        return 0;
-}
-
-
-
-/*
- * OPENDIR: fetch the remote inode number/generation for args->loc->inode
- * and submit a GFS3_OP_OPENDIR request for args->loc->path.
- *
- * @data is a clnt_args_t.  The local hung off the frame keeps a reference
- * on args->fd and a copy of the loc.  On failure the frame is unwound with
- * -1 and op_errno and the local is wiped.  Always returns 0.
- */
-int32_t
-client3_1_opendir (call_frame_t *frame, xlator_t *this,
-                   void *data)
-{
-        clnt_local_t     *local    = NULL;
-        clnt_conf_t      *conf     = NULL;
-        clnt_args_t      *args     = NULL;
-        gfs3_opendir_req  req      = {0,};
-        int               ret      = 0;
-        int               op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-
-        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t);
-        if (!local) {
-                op_errno = ENOMEM;
-                goto unwind;
-        }
-        local->fd = fd_ref (args->fd);
-        loc_copy (&local->loc, args->loc);
-        frame->local = local;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                /* log under this fop's own name (was a copy-pasted "OPEN") */
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "OPENDIR %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-        req.path   = (char *)args->loc->path;
-        req.gfs_id = GFS3_OP_OPENDIR;
-
-        conf = this->private;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPENDIR,
-                               client3_1_opendir_cbk, NULL,
-                               xdr_from_opendir_req);
-
-        return 0;
-unwind:
-        if (frame)
-                frame->local = NULL;
-        STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
-        if (local)
-                client_local_wipe (local);
-        return 0;
-}
-
-
-
-/*
- * FSYNCDIR: look up the fd context of args->fd and submit a
- * GFS3_OP_FSYNCDIR request on its remote fd, carrying args->flags in
- * req.data.
- *
- * @data is a clnt_args_t.  Unwinds with EBADFD when there is no fd context
- * or its remote fd is -1, and with ESTALE when an argument is NULL.
- * Always returns 0.
- */
-int32_t
-client3_1_fsyncdir (call_frame_t *frame, xlator_t *this, void *data)
-{
-        gfs3_fsyncdir_req  req      = {0,};
-        clnt_fd_ctx_t     *fdctx    = NULL;
-        clnt_args_t       *args     = NULL;
-        clnt_conf_t       *conf     = NULL;
-        int                op_errno = ESTALE;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        /* conf is read once here; it is not re-read before submitting */
-        conf = this->private;
-        args = data;
-
-        pthread_mutex_lock (&conf->lock);
-        {
-                fdctx = this_fd_get_ctx (args->fd, this);
-        }
-        pthread_mutex_unlock (&conf->lock);
-
-        if (!fdctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "(%"PRId64"): failed to get fd ctx. EBADFD",
-                        args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        if (-1 == fdctx->remote_fd) {
-                gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
-                        " fd ctx. EBADFD", args->fd->inode->ino);
-                op_errno = EBADFD;
-                goto unwind;
-        }
-
-        req.gfs_id = GFS3_OP_FSYNCDIR;
-        req.fd     = fdctx->remote_fd;
-        req.data   = args->flags;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR,
-                               client3_1_fsyncdir_cbk, NULL,
-                               xdr_from_fsyncdir_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno);
-        return 0;
-}
-
-
-
-/*
- * STATFS: when args->loc carries an inode, fetch its remote inode
- * number/generation; then submit a GFS3_OP_STATFS request for
- * args->loc->path.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE).  Always returns 0.
- */
-int32_t
-client3_1_statfs (call_frame_t *frame, xlator_t *this,
-                  void *data)
-{
-        gfs3_statfs_req  req      = {0,};
-        clnt_args_t     *args     = NULL;
-        clnt_conf_t     *conf     = NULL;
-        int              op_errno = ESTALE;
-        int              ret      = 0;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        if (args->loc->inode) {
-                ret = inode_ctx_get2 (args->loc->inode, this,
-                                      &req.ino, &req.gen);
-                /* the failure only counts when the inode has an ino */
-                if (args->loc->inode->ino && ret < 0) {
-                        gf_log (this->name, GF_LOG_TRACE,
-                                "STATFS %"PRId64" (%s): "
-                                "failed to get remote inode number",
-                                args->loc->inode->ino, args->loc->path);
-                        goto unwind;
-                }
-        }
-
-        req.gfs_id = GFS3_OP_STATFS;
-        req.path   = (char *)args->loc->path;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_STATFS,
-                               client3_1_statfs_cbk, NULL, xdr_from_statfs_req);
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL);
-        return 0;
-}
-
-
-
-/*
- * SETXATTR: fetch the remote inode number/generation for args->loc->inode,
- * serialize args->dict (when present) into the request and submit a
- * GFS3_OP_SETXATTR request with args->flags.
- *
- * @data is a clnt_args_t.  On failure the frame is unwound with -1 and
- * op_errno (ESTALE, or EINVAL when the dict cannot be serialized).  The
- * serialized dict buffer is freed on both the success and failure paths.
- * Always returns 0.
- */
-int32_t
-client3_1_setxattr (call_frame_t *frame, xlator_t *this,
-                    void *data)
-{
-        gfs3_setxattr_req  req      = {0,};
-        clnt_args_t       *args     = NULL;
-        clnt_conf_t       *conf     = NULL;
-        size_t             dict_len = 0;
-        int                op_errno = ESTALE;
-        int                ret      = 0;
-
-        if (!frame || !this || !data)
-                goto unwind;
-
-        args = data;
-        conf = this->private;
-
-        ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
-        /* the failure only counts when the inode has an ino */
-        if (args->loc->inode->ino && ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "SETXATTR %"PRId64" (%s): "
-                        "failed to get remote inode number",
-                        args->loc->inode->ino, args->loc->path);
-                goto unwind;
-        }
-
-        if (args->dict) {
-                ret = dict_allocate_and_serialize (args->dict,
-                                                   &req.dict.dict_val,
-                                                   &dict_len);
-                if (ret < 0) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "failed to get serialized dict");
-                        op_errno = EINVAL;
-                        goto unwind;
-                }
-                req.dict.dict_len = dict_len;
-        }
-
-        req.gfs_id = GFS3_OP_SETXATTR;
-        req.path   = (char *)args->loc->path;
-        req.flags  = args->flags;
-
-        client_submit_request (this, &req, frame, conf->fops, GFS3_OP_SETXATTR,
-                               client3_1_setxattr_cbk, NULL,
-                               xdr_from_setxattr_req);
-
-        if (req.dict.dict_val) {
-                GF_FREE (req.dict.dict_val);
-        }
-
-        return 0;
-
-unwind:
-        STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno);
-        if (req.dict.dict_val) {
-                GF_FREE (req.dict.dict_val);
-        }
-        return 0;
-}
-
-
-
-int32_t
-client3_1_fsetxattr (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- gfs3_fsetxattr_req req = {0,};
- int op_errno = ESTALE;
- int ret = 0;
- size_t dict_len = 0;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.fd = fdctx->remote_fd;
- req.flags = args->flags;
- req.ino = args->fd->inode->ino;
- req.gfs_id = GFS3_OP_FSETXATTR;
-
- if (args->dict) {
- ret = dict_allocate_and_serialize (args->dict,
- &req.dict.dict_val,
- &dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized dict");
- goto unwind;
- }
- req.dict.dict_len = dict_len;
- }
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FSETXATTR,
- client3_1_fsetxattr_cbk, NULL, xdr_from_fsetxattr_req);
-
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
- return 0;
-}
-
-
-
-
-int32_t
-client3_1_fgetxattr (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- gfs3_fgetxattr_req req = {0,};
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.namelen = 1; /* Use it as a flag */
- req.fd = fdctx->remote_fd;
- req.name = (char *)args->name;
- if (!req.name) {
- req.name = "";
- req.namelen = 0;
- }
- req.gfs_id = GFS3_OP_FGETXATTR;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FGETXATTR,
- client3_1_fgetxattr_cbk, NULL, xdr_from_fgetxattr_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-
-int32_t
-client3_1_getxattr (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_getxattr_req req = {0,};
- int ret = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
-
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "STAT %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
-
- req.namelen = 1; /* Use it as a flag */
- req.path = (char *)args->loc->path;
- req.name = (char *)args->name;
- if (!req.name) {
- req.name = "";
- req.namelen = 0;
- }
- req.gfs_id = GFS3_OP_GETXATTR;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_GETXATTR,
- client3_1_getxattr_cbk, NULL, xdr_from_getxattr_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-
-int32_t
-client3_1_xattrop (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_xattrop_req req = {0,};
- int ret = 0;
- size_t dict_len = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
-
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SETXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
- if (args->dict) {
- ret = dict_allocate_and_serialize (args->dict,
- &req.dict.dict_val,
- &dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized dict");
- op_errno = EINVAL;
- goto unwind;
- }
- req.dict.dict_len = dict_len;
- }
- req.flags = args->flags;
- req.path = (char *)args->loc->path;
- req.gfs_id = GFS3_OP_XATTROP;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_XATTROP,
- client3_1_xattrop_cbk, NULL, xdr_from_xattrop_req);
-
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
- return 0;
-unwind:
- STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
- return 0;
-}
-
-
-
-int32_t
-client3_1_fxattrop (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- gfs3_fxattrop_req req = {0,};
- int op_errno = ESTALE;
- int ret = 0;
- size_t dict_len = 0;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.fd = fdctx->remote_fd;
- req.flags = args->flags;
- req.ino = args->fd->inode->ino;
- req.gfs_id = GFS3_OP_FXATTROP;
-
- if (args->dict) {
- ret = dict_allocate_and_serialize (args->dict,
- &req.dict.dict_val,
- &dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized dict");
- goto unwind;
- }
- req.dict.dict_len = dict_len;
- }
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FXATTROP,
- client3_1_fxattrop_cbk, NULL, xdr_from_fxattrop_req);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
- return 0;
-}
-
-
-
-int32_t
-client3_1_removexattr (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_removexattr_req req = {0,};
- int ret = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
-
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "REMOVEXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
- req.path = (char *)args->loc->path;
- req.name = (char *)args->name;
- req.gfs_id = GFS3_OP_REMOVEXATTR;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_REMOVEXATTR,
- client3_1_removexattr_cbk, NULL, xdr_from_removexattr_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-client3_1_lk (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- gfs3_lk_req req = {0,};
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (args->cmd == F_GETLK || args->cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (args->cmd == F_SETLK || args->cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (args->flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- req.fd = fdctx->remote_fd;
- req.cmd = gf_cmd;
- req.type = gf_type;
- gf_flock_from_flock (&req.flock, args->flock);
- req.gfs_id = GFS3_OP_LK;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LK,
- client3_1_lk_cbk, NULL, xdr_from_lk_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-client3_1_inodelk (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_inodelk_req req = {0,};
- int ret = 0;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "INODELK %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
-
- if (args->cmd == F_GETLK || args->cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (args->cmd == F_SETLK || args->cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- op_errno = EINVAL;
- goto unwind;
- }
-
- switch (args->flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- req.path = (char *)args->loc->path;
- req.volume = (char *)args->volume;
- req.cmd = gf_cmd;
- req.type = gf_type;
- gf_flock_from_flock (&req.flock, args->flock);
- req.gfs_id = GFS3_OP_INODELK;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_INODELK,
- client3_1_inodelk_cbk, NULL, xdr_from_inodelk_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno);
- return 0;
-}
-
-
-
-int32_t
-client3_1_finodelk (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- gfs3_finodelk_req req = {0,};
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (args->cmd == F_GETLK || args->cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (args->cmd == F_SETLK || args->cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (args->flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- req.volume = (char *)args->volume;
- req.fd = fdctx->remote_fd;
- req.cmd = gf_cmd;
- req.type = gf_type;
- gf_flock_from_flock (&req.flock, args->flock);
- req.gfs_id = GFS3_OP_FINODELK;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FINODELK,
- client3_1_finodelk_cbk, NULL, xdr_from_finodelk_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-client3_1_entrylk (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_entrylk_req req = {0,};
- int ret = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
-
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "STAT %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
- req.path = (char *)args->loc->path;
- req.cmd = args->cmd_entrylk;
- req.type = args->type;
- req.volume = (char *)args->volume;
- req.name = "";
- if (args->basename) {
- req.name = (char *)args->basename;
- req.namelen = 1;
- }
- req.gfs_id = GFS3_OP_ENTRYLK;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_ENTRYLK,
- client3_1_entrylk_cbk, NULL, xdr_from_entrylk_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno);
- return 0;
-}
-
-
-
-int32_t
-client3_1_fentrylk (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- gfs3_fentrylk_req req = {0,};
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.fd = fdctx->remote_fd;
- req.cmd = args->cmd_entrylk;
- req.type = args->type;
- req.volume = (char *)args->volume;
- req.name = "";
- if (args->basename) {
- req.name = (char *)args->basename;
- req.namelen = 1;
- }
- req.gfs_id = GFS3_OP_FENTRYLK;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FENTRYLK,
- client3_1_fentrylk_cbk, NULL, xdr_from_fentrylk_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno);
- return 0;
-}
-
-
-
-
-int32_t
-client3_1_checksum (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_checksum_req req = {0,};
- int ret = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
-
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "STAT %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
- req.path = (char *)args->loc->path;
- req.flag = args->flags;
- req.gfs_id = GFS3_OP_CHECKSUM;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_CHECKSUM,
- client3_1_checksum_cbk, NULL, xdr_from_checksum_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (checksum, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-
-int32_t
-client3_1_rchecksum (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- gfs3_rchecksum_req req = {0,};
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.len = args->len;
- req.offset = args->offset;
- req.fd = fdctx->remote_fd;
- req.gfs_id = GFS3_OP_RCHECKSUM;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_RCHECKSUM,
- client3_1_rchecksum_cbk, NULL, xdr_from_rchecksum_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (rchecksum, frame, -1, op_errno, 0, NULL);
- return 0;
-}
-
-
-
-int32_t
-client3_1_readdir (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- gfs3_readdir_req req = {0,};
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.size = args->size;
- req.offset = args->offset;
- req.fd = fdctx->remote_fd;
- req.gfs_id = GFS3_OP_READDIR;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_READDIR,
- client3_1_readdir_cbk, NULL, xdr_from_readdir_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-client3_1_readdirp (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_args_t *args = NULL;
- gfs3_readdirp_req req = {0,};
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.size = args->size;
- req.offset = args->offset;
- req.fd = fdctx->remote_fd;
- req.gfs_id = GFS3_OP_READDIRP;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_READDIRP,
- client3_1_readdirp_cbk, NULL, xdr_from_readdirp_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-client3_1_setattr (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- clnt_conf_t *conf = NULL;
- clnt_args_t *args = NULL;
- gfs3_setattr_req req = {0,};
- int ret = 0;
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
-
- ret = inode_ctx_get2 (args->loc->inode, this, &req.ino, &req.gen);
- if (args->loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "STAT %"PRId64" (%s): "
- "failed to get remote inode number",
- args->loc->inode->ino, args->loc->path);
- goto unwind;
- }
- req.path = (char *)args->loc->path;
- req.valid = args->valid;
- gf_stat_from_iatt (&req.stbuf, args->stbuf);
- req.gfs_id = GFS3_OP_SETATTR;
-
- conf = this->private;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_SETATTR,
- client3_1_setattr_cbk, NULL, xdr_from_setattr_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-client3_1_fsetattr (call_frame_t *frame, xlator_t *this, void *data)
-{
- clnt_args_t *args = NULL;
- clnt_fd_ctx_t *fdctx = NULL;
- clnt_conf_t *conf = NULL;
- gfs3_fsetattr_req req = {0,};
- int op_errno = ESTALE;
-
- if (!frame || !this || !data)
- goto unwind;
-
- args = data;
- conf = this->private;
-
- pthread_mutex_lock (&conf->lock);
- {
- fdctx = this_fd_get_ctx (args->fd, this);
- }
- pthread_mutex_unlock (&conf->lock);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", args->fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
-
- req.fd = fdctx->remote_fd;
- req.valid = args->valid;
- gf_stat_from_iatt (&req.stbuf, args->stbuf);
- req.gfs_id = GFS3_OP_FSETATTR;
-
- client_submit_request (this, &req, frame, conf->fops, GFS3_OP_FSETATTR,
- client3_1_fsetattr_cbk, NULL, xdr_from_fsetattr_req);
-
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-
-/* Table Specific to FOPS */
-
-
-rpc_clnt_procedure_t clnt3_1_fop_actors[GF_FOP_MAXVALUE] = {
- [GF_FOP_NULL] = { "NULL", NULL},
- [GF_FOP_STAT] = { "STAT", client3_1_stat },
- [GF_FOP_READLINK] = { "READLINK", client3_1_readlink },
- [GF_FOP_MKNOD] = { "MKNOD", client3_1_mknod },
- [GF_FOP_MKDIR] = { "MKDIR", client3_1_mkdir },
- [GF_FOP_UNLINK] = { "UNLINK", client3_1_unlink },
- [GF_FOP_RMDIR] = { "RMDIR", client3_1_rmdir },
- [GF_FOP_SYMLINK] = { "SYMLINK", client3_1_symlink },
- [GF_FOP_RENAME] = { "RENAME", client3_1_rename },
- [GF_FOP_LINK] = { "LINK", client3_1_link },
- [GF_FOP_TRUNCATE] = { "TRUNCATE", client3_1_truncate },
- [GF_FOP_OPEN] = { "OPEN", client3_1_open },
- [GF_FOP_READ] = { "READ", client3_1_readv },
- [GF_FOP_WRITE] = { "WRITE", client3_1_writev },
- [GF_FOP_STATFS] = { "STATFS", client3_1_statfs },
- [GF_FOP_FLUSH] = { "FLUSH", client3_1_flush },
- [GF_FOP_FSYNC] = { "FSYNC", client3_1_fsync },
- [GF_FOP_SETXATTR] = { "SETXATTR", client3_1_setxattr },
- [GF_FOP_GETXATTR] = { "GETXATTR", client3_1_getxattr },
- [GF_FOP_REMOVEXATTR] = { "REMOVEXATTR", client3_1_removexattr },
- [GF_FOP_OPENDIR] = { "OPENDIR", client3_1_opendir },
- [GF_FOP_FSYNCDIR] = { "FSYNCDIR", client3_1_fsyncdir },
- [GF_FOP_ACCESS] = { "ACCESS", client3_1_access },
- [GF_FOP_CREATE] = { "CREATE", client3_1_create },
- [GF_FOP_FTRUNCATE] = { "FTRUNCATE", client3_1_ftruncate },
- [GF_FOP_FSTAT] = { "FSTAT", client3_1_fstat },
- [GF_FOP_LK] = { "LK", client3_1_lk },
- [GF_FOP_LOOKUP] = { "LOOKUP", client3_1_lookup },
- [GF_FOP_READDIR] = { "READDIR", client3_1_readdir },
- [GF_FOP_INODELK] = { "INODELK", client3_1_inodelk },
- [GF_FOP_FINODELK] = { "FINODELK", client3_1_finodelk },
- [GF_FOP_ENTRYLK] = { "ENTRYLK", client3_1_entrylk },
- [GF_FOP_FENTRYLK] = { "FENTRYLK", client3_1_fentrylk },
- [GF_FOP_CHECKSUM] = { "CHECKSUM", client3_1_checksum },
- [GF_FOP_XATTROP] = { "XATTROP", client3_1_xattrop },
- [GF_FOP_FXATTROP] = { "FXATTROP", client3_1_fxattrop },
- [GF_FOP_FGETXATTR] = { "FGETXATTR", client3_1_fgetxattr },
- [GF_FOP_FSETXATTR] = { "FSETXATTR", client3_1_fsetxattr },
- [GF_FOP_RCHECKSUM] = { "RCHECKSUM", client3_1_rchecksum },
- [GF_FOP_SETATTR] = { "SETATTR", client3_1_setattr },
- [GF_FOP_FSETATTR] = { "FSETATTR", client3_1_fsetattr },
- [GF_FOP_READDIRP] = { "READDIRP", client3_1_readdirp },
- [GF_FOP_RELEASE] = { "RELEASE", client3_1_release },
- [GF_FOP_RELEASEDIR] = { "RELEASEDIR", client3_1_releasedir },
- [GF_FOP_GETSPEC] = { "GETSPEC", client3_getspec },
-};
-
-/* Used From RPC-CLNT library to log proper name of procedure based on number */
-char *clnt3_1_fop_names[GFS3_OP_MAXVALUE] = {
- [GFS3_OP_NULL] = "NULL",
- [GFS3_OP_STAT] = "STAT",
- [GFS3_OP_READLINK] = "READLINK",
- [GFS3_OP_MKNOD] = "MKNOD",
- [GFS3_OP_MKDIR] = "MKDIR",
- [GFS3_OP_UNLINK] = "UNLINK",
- [GFS3_OP_RMDIR] = "RMDIR",
- [GFS3_OP_SYMLINK] = "SYMLINK",
- [GFS3_OP_RENAME] = "RENAME",
- [GFS3_OP_LINK] = "LINK",
- [GFS3_OP_TRUNCATE] = "TRUNCATE",
- [GFS3_OP_OPEN] = "OPEN",
- [GFS3_OP_READ] = "READ",
- [GFS3_OP_WRITE] = "WRITE",
- [GFS3_OP_STATFS] = "STATFS",
- [GFS3_OP_FLUSH] = "FLUSH",
- [GFS3_OP_FSYNC] = "FSYNC",
- [GFS3_OP_SETXATTR] = "SETXATTR",
- [GFS3_OP_GETXATTR] = "GETXATTR",
- [GFS3_OP_REMOVEXATTR] = "REMOVEXATTR",
- [GFS3_OP_OPENDIR] = "OPENDIR",
- [GFS3_OP_FSYNCDIR] = "FSYNCDIR",
- [GFS3_OP_ACCESS] = "ACCESS",
- [GFS3_OP_CREATE] = "CREATE",
- [GFS3_OP_FTRUNCATE] = "FTRUNCATE",
- [GFS3_OP_FSTAT] = "FSTAT",
- [GFS3_OP_LK] = "LK",
- [GFS3_OP_LOOKUP] = "LOOKUP",
- [GFS3_OP_READDIR] = "READDIR",
- [GFS3_OP_INODELK] = "INODELK",
- [GFS3_OP_FINODELK] = "FINODELK",
- [GFS3_OP_ENTRYLK] = "ENTRYLK",
- [GFS3_OP_FENTRYLK] = "FENTRYLK",
- [GFS3_OP_CHECKSUM] = "CHECKSUM",
- [GFS3_OP_XATTROP] = "XATTROP",
- [GFS3_OP_FXATTROP] = "FXATTROP",
- [GFS3_OP_FGETXATTR] = "FGETXATTR",
- [GFS3_OP_FSETXATTR] = "FSETXATTR",
- [GFS3_OP_RCHECKSUM] = "RCHECKSUM",
- [GFS3_OP_SETATTR] = "SETATTR",
- [GFS3_OP_FSETATTR] = "FSETATTR",
- [GFS3_OP_READDIRP] = "READDIRP",
- [GFS3_OP_RELEASE] = "RELEASE",
- [GFS3_OP_RELEASEDIR] = "RELEASEDIR",
-};
-
-rpc_clnt_prog_t clnt3_1_fop_prog = {
- .progname = "GlusterFS 3.1",
- .prognum = GLUSTER3_1_FOP_PROGRAM,
- .progver = GLUSTER3_1_FOP_VERSION,
- .numproc = GLUSTER3_1_FOP_PROCCNT,
- .proctable = clnt3_1_fop_actors,
- .procnames = clnt3_1_fop_names,
-};
-
-rpc_clnt_prog_t clnt3_1_mgmt_prog = {
- .progname = "Gluster Mgmt 3.1",
- .prognum = GLUSTER1_MGMT_PROGRAM,
- .progver = GLUSTER1_MGMT_VERSION,
-};
diff --git a/xlators/protocol/legacy/Makefile.am b/xlators/protocol/legacy/Makefile.am
deleted file mode 100644
index 9914863021c..00000000000
--- a/xlators/protocol/legacy/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = lib transport client server
-
-CLEANFILES =
diff --git a/xlators/protocol/legacy/client/src/Makefile.am b/xlators/protocol/legacy/client/src/Makefile.am
deleted file mode 100644
index 2ae64ebd0fd..00000000000
--- a/xlators/protocol/legacy/client/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-
-xlator_LTLIBRARIES = client-old.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/protocol
-
-client_old_la_LDFLAGS = -module -avoidversion
-
-client_old_la_SOURCES = client-protocol.c saved-frames.c
-
-client_old_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la
-
-noinst_HEADERS = client-protocol.h saved-frames.h client-mem-types.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/protocol/legacy/lib/src
-
-CLEANFILES =
-
-install-data-hook:
- ln -sf client-old.so $(DESTDIR)$(xlatordir)/client.so
diff --git a/xlators/protocol/legacy/client/src/client-mem-types.h b/xlators/protocol/legacy/client/src/client-mem-types.h
deleted file mode 100644
index 1eee8d93159..00000000000
--- a/xlators/protocol/legacy/client/src/client-mem-types.h
+++ /dev/null
@@ -1,43 +0,0 @@
-
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __CLIENT_MEM_TYPES_H__
-#define __CLIENT_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_client_mem_types_ {
- gf_client_mt_dir_entry_t = gf_common_mt_end + 1,
- gf_client_mt_volfile_ctx,
- gf_client_mt_client_state_t,
- gf_client_mt_client_conf_t,
- gf_client_mt_locker,
- gf_client_mt_lock_table,
- gf_client_mt_char,
- gf_client_mt_client_connection_t,
- gf_client_mt_client_fd_ctx_t,
- gf_client_mt_client_local_t,
- gf_client_mt_saved_frames,
- gf_client_mt_saved_frame,
- gf_client_mt_end
-};
-#endif
-
diff --git a/xlators/protocol/legacy/client/src/client-protocol.c b/xlators/protocol/legacy/client/src/client-protocol.c
deleted file mode 100644
index 0dc5bee5d56..00000000000
--- a/xlators/protocol/legacy/client/src/client-protocol.c
+++ /dev/null
@@ -1,6739 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <inttypes.h>
-
-
-#include "glusterfs.h"
-#include "client-protocol.h"
-#include "compat.h"
-#include "dict.h"
-#include "protocol.h"
-#include "transport.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "client-mem-types.h"
-
-#include <sys/resource.h>
-#include <inttypes.h>
-
-/* for default_*_cbk functions */
-#include "defaults.c"
-#include "saved-frames.h"
-#include "common-utils.h"
-
-int protocol_client_cleanup (transport_t *trans);
-int protocol_client_interpret (xlator_t *this, transport_t *trans,
- char *hdr_p, size_t hdrlen,
- struct iobuf *iobuf);
-int
-protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans,
- int type, int op,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iovec *vector, int count,
- struct iobref *iobref);
-
-int
-protocol_client_post_handshake (call_frame_t *frame, xlator_t *this);
-
-static gf_op_t gf_fops[GF_PROTO_FOP_MAXVALUE];
-static gf_op_t gf_mops[GF_MOP_MAXVALUE];
-static gf_op_t gf_cbks[GF_CBK_MAXVALUE];
-
-
-transport_t *
-client_channel (xlator_t *this, int id)
-{
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- int i = 0;
- struct client_connection *conn = NULL;
-
- conf = this->private;
-
- trans = conf->transport[id];
- conn = trans->xl_private;
-
- if (conn->connected == 1)
- goto ret;
-
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = conf->transport[i];
- conn = trans->xl_private;
- if (conn->connected == 1)
- break;
- }
-
-ret:
- return trans;
-}
-
-
-client_fd_ctx_t *
-this_fd_del_ctx (fd_t *file, xlator_t *this)
-{
- int dict_ret = -1;
- uint64_t ctxaddr = 0;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, file, out);
-
- dict_ret = fd_ctx_del (file, this, &ctxaddr);
-
- if (dict_ret < 0) {
- ctxaddr = 0;
- }
-
-out:
- return (client_fd_ctx_t *)(unsigned long)ctxaddr;
-}
-
-
-client_fd_ctx_t *
-this_fd_get_ctx (fd_t *file, xlator_t *this)
-{
- int dict_ret = -1;
- uint64_t ctxaddr = 0;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, file, out);
-
- dict_ret = fd_ctx_get (file, this, &ctxaddr);
-
- if (dict_ret < 0) {
- ctxaddr = 0;
- }
-
-out:
- return (client_fd_ctx_t *)(unsigned long)ctxaddr;
-}
-
-
-static void
-this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, client_fd_ctx_t *ctx)
-{
- uint64_t oldaddr = 0;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, file, out);
-
- ret = fd_ctx_get (file, this, &oldaddr);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): trying duplicate remote fd set. ",
- loc->path, loc->inode->ino);
- }
-
- ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to set remote fd",
- loc->path, loc->inode->ino);
- }
-out:
- return;
-}
-
-
-static int
-client_local_wipe (client_local_t *local)
-{
- if (local) {
- loc_wipe (&local->loc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- GF_FREE (local);
- }
-
- return 0;
-}
-
-/*
- * lookup_frame - lookup call frame corresponding to a given callid
- * @trans: transport object
- * @callid: call id of the frame
- *
- * not for external reference
- */
-
-static call_frame_t *
-lookup_frame (transport_t *trans, int32_t op, int8_t type, int64_t callid)
-{
- client_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
-
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- frame = saved_frames_get (conn->saved_frames,
- op, type, callid);
- }
- pthread_mutex_unlock (&conn->lock);
-
- return frame;
-}
-
-
-static void
-call_bail (void *data)
-{
- client_connection_t *conn = NULL;
- struct timeval current;
- transport_t *trans = NULL;
- struct list_head list;
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
- call_frame_t *frame = NULL;
- gf_hdr_common_t hdr = {0, };
- char **gf_op_list = NULL;
- gf_op_t *gf_ops = NULL;
- struct tm frame_sent_tm;
- char frame_sent[32] = {0,};
- struct timeval timeout = {0,};
- gf_timer_cbk_t timer_cbk = NULL;
-
- GF_VALIDATE_OR_GOTO ("client", data, out);
- trans = data;
-
- conn = trans->xl_private;
-
- gettimeofday (&current, NULL);
- INIT_LIST_HEAD (&list);
-
- pthread_mutex_lock (&conn->lock);
- {
- /* Chaining to get call-always functionality from
- call-once timer */
- if (conn->timer) {
- timer_cbk = conn->timer->callbk;
-
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
- gf_timer_call_cancel (trans->xl->ctx, conn->timer);
- conn->timer = gf_timer_call_after (trans->xl->ctx,
- timeout,
- timer_cbk,
- trans);
- if (conn->timer == NULL) {
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "Cannot create bailout timer");
- }
- }
-
- do {
- saved_frame =
- saved_frames_get_timedout (conn->saved_frames,
- GF_OP_TYPE_MOP_REQUEST,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
-
- } while (saved_frame);
-
- do {
- saved_frame =
- saved_frames_get_timedout (conn->saved_frames,
- GF_OP_TYPE_FOP_REQUEST,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
- } while (saved_frame);
-
- do {
- saved_frame =
- saved_frames_get_timedout (conn->saved_frames,
- GF_OP_TYPE_CBK_REQUEST,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
- } while (saved_frame);
- }
- pthread_mutex_unlock (&conn->lock);
-
- hdr.rsp.op_ret = hton32 (-1);
- hdr.rsp.op_errno = hton32 (ENOTCONN);
-
- list_for_each_entry_safe (trav, tmp, &list, list) {
- switch (trav->type)
- {
- case GF_OP_TYPE_FOP_REQUEST:
- gf_ops = gf_fops;
- gf_op_list = gf_fop_list;
- break;
- case GF_OP_TYPE_MOP_REQUEST:
- gf_ops = gf_mops;
- gf_op_list = gf_mop_list;
- break;
- case GF_OP_TYPE_CBK_REQUEST:
- gf_ops = gf_cbks;
- gf_op_list = gf_cbk_list;
- break;
- default:
- goto out;
- }
-
- localtime_r (&trav->saved_at.tv_sec, &frame_sent_tm);
- strftime (frame_sent, 32, "%Y-%m-%d %H:%M:%S", &frame_sent_tm);
-
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "bailing out frame %s(%d) "
- "frame sent = %s. frame-timeout = %d",
- gf_op_list[trav->op], trav->op,
- frame_sent, conn->frame_timeout);
-
- hdr.type = hton32 (trav->type);
- hdr.op = hton32 (trav->op);
-
- frame = trav->frame;
-
- gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL);
-
- list_del_init (&trav->list);
- GF_FREE (trav);
- }
-out:
- return;
-}
-
-
-void
-save_frame (transport_t *trans, call_frame_t *frame,
- int32_t op, int8_t type, uint64_t callid)
-{
- client_connection_t *conn = NULL;
- struct timeval timeout = {0, };
-
-
- conn = trans->xl_private;
-
- saved_frames_put (conn->saved_frames, frame, op, type, callid);
-
- if (conn->timer == NULL && conn->frame_timeout) {
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
- conn->timer = gf_timer_call_after (trans->xl->ctx, timeout,
- call_bail, (void *) trans);
- }
-}
-
-
-
-void
-client_ping_timer_expired (void *data)
-{
- xlator_t *this = NULL;
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- int disconnect = 0;
- int transport_activity = 0;
- struct timeval timeout = {0, };
- struct timeval current = {0, };
-
- trans = data;
- this = trans->xl;
- conf = this->private;
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->ping_timer)
- gf_timer_call_cancel (trans->xl->ctx,
- conn->ping_timer);
- gettimeofday (&current, NULL);
-
- pthread_mutex_lock (&conf->mutex);
- {
- if (((current.tv_sec - conf->last_received.tv_sec) <
- conn->ping_timeout)
- || ((current.tv_sec - conf->last_sent.tv_sec) <
- conn->ping_timeout)) {
- transport_activity = 1;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (transport_activity) {
- gf_log (this->name, GF_LOG_TRACE,
- "ping timer expired but transport activity "
- "detected - not bailing transport");
- conn->transport_activity = 0;
- timeout.tv_sec = conn->ping_timeout;
- timeout.tv_usec = 0;
-
- conn->ping_timer =
- gf_timer_call_after (trans->xl->ctx, timeout,
- client_ping_timer_expired,
- (void *) trans);
- if (conn->ping_timer == NULL)
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to setup timer");
-
- } else {
- conn->ping_started = 0;
- conn->ping_timer = NULL;
- disconnect = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
- if (disconnect) {
- gf_log (this->name, GF_LOG_ERROR,
- "Server %s has not responded in the last %d "
- "seconds, disconnecting.",
- conf->transport[0]->peerinfo.identifier,
- conn->ping_timeout);
-
- transport_disconnect (conf->transport[0]);
- transport_disconnect (conf->transport[1]);
- }
-}
-
-
-void
-client_start_ping (void *data)
-{
- xlator_t *this = NULL;
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- int32_t ret = -1;
- gf_hdr_common_t *hdr = NULL;
- struct timeval timeout = {0, };
- call_frame_t *dummy_frame = NULL;
- size_t hdrlen = -1;
- gf_mop_ping_req_t *req = NULL;
- int frame_count = 0;
-
-
- trans = data;
- this = trans->xl;
- conf = this->private;
- conn = trans->xl_private;
-
- if (!conn->ping_timeout)
- return;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->ping_timer)
- gf_timer_call_cancel (trans->xl->ctx, conn->ping_timer);
-
- conn->ping_timer = NULL;
- conn->ping_started = 0;
-
- if (conn->saved_frames)
- /* treat the case where conn->saved_frames is NULL
- as no pending frames */
- frame_count = conn->saved_frames->count;
-
- if ((frame_count == 0) || !conn->connected) {
- /* using goto looked ugly here,
- * hence getting out this way */
- /* unlock */
- pthread_mutex_unlock (&conn->lock);
- return;
- }
-
- if (frame_count < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "saved_frames->count is %"PRId64,
- conn->saved_frames->count);
- conn->saved_frames->count = 0;
- }
-
- timeout.tv_sec = conn->ping_timeout;
- timeout.tv_usec = 0;
-
- conn->ping_timer =
- gf_timer_call_after (trans->xl->ctx, timeout,
- client_ping_timer_expired,
- (void *) trans);
-
- if (conn->ping_timer == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to setup timer");
- } else {
- conn->ping_started = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- if (!hdr)
- goto err;
-
- dummy_frame = create_frame (this, this->ctx->pool);
-
- if (!dummy_frame)
- goto err;
-
- dummy_frame->local = trans;
-
- ret = protocol_client_xfer (dummy_frame, this, trans,
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_PING,
- hdr, hdrlen, NULL, 0, NULL);
- return;
-err:
- if (hdr)
- GF_FREE (hdr);
-
- if (dummy_frame)
- STACK_DESTROY (dummy_frame->root);
-
- return;
-}
-
-
-int
-client_ping_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- xlator_t *this = NULL;
- transport_t *trans = NULL;
- client_connection_t *conn = NULL;
- struct timeval timeout = {0, };
- int op_ret = 0;
-
- trans = frame->local; frame->local = NULL;
- this = trans->xl;
- conn = trans->xl_private;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret == -1) {
- /* timer expired and transport bailed out */
- gf_log (this->name, GF_LOG_DEBUG, "timer must have expired");
- goto out;
- }
-
- pthread_mutex_lock (&conn->lock);
- {
- timeout.tv_sec = conn->ping_timeout;
- timeout.tv_usec = 0;
-
- gf_timer_call_cancel (trans->xl->ctx,
- conn->ping_timer);
-
- conn->ping_timer =
- gf_timer_call_after (trans->xl->ctx, timeout,
- client_start_ping, (void *)trans);
- if (conn->ping_timer == NULL)
- gf_log (this->name, GF_LOG_DEBUG,
- "gf_timer_call_after() returned NULL");
- }
- pthread_mutex_unlock (&conn->lock);
-out:
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int
-client_encode_groups (call_frame_t *frame, gf_hdr_common_t *hdr)
-{
- int i = 0;
- if ((!frame) || (!hdr))
- return -1;
-
- hdr->req.ngrps = hton32 (frame->root->ngrps);
- if (frame->root->ngrps == 0)
- return 0;
-
- for (; i < frame->root->ngrps; ++i)
- hdr->req.groups[i] = hton32 (frame->root->groups[i]);
-
- return 0;
-}
-
-
-int
-protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans,
- int type, int op,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- uint64_t callid = 0;
- int32_t ret = -1;
- int start_ping = 0;
- gf_hdr_common_t rsphdr = {0, };
-
- conf = this->private;
-
- if (!trans) {
- /* default to bulk op since it is 'safer' */
- trans = conf->transport[CHANNEL_BULK];
- }
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- callid = ++conn->callid;
-
- hdr->callid = hton64 (callid);
- hdr->op = hton32 (op);
- hdr->type = hton32 (type);
-
- if (frame) {
- hdr->req.uid = hton32 (frame->root->uid);
- hdr->req.gid = hton32 (frame->root->gid);
- hdr->req.pid = hton32 (frame->root->pid);
- hdr->req.lk_owner = hton64 (frame->root->lk_owner);
- client_encode_groups (frame, hdr);
- }
-
- if (conn->connected == 0)
- transport_connect (trans);
-
- ret = -1;
-
- if (conn->connected ||
- ((type == GF_OP_TYPE_MOP_REQUEST) &&
- (op == GF_MOP_SETVOLUME))) {
- ret = transport_submit (trans, (char *)hdr, hdrlen,
- vector, count, iobref);
- }
-
- if ((ret >= 0) && frame) {
- pthread_mutex_lock (&conf->mutex);
- {
- gettimeofday (&conf->last_sent, NULL);
- }
- pthread_mutex_unlock (&conf->mutex);
- save_frame (trans, frame, op, type, callid);
- }
-
- if (!conn->ping_started && (ret >= 0)) {
- start_ping = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (start_ping)
- client_start_ping ((void *) trans);
-
- if (frame && (ret < 0)) {
- rsphdr.op = op;
- rsphdr.rsp.op_ret = hton32 (-1);
- rsphdr.rsp.op_errno = hton32 (ENOTCONN);
-
- if (type == GF_OP_TYPE_FOP_REQUEST) {
- rsphdr.type = GF_OP_TYPE_FOP_REPLY;
- gf_fops[op] (frame, &rsphdr, sizeof (rsphdr), NULL);
- } else if (type == GF_OP_TYPE_MOP_REQUEST) {
- rsphdr.type = GF_OP_TYPE_MOP_REPLY;
- gf_mops[op] (frame, &rsphdr, sizeof (rsphdr), NULL);
- } else {
- rsphdr.type = GF_OP_TYPE_CBK_REPLY;
- gf_cbks[op] (frame, &rsphdr, sizeof (rsphdr), NULL);
- }
-
- GF_FREE (hdr);
- }
-
- return ret;
-}
-
-
-
-/**
- * client_create - create function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @path: complete path to file
- * @flags: create flags
- * @mode: create mode
- *
- * external reference through client_protocol_xlator->fops->create
- */
-
-int
-client_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_create_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- size_t baselen = 0;
- int32_t ret = -1;
- ino_t par = 0;
- uint64_t gen = 0;
- client_local_t *local = NULL;
-
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- local->fd = fd_ref (fd);
- loc_copy (&local->loc, loc);
- local->flags = flags;
-
- frame->local = local;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
-
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent inode",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (gf_flags_from_flags (flags));
- req->mode = hton32 (mode);
- req->par = hton64 (par);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_CREATE,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, fd, NULL, NULL);
- return 0;
-
-}
-
-/**
- * client_open - open function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location of file
- * @flags: open flags
- * @mode: open modes
- *
- * external reference through client_protocol_xlator->fops->open
- */
-
-int
-client_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_fop_open_req_t *req = NULL;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
- client_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- local->fd = fd_ref (fd);
- loc_copy (&local->loc, loc);
- local->flags = flags;
- local->wbflags = wbflags;
-
- frame->local = local;
-
- pathlen = STRLEN_0 (loc->path);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPEN %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->flags = hton32 (gf_flags_from_flags (flags));
- req->wbflags = hton32 (wbflags);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPEN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, fd);
- return 0;
-
-}
-
-
-/**
- * client_stat - stat function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- *
- * external reference through client_protocol_xlator->fops->stat
- */
-
-int
-client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_stat_req_t *req = NULL;
- size_t hdrlen = -1;
- int32_t ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- ino_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "STAT %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_STAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_readlink - readlink function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @size:
- *
- * external reference through client_protocol_xlator->fops->readlink
- */
-int
-client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readlink_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READLINK %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->size = hton32 (size);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND_STRICT (readlink, frame, -1, EINVAL,
- NULL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_mknod - mknod function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @path: pathname of node
- * @mode:
- * @dev:
- *
- * external reference through client_protocol_xlator->fops->mknod
- */
-int
-client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t dev)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mknod_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- uint64_t gen = 0;
- client_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->gen = hton64 (gen);
- req->mode = hton32 (mode);
- req->dev = hton64 (dev);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_MKNOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, loc->inode, NULL);
- return 0;
-
-}
-
-
-/**
- * client_mkdir - mkdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @path: pathname of directory
- * @mode:
- *
- * external reference through client_protocol_xlator->fops->mkdir
- */
-int
-client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mkdir_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- uint64_t gen = 0;
- client_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKDIR %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->gen = hton64 (gen);
- req->mode = hton32 (mode);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_MKDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, loc->inode, NULL);
- return 0;
-
-}
-
-/**
- * client_unlink - unlink function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location of file
- *
- * external reference through client_protocol_xlator->fops->unlink
- */
-
-int
-client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_unlink_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- uint64_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "UNLINK %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_UNLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-/**
- * client_rmdir - rmdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- *
- * external reference through client_protocol_xlator->fops->rmdir
- */
-
-int
-client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rmdir_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- uint64_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "RMDIR %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_RMDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-
-/**
- * client_symlink - symlink function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @oldpath: pathname of target
- * @newpath: pathname of symlink
- *
- * external reference through client_protocol_xlator->fops->symlink
- */
-
-int
-client_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_symlink_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- size_t newlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- uint64_t gen = 0;
- client_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
- newlen = STRLEN_0 (linkname);
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SYMLINK %"PRId64"/%s (%s): failed to get remote inode"
- " number parent",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen + newlen);
- hdr = gf_hdr_new (req, pathlen + baselen + newlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
- strcpy (req->linkname + pathlen + baselen, linkname);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_SYMLINK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, loc->inode, NULL);
- return 0;
-
-}
-
-/**
- * client_rename - rename function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @oldloc: location of old pathname
- * @newloc: location of new pathname
- *
- * external reference through client_protocol_xlator->fops->rename
- */
-
-int
-client_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rename_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t oldpathlen = 0;
- size_t oldbaselen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
- ino_t oldpar = 0;
- uint64_t oldgen = 0;
- ino_t newpar = 0;
- uint64_t newgen = 0;
-
- oldpathlen = STRLEN_0 (oldloc->path);
- oldbaselen = STRLEN_0 (oldloc->name);
- newpathlen = STRLEN_0 (newloc->path);
- newbaselen = STRLEN_0 (newloc->name);
- ret = inode_ctx_get2 (oldloc->parent, this, &oldpar, &oldgen);
- if (oldloc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "RENAME %"PRId64"/%s (%s): failed to get remote inode "
- "number for source parent",
- oldloc->parent->ino, oldloc->name, oldloc->path);
- goto unwind;
- }
-
- ret = inode_ctx_get2 (newloc->parent, this, &newpar, &newgen);
- if (newloc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to get remote inode "
- "number for destination parent",
- newloc->parent->ino, newloc->name, newloc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, (oldpathlen + oldbaselen +
- newpathlen + newbaselen));
- hdr = gf_hdr_new (req, (oldpathlen + oldbaselen +
- newpathlen + newbaselen));
-
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->oldpar = hton64 (oldpar);
- req->oldgen = hton64 (oldgen);
- req->newpar = hton64 (newpar);
- req->newgen = hton64 (newgen);
-
- strcpy (req->oldpath, oldloc->path);
- strcpy (req->oldbname + oldpathlen, oldloc->name);
- strcpy (req->newpath + oldpathlen + oldbaselen, newloc->path);
- strcpy (req->newbname + oldpathlen + oldbaselen + newpathlen,
- newloc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_RENAME,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_link - link function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @oldloc: location of old pathname
- * @newpath: new pathname
- *
- * external reference through client_protocol_xlator->fops->link
- */
-
-int
-client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_link_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t oldpathlen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
- ino_t oldino = 0;
- uint64_t oldgen = 0;
- ino_t newpar = 0;
- uint64_t newgen = 0;
- client_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- loc_copy (&local->loc, oldloc);
-
- frame->local = local;
-
- oldpathlen = STRLEN_0 (oldloc->path);
- newpathlen = STRLEN_0 (newloc->path);
- newbaselen = STRLEN_0 (newloc->name);
-
- ret = inode_ctx_get2 (oldloc->inode, this, &oldino, &oldgen);
- if (oldloc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LINK %"PRId64"/%s (%s) ==> %"PRId64" (%s): "
- "failed to get remote inode number for source inode",
- newloc->parent->ino, newloc->name, newloc->path,
- oldloc->ino, oldloc->path);
- goto unwind;
- }
-
- ret = inode_ctx_get2 (newloc->parent, this, &newpar, &newgen);
- if (newloc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LINK %"PRId64"/%s (%s) ==> %"PRId64" (%s): "
- "failed to get remote inode number destination parent",
- newloc->parent->ino, newloc->name, newloc->path,
- oldloc->ino, oldloc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, oldpathlen + newpathlen + newbaselen);
- hdr = gf_hdr_new (req, oldpathlen + newpathlen + newbaselen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- strcpy (req->oldpath, oldloc->path);
- strcpy (req->newpath + oldpathlen, newloc->path);
- strcpy (req->newbname + oldpathlen + newpathlen, newloc->name);
-
- req->oldino = hton64 (oldino);
- req->oldgen = hton64 (oldgen);
- req->newpar = hton64 (newpar);
- req->newgen = hton64 (newgen);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_LINK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL);
- return 0;
-}
-
-
-/**
- * client_truncate - truncate function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @offset:
- *
- * external reference through client_protocol_xlator->fops->truncate
- */
-
-int
-client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_truncate_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "TRUNCATE %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->offset = hton64 (offset);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_TRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_readv - readv function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @size:
- * @offset:
- *
- * external reference through client_protocol_xlator->fops->readv
- */
-
-int
-client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_read_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx, EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx, EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (size);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READ,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL, 0, NULL);
- return 0;
-
-}
-
-/**
- * client_writev - writev function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @vector:
- * @count:
- * @offset:
- *
- * external reference through client_protocol_xlator->fops->writev
- */
-
-int
-client_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_write_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (iov_length (vector, count));
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_WRITE,
- hdr, hdrlen, vector, count, iobref);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_statfs - statfs function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- *
- * external reference through client_protocol_xlator->fops->statfs
- */
-
-int
-client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_statfs_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- ino_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
-
- if (loc->inode) {
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "STATFS %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_STATFS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_flush - flush function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->fops->flush
- */
-
-int
-client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_flush_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FLUSH,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-/**
- * client_fsync - fsync function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->fsync
- */
-
-int
-client_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsync_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int32_t ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->data = hton32 (flags);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSYNC,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-int
-client_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_xattrop_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- int32_t ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
- char *buf = NULL;
-
- GF_VALIDATE_OR_GOTO ("client", this, unwind);
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
-
- if (dict) {
- ret = dict_allocate_and_serialize (dict, &buf, &dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
- }
-
- pathlen = STRLEN_0 (loc->path);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "XATTROP %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, dict_len + pathlen);
- hdr = gf_hdr_new (req, dict_len + pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
- if (dict) {
- memcpy (req->dict, buf, dict_len);
- GF_FREE (buf);
- }
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- strcpy (req->path + dict_len, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_XATTROP,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-client_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fxattrop_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- int64_t remote_fd = -1;
- int32_t ret = -1;
- ino_t ino = 0;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (dict) {
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
- }
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. "
- "returning EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- ino = fd->inode->ino;
- remote_fd = fdctx->remote_fd;
-
- hdrlen = gf_hdr_len (req, dict_len);
- hdr = gf_hdr_new (req, dict_len);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
- if (dict) {
- ret = dict_serialize (dict, req->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto unwind;
- }
- }
- req->fd = hton64 (remote_fd);
- req->ino = hton64 (ino);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FXATTROP,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
-
-}
-
-/**
- * client_setxattr - setxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @dict: dictionary which contains key:value to be set.
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->setxattr
- */
-
-int
-client_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
-
- pathlen = STRLEN_0 (loc->path);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SETXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, dict_len + pathlen);
- hdr = gf_hdr_new (req, dict_len + pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
-
- ret = dict_serialize (dict, req->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto unwind;
- }
-
- strcpy (req->path + dict_len, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_SETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-}
-
-/**
- * client_fsetxattr - fsetxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: fd
- * @dict: dictionary which contains key:value to be set.
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->fsetxattr
- */
-
-int
-client_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsetxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- ino_t ino;
- int ret = -1;
- int64_t remote_fd = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- ino = fd->inode->ino;
- remote_fd = fdctx->remote_fd;
-
- hdrlen = gf_hdr_len (req, dict_len);
- hdr = gf_hdr_new (req, dict_len);
-
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->fd = hton64 (remote_fd);
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
-
- ret = dict_serialize (dict, req->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto unwind;
- }
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-}
-
-/**
- * client_getxattr - getxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- *
- * external reference through client_protocol_xlator->fops->getxattr
- */
-
-int
-client_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_getxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- size_t namelen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- if (name)
- namelen = STRLEN_0 (name);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + namelen);
- hdr = gf_hdr_new (req, pathlen + namelen);
- GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->namelen = hton32 (namelen);
- strcpy (req->path, loc->path);
- if (name)
- strcpy (req->name + pathlen, name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_GETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/**
- * client_fgetxattr - fgetxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: fd
- *
- * external reference through client_protocol_xlator->fops->fgetxattr
- */
-
-int
-client_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fgetxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- size_t namelen = 0;
- ino_t ino = 0;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- if (name)
- namelen = STRLEN_0 (name);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get remote fd. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- ino = fd->inode->ino;
- remote_fd = fdctx->remote_fd;
-
- hdrlen = gf_hdr_len (req, namelen);
- hdr = gf_hdr_new (req, namelen);
-
- GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->fd = hton64 (remote_fd);
- req->namelen = hton32 (namelen);
-
- if (name)
- strcpy (req->name, name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FGETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/**
- * client_removexattr - removexattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- * @name:
- *
- * external reference through client_protocol_xlator->fops->removexattr
- */
-
-int
-client_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_removexattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t namelen = 0;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- namelen = STRLEN_0 (name);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + namelen);
- hdr = gf_hdr_new (req, pathlen + namelen);
- GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
- strcpy (req->name + pathlen, name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_REMOVEXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-}
-
-/**
- * client_opendir - opendir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- *
- * external reference through client_protocol_xlator->fops->opendir
- */
-
-int
-client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd)
-{
- gf_fop_opendir_req_t *req = NULL;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- int ret = -1;
- ino_t ino = 0;
- uint64_t gen = 0;
- size_t pathlen = 0;
- client_local_t *local = NULL;
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
- local->fd = fd_ref (fd);
-
- frame->local = local;
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPENDIR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- pathlen = STRLEN_0 (loc->path);
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPENDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, fd);
- return 0;
-
-}
-
-/**
- * client_readdirp - readdirp function for client protocol
- * @frame: call frame
- * @this: this translator structure
- *
- * external reference through client_protocol_xlator->fops->readdirp
- */
-
-int
-client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readdirp_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- goto unwind;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (size);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READDIRP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
-
-}
-
-
-/**
- * client_readdir - readdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- *
- * external reference through client_protocol_xlator->fops->readdir
- */
-
-int
-client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readdir_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- goto unwind;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (size);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
-
-}
-
-/**
- * client_fsyncdir - fsyncdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->fsyncdir
- */
-
-int
-client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsyncdir_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int32_t ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- goto unwind;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->data = hton32 (flags);
- req->fd = hton64 (remote_fd);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSYNCDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
-}
-
-/**
- * client_access - access function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- * @mode:
- *
- * external reference through client_protocol_xlator->fops->access
- */
-
-int
-client_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_access_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- uint64_t gen = 0;
- size_t pathlen = 0;
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ACCESS %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- pathlen = STRLEN_0 (loc->path);
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->mask = hton32 (mask);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_ACCESS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-/**
- * client_ftrucate - ftruncate function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @offset: offset to truncate to
- *
- * external reference through client_protocol_xlator->fops->ftruncate
- */
-
-int
-client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_ftruncate_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t hdrlen = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- goto unwind;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FTRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_fstat - fstat function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->fops->fstat
- */
-
-int
-client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fstat_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t hdrlen = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- goto unwind;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSTAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_lk - lk function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @cmd: lock command
- * @lock:
- *
- * external reference through client_protocol_xlator->fops->lk
- */
-
-int
-client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lk_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get"
- " fd ctx. EBADFD", fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- if (cmd == F_GETLK || cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (cmd == F_SETLK || cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (cmd == F_SETLKW || cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->cmd = hton32 (gf_cmd);
- req->type = hton32 (gf_type);
- gf_flock_from_flock (&req->flock, flock);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_LK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-/**
- * client_inodelk - inodelk function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @inode: inode structure
- * @cmd: lock command
- * @lock: flock struct
- *
- * external reference through client_protocol_xlator->fops->inodelk
- */
-
-int
-client_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *flock)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_inodelk_req_t *req = NULL;
- size_t hdrlen = 0;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
- size_t pathlen = 0;
- size_t vollen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- vollen = STRLEN_0 (volume);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "INODELK %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- if (cmd == F_GETLK || cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (cmd == F_SETLK || cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (cmd == F_SETLKW || cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + vollen);
- hdr = gf_hdr_new (req, pathlen + vollen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- strcpy (req->path, loc->path);
- strcpy (req->path + pathlen, volume);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
-
- req->cmd = hton32 (gf_cmd);
- req->type = hton32 (gf_type);
- gf_flock_from_flock (&req->flock, flock);
-
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST,
- GF_PROTO_FOP_INODELK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-
-/**
- * client_finodelk - finodelk function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @inode: inode structure
- * @cmd: lock command
- * @lock: flock struct
- *
- * external reference through client_protocol_xlator->fops->finodelk
- */
-
-int
-client_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *flock)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_finodelk_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t vollen = 0;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- int64_t remote_fd = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- vollen = STRLEN_0 (volume);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- if (cmd == F_GETLK || cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (cmd == F_SETLK || cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (cmd == F_SETLKW || cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- hdrlen = gf_hdr_len (req, vollen);
- hdr = gf_hdr_new (req, vollen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- strcpy (req->volume, volume);
-
- req->fd = hton64 (remote_fd);
-
- req->cmd = hton32 (gf_cmd);
- req->type = hton32 (gf_type);
- gf_flock_from_flock (&req->flock, flock);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST,
- GF_PROTO_FOP_FINODELK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-}
-
-
-int
-client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *name, entrylk_cmd cmd,
- entrylk_type type)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_entrylk_req_t *req = NULL;
- size_t pathlen = 0;
- size_t vollen = 0;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- uint64_t gen = 0;
- size_t namelen = 0;
-
- pathlen = STRLEN_0 (loc->path);
- vollen = STRLEN_0 (volume);
-
- if (name)
- namelen = STRLEN_0 (name);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ENTRYLK %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + vollen + namelen);
- hdr = gf_hdr_new (req, pathlen + vollen + namelen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->namelen = hton64 (namelen);
-
- strcpy (req->path, loc->path);
- if (name)
- strcpy (req->name + pathlen, name);
- strcpy (req->volume + pathlen + namelen, volume);
-
- req->cmd = hton32 (cmd);
- req->type = hton32 (type);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_ENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-
-}
-
-
-int
-client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *name, entrylk_cmd cmd,
- entrylk_type type)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fentrylk_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t vollen = 0;
- size_t namelen = 0;
- size_t hdrlen = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- if (name)
- namelen = STRLEN_0 (name);
-
- conf = this->private;
-
- vollen = STRLEN_0 (volume);
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, namelen + vollen);
- hdr = gf_hdr_new (req, namelen + vollen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->namelen = hton64 (namelen);
-
- if (name)
- strcpy (req->name, name);
-
- strcpy (req->volume + namelen, volume);
-
- req->cmd = hton32 (cmd);
- req->type = hton32 (type);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
-
- STACK_UNWIND (frame, -1, EINVAL);
- return 0;
-}
-
-/*
- * client_lookup - lookup function for client protocol
- * @frame: call frame
- * @this:
- * @loc: location
- *
- * not for external reference
- */
-
-int
-client_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lookup_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- ino_t par = 0;
- uint64_t gen = 0;
- size_t dictlen = 0;
- size_t pathlen = 0;
- size_t baselen = 0;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- client_local_t *local = NULL;
- char *buf = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- if (loc->ino != 1 && loc->parent) {
- ret = inode_ctx_get2 (loc->parent, this, &par, &gen);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "LOOKUP %"PRId64"/%s (%s): failed to get "
- "remote inode number for parent",
- loc->parent->ino, loc->name, loc->path);
- goto unwind;
- }
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- baselen = STRLEN_0 (loc->name);
- } else {
- ino = 1;
- }
-
- pathlen = STRLEN_0 (loc->path);
-
- if (xattr_req) {
- ret = dict_allocate_and_serialize (xattr_req, &buf, &dictlen);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- xattr_req);
- goto unwind;
- }
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen + dictlen);
- hdr = gf_hdr_new (req, pathlen + baselen + dictlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->par = hton64 (par);
- strcpy (req->path, loc->path);
- if (baselen)
- strcpy (req->path + pathlen, loc->name);
-
- if (dictlen > 0) {
- memcpy (req->dict + pathlen + baselen, buf, dictlen);
- GF_FREE (buf);
- }
-
- req->dictlen = hton32 (dictlen);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_LOOKUP,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno, (loc)?loc->inode:NULL, NULL, NULL);
- return ret;
-}
-
-
-int
-client_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- ino_t ino = 0;
- uint64_t gen = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("client", this, unwind);
- GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
-
- pathlen = STRLEN_0 (loc->path);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "SETATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- goto unwind;
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- strcpy (req->path, loc->path);
-
- gf_stat_from_iatt (&req->stbuf, stbuf);
- req->valid = hton32 (valid);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_SETATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-client_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsetattr_req_t *req = NULL;
- size_t hdrlen = 0;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- int64_t remote_fd = -1;
- client_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("client", this, unwind);
- GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- gf_stat_from_iatt (&req->stbuf, stbuf);
- req->valid = hton32 (valid);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSETATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
-}
-
-
-int
-client_fdctx_destroy (xlator_t *this, client_fd_ctx_t *fdctx)
-{
- call_frame_t *fr = NULL;
- int32_t ret = -1;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_cbk_release_req_t *req = NULL;
- gf_cbk_releasedir_req_t *reqdir = NULL;
- int64_t remote_fd = -1;
- int op = 0;
-
- remote_fd = fdctx->remote_fd;
-
- if (remote_fd == -1)
- goto out;
-
- if (fdctx->is_dir) {
- hdrlen = gf_hdr_len (reqdir, 0);
- hdr = gf_hdr_new (reqdir, 0);
- op = GF_CBK_RELEASEDIR;
- reqdir = gf_param (hdr);
- reqdir->fd = hton64 (remote_fd);
- } else {
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- op = GF_CBK_RELEASE;
- req = gf_param (hdr);
- req->fd = hton64 (remote_fd);
- }
-
- fr = create_frame (this, this->ctx->pool);
-
- ret = protocol_client_xfer (fr, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_CBK_REQUEST, op,
- hdr, hdrlen, NULL, 0, NULL);
-
-out:
- inode_unref (fdctx->inode);
- GF_FREE (fdctx);
-
- return ret;
-}
-
-
-/**
- * client_releasedir - releasedir function for client protocol
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->cbks->releasedir
- */
-
-int
-client_releasedir (xlator_t *this, fd_t *fd)
-{
- int64_t remote_fd = -1;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_del_ctx (fd, this);
- if (fdctx != NULL) {
- remote_fd = fdctx->remote_fd;
-
- /* fdctx->remote_fd == -1 indicates a reopen attempt
- in progress. Just mark ->released = 1 and let
- reopen_cbk handle releasing
- */
-
- if (remote_fd != -1)
- list_del_init (&fdctx->sfd_pos);
-
- fdctx->released = 1;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (remote_fd != -1)
- client_fdctx_destroy (this, fdctx);
-
- return 0;
-}
-
-
-/**
- * client_release - release function for client protocol
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->cbks->release
- *
- */
-int
-client_release (xlator_t *this, fd_t *fd)
-{
- int64_t remote_fd = -1;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_del_ctx (fd, this);
- if (fdctx != NULL) {
- remote_fd = fdctx->remote_fd;
-
- /* fdctx->remote_fd == -1 indicates a reopen attempt
- in progress. Just mark ->released = 1 and let
- reopen_cbk handle releasing
- */
-
- if (remote_fd != -1)
- list_del_init (&fdctx->sfd_pos);
-
- fdctx->released = 1;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (remote_fd != -1)
- client_fdctx_destroy (this, fdctx);
-
- return 0;
-}
-
-/*
- * MGMT_OPS
- */
-
-/* Callbacks */
-
-int
-client_fxattrop_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_xattrop_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t gf_errno = 0;
- int32_t op_errno = 0;
- int32_t dict_len = 0;
- dict_t *dict = NULL;
- int32_t ret = -1;
- char *dictbuf = NULL;
-
- rsp = gf_param (hdr);
- GF_VALIDATE_OR_GOTO (frame->this->name, rsp, fail);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret >= 0) {
- op_ret = -1;
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail);
-
- dict = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, fail);
-
- ret = dict_unserialize (dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- op_errno = -ret;
- goto fail;
- } else {
- dict->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, dict);
-
- if (dictbuf)
- GF_FREE (dictbuf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-
-int
-client_xattrop_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_xattrop_rsp_t *rsp = NULL;
- int32_t op_ret = -1;
- int32_t gf_errno = EINVAL;
- int32_t op_errno = 0;
- int32_t dict_len = 0;
- dict_t *dict = NULL;
- int32_t ret = -1;
- char *dictbuf = NULL;
-
- rsp = gf_param (hdr);
- GF_VALIDATE_OR_GOTO (frame->this->name, rsp, fail);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- if (op_ret >= 0) {
- op_ret = -1;
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail);
-
- dict = get_new_dict();
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, fail);
- dict_ref (dict);
-
- ret = dict_unserialize (dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto fail;
- } else {
- dict->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
-
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, dict);
-
- if (dictbuf)
- GF_FREE (dictbuf);
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-/*
- * client_create_cbk - create callback function for client protocol
- * @frame: call frame
- * @args: arguments in dictionary
- *
- * not for external reference
- */
-
-int
-client_create_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_create_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- fd_t *fd = NULL;
- inode_t *inode = NULL;
- struct iatt stbuf = {0, };
- struct iatt preparent = {0, };
- struct iatt postparent = {0, };
- int64_t remote_fd = 0;
- int32_t ret = -1;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0) {
- remote_fd = ntoh64 (rsp->fd);
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
-
- ino = stbuf.ia_ino;
- gen = stbuf.ia_gen;
- }
-
- if (op_ret >= 0) {
- ret = inode_ctx_put2 (local->loc.inode, frame->this, ino, gen);
-
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind_out;
- }
-
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_client_mt_client_fd_ctx_t);
- if (!fdctx) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind_out;
- }
-
- fdctx->remote_fd = remote_fd;
- fdctx->inode = inode_ref (fd->inode);
- fdctx->ino = ino;
- fdctx->gen = gen;
- fdctx->flags = local->flags;
-
- INIT_LIST_HEAD (&fdctx->sfd_pos);
-
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
- }
-unwind_out:
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, &stbuf,
- &preparent, &postparent);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-
-/*
- * client_open_cbk - open callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-int
-client_open_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOTCONN;
- fd_t *fd = NULL;
- int64_t remote_fd = 0;
- gf_fop_open_rsp_t *rsp = NULL;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
- ino_t ino = 0;
- uint64_t gen = 0;
-
-
- local = frame->local;
-
- if (local->op) {
- local->op (frame, hdr, hdrlen, iobuf);
- return 0;
- }
-
- frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0) {
- remote_fd = ntoh64 (rsp->fd);
- }
-
- if (op_ret >= 0) {
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_client_mt_client_fd_ctx_t);
- if (!fdctx) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind_out;
- }
-
- inode_ctx_get2 (fd->inode, frame->this, &ino, &gen);
-
- fdctx->remote_fd = remote_fd;
- fdctx->inode = inode_ref (fd->inode);
- fdctx->ino = ino;
- fdctx->gen = gen;
- fdctx->flags = local->flags;
- fdctx->wbflags = local->wbflags;
-
- INIT_LIST_HEAD (&fdctx->sfd_pos);
-
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
- }
-unwind_out:
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_stat_cbk - stat callback for client protocol
- * @frame: call frame
- * @args: arguments dictionary
- *
- * not for external reference
- */
-
-int
-client_stat_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt stbuf = {0, };
- gf_fop_stat_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-
-/*
- * client_mknod_cbk - mknod callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_mknod_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mknod_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- int ret = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- ret = inode_ctx_put2 (local->loc.inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): failed to set remote"
- " inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
-
- STACK_UNWIND (frame, -1, EINVAL, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf,
- &preparent, &postparent);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_symlink_cbk - symlink callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_symlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_symlink_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt stbuf = {0, };
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "SYMLINK %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- STACK_UNWIND (frame, -1, EINVAL, inode, NULL,
- NULL, NULL);
- return 0;
- }
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf,
- &preparent, &postparent);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_link_cbk - link callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_link_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_link_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf,
- &preparent, &postparent);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_truncate_cbk - truncate callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_truncate_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_truncate_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt prestat = {0, };
- struct iatt poststat = {0, };
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->prestat, &prestat);
- gf_stat_to_iatt (&rsp->poststat, &poststat);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat);
-
- return 0;
-}
-
-/* client_fstat_cbk - fstat callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_fstat_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt stbuf = {0, };
- gf_fop_fstat_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_ftruncate_cbk - ftruncate callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-int
-client_ftruncate_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_ftruncate_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt prestat = {0, };
- struct iatt poststat = {0, };
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->prestat, &prestat);
- gf_stat_to_iatt (&rsp->poststat, &poststat);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat);
-
- return 0;
-}
-
-
-/* client_readv_cbk - readv callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external referece
- */
-
-int
-client_readv_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_read_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iovec vector = {0, };
- struct iatt stbuf = {0, };
- struct iobref *iobref = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret != -1) {
- iobref = iobref_new ();
- gf_stat_to_iatt (&rsp->stat, &stbuf);
- vector.iov_len = op_ret;
-
- if (op_ret > 0) {
- vector.iov_base = iobuf->ptr;
- iobref_add (iobref, iobuf);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &vector, 1, &stbuf, iobref);
-
- if (iobref)
- iobref_unref (iobref);
-
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-/*
- * client_write_cbk - write callback for client protocol
- * @frame: cal frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_write_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_write_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt prestat = {0, };
- struct iatt poststat = {0, };
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_iatt (&rsp->prestat, &prestat);
- gf_stat_to_iatt (&rsp->poststat, &poststat);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat);
-
- return 0;
-}
-
-
-int
-client_readdirp_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_readdirp_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- uint32_t buf_size = 0;
- gf_dirent_t entries;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- INIT_LIST_HEAD (&entries.list);
- if (op_ret > 0) {
- buf_size = ntoh32 (rsp->size);
- gf_dirent_unserialize (&entries, rsp->buf, buf_size);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &entries);
-
- gf_dirent_free (&entries);
-
- return 0;
-}
-
-
-int
-client_readdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_readdir_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- uint32_t buf_size = 0;
- gf_dirent_t entries;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- INIT_LIST_HEAD (&entries.list);
- if (op_ret > 0) {
- buf_size = ntoh32 (rsp->size);
- gf_dirent_unserialize (&entries, rsp->buf, buf_size);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &entries);
-
- gf_dirent_free (&entries);
-
- return 0;
-}
-
-/*
- * client_fsync_cbk - fsync callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_fsync_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt prestat = {0, };
- struct iatt poststat = {0,};
- gf_fop_fsync_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->prestat, &prestat);
- gf_stat_to_iatt (&rsp->poststat, &poststat);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat);
-
- return 0;
-}
-
-/*
- * client_unlink_cbk - unlink callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_unlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_unlink_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &preparent, &postparent);
-
- return 0;
-}
-
-/*
- * client_rename_cbk - rename callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_rename_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt stbuf = {0, };
- gf_fop_rename_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt preoldparent = {0, };
- struct iatt postoldparent = {0, };
- struct iatt prenewparent = {0, };
- struct iatt postnewparent = {0, };
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
- gf_stat_to_iatt (&rsp->preoldparent, &preoldparent);
- gf_stat_to_iatt (&rsp->postoldparent, &postoldparent);
- gf_stat_to_iatt (&rsp->prenewparent, &prenewparent);
- gf_stat_to_iatt (&rsp->postnewparent, &postnewparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf, &preoldparent,
- &postoldparent, &prenewparent, &postnewparent);
-
- return 0;
-}
-
-
-/*
- * client_readlink_cbk - readlink callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-int
-client_readlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_readlink_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- char *link = NULL;
- struct iatt stbuf = {0,};
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret > 0) {
- link = rsp->path;
- gf_stat_to_iatt (&rsp->buf, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, link, &stbuf);
- return 0;
-}
-
-/*
- * client_mkdir_cbk - mkdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_mkdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mkdir_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- int ret = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- local = frame->local;
- inode = local->loc.inode;
- frame->local = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- ret = inode_ctx_put2 (inode, frame->this, stbuf.ia_ino,
- stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "MKDIR %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- STACK_UNWIND (frame, -1, EINVAL, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf,
- &preparent, &postparent);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_flush_cbk - flush callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_flush_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_opendir_cbk - opendir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_opendir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOTCONN;
- fd_t *fd = NULL;
- int64_t remote_fd = 0;
- gf_fop_opendir_rsp_t *rsp = NULL;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
- ino_t ino = 0;
- uint64_t gen = 0;
-
-
- local = frame->local;
-
- if (local->op) {
- local->op (frame, hdr, hdrlen, iobuf);
- return 0;
- }
-
- frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0) {
- remote_fd = ntoh64 (rsp->fd);
- }
-
- if (op_ret >= 0) {
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_client_mt_client_fd_ctx_t);
- if (!fdctx) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind_out;
- }
-
- inode_ctx_get2 (fd->inode, frame->this, &ino, &gen);
-
- fdctx->remote_fd = remote_fd;
- fdctx->inode = inode_ref (fd->inode);
- fdctx->ino = ino;
- fdctx->gen = gen;
-
- fdctx->is_dir = 1;
-
- INIT_LIST_HEAD (&fdctx->sfd_pos);
-
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
- }
-unwind_out:
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_rmdir_cbk - rmdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_rmdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rmdir_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->preparent, &preparent);
- gf_stat_to_iatt (&rsp->postparent, &postparent);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &preparent, &postparent);
-
- return 0;
-}
-
-/*
- * client_access_cbk - access callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_access_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_access_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_lookup_cbk - lookup callback for client protocol
- *
- * @frame: call frame
- * @args: arguments dictionary
- *
- * not for external reference
- */
-
-int
-client_lookup_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt stbuf = {0, };
- struct iatt postparent = {0, };
- inode_t *inode = NULL;
- dict_t *xattr = NULL;
- gf_fop_lookup_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- size_t dict_len = 0;
- char *dictbuf = NULL;
- int32_t ret = -1;
- int32_t gf_errno = 0;
- client_local_t *local = NULL;
- ino_t oldino = 0;
- uint64_t oldgen = 0;
-
- local = frame->local;
- inode = local->loc.inode;
- frame->local = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- gf_stat_to_iatt (&rsp->postparent, &postparent);
-
- if (op_ret == 0) {
- op_ret = -1;
- gf_stat_to_iatt (&rsp->stat, &stbuf);
-
- ret = inode_ctx_get2 (inode, frame->this, &oldino, &oldgen);
- if (oldino != stbuf.ia_ino || oldgen != stbuf.ia_gen) {
- if (oldino) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): "
- "inode number changed from "
- "{%"PRId64",%"PRId64"} to {%"PRId64",%"PRId64"}",
- local->loc.parent ?
- local->loc.parent->ino : (uint64_t) 0,
- local->loc.name,
- local->loc.path,
- oldgen, oldino, stbuf.ia_gen, stbuf.ia_ino);
- op_errno = ESTALE;
- goto fail;
- }
-
- ret = inode_ctx_put2 (inode, frame->this,
- stbuf.ia_ino, stbuf.ia_gen);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s) : "
- "failed to set remote inode "
- "number to inode ctx",
- local->loc.parent ?
- local->loc.parent->ino : (uint64_t) 0,
- local->loc.name,
- local->loc.path);
- op_errno = EINVAL;
- goto fail;
- }
- }
-
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail);
-
- xattr = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, xattr, fail);
-
- ret = dict_unserialize (dictbuf, dict_len, &xattr);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to "
- "unserialize dictionary",
- local->loc.path, inode->ino);
- goto fail;
- } else {
- xattr->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, xattr,
- &postparent);
-
- client_local_wipe (local);
-
- if (dictbuf)
- GF_FREE (dictbuf);
-
- if (xattr)
- dict_unref (xattr);
-
- return 0;
-}
-
-static int32_t
-client_setattr_cbk (call_frame_t *frame,gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt statpre = {0, };
- struct iatt statpost = {0, };
- gf_fop_setattr_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->statpre, &statpre);
- gf_stat_to_iatt (&rsp->statpost, &statpost);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &statpre, &statpost);
-
- return 0;
-}
-
-static int32_t
-client_fsetattr_cbk (call_frame_t *frame,gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct iatt statpre = {0, };
- struct iatt statpost = {0, };
- gf_fop_setattr_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_iatt (&rsp->statpre, &statpre);
- gf_stat_to_iatt (&rsp->statpost, &statpost);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &statpre, &statpost);
-
- return 0;
-}
-
-
-int
-gf_free_direntry (dir_entry_t *head)
-{
- dir_entry_t *prev = NULL;
- dir_entry_t *trav = NULL;
-
- prev = head;
- GF_VALIDATE_OR_GOTO ("client-protocol", prev, fail);
-
- trav = head->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (head);
-fail:
- return 0;
-}
-
-/*
- * client_statfs_cbk - statfs callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_statfs_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct statvfs stbuf = {0, };
- gf_fop_statfs_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_statfs_to_statfs (&rsp->statfs, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_fsyncdir_cbk - fsyncdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_fsyncdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_setxattr_cbk - setxattr callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_setxattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_setxattr_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_getxattr_cbk - getxattr callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_getxattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_getxattr_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t gf_errno = 0;
- int32_t op_errno = 0;
- int32_t dict_len = 0;
- dict_t *dict = NULL;
- int32_t ret = -1;
- char *dictbuf = NULL;
- client_local_t *local = NULL;
-
- local = frame->local;
- frame->local = NULL;
-
- rsp = gf_param (hdr);
- GF_VALIDATE_OR_GOTO (frame->this->name, rsp, fail);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret >= 0) {
- op_ret = -1;
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail);
-
- dict = dict_new();
- GF_VALIDATE_OR_GOTO (frame->this->name, dict, fail);
-
- ret = dict_unserialize (dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to "
- "unserialize xattr dictionary",
- local->loc.path,
- local->loc.inode->ino);
- goto fail;
- } else {
- dict->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, dict);
-
- client_local_wipe (local);
-
- if (dictbuf)
- GF_FREE (dictbuf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-/*
- * client_removexattr_cbk - removexattr callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_removexattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr,
- size_t hdrlen, struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_lk_cbk - lk callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_lk_common_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct flock lock = {0,};
- gf_fop_lk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_flock_to_flock (&rsp->flock, &lock);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &lock);
- return 0;
-}
-
-/*
- * client_gf_file_lk_cbk - gf_file_lk callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_inodelk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_inodelk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-client_finodelk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_finodelk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/*
- * client_entrylk_cbk - entrylk callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_entrylk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_entrylk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int
-client_fentrylk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fentrylk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-
-/*
- * client_getspec - getspec function for client protocol
- * @frame: call frame
- * @this: client protocol xlator structure
- * @flag:
- *
- * external reference through client_protocol_xlator->fops->getspec
- */
-
-int
-client_getspec (call_frame_t *frame, xlator_t *this, const char *key,
- int32_t flag)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_mop_getspec_req_t *req = NULL;
- size_t hdrlen = -1;
- int keylen = 0;
- int ret = -1;
-
- if (key)
- keylen = STRLEN_0 (key);
-
- hdrlen = gf_hdr_len (req, keylen);
- hdr = gf_hdr_new (req, keylen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
- req->flags = hton32 (flag);
- req->keylen = hton32 (keylen);
- if (keylen)
- strcpy (req->key, key);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_GETSPEC,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- GF_FREE (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-/*
- * client_getspec_cbk - getspec callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_getspec_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_mop_getspec_rsp_t *rsp = NULL;
- char *spec_data = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
- rsp = gf_param (hdr);
-
- if (op_ret >= 0) {
- spec_data = rsp->spec;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, spec_data);
- return 0;
-}
-
-int
-client_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_checksum_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- uint64_t gen = 0;
-
- hdrlen = gf_hdr_len (req, strlen (loc->path) + 1);
- hdr = gf_hdr_new (req, strlen (loc->path) + 1);
- req = gf_param (hdr);
-
- ret = inode_ctx_get2 (loc->inode, this, &ino, &gen);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHECKSUM %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
-
- }
-
- req->ino = hton64 (ino);
- req->gen = hton64 (gen);
- req->flag = hton32 (flag);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_CHECKSUM,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-}
-
-
-int
-client_checksum_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_checksum_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
- unsigned char *fchecksum = NULL;
- unsigned char *dchecksum = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
- if (op_ret >= 0) {
- fchecksum = rsp->fchecksum;
- dchecksum = rsp->dchecksum + NAME_MAX;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
- return 0;
-}
-
-
-int
-client_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rchecksum_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
-
- int64_t remote_fd = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- req = gf_param (hdr);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, 0, NULL);
- return 0;
- }
-
- if (fdctx->remote_fd == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, 0, NULL);
- return 0;
- }
-
- remote_fd = fdctx->remote_fd;
-
- req->fd = hton64 (remote_fd);
- req->offset = hton64 (offset);
- req->len = hton32 (len);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_RCHECKSUM,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-}
-
-
-int
-client_rchecksum_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rchecksum_rsp_t *rsp = NULL;
-
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
- uint32_t weak_checksum = 0;
- unsigned char *strong_checksum = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
- if (op_ret >= 0) {
- weak_checksum = rsp->weak_checksum;
- strong_checksum = rsp->strong_checksum;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, weak_checksum, strong_checksum);
-
- return 0;
-}
-
-
-/*
- * client_setspec_cbk - setspec callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_setspec_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-
-
-int
-protocol_client_reopendir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr,
- size_t hdrlen, struct iobuf *iobuf)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOTCONN;
- int64_t remote_fd = -1;
- gf_fop_open_rsp_t *rsp = NULL;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
-
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fdctx = local->fdctx;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0)
- remote_fd = ntoh64 (rsp->fd);
-
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "reopendir on %s returned %d (%"PRId64")",
- local->loc.path, op_ret, remote_fd);
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx->remote_fd = remote_fd;
-
- if (!fdctx->released) {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- fdctx = NULL;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx)
- client_fdctx_destroy (frame->this, fdctx);
-
- STACK_DESTROY (frame->root);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-
-
-int
-protocol_client_reopendir (xlator_t *this, client_fd_ctx_t *fdctx)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_fop_opendir_req_t *req = NULL;
- size_t pathlen = 0;
- client_local_t *local = NULL;
- inode_t *inode = NULL;
- char *path = NULL;
- call_frame_t *frame = NULL;
-
- inode = fdctx->inode;
-
- ret = inode_path (inode, NULL, &path);
- if (ret < 0) {
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- if (!local) {
- goto out;
- }
-
- local->fdctx = fdctx;
- local->op = protocol_client_reopendir_cbk;
- local->loc.path = path; path = NULL;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
-
- pathlen = STRLEN_0 (local->loc.path);
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (fdctx->ino);
- req->gen = hton64 (fdctx->gen);
-
- strcpy (req->path, local->loc.path);
-
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "attempting reopendir on %s", local->loc.path);
-
- frame->local = local; local = NULL;
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPENDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-
-out:
- if (frame)
- STACK_DESTROY (frame->root);
-
- if (local)
- client_local_wipe (local);
-
- if (path)
- GF_FREE (path);
-
- return 0;
-}
-
-
-int
-protocol_client_reopen_cbk (call_frame_t *frame, gf_hdr_common_t *hdr,
- size_t hdrlen, struct iobuf *iobuf)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOTCONN;
- int64_t remote_fd = -1;
- gf_fop_open_rsp_t *rsp = NULL;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
-
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fdctx = local->fdctx;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0)
- remote_fd = ntoh64 (rsp->fd);
-
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "reopen on %s returned %d (%"PRId64")",
- local->loc.path, op_ret, remote_fd);
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx->remote_fd = remote_fd;
-
- if (!fdctx->released) {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- fdctx = NULL;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx)
- client_fdctx_destroy (frame->this, fdctx);
-
- STACK_DESTROY (frame->root);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-
-int
-protocol_client_reopen (xlator_t *this, client_fd_ctx_t *fdctx)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_fop_open_req_t *req = NULL;
- size_t pathlen = 0;
- client_local_t *local = NULL;
- inode_t *inode = NULL;
- char *path = NULL;
- call_frame_t *frame = NULL;
-
- inode = fdctx->inode;
-
- ret = inode_path (inode, NULL, &path);
- if (ret < 0) {
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t);
- if (!local) {
- goto out;
- }
-
- local->fdctx = fdctx;
- local->op = protocol_client_reopen_cbk;
- local->loc.path = path; path = NULL;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
-
- pathlen = STRLEN_0 (local->loc.path);
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (fdctx->ino);
- req->gen = hton64 (fdctx->gen);
- req->flags = hton32 (gf_flags_from_flags (fdctx->flags));
- req->wbflags = hton32 (fdctx->wbflags);
- strcpy (req->path, local->loc.path);
-
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "attempting reopen on %s", local->loc.path);
-
- frame->local = local; local = NULL;
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPEN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-
-out:
- if (frame)
- STACK_DESTROY (frame->root);
-
- if (local)
- client_local_wipe (local);
-
- if (path)
- GF_FREE (path);
-
- return 0;
-
-}
-
-
-int
-protocol_client_post_handshake (call_frame_t *frame, xlator_t *this)
-{
- client_conf_t *conf = NULL;
- client_fd_ctx_t *tmp = NULL;
- client_fd_ctx_t *fdctx = NULL;
- xlator_list_t *parent = NULL;
- struct list_head reopen_head;
-
- conf = this->private;
- INIT_LIST_HEAD (&reopen_head);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds,
- sfd_pos) {
- if (fdctx->remote_fd != -1)
- continue;
-
- list_del (&fdctx->sfd_pos);
- list_add_tail (&fdctx->sfd_pos, &reopen_head);
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
- list_del_init (&fdctx->sfd_pos);
-
- if (fdctx->is_dir)
- protocol_client_reopendir (this, fdctx);
- else
- protocol_client_reopen (this, fdctx);
- }
-
- parent = this->parents;
-
- while (parent) {
- xlator_notify (parent->xlator, GF_EVENT_CHILD_UP,
- this);
- parent = parent->next;
- }
-
- return 0;
-}
-
-/*
- * client_setvolume_cbk - setvolume callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_setvolume_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- client_conf_t *conf = NULL;
- gf_mop_setvolume_rsp_t *rsp = NULL;
- client_connection_t *conn = NULL;
- glusterfs_ctx_t *ctx = NULL;
- xlator_t *this = NULL;
- xlator_list_t *parent = NULL;
- transport_t *trans = NULL;
- dict_t *reply = NULL;
- char *remote_subvol = NULL;
- char *remote_error = NULL;
- char *process_uuid = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t dict_len = 0;
- transport_t *peer_trans = NULL;
- uint64_t peer_trans_int = 0;
-
- trans = frame->local; frame->local = NULL;
- this = frame->this;
- conn = trans->xl_private;
- conf = this->private;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setvolume failed (%s)",
- strerror (op_errno));
- goto out;
- }
-
- reply = dict_new ();
- GF_VALIDATE_OR_GOTO (this->name, reply, out);
-
- dict_len = ntoh32 (rsp->dict_len);
- ret = dict_unserialize (rsp->buf, dict_len, &reply);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize buffer(%p) to dictionary",
- rsp->buf);
- goto out;
- }
-
- ret = dict_get_str (reply, "ERROR", &remote_error);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get ERROR string from reply dictionary");
- }
-
- ret = dict_get_str (reply, "process-uuid", &process_uuid);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get 'process-uuid' from reply dictionary");
- }
-
- if (op_ret < 0) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "SETVOLUME on remote-host failed: %s",
- remote_error ? remote_error : strerror (op_errno));
- errno = op_errno;
- if (op_errno == ESTALE) {
- parent = trans->xl->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_VOLFILE_MODIFIED,
- trans->xl);
- parent = parent->next;
- }
- }
-
- } else {
- ret = dict_get_str (this->options, "remote-subvolume",
- &remote_subvol);
- if (!remote_subvol)
- goto out;
-
- ctx = this->ctx;
-
- if (process_uuid && !strcmp (ctx->process_uuid,process_uuid)) {
- ret = dict_get_uint64 (reply, "transport-ptr",
- &peer_trans_int);
-
- peer_trans = (void *) (long) (peer_trans_int);
-
- gf_log (this->name, GF_LOG_WARNING,
- "attaching to the local volume '%s'",
- remote_subvol);
-
- transport_setpeer (trans, peer_trans);
-
- }
-
- gf_log (trans->xl->name, GF_LOG_NORMAL,
- "Connected to %s, attached "
- "to remote volume '%s'.",
- trans->peerinfo.identifier, remote_subvol);
-
- pthread_mutex_lock (&(conn->lock));
- {
- conn->connected = 1;
- }
- pthread_mutex_unlock (&(conn->lock));
-
- protocol_client_post_handshake (frame, frame->this);
- }
-
- conf->connecting = 0;
-out:
-
- if (-1 == op_ret) {
- /* Let the connection/re-connection happen in
- * background, for now, don't hang here,
- * tell the parents that i am all ok..
- */
- parent = trans->xl->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_CHILD_CONNECTING, trans->xl);
- parent = parent->next;
- }
- conf->connecting= 1;
- }
-
- STACK_DESTROY (frame->root);
-
- if (reply)
- dict_unref (reply);
-
- return op_ret;
-}
-
-/*
- * client_enosys_cbk -
- * @frame: call frame
- *
- * not for external reference
- */
-
-int
-client_enosys_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-void
-client_protocol_reconnect (void *trans_ptr)
-{
- transport_t *trans = NULL;
- client_connection_t *conn = NULL;
- struct timeval tv = {0, 0};
- int32_t ret = 0;
-
- trans = trans_ptr;
- conn = trans->xl_private;
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->reconnect)
- gf_timer_call_cancel (trans->xl->ctx,
- conn->reconnect);
- conn->reconnect = 0;
-
- if (conn->connected == 0) {
- tv.tv_sec = 10;
-
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "attempting reconnect");
- ret = transport_connect (trans);
-
- conn->reconnect =
- gf_timer_call_after (trans->xl->ctx, tv,
- client_protocol_reconnect,
- trans);
- } else {
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "breaking reconnect chain");
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (ret == -1 && errno != EINPROGRESS) {
- default_notify (trans->xl, GF_EVENT_CHILD_DOWN, NULL);
- }
-}
-
-int
-protocol_client_mark_fd_bad (xlator_t *this)
-{
- client_conf_t *conf = NULL;
- client_fd_ctx_t *tmp = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds,
- sfd_pos) {
- fdctx->remote_fd = -1;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- return 0;
-}
-
-/*
- * client_protocol_cleanup - cleanup function
- * @trans: transport object
- *
- */
-
-int
-protocol_client_cleanup (transport_t *trans)
-{
- client_connection_t *conn = NULL;
- struct saved_frames *saved_frames = NULL;
-
- conn = trans->xl_private;
-
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "cleaning up state in transport object %p", trans);
-
- pthread_mutex_lock (&conn->lock);
- {
- saved_frames = conn->saved_frames;
- conn->saved_frames = saved_frames_new ();
-
- /* bailout logic cleanup */
- if (conn->timer) {
- gf_timer_call_cancel (trans->xl->ctx, conn->timer);
- conn->timer = NULL;
- }
-
- if (conn->reconnect == NULL) {
- /* :O This part is empty.. any thing missing? */
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- saved_frames_destroy (trans->xl, saved_frames,
- gf_fops, gf_mops, gf_cbks);
-
- return 0;
-}
-
-
-/* cbk callbacks */
-int
-client_releasedir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr,
- size_t hdrlen, struct iobuf *iobuf)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-int
-client_release_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-int
-client_forget_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_log ("", GF_LOG_CRITICAL, "fop not implemented");
- return 0;
-}
-
-
-int
-client_log_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_log ("", GF_LOG_CRITICAL, "fop not implemented");
- return 0;
-}
-
-
-static gf_op_t gf_fops[] = {
- [GF_PROTO_FOP_STAT] = client_stat_cbk,
- [GF_PROTO_FOP_READLINK] = client_readlink_cbk,
- [GF_PROTO_FOP_MKNOD] = client_mknod_cbk,
- [GF_PROTO_FOP_MKDIR] = client_mkdir_cbk,
- [GF_PROTO_FOP_UNLINK] = client_unlink_cbk,
- [GF_PROTO_FOP_RMDIR] = client_rmdir_cbk,
- [GF_PROTO_FOP_SYMLINK] = client_symlink_cbk,
- [GF_PROTO_FOP_RENAME] = client_rename_cbk,
- [GF_PROTO_FOP_LINK] = client_link_cbk,
- [GF_PROTO_FOP_TRUNCATE] = client_truncate_cbk,
- [GF_PROTO_FOP_OPEN] = client_open_cbk,
- [GF_PROTO_FOP_READ] = client_readv_cbk,
- [GF_PROTO_FOP_WRITE] = client_write_cbk,
- [GF_PROTO_FOP_STATFS] = client_statfs_cbk,
- [GF_PROTO_FOP_FLUSH] = client_flush_cbk,
- [GF_PROTO_FOP_FSYNC] = client_fsync_cbk,
- [GF_PROTO_FOP_SETXATTR] = client_setxattr_cbk,
- [GF_PROTO_FOP_GETXATTR] = client_getxattr_cbk,
- [GF_PROTO_FOP_REMOVEXATTR] = client_removexattr_cbk,
- [GF_PROTO_FOP_OPENDIR] = client_opendir_cbk,
- [GF_PROTO_FOP_FSYNCDIR] = client_fsyncdir_cbk,
- [GF_PROTO_FOP_ACCESS] = client_access_cbk,
- [GF_PROTO_FOP_CREATE] = client_create_cbk,
- [GF_PROTO_FOP_FTRUNCATE] = client_ftruncate_cbk,
- [GF_PROTO_FOP_FSTAT] = client_fstat_cbk,
- [GF_PROTO_FOP_LK] = client_lk_common_cbk,
- [GF_PROTO_FOP_LOOKUP] = client_lookup_cbk,
- [GF_PROTO_FOP_READDIR] = client_readdir_cbk,
- [GF_PROTO_FOP_READDIRP] = client_readdirp_cbk,
- [GF_PROTO_FOP_INODELK] = client_inodelk_cbk,
- [GF_PROTO_FOP_FINODELK] = client_finodelk_cbk,
- [GF_PROTO_FOP_ENTRYLK] = client_entrylk_cbk,
- [GF_PROTO_FOP_FENTRYLK] = client_fentrylk_cbk,
- [GF_PROTO_FOP_CHECKSUM] = client_checksum_cbk,
- [GF_PROTO_FOP_RCHECKSUM] = client_rchecksum_cbk,
- [GF_PROTO_FOP_XATTROP] = client_xattrop_cbk,
- [GF_PROTO_FOP_FXATTROP] = client_fxattrop_cbk,
- [GF_PROTO_FOP_SETATTR] = client_setattr_cbk,
- [GF_PROTO_FOP_FSETATTR] = client_fsetattr_cbk
-};
-
-static gf_op_t gf_mops[] = {
- [GF_MOP_SETVOLUME] = client_setvolume_cbk,
- [GF_MOP_GETVOLUME] = client_enosys_cbk,
- [GF_MOP_SETSPEC] = client_setspec_cbk,
- [GF_MOP_GETSPEC] = client_getspec_cbk,
- [GF_MOP_PING] = client_ping_cbk,
- [GF_MOP_LOG] = client_log_cbk
-};
-
-static gf_op_t gf_cbks[] = {
- [GF_CBK_FORGET] = client_forget_cbk,
- [GF_CBK_RELEASE] = client_release_cbk,
- [GF_CBK_RELEASEDIR] = client_releasedir_cbk
-};
-
-/*
- * client_protocol_interpret - protocol interpreter
- * @trans: transport object
- * @blk: data block
- *
- */
-int
-protocol_client_interpret (xlator_t *this, transport_t *trans,
- char *hdr_p, size_t hdrlen, struct iobuf *iobuf)
-{
- int ret = -1;
- call_frame_t *frame = NULL;
- gf_hdr_common_t *hdr = NULL;
- uint64_t callid = 0;
- int type = -1;
- int op = -1;
- client_connection_t *conn = NULL;
-
- conn = trans->xl_private;
-
- hdr = (gf_hdr_common_t *)hdr_p;
-
- type = ntoh32 (hdr->type);
- op = ntoh32 (hdr->op);
- callid = ntoh64 (hdr->callid);
-
- frame = lookup_frame (trans, op, type, callid);
- if (frame == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "no frame for callid=%"PRId64" type=%d op=%d",
- callid, type, op);
- return 0;
- }
-
- switch (type) {
- case GF_OP_TYPE_FOP_REPLY:
- if ((op > GF_PROTO_FOP_MAXVALUE) ||
- (op < 0)) {
- gf_log (trans->xl->name, GF_LOG_WARNING,
- "invalid fop '%d'", op);
- } else {
- ret = gf_fops[op] (frame, hdr, hdrlen, iobuf);
- }
- break;
- case GF_OP_TYPE_MOP_REPLY:
- if ((op > GF_MOP_MAXVALUE) ||
- (op < 0)) {
- gf_log (trans->xl->name, GF_LOG_WARNING,
- "invalid fop '%d'", op);
- } else {
- ret = gf_mops[op] (frame, hdr, hdrlen, iobuf);
- }
- break;
- case GF_OP_TYPE_CBK_REPLY:
- if ((op > GF_CBK_MAXVALUE) ||
- (op < 0)) {
- gf_log (trans->xl->name, GF_LOG_WARNING,
- "invalid cbk '%d'", op);
- } else {
- ret = gf_cbks[op] (frame, hdr, hdrlen, iobuf);
- }
- break;
- default:
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "invalid packet type: %d", type);
- break;
- }
-
- return ret;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_client_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-
-/*
- * init - initiliazation function. called during loading of client protocol
- * @this:
- *
- */
-
-int
-init (xlator_t *this)
-{
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- int32_t frame_timeout = 0;
- int32_t ping_timeout = 0;
- data_t *remote_subvolume = NULL;
- int32_t ret = -1;
- int i = 0;
-
- if (this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: client protocol translator cannot have any "
- "subvolumes");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. ");
- }
-
- remote_subvolume = dict_get (this->options, "remote-subvolume");
- if (remote_subvolume == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Option 'remote-subvolume' is not specified.");
- goto out;
- }
-
- ret = dict_get_int32 (this->options, "frame-timeout",
- &frame_timeout);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting frame-timeout to %d", frame_timeout);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting frame-timeout to 30mins");
- frame_timeout = 1800;
- }
-
- ret = dict_get_int32 (this->options, "ping-timeout",
- &ping_timeout);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ping-timeout to %d", ping_timeout);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting ping-timeout to 42");
- ping_timeout = GF_UNIVERSAL_ANSWER;
- }
-
- conf = GF_CALLOC (1, sizeof (client_conf_t),
- gf_client_mt_client_conf_t);
-
- protocol_common_init ();
-
- pthread_mutex_init (&conf->mutex, NULL);
- INIT_LIST_HEAD (&conf->saved_fds);
-
- this->private = conf;
-
- for (i = 0; i < CHANNEL_MAX; i++) {
- if (CHANNEL_LOWLAT == i) {
- dict_set (this->options, "transport.socket.lowlat",
- data_from_dynstr (gf_strdup ("true")));
- }
- trans = transport_load (this->options, this);
- if (trans == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Failed to load transport");
- ret = -1;
- goto out;
- }
-
- conn = GF_CALLOC (1, sizeof (*conn),
- gf_client_mt_client_connection_t);
-
- conn->saved_frames = saved_frames_new ();
-
- conn->callid = 1;
-
- conn->frame_timeout = frame_timeout;
- conn->ping_timeout = ping_timeout;
-
- pthread_mutex_init (&conn->lock, NULL);
-
- trans->xl_private = conn;
- conf->transport[i] = transport_ref (trans);
- }
-
-#ifndef GF_DARWIN_HOST_OS
- {
- struct rlimit lim;
-
- lim.rlim_cur = 1048576;
- lim.rlim_max = 1048576;
-
- ret = setrlimit (RLIMIT_NOFILE, &lim);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "WARNING: Failed to set 'ulimit -n 1M': %s",
- strerror(errno));
- lim.rlim_cur = 65536;
- lim.rlim_max = 65536;
-
- ret = setrlimit (RLIMIT_NOFILE, &lim);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Failed to set max open fd to 64k: %s",
- strerror(errno));
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "max open fd set to 64k");
- }
-
- }
- }
-#endif
- ret = 0;
-out:
- return ret;
-}
-
-/*
- * fini - finish function called during unloading of client protocol
- * @this:
- *
- */
-void
-fini (xlator_t *this)
-{
- /* TODO: Check if its enough.. how to call transport's fini () */
- client_conf_t *conf = NULL;
-
- conf = this->private;
- this->private = NULL;
-
- if (conf) {
- GF_FREE (conf);
- }
- return;
-}
-
-
-int
-protocol_client_handshake (xlator_t *this, transport_t *trans)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_mop_setvolume_req_t *req = NULL;
- dict_t *options = NULL;
- int32_t ret = -1;
- int hdrlen = 0;
- int dict_len = 0;
- call_frame_t *fr = NULL;
- char *process_uuid_xl;
-
- options = this->options;
- ret = dict_set_str (options, "protocol-version", GF_PROTOCOL_VERSION);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set protocol version(%s) in handshake msg",
- GF_PROTOCOL_VERSION);
- }
-
- ret = gf_asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid,
- this->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting process_uuid");
- goto fail;
- }
- ret = dict_set_dynstr (options, "process-uuid",
- process_uuid_xl);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set process-uuid(%s) in handshake msg",
- process_uuid_xl);
- }
-
- if (this->ctx->cmd_args.volfile_server) {
- if (this->ctx->cmd_args.volfile_id)
- ret = dict_set_str (options, "volfile-key",
- this->ctx->cmd_args.volfile_id);
- ret = dict_set_uint32 (options, "volfile-checksum",
- this->graph->volfile_checksum);
- }
-
- dict_len = dict_serialized_length (options);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- options);
- ret = dict_len;
- goto fail;
- }
-
- hdrlen = gf_hdr_len (req, dict_len);
- hdr = gf_hdr_new (req, dict_len);
- GF_VALIDATE_OR_GOTO (this->name, hdr, fail);
-
- req = gf_param (hdr);
-
- ret = dict_serialize (options, req->buf);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- options);
- goto fail;
- }
-
- req->dict_len = hton32 (dict_len);
- fr = create_frame (this, this->ctx->pool);
- GF_VALIDATE_OR_GOTO (this->name, fr, fail);
-
- fr->local = trans;
- ret = protocol_client_xfer (fr, this, trans,
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_SETVOLUME,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-fail:
- if (hdr)
- GF_FREE (hdr);
- return ret;
-}
-
-
-int
-protocol_client_pollout (xlator_t *this, transport_t *trans)
-{
- client_conf_t *conf = NULL;
-
- conf = trans->xl->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- gettimeofday (&conf->last_sent, NULL);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- return 0;
-}
-
-
-int
-protocol_client_pollin (xlator_t *this, transport_t *trans)
-{
- client_conf_t *conf = NULL;
- int ret = -1;
- struct iobuf *iobuf = NULL;
- char *hdr = NULL;
- size_t hdrlen = 0;
-
- conf = trans->xl->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- gettimeofday (&conf->last_received, NULL);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- ret = transport_receive (trans, &hdr, &hdrlen, &iobuf);
-
- if (ret == 0)
- {
- ret = protocol_client_interpret (this, trans, hdr, hdrlen,
- iobuf);
- }
-
- /* TODO: use mem-pool */
- GF_FREE (hdr);
-
- return ret;
-}
-
-int
-client_priv_dump (xlator_t *this)
-{
- client_conf_t *conf = NULL;
- int ret = -1;
- client_fd_ctx_t *tmp = NULL;
- int i = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- ret = pthread_mutex_trylock(&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to lock client %s"
- " errno: %d", this->name, errno);
- return -1;
- }
-
- gf_proc_dump_build_key(key_prefix, "xlator.protocol.client",
- "%s.priv", this->name);
-
- gf_proc_dump_add_section(key_prefix);
-
- list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) {
- gf_proc_dump_build_key(key, key_prefix,
- "fd.%d.remote_fd", ++i);
- gf_proc_dump_write(key, "%d", tmp->remote_fd);
- }
-
- gf_proc_dump_build_key(key, key_prefix, "connecting");
- gf_proc_dump_write(key, "%d", conf->connecting);
- gf_proc_dump_build_key(key, key_prefix, "last_sent");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_sent.tv_sec));
- gf_proc_dump_build_key(key, key_prefix, "last_received");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_received.tv_sec));
-
- pthread_mutex_unlock(&conf->mutex);
-
- return 0;
-
-}
-
-int32_t
-client_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- ino_t par = 0;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN];
-
- if (!inode)
- return -1;
-
- if (!this)
- return -1;
-
- ret = inode_ctx_get (inode, this, &par);
-
- if (ret != 0)
- return ret;
-
- gf_proc_dump_build_key(key, "xlator.protocol.client",
- "%s.inode.%ld.par",
- this->name,inode->ino);
- gf_proc_dump_write(key, "%ld", par);
-
- return 0;
-}
-
-/*
- * client_protocol_notify - notify function for client protocol
- * @this:
- * @trans: transport object
- * @event
- *
- */
-
-int
-notify (xlator_t *this, int32_t event, void *data, ...)
-{
- int i = 0;
- int ret = 0;
- int child_down = 1;
- int was_not_down = 0;
- transport_t *trans = NULL;
- client_connection_t *conn = NULL;
- client_conf_t *conf = NULL;
- xlator_list_t *parent = NULL;
-
- conf = this->private;
- trans = data;
-
- switch (event) {
- case GF_EVENT_POLLOUT:
- {
- ret = protocol_client_pollout (this, trans);
-
- break;
- }
- case GF_EVENT_POLLIN:
- {
- ret = protocol_client_pollin (this, trans);
-
- break;
- }
- /* no break for ret check to happen below */
- case GF_EVENT_POLLERR:
- {
- ret = -1;
- protocol_client_cleanup (trans);
-
- if (conf->connecting == 0) {
- /* Let the connection/re-connection happen in
- * background, for now, don't hang here,
- * tell the parents that i am all ok..
- */
- parent = trans->xl->parents;
- while (parent) {
- parent->xlator->notify (parent->xlator,
- GF_EVENT_CHILD_CONNECTING,
- trans->xl);
- parent = parent->next;
- }
- conf->connecting = 1;
- }
-
- was_not_down = 0;
- for (i = 0; i < CHANNEL_MAX; i++) {
- conn = conf->transport[i]->xl_private;
- if (conn->connected == 1)
- was_not_down = 1;
- }
-
- conn = trans->xl_private;
- if (conn->connected) {
- conn->connected = 0;
- if (conn->reconnect == 0)
- client_protocol_reconnect (trans);
- }
-
- child_down = 1;
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = conf->transport[i];
- conn = trans->xl_private;
- if (conn->connected == 1)
- child_down = 0;
- }
-
- if (child_down && was_not_down) {
- gf_log (this->name, GF_LOG_INFO, "disconnected");
-
- protocol_client_mark_fd_bad (this);
-
- parent = this->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_CHILD_DOWN, this);
- parent = parent->next;
- }
- }
- }
- break;
-
- case GF_EVENT_PARENT_UP:
- {
- client_conf_t *conf = NULL;
- int i = 0;
- transport_t *trans = NULL;
-
- conf = this->private;
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = conf->transport[i];
- if (!trans) {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport init failed");
- return -1;
- }
-
- conn = trans->xl_private;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_PARENT_UP, attempting connect "
- "on transport");
-
- client_protocol_reconnect (trans);
- }
- }
- break;
-
- case GF_EVENT_CHILD_UP:
- {
- char *handshake = NULL;
-
- ret = dict_get_str (this->options, "disable-handshake",
- &handshake);
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP");
- if ((ret < 0) ||
- (strcasecmp (handshake, "on"))) {
- ret = protocol_client_handshake (this, trans);
- } else {
- conn = trans->xl_private;
- conn->connected = 1;
- ret = default_notify (this, event, trans);
- }
-
- if (ret)
- transport_disconnect (trans);
-
- }
- break;
-
- default:
- gf_log (this->name, GF_LOG_DEBUG,
- "got %d, calling default_notify ()", event);
-
- default_notify (this, event, data);
- break;
- }
-
- return ret;
-}
-
-
-struct xlator_fops fops = {
- .stat = client_stat,
- .readlink = client_readlink,
- .mknod = client_mknod,
- .mkdir = client_mkdir,
- .unlink = client_unlink,
- .rmdir = client_rmdir,
- .symlink = client_symlink,
- .rename = client_rename,
- .link = client_link,
- .truncate = client_truncate,
- .open = client_open,
- .readv = client_readv,
- .writev = client_writev,
- .statfs = client_statfs,
- .flush = client_flush,
- .fsync = client_fsync,
- .setxattr = client_setxattr,
- .getxattr = client_getxattr,
- .fsetxattr = client_fsetxattr,
- .fgetxattr = client_fgetxattr,
- .removexattr = client_removexattr,
- .opendir = client_opendir,
- .readdir = client_readdir,
- .readdirp = client_readdirp,
- .fsyncdir = client_fsyncdir,
- .access = client_access,
- .ftruncate = client_ftruncate,
- .fstat = client_fstat,
- .create = client_create,
- .lk = client_lk,
- .inodelk = client_inodelk,
- .finodelk = client_finodelk,
- .entrylk = client_entrylk,
- .fentrylk = client_fentrylk,
- .lookup = client_lookup,
- .checksum = client_checksum,
- .rchecksum = client_rchecksum,
- .xattrop = client_xattrop,
- .fxattrop = client_fxattrop,
- .setattr = client_setattr,
- .fsetattr = client_fsetattr,
- .getspec = client_getspec,
-};
-
-struct xlator_cbks cbks = {
- .release = client_release,
- .releasedir = client_releasedir
-};
-
-
-struct xlator_dumpops dumpops = {
- .priv = client_priv_dump,
- .inodectx = client_inodectx_dump,
-};
-
-struct volume_options options[] = {
- { .key = {"username"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"password"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport-type"},
- .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
- "tcp/client", "ib-verbs/client"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"remote-host"},
- .type = GF_OPTION_TYPE_INTERNET_ADDRESS
- },
- { .key = {"remote-subvolume"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"frame-timeout"},
- .type = GF_OPTION_TYPE_TIME,
- .min = 0,
- .max = 86400,
- },
- { .key = {"ping-timeout"},
- .type = GF_OPTION_TYPE_TIME,
- .min = 1,
- .max = 1013,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/protocol/legacy/client/src/client-protocol.h b/xlators/protocol/legacy/client/src/client-protocol.h
deleted file mode 100644
index ae65fb5fe72..00000000000
--- a/xlators/protocol/legacy/client/src/client-protocol.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CLIENT_PROTOCOL_H
-#define _CLIENT_PROTOCOL_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <arpa/inet.h>
-#include "inode.h"
-#include "timer.h"
-#include "byte-order.h"
-#include "saved-frames.h"
-
-#define CLIENT_PORT_CEILING 1023
-
-#define GF_CLIENT_INODE_SELF 0
-#define GF_CLIENT_INODE_PARENT 1
-
-#define CLIENT_CONF(this) ((client_conf_t *)(this->private))
-
-#define RECEIVE_TIMEOUT(_cprivate,_current) \
- ((_cprivate->last_received.tv_sec + \
- _cprivate->frame_timeout) < \
- _current.tv_sec)
-
-#define SEND_TIMEOUT(_cprivate,_current) \
- ((_cprivate->last_sent.tv_sec + \
- _cprivate->frame_timeout) < \
- _current.tv_sec)
-
-enum {
- CHANNEL_BULK = 0,
- CHANNEL_LOWLAT = 1,
- CHANNEL_MAX
-};
-
-#define CLIENT_CHANNEL client_channel
-
-struct client_connection;
-typedef struct client_connection client_connection_t;
-
-#include "stack.h"
-#include "xlator.h"
-#include "transport.h"
-#include "protocol.h"
-
-typedef struct _client_fd_ctx {
- struct list_head sfd_pos; /* Stores the reference to this
- fd's position in the saved_fds list.
- */
- int64_t remote_fd;
- inode_t *inode;
- uint64_t ino;
- uint64_t gen;
- char is_dir;
- char released;
- int32_t flags;
- int32_t wbflags;
-} client_fd_ctx_t;
-
-struct _client_conf {
- transport_t *transport[CHANNEL_MAX];
- struct list_head saved_fds;
- struct timeval last_sent;
- struct timeval last_received;
- pthread_mutex_t mutex;
- int connecting;
-};
-typedef struct _client_conf client_conf_t;
-
-/* This will be stored in transport_t->xl_private */
-struct client_connection {
- pthread_mutex_t lock;
- uint64_t callid;
- struct saved_frames *saved_frames;
- int32_t frame_timeout;
- int32_t ping_started;
- int32_t ping_timeout;
- int32_t transport_activity;
- gf_timer_t *reconnect;
- char connected;
- uint64_t max_block_size;
- gf_timer_t *timer;
- gf_timer_t *ping_timer;
-};
-
-typedef struct {
- loc_t loc;
- loc_t loc2;
- fd_t *fd;
- gf_op_t op;
- client_fd_ctx_t *fdctx;
- uint32_t flags;
- uint32_t wbflags;
-} client_local_t;
-
-
-static inline void
-gf_string_to_stat(char *string, struct iatt *stbuf)
-{
- uint64_t dev = 0;
- uint64_t ino = 0;
- uint32_t mode = 0;
- uint32_t nlink = 0;
- uint32_t uid = 0;
- uint32_t gid = 0;
- uint64_t rdev = 0;
- uint64_t size = 0;
- uint32_t blksize = 0;
- uint64_t blocks = 0;
- uint32_t atime = 0;
- uint32_t atime_nsec = 0;
- uint32_t mtime = 0;
- uint32_t mtime_nsec = 0;
- uint32_t ctime = 0;
- uint32_t ctime_nsec = 0;
-
- sscanf (string, GF_STAT_PRINT_FMT_STR,
- &dev,
- &ino,
- &mode,
- &nlink,
- &uid,
- &gid,
- &rdev,
- &size,
- &blksize,
- &blocks,
- &atime,
- &atime_nsec,
- &mtime,
- &mtime_nsec,
- &ctime,
- &ctime_nsec);
-
- stbuf->ia_gen = dev;
- stbuf->ia_ino = ino;
- stbuf->ia_prot = ia_prot_from_st_mode (mode);
- stbuf->ia_type = ia_type_from_st_mode (mode);
- stbuf->ia_nlink = nlink;
- stbuf->ia_uid = uid;
- stbuf->ia_gid = gid;
- stbuf->ia_rdev = rdev;
- stbuf->ia_size = size;
- stbuf->ia_blksize = blksize;
- stbuf->ia_blocks = blocks;
-
- stbuf->ia_atime = atime;
- stbuf->ia_mtime = mtime;
- stbuf->ia_ctime = ctime;
-
- stbuf->ia_atime_nsec = atime_nsec;
- stbuf->ia_mtime_nsec = mtime_nsec;
- stbuf->ia_ctime_nsec = ctime_nsec;
-}
-
-#endif
diff --git a/xlators/protocol/legacy/client/src/saved-frames.c b/xlators/protocol/legacy/client/src/saved-frames.c
deleted file mode 100644
index 770de19ad72..00000000000
--- a/xlators/protocol/legacy/client/src/saved-frames.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "saved-frames.h"
-#include "common-utils.h"
-#include "protocol.h"
-#include "xlator.h"
-#include "client-mem-types.h"
-
-
-
-struct saved_frames *
-saved_frames_new (void)
-{
- struct saved_frames *saved_frames = NULL;
-
- saved_frames = GF_CALLOC (sizeof (*saved_frames), 1,
- gf_client_mt_saved_frames);
- if (!saved_frames) {
- return NULL;
- }
-
- INIT_LIST_HEAD (&saved_frames->fops.list);
- INIT_LIST_HEAD (&saved_frames->mops.list);
- INIT_LIST_HEAD (&saved_frames->cbks.list);
-
- return saved_frames;
-}
-
-
-struct saved_frame *
-get_head_frame_for_type (struct saved_frames *frames, int8_t type)
-{
- struct saved_frame *head_frame = NULL;
-
- switch (type) {
- case GF_OP_TYPE_FOP_REQUEST:
- case GF_OP_TYPE_FOP_REPLY:
- head_frame = &frames->fops;
- break;
- case GF_OP_TYPE_MOP_REQUEST:
- case GF_OP_TYPE_MOP_REPLY:
- head_frame = &frames->mops;
- break;
- case GF_OP_TYPE_CBK_REQUEST:
- case GF_OP_TYPE_CBK_REPLY:
- head_frame = &frames->cbks;
- break;
- }
-
- return head_frame;
-}
-
-
-int
-saved_frames_put (struct saved_frames *frames, call_frame_t *frame,
- int32_t op, int8_t type, int64_t callid)
-{
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *head_frame = NULL;
-
- head_frame = get_head_frame_for_type (frames, type);
-
- saved_frame = GF_CALLOC (sizeof (*saved_frame), 1,
- gf_client_mt_saved_frame);
- if (!saved_frame) {
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD (&saved_frame->list);
- saved_frame->frame = frame;
- saved_frame->op = op;
- saved_frame->type = type;
- saved_frame->callid = callid;
-
- gettimeofday (&saved_frame->saved_at, NULL);
-
- list_add_tail (&saved_frame->list, &head_frame->list);
- frames->count++;
-
- return 0;
-}
-
-
-call_frame_t *
-saved_frames_get (struct saved_frames *frames, int32_t op,
- int8_t type, int64_t callid)
-{
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *tmp = NULL;
- struct saved_frame *head_frame = NULL;
- call_frame_t *frame = NULL;
-
- head_frame = get_head_frame_for_type (frames, type);
-
- list_for_each_entry (tmp, &head_frame->list, list) {
- if (tmp->callid == callid) {
- list_del_init (&tmp->list);
- frames->count--;
- saved_frame = tmp;
- break;
- }
- }
-
- if (saved_frame)
- frame = saved_frame->frame;
-
- GF_FREE (saved_frame);
-
- return frame;
-}
-
-struct saved_frame *
-saved_frames_get_timedout (struct saved_frames *frames, int8_t type,
- uint32_t timeout, struct timeval *current)
-{
- struct saved_frame *bailout_frame = NULL, *tmp = NULL;
- struct saved_frame *head_frame = NULL;
-
- head_frame = get_head_frame_for_type (frames, type);
-
- if (!list_empty(&head_frame->list)) {
- tmp = list_entry (head_frame->list.next, typeof (*tmp), list);
- if ((tmp->saved_at.tv_sec + timeout) < current->tv_sec) {
- bailout_frame = tmp;
- list_del_init (&bailout_frame->list);
- frames->count--;
- }
- }
-
- return bailout_frame;
-}
-
-void
-saved_frames_unwind (xlator_t *this, struct saved_frames *saved_frames,
- struct saved_frame *head,
- gf_op_t gf_ops[], char *gf_op_list[])
-{
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
-
- gf_hdr_common_t hdr = {0, };
- call_frame_t *frame = NULL;
-
- hdr.rsp.op_ret = hton32 (-1);
- hdr.rsp.op_errno = hton32 (ENOTCONN);
-
- list_for_each_entry_safe (trav, tmp, &head->list, list) {
- gf_log (this->name, GF_LOG_ERROR,
- "forced unwinding frame type(%d) op(%s)",
- trav->type, gf_op_list[trav->op]);
-
- hdr.type = hton32 (trav->type);
- hdr.op = hton32 (trav->op);
-
- frame = trav->frame;
-
- saved_frames->count--;
-
- gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL);
-
- list_del_init (&trav->list);
- GF_FREE (trav);
- }
-}
-
-
-void
-saved_frames_destroy (xlator_t *this, struct saved_frames *frames,
- gf_op_t gf_fops[], gf_op_t gf_mops[], gf_op_t gf_cbks[])
-{
- saved_frames_unwind (this, frames, &frames->fops, gf_fops, gf_fop_list);
- saved_frames_unwind (this, frames, &frames->mops, gf_mops, gf_mop_list);
- saved_frames_unwind (this, frames, &frames->cbks, gf_cbks, gf_cbk_list);
-
- GF_FREE (frames);
-}
diff --git a/xlators/protocol/legacy/client/src/saved-frames.h b/xlators/protocol/legacy/client/src/saved-frames.h
deleted file mode 100644
index 5c18abbcc9e..00000000000
--- a/xlators/protocol/legacy/client/src/saved-frames.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SAVED_FRAMES_H
-#define _SAVED_FRAMES_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <sys/time.h>
-#include "stack.h"
-#include "list.h"
-#include "protocol.h"
-
-/* UGLY: have common typedef b/w saved-frames.c and protocol-client.c */
-typedef int32_t (*gf_op_t) (call_frame_t *frame,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf);
-
-
-struct saved_frame {
- union {
- struct list_head list;
- struct {
- struct saved_frame *frame_next;
- struct saved_frame *frame_prev;
- };
- };
-
- struct timeval saved_at;
- call_frame_t *frame;
- int32_t op;
- int8_t type;
- uint64_t callid;
-};
-
-
-struct saved_frames {
- int64_t count;
- struct saved_frame fops;
- struct saved_frame mops;
- struct saved_frame cbks;
-};
-
-
-struct saved_frames *saved_frames_new ();
-int saved_frames_put (struct saved_frames *frames, call_frame_t *frame,
- int32_t op, int8_t type, int64_t callid);
-call_frame_t *saved_frames_get (struct saved_frames *frames, int32_t op,
- int8_t type, int64_t callid);
-
-struct saved_frame *
-saved_frames_get_timedout (struct saved_frames *frames, int8_t type,
- uint32_t timeout, struct timeval *current);
-
-void saved_frames_destroy (xlator_t *this, struct saved_frames *frames,
- gf_op_t gf_fops[], gf_op_t gf_mops[],
- gf_op_t gf_cbks[]);
-
-#endif /* _SAVED_FRAMES_H */
diff --git a/xlators/protocol/legacy/lib/src/Makefile.am b/xlators/protocol/legacy/lib/src/Makefile.am
deleted file mode 100644
index 1f0e93e3047..00000000000
--- a/xlators/protocol/legacy/lib/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-lib_LTLIBRARIES = libgfproto.la
-
-libgfproto_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
-
-libgfproto_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE \
- -D$(GF_HOST_OS) -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
- -DTRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/transport\" \
- -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/libglusterfs/src/
-
-libgfproto_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-libgfproto_la_SOURCES = transport.c protocol.c
-
-noinst_HEADERS = transport.h protocol.h
diff --git a/xlators/protocol/legacy/lib/src/protocol.c b/xlators/protocol/legacy/lib/src/protocol.c
deleted file mode 100644
index 63950f43dec..00000000000
--- a/xlators/protocol/legacy/lib/src/protocol.c
+++ /dev/null
@@ -1,108 +0,0 @@
-
-#include "globals.h"
-#include "compat.h"
-#include "protocol.h"
-
-char *gf_mop_list[GF_MOP_MAXVALUE];
-char *gf_cbk_list[GF_CBK_MAXVALUE];
-
-static int
-gf_dirent_nb_size (gf_dirent_t *entries)
-{
- return (sizeof (struct gf_dirent_nb) + strlen (entries->d_name) + 1);
-}
-
-int
-gf_dirent_serialize (gf_dirent_t *entries, char *buf, size_t buf_size)
-{
- struct gf_dirent_nb *entry_nb = NULL;
- gf_dirent_t *entry = NULL;
- int size = 0;
- int entry_size = 0;
-
-
- list_for_each_entry (entry, &entries->list, list) {
- entry_size = gf_dirent_nb_size (entry);
-
- if (buf && (size + entry_size <= buf_size)) {
- entry_nb = (void *) (buf + size);
-
- entry_nb->d_ino = hton64 (entry->d_ino);
- entry_nb->d_off = hton64 (entry->d_off);
- entry_nb->d_len = hton32 (entry->d_len);
- entry_nb->d_type = hton32 (entry->d_type);
-
- gf_stat_from_iatt (&entry_nb->d_stat, &entry->d_stat);
-
- strcpy (entry_nb->d_name, entry->d_name);
- }
- size += entry_size;
- }
-
- return size;
-}
-
-
-int
-gf_dirent_unserialize (gf_dirent_t *entries, const char *buf, size_t buf_size)
-{
- struct gf_dirent_nb *entry_nb = NULL;
- int remaining_size = 0;
- int least_dirent_size = 0;
- int count = 0;
- gf_dirent_t *entry = NULL;
- int entry_strlen = 0;
- int entry_len = 0;
-
-
- remaining_size = buf_size;
- least_dirent_size = (sizeof (struct gf_dirent_nb) + 2);
-
- while (remaining_size >= least_dirent_size) {
- entry_nb = (void *)(buf + (buf_size - remaining_size));
-
- entry_strlen = strnlen (entry_nb->d_name, remaining_size);
- if (entry_strlen == remaining_size) {
- break;
- }
-
- entry_len = sizeof (gf_dirent_t) + entry_strlen + 1;
- entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t);
- if (!entry) {
- break;
- }
-
- entry->d_ino = ntoh64 (entry_nb->d_ino);
- entry->d_off = ntoh64 (entry_nb->d_off);
- entry->d_len = ntoh32 (entry_nb->d_len);
- entry->d_type = ntoh32 (entry_nb->d_type);
-
- gf_stat_to_iatt (&entry_nb->d_stat, &entry->d_stat);
-
- strcpy (entry->d_name, entry_nb->d_name);
-
- list_add_tail (&entry->list, &entries->list);
-
- remaining_size -= (sizeof (*entry_nb) + entry_strlen + 1);
- count++;
- }
-
- return count;
-}
-
-int
-protocol_common_init (void)
-{
- gf_mop_list[GF_MOP_SETVOLUME] = "SETVOLUME";
- gf_mop_list[GF_MOP_GETVOLUME] = "GETVOLUME";
- gf_mop_list[GF_MOP_SETSPEC] = "SETSPEC";
- gf_mop_list[GF_MOP_GETSPEC] = "GETSPEC";
- gf_mop_list[GF_MOP_LOG] = "LOG";
- gf_mop_list[GF_MOP_PING] = "PING";
-
- gf_cbk_list[GF_CBK_FORGET] = "FORGET";
- gf_cbk_list[GF_CBK_RELEASE] = "RELEASE";
- gf_cbk_list[GF_CBK_RELEASEDIR] = "RELEASEDIR";
-
- return 0;
-}
diff --git a/xlators/protocol/legacy/lib/src/protocol.h b/xlators/protocol/legacy/lib/src/protocol.h
deleted file mode 100644
index 254e36e661b..00000000000
--- a/xlators/protocol/legacy/lib/src/protocol.h
+++ /dev/null
@@ -1,1119 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _PROTOCOL_H
-#define _PROTOCOL_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <inttypes.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/statvfs.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "byte-order.h"
-#include "iatt.h"
-
-/* Any changes in the protocol structure or adding new '[f,m]ops' needs to
- * bump the protocol version by "0.1"
- */
-
-#define GF_PROTOCOL_VERSION "3.0"
-
-extern char *gf_mop_list[];
-extern char *gf_cbk_list[];
-
-/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
-typedef enum {
- GF_PROTO_FOP_STAT, /* 0 */
- GF_PROTO_FOP_READLINK, /* 1 */
- GF_PROTO_FOP_MKNOD, /* 2 */
- GF_PROTO_FOP_MKDIR,
- GF_PROTO_FOP_UNLINK,
- GF_PROTO_FOP_RMDIR, /* 5 */
- GF_PROTO_FOP_SYMLINK,
- GF_PROTO_FOP_RENAME,
- GF_PROTO_FOP_LINK,
- GF_PROTO_FOP_TRUNCATE,
- GF_PROTO_FOP_OPEN, /* 10 */
- GF_PROTO_FOP_READ,
- GF_PROTO_FOP_WRITE,
- GF_PROTO_FOP_STATFS, /* 15 */
- GF_PROTO_FOP_FLUSH,
- GF_PROTO_FOP_FSYNC,
- GF_PROTO_FOP_SETXATTR,
- GF_PROTO_FOP_GETXATTR,
- GF_PROTO_FOP_REMOVEXATTR,/* 20 */
- GF_PROTO_FOP_OPENDIR,
- GF_PROTO_FOP_GETDENTS,
- GF_PROTO_FOP_FSYNCDIR,
- GF_PROTO_FOP_ACCESS,
- GF_PROTO_FOP_CREATE, /* 25 */
- GF_PROTO_FOP_FTRUNCATE,
- GF_PROTO_FOP_FSTAT,
- GF_PROTO_FOP_LK,
- GF_PROTO_FOP_LOOKUP,
- GF_PROTO_FOP_SETDENTS,
- GF_PROTO_FOP_READDIR,
- GF_PROTO_FOP_INODELK, /* 35 */
- GF_PROTO_FOP_FINODELK,
- GF_PROTO_FOP_ENTRYLK,
- GF_PROTO_FOP_FENTRYLK,
- GF_PROTO_FOP_CHECKSUM,
- GF_PROTO_FOP_XATTROP, /* 40 */
- GF_PROTO_FOP_FXATTROP,
- GF_PROTO_FOP_LOCK_NOTIFY,
- GF_PROTO_FOP_LOCK_FNOTIFY,
- GF_PROTO_FOP_FGETXATTR,
- GF_PROTO_FOP_FSETXATTR, /* 45 */
- GF_PROTO_FOP_RCHECKSUM,
- GF_PROTO_FOP_SETATTR,
- GF_PROTO_FOP_FSETATTR,
- GF_PROTO_FOP_READDIRP,
- GF_PROTO_FOP_MAXVALUE,
-} glusterfs_proto_fop_t;
-
-/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
-typedef enum {
- GF_MOP_SETVOLUME, /* 0 */
- GF_MOP_GETVOLUME, /* 1 */
- GF_MOP_STATS,
- GF_MOP_SETSPEC,
- GF_MOP_GETSPEC,
- GF_MOP_PING, /* 5 */
- GF_MOP_LOG,
- GF_MOP_NOTIFY,
- GF_MOP_MAXVALUE, /* 8 */
-} glusterfs_mop_t;
-
-typedef enum {
- GF_CBK_FORGET, /* 0 */
- GF_CBK_RELEASE, /* 1 */
- GF_CBK_RELEASEDIR, /* 2 */
- GF_CBK_MAXVALUE /* 3 */
-} glusterfs_cbk_t;
-
-typedef enum {
- GF_OP_TYPE_FOP_REQUEST = 1,
- GF_OP_TYPE_MOP_REQUEST,
- GF_OP_TYPE_CBK_REQUEST,
- GF_OP_TYPE_FOP_REPLY,
- GF_OP_TYPE_MOP_REPLY,
- GF_OP_TYPE_CBK_REPLY
-} glusterfs_op_type_t;
-
-
-struct gf_stat {
- uint64_t ino;
- uint64_t size;
- uint64_t blocks;
- uint64_t dev;
- uint32_t rdev;
- uint32_t mode;
- uint32_t nlink;
- uint32_t uid;
- uint32_t gid;
- uint32_t blksize;
- uint32_t atime;
- uint32_t atime_nsec;
- uint32_t mtime ;
- uint32_t mtime_nsec;
- uint32_t ctime;
- uint32_t ctime_nsec;
-} __attribute__((packed));
-
-
-static inline void
-gf_stat_to_stat (struct gf_stat *gf_stat, struct stat *stat)
-{
- stat->st_dev = ntoh64 (gf_stat->dev);
- stat->st_ino = ntoh64 (gf_stat->ino);
- stat->st_mode = ntoh32 (gf_stat->mode);
- stat->st_nlink = ntoh32 (gf_stat->nlink);
- stat->st_uid = ntoh32 (gf_stat->uid);
- stat->st_gid = ntoh32 (gf_stat->gid);
- stat->st_rdev = ntoh32 (gf_stat->rdev);
- stat->st_size = ntoh64 (gf_stat->size);
- stat->st_blksize = ntoh32 (gf_stat->blksize);
- stat->st_blocks = ntoh64 (gf_stat->blocks);
- stat->st_atime = ntoh32 (gf_stat->atime);
- stat->st_mtime = ntoh32 (gf_stat->mtime);
- stat->st_ctime = ntoh32 (gf_stat->ctime);
- ST_ATIM_NSEC_SET(stat, ntoh32 (gf_stat->atime_nsec));
- ST_MTIM_NSEC_SET(stat, ntoh32 (gf_stat->mtime_nsec));
- ST_CTIM_NSEC_SET(stat, ntoh32 (gf_stat->ctime_nsec));
-}
-
-
-static inline void
-gf_stat_from_stat (struct gf_stat *gf_stat, struct stat *stat)
-{
- gf_stat->dev = hton64 (stat->st_dev);
- gf_stat->ino = hton64 (stat->st_ino);
- gf_stat->mode = hton32 (stat->st_mode);
- gf_stat->nlink = hton32 (stat->st_nlink);
- gf_stat->uid = hton32 (stat->st_uid);
- gf_stat->gid = hton32 (stat->st_gid);
- gf_stat->rdev = hton32 (stat->st_rdev);
- gf_stat->size = hton64 (stat->st_size);
- gf_stat->blksize = hton32 (stat->st_blksize);
- gf_stat->blocks = hton64 (stat->st_blocks);
- gf_stat->atime = hton32 (stat->st_atime);
- gf_stat->mtime = hton32 (stat->st_mtime);
- gf_stat->ctime = hton32 (stat->st_ctime);
- gf_stat->atime_nsec = hton32 (ST_ATIM_NSEC(stat));
- gf_stat->mtime_nsec = hton32 (ST_MTIM_NSEC(stat));
- gf_stat->ctime_nsec = hton32 (ST_CTIM_NSEC(stat));
-}
-
-
-static inline void
-gf_stat_to_iatt (struct gf_stat *gf_stat, struct iatt *iatt)
-{
- iatt->ia_ino = ntoh64 (gf_stat->ino);
- iatt->ia_dev = ntoh64 (gf_stat->dev);
- iatt->ia_type = ia_type_from_st_mode (ntoh32 (gf_stat->mode));
- iatt->ia_prot = ia_prot_from_st_mode (ntoh32 (gf_stat->mode));
- iatt->ia_nlink = ntoh32 (gf_stat->nlink);
- iatt->ia_uid = ntoh32 (gf_stat->uid);
- iatt->ia_gid = ntoh32 (gf_stat->gid);
- iatt->ia_rdev = ntoh64 (gf_stat->rdev);
- iatt->ia_size = ntoh64 (gf_stat->size);
- iatt->ia_blksize = ntoh32 (gf_stat->blksize);
- iatt->ia_blocks = ntoh64 (gf_stat->blocks);
- iatt->ia_atime = ntoh32 (gf_stat->atime);
- iatt->ia_atime_nsec = ntoh32 (gf_stat->atime_nsec);
- iatt->ia_mtime = ntoh32 (gf_stat->mtime);
- iatt->ia_mtime_nsec = ntoh32 (gf_stat->mtime_nsec);
- iatt->ia_ctime = ntoh32 (gf_stat->ctime);
- iatt->ia_ctime_nsec = ntoh32 (gf_stat->ctime_nsec);
-
- iatt->ia_gen = ntoh64 (gf_stat->dev);
-}
-
-
-static inline void
-gf_stat_from_iatt (struct gf_stat *gf_stat, struct iatt *iatt)
-{
- gf_stat->ino = hton64 (iatt->ia_ino);
- gf_stat->dev = hton64 (iatt->ia_dev);
- gf_stat->mode = hton32 (st_mode_from_ia (iatt->ia_prot,
- iatt->ia_type));
- gf_stat->nlink = hton32 (iatt->ia_nlink);
- gf_stat->uid = hton32 (iatt->ia_uid);
- gf_stat->gid = hton32 (iatt->ia_gid);
- gf_stat->rdev = hton32 (iatt->ia_rdev);
- gf_stat->size = hton64 (iatt->ia_size);
- gf_stat->blksize = hton32 (iatt->ia_blksize);
- gf_stat->blocks = hton64 (iatt->ia_blocks);
- gf_stat->atime = hton32 (iatt->ia_atime);
- gf_stat->atime_nsec = hton32 (iatt->ia_atime_nsec);
- gf_stat->mtime = hton32 (iatt->ia_mtime);
- gf_stat->mtime_nsec = hton32 (iatt->ia_mtime_nsec);
- gf_stat->ctime = hton32 (iatt->ia_ctime);
- gf_stat->ctime_nsec = hton32 (iatt->ia_ctime_nsec);
-
- gf_stat->dev = hton64 (iatt->ia_gen);
-
-}
-
-
-struct gf_statfs {
- uint64_t bsize;
- uint64_t frsize;
- uint64_t blocks;
- uint64_t bfree;
- uint64_t bavail;
- uint64_t files;
- uint64_t ffree;
- uint64_t favail;
- uint64_t fsid;
- uint64_t flag;
- uint64_t namemax;
-} __attribute__((packed));
-
-
-static inline void
-gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
-{
- stat->f_bsize = ntoh64 (gf_stat->bsize);
- stat->f_frsize = ntoh64 (gf_stat->frsize);
- stat->f_blocks = ntoh64 (gf_stat->blocks);
- stat->f_bfree = ntoh64 (gf_stat->bfree);
- stat->f_bavail = ntoh64 (gf_stat->bavail);
- stat->f_files = ntoh64 (gf_stat->files);
- stat->f_ffree = ntoh64 (gf_stat->ffree);
- stat->f_favail = ntoh64 (gf_stat->favail);
- stat->f_fsid = ntoh64 (gf_stat->fsid);
- stat->f_flag = ntoh64 (gf_stat->flag);
- stat->f_namemax = ntoh64 (gf_stat->namemax);
-}
-
-
-static inline void
-gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
-{
- gf_stat->bsize = hton64 (stat->f_bsize);
- gf_stat->frsize = hton64 (stat->f_frsize);
- gf_stat->blocks = hton64 (stat->f_blocks);
- gf_stat->bfree = hton64 (stat->f_bfree);
- gf_stat->bavail = hton64 (stat->f_bavail);
- gf_stat->files = hton64 (stat->f_files);
- gf_stat->ffree = hton64 (stat->f_ffree);
- gf_stat->favail = hton64 (stat->f_favail);
- gf_stat->fsid = hton64 (stat->f_fsid);
- gf_stat->flag = hton64 (stat->f_flag);
- gf_stat->namemax = hton64 (stat->f_namemax);
-}
-
-
-struct gf_flock {
- uint16_t type;
- uint16_t whence;
- uint64_t start;
- uint64_t len;
- uint32_t pid;
-} __attribute__((packed));
-
-
-static inline void
-gf_flock_to_flock (struct gf_flock *gf_flock, struct flock *flock)
-{
- flock->l_type = ntoh16 (gf_flock->type);
- flock->l_whence = ntoh16 (gf_flock->whence);
- flock->l_start = ntoh64 (gf_flock->start);
- flock->l_len = ntoh64 (gf_flock->len);
- flock->l_pid = ntoh32 (gf_flock->pid);
-}
-
-
-static inline void
-gf_flock_from_flock (struct gf_flock *gf_flock, struct flock *flock)
-{
- gf_flock->type = hton16 (flock->l_type);
- gf_flock->whence = hton16 (flock->l_whence);
- gf_flock->start = hton64 (flock->l_start);
- gf_flock->len = hton64 (flock->l_len);
- gf_flock->pid = hton32 (flock->l_pid);
-}
-
-
-struct gf_timespec {
- uint32_t tv_sec;
- uint32_t tv_nsec;
-} __attribute__((packed));
-
-
-static inline void
-gf_timespec_to_timespec (struct gf_timespec *gf_ts, struct timespec *ts)
-{
-
- ts[0].tv_sec = ntoh32 (gf_ts[0].tv_sec);
- ts[0].tv_nsec = ntoh32 (gf_ts[0].tv_nsec);
- ts[1].tv_sec = ntoh32 (gf_ts[1].tv_sec);
- ts[1].tv_nsec = ntoh32 (gf_ts[1].tv_nsec);
-}
-
-
-static inline void
-gf_timespec_from_timespec (struct gf_timespec *gf_ts, struct timespec *ts)
-{
- gf_ts[0].tv_sec = hton32 (ts[0].tv_sec);
- gf_ts[0].tv_nsec = hton32 (ts[0].tv_nsec);
- gf_ts[1].tv_sec = hton32 (ts[1].tv_sec);
- gf_ts[1].tv_nsec = hton32 (ts[1].tv_nsec);
-}
-
-
-#define GF_O_ACCMODE 003
-#define GF_O_RDONLY 00
-#define GF_O_WRONLY 01
-#define GF_O_RDWR 02
-#define GF_O_CREAT 0100
-#define GF_O_EXCL 0200
-#define GF_O_NOCTTY 0400
-#define GF_O_TRUNC 01000
-#define GF_O_APPEND 02000
-#define GF_O_NONBLOCK 04000
-#define GF_O_SYNC 010000
-#define GF_O_ASYNC 020000
-
-#define GF_O_DIRECT 040000
-#define GF_O_DIRECTORY 0200000
-#define GF_O_NOFOLLOW 0400000
-#define GF_O_NOATIME 01000000
-#define GF_O_CLOEXEC 02000000
-
-#define GF_O_LARGEFILE 0100000
-
-#define XLATE_BIT(from, to, bit) do { \
- if (from & bit) \
- to = to | GF_##bit; \
- } while (0)
-
-#define UNXLATE_BIT(from, to, bit) do { \
- if (from & GF_##bit) \
- to = to | bit; \
- } while (0)
-
-#define XLATE_ACCESSMODE(from, to) do { \
- switch (from & O_ACCMODE) { \
- case O_RDONLY: to |= GF_O_RDONLY; \
- break; \
- case O_WRONLY: to |= GF_O_WRONLY; \
- break; \
- case O_RDWR: to |= GF_O_RDWR; \
- break; \
- } \
- } while (0)
-
-#define UNXLATE_ACCESSMODE(from, to) do { \
- switch (from & GF_O_ACCMODE) { \
- case GF_O_RDONLY: to |= O_RDONLY; \
- break; \
- case GF_O_WRONLY: to |= O_WRONLY; \
- break; \
- case GF_O_RDWR: to |= O_RDWR; \
- break; \
- } \
- } while (0)
-
-static inline uint32_t
-gf_flags_from_flags (uint32_t flags)
-{
- uint32_t gf_flags = 0;
-
- XLATE_ACCESSMODE (flags, gf_flags);
-
- XLATE_BIT (flags, gf_flags, O_CREAT);
- XLATE_BIT (flags, gf_flags, O_EXCL);
- XLATE_BIT (flags, gf_flags, O_NOCTTY);
- XLATE_BIT (flags, gf_flags, O_TRUNC);
- XLATE_BIT (flags, gf_flags, O_APPEND);
- XLATE_BIT (flags, gf_flags, O_NONBLOCK);
- XLATE_BIT (flags, gf_flags, O_SYNC);
- XLATE_BIT (flags, gf_flags, O_ASYNC);
-
- XLATE_BIT (flags, gf_flags, O_DIRECT);
- XLATE_BIT (flags, gf_flags, O_DIRECTORY);
- XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
-#ifdef O_NOATIME
- XLATE_BIT (flags, gf_flags, O_NOATIME);
-#endif
-#ifdef O_CLOEXEC
- XLATE_BIT (flags, gf_flags, O_CLOEXEC);
-#endif
- XLATE_BIT (flags, gf_flags, O_LARGEFILE);
-
- return gf_flags;
-}
-
-static inline uint32_t
-gf_flags_to_flags (uint32_t gf_flags)
-{
- uint32_t flags = 0;
-
- UNXLATE_ACCESSMODE (gf_flags, flags);
-
- UNXLATE_BIT (gf_flags, flags, O_CREAT);
- UNXLATE_BIT (gf_flags, flags, O_EXCL);
- UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
- UNXLATE_BIT (gf_flags, flags, O_TRUNC);
- UNXLATE_BIT (gf_flags, flags, O_APPEND);
- UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
- UNXLATE_BIT (gf_flags, flags, O_SYNC);
- UNXLATE_BIT (gf_flags, flags, O_ASYNC);
-
- UNXLATE_BIT (gf_flags, flags, O_DIRECT);
- UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
- UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
-#ifdef O_NOATIME
- UNXLATE_BIT (gf_flags, flags, O_NOATIME);
-#endif
-#ifdef O_CLOEXEC
- UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
-#endif
- UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
-
- return flags;
-}
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- char path[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_stat_req_t;;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_stat_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t size;
- char path[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_readlink_req_t;
-typedef struct {
- struct gf_stat buf;
- char path[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_readlink_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t gen;
- uint64_t dev;
- uint32_t mode;
- char path[0]; /* NULL terminated */
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_mknod_req_t;
-typedef struct {
- struct gf_stat stat;
- struct gf_stat preparent;
- struct gf_stat postparent;
-} __attribute__((packed)) gf_fop_mknod_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t gen;
- uint32_t mode;
- char path[0]; /* NULL terminated */
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_mkdir_req_t;
-typedef struct {
- struct gf_stat stat;
- struct gf_stat preparent;
- struct gf_stat postparent;
-} __attribute__((packed)) gf_fop_mkdir_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t gen;
- char path[0]; /* NULL terminated */
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_unlink_req_t;
-typedef struct {
- struct gf_stat preparent;
- struct gf_stat postparent;
-} __attribute__((packed)) gf_fop_unlink_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t gen;
- char path[0];
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_rmdir_req_t;
-typedef struct {
- struct gf_stat preparent;
- struct gf_stat postparent;
-} __attribute__((packed)) gf_fop_rmdir_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t gen;
- char path[0];
- char bname[0];
- char linkname[0];
-} __attribute__((packed)) gf_fop_symlink_req_t;
-typedef struct {
- struct gf_stat stat;
- struct gf_stat preparent;
- struct gf_stat postparent;
-}__attribute__((packed)) gf_fop_symlink_rsp_t;
-
-
-typedef struct {
- uint64_t oldpar;
- uint64_t oldgen;
- uint64_t newpar;
- uint64_t newgen;
- char oldpath[0];
- char oldbname[0]; /* NULL terminated */
- char newpath[0];
- char newbname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_rename_req_t;
-typedef struct {
- struct gf_stat stat;
- struct gf_stat preoldparent;
- struct gf_stat postoldparent;
- struct gf_stat prenewparent;
- struct gf_stat postnewparent;
-} __attribute__((packed)) gf_fop_rename_rsp_t;
-
-
-typedef struct {
- uint64_t oldino;
- uint64_t oldgen;
- uint64_t newpar;
- uint64_t newgen;
- char oldpath[0];
- char newpath[0];
- char newbname[0];
-}__attribute__((packed)) gf_fop_link_req_t;
-typedef struct {
- struct gf_stat stat;
- struct gf_stat preparent;
- struct gf_stat postparent;
-} __attribute__((packed)) gf_fop_link_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint64_t offset;
- char path[0];
-} __attribute__((packed)) gf_fop_truncate_req_t;
-typedef struct {
- struct gf_stat prestat;
- struct gf_stat poststat;
-} __attribute__((packed)) gf_fop_truncate_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t flags;
- uint32_t wbflags;
- char path[0];
-} __attribute__((packed)) gf_fop_open_req_t;
-typedef struct {
- int64_t fd;
-} __attribute__((packed)) gf_fop_open_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_read_req_t;
-typedef struct {
- struct gf_stat stat;
- char buf[0];
-} __attribute__((packed)) gf_fop_read_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_write_req_t;
-typedef struct {
- struct gf_stat prestat;
- struct gf_stat poststat;
-} __attribute__((packed)) gf_fop_write_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- char path[0];
-} __attribute__((packed)) gf_fop_statfs_req_t;
-typedef struct {
- struct gf_statfs statfs;
-} __attribute__((packed)) gf_fop_statfs_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
-} __attribute__((packed)) gf_fop_flush_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_flush_rsp_t;
-
-
-typedef struct fsync_req {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t data;
-} __attribute__((packed)) gf_fop_fsync_req_t;
-typedef struct {
- struct gf_stat prestat;
- struct gf_stat poststat;
-} __attribute__((packed)) gf_fop_fsync_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
- char path[0];
-} __attribute__((packed)) gf_fop_setxattr_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_setxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fsetxattr_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_fsetxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
- char path[0];
-} __attribute__((packed)) gf_fop_xattrop_req_t;
-
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_xattrop_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fxattrop_req_t;
-
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fxattrop_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t namelen;
- char path[0];
- char name[0];
-} __attribute__((packed)) gf_fop_getxattr_req_t;
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_getxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t namelen;
- char name[0];
-} __attribute__((packed)) gf_fop_fgetxattr_req_t;
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fgetxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- char path[0];
- char name[0];
-} __attribute__((packed)) gf_fop_removexattr_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_removexattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- char path[0];
-} __attribute__((packed)) gf_fop_opendir_req_t;
-typedef struct {
- int64_t fd;
-} __attribute__((packed)) gf_fop_opendir_rsp_t;
-
-
-typedef struct fsyncdir_req {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- int32_t data;
-} __attribute__((packed)) gf_fop_fsyncdir_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_fsyncdir_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_readdir_req_t;
-typedef struct {
- uint32_t size;
- char buf[0];
-} __attribute__((packed)) gf_fop_readdir_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_readdirp_req_t;
-typedef struct {
- uint32_t size;
- char buf[0];
-} __attribute__((packed)) gf_fop_readdirp_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t mask;
- char path[0];
-} __attribute__((packed)) gf_fop_access_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_access_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t gen;
- uint32_t flags;
- uint32_t mode;
- char path[0];
- char bname[0];
-} __attribute__((packed)) gf_fop_create_req_t;
-typedef struct {
- struct gf_stat stat;
- uint64_t fd;
- struct gf_stat preparent;
- struct gf_stat postparent;
-} __attribute__((packed)) gf_fop_create_rsp_t;
-
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint64_t offset;
-} __attribute__((packed)) gf_fop_ftruncate_req_t;
-typedef struct {
- struct gf_stat prestat;
- struct gf_stat poststat;
-} __attribute__((packed)) gf_fop_ftruncate_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
-} __attribute__((packed)) gf_fop_fstat_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_fstat_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t cmd;
- uint32_t type;
- struct gf_flock flock;
-} __attribute__((packed)) gf_fop_lk_req_t;
-typedef struct {
- struct gf_flock flock;
-} __attribute__((packed)) gf_fop_lk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t cmd;
- uint32_t type;
- struct gf_flock flock;
- char path[0];
- char volume[0];
-} __attribute__((packed)) gf_fop_inodelk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_inodelk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t cmd;
- uint32_t type;
- struct gf_flock flock;
- char volume[0];
-} __attribute__((packed)) gf_fop_finodelk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_finodelk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t cmd;
- uint32_t type;
- uint64_t namelen;
- char path[0];
- char name[0];
- char volume[0];
-} __attribute__((packed)) gf_fop_entrylk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_entrylk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
- uint32_t cmd;
- uint32_t type;
- uint64_t namelen;
- char name[0];
- char volume[0];
-} __attribute__((packed)) gf_fop_fentrylk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_fentrylk_rsp_t;
-
-typedef struct {
- uint64_t ino; /* NOTE: used only in case of 'root' lookup */
- uint64_t par;
- uint64_t gen;
- uint32_t flags;
- uint32_t dictlen;
- char path[0];
- char bname[0];
- char dict[0];
-} __attribute__((packed)) gf_fop_lookup_req_t;
-typedef struct {
- struct gf_stat stat;
- struct gf_stat postparent;
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_lookup_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- uint32_t flag;
- char path[0];
-} __attribute__((packed)) gf_fop_checksum_req_t;
-typedef struct {
- unsigned char fchecksum[0];
- unsigned char dchecksum[0];
-} __attribute__((packed)) gf_fop_checksum_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- struct gf_stat stbuf;
- int32_t valid;
- char path[0];
-} __attribute__((packed)) gf_fop_setattr_req_t;
-typedef struct {
- struct gf_stat statpre;
- struct gf_stat statpost;
-} __attribute__((packed)) gf_fop_setattr_rsp_t;
-
-typedef struct {
- int64_t fd;
- struct gf_stat stbuf;
- int32_t valid;
-} __attribute__((packed)) gf_fop_fsetattr_req_t;
-typedef struct {
- struct gf_stat statpre;
- struct gf_stat statpost;
-} __attribute__((packed)) gf_fop_fsetattr_rsp_t;
-
-typedef struct {
- int64_t fd;
- uint64_t offset;
- uint32_t len;
-} __attribute__((packed)) gf_fop_rchecksum_req_t;
-typedef struct {
- uint32_t weak_checksum;
- unsigned char strong_checksum[0];
-} __attribute__((packed)) gf_fop_rchecksum_rsp_t;
-
-typedef struct {
- uint32_t flags;
- uint32_t keylen;
- char key[0];
-} __attribute__((packed)) gf_mop_getspec_req_t;
-typedef struct {
- char spec[0];
-} __attribute__((packed)) gf_mop_getspec_rsp_t;
-
-
-typedef struct {
- uint32_t msglen;
- char msg[0];
-} __attribute__((packed)) gf_mop_log_req_t;
-typedef struct {
-} __attribute__((packed)) gf_mop_log_rsp_t;
-
-
-typedef struct {
- uint32_t dict_len;
- char buf[0];
-} __attribute__((packed)) gf_mop_setvolume_req_t;
-typedef struct {
- uint32_t dict_len;
- char buf[0];
-} __attribute__((packed)) gf_mop_setvolume_rsp_t;
-
-
-typedef struct {
-} __attribute__((packed)) gf_mop_ping_req_t;
-typedef struct {
-} __attribute__((packed)) gf_mop_ping_rsp_t;
-
-typedef struct {
- uint32_t flags;
- char buf[0];
-} __attribute__((packed)) gf_mop_notify_req_t;
-typedef struct {
- uint32_t flags;
- char buf[0];
-} __attribute__((packed)) gf_mop_notify_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
-} __attribute__((packed)) gf_cbk_releasedir_req_t;
-typedef struct {
-} __attribute__((packed)) gf_cbk_releasedir_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t gen;
- int64_t fd;
-} __attribute__((packed)) gf_cbk_release_req_t;
-typedef struct {
-} __attribute__((packed)) gf_cbk_release_rsp_t;
-
-
-typedef struct {
- uint32_t count;
- uint64_t ino_array[0];
-} __attribute__((packed)) gf_cbk_forget_req_t;
-typedef struct { } __attribute__((packed)) gf_cbk_forget_rsp_t;
-
-
-typedef struct {
- uint32_t pid;
- uint32_t uid;
- uint32_t gid;
-
- /* Number of groups being sent through the array above. */
- uint32_t ngrps;
-
- /* Array of groups to which the uid belongs apart from the primary group
- * in gid.
- */
- uint32_t groups[GF_REQUEST_MAXGROUPS];
-
- uint64_t lk_owner;
-} __attribute__ ((packed)) gf_hdr_req_t;
-
-
-typedef struct {
- uint32_t op_ret;
- uint32_t op_errno;
-} __attribute__ ((packed)) gf_hdr_rsp_t;
-
-
-typedef struct {
- uint64_t callid;
- uint32_t type;
- uint32_t op;
- uint32_t size;
- union {
- gf_hdr_req_t req;
- gf_hdr_rsp_t rsp;
- } __attribute__ ((packed));
-} __attribute__ ((packed)) gf_hdr_common_t;
-
-
-static inline gf_hdr_common_t *
-__gf_hdr_new (int size)
-{
- gf_hdr_common_t *hdr = NULL;
-
- /* TODO: use mem-pool */
- hdr = GF_CALLOC (sizeof (gf_hdr_common_t) + size, 1,
- gf_common_mt_gf_hdr_common_t);
-
- if (!hdr) {
- return NULL;
- }
-
- hdr->size = hton32 (size);
-
- return hdr;
-}
-
-
-#define gf_hdr_len(type, x) (sizeof (gf_hdr_common_t) + sizeof (*type) + x)
-#define gf_hdr_new(type, x) __gf_hdr_new (sizeof (*type) + x)
-
-
-static inline void *
-gf_param (gf_hdr_common_t *hdr)
-{
- return ((void *)hdr) + sizeof (*hdr);
-}
-
-
-struct gf_dirent_nb {
- uint64_t d_ino;
- uint64_t d_off;
- uint32_t d_len;
- uint32_t d_type;
- struct gf_stat d_stat;
- char d_name[0];
-} __attribute__((packed));
-
-int
-gf_dirent_unserialize (gf_dirent_t *entries, const char *buf, size_t buf_size);
-int
-gf_dirent_serialize (gf_dirent_t *entries, char *buf, size_t buf_size);
-
-int protocol_common_init (void);
-
-#endif
diff --git a/xlators/protocol/legacy/lib/src/transport.c b/xlators/protocol/legacy/lib/src/transport.c
deleted file mode 100644
index d460d02096e..00000000000
--- a/xlators/protocol/legacy/lib/src/transport.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/poll.h>
-#include <fnmatch.h>
-#include <stdint.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "logging.h"
-#include "transport.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "list.h"
-
-
-transport_t *
-transport_load (dict_t *options,
- xlator_t *xl)
-{
- struct transport *trans = NULL, *return_trans = NULL;
- char *name = NULL;
- void *handle = NULL;
- char *type = NULL;
- char str[] = "ERROR";
- int32_t ret = -1;
- int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0;
- volume_opt_list_t *vol_opt = NULL;
-
- GF_VALIDATE_OR_GOTO("transport", options, fail);
- GF_VALIDATE_OR_GOTO("transport", xl, fail);
-
- trans = GF_CALLOC (1, sizeof (struct transport),
- gf_common_mt_transport);
- GF_VALIDATE_OR_GOTO("transport", trans, fail);
-
- trans->xl = xl;
- type = str;
-
- /* Backward compatibility */
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- ret = dict_set_str (options, "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- gf_log ("transport", GF_LOG_WARNING,
- "missing 'option transport-type'. defaulting to "
- "\"socket\"");
- } else {
- {
- /* Backward compatibility to handle * /client,
- * * /server.
- */
- char *tmp = strchr (type, '/');
- if (tmp)
- *tmp = '\0';
- }
-
- is_tcp = strcmp (type, "tcp");
- is_unix = strcmp (type, "unix");
- is_ibsdp = strcmp (type, "ib-sdp");
- if ((is_tcp == 0) ||
- (is_unix == 0) ||
- (is_ibsdp == 0)) {
- if (is_unix == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "unix");
- if (is_ibsdp == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "inet-sdp");
-
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting address-family failed");
-
- ret = dict_set_str (options,
- "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- }
- }
-
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- GF_FREE (trans);
- gf_log ("transport", GF_LOG_ERROR,
- "'option transport-type <xx>' missing in volume '%s'",
- xl->name);
- goto fail;
- }
-
- ret = gf_asprintf (&name, "%s/%s.so", TRANSPORTDIR, type);
- if (-1 == ret) {
- gf_log ("transport", GF_LOG_ERROR, "asprintf failed");
- goto fail;
- }
- gf_log ("transport", GF_LOG_DEBUG,
- "attempt to load file %s", name);
-
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (handle == NULL) {
- gf_log ("transport", GF_LOG_ERROR, "%s", dlerror ());
- gf_log ("transport", GF_LOG_ERROR,
- "volume '%s': transport-type '%s' is not valid or "
- "not found on this machine",
- xl->name, type);
- GF_FREE (name);
- GF_FREE (trans);
- goto fail;
- }
- GF_FREE (name);
-
- trans->ops = dlsym (handle, "tops");
- if (trans->ops == NULL) {
- gf_log ("transport", GF_LOG_ERROR,
- "dlsym (transport_ops) on %s", dlerror ());
- GF_FREE (trans);
- goto fail;
- }
-
- trans->init = dlsym (handle, "init");
- if (trans->init == NULL) {
- gf_log ("transport", GF_LOG_ERROR,
- "dlsym (gf_transport_init) on %s", dlerror ());
- GF_FREE (trans);
- goto fail;
- }
-
- trans->fini = dlsym (handle, "fini");
- if (trans->fini == NULL) {
- gf_log ("transport", GF_LOG_ERROR,
- "dlsym (gf_transport_fini) on %s", dlerror ());
- GF_FREE (trans);
- goto fail;
- }
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("transport", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- list_add_tail (&vol_opt->list, &xl->volume_options);
- if (-1 ==
- validate_xlator_volume_options (xl,
- vol_opt->given_opt)) {
- gf_log ("transport", GF_LOG_ERROR,
- "volume option validation failed");
- GF_FREE (trans);
- goto fail;
- }
- }
-
- ret = trans->init (trans);
- if (ret != 0) {
- gf_log ("transport", GF_LOG_ERROR,
- "'%s' initialization failed", type);
- GF_FREE (trans);
- goto fail;
- }
-
- pthread_mutex_init (&trans->lock, NULL);
- return_trans = trans;
-fail:
- return return_trans;
-}
-
-
-int32_t
-transport_submit (transport_t *this, char *buf, int32_t len,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- int32_t ret = -1;
- transport_t *peer_trans = NULL;
- struct iobuf *iobuf = NULL;
- struct transport_msg *msg = NULL;
-
- if (this->peer_trans) {
- peer_trans = this->peer_trans;
-
- msg = GF_CALLOC (1, sizeof (*msg),
- gf_common_mt_transport_msg);
- if (!msg) {
- return -ENOMEM;
- }
-
- msg->hdr = buf;
- msg->hdrlen = len;
-
- if (vector) {
- iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
- if (!iobuf) {
- GF_FREE (msg->hdr);
- GF_FREE (msg);
- return -ENOMEM;
- }
-
- iov_unload (iobuf->ptr, vector, count);
- msg->iobuf = iobuf;
- }
-
- pthread_mutex_lock (&peer_trans->handover.mutex);
- {
- list_add_tail (&msg->list, &peer_trans->handover.msgs);
- pthread_cond_broadcast (&peer_trans->handover.cond);
- }
- pthread_mutex_unlock (&peer_trans->handover.mutex);
-
- return 0;
- }
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
- GF_VALIDATE_OR_GOTO("transport", this->ops, fail);
-
- ret = this->ops->submit (this, buf, len, vector, count, iobref);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_connect (transport_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- ret = this->ops->connect (this);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_listen (transport_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- ret = this->ops->listen (this);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_disconnect (transport_t *this)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- ret = this->ops->disconnect (this);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_destroy (transport_t *this)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- if (this->fini)
- this->fini (this);
-
- pthread_mutex_destroy (&this->lock);
- GF_FREE (this);
-fail:
- return ret;
-}
-
-
-transport_t *
-transport_ref (transport_t *this)
-{
- transport_t *return_this = NULL;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- pthread_mutex_lock (&this->lock);
- {
- this->refcount ++;
- }
- pthread_mutex_unlock (&this->lock);
-
- return_this = this;
-fail:
- return return_this;
-}
-
-
-int32_t
-transport_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- if (this->peer_trans) {
- *hdr_p = this->handover.msg->hdr;
- *hdrlen_p = this->handover.msg->hdrlen;
- *iobuf_p = this->handover.msg->iobuf;
-
- return 0;
- }
-
- ret = this->ops->receive (this, hdr_p, hdrlen_p, iobuf_p);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_unref (transport_t *this)
-{
- int32_t refcount = 0;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- pthread_mutex_lock (&this->lock);
- {
- refcount = --this->refcount;
- }
- pthread_mutex_unlock (&this->lock);
-
- if (refcount == 0) {
- xlator_notify (this->xl, GF_EVENT_TRANSPORT_CLEANUP, this);
- transport_destroy (this);
- }
-
- ret = 0;
-fail:
- return ret;
-}
-
-
-void *
-transport_peerproc (void *trans_data)
-{
- transport_t *trans = NULL;
- struct transport_msg *msg = NULL;
-
- trans = trans_data;
-
- while (1) {
- pthread_mutex_lock (&trans->handover.mutex);
- {
- while (list_empty (&trans->handover.msgs))
- pthread_cond_wait (&trans->handover.cond,
- &trans->handover.mutex);
-
- msg = list_entry (trans->handover.msgs.next,
- struct transport_msg, list);
-
- list_del_init (&msg->list);
- }
- pthread_mutex_unlock (&trans->handover.mutex);
-
- trans->handover.msg = msg;
-
- xlator_notify (trans->xl, GF_EVENT_POLLIN, trans);
-
- GF_FREE (msg);
- }
-}
-
-
-int
-transport_setpeer (transport_t *trans, transport_t *peer_trans)
-{
- trans->peer_trans = transport_ref (peer_trans);
-
- INIT_LIST_HEAD (&trans->handover.msgs);
- pthread_cond_init (&trans->handover.cond, NULL);
- pthread_mutex_init (&trans->handover.mutex, NULL);
- pthread_create (&trans->handover.thread, NULL,
- transport_peerproc, trans);
-
- peer_trans->peer_trans = transport_ref (trans);
-
- INIT_LIST_HEAD (&peer_trans->handover.msgs);
- pthread_cond_init (&peer_trans->handover.cond, NULL);
- pthread_mutex_init (&peer_trans->handover.mutex, NULL);
- pthread_create (&peer_trans->handover.thread, NULL,
- transport_peerproc, peer_trans);
-
- return 0;
-}
diff --git a/xlators/protocol/legacy/lib/src/transport.h b/xlators/protocol/legacy/lib/src/transport.h
deleted file mode 100644
index f0623d5b417..00000000000
--- a/xlators/protocol/legacy/lib/src/transport.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __TRANSPORT_H__
-#define __TRANSPORT_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <inttypes.h>
-
-struct transport_ops;
-typedef struct transport transport_t;
-
-#include "xlator.h"
-#include "dict.h"
-#include "compat.h"
-
-typedef struct peer_info {
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- char identifier[UNIX_PATH_MAX];
-}peer_info_t;
-
-struct transport_msg {
- struct list_head list;
- char *hdr;
- int hdrlen;
- struct iobuf *iobuf;
-};
-
-struct transport {
- struct transport_ops *ops;
- void *private;
- void *xl_private;
- pthread_mutex_t lock;
- int32_t refcount;
-
- xlator_t *xl;
- void *dnscache;
- data_t *buf;
- int32_t (*init) (transport_t *this);
- void (*fini) (transport_t *this);
- /* int (*notify) (transport_t *this, int event, void *data); */
- peer_info_t peerinfo;
- peer_info_t myinfo;
-
- transport_t *peer_trans;
- struct {
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- pthread_t thread;
- struct list_head msgs;
- struct transport_msg *msg;
- } handover;
-
-};
-
-struct transport_ops {
- int32_t (*receive) (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p);
- int32_t (*submit) (transport_t *this, char *buf, int len,
- struct iovec *vector, int count,
- struct iobref *iobref);
- int32_t (*connect) (transport_t *this);
- int32_t (*listen) (transport_t *this);
- int32_t (*disconnect) (transport_t *this);
-};
-
-
-int32_t transport_listen (transport_t *this);
-int32_t transport_connect (transport_t *this);
-int32_t transport_disconnect (transport_t *this);
-int32_t transport_notify (transport_t *this, int event);
-int32_t transport_submit (transport_t *this, char *buf, int len,
- struct iovec *vector, int count,
- struct iobref *iobref);
-int32_t transport_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p);
-int32_t transport_destroy (transport_t *this);
-
-transport_t *transport_load (dict_t *options, xlator_t *xl);
-transport_t *transport_ref (transport_t *trans);
-int32_t transport_unref (transport_t *trans);
-
-int transport_setpeer (transport_t *trans, transport_t *trans_peer);
-
-#endif /* __TRANSPORT_H__ */
diff --git a/xlators/protocol/legacy/server/src/Makefile.am b/xlators/protocol/legacy/server/src/Makefile.am
deleted file mode 100644
index 3ef0b81bbe2..00000000000
--- a/xlators/protocol/legacy/server/src/Makefile.am
+++ /dev/null
@@ -1,24 +0,0 @@
-
-xlator_LTLIBRARIES = server-old.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/protocol
-
-server_old_la_LDFLAGS = -module -avoidversion
-
-server_old_la_SOURCES = server-protocol.c server-resolve.c server-helpers.c
-server_old_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la \
- $(top_builddir)/xlators/protocol/lib/src/libgfproto1.la
-
-noinst_HEADERS = server-protocol.h server-helpers.h server-mem-types.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- -I$(top_srcdir)/contrib/md5/ \
- -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
- $(GF_CFLAGS) -I$(top_srcdir)/xlators/protocol/legacy/lib/src \
- -I$(top_srcdir)/xlators/protocol/lib/src
-
-CLEANFILES =
-
-install-data-hook:
- ln -sf server-old.so $(DESTDIR)$(xlatordir)/server.so
diff --git a/xlators/protocol/legacy/server/src/server-helpers.c b/xlators/protocol/legacy/server/src/server-helpers.c
deleted file mode 100644
index 595916a3656..00000000000
--- a/xlators/protocol/legacy/server/src/server-helpers.c
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "server-protocol.h"
-#include "server-helpers.h"
-
-
-
-void
-server_loc_wipe (loc_t *loc)
-{
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
-
- if (loc->path)
- GF_FREE ((char *)loc->path);
-}
-
-
-void
-server_resolve_wipe (server_resolve_t *resolve)
-{
- struct resolve_comp *comp = NULL;
- int i = 0;
-
- if (resolve->path)
- GF_FREE (resolve->path);
-
- if (resolve->bname)
- GF_FREE (resolve->bname);
-
- if (resolve->resolved)
- GF_FREE (resolve->resolved);
-
- loc_wipe (&resolve->deep_loc);
-
- comp = resolve->components;
- if (comp) {
- for (i = 0; comp[i].basename; i++) {
- if (comp[i].inode)
- inode_unref (comp[i].inode);
- }
- GF_FREE (resolve->components);
- }
-}
-
-
-void
-free_server_state (server_state_t *state)
-{
- if (state->trans) {
- transport_unref (state->trans);
- state->trans = NULL;
- }
-
- if (state->fd) {
- fd_unref (state->fd);
- state->fd = NULL;
- }
-
- if (state->iobref) {
- iobref_unref (state->iobref);
- state->iobref = NULL;
- }
-
- if (state->iobuf) {
- iobuf_unref (state->iobuf);
- state->iobuf = NULL;
- }
-
- if (state->dict) {
- dict_unref (state->dict);
- state->dict = NULL;
- }
-
- if (state->volume)
- GF_FREE ((char *)state->volume);
-
- if (state->name)
- GF_FREE (state->name);
-
- server_loc_wipe (&state->loc);
- server_loc_wipe (&state->loc2);
-
- server_resolve_wipe (&state->resolve);
- server_resolve_wipe (&state->resolve2);
-
- GF_FREE (state);
-}
-
-static struct _lock_table *
-gf_lock_table_new (void)
-{
- struct _lock_table *new = NULL;
-
- new = GF_CALLOC (1, sizeof (struct _lock_table),
- gf_server_mt_lock_table);
- if (new == NULL) {
- gf_log ("server-protocol", GF_LOG_CRITICAL,
- "failed to allocate memory for new lock table");
- goto out;
- }
- INIT_LIST_HEAD (&new->dir_lockers);
- INIT_LIST_HEAD (&new->file_lockers);
- LOCK_INIT (&new->lock);
-out:
- return new;
-}
-
-
-static int
-gf_server_nop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state)
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-static int
-do_lock_table_cleanup (xlator_t *this, server_connection_t *conn,
- call_frame_t *frame, struct _lock_table *ltable)
-{
- struct list_head file_lockers, dir_lockers;
- call_frame_t *tmp_frame = NULL;
- struct flock flock = {0, };
- xlator_t *bound_xl = NULL;
- struct _locker *locker = NULL, *tmp = NULL;
- int ret = -1;
-
- bound_xl = conn->bound_xl;
- INIT_LIST_HEAD (&file_lockers);
- INIT_LIST_HEAD (&dir_lockers);
-
- LOCK (&ltable->lock);
- {
- list_splice_init (&ltable->file_lockers,
- &file_lockers);
-
- list_splice_init (&ltable->dir_lockers, &dir_lockers);
- }
- UNLOCK (&ltable->lock);
-
- GF_FREE (ltable);
-
- flock.l_type = F_UNLCK;
- flock.l_start = 0;
- flock.l_len = 0;
- list_for_each_entry_safe (locker,
- tmp, &file_lockers, lockers) {
- tmp_frame = copy_frame (frame);
- if (tmp_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- /*
- pid = 0 is a special case that tells posix-locks
- to release all locks from this transport
- */
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->finodelk,
- locker->volume,
- locker->fd, F_SETLK, &flock);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->inodelk,
- locker->volume,
- &(locker->loc), F_SETLK, &flock);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
-
- tmp = NULL;
- locker = NULL;
- list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
- tmp_frame = copy_frame (frame);
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->fentrylk,
- locker->volume,
- locker->fd, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->entrylk,
- locker->volume,
- &(locker->loc), NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
- ret = 0;
-
-out:
- return ret;
-}
-
-
-static int
-server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- fd_t *fd = NULL;
-
- fd = frame->local;
-
- fd_unref (fd);
- frame->local = NULL;
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-static int
-do_fd_cleanup (xlator_t *this, server_connection_t *conn, call_frame_t *frame,
- fdentry_t *fdentries, int fd_count)
-{
- fd_t *fd = NULL;
- int i = 0, ret = -1;
- call_frame_t *tmp_frame = NULL;
- xlator_t *bound_xl = NULL;
-
- bound_xl = conn->bound_xl;
- for (i = 0;i < fd_count; i++) {
- fd = fdentries[i].fd;
-
- if (fd != NULL) {
- tmp_frame = copy_frame (frame);
- if (tmp_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- tmp_frame->local = fd;
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
- tmp_frame->root->lk_owner = 0;
- STACK_WIND (tmp_frame,
- server_connection_cleanup_flush_cbk,
- bound_xl, bound_xl->fops->flush, fd);
- }
- }
-
- GF_FREE (fdentries);
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-do_connection_cleanup (xlator_t *this, server_connection_t *conn,
- struct _lock_table *ltable, fdentry_t *fdentries, int fd_count)
-{
- int ret = 0;
- int saved_ret = 0;
- call_frame_t *frame = NULL;
- server_state_t *state = NULL;
-
- frame = create_frame (this, this->ctx->pool);
- if (frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- saved_ret = do_lock_table_cleanup (this, conn, frame, ltable);
-
- if (fdentries != NULL) {
- ret = do_fd_cleanup (this, conn, frame, fdentries, fd_count);
- }
-
- state = CALL_STATE (frame);
- if (state)
- GF_FREE (state);
-
- STACK_DESTROY (frame->root);
-
- if (saved_ret || ret) {
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-
-int
-gf_server_connection_cleanup (xlator_t *this, server_connection_t *conn)
-{
- char do_cleanup = 0;
- struct _lock_table *ltable = NULL;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
- int ret = 0;
-
- if (conn == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&conn->lock);
- {
- conn->active_transports--;
- if (conn->active_transports == 0) {
- if (conn->ltable) {
- ltable = conn->ltable;
- conn->ltable = gf_lock_table_new ();
- }
-
- if (conn->fdtable) {
- fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable,
- &fd_count);
- }
- do_cleanup = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (do_cleanup && conn->bound_xl)
- ret = do_connection_cleanup (this, conn, ltable, fdentries, fd_count);
-
-out:
- return ret;
-}
-
-
-static int
-server_connection_destroy (xlator_t *this, server_connection_t *conn)
-{
- call_frame_t *frame = NULL, *tmp_frame = NULL;
- xlator_t *bound_xl = NULL;
- int32_t ret = -1;
- server_state_t *state = NULL;
- struct list_head file_lockers;
- struct list_head dir_lockers;
- struct _lock_table *ltable = NULL;
- struct _locker *locker = NULL, *tmp = NULL;
- struct flock flock = {0,};
- fd_t *fd = NULL;
- int32_t i = 0;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
-
- if (conn == NULL) {
- ret = 0;
- goto out;
- }
-
- bound_xl = (xlator_t *) (conn->bound_xl);
-
- if (bound_xl) {
- /* trans will have ref_count = 1 after this call, but it's
- ok since this function is called in
- GF_EVENT_TRANSPORT_CLEANUP */
- frame = create_frame (this, this->ctx->pool);
-
- pthread_mutex_lock (&(conn->lock));
- {
- if (conn->ltable) {
- ltable = conn->ltable;
- conn->ltable = NULL;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- INIT_LIST_HEAD (&file_lockers);
- INIT_LIST_HEAD (&dir_lockers);
-
- if (ltable) {
- LOCK (&ltable->lock);
- {
- list_splice_init (&ltable->file_lockers,
- &file_lockers);
-
- list_splice_init (&ltable->dir_lockers, &dir_lockers);
- }
- UNLOCK (&ltable->lock);
- GF_FREE (ltable);
- }
-
- flock.l_type = F_UNLCK;
- flock.l_start = 0;
- flock.l_len = 0;
- list_for_each_entry_safe (locker,
- tmp, &file_lockers, lockers) {
- tmp_frame = copy_frame (frame);
- /*
- pid = 0 is a special case that tells posix-locks
- to release all locks from this transport
- */
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->finodelk,
- locker->volume,
- locker->fd, F_SETLK, &flock);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->inodelk,
- locker->volume,
- &(locker->loc), F_SETLK, &flock);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
-
- tmp = NULL;
- locker = NULL;
- list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
- tmp_frame = copy_frame (frame);
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->fentrylk,
- locker->volume,
- locker->fd, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, gf_server_nop_cbk,
- bound_xl,
- bound_xl->fops->entrylk,
- locker->volume,
- &(locker->loc), NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
-
- pthread_mutex_lock (&(conn->lock));
- {
- if (conn->fdtable) {
- fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable,
- &fd_count);
- gf_fd_fdtable_destroy (conn->fdtable);
- conn->fdtable = NULL;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (fdentries != NULL) {
- for (i = 0; i < fd_count; i++) {
- fd = fdentries[i].fd;
- if (fd != NULL) {
- tmp_frame = copy_frame (frame);
- tmp_frame->local = fd;
-
- STACK_WIND (tmp_frame,
- server_connection_cleanup_flush_cbk,
- bound_xl,
- bound_xl->fops->flush,
- fd);
- }
- }
- GF_FREE (fdentries);
- }
- }
-
- if (frame) {
- state = CALL_STATE (frame);
- if (state)
- GF_FREE (state);
- STACK_DESTROY (frame->root);
- }
-
- gf_log (this->name, GF_LOG_INFO, "destroyed connection of %s",
- conn->id);
-
- GF_FREE (conn->id);
- GF_FREE (conn);
-
-out:
- return ret;
-}
-
-
-server_connection_t *
-gf_server_connection_get (xlator_t *this, const char *id)
-{
- server_connection_t *conn = NULL;
- server_connection_t *trav = NULL;
- server_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_for_each_entry (trav, &conf->conns, list) {
- if (!strcmp (id, trav->id)) {
- conn = trav;
- break;
- }
- }
-
- if (!conn) {
- conn = (void *) GF_CALLOC (1, sizeof (*conn),
- gf_server_mt_server_connection_t);
-
- conn->id = gf_strdup (id);
- conn->fdtable = gf_fd_fdtable_alloc ();
- conn->ltable = gf_lock_table_new ();
-
- pthread_mutex_init (&conn->lock, NULL);
-
- list_add (&conn->list, &conf->conns);
- }
-
- conn->ref++;
- conn->active_transports++;
- }
- pthread_mutex_unlock (&conf->mutex);
-
- return conn;
-}
-
-
-void
-gf_server_connection_put (xlator_t *this, server_connection_t *conn)
-{
- server_conf_t *conf = NULL;
- server_connection_t *todel = NULL;
-
- if (conn == NULL) {
- goto out;
- }
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- conn->ref--;
-
- if (!conn->ref) {
- list_del_init (&conn->list);
- todel = conn;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (todel) {
- server_connection_destroy (this, todel);
- }
-
-out:
- return;
-}
diff --git a/xlators/protocol/legacy/server/src/server-helpers.h b/xlators/protocol/legacy/server/src/server-helpers.h
deleted file mode 100644
index 137b2e9c2c3..00000000000
--- a/xlators/protocol/legacy/server/src/server-helpers.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __SERVER_HELPERS_H__
-#define __SERVER_HELPERS_H__
-
-#define CALL_STATE(frame) ((server_state_t *)frame->root->state)
-
-#define BOUND_XL(frame) ((xlator_t *) CALL_STATE(frame)->bound_xl)
-
-#define TRANSPORT_FROM_FRAME(frame) ((transport_t *) CALL_STATE(frame)->trans)
-
-#define SERVER_CONNECTION(frame) \
- ((server_connection_t *) TRANSPORT_FROM_FRAME(frame)->xl_private)
-
-#define SERVER_CONF(frame) \
- ((server_conf_t *)TRANSPORT_FROM_FRAME(frame)->xl->private)
-
-#define TRANSPORT_FROM_XLATOR(this) ((((server_conf_t *)this->private))->trans)
-
-#define INODE_LRU_LIMIT(this) \
- (((server_conf_t *)(this->private))->inode_lru_limit)
-
-#define IS_ROOT_INODE(inode) (inode == inode->table->root)
-
-#define IS_NOT_ROOT(pathlen) ((pathlen > 2)? 1 : 0)
-
-void free_state (server_state_t *state);
-
-void server_loc_wipe (loc_t *loc);
-
-#endif /* __SERVER_HELPERS_H__ */
diff --git a/xlators/protocol/legacy/server/src/server-mem-types.h b/xlators/protocol/legacy/server/src/server-mem-types.h
deleted file mode 100644
index 86877d79dac..00000000000
--- a/xlators/protocol/legacy/server/src/server-mem-types.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __AFR_MEM_TYPES_H__
-#define __AFR_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_server_mem_types_ {
- gf_server_mt_dir_entry_t = gf_common_mt_end + 1,
- gf_server_mt_volfile_ctx,
- gf_server_mt_server_state_t,
- gf_server_mt_server_conf_t,
- gf_server_mt_locker,
- gf_server_mt_lock_table,
- gf_server_mt_char,
- gf_server_mt_server_connection_t,
- gf_server_mt_resolve_comp,
- gf_server_mt_end
-};
-#endif
-
diff --git a/xlators/protocol/legacy/server/src/server-protocol.c b/xlators/protocol/legacy/server/src/server-protocol.c
deleted file mode 100644
index da0303019a3..00000000000
--- a/xlators/protocol/legacy/server/src/server-protocol.c
+++ /dev/null
@@ -1,6629 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is GF_FREE software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <time.h>
-#include <sys/uio.h>
-#include <sys/resource.h>
-
-#include <libgen.h>
-
-#include "transport.h"
-#include "fnmatch.h"
-#include "xlator.h"
-#include "protocol.h"
-#include "server-protocol.h"
-#include "server-helpers.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "list.h"
-#include "dict.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "md5.h"
-
-
-static void
-print_caller (char *str, int size, call_frame_t *frame)
-{
- int filled = 0;
- server_state_t *state = NULL;
- transport_t *trans = NULL;
-
- state = CALL_STATE (frame);
- trans = state->trans;
-
- filled += snprintf (str + filled, size - filled,
- " Callid=%"PRId64", Client=%s",
- frame->root->unique,
- trans->peerinfo.identifier);
-
- return;
-}
-
-
-static void
-server_print_resolve (char *str, int size, server_resolve_t *resolve)
-{
- int filled = 0;
-
- if (!resolve) {
- snprintf (str, size, "<nul>");
- return;
- }
-
- filled += snprintf (str + filled, size - filled,
- " Resolve={");
- if (resolve->fd_no != -1)
- filled += snprintf (str + filled, size - filled,
- "fd=%"PRId64",", (uint64_t) resolve->fd_no);
- if (resolve->ino)
- filled += snprintf (str + filled, size - filled,
- "ino=%"PRIu64",", (uint64_t) resolve->ino);
- if (resolve->par)
- filled += snprintf (str + filled, size - filled,
- "par=%"PRIu64",", (uint64_t) resolve->par);
- if (resolve->gen)
- filled += snprintf (str + filled, size - filled,
- "gen=%"PRIu64",", (uint64_t) resolve->gen);
- if (resolve->bname)
- filled += snprintf (str + filled, size - filled,
- "bname=%s,", resolve->bname);
- if (resolve->path)
- filled += snprintf (str + filled, size - filled,
- "path=%s", resolve->path);
-
- filled += snprintf (str + filled, size - filled, "}");
-}
-
-
-static void
-server_print_loc (char *str, int size, loc_t *loc)
-{
- int filled = 0;
-
- if (!loc) {
- snprintf (str, size, "<nul>");
- return;
- }
-
- filled += snprintf (str + filled, size - filled,
- " Loc={");
-
- if (loc->path)
- filled += snprintf (str + filled, size - filled,
- "path=%s,", loc->path);
- if (loc->inode)
- filled += snprintf (str + filled, size - filled,
- "inode=%p,", loc->inode);
- if (loc->parent)
- filled += snprintf (str + filled, size - filled,
- "parent=%p", loc->parent);
-
- filled += snprintf (str + filled, size - filled, "}");
-}
-
-
-static void
-server_print_params (char *str, int size, server_state_t *state)
-{
- int filled = 0;
-
- filled += snprintf (str + filled, size - filled,
- " Params={");
-
- if (state->fd)
- filled += snprintf (str + filled, size - filled,
- "fd=%p,", state->fd);
- if (state->valid)
- filled += snprintf (str + filled, size - filled,
- "valid=%d,", state->valid);
- if (state->flags)
- filled += snprintf (str + filled, size - filled,
- "flags=%d,", state->flags);
- if (state->wbflags)
- filled += snprintf (str + filled, size - filled,
- "wbflags=%d,", state->wbflags);
- if (state->size)
- filled += snprintf (str + filled, size - filled,
- "size=%zu,", state->size);
- if (state->offset)
- filled += snprintf (str + filled, size - filled,
- "offset=%"PRId64",", state->offset);
- if (state->cmd)
- filled += snprintf (str + filled, size - filled,
- "cmd=%d,", state->cmd);
- if (state->type)
- filled += snprintf (str + filled, size - filled,
- "type=%d,", state->type);
- if (state->name)
- filled += snprintf (str + filled, size - filled,
- "name=%s,", state->name);
- if (state->mask)
- filled += snprintf (str + filled, size - filled,
- "mask=%d,", state->mask);
- if (state->volume)
- filled += snprintf (str + filled, size - filled,
- "volume=%s,", state->volume);
-
- filled += snprintf (str + filled, size - filled,
- "bound_xl=%s}", state->bound_xl->name);
-}
-
-
-static int
-server_resolve_is_empty (server_resolve_t *resolve)
-{
- if (resolve->fd_no != -1)
- return 0;
-
- if (resolve->ino != 0)
- return 0;
-
- if (resolve->gen != 0)
- return 0;
-
- if (resolve->par != 0)
- return 0;
-
- if (resolve->path != 0)
- return 0;
-
- if (resolve->bname != 0)
- return 0;
-
- return 1;
-}
-
-void
-gf_server_print_request (call_frame_t *frame)
-{
- server_conf_t *conf = NULL;
- xlator_t *this = NULL;
- server_state_t *state = NULL;
- char resolve_vars[256];
- char resolve2_vars[256];
- char loc_vars[256];
- char loc2_vars[256];
- char other_vars[512];
- char caller[512];
- char *op = "UNKNOWN";
-
- this = frame->this;
- conf = this->private;
-
- state = CALL_STATE (frame);
-
- if (!conf->trace)
- return;
-
- memset (resolve_vars, '\0', 256);
- memset (resolve2_vars, '\0', 256);
- memset (loc_vars, '\0', 256);
- memset (loc2_vars, '\0', 256);
- memset (other_vars, '\0', 256);
-
- print_caller (caller, 256, frame);
-
- if (!server_resolve_is_empty (&state->resolve)) {
- server_print_resolve (resolve_vars, 256, &state->resolve);
- server_print_loc (loc_vars, 256, &state->loc);
- }
-
- if (!server_resolve_is_empty (&state->resolve2)) {
- server_print_resolve (resolve2_vars, 256, &state->resolve2);
- server_print_loc (loc2_vars, 256, &state->loc2);
- }
-
- server_print_params (other_vars, 512, state);
-
- switch (frame->root->type) {
- case GF_OP_TYPE_FOP_REQUEST:
- case GF_OP_TYPE_FOP_REPLY:
- op = gf_fop_list[frame->root->op];
- break;
- case GF_OP_TYPE_MOP_REQUEST:
- case GF_OP_TYPE_MOP_REPLY:
- op = gf_mop_list[frame->root->op];
- break;
- case GF_OP_TYPE_CBK_REQUEST:
- case GF_OP_TYPE_CBK_REPLY:
- op = gf_cbk_list[frame->root->op];
- break;
- }
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%s%s%s%s%s%s%s",
- gf_fop_list[frame->root->op], caller,
- resolve_vars, loc_vars, resolve2_vars, loc2_vars, other_vars);
-}
-
-
-static void
-server_print_reply (call_frame_t *frame, int op_ret, int op_errno)
-{
- server_conf_t *conf = NULL;
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- char caller[512];
- char fdstr[32];
- char *op = "UNKNOWN";
-
- this = frame->this;
- conf = this->private;
-
- if (!conf->trace)
- return;
-
- state = CALL_STATE (frame);
-
- print_caller (caller, 256, frame);
-
- switch (frame->root->type) {
- case GF_OP_TYPE_FOP_REQUEST:
- case GF_OP_TYPE_FOP_REPLY:
- op = gf_fop_list[frame->root->op];
- break;
- case GF_OP_TYPE_MOP_REQUEST:
- case GF_OP_TYPE_MOP_REPLY:
- op = gf_mop_list[frame->root->op];
- break;
- case GF_OP_TYPE_CBK_REQUEST:
- case GF_OP_TYPE_CBK_REPLY:
- op = gf_cbk_list[frame->root->op];
- break;
- }
-
- fdstr[0] = '\0';
- if (state->fd)
- snprintf (fdstr, 32, " fd=%p", state->fd);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%s%s => (%d, %d)%s",
- op, caller, op_ret, op_errno, fdstr);
-}
-
-
-
-static void
-protocol_server_reply (call_frame_t *frame, int type, int op,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- server_state_t *state = NULL;
- xlator_t *bound_xl = NULL;
- transport_t *trans = NULL;
- int ret = 0;
-
- xlator_t *this = NULL;
-
- bound_xl = BOUND_XL (frame);
- state = CALL_STATE (frame);
- trans = state->trans;
- this = frame->this;
-
- hdr->callid = hton64 (frame->root->unique);
- hdr->type = hton32 (type);
- hdr->op = hton32 (op);
-
- server_print_reply (frame, ntoh32 (hdr->rsp.op_ret),
- gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)));
-
- ret = transport_submit (trans, (char *)hdr, hdrlen, vector,
- count, iobref);
- if (ret < 0) {
- gf_log ("protocol/server", GF_LOG_ERROR,
- "frame %"PRId64": failed to submit. op= %d, type= %d",
- frame->root->unique, op, type);
- }
-
- STACK_DESTROY (frame->root);
-
- if (state)
- free_state (state);
-
-}
-
-
-static int
-gf_add_locker (struct _lock_table *table, const char *volume,
- loc_t *loc, fd_t *fd, pid_t pid)
-{
- int32_t ret = -1;
- struct _locker *new = NULL;
- uint8_t dir = 0;
-
- new = GF_CALLOC (1, sizeof (struct _locker),
- gf_server_mt_locker);
- if (new == NULL) {
- gf_log ("server", GF_LOG_ERROR,
- "failed to allocate memory for \'struct _locker\'");
- goto out;
- }
- INIT_LIST_HEAD (&new->lockers);
-
- new->volume = gf_strdup (volume);
-
- if (fd == NULL) {
- loc_copy (&new->loc, loc);
- dir = IA_ISDIR (new->loc.inode->ia_type);
- } else {
- new->fd = fd_ref (fd);
- dir = IA_ISDIR (fd->inode->ia_type);
- }
-
- new->pid = pid;
-
- LOCK (&table->lock);
- {
- if (dir)
- list_add_tail (&new->lockers, &table->dir_lockers);
- else
- list_add_tail (&new->lockers, &table->file_lockers);
- }
- UNLOCK (&table->lock);
-out:
- return ret;
-}
-
-
-static int
-gf_del_locker (struct _lock_table *table, const char *volume,
- loc_t *loc, fd_t *fd, pid_t pid)
-{
- struct _locker *locker = NULL;
- struct _locker *tmp = NULL;
- int32_t ret = 0;
- uint8_t dir = 0;
- struct list_head *head = NULL;
- struct list_head del;
-
- INIT_LIST_HEAD (&del);
-
- if (fd) {
- dir = IA_ISDIR (fd->inode->ia_type);
- } else {
- dir = IA_ISDIR (loc->inode->ia_type);
- }
-
- LOCK (&table->lock);
- {
- if (dir) {
- head = &table->dir_lockers;
- } else {
- head = &table->file_lockers;
- }
-
- list_for_each_entry_safe (locker, tmp, head, lockers) {
- if (locker->fd && fd &&
- (locker->fd == fd) && (locker->pid == pid)
- && !strcmp (locker->volume, volume)) {
- list_move_tail (&locker->lockers, &del);
- } else if (locker->loc.inode &&
- loc &&
- (locker->loc.inode == loc->inode) &&
- (locker->pid == pid)
- && !strcmp (locker->volume, volume)) {
- list_move_tail (&locker->lockers, &del);
- }
- }
- }
- UNLOCK (&table->lock);
-
- tmp = NULL;
- locker = NULL;
-
- list_for_each_entry_safe (locker, tmp, &del, lockers) {
- list_del_init (&locker->lockers);
- if (locker->fd)
- fd_unref (locker->fd);
- else
- loc_wipe (&locker->loc);
-
- GF_FREE (locker->volume);
- GF_FREE (locker);
- }
-
- return ret;
-}
-
-
-/*
- * server_lk_cbk - lk callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @lock:
- *
- * not for external reference
- */
-static int
-server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lk_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_flock_from_flock (&rsp->flock, lock);
- } else if (op_errno != ENOSYS) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": LK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_inodelk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- if (state->flock.l_type == F_UNLCK)
- gf_del_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_INODELK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_finodelk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- if (state->flock.l_type == F_UNLCK)
- gf_del_locker (conn->ltable, state->volume,
- NULL, state->fd,
- frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- NULL, state->fd,
- frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FINODELK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FINODELK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_entrylk_cbk -
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @lock:
- *
- * not for external reference
- */
-static int
-server_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_entrylk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- if (state->cmd == ENTRYLK_UNLOCK)
- gf_del_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_ENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fentrylk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- if (state->cmd == ENTRYLK_UNLOCK)
- gf_del_locker (conn->ltable, state->volume,
- NULL, state->fd, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- NULL, state->fd, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FENTRYLK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_access_cbk - access callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_access_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_ACCESS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_rmdir_cbk - rmdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rmdir_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
- inode_t *parent = NULL;
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
- parent = inode_parent (state->loc.inode, 0, NULL);
- if (parent)
- inode_unref (parent);
- else
- inode_forget (state->loc.inode, 0);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": RMDIR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_RMDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_mkdir_cbk - mkdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mkdir_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- inode_t *link_inode = NULL;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": MKDIR %s ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_MKDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_mknod_cbk - mknod callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mknod_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
- inode_t *link_inode = NULL;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": MKNOD %s ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_MKNOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fsyncdir_cbk - fsyncdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsyncdir_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- if (op_ret < 0) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FSYNCDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSYNCDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-
-/*
- * server_readdir_cbk - getdents callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readdir_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- size_t buf_size = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- if (op_ret > 0)
- buf_size = gf_dirent_serialize (entries, NULL, 0);
-
- hdrlen = gf_hdr_len (rsp, buf_size);
- hdr = gf_hdr_new (rsp, buf_size);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret > 0) {
- rsp->size = hton32 (buf_size);
- gf_dirent_serialize (entries, rsp->buf, buf_size);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": READDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_releasedir_cbk - releasedir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-static int
-server_releasedir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_cbk_releasedir_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASEDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_opendir_cbk - opendir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @fd: file descriptor structure of opened directory
- *
- * not for external reference
- */
-static int
-server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_opendir_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- uint64_t fd_no = 0;
-
- conn = SERVER_CONNECTION (frame);
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- fd_bind (fd);
-
- fd_no = gf_fd_unused_get (conn->fdtable, fd);
- fd_ref (fd); // on behalf of the client
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": OPENDIR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
- rsp->fd = hton64 (fd_no);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_OPENDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_statfs_cbk - statfs callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @buf:
- *
- * not for external reference
- */
-static int
-server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_statfs_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_statfs_from_statfs (&rsp->statfs, buf);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_STATFS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_removexattr_cbk - removexattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-static int
-server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_removexattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_REMOVEXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_getxattr_cbk - getxattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @value:
- *
- * not for external reference
- */
-static int
-server_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_getxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length of "
- "reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if (op_ret >= 0) {
- ret = dict_serialize (dict, rsp->dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = -ret;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_GETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fgetxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length of "
- "reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if (op_ret >= 0) {
- ret = dict_serialize (dict, rsp->dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = -ret;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FGETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_setxattr_cbk - setxattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-static int
-server_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsetxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_rename_cbk - rename callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-static int
-server_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rename_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- stbuf->ia_ino = state->loc.inode->ino;
- stbuf->ia_type = state->loc.inode->ia_type;
-
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": RENAME_CBK (%"PRId64") %"PRId64"/%s "
- "==> %"PRId64"/%s",
- frame->root->unique, state->loc.inode->ino,
- state->loc.parent->ino, state->loc.name,
- state->loc2.parent->ino, state->loc2.name);
-
- inode_rename (state->itable,
- state->loc.parent, state->loc.name,
- state->loc2.parent, state->loc2.name,
- state->loc.inode, stbuf);
- gf_stat_from_iatt (&rsp->stat, stbuf);
-
- gf_stat_from_iatt (&rsp->preoldparent, preoldparent);
- gf_stat_from_iatt (&rsp->postoldparent, postoldparent);
-
- gf_stat_from_iatt (&rsp->prenewparent, prenewparent);
- gf_stat_from_iatt (&rsp->postnewparent, postnewparent);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_RENAME,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_unlink_cbk - unlink callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-static int
-server_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_unlink_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- inode_t *parent = NULL;
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": UNLINK_CBK %"PRId64"/%s (%"PRId64")",
- frame->root->unique, state->loc.parent->ino,
- state->loc.name, state->loc.inode->ino);
-
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
-
- parent = inode_parent (state->loc.inode, 0, NULL);
- if (parent)
- inode_unref (parent);
- else
- inode_forget (state->loc.inode, 0);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": UNLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_UNLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_symlink_cbk - symlink callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-static int
-server_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_symlink_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- inode_t *link_inode = NULL;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": SYMLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SYMLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_link_cbk - link callback for server protocol
- * @frame: call frame
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_link_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
- inode_t *link_inode = NULL;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- stbuf->ia_ino = state->loc.inode->ino;
-
- gf_stat_from_iatt (&rsp->stat, stbuf);
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
-
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s",
- frame->root->unique, inode->ino,
- state->loc2.parent->ino,
- state->loc2.name, state->loc.parent->ino,
- state->loc.name);
-
- link_inode = inode_link (inode, state->loc2.parent,
- state->loc2.name, stbuf);
- inode_unref (link_inode);
- } else {
- gf_log (state->bound_xl->name, GF_LOG_DEBUG,
- "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s "
- " ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve2.ino,
- state->resolve2.par,
- state->resolve2.bname, state->resolve.par,
- state->resolve.bname,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_truncate_cbk - truncate callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_truncate_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->prestat, prebuf);
- gf_stat_from_iatt (&rsp->poststat, postbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": TRUNCATE %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_TRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fstat_cbk - fstat callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fstat_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSTAT %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSTAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_ftruncate_cbk - ftruncate callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_ftruncate_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->prestat, prebuf);
- gf_stat_from_iatt (&rsp->poststat, postbuf);
- } else {
- state = CALL_STATE (frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FTRUNCATE %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FTRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_flush_cbk - flush callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_flush_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- if (op_ret < 0) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FLUSH %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FLUSH,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fsync_cbk - fsync callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsync_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- if (op_ret < 0) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSYNC %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&(rsp->prestat), prebuf);
- gf_stat_from_iatt (&(rsp->poststat), postbuf);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSYNC,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_release_cbk - rleease callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-static int
-server_release_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_cbk_release_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_writev_cbk - writev callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-
-static int
-server_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_write_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp->prestat, prebuf);
- gf_stat_from_iatt (&rsp->poststat, postbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": WRITEV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_WRITE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_readv_cbk - readv callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @vector:
- * @count:
- *
- * not for external reference
- */
-static int
-server_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_read_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": READV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READ,
- hdr, hdrlen, vector, count, iobref);
-
- return 0;
-}
-
-
-/*
- * server_open_cbk - open callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @fd:
- *
- * not for external reference
- */
-static int
-server_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_open_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- uint64_t fd_no = 0;
-
- conn = SERVER_CONNECTION (frame);
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- fd_bind (fd);
-
- fd_no = gf_fd_unused_get (conn->fdtable, fd);
- fd_ref (fd);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": OPEN %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
- rsp->fd = hton64 (fd_no);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_OPEN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_create_cbk - create callback for server
- * @frame: call frame
- * @cookie:
- * @this: translator structure
- * @op_ret:
- * @op_errno:
- * @fd: file descriptor
- * @inode: inode structure
- * @stbuf: struct iatt of created file
- *
- * not for external reference
- */
-static int
-server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_create_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- uint64_t fd_no = 0;
- inode_t *link_inode = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": CREATE %"PRId64"/%s (%"PRId64")",
- frame->root->unique, state->loc.parent->ino,
- state->loc.name, stbuf->ia_ino);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
-
- if (link_inode != inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "create(%s) inode (ptr=%p, ino=%"PRId64", "
- "gen=%"PRId64") found conflict (ptr=%p, "
- "ino=%"PRId64", gen=%"PRId64")",
- state->loc.path, inode, inode->ino,
- inode->generation, link_inode,
- link_inode->ino, link_inode->generation);
-
- /*
- VERY racy code (if used anywhere else)
- -- don't do this without understanding
- */
-
- inode_unref (fd->inode);
- fd->inode = inode_ref (link_inode);
- }
-
- inode_lookup (link_inode);
- inode_unref (link_inode);
-
- fd_bind (fd);
-
- fd_no = gf_fd_unused_get (conn->fdtable, fd);
- fd_ref (fd);
-
- if ((fd_no < 0) || (fd == 0)) {
- op_ret = fd_no;
- op_errno = errno;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": CREATE %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
- rsp->fd = hton64 (fd_no);
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- gf_stat_from_iatt (&rsp->preparent, preparent);
- gf_stat_from_iatt (&rsp->postparent, postparent);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_CREATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_readlink_cbk - readlink callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @buf:
- *
- * not for external reference
- */
-static int
-server_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *sbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readlink_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- size_t linklen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- linklen = strlen (buf) + 1;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": READLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, linklen);
- hdr = gf_hdr_new (rsp, linklen);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&(rsp->buf), sbuf);
- strcpy (rsp->path, buf);
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_stat_cbk - stat callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_stat_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->stat, stbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": STAT %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_STAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_setattr_cbk - setattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-
-static int
-server_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->statpre, statpre);
- gf_stat_from_iatt (&rsp->statpost, statpost);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": SETATTR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SETATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_setattr_cbk - setattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsetattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp->statpre, statpre);
- gf_stat_from_iatt (&rsp->statpost, statpost);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSETATTR %"PRId64" (%"PRId64") ==> "
- "%"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSETATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_lookup_cbk - lookup callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @inode:
- * @stbuf:
- *
- * not for external reference
- */
-static int
-server_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *dict,
- struct iatt *postparent)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lookup_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- inode_t *root_inode = NULL;
- int32_t dict_len = 0;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
- inode_t *link_inode = NULL;
- loc_t fresh_loc = {0,};
-
- state = CALL_STATE(frame);
-
- if (state->is_revalidate == 1 && op_ret == -1) {
- state->is_revalidate = 2;
- loc_copy (&fresh_loc, &state->loc);
- inode_unref (fresh_loc.inode);
- fresh_loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_lookup_cbk,
- BOUND_XL (frame), BOUND_XL (frame)->fops->lookup,
- &fresh_loc, state->dict);
-
- loc_wipe (&fresh_loc);
- return 0;
- }
-
- if (dict) {
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized "
- "length of reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- dict_len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, dict_len);
- hdr = gf_hdr_new (rsp, dict_len);
- rsp = gf_param (hdr);
-
- if ((op_ret >= 0) && dict) {
- ret = dict_serialize (dict, rsp->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = -ret;
- dict_len = 0;
- }
- }
- rsp->dict_len = hton32 (dict_len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (postparent)
- gf_stat_from_iatt (&rsp->postparent, postparent);
-
- if (op_ret == 0) {
- root_inode = BOUND_XL(frame)->itable->root;
- if (inode == root_inode) {
- /* we just looked up root ("/") */
- stbuf->ia_ino = 1;
- if (inode->ia_type == 0)
- inode->ia_type = stbuf->ia_type;
- }
-
- gf_stat_from_iatt (&rsp->stat, stbuf);
-
- if (inode->ino != 1) {
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- }
- } else {
- if (state->is_revalidate && op_errno == ENOENT) {
- if (state->loc.inode->ino != 1) {
- inode_unlink (state->loc.inode,
- state->loc.parent,
- state->loc.name);
- }
- }
-
- gf_log (this->name,
- (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_DEBUG),
- "%"PRId64": LOOKUP %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LOOKUP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_xattrop_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
-
- state = CALL_STATE (frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": XATTROP %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- if ((op_ret >= 0) && dict) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length"
- " for reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if ((op_ret >= 0) && dict) {
- ret = dict_serialize (dict, rsp->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_XATTROP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_xattrop_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FXATTROP %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- if ((op_ret >= 0) && dict) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to get "
- "serialized length for reply dict",
- state->resolve.fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if ((op_ret >= 0) && dict) {
- ret = dict_serialize (dict, rsp->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to "
- "serialize reply dict",
- state->resolve.fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FXATTROP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_lookup_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- if (!state->loc.inode)
- state->loc.inode = inode_new (state->itable);
- else
- state->is_revalidate = 1;
-
- STACK_WIND (frame, server_lookup_cbk,
- bound_xl, bound_xl->fops->lookup,
- &state->loc, state->dict);
-
- return 0;
-err:
- server_lookup_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
-
- return 0;
-}
-
-
-/*
- * server_lookup - LOOKUP request handler
- * @frame: call frame carrying the server state
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the lookup request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * Fills state->resolve from the request, unserializes the optional
- * xattr request dictionary and hands over to the resolver.  On failure
- * server_lookup_cbk is invoked with -1/EINVAL.  Always returns 0.
- */
-static int
-server_lookup (call_frame_t *frame, xlator_t *bound_xl,
-               gf_hdr_common_t *hdr, size_t hdrlen,
-               struct iobuf *iobuf)
-{
-        gf_fop_lookup_req_t *req = NULL;
-        server_state_t      *state = NULL;
-        int32_t              ret = -1;
-        size_t               pathlen = 0;
-        size_t               baselen = 0;
-        size_t               dictlen = 0;
-        dict_t              *xattr_req = NULL;
-        char                *req_dictbuf = NULL;
-
-        req = gf_param (hdr);
-
-        state = CALL_STATE (frame);
-
-        pathlen = STRLEN_0 (req->path);
-        dictlen = ntoh32 (req->dictlen);
-
-        /* NOTE: lookup() uses req->ino only to identify if a lookup()
-         *       is requested for 'root' or not
-         */
-        state->resolve.ino = ntoh64 (req->ino);
-        if (state->resolve.ino != 1)
-                state->resolve.ino = 0;
-
-        state->resolve.type = RESOLVE_DONTCARE;
-        state->resolve.par = ntoh64 (req->par);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->resolve.path = gf_strdup (req->path);
-
-        if (IS_NOT_ROOT (pathlen)) {
-                /* Measure the name in the request buffer rather than in
-                 * the duplicated copy: the copy is NULL when gf_strdup
-                 * fails, and the length is the same either way. */
-                baselen = STRLEN_0 (req->bname + pathlen);
-                state->resolve.bname = gf_strdup (req->bname + pathlen);
-        }
-
-        if (dictlen) {
-                /* Unserialize the dictionary */
-                req_dictbuf = memdup (req->dict + pathlen + baselen, dictlen);
-
-                xattr_req = dict_new ();
-
-                /* Do not hand NULL buffers to dict_unserialize. */
-                if (!req_dictbuf || !xattr_req) {
-                        if (req_dictbuf)
-                                GF_FREE (req_dictbuf);
-                        goto err;
-                }
-
-                ret = dict_unserialize (req_dictbuf, dictlen, &xattr_req);
-                if (ret < 0) {
-                        gf_log (bound_xl->name, GF_LOG_ERROR,
-                                "%"PRId64": %s (%"PRId64"): failed to "
-                                "unserialize req-buffer to dictionary",
-                                frame->root->unique, state->resolve.path,
-                                state->resolve.ino);
-                        GF_FREE (req_dictbuf);
-                        goto err;
-                }
-
-                /* The dictionary now owns the serialized buffer. */
-                xattr_req->extra_free = req_dictbuf;
-                state->dict = xattr_req;
-        }
-
-        gf_resolve_and_resume (frame, server_lookup_resume);
-
-        return 0;
-err:
-        if (xattr_req)
-                dict_unref (xattr_req);
-
-        server_lookup_cbk (frame, NULL, frame->this, -1, EINVAL, NULL, NULL,
-                           NULL, NULL);
-        return 0;
-}
-
-
-/*
- * server_forget - FORGET handler for the server protocol
- *
- * Not implemented: logs a critical message and returns 0 without
- * reading any of its arguments.
- *
- * not for external reference
- */
-static int
-server_forget (call_frame_t *frame, xlator_t *bound_xl,
-               gf_hdr_common_t *hdr, size_t hdrlen,
-               struct iobuf *iobuf)
-{
-        gf_log ("forget", GF_LOG_CRITICAL,
-                "function not implemented");
-
-        return 0;
-}
-
-
-/*
- * server_stat_resume - continue a STAT after resolution
- *
- * Invokes server_stat_cbk with the resolver's error when resolution
- * failed; otherwise winds stat on the resolved loc.
- */
-static int
-server_stat_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_stat_cbk (frame, NULL, frame->this,
-                                 state->resolve.op_ret,
-                                 state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_stat_cbk,
-                    bound_xl, bound_xl->fops->stat, &state->loc);
-        return 0;
-}
-
-
-/*
- * server_stat - STAT request handler
- *
- * Copies ino/gen (converted with ntoh64) and a duplicate of the path
- * into state->resolve, requires the target to resolve (RESOLVE_MUST),
- * then hands over to the resolver.  Always returns 0.
- */
-static int
-server_stat (call_frame_t *frame, xlator_t *bound_xl,
-             gf_hdr_common_t *hdr, size_t hdrlen,
-             struct iobuf *iobuf)
-{
-        gf_fop_stat_req_t *req = gf_param (hdr);
-        server_state_t    *state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-
-        gf_resolve_and_resume (frame, server_stat_resume);
-
-        return 0;
-}
-
-
-/*
- * server_setattr_resume - continue a SETATTR after resolution
- *
- * Invokes server_setattr_cbk with the resolver's error when resolution
- * failed; otherwise winds setattr with the decoded stbuf and valid mask.
- */
-static int
-server_setattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_setattr_cbk (frame, NULL, frame->this,
-                                    state->resolve.op_ret,
-                                    state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_setattr_cbk,
-                    bound_xl, bound_xl->fops->setattr,
-                    &state->loc, &state->stbuf, state->valid);
-        return 0;
-}
-
-
-/*
- * server_setattr - SETATTR request handler
- *
- * Converts the request's stat buffer with gf_stat_to_iatt, records the
- * valid mask and the resolve keys (path, ino, gen), then hands over to
- * the resolver.  Always returns 0.
- */
-static int
-server_setattr (call_frame_t *frame, xlator_t *bound_xl,
-                gf_hdr_common_t *hdr, size_t hdrlen,
-                struct iobuf *iobuf)
-{
-        gf_fop_setattr_req_t *req = gf_param (hdr);
-        server_state_t       *state = CALL_STATE (frame);
-
-        /* what to resolve */
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-
-        /* what to set */
-        state->valid = ntoh32 (req->valid);
-        gf_stat_to_iatt (&req->stbuf, &state->stbuf);
-
-        gf_resolve_and_resume (frame, server_setattr_resume);
-
-        return 0;
-}
-
-
-/*
- * server_fsetattr_resume - continue an FSETATTR after resolution
- *
- * Invokes server_fsetattr_cbk with the resolver's error when resolution
- * failed; otherwise winds fsetattr on the resolved fd.
- */
-static int
-server_fsetattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_fsetattr_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_fsetattr_cbk,
-                    bound_xl, bound_xl->fops->fsetattr,
-                    state->fd, &state->stbuf, state->valid);
-        return 0;
-}
-
-
-/*
- * server_fsetattr - FSETATTR request handler
- *
- * Records the fd number to resolve, converts the request's stat buffer
- * with gf_stat_to_iatt, stores the valid mask and hands over to the
- * resolver.  Always returns 0.
- */
-static int
-server_fsetattr (call_frame_t *frame, xlator_t *bound_xl,
-                 gf_hdr_common_t *hdr, size_t hdrlen,
-                 struct iobuf *iobuf)
-{
-        gf_fop_fsetattr_req_t *req = gf_param (hdr);
-        server_state_t        *state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-
-        state->valid = ntoh32 (req->valid);
-        gf_stat_to_iatt (&req->stbuf, &state->stbuf);
-
-        gf_resolve_and_resume (frame, server_fsetattr_resume);
-
-        return 0;
-}
-
-
-/*
- * server_readlink_resume - continue a READLINK after resolution
- *
- * Invokes server_readlink_cbk with the resolver's error when resolution
- * failed; otherwise winds readlink with the requested size.
- */
-static int
-server_readlink_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_readlink_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_readlink_cbk,
-                    bound_xl, bound_xl->fops->readlink,
-                    &state->loc, state->size);
-        return 0;
-}
-
-
-/*
- * server_readlink - READLINK request handler
- *
- * Stores the resolve keys (path, ino, gen) and the requested size in
- * the call state, then hands over to the resolver.  Always returns 0.
- */
-static int
-server_readlink (call_frame_t *frame, xlator_t *bound_xl,
-                 gf_hdr_common_t *hdr, size_t hdrlen,
-                 struct iobuf *iobuf)
-{
-        gf_fop_readlink_req_t *req = gf_param (hdr);
-        server_state_t        *state = CALL_STATE (frame);
-
-        state->size = ntoh32 (req->size);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-
-        gf_resolve_and_resume (frame, server_readlink_resume);
-
-        return 0;
-}
-
-
-/*
- * server_create_resume - continue a CREATE after resolution
- * @frame: call frame carrying the server state
- * @bound_xl: translator to wind the create to
- *
- * On resolver failure server_create_cbk is invoked with the resolver's
- * op_ret/op_errno.  Otherwise a new inode and fd are allocated and
- * create is wound.  If either allocation fails the callback is invoked
- * with -1/ENOMEM instead of dereferencing a NULL fd.  Always returns 0.
- */
-static int
-server_create_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = NULL;
-        int32_t         op_ret = -1;
-        int32_t         op_errno = ENOMEM;
-
-        state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                op_ret = state->resolve.op_ret;
-                op_errno = state->resolve.op_errno;
-                goto err;
-        }
-
-        state->loc.inode = inode_new (state->itable);
-        if (!state->loc.inode)
-                goto err;
-
-        state->fd = fd_create (state->loc.inode, frame->root->pid);
-        if (!state->fd)
-                goto err;
-
-        state->fd->flags = state->flags;
-
-        STACK_WIND (frame, server_create_cbk,
-                    bound_xl, bound_xl->fops->create,
-                    &(state->loc), state->flags, state->mode, state->fd);
-
-        return 0;
-err:
-        server_create_cbk (frame, NULL, frame->this, op_ret, op_errno,
-                           NULL, NULL, NULL, NULL, NULL);
-        return 0;
-}
-
-
-/*
- * server_create - CREATE request handler
- *
- * The entry must not already resolve (RESOLVE_NOT).  The basename
- * follows the path in the request, so it is read at
- * req->bname + STRLEN_0 (req->path).  Mode and flags are stored in the
- * call state before handing over to the resolver.  Always returns 0.
- */
-static int
-server_create (call_frame_t *frame, xlator_t *bound_xl,
-               gf_hdr_common_t *hdr, size_t hdrlen,
-               struct iobuf *iobuf)
-{
-        gf_fop_create_req_t *req = gf_param (hdr);
-        server_state_t      *state = CALL_STATE (frame);
-        int                  pathlen = STRLEN_0 (req->path);
-
-        state->mode = ntoh32 (req->mode);
-        state->flags = gf_flags_to_flags (ntoh32 (req->flags));
-
-        state->resolve.type = RESOLVE_NOT;
-        state->resolve.par = ntoh64 (req->par);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.bname = gf_strdup (req->bname + pathlen);
-
-        gf_resolve_and_resume (frame, server_create_resume);
-
-        return 0;
-}
-
-
-/*
- * server_open_resume - continue an OPEN after resolution
- * @frame: call frame carrying the server state
- * @bound_xl: translator to wind the open to
- *
- * On resolver failure server_open_cbk is invoked with the resolver's
- * op_ret/op_errno.  Otherwise an fd is created on the resolved inode
- * and open is wound.  If fd_create fails the callback is invoked with
- * -1/ENOMEM instead of dereferencing a NULL fd.  Always returns 0.
- */
-static int
-server_open_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = NULL;
-        int32_t         op_ret = -1;
-        int32_t         op_errno = ENOMEM;
-
-        state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                op_ret = state->resolve.op_ret;
-                op_errno = state->resolve.op_errno;
-                goto err;
-        }
-
-        state->fd = fd_create (state->loc.inode, frame->root->pid);
-        if (!state->fd)
-                goto err;
-
-        state->fd->flags = state->flags;
-
-        STACK_WIND (frame, server_open_cbk,
-                    bound_xl, bound_xl->fops->open,
-                    &state->loc, state->flags, state->fd, 0);
-
-        return 0;
-err:
-        server_open_cbk (frame, NULL, frame->this, op_ret, op_errno, NULL);
-        return 0;
-}
-
-
-/*
- * server_open - OPEN request handler
- *
- * Stores the open flags (passed through gf_flags_to_flags) and the
- * resolve keys (path, ino, gen), then hands over to the resolver.
- * Always returns 0.
- */
-static int
-server_open (call_frame_t *frame, xlator_t *bound_xl,
-             gf_hdr_common_t *hdr, size_t hdrlen,
-             struct iobuf *iobuf)
-{
-        gf_fop_open_req_t *req = gf_param (hdr);
-        server_state_t    *state = CALL_STATE (frame);
-
-        state->flags = gf_flags_to_flags (ntoh32 (req->flags));
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-
-        gf_resolve_and_resume (frame, server_open_resume);
-
-        return 0;
-}
-
-
-/*
- * server_readv_resume - continue a READ after resolution
- *
- * Invokes server_readv_cbk with the resolver's error when resolution
- * failed; otherwise winds readv with the requested size and offset.
- */
-static int
-server_readv_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_readv_cbk (frame, NULL, frame->this,
-                                  state->resolve.op_ret,
-                                  state->resolve.op_errno,
-                                  NULL, 0, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_readv_cbk,
-                    bound_xl, bound_xl->fops->readv,
-                    state->fd, state->size, state->offset);
-
-        return 0;
-}
-
-
-/*
- * server_readv - READ request handler
- *
- * Stores the fd number, size and offset from the request in the call
- * state and hands over to the resolver.  Always returns 0.
- */
-static int
-server_readv (call_frame_t *frame, xlator_t *bound_xl,
-              gf_hdr_common_t *hdr, size_t hdrlen,
-              struct iobuf *iobuf)
-{
-        gf_fop_read_req_t *req = gf_param (hdr);
-        server_state_t    *state = CALL_STATE (frame);
-
-        state->offset = ntoh64 (req->offset);
-        state->size = ntoh32 (req->size);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-
-        gf_resolve_and_resume (frame, server_readv_resume);
-
-        return 0;
-}
-
-
-/*
- * server_writev_resume - continue a WRITE after resolution
- *
- * Invokes server_writev_cbk with the resolver's error when resolution
- * failed.  Otherwise builds a single iovec of state->size bytes whose
- * base is the payload iobuf's pointer (left zeroed when there is no
- * iobuf) and winds writev.
- */
-static int
-server_writev_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-        struct iovec    iov = {0, };
-
-        if (state->resolve.op_ret != 0) {
-                server_writev_cbk (frame, NULL, frame->this,
-                                   state->resolve.op_ret,
-                                   state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        if (state->iobuf)
-                iov.iov_base = state->iobuf->ptr;
-        iov.iov_len = state->size;
-
-        STACK_WIND (frame, server_writev_cbk,
-                    bound_xl, bound_xl->fops->writev,
-                    state->fd, &iov, 1, state->offset, state->iobref);
-
-        return 0;
-}
-
-
-/*
- * server_writev - WRITE request handler
- *
- * Stores fd number, offset and size in the call state.  When a payload
- * iobuf is supplied, it is added to a new iobref and both are kept in
- * the state for the resume step.  Always returns 0.
- */
-static int
-server_writev (call_frame_t *frame, xlator_t *bound_xl,
-               gf_hdr_common_t *hdr, size_t hdrlen,
-               struct iobuf *iobuf)
-{
-        gf_fop_write_req_t *req = gf_param (hdr);
-        server_state_t     *state = CALL_STATE (frame);
-        struct iobref      *payload_ref = NULL;
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->size = ntoh32 (req->size);
-        state->offset = ntoh64 (req->offset);
-
-        if (iobuf != NULL) {
-                payload_ref = iobref_new ();
-                iobref_add (payload_ref, iobuf);
-
-                state->iobref = payload_ref;
-                state->iobuf = iobuf;
-        }
-
-        gf_resolve_and_resume (frame, server_writev_resume);
-
-        return 0;
-}
-
-
-/*
- * server_release - RELEASE request handler
- *
- * Records the fd number in the call state, drops it from the
- * connection's fd table with gf_fd_put and invokes server_release_cbk
- * with op_ret 0 / op_errno 0.  No resolution step.  Always returns 0.
- */
-static int
-server_release (call_frame_t *frame, xlator_t *bound_xl,
-                gf_hdr_common_t *hdr, size_t hdrlen,
-                struct iobuf *iobuf)
-{
-        server_connection_t  *conn = SERVER_CONNECTION (frame);
-        server_state_t       *state = CALL_STATE (frame);
-        gf_cbk_release_req_t *req = gf_param (hdr);
-
-        state->resolve.fd_no = ntoh64 (req->fd);
-        gf_fd_put (conn->fdtable, state->resolve.fd_no);
-
-        server_release_cbk (frame, NULL, frame->this, 0, 0);
-
-        return 0;
-}
-
-
-/*
- * server_fsync_resume - continue an FSYNC after resolution
- *
- * Invokes server_fsync_cbk with the resolver's error when resolution
- * failed; otherwise winds fsync on the resolved fd with state->flags.
- */
-static int
-server_fsync_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_fsync_cbk (frame, NULL, frame->this,
-                                  state->resolve.op_ret,
-                                  state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_fsync_cbk,
-                    bound_xl, bound_xl->fops->fsync,
-                    state->fd, state->flags);
-        return 0;
-}
-
-
-/*
- * server_fsync - FSYNC request handler
- *
- * Stores the fd number to resolve and keeps the request's 'data' word
- * as state->flags, then hands over to the resolver.  Always returns 0.
- */
-static int
-server_fsync (call_frame_t *frame, xlator_t *bound_xl,
-              gf_hdr_common_t *hdr, size_t hdrlen,
-              struct iobuf *iobuf)
-{
-        gf_fop_fsync_req_t *req = gf_param (hdr);
-        server_state_t     *state = CALL_STATE (frame);
-
-        state->flags = ntoh32 (req->data);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-
-        gf_resolve_and_resume (frame, server_fsync_resume);
-
-        return 0;
-}
-
-
-
-/*
- * server_flush_resume - continue a FLUSH after resolution
- *
- * Invokes server_flush_cbk with the resolver's error when resolution
- * failed; otherwise winds flush on the resolved fd.
- */
-static int
-server_flush_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_flush_cbk (frame, NULL, frame->this,
-                                  state->resolve.op_ret,
-                                  state->resolve.op_errno);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_flush_cbk,
-                    bound_xl, bound_xl->fops->flush, state->fd);
-        return 0;
-}
-
-
-/*
- * server_flush - FLUSH request handler
- *
- * Stores the fd number to resolve and hands over to the resolver.
- * Always returns 0.
- *
- * NOTE(review): the request is read through gf_fop_fsync_req_t; only
- * its 'fd' member is used here.  Confirm whether a dedicated flush
- * request type was intended.
- */
-static int
-server_flush (call_frame_t *frame, xlator_t *bound_xl,
-              gf_hdr_common_t *hdr, size_t hdrlen,
-              struct iobuf *iobuf)
-{
-        gf_fop_fsync_req_t *req = gf_param (hdr);
-        server_state_t     *state = CALL_STATE (frame);
-
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->resolve.type = RESOLVE_MUST;
-
-        gf_resolve_and_resume (frame, server_flush_resume);
-
-        return 0;
-}
-
-
-
-/*
- * server_ftruncate_resume - continue an FTRUNCATE after resolution
- *
- * Invokes server_ftruncate_cbk with the resolver's error when
- * resolution failed; otherwise winds ftruncate with state->offset.
- */
-static int
-server_ftruncate_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_ftruncate_cbk (frame, NULL, frame->this,
-                                      state->resolve.op_ret,
-                                      state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_ftruncate_cbk,
-                    bound_xl, bound_xl->fops->ftruncate,
-                    state->fd, state->offset);
-        return 0;
-}
-
-
-/*
- * server_ftruncate - FTRUNCATE request handler
- *
- * Stores the fd number to resolve and the target offset, then hands
- * over to the resolver.  Always returns 0.
- */
-static int
-server_ftruncate (call_frame_t *frame, xlator_t *bound_xl,
-                  gf_hdr_common_t *hdr, size_t hdrlen,
-                  struct iobuf *iobuf)
-{
-        gf_fop_ftruncate_req_t *req = gf_param (hdr);
-        server_state_t         *state = CALL_STATE (frame);
-
-        state->offset = ntoh64 (req->offset);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-
-        gf_resolve_and_resume (frame, server_ftruncate_resume);
-
-        return 0;
-}
-
-
-/*
- * server_fstat_resume - continue an FSTAT after resolution
- *
- * Invokes server_fstat_cbk with the resolver's error when resolution
- * failed; otherwise winds fstat on the resolved fd.
- */
-static int
-server_fstat_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_fstat_cbk (frame, NULL, frame->this,
-                                  state->resolve.op_ret,
-                                  state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_fstat_cbk,
-                    bound_xl, bound_xl->fops->fstat,
-                    state->fd);
-        return 0;
-}
-
-
-/*
- * server_fstat - FSTAT request handler
- *
- * Stores the fd number to resolve and hands over to the resolver.
- * Always returns 0.
- */
-static int
-server_fstat (call_frame_t *frame, xlator_t *bound_xl,
-              gf_hdr_common_t *hdr, size_t hdrlen,
-              struct iobuf *iobuf)
-{
-        gf_fop_fstat_req_t *req = gf_param (hdr);
-        server_state_t     *state = CALL_STATE (frame);
-
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->resolve.type = RESOLVE_MUST;
-
-        gf_resolve_and_resume (frame, server_fstat_resume);
-
-        return 0;
-}
-
-
-/*
- * server_truncate_resume - continue a TRUNCATE after resolution
- *
- * Invokes server_truncate_cbk with the resolver's error when
- * resolution failed; otherwise winds truncate with state->offset.
- */
-static int
-server_truncate_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_truncate_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_truncate_cbk,
-                    bound_xl, bound_xl->fops->truncate,
-                    &state->loc, state->offset);
-        return 0;
-}
-
-
-
-/*
- * server_truncate - TRUNCATE request handler
- *
- * Stores the resolve keys (path, ino, gen) and the target offset in
- * the call state, then hands over to the resolver.  Always returns 0.
- */
-static int
-server_truncate (call_frame_t *frame, xlator_t *bound_xl,
-                 gf_hdr_common_t *hdr, size_t hdrlen,
-                 struct iobuf *iobuf)
-{
-        gf_fop_truncate_req_t *req = gf_param (hdr);
-        server_state_t        *state = CALL_STATE (frame);
-
-        state->offset = ntoh64 (req->offset);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->resolve.path = gf_strdup (req->path);
-
-        gf_resolve_and_resume (frame, server_truncate_resume);
-
-        return 0;
-}
-
-
-/*
- * server_unlink_resume - continue an UNLINK after resolution
- *
- * Invokes server_unlink_cbk with the resolver's error when resolution
- * failed; otherwise winds unlink on the resolved loc.
- */
-static int
-server_unlink_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_unlink_cbk (frame, NULL, frame->this,
-                                   state->resolve.op_ret,
-                                   state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_unlink_cbk,
-                    bound_xl, bound_xl->fops->unlink,
-                    &state->loc);
-        return 0;
-}
-
-
-/*
- * server_unlink - UNLINK request handler
- *
- * Stores parent/gen and duplicates of the path and of the basename
- * (read at req->bname + STRLEN_0 (req->path)) in state->resolve, then
- * hands over to the resolver.  Always returns 0.
- */
-static int
-server_unlink (call_frame_t *frame, xlator_t *bound_xl,
-               gf_hdr_common_t *hdr, size_t hdrlen,
-               struct iobuf *iobuf)
-{
-        gf_fop_unlink_req_t *req = gf_param (hdr);
-        server_state_t      *state = CALL_STATE (frame);
-        int                  pathlen = STRLEN_0 (req->path);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.bname = gf_strdup (req->bname + pathlen);
-        state->resolve.par = ntoh64 (req->par);
-        state->resolve.gen = ntoh64 (req->gen);
-
-        gf_resolve_and_resume (frame, server_unlink_resume);
-
-        return 0;
-}
-
-
-/*
- * server_setxattr_resume - continue a SETXATTR after resolution
- *
- * Invokes server_setxattr_cbk with the resolver's error when
- * resolution failed; otherwise winds setxattr with the request
- * dictionary and flags.
- */
-static int
-server_setxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_setxattr_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_setxattr_cbk,
-                    bound_xl, bound_xl->fops->setxattr,
-                    &state->loc, state->dict, state->flags);
-        return 0;
-}
-
-
-/*
- * server_setxattr - SETXATTR request handler
- * @frame: call frame carrying the server state
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the setxattr request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * The path is read dict_len bytes past req->path.  The serialized
- * dictionary is copied, unserialized and attached to the call state
- * before handing over to the resolver.  On unserialize failure
- * server_setxattr_cbk is invoked with -1/EINVAL.  Always returns 0.
- */
-static int
-server_setxattr (call_frame_t *frame, xlator_t *bound_xl,
-                 gf_hdr_common_t *hdr, size_t hdrlen,
-                 struct iobuf *iobuf)
-{
-        gf_fop_setxattr_req_t *req = NULL;
-        server_state_t        *state = NULL;
-        dict_t                *dict = NULL;
-        int32_t                ret = -1;
-        size_t                 dict_len = 0;
-        char                  *req_dictbuf = NULL;
-
-        req = gf_param (hdr);
-        state = CALL_STATE (frame);
-
-        dict_len = ntoh32 (req->dict_len);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path + dict_len);
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->flags = ntoh32 (req->flags);
-
-        if (dict_len) {
-                req_dictbuf = memdup (req->dict, dict_len);
-
-                dict = dict_new ();
-
-                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
-                if (ret < 0) {
-                        /* Resolution has not run yet, so log the path
-                         * taken from the request (resolve.path), not
-                         * state->loc.path which nothing here has set. */
-                        gf_log (bound_xl->name, GF_LOG_ERROR,
-                                "%"PRId64": %s (%"PRId64"): failed to "
-                                "unserialize request buffer to dictionary",
-                                frame->root->unique, state->resolve.path,
-                                state->resolve.ino);
-                        GF_FREE (req_dictbuf);
-                        goto err;
-                }
-
-                /* The dictionary now owns the serialized buffer. */
-                dict->extra_free = req_dictbuf;
-                state->dict = dict;
-        }
-
-        gf_resolve_and_resume (frame, server_setxattr_resume);
-
-        return 0;
-err:
-        if (dict)
-                dict_unref (dict);
-
-        server_setxattr_cbk (frame, NULL, frame->this, -1, EINVAL);
-
-        return 0;
-
-}
-
-
-/*
- * server_fsetxattr_resume - continue an FSETXATTR after resolution
- * @frame: call frame carrying the server state
- * @bound_xl: translator to wind the fsetxattr to
- *
- * Invokes server_fsetxattr_cbk with the resolver's error when
- * resolution failed; otherwise winds fsetxattr on the resolved fd.
- * The same callback is used for both paths so the completion of an
- * fsetxattr is never handled by the setxattr callback.
- * Always returns 0.
- */
-static int
-server_fsetxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = NULL;
-
-        state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0)
-                goto err;
-
-        STACK_WIND (frame, server_fsetxattr_cbk,
-                    bound_xl, bound_xl->fops->fsetxattr,
-                    state->fd, state->dict, state->flags);
-        return 0;
-err:
-        server_fsetxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
-                              state->resolve.op_errno);
-
-        return 0;
-}
-
-
-/*
- * server_fsetxattr - FSETXATTR request handler
- * @frame: call frame carrying the server state
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the fsetxattr request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * Stores the fd number and flags, unserializes the request dictionary
- * and hands over to the resolver.  On unserialize failure
- * server_fsetxattr_cbk is invoked with -1/EINVAL.  Always returns 0.
- */
-static int
-server_fsetxattr (call_frame_t *frame, xlator_t *bound_xl,
-                  gf_hdr_common_t *hdr, size_t hdrlen,
-                  struct iobuf *iobuf)
-{
-        gf_fop_fsetxattr_req_t *req = NULL;
-        server_state_t         *state = NULL;
-        dict_t                 *dict = NULL;
-        int32_t                 ret = -1;
-        size_t                  dict_len = 0;
-        char                   *req_dictbuf = NULL;
-
-        req = gf_param (hdr);
-        state = CALL_STATE (frame);
-
-        dict_len = ntoh32 (req->dict_len);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->flags = ntoh32 (req->flags);
-
-        if (dict_len) {
-                req_dictbuf = memdup (req->dict, dict_len);
-
-                dict = dict_new ();
-
-                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
-                if (ret < 0) {
-                        /* An fd-based request carries no path or ino
-                         * here; identify it by the fd number instead of
-                         * passing unset fields to the format. */
-                        gf_log (bound_xl->name, GF_LOG_ERROR,
-                                "%"PRId64": fd - %"PRId64": failed to "
-                                "unserialize request buffer to dictionary",
-                                frame->root->unique, state->resolve.fd_no);
-                        GF_FREE (req_dictbuf);
-                        goto err;
-                }
-
-                /* The dictionary now owns the serialized buffer. */
-                dict->extra_free = req_dictbuf;
-                state->dict = dict;
-        }
-
-        gf_resolve_and_resume (frame, server_fsetxattr_resume);
-
-        return 0;
-err:
-        if (dict)
-                dict_unref (dict);
-
-        /* Use the fsetxattr callback, matching server_fsetxattr_resume's
-         * error path. */
-        server_fsetxattr_cbk (frame, NULL, frame->this, -1, EINVAL);
-
-        return 0;
-}
-
-
-/*
- * server_fxattrop_resume - continue an FXATTROP after resolution
- *
- * Invokes server_fxattrop_cbk with the resolver's error when
- * resolution failed; otherwise winds fxattrop on the resolved fd.
- */
-static int
-server_fxattrop_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_fxattrop_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_fxattrop_cbk,
-                    bound_xl, bound_xl->fops->fxattrop,
-                    state->fd, state->flags, state->dict);
-        return 0;
-}
-
-
-/*
- * server_fxattrop - FXATTROP request handler
- * @frame: call frame carrying the server state
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the fxattrop request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * Stores fd number, ino, gen and flags, unserializes the request
- * dictionary and hands over to the resolver.  On unserialize failure
- * server_fxattrop_cbk is invoked with -1/EINVAL.  Always returns 0.
- */
-static int
-server_fxattrop (call_frame_t *frame, xlator_t *bound_xl,
-                 gf_hdr_common_t *hdr, size_t hdrlen,
-                 struct iobuf *iobuf)
-{
-        gf_fop_fxattrop_req_t *req = NULL;
-        dict_t                *dict = NULL;
-        server_state_t        *state = NULL;
-        size_t                 dict_len = 0;
-        char                  *req_dictbuf = NULL;
-        int32_t                ret = -1;
-
-        req = gf_param (hdr);
-        state = CALL_STATE(frame);
-
-        dict_len = ntoh32 (req->dict_len);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->flags = ntoh32 (req->flags);
-
-        if (dict_len) {
-                /* Unserialize the dictionary */
-                req_dictbuf = memdup (req->dict, dict_len);
-
-                dict = dict_new ();
-
-                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
-                if (ret < 0) {
-                        /* The fd has not been resolved at this point, so
-                         * state->fd must not be dereferenced; log the
-                         * inode number received in the request. */
-                        gf_log (bound_xl->name, GF_LOG_ERROR,
-                                "fd - %"PRId64" (%"PRId64"): failed to unserialize "
-                                "request buffer to dictionary",
-                                state->resolve.fd_no, state->resolve.ino);
-                        GF_FREE (req_dictbuf);
-                        goto fail;
-                }
-                /* The dictionary now owns the serialized buffer and the
-                 * call state owns the dictionary. */
-                dict->extra_free = req_dictbuf;
-                state->dict = dict;
-                dict = NULL;
-        }
-
-        gf_resolve_and_resume (frame, server_fxattrop_resume);
-
-        return 0;
-
-fail:
-        if (dict)
-                dict_unref (dict);
-
-        server_fxattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
-        return 0;
-}
-
-
-/*
- * server_xattrop_resume - continue an XATTROP after resolution
- *
- * Invokes server_xattrop_cbk with the resolver's error when resolution
- * failed; otherwise winds xattrop on the resolved loc.
- */
-static int
-server_xattrop_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_xattrop_cbk (frame, NULL, frame->this,
-                                    state->resolve.op_ret,
-                                    state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_xattrop_cbk,
-                    bound_xl, bound_xl->fops->xattrop,
-                    &state->loc, state->flags, state->dict);
-        return 0;
-}
-
-
-/*
- * server_xattrop - XATTROP request handler
- * @frame: call frame carrying the server state
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the xattrop request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * The path is read dict_len bytes past req->path.  Stores ino, gen and
- * flags, unserializes the request dictionary and hands over to the
- * resolver.  On unserialize failure server_xattrop_cbk is invoked with
- * -1/EINVAL.  Always returns 0.
- */
-static int
-server_xattrop (call_frame_t *frame, xlator_t *bound_xl,
-                gf_hdr_common_t *hdr, size_t hdrlen,
-                struct iobuf *iobuf)
-{
-        gf_fop_xattrop_req_t *req = NULL;
-        dict_t               *dict = NULL;
-        server_state_t       *state = NULL;
-        size_t                dict_len = 0;
-        char                 *req_dictbuf = NULL;
-        int32_t               ret = -1;
-
-        req = gf_param (hdr);
-        state = CALL_STATE(frame);
-
-        dict_len = ntoh32 (req->dict_len);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path + dict_len);
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->flags = ntoh32 (req->flags);
-
-        if (dict_len) {
-                /* Unserialize the dictionary */
-                req_dictbuf = memdup (req->dict, dict_len);
-
-                dict = dict_new ();
-
-                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
-                if (ret < 0) {
-                        /* This is a path-based request: no fd is set in
-                         * the state here, so state->fd must not be
-                         * dereferenced.  Identify the request by the
-                         * path and ino it carried. */
-                        gf_log (bound_xl->name, GF_LOG_ERROR,
-                                "%"PRId64": %s (%"PRId64"): failed to unserialize "
-                                "request buffer to dictionary",
-                                frame->root->unique, state->resolve.path,
-                                state->resolve.ino);
-                        GF_FREE (req_dictbuf);
-                        goto fail;
-                }
-                /* The dictionary now owns the serialized buffer and the
-                 * call state owns the dictionary. */
-                dict->extra_free = req_dictbuf;
-                state->dict = dict;
-                dict = NULL;
-        }
-
-        gf_resolve_and_resume (frame, server_xattrop_resume);
-
-        return 0;
-
-fail:
-        if (dict)
-                dict_unref (dict);
-
-        server_xattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
-        return 0;
-}
-
-
-/*
- * server_getxattr_resume - continue a GETXATTR after resolution
- *
- * Invokes server_getxattr_cbk with the resolver's error when
- * resolution failed; otherwise winds getxattr with state->name.
- */
-static int
-server_getxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_getxattr_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_getxattr_cbk,
-                    bound_xl, bound_xl->fops->getxattr,
-                    &state->loc, state->name);
-        return 0;
-}
-
-
-/*
- * server_getxattr - GETXATTR request handler
- *
- * Stores the resolve keys (path, ino, gen).  When the request's
- * namelen is non-zero, the attribute name is duplicated from
- * req->name + STRLEN_0 (req->path); otherwise state->name is left
- * untouched.  Hands over to the resolver and always returns 0.
- */
-static int
-server_getxattr (call_frame_t *frame, xlator_t *bound_xl,
-                 gf_hdr_common_t *hdr, size_t hdrlen,
-                 struct iobuf *iobuf)
-{
-        gf_fop_getxattr_req_t *req = gf_param (hdr);
-        server_state_t        *state = CALL_STATE (frame);
-        size_t                 pathlen = STRLEN_0 (req->path);
-        size_t                 namelen = 0;
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->resolve.path = gf_strdup (req->path);
-
-        namelen = ntoh32 (req->namelen);
-        if (namelen != 0)
-                state->name = gf_strdup (req->name + pathlen);
-
-        gf_resolve_and_resume (frame, server_getxattr_resume);
-
-        return 0;
-}
-
-
-/*
- * server_fgetxattr_resume - continue an FGETXATTR after resolution
- *
- * Invokes server_fgetxattr_cbk with the resolver's error when
- * resolution failed; otherwise winds fgetxattr on the resolved fd.
- */
-static int
-server_fgetxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_fgetxattr_cbk (frame, NULL, frame->this,
-                                      state->resolve.op_ret,
-                                      state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_fgetxattr_cbk,
-                    bound_xl, bound_xl->fops->fgetxattr,
-                    state->fd, state->name);
-        return 0;
-}
-
-
-/*
- * server_fgetxattr - FGETXATTR request handler
- *
- * Stores the fd number to resolve.  When the request's namelen is
- * non-zero the attribute name is duplicated into state->name.  Hands
- * over to the resolver and always returns 0.
- */
-static int
-server_fgetxattr (call_frame_t *frame, xlator_t *bound_xl,
-                  gf_hdr_common_t *hdr, size_t hdrlen,
-                  struct iobuf *iobuf)
-{
-        gf_fop_fgetxattr_req_t *req = gf_param (hdr);
-        server_state_t         *state = CALL_STATE (frame);
-        size_t                  namelen = 0;
-
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->resolve.type = RESOLVE_MUST;
-
-        namelen = ntoh32 (req->namelen);
-        if (namelen != 0)
-                state->name = gf_strdup (req->name);
-
-        gf_resolve_and_resume (frame, server_fgetxattr_resume);
-
-        return 0;
-}
-
-
-/*
- * server_removexattr_resume - continue a REMOVEXATTR after resolution
- *
- * Invokes server_removexattr_cbk with the resolver's error when
- * resolution failed; otherwise winds removexattr with state->name.
- */
-static int
-server_removexattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_removexattr_cbk (frame, NULL, frame->this,
-                                        state->resolve.op_ret,
-                                        state->resolve.op_errno);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_removexattr_cbk,
-                    bound_xl, bound_xl->fops->removexattr,
-                    &state->loc, state->name);
-        return 0;
-}
-
-
-/*
- * server_removexattr - REMOVEXATTR request handler
- *
- * Stores the resolve keys (path, ino, gen) and duplicates the
- * attribute name from req->name + STRLEN_0 (req->path), then hands
- * over to the resolver.  Always returns 0.
- */
-static int
-server_removexattr (call_frame_t *frame, xlator_t *bound_xl,
-                    gf_hdr_common_t *hdr, size_t hdrlen,
-                    struct iobuf *iobuf)
-{
-        gf_fop_removexattr_req_t *req = gf_param (hdr);
-        server_state_t           *state = CALL_STATE (frame);
-        size_t                    pathlen = STRLEN_0 (req->path);
-
-        state->name = gf_strdup (req->name + pathlen);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->resolve.path = gf_strdup (req->path);
-
-        gf_resolve_and_resume (frame, server_removexattr_resume);
-
-        return 0;
-}
-
-
-/*
- * server_statfs_resume - continue a STATFS after resolution
- *
- * Invokes server_statfs_cbk with the resolver's error when resolution
- * failed; otherwise winds statfs on the resolved loc.
- */
-static int
-server_statfs_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_statfs_cbk (frame, NULL, frame->this,
-                                   state->resolve.op_ret,
-                                   state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_statfs_cbk,
-                    bound_xl, bound_xl->fops->statfs,
-                    &state->loc);
-        return 0;
-}
-
-
-/*
- * server_statfs - STATFS request handler
- *
- * Stores the resolve keys (path, ino, gen); an ino of 0 in the request
- * is replaced by 1 before resolution.  Hands over to the resolver and
- * always returns 0.
- */
-static int
-server_statfs (call_frame_t *frame, xlator_t *bound_xl,
-               gf_hdr_common_t *hdr, size_t hdrlen,
-               struct iobuf *iobuf)
-{
-        gf_fop_statfs_req_t *req = gf_param (hdr);
-        server_state_t      *state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (req->path);
-        state->resolve.gen = ntoh64 (req->gen);
-
-        state->resolve.ino = ntoh64 (req->ino);
-        if (state->resolve.ino == 0)
-                state->resolve.ino = 1;
-
-        gf_resolve_and_resume (frame, server_statfs_resume);
-
-        return 0;
-}
-
-
-/*
- * server_opendir_resume - continue an OPENDIR after resolution
- * @frame: call frame carrying the server state
- * @bound_xl: translator to wind the opendir to
- *
- * On resolver failure server_opendir_cbk is invoked with the
- * resolver's op_ret/op_errno.  Otherwise an fd is created on the
- * resolved inode and opendir is wound.  If fd_create fails the
- * callback is invoked with -1/ENOMEM rather than winding a NULL fd.
- * Always returns 0.
- */
-static int
-server_opendir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = NULL;
-        int32_t         op_ret = -1;
-        int32_t         op_errno = ENOMEM;
-
-        state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                op_ret = state->resolve.op_ret;
-                op_errno = state->resolve.op_errno;
-                goto err;
-        }
-
-        state->fd = fd_create (state->loc.inode, frame->root->pid);
-        if (!state->fd)
-                goto err;
-
-        STACK_WIND (frame, server_opendir_cbk,
-                    bound_xl, bound_xl->fops->opendir,
-                    &state->loc, state->fd);
-        return 0;
-err:
-        server_opendir_cbk (frame, NULL, frame->this, op_ret, op_errno, NULL);
-        return 0;
-}
-
-
-/*
- * server_opendir - OPENDIR request handler
- *
- * Stores the resolve keys (path, ino, gen) and hands over to the
- * resolver.  Always returns 0.
- */
-static int
-server_opendir (call_frame_t *frame, xlator_t *bound_xl,
-                gf_hdr_common_t *hdr, size_t hdrlen,
-                struct iobuf *iobuf)
-{
-        gf_fop_opendir_req_t *req = gf_param (hdr);
-        server_state_t       *state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino = ntoh64 (req->ino);
-        state->resolve.gen = ntoh64 (req->gen);
-        state->resolve.path = gf_strdup (req->path);
-
-        gf_resolve_and_resume (frame, server_opendir_resume);
-
-        return 0;
-}
-
-
-/*
- * server_releasedir - RELEASEDIR request handler
- *
- * Drops the request's fd number from the connection's fd table with
- * gf_fd_put and invokes server_releasedir_cbk with op_ret 0 /
- * op_errno 0.  No resolution step.  Always returns 0.
- */
-static int
-server_releasedir (call_frame_t *frame, xlator_t *bound_xl,
-                   gf_hdr_common_t *hdr, size_t hdrlen,
-                   struct iobuf *iobuf)
-{
-        server_connection_t     *conn = SERVER_CONNECTION (frame);
-        gf_cbk_releasedir_req_t *req = gf_param (hdr);
-        uint64_t                 fd_no = ntoh64 (req->fd);
-
-        gf_fd_put (conn->fdtable, fd_no);
-
-        server_releasedir_cbk (frame, NULL, frame->this, 0, 0);
-
-        return 0;
-}
-
-/*
- * server_readdirp_cbk - readdirp callback for server protocol
- * @frame: call frame
- * @cookie: not used here
- * @this: translator, used for its log name
- * @op_ret: result of the readdirp; entries are serialized only when > 0
- * @op_errno: error number, converted with gf_errno_to_error for the reply
- * @entries: directory entries to serialize into the reply
- *
- * Sizes the reply with a first gf_dirent_serialize pass (NULL buffer),
- * builds the header, then either serializes the entries into it or
- * logs the result at trace level, and sends the reply.
- *
- * not for external reference
- */
-static int
-server_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
-                     int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
-        gf_hdr_common_t       *hdr = NULL;
-        gf_fop_readdirp_rsp_t *rsp = NULL;
-        size_t                 hdrlen = 0;
-        size_t                 buf_size = 0;
-        int32_t                gf_errno = 0;
-        server_state_t        *state = NULL;
-
-        /* First pass only computes the space the entries need. */
-        if (op_ret > 0)
-                buf_size = gf_dirent_serialize (entries, NULL, 0);
-
-        hdrlen = gf_hdr_len (rsp, buf_size);
-        hdr    = gf_hdr_new (rsp, buf_size);
-        rsp    = gf_param (hdr);
-
-        gf_errno = gf_errno_to_error (op_errno);
-        hdr->rsp.op_ret   = hton32 (op_ret);
-        hdr->rsp.op_errno = hton32 (gf_errno);
-
-        if (op_ret <= 0) {
-                state = CALL_STATE(frame);
-
-                gf_log (this->name, GF_LOG_TRACE,
-                        "%"PRId64": READDIRP %"PRId64" (%"PRId64") ==>"
-                        "%"PRId32" (%s)",
-                        frame->root->unique, state->resolve.fd_no,
-                        state->fd ? state->fd->inode->ino : 0, op_ret,
-                        strerror (op_errno));
-        } else {
-                rsp->size = hton32 (buf_size);
-                gf_dirent_serialize (entries, rsp->buf, buf_size);
-        }
-
-        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY,
-                               GF_PROTO_FOP_READDIRP,
-                               hdr, hdrlen, NULL, 0, NULL);
-
-        return 0;
-}
-
-/*
- * server_readdirp_resume - continue a READDIRP after resolution
- *
- * Invokes server_readdirp_cbk with the resolver's error when
- * resolution failed; otherwise winds readdirp with size and offset.
- */
-static int
-server_readdirp_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_readdirp_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_readdirp_cbk, bound_xl,
-                    bound_xl->fops->readdirp, state->fd, state->size,
-                    state->offset);
-
-        return 0;
-}
-
-/*
- * server_readdirp - readdirp function for server protocol
- * @frame: call frame
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the readdirp request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * Stores the fd number, size and offset from the request in the call
- * state and hands over to the resolver.  Always returns 0.
- *
- * not for external reference
- */
-static int
-server_readdirp (call_frame_t *frame, xlator_t *bound_xl, gf_hdr_common_t *hdr,
-                 size_t hdrlen, struct iobuf *iobuf)
-{
-        gf_fop_readdirp_req_t *req = NULL;
-        server_state_t        *state = NULL;
-
-        req = gf_param (hdr);
-        state = CALL_STATE(frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->size = ntoh32 (req->size);
-        state->offset = ntoh64 (req->offset);
-
-        gf_resolve_and_resume (frame, server_readdirp_resume);
-
-        return 0;
-}
-
-
-/*
- * server_readdir_resume - continue a READDIR after resolution
- *
- * Invokes server_readdir_cbk with the resolver's error when resolution
- * failed; otherwise winds readdir with size and offset.
- */
-static int
-server_readdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_readdir_cbk (frame, NULL, frame->this,
-                                    state->resolve.op_ret,
-                                    state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_readdir_cbk,
-                    bound_xl,
-                    bound_xl->fops->readdir,
-                    state->fd, state->size, state->offset);
-
-        return 0;
-}
-/*
- * server_readdir - readdir function for server protocol
- * @frame: call frame
- * @bound_xl: translator the request is bound to
- * @hdr: request header; the readdir request is obtained via gf_param (hdr)
- * @hdrlen: header length (not used here)
- * @iobuf: request payload buffer (not used here)
- *
- * Stores the fd number, size and offset from the request in the call
- * state and hands over to the resolver.  Always returns 0.
- *
- * not for external reference
- */
-static int
-server_readdir (call_frame_t *frame, xlator_t *bound_xl,
-                gf_hdr_common_t *hdr, size_t hdrlen,
-                struct iobuf *iobuf)
-{
-        gf_fop_readdir_req_t *req = NULL;
-        server_state_t       *state = NULL;
-
-        req = gf_param (hdr);
-        state = CALL_STATE(frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.fd_no = ntoh64 (req->fd);
-        state->size = ntoh32 (req->size);
-        state->offset = ntoh64 (req->offset);
-
-        gf_resolve_and_resume (frame, server_readdir_resume);
-
-        return 0;
-}
-
-static int
-server_fsyncdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fsyncdir_cbk,
- bound_xl,
- bound_xl->fops->fsyncdir,
- state->fd, state->flags);
- return 0;
-
-err:
- server_fsyncdir_cbk (frame, NULL, frame->this,
- state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-/*
- * server_fsyncdir - fsyncdir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-static int
-server_fsyncdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fsyncdir_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.fd_no = ntoh64 (req->fd);
- state->flags = ntoh32 (req->data);
-
- gf_resolve_and_resume (frame, server_fsyncdir_resume);
-
- return 0;
-}
-
-
-static int
-server_mknod_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_mknod_cbk,
- bound_xl, bound_xl->fops->mknod,
- &(state->loc), state->mode, state->dev);
-
- return 0;
-err:
- server_mknod_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-
-static int
-server_mknod (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mknod_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- pathlen = STRLEN_0 (req->path);
-
- state->resolve.type = RESOLVE_NOT;
- state->resolve.par = ntoh64 (req->par);
- state->resolve.gen = ntoh64 (req->gen);
- state->resolve.path = gf_strdup (req->path);
- state->resolve.bname = gf_strdup (req->bname + pathlen);
-
- state->mode = ntoh32 (req->mode);
- state->dev = ntoh64 (req->dev);
-
- gf_resolve_and_resume (frame, server_mknod_resume);
-
- return 0;
-}
-
-
-static int
-server_mkdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_mkdir_cbk,
- bound_xl, bound_xl->fops->mkdir,
- &(state->loc), state->mode);
-
- return 0;
-err:
- server_mkdir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-static int
-server_mkdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mkdir_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- pathlen = STRLEN_0 (req->path);
-
- state->resolve.type = RESOLVE_NOT;
- state->resolve.par = ntoh64 (req->par);
- state->resolve.gen = ntoh64 (req->gen);
- state->resolve.path = gf_strdup (req->path);
- state->resolve.bname = gf_strdup (req->bname + pathlen);
-
- state->mode = ntoh32 (req->mode);
-
- gf_resolve_and_resume (frame, server_mkdir_resume);
-
- return 0;
-}
-
-
-static int
-server_rmdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_rmdir_cbk,
- bound_xl, bound_xl->fops->rmdir, &state->loc);
- return 0;
-err:
- server_rmdir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
- return 0;
-}
-
-static int
-server_rmdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rmdir_req_t *req = NULL;
- server_state_t *state = NULL;
- int pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- pathlen = STRLEN_0 (req->path);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.par = ntoh64 (req->par);
- state->resolve.gen = ntoh64 (req->gen);
- state->resolve.path = gf_strdup (req->path);
- state->resolve.bname = gf_strdup (req->bname + pathlen);
-
- gf_resolve_and_resume (frame, server_rmdir_resume);
-
- return 0;
-}
-
-
-static int
-server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_inodelk_cbk,
- bound_xl, bound_xl->fops->inodelk,
- state->volume, &state->loc, state->cmd, &state->flock);
- return 0;
-err:
- server_inodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-
-static int
-server_inodelk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_inodelk_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t pathlen = 0;
- size_t vollen = 0;
- int cmd = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- pathlen = STRLEN_0 (req->path);
- vollen = STRLEN_0 (req->volume + pathlen);
-
- state->resolve.type = RESOLVE_EXACT;
- state->resolve.ino = ntoh64 (req->ino);
- state->resolve.gen = ntoh64 (req->gen);
- state->resolve.path = gf_strdup (req->path);
-
- cmd = ntoh32 (req->cmd);
- switch (cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- state->type = ntoh32 (req->type);
- state->volume = gf_strdup (req->volume + pathlen);
-
- gf_flock_to_flock (&req->flock, &state->flock);
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- state->flock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- state->flock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- state->flock.l_type = F_UNLCK;
- break;
- }
-
- gf_resolve_and_resume (frame, server_inodelk_resume);
-
- return 0;
-}
-
-static int
-server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_finodelk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->finodelk,
- state->volume, state->fd, state->cmd, &state->flock);
-
- return 0;
-err:
- server_finodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
-
- return 0;
-}
-
-static int
-server_finodelk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_finodelk_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->resolve.type = RESOLVE_EXACT;
- state->volume = gf_strdup (req->volume);
- state->resolve.fd_no = ntoh64 (req->fd);
- state->cmd = ntoh32 (req->cmd);
-
- switch (state->cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- state->type = ntoh32 (req->type);
-
- gf_flock_to_flock (&req->flock, &state->flock);
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- state->flock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- state->flock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- state->flock.l_type = F_UNLCK;
- break;
- }
-
- gf_resolve_and_resume (frame, server_finodelk_resume);
-
- return 0;
-}
-
-
-static int
-server_entrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_entrylk_cbk,
- bound_xl, bound_xl->fops->entrylk,
- state->volume, &state->loc, state->name,
- state->cmd, state->type);
- return 0;
-err:
- server_entrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-
-static int
-server_entrylk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_entrylk_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t pathlen = 0;
- size_t namelen = 0;
- size_t vollen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- pathlen = STRLEN_0 (req->path);
- namelen = ntoh64 (req->namelen);
- vollen = STRLEN_0(req->volume + pathlen + namelen);
-
- state->resolve.type = RESOLVE_EXACT;
- state->resolve.path = gf_strdup (req->path);
- state->resolve.ino = ntoh64 (req->ino);
- state->resolve.gen = ntoh64 (req->gen);
-
- if (namelen)
- state->name = gf_strdup (req->name + pathlen);
- state->volume = gf_strdup (req->volume + pathlen + namelen);
-
- state->cmd = ntoh32 (req->cmd);
- state->type = ntoh32 (req->type);
-
- gf_resolve_and_resume (frame, server_entrylk_resume);
-
- return 0;
-}
-
-static int
-server_fentrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fentrylk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fentrylk,
- state->volume, state->fd, state->name,
- state->cmd, state->type);
-
- return 0;
-err:
- server_fentrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-static int
-server_fentrylk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fentrylk_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t namelen = 0;
- size_t vollen = 0;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- vollen = STRLEN_0(req->volume + namelen);
-
- state->resolve.type = RESOLVE_EXACT;
- state->resolve.fd_no = ntoh64 (req->fd);
- state->cmd = ntoh32 (req->cmd);
- state->type = ntoh32 (req->type);
- namelen = ntoh64 (req->namelen);
- if (namelen)
- state->name = req->name;
- state->volume = gf_strdup (req->volume + namelen);
-
-
- gf_resolve_and_resume (frame, server_fentrylk_resume);
-
- return 0;
-}
-
-
-static int
-server_access_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_access_cbk,
- bound_xl, bound_xl->fops->access,
- &state->loc, state->mask);
- return 0;
-err:
- server_access_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-
-static int
-server_access (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_access_req_t *req = NULL;
- server_state_t *state = NULL;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.ino = hton64 (req->ino);
- state->resolve.gen = hton64 (req->gen);
- state->resolve.path = gf_strdup (req->path);
-
- state->mask = ntoh32 (req->mask);
-
- gf_resolve_and_resume (frame, server_access_resume);
-
- return 0;
-}
-
-
-static int
-server_symlink_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_symlink_cbk,
- bound_xl, bound_xl->fops->symlink,
- state->name, &state->loc);
-
- return 0;
-err:
- server_symlink_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-
-static int
-server_symlink (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_state_t *state = NULL;
- gf_fop_symlink_req_t *req = NULL;
- size_t pathlen = 0;
- size_t baselen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- pathlen = STRLEN_0 (req->path);
- baselen = STRLEN_0 (req->bname + pathlen);
-
- state->resolve.type = RESOLVE_NOT;
- state->resolve.par = ntoh64 (req->par);
- state->resolve.gen = ntoh64 (req->gen);
- state->resolve.path = gf_strdup (req->path);
- state->resolve.bname = gf_strdup (req->bname + pathlen);
- state->name = gf_strdup (req->linkname + pathlen + baselen);
-
- gf_resolve_and_resume (frame, server_symlink_resume);
-
- return 0;
-}
-
-
-static int
-server_link_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- if (state->resolve2.op_ret != 0) {
- op_ret = state->resolve2.op_ret;
- op_errno = state->resolve2.op_errno;
- goto err;
- }
-
- state->loc2.inode = inode_ref (state->loc.inode);
-
- STACK_WIND (frame, server_link_cbk,
- bound_xl, bound_xl->fops->link,
- &state->loc, &state->loc2);
- return 0;
-err:
- server_link_cbk (frame, NULL, frame->this, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-static int
-server_link (call_frame_t *frame, xlator_t *this,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_link_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t oldpathlen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- oldpathlen = STRLEN_0 (req->oldpath);
- newpathlen = STRLEN_0 (req->newpath + oldpathlen);
- newbaselen = STRLEN_0 (req->newbname + oldpathlen + newpathlen);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.path = gf_strdup (req->oldpath);
- state->resolve.ino = ntoh64 (req->oldino);
- state->resolve.gen = ntoh64 (req->oldgen);
-
- state->resolve2.type = RESOLVE_NOT;
- state->resolve2.path = gf_strdup (req->newpath + oldpathlen);
- state->resolve2.bname = gf_strdup (req->newbname + oldpathlen + newpathlen);
- state->resolve2.par = ntoh64 (req->newpar);
- state->resolve2.gen = ntoh64 (req->newgen);
-
- gf_resolve_and_resume (frame, server_link_resume);
-
- return 0;
-}
-
-
-static int
-server_rename_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- if (state->resolve2.op_ret != 0) {
- op_ret = state->resolve2.op_ret;
- op_errno = state->resolve2.op_errno;
- goto err;
- }
-
- STACK_WIND (frame, server_rename_cbk,
- bound_xl, bound_xl->fops->rename,
- &state->loc, &state->loc2);
- return 0;
-err:
- server_rename_cbk (frame, NULL, frame->this, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-static int
-server_rename (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rename_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t oldpathlen = 0;
- size_t oldbaselen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
-
- req = gf_param (hdr);
-
- state = CALL_STATE (frame);
- oldpathlen = STRLEN_0 (req->oldpath);
- oldbaselen = STRLEN_0 (req->oldbname + oldpathlen);
- newpathlen = STRLEN_0 (req->newpath + oldpathlen + oldbaselen);
- newbaselen = STRLEN_0 (req->newbname + oldpathlen +
- oldbaselen + newpathlen);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.path = gf_strdup (req->oldpath);
- state->resolve.bname = gf_strdup (req->oldbname + oldpathlen);
- state->resolve.par = ntoh64 (req->oldpar);
- state->resolve.gen = ntoh64 (req->oldgen);
-
- state->resolve2.type = RESOLVE_MAY;
- state->resolve2.path = gf_strdup (req->newpath + oldpathlen + oldbaselen);
- state->resolve2.bname = gf_strdup (req->newbname + oldpathlen + oldbaselen +
- newpathlen);
- state->resolve2.par = ntoh64 (req->newpar);
- state->resolve2.gen = ntoh64 (req->newgen);
-
- gf_resolve_and_resume (frame, server_rename_resume);
-
- return 0;
-}
-
-static int
-server_lk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_lk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->lk,
- state->fd, state->cmd, &state->flock);
-
- return 0;
-
-err:
- server_lk_cbk (frame, NULL, frame->this,
- state->resolve.op_ret,
- state->resolve.op_errno,
- NULL);
- return 0;
-}
-
-/*
- * server_lk - lk function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-
-static int
-server_lk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_lk_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
-
- state->resolve.fd_no = ntoh64 (req->fd);
- state->cmd = ntoh32 (req->cmd);
- state->type = ntoh32 (req->type);
-
- switch (state->cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- gf_flock_to_flock (&req->flock, &state->flock);
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- state->flock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- state->flock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- state->flock.l_type = F_UNLCK;
- break;
- default:
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): Unknown lock type: %"PRId32"!",
- state->resolve.fd_no, state->fd->inode->ino, state->type);
- break;
- }
-
-
- gf_resolve_and_resume (frame, server_lk_resume);
-
- return 0;
-}
-
-/* xxx_MOPS */
-static int
-_volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum)
-{
- server_conf_t *conf = NULL;
- struct _volfile_ctx *temp_volfile = NULL;
-
- conf = this->private;
- temp_volfile = conf->volfile;
-
- while (temp_volfile) {
- if ((NULL == key) && (NULL == temp_volfile->key))
- break;
- if ((NULL == key) || (NULL == temp_volfile->key)) {
- temp_volfile = temp_volfile->next;
- continue;
- }
- if (strcmp (temp_volfile->key, key) == 0)
- break;
- temp_volfile = temp_volfile->next;
- }
-
- if (!temp_volfile) {
- temp_volfile = GF_CALLOC (1, sizeof (struct _volfile_ctx),
- gf_server_mt_volfile_ctx);
-
- temp_volfile->next = conf->volfile;
- temp_volfile->key = (key)? gf_strdup (key): NULL;
- temp_volfile->checksum = checksum;
-
- conf->volfile = temp_volfile;
- goto out;
- }
-
- if (temp_volfile->checksum != checksum) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "the volume file got modified between earlier access "
- "and now, this may lead to inconsistency between "
- "clients, advised to remount client");
- temp_volfile->checksum = checksum;
- }
-
- out:
- return 0;
-}
-
-
-size_t
-build_volfile_path (xlator_t *this, const char *key, char *path,
- size_t path_len)
-{
- int ret = -1;
- int free_filename = 0;
- int free_conf_dir = 0;
- char *filename = NULL;
- char *conf_dir = CONFDIR;
- struct stat buf = {0,};
- data_t * conf_dir_data = NULL;
- char data_key[256] = {0,};
-
- /* Inform users that this option is changed now */
- ret = dict_get_str (this->options, "client-volume-filename",
- &filename);
- if (ret == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "option 'client-volume-filename' is changed to "
- "'volume-filename.<key>' which now takes 'key' as an "
- "option to choose/fetch different files from server. "
- "Refer documentation or contact developers for more "
- "info. Currently defaulting to given file '%s'",
- filename);
- }
-
- if (key && !filename) {
- sprintf (data_key, "volume-filename.%s", key);
- ret = dict_get_str (this->options, data_key, &filename);
-
- if (ret < 0) {
-
- conf_dir_data = dict_get (this->options, "conf-dir");
- if (conf_dir_data) {
- /* Check whether the specified directory exists,
- or directory specified is non standard */
- ret = stat (conf_dir_data->data, &buf);
- if ((ret != 0) || !S_ISDIR (buf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Directory '%s' doesn't"
- "exist, exiting.",
- conf_dir_data->data);
- ret = -1;
- goto out;
- }
- /* Make sure that conf-dir doesn't
- * contain ".." in path
- */
- if ((gf_strstr (conf_dir_data->data,
- "/", "..")) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "%s: invalid conf_dir",
- conf_dir_data->data);
- goto out;
- }
-
- /* Make sure that key doesn't
- * contain "../" in path
- */
-
- if ((gf_strstr (key, "/", "..")) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "%s: invalid key", key);
- goto out;
- }
-
- conf_dir = gf_strdup (conf_dir_data->data);
- free_conf_dir = 1;
- }
-
- ret = gf_asprintf (&filename, "%s/%s.vol",
- conf_dir, key);
- if (-1 == ret)
- goto out;
-
- free_filename = 1;
- }
- }
-
- if (!filename) {
- ret = dict_get_str (this->options,
- "volume-filename.default", &filename);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no default volume filename given, "
- "defaulting to %s", DEFAULT_VOLUME_FILE_PATH);
- filename = DEFAULT_VOLUME_FILE_PATH;
- }
- }
-
- ret = -1;
-
- if ((filename) && (path_len > strlen (filename))) {
- strcpy (path, filename);
- ret = strlen (filename);
- }
-
-out:
- if (free_conf_dir)
- GF_FREE (conf_dir);
-
- if (free_filename)
- GF_FREE (filename);
-
- return ret;
-}
-
-static int
-_validate_volfile_checksum (xlator_t *this, char *key,
- uint32_t checksum)
-{
- char filename[ZR_PATH_MAX] = {0,};
- server_conf_t *conf = NULL;
- struct _volfile_ctx *temp_volfile = NULL;
- int ret = 0;
- uint32_t local_checksum = 0;
-
- conf = this->private;
- temp_volfile = conf->volfile;
-
- if (!checksum)
- goto out;
-
- if (!temp_volfile) {
- ret = build_volfile_path (this, key, filename,
- sizeof (filename));
- if (ret <= 0)
- goto out;
- ret = open (filename, O_RDONLY);
- if (-1 == ret) {
- ret = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to open volume file (%s) : %s",
- filename, strerror (errno));
- goto out;
- }
- get_checksum_for_file (ret, &local_checksum);
- _volfile_update_checksum (this, key, local_checksum);
- close (ret);
- }
-
- temp_volfile = conf->volfile;
- while (temp_volfile) {
- if ((NULL == key) && (NULL == temp_volfile->key))
- break;
- if ((NULL == key) || (NULL == temp_volfile->key)) {
- temp_volfile = temp_volfile->next;
- continue;
- }
- if (strcmp (temp_volfile->key, key) == 0)
- break;
- temp_volfile = temp_volfile->next;
- }
-
- if (!temp_volfile)
- goto out;
-
- if ((temp_volfile->checksum) &&
- (checksum != temp_volfile->checksum))
- ret = -1;
-
-out:
- return ret;
-}
-
-/* Management Calls */
-/*
- * mop_getspec - getspec function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params:
- *
- */
-static int
-mop_getspec (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *_hdr = NULL;
- gf_mop_getspec_rsp_t *rsp = NULL;
- int32_t ret = -1;
- int32_t op_errno = ENOENT;
- int32_t gf_errno = 0;
- int32_t spec_fd = -1;
- size_t file_len = 0;
- size_t _hdrlen = 0;
- char filename[ZR_PATH_MAX] = {0,};
- struct stat stbuf = {0,};
- gf_mop_getspec_req_t *req = NULL;
- uint32_t checksum = 0;
- uint32_t flags = 0;
- uint32_t keylen = 0;
- char *key = NULL;
- server_conf_t *conf = NULL;
-
- req = gf_param (hdr);
- flags = ntoh32 (req->flags);
- keylen = ntoh32 (req->keylen);
- if (keylen) {
- key = req->key;
- }
-
- conf = frame->this->private;
-
- ret = build_volfile_path (frame->this, key, filename,
- sizeof (filename));
- if (ret > 0) {
- /* to allocate the proper buffer to hold the file data */
- ret = stat (filename, &stbuf);
- if (ret < 0){
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Unable to stat %s (%s)",
- filename, strerror (errno));
- goto fail;
- }
-
- spec_fd = open (filename, O_RDONLY);
- if (spec_fd < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Unable to open %s (%s)",
- filename, strerror (errno));
- goto fail;
- }
- ret = 0;
- file_len = stbuf.st_size;
- if (conf->verify_volfile_checksum) {
- get_checksum_for_file (spec_fd, &checksum);
- _volfile_update_checksum (frame->this, key, checksum);
- }
- } else {
- errno = ENOENT;
- }
-
-fail:
- op_errno = errno;
-
- _hdrlen = gf_hdr_len (rsp, file_len + 1);
- _hdr = gf_hdr_new (rsp, file_len + 1);
- rsp = gf_param (_hdr);
-
- _hdr->rsp.op_ret = hton32 (ret);
- gf_errno = gf_errno_to_error (op_errno);
- _hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (file_len) {
- ret = read (spec_fd, rsp->spec, file_len);
- close (spec_fd);
- }
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_GETSPEC,
- _hdr, _hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-server_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint8_t *fchecksum, uint8_t *dchecksum)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_checksum_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, NAME_MAX + 1 + NAME_MAX + 1);
- hdr = gf_hdr_new (rsp, NAME_MAX + 1 + NAME_MAX + 1);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- memcpy (rsp->fchecksum, fchecksum, NAME_MAX);
- rsp->fchecksum[NAME_MAX] = '\0';
- memcpy (rsp->dchecksum + NAME_MAX,
- dchecksum, NAME_MAX);
- rsp->dchecksum[NAME_MAX + NAME_MAX] = '\0';
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_CHECKSUM,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-static int
-server_checksum_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- STACK_WIND (frame, server_checksum_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->checksum,
- &state->loc, state->flags);
-
- return 0;
-err:
- server_checksum_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
-
- return 0;
-}
-
-static int
-server_checksum (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_checksum_req_t *req = NULL;
- server_state_t *state = NULL;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MAY;
- state->resolve.path = gf_strdup (req->path);
- state->resolve.gen = ntoh64 (req->gen);
- state->resolve.ino = ntoh64 (req->ino);
- state->flags = ntoh32 (req->flag);
-
- gf_resolve_and_resume (frame, server_checksum_resume);
-
- return 0;
-}
-
-
-static int
-server_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rchecksum_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, MD5_DIGEST_LEN + 1);
- hdr = gf_hdr_new (rsp, MD5_DIGEST_LEN + 1);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- rsp->weak_checksum = weak_checksum;
-
- memcpy (rsp->strong_checksum,
- strong_checksum, MD5_DIGEST_LEN);
-
- rsp->strong_checksum[MD5_DIGEST_LEN] = '\0';
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_RCHECKSUM,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-static int
-server_rchecksum_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- STACK_WIND (frame, server_rchecksum_cbk,
- bound_xl,
- bound_xl->fops->rchecksum,
- state->fd, state->offset, state->size);
-
- return 0;
-err:
- server_rchecksum_cbk (frame, NULL, frame->this, -1, EINVAL, 0, NULL);
-
- return 0;
-
-}
-
-static int
-server_rchecksum (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rchecksum_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
-
- state->resolve.type = RESOLVE_MAY;
- state->resolve.fd_no = ntoh64 (req->fd);
- state->offset = ntoh64 (req->offset);
- state->size = ntoh32 (req->len);
-
- gf_resolve_and_resume (frame, server_rchecksum_resume);
-
- return 0;
-}
-
-
-/*
- * mop_unlock - unlock management function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- */
-static int
-mop_getvolume (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- return 0;
-}
-
-struct __get_xl_struct {
- const char *name;
- xlator_t *reply;
-};
-
-static void __check_and_set (xlator_t *each, void *data)
-{
- if (!strcmp (each->name,
- ((struct __get_xl_struct *) data)->name))
- ((struct __get_xl_struct *) data)->reply = each;
-}
-
-static xlator_t *
-get_xlator_by_name (xlator_t *some_xl, const char *name)
-{
- struct __get_xl_struct get = {
- .name = name,
- .reply = NULL
- };
-
- xlator_foreach (some_xl, __check_and_set, &get);
-
- return get.reply;
-}
-
-
-/*
- * mop_setvolume - setvolume management function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- */
-static int
-mop_setvolume (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *req_hdr, size_t req_hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- server_conf_t *conf = NULL;
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_setvolume_req_t *req = NULL;
- gf_mop_setvolume_rsp_t *rsp = NULL;
- peer_info_t *peerinfo = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t gf_errno = 0;
- dict_t *reply = NULL;
- dict_t *config_params = NULL;
- dict_t *params = NULL;
- char *name = NULL;
- char *version = NULL;
- char *process_uuid = NULL;
- xlator_t *xl = NULL;
- transport_t *trans = NULL;
- size_t rsp_hdrlen = -1;
- size_t dict_len = -1;
- size_t req_dictlen = -1;
- char *msg = NULL;
- char *volfile_key = NULL;
- uint32_t checksum = 0;
- int32_t lru_limit = 1024;
-
- params = dict_new ();
- reply = dict_new ();
-
- req = gf_param (req_hdr);
- req_dictlen = ntoh32 (req->dict_len);
- ret = dict_unserialize (req->buf, req_dictlen, &params);
-
- config_params = dict_copy_with_ref (frame->this->options, NULL);
- trans = TRANSPORT_FROM_FRAME(frame);
- conf = SERVER_CONF(frame);
-
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "Internal error: failed to unserialize "
- "request dictionary");
- if (ret < 0)
- gf_log (bound_xl->name, GF_LOG_DEBUG,
- "failed to set error msg \"%s\"",
- "Internal error: failed to unserialize "
- "request dictionary");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- ret = dict_get_str (params, "process-uuid", &process_uuid);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "UUID not specified");
- if (ret < 0)
- gf_log (bound_xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
-
- conn = gf_server_connection_get (frame->this, process_uuid);
- if (trans->xl_private != conn)
- trans->xl_private = conn;
-
- ret = dict_get_str (params, "protocol-version", &version);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "No version number specified");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- ret = strcmp (version, GF_PROTOCOL_VERSION);
- if (ret != 0) {
- ret = gf_asprintf (&msg, "protocol version mismatch: client(%s) "
- "- server(%s)", version, GF_PROTOCOL_VERSION);
- if (-1 == ret) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "gf_asprintf failed while setting up error msg");
- goto fail;
- }
- ret = dict_set_dynstr (reply, "ERROR", msg);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- ret = dict_get_str (params,
- "remote-subvolume", &name);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "No remote-subvolume option specified");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- xl = get_xlator_by_name (frame->this, name);
- if (xl == NULL) {
- ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
- name);
- if (-1 == ret) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "gf_asprintf failed while setting error msg");
- goto fail;
- }
- ret = dict_set_dynstr (reply, "ERROR", msg);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = ENOENT;
- goto fail;
- }
-
- if (conf->verify_volfile_checksum) {
- ret = dict_get_uint32 (params, "volfile-checksum", &checksum);
- if (ret == 0) {
- ret = dict_get_str (params, "volfile-key",
- &volfile_key);
-
- ret = _validate_volfile_checksum (trans->xl,
- volfile_key,
- checksum);
- if (-1 == ret) {
- ret = dict_set_str (reply, "ERROR",
- "volume-file checksum "
- "varies from earlier "
- "access");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = ESTALE;
- goto fail;
- }
- }
- }
-
-
- peerinfo = &trans->peerinfo;
- ret = dict_set_static_ptr (params, "peer-info", peerinfo);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set peer-info");
- ret = dict_set_str (params, "peer-info-name", peerinfo->identifier);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set peer-info-name");
-
- if (conf->auth_modules == NULL) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "Authentication module not initialized");
- }
-
- ret = gf_authenticate (params, config_params,
- conf->auth_modules);
- if (ret == AUTH_ACCEPT) {
- gf_log (trans->xl->name, GF_LOG_INFO,
- "accepted client from %s",
- peerinfo->identifier);
- op_ret = 0;
- conn->bound_xl = xl;
- ret = dict_set_str (reply, "ERROR", "Success");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
- } else {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "Cannot authenticate client from %s",
- peerinfo->identifier);
- op_ret = -1;
- op_errno = EACCES;
- ret = dict_set_str (reply, "ERROR", "Authentication failed");
- if (ret < 0)
- gf_log (bound_xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- goto fail;
- }
-
- if (conn->bound_xl == NULL) {
- ret = dict_set_str (reply, "ERROR",
- "Check volfile and handshake "
- "options in protocol/client");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EACCES;
- goto fail;
- }
-
- if ((conn->bound_xl != NULL) &&
- (ret >= 0) &&
- (conn->bound_xl->itable == NULL)) {
- /* create inode table for this bound_xl, if one doesn't
- already exist */
- lru_limit = INODE_LRU_LIMIT (frame->this);
-
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "creating inode table with lru_limit=%"PRId32", "
- "xlator=%s", lru_limit, conn->bound_xl->name);
-
- conn->bound_xl->itable =
- inode_table_new (lru_limit,
- conn->bound_xl);
- }
-
- ret = dict_set_str (reply, "process-uuid",
- xl->ctx->process_uuid);
-
- ret = dict_set_uint64 (reply, "transport-ptr",
- ((uint64_t) (long) trans));
-
-fail:
- dict_len = dict_serialized_length (reply);
- if (dict_len < 0) {
- gf_log ("server", GF_LOG_DEBUG,
- "failed to get serialized length of reply dict");
- op_ret = -1;
- op_errno = EINVAL;
- dict_len = 0;
- }
-
- rsp_hdr = gf_hdr_new (rsp, dict_len);
- rsp_hdrlen = gf_hdr_len (rsp, dict_len);
- rsp = gf_param (rsp_hdr);
-
- if (dict_len) {
- ret = dict_serialize (reply, rsp->buf);
- if (ret < 0) {
- gf_log ("server", GF_LOG_DEBUG,
- "failed to serialize reply dict");
- op_ret = -1;
- op_errno = -ret;
- }
- }
- rsp->dict_len = hton32 (dict_len);
-
- rsp_hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- rsp_hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_SETVOLUME,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- dict_unref (params);
- dict_unref (reply);
- dict_unref (config_params);
-
- return 0;
-}
-
-
-static int
-mop_ping (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL;
- size_t rsp_hdrlen = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = 0;
-
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_PING,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-mop_log (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_mop_log_req_t * req = NULL;
- char * msg = NULL;
- uint32_t msglen = 0;
-
- transport_t * trans = NULL;
-
- trans = TRANSPORT_FROM_FRAME (frame);
-
- req = gf_param (hdr);
- msglen = ntoh32 (req->msglen);
-
- if (msglen)
- msg = req->msg;
-
- gf_log_from_client (msg, trans->peerinfo.identifier);
-
- return 0;
-}
-
-
-/* ENOSYS operations (for backword compatibility) */
-static int
-server_setdents (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */
- size_t rsp_hdrlen = 0;
- int32_t gf_errno = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- gf_errno = gf_errno_to_error (ENOSYS);
- hdr->rsp.op_errno = hton32 (gf_errno);
- hdr->rsp.op_ret = -1;
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SETDENTS,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/* */
-static int
-server_getdents (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */
- size_t rsp_hdrlen = 0;
- int32_t gf_errno = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- gf_errno = gf_errno_to_error (ENOSYS);
- hdr->rsp.op_errno = hton32 (gf_errno);
- hdr->rsp.op_ret = -1;
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_GETDENTS,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/* */
-static int
-server_lock_notify (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */
- size_t rsp_hdrlen = 0;
- int32_t gf_errno = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- gf_errno = gf_errno_to_error (ENOSYS);
- hdr->rsp.op_errno = hton32 (gf_errno);
- hdr->rsp.op_ret = -1;
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LOCK_NOTIFY,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/* */
-static int
-server_lock_fnotify (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */
- size_t rsp_hdrlen = 0;
- int32_t gf_errno = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- gf_errno = gf_errno_to_error (ENOSYS);
- hdr->rsp.op_errno = hton32 (gf_errno);
- hdr->rsp.op_ret = -1;
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LOCK_FNOTIFY,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-static int
-mop_stats (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */
- size_t rsp_hdrlen = 0;
- int32_t gf_errno = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- gf_errno = gf_errno_to_error (ENOSYS);
- hdr->rsp.op_errno = hton32 (gf_errno);
- hdr->rsp.op_ret = -1;
-
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_STATS,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * get_frame_for_transport - get call frame for specified transport object
- *
- * @trans: transport object
- *
- */
-static call_frame_t *
-get_frame_for_transport (transport_t *trans)
-{
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;;
-
- GF_VALIDATE_OR_GOTO("server", trans, out);
-
- if (trans->xl && trans->xl->ctx)
- pool = trans->xl->ctx->pool;
- GF_VALIDATE_OR_GOTO("server", pool, out);
-
- frame = create_frame (trans->xl, pool);
- GF_VALIDATE_OR_GOTO("server", frame, out);
-
- state = GF_CALLOC (1, sizeof (*state),
- gf_server_mt_server_state_t);
- GF_VALIDATE_OR_GOTO("server", state, out);
-
- conn = trans->xl_private;
- if (conn) {
- if (conn->bound_xl)
- state->itable = conn->bound_xl->itable;
- state->bound_xl = conn->bound_xl;
- }
-
- state->trans = transport_ref (trans);
- state->resolve.fd_no = -1;
- state->resolve2.fd_no = -1;
-
- frame->root->trans = conn;
- frame->root->state = state; /* which socket */
- frame->root->unique = 0; /* which call */
-
-out:
- return frame;
-}
-
-
-static int
-server_decode_groups (call_frame_t *frame, gf_hdr_common_t *hdr)
-{
- int i = 0;
-
- if ((!frame) || (!hdr))
- return 0;
-
- frame->root->ngrps = ntoh32 (hdr->req.ngrps);
- if (frame->root->ngrps == 0)
- return 0;
-
- if (frame->root->ngrps > GF_REQUEST_MAXGROUPS)
- return -1;
-
- for (; i < frame->root->ngrps; ++i)
- frame->root->groups[i] = ntoh32 (hdr->req.groups[i]);
-
- return 0;
-}
-
-
-/*
- * get_frame_for_call - create a frame into the capable of
- * generating and replying the reply packet by itself.
- * By making a call with this frame, the last UNWIND
- * function will have all needed state from its
- * frame_t->root to send reply.
- * @trans:
- * @blk:
- * @params:
- *
- * not for external reference
- */
-static call_frame_t *
-get_frame_for_call (transport_t *trans, gf_hdr_common_t *hdr)
-{
- call_frame_t *frame = NULL;
-
- frame = get_frame_for_transport (trans);
-
- frame->root->op = ntoh32 (hdr->op);
- frame->root->type = ntoh32 (hdr->type);
-
- frame->root->uid = ntoh32 (hdr->req.uid);
- frame->root->unique = ntoh64 (hdr->callid); /* which call */
- frame->root->gid = ntoh32 (hdr->req.gid);
- frame->root->pid = ntoh32 (hdr->req.pid);
- frame->root->lk_owner = ntoh64 (hdr->req.lk_owner);
- server_decode_groups (frame, hdr);
-
- return frame;
-}
-
-/*
- * prototype of operations function for each of mop and
- * fop at server protocol level
- *
- * @frame: call frame pointer
- * @bound_xl: the xlator that this frame is bound to
- * @params: parameters dictionary
- *
- * to be used by protocol interpret, _not_ for exterenal reference
- */
-typedef int32_t (*gf_op_t) (call_frame_t *frame, xlator_t *bould_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf);
-
-
-static gf_op_t gf_fops[] = {
- [GF_PROTO_FOP_STAT] = server_stat,
- [GF_PROTO_FOP_READLINK] = server_readlink,
- [GF_PROTO_FOP_MKNOD] = server_mknod,
- [GF_PROTO_FOP_MKDIR] = server_mkdir,
- [GF_PROTO_FOP_UNLINK] = server_unlink,
- [GF_PROTO_FOP_RMDIR] = server_rmdir,
- [GF_PROTO_FOP_SYMLINK] = server_symlink,
- [GF_PROTO_FOP_RENAME] = server_rename,
- [GF_PROTO_FOP_LINK] = server_link,
- [GF_PROTO_FOP_TRUNCATE] = server_truncate,
- [GF_PROTO_FOP_OPEN] = server_open,
- [GF_PROTO_FOP_READ] = server_readv,
- [GF_PROTO_FOP_WRITE] = server_writev,
- [GF_PROTO_FOP_STATFS] = server_statfs,
- [GF_PROTO_FOP_FLUSH] = server_flush,
- [GF_PROTO_FOP_FSYNC] = server_fsync,
- [GF_PROTO_FOP_SETXATTR] = server_setxattr,
- [GF_PROTO_FOP_GETXATTR] = server_getxattr,
- [GF_PROTO_FOP_FGETXATTR] = server_fgetxattr,
- [GF_PROTO_FOP_FSETXATTR] = server_fsetxattr,
- [GF_PROTO_FOP_REMOVEXATTR] = server_removexattr,
- [GF_PROTO_FOP_OPENDIR] = server_opendir,
- [GF_PROTO_FOP_FSYNCDIR] = server_fsyncdir,
- [GF_PROTO_FOP_ACCESS] = server_access,
- [GF_PROTO_FOP_CREATE] = server_create,
- [GF_PROTO_FOP_FTRUNCATE] = server_ftruncate,
- [GF_PROTO_FOP_FSTAT] = server_fstat,
- [GF_PROTO_FOP_LK] = server_lk,
- [GF_PROTO_FOP_LOOKUP] = server_lookup,
- [GF_PROTO_FOP_READDIR] = server_readdir,
- [GF_PROTO_FOP_READDIRP] = server_readdirp,
- [GF_PROTO_FOP_INODELK] = server_inodelk,
- [GF_PROTO_FOP_FINODELK] = server_finodelk,
- [GF_PROTO_FOP_ENTRYLK] = server_entrylk,
- [GF_PROTO_FOP_FENTRYLK] = server_fentrylk,
- [GF_PROTO_FOP_CHECKSUM] = server_checksum,
- [GF_PROTO_FOP_RCHECKSUM] = server_rchecksum,
- [GF_PROTO_FOP_XATTROP] = server_xattrop,
- [GF_PROTO_FOP_FXATTROP] = server_fxattrop,
- [GF_PROTO_FOP_SETATTR] = server_setattr,
- [GF_PROTO_FOP_FSETATTR] = server_fsetattr,
- [GF_PROTO_FOP_SETDENTS] = server_setdents,
- [GF_PROTO_FOP_GETDENTS] = server_getdents,
- [GF_PROTO_FOP_LOCK_NOTIFY] = server_lock_notify,
- [GF_PROTO_FOP_LOCK_FNOTIFY] = server_lock_fnotify,
-};
-
-
-
-static gf_op_t gf_mops[] = {
- [GF_MOP_SETVOLUME] = mop_setvolume,
- [GF_MOP_GETVOLUME] = mop_getvolume,
- [GF_MOP_GETSPEC] = mop_getspec,
- [GF_MOP_PING] = mop_ping,
- [GF_MOP_LOG] = mop_log,
- [GF_MOP_STATS] = mop_stats,
-};
-
-static gf_op_t gf_cbks[] = {
- [GF_CBK_FORGET] = server_forget,
- [GF_CBK_RELEASE] = server_release,
- [GF_CBK_RELEASEDIR] = server_releasedir
-};
-
-static int
-protocol_server_interpret (xlator_t *this, transport_t *trans,
- char *hdr_p, size_t hdrlen, struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- xlator_t *bound_xl = NULL;
- call_frame_t *frame = NULL;
- peer_info_t *peerinfo = NULL;
- int32_t type = -1;
- int32_t op = -1;
- int32_t ret = -1;
-
- hdr = (gf_hdr_common_t *)hdr_p;
- type = ntoh32 (hdr->type);
- op = ntoh32 (hdr->op);
-
- conn = trans->xl_private;
- if (conn)
- bound_xl = conn->bound_xl;
-
- peerinfo = &trans->peerinfo;
- switch (type) {
- case GF_OP_TYPE_FOP_REQUEST:
- if ((op < 0) || (op >= GF_PROTO_FOP_MAXVALUE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid fop %"PRId32" from client %s",
- op, peerinfo->identifier);
- break;
- }
- if (bound_xl == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Received fop %"PRId32" before "
- "authentication.", op);
- break;
- }
- frame = get_frame_for_call (trans, hdr);
- frame->op = op;
- ret = gf_fops[op] (frame, bound_xl, hdr, hdrlen, iobuf);
- break;
-
- case GF_OP_TYPE_MOP_REQUEST:
- if ((op < 0) || (op >= GF_MOP_MAXVALUE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid mop %"PRId32" from client %s",
- op, peerinfo->identifier);
- break;
- }
- frame = get_frame_for_call (trans, hdr);
- frame->op = op;
- ret = gf_mops[op] (frame, bound_xl, hdr, hdrlen, iobuf);
- break;
-
- case GF_OP_TYPE_CBK_REQUEST:
- if ((op < 0) || (op >= GF_CBK_MAXVALUE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cbk %"PRId32" from client %s",
- op, peerinfo->identifier);
- break;
- }
- if (bound_xl == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Received cbk %d before authentication.", op);
- break;
- }
-
- frame = get_frame_for_call (trans, hdr);
- ret = gf_cbks[op] (frame, bound_xl, hdr, hdrlen, iobuf);
- break;
-
- default:
- break;
- }
-
- return ret;
-}
-
-
-/*
- * server_fd - fdtable dump function for server protocol
- * @this:
- *
- */
-static int
-server_fd (xlator_t *this)
-{
- server_conf_t *conf = NULL;
- server_connection_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- gf_proc_dump_add_section("xlator.protocol.server.conn");
-
- ret = pthread_mutex_trylock (&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump fdtable"
- " errno: %d", errno);
- return -1;
- }
-
- list_for_each_entry (trav, &conf->conns, list) {
- if (trav->id) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.id", i);
- gf_proc_dump_write(key, "%s", trav->id);
- }
-
- gf_proc_dump_build_key(key,"xlator.protocol.server.conn",
- "%d.ref",i)
- gf_proc_dump_write(key, "%d", trav->ref);
- if (trav->bound_xl) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.bound_xl", i);
- gf_proc_dump_write(key, "%s", trav->bound_xl->name);
- }
-
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.id", i);
- fdtable_dump(trav->fdtable,key);
- i++;
- }
- pthread_mutex_unlock (&conf->mutex);
-
-
- return 0;
- }
-
-static int
-server_priv (xlator_t *this)
-{
- return 0;
-}
-
-static int
-server_inode (xlator_t *this)
-{
- server_conf_t *conf = NULL;
- server_connection_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- ret = pthread_mutex_trylock (&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump itable"
- " errno: %d", errno);
- return -1;
- }
-
- list_for_each_entry (trav, &conf->conns, list) {
- if (trav->bound_xl && trav->bound_xl->itable) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.bound_xl.%s",
- i, trav->bound_xl->name);
- inode_table_dump(trav->bound_xl->itable,key);
- i++;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
-
- return 0;
-}
-
-
-static void
-get_auth_types (dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *auth_dict = NULL;
- char *saveptr = NULL;
- char *tmp = NULL;
- char *key_cpy = NULL;
- int32_t ret = -1;
-
- auth_dict = data;
- key_cpy = gf_strdup (key);
- GF_VALIDATE_OR_GOTO("server", key_cpy, out);
-
- tmp = strtok_r (key_cpy, ".", &saveptr);
- ret = strcmp (tmp, "auth");
- if (ret == 0) {
- tmp = strtok_r (NULL, ".", &saveptr);
- if (strcmp (tmp, "ip") == 0) {
- /* TODO: backward compatibility, remove when
- newer versions are available */
- tmp = "addr";
- gf_log ("server", GF_LOG_WARNING,
- "assuming 'auth.ip' to be 'auth.addr'");
- }
- ret = dict_set_dynptr (auth_dict, tmp, NULL, 0);
- if (ret < 0) {
- gf_log ("server", GF_LOG_DEBUG,
- "failed to dict_set_dynptr");
- }
- }
-
- GF_FREE (key_cpy);
-out:
- return;
-}
-
-
-static int
-validate_auth_options (xlator_t *this, dict_t *dict)
-{
- int ret = -1;
- int error = 0;
- xlator_list_t *trav = NULL;
- data_pair_t *pair = NULL;
- char *saveptr = NULL;
- char *tmp = NULL;
- char *key_cpy = NULL;
-
- trav = this->children;
- while (trav) {
- error = -1;
- for (pair = dict->members_list; pair; pair = pair->next) {
- key_cpy = gf_strdup (pair->key);
- tmp = strtok_r (key_cpy, ".", &saveptr);
- ret = strcmp (tmp, "auth");
- if (ret == 0) {
- /* for module type */
- tmp = strtok_r (NULL, ".", &saveptr);
- /* for volume name */
- tmp = strtok_r (NULL, ".", &saveptr);
- }
-
- if (strcmp (tmp, trav->xlator->name) == 0) {
- error = 0;
- GF_FREE (key_cpy);
- break;
- }
- GF_FREE (key_cpy);
- }
- if (-1 == error) {
- gf_log (this->name, GF_LOG_ERROR,
- "volume '%s' defined as subvolume, but no "
- "authentication defined for the same",
- trav->xlator->name);
- break;
- }
- trav = trav->next;
- }
-
- return error;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_server_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-
-/*
- * init - called during server protocol initialization
- *
- * @this:
- *
- */
-int
-init (xlator_t *this)
-{
- int32_t ret = -1;
- transport_t *trans = NULL;
- server_conf_t *conf = NULL;
- data_t *data = NULL;
- data_t *trace = NULL;
-
- if (this->children == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "protocol/server should have subvolume");
- goto out;
- }
-
- trans = transport_load (this->options, this);
- if (trans == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to load transport");
- goto out;
- }
-
- ret = transport_listen (trans);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to bind/listen on socket");
- goto out;
- }
-
- conf = GF_CALLOC (1, sizeof (server_conf_t),
- gf_server_mt_server_conf_t);
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- INIT_LIST_HEAD (&conf->conns);
- pthread_mutex_init (&conf->mutex, NULL);
-
- conf->trans = trans;
-
- conf->auth_modules = dict_new ();
- GF_VALIDATE_OR_GOTO(this->name, conf->auth_modules, out);
-
- dict_foreach (this->options, get_auth_types,
- conf->auth_modules);
- ret = validate_auth_options (this, this->options);
- if (ret == -1) {
- /* logging already done in validate_auth_options function. */
- goto out;
- }
-
- ret = gf_auth_init (this, conf->auth_modules);
- if (ret) {
- dict_unref (conf->auth_modules);
- goto out;
- }
-
- this->private = conf;
-
- ret = dict_get_int32 (this->options, "inode-lru-limit",
- &conf->inode_lru_limit);
- if (ret < 0) {
- conf->inode_lru_limit = 1024;
- }
-
- ret = dict_get_int32 (this->options, "limits.transaction-size",
- &conf->max_block_size);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "defaulting limits.transaction-size to %d",
- DEFAULT_BLOCK_SIZE);
- conf->max_block_size = DEFAULT_BLOCK_SIZE;
- }
-
- conf->verify_volfile_checksum = 1;
- data = dict_get (this->options, "verify-volfile-checksum");
- if (data) {
- ret = gf_string2boolean(data->data,
- &conf->verify_volfile_checksum);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wrong value for verify-volfile-checksum");
- conf->verify_volfile_checksum = 1;
- }
- }
-
- trace = dict_get (this->options, "trace");
- if (trace) {
- if (gf_string2boolean (trace->data,
- &conf->trace) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'trace' takes on only boolean values.");
- return -1;
- }
- }
-
-#ifndef GF_DARWIN_HOST_OS
- {
- struct rlimit lim;
-
- lim.rlim_cur = 1048576;
- lim.rlim_max = 1048576;
-
- if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "WARNING: Failed to set 'ulimit -n 1M': %s",
- strerror(errno));
- lim.rlim_cur = 65536;
- lim.rlim_max = 65536;
-
- if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set max open fd to 64k: %s",
- strerror(errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "max open fd set to 64k");
- }
- }
- }
-#endif
- this->graph->top = this;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-
-static int
-protocol_server_pollin (xlator_t *this, transport_t *trans)
-{
- char *hdr = NULL;
- size_t hdrlen = 0;
- int ret = -1;
- struct iobuf *iobuf = NULL;
-
-
- ret = transport_receive (trans, &hdr, &hdrlen, &iobuf);
-
- if (ret == 0)
- ret = protocol_server_interpret (this, trans, hdr,
- hdrlen, iobuf);
-
- /* TODO: use mem-pool */
- GF_FREE (hdr);
-
- return ret;
-}
-
-
-/*
- * fini - finish function for server protocol, called before
- * unloading server protocol.
- *
- * @this:
- *
- */
-void
-fini (xlator_t *this)
-{
- server_conf_t *conf = this->private;
-
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- if (conf->auth_modules) {
- dict_unref (conf->auth_modules);
- }
-
- GF_FREE (conf);
- this->private = NULL;
-out:
- return;
-}
-
-/*
- * server_protocol_notify - notify function for server protocol
- * @this:
- * @trans:
- * @event:
- *
- */
-int
-notify (xlator_t *this, int32_t event, void *data, ...)
-{
- int ret = 0;
- transport_t *trans = NULL;
- peer_info_t *peerinfo = NULL;
- peer_info_t *myinfo = NULL;
-
- trans = data;
- if (!trans) {
- gf_log (this->name, GF_LOG_ERROR, "!trans");
- goto out;
- }
-
- peerinfo = &(trans->peerinfo);
- myinfo = &(trans->myinfo);
-
- switch (event) {
- case GF_EVENT_POLLIN:
- ret = protocol_server_pollin (this, trans);
- break;
- case GF_EVENT_POLLERR:
- {
- gf_log (trans->xl->name, GF_LOG_INFO, "%s disconnected",
- peerinfo->identifier);
-
- ret = -1;
- transport_disconnect (trans);
- if (trans->xl_private == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "POLLERR received on (%s) even before "
- "handshake with (%s) is successful",
- myinfo->identifier, peerinfo->identifier);
- } else {
- /*
- * FIXME: shouldn't we check for return value?
- * what should be done if cleanup fails?
- */
- gf_server_connection_cleanup (this, trans->xl_private);
- }
- }
- break;
-
- case GF_EVENT_TRANSPORT_CLEANUP:
- {
- if (trans->xl_private) {
- gf_server_connection_put (this, trans->xl_private);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport (%s) cleaned up even before "
- "handshake with (%s) is successful",
- myinfo->identifier, peerinfo->identifier);
- }
- }
- break;
-
- default:
- default_notify (this, event, data);
- break;
- }
-out:
- return ret;
-}
-
-
-struct xlator_fops fops = {
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct xlator_dumpops dumpops = {
- .inode = server_inode,
- .priv = server_priv,
- .fd = server_fd,
-};
-
-
-struct volume_options options[] = {
- { .key = {"transport-type"},
- .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
- "tcp/server", "ib-verbs/server"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"volume-filename.*"},
- .type = GF_OPTION_TYPE_PATH,
- },
- { .key = {"inode-lru-limit"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = (1 * GF_UNIT_MB)
- },
- { .key = {"client-volume-filename"},
- .type = GF_OPTION_TYPE_PATH
- },
- { .key = {"verify-volfile-checksum"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"trace"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"conf-dir"},
- .type = GF_OPTION_TYPE_PATH,
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/protocol/legacy/server/src/server-protocol.h b/xlators/protocol/legacy/server/src/server-protocol.h
deleted file mode 100644
index 56d23fdbfab..00000000000
--- a/xlators/protocol/legacy/server/src/server-protocol.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SERVER_PROTOCOL_H_
-#define _SERVER_PROTOCOL_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <pthread.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "fd.h"
-#include "byte-order.h"
-#include "server-mem-types.h"
-#include "authenticate.h"
-#include "transport.h"
-
-#define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */
-#define DEFAULT_VOLUME_FILE_PATH CONFDIR "/glusterfs.vol"
-
-typedef struct _server_state server_state_t;
-
-struct _locker {
- struct list_head lockers;
- char *volume;
- loc_t loc;
- fd_t *fd;
- pid_t pid;
-};
-
-struct _lock_table {
- struct list_head file_lockers;
- struct list_head dir_lockers;
- gf_lock_t lock;
- size_t count;
-};
-
-
-/* private structure per connection (transport object)
- * used as transport_t->xl_private
- */
-struct _server_connection {
- struct list_head list;
- char *id;
- int ref;
- int active_transports;
- pthread_mutex_t lock;
- char disconnected;
- fdtable_t *fdtable;
- struct _lock_table *ltable;
- xlator_t *bound_xl;
-};
-
-typedef struct _server_connection server_connection_t;
-
-
-server_connection_t *
-gf_server_connection_get (xlator_t *this, const char *id);
-
-void
-gf_server_connection_put (xlator_t *this, server_connection_t *conn);
-
-int
-gf_server_connection_cleanup (xlator_t *this, server_connection_t *conn);
-
-struct _volfile_ctx {
- struct _volfile_ctx *next;
- char *key;
- uint32_t checksum;
-};
-
-typedef struct {
- struct _volfile_ctx *volfile;
-
- dict_t *auth_modules;
- transport_t *trans;
- int32_t max_block_size;
- int32_t inode_lru_limit;
- pthread_mutex_t mutex;
- struct list_head conns;
- gf_boolean_t verify_volfile_checksum;
- gf_boolean_t trace;
-} server_conf_t;
-
-
-typedef enum {
- RESOLVE_MUST = 1,
- RESOLVE_NOT,
- RESOLVE_MAY,
- RESOLVE_DONTCARE,
- RESOLVE_EXACT
-} server_resolve_type_t;
-
-
-struct resolve_comp {
- char *basename;
- ino_t ino;
- uint64_t gen;
- inode_t *inode;
-};
-
-typedef struct {
- server_resolve_type_t type;
- uint64_t fd_no;
- ino_t ino;
- uint64_t gen;
- ino_t par;
- char *path;
- char *bname;
- char *resolved;
- int op_ret;
- int op_errno;
- loc_t deep_loc;
- struct resolve_comp *components;
- int comp_count;
-} server_resolve_t;
-
-
-typedef int (*server_resume_fn_t) (call_frame_t *frame, xlator_t *bound_xl);
-
-int
-gf_resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn);
-
-struct _server_state {
- transport_t *trans;
- xlator_t *bound_xl;
- inode_table_t *itable;
-
- server_resume_fn_t resume_fn;
-
- loc_t loc;
- loc_t loc2;
- server_resolve_t resolve;
- server_resolve_t resolve2;
-
- /* used within resolve_and_resume */
- loc_t *loc_now;
- server_resolve_t *resolve_now;
-
- struct iatt stbuf;
- int valid;
-
- fd_t *fd;
- int flags;
- int wbflags;
- struct iobuf *iobuf;
- struct iobref *iobref;
-
- size_t size;
- off_t offset;
- mode_t mode;
- dev_t dev;
- size_t nr_count;
- int cmd;
- int type;
- char *name;
- int name_len;
-
- int mask;
- char is_revalidate;
- dict_t *dict;
- struct flock flock;
- const char *volume;
- dir_entry_t *entry;
-};
-
-
-#endif
diff --git a/xlators/protocol/legacy/server/src/server-resolve.c b/xlators/protocol/legacy/server/src/server-resolve.c
deleted file mode 100644
index b5a1d2d0dd6..00000000000
--- a/xlators/protocol/legacy/server/src/server-resolve.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- Copyright (c) 2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "server-protocol.h"
-#include "server-helpers.h"
-
-#include "compat-errno.h"
-
-void
-gf_server_print_request (call_frame_t *frame);
-
-static int
-server_resolve_all (call_frame_t *frame);
-static int
-resolve_entry_simple (call_frame_t *frame);
-static int
-resolve_inode_simple (call_frame_t *frame);
-static int
-resolve_path_simple (call_frame_t *frame);
-
-
-static int
-component_count (const char *path)
-{
- int count = 0;
- const char *trav = NULL;
-
- trav = path;
-
- for (trav = path; *trav; trav++) {
- if (*trav == '/')
- count++;
- }
-
- return count + 2;
-}
-
-
-static int
-prepare_components (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- char *resolved = NULL;
- int count = 0;
- struct resolve_comp *components = NULL;
- int i = 0;
- char *trav = NULL;
-
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- resolved = gf_strdup (resolve->path);
- resolve->resolved = resolved;
-
- count = component_count (resolve->path);
- components = GF_CALLOC (sizeof (*components), count,
- gf_server_mt_resolve_comp);
- resolve->components = components;
-
- components[0].basename = "";
- components[0].ino = 1;
- components[0].gen = 0;
- components[0].inode = state->itable->root;
-
- i = 1;
- for (trav = resolved; *trav; trav++) {
- if (*trav == '/') {
- components[i].basename = trav + 1;
- *trav = 0;
- i++;
- }
- }
-
- return 0;
-}
-
-
-static int
-resolve_loc_touchup (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- server_resolve_t *resolve = NULL;
- loc_t *loc = NULL;
- char *path = NULL;
- int ret = 0;
-
- state = CALL_STATE (frame);
-
- resolve = state->resolve_now;
- loc = state->loc_now;
-
- if (!loc->path) {
- if (loc->parent && resolve->bname) {
- ret = inode_path (loc->parent, resolve->bname, &path);
- } else if (loc->inode) {
- ret = inode_path (loc->inode, NULL, &path);
- }
-
- if (!path)
- path = gf_strdup (resolve->path);
-
- loc->path = path;
- }
-
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
-
- if (!loc->parent && loc->inode) {
- loc->parent = inode_parent (loc->inode, 0, NULL);
- }
-
- return 0;
-}
-
-
-static int
-resolve_deep_continue (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- int ret = 0;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- resolve->op_ret = 0;
- resolve->op_errno = 0;
-
- if (resolve->par)
- ret = resolve_entry_simple (frame);
- else if (resolve->ino)
- ret = resolve_inode_simple (frame);
- else if (resolve->path)
- ret = resolve_path_simple (frame);
-
- resolve_loc_touchup (frame);
-
- server_resolve_all (frame);
-
- return 0;
-}
-
-
-static int
-resolve_deep_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
-{
- server_state_t *state = NULL;
- server_resolve_t *resolve = NULL;
- struct resolve_comp *components = NULL;
- int i = 0;
- inode_t *link_inode = NULL;
-
- state = CALL_STATE (frame);
- resolve = state->resolve_now;
- components = resolve->components;
-
- i = (long) cookie;
-
- if (op_ret == -1) {
- goto get_out_of_here;
- }
-
- if (i != 0) {
- /* no linking for root inode */
- link_inode = inode_link (inode, resolve->deep_loc.parent,
- resolve->deep_loc.name, buf);
- inode_lookup (link_inode);
- components[i].inode = link_inode;
- link_inode = NULL;
- }
-
- loc_wipe (&resolve->deep_loc);
-
- i++; /* next component */
-
- if (!components[i].basename) {
- /* all components of the path are resolved */
- goto get_out_of_here;
- }
-
- /* join the current component with the path resolved until now */
- *(components[i].basename - 1) = '/';
-
- resolve->deep_loc.path = gf_strdup (resolve->resolved);
- resolve->deep_loc.parent = inode_ref (components[i-1].inode);
- resolve->deep_loc.inode = inode_new (state->itable);
- resolve->deep_loc.name = components[i].basename;
-
- STACK_WIND_COOKIE (frame, resolve_deep_cbk, (void *) (long) i,
- BOUND_XL (frame), BOUND_XL (frame)->fops->lookup,
- &resolve->deep_loc, NULL);
- return 0;
-
-get_out_of_here:
- resolve_deep_continue (frame);
- return 0;
-}
-
-
-static int
-resolve_path_deep (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- int i = 0;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- gf_log (BOUND_XL (frame)->name, GF_LOG_DEBUG,
- "RESOLVE %s() seeking deep resolution of %s",
- gf_fop_list[frame->root->op], resolve->path);
-
- prepare_components (frame);
-
- /* start from the root */
- resolve->deep_loc.inode = state->itable->root;
- resolve->deep_loc.path = gf_strdup ("/");
- resolve->deep_loc.name = "";
-
- STACK_WIND_COOKIE (frame, resolve_deep_cbk, (void *) (long) i,
- BOUND_XL (frame), BOUND_XL (frame)->fops->lookup,
- &resolve->deep_loc, NULL);
- return 0;
-}
-
-
-static int
-resolve_path_simple (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- struct resolve_comp *components = NULL;
- int ret = -1;
- int par_idx = 0;
- int ino_idx = 0;
- int i = 0;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
- components = resolve->components;
-
- if (!components) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- for (i = 0; components[i].basename; i++) {
- par_idx = ino_idx;
- ino_idx = i;
- }
-
- if (!components[par_idx].inode) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- if (!components[ino_idx].inode &&
- (resolve->type == RESOLVE_MUST || resolve->type == RESOLVE_EXACT)) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- if (components[ino_idx].inode && resolve->type == RESOLVE_NOT) {
- resolve->op_ret = -1;
- resolve->op_errno = EEXIST;
- goto out;
- }
-
- if (components[ino_idx].inode)
- state->loc_now->inode = inode_ref (components[ino_idx].inode);
- state->loc_now->parent = inode_ref (components[par_idx].inode);
-
- ret = 0;
-
-out:
- return ret;
-}
-
-/*
- Check if the requirements are fulfilled by entries in the inode cache itself
- Return value:
- <= 0 - simple resolution was decisive and complete (either success or failure)
- > 0 - indecisive, need to perform deep resolution
-*/
-
-static int
-resolve_entry_simple (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- inode_t *parent = NULL;
- inode_t *inode = NULL;
- int ret = 0;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- parent = inode_get (state->itable, resolve->par, 0);
- if (!parent) {
- /* simple resolution is indecisive. need to perform
- deep resolution */
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = 1;
-
- inode = inode_grep (state->itable, parent, resolve->bname);
- if (inode != NULL) {
- gf_log (this->name, GF_LOG_DEBUG, "%"PRId64": inode "
- "(pointer:%p ino: %"PRIu64") present but parent"
- " is NULL for path (%s)", frame->root->unique,
- inode, inode->ino, resolve->path);
- inode_unref (inode);
- }
- goto out;
- }
-
- if (parent->ino != 1 && parent->generation != resolve->gen) {
- /* simple resolution is decisive - request was for a
- stale handle */
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = -1;
- goto out;
- }
-
- /* expected @parent was found from the inode cache */
- state->loc_now->parent = inode_ref (parent);
-
- inode = inode_grep (state->itable, parent, resolve->bname);
- if (!inode) {
- switch (resolve->type) {
- case RESOLVE_DONTCARE:
- case RESOLVE_NOT:
- ret = 0;
- break;
- case RESOLVE_MAY:
- ret = 1;
- break;
- default:
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = 1;
- break;
- }
-
- goto out;
- }
-
- if (resolve->type == RESOLVE_NOT) {
- gf_log (this->name, GF_LOG_DEBUG, "inode (pointer: %p ino:%"
- PRIu64") found for path (%s) while type is RESOLVE_NOT",
- inode, inode->ino, resolve->path);
- resolve->op_ret = -1;
- resolve->op_errno = EEXIST;
- ret = -1;
- goto out;
- }
-
- ret = 0;
-
- state->loc_now->inode = inode_ref (inode);
-
-out:
- if (parent)
- inode_unref (parent);
-
- if (inode)
- inode_unref (inode);
-
- return ret;
-}
-
-
-static int
-server_resolve_entry (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- int ret = 0;
- loc_t *loc = NULL;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
- loc = state->loc_now;
-
- ret = resolve_entry_simple (frame);
-
- if (ret > 0) {
- loc_wipe (loc);
- resolve_path_deep (frame);
- return 0;
- }
-
- if (ret == 0)
- resolve_loc_touchup (frame);
-
- server_resolve_all (frame);
-
- return 0;
-}
-
-
-static int
-resolve_inode_simple (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- inode_t *inode = NULL;
- int ret = 0;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- if (resolve->type == RESOLVE_EXACT) {
- inode = inode_get (state->itable, resolve->ino, resolve->gen);
- } else {
- inode = inode_get (state->itable, resolve->ino, 0);
- }
-
- if (!inode) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = 1;
- goto out;
- }
-
- if (inode->ino != 1 && inode->generation != resolve->gen) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = -1;
- goto out;
- }
-
- ret = 0;
-
- state->loc_now->inode = inode_ref (inode);
-
-out:
- if (inode)
- inode_unref (inode);
-
- return ret;
-}
-
-
-static int
-server_resolve_inode (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- int ret = 0;
- loc_t *loc = NULL;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
- loc = state->loc_now;
-
- ret = resolve_inode_simple (frame);
-
- if (ret > 0) {
- loc_wipe (loc);
- resolve_path_deep (frame);
- return 0;
- }
-
- if (ret == 0)
- resolve_loc_touchup (frame);
-
- server_resolve_all (frame);
-
- return 0;
-}
-
-
-static int
-server_resolve_fd (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- server_connection_t *conn = NULL;
- uint64_t fd_no = -1;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
- conn = SERVER_CONNECTION (frame);
-
- fd_no = resolve->fd_no;
-
- state->fd = gf_fd_fdptr_get (conn->fdtable, fd_no);
-
- if (!state->fd) {
- resolve->op_ret = -1;
- resolve->op_errno = EBADFD;
- }
-
- server_resolve_all (frame);
-
- return 0;
-}
-
-
-static int
-server_resolve (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- if (resolve->fd_no != -1) {
-
- server_resolve_fd (frame);
-
- } else if (resolve->par) {
-
- server_resolve_entry (frame);
-
- } else if (resolve->ino) {
-
- server_resolve_inode (frame);
-
- } else if (resolve->path) {
-
- resolve_path_deep (frame);
-
- } else {
-
- resolve->op_ret = -1;
- resolve->op_errno = EINVAL;
-
- server_resolve_all (frame);
- }
-
- return 0;
-}
-
-
-static int
-server_resolve_done (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *bound_xl = NULL;
-
- state = CALL_STATE (frame);
- bound_xl = BOUND_XL (frame);
-
- gf_server_print_request (frame);
-
- state->resume_fn (frame, bound_xl);
-
- return 0;
-}
-
-
-/*
- * This function is called multiple times, once per resolving one location/fd.
- * state->resolve_now is used to decide which location/fd is to be resolved now
- */
-static int
-server_resolve_all (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
-
- this = frame->this;
- state = CALL_STATE (frame);
-
- if (state->resolve_now == NULL) {
-
- state->resolve_now = &state->resolve;
- state->loc_now = &state->loc;
-
- server_resolve (frame);
-
- } else if (state->resolve_now == &state->resolve) {
-
- state->resolve_now = &state->resolve2;
- state->loc_now = &state->loc2;
-
- server_resolve (frame);
-
- } else if (state->resolve_now == &state->resolve2) {
-
- server_resolve_done (frame);
-
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Invalid pointer for state->resolve_now");
- }
-
- return 0;
-}
-
-
-int
-gf_resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
-
- state = CALL_STATE (frame);
- state->resume_fn = fn;
-
- this = frame->this;
-
- server_resolve_all (frame);
-
- return 0;
-}
diff --git a/xlators/protocol/legacy/transport/Makefile.am b/xlators/protocol/legacy/transport/Makefile.am
deleted file mode 100644
index e2f97437c12..00000000000
--- a/xlators/protocol/legacy/transport/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = socket $(IBVERBS_SUBDIR)
-
-CLEANFILES =
diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am b/xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am
deleted file mode 100644
index 3db7aff9871..00000000000
--- a/xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-# TODO : need to change transportdir
-
-transport_LTLIBRARIES = ib-verbs.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/transport
-
-ib_verbs_la_LDFLAGS = -module -avoidversion
-
-ib_verbs_la_SOURCES = ib-verbs.c name.c
-ib_verbs_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -libverbs $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la
-
-noinst_HEADERS = ib-verbs.h name.h ib-verbs-mem-types.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/protocol/legacy/transport/ib-verbs \
- -I$(top_srcdir)/xlators/protocol/legacy/lib/src
-
-CLEANFILES = *~
diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h
deleted file mode 100644
index bac559646fc..00000000000
--- a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h
+++ /dev/null
@@ -1,39 +0,0 @@
-
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __IB_VERBS_MEM_TYPES_H__
-#define __IB_VERBS_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_ib_verbs_mem_types_ {
- gf_ibv_mt_ib_verbs_private_t = gf_common_mt_end + 1,
- gf_ibv_mt_ib_verbs_ioq_t,
- gf_ibv_mt_transport_t,
- gf_ibv_mt_ib_verbs_local_t,
- gf_ibv_mt_ib_verbs_post_t,
- gf_ibv_mt_char,
- gf_ibv_mt_qpent,
- gf_ibv_mt_ib_verbs_device_t,
- gf_ibv_mt_end
-};
-#endif
-
diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c
deleted file mode 100644
index 85228bf4e58..00000000000
--- a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c
+++ /dev/null
@@ -1,2617 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "dict.h"
-#include "glusterfs.h"
-#include "transport.h"
-#include "protocol.h"
-#include "logging.h"
-#include "xlator.h"
-#include "name.h"
-#include "ib-verbs.h"
-#include <signal.h>
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static uint16_t
-ib_verbs_get_local_lid (struct ibv_context *context,
- int32_t port)
-{
- struct ibv_port_attr attr;
-
- if (ibv_query_port (context, port, &attr))
- return 0;
-
- return attr.lid;
-}
-
-static const char *
-get_port_state_str(enum ibv_port_state pstate)
-{
- switch (pstate) {
- case IBV_PORT_DOWN: return "PORT_DOWN";
- case IBV_PORT_INIT: return "PORT_INIT";
- case IBV_PORT_ARMED: return "PORT_ARMED";
- case IBV_PORT_ACTIVE: return "PORT_ACTIVE";
- case IBV_PORT_ACTIVE_DEFER: return "PORT_ACTIVE_DEFER";
- default: return "invalid state";
- }
-}
-
-static int32_t
-ib_check_active_port (struct ibv_context *ctx, uint8_t port)
-{
- struct ibv_port_attr port_attr;
-
- int32_t ret = 0;
- const char *state_str = NULL;
-
- if (!ctx) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
-
- ret = ibv_query_port (ctx, port, &port_attr);
-
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "Failed to query port %u properties", port);
- return -1;
- }
-
- state_str = get_port_state_str (port_attr.state);
- gf_log ("transport/ib-verbs", GF_LOG_TRACE,
- "Infiniband PORT: (%u) STATE: (%s)",
- port, state_str);
-
- if (port_attr.state == IBV_PORT_ACTIVE)
- return 0;
-
- return -1;
-}
-
-static int32_t
-ib_get_active_port (struct ibv_context *ib_ctx)
-{
- struct ibv_device_attr ib_device_attr;
-
- int32_t ret = -1;
- uint8_t ib_port = 0;
-
- if (!ib_ctx) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
- if (ibv_query_device (ib_ctx, &ib_device_attr)) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "Failed to query device properties");
- return -1;
- }
-
- for (ib_port = 1; ib_port <= ib_device_attr.phys_port_cnt; ++ib_port) {
- ret = ib_check_active_port (ib_ctx, ib_port);
- if (ret == 0)
- return ib_port;
-
- gf_log ("transport/ib-verbs", GF_LOG_TRACE,
- "Port:(%u) not active", ib_port);
- continue;
- }
- return ret;
-}
-
-
-
-static void
-ib_verbs_put_post (ib_verbs_queue_t *queue,
- ib_verbs_post_t *post)
-{
- pthread_mutex_lock (&queue->lock);
- if (post->prev) {
- queue->active_count--;
- post->prev->next = post->next;
- }
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->passive_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- queue->passive_count++;
- pthread_mutex_unlock (&queue->lock);
-}
-
-
-static ib_verbs_post_t *
-ib_verbs_new_post (ib_verbs_device_t *device, int32_t len)
-{
- ib_verbs_post_t *post;
-
- post = (ib_verbs_post_t *) GF_CALLOC (1, sizeof (*post),
- gf_ibv_mt_ib_verbs_post_t);
- if (!post)
- return NULL;
-
- post->buf_size = len;
-
- post->buf = valloc (len);
- if (!post->buf) {
- GF_FREE (post);
- return NULL;
- }
-
- post->mr = ibv_reg_mr (device->pd,
- post->buf,
- post->buf_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!post->mr) {
- free (post->buf);
- GF_FREE (post);
- return NULL;
- }
-
- return post;
-}
-
-
-static ib_verbs_post_t *
-ib_verbs_get_post (ib_verbs_queue_t *queue)
-{
- ib_verbs_post_t *post;
-
- pthread_mutex_lock (&queue->lock);
- {
- post = queue->passive_posts.next;
- if (post == &queue->passive_posts)
- post = NULL;
-
- if (post) {
- if (post->prev)
- post->prev->next = post->next;
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->active_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- post->reused++;
- queue->active_count++;
- }
- }
- pthread_mutex_unlock (&queue->lock);
-
- return post;
-}
-
-void
-ib_verbs_destroy_post (ib_verbs_post_t *post)
-{
- ibv_dereg_mr (post->mr);
- free (post->buf);
- GF_FREE (post);
-}
-
-
-static int32_t
-__ib_verbs_quota_get (ib_verbs_peer_t *peer)
-{
- int32_t ret = -1;
- ib_verbs_private_t *priv = peer->trans->private;
-
- if (priv->connected && peer->quota > 0) {
- ret = peer->quota--;
- }
-
- return ret;
-}
-
-/*
- static int32_t
- ib_verbs_quota_get (ib_verbs_peer_t *peer)
- {
- int32_t ret = -1;
- ib_verbs_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __ib_verbs_quota_get (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
- }
-*/
-
-static void
-__ib_verbs_ioq_entry_free (ib_verbs_ioq_t *entry)
-{
- list_del_init (&entry->list);
- if (entry->iobref)
- iobref_unref (entry->iobref);
-
- /* TODO: use mem-pool */
- GF_FREE (entry->buf);
-
- /* TODO: use mem-pool */
- GF_FREE (entry);
-}
-
-
-static void
-__ib_verbs_ioq_flush (ib_verbs_peer_t *peer)
-{
- ib_verbs_ioq_t *entry = NULL, *dummy = NULL;
-
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- __ib_verbs_ioq_entry_free (entry);
- }
-}
-
-
-static int32_t
-__ib_verbs_disconnect (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int32_t ret = 0;
-
- if (priv->connected || priv->tcp_connected) {
- fcntl (priv->sock, F_SETFL, O_NONBLOCK);
- if (shutdown (priv->sock, SHUT_RDWR) != 0) {
- gf_log ("transport/ib-verbs",
- GF_LOG_DEBUG,
- "shutdown () - error: %s",
- strerror (errno));
- ret = -errno;
- priv->tcp_connected = 0;
- }
- }
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_post_send (struct ibv_qp *qp,
- ib_verbs_post_t *post,
- int32_t len)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = len,
- .lkey = post->mr->lkey
- };
-
- struct ibv_send_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- .opcode = IBV_WR_SEND,
- .send_flags = IBV_SEND_SIGNALED,
- }, *bad_wr;
-
- if (!qp)
- return -1;
-
- return ibv_post_send (qp, &wr, &bad_wr);
-}
-
-
-static int32_t
-__ib_verbs_ioq_churn_entry (ib_verbs_peer_t *peer, ib_verbs_ioq_t *entry)
-{
- int32_t ret = 0, quota = 0;
- ib_verbs_private_t *priv = peer->trans->private;
- ib_verbs_device_t *device = priv->device;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_post_t *post = NULL;
- int32_t len = 0;
-
- quota = __ib_verbs_quota_get (peer);
- if (quota > 0) {
- post = ib_verbs_get_post (&device->sendq);
- if (!post)
- post = ib_verbs_new_post (device,
- (options->send_size + 2048));
-
- len = iov_length ((const struct iovec *)&entry->vector,
- entry->count);
- if (len >= (options->send_size + 2048)) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "increase value of option 'transport.ib-verbs."
- "work-request-send-size' (given=> %"PRId64") "
- "to send bigger (%d) messages",
- (options->send_size + 2048), len);
- return -1;
- }
-
- iov_unload (post->buf,
- (const struct iovec *)&entry->vector,
- entry->count);
-
- ret = ib_verbs_post_send (peer->qp, post, len);
- if (!ret) {
- __ib_verbs_ioq_entry_free (entry);
- ret = len;
- } else {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "ibv_post_send failed with ret = %d", ret);
- ib_verbs_put_post (&device->sendq, post);
- __ib_verbs_disconnect (peer->trans);
- ret = -1;
- }
- }
-
- return ret;
-}
-
-
-static int32_t
-__ib_verbs_ioq_churn (ib_verbs_peer_t *peer)
-{
- ib_verbs_ioq_t *entry = NULL;
- int32_t ret = 0;
-
- while (!list_empty (&peer->ioq))
- {
- /* pick next entry */
- entry = peer->ioq_next;
-
- ret = __ib_verbs_ioq_churn_entry (peer, entry);
-
- if (ret <= 0)
- break;
- }
-
- /*
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- ret = __ib_verbs_ioq_churn_entry (peer, entry);
- if (ret <= 0) {
- break;
- }
- }
- */
-
- return ret;
-}
-
-static int32_t
-__ib_verbs_quota_put (ib_verbs_peer_t *peer)
-{
- int32_t ret;
-
- peer->quota++;
- ret = peer->quota;
-
- if (!list_empty (&peer->ioq)) {
- ret = __ib_verbs_ioq_churn (peer);
- }
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_quota_put (ib_verbs_peer_t *peer)
-{
- int32_t ret;
- ib_verbs_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __ib_verbs_quota_put (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_post_recv (struct ibv_srq *srq,
- ib_verbs_post_t *post)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = post->buf_size,
- .lkey = post->mr->lkey
- };
-
- struct ibv_recv_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- }, *bad_wr;
-
- return ibv_post_srq_recv (srq, &wr, &bad_wr);
-}
-
-
-static int32_t
-ib_verbs_writev (transport_t *this,
- ib_verbs_ioq_t *entry)
-{
- int32_t ret = 0, need_append = 1;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_peer_t *peer = NULL;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "ib-verbs is not connected to post a "
- "send request");
- ret = -1;
- goto unlock;
- }
-
- peer = &priv->peer;
- if (list_empty (&peer->ioq)) {
- ret = __ib_verbs_ioq_churn_entry (peer, entry);
- if (ret != 0) {
- need_append = 0;
- }
- }
-
- if (need_append) {
- list_add_tail (&entry->list, &peer->ioq);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
- return ret;
-}
-
-
-static ib_verbs_ioq_t *
-ib_verbs_ioq_new (char *buf, int len, struct iovec *vector,
- int count, struct iobref *iobref)
-{
- ib_verbs_ioq_t *entry = NULL;
-
- /* TODO: use mem-pool */
- entry = GF_CALLOC (1, sizeof (*entry), gf_ibv_mt_ib_verbs_ioq_t);
-
- assert (count <= (MAX_IOVEC-2));
-
- entry->header.colonO[0] = ':';
- entry->header.colonO[1] = 'O';
- entry->header.colonO[2] = '\0';
- entry->header.version = 42;
- entry->header.size1 = hton32 (len);
- entry->header.size2 = hton32 (iov_length (vector, count));
-
- entry->vector[0].iov_base = &entry->header;
- entry->vector[0].iov_len = sizeof (entry->header);
- entry->count++;
-
- entry->vector[1].iov_base = buf;
- entry->vector[1].iov_len = len;
- entry->count++;
-
- if (vector && count)
- {
- memcpy (&entry->vector[2], vector, sizeof (*vector) * count);
- entry->count += count;
- }
-
- if (iobref)
- entry->iobref = iobref_ref (iobref);
-
- entry->buf = buf;
-
- INIT_LIST_HEAD (&entry->list);
-
- return entry;
-}
-
-
-static int32_t
-ib_verbs_submit (transport_t *this, char *buf, int32_t len,
- struct iovec *vector, int count, struct iobref *iobref)
-{
- int32_t ret = 0;
- ib_verbs_ioq_t *entry = NULL;
-
- entry = ib_verbs_ioq_new (buf, len, vector, count, iobref);
- ret = ib_verbs_writev (this, entry);
-
- if (ret > 0) {
- ret = 0;
- }
-
- return ret;
-}
-
-static int
-ib_verbs_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- ib_verbs_private_t *priv = this->private;
- /* TODO: return error if !priv->connected, check with locks */
- /* TODO: boundry checks for data_ptr/offset */
- char *copy_from = NULL;
- ib_verbs_header_t *header = NULL;
- uint32_t size1, size2, data_len = 0;
- char *hdr = NULL;
- struct iobuf *iobuf = NULL;
- int32_t ret = 0;
-
- pthread_mutex_lock (&priv->recv_mutex);
- {
-/*
- while (!priv->data_ptr)
- pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex);
-*/
-
- copy_from = priv->data_ptr + priv->data_offset;
-
- priv->data_ptr = NULL;
- data_len = priv->data_len;
- pthread_cond_broadcast (&priv->recv_cond);
- }
- pthread_mutex_unlock (&priv->recv_mutex);
-
- header = (ib_verbs_header_t *)copy_from;
- if (strcmp (header->colonO, ":O")) {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "%s: corrupt header received", this->xl->name);
- ret = -1;
- goto err;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (data_len != (size1 + size2 + sizeof (*header))) {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "%s: sizeof data read from transport is not equal "
- "to the size specified in the header",
- this->xl->name);
- ret = -1;
- goto err;
- }
-
- copy_from += sizeof (*header);
-
- if (size1) {
- hdr = GF_CALLOC (1, size1, gf_ibv_mt_char);
- if (!hdr) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unable to allocate header for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (hdr, copy_from, size1);
- copy_from += size1;
- *hdr_p = hdr;
- }
- *hdrlen_p = size1;
-
- if (size2) {
- iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
- if (!iobuf) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unable to allocate IO buffer for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (iobuf->ptr, copy_from, size2);
- *iobuf_p = iobuf;
- }
-
-err:
- return ret;
-}
-
-
-static void
-ib_verbs_destroy_cq (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_device_t *device = priv->device;
-
- if (device->recv_cq)
- ibv_destroy_cq (device->recv_cq);
- device->recv_cq = NULL;
-
- if (device->send_cq)
- ibv_destroy_cq (device->send_cq);
- device->send_cq = NULL;
-
- return;
-}
-
-
-static int32_t
-ib_verbs_create_cq (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_device_t *device = priv->device;
- int32_t ret = 0;
-
- device->recv_cq = ibv_create_cq (priv->device->context,
- options->recv_count * 2,
- device,
- device->recv_chan,
- 0);
- if (!device->recv_cq) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: creation of CQ failed",
- this->xl->name);
- ret = -1;
- } else if (ibv_req_notify_cq (device->recv_cq, 0)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on CQ failed",
- this->xl->name);
- ret = -1;
- }
-
- do {
- /* TODO: make send_cq size dynamically adaptive */
- device->send_cq = ibv_create_cq (priv->device->context,
- options->send_count * 1024,
- device,
- device->send_chan,
- 0);
- if (!device->send_cq) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: creation of send_cq failed",
- this->xl->name);
- ret = -1;
- break;
- }
-
- if (ibv_req_notify_cq (device->send_cq, 0)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on send_cq failed",
- this->xl->name);
- ret = -1;
- break;
- }
- } while (0);
-
- if (ret != 0)
- ib_verbs_destroy_cq (this);
-
- return ret;
-}
-
-
-/*
- * Insert `peer` into the device's QP-number registry under `qp_num`.
- *
- * The registry is an array of 42 circular doubly-linked buckets, each
- * headed by a sentinel entry; the bucket is chosen by `qp_num % 42`.
- * The whole operation runs under qpreg->lock. If `qp_num` is already
- * present the call is a no-op.
- *
- * "Already present" is decided by whether the bucket walk stopped on a
- * real entry rather than on the sentinel, exactly as
- * __ib_verbs_lookup_peer() does. Comparing the sentinel's own qp_num
- * field instead would silently refuse to register a QP whose number
- * happens to equal that field.
- */
-static void
-ib_verbs_register_peer (ib_verbs_device_t *device,
-                        int32_t qp_num,
-                        ib_verbs_peer_t *peer)
-{
-        struct _qpent *ent;
-        ib_verbs_qpreg_t *qpreg = &device->qpreg;
-        int32_t hash = qp_num % 42;
-
-        pthread_mutex_lock (&qpreg->lock);
-        ent = qpreg->ents[hash].next;
-        while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
-                ent = ent->next;
-        if (ent != &qpreg->ents[hash]) {
-                /* walk stopped on a real entry: qp_num is registered */
-                pthread_mutex_unlock (&qpreg->lock);
-                return;
-        }
-        ent = (struct _qpent *) GF_CALLOC (1, sizeof (*ent), gf_ibv_mt_qpent);
-        ERR_ABORT (ent);
-        /* TODO: ref reg->peer */
-        ent->peer = peer;
-        /* link the new entry just before the sentinel (tail of bucket) */
-        ent->next = &qpreg->ents[hash];
-        ent->prev = ent->next->prev;
-        ent->next->prev = ent;
-        ent->prev->next = ent;
-        ent->qp_num = qp_num;
-        qpreg->count++;
-        pthread_mutex_unlock (&qpreg->lock);
-}
-
-
-/*
- * Remove the registry entry for `qp_num` from the device's QP-number
- * registry, if there is one. Runs under qpreg->lock.
- *
- * "Not found" is detected by the bucket walk ending on the sentinel
- * head. Testing the sentinel's qp_num field instead could unlink and
- * GF_FREE the sentinel itself, which is embedded in the registry array
- * and was never heap-allocated.
- */
-static void
-ib_verbs_unregister_peer (ib_verbs_device_t *device,
-                          int32_t qp_num)
-{
-        struct _qpent *ent;
-        ib_verbs_qpreg_t *qpreg = &device->qpreg;
-        int32_t hash = qp_num % 42;
-
-        pthread_mutex_lock (&qpreg->lock);
-        ent = qpreg->ents[hash].next;
-        while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
-                ent = ent->next;
-        if (ent == &qpreg->ents[hash]) {
-                /* reached the sentinel: qp_num was never registered */
-                pthread_mutex_unlock (&qpreg->lock);
-                return;
-        }
-        ent->prev->next = ent->next;
-        ent->next->prev = ent->prev;
-        /* TODO: unref reg->peer */
-        GF_FREE (ent);
-        qpreg->count--;
-        pthread_mutex_unlock (&qpreg->lock);
-}
-
-
-/*
- * Find the peer registered under `qp_num` in the device's QP registry.
- *
- * Walks the circular bucket selected by `qp_num % 42` and returns the
- * peer of the first entry whose qp_num matches, or NULL when the walk
- * comes back to the bucket's sentinel head. This function takes no lock
- * itself.
- */
-static ib_verbs_peer_t *
-__ib_verbs_lookup_peer (ib_verbs_device_t *device, int32_t qp_num)
-{
-        ib_verbs_qpreg_t *registry = &device->qpreg;
-        int32_t           bucket   = qp_num % 42;
-        struct _qpent    *cursor   = NULL;
-
-        for (cursor = registry->ents[bucket].next;
-             cursor != &registry->ents[bucket];
-             cursor = cursor->next) {
-                if (cursor->qp_num == qp_num) {
-                        return cursor->peer;
-                }
-        }
-
-        return NULL;
-}
-
-/*
-static ib_verbs_peer_t *
-ib_verbs_lookup_peer (ib_verbs_device_t *device,
- int32_t qp_num)
-{
- ib_verbs_qpreg_t *qpreg = NULL;
- ib_verbs_peer_t *peer = NULL;
-
- qpreg = &device->qpreg;
- pthread_mutex_lock (&qpreg->lock);
- {
- peer = __ib_verbs_lookup_peer (device, qp_num);
- }
- pthread_mutex_unlock (&qpreg->lock);
-
- return peer;
-}
-*/
-
-
-/*
- * Drop the transport's queue pair, if it has one: remove it from the
- * device's QP registry, destroy it, and clear priv->peer.qp. The pointer
- * is cleared even when no QP existed.
- */
-static void
-__ib_verbs_destroy_qp (transport_t *this)
-{
-        ib_verbs_private_t *private = this->private;
-
-        if (private->peer.qp != NULL) {
-                ib_verbs_unregister_peer (private->device,
-                                          private->peer.qp->qp_num);
-                ibv_destroy_qp (private->peer.qp);
-        }
-
-        private->peer.qp = NULL;
-}
-
-/*
- * Create the transport's queue pair and bring it to the INIT state.
- *
- * The QP is a reliable-connection QP (IBV_QPT_RC) bound to the device's
- * send CQ, receive CQ and shared receive queue. On success the local
- * LID, QP number and a 24-bit pseudo-random packet sequence number are
- * stored in priv->peer, and the peer is added to the device's QP
- * registry.
- *
- * Returns 0 on success. On failure returns -1 after destroying whatever
- * QP was created.
- */
-static int32_t
-ib_verbs_create_qp (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_device_t *device = priv->device;
- int32_t ret = 0;
- ib_verbs_peer_t *peer;
-
- peer = &priv->peer;
- struct ibv_qp_init_attr init_attr = {
- .send_cq = device->send_cq,
- .recv_cq = device->recv_cq,
- .srq = device->srq,
- .cap = {
- .max_send_wr = peer->send_count,
- .max_recv_wr = peer->recv_count,
- .max_send_sge = 1,
- .max_recv_sge = 1
- },
- .qp_type = IBV_QPT_RC
- };
- /* attributes for the RESET -> INIT transition requested below */
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_INIT,
- .pkey_index = 0,
- .port_num = options->port,
- .qp_access_flags = 0
- };
-
- peer->qp = ibv_create_qp (device->pd, &init_attr);
- if (!peer->qp) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "%s: could not create QP",
- this->xl->name);
- ret = -1;
- goto out;
- } else if (ibv_modify_qp (peer->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: failed to modify QP to INIT state",
- this->xl->name);
- ret = -1;
- goto out;
- }
- /* record the local identifiers stored for the handshake */
- peer->local_lid = ib_verbs_get_local_lid (device->context,
- options->port);
- peer->local_qpn = peer->qp->qp_num;
- peer->local_psn = lrand48 () & 0xffffff; /* keep the low 24 bits */
-
- ib_verbs_register_peer (device, peer->qp->qp_num, peer);
-
-out:
- if (ret == -1)
- __ib_verbs_destroy_qp (this); /* also clears peer->qp */
-
- return ret;
-}
-
-/*
- * Placeholder for releasing the posts set up by ib_verbs_create_posts().
- * The body is empty, so calling it currently frees nothing.
- */
-static void
-ib_verbs_destroy_posts (transport_t *this)
-{
-
-}
-
-
-/*
- * Allocate `count` posts, each requested with `size + 2048` bytes, and
- * put every one of them on queue `q`.
- *
- * Stops at the first allocation failure. Posts already queued are left
- * on `q`.
- *
- * Returns 0 when all posts were created, -1 otherwise.
- */
-static int32_t
-__ib_verbs_create_posts (transport_t *this,
-                         int32_t count,
-                         int32_t size,
-                         ib_verbs_queue_t *q)
-{
-        ib_verbs_private_t *private  = this->private;
-        ib_verbs_device_t  *dev      = private->device;
-        ib_verbs_post_t    *new_post = NULL;
-        int32_t             created  = 0;
-
-        while (created < count) {
-                new_post = ib_verbs_new_post (dev, size + 2048);
-                if (new_post == NULL) {
-                        gf_log ("transport/ib-verbs",
-                                GF_LOG_ERROR,
-                                "%s: post creation failed",
-                                this->xl->name);
-                        return -1;
-                }
-
-                ib_verbs_put_post (q, new_post);
-                created++;
-        }
-
-        return 0;
-}
-
-
-/*
- * Fill the device's send and receive post queues and hand the receive
- * posts to the shared receive queue.
- *
- * Steps, each attempted only if the previous one succeeded:
- *   1. create options->send_count posts of options->send_size on sendq;
- *   2. create options->recv_count posts of options->recv_size on recvq;
- *   3. take options->recv_count posts from recvq and post each to the SRQ.
- *
- * On any failure ib_verbs_destroy_posts() is called. Returns 0 on
- * success, non-zero on failure.
- */
-static int32_t
-ib_verbs_create_posts (transport_t *this)
-{
-        ib_verbs_private_t *private   = this->private;
-        ib_verbs_options_t *opts      = &private->options;
-        ib_verbs_device_t  *dev       = private->device;
-        ib_verbs_post_t    *recv_post = NULL;
-        int32_t             idx       = 0;
-        int32_t             status    = 0;
-
-        status = __ib_verbs_create_posts (this, opts->send_count,
-                                          opts->send_size,
-                                          &dev->sendq);
-        if (status == 0) {
-                status = __ib_verbs_create_posts (this, opts->recv_count,
-                                                  opts->recv_size,
-                                                  &dev->recvq);
-        }
-
-        for (idx = 0; (status == 0) && (idx < opts->recv_count); idx++) {
-                recv_post = ib_verbs_get_post (&dev->recvq);
-                if (ib_verbs_post_recv (dev->srq, recv_post) != 0) {
-                        status = -1;
-                }
-        }
-
-        if (status != 0) {
-                ib_verbs_destroy_posts (this);
-        }
-
-        return status;
-}
-
-/*
- * Connect the transport's queue pair to the remote side.
- *
- * Uses the remote QP number, PSN and LID stored in priv->peer to move
- * the QP first to RTR (ready to receive) and then to RTS (ready to
- * send).
- *
- * Returns 0 on success, -1 if either state change is rejected. The QP
- * is not destroyed here on failure.
- */
-static int32_t
-ib_verbs_connect_qp (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_RTR,
- .path_mtu = options->mtu,
- .dest_qp_num = priv->peer.remote_qpn,
- .rq_psn = priv->peer.remote_psn,
- .max_dest_rd_atomic = 1,
- .min_rnr_timer = 12,
- .ah_attr = {
- .is_global = 0,
- .dlid = priv->peer.remote_lid,
- .sl = 0,
- .src_path_bits = 0,
- .port_num = options->port
- }
- };
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC |
- IBV_QP_MIN_RNR_TIMER)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTR\n");
- return -1;
- }
-
- /* TODO: make timeout and retry_cnt configurable from options */
- attr.qp_state = IBV_QPS_RTS; /* same attr struct reused for RTS */
- attr.timeout = 14;
- attr.retry_cnt = 7;
- attr.rnr_retry = 7;
- attr.sq_psn = priv->peer.local_psn; /* PSN chosen in ib_verbs_create_qp */
- attr.max_rd_atomic = 1;
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTS\n");
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Tear down the per-connection verbs state of `this`: destroy the queue
- * pair, then flush the peer's I/O queue if it holds anything.
- *
- * Always returns 0.
- */
-static int32_t
-__ib_verbs_teardown (transport_t *this)
-{
-        ib_verbs_private_t *private = this->private;
-
-        __ib_verbs_destroy_qp (this);
-
-        if (list_empty (&private->peer.ioq)) {
-                /* TODO: decrement cq size */
-                return 0;
-        }
-
-        __ib_verbs_ioq_flush (&private->peer);
-
-        /* TODO: decrement cq size */
-        return 0;
-}
-
-/*
- * Read into, or write from, an iovec array on the transport's TCP
- * socket (priv->sock), looping until everything is transferred or the
- * socket would block.
- *
- * `write` selects writev() (non-zero) or readv() (zero). The entries of
- * `vector` are modified in place as data moves: iov_base is advanced
- * and iov_len reduced for a partially transferred entry. If
- * `pending_vector` / `pending_count` are non-NULL they receive the
- * first unfinished entry and the number of entries left.
- *
- * return value:
- * 0 = success (completed)
- * -1 = error (including EOF on read, which also sets errno = ENOTCONN)
- * > 0 = incomplete (number of iovec entries still pending)
- */
-
-static int
-__tcp_rwv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- ib_verbs_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = vector;
- int opcount = count;
- int moved = 0;
-
- priv = this->private;
- sock = priv->sock;
-
- while (opcount)
- {
- if (write)
- {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN))
- {
- /* done for now */
- break;
- }
- }
- else
- {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN)
- {
- /* done for now */
- break;
- }
- }
- /* only the read path reaches here with ret == 0: peer closed */
- if (ret == 0)
- {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1)
- {
- if (errno == EINTR)
- continue; /* interrupted by a signal: retry the call */
-
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- if (write && !priv->connected &&
- (errno == ECONNREFUSED))
- gf_log (this->xl->name, GF_LOG_ERROR,
- "possible mismatch of 'transport-type'"
- " in protocol server and client. "
- "check volume file");
- opcount = -1;
- break;
- }
- /* account for the `ret` bytes just transferred */
- moved = 0;
-
- while (moved < ret)
- {
- if ((ret - moved) >= opvector[0].iov_len)
- {
- moved += opvector[0].iov_len; /* entry fully consumed */
- opvector++;
- opcount--;
- }
- else
- {
- opvector[0].iov_len -= (ret - moved); /* partial entry */
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len) /* skip zero-length entries */
- {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-/*
- * Read from the transport's TCP socket into `vector`.
- * Thin wrapper around __tcp_rwv() with the write flag cleared; the
- * return value is passed through unchanged.
- */
-static int
-__tcp_readv (transport_t *this, struct iovec *vector, int count,
-             struct iovec **pending_vector, int *pending_count)
-{
-        return __tcp_rwv (this, vector, count,
-                          pending_vector, pending_count, 0);
-}
-
-
-/*
- * Write `vector` to the transport's TCP socket via __tcp_rwv() and then
- * update the write-interest flag passed to event_select_on():
- *   - data still pending (> 0): flag set to 1;
- *   - everything written (0):   flag set to 0;
- *   - error (-1):               registration left untouched.
- *
- * Returns the value produced by __tcp_rwv().
- */
-static int
-__tcp_writev (transport_t *this, struct iovec *vector, int count,
-              struct iovec **pending_vector, int *pending_count)
-{
-        ib_verbs_private_t *private   = this->private;
-        int                 remaining = -1;
-
-        remaining = __tcp_rwv (this, vector, count, pending_vector,
-                               pending_count, 1);
-
-        if (remaining >= 0) {
-                /* TODO: Avoid multiple calls when socket is already
-                   registered for POLLOUT */
-                private->idx = event_select_on (this->xl->ctx->event_pool,
-                                                private->sock,
-                                                private->idx, -1,
-                                                (remaining > 0) ? 1 : 0);
-        }
-
-        return remaining;
-}
-
-
-/*
- * Thread body servicing a receive completion channel.
- *
- * `data` is the struct ibv_comp_channel to wait on. The CQ context
- * delivered with each event is the owning ib_verbs_device_t.
- *
- * For every completion polled from the CQ:
- *   - the post is recovered from wc.wr_id and the peer is looked up by
- *     wc.qp_num under device->qpreg.lock; a reference is taken on the
- *     peer's transport while still under that lock;
- *   - on a failed work request the transport is disconnected, the post
- *     is re-posted to the SRQ and processing moves on;
- *   - on success the received buffer is published through
- *     priv->data_ptr / data_offset / data_len (after waiting for any
- *     previous buffer to be consumed), GF_EVENT_POLLIN is delivered to
- *     the transport's xlator, and the post is re-posted to the SRQ.
- *
- * In the failure branch the transport is disconnected BEFORE the
- * reference taken above is dropped, so the transport cannot be freed
- * while transport_disconnect() is still using it. This matches the
- * ordering used by the send completion thread.
- *
- * The loop never exits: failures of ibv_get_cq_event(),
- * ibv_req_notify_cq() and ibv_poll_cq() are logged and the loop
- * continues.
- */
-static void *
-ib_verbs_recv_completion_proc (void *data)
-{
-        struct ibv_comp_channel *chan = data;
-        ib_verbs_private_t *priv = NULL;
-        ib_verbs_device_t *device;
-        ib_verbs_post_t *post;
-        ib_verbs_peer_t *peer;
-        struct ibv_cq *event_cq;
-        struct ibv_wc wc;
-        void *event_ctx;
-        int32_t ret = 0;
-
-
-        while (1) {
-                ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
-                if (ret) {
-                        gf_log ("transport/ib-verbs", GF_LOG_ERROR,
-                                "ibv_get_cq_event failed, terminating recv "
-                                "thread %d (%d)", ret, errno);
-                        continue;
-                }
-
-                device = event_ctx;
-
-                /* re-arm the CQ before draining it */
-                ret = ibv_req_notify_cq (event_cq, 0);
-                if (ret) {
-                        gf_log ("transport/ib-verbs", GF_LOG_ERROR,
-                                "ibv_req_notify_cq on %s failed, terminating "
-                                "recv thread: %d (%d)",
-                                device->device_name, ret, errno);
-                        continue;
-                }
-
-                while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
-                        post = (ib_verbs_post_t *) (long) wc.wr_id;
-
-                        pthread_mutex_lock (&device->qpreg.lock);
-                        {
-                                peer = __ib_verbs_lookup_peer (device,
-                                                               wc.qp_num);
-
-                                /*
-                                 * keep a refcount on transport so that it
-                                 * does not get freed because of some error
-                                 * indicated by wc.status till we are done
-                                 * with usage of peer and thereby that of trans.
-                                 */
-                                if (peer != NULL) {
-                                        transport_ref (peer->trans);
-                                }
-                        }
-                        pthread_mutex_unlock (&device->qpreg.lock);
-
-                        if (wc.status != IBV_WC_SUCCESS) {
-                                gf_log ("transport/ib-verbs", GF_LOG_ERROR,
-                                        "recv work request on `%s' returned "
-                                        "error (%d)",
-                                        device->device_name,
-                                        wc.status);
-                                if (peer) {
-                                        /* disconnect while our ref is held */
-                                        transport_disconnect (peer->trans);
-                                        transport_unref (peer->trans);
-                                }
-
-                                if (post) {
-                                        ib_verbs_post_recv (device->srq, post);
-                                }
-                                continue;
-                        }
-
-                        if (peer) {
-                                priv = peer->trans->private;
-
-                                pthread_mutex_lock (&priv->recv_mutex);
-                                {
-                                        /* wait until the previous buffer
-                                           has been consumed */
-                                        while (priv->data_ptr)
-                                                pthread_cond_wait (&priv->recv_cond,
-                                                                   &priv->recv_mutex);
-
-                                        priv->data_ptr = post->buf;
-                                        priv->data_offset = 0;
-                                        priv->data_len = wc.byte_len;
-
-                                        /*pthread_cond_broadcast (&priv->recv_cond);*/
-                                }
-                                pthread_mutex_unlock (&priv->recv_mutex);
-
-                                if ((ret = xlator_notify (peer->trans->xl, GF_EVENT_POLLIN,
-                                                          peer->trans, NULL)) == -1) {
-                                        gf_log ("transport/ib-verbs",
-                                                GF_LOG_DEBUG,
-                                                "pollin notification to %s "
-                                                "failed, disconnecting "
-                                                "transport",
-                                                peer->trans->xl->name);
-                                        transport_disconnect (peer->trans);
-                                }
-
-                                transport_unref (peer->trans);
-                        } else {
-                                gf_log ("transport/ib-verbs",
-                                        GF_LOG_DEBUG,
-                                        "could not lookup peer for qp_num: %d",
-                                        wc.qp_num);
-                        }
-                        /* hand the buffer back to the shared receive queue */
-                        ib_verbs_post_recv (device->srq, post);
-                }
-
-                if (ret < 0) {
-                        gf_log ("transport/ib-verbs",
-                                GF_LOG_ERROR,
-                                "ibv_poll_cq on `%s' returned error "
-                                "(ret = %d, errno = %d)",
-                                device->device_name, ret, errno);
-                        continue;
-                }
-                ibv_ack_cq_events (event_cq, 1);
-        }
-        return NULL;
-}
-
-/*
- * Thread body servicing a send completion channel.
- *
- * `data` is the struct ibv_comp_channel to wait on. The CQ context
- * delivered with each event is the owning ib_verbs_device_t.
- *
- * For every completion polled from the CQ the post (from wc.wr_id) is
- * returned to device->sendq and, when the owning peer can still be
- * found by wc.qp_num, ib_verbs_quota_put() is called for it. A failed
- * work request additionally disconnects the peer's transport.
- *
- * The loop never exits: failures are logged and the loop continues,
- * even though the log text says "terminating".
- *
- * NOTE(review): on the `continue` paths taken after a successful
- * ibv_get_cq_event(), ibv_ack_cq_events() is not called for that event
- * -- confirm whether this is intended.
- */
-static void *
-ib_verbs_send_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = data;
- ib_verbs_post_t *post;
- ib_verbs_peer_t *peer;
- struct ibv_cq *event_cq;
- void *event_ctx;
- ib_verbs_device_t *device;
- struct ibv_wc wc;
- int32_t ret;
-
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_get_cq_event on failed, terminating "
- "send thread: %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
- /* re-arm the CQ before draining it */
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "send thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (ib_verbs_post_t *) (long) wc.wr_id;
-
- pthread_mutex_lock (&device->qpreg.lock);
- {
- peer = __ib_verbs_lookup_peer (device,
- wc.qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status till we are done
- * with usage of peer and thereby that of trans.
- */
- if (peer != NULL) {
- transport_ref (peer->trans);
- }
- }
- pthread_mutex_unlock (&device->qpreg.lock);
- /* NOTE(review): post->buf / post->reused are read in the log call
-  * below before the `if (post)` check further down -- confirm that
-  * wr_id can never be 0 on a failed send. */
- if (wc.status != IBV_WC_SUCCESS) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "send work request on `%s' returned "
- "error wc.status = %d, wc.vendor_err "
- "= %d, post->buf = %p, wc.byte_len = "
- "%d, post->reused = %d",
- device->device_name, wc.status,
- wc.vendor_err,
- post->buf, wc.byte_len, post->reused);
- if (wc.status == IBV_WC_RETRY_EXC_ERR)
- gf_log ("ib-verbs", GF_LOG_ERROR,
- "connection between client and"
- " server not working. check by"
- " running 'ibv_srq_pingpong'. "
- "also make sure subnet manager"
- " is running (eg: 'opensm'), "
- "or check if ib-verbs port is "
- "valid (or active) by running "
- " 'ibv_devinfo'. contact "
- "Gluster Support Team if "
- "the problem persists.");
- if (peer)
- transport_disconnect (peer->trans);
- }
- /* the send buffer goes back to the device's send queue */
- if (post) {
- ib_verbs_put_post (&device->sendq, post);
- }
-
- if (peer) {
- int quota_ret = ib_verbs_quota_put (peer);
- if (quota_ret < 0) {
- gf_log ("ib-verbs", GF_LOG_DEBUG,
- "failed to send message");
-
- }
- /* drop the reference taken under qpreg.lock above */
- transport_unref (peer->trans);
- } else {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
- }
-
- if (ret < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error (ret = %d,"
- " errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- ibv_ack_cq_events (event_cq, 1);
- }
-
- return NULL;
-}
-
-/*
- * Fill priv->options from the xlator's option dictionary, starting from
- * built-in defaults.
- *
- * Defaults: send/recv buffer size of four ctx->page_size units each
- * (the previous comment called this 512 KB), 32 send and 32 receive
- * work requests, port 0 and an MTU of IBV_MTU_2048.
- *
- * Recognised keys:
- *   transport.ib-verbs.work-request-send-count
- *   transport.ib-verbs.work-request-recv-count
- *   transport.ib-verbs.port
- *   transport.ib-verbs.mtu          (256, 512, 1024, 2048 or 4096)
- *   transport.ib-verbs.device-name  (duplicated with gf_strdup)
- *
- * Any other MTU value falls back to IBV_MTU_2048 with a log message.
- */
-static void
-ib_verbs_options_init (transport_t *this)
-{
-        ib_verbs_private_t *private   = this->private;
-        ib_verbs_options_t *opts      = &private->options;
-        data_t             *value     = NULL;
-        int32_t             mtu_bytes = 0;
-
-        /* TODO: validate arguments from options below */
-
-        opts->send_size = this->xl->ctx->page_size * 4;
-        opts->recv_size = this->xl->ctx->page_size * 4;
-        opts->send_count = 32;
-        opts->recv_count = 32;
-
-        value = dict_get (this->xl->options,
-                          "transport.ib-verbs.work-request-send-count");
-        if (value != NULL) {
-                opts->send_count = data_to_int32 (value);
-        }
-
-        value = dict_get (this->xl->options,
-                          "transport.ib-verbs.work-request-recv-count");
-        if (value != NULL) {
-                opts->recv_count = data_to_int32 (value);
-        }
-
-        opts->port = 0;
-        value = dict_get (this->xl->options,
-                          "transport.ib-verbs.port");
-        if (value != NULL) {
-                opts->port = data_to_uint64 (value);
-        }
-
-        mtu_bytes = IBV_MTU_2048;
-        opts->mtu = mtu_bytes;
-        value = dict_get (this->xl->options,
-                          "transport.ib-verbs.mtu");
-        if (value != NULL) {
-                mtu_bytes = data_to_int32 (value);
-        }
-
-        /* translate a byte count into the matching verbs MTU constant */
-        if (mtu_bytes == 256) {
-                opts->mtu = IBV_MTU_256;
-        } else if (mtu_bytes == 512) {
-                opts->mtu = IBV_MTU_512;
-        } else if (mtu_bytes == 1024) {
-                opts->mtu = IBV_MTU_1024;
-        } else if (mtu_bytes == 2048) {
-                opts->mtu = IBV_MTU_2048;
-        } else if (mtu_bytes == 4096) {
-                opts->mtu = IBV_MTU_4096;
-        } else {
-                if (value != NULL) {
-                        gf_log ("transport/ib-verbs", GF_LOG_WARNING,
-                                "%s: unrecognized MTU value '%s', defaulting "
-                                "to '2048'", this->xl->name,
-                                data_to_str (value));
-                } else {
-                        gf_log ("transport/ib-verbs", GF_LOG_TRACE,
-                                "%s: defaulting MTU to '2048'",
-                                this->xl->name);
-                }
-                opts->mtu = IBV_MTU_2048;
-        }
-
-        value = dict_get (this->xl->options,
-                          "transport.ib-verbs.device-name");
-        if (value != NULL) {
-                opts->device_name = gf_strdup (value->data);
-        }
-}
-
-/*
- * Prepare a post queue for use: initialise its mutex with default
- * attributes and make both the active and the passive list heads point
- * at themselves (empty circular lists).
- */
-static void
-ib_verbs_queue_init (ib_verbs_queue_t *queue)
-{
-        pthread_mutex_init (&queue->lock, NULL);
-
-        /* empty active list */
-        queue->active_posts.prev = &queue->active_posts;
-        queue->active_posts.next = &queue->active_posts;
-
-        /* empty passive list */
-        queue->passive_posts.prev = &queue->passive_posts;
-        queue->passive_posts.next = &queue->passive_posts;
-}
-
-/*
- * Return the ib_verbs_device_t for the device name and port configured
- * in priv->options, creating and initialising it on first use.
- *
- * Devices are kept in the singly-linked list rooted at ctx->ib. If an
- * entry with the same name and port exists it is returned as is.
- * Otherwise a new entry is built on top of `ibctx`: completion
- * channels, completion queues, protection domain, shared receive queue,
- * send/recv post queues, the two completion threads and the QP
- * registry.
- *
- * When no port was configured, the active port reported by
- * ib_get_active_port() is used and written back to priv->options.port.
- *
- * Returns the device, or NULL on failure.
- *
- * NOTE(review): the new entry is linked into ctx->ib before the
- * remaining setup steps, and the failure paths return NULL without
- * unlinking or releasing it (see the TODOs below) -- a later lookup for
- * the same name/port would find the half-initialised entry.
- */
-static ib_verbs_device_t *
-ib_verbs_get_device (transport_t *this,
- struct ibv_context *ibctx)
-{
- glusterfs_ctx_t *ctx = this->xl->ctx;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- char *device_name = priv->options.device_name;
- uint32_t port = priv->options.port;
-
- uint8_t active_port = 0;
- int32_t ret = 0;
- int32_t i = 0;
-
- ib_verbs_device_t *trav;
- /* look for an already initialised device with this name and port */
- trav = ctx->ib;
- while (trav) {
- if ((!strcmp (trav->device_name, device_name)) &&
- (trav->port == port))
- break;
- trav = trav->next;
- }
-
- if (!trav) {
-
- trav = GF_CALLOC (1, sizeof (*trav),
- gf_ibv_mt_ib_verbs_device_t);
- ERR_ABORT (trav);
- priv->device = trav; /* helpers called below read priv->device */
-
- trav->context = ibctx;
-
- ret = ib_get_active_port (trav->context);
-
- if (ret < 0) {
- if (!port) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "Failed to find any active ports and "
- "none specified in volume file,"
- " exiting");
- return NULL;
- }
- }
-
- active_port = ret;
-
- if (port) {
- ret = ib_check_active_port (trav->context, port);
- if (ret < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_WARNING,
- "On device %s: provided port:%u is "
- "found to be offline, continuing to "
- "use the same port", device_name, port);
- }
- } else {
- priv->options.port = active_port;
- port = active_port;
- gf_log ("transport/ib-verbs", GF_LOG_TRACE,
- "Port unspecified in volume file using active "
- "port: %u", port);
- }
-
- trav->device_name = gf_strdup (device_name);
- trav->port = port;
- /* publish the device at the head of the context's device list */
- trav->next = ctx->ib;
- ctx->ib = trav;
-
- trav->send_chan = ibv_create_comp_channel (trav->context);
- if (!trav->send_chan) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create send completion channel",
- device_name);
- /* TODO: cleanup current mess */
- return NULL;
- }
-
- trav->recv_chan = ibv_create_comp_channel (trav->context);
- if (!trav->recv_chan) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create recv completion channel");
- /* TODO: cleanup current mess */
- return NULL;
- }
-
- if (ib_verbs_create_cq (this) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create CQ",
- this->xl->name);
- return NULL;
- }
-
- /* protection domain */
- trav->pd = ibv_alloc_pd (trav->context);
-
- if (!trav->pd) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not allocate protection domain",
- this->xl->name);
- return NULL;
- }
- /* shared receive queue sized to the configured recv work-request count */
- struct ibv_srq_init_attr attr = {
- .attr = {
- .max_wr = options->recv_count,
- .max_sge = 1
- }
- };
- trav->srq = ibv_create_srq (trav->pd, &attr);
-
- if (!trav->srq) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create SRQ",
- this->xl->name);
- return NULL;
- }
-
- /* queue init */
- ib_verbs_queue_init (&trav->sendq);
- ib_verbs_queue_init (&trav->recvq);
-
- if (ib_verbs_create_posts (this) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not allocate posts",
- this->xl->name);
- return NULL;
- }
-
- /* completion threads */
- ret = pthread_create (&trav->send_thread,
- NULL,
- ib_verbs_send_completion_proc,
- trav->send_chan);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create send completion thread");
- return NULL;
- }
- ret = pthread_create (&trav->recv_thread,
- NULL,
- ib_verbs_recv_completion_proc,
- trav->recv_chan);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create recv completion thread");
- return NULL;
- }
-
- /* qpreg */
- /* NOTE(review): the registry lock and buckets are initialised only
-  * after the completion threads that use them were started -- confirm
-  * no completion can arrive before this point. */
- pthread_mutex_init (&trav->qpreg.lock, NULL);
- for (i=0; i<42; i++) {
- trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
- trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
- }
- }
- return trav;
-}
-
-/*
- * Initialise the verbs side of a transport.
- *
- * Reads the transport options, picks the IB device (the configured
- * device name, or the first device in the list when none is
- * configured), opens it, obtains the shared ib_verbs_device_t through
- * ib_verbs_get_device() and initialises the per-transport peer
- * structure, mutexes and condition variable.
- *
- * Returns 0 on success, -1 on failure. On failure any device context
- * opened here is closed again.
- *
- * The device list is walked with a separate cursor so that the pointer
- * passed to ibv_free_device_list() is the one returned by
- * ibv_get_device_list(). A configured device name that matches no
- * entry in the list is reported as an error rather than handing a NULL
- * context on to ib_verbs_get_device().
- */
-static int32_t
-ib_verbs_init (transport_t *this)
-{
-        ib_verbs_private_t *priv = this->private;
-        ib_verbs_options_t *options = &priv->options;
-        struct ibv_device **dev_list = NULL;
-        struct ibv_device **dev = NULL;
-        struct ibv_context *ib_ctx = NULL;
-        int32_t ret = 0;
-
-        ib_verbs_options_init (this);
-
-        dev_list = ibv_get_device_list (NULL);
-
-        if (!dev_list) {
-                gf_log ("transport/ib-verbs",
-                        GF_LOG_CRITICAL,
-                        "Failed to get IB devices");
-                ret = -1;
-                goto cleanup;
-        }
-
-        if (!*dev_list) {
-                gf_log ("transport/ib-verbs",
-                        GF_LOG_CRITICAL,
-                        "No IB devices found");
-                ret = -1;
-                goto cleanup;
-        }
-
-        if (!options->device_name) {
-                /* no device configured: default to the first one listed */
-                options->device_name =
-                        gf_strdup (ibv_get_device_name (*dev_list));
-                if (!options->device_name) {
-                        gf_log ("transport/ib-verbs", GF_LOG_CRITICAL,
-                                "could not allocate memory for device name");
-                        ret = -1;
-                        goto cleanup;
-                }
-        }
-
-        for (dev = dev_list; *dev; dev++) {
-                if (strcmp (ibv_get_device_name (*dev),
-                            options->device_name))
-                        continue;
-
-                ib_ctx = ibv_open_device (*dev);
-                if (!ib_ctx) {
-                        gf_log ("transport/ib-verbs",
-                                GF_LOG_ERROR,
-                                "Failed to get infiniband "
-                                "device context");
-                        ret = -1;
-                        goto cleanup;
-                }
-                break;
-        }
-
-        if (!ib_ctx) {
-                gf_log ("transport/ib-verbs", GF_LOG_ERROR,
-                        "IB device %s not found",
-                        options->device_name);
-                ret = -1;
-                goto cleanup;
-        }
-
-        priv->device = ib_verbs_get_device (this, ib_ctx);
-
-        if (!priv->device) {
-                gf_log ("transport/ib-verbs", GF_LOG_ERROR,
-                        "could not create ib_verbs device for %s",
-                        options->device_name);
-                ret = -1;
-                goto cleanup;
-        }
-
-        priv->peer.trans = this;
-        INIT_LIST_HEAD (&priv->peer.ioq);
-
-        pthread_mutex_init (&priv->read_mutex, NULL);
-        pthread_mutex_init (&priv->write_mutex, NULL);
-        pthread_mutex_init (&priv->recv_mutex, NULL);
-        pthread_cond_init (&priv->recv_cond, NULL);
-
-cleanup:
-        if (-1 == ret) {
-                if (ib_ctx)
-                        ibv_close_device (ib_ctx);
-        }
-
-        /* dev_list still points at the head returned by the library */
-        if (dev_list)
-                ibv_free_device_list (dev_list);
-
-        return ret;
-}
-
-
-/*
- * Disconnect the transport: runs __ib_verbs_disconnect() with
- * priv->write_mutex held and returns its result.
- */
-static int32_t
-ib_verbs_disconnect (transport_t *this)
-{
-        ib_verbs_private_t *private = this->private;
-        int32_t             status  = 0;
-
-        pthread_mutex_lock (&private->write_mutex);
-        status = __ib_verbs_disconnect (this);
-        pthread_mutex_unlock (&private->write_mutex);
-
-        return status;
-}
-
-
-/*
- * Query the pending socket error (SO_ERROR) of `fd`.
- *
- * Returns 0 when the socket reports no error. If getsockopt() itself
- * fails its result is returned; if the socket carries a pending error,
- * errno is set to that error and -1 is returned.
- */
-static int32_t
-__tcp_connect_finish (int fd)
-{
-        int       so_error = 0;
-        socklen_t so_len   = sizeof (int);
-        int       rc       = -1;
-
-        rc = getsockopt (fd, SOL_SOCKET, SO_ERROR,
-                         (void *)&so_error, &so_len);
-        if (rc != 0) {
-                return rc;
-        }
-
-        if (so_error != 0) {
-                errno = so_error;
-                return -1;
-        }
-
-        return 0;
-}
-
-/*
- * Format the local handshake message into `buf` and point `nbio` at it.
- *
- * The message carries the peer's receive/send block sizes and the local
- * LID, QP number and PSN, all as fixed-width hex fields. `nbio` is set
- * up as a single iovec covering the text including its terminating NUL.
- * `buf` must be large enough for the formatted text; no bound is
- * checked here.
- */
-static inline void
-ib_verbs_fill_handshake_data (char *buf, struct ib_verbs_nbio *nbio,
-                              ib_verbs_private_t *priv)
-{
-        ib_verbs_peer_t *self = &priv->peer;
-
-        sprintf (buf,
-                 "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
-                 "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
-                 self->recv_size,
-                 self->send_size,
-                 self->local_lid,
-                 self->local_qpn,
-                 self->local_psn);
-
-        nbio->count = 1;
-        nbio->vector.iov_base = buf;
-        nbio->vector.iov_len = strlen (buf) + 1;
-}
-
-/*
- * Write the handshake acknowledgement text ("DONE" plus newline) into
- * `buf` and set `nbio` up as a single iovec covering it, terminating
- * NUL included.
- */
-static inline void
-ib_verbs_fill_handshake_ack (char *buf, struct ib_verbs_nbio *nbio)
-{
-        sprintf (buf, "DONE\n");
-
-        nbio->count = 1;
-        nbio->vector.iov_base = buf;
-        nbio->vector.iov_len = strlen (buf) + 1;
-}
-
-/*
- * Drive the receiving half of the TCP handshake that exchanges QP
- * parameters with the remote side. Called when the TCP socket is
- * readable.
- *
- * State machine (priv->handshake.incoming.state), run under
- * priv->write_mutex:
- *   START          allocate the buffer and size the iovec to the length
- *                  of our own handshake message;
- *   RECEIVING_DATA read the peer's message; leave on a partial read;
- *   RECEIVED_DATA  parse block sizes, LID, QPN and PSN, keep the
- *                  smaller of local and remote block sizes, and connect
- *                  the QP;
- *   RECEIVING_ACK  read the peer's acknowledgement;
- *   RECEIVED_ACK   verify it starts with "DONE", mark the transport
- *                  connected, record the peer address and free the
- *                  buffer.
- *
- * The peer's message is accepted only if all five fields were parsed.
- * The original check used `&&` with the "QP1:" prefix test, which let a
- * message with the right prefix but missing fields through and then
- * used the unparsed block-size variables.
- *
- * Returns -1 if the handshake had already completed. Otherwise returns
- * 0 on progress (including a partial read), or the result of the
- * GF_EVENT_CHILD_UP notification once connected. On error the
- * transport is disconnected and -1 is returned.
- */
-static int
-ib_verbs_handshake_pollin (transport_t *this)
-{
-        int ret = 0;
-        ib_verbs_private_t *priv = this->private;
-        char *buf = priv->handshake.incoming.buf;
-        int32_t recv_buf_size = 0, send_buf_size = 0;
-        socklen_t sock_len;
-
-        if (priv->handshake.incoming.state == IB_VERBS_HANDSHAKE_COMPLETE) {
-                return -1;
-        }
-
-        pthread_mutex_lock (&priv->write_mutex);
-        {
-                while (priv->handshake.incoming.state != IB_VERBS_HANDSHAKE_COMPLETE)
-                {
-                        switch (priv->handshake.incoming.state)
-                        {
-                        case IB_VERBS_HANDSHAKE_START:
-                                buf = priv->handshake.incoming.buf = GF_CALLOC (1, 256, gf_ibv_mt_char);
-                                if (!buf) {
-                                        ret = -1;
-                                        goto unlock;
-                                }
-                                /* format our own message only to size the
-                                   iovec, then discard the text */
-                                ib_verbs_fill_handshake_data (buf, &priv->handshake.incoming, priv);
-                                buf[0] = 0;
-                                priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVING_DATA;
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_RECEIVING_DATA:
-                                ret = __tcp_readv (this,
-                                                   &priv->handshake.incoming.vector,
-                                                   priv->handshake.incoming.count,
-                                                   &priv->handshake.incoming.pending_vector,
-                                                   &priv->handshake.incoming.pending_count);
-                                if (ret == -1) {
-                                        goto unlock;
-                                }
-
-                                if (ret > 0) {
-                                        gf_log (this->xl->name, GF_LOG_TRACE,
-                                                "partial header read on NB socket. continue later");
-                                        goto unlock;
-                                }
-
-                                if (!ret) {
-                                        priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVED_DATA;
-                                }
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_RECEIVED_DATA:
-                                ret = sscanf (buf,
-                                              "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
-                                              "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
-                                              &recv_buf_size,
-                                              &send_buf_size,
-                                              &priv->peer.remote_lid,
-                                              &priv->peer.remote_qpn,
-                                              &priv->peer.remote_psn);
-
-                                /* reject unless every field was parsed */
-                                if ((ret != 5) || (strncmp (buf, "QP1:", 4))) {
-                                        gf_log ("transport/ib-verbs",
-                                                GF_LOG_CRITICAL,
-                                                "%s: remote-host(%s)'s "
-                                                "transport type is different",
-                                                this->xl->name,
-                                                this->peerinfo.identifier);
-                                        ret = -1;
-                                        goto unlock;
-                                }
-
-                                /* settle on the smaller block size of the
-                                   two sides */
-                                if (recv_buf_size < priv->peer.recv_size)
-                                        priv->peer.recv_size = recv_buf_size;
-                                if (send_buf_size < priv->peer.send_size)
-                                        priv->peer.send_size = send_buf_size;
-
-                                gf_log ("transport/ib-verbs", GF_LOG_TRACE,
-                                        "%s: transacted recv_size=%d "
-                                        "send_size=%d",
-                                        this->xl->name, priv->peer.recv_size,
-                                        priv->peer.send_size);
-
-                                priv->peer.quota = priv->peer.send_count;
-
-                                if (ib_verbs_connect_qp (this)) {
-                                        gf_log ("transport/ib-verbs",
-                                                GF_LOG_ERROR,
-                                                "%s: failed to connect with "
-                                                "remote QP", this->xl->name);
-                                        ret = -1;
-                                        goto unlock;
-                                }
-                                /* size the iovec for the acknowledgement */
-                                ib_verbs_fill_handshake_ack (buf, &priv->handshake.incoming);
-                                buf[0] = 0;
-                                priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVING_ACK;
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_RECEIVING_ACK:
-                                ret = __tcp_readv (this,
-                                                   &priv->handshake.incoming.vector,
-                                                   priv->handshake.incoming.count,
-                                                   &priv->handshake.incoming.pending_vector,
-                                                   &priv->handshake.incoming.pending_count);
-                                if (ret == -1) {
-                                        goto unlock;
-                                }
-
-                                if (ret > 0) {
-                                        gf_log (this->xl->name, GF_LOG_TRACE,
-                                                "partial header read on NB "
-                                                "socket. continue later");
-                                        goto unlock;
-                                }
-
-                                if (!ret) {
-                                        priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVED_ACK;
-                                }
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_RECEIVED_ACK:
-                                if (strncmp (buf, "DONE", 4)) {
-                                        gf_log ("transport/ib-verbs",
-                                                GF_LOG_DEBUG,
-                                                "%s: handshake-3 did not "
-                                                "return 'DONE' (%s)",
-                                                this->xl->name, buf);
-                                        ret = -1;
-                                        goto unlock;
-                                }
-                                ret = 0;
-                                priv->connected = 1;
-                                sock_len = sizeof (struct sockaddr_storage);
-                                getpeername (priv->sock,
-                                             (struct sockaddr *) &this->peerinfo.sockaddr,
-                                             &sock_len);
-
-                                GF_FREE (priv->handshake.incoming.buf);
-                                priv->handshake.incoming.buf = NULL;
-                                priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_COMPLETE;
-                        }
-                }
-        }
-unlock:
-        pthread_mutex_unlock (&priv->write_mutex);
-
-        if (ret == -1) {
-                transport_disconnect (this);
-        } else {
-                ret = 0;
-        }
-
-        if (!ret && priv->connected) {
-                ret = xlator_notify (this->xl, GF_EVENT_CHILD_UP, this);
-        }
-
-        return ret;
-}
-
-/*
- * Drive the sending half of the TCP handshake that exchanges QP
- * parameters with the remote side. Called when the TCP socket is
- * writable.
- *
- * State machine (priv->handshake.outgoing.state), run under
- * priv->write_mutex:
- *   START        allocate the buffer and format our handshake message;
- *   SENDING_DATA write it; leave on a partial write;
- *   SENT_DATA    format the "DONE" acknowledgement;
- *   SENDING_ACK  write it, then free the buffer and mark the outgoing
- *                side COMPLETE.
- *
- * The critical section is entered with pthread_mutex_lock(). The
- * original code called pthread_mutex_unlock() at this point, so the
- * state machine ran unprotected and the mutex was unlocked a second
- * time on the way out.
- *
- * Returns 0 on progress or completion (a partial write included). On
- * error the transport is disconnected and -1 is returned.
- */
-static int
-ib_verbs_handshake_pollout (transport_t *this)
-{
-        ib_verbs_private_t *priv = this->private;
-        char *buf = priv->handshake.outgoing.buf;
-        int32_t ret = 0;
-
-        if (priv->handshake.outgoing.state == IB_VERBS_HANDSHAKE_COMPLETE) {
-                return 0;
-        }
-
-        pthread_mutex_lock (&priv->write_mutex);
-        {
-                while (priv->handshake.outgoing.state != IB_VERBS_HANDSHAKE_COMPLETE)
-                {
-                        switch (priv->handshake.outgoing.state)
-                        {
-                        case IB_VERBS_HANDSHAKE_START:
-                                buf = priv->handshake.outgoing.buf = GF_CALLOC (1, 256, gf_ibv_mt_char);
-                                if (!buf) {
-                                        ret = -1;
-                                        goto unlock;
-                                }
-                                ib_verbs_fill_handshake_data (buf, &priv->handshake.outgoing, priv);
-                                priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENDING_DATA;
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_SENDING_DATA:
-                                ret = __tcp_writev (this,
-                                                    &priv->handshake.outgoing.vector,
-                                                    priv->handshake.outgoing.count,
-                                                    &priv->handshake.outgoing.pending_vector,
-                                                    &priv->handshake.outgoing.pending_count);
-                                if (ret == -1) {
-                                        goto unlock;
-                                }
-
-                                if (ret > 0) {
-                                        gf_log (this->xl->name, GF_LOG_TRACE,
-                                                "partial header read on NB socket. continue later");
-                                        goto unlock;
-                                }
-
-                                if (!ret) {
-                                        priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENT_DATA;
-                                }
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_SENT_DATA:
-                                ib_verbs_fill_handshake_ack (buf, &priv->handshake.outgoing);
-                                priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENDING_ACK;
-                                break;
-
-                        case IB_VERBS_HANDSHAKE_SENDING_ACK:
-                                ret = __tcp_writev (this,
-                                                    &priv->handshake.outgoing.vector,
-                                                    priv->handshake.outgoing.count,
-                                                    &priv->handshake.outgoing.pending_vector,
-                                                    &priv->handshake.outgoing.pending_count);
-
-                                if (ret == -1) {
-                                        goto unlock;
-                                }
-
-                                if (ret > 0) {
-                                        gf_log (this->xl->name, GF_LOG_TRACE,
-                                                "partial header read on NB "
-                                                "socket. continue later");
-                                        goto unlock;
-                                }
-
-                                if (!ret) {
-                                        GF_FREE (priv->handshake.outgoing.buf);
-                                        priv->handshake.outgoing.buf = NULL;
-                                        priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_COMPLETE;
-                                }
-                                break;
-                        }
-                }
-        }
-unlock:
-        pthread_mutex_unlock (&priv->write_mutex);
-
-        if (ret == -1) {
-                transport_disconnect (this);
-        } else {
-                ret = 0;
-        }
-
-        return ret;
-}
-/*
- * Handle an error or hang-up on the handshake TCP socket.
- *
- * Under priv->write_mutex: tears down the verbs connection state,
- * unregisters and closes the socket (if still open), clears the
- * connected flags, frees both handshake buffers and resets both
- * handshake state machines to START. Afterwards GF_EVENT_POLLERR is
- * delivered to the xlator and, if a socket was actually closed here,
- * one transport reference is dropped.
- *
- * Always returns 0; a close() failure is logged and stored in the local
- * `ret`, which is not returned.
- */
-static int
-ib_verbs_handshake_pollerr (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int32_t ret = 0;
- char need_unref = 0;
-
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "%s: peer disconnected, cleaning up",
- this->xl->name);
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- __ib_verbs_teardown (this);
-
- if (priv->sock != -1) {
- event_unregister (this->xl->ctx->event_pool,
- priv->sock, priv->idx);
- need_unref = 1; /* unref is done after the lock is released */
-
- if (close (priv->sock) != 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "close () - error: %s",
- strerror (errno));
- ret = -errno;
- }
- priv->tcp_connected = priv->connected = 0;
- priv->sock = -1;
- }
-
- if (priv->handshake.incoming.buf) {
- GF_FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- }
-
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START;
-
- if (priv->handshake.outgoing.buf) {
- GF_FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- }
-
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START;
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- xlator_notify (this->xl, GF_EVENT_POLLERR, this, NULL);
-
- if (need_unref)
- transport_unref (this);
-
- return 0;
-}
-
-/*
- * Complete a TCP connect on priv->sock.
- *
- * Under priv->write_mutex: checks the socket's pending error. On
- * success it records the local address in this->myinfo, refreshes the
- * transport identifiers and sets priv->tcp_connected. A getsockname()
- * failure, or a connect error other than EINPROGRESS, marks the attempt
- * as failed and the transport is disconnected after the lock is
- * released.
- *
- * Returns the result of the last check performed (0 on success, -1
- * otherwise, including the still-in-progress case).
- *
- * NOTE(review): on getsockname() failure the socket is closed here but
- * priv->sock is not reset in this function -- confirm
- * transport_disconnect() copes with the already-closed descriptor.
- */
-static int
-tcp_connect_finish (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int error = 0, ret = 0;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __tcp_connect_finish (priv->sock);
-
- if (!ret) {
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
- ret = getsockname (priv->sock,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len);
- if (ret == -1)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getsockname on new client-socket %d "
- "failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- error = 1;
- goto unlock;
- }
-
- gf_ibverbs_get_transport_identifiers (this);
- priv->tcp_connected = 1;
- }
- /* EINPROGRESS is not flagged as an error here */
- if (ret == -1 && errno != EINPROGRESS) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "tcp connect to %s failed (%s)",
- this->peerinfo.identifier, strerror (errno));
- error = 1;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (error) {
- transport_disconnect (this);
- }
-
- return ret;
-}
-/*
- * Event callback for the transport's TCP handshake socket.
- *
- * `data` is the transport. In order:
- *   1. if the TCP connect has not finished, try to finish it and, once
- *      connected, copy the configured counts/sizes into priv->peer and
- *      create the queue pair;
- *   2. on poll_out, advance the outgoing handshake;
- *   3. on poll_in, advance the incoming handshake -- input arriving
- *      after the incoming handshake is complete is treated as an error
- *      and triggers the pollerr cleanup;
- *   4. on any failure above, or on poll_err, run the pollerr cleanup.
- *
- * `fd` and `idx` are not used by the body. Always returns 0.
- */
-static int
-ib_verbs_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- transport_t *this = data;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = NULL;
- int ret = 0;
-
- if (!priv->tcp_connected) {
- ret = tcp_connect_finish (this);
- if (priv->tcp_connected) {
- options = &priv->options;
- /* seed the per-peer limits from the configured options */
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
-
- if ((ret = ib_verbs_create_qp (this)) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create QP",
- this->xl->name);
- transport_disconnect (this);
- }
- }
- }
-
- if (!ret && poll_out && priv->tcp_connected) {
- ret = ib_verbs_handshake_pollout (this);
- }
-
- if (!ret && poll_in && priv->tcp_connected) {
- if (priv->handshake.incoming.state == IB_VERBS_HANDSHAKE_COMPLETE) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: pollin received on tcp socket (peer: %s) "
- "after handshake is complete",
- this->xl->name, this->peerinfo.identifier);
- ib_verbs_handshake_pollerr (this);
- return 0;
- }
- ret = ib_verbs_handshake_pollin (this);
- }
-
- if (ret < 0 || poll_err) {
- ret = ib_verbs_handshake_pollerr (this); /* result is not propagated */
- }
-
- return 0;
-}
-
-static int
-__tcp_nonblock (int fd)
-{
- int flags = 0;
- int ret = -1;
-
- flags = fcntl (fd, F_GETFL);
-
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
-
- return ret;
-}
-
-static int32_t
-ib_verbs_connect (struct transport *this)
-{
- dict_t *options = this->xl->options;
-
- ib_verbs_private_t *priv = this->private;
-
- int32_t ret = 0;
- gf_boolean_t non_blocking = 1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len = 0;
-
- if (priv->connected) {
- return 0;
- }
-
- if (dict_get (options, "non-blocking-io")) {
- char *nb_connect = data_to_str (dict_get (this->xl->options,
- "non-blocking-io"));
-
- if (gf_string2boolean (nb_connect, &non_blocking) == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "'non-blocking-io' takes only boolean "
- "options, not taking any action");
- non_blocking = 1;
- }
- }
-
- ret = gf_ibverbs_client_get_remote_sockaddr (this,
- (struct sockaddr *)&sockaddr,
- &sockaddr_len);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "cannot get remote address to connect");
- return ret;
- }
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (priv->sock != -1) {
- ret = 0;
- goto unlock;
- }
-
- priv->sock = socket (((struct sockaddr *)&sockaddr)->sa_family,
- SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket () - error: %s", strerror (errno));
- ret = -errno;
- goto unlock;
- }
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "socket fd = %d", priv->sock);
-
- memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family =
- ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family;
-
- if (non_blocking)
- {
- ret = __tcp_nonblock (priv->sock);
-
- if (ret == -1)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "could not set socket %d to non "
- "blocking mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- ret = gf_ibverbs_client_bind (this,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1)
- {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = connect (priv->sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len);
- if (ret == -1 && errno != EINPROGRESS)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- priv->tcp_connected = priv->connected = 0;
-
- transport_ref (this);
-
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START;
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START;
-
- priv->idx = event_register (this->xl->ctx->event_pool,
- priv->sock, ib_verbs_event_handler,
- this, 1, 1);
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-static int
-ib_verbs_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- int32_t main_sock = -1;
- transport_t *this, *trans = data;
- ib_verbs_private_t *priv = NULL;
- ib_verbs_private_t *trans_priv = (ib_verbs_private_t *) trans->private;
- ib_verbs_options_t *options = NULL;
-
- if (!poll_in)
- return 0;
-
- this = GF_CALLOC (1, sizeof (transport_t),
- gf_ibv_mt_transport_t);
- ERR_ABORT (this);
- priv = GF_CALLOC (1, sizeof (ib_verbs_private_t),
- gf_ibv_mt_ib_verbs_private_t);
- ERR_ABORT (priv);
- this->private = priv;
- /* Copy all the ib_verbs related values in priv, from trans_priv
- as other than QP, all the values remain same */
- priv->device = trans_priv->device;
- priv->options = trans_priv->options;
- options = &priv->options;
-
- this->ops = trans->ops;
- this->xl = trans->xl;
- this->init = trans->init;
- this->fini = trans->fini;
-
- memcpy (&this->myinfo.sockaddr, &trans->myinfo.sockaddr,
- trans->myinfo.sockaddr_len);
- this->myinfo.sockaddr_len = trans->myinfo.sockaddr_len;
-
- main_sock = (trans_priv)->sock;
- this->peerinfo.sockaddr_len = sizeof (this->peerinfo.sockaddr);
- priv->sock = accept (main_sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- &this->peerinfo.sockaddr_len);
- if (priv->sock == -1) {
- gf_log ("ib-verbs/server", GF_LOG_ERROR,
- "accept() failed: %s",
- strerror (errno));
- GF_FREE (this->private);
- GF_FREE (this);
- return -1;
- }
-
- priv->peer.trans = this;
- transport_ref (this);
-
- gf_ibverbs_get_transport_identifiers (this);
-
- priv->tcp_connected = 1;
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START;
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- if (ib_verbs_create_qp (this) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create QP",
- this->xl->name);
- transport_disconnect (this);
- return -1;
- }
-
- priv->idx = event_register (this->xl->ctx->event_pool, priv->sock,
- ib_verbs_event_handler, this, 1, 1);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- /* pthread_cond_init (&priv->recv_cond, NULL); */
-
- return 0;
-}
-
-static int32_t
-ib_verbs_listen (transport_t *this)
-{
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- ib_verbs_private_t *priv = this->private;
- int opt = 1, ret = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- memset (&sockaddr, 0, sizeof (sockaddr));
- ret = gf_ibverbs_server_get_local_sockaddr (this,
- (struct sockaddr *)&sockaddr,
- &sockaddr_len);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "cannot find network address of server to bind to");
- goto err;
- }
-
- priv->sock = socket (((struct sockaddr *)&sockaddr)->sa_family,
- SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log ("ib-verbs/server", GF_LOG_CRITICAL,
- "init: failed to create socket, error: %s",
- strerror (errno));
- GF_FREE (this->private);
- ret = -1;
- goto err;
- }
-
- memcpy (&this->myinfo.sockaddr, &sockaddr, sockaddr_len);
- this->myinfo.sockaddr_len = sockaddr_len;
-
- ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
- sprintf (this->myinfo.identifier, "%s:%s", host, service);
-
- setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt));
- if (bind (priv->sock,
- (struct sockaddr *)&sockaddr,
- sockaddr_len) != 0) {
- ret = -1;
- gf_log ("ib-verbs/server", GF_LOG_ERROR,
- "init: failed to bind to socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- goto err;
- }
-
- if (listen (priv->sock, 10) != 0) {
- gf_log ("ib-verbs/server", GF_LOG_ERROR,
- "init: listen () failed on socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- ret = -1;
- goto err;
- }
-
- /* Register the main socket */
- priv->idx = event_register (this->xl->ctx->event_pool, priv->sock,
- ib_verbs_server_event_handler,
- transport_ref (this), 1, 0);
-
-err:
- return ret;
-}
-
-struct transport_ops tops = {
- .receive = ib_verbs_receive,
- .submit = ib_verbs_submit,
- .connect = ib_verbs_connect,
- .disconnect = ib_verbs_disconnect,
- .listen = ib_verbs_listen,
-};
-
-int32_t
-init (transport_t *this)
-{
- ib_verbs_private_t *priv = GF_CALLOC (1, sizeof (*priv),
- gf_ibv_mt_ib_verbs_private_t);
- this->private = priv;
- priv->sock = -1;
-
- if (ib_verbs_init (this)) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "Failed to initialize IB Device");
- return -1;
- }
-
- return 0;
-}
-
-void
-fini (struct transport *this)
-{
- /* TODO: verify this function does graceful finish */
- ib_verbs_private_t *priv = this->private;
- this->private = NULL;
-
- pthread_mutex_destroy (&priv->recv_mutex);
- pthread_mutex_destroy (&priv->write_mutex);
- pthread_mutex_destroy (&priv->read_mutex);
- /* pthread_cond_destroy (&priv->recv_cond); */
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "called fini on transport: %p",
- this);
- GF_FREE (priv);
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_common_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/* TODO: expand each option */
-struct volume_options options[] = {
- { .key = {"transport.ib-verbs.port",
- "ib-verbs-port"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 4,
- .description = "check the option by 'ibv_devinfo'"
- },
- { .key = {"transport.ib-verbs.mtu",
- "ib-verbs-mtu"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.ib-verbs.device-name",
- "ib-verbs-device-name"},
- .type = GF_OPTION_TYPE_ANY,
- .description = "check by 'ibv_devinfo'"
- },
- { .key = {"transport.ib-verbs.work-request-send-count",
- "ib-verbs-work-request-send-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.ib-verbs.work-request-recv-count",
- "ib-verbs-work-request-recv-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.ib-verbs.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.ib-verbs.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.ib-verbs.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.ib-verbs.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.ib-verbs.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.address-family",
- "address-family"},
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"transport.socket.lowlat"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} }
-};
diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h
deleted file mode 100644
index c385b62e5cb..00000000000
--- a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _XPORT_IB_VERBS_H
-#define _XPORT_IB_VERBS_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#include "xlator.h"
-#include "event.h"
-#include "ib-verbs-mem-types.h"
-
-#include <stdio.h>
-#include <list.h>
-#include <arpa/inet.h>
-#include <infiniband/verbs.h>
-
-#define GF_DEFAULT_IBVERBS_LISTEN_PORT 6997
-
-/* options per transport end point */
-struct _ib_verbs_options {
- int32_t port;
- char *device_name;
- enum ibv_mtu mtu;
- int32_t send_count;
- int32_t recv_count;
- uint64_t recv_size;
- uint64_t send_size;
-};
-typedef struct _ib_verbs_options ib_verbs_options_t;
-
-
-struct _ib_verbs_header {
- char colonO[3];
- uint32_t size1;
- uint32_t size2;
- char version;
-} __attribute__((packed));
-typedef struct _ib_verbs_header ib_verbs_header_t;
-
-struct _ib_verbs_ioq {
- union {
- struct list_head list;
- struct {
- struct _ib_verbs_ioq *next;
- struct _ib_verbs_ioq *prev;
- };
- };
- ib_verbs_header_t header;
- struct iovec vector[MAX_IOVEC];
- int count;
- char *buf;
- struct iobref *iobref;
-};
-typedef struct _ib_verbs_ioq ib_verbs_ioq_t;
-
-/* represents one communication peer, two per transport_t */
-struct _ib_verbs_peer {
- transport_t *trans;
- struct ibv_qp *qp;
-
- int32_t recv_count;
- int32_t send_count;
- int32_t recv_size;
- int32_t send_size;
-
- int32_t quota;
- union {
- struct list_head ioq;
- struct {
- ib_verbs_ioq_t *ioq_next;
- ib_verbs_ioq_t *ioq_prev;
- };
- };
-
- /* QP attributes, needed to connect with remote QP */
- int32_t local_lid;
- int32_t local_psn;
- int32_t local_qpn;
- int32_t remote_lid;
- int32_t remote_psn;
- int32_t remote_qpn;
-};
-typedef struct _ib_verbs_peer ib_verbs_peer_t;
-
-
-struct _ib_verbs_post {
- struct _ib_verbs_post *next, *prev;
- struct ibv_mr *mr;
- char *buf;
- int32_t buf_size;
- char aux;
- int32_t reused;
- pthread_barrier_t wait;
-};
-typedef struct _ib_verbs_post ib_verbs_post_t;
-
-
-struct _ib_verbs_queue {
- ib_verbs_post_t active_posts, passive_posts;
- int32_t active_count, passive_count;
- pthread_mutex_t lock;
-};
-typedef struct _ib_verbs_queue ib_verbs_queue_t;
-
-
-struct _ib_verbs_qpreg {
- pthread_mutex_t lock;
- int32_t count;
- struct _qpent {
- struct _qpent *next, *prev;
- int32_t qp_num;
- ib_verbs_peer_t *peer;
- } ents[42];
-};
-typedef struct _ib_verbs_qpreg ib_verbs_qpreg_t;
-
-/* context per device, stored in global glusterfs_ctx_t->ib */
-struct _ib_verbs_device {
- struct _ib_verbs_device *next;
- const char *device_name;
- struct ibv_context *context;
- int32_t port;
- struct ibv_pd *pd;
- struct ibv_srq *srq;
- ib_verbs_qpreg_t qpreg;
- struct ibv_comp_channel *send_chan, *recv_chan;
- struct ibv_cq *send_cq, *recv_cq;
- ib_verbs_queue_t sendq, recvq;
- pthread_t send_thread, recv_thread;
-};
-typedef struct _ib_verbs_device ib_verbs_device_t;
-
-typedef enum {
- IB_VERBS_HANDSHAKE_START = 0,
- IB_VERBS_HANDSHAKE_SENDING_DATA,
- IB_VERBS_HANDSHAKE_RECEIVING_DATA,
- IB_VERBS_HANDSHAKE_SENT_DATA,
- IB_VERBS_HANDSHAKE_RECEIVED_DATA,
- IB_VERBS_HANDSHAKE_SENDING_ACK,
- IB_VERBS_HANDSHAKE_RECEIVING_ACK,
- IB_VERBS_HANDSHAKE_RECEIVED_ACK,
- IB_VERBS_HANDSHAKE_COMPLETE,
-} ib_verbs_handshake_state_t;
-
-struct ib_verbs_nbio {
- int state;
- char *buf;
- int count;
- struct iovec vector;
- struct iovec *pending_vector;
- int pending_count;
-};
-
-
-struct _ib_verbs_private {
- int32_t sock;
- int32_t idx;
- unsigned char connected;
- unsigned char tcp_connected;
- unsigned char ib_connected;
- in_addr_t addr;
- unsigned short port;
-
- /* IB Verbs Driver specific variables, pointers */
- ib_verbs_peer_t peer;
- ib_verbs_device_t *device;
- ib_verbs_options_t options;
-
- /* Used by trans->op->receive */
- char *data_ptr;
- int32_t data_offset;
- int32_t data_len;
-
- /* Mutex */
- pthread_mutex_t read_mutex;
- pthread_mutex_t write_mutex;
- pthread_barrier_t handshake_barrier;
- char handshake_ret;
-
- pthread_mutex_t recv_mutex;
- pthread_cond_t recv_cond;
-
- /* used during ib_verbs_handshake */
- struct {
- struct ib_verbs_nbio incoming;
- struct ib_verbs_nbio outgoing;
- int state;
- ib_verbs_header_t header;
- char *buf;
- size_t size;
- } handshake;
-};
-typedef struct _ib_verbs_private ib_verbs_private_t;
-
-#endif /* _XPORT_IB_VERBS_H */
diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/name.c b/xlators/protocol/legacy/transport/ib-verbs/src/name.c
deleted file mode 100644
index 1b8f83c293e..00000000000
--- a/xlators/protocol/legacy/transport/ib-verbs/src/name.c
+++ /dev/null
@@ -1,712 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <errno.h>
-#include <netdb.h>
-#include <string.h>
-
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#endif
-
-#include "transport.h"
-#include "ib-verbs.h"
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, int ceiling)
-{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
-
- while (port)
- {
- switch (sockaddr->sa_family)
- {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
- break;
- }
-
- ret = bind (fd, sockaddr, sockaddr_len);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
-
- return ret;
-}
-
-static int32_t
-af_unix_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
-{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = -1;
-
- path_data = dict_get (this->xl->options,
- "transport.ib-verbs.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "transport.ib-verbs.bind-path not specfied "
- "for unix socket, letting connect to assign "
- "default value");
- goto err;
- }
-
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
- sock, strerror (errno));
- goto err;
- }
- }
-
-err:
- return ret;
-}
-
-static int32_t
-client_fill_address_family (transport_t *this, struct sockaddr *sockaddr)
-{
- data_t *address_family_data = NULL;
-
- address_family_data = dict_get (this->xl->options,
- "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->xl->options, "remote-host");
- connect_path_data = dict_get (this->xl->options,
- "transport.ib-verbs.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "address-family not specified and not able to "
- "determine the same from other options "
- "(remote-host:%s and connect-path:%s)",
- data_to_str (remote_host_data),
- data_to_str (connect_path_data));
- return -1;
- }
-
- if (remote_host_data) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be inet/inet6");
- sockaddr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be unix");
- sockaddr->sa_family = AF_UNIX;
- }
-
- } else {
- char *address_family = data_to_str (address_family_data);
- if (!strcasecmp (address_family, "unix")) {
- sockaddr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet")) {
- sockaddr->sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- sockaddr->sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- sockaddr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- sockaddr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
- address_family);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int32_t
-af_inet_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- dict_t *options = this->xl->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- uint16_t remote_port = 0;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get (options, "remote-host");
- if (remote_host_data == NULL)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s",
- this->xl->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str (remote_host_data);
- if (remote_host == NULL)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s",
- this->xl->name);
- ret = -1;
- goto err;
- }
-
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "option remote-port missing in volume %s. "
- "Defaulting to %d",
- this->xl->name, GF_DEFAULT_IBVERBS_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_IBVERBS_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
- }
-
- if (remote_port == (uint16_t)-1)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s",
- this->xl->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family,
- &this->dnscache, &addr_info);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "DNS resolution failed on host %s", remote_host);
- goto err;
- }
-
- memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get (this->xl->options,
- "transport.ib-verbs.connect-path");
- if (!connect_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option transport.ib-verbs.connect-path not "
- "specified for address-family unix");
- ret = -1;
- goto err;
- }
-
- connect_path = data_to_str (connect_path_data);
- if (!connect_path) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connect-path is null-string");
- ret = -1;
- goto err;
- }
-
- if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > "
- "%d octets", strlen (connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- gf_log (this->xl->name,
- GF_LOG_DEBUG,
- "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy (sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
-
- listen_path_data = dict_get (this->xl->options,
- "transport.ib-verbs.listen-path");
- if (!listen_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "missing option listen-path");
- ret = -1;
- goto err;
- }
-
- listen_path = data_to_str (listen_path_data);
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
- if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option listen-path has value length %"GF_PRI_SIZET" > %d",
- strlen (listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- sunaddr->sun_family = AF_UNIX;
- strcpy (sunaddr->sun_path, listen_path);
- *addr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_inet_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- struct addrinfo hints, *res = 0;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = -1;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->xl->options;
-
- listen_port_data = dict_get (options, "transport.ib-verbs.listen-port");
- listen_host_data = dict_get (options, "transport.ib-verbs.bind-address");
-
- if (listen_port_data)
- {
- listen_port = data_to_uint16 (listen_port_data);
- } else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *) addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
- }
-
- if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_IBVERBS_LISTEN_PORT;
-
-
- if (listen_host_data)
- {
- listen_host = data_to_str (listen_host_data);
- }
-
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listen_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_log (this->xl->name,
- GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listen_host, service, gai_strerror (ret));
- ret = -1;
- goto out;
- }
-
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
-
- freeaddrinfo (res);
-
-out:
- return ret;
-}
-
-int32_t
-gf_ibverbs_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
-{
- int ret = 0;
-
- *sockaddr_len = sizeof (struct sockaddr_in6);
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- case AF_INET:
- *sockaddr_len = sizeof (struct sockaddr_in);
-
- case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len,
- CLIENT_PORT_CEILING);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port "
- "less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
- ret = 0;
- }
- break;
-
- case AF_UNIX:
- *sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
-
- return ret;
-}
-
-int32_t
-gf_ibverbs_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- ret = client_fill_address_family (this, sockaddr);
- if (ret) {
- ret = -1;
- goto err;
- }
-
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
- is_inet_sdp = 1;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len);
-
- if (is_inet_sdp) {
- sockaddr->sa_family = AF_INET_SDP;
- }
-
- break;
-
- case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len);
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
-err:
- return ret;
-}
-
-int32_t
-gf_ibverbs_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *address_family_data = NULL;
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- address_family_data = dict_get (this->xl->options,
- "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str (address_family_data);
-
- if (!strcasecmp (address_family, "inet")) {
- addr->sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- addr->sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- addr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "unix")) {
- addr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- addr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family (%s) specified",
- address_family);
- ret = -1;
- goto err;
- }
- } else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting "
- "to inet/inet6");
- addr->sa_family = AF_UNSPEC;
- }
-
- switch (addr->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- addr->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- if (is_inet_sdp && !ret) {
- addr->sa_family = AF_INET_SDP;
- }
- break;
-
- case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr (this, addr, addr_len);
- break;
- }
-
-err:
- return ret;
-}
-
-int32_t
-fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
- int32_t addr_len, char *identifier)
-{
- int32_t ret = 0, tmpaddr_len = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- struct sockaddr_storage tmpaddr;
-
- memset (&tmpaddr, 0, sizeof (tmpaddr));
- tmpaddr = *addr;
- tmpaddr_len = addr_len;
-
- if (((struct sockaddr *) &tmpaddr)->sa_family == AF_INET6) {
- int32_t one_to_four, four_to_eight, twelve_to_sixteen;
- int16_t eight_to_ten, ten_to_twelve;
-
- one_to_four = four_to_eight = twelve_to_sixteen = 0;
- eight_to_ten = ten_to_twelve = 0;
-
- one_to_four = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr32[0];
- four_to_eight = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr32[1];
-#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr)[4];
-#else
- eight_to_ten = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr16[4];
-#endif
-
-#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr)[5];
-#else
- ten_to_twelve = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr16[5];
-#endif
- twelve_to_sixteen = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 &&
- four_to_eight == 0 &&
- eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = (struct sockaddr_in *)&tmpaddr;
- memset (&tmpaddr, 0, sizeof (tmpaddr));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof (*in_ptr);
- }
- }
-
- ret = getnameinfo ((struct sockaddr *) &tmpaddr,
- tmpaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_log (this->xl->name,
- GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- }
-
- sprintf (identifier, "%s:%s", host, service);
-
- return ret;
-}
-
-int32_t
-gf_ibverbs_get_transport_identifiers (transport_t *this)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- {
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "can't fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers (this,
- &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "can't fill inet/inet6 identifier for client");
- goto err;
- }
-
- if (is_inet_sdp) {
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP;
- }
- }
- break;
-
- case AF_UNIX:
- {
- struct sockaddr_un *sunaddr = NULL;
-
- sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr;
- strcpy (this->myinfo.identifier, sunaddr->sun_path);
-
- sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr;
- strcpy (this->peerinfo.identifier, sunaddr->sun_path);
- }
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family (%d)",
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
-
-err:
- return ret;
-}
diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/name.h b/xlators/protocol/legacy/transport/ib-verbs/src/name.h
deleted file mode 100644
index e2575794bc7..00000000000
--- a/xlators/protocol/legacy/transport/ib-verbs/src/name.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _IB_VERBS_NAME_H
-#define _IB_VERBS_NAME_H
-
-#include <sys/socket.h>
-#include <sys/un.h>
-
-#include "compat.h"
-
-int32_t
-gf_ibverbs_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
-
-int32_t
-gf_ibverbs_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len);
-
-int32_t
-gf_ibverbs_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len);
-
-int32_t
-gf_ibverbs_get_transport_identifiers (transport_t *this);
-
-#endif /* _IB_VERBS_NAME_H */
diff --git a/xlators/protocol/legacy/transport/socket/Makefile.am b/xlators/protocol/legacy/transport/socket/Makefile.am
deleted file mode 100644
index f963effea22..00000000000
--- a/xlators/protocol/legacy/transport/socket/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = src \ No newline at end of file
diff --git a/xlators/protocol/legacy/transport/socket/src/Makefile.am b/xlators/protocol/legacy/transport/socket/src/Makefile.am
deleted file mode 100644
index 5952e18e97b..00000000000
--- a/xlators/protocol/legacy/transport/socket/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-# TODO : change to proper transport dir
-
-transport_LTLIBRARIES = socket.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/transport
-
-socket_la_LDFLAGS = -module -avoidversion
-
-socket_la_SOURCES = socket.c name.c
-socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la
-
-noinst_HEADERS = socket.h name.h socket-mem-types.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/protocol/legacy/transport/socket/src \
- -I$(top_srcdir)/xlators/protocol/legacy/lib/src
-
-CLEANFILES = *~
diff --git a/xlators/protocol/legacy/transport/socket/src/name.c b/xlators/protocol/legacy/transport/socket/src/name.c
deleted file mode 100644
index b0fc7b4db3c..00000000000
--- a/xlators/protocol/legacy/transport/socket/src/name.c
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include <netdb.h>
-#include <string.h>
-
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#endif
-
-static int gf_name_addr_enotspec_log;
-
-#include "transport.h"
-#include "socket.h"
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, int ceiling)
-{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
-
- while (port)
- {
- switch (sockaddr->sa_family)
- {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
- break;
- }
-
- ret = bind (fd, sockaddr, sockaddr_len);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
-
- return ret;
-}
-
-static int32_t
-af_unix_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
-{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = 0;
-
- path_data = dict_get (this->xl->options, "transport.socket.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "bind-path not specfied for unix socket, "
- "letting connect to assign default value");
- goto err;
- }
-
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
- sock, strerror (errno));
- goto err;
- }
- } else {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "bind-path not specfied for unix socket, "
- "letting connect to assign default value");
- }
-
-err:
- return ret;
-}
-
-static int32_t
-client_fill_address_family (transport_t *this, sa_family_t *sa_family)
-{
- data_t *address_family_data = NULL;
- int32_t ret = -1;
-
- if (sa_family == NULL) {
- goto out;
- }
-
- address_family_data = dict_get (this->xl->options,
- "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->xl->options, "remote-host");
- connect_path_data = dict_get (this->xl->options,
- "transport.socket.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- GF_LOG_OCCASIONALLY (gf_name_addr_enotspec_log,
- this->xl->name, GF_LOG_ERROR,
- "transport.address-family not specified and "
- "not able to determine the "
- "same from other options (remote-host:%s and "
- "transport.unix.connect-path:%s)",
- data_to_str (remote_host_data),
- data_to_str (connect_path_data));
- goto out;
- }
-
- if (remote_host_data) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be inet/inet6");
- *sa_family = AF_UNSPEC;
- } else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be unix");
- *sa_family = AF_UNIX;
- }
-
- } else {
- char *address_family = data_to_str (address_family_data);
- if (!strcasecmp (address_family, "unix")) {
- *sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet")) {
- *sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- *sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- *sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- *sa_family = AF_UNSPEC;
- } else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
- address_family);
- goto out;
- }
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static int32_t
-af_inet_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- dict_t *options = this->xl->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- uint16_t remote_port = 0;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get (options, "remote-host");
- if (remote_host_data == NULL)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s", this->xl->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str (remote_host_data);
- if (remote_host == NULL)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s", this->xl->name);
- ret = -1;
- goto err;
- }
-
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "option remote-port missing in volume %s. Defaulting to %d",
- this->xl->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
- }
-
- if (remote_port == (uint16_t)-1)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s",
- this->xl->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family, &this->dnscache, &addr_info);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "DNS resolution failed on host %s", remote_host);
- goto err;
- }
-
- memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get (this->xl->options,
- "transport.socket.connect-path");
- if (!connect_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option transport.unix.connect-path not specified for "
- "address-family unix");
- ret = -1;
- goto err;
- }
-
- connect_path = data_to_str (connect_path_data);
- if (!connect_path) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "transport.unix.connect-path is null-string");
- ret = -1;
- goto err;
- }
-
- if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > %d octets",
- strlen (connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy (sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
-
- listen_path_data = dict_get (this->xl->options,
- "transport.socket.listen-path");
- if (!listen_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "missing option transport.socket.listen-path");
- ret = -1;
- goto err;
- }
-
- listen_path = data_to_str (listen_path_data);
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
- if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option transport.unix.listen-path has value length "
- "%"GF_PRI_SIZET" > %d",
- strlen (listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- sunaddr->sun_family = AF_UNIX;
- strcpy (sunaddr->sun_path, listen_path);
- *addr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_inet_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- struct addrinfo hints, *res = 0;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = -1;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->xl->options;
-
- listen_port_data = dict_get (options, "transport.socket.listen-port");
- listen_host_data = dict_get (options, "transport.socket.bind-address");
-
- if (listen_port_data)
- {
- listen_port = data_to_uint16 (listen_port_data);
- }
-
- if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
-
-
- if (listen_host_data)
- {
- listen_host = data_to_str (listen_host_data);
- } else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *) addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
- }
-
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listen_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listen_host, service, gai_strerror (ret));
- ret = -1;
- goto out;
- }
-
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
-
- freeaddrinfo (res);
-
-out:
- return ret;
-}
-
-int32_t
-gf_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
-{
- int ret = 0;
-
- *sockaddr_len = sizeof (struct sockaddr_in6);
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- case AF_INET:
- *sockaddr_len = sizeof (struct sockaddr_in);
-
- case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len, CLIENT_PORT_CEILING);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
- ret = 0;
- }
- break;
-
- case AF_UNIX:
- *sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
-
- return ret;
-}
-
-int32_t
-gf_socket_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- sa_family_t *sa_family)
-{
- int32_t ret = 0;
-
- if ((sockaddr == NULL) || (sockaddr_len == NULL)
- || (sa_family == NULL)) {
- ret = -1;
- goto err;
- }
-
-
- ret = client_fill_address_family (this, &sockaddr->sa_family);
- if (ret) {
- ret = -1;
- goto err;
- }
-
- *sa_family = sockaddr->sa_family;
-
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this, sockaddr,
- sockaddr_len);
- break;
-
- case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this, sockaddr,
- sockaddr_len);
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
- if (*sa_family == AF_UNSPEC) {
- *sa_family = sockaddr->sa_family;
- }
-
-err:
- return ret;
-}
-
-
-static int32_t
-server_fill_address_family (transport_t *this, sa_family_t *sa_family)
-{
- data_t *address_family_data = NULL;
- int32_t ret = -1;
-
- if (sa_family == NULL) {
- goto out;
- }
-
- address_family_data = dict_get (this->xl->options,
- "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str (address_family_data);
-
- if (!strcasecmp (address_family, "inet")) {
- *sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- *sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- *sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "unix")) {
- *sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- *sa_family = AF_UNSPEC;
- } else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family (%s) specified", address_family);
- goto out;
- }
- } else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting to inet/inet6");
- *sa_family = AF_UNSPEC;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-gf_socket_server_get_local_sockaddr (transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len, sa_family_t *sa_family)
-{
- int32_t ret = -1;
-
- if ((addr == NULL) || (addr_len == NULL) || (sa_family == NULL)) {
- goto err;
- }
-
- ret = server_fill_address_family (this, &addr->sa_family);
- if (ret == -1) {
- goto err;
- }
-
- *sa_family = addr->sa_family;
-
- switch (addr->sa_family)
- {
- case AF_INET_SDP:
- addr->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- break;
-
- case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr (this, addr, addr_len);
- break;
- }
-
- if (*sa_family == AF_UNSPEC) {
- *sa_family = addr->sa_family;
- }
-
-err:
- return ret;
-}
-
-static int32_t
-fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
- int32_t addr_len, char *identifier)
-{
- int32_t ret = 0, tmpaddr_len = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- struct sockaddr_storage tmpaddr;
-
- memset (&tmpaddr, 0, sizeof (tmpaddr));
- tmpaddr = *addr;
- tmpaddr_len = addr_len;
-
- if (((struct sockaddr *) &tmpaddr)->sa_family == AF_INET6) {
- int32_t one_to_four, four_to_eight, twelve_to_sixteen;
- int16_t eight_to_ten, ten_to_twelve;
-
- one_to_four = four_to_eight = twelve_to_sixteen = 0;
- eight_to_ten = ten_to_twelve = 0;
-
- one_to_four = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[0];
- four_to_eight = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[1];
-#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(((struct sockaddr_in6 *) &tmpaddr)->sin6_addr)[4];
-#else
- eight_to_ten = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr16[4];
-#endif
-
-#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(((struct sockaddr_in6 *) &tmpaddr)->sin6_addr)[5];
-#else
- ten_to_twelve = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr16[5];
-#endif
-
- twelve_to_sixteen = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 &&
- four_to_eight == 0 &&
- eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = (struct sockaddr_in *)&tmpaddr;
- memset (&tmpaddr, 0, sizeof (tmpaddr));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof (*in_ptr);
- }
- }
-
- ret = getnameinfo ((struct sockaddr *) &tmpaddr,
- tmpaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- }
-
- sprintf (identifier, "%s:%s", host, service);
-
- return ret;
-}
-
-int32_t
-gf_get_transport_identifiers (transport_t *this)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- {
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "cannot fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers (this,
- &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "cannot fill inet/inet6 identifier for client");
- goto err;
- }
-
- if (is_inet_sdp) {
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP;
- }
- }
- break;
-
- case AF_UNIX:
- {
- struct sockaddr_un *sunaddr = NULL;
-
- sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr;
- strcpy (this->myinfo.identifier, sunaddr->sun_path);
-
- sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr;
- strcpy (this->peerinfo.identifier, sunaddr->sun_path);
- }
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family (%d)",
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
-
-err:
- return ret;
-}
diff --git a/xlators/protocol/legacy/transport/socket/src/name.h b/xlators/protocol/legacy/transport/socket/src/name.h
deleted file mode 100644
index 1853781bd7c..00000000000
--- a/xlators/protocol/legacy/transport/socket/src/name.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SOCKET_NAME_H
-#define _SOCKET_NAME_H
-
-#include "compat.h"
-
-int32_t
-gf_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
-
-int32_t
-gf_socket_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- sa_family_t *sa_family);
-
-int32_t
-gf_socket_server_get_local_sockaddr (transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len, sa_family_t *sa_family);
-
-int32_t
-gf_get_transport_identifiers (transport_t *this);
-
-#endif /* _SOCKET_NAME_H */
diff --git a/xlators/protocol/legacy/transport/socket/src/socket-mem-types.h b/xlators/protocol/legacy/transport/socket/src/socket-mem-types.h
deleted file mode 100644
index f50f4a75de8..00000000000
--- a/xlators/protocol/legacy/transport/socket/src/socket-mem-types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __SOCKET_MEM_TYPES_H__
-#define __SOCKET_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_socket_mem_types_ {
- gf_socket_mt_socket_private_t = gf_common_mt_end + 1,
- gf_socket_mt_ioq,
- gf_socket_mt_transport_t,
- gf_socket_mt_socket_local_t,
- gf_socket_mt_char,
- gf_socket_mt_end
-};
-#endif
-
diff --git a/xlators/protocol/legacy/transport/socket/src/socket.c b/xlators/protocol/legacy/transport/socket/src/socket.c
deleted file mode 100644
index 128b1ce6997..00000000000
--- a/xlators/protocol/legacy/transport/socket/src/socket.c
+++ /dev/null
@@ -1,1622 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "socket.h"
-#include "name.h"
-#include "dict.h"
-#include "transport.h"
-#include "logging.h"
-#include "xlator.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-
-#include <fcntl.h>
-#include <errno.h>
-#include <netinet/tcp.h>
-
-
-#define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
-#define SA(ptr) ((struct sockaddr *)ptr)
-
-static int socket_init (transport_t *this);
-
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
-
-static int
-__socket_rwv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- socket_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
-
- priv = this->private;
- sock = priv->sock;
-
- opvector = vector;
- opcount = count;
-
- while (opcount) {
- if (write) {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
- /* done for now */
- break;
- }
- } else {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN) {
- /* done for now */
- break;
- }
- }
-
- if (ret == 0) {
- /* Mostly due to 'umount' in client */
- gf_log (this->xl->name, GF_LOG_TRACE,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1) {
- if (errno == EINTR)
- continue;
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- opcount = -1;
- break;
- }
-
- moved = 0;
-
- while (moved < ret) {
- if ((ret - moved) >= opvector[0].iov_len) {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- } else {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len) {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-static int
-__socket_readv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, 0);
-
- return ret;
-}
-
-
-static int
-__socket_writev (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, 1);
-
- return ret;
-}
-
-
-static int
-__socket_disconnect (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->sock != -1) {
- ret = shutdown (priv->sock, SHUT_RDWR);
- priv->connected = -1;
- gf_log (this->xl->name, GF_LOG_TRACE,
- "shutdown() returned %d. set connection state to -1",
- ret);
- }
-
- return ret;
-}
-
-
-static int
-__socket_server_bind (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- int opt = 1;
-
- priv = this->private;
-
- ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
- &opt, sizeof (opt));
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setsockopt() for SO_REUSEADDR failed (%s)",
- strerror (errno));
- }
-
- ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "binding to %s failed: %s",
- this->myinfo.identifier, strerror (errno));
- if (errno == EADDRINUSE) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "Port is already in use");
- }
- }
-
- return ret;
-}
-
-
-static int
-__socket_nonblock (int fd)
-{
- int flags = 0;
- int ret = -1;
-
- flags = fcntl (fd, F_GETFL);
-
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
-
- return ret;
-}
-
-
-static int
-__socket_nodelay (int fd)
-{
- int on = 1;
- int ret = -1;
-
- ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
- &on, sizeof (on));
- if (!ret)
- gf_log ("", GF_LOG_TRACE,
- "NODELAY enabled for socket %d", fd);
-
- return ret;
-}
-
-
-static int
-__socket_keepalive (int fd, int keepalive_intvl)
-{
- int on = 1;
- int ret = -1;
-
- ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
- if (ret == -1)
- goto err;
-
- if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
- goto done;
-
-#ifndef GF_LINUX_HOST_OS
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
- sizeof (keepalive_intvl));
- if (ret == -1)
- goto err;
-#else
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_intvl,
- sizeof (keepalive_intvl));
- if (ret == -1)
- goto err;
-
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl,
- sizeof (keepalive_intvl));
- if (ret == -1)
- goto err;
-#endif
-
-done:
- gf_log ("", GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
- "%d", fd, keepalive_intvl);
-
-err:
- return ret;
-}
-
-
-static int
-__socket_connect_finish (int fd)
-{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
-
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
-
- if (ret == 0 && optval) {
- errno = optval;
- ret = -1;
- }
-
- return ret;
-}
-
-
-static void
-__socket_reset (transport_t *this)
-{
- socket_private_t *priv = NULL;
-
- priv = this->private;
-
- /* TODO: use mem-pool on incoming data */
-
- if (priv->incoming.hdr_p)
- GF_FREE (priv->incoming.hdr_p);
-
- if (priv->incoming.iobuf)
- iobuf_unref (priv->incoming.iobuf);
-
- memset (&priv->incoming, 0, sizeof (priv->incoming));
-
- event_unregister (this->xl->ctx->event_pool, priv->sock, priv->idx);
- close (priv->sock);
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
-}
-
-
-static struct ioq *
-__socket_ioq_new (transport_t *this, char *buf, int len,
- struct iovec *vector, int count, struct iobref *iobref)
-{
- socket_private_t *priv = NULL;
- struct ioq *entry = NULL;
-
- priv = this->private;
-
- /* TODO: use mem-pool */
- entry = GF_CALLOC (1, sizeof (*entry),
- gf_common_mt_ioq);
- if (!entry)
- return NULL;
-
- assert (count <= (MAX_IOVEC-2));
-
- entry->header.colonO[0] = ':';
- entry->header.colonO[1] = 'O';
- entry->header.colonO[2] = '\0';
- entry->header.version = 42;
- entry->header.size1 = hton32 (len);
- entry->header.size2 = hton32 (iov_length (vector, count));
-
- entry->vector[0].iov_base = &entry->header;
- entry->vector[0].iov_len = sizeof (entry->header);
- entry->count++;
-
- entry->vector[1].iov_base = buf;
- entry->vector[1].iov_len = len;
- entry->count++;
-
- if (vector && count) {
- memcpy (&entry->vector[2], vector, sizeof (*vector) * count);
- entry->count += count;
- }
-
- entry->pending_vector = entry->vector;
- entry->pending_count = entry->count;
-
- if (iobref)
- entry->iobref = iobref_ref (iobref);
-
- entry->buf = buf;
-
- INIT_LIST_HEAD (&entry->list);
-
- return entry;
-}
-
-
-static void
-__socket_ioq_entry_free (struct ioq *entry)
-{
- list_del_init (&entry->list);
- if (entry->iobref)
- iobref_unref (entry->iobref);
-
- /* TODO: use mem-pool */
- GF_FREE (entry->buf);
-
- /* TODO: use mem-pool */
- GF_FREE (entry);
-}
-
-
-static void
-__socket_ioq_flush (transport_t *this)
-{
- socket_private_t *priv = NULL;
- struct ioq *entry = NULL;
-
- priv = this->private;
-
- while (!list_empty (&priv->ioq)) {
- entry = priv->ioq_next;
- __socket_ioq_entry_free (entry);
- }
-
- return;
-}
-
-
-static int
-__socket_ioq_churn_entry (transport_t *this, struct ioq *entry)
-{
- int ret = -1;
-
- ret = __socket_writev (this, entry->pending_vector,
- entry->pending_count,
- &entry->pending_vector,
- &entry->pending_count);
-
- if (ret == 0) {
- /* current entry was completely written */
- assert (entry->pending_count == 0);
- __socket_ioq_entry_free (entry);
- }
-
- return ret;
-}
-
-
-static int
-__socket_ioq_churn (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = 0;
- struct ioq *entry = NULL;
-
- priv = this->private;
-
- while (!list_empty (&priv->ioq)) {
- /* pick next entry */
- entry = priv->ioq_next;
-
- ret = __socket_ioq_churn_entry (this, entry);
-
- if (ret != 0)
- break;
- }
-
- if (list_empty (&priv->ioq)) {
- /* all pending writes done, not interested in POLLOUT */
- priv->idx = event_select_on (this->xl->ctx->event_pool,
- priv->sock, priv->idx, -1, 0);
- }
-
- return ret;
-}
-
-
-static int
-socket_event_poll_err (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- __socket_ioq_flush (this);
- __socket_reset (this);
- }
- pthread_mutex_unlock (&priv->lock);
-
- xlator_notify (this->xl, GF_EVENT_POLLERR, this);
-
- return ret;
-}
-
-
-static int
-socket_event_poll_out (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected == 1) {
- ret = __socket_ioq_churn (this);
-
- if (ret == -1) {
- __socket_disconnect (this);
- }
- }
- }
- pthread_mutex_unlock (&priv->lock);
-
- xlator_notify (this->xl, GF_EVENT_POLLOUT, this);
-
- return ret;
-}
-
-
-static int
-__socket_proto_validate_header (transport_t *this,
- struct socket_header *header,
- size_t *size1_p, size_t *size2_p)
-{
- size_t size1 = 0;
- size_t size2 = 0;
-
- if (strcmp (header->colonO, ":O")) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header signature does not match :O (%x.%x.%x)",
- header->colonO[0], header->colonO[1],
- header->colonO[2]);
- return -1;
- }
-
- if (header->version != 42) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header version does not match 42 != %d",
- header->version);
- return -1;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (size1 <= 0 || size1 > 1048576) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header has incorrect size1=%"GF_PRI_SIZET,
- size1);
- return -1;
- }
-
- if (size2 > (131072)) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header has incorrect size2=%"GF_PRI_SIZET,
- size2);
- return -1;
- }
-
- if (size1_p)
- *size1_p = size1;
-
- if (size2_p)
- *size2_p = size2;
-
- return 0;
-}
-
-
-
-/* socket protocol state machine */
-
-static int
-__socket_proto_state_machine (transport_t *this)
-{
- int ret = -1;
- socket_private_t *priv = NULL;
- size_t size1 = 0;
- size_t size2 = 0;
- int previous_state = -1;
- struct socket_header *hdr = NULL;
- struct iobuf *iobuf = NULL;
-
-
- priv = this->private;
-
- while (priv->incoming.state != SOCKET_PROTO_STATE_COMPLETE) {
- /* debug check against infinite loops */
- if (previous_state == priv->incoming.state) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "state did not change! (%d) breaking",
- previous_state);
- ret = -1;
- goto unlock;
- }
- previous_state = priv->incoming.state;
-
- switch (priv->incoming.state) {
-
- case SOCKET_PROTO_STATE_NADA:
- priv->incoming.pending_vector =
- priv->incoming.vector;
-
- priv->incoming.pending_vector->iov_base =
- &priv->incoming.header;
-
- priv->incoming.pending_vector->iov_len =
- sizeof (struct socket_header);
-
- priv->incoming.state =
- SOCKET_PROTO_STATE_HEADER_COMING;
- break;
-
- case SOCKET_PROTO_STATE_HEADER_COMING:
-
- ret = __socket_readv (this,
- priv->incoming.pending_vector, 1,
- &priv->incoming.pending_vector,
- NULL);
- if (ret == 0) {
- priv->incoming.state =
- SOCKET_PROTO_STATE_HEADER_CAME;
- break;
- }
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "read (%s) in state %d (%s)",
- strerror (errno),
- SOCKET_PROTO_STATE_HEADER_COMING,
- this->peerinfo.identifier);
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial header read on NB socket.");
- goto unlock;
- }
- break;
-
- case SOCKET_PROTO_STATE_HEADER_CAME:
- hdr = &priv->incoming.header;
- ret = __socket_proto_validate_header (this, hdr,
- &size1, &size2);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket header validate failed (%s). "
- "possible mismatch of transport-type "
- "between server and client volumes, "
- "or version mismatch",
- this->peerinfo.identifier);
- goto unlock;
- }
-
- priv->incoming.hdrlen = size1;
- priv->incoming.buflen = size2;
-
- /* TODO: use mem-pool */
- priv->incoming.hdr_p = GF_MALLOC (size1,
- gf_common_mt_char);
- if (size2) {
- /* TODO: sanity check size2 < page size
- */
- iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
- if (!iobuf) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unable to allocate IO buffer "
- "for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto unlock;
- }
- priv->incoming.iobuf = iobuf;
- priv->incoming.buf_p = iobuf->ptr;
- }
-
- priv->incoming.vector[0].iov_base =
- priv->incoming.hdr_p;
-
- priv->incoming.vector[0].iov_len = size1;
-
- priv->incoming.vector[1].iov_base =
- priv->incoming.buf_p;
-
- priv->incoming.vector[1].iov_len = size2;
- priv->incoming.count = size2 ? 2 : 1;
-
- priv->incoming.pending_vector =
- priv->incoming.vector;
-
- priv->incoming.pending_count =
- priv->incoming.count;
-
- priv->incoming.state =
- SOCKET_PROTO_STATE_DATA_COMING;
- break;
-
- case SOCKET_PROTO_STATE_DATA_COMING:
-
- ret = __socket_readv (this,
- priv->incoming.pending_vector,
- priv->incoming.pending_count,
- &priv->incoming.pending_vector,
- &priv->incoming.pending_count);
- if (ret == 0) {
- priv->incoming.state =
- SOCKET_PROTO_STATE_DATA_CAME;
- break;
- }
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "read (%s) in state %d (%s)",
- strerror (errno),
- SOCKET_PROTO_STATE_DATA_COMING,
- this->peerinfo.identifier);
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial data read on NB socket");
- goto unlock;
- }
- break;
-
- case SOCKET_PROTO_STATE_DATA_CAME:
- memset (&priv->incoming.vector, 0,
- sizeof (priv->incoming.vector));
- priv->incoming.pending_vector = NULL;
- priv->incoming.pending_count = 0;
- priv->incoming.state = SOCKET_PROTO_STATE_COMPLETE;
- break;
-
- case SOCKET_PROTO_STATE_COMPLETE:
- /* not reached */
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "undefined state reached: %d",
- priv->incoming.state);
- goto unlock;
- }
- }
-unlock:
-
- return ret;
-}
-
-
-static int
-socket_proto_state_machine (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_proto_state_machine (this);
- }
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-static int
-socket_event_poll_in (transport_t *this)
-{
- int ret = -1;
-
- ret = socket_proto_state_machine (this);
-
- /* call POLLIN on xlator even if complete block is not received,
- just to keep the last_received timestamp ticking */
-
- if (ret == 0)
- ret = xlator_notify (this->xl, GF_EVENT_POLLIN, this);
-
- return ret;
-}
-
-
-static int
-socket_connect_finish (transport_t *this)
-{
- int ret = -1;
- socket_private_t *priv = NULL;
- int event = -1;
- char notify_xlator = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected)
- goto unlock;
-
- ret = __socket_connect_finish (priv->sock);
-
- if (ret == -1 && errno == EINPROGRESS)
- ret = 1;
-
- if (ret == -1 && errno != EINPROGRESS) {
- if (!priv->connect_finish_log) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connection to %s failed (%s)",
- this->peerinfo.identifier,
- strerror (errno));
- priv->connect_finish_log = 1;
- }
- __socket_disconnect (this);
- notify_xlator = 1;
- event = GF_EVENT_POLLERR;
- goto unlock;
- }
-
- if (ret == 0) {
- notify_xlator = 1;
-
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
-
- ret = getsockname (priv->sock,
- SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "getsockname on (%d) failed (%s)",
- priv->sock, strerror (errno));
- __socket_disconnect (this);
- event = GF_EVENT_POLLERR;
- goto unlock;
- }
-
- priv->connected = 1;
- priv->connect_finish_log = 0;
- event = GF_EVENT_CHILD_UP;
- gf_get_transport_identifiers (this);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- if (notify_xlator)
- xlator_notify (this->xl, event, this);
-
- return 0;
-}
-
-
-static int
-socket_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
-
- this = data;
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
- }
- pthread_mutex_unlock (&priv->lock);
-
- if (!priv->connected) {
- ret = socket_connect_finish (this);
- }
-
- if (!ret && poll_out) {
- ret = socket_event_poll_out (this);
- }
-
- if (!ret && poll_in) {
- ret = socket_event_poll_in (this);
- }
-
- if (ret < 0 || poll_err) {
- socket_event_poll_err (this);
- transport_unref (this);
- }
-
- return 0;
-}
-
-
-static int
-socket_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
- int new_sock = -1;
- transport_t *new_trans = NULL;
- struct sockaddr_storage new_sockaddr = {0, };
- socklen_t addrlen = sizeof (new_sockaddr);
- socket_private_t *new_priv = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- this = data;
- priv = this->private;
- ctx = this->xl->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
-
- if (poll_in) {
- new_sock = accept (priv->sock, SA (&new_sockaddr),
- &addrlen);
-
- if (new_sock == -1)
- goto unlock;
-
- if (!priv->bio) {
- ret = __socket_nonblock (new_sock);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "NBIO on %d failed (%s)",
- new_sock, strerror (errno));
- close (new_sock);
- goto unlock;
- }
- }
-
- if (priv->nodelay) {
- ret = __socket_nodelay (new_sock);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setsockopt() failed for "
- "NODELAY (%s)",
- strerror (errno));
- }
- }
-
- if (priv->keepalive) {
- ret = __socket_keepalive (new_sock,
- priv->keepaliveintvl);
- if (ret == -1)
- gf_log (this->xl->name, GF_LOG_ERROR,
- "Failed to set keep-alive: %s",
- strerror (errno));
- }
-
- new_trans = GF_CALLOC (1, sizeof (*new_trans),
- gf_common_mt_transport_t);
- new_trans->xl = this->xl;
- new_trans->fini = this->fini;
-
- memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
- addrlen);
- new_trans->peerinfo.sockaddr_len = addrlen;
-
- new_trans->myinfo.sockaddr_len =
- sizeof (new_trans->myinfo.sockaddr);
-
- ret = getsockname (new_sock,
- SA (&new_trans->myinfo.sockaddr),
- &new_trans->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "getsockname on %d failed (%s)",
- new_sock, strerror (errno));
- close (new_sock);
- goto unlock;
- }
-
- gf_get_transport_identifiers (new_trans);
- socket_init (new_trans);
- new_trans->ops = this->ops;
- new_trans->init = this->init;
- new_trans->fini = this->fini;
-
- new_priv = new_trans->private;
-
- pthread_mutex_lock (&new_priv->lock);
- {
- new_priv->sock = new_sock;
- new_priv->connected = 1;
-
- transport_ref (new_trans);
- new_priv->idx =
- event_register (ctx->event_pool,
- new_sock,
- socket_event_handler,
- new_trans, 1, 0);
-
- if (new_priv->idx == -1)
- ret = -1;
- }
- pthread_mutex_unlock (&new_priv->lock);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-static int
-socket_disconnect (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_disconnect (this);
- }
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-static int
-socket_connect (transport_t *this)
-{
- int ret = -1;
- int sock = -1;
- socket_private_t *priv = NULL;
- struct sockaddr_storage sockaddr = {0, };
- socklen_t sockaddr_len = 0;
- glusterfs_ctx_t *ctx = NULL;
- sa_family_t sa_family = {0, };
-
- priv = this->private;
- ctx = this->xl->ctx;
-
- if (!priv) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "connect() called on uninitialized transport");
- goto err;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
- }
- pthread_mutex_unlock (&priv->lock);
-
- if (sock != -1) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "connect () called on transport already connected");
- ret = 0;
- goto err;
- }
-
- ret = gf_socket_client_get_remote_sockaddr (this, SA (&sockaddr),
- &sockaddr_len, &sa_family);
- if (ret == -1) {
- /* logged inside client_get_remote_sockaddr */
- goto err;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "connect() -- already connected");
- goto unlock;
- }
-
- memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- priv->sock = socket (sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
-
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting receive window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting send window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
-
- if (priv->nodelay && priv->lowlat) {
- ret = __socket_nodelay (priv->sock);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setsockopt() failed for NODELAY (%s)",
- strerror (errno));
- }
- }
-
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- if (priv->keepalive) {
- ret = __socket_keepalive (priv->sock,
- priv->keepaliveintvl);
- if (ret == -1)
- gf_log (this->xl->name, GF_LOG_ERROR,
- "Failed to set keep-alive: %s",
- strerror (errno));
- }
-
- SA (&this->myinfo.sockaddr)->sa_family =
- SA (&this->peerinfo.sockaddr)->sa_family;
-
- ret = gf_client_bind (this, SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
- this->peerinfo.sockaddr_len);
-
- if (ret == -1 && errno != EINPROGRESS) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- priv->connected = 0;
-
- transport_ref (this);
-
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_event_handler, this, 1, 1);
- if (priv->idx == -1)
- ret = -1;
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
-err:
- return ret;
-}
-
-
-static int
-socket_listen (transport_t *this)
-{
- socket_private_t * priv = NULL;
- int ret = -1;
- int sock = -1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- peer_info_t *myinfo = NULL;
- glusterfs_ctx_t *ctx = NULL;
- sa_family_t sa_family = {0, };
-
- priv = this->private;
- myinfo = &this->myinfo;
- ctx = this->xl->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
- }
- pthread_mutex_unlock (&priv->lock);
-
- if (sock != -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "alreading listening");
- return ret;
- }
-
- ret = gf_socket_server_get_local_sockaddr (this, SA (&sockaddr),
- &sockaddr_len, &sa_family);
- if (ret == -1) {
- return ret;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "already listening");
- goto unlock;
- }
-
- memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
- myinfo->sockaddr_len = sockaddr_len;
-
- priv->sock = socket (sa_family, SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
-
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting receive window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting send window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (priv->nodelay) {
- ret = __socket_nodelay (priv->sock);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setsockopt() failed for NODELAY (%s)",
- strerror (errno));
- }
- }
-
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- ret = __socket_server_bind (this);
-
- if (ret == -1) {
- /* logged inside __socket_server_bind() */
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = listen (priv->sock, 10);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "could not set socket %d to listen mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- transport_ref (this);
-
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_server_event_handler,
- this, 1, 0);
-
- if (priv->idx == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "could not register socket %d with events",
- priv->sock);
- ret = -1;
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-static int
-socket_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket not connected to receive");
- goto unlock;
- }
-
- if (!hdr_p || !hdrlen_p || !iobuf_p) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "bad parameters %p %p %p",
- hdr_p, hdrlen_p, iobuf_p);
- goto unlock;
- }
-
- if (priv->incoming.state == SOCKET_PROTO_STATE_COMPLETE) {
- *hdr_p = priv->incoming.hdr_p;
- *hdrlen_p = priv->incoming.hdrlen;
- *iobuf_p = priv->incoming.iobuf;
-
- memset (&priv->incoming, 0, sizeof (priv->incoming));
- priv->incoming.state = SOCKET_PROTO_STATE_NADA;
-
- ret = 0;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-/* TODO: implement per transfer limit */
-static int
-socket_submit (transport_t *this, char *buf, int len,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- char need_poll_out = 0;
- char need_append = 1;
- struct ioq *entry = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- priv = this->private;
- ctx = this->xl->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- if (!priv->submit_log && !priv->connect_finish_log) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "not connected (priv->connected = %d)",
- priv->connected);
- priv->submit_log = 1;
- }
- goto unlock;
- }
-
- priv->submit_log = 0;
- entry = __socket_ioq_new (this, buf, len, vector, count, iobref);
- if (!entry)
- goto unlock;
-
- if (list_empty (&priv->ioq)) {
- ret = __socket_ioq_churn_entry (this, entry);
-
- if (ret == 0)
- need_append = 0;
-
- if (ret > 0)
- need_poll_out = 1;
- }
-
- if (need_append) {
- list_add_tail (&entry->list, &priv->ioq);
- ret = 0;
- }
-
- if (need_poll_out) {
- /* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on (ctx->event_pool,
- priv->sock,
- priv->idx, -1, 1);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-struct transport_ops tops = {
- .listen = socket_listen,
- .connect = socket_connect,
- .disconnect = socket_disconnect,
- .submit = socket_submit,
- .receive = socket_receive
-};
-
-
-static int
-socket_init (transport_t *this)
-{
- socket_private_t *priv = NULL;
- gf_boolean_t tmp_bool = 0;
- uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
- char *optstr = NULL;
- uint32_t keepalive = 0;
-
- if (this->private) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "double init attempted");
- return -1;
- }
-
- priv = GF_CALLOC (1, sizeof (*priv),
- gf_common_mt_socket_private_t);
- if (!priv) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "calloc (1, %"GF_PRI_SIZET") returned NULL",
- sizeof (*priv));
- return -1;
- }
-
- pthread_mutex_init (&priv->lock, NULL);
-
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
-
- INIT_LIST_HEAD (&priv->ioq);
-
- if (dict_get (this->xl->options, "non-blocking-io")) {
- optstr = data_to_str (dict_get (this->xl->options,
- "non-blocking-io"));
-
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "'non-blocking-io' takes only boolean options,"
- " not taking any action");
- tmp_bool = 1;
- }
- priv->bio = 0;
- if (!tmp_bool) {
- priv->bio = 1;
- gf_log (this->xl->name, GF_LOG_WARNING,
- "disabling non-blocking IO");
- }
- }
-
- optstr = NULL;
-
- // By default, we enable NODELAY
- priv->nodelay = 1;
- if (dict_get (this->xl->options, "transport.socket.nodelay")) {
- optstr = data_to_str (dict_get (this->xl->options,
- "transport.socket.nodelay"));
-
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "'transport.socket.nodelay' takes only "
- "boolean options, not taking any action");
- tmp_bool = 1;
- }
- if (!tmp_bool) {
- priv->nodelay = 0;
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "disabling nodelay");
- }
- }
-
-
- optstr = NULL;
- if (dict_get_str (this->xl->options, "transport.window-size",
- &optstr) == 0) {
- if (gf_string2bytesize (optstr, &windowsize) != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "invalid number format: %s", optstr);
- return -1;
- }
- }
-
- optstr = NULL;
-
- if (dict_get_str (this->xl->options, "transport.socket.lowlat",
- &optstr) == 0) {
- priv->lowlat = 1;
- }
-
- /* Enable Keep-alive by default. */
- priv->keepalive = 1;
- priv->keepaliveintvl = GF_USE_DEFAULT_KEEPALIVE;
- if (dict_get_str (this->xl->options, "transport.socket.keepalive",
- &optstr) == 0) {
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "'transport.socket.keepalive' takes only "
- "boolean options, not taking any action");
- tmp_bool = 1;
- }
-
- if (!tmp_bool)
- priv->keepalive = 0;
-
- }
-
- if (dict_get_uint32 (this->xl->options,
- "transport.socket.keepalive-interval",
- &keepalive) == 0) {
- priv->keepaliveintvl = keepalive;
- }
-
- priv->windowsize = (int)windowsize;
- this->private = priv;
-
- return 0;
-}
-
-
-void
-fini (transport_t *this)
-{
- socket_private_t *priv = this->private;
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "transport %p destroyed", this);
-
- pthread_mutex_destroy (&priv->lock);
- GF_FREE (priv);
-}
-
-
-int32_t
-init (transport_t *this)
-{
- int ret = -1;
-
- ret = socket_init (this);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG, "socket_init() failed");
- }
-
- return ret;
-}
-
-struct volume_options options[] = {
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.socket.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.bind-address", "bind-address" },
- .type = GF_OPTION_TYPE_INTERNET_ADDRESS
- },
- { .key = {"transport.socket.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "transport.address-family",
- "address-family" },
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
-
- { .key = {"non-blocking-io"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = GF_MIN_SOCKET_WINDOW_SIZE,
- .max = GF_MAX_SOCKET_WINDOW_SIZE,
- },
- { .key = {"transport.socket.nodelay"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.lowlat"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.keepalive"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.keepalive-interval"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {NULL} }
-};
-
diff --git a/xlators/protocol/legacy/transport/socket/src/socket.h b/xlators/protocol/legacy/transport/socket/src/socket.h
deleted file mode 100644
index e02801a5b1c..00000000000
--- a/xlators/protocol/legacy/transport/socket/src/socket.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SOCKET_H
-#define _SOCKET_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "event.h"
-#include "transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "socket-mem-types.h"
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#define GF_DEFAULT_SOCKET_LISTEN_PORT 6996
-
-/* This is the size set through setsockopt for
- * both the TCP receive window size and the
- * send buffer size.
- * Till the time iobuf size becomes configurable, this size is set to include
- * two iobufs + the GlusterFS protocol headers.
- * Linux allows us to over-ride the max values for the system.
- * Should we over-ride them? Because if we set a value larger than the default
- * setsockopt will fail. Having larger values might be beneficial for
- * IB links.
- */
-#define GF_DEFAULT_SOCKET_WINDOW_SIZE (512 * GF_UNIT_KB)
-#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
-#define GF_MIN_SOCKET_WINDOW_SIZE (128 * GF_UNIT_KB)
-
-#define GF_USE_DEFAULT_KEEPALIVE (-1)
-
-typedef enum {
- SOCKET_PROTO_STATE_NADA = 0,
- SOCKET_PROTO_STATE_HEADER_COMING,
- SOCKET_PROTO_STATE_HEADER_CAME,
- SOCKET_PROTO_STATE_DATA_COMING,
- SOCKET_PROTO_STATE_DATA_CAME,
- SOCKET_PROTO_STATE_COMPLETE,
-} socket_proto_state_t;
-
-struct socket_header {
- char colonO[3];
- uint32_t size1;
- uint32_t size2;
- char version;
-} __attribute__((packed));
-
-
-struct ioq {
- union {
- struct list_head list;
- struct {
- struct ioq *next;
- struct ioq *prev;
- };
- };
- struct socket_header header;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iovec *pending_vector;
- int pending_count;
- char *buf;
- struct iobref *iobref;
-};
-
-
-typedef struct {
- int32_t sock;
- int32_t idx;
- unsigned char connected; // -1 = not connected. 0 = in progress. 1 = connected
- char bio;
- char connect_finish_log;
- char submit_log;
- union {
- struct list_head ioq;
- struct {
- struct ioq *ioq_next;
- struct ioq *ioq_prev;
- };
- };
- struct {
- int state;
- struct socket_header header;
- char *hdr_p;
- size_t hdrlen;
- struct iobuf *iobuf;
- char *buf_p;
- size_t buflen;
- struct iovec vector[2];
- int count;
- struct iovec *pending_vector;
- int pending_count;
- } incoming;
- pthread_mutex_t lock;
- int windowsize;
- char lowlat;
- char nodelay;
- int keepalive;
- int keepaliveintvl;
-} socket_private_t;
-
-
-#endif
diff --git a/xlators/protocol/lib/src/Makefile.am b/xlators/protocol/lib/src/Makefile.am
deleted file mode 100644
index 70cc069e8f1..00000000000
--- a/xlators/protocol/lib/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-lib_LTLIBRARIES = libgfproto1.la
-
-libgfproto1_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
-
-libgfproto1_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE \
- -D$(GF_HOST_OS) -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
- -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/libglusterfs/src/ \
- -I$(top_srcdir)/rpc/rpc-lib/src/
-
-libgfproto1_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-libgfproto1_la_SOURCES = authenticate.c msg-xdr.c glusterfs-xdr.c
-
-noinst_HEADERS = authenticate.h protocol-common.h msg-xdr.h glusterfs-xdr.h
diff --git a/xlators/protocol/lib/src/authenticate.c b/xlators/protocol/lib/src/authenticate.c
deleted file mode 100644
index 5205b54df61..00000000000
--- a/xlators/protocol/lib/src/authenticate.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <dlfcn.h>
-#include <errno.h>
-#include "authenticate.h"
-
-static void
-init (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- void *handle = NULL;
- char *auth_file = NULL;
- auth_handle_t *auth_handle = NULL;
- auth_fn_t authenticate = NULL;
- int *error = NULL;
- int ret = 0;
-
- /* It gets over written */
- error = data;
-
- if (!strncasecmp (key, "ip", strlen ("ip"))) {
- gf_log ("authenticate", GF_LOG_ERROR,
- "AUTHENTICATION MODULE \"IP\" HAS BEEN REPLACED "
- "BY \"ADDR\"");
- dict_set (this, key, data_from_dynptr (NULL, 0));
- /* TODO: 1.3.x backword compatibility */
- // *error = -1;
- // return;
- key = "addr";
- }
-
- ret = gf_asprintf (&auth_file, "%s/%s.so", LIBDIR, key);
- if (-1 == ret) {
- gf_log ("authenticate", GF_LOG_ERROR, "asprintf failed");
- dict_set (this, key, data_from_dynptr (NULL, 0));
- *error = -1;
- return;
- }
-
- handle = dlopen (auth_file, RTLD_LAZY);
- if (!handle) {
- gf_log ("authenticate", GF_LOG_ERROR, "dlopen(%s): %s\n",
- auth_file, dlerror ());
- dict_set (this, key, data_from_dynptr (NULL, 0));
- GF_FREE (auth_file);
- *error = -1;
- return;
- }
- GF_FREE (auth_file);
-
- authenticate = dlsym (handle, "gf_auth");
- if (!authenticate) {
- gf_log ("authenticate", GF_LOG_ERROR,
- "dlsym(gf_auth) on %s\n", dlerror ());
- dict_set (this, key, data_from_dynptr (NULL, 0));
- *error = -1;
- return;
- }
-
- auth_handle = GF_CALLOC (1, sizeof (*auth_handle),
- gf_common_mt_auth_handle_t);
- if (!auth_handle) {
- gf_log ("authenticate", GF_LOG_ERROR, "Out of memory");
- dict_set (this, key, data_from_dynptr (NULL, 0));
- *error = -1;
- return;
- }
- auth_handle->vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- auth_handle->vol_opt->given_opt = dlsym (handle, "options");
- if (auth_handle->vol_opt->given_opt == NULL) {
- gf_log ("authenticate", GF_LOG_DEBUG,
- "volume option validation not specified");
- }
-
- auth_handle->authenticate = authenticate;
- auth_handle->handle = handle;
-
- dict_set (this, key,
- data_from_dynptr (auth_handle, sizeof (*auth_handle)));
-}
-
-static void
-fini (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- auth_handle_t *handle = data_to_ptr (value);
- if (handle) {
- dlclose (handle->handle);
- }
-}
-
-int32_t
-gf_auth_init (xlator_t *xl, dict_t *auth_modules)
-{
- int ret = 0;
- auth_handle_t *handle = NULL;
- data_pair_t *pair = NULL;
- dict_foreach (auth_modules, init, &ret);
- if (!ret) {
- pair = auth_modules->members_list;
- while (pair) {
- handle = data_to_ptr (pair->value);
- if (handle) {
- list_add_tail (&(handle->vol_opt->list),
- &(xl->volume_options));
- if (-1 ==
- validate_xlator_volume_options (xl,
- handle->vol_opt->given_opt)) {
- gf_log ("authenticate", GF_LOG_ERROR,
- "volume option validation "
- "failed");
- ret = -1;
- }
- }
- pair = pair->next;
- }
- }
- if (ret) {
- gf_log (xl->name, GF_LOG_ERROR, "authentication init failed");
- dict_foreach (auth_modules, fini, &ret);
- ret = -1;
- }
- return ret;
-}
-
-static dict_t *__input_params;
-static dict_t *__config_params;
-
-void
-map (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- dict_t *res = data;
- auth_fn_t authenticate;
- auth_handle_t *handle = NULL;
-
- if (value && (handle = data_to_ptr (value)) &&
- (authenticate = handle->authenticate)) {
- dict_set (res, key,
- int_to_data (authenticate (__input_params,
- __config_params)));
- } else {
- dict_set (res, key, int_to_data (AUTH_DONT_CARE));
- }
-}
-
-void
-reduce (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- int64_t val = 0;
- int64_t *res = data;
- if (!data)
- return;
-
- val = data_to_int64 (value);
- switch (val)
- {
- case AUTH_ACCEPT:
- if (AUTH_DONT_CARE == *res)
- *res = AUTH_ACCEPT;
- break;
-
- case AUTH_REJECT:
- *res = AUTH_REJECT;
- break;
-
- case AUTH_DONT_CARE:
- break;
- }
-}
-
-
-auth_result_t
-gf_authenticate (dict_t *input_params,
- dict_t *config_params,
- dict_t *auth_modules)
-{
- dict_t *results = NULL;
- int64_t result = AUTH_DONT_CARE;
-
- results = get_new_dict ();
- __input_params = input_params;
- __config_params = config_params;
-
- dict_foreach (auth_modules, map, results);
-
- dict_foreach (results, reduce, &result);
- if (AUTH_DONT_CARE == result) {
- data_t *peerinfo_data = dict_get (input_params, "peer-info-name");
- char *name = NULL;
-
- if (peerinfo_data) {
- name = peerinfo_data->data;
- }
-
- gf_log ("auth", GF_LOG_ERROR,
- "no authentication module is interested in "
- "accepting remote-client %s", name);
- result = AUTH_REJECT;
- }
-
- dict_destroy (results);
- return result;
-}
-
-void
-gf_auth_fini (dict_t *auth_modules)
-{
- int32_t dummy;
-
- dict_foreach (auth_modules, fini, &dummy);
-}
diff --git a/xlators/protocol/lib/src/authenticate.h b/xlators/protocol/lib/src/authenticate.h
deleted file mode 100644
index 93d73741b0a..00000000000
--- a/xlators/protocol/lib/src/authenticate.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _AUTHENTICATE_H
-#define _AUTHENTICATE_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <fnmatch.h>
-#include "dict.h"
-#include "compat.h"
-#include "list.h"
-#include "xlator.h"
-
-typedef enum {
- AUTH_ACCEPT,
- AUTH_REJECT,
- AUTH_DONT_CARE
-} auth_result_t;
-
-typedef auth_result_t (*auth_fn_t) (dict_t *input_params,
- dict_t *config_params);
-
-typedef struct {
- void *handle;
- auth_fn_t authenticate;
- volume_opt_list_t *vol_opt;
-} auth_handle_t;
-
-auth_result_t gf_authenticate (dict_t *input_params,
- dict_t *config_params,
- dict_t *auth_modules);
-int32_t gf_auth_init (xlator_t *xl, dict_t *auth_modules);
-void gf_auth_fini (dict_t *auth_modules);
-
-#endif /* _AUTHENTICATE_H */
diff --git a/xlators/protocol/lib/src/glusterfs-xdr.c b/xlators/protocol/lib/src/glusterfs-xdr.c
deleted file mode 100644
index e8a254094f9..00000000000
--- a/xlators/protocol/lib/src/glusterfs-xdr.c
+++ /dev/null
@@ -1,1798 +0,0 @@
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "glusterfs-xdr.h"
-#include "iatt.h"
-
-bool_t
-xdr_gf_statfs (XDR *xdrs, gf_statfs *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->bsize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->frsize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->blocks))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->bfree))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->bavail))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->files))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ffree))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->favail))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->fsid))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namemax))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_flock (XDR *xdrs, gf_flock *objp)
-{
-
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->whence))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->start))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_iatt (XDR *xdrs, gf_iatt *objp)
-{
- register int32_t *buf;
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_gen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
-
- } else {
- IXDR_PUT_U_LONG(buf, objp->mode);
- IXDR_PUT_U_LONG(buf, objp->ia_nlink);
- IXDR_PUT_U_LONG(buf, objp->ia_uid);
- IXDR_PUT_U_LONG(buf, objp->ia_gid);
- }
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- } else {
- IXDR_PUT_U_LONG(buf, objp->ia_atime);
- IXDR_PUT_U_LONG(buf, objp->ia_atime_nsec);
- IXDR_PUT_U_LONG(buf, objp->ia_mtime);
- IXDR_PUT_U_LONG(buf, objp->ia_mtime_nsec);
- IXDR_PUT_U_LONG(buf, objp->ia_ctime);
- IXDR_PUT_U_LONG(buf, objp->ia_ctime_nsec);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_gen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
-
- } else {
- objp->mode = IXDR_GET_U_LONG(buf);
- objp->ia_nlink = IXDR_GET_U_LONG(buf);
- objp->ia_uid = IXDR_GET_U_LONG(buf);
- objp->ia_gid = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- } else {
- objp->ia_atime = IXDR_GET_U_LONG(buf);
- objp->ia_atime_nsec = IXDR_GET_U_LONG(buf);
- objp->ia_mtime = IXDR_GET_U_LONG(buf);
- objp->ia_mtime_nsec = IXDR_GET_U_LONG(buf);
- objp->ia_ctime = IXDR_GET_U_LONG(buf);
- objp->ia_ctime_nsec = IXDR_GET_U_LONG(buf);
- }
- return TRUE;
- }
-
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_gen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_stat_req (XDR *xdrs, gfs3_stat_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_stat_rsp (XDR *xdrs, gfs3_stat_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readlink_req (XDR *xdrs, gfs3_readlink_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readlink_rsp (XDR *xdrs, gfs3_readlink_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->buf))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mknod_req (XDR *xdrs, gfs3_mknod_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->dev))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mknod_rsp (XDR *xdrs, gfs3_mknod_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mkdir_req (XDR *xdrs, gfs3_mkdir_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mkdir_rsp (XDR *xdrs, gfs3_mkdir_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_unlink_req (XDR *xdrs, gfs3_unlink_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_unlink_rsp (XDR *xdrs, gfs3_unlink_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rmdir_req (XDR *xdrs, gfs3_rmdir_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rmdir_rsp (XDR *xdrs, gfs3_rmdir_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_symlink_req (XDR *xdrs, gfs3_symlink_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->linkname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_symlink_rsp (XDR *xdrs, gfs3_symlink_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rename_req (XDR *xdrs, gfs3_rename_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->oldpar))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->oldgen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->newpar))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->newgen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->oldpath, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->oldbname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newpath, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newbname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rename_rsp (XDR *xdrs, gfs3_rename_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preoldparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postoldparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prenewparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postnewparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_link_req (XDR *xdrs, gfs3_link_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->oldino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->oldgen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->newpar))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->newgen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->oldpath, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newpath, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newbname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_link_rsp (XDR *xdrs, gfs3_link_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_truncate_req (XDR *xdrs, gfs3_truncate_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_truncate_rsp (XDR *xdrs, gfs3_truncate_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_open_req (XDR *xdrs, gfs3_open_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->wbflags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_open_rsp (XDR *xdrs, gfs3_open_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_read_req (XDR *xdrs, gfs3_read_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_read_rsp (XDR *xdrs, gfs3_read_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
-
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lookup_req (XDR *xdrs, gfs3_lookup_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val,
- (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lookup_rsp (XDR *xdrs, gfs3_lookup_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val,
- (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_write_req (XDR *xdrs, gfs3_write_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_write_rsp (XDR *xdrs, gfs3_write_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_statfs_req (XDR *xdrs, gfs3_statfs_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_statfs_rsp (XDR *xdrs, gfs3_statfs_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_statfs (xdrs, &objp->statfs))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lk_req (XDR *xdrs, gfs3_lk_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_flock (xdrs, &objp->flock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lk_rsp (XDR *xdrs, gfs3_lk_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_flock (xdrs, &objp->flock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_inodelk_req (XDR *xdrs, gfs3_inodelk_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_finodelk_req (XDR *xdrs, gfs3_finodelk_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_flush_req (XDR *xdrs, gfs3_flush_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsync_req (XDR *xdrs, gfs3_fsync_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->data))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsync_rsp (XDR *xdrs, gfs3_fsync_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setxattr_req (XDR *xdrs, gfs3_setxattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetxattr_req (XDR *xdrs, gfs3_fsetxattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_xattrop_req (XDR *xdrs, gfs3_xattrop_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_xattrop_rsp (XDR *xdrs, gfs3_xattrop_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fxattrop_req (XDR *xdrs, gfs3_fxattrop_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fxattrop_rsp (XDR *xdrs, gfs3_fxattrop_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_getxattr_req (XDR *xdrs, gfs3_getxattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_getxattr_rsp (XDR *xdrs, gfs3_getxattr_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fgetxattr_req (XDR *xdrs, gfs3_fgetxattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fgetxattr_rsp (XDR *xdrs, gfs3_fgetxattr_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_removexattr_req (XDR *xdrs, gfs3_removexattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_opendir_req (XDR *xdrs, gfs3_opendir_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_opendir_rsp (XDR *xdrs, gfs3_opendir_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsyncdir_req (XDR *xdrs, gfs3_fsyncdir_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_int (xdrs, &objp->data))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdir_req (XDR *xdrs, gfs3_readdir_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdirp_req (XDR *xdrs, gfs3_readdirp_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_setvolume_req (XDR *xdrs, gf_setvolume_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_setvolume_rsp (XDR *xdrs, gf_setvolume_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_access_req (XDR *xdrs, gfs3_access_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_create_req (XDR *xdrs, gfs3_create_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->par))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_create_rsp (XDR *xdrs, gfs3_create_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_ftruncate_req (XDR *xdrs, gfs3_ftruncate_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_ftruncate_rsp (XDR *xdrs, gfs3_ftruncate_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fstat_req (XDR *xdrs, gfs3_fstat_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fstat_rsp (XDR *xdrs, gfs3_fstat_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_entrylk_req (XDR *xdrs, gfs3_entrylk_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fentrylk_req (XDR *xdrs, gfs3_fentrylk_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_checksum_req (XDR *xdrs, gfs3_checksum_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_checksum_rsp (XDR *xdrs, gfs3_checksum_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->fchecksum.fchecksum_val, (u_int *) &objp->fchecksum.fchecksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dchecksum.dchecksum_val, (u_int *) &objp->dchecksum.dchecksum_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setattr_req (XDR *xdrs, gfs3_setattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stbuf))
- return FALSE;
- if (!xdr_int (xdrs, &objp->valid))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setattr_rsp (XDR *xdrs, gfs3_setattr_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpre))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpost))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetattr_req (XDR *xdrs, gfs3_fsetattr_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stbuf))
- return FALSE;
- if (!xdr_int (xdrs, &objp->valid))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetattr_rsp (XDR *xdrs, gfs3_fsetattr_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpre))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpost))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->len))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rchecksum_rsp (XDR *xdrs, gfs3_rchecksum_rsp *objp)
-{
- register int32_t *buf;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_U_LONG(buf, objp->weak_checksum);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->weak_checksum = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_getspec_req (XDR *xdrs, gf_getspec_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->key, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_getspec_rsp (XDR *xdrs, gf_getspec_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->spec, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_log_req (XDR *xdrs, gf_log_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->msg.msg_val, (u_int *) &objp->msg.msg_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_notify_req (XDR *xdrs, gf_notify_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_notify_rsp (XDR *xdrs, gf_notify_rsp *objp)
-{
- register int32_t *buf;
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_U_LONG(buf, objp->flags);
- }
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->flags = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_releasedir_req (XDR *xdrs, gfs3_releasedir_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_release_req (XDR *xdrs, gfs3_release_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->gen))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_common_rsp (XDR *xdrs, gf_common_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-
-bool_t
-xdr_gf_dump_version_req (XDR *xdrs, gf_dump_version_req *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->key, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_dump_version_rsp (XDR *xdrs, gf_dump_version_rsp *objp)
-{
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->msg.msg_val, (u_int *) &objp->msg.msg_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-
-
-bool_t
-xdr_gfs3_dirlist (XDR *xdrs, gfs3_dirlist *objp)
-{
- if (!xdr_u_quad_t (xdrs, &objp->d_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->d_off))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_type))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdir_rsp (XDR *xdrs, gfs3_readdir_rsp *objp)
-{
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_dirplist (XDR *xdrs, gfs3_dirplist *objp)
-{
- if (!xdr_u_quad_t (xdrs, &objp->d_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->d_off))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_type))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdirp_rsp (XDR *xdrs, gfs3_readdirp_rsp *objp)
-{
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (struct gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
- return FALSE;
- return TRUE;
-}
diff --git a/xlators/protocol/lib/src/glusterfs-xdr.h b/xlators/protocol/lib/src/glusterfs-xdr.h
deleted file mode 100644
index 72f131c746c..00000000000
--- a/xlators/protocol/lib/src/glusterfs-xdr.h
+++ /dev/null
@@ -1,1336 +0,0 @@
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _GLUSTERFS3_H_RPCGEN
-#define _GLUSTERFS3_H_RPCGEN
-
-#include <rpc/rpc.h>
-#include "xdr-common.h"
-#include "iatt.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#define GF_O_ACCMODE 003
-#define GF_O_RDONLY 00
-#define GF_O_WRONLY 01
-#define GF_O_RDWR 02
-#define GF_O_CREAT 0100
-#define GF_O_EXCL 0200
-#define GF_O_NOCTTY 0400
-#define GF_O_TRUNC 01000
-#define GF_O_APPEND 02000
-#define GF_O_NONBLOCK 04000
-#define GF_O_SYNC 010000
-#define GF_O_ASYNC 020000
-
-#define GF_O_DIRECT 040000
-#define GF_O_DIRECTORY 0200000
-#define GF_O_NOFOLLOW 0400000
-#define GF_O_NOATIME 01000000
-#define GF_O_CLOEXEC 02000000
-
-#define GF_O_LARGEFILE 0100000
-
-#define XLATE_BIT(from, to, bit) do { \
- if (from & bit) \
- to = to | GF_##bit; \
- } while (0)
-
-#define UNXLATE_BIT(from, to, bit) do { \
- if (from & GF_##bit) \
- to = to | bit; \
- } while (0)
-
-#define XLATE_ACCESSMODE(from, to) do { \
- switch (from & O_ACCMODE) { \
- case O_RDONLY: to |= GF_O_RDONLY; \
- break; \
- case O_WRONLY: to |= GF_O_WRONLY; \
- break; \
- case O_RDWR: to |= GF_O_RDWR; \
- break; \
- } \
- } while (0)
-
-#define UNXLATE_ACCESSMODE(from, to) do { \
- switch (from & GF_O_ACCMODE) { \
- case GF_O_RDONLY: to |= O_RDONLY; \
- break; \
- case GF_O_WRONLY: to |= O_WRONLY; \
- break; \
- case GF_O_RDWR: to |= O_RDWR; \
- break; \
- } \
- } while (0)
-
-static inline uint32_t
-gf_flags_from_flags (uint32_t flags)
-{
- uint32_t gf_flags = 0;
-
- XLATE_ACCESSMODE (flags, gf_flags);
-
- XLATE_BIT (flags, gf_flags, O_CREAT);
- XLATE_BIT (flags, gf_flags, O_EXCL);
- XLATE_BIT (flags, gf_flags, O_NOCTTY);
- XLATE_BIT (flags, gf_flags, O_TRUNC);
- XLATE_BIT (flags, gf_flags, O_APPEND);
- XLATE_BIT (flags, gf_flags, O_NONBLOCK);
- XLATE_BIT (flags, gf_flags, O_SYNC);
- XLATE_BIT (flags, gf_flags, O_ASYNC);
-
- XLATE_BIT (flags, gf_flags, O_DIRECT);
- XLATE_BIT (flags, gf_flags, O_DIRECTORY);
- XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
-#ifdef O_NOATIME
- XLATE_BIT (flags, gf_flags, O_NOATIME);
-#endif
-#ifdef O_CLOEXEC
- XLATE_BIT (flags, gf_flags, O_CLOEXEC);
-#endif
- XLATE_BIT (flags, gf_flags, O_LARGEFILE);
-
- return gf_flags;
-}
-
-static inline uint32_t
-gf_flags_to_flags (uint32_t gf_flags)
-{
- uint32_t flags = 0;
-
- UNXLATE_ACCESSMODE (gf_flags, flags);
-
- UNXLATE_BIT (gf_flags, flags, O_CREAT);
- UNXLATE_BIT (gf_flags, flags, O_EXCL);
- UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
- UNXLATE_BIT (gf_flags, flags, O_TRUNC);
- UNXLATE_BIT (gf_flags, flags, O_APPEND);
- UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
- UNXLATE_BIT (gf_flags, flags, O_SYNC);
- UNXLATE_BIT (gf_flags, flags, O_ASYNC);
-
- UNXLATE_BIT (gf_flags, flags, O_DIRECT);
- UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
- UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
-#ifdef O_NOATIME
- UNXLATE_BIT (gf_flags, flags, O_NOATIME);
-#endif
-#ifdef O_CLOEXEC
- UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
-#endif
- UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
-
- return flags;
-}
-
-
-struct gf_statfs {
- u_quad_t bsize;
- u_quad_t frsize;
- u_quad_t blocks;
- u_quad_t bfree;
- u_quad_t bavail;
- u_quad_t files;
- u_quad_t ffree;
- u_quad_t favail;
- u_quad_t fsid;
- u_quad_t flag;
- u_quad_t namemax;
-};
-typedef struct gf_statfs gf_statfs;
-
-static inline void
-gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
-{
- if (!stat || !gf_stat)
- return;
-
- stat->f_bsize = (gf_stat->bsize);
- stat->f_frsize = (gf_stat->frsize);
- stat->f_blocks = (gf_stat->blocks);
- stat->f_bfree = (gf_stat->bfree);
- stat->f_bavail = (gf_stat->bavail);
- stat->f_files = (gf_stat->files);
- stat->f_ffree = (gf_stat->ffree);
- stat->f_favail = (gf_stat->favail);
- stat->f_fsid = (gf_stat->fsid);
- stat->f_flag = (gf_stat->flag);
- stat->f_namemax = (gf_stat->namemax);
-}
-
-
-static inline void
-gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
-{
- if (!stat || !gf_stat)
- return;
-
- gf_stat->bsize = stat->f_bsize;
- gf_stat->frsize = stat->f_frsize;
- gf_stat->blocks = stat->f_blocks;
- gf_stat->bfree = stat->f_bfree;
- gf_stat->bavail = stat->f_bavail;
- gf_stat->files = stat->f_files;
- gf_stat->ffree = stat->f_ffree;
- gf_stat->favail = stat->f_favail;
- gf_stat->fsid = stat->f_fsid;
- gf_stat->flag = stat->f_flag;
- gf_stat->namemax = stat->f_namemax;
-}
-
-struct gf_flock {
- u_int type;
- u_int whence;
- u_quad_t start;
- u_quad_t len;
- u_int pid;
-};
-typedef struct gf_flock gf_flock;
-
-
-static inline void
-gf_flock_to_flock (struct gf_flock *gf_flock, struct flock *flock)
-{
- if (!flock || !gf_flock)
- return;
-
- flock->l_type = gf_flock->type;
- flock->l_whence = gf_flock->whence;
- flock->l_start = gf_flock->start;
- flock->l_len = gf_flock->len;
- flock->l_pid = gf_flock->pid;
-}
-
-
-static inline void
-gf_flock_from_flock (struct gf_flock *gf_flock, struct flock *flock)
-{
- if (!flock || !gf_flock)
- return;
-
- gf_flock->type = (flock->l_type);
- gf_flock->whence = (flock->l_whence);
- gf_flock->start = (flock->l_start);
- gf_flock->len = (flock->l_len);
- gf_flock->pid = (flock->l_pid);
-}
-
-struct gf_iatt {
- u_quad_t ia_ino;
- u_quad_t ia_gen;
- u_quad_t ia_dev;
- u_int mode;
- u_int ia_nlink;
- u_int ia_uid;
- u_int ia_gid;
- u_quad_t ia_rdev;
- u_quad_t ia_size;
- u_int ia_blksize;
- u_quad_t ia_blocks;
- u_int ia_atime;
- u_int ia_atime_nsec;
- u_int ia_mtime;
- u_int ia_mtime_nsec;
- u_int ia_ctime;
- u_int ia_ctime_nsec;
-} __attribute__((packed));
-typedef struct gf_iatt gf_iatt;
-
-
-static inline void
-gf_stat_to_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
-{
- if (!iatt || !gf_stat)
- return;
-
- iatt->ia_ino = gf_stat->ia_ino ;
- iatt->ia_gen = gf_stat->ia_gen ;
- iatt->ia_dev = gf_stat->ia_dev ;
- iatt->ia_type = ia_type_from_st_mode (gf_stat->mode) ;
- iatt->ia_prot = ia_prot_from_st_mode (gf_stat->mode) ;
- iatt->ia_nlink = gf_stat->ia_nlink ;
- iatt->ia_uid = gf_stat->ia_uid ;
- iatt->ia_gid = gf_stat->ia_gid ;
- iatt->ia_rdev = gf_stat->ia_rdev ;
- iatt->ia_size = gf_stat->ia_size ;
- iatt->ia_blksize = gf_stat->ia_blksize ;
- iatt->ia_blocks = gf_stat->ia_blocks ;
- iatt->ia_atime = gf_stat->ia_atime ;
- iatt->ia_atime_nsec = gf_stat->ia_atime_nsec ;
- iatt->ia_mtime = gf_stat->ia_mtime ;
- iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec ;
- iatt->ia_ctime = gf_stat->ia_ctime ;
- iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec ;
-}
-
-
-static inline void
-gf_stat_from_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
-{
- if (!iatt || !gf_stat)
- return;
-
- gf_stat->ia_ino = iatt->ia_ino ;
- gf_stat->ia_gen = iatt->ia_gen ;
- gf_stat->ia_dev = iatt->ia_dev ;
- gf_stat->mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
- gf_stat->ia_nlink = iatt->ia_nlink ;
- gf_stat->ia_uid = iatt->ia_uid ;
- gf_stat->ia_gid = iatt->ia_gid ;
- gf_stat->ia_rdev = iatt->ia_rdev ;
- gf_stat->ia_size = iatt->ia_size ;
- gf_stat->ia_blksize = iatt->ia_blksize ;
- gf_stat->ia_blocks = iatt->ia_blocks ;
- gf_stat->ia_atime = iatt->ia_atime ;
- gf_stat->ia_atime_nsec = iatt->ia_atime_nsec ;
- gf_stat->ia_mtime = iatt->ia_mtime ;
- gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec ;
- gf_stat->ia_ctime = iatt->ia_ctime ;
- gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec ;
-}
-
-
-/* Gluster FS Payload structures */
-
-struct gfs3_stat_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- char *path;
-};
-typedef struct gfs3_stat_req gfs3_stat_req;
-
-struct gfs3_stat_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
-};
-typedef struct gfs3_stat_rsp gfs3_stat_rsp;
-
-struct gfs3_readlink_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int size;
- char *path;
-};
-typedef struct gfs3_readlink_req gfs3_readlink_req;
-
-struct gfs3_readlink_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt buf;
- char *path;
-};
-typedef struct gfs3_readlink_rsp gfs3_readlink_rsp;
-
-struct gfs3_mknod_req {
- u_quad_t gfs_id;
- u_quad_t par;
- u_quad_t gen;
- u_quad_t dev;
- u_int mode;
- char *path;
- char *bname;
-};
-typedef struct gfs3_mknod_req gfs3_mknod_req;
-
-struct gfs3_mknod_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_mknod_rsp gfs3_mknod_rsp;
-
-struct gfs3_mkdir_req {
- u_quad_t gfs_id;
- u_quad_t par;
- u_quad_t gen;
- u_int mode;
- char *path;
- char *bname;
-};
-typedef struct gfs3_mkdir_req gfs3_mkdir_req;
-
-struct gfs3_mkdir_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_mkdir_rsp gfs3_mkdir_rsp;
-
-struct gfs3_unlink_req {
- u_quad_t gfs_id;
- u_quad_t par;
- u_quad_t gen;
- char *path;
- char *bname;
-};
-typedef struct gfs3_unlink_req gfs3_unlink_req;
-
-struct gfs3_unlink_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_unlink_rsp gfs3_unlink_rsp;
-
-struct gfs3_rmdir_req {
- u_quad_t gfs_id;
- u_quad_t par;
- u_quad_t gen;
- char *path;
- char *bname;
-};
-typedef struct gfs3_rmdir_req gfs3_rmdir_req;
-
-struct gfs3_rmdir_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_rmdir_rsp gfs3_rmdir_rsp;
-
-struct gfs3_symlink_req {
- u_quad_t gfs_id;
- u_quad_t par;
- u_quad_t gen;
- char *path;
- char *bname;
- char *linkname;
-};
-typedef struct gfs3_symlink_req gfs3_symlink_req;
-
-struct gfs3_symlink_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_symlink_rsp gfs3_symlink_rsp;
-
-struct gfs3_rename_req {
- u_quad_t gfs_id;
- u_quad_t oldpar;
- u_quad_t oldgen;
- u_quad_t newpar;
- u_quad_t newgen;
- char *oldpath;
- char *oldbname;
- char *newpath;
- char *newbname;
-};
-typedef struct gfs3_rename_req gfs3_rename_req;
-
-struct gfs3_rename_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preoldparent;
- struct gf_iatt postoldparent;
- struct gf_iatt prenewparent;
- struct gf_iatt postnewparent;
-};
-typedef struct gfs3_rename_rsp gfs3_rename_rsp;
-
-struct gfs3_link_req {
- u_quad_t gfs_id;
- u_quad_t oldino;
- u_quad_t oldgen;
- u_quad_t newpar;
- u_quad_t newgen;
- char *oldpath;
- char *newpath;
- char *newbname;
-};
-typedef struct gfs3_link_req gfs3_link_req;
-
-struct gfs3_link_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_link_rsp gfs3_link_rsp;
-
-struct gfs3_truncate_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_quad_t offset;
- char *path;
-};
-typedef struct gfs3_truncate_req gfs3_truncate_req;
-
-struct gfs3_truncate_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-};
-typedef struct gfs3_truncate_rsp gfs3_truncate_rsp;
-
-struct gfs3_open_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int flags;
- u_int wbflags;
- char *path;
-};
-typedef struct gfs3_open_req gfs3_open_req;
-
-struct gfs3_open_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- quad_t fd;
-};
-typedef struct gfs3_open_rsp gfs3_open_rsp;
-
-struct gfs3_read_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_quad_t offset;
- u_int size;
-};
-typedef struct gfs3_read_req gfs3_read_req;
-
-struct gfs3_read_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- u_int size;
-} __attribute__((packed));
-typedef struct gfs3_read_rsp gfs3_read_rsp;
-
-struct gfs3_lookup_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t par;
- u_quad_t gen;
- u_int flags;
- char *path;
- char *bname;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_lookup_req gfs3_lookup_req;
-
-struct gfs3_lookup_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt postparent;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_lookup_rsp gfs3_lookup_rsp;
-
-struct gfs3_write_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_quad_t offset;
- u_int size;
-} __attribute__((packed));
-typedef struct gfs3_write_req gfs3_write_req;
-
-struct gfs3_write_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-};
-typedef struct gfs3_write_rsp gfs3_write_rsp;
-
-struct gfs3_statfs_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- char *path;
-};
-typedef struct gfs3_statfs_req gfs3_statfs_req;
-
-struct gfs3_statfs_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_statfs statfs;
-};
-typedef struct gfs3_statfs_rsp gfs3_statfs_rsp;
-
-struct gfs3_lk_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int cmd;
- u_int type;
- struct gf_flock flock;
-};
-typedef struct gfs3_lk_req gfs3_lk_req;
-
-struct gfs3_lk_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_flock flock;
-};
-typedef struct gfs3_lk_rsp gfs3_lk_rsp;
-
-struct gfs3_inodelk_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int cmd;
- u_int type;
- struct gf_flock flock;
- char *path;
- char *volume;
-};
-typedef struct gfs3_inodelk_req gfs3_inodelk_req;
-
-struct gfs3_finodelk_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int cmd;
- u_int type;
- struct gf_flock flock;
- char *volume;
-};
-typedef struct gfs3_finodelk_req gfs3_finodelk_req;
-
-struct gfs3_flush_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
-};
-typedef struct gfs3_flush_req gfs3_flush_req;
-
-struct gfs3_fsync_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int data;
-};
-typedef struct gfs3_fsync_req gfs3_fsync_req;
-
-struct gfs3_fsync_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-};
-typedef struct gfs3_fsync_rsp gfs3_fsync_rsp;
-
-struct gfs3_setxattr_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- char *path;
-};
-typedef struct gfs3_setxattr_req gfs3_setxattr_req;
-
-struct gfs3_fsetxattr_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_fsetxattr_req gfs3_fsetxattr_req;
-
-struct gfs3_xattrop_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- char *path;
-};
-typedef struct gfs3_xattrop_req gfs3_xattrop_req;
-
-struct gfs3_xattrop_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_xattrop_rsp gfs3_xattrop_rsp;
-
-struct gfs3_fxattrop_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_fxattrop_req gfs3_fxattrop_req;
-
-struct gfs3_fxattrop_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_fxattrop_rsp gfs3_fxattrop_rsp;
-
-struct gfs3_getxattr_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int namelen;
- char *path;
- char *name;
-};
-typedef struct gfs3_getxattr_req gfs3_getxattr_req;
-
-struct gfs3_getxattr_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_getxattr_rsp gfs3_getxattr_rsp;
-
-struct gfs3_fgetxattr_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int namelen;
- char *name;
-};
-typedef struct gfs3_fgetxattr_req gfs3_fgetxattr_req;
-
-struct gfs3_fgetxattr_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_fgetxattr_rsp gfs3_fgetxattr_rsp;
-
-struct gfs3_removexattr_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- char *path;
- char *name;
-};
-typedef struct gfs3_removexattr_req gfs3_removexattr_req;
-
-struct gfs3_opendir_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- char *path;
-};
-typedef struct gfs3_opendir_req gfs3_opendir_req;
-
-struct gfs3_opendir_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- quad_t fd;
-};
-typedef struct gfs3_opendir_rsp gfs3_opendir_rsp;
-
-struct gfs3_fsyncdir_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- int data;
-};
-typedef struct gfs3_fsyncdir_req gfs3_fsyncdir_req;
-
-struct gfs3_readdir_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_quad_t offset;
- u_int size;
-};
-typedef struct gfs3_readdir_req gfs3_readdir_req;
-
-struct gfs3_readdirp_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_quad_t offset;
- u_int size;
-};
-typedef struct gfs3_readdirp_req gfs3_readdirp_req;
-
-struct gf_setvolume_req {
- u_quad_t gfs_id;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_setvolume_req gf_setvolume_req;
-
-struct gf_setvolume_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_setvolume_rsp gf_setvolume_rsp;
-
-struct gfs3_access_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int mask;
- char *path;
-};
-typedef struct gfs3_access_req gfs3_access_req;
-
-struct gfs3_create_req {
- u_quad_t gfs_id;
- u_quad_t par;
- u_quad_t gen;
- u_int flags;
- u_int mode;
- char *path;
- char *bname;
-};
-typedef struct gfs3_create_req gfs3_create_req;
-
-struct gfs3_create_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- u_quad_t fd;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-typedef struct gfs3_create_rsp gfs3_create_rsp;
-
-struct gfs3_ftruncate_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_quad_t offset;
-};
-typedef struct gfs3_ftruncate_req gfs3_ftruncate_req;
-
-struct gfs3_ftruncate_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-};
-typedef struct gfs3_ftruncate_rsp gfs3_ftruncate_rsp;
-
-struct gfs3_fstat_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
-};
-typedef struct gfs3_fstat_req gfs3_fstat_req;
-
-struct gfs3_fstat_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
-};
-typedef struct gfs3_fstat_rsp gfs3_fstat_rsp;
-
-struct gfs3_entrylk_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int cmd;
- u_int type;
- u_quad_t namelen;
- char *path;
- char *name;
- char *volume;
-};
-typedef struct gfs3_entrylk_req gfs3_entrylk_req;
-
-struct gfs3_fentrylk_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
- u_int cmd;
- u_int type;
- u_quad_t namelen;
- char *name;
- char *volume;
-};
-typedef struct gfs3_fentrylk_req gfs3_fentrylk_req;
-
-struct gfs3_checksum_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- u_int flag;
- char *path;
-};
-typedef struct gfs3_checksum_req gfs3_checksum_req;
-
-struct gfs3_checksum_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct {
- u_int fchecksum_len;
- char *fchecksum_val;
- } fchecksum;
- struct {
- u_int dchecksum_len;
- char *dchecksum_val;
- } dchecksum;
-};
-typedef struct gfs3_checksum_rsp gfs3_checksum_rsp;
-
-struct gfs3_setattr_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- struct gf_iatt stbuf;
- int valid;
- char *path;
-};
-typedef struct gfs3_setattr_req gfs3_setattr_req;
-
-struct gfs3_setattr_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
-};
-typedef struct gfs3_setattr_rsp gfs3_setattr_rsp;
-
-struct gfs3_fsetattr_req {
- u_quad_t gfs_id;
- quad_t fd;
- struct gf_iatt stbuf;
- int valid;
-};
-typedef struct gfs3_fsetattr_req gfs3_fsetattr_req;
-
-struct gfs3_fsetattr_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
-};
-typedef struct gfs3_fsetattr_rsp gfs3_fsetattr_rsp;
-
-struct gfs3_rchecksum_req {
- u_quad_t gfs_id;
- quad_t fd;
- u_quad_t offset;
- u_int len;
-};
-typedef struct gfs3_rchecksum_req gfs3_rchecksum_req;
-
-struct gfs3_rchecksum_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- u_int weak_checksum;
- struct {
- u_int strong_checksum_len;
- char *strong_checksum_val;
- } strong_checksum;
-};
-typedef struct gfs3_rchecksum_rsp gfs3_rchecksum_rsp;
-
-struct gf_getspec_req {
- u_quad_t gfs_id;
- u_int flags;
- char *key;
-};
-typedef struct gf_getspec_req gf_getspec_req;
-
-struct gf_getspec_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- char *spec;
-};
-typedef struct gf_getspec_rsp gf_getspec_rsp;
-
-struct gf_log_req {
- u_quad_t gfs_id;
- struct {
- u_int msg_len;
- char *msg_val;
- } msg;
-};
-typedef struct gf_log_req gf_log_req;
-
-struct gf_notify_req {
- u_quad_t gfs_id;
- u_int flags;
- char *buf;
-};
-typedef struct gf_notify_req gf_notify_req;
-
-struct gf_notify_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- u_int flags;
- char *buf;
-};
-typedef struct gf_notify_rsp gf_notify_rsp;
-
-struct gfs3_releasedir_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
-};
-typedef struct gfs3_releasedir_req gfs3_releasedir_req;
-
-struct gfs3_release_req {
- u_quad_t gfs_id;
- u_quad_t ino;
- u_quad_t gen;
- quad_t fd;
-};
-typedef struct gfs3_release_req gfs3_release_req;
-
-struct gf_common_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
-};
-typedef struct gf_common_rsp gf_common_rsp;
-
-struct gf_dump_version_req {
- u_quad_t gfs_id;
- u_int flags;
- char *key;
-};
-typedef struct gf_dump_version_req gf_dump_version_req;
-
-struct gf_dump_version_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- u_int flags;
- struct {
- u_int msg_len;
- char *msg_val;
- } msg;
-};
-typedef struct gf_dump_version_rsp gf_dump_version_rsp;
-
-struct gfs3_dirlist {
- u_quad_t d_ino;
- u_quad_t d_off;
- u_int d_len;
- u_int d_type;
- char *name;
- struct gfs3_dirlist *nextentry;
-};
-typedef struct gfs3_dirlist gfs3_dirlist;
-
-struct gfs3_readdir_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gfs3_dirlist *reply;
-};
-typedef struct gfs3_readdir_rsp gfs3_readdir_rsp;
-
-struct gfs3_dirplist {
- u_quad_t d_ino;
- u_quad_t d_off;
- u_int d_len;
- u_int d_type;
- char *name;
- struct gf_iatt stat;
- struct gfs3_dirplist *nextentry;
-};
-typedef struct gfs3_dirplist gfs3_dirplist;
-
-struct gfs3_readdirp_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gfs3_dirplist *reply;
-};
-typedef struct gfs3_readdirp_rsp gfs3_readdirp_rsp;
-
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_gf_statfs (XDR *, gf_statfs*);
-extern bool_t xdr_gf_flock (XDR *, gf_flock*);
-extern bool_t xdr_gf_iatt (XDR *, gf_iatt*);
-extern bool_t xdr_gfs3_stat_req (XDR *, gfs3_stat_req*);
-extern bool_t xdr_gfs3_stat_rsp (XDR *, gfs3_stat_rsp*);
-extern bool_t xdr_gfs3_readlink_req (XDR *, gfs3_readlink_req*);
-extern bool_t xdr_gfs3_readlink_rsp (XDR *, gfs3_readlink_rsp*);
-extern bool_t xdr_gfs3_mknod_req (XDR *, gfs3_mknod_req*);
-extern bool_t xdr_gfs3_mknod_rsp (XDR *, gfs3_mknod_rsp*);
-extern bool_t xdr_gfs3_mkdir_req (XDR *, gfs3_mkdir_req*);
-extern bool_t xdr_gfs3_mkdir_rsp (XDR *, gfs3_mkdir_rsp*);
-extern bool_t xdr_gfs3_unlink_req (XDR *, gfs3_unlink_req*);
-extern bool_t xdr_gfs3_unlink_rsp (XDR *, gfs3_unlink_rsp*);
-extern bool_t xdr_gfs3_rmdir_req (XDR *, gfs3_rmdir_req*);
-extern bool_t xdr_gfs3_rmdir_rsp (XDR *, gfs3_rmdir_rsp*);
-extern bool_t xdr_gfs3_symlink_req (XDR *, gfs3_symlink_req*);
-extern bool_t xdr_gfs3_symlink_rsp (XDR *, gfs3_symlink_rsp*);
-extern bool_t xdr_gfs3_rename_req (XDR *, gfs3_rename_req*);
-extern bool_t xdr_gfs3_rename_rsp (XDR *, gfs3_rename_rsp*);
-extern bool_t xdr_gfs3_link_req (XDR *, gfs3_link_req*);
-extern bool_t xdr_gfs3_link_rsp (XDR *, gfs3_link_rsp*);
-extern bool_t xdr_gfs3_truncate_req (XDR *, gfs3_truncate_req*);
-extern bool_t xdr_gfs3_truncate_rsp (XDR *, gfs3_truncate_rsp*);
-extern bool_t xdr_gfs3_open_req (XDR *, gfs3_open_req*);
-extern bool_t xdr_gfs3_open_rsp (XDR *, gfs3_open_rsp*);
-extern bool_t xdr_gfs3_read_req (XDR *, gfs3_read_req*);
-extern bool_t xdr_gfs3_read_rsp (XDR *, gfs3_read_rsp*);
-extern bool_t xdr_gfs3_lookup_req (XDR *, gfs3_lookup_req*);
-extern bool_t xdr_gfs3_lookup_rsp (XDR *, gfs3_lookup_rsp*);
-extern bool_t xdr_gfs3_write_req (XDR *, gfs3_write_req*);
-extern bool_t xdr_gfs3_write_rsp (XDR *, gfs3_write_rsp*);
-extern bool_t xdr_gfs3_statfs_req (XDR *, gfs3_statfs_req*);
-extern bool_t xdr_gfs3_statfs_rsp (XDR *, gfs3_statfs_rsp*);
-extern bool_t xdr_gfs3_lk_req (XDR *, gfs3_lk_req*);
-extern bool_t xdr_gfs3_lk_rsp (XDR *, gfs3_lk_rsp*);
-extern bool_t xdr_gfs3_inodelk_req (XDR *, gfs3_inodelk_req*);
-extern bool_t xdr_gfs3_finodelk_req (XDR *, gfs3_finodelk_req*);
-extern bool_t xdr_gfs3_flush_req (XDR *, gfs3_flush_req*);
-extern bool_t xdr_gfs3_fsync_req (XDR *, gfs3_fsync_req*);
-extern bool_t xdr_gfs3_fsync_rsp (XDR *, gfs3_fsync_rsp*);
-extern bool_t xdr_gfs3_setxattr_req (XDR *, gfs3_setxattr_req*);
-extern bool_t xdr_gfs3_fsetxattr_req (XDR *, gfs3_fsetxattr_req*);
-extern bool_t xdr_gfs3_xattrop_req (XDR *, gfs3_xattrop_req*);
-extern bool_t xdr_gfs3_xattrop_rsp (XDR *, gfs3_xattrop_rsp*);
-extern bool_t xdr_gfs3_fxattrop_req (XDR *, gfs3_fxattrop_req*);
-extern bool_t xdr_gfs3_fxattrop_rsp (XDR *, gfs3_fxattrop_rsp*);
-extern bool_t xdr_gfs3_getxattr_req (XDR *, gfs3_getxattr_req*);
-extern bool_t xdr_gfs3_getxattr_rsp (XDR *, gfs3_getxattr_rsp*);
-extern bool_t xdr_gfs3_fgetxattr_req (XDR *, gfs3_fgetxattr_req*);
-extern bool_t xdr_gfs3_fgetxattr_rsp (XDR *, gfs3_fgetxattr_rsp*);
-extern bool_t xdr_gfs3_removexattr_req (XDR *, gfs3_removexattr_req*);
-extern bool_t xdr_gfs3_opendir_req (XDR *, gfs3_opendir_req*);
-extern bool_t xdr_gfs3_opendir_rsp (XDR *, gfs3_opendir_rsp*);
-extern bool_t xdr_gfs3_fsyncdir_req (XDR *, gfs3_fsyncdir_req*);
-extern bool_t xdr_gfs3_readdir_req (XDR *, gfs3_readdir_req*);
-extern bool_t xdr_gfs3_dirlist (XDR *, gfs3_dirlist*);
-extern bool_t xdr_gfs3_readdir_rsp (XDR *, gfs3_readdir_rsp*);
-extern bool_t xdr_gfs3_dirplist (XDR *, gfs3_dirplist*);
-extern bool_t xdr_gfs3_readdirp_rsp (XDR *, gfs3_readdirp_rsp*);
-extern bool_t xdr_gfs3_readdirp_req (XDR *, gfs3_readdirp_req*);
-extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*);
-extern bool_t xdr_gf_setvolume_rsp (XDR *, gf_setvolume_rsp*);
-extern bool_t xdr_gfs3_access_req (XDR *, gfs3_access_req*);
-extern bool_t xdr_gfs3_create_req (XDR *, gfs3_create_req*);
-extern bool_t xdr_gfs3_create_rsp (XDR *, gfs3_create_rsp*);
-extern bool_t xdr_gfs3_ftruncate_req (XDR *, gfs3_ftruncate_req*);
-extern bool_t xdr_gfs3_ftruncate_rsp (XDR *, gfs3_ftruncate_rsp*);
-extern bool_t xdr_gfs3_fstat_req (XDR *, gfs3_fstat_req*);
-extern bool_t xdr_gfs3_fstat_rsp (XDR *, gfs3_fstat_rsp*);
-extern bool_t xdr_gfs3_entrylk_req (XDR *, gfs3_entrylk_req*);
-extern bool_t xdr_gfs3_fentrylk_req (XDR *, gfs3_fentrylk_req*);
-extern bool_t xdr_gfs3_checksum_req (XDR *, gfs3_checksum_req*);
-extern bool_t xdr_gfs3_checksum_rsp (XDR *, gfs3_checksum_rsp*);
-extern bool_t xdr_gfs3_setattr_req (XDR *, gfs3_setattr_req*);
-extern bool_t xdr_gfs3_setattr_rsp (XDR *, gfs3_setattr_rsp*);
-extern bool_t xdr_gfs3_fsetattr_req (XDR *, gfs3_fsetattr_req*);
-extern bool_t xdr_gfs3_fsetattr_rsp (XDR *, gfs3_fsetattr_rsp*);
-extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*);
-extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*);
-extern bool_t xdr_gf_getspec_req (XDR *, gf_getspec_req*);
-extern bool_t xdr_gf_getspec_rsp (XDR *, gf_getspec_rsp*);
-extern bool_t xdr_gf_log_req (XDR *, gf_log_req*);
-extern bool_t xdr_gf_notify_req (XDR *, gf_notify_req*);
-extern bool_t xdr_gf_notify_rsp (XDR *, gf_notify_rsp*);
-extern bool_t xdr_gfs3_releasedir_req (XDR *, gfs3_releasedir_req*);
-extern bool_t xdr_gfs3_release_req (XDR *, gfs3_release_req*);
-extern bool_t xdr_gf_common_rsp (XDR *, gf_common_rsp*);
-extern bool_t xdr_gf_dump_version_req (XDR *, gf_dump_version_req *);
-extern bool_t xdr_gf_dump_version_rsp (XDR *, gf_dump_version_rsp *);
-
-#else /* K&R C */
-extern bool_t xdr_gf_statfs ();
-extern bool_t xdr_gf_flock ();
-extern bool_t xdr_gf_iatt ();
-extern bool_t xdr_gfs3_stat_req ();
-extern bool_t xdr_gfs3_stat_rsp ();
-extern bool_t xdr_gfs3_readlink_req ();
-extern bool_t xdr_gfs3_readlink_rsp ();
-extern bool_t xdr_gfs3_mknod_req ();
-extern bool_t xdr_gfs3_mknod_rsp ();
-extern bool_t xdr_gfs3_mkdir_req ();
-extern bool_t xdr_gfs3_mkdir_rsp ();
-extern bool_t xdr_gfs3_unlink_req ();
-extern bool_t xdr_gfs3_unlink_rsp ();
-extern bool_t xdr_gfs3_rmdir_req ();
-extern bool_t xdr_gfs3_rmdir_rsp ();
-extern bool_t xdr_gfs3_symlink_req ();
-extern bool_t xdr_gfs3_symlink_rsp ();
-extern bool_t xdr_gfs3_rename_req ();
-extern bool_t xdr_gfs3_rename_rsp ();
-extern bool_t xdr_gfs3_link_req ();
-extern bool_t xdr_gfs3_link_rsp ();
-extern bool_t xdr_gfs3_truncate_req ();
-extern bool_t xdr_gfs3_truncate_rsp ();
-extern bool_t xdr_gfs3_open_req ();
-extern bool_t xdr_gfs3_open_rsp ();
-extern bool_t xdr_gfs3_read_req ();
-extern bool_t xdr_gfs3_read_rsp ();
-extern bool_t xdr_gfs3_lookup_req ();
-extern bool_t xdr_gfs3_lookup_rsp ();
-extern bool_t xdr_gfs3_write_req ();
-extern bool_t xdr_gfs3_write_rsp ();
-extern bool_t xdr_gfs3_statfs_req ();
-extern bool_t xdr_gfs3_statfs_rsp ();
-extern bool_t xdr_gfs3_lk_req ();
-extern bool_t xdr_gfs3_lk_rsp ();
-extern bool_t xdr_gfs3_inodelk_req ();
-extern bool_t xdr_gfs3_finodelk_req ();
-extern bool_t xdr_gfs3_flush_req ();
-extern bool_t xdr_gfs3_fsync_req ();
-extern bool_t xdr_gfs3_fsync_rsp ();
-extern bool_t xdr_gfs3_setxattr_req ();
-extern bool_t xdr_gfs3_fsetxattr_req ();
-extern bool_t xdr_gfs3_xattrop_req ();
-extern bool_t xdr_gfs3_xattrop_rsp ();
-extern bool_t xdr_gfs3_fxattrop_req ();
-extern bool_t xdr_gfs3_fxattrop_rsp ();
-extern bool_t xdr_gfs3_getxattr_req ();
-extern bool_t xdr_gfs3_getxattr_rsp ();
-extern bool_t xdr_gfs3_fgetxattr_req ();
-extern bool_t xdr_gfs3_fgetxattr_rsp ();
-extern bool_t xdr_gfs3_removexattr_req ();
-extern bool_t xdr_gfs3_opendir_req ();
-extern bool_t xdr_gfs3_opendir_rsp ();
-extern bool_t xdr_gfs3_fsyncdir_req ();
-extern bool_t xdr_gfs3_readdir_req ();
-extern bool_t xdr_gfs3_dirlist ();
-extern bool_t xdr_gfs3_readdir_rsp ();
-extern bool_t xdr_gfs3_dirplist ();
-extern bool_t xdr_gfs3_readdirp_rsp ();
-extern bool_t xdr_gfs3_readdirp_req ();
-extern bool_t xdr_gf_setvolume_req ();
-extern bool_t xdr_gf_setvolume_rsp ();
-extern bool_t xdr_gfs3_access_req ();
-extern bool_t xdr_gfs3_create_req ();
-extern bool_t xdr_gfs3_create_rsp ();
-extern bool_t xdr_gfs3_ftruncate_req ();
-extern bool_t xdr_gfs3_ftruncate_rsp ();
-extern bool_t xdr_gfs3_fstat_req ();
-extern bool_t xdr_gfs3_fstat_rsp ();
-extern bool_t xdr_gfs3_entrylk_req ();
-extern bool_t xdr_gfs3_fentrylk_req ();
-extern bool_t xdr_gfs3_checksum_req ();
-extern bool_t xdr_gfs3_checksum_rsp ();
-extern bool_t xdr_gfs3_setattr_req ();
-extern bool_t xdr_gfs3_setattr_rsp ();
-extern bool_t xdr_gfs3_fsetattr_req ();
-extern bool_t xdr_gfs3_fsetattr_rsp ();
-extern bool_t xdr_gfs3_rchecksum_req ();
-extern bool_t xdr_gfs3_rchecksum_rsp ();
-extern bool_t xdr_gfs3_releasedir_req ();
-extern bool_t xdr_gfs3_release_req ();
-extern bool_t xdr_gf_getspec_req ();
-extern bool_t xdr_gf_getspec_rsp ();
-extern bool_t xdr_gf_log_req ();
-extern bool_t xdr_gf_notify_req ();
-extern bool_t xdr_gf_notify_rsp ();
-extern bool_t xdr_gf_common_rsp ();
-extern bool_t xdr_gf_dump_version_req ();
-extern bool_t xdr_gf_dump_version_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_GLUSTERFS3_H_RPCGEN */
diff --git a/xlators/protocol/lib/src/glusterfs3.x b/xlators/protocol/lib/src/glusterfs3.x
deleted file mode 100644
index c9510527c27..00000000000
--- a/xlators/protocol/lib/src/glusterfs3.x
+++ /dev/null
@@ -1,779 +0,0 @@
-#define GF_REQUEST_MAXGROUPS 16
-struct gf_statfs {
- unsigned hyper bsize;
- unsigned hyper frsize;
- unsigned hyper blocks;
- unsigned hyper bfree;
- unsigned hyper bavail;
- unsigned hyper files;
- unsigned hyper ffree;
- unsigned hyper favail;
- unsigned hyper fsid;
- unsigned hyper flag;
- unsigned hyper namemax;
-};
-
-struct gf_flock {
- unsigned int type;
- unsigned int whence;
- unsigned hyper start;
- unsigned hyper len;
- unsigned int pid;
-} ;
-
-
-struct gf_iatt {
- unsigned hyper ia_ino; /* inode number */
- unsigned hyper ia_gen; /* generation number */
- unsigned hyper ia_dev; /* backing device ID */
- unsigned int mode; /* mode (type + protection )*/
- unsigned int ia_nlink; /* Link count */
- unsigned int ia_uid; /* user ID of owner */
- unsigned int ia_gid; /* group ID of owner */
- unsigned hyper ia_rdev; /* device ID (if special file) */
- unsigned hyper ia_size; /* file size in bytes */
- unsigned int ia_blksize; /* blocksize for filesystem I/O */
- unsigned hyper ia_blocks; /* number of 512B blocks allocated */
- unsigned int ia_atime; /* last access time */
- unsigned int ia_atime_nsec;
- unsigned int ia_mtime; /* last modification time */
- unsigned int ia_mtime_nsec;
- unsigned int ia_ctime; /* last status change time */
- unsigned int ia_ctime_nsec;
-};
-
-struct gfs3_stat_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- string path<>; /* NULL terminated */
-};
-struct gfs3_stat_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
-} ;
-
-
-struct gfs3_readlink_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int size;
- string path<>; /* NULL terminated */
-} ;
- struct gfs3_readlink_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt buf;
- string path<>; /* NULL terminated */
-} ;
-
-
- struct gfs3_mknod_req {
- unsigned hyper gfs_id;
- unsigned hyper par;
- unsigned hyper gen;
- unsigned hyper dev;
- unsigned int mode;
- string path<>; /* NULL terminated */
- string bname<>; /* NULL terminated */
-} ;
- struct gfs3_mknod_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-
-
- struct gfs3_mkdir_req {
- unsigned hyper gfs_id;
- unsigned hyper par;
- unsigned hyper gen;
- unsigned int mode;
- string path<>; /* NULL terminated */
- string bname<>; /* NULL terminated */
-} ;
- struct gfs3_mkdir_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-} ;
-
-
- struct gfs3_unlink_req {
- unsigned hyper gfs_id;
- unsigned hyper par;
- unsigned hyper gen;
- string path<>; /* NULL terminated */
- string bname<>; /* NULL terminated */
-};
- struct gfs3_unlink_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-
-
- struct gfs3_rmdir_req {
- unsigned hyper gfs_id;
- unsigned hyper par;
- unsigned hyper gen;
- string path<>;
- string bname<>; /* NULL terminated */
-};
- struct gfs3_rmdir_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-
-
- struct gfs3_symlink_req {
- unsigned hyper gfs_id;
- unsigned hyper par;
- unsigned hyper gen;
- string path<>;
- string bname<>;
- string linkname<>;
-};
- struct gfs3_symlink_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-
-
- struct gfs3_rename_req {
- unsigned hyper gfs_id;
- unsigned hyper oldpar;
- unsigned hyper oldgen;
- unsigned hyper newpar;
- unsigned hyper newgen;
- string oldpath<>;
- string oldbname<>; /* NULL terminated */
- string newpath<>;
- string newbname<>; /* NULL terminated */
-};
- struct gfs3_rename_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preoldparent;
- struct gf_iatt postoldparent;
- struct gf_iatt prenewparent;
- struct gf_iatt postnewparent;
-};
-
-
- struct gfs3_link_req {
- unsigned hyper gfs_id;
- unsigned hyper oldino;
- unsigned hyper oldgen;
- unsigned hyper newpar;
- unsigned hyper newgen;
- string oldpath<>;
- string newpath<>;
- string newbname<>;
-};
- struct gfs3_link_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-};
-
- struct gfs3_truncate_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned hyper offset;
- string path<>;
-};
- struct gfs3_truncate_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-};
-
-
- struct gfs3_open_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int flags;
- unsigned int wbflags;
- string path<>;
-};
- struct gfs3_open_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- hyper fd;
-};
-
-
- struct gfs3_read_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned hyper offset;
- unsigned int size;
-};
- struct gfs3_read_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- string buf<>;
-} ;
-
-struct gfs3_lookup_req {
- unsigned hyper gfs_id;
- unsigned hyper ino; /* NOTE: used only in case of 'root' lookup */
- unsigned hyper par;
- unsigned hyper gen;
- unsigned int flags;
- string path<>;
- string bname<>;
- opaque dict<>;
-};
- struct gfs3_lookup_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt postparent;
- opaque dict<>;
-} ;
-
-
-
- struct gfs3_write_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned hyper offset;
- unsigned int size;
-};
- struct gfs3_write_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-} ;
-
-
- struct gfs3_statfs_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- string path<>;
-} ;
- struct gfs3_statfs_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_statfs statfs;
-} ;
-
- struct gfs3_lk_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int cmd;
- unsigned int type;
- struct gf_flock flock;
-} ;
- struct gfs3_lk_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_flock flock;
-} ;
-
- struct gfs3_inodelk_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int cmd;
- unsigned int type;
- struct gf_flock flock;
- string path<>;
- string volume<>;
-} ;
-
-struct gfs3_finodelk_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int cmd;
- unsigned int type;
- struct gf_flock flock;
- string volume<>;
-} ;
-
-
- struct gfs3_flush_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
-} ;
-
-
- struct gfs3_fsync_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int data;
-} ;
- struct gfs3_fsync_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-} ;
-
-
- struct gfs3_setxattr_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int flags;
- opaque dict<>;
- string path<>;
-} ;
-
-
-
- struct gfs3_fsetxattr_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int flags;
- opaque dict<>;
-} ;
-
-
-
- struct gfs3_xattrop_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int flags;
- opaque dict<>;
- string path<>;
-} ;
-
- struct gfs3_xattrop_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
-
- struct gfs3_fxattrop_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int flags;
- opaque dict<>;
-} ;
-
- struct gfs3_fxattrop_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
-
- struct gfs3_getxattr_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int namelen;
- string path<>;
- string name<>;
-} ;
- struct gfs3_getxattr_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
-
- struct gfs3_fgetxattr_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int namelen;
- string name<>;
-} ;
- struct gfs3_fgetxattr_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
-
- struct gfs3_removexattr_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- string path<>;
- string name<>;
-} ;
-
-
-
- struct gfs3_opendir_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- string path<>;
-} ;
- struct gfs3_opendir_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- hyper fd;
-} ;
-
-
- struct gfs3_fsyncdir_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- int data;
-} ;
-
- struct gfs3_readdir_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned hyper offset;
- unsigned int size;
-};
-struct gfs3_dirlist {
- unsigned hyper d_ino;
- unsigned hyper d_off;
- unsigned int d_len;
- unsigned int d_type;
- char *name;
- struct gfs3_dirlist *nextentry;
-};
-
-struct gfs3_readdir_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gfs3_dirlist reply;
-};
-
-
-
-struct gfs3_dirplist {
- unsigned hyper d_ino;
- unsigned hyper d_off;
- unsigned int d_len;
- unsigned int d_type;
- char *name;
- struct gf_iatt name_attributes;
- struct gfs3_dirplist *nextentry;
-};
-
-struct gfs3_readdirp_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gfs3_dirlistp reply;
-};
-
-
- struct gfs3_readdirp_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned hyper offset;
- unsigned int size;
-} ;
-
-
- struct gf_setvolume_req {
- unsigned hyper gfs_id;
- opaque dict<>;
-} ;
- struct gf_setvolume_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
-struct gfs3_access_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int mask;
- string path<>;
-} ;
-
-
-struct gfs3_create_req {
- unsigned hyper gfs_id;
- unsigned hyper par;
- unsigned hyper gen;
- unsigned int flags;
- unsigned int mode;
- string path<>;
- string bname<>;
-} ;
-struct gfs3_create_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- unsigned hyper fd;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
-} ;
-
-
-
-struct gfs3_ftruncate_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned hyper offset;
-} ;
-struct gfs3_ftruncate_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
-} ;
-
-
-struct gfs3_fstat_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
-} ;
- struct gfs3_fstat_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
-} ;
-
-
-
- struct gfs3_entrylk_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int cmd;
- unsigned int type;
- unsigned hyper namelen;
- string path<>;
- string name<>;
- string volume<>;
-};
-
- struct gfs3_fentrylk_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
- unsigned int cmd;
- unsigned int type;
- unsigned hyper namelen;
- string name<>;
- string volume<>;
-};
-
-
-struct gfs3_checksum_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- unsigned int flag;
- string path<>;
-};
- struct gfs3_checksum_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- opaque fchecksum<>;
- opaque dchecksum<>;
-} ;
-
- struct gfs3_setattr_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- struct gf_iatt stbuf;
- int valid;
- string path<>;
-} ;
- struct gfs3_setattr_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
-} ;
-
- struct gfs3_fsetattr_req {
- unsigned hyper gfs_id;
- hyper fd;
- struct gf_iatt stbuf;
- int valid;
-} ;
- struct gfs3_fsetattr_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
-} ;
-
- struct gfs3_rchecksum_req {
- unsigned hyper gfs_id;
- hyper fd;
- unsigned hyper offset;
- unsigned int len;
-} ;
- struct gfs3_rchecksum_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- unsigned int weak_checksum;
- opaque strong_checksum<>;
-} ;
- struct gfs3_releasedir_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
-} ;
-
-struct gfs3_release_req {
- unsigned hyper gfs_id;
- unsigned hyper ino;
- unsigned hyper gen;
- hyper fd;
-} ;
-
-
- struct gf_getspec_req {
- unsigned hyper gfs_id;
- unsigned int flags;
- string key<>;
-} ;
- struct gf_getspec_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- string spec<>;
-} ;
-
-
- struct gf_log_req {
- unsigned hyper gfs_id;
- opaque msg<>;
-};
-
- struct gf_notify_req {
- unsigned hyper gfs_id;
- unsigned int flags;
- string buf<>;
-} ;
- struct gf_notify_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- unsigned int flags;
- string buf<>;
-} ;
-
-
-
-struct gf_common_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
-} ;
-
-
-struct gf_dump_version_req {
- unsigned hyper gfs_id;
- unsigned int flags;
- string key<>;
-};
-
-
-struct gf_dump_version_rsp {
- unsigned hyper gfs_id;
- int op_ret;
- int op_errno;
- unsigned int flags;
- opaque msg<>;
-};
-
-struct auth_glusterfs_parms {
- unsigned int pid;
- unsigned int uid;
- unsigned int gid;
-
- /* Number of groups being sent through the array above. */
- unsigned int ngrps;
-
- /* Array of groups to which the uid belongs apart from the primary group
- * in gid.
- */
- unsigned int groups[GF_REQUEST_MAXGROUPS];
-
- unsigned hyper lk_owner;
-};
diff --git a/xlators/protocol/lib/src/msg-xdr.c b/xlators/protocol/lib/src/msg-xdr.c
deleted file mode 100644
index 8cab3726c3d..00000000000
--- a/xlators/protocol/lib/src/msg-xdr.c
+++ /dev/null
@@ -1,1264 +0,0 @@
-/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "msg-xdr.h"
-
-
-ssize_t
-xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc)
-{
- ssize_t ret = -1;
- XDR xdr;
-
- if ((!outmsg.iov_base) || (!res) || (!proc))
- return -1;
-
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
-
- if (!proc (&xdr, res)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_encoded_length (xdr);
-
-ret:
- return ret;
-}
-
-
-ssize_t
-xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc)
-{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
-
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
-
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_decoded_length (xdr);
-ret:
- return ret;
-}
-
-
-ssize_t
-xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
- struct iovec *pendingpayload)
-{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
-
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
-
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
-
- ret = xdr_decoded_length (xdr);
-
- if (pendingpayload) {
- pendingpayload->iov_base = xdr_decoded_remaining_addr (xdr);
- pendingpayload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
-ret:
- return ret;
-}
-
-/* Encode */
-
-ssize_t
-xdr_serialize_getspec_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_getspec_rsp);
-
-}
-
-ssize_t
-xdr_serialize_lookup_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_lookup_rsp);
-
-}
-
-ssize_t
-xdr_serialize_common_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_common_rsp);
-
-}
-
-ssize_t
-xdr_serialize_setvolume_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_setvolume_rsp);
-
-}
-ssize_t
-xdr_serialize_statfs_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_statfs_rsp);
-
-}
-ssize_t
-xdr_serialize_stat_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_stat_rsp);
-
-}
-ssize_t
-xdr_serialize_fstat_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fstat_rsp);
-
-}
-ssize_t
-xdr_serialize_open_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_open_rsp);
-
-}
-ssize_t
-xdr_serialize_read_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_read_rsp);
-
-}
-ssize_t
-xdr_serialize_write_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_write_rsp);
-
-}
-ssize_t
-xdr_serialize_rename_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_rename_rsp);
-
-}
-ssize_t
-xdr_serialize_fsync_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fsync_rsp);
-
-}
-ssize_t
-xdr_serialize_rmdir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_rmdir_rsp);
-}
-ssize_t
-xdr_serialize_unlink_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_unlink_rsp);
-}
-ssize_t
-xdr_serialize_writev_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_write_rsp);
-}
-ssize_t
-xdr_serialize_readv_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_read_rsp);
-}
-ssize_t
-xdr_serialize_readdir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_readdir_rsp);
-}
-ssize_t
-xdr_serialize_readdirp_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_readdirp_rsp);
-}
-ssize_t
-xdr_serialize_rchecksum_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_rchecksum_rsp);
-}
-ssize_t
-xdr_serialize_setattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_setattr_rsp);
-}
-ssize_t
-xdr_serialize_fsetattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fsetattr_rsp);
-}
-
-ssize_t
-xdr_serialize_readlink_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_readlink_rsp);
-
-}
-ssize_t
-xdr_serialize_symlink_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_symlink_rsp);
-
-}
-ssize_t
-xdr_serialize_create_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_create_rsp);
-
-}
-ssize_t
-xdr_serialize_link_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_link_rsp);
-
-}
-ssize_t
-xdr_serialize_mkdir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_mkdir_rsp);
-
-}
-ssize_t
-xdr_serialize_mknod_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_mknod_rsp);
-
-}
-ssize_t
-xdr_serialize_getxattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_getxattr_rsp);
-
-}
-ssize_t
-xdr_serialize_fgetxattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fgetxattr_rsp);
-
-}
-ssize_t
-xdr_serialize_xattrop_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_xattrop_rsp);
-
-}
-ssize_t
-xdr_serialize_fxattrop_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fxattrop_rsp);
-}
-
-ssize_t
-xdr_serialize_truncate_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_truncate_rsp);
-}
-
-ssize_t
-xdr_serialize_lk_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_lk_rsp);
-}
-
-ssize_t
-xdr_serialize_opendir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_opendir_rsp);
-}
-
-ssize_t
-xdr_serialize_checksum_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_checksum_rsp);
-}
-
-ssize_t
-xdr_serialize_ftruncate_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_ftruncate_rsp);
-}
-
-
-ssize_t
-xdr_serialize_dump_version_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_serialize_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_dump_version_rsp);
-}
-
-
-/* Decode */
-
-
-ssize_t
-xdr_to_dump_version_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gf_dump_version_req);
-}
-
-ssize_t
-xdr_to_lookup_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_lookup_req);
-}
-
-ssize_t
-xdr_to_getspec_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gf_getspec_req);
-
-}
-
-ssize_t
-xdr_to_setvolume_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gf_setvolume_req);
-
-}
-
-ssize_t
-xdr_to_statfs_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_statfs_req);
-
-}
-
-ssize_t
-xdr_to_fsync_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fsync_req);
-
-}
-
-ssize_t
-xdr_to_flush_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_flush_req);
-
-}
-
-ssize_t
-xdr_to_xattrop_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_xattrop_req);
-
-}
-
-ssize_t
-xdr_to_fxattrop_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fxattrop_req);
-
-}
-
-ssize_t
-xdr_to_getxattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_getxattr_req);
-
-}
-ssize_t
-xdr_to_fgetxattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fgetxattr_req);
-
-}
-ssize_t
-xdr_to_open_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_open_req);
-
-}
-ssize_t
-xdr_to_create_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_create_req);
-
-}
-ssize_t
-xdr_to_symlink_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_symlink_req);
-}
-ssize_t
-xdr_to_link_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_link_req);
-}
-ssize_t
-xdr_to_readlink_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_readlink_req);
-}
-ssize_t
-xdr_to_rename_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_rename_req);
-}
-ssize_t
-xdr_to_mkdir_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_mkdir_req);
-}
-ssize_t
-xdr_to_mknod_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_mknod_req);
-}
-ssize_t
-xdr_to_readv_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_read_req);
-}
-ssize_t
-xdr_to_writev_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_write_req);
-}
-
-ssize_t
-xdr_to_readdir_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_readdir_req);
-}
-
-ssize_t
-xdr_to_opendir_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_opendir_req);
-}
-
-ssize_t
-xdr_to_rmdir_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_rmdir_req);
-}
-
-ssize_t
-xdr_to_fsetxattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fsetxattr_req);
-}
-ssize_t
-xdr_to_setattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_setattr_req);
-}
-ssize_t
-xdr_to_fsetattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fsetattr_req);
-}
-
-ssize_t
-xdr_to_finodelk_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_finodelk_req);
-}
-
-ssize_t
-xdr_to_inodelk_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_inodelk_req);
-}
-
-ssize_t
-xdr_to_ftruncate_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_ftruncate_req);
-}
-
-ssize_t
-xdr_to_fsyncdir_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fsyncdir_req);
-}
-
-ssize_t
-xdr_to_fstat_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fstat_req);
-}
-
-ssize_t
-xdr_to_checksum_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_checksum_req);
-}
-ssize_t
-xdr_to_rchecksum_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_rchecksum_req);
-}
-ssize_t
-xdr_to_removexattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_removexattr_req);
-}
-ssize_t
-xdr_to_setxattr_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_setxattr_req);
-}
-
-ssize_t
-xdr_to_fentrylk_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_fentrylk_req);
-}
-
-ssize_t
-xdr_to_entrylk_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_entrylk_req);
-}
-
-ssize_t
-xdr_to_lk_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_lk_req);
-}
-
-ssize_t
-xdr_to_stat_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_stat_req);
-}
-
-ssize_t
-xdr_to_release_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_release_req);
-}
-
-ssize_t
-xdr_to_readdirp_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_readdirp_req);
-}
-ssize_t
-xdr_to_truncate_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_truncate_req);
-}
-ssize_t
-xdr_to_access_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_access_req);
-}
-ssize_t
-xdr_to_unlink_req (struct iovec inmsg, void *args)
-{
- return xdr_to_generic (inmsg, (void *)args,
- (xdrproc_t)xdr_gfs3_unlink_req);
-}
-
-ssize_t
-xdr_from_lookup_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_lookup_req);
-
-}
-
-ssize_t
-xdr_from_stat_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_stat_req);
-
-}
-
-ssize_t
-xdr_from_fstat_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fstat_req);
-
-}
-
-ssize_t
-xdr_from_mkdir_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_mkdir_req);
-
-}
-
-ssize_t
-xdr_from_mknod_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_mknod_req);
-
-}
-
-ssize_t
-xdr_from_symlink_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_symlink_req);
-
-}
-
-ssize_t
-xdr_from_readlink_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_readlink_req);
-
-}
-
-ssize_t
-xdr_from_rename_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_rename_req);
-
-}
-
-ssize_t
-xdr_from_link_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_link_req);
-
-}
-
-ssize_t
-xdr_from_create_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_create_req);
-
-}
-
-ssize_t
-xdr_from_open_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_open_req);
-
-}
-
-ssize_t
-xdr_from_opendir_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_opendir_req);
-
-}
-
-ssize_t
-xdr_from_readdir_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_readdir_req);
-
-}
-
-ssize_t
-xdr_from_readdirp_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_readdirp_req);
-
-}
-
-ssize_t
-xdr_from_fsyncdir_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fsyncdir_req);
-
-}
-ssize_t
-xdr_from_releasedir_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_releasedir_req);
-
-}
-ssize_t
-xdr_from_release_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_release_req);
-
-}
-ssize_t
-xdr_from_lk_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_lk_req);
-
-}
-ssize_t
-xdr_from_entrylk_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_entrylk_req);
-
-}
-ssize_t
-xdr_from_fentrylk_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fentrylk_req);
-
-}
-ssize_t
-xdr_from_inodelk_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_inodelk_req);
-
-}
-ssize_t
-xdr_from_finodelk_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_finodelk_req);
-
-}
-ssize_t
-xdr_from_setxattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_setxattr_req);
-
-}
-ssize_t
-xdr_from_fsetxattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fsetxattr_req);
-
-}
-ssize_t
-xdr_from_getxattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_getxattr_req);
-
-}
-ssize_t
-xdr_from_fgetxattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fgetxattr_req);
-
-}
-ssize_t
-xdr_from_removexattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_removexattr_req);
-
-}
-ssize_t
-xdr_from_xattrop_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_xattrop_req);
-
-}
-ssize_t
-xdr_from_fxattrop_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fxattrop_req);
-
-}
-ssize_t
-xdr_from_access_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_access_req);
-
-}
-ssize_t
-xdr_from_setattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_setattr_req);
-
-}
-ssize_t
-xdr_from_truncate_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_truncate_req);
-
-}
-ssize_t
-xdr_from_ftruncate_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_ftruncate_req);
-
-}
-ssize_t
-xdr_from_fsetattr_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fsetattr_req);
-
-}
-ssize_t
-xdr_from_readv_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_read_req);
-
-}
-ssize_t
-xdr_from_writev_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_write_req);
-
-}
-ssize_t
-xdr_from_fsync_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_fsync_req);
-
-}
-ssize_t
-xdr_from_flush_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_flush_req);
-
-}
-ssize_t
-xdr_from_statfs_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_statfs_req);
-
-}
-ssize_t
-xdr_from_checksum_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_checksum_req);
-
-}
-ssize_t
-xdr_from_rchecksum_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_rchecksum_req);
-
-}
-ssize_t
-xdr_from_getspec_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gf_getspec_req);
-
-}
-ssize_t
-xdr_from_setvolume_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gf_setvolume_req);
-
-}
-ssize_t
-xdr_from_dump_version_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gf_dump_version_req);
-
-}
-ssize_t
-xdr_from_rmdir_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_rmdir_req);
-
-}
-ssize_t
-xdr_from_unlink_req (struct iovec outmsg, void *req)
-{
- return xdr_serialize_generic (outmsg, (void *)req,
- (xdrproc_t)xdr_gfs3_unlink_req);
-
-}
-
-/* Client decode */
-
-ssize_t
-xdr_to_lookup_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_lookup_rsp);
-
-}
-
-ssize_t
-xdr_to_stat_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_stat_rsp);
-
-}
-
-ssize_t
-xdr_to_fstat_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fstat_rsp);
-
-}
-
-ssize_t
-xdr_to_mkdir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_mkdir_rsp);
-
-}
-
-ssize_t
-xdr_to_mknod_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_mknod_rsp);
-
-}
-
-ssize_t
-xdr_to_symlink_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_symlink_rsp);
-
-}
-
-ssize_t
-xdr_to_readlink_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_readlink_rsp);
-
-}
-
-ssize_t
-xdr_to_rename_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_rename_rsp);
-
-}
-
-ssize_t
-xdr_to_link_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_link_rsp);
-
-}
-
-ssize_t
-xdr_to_create_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_create_rsp);
-
-}
-
-ssize_t
-xdr_to_open_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_open_rsp);
-
-}
-
-ssize_t
-xdr_to_opendir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_opendir_rsp);
-
-}
-
-ssize_t
-xdr_to_readdir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_readdir_rsp);
-
-}
-
-ssize_t
-xdr_to_readdirp_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_readdirp_rsp);
-
-}
-ssize_t
-xdr_to_lk_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_lk_rsp);
-
-}
-ssize_t
-xdr_to_getxattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_getxattr_rsp);
-
-}
-ssize_t
-xdr_to_fgetxattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fgetxattr_rsp);
-
-}
-ssize_t
-xdr_to_xattrop_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_xattrop_rsp);
-
-}
-ssize_t
-xdr_to_fxattrop_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fxattrop_rsp);
-
-}
-ssize_t
-xdr_to_setattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_setattr_rsp);
-
-}
-ssize_t
-xdr_to_truncate_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_truncate_rsp);
-
-}
-ssize_t
-xdr_to_ftruncate_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_ftruncate_rsp);
-
-}
-ssize_t
-xdr_to_fsetattr_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fsetattr_rsp);
-
-}
-ssize_t
-xdr_to_readv_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_read_rsp);
-
-}
-ssize_t
-xdr_to_writev_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_write_rsp);
-
-}
-ssize_t
-xdr_to_fsync_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_fsync_rsp);
-
-}
-ssize_t
-xdr_to_statfs_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_statfs_rsp);
-
-}
-ssize_t
-xdr_to_checksum_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_checksum_rsp);
-
-}
-ssize_t
-xdr_to_rchecksum_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_rchecksum_rsp);
-
-}
-ssize_t
-xdr_to_getspec_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_getspec_rsp);
-
-}
-ssize_t
-xdr_to_setvolume_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_setvolume_rsp);
-
-}
-ssize_t
-xdr_to_dump_version_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_dump_version_rsp);
-
-}
-ssize_t
-xdr_to_rmdir_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_rmdir_rsp);
-
-}
-ssize_t
-xdr_to_unlink_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gfs3_unlink_rsp);
-
-}
-ssize_t
-xdr_to_common_rsp (struct iovec outmsg, void *rsp)
-{
- return xdr_to_generic (outmsg, (void *)rsp,
- (xdrproc_t)xdr_gf_common_rsp);
-
-}
diff --git a/xlators/protocol/lib/src/msg-xdr.h b/xlators/protocol/lib/src/msg-xdr.h
deleted file mode 100644
index 872d9401e48..00000000000
--- a/xlators/protocol/lib/src/msg-xdr.h
+++ /dev/null
@@ -1,538 +0,0 @@
-/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _MSG_XDR_H
-#define _MSG_XDR_H
-
-#include <sys/uio.h>
-
-#include "glusterfs-xdr.h"
-
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-
-
-/* FOPS */
-ssize_t
-xdr_serialize_lookup_rsp (struct iovec outmsg, void *resp);
-
-ssize_t
-xdr_serialize_getspec_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_common_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_setvolume_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_open_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_create_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_mknod_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_mkdir_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_symlink_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_link_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_rename_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_writev_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_readv_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_readdir_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_readdirp_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_opendir_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_setattr_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_fsetattr_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_truncate_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_ftruncate_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_checksum_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_statfs_rsp (struct iovec outmsg, void *rsp);
-
-
-ssize_t
-xdr_serialize_lk_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_xattrop_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_fxattrop_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_getxattr_rsp (struct iovec outmsg, void *rsp);
-
-
-ssize_t
-xdr_serialize_fgetxattr_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_unlink_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_rmdir_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_rchecksum_rsp (struct iovec outmsg, void *rsp);
-
-
-ssize_t
-xdr_serialize_fstat_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_fsync_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_readlink_rsp (struct iovec outmsg, void *rsp);
-
-ssize_t
-xdr_serialize_stat_rsp (struct iovec outmsg, void *rsp);
-
-
-ssize_t
-xdr_to_lookup_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_getspec_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_setvolume_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_statfs_req (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_stat_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_getattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fstat_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_setattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fsetattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_readv_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_writev_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fsetattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_readlink_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_create_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_open_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_release_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_xattrop_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fxattrop_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_setxattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fsetxattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_flush_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_unlink_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fsync_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_ftruncate_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_truncate_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_getxattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fgetxattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_removexattr_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_entrylk_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fentrylk_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_inodelk_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_finodelk_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_lk_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_access_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_opendir_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_readdirp_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_readdir_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fsyncdir_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_mknod_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_mkdir_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_symlink_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_rmdir_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_checksum_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_rchecksum_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_rename_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_link_req (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_from_lookup_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_getspec_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_stat_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_access_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_truncate_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_ftruncate_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_readlink_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_writev_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_readv_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_flush_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fstat_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fsync_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_open_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_unlink_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_rmdir_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fsyncdir_req (struct iovec outmsg, void *args);
-
-
-ssize_t
-xdr_from_fsetxattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_setxattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_getxattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fgetxattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_statfs_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_opendir_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_lk_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_inodelk_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_finodelk_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_entrylk_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fentrylk_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_removexattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_xattrop_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fxattrop_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_checksum_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_rchecksum_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_readdir_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_readdirp_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_setattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_fsetattr_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_symlink_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_rename_req (struct iovec outmsg, void *args);
-
-
-ssize_t
-xdr_from_link_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_rename_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_create_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_mkdir_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_mknod_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_releasedir_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_release_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_dump_version_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_from_setvolume_req (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_to_setvolume_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_dump_version_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_serialize_dump_version_rsp (struct iovec outmsg, void *args);
-
-ssize_t
-xdr_to_dump_version_req (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_statfs_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_stat_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fstat_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_rename_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_readlink_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_link_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_access_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_truncate_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_ftruncate_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_unlink_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_rmdir_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_open_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_create_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_mkdir_rsp (struct iovec inmsg, void *args);
-
-
-ssize_t
-xdr_to_mknod_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_setattr_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fsetattr_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_common_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_getxattr_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fxattrop_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_xattrop_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_symlink_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_fgetxattr_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_checksum_rsp (struct iovec inmsg, void *args);
-ssize_t
-xdr_to_rchecksum_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_lk_rsp (struct iovec inmsg, void *args);
-ssize_t
-xdr_to_readdirp_rsp (struct iovec inmsg, void *args);
-
-ssize_t
-xdr_to_readdir_rsp (struct iovec inmsg, void *args);
-ssize_t
-xdr_to_opendir_rsp (struct iovec inmsg, void *args);
-ssize_t
-xdr_to_lookup_rsp (struct iovec inmsg, void *args);
-ssize_t
-xdr_to_readv_rsp (struct iovec inmsg, void *args);
-ssize_t
-xdr_to_getspec_rsp (struct iovec inmsg, void *args);
-
-#endif /* !_MSG_XDR_H */
diff --git a/xlators/protocol/lib/src/protocol-common.h b/xlators/protocol/lib/src/protocol-common.h
deleted file mode 100644
index 5378d90ba45..00000000000
--- a/xlators/protocol/lib/src/protocol-common.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _PROTOCOL_COMMON_H
-#define _PROTOCOL_COMMON_H
-
-enum gf_fop_procnum {
- GFS3_OP_NULL, /* 0 */
- GFS3_OP_STAT,
- GFS3_OP_READLINK,
- GFS3_OP_MKNOD,
- GFS3_OP_MKDIR,
- GFS3_OP_UNLINK,
- GFS3_OP_RMDIR,
- GFS3_OP_SYMLINK,
- GFS3_OP_RENAME,
- GFS3_OP_LINK,
- GFS3_OP_TRUNCATE,
- GFS3_OP_OPEN,
- GFS3_OP_READ,
- GFS3_OP_WRITE,
- GFS3_OP_STATFS,
- GFS3_OP_FLUSH,
- GFS3_OP_FSYNC,
- GFS3_OP_SETXATTR,
- GFS3_OP_GETXATTR,
- GFS3_OP_REMOVEXATTR,
- GFS3_OP_OPENDIR,
- GFS3_OP_FSYNCDIR,
- GFS3_OP_ACCESS,
- GFS3_OP_CREATE,
- GFS3_OP_FTRUNCATE,
- GFS3_OP_FSTAT,
- GFS3_OP_LK,
- GFS3_OP_LOOKUP,
- GFS3_OP_READDIR,
- GFS3_OP_INODELK,
- GFS3_OP_FINODELK,
- GFS3_OP_ENTRYLK,
- GFS3_OP_FENTRYLK,
- GFS3_OP_CHECKSUM,
- GFS3_OP_XATTROP,
- GFS3_OP_FXATTROP,
- GFS3_OP_FGETXATTR,
- GFS3_OP_FSETXATTR,
- GFS3_OP_RCHECKSUM,
- GFS3_OP_SETATTR,
- GFS3_OP_FSETATTR,
- GFS3_OP_READDIRP,
- GFS3_OP_RELEASE,
- GFS3_OP_RELEASEDIR,
- GFS3_OP_MAXVALUE,
-} ;
-
-enum gf_handshake_procnum {
- GF_HNDSK_NULL,
- GF_HNDSK_DUMP_VERSION,
- GF_HNDSK_SETVOLUME,
- GF_HNDSK_GETSPEC,
- GF_HNDSK_PING,
- GF_HNDSK_MAXVALUE,
-};
-
-enum gf_mgmt_procnum {
- GF1_MGMT_NULL, /* 0 */
-};
-
-
-#define GLUSTER3_1_FOP_PROGRAM 1298437 /* Completely random */
-#define GLUSTER3_1_FOP_VERSION 310 /* 3.1.0 */
-#define GLUSTER3_1_FOP_PROCCNT GFS3_OP_MAXVALUE
-
-#define GLUSTER1_MGMT_PROGRAM 1298433 /* Completely random */
-#define GLUSTER1_MGMT_VERSION 1 /* 0.0.1 */
-
-#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
-#define GLUSTER_HNDSK_VERSION 1 /* 0.0.1 */
-
-#endif /* !_PROTOCOL_COMMON_H */
diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am
index be304bdc768..bb46fda6f08 100644
--- a/xlators/protocol/server/src/Makefile.am
+++ b/xlators/protocol/server/src/Makefile.am
@@ -1,22 +1,29 @@
xlator_LTLIBRARIES = server.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
-server_la_LDFLAGS = -module -avoidversion
+server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(top_builddir)/xlators/protocol/lib/src/libgfproto1.la
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
server_la_SOURCES = server.c server-resolve.c server-helpers.c \
- server3_1-fops.c server-handshake.c
+ server-rpc-fops.c server-handshake.c authenticate.c server-common.c
-noinst_HEADERS = server.h server-helpers.h server-mem-types.h
+server_la_HEADERS = server.h server-helpers.h server-mem-types.h authenticate.h \
+ server-messages.h server-common.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" -D$(GF_HOST_OS) \
- $(GF_CFLAGS) -I$(top_srcdir)/xlators/protocol/lib/src \
- -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/contrib/md5/
+server_ladir = $(includedir)/glusterfs/server
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
+ -I$(top_srcdir)/xlators/protocol/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
CLEANFILES = *~
diff --git a/xlators/protocol/server/src/authenticate.c b/xlators/protocol/server/src/authenticate.c
new file mode 100644
index 00000000000..c0007766f85
--- /dev/null
+++ b/xlators/protocol/server/src/authenticate.c
@@ -0,0 +1,237 @@
+/*
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include "authenticate.h"
+#include "server-messages.h"
+
+/*
+ * dict_foreach() callback: load the authentication module named by 'key'.
+ *
+ * Builds "<LIBDIR>/<key>.so", dlopen()s it, resolves the "gf_auth" entry
+ * point and the optional "options" symbol, and stores the resulting
+ * auth_handle_t back into the dictionary under 'key'.
+ *
+ * A key beginning with "ip" (case-insensitive) is treated as the module
+ * "addr" after an error is logged and the original key is reset to an empty
+ * value.
+ *
+ * 'data' points to an int that is set to -1 on any failure.
+ * Returns 0 on success, -1 on failure (the dictionary entry for 'key' is then
+ * replaced with an empty value).
+ */
+static int
+init (dict_t *this, char *key, data_t *value, void *data)
+{
+        int           *status  = data;  /* written only on failure */
+        char          *so_path = NULL;
+        void          *dl      = NULL;
+        auth_fn_t      auth_fn = NULL;
+        auth_handle_t *entry   = NULL;
+
+        if (strncasecmp (key, "ip", strlen ("ip")) == 0) {
+                gf_msg ("authenticate", GF_LOG_ERROR, 0,
+                        PS_MSG_AUTHENTICATE_ERROR, "AUTHENTICATION MODULE "
+                        "\"IP\" HAS BEEN REPLACED BY \"ADDR\"");
+                dict_set (this, key, data_from_dynptr (NULL, 0));
+                /* TODO: 1.3.x backward compatibility */
+                key = "addr";
+        }
+
+        if (gf_asprintf (&so_path, "%s/%s.so", LIBDIR, key) == -1)
+                goto fail;
+
+        dl = dlopen (so_path, RTLD_LAZY);
+        if (dl == NULL) {
+                gf_msg ("authenticate", GF_LOG_ERROR, 0,
+                        PS_MSG_AUTHENTICATE_ERROR, "dlopen(%s): %s\n",
+                        so_path, dlerror ());
+                GF_FREE (so_path);
+                goto fail;
+        }
+        /* The path is only needed for dlopen() and its error message. */
+        GF_FREE (so_path);
+
+        auth_fn = dlsym (dl, "gf_auth");
+        if (auth_fn == NULL) {
+                gf_msg ("authenticate", GF_LOG_ERROR, 0,
+                        PS_MSG_AUTHENTICATE_ERROR, "dlsym(gf_auth) on %s\n",
+                        dlerror ());
+                goto fail;
+        }
+
+        entry = GF_CALLOC (1, sizeof (*entry), gf_common_mt_auth_handle_t);
+        if (entry == NULL)
+                goto fail;
+
+        entry->vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
+                                    gf_common_mt_volume_opt_list_t);
+        if (entry->vol_opt == NULL)
+                goto fail;
+
+        /* A module is allowed not to export an option table. */
+        entry->vol_opt->given_opt = dlsym (dl, "options");
+        if (entry->vol_opt->given_opt == NULL) {
+                gf_msg_debug ("authenticate", 0, "volume option validation "
+                              "not specified");
+        }
+
+        entry->authenticate = auth_fn;
+        entry->handle = dl;
+
+        dict_set (this, key, data_from_dynptr (entry, sizeof (*entry)));
+        return 0;
+
+fail:
+        /* Leave an empty value behind so later callbacks skip this key. */
+        dict_set (this, key, data_from_dynptr (NULL, 0));
+        *status = -1;
+        if (entry != NULL)
+                GF_FREE (entry);
+        if (dl != NULL)
+                dlclose (dl);
+        return -1;
+}
+
+/*
+ * dict_foreach() callback: dlclose() the shared object of one loaded
+ * authentication module. Entries without a handle are ignored.
+ * 'this', 'key' and 'data' are unused. Always returns 0.
+ */
+static int
+fini (dict_t *this, char *key, data_t *value, void *data)
+{
+        auth_handle_t *module = data_to_ptr (value);
+
+        if (module != NULL)
+                dlclose (module->handle);
+
+        return 0;
+}
+
+/*
+ * dict_foreach() callback: append one module's option list to the xlator's
+ * volume_options and validate the xlator's options against it.
+ *
+ * 'tmp' is the xlator_t being configured. Entries without a handle are
+ * skipped. Returns 0 on success or skip, -1 when validation fails.
+ */
+static int
+_gf_auth_option_validate (dict_t *d, char *k, data_t *v, void *tmp)
+{
+        xlator_t      *xl     = tmp;
+        auth_handle_t *module = data_to_ptr (v);
+
+        if (module == NULL)
+                return 0;
+
+        list_add_tail (&(module->vol_opt->list), &(xl->volume_options));
+
+        if (xlator_options_validate_list (xl, xl->options,
+                                          module->vol_opt, NULL) == 0)
+                return 0;
+
+        gf_msg ("authenticate", GF_LOG_ERROR, 0,
+                PS_MSG_VOL_VALIDATE_FAILED, "volume option validation "
+                "failed");
+        return -1;
+}
+
+/*
+ * Load every authentication module named in 'auth_modules' and validate the
+ * options of 'xl' against each module's option list.
+ *
+ * On any failure the error is logged, fini() is run over the dictionary to
+ * close the loaded modules, and -1 is returned. Returns 0 on success.
+ */
+int32_t
+gf_auth_init (xlator_t *xl, dict_t *auth_modules)
+{
+        int ret = 0;
+
+        /* init() reports failure through 'ret', not through the foreach. */
+        dict_foreach (auth_modules, init, &ret);
+
+        if (ret == 0)
+                ret = dict_foreach (auth_modules, _gf_auth_option_validate,
+                                    xl);
+
+        if (ret != 0) {
+                gf_msg (xl->name, GF_LOG_ERROR, 0, PS_MSG_AUTH_INIT_FAILED,
+                        "authentication init failed");
+                dict_foreach (auth_modules, fini, &ret);
+                ret = -1;
+        }
+
+        return ret;
+}
+
+/*
+ * State passed to gf_auth_one_method() while iterating over the modules:
+ * iparams and cparams are handed to each module's authenticate function as
+ * its first and second argument; result accumulates the combined verdict.
+ */
+typedef struct {
+ dict_t *iparams;
+ dict_t *cparams;
+ int64_t result;
+} gf_auth_args_t;
+
+/*
+ * dict_foreach() callback: ask one authentication module for its verdict
+ * and fold it into the gf_auth_args_t pointed to by 'data'.
+ *
+ * A reject is recorded and -1 is returned. An accept is recorded unless a
+ * reject has already been recorded. Any other verdict, and any entry
+ * without a usable handle, leaves the accumulated result untouched.
+ */
+static int
+gf_auth_one_method (dict_t *this, char *key, data_t *value, void *data)
+{
+        gf_auth_args_t *ctx     = data;
+        auth_handle_t  *module  = NULL;
+        auth_result_t   verdict = AUTH_DONT_CARE;
+
+        if (value == NULL)
+                return 0;
+
+        module = data_to_ptr (value);
+        if (module == NULL || module->authenticate == NULL)
+                return 0;
+
+        verdict = module->authenticate (ctx->iparams, ctx->cparams);
+
+        if (verdict == AUTH_REJECT) {
+                ctx->result = AUTH_REJECT;
+                return -1;
+        }
+
+        if (verdict == AUTH_ACCEPT && ctx->result != AUTH_REJECT)
+                ctx->result = AUTH_ACCEPT;
+
+        return 0;
+}
+
+/*
+ * Run every loaded authentication module over the connection parameters.
+ *
+ * input_params  - passed as the first argument to each module
+ * config_params - passed as the second argument to each module
+ * auth_modules  - dictionary of auth_handle_t entries
+ *
+ * Returns the accumulated verdict. If no module accepted or rejected the
+ * client, the refusal is logged and AUTH_REJECT is returned.
+ */
+auth_result_t
+gf_authenticate (dict_t *input_params,
+                 dict_t *config_params,
+                 dict_t *auth_modules)
+{
+        /* Placeholder so the log call never passes NULL to "%s". */
+        const char     *name          = "(unknown)";
+        data_t         *peerinfo_data = NULL;
+        gf_auth_args_t  args;
+
+        args.iparams = input_params;
+        args.cparams = config_params;
+        args.result = AUTH_DONT_CARE;
+
+        dict_foreach (auth_modules, gf_auth_one_method, &args);
+
+        if (AUTH_DONT_CARE == args.result) {
+                peerinfo_data = dict_get (input_params, "peer-info-name");
+
+                if (peerinfo_data && peerinfo_data->data) {
+                        name = peerinfo_data->data;
+                }
+
+                gf_msg ("auth", GF_LOG_ERROR, 0, PS_MSG_REMOTE_CLIENT_REFUSED,
+                        "no authentication module is interested in "
+                        "accepting remote-client %s", name);
+                args.result = AUTH_REJECT;
+        }
+
+        return args.result;
+}
+
+/*
+ * Close the shared object of every module in 'auth_modules' by running
+ * fini() over the dictionary.
+ */
+void
+gf_auth_fini (dict_t *auth_modules)
+{
+        /* fini() ignores its data argument; this only satisfies the API. */
+        int32_t unused = 0;
+
+        dict_foreach (auth_modules, fini, &unused);
+}
diff --git a/xlators/protocol/server/src/authenticate.h b/xlators/protocol/server/src/authenticate.h
new file mode 100644
index 00000000000..3f80231ee0a
--- /dev/null
+++ b/xlators/protocol/server/src/authenticate.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _AUTHENTICATE_H
+#define _AUTHENTICATE_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <fnmatch.h>
+#include "dict.h"
+#include "compat.h"
+#include "list.h"
+#include "xlator.h"
+
+/*
+ * Verdict of an authentication module. AUTH_DONT_CARE is the starting value
+ * used by gf_authenticate(); if it is still the result after all modules
+ * have run, the client is rejected.
+ */
+typedef enum {
+ AUTH_ACCEPT,
+ AUTH_REJECT,
+ AUTH_DONT_CARE
+} auth_result_t;
+
+/* Signature of the "gf_auth" entry point resolved from each module. */
+typedef auth_result_t (*auth_fn_t) (dict_t *input_params,
+ dict_t *config_params);
+
+/*
+ * One loaded authentication module:
+ *   handle       - value returned by dlopen() for the module
+ *   authenticate - the module's "gf_auth" function
+ *   vol_opt      - option list whose given_opt is the module's "options"
+ *                  symbol (NULL when the module does not export one)
+ */
+typedef struct {
+ void *handle;
+ auth_fn_t authenticate;
+ volume_opt_list_t *vol_opt;
+} auth_handle_t;
+
+/* Run all modules in auth_modules; returns AUTH_ACCEPT or AUTH_REJECT. */
+auth_result_t gf_authenticate (dict_t *input_params,
+ dict_t *config_params,
+ dict_t *auth_modules);
+/* Load the modules named in auth_modules; returns 0 on success, -1 on error. */
+int32_t gf_auth_init (xlator_t *xl, dict_t *auth_modules);
+/* dlclose() every module held in auth_modules. */
+void gf_auth_fini (dict_t *auth_modules);
+
+#endif /* _AUTHENTICATE_H */
diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c
new file mode 100644
index 00000000000..fd6749a4df7
--- /dev/null
+++ b/xlators/protocol/server/src/server-common.c
@@ -0,0 +1,472 @@
+#include "server.h"
+#include "defaults.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "compat-errno.h"
+#include "server-messages.h"
+#include "defaults.h"
+#include "fd.h"
+#include "xdr-nfs3.h"
+
+/* Convert stbuf into the stat field of the STAT response. */
+void
+server_post_stat (gfs3_stat_rsp *rsp, struct iatt *stbuf)
+{
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+}
+
+/*
+ * Fill the READLINK response: convert stbuf into rsp->buf and point
+ * rsp->path at the link target, using an empty string when buf is NULL.
+ * The path is not copied.
+ */
+void
+server_post_readlink (gfs3_readlink_rsp *rsp, struct iatt *stbuf,
+                      const char *buf)
+{
+        gf_stat_from_iatt (&rsp->buf, stbuf);
+
+        rsp->path = (buf != NULL) ? (char *)buf : "";
+}
+
+/*
+ * Fill the MKNOD response from stbuf/preparent/postparent, then link the
+ * inode under state->loc.parent with name state->loc.name, call
+ * inode_lookup() on the linked inode and drop the reference returned by
+ * inode_link().
+ */
+void
+server_post_mknod (server_state_t *state, gfs3_mknod_rsp *rsp,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, inode_t *inode)
+{
+ inode_t *link_inode = NULL;
+
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+ gf_stat_from_iatt (&rsp->preparent, preparent);
+ gf_stat_from_iatt (&rsp->postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+}
+
+/*
+ * Fill the MKDIR response from stbuf/preparent/postparent, then link the
+ * inode under state->loc.parent with name state->loc.name, call
+ * inode_lookup() on the linked inode and drop the reference returned by
+ * inode_link(). xdata is not used here.
+ */
+void
+server_post_mkdir (server_state_t *state, gfs3_mkdir_rsp *rsp,
+ inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ inode_t *link_inode = NULL;
+
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+ gf_stat_from_iatt (&rsp->preparent, preparent);
+ gf_stat_from_iatt (&rsp->postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+}
+
+/*
+ * Remove the (state->loc.parent, state->loc.name) entry for state->loc.inode,
+ * call forget_inode_if_no_dentry() on that inode, and fill the UNLINK
+ * response from preparent/postparent.
+ */
+void
+server_post_unlink (server_state_t *state, gfs3_unlink_rsp *rsp,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ inode_unlink (state->loc.inode, state->loc.parent,
+ state->loc.name);
+
+ forget_inode_if_no_dentry (state->loc.inode);
+
+ gf_stat_from_iatt (&rsp->preparent, preparent);
+ gf_stat_from_iatt (&rsp->postparent, postparent);
+
+}
+
+/*
+ * Remove the (state->loc.parent, state->loc.name) entry for state->loc.inode,
+ * call forget_inode_if_no_dentry() on that inode, and fill the RMDIR
+ * response from preparent/postparent.
+ */
+void
+server_post_rmdir (server_state_t *state, gfs3_rmdir_rsp *rsp,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ inode_unlink (state->loc.inode, state->loc.parent,
+ state->loc.name);
+ /* parent should not be found for directories after
+ * inode_unlink, since directories cannot have
+ * hardlinks.
+ */
+ forget_inode_if_no_dentry (state->loc.inode);
+
+ gf_stat_from_iatt (&rsp->preparent, preparent);
+ gf_stat_from_iatt (&rsp->postparent, postparent);
+}
+
+/*
+ * Fill the SYMLINK response from stbuf/preparent/postparent, then link the
+ * inode under state->loc.parent with name state->loc.name, call
+ * inode_lookup() on the linked inode and drop the reference returned by
+ * inode_link(). xdata is not used here.
+ */
+void
+server_post_symlink (server_state_t *state, gfs3_symlink_rsp *rsp,
+ inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ inode_t *link_inode = NULL;
+
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+ gf_stat_from_iatt (&rsp->preparent, preparent);
+ gf_stat_from_iatt (&rsp->postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+}
+
+/*
+ * Fill the LINK response from stbuf/preparent/postparent, then link the
+ * inode under the new location (state->loc2.parent, state->loc2.name), call
+ * inode_lookup() on the linked inode and drop the reference returned by
+ * inode_link(). xdata is not used here.
+ */
+void
+server_post_link (server_state_t *state, gfs3_link_rsp *rsp,
+ inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ inode_t *link_inode = NULL;
+
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+ gf_stat_from_iatt (&rsp->preparent, preparent);
+ gf_stat_from_iatt (&rsp->postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc2.parent,
+ state->loc2.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+}
+
+/* Convert prebuf/postbuf into the prestat/poststat fields of the TRUNCATE response. */
+void
+server_post_truncate (gfs3_truncate_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ gf_stat_from_iatt (&rsp->prestat, prebuf);
+ gf_stat_from_iatt (&rsp->poststat, postbuf);
+}
+
+/* Convert prebuf/postbuf into the prestat/poststat fields of the WRITE response. */
+void
+server_post_writev (gfs3_write_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ gf_stat_from_iatt (&rsp->prestat, prebuf);
+ gf_stat_from_iatt (&rsp->poststat, postbuf);
+}
+
+/* Convert the statvfs result into the statfs field of the STATFS response. */
+void
+server_post_statfs (gfs3_statfs_rsp *rsp, struct statvfs *stbuf)
+{
+ gf_statfs_from_statfs (&rsp->statfs, stbuf);
+}
+
+/* Convert prebuf/postbuf into the prestat/poststat fields of the FSYNC response. */
+void
+server_post_fsync (gfs3_fsync_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ gf_stat_from_iatt (&rsp->prestat, prebuf);
+ gf_stat_from_iatt (&rsp->poststat, postbuf);
+}
+
+/* Convert prebuf/postbuf into the prestat/poststat fields of the FTRUNCATE response. */
+void
+server_post_ftruncate (gfs3_ftruncate_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ gf_stat_from_iatt (&rsp->prestat, prebuf);
+ gf_stat_from_iatt (&rsp->poststat, postbuf);
+}
+
+/* Convert stbuf into the stat field of the FSTAT response. */
+void
+server_post_fstat (gfs3_fstat_rsp *rsp, struct iatt *stbuf)
+{
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+}
+
+/*
+ * Fill the LK response from 'lock'.
+ *
+ * lock->l_type is rewritten in place from the F_RDLCK / F_WRLCK / F_UNLCK
+ * constants to the matching GF_LK_F_* values before the lock is converted
+ * into rsp->flock. An unrecognised type is logged and left unchanged.
+ */
+void
+server_post_lk (xlator_t *this, gfs3_lk_rsp *rsp, struct gf_flock *lock)
+{
+        if (lock->l_type == F_RDLCK) {
+                lock->l_type = GF_LK_F_RDLCK;
+        } else if (lock->l_type == F_WRLCK) {
+                lock->l_type = GF_LK_F_WRLCK;
+        } else if (lock->l_type == F_UNLCK) {
+                lock->l_type = GF_LK_F_UNLCK;
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_LOCK_ERROR,
+                        "Unknown lock type: %"PRId32"!", lock->l_type);
+        }
+
+        gf_proto_flock_from_flock (&rsp->flock, lock);
+}
+
+/*
+ * Serialize 'entries' into the READDIR response.
+ * Returns whatever serialize_rsp_dirent() returns.
+ */
+int
+server_post_readdir (gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
+{
+        return serialize_rsp_dirent (entries, rsp);
+}
+/* Convert statpre/statpost into the matching fields of the ZEROFILL response. */
+void
+server_post_zerofill (gfs3_zerofill_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ gf_stat_from_iatt (&rsp->statpre, statpre);
+ gf_stat_from_iatt (&rsp->statpost, statpost);
+}
+
+/* Convert statpre/statpost into the matching fields of the DISCARD response. */
+void
+server_post_discard (gfs3_discard_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ gf_stat_from_iatt (&rsp->statpre, statpre);
+ gf_stat_from_iatt (&rsp->statpost, statpost);
+}
+
+/* Convert statpre/statpost into the matching fields of the FALLOCATE response. */
+void
+server_post_fallocate (gfs3_fallocate_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ gf_stat_from_iatt (&rsp->statpre, statpre);
+ gf_stat_from_iatt (&rsp->statpost, statpost);
+}
+
+/*
+ * Serialize 'entries' into the READDIRP response.
+ * Returns whatever serialize_rsp_direntp() returns.
+ */
+int
+server_post_readdirp (gfs3_readdirp_rsp *rsp, gf_dirent_t *entries)
+{
+        return serialize_rsp_direntp (entries, rsp);
+}
+
+/* Convert statpre/statpost into the matching fields of the FSETATTR response. */
+void
+server_post_fsetattr (gfs3_fsetattr_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ gf_stat_from_iatt (&rsp->statpre, statpre);
+ gf_stat_from_iatt (&rsp->statpost, statpost);
+}
+
+/* Convert statpre/statpost into the matching fields of the SETATTR response. */
+void
+server_post_setattr (gfs3_setattr_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ gf_stat_from_iatt (&rsp->statpre, statpre);
+ gf_stat_from_iatt (&rsp->statpost, statpost);
+}
+
+/*
+ * Fill the RCHECKSUM response. The strong checksum buffer is referenced,
+ * not copied, and its length is always reported as MD5_DIGEST_LENGTH.
+ */
+void
+server_post_rchecksum (gfs3_rchecksum_rsp *rsp, uint32_t weak_checksum,
+ uint8_t *strong_checksum)
+{
+ rsp->weak_checksum = weak_checksum;
+
+ rsp->strong_checksum.strong_checksum_val = (char *)strong_checksum;
+ rsp->strong_checksum.strong_checksum_len = MD5_DIGEST_LENGTH;
+
+}
+
+/*
+ * Apply a completed RENAME to the inode table and fill the response.
+ *
+ * stbuf->ia_type is overwritten with the type of state->loc.inode. Any inode
+ * already present at the destination (state->loc2) is unlinked first; the
+ * source entry (state->loc) is then renamed to the destination, and stbuf
+ * plus the four parent iatts are converted into rsp.
+ */
+void
+server_post_rename (call_frame_t *frame,
+ server_state_t *state, gfs3_rename_rsp *rsp,
+ struct iatt *stbuf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
+{
+ inode_t *tmp_inode = NULL;
+ inode_t *tmp_parent = NULL;
+
+ stbuf->ia_type = state->loc.inode->ia_type;
+
+ /* TODO: log gfid of the inodes */
+ gf_msg_trace (frame->root->client->bound_xl->name, 0, "%"PRId64": "
+ "RENAME_CBK %s ==> %s", frame->root->unique,
+ state->loc.name, state->loc2.name);
+
+ /* Before renaming the inode, we have to get the inode for the
+ * destination entry (i.e. inode with state->loc2.parent as
+ * parent and state->loc2.name as name). If it exists, then
+ * unlink that inode, and send forget on that inode if the
+ * unlinked entry is the last entry. In case of fuse client
+ * the fuse kernel module itself sends the forget on the
+ * unlinked inode.
+ */
+ /* NOTE(review): the lookup uses state->loc.inode->table while the rename
+ * below uses state->itable; presumably the same table - confirm. */
+ tmp_inode = inode_grep (state->loc.inode->table,
+ state->loc2.parent, state->loc2.name);
+ if (tmp_inode) {
+ inode_unlink (tmp_inode, state->loc2.parent,
+ state->loc2.name);
+ forget_inode_if_no_dentry (tmp_inode);
+ inode_unref (tmp_inode);
+ }
+
+ inode_rename (state->itable,
+ state->loc.parent, state->loc.name,
+ state->loc2.parent, state->loc2.name,
+ state->loc.inode, stbuf);
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+
+ gf_stat_from_iatt (&rsp->preoldparent, preoldparent);
+ gf_stat_from_iatt (&rsp->postoldparent, postoldparent);
+
+ gf_stat_from_iatt (&rsp->prenewparent, prenewparent);
+ gf_stat_from_iatt (&rsp->postnewparent, postnewparent);
+
+}
+
+/*
+ * Finish an OPEN: bind the fd, obtain a slot for it in the client's fd
+ * table, take a reference on the fd and store the slot number in rsp->fd.
+ *
+ * Returns 0 on success, -1 if the client's server context is unavailable.
+ */
+int
+server_post_open (call_frame_t *frame, xlator_t *this,
+                  gfs3_open_rsp *rsp, fd_t *fd)
+{
+        server_ctx_t *ctx  = NULL;
+        uint64_t      slot = 0;
+
+        ctx = server_ctx_get (frame->root->client, this);
+        if (ctx == NULL) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+                        "failed");
+                return -1;
+        }
+
+        fd_bind (fd);
+        slot = gf_fd_unused_get (ctx->fdtable, fd);
+        fd_ref (fd);
+
+        rsp->fd = slot;
+        return 0;
+}
+
+/* Fill the READ response: convert stbuf into rsp->stat and report op_ret as the size. */
+void
+server_post_readv (gfs3_read_rsp *rsp, struct iatt *stbuf, int op_ret)
+{
+ gf_stat_from_iatt (&rsp->stat, stbuf);
+ rsp->size = op_ret;
+}
+
+/*
+ * Finish an OPENDIR: bind the fd, obtain a slot for it in the client's fd
+ * table, take a reference on the fd and store the slot number in rsp->fd.
+ *
+ * Returns 0 on success, -1 if the client's server context is unavailable.
+ */
+int
+server_post_opendir (call_frame_t *frame, xlator_t *this,
+                     gfs3_opendir_rsp *rsp, fd_t *fd)
+{
+        server_ctx_t *ctx  = NULL;
+        uint64_t      slot = 0;
+
+        ctx = server_ctx_get (frame->root->client, this);
+        if (ctx == NULL) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+                        "failed");
+                return -1;
+        }
+
+        fd_bind (fd);
+        slot = gf_fd_unused_get (ctx->fdtable, fd);
+        fd_ref (fd);
+
+        rsp->fd = slot;
+        return 0;
+}
+
+/*
+ * Finish a CREATE: link the new inode under (state->loc.parent,
+ * state->loc.name), register the fd in the client's fd table and fill the
+ * response.
+ *
+ * Returns 0 on success or a negative errno value on failure:
+ *   -ENOENT  inode_link() returned no inode
+ *   -ENOMEM  the client's server context could not be obtained
+ */
+int
+server_post_create (call_frame_t *frame, gfs3_create_rsp *rsp,
+                    server_state_t *state,
+                    xlator_t *this, fd_t *fd, inode_t *inode,
+                    struct iatt *stbuf, struct iatt *preparent,
+                    struct iatt *postparent)
+{
+        server_ctx_t *serv_ctx   = NULL;
+        inode_t      *link_inode = NULL;
+        uint64_t      fd_no      = 0;
+        int           op_errno   = 0;
+
+        link_inode = inode_link (inode, state->loc.parent,
+                                 state->loc.name, stbuf);
+
+        if (!link_inode) {
+                op_errno = ENOENT;
+                goto out;
+        }
+
+        if (link_inode != inode) {
+                /*
+                  VERY racy code (if used anywhere else)
+                  -- don't do this without understanding
+                */
+
+                inode_ctx_merge (fd, fd->inode, link_inode);
+                inode_unref (fd->inode);
+                fd->inode = inode_ref (link_inode);
+        }
+
+        inode_lookup (link_inode);
+        inode_unref (link_inode);
+
+        serv_ctx = server_ctx_get (frame->root->client, this);
+        if (serv_ctx == NULL) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+                        "failed");
+                /* Without a non-zero errno the function would return -0,
+                 * i.e. success, with rsp unfilled and the fd unbound.
+                 * NOTE(review): ENOMEM assumes an allocation failure inside
+                 * server_ctx_get() - confirm. */
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        fd_bind (fd);
+        fd_no = gf_fd_unused_get (serv_ctx->fdtable, fd);
+        fd_ref (fd);
+
+        /* NOTE(review): fd_no is a uint64_t, so the first comparison can
+         * never be true, and op_errno set here is not returned (the
+         * function returns 0 below). Kept as in the original. */
+        if ((fd_no > UINT64_MAX) || (fd == 0)) {
+                op_errno = errno;
+        }
+
+        rsp->fd = fd_no;
+        gf_stat_from_iatt (&rsp->stat, stbuf);
+        gf_stat_from_iatt (&rsp->preparent, preparent);
+        gf_stat_from_iatt (&rsp->postparent, postparent);
+
+        return 0;
+out:
+        return -op_errno;
+}
+
+/*
+ * Finish a LOOKUP: fill rsp->stat from stbuf and link the inode into the
+ * inode table.
+ *
+ * When the looked-up inode is the root of the bound xlator's inode table,
+ * stbuf is first given inode number 1 and a gfid of fifteen zero bytes
+ * followed by 1, and the inode's type is filled in from stbuf if it was
+ * still 0. An inode whose gfid is the root gfid is never linked.
+ *
+ * TODO: Handle revalidate path
+ */
+void
+server_post_lookup (gfs3_lookup_rsp *rsp, call_frame_t *frame,
+                    server_state_t *state,
+                    inode_t *inode, struct iatt *stbuf,
+                    struct iatt *postparent)
+{
+        inode_t *linked    = NULL;
+        uuid_t   root_gfid = {0,};
+
+        if (inode == frame->root->client->bound_xl->itable->root) {
+                /* we just looked up root ("/") */
+                root_gfid[15] = 1;
+                stbuf->ia_ino = 1;
+                gf_uuid_copy (stbuf->ia_gfid, root_gfid);
+                if (inode->ia_type == 0)
+                        inode->ia_type = stbuf->ia_type;
+        }
+
+        gf_stat_from_iatt (&rsp->stat, stbuf);
+
+        if (__is_root_gfid (inode->gfid))
+                return;
+
+        linked = inode_link (inode, state->loc.parent,
+                             state->loc.name, stbuf);
+        if (linked == NULL)
+                return;
+
+        inode_lookup (linked);
+        inode_unref (linked);
+}
+
+/* Convert 'lease' into the lease field of the LEASE response. */
+void
+server_post_lease (gfs3_lease_rsp *rsp, struct gf_lease *lease)
+{
+ gf_proto_lease_from_lease (&rsp->lease, lease);
+}
diff --git a/xlators/protocol/server/src/server-common.h b/xlators/protocol/server/src/server-common.h
new file mode 100644
index 00000000000..afd9fb81269
--- /dev/null
+++ b/xlators/protocol/server/src/server-common.h
@@ -0,0 +1,132 @@
+#include "server.h"
+#include "defaults.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "compat-errno.h"
+#include "server-messages.h"
+#include "defaults.h"
+
+#include "xdr-nfs3.h"
+void
+server_post_stat (gfs3_stat_rsp *rsp, struct iatt *stbuf);
+
+void
+server_post_readlink (gfs3_readlink_rsp *rsp, struct iatt *stbuf,
+ const char *buf);
+
+void
+server_post_mknod (server_state_t *state, gfs3_mknod_rsp *rsp,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, inode_t *inode);
+void
+server_post_mkdir (server_state_t *state, gfs3_mkdir_rsp *rsp,
+ inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+void
+server_post_unlink (server_state_t *state, gfs3_unlink_rsp *rsp,
+ struct iatt *preparent, struct iatt *postparent);
+void
+server_post_rmdir (server_state_t *state, gfs3_rmdir_rsp *rsp,
+ struct iatt *preparent, struct iatt *postparent);
+
+void
+server_post_symlink (server_state_t *state, gfs3_symlink_rsp *rsp,
+ inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+void
+server_post_link (server_state_t *state, gfs3_link_rsp *rsp,
+ inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+void
+server_post_truncate (gfs3_truncate_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf);
+
+void
+server_post_writev (gfs3_write_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf);
+void
+server_post_statfs (gfs3_statfs_rsp *rsp, struct statvfs *stbuf);
+
+void
+server_post_fsync (gfs3_fsync_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf);
+
+void
+server_post_ftruncate (gfs3_ftruncate_rsp *rsp, struct iatt *prebuf,
+ struct iatt *postbuf);
+
+void
+server_post_fstat (gfs3_fstat_rsp *rsp, struct iatt *stbuf);
+
+void
+server_post_lk (xlator_t *this, gfs3_lk_rsp *rsp, struct gf_flock *lock);
+
+int
+server_post_readdir (gfs3_readdir_rsp *rsp, gf_dirent_t *entries);
+
+void
+server_post_zerofill (gfs3_zerofill_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost);
+
+void
+server_post_discard (gfs3_discard_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost);
+
+void
+server_post_fallocate (gfs3_fallocate_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost);
+
+int
+server_post_readdirp (gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);
+
+void
+server_post_fsetattr (gfs3_fsetattr_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost);
+
+void
+server_post_setattr (gfs3_setattr_rsp *rsp, struct iatt *statpre,
+ struct iatt *statpost);
+
+void
+server_post_rchecksum (gfs3_rchecksum_rsp *rsp, uint32_t weak_checksum,
+ uint8_t *strong_checksum);
+
+void
+server_post_rename (call_frame_t *frame, server_state_t *state,
+ gfs3_rename_rsp *rsp,
+ struct iatt *stbuf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent);
+
+int
+server_post_open (call_frame_t *frame, xlator_t *this,
+ gfs3_open_rsp *rsp, fd_t *fd);
+void
+server_post_readv (gfs3_read_rsp *rsp, struct iatt *stbuf, int op_ret);
+
+int
+server_post_opendir (call_frame_t *frame, xlator_t *this,
+ gfs3_opendir_rsp *rsp, fd_t *fd);
+
+int
+server_post_create (call_frame_t *frame, gfs3_create_rsp *rsp,
+ server_state_t *state,
+ xlator_t *this, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent);
+
+void
+server_post_lookup (gfs3_lookup_rsp *rsp, call_frame_t *frame,
+ server_state_t *state,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *postparent);
+
+void
+server_post_lease (gfs3_lease_rsp *rsp, struct gf_lease *lease);
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 94586d20c53..771595228eb 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -1,34 +1,23 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "server.h"
#include "server-helpers.h"
-#include "glusterfs-xdr.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
#include "compat-errno.h"
-#include "msg-xdr.h"
+#include "glusterfs3.h"
#include "authenticate.h"
+#include "server-messages.h"
+#include "syscall.h"
struct __get_xl_struct {
const char *name;
@@ -40,7 +29,7 @@ gf_compare_client_version (rpcsvc_request_t *req, int fop_prognum,
{
int ret = -1;
/* TODO: think.. */
- if (glusterfs3_1_fop_prog.prognum == fop_prognum)
+ if (glusterfs3_3_fop_prog.prognum == fop_prognum)
ret = 0;
return ret;
@@ -91,7 +80,8 @@ _volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum)
if (!temp_volfile) {
temp_volfile = GF_CALLOC (1, sizeof (struct _volfile_ctx),
gf_server_mt_volfile_ctx_t);
-
+ if (!temp_volfile)
+ goto out;
temp_volfile->next = conf->volfile;
temp_volfile->key = (key)? gf_strdup (key): NULL;
temp_volfile->checksum = checksum;
@@ -101,26 +91,26 @@ _volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum)
}
if (temp_volfile->checksum != checksum) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "the volume file got modified between earlier access "
- "and now, this may lead to inconsistency between "
- "clients, advised to remount client");
+ gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_REMOUNT_CLIENT_REQD,
+ "the volume file was modified between a prior access "
+ "and now. This may lead to inconsistency between "
+ "clients, you are advised to remount client");
temp_volfile->checksum = checksum;
}
- out:
+out:
return 0;
}
-size_t
-build_volfile_path (xlator_t *this, const char *key, char *path,
- size_t path_len)
+static size_t
+getspec_build_volfile_path (xlator_t *this, const char *key, char *path,
+ size_t path_len)
{
- int ret = -1;
+ char *filename = NULL;
+ server_conf_t *conf = NULL;
+ int ret = -1;
int free_filename = 0;
- char *filename = NULL;
- server_conf_t *conf = NULL;
char data_key[256] = {0,};
conf = this->private;
@@ -129,7 +119,7 @@ build_volfile_path (xlator_t *this, const char *key, char *path,
ret = dict_get_str (this->options, "client-volume-filename",
&filename);
if (ret == 0) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_msg (this->name, GF_LOG_WARNING, 0, PS_MSG_DEFAULTING_FILE,
"option 'client-volume-filename' is changed to "
"'volume-filename.<key>' which now takes 'key' as an "
"option to choose/fetch different files from server. "
@@ -144,30 +134,34 @@ build_volfile_path (xlator_t *this, const char *key, char *path,
if (ret < 0) {
/* Make sure that key doesn't contain "../" in path */
if ((gf_strstr (key, "/", "..")) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: invalid key", key);
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PS_MSG_INVALID_ENTRY, "%s: invalid "
+ "key", key);
goto out;
}
}
-
- ret = gf_asprintf (&filename, "%s/%s.vol", conf->conf_dir, key);
- if (-1 == ret)
- goto out;
-
- free_filename = 1;
}
if (!filename) {
ret = dict_get_str (this->options,
"volume-filename.default", &filename);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no default volume filename given, "
- "defaulting to %s", DEFAULT_VOLUME_FILE_PATH);
- filename = DEFAULT_VOLUME_FILE_PATH;
+ gf_msg_debug (this->name, 0, "no default volume "
+ "filename given, defaulting to %s",
+ DEFAULT_VOLUME_FILE_PATH);
}
}
+ if (!filename && key) {
+ ret = gf_asprintf (&filename, "%s/%s.vol", conf->conf_dir, key);
+ if (-1 == ret)
+ goto out;
+
+ free_filename = 1;
+ }
+ if (!filename)
+ filename = DEFAULT_VOLUME_FILE_PATH;
+
ret = -1;
if ((filename) && (path_len > strlen (filename))) {
@@ -186,7 +180,7 @@ int
_validate_volfile_checksum (xlator_t *this, char *key,
uint32_t checksum)
{
- char filename[ZR_PATH_MAX] = {0,};
+ char filename[PATH_MAX] = {0,};
server_conf_t *conf = NULL;
struct _volfile_ctx *temp_volfile = NULL;
int ret = 0;
@@ -200,21 +194,22 @@ _validate_volfile_checksum (xlator_t *this, char *key,
goto out;
if (!temp_volfile) {
- ret = build_volfile_path (this, key, filename,
- sizeof (filename));
+ ret = getspec_build_volfile_path (this, key, filename,
+ sizeof (filename));
if (ret <= 0)
goto out;
fd = open (filename, O_RDONLY);
if (-1 == fd) {
ret = 0;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_INFO, errno,
+ PS_MSG_VOL_FILE_OPEN_FAILED,
"failed to open volume file (%s) : %s",
filename, strerror (errno));
goto out;
}
get_checksum_for_file (fd, &local_checksum);
_volfile_update_checksum (this, key, local_checksum);
- close (fd);
+ sys_close (fd);
}
temp_volfile = conf->volfile;
@@ -241,106 +236,63 @@ out:
return ret;
}
-int
-build_program_list (server_conf_t *conf, char *list)
-{
- /* Reply in "Name:Program-Number:Program-Version,..." format */
- sprintf (list, "%s:%d:%d",
- glusterfs3_1_fop_prog.progname,
- glusterfs3_1_fop_prog.prognum,
- glusterfs3_1_fop_prog.progver);
- /* TODO: keep adding new versions to the list here */
- return 0;
-}
-
-int
-server_dump_version (rpcsvc_request_t *req)
-{
- char list[8192] = {0,};
- server_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = EINVAL;
- gf_dump_version_req args = {0,};
- gf_dump_version_rsp rsp = {0,};
-
- conf = ((xlator_t *)req->conn->svc->mydata)->private;
-
- if (xdr_to_glusterfs_req (req, &args, xdr_to_dump_version_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
- }
-
- build_program_list (conf, list);
- rsp.msg.msg_val = list;
- rsp.msg.msg_len = strlen (list) + 1;
- ret = 0;
-fail:
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.op_ret = ret;
-
- server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
- (gfs_serialize_t)xdr_serialize_dump_version_rsp);
-
- if (args.key)
- free (args.key);
-
- return 0;
-}
int
server_getspec (rpcsvc_request_t *req)
{
- int32_t ret = -1;
- int32_t op_errno = ENOENT;
- int32_t spec_fd = -1;
- size_t file_len = 0;
- char filename[ZR_PATH_MAX] = {0,};
- struct stat stbuf = {0,};
- uint32_t checksum = 0;
- char *key = NULL;
- server_conf_t *conf = NULL;
-
- gf_getspec_req args = {0,};
- gf_getspec_rsp rsp = {0,};
- server_connection_t *conn = NULL;
-
- conn = req->conn->trans->private;
- conf = conn->this->private;
-
- if (xdr_to_glusterfs_req (req, &args, xdr_to_getspec_req)) {
+ int32_t ret = -1;
+ int32_t op_errno = ENOENT;
+ int32_t spec_fd = -1;
+ size_t file_len = 0;
+ char filename[PATH_MAX] = {0,};
+ struct stat stbuf = {0,};
+ uint32_t checksum = 0;
+ char *key = NULL;
+ server_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ gf_getspec_req args = {0,};
+ gf_getspec_rsp rsp = {0,};
+
+ this = req->svc->xl;
+ conf = this->private;
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_getspec_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ op_errno = EINVAL;
goto fail;
}
- ret = build_volfile_path (conn->this, args.key,
- filename, sizeof (filename));
+ ret = getspec_build_volfile_path (this, args.key,
+ filename, sizeof (filename));
if (ret > 0) {
/* to allocate the proper buffer to hold the file data */
- ret = stat (filename, &stbuf);
+ ret = sys_stat (filename, &stbuf);
if (ret < 0){
- gf_log (conn->this->name, GF_LOG_ERROR,
- "Unable to stat %s (%s)",
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ PS_MSG_STAT_ERROR, "Unable to stat %s (%s)",
filename, strerror (errno));
+ op_errno = errno;
goto fail;
}
spec_fd = open (filename, O_RDONLY);
if (spec_fd < 0) {
- gf_log (conn->this->name, GF_LOG_ERROR,
- "Unable to open %s (%s)",
- filename, strerror (errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ PS_MSG_FILE_OP_FAILED, "Unable to open %s "
+ "(%s)", filename, strerror (errno));
+ op_errno = errno;
goto fail;
}
ret = file_len = stbuf.st_size;
if (conf->verify_volfile) {
get_checksum_for_file (spec_fd, &checksum);
- _volfile_update_checksum (conn->this, key, checksum);
+ _volfile_update_checksum (this, key, checksum);
}
} else {
- errno = ENOENT;
+ op_errno = ENOENT;
}
if (file_len) {
@@ -351,37 +303,120 @@ server_getspec (rpcsvc_request_t *req)
op_errno = ENOMEM;
goto fail;
}
- ret = read (spec_fd, rsp.spec, file_len);
-
- close (spec_fd);
+ ret = sys_read (spec_fd, rsp.spec, file_len);
}
/* convert to XDR */
-fail:
op_errno = errno;
+fail:
+ if (!rsp.spec)
+ rsp.spec = "";
rsp.op_errno = gf_errno_to_error (op_errno);
rsp.op_ret = ret;
+ if (spec_fd != -1)
+ sys_close (spec_fd);
+
server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
- (gfs_serialize_t)xdr_serialize_getspec_rsp);
+ (xdrproc_t)xdr_gf_getspec_rsp);
return 0;
}
+void
+server_first_lookup_done (rpcsvc_request_t *req, gf_setvolume_rsp *rsp) {
+
+ server_submit_reply (NULL, req, rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_setvolume_rsp);
+
+ GF_FREE (rsp->dict.dict_val);
+ GF_FREE (rsp);
+}
+
+
+int
+server_first_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ rpcsvc_request_t *req = NULL;
+ gf_setvolume_rsp *rsp = NULL;
+
+ req = cookie;
+ rsp = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0 || buf == NULL)
+ gf_log (this->name, GF_LOG_WARNING, "server first lookup failed"
+ " on root inode: %s", strerror (op_errno));
+
+ /* Ignore error from lookup, don't set
+ * failure in rsp->op_ret. lookup on a snapview-server
+ * can fail with ESTALE
+ */
+ server_first_lookup_done (req, rsp);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+int
+server_first_lookup (xlator_t *this, xlator_t *xl, rpcsvc_request_t *req,
+ gf_setvolume_rsp *rsp)
+{
+ call_frame_t *frame = NULL;
+ loc_t loc = {0, };
+
+ loc.path = "/";
+ loc.name = "";
+ loc.inode = xl->itable->root;
+ loc.parent = NULL;
+ gf_uuid_copy (loc.gfid, loc.inode->gfid);
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log ("fuse", GF_LOG_ERROR, "failed to create frame");
+ goto err;
+ }
+
+ frame->local = (void *)rsp;
+ frame->root->uid = frame->root->gid = 0;
+ frame->root->pid = -1;
+ frame->root->type = GF_OP_TYPE_FOP;
+
+ STACK_WIND_COOKIE (frame, server_first_lookup_cbk, (void *)req, xl,
+ xl->fops->lookup, &loc, NULL);
+
+ return 0;
+
+err:
+ rsp->op_ret = -1;
+ rsp->op_errno = ENOMEM;
+ server_first_lookup_done (req, rsp);
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ return -1;
+}
int
server_setvolume (rpcsvc_request_t *req)
{
- gf_setvolume_req args = {0,};
- gf_setvolume_rsp rsp = {0,};
- server_connection_t *conn = NULL;
+ gf_setvolume_req args = {{0,},};
+ gf_setvolume_rsp *rsp = NULL;
+ client_t *client = NULL;
+ server_ctx_t *serv_ctx = NULL;
server_conf_t *conf = NULL;
peer_info_t *peerinfo = NULL;
dict_t *reply = NULL;
dict_t *config_params = NULL;
dict_t *params = NULL;
char *name = NULL;
- char *process_uuid = NULL;
+ char *client_uid = NULL;
+ char *clnt_version = NULL;
xlator_t *xl = NULL;
char *msg = NULL;
char *volfile_key = NULL;
@@ -392,61 +427,146 @@ server_setvolume (rpcsvc_request_t *req)
int32_t op_errno = EINVAL;
int32_t fop_version = 0;
int32_t mgmt_version = 0;
+ uint32_t lk_version = 0;
+ char *buf = NULL;
+ gf_boolean_t cancelled = _gf_false;
params = dict_new ();
reply = dict_new ();
- if (xdr_to_glusterfs_req (req, &args, xdr_to_setvolume_req)) {
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_setvolume_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
goto fail;
}
- this = req->conn->svc->mydata;
+ this = req->svc->xl;
config_params = dict_copy_with_ref (this->options, NULL);
conf = this->private;
- ret = dict_unserialize (args.dict.dict_val, args.dict.dict_len, &params);
+ if (conf->parent_up == _gf_false) {
+ /* PARENT_UP indicates that all xlators in graph are inited
+ * successfully
+ */
+ op_ret = -1;
+ op_errno = EAGAIN;
+
+ ret = dict_set_str (reply, "ERROR",
+ "xlator graph in server is not initialised "
+ "yet. Try again later");
+ if (ret < 0)
+ gf_msg_debug (this->name, 0, "failed to set error: "
+ "xlator graph in server is not "
+ "initialised yet. Try again later");
+ goto fail;
+ }
+
+ ret = dict_set_int32 (reply, "child_up", conf->child_up);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' "
+ "in the reply dict");
+
+ buf = memdup (args.dict.dict_val, args.dict.dict_len);
+ if (buf == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
+
+ ret = dict_unserialize (buf, args.dict.dict_len, &params);
if (ret < 0) {
ret = dict_set_str (reply, "ERROR",
"Internal error: failed to unserialize "
"request dictionary");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg \"%s\"",
- "Internal error: failed to unserialize "
- "request dictionary");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg \"%s\"", "Internal error: failed "
+ "to unserialize request dictionary");
op_ret = -1;
op_errno = EINVAL;
goto fail;
}
- ret = dict_get_str (params, "process-uuid", &process_uuid);
+ params->extra_free = buf;
+ buf = NULL;
+
+ ret = dict_get_str (params, "process-uuid", &client_uid);
if (ret < 0) {
ret = dict_set_str (reply, "ERROR",
"UUID not specified");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ /*lk_verion :: [1..2^31-1]*/
+ ret = dict_get_uint32 (params, "clnt-lk-version", &lk_version);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "lock state version not supplied");
+ if (ret < 0)
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
op_ret = -1;
op_errno = EINVAL;
goto fail;
}
+ client = gf_client_get (this, &req->cred, client_uid);
+ if (client == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
- conn = server_connection_get (this, process_uuid);
- if (req->conn->trans->xl_private != conn)
- req->conn->trans->xl_private = conn;
+ gf_msg_debug (this->name, 0, "Connected to %s", client->client_uid);
+ cancelled = server_cancel_grace_timer (this, client);
+ if (cancelled)//Do gf_client_put on behalf of grace-timer-handler.
+ gf_client_put (client, NULL);
+
+ serv_ctx = server_ctx_get (client, client->this);
+ if (serv_ctx == NULL) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+ "failed");
+ goto fail;
+ }
+
+ if (serv_ctx->lk_version != 0 &&
+ serv_ctx->lk_version != lk_version) {
+ (void) server_connection_cleanup (this, client,
+ INTERNAL_LOCKS | POSIX_LOCKS);
+ }
+
+ if (req->trans->xl_private != client)
+ req->trans->xl_private = client;
+
+ auth_set_username_passwd (params, config_params, client);
+ if (req->trans->ssl_name) {
+ if (dict_set_str(params,"ssl-name",req->trans->ssl_name) != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PS_MSG_SSL_NAME_SET_FAILED, "failed to set "
+ "ssl_name %s", req->trans->ssl_name);
+ /* Not fatal, auth will just fail. */
+ }
+ }
ret = dict_get_int32 (params, "fops-version", &fop_version);
if (ret < 0) {
ret = dict_set_str (reply, "ERROR",
"No FOP version number specified");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
}
ret = dict_get_int32 (params, "mgmt-version", &mgmt_version);
@@ -454,8 +574,8 @@ server_setvolume (rpcsvc_request_t *req)
ret = dict_set_str (reply, "ERROR",
"No MGMT version number specified");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
}
ret = gf_compare_client_version (req, fop_version, mgmt_version);
@@ -463,16 +583,17 @@ server_setvolume (rpcsvc_request_t *req)
ret = gf_asprintf (&msg, "version mismatch: client(%d)"
" - client-mgmt(%d)",
fop_version, mgmt_version);
- /* get_supported_version (req)); */
+ /* get_supported_version (req)); */
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting up error msg");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_ASPRINTF_FAILED, "asprintf failed while"
+ "setting up error msg");
goto fail;
}
ret = dict_set_dynstr (reply, "ERROR", msg);
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
op_ret = -1;
op_errno = EINVAL;
@@ -484,8 +605,8 @@ server_setvolume (rpcsvc_request_t *req)
ret = dict_set_str (reply, "ERROR",
"No remote-subvolume option specified");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
op_ret = -1;
op_errno = EINVAL;
@@ -495,16 +616,17 @@ server_setvolume (rpcsvc_request_t *req)
xl = get_xlator_by_name (this, name);
if (xl == NULL) {
ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
- name);
+ name);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_ASPRINTF_FAILED,
"asprintf failed while setting error msg");
goto fail;
}
ret = dict_set_dynstr (reply, "ERROR", msg);
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
op_ret = -1;
op_errno = ENOENT;
@@ -516,6 +638,9 @@ server_setvolume (rpcsvc_request_t *req)
if (ret == 0) {
ret = dict_get_str (params, "volfile-key",
&volfile_key);
+ if (ret)
+ gf_msg_debug (this->name, 0, "failed to set "
+ "'volfile-key'");
ret = _validate_volfile_checksum (this, volfile_key,
checksum);
@@ -525,8 +650,8 @@ server_setvolume (rpcsvc_request_t *req)
"varies from earlier "
"access");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed "
+ "to set error msg");
op_ret = -1;
op_errno = ESTALE;
@@ -536,121 +661,164 @@ server_setvolume (rpcsvc_request_t *req)
}
- peerinfo = &req->conn->trans->peerinfo;
+ peerinfo = &req->trans->peerinfo;
if (peerinfo) {
ret = dict_set_static_ptr (params, "peer-info", peerinfo);
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set peer-info");
+ gf_msg_debug (this->name, 0, "failed to set "
+ "peer-info");
}
if (conf->auth_modules == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_AUTH_INIT_FAILED,
"Authentication module not initialized");
}
+ ret = dict_get_str (params, "client-version", &clnt_version);
+ if (ret)
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_CLIENT_VERSION_NOT_SET,
+ "client-version not set, may be of older version");
+
ret = gf_authenticate (params, config_params,
conf->auth_modules);
if (ret == AUTH_ACCEPT) {
- gf_log (this->name, GF_LOG_INFO,
- "accepted client from %s",
- (peerinfo)?peerinfo->identifier:"");
+ /* Store options received from client side */
+ req->trans->clnt_options = dict_ref(params);
+
+ gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_CLIENT_ACCEPTED,
+ "accepted client from %s (version: %s)",
+ client->client_uid,
+ (clnt_version) ? clnt_version : "old");
op_ret = 0;
- conn->bound_xl = xl;
+ client->bound_xl = xl;
ret = dict_set_str (reply, "ERROR", "Success");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
} else {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot authenticate client from %s",
- (peerinfo)? peerinfo->identifier:"<>");
+ gf_msg (this->name, GF_LOG_ERROR, EACCES,
+ PS_MSG_AUTHENTICATE_ERROR, "Cannot authenticate client"
+ " from %s %s", client->client_uid,
+ (clnt_version) ? clnt_version : "old");
+
op_ret = -1;
op_errno = EACCES;
ret = dict_set_str (reply, "ERROR", "Authentication failed");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
goto fail;
}
- if (conn->bound_xl == NULL) {
+ if (client->bound_xl == NULL) {
ret = dict_set_str (reply, "ERROR",
"Check volfile and handshake "
"options in protocol/client");
if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set error msg");
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
op_ret = -1;
op_errno = EACCES;
goto fail;
}
- if ((conn->bound_xl != NULL) &&
- (ret >= 0) &&
- (conn->bound_xl->itable == NULL)) {
- /* create inode table for this bound_xl, if one doesn't
- already exist */
-
- gf_log (this->name, GF_LOG_TRACE,
- "creating inode table with lru_limit=%"PRId32", "
- "xlator=%s", conf->inode_lru_limit,
- conn->bound_xl->name);
-
- /* TODO: what is this ? */
- conn->bound_xl->itable = inode_table_new (conf->inode_lru_limit,
- conn->bound_xl);
+ LOCK (&conf->itable_lock);
+ {
+ if (client->bound_xl->itable == NULL) {
+ /* create inode table for this bound_xl, if one doesn't
+ already exist */
+
+ gf_msg_trace (this->name, 0, "creating inode table with"
+ " lru_limit=%"PRId32", xlator=%s",
+ conf->inode_lru_limit,
+ client->bound_xl->name);
+
+ /* TODO: what is this ? */
+ client->bound_xl->itable =
+ inode_table_new (conf->inode_lru_limit,
+ client->bound_xl);
+ }
}
+ UNLOCK (&conf->itable_lock);
ret = dict_set_str (reply, "process-uuid",
this->ctx->process_uuid);
+ if (ret)
+ gf_msg_debug (this->name, 0, "failed to set 'process-uuid'");
- ret = dict_set_uint64 (reply, "transport-ptr",
- ((uint64_t) (long) req->conn->trans));
+ ret = dict_set_uint32 (reply, "clnt-lk-version", serv_ctx->lk_version);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PS_MSG_CLIENT_LK_VERSION_ERROR, "failed to set "
+ "'clnt-lk-version'");
+ ret = dict_set_uint64 (reply, "transport-ptr",
+ ((uint64_t) (long) req->trans));
+ if (ret)
+ gf_msg_debug (this->name, 0, "failed to set 'transport-ptr'");
fail:
- rsp.dict.dict_len = dict_serialized_length (reply);
- if (rsp.dict.dict_len < 0) {
- gf_log ("server-handshake", GF_LOG_DEBUG,
- "failed to get serialized length of reply dict");
+ rsp = GF_CALLOC (1, sizeof (gf_setvolume_rsp),
+ gf_server_mt_setvolume_rsp_t);
+ GF_ASSERT (rsp);
+
+ rsp->op_ret = 0;
+ rsp->dict.dict_len = dict_serialized_length (reply);
+ if (rsp->dict.dict_len > UINT_MAX) {
+ gf_msg_debug ("server-handshake", 0, "failed to get serialized"
+ " length of reply dict");
op_ret = -1;
op_errno = EINVAL;
- rsp.dict.dict_len = 0;
+ rsp->dict.dict_len = 0;
}
- if (rsp.dict.dict_len) {
- rsp.dict.dict_val = GF_CALLOC (1, rsp.dict.dict_len,
- gf_server_mt_rsp_buf_t);
- if (rsp.dict.dict_val) {
- ret = dict_serialize (reply, rsp.dict.dict_val);
+ if (rsp->dict.dict_len) {
+ rsp->dict.dict_val = GF_CALLOC (1, rsp->dict.dict_len,
+ gf_server_mt_rsp_buf_t);
+ if (rsp->dict.dict_val) {
+ ret = dict_serialize (reply, rsp->dict.dict_val);
if (ret < 0) {
- gf_log ("server-handshake", GF_LOG_DEBUG,
- "failed to serialize reply dict");
+ gf_msg_debug ("server-handshake", 0, "failed "
+ "to serialize reply dict");
op_ret = -1;
op_errno = -ret;
}
}
}
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
- (gfs_serialize_t)xdr_serialize_setvolume_rsp);
-
+ rsp->op_ret = op_ret;
+ rsp->op_errno = gf_errno_to_error (op_errno);
+
+ /* if bound_xl is NULL or something fails, then put the connection
+ * back. Otherwise the connection would have been added to the
+ * list of connections the server is maintaining and might segfault
+ * during statedump when bound_xl of the connection is accessed.
+ */
+ if (op_ret && !xl && (client != NULL)) {
+ /* We would have set the xl_private of the transport to the
+ * @conn. But if we have put the connection i.e shutting down
+ * the connection, then we should set xl_private to NULL as it
+ * would be pointing to a freed memory and would segfault when
+ * accessed upon getting DISCONNECT.
+ */
+ gf_client_put (client, NULL);
+ req->trans->xl_private = NULL;
+ }
- if (args.dict.dict_val)
- free (args.dict.dict_val);
+ if (op_ret >= 0 && client->bound_xl->itable)
+ server_first_lookup (this, client->bound_xl, req, rsp);
+ else
+ server_first_lookup_done (req, rsp);
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
+ free (args.dict.dict_val);
dict_unref (params);
dict_unref (reply);
dict_unref (config_params);
+ GF_FREE (buf);
+
return 0;
}
@@ -660,23 +828,73 @@ server_ping (rpcsvc_request_t *req)
{
gf_common_rsp rsp = {0,};
- rsp.gfs_id = req->gfs_id;
/* Accepted */
rsp.op_ret = 0;
server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
+ (xdrproc_t)xdr_gf_common_rsp);
return 0;
}
+int
+server_set_lk_version (rpcsvc_request_t *req)
+{
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_set_lk_ver_req args = {0,};
+ gf_set_lk_ver_rsp rsp = {0,};
+ client_t *client = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ xlator_t *this = NULL;
+
+ this = req->svc->xl;
+ //TODO: Decide on an appropriate errno for the error-path
+ //below
+ if (!this)
+ goto fail;
-rpcsvc_actor_t gluster_handshake_actors[] = {
- [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, server_null, NULL, NULL },
- [GF_HNDSK_DUMP_VERSION] = {"VERSION", GF_HNDSK_DUMP_VERSION, server_dump_version, NULL, NULL },
- [GF_HNDSK_SETVOLUME] = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, NULL },
- [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, NULL },
- [GF_HNDSK_PING] = {"PING", GF_HNDSK_PING, server_ping, NULL, NULL },
+ op_ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_set_lk_ver_req);
+ if (op_ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ client = gf_client_get (this, &req->cred, args.uid);
+ serv_ctx = server_ctx_get (client, client->this);
+ if (serv_ctx == NULL) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+ "failed");
+ goto fail;
+ }
+
+ serv_ctx->lk_version = args.lk_ver;
+ rsp.lk_ver = args.lk_ver;
+
+ op_ret = 0;
+fail:
+ if (client)
+ gf_client_put (client, NULL);
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_set_lk_ver_rsp);
+
+ free (args.uid);
+
+ return 0;
+}
+
+rpcsvc_actor_t gluster_handshake_actors[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, server_null, NULL, 0, DRC_NA},
+ [GF_HNDSK_SETVOLUME] = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, 0, DRC_NA},
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA},
+ [GF_HNDSK_PING] = {"PING", GF_HNDSK_PING, server_ping, NULL, 0, DRC_NA},
+ [GF_HNDSK_SET_LK_VER] = {"SET_LK_VER", GF_HNDSK_SET_LK_VER, server_set_lk_version, NULL, 0, DRC_NA},
};
@@ -684,8 +902,6 @@ struct rpcsvc_program gluster_handshake_prog = {
.progname = "GlusterFS Handshake",
.prognum = GLUSTER_HNDSK_PROGRAM,
.progver = GLUSTER_HNDSK_VERSION,
-
.actors = gluster_handshake_actors,
- .numactors = 5,
- .progport = 7008,
+ .numactors = GF_HNDSK_MAXVALUE,
};
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
index 216204a8ba0..39fbcbc6763 100644
--- a/xlators/protocol/server/src/server-helpers.c
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -1,51 +1,160 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "server.h"
#include "server-helpers.h"
+#include "gidcache.h"
+#include "server-messages.h"
+#include "syscall.h"
+#include "defaults.h"
+#include "default-args.h"
+#include "server-common.h"
+
+#include <fnmatch.h>
+#include <pwd.h>
+#include <grp.h>
+
+/* based on nfs_fix_aux_groups() */
+int
+gid_resolve (server_conf_t *conf, call_stack_t *root)
+{
+ int ret = 0;
+ struct passwd mypw;
+ char mystrs[1024];
+ struct passwd *result;
+ gid_t mygroups[GF_MAX_AUX_GROUPS];
+ gid_list_t gl;
+ const gid_list_t *agl;
+ int ngroups, i;
+
+ agl = gid_cache_lookup (&conf->gid_cache, root->uid, 0, 0);
+ if (agl) {
+ root->ngrps = agl->gl_count;
+ goto fill_groups;
+ }
+
+ ret = getpwuid_r (root->uid, &mypw, mystrs, sizeof(mystrs), &result);
+ if (ret != 0) {
+ gf_msg ("gid-cache", GF_LOG_ERROR, errno,
+ PS_MSG_GET_UID_FAILED, "getpwuid_r(%u) failed",
+ root->uid);
+ return -1;
+ }
+
+ if (!result) {
+ gf_msg ("gid-cache", GF_LOG_ERROR, 0, PS_MSG_UID_NOT_FOUND,
+ "getpwuid_r(%u) found nothing", root->uid);
+ return -1;
+ }
+
+ gf_msg_trace ("gid-cache", 0, "mapped %u => %s", root->uid,
+ result->pw_name);
+
+ ngroups = GF_MAX_AUX_GROUPS;
+ ret = getgrouplist (result->pw_name, root->gid, mygroups, &ngroups);
+ if (ret == -1) {
+ gf_msg ("gid-cache", GF_LOG_ERROR, 0, PS_MSG_MAPPING_ERROR,
+ "could not map %s to group list (%d gids)",
+ result->pw_name, root->ngrps);
+ return -1;
+ }
+ root->ngrps = (uint16_t) ngroups;
+
+fill_groups:
+ if (agl) {
+ /* the gl is not complete, we only use gl.gl_list later on */
+ gl.gl_list = agl->gl_list;
+ } else {
+ /* setup a full gid_list_t to add it to the gid_cache */
+ gl.gl_id = root->uid;
+ gl.gl_uid = root->uid;
+ gl.gl_gid = root->gid;
+ gl.gl_count = root->ngrps;
+
+ gl.gl_list = GF_MALLOC (root->ngrps * sizeof(gid_t),
+ gf_common_mt_groups_t);
+ if (gl.gl_list)
+ memcpy (gl.gl_list, mygroups,
+ sizeof(gid_t) * root->ngrps);
+ else
+ return -1;
+ }
+
+ if (root->ngrps == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ if (call_stack_alloc_groups (root, root->ngrps) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* finally fill the groups from the resolved gid list (gl.gl_list) */
+ for (i = 0; i < root->ngrps; ++i)
+ root->groups[i] = gl.gl_list[i];
+
+out:
+ if (agl) {
+ gid_cache_release (&conf->gid_cache, agl);
+ } else {
+ if (gid_cache_add (&conf->gid_cache, &gl) != 1)
+ GF_FREE (gl.gl_list);
+ }
+
+ return ret;
+}
+
+int
+server_resolve_groups (call_frame_t *frame, rpcsvc_request_t *req)
+{
+ xlator_t *this = NULL;
+ server_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+
+ this = req->trans->xl;
+ conf = this->private;
+
+ return gid_resolve (conf, frame->root);
+out:
+ return -1;
+}
int
server_decode_groups (call_frame_t *frame, rpcsvc_request_t *req)
{
int i = 0;
- if ((!frame) || (!req))
- return 0;
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+
+ if (call_stack_alloc_groups (frame->root, req->auxgidcount) != 0)
+ return -1;
frame->root->ngrps = req->auxgidcount;
if (frame->root->ngrps == 0)
return 0;
- if (frame->root->ngrps > GF_REQUEST_MAXGROUPS)
+ /* ngrps cannot be bigger than USHRT_MAX(65535) */
+ if (frame->root->ngrps > GF_MAX_AUX_GROUPS)
return -1;
for (; i < frame->root->ngrps; ++i)
frame->root->groups[i] = req->auxgids[i];
-
+out:
return 0;
}
+
void
server_loc_wipe (loc_t *loc)
{
@@ -59,52 +168,38 @@ server_loc_wipe (loc_t *loc)
loc->inode = NULL;
}
- if (loc->path)
- GF_FREE ((void *)loc->path);
+ GF_FREE ((void *)loc->path);
}
void
server_resolve_wipe (server_resolve_t *resolve)
{
- struct resolve_comp *comp = NULL;
- int i = 0;
+ GF_FREE ((void *)resolve->path);
- if (resolve->path)
- GF_FREE ((void *)resolve->path);
+ GF_FREE ((void *)resolve->bname);
- if (resolve->bname)
- GF_FREE ((void *)resolve->bname);
-
- if (resolve->resolved)
- GF_FREE ((void *)resolve->resolved);
-
- loc_wipe (&resolve->deep_loc);
-
- comp = resolve->components;
- if (comp) {
- for (i = 0; comp[i].basename; i++) {
- if (comp[i].inode)
- inode_unref (comp[i].inode);
- }
- GF_FREE ((void *)resolve->components);
- }
+ loc_wipe (&resolve->resolve_loc);
}
void
free_state (server_state_t *state)
{
- if (state->conn) {
- //xprt_svc_unref (state->conn);
- state->conn = NULL;
+ if (state->xprt) {
+ rpc_transport_unref (state->xprt);
+ state->xprt = NULL;
}
-
if (state->fd) {
fd_unref (state->fd);
state->fd = NULL;
}
+ if (state->params) {
+ dict_unref (state->params);
+ state->params = NULL;
+ }
+
if (state->iobref) {
iobref_unref (state->iobref);
state->iobref = NULL;
@@ -120,11 +215,14 @@ free_state (server_state_t *state)
state->dict = NULL;
}
- if (state->volume)
- GF_FREE ((void *)state->volume);
+ if (state->xdata) {
+ dict_unref (state->xdata);
+ state->xdata = NULL;
+ }
- if (state->name)
- GF_FREE ((void *)state->name);
+ GF_FREE ((void *)state->volume);
+
+ GF_FREE ((void *)state->name);
server_loc_wipe (&state->loc);
server_loc_wipe (&state->loc2);
@@ -136,284 +234,82 @@ free_state (server_state_t *state)
}
-int
-gf_add_locker (struct _lock_table *table, const char *volume,
- loc_t *loc, fd_t *fd, pid_t pid)
-{
- int32_t ret = -1;
- struct _locker *new = NULL;
- uint8_t dir = 0;
-
- new = GF_CALLOC (1, sizeof (struct _locker), gf_server_mt_locker_t);
- if (new == NULL) {
- gf_log ("server", GF_LOG_ERROR,
- "failed to allocate memory for \'struct _locker\'");
- goto out;
- }
- INIT_LIST_HEAD (&new->lockers);
-
- new->volume = gf_strdup (volume);
-
- if (fd == NULL) {
- loc_copy (&new->loc, loc);
- dir = IA_ISDIR (new->loc.inode->ia_type);
- } else {
- new->fd = fd_ref (fd);
- dir = IA_ISDIR (fd->inode->ia_type);
- }
-
- new->pid = pid;
-
- LOCK (&table->lock);
- {
- if (dir)
- list_add_tail (&new->lockers, &table->dir_lockers);
- else
- list_add_tail (&new->lockers, &table->file_lockers);
- }
- UNLOCK (&table->lock);
-out:
- return ret;
-}
-
-
-int
-gf_del_locker (struct _lock_table *table, const char *volume,
- loc_t *loc, fd_t *fd, pid_t pid)
-{
- struct _locker *locker = NULL;
- struct _locker *tmp = NULL;
- int32_t ret = 0;
- uint8_t dir = 0;
- struct list_head *head = NULL;
- struct list_head del;
-
- INIT_LIST_HEAD (&del);
-
- if (fd) {
- dir = IA_ISDIR (fd->inode->ia_type);
- } else {
- dir = IA_ISDIR (loc->inode->ia_type);
- }
-
- LOCK (&table->lock);
- {
- if (dir) {
- head = &table->dir_lockers;
- } else {
- head = &table->file_lockers;
- }
-
- list_for_each_entry_safe (locker, tmp, head, lockers) {
- if (locker->fd && fd &&
- (locker->fd == fd) && (locker->pid == pid)
- && !strcmp (locker->volume, volume)) {
- list_move_tail (&locker->lockers, &del);
- } else if (locker->loc.inode &&
- loc &&
- (locker->loc.inode == loc->inode) &&
- (locker->pid == pid)
- && !strcmp (locker->volume, volume)) {
- list_move_tail (&locker->lockers, &del);
- }
- }
- }
- UNLOCK (&table->lock);
-
- tmp = NULL;
- locker = NULL;
-
- list_for_each_entry_safe (locker, tmp, &del, lockers) {
- list_del_init (&locker->lockers);
- if (locker->fd)
- fd_unref (locker->fd);
- else
- loc_wipe (&locker->loc);
-
- GF_FREE (locker->volume);
- GF_FREE (locker);
- }
-
- return ret;
-}
-
-static struct _lock_table *
-gf_lock_table_new (void)
-{
- struct _lock_table *new = NULL;
-
- new = GF_CALLOC (1, sizeof (struct _lock_table), gf_server_mt_lock_table_t);
- if (new == NULL) {
- gf_log ("server-protocol", GF_LOG_CRITICAL,
- "failed to allocate memory for new lock table");
- goto out;
- }
- INIT_LIST_HEAD (&new->dir_lockers);
- INIT_LIST_HEAD (&new->file_lockers);
- LOCK_INIT (&new->lock);
-out:
- return new;
-}
-
-static int
-server_nop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state)
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int
-do_lock_table_cleanup (xlator_t *this, server_connection_t *conn,
- call_frame_t *frame, struct _lock_table *ltable)
-{
- struct list_head file_lockers, dir_lockers;
- call_frame_t *tmp_frame = NULL;
- struct flock flock = {0, };
- xlator_t *bound_xl = NULL;
- struct _locker *locker = NULL, *tmp = NULL;
- int ret = -1;
-
- bound_xl = conn->bound_xl;
- INIT_LIST_HEAD (&file_lockers);
- INIT_LIST_HEAD (&dir_lockers);
-
- LOCK (&ltable->lock);
- {
- list_splice_init (&ltable->file_lockers,
- &file_lockers);
-
- list_splice_init (&ltable->dir_lockers, &dir_lockers);
- }
- UNLOCK (&ltable->lock);
-
- GF_FREE (ltable);
-
- flock.l_type = F_UNLCK;
- flock.l_start = 0;
- flock.l_len = 0;
- list_for_each_entry_safe (locker,
- tmp, &file_lockers, lockers) {
- tmp_frame = copy_frame (frame);
- if (tmp_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- /*
- pid = 0 is a special case that tells posix-locks
- to release all locks from this transport
- */
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->finodelk,
- locker->volume,
- locker->fd, F_SETLK, &flock);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->inodelk,
- locker->volume,
- &(locker->loc), F_SETLK, &flock);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
-
- tmp = NULL;
- locker = NULL;
- list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
- tmp_frame = copy_frame (frame);
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->fentrylk,
- locker->volume,
- locker->fd, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->entrylk,
- locker->volume,
- &(locker->loc), NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
- ret = 0;
-
-out:
- return ret;
-}
-
-
static int
server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
- fd_t *fd = NULL;
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ client_t *client = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
fd = frame->local;
+ client = frame->root->client;
fd_unref (fd);
frame->local = NULL;
+ gf_client_unref (client);
STACK_DESTROY (frame->root);
- return 0;
+
+ ret = 0;
+out:
+ return ret;
}
-int
-do_fd_cleanup (xlator_t *this, server_connection_t *conn, call_frame_t *frame,
- fdentry_t *fdentries, int fd_count)
+static int
+do_fd_cleanup (xlator_t *this, client_t* client, fdentry_t *fdentries, int fd_count)
{
fd_t *fd = NULL;
int i = 0, ret = -1;
call_frame_t *tmp_frame = NULL;
xlator_t *bound_xl = NULL;
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", fdentries, out);
- bound_xl = conn->bound_xl;
+ bound_xl = client->bound_xl;
for (i = 0;i < fd_count; i++) {
fd = fdentries[i].fd;
if (fd != NULL) {
- tmp_frame = copy_frame (frame);
+ tmp_frame = create_frame (this, this->ctx->pool);
if (tmp_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
goto out;
}
- tmp_frame->local = fd;
+ GF_ASSERT (fd->inode);
+
+ ret = inode_path (fd->inode, NULL, &path);
+
+ if (ret > 0) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_FD_CLEANUP,
+ "fd cleanup on %s", path);
+ GF_FREE (path);
+ } else {
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_FD_CLEANUP,
+ "fd cleanup on inode with gfid %s",
+ uuid_utoa (fd->inode->gfid));
+ }
+
+ tmp_frame->local = fd;
tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
- tmp_frame->root->lk_owner = 0;
+ gf_client_ref (client);
+ tmp_frame->root->client = client;
+ memset (&tmp_frame->root->lk_owner, 0,
+ sizeof (gf_lkowner_t));
+
STACK_WIND (tmp_frame,
server_connection_cleanup_flush_cbk,
- bound_xl, bound_xl->fops->flush, fd);
+ bound_xl, bound_xl->fops->flush, fd, NULL);
}
}
@@ -424,373 +320,196 @@ out:
return ret;
}
-int
-do_connection_cleanup (xlator_t *this, server_connection_t *conn,
- struct _lock_table *ltable, fdentry_t *fdentries, int fd_count)
-{
- int ret = 0;
- int saved_ret = 0;
- call_frame_t *frame = NULL;
- server_state_t *state = NULL;
-
- frame = create_frame (this, this->ctx->pool);
- if (frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- saved_ret = do_lock_table_cleanup (this, conn, frame, ltable);
-
- if (fdentries != NULL) {
- ret = do_fd_cleanup (this, conn, frame, fdentries, fd_count);
- }
-
- state = CALL_STATE (frame);
- if (state)
- GF_FREE (state);
-
- STACK_DESTROY (frame->root);
-
- if (saved_ret || ret) {
- ret = -1;
- }
-
-out:
- return ret;
-}
-
int
-server_connection_cleanup (xlator_t *this, server_connection_t *conn)
+server_connection_cleanup (xlator_t *this, client_t *client,
+ int32_t flags)
{
- char do_cleanup = 0;
- struct _lock_table *ltable = NULL;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
- int ret = 0;
-
- if (conn == NULL) {
+ server_ctx_t *serv_ctx = NULL;
+ fdentry_t *fdentries = NULL;
+ uint32_t fd_count = 0;
+ int cd_ret = 0;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, client, out);
+ GF_VALIDATE_OR_GOTO (this->name, flags, out);
+
+ serv_ctx = server_ctx_get (client, client->this);
+
+ if (serv_ctx == NULL) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+ "failed");
goto out;
}
- pthread_mutex_lock (&conn->lock);
+ LOCK (&serv_ctx->fdtable_lock);
{
- conn->active_transports--;
- if (conn->active_transports == 0) {
- if (conn->ltable) {
- ltable = conn->ltable;
- conn->ltable = gf_lock_table_new ();
- }
-
- if (conn->fdtable) {
- fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable,
- &fd_count);
- }
- do_cleanup = 1;
- }
+ if (serv_ctx->fdtable && (flags & POSIX_LOCKS))
+ fdentries = gf_fd_fdtable_get_all_fds (serv_ctx->fdtable,
+ &fd_count);
}
- pthread_mutex_unlock (&conn->lock);
-
- if (do_cleanup && conn->bound_xl)
- ret = do_connection_cleanup (this, conn, ltable, fdentries, fd_count);
+ UNLOCK (&serv_ctx->fdtable_lock);
-out:
- return ret;
-}
-
-
-int
-server_connection_destroy (xlator_t *this, server_connection_t *conn)
-{
- call_frame_t *frame = NULL, *tmp_frame = NULL;
- xlator_t *bound_xl = NULL;
- int32_t ret = -1;
- server_state_t *state = NULL;
- struct list_head file_lockers;
- struct list_head dir_lockers;
- struct _lock_table *ltable = NULL;
- struct _locker *locker = NULL, *tmp = NULL;
- struct flock flock = {0,};
- fd_t *fd = NULL;
- int32_t i = 0;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
-
- if (conn == NULL) {
- ret = 0;
+ if (client->bound_xl == NULL)
goto out;
- }
-
- bound_xl = (xlator_t *) (conn->bound_xl);
- if (bound_xl) {
- /* trans will have ref_count = 1 after this call, but its
- ok since this function is called in
- GF_EVENT_TRANSPORT_CLEANUP */
- frame = create_frame (this, this->ctx->pool);
-
- pthread_mutex_lock (&(conn->lock));
- {
- if (conn->ltable) {
- ltable = conn->ltable;
- conn->ltable = NULL;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- INIT_LIST_HEAD (&file_lockers);
- INIT_LIST_HEAD (&dir_lockers);
-
- if (ltable) {
- LOCK (&ltable->lock);
- {
- list_splice_init (&ltable->file_lockers,
- &file_lockers);
-
- list_splice_init (&ltable->dir_lockers, &dir_lockers);
- }
- UNLOCK (&ltable->lock);
- GF_FREE (ltable);
- }
-
- flock.l_type = F_UNLCK;
- flock.l_start = 0;
- flock.l_len = 0;
- list_for_each_entry_safe (locker,
- tmp, &file_lockers, lockers) {
- tmp_frame = copy_frame (frame);
- /*
- pid = 0 is a special case that tells posix-locks
- to release all locks from this transport
- */
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->finodelk,
- locker->volume,
- locker->fd, F_SETLK, &flock);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->inodelk,
- locker->volume,
- &(locker->loc), F_SETLK, &flock);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
-
- tmp = NULL;
- locker = NULL;
- list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
- tmp_frame = copy_frame (frame);
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->fentrylk,
- locker->volume,
- locker->fd, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk, bound_xl,
- bound_xl->fops->entrylk,
- locker->volume,
- &(locker->loc), NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- loc_wipe (&locker->loc);
- }
-
- GF_FREE (locker->volume);
-
- list_del_init (&locker->lockers);
- GF_FREE (locker);
- }
-
- pthread_mutex_lock (&(conn->lock));
- {
- if (conn->fdtable) {
- fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable,
- &fd_count);
- gf_fd_fdtable_destroy (conn->fdtable);
- conn->fdtable = NULL;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (fdentries != NULL) {
- for (i = 0; i < fd_count; i++) {
- fd = fdentries[i].fd;
- if (fd != NULL) {
- tmp_frame = copy_frame (frame);
- tmp_frame->local = fd;
-
- STACK_WIND (tmp_frame,
- server_connection_cleanup_flush_cbk,
- bound_xl,
- bound_xl->fops->flush,
- fd);
- }
- }
- GF_FREE (fdentries);
- }
+ if (flags & INTERNAL_LOCKS) {
+ cd_ret = gf_client_disconnect (client);
}
- if (frame) {
- state = CALL_STATE (frame);
- if (state)
- GF_FREE (state);
- STACK_DESTROY (frame->root);
+ if (fdentries != NULL) {
+ gf_msg_debug (this->name, 0, "Performing cleanup on %d "
+ "fdentries", fd_count);
+ ret = do_fd_cleanup (this, client, fdentries, fd_count);
}
+ else
+ gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_FDENTRY_NULL,
+ "no fdentries to clean");
- gf_log (this->name, GF_LOG_INFO, "destroyed connection of %s",
- conn->id);
-
- GF_FREE (conn->id);
- GF_FREE (conn);
+ if (cd_ret || ret)
+ ret = -1;
out:
return ret;
}
-server_connection_t *
-server_connection_get (xlator_t *this, const char *id)
-{
- server_connection_t *conn = NULL;
- server_connection_t *trav = NULL;
- server_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_for_each_entry (trav, &conf->conns, list) {
- if (!strcmp (id, trav->id)) {
- conn = trav;
- break;
- }
- }
-
- if (!conn) {
- conn = (void *) GF_CALLOC (1, sizeof (*conn),
- gf_server_mt_conn_t);
-
- conn->id = gf_strdup (id);
- conn->fdtable = gf_fd_fdtable_alloc ();
- conn->ltable = gf_lock_table_new ();
- conn->this = this;
- pthread_mutex_init (&conn->lock, NULL);
-
- list_add (&conn->list, &conf->conns);
- }
-
- conn->ref++;
- conn->active_transports++;
- }
- pthread_mutex_unlock (&conf->mutex);
-
- return conn;
-}
-
-
-void
-server_connection_put (xlator_t *this, server_connection_t *conn)
-{
- server_conf_t *conf = NULL;
- server_connection_t *todel = NULL;
-
- if (conn == NULL) {
- goto out;
- }
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- conn->ref--;
-
- if (!conn->ref) {
- list_del_init (&conn->list);
- todel = conn;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (todel) {
- server_connection_destroy (this, todel);
- }
-
-out:
- return;
-}
-
static call_frame_t *
server_alloc_frame (rpcsvc_request_t *req)
{
- call_frame_t *frame = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
+ call_frame_t *frame = NULL;
+ server_state_t *state = NULL;
+ client_t *client = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+ GF_VALIDATE_OR_GOTO ("server", req->trans, out);
+ GF_VALIDATE_OR_GOTO ("server", req->svc, out);
+ GF_VALIDATE_OR_GOTO ("server", req->svc->ctx, out);
- GF_VALIDATE_OR_GOTO("server", req, out);
+ client = req->trans->xl_private;
+ GF_VALIDATE_OR_GOTO ("server", client, out);
- conn = (server_connection_t *)req->conn->trans->xl_private;
- if (!conn)
+ frame = create_frame (client->this, req->svc->ctx->pool);
+ if (!frame)
goto out;
- frame = create_frame (conn->this, req->conn->svc->ctx->pool);
- GF_VALIDATE_OR_GOTO("server", frame, out);
state = GF_CALLOC (1, sizeof (*state), gf_server_mt_state_t);
- GF_VALIDATE_OR_GOTO("server", state, out);
-
- if (conn->bound_xl)
- state->itable = conn->bound_xl->itable;
+ if (!state)
+ goto out;
- state->xprt = req->conn->trans;
- state->conn = conn;
+ if (client->bound_xl)
+ state->itable = client->bound_xl->itable;
+ state->xprt = rpc_transport_ref (req->trans);
state->resolve.fd_no = -1;
state->resolve2.fd_no = -1;
+ frame->root->client = client;
frame->root->state = state; /* which socket */
frame->root->unique = 0; /* which call */
- frame->this = conn->this;
+ frame->this = client->this;
out:
return frame;
}
-
call_frame_t *
get_frame_from_request (rpcsvc_request_t *req)
{
- call_frame_t *frame = NULL;
+ call_frame_t *frame = NULL;
+ client_t *client = NULL;
+ client_t *tmp_client = NULL;
+ xlator_t *this = NULL;
+ server_conf_t *priv = NULL;
+ clienttable_t *clienttable = NULL;
+ unsigned int i = 0;
+ rpc_transport_t *trans = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+
+ client = req->trans->xl_private;
frame = server_alloc_frame (req);
if (!frame)
goto out;
frame->root->op = req->procnum;
- frame->root->type = req->type;
frame->root->unique = req->xid;
+ client = req->trans->xl_private;
+ this = req->trans->xl;
+ priv = this->private;
+ clienttable = this->ctx->clienttable;
+
+ for (i = 0; i < clienttable->max_clients; i++) {
+ tmp_client = clienttable->cliententries[i].client;
+ if (client == tmp_client) {
+ /* for non trusted clients username and password
+ would not have been set. So for non trusted clients
+ (i.e clients not from the same machine as the brick,
+ and clients from outside the storage pool)
+ do the root-squashing.
+ TODO: If any client within the storage pool (i.e
+ mounting within a machine from the pool but using
+ other machine's ip/hostname from the same pool)
+ is present treat it as a trusted client
+ */
+ if (!client->auth.username && req->pid != NFS_PID)
+ RPC_AUTH_ROOT_SQUASH (req);
+
+ /* Problem: If we just check whether the client is
+ trusted client and do not do root squashing for
+ them, then for smb clients and UFO clients root
+ squashing will never happen as they use the fuse
+ mounts done within the trusted pool (i.e they are
+ trusted clients).
+ Solution: To fix it, do root squashing for trusted
+ clients also. If one wants to have a client within
+ the storage pool for which root-squashing does not
+ happen, then the client has to be mounted with
+ --no-root-squash option. But for defrag client and
+ gsyncd client do not do root-squashing.
+ */
+ if (client->auth.username &&
+ req->pid != GF_CLIENT_PID_NO_ROOT_SQUASH &&
+ req->pid != GF_CLIENT_PID_GSYNCD &&
+ req->pid != GF_CLIENT_PID_DEFRAG &&
+ req->pid != GF_CLIENT_PID_SELF_HEALD &&
+ req->pid != GF_CLIENT_PID_QUOTA_MOUNT)
+ RPC_AUTH_ROOT_SQUASH (req);
+
+ /* For nfs clients the server processes will be running
+ within the trusted storage pool machines. So if we
+ do not do root-squashing for nfs servers, thinking
+ that its a trusted client, then root-squashing wont
+ work for nfs clients.
+ */
+ if (req->pid == NFS_PID)
+ RPC_AUTH_ROOT_SQUASH (req);
+ }
+ }
+
frame->root->uid = req->uid;
frame->root->gid = req->gid;
frame->root->pid = req->pid;
+ gf_client_ref (client);
+ frame->root->client = client;
frame->root->lk_owner = req->lk_owner;
- server_decode_groups (frame, req);
+ if (priv->server_manage_gids)
+ server_resolve_groups (frame, req);
+ else
+ server_decode_groups (frame, req);
+ trans = req->trans;
+ if (trans) {
+ memcpy (&frame->root->identifier, trans->peerinfo.identifier,
+ sizeof (trans->peerinfo.identifier));
+ }
+
frame->local = req;
out:
@@ -805,10 +524,13 @@ server_build_config (xlator_t *this, server_conf_t *conf)
int ret = -1;
struct stat buf = {0,};
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", conf, out);
+
ret = dict_get_int32 (this->options, "inode-lru-limit",
&conf->inode_lru_limit);
if (ret < 0) {
- conf->inode_lru_limit = 1024;
+ conf->inode_lru_limit = 16384;
}
conf->verify_volfile = 1;
@@ -816,29 +538,28 @@ server_build_config (xlator_t *this, server_conf_t *conf)
if (data) {
ret = gf_string2boolean(data->data, &conf->verify_volfile);
if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "wrong value for 'verify-volfile-checksum', "
- "Neglecting option");
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PS_MSG_INVALID_ENTRY, "wrong value for '"
+ "verify-volfile-checksum', Neglecting option");
}
}
data = dict_get (this->options, "trace");
- if (data) {
+ if (data) {
ret = gf_string2boolean (data->data, &conf->trace);
if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "'trace' takes on only boolean values. "
- "Neglecting option");
- }
- }
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PS_MSG_INVALID_ENTRY, "'trace' takes on only "
+ "boolean values. Neglecting option");
+ }
+ }
/* TODO: build_rpc_config (); */
ret = dict_get_int32 (this->options, "limits.transaction-size",
&conf->rpc_conf.max_block_size);
if (ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "defaulting limits.transaction-size to %d",
- DEFAULT_BLOCK_SIZE);
+ gf_msg_trace (this->name, 0, "defaulting limits.transaction-"
+ "size to %d", DEFAULT_BLOCK_SIZE);
conf->rpc_conf.max_block_size = DEFAULT_BLOCK_SIZE;
}
@@ -846,18 +567,19 @@ server_build_config (xlator_t *this, server_conf_t *conf)
if (data) {
/* Check whether the specified directory exists,
or directory specified is non standard */
- ret = stat (data->data, &buf);
+ ret = sys_stat (data->data, &buf);
if ((ret != 0) || !S_ISDIR (buf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Directory '%s' doesn't exist, exiting.",
- data->data);
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_DIR_NOT_FOUND, "Directory '%s' doesn't "
+ "exist, exiting.", data->data);
ret = -1;
goto out;
}
/* Make sure that conf-dir doesn't contain ".." in path */
if ((gf_strstr (data->data, "/", "..")) == -1) {
ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_CONF_DIR_INVALID,
"%s: invalid conf_dir", data->data);
goto out;
}
@@ -869,81 +591,23 @@ out:
return ret;
}
-server_connection_t *
-get_server_conn_state (xlator_t *this, rpc_transport_t *xprt)
-{
- return (server_connection_t *)xprt->xl_private;
-}
-
-server_connection_t *
-create_server_conn_state (xlator_t *this, rpc_transport_t *xprt)
-{
- server_connection_t *conn = NULL;
- int ret = -1;
-
- conn = GF_CALLOC (1, sizeof (*conn), gf_server_mt_conn_t);
- if (!conn)
- goto out;
-
- pthread_mutex_init (&conn->lock, NULL);
-
- conn->fdtable = gf_fd_fdtable_alloc ();
- if (!conn->fdtable)
- goto out;
-
- conn->ltable = gf_lock_table_new ();
- if (!conn->ltable)
- goto out;
-
- conn->this = this;
-
- xprt->xl_private = conn;
-
- ret = 0;
-out:
- if (ret)
- destroy_server_conn_state (conn);
-
- return conn;
-}
-
-void
-destroy_server_conn_state (server_connection_t *conn)
-{
- if (!conn) {
- return;
- }
-
- if (conn->ltable) {
- /* TODO */
- //FREE (conn->ltable);
- ;
- }
-
- if (conn->fdtable)
- gf_fd_fdtable_destroy (conn->fdtable);
-
- pthread_mutex_destroy (&conn->lock);
-
- GF_FREE (conn);
-
- return;
-}
-
void
print_caller (char *str, int size, call_frame_t *frame)
{
- int filled = 0;
server_state_t *state = NULL;
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+
state = CALL_STATE (frame);
- filled += snprintf (str + filled, size - filled,
- " Callid=%"PRId64", Client=%s",
- frame->root->unique,
- state->xprt->peerinfo.identifier);
+ snprintf (str, size,
+ " Callid=%"PRId64", Client=%s",
+ frame->root->unique,
+ state->xprt->peerinfo.identifier);
+out:
return;
}
@@ -953,6 +617,8 @@ server_print_resolve (char *str, int size, server_resolve_t *resolve)
{
int filled = 0;
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+
if (!resolve) {
snprintf (str, size, "<nul>");
return;
@@ -963,15 +629,6 @@ server_print_resolve (char *str, int size, server_resolve_t *resolve)
if (resolve->fd_no != -1)
filled += snprintf (str + filled, size - filled,
"fd=%"PRId64",", (uint64_t) resolve->fd_no);
- if (resolve->ino)
- filled += snprintf (str + filled, size - filled,
- "ino=%"PRIu64",", (uint64_t) resolve->ino);
- if (resolve->par)
- filled += snprintf (str + filled, size - filled,
- "par=%"PRIu64",", (uint64_t) resolve->par);
- if (resolve->gen)
- filled += snprintf (str + filled, size - filled,
- "gen=%"PRIu64",", (uint64_t) resolve->gen);
if (resolve->bname)
filled += snprintf (str + filled, size - filled,
"bname=%s,", resolve->bname);
@@ -979,7 +636,9 @@ server_print_resolve (char *str, int size, server_resolve_t *resolve)
filled += snprintf (str + filled, size - filled,
"path=%s", resolve->path);
- filled += snprintf (str + filled, size - filled, "}");
+ snprintf (str + filled, size - filled, "}");
+out:
+ return;
}
@@ -988,6 +647,8 @@ server_print_loc (char *str, int size, loc_t *loc)
{
int filled = 0;
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+
if (!loc) {
snprintf (str, size, "<nul>");
return;
@@ -1006,7 +667,9 @@ server_print_loc (char *str, int size, loc_t *loc)
filled += snprintf (str + filled, size - filled,
"parent=%p", loc->parent);
- filled += snprintf (str + filled, size - filled, "}");
+ snprintf (str + filled, size - filled, "}");
+out:
+ return;
}
@@ -1015,6 +678,8 @@ server_print_params (char *str, int size, server_state_t *state)
{
int filled = 0;
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+
filled += snprintf (str + filled, size - filled,
" Params={");
@@ -1052,25 +717,21 @@ server_print_params (char *str, int size, server_state_t *state)
filled += snprintf (str + filled, size - filled,
"volume=%s,", state->volume);
- filled += snprintf (str + filled, size - filled,
- "bound_xl=%s}", state->conn->bound_xl->name);
+/* FIXME
+ snprintf (str + filled, size - filled,
+ "bound_xl=%s}", state->client->bound_xl->name);
+*/
+out:
+ return;
}
+
int
server_resolve_is_empty (server_resolve_t *resolve)
{
if (resolve->fd_no != -1)
return 0;
- if (resolve->ino != 0)
- return 0;
-
- if (resolve->gen != 0)
- return 0;
-
- if (resolve->par != 0)
- return 0;
-
if (resolve->path != 0)
return 0;
@@ -1080,6 +741,7 @@ server_resolve_is_empty (server_resolve_t *resolve)
return 1;
}
+
void
server_print_reply (call_frame_t *frame, int op_ret, int op_errno)
{
@@ -1090,11 +752,13 @@ server_print_reply (call_frame_t *frame, int op_ret, int op_errno)
char fdstr[32];
char *op = "UNKNOWN";
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+
this = frame->this;
conf = this->private;
- if (!conf->trace)
- return;
+ GF_VALIDATE_OR_GOTO ("server", conf, out);
+ GF_VALIDATE_OR_GOTO ("server", conf->trace, out);
state = CALL_STATE (frame);
@@ -1102,10 +766,7 @@ server_print_reply (call_frame_t *frame, int op_ret, int op_errno)
switch (frame->root->type) {
case GF_OP_TYPE_FOP:
- op = gf_fop_list[frame->root->op];
- break;
- case GF_OP_TYPE_MGMT:
- op = gf_mgmt_list[frame->root->op];
+ op = (char *)gf_fop_list[frame->root->op];
break;
default:
op = "";
@@ -1115,33 +776,38 @@ server_print_reply (call_frame_t *frame, int op_ret, int op_errno)
if (state->fd)
snprintf (fdstr, 32, " fd=%p", state->fd);
- gf_log (this->name, GF_LOG_NORMAL,
- "%s%s => (%d, %d)%s",
- op, caller, op_ret, op_errno, fdstr);
+ gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_SERVER_MSG,
+ "%s%s => (%d, %d)%s", op, caller, op_ret, op_errno, fdstr);
+out:
+ return;
}
void
server_print_request (call_frame_t *frame)
{
- server_conf_t *conf = NULL;
- xlator_t *this = NULL;
+ server_conf_t *conf = NULL;
+ xlator_t *this = NULL;
server_state_t *state = NULL;
+ char *op = "UNKNOWN";
char resolve_vars[256];
char resolve2_vars[256];
char loc_vars[256];
char loc2_vars[256];
char other_vars[512];
char caller[512];
- char *op = "UNKNOWN";
+
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
this = frame->this;
conf = this->private;
- state = CALL_STATE (frame);
+ GF_VALIDATE_OR_GOTO ("server", conf, out);
if (!conf->trace)
- return;
+ goto out;
+
+ state = CALL_STATE (frame);
memset (resolve_vars, '\0', 256);
memset (resolve2_vars, '\0', 256);
@@ -1165,31 +831,33 @@ server_print_request (call_frame_t *frame)
switch (frame->root->type) {
case GF_OP_TYPE_FOP:
- op = gf_fop_list[frame->root->op];
- break;
- case GF_OP_TYPE_MGMT:
- op = gf_mgmt_list[frame->root->op];
+ op = (char *)gf_fop_list[frame->root->op];
break;
default:
op = "";
break;
}
- gf_log (this->name, GF_LOG_NORMAL,
- "%s%s%s%s%s%s%s",
- op, caller,
+ gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_SERVER_MSG,
+ "%s%s%s%s%s%s%s", op, caller,
resolve_vars, loc_vars, resolve2_vars, loc2_vars, other_vars);
+out:
+ return;
}
+
int
serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
{
- gf_dirent_t *entry = NULL;
- gfs3_dirplist *trav = NULL;
- gfs3_dirplist *prev = NULL;
- int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gfs3_dirplist *trav = NULL;
+ gfs3_dirplist *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("server", entries, out);
+ GF_VALIDATE_OR_GOTO ("server", rsp, out);
- list_for_each_entry (entry, &entries->list, list) {
+ list_for_each_entry (entry, &entries->list, list) {
trav = GF_CALLOC (1, sizeof (*trav), gf_server_mt_dirent_rsp_t);
if (!trav)
goto out;
@@ -1198,21 +866,54 @@ serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
trav->d_off = entry->d_off;
trav->d_len = entry->d_len;
trav->d_type = entry->d_type;
- //trav->name = memdup (entry->d_name, entry->d_len + 1);
trav->name = entry->d_name;
gf_stat_from_iatt (&trav->stat, &entry->d_stat);
+ /* if 'dict' is present, pack it */
+ if (entry->dict) {
+ trav->dict.dict_len = dict_serialized_length (entry->dict);
+ if (trav->dict.dict_len > UINT_MAX) {
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+ PS_MSG_INVALID_ENTRY, "failed to get "
+ "serialized length of reply dict");
+ errno = EINVAL;
+ trav->dict.dict_len = 0;
+ goto out;
+ }
+
+ trav->dict.dict_val = GF_CALLOC (1, trav->dict.dict_len,
+ gf_server_mt_rsp_buf_t);
+ if (!trav->dict.dict_val) {
+ errno = ENOMEM;
+ trav->dict.dict_len = 0;
+ goto out;
+ }
+
+ ret = dict_serialize (entry->dict, trav->dict.dict_val);
+ if (ret < 0) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ PS_MSG_DICT_SERIALIZE_FAIL,
+ "failed to serialize reply dict");
+ errno = -ret;
+ trav->dict.dict_len = 0;
+ goto out;
+ }
+ }
+
if (prev)
prev->nextentry = trav;
else
rsp->reply = trav;
prev = trav;
- }
+ trav = NULL;
+ }
ret = 0;
out:
+ GF_FREE (trav);
+
return ret;
}
@@ -1220,12 +921,15 @@ out:
int
serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp)
{
- gf_dirent_t *entry = NULL;
- gfs3_dirlist *trav = NULL;
- gfs3_dirlist *prev = NULL;
- int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gfs3_dirlist *trav = NULL;
+ gfs3_dirlist *prev = NULL;
+ int ret = -1;
- list_for_each_entry (entry, &entries->list, list) {
+ GF_VALIDATE_OR_GOTO ("server", entries, out);
+ GF_VALIDATE_OR_GOTO ("server", rsp, out);
+
+ list_for_each_entry (entry, &entries->list, list) {
trav = GF_CALLOC (1, sizeof (*trav), gf_server_mt_dirent_rsp_t);
if (!trav)
goto out;
@@ -1240,18 +944,19 @@ serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp)
rsp->reply = trav;
prev = trav;
- }
+ }
ret = 0;
out:
return ret;
}
+
int
readdir_rsp_cleanup (gfs3_readdir_rsp *rsp)
{
- gfs3_dirlist *prev = NULL;
- gfs3_dirlist *trav = NULL;
+ gfs3_dirlist *prev = NULL;
+ gfs3_dirlist *trav = NULL;
trav = rsp->reply;
prev = trav;
@@ -1264,6 +969,7 @@ readdir_rsp_cleanup (gfs3_readdir_rsp *rsp)
return 0;
}
+
int
readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp)
{
@@ -1274,9 +980,2811 @@ readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp)
prev = trav;
while (trav) {
trav = trav->nextentry;
+ GF_FREE (prev->dict.dict_val);
+ GF_FREE (prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+/*
+ * Build the reply chain for a getactivelk response.
+ *
+ * Walks the entries linked on locklist->list and, for each one, allocates
+ * a gfs3_locklist node that is appended to the chain rooted at rsp->reply.
+ *
+ * The entry's flock.l_type is rewritten IN PLACE to its GF_LK_F_*
+ * counterpart before being copied into the node; an unrecognised type is
+ * logged and copied as it is.  client_uid is shared by pointer, it is not
+ * duplicated here.
+ *
+ * Returns 0 once every entry has been converted.  Returns -1 when a node
+ * cannot be allocated (and, presumably, when either argument fails the
+ * GF_VALIDATE_OR_GOTO check).  Nodes linked before a failure remain
+ * attached to rsp->reply.
+ */
+int
+serialize_rsp_locklist (lock_migration_info_t *locklist,
+                        gfs3_getactivelk_rsp *rsp)
+{
+        lock_migration_info_t *lock = NULL;
+        gfs3_locklist         *node = NULL;
+        gfs3_locklist         *tail = NULL;
+        int                    ret  = -1;
+
+        GF_VALIDATE_OR_GOTO ("server", locklist, out);
+        GF_VALIDATE_OR_GOTO ("server", rsp, out);
+
+        list_for_each_entry (lock, &locklist->list, list) {
+                node = GF_CALLOC (1, sizeof (*node), gf_server_mt_lock_mig_t);
+                if (node == NULL)
+                        goto out;
+
+                /* translate the host lock type to its on-wire value */
+                switch (lock->flock.l_type) {
+                case F_RDLCK:
+                        lock->flock.l_type = GF_LK_F_RDLCK;
+                        break;
+                case F_WRLCK:
+                        lock->flock.l_type = GF_LK_F_WRLCK;
+                        break;
+                case F_UNLCK:
+                        lock->flock.l_type = GF_LK_F_UNLCK;
+                        break;
+                default:
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, PS_MSG_LOCK_ERROR,
+                                "Unknown lock type: %"PRId32"!",
+                                lock->flock.l_type);
+                        break;
+                }
+
+                gf_proto_flock_from_flock (&node->flock, &lock->flock);
+                node->lk_flags   = lock->lk_flags;
+                node->client_uid = lock->client_uid;
+
+                /* first node becomes the head, later ones hang off the tail */
+                if (tail == NULL)
+                        rsp->reply = node;
+                else
+                        tail->nextentry = node;
+
+                tail = node;
+                /* the chain owns the node now; keep 'out' from freeing it */
+                node = NULL;
+        }
+
+        ret = 0;
+out:
+        GF_FREE (node);
+        return ret;
+}
+
+/*
+ * Release every node of the gfs3_locklist chain rooted at rsp->reply.
+ * rsp->reply itself is left untouched.  Always returns 0.
+ */
+int
+getactivelkinfo_rsp_cleanup (gfs3_getactivelk_rsp *rsp)
+{
+        gfs3_locklist *trav = rsp->reply;
+        gfs3_locklist *prev = trav;
+
+        while (trav != NULL) {
+                /* advance first: 'prev' is the node about to be freed */
+                trav = trav->nextentry;
GF_FREE (prev);
prev = trav;
}
return 0;
}
+
+/*
+ * Look for special commands encoded in a getxattr key.
+ *
+ * The only command handled here is a key matching the glob
+ * "*list*mount*point*": every transport on conf->xprt_list has its peer
+ * identifier logged, with conf->mutex held during the walk.
+ *
+ * Always returns 0, including when the translator has no private conf.
+ */
+int
+gf_server_check_getxattr_cmd (call_frame_t *frame, const char *key)
+{
+        server_conf_t   *conf      = frame->this->private;
+        rpc_transport_t *transport = NULL;
+
+        if (conf == NULL)
+                return 0;
+
+        if (!fnmatch ("*list*mount*point*", key, 0)) {
+                /* list all the client protocol connecting to this process */
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        list_for_each_entry (transport, &conf->xprt_list,
+                                             list) {
+                                gf_msg ("mount-point-list", GF_LOG_INFO, 0,
+                                        PS_MSG_MOUNT_PT_FAIL,
+                                        "%s", transport->peerinfo.identifier);
+                        }
+                }
+                pthread_mutex_unlock (&conf->mutex);
+        }
+
+        /* Add more options/keys here */
+
+        return 0;
+}
+
+
+/*
+ * Look for special commands encoded in the keys of a setxattr dict.
+ *
+ * When any key of @dict matches the glob "*io*stat*dump", the byte
+ * counters of every transport on conf->xprt_list are summed and the
+ * totals are logged.
+ *
+ * The list is walked with conf->mutex held, the same way
+ * gf_server_check_getxattr_cmd() walks it, so a transport cannot be
+ * unlinked from the list in the middle of the traversal.
+ *
+ * Always returns 0, including when the translator has no private conf or
+ * @dict is NULL.
+ */
+int
+gf_server_check_setxattr_cmd (call_frame_t *frame, dict_t *dict)
+{
+
+        server_conf_t   *conf        = NULL;
+        rpc_transport_t *xprt        = NULL;
+        uint64_t         total_read  = 0;
+        uint64_t         total_write = 0;
+
+        conf = frame->this->private;
+        if (!conf || !dict)
+                return 0;
+
+        if (dict_foreach_fnmatch (dict, "*io*stat*dump",
+                                  dict_null_foreach_fn, NULL) > 0) {
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        list_for_each_entry (xprt, &conf->xprt_list, list) {
+                                total_read  += xprt->total_bytes_read;
+                                total_write += xprt->total_bytes_write;
+                        }
+                }
+                pthread_mutex_unlock (&conf->mutex);
+
+                /* log outside the lock; only the local totals are needed */
+                gf_msg ("stats", GF_LOG_INFO, 0, PS_MSG_RW_STAT,
+                        "total-read %"PRIu64", total-write %"PRIu64,
+                        total_read, total_write);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Cancel the grace timer recorded in the client's server context, if any.
+ *
+ * The timer pointer is detached from the context while holding
+ * fdtable_lock; the actual gf_timer_call_cancel() call is made after the
+ * lock has been dropped.
+ *
+ * Returns _gf_true when a timer was found and cancelled, _gf_false when
+ * the arguments are invalid, the server context cannot be obtained, or no
+ * timer was set.
+ */
+gf_boolean_t
+server_cancel_grace_timer (xlator_t *this, client_t *client)
+{
+        server_ctx_t *ctx     = NULL;
+        gf_timer_t   *pending = NULL;
+
+        if (this == NULL || client == NULL) {
+                gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, PS_MSG_INVALID_ENTRY,
+                        "Invalid arguments to cancel connection timer");
+                return _gf_false;
+        }
+
+        ctx = server_ctx_get (client, client->this);
+        if (ctx == NULL) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED,
+                        "server_ctx_get() failed");
+                return _gf_false;
+        }
+
+        LOCK (&ctx->fdtable_lock);
+        {
+                pending = ctx->grace_timer;
+                if (pending != NULL) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                PS_MSG_GRACE_TIMER_CANCELLED,
+                                "Cancelling the grace timer");
+                        ctx->grace_timer = NULL;
+                }
+        }
+        UNLOCK (&ctx->fdtable_lock);
+
+        if (pending == NULL)
+                return _gf_false;
+
+        gf_timer_call_cancel (this->ctx, pending);
+        return _gf_true;
+}
+
+/*
+ * Return the server context stored on @client for @xlator, creating and
+ * registering a new one when none is stored yet.
+ *
+ * A new context is zero-allocated, given a fresh fd table from
+ * gf_fd_fdtable_alloc() and an initialised fdtable_lock, and then handed
+ * to client_ctx_set().
+ *
+ * Returns the context, or NULL when the context or its fd table cannot be
+ * allocated or client_ctx_set() reports failure.
+ *
+ * NOTE(review): lookup and registration are two separate steps here;
+ * whether two callers can race to create a context for the same client
+ * depends on client_ctx_set(), which is not visible from this function.
+ */
+server_ctx_t*
+server_ctx_get (client_t *client, xlator_t *xlator)
+{
+        void         *tmp = NULL;
+        server_ctx_t *ctx = NULL;
+
+        client_ctx_get (client, xlator, &tmp);
+
+        ctx = tmp;
+
+        if (ctx != NULL)
+                goto out;
+
+        ctx = GF_CALLOC (1, sizeof (server_ctx_t), gf_server_mt_server_conf_t);
+
+        if (ctx == NULL)
+                goto out;
+
+        /* ctx->lk_version = 0; redundant */
+        ctx->fdtable = gf_fd_fdtable_alloc ();
+
+        if (ctx->fdtable == NULL) {
+                GF_FREE (ctx);
+                ctx = NULL;
+                goto out;
+        }
+
+        LOCK_INIT (&ctx->fdtable_lock);
+
+        if (client_ctx_set (client, xlator, ctx) != 0) {
+                LOCK_DESTROY (&ctx->fdtable_lock);
+                /*
+                 * Tear the table down with its own destructor: a bare
+                 * GF_FREE() would release only the top-level structure
+                 * and leak whatever gf_fd_fdtable_alloc() set up inside.
+                 */
+                gf_fd_fdtable_destroy (ctx->fdtable);
+                GF_FREE (ctx);
+                ctx = NULL;
+        }
+
+out:
+        return ctx;
+}
+
+/*
+ * Record the login credentials a client presented, provided they match
+ * the brick's "auth.login.<brick>.allow" / "auth.login.<user>.password"
+ * configuration.
+ *
+ * @input_params  : dict sent by the client; "username", "password" and
+ *                  "remote-subvolume" are read from it.
+ * @config_params : dict holding the auth.login.* options.
+ * @client        : on a successful match, client->auth.username and
+ *                  client->auth.passwd receive duplicated copies of the
+ *                  presented credentials.
+ *
+ * Return value:
+ *    0  nothing to do (no username sent, no allow list configured, no
+ *       allow pattern matched the username, or the allow-key string could
+ *       not be built) or the credentials matched and were stored;
+ *    1  the username matched but the password did not (value kept from
+ *       the previous implementation);
+ *   -1  "remote-subvolume" missing, no password configured for the user,
+ *       or an allocation failed;
+ *   the dict_get_str() error when "password" is missing.
+ */
+int
+auth_set_username_passwd (dict_t *input_params, dict_t *config_params,
+                          client_t *client)
+{
+        int      ret          = 0;
+        data_t  *allow_user   = NULL;
+        data_t  *passwd_data  = NULL;
+        char    *username     = NULL;
+        char    *password     = NULL;
+        char    *brick_name   = NULL;
+        char    *searchstr    = NULL;
+        char    *username_str = NULL;
+        char    *tmp          = NULL;
+        char    *username_cpy = NULL;
+
+        ret = dict_get_str (input_params, "username", &username);
+        if (ret) {
+                gf_msg_debug ("auth/login", 0, "username not found, returning "
+                              "DONT-CARE");
+                /* For non trusted clients username and password
+                   will not be there. So dont reject the client.
+                */
+                ret = 0;
+                goto out;
+        }
+
+        ret = dict_get_str (input_params, "password", &password);
+        if (ret) {
+                gf_msg ("auth/login", GF_LOG_WARNING, 0,
+                        PS_MSG_DICT_GET_FAILED,
+                        "password not found, returning DONT-CARE");
+                goto out;
+        }
+
+        ret = dict_get_str (input_params, "remote-subvolume", &brick_name);
+        if (ret) {
+                gf_msg ("auth/login", GF_LOG_ERROR, 0, PS_MSG_DICT_GET_FAILED,
+                        "remote-subvolume not specified");
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_asprintf (&searchstr, "auth.login.%s.allow", brick_name);
+        if (-1 == ret) {
+                ret = 0;
+                goto out;
+        }
+        /*
+         * On success gf_asprintf() returns something other than -1
+         * (presumably the formatted length).  Reset the status so that
+         * value is not returned when there is no allow list or no
+         * pattern matches below.
+         */
+        ret = 0;
+
+        allow_user = dict_get (config_params, searchstr);
+        GF_FREE (searchstr);
+
+        if (!allow_user)
+                goto out;
+
+        /* strtok_r() modifies its input, so tokenize a private copy */
+        username_cpy = gf_strdup (allow_user->data);
+        if (!username_cpy) {
+                ret = -1;
+                goto out;
+        }
+
+        for (username_str = strtok_r (username_cpy, " ,", &tmp);
+             username_str;
+             username_str = strtok_r (NULL, " ,", &tmp)) {
+                /* allow-list entries are glob patterns */
+                if (fnmatch (username_str, username, 0) != 0)
+                        continue;
+
+                ret = gf_asprintf (&searchstr, "auth.login.%s.password",
+                                   username);
+                if (-1 == ret)
+                        goto out;
+
+                passwd_data = dict_get (config_params, searchstr);
+                GF_FREE (searchstr);
+
+                if (!passwd_data) {
+                        gf_msg ("auth/login", GF_LOG_ERROR, 0,
+                                PS_MSG_LOGIN_ERROR, "wrong "
+                                "username/password "
+                                "combination");
+                        ret = -1;
+                        goto out;
+                }
+
+                if (strcmp (data_to_str (passwd_data), password) == 0) {
+                        ret = 0;
+                        client->auth.username = gf_strdup (username);
+                        client->auth.passwd = gf_strdup (password);
+                } else {
+                        /*
+                         * The old expression could only yield 0 or 1, so
+                         * its "ret == -1" check never fired and this
+                         * message was never logged.  The returned value
+                         * for a mismatch stays 1.
+                         */
+                        ret = 1;
+                        gf_msg ("auth/login", GF_LOG_ERROR, 0,
+                                PS_MSG_LOGIN_ERROR, "wrong "
+                                "password for user %s",
+                                username);
+                }
+                /* only the first matching allow pattern is considered */
+                break;
+        }
+
+out:
+        GF_FREE (username_cpy);
+
+        return ret;
+}
+
+/*
+ * Choose the inode to use for @gfid: the table's existing root inode when
+ * __is_root_gfid() accepts the gfid, otherwise the result of
+ * inode_new() on @itable.
+ */
+inode_t *
+server_inode_new (inode_table_t *itable, uuid_t gfid)
+{
+        return __is_root_gfid (gfid) ? itable->root : inode_new (itable);
+}
+
+/*
+ * Convert the gfs3_locklist chain of a setactivelk request into a list of
+ * lock_migration_info_t entries linked on lmi->list.
+ *
+ * lmi->list is (re)initialised first.  For each wire entry a new
+ * lock_migration_info_t is allocated, its flock and lk_flags are copied
+ * over, client_uid is duplicated, and the entry is appended to the tail
+ * of lmi->list.
+ *
+ * Returns 0 when the whole chain was converted, -1 when an allocation
+ * fails.  Entries appended before a failure stay on lmi->list.
+ * NOTE(review): releasing them is presumably left to the caller - confirm.
+ */
+int
+unserialize_req_locklist (gfs3_setactivelk_req *req,
+                          lock_migration_info_t *lmi)
+{
+        struct gfs3_locklist  *trav = NULL;
+        lock_migration_info_t *temp = NULL;
+        int                    ret  = -1;
+
+        trav = req->request;
+
+        INIT_LIST_HEAD (&lmi->list);
+
+        while (trav) {
+                temp = GF_CALLOC (1, sizeof (*temp), gf_common_mt_lock_mig);
+                if (temp == NULL) {
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, 0, "No memory");
+                        goto out;
+                }
+
+                INIT_LIST_HEAD (&temp->list);
+
+                gf_proto_flock_to_flock (&trav->flock, &temp->flock);
+
+                temp->lk_flags = trav->lk_flags;
+
+                temp->client_uid = gf_strdup (trav->client_uid);
+                if (trav->client_uid && !temp->client_uid) {
+                        /* do not queue an entry that lost its client uid */
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0, 0, "No memory");
+                        GF_FREE (temp);
+                        goto out;
+                }
+
+                list_add_tail (&temp->list, &lmi->list);
+
+                trav = trav->nextentry;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+int
+server_populate_compound_request (gfs3_compound_req *req, call_frame_t *frame,
+ default_args_t *this_args,
+ int index)
+{
+ int op_errno = 0;
+ int ret = -1;
+ struct iovec req_iovec[MAX_IOVEC] = { {0,} };
+ compound_req *this_req = NULL;
+ server_state_t *state = CALL_STATE (frame);
+
+ this_req = &req->compound_req_array.compound_req_array_val[index];
+
+ switch (this_req->fop_enum) {
+ case GF_FOP_STAT:
+ {
+ gfs3_stat_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_stat_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_stat_store (this_args, &state->loc, this_args->xdata);
+ break;
+ }
+ case GF_FOP_READLINK:
+ {
+ gfs3_readlink_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_readlink_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_readlink_store (this_args, &state->loc,
+ args->size, this_args->xdata);
+ break;
+ }
+ case GF_FOP_MKNOD:
+ {
+ gfs3_mknod_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_mknod_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_mknod_store (this_args, &state->loc, args->mode,
+ args->dev, args->umask,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_MKDIR:
+ {
+ gfs3_mkdir_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_mkdir_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_mkdir_store (this_args, &state->loc, args->mode,
+ args->umask, this_args->xdata);
+ break;
+ }
+ case GF_FOP_UNLINK:
+ {
+ gfs3_unlink_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_unlink_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_unlink_store (this_args, &state->loc,
+ args->xflags, this_args->xdata);
+ break;
+ }
+ case GF_FOP_RMDIR:
+ {
+ gfs3_rmdir_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_rmdir_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_rmdir_store (this_args, &state->loc,
+ args->xflags, this_args->xdata);
+ break;
+ }
+ case GF_FOP_SYMLINK:
+ {
+ gfs3_symlink_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_symlink_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_symlink_store (this_args, args->linkname,
+ &state->loc,
+ args->umask, this_args->xdata);
+
+ this_args->loc.inode = inode_new (state->itable);
+
+ break;
+ }
+ case GF_FOP_RENAME:
+ {
+ gfs3_rename_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_rename_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_rename_store (this_args, &state->loc,
+ &state->loc2, this_args->xdata);
+ break;
+ }
+ case GF_FOP_LINK:
+ {
+ gfs3_link_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_link_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_link_store (this_args, &state->loc,
+ &state->loc2, this_args->xdata);
+
+ this_args->loc2.inode = inode_ref (this_args->loc.inode);
+
+ break;
+ }
+ case GF_FOP_TRUNCATE:
+ {
+ gfs3_truncate_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_truncate_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_truncate_store (this_args, &state->loc,
+ args->offset, this_args->xdata);
+ break;
+ }
+ case GF_FOP_OPEN:
+ {
+ gfs3_open_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_open_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_open_store (this_args, &state->loc,
+ args->flags, state->fd, this_args->xdata);
+
+ this_args->fd = fd_create (this_args->loc.inode,
+ frame->root->pid);
+ this_args->fd->flags = this_args->flags;
+
+ break;
+ }
+ case GF_FOP_READ:
+ {
+ gfs3_read_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_read_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_readv_store (this_args, state->fd, args->size,
+ args->offset, args->flag,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_WRITE:
+ {
+ gfs3_write_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_write_req;
+
+ /*TODO : What happens when payload count is more than one? */
+ req_iovec[0].iov_base = state->payload_vector[0].iov_base +
+ state->write_length;
+ req_iovec[0].iov_len = args->size;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_writev_store (this_args, state->fd,
+ req_iovec,
+ args->size, args->offset,
+ args->flag,
+ this_args->iobref, this_args->xdata);
+ state->write_length += args->size;
+ break;
+ }
+ case GF_FOP_STATFS:
+ {
+ gfs3_statfs_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_statfs_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_statfs_store (this_args, &state->loc,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_FLUSH:
+ {
+ gfs3_flush_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_flush_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_flush_store (this_args, state->fd, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FSYNC:
+ {
+ gfs3_fsync_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fsync_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_fsync_store (this_args, state->fd,
+ args->data, this_args->xdata);
+ break;
+ }
+ case GF_FOP_SETXATTR:
+ {
+ gfs3_setxattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_setxattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xattr,
+ args->dict.dict_val,
+ args->dict.dict_len, ret,
+ op_errno, out);
+ args_setxattr_store (this_args, &state->loc,
+ this_args->xattr, args->flags,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_GETXATTR:
+ {
+ gfs3_getxattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_getxattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ gf_server_check_getxattr_cmd (frame, args->name);
+
+ args_getxattr_store (this_args, &state->loc,
+ args->name, this_args->xdata);
+ break;
+ }
+ case GF_FOP_REMOVEXATTR:
+ {
+ gfs3_removexattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_removexattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_removexattr_store (this_args, &state->loc,
+ args->name,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_OPENDIR:
+ {
+ gfs3_opendir_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_opendir_req;
+
+ this_args->fd = fd_create (this_args->loc.inode,
+ frame->root->pid);
+ if (!this_args->fd) {
+ gf_msg ("server", GF_LOG_ERROR, 0,
+ PS_MSG_FD_CREATE_FAILED,
+ "could not create the fd");
+ goto out;
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_opendir_store (this_args, &state->loc,
+ state->fd, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FSYNCDIR:
+ {
+ gfs3_fsyncdir_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fsyncdir_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_fsyncdir_store (this_args, state->fd,
+ args->data, this_args->xdata);
+ break;
+ }
+ case GF_FOP_ACCESS:
+ {
+ gfs3_access_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_access_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_access_store (this_args, &state->loc,
+ args->mask, this_args->xdata);
+ break;
+ }
+ case GF_FOP_CREATE:
+ {
+ gfs3_create_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_create_req;
+
+ state->loc.inode = inode_new (state->itable);
+
+ state->fd = fd_create (state->loc.inode, frame->root->pid);
+ if (!state->fd) {
+ gf_msg ("server", GF_LOG_ERROR, 0,
+ PS_MSG_FD_CREATE_FAILED,
+ "fd creation for the inode %s failed",
+ state->loc.inode ?
+ uuid_utoa (state->loc.inode->gfid):NULL);
+ goto out;
+ }
+ state->fd->flags = state->flags;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_create_store (this_args, &state->loc,
+ args->flags, args->mode,
+ args->umask, state->fd,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_FTRUNCATE:
+ {
+ gfs3_ftruncate_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_ftruncate_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_ftruncate_store (this_args, state->fd,
+ args->offset,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_FSTAT:
+ {
+ gfs3_fstat_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fstat_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_fstat_store (this_args, state->fd, this_args->xdata);
+ break;
+ }
+ case GF_FOP_LK:
+ {
+ gfs3_lk_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_lk_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ switch (args->cmd) {
+ case GF_LK_GETLK:
+ this_args->cmd = F_GETLK;
+ break;
+ case GF_LK_SETLK:
+ this_args->cmd = F_SETLK;
+ break;
+ case GF_LK_SETLKW:
+ this_args->cmd = F_SETLKW;
+ break;
+ case GF_LK_RESLK_LCK:
+ this_args->cmd = F_RESLK_LCK;
+ break;
+ case GF_LK_RESLK_LCKW:
+ this_args->cmd = F_RESLK_LCKW;
+ break;
+ case GF_LK_RESLK_UNLCK:
+ this_args->cmd = F_RESLK_UNLCK;
+ break;
+ case GF_LK_GETLK_FD:
+ this_args->cmd = F_GETLK_FD;
+ break;
+ }
+
+ gf_proto_flock_to_flock (&args->flock, &this_args->lock);
+
+ switch (args->type) {
+ case GF_LK_F_RDLCK:
+ this_args->lock.l_type = F_RDLCK;
+ break;
+ case GF_LK_F_WRLCK:
+ this_args->lock.l_type = F_WRLCK;
+ break;
+ case GF_LK_F_UNLCK:
+ this_args->lock.l_type = F_UNLCK;
+ break;
+ default:
+ gf_msg (frame->root->client->bound_xl->name,
+ GF_LOG_ERROR,
+ 0, PS_MSG_LOCK_ERROR, "fd - %"PRId64" (%s):"
+ " Unknown "
+ "lock type: %"PRId32"!", state->resolve.fd_no,
+ uuid_utoa (state->fd->inode->gfid),
+ args->type);
+ break;
+ }
+ args_lk_store (this_args, state->fd, this_args->cmd,
+ &this_args->lock, this_args->xdata);
+ break;
+ }
+ case GF_FOP_LOOKUP:
+ {
+ gfs3_lookup_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_lookup_req;
+
+ if (this_args->loc.inode)
+ this_args->loc.inode = server_inode_new (state->itable,
+ state->loc.gfid);
+ else
+ state->is_revalidate = 1;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_lookup_store (this_args, &state->loc, this_args->xdata);
+ break;
+ }
+ case GF_FOP_READDIR:
+ {
+ gfs3_readdir_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_readdir_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_readdir_store (this_args, state->fd, args->size,
+ args->offset, this_args->xdata);
+ break;
+ }
+ case GF_FOP_INODELK:
+ {
+ gfs3_inodelk_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_inodelk_req;
+
+ switch (args->cmd) {
+ case GF_LK_GETLK:
+ this_args->cmd = F_GETLK;
+ break;
+ case GF_LK_SETLK:
+ this_args->cmd = F_SETLK;
+ break;
+ case GF_LK_SETLKW:
+ this_args->cmd = F_SETLKW;
+ break;
+ }
+
+ gf_proto_flock_to_flock (&args->flock, &this_args->lock);
+
+ switch (args->type) {
+ case GF_LK_F_RDLCK:
+ this_args->lock.l_type = F_RDLCK;
+ break;
+ case GF_LK_F_WRLCK:
+ this_args->lock.l_type = F_WRLCK;
+ break;
+ case GF_LK_F_UNLCK:
+ this_args->lock.l_type = F_UNLCK;
+ break;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_inodelk_store (this_args, args->volume, &state->loc,
+ this_args->cmd,
+ &this_args->lock, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FINODELK:
+ {
+ gfs3_finodelk_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_finodelk_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ switch (args->cmd) {
+ case GF_LK_GETLK:
+ this_args->cmd = F_GETLK;
+ break;
+ case GF_LK_SETLK:
+ this_args->cmd = F_SETLK;
+ break;
+ case GF_LK_SETLKW:
+ this_args->cmd = F_SETLKW;
+ break;
+ }
+
+ gf_proto_flock_to_flock (&args->flock, &this_args->lock);
+
+ switch (args->type) {
+ case GF_LK_F_RDLCK:
+ this_args->lock.l_type = F_RDLCK;
+ break;
+ case GF_LK_F_WRLCK:
+ this_args->lock.l_type = F_WRLCK;
+ break;
+ case GF_LK_F_UNLCK:
+ this_args->lock.l_type = F_UNLCK;
+ break;
+ }
+ args_finodelk_store (this_args, args->volume, state->fd,
+ this_args->cmd,
+ &this_args->lock, this_args->xdata);
+ break;
+ }
+ case GF_FOP_ENTRYLK:
+ {
+ gfs3_entrylk_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_entrylk_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_entrylk_store (this_args, args->volume, &state->loc,
+ args->name, args->cmd, args->type,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_FENTRYLK:
+ {
+ gfs3_fentrylk_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fentrylk_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_fentrylk_store (this_args, args->volume, state->fd,
+ args->name, args->cmd, args->type,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_XATTROP:
+ {
+ gfs3_xattrop_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_xattrop_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xattr,
+ (args->dict.dict_val),
+ (args->dict.dict_len), ret,
+ op_errno, out);
+ args_xattrop_store (this_args, &state->loc, args->flags,
+ this_args->xattr, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FXATTROP:
+ {
+ gfs3_fxattrop_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fxattrop_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xattr,
+ (args->dict.dict_val),
+ (args->dict.dict_len), ret,
+ op_errno, out);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_fxattrop_store (this_args, state->fd, args->flags,
+ this_args->xattr, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FGETXATTR:
+ {
+ gfs3_fgetxattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fgetxattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_fgetxattr_store (this_args, state->fd,
+ args->name, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FSETXATTR:
+ {
+ gfs3_fsetxattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fsetxattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xattr,
+ (args->dict.dict_val),
+ (args->dict.dict_len), ret,
+ op_errno, out);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_fsetxattr_store (this_args, state->fd, this_args->xattr,
+ args->flags, this_args->xdata);
+ break;
+ }
+ case GF_FOP_RCHECKSUM:
+ {
+ gfs3_rchecksum_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_rchecksum_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_rchecksum_store (this_args, state->fd, args->offset,
+ args->len, this_args->xdata);
+ break;
+ }
+ case GF_FOP_SETATTR:
+ {
+ gfs3_setattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_setattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ gf_stat_to_iatt (&args->stbuf, &this_args->stat);
+
+ args_setattr_store (this_args, &state->loc, &this_args->stat,
+ args->valid, this_args->xdata);
+ break;
+ }
+ case GF_FOP_FSETATTR:
+ {
+ gfs3_fsetattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fsetattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ gf_stat_to_iatt (&args->stbuf, &this_args->stat);
+
+ args_fsetattr_store (this_args, state->fd, &this_args->stat,
+ args->valid, this_args->xdata);
+ break;
+ }
+ case GF_FOP_READDIRP:
+ {
+ gfs3_readdirp_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_readdirp_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xattr,
+ (args->dict.dict_val),
+ (args->dict.dict_len), ret,
+ op_errno, out);
+
+ args_readdirp_store (this_args, state->fd, args->size,
+ args->offset, this_args->xattr);
+ break;
+ }
+ case GF_FOP_FREMOVEXATTR:
+ {
+ gfs3_fremovexattr_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fremovexattr_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_fremovexattr_store (this_args, state->fd, args->name,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_FALLOCATE:
+ {
+ gfs3_fallocate_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_fallocate_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_fallocate_store (this_args, state->fd, args->flags,
+ args->offset, args->size,
+ this_args->xdata);
+ break;
+ }
+ case GF_FOP_DISCARD:
+ {
+ gfs3_discard_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_discard_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ args_discard_store (this_args, state->fd, args->offset,
+ args->size, this_args->xdata);
+ break;
+ }
+ case GF_FOP_ZEROFILL:
+ {
+ gfs3_zerofill_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_zerofill_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_zerofill_store (this_args, state->fd, args->offset,
+ args->size, this_args->xdata);
+ break;
+ }
+ case GF_FOP_SEEK:
+ {
+ gfs3_seek_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_seek_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+ args_seek_store (this_args, state->fd, args->offset,
+ args->what, this_args->xdata);
+ break;
+ }
+ case GF_FOP_LEASE:
+ {
+ gfs3_lease_req *args = NULL;
+
+ args = &this_req->compound_req_u.compound_lease_req;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ this_args->xdata,
+ args->xdata.xdata_val,
+ args->xdata.xdata_len, ret,
+ op_errno, out);
+
+ gf_proto_lease_to_lease (&args->lease, &state->lease);
+
+ args_lease_store (this_args, &state->loc, &state->lease,
+ this_args->xdata);
+ break;
+ }
+ default:
+ return ENOTSUP;
+ }
+out:
+ return op_errno;
+}
+
+/*
+ * server_populate_compound_response - fill one slot of a compound reply.
+ *
+ * @this:     server translator, handed to the serialize macro and to the
+ *            server_post_*() helpers that take it
+ * @rsp:      compound response being assembled; its response array is
+ *            allocated here on first use, sized by args_cbk->fop_length
+ * @frame:    call frame; the server state is taken from it
+ * @args_cbk: per-fop callback results of the compound operation
+ * @index:    slot to fill, 0 <= index < args_cbk->fop_length
+ *
+ * For the selected slot the callback xdata is serialized into the wire
+ * response, the fop specific payload is filled in when the fop succeeded,
+ * and op_ret/op_errno are copied over.
+ *
+ * Returns 0 once the slot has been filled, EINVAL for an out of range
+ * @index, ENOTSUP for a fop this function does not handle, and ENOMEM when
+ * the response array cannot be allocated or when processing bails out
+ * through the "out" label.
+ *
+ * NOTE(review): the array is only allocated when @rsp does not carry one
+ * yet, which assumes the caller hands in a zero-initialised @rsp (or one it
+ * allocated itself) - confirm against the caller.
+ */
+int
+server_populate_compound_response (xlator_t *this, gfs3_compound_rsp *rsp,
+                                   call_frame_t *frame,
+                                   compound_args_cbk_t *args_cbk, int index)
+{
+        int                     op_errno       = ENOMEM;
+        default_args_cbk_t     *this_args_cbk  = NULL;
+        compound_rsp           *this_rsp       = NULL;
+        server_state_t         *state          = NULL;
+        int                     ret            = 0;
+
+        if (index < 0 || index >= args_cbk->fop_length)
+                return EINVAL;
+
+        state = CALL_STATE (frame);
+
+        /* Allocate the array once. Allocating on every call would leak the
+         * previous array and throw away the slots filled so far. */
+        if (!rsp->compound_rsp_array.compound_rsp_array_val) {
+                rsp->compound_rsp_array.compound_rsp_array_val = GF_CALLOC
+                                                (args_cbk->fop_length,
+                                                 sizeof (compound_rsp),
+                                                 gf_server_mt_compound_rsp_t);
+                if (!rsp->compound_rsp_array.compound_rsp_array_val)
+                        goto out;
+
+                rsp->compound_rsp_array.compound_rsp_array_len =
+                                                args_cbk->fop_length;
+        }
+
+        this_rsp = &rsp->compound_rsp_array.compound_rsp_array_val[index];
+        this_args_cbk = &args_cbk->rsp_list[index];
+
+        /* The slot comes from zeroed memory, so the fop type has to be
+         * recorded before it can select a case below.
+         * NOTE(review): assumes compound_args_cbk_t keeps the per-slot fop
+         * type in enum_list[] - confirm against its declaration. */
+        this_rsp->fop_enum = args_cbk->enum_list[index];
+
+        switch (this_rsp->fop_enum) {
+        case GF_FOP_STAT:
+        {
+                gfs3_stat_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_stat_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (!this_args_cbk->op_ret) {
+                        server_post_stat (rsp_args,
+                                          &this_args_cbk->stat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_READLINK:
+        {
+                gfs3_readlink_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_readlink_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (this_args_cbk->op_ret >= 0) {
+                        server_post_readlink (rsp_args, &this_args_cbk->stat,
+                                              this_args_cbk->buf);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_MKNOD:
+        {
+                gfs3_mknod_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_mknod_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (!this_args_cbk->op_ret) {
+                        server_post_mknod (state, rsp_args,
+                                           &this_args_cbk->stat,
+                                           &this_args_cbk->preparent,
+                                           &this_args_cbk->postparent,
+                                           this_args_cbk->inode);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_MKDIR:
+        {
+                gfs3_mkdir_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_mkdir_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_mkdir (state, rsp_args,
+                                           this_args_cbk->inode,
+                                           &this_args_cbk->stat,
+                                           &this_args_cbk->preparent,
+                                           &this_args_cbk->postparent,
+                                           this_args_cbk->xdata);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_UNLINK:
+        {
+                gfs3_unlink_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_unlink_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (!this_args_cbk->op_ret) {
+                        server_post_unlink (state, rsp_args,
+                                            &this_args_cbk->preparent,
+                                            &this_args_cbk->postparent);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_RMDIR:
+        {
+                gfs3_rmdir_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_rmdir_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (!this_args_cbk->op_ret) {
+                        server_post_rmdir (state, rsp_args,
+                                           &this_args_cbk->preparent,
+                                           &this_args_cbk->postparent);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_SYMLINK:
+        {
+                gfs3_symlink_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_symlink_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_symlink (state, rsp_args,
+                                             this_args_cbk->inode,
+                                             &this_args_cbk->stat,
+                                             &this_args_cbk->preparent,
+                                             &this_args_cbk->postparent,
+                                             this_args_cbk->xdata);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_RENAME:
+        {
+                gfs3_rename_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_rename_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_rename (frame, state, rsp_args,
+                                            &this_args_cbk->stat,
+                                            &this_args_cbk->preparent,
+                                            &this_args_cbk->postparent,
+                                            &this_args_cbk->preparent2,
+                                            &this_args_cbk->postparent2);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_LINK:
+        {
+                gfs3_link_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_link_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_link (state, rsp_args,
+                                          this_args_cbk->inode,
+                                          &this_args_cbk->stat,
+                                          &this_args_cbk->preparent,
+                                          &this_args_cbk->postparent,
+                                          this_args_cbk->xdata);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_TRUNCATE:
+        {
+                gfs3_truncate_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_truncate_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_truncate (rsp_args,
+                                              &this_args_cbk->prestat,
+                                              &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_OPEN:
+        {
+                gfs3_open_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_open_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_open (frame, this, rsp_args,
+                                          this_args_cbk->fd);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_READ:
+        {
+                gfs3_read_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_read_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (this_args_cbk->op_ret >= 0) {
+                        server_post_readv (rsp_args, &this_args_cbk->stat,
+                                           this_args_cbk->op_ret);
+
+                        /* The first read in the compound seeds the reply
+                         * iobref; every read then appends its vectors to
+                         * state->rsp_vector.
+                         * NOTE(review): nothing here bounds rsp_count
+                         * against the capacity of state->rsp_vector -
+                         * confirm the limit is enforced elsewhere. */
+                        if (!state->rsp_iobref) {
+                                state->rsp_iobref = this_args_cbk->iobref;
+                                state->rsp_count = 0;
+                        }
+                        iobref_merge (state->rsp_iobref,
+                                      this_args_cbk->iobref);
+                        memcpy (&state->rsp_vector[state->rsp_count],
+                                this_args_cbk->vector,
+                                (this_args_cbk->count *
+                                 sizeof(state->rsp_vector[0])));
+                        state->rsp_count += this_args_cbk->count;
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_WRITE:
+        {
+                gfs3_write_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_write_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (this_args_cbk->op_ret >= 0) {
+                        server_post_writev (rsp_args,
+                                            &this_args_cbk->prestat,
+                                            &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_STATFS:
+        {
+                gfs3_statfs_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_statfs_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (!this_args_cbk->op_ret) {
+                        server_post_statfs (rsp_args,
+                                            &this_args_cbk->statvfs);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FLUSH:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_flush_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FSYNC:
+        {
+                gfs3_fsync_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fsync_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_fsync (rsp_args,
+                                           &this_args_cbk->prestat,
+                                           &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_SETXATTR:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_setxattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_GETXATTR:
+        {
+                gfs3_getxattr_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_getxattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                /* The fetched xattrs travel in the dict payload. */
+                if (-1 != this_args_cbk->op_ret) {
+                        GF_PROTOCOL_DICT_SERIALIZE (this,
+                                                    this_args_cbk->xattr,
+                                                    &rsp_args->dict.dict_val,
+                                                    rsp_args->dict.dict_len,
+                                                    rsp_args->op_errno, out);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_REMOVEXATTR:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_removexattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_OPENDIR:
+        {
+                gfs3_opendir_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_opendir_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_opendir (frame, this, rsp_args,
+                                             this_args_cbk->fd);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FSYNCDIR:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fsyncdir_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_ACCESS:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_access_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_CREATE:
+        {
+                gfs3_create_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_create_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+
+                /* Status is set first because server_post_create() may
+                 * still fail; its non-zero result is negated into op_errno
+                 * and op_ret is forced to -1. */
+                if (!this_args_cbk->op_ret) {
+                        rsp_args->op_ret = server_post_create (frame,
+                                                rsp_args, state, this,
+                                                this_args_cbk->fd,
+                                                this_args_cbk->inode,
+                                                &this_args_cbk->stat,
+                                                &this_args_cbk->preparent,
+                                                &this_args_cbk->postparent);
+                        if (rsp_args->op_ret) {
+                                rsp_args->op_errno = -rsp_args->op_ret;
+                                rsp_args->op_ret = -1;
+                        }
+                }
+                break;
+        }
+        case GF_FOP_FTRUNCATE:
+        {
+                gfs3_ftruncate_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_ftruncate_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_ftruncate (rsp_args,
+                                               &this_args_cbk->prestat,
+                                               &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FSTAT:
+        {
+                gfs3_fstat_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fstat_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                if (!this_args_cbk->op_ret) {
+                        server_post_fstat (rsp_args,
+                                           &this_args_cbk->stat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_LK:
+        {
+                gfs3_lk_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_lk_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_lk (this, rsp_args, &this_args_cbk->lock);
+                }
+
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_LOOKUP:
+        {
+                gfs3_lookup_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_lookup_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_lookup (rsp_args, frame, state,
+                                            this_args_cbk->inode,
+                                            &this_args_cbk->stat,
+                                            &this_args_cbk->postparent);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_READDIR:
+        {
+                gfs3_readdir_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_readdir_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+
+                /* A failure to convert the entries is reported inside this
+                 * slot only; the slot itself still counts as filled. */
+                if (this_args_cbk->op_ret > 0) {
+                        ret = server_post_readdir (rsp_args,
+                                                   &this_args_cbk->entries);
+                        if (ret < 0) {
+                                rsp_args->op_ret = ret;
+                                rsp_args->op_errno = ENOMEM;
+                        }
+                }
+                break;
+        }
+        case GF_FOP_INODELK:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_inodelk_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FINODELK:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_finodelk_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_ENTRYLK:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_entrylk_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FENTRYLK:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fentrylk_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_XATTROP:
+        {
+                gfs3_xattrop_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_xattrop_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        GF_PROTOCOL_DICT_SERIALIZE (this,
+                                                    this_args_cbk->xattr,
+                                                    &rsp_args->dict.dict_val,
+                                                    rsp_args->dict.dict_len,
+                                                    rsp_args->op_errno, out);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FXATTROP:
+        {
+                gfs3_fxattrop_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fxattrop_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        GF_PROTOCOL_DICT_SERIALIZE (this,
+                                                    this_args_cbk->xattr,
+                                                    &rsp_args->dict.dict_val,
+                                                    rsp_args->dict.dict_len,
+                                                    rsp_args->op_errno, out);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FGETXATTR:
+        {
+                gfs3_fgetxattr_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fgetxattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (-1 != this_args_cbk->op_ret) {
+                        GF_PROTOCOL_DICT_SERIALIZE (this,
+                                                    this_args_cbk->xattr,
+                                                    &rsp_args->dict.dict_val,
+                                                    rsp_args->dict.dict_len,
+                                                    rsp_args->op_errno, out);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FSETXATTR:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                /* NOTE(review): this takes the setxattr member of the
+                 * union, not an fsetxattr one - confirm whether a
+                 * dedicated member exists and should be used here. */
+                rsp_args = &this_rsp->compound_rsp_u.compound_setxattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_RCHECKSUM:
+        {
+                gfs3_rchecksum_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_rchecksum_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_rchecksum (rsp_args,
+                                               this_args_cbk->weak_checksum,
+                                               this_args_cbk->strong_checksum);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_SETATTR:
+        {
+                gfs3_setattr_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_setattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_setattr (rsp_args,
+                                             &this_args_cbk->prestat,
+                                             &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FSETATTR:
+        {
+                gfs3_fsetattr_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fsetattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_fsetattr (rsp_args,
+                                              &this_args_cbk->prestat,
+                                              &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_READDIRP:
+        {
+                gfs3_readdirp_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_readdirp_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                /* Unlike READDIR, a conversion failure aborts the whole
+                 * call: the function leaves through "out" with ENOMEM. */
+                if (this_args_cbk->op_ret > 0) {
+                        ret = server_post_readdirp (rsp_args,
+                                                    &this_args_cbk->entries);
+                        if (ret < 0) {
+                                rsp_args->op_ret = ret;
+                                rsp_args->op_errno = ENOMEM;
+                                goto out;
+                        }
+                        gf_link_inodes_from_dirent (this, state->fd->inode,
+                                                    &this_args_cbk->entries);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FREMOVEXATTR:
+        {
+                gf_common_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fremovexattr_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_FALLOCATE:
+        {
+                gfs3_fallocate_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_fallocate_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_fallocate (rsp_args,
+                                               &this_args_cbk->prestat,
+                                               &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_DISCARD:
+        {
+                gfs3_discard_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_discard_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_discard (rsp_args,
+                                             &this_args_cbk->prestat,
+                                             &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_ZEROFILL:
+        {
+                gfs3_zerofill_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_zerofill_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_zerofill (rsp_args,
+                                              &this_args_cbk->prestat,
+                                              &this_args_cbk->poststat);
+                }
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_SEEK:
+        {
+                gfs3_seek_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_seek_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        case GF_FOP_LEASE:
+        {
+                gfs3_lease_rsp *rsp_args = NULL;
+
+                rsp_args = &this_rsp->compound_rsp_u.compound_lease_rsp;
+
+                GF_PROTOCOL_DICT_SERIALIZE (this, this_args_cbk->xdata,
+                                            &rsp_args->xdata.xdata_val,
+                                            rsp_args->xdata.xdata_len,
+                                            rsp_args->op_errno, out);
+
+                if (!this_args_cbk->op_ret) {
+                        server_post_lease (rsp_args, &this_args_cbk->lease);
+                }
+
+                rsp_args->op_ret = this_args_cbk->op_ret;
+                rsp_args->op_errno = gf_errno_to_error
+                                        (this_args_cbk->op_errno);
+                break;
+        }
+        default:
+                return ENOTSUP;
+        }
+
+        /* Every case that reaches this point filled its slot. */
+        op_errno = 0;
+out:
+        return op_errno;
+}
+/* This works only when the compound fop acts on one loc/inode/gfid.
+ * If compound fops on more than one inode are required, multiple
+ * resolves and resumes will have to be done. This will have to change.
+ * Right now, multiple unlinks, rmdirs, etc. are not supported.
+ * This can be added as a future enhancement.
+ */
+int
+server_get_compound_resolve (server_state_t *state, gfs3_compound_req *req)
+{
+ int i = 0;
+ compound_req *array = &req->compound_req_array.compound_req_array_val[i];
+
+ switch (array->fop_enum) {
+ case GF_FOP_STAT:
+ {
+ gfs3_stat_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_stat_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_READLINK:
+ {
+ gfs3_readlink_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_readlink_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_MKNOD:
+ {
+ gfs3_mknod_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_mknod_req;
+
+ state->resolve.type = RESOLVE_NOT;
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ break;
+ }
+ case GF_FOP_MKDIR:
+ {
+ gfs3_mkdir_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_mkdir_req;
+
+ state->resolve.type = RESOLVE_NOT;
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ break;
+ }
+ case GF_FOP_UNLINK:
+ {
+ gfs3_unlink_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_unlink_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ break;
+ }
+ case GF_FOP_RMDIR:
+ {
+ gfs3_rmdir_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_rmdir_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ break;
+ }
+ case GF_FOP_SYMLINK:
+ {
+ gfs3_symlink_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_symlink_req;
+
+ state->resolve.type = RESOLVE_NOT;
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ break;
+ }
+ case GF_FOP_RENAME:
+ {
+ gfs3_rename_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_rename_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ state->resolve.bname = gf_strdup
+ (this_req.oldbname);
+ memcpy (state->resolve.pargfid, this_req.oldgfid, 16);
+
+ state->resolve2.type = RESOLVE_MAY;
+ state->resolve2.bname = gf_strdup
+ (this_req.newbname);
+ memcpy (state->resolve2.pargfid, this_req.newgfid, 16);
+ break;
+ }
+ case GF_FOP_LINK:
+ {
+ gfs3_link_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_link_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid, this_req.oldgfid, 16);
+
+ state->resolve2.type = RESOLVE_NOT;
+ state->resolve2.bname = gf_strdup
+ (this_req.newbname);
+ memcpy (state->resolve2.pargfid, this_req.newgfid, 16);
+ break;
+ }
+ case GF_FOP_TRUNCATE:
+ {
+ gfs3_truncate_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_truncate_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_OPEN:
+ {
+ gfs3_open_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_open_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_READ:
+ {
+ gfs3_read_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_read_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_WRITE:
+ {
+ gfs3_write_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_write_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_STATFS:
+ {
+ gfs3_statfs_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_statfs_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_FLUSH:
+ {
+ gfs3_flush_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_flush_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_FSYNC:
+ {
+ gfs3_fsync_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fsync_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_SETXATTR:
+ {
+ gfs3_setxattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_setxattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_GETXATTR:
+ {
+ gfs3_getxattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_getxattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_REMOVEXATTR:
+ {
+ gfs3_removexattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_removexattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_OPENDIR:
+ {
+ gfs3_opendir_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_opendir_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_FSYNCDIR:
+ {
+ gfs3_fsyncdir_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fsyncdir_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_ACCESS:
+ {
+ gfs3_access_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_access_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_CREATE:
+ {
+ gfs3_create_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_create_req;
+
+ state->flags = gf_flags_to_flags (this_req.flags);
+ if (state->flags & O_EXCL) {
+ state->resolve.type = RESOLVE_NOT;
+ } else {
+ state->resolve.type = RESOLVE_DONTCARE;
+ }
+
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ break;
+ }
+ case GF_FOP_FTRUNCATE:
+ {
+ gfs3_ftruncate_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_ftruncate_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_FSTAT:
+ {
+ gfs3_fstat_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fstat_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_LK:
+ {
+ gfs3_lk_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_lk_req;
+
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_LOOKUP:
+ {
+ gfs3_lookup_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_lookup_req;
+ state->resolve.type = RESOLVE_DONTCARE;
+
+ if (this_req.bname && strcmp (this_req.bname, "")) {
+ memcpy (state->resolve.pargfid, this_req.pargfid, 16);
+ state->resolve.bname = gf_strdup
+ (this_req.bname);
+ } else {
+ memcpy (state->resolve.gfid, this_req.gfid, 16);
+ }
+ break;
+ }
+ case GF_FOP_READDIR:
+ {
+ gfs3_readdir_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_readdir_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_INODELK:
+ {
+ gfs3_inodelk_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_inodelk_req;
+
+ state->resolve.type = RESOLVE_EXACT;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_FINODELK:
+ {
+ gfs3_finodelk_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_finodelk_req;
+
+ state->resolve.type = RESOLVE_EXACT;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_ENTRYLK:
+ {
+ gfs3_entrylk_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_entrylk_req;
+
+ state->resolve.type = RESOLVE_EXACT;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_FENTRYLK:
+ {
+ gfs3_fentrylk_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fentrylk_req;
+
+ state->resolve.type = RESOLVE_EXACT;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_XATTROP:
+ {
+ gfs3_xattrop_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_xattrop_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_FXATTROP:
+ {
+ gfs3_fxattrop_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fxattrop_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_FGETXATTR:
+ {
+ gfs3_fgetxattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fgetxattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_FSETXATTR:
+ {
+ gfs3_fsetxattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fsetxattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_RCHECKSUM:
+ {
+ gfs3_rchecksum_req this_req = {0,};
+
+ this_req = array[i].compound_req_u.compound_rchecksum_req;
+
+ state->resolve.type = RESOLVE_MAY;
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_SETATTR:
+ {
+ gfs3_setattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_setattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ break;
+ }
+ case GF_FOP_FSETATTR:
+ {
+ gfs3_fsetattr_req this_req = {0,};
+
+ this_req = array[i].compound_req_u.compound_fsetattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_READDIRP:
+ {
+ gfs3_readdirp_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_readdirp_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_FREMOVEXATTR:
+ {
+ gfs3_fremovexattr_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fremovexattr_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_FALLOCATE:
+ {
+ gfs3_fallocate_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_fallocate_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_DISCARD:
+ {
+ gfs3_discard_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_discard_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_ZEROFILL:
+ {
+ gfs3_zerofill_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_zerofill_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_SEEK:
+ {
+ gfs3_seek_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_seek_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid,
+ this_req.gfid, 16);
+ state->resolve.fd_no = this_req.fd;
+ break;
+ }
+ case GF_FOP_LEASE:
+ {
+ gfs3_lease_req this_req = { {0,} };
+
+ this_req = array[i].compound_req_u.compound_lease_req;
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid, this_req.gfid, 16);
+ break;
+ }
+ default:
+ return ENOTSUP;
+ }
+ return 0;
+}
diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h
index 9e295cd1099..200b383e67e 100644
--- a/xlators/protocol/server/src/server-helpers.h
+++ b/xlators/protocol/server/src/server-helpers.h
@@ -1,43 +1,30 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _SERVER_HELPERS_H
#define _SERVER_HELPERS_H
#include "server.h"
+#include "defaults.h"
#define CALL_STATE(frame) ((server_state_t *)frame->root->state)
-#define BOUND_XL(frame) ((xlator_t *) CALL_STATE(frame)->conn->bound_xl)
-
#define XPRT_FROM_FRAME(frame) ((rpc_transport_t *) CALL_STATE(frame)->xprt)
-#define SERVER_CONNECTION(frame) \
- ((server_connection_t *) CALL_STATE(frame)->conn)
-
-#define SERVER_CONF(frame) \
- ((server_conf_t *)XPRT_FROM_FRAME(frame)->this->private)
+#define SERVER_CONF(frame) \
+ ((server_conf_t *)XPRT_FROM_FRAME(frame)->this->private)
#define XPRT_FROM_XLATOR(this) ((((server_conf_t *)this->private))->listen)
-#define INODE_LRU_LIMIT(this) \
- (((server_conf_t *)(this->private))->config.inode_lru_limit)
+#define INODE_LRU_LIMIT(this) \
+ (((server_conf_t *)(this->private))->config.inode_lru_limit)
#define IS_ROOT_INODE(inode) (inode == inode->table->root)
@@ -47,32 +34,18 @@ void free_state (server_state_t *state);
void server_loc_wipe (loc_t *loc);
-int32_t
-gf_add_locker (struct _lock_table *table, const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid);
-
-int32_t
-gf_del_locker (struct _lock_table *table, const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid);
-
void
server_print_request (call_frame_t *frame);
call_frame_t *
get_frame_from_request (rpcsvc_request_t *req);
-server_connection_t *
-get_server_conn_state (xlator_t *this, rpc_transport_t *xptr);
+int
+server_connection_cleanup (xlator_t *this, struct _client_t *client,
+ int32_t flags);
-server_connection_t *
-create_server_conn_state (xlator_t *this, rpc_transport_t *xptr);
-
-void
-destroy_server_conn_state (server_connection_t *conn);
+gf_boolean_t
+server_cancel_grace_timer (xlator_t *this, struct _client_t *client);
int
server_build_config (xlator_t *this, server_conf_t *conf);
@@ -81,5 +54,31 @@ int serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp);
int serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp);
int readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp);
int readdir_rsp_cleanup (gfs3_readdir_rsp *rsp);
+int auth_set_username_passwd (dict_t *input_params, dict_t *config_params,
+ struct _client_t *client);
+
+server_ctx_t *server_ctx_get (client_t *client, xlator_t *xlator);
+int server_process_event_upcall (xlator_t *this, void *data);
+inode_t *
+server_inode_new (inode_table_t *itable, uuid_t gfid);
+
+int
+serialize_rsp_locklist (lock_migration_info_t *locklist,
+ gfs3_getactivelk_rsp *rsp);
+
+int
+getactivelkinfo_rsp_cleanup (gfs3_getactivelk_rsp *rsp);
+
+int
+server_populate_compound_response (xlator_t *this, gfs3_compound_rsp *rsp,
+ call_frame_t *frame,
+ compound_args_cbk_t *args_cbk, int index);
+int
+server_get_compound_resolve (server_state_t *state, gfs3_compound_req *req);
+
+int
+server_populate_compound_request (gfs3_compound_req *req, call_frame_t *frame,
+ default_args_t *this_args,
+ int index);
#endif /* !_SERVER_HELPERS_H */
diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h
index 32f31fae783..9165249d49a 100644
--- a/xlators/protocol/server/src/server-mem-types.h
+++ b/xlators/protocol/server/src/server-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -33,6 +24,10 @@ enum gf_server_mem_types_ {
gf_server_mt_dirent_rsp_t,
gf_server_mt_rsp_buf_t,
gf_server_mt_volfile_ctx_t,
+ gf_server_mt_timer_data_t,
+ gf_server_mt_setvolume_rsp_t,
+ gf_server_mt_lock_mig_t,
+ gf_server_mt_compound_rsp_t,
gf_server_mt_end,
};
#endif /* __SERVER_MEM_TYPES_H__ */
diff --git a/xlators/protocol/server/src/server-messages.h b/xlators/protocol/server/src/server-messages.h
new file mode 100644
index 00000000000..5593e68d3d4
--- /dev/null
+++ b/xlators/protocol/server/src/server-messages.h
@@ -0,0 +1,855 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PS_MESSAGES_H__
+#define _PS_MESSAGES_H__
+
+#include "glfs-message-id.h"
+
+/*! \file server-messages.h
+ * \brief server log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_PS_BASE GLFS_MSGID_COMP_PS
+#define GLFS_NUM_MESSAGES 90
+#define GLFS_MSGID_END (GLFS_PS_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x GLFS_PS_BASE, "Invalid: Start of messages"
+/*------------*/
+
+#define PS_MSG_AUTHENTICATE_ERROR (GLFS_PS_BASE + 1)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_VOL_VALIDATE_FAILED (GLFS_PS_BASE + 2)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_AUTH_INIT_FAILED (GLFS_PS_BASE + 3)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_REMOTE_CLIENT_REFUSED (GLFS_PS_BASE + 4)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GFID_RESOLVE_FAILED (GLFS_PS_BASE + 5)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ANONYMOUS_FD_CREATE_FAILED (GLFS_PS_BASE + 6)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_NO_MEMORY (GLFS_PS_BASE + 7)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FD_NOT_FOUND (GLFS_PS_BASE + 8)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_INVALID_ENTRY (GLFS_PS_BASE + 9)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GET_UID_FAILED (GLFS_PS_BASE + 10)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_UID_NOT_FOUND (GLFS_PS_BASE + 11)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_MAPPING_ERROR (GLFS_PS_BASE + 12)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FD_CLEANUP (GLFS_PS_BASE + 13)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SERVER_CTX_GET_FAILED (GLFS_PS_BASE + 14)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FDENTRY_NULL (GLFS_PS_BASE + 15)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_DIR_NOT_FOUND (GLFS_PS_BASE + 16)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SERVER_MSG (GLFS_PS_BASE + 17)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_DICT_SERIALIZE_FAIL (GLFS_PS_BASE + 18)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RW_STAT (GLFS_PS_BASE + 19)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_DICT_GET_FAILED (GLFS_PS_BASE + 20)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_LOGIN_ERROR (GLFS_PS_BASE + 21)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_REMOUNT_CLIENT_REQD (GLFS_PS_BASE + 22)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_DEFAULTING_FILE (GLFS_PS_BASE + 23)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_VOL_FILE_OPEN_FAILED (GLFS_PS_BASE + 24)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_STAT_ERROR (GLFS_PS_BASE + 25)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SSL_NAME_SET_FAILED (GLFS_PS_BASE + 26)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ASPRINTF_FAILED (GLFS_PS_BASE + 27)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CLIENT_VERSION_NOT_SET (GLFS_PS_BASE + 28)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CLIENT_ACCEPTED (GLFS_PS_BASE + 29)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CLIENT_LK_VERSION_ERROR (GLFS_PS_BASE + 30)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GRACE_TIMER_EXPD (GLFS_PS_BASE + 31)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SERIALIZE_REPLY_FAILED (GLFS_PS_BASE + 32)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_AUTH_IP_ERROR (GLFS_PS_BASE + 33)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SKIP_FORMAT_CHK (GLFS_PS_BASE + 34)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_INTERNET_ADDR_ERROR (GLFS_PS_BASE + 35)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CLIENT_DISCONNECTING (GLFS_PS_BASE + 36)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GRACE_TIMER_START (GLFS_PS_BASE + 37)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_STATEDUMP_PATH_ERROR (GLFS_PS_BASE + 38)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GRP_CACHE_ERROR (GLFS_PS_BASE + 39)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RPC_CONF_ERROR (GLFS_PS_BASE + 40)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_TRANSPORT_ERROR (GLFS_PS_BASE + 41)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SUBVOL_NULL (GLFS_PS_BASE + 42)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_PARENT_VOL_ERROR (GLFS_PS_BASE + 43)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RPCSVC_CREATE_FAILED (GLFS_PS_BASE + 44)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RPCSVC_LISTENER_CREATE_FAILED (GLFS_PS_BASE + 45)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RPCSVC_NOTIFY (GLFS_PS_BASE + 46)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_PGM_REG_FAILED (GLFS_PS_BASE + 47)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ULIMIT_SET_FAILED (GLFS_PS_BASE + 48)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_STATFS (GLFS_PS_BASE + 49)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_LOOKUP_INFO (GLFS_PS_BASE + 50)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_LK_INFO (GLFS_PS_BASE + 51)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_LOCK_ERROR (GLFS_PS_BASE + 52)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_INODELK_INFO (GLFS_PS_BASE + 53)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ENTRYLK_INFO (GLFS_PS_BASE + 54)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ACCESS_INFO (GLFS_PS_BASE + 55)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_DIR_INFO (GLFS_PS_BASE + 56)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_MKNOD_INFO (GLFS_PS_BASE + 57)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_REMOVEXATTR_INFO (GLFS_PS_BASE + 58)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GETXATTR_INFO (GLFS_PS_BASE + 59)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SETXATTR_INFO (GLFS_PS_BASE + 60)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RENAME_INFO (GLFS_PS_BASE + 61)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_LINK_INFO (GLFS_PS_BASE + 62)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_TRUNCATE_INFO (GLFS_PS_BASE + 63)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FSTAT_INFO (GLFS_PS_BASE + 64)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FLUSH_INFO (GLFS_PS_BASE + 65)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SYNC_INFO (GLFS_PS_BASE + 66)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_WRITE_INFO (GLFS_PS_BASE + 67)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_READ_INFO (GLFS_PS_BASE + 68)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CHKSUM_INFO (GLFS_PS_BASE + 69)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_OPEN_INFO (GLFS_PS_BASE + 70)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CREATE_INFO (GLFS_PS_BASE + 71)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SETATTR_INFO (GLFS_PS_BASE + 72)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_XATTROP_INFO (GLFS_PS_BASE + 73)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ALLOC_INFO (GLFS_PS_BASE + 74)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_DISCARD_INFO (GLFS_PS_BASE + 75)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ZEROFILL_INFO (GLFS_PS_BASE + 76)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FD_CREATE_FAILED (GLFS_PS_BASE + 77)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_WRONG_STATE (GLFS_PS_BASE + 78)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_CONF_DIR_INVALID (GLFS_PS_BASE + 79)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_MOUNT_PT_FAIL (GLFS_PS_BASE + 80)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_STAT_INFO (GLFS_PS_BASE + 81)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_FILE_OP_FAILED (GLFS_PS_BASE + 82)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_GRACE_TIMER_CANCELLED (GLFS_PS_BASE + 83)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_ENCODE_MSG_FAILED (GLFS_PS_BASE + 84)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_REPLY_SUBMIT_FAILED (GLFS_PS_BASE + 85)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_RPC_NOTIFY_ERROR (GLFS_PS_BASE + 86)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SERVER_EVENT_UPCALL_FAILED (GLFS_PS_BASE + 87)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SERVER_IPC_INFO (GLFS_PS_BASE + 88)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_SEEK_INFO (GLFS_PS_BASE + 89)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define PS_MSG_COMPOUND_INFO (GLFS_PS_BASE + 90)
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_PS_MESSAGES_H__ */
+
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
index 77336216f19..1ad45394dd7 100644
--- a/xlators/protocol/server/src/server-resolve.c
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -1,29 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "server.h"
#include "server-helpers.h"
+#include "server-messages.h"
int
@@ -33,67 +20,9 @@ resolve_entry_simple (call_frame_t *frame);
int
resolve_inode_simple (call_frame_t *frame);
int
-resolve_path_simple (call_frame_t *frame);
-
-int
-component_count (const char *path)
-{
- int count = 0;
- const char *trav = NULL;
-
- trav = path;
-
- for (trav = path; *trav; trav++) {
- if (*trav == '/')
- count++;
- }
-
- return count + 2;
-}
-
-
+resolve_continue (call_frame_t *frame);
int
-prepare_components (call_frame_t *frame)
-{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- char *resolved = NULL;
- int count = 0;
- struct resolve_comp *components = NULL;
- int i = 0;
- char *trav = NULL;
-
-
- state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
-
- resolved = gf_strdup (resolve->path);
- resolve->resolved = resolved;
-
- count = component_count (resolve->path);
- components = GF_CALLOC (sizeof (*components), count,
- gf_server_mt_resolv_comp_t);
- resolve->components = components;
-
- components[0].basename = "";
- components[0].ino = 1;
- components[0].gen = 0;
- components[0].inode = state->itable->root;
-
- i = 1;
- for (trav = resolved; *trav; trav++) {
- if (*trav == '/') {
- components[i].basename = trav + 1;
- *trav = 0;
- i++;
- }
- }
-
- return 0;
-}
-
+resolve_anonfd_simple (call_frame_t *frame);
int
resolve_loc_touchup (call_frame_t *frame)
@@ -109,206 +38,231 @@ resolve_loc_touchup (call_frame_t *frame)
resolve = state->resolve_now;
loc = state->loc_now;
- if (!loc->path) {
- if (loc->parent) {
- ret = inode_path (loc->parent, resolve->bname, &path);
- } else if (loc->inode) {
- ret = inode_path (loc->inode, NULL, &path);
- }
-
- if (!path)
- path = gf_strdup (resolve->path);
-
- loc->path = path;
- }
-
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
-
- if (!loc->parent && loc->inode) {
- loc->parent = inode_parent (loc->inode, 0, NULL);
- }
-
+ loc_touchup (loc, resolve->bname);
return 0;
}
int
-resolve_deep_continue (call_frame_t *frame)
+resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
server_state_t *state = NULL;
- xlator_t *this = NULL;
server_resolve_t *resolve = NULL;
- int ret = 0;
+ inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
state = CALL_STATE (frame);
- this = frame->this;
resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
- resolve->op_ret = 0;
- resolve->op_errno = 0;
+ if (op_ret == -1) {
+ if (op_errno == ENOENT) {
+ gf_msg_debug (this->name, 0, "%s/%s: failed to resolve"
+ " (%s)",
+ uuid_utoa (resolve_loc->pargfid),
+ resolve_loc->name, strerror (op_errno));
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ PS_MSG_GFID_RESOLVE_FAILED, "%s/%s: failed to "
+ "resolve (%s)",
+ uuid_utoa (resolve_loc->pargfid),
+ resolve_loc->name, strerror (op_errno));
+ }
+ goto out;
+ }
- if (resolve->par)
- ret = resolve_entry_simple (frame);
- else if (resolve->ino)
- ret = resolve_inode_simple (frame);
- else if (resolve->path)
- ret = resolve_path_simple (frame);
+ link_inode = inode_link (inode, resolve_loc->parent,
+ resolve_loc->name, buf);
- resolve_loc_touchup (frame);
+ if (!link_inode)
+ goto out;
- server_resolve_all (frame);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+out:
+ loc_wipe (resolve_loc);
+
+ resolve_continue (frame);
return 0;
}
+/*
+ * Lookup callback for the gfid lookup wound by resolve_gfid.
+ *
+ * On failure the error is logged (debug level for ENOENT, warning for
+ * anything else), resolve_loc is wiped and resolution resumes through
+ * resolve_continue(). On success the inode is linked by gfid only
+ * (no parent, no name). If resolve->pargfid is null the work is done;
+ * otherwise the linked inode becomes the parent of a second lookup on
+ * resolve->bname, completed in resolve_gfid_entry_cbk.
+ */
int
-resolve_deep_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
server_state_t *state = NULL;
server_resolve_t *resolve = NULL;
- struct resolve_comp *components = NULL;
- int i = 0;
inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
+ dict_t *dict = NULL;
state = CALL_STATE (frame);
resolve = state->resolve_now;
- components = resolve->components;
-
- i = (long) cookie;
+ resolve_loc = &resolve->resolve_loc;
if (op_ret == -1) {
- goto get_out_of_here;
+ if (op_errno == ENOENT) {
+ /* second argument is an errnum, not a log level */
+ gf_msg_debug (this->name, 0,
+ "%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->gfid),
+ strerror (op_errno));
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ PS_MSG_GFID_RESOLVE_FAILED,
+ "%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->gfid),
+ strerror (op_errno));
+ }
+ loc_wipe (&resolve->resolve_loc);
+ goto out;
}
- if (i != 0) {
- /* no linking for root inode */
- link_inode = inode_link (inode, resolve->deep_loc.parent,
- resolve->deep_loc.name, buf);
- inode_lookup (link_inode);
- components[i].inode = link_inode;
- link_inode = NULL;
+ link_inode = inode_link (inode, NULL, NULL, buf);
+
+ if (!link_inode) {
+ loc_wipe (resolve_loc);
+ goto out;
}
- loc_wipe (&resolve->deep_loc);
+ inode_lookup (link_inode);
+
+ /* wipe the loc only after the inode has been linked to the inode
+ table. Otherwise before inode gets linked to the inode table,
+ inode would have been unrefed (this might have been destroyed
+ if refcount becomes 0, and put back to mempool). So once the
+ inode gets destroyed, inode_link is a redundant operation. But
+ without knowing that the destroyed inode's pointer is saved in
+ the resolved_loc as parent (while constructing loc for resolving
+ the entry) and the inode_new call for resolving the entry will
+ return the same pointer to the inode as the parent (because in
+ reality the inode is a free inode present in cold list of the
+ inode mem-pool).
+ */
+ loc_wipe (resolve_loc);
+
+ if (gf_uuid_is_null (resolve->pargfid)) {
+ inode_unref (link_inode);
+ goto out;
+ }
- i++; /* next component */
+ resolve_loc->parent = link_inode;
+ gf_uuid_copy (resolve_loc->pargfid, resolve_loc->parent->gfid);
- if (!components[i].basename) {
- /* all components of the path are resolved */
- goto get_out_of_here;
- }
+ resolve_loc->name = resolve->bname;
- /* join the current component with the path resolved until now */
- *(components[i].basename - 1) = '/';
+ resolve_loc->inode = server_inode_new (state->itable,
+ resolve_loc->gfid);
- resolve->deep_loc.path = gf_strdup (resolve->resolved);
- resolve->deep_loc.parent = inode_ref (components[i-1].inode);
- resolve->deep_loc.inode = inode_new (state->itable);
- resolve->deep_loc.name = components[i].basename;
+ inode_path (resolve_loc->parent, resolve_loc->name,
+ (char **) &resolve_loc->path);
- STACK_WIND_COOKIE (frame, resolve_deep_cbk, (void *) (long) i,
- BOUND_XL (frame), BOUND_XL (frame)->fops->lookup,
- &resolve->deep_loc, NULL);
- return 0;
+ if (state->xdata) {
+ dict = dict_copy_with_ref (state->xdata, NULL);
+ if (!dict)
+ /* resolve_loc->gfid was cleared by loc_wipe above;
+ pargfid is the value this message labels */
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, PS_MSG_NO_MEMORY,
+ "BUG: dict allocation failed (pargfid: %s, name: %s), "
+ "still continuing", uuid_utoa (resolve_loc->pargfid),
+ resolve_loc->name);
+ }
-get_out_of_here:
- resolve_deep_continue (frame);
+ STACK_WIND (frame, resolve_gfid_entry_cbk,
+ frame->root->client->bound_xl,
+ frame->root->client->bound_xl->fops->lookup,
+ &resolve->resolve_loc, dict);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+out:
+ resolve_continue (frame);
return 0;
}
+/*
+ * Start a gfid-based lookup for the current resolve.
+ *
+ * resolve_loc->gfid is set from resolve->pargfid when that is non-null,
+ * otherwise from resolve->gfid. A new inode is obtained with
+ * server_inode_new(), a path is built with loc_path(), and a lookup is
+ * wound to the client's bound_xl with resolve_gfid_cbk as callback.
+ * state->xdata, when present, is copied for the lookup; a failed copy is
+ * logged and the lookup proceeds with a NULL dict.
+ */
int
-resolve_path_deep (call_frame_t *frame)
+resolve_gfid (call_frame_t *frame)
{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- int i = 0;
+ server_state_t *state = NULL;
+ xlator_t *this = NULL;
+ server_resolve_t *resolve = NULL;
+ loc_t *resolve_loc = NULL;
+ int ret = 0;
+ dict_t *xdata = NULL;
state = CALL_STATE (frame);
this = frame->this;
resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ if (!gf_uuid_is_null (resolve->pargfid))
+ gf_uuid_copy (resolve_loc->gfid, resolve->pargfid);
+ else if (!gf_uuid_is_null (resolve->gfid))
+ gf_uuid_copy (resolve_loc->gfid, resolve->gfid);
+
+ resolve_loc->inode = server_inode_new (state->itable,
+ resolve_loc->gfid);
+ ret = loc_path (resolve_loc, NULL);
+
+ if (state->xdata) {
+ xdata = dict_copy_with_ref (state->xdata, NULL);
+ if (!xdata)
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, PS_MSG_NO_MEMORY,
+ "BUG: dict allocation failed (gfid: %s), "
+ "still continuing",
+ uuid_utoa (resolve_loc->gfid));
+ }
- gf_log (BOUND_XL (frame)->name, GF_LOG_TRACE,
- "RESOLVE %s() seeking deep resolution of %s",
- gf_fop_list[frame->root->op], resolve->path);
-
- prepare_components (frame);
+ STACK_WIND (frame, resolve_gfid_cbk,
+ frame->root->client->bound_xl,
+ frame->root->client->bound_xl->fops->lookup,
+ &resolve->resolve_loc, xdata);
- /* start from the root */
- resolve->deep_loc.inode = state->itable->root;
- resolve->deep_loc.path = gf_strdup ("/");
- resolve->deep_loc.name = "";
+ if (xdata)
+ dict_unref (xdata);
- STACK_WIND_COOKIE (frame, resolve_deep_cbk, (void *) (long) i,
- BOUND_XL (frame), BOUND_XL (frame)->fops->lookup,
- &resolve->deep_loc, NULL);
return 0;
}
-
+/*
+ * Resume resolution after the gfid lookups have completed.
+ *
+ * op_ret/op_errno of the current resolve are reset, then the matching
+ * "simple" resolver is run again: resolve_anonfd_simple when fd_no is not
+ * -1 (this path skips resolve_loc_touchup), resolve_entry_simple when
+ * pargfid is non-null, resolve_inode_simple when gfid is non-null. A
+ * non-zero result is only logged at debug level. Finally
+ * server_resolve_all() is called to move on.
+ */
int
-resolve_path_simple (call_frame_t *frame)
+resolve_continue (call_frame_t *frame)
{
server_state_t *state = NULL;
xlator_t *this = NULL;
server_resolve_t *resolve = NULL;
- struct resolve_comp *components = NULL;
- int ret = -1;
- int par_idx = 0;
- int ino_idx = 0;
- int i = 0;
+ int ret = 0;
state = CALL_STATE (frame);
this = frame->this;
resolve = state->resolve_now;
- components = resolve->components;
-
- if (!components) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- for (i = 0; components[i].basename; i++) {
- par_idx = ino_idx;
- ino_idx = i;
- }
-
- if (!components[par_idx].inode) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
- if (!components[ino_idx].inode &&
- (resolve->type == RESOLVE_MUST || resolve->type == RESOLVE_EXACT)) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
+ resolve->op_ret = 0;
+ resolve->op_errno = 0;
- if (components[ino_idx].inode && resolve->type == RESOLVE_NOT) {
- resolve->op_ret = -1;
- resolve->op_errno = EEXIST;
+ if (resolve->fd_no != -1) {
+ ret = resolve_anonfd_simple (frame);
goto out;
- }
-
- if (components[ino_idx].inode)
- state->loc_now->inode = inode_ref (components[ino_idx].inode);
- state->loc_now->parent = inode_ref (components[par_idx].inode);
-
- ret = 0;
+ } else if (!gf_uuid_is_null (resolve->pargfid))
+ ret = resolve_entry_simple (frame);
+ else if (!gf_uuid_is_null (resolve->gfid))
+ ret = resolve_inode_simple (frame);
+ if (ret)
+ gf_msg_debug (this->name, 0, "return value of resolve_*_"
+ "simple %d", ret);
+ resolve_loc_touchup (frame);
out:
- return ret;
+ server_resolve_all (frame);
+
+ return 0;
}
+
/*
Check if the requirements are fulfilled by entries in the inode cache itself
Return value:
@@ -330,36 +284,20 @@ resolve_entry_simple (call_frame_t *frame)
this = frame->this;
resolve = state->resolve_now;
- parent = inode_get (state->itable, resolve->par, 0);
+ parent = inode_find (state->itable, resolve->pargfid);
if (!parent) {
/* simple resolution is indecisive. need to perform
deep resolution */
resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
+ resolve->op_errno = ESTALE;
ret = 1;
-
- inode = inode_grep (state->itable, parent, resolve->bname);
- if (inode != NULL) {
- gf_log (this->name, GF_LOG_DEBUG, "%"PRId64": inode "
- "(pointer:%p ino: %"PRIu64") present but parent"
- " is NULL for path (%s)", frame->root->unique,
- inode, inode->ino, resolve->path);
- inode_unref (inode);
- }
- goto out;
- }
-
- if (parent->ino != 1 && parent->generation != resolve->gen) {
- /* simple resolution is decisive - request was for a
- stale handle */
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = -1;
goto out;
}
/* expected @parent was found from the inode cache */
+ gf_uuid_copy (state->loc_now->pargfid, resolve->pargfid);
state->loc_now->parent = inode_ref (parent);
+ state->loc_now->name = resolve->bname;
inode = inode_grep (state->itable, parent, resolve->bname);
if (!inode) {
@@ -382,9 +320,9 @@ resolve_entry_simple (call_frame_t *frame)
}
if (resolve->type == RESOLVE_NOT) {
- gf_log (this->name, GF_LOG_DEBUG, "inode (pointer: %p ino:%"
- PRIu64") found for path (%s) while type is RESOLVE_NOT",
- inode, inode->ino, resolve->path);
+ gf_msg_debug (this->name, 0, "inode (pointer: %p gfid:%s found"
+ " for path (%s) while type is RESOLVE_NOT",
+ inode, uuid_utoa (inode->gfid), resolve->path);
resolve->op_ret = -1;
resolve->op_errno = EEXIST;
ret = -1;
@@ -410,21 +348,17 @@ int
server_resolve_entry (call_frame_t *frame)
{
server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
int ret = 0;
loc_t *loc = NULL;
state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
loc = state->loc_now;
ret = resolve_entry_simple (frame);
if (ret > 0) {
loc_wipe (loc);
- resolve_path_deep (frame);
+ resolve_gfid (frame);
return 0;
}
@@ -441,38 +375,26 @@ int
resolve_inode_simple (call_frame_t *frame)
{
server_state_t *state = NULL;
- xlator_t *this = NULL;
server_resolve_t *resolve = NULL;
inode_t *inode = NULL;
int ret = 0;
state = CALL_STATE (frame);
- this = frame->this;
resolve = state->resolve_now;
- if (resolve->type == RESOLVE_EXACT) {
- inode = inode_get (state->itable, resolve->ino, resolve->gen);
- } else {
- inode = inode_get (state->itable, resolve->ino, 0);
- }
+ inode = inode_find (state->itable, resolve->gfid);
if (!inode) {
resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
+ resolve->op_errno = ESTALE;
ret = 1;
goto out;
}
- if (inode->ino != 1 && inode->generation != resolve->gen) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- ret = -1;
- goto out;
- }
-
ret = 0;
state->loc_now->inode = inode_ref (inode);
+ gf_uuid_copy (state->loc_now->gfid, resolve->gfid);
out:
if (inode)
@@ -486,21 +408,17 @@ int
server_resolve_inode (call_frame_t *frame)
{
server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
int ret = 0;
loc_t *loc = NULL;
state = CALL_STATE (frame);
- this = frame->this;
- resolve = state->resolve_now;
loc = state->loc_now;
ret = resolve_inode_simple (frame);
if (ret > 0) {
loc_wipe (loc);
- resolve_path_deep (frame);
+ resolve_gfid (frame);
return 0;
}
@@ -514,24 +432,104 @@ server_resolve_inode (call_frame_t *frame)
+/*
+ * Try to set up an anonymous fd for resolve->gfid using only the inode
+ * table.
+ *
+ * Returns 0 after assigning state->fd (with the request flags for READ and
+ * WRITE fops, plain otherwise). Returns 1, with the resolve's
+ * op_ret/op_errno set to -1/ENOENT, when the gfid has no inode in the table.
+ */
int
+resolve_anonfd_simple (call_frame_t *frame)
+{
+        server_state_t   *state   = NULL;
+        server_resolve_t *resolve = NULL;
+        inode_t          *inode   = NULL;
+
+        state   = CALL_STATE (frame);
+        resolve = state->resolve_now;
+
+        inode = inode_find (state->itable, resolve->gfid);
+        if (inode == NULL) {
+                resolve->op_ret   = -1;
+                resolve->op_errno = ENOENT;
+
+                gf_msg_debug ("server", 0, "inode for the gfid"
+                              "(%s) is not found. anonymous fd creation failed",
+                              uuid_utoa (resolve->gfid));
+                return 1;
+        }
+
+        if (frame->root->op != GF_FOP_READ && frame->root->op != GF_FOP_WRITE)
+                state->fd = fd_anonymous (inode);
+        else
+                state->fd = fd_anonymous_with_flags (inode, state->flags);
+
+        /* drop the reference taken by inode_find */
+        inode_unref (inode);
+
+        return 0;
+}
+
+
+/*
+ * Resolve an anonymous-fd request: use the inode table when possible,
+ * otherwise wipe the current loc and fall back to a gfid lookup.
+ */
+int
+server_resolve_anonfd (call_frame_t *frame)
+{
+        server_state_t *state = CALL_STATE (frame);
+        loc_t          *loc   = state->loc_now;
+
+        if (resolve_anonfd_simple (frame) > 0) {
+                /* inode not cached: look it up by gfid first */
+                loc_wipe (loc);
+                resolve_gfid (frame);
+        } else {
+                server_resolve_all (frame);
+        }
+
+        return 0;
+}
+
+
+int
server_resolve_fd (call_frame_t *frame)
{
- server_state_t *state = NULL;
- xlator_t *this = NULL;
- server_resolve_t *resolve = NULL;
- server_connection_t *conn = NULL;
- uint64_t fd_no = -1;
+ server_ctx_t *serv_ctx = NULL;
+ server_state_t *state = NULL;
+ client_t *client = NULL;
+ server_resolve_t *resolve = NULL;
+ uint64_t fd_no = -1;
state = CALL_STATE (frame);
- this = frame->this;
resolve = state->resolve_now;
- conn = SERVER_CONNECTION (frame);
fd_no = resolve->fd_no;
- state->fd = gf_fd_fdptr_get (conn->fdtable, fd_no);
+ if (fd_no == GF_ANON_FD_NO) {
+ server_resolve_anonfd (frame);
+ return 0;
+ }
+
+ client = frame->root->client;
+
+ serv_ctx = server_ctx_get (client, client->this);
+
+ if (serv_ctx == NULL) {
+ gf_msg ("", GF_LOG_INFO, ENOMEM, PS_MSG_NO_MEMORY,
+ "server_ctx_get() failed");
+ resolve->op_ret = -1;
+ resolve->op_errno = ENOMEM;
+ return 0;
+ }
+
+ state->fd = gf_fd_fdptr_get (serv_ctx->fdtable, fd_no);
if (!state->fd) {
+ gf_msg ("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, "fd not "
+ "found in context");
resolve->op_ret = -1;
resolve->op_errno = EBADF;
}
@@ -546,30 +544,29 @@ int
server_resolve (call_frame_t *frame)
{
server_state_t *state = NULL;
- xlator_t *this = NULL;
server_resolve_t *resolve = NULL;
state = CALL_STATE (frame);
- this = frame->this;
resolve = state->resolve_now;
if (resolve->fd_no != -1) {
server_resolve_fd (frame);
- } else if (resolve->par) {
+ } else if (!gf_uuid_is_null (resolve->pargfid)) {
server_resolve_entry (frame);
- } else if (resolve->ino) {
+ } else if (!gf_uuid_is_null (resolve->gfid)) {
server_resolve_inode (frame);
- } else if (resolve->path) {
-
- resolve_path_deep (frame);
-
- } else {
+ } else {
+ if (resolve == &state->resolve)
+ gf_msg (frame->this->name, GF_LOG_WARNING, 0,
+ PS_MSG_INVALID_ENTRY,
+ "no resolution type for %s (%s)",
+ resolve->path, gf_fop_list[frame->root->op]);
resolve->op_ret = -1;
resolve->op_errno = EINVAL;
@@ -585,14 +582,12 @@ int
server_resolve_done (call_frame_t *frame)
{
server_state_t *state = NULL;
- xlator_t *bound_xl = NULL;
state = CALL_STATE (frame);
- bound_xl = BOUND_XL (frame);
server_print_request (frame);
- state->resume_fn (frame, bound_xl);
+ state->resume_fn (frame, frame->root->client->bound_xl);
return 0;
}
@@ -630,8 +625,9 @@ server_resolve_all (call_frame_t *frame)
server_resolve_done (frame);
} else {
- gf_log (this->name, GF_LOG_ERROR,
- "Invalid pointer for state->resolve_now");
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ PS_MSG_INVALID_ENTRY, "Invalid pointer for "
+ "state->resolve_now");
}
return 0;
@@ -642,13 +638,10 @@ int
resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn)
{
server_state_t *state = NULL;
- xlator_t *this = NULL;
state = CALL_STATE (frame);
state->resume_fn = fn;
- this = frame->this;
-
server_resolve_all (frame);
return 0;
diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c
new file mode 100644
index 00000000000..91644ce0103
--- /dev/null
+++ b/xlators/protocol/server/src/server-rpc-fops.c
@@ -0,0 +1,6835 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <openssl/md5.h>
+
+#include "server.h"
+#include "server-helpers.h"
+#include "rpc-common-xdr.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "compat-errno.h"
+#include "server-messages.h"
+#include "defaults.h"
+#include "default-args.h"
+#include "server-common.h"
+
+#include "xdr-nfs3.h"
+
+/* Flag @req with GARBAGE_ARGS and set @ret to RPCSVC_ACTOR_ERROR. */
+#define SERVER_REQ_SET_ERROR(req, ret) \
+ do { \
+ rpcsvc_request_seterr (req, GARBAGE_ARGS); \
+ ret = RPCSVC_ACTOR_ERROR; \
+ } while (0)
+
+/* Call inode_forget (inode, 0) when @inode has no dentry left. */
+void
+forget_inode_if_no_dentry (inode_t *inode)
+{
+        if (inode_has_dentry (inode))
+                return;
+
+        inode_forget (inode, 0);
+}
+
+
+/* Callback function section */
+/*
+ * STATFS callback: serialize @xdata into the reply, fill the reply from
+ * @buf on success (or log the failure), submit the reply and free the
+ * serialized xdata buffer.
+ */
+int
+server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+                   dict_t *xdata)
+{
+        gfs3_statfs_rsp   rsp = {0,};
+        rpcsvc_request_t *req = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                /* report the fop's error, not the thread-global errno */
+                gf_msg (this->name, GF_LOG_WARNING, op_errno, PS_MSG_STATFS,
+                        "%"PRId64": STATFS (%s)",
+                        frame->root->unique, strerror (op_errno));
+                goto out;
+        }
+
+        server_post_statfs (&rsp, buf);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_statfs_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * LOOKUP callback.
+ *
+ * A failed revalidate (is_revalidate == 1) is retried once: is_revalidate
+ * becomes 2 and the lookup is wound again on a copy of state->loc carrying
+ * a new inode, with this same function as callback. Otherwise the reply is
+ * built: on failure of a revalidate with ENOENT for a non-root gfid the
+ * stale dentry is unlinked and the inode forgotten if it has no dentry
+ * left; on success server_post_lookup() fills the reply. Failures are
+ * logged before the reply is submitted.
+ *
+ * NOTE(review): root_inode and link_inode are declared but never used here.
+ */
+int
+server_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+ inode_t *root_inode = NULL;
+ inode_t *link_inode = NULL;
+ loc_t fresh_loc = {0,};
+ gfs3_lookup_rsp rsp = {0,};
+
+ state = CALL_STATE (frame);
+
+ if (state->is_revalidate == 1 && op_ret == -1) {
+ state->is_revalidate = 2;
+ loc_copy (&fresh_loc, &state->loc);
+ inode_unref (fresh_loc.inode);
+ fresh_loc.inode = server_inode_new (state->itable,
+ fresh_loc.gfid);
+
+ STACK_WIND (frame, server_lookup_cbk,
+ frame->root->client->bound_xl,
+ frame->root->client->bound_xl->fops->lookup,
+ &fresh_loc, state->xdata);
+
+ loc_wipe (&fresh_loc);
+ return 0;
+ }
+
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ if (state->is_revalidate && op_errno == ENOENT) {
+ if (!__is_root_gfid (state->resolve.gfid)) {
+ inode_unlink (state->loc.inode,
+ state->loc.parent,
+ state->loc.name);
+ /**
+ * If the entry is not present, then just
+ * unlinking the associated dentry is not
+ * suffecient. This condition should be
+ * treated as unlink of the entry. So along
+ * with deleting the entry, its also important
+ * to forget the inode for it (if the dentry
+ * being considered was the last dentry).
+ * Otherwise it might lead to inode leak.
+ * It also might lead to wrong decisions being
+ * taken if the future lookups on this inode are
+ * successful since they are able to find the
+ * inode in the inode table (atleast gfid based
+ * lookups will be successful, if the lookup
+ * is a soft lookup)
+ */
+ forget_inode_if_no_dentry (state->loc.inode);
+ }
+ }
+ goto out;
+ }
+
+ server_post_lookup (&rsp, frame, state, inode, stbuf, postparent);
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ if (op_ret) {
+ if (state->resolve.bname) {
+ gf_msg (this->name,
+ fop_log_level (GF_FOP_LOOKUP, op_errno),
+ op_errno, PS_MSG_LOOKUP_INFO,
+ "%"PRId64": LOOKUP %s (%s/%s) ==> "
+ "(%s)", frame->root->unique,
+ state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname,
+ strerror (op_errno));
+ } else {
+ gf_msg (this->name,
+ fop_log_level (GF_FOP_LOOKUP, op_errno),
+ op_errno, PS_MSG_LOOKUP_INFO,
+ "%"PRId64": LOOKUP %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ }
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lookup_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+/*
+ * LEASE callback: log a failure or copy @lease into the reply through
+ * server_post_lease(), then submit the reply and free the serialized
+ * xdata buffer.
+ */
+int
+server_lease_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+                  dict_t *xdata)
+{
+        gfs3_lease_rsp    reply   = {0,};
+        rpcsvc_request_t *request = NULL;
+        server_state_t   *state   = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_lease (&reply, lease);
+        } else {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_LEASE, op_errno),
+                        op_errno, PS_MSG_LK_INFO,
+                        "%"PRId64": LEASE %s (%s) ==> "
+                        "(%s)", frame->root->unique,
+                        state->loc.path,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_lease_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * LK callback: log a failure or copy @lock into the reply through
+ * server_post_lk(), then submit the reply and free the serialized xdata
+ * buffer.
+ */
+int
+server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+               dict_t *xdata)
+{
+        gfs3_lk_rsp       reply   = {0,};
+        rpcsvc_request_t *request = NULL;
+        server_state_t   *state   = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_lk (this, &reply, lock);
+        } else {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_LK, op_errno),
+                        op_errno, PS_MSG_LK_INFO,
+                        "%"PRId64": LK %"PRId64" (%s) ==> "
+                        "(%s)", frame->root->unique,
+                        state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_lk_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * INODELK callback: log a failure, then submit the common reply and free
+ * the serialized xdata buffer.
+ */
+int
+server_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     rsp   = {0,};
+        server_state_t   *state = NULL;
+        rpcsvc_request_t *req   = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret < 0) {
+                /* pass the fop's error code (as FINODELK does), not the
+                   thread-global errno */
+                gf_msg (this->name, fop_log_level (GF_FOP_INODELK, op_errno),
+                        op_errno, PS_MSG_INODELK_INFO,
+                        "%"PRId64": INODELK %s (%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * FINODELK callback: log a failure, then submit the common reply and free
+ * the serialized xdata buffer.
+ */
+int
+server_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret < 0)
+                gf_msg (this->name, fop_log_level (GF_FOP_FINODELK, op_errno),
+                        op_errno, PS_MSG_INODELK_INFO,
+                        "%"PRId64": FINODELK %"PRId64" (%s) "
+                        "==> (%s)", frame->root->unique,
+                        state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * ENTRYLK callback: log a failure, then submit the common reply and free
+ * the serialized xdata buffer.
+ */
+int
+server_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret < 0)
+                gf_msg (this->name, fop_log_level (GF_FOP_ENTRYLK, op_errno),
+                        op_errno, PS_MSG_ENTRYLK_INFO,
+                        "%"PRId64": ENTRYLK %s (%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * FENTRYLK callback: log a failure, then submit the common reply and free
+ * the serialized xdata buffer.
+ */
+int
+server_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret < 0)
+                gf_msg (this->name, fop_log_level (GF_FOP_FENTRYLK, op_errno),
+                        op_errno, PS_MSG_ENTRYLK_INFO,
+                        "%"PRId64": FENTRYLK %"PRId64" (%s) ==>(%s)",
+                        frame->root->unique, state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * ACCESS callback: log any non-zero result at INFO level, then submit the
+ * common reply and free the serialized xdata buffer.
+ */
+int
+server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     reply   = {0,};
+        rpcsvc_request_t *request = NULL;
+        server_state_t   *state   = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (op_ret != 0) {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, GF_LOG_INFO,
+                        op_errno, PS_MSG_ACCESS_INFO,
+                        "%"PRId64": ACCESS %s (%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * RMDIR callback: log a failure or fill the reply through
+ * server_post_rmdir(), then submit the reply and free the serialized xdata
+ * buffer.
+ */
+int
+server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        gfs3_rmdir_rsp    reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (!op_ret) {
+                server_post_rmdir (state, &reply, preparent, postparent);
+        } else {
+                gf_msg (this->name, GF_LOG_INFO,
+                        op_errno, PS_MSG_DIR_INFO,
+                        "%"PRId64": RMDIR %s (%s/%s) ==> (%s)",
+                        frame->root->unique,
+                        (state->loc.path) ? state->loc.path : "",
+                        uuid_utoa (state->resolve.pargfid),
+                        state->resolve.bname, strerror (op_errno));
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_rmdir_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * MKDIR callback: log a failure (including the client uid when known) or
+ * fill the reply through server_post_mkdir(), then submit the reply and
+ * free the serialized xdata buffer.
+ */
+int
+server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *stbuf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        gfs3_mkdir_rsp    reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+        client_t         *client  = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        state  = CALL_STATE (frame);
+        client = frame->root->client;
+
+        if (op_ret >= 0) {
+                server_post_mkdir (state, &reply, inode, stbuf, preparent,
+                                   postparent, xdata);
+        } else {
+                gf_msg (this->name, fop_log_level (GF_FOP_MKDIR, op_errno),
+                        op_errno, PS_MSG_DIR_INFO,
+                        "%"PRId64": MKDIR %s (%s/%s) client: %s",
+                        frame->root->unique,
+                        (state->loc.path) ? state->loc.path : "",
+                        uuid_utoa (state->resolve.pargfid),
+                        state->resolve.bname,
+                        (!client || !client->client_uid) ? "-":client->client_uid);
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_mkdir_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * MKNOD callback: log a failure or fill the reply through
+ * server_post_mknod(), then submit the reply and free the serialized xdata
+ * buffer.
+ */
+int
+server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        gfs3_mknod_rsp    reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret >= 0) {
+                server_post_mknod (state, &reply, stbuf, preparent, postparent,
+                                   inode);
+        } else {
+                gf_msg (this->name, fop_log_level (GF_FOP_MKNOD, op_errno),
+                        op_errno, PS_MSG_MKNOD_INFO,
+                        "%"PRId64": MKNOD %s (%s/%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.pargfid),
+                        state->resolve.bname, strerror (op_errno));
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_mknod_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FSYNCDIR callback: log a failure, then submit the common reply and free
+ * the serialized xdata buffer.
+ */
+int
+server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_FSYNCDIR, op_errno),
+                        op_errno, PS_MSG_DIR_INFO,
+                        "%"PRId64": FSYNCDIR %"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * READDIR callback: log a failure, or for a positive op_ret convert
+ * @entries into the reply through server_post_readdir() (a -1 result turns
+ * the reply into -1/ENOMEM). The reply is then submitted and both the
+ * serialized xdata buffer and the reply's entry list are released.
+ */
+int
+server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                    dict_t *xdata)
+{
+        gfs3_readdir_rsp  reply   = {0,};
+        server_state_t   *state   = NULL;
+        rpcsvc_request_t *request = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_READDIR, op_errno),
+                        op_errno, PS_MSG_DIR_INFO,
+                        "%"PRId64": READDIR %"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+        } else if (op_ret > 0) {
+                /* (op_ret == 0) is valid, and means EOF */
+                if (server_post_readdir (&reply, entries) == -1) {
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+out:
+        reply.op_ret   = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        request = frame->local;
+        server_submit_reply (frame, request, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_readdir_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        readdir_rsp_cleanup (&reply);
+
+        return 0;
+}
+
+/*
+ * OPENDIR callback: log a failure or let server_post_opendir() fill the
+ * reply for @fd, then submit the reply and free the serialized xdata
+ * buffer. Whenever the final op_ret is non-zero the reply's fd field is
+ * reset to 0.
+ *
+ * NOTE(review): op_errno is left untouched when server_post_opendir()
+ * fails - confirm that the callee's failure maps to the incoming op_errno.
+ */
+int
+server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        server_state_t   *state = NULL;
+        rpcsvc_request_t *req   = NULL;
+        gfs3_opendir_rsp  rsp   = {0,};
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_OPENDIR, op_errno),
+                        op_errno, PS_MSG_DIR_INFO,
+                        "%"PRId64": OPENDIR %s (%s) ==> (%s)",
+                        frame->root->unique,
+                        (state->loc.path) ? state->loc.path : "",
+                        uuid_utoa (state->resolve.gfid), strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = server_post_opendir (frame, this, &rsp, fd);
+
+out:
+        /* do not hand an fd number back with a failed reply */
+        if (op_ret)
+                rsp.fd = 0;
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_opendir_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * REMOVEXATTR completion callback: serialises xdata, logs a failed
+ * removal (debug level for ENODATA/ENOATTR, info level otherwise) and
+ * submits the common reply.  Always returns 0.
+ */
+int
+server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+        gf_loglevel_t     level     = GF_LOG_NONE;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret == -1) {
+                srv_state = CALL_STATE (frame);
+
+                /* An absent attribute is logged at debug level only. */
+                level = (ENODATA == op_errno || ENOATTR == op_errno)
+                        ? GF_LOG_DEBUG : GF_LOG_INFO;
+
+                gf_msg (this->name, level, op_errno,
+                        PS_MSG_REMOVEXATTR_INFO,
+                        "%"PRId64": REMOVEXATTR %s (%s) of key %s ==> (%s)",
+                        frame->root->unique, srv_state->loc.path,
+                        uuid_utoa (srv_state->resolve.gfid),
+                        srv_state->name, strerror (op_errno));
+        }
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FREMOVEXATTR completion callback: serialises xdata, logs the failure
+ * when op_ret is -1 and submits the common reply.  Always returns 0.
+ */
+int
+server_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret == -1) {
+                srv_state = CALL_STATE (frame);
+                gf_msg (this->name,
+                        fop_log_level (GF_FOP_FREMOVEXATTR, op_errno), op_errno,
+                        PS_MSG_REMOVEXATTR_INFO,
+                        "%"PRId64": FREMOVEXATTR %"PRId64" (%s) (%s) ==> (%s)",
+                        frame->root->unique, srv_state->resolve.fd_no,
+                        uuid_utoa (srv_state->resolve.gfid), srv_state->name,
+                        strerror (op_errno));
+        }
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * GETXATTR completion callback.
+ *
+ * Serialises xdata, and on success also serialises the returned
+ * attribute dictionary into the reply.  A failure (op_ret == -1) is
+ * logged instead.  Both serialised buffers are released after the
+ * reply has been submitted.  Always returns 0.
+ */
+int
+server_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict,
+                     dict_t *xdata)
+{
+        gfs3_getxattr_rsp  rsp       = {0,};
+        server_state_t    *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (-1 == op_ret) {
+                srv_state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_GETXATTR, op_errno),
+                        op_errno, PS_MSG_GETXATTR_INFO,
+                        "%"PRId64": GETXATTR %s (%s) (%s) ==> (%s)",
+                        frame->root->unique, srv_state->loc.path,
+                        uuid_utoa (srv_state->resolve.gfid),
+                        srv_state->name, strerror (op_errno));
+                goto out;
+        }
+
+        /* Success: ship the attribute dictionary itself. */
+        GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+                                    rsp.dict.dict_len, op_errno, out);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_getxattr_rsp);
+
+        GF_FREE (rsp.dict.dict_val);
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * FGETXATTR completion callback.
+ *
+ * Serialises xdata, and on success also serialises the returned
+ * attribute dictionary into the reply.  A failure (op_ret == -1) is
+ * logged instead.  Always returns 0.
+ */
+int
+server_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *dict,
+                      dict_t *xdata)
+{
+        gfs3_fgetxattr_rsp  rsp       = {0,};
+        server_state_t     *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (-1 == op_ret) {
+                srv_state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_FGETXATTR, op_errno),
+                        op_errno, PS_MSG_GETXATTR_INFO,
+                        "%"PRId64": FGETXATTR %"PRId64" (%s) (%s) ==> (%s)",
+                        frame->root->unique, srv_state->resolve.fd_no,
+                        uuid_utoa (srv_state->resolve.gfid),
+                        srv_state->name, strerror (op_errno));
+                goto out;
+        }
+
+        /* Success: ship the attribute dictionary itself. */
+        GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+                                    rsp.dict.dict_len, op_errno, out);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_fgetxattr_rsp);
+
+        GF_FREE (rsp.dict.dict_val);
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * dict_foreach visitor used when SETXATTR fails: logs the key currently
+ * being visited.  tmp carries the call frame.  Always returns 0.
+ */
+static int
+_gf_server_log_setxattr_failure (dict_t *d, char *k, data_t *v,
+                                 void *tmp)
+{
+        call_frame_t   *frame     = tmp;
+        server_state_t *srv_state = CALL_STATE (frame);
+
+        gf_msg (THIS->name, GF_LOG_INFO, 0, PS_MSG_SETXATTR_INFO,
+                "%"PRId64": SETXATTR %s (%s) ==> %s",
+                frame->root->unique, srv_state->loc.path,
+                uuid_utoa (srv_state->resolve.gfid), k);
+
+        return 0;
+}
+
+/*
+ * SETXATTR completion callback.
+ *
+ * Serialises xdata and submits the common reply.  On failure ENOTSUP
+ * is only reported at debug level; any other error logs every key of
+ * the request dictionary followed by the error string.  Always
+ * returns 0.
+ */
+int
+server_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret == -1) {
+                if (op_errno == ENOTSUP) {
+                        gf_msg_debug (THIS->name, 0, "%s",
+                                      strerror (op_errno));
+                } else {
+                        srv_state = CALL_STATE (frame);
+                        dict_foreach (srv_state->dict,
+                                      _gf_server_log_setxattr_failure,
+                                      frame);
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                PS_MSG_SETXATTR_INFO, "%s",
+                                strerror (op_errno));
+                }
+        }
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * dict_foreach visitor used when FSETXATTR fails: logs the key
+ * currently being visited.  tmp carries the call frame.  Always
+ * returns 0.
+ */
+static int
+_gf_server_log_fsetxattr_failure (dict_t *d, char *k, data_t *v,
+                                  void *tmp)
+{
+        call_frame_t   *frame     = tmp;
+        server_state_t *srv_state = CALL_STATE (frame);
+
+        gf_msg (THIS->name, GF_LOG_INFO, 0, PS_MSG_SETXATTR_INFO,
+                "%"PRId64": FSETXATTR %"PRId64" (%s) ==> %s",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), k);
+
+        return 0;
+}
+
+/*
+ * FSETXATTR completion callback.
+ *
+ * Serialises xdata and submits the common reply.  On failure ENOTSUP
+ * is only reported at debug level; any other error logs every key of
+ * the request dictionary followed by the error string.  Always
+ * returns 0.
+ */
+int
+server_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret == -1) {
+                if (op_errno == ENOTSUP) {
+                        gf_msg_debug (THIS->name, 0, "%s",
+                                      strerror (op_errno));
+                } else {
+                        srv_state = CALL_STATE (frame);
+                        dict_foreach (srv_state->dict,
+                                      _gf_server_log_fsetxattr_failure,
+                                      frame);
+                        gf_msg (THIS->name, GF_LOG_INFO, 0,
+                                PS_MSG_SETXATTR_INFO, "%s",
+                                strerror (op_errno));
+                }
+        }
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * RENAME completion callback.
+ *
+ * Serialises xdata; on success hands the stat buffers to
+ * server_post_rename(), on failure (op_ret == -1) logs both endpoints
+ * of the rename.  The reply is always submitted.  Always returns 0.
+ */
+int
+server_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+                   struct iatt *preoldparent, struct iatt *postoldparent,
+                   struct iatt *prenewparent, struct iatt *postnewparent,
+                   dict_t *xdata)
+{
+        gfs3_rename_rsp   rsp            = {0,};
+        server_state_t   *srv_state      = NULL;
+        char              src_gfid[50]   = {0,};
+        char              dst_gfid[50]   = {0,};
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        srv_state = CALL_STATE (frame);
+
+        if (op_ret != -1) {
+                server_post_rename (frame, srv_state, &rsp, stbuf,
+                                    preoldparent, postoldparent,
+                                    prenewparent, postnewparent);
+                goto out;
+        }
+
+        /* Two gfids appear in one message, so each gets its own buffer. */
+        uuid_utoa_r (srv_state->resolve.gfid, src_gfid);
+        uuid_utoa_r (srv_state->resolve2.gfid, dst_gfid);
+        gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_RENAME_INFO,
+                "%"PRId64": RENAME %s (%s/%s) -> %s (%s/%s) ==> (%s)",
+                frame->root->unique, srv_state->loc.path,
+                src_gfid, srv_state->resolve.bname, srv_state->loc2.path,
+                dst_gfid, srv_state->resolve2.bname, strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_rename_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * UNLINK completion callback.
+ *
+ * Serialises xdata; on success emits a trace message and lets
+ * server_post_unlink() fill in the reply, on failure logs the path.
+ * The reply is always submitted.  Always returns 0.
+ */
+int
+server_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        gfs3_unlink_rsp   rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        srv_state = CALL_STATE (frame);
+
+        if (!op_ret) {
+                /* TODO: log gfid of the inodes */
+                gf_msg_trace (frame->root->client->bound_xl->name, 0,
+                              "%"PRId64": " "UNLINK_CBK %s",
+                              frame->root->unique, srv_state->loc.name);
+
+                server_post_unlink (srv_state, &rsp, preparent, postparent);
+                goto out;
+        }
+
+        gf_msg (this->name, fop_log_level (GF_FOP_UNLINK, op_errno),
+                op_errno, PS_MSG_LINK_INFO,
+                "%"PRId64": UNLINK %s (%s/%s) ==> (%s)",
+                frame->root->unique, srv_state->loc.path,
+                uuid_utoa (srv_state->resolve.pargfid),
+                srv_state->resolve.bname, strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_unlink_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * SYMLINK completion callback.
+ *
+ * Serialises xdata; on failure (op_ret < 0) logs the target path,
+ * otherwise lets server_post_symlink() fill in the reply.  The reply
+ * is always submitted.  Always returns 0.
+ */
+int
+server_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *stbuf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+        gfs3_symlink_rsp  rsp   = {0,};
+        server_state_t   *state = NULL;
+        rpcsvc_request_t *req   = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_LINK_INFO,
+                        "%"PRId64": SYMLINK %s (%s/%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.pargfid),
+                        state->resolve.bname, strerror (op_errno));
+                goto out;
+        }
+
+        server_post_symlink (state, &rsp, inode, stbuf, preparent,
+                             postparent, xdata);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_symlink_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * LINK completion callback.
+ *
+ * Serialises xdata; on failure logs the source gfid and the new
+ * parent/basename, otherwise lets server_post_link() fill in the
+ * reply.  The reply is always submitted.  Always returns 0.
+ */
+int
+server_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct iatt *stbuf, struct iatt *preparent,
+                 struct iatt *postparent, dict_t *xdata)
+{
+        gfs3_link_rsp     rsp            = {0,};
+        server_state_t   *state          = NULL;
+        rpcsvc_request_t *req            = NULL;
+        char              gfid_str[50]   = {0,};
+        char              newpar_str[50] = {0,};
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        state = CALL_STATE (frame);
+
+        if (op_ret) {
+                /* Two gfids appear in one message, so each gets its own buffer. */
+                uuid_utoa_r (state->resolve.gfid, gfid_str);
+                uuid_utoa_r (state->resolve2.pargfid, newpar_str);
+
+                gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_LINK_INFO,
+                        "%"PRId64": LINK %s (%s) -> %s/%s ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        gfid_str, newpar_str, state->resolve2.bname,
+                        strerror (op_errno));
+                goto out;
+        }
+
+        server_post_link (state, &rsp, inode, stbuf, preparent,
+                          postparent, xdata);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_link_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * TRUNCATE completion callback: serialises xdata, fills the reply via
+ * server_post_truncate() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        gfs3_truncate_rsp  rsp       = {0,};
+        server_state_t    *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_truncate (&rsp, prebuf, postbuf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, GF_LOG_INFO, op_errno,
+                PS_MSG_TRUNCATE_INFO,
+                "%"PRId64": TRUNCATE %s (%s) ==> (%s)",
+                frame->root->unique, srv_state->loc.path,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_truncate_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FSTAT completion callback: serialises xdata, fills the reply via
+ * server_post_fstat() on success or logs the failure, then submits the
+ * reply.  Always returns 0.
+ */
+int
+server_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+                  dict_t *xdata)
+{
+        gfs3_fstat_rsp    rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_fstat (&rsp, stbuf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_FSTAT, op_errno),
+                op_errno, PS_MSG_STAT_INFO,
+                "%"PRId64": FSTAT %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_fstat_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FTRUNCATE completion callback: serialises xdata, fills the reply via
+ * server_post_ftruncate() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        gfs3_ftruncate_rsp  rsp       = {0};
+        server_state_t     *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_ftruncate (&rsp, prebuf, postbuf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_FTRUNCATE, op_errno),
+                op_errno, PS_MSG_TRUNCATE_INFO,
+                "%"PRId64": FTRUNCATE %"PRId64" (%s)==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_ftruncate_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FLUSH completion callback: serialises xdata, logs the failure when
+ * op_ret is negative and submits the common reply.  Always returns 0.
+ */
+int
+server_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                srv_state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_FLUSH, op_errno),
+                        op_errno, PS_MSG_FLUSH_INFO,
+                        "%"PRId64": FLUSH %"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, srv_state->resolve.fd_no,
+                        uuid_utoa (srv_state->resolve.gfid),
+                        strerror (op_errno));
+        }
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FSYNC completion callback: serialises xdata, fills the reply via
+ * server_post_fsync() on success or logs the failure (op_ret < 0),
+ * then submits the reply.  Always returns 0.
+ */
+int
+server_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        gfs3_fsync_rsp    rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret >= 0) {
+                server_post_fsync (&rsp, prebuf, postbuf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_FSYNC, op_errno),
+                op_errno, PS_MSG_SYNC_INFO,
+                "%"PRId64": FSYNC %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_fsync_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * WRITEV completion callback: serialises xdata, fills the reply via
+ * server_post_writev() on success or logs the failure (op_ret < 0),
+ * then submits the reply.  Always returns 0.
+ */
+int
+server_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        gfs3_write_rsp    rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret >= 0) {
+                server_post_writev (&rsp, prebuf, postbuf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_WRITE, op_errno),
+                op_errno, PS_MSG_WRITE_INFO,
+                "%"PRId64": WRITEV %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_write_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * READV completion callback.
+ *
+ * Serialises xdata, fills the reply via server_post_readv() on success
+ * or logs the failure (op_ret < 0), then submits the reply together
+ * with the data vector, its count and the iobref.  Always returns 0.
+ *
+ * When GF_TESTING_IO_XDATA is defined a fixed test key is inserted
+ * into xdata first (creating the dictionary if none was passed).
+ */
+int
+server_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iovec *vector, int32_t count,
+                  struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+        gfs3_read_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+#ifdef GF_TESTING_IO_XDATA
+        {
+                int ret = 0;
+                if (!xdata)
+                        xdata = dict_new ();
+
+                ret = dict_set_str (xdata, "testing-the-xdata-key",
+                                    "testing-xdata-value");
+        }
+#endif
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret >= 0) {
+                server_post_readv (&rsp, stbuf, op_ret);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_READ, op_errno),
+                op_errno, PS_MSG_READ_INFO,
+                "%"PRId64": READV %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, vector, count, iobref,
+                             (xdrproc_t)xdr_gfs3_read_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * RCHECKSUM completion callback: serialises xdata, copies both
+ * checksums into the reply via server_post_rchecksum() on success or
+ * logs the failure (op_ret < 0), then submits the reply.  Always
+ * returns 0.
+ */
+int
+server_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno,
+                      uint32_t weak_checksum, uint8_t *strong_checksum,
+                      dict_t *xdata)
+{
+        gfs3_rchecksum_rsp  rsp       = {0,};
+        server_state_t     *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret >= 0) {
+                server_post_rchecksum (&rsp, weak_checksum, strong_checksum);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_RCHECKSUM, op_errno),
+                op_errno, PS_MSG_CHKSUM_INFO,
+                "%"PRId64": RCHECKSUM %"PRId64" (%s)==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid), strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_rchecksum_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * OPEN completion callback.
+ *
+ * Serialises xdata, logs the failure when op_ret is negative, and
+ * otherwise lets server_post_open() fill the reply in for fd; that
+ * helper's return value becomes the op_ret sent to the client.  The
+ * reply is always submitted.  Always returns 0.
+ */
+int
+server_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        server_state_t   *state = NULL;
+        rpcsvc_request_t *req   = NULL;
+        gfs3_open_rsp     rsp   = {0,};
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_OPEN, op_errno),
+                        op_errno, PS_MSG_OPEN_INFO,
+                        "%"PRId64": OPEN %s (%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = server_post_open (frame, this, &rsp, fd);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_open_rsp);
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * CREATE completion callback.
+ *
+ * Serialises xdata, logs the failure when op_ret is negative, and
+ * otherwise traces the new gfid and calls server_post_create().  A
+ * non-zero result from that helper is treated as a negated errno: it
+ * is flipped into op_errno and op_ret becomes -1.  Any failing reply
+ * has its fd field zeroed.  Always returns 0.
+ */
+int
+server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                   struct iatt *stbuf, struct iatt *preparent,
+                   struct iatt *postparent, dict_t *xdata)
+{
+        server_state_t   *srv_state = NULL;
+        gfs3_create_rsp   rsp       = {0,};
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        srv_state = CALL_STATE (frame);
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_CREATE_INFO,
+                        "%"PRId64": CREATE %s (%s/%s) ==> (%s)",
+                        frame->root->unique, srv_state->loc.path,
+                        uuid_utoa (srv_state->resolve.pargfid),
+                        srv_state->resolve.bname, strerror (op_errno));
+                goto out;
+        }
+
+        /* TODO: log gfid too */
+        gf_msg_trace (frame->root->client->bound_xl->name, 0,
+                      "%"PRId64": " "CREATE %s (%s)",
+                      frame->root->unique, srv_state->loc.name,
+                      uuid_utoa (stbuf->ia_gfid));
+
+        op_ret = server_post_create (frame, &rsp, srv_state, this, fd, inode,
+                                     stbuf, preparent, postparent);
+        if (op_ret) {
+                op_errno = -op_ret;
+                op_ret   = -1;
+        }
+
+out:
+        if (op_ret)
+                rsp.fd = 0;
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_create_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * READLINK completion callback: serialises xdata, fills the reply via
+ * server_post_readlink() on success or logs the failure (op_ret < 0),
+ * then submits the reply.  Always returns 0.
+ */
+int
+server_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, const char *buf,
+                     struct iatt *stbuf, dict_t *xdata)
+{
+        gfs3_readlink_rsp  rsp       = {0,};
+        server_state_t    *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret >= 0) {
+                server_post_readlink (&rsp, stbuf, buf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_LINK_INFO,
+                "%"PRId64": READLINK %s (%s) ==> (%s)",
+                frame->root->unique, srv_state->loc.path,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_readlink_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * STAT completion callback: serialises xdata, fills the reply via
+ * server_post_stat() on success or logs the failure, then submits the
+ * reply.  Always returns 0.
+ */
+int
+server_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+                 dict_t *xdata)
+{
+        gfs3_stat_rsp     rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_stat (&rsp, stbuf);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_STAT, op_errno),
+                op_errno, PS_MSG_STAT_INFO,
+                "%"PRId64": STAT %s (%s) ==> (%s)",
+                frame->root->unique, srv_state->loc.path,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_stat_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * SETATTR completion callback: serialises xdata, fills the reply via
+ * server_post_setattr() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+        gfs3_setattr_rsp  rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_setattr (&rsp, statpre, statpost);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, GF_LOG_INFO, op_errno, PS_MSG_SETATTR_INFO,
+                "%"PRId64": SETATTR %s (%s) ==> (%s)",
+                frame->root->unique, srv_state->loc.path,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_setattr_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * FSETATTR completion callback: serialises xdata, fills the reply via
+ * server_post_fsetattr() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+        gfs3_fsetattr_rsp  rsp       = {0,};
+        server_state_t    *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_fsetattr (&rsp, statpre, statpost);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_FSETATTR, op_errno),
+                op_errno, PS_MSG_SETATTR_INFO,
+                "%"PRId64": FSETATTR %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_fsetattr_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * XATTROP completion callback.
+ *
+ * Serialises xdata, and on success also serialises the resulting
+ * dictionary into the reply.  A failure (op_ret < 0) is logged
+ * instead.  Both serialised buffers are released after the reply has
+ * been submitted.  Always returns 0.
+ */
+int
+server_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *dict,
+                    dict_t *xdata)
+{
+        gfs3_xattrop_rsp  rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (0 > op_ret) {
+                srv_state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_XATTROP, op_errno),
+                        op_errno, PS_MSG_XATTROP_INFO,
+                        "%"PRId64": XATTROP %s (%s) ==> (%s)",
+                        frame->root->unique, srv_state->loc.path,
+                        uuid_utoa (srv_state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* Success: ship the resulting dictionary itself. */
+        GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+                                    rsp.dict.dict_len, op_errno, out);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_xattrop_rsp);
+
+        GF_FREE (rsp.dict.dict_val);
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * FXATTROP completion callback.
+ *
+ * Serialises xdata, and on success also serialises the resulting
+ * dictionary into the reply.  A failure (op_ret < 0) is logged
+ * instead.  Both serialised buffers are released after the reply has
+ * been submitted.  Always returns 0.
+ */
+int
+server_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict,
+                     dict_t *xdata)
+{
+        /*
+         * The reply is encoded with xdr_gfs3_fxattrop_rsp below, so it is
+         * declared with the matching structure rather than the XATTROP one.
+         */
+        gfs3_fxattrop_rsp  rsp   = {0,};
+        server_state_t    *state = NULL;
+        rpcsvc_request_t  *req   = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                state = CALL_STATE (frame);
+                gf_msg (this->name, fop_log_level (GF_FOP_FXATTROP, op_errno),
+                        op_errno, PS_MSG_XATTROP_INFO,
+                        "%"PRId64": FXATTROP %"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* Success: ship the resulting dictionary itself. */
+        GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+                                    rsp.dict.dict_len, op_errno, out);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_fxattrop_rsp);
+
+        GF_FREE (rsp.dict.dict_val);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * READDIRP completion callback.
+ *
+ * Serialises xdata and logs the failure when op_ret is negative.  For
+ * a positive op_ret the entries are copied into the reply by
+ * server_post_readdirp(); if that returns -1 the reply is turned into
+ * an ENOMEM failure.  On every non-failing path the entries are then
+ * passed to gf_link_inodes_from_dirent() with the directory fd's
+ * inode.  The reply is submitted and released with
+ * readdirp_rsp_cleanup().  Always returns 0.
+ */
+int
+server_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                     dict_t *xdata)
+{
+        gfs3_readdirp_rsp  rsp       = {0,};
+        server_state_t    *srv_state = CALL_STATE (frame);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                gf_msg (this->name, fop_log_level (GF_FOP_READDIRP, op_errno),
+                        op_errno, PS_MSG_DIR_INFO,
+                        "%"PRId64": READDIRP %"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, srv_state->resolve.fd_no,
+                        uuid_utoa (srv_state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* (op_ret == 0) is valid, and means EOF */
+        if (op_ret && server_post_readdirp (&rsp, entries) == -1) {
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        gf_link_inodes_from_dirent (this, srv_state->fd->inode, entries);
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, frame->local, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_readdirp_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        readdirp_rsp_cleanup (&rsp);
+
+        return 0;
+}
+
+/*
+ * FALLOCATE completion callback: serialises xdata, fills the reply via
+ * server_post_fallocate() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+        gfs3_fallocate_rsp  rsp       = {0,};
+        server_state_t     *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_fallocate (&rsp, statpre, statpost);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_FALLOCATE, op_errno),
+                op_errno, PS_MSG_ALLOC_INFO,
+                "%"PRId64": FALLOCATE %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply(frame, frame->local, &rsp, NULL, 0, NULL,
+                            (xdrproc_t) xdr_gfs3_fallocate_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * DISCARD completion callback: serialises xdata, fills the reply via
+ * server_post_discard() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+        gfs3_discard_rsp  rsp       = {0,};
+        server_state_t   *srv_state = NULL;
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_discard (&rsp, statpre, statpost);
+                goto out;
+        }
+
+        srv_state = CALL_STATE (frame);
+        gf_msg (this->name, fop_log_level (GF_FOP_DISCARD, op_errno),
+                op_errno, PS_MSG_DISCARD_INFO,
+                "%"PRId64": DISCARD %"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply(frame, frame->local, &rsp, NULL, 0, NULL,
+                            (xdrproc_t) xdr_gfs3_discard_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * ZEROFILL completion callback: serialises xdata, fills the reply via
+ * server_post_zerofill() on success or logs the failure, then submits
+ * the reply.  Always returns 0.
+ */
+int
+server_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+        gfs3_zerofill_rsp  rsp       = {0,};
+        rpcsvc_request_t  *rpc_req   = frame->local;
+        server_state_t    *srv_state = CALL_STATE (frame);
+
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val),
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (!op_ret) {
+                server_post_zerofill (&rsp, statpre, statpost);
+                goto out;
+        }
+
+        gf_msg (this->name, fop_log_level (GF_FOP_ZEROFILL, op_errno),
+                op_errno, PS_MSG_ZEROFILL_INFO,
+                "%"PRId64": ZEROFILL%"PRId64" (%s) ==> (%s)",
+                frame->root->unique, srv_state->resolve.fd_no,
+                uuid_utoa (srv_state->resolve.gfid),
+                strerror (op_errno));
+
+out:
+        rsp.op_ret   = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply(frame, rpc_req, &rsp, NULL, 0, NULL,
+                            (xdrproc_t) xdr_gfs3_zerofill_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * Reply callback for IPC.  Serializes xdata, logs at INFO level when
+ * op_ret is non-zero, then submits a gf_common_rsp carrying op_ret and
+ * the translated op_errno.  Always returns 0.
+ */
+int
+server_ipc_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gf_common_rsp      reply   = {0,};
+        rpcsvc_request_t  *request = frame->local;
+        server_state_t    *st      = CALL_STATE (frame);
+
+        /* on serialization failure the macro sets op_errno and jumps to out */
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&reply.xdata.xdata_val),
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (op_ret != 0) {
+                gf_msg (this->name, GF_LOG_INFO, op_errno,
+                        PS_MSG_SERVER_IPC_INFO,
+                        "%"PRId64": IPC%"PRId64" (%s)",
+                        frame->root->unique, st->resolve.fd_no,
+                        uuid_utoa (st->resolve.gfid));
+        }
+
+out:
+        reply.op_ret = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply(frame, request, &reply, NULL, 0, NULL,
+                            (xdrproc_t) xdr_gf_common_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        return 0;
+}
+
+
+/*
+ * Reply callback for SEEK.
+ *
+ * Serializes xdata into the response, logs the failure when op_ret is
+ * non-zero, and on success copies the offset found by the fop into the
+ * response before submitting the gfs3_seek_rsp.  Always returns 0.
+ *
+ * @offset: offset reported by the underlying seek fop; only meaningful
+ *          when op_ret is 0.
+ */
+int
+server_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+{
+        struct gfs3_seek_rsp rsp = {0, };
+        server_state_t *state = NULL;
+        rpcsvc_request_t *req = NULL;
+
+        req = frame->local;
+        state = CALL_STATE (frame);
+
+        /* on serialization failure the macro sets op_errno and jumps to out */
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val),
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret) {
+                gf_msg (this->name, fop_log_level (GF_FOP_SEEK, op_errno),
+                        op_errno, PS_MSG_SEEK_INFO,
+                        "%"PRId64": SEEK%"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /*
+         * Without this the result of the seek is dropped and the reply
+         * always carries the zero-initialised offset.
+         */
+        rsp.offset = offset;
+
+out:
+        rsp.op_ret = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t) xdr_gfs3_seek_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Reply callback for SETACTIVELK.  Serializes xdata, logs at INFO level
+ * when op_ret is negative, and submits a gfs3_setactivelk_rsp carrying
+ * op_ret and the translated op_errno.  Always returns 0.
+ */
+static int
+server_setactivelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        gfs3_setactivelk_rsp rsp = {0,};
+        server_state_t *state = NULL;
+        rpcsvc_request_t *req = NULL;
+
+        state = CALL_STATE (frame);
+
+        /* on serialization failure the macro sets op_errno and jumps to out */
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_INFO,
+                        op_errno, 0,
+                        "%"PRId64": SETACTIVELK %s (%s) ==> (%s)",
+                        frame->root->unique, state->loc.path,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+        }
+
+out:
+        rsp.op_ret = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        req = frame->local;
+
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_setactivelk_rsp);
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/*
+ * Reply callback for COMPOUND.
+ *
+ * Serializes xdata, and on success fills the response from each entry of
+ * the callback argument list via server_populate_compound_response().
+ * After the reply is submitted, the request-side argument list hanging
+ * off state->args is wiped and freed.  Always returns 0.
+ *
+ * @data: compound_args_cbk_t with the per-fop results.  It is NULL when
+ *        this callback is invoked directly from the resume error path.
+ */
+int
+server_compound_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, void *data,
+                     dict_t *xdata)
+{
+        struct gfs3_compound_rsp rsp = {0,};
+        server_state_t *state = NULL;
+        rpcsvc_request_t *req = NULL;
+        compound_args_cbk_t *args_cbk = data;
+        compound_args_t *args = NULL;
+        int i = 0;
+
+        req = frame->local;
+        state = CALL_STATE (frame);
+
+        /* on serialization failure the macro sets op_errno and jumps to out */
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val),
+                                    rsp.xdata.xdata_len, op_errno, out);
+
+        if (op_ret) {
+                gf_msg (this->name, fop_log_level (GF_FOP_COMPOUND, op_errno),
+                        op_errno, PS_MSG_COMPOUND_INFO,
+                        "%"PRId64": COMPOUND%"PRId64" (%s) ==> (%s)",
+                        frame->root->unique, state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid),
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* a successful unwind without results cannot be turned into a reply */
+        if (!args_cbk) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        for (i = 0; i < args_cbk->fop_length; i++) {
+                op_ret = server_populate_compound_response (this, &rsp,
+                                                            frame,
+                                                            args_cbk, i);
+
+                if (op_ret) {
+                        /* the helper's non-zero return is used as the errno */
+                        op_errno = op_ret;
+                        op_ret = -1;
+                        goto out;
+                }
+        }
+out:
+        rsp.op_ret = op_ret;
+        rsp.op_errno = gf_errno_to_error (op_errno);
+
+        /*
+         * Detach the request arguments before replying so the cleanup
+         * below never has to read state again.
+         * NOTE(review): server_submit_reply() presumably releases the
+         * frame and its state - confirm.
+         */
+        args = state->args;
+        state->args = NULL;
+
+        server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t) xdr_gfs3_compound_rsp);
+
+        /* args is NULL when the resume step failed before allocating it */
+        if (args) {
+                if (args->req_list) {
+                        for (i = 0; i < args->fop_length; i++)
+                                args_wipe (&args->req_list[i]);
+                }
+
+                GF_FREE (args->req_list);
+                GF_FREE (args);
+        }
+
+        GF_FREE (rsp.xdata.xdata_val);
+
+        return 0;
+}
+
+/* Resume function section */
+
+/*
+ * Resume step for RCHECKSUM.  If state->resolve recorded a failure, the
+ * callback is invoked directly with that op_ret/op_errno; otherwise the
+ * rchecksum fop is wound to bound_xl on state->fd.  Always returns 0.
+ */
+int
+server_rchecksum_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_rchecksum_cbk (frame, NULL, frame->this,
+                                      state->resolve.op_ret,
+                                      state->resolve.op_errno, 0,
+                                      NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_rchecksum_cbk, bound_xl,
+                    bound_xl->fops->rchecksum, state->fd,
+                    state->offset, state->size, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for LEASE.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the lease fop
+ * to bound_xl for state->loc.  Always returns 0.
+ */
+int
+server_lease_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_lease_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_lease_cbk, bound_xl, bound_xl->fops->lease,
+                    &state->loc, &state->lease, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for LK.  Replies through the callback with the resolver's
+ * op_ret/op_errno on failure; otherwise winds the lk fop to bound_xl on
+ * state->fd with the requested command and flock.  Always returns 0.
+ */
+int
+server_lk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_lk_cbk (frame, NULL, frame->this,
+                               state->resolve.op_ret,
+                               state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_lk_cbk, bound_xl, bound_xl->fops->lk,
+                    state->fd, state->cmd, &state->flock, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for RENAME.  Both state->resolve and state->resolve2 must
+ * have succeeded; the first one that failed supplies the op_ret/op_errno
+ * handed to the callback.  Otherwise the rename fop is wound to bound_xl
+ * from state->loc to state->loc2.  Always returns 0.
+ */
+int
+server_rename_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state  = CALL_STATE (frame);
+        int             status = 0;
+        int             err_no = 0;
+
+        if (state->resolve.op_ret != 0) {
+                status = state->resolve.op_ret;
+                err_no = state->resolve.op_errno;
+        } else if (state->resolve2.op_ret != 0) {
+                status = state->resolve2.op_ret;
+                err_no = state->resolve2.op_errno;
+        }
+
+        if (status != 0) {
+                server_rename_cbk (frame, NULL, frame->this, status, err_no,
+                                   NULL, NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_rename_cbk,
+                    bound_xl, bound_xl->fops->rename,
+                    &state->loc, &state->loc2, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for LINK.  Both state->resolve and state->resolve2 must
+ * have succeeded; the first one that failed supplies the op_ret/op_errno
+ * handed to the callback.  On success state->loc2 takes a reference on
+ * the inode of state->loc and the link fop is wound to bound_xl.
+ * Always returns 0.
+ */
+int
+server_link_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state  = CALL_STATE (frame);
+        int             status = 0;
+        int             err_no = 0;
+
+        if (state->resolve.op_ret != 0) {
+                status = state->resolve.op_ret;
+                err_no = state->resolve.op_errno;
+        } else if (state->resolve2.op_ret != 0) {
+                status = state->resolve2.op_ret;
+                err_no = state->resolve2.op_errno;
+        }
+
+        if (status != 0) {
+                server_link_cbk (frame, NULL, frame->this, status, err_no,
+                                 NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        state->loc2.inode = inode_ref (state->loc.inode);
+
+        STACK_WIND (frame, server_link_cbk, bound_xl, bound_xl->fops->link,
+                    &state->loc, &state->loc2, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for SYMLINK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise assigns a new inode
+ * from state->itable to state->loc and winds the symlink fop to
+ * bound_xl.  Always returns 0.
+ */
+int
+server_symlink_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_symlink_cbk (frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno,
+                                    NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        state->loc.inode = inode_new (state->itable);
+
+        STACK_WIND (frame, server_symlink_cbk,
+                    bound_xl, bound_xl->fops->symlink,
+                    state->name, &state->loc, state->umask, state->xdata);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for ACCESS.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the access fop
+ * to bound_xl for state->loc with state->mask.  Always returns 0.
+ */
+int
+server_access_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_access_cbk (frame, NULL, frame->this,
+                                   state->resolve.op_ret,
+                                   state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_access_cbk,
+                    bound_xl, bound_xl->fops->access,
+                    &state->loc, state->mask, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for FENTRYLK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure.  Otherwise it makes sure
+ * state->xdata exists (best effort), stores the client's uid under
+ * "connection-id" and winds the fentrylk fop to bound_xl.
+ * Always returns 0.
+ */
+int
+server_fentrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        GF_UNUSED int   rc    = -1;
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fentrylk_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        if (state->xdata == NULL)
+                state->xdata = dict_new ();
+
+        /* the fop is still wound when the dict could not be created */
+        if (state->xdata != NULL)
+                rc = dict_set_str (state->xdata, "connection-id",
+                                   frame->root->client->client_uid);
+
+        STACK_WIND (frame, server_fentrylk_cbk, bound_xl,
+                    bound_xl->fops->fentrylk,
+                    state->volume, state->fd, state->name,
+                    state->cmd, state->type, state->xdata);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for ENTRYLK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure.  Otherwise it makes sure
+ * state->xdata exists (best effort), stores the client's uid under
+ * "connection-id" and winds the entrylk fop to bound_xl.
+ * Always returns 0.
+ */
+int
+server_entrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        GF_UNUSED int   rc    = -1;
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_entrylk_cbk (frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        if (state->xdata == NULL)
+                state->xdata = dict_new ();
+
+        /* the fop is still wound when the dict could not be created */
+        if (state->xdata != NULL)
+                rc = dict_set_str (state->xdata, "connection-id",
+                                   frame->root->client->client_uid);
+
+        STACK_WIND (frame, server_entrylk_cbk,
+                    bound_xl, bound_xl->fops->entrylk,
+                    state->volume, &state->loc, state->name,
+                    state->cmd, state->type, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for FINODELK.  Emits a debug trace, then replies through
+ * the callback with the resolver's op_ret/op_errno on failure.
+ * Otherwise it makes sure state->xdata exists (best effort), stores the
+ * client's uid under "connection-id" and winds the finodelk fop to
+ * bound_xl.  Always returns 0.
+ */
+int
+server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        GF_UNUSED int   rc    = -1;
+        server_state_t *state = NULL;
+
+        gf_msg_debug (bound_xl->name, 0, "frame %p, xlator %p", frame,
+                      bound_xl);
+
+        state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_finodelk_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        if (state->xdata == NULL)
+                state->xdata = dict_new ();
+
+        /* the fop is still wound when the dict could not be created */
+        if (state->xdata != NULL)
+                rc = dict_set_str (state->xdata, "connection-id",
+                                   frame->root->client->client_uid);
+
+        STACK_WIND (frame, server_finodelk_cbk, bound_xl,
+                    bound_xl->fops->finodelk, state->volume, state->fd,
+                    state->cmd, &state->flock, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for INODELK.  Emits a debug trace, then replies through
+ * the callback with the resolver's op_ret/op_errno on failure.
+ * Otherwise it makes sure state->xdata exists (best effort), stores the
+ * client's uid under "connection-id" and winds the inodelk fop to
+ * bound_xl.  Always returns 0.
+ */
+int
+server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        GF_UNUSED int   rc    = -1;
+        server_state_t *state = NULL;
+
+        gf_msg_debug (bound_xl->name, 0, "frame %p, xlator %p", frame,
+                      bound_xl);
+
+        state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_inodelk_cbk (frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        if (state->xdata == NULL)
+                state->xdata = dict_new ();
+
+        /* the fop is still wound when the dict could not be created */
+        if (state->xdata != NULL)
+                rc = dict_set_str (state->xdata, "connection-id",
+                                   frame->root->client->client_uid);
+
+        STACK_WIND (frame, server_inodelk_cbk, bound_xl,
+                    bound_xl->fops->inodelk, state->volume, &state->loc,
+                    state->cmd, &state->flock, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for RMDIR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the rmdir fop
+ * to bound_xl for state->loc.  Always returns 0.
+ */
+int
+server_rmdir_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_rmdir_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_rmdir_cbk, bound_xl, bound_xl->fops->rmdir,
+                    &state->loc, state->flags, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for MKDIR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise assigns a new inode
+ * from state->itable to state->loc and winds the mkdir fop to bound_xl.
+ * Always returns 0.
+ */
+int
+server_mkdir_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_mkdir_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno,
+                                  NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        state->loc.inode = inode_new (state->itable);
+
+        STACK_WIND (frame, server_mkdir_cbk,
+                    bound_xl, bound_xl->fops->mkdir,
+                    &(state->loc), state->mode, state->umask, state->xdata);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for MKNOD.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise assigns a new inode
+ * from state->itable to state->loc and winds the mknod fop to bound_xl.
+ * Always returns 0.
+ */
+int
+server_mknod_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_mknod_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno,
+                                  NULL, NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        state->loc.inode = inode_new (state->itable);
+
+        STACK_WIND (frame, server_mknod_cbk,
+                    bound_xl, bound_xl->fops->mknod,
+                    &(state->loc), state->mode, state->dev,
+                    state->umask, state->xdata);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for FSYNCDIR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fsyncdir
+ * fop to bound_xl on state->fd.  Always returns 0.
+ */
+int
+server_fsyncdir_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fsyncdir_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fsyncdir_cbk,
+                    bound_xl,
+                    bound_xl->fops->fsyncdir,
+                    state->fd, state->flags, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for READDIR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise asserts that
+ * state->fd is set and winds the readdir fop to bound_xl.
+ * Always returns 0.
+ */
+int
+server_readdir_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_readdir_cbk (frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        GF_ASSERT (state->fd);
+
+        STACK_WIND (frame, server_readdir_cbk,
+                    bound_xl,
+                    bound_xl->fops->readdir,
+                    state->fd, state->size, state->offset, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for READDIRP.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the readdirp
+ * fop to bound_xl on state->fd, passing state->dict as the last
+ * argument.  Always returns 0.
+ */
+int
+server_readdirp_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_readdirp_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_readdirp_cbk, bound_xl,
+                    bound_xl->fops->readdirp, state->fd, state->size,
+                    state->offset, state->dict);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for OPENDIR.
+ *
+ * Replies through the callback with the resolver's op_ret/op_errno when
+ * resolution failed.  Otherwise creates an fd for the resolved inode and
+ * winds the opendir fop to bound_xl.  If the fd cannot be created the
+ * failure is recorded as -1/ENOMEM so the callback does not see a
+ * successful op_ret.  Always returns 0.
+ */
+int
+server_opendir_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = NULL;
+
+        state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0)
+                goto err;
+
+        state->fd = fd_create (state->loc.inode, frame->root->pid);
+        if (!state->fd) {
+                gf_msg ("server", GF_LOG_ERROR, 0, PS_MSG_FD_CREATE_FAILED,
+                        "could not create the fd");
+                /*
+                 * resolve.op_ret is 0 at this point; without an explicit
+                 * error the callback would be told the open succeeded.
+                 */
+                state->resolve.op_ret = -1;
+                state->resolve.op_errno = ENOMEM;
+                goto err;
+        }
+
+        STACK_WIND (frame, server_opendir_cbk,
+                    bound_xl, bound_xl->fops->opendir,
+                    &state->loc, state->fd, state->xdata);
+        return 0;
+err:
+        server_opendir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+                            state->resolve.op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Resume step for STATFS.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the statfs fop
+ * to bound_xl for state->loc.  Always returns 0.
+ */
+int
+server_statfs_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_statfs_cbk (frame, NULL, frame->this,
+                                   state->resolve.op_ret,
+                                   state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_statfs_cbk,
+                    bound_xl, bound_xl->fops->statfs,
+                    &state->loc, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for REMOVEXATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the
+ * removexattr fop to bound_xl for state->loc and state->name.
+ * Always returns 0.
+ */
+int
+server_removexattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_removexattr_cbk (frame, NULL, frame->this,
+                                        state->resolve.op_ret,
+                                        state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_removexattr_cbk,
+                    bound_xl, bound_xl->fops->removexattr,
+                    &state->loc, state->name, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for FREMOVEXATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the
+ * fremovexattr fop to bound_xl on state->fd for state->name.
+ * Always returns 0.
+ */
+int
+server_fremovexattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fremovexattr_cbk (frame, NULL, frame->this,
+                                         state->resolve.op_ret,
+                                         state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fremovexattr_cbk,
+                    bound_xl, bound_xl->fops->fremovexattr,
+                    state->fd, state->name, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for FGETXATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fgetxattr
+ * fop to bound_xl on state->fd for state->name.  Always returns 0.
+ */
+int
+server_fgetxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fgetxattr_cbk (frame, NULL, frame->this,
+                                      state->resolve.op_ret,
+                                      state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fgetxattr_cbk,
+                    bound_xl, bound_xl->fops->fgetxattr,
+                    state->fd, state->name, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for XATTROP.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the xattrop
+ * fop to bound_xl for state->loc with state->flags and state->dict.
+ * Always returns 0.
+ */
+int
+server_xattrop_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_xattrop_cbk (frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_xattrop_cbk,
+                    bound_xl, bound_xl->fops->xattrop,
+                    &state->loc, state->flags, state->dict, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for FXATTROP.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fxattrop
+ * fop to bound_xl on state->fd with state->flags and state->dict.
+ * Always returns 0.
+ */
+int
+server_fxattrop_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fxattrop_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fxattrop_cbk,
+                    bound_xl, bound_xl->fops->fxattrop,
+                    state->fd, state->flags, state->dict, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for FSETXATTR.
+ *
+ * Replies through server_fsetxattr_cbk with the resolver's
+ * op_ret/op_errno when resolution failed; otherwise winds the fsetxattr
+ * fop to bound_xl on state->fd with state->dict and state->flags.
+ * The same callback is used for the wind and for the error path so that
+ * both report the operation as FSETXATTR.  Always returns 0.
+ */
+int
+server_fsetxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = NULL;
+
+        state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0)
+                goto err;
+
+        /* fd-based fop: unwind into the fsetxattr callback, not setxattr's */
+        STACK_WIND (frame, server_fsetxattr_cbk,
+                    bound_xl, bound_xl->fops->fsetxattr,
+                    state->fd, state->dict, state->flags, state->xdata);
+        return 0;
+err:
+        server_fsetxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+                              state->resolve.op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Resume step for UNLINK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the unlink fop
+ * to bound_xl for state->loc.  Always returns 0.
+ */
+int
+server_unlink_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_unlink_cbk (frame, NULL, frame->this,
+                                   state->resolve.op_ret,
+                                   state->resolve.op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_unlink_cbk,
+                    bound_xl, bound_xl->fops->unlink,
+                    &state->loc, state->flags, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for TRUNCATE.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the truncate
+ * fop to bound_xl for state->loc at state->offset.  Always returns 0.
+ */
+int
+server_truncate_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_truncate_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_truncate_cbk,
+                    bound_xl, bound_xl->fops->truncate,
+                    &state->loc, state->offset, state->xdata);
+        return 0;
+}
+
+
+
+/*
+ * Resume step for FSTAT.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fstat fop
+ * to bound_xl on state->fd.  Always returns 0.
+ */
+int
+server_fstat_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fstat_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fstat_cbk,
+                    bound_xl, bound_xl->fops->fstat,
+                    state->fd, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for SETXATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the setxattr
+ * fop to bound_xl for state->loc with state->dict and state->flags.
+ * Always returns 0.
+ */
+int
+server_setxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_setxattr_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_setxattr_cbk,
+                    bound_xl, bound_xl->fops->setxattr,
+                    &state->loc, state->dict, state->flags, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for GETXATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the getxattr
+ * fop to bound_xl for state->loc and state->name.  Always returns 0.
+ */
+int
+server_getxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_getxattr_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_getxattr_cbk,
+                    bound_xl, bound_xl->fops->getxattr,
+                    &state->loc, state->name, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for FTRUNCATE.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the ftruncate
+ * fop to bound_xl on state->fd at state->offset.  Always returns 0.
+ */
+int
+server_ftruncate_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_ftruncate_cbk (frame, NULL, frame->this,
+                                      state->resolve.op_ret,
+                                      state->resolve.op_errno,
+                                      NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_ftruncate_cbk,
+                    bound_xl, bound_xl->fops->ftruncate,
+                    state->fd, state->offset, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for FLUSH.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the flush fop
+ * to bound_xl on state->fd.  Always returns 0.
+ */
+int
+server_flush_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_flush_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_flush_cbk,
+                    bound_xl, bound_xl->fops->flush, state->fd, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for FSYNC.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fsync fop
+ * to bound_xl on state->fd with state->flags.  Always returns 0.
+ */
+int
+server_fsync_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fsync_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fsync_cbk,
+                    bound_xl, bound_xl->fops->fsync,
+                    state->fd, state->flags, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for WRITEV.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the writev fop
+ * to bound_xl on state->fd with the payload vector, offset, flags and
+ * iobref kept in state.  Always returns 0.
+ */
+int
+server_writev_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_writev_cbk (frame, NULL, frame->this,
+                                   state->resolve.op_ret,
+                                   state->resolve.op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_writev_cbk,
+                    bound_xl, bound_xl->fops->writev,
+                    state->fd, state->payload_vector, state->payload_count,
+                    state->offset, state->flags, state->iobref, state->xdata);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for READV.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the readv fop
+ * to bound_xl on state->fd with state->size, state->offset and
+ * state->flags.  Always returns 0.
+ */
+int
+server_readv_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_readv_cbk (frame, NULL, frame->this,
+                                  state->resolve.op_ret,
+                                  state->resolve.op_errno,
+                                  NULL, 0, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_readv_cbk,
+                    bound_xl, bound_xl->fops->readv,
+                    state->fd, state->size, state->offset, state->flags,
+                    state->xdata);
+
+        return 0;
+}
+
+
+/*
+ * Resume step for CREATE.  On resolver failure the callback is invoked
+ * with the resolver's op_ret/op_errno.  Otherwise a new inode and an fd
+ * for it are created; if fd creation fails the error is recorded as
+ * -1/ENOMEM in state->resolve and reported through the callback.  On
+ * success the fd inherits state->flags and the create fop is wound to
+ * bound_xl.  Always returns 0.
+ */
+int
+server_create_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0)
+                goto fail;
+
+        state->loc.inode = inode_new (state->itable);
+        state->fd = fd_create (state->loc.inode, frame->root->pid);
+
+        if (state->fd == NULL) {
+                gf_msg ("server", GF_LOG_ERROR, 0, PS_MSG_FD_CREATE_FAILED,
+                        "fd creation for the inode %s failed",
+                        state->loc.inode ?
+                        uuid_utoa (state->loc.inode->gfid):NULL);
+                state->resolve.op_ret = -1;
+                state->resolve.op_errno = ENOMEM;
+                goto fail;
+        }
+
+        state->fd->flags = state->flags;
+
+        STACK_WIND (frame, server_create_cbk,
+                    bound_xl, bound_xl->fops->create,
+                    &(state->loc), state->flags, state->mode,
+                    state->umask, state->fd, state->xdata);
+
+        return 0;
+
+fail:
+        server_create_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+                           state->resolve.op_errno, NULL, NULL, NULL,
+                           NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Resume step for OPEN.
+ *
+ * Replies through the callback with the resolver's op_ret/op_errno when
+ * resolution failed.  Otherwise creates an fd for the resolved inode,
+ * copies state->flags onto it and winds the open fop to bound_xl.  If
+ * the fd cannot be created the failure is reported as -1/ENOMEM instead
+ * of dereferencing a NULL fd.  Always returns 0.
+ */
+int
+server_open_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = NULL;
+
+        state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0)
+                goto err;
+
+        state->fd = fd_create (state->loc.inode, frame->root->pid);
+        if (!state->fd) {
+                gf_msg ("server", GF_LOG_ERROR, 0, PS_MSG_FD_CREATE_FAILED,
+                        "fd creation for the inode %s failed",
+                        state->loc.inode ?
+                        uuid_utoa (state->loc.inode->gfid):NULL);
+                state->resolve.op_ret = -1;
+                state->resolve.op_errno = ENOMEM;
+                goto err;
+        }
+        state->fd->flags = state->flags;
+
+        STACK_WIND (frame, server_open_cbk,
+                    bound_xl, bound_xl->fops->open,
+                    &state->loc, state->flags, state->fd, state->xdata);
+
+        return 0;
+err:
+        server_open_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+                         state->resolve.op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Resume step for READLINK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the readlink
+ * fop to bound_xl for state->loc with state->size.  Always returns 0.
+ */
+int
+server_readlink_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_readlink_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_readlink_cbk,
+                    bound_xl, bound_xl->fops->readlink,
+                    &state->loc, state->size, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for FSETATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fsetattr
+ * fop to bound_xl on state->fd with state->stbuf and state->valid.
+ * Always returns 0.
+ */
+int
+server_fsetattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fsetattr_cbk (frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fsetattr_cbk,
+                    bound_xl, bound_xl->fops->fsetattr,
+                    state->fd, &state->stbuf, state->valid, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for SETATTR.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the setattr
+ * fop to bound_xl for state->loc with state->stbuf and state->valid.
+ * Always returns 0.
+ */
+int
+server_setattr_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_setattr_cbk (frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno,
+                                    NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_setattr_cbk,
+                    bound_xl, bound_xl->fops->setattr,
+                    &state->loc, &state->stbuf, state->valid, state->xdata);
+        return 0;
+}
+
+
+/*
+ * Resume step for STAT.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the stat fop
+ * to bound_xl for state->loc.  Always returns 0.
+ */
+int
+server_stat_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_stat_cbk (frame, NULL, frame->this,
+                                 state->resolve.op_ret,
+                                 state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_stat_cbk,
+                    bound_xl, bound_xl->fops->stat, &state->loc, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for LOOKUP.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure.  Otherwise, if state->loc
+ * already has an inode the request is flagged as a revalidate; if not,
+ * a new inode is obtained from server_inode_new().  The lookup fop is
+ * then wound to bound_xl.  Always returns 0.
+ */
+int
+server_lookup_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_lookup_cbk (frame, NULL, frame->this,
+                                   state->resolve.op_ret,
+                                   state->resolve.op_errno,
+                                   NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        if (state->loc.inode) {
+                state->is_revalidate = 1;
+        } else {
+                state->loc.inode = server_inode_new (state->itable,
+                                                     state->loc.gfid);
+        }
+
+        STACK_WIND (frame, server_lookup_cbk,
+                    bound_xl, bound_xl->fops->lookup,
+                    &state->loc, state->xdata);
+
+        return 0;
+}
+
+/*
+ * Resume step for FALLOCATE.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the fallocate
+ * fop to bound_xl on state->fd with state->flags, state->offset and
+ * state->size.  Always returns 0.
+ */
+int
+server_fallocate_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_fallocate_cbk(frame, NULL, frame->this,
+                                     state->resolve.op_ret,
+                                     state->resolve.op_errno,
+                                     NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_fallocate_cbk,
+                    bound_xl, bound_xl->fops->fallocate,
+                    state->fd, state->flags, state->offset, state->size,
+                    state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for DISCARD.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the discard
+ * fop to bound_xl on state->fd with state->offset and state->size.
+ * Always returns 0.
+ */
+int
+server_discard_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_discard_cbk(frame, NULL, frame->this,
+                                   state->resolve.op_ret,
+                                   state->resolve.op_errno,
+                                   NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_discard_cbk,
+                    bound_xl, bound_xl->fops->discard,
+                    state->fd, state->offset, state->size, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for ZEROFILL.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the zerofill
+ * fop to bound_xl on state->fd with state->offset and state->size.
+ * Always returns 0.
+ */
+int
+server_zerofill_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_zerofill_cbk(frame, NULL, frame->this,
+                                    state->resolve.op_ret,
+                                    state->resolve.op_errno,
+                                    NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_zerofill_cbk,
+                    bound_xl, bound_xl->fops->zerofill,
+                    state->fd, state->offset, state->size, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for SEEK.  Replies through the callback with the
+ * resolver's op_ret/op_errno (and offset 0) on failure; otherwise winds
+ * the seek fop to bound_xl on state->fd with state->offset and
+ * state->what.  Always returns 0.
+ */
+int
+server_seek_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_seek_cbk (frame, NULL, frame->this,
+                                 state->resolve.op_ret,
+                                 state->resolve.op_errno, 0, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_seek_cbk, bound_xl, bound_xl->fops->seek,
+                    state->fd, state->offset, state->what, state->xdata);
+        return 0;
+}
+
+/*
+ * Reply callback for GETACTIVELK.  Serializes xdata; logs at INFO level
+ * when op_ret is negative; when op_ret is positive, packs the lock list
+ * into the response via serialize_rsp_locklist(), turning a packing
+ * failure into -1/ENOMEM.  The gfs3_getactivelk_rsp is then submitted
+ * and the response buffers are released.  Always returns 0.
+ */
+static int
+server_getactivelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno,
+                        lock_migration_info_t *locklist, dict_t *xdata)
+{
+        gfs3_getactivelk_rsp  reply = {0,};
+        server_state_t       *st    = CALL_STATE (frame);
+        int                   rc    = 0;
+
+        /* on serialization failure the macro sets op_errno and jumps to out */
+        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &reply.xdata.xdata_val,
+                                    reply.xdata.xdata_len, op_errno, out);
+
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_INFO,
+                        op_errno, 0,
+                        "%"PRId64": GETACTIVELK %s (%s) ==> (%s)",
+                        frame->root->unique, st->loc.path,
+                        uuid_utoa (st->resolve.gfid),
+                        strerror (op_errno));
+        } else if (op_ret > 0) {
+                /* op_ret == 0 means the file holds no locks: nothing to pack */
+                rc = serialize_rsp_locklist (locklist, &reply);
+                if (rc == -1) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+out:
+        reply.op_ret = op_ret;
+        reply.op_errno = gf_errno_to_error (op_errno);
+
+        /* the originating RPC request is kept in frame->local */
+        server_submit_reply (frame, frame->local, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gfs3_getactivelk_rsp);
+
+        GF_FREE (reply.xdata.xdata_val);
+
+        getactivelkinfo_rsp_cleanup (&reply);
+
+        return 0;
+}
+
+/*
+ * Resume step for GETACTIVELK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the
+ * getactivelk fop to bound_xl for state->loc.  Always returns 0.
+ */
+int
+server_getactivelk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_getactivelk_cbk (frame, NULL, frame->this,
+                                        state->resolve.op_ret,
+                                        state->resolve.op_errno, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_getactivelk_cbk, bound_xl,
+                    bound_xl->fops->getactivelk, &state->loc, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for SETACTIVELK.  Replies through the callback with the
+ * resolver's op_ret/op_errno on failure; otherwise winds the
+ * setactivelk fop to bound_xl for state->loc with state->locklist.
+ * Always returns 0.
+ */
+int
+server_setactivelk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = CALL_STATE (frame);
+
+        if (state->resolve.op_ret != 0) {
+                server_setactivelk_cbk (frame, NULL, frame->this,
+                                        state->resolve.op_ret,
+                                        state->resolve.op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, server_setactivelk_cbk,
+                    bound_xl, bound_xl->fops->setactivelk, &state->loc,
+                    &state->locklist, state->xdata);
+        return 0;
+}
+
+/*
+ * Resume step for COMPOUND.
+ *
+ * Allocates the compound argument container and its request list, stores
+ * the container in state->args, fills one request entry per element of
+ * the decoded request array and winds the compound fop to bound_xl.
+ *
+ * On any failure the callback is invoked with the error.  The callback
+ * wipes and frees state->args, so nothing is released again here; doing
+ * so would touch freed memory and free it twice.
+ *
+ * Returns 0 after winding, otherwise the non-zero status that was handed
+ * to the callback.
+ */
+int
+server_compound_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+        server_state_t *state = NULL;
+        gfs3_compound_req *req = NULL;
+        compound_args_t *args = NULL;
+        int i = 0;
+        int ret = -1;
+        int length = 0;
+        int op_errno = ENOMEM;
+
+        state = CALL_STATE (frame);
+        if (state->resolve.op_ret != 0) {
+                ret = state->resolve.op_ret;
+                op_errno = state->resolve.op_errno;
+                goto err;
+        }
+
+        req = state->req;
+
+        args = GF_CALLOC (1, sizeof (*args), gf_mt_compound_req_t);
+        state->args = args;
+        if (!args)
+                goto err;
+
+        length = req->compound_req_array.compound_req_array_len;
+
+        args->req_list = GF_CALLOC (length,
+                                    sizeof (*args->req_list),
+                                    gf_mt_default_args_t);
+        if (!args->req_list)
+                goto err;
+
+        for (i = 0; i < length; i++) {
+                ret = server_populate_compound_request (req, frame,
+                                                        &args->req_list[i],
+                                                        i);
+
+                if (ret) {
+                        /* the helper's non-zero return is used as the errno */
+                        op_errno = ret;
+                        ret = -1;
+                        goto err;
+                }
+        }
+
+        STACK_WIND (frame, server_compound_cbk,
+                    bound_xl, bound_xl->fops->compound,
+                    args, state->xdata);
+
+        return 0;
+err:
+        /*
+         * The callback owns the release of state->args (entries, list and
+         * container).
+         * NOTE(review): it wipes only args->fop_length entries - confirm
+         * that fop_length covers every populated entry at this point.
+         */
+        server_compound_cbk (frame, NULL, frame->this, ret, op_errno,
+                             NULL, NULL);
+
+        return ret;
+}
+/* Fop section */
+
+/*
+ * RPC actor for STAT.  Decodes the gfs3_stat_req, obtains a call frame
+ * for the request, records the gfid to resolve (RESOLVE_MUST),
+ * unserializes xdata into state->xdata and hands the frame to
+ * resolve_and_resume() with server_stat_resume.  The decoded xdata
+ * buffer is released with free() on every path past the NULL check.
+ *
+ * Returns 0 when req is NULL or the request was handed off; otherwise
+ * the value left in ret by the failing step.
+ */
+int
+server3_3_stat (rpcsvc_request_t *req)
+{
+        gfs3_stat_req   args     = {{0,},};
+        call_frame_t   *frame    = NULL;
+        server_state_t *state    = NULL;
+        int             op_errno = 0;
+        int             ret      = -1;
+
+        if (req == NULL)
+                return 0;
+
+        /* args was zero-initialised above; now decode the request into it */
+        ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_gfs3_stat_req);
+        if (ret < 0) {
+                /* the message could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_STAT;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request on a subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* on failure the macro fills ret/op_errno and jumps to out */
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_stat_resume);
+
+out:
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * RPC actor for SETATTR: decode the request, stash the gfid, the new
+ * attributes and the validity mask in the call state, then resolve and
+ * resume with server_setattr_resume.
+ */
+int
+server3_3_setattr (rpcsvc_request_t *req)
+{
+        gfs3_setattr_req  attr_args = {{0,},};
+        call_frame_t     *frame     = NULL;
+        server_state_t   *state     = NULL;
+        int               dict_err  = 0;
+        int               rc        = -1;
+
+        /* NOTE(review): most sibling handlers return -1 for a NULL request */
+        if (req == NULL)
+                return 0;
+
+        rc = xdr_to_generic (req->msg[0], &attr_args,
+                             (xdrproc_t)xdr_gfs3_setattr_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_SETATTR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        memcpy (state->resolve.gfid, attr_args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        state->valid = attr_args.valid;
+        gf_stat_to_iatt (&attr_args.stbuf, &state->stbuf);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      attr_args.xdata.xdata_val,
+                                      attr_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_setattr_resume);
+
+done:
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (attr_args.xdata.xdata_val);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for FSETATTR: decode the request, record the fd number, the new
+ * attributes and the validity mask in the call state, then resolve and
+ * resume with server_fsetattr_resume.
+ */
+int
+server3_3_fsetattr (rpcsvc_request_t *req)
+{
+        gfs3_fsetattr_req  attr_args = {0,};
+        call_frame_t      *frame     = NULL;
+        server_state_t    *state     = NULL;
+        int                dict_err  = 0;
+        int                rc        = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &attr_args,
+                             (xdrproc_t)xdr_gfs3_fsetattr_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FSETATTR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        state->resolve.fd_no = attr_args.fd;
+        state->resolve.type = RESOLVE_MUST;
+
+        state->valid = attr_args.valid;
+        gf_stat_to_iatt (&attr_args.stbuf, &state->stbuf);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      attr_args.xdata.xdata_val,
+                                      attr_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_fsetattr_resume);
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (attr_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+/*
+ * RPC actor for FALLOCATE: decode the request, copy fd, gfid, flags, offset
+ * and size into the call state, then resolve and resume with
+ * server_fallocate_resume.
+ */
+int
+server3_3_fallocate(rpcsvc_request_t *req)
+{
+        gfs3_fallocate_req  falloc_args = {{0},};
+        call_frame_t       *frame       = NULL;
+        server_state_t     *state       = NULL;
+        int                 dict_err    = 0;
+        int                 rc          = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &falloc_args,
+                             (xdrproc_t)xdr_gfs3_fallocate_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FALLOCATE;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = falloc_args.fd;
+        memcpy(state->resolve.gfid, falloc_args.gfid, 16);
+
+        /* fop parameters */
+        state->size = falloc_args.size;
+        state->offset = falloc_args.offset;
+        state->flags = falloc_args.flags;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      falloc_args.xdata.xdata_val,
+                                      falloc_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_fallocate_resume);
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (falloc_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for DISCARD: decode the request, copy fd, gfid, offset and size
+ * into the call state, then resolve and resume with server_discard_resume.
+ */
+int
+server3_3_discard(rpcsvc_request_t *req)
+{
+        gfs3_discard_req  discard_args = {{0},};
+        call_frame_t     *frame        = NULL;
+        server_state_t   *state        = NULL;
+        int               dict_err     = 0;
+        int               rc           = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &discard_args,
+                             (xdrproc_t)xdr_gfs3_discard_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_DISCARD;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = discard_args.fd;
+        memcpy(state->resolve.gfid, discard_args.gfid, 16);
+
+        /* fop parameters */
+        state->size = discard_args.size;
+        state->offset = discard_args.offset;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      discard_args.xdata.xdata_val,
+                                      discard_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_discard_resume);
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (discard_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for ZEROFILL: decode the request, copy fd, gfid, offset and size
+ * into the call state, then resolve and resume with server_zerofill_resume.
+ *
+ * Failures are reported through SERVER_REQ_SET_ERROR, as the sibling
+ * handlers do.  Previously the failure paths only set req->rpc_err, so after
+ * a successful decode the function could return the (non-negative) decode
+ * result even though the request had failed.
+ */
+int
+server3_3_zerofill(rpcsvc_request_t *req)
+{
+        server_state_t    *state    = NULL;
+        call_frame_t      *frame    = NULL;
+        gfs3_zerofill_req  args     = {{0},};
+        int                ret      = -1;
+        int                op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_zerofill_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_ZEROFILL;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+
+        state->offset = args.offset;
+        state->size = args.size;
+        memcpy(state->resolve.gfid, args.gfid, 16);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl, state->xdata,
+                                      (args.xdata.xdata_val),
+                                      (args.xdata.xdata_len), ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_zerofill_resume);
+
+out:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * RPC actor for IPC: decode the request and wind it straight to the bound
+ * xlator's ipc fop (no resolution step), passing the opcode and the
+ * unserialized xdata.
+ *
+ * Failures are reported through SERVER_REQ_SET_ERROR, as the sibling
+ * handlers do, so a failed request no longer returns the non-negative
+ * decode result.  The local reference on 'xdata' is dropped on every exit
+ * path, including a failed unserialize, which used to leak it.
+ */
+int
+server3_3_ipc (rpcsvc_request_t *req)
+{
+        call_frame_t *frame    = NULL;
+        gfs3_ipc_req  args     = {0,};
+        int           ret      = -1;
+        int           op_errno = 0;
+        dict_t       *xdata    = NULL;
+        xlator_t     *bound_xl = NULL;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_ipc_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_IPC;
+
+        bound_xl = frame->root->client->bound_xl;
+        if (!bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (bound_xl, xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len,
+                                      ret, op_errno, out);
+
+        ret = 0;
+        STACK_WIND (frame, server_ipc_cbk, bound_xl, bound_xl->fops->ipc,
+                    args.op, xdata);
+
+out:
+        if (xdata)
+                dict_unref (xdata);
+
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * RPC actor for SEEK: decode the request, copy fd, gfid, offset and 'what'
+ * into the call state, then resolve and resume with server_seek_resume.
+ *
+ * The request xdata is unserialized into state->xdata using the client's
+ * bound xlator, like every sibling handler.  Previously it was decoded into
+ * a local dict that was never stored nor unref'd (so it was both lost and
+ * leaked), and the xlator handed to the unserialize macro was a local that
+ * was never assigned, i.e. always NULL.
+ * NOTE(review): assumes server_seek_resume consumes state->xdata - confirm.
+ */
+int
+server3_3_seek (rpcsvc_request_t *req)
+{
+        server_state_t       *state    = NULL;
+        call_frame_t         *frame    = NULL;
+        struct gfs3_seek_req  args     = {{0,},};
+        int                   ret      = -1;
+        int                   op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_seek_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_SEEK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+
+        state->offset = args.offset;
+        state->what = args.what;
+        memcpy(state->resolve.gfid, args.gfid, 16);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len,
+                                      ret, op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_seek_resume);
+
+out:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * RPC actor for READLINK: decode the request, record the gfid and requested
+ * size in the call state, then resolve and resume with
+ * server_readlink_resume.
+ */
+int
+server3_3_readlink (rpcsvc_request_t *req)
+{
+        gfs3_readlink_req  link_args = {{0,},};
+        call_frame_t      *frame     = NULL;
+        server_state_t    *state     = NULL;
+        int                dict_err  = 0;
+        int                rc        = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &link_args,
+                             (xdrproc_t)xdr_gfs3_readlink_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_READLINK;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        memcpy (state->resolve.gfid, link_args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        state->size = link_args.size;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      link_args.xdata.xdata_val,
+                                      link_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_readlink_resume);
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (link_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for CREATE: decode the request, record parent gfid, basename,
+ * mode, umask and open flags in the call state, then resolve and resume
+ * with server_create_resume.
+ *
+ * The resolve type depends on O_EXCL: RESOLVE_NOT when it is set,
+ * RESOLVE_DONTCARE otherwise.
+ *
+ * A failed copy of the basename is now reported as an error instead of
+ * continuing with a NULL resolve.bname.
+ */
+int
+server3_3_create (rpcsvc_request_t *req)
+{
+        server_state_t  *state    = NULL;
+        call_frame_t    *frame    = NULL;
+        gfs3_create_req  args     = {{0,},};
+        int              ret      = -1;
+        int              op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /* decode target for the name; sized by the whole first iovec */
+        args.bname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_create_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_CREATE;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.bname = gf_strdup (args.bname);
+        if (!state->resolve.bname) {
+                /* reported through the common 'out' path below */
+                op_errno = ENOMEM;
+                goto out;
+        }
+        state->mode = args.mode;
+        state->umask = args.umask;
+        state->flags = gf_flags_to_flags (args.flags);
+        memcpy (state->resolve.pargfid, args.pargfid, 16);
+
+        if (state->flags & O_EXCL) {
+                state->resolve.type = RESOLVE_NOT;
+        } else {
+                state->resolve.type = RESOLVE_DONTCARE;
+        }
+
+        /* TODO: can do alloca for xdata field instead of stdalloc */
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_create_resume);
+
+out:
+        /* memory allocated by libc, don't use GF_FREE */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * RPC actor for OPEN: decode the request, record the gfid and the converted
+ * open flags in the call state, then resolve and resume with
+ * server_open_resume.
+ */
+int
+server3_3_open (rpcsvc_request_t *req)
+{
+        gfs3_open_req   open_args = {{0,},};
+        call_frame_t   *frame     = NULL;
+        server_state_t *state     = NULL;
+        int             dict_err  = 0;
+        int             rc        = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &open_args,
+                             (xdrproc_t)xdr_gfs3_open_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_OPEN;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        memcpy (state->resolve.gfid, open_args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        state->flags = gf_flags_to_flags (open_args.flags);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      open_args.xdata.xdata_val,
+                                      open_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_open_resume);
+done:
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (open_args.xdata.xdata_val);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for READ: decode the request, copy fd, gfid, size, offset and
+ * flags into the call state, then resolve and resume with
+ * server_readv_resume.
+ */
+int
+server3_3_readv (rpcsvc_request_t *req)
+{
+        gfs3_read_req   read_args = {{0,},};
+        call_frame_t   *frame     = NULL;
+        server_state_t *state     = NULL;
+        int             dict_err  = 0;
+        int             rc        = -1;
+
+        /* a NULL request falls through the common exit and returns -1 */
+        if (req == NULL)
+                goto done;
+
+        rc = xdr_to_generic (req->msg[0], &read_args,
+                             (xdrproc_t)xdr_gfs3_read_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_READ;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = read_args.fd;
+        memcpy (state->resolve.gfid, read_args.gfid, 16);
+
+        /* fop parameters */
+        state->flags = read_args.flag;
+        state->offset = read_args.offset;
+        state->size = read_args.size;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      read_args.xdata.xdata_val,
+                                      read_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_readv_resume);
+done:
+        /* memory allocated by libc, don't use GF_FREE */
+        free (read_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for WRITE: decode the request header, collect the payload
+ * vectors that follow it, then resolve and resume with
+ * server_writev_resume.
+ *
+ * The payload is whatever remains of msg[0] after the decoded header plus
+ * every further iovec of the request; state->size starts from the size in
+ * the header and has the length of each payload vector added to it.
+ */
+int
+server3_3_writev (rpcsvc_request_t *req)
+{
+        gfs3_write_req  write_args = {{0,},};
+        call_frame_t   *frame      = NULL;
+        server_state_t *state      = NULL;
+        ssize_t         hdr_len    = 0;
+        int             idx        = 0;
+        int             dict_err   = 0;
+        int             rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        hdr_len = xdr_to_generic (req->msg[0], &write_args,
+                                  (xdrproc_t)xdr_gfs3_write_req);
+        if (hdr_len < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_WRITE;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = write_args.fd;
+        memcpy (state->resolve.gfid, write_args.gfid, 16);
+
+        /* fop parameters */
+        state->offset = write_args.offset;
+        state->size = write_args.size;
+        state->flags = write_args.flag;
+        state->iobref = iobref_ref (req->iobref);
+
+        /* bytes of msg[0] beyond the decoded header are payload */
+        if (hdr_len < req->msg[0].iov_len) {
+                state->payload_vector[0].iov_base
+                        = (req->msg[0].iov_base + hdr_len);
+                state->payload_vector[0].iov_len
+                        = req->msg[0].iov_len - hdr_len;
+                state->payload_count = 1;
+        }
+
+        /* remaining request vectors are appended as payload */
+        for (idx = 1; idx < req->count; idx++) {
+                state->payload_vector[state->payload_count++]
+                        = req->msg[idx];
+        }
+
+        for (idx = 0; idx < state->payload_count; idx++) {
+                state->size += state->payload_vector[idx].iov_len;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      write_args.xdata.xdata_val,
+                                      write_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+#ifdef GF_TESTING_IO_XDATA
+        dict_dump_to_log (state->xdata);
+#endif
+
+        rc = 0;
+        resolve_and_resume (frame, server_writev_resume);
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (write_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/* States of the vectored-write sizing state machine below. */
+#define SERVER3_3_VECWRITE_START 0
+#define SERVER3_3_VECWRITE_READING_HDR 1
+#define SERVER3_3_VECWRITE_READING_OPAQUE 2
+
+/*
+ * Sizing callback for vectored writes.
+ *
+ * Given the current state, stores in *readsize how many bytes should be
+ * read next and returns the state to move to:
+ *   START          -> size of an encoded (empty) write request header
+ *   READING_HDR    -> padded length of the xdata that follows the header;
+ *                     back to START when there is none
+ *   READING_OPAQUE -> nothing more to read, back to START
+ * An unknown state is logged and 0 is returned without touching *readsize.
+ *
+ * curr_addr is not used.
+ */
+int
+server3_3_writev_vecsizer (int state, ssize_t *readsize, char *base_addr,
+                           char *curr_addr)
+{
+        gfs3_write_req hdr     = {{0,},};
+        XDR            decoder;
+        ssize_t        want    = 0;
+        int            next    = 0;
+
+        if (state == SERVER3_3_VECWRITE_START) {
+                want = xdr_sizeof ((xdrproc_t) xdr_gfs3_write_req,
+                                   &hdr);
+                *readsize = want;
+                next = SERVER3_3_VECWRITE_READING_HDR;
+        } else if (state == SERVER3_3_VECWRITE_READING_HDR) {
+                want = xdr_sizeof ((xdrproc_t) xdr_gfs3_write_req,
+                                   &hdr);
+
+                xdrmem_create (&decoder, base_addr, want, XDR_DECODE);
+
+                /* This will fail if there is xdata sent from client, if not,
+                   well and good */
+                xdr_gfs3_write_req (&decoder, &hdr);
+
+                /* XDR pads the end of an opaque object with '0', so the
+                   length has to be rounded up to a multiple of 4 */
+                want = roof (hdr.xdata.xdata_len, 4);
+
+                *readsize = want;
+
+                if (want)
+                        next = SERVER3_3_VECWRITE_READING_OPAQUE;
+                else
+                        next = SERVER3_3_VECWRITE_START;
+
+                free (hdr.xdata.xdata_val);
+        } else if (state == SERVER3_3_VECWRITE_READING_OPAQUE) {
+                *readsize = 0;
+                next = SERVER3_3_VECWRITE_START;
+        } else {
+                gf_msg ("server", GF_LOG_ERROR, 0, PS_MSG_WRONG_STATE,
+                        "wrong state: %d", state);
+        }
+
+        return next;
+}
+
+
+/*
+ * RPC actor for RELEASE: drop the fd named in the request from the client's
+ * fd table and send an empty common reply.
+ *
+ * Changes relative to the previous version:
+ *  - the xdata buffer filled in by the decoder is now freed (every sibling
+ *    handler frees it; it used to be leaked here);
+ *  - failures after a successful decode now return -1 instead of the
+ *    non-negative decode result.
+ *    NOTE(review): presumably the RPC layer keys its error reply on a
+ *    negative actor return - confirm.
+ */
+int
+server3_3_release (rpcsvc_request_t *req)
+{
+        client_t         *client   = NULL;
+        server_ctx_t     *serv_ctx = NULL;
+        gfs3_release_req  args     = {{0,},};
+        gf_common_rsp     rsp      = {0,};
+        int               ret      = -1;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_release_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        client = req->trans->xl_private;
+        if (!client) {
+                /* Handshake is not complete yet. */
+                req->rpc_err = SYSTEM_ERR;
+                ret = -1;
+                goto out;
+        }
+
+        serv_ctx = server_ctx_get (client, client->this);
+        if (serv_ctx == NULL) {
+                gf_msg (req->trans->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+                        "failed");
+                req->rpc_err = SYSTEM_ERR;
+                ret = -1;
+                goto out;
+        }
+
+        gf_fd_put (serv_ctx->fdtable, args.fd);
+
+        server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        ret = 0;
+out:
+        /* memory allocated by libc, don't use GF_FREE */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+/*
+ * RPC actor for RELEASEDIR: drop the directory fd named in the request from
+ * the client's fd table and send an empty common reply.
+ *
+ * Changes relative to the previous version:
+ *  - the request is decoded with the releasedir XDR routine that matches
+ *    the type of 'args' (it used the routine for release requests);
+ *  - the xdata buffer filled in by the decoder is now freed;
+ *  - a failed server_ctx_get() now returns -1 instead of the non-negative
+ *    decode result.
+ *    NOTE(review): presumably the RPC layer keys its error reply on a
+ *    negative actor return - confirm.
+ */
+int
+server3_3_releasedir (rpcsvc_request_t *req)
+{
+        client_t            *client   = NULL;
+        server_ctx_t        *serv_ctx = NULL;
+        gfs3_releasedir_req  args     = {{0,},};
+        gf_common_rsp        rsp      = {0,};
+        int                  ret      = -1;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_releasedir_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        client = req->trans->xl_private;
+        if (!client) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        serv_ctx = server_ctx_get (client, client->this);
+        if (serv_ctx == NULL) {
+                gf_msg (req->trans->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+                        "failed");
+                req->rpc_err = SYSTEM_ERR;
+                ret = -1;
+                goto out;
+        }
+
+        gf_fd_put (serv_ctx->fdtable, args.fd);
+
+        server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+
+        ret = 0;
+out:
+        /* memory allocated by libc, don't use GF_FREE */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+/*
+ * RPC actor for FSYNC: decode the request, copy fd, gfid and the request's
+ * 'data' field (stored as flags) into the call state, then resolve and
+ * resume with server_fsync_resume.
+ */
+int
+server3_3_fsync (rpcsvc_request_t *req)
+{
+        gfs3_fsync_req  sync_args = {{0,},};
+        call_frame_t   *frame     = NULL;
+        server_state_t *state     = NULL;
+        int             dict_err  = 0;
+        int             rc        = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &sync_args,
+                             (xdrproc_t)xdr_gfs3_fsync_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FSYNC;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = sync_args.fd;
+        memcpy (state->resolve.gfid, sync_args.gfid, 16);
+
+        state->flags = sync_args.data;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      sync_args.xdata.xdata_val,
+                                      sync_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_fsync_resume);
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (sync_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+
+/*
+ * RPC actor for FLUSH: decode the request, record fd and gfid in the call
+ * state, then resolve and resume with server_flush_resume.
+ */
+int
+server3_3_flush (rpcsvc_request_t *req)
+{
+        gfs3_flush_req  flush_args = {{0,},};
+        call_frame_t   *frame      = NULL;
+        server_state_t *state      = NULL;
+        int             dict_err   = 0;
+        int             rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &flush_args,
+                             (xdrproc_t)xdr_gfs3_flush_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FLUSH;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        memcpy (state->resolve.gfid, flush_args.gfid, 16);
+        state->resolve.fd_no = flush_args.fd;
+        state->resolve.type = RESOLVE_MUST;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      flush_args.xdata.xdata_val,
+                                      flush_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_flush_resume);
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (flush_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+
+/*
+ * RPC actor for FTRUNCATE: decode the request, record fd, gfid and the
+ * target offset in the call state, then resolve and resume with
+ * server_ftruncate_resume.
+ */
+int
+server3_3_ftruncate (rpcsvc_request_t *req)
+{
+        gfs3_ftruncate_req  trunc_args = {{0,},};
+        call_frame_t       *frame      = NULL;
+        server_state_t     *state      = NULL;
+        int                 dict_err   = 0;
+        int                 rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &trunc_args,
+                             (xdrproc_t)xdr_gfs3_ftruncate_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FTRUNCATE;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.fd_no = trunc_args.fd;
+        memcpy (state->resolve.gfid, trunc_args.gfid, 16);
+
+        state->offset = trunc_args.offset;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      trunc_args.xdata.xdata_val,
+                                      trunc_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_ftruncate_resume);
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (trunc_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for FSTAT: decode the request, record fd and gfid in the call
+ * state, then resolve and resume with server_fstat_resume.
+ */
+int
+server3_3_fstat (rpcsvc_request_t *req)
+{
+        gfs3_fstat_req  fstat_args = {{0,},};
+        call_frame_t   *frame      = NULL;
+        server_state_t *state      = NULL;
+        int             dict_err   = 0;
+        int             rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &fstat_args,
+                             (xdrproc_t)xdr_gfs3_fstat_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FSTAT;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        /* what to resolve */
+        memcpy (state->resolve.gfid, fstat_args.gfid, 16);
+        state->resolve.fd_no = fstat_args.fd;
+        state->resolve.type = RESOLVE_MUST;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      fstat_args.xdata.xdata_val,
+                                      fstat_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_fstat_resume);
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (fstat_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+/*
+ * RPC actor for TRUNCATE: decode the request, record the gfid and target
+ * offset in the call state, then resolve and resume with
+ * server_truncate_resume.
+ */
+int
+server3_3_truncate (rpcsvc_request_t *req)
+{
+        gfs3_truncate_req  trunc_args = {{0,},};
+        call_frame_t      *frame      = NULL;
+        server_state_t    *state      = NULL;
+        int                dict_err   = 0;
+        int                rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        rc = xdr_to_generic (req->msg[0], &trunc_args,
+                             (xdrproc_t)xdr_gfs3_truncate_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_TRUNCATE;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        state->offset = trunc_args.offset;
+        memcpy (state->resolve.gfid, trunc_args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      trunc_args.xdata.xdata_val,
+                                      trunc_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_truncate_resume);
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (trunc_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        return rc;
+}
+
+
+
+/*
+ * RPC actor for UNLINK: decode the request, record parent gfid, basename
+ * and flags in the call state, then resolve and resume with
+ * server_unlink_resume.
+ *
+ * A failed copy of the basename is now reported as an error instead of
+ * continuing with a NULL resolve.bname.
+ */
+int
+server3_3_unlink (rpcsvc_request_t *req)
+{
+        server_state_t  *state    = NULL;
+        call_frame_t    *frame    = NULL;
+        gfs3_unlink_req  args     = {{0,},};
+        int              ret      = -1;
+        int              op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /* decode target for the name; sized by the whole first iovec */
+        args.bname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_unlink_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_UNLINK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        state->resolve.bname = gf_strdup (args.bname);
+        if (!state->resolve.bname) {
+                /* reported through the common 'out' path below */
+                op_errno = ENOMEM;
+                goto out;
+        }
+        memcpy (state->resolve.pargfid, args.pargfid, 16);
+
+        state->flags = args.xflags;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_unlink_resume);
+out:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * RPC actor for SETXATTR: decode the request, unserialize the xattr
+ * dictionary and the xdata, record gfid and flags in the call state, then
+ * resolve and resume with server_setxattr_resume.
+ *
+ * The local 'dict' reference is dropped here only on failure; once the
+ * request has been handed to the resolver the dictionary is owned through
+ * state->dict.
+ */
+int
+server3_3_setxattr (rpcsvc_request_t *req)
+{
+        gfs3_setxattr_req  xattr_args = {{0,},};
+        call_frame_t      *frame      = NULL;
+        server_state_t    *state      = NULL;
+        dict_t            *dict       = NULL;
+        int32_t            dict_err   = 0;
+        int32_t            rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        /* decode target for the dictionary; sized by the whole first iovec */
+        xattr_args.dict.dict_val = alloca (req->msg[0].iov_len);
+
+        rc = xdr_to_generic (req->msg[0], &xattr_args,
+                             (xdrproc_t)xdr_gfs3_setxattr_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_SETXATTR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        memcpy (state->resolve.gfid, xattr_args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+        state->flags = xattr_args.flags;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      dict,
+                                      (xattr_args.dict.dict_val),
+                                      (xattr_args.dict.dict_len), rc,
+                                      dict_err, done);
+
+        state->dict = dict;
+
+        /* There can be some commands hidden in key, check and proceed */
+        gf_server_check_setxattr_cmd (frame, dict);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      xattr_args.xdata.xdata_val,
+                                      xattr_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_setxattr_resume);
+
+        /* 'dict' will be destroyed later when 'state' is not needed anymore */
+        dict = NULL;
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (xattr_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        if (dict != NULL)
+                dict_unref (dict);
+
+        return rc;
+}
+
+
+
+/*
+ * RPC actor for FSETXATTR: decode the request, unserialize the xattr
+ * dictionary and the xdata, record fd, gfid and flags in the call state,
+ * then resolve and resume with server_fsetxattr_resume.
+ *
+ * The local 'dict' reference is dropped here only on failure; once the
+ * request has been handed to the resolver the dictionary is owned through
+ * state->dict.
+ */
+int
+server3_3_fsetxattr (rpcsvc_request_t *req)
+{
+        gfs3_fsetxattr_req  xattr_args = {{0,},};
+        call_frame_t       *frame      = NULL;
+        server_state_t     *state      = NULL;
+        dict_t             *dict       = NULL;
+        int32_t             dict_err   = 0;
+        int32_t             rc         = -1;
+
+        if (req == NULL)
+                return rc;
+
+        /* decode target for the dictionary; sized by the whole first iovec */
+        xattr_args.dict.dict_val = alloca (req->msg[0].iov_len);
+        rc = xdr_to_generic (req->msg[0], &xattr_args,
+                             (xdrproc_t)xdr_gfs3_fsetxattr_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FSETXATTR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        memcpy (state->resolve.gfid, xattr_args.gfid, 16);
+        state->resolve.fd_no = xattr_args.fd;
+        state->resolve.type = RESOLVE_MUST;
+        state->flags = xattr_args.flags;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      dict,
+                                      (xattr_args.dict.dict_val),
+                                      (xattr_args.dict.dict_len), rc,
+                                      dict_err, done);
+
+        state->dict = dict;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      xattr_args.xdata.xdata_val,
+                                      xattr_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_fsetxattr_resume);
+
+        /* 'dict' will be destroyed later when 'state' is not needed anymore */
+        dict = NULL;
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (xattr_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        if (dict != NULL)
+                dict_unref (dict);
+
+        return rc;
+}
+
+
+
+/*
+ * RPC actor for FXATTROP: decode the request, unserialize the xattr
+ * dictionary and the xdata, record fd, gfid and flags in the call state,
+ * then resolve and resume with server_fxattrop_resume.
+ *
+ * The local 'dict' reference is dropped here only on failure; once the
+ * request has been handed to the resolver the dictionary is owned through
+ * state->dict.
+ */
+int
+server3_3_fxattrop (rpcsvc_request_t *req)
+{
+        gfs3_fxattrop_req  xop_args = {{0,},};
+        call_frame_t      *frame    = NULL;
+        server_state_t    *state    = NULL;
+        dict_t            *dict     = NULL;
+        int32_t            dict_err = 0;
+        int32_t            rc       = -1;
+
+        if (req == NULL)
+                return rc;
+
+        /* decode target for the dictionary; sized by the whole first iovec */
+        xop_args.dict.dict_val = alloca (req->msg[0].iov_len);
+        rc = xdr_to_generic (req->msg[0], &xop_args,
+                             (xdrproc_t)xdr_gfs3_fxattrop_req);
+        if (rc < 0) {
+                /* request could not be decoded */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame available, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+        frame->root->op = GF_FOP_FXATTROP;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, rc);
+                goto done;
+        }
+
+        memcpy (state->resolve.gfid, xop_args.gfid, 16);
+        state->resolve.fd_no = xop_args.fd;
+        state->resolve.type = RESOLVE_MUST;
+        state->flags = xop_args.flags;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      dict,
+                                      (xop_args.dict.dict_val),
+                                      (xop_args.dict.dict_len), rc,
+                                      dict_err, done);
+
+        state->dict = dict;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      xop_args.xdata.xdata_val,
+                                      xop_args.xdata.xdata_len, rc,
+                                      dict_err, done);
+
+        rc = 0;
+        resolve_and_resume (frame, server_fxattrop_resume);
+
+        /* 'dict' will be destroyed later when 'state' is not needed anymore */
+        dict = NULL;
+
+done:
+        /* xdata buffer is released with plain free(), not GF_FREE */
+        free (xop_args.xdata.xdata_val);
+
+        if (dict_err)
+                SERVER_REQ_SET_ERROR (req, rc);
+
+        if (dict != NULL)
+                dict_unref (dict);
+
+        return rc;
+}
+
+
+
+/*
+ * server3_3_xattrop - handler for GF_FOP_XATTROP requests.
+ *
+ * Decodes the request, fills the call state (flags, gfid, dict and xdata) and
+ * passes the frame to resolve_and_resume with server_xattrop_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_xattrop (rpcsvc_request_t *req)
+{
+        dict_t           *dict     = NULL;
+        server_state_t   *state    = NULL;
+        call_frame_t     *frame    = NULL;
+        gfs3_xattrop_req  args     = {{0,},};
+        int32_t           ret      = -1;
+        int32_t           op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /* stack buffer for the serialized dict, sized to the message length */
+        args.dict.dict_val = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_xattrop_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_XATTROP;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        state->flags        = args.flags;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      dict,
+                                      (args.dict.dict_val),
+                                      (args.dict.dict_len), ret,
+                                      op_errno, out);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        /*
+         * Hand 'dict' over to 'state' only after every parse step succeeded.
+         * Doing it earlier would leave state->dict pointing at a dict that the
+         * failure path below releases through the local reference.
+         * From here on 'dict' is destroyed together with 'state'.
+         */
+        state->dict = dict;
+        dict = NULL;
+
+        ret = 0;
+        resolve_and_resume (frame, server_xattrop_resume);
+
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* non-NULL only when parsing failed before the hand-over above */
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_getxattr - handler for GF_FOP_GETXATTR requests.
+ *
+ * Decodes the request, stores the gfid, the optional attribute name and the
+ * xdata in the call state, then passes the frame to resolve_and_resume with
+ * server_getxattr_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_getxattr (rpcsvc_request_t *req)
+{
+        server_state_t    *state    = NULL;
+        call_frame_t      *frame    = NULL;
+        gfs3_getxattr_req  args     = {{0,},};
+        int                ret      = -1;
+        int                op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size the decode buffer from the message instead of a fixed 256
+         * bytes: the name comes from the client and a decoded string cannot
+         * be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'name' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.name = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_getxattr_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_GETXATTR;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        if (args.namelen) {
+                state->name = gf_strdup (args.name);
+                /* There can be some commands hidden in key, check and proceed */
+                gf_server_check_getxattr_cmd (frame, state->name);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_getxattr_resume);
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_fgetxattr - handler for GF_FOP_FGETXATTR requests.
+ *
+ * Decodes the request, stores the fd number, gfid, optional attribute name
+ * and xdata in the call state, then passes the frame to resolve_and_resume
+ * with server_fgetxattr_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_fgetxattr (rpcsvc_request_t *req)
+{
+        server_state_t     *state    = NULL;
+        call_frame_t       *frame    = NULL;
+        gfs3_fgetxattr_req  args     = {{0,},};
+        int                 ret      = -1;
+        int                 op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size the decode buffer from the message instead of a fixed 256
+         * bytes: the name comes from the client and a decoded string cannot
+         * be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'name' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.name = alloca (req->msg[0].iov_len);
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_fgetxattr_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_FGETXATTR;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type  = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* the name is only copied when the request carries a non-zero length */
+        if (args.namelen)
+                state->name = gf_strdup (args.name);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_fgetxattr_resume);
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+
+/*
+ * server3_3_removexattr - handler for GF_FOP_REMOVEXATTR requests.
+ *
+ * Decodes the request, stores the gfid, attribute name and xdata in the call
+ * state, then passes the frame to resolve_and_resume with
+ * server_removexattr_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_removexattr (rpcsvc_request_t *req)
+{
+        server_state_t       *state    = NULL;
+        call_frame_t         *frame    = NULL;
+        gfs3_removexattr_req  args     = {{0,},};
+        int                   ret      = -1;
+        int                   op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size the decode buffer from the message instead of a fixed 256
+         * bytes: the name comes from the client and a decoded string cannot
+         * be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'name' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.name = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_removexattr_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_REMOVEXATTR;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->name = gf_strdup (args.name);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_removexattr_resume);
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * server3_3_fremovexattr - handler for GF_FOP_FREMOVEXATTR requests.
+ *
+ * Decodes the request, stores the fd number, gfid, attribute name and xdata
+ * in the call state, then passes the frame to resolve_and_resume with
+ * server_fremovexattr_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_fremovexattr (rpcsvc_request_t *req)
+{
+        server_state_t        *state    = NULL;
+        call_frame_t          *frame    = NULL;
+        gfs3_fremovexattr_req  args     = {{0,},};
+        int                    ret      = -1;
+        int                    op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size the decode buffer from the message instead of a fixed 4096
+         * bytes: the name comes from the client and a decoded string cannot
+         * be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'name' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.name = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_fremovexattr_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_FREMOVEXATTR;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type  = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->name = gf_strdup (args.name);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_fremovexattr_resume);
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+
+
+/*
+ * server3_3_opendir - handler for GF_FOP_OPENDIR requests.
+ *
+ * Decodes the request, records the gfid and xdata in the call state and
+ * passes the frame to resolve_and_resume with server_opendir_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_opendir (rpcsvc_request_t *req)
+{
+        gfs3_opendir_req  args     = {{0,},};
+        call_frame_t     *frame    = NULL;
+        server_state_t   *state    = NULL;
+        int               op_errno = 0;
+        int               ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_opendir_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_OPENDIR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_opendir_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_readdirp - handler for GF_FOP_READDIRP requests.
+ *
+ * Decodes the request, clamps the requested size so that it plus the reply
+ * headers fits in one page, records fd number, offset, gfid and dict in the
+ * call state and passes the frame to resolve_and_resume with
+ * server_readdirp_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_readdirp (rpcsvc_request_t *req)
+{
+        gfs3_readdirp_req  args         = {{0,},};
+        call_frame_t      *frame        = NULL;
+        server_state_t    *state        = NULL;
+        size_t             headers_size = 0;
+        int                op_errno     = 0;
+        int                ret          = -1;
+
+        if (req == NULL)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_readdirp_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_READDIRP;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* FIXME: this should go away when variable sized iobufs are introduced
+         * and transport layer can send msgs bigger than current page-size.
+         */
+        headers_size = sizeof (struct rpc_msg) + sizeof (gfs3_readdir_rsp);
+        if ((frame->this->ctx->page_size >= args.size)
+            && ((frame->this->ctx->page_size - args.size) >= headers_size)) {
+                /* request and headers fit in one page: honour it as is */
+                state->size = args.size;
+        } else {
+                /* otherwise use whatever a page leaves after the headers */
+                state->size = frame->this->ctx->page_size - headers_size;
+        }
+
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->resolve.type  = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+        state->offset        = args.offset;
+
+        /* here, dict itself works as xdata */
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->dict,
+                                      (args.dict.dict_val),
+                                      (args.dict.dict_len), ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_readdirp_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* dict buffer is freed with plain free() */
+        free (args.dict.dict_val);
+
+        return ret;
+}
+
+/*
+ * server3_3_readdir - handler for GF_FOP_READDIR requests.
+ *
+ * Decodes the request, clamps the requested size so that it plus the reply
+ * headers fits in one page, records fd number, offset, gfid and xdata in the
+ * call state and passes the frame to resolve_and_resume with
+ * server_readdir_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_readdir (rpcsvc_request_t *req)
+{
+        gfs3_readdir_req  args         = {{0,},};
+        call_frame_t     *frame        = NULL;
+        server_state_t   *state        = NULL;
+        size_t            headers_size = 0;
+        int               op_errno     = 0;
+        int               ret          = -1;
+
+        if (req == NULL)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_readdir_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_READDIR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* FIXME: this should go away when variable sized iobufs are introduced
+         * and transport layer can send msgs bigger than current page-size.
+         */
+        headers_size = sizeof (struct rpc_msg) + sizeof (gfs3_readdir_rsp);
+        if ((frame->this->ctx->page_size >= args.size)
+            && ((frame->this->ctx->page_size - args.size) >= headers_size)) {
+                /* request and headers fit in one page: honour it as is */
+                state->size = args.size;
+        } else {
+                /* otherwise use whatever a page leaves after the headers */
+                state->size = frame->this->ctx->page_size - headers_size;
+        }
+
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->resolve.type  = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+        state->offset        = args.offset;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_readdir_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+/*
+ * server3_3_fsyncdir - handler for GF_FOP_FSYNCDIR requests.
+ *
+ * Decodes the request, records fd number, flags (taken from args.data), gfid
+ * and xdata in the call state and passes the frame to resolve_and_resume
+ * with server_fsyncdir_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_fsyncdir (rpcsvc_request_t *req)
+{
+        gfs3_fsyncdir_req  args     = {{0,},};
+        call_frame_t      *frame    = NULL;
+        server_state_t    *state    = NULL;
+        int                op_errno = 0;
+        int                ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_fsyncdir_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_FSYNCDIR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->resolve.type  = RESOLVE_MUST;
+        state->resolve.fd_no = args.fd;
+        state->flags         = args.data;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_fsyncdir_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+
+/*
+ * server3_3_mknod - handler for GF_FOP_MKNOD requests.
+ *
+ * Decodes the request, records parent gfid, basename, mode, dev, umask and
+ * xdata in the call state and passes the frame to resolve_and_resume with
+ * server_mknod_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_mknod (rpcsvc_request_t *req)
+{
+        gfs3_mknod_req   args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        /* stack buffer for the basename, sized to the message length */
+        args.bname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_mknod_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_MKNOD;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* the entry is resolved by parent gfid + basename */
+        state->resolve.type = RESOLVE_NOT;
+        memcpy (state->resolve.pargfid, args.pargfid, 16);
+        state->resolve.bname = gf_strdup (args.bname);
+
+        state->umask = args.umask;
+        state->dev   = args.dev;
+        state->mode  = args.mode;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_mknod_resume);
+
+out:
+        /* memory allocated by libc, don't use GF_FREE */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_mkdir - handler for GF_FOP_MKDIR requests.
+ *
+ * Decodes the request, records parent gfid, basename, mode, umask and xdata
+ * in the call state and passes the frame to resolve_and_resume with
+ * server_mkdir_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_mkdir (rpcsvc_request_t *req)
+{
+        gfs3_mkdir_req   args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        /* stack buffer for the basename, sized to the message length */
+        args.bname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_mkdir_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_MKDIR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* the entry is resolved by parent gfid + basename */
+        state->resolve.type = RESOLVE_NOT;
+        memcpy (state->resolve.pargfid, args.pargfid, 16);
+        state->resolve.bname = gf_strdup (args.bname);
+
+        state->umask = args.umask;
+        state->mode  = args.mode;
+
+        /* TODO: can do alloca for xdata field instead of stdalloc */
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_mkdir_resume);
+
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_rmdir - handler for GF_FOP_RMDIR requests.
+ *
+ * Decodes the request, records parent gfid, basename, flags (from
+ * args.xflags) and xdata in the call state and passes the frame to
+ * resolve_and_resume with server_rmdir_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_rmdir (rpcsvc_request_t *req)
+{
+        gfs3_rmdir_req   args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        /* stack buffer for the basename, sized to the message length */
+        args.bname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_rmdir_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_RMDIR;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* the entry is resolved by parent gfid + basename */
+        state->resolve.type = RESOLVE_MUST;
+        memcpy (state->resolve.pargfid, args.pargfid, 16);
+        state->resolve.bname = gf_strdup (args.bname);
+
+        state->flags = args.xflags;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_rmdir_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+
+/*
+ * server3_3_inodelk - handler for GF_FOP_INODELK requests.
+ *
+ * Decodes the request, translates the wire lock command and lock type into
+ * their F_* equivalents, records gfid, volume, flock and xdata in the call
+ * state and passes the frame to resolve_and_resume with
+ * server_inodelk_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_inodelk (rpcsvc_request_t *req)
+{
+        server_state_t   *state    = NULL;
+        call_frame_t     *frame    = NULL;
+        gfs3_inodelk_req  args     = {{0,},};
+        int               cmd      = 0;
+        int               ret      = -1;
+        int               op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size the decode buffer from the message instead of a fixed 256
+         * bytes: the volume string comes from the client and a decoded string
+         * cannot be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'volume' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.volume = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_inodelk_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_INODELK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_EXACT;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* wire command -> fcntl-style command; other values leave
+         * state->cmd untouched */
+        cmd = args.cmd;
+        switch (cmd) {
+        case GF_LK_GETLK:
+                state->cmd = F_GETLK;
+                break;
+        case GF_LK_SETLK:
+                state->cmd = F_SETLK;
+                break;
+        case GF_LK_SETLKW:
+                state->cmd = F_SETLKW;
+                break;
+        }
+
+        state->type = args.type;
+        state->volume = gf_strdup (args.volume);
+
+        gf_proto_flock_to_flock (&args.flock, &state->flock);
+
+        /* wire lock type -> fcntl-style l_type, applied after the flock
+         * conversion above */
+        switch (state->type) {
+        case GF_LK_F_RDLCK:
+                state->flock.l_type = F_RDLCK;
+                break;
+        case GF_LK_F_WRLCK:
+                state->flock.l_type = F_WRLCK;
+                break;
+        case GF_LK_F_UNLCK:
+                state->flock.l_type = F_UNLCK;
+                break;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_inodelk_resume);
+out:
+        /* both buffers are freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        free (args.flock.lk_owner.lk_owner_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * server3_3_finodelk - handler for GF_FOP_FINODELK requests.
+ *
+ * Decodes the request, translates the wire lock command and lock type into
+ * their F_* equivalents, records fd number, gfid, volume, flock and xdata in
+ * the call state and passes the frame to resolve_and_resume with
+ * server_finodelk_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_finodelk (rpcsvc_request_t *req)
+{
+        server_state_t    *state    = NULL;
+        call_frame_t      *frame    = NULL;
+        gfs3_finodelk_req  args     = {{0,},};
+        int                ret      = -1;
+        int                op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size the decode buffer from the message instead of a fixed 256
+         * bytes: the volume string comes from the client and a decoded string
+         * cannot be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'volume' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.volume = alloca (req->msg[0].iov_len);
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_finodelk_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_FINODELK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_EXACT;
+        state->volume = gf_strdup (args.volume);
+        state->resolve.fd_no = args.fd;
+        state->cmd = args.cmd;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* wire command -> fcntl-style command; other values are kept as
+         * received */
+        switch (state->cmd) {
+        case GF_LK_GETLK:
+                state->cmd = F_GETLK;
+                break;
+        case GF_LK_SETLK:
+                state->cmd = F_SETLK;
+                break;
+        case GF_LK_SETLKW:
+                state->cmd = F_SETLKW;
+                break;
+        }
+
+        state->type = args.type;
+
+        gf_proto_flock_to_flock (&args.flock, &state->flock);
+
+        /* wire lock type -> fcntl-style l_type, applied after the flock
+         * conversion above */
+        switch (state->type) {
+        case GF_LK_F_RDLCK:
+                state->flock.l_type = F_RDLCK;
+                break;
+        case GF_LK_F_WRLCK:
+                state->flock.l_type = F_WRLCK;
+                break;
+        case GF_LK_F_UNLCK:
+                state->flock.l_type = F_UNLCK;
+                break;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_finodelk_resume);
+out:
+        /* both buffers are freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        free (args.flock.lk_owner.lk_owner_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_entrylk - handler for GF_FOP_ENTRYLK requests.
+ *
+ * Decodes the request, records gfid, optional entry name, volume, cmd, type
+ * and xdata in the call state and passes the frame to resolve_and_resume
+ * with server_entrylk_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_entrylk (rpcsvc_request_t *req)
+{
+        server_state_t   *state    = NULL;
+        call_frame_t     *frame    = NULL;
+        gfs3_entrylk_req  args     = {{0,},};
+        int               ret      = -1;
+        int               op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size both decode buffers from the message instead of a fixed 256
+         * bytes: the strings come from the client and a decoded string cannot
+         * be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definitions of 'volume' and 'name'
+         * enforce no tighter bound than the message size - confirm.
+         */
+        args.volume = alloca (req->msg[0].iov_len);
+        args.name   = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_entrylk_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_ENTRYLK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_EXACT;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* the name is only copied when the request carries a non-zero length */
+        if (args.namelen)
+                state->name = gf_strdup (args.name);
+        state->volume = gf_strdup (args.volume);
+
+        state->cmd  = args.cmd;
+        state->type = args.type;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_entrylk_resume);
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * server3_3_fentrylk - handler for GF_FOP_FENTRYLK requests.
+ *
+ * Decodes the request, records fd number, cmd, type, gfid, optional entry
+ * name, volume and xdata in the call state and passes the frame to
+ * resolve_and_resume with server_fentrylk_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_fentrylk (rpcsvc_request_t *req)
+{
+        server_state_t    *state    = NULL;
+        call_frame_t      *frame    = NULL;
+        gfs3_fentrylk_req  args     = {{0,},};
+        int                ret      = -1;
+        int                op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Size both decode buffers from the message instead of a fixed 256
+         * bytes: the strings come from the client and a decoded string cannot
+         * be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definitions of 'name' and 'volume'
+         * enforce no tighter bound than the message size - confirm.
+         */
+        args.name   = alloca (req->msg[0].iov_len);
+        args.volume = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_fentrylk_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_FENTRYLK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type  = RESOLVE_EXACT;
+        state->resolve.fd_no = args.fd;
+        state->cmd           = args.cmd;
+        state->type          = args.type;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* the name is only copied when the request carries a non-zero length */
+        if (args.namelen)
+                state->name = gf_strdup (args.name);
+        state->volume = gf_strdup (args.volume);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_fentrylk_resume);
+out:
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * server3_3_access - handler for GF_FOP_ACCESS requests.
+ *
+ * Decodes the request, records gfid, mask and xdata in the call state and
+ * passes the frame to resolve_and_resume with server_access_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_access (rpcsvc_request_t *req)
+{
+        gfs3_access_req  args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_access_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_ACCESS;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->mask = args.mask;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_access_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+
+/*
+ * server3_3_symlink - handler for GF_FOP_SYMLINK requests.
+ *
+ * Decodes the request, records parent gfid, basename, link target (stored in
+ * state->name), umask and xdata in the call state and passes the frame to
+ * resolve_and_resume with server_symlink_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_symlink (rpcsvc_request_t *req)
+{
+        server_state_t   *state    = NULL;
+        call_frame_t     *frame    = NULL;
+        gfs3_symlink_req  args     = {{0,},};
+        int               ret      = -1;
+        int               op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        /*
+         * Both strings come from the client; size each decode buffer from the
+         * message (linkname previously used a fixed 4096 bytes), since a
+         * decoded string cannot be longer than the message that carries it.
+         * NOTE(review): assumes the XDR definition of 'linkname' enforces no
+         * tighter bound than the message size - confirm.
+         */
+        args.bname    = alloca (req->msg[0].iov_len);
+        args.linkname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_symlink_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_SYMLINK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_NOT;
+        memcpy (state->resolve.pargfid, args.pargfid, 16);
+        state->resolve.bname = gf_strdup (args.bname);
+        state->name          = gf_strdup (args.linkname);
+        state->umask         = args.umask;
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_symlink_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* memory allocated by libc, don't use GF_FREE */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+
+/*
+ * server3_3_link - handler for GF_FOP_LINK requests.
+ *
+ * Decodes the request, records the existing object's gfid in state->resolve
+ * and the new entry (parent gfid + basename) in state->resolve2, stores the
+ * xdata and passes the frame to resolve_and_resume with server_link_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_link (rpcsvc_request_t *req)
+{
+        gfs3_link_req    args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        /* stack buffer for the new basename, sized to the message length */
+        args.newbname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_gfs3_link_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_LINK;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* source: looked up by gfid */
+        memcpy (state->resolve.gfid, args.oldgfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+
+        /* destination: parent gfid + new basename */
+        state->resolve2.type  = RESOLVE_NOT;
+        state->resolve2.bname = gf_strdup (args.newbname);
+        memcpy (state->resolve2.pargfid, args.newgfid, 16);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_link_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_rename - handler for GF_FOP_RENAME requests.
+ *
+ * Decodes the request, records the old entry (parent gfid + basename) in
+ * state->resolve and the new entry in state->resolve2, stores the xdata and
+ * passes the frame to resolve_and_resume with server_rename_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_rename (rpcsvc_request_t *req)
+{
+        gfs3_rename_req  args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        /* stack buffers for both basenames, each sized to the message length */
+        args.oldbname = alloca (req->msg[0].iov_len);
+        args.newbname = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_rename_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained, most likely out of memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_RENAME;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* auth failure: request arrived without a prior setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* old entry */
+        state->resolve.type  = RESOLVE_MUST;
+        state->resolve.bname = gf_strdup (args.oldbname);
+        memcpy (state->resolve.pargfid, args.oldgfid, 16);
+
+        /* new entry */
+        state->resolve2.type  = RESOLVE_MAY;
+        state->resolve2.bname = gf_strdup (args.newbname);
+        memcpy (state->resolve2.pargfid, args.newgfid, 16);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_rename_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+/*
+ * server3_3_lease - handler for GF_FOP_LEASE requests.
+ *
+ * Decodes the request, records gfid, the converted lease and xdata in the
+ * call state and passes the frame to resolve_and_resume with
+ * server_lease_resume.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_lease (rpcsvc_request_t *req)
+{
+        gfs3_lease_req   args     = {{0,},};
+        call_frame_t    *frame    = NULL;
+        server_state_t  *state    = NULL;
+        int              op_errno = 0;
+        int              ret      = -1;
+
+        if (req == NULL)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_gfs3_lease_req);
+        if (ret < 0) {
+                /* the request body could not be decoded */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (frame == NULL) {
+                /* no frame could be obtained for this request */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_LEASE;
+
+        state = CALL_STATE (frame);
+        if (frame->root->client->bound_xl == NULL) {
+                /* the client has no bound translator */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        memcpy (state->resolve.gfid, args.gfid, 16);
+        state->resolve.type = RESOLVE_MUST;
+        gf_proto_lease_to_lease (&args.lease, &state->lease);
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_lease_resume);
+
+out:
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        /* xdata buffer is freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        return ret;
+}
+
+/*
+ * server3_3_lk - handler for GF_FOP_LK requests.
+ *
+ * Decodes the request, translates the wire lock command and lock type into
+ * their F_* equivalents, records fd number, gfid, flock and xdata in the call
+ * state and passes the frame to resolve_and_resume with server_lk_resume.
+ * An unrecognised lock type is logged and the request still proceeds.
+ *
+ * Returns -1 if req is NULL; otherwise the value of ret, which is set to 0
+ * right before resolve_and_resume is called and is otherwise whatever the
+ * failing step / SERVER_REQ_SET_ERROR left in it.
+ */
+int
+server3_3_lk (rpcsvc_request_t *req)
+{
+        server_state_t  *state    = NULL;
+        call_frame_t    *frame    = NULL;
+        gfs3_lk_req      args     = {{0,},};
+        int              ret      = -1;
+        int              op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_gfs3_lk_req);
+        if (ret < 0) {
+                /* failed to decode msg */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                /* something wrong, mostly insufficient memory */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_LK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.fd_no = args.fd;
+        state->cmd           = args.cmd;
+        state->type          = args.type;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* wire command -> fcntl-style command; other values are kept as
+         * received */
+        switch (state->cmd) {
+        case GF_LK_GETLK:
+                state->cmd = F_GETLK;
+                break;
+        case GF_LK_SETLK:
+                state->cmd = F_SETLK;
+                break;
+        case GF_LK_SETLKW:
+                state->cmd = F_SETLKW;
+                break;
+        case GF_LK_RESLK_LCK:
+                state->cmd = F_RESLK_LCK;
+                break;
+        case GF_LK_RESLK_LCKW:
+                state->cmd = F_RESLK_LCKW;
+                break;
+        case GF_LK_RESLK_UNLCK:
+                state->cmd = F_RESLK_UNLCK;
+                break;
+        case GF_LK_GETLK_FD:
+                state->cmd = F_GETLK_FD;
+                break;
+
+        }
+
+
+        gf_proto_flock_to_flock (&args.flock, &state->flock);
+
+        /* wire lock type -> fcntl-style l_type, applied after the flock
+         * conversion above */
+        switch (state->type) {
+        case GF_LK_F_RDLCK:
+                state->flock.l_type = F_RDLCK;
+                break;
+        case GF_LK_F_WRLCK:
+                state->flock.l_type = F_WRLCK;
+                break;
+        case GF_LK_F_UNLCK:
+                state->flock.l_type = F_UNLCK;
+                break;
+        default:
+                /*
+                 * Log the gfid copied from the request. state->fd is never
+                 * assigned in this function (presumably it is only filled in
+                 * during resolution), so it must not be dereferenced here.
+                 */
+                gf_msg (frame->root->client->bound_xl->name, GF_LOG_ERROR,
+                        0, PS_MSG_LOCK_ERROR, "fd - %"PRId64" (%s): Unknown "
+                        "lock type: %"PRId32"!", state->resolve.fd_no,
+                        uuid_utoa (state->resolve.gfid), state->type);
+                break;
+        }
+
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_lk_resume);
+out:
+        /* both buffers are freed with plain free() */
+        free (args.xdata.xdata_val);
+
+        free (args.flock.lk_owner.lk_owner_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_rchecksum - request handler registered for GFS3_OP_RCHECKSUM.
+ *
+ * Decodes a gfs3_rchecksum_req from req->msg[0], obtains a call frame for
+ * the request, copies the fd number, offset and length from the request into
+ * the per-call state, unserializes the optional xdata dictionary and hands
+ * the frame to resolve_and_resume() with server_rchecksum_resume as the
+ * continuation.
+ *
+ * Returns -1 when req is NULL, 0 once the call has been handed over, and
+ * otherwise whatever value the error macros leave in ret.
+ */
+int
+server3_3_rchecksum (rpcsvc_request_t *req)
+{
+ server_state_t *state = NULL;
+ call_frame_t *frame = NULL;
+ gfs3_rchecksum_req args = {0,};
+ int ret = -1;
+ int op_errno = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gfs3_rchecksum_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ SERVER_REQ_SET_ERROR (req, ret);
+ goto out;
+ }
+
+ frame = get_frame_from_request (req);
+ if (!frame) {
+ // something wrong, mostly insufficient memory
+ SERVER_REQ_SET_ERROR (req, ret);
+ goto out;
+ }
+ frame->root->op = GF_FOP_RCHECKSUM;
+
+ state = CALL_STATE (frame);
+ if (!frame->root->client->bound_xl) {
+ /* auth failure, request on subvolume without setvolume */
+ SERVER_REQ_SET_ERROR (req, ret);
+ goto out;
+ }
+
+ /* The checksum is requested on an open fd, over [offset, offset + len). */
+ state->resolve.type = RESOLVE_MAY;
+ state->resolve.fd_no = args.fd;
+ state->offset = args.offset;
+ state->size = args.len;
+
+ /* NOTE(review): the macro is given ret, op_errno and the out label, so it
+ * presumably sets them and jumps to out on failure -- confirm against its
+ * definition. */
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ state->xdata,
+ args.xdata.xdata_val,
+ args.xdata.xdata_len, ret,
+ op_errno, out);
+
+ ret = 0;
+ resolve_and_resume (frame, server_rchecksum_resume);
+out:
+ /* presumably allocated by the XDR decoder; released on every path */
+ free (args.xdata.xdata_val);
+
+ if (op_errno)
+ SERVER_REQ_SET_ERROR (req, ret);
+
+ return ret;
+}
+
+/*
+ * server_null - handler registered for GFS3_OP_NULL.
+ *
+ * Submits a gf_common_rsp whose op_ret is 0 ("accepted") without a call
+ * frame, payload or iobref, and always returns 0.
+ */
+int
+server_null (rpcsvc_request_t *req)
+{
+        /* Every field not named here is zero-initialized. */
+        gf_common_rsp reply = { .op_ret = 0, };
+
+        server_submit_reply (NULL, req, &reply, NULL, 0, NULL,
+                             (xdrproc_t)xdr_gf_common_rsp);
+        return 0;
+}
+
+/*
+ * server3_3_lookup - request handler registered for GFS3_OP_LOOKUP.
+ *
+ * Decodes a gfs3_lookup_req into stack buffers sized to the request (so
+ * nothing has to be freed on exit), obtains a call frame and fills the
+ * resolver state: a non-empty bname selects a (pargfid, bname) lookup,
+ * otherwise the lookup is done by gfid. The frame is then handed to
+ * resolve_and_resume() with server_lookup_resume as the continuation.
+ *
+ * Failures that occur before a frame exists return through "err" with the
+ * value left in ret by the error macro. Failures after the frame exists go
+ * through "out", which answers the client via server_lookup_cbk() and
+ * returns 0.
+ */
+int
+server3_3_lookup (rpcsvc_request_t *req)
+{
+        call_frame_t    *frame       = NULL;
+        server_state_t  *state       = NULL;
+        gfs3_lookup_req  args        = {{0,},};
+        int              ret         = -1;
+        int              op_errno    = 0;
+        /* errno sent back by the "out" path; EINVAL unless stated otherwise */
+        int              reply_errno = EINVAL;
+
+        GF_VALIDATE_OR_GOTO ("server", req, err);
+
+        args.bname           = alloca (req->msg[0].iov_len);
+        args.xdata.xdata_val = alloca (req->msg[0].iov_len);
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_lookup_req);
+        if (ret < 0) {
+                //failed to decode msg;
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto err;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                // something wrong, mostly insufficient memory
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto err;
+        }
+        frame->root->op = GF_FOP_LOOKUP;
+
+        /* NOTE: lookup() uses req->ino only to identify if a lookup()
+         * is requested for 'root' or not
+         */
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_DONTCARE;
+
+        if (args.bname && strcmp (args.bname, "")) {
+                memcpy (state->resolve.pargfid, args.pargfid, 16);
+                state->resolve.bname = gf_strdup (args.bname);
+                if (!state->resolve.bname) {
+                        /* Do not resume with pargfid set and no name;
+                         * report the allocation failure to the client. */
+                        reply_errno = ENOMEM;
+                        goto out;
+                }
+        } else {
+                memcpy (state->resolve.gfid, args.gfid, 16);
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_lookup_resume);
+
+        return ret;
+out:
+
+        server_lookup_cbk (frame, NULL, frame->this, -1, reply_errno, NULL,
+                           NULL, NULL, NULL);
+        ret = 0;
+err:
+        return ret;
+}
+
+/*
+ * server3_3_statfs - request handler registered for GFS3_OP_STATFS.
+ *
+ * Decodes a gfs3_statfs_req from req->msg[0], obtains a call frame, records
+ * the target gfid (resolution type RESOLVE_MUST) in the per-call state,
+ * unserializes the optional xdata dictionary and hands the frame to
+ * resolve_and_resume() with server_statfs_resume as the continuation.
+ *
+ * Returns -1 when req is NULL, 0 once the call has been handed over, and
+ * otherwise whatever value the error macros leave in ret.
+ */
+int
+server3_3_statfs (rpcsvc_request_t *req)
+{
+ server_state_t *state = NULL;
+ call_frame_t *frame = NULL;
+ gfs3_statfs_req args = {{0,},};
+ int ret = -1;
+ int op_errno = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gfs3_statfs_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ SERVER_REQ_SET_ERROR (req, ret);
+ goto out;
+ }
+
+ frame = get_frame_from_request (req);
+ if (!frame) {
+ // something wrong, mostly insufficient memory
+ SERVER_REQ_SET_ERROR (req, ret);
+ goto out;
+ }
+ frame->root->op = GF_FOP_STATFS;
+
+ state = CALL_STATE (frame);
+ if (!frame->root->client->bound_xl) {
+ /* auth failure, request on subvolume without setvolume */
+ SERVER_REQ_SET_ERROR (req, ret);
+ goto out;
+ }
+
+ state->resolve.type = RESOLVE_MUST;
+ memcpy (state->resolve.gfid, args.gfid, 16);
+
+ /* NOTE(review): the macro is given ret, op_errno and the out label, so it
+ * presumably sets them and jumps to out on failure -- confirm against its
+ * definition. */
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+ state->xdata,
+ args.xdata.xdata_val,
+ args.xdata.xdata_len, ret,
+ op_errno, out);
+
+ ret = 0;
+ resolve_and_resume (frame, server_statfs_resume);
+out:
+ /* presumably allocated by the XDR decoder; released on every path */
+ free (args.xdata.xdata_val);
+
+ if (op_errno)
+ SERVER_REQ_SET_ERROR (req, ret);
+
+ return ret;
+}
+
+/*
+ * server3_3_getactivelk - request handler registered for GFS3_OP_GETACTIVELK.
+ *
+ * Decodes a gfs3_getactivelk_req from req->msg[0], obtains a call frame,
+ * records the target gfid (resolution type RESOLVE_MUST), unserializes the
+ * xdata dictionary and hands the frame to resolve_and_resume() with
+ * server_getactivelk_resume as the continuation.
+ *
+ * Returns -1 when req is NULL, 0 once the call has been handed over, and
+ * otherwise whatever value the error macros leave in ret.
+ */
+static int
+server3_3_getactivelk (rpcsvc_request_t *req)
+{
+        server_state_t       *state    = NULL;
+        call_frame_t         *frame    = NULL;
+        gfs3_getactivelk_req  args     = {{0,},};
+        int                   ret      = -1;
+        int                   op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_getactivelk_req);
+        if (ret < 0) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_GETACTIVELK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* no bound subvolume for this client: reject the request */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* here, dict itself works as xdata */
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      (args.xdata.xdata_val),
+                                      (args.xdata.xdata_len), ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_getactivelk_resume);
+out:
+        /* presumably allocated by the XDR decoder; released on every path */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+
+/*
+ * server3_3_setactivelk - request handler registered for GFS3_OP_SETACTIVELK.
+ *
+ * Decodes a gfs3_setactivelk_req from req->msg[0], obtains a call frame,
+ * records the target gfid (resolution type RESOLVE_MUST), unserializes the
+ * xdata dictionary, converts the lock list carried by the request into
+ * state->locklist and hands the frame to resolve_and_resume() with
+ * server_setactivelk_resume as the continuation.
+ *
+ * Returns -1 when req is NULL, 0 once the call has been handed over, the
+ * non-zero result of unserialize_req_locklist() if that step fails, and
+ * otherwise whatever value the error macros leave in ret.
+ */
+static int
+server3_3_setactivelk (rpcsvc_request_t *req)
+{
+        server_state_t       *state    = NULL;
+        call_frame_t         *frame    = NULL;
+        gfs3_setactivelk_req  args     = {{0,},};
+        int                   ret      = -1;
+        int                   op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_setactivelk_req);
+        if (ret < 0) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        frame->root->op = GF_FOP_SETACTIVELK;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* no bound subvolume for this client: reject the request */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        state->resolve.type = RESOLVE_MUST;
+        memcpy (state->resolve.gfid, args.gfid, 16);
+
+        /* here, dict itself works as xdata */
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      (args.xdata.xdata_val),
+                                      (args.xdata.xdata_len), ret,
+                                      op_errno, out);
+
+        ret = unserialize_req_locklist (&args, &state->locklist);
+        if (ret)
+                goto out;
+
+        ret = 0;
+
+        resolve_and_resume (frame, server_setactivelk_resume);
+out:
+        /* presumably allocated by the XDR decoder; released on every path */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * server3_3_compound - request handler registered for GFS3_OP_COMPOUND.
+ *
+ * Decodes a gfs3_compound_req from req->msg[0], obtains a call frame and
+ * builds state->payload_vector from (a) whatever follows the decoded header
+ * inside msg[0] and (b) every further vector in req->msg[]. state->size is
+ * the total payload length. The resolver state is then filled by
+ * server_get_compound_resolve(), xdata is unserialized and the frame is
+ * handed to resolve_and_resume() with server_compound_resume as the
+ * continuation.
+ *
+ * Returns -1 when req is NULL, 0 once the call has been handed over, and
+ * otherwise whatever value the error macros leave in ret.
+ */
+int
+server3_3_compound (rpcsvc_request_t *req)
+{
+        server_state_t    *state    = NULL;
+        call_frame_t      *frame    = NULL;
+        gfs3_compound_req  args     = {0,};
+        ssize_t            len      = 0;
+        int                i        = 0;
+        int                ret      = -1;
+        int                op_errno = 0;
+
+        if (!req)
+                return ret;
+
+        ret = xdr_to_generic (req->msg[0], &args,
+                              (xdrproc_t)xdr_gfs3_compound_req);
+        if (ret < 0) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* A successful decode reports how many bytes of msg[0] the XDR
+         * header consumed; the payload, if any, starts right after it.
+         * Without this, len stays 0 and the header itself would be
+         * treated as payload. */
+        len = ret;
+
+        frame = get_frame_from_request (req);
+        if (!frame) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+        frame->root->op = GF_FOP_COMPOUND;
+
+        state = CALL_STATE (frame);
+        if (!frame->root->client->bound_xl) {
+                /* auth failure, request on subvolume without setvolume */
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        /* NOTE(review): args is a local of this function. If
+         * server_compound_resume can run after this function has returned,
+         * state->req would be left pointing at dead stack memory -- confirm
+         * against the resolver's execution model. */
+        state->req    = &args;
+        state->iobref = iobref_ref (req->iobref);
+
+        if (len < req->msg[0].iov_len) {
+                state->payload_vector[0].iov_base
+                        = (req->msg[0].iov_base + len);
+                state->payload_vector[0].iov_len
+                        = req->msg[0].iov_len - len;
+                state->payload_count = 1;
+        }
+
+        for (i = 1; i < req->count; i++) {
+                state->payload_vector[state->payload_count++]
+                        = req->msg[i];
+        }
+
+        for (i = 0; i < state->payload_count; i++) {
+                state->size += state->payload_vector[i].iov_len;
+        }
+
+        ret = server_get_compound_resolve (state, &args);
+
+        if (ret) {
+                SERVER_REQ_SET_ERROR (req, ret);
+                goto out;
+        }
+
+        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl,
+                                      state->xdata,
+                                      args.xdata.xdata_val,
+                                      args.xdata.xdata_len, ret,
+                                      op_errno, out);
+
+        ret = 0;
+        resolve_and_resume (frame, server_compound_resume);
+out:
+        /* presumably allocated by the XDR decoder; released on every path */
+        free (args.xdata.xdata_val);
+
+        if (op_errno)
+                SERVER_REQ_SET_ERROR (req, ret);
+
+        return ret;
+}
+
+/*
+ * Dispatch table for the fop program defined below: one rpcsvc_actor_t per
+ * GFS3_OP_* procedure number, giving the procedure name, its number and the
+ * handler function. Only WRITE supplies a value in the fourth field
+ * (server3_3_writev_vecsizer); every other entry passes NULL there.
+ * NOTE(review): the meaning of the remaining fields (0, DRC_NA) is defined
+ * by rpcsvc_actor_t, which is not visible here.
+ */
+rpcsvc_actor_t glusterfs3_3_fop_actors[GLUSTER_FOP_PROCCNT] = {
+ [GFS3_OP_NULL] = {"NULL", GFS3_OP_NULL, server_null, NULL, 0, DRC_NA},
+ [GFS3_OP_STAT] = {"STAT", GFS3_OP_STAT, server3_3_stat, NULL, 0, DRC_NA},
+ [GFS3_OP_READLINK] = {"READLINK", GFS3_OP_READLINK, server3_3_readlink, NULL, 0, DRC_NA},
+ [GFS3_OP_MKNOD] = {"MKNOD", GFS3_OP_MKNOD, server3_3_mknod, NULL, 0, DRC_NA},
+ [GFS3_OP_MKDIR] = {"MKDIR", GFS3_OP_MKDIR, server3_3_mkdir, NULL, 0, DRC_NA},
+ [GFS3_OP_UNLINK] = {"UNLINK", GFS3_OP_UNLINK, server3_3_unlink, NULL, 0, DRC_NA},
+ [GFS3_OP_RMDIR] = {"RMDIR", GFS3_OP_RMDIR, server3_3_rmdir, NULL, 0, DRC_NA},
+ [GFS3_OP_SYMLINK] = {"SYMLINK", GFS3_OP_SYMLINK, server3_3_symlink, NULL, 0, DRC_NA},
+ [GFS3_OP_RENAME] = {"RENAME", GFS3_OP_RENAME, server3_3_rename, NULL, 0, DRC_NA},
+ [GFS3_OP_LINK] = {"LINK", GFS3_OP_LINK, server3_3_link, NULL, 0, DRC_NA},
+ [GFS3_OP_TRUNCATE] = {"TRUNCATE", GFS3_OP_TRUNCATE, server3_3_truncate, NULL, 0, DRC_NA},
+ [GFS3_OP_OPEN] = {"OPEN", GFS3_OP_OPEN, server3_3_open, NULL, 0, DRC_NA},
+ [GFS3_OP_READ] = {"READ", GFS3_OP_READ, server3_3_readv, NULL, 0, DRC_NA},
+ [GFS3_OP_WRITE] = {"WRITE", GFS3_OP_WRITE, server3_3_writev, server3_3_writev_vecsizer, 0, DRC_NA},
+ [GFS3_OP_STATFS] = {"STATFS", GFS3_OP_STATFS, server3_3_statfs, NULL, 0, DRC_NA},
+ [GFS3_OP_FLUSH] = {"FLUSH", GFS3_OP_FLUSH, server3_3_flush, NULL, 0, DRC_NA},
+ [GFS3_OP_FSYNC] = {"FSYNC", GFS3_OP_FSYNC, server3_3_fsync, NULL, 0, DRC_NA},
+ [GFS3_OP_SETXATTR] = {"SETXATTR", GFS3_OP_SETXATTR, server3_3_setxattr, NULL, 0, DRC_NA},
+ [GFS3_OP_GETXATTR] = {"GETXATTR", GFS3_OP_GETXATTR, server3_3_getxattr, NULL, 0, DRC_NA},
+ [GFS3_OP_REMOVEXATTR] = {"REMOVEXATTR", GFS3_OP_REMOVEXATTR, server3_3_removexattr, NULL, 0, DRC_NA},
+ [GFS3_OP_OPENDIR] = {"OPENDIR", GFS3_OP_OPENDIR, server3_3_opendir, NULL, 0, DRC_NA},
+ [GFS3_OP_FSYNCDIR] = {"FSYNCDIR", GFS3_OP_FSYNCDIR, server3_3_fsyncdir, NULL, 0, DRC_NA},
+ [GFS3_OP_ACCESS] = {"ACCESS", GFS3_OP_ACCESS, server3_3_access, NULL, 0, DRC_NA},
+ [GFS3_OP_CREATE] = {"CREATE", GFS3_OP_CREATE, server3_3_create, NULL, 0, DRC_NA},
+ [GFS3_OP_FTRUNCATE] = {"FTRUNCATE", GFS3_OP_FTRUNCATE, server3_3_ftruncate, NULL, 0, DRC_NA},
+ [GFS3_OP_FSTAT] = {"FSTAT", GFS3_OP_FSTAT, server3_3_fstat, NULL, 0, DRC_NA},
+ [GFS3_OP_LK] = {"LK", GFS3_OP_LK, server3_3_lk, NULL, 0, DRC_NA},
+ [GFS3_OP_LOOKUP] = {"LOOKUP", GFS3_OP_LOOKUP, server3_3_lookup, NULL, 0, DRC_NA},
+ [GFS3_OP_READDIR] = {"READDIR", GFS3_OP_READDIR, server3_3_readdir, NULL, 0, DRC_NA},
+ [GFS3_OP_INODELK] = {"INODELK", GFS3_OP_INODELK, server3_3_inodelk, NULL, 0, DRC_NA},
+ [GFS3_OP_FINODELK] = {"FINODELK", GFS3_OP_FINODELK, server3_3_finodelk, NULL, 0, DRC_NA},
+ [GFS3_OP_ENTRYLK] = {"ENTRYLK", GFS3_OP_ENTRYLK, server3_3_entrylk, NULL, 0, DRC_NA},
+ [GFS3_OP_FENTRYLK] = {"FENTRYLK", GFS3_OP_FENTRYLK, server3_3_fentrylk, NULL, 0, DRC_NA},
+ [GFS3_OP_XATTROP] = {"XATTROP", GFS3_OP_XATTROP, server3_3_xattrop, NULL, 0, DRC_NA},
+ [GFS3_OP_FXATTROP] = {"FXATTROP", GFS3_OP_FXATTROP, server3_3_fxattrop, NULL, 0, DRC_NA},
+ [GFS3_OP_FGETXATTR] = {"FGETXATTR", GFS3_OP_FGETXATTR, server3_3_fgetxattr, NULL, 0, DRC_NA},
+ [GFS3_OP_FSETXATTR] = {"FSETXATTR", GFS3_OP_FSETXATTR, server3_3_fsetxattr, NULL, 0, DRC_NA},
+ [GFS3_OP_RCHECKSUM] = {"RCHECKSUM", GFS3_OP_RCHECKSUM, server3_3_rchecksum, NULL, 0, DRC_NA},
+ [GFS3_OP_SETATTR] = {"SETATTR", GFS3_OP_SETATTR, server3_3_setattr, NULL, 0, DRC_NA},
+ [GFS3_OP_FSETATTR] = {"FSETATTR", GFS3_OP_FSETATTR, server3_3_fsetattr, NULL, 0, DRC_NA},
+ [GFS3_OP_READDIRP] = {"READDIRP", GFS3_OP_READDIRP, server3_3_readdirp, NULL, 0, DRC_NA},
+ [GFS3_OP_RELEASE] = {"RELEASE", GFS3_OP_RELEASE, server3_3_release, NULL, 0, DRC_NA},
+ [GFS3_OP_RELEASEDIR] = {"RELEASEDIR", GFS3_OP_RELEASEDIR, server3_3_releasedir, NULL, 0, DRC_NA},
+ [GFS3_OP_FREMOVEXATTR] = {"FREMOVEXATTR", GFS3_OP_FREMOVEXATTR, server3_3_fremovexattr, NULL, 0, DRC_NA},
+ [GFS3_OP_FALLOCATE] = {"FALLOCATE", GFS3_OP_FALLOCATE, server3_3_fallocate, NULL, 0, DRC_NA},
+ [GFS3_OP_DISCARD] = {"DISCARD", GFS3_OP_DISCARD, server3_3_discard, NULL, 0, DRC_NA},
+ [GFS3_OP_ZEROFILL] = {"ZEROFILL", GFS3_OP_ZEROFILL, server3_3_zerofill, NULL, 0, DRC_NA},
+ [GFS3_OP_IPC] = {"IPC", GFS3_OP_IPC, server3_3_ipc, NULL, 0, DRC_NA},
+ [GFS3_OP_SEEK] = {"SEEK", GFS3_OP_SEEK, server3_3_seek, NULL, 0, DRC_NA},
+ [GFS3_OP_LEASE] = {"LEASE", GFS3_OP_LEASE, server3_3_lease, NULL, 0, DRC_NA},
+ [GFS3_OP_GETACTIVELK] = {"GETACTIVELK", GFS3_OP_GETACTIVELK, server3_3_getactivelk, NULL, 0, DRC_NA},
+ [GFS3_OP_SETACTIVELK] = {"SETACTIVELK", GFS3_OP_SETACTIVELK, server3_3_setactivelk, NULL, 0, DRC_NA},
+ [GFS3_OP_COMPOUND] = {"COMPOUND", GFS3_OP_COMPOUND, server3_3_compound, NULL, 0, DRC_NA},
+};
+
+
+/*
+ * RPC program descriptor for the fop handlers above: ties the program
+ * number and version to the glusterfs3_3_fop_actors dispatch table, whose
+ * size is GLUSTER_FOP_PROCCNT.
+ */
+struct rpcsvc_program glusterfs3_3_fop_prog = {
+ .progname = "GlusterFS 3.3",
+ .prognum = GLUSTER_FOP_PROGRAM,
+ .progver = GLUSTER_FOP_VERSION,
+ .numactors = GLUSTER_FOP_PROCCNT,
+ .actors = glusterfs3_3_fop_actors,
+};
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index a7501babbcc..269d5f3bffd 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -1,73 +1,133 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/time.h>
#include <sys/resource.h>
+
#include "server.h"
#include "server-helpers.h"
-#include "glusterfs-xdr.h"
+#include "glusterfs3-xdr.h"
#include "call-stub.h"
#include "statedump.h"
#include "defaults.h"
#include "authenticate.h"
-#include "rpcsvc.h"
+#include "event.h"
+#include "server-messages.h"
+
+/*
+ * Descriptor (name, program number, version) of the "Gluster Callback"
+ * program. Only these three fields are set; any others are left
+ * zero-initialized.
+ */
+rpcsvc_cbk_program_t server_cbk_prog = {
+ .progname = "Gluster Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+};
+
+/*
+ * grace_time_handler - handle expiry of a client's grace timer.
+ *
+ * @data is the client_t whose timer expired. The handler logs the expiry,
+ * detaches the pending grace timer from the client's server context under
+ * fdtable_lock and cancels it. If a timer was actually pending, the client
+ * is put; when that put reports the client as detached, its internal and
+ * POSIX locks are cleaned up.
+ */
+void
+grace_time_handler (void *data)
+{
+        client_t     *client   = data;
+        xlator_t     *this     = client->this;
+        server_ctx_t *serv_ctx = NULL;
+        gf_timer_t   *timer    = NULL;
+        gf_boolean_t  detached = _gf_false;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+
+        gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_GRACE_TIMER_EXPD, "grace "
+                "timer expired for %s", client->client_uid);
+
+        serv_ctx = server_ctx_get (client, this);
+        if (!serv_ctx) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() "
+                        "failed");
+                goto out;
+        }
+
+        /* Take ownership of the pending timer, if there is one. */
+        LOCK (&serv_ctx->fdtable_lock);
+        {
+                if (serv_ctx->grace_timer) {
+                        timer = serv_ctx->grace_timer;
+                        serv_ctx->grace_timer = NULL;
+                }
+        }
+        UNLOCK (&serv_ctx->fdtable_lock);
+
+        /* No timer was pending: nothing to cancel or clean up. */
+        if (!timer)
+                goto out;
+
+        gf_timer_call_cancel (this->ctx, timer);
+
+        /*
+         * client must not be destroyed in gf_client_put(),
+         * so take a ref.
+         */
+        gf_client_ref (client);
+        gf_client_put (client, &detached);
+        if (detached) {
+                /* reconnection did not happen :-( */
+                server_connection_cleanup (this, client,
+                                           INTERNAL_LOCKS | POSIX_LOCKS);
+        }
+        gf_client_unref (client);
+out:
+        return;
+}
struct iobuf *
-gfs_serialize_reply (rpcsvc_request_t *req, void *arg, gfs_serialize_t sfunc,
- struct iovec *outmsg)
+gfs_serialize_reply (rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
+ xdrproc_t xdrproc)
{
- struct iobuf *iob = NULL;
- ssize_t retlen = -1;
+ struct iobuf *iob = NULL;
+ ssize_t retlen = 0;
+ ssize_t xdr_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("server", req, ret);
/* First, get the io buffer into which the reply in arg will
* be serialized.
*/
- iob = iobuf_get (req->conn->svc->ctx->iobuf_pool);
- if (!iob) {
- gf_log ("", GF_LOG_ERROR, "Failed to get iobuf");
- goto ret;
- }
-
- iobuf_to_iovec (iob, outmsg);
- /* Use the given serializer to translate the give C structure in arg
- * to XDR format which will be written into the buffer in outmsg.
- */
- /* retlen is used to received the error since size_t is unsigned and we
- * need -1 for error notification during encoding.
- */
- retlen = sfunc (*outmsg, arg);
- if (retlen == -1) {
- /* Failed to Encode 'GlusterFS' msg in RPC is not exactly
- failure of RPC return values.. client should get
- notified about this, so there are no missing frames */
- gf_log ("", GF_LOG_ERROR, "Failed to encode message");
- req->rpc_err = GARBAGE_ARGS;
- retlen = 0;
+ if (arg && xdrproc) {
+ xdr_size = xdr_sizeof (xdrproc, arg);
+ iob = iobuf_get2 (req->svc->ctx->iobuf_pool, xdr_size);
+ if (!iob) {
+ gf_msg_callingfn (THIS->name, GF_LOG_ERROR, ENOMEM,
+ PS_MSG_NO_MEMORY,
+ "Failed to get iobuf");
+ goto ret;
+ };
+
+ iobuf_to_iovec (iob, outmsg);
+ /* Use the given serializer to translate the given C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to receive the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+
+ retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ /* Failed to Encode 'GlusterFS' msg in RPC is not exactly
+ failure of RPC return values.. client should get
+ notified about this, so there are no missing frames */
+ gf_msg_callingfn ("", GF_LOG_ERROR, 0,
+ PS_MSG_ENCODE_MSG_FAILED,
+ "Failed to encode message");
+ req->rpc_err = GARBAGE_ARGS;
+ retlen = 0;
+ }
}
-
outmsg->iov_len = retlen;
ret:
if (retlen == -1) {
@@ -78,41 +138,44 @@ ret:
return iob;
}
-
-
-/* Generic reply function for NFSv3 specific replies. */
int
server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
struct iovec *payload, int payloadcount,
- struct iobref *iobref, gfs_serialize_t sfunc)
+ struct iobref *iobref, xdrproc_t xdrproc)
{
struct iobuf *iob = NULL;
int ret = -1;
struct iovec rsp = {0,};
server_state_t *state = NULL;
char new_iobref = 0;
+ client_t *client = NULL;
+ gf_boolean_t lk_heal = _gf_false;
+ gf_boolean_t barriered = _gf_false;
- if (!req) {
- goto ret;
- }
+ GF_VALIDATE_OR_GOTO ("server", req, ret);
if (frame) {
state = CALL_STATE (frame);
+ frame->local = NULL;
+ client = frame->root->client;
}
+ if (client)
+ lk_heal = ((server_conf_t *) client->this->private)->lk_heal;
+
if (!iobref) {
iobref = iobref_new ();
if (!iobref) {
- gf_log ("", GF_LOG_ERROR, "out of memory");
goto ret;
}
new_iobref = 1;
}
- iob = gfs_serialize_reply (req, arg, sfunc, &rsp);
+ iob = gfs_serialize_reply (req, arg, &rsp, xdrproc);
if (!iob) {
- gf_log ("", GF_LOG_ERROR, "Failed to serialize reply");
+ gf_msg ("", GF_LOG_ERROR, 0, PS_MSG_SERIALIZE_REPLY_FAILED,
+ "Failed to serialize reply");
goto ret;
}
@@ -122,231 +185,157 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
iobref);
+ /* TODO: this is demo purpose only */
+ /* ret = rpcsvc_callback_submit (req->svc, req->trans, req->prog,
+ GF_CBK_NULL, &rsp, 1);
+ */
/* Now that we've done our job of handing the message to the RPC layer
* we can safely unref the iob in the hope that RPC layer must have
* ref'ed the iob on receiving into the txlist.
*/
iobuf_unref (iob);
if (ret == -1) {
- gf_log ("", GF_LOG_ERROR, "Reply submission failed");
+ gf_msg_callingfn ("", GF_LOG_ERROR, 0,
+ PS_MSG_REPLY_SUBMIT_FAILED,
+ "Reply submission failed");
+ if (frame && client && !lk_heal) {
+ server_connection_cleanup (frame->this, client,
+ INTERNAL_LOCKS | POSIX_LOCKS);
+ } else {
+ gf_msg_callingfn ("", GF_LOG_ERROR, 0,
+ PS_MSG_REPLY_SUBMIT_FAILED,
+ "Reply submission failed");
+ /* TODO: Failure of open(dir), create, inodelk, entrylk
+ or lk fops send failure must be handled specially. */
+ }
goto ret;
}
ret = 0;
ret:
- if (state) {
+ if (state)
free_state (state);
- }
- if (frame) {
+ if (client)
+ gf_client_unref (client);
+
+ if (frame)
STACK_DESTROY (frame->root);
- }
- if (new_iobref) {
+ if (new_iobref)
iobref_unref (iobref);
- }
return ret;
}
-/* */
+
int
-xdr_to_glusterfs_req (rpcsvc_request_t *req, void *arg, gfs_serialize_t sfunc)
+server_priv_to_dict (xlator_t *this, dict_t *dict)
{
- int ret = -1;
+ server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+ peer_info_t *peerinfo = NULL;
+ char key[32] = {0,};
+ int count = 0;
+ int ret = -1;
- if (!req)
- return -1;
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, dict, out);
- ret = sfunc (req->msg[0], arg);
+ conf = this->private;
+ if (!conf)
+ return 0;
+ //TODO: Dump only specific info to dict
+
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ peerinfo = &xprt->peerinfo;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "client%d.hostname",
+ count);
+ ret = dict_set_str (dict, key, peerinfo->identifier);
+ if (ret)
+ goto unlock;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "client%d.bytesread",
+ count);
+ ret = dict_set_uint64 (dict, key,
+ xprt->total_bytes_read);
+ if (ret)
+ goto unlock;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "client%d.byteswrite",
+ count);
+ ret = dict_set_uint64 (dict, key,
+ xprt->total_bytes_write);
+ if (ret)
+ goto unlock;
+
+ count++;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&conf->mutex);
+ if (ret)
+ goto out;
- if (ret > 0)
- ret = 0;
+ ret = dict_set_int32 (dict, "clientcount", count);
+out:
return ret;
}
-
-#if 0
-/*
- * prototype of operations function for each of mop and
- * fop at server protocol level
- *
- * @frame: call frame pointer
- * @bound_xl: the xlator that this frame is bound to
- * @params: parameters dictionary
- *
- * to be used by protocol interpret, _not_ for exterenal reference
- */
-typedef int32_t (*gf_op_t) (call_frame_t *frame, xlator_t *bould_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf);
-
-
-static gf_op_t gf_fops[] = {
- [GF_FOP_STAT] = server_stat,
- [GF_FOP_READLINK] = server_readlink,
- [GF_FOP_MKNOD] = server_mknod,
- [GF_FOP_MKDIR] = server_mkdir,
- [GF_FOP_UNLINK] = server_unlink,
- [GF_FOP_RMDIR] = server_rmdir,
- [GF_FOP_SYMLINK] = server_symlink,
- [GF_FOP_RENAME] = server_rename,
- [GF_FOP_LINK] = server_link,
- [GF_FOP_TRUNCATE] = server_truncate,
- [GF_FOP_OPEN] = server_open,
- [GF_FOP_READ] = server_readv,
- [GF_FOP_WRITE] = server_writev,
- [GF_FOP_STATFS] = server_statfs,
- [GF_FOP_FLUSH] = server_flush,
- [GF_FOP_FSYNC] = server_fsync,
- [GF_FOP_SETXATTR] = server_setxattr,
- [GF_FOP_GETXATTR] = server_getxattr,
- [GF_FOP_FGETXATTR] = server_fgetxattr,
- [GF_FOP_FSETXATTR] = server_fsetxattr,
- [GF_FOP_REMOVEXATTR] = server_removexattr,
- [GF_FOP_OPENDIR] = server_opendir,
- [GF_FOP_FSYNCDIR] = server_fsyncdir,
- [GF_FOP_ACCESS] = server_access,
- [GF_FOP_CREATE] = server_create,
- [GF_FOP_FTRUNCATE] = server_ftruncate,
- [GF_FOP_FSTAT] = server_fstat,
- [GF_FOP_LK] = server_lk,
- [GF_FOP_LOOKUP] = server_lookup,
- [GF_FOP_READDIR] = server_readdir,
- [GF_FOP_READDIRP] = server_readdirp,
- [GF_FOP_INODELK] = server_inodelk,
- [GF_FOP_FINODELK] = server_finodelk,
- [GF_FOP_ENTRYLK] = server_entrylk,
- [GF_FOP_FENTRYLK] = server_fentrylk,
- [GF_FOP_CHECKSUM] = server_checksum,
- [GF_FOP_RCHECKSUM] = server_rchecksum,
- [GF_FOP_XATTROP] = server_xattrop,
- [GF_FOP_FXATTROP] = server_fxattrop,
- [GF_FOP_SETATTR] = server_setattr,
- [GF_FOP_FSETATTR] = server_fsetattr,
- [GF_FOP_SETDENTS] = server_setdents,
- [GF_FOP_GETDENTS] = server_getdents,
- [GF_FOP_LOCK_NOTIFY] = server_lock_notify,
- [GF_FOP_LOCK_FNOTIFY] = server_lock_fnotify,
-};
-
-static gf_op_t gf_cbks[] = {
- [GF_CBK_FORGET] = server_forget,
- [GF_CBK_RELEASE] = server_release,
- [GF_CBK_RELEASEDIR] = server_releasedir
-};
-
-#endif
-
int
-server_fd (xlator_t *this)
+server_priv (xlator_t *this)
{
- server_conf_t *conf = NULL;
- server_connection_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- gf_proc_dump_add_section("xlator.protocol.server.conn");
-
- ret = pthread_mutex_trylock (&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump fdtable"
- " errno: %d", errno);
- return -1;
- }
-
- list_for_each_entry (trav, &conf->conns, list) {
- if (trav->id) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.id", i);
- gf_proc_dump_write(key, "%s", trav->id);
- }
-
- gf_proc_dump_build_key(key,"xlator.protocol.server.conn",
- "%d.ref",i)
- gf_proc_dump_write(key, "%d", trav->ref);
- if (trav->bound_xl) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.bound_xl", i);
- gf_proc_dump_write(key, "%s", trav->bound_xl->name);
- }
-
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.id", i);
- fdtable_dump(trav->fdtable,key);
- i++;
- }
- pthread_mutex_unlock (&conf->mutex);
+ server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ uint64_t total_read = 0;
+ uint64_t total_write = 0;
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("server", this, out);
- return 0;
- }
+ conf = this->private;
+ if (!conf)
+ return 0;
-int
-server_priv (xlator_t *this)
-{
- return 0;
-}
+ gf_proc_dump_build_key (key, "xlator.protocol.server", "priv");
+ gf_proc_dump_add_section (key);
-int
-server_inode (xlator_t *this)
-{
- server_conf_t *conf = NULL;
- server_connection_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- ret = pthread_mutex_trylock (&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump itable"
- " errno: %d", errno);
- return -1;
- }
-
- list_for_each_entry (trav, &conf->conns, list) {
- if (trav->bound_xl && trav->bound_xl->itable) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.bound_xl.%s",
- i, trav->bound_xl->name);
- inode_table_dump(trav->bound_xl->itable,key);
- i++;
- }
+ ret = pthread_mutex_trylock (&conf->mutex);
+ if (ret != 0)
+ goto out;
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ total_read += xprt->total_bytes_read;
+ total_write += xprt->total_bytes_write;
+ }
}
pthread_mutex_unlock (&conf->mutex);
+ gf_proc_dump_build_key(key, "server", "total-bytes-read");
+ gf_proc_dump_write(key, "%"PRIu64, total_read);
- return 0;
+ gf_proc_dump_build_key(key, "server", "total-bytes-write");
+ gf_proc_dump_write(key, "%"PRIu64, total_write);
+
+ ret = 0;
+out:
+ if (ret)
+ gf_proc_dump_write ("Unable to print priv",
+ "(Lock acquisition failed) %s",
+ this?this->name:"server");
+
+ return ret;
}
-static void
+static int
get_auth_types (dict_t *this, char *key, data_t *value, void *data)
{
dict_t *auth_dict = NULL;
@@ -355,6 +344,10 @@ get_auth_types (dict_t *this, char *key, data_t *value, void *data)
char *key_cpy = NULL;
int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", key, out);
+ GF_VALIDATE_OR_GOTO ("server", data, out);
+
auth_dict = data;
key_cpy = gf_strdup (key);
GF_VALIDATE_OR_GOTO("server", key_cpy, out);
@@ -367,64 +360,119 @@ get_auth_types (dict_t *this, char *key, data_t *value, void *data)
/* TODO: backward compatibility, remove when
newer versions are available */
tmp = "addr";
- gf_log ("server", GF_LOG_WARNING,
+ gf_msg ("server", GF_LOG_WARNING, 0,
+ PS_MSG_AUTH_IP_ERROR,
"assuming 'auth.ip' to be 'auth.addr'");
}
ret = dict_set_dynptr (auth_dict, tmp, NULL, 0);
if (ret < 0) {
- gf_log ("server", GF_LOG_DEBUG,
- "failed to dict_set_dynptr");
+ gf_msg_debug ("server", 0, "failed to "
+ "dict_set_dynptr");
}
}
GF_FREE (key_cpy);
out:
- return;
+ return 0;
}
+/*
+ * _check_for_auth_option - dict_foreach callback validating one option.
+ *
+ * @d:   dictionary being walked (unused here)
+ * @k:   option key
+ * @v:   option value
+ * @tmp: the xlator_t whose name the key is matched against
+ *
+ * Only keys of the form "auth.addr.<something>.<xl->name>.<...>" are
+ * checked. For those, the value must be "*" or a comma-separated list in
+ * which every entry satisfies valid_internet_address().
+ *
+ * Returns 0 when the key is not such an option or every address is valid,
+ * and -1 when an address is rejected or the value cannot be duplicated for
+ * parsing.
+ */
+int
+_check_for_auth_option (dict_t *d, char *k, data_t *v,
+                        void *tmp)
+{
+        int       ret           = 0;
+        xlator_t *xl            = NULL;
+        char     *tail          = NULL;
+        char     *tmp_addr_list = NULL;
+        char     *addr          = NULL;
+        char     *tmp_str       = NULL;
+
+        xl = tmp;
+
+        tail = strtail (k, "auth.");
+        if (!tail)
+                goto out;
+
+        if (strncmp(tail, "addr.", 5) != 0) {
+                gf_msg (xl->name, GF_LOG_INFO, 0, PS_MSG_SKIP_FORMAT_CHK,
+                        "skip format check for non-addr auth option %s", k);
+                goto out;
+        }
+
+        /* fast fwd thru module type */
+        tail = strchr (tail, '.');
+        if (!tail)
+                goto out;
+        tail++;
+
+        tail = strtail (tail, xl->name);
+        if (!tail)
+                goto out;
+
+        if (*tail == '.') {
+                /* when we are here, the key is checked for
+                 * valid auth.allow.<xlator>
+                 * Now we verify the ip address
+                 */
+                if (!strcmp (v->data, "*")) {
+                        ret = 0;
+                        goto out;
+                }
+
+                /* strtok_r() modifies its input, so work on a copy. A
+                 * failed copy must not reach strtok_r(): a NULL string
+                 * with an unset save pointer is undefined behaviour. */
+                tmp_addr_list = gf_strdup (v->data);
+                if (!tmp_addr_list) {
+                        ret = -1;
+                        goto out;
+                }
+
+                addr = strtok_r (tmp_addr_list, ",", &tmp_str);
+                if (!addr)
+                        addr = v->data;
+
+                while (addr) {
+                        if (valid_internet_address (addr, _gf_true)) {
+                                ret = 0;
+                        } else {
+                                ret = -1;
+                                gf_msg (xl->name, GF_LOG_ERROR, 0,
+                                        PS_MSG_INTERNET_ADDR_ERROR,
+                                        "internet address '%s'"
+                                        " does not conform to"
+                                        " standards.", addr);
+                                goto out;
+                        }
+                        if (tmp_str)
+                                addr = strtok_r (NULL, ",", &tmp_str);
+                        else
+                                addr = NULL;
+                }
+        }
+out:
+        GF_FREE (tmp_addr_list);
+
+        return ret;
+}
int
validate_auth_options (xlator_t *this, dict_t *dict)
{
- int ret = -1;
- int error = 0;
+ int error = -1;
xlator_list_t *trav = NULL;
- data_pair_t *pair = NULL;
- char *saveptr = NULL;
- char *tmp = NULL;
- char *key_cpy = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", dict, out);
trav = this->children;
while (trav) {
- error = -1;
- for (pair = dict->members_list; pair; pair = pair->next) {
- key_cpy = gf_strdup (pair->key);
- tmp = strtok_r (key_cpy, ".", &saveptr);
- ret = strcmp (tmp, "auth");
- if (ret == 0) {
- /* for module type */
- tmp = strtok_r (NULL, ".", &saveptr);
- /* for volume name */
- tmp = strtok_r (NULL, ".", &saveptr);
- }
+ error = dict_foreach (dict, _check_for_auth_option,
+ trav->xlator);
- if (strcmp (tmp, trav->xlator->name) == 0) {
- error = 0;
- GF_FREE (key_cpy);
- break;
- }
- GF_FREE (key_cpy);
- }
if (-1 == error) {
- gf_log (this->name, GF_LOG_ERROR,
- "volume '%s' defined as subvolume, but no "
- "authentication defined for the same",
- trav->xlator->name);
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_AUTHENTICATE_ERROR, "volume '%s' "
+ "defined as subvolume, but no authentication "
+ "defined for the same", trav->xlator->name);
break;
}
trav = trav->next;
}
+out:
return error;
}
@@ -433,38 +481,122 @@ int
server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
void *data)
{
- xlator_t *this = NULL;
- rpc_transport_t *xprt = NULL;
- server_connection_t *conn = NULL;
+ gf_boolean_t detached = _gf_false;
+ xlator_t *this = NULL;
+ rpc_transport_t *trans = NULL;
+ server_conf_t *conf = NULL;
+ client_t *client = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ struct timespec grace_ts = {0, };
if (!xl || !data) {
- gf_log ("server", GF_LOG_WARNING,
- "Calling rpc_notify without initializing");
+ gf_msg_callingfn ("server", GF_LOG_WARNING, 0,
+ PS_MSG_RPC_NOTIFY_ERROR,
+ "Calling rpc_notify without initializing");
goto out;
}
this = xl;
- xprt = data;
+ trans = data;
+ conf = this->private;
switch (event) {
case RPCSVC_EVENT_ACCEPT:
{
/* Have a structure per new connection */
/* TODO: Should we create anything here at all ? * /
- conn = create_server_conn_state (this, xprt);
- if (!conn)
- goto out;
+ client->conn = create_server_conn_state (this, trans);
+ if (!client->conn)
+ goto out;
- xprt->protocol_private = conn;
+ trans->protocol_private = client->conn;
*/
- xprt->mydata = this;
+
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_add_tail (&trans->list, &conf->xprt_list);
+ }
+ pthread_mutex_unlock (&conf->mutex);
+
break;
}
case RPCSVC_EVENT_DISCONNECT:
- conn = get_server_conn_state (this, xprt);
- if (conn)
- destroy_server_conn_state (conn);
+ /* A DISCONNECT event could come without an ACCEPT event
+ * happening for this transport. This happens when the server is
+ * expecting encrypted connections but the client tries to
+ * connect unencrypted
+ */
+ if (list_empty (&trans->list))
+ break;
+
+ /* transport has to be removed from the list upon disconnect
+ * irrespective of whether lock self heal is off or on, since
+ * new transport will be created upon reconnect.
+ */
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_del_init (&trans->list);
+ }
+ pthread_mutex_unlock (&conf->mutex);
+
+ client = trans->xl_private;
+ if (!client)
+ break;
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_CLIENT_DISCONNECTING, "disconnecting connection"
+ " from %s", client->client_uid);
+
+ /* If lock self heal is off, then destroy the
+ conn object, else register a grace timer event */
+ if (!conf->lk_heal) {
+ gf_client_ref (client);
+ gf_client_put (client, &detached);
+ if (detached)
+ server_connection_cleanup (this, client,
+ INTERNAL_LOCKS | POSIX_LOCKS);
+ gf_client_unref (client);
+ break;
+ }
+ trans->xl_private = NULL;
+ server_connection_cleanup (this, client, INTERNAL_LOCKS);
+ serv_ctx = server_ctx_get (client, this);
+
+ if (serv_ctx == NULL) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_SERVER_CTX_GET_FAILED,
+ "server_ctx_get() failed");
+ goto out;
+ }
+
+ grace_ts.tv_sec = conf->grace_timeout;
+ grace_ts.tv_nsec = 0;
+
+ LOCK (&serv_ctx->fdtable_lock);
+ {
+ if (!serv_ctx->grace_timer) {
+
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PS_MSG_GRACE_TIMER_START,
+ "starting a grace timer for %s",
+ client->client_uid);
+
+ serv_ctx->grace_timer =
+ gf_timer_call_after (this->ctx,
+ grace_ts,
+ grace_time_handler,
+ client);
+ }
+ }
+ UNLOCK (&serv_ctx->fdtable_lock);
+ break;
+ case RPCSVC_EVENT_TRANSPORT_DESTROY:
+ /*- conn obj has been disassociated from trans on first
+ * disconnect.
+ * conn cleanup and destruction is handed over to
+ * grace_time_handler or the subsequent handler that 'owns'
+ * the conn. Nothing left to be done here. */
break;
default:
break;
@@ -479,53 +611,374 @@ mem_acct_init (xlator_t *this)
{
int ret = -1;
- if (!this)
- return ret;
+ GF_VALIDATE_OR_GOTO ("server", this, out);
ret = xlator_mem_acct_init (this, gf_server_mt_end + 1);
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, PS_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
}
+out:
+ return ret;
+}
+
+static int
+_delete_auth_opt (dict_t *this, char *key, data_t *value, void *data)
+{
+ char *auth_option_pattern[] = { "auth.addr.*.allow",
+ "auth.addr.*.reject",
+ "auth.login.*.ssl-allow",
+ NULL};
+ int i = 0;
+
+ for (i = 0; auth_option_pattern[i]; i++) {
+ if (fnmatch (auth_option_pattern[i], key, 0) == 0) {
+ dict_del (this, key);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+_copy_auth_opt (dict_t *unused, char *key, data_t *value, void *xl_dict)
+{
+ char *auth_option_pattern[] = { "auth.addr.*.allow",
+ "auth.addr.*.reject",
+ "auth.login.*.ssl-allow",
+ NULL};
+ int i = 0;
+
+ for (i = 0; auth_option_pattern [i]; i++) {
+ if (fnmatch (auth_option_pattern[i], key, 0) == 0) {
+ dict_set ((dict_t *)xl_dict, key, value);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int
+server_init_grace_timer (xlator_t *this, dict_t *options,
+ server_conf_t *conf)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ GF_OPTION_RECONF ("lk-heal", conf->lk_heal, options, bool, out);
+
+ gf_msg_debug (this->name, 0, "lk-heal = %s",
+ (conf->lk_heal) ? "on" : "off");
+
+ GF_OPTION_RECONF ("grace-timeout", conf->grace_timeout,
+ options, uint32, out);
+
+ gf_msg_debug (this->name, 0, "Server grace timeout value = %d",
+ conf->grace_timeout);
+
+ ret = 0;
+out:
return ret;
}
int
-init (xlator_t *this)
+server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t old,
+ int32_t new)
{
- int32_t ret = -1;
- server_conf_t *conf = NULL;
+ if (old == new)
+ return 0;
+
+ conf->event_threads = new;
+ return event_reconfigure_threads (this->ctx->event_pool,
+ conf->event_threads);
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+
+ server_conf_t *conf =NULL;
+ rpcsvc_t *rpc_conf;
+ rpcsvc_listener_t *listeners;
+ rpc_transport_t *xprt = NULL;
+ int inode_lru_limit;
+ gf_boolean_t trace;
+ data_t *data;
+ int ret = 0;
+ char *statedump_path = NULL;
+ xlator_t *xl = NULL;
+ int32_t new_nthread = 0;
+
+ conf = this->private;
+
+ if (!conf) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ PS_MSG_INVALID_ENTRY, "conf == null!!!");
+ goto out;
+ }
+
+ if (dict_get_int32 ( options, "inode-lru-limit", &inode_lru_limit) == 0){
+ conf->inode_lru_limit = inode_lru_limit;
+ gf_msg_trace (this->name, 0, "Reconfigured inode-lru-limit to "
+ "%d", conf->inode_lru_limit);
+
+ /* traverse through the xlator graph. For each xlator in the
+ graph check whether it is a bound_xl or not (bound_xl means
+ the xlator will have its itable pointer set). If so, then
+ set the lru limit for the itable.
+ */
+ xlator_foreach (this, xlator_set_inode_lru_limit,
+ &inode_lru_limit);
+ }
+
+ data = dict_get (options, "trace");
+ if (data) {
+ ret = gf_string2boolean (data->data, &trace);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PS_MSG_INVALID_ENTRY, "'trace' takes on only "
+ "boolean values. Neglecting option");
+ ret = -1;
+ goto out;
+ }
+ conf->trace = trace;
+ gf_msg_trace (this->name, 0, "Reconfigured trace to %d",
+ conf->trace);
+
+ }
+
+ GF_OPTION_RECONF ("statedump-path", statedump_path,
+ options, path, out);
+ if (!statedump_path) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_STATEDUMP_PATH_ERROR,
+ "Error while reconfiguring statedump path");
+ ret = -1;
+ goto out;
+ }
+ gf_path_strip_trailing_slashes (statedump_path);
+ GF_FREE (this->ctx->statedump_path);
+ this->ctx->statedump_path = gf_strdup (statedump_path);
+
+ if (!conf->auth_modules)
+ conf->auth_modules = dict_new ();
+
+ dict_foreach (options, get_auth_types, conf->auth_modules);
+ ret = validate_auth_options (this, options);
+ if (ret == -1) {
+ /* logging already done in validate_auth_options function. */
+ goto out;
+ }
+
+ dict_foreach (this->options, _delete_auth_opt, this->options);
+ dict_foreach (options, _copy_auth_opt, this->options);
+
+ ret = gf_auth_init (this, conf->auth_modules);
+ if (ret) {
+ dict_unref (conf->auth_modules);
+ goto out;
+ }
+
+ GF_OPTION_RECONF ("manage-gids", conf->server_manage_gids, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("gid-timeout", conf->gid_cache_timeout, options,
+ int32, out);
+ if (gid_cache_reconf (&conf->gid_cache, conf->gid_cache_timeout) < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_GRP_CACHE_ERROR,
+ "Failed to reconfigure group cache.");
+ goto out;
+ }
+
+ rpc_conf = conf->rpc;
+ if (!rpc_conf) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_RPC_CONF_ERROR,
+ "No rpc_conf !!!!");
+ goto out;
+ }
+
+ ret = rpcsvc_auth_reconf (rpc_conf, options);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to reconfigure authentication");
+ goto out;
+ }
+
+ GF_OPTION_RECONF ("dynamic-auth", conf->dync_auth, options,
+ bool, out);
+
+ if (conf->dync_auth) {
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ /* check for client authorization */
+ if (!xprt->clnt_options) {
+ /* If clnt_options dictionary is null,
+ * which means for this transport
+ * server_setvolume was not called.
+ *
+ * So here we can skip authentication
+ * because server_setvolume will do
+ * gf_authenticate.
+ *
+ */
+ continue;
+ }
+ ret = gf_authenticate (xprt->clnt_options,
+ options, conf->auth_modules);
+ if (ret == AUTH_ACCEPT) {
+ gf_msg (this->name, GF_LOG_TRACE, 0,
+ PS_MSG_CLIENT_ACCEPTED,
+ "authorized client, hence we "
+ "continue with this connection");
+ } else {
+ gf_msg (this->name, GF_LOG_INFO,
+ EACCES,
+ PS_MSG_AUTHENTICATE_ERROR,
+ "unauthorized client, hence "
+ "terminating the connection %s",
+ xprt->peerinfo.identifier);
+ rpc_transport_disconnect(xprt);
+ }
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ }
+
+ ret = rpcsvc_set_outstanding_rpc_limit (rpc_conf, options,
+ RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_RPC_CONF_ERROR,
+ "Failed to reconfigure outstanding-rpc-limit");
+ goto out;
+ }
+
+ list_for_each_entry (listeners, &(rpc_conf->listeners), list) {
+ if (listeners->trans != NULL) {
+ if (listeners->trans->reconfigure )
+ listeners->trans->reconfigure (listeners->trans, options);
+ else
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_TRANSPORT_ERROR, "Reconfigure "
+ "not found for transport");
+ }
+ }
- if (!this)
+ GF_OPTION_RECONF ("event-threads", new_nthread, options, int32, out);
+ ret = server_check_event_threads (this, conf, conf->event_threads,
+ new_nthread);
+ if (ret)
goto out;
+ ret = server_init_grace_timer (this, options, conf);
+
+out:
+ gf_msg_debug ("", 0, "returning %d", ret);
+ return ret;
+}
+
+static int32_t
+client_destroy_cbk (xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+ server_ctx_t *ctx = NULL;
+
+ client_ctx_del (client, this, &tmp);
+
+ ctx = tmp;
+
+ if (ctx == NULL)
+ return 0;
+
+ gf_fd_fdtable_destroy (ctx->fdtable);
+ LOCK_DESTROY (&ctx->fdtable_lock);
+ GF_FREE (ctx);
+
+ return 0;
+}
+
+int
+init (xlator_t *this)
+{
+ int32_t ret = -1;
+ server_conf_t *conf = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ char *transport_type = NULL;
+ char *statedump_path = NULL;
+ int total_transport = 0;
+
+ GF_VALIDATE_OR_GOTO ("init", this, out);
+
if (this->children == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_SUBVOL_NULL,
"protocol/server should have subvolume");
goto out;
}
if (this->parents != NULL) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_PARENT_VOL_ERROR,
"protocol/server should not have parent volumes");
goto out;
}
- conf = GF_CALLOC (1, sizeof (server_conf_t), gf_server_mt_server_conf_t);
+ conf = GF_CALLOC (1, sizeof (server_conf_t),
+ gf_server_mt_server_conf_t);
+
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- INIT_LIST_HEAD (&conf->conns);
+ INIT_LIST_HEAD (&conf->xprt_list);
pthread_mutex_init (&conf->mutex, NULL);
- this->private = conf;
+ LOCK_INIT (&conf->itable_lock);
+
+ /* Set event threads to the configured default */
+ GF_OPTION_INIT("event-threads", conf->event_threads, int32, out);
+ ret = server_check_event_threads (this, conf, STARTING_EVENT_THREADS,
+ conf->event_threads);
+ if (ret)
+ goto out;
+
+ ret = server_init_grace_timer (this, this->options, conf);
+ if (ret)
+ goto out;
ret = server_build_config (this, conf);
if (ret)
goto out;
+ ret = dict_get_str (this->options, "config-directory", &conf->conf_dir);
+ if (ret)
+ conf->conf_dir = CONFDIR;
+
+ conf->child_up = _gf_false;
+
+ /*ret = dict_get_str (this->options, "statedump-path", &statedump_path);
+ if (!ret) {
+ gf_path_strip_trailing_slashes (statedump_path);
+ this->ctx->statedump_path = statedump_path;
+ }*/
+ GF_OPTION_INIT ("statedump-path", statedump_path, path, out);
+ if (statedump_path) {
+ gf_path_strip_trailing_slashes (statedump_path);
+ this->ctx->statedump_path = gf_strdup (statedump_path);
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_STATEDUMP_PATH_ERROR,
+ "Error setting statedump path");
+ ret = -1;
+ goto out;
+ }
+
/* Authentication modules */
conf->auth_modules = dict_new ();
GF_VALIDATE_OR_GOTO(this->name, conf->auth_modules, out);
@@ -543,27 +996,108 @@ init (xlator_t *this)
goto out;
}
+ ret = dict_get_str_boolean (this->options, "manage-gids", _gf_false);
+ if (ret == -1)
+ conf->server_manage_gids = _gf_false;
+ else
+ conf->server_manage_gids = ret;
+
+ GF_OPTION_INIT("gid-timeout", conf->gid_cache_timeout, int32, out);
+ if (gid_cache_init (&conf->gid_cache, conf->gid_cache_timeout) < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_GRP_CACHE_ERROR,
+ "Failed to initialize group cache.");
+ goto out;
+ }
+ ret = dict_get_str_boolean (this->options, "dynamic-auth",
+ _gf_true);
+ if (ret == -1)
+ conf->dync_auth = _gf_true;
+ else
+ conf->dync_auth = ret;
+
/* RPC related */
- //conf->rpc = rpc_svc_init (&conf->rpc_conf);
- conf->rpc = rpcsvc_init (this->ctx, this->options);
- if (!conf->rpc) {
+ conf->rpc = rpcsvc_init (this, this->ctx, this->options, 0);
+ if (conf->rpc == NULL) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_RPCSVC_CREATE_FAILED, "creation of rpcsvc "
+ "failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpcsvc_set_outstanding_rpc_limit (conf->rpc, this->options,
+ RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_RPC_CONF_ERROR,
+ "Failed to configure outstanding-rpc-limit");
+ goto out;
+ }
+
+ /*
+ * This is the only place where we want secure_srvr to reflect
+ * the data-plane setting.
+ */
+ this->ctx->secure_srvr = MGMT_SSL_COPY_IO;
+
+ ret = dict_get_str (this->options, "transport-type", &transport_type);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_TRANSPORT_ERROR,
+ "option transport-type not set");
+ ret = -1;
+ goto out;
+ }
+ total_transport = rpc_transport_count (transport_type);
+ if (total_transport <= 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_TRANSPORT_ERROR,
+ "failed to get total number of available tranpsorts");
ret = -1;
goto out;
}
+ ret = rpcsvc_create_listeners (conf->rpc, this->options,
+ this->name);
+ if (ret < 1) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PS_MSG_RPCSVC_LISTENER_CREATE_FAILED,
+ "creation of listener failed");
+ if (ret != -EADDRINUSE)
+ ret = -1;
+ goto out;
+ } else if (ret < total_transport) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_RPCSVC_LISTENER_CREATE_FAILED,
+ "creation of %d listeners failed, continuing with "
+ "succeeded transport", (total_transport - ret));
+ }
ret = rpcsvc_register_notify (conf->rpc, server_rpc_notify, this);
- if (ret)
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PS_MSG_RPCSVC_NOTIFY,
+ "registration of notify with rpcsvc failed");
goto out;
+ }
- glusterfs3_1_fop_prog.options = this->options;
- ret = rpcsvc_program_register (conf->rpc, glusterfs3_1_fop_prog);
- if (ret)
+ glusterfs3_3_fop_prog.options = this->options;
+ ret = rpcsvc_program_register (conf->rpc, &glusterfs3_3_fop_prog);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PS_MSG_PGM_REG_FAILED,
+ "registration of program (name:%s, prognum:%d, "
+ "progver:%d) failed", glusterfs3_3_fop_prog.progname,
+ glusterfs3_3_fop_prog.prognum,
+ glusterfs3_3_fop_prog.progver);
goto out;
+ }
gluster_handshake_prog.options = this->options;
- ret = rpcsvc_program_register (conf->rpc, gluster_handshake_prog);
- if (ret)
+ ret = rpcsvc_program_register (conf->rpc, &gluster_handshake_prog);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PS_MSG_PGM_REG_FAILED,
+ "registration of program (name:%s, prognum:%d, "
+ "progver:%d) failed", gluster_handshake_prog.progname,
+ gluster_handshake_prog.prognum,
+ gluster_handshake_prog.progver);
+ rpcsvc_program_unregister (conf->rpc, &glusterfs3_3_fop_prog);
goto out;
+ }
#ifndef GF_DARWIN_HOST_OS
{
@@ -573,28 +1107,37 @@ init (xlator_t *this)
lim.rlim_max = 1048576;
if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "WARNING: Failed to set 'ulimit -n 1M': %s",
- strerror(errno));
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ PS_MSG_ULIMIT_SET_FAILED, "WARNING: Failed to "
+ "set 'ulimit -n 1M': %s", strerror(errno));
lim.rlim_cur = 65536;
lim.rlim_max = 65536;
if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set max open fd to 64k: %s",
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ PS_MSG_FD_NOT_FOUND, "Failed to set "
+ "max open fd to 64k: %s",
strerror(errno));
} else {
- gf_log (this->name, GF_LOG_TRACE,
- "max open fd set to 64k");
+ gf_msg_trace (this->name, 0, "max open fd set "
+ "to 64k");
}
}
}
#endif
+ this->private = conf;
ret = 0;
out:
- if (ret && this)
- this->fini (this);
+ if (ret) {
+ if (this != NULL) {
+ this->fini (this);
+ }
+
+ if (listener != NULL) {
+ rpcsvc_listener_destroy (listener);
+ }
+ }
return ret;
}
@@ -603,6 +1146,7 @@ out:
void
fini (xlator_t *this)
{
+#if 0
server_conf_t *conf = NULL;
conf = this->private;
@@ -611,17 +1155,14 @@ fini (xlator_t *this)
if (conf->rpc) {
/* TODO: memory leak here, have to free RPC */
/*
- if (conf->rpc->conn) {
- rpcsvc_conn_destroy (conf->rpc->conn);
- }
- rpcsvc_fini (conf->rpc);
+ if (conf->rpc->conn) {
+ rpcsvc_conn_destroy (conf->rpc->conn);
+ }
+ rpcsvc_fini (conf->rpc);
*/
;
}
- if (conf->conf_dir)
- GF_FREE (conf->conf_dir);
-
if (conf->auth_modules)
dict_unref (conf->auth_modules);
@@ -629,56 +1170,255 @@ fini (xlator_t *this)
}
this->private = NULL;
-
+#endif
return;
}
int
+server_process_event_upcall (xlator_t *this, void *data)
+{
+ int ret = -1;
+ server_conf_t *conf = NULL;
+ client_t *client = NULL;
+ char *client_uid = NULL;
+ struct gf_upcall *upcall_data = NULL;
+ void *up_req = NULL;
+ rpc_transport_t *xprt = NULL;
+ enum gf_cbk_procnum cbk_procnum = GF_CBK_NULL;
+ gfs3_cbk_cache_invalidation_req gf_c_req = {0,};
+ gfs3_recall_lease_req gf_recall_lease = {{0,},};
+ xdrproc_t xdrproc;
+
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ upcall_data = (struct gf_upcall *)data;
+ client_uid = upcall_data->client_uid;
+ GF_VALIDATE_OR_GOTO(this->name, client_uid, out);
+
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ret = gf_proto_cache_invalidation_from_upcall (this, &gf_c_req,
+ upcall_data);
+ if (ret < 0)
+ goto out;
+
+ up_req = &gf_c_req;
+ cbk_procnum = GF_CBK_CACHE_INVALIDATION;
+ xdrproc = (xdrproc_t)xdr_gfs3_cbk_cache_invalidation_req;
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ ret = gf_proto_recall_lease_from_upcall (this, &gf_recall_lease,
+ upcall_data);
+ if (ret < 0)
+ goto out;
+
+ up_req = &gf_recall_lease;
+ cbk_procnum = GF_CBK_RECALL_LEASE;
+ xdrproc = (xdrproc_t)xdr_gfs3_recall_lease_req;
+ break;
+ default:
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ PS_MSG_INVALID_ENTRY,
+ "Received invalid upcall event(%d)",
+ upcall_data->event_type);
+ goto out;
+ }
+
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ client = xprt->xl_private;
+
+ /* 'client' is not atomically added during xprt entry
+ * addition to the list. */
+ if (!client || strcmp(client->client_uid, client_uid))
+ continue;
+
+ rpcsvc_request_submit(conf->rpc, xprt,
+ &server_cbk_prog,
+ cbk_procnum,
+ up_req,
+ this->ctx,
+ xdrproc);
+ break;
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ ret = 0;
+out:
+ if ((gf_c_req.xdata).xdata_val)
+ GF_FREE ((gf_c_req.xdata).xdata_val);
+
+ if ((gf_recall_lease.xdata).xdata_val)
+ GF_FREE ((gf_recall_lease.xdata).xdata_val);
+
+ return ret;
+}
+
+int
+server_process_child_event (xlator_t *this, int32_t event, void *data,
+ enum gf_cbk_procnum cbk_procnum)
+{
+ int ret = -1;
+ server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ rpcsvc_callback_submit (conf->rpc, xprt,
+ &server_cbk_prog,
+ cbk_procnum,
+ NULL, 0);
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
notify (xlator_t *this, int32_t event, void *data, ...)
{
- int ret = 0;
+ int ret = -1;
+ int32_t val = 0;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ server_conf_t *conf = NULL;
+ va_list ap;
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+ va_end (ap);
+
switch (event) {
- default:
+ case GF_EVENT_UPCALL:
+ {
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+
+ ret = server_process_event_upcall (this, data);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_SERVER_EVENT_UPCALL_FAILED,
+ "server_process_event_upcall failed");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_EVENT_PARENT_UP:
+ {
+ conf = this->private;
+
+ conf->parent_up = _gf_true;
+
+ default_notify (this, event, data);
+ break;
+ }
+
+ case GF_EVENT_CHILD_UP:
+ {
+ conf->child_up = _gf_true;
+ ret = server_process_child_event (this, event, data,
+ GF_CBK_CHILD_UP);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_SERVER_EVENT_UPCALL_FAILED,
+ "server_process_child_event failed");
+ goto out;
+ }
+
default_notify (this, event, data);
break;
}
+ case GF_EVENT_CHILD_DOWN:
+ {
+ conf->child_up = _gf_false;
+ ret = server_process_child_event (this, event, data,
+ GF_CBK_CHILD_DOWN);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_SERVER_EVENT_UPCALL_FAILED,
+ "server_process_child_event failed");
+ goto out;
+ }
+
+ default_notify (this, event, data);
+ break;
+
+ }
+
+ default:
+ default_notify (this, event, data);
+ break;
+ }
+ ret = 0;
+out:
return ret;
}
-struct xlator_fops fops = {
-};
+struct xlator_fops fops;
struct xlator_cbks cbks = {
+ .client_destroy = client_destroy_cbk,
};
struct xlator_dumpops dumpops = {
- .priv = server_priv,
- .fd = server_fd,
- .inode = server_inode,
+ .priv = server_priv,
+ .fd = gf_client_dump_fdtables,
+ .inode = gf_client_dump_inodes,
+ .priv_to_dict = server_priv_to_dict,
+ .fd_to_dict = gf_client_dump_fdtables_to_dict,
+ .inode_to_dict = gf_client_dump_inodes_to_dict,
};
struct volume_options options[] = {
{ .key = {"transport-type"},
.value = {"rpc", "rpc-over-rdma", "tcp", "socket", "ib-verbs",
- "unix", "ib-sdp", "tcp/server", "ib-verbs/server"},
+ "unix", "ib-sdp", "tcp/server", "ib-verbs/server", "rdma",
+ "rdma*([ \t]),*([ \t])socket",
+ "rdma*([ \t]),*([ \t])tcp",
+ "tcp*([ \t]),*([ \t])rdma",
+ "socket*([ \t]),*([ \t])rdma"},
.type = GF_OPTION_TYPE_STR
},
{ .key = {"volume-filename.*"},
.type = GF_OPTION_TYPE_PATH,
},
- { .key = {"transport.*"},
- .type = GF_OPTION_TYPE_ANY,
+ { .key = {"transport.tcp-user-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 1013,
+ .default_value = "42", /* default like network.ping-timeout */
},
- { .key = {"rpc*"},
+ { .key = {"transport.*"},
.type = GF_OPTION_TYPE_ANY,
},
{ .key = {"inode-lru-limit"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .max = (1 * GF_UNIT_MB)
+ .max = 1048576,
+ .default_value = "16384",
+ .description = "Specifies the limit on the number of inodes "
+ "in the lru list of the inode cache."
},
{ .key = {"verify-volfile-checksum"},
.type = GF_OPTION_TYPE_BOOL
@@ -690,6 +1430,109 @@ struct volume_options options[] = {
"conf-dir"},
.type = GF_OPTION_TYPE_PATH,
},
+ { .key = {"rpc-auth-allow-insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"root-squash"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Map requests from uid/gid 0 to the anonymous "
+ "uid/gid. Note that this does not apply to any other "
+ "uids or gids that might be equally sensitive, such "
+ "as user bin or group staff."
+ },
+ { .key = {"anonuid"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "65534", /* RPC_NOBODY_UID */
+ .min = 0,
+ .max = (uint32_t) -1,
+ .description = "value of the uid used for the anonymous "
+ "user/nfsnobody when root-squash is enabled."
+ },
+ { .key = {"anongid"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "65534", /* RPC_NOBODY_GID */
+ .min = 0,
+ .max = (uint32_t) -1,
+ .description = "value of the gid used for the anonymous "
+ "user/nfsnobody when root-squash is enabled."
+ },
+ { .key = {"statedump-path"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = DEFAULT_VAR_RUN_DIRECTORY,
+ .description = "Specifies directory in which gluster should save its"
+ " statedumps."
+ },
+ { .key = {"lk-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"grace-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 10,
+ .max = 1800,
+ .default_value = "10",
+ },
+ {.key = {"tcp-window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_MIN_SOCKET_WINDOW_SIZE,
+ .max = GF_MAX_SOCKET_WINDOW_SIZE,
+ .description = "Specifies the window size for tcp socket."
+ },
+ /* The following two options are defined in addr.c, redefined here *
+ * for the sake of validation during volume set from cli */
+
+ { .key = {"auth.addr.*.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .description = "Allow a comma separated list of addresses and/or "
+ "hostnames to connect to the server. Option "
+ "auth.reject overrides this option. By default, all "
+ "connections are allowed."
+ },
+ { .key = {"auth.addr.*.reject"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .description = "Reject a comma separated list of addresses and/or "
+ "hostnames to connect to the server. This option "
+ "overrides the auth.allow option. By default, all"
+ " connections are allowed."
+ },
+ { .key = {"rpc.outstanding-rpc-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = RPCSVC_MIN_OUTSTANDING_RPC_LIMIT,
+ .max = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT,
+ .default_value = TOSTRING(RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT),
+ .description = "Parameter to throttle the number of incoming RPC "
+ "requests from a client. 0 means no limit (can "
+ "potentially run out of memory)"
+ },
+ { .key = {"manage-gids"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Resolve groups on the server-side."
+ },
+ { .key = {"gid-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "300",
+ .description = "Timeout in seconds for the cached groups to expire."
+ },
+ { .key = {"event-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 32,
+ .default_value = "2",
+ .description = "Specifies the number of event threads to execute "
+ "in parallel. Larger values would help process"
+ " responses faster, depending on available processing"
+ " power. Range 1-32 threads."
+ },
+ { .key = {"dynamic-auth"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "When 'on' perform dynamic authentication of volume "
+ "options in order to allow/terminate client "
+ "transport connection immediately in response to "
+ "*.allow | *.reject volume set options."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index aecac45071c..fb9cd45db8a 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _SERVER_H
@@ -22,61 +13,29 @@
#include <pthread.h>
+#include "fd.h"
#include "rpcsvc.h"
#include "fd.h"
#include "protocol-common.h"
#include "server-mem-types.h"
-#include "glusterfs-xdr.h"
+#include "glusterfs3.h"
+#include "timer.h"
+#include "client_t.h"
+#include "gidcache.h"
+#include "defaults.h"
#define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */
#define DEFAULT_VOLUME_FILE_PATH CONFDIR "/glusterfs.vol"
+#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
+#define GF_MIN_SOCKET_WINDOW_SIZE (0)
-typedef struct _server_state server_state_t;
-
-struct _locker {
- struct list_head lockers;
- char *volume;
- loc_t loc;
- fd_t *fd;
- pid_t pid;
-};
-
-struct _lock_table {
- struct list_head file_lockers;
- struct list_head dir_lockers;
- gf_lock_t lock;
- size_t count;
-};
-
-
-/* private structure per connection (transport object)
- * used as transport_t->xl_private
- */
-struct _server_connection {
- struct list_head list;
- char *id;
- int ref;
- int active_transports;
- pthread_mutex_t lock;
- char disconnected;
- fdtable_t *fdtable;
- struct _lock_table *ltable;
- xlator_t *bound_xl;
- xlator_t *this;
-};
-
-typedef struct _server_connection server_connection_t;
-
-
-server_connection_t *
-server_connection_get (xlator_t *this, const char *id);
-
-void
-server_connection_put (xlator_t *this, server_connection_t *conn);
+typedef enum {
+ INTERNAL_LOCKS = 1,
+ POSIX_LOCKS = 2,
+} server_lock_flags_t;
-int
-server_connection_cleanup (xlator_t *this, server_connection_t *conn);
+typedef struct _server_state server_state_t;
int server_null (rpcsvc_request_t *req);
@@ -92,12 +51,32 @@ struct server_conf {
int inode_lru_limit;
gf_boolean_t verify_volfile;
gf_boolean_t trace;
+ gf_boolean_t lk_heal; /* If true means lock self
+ heal is on else off. */
char *conf_dir;
struct _volfile_ctx *volfile;
-
- dict_t *auth_modules;
- pthread_mutex_t mutex;
- struct list_head conns;
+ uint32_t grace_timeout;
+ dict_t *auth_modules;
+ pthread_mutex_t mutex;
+ struct list_head xprt_list;
+ pthread_t barrier_th;
+
+ gf_boolean_t server_manage_gids; /* resolve gids on brick */
+ gid_cache_t gid_cache;
+ int32_t gid_cache_timeout;
+
+ int event_threads; /* # of event threads
+ * configured */
+
+ gf_boolean_t parent_up;
+ gf_boolean_t dync_auth; /* if set authenticate dynamically,
+ * in case if volume set options
+ * (say *.allow | *.reject) are
+ * tweaked */
+ gf_boolean_t child_up; /* Set to true, when child is up, and
+ * false, when child is down */
+
+ gf_lock_t itable_lock;
};
typedef struct server_conf server_conf_t;
@@ -113,25 +92,19 @@ typedef enum {
struct resolve_comp {
char *basename;
- ino_t ino;
- uint64_t gen;
inode_t *inode;
};
typedef struct {
server_resolve_type_t type;
- uint64_t fd_no;
- ino_t ino;
- uint64_t gen;
- ino_t par;
+ int64_t fd_no;
+ u_char gfid[16];
+ u_char pargfid[16];
char *path;
char *bname;
- char *resolved;
int op_ret;
int op_errno;
- loc_t deep_loc;
- struct resolve_comp *components;
- int comp_count;
+ loc_t resolve_loc;
} server_resolve_t;
@@ -141,14 +114,13 @@ int
resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn);
struct _server_state {
- server_connection_t *conn;
- rpc_transport_t *xprt;
- inode_table_t *itable;
+ rpc_transport_t *xprt;
+ inode_table_t *itable;
- server_resume_fn_t resume_fn;
+ server_resume_fn_t resume_fn;
- loc_t loc;
- loc_t loc2;
+ loc_t loc;
+ loc_t loc2;
server_resolve_t resolve;
server_resolve_t resolve2;
@@ -159,42 +131,82 @@ struct _server_state {
struct iatt stbuf;
int valid;
- fd_t *fd;
- int flags;
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
int wbflags;
+ struct iovec payload_vector[MAX_IOVEC];
+ int payload_count;
struct iobuf *iobuf;
struct iobref *iobref;
- size_t size;
- off_t offset;
- mode_t mode;
- dev_t dev;
- size_t nr_count;
- int cmd;
- int type;
- char *name;
- int name_len;
-
- int mask;
- char is_revalidate;
- dict_t *dict;
- struct flock flock;
+ size_t size;
+ off_t offset;
+ mode_t mode;
+ dev_t dev;
+ size_t nr_count;
+ int cmd;
+ int type;
+ char *name;
+ int name_len;
+
+ int mask;
+ char is_revalidate;
+ dict_t *dict;
+ struct gf_flock flock;
const char *volume;
dir_entry_t *entry;
+ gf_seek_what_t what;
+
+ dict_t *xdata;
+ mode_t umask;
+ struct gf_lease lease;
+ lock_migration_info_t locklist;
+ /* required for compound fops */
+ gfs3_compound_req *req;
+ /* last length till which iovec for compound
+ * writes was processed */
+ int write_length;
+ struct iovec rsp_vector[MAX_IOVEC];
+ int rsp_count;
+ struct iobuf *rsp_iobuf;
+ struct iobref *rsp_iobref;
+ compound_args_t *args;
};
+
extern struct rpcsvc_program gluster_handshake_prog;
-extern struct rpcsvc_program glusterfs3_1_fop_prog;
-extern struct rpcsvc_program gluster_ping_prog;
+extern struct rpcsvc_program glusterfs3_3_fop_prog;
+
+typedef struct _server_ctx {
+ gf_lock_t fdtable_lock;
+ fdtable_t *fdtable;
+ struct _gf_timer *grace_timer;
+ uint32_t lk_version;
+} server_ctx_t;
-typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *args);
int
server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg,
struct iovec *payload, int payloadcount,
- struct iobref *iobref, gfs_serialize_t sfunc);
+ struct iobref *iobref, xdrproc_t xdrproc);
+
+int gf_server_check_setxattr_cmd (call_frame_t *frame, dict_t *dict);
+int gf_server_check_getxattr_cmd (call_frame_t *frame, const char *name);
-int xdr_to_glusterfs_req (rpcsvc_request_t *req, void *arg,
- gfs_serialize_t sfunc);
+void
+forget_inode_if_no_dentry (inode_t *inode);
+
+int
+unserialize_req_locklist (gfs3_setactivelk_req *req,
+ lock_migration_info_t *lmi);
+
+int
+serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp);
+
+int
+serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp);
+server_ctx_t*
+server_ctx_get (client_t *client, xlator_t *xlator);
#endif /* !_SERVER_H */
diff --git a/xlators/protocol/server/src/server3_1-fops.c b/xlators/protocol/server/src/server3_1-fops.c
deleted file mode 100644
index c04861ddf10..00000000000
--- a/xlators/protocol/server/src/server3_1-fops.c
+++ /dev/null
@@ -1,4875 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "server.h"
-#include "server-helpers.h"
-#include "glusterfs-xdr.h"
-#include "msg-xdr.h"
-#include "compat-errno.h"
-
-#include "md5.h"
-
-#define SERVER_PATH_MAX (16 * 1024)
-
-/* Callback function section */
-int
-server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- gfs3_statfs_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- if (op_ret >= 0) {
- gf_statfs_from_statfs (&rsp.statfs, buf);
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_statfs_rsp);
-
- return 0;
-}
-
-int
-server_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *dict,
- struct iatt *postparent)
-{
- rpcsvc_request_t *req = NULL;
- server_state_t *state = NULL;
- inode_t *root_inode = NULL;
- inode_t *link_inode = NULL;
- loc_t fresh_loc = {0,};
- gfs3_lookup_rsp rsp = {0, };
- int32_t ret = -1;
-
- state = CALL_STATE(frame);
-
- req = frame->local;
- frame->local = NULL;
-
- if (state->is_revalidate == 1 && op_ret == -1) {
- state->is_revalidate = 2;
- loc_copy (&fresh_loc, &state->loc);
- inode_unref (fresh_loc.inode);
- fresh_loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_lookup_cbk, BOUND_XL (frame),
- BOUND_XL (frame)->fops->lookup,
- &fresh_loc, state->dict);
-
- loc_wipe (&fresh_loc);
- return 0;
- }
-
- if (dict) {
- rsp.dict.dict_len = dict_serialized_length (dict);
- if (rsp.dict.dict_len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized "
- "length of reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- rsp.dict.dict_len = 0;
- }
- }
-
- if ((op_ret >= 0) && dict) {
- rsp.dict.dict_val = GF_CALLOC (1, rsp.dict.dict_len,
- gf_server_mt_rsp_buf_t);
- if (!rsp.dict.dict_val) {
- op_ret = -1;
- op_errno = ENOMEM;
- rsp.dict.dict_len = 0;
- goto out;
- }
- ret = dict_serialize (dict, rsp.dict.dict_val);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = -ret;
- rsp.dict.dict_len = 0;
- }
- }
-
- gf_stat_from_iatt (&rsp.postparent, postparent);
-
- if (op_ret == 0) {
- root_inode = BOUND_XL(frame)->itable->root;
- if (inode == root_inode) {
- /* we just looked up root ("/") */
- stbuf->ia_ino = 1;
- if (inode->ia_type == 0)
- inode->ia_type = stbuf->ia_type;
- }
-
- gf_stat_from_iatt (&rsp.stat, stbuf);
-
- if (inode->ino != 1) {
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- }
- } else {
- if (state->is_revalidate && op_errno == ENOENT) {
- if (state->loc.inode->ino != 1) {
- inode_unlink (state->loc.inode,
- state->loc.parent,
- state->loc.name);
- }
- }
-
- gf_log (this->name,
- (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_DEBUG),
- "%"PRId64": LOOKUP %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-out:
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- (gfs_serialize_t)xdr_serialize_lookup_rsp);
-
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
-
- return 0;
-}
-
-
-int
-server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- gfs3_lk_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
- server_state_t *state = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- gf_flock_from_flock (&rsp.flock, lock);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": LK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_lk_rsp);
-
- return 0;
-}
-
-
-int
-server_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- conn = SERVER_CONNECTION(frame);
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- if (state->flock.l_type == F_UNLCK)
- gf_del_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-
-int
-server_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- conn = SERVER_CONNECTION(frame);
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- if (state->flock.l_type == F_UNLCK)
- gf_del_locker (conn->ltable, state->volume,
- NULL, state->fd,
- frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- NULL, state->fd,
- frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FINODELK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
- gf_common_rsp rsp = {0,};
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- conn = SERVER_CONNECTION(frame);
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- if (state->cmd == ENTRYLK_UNLOCK)
- gf_del_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
- return 0;
-}
-
-
-int
-server_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- conn = SERVER_CONNECTION(frame);
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- if (state->cmd == ENTRYLK_UNLOCK)
- gf_del_locker (conn->ltable, state->volume,
- NULL, state->fd, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- NULL, state->fd, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FENTRYLK %"PRId64" (%"PRId64") "
- " ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-
-int
-server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- gfs3_rmdir_rsp rsp = {0,};
- server_state_t *state = NULL;
- inode_t *parent = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
- parent = inode_parent (state->loc.inode, 0, NULL);
- if (parent)
- inode_unref (parent);
- else
- inode_forget (state->loc.inode, 0);
-
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": RMDIR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_rmdir_rsp);
-
- return 0;
-}
-
-int
-server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gfs3_mkdir_rsp rsp = {0,};
- server_state_t *state = NULL;
- inode_t *link_inode = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp.stat, stbuf);
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": MKDIR %s ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_mkdir_rsp);
-
- return 0;
-}
-
-int
-server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gfs3_mknod_rsp rsp = {0,};
- server_state_t *state = NULL;
- inode_t *link_inode = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp.stat, stbuf);
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": MKNOD %s ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_mknod_rsp);
-
-
- return 0;
-}
-
-int
-server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FSYNCDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- gfs3_readdir_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
- int ret = 0;
-
- req = frame->local;
- frame->local = NULL;
-
- state = CALL_STATE(frame);
- if (op_ret > 0) {
- ret = serialize_rsp_dirent (entries, &rsp);
- if (ret == -1) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": READDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-unwind:
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_readdir_rsp);
-
- readdir_rsp_cleanup (&rsp);
-
- return 0;
-}
-
-
-int
-server_releasedir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
- gfs3_opendir_rsp rsp = {0,};
- uint64_t fd_no = 0;
-
- conn = SERVER_CONNECTION (frame);
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- fd_bind (fd);
-
- fd_no = gf_fd_unused_get (conn->fdtable, fd);
- fd_ref (fd); // on behalf of the client
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": OPENDIR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.fd = fd_no;
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_opendir_rsp);
-
- return 0;
-}
-
-int
-server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gfs3_getxattr_rsp rsp = {0,};
- int32_t len = 0;
- int32_t ret = -1;
- rpcsvc_request_t *req = NULL;
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length of "
- "reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- goto out;
- }
-
- rsp.dict.dict_val = GF_CALLOC (len, sizeof (char),
- gf_server_mt_rsp_buf_t);
- if (!rsp.dict.dict_val) {
- op_ret = -1;
- op_errno = ENOMEM;
- len = 0;
- goto out;
- }
- ret = dict_serialize (dict, rsp.dict.dict_val);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-out:
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.dict.dict_len = len;
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_getxattr_rsp);
-
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
-
- return 0;
-}
-
-
-int
-server_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gfs3_fgetxattr_rsp rsp = {0,};
- int32_t len = 0;
- int32_t ret = -1;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized "
- "length of reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- goto out;
- }
- rsp.dict.dict_val = GF_CALLOC (1, len, gf_server_mt_rsp_buf_t);
- if (!rsp.dict.dict_val) {
- op_ret = -1;
- op_errno = ENOMEM;
- len = 0;
- goto out;
- }
- ret = dict_serialize (dict, rsp.dict.dict_val);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->resolve.ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
-
-out:
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.dict.dict_len = len;
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_fgetxattr_rsp);
-
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
-
- return 0;
-}
-
-int
-server_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-
-int
-server_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- gfs3_rename_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- stbuf->ia_ino = state->loc.inode->ino;
- stbuf->ia_type = state->loc.inode->ia_type;
-
- gf_log (state->conn->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": RENAME_CBK (%"PRId64") %"PRId64"/%s "
- "==> %"PRId64"/%s",
- frame->root->unique, state->loc.inode->ino,
- state->loc.parent->ino, state->loc.name,
- state->loc2.parent->ino, state->loc2.name);
-
- inode_rename (state->itable,
- state->loc.parent, state->loc.name,
- state->loc2.parent, state->loc2.name,
- state->loc.inode, stbuf);
- gf_stat_from_iatt (&rsp.stat, stbuf);
-
- gf_stat_from_iatt (&rsp.preoldparent, preoldparent);
- gf_stat_from_iatt (&rsp.postoldparent, postoldparent);
-
- gf_stat_from_iatt (&rsp.prenewparent, prenewparent);
- gf_stat_from_iatt (&rsp.postnewparent, postnewparent);
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_rename_rsp);
-
- return 0;
-}
-
-int
-server_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- gfs3_unlink_rsp rsp = {0,};
- server_state_t *state = NULL;
- inode_t *parent = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- gf_log (state->conn->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": UNLINK_CBK %"PRId64"/%s (%"PRId64")",
- frame->root->unique, state->loc.parent->ino,
- state->loc.name, state->loc.inode->ino);
-
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
-
- parent = inode_parent (state->loc.inode, 0, NULL);
- if (parent)
- inode_unref (parent);
- else
- inode_forget (state->loc.inode, 0);
-
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
-
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": UNLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_unlink_rsp);
-
- return 0;
-}
-
-int
-server_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gfs3_symlink_rsp rsp = {0,};
- server_state_t *state = NULL;
- inode_t *link_inode = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp.stat, stbuf);
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": SYMLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_symlink_rsp);
-
- return 0;
-}
-
-
-int
-server_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
-{
- gfs3_link_rsp rsp = {0,};
- server_state_t *state = NULL;
- inode_t *link_inode = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- stbuf->ia_ino = state->loc.inode->ino;
-
- gf_stat_from_iatt (&rsp.stat, stbuf);
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
-
- gf_log (state->conn->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s",
- frame->root->unique, inode->ino,
- state->loc2.parent->ino,
- state->loc2.name, state->loc.parent->ino,
- state->loc.name);
-
- link_inode = inode_link (inode, state->loc2.parent,
- state->loc2.name, stbuf);
- inode_unref (link_inode);
- } else {
- gf_log (state->conn->bound_xl->name, GF_LOG_DEBUG,
- "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s "
- " ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve2.ino,
- state->resolve2.par,
- state->resolve2.bname, state->resolve.par,
- state->resolve.bname,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_link_rsp);
-
- return 0;
-}
-
-int
-server_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gfs3_truncate_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE (frame);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp.prestat, prebuf);
- gf_stat_from_iatt (&rsp.poststat, postbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": TRUNCATE %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_truncate_rsp);
-
- return 0;
-}
-
-int
-server_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf)
-{
- gfs3_fstat_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp.stat, stbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSTAT %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_fstat_rsp);
-
- return 0;
-}
-
-int
-server_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gfs3_ftruncate_rsp rsp = {0};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE (frame);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp.prestat, prebuf);
- gf_stat_from_iatt (&rsp.poststat, postbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FTRUNCATE %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_ftruncate_rsp);
-
- return 0;
-}
-
-int
-server_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FLUSH %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
-
-
- return 0;
-}
-
-int
-server_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gfs3_fsync_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSYNC %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- } else {
- gf_stat_from_iatt (&(rsp.prestat), prebuf);
- gf_stat_from_iatt (&(rsp.poststat), postbuf);
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_fsync_rsp);
-
- return 0;
-}
-
-int
-server_release_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_common_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_common_rsp);
- return 0;
-}
-
-
-int
-server_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- gfs3_write_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp.prestat, prebuf);
- gf_stat_from_iatt (&rsp.poststat, postbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": WRITEV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_writev_rsp);
-
- return 0;
-}
-
-
-int
-server_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
-{
- gfs3_read_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE(frame);
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp.stat, stbuf);
- rsp.size = op_ret;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": READV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, vector, count, iobref,
- xdr_serialize_readv_rsp);
-
- return 0;
-}
-
-int
-server_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint8_t *fchecksum, uint8_t *dchecksum)
-{
- gfs3_checksum_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- if (op_ret >= 0) {
- rsp.fchecksum.fchecksum_val = (char *)fchecksum;
- rsp.fchecksum.fchecksum_len = NAME_MAX;
- rsp.dchecksum.dchecksum_val = (char *)dchecksum;
- rsp.dchecksum.dchecksum_len = NAME_MAX;
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_checksum_rsp);
-
- return 0;
-}
-
-
-int
-server_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
-{
- gfs3_rchecksum_rsp rsp = {0,};
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- if (op_ret >= 0) {
- rsp.weak_checksum = weak_checksum;
-
- rsp.strong_checksum.strong_checksum_val = (char *)strong_checksum;
- rsp.strong_checksum.strong_checksum_len = MD5_DIGEST_LEN;
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_rchecksum_rsp);
-
- return 0;
-}
-
-
-int
-server_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
- uint64_t fd_no = 0;
- gfs3_open_rsp rsp = {0,};
-
- conn = SERVER_CONNECTION (frame);
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- fd_bind (fd);
- fd_no = gf_fd_unused_get (conn->fdtable, fd);
- fd_ref (fd);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": OPEN %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.fd = fd_no;
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_open_rsp);
- return 0;
-}
-
-
-int
-server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
-{
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- inode_t *link_inode = NULL;
- rpcsvc_request_t *req = NULL;
- uint64_t fd_no = 0;
- gfs3_create_rsp rsp = {0,};
-
- conn = SERVER_CONNECTION (frame);
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- gf_log (state->conn->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": CREATE %"PRId64"/%s (%"PRId64")",
- frame->root->unique, state->loc.parent->ino,
- state->loc.name, stbuf->ia_ino);
-
- link_inode = inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
-
- if (link_inode != inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "create(%s) inode (ptr=%p, ino=%"PRId64", "
- "gen=%"PRId64") found conflict (ptr=%p, "
- "ino=%"PRId64", gen=%"PRId64")",
- state->loc.path, inode, inode->ino,
- inode->generation, link_inode,
- link_inode->ino, link_inode->generation);
-
- /*
- VERY racy code (if used anywhere else)
- -- don't do this without understanding
- */
-
- inode_unref (fd->inode);
- fd->inode = inode_ref (link_inode);
- }
-
- inode_lookup (link_inode);
- inode_unref (link_inode);
-
- fd_bind (fd);
-
- fd_no = gf_fd_unused_get (conn->fdtable, fd);
- fd_ref (fd);
-
- if ((fd_no < 0) || (fd == 0)) {
- op_ret = fd_no;
- op_errno = errno;
- }
-
- gf_stat_from_iatt (&rsp.stat, stbuf);
- gf_stat_from_iatt (&rsp.preparent, preparent);
- gf_stat_from_iatt (&rsp.postparent, postparent);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": CREATE %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.fd = fd_no;
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_create_rsp);
-
- return 0;
-}
-
-int
-server_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *stbuf)
-{
- gfs3_readlink_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
-
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- gf_stat_from_iatt (&rsp.buf, stbuf);
- rsp.path = (char *)buf;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": READLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_readlink_rsp);
-
- return 0;
-}
-
-int
-server_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf)
-{
- gfs3_stat_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE (frame);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp.stat, stbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": STAT %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_stat_rsp);
-
- return 0;
-}
-
-
-int
-server_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
-{
- gfs3_setattr_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- state = CALL_STATE (frame);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp.statpre, statpre);
- gf_stat_from_iatt (&rsp.statpost, statpost);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": SETATTR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_setattr_rsp);
-
- return 0;
-}
-
-int
-server_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
-{
- gfs3_fsetattr_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- state = CALL_STATE (frame);
-
- if (op_ret == 0) {
- gf_stat_from_iatt (&rsp.statpre, statpre);
- gf_stat_from_iatt (&rsp.statpost, statpost);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSETATTR %"PRId64" (%"PRId64") ==> "
- "%"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_fsetattr_rsp);
-
- return 0;
-}
-
-
-int
-server_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gfs3_xattrop_rsp rsp = {0,};
- int32_t len = 0;
- int32_t ret = -1;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- state = CALL_STATE (frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": XATTROP %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- goto out;
- }
-
- if ((op_ret >= 0) && dict) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length"
- " for reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- goto out;
- }
- rsp.dict.dict_val = GF_CALLOC (1, len, gf_server_mt_rsp_buf_t);
- if (!rsp.dict.dict_val) {
- op_ret = -1;
- op_errno = ENOMEM;
- len = 0;
- goto out;
- }
- ret = dict_serialize (dict, rsp.dict.dict_val);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
-out:
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.dict.dict_len = len;
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_xattrop_rsp);
-
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
-
- return 0;
-}
-
-
-int
-server_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gfs3_xattrop_rsp rsp = {0,};
- int32_t len = 0;
- int32_t ret = -1;
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
-
- state = CALL_STATE(frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FXATTROP %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- goto out;
- }
-
- if ((op_ret >= 0) && dict) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to get "
- "serialized length for reply dict",
- state->resolve.fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- goto out;
- }
- rsp.dict.dict_val = GF_CALLOC (1, len, gf_server_mt_rsp_buf_t);
- if (!rsp.dict.dict_val) {
- op_ret = -1;
- op_errno = ENOMEM;
- len = 0;
- goto out;
- }
- ret = dict_serialize (dict, rsp.dict.dict_val);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to "
- "serialize reply dict",
- state->resolve.fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
-out:
- req = frame->local;
- frame->local = NULL;
-
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.dict.dict_len = len;
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_fxattrop_rsp);
-
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
-
- return 0;
-}
-
-
-int
-server_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- gfs3_readdirp_rsp rsp = {0,};
- server_state_t *state = NULL;
- rpcsvc_request_t *req = NULL;
- int ret = 0;
-
- req = frame->local;
- frame->local = NULL;
-
- state = CALL_STATE(frame);
- if (op_ret > 0) {
- ret = serialize_rsp_direntp (entries, &rsp);
- if (ret == -1) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": READDIRP %"PRId64" (%"PRId64") ==>"
- "%"PRId32" (%s)",
- frame->root->unique, state->resolve.fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
-out:
- rsp.gfs_id = req->gfs_id;
- rsp.op_ret = op_ret;
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
- xdr_serialize_readdirp_rsp);
-
- readdirp_rsp_cleanup (&rsp);
-
- return 0;
-}
-
-/* Resume function section */
-
-int
-server_rchecksum_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- STACK_WIND (frame, server_rchecksum_cbk, bound_xl,
- bound_xl->fops->rchecksum, state->fd,
- state->offset, state->size);
-
- return 0;
-err:
- server_rchecksum_cbk (frame, NULL, frame->this, -1, EINVAL, 0, NULL);
-
- return 0;
-
-}
-
-int
-server_checksum_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- STACK_WIND (frame, server_checksum_cbk, bound_xl,
- bound_xl->fops->checksum, &state->loc, state->flags);
-
- return 0;
-err:
- server_checksum_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
-
- return 0;
-}
-
-int
-server_lk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_lk_cbk, bound_xl, bound_xl->fops->lk,
- state->fd, state->cmd, &state->flock);
-
- return 0;
-
-err:
- server_lk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-int
-server_rename_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- if (state->resolve2.op_ret != 0) {
- op_ret = state->resolve2.op_ret;
- op_errno = state->resolve2.op_errno;
- goto err;
- }
-
- STACK_WIND (frame, server_rename_cbk,
- bound_xl, bound_xl->fops->rename,
- &state->loc, &state->loc2);
- return 0;
-err:
- server_rename_cbk (frame, NULL, frame->this, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int
-server_link_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- int op_ret = 0;
- int op_errno = 0;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0) {
- op_ret = state->resolve.op_ret;
- op_errno = state->resolve.op_errno;
- goto err;
- }
-
- if (state->resolve2.op_ret != 0) {
- op_ret = state->resolve2.op_ret;
- op_errno = state->resolve2.op_errno;
- goto err;
- }
-
- state->loc2.inode = inode_ref (state->loc.inode);
-
- STACK_WIND (frame, server_link_cbk, bound_xl, bound_xl->fops->link,
- &state->loc, &state->loc2);
-
- return 0;
-err:
- server_link_cbk (frame, NULL, frame->this, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-server_symlink_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_symlink_cbk,
- bound_xl, bound_xl->fops->symlink,
- state->name, &state->loc);
-
- return 0;
-err:
- server_symlink_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int
-server_access_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_access_cbk,
- bound_xl, bound_xl->fops->access,
- &state->loc, state->mask);
- return 0;
-err:
- server_access_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-int
-server_fentrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fentrylk_cbk, bound_xl,
- bound_xl->fops->fentrylk,
- state->volume, state->fd, state->name,
- state->cmd, state->type);
-
- return 0;
-err:
- server_fentrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-
-int
-server_entrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_entrylk_cbk,
- bound_xl, bound_xl->fops->entrylk,
- state->volume, &state->loc, state->name,
- state->cmd, state->type);
- return 0;
-err:
- server_entrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-
-int
-server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_finodelk_cbk, bound_xl,
- bound_xl->fops->finodelk,
- state->volume, state->fd, state->cmd, &state->flock);
-
- return 0;
-err:
- server_finodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
-
- return 0;
-}
-
-int
-server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_inodelk_cbk,
- bound_xl, bound_xl->fops->inodelk,
- state->volume, &state->loc, state->cmd, &state->flock);
- return 0;
-err:
- server_inodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-int
-server_rmdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_rmdir_cbk,
- bound_xl, bound_xl->fops->rmdir, &state->loc);
- return 0;
-err:
- server_rmdir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
- return 0;
-}
-
-int
-server_mkdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_mkdir_cbk,
- bound_xl, bound_xl->fops->mkdir,
- &(state->loc), state->mode);
-
- return 0;
-err:
- server_mkdir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int
-server_mknod_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->loc.inode = inode_new (state->itable);
-
- STACK_WIND (frame, server_mknod_cbk,
- bound_xl, bound_xl->fops->mknod,
- &(state->loc), state->mode, state->dev);
-
- return 0;
-err:
- server_mknod_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int
-server_fsyncdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fsyncdir_cbk,
- bound_xl,
- bound_xl->fops->fsyncdir,
- state->fd, state->flags);
- return 0;
-
-err:
- server_fsyncdir_cbk (frame, NULL, frame->this,
- state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-
-int
- server_readdir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_readdir_cbk,
- bound_xl,
- bound_xl->fops->readdir,
- state->fd, state->size, state->offset);
-
- return 0;
-err:
- server_readdir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-int
-server_readdirp_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_readdirp_cbk, bound_xl,
- bound_xl->fops->readdirp, state->fd, state->size,
- state->offset);
-
- return 0;
-err:
- server_readdirp_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-
-int
-server_opendir_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- state->fd = fd_create (state->loc.inode, frame->root->pid);
-
- STACK_WIND (frame, server_opendir_cbk,
- bound_xl, bound_xl->fops->opendir,
- &state->loc, state->fd);
- return 0;
-err:
- server_opendir_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-
-int
-server_statfs_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret !=0)
- goto err;
-
- STACK_WIND (frame, server_statfs_cbk,
- bound_xl, bound_xl->fops->statfs,
- &state->loc);
- return 0;
-
-err:
- server_statfs_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-
-int
-server_removexattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_removexattr_cbk,
- bound_xl, bound_xl->fops->removexattr,
- &state->loc, state->name);
- return 0;
-err:
- server_removexattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
- return 0;
-}
-
-int
-server_fgetxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fgetxattr_cbk,
- bound_xl, bound_xl->fops->fgetxattr,
- state->fd, state->name);
- return 0;
-err:
- server_fgetxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-
-int
-server_xattrop_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_xattrop_cbk,
- bound_xl, bound_xl->fops->xattrop,
- &state->loc, state->flags, state->dict);
- return 0;
-err:
- server_xattrop_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-int
-server_fxattrop_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fxattrop_cbk,
- bound_xl, bound_xl->fops->fxattrop,
- state->fd, state->flags, state->dict);
- return 0;
-err:
- server_fxattrop_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-int
-server_fsetxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_setxattr_cbk,
- bound_xl, bound_xl->fops->fsetxattr,
- state->fd, state->dict, state->flags);
- return 0;
-err:
- server_fsetxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
-
- return 0;
-}
-
-int
-server_unlink_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_unlink_cbk,
- bound_xl, bound_xl->fops->unlink,
- &state->loc);
- return 0;
-err:
- server_unlink_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
- return 0;
-}
-
-int
-server_truncate_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_truncate_cbk,
- bound_xl, bound_xl->fops->truncate,
- &state->loc, state->offset);
- return 0;
-err:
- server_truncate_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
- return 0;
-}
-
-
-
-int
-server_fstat_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fstat_cbk,
- bound_xl, bound_xl->fops->fstat,
- state->fd);
- return 0;
-err:
- server_fstat_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-
-int
-server_setxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_setxattr_cbk,
- bound_xl, bound_xl->fops->setxattr,
- &state->loc, state->dict, state->flags);
- return 0;
-err:
- server_setxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
-
- return 0;
-}
-
-
-int
-server_getxattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_getxattr_cbk,
- bound_xl, bound_xl->fops->getxattr,
- &state->loc, state->name);
- return 0;
-err:
- server_getxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL);
- return 0;
-}
-
-
-int
-server_ftruncate_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_ftruncate_cbk,
- bound_xl, bound_xl->fops->ftruncate,
- state->fd, state->offset);
- return 0;
-err:
- server_ftruncate_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int
-server_flush_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_flush_cbk,
- bound_xl, bound_xl->fops->flush, state->fd);
- return 0;
-err:
- server_flush_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno);
-
- return 0;
-}
-
-
-int
-server_fsync_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_fsync_cbk,
- bound_xl, bound_xl->fops->fsync,
- state->fd, state->flags);
- return 0;
-err:
- server_fsync_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
-
- return 0;
-}
-
-int
-server_writev_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
- struct iovec iov = {0, };
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- iov.iov_len = state->size;
-
- if (state->iobuf) {
- iov.iov_base = state->iobuf->ptr;
- }
-
- STACK_WIND (frame, server_writev_cbk,
- bound_xl, bound_xl->fops->writev,
- state->fd, &iov, 1, state->offset, state->iobref);
-
- return 0;
-err:
- server_writev_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, NULL);
- return 0;
-}
-
-
-int
-server_readv_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->resolve.op_ret != 0)
- goto err;
-
- STACK_WIND (frame, server_readv_cbk,
- bound_xl, bound_xl->fops->readv,
- state->fd, state->size, state->offset);
-
- return 0;
-err:
- server_readv_cbk (frame, NULL, frame->this, state->resolve.op_ret,
- state->resolve.op_errno, NULL, 0, NULL, NULL);
- return 0;
-}
-
-
-/*
- * Continuation for CREATE.  On successful resolution a fresh inode and
- * fd are allocated for the new file and create is wound to bound_xl.
- * Resolution failures, and failures to allocate the inode or fd
- * (reported as ENOMEM), are delivered through server_create_cbk.
- * Always returns 0.
- */
-int
-server_create_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state    = NULL;
-        int             op_ret   = -1;
-        int             op_errno = ENOMEM;
-
-        state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                op_ret   = state->resolve.op_ret;
-                op_errno = state->resolve.op_errno;
-                goto err;
-        }
-
-        state->loc.inode = inode_new (state->itable);
-        if (!state->loc.inode)
-                goto err;
-
-        /* fd must exist before its flags can be recorded */
-        state->fd = fd_create (state->loc.inode, frame->root->pid);
-        if (!state->fd)
-                goto err;
-        state->fd->flags = state->flags;
-
-        STACK_WIND (frame, server_create_cbk,
-                    bound_xl, bound_xl->fops->create,
-                    &(state->loc), state->flags, state->mode, state->fd);
-
-        return 0;
-err:
-        server_create_cbk (frame, NULL, frame->this, op_ret, op_errno,
-                           NULL, NULL, NULL, NULL, NULL);
-        return 0;
-}
-
-
-/*
- * Continuation for OPEN.  On successful resolution an fd is created on
- * the resolved inode and open is wound to bound_xl.  Resolution
- * failures, and failure to allocate the fd (reported as ENOMEM), are
- * delivered through server_open_cbk.  Always returns 0.
- */
-int
-server_open_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state    = NULL;
-        int             op_ret   = -1;
-        int             op_errno = ENOMEM;
-
-        state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                op_ret   = state->resolve.op_ret;
-                op_errno = state->resolve.op_errno;
-                goto err;
-        }
-
-        /* fd must exist before its flags can be recorded */
-        state->fd = fd_create (state->loc.inode, frame->root->pid);
-        if (!state->fd)
-                goto err;
-        state->fd->flags = state->flags;
-
-        STACK_WIND (frame, server_open_cbk,
-                    bound_xl, bound_xl->fops->open,
-                    &state->loc, state->flags, state->fd, 0);
-
-        return 0;
-err:
-        server_open_cbk (frame, NULL, frame->this, op_ret, op_errno, NULL);
-        return 0;
-}
-
-
-/*
- * Continuation for READLINK.  Winds readlink (loc, size) to bound_xl
- * when resolution succeeded; otherwise passes the resolver's
- * op_ret/op_errno to server_readlink_cbk.  Always returns 0.
- */
-int
-server_readlink_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_readlink_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_readlink_cbk,
-                    bound_xl, bound_xl->fops->readlink,
-                    &state->loc, state->size);
-        return 0;
-}
-
-
-/*
- * Continuation for FSETATTR.  Winds fsetattr (fd, stbuf, valid) to
- * bound_xl when resolution succeeded; otherwise passes the resolver's
- * op_ret/op_errno to server_fsetattr_cbk.  Always returns 0.
- */
-int
-server_fsetattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_fsetattr_cbk (frame, NULL, frame->this,
-                                     state->resolve.op_ret,
-                                     state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_fsetattr_cbk,
-                    bound_xl, bound_xl->fops->fsetattr,
-                    state->fd, &state->stbuf, state->valid);
-        return 0;
-}
-
-
-/*
- * Continuation for SETATTR.  Winds setattr (loc, stbuf, valid) to
- * bound_xl when resolution succeeded; otherwise passes the resolver's
- * op_ret/op_errno to server_setattr_cbk.  Always returns 0.
- */
-int
-server_setattr_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_setattr_cbk (frame, NULL, frame->this,
-                                    state->resolve.op_ret,
-                                    state->resolve.op_errno, NULL, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_setattr_cbk,
-                    bound_xl, bound_xl->fops->setattr,
-                    &state->loc, &state->stbuf, state->valid);
-        return 0;
-}
-
-
-/*
- * Continuation for STAT.  Winds stat (loc) to bound_xl when resolution
- * succeeded; otherwise passes the resolver's op_ret/op_errno to
- * server_stat_cbk.  Always returns 0.
- */
-int
-server_stat_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_stat_cbk (frame, NULL, frame->this,
-                                 state->resolve.op_ret,
-                                 state->resolve.op_errno, NULL);
-                return 0;
-        }
-
-        STACK_WIND (frame, server_stat_cbk,
-                    bound_xl, bound_xl->fops->stat, &state->loc);
-        return 0;
-}
-
-/*
- * Continuation for LOOKUP.  If resolution left no inode in state->loc a
- * new one is taken from state->itable; if an inode is already present
- * the request is flagged as a revalidate.  lookup (loc, dict) is then
- * wound to bound_xl.  Resolution failures go to server_lookup_cbk.
- * Always returns 0.
- */
-int
-server_lookup_resume (call_frame_t *frame, xlator_t *bound_xl)
-{
-        server_state_t *state = CALL_STATE (frame);
-
-        if (state->resolve.op_ret != 0) {
-                server_lookup_cbk (frame, NULL, frame->this,
-                                   state->resolve.op_ret,
-                                   state->resolve.op_errno,
-                                   NULL, NULL, NULL, NULL);
-                return 0;
-        }
-
-        if (state->loc.inode)
-                state->is_revalidate = 1;
-        else
-                state->loc.inode = inode_new (state->itable);
-
-        STACK_WIND (frame, server_lookup_cbk,
-                    bound_xl, bound_xl->fops->lookup,
-                    &state->loc, state->dict);
-        return 0;
-}
-
-
-
-
-/* Fop section */
-
-/*
- * RPC handler for STAT.  Decodes the request, records ino/gen/path in
- * the call state's resolve block (RESOLVE_MUST) and hands the frame to
- * resolve_and_resume with server_stat_resume as continuation.
- * Always returns 0; decode or frame-allocation failure is signalled by
- * setting req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_stat (rpcsvc_request_t *req)
-{
-        char            path[SERVER_PATH_MAX] = {0,};
-        gfs3_stat_req   args                  = {0,};
-        call_frame_t   *frame                 = NULL;
-        server_state_t *state                 = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        /* args.path must point at storage before the decoder runs */
-        args.path = path;
-
-        if (!xdr_to_stat_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->resolve.path = gf_strdup (args.path);
-
-        resolve_and_resume (frame, server_stat_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for SETATTR.  Decodes the request, records ino/gen/path
- * for resolution (RESOLVE_MUST), converts the wire stat into
- * state->stbuf, stores the valid mask and resumes in
- * server_setattr_resume.  Always returns 0; decode or frame-allocation
- * failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_setattr (rpcsvc_request_t *req)
-{
-        char              path[SERVER_PATH_MAX] = {0,};
-        gfs3_setattr_req  args                  = {0,};
-        call_frame_t     *frame                 = NULL;
-        server_state_t   *state                 = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        args.path = path;
-
-        if (!xdr_to_setattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->resolve.path = gf_strdup (args.path);
-
-        gf_stat_to_iatt (&args.stbuf, &state->stbuf);
-        state->valid = args.valid;
-
-        resolve_and_resume (frame, server_setattr_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for FSETATTR.  Decodes the request, records the fd
- * number for resolution (RESOLVE_MUST), converts the wire stat into
- * state->stbuf, stores the valid mask and resumes in
- * server_fsetattr_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_fsetattr (rpcsvc_request_t *req)
-{
-        gfs3_fsetattr_req  args  = {0,};
-        call_frame_t      *frame = NULL;
-        server_state_t    *state = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        if (!xdr_to_fsetattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-
-        gf_stat_to_iatt (&args.stbuf, &state->stbuf);
-        state->valid = args.valid;
-
-        resolve_and_resume (frame, server_fsetattr_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for READLINK.  Decodes the request, records ino/gen/path
- * for resolution (RESOLVE_MUST), stores the requested size and resumes
- * in server_readlink_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_readlink (rpcsvc_request_t *req)
-{
-        char               path[SERVER_PATH_MAX] = {0,};
-        gfs3_readlink_req  args                  = {0,};
-        call_frame_t      *frame                 = NULL;
-        server_state_t    *state                 = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        args.path = path;
-
-        if (!xdr_to_readlink_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->resolve.path = gf_strdup (args.path);
-
-        state->size = args.size;
-
-        resolve_and_resume (frame, server_readlink_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for CREATE.  Decodes the request, records parent/gen,
- * path and basename for resolution (RESOLVE_NOT), stores mode and the
- * converted open flags and resumes in server_create_resume.
- * Always returns 0; decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_create (rpcsvc_request_t *req)
-{
-        server_state_t  *state                  = NULL;
-        call_frame_t    *frame                  = NULL;
-        gfs3_create_req  args                   = {0,};
-        char             path[SERVER_PATH_MAX]  = {0,};
-        char             bname[SERVER_PATH_MAX] = {0,};
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path  = path;
-        args.bname = bname;
-
-        if (!xdr_to_create_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_NOT;
-        state->resolve.par   = args.par;
-        state->resolve.gen   = args.gen;
-        state->resolve.path  = gf_strdup (args.path);
-        state->resolve.bname = gf_strdup (args.bname);
-        state->mode          = args.mode;
-        state->flags         = gf_flags_to_flags (args.flags);
-
-        resolve_and_resume (frame, server_create_resume);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for OPEN.  Decodes the request, records ino/gen/path for
- * resolution (RESOLVE_MUST), stores the converted open flags and
- * resumes in server_open_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_open (rpcsvc_request_t *req)
-{
-        char            path[SERVER_PATH_MAX] = {0,};
-        gfs3_open_req   args                  = {0,};
-        call_frame_t   *frame                 = NULL;
-        server_state_t *state                 = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        args.path = path;
-
-        if (!xdr_to_open_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->resolve.path = gf_strdup (args.path);
-
-        state->flags = gf_flags_to_flags (args.flags);
-
-        resolve_and_resume (frame, server_open_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for READ.  Decodes the request, records the fd number
- * for resolution (RESOLVE_MUST), stores size and offset and resumes in
- * server_readv_resume.  Always returns 0; decode or frame-allocation
- * failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_readv (rpcsvc_request_t *req)
-{
-        gfs3_read_req   args  = {0,};
-        call_frame_t   *frame = NULL;
-        server_state_t *state = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        if (!xdr_to_readv_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->size          = args.size;
-        state->offset        = args.offset;
-
-        resolve_and_resume (frame, server_readv_resume);
-        return 0;
-}
-
-
-int
-server_writev (rpcsvc_request_t *req)
-{
- /* TODO: this handler is an unimplemented stub.  It does not decode
-  * req or submit any reply; reaching it trips assert (0).
-  * Presumably writes are expected to arrive via server_writev_vec
-  * instead -- verify against the actor table.
-  * NOTE(review): if asserts are compiled out (NDEBUG) this simply
-  * returns 0 without touching req -- confirm that is acceptable. */
- assert (0);
- return 0;
-}
-
-
-/*
- * RPC handler for vectored WRITE.  Decodes the request header from
- * req->msg[0], records fd number and offset, and, when a payload iobuf
- * is supplied, wraps it in a new iobref, takes a reference on it and
- * uses req->msg[1].iov_len as the write size.  Resumes in
- * server_writev_resume.
- *
- * Always returns 0.  Decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS; failure to allocate the iobref is
- * answered through server_writev_cbk with ENOMEM instead of passing a
- * NULL iobref to iobref_add.
- */
-int
-server_writev_vec (rpcsvc_request_t *req, struct iobuf *iobuf)
-{
-        server_state_t *state  = NULL;
-        struct iobref  *iobref = NULL;
-        call_frame_t   *frame  = NULL;
-        gfs3_write_req  args   = {0,};
-
-        if (!req)
-                return 0;
-
-        if (!xdr_to_writev_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->offset        = args.offset;
-
-        if (iobuf) {
-                iobref = iobref_new ();
-                if (!iobref) {
-                        server_writev_cbk (frame, NULL, frame->this,
-                                           -1, ENOMEM, NULL, NULL);
-                        goto out;
-                }
-                iobref_add (iobref, iobuf);
-
-                state->iobref = iobref;
-                state->iobuf  = iobuf_ref (iobuf);
-
-                state->size = req->msg[1].iov_len;
-        }
-
-        resolve_and_resume (frame, server_writev_resume);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for RELEASE.  Decodes the request, drops the fd number
- * from the connection's fd table with gf_fd_put and submits an empty
- * gf_common_rsp.  No call frame is created.
- *
- * Always returns 0; a NULL request is ignored (matching the other
- * handlers in this file) and a decode failure sets req->rpc_err to
- * GARBAGE_ARGS.
- */
-int
-server_release (rpcsvc_request_t *req)
-{
-        server_connection_t *conn = NULL;
-        gfs3_release_req     args = {0,};
-        gf_common_rsp        rsp  = {0,};
-
-        if (!req)
-                return 0;
-
-        if (!xdr_to_release_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        conn = req->conn->trans->xl_private;
-        gf_fd_put (conn->fdtable, args.fd);
-
-        server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
-                             xdr_serialize_common_rsp);
-out:
-        return 0;
-}
-
-/*
- * RPC handler for RELEASEDIR.  Decodes the request, drops the fd
- * number from the connection's fd table with gf_fd_put and submits an
- * empty gf_common_rsp.  No call frame is created.
- *
- * Always returns 0; a NULL request is ignored (matching the other
- * handlers in this file) and a decode failure sets req->rpc_err to
- * GARBAGE_ARGS.
- */
-int
-server_releasedir (rpcsvc_request_t *req)
-{
-        server_connection_t *conn = NULL;
-        gfs3_releasedir_req  args = {0,};
-        gf_common_rsp        rsp  = {0,};
-
-        if (!req)
-                return 0;
-
-        /* NOTE(review): args is a gfs3_releasedir_req but is decoded with
-         * the RELEASE decoder; this only works if both wire layouts
-         * match -- confirm, or switch to a releasedir-specific decoder. */
-        if (!xdr_to_release_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        conn = req->conn->trans->xl_private;
-        gf_fd_put (conn->fdtable, args.fd);
-
-        server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
-                             xdr_serialize_common_rsp);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for FSYNC.  Decodes the request, records the fd number
- * for resolution (RESOLVE_MUST), copies args.data into state->flags
- * and resumes in server_fsync_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_fsync (rpcsvc_request_t *req)
-{
-        gfs3_fsync_req  args  = {0,};
-        call_frame_t   *frame = NULL;
-        server_state_t *state = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        if (!xdr_to_fsync_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->flags         = args.data;
-
-        resolve_and_resume (frame, server_fsync_resume);
-        return 0;
-}
-
-
-
-/*
- * RPC handler for FLUSH.  Decodes the request, records the fd number
- * for resolution (RESOLVE_MUST) and resumes in server_flush_resume.
- * Always returns 0; decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_flush (rpcsvc_request_t *req)
-{
-        gfs3_flush_req  args  = {0,};
-        call_frame_t   *frame = NULL;
-        server_state_t *state = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        if (!xdr_to_flush_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-
-        resolve_and_resume (frame, server_flush_resume);
-        return 0;
-}
-
-
-
-/*
- * RPC handler for FTRUNCATE.  Decodes the request, records the fd
- * number for resolution (RESOLVE_MUST), stores the target offset and
- * resumes in server_ftruncate_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_ftruncate (rpcsvc_request_t *req)
-{
-        gfs3_ftruncate_req  args  = {0,};
-        call_frame_t       *frame = NULL;
-        server_state_t     *state = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        if (!xdr_to_ftruncate_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->offset        = args.offset;
-
-        resolve_and_resume (frame, server_ftruncate_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for FSTAT.  Decodes the request, records the fd number
- * for resolution (RESOLVE_MUST) and resumes in server_fstat_resume.
- * Always returns 0; decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.
- *
- * The argument structure is declared as gfs3_fstat_req so that it
- * matches the xdr_to_fstat_req decoder; it was previously declared as
- * the WRITE request type.
- */
-int
-server_fstat (rpcsvc_request_t *req)
-{
-        server_state_t *state = NULL;
-        call_frame_t   *frame = NULL;
-        gfs3_fstat_req  args  = {0,};
-
-        if (!req)
-                return 0;
-
-        if (!xdr_to_fstat_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-
-        resolve_and_resume (frame, server_fstat_resume);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for TRUNCATE.  Decodes the request, records path/ino/gen
- * for resolution (RESOLVE_MUST), stores the target offset and resumes
- * in server_truncate_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_truncate (rpcsvc_request_t *req)
-{
-        char               path[SERVER_PATH_MAX] = {0,};
-        gfs3_truncate_req  args                  = {0,};
-        call_frame_t      *frame                 = NULL;
-        server_state_t    *state                 = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        args.path = path;
-
-        if (!xdr_to_truncate_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (args.path);
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->offset       = args.offset;
-
-        resolve_and_resume (frame, server_truncate_resume);
-        return 0;
-}
-
-
-
-/*
- * RPC handler for UNLINK.  Decodes the request, records parent/gen,
- * path and basename for resolution (RESOLVE_MUST) and resumes in
- * server_unlink_resume.  Always returns 0; decode or frame-allocation
- * failure sets req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_unlink (rpcsvc_request_t *req)
-{
-        char             path[SERVER_PATH_MAX]  = {0,};
-        char             bname[SERVER_PATH_MAX] = {0,};
-        gfs3_unlink_req  args                   = {0,};
-        call_frame_t    *frame                  = NULL;
-        server_state_t  *state                  = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        args.path  = path;
-        args.bname = bname;
-
-        if (!xdr_to_unlink_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.par   = args.par;
-        state->resolve.gen   = args.gen;
-        state->resolve.path  = gf_strdup (args.path);
-        state->resolve.bname = gf_strdup (args.bname);
-
-        resolve_and_resume (frame, server_unlink_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for SETXATTR.  Decodes the request, records path/ino/gen
- * for resolution (RESOLVE_MUST) and, when a serialized dictionary is
- * present, copies it to the heap and unserializes it into a new dict
- * that takes ownership of the copy via extra_free.  Resumes in
- * server_setxattr_resume.
- *
- * Always returns 0.  Decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.  Failure to copy or unserialize the
- * dictionary is answered through server_setxattr_cbk with EINVAL; the
- * copy failure previously skipped the reply and left the dict and
- * frame unreleased.
- */
-int
-server_setxattr (rpcsvc_request_t *req)
-{
-        server_state_t      *state                 = NULL;
-        dict_t              *dict                  = NULL;
-        call_frame_t        *frame                 = NULL;
-        server_connection_t *conn                  = NULL;
-        char                *buf                   = NULL;
-        gfs3_setxattr_req    args                  = {0,};
-        char                 path[SERVER_PATH_MAX] = {0,};
-        char                 dict_val[(16 * 1024)] = {0, };
-        int32_t              ret                   = -1;
-
-        if (!req)
-                return 0;
-
-        conn = req->conn->trans->xl_private;
-
-        args.path          = path;
-        args.dict.dict_val = dict_val;
-
-        if (!xdr_to_setxattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (args.path);
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->flags        = args.flags;
-
-        if (args.dict.dict_len) {
-                dict = dict_new ();
-                buf = memdup (args.dict.dict_val, args.dict.dict_len);
-                /* reply with an error rather than dropping the request */
-                GF_VALIDATE_OR_GOTO (conn->bound_xl->name, buf, err);
-
-                ret = dict_unserialize (buf, args.dict.dict_len, &dict);
-                if (ret < 0) {
-                        gf_log (conn->bound_xl->name, GF_LOG_ERROR,
-                                "%"PRId64": %s (%"PRId64"): failed to "
-                                "unserialize request buffer to dictionary",
-                                frame->root->unique, state->loc.path,
-                                state->resolve.ino);
-                        goto err;
-                }
-
-                /* the dict now owns buf and releases it on destruction */
-                dict->extra_free = buf;
-                buf = NULL;
-
-                state->dict = dict;
-        }
-
-        resolve_and_resume (frame, server_setxattr_resume);
-
-        return 0;
-err:
-        if (dict)
-                dict_unref (dict);
-
-        server_setxattr_cbk (frame, NULL, frame->this, -1, EINVAL);
-out:
-        if (buf)
-                GF_FREE (buf);
-        return 0;
-
-}
-
-
-
-/*
- * RPC handler for FSETXATTR.  Decodes the request, records the fd
- * number for resolution (RESOLVE_MUST) and, when a serialized
- * dictionary is present, copies it to the heap and unserializes it
- * into a new dict that takes ownership of the copy via extra_free.
- * Resumes in server_fsetxattr_resume.
- *
- * Always returns 0.  Decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.  Failure to copy or unserialize the
- * dictionary is answered with EINVAL through server_fsetxattr_cbk
- * (the error path previously replied via the SETXATTR callback, and a
- * copy failure sent no reply at all).
- */
-int
-server_fsetxattr (rpcsvc_request_t *req)
-{
-        server_state_t      *state                = NULL;
-        dict_t              *dict                 = NULL;
-        server_connection_t *conn                 = NULL;
-        call_frame_t        *frame                = NULL;
-        char                *buf                  = NULL;
-        gfs3_fsetxattr_req   args                 = {0,};
-        char                 dict_val[(16 *1024)] = {0,};
-        int32_t              ret                  = -1;
-
-        if (!req)
-                return 0;
-
-        conn = req->conn->trans->xl_private;
-
-        args.dict.dict_val = dict_val;
-        if (!xdr_to_fsetxattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->flags         = args.flags;
-
-        if (args.dict.dict_len) {
-                dict = dict_new ();
-                buf = memdup (args.dict.dict_val, args.dict.dict_len);
-                /* reply with an error rather than dropping the request */
-                GF_VALIDATE_OR_GOTO (conn->bound_xl->name, buf, err);
-
-                ret = dict_unserialize (buf, args.dict.dict_len, &dict);
-                if (ret < 0) {
-                        gf_log (conn->bound_xl->name, GF_LOG_ERROR,
-                                "%"PRId64": %s (%"PRId64"): failed to "
-                                "unserialize request buffer to dictionary",
-                                frame->root->unique, state->loc.path,
-                                state->resolve.ino);
-                        goto err;
-                }
-                /* the dict now owns buf and releases it on destruction */
-                dict->extra_free = buf;
-                buf = NULL;
-                state->dict = dict;
-        }
-
-        resolve_and_resume (frame, server_fsetxattr_resume);
-
-        return 0;
-err:
-        if (dict)
-                dict_unref (dict);
-
-        server_fsetxattr_cbk (frame, NULL, frame->this, -1, EINVAL);
-out:
-        if (buf)
-                GF_FREE (buf);
-        return 0;
-}
-
-
-
-/*
- * RPC handler for FXATTROP.  Decodes the request, records fd number,
- * ino and gen for resolution (RESOLVE_MUST) and, when a serialized
- * dictionary is present, copies it to the heap and unserializes it
- * into a new dict that takes ownership of the copy via extra_free.
- * Resumes in server_fxattrop_resume.
- *
- * Always returns 0.  Decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS; failure to copy or unserialize the
- * dictionary is answered through server_fxattrop_cbk with EINVAL.
- *
- * Fixes relative to the previous version:
- *  - the error log no longer dereferences state->fd, which nothing in
- *    this function has assigned yet; it prints state->resolve.ino;
- *  - the heap copy is released when unserialization fails;
- *  - a failed copy now releases the dict and replies instead of
- *    silently dropping the request.
- */
-int
-server_fxattrop (rpcsvc_request_t *req)
-{
-        dict_t              *dict                 = NULL;
-        server_state_t      *state                = NULL;
-        server_connection_t *conn                 = NULL;
-        call_frame_t        *frame                = NULL;
-        char                *buf                  = NULL;
-        gfs3_fxattrop_req    args                 = {0,};
-        char                 dict_val[(16 *1024)] = {0,};
-        int32_t              ret                  = -1;
-
-        if (!req)
-                return 0;
-
-        conn = req->conn->trans->xl_private;
-
-        args.dict.dict_val = dict_val;
-        if (!xdr_to_fxattrop_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE(frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-
-        state->resolve.ino = args.ino;
-        state->resolve.gen = args.gen;
-        state->flags       = args.flags;
-
-        if (args.dict.dict_len) {
-                /* Unserialize the dictionary */
-                dict = dict_new ();
-
-                buf = memdup (args.dict.dict_val, args.dict.dict_len);
-                GF_VALIDATE_OR_GOTO (conn->bound_xl->name, buf, fail);
-
-                ret = dict_unserialize (buf, args.dict.dict_len, &dict);
-                if (ret < 0) {
-                        gf_log (conn->bound_xl->name, GF_LOG_ERROR,
-                                "fd - %"PRId64" (%"PRId64"): failed to unserialize "
-                                "request buffer to dictionary",
-                                state->resolve.fd_no, state->resolve.ino);
-                        goto fail;
-                }
-                /* the dict now owns buf and releases it on destruction */
-                dict->extra_free = buf;
-                buf = NULL;
-
-                state->dict = dict;
-        }
-
-        resolve_and_resume (frame, server_fxattrop_resume);
-
-        return 0;
-
-fail:
-        if (dict)
-                dict_unref (dict);
-
-        server_fxattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
-out:
-        if (buf)
-                GF_FREE (buf);
-        return 0;
-}
-
-
-
-/*
- * RPC handler for XATTROP.  Decodes the request, records path/ino/gen
- * for resolution (RESOLVE_MUST) and, when a serialized dictionary is
- * present, copies it to the heap and unserializes it into a new dict
- * that takes ownership of the copy via extra_free.  Resumes in
- * server_xattrop_resume.
- *
- * Always returns 0.  Decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS; failure to copy or unserialize the
- * dictionary is answered through server_xattrop_cbk with EINVAL.
- *
- * Fixes relative to the previous version:
- *  - the error log no longer dereferences state->fd, which this
- *    path-based handler never assigns; it prints state->resolve.ino;
- *  - the heap copy is released when unserialization fails;
- *  - a failed copy now releases the dict and replies instead of
- *    silently dropping the request.
- */
-int
-server_xattrop (rpcsvc_request_t *req)
-{
-        dict_t              *dict                  = NULL;
-        server_state_t      *state                 = NULL;
-        server_connection_t *conn                  = NULL;
-        call_frame_t        *frame                 = NULL;
-        char                *buf                   = NULL;
-        gfs3_xattrop_req     args                  = {0,};
-        char                 dict_val[(16 *1024)]  = {0,};
-        char                 path[SERVER_PATH_MAX] = {0,};
-        int32_t              ret                   = -1;
-
-        if (!req)
-                return 0;
-
-        conn = req->conn->trans->xl_private;
-        args.dict.dict_val = dict_val;
-        args.path          = path;
-
-        if (!xdr_to_xattrop_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE(frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (args.path);
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->flags        = args.flags;
-
-        if (args.dict.dict_len) {
-                /* Unserialize the dictionary */
-                dict = dict_new ();
-
-                buf = memdup (args.dict.dict_val, args.dict.dict_len);
-                GF_VALIDATE_OR_GOTO (conn->bound_xl->name, buf, fail);
-
-                ret = dict_unserialize (buf, args.dict.dict_len, &dict);
-                if (ret < 0) {
-                        gf_log (conn->bound_xl->name, GF_LOG_ERROR,
-                                "fd - %"PRId64" (%"PRId64"): failed to unserialize "
-                                "request buffer to dictionary",
-                                state->resolve.fd_no, state->resolve.ino);
-                        goto fail;
-                }
-                /* the dict now owns buf and releases it on destruction */
-                dict->extra_free = buf;
-                buf = NULL;
-
-                state->dict = dict;
-        }
-
-        resolve_and_resume (frame, server_xattrop_resume);
-
-        return 0;
-fail:
-        if (dict)
-                dict_unref (dict);
-
-        server_xattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
-out:
-        if (buf)
-                GF_FREE (buf);
-        return 0;
-}
-
-
-/*
- * RPC handler for GETXATTR.  Decodes the request, records path/ino/gen
- * for resolution (RESOLVE_MUST), duplicates the attribute name when
- * args.namelen is non-zero and resumes in server_getxattr_resume.
- * Always returns 0; decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_getxattr (rpcsvc_request_t *req)
-{
-        server_state_t    *state                 = NULL;
-        call_frame_t      *frame                 = NULL;
-        gfs3_getxattr_req  args                  = {0,};
-        char               path[SERVER_PATH_MAX] = {0,};
-        char               name[4096]            = {0,};
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path = path;
-        args.name = name;
-
-        if (!xdr_to_getxattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (args.path);
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-
-        /* state->name is left untouched when no name was sent */
-        if (args.namelen)
-                state->name = gf_strdup (args.name);
-
-        resolve_and_resume (frame, server_getxattr_resume);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for FGETXATTR.  Decodes the request, records the fd
- * number for resolution (RESOLVE_MUST), duplicates the attribute name
- * when args.namelen is non-zero and resumes in
- * server_fgetxattr_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_fgetxattr (rpcsvc_request_t *req)
-{
-        server_state_t     *state      = NULL;
-        call_frame_t       *frame      = NULL;
-        gfs3_fgetxattr_req  args       = {0,};
-        char                name[4096] = {0,};
-
-        if (!req)
-                return 0;
-
-        args.name = name;
-        if (!xdr_to_fgetxattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-
-        /* state->name is left untouched when no name was sent */
-        if (args.namelen)
-                state->name = gf_strdup (args.name);
-
-        resolve_and_resume (frame, server_fgetxattr_resume);
-out:
-        return 0;
-}
-
-
-
-/*
- * RPC handler for REMOVEXATTR.  Decodes the request, records
- * path/ino/gen for resolution (RESOLVE_MUST), duplicates the attribute
- * name and resumes in server_removexattr_resume.  Always returns 0;
- * decode or frame-allocation failure sets req->rpc_err to
- * GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_removexattr (rpcsvc_request_t *req)
-{
-        server_state_t       *state                 = NULL;
-        call_frame_t         *frame                 = NULL;
-        gfs3_removexattr_req  args                  = {0,};
-        char                  path[SERVER_PATH_MAX] = {0,};
-        char                  name[4096]            = {0,};
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path = path;
-        args.name = name;
-        if (!xdr_to_removexattr_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (args.path);
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->name         = gf_strdup (args.name);
-
-        resolve_and_resume (frame, server_removexattr_resume);
-out:
-        return 0;
-}
-
-
-
-
-/*
- * RPC handler for OPENDIR.  Decodes the request, records path/ino/gen
- * for resolution (RESOLVE_MUST) and resumes in server_opendir_resume.
- * Always returns 0; decode or frame-allocation failure sets
- * req->rpc_err to GARBAGE_ARGS.
- */
-int
-server_opendir (rpcsvc_request_t *req)
-{
-        char              path[SERVER_PATH_MAX] = {0,};
-        gfs3_opendir_req  args                  = {0,};
-        call_frame_t     *frame                 = NULL;
-        server_state_t   *state                 = NULL;
-
-        if (req == NULL)
-                return 0;
-
-        args.path = path;
-
-        if (!xdr_to_opendir_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                return 0;
-        }
-
-        frame = get_frame_from_request (req);
-        if (frame == NULL) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                return 0;
-        }
-
-        state = CALL_STATE (frame);
-        state->resolve.type = RESOLVE_MUST;
-        state->resolve.path = gf_strdup (args.path);
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-
-        resolve_and_resume (frame, server_opendir_resume);
-        return 0;
-}
-
-
-/*
- * RPC handler for READDIRP.  Decodes the request, records the fd
- * number for resolution (RESOLVE_MUST), stores size and offset and
- * resumes in server_readdirp_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_readdirp (rpcsvc_request_t *req)
-{
-        server_state_t    *state = NULL;
-        call_frame_t      *frame = NULL;
-        gfs3_readdirp_req  args  = {0,};
-
-        if (!req)
-                return 0;
-
-        if (!xdr_to_readdirp_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE(frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->size          = args.size;
-        state->offset        = args.offset;
-
-        resolve_and_resume (frame, server_readdirp_resume);
-out:
-        return 0;
-}
-
-/*
- * RPC handler for READDIR.  Decodes the request, records the fd number
- * for resolution (RESOLVE_MUST), stores size and offset and resumes in
- * server_readdir_resume.  Always returns 0; decode or frame-allocation
- * failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_readdir (rpcsvc_request_t *req)
-{
-        server_state_t   *state = NULL;
-        call_frame_t     *frame = NULL;
-        gfs3_readdir_req  args  = {0,};
-
-        if (!req)
-                return 0;
-
-        if (!xdr_to_readdir_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE(frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->size          = args.size;
-        state->offset        = args.offset;
-
-        resolve_and_resume (frame, server_readdir_resume);
-out:
-        return 0;
-}
-
-/*
- * RPC handler for FSYNCDIR.  Decodes the request, records the fd
- * number for resolution (RESOLVE_MUST), copies args.data into
- * state->flags and resumes in server_fsyncdir_resume.  Always returns
- * 0; decode or frame-allocation failure sets req->rpc_err to
- * GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_fsyncdir (rpcsvc_request_t *req)
-{
-        server_state_t    *state = NULL;
-        call_frame_t      *frame = NULL;
-        gfs3_fsyncdir_req  args  = {0,};
-
-        if (!req)
-                return 0;
-
-        if (!xdr_to_fsyncdir_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE(frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.fd_no = args.fd;
-        state->flags         = args.data;
-
-        resolve_and_resume (frame, server_fsyncdir_resume);
-out:
-        return 0;
-}
-
-
-
-/*
- * RPC handler for MKNOD.  Decodes the request, records parent/gen,
- * path and basename for resolution (RESOLVE_NOT), stores mode and dev
- * and resumes in server_mknod_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_mknod (rpcsvc_request_t *req)
-{
-        server_state_t *state                  = NULL;
-        call_frame_t   *frame                  = NULL;
-        gfs3_mknod_req  args                   = {0,};
-        char            bname[SERVER_PATH_MAX] = {0,};
-        char            path[SERVER_PATH_MAX]  = {0,};
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path  = path;
-        args.bname = bname;
-
-        if (!xdr_to_mknod_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_NOT;
-        state->resolve.par   = args.par;
-        state->resolve.gen   = args.gen;
-        state->resolve.path  = gf_strdup (args.path);
-        state->resolve.bname = gf_strdup (args.bname);
-
-        state->mode = args.mode;
-        state->dev  = args.dev;
-
-        resolve_and_resume (frame, server_mknod_resume);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for MKDIR.  Decodes the request, records parent/gen,
- * path and basename for resolution (RESOLVE_NOT), stores the mode and
- * resumes in server_mkdir_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_mkdir (rpcsvc_request_t *req)
-{
-        server_state_t *state                  = NULL;
-        call_frame_t   *frame                  = NULL;
-        gfs3_mkdir_req  args                   = {0,};
-        char            bname[SERVER_PATH_MAX] = {0,};
-        char            path[SERVER_PATH_MAX]  = {0,};
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path  = path;
-        args.bname = bname;
-
-        if (!xdr_to_mkdir_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_NOT;
-        state->resolve.par   = args.par;
-        state->resolve.gen   = args.gen;
-        state->resolve.path  = gf_strdup (args.path);
-        state->resolve.bname = gf_strdup (args.bname);
-
-        state->mode = args.mode;
-
-        resolve_and_resume (frame, server_mkdir_resume);
-out:
-        return 0;
-}
-
-
-/*
- * RPC handler for RMDIR.  Decodes the request, records parent/gen,
- * path and basename for resolution (RESOLVE_MUST) and resumes in
- * server_rmdir_resume.  Always returns 0; decode or frame-allocation
- * failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_rmdir (rpcsvc_request_t *req)
-{
-        server_state_t *state                  = NULL;
-        call_frame_t   *frame                  = NULL;
-        gfs3_rmdir_req  args                   = {0,};
-        char            bname[SERVER_PATH_MAX] = {0,};
-        char            path[SERVER_PATH_MAX]  = {0,};
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path  = path;
-        args.bname = bname;
-
-        if (!xdr_to_rmdir_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type  = RESOLVE_MUST;
-        state->resolve.par   = args.par;
-        state->resolve.gen   = args.gen;
-        state->resolve.path  = gf_strdup (args.path);
-        state->resolve.bname = gf_strdup (args.bname);
-
-        resolve_and_resume (frame, server_rmdir_resume);
-out:
-        return 0;
-}
-
-
-
-/*
- * RPC handler for INODELK.  Decodes the request, records ino/gen/path
- * for resolution (RESOLVE_EXACT), maps the wire lock command and lock
- * type onto the fcntl constants, duplicates the volume name, converts
- * the wire flock into state->flock and resumes in
- * server_inodelk_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * A command that is none of GF_LK_GETLK/SETLK/SETLKW leaves state->cmd
- * unassigned here, and a type that is none of the GF_LK_F_* values
- * leaves flock.l_type as produced by gf_flock_to_flock.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_inodelk (rpcsvc_request_t *req)
-{
-        server_state_t   *state                 = NULL;
-        call_frame_t     *frame                 = NULL;
-        gfs3_inodelk_req  args                  = {0,};
-        char              path[SERVER_PATH_MAX] = {0,};
-        char              volume[4096]          = {0,};
-        int               cmd                   = 0;
-
-        if (!req)
-                return 0;
-
-        /* string fields must point at storage before the decoder runs */
-        args.path   = path;
-        args.volume = volume;
-
-        if (!xdr_to_inodelk_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE (frame);
-
-        state->resolve.type = RESOLVE_EXACT;
-        state->resolve.ino  = args.ino;
-        state->resolve.gen  = args.gen;
-        state->resolve.path = gf_strdup (args.path);
-
-        /* wire lock command -> fcntl command */
-        cmd = args.cmd;
-        switch (cmd) {
-        case GF_LK_GETLK:
-                state->cmd = F_GETLK;
-                break;
-        case GF_LK_SETLK:
-                state->cmd = F_SETLK;
-                break;
-        case GF_LK_SETLKW:
-                state->cmd = F_SETLKW;
-                break;
-        }
-
-        state->type   = args.type;
-        state->volume = gf_strdup (args.volume);
-
-        gf_flock_to_flock (&args.flock, &state->flock);
-
-        /* wire lock type -> fcntl lock type, overriding the converted one */
-        switch (state->type) {
-        case GF_LK_F_RDLCK:
-                state->flock.l_type = F_RDLCK;
-                break;
-        case GF_LK_F_WRLCK:
-                state->flock.l_type = F_WRLCK;
-                break;
-        case GF_LK_F_UNLCK:
-                state->flock.l_type = F_UNLCK;
-                break;
-        }
-
-        resolve_and_resume (frame, server_inodelk_resume);
-out:
-        return 0;
-}
-
-/*
- * RPC handler for FINODELK.  Decodes the request, records the fd
- * number for resolution (RESOLVE_EXACT), duplicates the volume name,
- * maps the wire lock command and lock type onto the fcntl constants,
- * converts the wire flock into state->flock and resumes in
- * server_finodelk_resume.  Always returns 0; decode or
- * frame-allocation failure sets req->rpc_err to GARBAGE_ARGS.
- *
- * A command that is none of GF_LK_GETLK/SETLK/SETLKW leaves the raw
- * wire value in state->cmd, and a type that is none of the GF_LK_F_*
- * values leaves flock.l_type as produced by gf_flock_to_flock.
- *
- * The connection pointer previously fetched here was never read and
- * has been dropped.
- */
-int
-server_finodelk (rpcsvc_request_t *req)
-{
-        server_state_t    *state        = NULL;
-        call_frame_t      *frame        = NULL;
-        gfs3_finodelk_req  args         = {0,};
-        char               volume[4096] = {0,};
-
-        if (!req)
-                return 0;
-
-        args.volume = volume;
-        if (!xdr_to_finodelk_req (req->msg[0], &args)) {
-                /* request payload could not be decoded */
-                req->rpc_err = GARBAGE_ARGS;
-                goto out;
-        }
-
-        frame = get_frame_from_request (req);
-        if (!frame) {
-                /* most likely out of memory */
-                req->rpc_err = GARBAGE_ARGS; /* TODO */
-                goto out;
-        }
-
-        state = CALL_STATE(frame);
-
-        state->resolve.type  = RESOLVE_EXACT;
-        state->volume        = gf_strdup (args.volume);
-        state->resolve.fd_no = args.fd;
-        state->cmd           = args.cmd;
-
-        /* wire lock command -> fcntl command */
-        switch (state->cmd) {
-        case GF_LK_GETLK:
-                state->cmd = F_GETLK;
-                break;
-        case GF_LK_SETLK:
-                state->cmd = F_SETLK;
-                break;
-        case GF_LK_SETLKW:
-                state->cmd = F_SETLKW;
-                break;
-        }
-
-        state->type = args.type;
-
-        gf_flock_to_flock (&args.flock, &state->flock);
-
-        /* wire lock type -> fcntl lock type, overriding the converted one */
-        switch (state->type) {
-        case GF_LK_F_RDLCK:
-                state->flock.l_type = F_RDLCK;
-                break;
-        case GF_LK_F_WRLCK:
-                state->flock.l_type = F_WRLCK;
-                break;
-        case GF_LK_F_UNLCK:
-                state->flock.l_type = F_UNLCK;
-                break;
-        }
-
-        resolve_and_resume (frame, server_finodelk_resume);
-out:
-        return 0;
-}
-
-
-int
-server_entrylk (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_entrylk_req args = {0,};
- char path[SERVER_PATH_MAX] = {0,};
- char name[4096] = {0,};
- char volume[4096] = {0,};
-
- if (!req)
- return 0;
-
- args.path = path;
- args.volume = volume;
- args.name = name;
-
- conn = req->conn->trans->xl_private;
-
- if (!xdr_to_entrylk_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_EXACT;
- state->resolve.path = gf_strdup (args.path);
- state->resolve.ino = args.ino;
- state->resolve.gen = args.gen;
-
- if (args.namelen)
- state->name = gf_strdup (args.name);
- state->volume = gf_strdup (args.volume);
-
- state->cmd = args.cmd;
- state->type = args.type;
-
- resolve_and_resume (frame, server_entrylk_resume);
-out:
- return 0;
-}
-
-int
-server_fentrylk (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_fentrylk_req args = {0,};
- char name[4096] = {0,};
- char volume[4096] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- args.name = name;
- args.volume = volume;
- if (!xdr_to_fentrylk_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE(frame);
-
- state->resolve.type = RESOLVE_EXACT;
- state->resolve.fd_no = args.fd;
- state->cmd = args.cmd;
- state->type = args.type;
-
- if (args.namelen)
- state->name = gf_strdup (args.name);
- state->volume = gf_strdup (args.volume);
-
- resolve_and_resume (frame, server_fentrylk_resume);
-out:
- return 0;
-}
-
-int
-server_access (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_access_req args = {0,};
- char path[SERVER_PATH_MAX] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- args.path = path;
- if (!xdr_to_access_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.ino = args.ino;
- state->resolve.gen = args.gen;
- state->resolve.path = gf_strdup (args.path);
- state->mask = args.mask;
-
- resolve_and_resume (frame, server_access_resume);
-out:
- return 0;
-}
-
-
-
-int
-server_symlink (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_symlink_req args = {0,};
- char linkname[4096] = {0,};
- char path[SERVER_PATH_MAX] = {0,};
- char bname[4096] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
- args.path = path;
- args.bname = bname;
- args.linkname = linkname;
-
- if (!xdr_to_symlink_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_NOT;
- state->resolve.par = args.par;
- state->resolve.gen = args.gen;
- state->resolve.path = gf_strdup (args.path);
- state->resolve.bname = gf_strdup (args.bname);
- state->name = gf_strdup (args.linkname);
-
- resolve_and_resume (frame, server_symlink_resume);
-out:
- return 0;
-}
-
-
-
-int
-server_link (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_link_req args = {0,};
- char oldpath[SERVER_PATH_MAX] = {0,};
- char newpath[SERVER_PATH_MAX] = {0,};
- char newbname[SERVER_PATH_MAX] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- args.oldpath = oldpath;
- args.newpath = newpath;
- args.newbname = newbname;
-
- if (!xdr_to_link_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.path = gf_strdup (args.oldpath);
- state->resolve.ino = args.oldino;
- state->resolve.gen = args.oldgen;
-
- state->resolve2.type = RESOLVE_NOT;
- state->resolve2.path = gf_strdup (args.newpath);
- state->resolve2.bname = gf_strdup (args.newbname);
- state->resolve2.par = args.newpar;
- state->resolve2.gen = args.newgen;
-
- resolve_and_resume (frame, server_link_resume);
-out:
- return 0;
-}
-
-
-int
-server_rename (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_rename_req args = {0,};
- char oldpath[SERVER_PATH_MAX] = {0,};
- char oldbname[SERVER_PATH_MAX] = {0,};
- char newpath[SERVER_PATH_MAX] = {0,};
- char newbname[SERVER_PATH_MAX] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- args.oldpath = oldpath;
- args.oldbname = oldbname;
- args.newpath = newpath;
- args.newbname = newbname;
- if (!xdr_to_rename_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.path = gf_strdup (args.oldpath);
- state->resolve.bname = gf_strdup (args.oldbname);
- state->resolve.par = args.oldpar;
- state->resolve.gen = args.oldgen;
-
- state->resolve2.type = RESOLVE_MAY;
- state->resolve2.path = gf_strdup (args.newpath);
- state->resolve2.bname = gf_strdup (args.newbname);
- state->resolve2.par = args.newpar;
- state->resolve2.gen = args.newgen;
-
- resolve_and_resume (frame, server_rename_resume);
-out:
- return 0;
-}
-
-int
-server_lk (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_lk_req args = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- if (!xdr_to_lk_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.fd_no = args.fd;
- state->cmd = args.cmd;
- state->type = args.type;
-
- switch (state->cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- gf_flock_to_flock (&args.flock, &state->flock);
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- state->flock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- state->flock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- state->flock.l_type = F_UNLCK;
- break;
- default:
- gf_log (conn->bound_xl->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): Unknown lock type: %"PRId32"!",
- state->resolve.fd_no, state->fd->inode->ino, state->type);
- break;
- }
-
-
- resolve_and_resume (frame, server_lk_resume);
-out:
- return 0;
-}
-
-int
-server_checksum (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_checksum_req args = {0,};
- char path[SERVER_PATH_MAX] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- args.path = path;
- if (!xdr_to_checksum_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MAY;
- state->resolve.path = gf_strdup (args.path);
- state->resolve.gen = args.gen;
- state->resolve.ino = args.ino;
- state->flags = args.flag;
-
- resolve_and_resume (frame, server_checksum_resume);
-out:
- return 0;
-}
-
-
-
-int
-server_rchecksum (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_rchecksum_req args = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- if (!xdr_to_rchecksum_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE(frame);
-
- state->resolve.type = RESOLVE_MAY;
- state->resolve.fd_no = args.fd;
- state->offset = args.offset;
- state->size = args.len;
-
- resolve_and_resume (frame, server_rchecksum_resume);
-out:
- return 0;
-}
-
-int
-server_null (rpcsvc_request_t *req)
-{
- gf_common_rsp rsp = {0,};
-
- rsp.gfs_id = req->gfs_id;
- /* Accepted */
- rsp.op_ret = 0;
-
- server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
- (gfs_serialize_t)xdr_serialize_common_rsp);
-
- return 0;
-}
-
-int
-server_lookup (rpcsvc_request_t *req)
-{
- call_frame_t *frame = NULL;
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;
- dict_t *xattr_req = NULL;
- char *buf = NULL;
- gfs3_lookup_req args = {0,};
- int ret = 0;
- char path[SERVER_PATH_MAX] = {0,};
- char bname[SERVER_PATH_MAX] = {0,};
- char dict_val[(16 * 1024)] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
-
- args.path = path;
- args.bname = bname;
- args.dict.dict_val = dict_val;
-
- if (!xdr_to_lookup_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto err;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS;
- goto err;
- }
-
- /* NOTE: lookup() uses req->ino only to identify if a lookup()
- * is requested for 'root' or not
- */
-
- state = CALL_STATE (frame);
- state->resolve.ino = args.ino;
- if (state->resolve.ino != 1)
- state->resolve.ino = 0;
-
- state->resolve.type = RESOLVE_DONTCARE;
- state->resolve.par = args.par;
- state->resolve.gen = args.gen;
- state->resolve.path = gf_strdup (args.path);
-
- if (IS_NOT_ROOT (STRLEN_0 (args.path))) {
- state->resolve.bname = gf_strdup (args.bname);
- }
-
- if (args.dict.dict_len) {
- /* Unserialize the dictionary */
- xattr_req = dict_new ();
-
- buf = memdup (args.dict.dict_val, args.dict.dict_len);
- if (buf == NULL) {
- gf_log (conn->bound_xl->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- ret = dict_unserialize (buf, args.dict.dict_len,
- &xattr_req);
- if (ret < 0) {
- gf_log (conn->bound_xl->name, GF_LOG_ERROR,
- "%"PRId64": %s (%"PRId64"): failed to "
- "unserialize req-buffer to dictionary",
- frame->root->unique, state->resolve.path,
- state->resolve.ino);
- goto out;
- }
-
- state->dict = xattr_req;
-
- xattr_req->extra_free = buf;
-
- buf = NULL;
- }
-
- resolve_and_resume (frame, server_lookup_resume);
-
- return 0;
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- if (buf) {
- GF_FREE (buf);
- }
-
- server_lookup_cbk (frame, NULL, frame->this, -1, EINVAL, NULL, NULL,
- NULL, NULL);
-err:
- return 0;
-}
-
-int
-server_statfs (rpcsvc_request_t *req)
-{
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
- gfs3_statfs_req args = {0,};
- char path[SERVER_PATH_MAX] = {0,};
-
- if (!req)
- return 0;
-
- conn = req->conn->trans->xl_private;
- args.path = path;
- if (!xdr_to_statfs_req (req->msg[0], &args)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- frame = get_frame_from_request (req);
- if (!frame) {
- // something wrong, mostly insufficient memory
- req->rpc_err = GARBAGE_ARGS; /* TODO */
- goto out;
- }
-
- state = CALL_STATE (frame);
-
- state->resolve.type = RESOLVE_MUST;
- state->resolve.ino = args.ino;
- if (!state->resolve.ino)
- state->resolve.ino = 1;
- state->resolve.gen = args.gen;
- state->resolve.path = gf_strdup (args.path);
-
- resolve_and_resume (frame, server_statfs_resume);
-out:
- return 0;
-}
-
-
-rpcsvc_actor_t glusterfs3_1_fop_actors[] = {
- [GFS3_OP_NULL] = { "NULL", GFS3_OP_NULL, server_null, NULL, NULL},
- [GFS3_OP_STAT] = { "STAT", GFS3_OP_STAT, server_stat, NULL, NULL },
- [GFS3_OP_READLINK] = { "READLINK", GFS3_OP_READLINK, server_readlink, NULL, NULL },
- [GFS3_OP_MKNOD] = { "MKNOD", GFS3_OP_MKNOD, server_mknod, NULL, NULL },
- [GFS3_OP_MKDIR] = { "MKDIR", GFS3_OP_MKDIR, server_mkdir, NULL, NULL },
- [GFS3_OP_UNLINK] = { "UNLINK", GFS3_OP_UNLINK, server_unlink, NULL, NULL },
- [GFS3_OP_RMDIR] = { "RMDIR", GFS3_OP_RMDIR, server_rmdir, NULL, NULL },
- [GFS3_OP_SYMLINK] = { "SYMLINK", GFS3_OP_SYMLINK, server_symlink, NULL, NULL },
- [GFS3_OP_RENAME] = { "RENAME", GFS3_OP_RENAME, server_rename, NULL, NULL },
- [GFS3_OP_LINK] = { "LINK", GFS3_OP_LINK, server_link, NULL, NULL },
- [GFS3_OP_TRUNCATE] = { "TRUNCATE", GFS3_OP_TRUNCATE, server_truncate, NULL, NULL },
- [GFS3_OP_OPEN] = { "OPEN", GFS3_OP_OPEN, server_open, NULL, NULL },
- [GFS3_OP_READ] = { "READ", GFS3_OP_READ, server_readv, NULL, NULL },
- [GFS3_OP_WRITE] = { "WRITE", GFS3_OP_WRITE, server_writev, server_writev_vec, NULL },
- [GFS3_OP_STATFS] = { "STATFS", GFS3_OP_STATFS, server_statfs, NULL, NULL },
- [GFS3_OP_FLUSH] = { "FLUSH", GFS3_OP_FLUSH, server_flush, NULL, NULL },
- [GFS3_OP_FSYNC] = { "FSYNC", GFS3_OP_FSYNC, server_fsync, NULL, NULL },
- [GFS3_OP_SETXATTR] = { "SETXATTR", GFS3_OP_SETXATTR, server_setxattr, NULL, NULL },
- [GFS3_OP_GETXATTR] = { "GETXATTR", GFS3_OP_GETXATTR, server_getxattr, NULL, NULL },
- [GFS3_OP_REMOVEXATTR] = { "REMOVEXATTR", GFS3_OP_REMOVEXATTR, server_removexattr, NULL, NULL },
- [GFS3_OP_OPENDIR] = { "OPENDIR", GFS3_OP_OPENDIR, server_opendir, NULL, NULL },
- [GFS3_OP_FSYNCDIR] = { "FSYNCDIR", GFS3_OP_FSYNCDIR, server_fsyncdir, NULL, NULL },
- [GFS3_OP_ACCESS] = { "ACCESS", GFS3_OP_ACCESS, server_access, NULL, NULL },
- [GFS3_OP_CREATE] = { "CREATE", GFS3_OP_CREATE, server_create, NULL, NULL },
- [GFS3_OP_FTRUNCATE] = { "FTRUNCATE", GFS3_OP_FTRUNCATE, server_ftruncate, NULL, NULL },
- [GFS3_OP_FSTAT] = { "FSTAT", GFS3_OP_FSTAT, server_fstat, NULL, NULL },
- [GFS3_OP_LK] = { "LK", GFS3_OP_LK, server_lk, NULL, NULL },
- [GFS3_OP_LOOKUP] = { "LOOKUP", GFS3_OP_LOOKUP, server_lookup, NULL, NULL },
- [GFS3_OP_READDIR] = { "READDIR", GFS3_OP_READDIR, server_readdir, NULL, NULL },
- [GFS3_OP_INODELK] = { "INODELK", GFS3_OP_INODELK, server_inodelk, NULL, NULL },
- [GFS3_OP_FINODELK] = { "FINODELK", GFS3_OP_FINODELK, server_finodelk, NULL, NULL },
- [GFS3_OP_ENTRYLK] = { "ENTRYLK", GFS3_OP_ENTRYLK, server_entrylk, NULL, NULL },
- [GFS3_OP_FENTRYLK] = { "FENTRYLK", GFS3_OP_FENTRYLK, server_fentrylk, NULL, NULL },
- [GFS3_OP_CHECKSUM] = { "CHECKSUM", GFS3_OP_CHECKSUM, server_checksum, NULL, NULL },
- [GFS3_OP_XATTROP] = { "XATTROP", GFS3_OP_XATTROP, server_xattrop, NULL, NULL },
- [GFS3_OP_FXATTROP] = { "FXATTROP", GFS3_OP_FXATTROP, server_fxattrop, NULL, NULL },
- [GFS3_OP_FGETXATTR] = { "FGETXATTR", GFS3_OP_FGETXATTR, server_fgetxattr, NULL, NULL },
- [GFS3_OP_FSETXATTR] = { "FSETXATTR", GFS3_OP_FSETXATTR, server_fsetxattr, NULL, NULL },
- [GFS3_OP_RCHECKSUM] = { "RCHECKSUM", GFS3_OP_RCHECKSUM, server_rchecksum, NULL, NULL },
- [GFS3_OP_SETATTR] = { "SETATTR", GFS3_OP_SETATTR, server_setattr, NULL, NULL },
- [GFS3_OP_FSETATTR] = { "FSETATTR", GFS3_OP_FSETATTR, server_fsetattr, NULL, NULL },
- [GFS3_OP_READDIRP] = { "READDIRP", GFS3_OP_READDIRP, server_readdirp, NULL, NULL },
- [GFS3_OP_RELEASE] = { "RELEASE", GFS3_OP_RELEASE, server_release, NULL, NULL },
- [GFS3_OP_RELEASEDIR] = { "RELEASEDIR", GFS3_OP_RELEASEDIR, server_releasedir, NULL, NULL },
-};
-
-
-struct rpcsvc_program glusterfs3_1_fop_prog = {
- .progname = "GlusterFS-3.1.0",
- .prognum = GLUSTER3_1_FOP_PROGRAM,
- .progver = GLUSTER3_1_FOP_VERSION,
- .numactors = GLUSTER3_1_FOP_PROCCNT,
- .actors = glusterfs3_1_fop_actors,
- .progport = 7007,
-};
diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am
index 9cb9ded3035..c08e8e41bca 100644
--- a/xlators/storage/Makefile.am
+++ b/xlators/storage/Makefile.am
@@ -1,3 +1,7 @@
SUBDIRS = posix
-CLEANFILES =
+if ENABLE_BD_XLATOR
+SUBDIRS += bd
+endif
+
+CLEANFILES =
diff --git a/xlators/storage/bd/Makefile.am b/xlators/storage/bd/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/storage/bd/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/storage/bd/src/Makefile.am b/xlators/storage/bd/src/Makefile.am
new file mode 100644
index 00000000000..aad293e0c79
--- /dev/null
+++ b/xlators/storage/bd/src/Makefile.am
@@ -0,0 +1,20 @@
+if ENABLE_BD_XLATOR
+xlator_LTLIBRARIES = bd.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
+
+bd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+LIBBD = -llvm2app -lrt
+bd_la_SOURCES = bd.c bd-helper.c bd-aio.c
+bd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD) $(LIBAIO)
+
+noinst_HEADERS = bd.h bd-aio.h bd-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+endif
diff --git a/xlators/storage/bd/src/bd-aio.c b/xlators/storage/bd/src/bd-aio.c
new file mode 100644
index 00000000000..191d23d10b0
--- /dev/null
+++ b/xlators/storage/bd/src/bd-aio.c
@@ -0,0 +1,523 @@
+/*
+ Copyright IBM, Corp. 2013
+
+ This file is part of GlusterFS.
+
+ Author: M. Mohan Kumar <mohan@in.ibm.com>
+
+ Based on posix-aio.c
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <lvm2app.h>
+#include <sys/uio.h>
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "defaults.h"
+#include "bd.h"
+#include "bd-aio.h"
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#include "bd-mem-types.h"
+
+/*
+ * Per-request state for one asynchronous read or write.
+ *
+ * bd_aio_readv() and bd_aio_writev() point iocb.data back at the enclosing
+ * bd_aio_cb, and bd_aio_thread() recovers it from the completed event.
+ */
+struct bd_aio_cb {
+ struct iocb iocb; /* control block handed to io_submit() */
+ call_frame_t *frame; /* frame unwound by the completion handler */
+ struct iobuf *iobuf; /* read destination buffer; only set by bd_aio_readv() */
+ struct iobref *iobref; /* reference on the caller's iobref; only set by bd_aio_writev() */
+ struct iatt prebuf; /* iatt copied from the inode context before a write is submitted */
+ int op; /* GF_FOP_READ or GF_FOP_WRITE; selects the completion handler */
+ off_t offset; /* file offset of the request */
+ fd_t *fd; /* fd the request was issued on */
+};
+
+/*
+ * Decide whether the current request should use O_DIRECT and, if that
+ * differs from the state cached in bd_fd->odirect, toggle the flag on
+ * bd_fd->fd with fcntl().
+ *
+ * O_DIRECT is chosen when the fd's flags or the per-call opflags carry
+ * O_DIRECT; otherwise it is chosen only when the low 12 bits of both offset
+ * and size are zero.
+ *
+ * The callers in this file invoke it with fd->lock held.
+ */
+void
+__bd_fd_set_odirect (fd_t *fd, bd_fd_t *bd_fd, int opflags,
+ off_t offset, size_t size)
+{
+ int odirect = 0;
+ int flags = 0;
+ int ret = 0;
+
+ /* NOTE(review): this value is overwritten by both branches below */
+ odirect = bd_fd->odirect;
+
+ if ((fd->flags|opflags) & O_DIRECT) {
+ /* if instructed, use O_DIRECT always */
+ odirect = 1;
+ } else {
+ /* else use O_DIRECT when feasible */
+ if ((offset|size) & 0xfff)
+ odirect = 0;
+ else
+ odirect = 1;
+ }
+
+ /* switch O_DIRECT off; the cached state is updated even if fcntl() fails */
+ if (!odirect && bd_fd->odirect) {
+ flags = fcntl (bd_fd->fd, F_GETFL);
+ ret = fcntl (bd_fd->fd, F_SETFL, (flags & (~O_DIRECT)));
+ bd_fd->odirect = 0;
+ }
+
+ /* switch O_DIRECT on; the cached state is updated even if fcntl() fails */
+ if (odirect && !bd_fd->odirect) {
+ flags = fcntl (bd_fd->fd, F_GETFL);
+ ret = fcntl (bd_fd->fd, F_SETFL, (flags | O_DIRECT));
+ bd_fd->odirect = 1;
+ }
+
+ /* ret holds the F_SETFL result only; the F_GETFL result is not checked */
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "fcntl() failed (%s). fd=%d flags=%d pfd->odirect=%d",
+ strerror (errno), bd_fd->fd, flags, bd_fd->odirect);
+ }
+}
+
+/*
+ * Completion handler for an asynchronous read submitted by bd_aio_readv().
+ *
+ * paiocb - request state; the iobuf reference it carries and the structure
+ *          itself are released here.
+ * res    - event result: number of bytes read, or a negated errno on failure.
+ * res2   - secondary event result; not used.
+ *
+ * Unwinds the readv frame exactly once on every path and returns 0.
+ */
+int
+bd_aio_readv_complete (struct bd_aio_cb *paiocb, int res, int res2)
+{
+        call_frame_t   *frame    = NULL;
+        xlator_t       *this     = NULL;
+        struct iobuf   *iobuf    = NULL;
+        struct iatt     postbuf  = {0,};
+        int             op_ret   = -1;
+        int             op_errno = 0;
+        /* zeroed so the error paths never unwind with indeterminate contents */
+        struct iovec    iov      = {0,};
+        struct iobref  *iobref   = NULL;
+        off_t           offset   = 0;
+        bd_attr_t      *bdatt    = NULL;
+
+        frame = paiocb->frame;
+        this = frame->this;
+        iobuf = paiocb->iobuf;
+        offset = paiocb->offset;
+
+        if (res < 0) {
+                op_ret = -1;
+                op_errno = -res;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "readv(async) failed fd=%p,size=%lu,offset=%llu (%d/%s)",
+                        paiocb->fd, paiocb->iocb.u.c.nbytes,
+                        (unsigned long long) paiocb->offset,
+                        res, strerror (op_errno));
+                goto out;
+        }
+
+        /* bd_inode_ctx_get() leaves bdatt untouched (NULL) when it fails */
+        bd_inode_ctx_get (paiocb->fd->inode, this, &bdatt);
+        if (!bdatt) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "readv(async): no BD inode context for fd=%p",
+                        paiocb->fd);
+                goto out;
+        }
+        memcpy (&postbuf, &bdatt->iatt, sizeof (struct iatt));
+
+        op_ret = res;
+        op_errno = 0;
+
+        iobref = iobref_new ();
+        if (!iobref) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        iobref_add (iobref, iobuf);
+
+        iov.iov_base = iobuf_ptr (iobuf);
+        iov.iov_len = op_ret;
+
+        /* Hack to notify higher layers of EOF. */
+        if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
+                op_errno = ENOENT;
+
+out:
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1,
+                             &postbuf, iobref, NULL);
+        if (iobuf)
+                iobuf_unref (iobuf);
+        if (iobref)
+                iobref_unref (iobref);
+
+        GF_FREE (paiocb);
+
+        return 0;
+}
+
+/*
+ * readv fop that submits the read through io_submit() instead of reading
+ * synchronously.
+ *
+ * When no bd_fd context is found for fd, the call is wound unchanged to the
+ * first child. Otherwise an iobuf of `size` bytes and a bd_aio_cb are
+ * allocated and an IO_CMD_PREAD request is submitted; the frame is then
+ * unwound from the completion path (bd_aio_thread() ->
+ * bd_aio_readv_complete()). On any failure up to and including submission
+ * the frame is unwound here with op_ret -1 and the allocations are released.
+ *
+ * Always returns 0.
+ */
+int
+bd_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct iobuf *iobuf = NULL;
+ bd_fd_t *bd_fd = NULL;
+ int ret = -1;
+ struct bd_aio_cb *paiocb = NULL;
+ bd_priv_t *priv = NULL;
+ struct iocb *iocb = NULL;
+ bd_attr_t *bdatt = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ ret = bd_fd_ctx_get (this, fd, &bd_fd);
+ if (ret < 0 || !bd_fd) {
+ /* no bd_fd context for this fd: pass the read down to the child */
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
+ }
+ _fd = bd_fd->fd;
+ /* NOTE(review): bdatt is not read anywhere else in this function */
+ bd_inode_ctx_get (fd->inode, this, &bdatt);
+ if (!size) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
+ goto err;
+ }
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_bd_aio_cb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb->frame = frame;
+ paiocb->iobuf = iobuf;
+ paiocb->offset = offset;
+ paiocb->op = GF_FOP_READ;
+ paiocb->fd = fd;
+
+ /* iocb.data lets bd_aio_thread() find this bd_aio_cb from the event */
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.c.buf = iobuf_ptr (iobuf);
+ paiocb->iocb.u.c.nbytes = size;
+ paiocb->iocb.u.c.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ /* the O_DIRECT adjustment and the submission happen under fd->lock */
+ LOCK (&fd->lock);
+ {
+ __bd_fd_set_odirect (fd, bd_fd, flags, offset, size);
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ }
+ UNLOCK (&fd->lock);
+
+ /* exactly one iocb was passed, so anything other than 1 is a failure */
+ if (ret != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_submit() returned %d", ret);
+ op_errno = -ret;
+ goto err;
+ }
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ if (paiocb)
+ GF_FREE (paiocb);
+
+ return 0;
+}
+
+/*
+ * Completion handler for an asynchronous write submitted by bd_aio_writev().
+ *
+ * paiocb - request state; the iobref reference it carries and the structure
+ *          itself are released here.
+ * res    - event result: number of bytes written, or a negated errno.
+ * res2   - secondary event result; not used.
+ *
+ * On success the mtime in the inode-context iatt is updated and that iatt is
+ * returned as postbuf; prebuf is the copy taken at submission time.
+ * Unwinds the writev frame exactly once on every path and returns 0.
+ */
+int
+bd_aio_writev_complete (struct bd_aio_cb *paiocb, int res, int res2)
+{
+        call_frame_t   *frame    = NULL;
+        xlator_t       *this     = NULL;
+        struct iatt     prebuf   = {0,};
+        struct iatt     postbuf  = {0,};
+        int             op_ret   = -1;
+        int             op_errno = 0;
+        bd_attr_t      *bdatt    = NULL;
+
+        frame = paiocb->frame;
+        prebuf = paiocb->prebuf;
+        this = frame->this;
+
+        if (res < 0) {
+                op_ret = -1;
+                op_errno = -res;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "writev(async) failed fd=%p,offset=%llu (%d/%s)",
+                        paiocb->fd, (unsigned long long) paiocb->offset, res,
+                        strerror (op_errno));
+
+                goto out;
+        }
+
+        /* bd_inode_ctx_get() leaves bdatt untouched (NULL) when it fails */
+        bd_inode_ctx_get (paiocb->fd->inode, this, &bdatt);
+        if (!bdatt) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "writev(async): no BD inode context for fd=%p",
+                        paiocb->fd);
+                goto out;
+        }
+        bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
+        memcpy (&postbuf, &bdatt->iatt, sizeof (struct iatt));
+
+        op_ret = res;
+        op_errno = 0;
+
+out:
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf,
+                             NULL);
+
+        /* paiocb was dereferenced above, so it is known to be non-NULL here */
+        if (paiocb->iobref)
+                iobref_unref (paiocb->iobref);
+        GF_FREE (paiocb);
+
+        return 0;
+}
+
+/*
+ * writev fop that submits the write through io_submit() instead of writing
+ * synchronously.
+ *
+ * When no bd_fd context is found for fd, the call is wound unchanged to the
+ * first child. Otherwise a bd_aio_cb is allocated, a reference is taken on
+ * iobref, the current inode-context iatt is saved as prebuf, and an
+ * IO_CMD_PWRITEV request is submitted; the frame is then unwound from the
+ * completion path (bd_aio_thread() -> bd_aio_writev_complete()). On any
+ * failure up to and including submission the frame is unwound here with
+ * op_ret -1 and the allocations are released.
+ *
+ * Always returns 0.
+ */
+int
+bd_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               struct iovec *iov, int count, off_t offset, uint32_t flags,
+               struct iobref *iobref, dict_t *xdata)
+{
+        int32_t            op_errno = EINVAL;
+        int                _fd      = -1;
+        bd_fd_t           *bd_fd    = NULL;
+        int                ret      = -1;
+        struct bd_aio_cb  *paiocb   = NULL;
+        bd_priv_t         *priv     = NULL;
+        struct iocb       *iocb     = NULL;
+        bd_attr_t         *bdatt    = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        priv = this->private;
+
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret < 0 || !bd_fd) {
+                /* no bd_fd context for this fd: pass the write to the child */
+                STACK_WIND (frame, default_writev_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                            fd, iov, count, offset, flags, iobref, xdata);
+                return 0;
+        }
+
+        /*
+         * prebuf is copied from the inode context below, so refuse the
+         * request up front if the context is missing; bd_inode_ctx_get()
+         * leaves bdatt untouched (NULL) when it fails.
+         */
+        bd_inode_ctx_get (fd->inode, this, &bdatt);
+        if (!bdatt) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "writev(async): no BD inode context for fd=%p", fd);
+                goto err;
+        }
+
+        _fd = bd_fd->fd;
+
+        paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_bd_aio_cb);
+        if (!paiocb) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        paiocb->frame = frame;
+        paiocb->offset = offset;
+        paiocb->op = GF_FOP_WRITE;
+        paiocb->fd = fd;
+
+        /* iocb.data lets bd_aio_thread() find this bd_aio_cb from the event */
+        paiocb->iocb.data = paiocb;
+        paiocb->iocb.aio_fildes = _fd;
+        paiocb->iobref = iobref_ref (iobref);
+        paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
+        paiocb->iocb.aio_reqprio = 0;
+        paiocb->iocb.u.v.vec = iov;
+        paiocb->iocb.u.v.nr = count;
+        paiocb->iocb.u.v.offset = offset;
+
+        iocb = &paiocb->iocb;
+
+        memcpy (&paiocb->prebuf, &bdatt->iatt, sizeof (struct iatt));
+
+        /* the O_DIRECT adjustment and the submission happen under fd->lock */
+        LOCK (&fd->lock);
+        {
+                __bd_fd_set_odirect (fd, bd_fd, flags, offset,
+                                     iov_length (iov, count));
+
+                ret = io_submit (priv->ctxp, 1, &iocb);
+        }
+        UNLOCK (&fd->lock);
+
+        /* exactly one iocb was passed, so anything other than 1 is a failure */
+        if (ret != 1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "io_submit() returned %d", ret);
+                op_errno = -ret;
+                goto err;
+        }
+
+        return 0;
+err:
+        STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0);
+
+        if (paiocb) {
+                if (paiocb->iobref)
+                        iobref_unref (paiocb->iobref);
+                GF_FREE (paiocb);
+        }
+
+        return 0;
+}
+
+/*
+ * Thread body that reaps completed AIO events for one xlator.
+ *
+ * data is the xlator_t that bd_aio_init() passes to pthread_create().
+ *
+ * Loops on io_getevents(), asking for between 1 and BD_AIO_MAX_NR_GETEVENTS
+ * events with a 5 second timespec, and hands each event to the read or write
+ * completion handler according to the op stored in its bd_aio_cb. A return
+ * of -EINTR is retried; any other negative return ends the loop, after which
+ * the thread returns NULL.
+ */
+void *
+bd_aio_thread (void *data)
+{
+ xlator_t *this = NULL;
+ bd_priv_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
+ struct io_event *event = NULL;
+ struct bd_aio_cb *paiocb = NULL;
+ struct io_event events[BD_AIO_MAX_NR_GETEVENTS];
+ struct timespec ts = {0, };
+
+ this = data;
+ THIS = this;
+ priv = this->private;
+
+ ts.tv_sec = 5;
+ for (;;) {
+ memset (&events[0], 0, sizeof (events));
+ ret = io_getevents (priv->ctxp, 1, BD_AIO_MAX_NR_GETEVENTS,
+ &events[0], &ts);
+ if (ret < 0) {
+ if (ret == -EINTR)
+ continue;
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_getevents() returned %d, exiting", ret);
+ break;
+ }
+
+ /* ret is the number of events filled in; 0 simply loops again */
+ for (i = 0; i < ret; i++) {
+ event = &events[i];
+
+ /* the submitters stored the bd_aio_cb in iocb.data */
+ paiocb = event->data;
+
+ switch (paiocb->op) {
+ case GF_FOP_READ:
+ bd_aio_readv_complete (paiocb, event->res,
+ event->res2);
+ break;
+ case GF_FOP_WRITE:
+ bd_aio_writev_complete (paiocb, event->res,
+ event->res2);
+ break;
+ default:
+ /* NOTE(review): paiocb is neither freed nor its frame unwound here */
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown op %d found in piocb",
+ paiocb->op);
+ break;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Create the AIO context and start the reaper thread for this xlator.
+ *
+ * Calls io_setup() for BD_AIO_MAX_NR_EVENTS entries into priv->ctxp, starts
+ * bd_aio_thread() as priv->aiothread, and on success points the xlator's
+ * readv/writev fops at bd_aio_readv/bd_aio_writev.
+ *
+ * Returns 0 on success and also when io_setup() reports ENOSYS; the negative
+ * io_setup() result for other setup failures; or the pthread_create() result
+ * when the thread cannot be started (the context is destroyed first).
+ */
+int
+bd_aio_init (xlator_t *this)
+{
+ bd_priv_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = io_setup (BD_AIO_MAX_NR_EVENTS, &priv->ctxp);
+ if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Linux AIO not available at run-time."
+ " Continuing with synchronous IO");
+ /*
+ * NOTE(review): this path returns 0 without a usable context, and
+ * bd_aio_on() treats a 0 return as AIO-capable and installs the AIO
+ * fops -- confirm that matches the "synchronous IO" message above.
+ */
+ ret = 0;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "io_setup() failed. ret=%d, errno=%d",
+ ret, errno);
+ goto out;
+ }
+
+ ret = pthread_create (&priv->aiothread, NULL,
+ bd_aio_thread, this);
+ if (ret != 0) {
+ /* no reaper thread, so release the context created above */
+ io_destroy (priv->ctxp);
+ goto out;
+ }
+
+ this->fops->readv = bd_aio_readv;
+ this->fops->writev = bd_aio_writev;
+out:
+ return ret;
+}
+
+
+/*
+ * Enable asynchronous readv/writev for this xlator.
+ *
+ * bd_aio_init() is run only while priv->aio_init_done is false; whether it
+ * returned 0 is recorded in priv->aio_capable. When aio_capable is set the
+ * AIO readv/writev fops are installed.
+ *
+ * Returns the bd_aio_init() result when initialisation ran in this call,
+ * otherwise 0.
+ */
+int
+bd_aio_on (xlator_t *this)
+{
+ bd_priv_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (!priv->aio_init_done) {
+ ret = bd_aio_init (this);
+ if (ret == 0)
+ priv->aio_capable = _gf_true;
+ else
+ priv->aio_capable = _gf_false;
+ /* set regardless of the outcome, so initialisation is not retried */
+ priv->aio_init_done = _gf_true;
+ }
+
+ if (priv->aio_capable) {
+ this->fops->readv = bd_aio_readv;
+ this->fops->writev = bd_aio_writev;
+ }
+
+ return ret;
+}
+
+/*
+ * Point the xlator's readv/writev fops back at the synchronous bd_readv and
+ * bd_writev. Nothing else is changed here; in particular the AIO context
+ * and thread are not touched. Always returns 0.
+ */
+int
+bd_aio_off (xlator_t *this)
+{
+ this->fops->readv = bd_readv;
+ this->fops->writev = bd_writev;
+
+ return 0;
+}
+
+#else
+
+/*
+ * Variant compiled when HAVE_LIBAIO is not defined: only logs that AIO is
+ * unavailable, leaves the fops untouched and returns 0.
+ */
+int
+bd_aio_on (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+/*
+ * Variant compiled when HAVE_LIBAIO is not defined: only logs that AIO is
+ * unavailable, leaves the fops untouched and returns 0.
+ */
+int
+bd_aio_off (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+/*
+ * Variant compiled when HAVE_LIBAIO is not defined: does not touch the fd's
+ * flags; it only logs that AIO is unavailable. None of the arguments are
+ * used.
+ *
+ * NOTE(review): the second parameter is spelled `struct bd_fd *` here but
+ * `bd_fd_t *` in the HAVE_LIBAIO variant -- confirm both name the same type.
+ */
+void
+__bd_fd_set_odirect (fd_t *fd, struct bd_fd *pfd, int opflags,
+ off_t offset, size_t size)
+{
+ xlator_t *this = THIS;
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return;
+}
+#endif
diff --git a/xlators/storage/bd/src/bd-aio.h b/xlators/storage/bd/src/bd-aio.h
new file mode 100644
index 00000000000..82386e0b072
--- /dev/null
+++ b/xlators/storage/bd/src/bd-aio.h
@@ -0,0 +1,36 @@
+/*
+ Copyright IBM, Corp. 2013
+
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _BD_AIO_H
+#define _BD_AIO_H
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+/*
+ * Maximum number of concurrently submitted IO events. The heaviest load
+ * GlusterFS has been able to handle had 60-80 concurrent calls
+ */
+#define BD_AIO_MAX_NR_EVENTS 256
+
+/* Maximum number of completed IO operations to reap per getevents syscall */
+#define BD_AIO_MAX_NR_GETEVENTS 16
+
+/*
+ * Switch the xlator's readv/writev fops to the AIO implementations
+ * (bd_aio_on) or back to bd_readv/bd_writev (bd_aio_off). Without libaio
+ * at build time both only log a message and return 0.
+ */
+int bd_aio_on (xlator_t *this);
+int bd_aio_off (xlator_t *this);
+
+/* readv fop that bd_aio_off() installs as this->fops->readv */
+int bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+/* writev fop that bd_aio_off() installs as this->fops->writev */
+int bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_BD_AIO_H */
diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c
new file mode 100644
index 00000000000..15f83d3f834
--- /dev/null
+++ b/xlators/storage/bd/src/bd-helper.c
@@ -0,0 +1,1020 @@
+#include <lvm2app.h>
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#endif
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include "bd.h"
+#include "bd-mem-types.h"
+#include "run.h"
+#include "lvm-defaults.h"
+#include "syscall.h"
+
+/*
+ * bd_inode_ctx_set: store the bd_attr_t pointer 'ctx' as this
+ * translator's context value on 'inode'.
+ *
+ * Returns -1 when 'inode' or 'ctx' is NULL, otherwise whatever
+ * inode_ctx_set() returns.
+ */
+int
+bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx)
+{
+        uint64_t value  = 0;
+        int      status = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+        /* the context slot is an integer; the pointer is stored in it */
+        value  = (long) ctx;
+        status = inode_ctx_set (inode, this, &value);
+out:
+        return status;
+}
+
+/*
+ * bd_inode_ctx_get: fetch the bd_attr_t pointer stored as this
+ * translator's context on 'inode'.
+ *
+ * 'ctx' may be NULL when the caller only wants to know whether a context
+ * exists. Returns -1 when 'inode' is NULL, otherwise the result of
+ * inode_ctx_get(); '*ctx' is written only when that result is 0.
+ */
+int
+bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx)
+{
+        uint64_t value  = 0;
+        int      status = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        status = inode_ctx_get (inode, this, &value);
+        if (status == 0 && ctx != NULL)
+                *ctx = (bd_attr_t *) value;
+out:
+        return status;
+}
+
+/*
+ * bd_local_free: drop every reference held by 'local' and hand the
+ * object back with mem_put(). A NULL 'local' is accepted and ignored.
+ *
+ * Note that the loc is wiped only when no fd is attached.
+ */
+void
+bd_local_free (xlator_t *this, bd_local_t *local)
+{
+        if (local == NULL)
+                return;
+
+        if (local->fd) {
+                fd_unref (local->fd);
+        } else if (local->loc.path) {
+                loc_wipe (&local->loc);
+        }
+
+        if (local->dict)
+                dict_unref (local->dict);
+
+        if (local->inode)
+                inode_unref (local->inode);
+
+        if (local->bdatt) {
+                /* the type string is owned by the attribute object */
+                GF_FREE (local->bdatt->type);
+                GF_FREE (local->bdatt);
+        }
+
+        mem_put (local);
+}
+
+/*
+ * bd_local_init: obtain a bd_local_t from this->local_pool via mem_get0()
+ * and attach it to 'frame'.
+ *
+ * Returns the new object, or NULL when the pool could not supply one (in
+ * which case frame->local is NULL as well).
+ */
+bd_local_t *
+bd_local_init (call_frame_t *frame, xlator_t *this)
+{
+        bd_local_t *local = mem_get0 (this->local_pool);
+
+        frame->local = local;
+
+        return local;
+}
+
+/*
+ * VGs are tagged in GF_XATTR_VOL_ID_KEY:<uuid> format.
+ * This function validates that tag against the "volume-id" option of the
+ * translator. It also walks the LV list to find out whether a thin-pool
+ * is configured; if one is found its name is stored in priv->pool and
+ * BD_CAPS_THIN is added to priv->caps.
+ *
+ * Returns ENOENT when the VG cannot be opened, 0 when the volume-id
+ * matches (or when no volume-id option is set, so nothing can be
+ * validated) and -1 on any validation failure.
+ */
+int bd_scan_vg (xlator_t *this, bd_priv_t *priv)
+{
+        vg_t                  brick       = NULL;
+        data_t               *tmp_data    = NULL;
+        struct dm_list       *tags        = NULL;
+        int                   op_ret      = -1;
+        uuid_t                dict_uuid   = {0, };
+        uuid_t                vg_uuid     = {0, };
+        gf_boolean_t          uuid        = _gf_false;
+        lvm_str_list_t       *strl        = NULL;
+        struct dm_list       *lv_dm_list  = NULL;
+        lv_list_t            *lv_list     = NULL;
+        struct dm_list       *dm_seglist  = NULL;
+        lvseg_list_t         *seglist     = NULL;
+        lvm_property_value_t  prop        = {0, };
+        gf_boolean_t          thin        = _gf_false;
+        const char           *lv_name     = NULL;
+
+        brick = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+        if (!brick) {
+                gf_log (this->name, GF_LOG_CRITICAL, "VG %s is not found",
+                        priv->vg);
+                return ENOENT;
+        }
+
+        lv_dm_list = lvm_vg_list_lvs (brick);
+        if (!lv_dm_list)
+                goto check;
+
+        dm_list_iterate_items (lv_list, lv_dm_list) {
+                dm_seglist = lvm_lv_list_lvsegs (lv_list->lv);
+                if (!dm_seglist)
+                        continue;
+                dm_list_iterate_items (seglist, dm_seglist) {
+                        prop = lvm_lvseg_get_property (seglist->lvseg,
+                                                       "segtype");
+                        if (!prop.is_valid || !prop.value.string)
+                                continue;
+                        if (!strcmp (prop.value.string, "thin-pool")) {
+                                thin = _gf_true;
+                                lv_name = lvm_lv_get_name (lv_list->lv);
+                                /*
+                                 * The outer loop keeps scanning, so a
+                                 * later thin pool replaces an earlier
+                                 * one; release the previous name instead
+                                 * of leaking it.
+                                 */
+                                GF_FREE (priv->pool);
+                                priv->pool = gf_strdup (lv_name);
+                                gf_log (THIS->name, GF_LOG_INFO, "Thin Pool "
+                                        "\"%s\" will be used for thin LVs",
+                                        lv_name);
+                                break;
+                        }
+                }
+        }
+
+check:
+        /* If there is no volume-id set in dict, we cant validate */
+        tmp_data = dict_get (this->options, "volume-id");
+        if (!tmp_data) {
+                op_ret = 0;
+                goto out;
+        }
+
+        op_ret = gf_uuid_parse (tmp_data->data, dict_uuid);
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "wrong volume-id (%s) set in volume file",
+                        tmp_data->data);
+                op_ret = -1;
+                goto out;
+        }
+
+        tags = lvm_vg_get_tags (brick);
+        if (!tags) { /* no tags in the VG */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Extended attribute trusted.glusterfs."
+                        "volume-id is absent");
+                op_ret = -1;
+                goto out;
+        }
+        dm_list_iterate_items (strl, tags) {
+                if (!strncmp (strl->str, GF_XATTR_VOL_ID_KEY,
+                              strlen (GF_XATTR_VOL_ID_KEY))) {
+                        uuid = _gf_true;
+                        break;
+                }
+        }
+        /* UUID tag is not set in VG */
+        if (!uuid) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Extended attribute trusted.glusterfs."
+                        "volume-id is absent");
+                op_ret = -1;
+                goto out;
+        }
+
+        /* skip "<key>" plus the one-character separator that follows it */
+        op_ret = gf_uuid_parse (strl->str + strlen (GF_XATTR_VOL_ID_KEY) + 1,
+                                vg_uuid);
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "wrong volume-id (%s) set in VG", strl->str);
+                op_ret = -1;
+                goto out;
+        }
+        if (gf_uuid_compare (dict_uuid, vg_uuid)) {
+                /*
+                 * vg_uuid is a binary uuid_t, not a C string: it has to
+                 * be converted before being handed to a %s conversion.
+                 */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "mismatching volume-id (%s) received. "
+                        "already is a part of volume %s ",
+                        tmp_data->data, uuid_utoa (vg_uuid));
+                op_ret = -1;
+                goto out;
+        }
+
+        op_ret = 0;
+
+out:
+        lvm_vg_close (brick);
+
+        if (!thin)
+                gf_log (THIS->name, GF_LOG_WARNING, "No thin pool found in "
+                        "VG %s\n", priv->vg);
+        else
+                priv->caps |= BD_CAPS_THIN;
+
+        return op_ret;
+}
+
+/* FIXME: Move this code to common place, so posix and bd xlator can use */
+/*
+ * page_aligned_alloc: allocate 'size' + ALIGN_SIZE zeroed bytes and
+ * compute an ALIGN_SIZE-aligned address inside that allocation.
+ *
+ * @size:        number of usable bytes required at the aligned address
+ * @aligned_buf: out parameter; receives the aligned address, or NULL when
+ *               the allocation fails
+ *
+ * Returns the raw allocation, which is the pointer that must later be
+ * passed to GF_FREE (not *aligned_buf), or NULL on allocation failure.
+ */
+char *
+page_aligned_alloc (size_t size, char **aligned_buf)
+{
+        char *alloc_buf = NULL;
+
+        /*
+         * Always define the out parameter: callers that test *aligned_buf
+         * must not see a stale value from an earlier call.
+         */
+        *aligned_buf = NULL;
+
+        alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_common_mt_char);
+        if (!alloc_buf)
+                return NULL;
+
+        /* page aligned buffer */
+        *aligned_buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE);
+
+        return alloc_buf;
+}
+
+/*
+ * __bd_fd_ctx_get: return the bd_fd_t attached to 'fd', creating it on
+ * first use by opening /dev/<vg>/<gfid> read-write.
+ *
+ * Uses the double-underscore fd ctx accessors; the visible caller
+ * (bd_fd_ctx_get) invokes it with fd->lock held.
+ *
+ * Return values seen in this function:
+ *   0      - success, OR the fd is not a BD file (not a regular file, or
+ *            no bd inode ctx); in the latter case *bdfd_p is NOT written
+ *   errno  - (positive) when open() of the device node fails
+ *   other  - non-zero value left in 'ret' by the failed __fd_ctx_get()
+ *            lookup (or written by BD_VALIDATE_MEM_ALLOC) on the
+ *            remaining error paths
+ * Callers therefore have to test for "non-zero" and for a NULL *bdfd_p,
+ * not just for a negative result.
+ */
+static int
+__bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd_p)
+{
+ int ret = -1;
+ int _fd = -1;
+ char *devpath = NULL;
+ bd_fd_t *bdfd = NULL;
+ uint64_t tmp_bdfd = 0;
+ bd_priv_t *priv = this->private;
+ bd_gfid_t gfid = {0, };
+ bd_attr_t *bdatt = NULL;
+
+ /* not bd file */
+ if (fd->inode->ia_type != IA_IFREG ||
+ bd_inode_ctx_get (fd->inode, this, &bdatt))
+ return 0;
+
+ /* fast path: a bd_fd_t was already attached to this fd */
+ ret = __fd_ctx_get (fd, this, &tmp_bdfd);
+ if (ret == 0) {
+ bdfd = (void *)(long) tmp_bdfd;
+ *bdfd_p = bdfd;
+ return 0;
+ }
+
+ /* the LV backing the file is named after the inode's gfid */
+ uuid_utoa_r (fd->inode->gfid, gfid);
+ gf_asprintf (&devpath, "/dev/%s/%s", priv->vg, gfid);
+ if (!devpath)
+ goto out;
+
+ _fd = open (devpath, O_RDWR | O_LARGEFILE, 0);
+ if (_fd < 0) {
+ ret = errno;
+ gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", devpath,
+ strerror (ret));
+ goto out;
+ }
+ bdfd = GF_CALLOC (1, sizeof(bd_fd_t), gf_bd_fd);
+ BD_VALIDATE_MEM_ALLOC (bdfd, ret, out);
+
+ bdfd->fd = _fd;
+ bdfd->flag = O_RDWR | O_LARGEFILE;
+ /* 'ret' is still non-zero here, so the cleanup below runs on failure */
+ if (__fd_ctx_set (fd, this, (uint64_t)(long)bdfd) < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the fd context fd=%p", fd);
+ goto out;
+ }
+
+ *bdfd_p = bdfd;
+
+ ret = 0;
+out:
+ GF_FREE (devpath);
+ /* on any failure undo the open and the allocation */
+ if (ret) {
+ if (_fd >= 0)
+ sys_close (_fd);
+ GF_FREE (bdfd);
+ }
+ return ret;
+}
+
+/*
+ * bd_fd_ctx_get: locked wrapper around __bd_fd_ctx_get(); takes fd->lock
+ * for the duration of the lookup/creation and passes the result through
+ * unchanged.
+ */
+int
+bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd)
+{
+        int status;
+
+        /* FIXME: Is it ok to fd->lock here ? */
+        LOCK (&fd->lock);
+        status = __bd_fd_ctx_get (this, fd, bdfd);
+        UNLOCK (&fd->lock);
+
+        return status;
+}
+
+/*
+ * Validates if LV exists for given inode or not.
+ *
+ * @bd:      value of the BD xattr, "<type>[:<size>]"; modified in place
+ *           (the ':' is overwritten with a NUL)
+ * @type:    out; on a non-negative return receives a gf_strdup()ed copy
+ *           of the type that the caller must GF_FREE
+ * @lv_size: out; set to the actual LV size once the LV has been found
+ * @uuid:    gfid of the file, which is also the name of the LV
+ *
+ * Returns 0 if LV exists and size also matches.
+ * If LV does not exist (or the xattr is invalid) -1 is returned and
+ * '*type' does not own any memory.
+ * If LV size mismatches, returns 1 and lv_size is updated with the
+ * actual size.
+ * NOTE(review): when the device path cannot be allocated the function
+ * reports 0 without having set *lv_size; confirm this is intended.
+ */
+int
+bd_validate_bd_xattr (xlator_t *this, char *bd, char **type,
+                      uint64_t *lv_size, uuid_t uuid)
+{
+        char        *path  = NULL;
+        int          ret   = -1;
+        bd_gfid_t    gfid  = {0, };
+        bd_priv_t   *priv  = this->private;
+        struct stat  stbuf = {0, };
+        uint64_t     size  = 0;
+        vg_t         vg    = NULL;
+        lv_t         lv    = NULL;
+        char        *bytes = NULL;
+
+        /* split "<type>:<size>" at the last ':' */
+        bytes = strrchr (bd, ':');
+        if (bytes) {
+                *bytes = '\0';
+                bytes++;
+                gf_string2bytesize (bytes, &size);
+        }
+
+        if (strcmp (bd, BD_LV) && strcmp (bd, BD_THIN)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "invalid xattr %s", bd);
+                return -1;
+        }
+        *type = gf_strdup (bd);
+
+        /*
+         * Check if LV really exist, there could be a failure
+         * after setxattr and successful LV creation
+         */
+        uuid_utoa_r (uuid, gfid);
+        gf_asprintf (&path, "/dev/%s/%s", priv->vg, gfid);
+        if (!path) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "insufficient memory");
+                return 0;
+        }
+
+        /* Destination file does not exist */
+        if (sys_stat (path, &stbuf)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat failed for path %s", path);
+                /* leave through 'out' so that 'path' is released */
+                ret = -1;
+                goto out;
+        }
+
+        vg = lvm_vg_open (priv->handle, priv->vg, "r", 0);
+        if (!vg) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "VG %s does not exist?", priv->vg);
+                ret = -1;
+                goto out;
+        }
+
+        lv = lvm_lv_from_name (vg, gfid);
+        if (!lv) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "LV %s does not exist", gfid);
+                ret = -1;
+                goto out;
+        }
+
+        *lv_size = lvm_lv_get_size (lv);
+        if (size == *lv_size) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = 1;
+
+out:
+        if (vg)
+                lvm_vg_close (vg);
+
+        GF_FREE (path);
+
+        /*
+         * Every path reaching 'out' runs after the strdup above. On
+         * failure the callers do not consume the type, so release it
+         * here rather than leaking it.
+         */
+        if (ret < 0) {
+                GF_FREE (*type);
+                *type = NULL;
+        }
+        return ret;
+}
+
+/*
+ * create_thin_lv: run LVM_CREATE to create thin LV 'lv' with virtual size
+ * 'extent' bytes in thin pool 'vg'/'pool'.
+ *
+ * The exit status of the command is not inspected; success is decided by
+ * whether /dev/<vg>/<lv> exists afterwards.
+ *
+ * Returns 0 on success, ENOMEM if the path could not be built, EAGAIN if
+ * the device node is missing after the command ran.
+ */
+static int
+create_thin_lv (char *vg, char *pool, char *lv, uint64_t extent)
+{
+        int          ret    = -1;
+        runner_t     runner = {0, };
+        char        *path   = NULL;
+        struct stat  stbuf  = {0, };
+
+        runinit (&runner);
+        runner_add_args (&runner, LVM_CREATE, NULL);
+        runner_add_args (&runner, "--thin", NULL);
+        runner_argprintf (&runner, "%s/%s", vg, pool);
+        runner_add_args (&runner, "--name", NULL);
+        runner_argprintf (&runner, "%s", lv);
+        runner_add_args (&runner, "--virtualsize", NULL);
+        /* 'extent' is uint64_t: %ld is only correct where long is 64 bit */
+        runner_argprintf (&runner, "%" PRIu64 "B", extent);
+        runner_start (&runner);
+        runner_end (&runner);
+
+        gf_asprintf (&path, "/dev/%s/%s", vg, lv);
+        if (!path) {
+                ret = ENOMEM;
+                goto out;
+        }
+        if (sys_lstat (path, &stbuf) < 0)
+                ret = EAGAIN;
+        else
+                ret = 0;
+out:
+        GF_FREE (path);
+        return ret;
+}
+
+/*
+ * bd_create: create the LV that backs the file with gfid 'uuid'.
+ *
+ * @uuid: gfid of the file; its string form becomes the LV name
+ * @size: size of the LV in bytes
+ * @type: BD_THIN creates a thin LV in priv->pool, anything else a linear
+ *        LV
+ * @priv: translator private data (LVM handle, VG and pool names)
+ *
+ * Returns 0 on success and a positive errno value on failure.
+ */
+int
+bd_create (uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv)
+{
+        int       ret  = 0;
+        vg_t      vg   = NULL;
+        bd_gfid_t gfid = {0, };
+
+        uuid_utoa_r (uuid, gfid);
+
+        if (!strcmp (type, BD_THIN))
+                return create_thin_lv (priv->vg, priv->pool, gfid,
+                                       size);
+
+        vg = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+        if (!vg) {
+                gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+                        priv->vg);
+                return ENOENT;
+        }
+
+        if (!lvm_vg_create_lv_linear (vg, gfid, size)) {
+                /*
+                 * Capture errno before logging can overwrite it, and
+                 * never report success for a failed creation even when
+                 * errno was left at 0.
+                 */
+                ret = errno ? errno : EIO;
+                gf_log (THIS->name, GF_LOG_WARNING, "lvm_vg_create_lv_linear "
+                        "failed");
+        }
+
+        lvm_vg_close (vg);
+
+        return ret;
+}
+
+/*
+ * bd_resize: resize the LV named after 'uuid' to 'size' bytes by running
+ * LVM_RESIZE, then re-read the LV size to verify the result.
+ *
+ * Returns 0 on success, EAGAIN if the VG cannot be opened, EIO if the LV
+ * is not found or its size differs from the requested one.
+ */
+int32_t
+bd_resize (bd_priv_t *priv, uuid_t uuid, size_t size)
+{
+        uint64_t   new_size = 0;
+        runner_t   runner   = {0, };
+        bd_gfid_t  gfid     = {0, };
+        int        ret      = 0;
+        vg_t       vg       = NULL;
+        lv_t       lv       = NULL;
+
+        uuid_utoa_r (uuid, gfid);
+
+        runinit (&runner);
+
+        runner_add_args (&runner, LVM_RESIZE, NULL);
+        runner_argprintf (&runner, "%s/%s", priv->vg, gfid);
+        /* 'size' is a size_t, so it must be formatted with %zu */
+        runner_argprintf (&runner, "-L%zub", size);
+        runner_add_args (&runner, "-f", NULL);
+
+        /* the exit status is not checked; the size is verified below */
+        runner_start (&runner);
+        runner_end (&runner);
+
+        vg = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+        if (!vg) {
+                gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+                        priv->vg);
+                return EAGAIN;
+        }
+
+        lv = lvm_lv_from_name (vg, gfid);
+        if (!lv) {
+                gf_log (THIS->name, GF_LOG_WARNING, "LV %s not found", gfid);
+                ret = EIO;
+                goto out;
+        }
+        new_size = lvm_lv_get_size (lv);
+
+        if (new_size != size) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "resized LV size %" PRIu64 " does "
+                        "not match requested size %zu", new_size, size);
+                ret = EIO;
+        }
+
+out:
+        lvm_vg_close (vg);
+        return ret;
+}
+
+/*
+ * bd_get_default_extent: return the extent size reported by
+ * lvm_vg_get_extent_size() for priv->vg, or 0 when the VG cannot be
+ * opened.
+ */
+uint64_t
+bd_get_default_extent (bd_priv_t *priv)
+{
+        uint64_t extent = 0;
+        vg_t     group  = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+
+        if (group) {
+                extent = lvm_vg_get_extent_size (group);
+                lvm_vg_close (group);
+                return extent;
+        }
+
+        gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+                priv->vg);
+        return 0;
+}
+
+/*
+ * Rounds the user specified size up to the next multiple of the VG's
+ * extent size. Returns 0 when the extent size cannot be determined.
+ */
+uint64_t
+bd_adjust_size (bd_priv_t *priv, size_t size)
+{
+        uint64_t extent = bd_get_default_extent (priv);
+        uint64_t count  = 0;
+
+        if (extent == 0)
+                return 0;
+
+        /* number of extents needed, counting a partial one as whole */
+        count = size / extent + ((size % extent) ? 1 : 0);
+
+        size = extent * count;
+
+        return size;
+}
+
+/*
+ * bd_delete_lv: remove LV 'lv_name' from priv->vg.
+ *
+ * '*op_errno' is reset to 0 on entry and set to ENOENT when the VG cannot
+ * be opened or the LV is not found, or to errno when the removal fails.
+ * Returns the result of lvm_vg_remove_lv(), or -1 when the VG or the LV
+ * could not be looked up.
+ */
+int
+bd_delete_lv (bd_priv_t *priv, const char *lv_name, int *op_errno)
+{
+        int  status = -1;
+        vg_t group  = NULL;
+        lv_t volume = NULL;
+
+        *op_errno = 0;
+
+        group = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+        if (!group) {
+                gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed",
+                        priv->vg);
+                *op_errno = ENOENT;
+                return -1;
+        }
+
+        volume = lvm_lv_from_name (group, lv_name);
+        if (volume) {
+                status = lvm_vg_remove_lv (volume);
+                if (status < 0) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "removing LV %s failed", lv_name);
+                        *op_errno = errno;
+                }
+        } else {
+                gf_log (THIS->name, GF_LOG_WARNING, "No such LV %s",
+                        lv_name);
+                *op_errno = ENOENT;
+        }
+
+        lvm_vg_close (group);
+
+        return status;
+}
+
+/*
+ * bd_update_amtime: stamp 'iatt' with the current CLOCK_REALTIME time.
+ * GF_SET_ATTR_ATIME in 'flag' selects the access time and
+ * GF_SET_ATTR_MTIME the modification time; both may be set at once.
+ */
+void
+bd_update_amtime(struct iatt *iatt, int flag)
+{
+        struct timespec now = {0, };
+
+        clock_gettime (CLOCK_REALTIME, &now);
+
+        if (flag & GF_SET_ATTR_MTIME) {
+                iatt->ia_mtime      = now.tv_sec;
+                iatt->ia_mtime_nsec = now.tv_nsec;
+        }
+
+        if (flag & GF_SET_ATTR_ATIME) {
+                iatt->ia_atime      = now.tv_sec;
+                iatt->ia_atime_nsec = now.tv_nsec;
+        }
+}
+
+/*
+ * bd_snapshot_create: run LVM_CREATE --snapshot to create an LV named
+ * after local->dloc->gfid as a snapshot of the LV named after
+ * local->loc.gfid.
+ *
+ * An explicit -L size (local->size) is passed only when the origin type
+ * is not BD_THIN. The exit status of the command is not inspected;
+ * success is decided by whether the destination device node exists
+ * afterwards.
+ *
+ * Returns 0 on success, ENOMEM if the path cannot be built, EIO if the
+ * destination node is missing after the command ran.
+ */
+int
+bd_snapshot_create (bd_local_t *local, bd_priv_t *priv)
+{
+ char *path = NULL;
+ bd_gfid_t dest = {0, };
+ bd_gfid_t origin = {0, };
+ int ret = 0;
+ runner_t runner = {0, };
+ struct stat stat = {0, };
+
+ uuid_utoa_r (local->dloc->gfid, dest);
+ uuid_utoa_r (local->loc.gfid, origin);
+
+ gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest);
+ if (!path) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "Insufficient memory");
+ return ENOMEM;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, LVM_CREATE, NULL);
+ runner_add_args (&runner, "--snapshot", NULL);
+ runner_argprintf (&runner, "/dev/%s/%s", priv->vg, origin);
+ runner_add_args (&runner, "--name", NULL);
+ runner_argprintf (&runner, "%s", dest);
+ /* NOTE(review): %ld assumes local->size is a long - confirm its type */
+ if (strcmp (local->bdatt->type, BD_THIN))
+ runner_argprintf (&runner, "-L%ldB", local->size);
+ runner_start (&runner);
+ runner_end (&runner);
+
+ /* the snapshot is considered created iff its device node exists */
+ if (sys_lstat (path, &stat) < 0)
+ ret = EIO;
+
+ GF_FREE (path);
+ return ret;
+}
+
+/*
+ * bd_clone: create a new LV for local->dloc->gfid (same type and size as
+ * the source) and copy the contents of the LV for local->loc.gfid into
+ * it, IOV_NR * IOV_SIZE bytes at a time, using O_DIRECT on both ends.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure, otherwise the
+ * error of the failing step (bd_create result or errno).
+ */
+int
+bd_clone (bd_local_t *local, bd_priv_t *priv)
+{
+        int           ret          = ENOMEM;
+        int           fd1          = -1;
+        int           fd2          = -1;
+        int           i            = 0;
+        char         *buff         = NULL;
+        ssize_t       bytes        = 0;
+        char         *spath        = NULL;
+        char         *dpath        = NULL;
+        struct iovec *vec          = NULL;
+        bd_gfid_t     source       = {0, };
+        bd_gfid_t     dest         = {0, };
+        void         *bufp[IOV_NR] = {0, };
+
+        vec = GF_CALLOC (IOV_NR, sizeof (struct iovec), gf_common_mt_iovec);
+        if (!vec)
+                return ENOMEM;
+
+        for (i = 0; i < IOV_NR; i++) {
+                bufp[i] = page_aligned_alloc (IOV_SIZE, &buff);
+                /*
+                 * Test the pointer that was just returned: 'buff' may
+                 * still hold the address from the previous iteration.
+                 */
+                if (!bufp[i])
+                        goto out;
+                vec[i].iov_base = buff;
+                vec[i].iov_len = IOV_SIZE;
+        }
+
+        uuid_utoa_r (local->loc.gfid, source);
+        uuid_utoa_r (local->dloc->gfid, dest);
+
+        gf_asprintf (&spath, "/dev/%s/%s", priv->vg, source);
+        gf_asprintf (&dpath, "/dev/%s/%s", priv->vg, dest);
+        if (!spath || !dpath)
+                goto out;
+
+        ret = bd_create (local->dloc->gfid, local->size,
+                         local->bdatt->type, priv);
+        if (ret)
+                goto out;
+
+        fd1 = open (spath, O_RDONLY | O_DIRECT);
+        if (fd1 < 0) {
+                ret = errno;
+                goto out;
+        }
+        fd2 = open (dpath, O_WRONLY | O_DIRECT);
+        if (fd2 < 0) {
+                ret = errno;
+                goto out;
+        }
+
+        /*
+         * NOTE(review): the whole vector is written back even after a
+         * short read, and short writes are not retried - confirm this is
+         * acceptable for the devices involved.
+         */
+        while (1) {
+                bytes = sys_readv (fd1, vec, IOV_NR);
+                if (bytes < 0) {
+                        ret = errno;
+                        gf_log (THIS->name, GF_LOG_WARNING, "read failed: %s",
+                                strerror (ret));
+                        goto out;
+                }
+                if (!bytes)
+                        break;
+                bytes = sys_writev (fd2, vec, IOV_NR);
+                if (bytes < 0) {
+                        ret = errno;
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "write failed: %s", strerror (ret));
+                        goto out;
+                }
+        }
+        ret = 0;
+
+out:
+        /* bufp[] is zero-initialised, so unused slots are simply NULL */
+        for (i = 0; i < IOV_NR; i++)
+                GF_FREE (bufp[i]);
+        GF_FREE (vec);
+
+        if (fd1 != -1)
+                sys_close (fd1);
+        if (fd2 != -1)
+                sys_close (fd2);
+
+        GF_FREE (spath);
+        GF_FREE (dpath);
+
+        return ret;
+}
+
+/*
+ * Merges snapshot LV to origin LV and returns status.
+ *
+ * Runs LVM_CONVERT --merge on /dev/<vg>/<gfid>. The exit status of the
+ * command is not inspected: the merge is treated as successful when the
+ * snapshot's device node no longer exists afterwards.
+ *
+ * Returns 0 on success, ENOMEM if the path cannot be built, EIO if the
+ * snapshot node is still present.
+ */
+int
+bd_merge (bd_priv_t *priv, uuid_t gfid)
+{
+        bd_gfid_t    dest   = {0, };
+        char        *path   = NULL;
+        struct stat  stbuf  = {0, };
+        runner_t     runner = {0, };
+        int          ret    = 0;
+
+        uuid_utoa_r (gfid, dest);
+        gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest);
+        if (!path) {
+                /* never hand a NULL path to the command or to lstat */
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "Insufficient memory");
+                return ENOMEM;
+        }
+
+        runinit (&runner);
+        runner_add_args (&runner, LVM_CONVERT, NULL);
+        runner_add_args (&runner, "--merge", NULL);
+        runner_argprintf (&runner, "%s", path);
+        runner_start (&runner);
+        runner_end (&runner);
+
+        /* lstat succeeding means the snapshot LV is still there */
+        if (!sys_lstat (path, &stbuf))
+                ret = EIO;
+
+        GF_FREE (path);
+
+        return ret;
+}
+
+/*
+ * bd_get_origin: look up the "origin" LVM property of the LV backing the
+ * inode (taken from 'fd' when given, otherwise from 'loc') and store a
+ * copy of it in 'dict' under BD_ORIGIN.
+ *
+ * Returns ENOENT if the VG or LV is not found, ENODATA if the LV has no
+ * valid origin property, otherwise the result of dict_set_dynstr().
+ */
+int
+bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict)
+{
+ vg_t brick = NULL;
+ lvm_property_value_t prop = {0, };
+ lv_t lv = NULL;
+ int ret = -1;
+ bd_gfid_t gfid = {0, };
+ inode_t *inode = NULL;
+ char *origin = NULL;
+
+ brick = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!brick) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "VG %s is not found",
+ priv->vg);
+ return ENOENT;
+ }
+
+ /* an fd, when supplied, takes precedence over the loc */
+ if (fd)
+ inode = fd->inode;
+ else
+ inode = loc->inode;
+
+ uuid_utoa_r (inode->gfid, gfid);
+ lv = lvm_lv_from_name (brick, gfid);
+ if (!lv) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "LV %s not found", gfid);
+ ret = ENOENT;
+ goto out;
+ }
+
+ prop = lvm_lv_get_property (lv, "origin");
+ if (!prop.is_valid || !prop.value.string) {
+ ret = ENODATA;
+ goto out;
+ }
+
+ /* NOTE(review): the gf_strdup result is not checked before use */
+ origin = gf_strdup (prop.value.string);
+ ret = dict_set_dynstr (dict, BD_ORIGIN, origin);
+
+out:
+ lvm_vg_close (brick);
+ return ret;
+}
+
+#ifndef BLKZEROOUT
+
+/*
+ * bd_do_manual_zerofill: write 'len' zero bytes to 'fd' starting at
+ * 'offset' using writev(); this variant is compiled when BLKZEROOUT is
+ * not defined.
+ *
+ * One zero-filled buffer of 'vect_size' bytes (IOV_SIZE, or 'len' when
+ * that is smaller) is shared by every iovec entry. The request is split
+ * into:
+ *   - num_loop writev calls of num_vect entries each (num_vect is capped
+ *     at MAX_NO_VECT),
+ *   - one call with 'extra' entries (whole buffers left over after the
+ *     capped loops),
+ *   - one call with a single entry of 'remain' bytes (partial buffer).
+ *
+ * When 'o_direct' is non-zero the buffer is obtained through
+ * page_aligned_alloc().
+ *
+ * Returns 0 on success and -1 on any failure.
+ * NOTE(review): short writes are not detected - only a negative return
+ * from sys_writev is treated as an error.
+ */
+int
+bd_do_manual_zerofill (int fd, off_t offset, off_t len, int o_direct)
+{
+ off_t num_vect = 0;
+ off_t num_loop = 1;
+ int idx = 0;
+ int op_ret = -1;
+ int vect_size = IOV_SIZE;
+ off_t remain = 0;
+ off_t extra = 0;
+ struct iovec *vector = NULL;
+ char *iov_base = NULL;
+ char *alloc_buf = NULL;
+
+ if (len == 0)
+ return 0;
+
+ if (len < IOV_SIZE)
+ vect_size = len;
+
+ num_vect = len / (vect_size);
+ remain = len % vect_size ;
+
+ /* too many buffers for one writev: split into several full calls */
+ if (num_vect > MAX_NO_VECT) {
+ extra = num_vect % MAX_NO_VECT;
+ num_loop = num_vect / MAX_NO_VECT;
+ num_vect = MAX_NO_VECT;
+ }
+
+ vector = GF_CALLOC (num_vect, sizeof(struct iovec),
+ gf_common_mt_iovec);
+ if (!vector)
+ return -1;
+
+ if (o_direct) {
+ alloc_buf = page_aligned_alloc (vect_size, &iov_base);
+ if (!alloc_buf) {
+ gf_log ("bd_do_manual_zerofill", GF_LOG_DEBUG,
+ "memory alloc failed, vect_size %d: %s",
+ vect_size, strerror (errno));
+ GF_FREE (vector);
+ return -1;
+ }
+ } else {
+ iov_base = GF_CALLOC (vect_size, sizeof(char),
+ gf_common_mt_char);
+ if (!iov_base) {
+ GF_FREE (vector);
+ return -1;
+ }
+ }
+
+ /* every entry points at the same zeroed buffer */
+ for (idx = 0; idx < num_vect; idx++) {
+ vector[idx].iov_base = iov_base;
+ vector[idx].iov_len = vect_size;
+ }
+
+ if (sys_lseek (fd, offset, SEEK_SET) < 0) {
+ op_ret = -1;
+ goto err;
+ }
+
+ for (idx = 0; idx < num_loop; idx++) {
+ op_ret = sys_writev (fd, vector, num_vect);
+ if (op_ret < 0)
+ goto err;
+ }
+ if (extra) {
+ op_ret = sys_writev (fd, vector, extra);
+ if (op_ret < 0)
+ goto err;
+ }
+ if (remain) {
+ /* shrink the first entry to cover the trailing partial buffer */
+ vector[0].iov_len = remain;
+ op_ret = sys_writev (fd, vector , 1);
+ if (op_ret < 0)
+ goto err;
+ }
+ op_ret = 0;
+err:
+ /* in the O_DIRECT case iov_base points inside alloc_buf */
+ if (o_direct)
+ GF_FREE (alloc_buf);
+ else
+ GF_FREE (iov_base);
+ GF_FREE (vector);
+ return op_ret;
+}
+
+#else
+
+/*
+ * Issue Linux ZEROOUT ioctl to write '0' to a scsi device at given offset
+ * and number of bytes. Each SCSI device's maximum write same bytes are exported
+ * in sysfs file. Sending ioctl request greater than this bytes results in slow
+ * performance. Read this file to get the maximum bytes and break down single
+ * ZEROOUT request into multiple ZEROOUT request not exceeding maximum bytes.
+ * From VG & LV name of device mapper identified and sysfs file read.
+ * /sys/block/<block-device>/queue/write_same_max_bytes
+ *
+ * @priv:   unused here
+ * @bdatt:  attributes of the file; its gfid names the LV
+ * @fd:     open descriptor of the LV device node
+ * @vg:     name of the volume group
+ * @offset: first byte to zero
+ * @len:    number of bytes to zero
+ *
+ * Returns 0 on success or the errno of the failing ioctl. When the sysfs
+ * limit cannot be read the whole range is zeroed with a single ioctl.
+ */
+int
+bd_do_ioctl_zerofill (bd_priv_t *priv, bd_attr_t *bdatt, int fd, char *vg,
+                      off_t offset, off_t len)
+{
+        char      *dm           = NULL;
+        char       dmname[4096] = {0, };
+        char       lvname[4096] = {0, };
+        char       sysfs[4096]  = {0, };
+        bd_gfid_t  uuid         = {0, };
+        char      *p            = NULL;
+        off_t      max_bytes    = 0;
+        int        sysfd        = -1;
+        uint64_t   param[2]     = {0, 0};
+        off_t      nr_loop      = 0;
+        char       buff[16]     = {0, };
+
+        uuid_utoa_r (bdatt->iatt.ia_gfid, uuid);
+        snprintf (lvname, sizeof (lvname), "/dev/%s/%s", vg, uuid);
+
+        /* dmname is zero-filled and one byte is held back for the NUL */
+        sys_readlink (lvname, dmname, sizeof (dmname) - 1);
+
+        p = strrchr (dmname, '/');
+        if (p)
+                dm = p + 1;
+        else
+                dm = dmname;
+
+        snprintf (sysfs, sizeof (sysfs),
+                  "/sys/block/%s/queue/write_same_max_bytes", dm);
+        sysfd = open (sysfs, O_RDONLY);
+        if (sysfd < 0) {
+                gf_log ("bd_do_ioctl_zerofill", GF_LOG_DEBUG,
+                        "sysfs file %s does not exist", sysfs);
+                goto skip;
+        }
+
+        /*
+         * Leave the last byte of the zero-filled buffer untouched so
+         * that atoll() always sees a terminated string.
+         */
+        sys_read (sysfd, buff, sizeof (buff) - 1);
+        sys_close (sysfd);
+
+        max_bytes = atoll (buff);
+
+skip:
+        /*
+         * If requested len is less than write_same_max_bytes,
+         * issue single ioctl to zeroout. Otherwise split the ioctls
+         */
+        if (!max_bytes || len <= max_bytes) {
+                param[0] = offset;
+                param[1] = len;
+
+                if (ioctl (fd, BLKZEROOUT, param) < 0)
+                        return errno;
+                return 0;
+        }
+
+        /* Split ioctls to max write_same_max_bytes */
+        nr_loop = len / max_bytes;
+        for (; nr_loop; nr_loop--) {
+                param[0] = offset;
+                param[1] = max_bytes;
+
+                if (ioctl (fd, BLKZEROOUT, param) < 0)
+                        return errno;
+
+                offset += max_bytes;
+        }
+
+        if (!(len % max_bytes))
+                return 0;
+
+        /* trailing piece smaller than max_bytes */
+        param[0] = offset;
+        param[1] = len % max_bytes;
+
+        if (ioctl (fd, BLKZEROOUT, param) < 0)
+                return errno;
+
+        return 0;
+}
+#endif
+
+/*
+ * bd_do_zerofill: zero 'len' bytes at 'offset' of the LV behind 'fd' and
+ * report the cached attributes before ('prebuf') and after ('postbuf')
+ * the operation; only the mtime differs between the two.
+ *
+ * Uses the BLKZEROOUT ioctl when it is available at build time, and
+ * plain writes otherwise. When the fd was opened with O_SYNC or O_DSYNC
+ * the data is fsync'ed before returning.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+int
+bd_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+               off_t offset, size_t len, struct iatt *prebuf,
+               struct iatt *postbuf)
+{
+        int        ret   = -1;
+        bd_fd_t   *bd_fd = NULL;
+        bd_priv_t *priv  = this->private;
+        bd_attr_t *bdatt = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (priv, out);
+
+        /*
+         * bd_fd_ctx_get() reports failures with positive as well as
+         * negative values, and returns 0 without providing a bd_fd for
+         * files that are not BD backed; treat all of these as errors.
+         */
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret || !bd_fd) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "bd_fd is NULL from fd=%p", fd);
+                if (!ret)
+                        ret = EINVAL;
+                goto out;
+        }
+
+        /* the cached attributes are dereferenced below */
+        if (bd_inode_ctx_get (fd->inode, this, &bdatt) || !bdatt) {
+                ret = EINVAL;
+                goto out;
+        }
+#ifndef BLKZEROOUT
+        ret = bd_do_manual_zerofill(bd_fd->fd, offset, len,
+                                    bd_fd->flag & O_DIRECT);
+#else
+        ret = bd_do_ioctl_zerofill(priv, bdatt, bd_fd->fd, priv->vg, offset,
+                                   len);
+#endif
+        if (ret) {
+                gf_log(this->name, GF_LOG_ERROR,
+                       "zerofill failed on fd %d length %zu %s",
+                       bd_fd->fd, len, strerror (ret));
+                goto out;
+        }
+
+        if (bd_fd->flag & (O_SYNC|O_DSYNC)) {
+                ret = sys_fsync (bd_fd->fd);
+                if (ret) {
+                        /* save errno before logging can change it */
+                        ret = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fsync() in writev on fd %d failed: %s",
+                                bd_fd->fd, strerror (ret));
+                        return ret;
+                }
+        }
+
+        memcpy (prebuf, &bdatt->iatt, sizeof (struct iatt));
+        bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
+        memcpy (postbuf, &bdatt->iatt, sizeof (struct iatt));
+
+out:
+
+        return ret;
+}
diff --git a/xlators/storage/bd/src/bd-mem-types.h b/xlators/storage/bd/src/bd-mem-types.h
new file mode 100644
index 00000000000..58b44834247
--- /dev/null
+++ b/xlators/storage/bd/src/bd-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __BD_MEM_TYPES_H__
+#define __BD_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+/*
+ * Memory accounting types private to the BD translator. Numbering
+ * continues after the common types (gf_common_mt_end).
+ */
+enum gf_bd_mem_types_ {
+ gf_bd_private = gf_common_mt_end + 1,
+ gf_bd_attr, /* bd_attr_t objects */
+ gf_bd_fd, /* bd_fd_t objects */
+ gf_bd_loc_t,
+ gf_bd_int32_t,
+ gf_bd_aio_cb,
+ gf_bd_mt_end /* end marker, not an allocation type */
+};
+
+#endif
diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c
new file mode 100644
index 00000000000..6c1d2a29308
--- /dev/null
+++ b/xlators/storage/bd/src/bd.c
@@ -0,0 +1,2448 @@
+/*
+ BD translator V2 - Exports Block devices on server side as regular
+ files to client
+
+ Now only exporting Logical volumes supported.
+
+ Copyright IBM, Corp. 2013
+
+ This file is part of GlusterFS.
+
+ Author:
+ M. Mohan Kumar <mohan@in.ibm.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <lvm2app.h>
+#include <openssl/md5.h>
+#include <time.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#endif
+
+#include "bd.h"
+#include "bd-aio.h"
+#include "bd-mem-types.h"
+#include "defaults.h"
+#include "glusterfs3-xdr.h"
+#include "run.h"
+#include "protocol-common.h"
+#include "checksum.h"
+#include "syscall.h"
+#include "lvm-defaults.h"
+
+/*
+ * Callback for the setxattr and removexattr calls wound from
+ * bd_get_bd_info() on a copied frame.
+ * It ignores the result of the operation and only destroys the call
+ * stack of that frame. FIXME: How to handle remove/setxattr failure
+ */
+int
+bd_null_rmsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+/*
+ * bd_get_bd_info: find out whether a file is mapped to a BD by inspecting
+ * the BD_XATTR value in 'xattr'.
+ *
+ * @frame: frame of the calling fop; copied for the fix-up calls below
+ * @xattr: dictionary returned by the child; BD_XATTR is removed from it
+ * @gfid:  gfid of the file
+ * @type:  out; BD type string (GF_FREE'd by the caller on success)
+ * @size:  out; actual size of the LV
+ *
+ * Returns 0 if the file is mapped to a BD and non-zero otherwise.
+ *
+ * Two repairs are started asynchronously when needed:
+ *  - the LV does not exist:            BD_XATTR is removed from the file
+ *  - LV size differs from the xattr:   BD_XATTR is rewritten
+ */
+int
+bd_get_bd_info (call_frame_t *frame, xlator_t *this, dict_t *xattr, uuid_t gfid,
+                char **type, uint64_t *size)
+{
+        char         *bd_xattr = NULL;
+        char         *bd       = NULL;
+        int           ret      = -1;
+        loc_t         loc      = {0, };
+        dict_t       *dict     = NULL;
+        char         *p        = NULL;
+        call_frame_t *bd_frame = NULL;
+
+        if (!xattr)
+                return 1;
+
+        if (dict_get_str (xattr, BD_XATTR, &p))
+                return 1;
+
+        /* private copy: bd_validate_bd_xattr() modifies the string */
+        bd_xattr = gf_strdup (p);
+        BD_VALIDATE_MEM_ALLOC (bd_xattr, ret, out);
+
+        memcpy (loc.gfid, gfid, sizeof (uuid_t));
+
+        bd_frame = copy_frame (frame);
+        BD_VALIDATE_MEM_ALLOC (bd_frame, ret, out);
+
+        ret = bd_validate_bd_xattr (this, bd_xattr, type, size, gfid);
+        if (ret < 0) {/* LV does not exist */
+                STACK_WIND (bd_frame, bd_null_rmsetxattr_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->removexattr, &loc,
+                            BD_XATTR, NULL);
+                /* the callback destroys the copied frame */
+                bd_frame = NULL;
+
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Mapped LV not available for posix file <gfid:%s>, "
+                        "deleting mapping", uuid_utoa (gfid));
+        } else if (ret == 1) {
+                /* BD_XATTR size and LV size mismatch. Update BD_XATTR */
+                gf_asprintf (&bd, "%s:%" PRIu64, *type, *size);
+
+                dict = dict_new ();
+                BD_VALIDATE_MEM_ALLOC (dict, ret, out);
+
+                ret = dict_set_dynstr (dict, BD_XATTR, bd);
+                if (ret)
+                        goto out;
+                /* the dictionary owns the string from here on */
+                bd = NULL;
+
+                STACK_WIND (bd_frame, bd_null_rmsetxattr_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->setxattr, &loc, dict, 0,
+                            NULL);
+                /* the callback destroys the copied frame */
+                bd_frame = NULL;
+        }
+
+out:
+        /* a frame that was copied but never wound must be destroyed here */
+        if (bd_frame)
+                STACK_DESTROY (bd_frame->root);
+        if (dict)
+                dict_unref (dict);
+        dict_del (xattr, BD_XATTR);
+        GF_FREE (bd_xattr);
+        GF_FREE (bd);
+        return ret;
+}
+
+/*
+ * bd_lookup_cbk: Call back from posix_lookup.
+ *
+ * For a regular file that is mapped to a BD the attributes returned by
+ * the child are replaced by the cached BD attributes, whose size fields
+ * reflect the LV. On first lookup the cache entry (bd_attr_t) is created
+ * and stored in the inode context.
+ *
+ * NOTE(review): on the allocation/ctx failure paths only op_errno is
+ * changed; op_ret is passed through as received - confirm intended.
+ */
+int32_t
+bd_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+               int op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr,
+               struct iatt *postparent)
+{
+        int        ret   = -1;
+        bd_attr_t *bdatt = NULL;
+        uint64_t   size  = 0;
+        char      *type  = NULL;
+
+        /* only regular files are part of BD object */
+        if (op_ret < 0 || buf->ia_type != IA_IFREG)
+                goto out;
+
+        /* iatt already cached */
+        if (!bd_inode_ctx_get (inode, this, &bdatt))
+                goto next;
+
+        if (bd_get_bd_info (frame, this, xattr, buf->ia_gfid, &type, &size))
+                goto out;
+
+        /* BD file, update buf */
+        bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
+        if (!bdatt) {
+                /* 'type' was handed to us by bd_get_bd_info() */
+                GF_FREE (type);
+                op_errno = ENOMEM;
+                goto out;
+        }
+        memcpy (&bdatt->iatt, buf, sizeof (struct iatt));
+        bdatt->type = type;
+
+        /* complete the entry before it becomes visible through the ctx */
+        bdatt->iatt.ia_size = size;
+        bdatt->iatt.ia_blocks = size / 512;
+
+        /* Cache LV size in inode_ctx */
+        ret = bd_inode_ctx_set (inode, this, bdatt);
+        if (ret < 0) {
+                GF_FREE (bdatt->type);
+                GF_FREE (bdatt);
+                op_errno = EINVAL;
+                goto out;
+        }
+
+next:
+        dict_del (xattr, GF_CONTENT_KEY);
+        memcpy (buf, &bdatt->iatt, sizeof (struct iatt));
+
+out:
+        BD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+                         xattr, postparent);
+        return 0;
+}
+
+/*
+ * bd_lookup: Issues posix_lookup to find out if file is mapped to BD
+ * bd_lookup -> posix_lookup -> bd_lookup_cbk
+ *
+ * When the inode has no cached BD attributes yet, BD_XATTR is added to
+ * the request dictionary (one is created if the caller passed none) so
+ * that the child returns the mapping information.
+ */
+int32_t
+bd_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+        dict_t    *bd_xattr = NULL;
+        bd_attr_t *bdatt    = NULL;
+        int        op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        if (bd_inode_ctx_get (loc->inode, this, &bdatt) < 0) {
+                if (!xattr_req) {
+                        bd_xattr = dict_new ();
+                        BD_VALIDATE_MEM_ALLOC (bd_xattr, op_errno, out);
+                        xattr_req = bd_xattr;
+                }
+                if (dict_set_int8 (xattr_req, BD_XATTR, 1) < 0)
+                        goto out;
+        }
+
+        STACK_WIND (frame, bd_lookup_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
+
+        if (bd_xattr)
+                dict_unref (bd_xattr);
+        return 0;
+out:
+        /* release the dictionary created above, if any */
+        if (bd_xattr)
+                dict_unref (bd_xattr);
+        BD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * bd_forget: release the cached BD attributes stored in the context of
+ * 'inode', if there are any. Always returns 0.
+ */
+int
+bd_forget (xlator_t *this, inode_t *inode)
+{
+        int        ret   = -1;
+        uint64_t   ctx   = 0;
+        bd_attr_t *bdatt = NULL;
+
+        ret = bd_inode_ctx_get (inode, this, &bdatt);
+        if (!ret) {
+                inode_ctx_del (inode, this, &ctx);
+                if (bdatt) {
+                        /* the type string belongs to the attribute object */
+                        GF_FREE (bdatt->type);
+                        GF_FREE (bdatt);
+                }
+        }
+        return 0;
+}
+
+/*
+ * bd_readdirp_cbk: for every regular-file entry that bd_get_bd_info()
+ * reports as mapped to a BD, overwrite the entry's size and block count
+ * with the values derived from the LV size, then unwind.
+ */
+int
+bd_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                 int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+        gf_dirent_t *entry   = NULL;
+        uint64_t     lv_size = 0;
+        char        *lv_type = NULL;
+
+        if (op_ret >= 0) {
+                list_for_each_entry (entry, &entries->list, list) {
+                        if (entry->d_type == DT_REG &&
+                            bd_get_bd_info (frame, this, entry->dict,
+                                            entry->d_stat.ia_gfid, &lv_type,
+                                            &lv_size) == 0) {
+                                entry->d_stat.ia_size   = lv_size;
+                                entry->d_stat.ia_blocks = lv_size / 512;
+                                /* only the size is needed here */
+                                GF_FREE (lv_type);
+                        }
+                }
+        }
+
+        BD_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata);
+        return 0;
+}
+
+/*
+ * bd_readdirp: adds the BD_XATTR key to the request dictionary and winds
+ * readdirp to the child. bd_readdirp_cbk then replaces ia_size of every
+ * entry that is mapped to a BD with the size of its LV.
+ *
+ * When the caller passes no dictionary one is created and kept in the
+ * frame's local (local->dict).
+ */
+int32_t
+bd_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
+{
+ int op_errno = EINVAL;
+ bd_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ if (!dict) {
+ local = bd_local_init (frame, this);
+ BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+ local->dict = dict_new ();
+ BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
+ dict = local->dict;
+ }
+
+ /* request the BD mapping xattr for every returned entry */
+ if (dict_set_int8 (dict, BD_XATTR, 0)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set key %s", BD_XATTR);
+ goto out;
+ }
+
+ STACK_WIND (frame, bd_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict);
+
+ return 0;
+out:
+ BD_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, dict);
+ return 0;
+}
+
+/*
+ * bd_stat_cbk: callback of the stat wound by bd_stat(). For a regular
+ * file whose inode carries cached BD attributes, the attributes returned
+ * by the child are replaced with the cached ones before unwinding.
+ */
+int
+bd_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+             int op_errno, struct iatt *buf, dict_t *xdata)
+{
+        bd_local_t *local = frame->local;
+        bd_attr_t  *bdatt = NULL;
+
+        /* only regular files are part of BD object */
+        if (op_ret < 0 || buf->ia_type != IA_IFREG)
+                goto out;
+
+        BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out);
+
+        /*
+         * update buf with LV size; copy from the embedded iatt member,
+         * exactly as bd_fstat_cbk does, rather than from the start of the
+         * enclosing bd_attr_t
+         */
+        if (!bd_inode_ctx_get (local->inode, this, &bdatt))
+                memcpy (buf, &bdatt->iatt, sizeof (struct iatt));
+
+out:
+        BD_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+/*
+ * bd_stat: answer directly from the cached BD attributes when the inode
+ * has them; otherwise remember the inode in the frame's local and wind
+ * stat to the child (completed in bd_stat_cbk).
+ */
+int
+bd_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        bd_attr_t  *cached   = NULL;
+        bd_local_t *local    = NULL;
+        int         op_errno = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        /* cache hit: no need to ask the child */
+        if (bd_inode_ctx_get (loc->inode, this, &cached) == 0) {
+                BD_STACK_UNWIND (stat, frame, 0, 0, &cached->iatt, xdata);
+                return 0;
+        }
+
+        local = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+        local->inode = inode_ref (loc->inode);
+
+        STACK_WIND(frame, bd_stat_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->stat, loc, xdata);
+        return 0;
+
+out:
+        BD_STACK_UNWIND (stat, frame, -1, op_errno, NULL, xdata);
+        return 0;
+}
+
+/*
+ * bd_statfs_cbk: add the total and free size of the VG, expressed in
+ * units of buff->f_frsize, to the block counts returned by the child.
+ * Unwinds with -1/EAGAIN when the VG cannot be opened.
+ */
+int
+bd_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+               int op_errno, struct statvfs *buff, dict_t *xdata)
+{
+        bd_priv_t *priv     = NULL;
+        vg_t       group    = NULL;
+        uint64_t   vg_total = 0;
+        uint64_t   vg_free  = 0;
+
+        if (op_ret < 0)
+                goto out;
+
+        priv = this->private;
+
+        group = lvm_vg_open (priv->handle, priv->vg, "r", 0);
+        if (group == NULL) {
+                gf_log (this->name, GF_LOG_WARNING, "opening VG %s failed",
+                        priv->vg);
+                op_ret   = -1;
+                op_errno = EAGAIN;
+                goto out;
+        }
+
+        vg_total = lvm_vg_get_size (group);
+        vg_free  = lvm_vg_get_free_size (group);
+        lvm_vg_close (group);
+
+        buff->f_blocks += vg_total / buff->f_frsize;
+        buff->f_bfree  += vg_free / buff->f_frsize;
+        buff->f_bavail += vg_free / buff->f_frsize;
+
+out:
+        BD_STACK_UNWIND (statfs, frame, op_ret, op_errno, buff, xdata);
+        return 0;
+}
+
+/*
+ * bd_statfs: winds statfs to the child; bd_statfs_cbk then adds the
+ * total/free size of the VG to the figures returned by the child.
+ * Unwinds with -1/EINVAL when an argument fails validation.
+ */
+int
+bd_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ STACK_WIND (frame, bd_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+out:
+ BD_STACK_UNWIND (statfs, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * bd_fstat_cbk: Callback for the child's fstat. For a successful reply on a
+ * regular file, the returned iatt is overwritten with the iatt stored in
+ * the inode context (if one exists) before unwinding.
+ */
+int
+bd_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+              int op_errno, struct iatt *buf, dict_t *xdata)
+{
+        bd_local_t  *ctx    = frame->local;
+        bd_attr_t   *cached = NULL;
+
+        /* only regular files are part of BD object */
+        if (op_ret >= 0 && buf->ia_type == IA_IFREG) {
+                BD_VALIDATE_LOCAL_OR_GOTO (ctx, op_errno, out);
+
+                /* update buf with LV size */
+                if (bd_inode_ctx_get (ctx->inode, this, &cached) == 0)
+                        *buf = cached->iatt;
+        }
+
+out:
+        BD_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+        return 0;
+}
+
+/*
+ * bd_fstat: Answers fstat from the bd_attr_t stored in the inode context
+ * when one is found; otherwise winds fstat to the first child with
+ * bd_fstat_cbk as the callback.
+ */
+int
+bd_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        bd_attr_t   *cached   = NULL;
+        bd_local_t  *ctx      = NULL;
+        int          op_errno = EINVAL;
+        int          miss     = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        /* if its already cached return it */
+        miss = bd_inode_ctx_get (fd->inode, this, &cached);
+        if (miss == 0) {
+                BD_STACK_UNWIND (fstat, frame, 0, 0, &cached->iatt, xdata);
+                return 0;
+        }
+
+        ctx = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (ctx, op_errno, out);
+
+        /* the callback looks the inode context up again via local->inode */
+        ctx->inode = inode_ref (fd->inode);
+
+        STACK_WIND (frame, bd_fstat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        return 0;
+
+out:
+        BD_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, xdata);
+        return 0;
+}
+
+/*
+ * bd_readv: If posix file, invokes posix_readv otherwise reads from the BD
+ * file
+ *
+ * When the fd carries no BD fd context the request is wound unchanged to
+ * the first child. Otherwise the data is read with sys_pread into a newly
+ * obtained iobuf and unwound together with the iatt stored in the inode
+ * context. When the read reaches or passes the cached size, op_errno is set
+ * to ENOENT while op_ret still carries the number of bytes read.
+ *
+ * Errors: EINVAL for invalid arguments, a zero size or a missing inode
+ * context; ENOMEM when the iobuf or iobref cannot be obtained; the errno of
+ * sys_pread when the read itself fails.
+ */
+int
+bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+          off_t offset, uint32_t flags, dict_t *xdata)
+{
+        int             ret      = -1;
+        int32_t         op_ret   = -1;
+        /* EINVAL so that a failed argument check is not reported as 0 */
+        int32_t         op_errno = EINVAL;
+        bd_fd_t        *bd_fd    = NULL;
+        struct iovec    vec      = {0, };
+        struct iobuf   *iobuf    = NULL;
+        struct iobref  *iobref   = NULL;
+        uint64_t        bd_size  = 0;
+        bd_attr_t      *bdatt    = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret < 0 || !bd_fd) {
+                /* not a BD fd: let the child serve the read */
+                STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readv,
+                            fd, size, offset, flags, xdata);
+                return 0;
+        }
+        if (!size) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
+                goto out;
+        }
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+        if (!iobuf) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        op_ret = sys_pread (bd_fd->fd, iobuf->ptr, size, offset);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "read failed on fd=%p: %s", fd,
+                        strerror (op_errno));
+                goto out;
+        }
+        op_errno = 0;
+
+        vec.iov_base = iobuf->ptr;
+        vec.iov_len = op_ret;
+
+        /* the reply must not go out without a container for the buffer */
+        iobref = iobref_new ();
+        if (!iobref) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+        iobref_add (iobref, iobuf);
+
+        if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
+                op_errno = EINVAL;
+                op_ret = -1;
+                goto out;
+        }
+        bd_size = bdatt->iatt.ia_size;
+        if (!bd_size || (offset + vec.iov_len) >= bd_size)
+                op_errno = ENOENT;
+
+        op_ret = vec.iov_len;
+        bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_ATIME);
+
+out:
+        /* bdatt is still NULL on every path that failed before the lookup */
+        BD_STACK_UNWIND (readv, frame, op_ret, op_errno,
+                         &vec, 1, bdatt ? &bdatt->iatt : NULL, iobref, NULL);
+
+        if (iobref)
+                iobref_unref (iobref);
+        if (iobuf)
+                iobuf_unref (iobuf);
+
+        return 0;
+}
+
+#ifdef BLKDISCARD
+/*
+ * bd_discard: Sends BLKDISCARD ioctl to the block device
+ *
+ * Files without a BD inode context are wound to the first child's discard.
+ * For BD files the {offset, len} pair is handed to the BLKDISCARD ioctl on
+ * the BD fd; on success the cached mtime is refreshed and the previous and
+ * current iatt are returned.
+ *
+ * Errors: EINVAL for invalid arguments or a missing BD fd context; ENOSYS
+ * when the ioctl reports ENOTTY; otherwise the errno of the ioctl.
+ */
+int
+bd_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+        int          ret      = -1;
+        int          op_errno = EINVAL;
+        bd_fd_t     *bd_fd    = NULL;
+        uint64_t     param[2] = {0, };
+        bd_attr_t   *bdatt    = NULL;
+        struct iatt  prebuf   = {0, };
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        /* posix */
+        if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
+                STACK_WIND (frame, default_discard_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->discard,
+                            fd, offset, len, xdata);
+                return 0;
+        }
+
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret < 0 || !bd_fd) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        param[0] = offset;
+        param[1] = len;
+        ret = ioctl (bd_fd->fd, BLKDISCARD, param);
+        if (ret < 0) {
+                if (errno == ENOTTY)
+                        op_errno = ENOSYS;
+                else
+                        op_errno = errno;
+                goto out;
+        }
+        memcpy (&prebuf, &bdatt->iatt, sizeof (prebuf));
+        bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
+
+        /* success: do not leak the EINVAL default into the reply */
+        BD_STACK_UNWIND (discard, frame, ret, 0, &prebuf,
+                         &bdatt->iatt, xdata);
+        return 0;
+
+out:
+        BD_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+#else
+
+/*
+ * bd_discard: Built when BLKDISCARD is not defined; always fails the
+ * request with ENOSYS.
+ */
+int
+bd_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+        BD_STACK_UNWIND (discard, frame, -1, ENOSYS, NULL, NULL, NULL);
+        return 0;
+}
+#endif
+
+/*
+ * Call back from posix_open for opening the backing posix file
+ * If it failed, close BD fd
+ *
+ * The BD fd context is removed from the fd before it is closed and freed.
+ * bd_release also closes and frees whatever it finds in the fd context, so
+ * leaving the stale pointer behind would lead to a second close/free.
+ * The reply is always unwound with NULL xdata.
+ */
+int
+bd_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+        bd_fd_t    *bd_fd   = NULL;
+        bd_attr_t  *bdatt   = NULL;
+        uint64_t    tmp_bfd = 0;
+
+        /* posix open succeeded: nothing to undo */
+        if (!op_ret)
+                goto out;
+
+        bd_inode_ctx_get (fd->inode, this, &bdatt);
+        if (!bdatt) /* posix file */
+                goto out;
+
+        /* posix open failed: detach the BD fd from the fd context */
+        if (fd_ctx_del (fd, this, &tmp_bfd) < 0 || !tmp_bfd) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "bd_fd is NULL from fd=%p", fd);
+                goto out;
+        }
+        bd_fd = (bd_fd_t *)(long)tmp_bfd;
+
+        sys_close (bd_fd->fd);
+        GF_FREE (bd_fd);
+
+out:
+        BD_STACK_UNWIND (open, frame, op_ret, op_errno, fd, NULL);
+
+        return 0;
+}
+
+/*
+ * bd_open: Opens BD file if given posix file is mapped to BD. Also opens
+ * posix file.
+ * fd contains both posix and BD fd
+ *
+ * For a regular file with a BD inode context the device /dev/<vg>/<gfid> is
+ * opened with the caller's flags plus O_LARGEFILE, and the resulting
+ * bd_fd_t is stored in the fd context. In every non-error case the open is
+ * then wound to the first child with bd_open_cbk as callback.
+ *
+ * Errors (unwound directly): EINVAL for invalid arguments or when the fd
+ * context cannot be set, the errno of open() when the device cannot be
+ * opened, and whatever BD_VALIDATE_MEM_ALLOC stores on allocation failure.
+ */
+int32_t
+bd_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         fd_t *fd, dict_t *xdata)
+{
+        int32_t      ret     = EINVAL;
+        bd_fd_t     *bd_fd   = NULL;
+        bd_attr_t   *bdatt   = NULL;
+        bd_gfid_t    gfid    = {0, };
+        char        *devpath = NULL;
+        bd_priv_t   *priv    = NULL;
+        int          _fd     = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        /* read only after 'this' has been validated */
+        priv = this->private;
+
+        /* not bd file */
+        if (fd->inode->ia_type != IA_IFREG ||
+            bd_inode_ctx_get (fd->inode, this, &bdatt))
+                goto posix;
+
+        uuid_utoa_r (fd->inode->gfid, gfid);
+        gf_asprintf (&devpath, "/dev/%s/%s", priv->vg, gfid);
+        BD_VALIDATE_MEM_ALLOC (devpath, ret, out);
+
+        _fd = open (devpath, flags | O_LARGEFILE, 0);
+        if (_fd < 0) {
+                ret = errno;
+                gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", devpath,
+                        strerror (ret));
+                goto out;
+        }
+        bd_fd = GF_CALLOC (1, sizeof(bd_fd_t), gf_bd_fd);
+        BD_VALIDATE_MEM_ALLOC (bd_fd, ret, out);
+
+        bd_fd->fd = _fd;
+        bd_fd->flag = flags | O_LARGEFILE;
+
+        if (fd_ctx_set (fd, this, (uint64_t)(long)bd_fd) < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to set the fd context fd=%p", fd);
+                goto out;
+        }
+
+        ret = 0;
+
+posix:
+
+        /* open posix equivalant of this file, fd needed for fd related
+           operations like fsetxattr, ftruncate etc */
+        STACK_WIND (frame, bd_open_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+
+        /* devpath was only needed for open() above; it is NULL (and the
+           free a no-op) when the BD branch was skipped */
+        GF_FREE (devpath);
+
+        return 0;
+out:
+        BD_STACK_UNWIND (open, frame, -1, ret, fd, NULL);
+
+        GF_FREE (devpath);
+        if (ret) {
+                if (_fd >= 0)
+                        sys_close (_fd);
+                GF_FREE (bd_fd);
+        }
+
+        return 0;
+}
+
+/*
+ * Callback of the setattr wound by bd_fsync after the BD fd was synced.
+ * Completes the fsync, passing the child's op_ret/op_errno through and
+ * handing back the iatt kept in local->bdatt as both pre and post
+ * attributes.
+ */
+int
+bd_fsync_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, struct iatt *pre,
+                      struct iatt *post, dict_t *xdata)
+{
+        bd_local_t   *ctx    = frame->local;
+        struct iatt  *synced = &ctx->bdatt->iatt;
+
+        BD_STACK_UNWIND (fsync, frame, op_ret, op_errno, synced, synced,
+                         NULL);
+        return 0;
+}
+
+/*
+ * bd_do_fsync: Flushes @fd with sys_fdatasync when @datasync is non-zero,
+ * with sys_fsync otherwise.
+ *
+ * Returns 0 on success, or the errno captured right after the failing call
+ * (the failure is also logged).
+ */
+int
+bd_do_fsync (int fd, int datasync)
+{
+        int saved_errno = 0;
+
+        if (!datasync) {
+                if (sys_fsync (fd)) {
+                        saved_errno = errno;
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "fsync on fd=%d failed: %s",
+                                fd, strerror (saved_errno));
+                }
+                return saved_errno;
+        }
+
+        if (sys_fdatasync (fd)) {
+                saved_errno = errno;
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "fdatasync on fd=%d failed: %s",
+                        fd, strerror (errno));
+        }
+
+        return saved_errno;
+}
+
+/*
+ * bd_fsync: Syncs if BD fd, forwards the request to posix
+ * fsync -> posix_setattr -> posix_fsync
+ *
+ * Without a BD fd context or BD inode context the request is wound to the
+ * first child's fsync. Otherwise the BD fd is synced via bd_do_fsync:
+ *  - datasync != 0: the fop is completed here with the cached iatt;
+ *  - datasync == 0: a copy of the cached attributes with refreshed
+ *    atime/mtime is pushed to the child through setattr, and the fop is
+ *    completed by bd_fsync_setattr_cbk.
+ *
+ * Errors: EINVAL for invalid arguments, the errno reported by bd_do_fsync,
+ * or whatever BD_VALIDATE_MEM_ALLOC stores on allocation failure.
+ */
+int32_t
+bd_fsync (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, int32_t datasync, dict_t *xdata)
+{
+        int          ret      = -1;
+        int32_t      op_ret   = -1;
+        /* EINVAL so that a failed argument check is not reported as 0 */
+        int32_t      op_errno = EINVAL;
+        bd_fd_t     *bd_fd    = NULL;
+        bd_attr_t   *bdatt    = NULL;
+        bd_local_t  *local    = NULL;
+        int          valid    = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+        struct iatt  prebuf   = {0, };
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        /* only the resulting bdatt matters; NULL means "not a BD file" */
+        bd_inode_ctx_get (fd->inode, this, &bdatt);
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret < 0 || !bd_fd || !bdatt) {
+                STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->fsync, fd, datasync,
+                            xdata);
+                return 0;
+        }
+
+        memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
+
+        op_errno = bd_do_fsync (bd_fd->fd, datasync);
+        if (op_errno)
+                goto out;
+
+        /* For BD, Update the a|mtime during full fsync only */
+        if (!datasync) {
+                local = bd_local_init (frame, this);
+                /* In case of mem failure, should posix flush called ? */
+                BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+                local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
+                BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
+
+                local->bdatt->type = gf_strdup (bdatt->type);
+                memcpy (&local->bdatt->iatt, &bdatt->iatt, sizeof (struct iatt));
+                bd_update_amtime (&local->bdatt->iatt, valid);
+                gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
+                STACK_WIND (frame, bd_fsync_setattr_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->setattr, &local->loc,
+                            &local->bdatt->iatt,
+                            valid, NULL);
+                return 0;
+        }
+
+        /* fdatasync succeeded: report success instead of the initial -1 */
+        op_ret = 0;
+
+out:
+        /* bdatt is NULL when an argument check failed before the lookup */
+        BD_STACK_UNWIND (fsync, frame, op_ret, op_errno, &prebuf,
+                         bdatt ? &bdatt->iatt : NULL, NULL);
+        return 0;
+}
+
+/*
+ * Callback of the setattr wound by bd_flush. Completes the flush with the
+ * op_ret/op_errno/xdata returned by the child; the pre/post attributes are
+ * not used.
+ */
+int
+bd_flush_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ BD_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+/*
+ * bd_flush: For a BD file with a BD fd context, pushes the cached iatt
+ * (atime and mtime) to the first child through setattr and completes the
+ * flush in bd_flush_setattr_cbk. In every other case, including argument or
+ * allocation failures, the flush is wound to the first child.
+ */
+int
+bd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+        bd_attr_t   *attr      = NULL;
+        bd_fd_t     *bfd       = NULL;
+        bd_local_t  *ctx       = NULL;
+        loc_t        target    = {0, };
+        int          time_mask = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+        int          op_errno  = EINVAL;
+        int          ret       = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        /* no BD attributes on the inode: plain posix file */
+        bd_inode_ctx_get (fd->inode, this, &attr);
+        if (attr == NULL)
+                goto out;
+
+        ret = bd_fd_ctx_get (this, fd, &bfd);
+        if (ret < 0 || bfd == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "bdfd/bdatt is NULL from fd=%p", fd);
+                goto out;
+        }
+
+        ctx = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (ctx, op_errno, out);
+
+        ctx->fd = fd_ref (fd);
+        gf_uuid_copy (target.gfid, attr->iatt.ia_gfid);
+
+        /* Update the a|mtime during flush */
+        STACK_WIND (frame, bd_flush_setattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setattr, &target, &attr->iatt,
+                    time_mask, NULL);
+
+        return 0;
+
+out:
+        STACK_WIND (frame, default_flush_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->flush, fd, xdata);
+
+        return 0;
+}
+
+/*
+ * bd_release: For a BD file, removes the bd_fd_t stored in the fd context,
+ * closes its descriptor and frees it. Files without a BD inode context are
+ * left untouched. Always returns 0.
+ */
+int32_t
+bd_release (xlator_t *this, fd_t *fd)
+{
+        bd_priv_t  *priv   = this->private;
+        bd_attr_t  *attr   = NULL;
+        bd_fd_t    *bfd    = NULL;
+        uint64_t    stored = 0;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (priv, out);
+
+        /* posix file */
+        if (bd_inode_ctx_get (fd->inode, this, &attr) || !attr)
+                goto out;
+
+        /* FIXME: Update amtime during release */
+
+        if (fd_ctx_del (fd, this, &stored) < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "bfd is NULL from fd=%p", fd);
+                goto out;
+        }
+
+        bfd = (bd_fd_t *)(long)stored;
+        sys_close (bfd->fd);
+        GF_FREE (bfd);
+
+out:
+        return 0;
+}
+
+/*
+ * Call back for removexattr after removing BD_XATTR incase of
+ * bd create failure
+ *
+ * The original [f]setxattr always fails with EIO at this point, whatever
+ * the outcome of the removal. The fop being unwound follows the way the
+ * request came in: fsetxattr when local->fd is set, setxattr otherwise.
+ */
+int
+bd_setx_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, dict_t *xdata)
+{
+        bd_local_t *local = frame->local;
+
+        if (local->fd)
+                BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, xdata);
+        else
+                BD_STACK_UNWIND (setxattr, frame, -1, EIO, xdata);
+        return 0;
+
+}
+
+/*
+ * Call back after setting BD_XATTR. Creates BD. If BD creation is a failure
+ * invokes posix_removexattr to remove created BD_XATTR
+ *
+ * Flow:
+ *  - the child's [f]setxattr failed: unwind with its op_ret/op_errno;
+ *  - bd_create returned non-zero: wind [f]removexattr of BD_XATTR, completed
+ *    by bd_setx_rm_xattr_cbk;
+ *  - bd_create returned 0: store a copy of local->bdatt in the inode context
+ *    and unwind (ENOMEM if the copy cannot be allocated).
+ *
+ * The fop being unwound follows the way the request came in: fsetxattr
+ * when local->fd is set, setxattr otherwise.
+ */
+int
+bd_setx_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, dict_t *xdata)
+{
+        bd_local_t  *local = frame->local;
+        bd_attr_t   *bdatt = NULL;
+
+        if (op_ret < 0)
+                goto next;
+
+        /* Create LV */
+        op_errno = bd_create (local->inode->gfid, local->bdatt->iatt.ia_size,
+                              local->bdatt->type, this->private);
+        if (!op_errno)
+                goto out;
+
+        /* LV creation failed, remove BD_XATTR */
+        if (local->fd)
+                STACK_WIND (frame, bd_setx_rm_xattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fremovexattr,
+                            local->fd, BD_XATTR, NULL);
+        else
+                STACK_WIND (frame, bd_setx_rm_xattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->removexattr,
+                            &local->loc, BD_XATTR, NULL);
+
+        return 0;
+out:
+
+        /* LV exists now: remember its attributes on the inode */
+        bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
+        if (!bdatt) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto next;
+        }
+
+        memcpy (&bdatt->iatt, &local->bdatt->iatt, sizeof (struct iatt));
+        bdatt->type = gf_strdup (local->bdatt->type);
+
+        bd_inode_ctx_set (local->inode, THIS, bdatt);
+
+next:
+        if (local->fd)
+                BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+        else
+                BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+
+}
+
+/*
+ * Call back from posix_stat
+ *
+ * Parses the BD_XATTR value held in local->data, which has the form
+ * "<type>[:<size>]": <type> must be BD_LV or BD_THIN (the latter only when
+ * the volume has BD_CAPS_THIN); when <size> is absent the default extent
+ * size is used. The normalised "<type>:<size>" string is then set as
+ * BD_XATTR on the child via [f]setxattr, with bd_setx_setx_cbk as callback.
+ *
+ * Errors: the child's stat error, EOPNOTSUPP for non-regular files or
+ * unsupported thin LVs, EINVAL for a malformed value or dict failure, and
+ * whatever BD_VALIDATE_MEM_ALLOC stores on allocation failure.
+ */
+int
+bd_setx_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, struct iatt *iatt,
+                  dict_t *xdata)
+{
+        char        *param  = NULL;
+        char        *type   = NULL;
+        char        *s_size = NULL;
+        char        *p      = NULL;
+        char        *copy   = NULL;
+        bd_local_t  *local  = frame->local;
+        bd_priv_t   *priv   = this->private;
+        char        *bd     = NULL;
+        uint64_t     size   = 0;
+
+        if (op_ret < 0)
+                goto out;
+
+        if (!IA_ISREG (iatt->ia_type)) {
+                op_errno = EOPNOTSUPP;
+                goto out;
+        }
+
+        /* len + 1 zeroed bytes keep the copy NUL-terminated after strncpy */
+        param = copy = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char);
+        BD_VALIDATE_MEM_ALLOC (param, op_errno, out);
+
+        strncpy (param, local->data->data, local->data->len);
+
+        type = strtok_r (param, ":", &p);
+        if (!type) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (strcmp (type, BD_LV) && strcmp (type, BD_THIN)) {
+                gf_log (this->name, GF_LOG_WARNING, "Invalid bd type %s given",
+                        type);
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (!strcmp (type, BD_THIN) && !(priv->caps & BD_CAPS_THIN)) {
+                gf_log (this->name, GF_LOG_WARNING, "THIN lv not supported by "
+                        "this volume");
+                op_errno = EOPNOTSUPP;
+                goto out;
+        }
+
+        s_size = strtok_r (NULL, ":", &p);
+
+        /* If size not specified get default size */
+        if (!s_size)
+                size = bd_get_default_extent (priv);
+        else
+                gf_string2bytesize (s_size, &size);
+
+        /* size is a uint64_t: print it with a matching conversion */
+        gf_asprintf (&bd, "%s:%llu", type, (unsigned long long) size);
+        BD_VALIDATE_MEM_ALLOC (bd, op_errno, out);
+
+        local->dict = dict_new ();
+        BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
+
+        local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
+        BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
+
+        if (dict_set_dynstr (local->dict, BD_XATTR, bd) < 0) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        local->bdatt->type = gf_strdup (type);
+        memcpy (&local->bdatt->iatt, iatt, sizeof (struct iatt));
+        local->bdatt->iatt.ia_size = size;
+
+        /* 'type' pointed into the scratch copy and has been duplicated, so
+           the copy can go now instead of being leaked on this path */
+        GF_FREE (copy);
+
+        if (local->fd)
+                STACK_WIND (frame, bd_setx_setx_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsetxattr,
+                            local->fd, local->dict, 0, NULL);
+        else
+                STACK_WIND (frame, bd_setx_setx_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->setxattr,
+                            &local->loc, local->dict, 0, NULL);
+
+        return 0;
+
+out:
+        if (local->fd)
+                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, xdata);
+        else
+                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata);
+
+        GF_FREE (bd);
+        GF_FREE (copy);
+        return 0;
+}
+
+/*
+ * Callback of the removexattr wound by bd_offload_setx_cbk after a failed
+ * snapshot/clone. The original request is always failed with EIO; it is
+ * unwound as fsetxattr when local->fd is set and as setxattr otherwise.
+ */
+int
+bd_offload_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno, dict_t *xdata)
+{
+        bd_local_t *ctx = frame->local;
+
+        if (!ctx->fd) {
+                BD_STACK_UNWIND (setxattr, frame, -1, EIO, NULL);
+                return 0;
+        }
+
+        BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, NULL);
+        return 0;
+}
+
+/*
+ * Callback of the setxattr that stores BD_XATTR on the destination file.
+ * On success it runs bd_snapshot_create (for BD_OF_SNAPSHOT) or bd_clone
+ * (any other offload type). If that returns non-zero, BD_XATTR is removed
+ * from the destination again and bd_offload_rm_xattr_cbk finishes the
+ * request; otherwise the request is unwound here.
+ */
+int
+bd_offload_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int op_ret, int op_errno, dict_t *xdata)
+{
+        bd_local_t *ctx = frame->local;
+
+        if (op_ret >= 0) {
+                op_ret = (ctx->offload == BD_OF_SNAPSHOT)
+                        ? bd_snapshot_create (ctx, this->private)
+                        : bd_clone (ctx, this->private);
+
+                if (op_ret != 0) {
+                        /* undo the xattr written on the destination */
+                        STACK_WIND (frame, bd_offload_rm_xattr_cbk,
+                                    FIRST_CHILD(this),
+                                    FIRST_CHILD(this)->fops->removexattr,
+                                    ctx->dloc, BD_XATTR, NULL);
+                        return 0;
+                }
+        }
+
+        if (ctx->fd)
+                BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+        else
+                BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback of the getxattr that fetches BD_XATTR from the source file.
+ *
+ * The value is split at its last ':' into the LV type and a size. When
+ * local->size is still 0 it is filled from that size. A new "<type>:<size>"
+ * string then replaces BD_XATTR in local->dict (LINKTO is dropped) and is
+ * set on the destination via setxattr, with bd_offload_setx_cbk as
+ * callback.
+ *
+ * Errors: the child's getxattr error, EINVAL when BD_XATTR is missing,
+ * has no ':' or cannot be stored in the dict, and whatever
+ * BD_VALIDATE_MEM_ALLOC stores on allocation failure.
+ */
+int
+bd_offload_getx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+        char        *bd    = NULL;
+        bd_local_t  *local = frame->local;
+        char        *type  = NULL;
+        char        *p     = NULL;
+
+        if (op_ret < 0)
+                goto out;
+
+        if (dict_get_str (xattr, BD_XATTR, &p)) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* work on a private copy: the string is cut at the separator */
+        type = gf_strdup (p);
+        BD_VALIDATE_MEM_ALLOC (type, op_errno, out);
+
+        p = strrchr (type, ':');
+        if (!p) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "source file xattr %s corrupted?", type);
+                goto out;
+        }
+
+        *p='\0';
+
+        /* For clone size is taken from source LV */
+        if (!local->size) {
+                p++;
+                gf_string2bytesize (p, &local->size);
+        }
+        gf_asprintf (&bd, "%s:%ld", type, local->size);
+        BD_VALIDATE_MEM_ALLOC (bd, op_errno, out);
+
+        local->bdatt->type = gf_strdup (type);
+        dict_del (local->dict, BD_XATTR);
+        dict_del (local->dict, LINKTO);
+        if (dict_set_dynstr (local->dict, BD_XATTR, bd)) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* the scratch copy has been duplicated into local->bdatt->type;
+           release it here instead of leaking it on the success path */
+        GF_FREE (type);
+
+        STACK_WIND (frame, bd_offload_setx_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setxattr,
+                    local->dloc, local->dict, 0, NULL);
+
+        return 0;
+
+out:
+        if (local->fd)
+                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+        else
+                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+        GF_FREE (type);
+        GF_FREE (bd);
+
+        return 0;
+}
+
+/*
+ * Callback of the lookup done on the destination gfid of a clone/snapshot.
+ *
+ * The destination must be a regular file, must not carry LINKTO and must
+ * not already carry BD_XATTR. When all checks pass, local->bdatt is
+ * allocated and BD_XATTR is fetched from the source (local->loc) through
+ * getxattr, with bd_offload_getx_cbk as callback.
+ *
+ * Errors: EINVAL for a failed lookup (other than ENODATA), a non-regular
+ * destination or a LINKTO destination; EEXIST when BD_XATTR is already
+ * present; whatever BD_VALIDATE_MEM_ALLOC stores on allocation failure.
+ */
+int
+bd_offload_dest_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int op_ret, int op_errno,
+                            inode_t *inode, struct iatt *iatt,
+                            dict_t *xattr, struct iatt *postparent)
+{
+        bd_local_t  *ctx      = frame->local;
+        char        *linkto   = NULL;
+        char        *existing = NULL;
+
+        if (op_ret < 0 && op_errno != ENODATA) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (!IA_ISREG (iatt->ia_type)) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_WARNING, "destination gfid is not a "
+                        "regular file");
+                goto out;
+        }
+
+        /* only the presence of the keys matters, not the return codes */
+        (void) dict_get_str (xattr, LINKTO, &linkto);
+        if (linkto != NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_WARNING, "destination file not "
+                        "present in same brick");
+                goto out;
+        }
+
+        (void) dict_get_str (xattr, BD_XATTR, &existing);
+        if (existing != NULL) {
+                op_errno = EEXIST;
+                goto out;
+        }
+
+        ctx->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
+        BD_VALIDATE_MEM_ALLOC (ctx->bdatt, op_errno, out);
+
+        STACK_WIND (frame, bd_offload_getx_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr,
+                    &ctx->loc, BD_XATTR, NULL);
+
+        return 0;
+
+out:
+        if (ctx->fd)
+                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+        else
+                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback of the unlink wound by bd_do_merge. Completes the originating
+ * setxattr with the op_ret/op_errno of the unlink and NULL xdata.
+ */
+int
+bd_merge_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ /* FIXME: if delete failed, remove xattr */
+
+ BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+/*
+ * bd_do_merge: Runs bd_merge for local->inode and then unlinks the file
+ * through the first child, with bd_merge_unlink_cbk as callback.
+ *
+ * Returns 0 once the unlink has been wound. On failure the frame is
+ * unwound with -1 and the error, which is also returned: the value of
+ * bd_merge when it is non-zero, or EINVAL when the parent inode cannot be
+ * found.
+ */
+int
+bd_do_merge(call_frame_t *frame, xlator_t *this)
+{
+        bd_local_t  *local    = frame->local;
+        inode_t     *parent   = NULL;
+        char        *p        = NULL;
+        int          op_errno = 0;
+
+        op_errno = bd_merge (this->private, local->inode->gfid);
+        if (op_errno)
+                goto out;
+
+        /*
+         * posix_unlink needs loc->pargfid to be valid, but setxattr FOP does
+         * not have loc->pargfid set. Get parent's gfid by getting parents inode
+         */
+        parent = inode_parent (local->inode, NULL, NULL);
+        if (!parent) {
+                /*
+                 * FIXME: Snapshot LV already deleted.
+                 * remove xattr, instead of returning failure
+                 */
+                op_errno = EINVAL;
+                goto out;
+        }
+        gf_uuid_copy (local->loc.pargfid, parent->gfid);
+
+        /* NOTE(review): inode_parent is assumed to hand back a referenced
+           inode, as in libglusterfs; only the gfid was needed, so drop the
+           reference here instead of leaking it */
+        inode_unref (parent);
+
+        /* the basename is whatever follows the last '/', if there is a path */
+        if (local->loc.path)
+                p = strrchr (local->loc.path, '/');
+        if (p)
+                p++;
+        local->loc.name = p;
+
+        STACK_WIND (frame, bd_merge_unlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink,
+                    &local->loc, 0, NULL);
+
+        return 0;
+out:
+        /* merge is only reachable through bd_setxattr, so unwind setxattr */
+        BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+        return op_errno;
+}
+
+/*
+ * bd_offload: Starts a clone or snapshot.
+ *
+ * local->data holds "<dest-gfid>[:<size>]". The gfid is parsed into
+ * local->dloc; the size, when given, into local->size (for anything but a
+ * clone the default extent size is used when it is missing). A lookup asking
+ * for BD_XATTR and LINKTO is then wound on the destination, continued by
+ * bd_offload_dest_lookup_cbk.
+ *
+ * @loc is not used; @fd only selects whether a failure is unwound as
+ * fsetxattr or setxattr. Always returns 0.
+ *
+ * Errors: EINVAL when the value has no gfid token or the request dict
+ * cannot be filled, and whatever BD_VALIDATE_MEM_ALLOC stores on allocation
+ * failure.
+ */
+int
+bd_offload (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            fd_t *fd, bd_offload_t offload)
+{
+        char        *param      = NULL;
+        char        *param_copy = NULL;
+        char        *p          = NULL;
+        char        *size       = NULL;
+        char        *gfid       = NULL;
+        int          op_errno   = 0;
+        bd_local_t  *local      = frame->local;
+
+        /* len + 1 zeroed bytes keep the copy NUL-terminated after strncpy */
+        param = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char);
+        BD_VALIDATE_MEM_ALLOC (param, op_errno, out);
+        param_copy = param;
+
+        local->dict = dict_new ();
+        BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
+
+        local->dloc = GF_CALLOC (1, sizeof (loc_t), gf_bd_loc_t);
+        BD_VALIDATE_MEM_ALLOC (local->dloc, op_errno, out);
+
+        strncpy (param, local->data->data, local->data->len);
+
+        gfid = strtok_r (param, ":", &p);
+        if (!gfid) {
+                /* empty value: there is no destination to parse */
+                op_errno = EINVAL;
+                goto out;
+        }
+        size = strtok_r (NULL, ":", &p);
+        if (size)
+                gf_string2bytesize (size, &local->size);
+        else if (offload != BD_OF_CLONE)
+                local->size = bd_get_default_extent (this->private);
+
+        if (dict_set_int8 (local->dict, BD_XATTR, 1) < 0) {
+                op_errno = EINVAL;
+                goto out;
+        }
+        if (dict_set_int8 (local->dict, LINKTO, 1) < 0) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        gf_uuid_parse (gfid, local->dloc->gfid);
+        local->offload = offload;
+
+        /* both tokens have been consumed; free the scratch copy here
+           instead of leaking it on the success path */
+        GF_FREE (param_copy);
+
+        STACK_WIND (frame, bd_offload_dest_lookup_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->lookup, local->dloc,
+                    local->dict);
+
+        return 0;
+
+out:
+        if (fd)
+                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+        else
+                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+        GF_FREE (param_copy);
+        return 0;
+}
+
+/*
+ * bd_setxattr: Used to create & map an LV to a posix file using
+ * BD_XATTR xattr
+ * bd_setxattr -> posix_stat -> bd_setx_stat_cbk -> posix_setxattr ->
+ * bd_setx_setx_cbk -> create_lv
+ * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk
+ *
+ * Dispatch on the first of these keys found in @dict:
+ *  - BD_XATTR: map the file to a new LV (fails with EEXIST when the inode
+ *    already has BD attributes);
+ *  - BD_CLONE / BD_SNAPSHOT: bd_offload;
+ *  - BD_MERGE: bd_do_merge;
+ *    (these three need an inode that is already mapped, else EINVAL)
+ *  - none of them: the request is wound unchanged to the first child.
+ */
+int
+bd_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+             int flags, dict_t *xdata)
+{
+        /* EINVAL so that a failed argument check still produces a reply */
+        int           op_errno = EINVAL;
+        data_t       *data     = NULL;
+        bd_local_t   *local    = NULL;
+        bd_attr_t    *bdatt    = NULL;
+        bd_offload_t  cl_type  = BD_OF_NONE;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        if ((data = dict_get (dict, BD_XATTR)))
+                cl_type = BD_OF_NONE;
+        else if ((data = dict_get (dict, BD_CLONE)))
+                cl_type = BD_OF_CLONE;
+        else if ((data = dict_get (dict, BD_SNAPSHOT)))
+                cl_type = BD_OF_SNAPSHOT;
+        else if ((data = dict_get (dict, BD_MERGE)))
+                cl_type = BD_OF_MERGE;
+
+        bd_inode_ctx_get (loc->inode, this, &bdatt);
+        if (!cl_type && !data) {
+                /* no BD key in the request: plain setxattr */
+                STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->setxattr, loc, dict,
+                            flags, xdata);
+                return 0;
+        }
+
+        local = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+        local->data = data;
+        loc_copy (&local->loc, loc);
+        local->inode = inode_ref (loc->inode);
+
+        if (cl_type) {
+                /* For cloning/snapshot, source file must be mapped to LV */
+                if (!bdatt) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s not mapped to BD", loc->path);
+                        op_errno = EINVAL;
+                        goto out;
+                }
+                /* both helpers reply on the frame themselves */
+                if (cl_type == BD_OF_MERGE)
+                        bd_do_merge (frame, this);
+                else
+                        bd_offload (frame, this, loc, NULL, cl_type);
+        } else if (data) {
+                if (bdatt) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s already mapped to BD", loc->path);
+                        op_errno = EEXIST;
+                        goto out;
+                }
+                STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->stat, loc, xdata);
+        }
+
+        return 0;
+out:
+        /*
+         * Use the same unwind macro as every other error path in this
+         * file (presumably it also releases frame->local -- confirm in
+         * bd.h). Without a frame there is nothing to reply on.
+         */
+        if (frame)
+                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata);
+
+        return 0;
+}
+
+/*
+ * bd_fsetxattr: Used to create/map an LV to a posix file using
+ * BD_XATTR xattr
+ * bd_fsetxattr -> posix_fstat -> bd_setx_stat_cbk -> posix_fsetxattr ->
+ * bd_setx_setx_cbk -> create_lv
+ * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk
+ * -> bd_fsetxattr_cbk
+ *
+ * Dispatch on the first of these keys found in @dict:
+ *  - BD_XATTR: map the file to a new LV (EEXIST when already mapped);
+ *  - BD_CLONE / BD_SNAPSHOT: bd_offload (EINVAL when the fd's inode is not
+ *    mapped);
+ *  - BD_MERGE: rejected with EOPNOTSUPP;
+ *  - none of them: the request is wound unchanged to the first child.
+ */
+int32_t
+bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+              int flags, dict_t *xdata)
+{
+        /* EINVAL so that a failed argument check is not reported as 0 */
+        int           op_errno = EINVAL;
+        data_t       *data     = NULL;
+        bd_attr_t    *bdatt    = NULL;
+        bd_local_t   *local    = NULL;
+        bd_offload_t  cl_type  = BD_OF_NONE;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        if ((data = dict_get (dict, BD_XATTR)))
+                cl_type = BD_OF_NONE;
+        else if ((data = dict_get (dict, BD_CLONE)))
+                cl_type = BD_OF_CLONE;
+        else if ((data = dict_get (dict, BD_SNAPSHOT)))
+                cl_type = BD_OF_SNAPSHOT;
+        else if ((data = dict_get (dict, BD_MERGE))) {
+                /*
+                 * bd_merge is not supported for fsetxattr, because snapshot LV
+                 * is opened and it causes problem in snapshot merge
+                 */
+                op_errno = EOPNOTSUPP;
+                goto out;
+        }
+
+        /* one lookup is enough; bdatt stays NULL for non-BD inodes */
+        bd_inode_ctx_get (fd->inode, this, &bdatt);
+
+        if (!cl_type && !data) {
+                /* non bd file object */
+                STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsetxattr,
+                            fd, dict, flags, xdata);
+                return 0;
+        }
+
+        local = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+        local->inode = inode_ref (fd->inode);
+        local->fd = fd_ref (fd);
+        local->data = data;
+
+        if (cl_type) {
+                /* For cloning/snapshot, source file must be mapped to LV */
+                if (!bdatt) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "fd %p not mapped to BD", fd);
+                        op_errno = EINVAL;
+                        goto out;
+
+                }
+                /* bd_offload replies on the frame itself */
+                bd_offload (frame, this, NULL, fd, cl_type);
+        } else if (data) {
+                if (bdatt) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "fd %p already mapped to BD", fd);
+                        op_errno = EEXIST;
+                        goto out;
+                }
+                STACK_WIND(frame, bd_setx_stat_cbk, FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->fstat, fd, xdata);
+        }
+
+        return 0;
+out:
+
+        /* this is the fd flavour of the fop, so unwind it as such */
+        BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * bd_removexattr: Refuses to remove BD_XATTR (the request fails with
+ * ENODATA); every other name is wound to the first child.
+ */
+int32_t
+bd_removexattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name, dict_t *xdata)
+{
+        if (strcmp (name, BD_XATTR) == 0) {
+                BD_STACK_UNWIND (removexattr, frame, -1, ENODATA, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+        return 0;
+}
+
+/*
+ * bd_fremovexattr: Refuses to remove BD_XATTR (the request fails with
+ * ENODATA); every other name is wound to the first child's fremovexattr.
+ */
+int32_t
+bd_fremovexattr (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, const char *name, dict_t *xdata)
+{
+        if (!strcmp (name, BD_XATTR))
+                goto out;
+
+        /* pair the fremovexattr wind with the fremovexattr default callback
+           (the removexattr one was used before) */
+        STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+
+        return 0;
+out:
+        BD_STACK_UNWIND (fremovexattr, frame, -1, ENODATA, NULL);
+        return 0;
+}
+
+/*
+ * Callback of the [f]setxattr wound from the revert path of
+ * bd_trunc_setxattr_cbk. The truncate has failed at this point, so the
+ * request is unwound with EIO: as ftruncate when local->fd is set, as
+ * truncate otherwise.
+ */
+int
+bd_trunc_setxattr_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int op_ret, int op_errno, dict_t *xdata)
+{
+        bd_local_t *ctx = frame->local;
+
+        if (!ctx->fd) {
+                BD_STACK_UNWIND (truncate, frame, -1, EIO, NULL, NULL, NULL);
+                return 0;
+        }
+
+        BD_STACK_UNWIND (ftruncate, frame, -1, EIO, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Call back for setxattr after setting BD_XATTR_SIZE.
+ *
+ * With the new size stored in the xattr, the LV is resized through
+ * bd_resize. On success the cached size is updated and the [f]truncate is
+ * completed with the old and new iatt. If the resize fails, BD_XATTR is
+ * set back to "<type>:<cached size>" through another [f]setxattr, which is
+ * completed (with EIO) by bd_trunc_setxattr_setx_cbk.
+ *
+ * Every other failure unwinds the [f]truncate with EIO.
+ */
+int
+bd_trunc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno, dict_t *xdata)
+{
+        bd_local_t  *local  = frame->local;
+        bd_attr_t   *bdatt  = NULL;
+        struct iatt  prebuf = {0, };
+        char        *bd     = NULL;
+
+        if (op_ret < 0)
+                goto out;
+
+        bd_inode_ctx_get (local->inode, this, &bdatt);
+        if (!bdatt)
+                /* without the cached attributes the previous xattr value
+                   cannot be rebuilt, so there is nothing to revert to */
+                goto out;
+
+        op_errno = bd_resize (this->private, local->inode->gfid,
+                              local->bdatt->iatt.ia_size);
+        if (op_errno)
+                goto revert_xattr;
+
+        memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
+        /* LV resized, update new size in the cache */
+        bdatt->iatt.ia_size = local->bdatt->iatt.ia_size;
+
+        if (local->fd)
+                BD_STACK_UNWIND (ftruncate, frame, 0, 0, &prebuf, &bdatt->iatt,
+                                 NULL);
+        else
+                BD_STACK_UNWIND (truncate, frame, 0, 0, &prebuf, &bdatt->iatt,
+                                 NULL);
+
+        return 0;
+
+revert_xattr:
+        /*
+         * revert setxattr: put the old "<type>:<size>" back into the dict.
+         * The string currently stored under BD_XATTR belongs to the dict
+         * and must not be freed by hand; drop the key and set the rebuilt
+         * value instead.
+         */
+        gf_asprintf (&bd, "%s:%ld", bdatt->type, bdatt->iatt.ia_size);
+        if (!bd)
+                goto out;
+
+        dict_del (local->dict, BD_XATTR);
+        if (dict_set_dynstr (local->dict, BD_XATTR, bd))
+                /* NOTE(review): who owns bd after a failed
+                   dict_set_dynstr is not visible from here; it is left
+                   alone to rule out a double free -- confirm */
+                goto out;
+
+        if (local->fd)
+                STACK_WIND (frame, bd_trunc_setxattr_setx_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsetxattr,
+                            local->fd, local->dict, 0, NULL);
+        else
+                STACK_WIND (frame, bd_trunc_setxattr_setx_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->setxattr,
+                            &local->loc, local->dict, 0, NULL);
+
+        return 0;
+out:
+        if (local->fd)
+                BD_STACK_UNWIND (ftruncate, frame, -1, EIO, NULL, NULL, NULL);
+        else
+                BD_STACK_UNWIND (truncate, frame, -1, EIO, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Callback of the [f]stat wound by bd_do_trunc.
+ *
+ * Builds "<type>:<new size>" from the LV type kept in the inode context and
+ * the size kept in local->bdatt, stores it as BD_XATTR in a new dict and
+ * winds [f]setxattr to the first child. bd_trunc_setxattr_cbk carries on
+ * from there.
+ *
+ * Errors: the child's stat error, EINVAL when the inode context is missing
+ * or the dict cannot be filled, and whatever BD_VALIDATE_MEM_ALLOC stores
+ * on allocation failure.
+ */
+int
+bd_trunc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int op_ret, int op_errno, struct iatt *buf, dict_t *xdata)
+{
+        bd_local_t  *ctx    = frame->local;
+        bd_attr_t   *cached = NULL;
+        char        *value  = NULL;
+
+        if (op_ret < 0)
+                goto out;
+
+        ctx->dict = dict_new ();
+        BD_VALIDATE_MEM_ALLOC (ctx->dict, op_errno, out);
+
+        bd_inode_ctx_get (ctx->inode, this, &cached);
+        if (cached == NULL) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        gf_asprintf (&value, "%s:%ld", cached->type, ctx->bdatt->iatt.ia_size);
+        if (dict_set_dynstr (ctx->dict, BD_XATTR, value) != 0) {
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (ctx->fd)
+                STACK_WIND (frame, bd_trunc_setxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fsetxattr,
+                            ctx->fd, ctx->dict, 0, NULL);
+        else
+                STACK_WIND (frame, bd_trunc_setxattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->setxattr,
+                            &ctx->loc, ctx->dict, 0, NULL);
+
+        return 0;
+
+out:
+        if (ctx->fd)
+                BD_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL,
+                                 NULL);
+        else
+                BD_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL,
+                                 NULL);
+        GF_FREE (value);
+        return 0;
+}
+
+/*
+ * bd_do_trunc: Common body of bd_truncate and bd_ftruncate for BD files.
+ * Exactly one of @fd and @loc is expected to be set by those callers.
+ *
+ * When @offset does not exceed the cached size nothing is resized: the
+ * cached mtime is refreshed and the request completes successfully.
+ * Otherwise the target size from bd_adjust_size is recorded in
+ * local->bdatt and a stat (loc) or fstat (fd) is wound to the first child;
+ * bd_trunc_stat_cbk carries on from there.
+ *
+ * On allocation failure the request is unwound with -1 and the error
+ * stored by BD_VALIDATE_MEM_ALLOC.
+ */
+void
+bd_do_trunc (call_frame_t *frame, xlator_t *this, fd_t *fd, loc_t *loc,
+             off_t offset, bd_attr_t *bdatt)
+{
+        bd_local_t  *local    = NULL;
+        struct iatt  prebuf   = {0, };
+        int          op_errno = 0;
+        int          op_ret   = -1;
+
+        /* If requested size is less than LV size, return success */
+        if (offset <= bdatt->iatt.ia_size) {
+                memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
+                bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
+                op_ret = 0;
+                goto out;
+        }
+
+        local = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+        local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
+        BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
+
+        if (fd) {
+                local->inode = inode_ref (fd->inode);
+                local->fd = fd_ref (fd);
+        } else {
+                local->inode = inode_ref (loc->inode);
+                loc_copy (&local->loc, loc);
+        }
+
+        local->bdatt->iatt.ia_size =
+                bd_adjust_size (this->private, offset);
+
+        /* fd is NULL on the truncate path, so use the matching stat
+           flavour rather than always winding fstat */
+        if (fd)
+                STACK_WIND (frame, bd_trunc_stat_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fstat, fd, NULL);
+        else
+                STACK_WIND (frame, bd_trunc_stat_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->stat, &local->loc, NULL);
+
+        return;
+
+out:
+        if (fd)
+                BD_STACK_UNWIND (ftruncate, frame, op_ret, op_errno,
+                                 &prebuf, &bdatt->iatt, NULL);
+        else
+                BD_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+                                 &prebuf, &bdatt->iatt, NULL);
+        return;
+}
+
+/*
+ * bd_ftruncate: Resizes a LV if fd belongs to BD.
+ *
+ * An fd whose inode has no BD context is a plain posix file and the request
+ * is wound to the child unchanged. Otherwise bd_do_trunc takes over.
+ */
+int32_t
+bd_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              dict_t *xdata)
+{
+        /* Only reaches the unwind below when argument validation fails. */
+        int        op_errno = EINVAL;
+        bd_attr_t *bdatt    = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
+                STACK_WIND (frame, default_ftruncate_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->ftruncate, fd,
+                            offset, xdata);
+                return 0;
+        }
+
+        bd_do_trunc (frame, this, fd, NULL, offset, bdatt);
+        return 0;
+out:
+        BD_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * bd_truncate: Resizes a LV if file maps to LV.
+ *
+ * A loc whose inode has no BD context is a plain posix file and the request
+ * is wound to the child unchanged. Otherwise bd_do_trunc takes over.
+ */
+int32_t
+bd_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+             dict_t *xdata)
+{
+        /* Only reaches the unwind below when argument validation fails. */
+        int        op_errno = EINVAL;
+        bd_attr_t *bdatt    = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        if (bd_inode_ctx_get (loc->inode, this, &bdatt)) {
+                STACK_WIND (frame, default_truncate_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->truncate, loc,
+                            offset, xdata);
+                return 0;
+        }
+
+        bd_do_trunc (frame, this, NULL, loc, offset, bdatt);
+        return 0;
+
+out:
+        BD_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * __bd_pwritev: writes 'count' iovecs from 'vector' to 'fd' at 'offset'
+ * with a single sys_pwritev call.
+ *
+ * Returns the value of sys_pwritev on success, -EFAULT when 'vector' is
+ * NULL, or the negated errno of the failed write. 'bd_size' is accepted for
+ * interface compatibility and is not consulted.
+ */
+int32_t
+__bd_pwritev (int fd, struct iovec *vector, int count, off_t offset,
+              uint64_t bd_size)
+{
+        int     retval      = 0;
+        int     saved_errno = 0;
+        int64_t off         = offset;
+
+        if (!vector)
+                return -EFAULT;
+
+        retval = sys_pwritev (fd, vector, count, offset);
+        if (retval == -1) {
+                /* Capture errno first: logging may overwrite it. */
+                saved_errno = errno;
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "base %p, length %zu, offset %" PRId64 ", message %s",
+                        vector[0].iov_base, vector[0].iov_len,
+                        off, strerror (saved_errno));
+                retval = -saved_errno;
+        }
+
+        return retval;
+}
+
+/*
+ * bd_writev: Writes to LV if its BD file or forwards the request to posix_write
+ * bd_writev -> posix_writev -> bd_writev_cbk
+ *
+ * For a BD fd the data is written with __bd_pwritev and the request is
+ * unwound here: prebuf carries the cached attributes as they were before the
+ * write, postbuf the cached attributes after the mtime update.
+ */
+int
+bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+           int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+           dict_t *xdict)
+{
+        int32_t      op_ret   = -1;
+        /* Reported when argument validation below fails. */
+        int32_t      op_errno = EINVAL;
+        int          _fd      = -1;
+        bd_fd_t     *bd_fd    = NULL;
+        int          ret      = -1;
+        uint64_t     size     = 0;
+        struct iatt  prebuf   = {0, };
+        /* Stays NULL until the cached attributes are known to exist. */
+        struct iatt *postbuf  = NULL;
+        bd_attr_t   *bdatt    = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (vector, out);
+
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret < 0 || !bd_fd) { /* posix fd */
+                STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->writev, fd, vector, count,
+                            offset, flags, iobref, xdict);
+                return 0;
+        }
+
+        _fd = bd_fd->fd;
+
+        if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+        postbuf = &bdatt->iatt;
+        size = bdatt->iatt.ia_size;
+
+        op_ret = __bd_pwritev (_fd, vector, count, offset, size);
+        if (op_ret < 0) {
+                /* __bd_pwritev returns the negated errno on failure. */
+                op_errno = -op_ret;
+                op_ret = -1;
+                gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRId64
+                        ", %s", (int64_t) offset, strerror (op_errno));
+                goto out;
+        }
+
+        op_errno = 0;
+        memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
+        bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
+out:
+
+        BD_STACK_UNWIND (writev, frame, op_ret, op_errno, &prebuf,
+                         postbuf, NULL);
+        return 0;
+}
+
+/*
+ * bd_setattr_cbk: completion of the setattr wound by bd_setattr.
+ *
+ * 'cookie' is the heap allocated copy of the request's 'valid' mask; it is
+ * always freed here. On success the attributes selected by that mask, plus
+ * ctime, are copied from 'postbuf' into the cached BD attributes, and
+ * 'postbuf' is then overwritten with the cached attributes before unwinding.
+ */
+int
+bd_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+                dict_t *xdata)
+{
+        bd_attr_t  *bdatt = NULL;
+        int        *valid = cookie;
+        bd_local_t *local = frame->local;
+
+        if (op_ret < 0 || !valid || !local)
+                goto out;
+
+        if (bd_inode_ctx_get (local->inode, this, &bdatt))
+                goto out;
+
+        /*
+         * 'valid' is a bit mask: one request may change several attributes
+         * at once (e.g. uid and gid, or atime and mtime), so every bit is
+         * tested independently.
+         */
+        if (*valid & GF_SET_ATTR_UID)
+                bdatt->iatt.ia_uid = postbuf->ia_uid;
+
+        if (*valid & GF_SET_ATTR_GID)
+                bdatt->iatt.ia_gid = postbuf->ia_gid;
+
+        if (*valid & GF_SET_ATTR_MODE) {
+                bdatt->iatt.ia_type = postbuf->ia_type;
+                bdatt->iatt.ia_prot = postbuf->ia_prot;
+        }
+
+        if (*valid & GF_SET_ATTR_ATIME) {
+                bdatt->iatt.ia_atime = postbuf->ia_atime;
+                bdatt->iatt.ia_atime_nsec = postbuf->ia_atime_nsec;
+        }
+
+        if (*valid & GF_SET_ATTR_MTIME) {
+                bdatt->iatt.ia_mtime = postbuf->ia_mtime;
+                bdatt->iatt.ia_mtime_nsec = postbuf->ia_mtime_nsec;
+        }
+
+        bdatt->iatt.ia_ctime = postbuf->ia_ctime;
+        bdatt->iatt.ia_ctime_nsec = postbuf->ia_ctime_nsec;
+
+        memcpy (postbuf, &bdatt->iatt, sizeof (struct iatt));
+out:
+        GF_FREE (valid);
+        BD_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf,
+                         postbuf, xdata);
+        return 0;
+}
+
+/*
+ * bd_setattr: setattr entry point.
+ *
+ * Inodes without a BD context are passed straight to the child. For BD
+ * inodes a copy of 'valid' is handed to bd_setattr_cbk as the wind cookie so
+ * the callback knows which cached attributes to refresh.
+ */
+int
+bd_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+            int32_t valid, dict_t *xdata)
+{
+        int        *mask_copy = NULL;
+        bd_attr_t  *cached    = NULL;
+        bd_local_t *local     = NULL;
+        int         op_errno  = 0;
+
+        if (bd_inode_ctx_get (loc->inode, this, &cached) != 0) {
+                /* Not a BD file: nothing to track, let the child handle it. */
+                STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->setattr,
+                           loc, stbuf, valid, xdata);
+                return 0;
+        }
+
+        local = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+        mask_copy = GF_CALLOC (1, sizeof (valid), gf_bd_int32_t);
+        BD_VALIDATE_MEM_ALLOC (mask_copy, op_errno, out);
+
+        *mask_copy = valid;
+        local->inode = inode_ref (loc->inode);
+
+        STACK_WIND_COOKIE (frame, bd_setattr_cbk, mask_copy, FIRST_CHILD(this),
+                           FIRST_CHILD(this)->fops->setattr,
+                           loc, stbuf, valid, xdata);
+
+        return 0;
+out:
+        /* Both failure paths above are allocation failures. */
+        BD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, xdata);
+        return 0;
+}
+
+/*
+ * bd_link_cbk: completion of the link wound by bd_link.
+ *
+ * When the link succeeded and the inode has a BD context, the cached
+ * attributes pick up ctime and the link count from 'buf', and 'buf' is then
+ * replaced by the cached attributes.
+ */
+int
+bd_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+             struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+        bd_attr_t *cached = NULL;
+
+        if (op_ret >= 0 && bd_inode_ctx_get (inode, this, &cached) == 0) {
+                cached->iatt.ia_ctime = buf->ia_ctime;
+                cached->iatt.ia_ctime_nsec = buf->ia_ctime_nsec;
+                cached->iatt.ia_nlink = buf->ia_nlink;
+                *buf = cached->iatt;
+        }
+
+        BD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+                         preparent, postparent, NULL);
+        return 0;
+}
+
+/*
+ * bd_link: winds the link request unchanged to the first child; the reply is
+ * processed by bd_link_cbk.
+ */
+int
+bd_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ STACK_WIND (frame, bd_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+}
+
+/*
+ * bd_handle_special_xattrs: answers getxattr/fgetxattr requests for the
+ * virtual keys VOL_TYPE, VOL_CAPS and (any other name) the LV origin,
+ * without going to the child.
+ *
+ * The reply is unwound as getxattr when 'loc' is set and as fgetxattr
+ * otherwise. Always returns 0.
+ */
+int
+bd_handle_special_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                          fd_t *fd, const char *name, dict_t *xdata)
+{
+        dict_t    *xattr    = NULL;
+        int        op_ret   = -1;
+        int        op_errno = ENOMEM;
+        bd_priv_t *priv     = this->private;
+
+        xattr = dict_new ();
+        if (!xattr)
+                goto out;
+
+        if (!strcmp (name, VOL_TYPE))
+                op_ret = dict_set_int64 (xattr, (char *)name, 1);
+        else if (!strcmp (name, VOL_CAPS))
+                op_ret = dict_set_int64 (xattr, (char *)name, priv->caps);
+        else
+                op_ret = bd_get_origin (this->private, loc, fd, xattr);
+
+out:
+        if (loc)
+                BD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr,
+                                 xdata);
+        else
+                BD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+                                 xdata);
+
+        /* The dict does not exist when dict_new failed above. */
+        if (xattr) {
+                dict_reset (xattr);
+                dict_unref (xattr);
+        }
+
+        return 0;
+}
+
+/*
+ * bd_fgetxattr: serves VOL_TYPE, VOL_CAPS and BD_ORIGIN locally through
+ * bd_handle_special_xattrs; every other name (or no name) goes to the child.
+ */
+int
+bd_fgetxattr (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, const char *name, dict_t *xdata)
+{
+        int is_special = 0;
+
+        if (name)
+                is_special = (strcmp (name, VOL_TYPE) == 0 ||
+                              strcmp (name, VOL_CAPS) == 0 ||
+                              strcmp (name, BD_ORIGIN) == 0);
+
+        if (!is_special) {
+                STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fgetxattr,
+                            fd, name, xdata);
+                return 0;
+        }
+
+        bd_handle_special_xattrs (frame, this, NULL, fd, name, xdata);
+        return 0;
+}
+
+/*
+ * bd_getxattr: serves VOL_TYPE, VOL_CAPS and BD_ORIGIN locally through
+ * bd_handle_special_xattrs; every other name (or no name) goes to the child.
+ */
+int
+bd_getxattr (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, const char *name, dict_t *xdata)
+{
+        int is_special = 0;
+
+        if (name)
+                is_special = (strcmp (name, VOL_TYPE) == 0 ||
+                              strcmp (name, VOL_CAPS) == 0 ||
+                              strcmp (name, BD_ORIGIN) == 0);
+
+        if (!is_special) {
+                STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getxattr,
+                            loc, name, xdata);
+                return 0;
+        }
+
+        bd_handle_special_xattrs (frame, this, loc, NULL, name, xdata);
+        return 0;
+}
+
+/*
+ * bd_unlink_lookup_cbk: completion of the lookup wound by bd_unlink.
+ *
+ * When the file has no other hard links, the LV named after the inode's
+ * gfid is deleted first (a missing LV is tolerated). The posix file is then
+ * unlinked through the child. A failed lookup is reported without touching
+ * the LV.
+ */
+int
+bd_unlink_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, inode_t *inode,
+                      struct iatt *buf, dict_t *xattr,
+                      struct iatt *postparent)
+{
+        bd_gfid_t   gfid  = {0, };
+        bd_local_t *local = frame->local;
+
+        /* 'buf' and 'inode' are only meaningful when the lookup worked. */
+        if (op_ret < 0)
+                goto out;
+
+        /* Both branches below need local->loc, so validate it up front. */
+        BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out);
+
+        /* Other hard links still reference the LV: keep it. */
+        if (buf->ia_nlink > 1)
+                goto posix;
+
+        uuid_utoa_r (inode->gfid, gfid);
+        if (bd_delete_lv (this->private, gfid, &op_errno) < 0) {
+                if (op_errno != ENOENT)
+                        goto out;
+        }
+
+posix:
+        /* remove posix */
+        STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink,
+                    &local->loc, 0, NULL);
+
+        return 0;
+out:
+        BD_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * bd_unlink: unlink entry point.
+ *
+ * Inodes without a BD context are unlinked by the child directly. For BD
+ * inodes the loc is saved in the frame local and a lookup is issued first;
+ * bd_unlink_lookup_cbk decides whether the LV must be removed as well.
+ */
+int
+bd_unlink (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, int xflag, dict_t *xdata)
+{
+        /* Reported when argument validation below fails. */
+        int         op_errno = EINVAL;
+        bd_attr_t  *bdatt    = NULL;
+        bd_local_t *local    = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        if (bd_inode_ctx_get (loc->inode, this, &bdatt)) {
+                STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->unlink,
+                            loc, xflag, xdata);
+                return 0;
+        }
+
+        local = bd_local_init (frame, this);
+        BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+        loc_copy (&local->loc, loc);
+
+        STACK_WIND (frame, bd_unlink_lookup_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup, loc, NULL);
+        return 0;
+out:
+        BD_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/* bd_priv: dumpops 'priv' hook; currently a stub that dumps nothing. */
+int32_t
+bd_priv (xlator_t *this)
+{
+ return 0;
+}
+
+/* bd_inode: dumpops 'inode' hook; currently a stub that dumps nothing. */
+int32_t
+bd_inode (xlator_t *this)
+{
+ return 0;
+}
+
+/*
+ * bd_rchecksum: computes the rsync weak and strong checksums over 'len'
+ * bytes read from the BD fd at 'offset'. Non-BD fds are forwarded to the
+ * child.
+ */
+int32_t
+bd_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              int32_t len, dict_t *xdata)
+{
+        int            op_ret        = -1;
+        /* Reported when argument validation below fails. */
+        int            op_errno      = EINVAL;
+        int            ret           = 0;
+        int            _fd           = -1;
+        char          *alloc_buf     = NULL;
+        char          *buf           = NULL;
+        int32_t        weak_checksum = 0;
+        bd_fd_t       *bd_fd         = NULL;
+        unsigned char  strong_checksum[MD5_DIGEST_LENGTH] = {0};
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        ret = bd_fd_ctx_get (this, fd, &bd_fd);
+        if (ret < 0 || !bd_fd) {
+                STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->rchecksum, fd, offset,
+                            len, xdata);
+                return 0;
+        }
+
+        alloc_buf = page_aligned_alloc (len, &buf);
+        if (!alloc_buf) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        _fd = bd_fd->fd;
+
+        LOCK (&fd->lock);
+        {
+                ret = sys_pread (_fd, buf, len, offset);
+                if (ret < 0) {
+                        /* Capture errno first: logging may overwrite it. */
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "pread of %d bytes returned %d (%s)",
+                                len, ret, strerror (op_errno));
+                }
+        }
+        UNLOCK (&fd->lock);
+
+        if (ret < 0)
+                goto out;
+
+        /*
+         * NOTE(review): a short read still checksums all 'len' bytes of the
+         * buffer - confirm that is intended.
+         */
+        weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf,
+                                                (size_t) len);
+        gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len,
+                                  (unsigned char *) strong_checksum);
+
+        op_ret = 0;
+        op_errno = 0;
+out:
+        BD_STACK_UNWIND (rchecksum, frame, op_ret, op_errno,
+                         weak_checksum, strong_checksum, NULL);
+
+        GF_FREE (alloc_buf);
+
+        return 0;
+}
+
+/*
+ * bd_zerofill: zero-fills a range of a BD file through bd_do_zerofill and
+ * unwinds with the pre/post attributes it reports. An fd whose inode has no
+ * BD context is forwarded to the child instead.
+ */
+static int
+bd_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            off_t len, dict_t *xdata)
+{
+        bd_attr_t   *cached = NULL;
+        struct iatt  pre    = {0,};
+        struct iatt  post   = {0,};
+        int32_t      rc     = 0;
+
+        /* iatt already cached */
+        if (bd_inode_ctx_get (fd->inode, this, &cached) < 0) {
+                STACK_WIND (frame, default_zerofill_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->zerofill,
+                            fd, offset, len, xdata);
+                return 0;
+        }
+
+        rc = bd_do_zerofill(frame, this, fd, offset, len, &pre, &post);
+        if (rc == 0)
+                STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &pre, &post, NULL);
+        else
+                /* The helper's return value is passed on as op_errno. */
+                STACK_UNWIND_STRICT(zerofill, frame, -1, rc, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/**
+ * notify - answers GF_EVENT_PARENT_UP by sending GF_EVENT_CHILD_UP through
+ * default_notify; all other events are ignored. Always returns 0.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        if (event == GF_EVENT_PARENT_UP) {
+                /* Tell the parent that bd xlator is up */
+                default_notify (this, GF_EVENT_CHILD_UP, data);
+        }
+
+        return 0;
+}
+
+/*
+ * mem_acct_init: registers the translator's memory accounting types
+ * (gf_bd_mt_end + 1 of them). Returns the result of xlator_mem_acct_init,
+ * or -1 when 'this' is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_bd_mt_end + 1);
+
+        if (ret != 0)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Memory accounting init failed");
+
+        return ret;
+}
+
+/*
+ * reconfigure: re-reads the "bd-aio" option and switches AIO on or off
+ * accordingly. Returns 0 on success and -1 when the option cannot be read.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        bd_priv_t *conf   = this->private;
+        int        status = -1;
+
+        GF_OPTION_RECONF ("bd-aio", conf->aio_configured, options,
+                          bool, out);
+
+        if (!conf->aio_configured)
+                bd_aio_off (this);
+        else
+                bd_aio_on (this);
+
+        status = 0;
+out:
+        return status;
+}
+
+/**
+ * bd xlator init - Validate configured VG
+ *
+ * Reads the "export" (VG name), "device" and "bd-aio" options, creates the
+ * frame-local memory pool, opens an LVM handle and scans the VG. Returns 0
+ * on success and -1 on failure. On failure everything allocated here is
+ * released and this->private / this->local_pool are reset to NULL, so
+ * nothing is left pointing at freed memory.
+ */
+int
+init (xlator_t *this)
+{
+        char      *vg_data  = NULL;
+        char      *device   = NULL;
+        bd_priv_t *_private = NULL;
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd needs posix as subvolume");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Volume is dangling. Please check the volume file.");
+        }
+
+        GF_OPTION_INIT ("export", vg_data, str, error);
+        GF_OPTION_INIT ("device", device, str, error);
+
+        /* Now we support only LV device */
+        if (strcasecmp (device, BACKEND_VG)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: unknown %s backend %s", BD_XLATOR, device);
+                return -1;
+        }
+
+        this->local_pool = mem_pool_new (bd_local_t, 64);
+        if (!this->local_pool) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: Failed to create bd memory pool");
+                return -1;
+        }
+
+        _private = GF_CALLOC (1, sizeof (*_private), gf_bd_private);
+        if (!_private)
+                goto error;
+
+        this->private = _private;
+        _private->vg = gf_strdup (vg_data);
+        if (!_private->vg)
+                goto error;
+
+        _private->handle = lvm_init (NULL);
+        if (!_private->handle) {
+                gf_log (this->name, GF_LOG_CRITICAL, "lvm_init failed");
+                goto error;
+        }
+        _private->caps = BD_CAPS_BD;
+        if (bd_scan_vg (this, _private))
+                goto error;
+
+        _private->aio_init_done = _gf_false;
+        _private->aio_capable = _gf_false;
+
+        GF_OPTION_INIT ("bd-aio", _private->aio_configured, bool, error);
+        if (_private->aio_configured) {
+                if (bd_aio_on (this)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "BD AIO init failed");
+                        goto error;
+                }
+        }
+
+        _private->caps |= BD_CAPS_OFFLOAD_COPY | BD_CAPS_OFFLOAD_SNAPSHOT |
+                          BD_CAPS_OFFLOAD_ZERO;
+
+        return 0;
+error:
+        if (_private) {
+                GF_FREE (_private->vg);
+                if (_private->handle)
+                        lvm_quit (_private->handle);
+                GF_FREE (_private);
+        }
+        /* Do not leave pointers to released objects behind. */
+        this->private = NULL;
+
+        mem_pool_destroy (this->local_pool);
+        this->local_pool = NULL;
+
+        return -1;
+}
+
+/*
+ * fini: releases what init set up - the frame-local memory pool, the LVM
+ * handle, the VG name and the private structure itself.
+ */
+void
+fini (xlator_t *this)
+{
+        bd_priv_t *conf = this->private;
+
+        mem_pool_destroy (this->local_pool);
+        this->local_pool = NULL;
+
+        if (conf) {
+                lvm_quit (conf->handle);
+                GF_FREE (conf->vg);
+                this->private = NULL;
+                GF_FREE (conf);
+        }
+}
+
+/* State dump hooks; both handlers are stubs that return 0. */
+struct xlator_dumpops dumpops = {
+ .priv = bd_priv,
+ .inode = bd_inode,
+};
+
+/* File operations implemented by the bd translator. */
+struct xlator_fops fops = {
+ .readdirp = bd_readdirp,
+ .lookup = bd_lookup,
+ .stat = bd_stat,
+ .statfs = bd_statfs,
+ .open = bd_open,
+ .fstat = bd_fstat,
+ .rchecksum = bd_rchecksum,
+ .readv = bd_readv,
+ .fsync = bd_fsync,
+ .setxattr = bd_setxattr,
+ .fsetxattr = bd_fsetxattr,
+ .removexattr = bd_removexattr,
+ .fremovexattr=bd_fremovexattr,
+ .truncate = bd_truncate,
+ .ftruncate = bd_ftruncate,
+ .writev = bd_writev,
+ .getxattr = bd_getxattr,
+ .fgetxattr = bd_fgetxattr,
+ .unlink = bd_unlink,
+ .link = bd_link,
+ .flush = bd_flush,
+ .setattr = bd_setattr,
+ .discard = bd_discard,
+ .zerofill = bd_zerofill,
+};
+
+/* Callbacks for fd release and inode forget. */
+struct xlator_cbks cbks = {
+ .release = bd_release,
+ .forget = bd_forget,
+};
+
+/*
+ * Volume file options: "export" (read by init as the VG name), "device"
+ * (must match BACKEND_VG, see init) and "bd-aio". The table ends with a
+ * NULL key entry.
+ */
+struct volume_options options[] = {
+ { .key = {"export"},
+ .type = GF_OPTION_TYPE_STR},
+ { .key = {"device"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = BACKEND_VG},
+ {
+ .key = {"bd-aio"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Support for native Linux AIO"
+ },
+
+ { .key = {NULL} }
+};
diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h
new file mode 100644
index 00000000000..bdaf6f032d9
--- /dev/null
+++ b/xlators/storage/bd/src/bd.h
@@ -0,0 +1,168 @@
+/*
+ BD translator - Exports Block devices on server side as regular
+ files to client
+
+ Copyright IBM, Corp. 2012
+
+ This file is part of GlusterFS.
+
+ Author:
+ M. Mohan Kumar <mohan@in.ibm.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BD_H
+#define _BD_H
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#endif
+
+#include "xlator.h"
+#include "mem-types.h"
+
+#define BD_XLATOR "block device mapper xlator"
+#define BACKEND_VG "vg"
+/* Extended attribute in which the BD mapping of a file is stored. */
+#define GF_XATTR "user.glusterfs"
+#define BD_XATTR GF_XATTR ".bd"
+
+#define BD_LV "lv"
+#define BD_THIN "thin"
+
+/* Virtual xattr names answered by the translator itself. */
+#define VOL_TYPE "volume.type"
+#define VOL_CAPS "volume.caps"
+
+#define ALIGN_SIZE 4096
+
+/* Capability bits kept in bd_priv_t.caps. */
+#define BD_CAPS_BD 0x01
+#define BD_CAPS_THIN 0x02
+#define BD_CAPS_OFFLOAD_COPY 0x04
+#define BD_CAPS_OFFLOAD_SNAPSHOT 0x08
+#define BD_CAPS_OFFLOAD_ZERO 0x20
+
+#define BD_CLONE "clone"
+#define BD_SNAPSHOT "snapshot"
+#define BD_MERGE "merge"
+#define BD_ORIGIN "list-origin"
+
+#define IOV_NR 4
+#define IOV_SIZE (64 * 1024)
+
+#define LINKTO "trusted.glusterfs.dht.linkto"
+
+#define MAX_NO_VECT 1024
+
+
+/*
+ * Jumps to 'label' with op_errno set to ENOMEM when 'buff' is NULL.
+ * Expects a variable named 'this' (an xlator_t *) in the calling scope for
+ * the log message. Wrapped in do/while so it behaves as one statement, also
+ * inside an unbraced if/else.
+ */
+#define BD_VALIDATE_MEM_ALLOC(buff, op_errno, label) do {               \
+        if (!(buff)) {                                                  \
+                op_errno = ENOMEM;                                      \
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");     \
+                goto label;                                             \
+        }                                                               \
+        } while (0)
+
+/*
+ * Jumps to 'label' with op_errno set to EINVAL when 'local' is NULL.
+ * Wrapped in do/while so it behaves as one statement, also inside an
+ * unbraced if/else.
+ */
+#define BD_VALIDATE_LOCAL_OR_GOTO(local, op_errno, label) do {          \
+        if (!(local)) {                                                 \
+                op_errno = EINVAL;                                      \
+                goto label;                                             \
+        }                                                               \
+        } while (0)
+
+/*
+ * Unwinds fop 'typ' on 'frame' with STACK_UNWIND_STRICT. frame->local is
+ * detached before the unwind and, when it was set, released afterwards with
+ * bd_local_free.
+ */
+#define BD_STACK_UNWIND(typ, frame, args ...) do { \
+ bd_local_t *__local = frame->local; \
+ xlator_t *__this = frame->this; \
+ \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (typ, frame, args); \
+ if (__local) \
+ bd_local_free (__this, __local); \
+ } while (0)
+
+/* Buffer large enough for a gfid in string form (GF_UUID_BUF_SIZE bytes). */
+typedef char bd_gfid_t[GF_UUID_BUF_SIZE];
+
+/**
+ * bd_fd - internal structure
+ *
+ * 'fd' is the descriptor the translator reads from and writes to for a BD
+ * file. NOTE(review): 'flag' and 'odirect' presumably mirror the open flags -
+ * confirm against bd_open.
+ */
+typedef struct bd_fd {
+ int fd;
+ int32_t flag;
+ int odirect;
+} bd_fd_t;
+
+/*
+ * Per-translator private state (this->private).
+ *  handle          LVM handle obtained from lvm_init
+ *  vg              copy of the "export" option (volume group name)
+ *  caps            bit mask of BD_CAPS_* values
+ *  aio_configured  value of the "bd-aio" option
+ * NOTE(review): 'pool' is presumably the thin pool name - confirm.
+ */
+typedef struct bd_priv {
+ lvm_t handle;
+ char *vg;
+ char *pool;
+ int caps;
+ gf_boolean_t aio_init_done;
+ gf_boolean_t aio_capable;
+ gf_boolean_t aio_configured;
+#ifdef HAVE_LIBAIO
+ io_context_t ctxp;
+ pthread_t aiothread;
+#endif
+} bd_priv_t;
+
+
+/* Kind of block device behind a file: none, or a logical volume. */
+typedef enum bd_type {
+ BD_TYPE_NONE,
+ BD_TYPE_LV,
+} bd_type_t;
+
+/*
+ * Per-inode BD context: the cached attributes of the BD file and the type
+ * string that is written into BD_XATTR as "<type>:<size>".
+ */
+typedef struct {
+ struct iatt iatt;
+ char *type;
+} bd_attr_t;
+
+/*
+ * Offload operation requested for a file.
+ * NOTE(review): presumably corresponds to the BD_CLONE / BD_SNAPSHOT /
+ * BD_MERGE strings - confirm against the setxattr path.
+ */
+typedef enum {
+ BD_OF_NONE,
+ BD_OF_CLONE,
+ BD_OF_SNAPSHOT,
+ BD_OF_MERGE,
+} bd_offload_t;
+
+/*
+ * Per-frame state (frame->local), released through bd_local_free.
+ *  dict   xattr dictionary built for a setxattr/fsetxattr wind
+ *  bdatt  scratch attributes, e.g. the target size of a truncate
+ *  inode  referenced inode the fop operates on
+ *  loc    copy of the request's loc (path based fops)
+ *  fd     referenced fd (fd based fops)
+ * NOTE(review): 'offload', 'size' and 'dloc' are not used in the code
+ * visible here - presumably offload (clone/snapshot/merge) parameters.
+ */
+typedef struct {
+ dict_t *dict;
+ bd_attr_t *bdatt;
+ inode_t *inode;
+ loc_t loc;
+ fd_t *fd;
+ data_t *data; /* for setxattr */
+ bd_offload_t offload;
+ uint64_t size;
+ loc_t *dloc;
+} bd_local_t;
+
+/* Prototypes (each helper is declared exactly once) */
+int bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx);
+int bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx);
+int bd_scan_vg (xlator_t *this, bd_priv_t *priv);
+bd_local_t *bd_local_init (call_frame_t *frame, xlator_t *this);
+void bd_local_free (xlator_t *this, bd_local_t *local);
+int bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd);
+char *page_aligned_alloc (size_t size, char **aligned_buf);
+int bd_validate_bd_xattr (xlator_t *this, char *bd, char **type,
+                          uint64_t *lv_size, uuid_t uuid);
+uint64_t bd_get_default_extent (bd_priv_t *priv);
+uint64_t bd_adjust_size (bd_priv_t *priv, size_t size);
+int bd_create (uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv);
+int bd_resize (bd_priv_t *priv, uuid_t uuid, size_t size);
+int bd_delete_lv (bd_priv_t *priv, const char *lv_name, int *op_errno);
+int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv);
+int bd_clone (bd_local_t *local, bd_priv_t *priv);
+
+int bd_merge (bd_priv_t *priv, uuid_t gfid);
+int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict);
+void bd_update_amtime(struct iatt *iatt, int flag);
+int bd_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   off_t offset, size_t len, struct iatt *prebuf,
+                   struct iatt *postbuf);
+
+#endif
diff --git a/xlators/storage/bdb/src/Makefile.am b/xlators/storage/bdb/src/Makefile.am
deleted file mode 100644
index 7e2376979ce..00000000000
--- a/xlators/storage/bdb/src/Makefile.am
+++ /dev/null
@@ -1,18 +0,0 @@
-
-xlator_LTLIBRARIES = bdb.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/storage
-
-bdb_la_LDFLAGS = -module -avoidversion
-
-bdb_la_SOURCES = bctx.c bdb-ll.c bdb.c
-bdb_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = bdb.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-AM_LDFLAGS = -ldb
-
-CLEANFILES =
-
diff --git a/xlators/storage/bdb/src/bctx.c b/xlators/storage/bdb/src/bctx.c
deleted file mode 100644
index 150d709a245..00000000000
--- a/xlators/storage/bdb/src/bctx.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <list.h>
-#include <bdb.h>
-#include <libgen.h> /* for dirname */
-
-static void
-__destroy_bctx (bctx_t *bctx)
-{
- if (bctx->directory)
- GF_FREE (bctx->directory);
-
- if (bctx->db_path)
- GF_FREE (bctx->db_path);
-
- GF_FREE (bctx);
-}
-
-static void
-__unhash_bctx (bctx_t *bctx)
-{
- list_del_init (&bctx->b_hash);
-}
-
-static int32_t
-bctx_table_prune (bctx_table_t *table)
-{
- int32_t ret = 0;
- struct list_head purge = {0,};
- struct list_head *next = NULL;
- bctx_t *entry = NULL;
- bctx_t *del = NULL, *tmp = NULL;
-
- if (!table)
- return 0;
-
- INIT_LIST_HEAD (&purge);
-
- LOCK (&table->lock);
- {
- if ((table->lru_limit) &&
- (table->lru_size > table->lru_limit)) {
- while (table->lru_size > table->lru_limit) {
- next = table->b_lru.next;
- entry = list_entry (next, bctx_t, list);
-
- list_move_tail (next, &table->purge);
- __unhash_bctx (entry);
-
- table->lru_size--;
- ret++;
- }
- }
- list_move_tail (&purge, &table->purge);
- list_del_init (&table->purge);
- }
- UNLOCK (&table->lock);
-
- list_for_each_entry_safe (del, tmp, &purge, list) {
- list_del_init (&del->list);
- if (del->primary) {
- ret = del->primary->close (del->primary, 0);
- if (ret != 0) {
- gf_log (table->this->name, GF_LOG_DEBUG,
- "_BCTX_TABLE_PRUNE %s: %s "
- "(failed to close primary database)",
- del->directory, db_strerror (ret));
- } else {
- gf_log (table->this->name, GF_LOG_DEBUG,
- "_BCTX_TABLE_PRUNE %s (lru=%d)"
- "(closed primary database)",
- del->directory, table->lru_size);
- }
- }
- if (del->secondary) {
- ret = del->secondary->close (del->secondary, 0);
- if (ret != 0) {
- gf_log (table->this->name, GF_LOG_DEBUG,
- "_BCTX_TABLE_PRUNE %s: %s "
- "(failed to close secondary database)",
- del->directory, db_strerror (ret));
- } else {
- gf_log (table->this->name, GF_LOG_DEBUG,
- "_BCTX_TABLE_PRUNE %s (lru=%d)"
- "(closed secondary database)",
- del->directory, table->lru_size);
- }
- }
- __destroy_bctx (del);
- }
-
- return ret;
-}
-
-
-/* struct bdb_ctx related */
-static inline uint32_t
-bdb_key_hash (char *key, uint32_t hash_size)
-{
- uint32_t hash = 0;
-
- hash = *key;
-
- if (hash) {
- for (key += 1; *key != '\0'; key++) {
- hash = (hash << 5) - hash + *key;
- }
- }
-
- return (hash + *key) % hash_size;
-}
-
-static void
-__hash_bctx (bctx_t *bctx)
-{
- bctx_table_t *table = NULL;
- char *key = NULL;
-
- table = bctx->table;
-
- MAKE_KEY_FROM_PATH (key, bctx->directory);
- bctx->key_hash = bdb_key_hash (key, table->hash_size);
-
- list_del_init (&bctx->b_hash);
- list_add (&bctx->b_hash, &table->b_hash[bctx->key_hash]);
-}
-
-static inline bctx_t *
-__bctx_passivate (bctx_t *bctx)
-{
- if (bctx->primary) {
- list_move_tail (&bctx->list, &(bctx->table->b_lru));
- bctx->table->lru_size++;
- } else {
- list_move_tail (&bctx->list, &bctx->table->purge);
- __unhash_bctx (bctx);
- }
- return bctx;
-}
-
-static inline bctx_t *
-__bctx_activate (bctx_t *bctx)
-{
- list_move (&bctx->list, &bctx->table->active);
- bctx->table->lru_size--;
-
- return bctx;
-}
-
-static bctx_t *
-__bdb_ctx_unref (bctx_t *bctx)
-{
- assert (bctx->ref);
-
- --bctx->ref;
-
- if (!bctx->ref)
- bctx = __bctx_passivate (bctx);
-
- return bctx;
-}
-
-
-bctx_t *
-bctx_unref (bctx_t *bctx)
-{
- bctx_table_t *table = NULL;
-
- if (!bctx && !bctx->table)
- return NULL;
-
- table = bctx->table;
-
- LOCK (&table->lock);
- {
- bctx = __bdb_ctx_unref (bctx);
- }
- UNLOCK (&table->lock);
-
- bctx_table_prune (table);
-
- return bctx;
-}
-
-/*
- * NOTE: __bdb_ctx_ref() is called only after holding table->lock and
- * bctx->lock, in that order
- */
-static inline bctx_t *
-__bctx_ref (bctx_t *bctx)
-{
- if (!bctx->ref)
- __bctx_activate (bctx);
-
- bctx->ref++;
-
- return bctx;
-}
-
-bctx_t *
-bctx_ref (bctx_t *bctx)
-{
- LOCK (&(bctx->table->lock));
- {
- __bctx_ref (bctx);
- }
- UNLOCK (&(bctx->table->lock));
-
- return bctx;
-}
-
-
-#define BDB_THIS(table) (table->this)
-
-static inline bctx_t *
-__create_bctx (bctx_table_t *table,
- const char *path)
-{
- bctx_t *bctx = NULL;
- char *db_path = NULL;
-
- bctx = GF_CALLOC (1, sizeof (*bctx), gf_bdb_mt_bctx_t);
- GF_VALIDATE_OR_GOTO ("bctx", bctx, out);
-
- bctx->table = table;
- bctx->directory = gf_strdup (path);
- GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, out);
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, BDB_THIS (table), path);
-
- bctx->db_path = gf_strdup (db_path);
- GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, out);
-
- INIT_LIST_HEAD (&bctx->c_list);
- INIT_LIST_HEAD (&bctx->list);
- INIT_LIST_HEAD (&bctx->b_hash);
-
- LOCK_INIT (&bctx->lock);
-
- __hash_bctx (bctx);
-
- list_add (&bctx->list, &table->b_lru);
- table->lru_size++;
-
-out:
- return bctx;
-}
-
-/* bctx_lookup - lookup bctx_t for the directory @directory.
- * (see description of bctx_t in bdb.h)
- *
- * @table: bctx_table_t for this instance of bdb.
- * @directory: directory for which bctx_t is being looked up.
- */
-bctx_t *
-bctx_lookup (bctx_table_t *table,
- const char *directory)
-{
- char *key = NULL;
- uint32_t key_hash = 0;
- bctx_t *trav = NULL, *bctx = NULL, *tmp = NULL;
- int32_t need_break = 0;
-
- GF_VALIDATE_OR_GOTO ("bctx", table, out);
- GF_VALIDATE_OR_GOTO ("bctx", directory, out);
-
- MAKE_KEY_FROM_PATH (key, directory);
- key_hash = bdb_key_hash (key, table->hash_size);
-
- LOCK (&table->lock);
- {
- if (list_empty (&table->b_hash[key_hash])) {
- goto creat_bctx;
- }
-
- list_for_each_entry_safe (trav, tmp, &table->b_hash[key_hash],
- b_hash) {
- LOCK(&trav->lock);
- {
- if (!strcmp(trav->directory, directory)) {
- bctx = __bctx_ref (trav);
- need_break = 1;
- }
- }
- UNLOCK(&trav->lock);
-
- if (need_break)
- break;
- }
-
- creat_bctx:
- if (!bctx) {
- bctx = __create_bctx (table, directory);
- bctx = __bctx_ref (bctx);
- }
- }
- UNLOCK (&table->lock);
-out:
- return bctx;
-}
-
-
-bctx_t *
-bctx_parent (bctx_table_t *table,
- const char *path)
-{
- char *pathname = NULL, *directory = NULL;
- bctx_t *bctx = NULL;
-
- GF_VALIDATE_OR_GOTO ("bctx", table, out);
- GF_VALIDATE_OR_GOTO ("bctx", path, out);
-
- pathname = gf_strdup (path);
- GF_VALIDATE_OR_GOTO ("bctx", pathname, out);
- directory = dirname (pathname);
-
- bctx = bctx_lookup (table, directory);
- GF_VALIDATE_OR_GOTO ("bctx", bctx, out);
-
-out:
- if (pathname)
- free (pathname);
- return bctx;
-}
diff --git a/xlators/storage/bdb/src/bdb-ll.c b/xlators/storage/bdb/src/bdb-ll.c
deleted file mode 100644
index 7d3938daf6d..00000000000
--- a/xlators/storage/bdb/src/bdb-ll.c
+++ /dev/null
@@ -1,1464 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <libgen.h>
-#include "bdb.h"
-#include <list.h>
-#include "hashfn.h"
-/*
- * implement the procedures to interact with bdb */
-
-/****************************************************************
- *
- * General wrappers and utility procedures for bdb xlator
- *
- ****************************************************************/
-
-ino_t
-bdb_inode_transform (ino_t parent,
- const char *name,
- size_t namelen)
-{
- ino_t ino = -1;
- uint64_t hash = 0;
-
- hash = gf_dm_hashfn (name, namelen);
-
- ino = (((parent << 32) | 0x00000000ffffffffULL)
- & (hash | 0xffffffff00000000ULL));
-
- return ino;
-}
-
-static int
-bdb_generate_secondary_hash (DB *secondary,
- const DBT *pkey,
- const DBT *data,
- DBT *skey)
-{
- char *primary = NULL;
- uint32_t *hash = NULL;
-
- primary = pkey->data;
-
- hash = GF_CALLOC (1, sizeof (uint32_t), gf_bdb_mt_uint32_t);
-
- *hash = gf_dm_hashfn (primary, pkey->size);
-
- skey->data = hash;
- skey->size = sizeof (hash);
- skey->flags = DB_DBT_APPMALLOC;
-
- return 0;
-}
-
-/***********************************************************
- *
- * bdb storage database utilities
- *
- **********************************************************/
-
-/*
- * bdb_db_open - opens a storage db.
- *
- * @ctx: context specific to the directory for which we are supposed to open db
- *
- * see, if we have empty slots to open a db.
- * if (no-empty-slots), then prune open dbs and close as many as possible
- * if (empty-slot-available), tika muchkonDu db open maaDu
- *
- */
-static int
-bdb_db_open (bctx_t *bctx)
-{
- DB *primary = NULL;
- DB *secondary = NULL;
- int32_t ret = -1;
- bctx_table_t *table = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
-
- table = bctx->table;
- GF_VALIDATE_OR_GOTO ("bdb-ll", table, out);
-
- /* we have to do the following, we can't deny someone of db_open ;) */
- ret = db_create (&primary, table->dbenv, 0);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_OPEN %s: %s (failed to create database object"
- " for primary database)",
- bctx->directory, db_strerror (ret));
- ret = -ENOMEM;
- goto out;
- }
-
- if (table->page_size) {
- ret = primary->set_pagesize (primary,
- table->page_size);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_OPEN %s: %s (failed to set page-size "
- "to %"PRIu64")",
- bctx->directory, db_strerror (ret),
- table->page_size);
- } else {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_OPEN %s: page-size set to %"PRIu64,
- bctx->directory, table->page_size);
- }
- }
-
- ret = primary->open (primary, NULL, bctx->db_path, "primary",
- table->access_mode, table->dbflags, 0);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "_BDB_DB_OPEN %s: %s "
- "(failed to open primary database)",
- bctx->directory, db_strerror (ret));
- ret = -1;
- goto cleanup;
- }
-
- ret = db_create (&secondary, table->dbenv, 0);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_OPEN %s: %s (failed to create database object"
- " for secondary database)",
- bctx->directory, db_strerror (ret));
- ret = -ENOMEM;
- goto cleanup;
- }
-
- ret = secondary->open (secondary, NULL, bctx->db_path, "secondary",
- table->access_mode, table->dbflags, 0);
- if (ret != 0 ) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "_BDB_DB_OPEN %s: %s "
- "(failed to open secondary database)",
- bctx->directory, db_strerror (ret));
- ret = -1;
- goto cleanup;
- }
-
- ret = primary->associate (primary, NULL, secondary,
- bdb_generate_secondary_hash,
-#ifdef DB_IMMUTABLE_KEY
- DB_IMMUTABLE_KEY);
-#else
- 0);
-#endif
- if (ret != 0 ) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "_BDB_DB_OPEN %s: %s "
- "(failed to associate primary database with "
- "secondary database)",
- bctx->directory, db_strerror (ret));
- ret = -1;
- goto cleanup;
- }
-
-out:
- bctx->primary = primary;
- bctx->secondary = secondary;
-
- return ret;
-cleanup:
- if (primary)
- primary->close (primary, 0);
- if (secondary)
- secondary->close (secondary, 0);
-
- return ret;
-}
-
-int32_t
-bdb_cursor_close (bctx_t *bctx,
- DBC *cursorp)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out);
-
- LOCK (&bctx->lock);
- {
-#ifdef HAVE_BDB_CURSOR_GET
- ret = cursorp->close (cursorp);
-#else
- ret = cursorp->c_close (cursorp);
-#endif
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CURSOR_CLOSE %s: %s "
- "(failed to close database cursor)",
- bctx->directory, db_strerror (ret));
- }
- }
- UNLOCK (&bctx->lock);
-
-out:
- return ret;
-}
-
-
-int32_t
-bdb_cursor_open (bctx_t *bctx,
- DBC **cursorpp)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", cursorpp, out);
-
- LOCK (&bctx->lock);
- {
- if (bctx->secondary) {
- /* do nothing, just continue */
- ret = 0;
- } else {
- ret = bdb_db_open (bctx);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CURSOR_OPEN %s: ENOMEM "
- "(failed to open secondary database)",
- bctx->directory);
- ret = -ENOMEM;
- } else {
- ret = 0;
- }
- }
-
- if (ret == 0) {
- /* all set, open cursor */
- ret = bctx->secondary->cursor (bctx->secondary,
- NULL, cursorpp, 0);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CURSOR_OPEN %s: %s "
- "(failed to open a cursor to database)",
- bctx->directory, db_strerror (ret));
- }
- }
- }
- UNLOCK (&bctx->lock);
-
-out:
- return ret;
-}
-
-
-/* cache related */
-static bdb_cache_t *
-bdb_cache_lookup (bctx_t *bctx,
- char *path)
-{
- bdb_cache_t *bcache = NULL;
- bdb_cache_t *trav = NULL;
- char *key = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", path, out);
-
- MAKE_KEY_FROM_PATH (key, path);
-
- LOCK (&bctx->lock);
- {
- list_for_each_entry (trav, &bctx->c_list, c_list) {
- if (!strcmp (trav->key, key)){
- bcache = trav;
- break;
- }
- }
- }
- UNLOCK (&bctx->lock);
-
-out:
- return bcache;
-}
-
-static int32_t
-bdb_cache_insert (bctx_t *bctx,
- DBT *key,
- DBT *data)
-{
- bdb_cache_t *bcache = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", key, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", data, out);
-
- LOCK (&bctx->lock);
- {
- if (bctx->c_count > 5) {
- /* most of the times, we enter here */
- /* FIXME: ugly, not supposed to disect any of the
- * 'struct list_head' directly */
- if (!list_empty (&bctx->c_list)) {
- bcache = list_entry (bctx->c_list.prev,
- bdb_cache_t, c_list);
- list_del_init (&bcache->c_list);
- }
- if (bcache->key) {
- GF_FREE (bcache->key);
- bcache->key = GF_CALLOC (key->size + 1,
- sizeof (char),
- gf_bdb_mt_char);
- GF_VALIDATE_OR_GOTO ("bdb-ll",
- bcache->key, unlock);
- memcpy (bcache->key, (char *)key->data,
- key->size);
- } else {
- /* should never come here */
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CACHE_INSERT %s (%s) "
- "(found a cache entry with empty key)",
- bctx->directory, (char *)key->data);
- } /* if(bcache->key)...else */
- if (bcache->data) {
- GF_FREE (bcache->data);
- bcache->data = memdup (data->data, data->size);
- GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data,
- unlock);
- bcache->size = data->size;
- } else {
- /* should never come here */
- gf_log ("bdb-ll", GF_LOG_CRITICAL,
- "_BDB_CACHE_INSERT %s (%s) "
- "(found a cache entry with no data)",
- bctx->directory, (char *)key->data);
- } /* if(bcache->data)...else */
- list_add (&bcache->c_list, &bctx->c_list);
- ret = 0;
- } else {
- /* we will be entering here very rarely */
- bcache = GF_CALLOC (1, sizeof (*bcache),
- gf_bdb_mt_bdb_cache_t);
- GF_VALIDATE_OR_GOTO ("bdb-ll", bcache, unlock);
-
- bcache->key = GF_CALLOC (key->size + 1, sizeof (char),
- gf_bdb_mt_char);
- GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->key, unlock);
- memcpy (bcache->key, key->data, key->size);
-
- bcache->data = memdup (data->data, data->size);
- GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock);
-
- bcache->size = data->size;
- list_add (&bcache->c_list, &bctx->c_list);
- bctx->c_count++;
- ret = 0;
- } /* if(private->c_count < 5)...else */
- }
-unlock:
- UNLOCK (&bctx->lock);
-out:
- return ret;
-}
-
-static int32_t
-bdb_cache_delete (bctx_t *bctx,
- const char *key)
-{
- bdb_cache_t *bcache = NULL;
- bdb_cache_t *trav = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", key, out);
-
- LOCK (&bctx->lock);
- {
- list_for_each_entry (trav, &bctx->c_list, c_list) {
- if (!strcmp (trav->key, key)){
- bctx->c_count--;
- bcache = trav;
- break;
- }
- }
-
- if (bcache) {
- list_del_init (&bcache->c_list);
- GF_FREE (bcache->key);
- GF_FREE (bcache->data);
- GF_FREE (bcache);
- }
- }
- UNLOCK (&bctx->lock);
-
-out:
- return 0;
-}
-
-void *
-bdb_db_stat (bctx_t *bctx,
- DB_TXN *txnid,
- uint32_t flags)
-{
- DB *storage = NULL;
- void *stat = NULL;
- int32_t ret = -1;
-
- LOCK (&bctx->lock);
- {
- if (bctx->primary == NULL) {
- ret = bdb_db_open (bctx);
- storage = bctx->primary;
- } else {
- /* we are just fine, lets continue */
- storage = bctx->primary;
- } /* if(bctx->dbp==NULL)...else */
- }
- UNLOCK (&bctx->lock);
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
-
- ret = storage->stat (storage, txnid, &stat, flags);
-
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_STAT %s: %s "
- "(failed to do stat database)",
- bctx->directory, db_strerror (ret));
- }
-out:
- return stat;
-
-}
-
-/* bdb_storage_get - retrieve a key/value pair corresponding to @path from the
- * corresponding db file.
- *
- * @bctx: bctx_t * corresponding to the parent directory of @path. (should
- * always be a valid bctx). bdb_storage_get should never be called if
- * @bctx = NULL.
- * @txnid: NULL if bdb_storage_get is not embedded in an explicit transaction
- * or a valid DB_TXN *, when embedded in an explicit transaction.
- * @path: path of the file to read from (translated to a database key using
- * MAKE_KEY_FROM_PATH)
- * @buf: char ** - pointer to a pointer to char. a read buffer is created in
- * this procedure and pointer to the buffer is passed through @buf to the
- * caller.
- * @size: size of the file content to be read.
- * @offset: offset from which the file content to be read.
- *
- * NOTE: bdb_storage_get tries to open DB, if @bctx->dbp == NULL
- * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by
- * bdb_table_prune()).
- *
- * NOTE: if private->cache is set (bdb xlator's internal caching enabled), then
- * bdb_storage_get first looks up the cache for key/value pair. if
- * bdb_lookup_cache fails, then only DB->get() is called. also, inserts a
- * newly read key/value pair to cache through bdb_insert_to_cache.
- *
- * return: 'number of bytes read' on success or -1 on error.
- *
- * also see: bdb_lookup_cache, bdb_insert_to_cache for details about bdb
- * xlator's internal cache.
- */
-static int32_t
-bdb_db_get (bctx_t *bctx,
- DB_TXN *txnid,
- const char *path,
- char *buf,
- size_t size,
- off_t offset)
-{
- DB *storage = NULL;
- DBT key = {0,};
- DBT value = {0,};
- int32_t ret = -1;
- size_t copy_size = 0;
- char *key_string = NULL;
- bdb_cache_t *bcache = NULL;
- int32_t db_flags = 0;
- uint8_t need_break = 0;
- int32_t retries = 1;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", path, out);
-
- MAKE_KEY_FROM_PATH (key_string, path);
-
- if (bctx->cache &&
- ((bcache = bdb_cache_lookup (bctx, key_string)) != NULL)) {
- if (buf) {
- copy_size = ((bcache->size - offset) < size)?
- (bcache->size - offset) : size;
-
- memcpy (buf, (bcache->data + offset), copy_size);
- ret = copy_size;
- } else {
- ret = bcache->size;
- }
-
- goto out;
- }
-
- LOCK (&bctx->lock);
- {
- if (bctx->primary == NULL) {
- ret = bdb_db_open (bctx);
- storage = bctx->primary;
- } else {
- /* we are just fine, lets continue */
- storage = bctx->primary;
- } /* if(bctx->dbp==NULL)...else */
- }
- UNLOCK (&bctx->lock);
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
-
- key.data = (char *)key_string;
- key.size = strlen (key_string);
- key.flags = DB_DBT_USERMEM;
-
- if (bctx->cache){
- value.flags = DB_DBT_MALLOC;
- } else {
- if (size) {
- value.data = buf;
- value.ulen = size;
- value.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
- } else {
- value.flags = DB_DBT_MALLOC;
- }
- value.dlen = size;
- value.doff = offset;
- }
-
- do {
- /* TODO: we prefer to give our own buffer to value.data
- * and ask bdb to fill in it */
- ret = storage->get (storage, txnid, &key, &value,
- db_flags);
-
- if (ret == DB_NOTFOUND) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_GET %s - %s: ENOENT"
- "(specified key not found in database)",
- bctx->directory, key_string);
- ret = -1;
- need_break = 1;
- } else if (ret == DB_LOCK_DEADLOCK) {
- retries++;
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_GET %s - %s"
- "(deadlock detected, retrying for %d "
- "time)",
- bctx->directory, key_string, retries);
- } else if (ret == 0) {
- /* successfully read data, lets set everything
- * in place and return */
- if (bctx->cache) {
- if (buf) {
- copy_size = ((value.size - offset) < size) ?
- (value.size - offset) : size;
-
- memcpy (buf, (value.data + offset),
- copy_size);
- ret = copy_size;
- }
-
- bdb_cache_insert (bctx, &key, &value);
- } else {
- ret = value.size;
- }
-
- if (size == 0)
- GF_FREE (value.data);
-
- need_break = 1;
- } else {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_GET %s - %s: %s"
- "(failed to retrieve specified key from"
- " database)",
- bctx->directory, key_string,
- db_strerror (ret));
- ret = -1;
- need_break = 1;
- }
- } while (!need_break);
-
-out:
- return ret;
-}/* bdb_db_get */
-
-/* TODO: handle errors here and log. propogate only the errno to caller */
-int32_t
-bdb_db_fread (struct bdb_fd *bfd, char *buf, size_t size, off_t offset)
-{
- return bdb_db_get (bfd->ctx, NULL, bfd->key, buf, size, offset);
-}
-
-int32_t
-bdb_db_iread (struct bdb_ctx *bctx, const char *key, char **bufp)
-{
- char *buf = NULL;
- size_t size = 0;
- int64_t ret = 0;
-
- ret = bdb_db_get (bctx, NULL, key, NULL, 0, 0);
- size = ret;
-
- if (bufp) {
- buf = GF_CALLOC (size, sizeof (char), gf_bdb_mt_char);
- *bufp = buf;
- ret = bdb_db_get (bctx, NULL, key, buf, size, 0);
- }
-
- return ret;
-}
-
-/* bdb_storage_put - insert a key/value specified to the corresponding DB.
- *
- * @bctx: bctx_t * corresponding to the parent directory of @path.
- * (should always be a valid bctx). bdb_storage_put should never be
- * called if @bctx = NULL.
- * @txnid: NULL if bdb_storage_put is not embedded in an explicit transaction
- * or a valid DB_TXN *, when embedded in an explicit transaction.
- * @key_string: key of the database entry.
- * @buf: pointer to the buffer data to be written as data for @key_string.
- * @size: size of @buf.
- * @offset: offset in the key's data to be modified with provided data.
- * @flags: valid flags are BDB_TRUNCATE_RECORD (to reduce the data of
- * @key_string to 0 size).
- *
- * NOTE: bdb_storage_put tries to open DB, if @bctx->dbp == NULL
- * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by
- * bdb_table_prune()).
- *
- * NOTE: bdb_storage_put deletes the key/value from bdb xlator's internal cache.
- *
- * return: 0 on success or -1 on error.
- *
- * also see: bdb_cache_delete for details on how a cached key/value pair is
- * removed.
- */
-static int32_t
-bdb_db_put (bctx_t *bctx,
- DB_TXN *txnid,
- const char *key_string,
- const char *buf,
- size_t size,
- off_t offset,
- int32_t flags)
-{
- DB *storage = NULL;
- DBT key = {0,}, value = {0,};
- int32_t ret = -1;
- int32_t db_flags = DB_AUTO_COMMIT;
- uint8_t need_break = 0;
- int32_t retries = 1;
-
- LOCK (&bctx->lock);
- {
- if (bctx->primary == NULL) {
- ret = bdb_db_open (bctx);
- storage = bctx->primary;
- } else {
- /* we are just fine, lets continue */
- storage = bctx->primary;
- }
- }
- UNLOCK (&bctx->lock);
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
-
- if (bctx->cache) {
- ret = bdb_cache_delete (bctx, (char *)key_string);
- GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out);
- }
-
- key.data = (void *)key_string;
- key.size = strlen (key_string);
-
- /* NOTE: bdb lets us expand the file, suppose value.size > value.len,
- * then value.len bytes from value.doff offset and value.size bytes
- * will be written from value.doff and data from
- * value.doff + value.dlen will be pushed value.doff + value.size
- */
- value.data = (void *)buf;
-
- if (flags & BDB_TRUNCATE_RECORD) {
- value.size = size;
- value.doff = 0;
- value.dlen = offset;
- } else {
- value.size = size;
- value.dlen = size;
- value.doff = offset;
- }
- value.flags = DB_DBT_PARTIAL;
- if (buf == NULL && size == 0)
- /* truncate called us */
- value.flags = 0;
-
- do {
- ret = storage->put (storage, txnid, &key, &value, db_flags);
- if (ret == DB_LOCK_DEADLOCK) {
- retries++;
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_PUT %s - %s"
- "(deadlock detected, retying for %d time)",
- bctx->directory, key_string, retries);
- } else if (ret) {
- /* write failed */
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_PUT %s - %s: %s"
- "(failed to put specified entry into database)",
- bctx->directory, key_string, db_strerror (ret));
- need_break = 1;
- } else {
- /* successfully wrote */
- ret = 0;
- need_break = 1;
- }
- } while (!need_break);
-out:
- return ret;
-}/* bdb_db_put */
-
-int32_t
-bdb_db_icreate (struct bdb_ctx *bctx, const char *key)
-{
- return bdb_db_put (bctx, NULL, key, NULL, 0, 0, 0);
-}
-
-/* TODO: handle errors here and log. propogate only the errno to caller */
-int32_t
-bdb_db_fwrite (struct bdb_fd *bfd, char *buf, size_t size, off_t offset)
-{
- return bdb_db_put (bfd->ctx, NULL, bfd->key, buf, size, offset, 0);
-}
-
-/* TODO: handle errors here and log. propogate only the errno to caller */
-int32_t
-bdb_db_iwrite (struct bdb_ctx *bctx, const char *key, char *buf, size_t size)
-{
- return bdb_db_put (bctx, NULL, key, buf, size, 0, 0);
-}
-
-int32_t
-bdb_db_itruncate (struct bdb_ctx *bctx, const char *key)
-{
- return bdb_db_put (bctx, NULL, key, NULL, 0, 1, 0);
-}
-
-/* bdb_storage_del - delete a key/value pair corresponding to @path from
- * corresponding db file.
- *
- * @bctx: bctx_t * corresponding to the parent directory of @path.
- * (should always be a valid bctx). bdb_storage_del should never be called
- * if @bctx = NULL.
- * @txnid: NULL if bdb_storage_del is not embedded in an explicit transaction
- * or a valid DB_TXN *, when embedded in an explicit transaction.
- * @path: path to the file, whose key/value pair has to be deleted.
- *
- * NOTE: bdb_storage_del tries to open DB, if @bctx->dbp == NULL
- * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by
- * bdb_table_prune()).
- *
- * return: 0 on success or -1 on error.
- */
-static int32_t
-bdb_db_del (bctx_t *bctx,
- DB_TXN *txnid,
- const char *key_string)
-{
- DB *storage = NULL;
- DBT key = {0,};
- int32_t ret = -1;
- int32_t db_flags = 0;
- uint8_t need_break = 0;
- int32_t retries = 1;
-
- LOCK (&bctx->lock);
- {
- if (bctx->primary == NULL) {
- ret = bdb_db_open (bctx);
- storage = bctx->primary;
- } else {
- /* we are just fine, lets continue */
- storage = bctx->primary;
- }
- }
- UNLOCK (&bctx->lock);
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
-
- ret = bdb_cache_delete (bctx, key_string);
- GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out);
-
- key.data = (char *)key_string;
- key.size = strlen (key_string);
- key.flags = DB_DBT_USERMEM;
-
- do {
- ret = storage->del (storage, txnid, &key, db_flags);
-
- if (ret == DB_NOTFOUND) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_DEL %s - %s: ENOENT"
- "(failed to delete entry, could not be "
- "found in the database)",
- bctx->directory, key_string);
- need_break = 1;
- } else if (ret == DB_LOCK_DEADLOCK) {
- retries++;
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_DEL %s - %s"
- "(deadlock detected, retying for %d time)",
- bctx->directory, key_string, retries);
- } else if (ret == 0) {
- /* successfully deleted the entry */
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_DEL %s - %s"
- "(successfully deleted entry from database)",
- bctx->directory, key_string);
- ret = 0;
- need_break = 1;
- } else {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_DB_DEL %s - %s: %s"
- "(failed to delete entry from database)",
- bctx->directory, key_string, db_strerror (ret));
- ret = -1;
- need_break = 1;
- }
- } while (!need_break);
-out:
- return ret;
-}
-
-int32_t
-bdb_db_iremove (bctx_t *bctx,
- const char *key)
-{
- return bdb_db_del (bctx, NULL, key);
-}
-
-/* NOTE: bdb version compatibility wrapper */
-int32_t
-bdb_cursor_get (DBC *cursorp,
- DBT *sec, DBT *pri,
- DBT *val,
- int32_t flags)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out);
-
-#ifdef HAVE_BDB_CURSOR_GET
- ret = cursorp->pget (cursorp, sec, pri, val, flags);
-#else
- ret = cursorp->c_pget (cursorp, sec, pri, val, flags);
-#endif
- if ((ret != 0) && (ret != DB_NOTFOUND)) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CURSOR_GET: %s"
- "(failed to retrieve entry from database cursor)",
- db_strerror (ret));
- }
-
-out:
- return ret;
-}/* bdb_cursor_get */
-
-int32_t
-bdb_dirent_size (DBT *key)
-{
- return GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + key->size);
-}
-
-
-
-/* bdb_dbenv_init - initialize DB_ENV
- *
- * initialization includes:
- * 1. opening DB_ENV (db_env_create(), DB_ENV->open()).
- * NOTE: see private->envflags for flags used.
- * 2. DB_ENV->set_lg_dir - set log directory to be used for storing log files
- * (log files are the files in which transaction logs are written by db).
- * 3. DB_ENV->set_flags (DB_LOG_AUTOREMOVE) - set DB_ENV to automatically
- * clear the unwanted log files (flushed at each checkpoint).
- * 4. DB_ENV->set_errfile - set errfile to be used by db to report detailed
- * error logs. used only for debbuging purpose.
- *
- * return: returns a valid DB_ENV * on success or NULL on error.
- *
- */
-static DB_ENV *
-bdb_dbenv_init (xlator_t *this,
- char *directory)
-{
- /* Create a DB environment */
- DB_ENV *dbenv = NULL;
- int32_t ret = 0;
- bdb_private_t *private = NULL;
- int32_t fatal_flags = 0;
-
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (directory, err);
-
- private = this->private;
- VALIDATE_OR_GOTO (private, err);
-
- ret = db_env_create (&dbenv, 0);
- VALIDATE_OR_GOTO ((ret == 0), err);
-
- /* NOTE: set_errpfx returns 'void' */
- dbenv->set_errpfx(dbenv, this->name);
-
- ret = dbenv->set_lk_detect (dbenv, DB_LOCK_DEFAULT);
- VALIDATE_OR_GOTO ((ret == 0), err);
-
- ret = dbenv->open(dbenv, directory,
- private->envflags,
- S_IRUSR | S_IWUSR);
- if ((ret != 0) && (ret != DB_RUNRECOVERY)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "failed to join Berkeley DB environment at %s: %s."
- "please run manual recovery and retry running "
- "glusterfs",
- directory, db_strerror (ret));
- dbenv = NULL;
- goto err;
- } else if (ret == DB_RUNRECOVERY) {
- fatal_flags = ((private->envflags & (~DB_RECOVER))
- | DB_RECOVER_FATAL);
- ret = dbenv->open(dbenv, directory, fatal_flags,
- S_IRUSR | S_IWUSR);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "failed to join Berkeley DB environment in "
- "recovery mode at %s: %s. please run manual "
- "recovery and retry running glusterfs",
- directory, db_strerror (ret));
- dbenv = NULL;
- goto err;
- }
- }
-
- ret = 0;
-#if (DB_VERSION_MAJOR == 4 && \
- DB_VERSION_MINOR == 7)
- if (private->log_auto_remove) {
- ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 1);
- } else {
- ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 0);
- }
-#else
- if (private->log_auto_remove) {
- ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 1);
- } else {
- ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 0);
- }
-#endif
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "autoremoval of transactional log files could not be "
- "configured (%s). you may have to do a manual "
- "monitoring of transactional log files and remove "
- "periodically.",
- db_strerror (ret));
- goto err;
- }
-
- if (private->transaction) {
- ret = dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1);
-
- if (ret != 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "configuration of auto-commit failed for "
- "database environment at %s. none of the "
- "operations will be embedded in transaction "
- "unless explicitly done so.",
- db_strerror (ret));
- goto err;
- }
-
- if (private->txn_timeout) {
- ret = dbenv->set_timeout (dbenv, private->txn_timeout,
- DB_SET_TXN_TIMEOUT);
- if (ret != 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "could not configure Berkeley DB "
- "transaction timeout to %d (%s). please"
- " review 'option transaction-timeout %d"
- "' option.",
- private->txn_timeout,
- db_strerror (ret),
- private->txn_timeout);
- goto err;
- }
- }
-
- if (private->lock_timeout) {
- ret = dbenv->set_timeout(dbenv,
- private->txn_timeout,
- DB_SET_LOCK_TIMEOUT);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "could not configure Berkeley DB "
- "lock timeout to %d (%s). please"
- " review 'option lock-timeout %d"
- "' option.",
- private->lock_timeout,
- db_strerror (ret),
- private->lock_timeout);
- goto err;
- }
- }
-
- ret = dbenv->set_lg_dir (dbenv, private->logdir);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to configure libdb transaction log "
- "directory at %s. please review the "
- "'option logdir %s' option.",
- db_strerror (ret), private->logdir);
- goto err;
- }
- }
-
- if (private->errfile) {
- private->errfp = fopen (private->errfile, "a+");
- if (private->errfp) {
- dbenv->set_errfile (dbenv, private->errfp);
- } else {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to open error logging file for "
- "libdb (Berkeley DB) internal logging (%s)."
- "please review the 'option errfile %s' option.",
- strerror (errno), private->errfile);
- goto err;
- }
- }
-
- return dbenv;
-err:
- if (dbenv) {
- dbenv->close (dbenv, 0);
- }
-
- return NULL;
-}
-
-#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv)
-
-/* bdb_checkpoint - during transactional usage, db does not directly write the
- * data to db files, instead db writes a 'log' (similar to a journal entry)
- * into a log file. db normally clears the log files during opening of an
- * environment. since we expect a filesystem server to run for a pretty long
- * duration and flushing 'log's during dbenv->open would prove very costly, if
- * we accumulate the log entries for one complete run of glusterfs server. to
- * flush the logs frequently, db provides a mechanism called 'checkpointing'.
- * when we do a checkpoint, db flushes the logs to disk (writes changes to db
- * files) and we can also clear the accumulated log files after checkpointing.
- * NOTE: removing unwanted log files is not part of dbenv->txn_checkpoint()
- * call.
- *
- * @data: xlator_t of the current instance of bdb xlator.
- *
- * bdb_checkpoint is called in a different thread from the main glusterfs
- * thread. bdb xlator creates the checkpoint thread after successfully opening
- * the db environment.
- * NOTE: bdb_checkpoint thread shares the DB_ENV handle with the filesystem
- * thread.
- *
- * db environment checkpointing frequency is controlled by
- * 'option checkpoint-timeout <time-in-seconds>' in volfile.
- *
- * NOTE: checkpointing thread is started only if 'option transaction on'
- * specified in volfile. checkpointing is not valid for non-transactional
- * environments.
- *
- */
-static void *
-bdb_checkpoint (void *data)
-{
- xlator_t *this = NULL;
- struct bdb_private *private = NULL;
- DB_ENV *dbenv = NULL;
- int32_t ret = 0;
- uint32_t active = 0;
-
- this = (xlator_t *) data;
- dbenv = BDB_ENV(this);
- private = this->private;
-
- for (;;sleep (private->checkpoint_interval)) {
- LOCK (&private->active_lock);
- active = private->active;
- UNLOCK (&private->active_lock);
-
- if (active) {
- ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0);
- if (ret) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CHECKPOINT: %s"
- "(failed to checkpoint environment)",
- db_strerror (ret));
- } else {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CHECKPOINT: successfully "
- "checkpointed");
- }
- } else {
- ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0);
- if (ret) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "_BDB_CHECKPOINT: %s"
- "(final checkpointing failed. might "
- "need to run recovery tool manually on "
- "next usage of this database "
- "environment)",
- db_strerror (ret));
- } else {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "_BDB_CHECKPOINT: final successfully "
- "checkpointed");
- }
- break;
- }
- }
-
- return NULL;
-}
-
-
-/* bdb_db_init - initialize bdb xlator
- *
- * reads the options from @options dictionary and sets appropriate values in
- * @this->private. also initializes DB_ENV.
- *
- * return: 0 on success or -1 on error
- * (with logging the error through gf_log()).
- */
-int
-bdb_db_init (xlator_t *this,
- dict_t *options)
-{
- /* create a db entry for root */
- int32_t op_ret = 0;
- bdb_private_t *private = NULL;
- bctx_table_t *table = NULL;
-
- char *checkpoint_interval_str = NULL;
- char *page_size_str = NULL;
- char *lru_limit_str = NULL;
- char *timeout_str = NULL;
- char *access_mode = NULL;
- char *endptr = NULL;
- char *errfile = NULL;
- char *directory = NULL;
- char *logdir = NULL;
- char *mode = NULL;
- char *mode_str = NULL;
- int ret = -1;
- int idx = 0;
- struct stat stbuf = {0,};
-
- private = this->private;
-
- /* cache is always on */
- private->cache = ON;
-
- ret = dict_get_str (options, "access-mode", &access_mode);
- if ((ret == 0)
- && (!strcmp (access_mode, "btree"))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "using BTREE access mode to access libdb "
- "(Berkeley DB)");
- private->access_mode = DB_BTREE;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "using HASH access mode to access libdb (Berkeley DB)");
- private->access_mode = DB_HASH;
- }
-
- ret = dict_get_str (options, "mode", &mode);
- if ((ret == 0)
- && (!strcmp (mode, "cache"))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cache data mode selected for 'storage/bdb'. filesystem"
- " operations are not transactionally protected and "
- "system crash does not guarantee recoverability of "
- "data");
- private->envflags = DB_CREATE | DB_INIT_LOG |
- DB_INIT_MPOOL | DB_THREAD;
- private->dbflags = DB_CREATE | DB_THREAD;
- private->transaction = OFF;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "persistent data mode selected for 'storage/bdb'. each"
- "filesystem operation is guaranteed to be Berkeley DB "
- "transaction protected.");
- private->transaction = ON;
- private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG |
- DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD;
- private->dbflags = DB_CREATE | DB_THREAD;
-
-
- ret = dict_get_str (options, "lock-timeout", &timeout_str);
-
- if (ret == 0) {
- ret = gf_string2time (timeout_str,
- &private->lock_timeout);
-
- if (private->lock_timeout > 4260000) {
- /* db allows us to DB_SET_LOCK_TIMEOUT to be
- * set to a maximum of 71 mins
- * (4260000 milliseconds) */
- gf_log (this->name, GF_LOG_DEBUG,
- "Berkeley DB lock-timeout parameter "
- "(%d) is out of range. please specify"
- " a valid timeout value for "
- "lock-timeout and retry.",
- private->lock_timeout);
- goto err;
- }
- }
- ret = dict_get_str (options, "transaction-timeout",
- &timeout_str);
- if (ret == 0) {
- ret = gf_string2time (timeout_str,
- &private->txn_timeout);
-
- if (private->txn_timeout > 4260000) {
- /* db allows us to DB_SET_TXN_TIMEOUT to be set
- * to a maximum of 71 mins
- * (4260000 milliseconds) */
- gf_log (this->name, GF_LOG_DEBUG,
- "Berkeley DB lock-timeout parameter "
- "(%d) is out of range. please specify"
- " a valid timeout value for "
- "lock-timeout and retry.",
- private->lock_timeout);
- goto err;
- }
- }
-
- private->checkpoint_interval = BDB_DEFAULT_CHECKPOINT_INTERVAL;
- ret = dict_get_str (options, "checkpoint-interval",
- &checkpoint_interval_str);
- if (ret == 0) {
- ret = gf_string2time (checkpoint_interval_str,
- &private->checkpoint_interval);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "'%"PRIu32"' is not a valid parameter "
- "for checkpoint-interval option. "
- "please specify a valid "
- "checkpoint-interval and retry",
- private->checkpoint_interval);
- goto err;
- }
- }
- }
-
- ret = dict_get_str (options, "file-mode", &mode_str);
- if (ret == 0) {
- private->file_mode = strtol (mode_str, &endptr, 8);
-
- if ((*endptr) ||
- (!IS_VALID_FILE_MODE(private->file_mode))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "'%o' is not a valid parameter for file-mode "
- "option. please specify a valid parameter for "
- "file-mode and retry.",
- private->file_mode);
- goto err;
- }
- } else {
- private->file_mode = DEFAULT_FILE_MODE;
- }
- private->symlink_mode = private->file_mode | S_IFLNK;
- private->file_mode = private->file_mode | S_IFREG;
-
- ret = dict_get_str (options, "dir-mode", &mode_str);
- if (ret == 0) {
- private->dir_mode = strtol (mode_str, &endptr, 8);
- if ((*endptr) ||
- (!IS_VALID_FILE_MODE(private->dir_mode))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "'%o' is not a valid parameter for dir-mode "
- "option. please specify a valid parameter for "
- "dir-mode and retry.",
- private->dir_mode);
- goto err;
- }
- } else {
- private->dir_mode = DEFAULT_DIR_MODE;
- }
-
- private->dir_mode = private->dir_mode | S_IFDIR;
-
- table = GF_CALLOC (1, sizeof (*table), gf_bdb_mt_bctx_table_t);
- if (table == NULL) {
- gf_log ("bdb-ll", GF_LOG_CRITICAL,
- "memory allocation for 'storage/bdb' internal "
- "context table failed.");
- goto err;
- }
-
- INIT_LIST_HEAD(&(table->b_lru));
- INIT_LIST_HEAD(&(table->active));
- INIT_LIST_HEAD(&(table->purge));
-
- LOCK_INIT (&table->lock);
- LOCK_INIT (&table->checkpoint_lock);
-
- table->transaction = private->transaction;
- table->access_mode = private->access_mode;
- table->dbflags = private->dbflags;
- table->this = this;
-
- ret = dict_get_str (options, "lru-limit",
- &lru_limit_str);
-
- /* TODO: set max lockers and max txns to accomodate
- * for more than lru_limit */
- if (ret == 0) {
- ret = gf_string2uint32 (lru_limit_str,
- &table->lru_limit);
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "setting lru limit of 'storage/bdb' internal context"
- "table to %d. maximum of %d unused databases can be "
- "open at any given point of time.",
- table->lru_limit, table->lru_limit);
- } else {
- table->lru_limit = BDB_DEFAULT_LRU_LIMIT;
- }
-
- ret = dict_get_str (options, "page-size",
- &page_size_str);
-
- if (ret == 0) {
- ret = gf_string2bytesize (page_size_str,
- &table->page_size);
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "\"%s\" is an invalid parameter to "
- "\"option page-size\". please specify a valid "
- "size and retry.",
- page_size_str);
- goto err;
- }
-
- if (!PAGE_SIZE_IN_RANGE(table->page_size)) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "\"%s\" is out of range for Berkeley DB "
- "page-size. allowed page-size range is %d to "
- "%d. please specify a page-size value in the "
- "range and retry.",
- page_size_str, BDB_LL_PAGE_SIZE_MIN,
- BDB_LL_PAGE_SIZE_MAX);
- goto err;
- }
- } else {
- table->page_size = BDB_LL_PAGE_SIZE_DEFAULT;
- }
-
- table->hash_size = BDB_DEFAULT_HASH_SIZE;
- table->b_hash = GF_CALLOC (BDB_DEFAULT_HASH_SIZE,
- sizeof (struct list_head),
- gf_bdb_mt_list_head);
-
- for (idx = 0; idx < table->hash_size; idx++)
- INIT_LIST_HEAD(&(table->b_hash[idx]));
-
- private->b_table = table;
-
- ret = dict_get_str (options, "errfile", &errfile);
- if (ret == 0) {
- private->errfile = gf_strdup (errfile);
- gf_log (this->name, GF_LOG_DEBUG,
- "using %s as error logging file for libdb (Berkeley DB "
- "library) internal logging.", private->errfile);
- }
-
- ret = dict_get_str (options, "directory", &directory);
-
- if (ret == 0) {
- ret = dict_get_str (options, "logdir", &logdir);
-
- if (ret < 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "using the database environment home "
- "directory (%s) itself as transaction log "
- "directory", directory);
- private->logdir = gf_strdup (directory);
-
- } else {
- private->logdir = gf_strdup (logdir);
-
- op_ret = stat (private->logdir, &stbuf);
- if ((op_ret != 0)
- || (!S_ISDIR (stbuf.st_mode))) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "specified logdir %s does not exist. "
- "please provide a valid existing "
- "directory as parameter to 'option "
- "logdir'",
- private->logdir);
- goto err;
- }
- }
-
- private->b_table->dbenv = bdb_dbenv_init (this, directory);
- if (private->b_table->dbenv == NULL) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "initialization of database environment "
- "failed");
- goto err;
- } else {
- if (private->transaction) {
- /* all well, start the checkpointing thread */
- LOCK_INIT (&private->active_lock);
-
- LOCK (&private->active_lock);
- {
- private->active = 1;
- }
- UNLOCK (&private->active_lock);
- pthread_create (&private->checkpoint_thread,
- NULL, bdb_checkpoint, this);
- }
- }
- }
-
- return op_ret;
-err:
- if (table) {
- GF_FREE (table->b_hash);
- GF_FREE (table);
- }
- if (private) {
- if (private->errfile)
- GF_FREE (private->errfile);
-
- if (private->logdir)
- GF_FREE (private->logdir);
- }
-
- return -1;
-}
diff --git a/xlators/storage/bdb/src/bdb-mem-types.h b/xlators/storage/bdb/src/bdb-mem-types.h
deleted file mode 100644
index cfbc4a4e117..00000000000
--- a/xlators/storage/bdb/src/bdb-mem-types.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __POSIX_MEM_TYPES_H__
-#define __POSIX_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_bdb_mem_types_ {
- gf_bdb_mt_bctx_t = gf_common_mt_end + 1,
- gf_bdb_mt_bdb_fd,
- gf_bdb_mt_dir_entry_t,
- gf_bdb_mt_char,
- gf_bdb_mt_dir_entry_t,
- gf_bdb_mt_char,
- gf_bdb_mt_bdb_private,
- gf_bdb_mt_uint32_t,
- gf_bdb_mt_char,
- gf_bdb_mt_bdb_cache_t,
- gf_bdb_mt_char,
- gf_bdb_mt_bctx_table_t,
- gf_bdb_mt_list_head,
- gf_bdb_mt_end,
-};
-#endif
diff --git a/xlators/storage/bdb/src/bdb.c b/xlators/storage/bdb/src/bdb.c
deleted file mode 100644
index 6104728e3db..00000000000
--- a/xlators/storage/bdb/src/bdb.c
+++ /dev/null
@@ -1,3603 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/* bdb based storage translator - named as 'bdb' translator
- *
- *
- * There can be only two modes for files existing on bdb translator:
- * 1. DIRECTORY - directories are stored by bdb as regular directories on
- * back-end file-system. directories also have an entry in the ns_db.db of
- * their parent directory.
- * 2. REGULAR FILE - regular files are stored as records in the storage_db.db
- * present in the directory. regular files also have an entry in ns_db.db
- *
- * Internally bdb has a maximum of three different types of logical files
- * associated with each directory:
- * 1. storage_db.db - storage database, used to store the data corresponding to
- * regular files in the form of key/value pair. file-name is the 'key' and
- * data is 'value'.
- * 2. directory (all subdirectories) - any subdirectory will have a regular
- * directory entry.
- */
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#define __XOPEN_SOURCE 500
-
-#include <stdint.h>
-#include <sys/time.h>
-#include <errno.h>
-#include <ftw.h>
-#include <libgen.h>
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "logging.h"
-#include "bdb.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
-
-/* to be used only by fops, nobody else */
-#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv)
-#define B_TABLE(this) (((struct bdb_private *)this->private)->b_table)
-
-
-int32_t
-bdb_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t dev)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- char *key_string = NULL; /* after translating path to DB key */
- char *db_path = NULL;
- bctx_t *bctx = NULL;
- struct stat stbuf = {0,};
-
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
- if (!S_ISREG(mode)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): EPERM"
- "(mknod supported only for regular files. "
- "file mode '%o' not supported)",
- loc->parent->ino, loc->name, loc->path, mode);
- op_ret = -1;
- op_errno = EPERM;
- goto out;
- } /* if(!S_ISREG(mode)) */
-
- bctx = bctx_parent (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
-
- op_ret = lstat (db_path, &stbuf);
- if (op_ret != 0) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): EINVAL"
- "(failed to lookup database handle)",
- loc->parent->ino, loc->name, loc->path);
- goto out;
- }
-
- MAKE_KEY_FROM_PATH (key_string, loc->path);
- op_ret = bdb_db_icreate (bctx, key_string);
- if (op_ret > 0) {
- /* create successful */
- stbuf.st_ino = bdb_inode_transform (loc->parent->ino,
- key_string,
- strlen (key_string));
- stbuf.st_mode = mode;
- stbuf.st_size = 0;
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, \
- stbuf.st_blksize);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): ENOMEM"
- "(failed to create database entry)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = EINVAL; /* TODO: errno sari illa */
- goto out;
- }/* if (!op_ret)...else */
-
-out:
- if (bctx) {
- /* NOTE: bctx_unref always returns success,
- * see description of bctx_unref for more details */
- bctx_unref (bctx);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
- return 0;
-}
-
-static inline int32_t
-is_dir_empty (xlator_t *this,
- loc_t *loc)
-{
- int32_t ret = 1;
- bctx_t *bctx = NULL;
- DIR *dir = NULL;
- char *real_path = NULL;
- void *dbstat = NULL;
- struct dirent *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
- bctx = bctx_lookup (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- dbstat = bdb_db_stat (bctx, NULL, 0);
- if (dbstat) {
- switch (bctx->table->access_mode)
- {
- case DB_HASH:
- ret = (((DB_HASH_STAT *)dbstat)->hash_nkeys == 0);
- break;
- case DB_BTREE:
- case DB_RECNO:
- ret = (((DB_BTREE_STAT *)dbstat)->bt_nkeys == 0);
- break;
- case DB_QUEUE:
- ret = (((DB_QUEUE_STAT *)dbstat)->qs_nkeys == 0);
- break;
- case DB_UNKNOWN:
- gf_log (this->name, GF_LOG_CRITICAL,
- "unknown access-mode set for database");
- ret = 0;
- }
- } else {
- ret = -EBUSY;
- goto out;
- }
-
- MAKE_REAL_PATH (real_path, this, loc->path);
- dir = opendir (real_path);
- if (dir == NULL) {
- ret = -errno;
- goto out;
- }
-
- while ((entry = readdir (dir))) {
- if ((!IS_BDB_PRIVATE_FILE(entry->d_name)) &&
- (!IS_DOT_DOTDOT(entry->d_name))) {
- ret = 0;
- break;
- }/* if(!IS_BDB_PRIVATE_FILE()) */
- } /* while(true) */
- closedir (dir);
-out:
- if (bctx) {
- /* NOTE: bctx_unref always returns success,
- * see description of bctx_unref for more details */
- bctx_unref (bctx);
- }
-
- return ret;
-}
-
-int32_t
-bdb_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- STACK_UNWIND (frame, -1, EXDEV, NULL);
- return 0;
-}
-
-int32_t
-bdb_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- STACK_UNWIND (frame, -1, EXDEV, NULL, NULL);
- return 0;
-}
-
-int32_t
-is_space_left (xlator_t *this,
- size_t size)
-{
- struct bdb_private *private = this->private;
- struct statvfs stbuf = {0,};
- int32_t ret = -1;
- fsblkcnt_t req_blocks = 0;
- fsblkcnt_t usable_blocks = 0;
-
- ret = statvfs (private->export_path, &stbuf);
- if (ret != 0) {
- ret = 0;
- } else {
- req_blocks = (size / stbuf.f_frsize) + 1;
-
- usable_blocks = (stbuf.f_bfree - BDB_ENOSPC_THRESHOLD);
-
- if (req_blocks < usable_blocks)
- ret = 1;
- else
- ret = 0;
- }
-
- return ret;
-}
-
-int32_t
-bdb_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EPERM;
- char *db_path = NULL;
- struct stat stbuf = {0,};
- bctx_t *bctx = NULL;
- struct bdb_private *private = NULL;
- char *key_string = NULL;
- struct bdb_fd *bfd = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- private = this->private;
-
- bctx = bctx_parent (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
- op_ret = lstat (db_path, &stbuf);
- if (op_ret != 0) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): EINVAL"
- "(database file missing)",
- loc->parent->ino, loc->name, loc->path);
- goto out;
- }
-
- MAKE_KEY_FROM_PATH (key_string, loc->path);
- op_ret = bdb_db_icreate (bctx, key_string);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): ENOMEM"
- "(failed to create database entry)",
- loc->parent->ino, loc->name, loc->path);
- op_errno = EINVAL; /* TODO: errno sari illa */
- goto out;
- }
-
- /* create successful */
- bfd = GF_CALLOC (1, sizeof (*bfd), gf_bdb_mt_bdb_fd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): ENOMEM"
- "(failed to allocate memory for internal fd context)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- /* NOTE: bdb_get_bctx_from () returns bctx with a ref */
- bfd->ctx = bctx;
- bfd->key = gf_strdup (key_string);
- if (bfd->key == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64" (%s): ENOMEM"
- "(failed to allocate memory for internal fd->key)",
- loc->ino, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- BDB_FCTX_SET (fd, this, bfd);
-
- stbuf.st_ino = bdb_inode_transform (loc->parent->ino,
- key_string,
- strlen (key_string));
- stbuf.st_mode = private->file_mode;
- stbuf.st_size = 0;
- stbuf.st_nlink = 1;
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
- op_ret = 0;
- op_errno = 0;
-out:
- STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf);
-
- return 0;
-}
-
-
-/* bdb_open
- *
- * as input parameters bdb_open gets the file name, i.e key. bdb_open should
- * effectively
- * do: store key, open storage db, store storage-db pointer.
- *
- */
-int32_t
-bdb_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- bctx_t *bctx = NULL;
- char *key_string = NULL;
- struct bdb_fd *bfd = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- bctx = bctx_parent (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPEN %"PRId64" (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->ino, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- bfd = GF_CALLOC (1, sizeof (*bfd), gf_bdb_mt_bdb_fd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPEN %"PRId64" (%s): ENOMEM"
- "(failed to allocate memory for internal fd context)",
- loc->ino, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- /* NOTE: bctx_parent () returns bctx with a ref */
- bfd->ctx = bctx;
-
- MAKE_KEY_FROM_PATH (key_string, loc->path);
- bfd->key = gf_strdup (key_string);
- if (bfd->key == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPEN %"PRId64" (%s): ENOMEM"
- "(failed to allocate memory for internal fd->key)",
- loc->ino, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- BDB_FCTX_SET (fd, this, bfd);
- op_ret = 0;
-out:
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-int32_t
-bdb_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- struct iovec vec = {0,};
- struct stat stbuf = {0,};
- struct bdb_fd *bfd = NULL;
- char *db_path = NULL;
- int32_t read_size = 0;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READV %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD"
- "(internal fd not found through fd)",
- fd->inode->ino, size, offset);
- op_errno = EBADFD;
- op_ret = -1;
- goto out;
- }
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory);
- op_ret = lstat (db_path, &stbuf);
- if (op_ret != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "READV %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EINVAL"
- "(database file missing)",
- fd->inode->ino, size, offset);
- goto out;
- }
-
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- /* we are ready to go */
- op_ret = bdb_db_fread (bfd, iobuf->ptr, size, offset);
- read_size = op_ret;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READV %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD"
- "(failed to find entry in database)",
- fd->inode->ino, size, offset);
- op_ret = -1;
- op_errno = ENOENT;
- goto out;
- } else if (op_ret == 0) {
- goto out;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (size < read_size) {
- op_ret = size;
- read_size = size;
- }
-
- iobref_add (iobref, iobuf);
-
- vec.iov_base = iobuf->ptr;
- vec.iov_len = read_size;
-
- stbuf.st_ino = fd->inode->ino;
- stbuf.st_size = bdb_db_fread (bfd, NULL, 0, 0);
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
- op_ret = size;
-out:
- STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf, iobuf);
-
- if (iobref)
- iobref_unref (iobref);
-
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-
-int32_t
-bdb_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- struct stat stbuf = {0,};
- struct bdb_fd *bfd = NULL;
- int32_t idx = 0;
- off_t c_off = offset;
- int32_t c_ret = -1;
- char *db_path = NULL;
- size_t total_size = 0;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
- GF_VALIDATE_OR_GOTO (this->name, vector, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "WRITEV %"PRId64" - %"PRId32",%"PRId64": EBADFD"
- "(internal fd not found through fd)",
- fd->inode->ino, count, offset);
- op_ret = -1;
- op_errno = EBADFD;
- goto out;
- }
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory);
- op_ret = lstat (db_path, &stbuf);
- if (op_ret != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "WRITEV %"PRId64" - %"PRId32",%"PRId64": EINVAL"
- "(database file missing)",
- fd->inode->ino, count, offset);
- goto out;
- }
-
- for (idx = 0; idx < count; idx++)
- total_size += vector[idx].iov_len;
-
- if (!is_space_left (this, total_size)) {
- gf_log (this->name, GF_LOG_ERROR,
- "WRITEV %"PRId64" - %"PRId32" (%"GF_PRI_SIZET"),%"
- PRId64": ENOSPC "
- "(not enough space after internal measurement)",
- fd->inode->ino, count, total_size, offset);
- op_ret = -1;
- op_errno = ENOSPC;
- goto out;
- }
-
- /* we are ready to go */
- for (idx = 0; idx < count; idx++) {
- c_ret = bdb_db_fwrite (bfd, vector[idx].iov_base,
- vector[idx].iov_len, c_off);
- if (c_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "WRITEV %"PRId64" - %"PRId32",%"PRId64": EINVAL"
- "(database write at %"PRId64" failed)",
- fd->inode->ino, count, offset, c_off);
- break;
- } else {
- c_off += vector[idx].iov_len;
- }
- op_ret += vector[idx].iov_len;
- } /* for(idx=0;...)... */
-
- if (c_ret) {
- /* write failed after a point, not an error */
- stbuf.st_size = bdb_db_fread (bfd, NULL, 0, 0);
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size,
- stbuf.st_blksize);
- goto out;
- }
-
- /* NOTE: we want to increment stbuf->st_size, as stored in db */
- stbuf.st_size = op_ret;
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
- op_errno = 0;
-
-out:
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
- return 0;
-}
-
-int32_t
-bdb_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EPERM;
- struct bdb_fd *bfd = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "FLUSH %"PRId64": EBADFD"
- "(internal fd not found through fd)",
- fd->inode->ino);
- op_ret = -1;
- op_errno = EBADFD;
- goto out;
- }
-
- /* do nothing */
- op_ret = 0;
- op_errno = 0;
-
-out:
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-bdb_release (xlator_t *this,
- fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EBADFD;
- struct bdb_fd *bfd = NULL;
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "RELEASE %"PRId64": EBADFD"
- "(internal fd not found through fd)",
- fd->inode->ino);
- op_ret = -1;
- op_errno = EBADFD;
- goto out;
- }
-
- bctx_unref (bfd->ctx);
- bfd->ctx = NULL;
-
- if (bfd->key)
- GF_FREE (bfd->key); /* we did strdup() in bdb_open() */
- GF_FREE (bfd);
- op_ret = 0;
- op_errno = 0;
-
-out:
- return 0;
-}/* bdb_release */
-
-
-int32_t
-bdb_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync)
-{
- STACK_UNWIND (frame, 0, 0);
- return 0;
-}/* bdb_fsync */
-
-static int gf_bdb_lk_log;
-
-int32_t
-bdb_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- struct flock nullock = {0, };
-
- if (BDB_TIMED_LOG (ENOTSUP, gf_bdb_lk_log)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LK %"PRId64": ENOTSUP "
- "(load \"features/locks\" translator to enable "
- "lock support)",
- fd->inode->ino);
- }
-
- STACK_UNWIND (frame, -1, ENOTSUP, &nullock);
- return 0;
-}/* bdb_lk */
-
-/* bdb_lookup
- *
- * there are four possibilities for a file being looked up:
- * 1. file exists and is a directory.
- * 2. file exists and is a symlink.
- * 3. file exists and is a regular file.
- * 4. file does not exist.
- * case 1 and 2 are handled by doing lstat() on the @loc. if the file is a
- * directory or symlink, lstat() succeeds. lookup continues to check if the
- * @loc belongs to case-3 only if lstat() fails.
- * to check for case 3, bdb_lookup does a bdb_db_iread() for the given @loc.
- * (see description of bdb_db_iread() for more details on how @loc is transformed
- * into db handle and key). if check for case 1, 2 and 3 fail, we proceed to
- * conclude that file doesn't exist (case 4).
- *
- * @frame: call frame.
- * @this: xlator_t of this instance of bdb xlator.
- * @loc: loc_t specifying the file to operate upon.
- * @need_xattr: if need_xattr != 0, we are asked to return all the extended
- * attributed of @loc, if any exist, in a dictionary. if @loc is a regular
- * file and need_xattr is set, then we look for value of need_xattr. if
- * need_xattr > sizo-of-the-file @loc, then the file content of @loc is
- * returned in dictionary of xattr with 'glusterfs.content' as dictionary key.
- *
- * NOTE: bdb currently supports only directories, symlinks and regular files.
- *
- * NOTE: bdb_lookup returns the 'struct stat' of underlying file itself, in
- * case of directory and symlink (st_ino is modified as bdb allocates its own
- * set of inodes of all files). for regular files, bdb uses 'struct stat' of
- * the database file in which the @loc is stored as templete and modifies
- * st_ino (see bdb_inode_transform for more details), st_mode (can be set in
- * volfile 'option file-mode <mode>'), st_size (exact size of the @loc
- * contents), st_blocks (block count on the underlying filesystem to
- * accomodate st_size, see BDB_COUNT_BLOCKS in bdb.h for more details).
- */
-int32_t
-bdb_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- struct stat stbuf = {0, };
- int32_t op_ret = -1;
- int32_t op_errno = ENOENT;
- dict_t *xattr = NULL;
- char *pathname = NULL;
- char *directory = NULL;
- char *real_path = NULL;
- bctx_t *bctx = NULL;
- char *db_path = NULL;
- struct bdb_private *private = NULL;
- char *key_string = NULL;
- int32_t entry_size = 0;
- char *file_content = NULL;
- uint64_t need_xattr = 0;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
- private = this->private;
-
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- pathname = gf_strdup (loc->path);
- GF_VALIDATE_OR_GOTO (this->name, pathname, out);
-
- directory = dirname (pathname);
- GF_VALIDATE_OR_GOTO (this->name, directory, out);
-
- if (!strcmp (directory, loc->path)) {
- /* SPECIAL CASE: looking up root */
- op_ret = lstat (real_path, &stbuf);
- if (op_ret != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64" (%s): %s",
- loc->ino, loc->path, strerror (op_errno));
- goto out;
- }
-
- /* bctx_lookup() returns NULL only when its time to wind up,
- * we should shutdown functioning */
- bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64" (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->ino, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- stbuf.st_ino = 1;
- stbuf.st_mode = private->dir_mode;
-
- op_ret = 0;
- goto out;
- }
-
- MAKE_KEY_FROM_PATH (key_string, loc->path);
- op_ret = lstat (real_path, &stbuf);
- if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))){
- bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (loc->ino) {
- /* revalidating directory inode */
- stbuf.st_ino = loc->ino;
- } else {
- stbuf.st_ino = bdb_inode_transform (loc->parent->ino,
- key_string,
- strlen (key_string));
- }
- stbuf.st_mode = private->dir_mode;
-
- op_ret = 0;
- goto out;
-
- } else if (op_ret == 0) {
- /* a symlink */
- bctx = bctx_parent (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (loc->ino) {
- stbuf.st_ino = loc->ino;
- } else {
- stbuf.st_ino = bdb_inode_transform (loc->parent->ino,
- key_string,
- strlen (key_string));
- }
-
- stbuf.st_mode = private->symlink_mode;
-
- op_ret = 0;
- goto out;
-
- }
-
- /* for regular files */
- bctx = bctx_parent (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): ENOMEM"
- "(failed to lookup database handle for parent)",
- loc->parent->ino, loc->name, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (GF_FILE_CONTENT_REQUESTED(xattr_req, &need_xattr)) {
- entry_size = bdb_db_iread (bctx, key_string, &file_content);
- } else {
- entry_size = bdb_db_iread (bctx, key_string, NULL);
- }
-
- op_ret = entry_size;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): ENOENT"
- "(database entry not found)",
- loc->parent->ino, loc->name, loc->path);
- op_errno = ENOENT;
- goto out;
- }
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
- op_ret = lstat (db_path, &stbuf);
- if (op_ret != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): %s",
- loc->parent->ino, loc->name, loc->path,
- strerror (op_errno));
- goto out;
- }
-
- if (entry_size
- && (need_xattr >= entry_size)
- && (file_content)) {
- xattr = dict_new ();
- op_ret = dict_set_dynptr (xattr, "glusterfs.content",
- file_content, entry_size);
- if (op_ret < 0) {
- /* continue without giving file contents */
- GF_FREE (file_content);
- }
- } else {
- if (file_content)
- GF_FREE (file_content);
- }
-
- if (loc->ino) {
- /* revalidate */
- stbuf.st_ino = loc->ino;
- stbuf.st_size = entry_size;
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size,
- stbuf.st_blksize);
- } else {
- /* fresh lookup, create an inode number */
- stbuf.st_ino = bdb_inode_transform (loc->parent->ino,
- key_string,
- strlen (key_string));
- stbuf.st_size = entry_size;
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size,
- stbuf.st_blksize);
- }/* if(inode->ino)...else */
- stbuf.st_nlink = 1;
- stbuf.st_mode = private->file_mode;
-
- op_ret = 0;
-out:
- if (bctx) {
- /* NOTE: bctx_unref always returns success,
- * see description of bctx_unref for more details */
- bctx_unref (bctx);
- }
-
- if (pathname)
- GF_FREE (pathname);
-
- if (xattr)
- dict_ref (xattr);
-
- STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf, xattr);
-
- if (xattr)
- dict_unref (xattr);
-
- return 0;
-
-}/* bdb_lookup */
-
-/* bdb_stat - return the attributes of @loc.
- *
- * The backend filesystem is consulted first: when lstat() on the real path
- * succeeds the entry is a directory or a symlink, and its mode is replaced by
- * the configured dir_mode/symlink_mode. Otherwise the entry is looked up in
- * the database of the parent directory: the attributes of the database file
- * serve as a template, and st_size/st_blocks are derived from the size of the
- * stored value.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location to stat
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_stat (call_frame_t *frame,
-          xlator_t *this,
-          loc_t *loc)
-{
-
-        struct stat         stbuf      = {0,};
-        char               *real_path  = NULL;
-        int32_t             op_ret     = -1;
-        int32_t             op_errno   = EINVAL;
-        struct bdb_private *private    = NULL;
-        char               *db_path    = NULL;
-        bctx_t             *bctx       = NULL;
-        int32_t             entry_size = 0;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        private = this->private;
-        GF_VALIDATE_OR_GOTO (this->name, private, out);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        op_ret = lstat (real_path, &stbuf);
-        if (op_ret == 0) {
-                /* directory or symlink */
-                op_errno = 0;
-                stbuf.st_ino = loc->inode->ino;
-                if (S_ISDIR(stbuf.st_mode))
-                        stbuf.st_mode = private->dir_mode;
-                else
-                        stbuf.st_mode = private->symlink_mode;
-                /* we are done, lets unwind the stack */
-                goto out;
-        }
-
-        bctx = bctx_parent (B_TABLE(this), loc->path);
-        if (bctx == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "STAT %"PRId64" (%s): ENOMEM"
-                        "(no database handle for parent)",
-                        loc->ino, loc->path);
-                op_ret   = -1;
-                op_errno = ENOMEM;
-                goto out;
-        }
-
-        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
-        op_ret = lstat (db_path, &stbuf);
-        if (op_ret < 0) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "STAT %"PRId64" (%s): %s"
-                        "(failed to stat on database file)",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto out;
-        }
-
-        /* A negative size means the entry could not be read from the
-         * database; report that instead of handing back st_size = -1
-         * together with a successful return. */
-        entry_size = bdb_db_iread (bctx, loc->path, NULL);
-        if (entry_size < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "STAT %"PRId64" (%s): ENOENT"
-                        "(database entry not found)",
-                        loc->ino, loc->path);
-                op_ret   = -1;
-                op_errno = ENOENT;
-                goto out;
-        }
-
-        stbuf.st_size   = entry_size;
-        stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
-        stbuf.st_ino    = loc->inode->ino;
-        op_errno        = 0;
-
-out:
-        if (bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
-        return 0;
-}/* bdb_stat */
-
-
-
-/* bdb_opendir - in the world of bdb, open/opendir is all about opening the
- * corresponding databases. opendir looks up the database context of the
- * directory being opened and opens the directory stream on the backend
- * filesystem. 'pointer to the database context', 'directory stream' and a
- * copy of the real path are packed into a struct bdb_dir, which is stored in
- * the fd context.
- *
- * @frame: call frame
- * @this: our information, as we filled during init()
- * @loc: location information
- * @fd: file descriptor structure (glusterfs internal)
- *
- * return value - immaterial, async call. The frame is unwound exactly once on
- * every path, success or failure.
- *
- */
-int32_t
-bdb_opendir (call_frame_t *frame,
-             xlator_t *this,
-             loc_t *loc,
-             fd_t *fd)
-{
-        char           *real_path = NULL;
-        int32_t         op_ret    = -1;
-        int32_t         op_errno  = EINVAL;
-        bctx_t         *bctx      = NULL;
-        struct bdb_dir *bfd       = NULL;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-        GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        /* NOTE: bctx_lookup() return bctx with ref */
-        bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
-        if (bctx == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "OPENDIR %"PRId64" (%s): ENOMEM"
-                        "(no database handle for directory)",
-                        loc->ino, loc->path);
-                op_errno = ENOMEM;
-                goto out;
-        }
-
-        bfd = GF_CALLOC (1, sizeof (*bfd), gf_bdb_mt_bdb_fd);
-        if (bfd == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "OPENDIR %"PRId64" (%s): ENOMEM"
-                        "(failed to allocate memory for internal fd)",
-                        loc->ino, loc->path);
-                op_errno = ENOMEM;
-                goto out;
-        }
-
-        bfd->dir = opendir (real_path);
-        if (bfd->dir == NULL) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "OPENDIR %"PRId64" (%s): %s",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto out;
-        }
-
-        bfd->path = gf_strdup (real_path);
-        if (bfd->path == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "OPENDIR %"PRId64" (%s): ENOMEM"
-                        "(failed to allocate memory for internal fd->path)",
-                        loc->ino, loc->path);
-                op_errno = ENOMEM;
-                goto out;
-        }
-
-        /* From here on the fd context owns both the bdb_dir and the
-         * reference on bctx; clear the locals so that the cleanup below
-         * leaves them alone. */
-        bfd->ctx = bctx;
-        bctx     = NULL;
-
-        BDB_FCTX_SET (fd, this, bfd);
-        bfd    = NULL;
-        op_ret = 0;
-
-out:
-        /* failure cleanup: anything still held in the locals was never
-         * handed over to the fd */
-        if (bfd) {
-                if (bfd->dir)
-                        closedir (bfd->dir);
-
-                GF_FREE (bfd);
-        }
-
-        if (bctx)
-                bctx_unref (bctx);
-
-        STACK_UNWIND (frame, op_ret, op_errno, fd);
-
-        return 0;
-}/* bdb_opendir */
-
-/* bdb_getdents - read directory entries of an opened directory.
- *
- * Entries are collected in two passes and prepended to a singly linked list
- * headed by 'entries': first the keys stored in the directory's database
- * (walked with a cursor), then the entries returned by readdir() on the
- * backend directory stream. The list is handed to STACK_UNWIND together with
- * 'count' and freed afterwards.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @fd:    fd carrying the struct bdb_dir stored by opendir
- * @size:  maximum number of entries wanted; 0 reads the whole directory
- * @off:   offset to resume from
- * @flag:  GF_GET_DIR_ONLY restricts the second pass (see note there)
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off,
- int32_t flag)
-{
- struct bdb_dir *bfd = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- /* NOTE(review): 'filled' is never modified in this function, so both
-  * 'while (filled <= size)' loops end only through their break/goto
-  * statements. */
- size_t filled = 0;
- dir_entry_t entries = {0, };
- dir_entry_t *this_entry = NULL;
- char *entry_path = NULL;
- struct dirent *dirent = NULL;
- off_t in_case = 0;
- int32_t this_size = 0;
- DBC *cursorp = NULL;
- int32_t ret = -1;
- /* NOTE(review): real_path_len is never assigned, it stays 0. */
- int32_t real_path_len = 0;
- int32_t entry_path_len = 0;
- int32_t count = 0;
- off_t offset = 0;
- size_t tmp_name_len = 0;
- /* NOTE(review): db_stbuf is never filled in; database entries get a
-  * zeroed stat buffer with only size, blocks and inode number set. */
- struct stat db_stbuf = {0,};
- struct stat buf = {0,};
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- " %o: EBADFD "
- "(failed to find internal context in fd)",
- fd->inode->ino, size, off, flag);
- op_errno = EBADFD;
- op_ret = -1;
- goto out;
- }
-
- op_ret = bdb_cursor_open (bfd->ctx, &cursorp);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- ": EBADFD "
- "(failed to open cursor to database handle)",
- fd->inode->ino, size, off);
- op_errno = EBADFD;
- goto out;
- }
-
- /* Resuming: position the cursor on the record whose secondary key is
-  * 'off'. When no such record exists the database pass is skipped and
-  * 'off' is used as a directory stream offset instead. */
- if (off) {
- DBT sec = {0,}, pri = {0,}, val = {0,};
- sec.data = &(off);
- sec.size = sizeof (off);
- sec.flags = DB_DBT_USERMEM;
- val.dlen = 0;
- val.doff = 0;
- val.flags = DB_DBT_PARTIAL;
-
- op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_SET);
- if (op_ret == DB_NOTFOUND) {
- /* NOTE(review): this jump skips the bdb_cursor_close() call below. */
- offset = off;
- goto dir_read;
- }
- }
-
- /* pass 1: entries stored in the database */
- while (filled <= size) {
- DBT sec = {0,}, pri = {0,}, val = {0,};
-
- this_entry = NULL;
-
- sec.flags = DB_DBT_MALLOC;
- pri.flags = DB_DBT_MALLOC;
- val.dlen = 0;
- val.doff = 0;
- val.flags = DB_DBT_PARTIAL;
- op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_NEXT);
-
- if (op_ret == DB_NOTFOUND) {
- /* we reached end of the directory */
- op_ret = 0;
- op_errno = 0;
- break;
- } else if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET
- ",%"PRId64":"
- "(failed to read the next entry from database)",
- fd->inode->ino, size, off);
- op_errno = ENOENT;
- break;
- } /* if (op_ret == DB_NOTFOUND)...else if...else */
-
- if (pri.data == NULL) {
- /* NOTE: currently ignore when we get key.data == NULL.
- * FIXME: we should not get key.data = NULL */
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET
- ",%"PRId64":"
- "(null key read for entry from database)",
- fd->inode->ino, size, off);
- continue;
- }/* if(key.data)...else */
-
- /* NOTE(review): the two 'goto out' statements below leave the loop
-  * without closing the cursor and without freeing sec.data/pri.data;
-  * the second one also leaves this_entry unlinked and unfreed. */
- this_entry = GF_CALLOC (1, sizeof (*this_entry),
- gf_bdb_mt_dir_entry_t);
- if (this_entry == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- " - %s:"
- "(failed to allocate memory for an entry)",
- fd->inode->ino, size, off, strerror (errno));
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- this_entry->name = GF_CALLOC (pri.size + 1, sizeof (char),
- gf_bdb_mt_char);
- if (this_entry->name == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- " - %s:"
- "(failed to allocate memory for an "
- "entry->name)",
- fd->inode->ino, size, off, strerror (errno));
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- /* the buffer was allocated zeroed with one spare byte, so the copied
-  * key is NUL-terminated */
- memcpy (this_entry->name, pri.data, pri.size);
- this_entry->buf = db_stbuf;
- this_entry->buf.st_size = bdb_db_iread (bfd->ctx,
- this_entry->name, NULL);
- this_entry->buf.st_blocks = BDB_COUNT_BLOCKS (
- this_entry->buf.st_size,
- this_entry->buf.st_blksize);
-
- this_entry->buf.st_ino = bdb_inode_transform (fd->inode->ino,
- pri.data,
- pri.size);
- count++;
-
- /* prepend: the resulting list is in reverse read order */
- this_entry->next = entries.next;
- this_entry->link = "";
- entries.next = this_entry;
- /* if size is 0, count can never be = size,
- * so entire dir is read */
- if (sec.data)
- GF_FREE (sec.data);
-
- if (pri.data)
- GF_FREE (pri.data);
-
- if (count == size)
- break;
- }/* while */
- bdb_cursor_close (bfd->ctx, cursorp);
- op_ret = count;
- op_errno = 0;
- if (count >= size)
- goto out;
-dir_read:
- /* pass 2: entries living on the backend filesystem */
- /* hungry kyaa? */
- if (!offset) {
- rewinddir (bfd->dir);
- } else {
- seekdir (bfd->dir, offset);
- }
-
- while (filled <= size) {
- this_entry = NULL;
- this_size = 0;
-
- in_case = telldir (bfd->dir);
- dirent = readdir (bfd->dir);
- if (!dirent)
- break;
-
- if (IS_BDB_PRIVATE_FILE(dirent->d_name))
- continue;
-
- tmp_name_len = strlen (dirent->d_name);
- if (entry_path_len < (real_path_len + 1 + (tmp_name_len) + 1)) {
- entry_path_len = real_path_len + tmp_name_len + 1024;
- entry_path = realloc (entry_path, entry_path_len);
- if (entry_path == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET","
- "%"PRId64" - %s: (failed to allocate "
- "memory for an entry_path)",
- fd->inode->ino, size, off,
- strerror (errno));
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
- }
-
- /* NOTE(review): only the entry name is copied, starting at index
-  * real_path_len + 1 (i.e. 1); nothing in this function writes the
-  * directory path or entry_path[0] into the buffer, and strncpy with
-  * tmp_name_len does not add a terminating NUL. The path passed to
-  * stat() therefore looks incomplete - confirm against bfd->path.
-  * entry_path is also never freed in this function. */
- strncpy (&entry_path[real_path_len+1], dirent->d_name,
- tmp_name_len);
- op_ret = stat (entry_path, &buf);
- if (op_ret < 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- " - %s:"
- " (failed to stat on an entry '%s')",
- fd->inode->ino, size, off,
- strerror (errno), entry_path);
- goto out; /* FIXME: shouldn't we continue here */
- }
-
- /* NOTE(review): 'ret' starts at -1 and is only assigned by the
-  * readlink() call further down, so this filter is inactive until a
-  * symlink has been read successfully. */
- if ((flag == GF_GET_DIR_ONLY) &&
- ((ret != -1) && (!S_ISDIR(buf.st_mode)))) {
- continue;
- }
-
- this_entry = GF_CALLOC (1, sizeof (*this_entry),
- gf_bdb_mt_dir_entry_t);
- if (this_entry == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- " - %s:"
- "(failed to allocate memory for an entry)",
- fd->inode->ino, size, off, strerror (errno));
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- this_entry->name = gf_strdup (dirent->d_name);
- if (this_entry->name == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET",%"PRId64
- " - %s:"
- "(failed to allocate memory for an "
- "entry->name)",
- fd->inode->ino, size, off, strerror (errno));
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- this_entry->buf = buf;
-
- this_entry->buf.st_ino = -1;
- /* NOTE(review): 'buf' comes from stat(), which follows symlinks, so
-  * S_ISLNK is not expected to be true here; when readlink() fails,
-  * this_entry->link keeps the NULL it was allocated with. */
- if (S_ISLNK(this_entry->buf.st_mode)) {
- char linkpath[ZR_PATH_MAX] = {0,};
- ret = readlink (entry_path, linkpath, ZR_PATH_MAX);
- if (ret != -1) {
- linkpath[ret] = '\0';
- this_entry->link = gf_strdup (linkpath);
- }
- } else {
- this_entry->link = "";
- }
-
- count++;
-
- this_entry->next = entries.next;
- entries.next = this_entry;
-
- /* if size is 0, count can never be = size,
- * so entire dir is read */
- if (count == size)
- break;
- }
- op_ret = filled;
- op_errno = 0;
-
-out:
- /* NOTE(review): this message is logged on every path through 'out',
-  * including successful ones. */
- gf_log (this->name, GF_LOG_DEBUG,
- "GETDENTS %"PRId64" - %"GF_PRI_SIZET" (%"PRId32")"
- "/%"GF_PRI_SIZET",%"PRId64":"
- "(failed to read the next entry from database)",
- fd->inode->ino, filled, count, size, off);
-
- /* the number of collected entries, not op_ret, is what gets reported */
- STACK_UNWIND (frame, count, op_errno, &entries);
-
- /* release the list; link strings duplicated with gf_strdup() above are
-  * not freed here */
- while (entries.next) {
- this_entry = entries.next;
- entries.next = entries.next->next;
- GF_FREE (this_entry->name);
- GF_FREE (this_entry);
- }
-
- return 0;
-}/* bdb_getdents */
-
-
-/* bdb_releasedir - release everything opendir stored in the fd context:
- * the duplicated path, the directory stream, the reference on the database
- * context and the struct bdb_dir itself. A missing member is only logged.
- *
- * @this: xlator_t of this instance of bdb xlator
- * @fd:   fd being released
- *
- * return value - always 0.
- */
-int32_t
-bdb_releasedir (xlator_t *this,
-                fd_t *fd)
-{
-        struct bdb_dir *dir_ctx = NULL;
-
-        BDB_FCTX_GET (fd, this, &dir_ctx);
-        if (!dir_ctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RELEASEDIR %"PRId64": EBADFD",
-                        fd->inode->ino);
-                return 0;
-        }
-
-        if (!dir_ctx->path) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RELEASEDIR %"PRId64": (bfd->path is NULL)",
-                        fd->inode->ino);
-        } else {
-                GF_FREE (dir_ctx->path);
-        }
-
-        if (!dir_ctx->dir) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RELEASEDIR %"PRId64": (bfd->dir is NULL)",
-                        fd->inode->ino);
-        } else {
-                closedir (dir_ctx->dir);
-        }
-
-        if (!dir_ctx->ctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RELEASEDIR %"PRId64": (bfd->ctx is NULL)",
-                        fd->inode->ino);
-        } else {
-                bctx_unref (dir_ctx->ctx);
-        }
-
-        GF_FREE (dir_ctx);
-
-        return 0;
-}/* bdb_releasedir */
-
-
-/* bdb_readlink - read the target of the symlink at @loc.
- *
- * The target is read into a stack buffer of @size + 1 bytes with readlink()
- * on the real path and handed to STACK_UNWIND.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location of the symlink
- * @size:  maximum number of bytes of the target to read
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_readlink (call_frame_t *frame,
-              xlator_t *this,
-              loc_t *loc,
-              size_t size)
-{
-        char    *real_path = NULL;
-        char    *target    = NULL;
-        int32_t  op_errno  = EPERM;
-        int32_t  op_ret    = -1;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        /* one spare byte for the terminator readlink() does not write */
-        target = alloca (size + 1);
-        GF_VALIDATE_OR_GOTO (this->name, target, out);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        op_ret = readlink (real_path, target, size);
-        if (op_ret == -1) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "READLINK %"PRId64" (%s): %s",
-                        loc->ino, loc->path, strerror (op_errno));
-        } else if (op_ret > 0) {
-                target[op_ret] = 0;
-        }
-
-out:
-        STACK_UNWIND (frame, op_ret, op_errno, target);
-
-        return 0;
-}/* bdb_readlink */
-
-
-/* bdb_mkdir - create a directory on the backend filesystem.
- *
- * The directory is created with @mode, handed over to the uid/gid found in
- * the frame's root, stat'ed, and a database context for it is looked up. If
- * any step after mkdir() fails the freshly created directory is removed
- * again. The inode number reported to the caller is derived from the parent
- * inode number and the entry's key.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location of the directory to create
- * @mode:  mode passed to mkdir()
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_mkdir (call_frame_t *frame,
-           xlator_t *this,
-           loc_t *loc,
-           mode_t mode)
-{
-        int32_t      op_ret     = -1;
-        int32_t      ret        = -1;
-        int32_t      op_errno   = EINVAL;
-        char        *real_path  = NULL;
-        struct stat  stbuf      = {0, };
-        bctx_t      *bctx       = NULL;
-        char        *key_string = NULL;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        MAKE_KEY_FROM_PATH (key_string, loc->path);
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        op_ret = mkdir (real_path, mode);
-        if (op_ret < 0) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKDIR %"PRId64" (%s): %s",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto out;
-        }
-
-        op_ret = chown (real_path, frame->root->uid, frame->root->gid);
-        if (op_ret < 0) {
-                op_errno = errno;
-                /* the failing call is chown(), say so in the log */
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKDIR %"PRId64" (%s): %s "
-                        "(failed to do chown)",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto err;
-        }
-
-        op_ret = lstat (real_path, &stbuf);
-        if (op_ret < 0) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKDIR %"PRId64" (%s): %s "
-                        "(failed to do lstat)",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto err;
-        }
-
-        bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
-        if (bctx == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKDIR %"PRId64" (%s): ENOMEM"
-                        "(no database handle for parent)",
-                        loc->ino, loc->path);
-                op_ret   = -1;
-                op_errno = ENOMEM;
-                goto err;
-        }
-
-        stbuf.st_ino = bdb_inode_transform (loc->parent->ino, key_string,
-                                            strlen (key_string));
-
-        goto out;
-
-err:
-        /* roll back the mkdir(); op_ret/op_errno keep describing the
-         * failure that brought us here */
-        ret = rmdir (real_path);
-        if (ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "MKDIR %"PRId64" (%s): %s"
-                        "(failed to do rmdir)",
-                        loc->ino, loc->path, strerror (errno));
-        }
-
-out:
-        if (bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
-
-        return 0;
-}/* bdb_mkdir */
-
-
-/* bdb_unlink - remove the entry at @loc.
- *
- * The entry is first removed from the database of its parent directory. When
- * the database has no such key (DB_NOTFOUND) the entry is unlinked from the
- * backend filesystem instead. Any other database failure is reported as
- * -1/EINVAL rather than leaking the raw database return code upwards.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location to remove
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_unlink (call_frame_t *frame,
-            xlator_t *this,
-            loc_t *loc)
-{
-        int32_t  op_ret     = -1;
-        int32_t  op_errno   = EINVAL;
-        bctx_t  *bctx       = NULL;
-        char    *real_path  = NULL;
-        char    *key_string = NULL;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        bctx = bctx_parent (B_TABLE(this), loc->path);
-        if (bctx == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "UNLINK %"PRId64" (%s): ENOMEM"
-                        "(no database handle for parent)",
-                        loc->ino, loc->path);
-                op_ret   = -1;
-                op_errno = ENOMEM;
-                goto out;
-        }
-
-        MAKE_KEY_FROM_PATH (key_string, loc->path);
-        op_ret = bdb_db_iremove (bctx, key_string);
-        if (op_ret == DB_NOTFOUND) {
-                MAKE_REAL_PATH (real_path, this, loc->path);
-                op_ret = unlink (real_path);
-                if (op_ret != 0) {
-                        op_errno = errno;
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "UNLINK %"PRId64" (%s): %s"
-                                "(symlink unlink failed)",
-                                loc->ino, loc->path, strerror (op_errno));
-                        goto out;
-                }
-                op_errno = 0;
-        } else if (op_ret == 0) {
-                op_errno = 0;
-        } else {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "UNLINK %"PRId64" (%s): EINVAL"
-                        "(removing entry from database failed - %s)",
-                        loc->ino, loc->path, db_strerror (op_ret));
-                op_ret   = -1;
-                op_errno = EINVAL; /* TODO: better errno */
-        }
-out:
-        if (bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno);
-
-        return 0;
-}/* bdb_unlink */
-
-
-
-/* bdb_do_rmdir - tear down the databases of a directory and remove it.
- *
- * With the database context locked, the primary and secondary handles are
- * closed and both databases are removed from the environment. All four steps
- * are always attempted; the first failure is remembered and reported instead
- * of being overwritten by the outcome of a later step. Only when every step
- * succeeded is the directory itself removed from the backend filesystem.
- *
- * @this: xlator_t of this instance of bdb xlator
- * @loc:  location of the directory
- *
- * return value - 0 on success, a negative value on failure: -errno style
- * codes for the database steps and for rmdir(), -1 when validation fails or
- * the context has no open database handles.
- */
-static int32_t
-bdb_do_rmdir (xlator_t *this,
-              loc_t *loc)
-{
-        char    *real_path = NULL;
-        int32_t  ret       = -1;
-        int32_t  step_ret  = 0;
-        bctx_t  *bctx      = NULL;
-        DB_ENV  *dbenv     = NULL;
-
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        dbenv = BDB_ENV(this);
-        GF_VALIDATE_OR_GOTO (this->name, dbenv, out);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        bctx = bctx_lookup (B_TABLE(this), loc->path);
-        if (bctx == NULL) {
-                ret = -ENOMEM;
-                goto out;
-        }
-
-        LOCK(&bctx->lock);
-        {
-                if ((bctx->primary == NULL)
-                    || (bctx->secondary == NULL)) {
-                        /* ret is still -1 here */
-                        goto unlock;
-                }
-
-                ret = 0;
-
-                step_ret = bctx->primary->close (bctx->primary, 0);
-                if ((step_ret < 0) && (ret == 0)) {
-                        ret = -EINVAL;
-                }
-
-                step_ret = bctx->secondary->close (bctx->secondary, 0);
-                if ((step_ret < 0) && (ret == 0)) {
-                        ret = -EINVAL;
-                }
-
-                step_ret = dbenv->dbremove (dbenv, NULL, bctx->db_path,
-                                            "primary", 0);
-                if ((step_ret < 0) && (ret == 0)) {
-                        ret = -EBUSY;
-                }
-
-                step_ret = dbenv->dbremove (dbenv, NULL, bctx->db_path,
-                                            "secondary", 0);
-                if ((step_ret != 0) && (ret == 0)) {
-                        ret = -EBUSY;
-                }
-        }
-unlock:
-        UNLOCK(&bctx->lock);
-
-        if (ret) {
-                goto out;
-        }
-
-        /* the caller turns a negative return into an errno by negating it,
-         * so hand back -errno rather than rmdir()'s bare -1 */
-        if (rmdir (real_path) < 0) {
-                ret = -errno;
-        }
-
-out:
-        if (bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        return ret;
-}
-
-/* bdb_rmdir - remove the directory at @loc.
- *
- * The directory is first checked for emptiness; a failed check or a
- * non-empty directory ends the call without touching anything. Otherwise the
- * removal is delegated to bdb_do_rmdir(). Failures are reported upwards as
- * op_ret = -1 with the matching errno.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location of the directory
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_rmdir (call_frame_t *frame,
-           xlator_t *this,
-           loc_t *loc)
-{
-        int32_t op_ret   = -1;
-        int32_t op_errno = 0;
-
-        op_ret = is_dir_empty (this, loc);
-        if (op_ret < 0) {
-                op_errno = -op_ret;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RMDIR %"PRId64" (%s): %s"
-                        "(failed to check whether directory is empty)",
-                        loc->ino, loc->path, strerror (op_errno));
-                /* do not go on and remove a directory whose state could
-                 * not be determined */
-                op_ret = -1;
-                goto out;
-        } else if (op_ret == 0) {
-                op_ret   = -1;
-                op_errno = ENOTEMPTY;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RMDIR %"PRId64" (%s): ENOTEMPTY",
-                        loc->ino, loc->path);
-                goto out;
-        }
-
-        op_ret = bdb_do_rmdir (this, loc);
-        if (op_ret < 0) {
-                op_errno = -op_ret;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "RMDIR %"PRId64" (%s): %s"
-                        "(internal rmdir routine returned error)",
-                        loc->ino, loc->path, strerror (op_errno));
-                op_ret = -1;
-                goto out;
-        }
-
-out:
-        STACK_UNWIND (frame, op_ret, op_errno);
-
-        return 0;
-} /* bdb_rmdir */
-
-/* bdb_symlink - create a symlink at @loc pointing to @linkname.
- *
- * Symlinks are created on the backend filesystem. After a successful
- * symlink() the link is stat'ed and the database context of the parent is
- * looked up; if either step fails the freshly created link is removed again
- * and the error of the failing step is reported. When symlink() itself fails
- * nothing is removed - the path may belong to an entry that existed before
- * this call - and the errno of symlink() is reported.
- *
- * @frame:    call frame
- * @this:     xlator_t of this instance of bdb xlator
- * @linkname: target the new symlink points to
- * @loc:      location of the symlink to create
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_symlink (call_frame_t *frame,
-             xlator_t *this,
-             const char *linkname,
-             loc_t *loc)
-{
-        int32_t             op_ret     = -1;
-        int32_t             op_errno   = EINVAL;
-        char               *real_path  = NULL;
-        struct stat         stbuf      = {0,};
-        struct bdb_private *private    = NULL;
-        bctx_t             *bctx       = NULL;
-        char               *key_string = NULL;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-        GF_VALIDATE_OR_GOTO (this->name, linkname, out);
-
-        private = this->private;
-        GF_VALIDATE_OR_GOTO (this->name, private, out);
-
-        MAKE_KEY_FROM_PATH (key_string, loc->path);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-        op_ret = symlink (linkname, real_path);
-        if (op_ret != 0) {
-                /* nothing was created, so there is nothing to roll back */
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "SYMLINK %"PRId64" (%s): %s",
-                        loc->ino, loc->path, strerror (op_errno));
-                op_ret = -1;
-                goto out;
-        }
-
-        op_ret = lstat (real_path, &stbuf);
-        if (op_ret != 0) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "SYMLINK %"PRId64" (%s): %s",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto err;
-        }
-
-        bctx = bctx_parent (B_TABLE(this), loc->path);
-        if (bctx == NULL) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "SYMLINK %"PRId64" (%s): ENOMEM"
-                        "(no database handle for parent)",
-                        loc->ino, loc->path);
-                op_errno = ENOMEM;
-                goto err;
-        }
-
-        stbuf.st_ino  = bdb_inode_transform (loc->parent->ino,
-                                             key_string,
-                                             strlen (key_string));
-        stbuf.st_mode = private->symlink_mode;
-        op_errno      = 0;
-
-        goto out;
-
-err:
-        /* roll back the symlink created above; op_errno keeps describing
-         * the step that failed */
-        if (unlink (real_path) != 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "SYMLINK %"PRId64" (%s): %s"
-                        "(failed to unlink the created symlink)",
-                        loc->ino, loc->path, strerror (errno));
-        }
-        op_ret = -1;
-out:
-        if (bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
-
-        return 0;
-} /* bdb_symlink */
-
-/* bdb_do_chmod - apply stbuf->st_mode to @path.
- *
- * lchmod() is tried first; only when it fails with ENOSYS is chmod() used
- * as a fallback.
- *
- * return value - result of the last call made (0 or -1 with errno set).
- */
-static int
-bdb_do_chmod (xlator_t *this,
-              const char *path,
-              struct stat *stbuf)
-{
-        int32_t status = lchmod (path, stbuf->st_mode);
-
-        if (status != -1)
-                return status;
-
-        if (errno != ENOSYS)
-                return status;
-
-        status = chmod (path, stbuf->st_mode);
-        return status;
-}
-
-/* bdb_do_chown - change the owner and/or group of @path without following
- * symlinks.
- *
- * Only the ids selected by GF_SET_ATTR_UID / GF_SET_ATTR_GID in @valid are
- * taken from @stbuf; an id that is not selected is passed to lchown() as -1.
- *
- * return value - result of lchown().
- */
-static int
-bdb_do_chown (xlator_t *this,
-              const char *path,
-              struct stat *stbuf,
-              int32_t valid)
-{
-        uid_t owner = (uid_t) -1;
-        gid_t group = (gid_t) -1;
-
-        if (valid & GF_SET_ATTR_GID)
-                group = stbuf->st_gid;
-
-        if (valid & GF_SET_ATTR_UID)
-                owner = stbuf->st_uid;
-
-        return lchown (path, owner, group);
-}
-
-/* bdb_do_utimes - set access and modification times of @path from @stbuf
- * without following symlinks.
- *
- * tv[0] carries the access time and tv[1] the modification time; the
- * nanosecond parts of @stbuf are scaled down to microseconds.
- *
- * return value - result of lutimes().
- */
-static int
-bdb_do_utimes (xlator_t *this,
-               const char *path,
-               struct stat *stbuf)
-{
-        int32_t        ret   = -1;
-        struct timeval tv[2] = {{0,},{0,}};
-
-        tv[0].tv_sec  = stbuf->st_atime;
-        tv[0].tv_usec = ST_ATIM_NSEC (stbuf) / 1000;
-        tv[1].tv_sec  = stbuf->st_mtime;
-        /* sub-second part of the modification time, not of the access
-         * time */
-        tv[1].tv_usec = ST_MTIM_NSEC (stbuf) / 1000;
-
-        ret = lutimes (path, tv);
-
-        return ret;
-}
-
-/* bdb_setattr - change mode, ownership and/or timestamps of @loc.
- *
- * The real path is stat'ed before and after the change; both stat buffers
- * are handed to STACK_UNWIND. Which attributes are applied is selected by
- * the GF_SET_ATTR_* bits in @valid, in the order mode, owner/group, times;
- * the first failing step ends the call.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location to operate on
- * @stbuf: attribute values to apply
- * @valid: GF_SET_ATTR_* bitmask selecting the attributes to apply
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_setattr (call_frame_t *frame,
-             xlator_t *this,
-             loc_t *loc,
-             struct stat *stbuf,
-             int32_t valid)
-{
-        struct stat  preop     = {0,};
-        struct stat  postop    = {0,};
-        char        *real_path = NULL;
-        int32_t      op_errno  = EINVAL;
-        int32_t      op_ret    = -1;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        op_ret   = lstat (real_path, &preop);
-        op_errno = errno;
-        if (op_ret != 0) {
-                /* a path missing on the backend is reported as EPERM and
-                 * is not logged */
-                if (op_errno != ENOENT) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "CHMOD %"PRId64" (%s): %s"
-                                "(pre-op lstat failed)",
-                                loc->ino, loc->path, strerror (op_errno));
-                } else {
-                        op_errno = EPERM;
-                }
-                goto out;
-        }
-
-        /* directory or symlink */
-        if (valid & GF_SET_ATTR_MODE) {
-                op_ret = bdb_do_chmod (this, real_path, stbuf);
-                if (op_ret == -1) {
-                        op_errno = errno;
-                        gf_log (this->name, GF_LOG_ERROR,
-                                "setattr (chmod) on %s failed: %s", loc->path,
-                                strerror (op_errno));
-                        goto out;
-                }
-        }
-
-        if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
-                op_ret = bdb_do_chown (this, real_path, stbuf, valid);
-                if (op_ret == -1) {
-                        op_errno = errno;
-                        gf_log (this->name, GF_LOG_ERROR,
-                                "setattr (chown) on %s failed: %s", loc->path,
-                                strerror (op_errno));
-                        goto out;
-                }
-        }
-
-        if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
-                op_ret = bdb_do_utimes (this, real_path, stbuf);
-                if (op_ret == -1) {
-                        op_errno = errno;
-                        gf_log (this->name, GF_LOG_ERROR,
-                                "setattr (utimes) on %s failed: %s", loc->path,
-                                strerror (op_errno));
-                        goto out;
-                }
-        }
-
-        op_ret   = lstat (real_path, &postop);
-        op_errno = errno;
-        if (op_ret != 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "CHMOD %"PRId64" (%s): %s"
-                        "(post-op lstat failed)",
-                        loc->ino, loc->path, strerror (op_errno));
-        }
-
-out:
-        STACK_UNWIND (frame, op_ret, op_errno, &preop, &postop);
-
-        return 0;
-}/* bdb_setattr */
-
-/* bdb_fsetattr - setattr on an open fd is not supported: the call is
- * unwound right away with -1/EPERM and two zeroed stat buffers.
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_fsetattr (call_frame_t *frame,
-              xlator_t *this,
-              fd_t *fd,
-              struct stat *stbuf,
-              int32_t valid)
-{
-        struct stat before   = {0,};
-        struct stat after    = {0,};
-        int32_t     op_errno = EPERM;
-        int32_t     op_ret   = -1;
-
-        STACK_UNWIND (frame, op_ret, op_errno, &before, &after);
-
-        return 0;
-}/* bdb_fsetattr */
-
-
-/* bdb_truncate - truncate the database entry behind @loc.
- *
- * The attributes handed back are those of the database file of the parent
- * directory, with the inode number replaced by the entry's own. The entry is
- * truncated with bdb_db_itruncate(); @offset is not passed on to it.
- *
- * @frame:  call frame
- * @this:   xlator_t of this instance of bdb xlator
- * @loc:    location to truncate
- * @offset: requested length (unused by this implementation)
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_truncate (call_frame_t *frame,
-              xlator_t *this,
-              loc_t *loc,
-              off_t offset)
-{
-        struct stat  stbuf      = {0,};
-        bctx_t      *bctx       = NULL;
-        char        *key_string = NULL;
-        char        *db_path    = NULL;
-        char        *real_path  = NULL;
-        int32_t      op_errno   = EINVAL;
-        int32_t      op_ret     = -1;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        bctx = bctx_parent (B_TABLE(this), loc->path);
-        if (!bctx) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "TRUNCATE %"PRId64" (%s): ENOMEM"
-                        "(no database handle for parent)",
-                        loc->ino, loc->path);
-                op_ret   = -1;
-                op_errno = ENOMEM;
-                goto out;
-        }
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-        MAKE_KEY_FROM_PATH (key_string, loc->path);
-
-        /* the database file supplies the attribute template */
-        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
-        op_ret = lstat (db_path, &stbuf);
-        if (op_ret != 0) {
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "TRUNCATE %"PRId64" (%s): %s"
-                        "(lstat on database file failed)",
-                        loc->ino, loc->path, strerror (op_errno));
-                goto out;
-        }
-
-        /* keep a known inode number, derive one otherwise */
-        if (!loc->inode->ino) {
-                stbuf.st_ino = bdb_inode_transform (loc->parent->ino,
-                                                    key_string,
-                                                    strlen (key_string));
-        } else {
-                stbuf.st_ino = loc->inode->ino;
-        }
-
-        op_ret = bdb_db_itruncate (bctx, key_string);
-        if (op_ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "TRUNCATE %"PRId64" (%s): EINVAL"
-                        "(truncating entry in database failed - %s)",
-                        loc->ino, loc->path, db_strerror (op_ret));
-                op_errno = EINVAL; /* TODO: better errno */
-        }
-
-out:
-        if (bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
-        return 0;
-}/* bdb_truncate */
-
-
-/* bdb_statfs - report filesystem statistics for @loc by calling statvfs()
- * on its real path on the backend filesystem.
- *
- * @frame: call frame
- * @this:  xlator_t of this instance of bdb xlator
- * @loc:   location whose filesystem is queried
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_statfs (call_frame_t *frame,
-            xlator_t *this,
-            loc_t *loc)
-
-{
-        struct statvfs  fs_stats  = {0, };
-        char           *real_path = NULL;
-        int32_t         op_errno  = EINVAL;
-        int32_t         op_ret    = -1;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        /* errno is captured whether or not the call succeeded */
-        op_ret   = statvfs (real_path, &fs_stats);
-        op_errno = errno;
-out:
-        STACK_UNWIND (frame, op_ret, op_errno, &fs_stats);
-        return 0;
-}/* bdb_statfs */
-
-/* state handed to BDB_TIMED_LOG by the xattr operations */
-static int gf_bdb_xattr_log;
-
-/* bdb_setxattr - set extended attributes.
- *
- * bdb allows setxattr operation only on directories.
- * bdb reserves 'glusterfs.file.<attribute-name>' to operate on the content
- * of the files under the specified directory.
- * 'glusterfs.file.<attribute-name>' transforms to contents of file of name
- * '<attribute-name>' under specified directory.
- * Every other attribute is set with lsetxattr() on the real path.
- *
- * @frame: call frame.
- * @this: xlator_t of this instance of bdb xlator.
- * @loc: loc_t specifying the file to operate upon.
- * @dict: list of extended attributes to set on @loc.
- * @flags: can be XATTR_REPLACE (replace an existing extended attribute only if
- *         it exists) or XATTR_CREATE (create an extended attribute only if it
- *         doesn't already exist).
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- */
-int32_t
-bdb_setxattr (call_frame_t *frame,
-              xlator_t *this,
-              loc_t *loc,
-              dict_t *dict,
-              int flags)
-{
-        int32_t      op_ret    = -1;
-        int32_t      op_errno  = EINVAL;
-        data_pair_t *trav      = NULL;
-        bctx_t      *bctx      = NULL;
-        char        *real_path = NULL;
-        char        *key       = NULL;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-        GF_VALIDATE_OR_GOTO (this->name, dict, out);
-
-        /* only look inside dict once it is known to be non-NULL */
-        trav = dict->members_list;
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-        if (!S_ISDIR (loc->inode->st_mode)) {
-                op_ret   = -1;
-                op_errno = ENOATTR;
-                goto out;
-        }
-
-        while (trav) {
-                if (GF_FILE_CONTENT_REQUEST(trav->key) ) {
-                        key = BDB_KEY_FROM_FREQUEST_KEY(trav->key);
-
-                        bctx = bctx_lookup (B_TABLE(this), loc->path);
-                        if (bctx == NULL) {
-                                gf_log (this->name, GF_LOG_DEBUG,
-                                        "SETXATTR %"PRId64" (%s) - %s: ENOMEM"
-                                        "(no database handle for directory)",
-                                        loc->ino, loc->path, key);
-                                op_ret   = -1;
-                                op_errno = ENOMEM;
-                                goto out;
-                        }
-
-                        if (flags & XATTR_REPLACE) {
-                                op_ret = bdb_db_itruncate (bctx, key);
-                                if (op_ret == -1) {
-                                        /* key doesn't exist in database */
-                                        gf_log (this->name, GF_LOG_DEBUG,
-                                                "SETXATTR %"PRId64" (%s) - %s:"
-                                                " (entry not present in "
-                                                "database)",
-                                                loc->ino, loc->path, key);
-                                        op_ret   = -1;
-                                        op_errno = ENOATTR;
-                                        break;
-                                }
-                                op_ret = bdb_db_iwrite (bctx, key,
-                                                        trav->value->data,
-                                                        trav->value->len);
-                                if (op_ret != 0) {
-                                        op_ret   = -1;
-                                        op_errno = ENOATTR;
-                                        break;
-                                }
-                                op_errno = 0;
-                        } else {
-                                /* fresh create */
-                                op_ret = bdb_db_iwrite (bctx, key,
-                                                        trav->value->data,
-                                                        trav->value->len);
-                                if (op_ret != 0) {
-                                        op_ret   = -1;
-                                        op_errno = EEXIST;
-                                        break;
-                                } else {
-                                        op_ret   = 0;
-                                        op_errno = 0;
-                                } /* if(op_ret!=0)...else */
-                        } /* if(flags&XATTR_REPLACE)...else */
-
-                        /* NOTE: bctx_unref always returns success, see
-                         * description of bctx_unref for more details.
-                         * The pointer is cleared so that the unref at
-                         * 'out' only fires for the error exits above. */
-                        bctx_unref (bctx);
-                        bctx = NULL;
-                } else {
-                        /* do plain setxattr */
-                        op_ret = lsetxattr (real_path,
-                                            trav->key, trav->value->data,
-                                            trav->value->len,
-                                            flags);
-                        if (op_ret == -1) {
-                                /* errno is meaningful only on failure */
-                                op_errno = errno;
-
-                                if ((op_errno == ENOATTR) ||
-                                    (op_errno == EEXIST)) {
-                                        /* don't log, normal behaviour */
-                                        ;
-                                } else if (BDB_TIMED_LOG (op_errno,
-                                                          gf_bdb_xattr_log)) {
-                                        gf_log (this->name, GF_LOG_DEBUG,
-                                                "SETXATTR %"PRId64" (%s) - %s: %s",
-                                                loc->ino, loc->path, trav->key,
-                                                strerror (op_errno));
-                                        /* do not continue, break out */
-                                        break;
-                                } else {
-                                        gf_log (this->name, GF_LOG_DEBUG,
-                                                "SETXATTR %"PRId64" (%s) - %s: %s",
-                                                loc->ino, loc->path, trav->key,
-                                                strerror (op_errno));
-                                }
-                        } else {
-                                op_errno = 0;
-                        }
-                } /* if(ZR_FILE_CONTENT_REQUEST())...else */
-                trav = trav->next;
-        }/* while(trav) */
-out:
-        if (bctx) {
-                /* reference still held by one of the error exits */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno);
-        return 0;
-}/* bdb_setxattr */
-
-
-/* bdb_getxattr - get extended attributes.
- *
- * bdb allows getxattr operation only on directories.
- * bdb_getxattr retrieves the whole content of the file, when
- * glusterfs.file.<attribute-name> is specified. For any other @name every
- * extended attribute found on the real path is read and added to the
- * returned dictionary.
- *
- * @frame: call frame.
- * @this: xlator_t of this instance of bdb xlator.
- * @loc: loc_t specifying the file to operate upon.
- * @name: name of extended attributes to get for @loc.
- *
- * return value - always 0; the result is delivered through STACK_UNWIND.
- *
- * NOTE: see description of bdb_setxattr for details on how
- * 'glusterfs.file.<attribute-name>' is handled by bdb.
- */
-int32_t
-bdb_getxattr (call_frame_t *frame,
-              xlator_t *this,
-              loc_t *loc,
-              const char *name)
-{
-        int32_t  op_ret      = -1;
-        int32_t  op_errno    = EINVAL;
-        dict_t  *dict        = NULL;
-        bctx_t  *bctx        = NULL;
-        char    *buf         = NULL;
-        char    *key_string  = NULL;
-        char    *real_path   = NULL;
-        char    *list        = NULL;
-        char    *key         = NULL;
-        char    *value       = NULL;
-        ssize_t  list_size   = 0;
-        ssize_t  value_size  = 0;
-        size_t   list_offset = 0;
-
-        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
-        GF_VALIDATE_OR_GOTO ("bdb", this, out);
-        GF_VALIDATE_OR_GOTO (this->name, loc, out);
-        GF_VALIDATE_OR_GOTO (this->name, name, out);
-
-        dict = dict_new ();
-        GF_VALIDATE_OR_GOTO (this->name, dict, out);
-
-        if (!S_ISDIR (loc->inode->st_mode)) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "GETXATTR %"PRId64" (%s) - %s: ENOATTR "
-                        "(not a directory)",
-                        loc->ino, loc->path, name);
-                op_ret   = -1;
-                op_errno = ENOATTR;
-                goto out;
-        }
-
-        if (name && GF_FILE_CONTENT_REQUEST(name)) {
-                bctx = bctx_lookup (B_TABLE(this), loc->path);
-                if (bctx == NULL) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "GETXATTR %"PRId64" (%s) - %s: ENOMEM"
-                                "(no database handle for directory)",
-                                loc->ino, loc->path, name);
-                        op_ret   = -1;
-                        op_errno = ENOMEM;
-                        goto out;
-                }
-
-                key_string = BDB_KEY_FROM_FREQUEST_KEY(name);
-
-                op_ret = bdb_db_iread (bctx, key_string, &buf);
-                if (op_ret == -1) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "GETXATTR %"PRId64" (%s) - %s: ENOATTR"
-                                "(attribute not present in database)",
-                                loc->ino, loc->path, name);
-                        op_errno = ENOATTR;
-                        goto out;
-                }
-
-                op_ret = dict_set_dynptr (dict, (char *)name, buf, op_ret);
-                if (op_ret < 0) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "GETXATTR %"PRId64" (%s) - %s: ENOATTR"
-                                "(attribute present in database, "
-                                "dict set failed)",
-                                loc->ino, loc->path, name);
-                        /* the dictionary did not take the buffer */
-                        if (buf)
-                                GF_FREE (buf);
-                        op_ret   = -1;
-                        op_errno = ENODATA;
-                } else {
-                        op_errno = 0;
-                }
-
-                goto out;
-        }
-
-        MAKE_REAL_PATH (real_path, this, loc->path);
-
-        /* The length is kept in a signed type: llistxattr() reports failure
-         * as -1, which an unsigned variable would turn into a huge buffer
-         * size. */
-        list_size = sys_llistxattr (real_path, NULL, 0);
-        if (list_size < 0) {
-                op_errno = errno;
-                if (BDB_TIMED_LOG (op_errno, gf_bdb_xattr_log)) {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "GETXATTR %"PRId64" (%s) - %s: %s",
-                                loc->ino, loc->path, name, strerror (op_errno));
-                } else {
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "GETXATTR %"PRId64" (%s) - %s: %s",
-                                loc->ino, loc->path, name, strerror (op_errno));
-                }
-                op_ret   = -1;
-                op_errno = ENOATTR;
-
-                goto out;
-        }
-
-        if (list_size == 0) {
-                /* no attributes at all: success with an empty dictionary */
-                op_ret   = 0;
-                op_errno = 0;
-                goto out;
-        }
-
-        list = alloca (list_size + 1);
-
-        list_size = sys_llistxattr (real_path, list, list_size);
-        if (list_size < 0) {
-                op_ret   = -1;
-                op_errno = errno;
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "GETXATTR %"PRId64" (%s) - %s: %s",
-                        loc->ino, loc->path, name, strerror (op_errno));
-                goto out;
-        }
-        list[list_size] = '\0';
-
-        op_ret   = 0;
-        op_errno = 0;
-
-        /* the list is a sequence of NUL-terminated attribute names */
-        while (list_offset < (size_t) list_size) {
-                key = list + list_offset;
-                if (*key == '\0')
-                        break;
-
-                /* step to the next name right away, so that skipping an
-                 * attribute below can never make the loop spin in place */
-                list_offset += strlen (key) + 1;
-
-                value_size = sys_lgetxattr (real_path, key, NULL, 0);
-                if (value_size == -1) {
-                        op_ret   = -1;
-                        op_errno = errno;
-                        break;
-                }
-
-                value = GF_CALLOC (value_size + 1, sizeof(char),
-                                   gf_bdb_mt_char);
-                if (value == NULL) {
-                        op_ret   = -1;
-                        op_errno = ENOMEM;
-                        break;
-                }
-
-                value_size = sys_lgetxattr (real_path, key, value,
-                                            value_size);
-                if (value_size == -1) {
-                        op_ret   = -1;
-                        op_errno = errno;
-                        GF_FREE (value);
-                        break;
-                }
-                value [value_size] = '\0';
-
-                if (dict_set_dynptr (dict, key, value, value_size) < 0) {
-                        GF_FREE (value);
-                        gf_log (this->name, GF_LOG_DEBUG,
-                                "GETXATTR %"PRId64" (%s) - %s: "
-                                "(skipping key %s)",
-                                loc->ino, loc->path, name, key);
-                }
-        } /* while(list_offset<list_size) */
-out:
-        if(bctx) {
-                /* NOTE: bctx_unref always returns success,
-                 * see description of bctx_unref for more details */
-                bctx_unref (bctx);
-        }
-
-        STACK_UNWIND (frame, op_ret, op_errno, dict);
-
-        if (dict)
-                dict_unref (dict);
-
-        return 0;
-}/* bdb_getxattr */
-
-
-int32_t
-bdb_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- bctx_t *bctx = NULL;
- char *real_path = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
- GF_VALIDATE_OR_GOTO (this->name, name, out);
-
- if (!S_ISDIR(loc->inode->st_mode)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s) - %s: ENOATTR "
- "(not a directory)",
- loc->ino, loc->path, name);
- op_ret = -1;
- op_errno = ENOATTR;
- goto out;
- }
-
- if (GF_FILE_CONTENT_REQUEST(name)) {
- bctx = bctx_lookup (B_TABLE(this), loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s) - %s: ENOATTR"
- "(no database handle for directory)",
- loc->ino, loc->path, name);
- op_ret = -1;
- op_errno = ENOATTR;
- goto out;
- }
-
- op_ret = bdb_db_iremove (bctx, name);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s) - %s: ENOATTR"
- "(no such attribute in database)",
- loc->ino, loc->path, name);
- op_errno = ENOATTR;
- }
- goto out;
- }
-
- MAKE_REAL_PATH(real_path, this, loc->path);
- op_ret = lremovexattr (real_path, name);
- op_errno = errno;
- if (op_ret == -1) {
- if (BDB_TIMED_LOG (op_errno, gf_bdb_xattr_log)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s) - %s: %s",
- loc->ino, loc->path, name, strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s) - %s: %s",
- loc->ino, loc->path, name, strerror (op_errno));
- }
- } /* if(op_ret == -1) */
-out:
- if (bctx) {
- /* NOTE: bctx_unref always returns success,
- * see description of bctx_unref for more details */
- bctx_unref (bctx);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}/* bdb_removexattr */
-
-
-int32_t
-bdb_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int datasync)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- struct bdb_fd *bfd = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "FSYNCDIR %"PRId64": EBADFD"
- "(failed to find internal context from fd)",
- fd->inode->ino);
- op_errno = EBADFD;
- op_ret = -1;
- }
-
-out:
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}/* bdb_fsycndir */
-
-
-int32_t
-bdb_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- char *real_path = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- op_ret = access (real_path, mask);
- op_errno = errno;
- /* TODO: implement for db entries */
-out:
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}/* bdb_access */
-
-
-int32_t
-bdb_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EPERM;
- struct stat buf = {0,};
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
- /* TODO: impelement */
-out:
- STACK_UNWIND (frame, op_ret, op_errno, &buf);
-
- return 0;
-}
-
-
-
-int32_t
-bdb_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- int32_t op_ret = -1, op_errno = EINVAL;
- char *entry_path = NULL;
- int32_t real_path_len = 0;
- int32_t entry_path_len = 0;
- int32_t ret = 0;
- struct bdb_dir *bfd = NULL;
- dir_entry_t *trav = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
- GF_VALIDATE_OR_GOTO (this->name, entries, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SETDENTS %"PRId64": EBADFD",
- fd->inode->ino);
- op_errno = EBADFD;
- op_ret = -1;
- goto out;
- }
-
- real_path_len = strlen (bfd->path);
- entry_path_len = real_path_len + 256;
- entry_path = GF_CALLOC (1, entry_path_len, gf_bdb_mt_char);
- GF_VALIDATE_OR_GOTO (this->name, entry_path, out);
-
- strcpy (entry_path, bfd->path);
- entry_path[real_path_len] = '/';
-
- trav = entries->next;
- while (trav) {
- char pathname[ZR_PATH_MAX] = {0,};
- strcpy (pathname, entry_path);
- strcat (pathname, trav->name);
-
- if (S_ISDIR(trav->buf.st_mode)) {
- /* If the entry is directory, create it by calling
- * 'mkdir'. If directory is not present, it will be
- * created, if its present, no worries even if it fails.
- */
- ret = mkdir (pathname, trav->buf.st_mode);
- if ((ret == -1) && (errno != EEXIST)) {
- op_errno = errno;
- op_ret = ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "SETDENTS %"PRId64" - %s: %s "
- "(mkdir failed)",
- fd->inode->ino, pathname,
- strerror (op_errno));
- goto loop;
- }
-
- /* Change the mode
- * NOTE: setdents tries its best to restore the state
- * of storage. if chmod and chown fail, they can
- * be ignored now */
- ret = chmod (pathname, trav->buf.st_mode);
- if (ret < 0) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "SETDENTS %"PRId64" - %s: %s "
- "(chmod failed)",
- fd->inode->ino, pathname,
- strerror (op_errno));
- goto loop;
- }
- /* change the ownership */
- ret = chown (pathname, trav->buf.st_uid,
- trav->buf.st_gid);
- if (ret != 0) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "SETDENTS %"PRId64" - %s: %s "
- "(chown failed)",
- fd->inode->ino, pathname,
- strerror (op_errno));
- goto loop;
- }
- } else if ((flags == GF_SET_IF_NOT_PRESENT) ||
- (flags != GF_SET_DIR_ONLY)) {
- /* Create a 0 byte file here */
- if (S_ISREG (trav->buf.st_mode)) {
- op_ret = bdb_db_icreate (bfd->ctx,
- trav->name);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SETDENTS %"PRId64" (%s) - %s: "
- "%s (database entry creation"
- " failed)",
- fd->inode->ino,
- bfd->ctx->directory, trav->name,
- strerror (op_errno));
- }
- } else if (S_ISLNK (trav->buf.st_mode)) {
- /* TODO: impelement */;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "SETDENTS %"PRId64" (%s) - %s mode=%o: "
- "(unsupported file type)",
- fd->inode->ino,
- bfd->ctx->directory, trav->name,
- trav->buf.st_mode);
- } /* if(S_ISREG())...else */
- } /* if(S_ISDIR())...else if */
- loop:
- /* consider the next entry */
- trav = trav->next;
- } /* while(trav) */
-
-out:
- STACK_UNWIND (frame, op_ret, op_errno);
-
- GF_FREE (entry_path);
- return 0;
-}
-
-int32_t
-bdb_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- struct stat stbuf = {0,};
- struct bdb_fd *bfd = NULL;
- bctx_t *bctx = NULL;
- char *db_path = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "FSTAT %"PRId64": EBADFD "
- "(failed to find internal context in fd)",
- fd->inode->ino);
- op_errno = EBADFD;
- op_ret = -1;
- goto out;
- }
-
- bctx = bfd->ctx;
-
- MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
- op_ret = lstat (db_path, &stbuf);
- op_errno = errno;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "FSTAT %"PRId64": %s"
- "(failed to stat database file %s)",
- fd->inode->ino, strerror (op_errno), db_path);
- goto out;
- }
-
- stbuf.st_ino = fd->inode->ino;
- stbuf.st_size = bdb_db_fread (bfd, NULL, 0, 0);
- stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
-
-out:
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
- return 0;
-}
-
-gf_dirent_t *
-gf_dirent_for_namen (const char *name,
- size_t len)
-{
- char *tmp_name = NULL;
-
- tmp_name = alloca (len + 1);
-
- memcpy (tmp_name, name, len);
-
- tmp_name[len] = 0;
-
- return gf_dirent_for_name (tmp_name);
-}
-
-int32_t
-bdb_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
-{
- struct bdb_dir *bfd = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- size_t filled = 0;
- gf_dirent_t *this_entry = NULL;
- gf_dirent_t entries;
- struct dirent *entry = NULL;
- off_t in_case = 0;
- int32_t this_size = 0;
- DBC *cursorp = NULL;
- int32_t count = 0;
- off_t offset = 0;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- INIT_LIST_HEAD (&entries.list);
-
- BDB_FCTX_GET (fd, this, &bfd);
- if (bfd == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD "
- "(failed to find internal context in fd)",
- fd->inode->ino, size, off);
- op_errno = EBADFD;
- op_ret = -1;
- goto out;
- }
-
- op_ret = bdb_cursor_open (bfd->ctx, &cursorp);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64": EBADFD "
- "(failed to open cursor to database handle)",
- fd->inode->ino, size, off);
- op_errno = EBADFD;
- goto out;
- }
-
- if (off) {
- DBT sec = {0,}, pri = {0,}, val = {0,};
- sec.data = &(off);
- sec.size = sizeof (off);
- sec.flags = DB_DBT_USERMEM;
- val.dlen = 0;
- val.doff = 0;
- val.flags = DB_DBT_PARTIAL;
-
- op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_SET);
- if (op_ret == DB_NOTFOUND) {
- offset = off;
- goto dir_read;
- }
- }
-
- while (filled <= size) {
- DBT sec = {0,}, pri = {0,}, val = {0,};
-
- this_entry = NULL;
-
- sec.flags = DB_DBT_MALLOC;
- pri.flags = DB_DBT_MALLOC;
- val.dlen = 0;
- val.doff = 0;
- val.flags = DB_DBT_PARTIAL;
- op_ret = bdb_cursor_get (cursorp, &sec, &pri, &val, DB_NEXT);
-
- if (op_ret == DB_NOTFOUND) {
- /* we reached end of the directory */
- op_ret = 0;
- op_errno = 0;
- break;
- } else if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64":"
- "(failed to read the next entry from database)",
- fd->inode->ino, size, off);
- op_errno = ENOENT;
- break;
- } /* if (op_ret == DB_NOTFOUND)...else if...else */
-
- if (pri.data == NULL) {
- /* NOTE: currently ignore when we get key.data == NULL.
- * TODO: we should not get key.data = NULL */
- gf_log (this->name, GF_LOG_DEBUG,
- "READDIR %"PRId64" - %"GF_PRI_SIZET",%"PRId64":"
- "(null key read for entry from database)",
- fd->inode->ino, size, off);
- continue;
- }/* if(key.data)...else */
- count++;
- this_size = bdb_dirent_size (&pri);
- if (this_size + filled > size)
- break;
- /* TODO - consider endianness here */
- this_entry = gf_dirent_for_namen ((const char *)pri.data,
- pri.size);
-
- this_entry->d_ino = bdb_inode_transform (fd->inode->ino,
- pri.data,
- pri.size);
- this_entry->d_off = *(uint32_t *)sec.data;
- this_entry->d_type = 0;
- this_entry->d_len = pri.size + 1;
-
- if (sec.data) {
- GF_FREE (sec.data);
- }
-
- if (pri.data)
- GF_FREE (pri.data);
-
- list_add_tail (&this_entry->list, &entries.list);
-
- filled += this_size;
- }/* while */
- bdb_cursor_close (bfd->ctx, cursorp);
- op_ret = filled;
- op_errno = 0;
- if (filled >= size) {
- goto out;
- }
-dir_read:
- /* hungry kyaa? */
- if (!offset) {
- rewinddir (bfd->dir);
- } else {
- seekdir (bfd->dir, offset);
- }
-
- while (filled <= size) {
- this_entry = NULL;
- entry = NULL;
- this_size = 0;
-
- in_case = telldir (bfd->dir);
- entry = readdir (bfd->dir);
- if (!entry)
- break;
-
- if (IS_BDB_PRIVATE_FILE(entry->d_name))
- continue;
-
- this_size = dirent_size (entry);
-
- if (this_size + filled > size) {
- seekdir (bfd->dir, in_case);
- break;
- }
-
- count++;
-
- this_entry = gf_dirent_for_name (entry->d_name);
- this_entry->d_ino = entry->d_ino;
-
- this_entry->d_off = entry->d_off;
-
- this_entry->d_type = entry->d_type;
- this_entry->d_len = entry->d_reclen;
-
-
- list_add_tail (&this_entry->list, &entries.list);
-
- filled += this_size;
- }
- op_ret = filled;
- op_errno = 0;
-
-out:
- gf_log (this->name, GF_LOG_DEBUG,
- "READDIR %"PRId64" - %"GF_PRI_SIZET" (%"PRId32")"
- "/%"GF_PRI_SIZET",%"PRId64":"
- "(failed to read the next entry from database)",
- fd->inode->ino, filled, count, size, off);
-
- STACK_UNWIND (frame, count, op_errno, &entries);
-
- gf_dirent_free (&entries);
-
- return 0;
-}
-
-
-int32_t
-bdb_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
-
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- struct xlator_stats xlstats = {0, }, *stats = NULL;
- struct statvfs buf = {0,};
- struct timeval tv;
- struct bdb_private *private = NULL;
- int64_t avg_read = 0;
- int64_t avg_write = 0;
- int64_t _time_ms = 0;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
-
- private = (struct bdb_private *)(this->private);
- stats = &xlstats;
-
- op_ret = statvfs (private->export_path, &buf);
- if (op_ret != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "STATS %s: %s",
- private->export_path, strerror (op_errno));
- goto out;
- }
-
- stats->nr_files = private->stats.nr_files;
-
- /* client info is maintained at FSd */
- stats->nr_clients = private->stats.nr_clients;
-
- /* Number of Free block in the filesystem. */
- stats->free_disk = buf.f_bfree * buf.f_bsize;
- stats->total_disk_size = buf.f_blocks * buf.f_bsize; /* */
- stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize;
-
- /* Calculate read and write usage */
- gettimeofday (&tv, NULL);
-
- /* Read */
- _time_ms = (tv.tv_sec - private->init_time.tv_sec) * 1000 +
- ((tv.tv_usec - private->init_time.tv_usec) / 1000);
-
- avg_read = (_time_ms) ? (private->read_value / _time_ms) : 0;/* KBps */
- avg_write = (_time_ms) ? (private->write_value / _time_ms) : 0;
-
- _time_ms = (tv.tv_sec - private->prev_fetch_time.tv_sec) * 1000 +
- ((tv.tv_usec - private->prev_fetch_time.tv_usec) / 1000);
- if (_time_ms
- && ((private->interval_read / _time_ms) > private->max_read)) {
- private->max_read = (private->interval_read / _time_ms);
- }
- if (_time_ms
- && ((private->interval_write / _time_ms) > private->max_write)) {
- private->max_write = private->interval_write / _time_ms;
- }
-
- stats->read_usage = avg_read / private->max_read;
- stats->write_usage = avg_write / private->max_write;
-
- gettimeofday (&(private->prev_fetch_time), NULL);
- private->interval_read = 0;
- private->interval_write = 0;
-
-out:
- STACK_UNWIND (frame, op_ret, op_errno, stats);
- return 0;
-}
-
-
-int32_t
-bdb_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
-{
- gf_log (this->name, GF_LOG_ERROR,
- "glusterfs internal locking request. please load "
- "'features/locks' translator to enable glusterfs "
- "support");
-
- STACK_UNWIND (frame, -1, ENOSYS);
- return 0;
-}
-
-
-int32_t
-bdb_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
-{
- gf_log (this->name, GF_LOG_ERROR,
- "glusterfs internal locking request. please load "
- "'features/locks' translator to enable glusterfs "
- "support");
-
- STACK_UNWIND (frame, -1, ENOSYS);
- return 0;
-}
-
-
-int32_t
-bdb_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- gf_log (this->name, GF_LOG_ERROR,
- "glusterfs internal locking request. please load "
- "'features/locks' translator to enable glusterfs "
- "support");
-
- STACK_UNWIND (frame, -1, ENOSYS);
- return 0;
-}
-
-
-int32_t
-bdb_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- gf_log (this->name, GF_LOG_ERROR,
- "glusterfs internal locking request. please load "
- "'features/locks' translator to enable glusterfs "
- "support");
-
- STACK_UNWIND (frame, -1, ENOSYS);
- return 0;
-}
-
-int32_t
-bdb_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- char *real_path = NULL;
- DIR *dir = NULL;
- struct dirent *dirent = NULL;
- uint8_t file_checksum[NAME_MAX] = {0,};
- uint8_t dir_checksum[NAME_MAX] = {0,};
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t idx = 0, length = 0;
- bctx_t *bctx = NULL;
- DBC *cursorp = NULL;
- char *data = NULL;
- uint8_t no_break = 1;
-
- GF_VALIDATE_OR_GOTO ("bdb", frame, out);
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
-
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- {
- dir = opendir (real_path);
- op_errno = errno;
- GF_VALIDATE_OR_GOTO (this->name, dir, out);
- while ((dirent = readdir (dir))) {
- if (!dirent)
- break;
-
- if (IS_BDB_PRIVATE_FILE(dirent->d_name))
- continue;
-
- length = strlen (dirent->d_name);
- for (idx = 0; idx < length; idx++)
- dir_checksum[idx] ^= dirent->d_name[idx];
- } /* while((dirent...)) */
- closedir (dir);
- }
-
- {
- bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHECKSUM %"PRId64" (%s): ENOMEM"
- "(failed to lookup database handle)",
- loc->inode->ino, loc->path);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = bdb_cursor_open (bctx, &cursorp);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHECKSUM %"PRId64" (%s): EBADFD"
- "(failed to open cursor to database handle)",
- loc->inode->ino, loc->path);
- op_ret = -1;
- op_errno = EBADFD;
- goto out;
- }
-
-
- do {
- DBT key = {0,}, value = {0,}, sec = {0,};
-
- key.flags = DB_DBT_MALLOC;
- value.doff = 0;
- value.dlen = 0;
- op_ret = bdb_cursor_get (cursorp, &sec, &key,
- &value, DB_NEXT);
-
- if (op_ret == DB_NOTFOUND) {
- op_ret = 0;
- op_errno = 0;
- no_break = 0;
- } else if (op_ret == 0){
- /* successfully read */
- data = key.data;
- length = key.size;
- for (idx = 0; idx < length; idx++)
- file_checksum[idx] ^= data[idx];
-
- GF_FREE (key.data);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHECKSUM %"PRId64" (%s)",
- loc->inode->ino, loc->path);
- op_ret = -1;
- op_errno = ENOENT; /* TODO: watch errno */
- no_break = 0;
- }/* if(op_ret == DB_NOTFOUND)...else if...else */
- } while (no_break);
- bdb_cursor_close (bctx, cursorp);
- }
-out:
- if (bctx) {
- /* NOTE: bctx_unref always returns success,
- * see description of bctx_unref for more details */
- bctx_unref (bctx);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
-
- return 0;
-}
-
-/**
- * notify - when parent sends PARENT_UP, send CHILD_UP event from here
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- switch (event)
- {
- case GF_EVENT_PARENT_UP:
- {
- /* Tell the parent that bdb xlator is up */
- assert ((this->private != NULL) &&
- (BDB_ENV(this) != NULL));
- default_notify (this, GF_EVENT_CHILD_UP, data);
- }
- break;
- default:
- /* */
- break;
- }
- return 0;
-}
-
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_bdb_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * init -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = -1;
- struct stat buf = {0,};
- struct bdb_private *_private = NULL;
- char *directory = NULL;
- bctx_t *bctx = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb", this, out);
-
- if (this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "'storage/bdb' translator should be used as leaf node "
- "in translator tree. please remove the subvolumes"
- " specified and retry.");
- goto err;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_ERROR,
- "'storage/bdb' translator needs at least one among "
- "'protocol/server' or 'mount/fuse' translator as "
- "parent. please add 'protocol/server' or 'mount/fuse' "
- "as parent of 'storage/bdb' and retry. or you can also"
- " try specifying mount-point on command-line.");
- goto err;
- }
-
- _private = GF_CALLOC (1, sizeof (*_private), gf_bdb_mt_bdb_private);
- if (_private == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not allocate memory for 'storage/bdb' "
- "configuration data-structure. cannot continue from "
- "here");
- goto err;
- }
-
-
- ret = dict_get_str (this->options, "directory", &directory);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "'storage/bdb' needs at least "
- "'option directory <path-to-export-directory>' as "
- "minimal configuration option. please specify an "
- "export directory using "
- "'option directory <path-to-export-directory>' and "
- "retry.");
- goto err;
- }
-
- umask (000); /* umask `masking' is done at the client side */
-
- /* Check whether the specified directory exists, if not create it. */
- ret = stat (directory, &buf);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "specified export path '%s' does not exist. "
- "please create the export path '%s' and retry.",
- directory, directory);
- goto err;
- } else if (!S_ISDIR (buf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "specified export path '%s' is not a directory. "
- "please specify a valid and existing directory as "
- "export directory and retry.",
- directory);
- goto err;
- } else {
- ret = 0;
- }
-
-
- _private->export_path = gf_strdup (directory);
- if (_private->export_path == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not allocate memory for 'storage/bdb' "
- "configuration data-structure. cannot continue from "
- "here");
- goto err;
- }
-
- _private->export_path_length = strlen (_private->export_path);
-
- {
- /* Stats related variables */
- gettimeofday (&_private->init_time, NULL);
- gettimeofday (&_private->prev_fetch_time, NULL);
- _private->max_read = 1;
- _private->max_write = 1;
- }
-
- this->private = (void *)_private;
-
- {
- ret = bdb_db_init (this, this->options);
-
- if (ret < 0){
- gf_log (this->name, GF_LOG_ERROR,
- "database environment initialisation failed. "
- "manually run database recovery tool and "
- "retry to run glusterfs");
- goto err;
- } else {
- bctx = bctx_lookup (_private->b_table, "/");
- /* NOTE: we are not doing bctx_unref() for root bctx,
- * let it remain in active list forever */
- if (bctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not allocate memory for "
- "'storage/bdb' configuration data-"
- "structure. cannot continue from "
- "here");
- goto err;
- } else {
- ret = 0;
- goto out;
- }
- }
- }
-err:
- if (_private) {
- if (_private->export_path)
- GF_FREE (_private->export_path);
-
- GF_FREE (_private);
- }
-out:
- return ret;
-}
-
-void
-bctx_cleanup (struct list_head *head)
-{
- bctx_t *trav = NULL;
- bctx_t *tmp = NULL;
- DB *storage = NULL;
- DB *secondary = NULL;
-
- list_for_each_entry_safe (trav, tmp, head, list) {
- LOCK (&trav->lock);
- {
- storage = trav->primary;
- trav->primary = NULL;
-
- secondary = trav->secondary;
- trav->secondary = NULL;
-
- list_del_init (&trav->list);
- }
- UNLOCK (&trav->lock);
-
- if (storage) {
- storage->close (storage, 0);
- storage = NULL;
- }
-
- if (secondary) {
- secondary->close (secondary, 0);
- secondary = NULL;
- }
- }
- return;
-}
-
-void
-fini (xlator_t *this)
-{
- struct bdb_private *private = NULL;
- int32_t ret = 0;
-
- private = this->private;
-
- if (B_TABLE(this)) {
- /* close all the dbs from lru list */
- bctx_cleanup (&(B_TABLE(this)->b_lru));
- bctx_cleanup (&(B_TABLE(this)->active));
-
- if (BDB_ENV(this)) {
- LOCK (&private->active_lock);
- {
- private->active = 0;
- }
- UNLOCK (&private->active_lock);
-
- ret = pthread_join (private->checkpoint_thread, NULL);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "could not complete checkpointing "
- "database environment. this might "
- "result in inconsistencies in few"
- " recent data and meta-data "
- "operations");
- }
-
- BDB_ENV(this)->close (BDB_ENV(this), 0);
- } else {
- /* impossible to reach here */
- }
-
- GF_FREE (B_TABLE(this));
- }
- GF_FREE (private);
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = bdb_lookup,
- .stat = bdb_stat,
- .opendir = bdb_opendir,
- .readdir = bdb_readdir,
- .readlink = bdb_readlink,
- .mknod = bdb_mknod,
- .mkdir = bdb_mkdir,
- .unlink = bdb_unlink,
- .rmdir = bdb_rmdir,
- .symlink = bdb_symlink,
- .rename = bdb_rename,
- .link = bdb_link,
- .truncate = bdb_truncate,
- .create = bdb_create,
- .open = bdb_open,
- .readv = bdb_readv,
- .writev = bdb_writev,
- .statfs = bdb_statfs,
- .flush = bdb_flush,
- .fsync = bdb_fsync,
- .setxattr = bdb_setxattr,
- .getxattr = bdb_getxattr,
- .removexattr = bdb_removexattr,
- .fsyncdir = bdb_fsyncdir,
- .access = bdb_access,
- .ftruncate = bdb_ftruncate,
- .fstat = bdb_fstat,
- .lk = bdb_lk,
- .inodelk = bdb_inodelk,
- .finodelk = bdb_finodelk,
- .entrylk = bdb_entrylk,
- .fentrylk = bdb_fentrylk,
- .setdents = bdb_setdents,
- .getdents = bdb_getdents,
- .checksum = bdb_checksum,
- .setattr = bdb_setattr,
- .fsetattr = bdb_fsetattr,
-};
-
-struct xlator_cbks cbks = {
- .release = bdb_release,
- .releasedir = bdb_releasedir
-};
-
-
-struct volume_options options[] = {
- { .key = { "directory" },
- .type = GF_OPTION_TYPE_PATH,
- .description = "export directory"
- },
- { .key = { "logdir" },
- .type = GF_OPTION_TYPE_PATH,
- .description = "directory to be used by libdb for writing"
- "transaction logs. NOTE: in absence of 'logdir' "
- "export directory itself will be used as 'logdir' also"
- },
- { .key = { "errfile" },
- .type = GF_OPTION_TYPE_PATH,
- .description = "path to be used for libdb error logging. "
- "NOTE: absence of 'errfile' will disable any "
- "error logging by libdb."
- },
- { .key = { "dir-mode" },
- .type = GF_OPTION_TYPE_ANY /* base 8 number */
- },
- { .key = { "file-mode" },
- .type = GF_OPTION_TYPE_ANY,
- .description = "file mode for regular files. stat() on a regular file"
- " returns the mode specified by this option. "
- "NOTE: specify value in octal"
- },
- { .key = { "page-size" },
- .type = GF_OPTION_TYPE_SIZET,
- .min = 512,
- .max = 16384,
- .description = "size of pages used to hold data by libdb. set it to "
- "block size of exported filesystem for "
- "optimal performance"
- },
- { .key = { "open-db-lru-limit" },
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 2048,
- .description = "maximum number of per directory databases that can "
- "be kept open. NOTE: for _advanced_ users only."
- },
- { .key = { "lock-timeout" },
- .type = GF_OPTION_TYPE_TIME,
- .min = 0,
- .max = 4260000,
- .description = "define the maximum time a lock request can "
- "be blocked by libdb. NOTE: only for _advanced_ users."
- " do not specify this option when not sure."
- },
- { .key = { "checkpoint-interval" },
- .type = GF_OPTION_TYPE_TIME,
- .min = 1,
- .max = 86400,
- .description = "define the time interval between two consecutive "
- "libdb checpoints. setting to lower value will leave "
- "bdb perform slowly, but guarantees that minimum data"
- " will be lost in case of a crash. NOTE: this option "
- "is valid only when "
- "'option mode=\"persistent\"' is set."
- },
- { .key = { "transaction-timeout" },
- .type = GF_OPTION_TYPE_TIME,
- .min = 0,
- .max = 4260000,
- .description = "maximum time for which a transaction can block "
- "waiting for required resources."
- },
- { .key = { "mode" },
- .type = GF_OPTION_TYPE_BOOL,
- .value = { "cache", "persistent" },
- .description = "cache: data recovery is not guaranteed in case "
- "of crash. persistent: data recovery is guaranteed, "
- "since all operations are transaction protected."
- },
- { .key = { "access-mode" },
- .type = GF_OPTION_TYPE_STR,
- .value = {"btree", "hash" },
- .description = "chose the db access method. "
- "NOTE: for _advanced_ users. leave the choice to "
- "glusterfs when in doubt."
- },
- { .key = { NULL } }
-};
diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h
deleted file mode 100644
index cfe1c9b5555..00000000000
--- a/xlators/storage/bdb/src/bdb.h
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _BDB_H
-#define _BDB_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <dirent.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <dirent.h>
-
-#include <db.h>
-
-#ifdef linux
-#ifdef __GLIBC__
-#include <sys/fsuid.h>
-#else
-#include <unistd.h>
-#endif
-#endif
-
-#ifdef HAVE_SYS_XATTR_H
-#include <sys/xattr.h>
-#endif
-
-#ifdef HAVE_SYS_EXTATTR_H
-#include <sys/extattr.h>
-#endif
-
-#include <pthread.h>
-#include "xlator.h"
-#include "inode.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "fd.h"
-#include "syscall.h"
-
-#define BDB_STORAGE "/glusterfs_storage.db"
-
-/* numbers are not so reader-friendly, so lets have ON and OFF macros */
-#define ON 1
-#define OFF 0
-
-#define BDB_DEFAULT_LRU_LIMIT 100
-#define BDB_DEFAULT_HASH_SIZE 100
-
-#define BDB_ENOSPC_THRESHOLD 25600
-
-#define BDB_DEFAULT_CHECKPOINT_INTERVAL 30
-
-#define BCTX_ENV(bctx) (bctx->table->dbenv)
-
-#define BDB_EXPORT_PATH_LEN(_private) \
- (((struct bdb_private *)_private)->export_path_length)
-
-#define BDB_KEY_FROM_FREQUEST_KEY(_key) (&(key[15]))
-
-#define BDB_EXPORT_PATH(_private) \
- (((struct bdb_private *)_private)->export_path)
-/* MAKE_REAL_PATH(var,this,path)
- * make the real path on the underlying file-system
- *
- * @var: destination to hold the real path
- * @this: pointer to xlator_t corresponding to bdb xlator
- * @path: path, as seen from mount-point
- */
-#define MAKE_REAL_PATH(var, this, path) do { \
- int base_len = BDB_EXPORT_PATH_LEN(this->private); \
- var = alloca (strlen (path) + base_len + 2); \
- strcpy (var, BDB_EXPORT_PATH(this->private)); \
- strcpy (&var[base_len], path); \
- } while (0)
-
-
-#define BDB_TIMED_LOG(_errno,_counter) \
- ((_errno == ENOTSUP) && (((++_counter) % GF_UNIVERSAL_ANSWER) == 1))
-
-#define GF_FILE_CONTENT_REQUEST ZR_FILE_CONTENT_REQUEST
-
-/* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path)
- * make the real path to the storage-database file on file-system
- *
- * @var: destination to hold the real path
- * @this: pointer to xlator_t corresponding to bdb xlator
- * @path: path of the directory, as seen from mount-point
- */
-#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do { \
- int base_len = BDB_EXPORT_PATH_LEN(this->private); \
- var = alloca (strlen (path) + \
- base_len + \
- strlen (BDB_STORAGE)); \
- strcpy (var, BDB_EXPORT_PATH(this->private)); \
- strcpy (&var[base_len], path); \
- strcat (var, BDB_STORAGE); \
- } while (0)
-
-/* MAKE_KEY_FROM_PATH(key,path)
- * make a 'key', which we use as key in the underlying database by using
- * the path
- *
- * @key: destination to hold the key
- * @path: path to file as seen from mount-point
- */
-#define MAKE_KEY_FROM_PATH(key, path) do { \
- char *tmp = alloca (strlen (path)); \
- strcpy (tmp, path); \
- key = basename (tmp); \
- }while (0);
-
-/* IS_BDB_PRIVATE_FILE(name)
- * check if a given 'name' is bdb xlator's internal file name
- *
- * @name: basename of a file.
- *
- * bdb xlator reserves file names 'glusterfs_storage.db',
- * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*'
- * (used by libdb)
- */
-#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \
- (!strcmp(name, "glusterfs_storage.db")) || \
- (!strcmp(name, "glusterfs_ns.db")) || \
- (!strncmp(name, "log.0000", 8)))
-
-/* check if 'name' is '.' or '..' entry */
-#define IS_DOT_DOTDOT(name) \
- ((!strncmp(name,".", 1)) || (!strncmp(name,"..", 2)))
-
-/* BDB_ICTX_SET(this,inode,bctx)
- * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
- * this will happen either in lookup() or mkdir().
- *
- * @this: pointer xlator_t of bdb xlator.
- * @inode: inode where 'struct bdb_ctx *' has to be stored.
- * @bctx: a 'struct bdb_ctx *'
- */
-#define BDB_ICTX_SET(_inode,_this,_bctx) do{ \
- inode_ctx_put(_inode, _this, (uint64_t)(long)_bctx); \
- }while (0);
-
-#define BDB_ICTX_GET(_inode,_this,_bctxp) do { \
- uint64_t tmp_bctx = 0; \
- inode_ctx_get (_inode, _this, &tmp_bctx); \
- *_bctxp = tmp_bctx; \
- }while (0);
-
-/* BDB_FCTX_SET(this,fd,bctx)
- * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
- * this will happen either in lookup() or mkdir().
- *
- * @this: pointer xlator_t of bdb xlator.
- * @inode: inode where 'struct bdb_ctx *' has to be stored.
- * @bctx: a 'struct bdb_ctx *'
- */
-#define BDB_FCTX_SET(_fd,_this,_bfd) do{ \
- fd_ctx_set(_fd, _this, (uint64_t)(long)_bfd); \
- }while (0);
-
-#define BDB_FCTX_GET(_fd,_this,_bfdp) do { \
- uint64_t tmp_bfd = 0; \
- fd_ctx_get (_fd, _this, &tmp_bfd); \
- *_bfdp = (void *)(long)tmp_bfd; \
- }while (0);
-
-
-/* maximum number of open dbs that bdb xlator will ever have */
-#define BDB_MAX_OPEN_DBS 100
-
-/* convert file size to block-count */
-#define BDB_COUNT_BLOCKS(size,blksize) (((size + blksize - 1)/blksize) - 1)
-
-/* file permissions, again macros are more readable */
-#define RWXRWXRWX 0777
-#define DEFAULT_FILE_MODE 0644
-#define DEFAULT_DIR_MODE 0755
-
-/* see, if have a valid file permissions specification in @mode */
-#define IS_VALID_FILE_MODE(mode) (!(mode & (~RWXRWXRWX)))
-#define IS_VALID_DIR_MODE(mode) (!(mode & (~(RWXRWXRWX)))
-
-/* maximum retries for a failed transactional operation */
-#define BDB_MAX_RETRIES 10
-
-#define BDB_LL_PAGE_SIZE_DEFAULT 4096
-#define BDB_LL_PAGE_SIZE_MIN 4096
-#define BDB_LL_PAGE_SIZE_MAX 65536
-
-#define PAGE_SIZE_IN_RANGE(_page_size) \
- ((_page_size >= BDB_LL_PAGE_SIZE_MIN) \
- && (table->page_size <= BDB_LL_PAGE_SIZE_MAX))
-
-typedef struct bctx_table bctx_table_t;
-typedef struct bdb_ctx bctx_t;
-typedef struct bdb_cache bdb_cache_t;
-typedef struct bdb_private bdb_private_t;
-
-struct bctx_table {
- /* flags to be used for opening each database */
- uint64_t dbflags;
-
- /* cache: can be either ON or OFF */
- uint64_t cache;
-
- /* used to lock the 'struct bctx_table *' */
- gf_lock_t lock;
-
- /* lock for checkpointing */
- gf_lock_t checkpoint_lock;
-
- /* hash table of 'struct bdb_ctx' */
- struct list_head *b_hash;
-
- /* list of active 'struct bdb_ctx' */
- struct list_head active;
-
- /* lru list of inactive 'struct bdb_ctx' */
- struct list_head b_lru;
- struct list_head purge;
- uint32_t lru_limit;
- uint32_t lru_size;
- uint32_t hash_size;
-
- /* access mode for accessing the databases, can be DB_HASH, DB_BTREE */
- DBTYPE access_mode;
-
- /* DB_ENV under which every db operation is carried over */
- DB_ENV *dbenv;
- int32_t transaction;
- xlator_t *this;
-
- /* page-size of DB, DB->set_pagesize(), should be set before DB->open */
- uint64_t page_size;
-};
-
-struct bdb_ctx {
- /* controller members */
-
- /* lru list of 'struct bdb_ctx's, a bdb_ctx can exist in one of
- * b_hash or lru lists */
- struct list_head list;
-
- /* directory 'name' hashed list of 'struct bdb_ctx's */
- struct list_head b_hash;
-
- struct bctx_table *table;
- int32_t ref; /* reference count */
- gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */
-
- char *directory; /* directory path */
-
- /* pointer to open database, that resides inside this directory */
- DB *primary;
- DB *secondary;
- uint32_t cache; /* cache ON or OFF */
-
- /* per directory cache, bdb xlator's internal cache */
- struct list_head c_list; /* linked list of cached records */
- int32_t c_count; /* number of cached records */
-
- /* index to hash table list, to which this ctx belongs */
- int32_t key_hash;
- char *db_path; /* absolute path to db file */
-};
-
-struct bdb_fd {
- /* pointer to bdb_ctx of the parent directory */
- struct bdb_ctx *ctx;
-
- /* name of the file. NOTE: basename, not the complete path */
- char *key;
- int32_t flags; /* open flags */
-};
-
-struct bdb_dir {
- /* pointer to bdb_ctx of this directory */
- struct bdb_ctx *ctx;
-
- /* open directory pointer, as returned by opendir() */
- DIR *dir;
-
- char *path; /* path to this directory */
-};
-
-/* cache */
-struct bdb_cache {
- /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */
- struct list_head c_list;
-
- /* name of the file this cache holds. NOTE: basename of file */
- char *key;
- char *data; /* file content */
-
- /* size of the file content that this cache holds */
- size_t size;
-};
-
-
-struct bdb_private {
- /* pointer to inode table that we use */
- inode_table_t *itable;
- int32_t temp; /**/
- char is_stateless; /**/
-
- /* path to the export directory
- * (option directory <export-path>) */
- char *export_path;
-
- /* length of 'export_path' string */
- int32_t export_path_length;
-
- /* statistics */
- /* Statistics, provides activity of the server */
- struct xlator_stats stats;
-
- struct timeval prev_fetch_time;
- struct timeval init_time;
- int32_t max_read; /* */
- int32_t max_write; /* */
-
- /* Used to calculate the max_read value */
- int64_t interval_read;
-
- /* Used to calculate the max_write value */
- int64_t interval_write;
- int64_t read_value; /* Total read, from init */
- int64_t write_value; /* Total write, from init */
-
- /* bdb xlator specific private data */
-
- /* flags used for opening DB_ENV for this xlator */
- uint64_t envflags;
-
- /* flags to be used for opening each database */
- uint64_t dbflags;
-
- /* cache: can be either ON or OFF */
- uint64_t cache;
-
- /* transaction: can be either ON or OFF */
- uint32_t transaction;
- uint32_t active;
- gf_lock_t active_lock;
- struct bctx_table *b_table;
-
- /* access mode for accessing the databases, can be DB_HASH, DB_BTREE
- * (option access-mode <mode>) */
- DBTYPE access_mode;
-
- /* mode for each and every file stored on bdb
- * (option file-mode <mode>) */
- mode_t file_mode;
-
- /* mode for each and every directory stored on bdb
- * (option dir-mode <mode>) */
- mode_t dir_mode;
-
- /* mode for each and every symlink stored on bdb */
- mode_t symlink_mode;
-
- /* pthread_t object used for creating checkpoint thread */
- pthread_t checkpoint_thread;
-
- /* time duration between two consecutive checkpoint operations.
- * (option checkpoint-interval <time-in-seconds>) */
- uint32_t checkpoint_interval;
-
- /* environment log directory (option logdir <directory>) */
- char *logdir;
-
- /* errfile path, used by environment to print detailed error log.
- * (option errfile <errfile-path>) */
- char *errfile;
-
- /* DB_ENV->set_errfile() expects us to fopen
- * the errfile before doing DB_ENV->set_errfile() */
- FILE *errfp;
-
- /* used by DB_ENV->set_timeout to set the timeout for
- * a transactionally encapsulated DB->operation() to
- * timeout before waiting for locks to be released.
- * (option transaction-timeout <time-in-milliseconds>)
- */
- uint32_t txn_timeout;
- uint32_t lock_timeout;
-
- /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/
- uint32_t log_auto_remove;
- uint32_t log_region_max;
-};
-
-
-static inline int32_t
-bdb_txn_begin (DB_ENV *dbenv,
- DB_TXN **ptxnid)
-{
- return dbenv->txn_begin (dbenv, NULL, ptxnid, 0);
-}
-
-static inline int32_t
-bdb_txn_abort (DB_TXN *txnid)
-{
- return txnid->abort (txnid);
-}
-
-static inline int32_t
-bdb_txn_commit (DB_TXN *txnid)
-{
- return txnid->commit (txnid, 0);
-}
-
-void *
-bdb_db_stat (bctx_t *bctx,
- DB_TXN *txnid,
- uint32_t flags);
-
-/*int32_t
-bdb_db_get(struct bdb_ctx *bctx,
- DB_TXN *txnid,
- const char *key_string,
- char **buf,
- size_t size,
- off_t offset);
-*/
-int32_t
-bdb_db_fread (struct bdb_fd *bfd, char *bufp, size_t size, off_t offset);
-
-int32_t
-bdb_db_iread (struct bdb_ctx *bctx, const char *key, char **bufp);
-
-#define BDB_TRUNCATE_RECORD 0xcafebabe
-
-/*int32_t
-bdb_db_put (struct bdb_ctx *bctx,
- DB_TXN *txnid,
- const char *key_string,
- const char *buf,
- size_t size,
- off_t offset,
- int32_t flags);
-*/
-int32_t
-bdb_db_icreate (struct bdb_ctx *bctx, const char *key);
-
-int32_t
-bdb_db_fwrite (struct bdb_fd *bfd, char *buf, size_t size, off_t offset);
-
-int32_t
-bdb_db_iwrite (struct bdb_ctx *bctx, const char *key, char *buf, size_t size);
-
-int32_t
-bdb_db_itruncate (struct bdb_ctx *bctx, const char *key);
-
-int32_t
-bdb_db_iremove (struct bdb_ctx *bctx,
- const char *key);
-
-ino_t
-bdb_inode_transform (ino_t parent,
- const char *name,
- size_t namelen);
-
-int32_t
-bdb_cursor_open (struct bdb_ctx *bctx,
- DBC **cursorp);
-
-int32_t
-bdb_cursor_get (DBC *cursorp,
- DBT *sec, DBT *pri,
- DBT *value,
- int32_t flags);
-
-
-int32_t
-bdb_cursor_close (struct bdb_ctx *ctx,
- DBC *cursorp);
-
-
-int32_t
-bdb_dirent_size (DBT *key);
-
-int32_t
-dirent_size (struct dirent *entry);
-
-int
-bdb_db_init (xlator_t *this,
- dict_t *options);
-
-void
-bdb_dbs_from_dict_close (dict_t *this,
- char *key,
- data_t *value,
- void *data);
-
-bctx_t *
-bctx_lookup (struct bctx_table *table,
- const char *path);
-
-bctx_t *
-bctx_parent
-(struct bctx_table *table,
- const char *path);
-
-bctx_t *
-bctx_unref (bctx_t *ctx);
-
-bctx_t *
-bctx_ref (bctx_t *ctx);
-
-#endif /* _BDB_H */
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index 0fdcfdcecb4..4e3fd3dd09e 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -2,17 +2,20 @@
xlator_LTLIBRARIES = posix.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
-posix_la_LDFLAGS = -module -avoidversion
+posix_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-posix_la_SOURCES = posix.c
-posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c
+posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO) \
+ $(ACL_LIBS)
-noinst_HEADERS = posix.h posix-mem-types.h
+noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \
+ posix-messages.h
-AM_CFLAGS = -fPIC -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- -I$(top_srcdir)/contrib/md5 \
- $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
new file mode 100644
index 00000000000..d8ef5f7b73f
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -0,0 +1,568 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "xlator.h"
+#include "glusterfs.h"
+#include "posix.h"
+#include <sys/uio.h>
+#include "posix-messages.h"
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+
+
+/*
+ * Switch O_DIRECT on or off on pfd->fd to suit the upcoming request.
+ *
+ * @fd:      glusterfs fd; fd->flags is consulted for O_DIRECT
+ * @pfd:     posix fd context holding the OS descriptor (pfd->fd) and the
+ *           cached O_DIRECT state (pfd->odirect)
+ * @opflags: per-operation flags; O_DIRECT here forces direct I/O
+ * @offset:  file offset of the request
+ * @size:    length of the request in bytes
+ *
+ * O_DIRECT is wanted when it is requested through the fd or operation
+ * flags, or when both offset and size are multiples of 4096 (0xfff mask).
+ * fcntl() is only issued when the wanted state differs from pfd->odirect.
+ *
+ * The cached state is updated even when fcntl() fails, exactly as before;
+ * a failure is reported with a warning only.
+ *
+ * Callers in this file invoke this with fd->lock held.
+ */
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+                        off_t offset, size_t size)
+{
+        int odirect = 0;
+        int flags   = 0;
+        int ret     = 0;
+
+        if ((fd->flags|opflags) & O_DIRECT) {
+                /* if instructed, use O_DIRECT always */
+                odirect = 1;
+        } else {
+                /* else use O_DIRECT when feasible */
+                odirect = ((offset|size) & 0xfff) ? 0 : 1;
+        }
+
+        /* descriptor is already in the wanted state: nothing to do */
+        if ((!odirect) == (!pfd->odirect))
+                return;
+
+        flags = fcntl (pfd->fd, F_GETFL);
+        if (flags == -1) {
+                /* never feed the -1 error value back in as a flag mask */
+                ret = -1;
+        } else if (odirect) {
+                ret = fcntl (pfd->fd, F_SETFL, (flags | O_DIRECT));
+        } else {
+                ret = fcntl (pfd->fd, F_SETFL, (flags & (~O_DIRECT)));
+        }
+
+        pfd->odirect = odirect;
+
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_WARNING, errno, P_MSG_FCNTL_FAILED,
+                        "fcntl() failed. fd=%d flags=%d pfd->odirect=%d",
+                        pfd->fd, flags, pfd->odirect);
+        }
+}
+
+
+/*
+ * Per-request bookkeeping for one asynchronous read or write. It is
+ * allocated by posix_aio_readv()/posix_aio_writev(), reached again from the
+ * completion event through iocb.data, and freed by the matching
+ * *_complete() handler.
+ */
+struct posix_aio_cb {
+ struct iocb iocb; /* libaio control block handed to io_submit() */
+ call_frame_t *frame; /* frame to unwind when the request completes */
+ struct iobuf *iobuf; /* read destination buffer (set by readv only) */
+ struct iobref *iobref; /* reference taken on the write payload (writev only) */
+ struct iatt prebuf; /* fstat result taken before a write is submitted */
+ int fd; /* OS file descriptor the request operates on */
+ int op; /* GF_FOP_READ or GF_FOP_WRITE; selects the completion handler */
+ off_t offset; /* file offset of the request */
+};
+
+
+/*
+ * Completion handler for a read submitted by posix_aio_readv().
+ *
+ * @paiocb: request bookkeeping; freed here in all cases
+ * @res:    result from the completion event: bytes read, or a negated
+ *          errno on failure
+ * @res2:   secondary result from the completion event (unused)
+ *
+ * Unwinds the readv frame with the data buffer and post-read stat, then
+ * drops the iobuf/iobref references and frees paiocb. Always returns 0.
+ */
+int
+posix_aio_readv_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+        call_frame_t         *frame    = NULL;
+        xlator_t             *this     = NULL;
+        struct iobuf         *iobuf    = NULL;
+        struct iatt           postbuf  = {0,};
+        int                   _fd      = -1;
+        int                   op_ret   = -1;
+        int                   op_errno = 0;
+        /* zeroed so the error paths never unwind with stack garbage */
+        struct iovec          iov      = {0,};
+        struct iobref        *iobref   = NULL;
+        int                   ret      = 0;
+        off_t                 offset   = 0;
+        struct posix_private *priv     = NULL;
+
+
+        frame  = paiocb->frame;
+        this   = frame->this;
+        priv   = this->private;
+        iobuf  = paiocb->iobuf;
+        _fd    = paiocb->fd;
+        offset = paiocb->offset;
+
+        if (res < 0) {
+                op_ret = -1;
+                op_errno = -res;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_READV_FAILED,
+                        "readv(async) failed fd=%d,size=%lu,offset=%llu (%d)",
+                        _fd, paiocb->iocb.u.c.nbytes,
+                        (unsigned long long) paiocb->offset,
+                        res);
+                goto out;
+        }
+
+        ret = posix_fdstat (this, _fd, &postbuf);
+        if (ret != 0) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+                        "fstat failed on fd=%d", _fd);
+                goto out;
+        }
+
+        op_ret = res;
+        op_errno = 0;
+
+        iobref = iobref_new ();
+        if (!iobref) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        iobref_add (iobref, iobuf);
+
+        iov.iov_base = iobuf_ptr (iobuf);
+        iov.iov_len  = op_ret;
+
+
+        /* Hack to notify higher layers of EOF. */
+        if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
+                op_errno = ENOENT;
+
+        /* account the bytes read in the translator-wide counter */
+        LOCK (&priv->lock);
+        {
+                priv->read_value += op_ret;
+        }
+        UNLOCK (&priv->lock);
+
+out:
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1,
+                             &postbuf, iobref, NULL);
+        if (iobuf)
+                iobuf_unref (iobuf);
+        if (iobref)
+                iobref_unref (iobref);
+
+        GF_FREE (paiocb);
+
+        return 0;
+}
+
+
+/*
+ * readv fop implemented with Linux AIO: queue a pread on the fd's OS
+ * descriptor and return immediately.
+ *
+ * @frame:  call frame, unwound with the result
+ * @this:   posix xlator; this->private supplies the AIO context (ctxp)
+ * @fd:     glusterfs fd to read from
+ * @size:   number of bytes to read; zero is rejected with EINVAL
+ * @offset: file offset to read at
+ * @flags:  per-operation flags, passed to __posix_fd_set_odirect()
+ * @xdata:  not used by this function
+ *
+ * Always returns 0. On successful submission the frame is unwound later by
+ * posix_aio_readv_complete(), which also releases iobuf and paiocb. On any
+ * failure here the frame is unwound with -1/op_errno and both are released
+ * locally.
+ */
+int
+posix_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct iobuf *iobuf = NULL;
+ struct posix_fd * pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ /* look up the posix fd context to obtain the OS descriptor */
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ if (!size) {
+ op_errno = EINVAL;
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_INVALID_ARGUMENT, "size=%"GF_PRI_SIZET, size);
+ goto err;
+ }
+
+ /* destination buffer for the read, sized for the whole request */
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ paiocb->frame = frame;
+ paiocb->iobuf = iobuf;
+ paiocb->offset = offset;
+ paiocb->fd = _fd;
+ paiocb->op = GF_FOP_READ;
+
+ /* iocb.data is how posix_aio_thread() gets back to this request */
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.c.buf = iobuf_ptr (iobuf);
+ paiocb->iocb.u.c.nbytes = size;
+ paiocb->iocb.u.c.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ /* the O_DIRECT adjustment and the submission happen under one
+ * fd->lock hold */
+ LOCK (&fd->lock);
+ {
+ __posix_fd_set_odirect (fd, pfd, flags, offset, size);
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ }
+ UNLOCK (&fd->lock);
+
+ /* exactly one iocb was submitted, so anything but 1 is a failure */
+ if (ret != 1) {
+ op_errno = -ret;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_IO_SUBMIT_FAILED,
+ "io_submit() returned %d", ret);
+ goto err;
+ }
+
+ /* success: paiocb and iobuf are not touched again here; the
+ * completion handler releases them */
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ if (paiocb)
+ GF_FREE (paiocb);
+
+ return 0;
+}
+
+
+/*
+ * Completion handler for a write submitted by posix_aio_writev().
+ *
+ * @paiocb: request bookkeeping; its iobref reference is dropped and the
+ *          structure itself is freed here
+ * @res:    result from the completion event: bytes written, or a negated
+ *          errno on failure
+ * @res2:   secondary result from the completion event (unused)
+ *
+ * Unwinds the writev frame with the pre-write stat captured at submission
+ * and a fresh post-write stat. Always returns 0.
+ */
+int
+posix_aio_writev_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+        call_frame_t         *frame    = paiocb->frame;
+        xlator_t             *this     = frame->this;
+        struct posix_private *priv     = this->private;
+        struct iatt           pre      = paiocb->prebuf;
+        struct iatt           post     = {0,};
+        int                   sysfd    = paiocb->fd;
+        int                   op_ret   = -1;
+        int                   op_errno = 0;
+
+        if (res < 0) {
+                /* the request itself failed */
+                op_errno = -res;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        P_MSG_WRITEV_FAILED,
+                        "writev(async) failed fd=%d,offset=%llu (%d)",
+                        sysfd, (unsigned long long) paiocb->offset, res);
+        } else if (posix_fdstat (this, sysfd, &post) != 0) {
+                /* data was written but the post-op stat is unavailable */
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+                        "fstat failed on fd=%d", sysfd);
+        } else {
+                op_ret = res;
+
+                /* account the bytes written in the translator-wide counter */
+                LOCK (&priv->lock);
+                {
+                        priv->write_value += op_ret;
+                }
+                UNLOCK (&priv->lock);
+        }
+
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &pre, &post,
+                             NULL);
+
+        /* paiocb was dereferenced above, so it is known to be non-NULL */
+        if (paiocb->iobref)
+                iobref_unref (paiocb->iobref);
+        GF_FREE (paiocb);
+
+        return 0;
+}
+
+
+/*
+ * writev fop implemented with Linux AIO: queue a vectored pwrite on the
+ * fd's OS descriptor and return immediately.
+ *
+ * @frame:  call frame, unwound with the result
+ * @this:   posix xlator; this->private supplies the AIO context (ctxp)
+ * @fd:     glusterfs fd to write to
+ * @iov:    vector of buffers to write
+ * @count:  number of entries in @iov
+ * @offset: file offset to write at
+ * @flags:  per-operation flags, passed to __posix_fd_set_odirect()
+ * @iobref: reference holder for the payload; an extra reference is taken
+ *          for the lifetime of the request
+ * @xdata:  not used by this function
+ *
+ * Always returns 0. On successful submission the frame is unwound later by
+ * posix_aio_writev_complete(), which also drops the iobref reference and
+ * frees paiocb. On any failure here the frame is unwound with -1/op_errno
+ * and both are released locally.
+ */
+int
+posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *iov, int count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct posix_fd * pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ /* look up the posix fd context to obtain the OS descriptor */
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ paiocb->frame = frame;
+ paiocb->offset = offset;
+ paiocb->fd = _fd;
+ paiocb->op = GF_FOP_WRITE;
+
+ /* iocb.data is how posix_aio_thread() gets back to this request */
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ /* hold the payload until the completion handler (or err path) runs */
+ paiocb->iobref = iobref_ref (iobref);
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.v.vec = iov;
+ paiocb->iocb.u.v.nr = count;
+ paiocb->iocb.u.v.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ /* capture the pre-write attributes; they are returned to the caller
+ * as prebuf when the write completes */
+ ret = posix_fdstat (this, _fd, &paiocb->prebuf);
+ if (ret != 0) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
+ goto err;
+ }
+
+
+ /* the O_DIRECT adjustment and the submission happen under one
+ * fd->lock hold */
+ LOCK (&fd->lock);
+ {
+ __posix_fd_set_odirect (fd, pfd, flags, offset,
+ iov_length (iov, count));
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ }
+ UNLOCK (&fd->lock);
+
+ /* exactly one iocb was submitted, so anything but 1 is a failure */
+ if (ret != 1) {
+ op_errno = -ret;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_IO_SUBMIT_FAILED,
+ "io_submit() returned %d,gfid=%s", ret,
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ /* success: paiocb is not touched again here; the completion handler
+ * releases it */
+ return 0;
+err:
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0);
+
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref (paiocb->iobref);
+ GF_FREE (paiocb);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Body of the AIO reaper thread started by posix_aio_init().
+ *
+ * @data: the posix xlator (xlator_t *)
+ *
+ * Blocks in io_getevents() for at least one and at most
+ * POSIX_AIO_MAX_NR_GETEVENTS completions, then dispatches each one to the
+ * read or write completion handler according to the op recorded in its
+ * posix_aio_cb. An interrupted wait (-EINTR) is retried silently; any other
+ * non-positive return is logged and ends the thread. Returns NULL.
+ */
+void *
+posix_aio_thread (void *data)
+{
+        xlator_t             *this   = NULL;
+        struct posix_private *priv   = NULL;
+        int                   ret    = 0;
+        int                   i      = 0;
+        struct io_event       events[POSIX_AIO_MAX_NR_GETEVENTS];
+        struct io_event      *event  = NULL;
+        struct posix_aio_cb  *paiocb = NULL;
+
+        this = data;
+        THIS = this;
+        priv = this->private;
+
+        for (;;) {
+                memset (&events[0], 0, sizeof (events));
+                ret = io_getevents (priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS,
+                                    &events[0], NULL);
+
+                /* an interrupted wait is benign: retry without logging an
+                 * error */
+                if (ret == -EINTR)
+                        continue;
+
+                if (ret <= 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                P_MSG_IO_GETEVENTS_FAILED,
+                                "io_getevents() returned %d", ret);
+                        break;
+                }
+
+                for (i = 0; i < ret; i++) {
+                        event = &events[i];
+
+                        /* data was set to the owning posix_aio_cb at
+                         * submission time */
+                        paiocb = event->data;
+
+                        switch (paiocb->op) {
+                        case GF_FOP_READ:
+                                posix_aio_readv_complete (paiocb, event->res,
+                                                          event->res2);
+                                break;
+                        case GF_FOP_WRITE:
+                                posix_aio_writev_complete (paiocb, event->res,
+                                                           event->res2);
+                                break;
+                        default:
+                                /* NOTE(review): nothing unwinds the frame or
+                                 * frees paiocb on this path */
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        P_MSG_UNKNOWN_OP,
+                                        "unknown op %d found in piocb",
+                                        paiocb->op);
+                                break;
+                        }
+                }
+        }
+
+        return NULL;
+}
+
+
+/*
+ * One-time AIO setup: create the kernel AIO context and start the reaper
+ * thread.
+ *
+ * @this: posix xlator; this->private receives the context and thread id
+ *
+ * Returns 0 when AIO is ready, and also 0 when the kernel reports ENOSYS,
+ * in which case the translator continues with synchronous I/O. Any other
+ * failure returns the non-zero result of io_setup() or gf_thread_create().
+ *
+ * priv->aio_capable is set to _gf_true only when the context and thread
+ * both exist, so a zero return caused by ENOSYS is never mistaken for
+ * working AIO. The AIO readv/writev handlers are installed only on full
+ * success.
+ */
+int
+posix_aio_init (xlator_t *this)
+{
+        struct posix_private *priv = NULL;
+        int                   ret  = 0;
+
+        priv = this->private;
+
+        /* libaio may report the error as -errno or as -1 with errno set */
+        ret = io_setup (POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp);
+        if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_AIO_UNAVAILABLE,
+                        "Linux AIO not available at run-time."
+                        " Continuing with synchronous IO");
+                ret = 0;
+                goto out;
+        }
+
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
+                        P_MSG_IO_SETUP_FAILED,
+                        "io_setup() failed. ret=%d",
+                        ret);
+                goto out;
+        }
+
+        ret = gf_thread_create (&priv->aiothread, NULL,
+                                posix_aio_thread, this);
+        if (ret != 0) {
+                /* no reaper thread: tear the context down again */
+                io_destroy (priv->ctxp);
+                goto out;
+        }
+
+        priv->aio_capable = _gf_true;
+
+        this->fops->readv  = posix_aio_readv;
+        this->fops->writev = posix_aio_writev;
+out:
+        return ret;
+}
+
+
+/*
+ * Enable asynchronous readv/writev on the posix xlator.
+ *
+ * @this: posix xlator
+ *
+ * posix_aio_init() is run on the first call only. The AIO handlers are
+ * installed only if that initialisation produced a usable AIO context;
+ * otherwise the current (synchronous) handlers stay in place. Returns the
+ * result of posix_aio_init() on the first call and 0 on later calls.
+ */
+int
+posix_aio_on (xlator_t *this)
+{
+        struct posix_private *priv = NULL;
+        int                   ret  = 0;
+
+        priv = this->private;
+
+        if (!priv->aio_init_done) {
+                /* posix_aio_init() raises this flag only on real success;
+                 * a zero return alone (ENOSYS fallback) does not imply
+                 * that AIO is usable */
+                priv->aio_capable = _gf_false;
+                ret = posix_aio_init (this);
+                priv->aio_init_done = _gf_true;
+        }
+
+        if (priv->aio_capable) {
+                this->fops->readv  = posix_aio_readv;
+                this->fops->writev = posix_aio_writev;
+        }
+
+        return ret;
+}
+
+/*
+ * Disable asynchronous I/O by putting the synchronous posix_readv and
+ * posix_writev handlers back into this->fops. Always returns 0.
+ */
+int
+posix_aio_off (xlator_t *this)
+{
+        this->fops->writev = posix_writev;
+        this->fops->readv  = posix_readv;
+
+        return 0;
+}
+
+
+#else
+
+
+/*
+ * Stand-in used when the build has no libaio (HAVE_LIBAIO undefined):
+ * logs that AIO is unavailable and leaves the fops untouched.
+ * Always returns 0.
+ */
+int
+posix_aio_on (xlator_t *this)
+{
+        gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE,
+                "Linux AIO not available at build-time. "
+                "Continuing with synchronous IO");
+
+        return 0;
+}
+
+/*
+ * Stand-in used when the build has no libaio (HAVE_LIBAIO undefined):
+ * there is nothing to switch off, so it only logs that AIO is unavailable.
+ * Always returns 0.
+ */
+int
+posix_aio_off (xlator_t *this)
+{
+        gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE,
+                "Linux AIO not available at build-time. "
+                "Continuing with synchronous IO");
+
+        return 0;
+}
+
+/*
+ * Stand-in used when the build has no libaio (HAVE_LIBAIO undefined):
+ * the descriptor flags are left alone and every argument is ignored; the
+ * call only logs, against the current xlator (THIS), that AIO is
+ * unavailable.
+ */
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+                        off_t offset, size_t size)
+{
+        xlator_t *xl = THIS;
+
+        gf_msg (xl->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE,
+                "Linux AIO not available at build-time. "
+                "Continuing with synchronous IO");
+}
+
+#endif
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
new file mode 100644
index 00000000000..e9dd6467e5e
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _POSIX_AIO_H
+#define _POSIX_AIO_H
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+// Maximum number of concurrently submitted IO events. The heaviest load
+// GlusterFS has been able to handle had 60-80 concurrent calls
+#define POSIX_AIO_MAX_NR_EVENTS 256
+
+// Maximum number of completed IO operations to reap per getevents syscall
+#define POSIX_AIO_MAX_NR_GETEVENTS 16
+
+
+int posix_aio_on (xlator_t *this);
+int posix_aio_off (xlator_t *this);
+
+int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_POSIX_AIO_H */
diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c
new file mode 100644
index 00000000000..ddafb0d9b04
--- /dev/null
+++ b/xlators/storage/posix/src/posix-handle.c
@@ -0,0 +1,997 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <libgen.h>
+#ifdef GF_LINUX_HOST_OS
+#include <alloca.h>
+#endif
+
+#include "common-utils.h"
+
+#include "posix-handle.h"
+#include "posix.h"
+#include "xlator.h"
+#include "syscall.h"
+#include "posix-messages.h"
+
+#include "compat-errno.h"
+
+/*
+ * Resolve the entry @bname under @parent to an inode of @itable.
+ *
+ * The entry is stat'ed with posix_istat() and the result is stored in
+ * @iabuf.  For the root gfid with @bname "/" the table's root inode is
+ * returned; otherwise the inode is looked up by gfid with inode_find()
+ * and, when no such inode exists yet, a new one is created with
+ * inode_new() and stamped with the gfid.
+ *
+ * Returns the inode, or NULL when the stat fails or a new inode cannot be
+ * allocated.  The inode is deliberately not linked here (see below).
+ */
+inode_t *
+posix_resolve (xlator_t *this, inode_table_t *itable, inode_t *parent,
+               char *bname, struct iatt *iabuf)
+{
+        inode_t     *inode = NULL;
+        int          ret   = -1;
+
+        ret = posix_istat (this, parent->gfid, bname, iabuf);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "gfid: %s, bname: %s "
+                        "failed", uuid_utoa (parent->gfid), bname);
+                goto out;
+        }
+
+        if (__is_root_gfid (iabuf->ia_gfid) && !strcmp (bname, "/")) {
+                inode = itable->root;
+        } else {
+                inode = inode_find (itable, iabuf->ia_gfid);
+                if (inode == NULL) {
+                        inode = inode_new (itable);
+                        /* allocation may fail: do not touch a NULL inode,
+                         * let the caller see the NULL return instead */
+                        if (inode == NULL)
+                                goto out;
+                        gf_uuid_copy (inode->gfid, iabuf->ia_gfid);
+                }
+        }
+
+        /* Linking an inode here, can cause a race in posix_acl.
+           Parent inode gets linked here, but before
+           it reaches posix_acl_readdirp_cbk, create/lookup can
+           come on a leaf-inode, as parent-inode-ctx not yet updated
+           in posix_acl_readdirp_cbk, create and lookup can fail
+           with EACCES. So do the inode linking in the quota xlator
+
+        if (__is_root_gfid (iabuf->ia_gfid) && !strcmp (bname, "/"))
+                linked_inode = itable->root;
+        else
+                linked_inode = inode_link (inode, parent, bname, iabuf);
+
+        inode_unref (inode);*/
+
+out:
+        return inode;
+}
+
+/*
+ * Append one path component to the ancestry path being assembled.
+ *
+ * @dir_name is concatenated to @path (a buffer of @pathsize bytes),
+ * followed by "/" unless @dir_name itself starts with '/'.  When @type
+ * has POSIX_ANCESTRY_DENTRY set, a dirent for @dir_name carrying @iabuf,
+ * a reference on @inode and the xattrs returned by posix_xattr_fill() is
+ * appended to the list headed by @head.
+ *
+ * Returns 0 on success and -1 when the extended path (or the on-disk
+ * path built from @priv_base_path) does not fit its buffer or the dirent
+ * cannot be allocated.
+ */
+int
+posix_make_ancestral_node (const char *priv_base_path, char *path, int pathsize,
+                           gf_dirent_t *head,
+                           char *dir_name, struct iatt *iabuf, inode_t *inode,
+                           int type, dict_t *xdata)
+{
+        gf_dirent_t *entry                   = NULL;
+        char         real_path[PATH_MAX + 1] = {0, };
+        size_t       needed                  = 0;
+        loc_t        loc                     = {0, };
+        int          ret                     = -1;
+        int          written                 = 0;
+
+        /* Bytes required in @path: what is already there, the new
+         * component, the optional trailing '/' and the terminating NUL.
+         * The length must not be kept in a narrow type or the check
+         * below silently wraps for long paths. */
+        needed = strlen (path) + strlen (dir_name) + 1;
+        if (*dir_name != '/')
+                needed++;
+
+        if (pathsize < 0 || needed > (size_t) pathsize) {
+                goto out;
+        }
+
+        strcat (path, dir_name);
+        if (*dir_name != '/')
+                strcat (path, "/");
+
+        if (type & POSIX_ANCESTRY_DENTRY) {
+                /* build the on-disk path with a bounded write */
+                written = snprintf (real_path, sizeof (real_path), "%s/%s",
+                                    priv_base_path, path);
+                if (written < 0 || (size_t) written >= sizeof (real_path))
+                        goto out;
+
+                entry = gf_dirent_for_name (dir_name);
+                if (!entry)
+                        goto out;
+
+                entry->d_stat = *iabuf;
+                entry->inode = inode_ref (inode);
+
+                list_add_tail (&entry->list, &head->list);
+
+                /* temporary loc used only for the xattr fill below */
+                loc.inode = inode_ref (inode);
+                gf_uuid_copy (loc.gfid, inode->gfid);
+
+                entry->dict = posix_xattr_fill (THIS, real_path, &loc, NULL, -1,
+                                                xdata, iabuf);
+                loc_wipe (&loc);
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Build the ancestry of the directory identified by @gfid, root first.
+ *
+ * The function reads the gfid handle symlink under
+ * <priv_base_path>/GF_HIDDEN_PATH, extracts the parent gfid and the
+ * directory name from the link target, recurses on the parent gfid and
+ * finally appends this directory via posix_make_ancestral_node().  The
+ * recursion ends at the root gfid.
+ *
+ * @path / @pathsize  buffer that accumulates the textual path
+ * @head / @type      dirent list and POSIX_ANCESTRY_* flags, passed on
+ *                    to posix_make_ancestral_node()
+ * @handle_size       size of the buffer used for the handle path
+ * @parent            in/out: on return refers to the inode resolved for
+ *                    @gfid; a previous non-NULL *parent is unref'd
+ * @op_errno          set on failure
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int
+posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize,
+                             gf_dirent_t *head, int type, uuid_t gfid,
+                             const size_t handle_size,
+                             const char *priv_base_path, inode_table_t *itable,
+                             inode_t **parent, dict_t *xdata, int32_t *op_errno)
+{
+        char        *linkname    = NULL; /* "../../<gfid[0]>/<gfid[1]/"
+                                          "<gfidstr>/<NAME_MAX>" */
+        char        *dir_handle  = NULL;
+        char        *dir_name    = NULL;
+        char        *pgfidstr    = NULL;
+        char        *saveptr     = NULL;
+        ssize_t      len         = 0;
+        inode_t     *inode       = NULL;
+        struct iatt  iabuf       = {0, };
+        int          ret         = -1;
+        int          saved_errno = 0;
+        uuid_t       tmp_gfid    = {0, };
+
+        if (!path || !parent || !priv_base_path || gf_uuid_is_null (gfid)) {
+                *op_errno = EINVAL;
+                goto out;
+        }
+
+        if (__is_root_gfid (gfid)) {
+                /* end of recursion; parent was validated above */
+                if (*parent) {
+                        inode_unref (*parent);
+                }
+
+                *parent = inode_ref (itable->root);
+
+                inode = posix_resolve (this, itable, *parent, "/", &iabuf);
+                if (!inode) {
+                        /* gf_msg takes the errno before the message id */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                P_MSG_INODE_RESOLVE_FAILED,
+                                "posix resolve on the root inode %s failed",
+                                uuid_utoa (gfid));
+                        *op_errno = ESTALE;
+                        goto out;
+                }
+
+                ret = posix_make_ancestral_node (priv_base_path, path, pathsize,
+                                                 head, "/", &iabuf, inode, type,
+                                                 xdata);
+                if (ret < 0)
+                        *op_errno = ENOMEM;
+                return ret;
+        }
+
+        dir_handle = alloca (handle_size);
+        linkname   = alloca (PATH_MAX);
+        snprintf (dir_handle, handle_size, "%s/%s/%02x/%02x/%s",
+                  priv_base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
+                  uuid_utoa (gfid));
+
+        /* readlink() does not NUL-terminate: keep one byte in reserve so
+         * that linkname[len] below always stays inside the buffer */
+        len = sys_readlink (dir_handle, linkname, PATH_MAX - 1);
+        if (len < 0) {
+                /* logging may clobber errno, so capture it first */
+                saved_errno = errno;
+                gf_msg (this->name,
+                        (saved_errno == ENOENT || saved_errno == ESTALE)
+                        ? GF_LOG_DEBUG:GF_LOG_ERROR, saved_errno,
+                        P_MSG_READLINK_FAILED, "could not read the link from "
+                        "the gfid handle %s ", dir_handle);
+                ret = -1;
+                *op_errno = saved_errno;
+                goto out;
+        }
+
+        linkname[len] = '\0';
+
+        /* the target must extend beyond the "../../00/00/" prefix and
+         * contain both a parent gfid and a name component */
+        if ((size_t) len > SLEN("../../00/00/")) {
+                pgfidstr = strtok_r (linkname + SLEN("../../00/00/"), "/",
+                                     &saveptr);
+                dir_name = strtok_r (NULL, "/", &saveptr);
+        }
+
+        if (!pgfidstr || !dir_name) {
+                gf_log (this->name, GF_LOG_ERROR, "malformed link target "
+                        "in the gfid handle %s", dir_handle);
+                ret = -1;
+                *op_errno = EINVAL;
+                goto out;
+        }
+
+        gf_uuid_parse (pgfidstr, tmp_gfid);
+
+        ret = posix_make_ancestryfromgfid (this, path, pathsize, head, type,
+                                           tmp_gfid, handle_size,
+                                           priv_base_path, itable, parent,
+                                           xdata, op_errno);
+        if (ret < 0) {
+                goto out;
+        }
+
+        memset (&iabuf, 0, sizeof (iabuf));
+
+        inode = posix_resolve (this, itable, *parent, dir_name, &iabuf);
+        if (inode == NULL) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        P_MSG_INODE_RESOLVE_FAILED,
+                        "posix resolve on the root inode %s failed",
+                        uuid_utoa (gfid));
+                *op_errno = ESTALE;
+                ret = -1;
+                goto out;
+        }
+
+        ret = posix_make_ancestral_node (priv_base_path, path, pathsize, head,
+                                         dir_name, &iabuf, inode, type, xdata);
+        if (*parent != NULL) {
+                inode_unref (*parent);
+        }
+
+        /* the inode just resolved is the parent for the next level */
+        *parent = inode;
+
+out:
+        return ret;
+}
+
+/*
+ * Format the relative handle path "../../<xx>/<yy>/<gfid>[/<basename>]"
+ * for @gfid into @buf.
+ *
+ * When @buf is NULL or @buflen is smaller than the space required, the
+ * required size (including the terminating NUL) is returned and nothing
+ * is written; otherwise the snprintf() result is returned.
+ */
+int
+posix_handle_relpath (xlator_t *this, uuid_t gfid, const char *basename,
+                      char *buf, size_t buflen)
+{
+        /* "../" "../" "00/" "00/" <uuid> plus the terminating NUL */
+        int   needed   = SLEN("../../00/00/" UUID0_STR) + 1;
+        char *gfid_str = NULL;
+
+        if (basename != NULL) {
+                /* separator plus the name itself */
+                needed += (strlen (basename) + 1);
+        }
+
+        if (!buf || buflen < needed)
+                return needed;
+
+        gfid_str = uuid_utoa (gfid);
+
+        if (basename == NULL) {
+                return snprintf (buf, buflen, "../../%02x/%02x/%s",
+                                 gfid[0], gfid[1], gfid_str);
+        }
+
+        return snprintf (buf, buflen, "../../%02x/%02x/%s/%s",
+                         gfid[0], gfid[1], gfid_str, basename);
+}
+
+
+/*
+ TODO: explain how this pump fixes ELOOP
+*/
+/*
+ * Check the layout of a directory handle link target of @len bytes,
+ * expected to look like "../../<xx>/<yy>/<gfid>/<name>".
+ *
+ * Returns _gf_false when the length is within [50, 512), the target
+ * starts with "../../" and the '/' and '-' separators sit at their fixed
+ * offsets; otherwise logs an error naming @base_str and returns _gf_true.
+ * Anything shorter than 50 bytes - including the 8 byte root target
+ * "../../.." - is reported as malformed.
+ */
+gf_boolean_t
+posix_is_malformed_link (xlator_t *this, char *base_str, char *linkname,
+                         size_t len)
+{
+        /* fixed offsets of the path separators and of the uuid dashes */
+        static const int slash_at[] = {2, 5, 8, 11, 48};
+        static const int dash_at[]  = {20, 25, 30, 35};
+        size_t           i          = 0;
+
+        if (len < 50 || len >= 512)
+                goto malformed;
+
+        if (memcmp (linkname, "../../", 6) != 0)
+                goto malformed;
+
+        for (i = 0; i < sizeof (slash_at) / sizeof (slash_at[0]); i++) {
+                if (linkname[slash_at[i]] != '/')
+                        goto malformed;
+        }
+
+        for (i = 0; i < sizeof (dash_at) / sizeof (dash_at[0]); i++) {
+                if (linkname[dash_at[i]] != '-')
+                        goto malformed;
+        }
+
+        return _gf_false;
+
+malformed:
+        gf_log_callingfn (this->name, GF_LOG_ERROR, "malformed internal link "
+                          "%s for %s", linkname, base_str);
+        return _gf_true;
+}
+
+/*
+ * Expand one level of a directory handle symlink inside @buf.
+ *
+ * @base_str names a handle symlink; its target has the form
+ * "../../<xx>/<yy>/<parent-gfid>/<name>".  The "<xx>/<yy>/<gfid>" part of
+ * both @base_str and @buf (starting at offset @pfx_len) is replaced by
+ * the parent's "<xx>/<yy>/<parent-gfid>" and "/<name>" is inserted in
+ * @buf, after the tail of @buf beyond @base_len has been shifted to make
+ * room.  For the root target "../../.." nothing is inserted; when @buf
+ * equals @base_str the handle part is replaced by "..".
+ *
+ * @len is the current length of @buf and @maxlen its capacity.
+ * Returns the new length of @buf, or -1 on readlink failure, malformed
+ * link or lack of space.
+ */
+int
+posix_handle_pump (xlator_t *this, char *buf, int len, int maxlen,
+                   char *base_str, int base_len, int pfx_len)
+{
+        /* "../../<gfid>/<NAME_MAX>", one spare byte for the NUL */
+        char       linkname[512 + 1] = {0,};
+        int        ret = 0;
+        int        blen = 0;
+        int        link_len = 0;
+
+        /* is a directory's symlink-handle */
+        ret = sys_readlink (base_str, linkname, 512);
+        if (ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED,
+                        "internal readlink failed on %s ",
+                        base_str);
+                goto err;
+        }
+
+        /* readlink() does not terminate the string; thanks to the spare
+         * byte this is in bounds even when all 512 bytes were filled, so
+         * the "%s" in the malformed-link log never over-reads */
+        linkname[ret] = 0;
+
+        link_len = ret;
+
+        if ((ret == 8) && memcmp (linkname, "../../..", 8) == 0) {
+                if (strcmp (base_str, buf) == 0) {
+                        strcpy (buf + pfx_len, "..");
+                }
+                goto out;
+        }
+
+        if (posix_is_malformed_link (this, base_str, linkname, ret))
+                goto err;
+
+        /* bytes that get inserted into buf: "/<name>" */
+        blen = link_len - 48;
+
+        if (len + blen >= maxlen) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLEPATH_FAILED,
+                        "Unable to form handle path for %s (maxlen = %d)",
+                        buf, maxlen);
+                goto err;
+        }
+
+        /* shift everything after the handle part (NUL included) */
+        memmove (buf + base_len + blen, buf + base_len,
+                 (strlen (buf) - base_len) + 1);
+
+        /* 42 == strlen ("<xx>/<yy>/<gfid>"); base_str keeps its length */
+        strncpy (base_str + pfx_len, linkname + 6, 42);
+
+        strncpy (buf + pfx_len, linkname + 6, link_len - 6);
+out:
+        return len + blen;
+err:
+        return -1;
+}
+
+
+/*
+ posix_handle_path differs from posix_handle_gfid_path in the way that the
+ path filled in @buf by posix_handle_path will return type IA_IFDIR when
+ an lstat() is performed on it, whereas posix_handle_gfid_path returns path
+ to the handle symlink (typically used for the purpose of unlinking it).
+
+ posix_handle_path also guarantees immunity to ELOOP on the path returned by it
+*/
+
+/*
+ * Build "<base_path>/GF_HIDDEN_PATH/<xx>/<yy>/<gfid>[/<basename>]" and,
+ * when that handle is a symlink with a single link, keep expanding it
+ * through posix_handle_pump() for as long as lstat() on the result fails
+ * with ELOOP.
+ *
+ * With @ubuf == NULL the path is assembled in a PATH_MAX scratch buffer
+ * and only the length is reported; otherwise it is written into @ubuf
+ * (capacity @size).
+ *
+ * Returns the length of the resulting path plus one; this is 0 when
+ * posix_handle_pump() failed, which MAKE_HANDLE_PATH treats as failure.
+ */
+int
+posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename,
+ char *ubuf, size_t size)
+{
+ struct posix_private *priv = NULL;
+ char *uuid_str = NULL;
+ int len = 0;
+ int ret = -1;
+ struct stat stat;
+ char *base_str = NULL;
+ int base_len = 0;
+ int pfx_len;
+ int maxlen;
+ char *buf;
+
+ priv = this->private;
+
+ uuid_str = uuid_utoa (gfid);
+
+ /* no caller buffer: work in a stack scratch buffer (length query) */
+ if (ubuf) {
+ buf = ubuf;
+ maxlen = size;
+ } else {
+ maxlen = PATH_MAX;
+ buf = alloca (maxlen);
+ }
+
+ /* base_str is the handle path without basename; base_len becomes its
+ actual length as reported by snprintf */
+ base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45);
+ base_str = alloca (base_len + 1);
+ base_len = snprintf (base_str, base_len + 1, "%s/%s/%02x/%02x/%s",
+ priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
+ uuid_str);
+
+ /* offset just past "<base_path>/<GF_HIDDEN_PATH>/" */
+ pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1;
+
+ if (basename) {
+ len = snprintf (buf, maxlen, "%s/%s", base_str, basename);
+ } else {
+ len = snprintf (buf, maxlen, "%s", base_str);
+ }
+
+ ret = sys_lstat (base_str, &stat);
+
+ /* only a symlink handle with exactly one link gets expanded */
+ if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1))
+ goto out;
+
+ do {
+ errno = 0;
+ ret = posix_handle_pump (this, buf, len, maxlen,
+ base_str, base_len, pfx_len);
+ len = ret;
+
+ /* len is -1 here, so the function returns 0 */
+ if (ret == -1)
+ break;
+
+ ret = sys_lstat (buf, &stat);
+ } while ((ret == -1) && errno == ELOOP);
+
+out:
+ return len + 1;
+}
+
+
+/*
+ * Format the absolute path of the handle of @gfid,
+ * "<base_path>/GF_HIDDEN_PATH/<xx>/<yy>/<gfid>[/<basename>]", into @buf.
+ * For the root gfid the result is "<base_path>[/<basename>]" instead.
+ *
+ * When @buf is NULL or @buflen is smaller than the size estimate, the
+ * estimate is returned and nothing is written.  Otherwise the snprintf()
+ * result is returned, except for the root gfid without @basename where
+ * the estimate is returned after copying the base path.
+ */
+int
+posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
+                        char *buf, size_t buflen)
+{
+        struct posix_private *priv     = this->private;
+        char                 *gfid_str = NULL;
+        int                   needed   = 0;
+
+        needed = priv->base_path_length /* option directory "/export" */
+                + SLEN("/")
+                + SLEN(GF_HIDDEN_PATH)
+                + SLEN("/")
+                + SLEN("00/")
+                + SLEN("00/")
+                + SLEN(UUID0_STR)
+                + 1 /* '\0' */
+                ;
+
+        if (basename)
+                needed += (strlen (basename) + 1);
+        else
+                needed += 256; /* worst-case for directory's symlink-handle expansion */
+
+        if (!buf || (buflen < needed))
+                return needed;
+
+        gfid_str = uuid_utoa (gfid);
+
+        if (__is_root_gfid (gfid)) {
+                if (!basename) {
+                        strncpy (buf, priv->base_path, buflen);
+                        return needed;
+                }
+                return snprintf (buf, buflen, "%s/%s", priv->base_path,
+                                 basename);
+        }
+
+        if (!basename) {
+                return snprintf (buf, buflen, "%s/%s/%02x/%02x/%s",
+                                 priv->base_path, GF_HIDDEN_PATH, gfid[0],
+                                 gfid[1], gfid_str);
+        }
+
+        return snprintf (buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path,
+                         GF_HIDDEN_PATH, gfid[0], gfid[1], gfid_str, basename);
+}
+
+/*
+ * Set up the handle directory of the export.
+ *
+ * Verifies that priv->base_path is a directory, creates
+ * "<base_path>/GF_HIDDEN_PATH" when it is missing, records its stat in
+ * priv->handledir and makes sure the handle of the root gfid exists as a
+ * symlink to "../../..".  An already existing root handle must resolve
+ * to the same inode/device as the export directory.
+ *
+ * Returns 0 on success, -1 on any failure (after logging it).
+ */
+int
+posix_handle_init (xlator_t *this)
+{
+        struct posix_private *priv      = this->private;
+        char                 *gfid_dir  = NULL;
+        char                 *root_link = NULL;
+        struct stat           dir_st;
+        struct stat           root_st;
+        struct stat           export_st;
+        int                   rc        = 0;
+        uuid_t                gfid      = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+
+        rc = sys_stat (priv->base_path, &export_st);
+        if (rc || !S_ISDIR (export_st.st_mode)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        P_MSG_HANDLE_CREATE,
+                        "Not a directory: %s", priv->base_path);
+                return -1;
+        }
+
+        gfid_dir = alloca (priv->base_path_length + 1 + strlen (GF_HIDDEN_PATH)
+                           + 1);
+
+        sprintf (gfid_dir, "%s/%s", priv->base_path, GF_HIDDEN_PATH);
+
+        /* step 1: the handle directory itself */
+        rc = sys_stat (gfid_dir, &dir_st);
+        if (rc == -1) {
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "Checking for %s failed",
+                                gfid_dir);
+                        return -1;
+                }
+
+                rc = sys_mkdir (gfid_dir, 0600);
+                if (rc != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "Creating directory %s failed",
+                                gfid_dir);
+                        return -1;
+                }
+        } else if (rc == 0) {
+                if (!S_ISDIR (dir_st.st_mode)) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                P_MSG_HANDLE_CREATE,
+                                "Not a directory: %s",
+                                gfid_dir);
+                        return -1;
+                }
+        }
+
+        sys_stat (gfid_dir, &priv->handledir);
+
+        /* step 2: the handle of the root gfid */
+        MAKE_HANDLE_ABSPATH(root_link, this, gfid);
+
+        rc = sys_stat (root_link, &root_st);
+        if (rc == 0) {
+                if ((export_st.st_ino == root_st.st_ino) &&
+                    (export_st.st_dev == root_st.st_dev))
+                        return 0;
+
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        P_MSG_HANDLE_CREATE,
+                        "Different dirs %s (%lld/%lld) != %s (%lld/%lld)",
+                        priv->base_path, (long long) export_st.st_ino,
+                        (long long) export_st.st_dev, root_link,
+                        (long long) root_st.st_ino, (long long) root_st.st_dev);
+                return -1;
+        }
+
+        if (rc == -1) {
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "%s", priv->base_path);
+                        return -1;
+                }
+
+                rc = posix_handle_mkdir_hashes (this, root_link);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "mkdir %s failed", root_link);
+                        return -1;
+                }
+
+                rc = sys_symlink ("../../..", root_link);
+                if (rc) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "symlink %s creation failed",
+                                root_link);
+                        return -1;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Tell whether @old_trash is a legacy trash directory: it must be a
+ * directory (not followed through symlinks) on which reading the
+ * "trusted.gfid" xattr fails with ENODATA/ENOATTR, i.e. it has no gfid.
+ */
+gf_boolean_t
+posix_does_old_trash_exists (char *old_trash)
+{
+        uuid_t       gfid = {0};
+        struct stat  st   = {0};
+        int          rc   = 0;
+
+        rc = sys_lstat (old_trash, &st);
+        if (rc != 0 || !S_ISDIR (st.st_mode))
+                return _gf_false;
+
+        rc = sys_lgetxattr (old_trash, "trusted.gfid", gfid, 16);
+        if (rc >= 0)
+                return _gf_false;
+
+        if (errno == ENODATA || errno == ENOATTR)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+/*
+ * Make sure the trash directory @trash exists.
+ *
+ * A missing directory is created with mode 0755.  Returns 0 when the
+ * directory exists or was created; -1 when the lstat()/mkdir() fails or
+ * when @trash exists but is not a directory.  Failures are logged.
+ */
+int
+posix_handle_new_trash_init (xlator_t *this, char *trash)
+{
+        int             ret = 0;
+        struct stat     stbuf = {0};
+
+        ret = sys_lstat (trash, &stbuf);
+        switch (ret) {
+        case -1:
+                if (errno == ENOENT) {
+                        ret = sys_mkdir (trash, 0755);
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_HANDLE_TRASH_CREATE,
+                                        "Creating directory %s failed",
+                                        trash);
+                        }
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_HANDLE_TRASH_CREATE,
+                                "Checking for %s failed", trash);
+                }
+                break;
+        case 0:
+                if (!S_ISDIR (stbuf.st_mode)) {
+                        /* lstat() succeeded, so errno holds nothing
+                         * relevant here: log without an error number */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                P_MSG_HANDLE_TRASH_CREATE,
+                                "Not a directory: %s", trash);
+                        ret = -1;
+                }
+                break;
+        default:
+                break;
+        }
+        return ret;
+}
+
+/*
+ * If @old is a legacy trash directory (see posix_does_old_trash_exists),
+ * rename it to "<new>/<freshly generated uuid>".
+ *
+ * Returns 0 when there is nothing to move or the rename succeeded,
+ * otherwise the (negative) result of the rename, which is also logged.
+ */
+int
+posix_mv_old_trash_into_new_trash (xlator_t *this, char *old, char *new)
+{
+        uuid_t  fresh_name       = {0};
+        char    target[PATH_MAX] = {0};
+        int     rc               = 0;
+
+        if (posix_does_old_trash_exists (old)) {
+                gf_uuid_generate (fresh_name);
+                snprintf (target, sizeof (target), "%s/%s", new,
+                          uuid_utoa (fresh_name));
+
+                rc = sys_rename (old, target);
+                if (rc < 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_HANDLE_TRASH_CREATE,
+                                "Not able to move %s -> %s ", old, target);
+                }
+        }
+
+        return rc;
+}
+
+/*
+ * Allocate priv->trash_path as "<base_path>/GF_HIDDEN_PATH/TRASH_DIR",
+ * create that directory if needed and move a legacy
+ * "<base_path>/.landfill" directory into it.
+ *
+ * Returns 0 on success; -1 on allocation failure, otherwise the result
+ * of the failing step.
+ */
+int
+posix_handle_trash_init (xlator_t *this)
+{
+        struct posix_private *priv               = this->private;
+        char                  landfill[PATH_MAX] = {0};
+        int                   rc                 = -1;
+
+        priv->trash_path = GF_CALLOC (1, priv->base_path_length + strlen ("/")
+                                      + strlen (GF_HIDDEN_PATH) + strlen ("/")
+                                      + strlen (TRASH_DIR) + 1,
+                                      gf_posix_mt_trash_path);
+        if (priv->trash_path == NULL)
+                return rc;
+
+        /* the buffer is zero-filled, so strcat finds the end of the copy */
+        strncpy (priv->trash_path, priv->base_path, priv->base_path_length);
+        strcat (priv->trash_path, "/" GF_HIDDEN_PATH "/" TRASH_DIR);
+
+        rc = posix_handle_new_trash_init (this, priv->trash_path);
+        if (rc != 0)
+                return rc;
+
+        snprintf (landfill, sizeof (landfill), "%s/.landfill",
+                  priv->base_path);
+
+        return posix_mv_old_trash_into_new_trash (this, landfill,
+                                                  priv->trash_path);
+}
+
+/*
+ * Create the two hash directories that contain the handle @newpath
+ * ("<...>/<xx>/<yy>/<gfid>"): first the grandparent "<...>/<xx>", then
+ * the parent "<...>/<xx>/<yy>", both with mode 0700.  An already existing
+ * directory (EEXIST) is not an error.
+ *
+ * Returns 0 on success, -1 when a mkdir fails for any other reason.
+ */
+int
+posix_handle_mkdir_hashes (xlator_t *this, const char *newpath)
+{
+ char *duppath = NULL;
+ char *parpath = NULL;
+ int ret = 0;
+
+ /* work on a stack copy: dirname() may modify its argument */
+ duppath = strdupa (newpath);
+ /* dirname() is applied twice to the same buffer to obtain the
+ grandparent. NOTE(review): this relies on dirname() truncating
+ duppath in place - confirm for the target libc */
+ parpath = dirname (duppath);
+ parpath = dirname (duppath);
+
+ ret = sys_mkdir (parpath, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_HANDLE_CREATE,
+ "error mkdir hash-1 %s ", parpath);
+ return -1;
+ }
+
+ /* restore the full path, then take the direct parent */
+ strcpy (duppath, newpath);
+ parpath = dirname (duppath);
+
+ ret = sys_mkdir (parpath, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_HANDLE_CREATE,
+ "error mkdir hash-2 %s ", parpath);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+int
+posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf)
+{
+        /*
+         * Make sure the handle of @gfid exists as a hard link to
+         * @oldpath.  A missing handle is created (hash directories
+         * first); the handle must then refer to the same inode/device
+         * as @oldbuf.  Returns 0 on success, -1 on failure or mismatch.
+         */
+        char            *newpath = NULL;
+        struct stat      newbuf;
+        int              ret = -1;
+
+
+        MAKE_HANDLE_ABSPATH (newpath, this, gfid);
+
+        ret = sys_lstat (newpath, &newbuf);
+        if (ret == -1 && errno != ENOENT) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HANDLE_CREATE,
+                        "%s", newpath);
+                return -1;
+        }
+
+        if (ret == -1 && errno == ENOENT) {
+                ret = posix_handle_mkdir_hashes (this, newpath);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE, "mkdir %s failed ",
+                                newpath);
+                        return -1;
+                }
+
+                ret = sys_link (oldpath, newpath);
+
+                if (ret) {
+                        /* space added before "failed": the two literals
+                         * used to run together as "%sfailed" */
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE, "link %s -> %s "
+                                "failed ", oldpath, newpath);
+                        return -1;
+                }
+
+                /* refresh newbuf for the comparison below */
+                ret = sys_lstat (newpath, &newbuf);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "lstat on %s failed", newpath);
+                        return -1;
+                }
+        }
+
+        if (newbuf.st_ino != oldbuf->st_ino ||
+            newbuf.st_dev != oldbuf->st_dev) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        P_MSG_HANDLE_CREATE,
+                        "mismatching ino/dev between file %s (%lld/%lld) "
+                        "and handle %s (%lld/%lld)",
+                        oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev,
+                        newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev);
+                ret = -1;
+        }
+
+        return ret;
+}
+
+
+/*
+ * Make sure the handle of @gfid exists as a symlink pointing at the
+ * relative handle path of (@loc->pargfid, @loc->name).
+ *
+ * A missing handle is created after validating the link target and
+ * creating the hash directories.  Afterwards @real_path is stat'ed and,
+ * when @oldbuf is given, must match its inode/device.
+ *
+ * Returns 0 on success, -1 on failure (errno is EINVAL for a malformed
+ * link target).
+ */
+int
+posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc,
+                   uuid_t gfid, struct stat *oldbuf)
+{
+        char            *oldpath = NULL;
+        char            *newpath = NULL;
+        struct stat      newbuf;
+        int              ret = -1;
+
+        MAKE_HANDLE_ABSPATH (newpath, this, gfid);
+        MAKE_HANDLE_RELPATH (oldpath, this, loc->pargfid, loc->name);
+
+        ret = sys_lstat (newpath, &newbuf);
+        if (ret == -1 && errno != ENOENT) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HANDLE_CREATE, "%s", newpath);
+                return -1;
+        }
+
+        if (ret == -1 && errno == ENOENT) {
+                if (posix_is_malformed_link (this, newpath, oldpath,
+                                             strlen (oldpath))) {
+                        GF_ASSERT (!"Malformed link");
+                        errno = EINVAL;
+                        return -1;
+                }
+                ret = posix_handle_mkdir_hashes (this, newpath);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "mkdir %s failed ", newpath);
+                        return -1;
+                }
+
+                ret = sys_symlink (oldpath, newpath);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "symlink %s -> %s failed",
+                                oldpath, newpath);
+                        return -1;
+                }
+
+                ret = sys_lstat (newpath, &newbuf);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_CREATE,
+                                "stat on %s failed ", newpath);
+                        return -1;
+                }
+        }
+
+        ret = sys_stat (real_path, &newbuf);
+        if (ret) {
+                /* report the path that was actually stat'ed */
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HANDLE_CREATE,
+                        "stat on %s failed ", real_path);
+                return -1;
+        }
+
+        /* without a reference stat there is nothing to compare against */
+        if (!oldbuf)
+                return ret;
+
+        if (newbuf.st_ino != oldbuf->st_ino ||
+            newbuf.st_dev != oldbuf->st_dev) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        P_MSG_HANDLE_CREATE,
+                        "mismatching ino/dev between file %s (%lld/%lld) "
+                        "and handle %s (%lld/%lld)",
+                        oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev,
+                        newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev);
+                ret = -1;
+        }
+
+        return ret;
+}
+
+
+/*
+ * Remove the handle of @gfid.
+ *
+ * Returns the result of the lstat() when the handle cannot be stat'ed
+ * (logged unless errno is ENOENT), otherwise the result of the unlink()
+ * (a failure is logged).
+ */
+int
+posix_handle_unset_gfid (xlator_t *this, uuid_t gfid)
+{
+        struct stat  st;
+        char        *handle = NULL;
+        int          rc     = 0;
+
+        MAKE_HANDLE_GFID_PATH (handle, this, gfid, NULL);
+
+        rc = sys_lstat (handle, &st);
+        if (rc == -1) {
+                if (errno != ENOENT) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HANDLE_DELETE, "%s", handle);
+                }
+                return rc;
+        }
+
+        rc = sys_unlink (handle);
+        if (rc == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HANDLE_DELETE, "unlink %s failed ", handle);
+        }
+
+        return rc;
+}
+
+
+/*
+ * Remove a handle.
+ *
+ * Without @basename the handle of @gfid itself is removed.  With
+ * @basename the entry @basename under the directory @gfid is stat'ed
+ * with posix_istat() and the handle of the gfid found there is removed.
+ *
+ * Returns the result of posix_handle_unset_gfid(), or -1 when the handle
+ * path cannot be built or the stat fails.
+ */
+int
+posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename)
+{
+        struct iatt  entry_stat;
+        char        *handle_path = NULL;
+        int          rc;
+
+        if (basename == NULL)
+                return posix_handle_unset_gfid (this, gfid);
+
+        /* the path is only needed for the log message further down */
+        MAKE_HANDLE_PATH (handle_path, this, gfid, basename);
+        if (handle_path == NULL) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        P_MSG_HANDLE_DELETE,
+                        "Failed to create handle path for %s (%s)",
+                        basename, uuid_utoa(gfid));
+                return -1;
+        }
+
+        rc = posix_istat (this, gfid, basename, &entry_stat);
+        if (rc == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HANDLE_DELETE, "%s", handle_path);
+                return -1;
+        }
+
+        return posix_handle_unset_gfid (this, entry_stat.ia_gfid);
+}
+
+
+/*
+ * Create @real_path as a hard link to an already existing file that
+ * carries @gfid.
+ *
+ * If the handle of @gfid exists, @real_path is linked to it.  Otherwise
+ * the inode is looked up in @itable and, if its context for this xlator
+ * holds GF_UNLINK_TRUE, @real_path is linked to the file's unlink path,
+ * that file is renamed to become the handle again and the context is
+ * reset to GF_UNLINK_FALSE.
+ *
+ * Returns 0 only when @real_path was actually created; non-zero when
+ * there is nothing to link from or a step failed.
+ */
+int
+posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid, char *real_path,
+                                  inode_table_t *itable)
+{
+        int                   ret         = -1;
+        char                 *newpath     = NULL;
+        char                 *unlink_path = NULL;
+        uint64_t              ctx_int     = 0;
+        inode_t              *inode       = NULL;
+        struct stat           stbuf       = {0,};
+        struct posix_private *priv        = NULL;
+
+        priv = this->private;
+
+        MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
+        if (!newpath) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        P_MSG_HANDLE_CREATE,
+                        "Failed to create handle path (%s)", uuid_utoa(gfid));
+                return ret;
+        }
+
+        ret = sys_lstat (newpath, &stbuf);
+        if (!ret) {
+                ret = sys_link (newpath, real_path);
+        } else {
+                inode = inode_find (itable, gfid);
+                if (!inode)
+                        return -1;
+
+                LOCK (&inode->lock);
+                {
+                        ret = __inode_ctx_get0 (inode, this, &ctx_int);
+                        if (ret)
+                                goto unlock;
+
+                        if (ctx_int != GF_UNLINK_TRUE) {
+                                /* no file to link from: this must not be
+                                 * reported as success, since no link
+                                 * has been created */
+                                ret = -1;
+                                goto unlock;
+                        }
+
+                        POSIX_GET_FILE_UNLINK_PATH (priv->base_path, gfid,
+                                                    unlink_path);
+                        ret = sys_link (unlink_path, real_path);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        P_MSG_HANDLE_CREATE, "Failed to link "
+                                        "%s with %s", real_path, unlink_path);
+                                goto unlock;
+                        }
+                        ret = sys_rename (unlink_path, newpath);
+                        if (ret) {
+                                /* name the operation that really failed */
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        P_MSG_HANDLE_CREATE, "Failed to rename "
+                                        "%s to %s", unlink_path, newpath);
+                                goto unlock;
+                        }
+                        ctx_int = GF_UNLINK_FALSE;
+                        ret = __inode_ctx_set0 (inode, this, &ctx_int);
+                }
+unlock:
+                UNLOCK (&inode->lock);
+
+                inode_unref (inode);
+        }
+
+        return ret;
+}
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
new file mode 100644
index 00000000000..9af6a7a5442
--- /dev/null
+++ b/xlators/storage/posix/src/posix-handle.h
@@ -0,0 +1,288 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _POSIX_HANDLE_H
+#define _POSIX_HANDLE_H
+
+#include <limits.h>
+#include <sys/types.h>
+#include "xlator.h"
+#include "gf-dirent.h"
+#include "posix-messages.h"
+
+/* From Open Group Base Specifications Issue 6 */
+#ifndef _XOPEN_PATH_MAX
+#define _XOPEN_PATH_MAX 1024
+#endif
+
+/* name of the trash directory below the hidden handle directory */
+#define TRASH_DIR "landfill"
+
+/* all-zero uuid in canonical text form; used for its length only */
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+/* length of a string literal without the terminating NUL */
+#define SLEN(str) (sizeof(str) - 1)
+
+/* buffer size (NUL included) for "<base>/GF_HIDDEN_PATH/xx/yy/<uuid>" */
+#define HANDLE_ABSPATH_LEN(this) (POSIX_BASE_PATH_LEN(this) + \
+ SLEN("/" GF_HIDDEN_PATH "/00/00/" \
+ UUID0_STR) + 1)
+
+/* true when loc carries a path starting with '/' */
+#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/'))
+/* true when loc has an inode whose type is IA_IFDIR */
+#define LOC_IS_DIR(loc) (loc && (loc->inode) && \
+ (loc->inode->ia_type == IA_IFDIR))
+
+/*
+ * Set var to a stack-allocated (alloca) string "<prefix><uuid of pgfid>".
+ * The result is only valid within the calling function.
+ */
+#define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) do { \
+ var = alloca (strlen (prefix) + UUID_CANONICAL_FORM_LEN + 1); \
+ strcpy (var, prefix); \
+ strcat (var, uuid_utoa (pgfid)); \
+ } while (0)
+
+/*
+ * Store value under the xattr key on path.  The caller's value variable
+ * is converted in place with hton32() before it is written.  On failure
+ * op_errno is set, a warning is logged and control jumps to label.
+ * Requires a variable named op_errno in the calling scope.
+ */
+#define SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
+ value = hton32 (value); \
+ op_ret = sys_lsetxattr (path, key, &value, sizeof (value), \
+ flags); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, \
+ "setting xattr failed on %s: key = %s ", \
+ path, key); \
+ goto label; \
+ } \
+ } while (0)
+
+/*
+ * Set the xattr key on path to 1 only when reading it fails with
+ * ENOATTR.  Any other read failure is logged but does not jump to label;
+ * op_ret stays -1 in that case.  Requires op_errno in the calling scope.
+ */
+#define SET_PGFID_XATTR_IF_ABSENT(path, key, value, flags, op_ret, this, label)\
+ do { \
+ op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ if (op_errno == ENOATTR) { \
+ value = 1; \
+ SET_PGFID_XATTR (path, key, value, flags, \
+ op_ret, this, label); \
+ } else { \
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, \
+ P_MSG_PGFID_OP, "getting xattr " \
+ "failed on %s: key = %s ", \
+ path, key); \
+ } \
+ } \
+ } while (0)
+
+/*
+ * Remove the xattr key from path.  On failure op_errno is set, a warning
+ * is logged and control jumps to label.  Requires a variable named
+ * op_errno in the calling scope.
+ */
+#define REMOVE_PGFID_XATTR(path, key, op_ret, this, label) do {               \
+       op_ret = sys_lremovexattr (path, key);                                 \
+       if (op_ret == -1) {                                                    \
+               op_errno = errno;                                              \
+               gf_msg (this->name, GF_LOG_WARNING, op_errno,                  \
+                       P_MSG_PGFID_OP,                                        \
+                       "removing xattr failed "                               \
+                       "on %s: key = %s", path, key);                         \
+               goto label;                                                    \
+       }                                                                      \
+       } while (0)
+
+/* should be invoked holding a lock */
+/*
+ * Increment the counter stored under the xattr key on path: a missing
+ * xattr (ENOATTR/ENODATA) starts at 1, otherwise the stored value is
+ * converted with ntoh32() and incremented; the result is written back via
+ * SET_PGFID_XATTR.  Other read failures are logged and jump to label.
+ * Requires op_errno in the calling scope.
+ */
+#define LINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
+ op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ if (op_errno == ENOATTR || op_errno == ENODATA) { \
+ value = 1; \
+ } else { \
+ gf_msg (this->name, GF_LOG_WARNING, errno, \
+ P_MSG_PGFID_OP, "getting xattr " \
+ "failed on %s: key = %s ", path, key); \
+ goto label; \
+ } \
+ } else { \
+ value = ntoh32 (value); \
+ value++; \
+ } \
+ SET_PGFID_XATTR (path, key, value, flags, op_ret, this, label); \
+ } while (0)
+
+/* should be invoked holding a lock */
+/*
+ * Decrement the counter stored under the xattr key on path.  A result
+ * greater than zero is written back via SET_PGFID_XATTR, otherwise the
+ * xattr is removed via REMOVE_PGFID_XATTR.  A read failure is logged and
+ * jumps to label.  Requires op_errno in the calling scope.
+ */
+#define UNLINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
+ op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ gf_msg (this->name, GF_LOG_WARNING, errno, \
+ P_MSG_PGFID_OP, "getting xattr failed on " \
+ "%s: key = %s ", path, key); \
+ goto label; \
+ } else { \
+ value = ntoh32 (value); \
+ value--; \
+ if (value > 0) { \
+ SET_PGFID_XATTR (path, key, value, flags, op_ret, \
+ this, label); \
+ } else { \
+ REMOVE_PGFID_XATTR (path, key, op_ret, this, label); \
+ } \
+ } \
+ } while (0)
+
+/*
+ * Set var to a stack-allocated (alloca) on-disk path for path.
+ * Normally this is POSIX_BASE_PATH(this) followed by path.  When
+ * POSIX_PATH_MAX(this) is not -1 and the combined length would reach it,
+ * var is instead path without its leading '/', i.e. a relative path.
+ * NOTE(review): the relative form presumably relies on the process
+ * working directory being the export directory - confirm.
+ */
+#define MAKE_REAL_PATH(var, this, path) do { \
+ size_t path_len = strlen(path); \
+ size_t var_len = path_len + POSIX_BASE_PATH_LEN(this) + 1; \
+ if (POSIX_PATH_MAX(this) != -1 && \
+ var_len >= POSIX_PATH_MAX(this)) { \
+ var = alloca (path_len + 1); \
+ strcpy (var, (path[0] == '/') ? path + 1 : path); \
+ } else { \
+ var = alloca (var_len); \
+ strcpy (var, POSIX_BASE_PATH(this)); \
+ strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \
+ } \
+ } while (0)
+
+/*
+ * Set var to a stack-allocated (alloca) handle path built by
+ * posix_handle_path(): a first call with a NULL buffer obtains the
+ * required size, a second call fills the buffer.  var is set to NULL when
+ * the second call fails; it is left untouched when the first one does,
+ * so callers should initialise var to NULL.
+ */
+#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \
+ int __len; \
+ __len = posix_handle_path (this, gfid, base, NULL, 0); \
+ if (__len <= 0) \
+ break; \
+ var = alloca (__len); \
+ __len = posix_handle_path (this, gfid, base, var, __len); \
+ if (__len <= 0) \
+ var = NULL; \
+ } while (0)
+
+
+/*
+ * Same two-call scheme using posix_handle_gfid_path(); the result of the
+ * second call is not checked.
+ */
+#define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) do { \
+ int __len = 0; \
+ __len = posix_handle_gfid_path (this, gfid, base, NULL, 0); \
+ if (__len <= 0) \
+ break; \
+ var = alloca (__len); \
+ __len = posix_handle_gfid_path (this, gfid, base, var, __len); \
+ } while (0)
+
+
+/*
+ * Same two-call scheme using posix_handle_relpath(), which yields the
+ * relative "../../xx/yy/<gfid>[/<base>]" form.
+ */
+#define MAKE_HANDLE_RELPATH(var, this, gfid, base) do { \
+ int __len; \
+ __len = posix_handle_relpath (this, gfid, base, NULL, 0); \
+ if (__len <= 0) \
+ break; \
+ var = alloca (__len); \
+ __len = posix_handle_relpath (this, gfid, base, var, __len); \
+ } while (0)
+
+
+/*
+ * Set var to a stack-allocated (alloca) absolute handle path
+ * "<base_path>/GF_HIDDEN_PATH/xx/yy/<gfid>" without any link expansion.
+ */
+#define MAKE_HANDLE_ABSPATH(var, this, gfid) do { \
+ struct posix_private * __priv = this->private; \
+ int __len = HANDLE_ABSPATH_LEN(this); \
+ var = alloca(__len); \
+ snprintf(var, __len, "%s/" GF_HIDDEN_PATH "/%02x/%02x/%s", \
+ __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \
+ } while (0)
+
+
+/*
+ * Produce in rpath the on-disk path for the inode described by loc and
+ * stat it into iatt_p, storing the result in the caller's op_ret (which
+ * is not a macro parameter and must exist in the calling scope).
+ *
+ * - null loc->gfid: logs an error and leaves rpath and op_ret untouched
+ * - directory with an absolute loc->path: real path + posix_pstat()
+ * - otherwise: posix_istat() by gfid; unless errno is ELOOP, rpath is the
+ *   handle path (op_ret is set to -1 if it cannot be built)
+ * When errno is ELOOP after posix_istat(), rpath is not set.
+ */
+#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \
+ if (gf_uuid_is_null (loc->gfid)) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
+ P_MSG_INODE_HANDLE_CREATE, \
+ "null gfid for path %s", (loc)->path); \
+ break; \
+ } \
+ if (LOC_IS_DIR (loc) && LOC_HAS_ABSPATH (loc)) { \
+ MAKE_REAL_PATH (rpath, this, (loc)->path); \
+ op_ret = posix_pstat (this, (loc)->gfid, rpath, iatt_p); \
+ break; \
+ } \
+ errno = 0; \
+ op_ret = posix_istat (this, loc->gfid, NULL, iatt_p); \
+ if (errno != ELOOP) { \
+ MAKE_HANDLE_PATH (rpath, this, (loc)->gfid, NULL); \
+ if (!rpath) { \
+ op_ret = -1; \
+ gf_msg (this->name, GF_LOG_ERROR, errno, \
+ P_MSG_INODE_HANDLE_CREATE, \
+ "Failed to create inode handle " \
+ "for path %s", (loc)->path); \
+ } \
+ break; \
+ } \
+ /* __ret == -1 && errno == ELOOP */ \
+ } while (0)
+
+
+/*
+ * Produce in entp the on-disk path of the entry described by loc and in
+ * parp the path of its parent directory, and stat the entry into ent_p,
+ * storing the result in the caller's op_ret (which is not a macro
+ * parameter and must exist in the calling scope).
+ *
+ * - null loc->pargfid or missing loc->name: logs an error, sets nothing
+ * - absolute loc->path: real path, dirname() of a copy, posix_pstat()
+ * - otherwise: posix_istat() by (pargfid, name); unless errno is ELOOP
+ *   both paths are handle paths (a build failure is only logged)
+ * When errno is ELOOP after posix_istat(), neither path is set.
+ */
+#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) do { \
+ char *__parp; \
+ \
+ if (gf_uuid_is_null (loc->pargfid) || !loc->name) { \
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_ENTRY_HANDLE_CREATE,\
+ "null pargfid/name for path %s", loc->path); \
+ break; \
+ } \
+ \
+ if (LOC_HAS_ABSPATH (loc)) { \
+ MAKE_REAL_PATH (entp, this, loc->path); \
+ __parp = strdupa (entp); \
+ parp = dirname (__parp); \
+ op_ret = posix_pstat (this, NULL, entp, ent_p); \
+ break; \
+ } \
+ errno = 0; \
+ op_ret = posix_istat (this, loc->pargfid, loc->name, ent_p); \
+ if (errno != ELOOP) { \
+ MAKE_HANDLE_PATH (parp, this, loc->pargfid, NULL); \
+ MAKE_HANDLE_PATH (entp, this, loc->pargfid, loc->name); \
+ if (!parp || !entp) { \
+ gf_msg (this->name, GF_LOG_ERROR, errno, \
+ P_MSG_ENTRY_HANDLE_CREATE, \
+ "Failed to create entry handle " \
+ "for path %s", loc->path); \
+ } \
+ break; \
+ } \
+ /* __ret == -1 && errno == ELOOP */ \
+ /* expand ELOOP */ \
+ } while (0)
+
+
+/* flag bits for the type argument of posix_make_ancestryfromgfid() */
+#define POSIX_ANCESTRY_PATH (1 << 0)
+#define POSIX_ANCESTRY_DENTRY (1 << 1)
+
+/* handle path with directory symlink handles expanded; returns the
+ length of the path plus one, 0 on failure */
+int
+posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf,
+ size_t len);
+
+/* build the ancestry (path and/or dirents) of a directory gfid */
+int
+posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize,
+ gf_dirent_t *head, int type, uuid_t gfid,
+ const size_t handle_size,
+ const char *priv_base_path,
+ inode_table_t *table, inode_t **parent,
+ dict_t *xdata, int32_t *op_errno);
+/* NOTE(review): posix-handle.c as added by this change contains no
+ definition of posix_handle_path_safe - confirm it exists elsewhere */
+int
+posix_handle_path_safe (xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t len);
+
+/* path of the handle itself, without expanding symlink handles */
+int
+posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t len);
+
+/* ensure the handle of gfid is a hard link to path */
+int
+posix_handle_hard (xlator_t *this, const char *path, uuid_t gfid,
+ struct stat *buf);
+
+
+/* ensure the handle of gfid is a symlink to the parent-relative path */
+int
+posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc,
+ uuid_t gfid, struct stat *buf);
+
+/* remove the handle of gfid, or of the entry basename below gfid */
+int
+posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename);
+
+/* create the two hash directories that contain the handle newpath */
+int posix_handle_mkdir_hashes (xlator_t *this, const char *newpath);
+
+/* set up the handle directory and the root gfid handle */
+int posix_handle_init (xlator_t *this);
+
+/* create real_path as a hard link to an existing file with this gfid */
+int posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
+ char *real_path, inode_table_t *itable);
+
+/* set up the trash directory and migrate a legacy ".landfill" */
+int
+posix_handle_trash_init (xlator_t *this);
+#endif /* !_POSIX_HANDLE_H */
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
new file mode 100644
index 00000000000..8ad674e063f
--- /dev/null
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -0,0 +1,2224 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#define __XOPEN_SOURCE 500
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+
+#ifdef HAVE_SYS_ACL_H
+#ifdef HAVE_ACL_LIBACL_H /* for acl_to_any_text() */
+#include <acl/libacl.h>
+#else /* FreeBSD and others */
+#include <sys/acl.h>
+#endif
+#endif
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#include "glusterfs.h"
+#include "checksum.h"
+#include "dict.h"
+#include "logging.h"
+#include "posix.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "syscall.h"
+#include "statedump.h"
+#include "locking.h"
+#include "timer.h"
+#include "glusterfs3-xdr.h"
+#include "hashfn.h"
+#include "glusterfs-acl.h"
+#include <fnmatch.h>
+
+char *marker_xattrs[] = {"trusted.glusterfs.quota.*", /* NULL-terminated fnmatch() patterns, tested through posix_special_xattr() */
+ "trusted.glusterfs.*.xtime",
+ NULL};
+
+char *marker_contri_key = "trusted.*.*.contri"; /* fnmatch() pattern used to recognise contribution xattr keys */
+
+static char* posix_ignore_xattrs[] = { /* request keys that posix_xattr_ignorable() reports as skippable */
+ "gfid-req",
+ GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GF_GFIDLESS_LOOKUP,
+ GLUSTERFS_INODELK_DOM_COUNT,
+ GLUSTERFS_INTERNAL_FOP_KEY,
+ NULL
+};
+
+static char* list_xattr_ignore_xattrs[] = { /* on-disk xattr names that _handle_list_xattr() never copies into the reply */
+ GF_SELINUX_XATTR_KEY,
+ GF_XATTR_VOL_ID_KEY,
+ GFID_XATTR_KEY,
+ NULL
+};
+
+/*
+ * Report whether @key matches any fnmatch() glob in the NULL-terminated
+ * @pattern array.  GF_VALIDATE_OR_GOTO bails out to the common exit, so the
+ * result stays _gf_false when validation of either argument fails.
+ */
+gf_boolean_t
+posix_special_xattr (char **pattern, char *key)
+{
+        gf_boolean_t   matched = _gf_false;
+        char         **cursor  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("posix", pattern, out);
+        GF_VALIDATE_OR_GOTO ("posix", key, out);
+
+        for (cursor = pattern; *cursor != NULL; cursor++) {
+                if (fnmatch (*cursor, key, 0) == 0) {
+                        matched = _gf_true;
+                        break;
+                }
+        }
+out:
+        return matched;
+}
+
+/*
+ * Exact (strcmp) lookup of @str in the NULL-terminated @str_array.
+ * A NULL @str never matches.
+ */
+static gf_boolean_t
+_is_in_array (char **str_array, char *str)
+{
+        char **entry = str_array;
+
+        if (str == NULL)
+                return _gf_false;
+
+        while (*entry != NULL) {
+                if (!strcmp (str, *entry))
+                        return _gf_true;
+                entry++;
+        }
+
+        return _gf_false;
+}
+
+/* True when @key is listed in posix_ignore_xattrs. */
+static gf_boolean_t
+posix_xattr_ignorable (char *key)
+{
+        gf_boolean_t skip = _is_in_array (posix_ignore_xattrs, key);
+
+        return skip;
+}
+
+/*
+ * True when @key begins with one of the xattr namespace prefixes
+ * "trusted.", "security.", "system." or "user.".
+ */
+static gf_boolean_t
+posix_is_valid_namespace (char *key)
+{
+        static char  *xattr_namespaces[] = {"trusted.", "security.", "system.",
+                                            "user.", NULL };
+        char        **ns        = NULL;
+        size_t        prefixlen = 0;
+
+        for (ns = xattr_namespaces; *ns; ns++) {
+                prefixlen = strlen (*ns);
+                if (!strncmp (key, *ns, prefixlen))
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+static int /*
+ * Read extended attribute `key` from the backend object described by
+ * `filler` (filler->real_path when set, otherwise filler->fdnum) and store
+ * a heap copy of the value in filler->xattr via dict_set_bin().
+ *
+ * Returns -1 when `key` is outside the accepted namespaces or when the first
+ * getxattr fails with an errno other than ERANGE, the negative result of
+ * dict_set_bin() when that fails, and 0 otherwise.
+ * NOTE(review): allocation failure and a failing second-stage getxattr also
+ * return 0, because `ret` is still 0 on those paths - confirm this is the
+ * intended best-effort behaviour.
+ */
+_posix_xattr_get_set_from_backend (posix_xattr_filler_t *filler, char *key)
+{
+ ssize_t xattr_size = -1;
+ int ret = 0;
+ char *value = NULL;
+ char val_buf[256] = {0};
+ gf_boolean_t have_val = _gf_false;
+
+ if (!posix_is_valid_namespace (key)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Most of the gluster internal xattrs don't exceed 256 bytes. So try
+ * getxattr with ~256 bytes. If it gives ERANGE then go the old way
+ * of getxattr with NULL buf to find the length and then getxattr with
+ * allocated buf to fill the data. This way we reduce lot of getxattrs.
+ */
+ if (filler->real_path)
+ xattr_size = sys_lgetxattr (filler->real_path, key, val_buf,
+ sizeof (val_buf) - 1);
+ else
+ xattr_size = sys_fgetxattr (filler->fdnum, key, val_buf,
+ sizeof (val_buf) - 1);
+
+ if (xattr_size >= 0) {
+ have_val = _gf_true; /* value already sits in val_buf */
+ } else if (xattr_size == -1 && errno != ERANGE) {
+ ret = -1;
+ goto out;
+ }
+
+ if (have_val) {
+ /*No need to do getxattr*/
+ } else if (filler->real_path) {
+ xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); /* size probe only */
+ } else {
+ xattr_size = sys_fgetxattr (filler->fdnum, key, NULL, 0); /* size probe only */
+ }
+
+ if (xattr_size != -1) {
+ value = GF_CALLOC (1, xattr_size + 1, gf_posix_mt_char); /* +1 leaves room for the terminator written below */
+ if (!value)
+ goto out;
+
+ if (have_val) {
+ memcpy (value, val_buf, xattr_size);
+ } else if (filler->real_path) {
+ xattr_size = sys_lgetxattr (filler->real_path, key,
+ value, xattr_size);
+ } else {
+ xattr_size = sys_fgetxattr (filler->fdnum, key, value,
+ xattr_size);
+ }
+ if (xattr_size == -1) {
+ if (filler->real_path)
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
+ P_MSG_XATTR_FAILED,
+ "getxattr failed. path: %s, key: %s",
+ filler->real_path, key);
+ else
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
+ P_MSG_XATTR_FAILED,
+ "getxattr failed. gfid: %s, key: %s",
+ uuid_utoa (filler->fd->inode->gfid),
+ key);
+ GF_FREE (value);
+ goto out;
+ }
+
+ value[xattr_size] = '\0';
+ ret = dict_set_bin (filler->xattr, key, value, xattr_size);
+ if (ret < 0) {
+ if (filler->real_path)
+ gf_msg_debug (filler->this->name, 0,
+ "dict set failed. path: %s, key: %s",
+ filler->real_path, key);
+ else
+ gf_msg_debug (filler->this->name, 0,
+ "dict set failed. gfid: %s, key: %s",
+ uuid_utoa (filler->fd->inode->gfid),
+ key);
+ GF_FREE (value); /* the dict did not accept the buffer, so release it here */
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int gf_posix_xattr_enotsup_log;
+
+/*
+ * List every xattr on the object described by @filler (path when
+ * filler->real_path is set, otherwise filler->fdnum) and copy each one whose
+ * name matches marker_contri_key into the reply dict through
+ * _posix_xattr_get_set_from_backend().
+ *
+ * Returns 0 once the list has been walked (per-key failures are ignored) or
+ * when the list is empty, -1 when the size probe fails, and the non-positive
+ * listxattr result when the second listxattr call fails or returns nothing.
+ */
+static int
+_posix_get_marker_all_contributions (posix_xattr_filler_t *filler)
+{
+        ssize_t  list_len = -1, left = -1, off = 0;
+        size_t   step     = 0;
+        int      ret      = -1;
+        char    *names    = NULL;
+        char     key[4096] = {0, };
+
+        /* First pass: ask only for the size of the name list. */
+        list_len = filler->real_path
+                ? sys_llistxattr (filler->real_path, NULL, 0)
+                : sys_flistxattr (filler->fdnum, NULL, 0);
+
+        if (list_len == -1) {
+                if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+                                             THIS->name, GF_LOG_WARNING,
+                                             "Extended attributes not "
+                                             "supported (try remounting brick"
+                                             " with 'user_xattr' flag)");
+                } else if (filler->real_path) {
+                        gf_msg (THIS->name, GF_LOG_WARNING, errno,
+                                P_MSG_XATTR_FAILED,
+                                "listxattr failed on %s",
+                                filler->real_path);
+                } else {
+                        gf_msg (THIS->name, GF_LOG_WARNING, errno,
+                                P_MSG_XATTR_FAILED,
+                                "listxattr failed on %s",
+                                uuid_utoa (filler->fd->inode->gfid));
+                }
+                goto out;
+        }
+
+        if (list_len == 0) {
+                ret = 0;
+                goto out;
+        }
+
+        names = alloca (list_len);
+        if (!names) {
+                goto out;
+        }
+
+        /* Second pass: fetch the NUL-separated names themselves. */
+        list_len = filler->real_path
+                ? sys_llistxattr (filler->real_path, names, list_len)
+                : sys_flistxattr (filler->fdnum, names, list_len);
+        if (list_len <= 0) {
+                ret = list_len;
+                goto out;
+        }
+
+        left = list_len;
+        off  = 0;
+
+        while (left > 0) {
+                strcpy (key, names + off);
+                if (fnmatch (marker_contri_key, key, 0) == 0) {
+                        /* best effort: the per-key result is not propagated */
+                        ret = _posix_xattr_get_set_from_backend (filler, key);
+                }
+
+                step  = strlen (key) + 1;
+                left -= step;
+                off  += step;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Serve a request for a key matching marker_contri_key.
+ *
+ * The key is split on '.'; when its fourth component starts with "contri"
+ * every contribution xattr on the object is returned
+ * (_posix_get_marker_all_contributions), otherwise only @key itself is
+ * fetched from the backend.
+ *
+ * Returns the result of whichever helper ran, or -1 when the working copy of
+ * @key cannot be allocated.
+ */
+static int
+_posix_get_marker_quota_contributions (posix_xattr_filler_t *filler, char *key)
+{
+        char *saveptr = NULL, *token = NULL, *tmp_key = NULL;
+        char *ptr     = NULL;
+        int   i = 0, ret = 0;
+
+        tmp_key = ptr = gf_strdup (key);
+        if (!ptr)
+                return -1;
+
+        for (i = 0; i < 4; i++) {
+                token = strtok_r (tmp_key, ".", &saveptr);
+                /* strtok_r() collapses empty components, so a key such as
+                 * "trusted...contri" has fewer than four tokens. */
+                if (!token)
+                        break;
+                tmp_key = NULL;
+        }
+
+        if (token && strncmp (token, "contri", strlen ("contri")) == 0) {
+                ret = _posix_get_marker_all_contributions (filler);
+        } else {
+                ret = _posix_xattr_get_set_from_backend (filler, key);
+        }
+
+        GF_FREE (ptr);
+
+        return ret;
+}
+
+/*
+ * Pick the inode a filler refers to: the fd's inode when an fd is present,
+ * otherwise the loc's inode (which may be NULL), otherwise NULL.
+ */
+static inode_t *
+_get_filler_inode (posix_xattr_filler_t *filler)
+{
+        inode_t *inode = NULL;
+
+        if (filler->fd)
+                inode = filler->fd->inode;
+        else if (filler->loc)
+                inode = filler->loc->inode;
+
+        return inode;
+}
+
+/*
+ * Store the inode's fd_count under @key in the reply dict.
+ * Returns -1 when no inode is available or its gfid is null, otherwise the
+ * result of dict_set_uint32().
+ */
+static int
+_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
+{
+        inode_t *inode = _get_filler_inode (filler);
+        int      ret   = -1;
+
+        if (inode == NULL || gf_uuid_is_null (inode->gfid))
+                return ret;
+
+        ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
+        if (ret < 0)
+                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
+                        P_MSG_DICT_SET_FAILED,
+                        "Failed to set dictionary value for %s", key);
+
+        return ret;
+}
+
+/*
+ * dict_foreach() callback used by posix_xattr_fill(): for one requested
+ * @key, compute or fetch the matching value and add it to filler->xattr.
+ *
+ *   - keys listed in posix_ignore_xattrs are skipped;
+ *   - GF_CONTENT_KEY on a regular file returns the whole file content when
+ *     the requested size is at least the file size;
+ *   - GLUSTERFS_OPEN_FD_COUNT returns the inode's open-fd count;
+ *   - GET_ANCESTRY_PATH_KEY returns the ancestry path (path-based fops only);
+ *   - contribution keys go through the marker helper;
+ *   - GF_REQUEST_LINK_COUNT_XDATA is echoed back;
+ *   - anything else is read from the backend xattrs.
+ *
+ * @xattrargs is the posix_xattr_filler_t set up by the caller.
+ * Always returns 0: failures for one key never stop the iteration.
+ */
+static int
+_posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
+                      void *xattrargs)
+{
+        posix_xattr_filler_t *filler   = xattrargs;
+        int                   ret      = -1;
+        char                 *databuf  = NULL;
+        char                 *path     = NULL;
+        int                   _fd      = -1;
+        loc_t                *loc      = NULL;
+        ssize_t               req_size = 0;
+
+
+        if (posix_xattr_ignorable (key))
+                goto out;
+        /* should size be put into the data_t ? */
+        if (!strcmp (key, GF_CONTENT_KEY)
+            && IA_ISREG (filler->stbuf->ia_type)) {
+                if (!filler->real_path)
+                        goto out;
+
+                /* file content request */
+                req_size = data_to_uint64 (data);
+                if (req_size >= filler->stbuf->ia_size) {
+                        _fd = open (filler->real_path, O_RDONLY);
+                        if (_fd == -1) {
+                                gf_msg (filler->this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_XDATA_GETXATTR,
+                                        "Opening file %s failed",
+                                        filler->real_path);
+                                goto err;
+                        }
+
+                        /*
+                         * There could be a situation where the ia_size is
+                         * zero. GF_CALLOC will return a pointer to the
+                         * memory initialized by gf_mem_set_acct_info.
+                         * This function adds a header and a footer to
+                         * the allocated memory.  The returned pointer
+                         * points to the memory just after the header, but
+                         * when size is zero, there is no space for user
+                         * data. The memory can be freed by calling GF_FREE.
+                         */
+                        databuf = GF_CALLOC (1, filler->stbuf->ia_size,
+                                             gf_posix_mt_char);
+                        if (!databuf) {
+                                goto err;
+                        }
+
+                        ret = sys_read (_fd, databuf, filler->stbuf->ia_size);
+                        if (ret == -1) {
+                                gf_msg (filler->this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_XDATA_GETXATTR,
+                                        "Read on file %s failed",
+                                        filler->real_path);
+                                goto err;
+                        }
+
+                        ret = sys_close (_fd);
+                        _fd = -1;
+                        if (ret == -1) {
+                                gf_msg (filler->this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_XDATA_GETXATTR,
+                                        "Close on file %s failed",
+                                        filler->real_path);
+                                goto err;
+                        }
+
+                        ret = dict_set_bin (filler->xattr, key,
+                                            databuf, filler->stbuf->ia_size);
+                        if (ret < 0) {
+                                gf_msg (filler->this->name, GF_LOG_ERROR, 0,
+                                        P_MSG_XDATA_GETXATTR,
+                                        "failed to set dict value. key: %s,"
+                                        "path: %s",
+                                        key, filler->real_path);
+                                goto err;
+                        }
+
+                        /* To avoid double free in cleanup below */
+                        databuf = NULL;
+                err:
+                        if (_fd != -1)
+                                sys_close (_fd);
+                        GF_FREE (databuf);
+                }
+        } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
+                ret = _posix_filler_get_openfd_count (filler, key);
+                loc = filler->loc;
+                /* A loc may arrive without an inode (_get_filler_inode()
+                 * allows for that), so only read fd_count when it exists. */
+                if (loc && loc->inode) {
+                        ret = dict_set_uint32 (filler->xattr, key,
+                                               loc->inode->fd_count);
+                        if (ret < 0)
+                                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
+                                        P_MSG_XDATA_GETXATTR,
+                                        "Failed to set dictionary value for %s",
+                                        key);
+                }
+        } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
+                /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
+                 * fetching it via path-based fops. Hence, leaving it as it is
+                 * for now.
+                 */
+                if (!filler->real_path || !filler->loc)
+                        goto out;
+
+                ret = posix_get_ancestry (filler->this, filler->loc->inode,
+                                          NULL, &path, POSIX_ANCESTRY_PATH,
+                                          &filler->op_errno, xattr_req);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (filler->xattr, GET_ANCESTRY_PATH_KEY,
+                                       path);
+                if (ret < 0) {
+                        GF_FREE (path);
+                        goto out;
+                }
+
+        } else if (fnmatch (marker_contri_key, key, 0) == 0) {
+                ret = _posix_get_marker_quota_contributions (filler, key);
+        } else if (strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) {
+                ret = dict_set (filler->xattr,
+                                GF_REQUEST_LINK_COUNT_XDATA, data);
+        } else {
+                ret = _posix_xattr_get_set_from_backend (filler, key);
+        }
+out:
+        return 0;
+}
+
+
+/*
+ * Load the GFID xattr of @path into iatt->ia_gfid.
+ * Returns 0 when @iatt is NULL, when 16 bytes were read, or when getxattr
+ * failed with -1; any other size is returned as-is.
+ */
+int
+posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt)
+{
+        ssize_t nread = 0;
+
+        if (!iatt)
+                return 0;
+
+        nread = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+        if (nread == 16 || nread == -1)
+                return 0;
+
+        /* unexpected length: hand the raw getxattr result to the caller */
+        return nread;
+}
+
+
+/*
+ * fd-based twin of posix_fill_gfid_path(): load the GFID xattr of @fd into
+ * iatt->ia_gfid.  Returns 0 when @iatt is NULL, when 16 bytes were read, or
+ * when getxattr failed with -1; any other size is returned as-is.
+ */
+int
+posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt)
+{
+        ssize_t nread = 0;
+
+        if (!iatt)
+                return 0;
+
+        nread = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+        if (nread == 16 || nread == -1)
+                return 0;
+
+        /* unexpected length: hand the raw getxattr result to the caller */
+        return nread;
+}
+
+/*
+ * Derive buf->ia_ino from buf->ia_gfid through gfid_to_ino(); a null gfid
+ * yields an inode number of -1.
+ */
+void
+posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf)
+{
+        if (gf_uuid_is_null (buf->ia_gfid))
+                buf->ia_ino = -1;
+        else
+                buf->ia_ino = gfid_to_ino (buf->ia_gfid);
+}
+
+/*
+ * fstat() @fd and convert the result to a struct iatt, filling in the gfid
+ * and the gfid-derived inode number.  For non-directories a non-zero link
+ * count is reduced by one before conversion.
+ *
+ * @stbuf_p may be NULL.  Returns -1 when fstat fails, otherwise the result
+ * of posix_fill_gfid_fd().
+ */
+int
+posix_fdstat (xlator_t *this, int fd, struct iatt *stbuf_p)
+{
+        struct stat  raw  = {0, };
+        struct iatt  conv = {0, };
+        int          ret  = 0;
+
+        ret = sys_fstat (fd, &raw);
+        if (ret == -1)
+                return ret;
+
+        if (!S_ISDIR (raw.st_mode) && raw.st_nlink)
+                raw.st_nlink--;
+
+        iatt_from_stat (&conv, &raw);
+
+        ret = posix_fill_gfid_fd (this, fd, &conv);
+
+        posix_fill_ino_from_gfid (this, &conv);
+
+        if (stbuf_p)
+                *stbuf_p = conv;
+
+        return ret;
+}
+
+
+/*
+ * lstat() the handle path built from (@gfid, @basename) and convert the
+ * result into *@buf_p (which may be NULL).
+ *
+ * Fails with -1/ESTALE when the handle path cannot be built and with
+ * -1/ENOENT when the path resolves to the handle directory itself.  lstat
+ * failures are logged unless errno is ENOENT or ELOOP.  With a @basename the
+ * gfid is read from the entry's xattr; without one, @gfid is copied.
+ */
+int
+posix_istat (xlator_t *this, uuid_t gfid, const char *basename,
+             struct iatt *buf_p)
+{
+        struct posix_private *priv      = this->private;
+        char                 *real_path = NULL;
+        struct stat           raw       = {0, };
+        struct iatt           conv      = {0, };
+        int                   ret       = 0;
+
+        MAKE_HANDLE_PATH (real_path, this, gfid, basename);
+        if (!real_path) {
+                gf_msg (this->name, GF_LOG_ERROR, ESTALE,
+                        P_MSG_HANDLE_PATH_CREATE,
+                        "Failed to create handle path for %s/%s",
+                        uuid_utoa (gfid), basename ? basename : "");
+                errno = ESTALE;
+                ret = -1;
+                goto out;
+        }
+
+        ret = sys_lstat (real_path, &raw);
+        if (ret == -1) {
+                if (errno != ENOENT && errno != ELOOP)
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_LSTAT_FAILED,
+                                "lstat failed on %s",
+                                real_path);
+                goto out;
+        }
+        if (ret != 0) {
+                /* may be some backend filesystem issue */
+                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_LSTAT_FAILED,
+                        "lstat failed on %s and return value is %d "
+                        "instead of -1. Please see dmesg output to "
+                        "check whether the failure is due to backend "
+                        "filesystem issue", real_path, ret);
+                ret = -1;
+                goto out;
+        }
+
+        /* An entry that is really the handle directory is reported absent. */
+        if ((raw.st_dev == priv->handledir.st_dev) &&
+            (raw.st_ino == priv->handledir.st_ino)) {
+                errno = ENOENT;
+                return -1;
+        }
+
+        if (!S_ISDIR (raw.st_mode))
+                raw.st_nlink --;
+
+        iatt_from_stat (&conv, &raw);
+
+        if (!basename)
+                gf_uuid_copy (conv.ia_gfid, gfid);
+        else
+                posix_fill_gfid_path (this, real_path, &conv);
+
+        posix_fill_ino_from_gfid (this, &conv);
+
+        if (buf_p)
+                *buf_p = conv;
+out:
+        return ret;
+}
+
+
+
+/*
+ * lstat() @path and convert the result into *@buf_p (which may be NULL).
+ *
+ * Fails with -1/ENOENT when @path resolves to the handle directory itself.
+ * lstat failures are logged unless errno is ENOENT.  A non-null @gfid is
+ * copied into the result; otherwise the gfid is read from the xattr on
+ * @path.
+ */
+int
+posix_pstat (xlator_t *this, uuid_t gfid, const char *path,
+             struct iatt *buf_p)
+{
+        struct posix_private *priv = this->private;
+        struct stat           raw  = {0, };
+        struct iatt           conv = {0, };
+        int                   ret  = 0;
+
+        ret = sys_lstat (path, &raw);
+        if (ret == -1) {
+                if (errno != ENOENT)
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_LSTAT_FAILED,
+                                "lstat failed on %s",
+                                path);
+                goto out;
+        }
+        if (ret != 0) {
+                /* may be some backend filesystem issue */
+                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_LSTAT_FAILED,
+                        "lstat failed on %s and return value is %d "
+                        "instead of -1. Please see dmesg output to "
+                        "check whether the failure is due to backend "
+                        "filesystem issue", path, ret);
+                ret = -1;
+                goto out;
+        }
+
+        /* An entry that is really the handle directory is reported absent. */
+        if ((raw.st_dev == priv->handledir.st_dev) &&
+            (raw.st_ino == priv->handledir.st_ino)) {
+                errno = ENOENT;
+                return -1;
+        }
+
+        if (!S_ISDIR (raw.st_mode))
+                raw.st_nlink --;
+
+        iatt_from_stat (&conv, &raw);
+
+        if (!gfid || gf_uuid_is_null (gfid))
+                posix_fill_gfid_path (this, path, &conv);
+        else
+                gf_uuid_copy (conv.ia_gfid, gfid);
+
+        posix_fill_ino_from_gfid (this, &conv);
+
+        if (buf_p)
+                *buf_p = conv;
+out:
+        return ret;
+}
+
+/*
+ * Serve a "list-xattr" request: enumerate every xattr on the object (by
+ * @real_path when given, otherwise by @fdnum) and copy into filler->xattr
+ * each one that is not in list_xattr_ignore_xattrs, does not match a marker
+ * or stime pattern, and is not already present in the reply.
+ * Failures are silent; the function simply stops.
+ */
+static void
+_handle_list_xattr (dict_t *xattr_req, const char *real_path, int fdnum,
+                    posix_xattr_filler_t *filler)
+{
+        ssize_t   size           = 0;
+        char     *list           = NULL;
+        int32_t   list_offset    = 0;
+        ssize_t   remaining_size = 0;
+        char     *key            = NULL;
+        size_t    step           = 0;
+
+        if (!real_path && fdnum < 0)
+                return;
+
+        /* size probe */
+        size = real_path ? sys_llistxattr (real_path, NULL, 0)
+                         : sys_flistxattr (fdnum, NULL, 0);
+        if (size <= 0)
+                return;
+
+        list = alloca (size);
+        if (!list)
+                return;
+
+        remaining_size = real_path ? sys_llistxattr (real_path, list, size)
+                                   : sys_flistxattr (fdnum, list, size);
+        if (remaining_size <= 0)
+                return;
+
+        for (list_offset = 0; remaining_size > 0; ) {
+                key = list + list_offset;
+
+                if (!_is_in_array (list_xattr_ignore_xattrs, key) &&
+                    !posix_special_xattr (marker_xattrs, key) &&
+                    fnmatch (GF_XATTR_STIME_PATTERN, key, 0) != 0 &&
+                    !dict_get (filler->xattr, key)) {
+                        /* best effort: the result is deliberately unused */
+                        _posix_xattr_get_set_from_backend (filler, key);
+                }
+
+                step = strlen (key) + 1;
+                remaining_size -= step;
+                list_offset += step;
+        }
+}
+
+/*
+ * Build the reply dict for the keys requested in @xattr_req.
+ *
+ * Each requested key is handled by _posix_xattr_get_set(); when the request
+ * carries "list-xattr" that key is removed from @xattr_req and all remaining
+ * on-disk xattrs are added as well.  Returns the new dict, or NULL when it
+ * cannot be allocated.
+ */
+dict_t *
+posix_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd,
+                  int fdnum, dict_t *xattr_req, struct iatt *buf)
+{
+        posix_xattr_filler_t  filler    = {0, };
+        gf_boolean_t          want_list = _gf_false;
+        dict_t               *reply     = NULL;
+
+        if (dict_get (xattr_req, "list-xattr")) {
+                dict_del (xattr_req, "list-xattr");
+                want_list = _gf_true;
+        }
+
+        reply = dict_new ();
+        if (!reply)
+                return NULL;
+
+        filler.this      = this;
+        filler.real_path = real_path;
+        filler.xattr     = reply;
+        filler.stbuf     = buf;
+        filler.loc       = loc;
+        filler.fd        = fd;
+        filler.fdnum     = fdnum;
+
+        dict_foreach (xattr_req, _posix_xattr_get_set, &filler);
+
+        if (want_list)
+                _handle_list_xattr (xattr_req, real_path, fdnum, &filler);
+
+        return reply;
+}
+
+/*
+ * Remove the gfid handle named by the "gfid-req" entry of @xdata.
+ * Does nothing when @xdata is NULL or carries no "gfid-req".
+ *
+ * dict_get_ptr() hands back a pointer to the stored gfid (posix_gfid_set()
+ * reads the same key that way), so it is received in a pointer and copied
+ * into a local uuid_t.  Passing the uuid_t array itself as the out-parameter
+ * would overwrite its first bytes with the pointer value.
+ */
+void
+posix_gfid_unset (xlator_t *this, dict_t *xdata)
+{
+        void   *uuid_req = NULL;
+        uuid_t  uuid     = {0, };
+        int     ret      = 0;
+
+        if (xdata == NULL)
+                goto out;
+
+        ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+        if (ret || !uuid_req) {
+                goto out;
+        }
+
+        gf_uuid_copy (uuid, uuid_req);
+
+        posix_handle_unset (this, uuid, NULL);
+out:
+        return;
+}
+
+/*
+ * Make sure @path carries a GFID xattr and a matching handle.
+ *
+ * When the xattr already holds 16 bytes it is kept; otherwise the gfid from
+ * the "gfid-req" entry of @xattr_req is written with XATTR_CREATE.  The
+ * handle is then created with posix_handle_hard() for non-directories and
+ * posix_handle_soft() for directories.
+ *
+ * Returns 0 when @xattr_req is NULL or @path cannot be lstat'ed, otherwise
+ * the result of the failing step or of the handle creation.
+ */
+int
+posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
+{
+        struct stat  stbuf    = {0, };
+        void        *uuid_req = NULL;
+        uuid_t       uuid_curr;
+        ssize_t      size     = 0;
+        int          ret      = 0;
+
+        if (!xattr_req)
+                goto out;
+
+        if (sys_lstat (path, &stbuf) != 0)
+                goto out;
+
+        size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
+        if (size != 16) {
+                /* no usable gfid on disk yet: take the requested one */
+                ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to get the gfid from dict for %s",
+                                      loc->path);
+                        goto out;
+                }
+
+                ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16,
+                                     XATTR_CREATE);
+                if (ret == -1) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_GFID_FAILED,
+                                "setting GFID on %s failed ", path);
+                        goto out;
+                }
+                gf_uuid_copy (uuid_curr, uuid_req);
+        }
+
+        if (S_ISDIR (stbuf.st_mode))
+                ret = posix_handle_soft (this, path, loc, uuid_curr, &stbuf);
+        else
+                ret = posix_handle_hard (this, path, uuid_curr, &stbuf);
+
+out:
+        return ret;
+}
+
+
+/*
+ * Setting file contents through a setxattr key is not supported: this
+ * function always fails with -1.
+ *
+ * It used to carry, after an unconditional "return -1", an unreachable
+ * implementation that wrote @value into "<path>/<key suffix>" (replacing an
+ * existing file when XATTR_REPLACE was set, creating it otherwise).  That
+ * dead code built the target path with an unbounded sprintf() and has been
+ * dropped; the signature is kept so callers are unaffected.
+ */
+int
+posix_set_file_contents (xlator_t *this, const char *path, char *keyp,
+                         data_t *value, int flags)
+{
+        /* XXX: does not handle assigning GFID to created files */
+        return -1;
+}
+
+
+/*
+ * Read the whole file @name under parent @pargfid into a freshly allocated,
+ * NUL-terminated buffer.
+ *
+ * On success returns a non-negative value (the result of the final close)
+ * and stores the buffer in *@contents; the caller releases it with GF_FREE.
+ * On failure returns a negative value and leaves *@contents untouched.  As
+ * before, a read that returns 0 bytes (e.g. an empty file) is treated as a
+ * failure.
+ */
+int
+posix_get_file_contents (xlator_t *this, uuid_t pargfid,
+                         const char *name, char **contents)
+{
+        char        *real_path = NULL;
+        char        *buf       = NULL;
+        int32_t      file_fd   = -1;
+        struct iatt  stbuf     = {0,};
+        int          op_ret    = 0;
+        int          ret       = -1;
+
+
+        MAKE_HANDLE_PATH (real_path, this, pargfid, name);
+        if (!real_path) {
+                op_ret = -ESTALE;
+                gf_msg (this->name, GF_LOG_ERROR, ESTALE,
+                        P_MSG_XDATA_GETXATTR,
+                        "Failed to create handle path for %s/%s",
+                        uuid_utoa (pargfid), name);
+                goto out;
+        }
+
+        op_ret = posix_istat (this, pargfid, name, &stbuf);
+        if (op_ret == -1) {
+                op_ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XDATA_GETXATTR,
+                        "lstat failed on %s", real_path);
+                goto out;
+        }
+
+        file_fd = open (real_path, O_RDONLY);
+
+        if (file_fd == -1) {
+                op_ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XDATA_GETXATTR,
+                        "open failed on %s", real_path);
+                goto out;
+        }
+
+        buf = GF_CALLOC (stbuf.ia_size + 1, sizeof(char),
+                         gf_posix_mt_char);
+        if (!buf) {
+                /* errno may be 0 here, which would read as success */
+                op_ret = -ENOMEM;
+                goto out;
+        }
+
+        ret = sys_read (file_fd, buf, stbuf.ia_size);
+        if (ret <= 0) {
+                op_ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XDATA_GETXATTR,
+                        "read on %s failed", real_path);
+                goto out;
+        }
+
+        /* Terminate the buffer.  The former "*contents[n]" indexed the
+         * char ** itself and wrote through an out-of-bounds pointer. */
+        buf[stbuf.ia_size] = '\0';
+
+        op_ret = sys_close (file_fd);
+        file_fd = -1;
+        if (op_ret == -1) {
+                op_ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XDATA_GETXATTR,
+                        "close on %s failed", real_path);
+                goto out;
+        }
+
+        /* Publish the buffer only once everything has succeeded. */
+        *contents = buf;
+        buf = NULL;
+
+out:
+        if (op_ret < 0) {
+                /* Free only what was allocated here; *contents may never
+                 * have been initialised by the caller. */
+                GF_FREE (buf);
+                if (file_fd != -1)
+                        sys_close (file_fd);
+        }
+
+        return op_ret;
+}
+
+#ifdef HAVE_SYS_ACL_H
+/*
+ * Apply the textual ACL @acl_s to @path; the ACL type is derived from @key
+ * by gf_posix_acl_get_type().
+ *
+ * Returns 0 on success and -errno on failure (posix_handle_pair expects the
+ * negative errno), including when @acl_s cannot be parsed.
+ */
+int
+posix_pacl_set (const char *path, const char *key, const char *acl_s)
+{
+        int             ret = -1;
+        acl_t           acl = NULL;
+        acl_type_t      type = 0;
+
+        type = gf_posix_acl_get_type (key);
+
+        acl = acl_from_text (acl_s);
+        if (!acl)
+                /* unparsable text: do not hand a NULL ACL to acl_set_file */
+                return -errno;
+
+        ret = acl_set_file (path, type, acl);
+        if (ret)
+                /* posix_handle_pair expects ret to be the errno */
+                ret = -errno;
+
+        acl_free (acl);
+
+        return ret;
+}
+
+/*
+ * Read the ACL of @path (type derived from @key) and return it as a
+ * gf_strdup'ed text string in *@acl_s.
+ *
+ * Returns 0 on success and -1 on any failure; *@acl_s is only written once
+ * the text conversion has succeeded.
+ */
+int
+posix_pacl_get (const char *path, const char *key, char **acl_s)
+{
+        int             ret = -1;
+        acl_t           acl = NULL;
+        acl_type_t      type = 0;
+        char           *acl_tmp = NULL;
+
+        type = gf_posix_acl_get_type (key);
+        if (!type)
+                return -1;
+
+        acl = acl_get_file (path, type);
+        if (!acl)
+                return -1;
+
+#ifdef HAVE_ACL_LIBACL_H
+        acl_tmp = acl_to_any_text (acl, NULL, ',',
+                                   TEXT_ABBREVIATE | TEXT_NUMERIC_IDS);
+#else /* FreeBSD and the like */
+        acl_tmp = acl_to_text_np (acl, NULL, ACL_TEXT_NUMERIC_IDS);
+#endif
+        if (!acl_tmp)
+                goto free_acl;
+
+        *acl_s = gf_strdup (acl_tmp);
+        if (*acl_s)
+                ret = 0;
+
+        acl_free (acl_tmp);
+free_acl:
+        acl_free (acl);
+
+        return ret;
+}
+#else /* !HAVE_SYS_ACL_H (NetBSD) */
+/* Without <sys/acl.h> ACLs cannot be set: fail with ENOTSUP. */
+int
+posix_pacl_set (const char *path, const char *key, const char *acl_s)
+{
+        errno = ENOTSUP;
+        return -1;
+}
+
+/* Without <sys/acl.h> ACLs cannot be read: fail with ENOTSUP. */
+int
+posix_pacl_get (const char *path, const char *key, char **acl_s)
+{
+        errno = ENOTSUP;
+        return -1;
+}
+#endif
+
+
+#ifdef GF_DARWIN_HOST_OS
+/*
+ * Debug helper (Darwin builds only): log the key, flags and length of a
+ * setxattr request, plus a hex dump of the value when the log level is
+ * GF_LOG_TRACE.
+ */
+static
+void posix_dump_buffer (xlator_t *this, const char *real_path, const char *key,
+                        data_t *value, int flags)
+{
+        char buffer[3*value->len+1];
+        int index = 0;
+        buffer[0] = 0;
+        gf_loglevel_t log_level = gf_log_get_loglevel ();
+        if (log_level == GF_LOG_TRACE) {
+                char *data = (char *) value->data;
+                /* Each byte gets exactly 3 characters (" xx").  The cast
+                 * matters: a negative char would be sign-extended and
+                 * printed as 8 hex digits, overrunning the buffer. */
+                for (index = 0; index < value->len; index++)
+                        sprintf(buffer+3*index, " %02x",
+                                (unsigned char) data[index]);
+        }
+        gf_msg_debug (this->name, 0,
+                      "Dump %s: key:%s flags: %u length:%u data:%s ",
+                      real_path, key, flags, value->len,
+                      (log_level == GF_LOG_TRACE ? buffer : "<skipped in DEBUG>"));
+}
+#endif
+
+/*
+ * Apply one (key, value) pair from a setxattr-style request to @real_path.
+ *
+ *   - pathinfo keys are refused with -EACCES;
+ *   - file-content keys go to posix_set_file_contents();
+ *   - POSIX ACL request keys go to posix_pacl_set(), except on DHT link
+ *     files, where they are ignored;
+ *   - the access-ACL xattr on a DHT link file is ignored;
+ *   - everything else is written with lsetxattr using @flags.
+ *
+ * Returns 0 on success (or when the pair is ignored) and a negative errno
+ * value on failure.  ENOENT failures on marker xattrs are not logged.
+ *
+ * NOTE(review): the access-ACL test compares only strlen(key) bytes, so any
+ * key that is a prefix of POSIX_ACL_ACCESS_XATTR also matches - confirm
+ * this is intended.
+ */
+int
+posix_handle_pair (xlator_t *this, const char *real_path,
+                   char *key, data_t *value, int flags, struct iatt *stbuf)
+{
+        int sys_ret = -1;
+        int ret     = 0;
+
+        if (XATTR_IS_PATHINFO (key)) {
+                ret = -EACCES;
+                goto out;
+        } else if (ZR_FILE_CONTENT_REQUEST(key)) {
+                ret = posix_set_file_contents (this, real_path, key, value,
+                                               flags);
+        } else if (GF_POSIX_ACL_REQUEST (key)) {
+                if (stbuf && IS_DHT_LINKFILE_MODE (stbuf))
+                        goto out;
+                ret = posix_pacl_set (real_path, key, value->data);
+        } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, strlen(key))
+                   && stbuf && IS_DHT_LINKFILE_MODE (stbuf)) {
+                goto out;
+        } else {
+                sys_ret = sys_lsetxattr (real_path, key, value->data,
+                                         value->len, flags);
+#ifdef GF_DARWIN_HOST_OS
+                posix_dump_buffer(this, real_path, key, value, flags);
+#endif
+                if (sys_ret < 0) {
+                        ret = -errno;
+                        if (errno == ENOENT) {
+                                if (!posix_special_xattr (marker_xattrs,
+                                                          key)) {
+                                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                                P_MSG_XATTR_FAILED,
+                                                "setxattr on %s failed",
+                                                real_path);
+                                }
+                        } else {
+
+#ifdef GF_DARWIN_HOST_OS
+                                if (errno == EINVAL) {
+                                        gf_msg_debug (this->name, 0, "%s: key:"
+                                                      "%s flags: %u length:%d "
+                                                      "error:%s", real_path,
+                                                      key, flags, value->len,
+                                                      strerror (errno));
+                                } else {
+                                        gf_msg (this->name, GF_LOG_ERROR,
+                                                errno, P_MSG_XATTR_FAILED,
+                                                "%s: key:%s flags: "
+                                                "%u length:%d",
+                                                real_path, key, flags,
+                                                value->len);
+                                }       /* this brace was missing on Darwin */
+
+#else /* ! DARWIN */
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_XATTR_FAILED, "%s: key:%s"
+                                        "flags: %u length:%d", real_path,
+                                        key, flags, value->len);
+#endif /* DARWIN */
+                        }
+
+                        goto out;
+                }
+        }
+out:
+        return ret;
+}
+
+/*
+ * fd-based counterpart of posix_handle_pair(): apply one (key, value) pair
+ * to the open file @fd with fsetxattr.
+ *
+ * pathinfo keys are refused with -EACCES; the access-ACL xattr on a DHT link
+ * file is ignored.  Returns 0 on success or when ignored, otherwise the
+ * negative errno of the failed fsetxattr.
+ */
+int
+posix_fhandle_pair (xlator_t *this, int fd,
+                    char *key, data_t *value, int flags, struct iatt *stbuf)
+{
+        int rc      = 0;
+        int sys_ret = -1;
+
+        if (XATTR_IS_PATHINFO (key))
+                return -EACCES;
+
+        if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, strlen(key))
+            && stbuf && IS_DHT_LINKFILE_MODE (stbuf))
+                return 0;
+
+        sys_ret = sys_fsetxattr (fd, key, value->data,
+                                 value->len, flags);
+        if (sys_ret >= 0)
+                return 0;
+
+        /* capture errno before any logging can disturb it */
+        rc = -errno;
+
+        if (errno == ENOENT) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        P_MSG_XATTR_FAILED, "fsetxattr on fd=%d"
+                        " failed", fd);
+                return rc;
+        }
+
+#ifdef GF_DARWIN_HOST_OS
+        if (errno == EINVAL) {
+                gf_msg_debug (this->name, 0, "fd=%d: key:%s "
+                              "error:%s", fd, key,
+                              strerror (errno));
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        P_MSG_XATTR_FAILED, "fd=%d: key:%s",
+                        fd, key);
+        }
+#else /* ! DARWIN */
+        gf_msg (this->name, GF_LOG_ERROR, errno,
+                P_MSG_XATTR_FAILED, "fd=%d: key:%s",
+                fd, key);
+#endif /* DARWIN */
+
+        return rc;
+}
+
+static void /*
+ * Remove the gfid handle of a directory when that handle is stale.
+ *
+ * The handle is only considered when lstat shows it to be a symlink with a
+ * link count of 1.  It is stale when posix_handle_path() fails with ENOENT,
+ * when reading the GFID xattr of the resolved path fails with ENOENT, or
+ * when that xattr holds a gfid different from `gfid`.  A stale handle is
+ * unlinked; otherwise nothing is changed.
+ *
+ * `size` is reused for every call result, and its last value is examined
+ * again after the out: label.
+ * NOTE(review): hpath is not checked for NULL after MAKE_HANDLE_GFID_PATH -
+ * confirm the macro cannot fail here.
+ */
+del_stale_dir_handle (xlator_t *this, uuid_t gfid)
+{
+ char newpath[PATH_MAX] = {0, };
+ uuid_t gfid_curr = {0, };
+ ssize_t size = -1;
+ gf_boolean_t stale = _gf_false;
+ char *hpath = NULL;
+ struct stat stbuf = {0, };
+ struct iatt iabuf = {0, };
+
+ MAKE_HANDLE_GFID_PATH (hpath, this, gfid, NULL);
+
+ /* check that it is valid directory handle */
+ size = sys_lstat (hpath, &stbuf);
+ if (size < 0) {
+ gf_msg_debug (this->name, 0, "%s: Handle stat failed: "
+ "%s", hpath, strerror (errno));
+ goto out;
+ }
+
+ iatt_from_stat (&iabuf, &stbuf);
+ if (iabuf.ia_nlink != 1 || !IA_ISLNK (iabuf.ia_type)) { /* only a singly-linked symlink is handled below */
+ gf_msg_debug (this->name, 0, "%s: Handle nlink %d %d",
+ hpath, iabuf.ia_nlink, IA_ISLNK (iabuf.ia_type));
+ goto out;
+ }
+
+ size = posix_handle_path (this, gfid, NULL, newpath, sizeof (newpath));
+ if (size <= 0) {
+ if (errno == ENOENT) {
+ gf_msg_debug (this->name, 0, "%s: %s", newpath,
+ strerror (ENOENT));
+ stale = _gf_true;
+ }
+ goto out;
+ }
+
+ size = sys_lgetxattr (newpath, GFID_XATTR_KEY, gfid_curr, 16);
+ if (size < 0 && errno == ENOENT) {
+ gf_msg_debug (this->name, 0, "%s: %s", newpath,
+ strerror (ENOENT));
+ stale = _gf_true;
+ } else if (size == 16 && gf_uuid_compare (gfid, gfid_curr)) { /* a non-zero compare result is treated as a gfid mismatch */
+ gf_msg_debug (this->name, 0, "%s: mismatching gfid: %s, "
+ "at %s", hpath, uuid_utoa (gfid_curr), newpath);
+ stale = _gf_true;
+ }
+
+out:
+ if (stale) {
+ size = sys_unlink (hpath);
+ if (size < 0 && errno != ENOENT)
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_STALE_HANDLE_REMOVE_FAILED, "%s: Failed"
+ "to remove handle to %s", hpath, newpath);
+ } else if (size == 16) { /* the xattr read returned a full 16-byte gfid and it was not judged stale */
+ gf_msg_debug (this->name, 0, "%s: Fresh handle for "
+ "%s with gfid %s", hpath, newpath,
+ uuid_utoa (gfid_curr));
+ }
+ return;
+}
+
+/*
+ * nftw() callback used by the janitor thread on the trash directory.
+ *
+ * Non-directory entries are unlinked, and their gfid handle is dropped when
+ * the iatt from posix_pstat() shows a link count of 1.  Directories below
+ * the top level are removed and their handle is deleted if stale.
+ * Always returns 0 so the walk continues.
+ */
+static int
+janitor_walker (const char *fpath, const struct stat *sb,
+                int typeflag, struct FTW *ftwbuf)
+{
+        struct iatt  entry = {0, };
+        xlator_t    *this  = THIS;
+        mode_t       kind  = sb->st_mode & S_IFMT;
+
+        posix_pstat (this, NULL, fpath, &entry);
+
+        if (kind == S_IFDIR) {
+                if (ftwbuf->level) {        /* don't remove top level dir */
+                        gf_msg_debug (THIS->name, 0,
+                                      "removing directory %s", fpath);
+
+                        sys_rmdir (fpath);
+                        del_stale_dir_handle (this, entry.ia_gfid);
+                }
+        } else if (kind == S_IFREG || kind == S_IFBLK || kind == S_IFLNK ||
+                   kind == S_IFCHR || kind == S_IFIFO || kind == S_IFSOCK) {
+                gf_msg_trace (THIS->name, 0,
+                              "unlinking %s", fpath);
+                sys_unlink (fpath);
+                if (entry.ia_nlink == 1)
+                        posix_handle_unset (this, entry.ia_gfid, NULL);
+        }
+
+        return 0;   /* 0 = FTW_CONTINUE */
+}
+
+
+/*
+ * Take the next fd queued for the janitor.
+ *
+ * Under priv->janitor_lock: when the queue is empty, wait on
+ * priv->janitor_cond for up to priv->janitor_sleep_duration seconds and
+ * return NULL; otherwise unlink and return the first queued posix_fd.
+ */
+static struct posix_fd *
+janitor_get_next_fd (xlator_t *this)
+{
+        struct posix_private *priv = this->private;
+        struct posix_fd      *next = NULL;
+        struct timespec       deadline;
+
+        pthread_mutex_lock (&priv->janitor_lock);
+        {
+                if (!list_empty (&priv->janitor_fds)) {
+                        next = list_entry (priv->janitor_fds.next,
+                                           struct posix_fd, list);
+
+                        /* &next->list is the queue's first node */
+                        list_del (&next->list);
+                } else {
+                        time (&deadline.tv_sec);
+                        deadline.tv_sec += priv->janitor_sleep_duration;
+                        deadline.tv_nsec = 0;
+
+                        pthread_cond_timedwait (&priv->janitor_cond,
+                                                &priv->janitor_lock,
+                                                &deadline);
+                }
+        }
+        pthread_mutex_unlock (&priv->janitor_lock);
+
+        return next;
+}
+
+
+/*
+ * Body of the janitor thread (@data is the posix xlator).
+ *
+ * Loops forever: whenever more than priv->janitor_sleep_duration seconds
+ * have passed since the last sweep it walks priv->trash_path with
+ * janitor_walker(), then closes and frees the next queued fd, if any.
+ */
+static void *
+posix_janitor_thread_proc (void *data)
+{
+        xlator_t             *this   = data;
+        struct posix_private *priv   = this->private;
+        struct posix_fd      *queued = NULL;
+        time_t                now;
+
+        THIS = this;
+
+        for (;;) {
+                time (&now);
+                if ((now - priv->last_landfill_check) >
+                    priv->janitor_sleep_duration) {
+                        gf_msg_trace (this->name, 0,
+                                      "janitor cleaning out %s",
+                                      priv->trash_path);
+
+                        nftw (priv->trash_path,
+                              janitor_walker,
+                              32,
+                              FTW_DEPTH | FTW_PHYS);
+
+                        priv->last_landfill_check = now;
+                }
+
+                queued = janitor_get_next_fd (this);
+                if (!queued)
+                        continue;
+
+                if (queued->dir != NULL) {
+                        gf_msg_debug (this->name, 0, "janitor: closing"
+                                      " dir fd=%p", queued->dir);
+                        sys_closedir (queued->dir);
+                } else {
+                        gf_msg_trace (this->name, 0,
+                                      "janitor: closing file fd=%d", queued->fd);
+                        sys_close (queued->fd);
+                }
+
+                GF_FREE (queued);
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Start the janitor thread once per xlator.
+ *
+ * Under priv->lock: when no janitor is running yet, create the thread and
+ * set priv->janitor_present.  On failure the flag stays clear, so a later
+ * call can retry.
+ */
+void
+posix_spawn_janitor_thread (xlator_t *this)
+{
+        struct posix_private *priv = NULL;
+        int ret = 0;
+
+        priv = this->private;
+
+        LOCK (&priv->lock);
+        {
+                if (!priv->janitor_present) {
+                        ret = gf_thread_create (&priv->janitor, NULL,
+                                                posix_janitor_thread_proc, this);
+
+                        /* pthread_create()-style calls report failure as a
+                         * positive error number, so test for any non-zero
+                         * result rather than only negative ones.
+                         * NOTE(review): assumes gf_thread_create() passes
+                         * pthread_create()'s result through - confirm. */
+                        if (ret != 0) {
+                                gf_msg (this->name, GF_LOG_ERROR,
+                                        (ret > 0) ? ret : errno,
+                                        P_MSG_THREAD_FAILED, "spawning janitor "
+                                        "thread failed");
+                                goto unlock;
+                        }
+
+                        priv->janitor_present = _gf_true;
+                }
+        }
+unlock:
+        UNLOCK (&priv->lock);
+}
+
+/*
+ * Return 1 when the file's ctime lies within the last second, i.e. in the
+ * interval [now - 1, now]; 0 otherwise.
+ */
+static int
+is_fresh_file (struct stat *stat)
+{
+        struct timeval now;
+
+        gettimeofday (&now, NULL);
+
+        if (stat->st_ctime < (now.tv_sec - 1))
+                return 0;
+        if (stat->st_ctime > now.tv_sec)
+                return 0;
+
+        return 1;
+}
+
+
+int
+posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
+{
+        /* The purpose of this function is to prevent a race
+           where an inode creation FOP (like mkdir/mknod/create etc)
+           races with lookup in the following way:
+
+                   {create thread}       |    {lookup thread}
+                                         |
+                                         t0
+                      mkdir ("name")     |
+                                         t1
+                                         |     posix_gfid_set ("name", 2);
+                                         t2
+             posix_gfid_set ("name", 1); |
+                                         t3
+                      lstat ("name");    |     lstat ("name");
+
+          In the above case mkdir FOP would have resulted with GFID 2 while
+          it should have been GFID 1. It matters in the case where GFID would
+          have gotten set to 1 on other subvolumes of replicate/distribute
+
+          The "solution" here is that, if we detect lookup is attempting to
+          set a GFID on a file which is created very recently, but does not
+          yet have a GFID (i.e, between t1 and t2), then "fake" it as though
+          posix_gfid_heal was called at t0 instead.
+
+          Returns 0 when xattr_req is NULL or path cannot be lstat'ed,
+          -1 with errno ENOENT for a fresh file that has no GFID yet, and
+          otherwise the result of posix_gfid_set().
+        */
+
+        struct stat  stbuf = {0, };
+        uuid_t       uuid_curr;
+        int          ret   = 0;
+
+        if (!xattr_req)
+                return 0;
+
+        if (sys_lstat (path, &stbuf) != 0)
+                return 0;
+
+        ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
+        if (ret != 16 && is_fresh_file (&stbuf)) {
+                /* pretend the entry does not exist yet */
+                errno = ENOENT;
+                return -1;
+        }
+
+        return posix_gfid_set (this, path, loc, xattr_req);
+}
+
+
+/*
+ * Copy the access and default POSIX ACL xattrs found in @xattr_req onto
+ * @path, in that order.
+ *
+ * Returns 0 when @xattr_req is NULL, when @path cannot be lstat'ed, or when
+ * every ACL present was applied; otherwise the non-zero result of the
+ * failing sys_lsetxattr() call.
+ */
+int
+posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req)
+{
+        char        *acl_keys[] = {POSIX_ACL_ACCESS_XATTR,
+                                   POSIX_ACL_DEFAULT_XATTR};
+        struct stat  stbuf      = {0, };
+        data_t      *acl        = NULL;
+        int          ret        = 0;
+        int          idx        = 0;
+
+        if (!xattr_req)
+                return 0;
+
+        if (sys_lstat (path, &stbuf) != 0)
+                return 0;
+
+        for (idx = 0; idx < 2; idx++) {
+                acl = dict_get (xattr_req, acl_keys[idx]);
+                if (!acl)
+                        continue;
+
+                ret = sys_lsetxattr (path, acl_keys[idx],
+                                     acl->data, acl->len, 0);
+#ifdef __FreeBSD__
+                /* on FreeBSD anything other than -1 is treated as success */
+                if (ret != -1) {
+                        ret = 0;
+                }
+#endif /* __FreeBSD__ */
+                if (ret != 0)
+                        break;
+        }
+
+        return ret;
+}
+
+/*
+ * dict_foreach() callback used by posix_entry_create_xattr_set().
+ *
+ * Keys handled elsewhere (gfid, gfid-req, the POSIX ACL keys, ignorable
+ * xattrs and file-content requests) are skipped.  Every other pair is
+ * handed to posix_handle_pair() with XATTR_CREATE.
+ *
+ * Returns 0 on success or skip; on failure sets errno to the negated
+ * posix_handle_pair() result and returns -1.
+ */
+static int
+_handle_entry_create_keyvalue_pair (dict_t *d, char *k, data_t *v,
+                                    void *tmp)
+{
+        posix_xattr_filler_t *filler = tmp;
+        int                   ret    = -1;
+
+        if (!strcmp (GFID_XATTR_KEY, k))
+                return 0;
+        if (!strcmp ("gfid-req", k))
+                return 0;
+        if (!strcmp (POSIX_ACL_DEFAULT_XATTR, k))
+                return 0;
+        if (!strcmp (POSIX_ACL_ACCESS_XATTR, k))
+                return 0;
+        if (posix_xattr_ignorable (k))
+                return 0;
+        if (ZR_FILE_CONTENT_REQUEST(k))
+                return 0;
+
+        ret = posix_handle_pair (filler->this, filler->real_path, k, v,
+                                 XATTR_CREATE, filler->stbuf);
+        if (ret >= 0)
+                return 0;
+
+        errno = -ret;
+        return -1;
+}
+
+/*
+ * Apply the xattrs carried in @dict to the newly created entry at @path by
+ * running _handle_entry_create_keyvalue_pair() over every pair.
+ *
+ * Returns -1 when @dict is NULL, otherwise the dict_foreach() result.
+ */
+int
+posix_entry_create_xattr_set (xlator_t *this, const char *path,
+                              dict_t *dict)
+{
+        posix_xattr_filler_t filler = {0,};
+
+        if (!dict)
+                return -1;
+
+        filler.this      = this;
+        filler.real_path = path;
+        filler.stbuf     = NULL;
+
+        return dict_foreach (dict, _handle_entry_create_keyvalue_pair,
+                             &filler);
+}
+
+/*
+ * Look up the posix_fd stored in the context of @fd, creating one on the
+ * fly for anonymous fds.  The double-underscore prefix follows the file's
+ * convention for helpers that expect the caller to hold the lock (see
+ * posix_fd_ctx_get(), which takes fd->inode->lock around this call).
+ *
+ * @fd         fd whose context is wanted
+ * @this       posix xlator
+ * @pfd_p      if non-NULL, receives the posix_fd (NULL on failure)
+ * @op_errno_p if non-NULL, receives the error code on failure
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+__posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p,
+                    int *op_errno_p)
+{
+        uint64_t              tmp_pfd     = 0;
+        struct posix_fd      *pfd         = NULL;
+        int                   ret         = -1;
+        char                 *real_path   = NULL;
+        char                 *unlink_path = NULL;
+        int                   _fd         = -1;
+        int                   op_errno    = 0;
+        DIR                  *dir         = NULL;
+
+        struct posix_private *priv        = NULL;
+
+        priv = this->private;
+
+        ret = __fd_ctx_get (fd, this, &tmp_pfd);
+        if (ret == 0) {
+                pfd = (void *)(long) tmp_pfd;
+                goto out;
+        }
+
+        /* Every failure below reports -1, whatever non-zero value the
+         * context lookup returned, so that op_errno is always handed back. */
+        ret = -1;
+
+        if (!fd_is_anonymous(fd)) {
+                /* real_path has not been built yet at this point, so only
+                 * the gfid can be logged. */
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        P_MSG_READ_FAILED,
+                        "Failed to get fd context for a non-anonymous fd, "
+                        "gfid: %s", uuid_utoa (fd->inode->gfid));
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        MAKE_HANDLE_PATH (real_path, this, fd->inode->gfid, NULL);
+        if (!real_path) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        P_MSG_READ_FAILED,
+                        "Failed to create handle path (%s)",
+                        uuid_utoa (fd->inode->gfid));
+                op_errno = EINVAL;
+                goto out;
+        }
+        pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
+        if (!pfd) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+        pfd->fd = -1;
+
+        if (fd->inode->ia_type == IA_IFDIR) {
+                dir = sys_opendir (real_path);
+                if (!dir) {
+                        op_errno = errno;
+                        GF_FREE (pfd);
+                        pfd = NULL;
+                        goto out;
+                }
+                _fd = dirfd (dir);
+        }
+
+        /* Using fd->flags in case we choose to have anonymous
+         * fds with different flags some day. As of today it
+         * would be GF_ANON_FD_FLAGS and nothing else.
+         */
+        if (fd->inode->ia_type == IA_IFREG) {
+                _fd = open (real_path, fd->flags);
+                if (_fd == -1) {
+                        /* fall back to the unlink path for this gfid */
+                        POSIX_GET_FILE_UNLINK_PATH (priv->base_path,
+                                                    fd->inode->gfid,
+                                                    unlink_path);
+                        _fd = open (unlink_path, fd->flags);
+                }
+                if (_fd == -1) {
+                        op_errno = errno;
+                        gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                                P_MSG_READ_FAILED,
+                                "Failed to get anonymous "
+                                "real_path: %s _fd = %d", real_path, _fd);
+                        GF_FREE (pfd);
+                        pfd = NULL;
+                        goto out;
+                }
+        }
+
+        pfd->fd = _fd;
+        pfd->dir = dir;
+        pfd->flags = fd->flags;
+
+        ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd);
+        if (ret != 0) {
+                ret = -1;
+                op_errno = ENOMEM;
+                if (_fd != -1)
+                        sys_close (_fd);
+                if (dir)
+                        sys_closedir (dir);
+                GF_FREE (pfd);
+                pfd = NULL;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret < 0 && op_errno_p)
+                *op_errno_p = op_errno;
+
+        if (pfd_p)
+                *pfd_p = pfd;
+        return ret;
+}
+
+
+/*
+ * Locked wrapper around __posix_fd_ctx_get(): takes fd->inode->lock for
+ * the duration of the lookup and returns the helper's result unchanged.
+ */
+int
+posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd,
+                  int *op_errno)
+{
+        int rc = -1;
+
+        LOCK (&fd->inode->lock);
+        {
+                rc = __posix_fd_ctx_get (fd, this, pfd, op_errno);
+        }
+        UNLOCK (&fd->inode->lock);
+
+        return rc;
+}
+
+/*
+ * Exercise the brick's backing filesystem: create/open
+ * <base_path>/<GF_HIDDEN_PATH>/health_check, write the current timestamp
+ * into it, seek back to the start and read it again.
+ *
+ * Returns 0 when open, write, seek and read all succeed, -1 otherwise
+ * (including when @this or its private data is NULL).
+ */
+int
+posix_fs_health_check (xlator_t *this)
+{
+        struct posix_private *priv                = NULL;
+        int                   ret                 = -1;
+        char                 *subvol_path         = NULL;
+        char                  timestamp[256]      = {0,};
+        int                   fd                  = -1;
+        int                   timelen             = -1;
+        int                   nofbytes            = 0;
+        time_t                time_sec            = 0;
+        /* same size as timestamp: timelen bytes are read back into it */
+        char                  buff[sizeof (timestamp)] = {0};
+        char                  file_path[PATH_MAX] = {0};
+
+        /* this->name cannot be used as the log domain while validating
+         * 'this' itself */
+        GF_VALIDATE_OR_GOTO ("posix-helpers", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out);
+
+        subvol_path = priv->base_path;
+        snprintf (file_path, sizeof (file_path), "%s/%s/health_check",
+                  subvol_path, GF_HIDDEN_PATH);
+
+        time_sec = time (NULL);
+        gf_time_fmt (timestamp, sizeof timestamp, time_sec, gf_timefmt_FT);
+        timelen = strlen (timestamp);
+
+        fd = open (file_path, O_CREAT|O_RDWR, 0644);
+        if (fd == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HEALTHCHECK_FAILED,
+                        "open() on %s returned", file_path);
+                goto out;
+        }
+        nofbytes = sys_write (fd, timestamp, timelen);
+        if (nofbytes != timelen) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HEALTHCHECK_FAILED,
+                        "write() on %s returned", file_path);
+                goto out;
+        }
+        /* Seek the offset to the beginning of the file, so that the offset
+           for read is from beginning of file.  A failed seek would leave
+           the read at end-of-file, so it is treated as a failed check. */
+        if (sys_lseek (fd, 0, SEEK_SET) == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HEALTHCHECK_FAILED,
+                        "lseek() on %s returned", file_path);
+                goto out;
+        }
+        nofbytes = sys_read (fd, buff, timelen);
+        if (nofbytes == -1) {
+                gf_msg (this->name, GF_LOG_WARNING, errno,
+                        P_MSG_HEALTHCHECK_FAILED,
+                        "read() on %s returned", file_path);
+                goto out;
+        }
+        ret = 0;
+out:
+        if (fd != -1) {
+                sys_close (fd);
+        }
+        return ret;
+
+}
+
+/*
+ * Entry point of the health-check thread.
+ *
+ * @data is the posix xlator.  The interval is sampled once at start; a
+ * value of 0 makes the thread exit immediately.  Otherwise the thread
+ * sleeps for the interval and runs posix_fs_health_check() with
+ * cancellation disabled, over and over.
+ *
+ * On a failed check it notifies the parent xlator with
+ * GF_EVENT_CHILD_DOWN, then sends SIGTERM and finally SIGKILL to its own
+ * process, 30 seconds apart.  Always returns NULL.
+ */
+static void *
+posix_health_check_thread_proc (void *data)
+{
+        xlator_t             *this     = NULL;
+        struct posix_private *priv     = NULL;
+        uint32_t              interval = 0;
+        int                   ret      = -1;
+
+        this = data;
+        priv = this->private;
+
+        /* prevent races when the interval is updated */
+        interval = priv->health_check_interval;
+        if (interval == 0)
+                goto out;
+
+        /* interval is unsigned, so it is printed with %u */
+        gf_msg_debug (this->name, 0, "health-check thread started, "
+                      "interval = %u seconds", interval);
+
+        while (1) {
+                /* aborting sleep() is a request to exit this thread, sleep()
+                 * will normally not return when cancelled */
+                ret = sleep (interval);
+                if (ret > 0)
+                        break;
+
+                /* prevent thread errors while doing the health-check(s) */
+                pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+                /* Do the health-check.*/
+                ret = posix_fs_health_check (this);
+
+                if (ret < 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, errno,
+                                P_MSG_HEALTHCHECK_FAILED,
+                                "health_check on %s returned",
+                                priv->base_path);
+                        goto abort;
+                }
+
+                pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+        }
+
+out:
+        gf_msg_debug (this->name, 0, "health-check thread exiting");
+
+        LOCK (&priv->lock);
+        {
+                priv->health_check_active = _gf_false;
+        }
+        UNLOCK (&priv->lock);
+
+        return NULL;
+
+abort:
+        /* health-check failed */
+        gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
+                "health-check failed, going down");
+        xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+
+        /* give the process 30 seconds to go down before signalling it */
+        ret = sleep (30);
+        if (ret == 0) {
+                gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
+                        "still alive! -> SIGTERM");
+                kill (getpid(), SIGTERM);
+        }
+
+        ret = sleep (30);
+        if (ret == 0) {
+                gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
+                        "still alive! -> SIGKILL");
+                kill (getpid(), SIGKILL);
+        }
+
+        return NULL;
+}
+
+/*
+ * (Re)start the health-check thread of @xl.
+ *
+ * Under priv->lock: cancel the thread currently marked active, and, unless
+ * the configured interval is 0, create a new detached thread running
+ * posix_health_check_thread_proc().  If creation fails the interval is
+ * reset to 0 and the thread is marked inactive.
+ */
+void
+posix_spawn_health_check_thread (xlator_t *xl)
+{
+        struct posix_private *priv = xl->private;
+        int                   rc   = -1;
+
+        LOCK (&priv->lock);
+        {
+                /* cancel the running thread */
+                if (priv->health_check_active == _gf_true) {
+                        pthread_cancel (priv->health_check);
+                        priv->health_check_active = _gf_false;
+                }
+
+                /* prevent scheduling a check in a tight loop */
+                if (priv->health_check_interval == 0)
+                        goto unlock;
+
+                rc = gf_thread_create (&priv->health_check, NULL,
+                                       posix_health_check_thread_proc, xl);
+                if (rc >= 0) {
+                        /* run the thread detached, resources will be freed
+                         * on exit */
+                        pthread_detach (priv->health_check);
+                        priv->health_check_active = _gf_true;
+                } else {
+                        priv->health_check_interval = 0;
+                        priv->health_check_active = _gf_false;
+                        gf_msg (xl->name, GF_LOG_ERROR, errno,
+                                P_MSG_HEALTHCHECK_FAILED,
+                                "unable to setup health-check thread");
+                }
+        }
+unlock:
+        UNLOCK (&priv->lock);
+}
+
+/*
+ * Wait until priv->fsyncs is non-empty, then move every queued entry onto
+ * @head and reset the queue counter.
+ *
+ * Returns the value priv->fsync_queue_count held before it was reset.
+ */
+int
+posix_fsyncer_pick (xlator_t *this, struct list_head *head)
+{
+        struct posix_private *priv   = this->private;
+        int                   picked = 0;
+
+        pthread_mutex_lock (&priv->fsync_mutex);
+        {
+                while (list_empty (&priv->fsyncs)) {
+                        pthread_cond_wait (&priv->fsync_cond,
+                                           &priv->fsync_mutex);
+                }
+
+                picked = priv->fsync_queue_count;
+                priv->fsync_queue_count = 0;
+                list_splice_init (&priv->fsyncs, head);
+        }
+        pthread_mutex_unlock (&priv->fsync_mutex);
+
+        return picked;
+}
+
+
+/*
+ * Complete one queued fsync request.
+ *
+ * @this      posix xlator
+ * @stub      call stub of the fsync request; always unwound here
+ * @do_fsync  when false the sync itself is skipped and the stub is
+ *            unwound successfully once the fd context has been resolved
+ *
+ * The stub is unwound with (-1, errno) when the fd context cannot be
+ * obtained or the fsync/fdatasync call fails, and with (0, 0) otherwise.
+ */
+void
+posix_fsyncer_process (xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync)
+{
+        struct posix_fd *pfd      = NULL;
+        int              ret      = -1;
+        int              op_errno = 0;
+
+        ret = posix_fd_ctx_get (stub->args.fd, this, &pfd, &op_errno);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        P_MSG_GET_FDCTX_FAILED,
+                        "could not get fdctx for fd(%s)",
+                        uuid_utoa (stub->args.fd->inode->gfid));
+                call_unwind_error (stub, -1, op_errno);
+                return;
+        }
+
+        if (do_fsync) {
+                if (stub->args.datasync)
+                        ret = sys_fdatasync (pfd->fd);
+                else
+                        ret = sys_fsync (pfd->fd);
+        } else {
+                ret = 0;
+        }
+
+        if (ret) {
+                /* capture errno first: the logging call may overwrite it */
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        P_MSG_FSYNC_FAILED,
+                        "could not fsync fd(%s)",
+                        uuid_utoa (stub->args.fd->inode->gfid));
+                call_unwind_error (stub, -1, op_errno);
+                return;
+        }
+
+        call_unwind_error (stub, 0, 0);
+}
+
+
+/*
+ * Flush the filesystem once for a whole batch of fsync requests.
+ *
+ * The fd of the last stub on @head is resolved.  On Linux builds where
+ * SYS_syncfs is defined it is passed to the syncfs syscall; everywhere
+ * else a plain sync() is issued.  Nothing is done if the fd context
+ * cannot be obtained.
+ */
+static void
+posix_fsyncer_syncfs (xlator_t *this, struct list_head *head)
+{
+        call_stub_t     *last = NULL;
+        struct posix_fd *pfd  = NULL;
+
+        last = list_entry (head->prev, call_stub_t, list);
+        if (posix_fd_ctx_get (last->args.fd, this, &pfd, NULL) != 0)
+                return;
+
+#ifdef GF_LINUX_HOST_OS
+        /* syncfs() is not "declared" in RHEL's glibc even though
+           the kernel has support.
+        */
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
+#if defined(GF_LINUX_HOST_OS) && defined(SYS_syncfs)
+        syscall (SYS_syncfs, pfd->fd);
+#else
+        sync();
+#endif
+}
+
+
+/*
+ * Entry point of the batch-fsync thread; @d is the posix xlator.
+ *
+ * Each round picks up all queued fsync stubs, sleeps for
+ * priv->batch_fsync_delay_usec, issues one filesystem-wide sync for the
+ * SYNCFS modes, and then processes the stubs from the tail of the list to
+ * the head.  In BATCH_SYNCFS mode no per-fd fsync is done; in
+ * BATCH_SYNCFS_SINGLE_FSYNC mode only the first processed stub gets one.
+ * The loop never terminates.
+ */
+void *
+posix_fsyncer (void *d)
+{
+        xlator_t             *this     = d;
+        struct posix_private *priv     = this->private;
+        struct list_head      batch;
+        call_stub_t          *stub     = NULL;
+        call_stub_t          *next     = NULL;
+        gf_boolean_t          do_fsync = _gf_true;
+        int                   picked   = 0;
+
+        while (1) {
+                INIT_LIST_HEAD (&batch);
+
+                picked = posix_fsyncer_pick (this, &batch);
+
+                usleep (priv->batch_fsync_delay_usec);
+
+                gf_msg_debug (this->name, 0,
+                              "picked %d fsyncs", picked);
+
+                switch (priv->batch_fsync_mode) {
+                case BATCH_SYNCFS:
+                case BATCH_SYNCFS_SINGLE_FSYNC:
+                case BATCH_SYNCFS_REVERSE_FSYNC:
+                        posix_fsyncer_syncfs (this, &batch);
+                        break;
+                case BATCH_NONE:
+                case BATCH_REVERSE_FSYNC:
+                        break;
+                }
+
+                do_fsync = (priv->batch_fsync_mode == BATCH_SYNCFS)
+                           ? _gf_false : _gf_true;
+
+                list_for_each_entry_safe_reverse (stub, next, &batch, list) {
+                        list_del_init (&stub->list);
+
+                        posix_fsyncer_process (this, stub, do_fsync);
+
+                        /* single-fsync mode: only the first stub syncs */
+                        if (priv->batch_fsync_mode ==
+                            BATCH_SYNCFS_SINGLE_FSYNC)
+                                do_fsync = _gf_false;
+                }
+        }
+}
+
+/**
+ * TODO: move fd/inode interfaces into a single routine..
+ */
+/*
+ * Read xattr @key of @real_path and store its value in @xattr under the
+ * same key.  When @xsize is non-NULL it receives the value size.
+ *
+ * Returns 0 on success and also when the xattr is simply absent
+ * (ENOATTR/ENODATA), in which case nothing is stored; -1 on any other
+ * failure.
+ */
+static int32_t
+posix_fetch_signature_xattr (char *real_path,
+                             const char *key, dict_t *xattr, size_t *xsize)
+{
+        ssize_t  len   = 0;
+        char    *value = NULL;
+        int32_t  ret   = 0;
+
+        /* first call only asks for the size of the value */
+        len = sys_lgetxattr (real_path, key, NULL, 0);
+        if (len == -1) {
+                if ((errno == ENOATTR) || (errno == ENODATA))
+                        return 0;
+                return -1;
+        }
+
+        value = GF_CALLOC (len + 1, sizeof (char), gf_posix_mt_char);
+        if (!value)
+                return -1;
+
+        ret = sys_lgetxattr (real_path, key, value, len);
+        if (ret == -1) {
+                GF_FREE (value);
+                return -1;
+        }
+
+        ret = dict_set_dynptr (xattr, (char *)key, value, len);
+        if (ret) {
+                GF_FREE (value);
+                return -1;
+        }
+
+        if (xsize)
+                *xsize = len;
+
+        return 0;
+}
+
+/*
+ * fd-based twin of posix_fetch_signature_xattr(): read xattr @key from
+ * the open descriptor @fd and store its value in @xattr under the same
+ * key.  When @xsize is non-NULL it receives the value size.
+ *
+ * Returns 0 on success and also when the xattr is simply absent
+ * (ENOATTR/ENODATA), in which case nothing is stored; -1 on any other
+ * failure.
+ */
+static int32_t
+posix_fd_fetch_signature_xattr (int fd,
+                                const char *key, dict_t *xattr, size_t *xsize)
+{
+        ssize_t  len   = 0;
+        char    *value = NULL;
+        int32_t  ret   = 0;
+
+        /* first call only asks for the size of the value */
+        len = sys_fgetxattr (fd, key, NULL, 0);
+        if (len == -1) {
+                if ((errno == ENOATTR) || (errno == ENODATA))
+                        return 0;
+                return -1;
+        }
+
+        value = GF_CALLOC (len + 1, sizeof (char), gf_posix_mt_char);
+        if (!value)
+                return -1;
+
+        ret = sys_fgetxattr (fd, key, value, len);
+        if (ret == -1) {
+                GF_FREE (value);
+                return -1;
+        }
+
+        ret = dict_set_dynptr (xattr, (char *)key, value, len);
+        if (ret) {
+                GF_FREE (value);
+                return -1;
+        }
+
+        if (xsize)
+                *xsize = len;
+
+        return 0;
+}
+
+/**
+ * Fetch on-disk ongoing version and object signature extended attribute.
+ * Be generous to absence of xattrs (just *absence*, other errors are
+ * propagated up to the invoker), higher layer (br-stub) takes care of
+ * interpreting the xattrs for anomalies.
+ */
+/*
+ * Path-based fetch of the bitrot xattrs: the current version, the signing
+ * version and the size of the signing xattr are added to @xattr.
+ *
+ * Returns 0 on success.  On failure, keys already added by this call are
+ * removed again and -EINVAL is returned.
+ */
+int32_t
+posix_get_objectsignature (char *real_path, dict_t *xattr)
+{
+        size_t  signsize = 0;
+        int32_t ret      = 0;
+
+        ret = posix_fetch_signature_xattr (real_path,
+                                           BITROT_CURRENT_VERSION_KEY,
+                                           xattr, NULL);
+        if (ret)
+                return -EINVAL;
+
+        ret = posix_fetch_signature_xattr (real_path,
+                                           BITROT_SIGNING_VERSION_KEY,
+                                           xattr, &signsize);
+        if (ret) {
+                dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
+                return -EINVAL;
+        }
+
+        ret = dict_set_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+                               (uint32_t) signsize);
+        if (ret) {
+                dict_del (xattr, BITROT_SIGNING_VERSION_KEY);
+                dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/*
+ * fd-based fetch of the bitrot xattrs: the current version, the signing
+ * version and the size of the signing xattr are added to @xattr.
+ *
+ * Returns 0 on success.  On failure, keys already added by this call are
+ * removed again and -EINVAL is returned.
+ */
+int32_t
+posix_fdget_objectsignature (int fd, dict_t *xattr)
+{
+        size_t  signsize = 0;
+        int32_t ret      = 0;
+
+        ret = posix_fd_fetch_signature_xattr (fd, BITROT_CURRENT_VERSION_KEY,
+                                              xattr, NULL);
+        if (ret)
+                return -EINVAL;
+
+        ret = posix_fd_fetch_signature_xattr (fd, BITROT_SIGNING_VERSION_KEY,
+                                              xattr, &signsize);
+        if (ret) {
+                dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
+                return -EINVAL;
+        }
+
+        ret = dict_set_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+                               (uint32_t) signsize);
+        if (ret) {
+                dict_del (xattr, BITROT_SIGNING_VERSION_KEY);
+                dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Fetch the posix context value stored on @inode for @this.
+ *
+ * @ctx, when non-NULL, receives the value on success.
+ *
+ * Returns -1 when @this or @inode is NULL, otherwise the result of
+ * inode_ctx_get().
+ */
+int
+posix_inode_ctx_get (inode_t *inode, xlator_t *this, uint64_t *ctx)
+{
+        int             ret     = -1;
+        uint64_t        ctx_int = 0;
+
+        /* this->name cannot be used as the log domain while validating
+         * 'this' itself */
+        GF_VALIDATE_OR_GOTO ("posix-helpers", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ret = inode_ctx_get (inode, this, &ctx_int);
+
+        if (ret)
+                return ret;
+
+        if (ctx)
+                *ctx = ctx_int;
+
+out:
+        return ret;
+}
+
+
+/*
+ * Store @ctx as the posix context value of @inode for @this.
+ *
+ * Returns -1 when @this or @inode is NULL or @ctx is 0, otherwise the
+ * result of inode_ctx_set().
+ */
+int
+posix_inode_ctx_set (inode_t *inode, xlator_t *this, uint64_t ctx)
+{
+        int             ret = -1;
+
+        /* this->name cannot be used as the log domain while validating
+         * 'this' itself */
+        GF_VALIDATE_OR_GOTO ("posix-helpers", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+        ret = inode_ctx_set (inode, this, &ctx);
+out:
+        return ret;
+}
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index 6687560cfab..b463c086be5 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __POSIX_MEM_TYPES_H__
#define __POSIX_MEM_TYPES_H__
@@ -30,6 +20,8 @@ enum gf_posix_mem_types_ {
gf_posix_mt_int32_t,
gf_posix_mt_posix_dev_t,
gf_posix_mt_trash_path,
+ gf_posix_mt_paiocb,
+ gf_posix_mt_inode_ctx_t,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
new file mode 100644
index 00000000000..ba6bf2c43ac
--- /dev/null
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -0,0 +1,951 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_MESSAGES_H_
+#define _POSIX_MESSAGES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs-message-id.h"
+
+/*! \file posix-messages.h
+ * \brief Posix log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for readability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check across the code if the message ID macro in question is reused
+ * anywhere. If reused then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define POSIX_COMP_BASE GLFS_MSGID_COMP_POSIX
+#define GLFS_NUM_MESSAGES 110
+#define GLFS_MSGID_END (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messages with message IDs */
+#define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+
+/*!
+ * @messageid 106001
+ * @diagnosis Operation could not be performed because the server quorum was not
+ * met
+ * @recommendedaction Ensure that other peer nodes are online and reachable from
+ * the local peer node
+ */
+
+#define P_MSG_XATTR_FAILED (POSIX_COMP_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_NULL_GFID (POSIX_COMP_BASE + 2)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+
+#define P_MSG_FCNTL_FAILED (POSIX_COMP_BASE + 3)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_READV_FAILED (POSIX_COMP_BASE + 4)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FSTAT_FAILED (POSIX_COMP_BASE + 5)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_PFD_NULL (POSIX_COMP_BASE + 6)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INVALID_ARGUMENT (POSIX_COMP_BASE + 7)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_IO_SUBMIT_FAILED (POSIX_COMP_BASE + 8)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_WRITEV_FAILED (POSIX_COMP_BASE + 9)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_IO_GETEVENTS_FAILED (POSIX_COMP_BASE + 10)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_UNKNOWN_OP (POSIX_COMP_BASE + 11)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_AIO_UNAVAILABLE (POSIX_COMP_BASE + 12)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_IO_SETUP_FAILED (POSIX_COMP_BASE + 13)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_ZEROFILL_FAILED (POSIX_COMP_BASE + 14)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_OPENDIR_FAILED (POSIX_COMP_BASE + 15)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DIRFD_FAILED (POSIX_COMP_BASE + 16)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FD_PATH_SETTING_FAILED (POSIX_COMP_BASE + 17)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_LSTAT_FAILED (POSIX_COMP_BASE + 18)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_READYLINK_FAILED (POSIX_COMP_BASE + 19)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_GFID_FAILED (POSIX_COMP_BASE + 20)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_CREATE_FAILED (POSIX_COMP_BASE + 21)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_MKNOD_FAILED (POSIX_COMP_BASE + 22)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_LCHOWN_FAILED (POSIX_COMP_BASE + 23)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_ACL_FAILED (POSIX_COMP_BASE + 24)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_MKDIR_NOT_PERMITTED (POSIX_COMP_BASE + 25)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DIR_OF_SAME_ID (POSIX_COMP_BASE + 26)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_MKDIR_FAILED (POSIX_COMP_BASE + 27)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_CHOWN_FAILED (POSIX_COMP_BASE + 28)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_UNLINK_FAILED (POSIX_COMP_BASE + 29)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_KEY_STATUS_INFO (POSIX_COMP_BASE + 30)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_XATTR_STATUS (POSIX_COMP_BASE + 31)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_RMDIR_NOT_PERMITTED (POSIX_COMP_BASE + 32)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_RMDIR_FAILED (POSIX_COMP_BASE + 33)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DIR_OPERATION_FAILED (POSIX_COMP_BASE + 34)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SYMLINK_FAILED (POSIX_COMP_BASE + 35)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DIR_FOUND (POSIX_COMP_BASE + 36)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_LINK_FAILED (POSIX_COMP_BASE + 37)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_TRUNCATE_FAILED (POSIX_COMP_BASE + 38)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FILE_OP_FAILED (POSIX_COMP_BASE + 39)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_READ_FAILED (POSIX_COMP_BASE + 40)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DICT_SET_FAILED (POSIX_COMP_BASE + 41)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_STATVFS_FAILED (POSIX_COMP_BASE + 42)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DIR_NOT_NULL (POSIX_COMP_BASE + 43)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FSYNC_FAILED (POSIX_COMP_BASE + 44)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_CLOSE_FAILED (POSIX_COMP_BASE + 45)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_GETTING_FILENAME_FAILED (POSIX_COMP_BASE + 46)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INODE_PATH_GET_FAILED (POSIX_COMP_BASE + 47)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_GET_KEY_VALUE_FAILED (POSIX_COMP_BASE + 48)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_CHMOD_FAILED (POSIX_COMP_BASE + 49)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FCHMOD_FAILED (POSIX_COMP_BASE + 50)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FCHOWN_FAILED (POSIX_COMP_BASE + 51)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_UTIMES_FAILED (POSIX_COMP_BASE + 52)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FUTIMES_FAILED (POSIX_COMP_BASE + 53)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_XATTR_NOT_REMOVED (POSIX_COMP_BASE + 54)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_PFD_GET_FAILED (POSIX_COMP_BASE + 55)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_ACCESS_FAILED (POSIX_COMP_BASE + 56)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_PREAD_FAILED (POSIX_COMP_BASE + 57)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_UUID_NULL (POSIX_COMP_BASE + 58)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_EXPORT_DIR_MISSING (POSIX_COMP_BASE + 59)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SUBVOLUME_ERROR (POSIX_COMP_BASE + 60)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_VOLUME_DANGLING (POSIX_COMP_BASE + 61)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INVALID_OPTION (POSIX_COMP_BASE + 62)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INVALID_VOLUME_ID (POSIX_COMP_BASE + 63)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_VOLUME_ID_ABSENT (POSIX_COMP_BASE + 64)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HOSTNAME_MISSING (POSIX_COMP_BASE + 65)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SET_ULIMIT_FAILED (POSIX_COMP_BASE + 66)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SET_FILE_MAX_FAILED (POSIX_COMP_BASE + 67)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_MAX_FILE_OPEN (POSIX_COMP_BASE + 68)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+
+#define P_MSG_OPEN_FAILED (POSIX_COMP_BASE + 69)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_LOOKUP_NOT_PERMITTED (POSIX_COMP_BASE + 70)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_RENAME_FAILED (POSIX_COMP_BASE + 71)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_WRITE_FAILED (POSIX_COMP_BASE + 72)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FILE_FAILED (POSIX_COMP_BASE + 73)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_THREAD_FAILED (POSIX_COMP_BASE + 74)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HEALTHCHECK_FAILED (POSIX_COMP_BASE + 75)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_GET_FDCTX_FAILED (POSIX_COMP_BASE + 76)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLEPATH_FAILED (POSIX_COMP_BASE + 77)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_IPC_NOT_HANDLE (POSIX_COMP_BASE + 78)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SET_XDATA_FAIL (POSIX_COMP_BASE + 79)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_DURABILITY_REQ_NOT_SATISFIED (POSIX_COMP_BASE + 80)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_XATTR_NOTSUP (POSIX_COMP_BASE + 81)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_GFID_SET_FAILED (POSIX_COMP_BASE + 82)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_ACL_NOTSUP (POSIX_COMP_BASE + 83)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_BASEPATH_CHDIR_FAILED (POSIX_COMP_BASE + 84)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INVALID_OPTION_VAL (POSIX_COMP_BASE + 85)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INVALID_NODE_UUID (POSIX_COMP_BASE + 86)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_FSYNCER_THREAD_CREATE_FAILED (POSIX_COMP_BASE + 87)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_GF_DIRENT_CREATE_FAILED (POSIX_COMP_BASE + 88)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_VOLUME_ID_FETCH_FAILED (POSIX_COMP_BASE + 89)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_UNKNOWN_ARGUMENT (POSIX_COMP_BASE + 90)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INODE_HANDLE_CREATE (POSIX_COMP_BASE + 91)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_ENTRY_HANDLE_CREATE (POSIX_COMP_BASE + 92)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_PGFID_OP (POSIX_COMP_BASE + 93)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_POSIX_AIO (POSIX_COMP_BASE + 94)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLE_CREATE_TRASH (POSIX_COMP_BASE + 95)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLE_CREATE (POSIX_COMP_BASE + 96)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLE_PATH_CREATE (POSIX_COMP_BASE + 97)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SET_FILE_CONTENTS (POSIX_COMP_BASE + 98)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_XDATA_GETXATTR (POSIX_COMP_BASE + 99)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_STALE_HANDLE_REMOVE_FAILED (POSIX_COMP_BASE + 100)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLE_PATH_CREATE_FAILED (POSIX_COMP_BASE + 101)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLE_TRASH_CREATE (POSIX_COMP_BASE + 102)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_HANDLE_DELETE (POSIX_COMP_BASE + 103)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_READLINK_FAILED (POSIX_COMP_BASE + 104)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_BUFFER_OVERFLOW (POSIX_COMP_BASE + 105)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SEEK_UNKOWN (POSIX_COMP_BASE + 106)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_SEEK_FAILED (POSIX_COMP_BASE + 107)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_INODE_RESOLVE_FAILED (POSIX_COMP_BASE + 108)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_PREOP_CHECK_FAILED (POSIX_COMP_BASE + 109)
+
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define P_MSG_LEASE_DISABLED (POSIX_COMP_BASE + 110)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_POSIX_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 2aba365e62b..92971551c83 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1,29 +1,20 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#define __XOPEN_SOURCE 500
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
#endif
-#define __XOPEN_SOURCE 500
-
+#include <openssl/md5.h>
#include <stdint.h>
#include <sys/time.h>
#include <sys/resource.h>
@@ -31,13 +22,20 @@
#include <libgen.h>
#include <pthread.h>
#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
+#include <unistd.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
#endif /* GF_BSD_HOST_OS */
+#ifdef HAVE_LINKAT
+#include <fcntl.h>
+#endif /* HAVE_LINKAT */
+
#include "glusterfs.h"
-#include "md5.h"
#include "checksum.h"
#include "dict.h"
#include "logging.h"
@@ -52,18 +50,26 @@
#include "statedump.h"
#include "locking.h"
#include "timer.h"
+#include "glusterfs3-xdr.h"
+#include "hashfn.h"
+#include "posix-aio.h"
+#include "glusterfs-acl.h"
+#include "posix-messages.h"
+
+extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
#undef HAVE_SET_FSID
#ifdef HAVE_SET_FSID
#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid;
-#define SET_FS_ID(uid, gid) do { \
+#define SET_FS_ID(uid, gid) do { \
old_fsuid = setfsuid (uid); \
old_fsgid = setfsgid (gid); \
} while (0)
-#define SET_TO_OLD_FS_ID() do { \
+#define SET_TO_OLD_FS_ID() do { \
setfsuid (old_fsuid); \
setfsgid (old_fsgid); \
} while (0)
@@ -76,459 +82,176 @@
#endif
-typedef struct {
- xlator_t *this;
- const char *real_path;
- dict_t *xattr;
- struct iatt *stbuf;
- loc_t *loc;
-} posix_xattr_filler_t;
-
-int
-posix_forget (xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_cache = 0;
- if (!inode_ctx_del (inode, this, &tmp_cache))
- dict_destroy ((dict_t *)(long)tmp_cache);
-
- return 0;
-}
-
-static void
-_posix_xattr_get_set (dict_t *xattr_req,
- char *key,
- data_t *data,
- void *xattrargs)
-{
- posix_xattr_filler_t *filler = xattrargs;
- char *value = NULL;
- ssize_t xattr_size = -1;
- int ret = -1;
- char *databuf = NULL;
- int _fd = -1;
- loc_t *loc = NULL;
- ssize_t req_size = 0;
-
-
- /* should size be put into the data_t ? */
- if (!strcmp (key, "glusterfs.content")
- && IA_ISREG (filler->stbuf->ia_type)) {
-
- /* file content request */
- req_size = data_to_uint64 (data);
- if (req_size >= filler->stbuf->ia_size) {
- _fd = open (filler->real_path, O_RDONLY);
-
- if (_fd == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Opening file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- databuf = GF_CALLOC (1, filler->stbuf->ia_size,
- gf_posix_mt_char);
-
- if (!databuf) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto err;
- }
-
- ret = read (_fd, databuf, filler->stbuf->ia_size);
- if (ret == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Read on file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- ret = close (_fd);
- _fd = -1;
- if (ret == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Close on file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- ret = dict_set_bin (filler->xattr, key,
- databuf, filler->stbuf->ia_size);
- if (ret < 0) {
- goto err;
- }
-
- /* To avoid double free in cleanup below */
- databuf = NULL;
- err:
- if (_fd != -1)
- close (_fd);
- if (databuf)
- GF_FREE (databuf);
- }
- } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
- loc = filler->loc;
- if (!list_empty (&loc->inode->fd_list)) {
- ret = dict_set_uint32 (filler->xattr, key, 1);
- } else {
- ret = dict_set_uint32 (filler->xattr, key, 0);
- }
- } else {
- xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0);
-
- if (xattr_size > 0) {
- value = GF_CALLOC (1, xattr_size + 1,
- gf_posix_mt_char);
-
- sys_lgetxattr (filler->real_path, key, value,
- xattr_size);
-
- value[xattr_size] = '\0';
- ret = dict_set_bin (filler->xattr, key,
- value, xattr_size);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_DEBUG,
- "dict set failed. path: %s, key: %s",
- filler->real_path, key);
- }
- }
-}
-
-
-static int
-posix_scale_ia_ino (struct posix_private *priv, struct iatt *buf)
-{
- int i = 0;
- int ret = -1;
- ino_t temp_ino = 0;
- int r;
- struct stat lstatbuf;
- struct iatt export_buf = {0 ,};
-
- for (i = 0; i < priv->num_devices_to_span; i++) {
- if (buf->ia_dev == priv->st_device[i]) {
- break;
- }
- if (priv->st_device[i] == 0) {
- priv->st_device[i] = buf->ia_dev;
- break;
- }
- }
-
- if (i == priv->num_devices_to_span) {
- r = lstat (priv->base_path, &lstatbuf);
- iatt_from_stat (&export_buf, &lstatbuf);
- if ((r != 0) || (buf->ia_gen != export_buf.ia_gen)) {
- goto out;
- }
-
- gf_log (THIS->name, GF_LOG_WARNING,
- "device number for exported volume %s has changed "
- "since init --- assuming done by automount",
- priv->base_path);
-
- priv->st_device[0] = export_buf.ia_dev;
- }
-
- temp_ino = (buf->ia_ino * priv->num_devices_to_span) + i;
-
- buf->ia_ino = temp_ino;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int
-posix_lstat_with_gen (xlator_t *this, const char *path, struct iatt *stbuf_p)
+dict_t*
+posix_dict_set_nlink (dict_t *req, dict_t *res, int32_t nlink)
{
- struct posix_private *priv = NULL;
- int ret = 0;
- char gen_key[1024] = {0, };
- uint64_t gen_val_be = 0;
- uint64_t gen_val = 0;
- struct stat lstatbuf = {0, };
- struct iatt stbuf = {0, };
+ int ret = -1;
- priv = this->private;
-
- ret = lstat (path, &lstatbuf);
- if (ret == -1)
- return -1;
-
- iatt_from_stat (&stbuf, &lstatbuf);
-
- ret = posix_scale_ia_ino (priv, &stbuf);
- if ((ret == -1) && !strcmp (path, "..")) {
- /* stat on ../ might land us outside the export directory,
- so don't panic */
-
- gf_log (this->name, GF_LOG_WARNING,
- "Access to %s (on dev %lld) is crossing device (%lld)",
- path, (unsigned long long) stbuf.ia_dev,
- (unsigned long long) priv->st_device[0]);
- errno = EXDEV;
- return -1;
- }
-
-#ifndef GF_LINUX_HOST_OS
- if (!IA_ISDIR (stbuf.ia_type) && !IA_ISREG (stbuf.ia_type)) {
- stbuf.ia_gen = (typeof(stbuf.ia_gen))stbuf.ia_mtime;
- if (stbuf_p)
- *stbuf_p = stbuf;
- return 0;
- }
-#endif /* !GF_LINUX_HOST_OS */
-
- ret = snprintf (gen_key, 1024, "trusted.%s.gen", this->name);
-
- if (ret == 1024)
- return -1;
-
- ret = sys_lgetxattr (path, gen_key, (void *) &gen_val_be,
- sizeof (gen_val_be));
- if (ret == -1) {
- LOCK (&priv->gen_lock);
- {
- gen_val = ++priv->gen_seq;
- }
- UNLOCK (&priv->gen_lock);
-
- gen_val_be = hton64 (gen_val);
-
- ret = sys_lsetxattr (path, gen_key, &gen_val_be,
- sizeof (gen_val_be), 0);
- } else {
- gen_val = ntoh64 (gen_val_be);
- }
-
- if (ret >= 0) {
- ret = 0;
- stbuf.ia_gen = (typeof(stbuf.ia_gen))gen_val;
- if (stbuf_p)
- *stbuf_p = stbuf;
- }
-
- return ret;
-}
-
-
-int
-posix_fstat_with_gen (xlator_t *this, int fd, struct iatt *stbuf_p)
-{
- struct posix_private *priv = NULL;
- int ret = 0;
- char gen_key[1024] = {0, };
- uint64_t gen_val_be = 0;
- uint64_t gen_val = 0;
- struct stat fstatbuf = {0, };
- struct iatt stbuf = {0, };
+ if (req == NULL || !dict_get (req, GF_REQUEST_LINK_COUNT_XDATA))
+ goto out;
- priv = this->private;
+ if (res == NULL)
+ res = dict_new ();
+ if (res == NULL)
+ goto out;
- ret = fstat (fd, &fstatbuf);
+ ret = dict_set_uint32 (res, GF_RESPONSE_LINK_COUNT_XDATA, nlink);
if (ret == -1)
- return -1;
-
- iatt_from_stat (&stbuf, &fstatbuf);
-
- ret = posix_scale_ia_ino (priv, &stbuf);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Access to fd %d (on dev %lld) is crossing device (%lld)",
- fd, (unsigned long long) stbuf.ia_dev,
- (unsigned long long) priv->st_device[0]);
- errno = EXDEV;
- return -1;
- }
-
-#ifndef GF_LINUX_HOST_OS
- if (!IA_ISDIR (stbuf.ia_type) && !IA_ISREG (stbuf.ia_type)) {
- stbuf.ia_gen = (typeof(stbuf.ia_gen))stbuf.ia_mtime;
- return 0;
- }
-#endif /* !GF_LINUX_HOST_OS */
-
- ret = snprintf (gen_key, 1024, "trusted.%s.gen", this->name);
-
- if (ret == 1024)
- return -1;
-
- ret = sys_fgetxattr (fd, gen_key, (void *) &gen_val_be,
- sizeof (gen_val_be));
- if (ret == -1) {
- LOCK (&priv->gen_lock);
- {
- gen_val = ++priv->gen_seq;
- }
- UNLOCK (&priv->gen_lock);
-
- gen_val_be = hton64 (gen_val);
-
- ret = sys_fsetxattr (fd, gen_key, &gen_val_be,
- sizeof (gen_val_be), 0);
- } else {
- gen_val = ntoh64 (gen_val_be);
- }
-
- if (ret >= 0) {
- ret = 0;
- stbuf.ia_gen = (typeof(stbuf.ia_gen))gen_val;
- if (stbuf_p)
- *stbuf_p = stbuf;
- }
-
- return ret;
-}
-
-
-dict_t *
-posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc,
- dict_t *xattr_req, struct iatt *buf)
-{
- dict_t *xattr = NULL;
- posix_xattr_filler_t filler = {0, };
-
- xattr = get_new_dict();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- filler.this = this;
- filler.real_path = real_path;
- filler.xattr = xattr;
- filler.stbuf = buf;
- filler.loc = loc;
-
- dict_foreach (xattr_req, _posix_xattr_get_set, &filler);
+ gf_msg ("posix", GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+ "Failed to set GF_RESPONSE_LINK_COUNT_XDATA");
out:
- return xattr;
+ return res;
}
-
-/*
- * If the parent directory of {real_path} has the setgid bit set,
- * then set {gid} to the gid of the parent. Otherwise,
- * leave {gid} unchanged.
- */
-
int
-setgid_override (xlator_t *this, char *real_path, gid_t *gid)
+posix_forget (xlator_t *this, inode_t *inode)
{
- char * tmp_path = NULL;
- char * parent_path = NULL;
- struct iatt parent_stbuf;
-
- int op_ret = 0;
-
- tmp_path = gf_strdup (real_path);
- if (!tmp_path) {
- op_ret = -ENOMEM;
- gf_log ("[storage/posix]", GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
-
- parent_path = dirname (tmp_path);
+ uint64_t tmp_cache = 0;
+ int ret = 0;
+ char *unlink_path = NULL;
+ struct posix_private *priv_posix = NULL;
- op_ret = posix_lstat_with_gen (this, parent_path, &parent_stbuf);
+ priv_posix = (struct posix_private *) this->private;
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log ("[storage/posix]", GF_LOG_ERROR,
- "lstat on parent directory (%s) failed: %s",
- parent_path, strerror (errno));
+ ret = inode_ctx_del (inode, this, &tmp_cache);
+ if (ret < 0) {
+ ret = 0;
goto out;
}
-
- if (parent_stbuf.ia_prot.sgid) {
- /*
- Entries created inside a setgid directory
- should inherit the gid from the parent
- */
-
- *gid = parent_stbuf.ia_gid;
+ if (tmp_cache == GF_UNLINK_TRUE) {
+ POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path,
+ inode->gfid, unlink_path);
+ if (!unlink_path) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ P_MSG_UNLINK_FAILED,
+ "Failed to remove gfid :%s",
+ uuid_utoa (inode->gfid));
+ ret = -1;
+ goto out;
+ }
+ ret = sys_unlink(unlink_path);
}
out:
-
- if (tmp_path)
- GF_FREE (tmp_path);
-
- return op_ret;
+ return ret;
}
+/* Regular fops */
int32_t
posix_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
struct iatt buf = {0, };
- char * real_path = NULL;
int32_t op_ret = -1;
int32_t entry_ret = 0;
int32_t op_errno = 0;
dict_t * xattr = NULL;
- char * pathdup = NULL;
- char * parentpath = NULL;
+ char * real_path = NULL;
+ char * par_path = NULL;
struct iatt postparent = {0,};
- struct posix_private *priv = NULL;
+ int32_t gfidless = 0;
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ struct posix_private *priv = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
-
- MAKE_REAL_PATH (real_path, this, loc->path);
priv = this->private;
- op_ret = posix_lstat_with_gen (this, real_path, &buf);
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get any gfid on it */
+ if (__is_root_gfid (loc->pargfid) && loc->name
+ && (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_msg (this->name, GF_LOG_WARNING, EPERM,
+ P_MSG_LOOKUP_NOT_PERMITTED, "Lookup issued on %s,"
+ " which is not permitted", GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless);
+ op_ret = -1;
+ if (gf_uuid_is_null (loc->pargfid) || (loc->name == NULL)) {
+ /* nameless lookup */
+ MAKE_INODE_HANDLE (real_path, this, loc, &buf);
+ } else {
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf);
+
+ if (gf_uuid_is_null (loc->inode->gfid)) {
+ posix_gfid_heal (this, real_path, loc, xdata);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this,
+ loc, &buf);
+ }
+ }
+
op_errno = errno;
if (op_ret == -1) {
- if (op_errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- loc->path, strerror (op_errno));
- }
+ if (op_errno != ENOENT) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_LSTAT_FAILED,
+ "lstat on %s failed",
+ real_path ? real_path : "null");
+ }
entry_ret = -1;
goto parent;
}
- if (xattr_req && (op_ret == 0)) {
- xattr = posix_lookup_xattr_fill (this, real_path, loc,
- xattr_req, &buf);
+ if (xdata && (op_ret == 0)) {
+ xattr = posix_xattr_fill (this, real_path, loc, NULL, -1, xdata,
+ &buf);
}
-parent:
- if (loc->parent) {
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
+ if (priv->update_pgfid_nlinks) {
+ if (!gf_uuid_is_null (loc->pargfid) && !IA_ISDIR (buf.ia_type)) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
+ PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
- parentpath = dirname (pathdup);
+ LOCK (&loc->inode->lock);
+ {
+ SET_PGFID_XATTR_IF_ABSENT (real_path,
+ pgfid_xattr_key,
+ nlink_samepgfid,
+ XATTR_CREATE, op_ret,
+ this, unlock);
+ }
+unlock:
+ UNLOCK (&loc->inode->lock);
+ }
+ }
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
+parent:
+ if (par_path) {
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_LSTAT_FAILED, "post-operation lstat on"
+ " parent %s failed", par_path);
+ if (op_errno == ENOENT)
+ /* If parent directory is missing in a lookup,
+ errno should be ESTALE (bad handle) and not
+ ENOENT (missing entry)
+ */
+ op_errno = ESTALE;
goto out;
}
}
op_ret = entry_ret;
out:
- if (pathdup)
- GF_FREE (pathdup);
-
- if (xattr)
- dict_ref (xattr);
-
+ if (!op_ret && !gfidless && gf_uuid_is_null (buf.ia_gfid)) {
+ gf_msg (this->name, GF_LOG_ERROR, ENODATA, P_MSG_NULL_GFID,
+ "buf->ia_gfid is null for "
+ "%s", (real_path) ? real_path: "");
+ op_ret = -1;
+ op_errno = ENODATA;
+ }
STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
(loc)?loc->inode:NULL, &buf, xattr, &postparent);
@@ -540,15 +263,14 @@ out:
int32_t
-posix_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
struct iatt buf = {0,};
- char * real_path = NULL;
int32_t op_ret = -1;
int32_t op_errno = 0;
- struct posix_private *priv = NULL;
+ struct posix_private *priv = NULL;
+ char *real_path = NULL;
+ dict_t *xattr_rsp = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -560,40 +282,70 @@ posix_stat (call_frame_t *frame,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
- op_ret = posix_lstat_with_gen (this, real_path, &buf);
+ MAKE_INODE_HANDLE (real_path, this, loc, &buf);
+
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", loc->path,
- strerror (op_errno));
+ if (op_errno == ENOENT) {
+ gf_msg_debug(this->name, 0, "lstat on %s failed: %s",
+ real_path ? real_path : "<null>",
+ strerror (op_errno));
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_LSTAT_FAILED, "lstat on %s failed",
+ real_path ? real_path : "<null>");
+ }
goto out;
}
+ if (xdata)
+ xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1,
+ xdata, &buf);
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID();
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
return 0;
}
+/*
+ * Apply the permission bits described by @stbuf to @path.
+ *
+ * The path is lstat'ed first so that a symlink can be recognised. lchmod()
+ * is tried first; if it reports ENOSYS a symlink is treated as success,
+ * and any other file type falls back to sys_chmod().
+ *
+ * Returns the result of the last call made: 0 on success, non-zero on
+ * failure.
+ */
static int
-posix_do_chmod (xlator_t *this,
- const char *path,
- struct iatt *stbuf)
+posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf)
{
- int32_t ret = -1;
- mode_t mode = 0;
+ int32_t ret = -1;
+ mode_t mode = 0;
+ struct stat stat;
+ int is_symlink = 0;
+
+ ret = sys_lstat (path, &stat);
+ if (ret != 0) {
+ /* log the real errno, as posix_do_utimes does for its lstat */
+ gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed: %s", path);
+ goto out;
+ }
+
+ if (S_ISLNK (stat.st_mode))
+ is_symlink = 1;
mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
ret = lchmod (path, mode);
if ((ret == -1) && (errno == ENOSYS)) {
- ret = chmod (path, mode);
- }
+ /* in Linux symlinks are always in mode 0777 and no
+ such call as lchmod exists.
+ */
+ gf_msg_debug (this->name, 0, "%s (%s)", path, strerror (errno));
+ if (is_symlink) {
+ ret = 0;
+ goto out;
+ }
+ ret = sys_chmod (path, mode);
+ }
+out:
return ret;
}
@@ -613,7 +365,7 @@ posix_do_chown (xlator_t *this,
if (valid & GF_SET_ATTR_GID)
gid = stbuf->ia_gid;
- ret = lchown (path, uid, gid);
+ ret = sys_lchown (path, uid, gid);
return ret;
}
@@ -625,6 +377,18 @@ posix_do_utimes (xlator_t *this,
{
int32_t ret = -1;
struct timeval tv[2] = {{0,},{0,}};
+ struct stat stat;
+ int is_symlink = 0;
+
+ ret = sys_lstat (path, &stat);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_FILE_OP_FAILED, "%s", path);
+ goto out;
+ }
+
+ if (S_ISLNK (stat.st_mode))
+ is_symlink = 1;
tv[0].tv_sec = stbuf->ia_atime;
tv[0].tv_usec = stbuf->ia_atime_nsec / 1000;
@@ -633,21 +397,30 @@ posix_do_utimes (xlator_t *this,
ret = lutimes (path, tv);
if ((ret == -1) && (errno == ENOSYS)) {
- ret = utimes (path, tv);
- }
-
+ gf_msg_debug (this->name, 0, "%s (%s)",
+ path, strerror (errno));
+ if (is_symlink) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = sys_utimes (path, tv);
+ }
+
+out:
return ret;
}
int
posix_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid)
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = 0;
struct iatt statpre = {0,};
struct iatt statpost = {0,};
+ dict_t *xattr_rsp = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -656,35 +429,34 @@ posix_setattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, &statpre);
- op_ret = posix_lstat_with_gen (this, real_path, &statpre);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (lstat) on %s failed: %s", real_path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "setattr (lstat) on %s failed",
+ real_path ? real_path : "<null>");
goto out;
}
- if (valid & GF_SET_ATTR_MODE) {
- op_ret = posix_do_chmod (this, real_path, stbuf);
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
+ op_ret = posix_do_chown (this, real_path, stbuf, valid);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (chmod) on %s failed: %s", real_path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_CHOWN_FAILED, "setattr (chown) on %s "
+ "failed", real_path);
goto out;
}
}
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
- op_ret = posix_do_chown (this, real_path, stbuf, valid);
+ if (valid & GF_SET_ATTR_MODE) {
+ op_ret = posix_do_chmod (this, real_path, stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (chown) on %s failed: %s", real_path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_CHMOD_FAILED, "setattr (chmod) on %s "
+ "failed", real_path);
goto out;
}
}
@@ -693,41 +465,45 @@ posix_setattr (call_frame_t *frame, xlator_t *this,
op_ret = posix_do_utimes (this, real_path, stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (utimes) on %s failed: %s", real_path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_UTIMES_FAILED, "setattr (utimes) on %s "
+ "failed", real_path);
goto out;
}
}
if (!valid) {
- op_ret = lchown (real_path, -1, -1);
+ op_ret = sys_lchown (real_path, -1, -1);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lchown (%s, -1, -1) failed => (%s)",
- real_path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_LCHOWN_FAILED, "lchown (%s, -1, -1) "
+ "failed", real_path);
goto out;
}
}
- op_ret = posix_lstat_with_gen (this, real_path, &statpost);
+ op_ret = posix_pstat (this, loc->gfid, real_path, &statpost);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (lstat) on %s failed: %s", real_path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "setattr (lstat) on %s failed", real_path);
goto out;
}
+ if (xdata)
+ xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1,
+ xdata, &statpost);
op_ret = 0;
out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
- &statpre, &statpost);
+ &statpre, &statpost, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
return 0;
}
@@ -748,7 +524,7 @@ posix_do_fchown (xlator_t *this,
if (valid & GF_SET_ATTR_GID)
gid = stbuf->ia_gid;
- ret = fchown (fd, uid, gid);
+ ret = sys_fchown (fd, uid, gid);
return ret;
}
@@ -761,7 +537,7 @@ posix_do_fchmod (xlator_t *this,
mode_t mode = 0;
mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- return fchmod (fd, mode);
+ return sys_fchmod (fd, mode);
}
static int
@@ -769,20 +545,23 @@ posix_do_futimes (xlator_t *this,
int fd,
struct iatt *stbuf)
{
+ gf_msg (this->name, GF_LOG_WARNING, ENOSYS, P_MSG_UNKNOWN_OP,
+ "function not implemented fd(%d)", fd);
+
errno = ENOSYS;
return -1;
}
int
posix_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
struct iatt statpre = {0,};
struct iatt statpost = {0,};
struct posix_fd *pfd = NULL;
- uint64_t tmp_pfd = 0;
+ dict_t *xattr_rsp = NULL;
int32_t ret = -1;
DECLARE_OLD_FS_ID_VAR;
@@ -793,93 +572,473 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
- op_ret = posix_fstat_with_gen (this, pfd->fd, &statpre);
+ op_ret = posix_fdstat (this, pfd->fd, &statpre);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fstat) failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fsetattr (fstat) failed on fd=%p", fd);
goto out;
}
- if (valid & GF_SET_ATTR_MODE) {
- op_ret = posix_do_fchmod (this, pfd->fd, stbuf);
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fchmod) failed on fd=%p: %s",
- fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FCHOWN_FAILED, "fsetattr (fchown) failed"
+ " on fd=%p", fd);
goto out;
}
+
}
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid);
+ if (valid & GF_SET_ATTR_MODE) {
+ op_ret = posix_do_fchmod (this, pfd->fd, stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fchown) failed on fd=%p: %s",
- fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FCHMOD_FAILED, "fsetattr (fchmod) failed"
+ " on fd=%p", fd);
goto out;
}
-
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
op_ret = posix_do_futimes (this, pfd->fd, stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (futimes) on failed fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FUTIMES_FAILED, "fsetattr (futimes) on "
+ "failed fd=%p", fd);
goto out;
}
}
if (!valid) {
- op_ret = fchown (pfd->fd, -1, -1);
+ op_ret = sys_fchown (pfd->fd, -1, -1);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fchown (%d, -1, -1) failed => (%s)",
- pfd->fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FCHOWN_FAILED,
+ "fchown (%d, -1, -1) failed",
+ pfd->fd);
goto out;
}
}
- op_ret = posix_fstat_with_gen (this, pfd->fd, &statpost);
+ op_ret = posix_fdstat (this, pfd->fd, &statpost);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fstat) failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fsetattr (fstat) failed on fd=%p", fd);
goto out;
}
+ if (xdata)
+ xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, pfd->fd,
+ xdata, &statpost);
op_ret = 0;
out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- &statpre, &statpost);
+ &statpre, &statpost, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ return 0;
+}
+
+/*
+ * Run sys_fallocate() with @flags on the backend descriptor behind @fd,
+ * storing the file's attributes before the call in @statpre and after it
+ * in @statpost.
+ *
+ * When @xdata carries GLUSTERFS_WRITE_UPDATE_ATOMIC the stat/fallocate/stat
+ * sequence runs with fd->inode->lock held.
+ *
+ * Returns a negative value on failure. _posix_fallocate() and
+ * posix_discard() negate that value to produce op_errno, so every failure
+ * path tries to return a negated errno.
+ */
+static int32_t
+posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    int32_t flags, off_t offset, size_t len,
+                    struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+        int32_t             ret      = -1;
+        int32_t             op_errno = 0;
+        struct posix_fd    *pfd      = NULL;
+        gf_boolean_t        locked   = _gf_false;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+        if (ret < 0) {
+                gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
+                /* Prefer the errno reported through op_errno over the raw
+                 * status: callers turn -ret into op_errno. If nothing was
+                 * reported, the original status is returned unchanged. */
+                if (op_errno > 0)
+                        ret = -op_errno;
+                goto out;
+        }
+
+        if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+                locked = _gf_true;
+                LOCK(&fd->inode->lock);
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpre);
+        if (ret == -1) {
+                ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                        "fallocate (fstat) failed on fd=%p", fd);
+                goto out;
+        }
+
+        ret = sys_fallocate (pfd->fd, flags, offset, len);
+        if (ret == -1) {
+                ret = -errno;
+                goto out;
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpost);
+        if (ret == -1) {
+                ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                        "fallocate (fstat) failed on fd=%p", fd);
+                goto out;
+        }
+
+out:
+        /* drop the inode lock only if this call took it */
+        if (locked) {
+                UNLOCK (&fd->inode->lock);
+                locked = _gf_false;
+        }
+        SET_TO_OLD_FS_ID ();
+
+        return ret;
+}
+
+/*
+ * Allocate a zeroed buffer of @size + ALIGN_SIZE bytes and store in
+ * *@aligned_buf the address inside it that GF_ALIGN_BUF produces for an
+ * ALIGN_SIZE boundary.
+ *
+ * Returns the start of the allocation, or NULL if the allocation failed,
+ * in which case *@aligned_buf is left untouched.
+ */
+char*
+_page_aligned_alloc (size_t size, char **aligned_buf)
+{
+        char *raw = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char);
+
+        if (raw) {
+                /* page aligned buffer */
+                *aligned_buf = GF_ALIGN_BUF (raw, ALIGN_SIZE);
+        }
+
+        return raw;
+}
+
+/*
+ * Write @len zero bytes to @fd starting at @offset.
+ *
+ * A single zeroed buffer of vect_size bytes (VECTOR_SIZE, or @len when
+ * @len is smaller) is shared by every iovec entry. The data is written as
+ * num_loop batches of num_vect vectors, followed by one batch of the
+ * `extra` leftover vectors and a final write of the `remain` trailing
+ * bytes. When @o_direct is non-zero the buffer comes from
+ * _page_aligned_alloc().
+ *
+ * Returns 0 when @len is 0, the result of the last sys_writev() on
+ * success, and a negative value on failure. On failure errno describes
+ * the error, because the caller reads errno after this function returns.
+ *
+ * NOTE(review): a short write from sys_writev() is not detected here;
+ * confirm whether callers need that guarantee.
+ */
+static int32_t
+_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct)
+{
+        off_t               num_vect    = 0;
+        off_t               num_loop    = 1;
+        off_t               idx         = 0;
+        int32_t             op_ret      = -1;
+        int32_t             vect_size   = VECTOR_SIZE;
+        off_t               remain      = 0;
+        off_t               extra       = 0;
+        struct iovec       *vector      = NULL;
+        char               *iov_base    = NULL;
+        char               *alloc_buf   = NULL;
+        int                 saved_errno = 0;
+
+        if (len == 0)
+                return 0;
+        /* a negative length would become a negative vect_size below */
+        if (len < 0) {
+                errno = EINVAL;
+                return -1;
+        }
+        if (len < VECTOR_SIZE)
+                vect_size = len;
+
+        num_vect = len / (vect_size);
+        remain = len % vect_size ;
+        if (num_vect > MAX_NO_VECT) {
+                extra = num_vect % MAX_NO_VECT;
+                num_loop = num_vect / MAX_NO_VECT;
+                num_vect = MAX_NO_VECT;
+        }
+
+        vector = GF_CALLOC (num_vect, sizeof(struct iovec),
+                            gf_common_mt_iovec);
+        if (!vector) {
+                errno = ENOMEM;
+                return -1;
+        }
+        if (o_direct) {
+                alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
+                if (!alloc_buf) {
+                        GF_FREE(vector);
+                        /* set after the free so the caller sees ENOMEM */
+                        errno = ENOMEM;
+                        return -1;
+                }
+        } else {
+                iov_base = GF_CALLOC (vect_size, sizeof(char),
+                                      gf_common_mt_char);
+                if (!iov_base) {
+                        GF_FREE(vector);
+                        errno = ENOMEM;
+                        return -1;
+                }
+        }
+
+        /* every vector points at the same zeroed buffer */
+        for (idx = 0; idx < num_vect; idx++) {
+                vector[idx].iov_base = iov_base;
+                vector[idx].iov_len  = vect_size;
+        }
+        if (sys_lseek (fd, offset, SEEK_SET) < 0) {
+                op_ret = -1;
+                goto err;
+        }
+
+        for (idx = 0; idx < num_loop; idx++) {
+                op_ret = sys_writev (fd, vector, num_vect);
+                if (op_ret < 0)
+                        goto err;
+        }
+        if (extra) {
+                op_ret = sys_writev (fd, vector, extra);
+                if (op_ret < 0)
+                        goto err;
+        }
+        if (remain) {
+                vector[0].iov_len = remain;
+                op_ret = sys_writev (fd, vector , 1);
+                if (op_ret < 0)
+                        goto err;
+        }
+err:
+        /* the frees below must not disturb the errno of a failed syscall */
+        saved_errno = errno;
+        if (o_direct)
+                GF_FREE(alloc_buf);
+        else
+                GF_FREE(iov_base);
+        GF_FREE(vector);
+        errno = saved_errno;
+        return op_ret;
+}
+
+/*
+ * Zero-fill @len bytes of the file behind @fd starting at @offset, storing
+ * the file's attributes before the write in @statpre and after it in
+ * @statpost.
+ *
+ * When @xdata carries GLUSTERFS_WRITE_UPDATE_ATOMIC the stat/write/stat
+ * sequence runs with fd->inode->lock held. If the descriptor's flags
+ * include O_SYNC or O_DSYNC the data is fsync'ed before the final stat.
+ *
+ * Returns a negative value on failure. posix_zerofill() negates that value
+ * to produce op_errno, so every failure path tries to return a negated
+ * errno.
+ */
+static int32_t
+posix_do_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                   off_t len, struct iatt *statpre, struct iatt *statpost,
+                   dict_t *xdata)
+{
+        int32_t             ret      = -1;
+        int32_t             op_errno = 0;
+        struct posix_fd    *pfd      = NULL;
+        gf_boolean_t        locked   = _gf_false;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+        if (ret < 0) {
+                gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
+                /* Prefer the errno reported through op_errno over the raw
+                 * status: the caller turns -ret into op_errno. If nothing
+                 * was reported, the original status is returned unchanged. */
+                if (op_errno > 0)
+                        ret = -op_errno;
+                goto out;
+        }
+
+        if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+                locked = _gf_true;
+                LOCK(&fd->inode->lock);
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpre);
+        if (ret == -1) {
+                ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                        "pre-operation fstat failed on fd = %p", fd);
+                goto out;
+        }
+
+        ret = _posix_do_zerofill (pfd->fd, offset, len, pfd->flags & O_DIRECT);
+        if (ret < 0) {
+                ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED,
+                        "zerofill failed on fd %d length %" PRId64 ,
+                        pfd->fd, len);
+                goto out;
+        }
+
+        if (pfd->flags & (O_SYNC|O_DSYNC)) {
+                ret = sys_fsync (pfd->fd);
+                if (ret) {
+                        /* capture errno before logging can overwrite it,
+                         * and log it instead of 0 */
+                        ret = -errno;
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                P_MSG_WRITEV_FAILED, "fsync() in writev on fd"
+                                "%d failed", pfd->fd);
+                        goto out;
+                }
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpost);
+        if (ret == -1) {
+                ret = -errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                        "post operation fstat failed on fd=%p", fd);
+                goto out;
+        }
+
+out:
+        /* drop the inode lock only if this call took it */
+        if (locked) {
+                UNLOCK (&fd->inode->lock);
+                locked = _gf_false;
+        }
+        SET_TO_OLD_FS_ID ();
+
+        return ret;
+}
+
+/*
+ * fallocate fop helper: hands the request to posix_do_fallocate() and
+ * unwinds the frame with the result.
+ *
+ * A non-zero @keep_size selects FALLOC_FL_KEEP_SIZE when the platform
+ * defines it; otherwise the flags stay 0. On failure the frame is unwound
+ * with -1 and the negated return value as op_errno. Always returns 0.
+ */
+static int32_t
+_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        struct iatt prebuf  = {0,};
+        struct iatt postbuf = {0,};
+        int32_t     mode    = 0;
+        int32_t     rc;
+
+#ifdef FALLOC_FL_KEEP_SIZE
+        if (keep_size)
+                mode = FALLOC_FL_KEEP_SIZE;
+#endif /* FALLOC_FL_KEEP_SIZE */
+
+        rc = posix_do_fallocate (frame, this, fd, mode, offset, len,
+                                 &prebuf, &postbuf, xdata);
+        if (rc < 0) {
+                STACK_UNWIND_STRICT(fallocate, frame, -1, -rc, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &prebuf, &postbuf, NULL);
+        return 0;
+}
+
+/*
+ * discard fop: punches a hole of @len bytes at @offset via
+ * posix_do_fallocate() using FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE.
+ *
+ * When FALLOC_FL_KEEP_SIZE is not defined at build time the fop is
+ * unwound with EOPNOTSUPP. Failures unwind with -1 and the negated error
+ * value as op_errno. Always returns 0.
+ */
+static int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              size_t len, dict_t *xdata)
+{
+        int32_t ret;
+#ifndef FALLOC_FL_KEEP_SIZE
+        /* kept negative: the shared unwind below negates ret, so op_errno
+         * ends up as the positive EOPNOTSUPP */
+        ret = -EOPNOTSUPP;
+
+#else /* FALLOC_FL_KEEP_SIZE */
+        int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE;
+        struct iatt statpre = {0,};
+        struct iatt statpost = {0,};
+
+        ret = posix_do_fallocate (frame, this, fd, flags, offset, len,
+                                  &statpre, &statpost, xdata);
+        if (ret < 0)
+                goto err;
+
+        STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL);
+        return 0;
+
+err:
+#endif /* FALLOC_FL_KEEP_SIZE */
+        STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * zerofill fop: hands the request to posix_do_zerofill() and unwinds the
+ * frame with the result.
+ *
+ * On success the frame is unwound with op_ret 0 and the pre/post
+ * attributes; on failure with op_ret -1 and the negated return value as
+ * op_errno. Always returns 0.
+ */
+static int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ ret = posix_do_zerofill (frame, this, fd, offset, len,
+ &statpre, &statpost, xdata);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
return 0;
+
+err:
+ /* ret is negative here, so -ret is the positive op_errno */
+ STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
+ return 0;
+
}
+/*
+ * ipc fop: nothing is handled at this layer, so every request is logged
+ * and unwound as a failure with EOPNOTSUPP. Always returns 0.
+ */
+static int32_t
+posix_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        /*
+         * IPC is for inter-translator communication.  If one gets here, it
+         * means somebody sent one that nobody else recognized, which is an
+         * error much like an uncaught exception.
+         */
+        gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE,
+                "GF_LOG_IPC(%d) not handled", op);
+        /* op_errno is a positive errno value, as in the other unwinds of
+         * this file */
+        STACK_UNWIND_STRICT (ipc, frame, -1, EOPNOTSUPP, NULL);
+        return 0;
+
+}
+
+#ifdef HAVE_SEEK_HOLE
+/*
+ * seek fop: finds the next data region or hole at or after @offset on the
+ * backend descriptor behind @fd, using lseek with SEEK_DATA or SEEK_HOLE.
+ *
+ * Any other value of @what unwinds with ENOTSUP. The frame is unwound
+ * with op_ret 0 and the resulting offset on success, or with op_ret -1,
+ * offset -1 and the collected error code on failure. Always returns 0.
+ */
+static int32_t
+posix_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            gf_seek_what_t what, dict_t *xdata)
+{
+        struct posix_fd *pfd     = NULL;
+        off_t            ret     = -1;
+        int              err     = 0;
+        int              whence  = 0;
+        int              ctx_ret = 0;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        switch (what) {
+        case GF_SEEK_DATA:
+                whence = SEEK_DATA;
+                break;
+        case GF_SEEK_HOLE:
+                whence = SEEK_HOLE;
+                break;
+        default:
+                err = ENOTSUP;
+                gf_msg (this->name, GF_LOG_ERROR, ENOTSUP,
+                        P_MSG_SEEK_UNKOWN, "don't know what to seek");
+                goto out;
+        }
+
+        /* Keep the lookup status apart from `ret`: the unwind treats only
+         * ret == -1 as failure, so any other negative status stored there
+         * would be reported as a successful seek to a negative offset. */
+        ctx_ret = posix_fd_ctx_get (fd, this, &pfd, &err);
+        if (ctx_ret < 0) {
+                gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
+                goto out;
+        }
+
+        ret = sys_lseek (pfd->fd, offset, whence);
+        if (ret == -1) {
+                err = errno;
+                gf_msg (this->name, GF_LOG_ERROR, err, P_MSG_SEEK_FAILED,
+                        "seek failed on fd %d length %" PRId64 , pfd->fd,
+                        offset);
+                goto out;
+        }
+
+out:
+        SET_TO_OLD_FS_ID ();
+
+        STACK_UNWIND_STRICT (seek, frame, (ret == -1 ? -1 : 0), err,
+                             (ret == -1 ? -1 : ret), xdata);
+        return 0;
+}
+#endif
+
int32_t
posix_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
char * real_path = NULL;
int32_t op_ret = -1;
@@ -892,79 +1051,74 @@ posix_opendir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
VALIDATE_OR_GOTO (fd, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) {
+ op_errno = ESTALE;
+ goto out;
+ }
- dir = opendir (real_path);
+ op_ret = -1;
+ dir = sys_opendir (real_path);
if (dir == NULL) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED,
+ "opendir failed on %s", real_path);
goto out;
}
op_ret = dirfd (dir);
- if (op_ret < 0) {
+ if (op_ret < 0) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "dirfd() failed on %s: %s",
- loc->path, strerror (op_errno));
- goto out;
- }
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED,
+ "dirfd() failed on %s", real_path);
+ goto out;
+ }
- pfd = GF_CALLOC (1, sizeof (*fd), gf_posix_mt_posix_fd);
+ pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
if (!pfd) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
pfd->dir = dir;
- pfd->fd = dirfd (dir);
- pfd->path = gf_strdup (real_path);
- if (!pfd->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ pfd->dir_eof = -1;
+ pfd->fd = op_ret;
- fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd"
+ "context path=%s fd=%p", real_path, fd);
op_ret = 0;
- out:
+out:
if (op_ret == -1) {
if (dir) {
- closedir (dir);
+ (void) sys_closedir (dir);
dir = NULL;
}
if (pfd) {
- if (pfd->path)
- GF_FREE (pfd->path);
GF_FREE (pfd);
pfd = NULL;
}
}
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
return 0;
}
int32_t
posix_releasedir (xlator_t *this,
- fd_t *fd)
+ fd_t *fd)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
struct posix_fd * pfd = NULL;
- uint64_t tmp_pfd = 0;
+ uint64_t tmp_pfd = 0;
int ret = 0;
struct posix_private *priv = NULL;
@@ -974,30 +1128,19 @@ posix_releasedir (xlator_t *this,
ret = fd_ctx_del (fd, this, &tmp_pfd);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd from fd=%p is NULL", fd);
+ gf_msg_debug (this->name, 0, "pfd from fd=%p is NULL", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
+ pfd = (struct posix_fd *)(long)tmp_pfd;
if (!pfd->dir) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd->dir is NULL for fd=%p path=%s",
- fd, pfd->path ? pfd->path : "<NULL>");
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+ "pfd->dir is NULL for fd=%p", fd);
goto out;
}
priv = this->private;
- if (!pfd->path) {
- op_errno = EBADFD;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd->path was NULL. fd=%p pfd=%p",
- fd, pfd);
- }
-
pthread_mutex_lock (&priv->janitor_lock);
{
INIT_LIST_HEAD (&pfd->list);
@@ -1006,79 +1149,75 @@ posix_releasedir (xlator_t *this,
}
pthread_mutex_unlock (&priv->janitor_lock);
- op_ret = 0;
-
- out:
+out:
return 0;
}
int32_t
posix_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata) /* xdata is not referenced directly in this function */
{ /* readlink fop: resolve loc to real_path, read the link target into dest, unwind with dest and stbuf */
char * dest = NULL;
int32_t op_ret = -1;
- int32_t lstat_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
struct iatt stbuf = {0,};
DECLARE_OLD_FS_ID_VAR;
- VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
dest = alloca (size + 1); /* stack buffer; the extra byte holds the terminator written after sys_readlink */
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- op_ret = readlink (real_path, dest, size);
+ MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); /* NOTE(review): op_ret is tested right below, so this macro presumably sets it along with real_path/stbuf -- confirm */
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "readlink on %s failed: %s", loc->path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed",
+ loc->path ? loc->path : "<null>");
goto out;
}
- dest[op_ret] = 0;
-
- lstat_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if (lstat_ret == -1) {
- op_ret = -1;
+ op_ret = sys_readlink (real_path, dest, size);
+ if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", loc->path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED,
+ "readlink on %s failed", real_path);
goto out;
}
- out:
+ dest[op_ret] = 0; /* op_ret is sys_readlink's return value here; terminate dest at that index */
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); /* on error paths dest is passed as-is (still NULL if frame validation failed) */
return 0;
}
-int32_t
+
+int
posix_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev)
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
- int tmp_fd = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = 0;
- struct iatt stbuf = { 0, };
- char was_present = 1;
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- char *pathdup = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- char *parentpath = NULL;
+ int tmp_fd = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = 0;
+ char *par_path = 0;
+ struct iatt stbuf = { 0, };
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ void * uuid_req = NULL;
+ int32_t nlink_samepgfid = 0;
+ char *pgfid_xattr_key = NULL;
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+ gf_boolean_t linked = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -1089,370 +1228,655 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
gid = frame->root->gid;
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)){
- was_present = 0;
- }
+ SET_FS_ID (frame->root->uid, gid);
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0)
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
goto out;
+ }
- SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
- op_ret = posix_lstat_with_gen (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent of %s failed",
+ real_path);
goto out;
}
-
- op_ret = mknod (real_path, mode, dev);
+
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
+ /* Check if the 'gfid' already exists, because this mknod may be an
+ internal call from distribute for creating 'linkfile', and that
+ linkfile may be for a hardlinked file */
+ if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY);
+ op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (op_ret) {
+ gf_msg_debug (this->name, 0, "failed to get the gfid from "
+ "dict for %s", loc->path);
+ goto real_op;
+ }
+ op_ret = posix_create_link_if_gfid_exists (this, uuid_req,
+ real_path,
+ loc->inode->table);
+ if (!op_ret) {
+ linked = _gf_true;
+ goto post_op;
+ }
+ }
+
+real_op:
+#ifdef __NetBSD__
+ if (S_ISFIFO(mode))
+ op_ret = mkfifo (real_path, mode);
+ else
+#endif /* __NetBSD__ */
+ op_ret = sys_mknod (real_path, mode, dev);
if (op_ret == -1) {
op_errno = errno;
- if ((op_errno == EINVAL) && S_ISREG (mode)) {
- /* Over Darwin, mknod with (S_IFREG|mode)
- doesn't work */
- tmp_fd = creat (real_path, mode);
- if (tmp_fd == -1)
- goto out;
- close (tmp_fd);
- } else {
-
- gf_log (this->name, GF_LOG_ERROR,
- "mknod on %s failed: %s", loc->path,
- strerror (op_errno));
- goto out;
- }
+ if ((op_errno == EINVAL) && S_ISREG (mode)) {
+ /* Over Darwin, mknod with (S_IFREG|mode)
+ doesn't work */
+ tmp_fd = sys_creat (real_path, mode);
+ if (tmp_fd == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_CREATE_FAILED, "create failed on"
+ "%s", real_path);
+ goto out;
+ }
+ sys_close (tmp_fd);
+ } else {
+
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_MKNOD_FAILED,
+ "mknod on %s failed", real_path);
+ goto out;
+ }
}
+ entry_created = _gf_true;
+
#ifndef HAVE_SET_FSID
- op_ret = lchown (real_path, frame->root->uid, gid);
+ op_ret = sys_lchown (real_path, frame->root->uid, gid);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lchown on %s failed: %s", loc->path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
+ "lchown on %s failed", real_path);
goto out;
}
#endif
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
+post_op:
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed", real_path);
+ }
+
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+
+ SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+
+ignore:
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed", real_path);
+ }
+
+ if (!linked) {
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ } else {
+ gfid_set = _gf_true;
+ }
+ }
+
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "mknod on %s failed: %s", loc->path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_MKNOD_FAILED,
+ "mknod on %s failed", real_path);
goto out;
}
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_path);
goto out;
}
op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);
+ (loc)?loc->inode:NULL, &stbuf, &preparent,
+ &postparent, NULL);
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_path);
+ if (op_ret < 0) {
+ if (entry_created) {
+ if (S_ISREG (mode))
+ sys_unlink (real_path);
+ else
+ sys_rmdir (real_path);
+ }
+
+ if (gfid_set)
+ posix_gfid_unset (this, xdata);
}
return 0;
}
-
-static int
-janitor_walker (const char *fpath, const struct stat *sb,
- int typeflag, struct FTW *ftwbuf)
+int
+posix_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- switch (sb->st_mode & S_IFMT) {
- case S_IFREG:
- case S_IFBLK:
- case S_IFLNK:
- case S_IFCHR:
- case S_IFIFO:
- case S_IFSOCK:
- gf_log (THIS->name, GF_LOG_TRACE,
- "unlinking %s", fpath);
- unlink (fpath);
- break;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL, *gfid_path = NULL;
+ char *par_path = NULL, *xattr_name = NULL;
+ struct iatt stbuf = {0, };
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+ void *uuid_req = NULL;
+ ssize_t size = 0;
+ dict_t *xdata_rsp = NULL;
+ void *disk_xattr = NULL, *arg_xattr = NULL;
+ data_t *arg_data = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
- case S_IFDIR:
- if (ftwbuf->level) { /* don't remove top level dir */
- gf_log (THIS->name, GF_LOG_TRACE,
- "removing directory %s", fpath);
+ DECLARE_OLD_FS_ID_VAR;
- rmdir (fpath);
- }
- break;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get created from a user request */
+ if (__is_root_gfid (loc->pargfid) &&
+ (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_msg (this->name, GF_LOG_WARNING, EPERM,
+ P_MSG_MKDIR_NOT_PERMITTED, "mkdir issued on %s, which"
+ "is not permitted", GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
}
- return 0; /* 0 = FTW_CONTINUE */
-}
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv, out);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
-static struct posix_fd *
-janitor_get_next_fd (xlator_t *this)
-{
- struct posix_private *priv = NULL;
- struct posix_fd *pfd = NULL;
+ if (loc->parent)
+ gf_uuid_unparse (loc->parent->gfid, pgfid);
+ else
+ gf_uuid_unparse (loc->pargfid, pgfid);
- struct timespec timeout;
+ gid = frame->root->gid;
- priv = this->private;
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
- pthread_mutex_lock (&priv->janitor_lock);
- {
- if (list_empty (&priv->janitor_fds)) {
- time (&timeout.tv_sec);
- timeout.tv_sec += priv->janitor_sleep_duration;
- timeout.tv_nsec = 0;
-
- pthread_cond_timedwait (&priv->janitor_cond,
- &priv->janitor_lock,
- &timeout);
- goto unlock;
+ SET_FS_ID (frame->root->uid, gid);
+
+ if (xdata)
+ op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (uuid_req && !gf_uuid_is_null (uuid_req)) {
+ op_ret = posix_istat (this, uuid_req, NULL, &stbuf);
+ if ((op_ret == 0) && IA_ISDIR (stbuf.ia_type)) {
+ size = posix_handle_path (this, uuid_req, NULL, NULL,
+ 0);
+ if (size > 0)
+ gfid_path = alloca (size);
+
+ if (gfid_path)
+ posix_handle_path (this, uuid_req, NULL,
+ gfid_path, size);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DIR_OF_SAME_ID, "mkdir (%s): gfid (%s) "
+ "is already associated with directory (%s). "
+ "Hence, both directories will share same gfid "
+ "and this can lead to inconsistencies.",
+ loc->path, uuid_utoa (uuid_req),
+ gfid_path ? gfid_path : "<NULL>");
}
+ } else if (!uuid_req && frame->root->pid != GF_SERVER_PID_TRASH) {
+ op_ret = -1;
+ op_errno = EPERM;
+ gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_NULL_GFID, "mkdir (%s): is issued without "
+ "gfid-req %p", loc->path, xdata);
+ goto out;
+ }
- pfd = list_entry (priv->janitor_fds.next, struct posix_fd,
- list);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_path);
+ goto out;
+ }
- list_del (priv->janitor_fds.next);
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ mode |= S_ISGID;
}
-unlock:
- pthread_mutex_unlock (&priv->janitor_lock);
- return pfd;
-}
+ op_ret = dict_get_str (xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+ if (xattr_name != NULL) {
+ arg_data = dict_get (xdata, xattr_name);
+ if (arg_data) {
+ size = sys_lgetxattr (par_path, xattr_name, NULL, 0);
+ if (size < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): getxattr on key (%s)"
+ " path (%s) failed ", pgfid,
+ loc->name, xattr_name,
+ par_path);
+ goto out;
+ }
+ disk_xattr = alloca (size);
+ if (disk_xattr == NULL) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): alloca failed during"
+ " preop of mkdir (%s)", pgfid,
+ loc->name, real_path);
+ goto out;
+ }
-static void *
-posix_janitor_thread_proc (void *data)
-{
- xlator_t * this = NULL;
- struct posix_private *priv = NULL;
- struct posix_fd *pfd;
+ size = sys_lgetxattr (par_path, xattr_name,
+ disk_xattr, size);
+ if (size < 0) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): getxattr on key (%s)"
+ " path (%s) failed (%s)", pgfid,
+ loc->name, xattr_name,
+ par_path, strerror (errno));
+ goto out;
+ }
- time_t now;
+ if ((arg_data->len != size)
+ || (memcmp (arg_data->data, disk_xattr, size))) {
+ int ret = 0;
+ gf_msg (this->name, GF_LOG_INFO, EIO,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): failing preop of "
+ "mkdir (%s) as on-disk"
+ " xattr value differs from argument "
+ "value for key %s", pgfid, loc->name,
+ real_path, xattr_name);
+ op_ret = -1;
+ op_errno = EIO;
+
+ xdata_rsp = dict_new ();
+ if (xdata_rsp == NULL) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ ENOMEM,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): "
+ "dict allocation failed", pgfid,
+ loc->name);
+ op_errno = ENOMEM;
+ goto out;
+ }
- this = data;
- priv = this->private;
+ ret = dict_set_int8 (xdata_rsp,
+ GF_PREOP_CHECK_FAILED, 1);
+ goto out;
+ }
+
+ dict_del (xdata, xattr_name);
+ }
- THIS = this;
+ dict_del (xdata, GF_PREOP_PARENT_KEY);
+ }
- while (1) {
- time (&now);
- if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor cleaning out /" GF_REPLICATE_TRASH_DIR);
+ op_ret = sys_mkdir (real_path, mode);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED,
+ "mkdir of %s failed", real_path);
+ goto out;
+ }
- nftw (priv->trash_path,
- janitor_walker,
- 32,
- FTW_DEPTH | FTW_PHYS);
+ entry_created = _gf_true;
- priv->last_landfill_check = now;
- }
+#ifndef HAVE_SET_FSID
+ op_ret = sys_chown (real_path, frame->root->uid, gid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
+ "chown on %s failed", real_path);
+ goto out;
+ }
+#endif
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed ", real_path);
+ }
- pfd = janitor_get_next_fd (this);
- if (pfd) {
- if (pfd->dir == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor: closing file fd=%d", pfd->fd);
- close (pfd->fd);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor: closing dir fd=%p", pfd->dir);
- closedir (pfd->dir);
- }
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed", real_path);
+ }
- if (pfd->path)
- GF_FREE (pfd->path);
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ } else {
+ gfid_set = _gf_true;
+ }
- GF_FREE (pfd);
- }
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_path);
+ goto out;
}
- return NULL;
-}
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent of %s failed",
+ real_path);
+ goto out;
+ }
+ op_ret = 0;
-static void
-posix_spawn_janitor_thread (xlator_t *this)
-{
- struct posix_private *priv = NULL;
- int ret = 0;
+out:
+ SET_TO_OLD_FS_ID ();
- priv = this->private;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
+ (loc)?loc->inode:NULL, &stbuf, &preparent,
+ &postparent, xdata_rsp);
- LOCK (&priv->lock);
- {
- if (!priv->janitor_present) {
- ret = pthread_create (&priv->janitor, NULL,
- posix_janitor_thread_proc, this);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "spawning janitor thread failed: %s",
- strerror (errno));
- goto unlock;
- }
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_rmdir (real_path);
- priv->janitor_present = _gf_true;
- }
+ if (gfid_set)
+ posix_gfid_unset (this, xdata);
}
-unlock:
- UNLOCK (&priv->lock);
-}
+ if (xdata_rsp)
+ dict_unref (xdata_rsp);
-int32_t
-posix_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ return 0;
+}
+
+int
+posix_add_unlink_to_ctx (inode_t *inode, xlator_t *this, char *unlink_path)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- struct iatt stbuf = {0, };
- char was_present = 1;
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- char *pathdup = NULL;
- char *parentpath = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ uint64_t ctx = GF_UNLINK_FALSE;
+ int ret = 0;
- DECLARE_OLD_FS_ID_VAR;
+ if (!unlink_path) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ P_MSG_UNLINK_FAILED,
+ "Creation of unlink entry failed for gfid: %s",
+ unlink_path);
+ ret = -1;
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
+ ctx = GF_UNLINK_TRUE;
+ ret = posix_inode_ctx_set (inode, this, ctx);
+ if (ret < 0) {
+ goto out;
+ }
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
+out:
+ return ret;
+}
- MAKE_REAL_PATH (real_path, this, loc->path);
+int32_t
+posix_move_gfid_to_unlink (xlator_t *this, uuid_t gfid, loc_t *loc)
+{
+ char *unlink_path = NULL;
+ char *gfid_path = NULL;
+ struct stat stbuf = {0, };
+ int ret = 0;
+ struct posix_private *priv_posix = NULL;
- gid = frame->root->gid;
+ priv_posix = (struct posix_private *) this->private;
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
- }
+ MAKE_HANDLE_GFID_PATH (gfid_path, this, gfid, NULL);
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0)
+ POSIX_GET_FILE_UNLINK_PATH (priv_posix->base_path,
+ loc->inode->gfid, unlink_path);
+ if (!unlink_path) {
+ ret = -1;
+ goto out;
+ }
+ gf_msg_debug (this->name, 0,
+ "Moving gfid: %s to unlink_path : %s",
+ gfid_path, unlink_path);
+ ret = sys_rename (gfid_path, unlink_path);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_UNLINK_FAILED,
+ "Creation of unlink entry failed for gfid: %s",
+ unlink_path);
+ goto out;
+ }
+ ret = posix_add_unlink_to_ctx (loc->inode, this, unlink_path);
+ if (ret < 0)
goto out;
- SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
+out:
+ return ret;
+}
- parentpath = dirname (pathdup);
+int32_t
+posix_unlink_gfid_handle_and_entry (xlator_t *this, const char *real_path,
+ struct iatt *stbuf, int32_t *op_errno,
+ loc_t *loc, gf_boolean_t get_link_count,
+ dict_t *rsp_dict)
+{
+ int fd_count = 0;
+ int32_t ret = 0;
+ struct iatt prebuf = {0,};
+ gf_boolean_t locked = _gf_false;
- op_ret = posix_lstat_with_gen (this, parentpath, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
- goto out;
+ /* Unlink the gfid_handle_first */
+ if (stbuf && stbuf->ia_nlink == 1) {
+
+ LOCK (&loc->inode->lock);
+
+ if (loc->inode->fd_count == 0) {
+ UNLOCK (&loc->inode->lock);
+ ret = posix_handle_unset (this, stbuf->ia_gfid, NULL);
+ } else {
+ UNLOCK (&loc->inode->lock);
+ ret = posix_move_gfid_to_unlink (this, stbuf->ia_gfid,
+ loc);
+ }
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_UNLINK_FAILED, "unlink of gfid handle "
+ "failed for path:%s with gfid %s",
+ real_path, uuid_utoa (stbuf->ia_gfid));
+ }
}
- op_ret = mkdir (real_path, mode);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "mkdir of %s failed: %s", loc->path,
- strerror (op_errno));
- goto out;
+ if (get_link_count) {
+ LOCK (&loc->inode->lock);
+ locked = _gf_true;
+ ret = posix_pstat (this, loc->gfid, real_path, &prebuf);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_LSTAT_FAILED, "lstat on %s failed",
+ real_path);
+ goto err;
+ }
}
-#ifndef HAVE_SET_FSID
- op_ret = chown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s", loc->path,
- strerror (op_errno));
- goto out;
+ /* Unlink the actual file */
+ ret = sys_unlink (real_path);
+ if (ret == -1) {
+ if (op_errno)
+ *op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED,
+ "unlink of %s failed", real_path);
+ goto err;
}
-#endif
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", loc->path,
- strerror (op_errno));
- goto out;
+ if (locked) {
+ UNLOCK (&loc->inode->lock);
+ locked = _gf_false;
}
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
- goto out;
+ ret = dict_set_uint32 (rsp_dict, GET_LINK_COUNT, prebuf.ia_nlink);
+ if (ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+ "failed to set "GET_LINK_COUNT" for %s", real_path);
+
+ return 0;
+
+err:
+ if (locked) {
+ UNLOCK (&loc->inode->lock);
+ locked = _gf_false;
}
+ return -1;
+}
- op_ret = 0;
+static
+int32_t posix_set_iatt_in_dict (dict_t *dict, struct iatt *in_stbuf)
+{
+ int ret = -1;
+ struct iatt *stbuf = NULL;
+ int32_t len = sizeof(struct iatt);
- out:
- if (pathdup)
- GF_FREE (pathdup);
+ if (!dict || !in_stbuf)
+ return ret;
- SET_TO_OLD_FS_ID ();
+ stbuf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!stbuf)
+ return ret;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);
+ memcpy (stbuf, in_stbuf, len);
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_path);
- }
+ ret = dict_set_bin (dict, DHT_IATT_IN_XDATA_KEY, stbuf, len);
+ if (ret)
+ GF_FREE (stbuf);
- return 0;
+ return ret;
}
+/*
+ * posix_skip_non_linkto_unlink - decide whether an unlink has to be skipped
+ * because the entry is not a linkto file.
+ *
+ * xdata:        request dictionary consulted for `key`
+ * loc:          entry being unlinked; loc->inode->lock is held around the
+ *               xattr probe
+ * key:          int32 dictionary key that enables the check when it can be
+ *               read from xdata and is non-zero
+ * linkto_xattr: name of the xattr probed on real_path
+ * stbuf:        iatt tested with IS_DHT_LINKFILE_MODE
+ * real_path:    backend path of the entry
+ *
+ * Returns _gf_false when the key cannot be read from xdata or is zero.
+ * Otherwise returns _gf_true when stbuf fails the linkfile-mode test or
+ * when sys_lgetxattr() reports a size <= 0 for linkto_xattr, and _gf_false
+ * when the xattr has a positive size.
+ */
+gf_boolean_t
+posix_skip_non_linkto_unlink (dict_t *xdata, loc_t *loc, char *key,
+                              const char *linkto_xattr, struct iatt *stbuf,
+                              const char *real_path)
+{
+        gf_boolean_t should_skip = _gf_false;
+        int          requested   = 0;
+        ssize_t      linkto_len  = -1;
+
+        /* Nothing to check unless the caller asked for it through `key`. */
+        if (dict_get_int32 (xdata, key, &requested) != 0 || !requested)
+                return _gf_false;
+
+        /* Entries without the linkfile mode are skipped without a probe. */
+        if (!IS_DHT_LINKFILE_MODE (stbuf))
+                return _gf_true;
+
+        LOCK (&loc->inode->lock);
+        {
+                linkto_len = sys_lgetxattr (real_path, linkto_xattr, NULL, 0);
+                if (linkto_len <= 0)
+                        should_skip = _gf_true;
+        }
+        UNLOCK (&loc->inode->lock);
+
+        gf_msg ("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS,
+                "linkto_xattr status: %"PRIu32" for %s", should_skip,
+                real_path);
+
+        return should_skip;
+}
int32_t
posix_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- char *pathdup = NULL;
- char *parentpath = NULL;
- int32_t fd = -1;
- struct posix_private *priv = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ char *par_path = NULL;
+ int32_t fd = -1;
+ struct iatt stbuf = {0,};
+ struct iatt postbuf = {0,};
+ struct posix_private *priv = NULL;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ int32_t unlink_if_linkto = 0;
+ int32_t check_open_fd = 0;
+ int32_t skip_unlink = 0;
+ int32_t fdstat_requested = 0;
+ int32_t ctr_link_req = 0;
+ ssize_t xattr_size = -1;
+ int32_t is_dht_linkto_file = 0;
+ dict_t *unwind_dict = NULL;
+ void *uuid = NULL;
+ char uuid_str[GF_UUID_BUF_SIZE] = {0};
+ char gfid_str[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t get_link_count = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -1461,84 +1885,194 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
- op_ret = posix_lstat_with_gen (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_path);
goto out;
}
priv = this->private;
- if (priv->background_unlink) {
- if (IA_ISREG (loc->inode->ia_type)) {
- fd = open (real_path, O_RDONLY);
- if (fd == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed: %s", loc->path,
- strerror (op_errno));
- goto out;
- }
+
+ op_ret = dict_get_ptr (xdata, TIER_LINKFILE_GFID, &uuid);
+
+ if (!op_ret && gf_uuid_compare (uuid, stbuf.ia_gfid)) {
+ op_errno = ENOENT;
+ op_ret = -1;
+ gf_uuid_unparse (uuid, uuid_str);
+ gf_uuid_unparse (stbuf.ia_gfid, gfid_str);
+ gf_msg_debug (this->name, op_errno, "Mismatch in gfid for path "
+ "%s. Aborting the unlink. loc->gfid = %s, "
+ "stbuf->ia_gfid = %s", real_path,
+ uuid_str, gfid_str);
+ goto out;
+ }
+
+ op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK,
+ &check_open_fd);
+
+ if (!op_ret && check_open_fd) {
+
+ LOCK (&loc->inode->lock);
+
+ if (loc->inode->fd_count) {
+ skip_unlink = 1;
+ }
+
+ UNLOCK (&loc->inode->lock);
+
+ gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_KEY_STATUS_INFO,
+ "open-fd-key-status: %"PRIu32" for %s", skip_unlink,
+ real_path);
+
+ if (skip_unlink) {
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+ }
+ /*
+ * If either of the function return true, skip_unlink.
+ * If first first function itself return true,
+ * we don't need to call second function, skip unlink.
+ */
+ skip_unlink = posix_skip_non_linkto_unlink (xdata, loc,
+ DHT_SKIP_NON_LINKTO_UNLINK,
+ DHT_LINKTO, &stbuf,
+ real_path);
+ skip_unlink = skip_unlink || posix_skip_non_linkto_unlink (xdata, loc,
+ TIER_SKIP_NON_LINKTO_UNLINK,
+ TIER_LINKTO, &stbuf,
+ real_path);
+ if (skip_unlink) {
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type) &&
+ xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) {
+ fdstat_requested = 1;
+ }
+
+ if (fdstat_requested ||
+ (priv->background_unlink && IA_ISREG (loc->inode->ia_type))) {
+ fd = open (real_path, O_RDONLY);
+ if (fd == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_OPEN_FAILED,
+ "open of %s failed", real_path);
+ goto out;
+ }
+ }
+
+ if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ LOCK (&loc->inode->lock);
+ {
+ UNLINK_MODIFY_PGFID_XATTR (real_path, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret,
+ this, unlock);
+ }
+ unlock:
+ UNLOCK (&loc->inode->lock);
+
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_XATTR_FAILED, "modification of "
+ "parent gfid xattr failed (path:%s gfid:%s)",
+ real_path, uuid_utoa (loc->inode->gfid));
+ if (op_errno != ENOATTR)
+ /* Allow unlink if pgfid xattr is not set. */
+ goto out;
}
}
- op_ret = unlink (real_path);
+ unwind_dict = dict_new ();
+ if (!unwind_dict) {
+ op_errno = -ENOMEM;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (xdata && dict_get (xdata, GET_LINK_COUNT))
+ get_link_count = _gf_true;
+ op_ret = posix_unlink_gfid_handle_and_entry (this, real_path, &stbuf,
+ &op_errno, loc,
+ get_link_count,
+ unwind_dict);
if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "unlink of %s failed: %s", loc->path,
- strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
+ if (fdstat_requested) {
+ op_ret = posix_fdstat (this, fd, &postbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FSTAT_FAILED, "post operation "
+ "fstat failed on fd=%d", fd);
+ goto out;
+ }
+ op_ret = posix_set_iatt_in_dict (unwind_dict, &postbuf);
+ }
+
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_path);
goto out;
}
+ unwind_dict = posix_dict_set_nlink (xdata, unwind_dict, stbuf.ia_nlink);
op_ret = 0;
-
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- &preparent, &postparent);
+ &preparent, &postparent, unwind_dict);
if (fd != -1) {
- close (fd);
+ sys_close (fd);
+ }
+
+ /* unref unwind_dict*/
+ if (unwind_dict) {
+ dict_unref (unwind_dict);
}
return 0;
}
-int32_t
+
+int
posix_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int flags, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = NULL;
- char * pathdup = NULL;
- char * parentpath = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ char *par_path = NULL;
+ char *gfid_str = NULL;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ struct iatt stbuf = {0,};
+ struct posix_private *priv = NULL;
+ char tmp_path[PATH_MAX] = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -1547,75 +2081,117 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
+ /* The hidden directory exists for housekeeping purposes only, so an
+ rmdir request that targets it is rejected with EPERM. */
+ if (__is_root_gfid (loc->pargfid) &&
+ (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_msg (this->name, GF_LOG_WARNING, EPERM,
+ P_MSG_RMDIR_NOT_PERMITTED, "rmdir issued on %s, which "
+ "is not permitted", GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+
+ priv = this->private;
- parentpath = dirname (pathdup);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
- op_ret = posix_lstat_with_gen (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_path);
goto out;
}
- op_ret = rmdir (real_path);
+ if (flags) {
+ gfid_str = uuid_utoa (stbuf.ia_gfid);
+
+ op_ret = sys_mkdir (priv->trash_path, 0755);
+ if (errno != EEXIST && op_ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_MKDIR_FAILED,
+ "mkdir of %s failed", priv->trash_path);
+ } else {
+ (void) snprintf (tmp_path, sizeof(tmp_path), "%s/%s",
+ priv->trash_path, gfid_str);
+ op_ret = sys_rename (real_path, tmp_path);
+ pthread_cond_signal (&priv->janitor_cond);
+ }
+ } else {
+ op_ret = sys_rmdir (real_path);
+ }
op_errno = errno;
- if (op_errno == EEXIST)
- /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
- op_errno = ENOTEMPTY;
+ if (op_ret == 0) {
+ posix_handle_unset (this, stbuf.ia_gfid, NULL);
+ }
+
+ if (op_errno == EEXIST)
+ /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
+ op_errno = ENOTEMPTY;
/* No need to log a common error as ENOTEMPTY */
if (op_ret == -1 && op_errno != ENOTEMPTY) {
- gf_log (this->name, GF_LOG_ERROR,
- "rmdir of %s failed: %s", loc->path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_RMDIR_FAILED,
+ "rmdir of %s failed", real_path);
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ if (op_errno == ENOTEMPTY) {
+ gf_msg_debug (this->name, 0, "%s on %s failed", (flags)
+ ? "rename" : "rmdir", real_path);
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_DIR_OPERATION_FAILED, "%s on %s failed",
+ (flags) ? "rename" : "rmdir", real_path);
+ }
goto out;
+ }
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_path);
goto out;
}
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- &preparent, &postparent);
+ &preparent, &postparent, NULL);
return 0;
}
-int32_t
+
+int
posix_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = 0;
- struct iatt stbuf = { 0, };
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- char was_present = 1;
- char *pathdup = NULL;
- char *parentpath = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char * real_path = 0;
+ char * par_path = 0;
+ struct iatt stbuf = { 0, };
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -1627,85 +2203,111 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)){
- was_present = 0;
- }
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
gid = frame->root->gid;
-
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0)
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
goto out;
+ }
SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
- op_ret = posix_lstat_with_gen (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_path);
goto out;
}
- op_ret = symlink (linkname, real_path);
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
+ op_ret = sys_symlink (linkname, real_path);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink of %s --> %s failed: %s",
- loc->path, linkname, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_SYMLINK_FAILED,
+ "symlink of %s --> %s failed",
+ real_path, linkname);
goto out;
}
+ entry_created = _gf_true;
+
#ifndef HAVE_SET_FSID
- op_ret = lchown (real_path, frame->root->uid, gid);
+ op_ret = sys_lchown (real_path, frame->root->uid, gid);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lchown failed on %s: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
+ "lchown failed on %s", real_path);
goto out;
}
#endif
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed", real_path);
+ }
+
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+ SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+ignore:
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed ", real_path);
+ }
+
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ } else {
+ gfid_set = _gf_true;
+ }
+
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat failed on %s: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s", real_path);
goto out;
}
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_path);
goto out;
}
-
- op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
+ op_ret = 0;
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);
+ (loc)?loc->inode:NULL, &stbuf, &preparent,
+ &postparent, NULL);
+
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_unlink (real_path);
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_path);
+ if (gfid_set)
+ posix_gfid_unset (this, xdata);
}
return 0;
@@ -1714,23 +2316,31 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
int
posix_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_oldpath = NULL;
- char *real_newpath = NULL;
- struct iatt stbuf = {0, };
- struct posix_private *priv = NULL;
- char was_present = 1;
- char *oldpathdup = NULL;
- char *oldparentpath = NULL;
- char *newpathdup = NULL;
- char *newparentpath = NULL;
- struct iatt preoldparent = {0, };
- struct iatt postoldparent = {0, };
- struct iatt prenewparent = {0, };
- struct iatt postnewparent = {0, };
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_oldpath = NULL;
+ char *real_newpath = NULL;
+ char *par_oldpath = NULL;
+ char *par_newpath = NULL;
+ struct iatt stbuf = {0, };
+ struct posix_private *priv = NULL;
+ char was_present = 1;
+ struct iatt preoldparent = {0, };
+ struct iatt postoldparent = {0, };
+ struct iatt prenewparent = {0, };
+ struct iatt postnewparent = {0, };
+ char olddirid[64];
+ char newdirid[64];
+ uuid_t victim = {0};
+ int was_dir = 0;
+ int nlink = 0;
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ dict_t *unwind_dict = NULL;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t get_link_count = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -1743,98 +2353,225 @@ posix_rename (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
- MAKE_REAL_PATH (real_newpath, this, newloc->path);
+ MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
+ if (!real_oldpath || !par_oldpath) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
- oldpathdup = gf_strdup (real_oldpath);
- GF_VALIDATE_OR_GOTO (this->name, oldpathdup, out);
+ MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf);
+ if (!real_newpath || !par_newpath) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
- oldparentpath = dirname (oldpathdup);
+ unwind_dict = dict_new ();
+ if (!unwind_dict) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
- op_ret = posix_lstat_with_gen (this, oldparentpath, &preoldparent);
+ op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- oldloc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_oldpath);
goto out;
}
- newpathdup = gf_strdup (real_newpath);
- GF_VALIDATE_OR_GOTO (this->name, newpathdup, out);
-
- newparentpath = dirname (newpathdup);
-
- op_ret = posix_lstat_with_gen (this, newparentpath, &prenewparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- newloc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_newpath);
goto out;
}
- op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf);
+ op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)){
was_present = 0;
+ } else {
+ gf_uuid_copy (victim, stbuf.ia_gfid);
+ if (IA_ISDIR (stbuf.ia_type))
+ was_dir = 1;
+ nlink = stbuf.ia_nlink;
}
- op_ret = rename (real_oldpath, real_newpath);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name,
- (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
- "rename of %s to %s failed: %s",
- oldloc->path, newloc->path, strerror (op_errno));
+ if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) {
+ gf_msg (this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND,
+ "found directory at %s while expecting ENOENT",
+ real_newpath);
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ if (was_present && IA_ISDIR(stbuf.ia_type) &&
+ gf_uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) {
+ gf_msg (this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND,
+ "found directory %s at %s while renaming %s",
+ uuid_utoa_r (newloc->inode->gfid, olddirid),
+ real_newpath,
+ uuid_utoa_r (stbuf.ia_gfid, newdirid));
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR (oldloc->inode->ia_type))
+ posix_handle_unset (this, oldloc->inode->gfid, NULL);
+
+ LOCK (&oldloc->inode->lock);
+ {
+ if (!IA_ISDIR (oldloc->inode->ia_type)
+ && priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
+ PGFID_XATTR_KEY_PREFIX,
+ oldloc->pargfid);
+ UNLINK_MODIFY_PGFID_XATTR (real_oldpath,
+ pgfid_xattr_key,
+ nlink_samepgfid, 0,
+ op_ret,
+ this, unlock);
+ }
+
+ if ((xdata) && (dict_get (xdata, GET_LINK_COUNT))
+ && (real_newpath) && (was_present)) {
+ LOCK (&newloc->inode->lock);
+ locked = _gf_true;
+ get_link_count = _gf_true;
+ op_ret = posix_pstat (this, newloc->gfid, real_newpath,
+ &stbuf);
+ if ((op_ret == -1) && (errno != ENOENT)) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_newpath);
+ goto unlock;
+ }
+ }
+
+ op_ret = sys_rename (real_oldpath, real_newpath);
+ if (op_ret == -1) {
+ op_errno = errno;
+ if (op_errno == ENOTEMPTY) {
+ gf_msg_debug (this->name, 0, "rename of %s to"
+ " %s failed: %s", real_oldpath,
+ real_newpath,
+ strerror (op_errno));
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_RENAME_FAILED,
+ "rename of %s to %s failed",
+ real_oldpath, real_newpath);
+ }
+
+ if (priv->update_pgfid_nlinks
+ && !IA_ISDIR (oldloc->inode->ia_type)) {
+ LINK_MODIFY_PGFID_XATTR (real_oldpath,
+ pgfid_xattr_key,
+ nlink_samepgfid, 0,
+ op_ret,
+ this, unlock);
+ }
+
+ goto unlock;
+ }
+
+ if (locked) {
+ UNLOCK (&newloc->inode->lock);
+ locked = _gf_false;
+ }
+
+ if ((get_link_count) &&
+ (dict_set_uint32 (unwind_dict, GET_LINK_COUNT,
+ stbuf.ia_nlink)))
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_SET_XDATA_FAIL, "failed to set "
+ GET_LINK_COUNT" for %s", real_newpath);
+
+ if (!IA_ISDIR (oldloc->inode->ia_type)
+ && priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
+ PGFID_XATTR_KEY_PREFIX,
+ newloc->pargfid);
+ LINK_MODIFY_PGFID_XATTR (real_newpath,
+ pgfid_xattr_key,
+ nlink_samepgfid, 0,
+ op_ret,
+ this, unlock);
+ }
+ }
+unlock:
+ if (locked) {
+ UNLOCK (&newloc->inode->lock);
+ locked = _gf_false;
+ }
+ UNLOCK (&oldloc->inode->lock);
+
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED,
+ "modification of "
+ "parent gfid xattr failed (gfid:%s)",
+ uuid_utoa (oldloc->inode->gfid));
goto out;
}
- op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf);
+ if (was_dir)
+ posix_handle_unset (this, victim, NULL);
+
+ if (was_present && !was_dir && nlink == 1)
+ posix_handle_unset (this, victim, NULL);
+
+ if (IA_ISDIR (oldloc->inode->ia_type)) {
+ posix_handle_soft (this, real_newpath, newloc,
+ oldloc->inode->gfid, NULL);
+ }
+
+ op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- real_newpath, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_newpath);
goto out;
}
- op_ret = posix_lstat_with_gen (this, oldparentpath, &postoldparent);
+ op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- oldloc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_oldpath);
goto out;
}
- op_ret = posix_lstat_with_gen (this, newparentpath, &postnewparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- newloc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_newpath);
goto out;
}
+ if (was_present)
+ unwind_dict = posix_dict_set_nlink (xdata, unwind_dict, nlink);
op_ret = 0;
-
- out:
- if (oldpathdup)
- GF_FREE (oldpathdup);
-
- if (newpathdup)
- GF_FREE (newpathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf,
&preoldparent, &postoldparent,
- &prenewparent, &postnewparent);
+ &prenewparent, &postnewparent, unwind_dict);
- if ((op_ret == -1) && !was_present) {
- unlink (real_newpath);
- }
+ if (unwind_dict)
+ dict_unref (unwind_dict);
return 0;
}
@@ -1842,19 +2579,20 @@ posix_rename (call_frame_t *frame, xlator_t *this,
int
posix_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_oldpath = 0;
- char *real_newpath = 0;
- struct iatt stbuf = {0, };
- struct posix_private *priv = NULL;
- char was_present = 1;
- char *newpathdup = NULL;
- char *newparentpath = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_oldpath = 0;
+ char *real_newpath = 0;
+ char *par_newpath = 0;
+ struct iatt stbuf = {0, };
+ struct posix_private *priv = NULL;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int32_t nlink_samepgfid = 0;
+ char *pgfid_xattr_key = NULL;
+ gf_boolean_t entry_created = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -1867,69 +2605,90 @@ posix_link (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
- MAKE_REAL_PATH (real_newpath, this, newloc->path);
-
- op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
+ MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
+ if (!real_oldpath) {
+ op_errno = errno;
+ goto out;
}
- newpathdup = gf_strdup (real_newpath);
- if (!newpathdup) {
- gf_log (this->name, GF_LOG_ERROR, "strdup failed");
- op_errno = ENOMEM;
+ MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf);
+ if (!real_newpath || !par_newpath) {
+ op_ret = -1;
+ op_errno = ESTALE;
goto out;
}
- newparentpath = dirname (newpathdup);
- op_ret = posix_lstat_with_gen (this, newparentpath, &preparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
- newparentpath, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed: %s", par_newpath);
goto out;
}
- op_ret = link (real_oldpath, real_newpath);
+
+ op_ret = sys_link (real_oldpath, real_newpath);
+
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "link %s to %s failed: %s",
- oldloc->path, newloc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LINK_FAILED,
+ "link %s to %s failed",
+ real_oldpath, real_newpath);
goto out;
}
- op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf);
+ entry_created = _gf_true;
+
+ op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- real_newpath, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_newpath);
goto out;
}
- op_ret = posix_lstat_with_gen (this, newparentpath, &postparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
- newparentpath, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed: %s", par_newpath);
goto out;
}
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ newloc->pargfid);
+
+ LOCK (&newloc->inode->lock);
+ {
+ LINK_MODIFY_PGFID_XATTR (real_newpath, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret,
+ this, unlock);
+ }
+ unlock:
+ UNLOCK (&newloc->inode->lock);
+
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_XATTR_FAILED, "modification of "
+ "parent gfid xattr failed (path:%s gfid:%s)",
+ real_newpath, uuid_utoa (newloc->inode->gfid));
+ goto out;
+ }
+ }
+
op_ret = 0;
- out:
- if (newpathdup)
- GF_FREE (newpathdup);
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno,
(oldloc)?oldloc->inode:NULL, &stbuf, &preparent,
- &postparent);
+ &postparent, NULL);
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_newpath);
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_unlink (real_newpath);
}
return 0;
@@ -1937,10 +2696,8 @@ posix_link (call_frame_t *frame, xlator_t *this,
int32_t
-posix_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -1959,66 +2716,66 @@ posix_truncate (call_frame_t *frame,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
- op_ret = posix_lstat_with_gen (this, real_path, &prebuf);
+ MAKE_INODE_HANDLE (real_path, this, loc, &prebuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on %s failed",
+ real_path ? real_path : "<null>");
goto out;
}
- op_ret = truncate (real_path, offset);
+ op_ret = sys_truncate (real_path, offset);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "truncate on %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+ "truncate on %s failed", real_path);
goto out;
}
- op_ret = posix_lstat_with_gen (this, real_path, &postbuf);
+ op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s",
- real_path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_path);
goto out;
}
op_ret = 0;
-
- out:
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- &prebuf, &postbuf);
+ &prebuf, &postbuf, NULL);
return 0;
}
-int32_t
+int
posix_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t _fd = -1;
- int _flags = 0;
- char * real_path = NULL;
- struct iatt stbuf = {0, };
- struct posix_fd * pfd = NULL;
- struct posix_private * priv = NULL;
- char was_present = 1;
-
- gid_t gid = 0;
- char *pathdup = NULL;
- char *parentpath = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t _fd = -1;
+ int _flags = 0;
+ char * real_path = NULL;
+ char * par_path = NULL;
+ struct iatt stbuf = {0, };
+ struct posix_fd * pfd = NULL;
+ struct posix_private * priv = NULL;
+ char was_present = 1;
+
+ gid_t gid = 0;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+
+ int nlink_samepgfid = 0;
+ char * pgfid_xattr_key = NULL;
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -2031,31 +2788,30 @@ posix_create (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
gid = frame->root->gid;
- op_ret = setgid_override (this, real_path, &gid);
-
- if (op_ret < 0) {
+ SET_FS_ID (frame->root->uid, gid);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
goto out;
}
- SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
-
- op_ret = posix_lstat_with_gen (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed",
+ par_path);
goto out;
}
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
if (!flags) {
_flags = O_CREAT | O_RDWR | O_EXCL;
}
@@ -2063,7 +2819,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
_flags = flags | O_CREAT;
}
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)) {
was_present = 0;
}
@@ -2076,53 +2832,89 @@ posix_create (call_frame_t *frame, xlator_t *this,
if (_fd == -1) {
op_errno = errno;
op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "open on %s failed: %s", loc->path,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED,
+ "open on %s failed", real_path);
goto out;
}
+ if ((_flags & O_CREAT) && (_flags & O_EXCL)) {
+ entry_created = _gf_true;
+ }
+
+
+ if (was_present)
+ goto fill_stat;
+
#ifndef HAVE_SET_FSID
- op_ret = chown (real_path, frame->root->uid, gid);
+ op_ret = sys_chown (real_path, frame->root->uid, gid);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s",
- real_path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
+ "chown on %s failed", real_path);
}
#endif
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed", real_path);
+ }
+
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+ SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+ignore:
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed ", real_path);
+ }
+
+fill_stat:
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ } else {
+ gfid_set = _gf_true;
+ }
- op_ret = posix_fstat_with_gen (this, _fd, &stbuf);
+ op_ret = posix_fdstat (this, _fd, &stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat on %d failed: %s", _fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat on %d failed", _fd);
goto out;
}
- op_ret = posix_lstat_with_gen (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed",
+ par_path);
goto out;
}
- op_ret = -1;
+ op_ret = -1;
pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
-
if (!pfd) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
pfd->flags = flags;
pfd->fd = _fd;
- fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd context path=%s fd=%p",
+ real_path, fd);
LOCK (&priv->lock);
{
@@ -2132,29 +2924,31 @@ posix_create (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
+out:
SET_TO_OLD_FS_ID ();
if ((-1 == op_ret) && (_fd != -1)) {
- close (_fd);
-
- if (!was_present) {
- unlink (real_path);
- }
+ sys_close (_fd);
}
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,
fd, (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent);
+ &postparent, xdata);
+
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_unlink (real_path);
+
+ if (gfid_set)
+ posix_gfid_unset (this, xdata);
+ }
return 0;
}
int32_t
posix_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, int wbflags)
+ loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -2162,8 +2956,6 @@ posix_open (call_frame_t *frame, xlator_t *this,
int32_t _fd = -1;
struct posix_fd *pfd = NULL;
struct posix_private *priv = NULL;
- char was_present = 1;
- gid_t gid = 0;
struct iatt stbuf = {0, };
DECLARE_OLD_FS_ID_VAR;
@@ -2177,69 +2969,49 @@ posix_open (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0)
+ if (IA_ISLNK (stbuf.ia_type)) {
+ op_ret = -1;
+ op_errno = ELOOP;
goto out;
+ }
- SET_FS_ID (frame->root->uid, gid);
+ op_ret = -1;
+ SET_FS_ID (frame->root->uid, frame->root->gid);
if (priv->o_direct)
flags |= O_DIRECT;
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
- }
-
_fd = open (real_path, flags, 0);
if (_fd == -1) {
op_ret = -1;
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "open on %s: %s", real_path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED,
+ "open on %s, flags: %d", real_path, flags);
goto out;
}
pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
-
if (!pfd) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
pfd->flags = flags;
pfd->fd = _fd;
- if (wbflags == GF_OPEN_FSYNC)
- pfd->flushwrites = 1;
- fd_ctx_set (fd, this, (uint64_t)(long)pfd);
-
-#ifndef HAVE_SET_FSID
- if (flags & O_CREAT) {
- op_ret = chown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
- }
-#endif
-
- if (flags & O_CREAT) {
- op_ret = posix_lstat_with_gen (this, real_path, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat on (%s) "
- "failed: %s", real_path, strerror (op_errno));
- goto out;
- }
- }
+ op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd context path=%s fd=%p",
+ real_path, fd);
LOCK (&priv->lock);
{
@@ -2249,29 +3021,24 @@ posix_open (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
+out:
if (op_ret == -1) {
if (_fd != -1) {
- close (_fd);
- _fd = -1;
+ sys_close (_fd);
}
}
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL);
return 0;
}
-#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
- (unsigned long)(~(bound - 1))))
-
int
posix_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_pfd = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
int _fd = -1;
@@ -2281,7 +3048,6 @@ posix_readv (call_frame_t *frame, xlator_t *this,
struct iovec vec = {0,};
struct posix_fd * pfd = NULL;
struct iatt stbuf = {0,};
- int align = 1;
int ret = -1;
VALIDATE_OR_GOTO (frame, out);
@@ -2292,39 +3058,36 @@ posix_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
if (!size) {
op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG, "size=%"GF_PRI_SIZET, size);
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL,
+ P_MSG_INVALID_ARGUMENT, "size=%"GF_PRI_SIZET, size);
goto out;
}
- if (pfd->flags & O_DIRECT) {
- align = 4096; /* align to page boundary */
- }
-
- iobuf = iobuf_get (this->ctx->iobuf_pool);
+ iobuf = iobuf_get_page_aligned (this->ctx->iobuf_pool, size,
+ ALIGN_SIZE);
if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
_fd = pfd->fd;
- op_ret = pread (_fd, iobuf->ptr, size, offset);
+ op_ret = sys_pread (_fd, iobuf->ptr, size, offset);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "read failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_READ_FAILED, "read failed on gfid=%s, "
+ "fd=%p, offset=%"PRIu64" size=%"GF_PRI_SIZET", "
+ "buf=%p", uuid_utoa (fd->inode->gfid), fd,
+ offset, size, iobuf->ptr);
goto out;
}
@@ -2337,7 +3100,6 @@ posix_readv (call_frame_t *frame, xlator_t *this,
vec.iov_base = iobuf->ptr;
vec.iov_len = op_ret;
- op_ret = -1;
iobref = iobref_new ();
iobref_add (iobref, iobuf);
@@ -2347,26 +3109,23 @@ posix_readv (call_frame_t *frame, xlator_t *this,
* we read from
*/
- op_ret = posix_fstat_with_gen (this, _fd, &stbuf);
+ op_ret = posix_fdstat (this, _fd, &stbuf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
goto out;
}
/* Hack to notify higher layers of EOF. */
- if (stbuf.ia_size == 0)
- op_errno = ENOENT;
- else if ((offset + vec.iov_len) == stbuf.ia_size)
+ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
op_errno = ENOENT;
op_ret = vec.iov_len;
out:
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- &vec, 1, &stbuf, iobref);
+ &vec, 1, &stbuf, iobref, NULL);
if (iobref)
iobref_unref (iobref);
@@ -2390,7 +3149,7 @@ __posix_pwritev (int fd, struct iovec *vector, int count, off_t offset)
internal_off = offset;
for (idx = 0; idx < count; idx++) {
- retval = pwrite (fd, vector[idx].iov_base, vector[idx].iov_len,
+ retval = sys_pwrite (fd, vector[idx].iov_base, vector[idx].iov_len,
internal_off);
if (retval == -1) {
op_ret = -errno;
@@ -2404,14 +3163,12 @@ err:
return op_ret;
}
-
int32_t
__posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
int odirect)
{
int32_t op_ret = 0;
int idx = 0;
- int align = 4096;
int max_buf_size = 0;
int retval = 0;
char *buf = NULL;
@@ -2427,7 +3184,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
max_buf_size = vector[idx].iov_len;
}
- alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char);
+ alloc_buf = _page_aligned_alloc (max_buf_size, &buf);
if (!alloc_buf) {
op_ret = -errno;
goto err;
@@ -2435,13 +3192,10 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
internal_off = startoff;
for (idx = 0; idx < count; idx++) {
- /* page aligned buffer */
- buf = ALIGN_BUF (alloc_buf, align);
-
memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);
/* not sure whether writev works on O_DIRECT'd fd */
- retval = pwrite (fd, buf, vector[idx].iov_len, internal_off);
+ retval = sys_pwrite (fd, buf, vector[idx].iov_len, internal_off);
if (retval == -1) {
op_ret = -errno;
goto err;
@@ -2452,17 +3206,67 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
}
err:
- if (alloc_buf)
- GF_FREE (alloc_buf);
+ GF_FREE (alloc_buf);
return op_ret;
}
+dict_t*
+_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
+{
+ dict_t *rsp_xdata = NULL;
+ int32_t ret = 0;
+ inode_t *inode = NULL;
+
+ if (fd)
+ inode = fd->inode;
+
+ if (!fd || !fd->inode || gf_uuid_is_null (fd->inode->gfid)) {
+ gf_msg_callingfn (this->name, GF_LOG_ERROR, EINVAL,
+ P_MSG_XATTR_FAILED, "fd: %p inode: %p"
+ "gfid:%s", fd, inode?inode:0,
+ inode?uuid_utoa(inode->gfid):"N/A");
+ goto out;
+ }
+
+ if (!xdata)
+ goto out;
+
+ rsp_xdata = dict_new();
+ if (!rsp_xdata)
+ goto out;
+
+ if (dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) {
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
+ fd->inode->fd_count);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "%s: Failed to set "
+ "dictionary value for %s",
+ uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_OPEN_FD_COUNT);
+ }
+ }
+
+ if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
+ is_append);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "%s: Failed to set "
+ "dictionary value for %s",
+ uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_WRITE_IS_APPEND);
+ }
+ }
+out:
+ return rsp_xdata;
+}
int32_t
-posix_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -2472,8 +3276,11 @@ posix_writev (call_frame_t *frame, xlator_t *this,
struct iatt preop = {0,};
struct iatt postop = {0,};
int ret = -1;
-
- uint64_t tmp_pfd = 0;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t write_append = _gf_false;
+ gf_boolean_t update_atomic = _gf_false;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -2485,75 +3292,132 @@ posix_writev (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_msg (this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gen (this, _fd, &preop);
+ if (xdata) {
+ if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND))
+ write_append = _gf_true;
+ if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+ update_atomic = _gf_true;
+ }
+
+ /* The write_is_append check and write must happen
+ atomically. Else another write can overtake this
+ write after the check and get written earlier.
+
+ So lock before preop-stat and unlock after write.
+ */
+
+ /*
+ * The update_atomic option is to instruct posix to do prestat,
+ * write and poststat atomically. This is to prevent any modification to
+ * ia_size and ia_blocks until poststat and the diff in their values
+ * between pre and poststat could be of use for some translators (shard
+ * as of today).
+ */
+
+ if (write_append || update_atomic) {
+ locked = _gf_true;
+ LOCK(&fd->inode->lock);
+ }
+
+ op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
goto out;
}
+ if (locked && write_append) {
+ if (preop.ia_size == offset || (fd->flags & O_APPEND))
+ is_append = 1;
+ }
+
op_ret = __posix_writev (_fd, vector, count, offset,
(pfd->flags & O_DIRECT));
+
+ if (locked && (!update_atomic)) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
+
if (op_ret < 0) {
op_errno = -op_ret;
op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64
- ", %s", offset, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED,
+ "write failed: offset %"PRIu64
+ ",", offset);
goto out;
}
- LOCK (&priv->lock);
- {
- priv->write_value += op_ret;
- }
- UNLOCK (&priv->lock);
+ rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append);
+ /* writev successful, we also need to get the stat of
+ * the file we wrote to
+ */
- if (op_ret >= 0) {
- /* wiretv successful, we also need to get the stat of
- * the file we wrote to
- */
+ ret = posix_fdstat (this, _fd, &postop);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p",
+ fd);
+ goto out;
+ }
- if (pfd->flushwrites) {
- /* NOTE: ignore the error, if one occurs at this
- * point */
- fsync (_fd);
- }
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
- ret = posix_fstat_with_gen (this, _fd, &postop);
- if (ret == -1) {
- op_ret = -1;
+ if (flags & (O_SYNC|O_DSYNC)) {
+ ret = sys_fsync (_fd);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_WRITEV_FAILED,
+ "fsync() in writev on fd %d failed",
+ _fd);
+ op_ret = -1;
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation fstat failed on fd=%p: %s",
- fd, strerror (op_errno));
goto out;
}
}
- out:
+ LOCK (&priv->lock);
+ {
+ priv->write_value += op_ret;
+ }
+ UNLOCK (&priv->lock);
+
+out:
+
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop,
+ rsp_xdata);
+ if (rsp_xdata)
+ dict_unref (rsp_xdata);
return 0;
}
int32_t
posix_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
char * real_path = NULL;
int32_t op_ret = -1;
@@ -2566,17 +3430,21 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
VALIDATE_OR_GOTO (this->private, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
priv = this->private;
- op_ret = statvfs (real_path, &buf);
+ op_ret = sys_statvfs (real_path, &buf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "statvfs failed on %s: %s",
- real_path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ "statvfs failed on %s", real_path);
goto out;
}
@@ -2591,81 +3459,64 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf);
+out:
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL);
return 0;
}
int32_t
posix_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
- int _fd = -1;
- struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
+ struct posix_fd *pfd = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
"pfd is NULL on fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
-
- _fd = pfd->fd;
-
- /* do nothing */
op_ret = 0;
- out:
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+out:
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
return 0;
}
int32_t
-posix_release (xlator_t *this,
- fd_t *fd)
+posix_release (xlator_t *this, fd_t *fd)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int _fd = -1;
struct posix_private * priv = NULL;
struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
+ uint64_t tmp_pfd = 0;
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
priv = this->private;
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = fd_ctx_del (fd, this, &tmp_pfd);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
"pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
-
- _fd = pfd->fd;
+ pfd = (struct posix_fd *)(long)tmp_pfd;
if (pfd->dir) {
- op_ret = -1;
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL,
"pfd->dir is %p (not NULL) for file fd=%p",
pfd->dir, fd);
}
@@ -2684,25 +3535,50 @@ posix_release (xlator_t *this,
}
UNLOCK (&priv->lock);
- op_ret = 0;
-
- out:
+out:
return 0;
}
+int
+posix_batch_fsync (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int datasync, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ pthread_mutex_lock (&priv->fsync_mutex);
+ {
+ list_add_tail (&stub->list, &priv->fsyncs);
+ priv->fsync_queue_count++;
+ pthread_cond_signal (&priv->fsync_cond);
+ }
+ pthread_mutex_unlock (&priv->fsync_mutex);
+
+ return 0;
+}
+
+
int32_t
posix_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
int _fd = -1;
struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
struct iatt preop = {0,};
struct iatt postop = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -2718,304 +3594,659 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
goto out;
#endif
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ priv = this->private;
+ if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
+ posix_batch_fsync (frame, this, fd, datasync, xdata);
+ return 0;
+ }
+
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
"pfd not found in fd's ctx");
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gen (this, _fd, &preop);
+ op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "pre-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
goto out;
}
if (datasync) {
- ;
-#ifdef HAVE_FDATASYNC
- op_ret = fdatasync (_fd);
-#endif
+ op_ret = sys_fdatasync (_fd);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FSYNC_FAILED, "fdatasync on fd=%p"
+ "failed:", fd);
+ goto out;
+ }
} else {
- op_ret = fsync (_fd);
+ op_ret = sys_fsync (_fd);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsync on fd=%p failed: %s",
- fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_FSYNC_FAILED, "fsync on fd=%p "
+ "failed", fd);
goto out;
}
}
- op_ret = posix_fstat_with_gen (this, _fd, &postop);
+ op_ret = posix_fdstat (this, _fd, &postop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "post-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd);
goto out;
}
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop,
+ NULL);
return 0;
}
static int gf_posix_xattr_enotsup_log;
+static int
+_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ posix_xattr_filler_t *filler = NULL;
-int
-set_file_contents (xlator_t *this, char *real_path,
- data_pair_t *trav, int flags)
+ filler = tmp;
+
+ return posix_handle_pair (filler->this, filler->real_path, k, v,
+ filler->flags, filler->stbuf);
+}
+
+#ifdef GF_DARWIN_HOST_OS
+static int
+map_xattr_flags(int flags)
{
- char * key = NULL;
- char real_filepath[ZR_PATH_MAX] = {0,};
- int32_t file_fd = -1;
- int op_ret = 0;
- int ret = -1;
-
- key = &(trav->key[15]);
- sprintf (real_filepath, "%s/%s", real_path, key);
-
- if (flags & XATTR_REPLACE) {
- /* if file exists, replace it
- * else, error out */
- file_fd = open (real_filepath, O_TRUNC|O_WRONLY);
-
- if (file_fd == -1) {
- goto create;
- }
-
- if (trav->value->len) {
- ret = write (file_fd, trav->value->data,
- trav->value->len);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "write failed while doing setxattr "
- "for key %s on path %s: %s",
- key, real_filepath, strerror (errno));
- goto out;
- }
+ /* DARWIN has different defines on XATTR_ flags.
+ There do not seem to be a POSIX standard
+ Parse any other flags over.
+ */
+ int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE);
+ if (GF_XATTR_CREATE & flags)
+ darwinflags |= XATTR_CREATE;
+ if (GF_XATTR_REPLACE & flags)
+ darwinflags |= XATTR_REPLACE;
+ return darwinflags;
+}
+#endif
- ret = close (file_fd);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "close failed on %s: %s",
- real_filepath, strerror (errno));
- goto out;
- }
+int32_t
+posix_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char * real_path = NULL;
+ char *acl_xattr = NULL;
+ struct iatt stbuf = {0};
+ int32_t ret = 0;
+ ssize_t acl_size = 0;
+ dict_t *xattr = NULL;
+ posix_xattr_filler_t filler = {0,};
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (loc, out);
+ VALIDATE_OR_GOTO (dict, out);
+
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ posix_pstat(this, loc->gfid, real_path, &stbuf);
+
+ op_ret = -1;
+
+ dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
+ /* the io-stats-dump key should not reach disk */
+ dict_del (dict, GF_XATTR_IOSTATS_DUMP_KEY);
+
+ filler.real_path = real_path;
+ filler.this = this;
+ filler.stbuf = &stbuf;
+
+#ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+#else
+ filler.flags = flags;
+#endif
+ op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+/*
+ * FIXFIX: Send the stbuf info in the xdata for now
+ * This is used by DHT to redirect FOPs if the file is being migrated
+ * Ignore errors for now
+ */
+ if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) {
+ ret = posix_pstat(this, loc->gfid, real_path, &stbuf);
+ if (ret)
+ goto out;
+
+ ret = posix_set_iatt_in_dict (xattr, &stbuf);
+ }
+
+/*
+ * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which
+ * won't aware of access-control xlator. To update its context correctly,
+ * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path.
+ */
+ if (dict_get (dict, GF_POSIX_ACL_ACCESS)) {
+
+ /*
+ * The size of buffer will be know after calling sys_lgetxattr,
+ * so first we allocate buffer with large size(~4k), then we
+ * reduced into required size using GF_REALLO().
+ */
+ acl_xattr = GF_CALLOC (1, ACL_BUFFER_MAX, gf_posix_mt_char);
+ if (!acl_xattr)
+ goto out;
+
+ acl_size = sys_lgetxattr (real_path, POSIX_ACL_ACCESS_XATTR,
+ acl_xattr, ACL_BUFFER_MAX);
+
+ if (acl_size < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_XATTR_FAILED, "Posix acl is not set "
+ "properly at the backend");
+ goto out;
}
- create: /* we know file doesn't exist, create it */
+ /* If acl_size is more than max buffer size, just ignore it */
+ if (acl_size >= ACL_BUFFER_MAX) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ P_MSG_BUFFER_OVERFLOW, "size of acl is more"
+ "than the buffer");
+ goto out;
+ }
- file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644);
+ acl_xattr = GF_REALLOC (acl_xattr, acl_size);
+ if (!acl_xattr)
+ goto out;
- if (file_fd == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s with O_CREAT: %s",
- key, strerror (errno));
+ ret = dict_set_bin (xattr, POSIX_ACL_ACCESS_XATTR,
+ acl_xattr, acl_size);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_SET_XDATA_FAIL, "failed to set"
+ "xdata for acl");
+ GF_FREE (acl_xattr);
goto out;
}
+ }
- ret = write (file_fd, trav->value->data, trav->value->len);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "write failed on %s while setxattr with "
- "key %s: %s",
- real_filepath, key, strerror (errno));
+ if (dict_get (dict, GF_POSIX_ACL_DEFAULT)) {
+
+ acl_xattr = GF_CALLOC (1, ACL_BUFFER_MAX, gf_posix_mt_char);
+ if (!acl_xattr)
+ goto out;
+
+ acl_size = sys_lgetxattr (real_path, POSIX_ACL_DEFAULT_XATTR,
+ acl_xattr, ACL_BUFFER_MAX);
+
+ if (acl_size < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_XATTR_FAILED, "Posix acl is not set "
+ "properly at the backend");
goto out;
}
- ret = close (file_fd);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "close failed on %s while setxattr with "
- "key %s: %s",
- real_filepath, key, strerror (errno));
+ if (acl_size >= ACL_BUFFER_MAX) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ P_MSG_BUFFER_OVERFLOW, "size of acl is more"
+ "than the buffer");
+ goto out;
+ }
+
+ acl_xattr = GF_REALLOC (acl_xattr, acl_size);
+ if (!acl_xattr)
+ goto out;
+
+ ret = dict_set_bin (xattr, POSIX_ACL_DEFAULT_XATTR,
+ acl_xattr, acl_size);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_SET_XDATA_FAIL, "failed to set"
+ "xdata for acl");
+ GF_FREE (acl_xattr);
goto out;
}
}
+out:
+ SET_TO_OLD_FS_ID ();
- out:
- return op_ret;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xattr);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
}
+
int
-handle_pair (xlator_t *this, char *real_path,
- data_pair_t *trav, int flags)
+posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *dict, dict_t *xdata)
{
- int sys_ret = -1;
- int ret = 0;
+ int ret = -1;
+ int op_ret = -1;
+ const char *fname = NULL;
+ char *real_path = NULL;
+ char *found = NULL;
+ DIR *fd = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
- if (ZR_FILE_CONTENT_REQUEST(trav->key)) {
- ret = set_file_contents (this, real_path, trav, flags);
- } else {
- sys_ret = sys_lsetxattr (real_path, trav->key,
- trav->value->data,
- trav->value->len, flags);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) {
+ return -ESTALE;
+ }
- if (sys_ret < 0) {
- if (errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported");
- } else if (errno == ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr on %s failed: %s", real_path,
- strerror (errno));
- } else {
+ fd = sys_opendir (real_path);
+ if (!fd)
+ return -errno;
-#ifdef GF_DARWIN_HOST_OS
- gf_log (this->name,
- ((errno == EINVAL) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "%s: key:%s error:%s",
- real_path, trav->key,
- strerror (errno));
-#else /* ! DARWIN */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: key:%s error:%s",
- real_path, trav->key,
- strerror (errno));
-#endif /* DARWIN */
- }
+ fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY);
- ret = -errno;
- goto out;
- }
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir (fd, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (strcasecmp (entry->d_name, fname) == 0) {
+ found = gf_strdup (entry->d_name);
+ if (!found) {
+ (void) sys_closedir (fd);
+ return -ENOMEM;
+ }
+ break;
+ }
+ }
+
+ (void) sys_closedir (fd);
+
+ if (!found)
+ return -ENOENT;
+
+ ret = dict_set_dynstr (dict, (char *)key, found);
+ if (ret) {
+ GF_FREE (found);
+ return -ENOMEM;
+ }
+ ret = strlen (found) + 1;
+
+ return ret;
+}
+
+int
+posix_get_ancestry_directory (xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type,
+ int32_t *op_errno, dict_t *xdata)
+{
+ ssize_t handle_size = 0;
+ struct posix_private *priv = NULL;
+ inode_t *inode = NULL;
+ int ret = -1;
+ char dirpath[PATH_MAX] = {0,};
+
+ priv = this->private;
+
+ handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+ ret = posix_make_ancestryfromgfid (this, dirpath, PATH_MAX + 1, head,
+ type | POSIX_ANCESTRY_PATH,
+ leaf_inode->gfid,
+ handle_size, priv->base_path,
+ leaf_inode->table, &inode, xdata,
+ op_errno);
+ if (ret < 0)
+ goto out;
+
+
+ /* there is already a reference in loc->inode */
+ inode_unref (inode);
+
+ if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
+ if (strcmp (dirpath, "/"))
+ dirpath[strlen (dirpath) - 1] = '\0';
+
+ *path = gf_strdup (dirpath);
}
- out:
+
+out:
return ret;
}
int32_t
-posix_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int flags)
+posix_links_in_same_directory (char *dirpath, int count, inode_t *leaf_inode,
+ inode_t *parent, struct stat *stbuf,
+ gf_dirent_t *head, char **path,
+ int type, dict_t *xdata, int32_t *op_errno)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = NULL;
- data_pair_t * trav = NULL;
- int ret = -1;
+ int op_ret = -1;
+ inode_t *linked_inode = NULL;
+ gf_dirent_t *gf_entry = NULL;
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ char *tempv = NULL;
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+ char temppath[PATH_MAX] = {0,};
+ char scr[PATH_MAX * 4] = {0,};
- DECLARE_OLD_FS_ID_VAR;
- SET_FS_ID (frame->root->uid, frame->root->gid);
+ this = THIS;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (dict, out);
+ priv = this->private;
- MAKE_REAL_PATH (real_path, this, loc->path);
+ dirp = sys_opendir (dirpath);
+ if (!dirp) {
+ *op_errno = errno;
+ gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED,
+ "could not opendir %s", dirpath);
+ goto out;
+ }
- trav = dict->members_list;
+ while (count > 0) {
+ errno = 0;
+ entry = sys_readdir (dirp, scratch);
+ if (!entry || errno != 0)
+ break;
- while (trav) {
- ret = handle_pair (this, real_path, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (entry->d_ino != stbuf->st_ino)
+ continue;
+
+ /* Linking an inode here, can cause a race in posix_acl.
+ Parent inode gets linked here, but before
+ it reaches posix_acl_readdirp_cbk, create/lookup can
+ come on a leaf-inode, as parent-inode-ctx not yet updated
+ in posix_acl_readdirp_cbk, create and lookup can fail
+ with EACCESS. So do the inode linking in the quota xlator
+
+ linked_inode = inode_link (leaf_inode, parent,
+ entry->d_name, NULL);
+
+ GF_ASSERT (linked_inode == leaf_inode);
+ inode_unref (linked_inode);*/
+
+ if (type & POSIX_ANCESTRY_DENTRY) {
+ loc_t loc = {0, };
+
+ loc.inode = inode_ref (leaf_inode);
+ gf_uuid_copy (loc.gfid, leaf_inode->gfid);
+
+ (void) snprintf (temppath, sizeof(temppath), "%s/%s",
+ dirpath, entry->d_name);
+
+ gf_entry = gf_dirent_for_name (entry->d_name);
+ gf_entry->inode = inode_ref (leaf_inode);
+ gf_entry->dict
+ = posix_xattr_fill (this, temppath, &loc, NULL,
+ -1, xdata, NULL);
+ iatt_from_stat (&(gf_entry->d_stat), stbuf);
+
+ list_add_tail (&gf_entry->list, &head->list);
+ loc_wipe (&loc);
}
- trav = trav->next;
- }
- op_ret = 0;
+ if (type & POSIX_ANCESTRY_PATH) {
+ (void) snprintf (temppath, sizeof(temppath), "%s/%s",
+ &dirpath[priv->base_path_length],
+ entry->d_name);
+ if (!*path) {
+ *path = gf_strdup (temppath);
+ } else {
+ /* creating a colon separated */
+ /* list of hard links */
+ (void) snprintf (scr, sizeof(scr), "%s:%s",
+ *path, temppath);
- out:
- SET_TO_OLD_FS_ID ();
+ GF_FREE (*path);
+ *path = gf_strdup (scr);
+ }
+ if (!*path) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ }
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ count--;
+ }
- return 0;
+ op_ret = 0;
+out:
+ if (dirp) {
+ op_ret = sys_closedir (dirp);
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_CLOSE_FAILED, "closedir failed");
+ }
+ }
+
+ return op_ret;
}
int
-get_file_contents (xlator_t *this, char *real_path,
- const char *name, char **contents)
+posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type,
+ int32_t *op_errno, dict_t *xdata)
{
- char real_filepath[ZR_PATH_MAX] = {0,};
- char * key = NULL;
- int32_t file_fd = -1;
- struct iatt stbuf = {0,};
- int op_ret = 0;
- int ret = -1;
+ size_t remaining_size = 0;
+ int op_ret = -1, pathlen = -1;
+ ssize_t handle_size = 0;
+ uuid_t pgfid = {0,};
+ int nlink_samepgfid = 0;
+ struct stat stbuf = {0,};
+ char *list = NULL;
+ int32_t list_offset = 0;
+ struct posix_private *priv = NULL;
+ ssize_t size = 0;
+ inode_t *parent = NULL;
+ loc_t *loc = NULL;
+ char *leaf_path = NULL;
+ char key[4096] = {0,};
+ char dirpath[PATH_MAX] = {0,};
+ char pgfidstr[UUID_CANONICAL_FORM_LEN+1] = {0,};
- key = (char *) &(name[15]);
- sprintf (real_filepath, "%s/%s", real_path, key);
+ priv = this->private;
- op_ret = posix_lstat_with_gen (this, real_filepath, &stbuf);
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s",
- real_filepath, strerror (errno));
+ loc = GF_CALLOC (1, sizeof (*loc), gf_posix_mt_char);
+ if (loc == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
goto out;
}
- file_fd = open (real_filepath, O_RDONLY);
+ gf_uuid_copy (loc->gfid, leaf_inode->gfid);
- if (file_fd == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s",
- real_filepath, strerror (errno));
+ MAKE_INODE_HANDLE (leaf_path, this, loc, NULL);
+ if (!leaf_path) {
+ GF_FREE (loc);
+ *op_errno = ESTALE;
goto out;
}
+ GF_FREE (loc);
+
+ size = sys_llistxattr (leaf_path, NULL, 0);
+ if (size == -1) {
+ *op_errno = errno;
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting brick"
+ " with 'user_xattr' flag)");
- *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char),
- gf_posix_mt_char);
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_XATTR_FAILED, "listxattr failed on"
+ "%s", leaf_path);
+
+ }
- if (! *contents) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- ret = read (file_fd, *contents, stbuf.ia_size);
- if (ret <= 0) {
- op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s",
- real_filepath, strerror (errno));
+ if (size == 0) {
+ op_ret = 0;
goto out;
}
- *contents[stbuf.ia_size] = '\0';
+ list = alloca (size);
+ if (!list) {
+ *op_errno = errno;
+ goto out;
+ }
- op_ret = close (file_fd);
- file_fd = -1;
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s",
- real_filepath, strerror (errno));
+ size = sys_llistxattr (leaf_path, list, size);
+ if (size < 0) {
+ op_ret = -1;
+ *op_errno = errno;
goto out;
}
+ remaining_size = size;
+ list_offset = 0;
- out:
- if (op_ret < 0) {
- if (*contents)
- GF_FREE (*contents);
- if (file_fd != -1)
- close (file_fd);
+ op_ret = sys_lstat (leaf_path, &stbuf);
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s", leaf_path);
+ goto out;
}
+ while (remaining_size > 0) {
+ strncpy (key, list + list_offset, sizeof(key));
+ if (strncmp (key, PGFID_XATTR_KEY_PREFIX,
+ strlen (PGFID_XATTR_KEY_PREFIX)) != 0)
+ goto next;
+
+ op_ret = sys_lgetxattr (leaf_path, key,
+ &nlink_samepgfid,
+ sizeof(nlink_samepgfid));
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "getxattr failed on "
+ "%s: key = %s ", leaf_path, key);
+ goto out;
+ }
+
+ nlink_samepgfid = ntoh32 (nlink_samepgfid);
+
+ strncpy (pgfidstr, key + strlen(PGFID_XATTR_KEY_PREFIX),
+ sizeof(pgfidstr));
+ gf_uuid_parse (pgfidstr, pgfid);
+
+ handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+ /* constructing the absolute real path of parent dir */
+ strncpy (dirpath, priv->base_path, sizeof(dirpath));
+ pathlen = PATH_MAX + 1 - priv->base_path_length;
+
+ op_ret = posix_make_ancestryfromgfid (this,
+ dirpath + priv->base_path_length,
+ pathlen,
+ head,
+ type | POSIX_ANCESTRY_PATH,
+ pgfid,
+ handle_size,
+ priv->base_path,
+ leaf_inode->table,
+ &parent, xdata, op_errno);
+ if (op_ret < 0) {
+ goto next;
+ }
+
+ dirpath[strlen (dirpath) - 1] = '\0';
+
+ posix_links_in_same_directory (dirpath, nlink_samepgfid,
+ leaf_inode, parent, &stbuf, head,
+ path, type, xdata, op_errno);
+
+ if (parent != NULL) {
+ inode_unref (parent);
+ parent = NULL;
+ }
+
+ next:
+ remaining_size -= strlen (key) + 1;
+ list_offset += strlen (key) + 1;
+ } /* while (remaining_size > 0) */
+
+ op_ret = 0;
+
+out:
return op_ret;
}
+int
+posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type, int32_t *op_errno,
+ dict_t *xdata)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ if (IA_ISDIR (leaf_inode->ia_type)) {
+ ret = posix_get_ancestry_directory (this, leaf_inode,
+ head, path, type, op_errno,
+ xdata);
+ } else {
+
+ if (!priv->update_pgfid_nlinks)
+ goto out;
+ ret = posix_get_ancestry_non_directory (this, leaf_inode,
+ head, path, type,
+ op_errno, xdata);
+ }
+
+out:
+ if (ret && path && *path) {
+ GF_FREE (*path);
+ *path = NULL;
+ }
+
+ return ret;
+}
+
/**
* posix_getxattr - this function returns a dictionary with all the
* key:value pair present as xattr. used for
@@ -3023,22 +4254,25 @@ get_file_contents (xlator_t *this, char *real_path,
*/
int32_t
posix_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- struct posix_private *priv = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t list_offset = 0;
- size_t size = 0;
- size_t remaining_size = 0;
- char key[1024] = {0,};
- char gen_key[1024] = {0,};
- char * value = NULL;
- char * list = NULL;
- char * real_path = NULL;
- dict_t * dict = NULL;
- char * file_contents = NULL;
- int ret = -1;
+ struct posix_private *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *value = NULL;
+ char *real_path = NULL;
+ dict_t *dict = NULL;
+ char *file_contents = NULL;
+ int ret = -1;
+ char *path = NULL;
+ char *rpath = NULL;
+ char *dyn_rpath = NULL;
+ ssize_t size = 0;
+ char *list = NULL;
+ int32_t list_offset = 0;
+ size_t remaining_size = 0;
+ char host_buf[1024] = {0,};
+ char keybuffer[4096] = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -3047,43 +4281,287 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ op_ret = -1;
priv = this->private;
+ /* Allow access to stime xattr only to geo-rep worker */
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD && name &&
+ fnmatch ("*.glusterfs.*.stime", name, FNM_PERIOD) == 0) {
+ op_ret = -1;
+ op_errno = ENOATTR;
+ goto out;
+ }
+
if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name &&
- ZR_FILE_CONTENT_REQUEST(name)) {
- ret = get_file_contents (this, real_path, name,
- &file_contents);
+ ZR_FILE_CONTENT_REQUEST(name)) {
+ ret = posix_get_file_contents (this, loc->gfid, &name[15],
+ &file_contents);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_ERROR,
- "getting file contents failed: %s",
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_FILE_FAILED, "getting file contents"
+ "failed");
goto out;
}
}
- /* Get the total size */
- dict = get_new_dict ();
+ dict = dict_new ();
if (!dict) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- if (loc->inode && IA_ISREG (loc->inode->ia_type) && name &&
- (strcmp (name, "trusted.glusterfs.location") == 0)) {
- ret = dict_set_static_ptr (dict,
- "trusted.glusterfs.location",
- priv->hostname);
+ if (loc->inode && name && GF_POSIX_ACL_REQUEST (name)) {
+ ret = posix_pacl_get (real_path, name, &value);
+ if (ret || !value) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_ACL_FAILED, "could not get acl (%s) for"
+ "%s", name, real_path);
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, (char *)name, value);
if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not set hostname (%s) in dictionary",
- priv->hostname);
+ GF_FREE (value);
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_ACL_FAILED, "could not set acl (%s) for"
+ "%s in dictionary", name, real_path);
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
}
+
+ size = ret;
goto done;
+ }
+
+ if (loc->inode && name &&
+ (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
+ ret = posix_xattr_get_real_filename (frame, this, loc,
+ name, dict, xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ if (op_errno == ENOENT) {
+ gf_msg_debug (this->name, 0, "Failed to get "
+ "real filename (%s, %s)",
+ loc->path, name);
+ } else {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_GETTING_FILENAME_FAILED,
+ "Failed to get real filename (%s, %s):"
+ , loc->path, name);
+ }
+ goto out;
+ }
+
+ size = ret;
+ goto done;
}
-
+
+ if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
+ if (!fd_list_empty (loc->inode)) {
+ ret = dict_set_uint32 (dict, (char *)name, 1);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "Failed to set "
+ "dictionary value for %s", name);
+ } else {
+ ret = dict_set_uint32 (dict, (char *)name, 0);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "Failed to set "
+ "dictionary value for %s", name);
+ }
+ goto done;
+ }
+ if (loc->inode && name && (XATTR_IS_PATHINFO (name))) {
+ if (LOC_HAS_ABSPATH (loc))
+ MAKE_REAL_PATH (rpath, this, loc->path);
+ else
+ rpath = real_path;
+
+ (void) snprintf (host_buf, sizeof(host_buf),
+ "<POSIX(%s):%s:%s>", priv->base_path,
+ ((priv->node_uuid_pathinfo
+ && !gf_uuid_is_null(priv->glusterd_uuid))
+ ? uuid_utoa (priv->glusterd_uuid)
+ : priv->hostname),
+ rpath);
+
+ dyn_rpath = gf_strdup (host_buf);
+ if (!dyn_rpath) {
+ ret = -1;
+ goto done;
+ }
+ size = strlen (dyn_rpath) + 1;
+ ret = dict_set_dynstr (dict, (char *)name, dyn_rpath);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "could not set value"
+ " (%s) in dictionary", dyn_rpath);
+ GF_FREE (dyn_rpath);
+ }
+
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0)
+ && !gf_uuid_is_null (priv->glusterd_uuid)) {
+ (void) snprintf (host_buf, sizeof(host_buf), "%s",
+ uuid_utoa (priv->glusterd_uuid));
+
+ dyn_rpath = gf_strdup (host_buf);
+ if (!dyn_rpath) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ size = strlen (dyn_rpath) + 1;
+ ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY,
+ dyn_rpath);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ P_MSG_DICT_SET_FAILED, "could not set value"
+ "(%s) in dictionary", dyn_rpath);
+ GF_FREE (dyn_rpath);
+ op_errno = -ret;
+ goto out;
+ }
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strcmp (name, GFID_TO_PATH_KEY) == 0)) {
+ ret = inode_path (loc->inode, NULL, &path);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_INODE_PATH_GET_FAILED,
+ "%s: could not get "
+ "inode path", uuid_utoa (loc->inode->gfid));
+ goto out;
+ }
+
+ size = ret;
+ ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ GF_FREE (path);
+ goto out;
+ }
+ goto done;
+ }
+
+ if (loc->inode && name
+ && (strcmp (name, GET_ANCESTRY_PATH_KEY) == 0)) {
+ int type = POSIX_ANCESTRY_PATH;
+
+ op_ret = posix_get_ancestry (this, loc->inode, NULL,
+ &path, type, &op_errno,
+ xdata);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ op_ret = dict_set_dynstr (dict, GET_ANCESTRY_PATH_KEY, path);
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -op_ret,
+ P_MSG_GET_KEY_VALUE_FAILED, "could not get "
+ "value for key (%s)", GET_ANCESTRY_PATH_KEY);
+ GF_FREE (path);
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ goto done;
+ }
+
+ if (loc->inode && name
+ && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) {
+ op_ret = posix_get_objectsignature (real_path, dict);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ goto done;
+ }
+
+ if (name) {
+ strncpy (keybuffer, name, sizeof(keybuffer));
+ char *key = keybuffer;
+#if defined(GF_DARWIN_HOST_OS_DISABLED)
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ if (strncmp(key, "user.",5) == 0) {
+ key += 5;
+ gf_msg_debug (this->name, 0, "getxattr for file %s"
+ " stripping user key: %s -> %s",
+ real_path, keybuffer, key);
+ }
+ }
+#endif
+ size = sys_lgetxattr (real_path, key, NULL, 0);
+ if (size == -1) {
+ op_errno = errno;
+ if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ } else if (op_errno == ENOATTR ||
+ op_errno == ENODATA) {
+ gf_msg_debug (this->name, 0,
+ "No such attribute:%s for file %s",
+ key, real_path);
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_XATTR_FAILED, "getxattr failed"
+ " on %s: %s ", real_path, key);
+ }
+
+ goto done;
+ }
+ value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ size = sys_lgetxattr (real_path, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "getxattr failed on "
+ "%s: key = %s", real_path, key);
+ GF_FREE (value);
+ goto out;
+ }
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_DICT_SET_FAILED, "dict set operation "
+ "on %s for the key %s failed.", real_path, key);
+ GF_FREE (value);
+ goto out;
+ }
+
+ goto done;
+ }
size = sys_llistxattr (real_path, NULL, 0);
if (size == -1) {
@@ -3092,12 +4570,15 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
this->name, GF_LOG_WARNING,
"Extended attributes not "
- "supported.");
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
}
else {
- gf_log (this->name, GF_LOG_ERROR,
- "listxattr failed on %s: %s",
- real_path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED,
+ "listxattr failed on %s",
+ real_path);
}
goto out;
}
@@ -3105,68 +4586,96 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
if (size == 0)
goto done;
- list = alloca (size + 1);
+ list = alloca (size);
if (!list) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- ret = snprintf (gen_key, 1023, "trusted.%s.gen", this->name);
-
size = sys_llistxattr (real_path, list, size);
+ if (size < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
remaining_size = size;
list_offset = 0;
while (remaining_size > 0) {
- if(*(list + list_offset) == '\0')
- break;
-
- strcpy (key, list + list_offset);
- op_ret = sys_lgetxattr (real_path, key, NULL, 0);
- if (op_ret == -1)
+ strncpy (keybuffer, list + list_offset, sizeof(keybuffer));
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD &&
+ fnmatch ("*.glusterfs.*.stime", keybuffer, FNM_PERIOD) == 0)
+ goto ignore;
+
+ size = sys_lgetxattr (real_path, keybuffer, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "getxattr failed on "
+ "%s: key = %s ", real_path, keybuffer);
break;
+ }
- value = GF_CALLOC (op_ret + 1, sizeof(char),
+ value = GF_CALLOC (size + 1, sizeof(char),
gf_posix_mt_char);
if (!value) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- op_ret = sys_lgetxattr (real_path, key, value, op_ret);
- if (op_ret == -1) {
+ size = sys_lgetxattr (real_path, keybuffer, value, size);
+ if (size == -1) {
+ op_ret = -1;
op_errno = errno;
-
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "getxattr failed on "
+ "%s: key = %s ", real_path, keybuffer);
+ GF_FREE (value);
break;
}
- value [op_ret] = '\0';
- if (strcmp (key, gen_key) != 0)
- dict_set (dict, key, data_from_dynptr (value, op_ret));
- else
+ value [size] = '\0';
+#ifdef GF_DARWIN_HOST_OS
+ /* The protocol expect namespace for now */
+ char *newkey = NULL;
+ gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey);
+ strncpy (keybuffer, newkey, sizeof(keybuffer));
+ GF_FREE (newkey);
+#endif
+ op_ret = dict_set_dynptr (dict, keybuffer, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_DICT_SET_FAILED, "dict set operation "
+ "on %s for the key %s failed.", real_path,
+ keybuffer);
GF_FREE (value);
+ goto out;
+ }
- remaining_size -= strlen (key) + 1;
- list_offset += strlen (key) + 1;
+ignore:
+ remaining_size -= strlen (keybuffer) + 1;
+ list_offset += strlen (keybuffer) + 1;
} /* while (remaining_size > 0) */
- done:
+done:
op_ret = size;
if (dict) {
- dict_ref (dict);
+ dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
}
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL);
- if (dict)
+ if (dict) {
dict_unref (dict);
+ }
return 0;
}
@@ -3174,21 +4683,20 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
int32_t
posix_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name)
+ fd_t *fd, const char *name, dict_t *xdata)
{
int32_t op_ret = -1;
- int32_t op_errno = ENOENT;
- uint64_t tmp_pfd = 0;
+ int32_t op_errno = EINVAL;
struct posix_fd * pfd = NULL;
int _fd = -1;
int32_t list_offset = 0;
- size_t size = 0;
+ ssize_t size = 0;
size_t remaining_size = 0;
- char key[1024] = {0,};
char * value = NULL;
char * list = NULL;
dict_t * dict = NULL;
int ret = -1;
+ char key[4096] = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -3198,37 +4706,126 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
SET_FS_ID (frame->root->uid, frame->root->gid);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ op_ret = -1;
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
/* Get the total size */
- dict = get_new_dict ();
+ dict = dict_new ();
if (!dict) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
+ op_ret = -1;
+ op_errno = ENOMEM;
goto out;
}
+ if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
+ ret = dict_set_uint32 (dict, (char *)name, 1);
+ if (ret < 0) {
+ op_ret = -1;
+ size = -1;
+ op_errno = ENOMEM;
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "Failed to set "
+ "dictionary value for %s", name);
+ }
+ goto done;
+ }
+
+ if (name && strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) {
+ op_ret = posix_fdget_objectsignature (_fd, dict);
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "posix_fdget_objectsignature failed");
+ op_errno = -op_ret;
+ op_ret = -1;
+ size = -1;
+ }
+
+ goto done;
+ }
+
+ if (name) {
+ strncpy (key, name, sizeof(key));
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = this->private;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ char *newkey = NULL;
+ gf_add_prefix (XATTR_USER_PREFIX, key, &newkey);
+ strncpy (key, newkey, sizeof(key));
+ GF_FREE (newkey);
+ }
+#endif
+ size = sys_fgetxattr (_fd, key, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ if (errno == ENODATA || errno == ENOATTR) {
+ gf_msg_debug (this->name, 0, "fgetxattr failed"
+ " on key %s (%s)", key,
+ strerror (op_errno));
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "fgetxattr failed "
+ "on key %s", key);
+ }
+ goto done;
+ }
+
+ value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ size = sys_fgetxattr (_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "fgetxattr failed on "
+ "fd %p for the key %s ", fd, key);
+ GF_FREE (value);
+ goto out;
+ }
+
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ P_MSG_DICT_SET_FAILED, "dict set operation "
+ "on key %s failed", key);
+ GF_FREE (value);
+ goto out;
+ }
+
+ goto done;
+ }
+
size = sys_flistxattr (_fd, NULL, 0);
if (size == -1) {
+ op_ret = -1;
op_errno = errno;
if ((errno == ENOTSUP) || (errno == ENOSYS)) {
GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
this->name, GF_LOG_WARNING,
"Extended attributes not "
- "supported.");
+ "supported (try remounting "
+ "brick with 'user_xattr' flag)");
}
else {
- gf_log (this->name, GF_LOG_ERROR,
- "listxattr failed on %p: %s",
- fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "listxattr failed on %p:",
+ fd);
}
goto out;
}
@@ -3238,8 +4835,8 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
list = alloca (size + 1);
if (!list) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
+ op_ret = -1;
+ op_errno = ENOMEM;
goto out;
}
@@ -3251,41 +4848,65 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
if(*(list + list_offset) == '\0')
break;
- strcpy (key, list + list_offset);
- op_ret = sys_fgetxattr (_fd, key, NULL, 0);
- if (op_ret == -1)
+ strncpy (key, list + list_offset, sizeof(key));
+ size = sys_fgetxattr (_fd, key, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "fgetxattr failed on "
+ "fd %p for the key %s ", fd, key);
break;
+ }
- value = GF_CALLOC (op_ret + 1, sizeof(char),
+ value = GF_CALLOC (size + 1, sizeof(char),
gf_posix_mt_char);
if (!value) {
+ op_ret = -1;
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- op_ret = sys_fgetxattr (_fd, key, value, op_ret);
- if (op_ret == -1)
+ size = sys_fgetxattr (_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "fgetxattr failed on "
+ "the fd %p for the key %s ", fd, key);
+ GF_FREE (value);
break;
+ }
- value [op_ret] = '\0';
- dict_set (dict, key, data_from_dynptr (value, op_ret));
+ value [size] = '\0';
+
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ P_MSG_DICT_SET_FAILED, "dict set operation "
+ "failed on key %s", key);
+ GF_FREE (value);
+ goto out;
+ }
remaining_size -= strlen (key) + 1;
list_offset += strlen (key) + 1;
} /* while (remaining_size > 0) */
- done:
+done:
op_ret = size;
if (dict) {
- dict_ref (dict);
+ dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
}
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
@@ -3293,64 +4914,30 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
-
-int
-fhandle_pair (xlator_t *this, int fd,
- data_pair_t *trav, int flags)
+static int /* returns whatever posix_fhandle_pair returns for this key/value pair */
+_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, /* passed to dict_foreach by posix_fsetxattr; d is not read */
+ void *tmp)
{
- int sys_ret = -1;
- int ret = 0;
-
- sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data,
- trav->value->len, flags);
-
- if (sys_ret < 0) {
- if (errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported");
- } else if (errno == ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr on fd=%d failed: %s", fd,
- strerror (errno));
- } else {
-
-#ifdef GF_DARWIN_HOST_OS
- gf_log (this->name,
- ((errno == EINVAL) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "fd=%d: key:%s error:%s",
- fd, trav->key,
- strerror (errno));
-#else /* ! DARWIN */
- gf_log (this->name, GF_LOG_ERROR,
- "fd=%d: key:%s error:%s",
- fd, trav->key,
- strerror (errno));
-#endif /* DARWIN */
- }
-
- ret = -errno;
- goto out;
- }
+ posix_xattr_filler_t *filler = NULL;
-out:
- return ret;
-}
+ filler = tmp; /* tmp is the posix_xattr_filler_t the caller handed to dict_foreach */
+ return posix_fhandle_pair (filler->this, filler->fdnum, k, v, /* forward the pair with the fd, flags and stbuf kept in the filler */
+ filler->flags, filler->stbuf);
+}
int32_t
posix_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int flags)
+ fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct posix_fd * pfd = NULL;
- uint64_t tmp_pfd = 0;
- int _fd = -1;
- data_pair_t * trav = NULL;
- int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct posix_fd *pfd = NULL;
+ int _fd = -1;
+ int ret = -1;
+ struct iatt stbuf = {0,};
+ dict_t *xattr = NULL;
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3360,103 +4947,307 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, out);
VALIDATE_OR_GOTO (dict, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- trav = dict->members_list;
+ posix_fdstat (this, pfd->fd, &stbuf);
- while (trav) {
- ret = fhandle_pair (this, _fd, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
+
+ filler.fdnum = _fd;
+ filler.this = this;
+ filler.stbuf = &stbuf;
+#ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+#else
+ filler.flags = flags;
+#endif
+ op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ if (!ret && xdata && dict_get (xdata, GLUSTERFS_DURABLE_OP)) {
+ op_ret = sys_fsync (_fd);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_DURABILITY_REQ_NOT_SATISFIED,
+ "could not satisfy durability request: "
+ "reason ");
}
- trav = trav->next;
}
- op_ret = 0;
+ if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) {
+ ret = posix_fdstat (this, pfd->fd, &stbuf);
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_XATTR_FAILED, "fsetxattr (fstat)"
+ "failed on fd=%p", fd);
+ goto out;
+ }
- out:
+ xattr = dict_new ();
+ if (!xattr)
+ goto out;
+ ret = posix_set_iatt_in_dict (xattr, &stbuf);
+ }
+
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xattr);
+
+ if (xattr)
+ dict_unref (xattr);
return 0;
}
+int /* _posix_remove_xattr: per-key dict_foreach callback; returns the sys_lremovexattr result with ENODATA/ENOATTR mapped to 0 */
+_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data)
+{
+ int32_t op_ret = 0;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = (posix_xattr_filler_t *) data; /* supplies this, real_path and the op_errno slot */
+ this = filler->this;
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = (struct posix_private *) this->private;
+ char *newkey = NULL;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey); /* NOTE(review): result is not checked before newkey is used - confirm it cannot stay NULL */
+ gf_msg_debug ("remove_xattr", 0, "key %s => %s" , key,
+ newkey);
+ key = newkey; /* from here on the removal uses the rewritten key */
+ }
+#endif
+ /* Bulk remove xattr is an internal fop in gluster. Some of the xattrs may
+ * have special behavior. Ex: removexattr("posix.system_acl_access"),
+ * removes more than one xattr on the file that could be present in the
+ * bulk-removal request. Removexattr of these deleted xattrs will fail
+ * with either ENODATA/ENOATTR. Since all this fop cares about is removal
+ * of the xattrs in the bulk-remove request, a key that is already deleted
+ * can be treated as success.
+ */
+
+ op_ret = sys_lremovexattr (filler->real_path, key);
+ if (op_ret == -1) {
+ if (errno == ENODATA || errno == ENOATTR)
+ op_ret = 0; /* attribute already absent: count as removed */
+ }
+
+ if (op_ret == -1) { /* only failures other than ENODATA/ENOATTR get here */
+ filler->op_errno = errno; /* read back by the caller that invoked dict_foreach */
+ if (errno != ENOATTR && errno != ENODATA && errno != EPERM) /* in effect only EPERM is exempt from logging at this point */
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "removexattr failed on %s"
+ " (for %s)", filler->real_path, key);
+ }
+#ifdef GF_DARWIN_HOST_OS
+ GF_FREE(newkey);
+#endif
+ return op_ret;
+}
+
int32_t
posix_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata) /* xdata is optional: read for the bulk key list and for DHT_IATT_IN_XDATA_KEY */
{
int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t ret = -1;
char * real_path = NULL;
+ struct iatt stbuf = {0};
+ dict_t *xattr = NULL; /* reply dict; stays NULL unless the iatt request below is honoured */
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) { /* MAKE_INODE_HANDLE left real_path NULL: reply ESTALE */
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+
+ if (!strcmp (GFID_XATTR_KEY, name)) { /* refuse to remove the gfid xattr; NOTE(review): name is dereferenced here but NULL-checked only further down */
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+ "Remove xattr called on gfid for file %s", real_path);
+ op_ret = -1; /* NOTE(review): op_errno is still 0 on this rejection */
+ goto out;
+ }
+ if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) { /* likewise refuse to remove the volume-id xattr */
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+ "Remove xattr called on volume-id for file %s",
+ real_path);
+ op_ret = -1; /* NOTE(review): op_errno is still 0 here as well */
+ goto out;
+ }
+
SET_FS_ID (frame->root->uid, frame->root->gid);
+ /**
+ * sending an empty key name with xdata containing the
+ * list of key(s) to be removed implies "bulk remove request"
+ * for removexattr.
+ */
+ if (name && (strcmp (name, "") == 0) && xdata) {
+ filler.real_path = real_path;
+ filler.this = this;
+ op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); /* one removal attempt per key in xdata */
+ if (op_ret) {
+ op_errno = filler.op_errno; /* errno recorded by the callback */
+ }
+
+ goto out; /* the bulk path unwinds without building an iatt reply */
+ }
+
op_ret = sys_lremovexattr (real_path, name);
+ if (op_ret == -1) {
+ op_errno = errno;
+ if (op_errno != ENOATTR && op_errno != ENODATA && /* these three errnos are returned to the caller without an error log */
+ op_errno != EPERM)
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "removexattr on %s "
+ "(for %s)", real_path, name);
+ goto out;
+ }
+
+ if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { /* key present in xdata: stat the file and attach the result to the reply */
+ ret = posix_pstat(this, loc->gfid, real_path, &stbuf);
+ if (ret)
+ goto out; /* NOTE(review): unwinds with the non-failure op_ret from the removal, op_errno 0 and no reply dict */
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ ret = posix_set_iatt_in_dict (xattr, &stbuf); /* this return value is not examined afterwards */
+ }
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID (); /* NOTE(review): also reached by the early exits above, before SET_FS_ID ran - confirm that is harmless */
+
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xattr);
+
+ if (xattr)
+ dict_unref (xattr); /* drop the local reference after the unwind */
+
+ return 0;
+}
+
+int32_t /* always returns 0; the outcome travels in the fremovexattr unwind */
+posix_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct posix_fd * pfd = NULL;
+ struct iatt stbuf = {0,};
+ dict_t *xattr = NULL; /* reply dict; stays NULL unless the iatt request below is honoured */
+ int _fd = -1;
+ int ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ if (!strcmp (GFID_XATTR_KEY, name)) { /* refuse to remove the gfid xattr; NOTE(review): name is not NULL-checked before strcmp */
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+ "Remove xattr called on gfid for file");
+ goto out; /* NOTE(review): unwinds with op_ret -1 but op_errno 0 */
+ }
+ if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) { /* likewise refuse to remove the volume-id xattr */
+ gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+ "Remove xattr called on volume-id for file");
+ goto out;
+ }
+
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); /* presumably fills op_errno on failure - TODO confirm against the helper */
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+ _fd = pfd->fd;
+
+
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ op_ret = sys_fremovexattr (_fd, name);
if (op_ret == -1) {
op_errno = errno;
- if (op_errno != ENOATTR && op_errno != EPERM)
- gf_log (this->name, GF_LOG_ERROR,
- "removexattr on %s: %s", loc->path,
- strerror (op_errno));
+ if (op_errno != ENOATTR && op_errno != ENODATA && /* these three errnos are returned to the caller without an error log */
+ op_errno != EPERM)
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "fremovexattr (for %s)",
+ name);
goto out;
}
+ if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { /* key present in xdata: fstat the fd and attach the result to the reply */
+ ret = posix_fdstat (this, pfd->fd, &stbuf);
+ if (ret)
+ goto out; /* NOTE(review): unwinds with the non-failure op_ret from the removal and no reply dict */
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ ret = posix_set_iatt_in_dict (xattr, &stbuf); /* this return value is not examined afterwards */
+ }
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xattr);
+
+ if (xattr)
+ dict_unref (xattr); /* drop the local reference after the unwind */
+
return 0;
}
int32_t
posix_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
+ fd_t *fd, int datasync, dict_t *xdata) /* neither datasync nor xdata is read in the lines shown */
{
int32_t op_ret = -1;
int32_t op_errno = 0;
- struct posix_fd * pfd = NULL;
- int _fd = -1;
int ret = -1;
- uint64_t tmp_pfd = 0;
+ struct posix_fd *pfd = NULL; /* fetched only to confirm the fd has a posix context; not used afterwards */
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); /* presumably fills op_errno on failure - TODO confirm against the helper */
if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, /* raised from a debug log to a warning */
"pfd is NULL, fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
-
- _fd = pfd->fd;
op_ret = 0;
- out:
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+out:
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); /* no sync call appears in the lines shown; the reply carries no xdata */
return 0;
}
@@ -3464,12 +5255,12 @@ posix_fsyncdir (call_frame_t *frame, xlator_t *this,
void
posix_print_xattr (dict_t *this,
- char *key,
- data_t *value,
- void *data)
+ char *key, /* xattr name, printed with %s */
+ data_t *value, /* printed as the int32 produced by data_to_int32 */
+ void *data) /* not read; this is not read either */
{
- gf_log ("posix", GF_LOG_DEBUG,
- "(key/val) = (%s/%d)", key, data_to_int32 (value));
+ gf_msg_debug ("posix", 0, /* debug-log a single key/value pair under the fixed domain "posix" */
+ "(key/val) = (%s/%d)", key, data_to_int32 (value));
+}
@@ -3483,200 +5274,398 @@ posix_print_xattr (dict_t *this,
static void
__add_array (int32_t *dest, int32_t *src, int count)
{
- int i = 0;
- for (i = 0; i < count; i++) {
- dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i]));
- }
+ int i = 0;
+ int32_t destval = 0;
+ for (i = 0; i < count; i++) {
+ destval = ntoh32 (dest[i]);
+ dest[i] = hton32 (destval + ntoh32 (src[i]));
+ }
}
+static void
+__add_long_array (int64_t *dest, int64_t *src, int count)
+{
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i]));
+ }
+}
-/**
- * xattrop - xattr operations - for internal use by GlusterFS
- * @optype: ADD_ARRAY:
- * dict should contain:
- * "key" ==> array of 32-bit numbers
- */
-int
-do_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
+/* functions:
+ __add_array_with_default
+ __add_long_array_with_default
+
+ xattrop type:
+ GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT
+
+ These operations are similar to 'GF_XATTROP_ADD_ARRAY',
+ except that they add a default value if the xattr is
+ missing or its value is zero on disk.
+
+ One use-case of these operations is in inode-quota.
+ When a new directory is created, its default dir_count
+ should be set to 1. So when an xattrop sets the
+ inode-xattrs, it should account for an initial dir_count
+ of 1 if the xattrs are not present.
+
+ Here is the usage of this operation
+
+ value required in xdata for each key
+ struct array {
+ int32_t newvalue_1;
+ int32_t newvalue_2;
+ ...
+ int32_t newvalue_n;
+ int32_t default_1;
+ int32_t default_2;
+ ...
+ int32_t default_n;
+ };
+
+ or
+
+ struct array {
+ int32_t value_1;
+ int32_t value_2;
+ ...
+ int32_t value_n;
+ } data[2];
+ fill data[0] with new value to add
+ fill data[1] with default value
+
+ xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
+ for i from 1 to n
+ {
+ if (xattr (dest_i) is zero or not set in the disk)
+ dest_i = newvalue_i + default_i
+ else
+ dest_i = dest_i + newvalue_i
+ }
+
+ value in xdata after xattrop is successful
+ struct array {
+ int32_t dest_1;
+ int32_t dest_2;
+ ...
+ int32_t dest_n;
+ };
+*/
+static void
+__add_array_with_default (int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ int32_t destval = 0;
+
+ for (i = 0; i < count; i++) {
+ destval = ntoh32 (dest[i]);
+ if (destval == 0)
+ dest[i] = hton32 (ntoh32 (src[i]) +
+ ntoh32 (src[count + i]));
+ else
+ dest[i] = hton32 (destval + ntoh32 (src[i]));
+ }
+}
+
+static void
+__add_long_array_with_default (int64_t *dest, int64_t *src, int count)
{
- char *real_path = NULL;
- int32_t *array = NULL;
- int size = 0;
- int count = 0;
+ int i = 0;
+ int64_t destval = 0;
+
+ for (i = 0; i < count; i++) {
+ destval = ntoh64 (dest[i]);
+ if (destval == 0)
+ dest[i] = hton64 (ntoh64 (src[i]) +
+ ntoh64 (src[i + count]));
+ else
+ dest[i] = hton64 (destval + ntoh64 (src[i]));
+ }
+}
+
+static int
+_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ int size = 0;
+ int count = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ gf_xattrop_flags_t optype = 0;
+ char *array = NULL;
+ char *dst_data = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ optype = (gf_xattrop_flags_t)(filler->flags);
+ this = filler->this;
+ inode = filler->inode;
+ count = v->len;
+ if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT ||
+ optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT)
+ count = count / 2;
+
+ array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char);
+
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = this->private;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) {
+ k += XATTR_USER_PREFIX_LEN;
+ }
+ }
+#endif
+
+ LOCK (&inode->lock);
+ {
+ if (filler->real_path) {
+ size = sys_lgetxattr (filler->real_path, k,
+ (char *)array, count);
+ } else {
+ size = sys_fgetxattr (filler->fdnum, k, (char *)array,
+ count);
+ }
+
+ op_errno = errno;
+ if ((size == -1) && (op_errno != ENODATA) &&
+ (op_errno != ENOATTR)) {
+ if (op_errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported by filesystem");
+ } else if (op_errno != ENOENT ||
+ !posix_special_xattr (marker_xattrs,
+ k)) {
+ if (filler->real_path)
+ gf_msg (this->name, fop_log_level (GF_FOP_XATTROP,
+ op_errno), op_errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on %s while "
+ "doing xattrop: Key:%s ",
+ filler->real_path, k);
+ else
+ gf_msg (this->name, GF_LOG_ERROR,
+ op_errno, P_MSG_XATTR_FAILED,
+ "fgetxattr failed on gfid=%s "
+ "while doing xattrop: "
+ "Key:%s (%s)",
+ uuid_utoa (filler->inode->gfid),
+ k, strerror (op_errno));
+ }
+
+ op_ret = -1;
+ goto unlock;
+ }
- int op_ret = 0;
- int op_errno = 0;
+ if (size == -1 && optype == GF_XATTROP_GET_AND_SET) {
+ GF_FREE (array);
+ array = NULL;
+ }
- int ret = 0;
- int _fd = -1;
- uint64_t tmp_pfd = 0;
- struct posix_fd *pfd = NULL;
+ /* We only write back the xattr if it has been really modified
+ * (i.e. v->data is not all 0's). Otherwise we return its value
+ * but we don't update anything.
+ *
+ * If the xattr does not exist, a value of all 0's is returned
+ * without creating it. */
+ size = count;
+ if (optype != GF_XATTROP_GET_AND_SET &&
+ mem_0filled(v->data, v->len) == 0)
+ goto unlock;
- data_pair_t *trav = NULL;
+ dst_data = array;
+ switch (optype) {
- char * path = NULL;
- inode_t * inode = NULL;
+ case GF_XATTROP_ADD_ARRAY:
+ __add_array ((int32_t *) array,
+ (int32_t *) v->data, count / 4);
+ break;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (xattr, out);
- VALIDATE_OR_GOTO (this, out);
+ case GF_XATTROP_ADD_ARRAY64:
+ __add_long_array ((int64_t *) array,
+ (int64_t *) v->data,
+ count / 8);
+ break;
- trav = xattr->members_list;
+ case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT:
+ __add_array_with_default ((int32_t *) array,
+ (int32_t *) v->data,
+ count / 4);
+ break;
- if (fd) {
- ret = fd_ctx_get (fd, this, &tmp_pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get pfd from fd=%p",
- fd);
- op_ret = -1;
- op_errno = EBADFD;
- goto out;
- }
- pfd = (struct posix_fd *)(long)tmp_pfd;
- _fd = pfd->fd;
- }
+ case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT:
+ __add_long_array_with_default ((int64_t *) array,
+ (int64_t *) v->data,
+ count / 8);
+ break;
- if (loc && loc->path)
- MAKE_REAL_PATH (real_path, this, loc->path);
+ case GF_XATTROP_GET_AND_SET:
+ dst_data = v->data;
+ break;
- if (loc) {
- path = gf_strdup (loc->path);
- inode = loc->inode;
- } else if (fd) {
- inode = fd->inode;
+ default:
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ P_MSG_UNKNOWN_OP, "Unknown xattrop type (%d)"
+ " on %s. Please send a bug report to "
+ "gluster-devel@gluster.org", optype,
+ filler->real_path);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ if (filler->real_path) {
+ size = sys_lsetxattr (filler->real_path, k,
+ dst_data, count, 0);
+ } else {
+ size = sys_fsetxattr (filler->fdnum, k,
+ (char *)dst_data,
+ count, 0);
+ }
+ op_errno = errno;
}
+unlock:
+ UNLOCK (&inode->lock);
- while (trav && inode) {
- count = trav->value->len / sizeof (int32_t);
- array = GF_CALLOC (count, sizeof (int32_t),
- gf_posix_mt_int32_t);
+ if (op_ret == -1)
+ goto out;
- LOCK (&inode->lock);
- {
- if (loc) {
- size = sys_lgetxattr (real_path, trav->key, (char *)array,
- trav->value->len);
- } else {
- size = sys_fgetxattr (_fd, trav->key, (char *)array,
- trav->value->len);
- }
+ if (size == -1) {
+ if (filler->real_path)
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_XATTR_FAILED, "setxattr failed on %s "
+ "while doing xattrop: key=%s",
+ filler->real_path, k);
+ else
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_XATTR_FAILED,
+ "fsetxattr failed on gfid=%s while doing "
+ "xattrop: key=%s (%s)",
+ uuid_utoa (filler->inode->gfid),
+ k, strerror (op_errno));
+ op_ret = -1;
+ goto out;
+ } else if (array) {
+ op_ret = dict_set_bin (filler->xattr, k, array, count);
+ if (op_ret) {
+ if (filler->real_path)
+ gf_msg_debug (this->name, 0,
+ "dict_set_bin failed (path=%s): "
+ "key=%s (%s)", filler->real_path,
+ k, strerror (-size));
+ else
+ gf_msg_debug (this->name, 0,
+ "dict_set_bin failed (gfid=%s): "
+ "key=%s (%s)",
+ uuid_utoa (filler->inode->gfid),
+ k, strerror (-size));
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ GF_FREE (array);
+ goto out;
+ }
+ array = NULL;
+ }
- op_errno = errno;
- if ((size == -1) && (op_errno != ENODATA) &&
- (op_errno != ENOATTR)) {
- if (op_errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported by filesystem");
- } else {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on %s while doing "
- "xattrop: %s", path,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fgetxattr failed on fd=%d while doing "
- "xattrop: %s", _fd,
- strerror (op_errno));
- }
+out:
+ if (op_ret < 0)
+ filler->op_errno = op_errno;
- op_ret = -1;
- goto unlock;
- }
+ if (array)
+ GF_FREE (array);
- switch (optype) {
+ return op_ret;
+}
- case GF_XATTROP_ADD_ARRAY:
- __add_array (array, (int32_t *) trav->value->data,
- trav->value->len / 4);
- break;
+/**
+ * xattrop - xattr operations - for internal use by GlusterFS
+ * @optype: ADD_ARRAY:
+ * dict should contain:
+ * "key" ==> array of 32-bit numbers
+ */
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unknown xattrop type (%d) on %s. Please send "
- "a bug report to gluster-devel@nongnu.org",
- optype, path);
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
+int
+do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr)
+{
+ int op_ret = 0;
+ int op_errno = 0;
+ int _fd = -1;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {0,};
+ dict_t *xdata = NULL;
- if (loc) {
- size = sys_lsetxattr (real_path, trav->key, array,
- trav->value->len, 0);
- } else {
- size = sys_fsetxattr (_fd, trav->key, (char *)array,
- trav->value->len, 0);
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (xattr, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ if (fd) {
+ op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING,
+ fop_log_level(GF_FOP_FXATTROP, op_errno),
+ P_MSG_PFD_GET_FAILED, "failed to get pfd from"
+ " fd=%p", fd);
+ goto out;
}
- unlock:
- UNLOCK (&inode->lock);
+ _fd = pfd->fd;
+ }
- if (op_ret == -1)
+ if (loc && !gf_uuid_is_null (loc->gfid)) {
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
goto out;
+ }
+ }
- op_errno = errno;
- if (size == -1) {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr failed on %s while doing xattrop: "
- "key=%s (%s)", path,
- trav->key, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr failed on fd=%d while doing xattrop: "
- "key=%s (%s)", _fd,
- trav->key, strerror (op_errno));
+ if (real_path) {
+ inode = loc->inode;
+ } else if (fd) {
+ inode = fd->inode;
+ }
- op_ret = -1;
- goto out;
- } else {
- size = dict_set_bin (xattr, trav->key, array,
- trav->value->len);
-
- if (size != 0) {
- if (loc)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (path=%s): "
- "key=%s (%s)", path,
- trav->key, strerror (-size));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (fd=%d): "
- "key=%s (%s)", _fd,
- trav->key, strerror (-size));
-
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- array = NULL;
- }
+ xdata = dict_new ();
+ if (xdata == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
- array = NULL;
- trav = trav->next;
- }
+ filler.this = this;
+ filler.fdnum = _fd;
+ filler.real_path = real_path;
+ filler.flags = (int)optype;
+ filler.inode = inode;
+ filler.xattr = xdata;
+
+ op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair,
+ &filler);
+ op_errno = filler.op_errno;
out:
- if (array)
- GF_FREE (array);
- if (path)
- GF_FREE (path);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xdata, NULL);
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr);
- return 0;
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
}
int
posix_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
+ loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
do_xattrop (frame, this, loc, NULL, optype, xattr);
return 0;
@@ -3685,7 +5674,7 @@ posix_xattrop (call_frame_t *frame, xlator_t *this,
int
posix_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
+ fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
do_xattrop (frame, this, NULL, fd, optype, xattr);
return 0;
@@ -3694,7 +5683,7 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this,
int
posix_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -3707,28 +5696,33 @@ posix_access (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
- op_ret = access (real_path, mask & 07);
+ op_ret = sys_access (real_path, mask & 07);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s",
- loc->path, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED,
+ "access failed on %s", real_path);
goto out;
}
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL);
return 0;
}
int32_t
posix_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -3737,7 +5731,6 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
struct iatt postop = {0,};
struct posix_fd *pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -3750,51 +5743,47 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
"pfd is NULL, fd=%p", fd);
- op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gen (this, _fd, &preop);
+ op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
goto out;
}
- op_ret = ftruncate (_fd, offset);
+ op_ret = sys_ftruncate (_fd, offset);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "ftruncate failed on fd=%p: %s",
- fd, strerror (errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+ "ftruncate failed on fd=%p (%"PRId64"", fd, offset);
goto out;
}
- op_ret = posix_fstat_with_gen (this, _fd, &postop);
+ op_ret = posix_fdstat (this, _fd, &postop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation fstat failed on fd=%p: %s",
- fd, strerror (errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd);
goto out;
}
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, &postop);
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop,
+ &postop, NULL);
return 0;
}
@@ -3802,16 +5791,16 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
int32_t
posix_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
int _fd = -1;
int32_t op_ret = -1;
int32_t op_errno = 0;
struct iatt buf = {0,};
struct posix_fd *pfd = NULL;
- uint64_t tmp_pfd = 0;
+ dict_t *xattr_rsp = NULL;
int ret = -1;
- struct posix_private *priv = NULL;
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3823,31 +5812,50 @@ posix_fstat (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
"pfd is NULL, fd=%p", fd);
op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gen (this, _fd, &buf);
+ op_ret = posix_fdstat (this, _fd, &buf);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s",
- fd, strerror (op_errno));
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
goto out;
}
+ if (xdata)
+ xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, _fd, xdata,
+ &buf);
+
op_ret = 0;
out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ return 0;
+}
+
+int32_t
+posix_lease (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct gf_lease *lease, dict_t *xdata)
+{
+ struct gf_lease nullease = {0, };
+
+ gf_msg (this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED,
+ "\"features/leases\" translator is not loaded. You need"
+ "to use it for proper functioning of your application");
+
+ STACK_UNWIND_STRICT (lease, frame, -1, ENOSYS, &nullease, NULL);
return 0;
}
@@ -3855,325 +5863,486 @@ static int gf_posix_lk_log;
int32_t
posix_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- struct flock nullock = {0, };
+ struct gf_flock nullock = {0, };
- gf_posix_lk_log++;
-
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
"functioning of your application.");
- STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock);
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL);
return 0;
}
int32_t
posix_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- "You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL);
return 0;
}
int32_t
posix_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- "You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL);
return 0;
}
int32_t
posix_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- "You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL);
return 0;
}
int32_t
posix_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- " You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL);
return 0;
}
-int32_t
-posix_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, int whichop)
+int
+posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
{
- uint64_t tmp_pfd = 0;
- struct posix_fd *pfd = NULL;
- DIR *dir = NULL;
- int ret = -1;
- size_t filled = 0;
- int count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- gf_dirent_t *this_entry = NULL;
- gf_dirent_t entries;
- struct dirent *entry = NULL;
- off_t in_case = -1;
- int32_t this_size = -1;
- char *real_path = NULL;
- int real_path_len = -1;
- char *entry_path = NULL;
- int entry_path_len = -1;
- struct posix_private *priv = NULL;
- struct iatt stbuf = {0, };
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- INIT_LIST_HEAD (&entries.list);
-
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ struct posix_fd *pfd = NULL;
+ struct stat stbuf = {0,};
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
+ int op_errno = 0;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {{0,},};
+
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
"pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
- pfd = (struct posix_fd *)(long)tmp_pfd;
- if (!pfd->path) {
- op_errno = EBADFD;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd does not have path set (possibly file "
- "fd, fd=%p)", fd);
- goto out;
- }
-
- real_path = pfd->path;
- real_path_len = strlen (real_path);
-
- entry_path_len = real_path_len + NAME_MAX;
- entry_path = alloca (entry_path_len);
-
- if (!entry_path) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ count = -1;
+ errno = op_errno;
goto out;
}
- strncpy (entry_path, real_path, entry_path_len);
- entry_path[real_path_len] = '/';
+ if (skip_dirs) {
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ if (len <= 0) {
+ errno = ESTALE;
+ count = -1;
+ goto out;
+ }
+ hpath = alloca (len + 256); /* NAME_MAX */
- dir = pfd->dir;
+ if (posix_handle_path (this, fd->inode->gfid, NULL, hpath,
+ len) <= 0) {
+ errno = ESTALE;
+ count = -1;
+ goto out;
+ }
- if (!dir) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dir is NULL for fd=%p", fd);
- op_errno = EINVAL;
- goto out;
+ len = strlen (hpath);
+ hpath[len] = '/';
}
-
if (!off) {
rewinddir (dir);
} else {
seekdir (dir, off);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != off && off != pfd->dir_eof) {
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+ P_MSG_DIR_OPERATION_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)", off, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
}
while (filled <= size) {
- in_case = telldir (dir);
+ in_case = (u_long)telldir (dir);
if (in_case == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "telldir failed on dir=%p: %s",
- dir, strerror (errno));
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ P_MSG_DIR_OPERATION_FAILED,
+ "telldir failed on dir=%p", dir);
goto out;
}
errno = 0;
- entry = readdir (dir);
- if (!entry) {
+ entry = sys_readdir (dir, scratch);
+
+ if (!entry || errno != 0) {
if (errno == EBADF) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir failed on dir=%p: %s",
- dir, strerror (op_errno));
+ gf_msg (THIS->name, GF_LOG_WARNING, errno,
+ P_MSG_DIR_OPERATION_FAILED,
+ "readdir failed on dir=%p",
+ dir);
goto out;
}
break;
}
- this_size = dirent_size (entry);
+#ifdef __NetBSD__
+ /*
+ * NetBSD with UFS1 backend uses backing files for
+ * extended attributes. They can be found in a
+ * .attribute file located at the root of the filesystem.
+ * We hide it from glusterfs clients, since chaos will occur
+ * when the cluster/dht xlator decides to distribute
+ * extended attribute backing files across storage servers.
+ */
+ if (__is_root_gfid (fd->inode->gfid) == 0
+ && (!strcmp(entry->d_name, ".attribute")))
+ continue;
+#endif /* __NetBSD__ */
+
+ if (__is_root_gfid (fd->inode->gfid)
+ && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) {
+ continue;
+ }
+
+ if (skip_dirs) {
+ if (DT_ISDIR (entry->d_type)) {
+ continue;
+ } else if (hpath) {
+ strcpy (&hpath[len+1], entry->d_name);
+ ret = sys_lstat (hpath, &stbuf);
+ if (!ret && S_ISDIR (stbuf.st_mode))
+ continue;
+ }
+ }
+
+ this_size = max (sizeof (gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry->d_name) + 1;
if (this_size + filled > size) {
seekdir (dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case &&
+ in_case != pfd->dir_eof) {
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+ P_MSG_DIR_OPERATION_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ in_case, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
break;
}
- /* Device spanning requires that we have a stat buf for the
- * file so we need to perform a stat on the two conditions
- * below.
- */
- if ((whichop == GF_FOP_READDIRP) || (priv->span_devices)) {
- strcpy (entry_path + real_path_len + 1, entry->d_name);
- op_ret = posix_lstat_with_gen (this, entry_path, &stbuf);
- if (-1 == op_ret)
- continue;
- } else
- stbuf.ia_ino = entry->d_ino;
-
- /* So at this point stbuf ino is either:
- * a. the original inode number got from entry, in case this
- * was a readdir fop or if device spanning was disabled.
- *
- * b. the scaled inode number, if device spanning was enabled
- * or this was a readdirp fop.
- */
- entry->d_ino = stbuf.ia_ino;
-
this_entry = gf_dirent_for_name (entry->d_name);
if (!this_entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not create gf_dirent for entry %s: (%s)",
- entry->d_name, strerror (errno));
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ P_MSG_GF_DIRENT_CREATE_FAILED,
+ "could not create "
+ "gf_dirent for entry %s", entry->d_name);
goto out;
}
- this_entry->d_off = telldir (dir);
+ /*
+ * We store the offset of the next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) relies on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
this_entry->d_ino = entry->d_ino;
- this_entry->d_stat = stbuf;
+ this_entry->d_type = entry->d_type;
- list_add_tail (&this_entry->list, &entries.list);
+ list_add_tail (&this_entry->list, &entries->list);
filled += this_size;
count ++;
}
- op_ret = count;
- errno = 0;
- if ((!readdir (dir) && (errno == 0)))
- op_errno = ENOENT;
+ if ((!sys_readdir (dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ pfd->dir_eof = (u_long)last_off;
+ }
+out:
+ return count;
+}
- out:
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries);
+dict_t *
+posix_entry_xattr_fill (xlator_t *this, inode_t *inode,
+ fd_t *fd, char *entry_path, dict_t *dict,
+ struct iatt *stbuf)
+{
+ loc_t tmp_loc = {0,};
- gf_dirent_free (&entries);
+ /* If we don't send the 'loc', open-fd-count will be a problem. */
+ tmp_loc.inode = inode;
+
+ return posix_xattr_fill (this, entry_path, &tmp_loc, NULL, -1, dict,
+ stbuf);
- return 0;
}
-int32_t
-posix_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+#ifdef _DIRENT_HAVE_D_TYPE
+static int
+posix_d_type_from_ia_type (ia_type_t type)
{
- posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
- return 0;
+ switch (type) {
+ case IA_IFDIR: return DT_DIR;
+ case IA_IFCHR: return DT_CHR;
+ case IA_IFBLK: return DT_BLK;
+ case IA_IFIFO: return DT_FIFO;
+ case IA_IFLNK: return DT_LNK;
+ case IA_IFREG: return DT_REG;
+ case IA_IFSOCK: return DT_SOCK;
+ default: return DT_UNKNOWN;
+ }
}
+#endif
-int32_t
-posix_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+int
+posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict)
{
- posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
- return 0;
+ gf_dirent_t *entry = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ char *hpath = NULL;
+ int len = 0;
+ struct iatt stbuf = {0, };
+ uuid_t gfid;
+ int ret = -1;
+
+ if (list_empty(&entries->list))
+ return 0;
+
+ itable = fd->inode->table;
+
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ if (len <= 0)
+ return -1;
+ hpath = alloca (len + 256); /* NAME_MAX */
+ if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, len) <= 0)
+ return -1;
+ len = strlen (hpath);
+ hpath[len] = '/';
+
+ list_for_each_entry (entry, &entries->list, list) {
+ memset (gfid, 0, 16);
+ inode = inode_grep (fd->inode->table, fd->inode,
+ entry->d_name);
+ if (inode)
+ gf_uuid_copy (gfid, inode->gfid);
+
+ strcpy (&hpath[len+1], entry->d_name);
+
+ ret = posix_pstat (this, gfid, hpath, &stbuf);
+
+ if (ret == -1) {
+ if (inode)
+ inode_unref (inode);
+ continue;
+ }
+
+ if (!inode)
+ inode = inode_find (itable, stbuf.ia_gfid);
+
+ if (!inode)
+ inode = inode_new (itable);
+
+ entry->inode = inode;
+
+ if (dict) {
+ entry->dict =
+ posix_entry_xattr_fill (this, entry->inode,
+ fd, hpath,
+ dict, &stbuf);
+ }
+
+ entry->d_stat = stbuf;
+ if (stbuf.ia_ino)
+ entry->d_ino = stbuf.ia_ino;
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) {
+ /* The platform supports d_type but the underlying
+ filesystem doesn't. We set d_type to the correct
+ value from ia_type */
+ entry->d_type =
+ posix_d_type_from_ia_type (stbuf.ia_type);
+ }
+#endif
+
+ inode = NULL;
+ }
+
+ return 0;
}
int32_t
-posix_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flag)
+posix_do_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict)
{
- char * real_path = NULL;
- DIR * dir = NULL;
- struct dirent * dirent = NULL;
- uint8_t file_checksum[NAME_MAX] = {0,};
- uint8_t dir_checksum[NAME_MAX] = {0,};
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int i = 0;
- int length = 0;
-
- struct iatt buf = {0,};
- char tmp_real_path[ZR_PATH_MAX] = {0,};
- int ret = -1;
+ struct posix_fd *pfd = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ gf_dirent_t entries;
+ int32_t skip_dirs = 0;
- MAKE_REAL_PATH (real_path, this, loc->path);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
- dir = opendir (real_path);
+ INIT_LIST_HEAD (&entries.list);
- if (!dir){
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "opendir() failed on `%s': %s",
- real_path, strerror (op_errno));
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
goto out;
}
- while ((dirent = readdir (dir))) {
- errno = 0;
- if (!dirent) {
- if (errno != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "readdir() failed on dir=%p: %s",
- dir, strerror (errno));
- goto out;
- }
- break;
- }
+ dir = pfd->dir;
- length = strlen (dirent->d_name);
+ if (!dir) {
+ gf_msg (this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL,
+ "dir is NULL for fd=%p", fd);
+ op_errno = EINVAL;
+ goto out;
+ }
- strcpy (tmp_real_path, real_path);
- strcat (tmp_real_path, "/");
- strcat (tmp_real_path, dirent->d_name);
- ret = posix_lstat_with_gen (this, tmp_real_path, &buf);
+ /* When the READDIR_FILTER option is set to on, we can filter out
+ * entries that are directories from the entries list.
+ */
+ ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+ LOCK (&fd->lock);
+ {
+ /* posix_fill_readdir performs multiple separate individual
+ readdir() calls to fill up the buffer.
+
+ In case of NFS where the same anonymous FD is shared between
+ different applications, reading a common directory can
+ result in the anonymous fd getting re-used unsafely between
+ the two readdir requests (in two different io-threads).
+
+ It would also help, in the future, to replace the loop
+ around readdir() with a single large getdents() call.
+ */
+ count = posix_fill_readdir (fd, dir, off, size, &entries, this,
+ skip_dirs);
+ }
+ UNLOCK (&fd->lock);
- if (ret == -1)
- continue;
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
- if (IA_ISDIR (buf.ia_type)) {
- for (i = 0; i < length; i++)
- dir_checksum[i] ^= dirent->d_name[i];
- } else {
- for (i = 0; i < length; i++)
- file_checksum[i] ^= dirent->d_name[i];
+ if (whichop != GF_FOP_READDIRP)
+ goto out;
+
+ posix_readdirp_fill (this, fd, &entries, dict);
+
+out:
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free (&entries);
+
+ return 0;
+}
+
+
+int32_t
+posix_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata);
+ return 0;
+}
+
+
+int32_t
+posix_readdirp (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *dict)
+{
+ gf_dirent_t entries;
+ int32_t op_ret = -1, op_errno = 0;
+ gf_dirent_t *entry = NULL;
+
+
+ if ((dict != NULL) && (dict_get (dict, GET_ANCESTRY_DENTRY_KEY))) {
+ INIT_LIST_HEAD (&entries.list);
+
+ op_ret = posix_get_ancestry (this, fd->inode, &entries, NULL,
+ POSIX_ANCESTRY_DENTRY,
+ &op_errno, dict);
+ if (op_ret >= 0) {
+ op_ret = 0;
+
+ list_for_each_entry (entry, &entries.list, list) {
+ op_ret++;
+ }
}
- }
- closedir (dir);
- op_ret = 0;
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries,
+ NULL);
- out:
- STACK_UNWIND_STRICT (checksum, frame, op_ret, op_errno,
- file_checksum, dir_checksum);
+ gf_dirent_free (&entries);
+ return 0;
+ }
+ posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict);
return 0;
}
@@ -4182,30 +6351,24 @@ posix_priv (xlator_t *this)
{
struct posix_private *priv = NULL;
char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
- this->name);
+ (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s",
+ this->type, this->name);
gf_proc_dump_add_section(key_prefix);
- if (!this)
+ if (!this)
return 0;
priv = this->private;
- if (!priv)
+ if (!priv)
return 0;
- gf_proc_dump_build_key(key, key_prefix, "base_path");
- gf_proc_dump_write(key,"%s", priv->base_path);
- gf_proc_dump_build_key(key, key_prefix, "base_path_length");
- gf_proc_dump_write(key,"%d", priv->base_path_length);
- gf_proc_dump_build_key(key, key_prefix, "max_read");
- gf_proc_dump_write(key,"%d", priv->read_value);
- gf_proc_dump_build_key(key, key_prefix, "max_write");
- gf_proc_dump_write(key,"%d", priv->write_value);
- gf_proc_dump_build_key(key, key_prefix, "nr_files");
- gf_proc_dump_write(key,"%ld", priv->nr_files);
+ gf_proc_dump_write("base_path","%s", priv->base_path);
+ gf_proc_dump_write("base_path_length","%d", priv->base_path_length);
+ gf_proc_dump_write("max_read","%d", priv->read_value);
+ gf_proc_dump_write("max_write","%d", priv->write_value);
+ gf_proc_dump_write("nr_files","%ld", priv->nr_files);
return 0;
}
@@ -4219,67 +6382,99 @@ posix_inode (xlator_t *this)
+/*
+ * rchecksum fop: read up to `len` bytes at `offset` from the descriptor
+ * stored in the fd's posix context and compute a weak (rsync rolling) and
+ * a strong checksum over the bytes that were actually read.
+ *
+ * If the request xdata carries "check-zero-filled", the response xdata
+ * gets "buf-has-zeroes": _gf_false when mem_0filled() returns non-zero for
+ * the data read, _gf_true when it returns 0.
+ *
+ * The result is delivered through STACK_UNWIND_STRICT: op_ret is 0 on
+ * success and -1 (with op_errno set) on failure.  The function itself
+ * always returns 0.
+ */
int32_t
posix_rchecksum (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, int32_t len)
+ fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
{
- char *buf = NULL;
-
- int _fd = -1;
- uint64_t tmp_pfd = 0;
-
- struct posix_fd *pfd = NULL;
-
- int op_ret = -1;
- int op_errno = 0;
-
- int ret = 0;
-
- int32_t weak_checksum = 0;
- uint8_t strong_checksum[MD5_DIGEST_LEN];
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ ssize_t bytes_read = 0;
+ int32_t weak_checksum = 0;
+ int32_t zerofillcheck = 0;
+ unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0};
+ struct posix_private *priv = NULL;
+ dict_t *rsp_xdata = NULL;
+ gf_boolean_t buf_has_zeroes = _gf_false;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- memset (strong_checksum, 0, MD5_DIGEST_LEN);
- buf = GF_CALLOC (1, len, gf_posix_mt_char);
+ priv = this->private;
+ memset (strong_checksum, 0, MD5_DIGEST_LENGTH);
+
+ /* alloc_buf is what must be freed; buf is the pointer used for I/O */
+ alloc_buf = _page_aligned_alloc (len, &buf);
+ if (!alloc_buf) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (!buf) {
+ rsp_xdata = dict_new();
+ if (!rsp_xdata) {
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
goto out;
}
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_msg (this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL,
"pfd is NULL, fd=%p", fd);
- op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long) tmp_pfd;
_fd = pfd->fd;
- ret = pread (_fd, buf, len, offset);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pread of %d bytes returned %d (%s)",
- len, ret, strerror (errno));
+ LOCK (&fd->lock);
+ {
+ if (priv->aio_capable && priv->aio_init_done)
+ __posix_fd_set_odirect (fd, pfd, 0, offset, len);
+
+ bytes_read = sys_pread (_fd, buf, len, offset);
+ if (bytes_read < 0) {
+ /* save errno before logging: the logger may overwrite it */
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_PREAD_FAILED,
+ "pread of %d bytes returned %zd", len,
+ bytes_read);
+ }
- op_errno = errno;
- goto out;
}
+ UNLOCK (&fd->lock);
- weak_checksum = gf_rsync_weak_checksum (buf, len);
- gf_rsync_strong_checksum (buf, len, strong_checksum);
+ /* the error exit is taken only after the fd lock has been dropped */
+ if (bytes_read < 0)
+ goto out;
- GF_FREE (buf);
+ if (xdata && dict_get_int32 (xdata, "check-zero-filled",
+ &zerofillcheck) == 0) {
+ buf_has_zeroes = (mem_0filled (buf, bytes_read)) ? _gf_false :
+ _gf_true;
+ ret = dict_set_uint32 (rsp_xdata, "buf-has-zeroes",
+ buf_has_zeroes);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ P_MSG_DICT_SET_FAILED, "%s: Failed to set "
+ "dictionary value for key: %s",
+ uuid_utoa (fd->inode->gfid), "buf-has-zeroes");
+ op_errno = -ret;
+ goto out;
+ }
+ }
+ /* both checksums cover exactly the bytes read (a short read is
+ * possible); `ret` is only a status code and must not be used as a
+ * length here */
+ weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf,
+ (size_t) bytes_read);
+ gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) bytes_read,
+ (unsigned char *) strong_checksum);
op_ret = 0;
out:
STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
- weak_checksum, strong_checksum);
+ weak_checksum, strong_checksum, rsp_xdata);
+ if (rsp_xdata)
+ dict_unref (rsp_xdata);
+ GF_FREE (alloc_buf);
+
return 0;
}
@@ -4294,17 +6489,17 @@ notify (xlator_t *this,
...)
{
switch (event)
- {
- case GF_EVENT_PARENT_UP:
- {
- /* Tell the parent that posix xlator is up */
- default_notify (this, GF_EVENT_CHILD_UP, data);
- }
- break;
- default:
- /* */
- break;
- }
+ {
+ case GF_EVENT_PARENT_UP:
+ {
+ /* Tell the parent that posix xlator is up */
+ default_notify (this, GF_EVENT_CHILD_UP, data);
+ }
+ break;
+ default:
+ /* */
+ break;
+ }
return 0;
}
@@ -4317,50 +6512,265 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1);
-
+
if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
return ret;
}
return ret;
}
+/*
+ * Ensure the brick root (priv->base_path) is owned by uid/gid.
+ *
+ * The path is lstat'ed first.  An id of -1 is treated as already
+ * satisfied when comparing against the current owner; if both ids are
+ * satisfied nothing is changed.  Otherwise sys_chown() is called with the
+ * uid/gid exactly as passed in.
+ *
+ * Returns 0 when no change was needed or the chown succeeded, otherwise
+ * the non-zero result of the failing sys_lstat()/sys_chown() call (the
+ * failure is logged).
+ */
+static int
+posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
+{
+        struct posix_private *priv       = this->private;
+        struct stat           brick_stat = {0,};
+        int                   uid_ok     = 0;
+        int                   gid_ok     = 0;
+        int                   ret        = -1;
+
+        ret = sys_lstat (priv->base_path, &brick_stat);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        P_MSG_DIR_OPERATION_FAILED, "Failed to stat "
+                        "brick path %s",
+                        priv->base_path);
+                return ret;
+        }
+
+        uid_ok = (uid == -1) || (brick_stat.st_uid == uid);
+        gid_ok = (gid == -1) || (brick_stat.st_gid == gid);
+        if (uid_ok && gid_ok)
+                return 0;
+
+        ret = sys_chown (priv->base_path, uid, gid);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, errno,
+                        P_MSG_DIR_OPERATION_FAILED, "Failed to set uid/gid for"
+                        " brick path %s", priv->base_path);
+        }
+
+        return ret;
+}
+
+
+/*
+ * Translate the textual batch-fsync mode in `str` into the matching
+ * BATCH_* value and store it in priv->batch_fsync_mode.
+ *
+ * Returns 0 when `str` is one of the recognised names, -1 otherwise (in
+ * which case priv is left untouched).  `str` must not be NULL.
+ */
+static int
+set_batch_fsync_mode (struct posix_private *priv, const char *str)
+{
+        if (!strcmp (str, "none")) {
+                priv->batch_fsync_mode = BATCH_NONE;
+                return 0;
+        }
+
+        if (!strcmp (str, "syncfs")) {
+                priv->batch_fsync_mode = BATCH_SYNCFS;
+                return 0;
+        }
+
+        if (!strcmp (str, "syncfs-single-fsync")) {
+                priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC;
+                return 0;
+        }
+
+        if (!strcmp (str, "syncfs-reverse-fsync")) {
+                priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC;
+                return 0;
+        }
+
+        if (!strcmp (str, "reverse-fsync")) {
+                priv->batch_fsync_mode = BATCH_REVERSE_FSYNC;
+                return 0;
+        }
+
+        return -1;
+}
+
+#ifdef GF_DARWIN_HOST_OS
+/*
+ * Translate the textual xattr user-namespace mode in `str` into the
+ * matching XATTR_* value and store it in priv->xattr_user_namespace.
+ *
+ * Returns 0 when `str` is one of "none", "strip", "append" or "both",
+ * -1 otherwise (priv is then left untouched).  `str` must not be NULL.
+ */
+static int
+set_xattr_user_namespace_mode (struct posix_private *priv, const char *str)
+{
+        if (!strcmp (str, "none")) {
+                priv->xattr_user_namespace = XATTR_NONE;
+                return 0;
+        }
+
+        if (!strcmp (str, "strip")) {
+                priv->xattr_user_namespace = XATTR_STRIP;
+                return 0;
+        }
+
+        if (!strcmp (str, "append")) {
+                priv->xattr_user_namespace = XATTR_APPEND;
+                return 0;
+        }
+
+        if (!strcmp (str, "both")) {
+                priv->xattr_user_namespace = XATTR_BOTH;
+                return 0;
+        }
+
+        return -1;
+}
+#endif
+
+/*
+ * Re-read the run-time changeable options from `options` and apply them
+ * to this->private.
+ *
+ * Every GF_OPTION_RECONF invocation is given the `out` label, presumably
+ * as its failure exit (the macro is defined elsewhere -- confirm).
+ * Returns 0 once every option has been processed, -1 if the function left
+ * early through `out`.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+ int32_t uid = -1;
+ int32_t gid = -1;
+ char *batch_fsync_mode_str = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF ("brick-uid", uid, options, int32, out);
+ GF_OPTION_RECONF ("brick-gid", gid, options, int32, out);
+ /* ownership is only touched when at least one id was supplied; the
+ * result of posix_set_owner() is not checked here */
+ if (uid != -1 || gid != -1)
+ posix_set_owner (this, uid, gid);
+
+ GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str,
+ options, str, out);
+
+ /* an unrecognised mode string aborts the reconfigure with ret == -1 */
+ if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT,
+ "Unknown mode string: %s", batch_fsync_mode_str);
+ goto out;
+ }
+
+#ifdef GF_DARWIN_HOST_OS
+
+ char *xattr_user_namespace_mode_str = NULL;
+
+ GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str,
+ options, str, out);
+
+ if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT,
+ "Unknown xattr user namespace mode string: %s",
+ xattr_user_namespace_mode_str);
+ goto out;
+ }
+
+#endif
+
+ GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
+ options, bool, out);
+
+ /* switch AIO on or off to match the freshly read setting */
+ if (priv->aio_configured)
+ posix_aio_on (this);
+ else
+ posix_aio_off (this);
+
+ GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo,
+ options, bool, out);
+
+ /* informational only: this case is logged, not treated as an error */
+ if (priv->node_uuid_pathinfo &&
+ (gf_uuid_is_null (priv->glusterd_uuid))) {
+ gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
+ options, uint32, out);
+ /* called after the interval has been re-read; its result is not
+ * checked */
+ posix_spawn_health_check_thread (this);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+posix_create_unlink_dir (xlator_t *this) {
+
+ struct posix_private *priv = NULL;
+ struct stat stbuf;
+ int ret = -1;
+ uuid_t gfid = {0};
+ char gfid_str[64] = {0};
+ char unlink_path[PATH_MAX] = {0,};
+ char landfill_path[PATH_MAX] = {0,};
+
+ priv = this->private;
+
+ (void) snprintf (unlink_path, sizeof(unlink_path), "%s/%s",
+ priv->base_path, GF_UNLINK_PATH);
+
+ gf_uuid_generate (gfid);
+ uuid_utoa_r (gfid, gfid_str);
+
+ (void) snprintf (landfill_path, sizeof(landfill_path), "%s/%s/%s",
+ priv->base_path, GF_LANDFILL_PATH, gfid_str);
+
+ ret = sys_stat (unlink_path, &stbuf);
+ switch (ret) {
+ case -1:
+ if (errno != ENOENT) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_HANDLE_CREATE,
+ "Checking for %s failed",
+ unlink_path);
+ return -1;